From 50104cc32a498f7517a51c8dc93106c51c7a54b4 Mon Sep 17 00:00:00 2001 From: Ondřej Surý Date: Wed, 20 Apr 2011 15:44:41 +0200 Subject: Imported Upstream version 2011.03.07.1 --- src/Make.common | 2 +- src/Make.pkg | 58 +- src/clean.bash | 11 +- src/cmd/5a/lex.c | 2 + src/cmd/5c/txt.c | 4 +- src/cmd/5g/gg.h | 1 + src/cmd/5g/gsubr.c | 1 + src/cmd/5g/reg.c | 4 +- src/cmd/5l/5.out.h | 3 + src/cmd/5l/asm.c | 49 +- src/cmd/5l/doc.go | 2 + src/cmd/5l/l.h | 5 + src/cmd/5l/list.c | 4 + src/cmd/5l/noop.c | 24 +- src/cmd/5l/obj.c | 51 +- src/cmd/5l/optab.c | 3 + src/cmd/5l/pass.c | 12 - src/cmd/5l/span.c | 2 + src/cmd/6l/asm.c | 58 +- src/cmd/6l/doc.go | 8 +- src/cmd/6l/l.h | 4 +- src/cmd/6l/obj.c | 68 ++- src/cmd/6l/pass.c | 30 +- src/cmd/8a/lex.c | 4 + src/cmd/8l/8.out.h | 5 + src/cmd/8l/asm.c | 56 +- src/cmd/8l/doc.go | 10 +- src/cmd/8l/l.h | 3 + src/cmd/8l/obj.c | 94 ++- src/cmd/8l/optab.c | 5 + src/cmd/8l/pass.c | 30 +- src/cmd/Makefile | 66 +++ src/cmd/cgo/gcc.go | 65 +- src/cmd/cgo/main.go | 12 +- src/cmd/cgo/out.go | 96 ++- src/cmd/clean.bash | 16 - src/cmd/ebnflint/ebnflint.go | 4 +- src/cmd/gc/const.c | 6 +- src/cmd/gc/doc.go | 2 + src/cmd/gc/export.c | 12 +- src/cmd/gc/go.y | 9 +- src/cmd/gc/init.c | 27 +- src/cmd/gc/reflect.c | 33 +- src/cmd/gc/subr.c | 11 +- src/cmd/gc/typecheck.c | 6 +- src/cmd/godoc/dirtrees.go | 29 +- src/cmd/godoc/godoc.go | 82 +-- src/cmd/godoc/index.go | 8 +- src/cmd/godoc/main.go | 10 +- src/cmd/godoc/mapping.go | 47 +- src/cmd/godoc/utils.go | 12 +- src/cmd/gofmt/gofmt.go | 4 +- src/cmd/gofmt/test.sh | 2 +- src/cmd/goinstall/download.go | 19 +- src/cmd/goinstall/main.go | 11 +- src/cmd/goinstall/make.go | 23 + src/cmd/goinstall/parse.go | 12 +- src/cmd/gopack/ar.c | 2 +- src/cmd/gopack/doc.go | 4 +- src/cmd/gotest/doc.go | 20 +- src/cmd/govet/govet.go | 6 +- src/cmd/goyacc/Makefile | 2 +- src/cmd/goyacc/doc.go | 12 +- src/cmd/goyacc/goyacc.go | 314 +++++----- src/cmd/goyacc/units.y | 9 +- src/cmd/hgpatch/main.go | 4 +- src/cmd/ld/data.c 
| 6 +- src/cmd/ld/dwarf.c | 42 +- src/cmd/ld/go.c | 33 ++ src/cmd/ld/lib.c | 207 +++++++ src/cmd/ld/lib.h | 37 +- src/cmd/ld/macho.c | 7 - src/cmd/ld/pe.c | 8 +- src/cmd/ld/pe.h | 3 + src/cmd/make.bash | 30 - src/env.bash | 2 +- src/make.bash | 25 +- src/pkg/Makefile | 24 +- src/pkg/compress/bzip2/Makefile | 14 + src/pkg/compress/bzip2/bit_reader.go | 88 +++ src/pkg/compress/bzip2/bzip2.go | 390 ++++++++++++ src/pkg/compress/bzip2/bzip2_test.go | 158 +++++ src/pkg/compress/bzip2/huffman.go | 223 +++++++ src/pkg/compress/bzip2/move_to_front.go | 105 ++++ src/pkg/compress/flate/deflate_test.go | 147 +---- src/pkg/compress/lzw/Makefile | 12 + src/pkg/compress/lzw/reader.go | 210 +++++++ src/pkg/compress/lzw/reader_test.go | 132 +++++ src/pkg/compress/lzw/writer.go | 259 ++++++++ src/pkg/compress/lzw/writer_test.go | 111 ++++ src/pkg/compress/testdata/e.txt | 1 + src/pkg/compress/testdata/pi.txt | 1 + src/pkg/compress/zlib/testdata/e.txt | 1 - src/pkg/compress/zlib/testdata/pi.txt | 1 - src/pkg/compress/zlib/writer_test.go | 4 +- src/pkg/crypto/openpgp/Makefile | 14 + src/pkg/crypto/openpgp/canonical_text.go | 58 ++ src/pkg/crypto/openpgp/canonical_text_test.go | 50 ++ src/pkg/crypto/openpgp/keys.go | 280 +++++++++ src/pkg/crypto/openpgp/packet/encrypted_key.go | 4 +- src/pkg/crypto/openpgp/packet/literal.go | 4 +- src/pkg/crypto/openpgp/packet/packet.go | 6 +- src/pkg/crypto/openpgp/packet/public_key.go | 2 +- .../openpgp/packet/symmetrically_encrypted_test.go | 2 +- src/pkg/crypto/openpgp/read.go | 413 +++++++++++++ src/pkg/crypto/openpgp/read_test.go | 237 ++++++++ src/pkg/crypto/openpgp/write.go | 92 +++ src/pkg/crypto/openpgp/write_test.go | 34 ++ src/pkg/crypto/rand/rand_unix.go | 5 +- src/pkg/crypto/rsa/rsa.go | 8 + src/pkg/crypto/rsa/rsa_test.go | 2 +- src/pkg/crypto/tls/handshake_client.go | 2 +- src/pkg/crypto/tls/handshake_client_test.go | 2 +- src/pkg/crypto/tls/handshake_server_test.go | 2 +- src/pkg/exp/eval/stmt_test.go | 12 +- 
src/pkg/exp/wingui/Makefile | 2 + src/pkg/fmt/doc.go | 18 +- src/pkg/fmt/fmt_test.go | 21 +- src/pkg/fmt/format.go | 4 +- src/pkg/fmt/print.go | 18 +- src/pkg/fmt/scan.go | 215 ++++--- src/pkg/fmt/scan_test.go | 237 +++++++- src/pkg/go/ast/ast.go | 2 +- src/pkg/go/ast/walk.go | 4 +- src/pkg/go/parser/interface.go | 4 +- src/pkg/go/parser/parser.go | 93 ++- src/pkg/go/parser/parser_test.go | 35 +- src/pkg/go/printer/printer.go | 4 +- src/pkg/go/printer/printer_test.go | 6 +- src/pkg/go/printer/testdata/statements.golden | 10 +- src/pkg/go/printer/testdata/statements.input | 10 +- src/pkg/go/scanner/scanner.go | 11 +- src/pkg/gob/codec_test.go | 38 +- src/pkg/gob/debug.go | 35 +- src/pkg/gob/decode.go | 410 +++++++++---- src/pkg/gob/decoder.go | 2 +- src/pkg/gob/encode.go | 202 +++++-- src/pkg/gob/encoder.go | 120 ++-- src/pkg/gob/encoder_test.go | 18 + src/pkg/gob/gobencdec_test.go | 331 +++++++++++ src/pkg/gob/type.go | 437 +++++++++++--- src/pkg/gob/type_test.go | 24 +- src/pkg/html/doc.go | 3 + src/pkg/html/token.go | 135 +++-- src/pkg/html/token_test.go | 86 ++- src/pkg/http/Makefile | 3 + src/pkg/http/cgi/Makefile | 11 + src/pkg/http/cgi/cgi.go | 201 +++++++ src/pkg/http/cgi/cgi_test.go | 247 ++++++++ src/pkg/http/cgi/testdata/test.cgi | 34 ++ src/pkg/http/client.go | 216 ++++--- src/pkg/http/client_test.go | 26 + src/pkg/http/cookie.go | 336 +++++++++++ src/pkg/http/cookie_test.go | 96 +++ src/pkg/http/fs.go | 12 +- src/pkg/http/fs_test.go | 94 +-- src/pkg/http/header.go | 43 ++ src/pkg/http/httptest/Makefile | 12 + src/pkg/http/httptest/recorder.go | 79 +++ src/pkg/http/httptest/server.go | 42 ++ src/pkg/http/persist.go | 94 +-- src/pkg/http/proxy_test.go | 45 ++ src/pkg/http/range_test.go | 57 ++ src/pkg/http/readrequest_test.go | 18 +- src/pkg/http/request.go | 220 ++----- src/pkg/http/request_test.go | 20 +- src/pkg/http/requestwrite_test.go | 77 ++- src/pkg/http/response.go | 94 ++- src/pkg/http/response_test.go | 18 +- src/pkg/http/responsewrite_test.go 
| 6 +- src/pkg/http/serve_test.go | 162 ++--- src/pkg/http/server.go | 136 +++-- src/pkg/http/transfer.go | 63 +- src/pkg/http/transport.go | 151 +++++ src/pkg/image/decode_test.go | 89 +++ src/pkg/image/png/reader.go | 174 +++++- src/pkg/image/png/reader_test.go | 57 +- src/pkg/image/png/testdata/pngsuite/README | 11 +- .../image/png/testdata/pngsuite/basn0g01-30.png | Bin 0 -> 162 bytes .../image/png/testdata/pngsuite/basn0g01-30.sng | 39 ++ src/pkg/image/png/testdata/pngsuite/basn0g01.sng | 66 +-- .../image/png/testdata/pngsuite/basn0g02-29.png | Bin 0 -> 110 bytes .../image/png/testdata/pngsuite/basn0g02-29.sng | 38 ++ src/pkg/image/png/testdata/pngsuite/basn0g02.sng | 66 +-- .../image/png/testdata/pngsuite/basn0g04-31.png | Bin 0 -> 153 bytes .../image/png/testdata/pngsuite/basn0g04-31.sng | 40 ++ src/pkg/image/png/testdata/pngsuite/basn0g04.sng | 66 +-- src/pkg/image/png/testdata/pngsuite/basn3p02.sng | 11 +- src/pkg/image/png/testdata/pngsuite/basn3p04.sng | 7 +- src/pkg/image/png/testdata/pngsuite/basn4a08.sng | 66 +-- src/pkg/image/testdata/video-001.bmp | Bin 0 -> 46610 bytes src/pkg/image/testdata/video-001.gif | Bin 0 -> 13106 bytes src/pkg/image/testdata/video-001.jpeg | Bin 0 -> 21459 bytes src/pkg/image/testdata/video-001.png | Bin 0 -> 29228 bytes src/pkg/image/testdata/video-001.tiff | Bin 0 -> 30810 bytes src/pkg/io/ioutil/tempfile.go | 33 +- src/pkg/io/ioutil/tempfile_test.go | 27 +- src/pkg/json/decode.go | 72 ++- src/pkg/json/decode_test.go | 35 +- src/pkg/json/encode.go | 46 +- src/pkg/mime/multipart/multipart.go | 14 +- src/pkg/mime/multipart/multipart_test.go | 13 +- src/pkg/net/dial.go | 28 +- src/pkg/net/fd_windows.go | 655 ++++++++++----------- src/pkg/net/iprawsock.go | 6 +- src/pkg/net/ipsock.go | 29 +- src/pkg/net/multicast_test.go | 62 ++ src/pkg/net/net.go | 3 - src/pkg/net/parse.go | 10 - src/pkg/net/textproto/Makefile | 1 + src/pkg/net/textproto/header.go | 43 ++ src/pkg/net/textproto/reader.go | 16 +- 
src/pkg/net/textproto/reader_test.go | 8 +- src/pkg/net/udpsock.go | 41 ++ src/pkg/netchan/common.go | 15 +- src/pkg/netchan/export.go | 50 +- src/pkg/netchan/import.go | 39 +- src/pkg/netchan/netchan_test.go | 146 +---- src/pkg/os/error.go | 1 + src/pkg/path/Makefile | 12 - src/pkg/path/filepath/Makefile | 26 + src/pkg/path/filepath/match.go | 282 +++++++++ src/pkg/path/filepath/match_test.go | 106 ++++ src/pkg/path/filepath/path.go | 270 +++++++++ src/pkg/path/filepath/path_test.go | 392 ++++++++++++ src/pkg/path/filepath/path_unix.go | 10 + src/pkg/path/match.go | 83 +-- src/pkg/path/match_test.go | 28 - src/pkg/path/path.go | 78 +-- src/pkg/path/path_test.go | 159 ----- src/pkg/path/path_unix.go | 11 - src/pkg/path/path_windows.go | 11 - src/pkg/reflect/all_test.go | 75 +++ src/pkg/reflect/deepequal.go | 4 +- src/pkg/reflect/type.go | 114 +++- src/pkg/reflect/value.go | 158 +++-- src/pkg/rpc/server.go | 2 +- src/pkg/runtime/Makefile | 12 +- src/pkg/runtime/amd64/asm.s | 20 + src/pkg/runtime/amd64/traceback.c | 23 + src/pkg/runtime/arm/asm.s | 41 +- src/pkg/runtime/arm/cas5.s | 43 -- src/pkg/runtime/arm/cas6.s | 29 - src/pkg/runtime/arm/traceback.c | 37 +- src/pkg/runtime/cgocall.c | 3 +- src/pkg/runtime/chan.c | 47 +- src/pkg/runtime/darwin/386/signal.c | 10 +- src/pkg/runtime/darwin/386/sys.s | 29 +- src/pkg/runtime/darwin/amd64/signal.c | 10 +- src/pkg/runtime/darwin/amd64/sys.s | 19 +- src/pkg/runtime/darwin/thread.c | 3 +- src/pkg/runtime/freebsd/386/signal.c | 10 +- src/pkg/runtime/freebsd/386/sys.s | 42 +- src/pkg/runtime/freebsd/amd64/signal.c | 10 +- src/pkg/runtime/freebsd/amd64/sys.s | 24 +- src/pkg/runtime/freebsd/thread.c | 3 +- src/pkg/runtime/hashmap.c | 18 + src/pkg/runtime/iface.c | 33 +- src/pkg/runtime/linux/386/signal.c | 10 +- src/pkg/runtime/linux/386/sys.s | 7 +- src/pkg/runtime/linux/amd64/signal.c | 10 +- src/pkg/runtime/linux/amd64/sys.s | 10 +- src/pkg/runtime/linux/arm/signal.c | 9 +- src/pkg/runtime/linux/arm/sys.s | 31 + 
src/pkg/runtime/linux/thread.c | 3 +- src/pkg/runtime/malloc.goc | 17 +- src/pkg/runtime/mfinal.c | 5 +- src/pkg/runtime/mgc0.c | 61 +- src/pkg/runtime/print.c | 59 +- src/pkg/runtime/proc.c | 114 +++- src/pkg/runtime/runtime-gdb.py | 5 +- src/pkg/runtime/runtime.c | 40 +- src/pkg/runtime/runtime.h | 92 +-- src/pkg/runtime/stack.h | 86 +++ src/pkg/runtime/type.go | 3 +- src/pkg/runtime/type.h | 1 + src/pkg/runtime/windows/386/signal.c | 1 - src/pkg/runtime/windows/signals.h | 3 + src/pkg/sync/Makefile | 18 +- src/pkg/sync/asm_386.s | 23 - src/pkg/sync/asm_amd64.s | 23 - src/pkg/sync/asm_arm5.s | 40 -- src/pkg/sync/asm_arm6.s | 30 - src/pkg/sync/atomic/Makefile | 18 + src/pkg/sync/atomic/asm_386.s | 87 +++ src/pkg/sync/atomic/asm_amd64.s | 59 ++ src/pkg/sync/atomic/asm_arm.s | 78 +++ src/pkg/sync/atomic/asm_linux_arm.s | 68 +++ src/pkg/sync/atomic/atomic_test.go | 506 ++++++++++++++++ src/pkg/sync/atomic/doc.go | 57 ++ src/pkg/sync/cond.go | 90 +++ src/pkg/sync/cond_test.go | 99 ++++ src/pkg/sync/mutex.go | 31 +- src/pkg/sync/rwmutex.go | 23 +- src/pkg/sync/rwmutex_test.go | 50 +- src/pkg/sync/xadd_test.go | 9 - src/pkg/syscall/exec_unix.go | 4 + src/pkg/syscall/exec_windows.go | 2 +- src/pkg/syscall/mkerrors.sh | 2 +- src/pkg/syscall/syscall_windows.go | 63 +- src/pkg/syscall/zerrors_freebsd_amd64.go | 1 - src/pkg/syscall/zerrors_linux_386.go | 2 + src/pkg/syscall/zerrors_linux_amd64.go | 2 + src/pkg/syscall/zsyscall_windows_386.go | 15 + src/pkg/syscall/zsysnum_freebsd_amd64.go | 2 +- src/pkg/syscall/ztypes_windows_386.go | 2 + src/pkg/template/template.go | 8 +- src/pkg/testing/benchmark.go | 8 +- src/pkg/testing/testing.go | 10 +- src/pkg/time/Makefile | 1 + src/pkg/time/sleep.go | 25 - src/pkg/time/sys.go | 54 ++ src/pkg/time/time.go | 24 - src/pkg/unsafe/unsafe.go | 11 +- src/pkg/websocket/client.go | 20 +- src/pkg/websocket/server.go | 36 +- src/pkg/websocket/websocket_test.go | 11 +- src/pkg/xml/read_test.go | 10 +- src/pkg/xml/xml.go | 23 +- 
src/pkg/xml/xml_test.go | 46 ++ src/run.bash | 1 - 321 files changed, 12917 insertions(+), 4020 deletions(-) create mode 100644 src/cmd/Makefile delete mode 100644 src/cmd/clean.bash delete mode 100755 src/cmd/make.bash create mode 100644 src/pkg/compress/bzip2/Makefile create mode 100644 src/pkg/compress/bzip2/bit_reader.go create mode 100644 src/pkg/compress/bzip2/bzip2.go create mode 100644 src/pkg/compress/bzip2/bzip2_test.go create mode 100644 src/pkg/compress/bzip2/huffman.go create mode 100644 src/pkg/compress/bzip2/move_to_front.go create mode 100644 src/pkg/compress/lzw/Makefile create mode 100644 src/pkg/compress/lzw/reader.go create mode 100644 src/pkg/compress/lzw/reader_test.go create mode 100644 src/pkg/compress/lzw/writer.go create mode 100644 src/pkg/compress/lzw/writer_test.go create mode 100644 src/pkg/compress/testdata/e.txt create mode 100644 src/pkg/compress/testdata/pi.txt delete mode 100644 src/pkg/compress/zlib/testdata/e.txt delete mode 100644 src/pkg/compress/zlib/testdata/pi.txt create mode 100644 src/pkg/crypto/openpgp/Makefile create mode 100644 src/pkg/crypto/openpgp/canonical_text.go create mode 100644 src/pkg/crypto/openpgp/canonical_text_test.go create mode 100644 src/pkg/crypto/openpgp/keys.go create mode 100644 src/pkg/crypto/openpgp/read.go create mode 100644 src/pkg/crypto/openpgp/read_test.go create mode 100644 src/pkg/crypto/openpgp/write.go create mode 100644 src/pkg/crypto/openpgp/write_test.go create mode 100644 src/pkg/gob/gobencdec_test.go create mode 100644 src/pkg/http/cgi/Makefile create mode 100644 src/pkg/http/cgi/cgi.go create mode 100644 src/pkg/http/cgi/cgi_test.go create mode 100755 src/pkg/http/cgi/testdata/test.cgi create mode 100644 src/pkg/http/cookie.go create mode 100644 src/pkg/http/cookie_test.go create mode 100644 src/pkg/http/header.go create mode 100644 src/pkg/http/httptest/Makefile create mode 100644 src/pkg/http/httptest/recorder.go create mode 100644 src/pkg/http/httptest/server.go create mode 
100644 src/pkg/http/proxy_test.go create mode 100644 src/pkg/http/range_test.go create mode 100644 src/pkg/http/transport.go create mode 100644 src/pkg/image/decode_test.go create mode 100644 src/pkg/image/png/testdata/pngsuite/basn0g01-30.png create mode 100644 src/pkg/image/png/testdata/pngsuite/basn0g01-30.sng create mode 100644 src/pkg/image/png/testdata/pngsuite/basn0g02-29.png create mode 100644 src/pkg/image/png/testdata/pngsuite/basn0g02-29.sng create mode 100644 src/pkg/image/png/testdata/pngsuite/basn0g04-31.png create mode 100644 src/pkg/image/png/testdata/pngsuite/basn0g04-31.sng create mode 100644 src/pkg/image/testdata/video-001.bmp create mode 100644 src/pkg/image/testdata/video-001.gif create mode 100644 src/pkg/image/testdata/video-001.jpeg create mode 100644 src/pkg/image/testdata/video-001.png create mode 100644 src/pkg/image/testdata/video-001.tiff create mode 100644 src/pkg/net/multicast_test.go create mode 100644 src/pkg/net/textproto/header.go create mode 100644 src/pkg/path/filepath/Makefile create mode 100644 src/pkg/path/filepath/match.go create mode 100644 src/pkg/path/filepath/match_test.go create mode 100644 src/pkg/path/filepath/path.go create mode 100644 src/pkg/path/filepath/path_test.go create mode 100644 src/pkg/path/filepath/path_unix.go delete mode 100644 src/pkg/path/path_unix.go delete mode 100644 src/pkg/path/path_windows.go delete mode 100644 src/pkg/runtime/arm/cas5.s delete mode 100644 src/pkg/runtime/arm/cas6.s create mode 100644 src/pkg/runtime/stack.h create mode 100644 src/pkg/runtime/windows/signals.h delete mode 100644 src/pkg/sync/asm_386.s delete mode 100644 src/pkg/sync/asm_amd64.s delete mode 100644 src/pkg/sync/asm_arm5.s delete mode 100644 src/pkg/sync/asm_arm6.s create mode 100644 src/pkg/sync/atomic/Makefile create mode 100644 src/pkg/sync/atomic/asm_386.s create mode 100644 src/pkg/sync/atomic/asm_amd64.s create mode 100644 src/pkg/sync/atomic/asm_arm.s create mode 100644 src/pkg/sync/atomic/asm_linux_arm.s 
create mode 100644 src/pkg/sync/atomic/atomic_test.go create mode 100644 src/pkg/sync/atomic/doc.go create mode 100644 src/pkg/sync/cond.go create mode 100644 src/pkg/sync/cond_test.go delete mode 100644 src/pkg/sync/xadd_test.go create mode 100644 src/pkg/time/sys.go (limited to 'src') diff --git a/src/Make.common b/src/Make.common index e3f415a1f..af6d04adc 100644 --- a/src/Make.common +++ b/src/Make.common @@ -6,7 +6,7 @@ clean: rm -rf *.o *.a *.[$(OS)] [$(OS)].out $(CLEANFILES) %.make: - (cd $* && gomake install) + $(MAKE) -C $* install .PHONY: all clean nuke install coverage test bench testpackage-clean\ importpath dir diff --git a/src/Make.pkg b/src/Make.pkg index ca0fa9ee2..3d616ca99 100644 --- a/src/Make.pkg +++ b/src/Make.pkg @@ -36,7 +36,7 @@ INSTALLFILES+=$(pkgdir)/$(TARG).a # The rest of the cgo rules are below, but these variable updates # must be done here so they apply to the main rules. ifdef CGOFILES -GOFILES+=$(patsubst %.go,%.cgo1.go,$(CGOFILES)) _cgo_gotypes.go +GOFILES+=$(patsubst %.go,_obj/%.cgo1.go,$(CGOFILES)) _obj/_cgo_gotypes.go CGO_OFILES+=$(patsubst %.go,%.cgo2.o,$(CGOFILES)) _cgo_export.o OFILES+=_cgo_defun.$O _cgo_import.$O $(CGO_OFILES) endif @@ -47,21 +47,19 @@ coverage: gotest 6cov -g $(shell pwd) $O.out | grep -v '_test\.go:' -CLEANFILES+=*.cgo1.go *.cgo2.c _cgo_defun.c _cgo_gotypes.go _cgo_export.* -CLEANFILES+=_cgo_.c _cgo_import.c _cgo_main.c _cgo_flags _cgo_run -CLEANFILES+=*.so _obj _test _testmain.go *.exe +CLEANFILES+=*.so _obj _test _testmain.go *.exe _cgo* *.cgo[12].* test: gotest bench: - gotest -benchmarks=. -match="Do not run tests" + gotest -test.bench=. -test.run="Do not run tests" nuke: clean rm -f $(pkgdir)/$(TARG).a testpackage-clean: - rm -f _test/$(TARG).a _gotest_.$O + rm -f _test/$(TARG).a install: $(INSTALLFILES) @@ -103,33 +101,34 @@ dir: # x.go and y.go. # Cgo translates each x.go file listed in $(CGOFILES) into a basic -# translation of x.go, called x.cgo1.go. 
Additionally, three other +# translation of x.go, called _obj/x.cgo1.go. Additionally, three other # files are created: # -# _cgo_gotypes.go - declarations needed for all .go files in the package; imports "unsafe" -# _cgo_defun.c - C trampoline code to be compiled with 6c and linked into the package -# x.cgo2.c - C implementations compiled with gcc to create a dynamic library +# _obj/_cgo_gotypes.go - declarations needed for all .go files in the package; imports "unsafe" +# _obj/_cgo_defun.c - C trampoline code to be compiled with 6c and linked into the package +# _obj/x.cgo2.c - C implementations compiled with gcc to create a dynamic library # ifdef CGOFILES -_cgo_run: $(CGOFILES) +_obj/_cgo_run: $(CGOFILES) + @mkdir -p _obj CGOPKGPATH=$(dir) cgo -- $(CGO_CFLAGS) $(CGOFILES) - touch _cgo_run + touch _obj/_cgo_run # _CGO_CFLAGS and _CGO_LDFLAGS are defined via the evaluation of _cgo_flags. # The include happens before the commands in the recipe run, # so it cannot be done in the same recipe that runs cgo. -_load_cgo_flags: _cgo_run - $(eval include _cgo_flags) +_obj/_load_cgo_flags: _obj/_cgo_run + $(eval include _obj/_cgo_flags) # Include any previous flags in case cgo files are up to date. --include _cgo_flags +-include _obj/_cgo_flags # Ugly but necessary - cgo writes these files too. -_cgo_gotypes.go _cgo_export.c _cgo_export.h _cgo_main.c _cgo_defun.c: _load_cgo_flags +_obj/_cgo_gotypes.go _obj/_cgo_export.c _obj/_cgo_export.h _obj/_cgo_main.c _obj/_cgo_defun.c: _obj/_load_cgo_flags @true -%.cgo1.go %.cgo2.c: _cgo_defun.c +_obj/%.cgo1.go _obj/%.cgo2.c: _obj/_cgo_defun.c @true endif @@ -137,6 +136,9 @@ endif %.o: %.c $(HOST_CC) $(_CGO_CFLAGS_$(GOARCH)) -g -fPIC -O2 -o $@ -c $(CGO_CFLAGS) $(_CGO_CFLAGS) $*.c +%.o: _obj/%.c + $(HOST_CC) $(_CGO_CFLAGS_$(GOARCH)) -I . 
-g -fPIC -O2 -o $@ -c $(CGO_CFLAGS) $(_CGO_CFLAGS) $^ + # To find out which symbols are needed from external libraries # and which libraries are needed, we build a simple a.out that # links all the objects we just created and then use cgo -dynimport @@ -145,14 +147,12 @@ endif # After main we have to define all the symbols that will be provided # by Go code. That's crosscall2 and any exported symbols. -_cgo_main.o: _cgo_main.c - $(HOST_CC) $(_CGO_CFLAGS_$(GOARCH)) -g -fPIC -O2 -o $@ -c $(CGO_CFLAGS) $(_CGO_CFLAGS) _cgo_main.c - _cgo1_.o: _cgo_main.o $(CGO_OFILES) $(HOST_CC) $(_CGO_CFLAGS_$(GOARCH)) -g -fPIC -O2 -o $@ $^ $(CGO_LDFLAGS) $(_CGO_LDFLAGS) -_cgo_import.c: _cgo1_.o - cgo -dynimport _cgo1_.o >_$@ && mv -f _$@ $@ +_obj/_cgo_import.c: _cgo1_.o + @mkdir -p _obj + cgo -dynimport _cgo1_.o >$@_ && mv -f $@_ $@ # The rules above added x.cgo1.go and _cgo_gotypes.go to $(GOFILES), # added _cgo_defun.$O to $OFILES, and added the installed copy of @@ -170,17 +170,17 @@ _CGO_LDFLAGS_windows=-shared -lm -mthreads RUNTIME_CFLAGS=-I$(pkgdir) # Compile _cgo_defun.c with 6c; needs access to the runtime headers. -_cgo_defun.$O: _cgo_defun.c - $(CC) $(CFLAGS) $(RUNTIME_CFLAGS) _cgo_defun.c +_cgo_defun.$O: _obj/_cgo_defun.c + $(CC) $(CFLAGS) $(RUNTIME_CFLAGS) -I . -o "$@" _obj/_cgo_defun.c # Generic build rules. # These come last so that the rules above can override them # for more specific file names. -%.$O: %.c - $(CC) $(CFLAGS) $*.c +%.$O: %.c $(HFILES) + $(CC) $(CFLAGS) -o "$@" $*.c + +%.$O: _obj/%.c $(HFILES) + $(CC) $(CFLAGS) -I . 
-o "$@" _obj/$*.c %.$O: %.s $(AS) $*.s - -%.$O: $(HFILES) - diff --git a/src/clean.bash b/src/clean.bash index 5c1dded56..7969e2cd0 100755 --- a/src/clean.bash +++ b/src/clean.bash @@ -22,11 +22,6 @@ rm -f "$GOROOT"/lib/*.a for i in lib9 libbio libmach cmd pkg \ ../misc/cgo/gmp ../misc/cgo/stdio \ ../test/bench ../test/garbage -do( - cd "$GOROOT"/src/$i || exit 1 - if test -f clean.bash; then - bash clean.bash --gomake $MAKE - else - $MAKE clean - fi -)done +do + gomake -C "$GOROOT/src/$i" clean +done diff --git a/src/cmd/5a/lex.c b/src/cmd/5a/lex.c index e762f5646..dbee3657f 100644 --- a/src/cmd/5a/lex.c +++ b/src/cmd/5a/lex.c @@ -364,7 +364,9 @@ struct "MOVWF", LTYPE3, AMOVWF, "LDREX", LTYPE3, ALDREX, + "LDREXD", LTYPE3, ALDREXD, "STREX", LTYPE9, ASTREX, + "STREXD", LTYPE9, ASTREXD, /* "ABSF", LTYPEI, AABSF, diff --git a/src/cmd/5c/txt.c b/src/cmd/5c/txt.c index 0f17cea89..f5619f800 100644 --- a/src/cmd/5c/txt.c +++ b/src/cmd/5c/txt.c @@ -1194,8 +1194,10 @@ gpseudo(int a, Sym *s, Node *n) p->from.type = D_OREG; p->from.sym = s; p->from.name = D_EXTERN; - if(a == ATEXT) + if(a == ATEXT) { p->reg = textflag; + textflag = 0; + } if(s->class == CSTATIC) p->from.name = D_STATIC; naddr(n, &p->to); diff --git a/src/cmd/5g/gg.h b/src/cmd/5g/gg.h index 603c09fc8..ce4575be9 100644 --- a/src/cmd/5g/gg.h +++ b/src/cmd/5g/gg.h @@ -27,6 +27,7 @@ struct Addr uchar type; char name; char reg; + char pun; uchar etype; }; #define A ((Addr*)0) diff --git a/src/cmd/5g/gsubr.c b/src/cmd/5g/gsubr.c index 133a21b3e..83a9949d6 100644 --- a/src/cmd/5g/gsubr.c +++ b/src/cmd/5g/gsubr.c @@ -1168,6 +1168,7 @@ naddr(Node *n, Addr *a, int canemitcode) a->etype = simtype[n->type->etype]; a->width = n->type->width; } + a->pun = n->pun; a->offset = n->xoffset; a->sym = n->sym; if(a->sym == S) diff --git a/src/cmd/5g/reg.c b/src/cmd/5g/reg.c index f31f70535..1cbeb3e3d 100644 --- a/src/cmd/5g/reg.c +++ b/src/cmd/5g/reg.c @@ -697,8 +697,8 @@ mkvar(Reg *r, Adr *a) n = D_NONE; flag = 0; -// if(a->pun) 
-// flag = 1; + if(a->pun) + flag = 1; switch(t) { default: diff --git a/src/cmd/5l/5.out.h b/src/cmd/5l/5.out.h index a25c0f71d..002b46d45 100644 --- a/src/cmd/5l/5.out.h +++ b/src/cmd/5l/5.out.h @@ -179,6 +179,9 @@ enum as ALDREX, ASTREX, + + ALDREXD, + ASTREXD, ALAST, }; diff --git a/src/cmd/5l/asm.c b/src/cmd/5l/asm.c index 7163997c0..af6d1dfda 100644 --- a/src/cmd/5l/asm.c +++ b/src/cmd/5l/asm.c @@ -331,21 +331,21 @@ asmb(void) Bprint(&bso, "%5.2f sym\n", cputime()); Bflush(&bso); switch(HEADTYPE) { - case 0: - case 1: - case 4: - case 5: + case Hnoheader: + case Hrisc: + case Hixp1200: + case Hipaq: debug['s'] = 1; break; - case 2: + case Hplan9x32: OFFSET = HEADR+textsize+segdata.filelen; seek(cout, OFFSET, 0); break; - case 3: + case Hnetbsd: OFFSET += rnd(segdata.filelen, 4096); seek(cout, OFFSET, 0); break; - case 6: + case Hlinux: OFFSET += segdata.filelen; seek(cout, rnd(OFFSET, INITRND), 0); break; @@ -362,9 +362,9 @@ asmb(void) OFFSET = 0; seek(cout, OFFSET, 0); switch(HEADTYPE) { - case 0: /* no header */ + case Hnoheader: /* no header */ break; - case 1: /* aif for risc os */ + case Hrisc: /* aif for risc os */ lputl(0xe1a00000); /* NOP - decompress code */ lputl(0xe1a00000); /* NOP - relocation code */ lputl(0xeb000000 + 12); /* BL - zero init code */ @@ -394,7 +394,7 @@ asmb(void) lputl(0xe1a00000); /* NOP - zero init code */ lputl(0xe1a0f00e); /* B (R14) - zero init return */ break; - case 2: /* plan 9 */ + case Hplan9x32: /* plan 9 */ lput(0x647); /* magic */ lput(textsize); /* sizes */ lput(segdata.filelen); @@ -404,7 +404,7 @@ asmb(void) lput(0L); lput(lcsize); break; - case 3: /* boot for NetBSD */ + case Hnetbsd: /* boot for NetBSD */ lput((143<<16)|0413); /* magic */ lputl(rnd(HEADR+textsize, 4096)); lputl(rnd(segdata.filelen, 4096)); @@ -414,15 +414,15 @@ asmb(void) lputl(0L); lputl(0L); break; - case 4: /* boot for IXP1200 */ + case Hixp1200: /* boot for IXP1200 */ break; - case 5: /* boot for ipaq */ + case Hipaq: /* boot for ipaq */ 
lputl(0xe3300000); /* nop */ lputl(0xe3300000); /* nop */ lputl(0xe3300000); /* nop */ lputl(0xe3300000); /* nop */ break; - case 6: + case Hlinux: /* elf arm */ eh = getElfEhdr(); fo = HEADR; @@ -1463,7 +1463,7 @@ if(debug['G']) print("%ux: %s: arm %d %d %d\n", (uint32)(p->pc), p->from.sym->na aclass(&p->from); if(instoffset != 0) diag("offset must be zero in STREX"); - o1 = (0x3<<23) | (0xf9<<4); + o1 = (0x18<<20) | (0xf90); o1 |= p->from.reg << 16; o1 |= p->reg << 0; o1 |= p->to.reg << 12; @@ -1553,6 +1553,25 @@ if(debug['G']) print("%ux: %s: arm %d %d %d\n", (uint32)(p->pc), p->from.sym->na o1 = oprrr(ACMP+AEND, p->scond); o1 |= p->from.reg<<16; break; + case 91: /* ldrexd oreg,reg */ + aclass(&p->from); + if(instoffset != 0) + diag("offset must be zero in LDREX"); + o1 = (0x1b<<20) | (0xf9f); + o1 |= p->from.reg << 16; + o1 |= p->to.reg << 12; + o1 |= (p->scond & C_SCOND) << 28; + break; + case 92: /* strexd reg,oreg,reg */ + aclass(&p->from); + if(instoffset != 0) + diag("offset must be zero in STREX"); + o1 = (0x1a<<20) | (0xf90); + o1 |= p->from.reg << 16; + o1 |= p->reg << 0; + o1 |= p->to.reg << 12; + o1 |= (p->scond & C_SCOND) << 28; + break; } out[0] = o1; diff --git a/src/cmd/5l/doc.go b/src/cmd/5l/doc.go index d266b9233..aa7ccebfc 100644 --- a/src/cmd/5l/doc.go +++ b/src/cmd/5l/doc.go @@ -23,6 +23,8 @@ Options new in this version: -F Force use of software floating point. Also implied by setting GOARM=5 in the environment. +-Hlinux + Write Linux ELF binaries (default when $GOOS is linux) -I interpreter Set the ELF dynamic linker to use. 
-L dir1 -L dir2 diff --git a/src/cmd/5l/l.h b/src/cmd/5l/l.h index e42be4e98..2e887dad7 100644 --- a/src/cmd/5l/l.h +++ b/src/cmd/5l/l.h @@ -35,6 +35,7 @@ enum { + thechar = '5', PtrSize = 4 }; @@ -109,6 +110,7 @@ struct Prog Prog* dlink; int32 pc; int32 line; + int32 spadj; uchar mark; uchar optab; uchar as; @@ -122,6 +124,8 @@ struct Prog #define datasize reg #define textflag reg +#define iscall(p) ((p)->as == ABL) + struct Sym { char* name; @@ -131,6 +135,7 @@ struct Sym uchar reachable; uchar dynexport; uchar leaf; + uchar stkcheck; int32 dynid; int32 plt; int32 got; diff --git a/src/cmd/5l/list.c b/src/cmd/5l/list.c index b4df89587..2ae25d491 100644 --- a/src/cmd/5l/list.c +++ b/src/cmd/5l/list.c @@ -98,6 +98,10 @@ Pconv(Fmt *fp) fmtprint(fp, "(%d) DWORD %D %D", p->line, &p->from, &p->to); break; } + + if(p->spadj) + fmtprint(fp, " (spadj%+d)", p->spadj); + return 0; } diff --git a/src/cmd/5l/noop.c b/src/cmd/5l/noop.c index a9439c27a..a5e66f038 100644 --- a/src/cmd/5l/noop.c +++ b/src/cmd/5l/noop.c @@ -227,7 +227,7 @@ noops(void) #ifdef CALLEEBX if(p->from.sym->foreign){ if(thumb) - // don't allow literal pool to seperate these + // don't allow literal pool to separate these p = adword(0xe28f7001, 0xe12fff17, p); // arm add 1, pc, r7 and bx r7 // p = aword(0xe12fff17, aword(0xe28f7001, p)); // arm add 1, pc, r7 and bx r7 else @@ -282,6 +282,7 @@ noops(void) q1->to.type = D_OREG; q1->to.offset = -autosize; q1->to.reg = REGSP; + q1->spadj = autosize; q1->link = p->link; p->link = q1; } else if (autosize < StackBig) { @@ -376,6 +377,7 @@ noops(void) p->to.type = D_OREG; p->to.offset = -autosize; p->to.reg = REGSP; + p->spadj = autosize; } else { // > StackBig // MOVW $autosize, R1 // MOVW $args, R2 @@ -424,6 +426,7 @@ noops(void) p->to.type = D_OREG; p->to.offset = -autosize; p->to.reg = REGSP; + p->spadj = autosize; } break; @@ -527,9 +530,20 @@ noops(void) p->from.reg = REGSP; p->to.type = D_REG; p->to.reg = REGPC; + // no spadj because it doesn't fall through 
} break; + case AADD: + if(p->from.type == D_CONST && p->from.reg == NREG && p->to.type == D_REG && p->to.reg == REGSP) + p->spadj = -p->from.offset; + break; + + case ASUB: + if(p->from.type == D_CONST && p->from.reg == NREG && p->to.type == D_REG && p->to.reg == REGSP) + p->spadj = p->from.offset; + break; + case ADIV: case ADIVU: case AMOD: @@ -635,6 +649,7 @@ noops(void) p->reg = NREG; p->to.type = D_REG; p->to.reg = REGSP; + p->spadj = -8; /* SUB $8,SP */ q1->as = ASUB; @@ -644,6 +659,7 @@ noops(void) q1->reg = NREG; q1->to.type = D_REG; q1->to.reg = REGSP; + q1->spadj = 8; break; case AMOVW: @@ -653,6 +669,12 @@ noops(void) if(a->type == D_CONST && ((a->name == D_NONE && a->reg == REGSP) || a->name == D_AUTO || a->name == D_PARAM) && (a->offset & 3)) diag("SP offset not multiple of 4"); } + if((p->scond & C_WBIT) && p->to.type == D_OREG && p->to.reg == REGSP) + p->spadj = -p->to.offset; + if((p->scond & C_PBIT) && p->from.type == D_OREG && p->from.reg == REGSP && p->to.reg != REGPC) + p->spadj = -p->from.offset; + if(p->from.type == D_CONST && p->from.reg == REGSP && p->to.type == D_REG && p->to.reg == REGSP) + p->spadj = -p->from.offset; break; case AMOVB: case AMOVBU: diff --git a/src/cmd/5l/obj.c b/src/cmd/5l/obj.c index 5b778d777..f252f9fc5 100644 --- a/src/cmd/5l/obj.c +++ b/src/cmd/5l/obj.c @@ -41,15 +41,26 @@ #endif char *noname = ""; -char thechar = '5'; char *thestring = "arm"; +Header headers[] = { + "noheader", Hnoheader, + "risc", Hrisc, + "plan9", Hplan9x32, + "netbsd", Hnetbsd, + "ixp1200", Hixp1200, + "ipaq", Hipaq, + "linux", Hlinux, + 0, 0 +}; + /* - * -H1 -T0x10005000 -R4 is aif for risc os - * -H2 -T4128 -R4096 is plan9 format - * -H3 -T0xF0000020 -R4 is NetBSD format - * -H4 is IXP1200 (raw) - * -H5 -T0xC0008010 -R1024 is ipaq + * -Hrisc -T0x10005000 -R4 is aif for risc os + * -Hplan9 -T4128 -R4096 is plan9 format + * -Hnetbsd -T0xF0000020 -R4 is NetBSD format + * -Hixp1200 is IXP1200 (raw) + * -Hipaq -T0xC0008010 -R1024 is ipaq + * 
-Hlinux -Tx -Rx is linux elf */ static char* @@ -119,7 +130,7 @@ main(int argc, char *argv[]) rpath = EARGF(usage()); break; case 'H': - HEADTYPE = atolwhex(EARGF(usage())); + HEADTYPE = headtype(EARGF(usage())); /* do something about setting INITTEXT */ break; case 'V': @@ -133,25 +144,23 @@ main(int argc, char *argv[]) usage(); libinit(); - if(rpath == nil) - rpath = smprint("%s/pkg/%s_%s", goroot, goos, goarch); if(!debug['9'] && !debug['U'] && !debug['B']) debug[DEFAULT] = 1; if(HEADTYPE == -1) { if(debug['U']) - HEADTYPE = 0; + HEADTYPE = Hnoheader; if(debug['B']) - HEADTYPE = 1; + HEADTYPE = Hrisc; if(debug['9']) - HEADTYPE = 2; - HEADTYPE = 6; + HEADTYPE = Hplan9x32; + HEADTYPE = Hlinux; } switch(HEADTYPE) { default: diag("unknown -H option"); errorexit(); - case 0: /* no header */ + case Hnoheader: /* no header */ HEADR = 0L; if(INITTEXT == -1) INITTEXT = 0; @@ -160,7 +169,7 @@ main(int argc, char *argv[]) if(INITRND == -1) INITRND = 4; break; - case 1: /* aif for risc os */ + case Hrisc: /* aif for risc os */ HEADR = 128L; if(INITTEXT == -1) INITTEXT = 0x10005000 + HEADR; @@ -169,7 +178,7 @@ main(int argc, char *argv[]) if(INITRND == -1) INITRND = 4; break; - case 2: /* plan 9 */ + case Hplan9x32: /* plan 9 */ HEADR = 32L; if(INITTEXT == -1) INITTEXT = 4128; @@ -178,7 +187,7 @@ main(int argc, char *argv[]) if(INITRND == -1) INITRND = 4096; break; - case 3: /* boot for NetBSD */ + case Hnetbsd: /* boot for NetBSD */ HEADR = 32L; if(INITTEXT == -1) INITTEXT = 0xF0000020L; @@ -187,7 +196,7 @@ main(int argc, char *argv[]) if(INITRND == -1) INITRND = 4096; break; - case 4: /* boot for IXP1200 */ + case Hixp1200: /* boot for IXP1200 */ HEADR = 0L; if(INITTEXT == -1) INITTEXT = 0x0; @@ -196,7 +205,7 @@ main(int argc, char *argv[]) if(INITRND == -1) INITRND = 4; break; - case 5: /* boot for ipaq */ + case Hipaq: /* boot for ipaq */ HEADR = 16L; if(INITTEXT == -1) INITTEXT = 0xC0008010; @@ -205,7 +214,7 @@ main(int argc, char *argv[]) if(INITRND == -1) INITRND = 
1024; break; - case 6: /* arm elf */ + case Hlinux: /* arm elf */ debug['d'] = 1; // no dynamic linking elfinit(); HEADR = ELFRESERVE; @@ -265,11 +274,13 @@ main(int argc, char *argv[]) follow(); softfloat(); noops(); + dostkcheck(); span(); pclntab(); symtab(); dodata(); address(); + doweak(); reloc(); asmb(); undef(); diff --git a/src/cmd/5l/optab.c b/src/cmd/5l/optab.c index 8b3135e06..625b66812 100644 --- a/src/cmd/5l/optab.c +++ b/src/cmd/5l/optab.c @@ -253,5 +253,8 @@ Optab optab[] = { ATST, C_REG, C_NONE, C_NONE, 90, 4, 0 }, + { ALDREXD, C_SOREG,C_NONE, C_REG, 91, 4, 0 }, + { ASTREXD, C_SOREG,C_REG, C_REG, 92, 4, 0 }, + { AXXX, C_NONE, C_NONE, C_NONE, 0, 4, 0 }, }; diff --git a/src/cmd/5l/pass.c b/src/cmd/5l/pass.c index e16b34171..7e1ba6a09 100644 --- a/src/cmd/5l/pass.c +++ b/src/cmd/5l/pass.c @@ -35,18 +35,6 @@ static void xfol(Prog*, Prog**); -void -undef(void) -{ - int i; - Sym *s; - - for(i=0; ihash) - if(s->type == SXREF) - diag("%s: not defined", s->name); -} - Prog* brchain(Prog *p) { diff --git a/src/cmd/5l/span.c b/src/cmd/5l/span.c index 220140f43..482d3e90a 100644 --- a/src/cmd/5l/span.c +++ b/src/cmd/5l/span.c @@ -1054,6 +1054,8 @@ buildop(void) case ALDREX: case ASTREX: + case ALDREXD: + case ASTREXD: case ATST: break; } diff --git a/src/cmd/6l/asm.c b/src/cmd/6l/asm.c index d179e77b1..fb041d83a 100644 --- a/src/cmd/6l/asm.c +++ b/src/cmd/6l/asm.c @@ -262,7 +262,7 @@ adddynrel(Sym *s, Reloc *r) r->type = 256; // ignore during relocsym return; } - if(HEADTYPE == 6 && s->size == PtrSize && r->off == 0) { + if(HEADTYPE == Hdarwin && s->size == PtrSize && r->off == 0) { // Mach-O relocations are a royal pain to lay out. // They use a compact stateful bytecode representation // that is too much bother to deal with. 
@@ -365,7 +365,7 @@ addpltsym(Sym *s) adduint64(rela, 0); s->plt = plt->size - 16; - } else if(HEADTYPE == 6) { // Mach-O + } else if(HEADTYPE == Hdarwin) { // To do lazy symbol lookup right, we're supposed // to tell the dynamic loader which library each // symbol comes from and format the link info @@ -412,7 +412,7 @@ addgotsym(Sym *s) addaddrplus(rela, got, s->got); adduint64(rela, ELF64_R_INFO(s->dynid, R_X86_64_GLOB_DAT)); adduint64(rela, 0); - } else if(HEADTYPE == 6) { // Mach-O + } else if(HEADTYPE == Hdarwin) { adduint32(lookup(".linkedit.got", 0), s->dynid); } else { diag("addgotsym: unsupported binary format"); @@ -486,7 +486,7 @@ adddynsym(Sym *s) elfwritedynent(lookup(".dynamic", 0), DT_NEEDED, addstring(lookup(".dynstr", 0), s->dynimplib)); } - } else if(HEADTYPE == 6) { + } else if(HEADTYPE == Hdarwin) { // Mach-o symbol nlist64 d = lookup(".dynsym", 0); name = s->dynimpname; @@ -539,7 +539,7 @@ adddynlib(char *lib) if(s->size == 0) addstring(s, ""); elfwritedynent(lookup(".dynamic", 0), DT_NEEDED, addstring(s, lib)); - } else if(HEADTYPE == 6) { // Mach-O + } else if(HEADTYPE == Hdarwin) { machoadddynlib(lib); } else { diag("adddynlib: unsupported binary format"); @@ -551,7 +551,7 @@ doelf(void) { Sym *s, *shstrtab, *dynstr; - if(HEADTYPE != 7 && HEADTYPE != 9) + if(HEADTYPE != Hlinux && HEADTYPE != Hfreebsd) return; /* predefine strings we need for section headers */ @@ -717,20 +717,20 @@ asmb(void) datblk(segdata.vaddr, segdata.filelen); machlink = 0; - if(HEADTYPE == 6) + if(HEADTYPE == Hdarwin) machlink = domacholink(); switch(HEADTYPE) { default: diag("unknown header type %d", HEADTYPE); - case 2: - case 5: + case Hplan9x32: + case Helf: break; - case 6: + case Hdarwin: debug['8'] = 1; /* 64-bit addresses */ break; - case 7: - case 9: + case Hlinux: + case Hfreebsd: debug['8'] = 1; /* 64-bit addresses */ /* index of elf text section; needed by asmelfsym, double-checked below */ /* !debug['d'] causes extra sections before the .text section */ @@ 
-738,7 +738,7 @@ asmb(void) if(!debug['d']) elftextsh += 10; break; - case 10: + case Hwindows: break; } @@ -752,20 +752,20 @@ asmb(void) Bflush(&bso); switch(HEADTYPE) { default: - case 2: - case 5: + case Hplan9x32: + case Helf: debug['s'] = 1; symo = HEADR+segtext.len+segdata.filelen; break; - case 6: + case Hdarwin: symo = rnd(HEADR+segtext.len, INITRND)+rnd(segdata.filelen, INITRND)+machlink; break; - case 7: - case 9: + case Hlinux: + case Hfreebsd: symo = rnd(HEADR+segtext.len, INITRND)+segdata.filelen; symo = rnd(symo, INITRND); break; - case 10: + case Hwindows: symo = rnd(HEADR+segtext.filelen, PEFILEALIGN)+segdata.filelen; symo = rnd(symo, PEFILEALIGN); break; @@ -791,7 +791,7 @@ asmb(void) lputl(symsize); lputl(lcsize); cflush(); - if(HEADTYPE != 10 && !debug['s']) { + if(HEADTYPE != Hwindows && !debug['s']) { elfsymo = symo+8+symsize+lcsize; seek(cout, elfsymo, 0); asmelfsym64(); @@ -813,7 +813,7 @@ asmb(void) seek(cout, 0L, 0); switch(HEADTYPE) { default: - case 2: /* plan9 */ + case Hplan9x32: /* plan9 */ magic = 4*26*26+7; magic |= 0x00008000; /* fat header */ lputb(magic); /* magic */ @@ -827,7 +827,7 @@ asmb(void) lputb(lcsize); /* line offsets */ vputb(vl); /* va of entry */ break; - case 3: /* plan9 */ + case Hplan9x64: /* plan9 */ magic = 4*26*26+7; lputb(magic); /* magic */ lputb(segtext.filelen); /* sizes */ @@ -838,11 +838,11 @@ asmb(void) lputb(spsize); /* sp offsets */ lputb(lcsize); /* line offsets */ break; - case 6: + case Hdarwin: asmbmacho(); break; - case 7: - case 9: + case Hlinux: + case Hfreebsd: /* elf amd-64 */ eh = getElfEhdr(); @@ -871,10 +871,10 @@ asmb(void) sh->addralign = 1; if(interpreter == nil) { switch(HEADTYPE) { - case 7: + case Hlinux: interpreter = linuxdynld; break; - case 9: + case Hfreebsd: interpreter = freebsddynld; break; } @@ -1032,7 +1032,7 @@ asmb(void) eh->ident[EI_MAG1] = 'E'; eh->ident[EI_MAG2] = 'L'; eh->ident[EI_MAG3] = 'F'; - if(HEADTYPE == 9) + if(HEADTYPE == Hfreebsd) eh->ident[EI_OSABI] = 9; 
eh->ident[EI_CLASS] = ELFCLASS64; eh->ident[EI_DATA] = ELFDATA2LSB; @@ -1055,7 +1055,7 @@ asmb(void) if(a+elfwriteinterp() > ELFRESERVE) diag("ELFRESERVE too small: %d > %d", a, ELFRESERVE); break; - case 10: + case Hwindows: asmbpe(); break; } diff --git a/src/cmd/6l/doc.go b/src/cmd/6l/doc.go index 97fa2cc5a..cc7782cfe 100644 --- a/src/cmd/6l/doc.go +++ b/src/cmd/6l/doc.go @@ -28,10 +28,14 @@ Options new in this version: -e Emit an extra ELF-compatible symbol table useful with tools such as nm, gdb, and oprofile. This option makes the binary file considerably larger. --H6 +-Hdarwin Write Apple Mach-O binaries (default when $GOOS is darwin) --H7 +-Hlinux Write Linux ELF binaries (default when $GOOS is linux) +-Hfreebsd + Write FreeBSD ELF binaries (default when $GOOS is freebsd) +-Hwindows + Write Windows PE32+ binaries (default when $GOOS is windows) -I interpreter Set the ELF dynamic linker to use. -L dir1 -L dir2 diff --git a/src/cmd/6l/l.h b/src/cmd/6l/l.h index 70473ecd2..6933d8eb1 100644 --- a/src/cmd/6l/l.h +++ b/src/cmd/6l/l.h @@ -39,6 +39,7 @@ enum { + thechar = '6', PtrSize = 8 }; @@ -111,6 +112,7 @@ struct Prog }; #define datasize from.scale #define textflag from.scale +#define iscall(p) ((p)->as == ACALL) struct Auto { @@ -129,6 +131,7 @@ struct Sym uchar reachable; uchar dynexport; uchar special; + uchar stkcheck; int32 dynid; int32 sig; int32 plt; @@ -367,7 +370,6 @@ EXTERN Sym* fromgotype; // type symbol on last p->from read EXTERN vlong textstksiz; EXTERN vlong textarg; -extern char thechar; EXTERN int elfstrsize; EXTERN char* elfstrdat; EXTERN int elftextsh; diff --git a/src/cmd/6l/obj.c b/src/cmd/6l/obj.c index f9e257842..f113e3ec1 100644 --- a/src/cmd/6l/obj.c +++ b/src/cmd/6l/obj.c @@ -40,17 +40,29 @@ #include char *noname = ""; -char thechar = '6'; char* thestring = "amd64"; char* paramspace = "FP"; +Header headers[] = { + "plan9x32", Hplan9x32, + "plan9", Hplan9x64, + "elf", Helf, + "darwin", Hdarwin, + "linux", Hlinux, + "freebsd", Hfreebsd, 
+ "windows", Hwindows, + "windowsgui", Hwindows, + 0, 0 +}; + /* - * -H2 -T4136 -R4096 is plan9 64-bit format - * -H3 -T4128 -R4096 is plan9 32-bit format - * -H5 -T0x80110000 -R4096 is ELF32 - * -H6 -Tx -Rx is apple MH-exec - * -H7 -Tx -Rx is linux elf-exec - * -H9 -Tx -Rx is FreeBSD elf-exec + * -Hplan9x32 -T4136 -R4096 is plan9 64-bit format + * -Hplan9 -T4128 -R4096 is plan9 32-bit format + * -Helf -T0x80110000 -R4096 is ELF32 + * -Hdarwin -Tx -Rx is apple MH-exec + * -Hlinux -Tx -Rx is linux elf-exec + * -Hfreebsd -Tx -Rx is FreeBSD elf-exec + * -Hwindows -Tx -Rx is MS Windows PE32+ * * options used: 189BLQSWabcjlnpsvz */ @@ -94,7 +106,7 @@ main(int argc, char *argv[]) INITENTRY = EARGF(usage()); break; case 'H': - HEADTYPE = atolwhex(EARGF(usage())); + HEADTYPE = headtype(EARGF(usage())); break; case 'I': interpreter = EARGF(usage()); @@ -123,31 +135,15 @@ main(int argc, char *argv[]) usage(); libinit(); - if(rpath == nil) - rpath = smprint("%s/pkg/%s_%s", goroot, goos, goarch); - if(HEADTYPE == -1) { - HEADTYPE = 2; - if(strcmp(goos, "linux") == 0) - HEADTYPE = 7; - else - if(strcmp(goos, "darwin") == 0) - HEADTYPE = 6; - else - if(strcmp(goos, "freebsd") == 0) - HEADTYPE = 9; - else - if(strcmp(goos, "windows") == 0) - HEADTYPE = 10; - else - print("goos is not known: %s\n", goos); - } + if(HEADTYPE == -1) + HEADTYPE = headtype(goos); switch(HEADTYPE) { default: diag("unknown -H option"); errorexit(); - case 2: /* plan 9 */ + case Hplan9x32: /* plan 9 */ HEADR = 32L+8L; if(INITTEXT == -1) INITTEXT = 4096+HEADR; @@ -156,7 +152,7 @@ main(int argc, char *argv[]) if(INITRND == -1) INITRND = 4096; break; - case 3: /* plan 9 */ + case Hplan9x64: /* plan 9 */ HEADR = 32L; if(INITTEXT == -1) INITTEXT = 4096+32; @@ -165,7 +161,7 @@ main(int argc, char *argv[]) if(INITRND == -1) INITRND = 4096; break; - case 5: /* elf32 executable */ + case Helf: /* elf32 executable */ HEADR = rnd(52L+3*32L, 16); if(INITTEXT == -1) INITTEXT = 0x80110000L; @@ -174,7 +170,7 @@ main(int 
argc, char *argv[]) if(INITRND == -1) INITRND = 4096; break; - case 6: /* apple MACH */ + case Hdarwin: /* apple MACH */ /* * OS X system constant - offset from 0(GS) to our TLS. * Explained in ../../libcgo/darwin_amd64.c. @@ -189,8 +185,8 @@ main(int argc, char *argv[]) if(INITDAT == -1) INITDAT = 0; break; - case 7: /* elf64 executable */ - case 9: /* freebsd */ + case Hlinux: /* elf64 executable */ + case Hfreebsd: /* freebsd */ /* * ELF uses TLS offset negative from FS. * Translate 0(FS) and 8(FS) into -16(FS) and -8(FS). @@ -207,7 +203,7 @@ main(int argc, char *argv[]) if(INITRND == -1) INITRND = 4096; break; - case 10: /* PE executable */ + case Hwindows: /* PE executable */ peinit(); HEADR = PEFILEHEADR; if(INITTEXT == -1) @@ -252,9 +248,10 @@ main(int argc, char *argv[]) patch(); follow(); doelf(); - if(HEADTYPE == 6) + if(HEADTYPE == Hdarwin) domacho(); dostkoff(); + dostkcheck(); paramspace = "SP"; /* (FP) now (SP) on output */ if(debug['p']) if(debug['1']) @@ -262,7 +259,7 @@ main(int argc, char *argv[]) else doprof2(); span(); - if(HEADTYPE == 10) + if(HEADTYPE == Hwindows) dope(); addexport(); textaddress(); @@ -270,6 +267,7 @@ main(int argc, char *argv[]) symtab(); dodata(); address(); + doweak(); reloc(); asmb(); undef(); diff --git a/src/cmd/6l/pass.c b/src/cmd/6l/pass.c index 5eb221a35..8fda94392 100644 --- a/src/cmd/6l/pass.c +++ b/src/cmd/6l/pass.c @@ -32,16 +32,10 @@ #include "l.h" #include "../ld/lib.h" +#include "../../pkg/runtime/stack.h" static void xfol(Prog*, Prog**); -// see ../../runtime/proc.c:/StackGuard -enum -{ - StackSmall = 128, - StackBig = 4096, -}; - Prog* brchain(Prog *p) { @@ -277,7 +271,7 @@ patch(void) vexit = s->value; for(cursym = textp; cursym != nil; cursym = cursym->next) for(p = cursym->text; p != P; p = p->link) { - if(HEADTYPE == 10) { + if(HEADTYPE == Hwindows) { // Windows // Convert // op n(GS), reg @@ -289,7 +283,7 @@ patch(void) // a different method is used to access them. 
if(p->from.type == D_INDIR+D_GS && p->to.type >= D_AX && p->to.type <= D_DI - && p->from.offset != 0x58) { + && p->from.offset <= 8) { q = appendp(p); q->from = p->from; q->from.type = D_INDIR + p->to.type; @@ -300,7 +294,7 @@ patch(void) p->from.offset = 0x58; } } - if(HEADTYPE == 7 || HEADTYPE == 9) { + if(HEADTYPE == Hlinux || HEADTYPE == Hfreebsd) { // ELF uses FS instead of GS. if(p->from.type == D_INDIR+D_GS) p->from.type = D_INDIR+D_FS; @@ -428,13 +422,13 @@ dostkoff(void) if(!(p->from.scale & NOSPLIT)) { p = appendp(p); // load g into CX p->as = AMOVQ; - if(HEADTYPE == 7 || HEADTYPE == 9) // ELF uses FS + if(HEADTYPE == Hlinux || HEADTYPE == Hfreebsd) // ELF uses FS p->from.type = D_INDIR+D_FS; else p->from.type = D_INDIR+D_GS; p->from.offset = tlsoffset+0; p->to.type = D_CX; - if(HEADTYPE == 10) { // Windows + if(HEADTYPE == Hwindows) { // movq %gs:0x58, %rcx // movq (%rcx), %rcx p->as = AMOVQ; @@ -724,15 +718,3 @@ atolwhex(char *s) n = -n; return n; } - -void -undef(void) -{ - int i; - Sym *s; - - for(i=0; ihash) - if(s->type == SXREF) - diag("%s: not defined", s->name); -} diff --git a/src/cmd/8a/lex.c b/src/cmd/8a/lex.c index d5fa959aa..ca18b69ce 100644 --- a/src/cmd/8a/lex.c +++ b/src/cmd/8a/lex.c @@ -332,6 +332,7 @@ struct "CMPSB", LTYPE0, ACMPSB, "CMPSL", LTYPE0, ACMPSL, "CMPSW", LTYPE0, ACMPSW, + "CMPXCHG8B", LTYPE1, ACMPXCHG8B, "CMPXCHGB", LTYPE3, ACMPXCHGB, "CMPXCHGL", LTYPE3, ACMPXCHGL, "CMPXCHGW", LTYPE3, ACMPXCHGW, @@ -546,6 +547,9 @@ struct "VERW", LTYPE2, AVERW, "WAIT", LTYPE0, AWAIT, "WORD", LTYPE2, AWORD, + "XADDB", LTYPE3, AXADDB, + "XADDL", LTYPE3, AXADDL, + "XADDW", LTYPE3, AXADDW, "XCHGB", LTYPE3, AXCHGB, "XCHGL", LTYPE3, AXCHGL, "XCHGW", LTYPE3, AXCHGW, diff --git a/src/cmd/8l/8.out.h b/src/cmd/8l/8.out.h index 0866f05f0..03db0016b 100644 --- a/src/cmd/8l/8.out.h +++ b/src/cmd/8l/8.out.h @@ -392,6 +392,11 @@ enum as ACMPXCHGB, ACMPXCHGL, ACMPXCHGW, + ACMPXCHG8B, + + AXADDB, + AXADDL, + AXADDW, /* conditional move */ ACMOVLCC, diff 
--git a/src/cmd/8l/asm.c b/src/cmd/8l/asm.c index d90eab7e7..1e760d89e 100644 --- a/src/cmd/8l/asm.c +++ b/src/cmd/8l/asm.c @@ -246,7 +246,7 @@ adddynrel(Sym *s, Reloc *r) r->sym = S; return; } - if(HEADTYPE == 6 && s->size == PtrSize && r->off == 0) { + if(HEADTYPE == Hdarwin && s->size == PtrSize && r->off == 0) { // Mach-O relocations are a royal pain to lay out. // They use a compact stateful bytecode representation // that is too much bother to deal with. @@ -356,7 +356,7 @@ addpltsym(Sym *s) adduint32(rel, ELF32_R_INFO(s->dynid, R_386_JMP_SLOT)); s->plt = plt->size - 16; - } else if(HEADTYPE == 6) { // Mach-O + } else if(HEADTYPE == Hdarwin) { // Same laziness as in 6l. Sym *plt; @@ -395,7 +395,7 @@ addgotsym(Sym *s) rel = lookup(".rel", 0); addaddrplus(rel, got, s->got); adduint32(rel, ELF32_R_INFO(s->dynid, R_386_GLOB_DAT)); - } else if(HEADTYPE == 6) { // Mach-O + } else if(HEADTYPE == Hdarwin) { adduint32(lookup(".linkedit.got", 0), s->dynid); } else { diag("addgotsym: unsupported binary format"); @@ -465,7 +465,7 @@ adddynsym(Sym *s) } adduint16(d, t); } - } else if(HEADTYPE == 6) { + } else if(HEADTYPE == Hdarwin) { // Mach-O symbol nlist32 d = lookup(".dynsym", 0); name = s->dynimpname; @@ -481,7 +481,7 @@ adddynsym(Sym *s) adduint8(d, 0); // section adduint16(d, 0); // desc adduint32(d, 0); // value - } else if(HEADTYPE != 10) { + } else if(HEADTYPE != Hwindows) { diag("adddynsym: unsupported binary format"); } } @@ -499,9 +499,9 @@ adddynlib(char *lib) if(s->size == 0) addstring(s, ""); elfwritedynent(lookup(".dynamic", 0), DT_NEEDED, addstring(s, lib)); - } else if(HEADTYPE == 6) { // Mach-O + } else if(HEADTYPE == Hdarwin) { machoadddynlib(lib); - } else if(HEADTYPE != 10) { + } else if(HEADTYPE != Hwindows) { diag("adddynlib: unsupported binary format"); } } @@ -673,7 +673,7 @@ asmb(void) datblk(segdata.vaddr, segdata.filelen); machlink = 0; - if(HEADTYPE == 6) + if(HEADTYPE == Hdarwin) machlink = domacholink(); if(iself) { @@ -697,28 +697,28 @@ 
asmb(void) default: if(iself) goto Elfsym; - case 0: + case Hgarbunix: seek(cout, rnd(HEADR+segtext.filelen, 8192)+segdata.filelen, 0); break; - case 1: + case Hunixcoff: seek(cout, rnd(HEADR+segtext.filelen, INITRND)+segdata.filelen, 0); break; - case 2: + case Hplan9x32: symo = HEADR+segtext.filelen+segdata.filelen; break; - case 3: - case 4: + case Hmsdoscom: + case Hmsdosexe: debug['s'] = 1; symo = HEADR+segtext.filelen+segdata.filelen; break; - case 6: + case Hdarwin: symo = rnd(HEADR+segtext.filelen, INITRND)+rnd(segdata.filelen, INITRND)+machlink; break; Elfsym: symo = rnd(HEADR+segtext.filelen, INITRND)+segdata.filelen; symo = rnd(symo, INITRND); break; - case 10: + case Hwindows: // TODO(brainman): not sure what symo meant to be, but it is not used for Windows PE for now anyway symo = rnd(HEADR+segtext.filelen, PEFILEALIGN)+segdata.filelen; symo = rnd(symo, PEFILEALIGN); @@ -727,7 +727,7 @@ asmb(void) if(!debug['s']) { seek(cout, symo, 0); - if(HEADTYPE == 2) { + if(HEADTYPE == Hplan9x32) { asmplan9sym(); cflush(); @@ -740,7 +740,7 @@ asmb(void) cflush(); } - } else if(HEADTYPE != 10) { + } else if(HEADTYPE != Hwindows) { if(debug['v']) Bprint(&bso, "%5.2f dwarf\n", cputime()); dwarfemitdebugsections(); @@ -755,7 +755,7 @@ asmb(void) default: if(iself) goto Elfput; - case 0: /* garbage */ + case Hgarbunix: /* garbage */ lputb(0x160L<<16); /* magic and sections */ lputb(0L); /* time and date */ lputb(rnd(HEADR+segtext.filelen, 4096)+segdata.filelen); @@ -777,7 +777,7 @@ asmb(void) lputb(~0L); /* gp value ?? 
*/ break; lputl(0); /* x */ - case 1: /* unix coff */ + case Hunixcoff: /* unix coff */ /* * file header */ @@ -845,7 +845,7 @@ asmb(void) lputl(0); /* relocation, line numbers */ lputl(0x200); /* flags comment only */ break; - case 2: /* plan9 */ + case Hplan9x32: /* plan9 */ magic = 4*11*11+7; lputb(magic); /* magic */ lputb(segtext.filelen); /* sizes */ @@ -856,10 +856,10 @@ asmb(void) lputb(spsize); /* sp offsets */ lputb(lcsize); /* line offsets */ break; - case 3: + case Hmsdoscom: /* MS-DOS .COM */ break; - case 4: + case Hmsdosexe: /* fake MS-DOS .EXE */ v = rnd(HEADR+segtext.filelen, INITRND)+segdata.filelen; wputl(0x5A4D); /* 'MZ' */ @@ -882,13 +882,13 @@ asmb(void) wputl(0x0000); /* overlay number */ break; - case 6: + case Hdarwin: asmbmacho(); break; Elfput: /* elf 386 */ - if(HEADTYPE == 11) + if(HEADTYPE == Htiny) debug['d'] = 1; eh = getElfEhdr(); @@ -917,10 +917,10 @@ asmb(void) sh->addralign = 1; if(interpreter == nil) { switch(HEADTYPE) { - case 7: + case Hlinux: interpreter = linuxdynld; break; - case 9: + case Hfreebsd: interpreter = freebsddynld; break; } @@ -1068,7 +1068,7 @@ asmb(void) eh->ident[EI_DATA] = ELFDATA2LSB; eh->ident[EI_VERSION] = EV_CURRENT; switch(HEADTYPE) { - case 9: + case Hfreebsd: eh->ident[EI_OSABI] = 9; break; } @@ -1093,7 +1093,7 @@ asmb(void) diag("ELFRESERVE too small: %d > %d", a, ELFRESERVE); break; - case 10: + case Hwindows: asmbpe(); break; } diff --git a/src/cmd/8l/doc.go b/src/cmd/8l/doc.go index ef5ebc31d..b70888907 100644 --- a/src/cmd/8l/doc.go +++ b/src/cmd/8l/doc.go @@ -25,10 +25,16 @@ Options new in this version: Elide the dynamic linking header. With this option, the binary is statically linked and does not refer to dynld. Without this option (the default), the binary's contents are identical but it is loaded with dynld. 
--H6 +-Hplan9 + Write Plan 9 32-bit format binaries (default when $GOOS is plan9) +-Hdarwin Write Apple Mach-O binaries (default when $GOOS is darwin) --H7 +-Hlinux Write Linux ELF binaries (default when $GOOS is linux) +-Hfreebsd + Write FreeBSD ELF binaries (default when $GOOS is freebsd) +-Hwindows + Write Windows PE32 binaries (default when $GOOS is windows) -I interpreter Set the ELF dynamic linker to use. -L dir1 -L dir2 diff --git a/src/cmd/8l/l.h b/src/cmd/8l/l.h index f2546cf20..e4650ee58 100644 --- a/src/cmd/8l/l.h +++ b/src/cmd/8l/l.h @@ -39,6 +39,7 @@ enum { + thechar = '8', PtrSize = 4 }; @@ -110,6 +111,7 @@ struct Prog }; #define datasize from.scale #define textflag from.scale +#define iscall(p) ((p)->as == ACALL) struct Auto { @@ -128,6 +130,7 @@ struct Sym uchar reachable; uchar dynexport; uchar special; + uchar stkcheck; int32 value; int32 size; int32 sig; diff --git a/src/cmd/8l/obj.c b/src/cmd/8l/obj.c index 9c687f2fc..d505dc10e 100644 --- a/src/cmd/8l/obj.c +++ b/src/cmd/8l/obj.c @@ -44,21 +44,36 @@ #endif char *noname = ""; -char thechar = '8'; char *thestring = "386"; +Header headers[] = { + "garbunix", Hgarbunix, + "unixcoff", Hunixcoff, + "plan9", Hplan9x32, + "msdoscom", Hmsdoscom, + "msdosexe", Hmsdosexe, + "darwin", Hdarwin, + "linux", Hlinux, + "nacl", Hnacl, + "freebsd", Hfreebsd, + "windows", Hwindows, + "windowsgui", Hwindows, + "tiny", Htiny, + 0, 0 +}; + /* - * -H0 -T0x40004C -D0x10000000 is garbage unix - * -H1 -T0xd0 -R4 is unix coff - * -H2 -T4128 -R4096 is plan9 format - * -H3 -Tx -Rx is MS-DOS .COM - * -H4 -Tx -Rx is fake MS-DOS .EXE - * -H6 -Tx -Rx is Apple Mach-O - * -H7 -Tx -Rx is Linux ELF32 - * -H8 -Tx -Rx was Google Native Client - * -H9 -Tx -Rx is FreeBSD ELF32 - * -H10 -Tx -Rx is MS Windows PE - * -H11 -Tx -Rx is tiny (os image) + * -Hgarbunix -T0x40004C -D0x10000000 is garbage unix + * -Hunixcoff -T0xd0 -R4 is unix coff + * -Hplan9 -T4128 -R4096 is plan9 format + * -Hmsdoscom -Tx -Rx is MS-DOS .COM + * -Hmsdosexe -Tx 
-Rx is fake MS-DOS .EXE + * -Hdarwin -Tx -Rx is Apple Mach-O + * -Hlinux -Tx -Rx is Linux ELF32 + * -Hnacl -Tx -Rx was Google Native Client + * -Hfreebsd -Tx -Rx is FreeBSD ELF32 + * -Hwindows -Tx -Rx is MS Windows PE32 + * -Htiny -Tx -Rx is tiny (os image) */ void @@ -100,7 +115,7 @@ main(int argc, char *argv[]) INITENTRY = EARGF(usage()); break; case 'H': - HEADTYPE = atolwhex(EARGF(usage())); + HEADTYPE = headtype(EARGF(usage())); break; case 'I': interpreter = EARGF(usage()); @@ -130,46 +145,24 @@ main(int argc, char *argv[]) mywhatsys(); // get goos - if(HEADTYPE == -1) { - HEADTYPE = 2; - if(strcmp(goos, "linux") == 0) - HEADTYPE = 7; - else - if(strcmp(goos, "darwin") == 0) - HEADTYPE = 6; - else - if(strcmp(goos, "freebsd") == 0) - HEADTYPE = 9; - else - if(strcmp(goos, "windows") == 0) - HEADTYPE = 10; - else - if(strcmp(goos, "tiny") == 0) - HEADTYPE = 11; - else - if(strcmp(goos, "plan9") == 0) - HEADTYPE = 2; - else - print("goos is not known: %s\n", goos); - } + if(HEADTYPE == -1) + HEADTYPE = headtype(goos); if(outfile == nil) { - if(HEADTYPE == 10) + if(HEADTYPE == Hwindows) outfile = "8.out.exe"; else outfile = "8.out"; } libinit(); - if(rpath == nil) - rpath = smprint("%s/pkg/%s_%s", goroot, goos, goarch); switch(HEADTYPE) { default: diag("unknown -H option"); errorexit(); - case 0: /* this is garbage */ + case Hgarbunix: /* this is garbage */ HEADR = 20L+56L; if(INITTEXT == -1) INITTEXT = 0x40004CL; @@ -178,7 +171,7 @@ main(int argc, char *argv[]) if(INITRND == -1) INITRND = 0; break; - case 1: /* is unix coff */ + case Hunixcoff: /* is unix coff */ HEADR = 0xd0L; if(INITTEXT == -1) INITTEXT = 0xd0; @@ -187,7 +180,7 @@ main(int argc, char *argv[]) if(INITRND == -1) INITRND = 0; break; - case 2: /* plan 9 */ + case Hplan9x32: /* plan 9 */ tlsoffset = -8; HEADR = 32L; if(INITTEXT == -1) @@ -197,7 +190,7 @@ main(int argc, char *argv[]) if(INITRND == -1) INITRND = 1; break; - case 3: /* MS-DOS .COM */ + case Hmsdoscom: /* MS-DOS .COM */ HEADR = 0; 
if(INITTEXT == -1) INITTEXT = 0x0100; @@ -206,7 +199,7 @@ main(int argc, char *argv[]) if(INITRND == -1) INITRND = 4; break; - case 4: /* fake MS-DOS .EXE */ + case Hmsdosexe: /* fake MS-DOS .EXE */ HEADR = 0x200; if(INITTEXT == -1) INITTEXT = 0x0100; @@ -218,7 +211,7 @@ main(int argc, char *argv[]) if(debug['v']) Bprint(&bso, "HEADR = 0x%d\n", HEADR); break; - case 6: /* apple MACH */ + case Hdarwin: /* apple MACH */ /* * OS X system constant - offset from %gs to our TLS. * Explained in ../../libcgo/darwin_386.c. @@ -233,8 +226,8 @@ main(int argc, char *argv[]) if(INITRND == -1) INITRND = 4096; break; - case 7: /* elf32 executable */ - case 9: + case Hlinux: /* elf32 executable */ + case Hfreebsd: /* * ELF uses TLS offsets negative from %gs. * Translate 0(GS) and 4(GS) into -8(GS) and -4(GS). @@ -251,7 +244,7 @@ main(int argc, char *argv[]) if(INITRND == -1) INITRND = 4096; break; - case 10: /* PE executable */ + case Hwindows: /* PE executable */ peinit(); HEADR = PEFILEHEADR; if(INITTEXT == -1) @@ -261,7 +254,7 @@ main(int argc, char *argv[]) if(INITRND == -1) INITRND = PESECTALIGN; break; - case 11: + case Htiny: tlsoffset = 0; elfinit(); HEADR = ELFRESERVE; @@ -306,9 +299,9 @@ main(int argc, char *argv[]) patch(); follow(); doelf(); - if(HEADTYPE == 6) + if(HEADTYPE == Hdarwin) domacho(); - if(HEADTYPE == 10) + if(HEADTYPE == Hwindows) dope(); dostkoff(); if(debug['p']) @@ -323,6 +316,7 @@ main(int argc, char *argv[]) symtab(); dodata(); address(); + doweak(); reloc(); asmb(); undef(); diff --git a/src/cmd/8l/optab.c b/src/cmd/8l/optab.c index fceab785d..1e89a2105 100644 --- a/src/cmd/8l/optab.c +++ b/src/cmd/8l/optab.c @@ -702,6 +702,11 @@ Optab optab[] = { ACMPXCHGB, yrb_mb, Pm, 0xb0 }, { ACMPXCHGL, yrl_ml, Pm, 0xb1 }, { ACMPXCHGW, yrl_ml, Pm, 0xb1 }, + { ACMPXCHG8B, yscond, Pm, 0xc7,(01) }, + + { AXADDB, yrb_mb, Pb, 0x0f,0xc0 }, + { AXADDL, yrl_ml, Pm, 0xc1 }, + { AXADDW, yrl_ml, Pe, 0x0f,0xc1 }, { ACMOVLCC, yml_rl, Pm, 0x43 }, { ACMOVLCS, yml_rl, Pm, 0x42 
}, diff --git a/src/cmd/8l/pass.c b/src/cmd/8l/pass.c index 67acfa167..294926f29 100644 --- a/src/cmd/8l/pass.c +++ b/src/cmd/8l/pass.c @@ -262,12 +262,13 @@ patch(void) s = lookup("exit", 0); vexit = s->value; - if(HEADTYPE == 2) + plan9_tos = S; + if(HEADTYPE == Hplan9x32) plan9_tos = lookup("_tos", 0); for(cursym = textp; cursym != nil; cursym = cursym->next) { for(p = cursym->text; p != P; p = p->link) { - if(HEADTYPE == 10) { // Windows + if(HEADTYPE == Hwindows) { // Convert // op n(GS), reg // to @@ -288,7 +289,7 @@ patch(void) p->from.offset = 0x2C; } } - if(HEADTYPE == 7) { // Linux + if(HEADTYPE == Hlinux) { // Running binaries under Xen requires using // MOVL 0(GS), reg // and then off(reg) instead of saying off(GS) directly @@ -305,7 +306,7 @@ patch(void) p->from.offset = 0; } } - if(HEADTYPE == 2) { // Plan 9 + if(HEADTYPE == Hplan9x32) { if(p->from.type == D_INDIR+D_GS && p->to.type >= D_AX && p->to.type <= D_DI) { q = appendp(p); @@ -412,7 +413,8 @@ dostkoff(void) symmorestack->text->from.scale |= NOSPLIT; } - if(HEADTYPE == 2) + plan9_tos = S; + if(HEADTYPE == Hplan9x32) plan9_tos = lookup("_tos", 0); for(cursym = textp; cursym != nil; cursym = cursym->next) { @@ -430,7 +432,7 @@ dostkoff(void) if(!(p->from.scale & NOSPLIT)) { p = appendp(p); // load g into CX switch(HEADTYPE) { - case 10: // Windows + case Hwindows: p->as = AMOVL; p->from.type = D_INDIR+D_FS; p->from.offset = 0x2c; @@ -443,7 +445,7 @@ dostkoff(void) p->to.type = D_CX; break; - case 7: // Linux + case Hlinux: p->as = AMOVL; p->from.type = D_INDIR+D_GS; p->from.offset = 0; @@ -456,7 +458,7 @@ dostkoff(void) p->to.type = D_CX; break; - case 2: // Plan 9 + case Hplan9x32: p->as = AMOVL; p->from.type = D_EXTERN; p->from.sym = plan9_tos; @@ -664,15 +666,3 @@ atolwhex(char *s) n = -n; return n; } - -void -undef(void) -{ - int i; - Sym *s; - - for(i=0; ihash) - if(s->type == SXREF) - diag("%s(%d): not defined", s->name, s->version); -} diff --git a/src/cmd/Makefile b/src/cmd/Makefile new 
file mode 100644 index 000000000..104e9f5df --- /dev/null +++ b/src/cmd/Makefile @@ -0,0 +1,66 @@ +# Copyright 2011 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +include ../Make.inc + +all: install + +# Only build tools for current architecture, and only tools written in C. +# The tools written in Go are managed by ../pkg/Makefile. +DIRS=\ + $(O)a\ + $(O)c\ + $(O)g\ + $(O)l\ + cc\ + cov\ + gc\ + godefs\ + gopack\ + gotest\ + nm\ + prof\ + +# Clean applies to all directories, even for other architectures or +# written in Go. +CLEANDIRS=\ + $(DIRS)\ + 5a\ + 5c\ + 5g\ + 5l\ + 6a\ + 6c\ + 6g\ + 6l\ + 8a\ + 8c\ + 8g\ + 8l\ + cgo\ + ebnflint\ + godoc\ + gofmt\ + goinstall\ + goyacc\ + hgpatch\ + +install: $(patsubst %,%.install,$(DIRS)) +clean: $(patsubst %,%.clean,$(CLEANDIRS)) + +%.install: + @echo + @echo %%%% making $* %%%% + @echo + $(MAKE) -C $* install + +gc.install $(O)c.install: cc.install +$(O)g.install: gc.install +$(O)a.install $(O)c.install $(O)g.install: $(O)l.install + +%.clean: + $(MAKE) -C $* clean + +echo-dirs: + @echo $(DIRS) diff --git a/src/cmd/cgo/gcc.go b/src/cmd/cgo/gcc.go index e6ce21ed3..f7ecc9e14 100644 --- a/src/cmd/cgo/gcc.go +++ b/src/cmd/cgo/gcc.go @@ -599,7 +599,7 @@ func (p *Package) gccMachine() string { return "-m32" } -const gccTmp = "_cgo_.o" +const gccTmp = "_obj/_cgo_.o" // gccCmd returns the gcc command line to use for compiling // the input. @@ -776,6 +776,32 @@ var dwarfToName = map[string]string{ const signedDelta = 64 +// String returns the current type representation. Format arguments +// are assembled within this method so that any changes in mutable +// values are taken into account. +func (tr *TypeRepr) String() string { + if len(tr.Repr) == 0 { + return "" + } + if len(tr.FormatArgs) == 0 { + return tr.Repr + } + return fmt.Sprintf(tr.Repr, tr.FormatArgs...) +} + +// Empty returns true if the result of String would be "". 
+func (tr *TypeRepr) Empty() bool { + return len(tr.Repr) == 0 +} + +// Set modifies the type representation. +// If fargs are provided, repr is used as a format for fmt.Sprintf. +// Otherwise, repr is used unprocessed as the type representation. +func (tr *TypeRepr) Set(repr string, fargs ...interface{}) { + tr.Repr = repr + tr.FormatArgs = fargs +} + // Type returns a *Type with the same memory layout as // dtype when used as the type of a variable or a struct field. func (c *typeConv) Type(dtype dwarf.Type) *Type { @@ -789,16 +815,15 @@ func (c *typeConv) Type(dtype dwarf.Type) *Type { t := new(Type) t.Size = dtype.Size() t.Align = -1 - t.C = dtype.Common().Name - t.EnumValues = nil + t.C = &TypeRepr{Repr: dtype.Common().Name} c.m[dtype] = t if t.Size < 0 { // Unsized types are [0]byte t.Size = 0 t.Go = c.Opaque(0) - if t.C == "" { - t.C = "void" + if t.C.Empty() { + t.C.Set("void") } return t } @@ -827,7 +852,7 @@ func (c *typeConv) Type(dtype dwarf.Type) *Type { sub := c.Type(dt.Type) t.Align = sub.Align gt.Elt = sub.Go - t.C = fmt.Sprintf("typeof(%s[%d])", sub.C, dt.Count) + t.C.Set("typeof(%s[%d])", sub.C, dt.Count) case *dwarf.BoolType: t.Go = c.bool @@ -844,7 +869,7 @@ func (c *typeConv) Type(dtype dwarf.Type) *Type { if t.Align = t.Size; t.Align >= c.ptrSize { t.Align = c.ptrSize } - t.C = "enum " + dt.EnumName + t.C.Set("enum " + dt.EnumName) signed := 0 t.EnumValues = make(map[string]int64) for _, ev := range dt.Val { @@ -932,7 +957,7 @@ func (c *typeConv) Type(dtype dwarf.Type) *Type { // Translate void* as unsafe.Pointer if _, ok := base(dt.Type).(*dwarf.VoidType); ok { t.Go = c.unsafePointer - t.C = "void*" + t.C.Set("void*") break } @@ -940,7 +965,7 @@ func (c *typeConv) Type(dtype dwarf.Type) *Type { t.Go = gt // publish before recursive call sub := c.Type(dt.Type) gt.X = sub.Go - t.C = sub.C + "*" + t.C.Set("%s*", sub.C) case *dwarf.QualType: // Ignore qualifier. 
@@ -955,21 +980,21 @@ func (c *typeConv) Type(dtype dwarf.Type) *Type { if tag == "" { tag = "__" + strconv.Itoa(tagGen) tagGen++ - } else if t.C == "" { - t.C = dt.Kind + " " + tag + } else if t.C.Empty() { + t.C.Set(dt.Kind + " " + tag) } name := c.Ident("_Ctype_" + dt.Kind + "_" + tag) t.Go = name // publish before recursive calls switch dt.Kind { case "union", "class": typedef[name.Name] = c.Opaque(t.Size) - if t.C == "" { - t.C = fmt.Sprintf("typeof(unsigned char[%d])", t.Size) + if t.C.Empty() { + t.C.Set("typeof(unsigned char[%d])", t.Size) } case "struct": g, csyntax, align := c.Struct(dt) - if t.C == "" { - t.C = csyntax + if t.C.Empty() { + t.C.Set(csyntax) } t.Align = align typedef[name.Name] = g @@ -1024,7 +1049,7 @@ func (c *typeConv) Type(dtype dwarf.Type) *Type { case *dwarf.VoidType: t.Go = c.void - t.C = "void" + t.C.Set("void") } switch dtype.(type) { @@ -1041,7 +1066,7 @@ func (c *typeConv) Type(dtype dwarf.Type) *Type { } } - if t.C == "" { + if t.C.Empty() { fatal("internal error: did not create C name for %s", dtype) } @@ -1056,11 +1081,13 @@ func (c *typeConv) FuncArg(dtype dwarf.Type) *Type { case *dwarf.ArrayType: // Arrays are passed implicitly as pointers in C. // In Go, we must be explicit. 
+ tr := &TypeRepr{} + tr.Set("%s*", t.C) return &Type{ Size: c.ptrSize, Align: c.ptrSize, Go: &ast.StarExpr{X: t.Go}, - C: t.C + "*", + C: tr, } case *dwarf.TypedefType: // C has much more relaxed rules than Go for @@ -1189,7 +1216,7 @@ func (c *typeConv) Struct(dt *dwarf.StructType) (expr *ast.StructType, csyntax s fld[n] = &ast.Field{Names: []*ast.Ident{c.Ident(ident[f.Name])}, Type: t.Go} off += t.Size - buf.WriteString(t.C) + buf.WriteString(t.C.String()) buf.WriteString(" ") buf.WriteString(f.Name) buf.WriteString("; ") diff --git a/src/cmd/cgo/main.go b/src/cmd/cgo/main.go index b15d34527..2dc662de5 100644 --- a/src/cmd/cgo/main.go +++ b/src/cmd/cgo/main.go @@ -82,11 +82,17 @@ type ExpFunc struct { ExpName string // name to use from C } +// A TypeRepr contains the string representation of a type. +type TypeRepr struct { + Repr string + FormatArgs []interface{} +} + // A Type collects information about a type in both the C and Go worlds. type Type struct { Size int64 Align int64 - C string + C *TypeRepr Go ast.Expr EnumValues map[string]int64 } @@ -215,6 +221,10 @@ func main() { fs[i] = f } + // make sure that _obj directory exists, so that we can write + // all the output files there. + os.Mkdir("_obj", 0777) + for i, input := range goFiles { f := fs[i] p.Translate(f) diff --git a/src/cmd/cgo/out.go b/src/cmd/cgo/out.go index ede8f57d8..4a5fa6a73 100644 --- a/src/cmd/cgo/out.go +++ b/src/cmd/cgo/out.go @@ -20,20 +20,11 @@ import ( // writeDefs creates output files to be compiled by 6g, 6c, and gcc. // (The comments here say 6g and 6c but the code applies to the 8 and 5 tools too.) func (p *Package) writeDefs() { - // The path for the shared object is slash-free so that ELF loaders - // will treat it as a relative path. We rewrite slashes to underscores. 
- sopath := "cgo_" + strings.Map(slashToUnderscore, p.PackagePath) - soprefix := "" - if os.Getenv("GOOS") == "darwin" { - // OS X requires its own prefix for a relative path - soprefix = "@rpath/" - } - - fgo2 := creat("_cgo_gotypes.go") - fc := creat("_cgo_defun.c") - fm := creat("_cgo_main.c") + fgo2 := creat("_obj/_cgo_gotypes.go") + fc := creat("_obj/_cgo_defun.c") + fm := creat("_obj/_cgo_main.c") - fflg := creat("_cgo_flags") + fflg := creat("_obj/_cgo_flags") for k, v := range p.CgoFlags { fmt.Fprintf(fflg, "_CGO_%s=%s\n", k, v) } @@ -94,7 +85,7 @@ func (p *Package) writeDefs() { for _, n := range p.Name { if n.FuncType != nil { - p.writeDefsFunc(fc, fgo2, n, soprefix, sopath) + p.writeDefsFunc(fc, fgo2, n) } } @@ -172,7 +163,7 @@ func (p *Package) structType(n *Name) (string, int64) { off += pad } qual := "" - if t.C[len(t.C)-1] == '*' { + if c := t.C.String(); c[len(c)-1] == '*' { qual = "const " } fmt.Fprintf(&buf, "\t\t%s%s r;\n", qual, t.C) @@ -189,13 +180,12 @@ func (p *Package) structType(n *Name) (string, int64) { } if off == 0 { fmt.Fprintf(&buf, "\t\tchar unused;\n") // avoid empty struct - off++ } fmt.Fprintf(&buf, "\t}") return buf.String(), off } -func (p *Package) writeDefsFunc(fc, fgo2 *os.File, n *Name, soprefix, sopath string) { +func (p *Package) writeDefsFunc(fc, fgo2 *os.File, n *Name) { name := n.Go gtype := n.FuncType.Go if n.AddError { @@ -234,6 +224,9 @@ func (p *Package) writeDefsFunc(fc, fgo2 *os.File, n *Name, soprefix, sopath str fmt.Fprintf(fc, "void _cgo%s%s(void*);\n", cPrefix, n.Mangle) fmt.Fprintf(fc, "\n") fmt.Fprintf(fc, "void\n") + if argSize == 0 { + argSize++ + } fmt.Fprintf(fc, "·%s(struct{uint8 x[%d];}p)\n", n.Mangle, argSize) fmt.Fprintf(fc, "{\n") fmt.Fprintf(fc, "\truntime·cgocall(_cgo%s%s, &p);\n", cPrefix, n.Mangle) @@ -271,8 +264,8 @@ func (p *Package) writeOutput(f *File, srcfile string) { base = base[0 : len(base)-3] } base = strings.Map(slashToUnderscore, base) - fgo1 := creat(base + ".cgo1.go") - fgcc := 
creat(base + ".cgo2.c") + fgo1 := creat("_obj/" + base + ".cgo1.go") + fgcc := creat("_obj/" + base + ".cgo2.c") p.GoFiles = append(p.GoFiles, base+".cgo1.go") p.GccFiles = append(p.GccFiles, base+".cgo2.c") @@ -340,7 +333,7 @@ func (p *Package) writeOutputFunc(fgcc *os.File, n *Name) { // Write out the various stubs we need to support functions exported // from Go so that they are callable from C. func (p *Package) writeExports(fgo2, fc, fm *os.File) { - fgcc := creat("_cgo_export.c") + fgcc := creat("_obj/_cgo_export.c") fgcch := creat("_cgo_export.h") fmt.Fprintf(fgcch, "/* Created by cgo - DO NOT EDIT. */\n") @@ -401,7 +394,6 @@ func (p *Package) writeExports(fgo2, fc, fm *os.File) { } if ctype == "struct {\n" { ctype += "\t\tchar unused;\n" // avoid empty struct - off++ } ctype += "\t}" @@ -411,7 +403,7 @@ func (p *Package) writeExports(fgo2, fc, fm *os.File) { if fntype.Results == nil || len(fntype.Results.List) == 0 { gccResult = "void" } else if len(fntype.Results.List) == 1 && len(fntype.Results.List[0].Names) <= 1 { - gccResult = p.cgoType(fntype.Results.List[0].Type).C + gccResult = p.cgoType(fntype.Results.List[0].Type).C.String() } else { fmt.Fprintf(fgcch, "\n/* Return type for %s */\n", exp.ExpName) fmt.Fprintf(fgcch, "struct %s_return {\n", exp.ExpName) @@ -426,7 +418,7 @@ func (p *Package) writeExports(fgo2, fc, fm *os.File) { // Build the wrapper function compiled by gcc. 
s := fmt.Sprintf("%s %s(", gccResult, exp.ExpName) if fn.Recv != nil { - s += p.cgoType(fn.Recv.List[0].Type).C + s += p.cgoType(fn.Recv.List[0].Type).C.String() s += " recv" } forFieldList(fntype.Params, @@ -453,7 +445,7 @@ func (p *Package) writeExports(fgo2, fc, fm *os.File) { func(i int, atype ast.Expr) { fmt.Fprintf(fgcc, "\ta.p%d = p%d;\n", i, i) }) - fmt.Fprintf(fgcc, "\tcrosscall2(_cgoexp%s_%s, &a, (int) sizeof a);\n", cPrefix, exp.ExpName) + fmt.Fprintf(fgcc, "\tcrosscall2(_cgoexp%s_%s, &a, %d);\n", cPrefix, exp.ExpName, off) if gccResult != "void" { if len(fntype.Results.List) == 1 && len(fntype.Results.List[0].Names) <= 1 { fmt.Fprintf(fgcc, "\treturn a.r0;\n") @@ -542,24 +534,28 @@ func forFieldList(fl *ast.FieldList, fn func(int, ast.Expr)) { } } +func c(repr string, args ...interface{}) *TypeRepr { + return &TypeRepr{repr, args} +} + // Map predeclared Go types to Type. var goTypes = map[string]*Type{ - "int": &Type{Size: 4, Align: 4, C: "int"}, - "uint": &Type{Size: 4, Align: 4, C: "uint"}, - "int8": &Type{Size: 1, Align: 1, C: "schar"}, - "uint8": &Type{Size: 1, Align: 1, C: "uchar"}, - "int16": &Type{Size: 2, Align: 2, C: "short"}, - "uint16": &Type{Size: 2, Align: 2, C: "ushort"}, - "int32": &Type{Size: 4, Align: 4, C: "int"}, - "uint32": &Type{Size: 4, Align: 4, C: "uint"}, - "int64": &Type{Size: 8, Align: 8, C: "int64"}, - "uint64": &Type{Size: 8, Align: 8, C: "uint64"}, - "float": &Type{Size: 4, Align: 4, C: "float"}, - "float32": &Type{Size: 4, Align: 4, C: "float"}, - "float64": &Type{Size: 8, Align: 8, C: "double"}, - "complex": &Type{Size: 8, Align: 8, C: "__complex float"}, - "complex64": &Type{Size: 8, Align: 8, C: "__complex float"}, - "complex128": &Type{Size: 16, Align: 16, C: "__complex double"}, + "int": &Type{Size: 4, Align: 4, C: c("int")}, + "uint": &Type{Size: 4, Align: 4, C: c("uint")}, + "int8": &Type{Size: 1, Align: 1, C: c("schar")}, + "uint8": &Type{Size: 1, Align: 1, C: c("uchar")}, + "int16": &Type{Size: 2, Align: 2, C: 
c("short")}, + "uint16": &Type{Size: 2, Align: 2, C: c("ushort")}, + "int32": &Type{Size: 4, Align: 4, C: c("int")}, + "uint32": &Type{Size: 4, Align: 4, C: c("uint")}, + "int64": &Type{Size: 8, Align: 8, C: c("int64")}, + "uint64": &Type{Size: 8, Align: 8, C: c("uint64")}, + "float": &Type{Size: 4, Align: 4, C: c("float")}, + "float32": &Type{Size: 4, Align: 4, C: c("float")}, + "float64": &Type{Size: 8, Align: 8, C: c("double")}, + "complex": &Type{Size: 8, Align: 8, C: c("__complex float")}, + "complex64": &Type{Size: 8, Align: 8, C: c("__complex float")}, + "complex128": &Type{Size: 16, Align: 16, C: c("__complex double")}, } // Map an ast type to a Type. @@ -567,21 +563,21 @@ func (p *Package) cgoType(e ast.Expr) *Type { switch t := e.(type) { case *ast.StarExpr: x := p.cgoType(t.X) - return &Type{Size: p.PtrSize, Align: p.PtrSize, C: x.C + "*"} + return &Type{Size: p.PtrSize, Align: p.PtrSize, C: c("%s*", x.C)} case *ast.ArrayType: if t.Len == nil { - return &Type{Size: p.PtrSize + 8, Align: p.PtrSize, C: "GoSlice"} + return &Type{Size: p.PtrSize + 8, Align: p.PtrSize, C: c("GoSlice")} } case *ast.StructType: // TODO case *ast.FuncType: - return &Type{Size: p.PtrSize, Align: p.PtrSize, C: "void*"} + return &Type{Size: p.PtrSize, Align: p.PtrSize, C: c("void*")} case *ast.InterfaceType: - return &Type{Size: 3 * p.PtrSize, Align: p.PtrSize, C: "GoInterface"} + return &Type{Size: 3 * p.PtrSize, Align: p.PtrSize, C: c("GoInterface")} case *ast.MapType: - return &Type{Size: p.PtrSize, Align: p.PtrSize, C: "GoMap"} + return &Type{Size: p.PtrSize, Align: p.PtrSize, C: c("GoMap")} case *ast.ChanType: - return &Type{Size: p.PtrSize, Align: p.PtrSize, C: "GoChan"} + return &Type{Size: p.PtrSize, Align: p.PtrSize, C: c("GoChan")} case *ast.Ident: // Look up the type in the top level declarations. // TODO: Handle types defined within a function. 
@@ -606,10 +602,10 @@ func (p *Package) cgoType(e ast.Expr) *Type { } } if t.Name == "uintptr" { - return &Type{Size: p.PtrSize, Align: p.PtrSize, C: "uintptr"} + return &Type{Size: p.PtrSize, Align: p.PtrSize, C: c("uintptr")} } if t.Name == "string" { - return &Type{Size: p.PtrSize + 4, Align: p.PtrSize, C: "GoString"} + return &Type{Size: p.PtrSize + 4, Align: p.PtrSize, C: c("GoString")} } if r, ok := goTypes[t.Name]; ok { if r.Align > p.PtrSize { @@ -620,11 +616,11 @@ func (p *Package) cgoType(e ast.Expr) *Type { case *ast.SelectorExpr: id, ok := t.X.(*ast.Ident) if ok && id.Name == "unsafe" && t.Sel.Name == "Pointer" { - return &Type{Size: p.PtrSize, Align: p.PtrSize, C: "void*"} + return &Type{Size: p.PtrSize, Align: p.PtrSize, C: c("void*")} } } error(e.Pos(), "unrecognized Go type %T", e) - return &Type{Size: 4, Align: 4, C: "int"} + return &Type{Size: 4, Align: 4, C: c("int")} } const gccProlog = ` diff --git a/src/cmd/clean.bash b/src/cmd/clean.bash deleted file mode 100644 index 92d8cc5c9..000000000 --- a/src/cmd/clean.bash +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2009 The Go Authors. All rights reserved. -# Use of this source code is governed by a BSD-style -# license that can be found in the LICENSE file. - -gomake=gomake -if [ "$1" == "--gomake" -a "$2" != "" ]; then - gomake=$2 -fi - -for i in cc 6l 6a 6c 8l 8a 8c 8g 5l 5a 5c 5g gc 6g gopack nm cgo cov ebnflint godefs godoc gofmt goinstall gotest goyacc hgpatch prof -do - cd $i - $gomake clean - cd .. 
-done diff --git a/src/cmd/ebnflint/ebnflint.go b/src/cmd/ebnflint/ebnflint.go index 5eb398735..cac39179f 100644 --- a/src/cmd/ebnflint/ebnflint.go +++ b/src/cmd/ebnflint/ebnflint.go @@ -13,7 +13,7 @@ import ( "go/token" "io/ioutil" "os" - "path" + "path/filepath" ) @@ -91,7 +91,7 @@ func main() { os.Exit(1) } - if path.Ext(filename) == ".html" { + if filepath.Ext(filename) == ".html" { src = extractEBNF(src) } diff --git a/src/cmd/gc/const.c b/src/cmd/gc/const.c index 0ee693c02..a54c40f6c 100644 --- a/src/cmd/gc/const.c +++ b/src/cmd/gc/const.c @@ -1051,12 +1051,12 @@ int cmpslit(Node *l, Node *r) { int32 l1, l2, i, m; - char *s1, *s2; + uchar *s1, *s2; l1 = l->val.u.sval->len; l2 = r->val.u.sval->len; - s1 = l->val.u.sval->s; - s2 = r->val.u.sval->s; + s1 = (uchar*)l->val.u.sval->s; + s2 = (uchar*)r->val.u.sval->s; m = l1; if(l2 < m) diff --git a/src/cmd/gc/doc.go b/src/cmd/gc/doc.go index 21e1b103b..3fe7fafdd 100644 --- a/src/cmd/gc/doc.go +++ b/src/cmd/gc/doc.go @@ -43,6 +43,8 @@ Flags: disable optimization -S write assembly language text to standard output + -u + disallow importing packages not marked as safe -V print the compiler version diff --git a/src/cmd/gc/export.c b/src/cmd/gc/export.c index 594509915..09b963f27 100644 --- a/src/cmd/gc/export.c +++ b/src/cmd/gc/export.c @@ -51,6 +51,12 @@ exportname(char *s) return isupperrune(r); } +static int +initname(char *s) +{ + return strcmp(s, "init") == 0; +} + void autoexport(Node *n, int ctxt) { @@ -60,7 +66,7 @@ autoexport(Node *n, int ctxt) return; if(n->ntype && n->ntype->op == OTFUNC && n->ntype->left) // method return; - if(exportname(n->sym->name) || strcmp(n->sym->name, "init") == 0) + if(exportname(n->sym->name) || initname(n->sym->name)) exportsym(n); else packagesym(n); @@ -304,7 +310,7 @@ importsym(Sym *s, int op) // mark the symbol so it is not reexported if(s->def == N) { - if(exportname(s->name)) + if(exportname(s->name) || initname(s->name)) s->flags |= SymExport; else s->flags |= SymPackage; 
// package scope @@ -374,7 +380,7 @@ importvar(Sym *s, Type *t, int ctxt) { Node *n; - if(!exportname(s->name) && !mypackage(s)) + if(!exportname(s->name) && !initname(s->name) && !mypackage(s)) return; importsym(s, ONAME); diff --git a/src/cmd/gc/go.y b/src/cmd/gc/go.y index 86e3cae33..4b838a491 100644 --- a/src/cmd/gc/go.y +++ b/src/cmd/gc/go.y @@ -640,10 +640,15 @@ if_stmt: { markdcl(); } - if_header loop_body + if_header + { + if($3->ntest == N) + yyerror("missing condition in if statement"); + } + loop_body { $$ = $3; - $$->nbody = $4; + $$->nbody = $5; // no popdcl; maybe there's an LELSE } diff --git a/src/cmd/gc/init.c b/src/cmd/gc/init.c index dc073443e..af4eb0336 100644 --- a/src/cmd/gc/init.c +++ b/src/cmd/gc/init.c @@ -30,19 +30,19 @@ renameinit(Node *n) /* * hand-craft the following initialization code - * var initdone· uint8 (1) - * func Init·() (2) - * if initdone· != 0 { (3) - * if initdone· == 2 (4) + * var initdone· uint8 (1) + * func init() (2) + * if initdone· != 0 { (3) + * if initdone· == 2 (4) * return * throw(); (5) * } - * initdone. = 1; (6) + * initdone· = 1; (6) * // over all matching imported symbols - * .init·() (7) + * .init() (7) * { } (8) - * init·() // if any (9) - * initdone. 
= 2; (10) + * init·() // if any (9) + * initdone· = 2; (10) * return (11) * } */ @@ -79,7 +79,7 @@ anyinit(NodeList *n) // are there any imported init functions for(h=0; hlink) { - if(s->name[0] != 'I' || strncmp(s->name, "Init·", 6) != 0) + if(s->name[0] != 'i' || strcmp(s->name, "init") != 0) continue; if(s->def == N) continue; @@ -118,12 +118,7 @@ fninit(NodeList *n) // (2) maxarg = 0; - snprint(namebuf, sizeof(namebuf), "Init·"); - - // this is a botch since we need a known name to - // call the top level init function out of rt0 - if(strcmp(localpkg->name, "main") == 0) - snprint(namebuf, sizeof(namebuf), "init"); + snprint(namebuf, sizeof(namebuf), "init"); fn = nod(ODCLFUNC, N, N); initsym = lookup(namebuf); @@ -154,7 +149,7 @@ fninit(NodeList *n) // (7) for(h=0; hlink) { - if(s->name[0] != 'I' || strncmp(s->name, "Init·", 6) != 0) + if(s->name[0] != 'i' || strcmp(s->name, "init") != 0) continue; if(s->def == N) continue; diff --git a/src/cmd/gc/reflect.c b/src/cmd/gc/reflect.c index 36c245d47..8129bf1ce 100644 --- a/src/cmd/gc/reflect.c +++ b/src/cmd/gc/reflect.c @@ -10,6 +10,7 @@ static NodeList* signatlist; static Sym* dtypesym(Type*); +static Sym* weaktypesym(Type*); static int sigcmp(Sig *a, Sig *b) @@ -570,9 +571,17 @@ dcommontype(Sym *s, int ot, Type *t) { int i; Sym *s1; + Sym *sptr; char *p; dowidth(t); + + sptr = nil; + if(t->sym != nil && !isptr[t->etype]) + sptr = dtypesym(ptrto(t)); + else + sptr = weaktypesym(ptrto(t)); + s1 = dextratype(t); // empty interface pointing at this type. 
@@ -592,7 +601,8 @@ dcommontype(Sym *s, int ot, Type *t) // fieldAlign uint8; // kind uint8; // string *string; - // *nameInfo; + // *extraType; + // ptrToThis *Type // } ot = duintptr(s, ot, t->width); ot = duint32(s, ot, typehash(t)); @@ -616,7 +626,7 @@ dcommontype(Sym *s, int ot, Type *t) ot = dsymptr(s, ot, s1, 0); // extraType else ot = duintptr(s, ot, 0); - + ot = dsymptr(s, ot, sptr, 0); // ptr to type return ot; } @@ -661,6 +671,25 @@ typename(Type *t) return n; } +static Sym* +weaktypesym(Type *t) +{ + char *p; + Sym *s; + static Pkg *weak; + + if(weak == nil) { + weak = mkpkg(strlit("weak.type")); + weak->name = "weak.type"; + weak->prefix = "weak.type"; // not weak%2etype + } + + p = smprint("%#-T", t); + s = pkglookup(p, weak); + free(p); + return s; +} + static Sym* dtypesym(Type *t) { diff --git a/src/cmd/gc/subr.c b/src/cmd/gc/subr.c index 0755ca3cd..142e5ba41 100644 --- a/src/cmd/gc/subr.c +++ b/src/cmd/gc/subr.c @@ -203,7 +203,6 @@ fatal(char *fmt, ...) flusherrors(); -*(int*)0=0; print("%L: internal compiler error: ", lineno); va_start(arg, fmt); vfprint(1, fmt, arg); @@ -1909,8 +1908,12 @@ assignop(Type *src, Type *dst, char **why) return 0; } if(src->etype == TINTER && dst->etype != TBLANK) { - if(why != nil) - *why = ": need type assertion"; + if(why != nil) { + if(isptrto(dst, TINTER)) + *why = smprint(":\n\t%T is interface, not pointer to interface", src); + else + *why = ": need type assertion"; + } return 0; } @@ -2265,7 +2268,7 @@ syslook(char *name, int copy) s = pkglookup(name, runtimepkg); if(s == S || s->def == N) - fatal("looksys: cant find runtime.%s", name); + fatal("syslook: can't find runtime.%s", name); if(!copy) return s->def; diff --git a/src/cmd/gc/typecheck.c b/src/cmd/gc/typecheck.c index 5edca964a..3e8f35877 100644 --- a/src/cmd/gc/typecheck.c +++ b/src/cmd/gc/typecheck.c @@ -318,7 +318,7 @@ reswitch: n->left = N; goto ret; } - if(!isptr[t->etype]) { + if(!isptr[t->etype] || (t->type != T && t->type->etype == TANY) /* 
unsafe.Pointer */) { yyerror("invalid indirect of %+N", n->left); goto error; } @@ -1613,7 +1613,7 @@ typecheckaste(int op, Node *call, int isddd, Type *tstruct, NodeList *nl, char * exportassignok(tn->type, desc); if(assignop(tn->type, tl->type->type, &why) == 0) { if(call != N) - yyerror("cannot use %T as type %T in argument to %#N%s", tn->type, tl->type->type, desc, call, why); + yyerror("cannot use %T as type %T in argument to %#N%s", tn->type, tl->type->type, call, why); else yyerror("cannot use %T as type %T in %s%s", tn->type, tl->type->type, desc, why); } @@ -1625,7 +1625,7 @@ typecheckaste(int op, Node *call, int isddd, Type *tstruct, NodeList *nl, char * exportassignok(tn->type, desc); if(assignop(tn->type, tl->type, &why) == 0) { if(call != N) - yyerror("cannot use %T as type %T in argument to %#N%s", tn->type, tl->type, desc, call, why); + yyerror("cannot use %T as type %T in argument to %#N%s", tn->type, tl->type, call, why); else yyerror("cannot use %T as type %T in %s%s", tn->type, tl->type, desc, why); } diff --git a/src/cmd/godoc/dirtrees.go b/src/cmd/godoc/dirtrees.go index edb4a169d..3ad7c8cfc 100644 --- a/src/cmd/godoc/dirtrees.go +++ b/src/cmd/godoc/dirtrees.go @@ -12,8 +12,9 @@ import ( "go/parser" "go/token" "io/ioutil" + "log" "os" - pathutil "path" + "path/filepath" "strings" "unicode" ) @@ -31,7 +32,7 @@ type Directory struct { func isGoFile(f *os.FileInfo) bool { return f.IsRegular() && !strings.HasPrefix(f.Name, ".") && // ignore .files - pathutil.Ext(f.Name) == ".go" + filepath.Ext(f.Name) == ".go" } @@ -100,7 +101,13 @@ func (b *treeBuilder) newDirTree(fset *token.FileSet, path, name string, depth i return &Directory{depth, path, name, "", nil} } - list, _ := ioutil.ReadDir(path) // ignore errors + list, err := ioutil.ReadDir(path) + if err != nil { + // newDirTree is called with a path that should be a package + // directory; errors here should not happen, but if they do, + // we want to know about them + 
log.Printf("ioutil.ReadDir(%s): %s", path, err) + } // determine number of subdirectories and if there are package files ndirs := 0 @@ -116,7 +123,7 @@ func (b *treeBuilder) newDirTree(fset *token.FileSet, path, name string, depth i // though the directory doesn't contain any real package files - was bug) if synopses[0] == "" { // no "optimal" package synopsis yet; continue to collect synopses - file, err := parser.ParseFile(fset, pathutil.Join(path, d.Name), nil, + file, err := parser.ParseFile(fset, filepath.Join(path, d.Name), nil, parser.ParseComments|parser.PackageClauseOnly) if err == nil { hasPkgFiles = true @@ -149,7 +156,7 @@ func (b *treeBuilder) newDirTree(fset *token.FileSet, path, name string, depth i i := 0 for _, d := range list { if isPkgDir(d) { - dd := b.newDirTree(fset, pathutil.Join(path, d.Name), d.Name, depth+1) + dd := b.newDirTree(fset, filepath.Join(path, d.Name), d.Name, depth+1) if dd != nil { dirs[i] = dd i++ @@ -188,8 +195,16 @@ func (b *treeBuilder) newDirTree(fset *token.FileSet, path, name string, depth i // (i.e., in this case the tree may contain directories w/o any package files). // func newDirectory(root string, pathFilter func(string) bool, maxDepth int) *Directory { - d, err := os.Lstat(root) - if err != nil || !isPkgDir(d) { + // The root could be a symbolic link so use os.Stat not os.Lstat. + d, err := os.Stat(root) + // If we fail here, report detailed error messages; otherwise + // it is hard to see why a directory tree was not built. 
+ switch { + case err != nil: + log.Printf("newDirectory(%s): %s", root, err) + return nil + case !isPkgDir(d): + log.Printf("newDirectory(%s): not a package directory", root) return nil } if maxDepth < 0 { diff --git a/src/cmd/godoc/godoc.go b/src/cmd/godoc/godoc.go index c91dc33db..9dce5edf9 100644 --- a/src/cmd/godoc/godoc.go +++ b/src/cmd/godoc/godoc.go @@ -18,7 +18,8 @@ import ( "io/ioutil" "log" "os" - pathutil "path" + "path" + "path/filepath" "regexp" "runtime" "sort" @@ -81,8 +82,8 @@ var ( func initHandlers() { fsMap.Init(*pkgPath) fileServer = http.FileServer(*goroot, "") - cmdHandler = httpHandler{"/cmd/", pathutil.Join(*goroot, "src/cmd"), false} - pkgHandler = httpHandler{"/pkg/", pathutil.Join(*goroot, "src/pkg"), true} + cmdHandler = httpHandler{"/cmd/", filepath.Join(*goroot, "src", "cmd"), false} + pkgHandler = httpHandler{"/pkg/", filepath.Join(*goroot, "src", "pkg"), true} } @@ -91,12 +92,13 @@ func registerPublicHandlers(mux *http.ServeMux) { mux.Handle(pkgHandler.pattern, &pkgHandler) mux.HandleFunc("/doc/codewalk/", codewalk) mux.HandleFunc("/search", search) + mux.Handle("/robots.txt", fileServer) mux.HandleFunc("/", serveFile) } func initFSTree() { - fsTree.set(newDirectory(pathutil.Join(*goroot, *testDir), nil, -1)) + fsTree.set(newDirectory(filepath.Join(*goroot, *testDir), nil, -1)) invalidateIndex() } @@ -147,8 +149,13 @@ func readDirList(filename string) ([]string, os.Error) { } // create a sorted list of valid directory names filter := func(path string) bool { - d, err := os.Lstat(path) - return err == nil && isPkgDir(d) + d, e := os.Lstat(path) + if e != nil && err == nil { + // remember first error and return it from readDirList + // so we have at least some information if things go bad + err = e + } + return e == nil && isPkgDir(d) } list := canonicalizePaths(strings.Split(string(contents), "\n", -1), filter) // for each parent path, remove all it's children q @@ -160,7 +167,7 @@ func readDirList(filename string) ([]string, 
os.Error) { i++ } } - return list[0:i], nil + return list[0:i], err } @@ -207,9 +214,10 @@ func initDirTrees() { if *filter != "" { list, err := readDirList(*filter) if err != nil { - log.Printf("%s", err) - } else if len(list) == 0 { - log.Printf("no directory paths in file %s", *filter) + log.Printf("readDirList(%s): %s", *filter, err) + } + if *verbose || len(list) == 0 { + log.Printf("found %d directory paths in file %s", len(list), *filter) } setPathFilter(list) } @@ -239,27 +247,30 @@ func initDirTrees() { // ---------------------------------------------------------------------------- // Path mapping -func absolutePath(path, defaultRoot string) string { - abspath := fsMap.ToAbsolute(path) +// Absolute paths are file system paths (backslash-separated on Windows), +// but relative paths are always slash-separated. + +func absolutePath(relpath, defaultRoot string) string { + abspath := fsMap.ToAbsolute(relpath) if abspath == "" { // no user-defined mapping found; use default mapping - abspath = pathutil.Join(defaultRoot, path) + abspath = filepath.Join(defaultRoot, filepath.FromSlash(relpath)) } return abspath } -func relativePath(path string) string { - relpath := fsMap.ToRelative(path) +func relativeURL(abspath string) string { + relpath := fsMap.ToRelative(abspath) if relpath == "" { - // prefix must end in '/' + // prefix must end in a path separator prefix := *goroot - if len(prefix) > 0 && prefix[len(prefix)-1] != '/' { - prefix += "/" + if len(prefix) > 0 && prefix[len(prefix)-1] != filepath.Separator { + prefix += string(filepath.Separator) } - if strings.HasPrefix(path, prefix) { + if strings.HasPrefix(abspath, prefix) { // no user-defined mapping found; use default mapping - relpath = path[len(prefix):] + relpath = filepath.ToSlash(abspath[len(prefix):]) } } // Only if path is an invalid absolute path is relpath == "" @@ -474,7 +485,7 @@ func urlFmt(w io.Writer, format string, x ...interface{}) { } // map path - relpath := relativePath(path) + relpath 
:= relativeURL(path) // convert to relative URLs so that they can also // be used as relative file names in .txt templates @@ -591,7 +602,7 @@ func dirslashFmt(w io.Writer, format string, x ...interface{}) { // Template formatter for "localname" format. func localnameFmt(w io.Writer, format string, x ...interface{}) { - _, localname := pathutil.Split(x[0].(string)) + _, localname := filepath.Split(x[0].(string)) template.HTMLEscape(w, []byte(localname)) } @@ -623,7 +634,7 @@ var fmap = template.FormatterMap{ func readTemplate(name string) *template.Template { - path := pathutil.Join(*goroot, "lib/godoc/"+name) + path := filepath.Join(*goroot, "lib", "godoc", name) data, err := ioutil.ReadFile(path) if err != nil { log.Fatalf("ReadFile %s: %v", path, err) @@ -760,14 +771,13 @@ func applyTemplate(t *template.Template, name string, data interface{}) []byte { func redirect(w http.ResponseWriter, r *http.Request) (redirected bool) { - if canonical := pathutil.Clean(r.URL.Path) + "/"; r.URL.Path != canonical { + if canonical := path.Clean(r.URL.Path) + "/"; r.URL.Path != canonical { http.Redirect(w, r, canonical, http.StatusMovedPermanently) redirected = true } return } - func serveTextFile(w http.ResponseWriter, r *http.Request, abspath, relpath, title string) { src, err := ioutil.ReadFile(abspath) if err != nil { @@ -778,7 +788,7 @@ func serveTextFile(w http.ResponseWriter, r *http.Request, abspath, relpath, tit var buf bytes.Buffer buf.WriteString("
")
-	FormatText(&buf, src, 1, pathutil.Ext(abspath) == ".go", r.FormValue("h"), rangeSelection(r.FormValue("s")))
+	FormatText(&buf, src, 1, filepath.Ext(abspath) == ".go", r.FormValue("h"), rangeSelection(r.FormValue("s")))
 	buf.WriteString("
") servePage(w, title+" "+relpath, "", "", buf.Bytes()) @@ -815,7 +825,7 @@ func serveFile(w http.ResponseWriter, r *http.Request) { // pick off special cases and hand the rest to the standard file server switch r.URL.Path { case "/": - serveHTMLDoc(w, r, pathutil.Join(*goroot, "doc/root.html"), "doc/root.html") + serveHTMLDoc(w, r, filepath.Join(*goroot, "doc", "root.html"), "doc/root.html") return case "/doc/root.html": @@ -824,9 +834,9 @@ func serveFile(w http.ResponseWriter, r *http.Request) { return } - switch pathutil.Ext(abspath) { + switch path.Ext(relpath) { case ".html": - if strings.HasSuffix(abspath, "/index.html") { + if strings.HasSuffix(relpath, "/index.html") { // We'll show index.html for the directory. // Use the dir/ version as canonical instead of dir/index.html. http.Redirect(w, r, r.URL.Path[0:len(r.URL.Path)-len("index.html")], http.StatusMovedPermanently) @@ -851,8 +861,8 @@ func serveFile(w http.ResponseWriter, r *http.Request) { if redirect(w, r) { return } - if index := abspath + "/index.html"; isTextFile(index) { - serveHTMLDoc(w, r, index, relativePath(index)) + if index := filepath.Join(abspath, "index.html"); isTextFile(index) { + serveHTMLDoc(w, r, index, relativeURL(index)) return } serveDirectory(w, r, abspath, relpath) @@ -948,13 +958,13 @@ func (h *httpHandler) getPageInfo(abspath, relpath, pkgname string, mode PageInf // the package with dirname, and the 3rd choice is a package // that is not called "main" if there is exactly one such // package. Otherwise, don't select a package. - dirpath, dirname := pathutil.Split(abspath) + dirpath, dirname := filepath.Split(abspath) // If the dirname is "go" we might be in a sub-directory for // .go files - use the outer directory name instead for better // results. 
if dirname == "go" { - _, dirname = pathutil.Split(pathutil.Clean(dirpath)) + _, dirname = filepath.Split(filepath.Clean(dirpath)) } var choice3 *ast.Package @@ -995,7 +1005,7 @@ func (h *httpHandler) getPageInfo(abspath, relpath, pkgname string, mode PageInf ast.PackageExports(pkg) } if mode&genDoc != 0 { - pdoc = doc.NewPackageDoc(pkg, pathutil.Clean(relpath)) // no trailing '/' in importpath + pdoc = doc.NewPackageDoc(pkg, path.Clean(relpath)) // no trailing '/' in importpath } else { past = ast.MergePackageFiles(pkg, ast.FilterUnassociatedComments) } @@ -1081,13 +1091,13 @@ func (h *httpHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { title = "Package " + info.PDoc.PackageName case info.PDoc.PackageName == fakePkgName: // assume that the directory name is the command name - _, pkgname := pathutil.Split(pathutil.Clean(relpath)) + _, pkgname := path.Split(path.Clean(relpath)) title = "Command " + pkgname default: title = "Command " + info.PDoc.PackageName } default: - title = "Directory " + relativePath(info.Dirname) + title = "Directory " + relativeURL(info.Dirname) if *showTimestamps { subtitle = "Last update: " + time.SecondsToLocalTime(info.DirTime).String() } diff --git a/src/cmd/godoc/index.go b/src/cmd/godoc/index.go index 56f31f5cf..5af4d15cb 100644 --- a/src/cmd/godoc/index.go +++ b/src/cmd/godoc/index.go @@ -47,7 +47,7 @@ import ( "index/suffixarray" "io/ioutil" "os" - "path" + "path/filepath" "regexp" "sort" "strings" @@ -718,7 +718,7 @@ var whitelisted = map[string]bool{ // of "permitted" files for indexing. The filename must // be the directory-local name of the file. 
func isWhitelisted(filename string) bool { - key := path.Ext(filename) + key := filepath.Ext(filename) if key == "" { // file has no extension - use entire filename key = filename @@ -732,7 +732,7 @@ func (x *Indexer) visitFile(dirname string, f *os.FileInfo, fulltextIndex bool) return } - filename := path.Join(dirname, f.Name) + filename := filepath.Join(dirname, f.Name) goFile := false switch { @@ -757,7 +757,7 @@ func (x *Indexer) visitFile(dirname string, f *os.FileInfo, fulltextIndex bool) if fast != nil { // we've got a Go file to index x.current = file - dir, _ := path.Split(filename) + dir, _ := filepath.Split(filename) pak := Pak{dir, fast.Name.Name} x.file = &File{filename, pak} ast.Walk(x, fast) diff --git a/src/cmd/godoc/main.go b/src/cmd/godoc/main.go index ea1e3c42e..1ebb80279 100644 --- a/src/cmd/godoc/main.go +++ b/src/cmd/godoc/main.go @@ -36,7 +36,7 @@ import ( "io" "log" "os" - pathutil "path" + "path/filepath" "regexp" "runtime" "strings" @@ -314,14 +314,14 @@ func main() { if len(path) > 0 && path[0] == '.' 
{ // assume cwd; don't assume -goroot cwd, _ := os.Getwd() // ignore errors - path = pathutil.Join(cwd, path) + path = filepath.Join(cwd, path) } relpath := path abspath := path - if !pathutil.IsAbs(path) { + if !filepath.IsAbs(path) { abspath = absolutePath(path, pkgHandler.fsRoot) } else { - relpath = relativePath(path) + relpath = relativeURL(path) } var mode PageInfoMode @@ -339,7 +339,7 @@ func main() { if info.IsEmpty() { // try again, this time assume it's a command - if !pathutil.IsAbs(path) { + if !filepath.IsAbs(path) { abspath = absolutePath(path, cmdHandler.fsRoot) } cmdInfo := cmdHandler.getPageInfo(abspath, relpath, "", mode) diff --git a/src/cmd/godoc/mapping.go b/src/cmd/godoc/mapping.go index 1d87bbc76..6ae9032e4 100644 --- a/src/cmd/godoc/mapping.go +++ b/src/cmd/godoc/mapping.go @@ -10,7 +10,8 @@ import ( "fmt" "io" "os" - pathutil "path" + "path" + "path/filepath" "sort" "strings" ) @@ -59,10 +60,10 @@ type mapping struct { } -// Init initializes the Mapping from a list of ':'-separated -// paths. Empty paths are ignored; relative paths are assumed -// to be relative to the current working directory and converted -// to absolute paths. For each path of the form: +// Init initializes the Mapping from a list of paths separated by +// filepath.ListSeparator. Empty paths are ignored; relative paths +// are assumed to be relative to the current working directory and +// converted to absolute paths. For each path of the form: // // dirname/localname // @@ -71,7 +72,7 @@ type mapping struct { // localname -> path // // is added to the Mapping object, in the order of occurrence. 
-// For instance, the argument: +// For instance, under Unix, the argument: // // /home/user:/home/build/public // @@ -81,12 +82,12 @@ type mapping struct { // public -> /home/build/public // func (m *Mapping) Init(paths string) { - pathlist := canonicalizePaths(strings.Split(paths, ":", -1), nil) + pathlist := canonicalizePaths(filepath.SplitList(paths), nil) list := make([]mapping, len(pathlist)) // create mapping list for i, path := range pathlist { - _, prefix := pathutil.Split(path) + _, prefix := filepath.Split(path) list[i] = mapping{prefix, path, new(RWValue)} } @@ -147,7 +148,7 @@ func (m *Mapping) Fprint(w io.Writer) { func splitFirst(path string) (head, tail string) { - i := strings.Index(path, "/") + i := strings.Index(path, string(filepath.Separator)) if i > 0 { // 0 < i < len(path) return path[0:i], path[i+1:] @@ -156,22 +157,23 @@ func splitFirst(path string) (head, tail string) { } -// ToAbsolute maps a relative path to an absolute path using the Mapping -// specified by the receiver. If the path cannot be mapped, the empty -// string is returned. +// ToAbsolute maps a slash-separated relative path to an absolute filesystem +// path using the Mapping specified by the receiver. If the path cannot +// be mapped, the empty string is returned. // -func (m *Mapping) ToAbsolute(path string) string { - prefix, tail := splitFirst(path) +func (m *Mapping) ToAbsolute(spath string) string { + fpath := filepath.FromSlash(spath) + prefix, tail := splitFirst(fpath) for _, e := range m.list { switch { case e.prefix == prefix: // use tail case e.prefix == "": - tail = path + tail = fpath default: continue // no match } - abspath := pathutil.Join(e.path, tail) + abspath := filepath.Join(e.path, tail) if _, err := os.Stat(abspath); err == nil { return abspath } @@ -181,15 +183,16 @@ func (m *Mapping) ToAbsolute(path string) string { } -// ToRelative maps an absolute path to a relative path using the Mapping -// specified by the receiver. 
If the path cannot be mapped, the empty -// string is returned. +// ToRelative maps an absolute filesystem path to a relative slash-separated +// path using the Mapping specified by the receiver. If the path cannot +// be mapped, the empty string is returned. // -func (m *Mapping) ToRelative(path string) string { +func (m *Mapping) ToRelative(fpath string) string { for _, e := range m.list { - if strings.HasPrefix(path, e.path) { + if strings.HasPrefix(fpath, e.path) { + spath := filepath.ToSlash(fpath) // /absolute/prefix/foo -> prefix/foo - return pathutil.Join(e.prefix, path[len(e.path):]) // Join will remove a trailing '/' + return path.Join(e.prefix, spath[len(e.path):]) // Join will remove a trailing '/' } } return "" // no match diff --git a/src/cmd/godoc/utils.go b/src/cmd/godoc/utils.go index cc028cc4d..9517aee7a 100644 --- a/src/cmd/godoc/utils.go +++ b/src/cmd/godoc/utils.go @@ -10,7 +10,7 @@ import ( "io" "io/ioutil" "os" - pathutil "path" + "path/filepath" "sort" "strings" "sync" @@ -60,10 +60,10 @@ func canonicalizePaths(list []string, filter func(path string) bool) []string { continue // ignore empty paths (don't assume ".") } // len(path) > 0: normalize path - if pathutil.IsAbs(path) { - path = pathutil.Clean(path) + if filepath.IsAbs(path) { + path = filepath.Clean(path) } else { - path = pathutil.Join(cwd, path) + path = filepath.Join(cwd, path) } // we have a non-empty absolute path if filter != nil && !filter(path) { @@ -95,7 +95,7 @@ func canonicalizePaths(list []string, filter func(path string) bool) []string { // atomically renames that file to the file named by filename. 
// func writeFileAtomically(filename string, data []byte) os.Error { - f, err := ioutil.TempFile(cwd, filename) + f, err := ioutil.TempFile(filepath.Split(filename)) if err != nil { return err } @@ -149,7 +149,7 @@ var textExt = map[string]bool{ // func isTextFile(filename string) bool { // if the extension is known, use it for decision making - if isText, found := textExt[pathutil.Ext(filename)]; found { + if isText, found := textExt[filepath.Ext(filename)]; found { return isText } diff --git a/src/cmd/gofmt/gofmt.go b/src/cmd/gofmt/gofmt.go index 41c12b88d..224aee717 100644 --- a/src/cmd/gofmt/gofmt.go +++ b/src/cmd/gofmt/gofmt.go @@ -15,7 +15,7 @@ import ( "go/token" "io/ioutil" "os" - pathutil "path" + "path/filepath" "strings" ) @@ -181,7 +181,7 @@ func walkDir(path string) { done <- true }() // walk the tree - pathutil.Walk(path, v, v) + filepath.Walk(path, v, v) close(v) // terminate error handler loop <-done // wait for all errors to be reported } diff --git a/src/cmd/gofmt/test.sh b/src/cmd/gofmt/test.sh index 2f60a3e7b..3340c48f0 100755 --- a/src/cmd/gofmt/test.sh +++ b/src/cmd/gofmt/test.sh @@ -42,7 +42,7 @@ apply1() { bug163.go | bug166.go | bug169.go | bug217.go | bug222.go | \ bug226.go | bug228.go | bug248.go | bug274.go | bug280.go | \ bug282.go | bug287.go | bug298.go | bug299.go | bug300.go | \ - bug302.go | bug306.go | bug322.go ) return ;; + bug302.go | bug306.go | bug322.go | bug324.go ) return ;; esac # the following directories are skipped because they contain test # cases for syntax errors and thus won't parse in the first place: diff --git a/src/cmd/goinstall/download.go b/src/cmd/goinstall/download.go index 889f9d857..88befc0dc 100644 --- a/src/cmd/goinstall/download.go +++ b/src/cmd/goinstall/download.go @@ -9,7 +9,7 @@ package main import ( "http" "os" - "path" + "path/filepath" "regexp" "strings" ) @@ -42,7 +42,7 @@ func download(pkg string) (string, os.Error) { return "", os.ErrorString("invalid path (contains ..)") } if m := 
bitbucket.FindStringSubmatch(pkg); m != nil { - if err := vcsCheckout(&hg, root+m[1], "http://"+m[1], m[1]); err != nil { + if err := vcsCheckout(&hg, m[1], "http://"+m[1], m[1]); err != nil { return "", err } return root + pkg, nil @@ -58,7 +58,7 @@ func download(pkg string) (string, os.Error) { // regexp only allows hg, svn to get through panic("missing case in download: " + pkg) } - if err := vcsCheckout(v, root+m[1], "https://"+m[1], m[1]); err != nil { + if err := vcsCheckout(v, m[1], "https://"+m[1], m[1]); err != nil { return "", err } return root + pkg, nil @@ -67,7 +67,7 @@ func download(pkg string) (string, os.Error) { if strings.HasSuffix(m[1], ".git") { return "", os.ErrorString("repository " + pkg + " should not have .git suffix") } - if err := vcsCheckout(&git, root+m[1], "http://"+m[1]+".git", m[1]); err != nil { + if err := vcsCheckout(&git, m[1], "http://"+m[1]+".git", m[1]); err != nil { return "", err } return root + pkg, nil @@ -75,7 +75,7 @@ func download(pkg string) (string, os.Error) { if m := launchpad.FindStringSubmatch(pkg); m != nil { // Either lp.net/[/[/]] // or lp.net/~//[/] - if err := vcsCheckout(&bzr, root+m[1], "https://"+m[1], m[1]); err != nil { + if err := vcsCheckout(&bzr, m[1], "https://"+m[1], m[1]); err != nil { return "", err } return root + pkg, nil @@ -172,17 +172,18 @@ func (v *vcs) updateRepo(dst string) os.Error { // exists and -u was specified on the command line) // the repository at tag/branch "release". If there is no // such tag or branch, it falls back to the repository tip. 
-func vcsCheckout(vcs *vcs, dst, repo, dashpath string) os.Error { - dir, err := os.Stat(dst + "/" + vcs.metadir) +func vcsCheckout(vcs *vcs, pkgprefix, repo, dashpath string) os.Error { + dst := filepath.Join(root, filepath.FromSlash(pkgprefix)) + dir, err := os.Stat(filepath.Join(dst, vcs.metadir)) if err == nil && !dir.IsDirectory() { return os.ErrorString("not a directory: " + dst) } if err != nil { - parent, _ := path.Split(dst) + parent, _ := filepath.Split(dst) if err := os.MkdirAll(parent, 0777); err != nil { return err } - if err := run("/", nil, vcs.cmd, vcs.clone, repo, dst); err != nil { + if err := run(string(filepath.Separator), nil, vcs.cmd, vcs.clone, repo, dst); err != nil { return err } if err := vcs.updateRepo(dst); err != nil { diff --git a/src/cmd/goinstall/main.go b/src/cmd/goinstall/main.go index f13aeb3bc..34441be45 100644 --- a/src/cmd/goinstall/main.go +++ b/src/cmd/goinstall/main.go @@ -15,7 +15,7 @@ import ( "io" "io/ioutil" "os" - "path" + "path/filepath" "runtime" "strings" ) @@ -34,7 +34,7 @@ var ( parents = make(map[string]string) root = runtime.GOROOT() visit = make(map[string]status) - logfile = path.Join(root, "goinstall.log") + logfile = filepath.Join(root, "goinstall.log") installedPkgs = make(map[string]bool) allpkg = flag.Bool("a", false, "install all previously installed packages") @@ -59,7 +59,7 @@ func main() { fmt.Fprintf(os.Stderr, "%s: no $GOROOT\n", argv0) os.Exit(1) } - root += "/src/pkg/" + root += filepath.FromSlash("/src/pkg/") // special case - "unsafe" is already installed visit["unsafe"] = done @@ -160,7 +160,7 @@ func install(pkg, parent string) { dir = pkg local = true } else if isStandardPath(pkg) { - dir = path.Join(root, pkg) + dir = filepath.Join(root, filepath.FromSlash(pkg)) local = true } else { var err os.Error @@ -216,7 +216,8 @@ func install(pkg, parent string) { // Is this a local path? /foo ./foo ../foo . .. 
func isLocalPath(s string) bool { - return strings.HasPrefix(s, "/") || strings.HasPrefix(s, "./") || strings.HasPrefix(s, "../") || s == "." || s == ".." + const sep = string(filepath.Separator) + return strings.HasPrefix(s, sep) || strings.HasPrefix(s, "."+sep) || strings.HasPrefix(s, ".."+sep) || s == "." || s == ".." } // Is this a standard package path? strings container/vector etc. diff --git a/src/cmd/goinstall/make.go b/src/cmd/goinstall/make.go index 8d4d6c5d2..e2d99bb47 100644 --- a/src/cmd/goinstall/make.go +++ b/src/cmd/goinstall/make.go @@ -44,6 +44,9 @@ func domake(dir, pkg string, local bool) (err os.Error) { // installing as package pkg. It includes all *.go files in the directory // except those in package main and those ending in _test.go. func makeMakefile(dir, pkg string) ([]byte, os.Error) { + if !safeName(pkg) { + return nil, os.ErrorString("unsafe name: " + pkg) + } dirInfo, err := scanDir(dir, false) if err != nil { return nil, err @@ -58,16 +61,25 @@ func makeMakefile(dir, pkg string) ([]byte, os.Error) { cgoFiles := dirInfo.cgoFiles isCgo := make(map[string]bool, len(cgoFiles)) for _, file := range cgoFiles { + if !safeName(file) { + return nil, os.ErrorString("bad name: " + file) + } isCgo[file] = true } oFiles := make([]string, 0, len(dirInfo.cFiles)) for _, file := range dirInfo.cFiles { + if !safeName(file) { + return nil, os.ErrorString("unsafe name: " + file) + } oFiles = append(oFiles, file[:len(file)-2]+".o") } goFiles := make([]string, 0, len(dirInfo.goFiles)) for _, file := range dirInfo.goFiles { + if !safeName(file) { + return nil, os.ErrorString("unsafe name: " + file) + } if !isCgo[file] { goFiles = append(goFiles, file) } @@ -81,6 +93,17 @@ func makeMakefile(dir, pkg string) ([]byte, os.Error) { return buf.Bytes(), nil } +var safeBytes = []byte("+-./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz") + +func safeName(s string) bool { + for i := 0; i < len(s); i++ { + if c := s[i]; c < 0x80 && 
bytes.IndexByte(safeBytes, c) < 0 { + return false + } + } + return true +} + // makedata is the data type for the makefileTemplate. type makedata struct { Pkg string // package import path diff --git a/src/cmd/goinstall/parse.go b/src/cmd/goinstall/parse.go index 679edfabc..014b8fcb2 100644 --- a/src/cmd/goinstall/parse.go +++ b/src/cmd/goinstall/parse.go @@ -7,13 +7,13 @@ package main import ( - "path" - "os" - "log" - "strings" - "strconv" "go/ast" "go/parser" + "log" + "os" + "path/filepath" + "strconv" + "strings" ) @@ -64,7 +64,7 @@ func scanDir(dir string, allowMain bool) (info *dirInfo, err os.Error) { if !strings.HasSuffix(d.Name, ".go") || strings.HasSuffix(d.Name, "_test.go") { continue } - filename := path.Join(dir, d.Name) + filename := filepath.Join(dir, d.Name) pf, err := parser.ParseFile(fset, filename, nil, parser.ImportsOnly) if err != nil { return nil, err diff --git a/src/cmd/gopack/ar.c b/src/cmd/gopack/ar.c index 702f104a6..a7e2c41af 100644 --- a/src/cmd/gopack/ar.c +++ b/src/cmd/gopack/ar.c @@ -109,7 +109,7 @@ typedef struct Hashchain /* constants and flags */ char *man = "mrxtdpq"; -char *opt = "uvnbailoS"; +char *opt = "uvnbailogS"; char artemp[] = "/tmp/vXXXXX"; char movtemp[] = "/tmp/v1XXXXX"; char tailtemp[] = "/tmp/v2XXXXX"; diff --git a/src/cmd/gopack/doc.go b/src/cmd/gopack/doc.go index 74c272fd2..08711e72e 100644 --- a/src/cmd/gopack/doc.go +++ b/src/cmd/gopack/doc.go @@ -12,10 +12,12 @@ It adds a special Go-specific section __.PKGDEF that collects all the Go type information from the files in the archive; that section is used by the compiler when importing the package during compilation. -Usage: gopack [uvnbailo][mrxtdpq] archive files ... +Usage: gopack [uvnbailogS][mrxtdpq] archive files ... The new option 'g' causes gopack to maintain the __.PKGDEF section as files are added to the archive. +The new option 'S' forces gopack to mark the archive as safe. 
+ */ package documentation diff --git a/src/cmd/gotest/doc.go b/src/cmd/gotest/doc.go index 40c40fc1f..581eaaab9 100644 --- a/src/cmd/gotest/doc.go +++ b/src/cmd/gotest/doc.go @@ -20,7 +20,7 @@ They should have signature func TestXXX(t *testing.T) { ... } Benchmark functions can be written as well; they will be run only -when the -benchmarks flag is provided. Benchmarks should have +when the -test.bench flag is provided. Benchmarks should have signature func BenchmarkXXX(b *testing.B) { ... } @@ -42,15 +42,15 @@ The resulting binary, called (for amd64) 6.out, has a couple of arguments. Usage: - 6.out [-v] [-match pattern] [-benchmarks pattern] - -The -v flag causes the tests to be logged as they run. The -match -flag causes only those tests whose names match the regular expression -pattern to be run. By default all tests are run silently. If all -the specified test pass, 6.out prints PASS and exits with a 0 exit -code. If any tests fail, it prints FAIL and exits with a non-zero -code. The -benchmarks flag is analogous to the -match flag, but -applies to benchmarks. No benchmarks run by default. + 6.out [-test.v] [-test.run pattern] [-test.bench pattern] + +The -test.v flag causes the tests to be logged as they run. The +-test.run flag causes only those tests whose names match the regular +expression pattern to be run. By default all tests are run silently. +If all the specified test pass, 6.out prints PASS and exits with a 0 +exit code. If any tests fail, it prints FAIL and exits with a +non-zero code. The -test.bench flag is analogous to the -test.run +flag, but applies to benchmarks. No benchmarks run by default. 
*/ package documentation diff --git a/src/cmd/govet/govet.go b/src/cmd/govet/govet.go index 5619b12ba..ff6421de8 100644 --- a/src/cmd/govet/govet.go +++ b/src/cmd/govet/govet.go @@ -15,7 +15,7 @@ import ( "go/parser" "go/token" "os" - "path" + "path/filepath" "strconv" "strings" ) @@ -99,7 +99,7 @@ func doFile(name string, reader io.Reader) { file.checkFile(name, parsedFile) } -// Visitor for path.Walk - trivial. Just calls doFile on each file. +// Visitor for filepath.Walk - trivial. Just calls doFile on each file. // TODO: if govet becomes richer, might want to process // a directory (package) at a time. type V struct{} @@ -124,7 +124,7 @@ func walkDir(root string) { } done <- true }() - path.Walk(root, V{}, errors) + filepath.Walk(root, V{}, errors) close(errors) <-done } diff --git a/src/cmd/goyacc/Makefile b/src/cmd/goyacc/Makefile index 54b8f3360..ac0f427cc 100644 --- a/src/cmd/goyacc/Makefile +++ b/src/cmd/goyacc/Makefile @@ -11,7 +11,7 @@ GOFILES=\ include ../../Make.cmd units: goyacc units.y - ./goyacc units.y + ./goyacc -p units_ units.y $(GC) y.go $(LD) -o units y.$O diff --git a/src/cmd/goyacc/doc.go b/src/cmd/goyacc/doc.go index 686f75745..5dd6abe69 100644 --- a/src/cmd/goyacc/doc.go +++ b/src/cmd/goyacc/doc.go @@ -17,7 +17,8 @@ Yacc adepts will have no trouble adapting to this form of the tool. The file units.y in this directory is a yacc grammar for a version of the Unix tool units, also written in Go and largely transliterated -from the Plan 9 C version. +from the Plan 9 C version. It needs the flag "-p units_" (see +below). The generated parser is reentrant. Parse expects to be given an argument that conforms to the following interface: @@ -31,8 +32,15 @@ Lex should return the token identifier, and place other token information in lval (which replaces the usual yylval). Error is equivalent to yyerror in the original yacc. 
-Code inside the parser may refer to the variable yylex +Code inside the parser may refer to the variable yylex, which holds the yyLexer passed to Parse. +Multiple grammars compiled into a single program should be placed in +distinct packages. If that is impossible, the "-p prefix" flag to +goyacc sets the prefix, by default yy, that begins the names of +symbols, including types, the parser, and the lexer, generated and +referenced by goyacc's generated code. Setting it to distinct values +allows multiple grammars to be placed in a single package. + */ package documentation diff --git a/src/cmd/goyacc/goyacc.go b/src/cmd/goyacc/goyacc.go index c9fa6bfb9..32816b700 100644 --- a/src/cmd/goyacc/goyacc.go +++ b/src/cmd/goyacc/goyacc.go @@ -153,9 +153,17 @@ var ftable *bufio.Writer // y.go file var fcode = &bytes.Buffer{} // saved code var foutput *bufio.Writer // y.output file -var oflag string // -o [y.go] - y.go file -var vflag string // -v [y.output] - y.output file -var lflag bool // -l - disable line directives +var oflag string // -o [y.go] - y.go file +var vflag string // -v [y.output] - y.output file +var lflag bool // -l - disable line directives +var prefix string // name prefix for identifiers, default yy + +func init() { + flag.StringVar(&oflag, "o", "y.go", "parser output") + flag.StringVar(&prefix, "p", "yy", "name prefix to use in generated code") + flag.StringVar(&vflag, "v", "y.output", "create parsing tables") + flag.BoolVar(&lflag, "l", false, "disable line directives") +} var stacksize = 200 @@ -349,10 +357,6 @@ func setup() { stderr = bufio.NewWriter(os.NewFile(2, "stderr")) foutput = nil - flag.StringVar(&oflag, "o", "", "parser output") - flag.StringVar(&vflag, "v", "", "create parsing tables") - flag.BoolVar(&lflag, "l", false, "disable line directives") - flag.Parse() if flag.NArg() != 1 { usage() @@ -362,6 +366,7 @@ func setup() { fmt.Fprintf(stderr, "yacc: stack size too small\n") usage() } + yaccpar = strings.Replace(yaccpartext, "$$", 
prefix, -1) openup() defin(0, "$end") @@ -506,20 +511,20 @@ outer: } // put out names of token names - fmt.Fprintf(ftable, "var\tyyToknames\t =[]string {\n") + fmt.Fprintf(ftable, "var\t%sToknames\t =[]string {\n", prefix) for i := TOKSTART; i <= ntokens; i++ { fmt.Fprintf(ftable, "\t\"%v\",\n", tokset[i].name) } fmt.Fprintf(ftable, "}\n") // put out names of state names - fmt.Fprintf(ftable, "var\tyyStatenames\t =[]string {\n") + fmt.Fprintf(ftable, "var\t%sStatenames\t =[]string {\n", prefix) // for i:=TOKSTART; i<=ntokens; i++ { // fmt.Fprintf(ftable, "\t\"%v\",\n", tokset[i].name); // } fmt.Fprintf(ftable, "}\n") - fmt.Fprintf(fcode, "switch yynt {\n") + fmt.Fprintf(fcode, "switch %snt {\n", prefix) moreprod() prdptr[0] = []int{NTBASE, start, 1, 0} @@ -648,8 +653,8 @@ outer: error("default action causes potential type clash") } fmt.Fprintf(fcode, "\ncase %v:", nprod) - fmt.Fprintf(fcode, "\n\tYYVAL.%v = YYS[yypt-0].%v;", - typeset[tempty], typeset[tempty]) + fmt.Fprintf(fcode, "\n\t%sVAL.%v = %sS[%spt-0].%v;", + prefix, typeset[tempty], prefix, prefix, typeset[tempty]) } moreprod() prdptr[nprod] = make([]int, mem) @@ -666,9 +671,9 @@ outer: fmt.Fprintf(fcode, "\n\t}") - fmt.Fprintf(ftable, "const yyEofCode = 1\n") - fmt.Fprintf(ftable, "const yyErrCode = 2\n") - fmt.Fprintf(ftable, "const yyMaxDepth = %v\n", stacksize) + fmt.Fprintf(ftable, "const %sEofCode = 1\n", prefix) + fmt.Fprintf(ftable, "const %sErrCode = 2\n", prefix) + fmt.Fprintf(ftable, "const %sMaxDepth = %v\n", prefix, stacksize) // // copy any postfix code @@ -1034,7 +1039,7 @@ func cpyunion() { if !lflag { fmt.Fprintf(ftable, "\n//line %v:%v\n", infile, lineno) } - fmt.Fprintf(ftable, "type\tyySymType\tstruct") + fmt.Fprintf(ftable, "type\t%sSymType\tstruct", prefix) level := 0 @@ -1197,7 +1202,7 @@ loop: c = getrune(finput) } if c == '$' { - fmt.Fprintf(fcode, "YYVAL") + fmt.Fprintf(fcode, "%sVAL", prefix) // put out the proper tag... 
if ntypes != 0 { @@ -1258,7 +1263,7 @@ loop: ungetrune(finput, c) continue loop } - fmt.Fprintf(fcode, "YYS[yypt-%v]", max-j-1) + fmt.Fprintf(fcode, "%sS[%spt-%v]", prefix, prefix, max-j-1) // put out the proper tag if ntypes != 0 { @@ -2067,7 +2072,7 @@ func output() { var c, u, v int fmt.Fprintf(ftable, "\n//line yacctab:1\n") - fmt.Fprintf(ftable, "var\tyyExca = []int {\n") + fmt.Fprintf(ftable, "var\t%sExca = []int {\n", prefix) noset := mkset() @@ -2140,10 +2145,10 @@ func output() { } fmt.Fprintf(ftable, "}\n") - fmt.Fprintf(ftable, "const\tyyNprod\t= %v\n", nprod) - fmt.Fprintf(ftable, "const\tyyPrivate\t= %v\n", PRIVATE) - fmt.Fprintf(ftable, "var\tyyTokenNames []string\n") - fmt.Fprintf(ftable, "var\tyyStates []string\n") + fmt.Fprintf(ftable, "const\t%sNprod\t= %v\n", prefix, nprod) + fmt.Fprintf(ftable, "const\t%sPrivate\t= %v\n", prefix, PRIVATE) + fmt.Fprintf(ftable, "var\t%sTokenNames []string\n", prefix) + fmt.Fprintf(ftable, "var\t%sStates []string\n", prefix) } // @@ -2718,10 +2723,10 @@ nextn: // write out the optimized parser // func aoutput() { - fmt.Fprintf(ftable, "const\tyyLast\t= %v\n", maxa+1) - arout("yyAct", amem, maxa+1) - arout("yyPact", indgo, nstate) - arout("yyPgo", pgo, nnonter+1) + fmt.Fprintf(ftable, "const\t%sLast\t= %v\n", prefix, maxa+1) + arout("Act", amem, maxa+1) + arout("Pact", indgo, nstate) + arout("Pgo", pgo, nnonter+1) } // @@ -2730,7 +2735,7 @@ func aoutput() { func others() { var i, j int - arout("yyR1", levprd, nprod) + arout("R1", levprd, nprod) aryfil(temp1, nprod, 0) // @@ -2739,7 +2744,7 @@ func others() { for i = 1; i < nprod; i++ { temp1[i] = len(prdptr[i]) - 2 } - arout("yyR2", temp1, nprod) + arout("R2", temp1, nprod) aryfil(temp1, nstate, -1000) for i = 0; i <= ntokens; i++ { @@ -2752,8 +2757,8 @@ func others() { temp1[j] = -i } } - arout("yyChk", temp1, nstate) - arout("yyDef", defact, nstate) + arout("Chk", temp1, nstate) + arout("Def", defact, nstate) // put out token translation tables // table 1 has 
0-256 @@ -2778,7 +2783,7 @@ func others() { temp1[i] = YYLEXUNK } } - arout("yyTok1", temp1, c+1) + arout("Tok1", temp1, c+1) // table 2 has PRIVATE-PRIVATE+256 aryfil(temp1, 256, 0) @@ -2797,10 +2802,10 @@ func others() { } } } - arout("yyTok2", temp1, c+1) + arout("Tok2", temp1, c+1) // table 3 has everything else - fmt.Fprintf(ftable, "var\tyyTok3\t= []int {\n") + fmt.Fprintf(ftable, "var\t%sTok3\t= []int {\n", prefix) c = 0 for i = 1; i <= ntokens; i++ { j = tokset[i].value @@ -2829,13 +2834,14 @@ func others() { // copy yaccpar fmt.Fprintf(ftable, "\n//line yaccpar:1\n") - parts := strings.Split(yaccpar, "yyrun()", 2) + parts := strings.Split(yaccpar, prefix+"run()", 2) fmt.Fprintf(ftable, "%v", parts[0]) ftable.Write(fcode.Bytes()) fmt.Fprintf(ftable, "%v", parts[1]) } func arout(s string, v []int, n int) { + s = prefix + s fmt.Fprintf(ftable, "var\t%v\t= []int {\n", s) for i := 0; i < n; i++ { if i%10 == 0 { @@ -3076,86 +3082,84 @@ func exit(status int) { os.Exit(status) } -var yaccpar = ` +var yaccpar string // will be processed version of yaccpartext: s/$$/prefix/g +var yaccpartext = ` /* parser for yacc output */ -var yyDebug = 0 +var $$Debug = 0 -type yyLexer interface { - Lex(lval *yySymType) int +type $$Lexer interface { + Lex(lval *$$SymType) int Error(s string) } -const yyFlag = -1000 +const $$Flag = -1000 -func yyTokname(yyc int) string { - if yyc > 0 && yyc <= len(yyToknames) { - if yyToknames[yyc-1] != "" { - return yyToknames[yyc-1] +func $$Tokname(c int) string { + if c > 0 && c <= len($$Toknames) { + if $$Toknames[c-1] != "" { + return $$Toknames[c-1] } } - return fmt.Sprintf("tok-%v", yyc) + return fmt.Sprintf("tok-%v", c) } -func yyStatname(yys int) string { - if yys >= 0 && yys < len(yyStatenames) { - if yyStatenames[yys] != "" { - return yyStatenames[yys] +func $$Statname(s int) string { + if s >= 0 && s < len($$Statenames) { + if $$Statenames[s] != "" { + return $$Statenames[s] } } - return fmt.Sprintf("state-%v", yys) + return 
fmt.Sprintf("state-%v", s) } -func yylex1(yylex yyLexer, lval *yySymType) int { - var yychar int - var c int - - yychar = yylex.Lex(lval) - if yychar <= 0 { - c = yyTok1[0] +func $$lex1(lex $$Lexer, lval *$$SymType) int { + c := 0 + char := lex.Lex(lval) + if char <= 0 { + c = $$Tok1[0] goto out } - if yychar < len(yyTok1) { - c = yyTok1[yychar] + if char < len($$Tok1) { + c = $$Tok1[char] goto out } - if yychar >= yyPrivate { - if yychar < yyPrivate+len(yyTok2) { - c = yyTok2[yychar-yyPrivate] + if char >= $$Private { + if char < $$Private+len($$Tok2) { + c = $$Tok2[char-$$Private] goto out } } - for i := 0; i < len(yyTok3); i += 2 { - c = yyTok3[i+0] - if c == yychar { - c = yyTok3[i+1] + for i := 0; i < len($$Tok3); i += 2 { + c = $$Tok3[i+0] + if c == char { + c = $$Tok3[i+1] goto out } } - c = 0 out: if c == 0 { - c = yyTok2[1] /* unknown char */ + c = $$Tok2[1] /* unknown char */ } - if yyDebug >= 3 { - fmt.Printf("lex %U %s\n", uint(yychar), yyTokname(c)) + if $$Debug >= 3 { + fmt.Printf("lex %U %s\n", uint(char), $$Tokname(c)) } return c } -func yyParse(yylex yyLexer) int { - var yyn int - var yylval yySymType - var YYVAL yySymType - YYS := make([]yySymType, yyMaxDepth) +func $$Parse($$lex $$Lexer) int { + var $$n int + var $$lval $$SymType + var $$VAL $$SymType + $$S := make([]$$SymType, $$MaxDepth) Nerrs := 0 /* number of errors */ Errflag := 0 /* error recovery flag */ - yystate := 0 - yychar := -1 - yyp := -1 - goto yystack + $$state := 0 + $$char := -1 + $$p := -1 + goto $$stack ret0: return 0 @@ -3163,80 +3167,80 @@ ret0: ret1: return 1 -yystack: +$$stack: /* put a state and value onto the stack */ - if yyDebug >= 4 { - fmt.Printf("char %v in %v\n", yyTokname(yychar), yyStatname(yystate)) + if $$Debug >= 4 { + fmt.Printf("char %v in %v\n", $$Tokname($$char), $$Statname($$state)) } - yyp++ - if yyp >= len(YYS) { - nyys := make([]yySymType, len(YYS)*2) - copy(nyys, YYS) - YYS = nyys + $$p++ + if $$p >= len($$S) { + nyys := make([]$$SymType, len($$S)*2) 
+ copy(nyys, $$S) + $$S = nyys } - YYS[yyp] = YYVAL - YYS[yyp].yys = yystate + $$S[$$p] = $$VAL + $$S[$$p].yys = $$state -yynewstate: - yyn = yyPact[yystate] - if yyn <= yyFlag { - goto yydefault /* simple state */ +$$newstate: + $$n = $$Pact[$$state] + if $$n <= $$Flag { + goto $$default /* simple state */ } - if yychar < 0 { - yychar = yylex1(yylex, &yylval) + if $$char < 0 { + $$char = $$lex1($$lex, &$$lval) } - yyn += yychar - if yyn < 0 || yyn >= yyLast { - goto yydefault + $$n += $$char + if $$n < 0 || $$n >= $$Last { + goto $$default } - yyn = yyAct[yyn] - if yyChk[yyn] == yychar { /* valid shift */ - yychar = -1 - YYVAL = yylval - yystate = yyn + $$n = $$Act[$$n] + if $$Chk[$$n] == $$char { /* valid shift */ + $$char = -1 + $$VAL = $$lval + $$state = $$n if Errflag > 0 { Errflag-- } - goto yystack + goto $$stack } -yydefault: +$$default: /* default state action */ - yyn = yyDef[yystate] - if yyn == -2 { - if yychar < 0 { - yychar = yylex1(yylex, &yylval) + $$n = $$Def[$$state] + if $$n == -2 { + if $$char < 0 { + $$char = $$lex1($$lex, &$$lval) } /* look through exception table */ - yyxi := 0 + xi := 0 for { - if yyExca[yyxi+0] == -1 && yyExca[yyxi+1] == yystate { + if $$Exca[xi+0] == -1 && $$Exca[xi+1] == $$state { break } - yyxi += 2 + xi += 2 } - for yyxi += 2; ; yyxi += 2 { - yyn = yyExca[yyxi+0] - if yyn < 0 || yyn == yychar { + for xi += 2; ; xi += 2 { + $$n = $$Exca[xi+0] + if $$n < 0 || $$n == $$char { break } } - yyn = yyExca[yyxi+1] - if yyn < 0 { + $$n = $$Exca[xi+1] + if $$n < 0 { goto ret0 } } - if yyn == 0 { + if $$n == 0 { /* error ... 
attempt to resume parsing */ switch Errflag { case 0: /* brand new error */ - yylex.Error("syntax error") + $$lex.Error("syntax error") Nerrs++ - if yyDebug >= 1 { - fmt.Printf("%s", yyStatname(yystate)) - fmt.Printf("saw %s\n", yyTokname(yychar)) + if $$Debug >= 1 { + fmt.Printf("%s", $$Statname($$state)) + fmt.Printf("saw %s\n", $$Tokname($$char)) } fallthrough @@ -3244,64 +3248,64 @@ yydefault: Errflag = 3 /* find a state where "error" is a legal shift action */ - for yyp >= 0 { - yyn = yyPact[YYS[yyp].yys] + yyErrCode - if yyn >= 0 && yyn < yyLast { - yystate = yyAct[yyn] /* simulate a shift of "error" */ - if yyChk[yystate] == yyErrCode { - goto yystack + for $$p >= 0 { + $$n = $$Pact[$$S[$$p].yys] + $$ErrCode + if $$n >= 0 && $$n < $$Last { + $$state = $$Act[$$n] /* simulate a shift of "error" */ + if $$Chk[$$state] == $$ErrCode { + goto $$stack } } - /* the current yyp has no shift onn "error", pop stack */ - if yyDebug >= 2 { + /* the current p has no shift onn "error", pop stack */ + if $$Debug >= 2 { fmt.Printf("error recovery pops state %d, uncovers %d\n", - YYS[yyp].yys, YYS[yyp-1].yys) + $$S[$$p].yys, $$S[$$p-1].yys) } - yyp-- + $$p-- } /* there is no state on the stack with an error shift ... 
abort */ goto ret1 case 3: /* no shift yet; clobber input char */ - if yyDebug >= 2 { - fmt.Printf("error recovery discards %s\n", yyTokname(yychar)) + if $$Debug >= 2 { + fmt.Printf("error recovery discards %s\n", $$Tokname($$char)) } - if yychar == yyEofCode { + if $$char == $$EofCode { goto ret1 } - yychar = -1 - goto yynewstate /* try again in the same state */ + $$char = -1 + goto $$newstate /* try again in the same state */ } } - /* reduction by production yyn */ - if yyDebug >= 2 { - fmt.Printf("reduce %v in:\n\t%v\n", yyn, yyStatname(yystate)) + /* reduction by production $$n */ + if $$Debug >= 2 { + fmt.Printf("reduce %v in:\n\t%v\n", $$n, $$Statname($$state)) } - yynt := yyn - yypt := yyp - _ = yypt // guard against "declared and not used" + $$nt := $$n + $$pt := $$p + _ = $$pt // guard against "declared and not used" - yyp -= yyR2[yyn] - YYVAL = YYS[yyp+1] + $$p -= $$R2[$$n] + $$VAL = $$S[$$p+1] /* consult goto table to find next state */ - yyn = yyR1[yyn] - yyg := yyPgo[yyn] - yyj := yyg + YYS[yyp].yys + 1 + $$n = $$R1[$$n] + $$g := $$Pgo[$$n] + $$j := $$g + $$S[$$p].yys + 1 - if yyj >= yyLast { - yystate = yyAct[yyg] + if $$j >= $$Last { + $$state = $$Act[$$g] } else { - yystate = yyAct[yyj] - if yyChk[yystate] != -yyn { - yystate = yyAct[yyg] + $$state = $$Act[$$j] + if $$Chk[$$state] != -$$n { + $$state = $$Act[$$g] } } // dummy call; replaced with literal code - yyrun() - goto yystack /* stack new state and value */ + $$run() + goto $$stack /* stack new state and value */ } ` diff --git a/src/cmd/goyacc/units.y b/src/cmd/goyacc/units.y index a7d472fc6..5d3f9aca2 100644 --- a/src/cmd/goyacc/units.y +++ b/src/cmd/goyacc/units.y @@ -6,6 +6,9 @@ // Distributed under the terms of the Lucent Public License Version 1.02 // See http://plan9.bell-labs.com/plan9/license.html +// Generate parser with prefix "units_": +// goyacc -p "units_" + %{ // units.y @@ -215,7 +218,7 @@ expr0: type UnitsLex int -func (UnitsLex) Lex(yylval *yySymType) int { +func 
(UnitsLex) Lex(yylval *units_SymType) int { var c, i int c = peekrune @@ -319,7 +322,7 @@ func main() { continue } peekrune = ':' - yyParse(UnitsLex(0)) + units_Parse(UnitsLex(0)) } /* @@ -340,7 +343,7 @@ func main() { } peekrune = '?' nerrors = 0 - yyParse(UnitsLex(0)) + units_Parse(UnitsLex(0)) if nerrors != 0 { continue } diff --git a/src/cmd/hgpatch/main.go b/src/cmd/hgpatch/main.go index bd4b563f9..2dcb5234c 100644 --- a/src/cmd/hgpatch/main.go +++ b/src/cmd/hgpatch/main.go @@ -14,7 +14,7 @@ import ( "io/ioutil" "os" "patch" - "path" + "path/filepath" "sort" "strings" ) @@ -186,7 +186,7 @@ func main() { // make parent directory for name, if necessary func makeParent(name string) { - parent, _ := path.Split(name) + parent, _ := filepath.Split(name) chk(mkdirAll(parent, 0755)) } diff --git a/src/cmd/ld/data.c b/src/cmd/ld/data.c index 0551232cf..a20b057ce 100644 --- a/src/cmd/ld/data.c +++ b/src/cmd/ld/data.c @@ -241,7 +241,7 @@ dynrelocsym(Sym *s) { Reloc *r; - if(thechar == '8' && HEADTYPE == 10) { // Windows PE + if(HEADTYPE == Hwindows) { Sym *rel, *targ; rel = lookup(".rel", 0); @@ -898,9 +898,9 @@ address(void) segdata.rwx = 06; segdata.vaddr = va; segdata.fileoff = va - segtext.vaddr + segtext.fileoff; - if((thechar == '6' || thechar == '8') && HEADTYPE == 10) // Windows PE + if(HEADTYPE == Hwindows) segdata.fileoff = segtext.fileoff + rnd(segtext.len, PEFILEALIGN); - if(thechar == '8' && HEADTYPE == 2) { // Plan 9 + if(HEADTYPE == Hplan9x32) { segdata.vaddr = va = rnd(va, 4096); segdata.fileoff = segtext.fileoff + segtext.filelen; } diff --git a/src/cmd/ld/dwarf.c b/src/cmd/ld/dwarf.c index 5df3515f5..5ba4b7c64 100644 --- a/src/cmd/ld/dwarf.c +++ b/src/cmd/ld/dwarf.c @@ -772,6 +772,9 @@ enum { KindUnsafePointer, KindNoPointers = 1<<7, + + // size of Type interface header + CommonType structure. 
+ CommonSize = 2*PtrSize+ 4*PtrSize + 8, }; static Reloc* @@ -849,59 +852,59 @@ decodetype_size(Sym *s) static Sym* decodetype_arrayelem(Sym *s) { - return decode_reloc_sym(s, 5*PtrSize + 8); // 0x1c / 0x30 + return decode_reloc_sym(s, CommonSize); // 0x1c / 0x30 } static vlong decodetype_arraylen(Sym *s) { - return decode_inuxi(s->p + 6*PtrSize + 8, PtrSize); + return decode_inuxi(s->p + CommonSize+PtrSize, PtrSize); } // Type.PtrType.elem static Sym* decodetype_ptrelem(Sym *s) { - return decode_reloc_sym(s, 5*PtrSize + 8); // 0x1c / 0x30 + return decode_reloc_sym(s, CommonSize); // 0x1c / 0x30 } // Type.MapType.key, elem static Sym* decodetype_mapkey(Sym *s) { - return decode_reloc_sym(s, 5*PtrSize + 8); // 0x1c / 0x30 + return decode_reloc_sym(s, CommonSize); // 0x1c / 0x30 } static Sym* decodetype_mapvalue(Sym *s) { - return decode_reloc_sym(s, 6*PtrSize + 8); // 0x20 / 0x38 + return decode_reloc_sym(s, CommonSize+PtrSize); // 0x20 / 0x38 } // Type.ChanType.elem static Sym* decodetype_chanelem(Sym *s) { - return decode_reloc_sym(s, 5*PtrSize + 8); // 0x1c / 0x30 + return decode_reloc_sym(s, CommonSize); // 0x1c / 0x30 } // Type.FuncType.dotdotdot static int decodetype_funcdotdotdot(Sym *s) { - return s->p[5*PtrSize + 8]; + return s->p[CommonSize]; } // Type.FuncType.in.len static int decodetype_funcincount(Sym *s) { - return decode_inuxi(s->p + 7*PtrSize + 8, 4); + return decode_inuxi(s->p + CommonSize+2*PtrSize, 4); } static int decodetype_funcoutcount(Sym *s) { - return decode_inuxi(s->p + 8*PtrSize + 16, 4); + return decode_inuxi(s->p + CommonSize+3*PtrSize + 2*4, 4); } static Sym* @@ -909,7 +912,7 @@ decodetype_funcintype(Sym *s, int i) { Reloc *r; - r = decode_reloc(s, 6*PtrSize + 8); + r = decode_reloc(s, CommonSize + PtrSize); if (r == nil) return nil; return decode_reloc_sym(r->sym, r->add + i * PtrSize); @@ -920,7 +923,7 @@ decodetype_funcouttype(Sym *s, int i) { Reloc *r; - r = decode_reloc(s, 7*PtrSize + 16); + r = decode_reloc(s, CommonSize + 
2*PtrSize + 2*4); if (r == nil) return nil; return decode_reloc_sym(r->sym, r->add + i * PtrSize); @@ -930,15 +933,18 @@ decodetype_funcouttype(Sym *s, int i) static int decodetype_structfieldcount(Sym *s) { - return decode_inuxi(s->p + 6*PtrSize + 8, 4); // 0x20 / 0x38 + return decode_inuxi(s->p + CommonSize + PtrSize, 4); } -// Type.StructType.fields[]-> name, typ and offset. sizeof(structField) = 5*PtrSize +enum { + StructFieldSize = 5*PtrSize +}; +// Type.StructType.fields[]-> name, typ and offset. static char* decodetype_structfieldname(Sym *s, int i) { // go.string."foo" 0x28 / 0x40 - s = decode_reloc_sym(s, 6*PtrSize + 0x10 + i*5*PtrSize); + s = decode_reloc_sym(s, CommonSize + PtrSize + 2*4 + i*StructFieldSize); if (s == nil) // embedded structs have a nil name. return nil; s = decode_reloc_sym(s, 0); // string."foo" @@ -950,20 +956,20 @@ decodetype_structfieldname(Sym *s, int i) static Sym* decodetype_structfieldtype(Sym *s, int i) { - return decode_reloc_sym(s, 8*PtrSize + 0x10 + i*5*PtrSize); // 0x30 / 0x50 + return decode_reloc_sym(s, CommonSize + PtrSize + 2*4 + i*StructFieldSize + 2*PtrSize); } static vlong decodetype_structfieldoffs(Sym *s, int i) { - return decode_inuxi(s->p + 10*PtrSize + 0x10 + i*5*PtrSize, 4); // 0x38 / 0x60 + return decode_inuxi(s->p + CommonSize + PtrSize + 2*4 + i*StructFieldSize + 4*PtrSize, 4); } // InterfaceTYpe.methods.len static vlong decodetype_ifacemethodcount(Sym *s) { - return decode_inuxi(s->p + 6*PtrSize + 8, 4); + return decode_inuxi(s->p + CommonSize + PtrSize, 4); } @@ -2302,7 +2308,7 @@ writegdbscript(void) static void align(vlong size) { - if((thechar == '6' || thechar == '8') && HEADTYPE == 10) // Only Windows PE need section align. + if(HEADTYPE == Hwindows) // Only Windows PE need section align. 
strnput("", rnd(size, PEFILEALIGN) - size); } diff --git a/src/cmd/ld/go.c b/src/cmd/ld/go.c index 2c6a6d084..3c1e230b4 100644 --- a/src/cmd/ld/go.c +++ b/src/cmd/ld/go.c @@ -550,6 +550,8 @@ mark(Sym *s) if(s == S || s->reachable) return; + if(strncmp(s->name, "weak.", 5) == 0) + return; s->reachable = 1; if(s->text) marktext(s); @@ -654,6 +656,37 @@ deadcode(void) textp = nil; else last->next = nil; + + for(i=0; ihash) + if(strncmp(s->name, "weak.", 5) == 0) { + s->special = 1; // do not lay out in data segment + s->reachable = 1; + } +} + +void +doweak(void) +{ + int i; + Sym *s, *t; + + // resolve weak references only if + // target symbol will be in binary anyway. + for(i=0; ihash) { + if(strncmp(s->name, "weak.", 5) == 0) { + t = lookup(s->name+5, s->version); + if(t->type != 0 && t->reachable) { + s->value = t->value; + s->type = t->type; + } else { + s->type = SCONST; + s->value = 0; + } + continue; + } + } } void diff --git a/src/cmd/ld/lib.c b/src/cmd/ld/lib.c index c144d4295..e645502b3 100644 --- a/src/cmd/ld/lib.c +++ b/src/cmd/ld/lib.c @@ -31,6 +31,8 @@ #include "l.h" #include "lib.h" +#include "../../pkg/runtime/stack.h" + #include int iconv(Fmt*); @@ -1084,3 +1086,208 @@ be64(uchar *b) Endian be = { be16, be32, be64 }; Endian le = { le16, le32, le64 }; + +typedef struct Chain Chain; +struct Chain +{ + Sym *sym; + Chain *up; + int limit; // limit on entry to sym +}; + +static int stkcheck(Chain*, int); +static void stkprint(Chain*, int); +static void stkbroke(Chain*, int); +static Sym *morestack; +static Sym *newstack; + +enum +{ + HasLinkRegister = (thechar == '5'), + CallSize = (!HasLinkRegister)*PtrSize, // bytes of stack required for a call +}; + +void +dostkcheck(void) +{ + Chain ch; + Sym *s; + + morestack = lookup("runtime.morestack", 0); + newstack = lookup("runtime.newstack", 0); + + // First the nosplits on their own. 
+ for(s = textp; s != nil; s = s->next) { + if(s->text == nil || s->text->link == nil || (s->text->textflag & NOSPLIT) == 0) + continue; + cursym = s; + ch.up = nil; + ch.sym = s; + ch.limit = StackLimit - CallSize; + stkcheck(&ch, 0); + s->stkcheck = 1; + } + + // Check calling contexts. + // Some nosplits get called a little further down, + // like newproc and deferproc. We could hard-code + // that knowledge but it's more robust to look at + // the actual call sites. + for(s = textp; s != nil; s = s->next) { + if(s->text == nil || s->text->link == nil || (s->text->textflag & NOSPLIT) != 0) + continue; + cursym = s; + ch.up = nil; + ch.sym = s; + ch.limit = StackLimit - CallSize; + stkcheck(&ch, 0); + } +} + +static int +stkcheck(Chain *up, int depth) +{ + Chain ch, ch1; + Prog *p; + Sym *s; + int limit, prolog; + + limit = up->limit; + s = up->sym; + p = s->text; + + // Small optimization: don't repeat work at top. + if(s->stkcheck && limit == StackLimit-CallSize) + return 0; + + if(depth > 100) { + diag("nosplit stack check too deep"); + stkbroke(up, 0); + return -1; + } + + if(p == nil || p->link == nil) { + // external function. + // should never be called directly. + // only diagnose the direct caller. + if(depth == 1) + diag("call to external function %s", s->name); + return -1; + } + + if(limit < 0) { + stkbroke(up, limit); + return -1; + } + + // morestack looks like it calls functions, + // but it switches the stack pointer first. + if(s == morestack) + return 0; + + ch.up = up; + prolog = (s->text->textflag & NOSPLIT) == 0; + for(p = s->text; p != P; p = p->link) { + limit -= p->spadj; + if(prolog && p->spadj != 0) { + // The first stack adjustment in a function with a + // split-checking prologue marks the end of the + // prologue. Assuming the split check is correct, + // after the adjustment there should still be at least + // StackLimit bytes available below the stack pointer. 
+ // If this is not the top call in the chain, no need + // to duplicate effort, so just stop. + if(depth > 0) + return 0; + prolog = 0; + limit = StackLimit; + } + if(limit < 0) { + stkbroke(up, limit); + return -1; + } + if(iscall(p)) { + limit -= CallSize; + ch.limit = limit; + if(p->to.type == D_BRANCH) { + // Direct call. + ch.sym = p->to.sym; + if(stkcheck(&ch, depth+1) < 0) + return -1; + } else { + // Indirect call. Assume it is a splitting function, + // so we have to make sure it can call morestack. + limit -= CallSize; + ch.sym = nil; + ch1.limit = limit; + ch1.up = &ch; + ch1.sym = morestack; + if(stkcheck(&ch1, depth+2) < 0) + return -1; + limit += CallSize; + } + limit += CallSize; + } + + } + return 0; +} + +static void +stkbroke(Chain *ch, int limit) +{ + diag("nosplit stack overflow"); + stkprint(ch, limit); +} + +static void +stkprint(Chain *ch, int limit) +{ + char *name; + + if(ch->sym) + name = ch->sym->name; + else + name = "function pointer"; + + if(ch->up == nil) { + // top of chain. ch->sym != nil. 
+ if(ch->sym->text->textflag & NOSPLIT) + print("\t%d\tassumed on entry to %s\n", ch->limit, name); + else + print("\t%d\tguaranteed after split check in %s\n", ch->limit, name); + } else { + stkprint(ch->up, ch->limit + (!HasLinkRegister)*PtrSize); + if(!HasLinkRegister) + print("\t%d\ton entry to %s\n", ch->limit, name); + } + if(ch->limit != limit) + print("\t%d\tafter %s uses %d\n", limit, name, ch->limit - limit); +} + +int +headtype(char *name) +{ + int i; + + for(i=0; headers[i].name; i++) + if(strcmp(name, headers[i].name) == 0) { + headstring = headers[i].name; + return headers[i].val; + } + fprint(2, "unknown header type -H %s\n", name); + errorexit(); + return -1; // not reached +} + +void +undef(void) +{ + int i; + Sym *s; + + for(i=0; ihash) + if(s->type == SXREF) + diag("%s(%d): not defined", s->name, s->version); +} diff --git a/src/cmd/ld/lib.h b/src/cmd/ld/lib.h index 16dfb0dc3..adde2c9ff 100644 --- a/src/cmd/ld/lib.h +++ b/src/cmd/ld/lib.h @@ -74,7 +74,6 @@ extern int nlibdir; extern int cout; EXTERN char* INITENTRY; -EXTERN char thechar; EXTERN char* thestring; EXTERN Library* library; EXTERN int libraryp; @@ -167,6 +166,9 @@ void adddynlib(char*); int archreloc(Reloc*, Sym*, vlong*); void adddynsym(Sym*); void addexport(void); +void dostkcheck(void); +void undef(void); +void doweak(void); int pathchar(void); void* mal(uint32); @@ -208,3 +210,36 @@ enum { ArchiveObj, Pkgdef }; + +/* executable header types */ +enum { + Hgarbunix = 0, // garbage unix + Hnoheader, // no header + Hunixcoff, // unix coff + Hrisc, // aif for risc os + Hplan9x32, // plan 9 32-bit format + Hplan9x64, // plan 9 64-bit format + Hmsdoscom, // MS-DOS .COM + Hnetbsd, // NetBSD + Hmsdosexe, // fake MS-DOS .EXE + Hixp1200, // IXP1200 (raw) + Helf, // ELF32 + Hipaq, // ipaq + Hdarwin, // Apple Mach-O + Hlinux, // Linux ELF + Hnacl, // Google Native Client + Hfreebsd, // FreeBSD ELF + Hwindows, // MS Windows PE + Htiny // tiny (os image) +}; + +typedef struct Header Header; 
+struct Header { + char *name; + int val; +}; + +EXTERN char* headstring; +extern Header headers[]; + +int headtype(char*); diff --git a/src/cmd/ld/macho.c b/src/cmd/ld/macho.c index 402e0ec63..c8d7c4a6d 100644 --- a/src/cmd/ld/macho.c +++ b/src/cmd/ld/macho.c @@ -276,7 +276,6 @@ asmbmacho(void) vlong v, w; vlong va; int a, i; - char *pkgroot; MachoHdr *mh; MachoSect *msect; MachoSeg *ms; @@ -428,12 +427,6 @@ asmbmacho(void) ml->data[0] = 12; /* offset to string */ strcpy((char*)&ml->data[1], "/usr/lib/dyld"); - if(ndylib > 0) { /* add reference to where .so files are installed */ - pkgroot = smprint("%s/pkg/%s_%s", goroot, goos, goarch); - ml = newMachoLoad(0x80000000 | 0x1c, 1+(strlen(pkgroot)+1+7)/8*2); /* LC_RPATH */ - ml->data[0] = 12; /* offset of string from beginning of load */ - strcpy((char*)&ml->data[1], pkgroot); - } for(i=0; idata[0] = 24; /* offset of string from beginning of load */ diff --git a/src/cmd/ld/pe.c b/src/cmd/ld/pe.c index 2c34daab4..e72b0b2a0 100644 --- a/src/cmd/ld/pe.c +++ b/src/cmd/ld/pe.c @@ -500,6 +500,7 @@ asmbpe(void) IMAGE_FILE_EXECUTABLE_IMAGE|IMAGE_FILE_DEBUG_STRIPPED; if (pe64) { fh.SizeOfOptionalHeader = sizeof(oh64); + fh.Characteristics |= IMAGE_FILE_LARGE_ADDRESS_AWARE; set(Magic, 0x20b); // PE32+ } else { fh.SizeOfOptionalHeader = sizeof(oh); @@ -525,8 +526,11 @@ asmbpe(void) set(MinorSubsystemVersion, 0); set(SizeOfImage, nextsectoff); set(SizeOfHeaders, PEFILEHEADR); - set(Subsystem, 3); // WINDOWS_CUI - set(SizeOfStackReserve, 0x00200000); + if(strcmp(headstring, "windowsgui") == 0) + set(Subsystem, IMAGE_SUBSYSTEM_WINDOWS_GUI); + else + set(Subsystem, IMAGE_SUBSYSTEM_WINDOWS_CUI); + set(SizeOfStackReserve, 0x0040000); set(SizeOfStackCommit, 0x00001000); set(SizeOfHeapReserve, 0x00100000); set(SizeOfHeapCommit, 0x00001000); diff --git a/src/cmd/ld/pe.h b/src/cmd/ld/pe.h index 6dbf6a5be..2180fb88c 100644 --- a/src/cmd/ld/pe.h +++ b/src/cmd/ld/pe.h @@ -131,6 +131,9 @@ enum { IMAGE_DIRECTORY_ENTRY_IAT = 12, 
IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT = 13, IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR = 14, + + IMAGE_SUBSYSTEM_WINDOWS_GUI = 2, + IMAGE_SUBSYSTEM_WINDOWS_CUI = 3, }; void peinit(void); diff --git a/src/cmd/make.bash b/src/cmd/make.bash deleted file mode 100755 index 63da74625..000000000 --- a/src/cmd/make.bash +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2009 The Go Authors. All rights reserved. -# Use of this source code is governed by a BSD-style -# license that can be found in the LICENSE file. - -set -e - -bash clean.bash - -eval $(gomake --no-print-directory -f ../Make.inc go-env) -if [ -z "$O" ]; then - echo 'missing $O - maybe no Make.$GOARCH?' 1>&2 - exit 1 -fi - -cd ${O}l -bash mkenam -gomake enam.o -cd .. - -# Note: commands written in Go are not listed here. -# They are in ../pkg/Makefile so that they can be built -# after the Go libraries on which they depend. -for i in cc ${O}l ${O}a ${O}c gc ${O}g cov godefs gopack gotest nm prof -do - echo; echo; echo %%%% making $i %%%%; echo - cd $i - gomake install - cd .. -done diff --git a/src/env.bash b/src/env.bash index 4fc762821..c1055d561 100644 --- a/src/env.bash +++ b/src/env.bash @@ -29,7 +29,7 @@ if [ ! -d "$GOBIN" -a "$GOBIN" != "$GOROOT/bin" ]; then fi export OLDPATH=$PATH -export PATH=/bin:/usr/bin:"$GOBIN":$PATH +export PATH="$GOBIN":/bin:/usr/bin:$PATH MAKE=make if ! make --version 2>/dev/null | grep 'GNU Make' >/dev/null; then diff --git a/src/make.bash b/src/make.bash index 43c70a87b..d9ca40d42 100755 --- a/src/make.bash +++ b/src/make.bash @@ -61,29 +61,8 @@ bash "$GOROOT"/src/clean.bash # pkg builds libcgo and the Go programs in cmd. for i in lib9 libbio libmach cmd pkg do - case "$i-$GOOS-$GOARCH" in - cmd/*-nacl-*) - ;; - *) - # The ( ) here are to preserve the current directory - # for the next round despite the cd $i below. - # set -e does not apply to ( ) so we must explicitly - # test the exit status. 
- ( - echo; echo; echo %%%% making $i %%%%; echo - cd "$GOROOT"/src/$i - case $i in - cmd) - bash make.bash - ;; - pkg) - gomake install - ;; - *) - gomake install - esac - ) || exit 1 - esac + echo; echo; echo %%%% making $i %%%%; echo + gomake -C $i install done # Print post-install messages. diff --git a/src/pkg/Makefile b/src/pkg/Makefile index 619167ca4..6e70690d1 100644 --- a/src/pkg/Makefile +++ b/src/pkg/Makefile @@ -21,8 +21,10 @@ DIRS=\ bufio\ bytes\ cmath\ + compress/bzip2\ compress/flate\ compress/gzip\ + compress/lzw \ compress/zlib\ container/heap\ container/list\ @@ -40,6 +42,11 @@ DIRS=\ crypto/md4\ crypto/md5\ crypto/ocsp\ + crypto/openpgp\ + crypto/openpgp/armor\ + crypto/openpgp/error\ + crypto/openpgp/packet\ + crypto/openpgp/s2k\ crypto/rand\ crypto/rc4\ crypto/ripemd160\ @@ -89,7 +96,9 @@ DIRS=\ hash/crc64\ html\ http\ + http/cgi\ http/pprof\ + http/httptest\ image\ image/jpeg\ image/png\ @@ -109,6 +118,7 @@ DIRS=\ os/signal\ patch\ path\ + path/filepath\ rand\ reflect\ regexp\ @@ -124,6 +134,7 @@ DIRS=\ strconv\ strings\ sync\ + sync/atomic\ syscall\ syslog\ tabwriter\ @@ -156,6 +167,7 @@ endif NOTEST=\ crypto\ + crypto/openpgp/error\ debug/proc\ exp/draw/x11\ go/ast\ @@ -163,7 +175,7 @@ NOTEST=\ go/token\ hash\ http/pprof\ - image\ + http/httptest\ image/jpeg\ net/dict\ rand\ @@ -211,19 +223,19 @@ test.dirs: $(addsuffix .test, $(TEST)) bench.dirs: $(addsuffix .bench, $(BENCH)) %.clean: - +cd $* && $(MAKE) clean + +$(MAKE) -C $* clean %.install: - +cd $* && $(MAKE) install + +$(MAKE) -C $* install %.nuke: - +cd $* && $(MAKE) nuke + +$(MAKE) -C $* nuke %.test: - +cd $* && $(MAKE) test + +$(MAKE) -C $* test %.bench: - +cd $* && $(MAKE) bench + +$(MAKE) -C $* bench clean: clean.dirs diff --git a/src/pkg/compress/bzip2/Makefile b/src/pkg/compress/bzip2/Makefile new file mode 100644 index 000000000..a4bceef16 --- /dev/null +++ b/src/pkg/compress/bzip2/Makefile @@ -0,0 +1,14 @@ +# Copyright 2011 The Go Authors. All rights reserved. 
+# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +include ../../../Make.inc + +TARG=compress/bzip2 +GOFILES=\ + bit_reader.go\ + bzip2.go\ + huffman.go\ + move_to_front.go\ + +include ../../../Make.pkg diff --git a/src/pkg/compress/bzip2/bit_reader.go b/src/pkg/compress/bzip2/bit_reader.go new file mode 100644 index 000000000..50f0ec836 --- /dev/null +++ b/src/pkg/compress/bzip2/bit_reader.go @@ -0,0 +1,88 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package bzip2 + +import ( + "bufio" + "io" + "os" +) + +// bitReader wraps an io.Reader and provides the ability to read values, +// bit-by-bit, from it. Its Read* methods don't return the usual os.Error +// because the error handling was verbose. Instead, any error is kept and can +// be checked afterwards. +type bitReader struct { + r byteReader + n uint64 + bits uint + err os.Error +} + +// bitReader needs to read bytes from an io.Reader. We attempt to cast the +// given io.Reader to this interface and, if it doesn't already fit, we wrap in +// a bufio.Reader. +type byteReader interface { + ReadByte() (byte, os.Error) +} + +func newBitReader(r io.Reader) bitReader { + byter, ok := r.(byteReader) + if !ok { + byter = bufio.NewReader(r) + } + return bitReader{r: byter} +} + +// ReadBits64 reads the given number of bits and returns them in the +// least-significant part of a uint64. In the event of an error, it returns 0 +// and the error can be obtained by calling Error(). 
+func (br *bitReader) ReadBits64(bits uint) (n uint64) { + for bits > br.bits { + b, err := br.r.ReadByte() + if err == os.EOF { + err = io.ErrUnexpectedEOF + } + if err != nil { + br.err = err + return 0 + } + br.n <<= 8 + br.n |= uint64(b) + br.bits += 8 + } + + // br.n looks like this (assuming that br.bits = 14 and bits = 6): + // Bit: 111111 + // 5432109876543210 + // + // (6 bits, the desired output) + // |-----| + // V V + // 0101101101001110 + // ^ ^ + // |------------| + // br.bits (num valid bits) + // + // This the next line right shifts the desired bits into the + // least-significant places and masks off anything above. + n = (br.n >> (br.bits - bits)) & ((1 << bits) - 1) + br.bits -= bits + return +} + +func (br *bitReader) ReadBits(bits uint) (n int) { + n64 := br.ReadBits64(bits) + return int(n64) +} + +func (br *bitReader) ReadBit() bool { + n := br.ReadBits(1) + return n != 0 +} + +func (br *bitReader) Error() os.Error { + return br.err +} diff --git a/src/pkg/compress/bzip2/bzip2.go b/src/pkg/compress/bzip2/bzip2.go new file mode 100644 index 000000000..9e97edec1 --- /dev/null +++ b/src/pkg/compress/bzip2/bzip2.go @@ -0,0 +1,390 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package bzip2 implements bzip2 decompression. +package bzip2 + +import ( + "io" + "os" +) + +// There's no RFC for bzip2. I used the Wikipedia page for reference and a lot +// of guessing: http://en.wikipedia.org/wiki/Bzip2 +// The source code to pyflate was useful for debugging: +// http://www.paul.sladen.org/projects/pyflate + +// A StructuralError is returned when the bzip2 data is found to be +// syntactically invalid. +type StructuralError string + +func (s StructuralError) String() string { + return "bzip2 data invalid: " + string(s) +} + +// A reader decompresses bzip2 compressed data. 
+type reader struct { + br bitReader + setupDone bool // true if we have parsed the bzip2 header. + blockSize int // blockSize in bytes, i.e. 900 * 1024. + eof bool + buf []byte // stores Burrows-Wheeler transformed data. + c [256]uint // the `C' array for the inverse BWT. + tt []uint32 // mirrors the `tt' array in the bzip2 source and contains the P array in the upper 24 bits. + tPos uint32 // Index of the next output byte in tt. + + preRLE []uint32 // contains the RLE data still to be processed. + preRLEUsed int // number of entries of preRLE used. + lastByte int // the last byte value seen. + byteRepeats uint // the number of repeats of lastByte seen. + repeats uint // the number of copies of lastByte to output. +} + +// NewReader returns an io.Reader which decompresses bzip2 data from r. +func NewReader(r io.Reader) io.Reader { + bz2 := new(reader) + bz2.br = newBitReader(r) + return bz2 +} + +const bzip2FileMagic = 0x425a // "BZ" +const bzip2BlockMagic = 0x314159265359 +const bzip2FinalMagic = 0x177245385090 + +// setup parses the bzip2 header. 
+func (bz2 *reader) setup() os.Error { + br := &bz2.br + + magic := br.ReadBits(16) + if magic != bzip2FileMagic { + return StructuralError("bad magic value") + } + + t := br.ReadBits(8) + if t != 'h' { + return StructuralError("non-Huffman entropy encoding") + } + + level := br.ReadBits(8) + if level < '1' || level > '9' { + return StructuralError("invalid compression level") + } + + bz2.blockSize = 100 * 1024 * (int(level) - '0') + bz2.tt = make([]uint32, bz2.blockSize) + return nil +} + +func (bz2 *reader) Read(buf []byte) (n int, err os.Error) { + if bz2.eof { + return 0, os.EOF + } + + if !bz2.setupDone { + err = bz2.setup() + brErr := bz2.br.Error() + if brErr != nil { + err = brErr + } + if err != nil { + return 0, err + } + bz2.setupDone = true + } + + n, err = bz2.read(buf) + brErr := bz2.br.Error() + if brErr != nil { + err = brErr + } + return +} + +func (bz2 *reader) read(buf []byte) (n int, err os.Error) { + // bzip2 is a block based compressor, except that it has a run-length + // preprocessing step. The block based nature means that we can + // preallocate fixed-size buffers and reuse them. However, the RLE + // preprocessing would require allocating huge buffers to store the + // maximum expansion. Thus we process blocks all at once, except for + // the RLE which we decompress as required. + + for (bz2.repeats > 0 || bz2.preRLEUsed < len(bz2.preRLE)) && n < len(buf) { + // We have RLE data pending. + + // The run-length encoding works like this: + // Any sequence of four equal bytes is followed by a length + // byte which contains the number of repeats of that byte to + // include. (The number of repeats can be zero.) Because we are + // decompressing on-demand our state is kept in the reader + // object. 
+ + if bz2.repeats > 0 { + buf[n] = byte(bz2.lastByte) + n++ + bz2.repeats-- + if bz2.repeats == 0 { + bz2.lastByte = -1 + } + continue + } + + bz2.tPos = bz2.preRLE[bz2.tPos] + b := byte(bz2.tPos) + bz2.tPos >>= 8 + bz2.preRLEUsed++ + + if bz2.byteRepeats == 3 { + bz2.repeats = uint(b) + bz2.byteRepeats = 0 + continue + } + + if bz2.lastByte == int(b) { + bz2.byteRepeats++ + } else { + bz2.byteRepeats = 0 + } + bz2.lastByte = int(b) + + buf[n] = b + n++ + } + + if n > 0 { + return + } + + // No RLE data is pending so we need to read a block. + + br := &bz2.br + magic := br.ReadBits64(48) + if magic == bzip2FinalMagic { + br.ReadBits64(32) // ignored CRC + bz2.eof = true + return 0, os.EOF + } else if magic != bzip2BlockMagic { + return 0, StructuralError("bad magic value found") + } + + err = bz2.readBlock() + if err != nil { + return 0, err + } + + return bz2.read(buf) +} + +// readBlock reads a bzip2 block. The magic number should already have been consumed. +func (bz2 *reader) readBlock() (err os.Error) { + br := &bz2.br + br.ReadBits64(32) // skip checksum. TODO: check it if we can figure out what it is. + randomized := br.ReadBits(1) + if randomized != 0 { + return StructuralError("deprecated randomized files") + } + origPtr := uint(br.ReadBits(24)) + + // If not every byte value is used in the block (i.e., it's text) then + // the symbol set is reduced. The symbols used are stored as a + // two-level, 16x16 bitmap. + symbolRangeUsedBitmap := br.ReadBits(16) + symbolPresent := make([]bool, 256) + numSymbols := 0 + for symRange := uint(0); symRange < 16; symRange++ { + if symbolRangeUsedBitmap&(1<<(15-symRange)) != 0 { + bits := br.ReadBits(16) + for symbol := uint(0); symbol < 16; symbol++ { + if bits&(1<<(15-symbol)) != 0 { + symbolPresent[16*symRange+symbol] = true + numSymbols++ + } + } + } + } + + // A block uses between two and six different Huffman trees. 
+ numHuffmanTrees := br.ReadBits(3) + if numHuffmanTrees < 2 || numHuffmanTrees > 6 { + return StructuralError("invalid number of Huffman trees") + } + + // The Huffman tree can switch every 50 symbols so there's a list of + // tree indexes telling us which tree to use for each 50 symbol block. + numSelectors := br.ReadBits(15) + treeIndexes := make([]uint8, numSelectors) + + // The tree indexes are move-to-front transformed and stored as unary + // numbers. + mtfTreeDecoder := newMTFDecoderWithRange(numHuffmanTrees) + for i := range treeIndexes { + c := 0 + for { + inc := br.ReadBits(1) + if inc == 0 { + break + } + c++ + } + if c >= numHuffmanTrees { + return StructuralError("tree index too large") + } + treeIndexes[i] = uint8(mtfTreeDecoder.Decode(c)) + } + + // The list of symbols for the move-to-front transform is taken from + // the previously decoded symbol bitmap. + symbols := make([]byte, numSymbols) + nextSymbol := 0 + for i := 0; i < 256; i++ { + if symbolPresent[i] { + symbols[nextSymbol] = byte(i) + nextSymbol++ + } + } + mtf := newMTFDecoder(symbols) + + numSymbols += 2 // to account for RUNA and RUNB symbols + huffmanTrees := make([]huffmanTree, numHuffmanTrees) + + // Now we decode the arrays of code-lengths for each tree. + lengths := make([]uint8, numSymbols) + for i := 0; i < numHuffmanTrees; i++ { + // The code lengths are delta encoded from a 5-bit base value. + length := br.ReadBits(5) + for j := 0; j < numSymbols; j++ { + for { + if !br.ReadBit() { + break + } + if br.ReadBit() { + length-- + } else { + length++ + } + } + if length < 0 || length > 20 { + return StructuralError("Huffman length out of range") + } + lengths[j] = uint8(length) + } + huffmanTrees[i], err = newHuffmanTree(lengths) + if err != nil { + return err + } + } + + selectorIndex := 1 // the next tree index to use + currentHuffmanTree := huffmanTrees[treeIndexes[0]] + bufIndex := 0 // indexes bz2.buf, the output buffer. 
+ // The output of the move-to-front transform is run-length encoded and + // we merge the decoding into the Huffman parsing loop. These two + // variables accumulate the repeat count. See the Wikipedia page for + // details. + repeat := 0 + repeat_power := 0 + + // The `C' array (used by the inverse BWT) needs to be zero initialised. + for i := range bz2.c { + bz2.c[i] = 0 + } + + decoded := 0 // counts the number of symbols decoded by the current tree. + for { + if decoded == 50 { + currentHuffmanTree = huffmanTrees[treeIndexes[selectorIndex]] + selectorIndex++ + decoded = 0 + } + + v := currentHuffmanTree.Decode(br) + decoded++ + + if v < 2 { + // This is either the RUNA or RUNB symbol. + if repeat == 0 { + repeat_power = 1 + } + repeat += repeat_power << v + repeat_power <<= 1 + + // This limit of 2 million comes from the bzip2 source + // code. It prevents repeat from overflowing. + if repeat > 2*1024*1024 { + return StructuralError("repeat count too large") + } + continue + } + + if repeat > 0 { + // We have decoded a complete run-length so we need to + // replicate the last output symbol. + for i := 0; i < repeat; i++ { + b := byte(mtf.First()) + bz2.tt[bufIndex] = uint32(b) + bz2.c[b]++ + bufIndex++ + } + repeat = 0 + } + + if int(v) == numSymbols-1 { + // This is the EOF symbol. Because it's always at the + // end of the move-to-front list, and nevers gets moved + // to the front, it has this unique value. + break + } + + // Since two metasymbols (RUNA and RUNB) have values 0 and 1, + // one would expect |v-2| to be passed to the MTF decoder. + // However, the front of the MTF list is never referenced as 0, + // it's always referenced with a run-length of 1. Thus 0 + // doesn't need to be encoded and we have |v-1| in the next + // line. 
+ b := byte(mtf.Decode(int(v - 1))) + bz2.tt[bufIndex] = uint32(b) + bz2.c[b]++ + bufIndex++ + } + + if origPtr >= uint(bufIndex) { + return StructuralError("origPtr out of bounds") + } + + // We have completed the entropy decoding. Now we can perform the + // inverse BWT and setup the RLE buffer. + bz2.preRLE = bz2.tt[:bufIndex] + bz2.preRLEUsed = 0 + bz2.tPos = inverseBWT(bz2.preRLE, origPtr, bz2.c[:]) + bz2.lastByte = -1 + bz2.byteRepeats = 0 + bz2.repeats = 0 + + return nil +} + +// inverseBWT implements the inverse Burrows-Wheeler transform as described in +// http://www.hpl.hp.com/techreports/Compaq-DEC/SRC-RR-124.pdf, section 4.2. +// In that document, origPtr is called `I' and c is the `C' array after the +// first pass over the data. It's an argument here because we merge the first +// pass with the Huffman decoding. +// +// This also implements the `single array' method from the bzip2 source code +// which leaves the output, still shuffled, in the bottom 8 bits of tt with the +// index of the next byte in the top 24-bits. The index of the first byte is +// returned. +func inverseBWT(tt []uint32, origPtr uint, c []uint) uint32 { + sum := uint(0) + for i := 0; i < 256; i++ { + sum += c[i] + c[i] = sum - c[i] + } + + for i := range tt { + b := tt[i] & 0xff + tt[c[b]] |= uint32(i) << 8 + c[b]++ + } + + return tt[origPtr] >> 8 +} diff --git a/src/pkg/compress/bzip2/bzip2_test.go b/src/pkg/compress/bzip2/bzip2_test.go new file mode 100644 index 000000000..156eea83f --- /dev/null +++ b/src/pkg/compress/bzip2/bzip2_test.go @@ -0,0 +1,158 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package bzip2 + +import ( + "bytes" + "encoding/hex" + "io" + "io/ioutil" + "os" + "testing" +) + +func TestBitReader(t *testing.T) { + buf := bytes.NewBuffer([]byte{0xaa}) + br := newBitReader(buf) + if n := br.ReadBits(1); n != 1 { + t.Errorf("read 1 wrong") + } + if n := br.ReadBits(1); n != 0 { + t.Errorf("read 2 wrong") + } + if n := br.ReadBits(1); n != 1 { + t.Errorf("read 3 wrong") + } + if n := br.ReadBits(1); n != 0 { + t.Errorf("read 4 wrong") + } +} + +func TestBitReaderLarge(t *testing.T) { + buf := bytes.NewBuffer([]byte{0x12, 0x34, 0x56, 0x78}) + br := newBitReader(buf) + if n := br.ReadBits(32); n != 0x12345678 { + t.Errorf("got: %x want: %x", n, 0x12345678) + } +} + +func readerFromHex(s string) io.Reader { + data, err := hex.DecodeString(s) + if err != nil { + panic("readerFromHex: bad input") + } + return bytes.NewBuffer(data) +} + +func decompressHex(s string) (out []byte, err os.Error) { + r := NewReader(readerFromHex(s)) + return ioutil.ReadAll(r) +} + +func TestHelloWorldBZ2(t *testing.T) { + out, err := decompressHex(helloWorldBZ2Hex) + if err != nil { + t.Errorf("error from Read: %s", err) + return + } + + if !bytes.Equal(helloWorld, out) { + t.Errorf("got %x, want %x", out, helloWorld) + } +} + +func testZeros(t *testing.T, inHex string, n int) { + out, err := decompressHex(inHex) + if err != nil { + t.Errorf("error from Read: %s", err) + return + } + + expected := make([]byte, n) + + if !bytes.Equal(expected, out) { + allZeros := true + for _, b := range out { + if b != 0 { + allZeros = false + break + } + } + t.Errorf("incorrect result, got %d bytes (allZeros: %t)", len(out), allZeros) + } +} + +func Test32Zeros(t *testing.T) { + testZeros(t, thirtyTwoZerosBZ2Hex, 32) +} + +func Test1MBZeros(t *testing.T) { + testZeros(t, oneMBZerosBZ2Hex, 1024*1024) +} + +func testRandomData(t *testing.T, compressedHex, uncompressedHex string) { + out, err := decompressHex(compressedHex) + if err != nil { + t.Errorf("error from Read: %s", err) + 
return + } + + expected, _ := hex.DecodeString(uncompressedHex) + + if !bytes.Equal(out, expected) { + t.Errorf("incorrect result\ngot: %x\nwant: %x", out, expected) + } +} + +func TestRandomData1(t *testing.T) { + testRandomData(t, randBZ2Hex, randHex) +} + +func TestRandomData2(t *testing.T) { + // This test involves several repeated bytes in the output, but they + // should trigger RLE decoding. + testRandomData(t, rand2BZ2Hex, rand2Hex) +} + +func TestRandomData3(t *testing.T) { + // This test uses the full range of symbols. + testRandomData(t, rand3BZ2Hex, rand3Hex) +} + +func Test1MBSawtooth(t *testing.T) { + out, err := decompressHex(oneMBSawtoothBZ2Hex) + if err != nil { + t.Errorf("error from Read: %s", err) + return + } + + expected := make([]byte, 1024*1024) + + for i := range expected { + expected[i] = byte(i) + } + + if !bytes.Equal(out, expected) { + t.Error("incorrect result") + } +} + +const helloWorldBZ2Hex = "425a68393141592653594eece83600000251800010400006449080200031064c4101a7a9a580bb9431f8bb9229c28482776741b0" + +var helloWorld = []byte("hello world\n") + +const thirtyTwoZerosBZ2Hex = "425a6839314159265359b5aa5098000000600040000004200021008283177245385090b5aa5098" +const oneMBZerosBZ2Hex = "425a683931415926535938571ce50008084000c0040008200030cc0529a60806c4201e2ee48a70a12070ae39ca" + +const randBZ2Hex = 
"425a6839314159265359905d990d0001957fffffffffffafffffffffffffffffbfff6fffdfffffffffffffffffffffffffffffc002b6dd75676ed5b77720098320d11a64626981323d4da47a83131a13d09e8040f534cd4f4d27a464d193008cd09804601347a980026350c9886234d36864193d1351b44c136919e90340d26127a4cd264c32023009898981310c0344c340027a8303427a99a04c00003534c230d034f5006468d268cf54d36a3009a69a62626261311b40026013d34201a6934c9a604c98ca6c8460989fa9346234d30d3469a2604fd4131a7aa6d0046043d4c62098479269e89e835190d018d4c046001a11e801a0264792321932308c43a130688c260d46686804cd01a9e80981193684c6a68c00000004c4c20c04627a4c0000260003400d04c0681a01334026009a6f48041466132581ec5212b081d96b0effc16543e2228b052fcd30f2567ee8d970e0f10aabca68dd8270591c376cfc1baae0dba00aaff2d6caf6b211322c997cc18eaee5927f75185336bf907021324c71626c1dd20e22b9b0977f05d0f901eaa51db9fbaf7c603b4c87bc82890e6dd7e61d0079e27ec050dd788fd958152061cd01e222f9547cb9efc465d775b6fc98bac7d387bffd151ae09dadf19494f7a638e2eae58e550faba5fe6820ea520eb986096de4e527d80def3ba625e71fbefdcf7e7844e0a25d29b52dcd1344fca083737d42692aab38d230485f3c8ed54c2ed31f15cf0270c8143765b10b92157233fa1dfe0d7ce8ffe70b8b8f7250071701dfe9f1c94de362c9031455951c93eb098a6b50ee45c6131fefc3b6f9643e21f4adc59497138e246f5c57d834aa67c4f10d8bd8b3908d8130dd7388409c299a268eab3664fa4907c5c31574874bd8d388a4ab22b339660804e53e1b8d05867d40e3082560608d35d5d2c6054e8bab23da28f61f83efd41d25529ad6ea15fb50505cacfabb0902166427354ca3830a2c8415f21b19e592690fbe447020d685a4bcd16ecc4ff1a1c0e572627d0ef6265c008a43fc243240541061ed7840606be466d1c0dac2c53250ed567507d926c844154560d631960c65e15157829b2c7f16859f111a3a8cb72bf24ffa57a680c3be67b1be67c8dd8aea73ac2437a78df5b686d427080ebc01bd30b71a49f6ea31dc0f08e4849e38face96717690239538bc08b6cc5aa8d467cb9c36aa83d40ac7e58bddbfa185b22065e89a86c0145569d9e23726651aec49e31588d70f40fe9a4449dcf4f89eac220171e9c938e803dc195679651004b79ad33cc0c13aeeba5941b33ffeeb8fbe16e76c7811445c67b4269c90479433ddf9e8ed1d00c166b6c17217fb22c3ef1b0c1c7e28e185446a111c37f1ea6c07a59fbcc6546ecc6968d36ba58bc5489a56406
47e426b0c39350cb6f07d5dc7a717648c4ec7f841467597ae1f65f408fd2d9940a4b1b860b3c9ae351dcae0b4425f7e8538710f2e40b7f70d13b51ac05ccc6ecda8264a88cad2d721d18132a9b9110a9e759c2483c77dcefc7e464ec88588174cb0c9abff93230ea0bed8decdd8ed8bfe2b5df0a253803678df04fab44c03b9ab7cc97d6e6d6fd0c4c840ce0efc498436f453bbb181603459471f2b588724592b222ec990614db530e10cadd84705621cfdd9261fa44a5f5806a2d74b575056b3c915255c65678f9c16e6dc00a99180fef1a840aff0e842ac02731080cc92782538360a60a727991013984da4fad95f79d5030677b7528d076b2483685fca4429edf804682fdc110dfc2f7c30e23e20a72e039108a0ad6fdee2f76985a4b4be4f5afc6101bf9d5042b657a05dc914e1424241766434" +const randHex = "c95138082bdf2b9bfa5b1072b23f729735d42c785eeb94320fb14c265b9c2ca421d01a3db986df1ac2acde5a0e6bf955d6f95e61261540905928e195f1a66644cc7f37281744fff4dc6df35566a494c41a8167151950eb74f5fc45f85ad0e5ed28b49adfe218aa7ec1707e8e1d55825f61f72beda3b4c006b8c9188d7336a5d875329b1b58c27cc4e89ecbae02c7712400c39dd131d2c6de82e2863da51d472bdfb21ecce62cc9cf769ed28aedc7583d755da45a0d90874bda269dd53283a9bdfd05f95fc8e9a304bb338ea1a2111894678c18134f17d31a15d9bfc1237894650f3e715e2548639ecbddb845cfe4a46a7b3a3c540f48629488e8c869f1e9f3f4c552243a8105b20eb8e264994214349dae83b165fd6c2a5b8e83fce09fc0a80d3281c8d53a9a08095bd19cbc1388df23975646ed259e003d39261ee68cbece8bcf32971f7fe7e588e8ba8f5e8597909abaea693836a79a1964050ed910a45a0f13a58cd2d3ae18992c5b23082407fd920d0bf01e33118a017bb5e39f44931346845af52128f7965206759433a346034ea481671f501280067567619f5ecef6cded077f92ed7f3b3ce8e308c80f34ba06939e9303f91b4318c8c1dd4cc223c1f057ac0c91211c629cd30e46ee9ec1d9fd493086b7bc2bc83e33f08749a5d430b0ed4f79d70f481940c9b0930b16321886a0df4fa5a1465d5208c7d3494a7987d9a5e42aa256f0c9523947f8318d0ef0af3d59a45cfc2418d0785c9a548b32b81e7de18be7d55a69a4c156bbb3d7579c0ac8e9c72b24646e54b0d0e8725f8f49fb44ae3c6b9d0287be118586255a90a4a83483ed0328518037e52aa959c5748ed83e13023e532306be98b8288da306bbb040bcf5d92176f84a9306dc6b274b040370b61d71fde58dd6d20e6fee348eae0c54bd0a5a487b2d005f329794f2a902c296af0a4c1f638f632
92a1fa18e006c1b1838636f4de71c73635b25660d32e88a0917e1a5677f6a02ca65585b82cbd99fb4badbfa97a585da1e6cadf6737b4ec6ca33f245d66ee6a9fae6785d69b003c17b9fc6ec34fe5824ab8caae5e8e14dc6f9e116e7bf4a60c04388783c8ae929e1b46b3ef3bbe81b38f2fa6da771bf39dfba2374d3d2ed356b8e2c42081d885a91a3afb2f31986d2f9873354c48cf5448492c32e62385af423aa4f83db6d1b2669650379a1134b0a04cbca0862d6f9743c791cbb527d36cd5d1f0fc7f503831c8bd1b7a0ef8ae1a5ed1155dfdd9e32b6bb33138112d3d476b802179cb85a2a6c354ccfed2f31604fbd8d6ec4baf9f1c8454f72c6588c06a7df3178c43a6970bfa02dd6f74cb5ec3b63f9eddaa17db5cbf27fac6de8e57c384afd0954179f7b5690c3bee42abc4fa79b4b12101a9cf5f0b9aecdda945def0bd04163237247d3539850e123fe18139f316fa0256d5bd2faa8" + +const oneMBSawtoothBZ2Hex = "425a683931415926535971931ea00006ddffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe007de00000000000000024c00130001300000000000000000000000000000000000000000000000000000000126000980009800000000000000000000000000000000000000000000000000000000930004c0004c000000000000000000000000000000000000000000000000000000004980026000260000000000000000000000000000000000000000000000000000000009aaaaa0000000000000000000000000000000000000000000000000000000000000000498002600026000000000000000000000000000000000000000000000000000000007fc42271980d044c0a822607411304a08982d044c1a82260f411308a08984d044c2a82261741130ca08986d044c3a82261f411310a08988d044c4a822627411314a0898ad044c5a82262f411318a0898cd044c6a82263741131ca0898ed044c7a82263f411320a08990d044c8a822647411324a08992d044c9a82264f411328a08994d044caa82265741132ca08996d044cba82265f411330a08998d044cca822667411334a0899ad044cda82266f411338a0899cd044cea82267741133ca0899ed044cfa82267f411340a089a0d044d0a822687411344a089a2d044d1a82268f411348a089a4d044d2a82269741134ca089a6d044d3a82269f411350a089a8d044d4a8226a7411354a089aad044d5a8226af411358a089acd044d6a8226b741135ca089aed044d7a8226bf411360a089b0d044d8a8226c7411364a089b2d044d9a8226cf411368a089b4d044daa8226d741136ca089b6d044dba8226df411370a089b8d044dca8226e7411374a089bad044dda8
226ef411378a089bcd044dea8226f741137ca089bed044dfa8226ff411380a089c0d044e0a822707411384a089c2d044e1a82270f411388a089c4d044e2a82271741138ca089c59089c69089c71089c79089c81089c89089c91089c99089ca1089ca9089cb1089cb9089cc1089cc9089cd1089cd9089ce1089ce9089cf1089cf9089d01089d09089d11089d19089d21089d29089d31089d39089d41089d49089d51089d59089d61089d69089d71089d79089d81089d89089d91089d99089da1089da9089db1089db9089dc1089dc9089dd1089dd9089de1089de9089df1089df9089e01089e09089e11089e19089e21089e29089e31089e39089e41089e49089e51089e59089e61089e69089e71089e79089e81089e89089e91089e99089ea1089ea9089eb1089eb9089ec1089ec9089ed1089ed9089ee1089ee9089ef1089ef9089f01089f09089f11089f19089f21089f29089f31089f39089f41089f49089f51089f59089f61089f69089f71089f79089f81089f89089f91089f99089fa1089fa9089fb1089fb9089fc1089fc9089fd1089fd9089fe1089fe9089ff1089ff98a0ac9329acf23ba884804fdd3ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff0034f800000000000024c00130001300000000000000000000000000000000000000000000000000000000126000980009800000000000000000000000000000000000000000000000000000000930004c0004c000000000000000000000000000000000000000000000000000000004980026000260000000000000000000000000000000000000000000000000000000024c0013000130000000000000000000000000000000000000000000000000000000002955540000000000000000000000000000000000000000000000000000000000000001ff108c00846024230221181908c108460a4230621183908c20846124230a21185908c308461a4230e21187908c40846224231221189908c508462a423162118b908c60846324231a2118d908c708463a4231e2118f908c80846424232221191908c908464a4232621193908ca0846524232a21195908cb08465a4232e21197908cc0846624233221199908cd08466a423362119b908ce0846724233a2119d908cf08467a4233e2119f908d008468242342211a1908d108468a42346211a3908d20846924234a211a5908d308469a4234e211a7908d40846a242352211a9908d50846aa42356211ab908d60846b24235a211ad908d70846ba4235e211af908d80846c242362211b1908d90846ca42366211b3908da0846d24236a211b5908db0846da4236e211b7908dc0846e242372211b9908dd0846ea42376211bb908de0846f2
4237a211bd908df0846fa4237e211bf908e008470242382211c1908e108470a42386211c3908e20847124238a211c5908e2f8c211c6c8471d211c7c84721211c8c84725211c9c84729211cac8472d211cbc84731211ccc84735211cdc84739211cec8473d211cfc84741211d0c84745211d1c84749211d2c8474d211d3c84751211d4c84755211d5c84759211d6c8475d211d7c84761211d8c84765211d9c84769211dac8476d211dbc84771211dcc84775211ddc84779211dec8477d211dfc84781211e0c84785211e1c84789211e2c8478d211e3c84791211e4c84795211e5c84799211e6c8479d211e7c847a1211e8c847a5211e9c847a9211eac847ad211ebc847b1211ecc847b5211edc847b9211eec847bd211efc847c1211f0c847c5211f1c847c9211f2c847cd211f3c847d1211f4c847d5211f5c847d9211f6c847dd211f7c847e1211f8c847e5211f9c847e9211fac847ed211fbc847f1211fcc847f5211fdc847f9211fec847fd211ff8bb9229c284803a8b6248" + +const rand2BZ2Hex = "425a6839314159265359d992d0f60000137dfe84020310091c1e280e100e042801099210094806c0110002e70806402000546034000034000000f2830000032000d3403264049270eb7a9280d308ca06ad28f6981bee1bf8160727c7364510d73a1e123083421b63f031f63993a0f40051fbf177245385090d992d0f60" +const rand2Hex = "92d5652616ac444a4a04af1a8a3964aca0450d43d6cf233bd03233f4ba92f8719e6c2a2bd4f5f88db07ecd0da3a33b263483db9b2c158786ad6363be35d17335ba" + +const rand3BZ2Hex = 
"425a68393141592653593be669d00000327ffffffffffffffffffffffffffffffffffff7ffffffffffffffffffffffffffffffc002b3b2b1b6e2bae400004c00132300004c0d268c004c08c0130026001a008683234c0684c34008c230261a04c0260064d07a8d00034000d27a1268c9931a8d327a3427a41faa69ea0da264c1a34219326869b51b49a6469a3268c689fa53269a62794687a9a68f5189994c9e487a8f534fd49a3d34043629e8c93d04da4f4648d30d4f44d3234c4d3023d0840680984d309934c234d3131a000640984f536a6132601300130130c8d00d04d1841ea7a8d31a02609b40023460010c01a34d4c1a0d04d3069306810034d0d0d4c0046130d034d0131a9a64d321804c68003400098344c13000991808c0001a00000000098004d3d4da4604c47a13012140aadf8d673c922c607ef6212a8c0403adea4b28aee578900e653b9cdeb8d11e6b838815f3ebaad5a01c5408d84a332170aff8734d4e06612d3c2889f31925fb89e33561f5100ae89b1f7047102e729373d3667e58d73aaa80fa7be368a1cc2dadd81d81ec8e1b504bd772ca31d03649269b01ceddaca07bf3d4eba24de141be3f86f93601e03714c0f64654671684f9f9528626fd4e1b76753dc0c54b842486b8d59d8ab314e86ca818e7a1f079463cbbd70d9b79b283c7edc419406311022e4be98c2c1374df9cdde2d008ce1d00e5f06ad1024baf555631f70831fc1023034e62be7c4bcb648caf276963ffa20e96bb50377fe1c113da0db4625b50741c35a058edb009c6ee5dbf93b8a6b060eec568180e8db791b82aab96cbf4326ca98361461379425ba8dcc347be670bdba7641883e5526ae3d833f6e9cb9bac9557747c79e206151072f7f0071dff3880411846f66bf4075c7462f302b53cb3400a74cf35652ad5641ed33572fd54e7ed7f85f58a0acba89327e7c6be5c58cb71528b99df2431f1d0358f8d28d81d95292da631fb06701decabb205fac59ff0fb1df536afc681eece6ea658c4d9eaa45f1342aa1ff70bdaff2ddaf25ec88c22f12829a0553db1ec2505554cb17d7b282e213a5a2aa30431ded2bce665bb199d023840832fedb2c0c350a27291407ff77440792872137df281592e82076a05c64c345ffb058c64f7f7c207ef78420b7010520610f17e302cc4dfcfaef72a0ed091aab4b541eb0531bbe941ca2f792bf7b31ca6162882b68054a8470115bc2c19f2df2023f7800432b39b04d3a304e8085ba3f1f0ca5b1ba4d38d339e6084de979cdea6d0e244c6c9fa0366bd890621e3d30846f5e8497e21597b8f29bbf52c961a485dfbea647600da0fc1f25ce4d203a8352ece310c39073525044e7ac46acf2ed9120bae1b4f6f02364abfe343f80b290983160c103557af1c6
8416480d024cc31b6c06cfec011456f1e95c420a12b48b1c3fe220c2879a982fb099948ac440db844b9a112a5188c7783fd3b19593290785f908d95c9db4b280bafe89c1313aeec24772046d9bc089645f0d182a21184e143823c5f52de50e5d7e98d3d7ab56f5413bbccd1415c9bcff707def475b643fb7f29842582104d4cc1dbaaca8f10a2f44273c339e0984f2b1e06ab2f0771db01fafa8142298345f3196f23e5847bda024034b6f59b11c29e981c881456e40d211929fd4f766200258aad8212016322bd5c605790dcfdf1bd2a93d99c9b8f498722d311d7eae7ff420496a31804c55f4759a7b13aaaf5f7ce006c3a8a998897d5e0a504398c2b627852545baf440798bcc5cc049357cf3f17d9771e4528a1af3d77dc794a11346e1bdf5efe37a405b127b4c43b616d61fbc5dc914e14240ef99a7400" +const rand3Hex = "1744b384d68c042371244e13500d4bfb98c6244e3d71a5b700224420b59c593553f33bd786e3d0ce31626f511bc985f59d1a88aa38ba8ad6218d306abee60dd9172540232b95be1af146c69e72e5fde667a090dc3f93bdc5c5af0ab80acdbaa7a505f628c59dc0247b31a439cacf5010a94376d71521df08c178b02fb96fdb1809144ea38c68536187c53201fea8631fb0a880b4451ccdca7cc61f6aafca21cc7449d920599db61789ac3b1e164b3390124f95022aeea39ccca3ec1053f4fa10de2978e2861ea58e477085c2220021a0927aa94c5d0006b5055abba340e4f9eba22e969978dfd18e278a8b89d877328ae34268bc0174cfe211954c0036f078025217d1269fac1932a03b05a0b616012271bbe1fb554171c7a59b196d8a4479f45a77931b5d97aaf6c0c673cbe597b79b96e2a0c1eae2e66e46ccc8c85798e23ffe972ebdaa3f6caea243c004e60321eb47cd79137d78fd0613be606feacc5b3637bdc96a89c13746db8cad886f3ccf912b2178c823bcac395f06d28080269bdca2debf3419c66c690fd1adcfbd53e32e79443d7a42511a84cb22ca94fffad9149275a075b2f8ae0b021dcde9bf62b102db920733b897560518b06e1ad7f4b03458493ddaa7f4fa2c1609f7a1735aeeb1b3e2cea3ab45fc376323cc91873b7e9c90d07c192e38d3f5dfc9bfab1fd821c854da9e607ea596c391c7ec4161c6c4493929a8176badaa5a5af7211c623f29643a937677d3df0da9266181b7c4da5dd40376db677fe8f4a1dc456adf6f33c1e37cec471dd318c2647644fe52f93707a77da7d1702380a80e14cc0fdce7bf2eed48a529090bae0388ee277ce6c7018c5fb00b88362554362205c641f0d0fab94fd5b8357b5ff08b207fee023709bc126ec90cfb17c006754638f8186aaeb1265e80be0c1189ec07d01d5f6f96cb9ce82744147d18
490de7dc72862f42f024a16968891a356f5e7e0e695d8c933ba5b5e43ad4c4ade5399bc2cae9bb6189b7870d7f22956194d277f28b10e01c10c6ffe3e065f7e2d6d056aa790db5649ca84dc64c35566c0af1b68c32b5b7874aaa66467afa44f40e9a0846a07ae75360a641dd2acc69d93219b2891f190621511e62a27f5e4fbe641ece1fa234fc7e9a74f48d2a760d82160d9540f649256b169d1fed6fbefdc491126530f3cbad7913e19fbd7aa53b1e243fbf28d5f38c10ebd77c8b986775975cc1d619efb27cdcd733fa1ca36cffe9c0a33cc9f02463c91a886601fd349efee85ef1462065ef9bd2c8f533220ad93138b8382d5938103ab25b2d9af8ae106e1211eb9b18793fba033900c809c02cd6d17e2f3e6fc84dae873411f8e87c3f0a8f1765b7825d185ce3730f299c3028d4a62da9ee95c2b870fb70c79370d485f9d5d9acb78926d20444033d960524d2776dc31988ec7c0dbf23b9905d" diff --git a/src/pkg/compress/bzip2/huffman.go b/src/pkg/compress/bzip2/huffman.go new file mode 100644 index 000000000..732bc4a21 --- /dev/null +++ b/src/pkg/compress/bzip2/huffman.go @@ -0,0 +1,223 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package bzip2 + +import ( + "os" + "sort" +) + +// A huffmanTree is a binary tree which is navigated, bit-by-bit to reach a +// symbol. +type huffmanTree struct { + // nodes contains all the non-leaf nodes in the tree. nodes[0] is the + // root of the tree and nextNode contains the index of the next element + // of nodes to use when the tree is being constructed. + nodes []huffmanNode + nextNode int +} + +// A huffmanNode is a node in the tree. left and right contain indexes into the +// nodes slice of the tree. If left or right is invalidNodeValue then the child +// is a left node and its value is in leftValue/rightValue. +// +// The symbols are uint16s because bzip2 encodes not only MTF indexes in the +// tree, but also two magic values for run-length encoding and an EOF symbol. +// Thus there are more than 256 possible symbols. 
+type huffmanNode struct { + left, right uint16 + leftValue, rightValue uint16 +} + +// invalidNodeValue is an invalid index which marks a leaf node in the tree. +const invalidNodeValue = 0xffff + +// Decode reads bits from the given bitReader and navigates the tree until a +// symbol is found. +func (t huffmanTree) Decode(br *bitReader) (v uint16) { + nodeIndex := uint16(0) // node 0 is the root of the tree. + + for { + node := &t.nodes[nodeIndex] + bit := br.ReadBit() + // bzip2 encodes left as a true bit. + if bit { + // left + if node.left == invalidNodeValue { + return node.leftValue + } + nodeIndex = node.left + } else { + // right + if node.right == invalidNodeValue { + return node.rightValue + } + nodeIndex = node.right + } + } + + panic("unreachable") +} + +// newHuffmanTree builds a Huffman tree from a slice containing the code +// lengths of each symbol. The maximum code length is 32 bits. +func newHuffmanTree(lengths []uint8) (huffmanTree, os.Error) { + // There are many possible trees that assign the same code length to + // each symbol (consider reflecting a tree down the middle, for + // example). Since the code length assignments determine the + // efficiency of the tree, each of these trees is equally good. In + // order to minimise the amount of information needed to build a tree + // bzip2 uses a canonical tree so that it can be reconstructed given + // only the code length assignments. + + if len(lengths) < 2 { + panic("newHuffmanTree: too few symbols") + } + + var t huffmanTree + + // First we sort the code length assignments by ascending code length, + // using the symbol value to break ties. + pairs := huffmanSymbolLengthPairs(make([]huffmanSymbolLengthPair, len(lengths))) + for i, length := range lengths { + pairs[i].value = uint16(i) + pairs[i].length = length + } + + sort.Sort(pairs) + + // Now we assign codes to the symbols, starting with the longest code. + // We keep the codes packed into a uint32, at the most-significant end. 
+ // So branches are taken from the MSB downwards. This makes it easy to + // sort them later. + code := uint32(0) + length := uint8(32) + + codes := huffmanCodes(make([]huffmanCode, len(lengths))) + for i := len(pairs) - 1; i >= 0; i-- { + if length > pairs[i].length { + // If the code length decreases we shift in order to + // zero any bits beyond the end of the code. + length >>= 32 - pairs[i].length + length <<= 32 - pairs[i].length + length = pairs[i].length + } + codes[i].code = code + codes[i].codeLen = length + codes[i].value = pairs[i].value + // We need to 'increment' the code, which means treating |code| + // like a |length| bit number. + code += 1 << (32 - length) + } + + // Now we can sort by the code so that the left half of each branch are + // grouped together, recursively. + sort.Sort(codes) + + t.nodes = make([]huffmanNode, len(codes)) + _, err := buildHuffmanNode(&t, codes, 0) + return t, err +} + +// huffmanSymbolLengthPair contains a symbol and its code length. +type huffmanSymbolLengthPair struct { + value uint16 + length uint8 +} + +// huffmanSymbolLengthPair is used to provide an interface for sorting. +type huffmanSymbolLengthPairs []huffmanSymbolLengthPair + +func (h huffmanSymbolLengthPairs) Len() int { + return len(h) +} + +func (h huffmanSymbolLengthPairs) Less(i, j int) bool { + if h[i].length < h[j].length { + return true + } + if h[i].length > h[j].length { + return false + } + if h[i].value < h[j].value { + return true + } + return false +} + +func (h huffmanSymbolLengthPairs) Swap(i, j int) { + h[i], h[j] = h[j], h[i] +} + +// huffmanCode contains a symbol, its code and code length. +type huffmanCode struct { + code uint32 + codeLen uint8 + value uint16 +} + +// huffmanCodes is used to provide an interface for sorting. 
+type huffmanCodes []huffmanCode + +func (n huffmanCodes) Len() int { + return len(n) +} + +func (n huffmanCodes) Less(i, j int) bool { + return n[i].code < n[j].code +} + +func (n huffmanCodes) Swap(i, j int) { + n[i], n[j] = n[j], n[i] +} + +// buildHuffmanNode takes a slice of sorted huffmanCodes and builds a node in +// the Huffman tree at the given level. It returns the index of the newly +// constructed node. +func buildHuffmanNode(t *huffmanTree, codes []huffmanCode, level uint32) (nodeIndex uint16, err os.Error) { + test := uint32(1) << (31 - level) + + // We have to search the list of codes to find the divide between the left and right sides. + firstRightIndex := len(codes) + for i, code := range codes { + if code.code&test != 0 { + firstRightIndex = i + break + } + } + + left := codes[:firstRightIndex] + right := codes[firstRightIndex:] + + if len(left) == 0 || len(right) == 0 { + return 0, StructuralError("superfluous level in Huffman tree") + } + + nodeIndex = uint16(t.nextNode) + node := &t.nodes[t.nextNode] + t.nextNode++ + + if len(left) == 1 { + // leaf node + node.left = invalidNodeValue + node.leftValue = left[0].value + } else { + node.left, err = buildHuffmanNode(t, left, level+1) + } + + if err != nil { + return + } + + if len(right) == 1 { + // leaf node + node.right = invalidNodeValue + node.rightValue = right[0].value + } else { + node.right, err = buildHuffmanNode(t, right, level+1) + } + + return +} diff --git a/src/pkg/compress/bzip2/move_to_front.go b/src/pkg/compress/bzip2/move_to_front.go new file mode 100644 index 000000000..0ed19dec3 --- /dev/null +++ b/src/pkg/compress/bzip2/move_to_front.go @@ -0,0 +1,105 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package bzip2 + +// moveToFrontDecoder implements a move-to-front list. 
Such a list is an +// efficient way to transform a string with repeating elements into one with +// many small valued numbers, which is suitable for entropy encoding. It works +// by starting with an initial list of symbols and references symbols by their +// index into that list. When a symbol is referenced, it's moved to the front +// of the list. Thus, a repeated symbol ends up being encoded with many zeros, +// as the symbol will be at the front of the list after the first access. +type moveToFrontDecoder struct { + // Rather than actually keep the list in memory, the symbols are stored + // as a circular, double linked list with the symbol indexed by head + // at the front of the list. + symbols []byte + next []uint8 + prev []uint8 + head uint8 +} + +// newMTFDecoder creates a move-to-front decoder with an explicit initial list +// of symbols. +func newMTFDecoder(symbols []byte) *moveToFrontDecoder { + if len(symbols) > 256 { + panic("too many symbols") + } + + m := &moveToFrontDecoder{ + symbols: symbols, + next: make([]uint8, len(symbols)), + prev: make([]uint8, len(symbols)), + } + + m.threadLinkedList() + return m +} + +// newMTFDecoderWithRange creates a move-to-front decoder with an initial +// symbol list of 0...n-1. +func newMTFDecoderWithRange(n int) *moveToFrontDecoder { + if n > 256 { + panic("newMTFDecoderWithRange: cannot have > 256 symbols") + } + + m := &moveToFrontDecoder{ + symbols: make([]uint8, n), + next: make([]uint8, n), + prev: make([]uint8, n), + } + + for i := 0; i < n; i++ { + m.symbols[i] = byte(i) + } + + m.threadLinkedList() + return m +} + +// threadLinkedList creates the initial linked-list pointers. 
+func (m *moveToFrontDecoder) threadLinkedList() { + if len(m.symbols) == 0 { + return + } + + m.prev[0] = uint8(len(m.symbols) - 1) + + for i := 0; i < len(m.symbols)-1; i++ { + m.next[i] = uint8(i + 1) + m.prev[i+1] = uint8(i) + } + + m.next[len(m.symbols)-1] = 0 +} + +func (m *moveToFrontDecoder) Decode(n int) (b byte) { + // Most of the time, n will be zero so it's worth dealing with this + // simple case. + if n == 0 { + return m.symbols[m.head] + } + + i := m.head + for j := 0; j < n; j++ { + i = m.next[i] + } + b = m.symbols[i] + + m.next[m.prev[i]] = m.next[i] + m.prev[m.next[i]] = m.prev[i] + m.next[i] = m.head + m.prev[i] = m.prev[m.head] + m.next[m.prev[m.head]] = i + m.prev[m.head] = i + m.head = i + + return +} + +// First returns the symbol at the front of the list. +func (m *moveToFrontDecoder) First() byte { + return m.symbols[m.head] +} diff --git a/src/pkg/compress/flate/deflate_test.go b/src/pkg/compress/flate/deflate_test.go index 68dcd7bcc..ed5884a4b 100644 --- a/src/pkg/compress/flate/deflate_test.go +++ b/src/pkg/compress/flate/deflate_test.go @@ -191,9 +191,16 @@ func testSync(t *testing.T, level int, input []byte, name string) { t.Errorf("testSync/%d: read wrong bytes: %x vs %x", i, input[lo:hi], out[:hi-lo]) return } - if i == 0 && buf.buf.Len() != 0 { - t.Errorf("testSync/%d (%d, %d, %s): extra data after %d", i, level, len(input), name, hi-lo) - } + // This test originally checked that after reading + // the first half of the input, there was nothing left + // in the read buffer (buf.buf.Len() != 0) but that is + // not necessarily the case: the write Flush may emit + // some extra framing bits that are not necessary + // to process to obtain the first half of the uncompressed + // data. The test ran correctly most of the time, because + // the background goroutine had usually read even + // those extra bits by now, but it's not a useful thing to + // check. 
buf.WriteMode() } buf.ReadMode() @@ -262,135 +269,9 @@ func TestReverseBits(t *testing.T) { } func TestDeflateInflateString(t *testing.T) { - gold := bytes.NewBufferString(getEdata()).Bytes() + gold, err := ioutil.ReadFile("../testdata/e.txt") + if err != nil { + t.Error(err) + } testToFromWithLevel(t, 1, gold, "2.718281828...") } - -func getEdata() string { - return "2.718281828459045235360287471352662497757247093699959574966967627724076630353547" + - "59457138217852516642742746639193200305992181741359662904357290033429526059563073" + - "81323286279434907632338298807531952510190115738341879307021540891499348841675092" + - "44761460668082264800168477411853742345442437107539077744992069551702761838606261" + - "33138458300075204493382656029760673711320070932870912744374704723069697720931014" + - "16928368190255151086574637721112523897844250569536967707854499699679468644549059" + - "87931636889230098793127736178215424999229576351482208269895193668033182528869398" + - "49646510582093923982948879332036250944311730123819706841614039701983767932068328" + - "23764648042953118023287825098194558153017567173613320698112509961818815930416903" + - "51598888519345807273866738589422879228499892086805825749279610484198444363463244" + - "96848756023362482704197862320900216099023530436994184914631409343173814364054625" + - "31520961836908887070167683964243781405927145635490613031072085103837505101157477" + - "04171898610687396965521267154688957035035402123407849819334321068170121005627880" + - "23519303322474501585390473041995777709350366041699732972508868769664035557071622" + - "68447162560798826517871341951246652010305921236677194325278675398558944896970964" + - "09754591856956380236370162112047742722836489613422516445078182442352948636372141" + - "74023889344124796357437026375529444833799801612549227850925778256209262264832627" + - "79333865664816277251640191059004916449982893150566047258027786318641551956532442" + - 
"58698294695930801915298721172556347546396447910145904090586298496791287406870504" + - "89585867174798546677575732056812884592054133405392200011378630094556068816674001" + - "69842055804033637953764520304024322566135278369511778838638744396625322498506549" + - "95886234281899707733276171783928034946501434558897071942586398772754710962953741" + - "52111513683506275260232648472870392076431005958411661205452970302364725492966693" + - "81151373227536450988890313602057248176585118063036442812314965507047510254465011" + - "72721155519486685080036853228183152196003735625279449515828418829478761085263981" + - "39559900673764829224437528718462457803619298197139914756448826260390338144182326" + - "25150974827987779964373089970388867782271383605772978824125611907176639465070633" + - "04527954661855096666185664709711344474016070462621568071748187784437143698821855" + - "96709591025968620023537185887485696522000503117343920732113908032936344797273559" + - "55277349071783793421637012050054513263835440001863239914907054797780566978533580" + - "48966906295119432473099587655236812859041383241160722602998330535370876138939639" + - "17795745401613722361878936526053815584158718692553860616477983402543512843961294" + - "60352913325942794904337299085731580290958631382683291477116396337092400316894586" + - "36060645845925126994655724839186564209752685082307544254599376917041977780085362" + - "73094171016343490769642372229435236612557250881477922315197477806056967253801718" + - "07763603462459278778465850656050780844211529697521890874019660906651803516501792" + - "50461950136658543663271254963990854914420001457476081930221206602433009641270489" + - "43903971771951806990869986066365832322787093765022601492910115171776359446020232" + - "49300280401867723910288097866605651183260043688508817157238669842242201024950551" + - "88169480322100251542649463981287367765892768816359831247788652014117411091360116" + - "49950766290779436460058519419985601626479076153210387275571269925182756879893027" + 
- "61761146162549356495903798045838182323368612016243736569846703785853305275833337" + - "93990752166069238053369887956513728559388349989470741618155012539706464817194670" + - "83481972144888987906765037959036696724949925452790337296361626589760394985767413" + - "97359441023744329709355477982629614591442936451428617158587339746791897571211956" + - "18738578364475844842355558105002561149239151889309946342841393608038309166281881" + - "15037152849670597416256282360921680751501777253874025642534708790891372917228286" + - "11515915683725241630772254406337875931059826760944203261924285317018781772960235" + - "41306067213604600038966109364709514141718577701418060644363681546444005331608778" + - "31431744408119494229755993140118886833148328027065538330046932901157441475631399" + - "97221703804617092894579096271662260740718749975359212756084414737823303270330168" + - "23719364800217328573493594756433412994302485023573221459784328264142168487872167" + - "33670106150942434569844018733128101079451272237378861260581656680537143961278887" + - "32527373890392890506865324138062796025930387727697783792868409325365880733988457" + - "21874602100531148335132385004782716937621800490479559795929059165547050577751430" + - "81751126989851884087185640260353055837378324229241856256442550226721559802740126" + - "17971928047139600689163828665277009752767069777036439260224372841840883251848770" + - "47263844037953016690546593746161932384036389313136432713768884102681121989127522" + - "30562567562547017250863497653672886059667527408686274079128565769963137897530346" + - "60616669804218267724560530660773899624218340859882071864682623215080288286359746" + - "83965435885668550377313129658797581050121491620765676995065971534476347032085321" + - "56036748286083786568030730626576334697742956346437167093971930608769634953288468" + - "33613038829431040800296873869117066666146800015121143442256023874474325250769387" + - "07777519329994213727721125884360871583483562696166198057252661220679754062106208" 
+ - "06498829184543953015299820925030054982570433905535701686531205264956148572492573" + - "86206917403695213533732531666345466588597286659451136441370331393672118569553952" + - "10845840724432383558606310680696492485123263269951460359603729725319836842336390" + - "46321367101161928217111502828016044880588023820319814930963695967358327420249882" + - "45684941273860566491352526706046234450549227581151709314921879592718001940968866" + - "98683703730220047531433818109270803001720593553052070070607223399946399057131158" + - "70996357773590271962850611465148375262095653467132900259943976631145459026858989" + - "79115837093419370441155121920117164880566945938131183843765620627846310490346293" + - "95002945834116482411496975832601180073169943739350696629571241027323913874175492" + - "30718624545432220395527352952402459038057445028922468862853365422138157221311632" + - "88112052146489805180092024719391710555390113943316681515828843687606961102505171" + - "00739276238555338627255353883096067164466237092264680967125406186950214317621166" + - "81400975952814939072226011126811531083873176173232352636058381731510345957365382" + - "23534992935822836851007810884634349983518404451704270189381994243410090575376257" + - "76757111809008816418331920196262341628816652137471732547772778348877436651882875" + - "21566857195063719365653903894493664217640031215278702223664636357555035655769488" + - "86549500270853923617105502131147413744106134445544192101336172996285694899193369" + - "18472947858072915608851039678195942983318648075608367955149663644896559294818785" + - "17840387733262470519450504198477420141839477312028158868457072905440575106012852" + - "58056594703046836344592652552137008068752009593453607316226118728173928074623094" + - "68536782310609792159936001994623799343421068781349734695924646975250624695861690" + - "91785739765951993929939955675427146549104568607020990126068187049841780791739240" + - 
"71945996323060254707901774527513186809982284730860766536866855516467702911336827" + - "56310722334672611370549079536583453863719623585631261838715677411873852772292259" + - "47433737856955384562468010139057278710165129666367644518724656537304024436841408" + - "14488732957847348490003019477888020460324660842875351848364959195082888323206522" + - "12810419044804724794929134228495197002260131043006241071797150279343326340799596" + - "05314460532304885289729176598760166678119379323724538572096075822771784833616135" + - "82612896226118129455927462767137794487586753657544861407611931125958512655759734" + - "57301533364263076798544338576171533346232527057200530398828949903425956623297578" + - "24887350292591668258944568946559926584547626945287805165017206747854178879822768" + - "06536650641910973434528878338621726156269582654478205672987756426325321594294418" + - "03994321700009054265076309558846589517170914760743713689331946909098190450129030" + - "70995662266203031826493657336984195557769637876249188528656866076005660256054457" + - "11337286840205574416030837052312242587223438854123179481388550075689381124935386" + - "31863528708379984569261998179452336408742959118074745341955142035172618420084550" + - "91708456823682008977394558426792142734775608796442792027083121501564063413416171" + - "66448069815483764491573900121217041547872591998943825364950514771379399147205219" + - "52907939613762110723849429061635760459623125350606853765142311534966568371511660" + - "42207963944666211632551577290709784731562782775987881364919512574833287937715714" + - "59091064841642678309949723674420175862269402159407924480541255360431317992696739" + - "15754241929660731239376354213923061787675395871143610408940996608947141834069836" + - "29936753626215452472984642137528910798843813060955526227208375186298370667872244" + - "30195793793786072107254277289071732854874374355781966511716618330881129120245204" + - "04868220007234403502544820283425418788465360259150644527165770004452109773558589" + 
- "76226554849416217149895323834216001140629507184904277892585527430352213968356790" + - "18076406042138307308774460170842688272261177180842664333651780002171903449234264" + - "26629226145600433738386833555534345300426481847398921562708609565062934040526494" + - "32442614456659212912256488935696550091543064261342526684725949143142393988454324" + - "86327461842846655985332312210466259890141712103446084271616619001257195870793217" + - "56969854401339762209674945418540711844643394699016269835160784892451405894094639" + - "52678073545797003070511636825194877011897640028276484141605872061841852971891540" + - "19688253289309149665345753571427318482016384644832499037886069008072709327673127" + - "58196656394114896171683298045513972950668760474091542042842999354102582911350224" + - "16907694316685742425225090269390348148564513030699251995904363840284292674125734" + - "22447765584177886171737265462085498294498946787350929581652632072258992368768457" + - "01782303809656788311228930580914057261086588484587310165815116753332767488701482" + - "91674197015125597825727074064318086014281490241467804723275976842696339357735429" + - "30186739439716388611764209004068663398856841681003872389214483176070116684503887" + - "21236436704331409115573328018297798873659091665961240202177855885487617616198937" + - "07943800566633648843650891448055710397652146960276625835990519870423001794655367" + - "9" -} diff --git a/src/pkg/compress/lzw/Makefile b/src/pkg/compress/lzw/Makefile new file mode 100644 index 000000000..28f5e6abc --- /dev/null +++ b/src/pkg/compress/lzw/Makefile @@ -0,0 +1,12 @@ +# Copyright 2011 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. 
+ +include ../../../Make.inc + +TARG=compress/lzw +GOFILES=\ + reader.go\ + writer.go\ + +include ../../../Make.pkg diff --git a/src/pkg/compress/lzw/reader.go b/src/pkg/compress/lzw/reader.go new file mode 100644 index 000000000..8a540cbe6 --- /dev/null +++ b/src/pkg/compress/lzw/reader.go @@ -0,0 +1,210 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// The lzw package implements the Lempel-Ziv-Welch compressed data format, +// described in T. A. Welch, ``A Technique for High-Performance Data +// Compression'', Computer, 17(6) (June 1984), pp 8-19. +// +// In particular, it implements LZW as used by the GIF, TIFF and PDF file +// formats, which means variable-width codes up to 12 bits and the first +// two non-literal codes are a clear code and an EOF code. +package lzw + +// TODO(nigeltao): check that TIFF and PDF use LZW in the same way as GIF, +// modulo LSB/MSB packing order. + +import ( + "bufio" + "fmt" + "io" + "os" +) + +// Order specifies the bit ordering in an LZW data stream. +type Order int + +const ( + // LSB means Least Significant Bits first, as used in the GIF file format. + LSB Order = iota + // MSB means Most Significant Bits first, as used in the TIFF and PDF + // file formats. + MSB +) + +// decoder is the state from which the readXxx method converts a byte +// stream into a code stream. +type decoder struct { + r io.ByteReader + bits uint32 + nBits uint + width uint +} + +// readLSB returns the next code for "Least Significant Bits first" data. +func (d *decoder) readLSB() (uint16, os.Error) { + for d.nBits < d.width { + x, err := d.r.ReadByte() + if err != nil { + return 0, err + } + d.bits |= uint32(x) << d.nBits + d.nBits += 8 + } + code := uint16(d.bits & (1<>= d.width + d.nBits -= d.width + return code, nil +} + +// readMSB returns the next code for "Most Significant Bits first" data. 
+func (d *decoder) readMSB() (uint16, os.Error) { + for d.nBits < d.width { + x, err := d.r.ReadByte() + if err != nil { + return 0, err + } + d.bits |= uint32(x) << (24 - d.nBits) + d.nBits += 8 + } + code := uint16(d.bits >> (32 - d.width)) + d.bits <<= d.width + d.nBits -= d.width + return code, nil +} + +// decode decompresses bytes from r and writes them to pw. +// read specifies how to decode bytes into codes. +// litWidth is the width in bits of literal codes. +func decode(r io.Reader, read func(*decoder) (uint16, os.Error), litWidth int, pw *io.PipeWriter) { + br, ok := r.(io.ByteReader) + if !ok { + br = bufio.NewReader(r) + } + pw.CloseWithError(decode1(pw, br, read, uint(litWidth))) +} + +func decode1(pw *io.PipeWriter, r io.ByteReader, read func(*decoder) (uint16, os.Error), litWidth uint) os.Error { + const ( + maxWidth = 12 + invalidCode = 0xffff + ) + d := decoder{r, 0, 0, 1 + litWidth} + w := bufio.NewWriter(pw) + // The first 1<= clear { + c = prefix[c] + } + buf[i] = uint8(c) + i-- + c = last + } + // Copy the suffix chain into buf and then write that to w. + for c >= clear { + buf[i] = suffix[c] + i-- + c = prefix[c] + } + buf[i] = uint8(c) + if _, err := w.Write(buf[i:]); err != nil { + return err + } + // Save what the hi code expands to. + suffix[hi] = uint8(c) + prefix[hi] = last + default: + return os.NewError("lzw: invalid code") + } + last, hi = code, hi+1 + if hi == overflow { + if d.width == maxWidth { + return os.NewError("lzw: missing clear code") + } + d.width++ + overflow <<= 1 + } + } + panic("unreachable") +} + +// NewReader creates a new io.ReadCloser that satisfies reads by decompressing +// the data read from r. +// It is the caller's responsibility to call Close on the ReadCloser when +// finished reading. +// The number of bits to use for literal codes, litWidth, must be in the +// range [2,8] and is typically 8. 
+func NewReader(r io.Reader, order Order, litWidth int) io.ReadCloser { + pr, pw := io.Pipe() + var read func(*decoder) (uint16, os.Error) + switch order { + case LSB: + read = (*decoder).readLSB + case MSB: + read = (*decoder).readMSB + default: + pw.CloseWithError(os.NewError("lzw: unknown order")) + return pr + } + if litWidth < 2 || 8 < litWidth { + pw.CloseWithError(fmt.Errorf("lzw: litWidth %d out of range", litWidth)) + return pr + } + go decode(r, read, litWidth, pw) + return pr +} diff --git a/src/pkg/compress/lzw/reader_test.go b/src/pkg/compress/lzw/reader_test.go new file mode 100644 index 000000000..7795a4c14 --- /dev/null +++ b/src/pkg/compress/lzw/reader_test.go @@ -0,0 +1,132 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzw + +import ( + "bytes" + "io" + "io/ioutil" + "os" + "strconv" + "strings" + "testing" +) + +type lzwTest struct { + desc string + raw string + compressed string + err os.Error +} + +var lzwTests = []lzwTest{ + { + "empty;LSB;8", + "", + "\x01\x01", + nil, + }, + { + "empty;MSB;8", + "", + "\x80\x80", + nil, + }, + { + "tobe;LSB;7", + "TOBEORNOTTOBEORTOBEORNOT", + "\x54\x4f\x42\x45\x4f\x52\x4e\x4f\x54\x82\x84\x86\x8b\x85\x87\x89\x81", + nil, + }, + { + "tobe;LSB;8", + "TOBEORNOTTOBEORTOBEORNOT", + "\x54\x9e\x08\x29\xf2\x44\x8a\x93\x27\x54\x04\x12\x34\xb8\xb0\xe0\xc1\x84\x01\x01", + nil, + }, + { + "tobe;MSB;7", + "TOBEORNOTTOBEORTOBEORNOT", + "\x54\x4f\x42\x45\x4f\x52\x4e\x4f\x54\x82\x84\x86\x8b\x85\x87\x89\x81", + nil, + }, + { + "tobe;MSB;8", + "TOBEORNOTTOBEORTOBEORNOT", + "\x2a\x13\xc8\x44\x52\x79\x48\x9c\x4f\x2a\x40\xa0\x90\x68\x5c\x16\x0f\x09\x80\x80", + nil, + }, + { + "tobe-truncated;LSB;8", + "TOBEORNOTTOBEORTOBEORNOT", + "\x54\x9e\x08\x29\xf2\x44\x8a\x93\x27\x54\x04", + io.ErrUnexpectedEOF, + }, + // This example comes from http://en.wikipedia.org/wiki/Graphics_Interchange_Format. 
+ { + "gif;LSB;8", + "\x28\xff\xff\xff\x28\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff", + "\x00\x51\xfc\x1b\x28\x70\xa0\xc1\x83\x01\x01", + nil, + }, + // This example comes from http://compgroups.net/comp.lang.ruby/Decompressing-LZW-compression-from-PDF-file + { + "pdf;MSB;8", + "-----A---B", + "\x80\x0b\x60\x50\x22\x0c\x0c\x85\x01", + nil, + }, +} + +func TestReader(t *testing.T) { + b := bytes.NewBuffer(nil) + for _, tt := range lzwTests { + d := strings.Split(tt.desc, ";", -1) + var order Order + switch d[1] { + case "LSB": + order = LSB + case "MSB": + order = MSB + default: + t.Errorf("%s: bad order %q", tt.desc, d[1]) + } + litWidth, _ := strconv.Atoi(d[2]) + rc := NewReader(strings.NewReader(tt.compressed), order, litWidth) + defer rc.Close() + b.Reset() + n, err := io.Copy(b, rc) + if err != nil { + if err != tt.err { + t.Errorf("%s: io.Copy: %v want %v", tt.desc, err, tt.err) + } + continue + } + s := b.String() + if s != tt.raw { + t.Errorf("%s: got %d-byte %q want %d-byte %q", tt.desc, n, s, len(tt.raw), tt.raw) + } + } +} + +type devNull struct{} + +func (devNull) Write(p []byte) (int, os.Error) { + return len(p), nil +} + +func BenchmarkDecoder(b *testing.B) { + b.StopTimer() + buf0, _ := ioutil.ReadFile("../testdata/e.txt") + compressed := bytes.NewBuffer(nil) + w := NewWriter(compressed, LSB, 8) + io.Copy(w, bytes.NewBuffer(buf0)) + w.Close() + buf1 := compressed.Bytes() + b.StartTimer() + for i := 0; i < b.N; i++ { + io.Copy(devNull{}, NewReader(bytes.NewBuffer(buf1), LSB, 8)) + } +} diff --git a/src/pkg/compress/lzw/writer.go b/src/pkg/compress/lzw/writer.go new file mode 100644 index 000000000..87143b7aa --- /dev/null +++ b/src/pkg/compress/lzw/writer.go @@ -0,0 +1,259 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzw + +import ( + "bufio" + "fmt" + "io" + "os" +) + +// A writer is a buffered, flushable writer. 
+type writer interface { + WriteByte(byte) os.Error + Flush() os.Error +} + +// An errWriteCloser is an io.WriteCloser that always returns a given error. +type errWriteCloser struct { + err os.Error +} + +func (e *errWriteCloser) Write([]byte) (int, os.Error) { + return 0, e.err +} + +func (e *errWriteCloser) Close() os.Error { + return e.err +} + +const ( + // A code is a 12 bit value, stored as a uint32 when encoding to avoid + // type conversions when shifting bits. + maxCode = 1<<12 - 1 + invalidCode = 1<<32 - 1 + // There are 1<<12 possible codes, which is an upper bound on the number of + // valid hash table entries at any given point in time. tableSize is 4x that. + tableSize = 4 * 1 << 12 + tableMask = tableSize - 1 + // A hash table entry is a uint32. Zero is an invalid entry since the + // lower 12 bits of a valid entry must be a non-literal code. + invalidEntry = 0 +) + +// encoder is LZW compressor. +type encoder struct { + // w is the writer that compressed bytes are written to. + w writer + // write, bits, nBits and width are the state for converting a code stream + // into a byte stream. + write func(*encoder, uint32) os.Error + bits uint32 + nBits uint + width uint + // litWidth is the width in bits of literal codes. + litWidth uint + // hi is the code implied by the next code emission. + // overflow is the code at which hi overflows the code width. + hi, overflow uint32 + // savedCode is the accumulated code at the end of the most recent Write + // call. It is equal to invalidCode if there was no such call. + savedCode uint32 + // err is the first error encountered during writing. Closing the encoder + // will make any future Write calls return os.EINVAL. + err os.Error + // table is the hash table from 20-bit keys to 12-bit values. Each table + // entry contains key<<12|val and collisions resolve by linear probing. + // The keys consist of a 12-bit code prefix and an 8-bit byte suffix. + // The values are a 12-bit code. 
+ table [tableSize]uint32 +} + +// writeLSB writes the code c for "Least Significant Bits first" data. +func (e *encoder) writeLSB(c uint32) os.Error { + e.bits |= c << e.nBits + e.nBits += e.width + for e.nBits >= 8 { + if err := e.w.WriteByte(uint8(e.bits)); err != nil { + return err + } + e.bits >>= 8 + e.nBits -= 8 + } + return nil +} + +// writeMSB writes the code c for "Most Significant Bits first" data. +func (e *encoder) writeMSB(c uint32) os.Error { + e.bits |= c << (32 - e.width - e.nBits) + e.nBits += e.width + for e.nBits >= 8 { + if err := e.w.WriteByte(uint8(e.bits >> 24)); err != nil { + return err + } + e.bits <<= 8 + e.nBits -= 8 + } + return nil +} + +// errOutOfCodes is an internal error that means that the encoder has run out +// of unused codes and a clear code needs to be sent next. +var errOutOfCodes = os.NewError("lzw: out of codes") + +// incHi increments e.hi and checks for both overflow and running out of +// unused codes. In the latter case, incHi sends a clear code, resets the +// encoder state and returns errOutOfCodes. +func (e *encoder) incHi() os.Error { + e.hi++ + if e.hi == e.overflow { + e.width++ + e.overflow <<= 1 + } + if e.hi == maxCode { + clear := uint32(1) << e.litWidth + if err := e.write(e, clear); err != nil { + return err + } + e.width = uint(e.litWidth) + 1 + e.hi = clear + 1 + e.overflow = clear << 1 + for i := range e.table { + e.table[i] = invalidEntry + } + return errOutOfCodes + } + return nil +} + +// Write writes a compressed representation of p to e's underlying writer. +func (e *encoder) Write(p []byte) (int, os.Error) { + if e.err != nil { + return 0, e.err + } + if len(p) == 0 { + return 0, nil + } + litMask := uint32(1<>12 ^ key) & tableMask + for h, t := hash, e.table[hash]; t != invalidEntry; { + if key == t>>12 { + code = t & maxCode + continue loop + } + h = (h + 1) & tableMask + t = e.table[h] + } + // Otherwise, write the current code, and literal becomes the start of + // the next emitted code. 
+ if e.err = e.write(e, code); e.err != nil { + return 0, e.err + } + code = literal + // Increment e.hi, the next implied code. If we run out of codes, reset + // the encoder state (including clearing the hash table) and continue. + if err := e.incHi(); err != nil { + if err == errOutOfCodes { + continue + } + e.err = err + return 0, e.err + } + // Otherwise, insert key -> e.hi into the map that e.table represents. + for { + if e.table[hash] == invalidEntry { + e.table[hash] = (key << 12) | e.hi + break + } + hash = (hash + 1) & tableMask + } + } + e.savedCode = code + return len(p), nil +} + +// Close closes the encoder, flushing any pending output. It does not close or +// flush e's underlying writer. +func (e *encoder) Close() os.Error { + if e.err != nil { + if e.err == os.EINVAL { + return nil + } + return e.err + } + // Make any future calls to Write return os.EINVAL. + e.err = os.EINVAL + // Write the savedCode if valid. + if e.savedCode != invalidCode { + if err := e.write(e, e.savedCode); err != nil { + return err + } + if err := e.incHi(); err != nil && err != errOutOfCodes { + return err + } + } + // Write the eof code. + eof := uint32(1)< 0 { + if e.write == (*encoder).writeMSB { + e.bits >>= 24 + } + if err := e.w.WriteByte(uint8(e.bits)); err != nil { + return err + } + } + return e.w.Flush() +} + +// NewWriter creates a new io.WriteCloser that satisfies writes by compressing +// the data and writing it to w. +// It is the caller's responsibility to call Close on the WriteCloser when +// finished writing. +// The number of bits to use for literal codes, litWidth, must be in the +// range [2,8] and is typically 8. 
+func NewWriter(w io.Writer, order Order, litWidth int) io.WriteCloser { + var write func(*encoder, uint32) os.Error + switch order { + case LSB: + write = (*encoder).writeLSB + case MSB: + write = (*encoder).writeMSB + default: + return &errWriteCloser{os.NewError("lzw: unknown order")} + } + if litWidth < 2 || 8 < litWidth { + return &errWriteCloser{fmt.Errorf("lzw: litWidth %d out of range", litWidth)} + } + bw, ok := w.(writer) + if !ok { + bw = bufio.NewWriter(w) + } + lw := uint(litWidth) + return &encoder{ + w: bw, + write: write, + width: 1 + lw, + litWidth: lw, + hi: 1<" + UserId *packet.UserId + SelfSignature *packet.Signature + Signatures []*packet.Signature +} + +// A Subkey is an additional public key in an Entity. Subkeys can be used for +// encryption. +type Subkey struct { + PublicKey *packet.PublicKey + PrivateKey *packet.PrivateKey + Sig *packet.Signature +} + +// A Key identifies a specific public key in an Entity. This is either the +// Entity's primary key or a subkey. +type Key struct { + Entity *Entity + PublicKey *packet.PublicKey + PrivateKey *packet.PrivateKey + SelfSignature *packet.Signature +} + +// A KeyRing provides access to public and private keys. +type KeyRing interface { + // KeysById returns the set of keys that have the given key id. + KeysById(id uint64) []Key + // DecryptionKeys returns all private keys that are valid for + // decryption. + DecryptionKeys() []Key +} + +// An EntityList contains one or more Entities. +type EntityList []*Entity + +// KeysById returns the set of keys that have the given key id. 
+func (el EntityList) KeysById(id uint64) (keys []Key) { + for _, e := range el { + if e.PrimaryKey.KeyId == id { + var selfSig *packet.Signature + for _, ident := range e.Identities { + if selfSig == nil { + selfSig = ident.SelfSignature + } else if ident.SelfSignature.IsPrimaryId != nil && *ident.SelfSignature.IsPrimaryId { + selfSig = ident.SelfSignature + break + } + } + keys = append(keys, Key{e, e.PrimaryKey, e.PrivateKey, selfSig}) + } + + for _, subKey := range e.Subkeys { + if subKey.PublicKey.KeyId == id { + keys = append(keys, Key{e, subKey.PublicKey, subKey.PrivateKey, subKey.Sig}) + } + } + } + return +} + +// DecryptionKeys returns all private keys that are valid for decryption. +func (el EntityList) DecryptionKeys() (keys []Key) { + for _, e := range el { + for _, subKey := range e.Subkeys { + if subKey.PrivateKey != nil && (!subKey.Sig.FlagsValid || subKey.Sig.FlagEncryptStorage || subKey.Sig.FlagEncryptCommunications) { + keys = append(keys, Key{e, subKey.PublicKey, subKey.PrivateKey, subKey.Sig}) + } + } + } + return +} + +// ReadArmoredKeyRing reads one or more public/private keys from an armor keyring file. +func ReadArmoredKeyRing(r io.Reader) (EntityList, os.Error) { + body, err := readArmored(r, PublicKeyType) + if err != nil { + return nil, err + } + + return ReadKeyRing(body) +} + +// ReadKeyRing reads one or more public/private keys, ignoring unsupported keys. +func ReadKeyRing(r io.Reader) (el EntityList, err os.Error) { + packets := packet.NewReader(r) + + for { + var e *Entity + e, err = readEntity(packets) + if err != nil { + if _, ok := err.(error.UnsupportedError); ok { + err = readToNextPublicKey(packets) + } + if err == os.EOF { + err = nil + return + } + if err != nil { + el = nil + return + } + } else { + el = append(el, e) + } + } + return +} + +// readToNextPublicKey reads packets until the start of the entity and leaves +// the first packet of the new entity in the Reader. 
+func readToNextPublicKey(packets *packet.Reader) (err os.Error) { + var p packet.Packet + for { + p, err = packets.Next() + if err == os.EOF { + return + } else if err != nil { + if _, ok := err.(error.UnsupportedError); ok { + err = nil + continue + } + return + } + + if pk, ok := p.(*packet.PublicKey); ok && !pk.IsSubkey { + packets.Unread(p) + return + } + } + + panic("unreachable") +} + +// readEntity reads an entity (public key, identities, subkeys etc) from the +// given Reader. +func readEntity(packets *packet.Reader) (*Entity, os.Error) { + e := new(Entity) + e.Identities = make(map[string]*Identity) + + p, err := packets.Next() + if err != nil { + return nil, err + } + + var ok bool + if e.PrimaryKey, ok = p.(*packet.PublicKey); !ok { + if e.PrivateKey, ok = p.(*packet.PrivateKey); !ok { + packets.Unread(p) + return nil, error.StructuralError("first packet was not a public/private key") + } else { + e.PrimaryKey = &e.PrivateKey.PublicKey + } + } + + var current *Identity +EachPacket: + for { + p, err := packets.Next() + if err == os.EOF { + break + } else if err != nil { + return nil, err + } + + switch pkt := p.(type) { + case *packet.UserId: + current = new(Identity) + current.Name = pkt.Id + current.UserId = pkt + e.Identities[pkt.Id] = current + p, err = packets.Next() + if err == os.EOF { + err = io.ErrUnexpectedEOF + } + if err != nil { + if _, ok := err.(error.UnsupportedError); ok { + return nil, err + } + return nil, error.StructuralError("identity self-signature invalid: " + err.String()) + } + current.SelfSignature, ok = p.(*packet.Signature) + if !ok { + return nil, error.StructuralError("user ID packet not followed by self signature") + } + if current.SelfSignature.SigType != packet.SigTypePositiveCert { + return nil, error.StructuralError("user ID self-signature with wrong type") + } + if err = e.PrimaryKey.VerifyUserIdSignature(pkt.Id, current.SelfSignature); err != nil { + return nil, error.StructuralError("user ID self-signature invalid: 
" + err.String()) + } + case *packet.Signature: + if current == nil { + return nil, error.StructuralError("signature packet found before user id packet") + } + current.Signatures = append(current.Signatures, pkt) + case *packet.PrivateKey: + if pkt.IsSubkey == false { + packets.Unread(p) + break EachPacket + } + err = addSubkey(e, packets, &pkt.PublicKey, pkt) + if err != nil { + return nil, err + } + case *packet.PublicKey: + if pkt.IsSubkey == false { + packets.Unread(p) + break EachPacket + } + err = addSubkey(e, packets, pkt, nil) + if err != nil { + return nil, err + } + default: + // we ignore unknown packets + } + } + + if len(e.Identities) == 0 { + return nil, error.StructuralError("entity without any identities") + } + + return e, nil +} + +func addSubkey(e *Entity, packets *packet.Reader, pub *packet.PublicKey, priv *packet.PrivateKey) os.Error { + var subKey Subkey + subKey.PublicKey = pub + subKey.PrivateKey = priv + p, err := packets.Next() + if err == os.EOF { + return io.ErrUnexpectedEOF + } + if err != nil { + return error.StructuralError("subkey signature invalid: " + err.String()) + } + var ok bool + subKey.Sig, ok = p.(*packet.Signature) + if !ok { + return error.StructuralError("subkey packet not followed by signature") + } + if subKey.Sig.SigType != packet.SigTypeSubkeyBinding { + return error.StructuralError("subkey signature with wrong type") + } + err = e.PrimaryKey.VerifyKeySignature(subKey.PublicKey, subKey.Sig) + if err != nil { + return error.StructuralError("subkey signature invalid: " + err.String()) + } + e.Subkeys = append(e.Subkeys, subKey) + return nil +} diff --git a/src/pkg/crypto/openpgp/packet/encrypted_key.go b/src/pkg/crypto/openpgp/packet/encrypted_key.go index 4a926cdb1..b11a9b830 100644 --- a/src/pkg/crypto/openpgp/packet/encrypted_key.go +++ b/src/pkg/crypto/openpgp/packet/encrypted_key.go @@ -20,8 +20,8 @@ type EncryptedKey struct { KeyId uint64 Algo PublicKeyAlgorithm Encrypted []byte - CipherFunc CipherFunction // only 
valid after a sucessful Decrypt - Key []byte // only valid after a sucessful Decrypt + CipherFunc CipherFunction // only valid after a successful Decrypt + Key []byte // only valid after a successful Decrypt } func (e *EncryptedKey) parse(r io.Reader) (err os.Error) { diff --git a/src/pkg/crypto/openpgp/packet/literal.go b/src/pkg/crypto/openpgp/packet/literal.go index 5f72d6a2c..04f50e53e 100644 --- a/src/pkg/crypto/openpgp/packet/literal.go +++ b/src/pkg/crypto/openpgp/packet/literal.go @@ -14,11 +14,11 @@ import ( type LiteralData struct { IsBinary bool FileName string - Time uint32 // Unix epoc time. Either creation time or modification time. 0 means undefined. + Time uint32 // Unix epoch time. Either creation time or modification time. 0 means undefined. Body io.Reader } -// ForEyesOnly return whether the contents of the LiteralData have been marked +// ForEyesOnly returns whether the contents of the LiteralData have been marked // as especially sensitive. func (l *LiteralData) ForEyesOnly() bool { return l.FileName == "_CONSOLE" diff --git a/src/pkg/crypto/openpgp/packet/packet.go b/src/pkg/crypto/openpgp/packet/packet.go index 80e25e250..269603ba4 100644 --- a/src/pkg/crypto/openpgp/packet/packet.go +++ b/src/pkg/crypto/openpgp/packet/packet.go @@ -261,13 +261,13 @@ func Read(r io.Reader) (p Packet, err os.Error) { case packetTypePrivateKey, packetTypePrivateSubkey: pk := new(PrivateKey) if tag == packetTypePrivateSubkey { - pk.IsSubKey = true + pk.IsSubkey = true } p = pk case packetTypePublicKey, packetTypePublicSubkey: pk := new(PublicKey) if tag == packetTypePublicSubkey { - pk.IsSubKey = true + pk.IsSubkey = true } p = pk case packetTypeCompressed: @@ -300,7 +300,7 @@ type SignatureType uint8 const ( SigTypeBinary SignatureType = 0 - SigTypeText SignatureType = 1 + SigTypeText = 1 SigTypeGenericCert = 0x10 SigTypePersonaCert = 0x11 SigTypeCasualCert = 0x12 diff --git a/src/pkg/crypto/openpgp/packet/public_key.go 
b/src/pkg/crypto/openpgp/packet/public_key.go index 4a2ed0aca..8866bdaaa 100644 --- a/src/pkg/crypto/openpgp/packet/public_key.go +++ b/src/pkg/crypto/openpgp/packet/public_key.go @@ -23,7 +23,7 @@ type PublicKey struct { PublicKey interface{} // Either a *rsa.PublicKey or *dsa.PublicKey Fingerprint [20]byte KeyId uint64 - IsSubKey bool + IsSubkey bool n, e, p, q, g, y parsedMPI } diff --git a/src/pkg/crypto/openpgp/packet/symmetrically_encrypted_test.go b/src/pkg/crypto/openpgp/packet/symmetrically_encrypted_test.go index ee5a30d32..5543b2029 100644 --- a/src/pkg/crypto/openpgp/packet/symmetrically_encrypted_test.go +++ b/src/pkg/crypto/openpgp/packet/symmetrically_encrypted_test.go @@ -54,7 +54,7 @@ func testMDCReader(t *testing.T) { err = mdcReader.Close() if err != nil { - t.Errorf("stride: %d, error on Close: %s", err) + t.Errorf("stride: %d, error on Close: %s", stride, err) } } diff --git a/src/pkg/crypto/openpgp/read.go b/src/pkg/crypto/openpgp/read.go new file mode 100644 index 000000000..ac6998f0d --- /dev/null +++ b/src/pkg/crypto/openpgp/read.go @@ -0,0 +1,413 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This openpgp package implements high level operations on OpenPGP messages. +package openpgp + +import ( + "crypto" + "crypto/openpgp/armor" + "crypto/openpgp/error" + "crypto/openpgp/packet" + "crypto/rsa" + _ "crypto/sha256" + "hash" + "io" + "os" + "strconv" +) + +// SignatureType is the armor type for a PGP signature. +var SignatureType = "PGP SIGNATURE" + +// readArmored reads an armored block with the given type. 
+func readArmored(r io.Reader, expectedType string) (body io.Reader, err os.Error) { + block, err := armor.Decode(r) + if err != nil { + return + } + + if block.Type != expectedType { + return nil, error.InvalidArgumentError("expected '" + expectedType + "', got: " + block.Type) + } + + return block.Body, nil +} + +// MessageDetails contains the result of parsing an OpenPGP encrypted and/or +// signed message. +type MessageDetails struct { + IsEncrypted bool // true if the message was encrypted. + EncryptedToKeyIds []uint64 // the list of recipient key ids. + IsSymmetricallyEncrypted bool // true if a passphrase could have decrypted the message. + DecryptedWith Key // the private key used to decrypt the message, if any. + IsSigned bool // true if the message is signed. + SignedByKeyId uint64 // the key id of the signer, if any. + SignedBy *Key // the key of the signer, if availible. + LiteralData *packet.LiteralData // the metadata of the contents + UnverifiedBody io.Reader // the contents of the message. + + // If IsSigned is true and SignedBy is non-zero then the signature will + // be verified as UnverifiedBody is read. The signature cannot be + // checked until the whole of UnverifiedBody is read so UnverifiedBody + // must be consumed until EOF before the data can trusted. Even if a + // message isn't signed (or the signer is unknown) the data may contain + // an authentication code that is only checked once UnverifiedBody has + // been consumed. Once EOF has been seen, the following fields are + // valid. (An authentication code failure is reported as a + // SignatureError error when reading from UnverifiedBody.) + + SignatureError os.Error // nil if the signature is good. + Signature *packet.Signature // the signature packet itself. + + decrypted io.ReadCloser +} + +// A PromptFunction is used as a callback by functions that may need to decrypt +// a private key, or prompt for a passphrase. 
It is called with a list of +// acceptable, encrypted private keys and a boolean that indicates whether a +// passphrase is usable. It should either decrypt a private key or return a +// passphrase to try. If the decrypted private key or given passphrase isn't +// correct, the function will be called again, forever. Any error returned will +// be passed up. +type PromptFunction func(keys []Key, symmetric bool) ([]byte, os.Error) + +// A keyEnvelopePair is used to store a private key with the envelope that +// contains a symmetric key, encrypted with that key. +type keyEnvelopePair struct { + key Key + encryptedKey *packet.EncryptedKey +} + +// ReadMessage parses an OpenPGP message that may be signed and/or encrypted. +// The given KeyRing should contain both public keys (for signature +// verification) and, possibly encrypted, private keys for decrypting. +func ReadMessage(r io.Reader, keyring KeyRing, prompt PromptFunction) (md *MessageDetails, err os.Error) { + var p packet.Packet + + var symKeys []*packet.SymmetricKeyEncrypted + var pubKeys []keyEnvelopePair + var se *packet.SymmetricallyEncrypted + + packets := packet.NewReader(r) + md = new(MessageDetails) + md.IsEncrypted = true + + // The message, if encrypted, starts with a number of packets + // containing an encrypted decryption key. The decryption key is either + // encrypted to a public key, or with a passphrase. This loop + // collects these packets. +ParsePackets: + for { + p, err = packets.Next() + if err != nil { + return nil, err + } + switch p := p.(type) { + case *packet.SymmetricKeyEncrypted: + // This packet contains the decryption key encrypted with a passphrase. + md.IsSymmetricallyEncrypted = true + symKeys = append(symKeys, p) + case *packet.EncryptedKey: + // This packet contains the decryption key encrypted to a public key. 
+ md.EncryptedToKeyIds = append(md.EncryptedToKeyIds, p.KeyId) + if p.Algo != packet.PubKeyAlgoRSA && p.Algo != packet.PubKeyAlgoRSAEncryptOnly { + continue + } + var keys []Key + if p.KeyId == 0 { + keys = keyring.DecryptionKeys() + } else { + keys = keyring.KeysById(p.KeyId) + } + for _, k := range keys { + pubKeys = append(pubKeys, keyEnvelopePair{k, p}) + } + case *packet.SymmetricallyEncrypted: + se = p + break ParsePackets + case *packet.Compressed, *packet.LiteralData, *packet.OnePassSignature: + // This message isn't encrypted. + if len(symKeys) != 0 || len(pubKeys) != 0 { + return nil, error.StructuralError("key material not followed by encrypted message") + } + packets.Unread(p) + return readSignedMessage(packets, nil, keyring) + } + } + + var candidates []Key + var decrypted io.ReadCloser + + // Now that we have the list of encrypted keys we need to decrypt at + // least one of them or, if we cannot, we need to call the prompt + // function so that it can decrypt a key or give us a passphrase. 
+FindKey: + for { + // See if any of the keys already have a private key availible + candidates = candidates[:0] + candidateFingerprints := make(map[string]bool) + + for _, pk := range pubKeys { + if pk.key.PrivateKey == nil { + continue + } + if !pk.key.PrivateKey.Encrypted { + if len(pk.encryptedKey.Key) == 0 { + pk.encryptedKey.DecryptRSA(pk.key.PrivateKey.PrivateKey.(*rsa.PrivateKey)) + } + if len(pk.encryptedKey.Key) == 0 { + continue + } + decrypted, err = se.Decrypt(pk.encryptedKey.CipherFunc, pk.encryptedKey.Key) + if err != nil && err != error.KeyIncorrectError { + return nil, err + } + if decrypted != nil { + md.DecryptedWith = pk.key + break FindKey + } + } else { + fpr := string(pk.key.PublicKey.Fingerprint[:]) + if v := candidateFingerprints[fpr]; v { + continue + } + candidates = append(candidates, pk.key) + candidateFingerprints[fpr] = true + } + } + + if len(candidates) == 0 && len(symKeys) == 0 { + return nil, error.KeyIncorrectError + } + + if prompt == nil { + return nil, error.KeyIncorrectError + } + + passphrase, err := prompt(candidates, len(symKeys) != 0) + if err != nil { + return nil, err + } + + // Try the symmetric passphrase first + if len(symKeys) != 0 && passphrase != nil { + for _, s := range symKeys { + err = s.Decrypt(passphrase) + if err == nil && !s.Encrypted { + decrypted, err = se.Decrypt(s.CipherFunc, s.Key) + if err != nil && err != error.KeyIncorrectError { + return nil, err + } + if decrypted != nil { + break FindKey + } + } + + } + } + } + + md.decrypted = decrypted + packets.Push(decrypted) + return readSignedMessage(packets, md, keyring) +} + +// readSignedMessage reads a possibily signed message if mdin is non-zero then +// that structure is updated and returned. Otherwise a fresh MessageDetails is +// used. 
+func readSignedMessage(packets *packet.Reader, mdin *MessageDetails, keyring KeyRing) (md *MessageDetails, err os.Error) { + if mdin == nil { + mdin = new(MessageDetails) + } + md = mdin + + var p packet.Packet + var h hash.Hash + var wrappedHash hash.Hash +FindLiteralData: + for { + p, err = packets.Next() + if err != nil { + return nil, err + } + switch p := p.(type) { + case *packet.Compressed: + packets.Push(p.Body) + case *packet.OnePassSignature: + if !p.IsLast { + return nil, error.UnsupportedError("nested signatures") + } + + h, wrappedHash, err = hashForSignature(p.Hash, p.SigType) + if err != nil { + md = nil + return + } + + md.IsSigned = true + md.SignedByKeyId = p.KeyId + keys := keyring.KeysById(p.KeyId) + for _, key := range keys { + if key.SelfSignature.FlagsValid && !key.SelfSignature.FlagSign { + continue + } + md.SignedBy = &key + } + case *packet.LiteralData: + md.LiteralData = p + break FindLiteralData + } + } + + if md.SignedBy != nil { + md.UnverifiedBody = &signatureCheckReader{packets, h, wrappedHash, md} + } else if md.decrypted != nil { + md.UnverifiedBody = checkReader{md} + } else { + md.UnverifiedBody = md.LiteralData.Body + } + + return md, nil +} + +// hashForSignature returns a pair of hashes that can be used to verify a +// signature. The signature may specify that the contents of the signed message +// should be preprocessed (i.e. to normalise line endings). Thus this function +// returns two hashes. The second should be used to hash the message itself and +// performs any needed preprocessing. 
+func hashForSignature(hashId crypto.Hash, sigType packet.SignatureType) (hash.Hash, hash.Hash, os.Error) { + h := hashId.New() + if h == nil { + return nil, nil, error.UnsupportedError("hash not availible: " + strconv.Itoa(int(hashId))) + } + + switch sigType { + case packet.SigTypeBinary: + return h, h, nil + case packet.SigTypeText: + return h, NewCanonicalTextHash(h), nil + } + + return nil, nil, error.UnsupportedError("unsupported signature type: " + strconv.Itoa(int(sigType))) +} + +// checkReader wraps an io.Reader from a LiteralData packet. When it sees EOF +// it closes the ReadCloser from any SymmetricallyEncrypted packet to trigger +// MDC checks. +type checkReader struct { + md *MessageDetails +} + +func (cr checkReader) Read(buf []byte) (n int, err os.Error) { + n, err = cr.md.LiteralData.Body.Read(buf) + if err == os.EOF { + mdcErr := cr.md.decrypted.Close() + if mdcErr != nil { + err = mdcErr + } + } + return +} + +// signatureCheckReader wraps an io.Reader from a LiteralData packet and hashes +// the data as it is read. When it sees an EOF from the underlying io.Reader +// it parses and checks a trailing Signature packet and triggers any MDC checks. +type signatureCheckReader struct { + packets *packet.Reader + h, wrappedHash hash.Hash + md *MessageDetails +} + +func (scr *signatureCheckReader) Read(buf []byte) (n int, err os.Error) { + n, err = scr.md.LiteralData.Body.Read(buf) + scr.wrappedHash.Write(buf[:n]) + if err == os.EOF { + var p packet.Packet + p, scr.md.SignatureError = scr.packets.Next() + if scr.md.SignatureError != nil { + return + } + + var ok bool + if scr.md.Signature, ok = p.(*packet.Signature); !ok { + scr.md.SignatureError = error.StructuralError("LiteralData not followed by Signature") + return + } + + scr.md.SignatureError = scr.md.SignedBy.PublicKey.VerifySignature(scr.h, scr.md.Signature) + + // The SymmetricallyEncrypted packet, if any, might have an + // unsigned hash of its own. 
In order to check this we need to + // close that Reader. + if scr.md.decrypted != nil { + mdcErr := scr.md.decrypted.Close() + if mdcErr != nil { + err = mdcErr + } + } + } + return +} + +// CheckDetachedSignature takes a signed file and a detached signature and +// returns the signer if the signature is valid. If the signer isn't know, +// UnknownIssuerError is returned. +func CheckDetachedSignature(keyring KeyRing, signed, signature io.Reader) (signer *Entity, err os.Error) { + p, err := packet.Read(signature) + if err != nil { + return + } + + sig, ok := p.(*packet.Signature) + if !ok { + return nil, error.StructuralError("non signature packet found") + } + + if sig.IssuerKeyId == nil { + return nil, error.StructuralError("signature doesn't have an issuer") + } + + keys := keyring.KeysById(*sig.IssuerKeyId) + if len(keys) == 0 { + return nil, error.UnknownIssuerError + } + + h, wrappedHash, err := hashForSignature(sig.Hash, sig.SigType) + if err != nil { + return + } + + _, err = io.Copy(wrappedHash, signed) + if err != nil && err != os.EOF { + return + } + + for _, key := range keys { + if key.SelfSignature.FlagsValid && !key.SelfSignature.FlagSign { + continue + } + err = key.PublicKey.VerifySignature(h, sig) + if err == nil { + return key.Entity, nil + } + } + + if err != nil { + return + } + + return nil, error.UnknownIssuerError +} + +// CheckArmoredDetachedSignature performs the same actions as +// CheckDetachedSignature but expects the signature to be armored. +func CheckArmoredDetachedSignature(keyring KeyRing, signed, signature io.Reader) (signer *Entity, err os.Error) { + body, err := readArmored(signature, SignatureType) + if err != nil { + return + } + + return CheckDetachedSignature(keyring, signed, body) +} diff --git a/src/pkg/crypto/openpgp/read_test.go b/src/pkg/crypto/openpgp/read_test.go new file mode 100644 index 000000000..58199e132 --- /dev/null +++ b/src/pkg/crypto/openpgp/read_test.go @@ -0,0 +1,237 @@ +// Copyright 2011 The Go Authors. 
All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package openpgp + +import ( + "bytes" + "crypto/openpgp/error" + "encoding/hex" + "io" + "io/ioutil" + "os" + "testing" +) + +func readerFromHex(s string) io.Reader { + data, err := hex.DecodeString(s) + if err != nil { + panic("readerFromHex: bad input") + } + return bytes.NewBuffer(data) +} + +func TestReadKeyRing(t *testing.T) { + kring, err := ReadKeyRing(readerFromHex(testKeys1And2Hex)) + if err != nil { + t.Error(err) + return + } + if len(kring) != 2 || uint32(kring[0].PrimaryKey.KeyId) != 0xC20C31BB || uint32(kring[1].PrimaryKey.KeyId) != 0x1E35246B { + t.Errorf("bad keyring: %#v", kring) + } +} + +func TestReadPrivateKeyRing(t *testing.T) { + kring, err := ReadKeyRing(readerFromHex(testKeys1And2PrivateHex)) + if err != nil { + t.Error(err) + return + } + if len(kring) != 2 || uint32(kring[0].PrimaryKey.KeyId) != 0xC20C31BB || uint32(kring[1].PrimaryKey.KeyId) != 0x1E35246B || kring[0].PrimaryKey == nil { + t.Errorf("bad keyring: %#v", kring) + } +} + +func TestGetKeyById(t *testing.T) { + kring, _ := ReadKeyRing(readerFromHex(testKeys1And2Hex)) + + keys := kring.KeysById(0xa34d7e18c20c31bb) + if len(keys) != 1 || keys[0].Entity != kring[0] { + t.Errorf("bad result for 0xa34d7e18c20c31bb: %#v", keys) + } + + keys = kring.KeysById(0xfd94408d4543314f) + if len(keys) != 1 || keys[0].Entity != kring[0] { + t.Errorf("bad result for 0xa34d7e18c20c31bb: %#v", keys) + } +} + +func checkSignedMessage(t *testing.T, signedHex, expected string) { + kring, _ := ReadKeyRing(readerFromHex(testKeys1And2Hex)) + + md, err := ReadMessage(readerFromHex(signedHex), kring, nil) + if err != nil { + t.Error(err) + return + } + + if !md.IsSigned || md.SignedByKeyId != 0xa34d7e18c20c31bb || md.SignedBy == nil || md.IsEncrypted || md.IsSymmetricallyEncrypted || len(md.EncryptedToKeyIds) != 0 || md.IsSymmetricallyEncrypted { + t.Errorf("bad MessageDetails: 
%#v", md) + } + + contents, err := ioutil.ReadAll(md.UnverifiedBody) + if err != nil { + t.Errorf("error reading UnverifiedBody: %s", err) + } + if string(contents) != expected { + t.Errorf("bad UnverifiedBody got:%s want:%s", string(contents), expected) + } + if md.SignatureError != nil || md.Signature == nil { + t.Errorf("failed to validate: %s", md.SignatureError) + } +} + +func TestSignedMessage(t *testing.T) { + checkSignedMessage(t, signedMessageHex, signedInput) +} + +func TestTextSignedMessage(t *testing.T) { + checkSignedMessage(t, signedTextMessageHex, signedTextInput) +} + +func TestSignedEncryptedMessage(t *testing.T) { + expected := "Signed and encrypted message\n" + kring, _ := ReadKeyRing(readerFromHex(testKeys1And2PrivateHex)) + prompt := func(keys []Key, symmetric bool) ([]byte, os.Error) { + if symmetric { + t.Errorf("prompt: message was marked as symmetrically encrypted") + return nil, error.KeyIncorrectError + } + + if len(keys) == 0 { + t.Error("prompt: no keys requested") + return nil, error.KeyIncorrectError + } + + err := keys[0].PrivateKey.Decrypt([]byte("passphrase")) + if err != nil { + t.Errorf("prompt: error decrypting key: %s", err) + return nil, error.KeyIncorrectError + } + + return nil, nil + } + + md, err := ReadMessage(readerFromHex(signedEncryptedMessageHex), kring, prompt) + if err != nil { + t.Errorf("error reading message: %s", err) + return + } + + if !md.IsSigned || md.SignedByKeyId != 0xa34d7e18c20c31bb || md.SignedBy == nil || !md.IsEncrypted || md.IsSymmetricallyEncrypted || len(md.EncryptedToKeyIds) == 0 || md.EncryptedToKeyIds[0] != 0x2a67d68660df41c7 { + t.Errorf("bad MessageDetails: %#v", md) + } + + contents, err := ioutil.ReadAll(md.UnverifiedBody) + if err != nil { + t.Errorf("error reading UnverifiedBody: %s", err) + } + if string(contents) != expected { + t.Errorf("bad UnverifiedBody got:%s want:%s", string(contents), expected) + } + + if md.SignatureError != nil || md.Signature == nil { + t.Errorf("failed to 
validate: %s", md.SignatureError) + } +} + +func TestUnspecifiedRecipient(t *testing.T) { + expected := "Recipient unspecified\n" + kring, _ := ReadKeyRing(readerFromHex(testKeys1And2PrivateHex)) + + md, err := ReadMessage(readerFromHex(recipientUnspecifiedHex), kring, nil) + if err != nil { + t.Errorf("error reading message: %s", err) + return + } + + contents, err := ioutil.ReadAll(md.UnverifiedBody) + if err != nil { + t.Errorf("error reading UnverifiedBody: %s", err) + } + if string(contents) != expected { + t.Errorf("bad UnverifiedBody got:%s want:%s", string(contents), expected) + } +} + +func TestSymmetricallyEncrypted(t *testing.T) { + expected := "Symmetrically encrypted.\n" + + prompt := func(keys []Key, symmetric bool) ([]byte, os.Error) { + if len(keys) != 0 { + t.Errorf("prompt: len(keys) = %d (want 0)", len(keys)) + } + + if !symmetric { + t.Errorf("symmetric is not set") + } + + return []byte("password"), nil + } + + md, err := ReadMessage(readerFromHex(symmetricallyEncryptedCompressedHex), nil, prompt) + if err != nil { + t.Errorf("ReadMessage: %s", err) + return + } + + contents, err := ioutil.ReadAll(md.UnverifiedBody) + if err != nil { + t.Errorf("ReadAll: %s", err) + } + + expectedCreatationTime := uint32(1295992998) + if md.LiteralData.Time != expectedCreatationTime { + t.Errorf("LiteralData.Time is %d, want %d", md.LiteralData.Time, expectedCreatationTime) + } + + if string(contents) != expected { + t.Errorf("contents got: %s want: %s", string(contents), expected) + } +} + +func testDetachedSignature(t *testing.T, kring KeyRing, signature io.Reader, sigInput, tag string) { + signed := bytes.NewBufferString(sigInput) + signer, err := CheckDetachedSignature(kring, signed, signature) + if err != nil { + t.Errorf("%s: signature error: %s", tag, err) + return + } + if signer == nil { + t.Errorf("%s: signer is nil", tag) + return + } + expectedSignerKeyId := uint64(0xa34d7e18c20c31bb) + if signer.PrimaryKey.KeyId != expectedSignerKeyId { + 
t.Errorf("%s: wrong signer got:%x want:%x", tag, signer.PrimaryKey.KeyId, expectedSignerKeyId) + } +} + +func TestDetachedSignature(t *testing.T) { + kring, _ := ReadKeyRing(readerFromHex(testKeys1And2Hex)) + testDetachedSignature(t, kring, readerFromHex(detachedSignatureHex), signedInput, "binary") + testDetachedSignature(t, kring, readerFromHex(detachedSignatureTextHex), signedInput, "text") +} + +const signedInput = "Signed message\nline 2\nline 3\n" +const signedTextInput = "Signed message\r\nline 2\r\nline 3\r\n" + +const recipientUnspecifiedHex = "848c0300000000000000000103ff62d4d578d03cf40c3da998dfe216c074fa6ddec5e31c197c9666ba292830d91d18716a80f699f9d897389a90e6d62d0238f5f07a5248073c0f24920e4bc4a30c2d17ee4e0cae7c3d4aaa4e8dced50e3010a80ee692175fa0385f62ecca4b56ee6e9980aa3ec51b61b077096ac9e800edaf161268593eedb6cc7027ff5cb32745d250010d407a6221ae22ef18469b444f2822478c4d190b24d36371a95cb40087cdd42d9399c3d06a53c0673349bfb607927f20d1e122bde1e2bf3aa6cae6edf489629bcaa0689539ae3b718914d88ededc3b" + +const detachedSignatureHex = "889c04000102000605024d449cd1000a0910a34d7e18c20c31bb167603ff57718d09f28a519fdc7b5a68b6a3336da04df85e38c5cd5d5bd2092fa4629848a33d85b1729402a2aab39c3ac19f9d573f773cc62c264dc924c067a79dfd8a863ae06c7c8686120760749f5fd9b1e03a64d20a7df3446ddc8f0aeadeaeba7cbaee5c1e366d65b6a0c6cc749bcb912d2f15013f812795c2e29eb7f7b77f39ce77" + +const detachedSignatureTextHex = "889c04010102000605024d449d21000a0910a34d7e18c20c31bbc8c60400a24fbef7342603a41cb1165767bd18985d015fb72fe05db42db36cfb2f1d455967f1e491194fbf6cf88146222b23bf6ffbd50d17598d976a0417d3192ff9cc0034fd00f287b02e90418bbefe609484b09231e4e7a5f3562e199bf39909ab5276c4d37382fe088f6b5c3426fc1052865da8b3ab158672d58b6264b10823dc4b39" + +const testKeys1And2Hex = 
"988d044d3c5c10010400b1d13382944bd5aba23a4312968b5095d14f947f600eb478e14a6fcb16b0e0cac764884909c020bc495cfcc39a935387c661507bdb236a0612fb582cac3af9b29cc2c8c70090616c41b662f4da4c1201e195472eb7f4ae1ccbcbf9940fe21d985e379a5563dde5b9a23d35f1cfaa5790da3b79db26f23695107bfaca8e7b5bcd0011010001b41054657374204b6579203120285253412988b804130102002205024d3c5c10021b03060b090807030206150802090a0b0416020301021e01021780000a0910a34d7e18c20c31bbb5b304009cc45fe610b641a2c146331be94dade0a396e73ca725e1b25c21708d9cab46ecca5ccebc23055879df8f99eea39b377962a400f2ebdc36a7c99c333d74aeba346315137c3ff9d0a09b0273299090343048afb8107cf94cbd1400e3026f0ccac7ecebbc4d78588eb3e478fe2754d3ca664bcf3eac96ca4a6b0c8d7df5102f60f6b0020003b88d044d3c5c10010400b201df61d67487301f11879d514f4248ade90c8f68c7af1284c161098de4c28c2850f1ec7b8e30f959793e571542ffc6532189409cb51c3d30dad78c4ad5165eda18b20d9826d8707d0f742e2ab492103a85bbd9ddf4f5720f6de7064feb0d39ee002219765bb07bcfb8b877f47abe270ddeda4f676108cecb6b9bb2ad484a4f0011010001889f04180102000905024d3c5c10021b0c000a0910a34d7e18c20c31bb1a03040085c8d62e16d05dc4e9dad64953c8a2eed8b6c12f92b1575eeaa6dcf7be9473dd5b24b37b6dffbb4e7c99ed1bd3cb11634be19b3e6e207bed7505c7ca111ccf47cb323bf1f8851eb6360e8034cbff8dd149993c959de89f8f77f38e7e98b8e3076323aa719328e2b408db5ec0d03936efd57422ba04f925cdc7b4c1af7590e40ab0020003988d044d3c5c33010400b488c3e5f83f4d561f317817538d9d0397981e9aef1321ca68ebfae1cf8b7d388e19f4b5a24a82e2fbbf1c6c26557a6c5845307a03d815756f564ac7325b02bc83e87d5480a8fae848f07cb891f2d51ce7df83dcafdc12324517c86d472cc0ee10d47a68fd1d9ae49a6c19bbd36d82af597a0d88cc9c49de9df4e696fc1f0b5d0011010001b42754657374204b6579203220285253412c20656e637279707465642070726976617465206b65792988b804130102002205024d3c5c33021b03060b090807030206150802090a0b0416020301021e01021780000a0910d4984f961e35246b98940400908a73b6a6169f700434f076c6c79015a49bee37130eaf23aaa3cfa9ce60bfe4acaa7bc95f1146ada5867e0079babb38804891f4f0b8ebca57a86b249dee786161a755b7a342e68ccf3f78ed6440a93a6626beb9a37aa66afcd4f888790cb4bb46d94
a4ae3eb3d7d3e6b00f6bfec940303e89ec5b32a1eaaacce66497d539328b0020003b88d044d3c5c33010400a4e913f9442abcc7f1804ccab27d2f787ffa592077ca935a8bb23165bd8d57576acac647cc596b2c3f814518cc8c82953c7a4478f32e0cf645630a5ba38d9618ef2bc3add69d459ae3dece5cab778938d988239f8c5ae437807075e06c828019959c644ff05ef6a5a1dab72227c98e3a040b0cf219026640698d7a13d8538a570011010001889f04180102000905024d3c5c33021b0c000a0910d4984f961e35246b26c703ff7ee29ef53bc1ae1ead533c408fa136db508434e233d6e62be621e031e5940bbd4c08142aed0f82217e7c3e1ec8de574bc06ccf3c36633be41ad78a9eacd209f861cae7b064100758545cc9dd83db71806dc1cfd5fb9ae5c7474bba0c19c44034ae61bae5eca379383339dece94ff56ff7aa44a582f3e5c38f45763af577c0934b0020003" + +const testKeys1And2PrivateHex = "9501d8044d3c5c10010400b1d13382944bd5aba23a4312968b5095d14f947f600eb478e14a6fcb16b0e0cac764884909c020bc495cfcc39a935387c661507bdb236a0612fb582cac3af9b29cc2c8c70090616c41b662f4da4c1201e195472eb7f4ae1ccbcbf9940fe21d985e379a5563dde5b9a23d35f1cfaa5790da3b79db26f23695107bfaca8e7b5bcd00110100010003ff4d91393b9a8e3430b14d6209df42f98dc927425b881f1209f319220841273a802a97c7bdb8b3a7740b3ab5866c4d1d308ad0d3a79bd1e883aacf1ac92dfe720285d10d08752a7efe3c609b1d00f17f2805b217be53999a7da7e493bfc3e9618fd17018991b8128aea70a05dbce30e4fbe626aa45775fa255dd9177aabf4df7cf0200c1ded12566e4bc2bb590455e5becfb2e2c9796482270a943343a7835de41080582c2be3caf5981aa838140e97afa40ad652a0b544f83eb1833b0957dce26e47b0200eacd6046741e9ce2ec5beb6fb5e6335457844fb09477f83b050a96be7da043e17f3a9523567ed40e7a521f818813a8b8a72209f1442844843ccc7eb9805442570200bdafe0438d97ac36e773c7162028d65844c4d463e2420aa2228c6e50dc2743c3d6c72d0d782a5173fe7be2169c8a9f4ef8a7cf3e37165e8c61b89c346cdc6c1799d2b41054657374204b6579203120285253412988b804130102002205024d3c5c10021b03060b090807030206150802090a0b0416020301021e01021780000a0910a34d7e18c20c31bbb5b304009cc45fe610b641a2c146331be94dade0a396e73ca725e1b25c21708d9cab46ecca5ccebc23055879df8f99eea39b377962a400f2ebdc36a7c99c333d74aeba346315137c3ff9d0a09b0273299090343048afb8107cf94cbd1
400e3026f0ccac7ecebbc4d78588eb3e478fe2754d3ca664bcf3eac96ca4a6b0c8d7df5102f60f6b00200009d01d8044d3c5c10010400b201df61d67487301f11879d514f4248ade90c8f68c7af1284c161098de4c28c2850f1ec7b8e30f959793e571542ffc6532189409cb51c3d30dad78c4ad5165eda18b20d9826d8707d0f742e2ab492103a85bbd9ddf4f5720f6de7064feb0d39ee002219765bb07bcfb8b877f47abe270ddeda4f676108cecb6b9bb2ad484a4f00110100010003fd17a7490c22a79c59281fb7b20f5e6553ec0c1637ae382e8adaea295f50241037f8997cf42c1ce26417e015091451b15424b2c59eb8d4161b0975630408e394d3b00f88d4b4e18e2cc85e8251d4753a27c639c83f5ad4a571c4f19d7cd460b9b73c25ade730c99df09637bd173d8e3e981ac64432078263bb6dc30d3e974150dd0200d0ee05be3d4604d2146fb0457f31ba17c057560785aa804e8ca5530a7cd81d3440d0f4ba6851efcfd3954b7e68908fc0ba47f7ac37bf559c6c168b70d3a7c8cd0200da1c677c4bce06a068070f2b3733b0a714e88d62aa3f9a26c6f5216d48d5c2b5624144f3807c0df30be66b3268eeeca4df1fbded58faf49fc95dc3c35f134f8b01fd1396b6c0fc1b6c4f0eb8f5e44b8eace1e6073e20d0b8bc5385f86f1cf3f050f66af789f3ef1fc107b7f4421e19e0349c730c68f0a226981f4e889054fdb4dc149e8e889f04180102000905024d3c5c10021b0c000a0910a34d7e18c20c31bb1a03040085c8d62e16d05dc4e9dad64953c8a2eed8b6c12f92b1575eeaa6dcf7be9473dd5b24b37b6dffbb4e7c99ed1bd3cb11634be19b3e6e207bed7505c7ca111ccf47cb323bf1f8851eb6360e8034cbff8dd149993c959de89f8f77f38e7e98b8e3076323aa719328e2b408db5ec0d03936efd57422ba04f925cdc7b4c1af7590e40ab00200009501fe044d3c5c33010400b488c3e5f83f4d561f317817538d9d0397981e9aef1321ca68ebfae1cf8b7d388e19f4b5a24a82e2fbbf1c6c26557a6c5845307a03d815756f564ac7325b02bc83e87d5480a8fae848f07cb891f2d51ce7df83dcafdc12324517c86d472cc0ee10d47a68fd1d9ae49a6c19bbd36d82af597a0d88cc9c49de9df4e696fc1f0b5d0011010001fe030302e9030f3c783e14856063f16938530e148bc57a7aa3f3e4f90df9dceccdc779bc0835e1ad3d006e4a8d7b36d08b8e0de5a0d947254ecfbd22037e6572b426bcfdc517796b224b0036ff90bc574b5509bede85512f2eefb520fb4b02aa523ba739bff424a6fe81c5041f253f8d757e69a503d3563a104d0d49e9e890b9d0c26f96b55b743883b472caa7050c4acfd4a21f875bdf1258d88bd61224d303dc9df77f743137d51e6d5246b
88c406780528fd9a3e15bab5452e5b93970d9dcc79f48b38651b9f15bfbcf6da452837e9cc70683d1bdca94507870f743e4ad902005812488dd342f836e72869afd00ce1850eea4cfa53ce10e3608e13d3c149394ee3cbd0e23d018fcbcb6e2ec5a1a22972d1d462ca05355d0d290dd2751e550d5efb38c6c89686344df64852bf4ff86638708f644e8ec6bd4af9b50d8541cb91891a431326ab2e332faa7ae86cfb6e0540aa63160c1e5cdd5a4add518b303fff0a20117c6bc77f7cfbaf36b04c865c6c2b42754657374204b6579203220285253412c20656e637279707465642070726976617465206b65792988b804130102002205024d3c5c33021b03060b090807030206150802090a0b0416020301021e01021780000a0910d4984f961e35246b98940400908a73b6a6169f700434f076c6c79015a49bee37130eaf23aaa3cfa9ce60bfe4acaa7bc95f1146ada5867e0079babb38804891f4f0b8ebca57a86b249dee786161a755b7a342e68ccf3f78ed6440a93a6626beb9a37aa66afcd4f888790cb4bb46d94a4ae3eb3d7d3e6b00f6bfec940303e89ec5b32a1eaaacce66497d539328b00200009d01fe044d3c5c33010400a4e913f9442abcc7f1804ccab27d2f787ffa592077ca935a8bb23165bd8d57576acac647cc596b2c3f814518cc8c82953c7a4478f32e0cf645630a5ba38d9618ef2bc3add69d459ae3dece5cab778938d988239f8c5ae437807075e06c828019959c644ff05ef6a5a1dab72227c98e3a040b0cf219026640698d7a13d8538a570011010001fe030302e9030f3c783e148560f936097339ae381d63116efcf802ff8b1c9360767db5219cc987375702a4123fd8657d3e22700f23f95020d1b261eda5257e9a72f9a918e8ef22dd5b3323ae03bbc1923dd224db988cadc16acc04b120a9f8b7e84da9716c53e0334d7b66586ddb9014df604b41be1e960dcfcbc96f4ed150a1a0dd070b9eb14276b9b6be413a769a75b519a53d3ecc0c220e85cd91ca354d57e7344517e64b43b6e29823cbd87eae26e2b2e78e6dedfbb76e3e9f77bcb844f9a8932eb3db2c3f9e44316e6f5d60e9e2a56e46b72abe6b06dc9a31cc63f10023d1f5e12d2a3ee93b675c96f504af0001220991c88db759e231b3320dcedf814dcf723fd9857e3d72d66a0f2af26950b915abdf56c1596f46a325bf17ad4810d3535fb02a259b247ac3dbd4cc3ecf9c51b6c07cebb009c1506fba0a89321ec8683e3fd009a6e551d50243e2d5092fefb3321083a4bad91320dc624bd6b5dddf93553e3d53924c05bfebec1fb4bd47e89a1a889f04180102000905024d3c5c33021b0c000a0910d4984f961e35246b26c703ff7ee29ef53bc1ae1ead533c408fa136db508434e233d6e62be621e
031e5940bbd4c08142aed0f82217e7c3e1ec8de574bc06ccf3c36633be41ad78a9eacd209f861cae7b064100758545cc9dd83db71806dc1cfd5fb9ae5c7474bba0c19c44034ae61bae5eca379383339dece94ff56ff7aa44a582f3e5c38f45763af577c0934b0020000" + +const signedMessageHex = "a3019bc0cbccc0c4b8d8b74ee2108fe16ec6d3ca490cbe362d3f8333d3f352531472538b8b13d353b97232f352158c20943157c71c16064626063656269052062e4e01987e9b6fccff4b7df3a34c534b23e679cbec3bc0f8f6e64dfb4b55fe3f8efa9ce110ddb5cd79faf1d753c51aecfa669f7e7aa043436596cccc3359cb7dd6bbe9ecaa69e5989d9e57209571edc0b2fa7f57b9b79a64ee6e99ce1371395fee92fec2796f7b15a77c386ff668ee27f6d38f0baa6c438b561657377bf6acff3c5947befd7bf4c196252f1d6e5c524d0300" + +const signedTextMessageHex = "a3019bc0cbccc8c4b8d8b74ee2108fe16ec6d36a250cbece0c178233d3f352531472538b8b13d35379b97232f352158ca0b4312f57c71c1646462606365626906a062e4e019811591798ff99bf8afee860b0d8a8c2a85c3387e3bcf0bb3b17987f2bbcfab2aa526d930cbfd3d98757184df3995c9f3e7790e36e3e9779f06089d4c64e9e47dd6202cb6e9bc73c5d11bb59fbaf89d22d8dc7cf199ddf17af96e77c5f65f9bbed56f427bd8db7af37f6c9984bf9385efaf5f184f986fb3e6adb0ecfe35bbf92d16a7aa2a344fb0bc52fb7624f0200" + +const signedEncryptedMessageHex = "848c032a67d68660df41c70103ff5789d0de26b6a50c985a02a13131ca829c413a35d0e6fa8d6842599252162808ac7439c72151c8c6183e76923fe3299301414d0c25a2f06a2257db3839e7df0ec964773f6e4c4ac7ff3b48c444237166dd46ba8ff443a5410dc670cb486672fdbe7c9dfafb75b4fea83af3a204fe2a7dfa86bd20122b4f3d2646cbeecb8f7be8d2c03b018bd210b1d3791e1aba74b0f1034e122ab72e760492c192383cf5e20b5628bd043272d63df9b923f147eb6091cd897553204832aba48fec54aa447547bb16305a1024713b90e77fd0065f1918271947549205af3c74891af22ee0b56cd29bfec6d6e351901cd4ab3ece7c486f1e32a792d4e474aed98ee84b3f591c7dff37b64e0ecd68fd036d517e412dcadf85840ce184ad7921ad446c4ee28db80447aea1ca8d4f574db4d4e37688158ddd19e14ee2eab4873d46947d65d14a23e788d912cf9a19624ca7352469b72a83866b7c23cb5ace3deab3c7018061b0ba0f39ed2befe27163e5083cf9b8271e3e3d52cc7ad6e2a3bd81d4c3d7022f8d" + +const symmetricallyEncryptedCompressedHex 
= "8c0d04030302eb4a03808145d0d260c92f714339e13de5a79881216431925bf67ee2898ea61815f07894cd0703c50d0a76ef64d482196f47a8bc729af9b80bb6" diff --git a/src/pkg/crypto/openpgp/write.go b/src/pkg/crypto/openpgp/write.go new file mode 100644 index 000000000..1a2e2bf04 --- /dev/null +++ b/src/pkg/crypto/openpgp/write.go @@ -0,0 +1,92 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package openpgp + +import ( + "crypto" + "crypto/openpgp/armor" + "crypto/openpgp/error" + "crypto/openpgp/packet" + "crypto/rsa" + _ "crypto/sha256" + "io" + "os" + "strconv" + "time" +) + +// DetachSign signs message with the private key from signer (which must +// already have been decrypted) and writes the signature to w. +func DetachSign(w io.Writer, signer *Entity, message io.Reader) os.Error { + return detachSign(w, signer, message, packet.SigTypeBinary) +} + +// ArmoredDetachSign signs message with the private key from signer (which +// must already have been decrypted) and writes an armored signature to w. +func ArmoredDetachSign(w io.Writer, signer *Entity, message io.Reader) (err os.Error) { + return armoredDetachSign(w, signer, message, packet.SigTypeBinary) +} + +// DetachSignText signs message (after canonicalising the line endings) with +// the private key from signer (which must already have been decrypted) and +// writes the signature to w. +func DetachSignText(w io.Writer, signer *Entity, message io.Reader) os.Error { + return detachSign(w, signer, message, packet.SigTypeText) +} + +// SignTextDetachedArmored signs message (after canonicalising the line endings) +// with the private key from signer (which must already have been decrypted) +// and writes an armored signature to w.
+func SignTextDetachedArmored(w io.Writer, signer *Entity, message io.Reader) os.Error {
+	return armoredDetachSign(w, signer, message, packet.SigTypeText)
+}
+
+// armoredDetachSign wraps w in an armor encoder of type SignatureType, writes
+// a detached signature of message with the given signature type into it and
+// closes the encoder. Any error from encoding or signing is returned.
+func armoredDetachSign(w io.Writer, signer *Entity, message io.Reader, sigType packet.SignatureType) (err os.Error) {
+	out, err := armor.Encode(w, SignatureType, nil)
+	if err != nil {
+		return
+	}
+	err = detachSign(out, signer, message, sigType)
+	if err != nil {
+		return
+	}
+	return out.Close()
+}
+
+// detachSign hashes message (through the preprocessing hash selected by
+// sigType), signs the digest with signer's private key and serializes the
+// resulting signature packet to w. The private key must be present and
+// already decrypted; only RSA keys are supported. An error is returned if
+// the key is unusable, if message cannot be read, or if signing or
+// serialization fails.
+func detachSign(w io.Writer, signer *Entity, message io.Reader, sigType packet.SignatureType) (err os.Error) {
+	if signer.PrivateKey == nil {
+		return error.InvalidArgumentError("signing key doesn't have a private key")
+	}
+	if signer.PrivateKey.Encrypted {
+		return error.InvalidArgumentError("signing key is encrypted")
+	}
+
+	sig := new(packet.Signature)
+	sig.SigType = sigType
+	sig.PubKeyAlgo = signer.PrivateKey.PubKeyAlgo
+	sig.Hash = crypto.SHA256
+	sig.CreationTime = uint32(time.Seconds())
+	sig.IssuerKeyId = &signer.PrivateKey.KeyId
+
+	h, wrappedHash, err := hashForSignature(sig.Hash, sig.SigType)
+	if err != nil {
+		return
+	}
+	// A failed read must abort the operation: otherwise a signature over a
+	// silently truncated message would be produced. The EOF tolerance
+	// mirrors the check in CheckDetachedSignature.
+	_, err = io.Copy(wrappedHash, message)
+	if err != nil && err != os.EOF {
+		return
+	}
+
+	switch signer.PrivateKey.PubKeyAlgo {
+	case packet.PubKeyAlgoRSA, packet.PubKeyAlgoRSASignOnly:
+		priv := signer.PrivateKey.PrivateKey.(*rsa.PrivateKey)
+		err = sig.SignRSA(h, priv)
+	default:
+		err = error.UnsupportedError("public key algorithm: " + strconv.Itoa(int(sig.PubKeyAlgo)))
+	}
+
+	if err != nil {
+		return
+	}
+
+	return sig.Serialize(w)
+}
diff --git a/src/pkg/crypto/openpgp/write_test.go b/src/pkg/crypto/openpgp/write_test.go
new file mode 100644
index 000000000..33e8809f2
--- /dev/null
+++ b/src/pkg/crypto/openpgp/write_test.go
@@ -0,0 +1,34 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+ +package openpgp + +import ( + "bytes" + "testing" +) + +func TestSignDetached(t *testing.T) { + kring, _ := ReadKeyRing(readerFromHex(testKeys1And2PrivateHex)) + out := bytes.NewBuffer(nil) + message := bytes.NewBufferString(signedInput) + err := DetachSign(out, kring[0], message) + if err != nil { + t.Error(err) + } + + testDetachedSignature(t, kring, out, signedInput, "check") +} + +func TestSignTextDetached(t *testing.T) { + kring, _ := ReadKeyRing(readerFromHex(testKeys1And2PrivateHex)) + out := bytes.NewBuffer(nil) + message := bytes.NewBufferString(signedInput) + err := DetachSignText(out, kring[0], message) + if err != nil { + t.Error(err) + } + + testDetachedSignature(t, kring, out, signedInput, "check") +} diff --git a/src/pkg/crypto/rand/rand_unix.go b/src/pkg/crypto/rand/rand_unix.go index 900b57330..66b72c076 100644 --- a/src/pkg/crypto/rand/rand_unix.go +++ b/src/pkg/crypto/rand/rand_unix.go @@ -8,6 +8,7 @@ package rand import ( + "bufio" "crypto/aes" "io" "os" @@ -23,7 +24,7 @@ func init() { Reader = &devReader{name: "/dev/urandom"} } // A devReader satisfies reads by reading the file named name. type devReader struct { name string - f *os.File + f io.Reader mu sync.Mutex } @@ -35,7 +36,7 @@ func (r *devReader) Read(b []byte) (n int, err os.Error) { if f == nil { return 0, err } - r.f = f + r.f = bufio.NewReader(f) } return r.f.Read(b) } diff --git a/src/pkg/crypto/rsa/rsa.go b/src/pkg/crypto/rsa/rsa.go index c7a8d2053..faf914991 100644 --- a/src/pkg/crypto/rsa/rsa.go +++ b/src/pkg/crypto/rsa/rsa.go @@ -274,6 +274,14 @@ func EncryptOAEP(hash hash.Hash, rand io.Reader, pub *PublicKey, msg []byte, lab m.SetBytes(em) c := encrypt(new(big.Int), pub, m) out = c.Bytes() + + if len(out) < k { + // If the output is too small, we need to left-pad with zeros. 
+ t := make([]byte, k) + copy(t[k-len(out):], out) + out = t + } + return } diff --git a/src/pkg/crypto/rsa/rsa_test.go b/src/pkg/crypto/rsa/rsa_test.go index df1f17f17..22d4576e8 100644 --- a/src/pkg/crypto/rsa/rsa_test.go +++ b/src/pkg/crypto/rsa/rsa_test.go @@ -66,7 +66,7 @@ func TestEncryptOAEP(t *testing.T) { t.Errorf("#%d,%d error: %s", i, j, err) } if bytes.Compare(out, message.out) != 0 { - t.Errorf("#%d,%d bad result: %s (want %s)", i, j, out, message.out) + t.Errorf("#%d,%d bad result: %x (want %x)", i, j, out, message.out) } } } diff --git a/src/pkg/crypto/tls/handshake_client.go b/src/pkg/crypto/tls/handshake_client.go index 19d2bfa3b..a325a9b95 100644 --- a/src/pkg/crypto/tls/handshake_client.go +++ b/src/pkg/crypto/tls/handshake_client.go @@ -57,7 +57,7 @@ func (c *Conn) clientHandshake() os.Error { vers, ok := mutualVersion(serverHello.vers) if !ok { - c.sendAlert(alertProtocolVersion) + return c.sendAlert(alertProtocolVersion) } c.vers = vers c.haveVers = true diff --git a/src/pkg/crypto/tls/handshake_client_test.go b/src/pkg/crypto/tls/handshake_client_test.go index e5c9684b9..fd1f145cf 100644 --- a/src/pkg/crypto/tls/handshake_client_test.go +++ b/src/pkg/crypto/tls/handshake_client_test.go @@ -61,7 +61,7 @@ func TestRunClient(t *testing.T) { // Script of interaction with gnutls implementation. 
// The values for this test are obtained by building and running in client mode: -// % gotest -match "TestRunClient" -connect +// % gotest -test.run "TestRunClient" -connect // and then: // % gnutls-serv -p 10443 --debug 100 --x509keyfile key.pem --x509certfile cert.pem -a > /tmp/log 2>&1 // % python parse-gnutls-cli-debug-log.py < /tmp/log diff --git a/src/pkg/crypto/tls/handshake_server_test.go b/src/pkg/crypto/tls/handshake_server_test.go index 5cf3ae049..6beb6a9f6 100644 --- a/src/pkg/crypto/tls/handshake_server_test.go +++ b/src/pkg/crypto/tls/handshake_server_test.go @@ -194,7 +194,7 @@ var testPrivateKey = &rsa.PrivateKey{ // Script of interaction with gnutls implementation. // The values for this test are obtained by building and running in server mode: -// % gotest -match "TestRunServer" -serve +// % gotest -test.run "TestRunServer" -serve // and then: // % gnutls-cli --insecure --debug 100 -p 10443 localhost > /tmp/log 2>&1 // % python parse-gnutls-cli-debug-log.py < /tmp/log diff --git a/src/pkg/exp/eval/stmt_test.go b/src/pkg/exp/eval/stmt_test.go index a14a288d9..4a883ef5e 100644 --- a/src/pkg/exp/eval/stmt_test.go +++ b/src/pkg/exp/eval/stmt_test.go @@ -217,7 +217,7 @@ var stmtTests = []test{ Val2("if false { i = 2 } else { i = 3 }; i2 = 4", "i", 3, "i2", 4), Val2("if i == i2 { i = 2 } else { i = 3 }; i2 = 4", "i", 3, "i2", 4), // Omit optional parts - Val2("if { i = 2 } else { i = 3 }; i2 = 4", "i", 2, "i2", 4), + Val2("if true { i = 2 } else { i = 3 }; i2 = 4", "i", 2, "i2", 4), Val2("if true { i = 2 }; i2 = 4", "i", 2, "i2", 4), Val2("if false { i = 2 }; i2 = 4", "i", 1, "i2", 4), // Init @@ -243,11 +243,11 @@ var stmtTests = []test{ CErr("fn1 := func() int { if true { return 1 } }", "return"), CErr("fn1 := func() int { if true { } }", "return"), Run("fn1 := func() int { if true { }; return 1 }"), - CErr("fn1 := func() int { if { } }", "return"), - CErr("fn1 := func() int { if { } else { return 2 } }", "return"), - Run("fn1 := func() int { if { 
return 1 } }"), - Run("fn1 := func() int { if { return 1 } else { } }"), - Run("fn1 := func() int { if { return 1 } else { } }"), + CErr("fn1 := func() int { if true { } }", "return"), + CErr("fn1 := func() int { if true { } else { return 2 } }", "return"), + Run("fn1 := func() int { if true { return 1 }; return 0 }"), + Run("fn1 := func() int { if true { return 1 } else { }; return 0 }"), + Run("fn1 := func() int { if true { return 1 } else { }; return 0 }"), // Switch Val1("switch { case false: i += 2; case true: i += 4; default: i += 8 }", "i", 1+4), diff --git a/src/pkg/exp/wingui/Makefile b/src/pkg/exp/wingui/Makefile index e9d44d2bc..983a8270b 100644 --- a/src/pkg/exp/wingui/Makefile +++ b/src/pkg/exp/wingui/Makefile @@ -6,6 +6,8 @@ GOOS=windows include ../../../Make.inc +LD:=$(LD) -Hwindowsgui + TARG=wingui GOFILES=\ diff --git a/src/pkg/fmt/doc.go b/src/pkg/fmt/doc.go index b40e265ae..77ee62bb1 100644 --- a/src/pkg/fmt/doc.go +++ b/src/pkg/fmt/doc.go @@ -120,7 +120,7 @@ An analogous set of functions scans formatted text to yield values. Scan, Scanf and Scanln read from os.Stdin; Fscan, Fscanf and Fscanln read from a specified os.Reader; Sscan, - Sscanf and Sscanln read from an argument string. Sscanln, + Sscanf and Sscanln read from an argument string. Scanln, Fscanln and Sscanln stop scanning at a newline and require that the items be followed by one; Sscanf, Fscanf and Sscanf require newlines in the input to match newlines in the format; the other @@ -164,13 +164,15 @@ All arguments to be scanned must be either pointers to basic types or implementations of the Scanner interface. - Note: Fscan etc. can read one character (rune) past the - input they return, which means that a loop calling a scan - routine may skip some of the input. This is usually a - problem only when there is no space between input values. - However, if the reader provided to Fscan implements UnreadRune, + Note: Fscan etc. 
can read one character (rune) past the input + they return, which means that a loop calling a scan routine + may skip some of the input. This is usually a problem only + when there is no space between input values. If the reader + provided to Fscan implements ReadRune, that method will be used + to read characters. If the reader also implements UnreadRune, that method will be used to save the character and successive - calls will not lose data. To attach an UnreadRune method - to a reader without that capability, use bufio.NewReader. + calls will not lose data. To attach ReadRune and UnreadRune + methods to a reader without that capability, use + bufio.NewReader. */ package fmt diff --git a/src/pkg/fmt/fmt_test.go b/src/pkg/fmt/fmt_test.go index 3f085b722..c8aa6090b 100644 --- a/src/pkg/fmt/fmt_test.go +++ b/src/pkg/fmt/fmt_test.go @@ -311,9 +311,9 @@ var fmttests = []struct { // go syntax {"%#v", A{1, 2, "a", []int{1, 2}}, `fmt_test.A{i:1, j:0x2, s:"a", x:[]int{1, 2}}`}, - {"%#v", &b, "(*uint8)(PTR)"}, - {"%#v", TestFmtInterface, "(func(*testing.T))(PTR)"}, - {"%#v", make(chan int), "(chan int)(PTR)"}, + {"%#v", &b, "(*uint8)(0xPTR)"}, + {"%#v", TestFmtInterface, "(func(*testing.T))(0xPTR)"}, + {"%#v", make(chan int), "(chan int)(0xPTR)"}, {"%#v", uint64(1<<64 - 1), "0xffffffffffffffff"}, {"%#v", 1000000000, "1000000000"}, {"%#v", map[string]int{"a": 1, "b": 2}, `map[string] int{"a":1, "b":2}`}, @@ -365,14 +365,15 @@ var fmttests = []struct { {"%6T", &intVal, " *int"}, // %p - {"p0=%p", new(int), "p0=PTR"}, + {"p0=%p", new(int), "p0=0xPTR"}, {"p1=%s", &pValue, "p1=String(p)"}, // String method... - {"p2=%p", &pValue, "p2=PTR"}, // ... not called with %p + {"p2=%p", &pValue, "p2=0xPTR"}, // ... 
not called with %p + {"p4=%#p", new(int), "p4=PTR"}, // %p on non-pointers - {"%p", make(chan int), "PTR"}, - {"%p", make(map[int]int), "PTR"}, - {"%p", make([]int, 1), "PTR"}, + {"%p", make(chan int), "0xPTR"}, + {"%p", make(map[int]int), "0xPTR"}, + {"%p", make([]int, 1), "0xPTR"}, {"%p", 27, "%!p(int=27)"}, // not a pointer at all // erroneous things @@ -388,8 +389,8 @@ var fmttests = []struct { func TestSprintf(t *testing.T) { for _, tt := range fmttests { s := Sprintf(tt.fmt, tt.val) - if i := strings.Index(s, "0x"); i >= 0 && strings.Contains(tt.out, "PTR") { - j := i + 2 + if i := strings.Index(tt.out, "PTR"); i >= 0 { + j := i for ; j < len(s); j++ { c := s[j] if (c < '0' || c > '9') && (c < 'a' || c > 'f') && (c < 'A' || c > 'F') { diff --git a/src/pkg/fmt/format.go b/src/pkg/fmt/format.go index 86057bf69..caaa7ac1a 100644 --- a/src/pkg/fmt/format.go +++ b/src/pkg/fmt/format.go @@ -107,7 +107,7 @@ func (f *fmt) writePadding(n int, padding []byte) { } // Append b to f.buf, padded on left (w > 0) or right (w < 0 or f.minus) -// clear flags aftewards. +// clear flags afterwards. func (f *fmt) pad(b []byte) { var padding []byte var left, right int @@ -124,7 +124,7 @@ func (f *fmt) pad(b []byte) { } // append s to buf, padded on left (w > 0) or right (w < 0 or f.minus). -// clear flags aftewards. +// clear flags afterwards. func (f *fmt) padString(s string) { var padding []byte var left, right int diff --git a/src/pkg/fmt/print.go b/src/pkg/fmt/print.go index d6dc8eb3d..4e14fdaa4 100644 --- a/src/pkg/fmt/print.go +++ b/src/pkg/fmt/print.go @@ -348,11 +348,11 @@ func (p *pp) fmtInt64(v int64, verb int, value interface{}) { } } -// fmt0x64 formats a uint64 in hexadecimal and prefixes it with 0x by -// temporarily turning on the sharp flag. -func (p *pp) fmt0x64(v uint64) { +// fmt0x64 formats a uint64 in hexadecimal and prefixes it with 0x or +// not, as requested, by temporarily setting the sharp flag. 
+func (p *pp) fmt0x64(v uint64, leading0x bool) { sharp := p.fmt.sharp - p.fmt.sharp = true // turn on 0x + p.fmt.sharp = leading0x p.fmt.integer(int64(v), 16, unsigned, ldigits) p.fmt.sharp = sharp } @@ -384,7 +384,7 @@ func (p *pp) fmtUint64(v uint64, verb int, goSyntax bool, value interface{}) { p.fmt.integer(int64(v), 10, unsigned, ldigits) case 'v': if goSyntax { - p.fmt0x64(v) + p.fmt0x64(v, true) } else { p.fmt.integer(int64(v), 10, unsigned, ldigits) } @@ -534,11 +534,11 @@ func (p *pp) fmtPointer(field interface{}, value reflect.Value, verb int, goSynt if u == 0 { p.buf.Write(nilBytes) } else { - p.fmt0x64(uint64(v.Get())) + p.fmt0x64(uint64(v.Get()), true) } p.add(')') } else { - p.fmt0x64(uint64(u)) + p.fmt0x64(uint64(u), !p.fmt.sharp) } } @@ -801,7 +801,7 @@ BigSwitch: if v == 0 { p.buf.Write(nilBytes) } else { - p.fmt0x64(uint64(v)) + p.fmt0x64(uint64(v), true) } p.buf.WriteByte(')') break @@ -810,7 +810,7 @@ BigSwitch: p.buf.Write(nilAngleBytes) break } - p.fmt0x64(uint64(v)) + p.fmt0x64(uint64(v), true) case uintptrGetter: p.fmtPointer(field, value, verb, goSyntax) default: diff --git a/src/pkg/fmt/scan.go b/src/pkg/fmt/scan.go index 53d88d574..c0f2bacb6 100644 --- a/src/pkg/fmt/scan.go +++ b/src/pkg/fmt/scan.go @@ -28,23 +28,30 @@ type runeUnreader interface { // Scanners may do rune-at-a-time scanning or ask the ScanState // to discover the next space-delimited token. type ScanState interface { - // GetRune reads the next rune (Unicode code point) from the input. - GetRune() (rune int, err os.Error) - // UngetRune causes the next call to GetRune to return the same rune. - UngetRune() - // Width returns the value of the width option and whether it has been set. - // The unit is Unicode code points. - Width() (wid int, ok bool) + // ReadRune reads the next rune (Unicode code point) from the input. 
+ // If invoked during Scanln, Fscanln, or Sscanln, ReadRune() will + // return EOF after returning the first '\n' or when reading beyond + // the specified width. + ReadRune() (rune int, size int, err os.Error) + // UnreadRune causes the next call to ReadRune to return the same rune. + UnreadRune() os.Error // Token returns the next space-delimited token from the input. If // a width has been specified, the returned token will be no longer // than the width. Token() (token string, err os.Error) + // Width returns the value of the width option and whether it has been set. + // The unit is Unicode code points. + Width() (wid int, ok bool) + // Because ReadRune is implemented by the interface, Read should never be + // called by the scanning routines and a valid implementation of + // ScanState may choose always to return an error from Read. + Read(buf []byte) (n int, err os.Error) } // Scanner is implemented by any value that has a Scan method, which scans // the input for the representation of a value and stores the result in the // receiver, which must be a pointer to be useful. The Scan method is called -// for any argument to Scan or Scanln that implements it. +// for any argument to Scan, Scanf, or Scanln that implements it. type Scanner interface { Scan(state ScanState, verb int) os.Error } @@ -96,18 +103,18 @@ func Sscanf(str string, format string, a ...interface{}) (n int, err os.Error) { // returns the number of items successfully scanned. If that is less // than the number of arguments, err will report why. func Fscan(r io.Reader, a ...interface{}) (n int, err os.Error) { - s := newScanState(r, true) + s, old := newScanState(r, true, false) n, err = s.doScan(a) - s.free() + s.free(old) return } // Fscanln is similar to Fscan, but stops scanning at a newline and // after the final item there must be a newline or EOF. 
func Fscanln(r io.Reader, a ...interface{}) (n int, err os.Error) { - s := newScanState(r, false) + s, old := newScanState(r, false, true) n, err = s.doScan(a) - s.free() + s.free(old) return } @@ -115,9 +122,9 @@ func Fscanln(r io.Reader, a ...interface{}) (n int, err os.Error) { // values into successive arguments as determined by the format. It // returns the number of items successfully parsed. func Fscanf(r io.Reader, format string, a ...interface{}) (n int, err os.Error) { - s := newScanState(r, false) + s, old := newScanState(r, false, false) n, err = s.doScanf(format, a) - s.free() + s.free(old) return } @@ -131,53 +138,70 @@ const EOF = -1 // ss is the internal implementation of ScanState. type ss struct { - rr io.RuneReader // where to read input - buf bytes.Buffer // token accumulator - nlIsSpace bool // whether newline counts as white space - peekRune int // one-rune lookahead - prevRune int // last rune returned by GetRune - atEOF bool // already read EOF - maxWid int // max width of field, in runes - widPresent bool // width was specified - wid int // width consumed so far; used in accept() + rr io.RuneReader // where to read input + buf bytes.Buffer // token accumulator + peekRune int // one-rune lookahead + prevRune int // last rune returned by ReadRune + count int // runes consumed so far. + atEOF bool // already read EOF + ssave } -func (s *ss) GetRune() (rune int, err os.Error) { +// ssave holds the parts of ss that need to be +// saved and restored on recursive scans. +type ssave struct { + validSave bool // is or was a part of an actual ss. + nlIsEnd bool // whether newline terminates scan + nlIsSpace bool // whether newline counts as white space + fieldLimit int // max value of ss.count for this field; fieldLimit <= limit + limit int // max value of ss.count. + maxWid int // width of this field. +} + +// The Read method is only in ScanState so that ScanState +// satisfies io.Reader. 
It will never be called when used as +// intended, so there is no need to make it actually work. +func (s *ss) Read(buf []byte) (n int, err os.Error) { + return 0, os.ErrorString("ScanState's Read should not be called. Use ReadRune") +} + +func (s *ss) ReadRune() (rune int, size int, err os.Error) { if s.peekRune >= 0 { + s.count++ rune = s.peekRune + size = utf8.RuneLen(rune) s.prevRune = rune s.peekRune = -1 return } - rune, _, err = s.rr.ReadRune() + if s.atEOF || s.nlIsEnd && s.prevRune == '\n' || s.count >= s.fieldLimit { + err = os.EOF + return + } + + rune, size, err = s.rr.ReadRune() if err == nil { + s.count++ s.prevRune = rune + } else if err == os.EOF { + s.atEOF = true } return } func (s *ss) Width() (wid int, ok bool) { - return s.maxWid, s.widPresent + if s.maxWid == hugeWid { + return 0, false + } + return s.maxWid, true } // The public method returns an error; this private one panics. // If getRune reaches EOF, the return value is EOF (-1). func (s *ss) getRune() (rune int) { - if s.atEOF { - return EOF - } - if s.peekRune >= 0 { - rune = s.peekRune - s.prevRune = rune - s.peekRune = -1 - return - } - rune, _, err := s.rr.ReadRune() - if err == nil { - s.prevRune = rune - } else if err != nil { + rune, _, err := s.ReadRune() + if err != nil { if err == os.EOF { - s.atEOF = true return EOF } s.error(err) @@ -185,35 +209,25 @@ func (s *ss) getRune() (rune int) { return } -// mustGetRune turns os.EOF into a panic(io.ErrUnexpectedEOF). +// mustReadRune turns os.EOF into a panic(io.ErrUnexpectedEOF). // It is called in cases such as string scanning where an EOF is a // syntax error. 
-func (s *ss) mustGetRune() (rune int) { - if s.atEOF { +func (s *ss) mustReadRune() (rune int) { + rune = s.getRune() + if rune == EOF { s.error(io.ErrUnexpectedEOF) } - if s.peekRune >= 0 { - rune = s.peekRune - s.peekRune = -1 - return - } - rune, _, err := s.rr.ReadRune() - if err != nil { - if err == os.EOF { - err = io.ErrUnexpectedEOF - } - s.error(err) - } return } - -func (s *ss) UngetRune() { +func (s *ss) UnreadRune() os.Error { if u, ok := s.rr.(runeUnreader); ok { u.UnreadRune() } else { s.peekRune = s.prevRune } + s.count-- + return nil } func (s *ss) error(err os.Error) { @@ -300,23 +314,43 @@ func (r *readRune) ReadRune() (rune int, size int, err os.Error) { var ssFree = newCache(func() interface{} { return new(ss) }) // Allocate a new ss struct or grab a cached one. -func newScanState(r io.Reader, nlIsSpace bool) *ss { - s := ssFree.get().(*ss) +func newScanState(r io.Reader, nlIsSpace, nlIsEnd bool) (s *ss, old ssave) { + // If the reader is a *ss, then we've got a recursive + // call to Scan, so re-use the scan state. + s, ok := r.(*ss) + if ok { + old = s.ssave + s.limit = s.fieldLimit + s.nlIsEnd = nlIsEnd || s.nlIsEnd + s.nlIsSpace = nlIsSpace + return + } + + s = ssFree.get().(*ss) if rr, ok := r.(io.RuneReader); ok { s.rr = rr } else { s.rr = &readRune{reader: r} } s.nlIsSpace = nlIsSpace + s.nlIsEnd = nlIsEnd + s.prevRune = -1 s.peekRune = -1 s.atEOF = false - s.maxWid = 0 - s.widPresent = false - return s + s.limit = hugeWid + s.fieldLimit = hugeWid + s.maxWid = hugeWid + s.validSave = true + return } // Save used ss structs in ssFree; avoid an allocation per invocation. -func (s *ss) free() { +func (s *ss) free(old ssave) { + // If it was used recursively, just restore the old state. + if old.validSave { + s.ssave = old + return + } // Don't hold on to ss structs with large buffers. 
if cap(s.buf.Bytes()) > 1024 { return @@ -344,7 +378,7 @@ func (s *ss) skipSpace(stopAtNewline bool) { return } if !unicode.IsSpace(rune) { - s.UngetRune() + s.UnreadRune() break } } @@ -356,13 +390,13 @@ func (s *ss) skipSpace(stopAtNewline bool) { func (s *ss) token() string { s.skipSpace(false) // read until white space or newline - for nrunes := 0; !s.widPresent || nrunes < s.maxWid; nrunes++ { + for { rune := s.getRune() if rune == EOF { break } if unicode.IsSpace(rune) { - s.UngetRune() + s.UnreadRune() break } s.buf.WriteRune(rune) @@ -381,9 +415,6 @@ var boolError = os.ErrorString("syntax error scanning boolean") // consume reads the next rune in the input and reports whether it is in the ok string. // If accept is true, it puts the character into the input token. func (s *ss) consume(ok string, accept bool) bool { - if s.wid >= s.maxWid { - return false - } rune := s.getRune() if rune == EOF { return false @@ -391,12 +422,11 @@ func (s *ss) consume(ok string, accept bool) bool { if strings.IndexRune(ok, rune) >= 0 { if accept { s.buf.WriteRune(rune) - s.wid++ } return true } if rune != EOF && accept { - s.UngetRune() + s.UnreadRune() } return false } @@ -405,7 +435,7 @@ func (s *ss) consume(ok string, accept bool) bool { func (s *ss) peek(ok string) bool { rune := s.getRune() if rune != EOF { - s.UngetRune() + s.UnreadRune() } return strings.IndexRune(ok, rune) >= 0 } @@ -433,7 +463,7 @@ func (s *ss) scanBool(verb int) bool { return false } // Syntax-checking a boolean is annoying. We're not fastidious about case. - switch s.mustGetRune() { + switch s.mustReadRune() { case '0': return false case '1': @@ -494,7 +524,7 @@ func (s *ss) scanNumber(digits string, haveDigits bool) string { // scanRune returns the next rune value in the input. 
func (s *ss) scanRune(bitSize int) int64 { - rune := int64(s.mustGetRune()) + rune := int64(s.mustReadRune()) n := uint(bitSize) x := (rune << (64 - n)) >> (64 - n) if x != rune { @@ -710,12 +740,12 @@ func (s *ss) convertString(verb int) (str string) { // quotedString returns the double- or back-quoted string represented by the next input characters. func (s *ss) quotedString() string { - quote := s.mustGetRune() + quote := s.mustReadRune() switch quote { case '`': // Back-quoted: Anything goes until EOF or back quote. for { - rune := s.mustGetRune() + rune := s.mustReadRune() if rune == quote { break } @@ -726,13 +756,13 @@ func (s *ss) quotedString() string { // Double-quoted: Include the quotes and let strconv.Unquote do the backslash escapes. s.buf.WriteRune(quote) for { - rune := s.mustGetRune() + rune := s.mustReadRune() s.buf.WriteRune(rune) if rune == '\\' { // In a legal backslash escape, no matter how long, only the character // immediately after the escape can itself be a backslash or quote. // Thus we only need to protect the first character after the backslash. - rune := s.mustGetRune() + rune := s.mustReadRune() s.buf.WriteRune(rune) } else if rune == '"' { break @@ -771,10 +801,10 @@ func (s *ss) hexByte() (b byte, ok bool) { return } if unicode.IsSpace(rune1) { - s.UngetRune() + s.UnreadRune() return } - rune2 := s.mustGetRune() + rune2 := s.mustReadRune() return byte(s.hexDigit(rune1)<<4 | s.hexDigit(rune2)), true } @@ -796,6 +826,8 @@ func (s *ss) hexString() string { const floatVerbs = "beEfFgGv" +const hugeWid = 1 << 30 + // scanOne scans a single value, deriving the scanner from the type of the argument. 
func (s *ss) scanOne(verb int, field interface{}) { s.buf.Reset() @@ -804,14 +836,13 @@ func (s *ss) scanOne(verb int, field interface{}) { if v, ok := field.(Scanner); ok { err = v.Scan(s, verb) if err != nil { + if err == os.EOF { + err = io.ErrUnexpectedEOF + } s.error(err) } return } - if !s.widPresent { - s.maxWid = 1 << 30 // Huge - } - s.wid = 0 switch v := field.(type) { case *bool: *v = s.scanBool(verb) @@ -912,7 +943,6 @@ func errorHandler(errp *os.Error) { } // doScan does the real work for scanning without a format string. -// At the moment, it handles only pointers to basic types. func (s *ss) doScan(a []interface{}) (numProcessed int, err os.Error) { defer errorHandler(&err) for _, field := range a { @@ -973,9 +1003,9 @@ func (s *ss) advance(format string) (i int) { s.skipSpace(true) continue } - inputc := s.mustGetRune() + inputc := s.mustReadRune() if fmtc != inputc { - s.UngetRune() + s.UnreadRune() return -1 } i += w @@ -1007,7 +1037,15 @@ func (s *ss) doScanf(format string, a []interface{}) (numProcessed int, err os.E i++ // % is one byte // do we have 20 (width)? - s.maxWid, s.widPresent, i = parsenum(format, i, end) + var widPresent bool + s.maxWid, widPresent, i = parsenum(format, i, end) + if !widPresent { + s.maxWid = hugeWid + } + s.fieldLimit = s.limit + if f := s.count + s.maxWid; f < s.fieldLimit { + s.fieldLimit = f + } c, w := utf8.DecodeRuneInString(format[i:]) i += w @@ -1020,6 +1058,7 @@ func (s *ss) doScanf(format string, a []interface{}) (numProcessed int, err os.E s.scanOne(c, field) numProcessed++ + s.fieldLimit = s.limit } if numProcessed < len(a) { s.errorString("too many operands") diff --git a/src/pkg/fmt/scan_test.go b/src/pkg/fmt/scan_test.go index 478b10923..cab86dd98 100644 --- a/src/pkg/fmt/scan_test.go +++ b/src/pkg/fmt/scan_test.go @@ -6,6 +6,7 @@ package fmt_test import ( "bufio" + "bytes" . 
"fmt" "io" "math" @@ -87,21 +88,7 @@ type FloatTest struct { type Xs string func (x *Xs) Scan(state ScanState, verb int) os.Error { - var tok string - var c int - var err os.Error - wid, present := state.Width() - if !present { - tok, err = state.Token() - } else { - for i := 0; i < wid; i++ { - c, err = state.GetRune() - if err != nil { - break - } - tok += string(c) - } - } + tok, err := state.Token() if err != nil { return err } @@ -114,6 +101,26 @@ func (x *Xs) Scan(state ScanState, verb int) os.Error { var xVal Xs +// IntString accepts an integer followed immediately by a string. +// It tests the embedding of a scan within a scan. +type IntString struct { + i int + s string +} + +func (s *IntString) Scan(state ScanState, verb int) os.Error { + if _, err := Fscan(state, &s.i); err != nil { + return err + } + + if _, err := Fscan(state, &s.s); err != nil { + return err + } + return nil +} + +var intStringVal IntString + // myStringReader implements Read but not ReadRune, allowing us to test our readRune wrapper // type that creates something that can read runes given only Read(). type myStringReader struct { @@ -200,8 +207,9 @@ var scanTests = []ScanTest{ {"114\n", &renamedStringVal, renamedString("114")}, {"115\n", &renamedBytesVal, renamedBytes([]byte("115"))}, - // Custom scanner. + // Custom scanners. {" vvv ", &xVal, Xs("vvv")}, + {" 1234hello", &intStringVal, IntString{1234, "hello"}}, // Fixed bugs {"2147483648\n", &int64Val, int64(2147483648)}, // was: integer overflow @@ -308,6 +316,7 @@ var f float64 var s, t string var c complex128 var x, y Xs +var z IntString var multiTests = []ScanfMultiTest{ {"", "", nil, nil, ""}, @@ -321,8 +330,9 @@ var multiTests = []ScanfMultiTest{ {"%d%s", "123abc", args(&i, &s), args(123, "abc"), ""}, {"%c%c%c", "2\u50c2X", args(&i, &j, &k), args('2', '\u50c2', 'X'), ""}, - // Custom scanner. + // Custom scanners. 
{"%2e%f", "eefffff", args(&x, &y), args(Xs("ee"), Xs("fffff")), ""}, + {"%4v%s", "12abcd", args(&z, &s), args(IntString{12, "ab"}, "cd"), ""}, // Errors {"%t", "23 18", args(&i), nil, "bad verb"}, @@ -345,7 +355,11 @@ func testScan(name string, t *testing.T, scan func(r io.Reader, a ...interface{} } n, err := scan(r, test.in) if err != nil { - t.Errorf("%s got error scanning %q: %s", name, test.text, err) + m := "" + if n > 0 { + m = Sprintf(" (%d fields ok)", n) + } + t.Errorf("%s got error scanning %q: %s%s", name, test.text, err, m) continue } if n != 1 { @@ -462,22 +476,12 @@ func verifyInf(str string, t *testing.T) { } } - func TestInf(t *testing.T) { for _, s := range []string{"inf", "+inf", "-inf", "INF", "-INF", "+INF", "Inf", "-Inf", "+Inf"} { verifyInf(s, t) } } -// TODO: there's no conversion from []T to ...T, but we can fake it. These -// functions do the faking. We index the table by the length of the param list. -var fscanf = []func(io.Reader, string, []interface{}) (int, os.Error){ - 0: func(r io.Reader, f string, i []interface{}) (int, os.Error) { return Fscanf(r, f) }, - 1: func(r io.Reader, f string, i []interface{}) (int, os.Error) { return Fscanf(r, f, i[0]) }, - 2: func(r io.Reader, f string, i []interface{}) (int, os.Error) { return Fscanf(r, f, i[0], i[1]) }, - 3: func(r io.Reader, f string, i []interface{}) (int, os.Error) { return Fscanf(r, f, i[0], i[1], i[2]) }, -} - func testScanfMulti(name string, t *testing.T) { sliceType := reflect.Typeof(make([]interface{}, 1)).(*reflect.SliceType) for _, test := range multiTests { @@ -487,7 +491,7 @@ func testScanfMulti(name string, t *testing.T) { } else { r = newReader(test.text) } - n, err := fscanf[len(test.in)](r, test.format, test.in) + n, err := Fscanf(r, test.format, test.in...) 
if err != nil { if test.err == "" { t.Errorf("got error scanning (%q, %q): %q", test.format, test.text, err) @@ -673,3 +677,178 @@ func TestUnreadRuneWithBufio(t *testing.T) { t.Errorf("expected αb; got %q", a) } } + +type TwoLines string + +// Attempt to read two lines into the object. Scanln should prevent this +// because it stops at newline; Scan and Scanf should be fine. +func (t *TwoLines) Scan(state ScanState, verb int) os.Error { + chars := make([]int, 0, 100) + for nlCount := 0; nlCount < 2; { + c, _, err := state.ReadRune() + if err != nil { + return err + } + chars = append(chars, c) + if c == '\n' { + nlCount++ + } + } + *t = TwoLines(string(chars)) + return nil +} + +func TestMultiLine(t *testing.T) { + input := "abc\ndef\n" + // Sscan should work + var tscan TwoLines + n, err := Sscan(input, &tscan) + if n != 1 { + t.Errorf("Sscan: expected 1 item; got %d", n) + } + if err != nil { + t.Errorf("Sscan: expected no error; got %s", err) + } + if string(tscan) != input { + t.Errorf("Sscan: expected %q; got %q", input, tscan) + } + // Sscanf should work + var tscanf TwoLines + n, err = Sscanf(input, "%s", &tscanf) + if n != 1 { + t.Errorf("Sscanf: expected 1 item; got %d", n) + } + if err != nil { + t.Errorf("Sscanf: expected no error; got %s", err) + } + if string(tscanf) != input { + t.Errorf("Sscanf: expected %q; got %q", input, tscanf) + } + // Sscanln should not work + var tscanln TwoLines + n, err = Sscanln(input, &tscanln) + if n != 0 { + t.Errorf("Sscanln: expected 0 items; got %d: %q", n, tscanln) + } + if err == nil { + t.Error("Sscanln: expected error; got none") + } else if err != io.ErrUnexpectedEOF { + t.Errorf("Sscanln: expected io.ErrUnexpectedEOF (ha!); got %s", err) + } +} + +// RecursiveInt accepts an string matching %d.%d.%d.... +// and parses it into a linked list. +// It allows us to benchmark recursive descent style scanners. 
+type RecursiveInt struct { + i int + next *RecursiveInt +} + +func (r *RecursiveInt) Scan(state ScanState, verb int) (err os.Error) { + _, err = Fscan(state, &r.i) + if err != nil { + return + } + next := new(RecursiveInt) + _, err = Fscanf(state, ".%v", next) + if err != nil { + if err == os.ErrorString("input does not match format") || err == io.ErrUnexpectedEOF { + err = nil + } + return + } + r.next = next + return +} + +// Perform the same scanning task as RecursiveInt.Scan +// but without recurring through scanner, so we can compare +// performance more directly. +func scanInts(r *RecursiveInt, b *bytes.Buffer) (err os.Error) { + r.next = nil + _, err = Fscan(b, &r.i) + if err != nil { + return + } + var c int + c, _, err = b.ReadRune() + if err != nil { + if err == os.EOF { + err = nil + } + return + } + if c != '.' { + return + } + next := new(RecursiveInt) + err = scanInts(next, b) + if err == nil { + r.next = next + } + return +} + +func makeInts(n int) []byte { + var buf bytes.Buffer + Fprintf(&buf, "1") + for i := 1; i < n; i++ { + Fprintf(&buf, ".%d", i+1) + } + return buf.Bytes() +} + +func TestScanInts(t *testing.T) { + testScanInts(t, scanInts) + testScanInts(t, func(r *RecursiveInt, b *bytes.Buffer) (err os.Error) { + _, err = Fscan(b, r) + return + }) +} + +const intCount = 1000 + +func testScanInts(t *testing.T, scan func(*RecursiveInt, *bytes.Buffer) os.Error) { + r := new(RecursiveInt) + ints := makeInts(intCount) + buf := bytes.NewBuffer(ints) + err := scan(r, buf) + if err != nil { + t.Error("unexpected error", err) + } + i := 1 + for ; r != nil; r = r.next { + if r.i != i { + t.Fatal("bad scan: expected %d got %d", i, r.i) + } + i++ + } + if i-1 != intCount { + t.Fatal("bad scan count: expected %d got %d", intCount, i-1) + } +} + +func BenchmarkScanInts(b *testing.B) { + b.ResetTimer() + ints := makeInts(intCount) + var r RecursiveInt + for i := b.N - 1; i >= 0; i-- { + buf := bytes.NewBuffer(ints) + b.StartTimer() + scanInts(&r, buf) + 
b.StopTimer() + } +} + +func BenchmarkScanRecursiveInt(b *testing.B) { + b.ResetTimer() + ints := makeInts(intCount) + var r RecursiveInt + for i := b.N - 1; i >= 0; i-- { + buf := bytes.NewBuffer(ints) + b.StartTimer() + Fscan(buf, &r) + b.StopTimer() + } +} diff --git a/src/pkg/go/ast/ast.go b/src/pkg/go/ast/ast.go index 2e8f0973f..abafb5663 100644 --- a/src/pkg/go/ast/ast.go +++ b/src/pkg/go/ast/ast.go @@ -597,7 +597,7 @@ type ( IfStmt struct { If token.Pos // position of "if" keyword Init Stmt // initalization statement; or nil - Cond Expr // condition; or nil + Cond Expr // condition Body *BlockStmt Else Stmt // else branch; or nil } diff --git a/src/pkg/go/ast/walk.go b/src/pkg/go/ast/walk.go index a77f8ee5e..20c337c3b 100644 --- a/src/pkg/go/ast/walk.go +++ b/src/pkg/go/ast/walk.go @@ -227,9 +227,7 @@ func Walk(v Visitor, node Node) { if n.Init != nil { Walk(v, n.Init) } - if n.Cond != nil { - Walk(v, n.Cond) - } + Walk(v, n.Cond) Walk(v, n.Body) if n.Else != nil { Walk(v, n.Else) diff --git a/src/pkg/go/parser/interface.go b/src/pkg/go/parser/interface.go index 84d699a67..6f35b495e 100644 --- a/src/pkg/go/parser/interface.go +++ b/src/pkg/go/parser/interface.go @@ -14,7 +14,7 @@ import ( "io" "io/ioutil" "os" - pathutil "path" + "path/filepath" ) @@ -198,7 +198,7 @@ func ParseDir(fset *token.FileSet, path string, filter func(*os.FileInfo) bool, for i := 0; i < len(list); i++ { d := &list[i] if filter == nil || filter(d) { - filenames[n] = pathutil.Join(path, d.Name) + filenames[n] = filepath.Join(path, d.Name) n++ } } diff --git a/src/pkg/go/parser/parser.go b/src/pkg/go/parser/parser.go index 2395b8158..7c5843f36 100644 --- a/src/pkg/go/parser/parser.go +++ b/src/pkg/go/parser/parser.go @@ -1327,44 +1327,34 @@ func (p *parser) makeExpr(s ast.Stmt) ast.Expr { } -func (p *parser) parseControlClause(isForStmt bool) (s1, s2, s3 ast.Stmt) { - if p.tok != token.LBRACE { +func (p *parser) parseIfStmt() *ast.IfStmt { + if p.trace { + defer un(trace(p, "IfStmt")) + 
} + + pos := p.expect(token.IF) + + var s ast.Stmt + var x ast.Expr + { prevLev := p.exprLev p.exprLev = -1 - - if p.tok != token.SEMICOLON { - s1 = p.parseSimpleStmt(false) - } if p.tok == token.SEMICOLON { p.next() - if p.tok != token.LBRACE && p.tok != token.SEMICOLON { - s2 = p.parseSimpleStmt(false) - } - if isForStmt { - // for statements have a 3rd section - p.expectSemi() - if p.tok != token.LBRACE { - s3 = p.parseSimpleStmt(false) - } - } + x = p.parseExpr() } else { - s1, s2 = nil, s1 + s = p.parseSimpleStmt(false) + if p.tok == token.SEMICOLON { + p.next() + x = p.parseExpr() + } else { + x = p.makeExpr(s) + s = nil + } } - p.exprLev = prevLev } - return s1, s2, s3 -} - - -func (p *parser) parseIfStmt() *ast.IfStmt { - if p.trace { - defer un(trace(p, "IfStmt")) - } - - pos := p.expect(token.IF) - s1, s2, _ := p.parseControlClause(false) body := p.parseBlockStmt() var else_ ast.Stmt if p.tok == token.ELSE { @@ -1374,7 +1364,7 @@ func (p *parser) parseIfStmt() *ast.IfStmt { p.expectSemi() } - return &ast.IfStmt{pos, s1, p.makeExpr(s2), body, else_} + return &ast.IfStmt{pos, s, x, body, else_} } @@ -1457,7 +1447,24 @@ func (p *parser) parseSwitchStmt() ast.Stmt { } pos := p.expect(token.SWITCH) - s1, s2, _ := p.parseControlClause(false) + + var s1, s2 ast.Stmt + if p.tok != token.LBRACE { + prevLev := p.exprLev + p.exprLev = -1 + if p.tok != token.SEMICOLON { + s2 = p.parseSimpleStmt(false) + } + if p.tok == token.SEMICOLON { + p.next() + s1 = s2 + s2 = nil + if p.tok != token.LBRACE { + s2 = p.parseSimpleStmt(false) + } + } + p.exprLev = prevLev + } if isExprSwitch(s2) { lbrace := p.expect(token.LBRACE) @@ -1575,7 +1582,29 @@ func (p *parser) parseForStmt() ast.Stmt { } pos := p.expect(token.FOR) - s1, s2, s3 := p.parseControlClause(true) + + var s1, s2, s3 ast.Stmt + if p.tok != token.LBRACE { + prevLev := p.exprLev + p.exprLev = -1 + if p.tok != token.SEMICOLON { + s2 = p.parseSimpleStmt(false) + } + if p.tok == token.SEMICOLON { + p.next() + s1 = s2 + 
s2 = nil + if p.tok != token.SEMICOLON { + s2 = p.parseSimpleStmt(false) + } + p.expectSemi() + if p.tok != token.LBRACE { + s3 = p.parseSimpleStmt(false) + } + } + p.exprLev = prevLev + } + body := p.parseBlockStmt() p.expectSemi() diff --git a/src/pkg/go/parser/parser_test.go b/src/pkg/go/parser/parser_test.go index 5a7f05ca8..38535627a 100644 --- a/src/pkg/go/parser/parser_test.go +++ b/src/pkg/go/parser/parser_test.go @@ -18,6 +18,9 @@ var illegalInputs = []interface{}{ 3.14, []byte(nil), "foo!", + `package p; func f() { if /* should have condition */ {} };`, + `package p; func f() { if ; /* should have condition */ {} };`, + `package p; func f() { if f(); /* should have condition */ {} };`, } @@ -32,21 +35,23 @@ func TestParseIllegalInputs(t *testing.T) { var validPrograms = []interface{}{ - "package main\n", - `package main;`, - `package main; import "fmt"; func main() { fmt.Println("Hello, World!") };`, - `package main; func main() { if f(T{}) {} };`, - `package main; func main() { _ = (<-chan int)(x) };`, - `package main; func main() { _ = (<-chan <-chan int)(x) };`, - `package main; func f(func() func() func());`, - `package main; func f(...T);`, - `package main; func f(float, ...int);`, - `package main; func f(x int, a ...int) { f(0, a...); f(1, a...,) };`, - `package main; type T []int; var a []bool; func f() { if a[T{42}[0]] {} };`, - `package main; type T []int; func g(int) bool { return true }; func f() { if g(T{42}[0]) {} };`, - `package main; type T []int; func f() { for _ = range []int{T{42}[0]} {} };`, - `package main; var a = T{{1, 2}, {3, 4}}`, - `package main; func f() { select { case <- c: case c <- d: case c <- <- d: case <-c <- d: } };`, + "package p\n", + `package p;`, + `package p; import "fmt"; func f() { fmt.Println("Hello, World!") };`, + `package p; func f() { if f(T{}) {} };`, + `package p; func f() { _ = (<-chan int)(x) };`, + `package p; func f() { _ = (<-chan <-chan int)(x) };`, + `package p; func f(func() func() func());`, + 
`package p; func f(...T);`, + `package p; func f(float, ...int);`, + `package p; func f(x int, a ...int) { f(0, a...); f(1, a...,) };`, + `package p; type T []int; var a []bool; func f() { if a[T{42}[0]] {} };`, + `package p; type T []int; func g(int) bool { return true }; func f() { if g(T{42}[0]) {} };`, + `package p; type T []int; func f() { for _ = range []int{T{42}[0]} {} };`, + `package p; var a = T{{1, 2}, {3, 4}}`, + `package p; func f() { select { case <- c: case c <- d: case c <- <- d: case <-c <- d: } };`, + `package p; func f() { if ; true {} };`, + `package p; func f() { switch ; {} };`, } diff --git a/src/pkg/go/printer/printer.go b/src/pkg/go/printer/printer.go index 48e2af1b7..90d9784ac 100644 --- a/src/pkg/go/printer/printer.go +++ b/src/pkg/go/printer/printer.go @@ -12,7 +12,7 @@ import ( "go/token" "io" "os" - "path" + "path/filepath" "runtime" "tabwriter" ) @@ -244,7 +244,7 @@ func (p *printer) writeItem(pos token.Position, data []byte) { } if debug { // do not update p.pos - use write0 - _, filename := path.Split(pos.Filename) + _, filename := filepath.Split(pos.Filename) p.write0([]byte(fmt.Sprintf("[%s:%d:%d]", filename, pos.Line, pos.Column))) } p.write(data) diff --git a/src/pkg/go/printer/printer_test.go b/src/pkg/go/printer/printer_test.go index 565075aa2..62b726913 100644 --- a/src/pkg/go/printer/printer_test.go +++ b/src/pkg/go/printer/printer_test.go @@ -11,7 +11,7 @@ import ( "go/ast" "go/parser" "go/token" - "path" + "path/filepath" "testing" ) @@ -129,8 +129,8 @@ var data = []entry{ func TestFiles(t *testing.T) { for _, e := range data { - source := path.Join(dataDir, e.source) - golden := path.Join(dataDir, e.golden) + source := filepath.Join(dataDir, e.source) + golden := filepath.Join(dataDir, e.golden) check(t, source, golden, e.mode) // TODO(gri) check that golden is idempotent //check(t, golden, golden, e.mode); diff --git a/src/pkg/go/printer/testdata/statements.golden b/src/pkg/go/printer/testdata/statements.golden index 
5eceb7dd5..290060269 100644 --- a/src/pkg/go/printer/testdata/statements.golden +++ b/src/pkg/go/printer/testdata/statements.golden @@ -10,9 +10,9 @@ func use(x interface{}) {} // Formatting of if-statement headers. func _() { - if { + if true { } - if { + if true { } // no semicolon printed if expr { } @@ -22,7 +22,7 @@ func _() { } // no parens printed if expr { } // no semicolon and parens printed - if x := expr; { + if x := expr; true { use(x) } if x := expr; expr { @@ -354,14 +354,14 @@ func _() { func _() { - if { + if true { _ = 0 } _ = 0 // the indentation here should not be affected by the long label name AnOverlongLabel: _ = 0 - if { + if true { _ = 0 } _ = 0 diff --git a/src/pkg/go/printer/testdata/statements.input b/src/pkg/go/printer/testdata/statements.input index 7819820ed..21e61efc4 100644 --- a/src/pkg/go/printer/testdata/statements.input +++ b/src/pkg/go/printer/testdata/statements.input @@ -10,13 +10,13 @@ func use(x interface{}) {} // Formatting of if-statement headers. 
func _() { - if {} - if;{} // no semicolon printed + if true {} + if; true {} // no semicolon printed if expr{} if;expr{} // no semicolon printed if (expr){} // no parens printed if;((expr)){} // no semicolon and parens printed - if x:=expr;{ + if x:=expr;true{ use(x)} if x:=expr; expr {use(x)} } @@ -271,14 +271,14 @@ func _() { func _() { - if { + if true { _ = 0 } _ = 0 // the indentation here should not be affected by the long label name AnOverlongLabel: _ = 0 - if { + if true { _ = 0 } _ = 0 diff --git a/src/pkg/go/scanner/scanner.go b/src/pkg/go/scanner/scanner.go index 8c3205230..153707f59 100644 --- a/src/pkg/go/scanner/scanner.go +++ b/src/pkg/go/scanner/scanner.go @@ -8,7 +8,8 @@ // // var s Scanner // fset := token.NewFileSet() // position information is relative to fset -// s.Init(fset, filename, src, nil /* no error handler */, 0) +// file := fset.AddFile(filename, fset.Base(), len(src)) // register file +// s.Init(file, src, nil /* no error handler */, 0) // for { // pos, tok, lit := s.Scan() // if tok == token.EOF { @@ -22,7 +23,7 @@ package scanner import ( "bytes" "go/token" - "path" + "path/filepath" "strconv" "unicode" "utf8" @@ -117,7 +118,7 @@ func (S *Scanner) Init(file *token.File, src []byte, err ErrorHandler, mode uint panic("file size does not match src len") } S.file = file - S.dir, _ = path.Split(file.Name()) + S.dir, _ = filepath.Split(file.Name()) S.src = src S.err = err S.mode = mode @@ -179,10 +180,10 @@ func (S *Scanner) interpretLineComment(text []byte) { if i := bytes.Index(text, []byte{':'}); i > 0 { if line, err := strconv.Atoi(string(text[i+1:])); err == nil && line > 0 { // valid //line filename:line comment; - filename := path.Clean(string(text[len(prefix):i])) + filename := filepath.Clean(string(text[len(prefix):i])) if filename[0] != '/' { // make filename relative to current directory - filename = path.Join(S.dir, filename) + filename = filepath.Join(S.dir, filename) } // update scanner position 
S.file.AddLineInfo(S.lineOffset, filename, line-1) // -1 since comment applies to next line diff --git a/src/pkg/gob/codec_test.go b/src/pkg/gob/codec_test.go index fe1f60ba7..4562e1930 100644 --- a/src/pkg/gob/codec_test.go +++ b/src/pkg/gob/codec_test.go @@ -303,7 +303,7 @@ func TestScalarEncInstructions(t *testing.T) { } } -func execDec(typ string, instr *decInstr, state *decodeState, t *testing.T, p unsafe.Pointer) { +func execDec(typ string, instr *decInstr, state *decoderState, t *testing.T, p unsafe.Pointer) { defer testError(t) v := int(state.decodeUint()) if v+state.fieldnum != 6 { @@ -313,7 +313,7 @@ func execDec(typ string, instr *decInstr, state *decodeState, t *testing.T, p un state.fieldnum = 6 } -func newDecodeStateFromData(data []byte) *decodeState { +func newDecodeStateFromData(data []byte) *decoderState { b := bytes.NewBuffer(data) state := newDecodeState(nil, b) state.fieldnum = -1 @@ -342,7 +342,7 @@ func TestScalarDecInstructions(t *testing.T) { var data struct { a int } - instr := &decInstr{decOpMap[reflect.Int], 6, 0, 0, ovfl} + instr := &decInstr{decOpTable[reflect.Int], 6, 0, 0, ovfl} state := newDecodeStateFromData(signedResult) execDec("int", instr, state, t, unsafe.Pointer(&data)) if data.a != 17 { @@ -355,7 +355,7 @@ func TestScalarDecInstructions(t *testing.T) { var data struct { a uint } - instr := &decInstr{decOpMap[reflect.Uint], 6, 0, 0, ovfl} + instr := &decInstr{decOpTable[reflect.Uint], 6, 0, 0, ovfl} state := newDecodeStateFromData(unsignedResult) execDec("uint", instr, state, t, unsafe.Pointer(&data)) if data.a != 17 { @@ -446,7 +446,7 @@ func TestScalarDecInstructions(t *testing.T) { var data struct { a uintptr } - instr := &decInstr{decOpMap[reflect.Uintptr], 6, 0, 0, ovfl} + instr := &decInstr{decOpTable[reflect.Uintptr], 6, 0, 0, ovfl} state := newDecodeStateFromData(unsignedResult) execDec("uintptr", instr, state, t, unsafe.Pointer(&data)) if data.a != 17 { @@ -511,7 +511,7 @@ func TestScalarDecInstructions(t *testing.T) 
{ var data struct { a complex64 } - instr := &decInstr{decOpMap[reflect.Complex64], 6, 0, 0, ovfl} + instr := &decInstr{decOpTable[reflect.Complex64], 6, 0, 0, ovfl} state := newDecodeStateFromData(complexResult) execDec("complex", instr, state, t, unsafe.Pointer(&data)) if data.a != 17+19i { @@ -524,7 +524,7 @@ func TestScalarDecInstructions(t *testing.T) { var data struct { a complex128 } - instr := &decInstr{decOpMap[reflect.Complex128], 6, 0, 0, ovfl} + instr := &decInstr{decOpTable[reflect.Complex128], 6, 0, 0, ovfl} state := newDecodeStateFromData(complexResult) execDec("complex", instr, state, t, unsafe.Pointer(&data)) if data.a != 17+19i { @@ -973,18 +973,32 @@ func TestIgnoredFields(t *testing.T) { } } + +func TestBadRecursiveType(t *testing.T) { + type Rec ***Rec + var rec Rec + b := new(bytes.Buffer) + err := NewEncoder(b).Encode(&rec) + if err == nil { + t.Error("expected error; got none") + } else if strings.Index(err.String(), "recursive") < 0 { + t.Error("expected recursive type error; got", err) + } + // Can't test decode easily because we can't encode one, so we can't pass one to a Decoder. +} + type Bad0 struct { - ch chan int - c float64 + CH chan int + C float64 } -var nilEncoder *Encoder func TestInvalidField(t *testing.T) { var bad0 Bad0 - bad0.ch = make(chan int) + bad0.CH = make(chan int) b := new(bytes.Buffer) - err := nilEncoder.encode(b, reflect.NewValue(&bad0)) + var nilEncoder *Encoder + err := nilEncoder.encode(b, reflect.NewValue(&bad0), userType(reflect.Typeof(&bad0))) if err == nil { t.Error("expected error; got none") } else if strings.Index(err.String(), "type") < 0 { diff --git a/src/pkg/gob/debug.go b/src/pkg/gob/debug.go index e4583901e..69c83bda7 100644 --- a/src/pkg/gob/debug.go +++ b/src/pkg/gob/debug.go @@ -155,6 +155,16 @@ func (deb *debugger) dump(format string, args ...interface{}) { // Debug prints a human-readable representation of the gob data read from r. 
func Debug(r io.Reader) { + err := debug(r) + if err != nil { + fmt.Fprintf(os.Stderr, "gob debug: %s\n", err) + } +} + +// debug implements Debug, but catches panics and returns +// them as errors to be printed by Debug. +func debug(r io.Reader) (err os.Error) { + defer catchError(&err) fmt.Fprintln(os.Stderr, "Start of debugging") deb := &debugger{ r: newPeekReader(r), @@ -166,6 +176,7 @@ func Debug(r io.Reader) { deb.remainingKnown = true } deb.gobStream() + return } // note that we've consumed some bytes @@ -386,11 +397,15 @@ func (deb *debugger) typeDefinition(indent tab, id typeId) { // Field number 1 is type Id of key deb.delta(1) keyId := deb.typeId() - wire.SliceT = &sliceType{com, id} // Field number 2 is type Id of elem deb.delta(1) elemId := deb.typeId() wire.MapT = &mapType{com, keyId, elemId} + case 4: // GobEncoder type, one field of {{Common}} + // Field number 0 is CommonType + deb.delta(1) + com := deb.common() + wire.GobEncoderT = &gobEncoderType{com} default: errorf("bad field in type %d", fieldNum) } @@ -507,6 +522,8 @@ func (deb *debugger) printWireType(indent tab, wire *wireType) { for i, field := range wire.StructT.Field { fmt.Fprintf(os.Stderr, "%sfield %d:\t%s\tid=%d\n", indent+1, i, field.Name, field.Id) } + case wire.GobEncoderT != nil: + deb.printCommonType(indent, "GobEncoder", &wire.GobEncoderT.CommonType) } indent-- fmt.Fprintf(os.Stderr, "%s}\n", indent) @@ -538,6 +555,8 @@ func (deb *debugger) fieldValue(indent tab, id typeId) { deb.sliceValue(indent, wire) case wire.StructT != nil: deb.structValue(indent, id) + case wire.GobEncoderT != nil: + deb.gobEncoderValue(indent, id) default: panic("bad wire type for field") } @@ -654,3 +673,17 @@ func (deb *debugger) structValue(indent tab, id typeId) { fmt.Fprintf(os.Stderr, "%s} // end %s struct\n", indent, id.name()) deb.dump(">> End of struct value of type %d %q", id, id.name()) } + +// GobEncoderValue: +// uint(n) byte*n +func (deb *debugger) gobEncoderValue(indent tab, id typeId) { + 
len := deb.uint64() + deb.dump("GobEncoder value of %q id=%d, length %d\n", id.name(), id, len) + fmt.Fprintf(os.Stderr, "%s%s (implements GobEncoder)\n", indent, id.name()) + data := make([]byte, len) + _, err := deb.r.Read(data) + if err != nil { + errorf("gobEncoder data read: %s", err) + } + fmt.Fprintf(os.Stderr, "%s[% .2x]\n", indent+1, data) +} diff --git a/src/pkg/gob/decode.go b/src/pkg/gob/decode.go index 9667f6157..b7ae78200 100644 --- a/src/pkg/gob/decode.go +++ b/src/pkg/gob/decode.go @@ -13,9 +13,7 @@ import ( "math" "os" "reflect" - "unicode" "unsafe" - "utf8" ) var ( @@ -24,9 +22,9 @@ var ( errRange = os.ErrorString("gob: internal error: field numbers out of bounds") ) -// The execution state of an instance of the decoder. A new state +// decoderState is the execution state of an instance of the decoder. A new state // is created for nested objects. -type decodeState struct { +type decoderState struct { dec *Decoder // The buffer is stored with an extra indirection because it may be replaced // if we load a type during decode (when reading an interface value). @@ -37,8 +35,8 @@ type decodeState struct { // We pass the bytes.Buffer separately for easier testing of the infrastructure // without requiring a full Decoder. -func newDecodeState(dec *Decoder, buf *bytes.Buffer) *decodeState { - d := new(decodeState) +func newDecodeState(dec *Decoder, buf *bytes.Buffer) *decoderState { + d := new(decoderState) d.dec = dec d.b = buf d.buf = make([]byte, uint64Size) @@ -85,7 +83,7 @@ func decodeUintReader(r io.Reader, buf []byte) (x uint64, width int, err os.Erro // decodeUint reads an encoded unsigned integer from state.r. // Does not check for overflow. -func (state *decodeState) decodeUint() (x uint64) { +func (state *decoderState) decodeUint() (x uint64) { b, err := state.b.ReadByte() if err != nil { error(err) @@ -112,7 +110,7 @@ func (state *decodeState) decodeUint() (x uint64) { // decodeInt reads an encoded signed integer from state.r. 
// Does not check for overflow. -func (state *decodeState) decodeInt() int64 { +func (state *decoderState) decodeInt() int64 { x := state.decodeUint() if x&1 != 0 { return ^int64(x >> 1) @@ -120,7 +118,8 @@ func (state *decodeState) decodeInt() int64 { return int64(x >> 1) } -type decOp func(i *decInstr, state *decodeState, p unsafe.Pointer) +// decOp is the signature of a decoding operator for a given type. +type decOp func(i *decInstr, state *decoderState, p unsafe.Pointer) // The 'instructions' of the decoding machine type decInstr struct { @@ -150,26 +149,31 @@ func decIndirect(p unsafe.Pointer, indir int) unsafe.Pointer { return p } -func ignoreUint(i *decInstr, state *decodeState, p unsafe.Pointer) { +// ignoreUint discards a uint value with no destination. +func ignoreUint(i *decInstr, state *decoderState, p unsafe.Pointer) { state.decodeUint() } -func ignoreTwoUints(i *decInstr, state *decodeState, p unsafe.Pointer) { +// ignoreTwoUints discards two uint values with no destination. It's used to skip +// complex values. +func ignoreTwoUints(i *decInstr, state *decoderState, p unsafe.Pointer) { state.decodeUint() state.decodeUint() } -func decBool(i *decInstr, state *decodeState, p unsafe.Pointer) { +// decBool decodes a uint and stores it as a boolean through p. +func decBool(i *decInstr, state *decoderState, p unsafe.Pointer) { if i.indir > 0 { if *(*unsafe.Pointer)(p) == nil { *(*unsafe.Pointer)(p) = unsafe.Pointer(new(bool)) } p = *(*unsafe.Pointer)(p) } - *(*bool)(p) = state.decodeInt() != 0 + *(*bool)(p) = state.decodeUint() != 0 } -func decInt8(i *decInstr, state *decodeState, p unsafe.Pointer) { +// decInt8 decodes an integer and stores it as an int8 through p. 
+func decInt8(i *decInstr, state *decoderState, p unsafe.Pointer) { if i.indir > 0 { if *(*unsafe.Pointer)(p) == nil { *(*unsafe.Pointer)(p) = unsafe.Pointer(new(int8)) @@ -184,7 +188,8 @@ func decInt8(i *decInstr, state *decodeState, p unsafe.Pointer) { } } -func decUint8(i *decInstr, state *decodeState, p unsafe.Pointer) { +// decUint8 decodes an unsigned integer and stores it as a uint8 through p. +func decUint8(i *decInstr, state *decoderState, p unsafe.Pointer) { if i.indir > 0 { if *(*unsafe.Pointer)(p) == nil { *(*unsafe.Pointer)(p) = unsafe.Pointer(new(uint8)) @@ -199,7 +204,8 @@ func decUint8(i *decInstr, state *decodeState, p unsafe.Pointer) { } } -func decInt16(i *decInstr, state *decodeState, p unsafe.Pointer) { +// decInt16 decodes an integer and stores it as an int16 through p. +func decInt16(i *decInstr, state *decoderState, p unsafe.Pointer) { if i.indir > 0 { if *(*unsafe.Pointer)(p) == nil { *(*unsafe.Pointer)(p) = unsafe.Pointer(new(int16)) @@ -214,7 +220,8 @@ func decInt16(i *decInstr, state *decodeState, p unsafe.Pointer) { } } -func decUint16(i *decInstr, state *decodeState, p unsafe.Pointer) { +// decUint16 decodes an unsigned integer and stores it as a uint16 through p. +func decUint16(i *decInstr, state *decoderState, p unsafe.Pointer) { if i.indir > 0 { if *(*unsafe.Pointer)(p) == nil { *(*unsafe.Pointer)(p) = unsafe.Pointer(new(uint16)) @@ -229,7 +236,8 @@ func decUint16(i *decInstr, state *decodeState, p unsafe.Pointer) { } } -func decInt32(i *decInstr, state *decodeState, p unsafe.Pointer) { +// decInt32 decodes an integer and stores it as an int32 through p. 
+func decInt32(i *decInstr, state *decoderState, p unsafe.Pointer) { if i.indir > 0 { if *(*unsafe.Pointer)(p) == nil { *(*unsafe.Pointer)(p) = unsafe.Pointer(new(int32)) @@ -244,7 +252,8 @@ func decInt32(i *decInstr, state *decodeState, p unsafe.Pointer) { } } -func decUint32(i *decInstr, state *decodeState, p unsafe.Pointer) { +// decUint32 decodes an unsigned integer and stores it as a uint32 through p. +func decUint32(i *decInstr, state *decoderState, p unsafe.Pointer) { if i.indir > 0 { if *(*unsafe.Pointer)(p) == nil { *(*unsafe.Pointer)(p) = unsafe.Pointer(new(uint32)) @@ -259,7 +268,8 @@ func decUint32(i *decInstr, state *decodeState, p unsafe.Pointer) { } } -func decInt64(i *decInstr, state *decodeState, p unsafe.Pointer) { +// decInt64 decodes an integer and stores it as an int64 through p. +func decInt64(i *decInstr, state *decoderState, p unsafe.Pointer) { if i.indir > 0 { if *(*unsafe.Pointer)(p) == nil { *(*unsafe.Pointer)(p) = unsafe.Pointer(new(int64)) @@ -269,7 +279,8 @@ func decInt64(i *decInstr, state *decodeState, p unsafe.Pointer) { *(*int64)(p) = int64(state.decodeInt()) } -func decUint64(i *decInstr, state *decodeState, p unsafe.Pointer) { +// decUint64 decodes an unsigned integer and stores it as a uint64 through p. +func decUint64(i *decInstr, state *decoderState, p unsafe.Pointer) { if i.indir > 0 { if *(*unsafe.Pointer)(p) == nil { *(*unsafe.Pointer)(p) = unsafe.Pointer(new(uint64)) @@ -294,7 +305,9 @@ func floatFromBits(u uint64) float64 { return math.Float64frombits(v) } -func storeFloat32(i *decInstr, state *decodeState, p unsafe.Pointer) { +// storeFloat32 decodes an unsigned integer, treats it as a 32-bit floating-point +// number, and stores it through p. It's a helper function for float32 and complex64. 
+func storeFloat32(i *decInstr, state *decoderState, p unsafe.Pointer) { v := floatFromBits(state.decodeUint()) av := v if av < 0 { @@ -308,7 +321,9 @@ func storeFloat32(i *decInstr, state *decodeState, p unsafe.Pointer) { } } -func decFloat32(i *decInstr, state *decodeState, p unsafe.Pointer) { +// decFloat32 decodes an unsigned integer, treats it as a 32-bit floating-point +// number, and stores it through p. +func decFloat32(i *decInstr, state *decoderState, p unsafe.Pointer) { if i.indir > 0 { if *(*unsafe.Pointer)(p) == nil { *(*unsafe.Pointer)(p) = unsafe.Pointer(new(float32)) @@ -318,7 +333,9 @@ func decFloat32(i *decInstr, state *decodeState, p unsafe.Pointer) { storeFloat32(i, state, p) } -func decFloat64(i *decInstr, state *decodeState, p unsafe.Pointer) { +// decFloat64 decodes an unsigned integer, treats it as a 64-bit floating-point +// number, and stores it through p. +func decFloat64(i *decInstr, state *decoderState, p unsafe.Pointer) { if i.indir > 0 { if *(*unsafe.Pointer)(p) == nil { *(*unsafe.Pointer)(p) = unsafe.Pointer(new(float64)) @@ -328,8 +345,10 @@ func decFloat64(i *decInstr, state *decodeState, p unsafe.Pointer) { *(*float64)(p) = floatFromBits(uint64(state.decodeUint())) } -// Complex numbers are just a pair of floating-point numbers, real part first. -func decComplex64(i *decInstr, state *decodeState, p unsafe.Pointer) { +// decComplex64 decodes a pair of unsigned integers, treats them as a +// pair of floating point numbers, and stores them as a complex64 through p. +// The real part comes first. 
+func decComplex64(i *decInstr, state *decoderState, p unsafe.Pointer) { if i.indir > 0 { if *(*unsafe.Pointer)(p) == nil { *(*unsafe.Pointer)(p) = unsafe.Pointer(new(complex64)) @@ -340,7 +359,10 @@ func decComplex64(i *decInstr, state *decodeState, p unsafe.Pointer) { storeFloat32(i, state, unsafe.Pointer(uintptr(p)+uintptr(unsafe.Sizeof(float32(0))))) } -func decComplex128(i *decInstr, state *decodeState, p unsafe.Pointer) { +// decComplex128 decodes a pair of unsigned integers, treats them as a +// pair of floating point numbers, and stores them as a complex128 through p. +// The real part comes first. +func decComplex128(i *decInstr, state *decoderState, p unsafe.Pointer) { if i.indir > 0 { if *(*unsafe.Pointer)(p) == nil { *(*unsafe.Pointer)(p) = unsafe.Pointer(new(complex128)) @@ -352,8 +374,10 @@ func decComplex128(i *decInstr, state *decodeState, p unsafe.Pointer) { *(*complex128)(p) = complex(real, imag) } +// decUint8Array decodes byte array and stores through p a slice header +// describing the data. // uint8 arrays are encoded as an unsigned count followed by the raw bytes. -func decUint8Array(i *decInstr, state *decodeState, p unsafe.Pointer) { +func decUint8Array(i *decInstr, state *decoderState, p unsafe.Pointer) { if i.indir > 0 { if *(*unsafe.Pointer)(p) == nil { *(*unsafe.Pointer)(p) = unsafe.Pointer(new([]uint8)) @@ -365,8 +389,10 @@ func decUint8Array(i *decInstr, state *decodeState, p unsafe.Pointer) { *(*[]uint8)(p) = b } +// decString decodes byte array and stores through p a string header +// describing the data. // Strings are encoded as an unsigned count followed by the raw bytes. 
-func decString(i *decInstr, state *decodeState, p unsafe.Pointer) { +func decString(i *decInstr, state *decoderState, p unsafe.Pointer) { if i.indir > 0 { if *(*unsafe.Pointer)(p) == nil { *(*unsafe.Pointer)(p) = unsafe.Pointer(new([]byte)) @@ -378,7 +404,8 @@ func decString(i *decInstr, state *decodeState, p unsafe.Pointer) { *(*string)(p) = string(b) } -func ignoreUint8Array(i *decInstr, state *decodeState, p unsafe.Pointer) { +// ignoreUint8Array skips over the data for a byte slice value with no destination. +func ignoreUint8Array(i *decInstr, state *decoderState, p unsafe.Pointer) { b := make([]byte, state.decodeUint()) state.b.Read(b) } @@ -409,9 +436,15 @@ func allocate(rtyp reflect.Type, p uintptr, indir int) uintptr { return *(*uintptr)(up) } -func (dec *Decoder) decodeSingle(engine *decEngine, rtyp reflect.Type, p uintptr, indir int) (err os.Error) { - defer catchError(&err) - p = allocate(rtyp, p, indir) +// decodeSingle decodes a top-level value that is not a struct and stores it through p. +// Such values are preceded by a zero, making them have the memory layout of a +// struct field (although with an illegal field number). +func (dec *Decoder) decodeSingle(engine *decEngine, ut *userTypeInfo, p uintptr) (err os.Error) { + indir := ut.indir + if ut.isGobDecoder { + indir = int(ut.decIndir) + } + p = allocate(ut.base, p, indir) state := newDecodeState(dec, &dec.buf) state.fieldnum = singletonField basep := p @@ -428,9 +461,13 @@ func (dec *Decoder) decodeSingle(engine *decEngine, rtyp reflect.Type, p uintptr return nil } -func (dec *Decoder) decodeStruct(engine *decEngine, rtyp *reflect.StructType, p uintptr, indir int) (err os.Error) { - defer catchError(&err) - p = allocate(rtyp, p, indir) +// decodeStruct decodes a top-level struct and stores it through p. +// Indir is for the value, not the type. At the time of the call it may +// differ from ut.indir, which was computed when the engine was built. 
+// This state cannot arise for decodeSingle, which is called directly +// from the user's value, not from the innards of an engine. +func (dec *Decoder) decodeStruct(engine *decEngine, ut *userTypeInfo, p uintptr, indir int) (err os.Error) { + p = allocate(ut.base.(*reflect.StructType), p, indir) state := newDecodeState(dec, &dec.buf) state.fieldnum = -1 basep := p @@ -458,8 +495,8 @@ func (dec *Decoder) decodeStruct(engine *decEngine, rtyp *reflect.StructType, p return nil } +// ignoreStruct discards the data for a struct with no destination. func (dec *Decoder) ignoreStruct(engine *decEngine) (err os.Error) { - defer catchError(&err) state := newDecodeState(dec, &dec.buf) state.fieldnum = -1 for state.b.Len() > 0 { @@ -481,8 +518,9 @@ func (dec *Decoder) ignoreStruct(engine *decEngine) (err os.Error) { return nil } +// ignoreSingle discards the data for a top-level non-struct value with no +// destination. It's used when calling Decode with a nil value. func (dec *Decoder) ignoreSingle(engine *decEngine) (err os.Error) { - defer catchError(&err) state := newDecodeState(dec, &dec.buf) state.fieldnum = singletonField delta := int(state.decodeUint()) @@ -494,7 +532,8 @@ func (dec *Decoder) ignoreSingle(engine *decEngine) (err os.Error) { return nil } -func (dec *Decoder) decodeArrayHelper(state *decodeState, p uintptr, elemOp decOp, elemWid uintptr, length, elemIndir int, ovfl os.ErrorString) { +// decodeArrayHelper does the work for decoding arrays and slices. 
+func (dec *Decoder) decodeArrayHelper(state *decoderState, p uintptr, elemOp decOp, elemWid uintptr, length, elemIndir int, ovfl os.ErrorString) { instr := &decInstr{elemOp, 0, elemIndir, 0, ovfl} for i := 0; i < length; i++ { up := unsafe.Pointer(p) @@ -506,7 +545,10 @@ func (dec *Decoder) decodeArrayHelper(state *decodeState, p uintptr, elemOp decO } } -func (dec *Decoder) decodeArray(atyp *reflect.ArrayType, state *decodeState, p uintptr, elemOp decOp, elemWid uintptr, length, indir, elemIndir int, ovfl os.ErrorString) { +// decodeArray decodes an array and stores it through p, that is, p points to the zeroth element. +// The length is an unsigned integer preceding the elements. Even though the length is redundant +// (it's part of the type), it's a useful check and is included in the encoding. +func (dec *Decoder) decodeArray(atyp *reflect.ArrayType, state *decoderState, p uintptr, elemOp decOp, elemWid uintptr, length, indir, elemIndir int, ovfl os.ErrorString) { if indir > 0 { p = allocate(atyp, p, 1) // All but the last level has been allocated by dec.Indirect } @@ -516,9 +558,11 @@ func (dec *Decoder) decodeArray(atyp *reflect.ArrayType, state *decodeState, p u dec.decodeArrayHelper(state, p, elemOp, elemWid, length, elemIndir, ovfl) } -func decodeIntoValue(state *decodeState, op decOp, indir int, v reflect.Value, ovfl os.ErrorString) reflect.Value { +// decodeIntoValue is a helper for map decoding. Since maps are decoded using reflection, +// unlike the other items we can't use a pointer directly. 
+func decodeIntoValue(state *decoderState, op decOp, indir int, v reflect.Value, ovfl os.ErrorString) reflect.Value { instr := &decInstr{op, 0, indir, 0, ovfl} - up := unsafe.Pointer(v.Addr()) + up := unsafe.Pointer(v.UnsafeAddr()) if indir > 1 { up = decIndirect(up, indir) } @@ -526,7 +570,11 @@ func decodeIntoValue(state *decodeState, op decOp, indir int, v reflect.Value, o return v } -func (dec *Decoder) decodeMap(mtyp *reflect.MapType, state *decodeState, p uintptr, keyOp, elemOp decOp, indir, keyIndir, elemIndir int, ovfl os.ErrorString) { +// decodeMap decodes a map and stores its header through p. +// Maps are encoded as a length followed by key:value pairs. +// Because the internals of maps are not visible to us, we must +// use reflection rather than pointer magic. +func (dec *Decoder) decodeMap(mtyp *reflect.MapType, state *decoderState, p uintptr, keyOp, elemOp decOp, indir, keyIndir, elemIndir int, ovfl os.ErrorString) { if indir > 0 { p = allocate(mtyp, p, 1) // All but the last level has been allocated by dec.Indirect } @@ -538,7 +586,7 @@ func (dec *Decoder) decodeMap(mtyp *reflect.MapType, state *decodeState, p uintp // Maps cannot be accessed by moving addresses around the way // that slices etc. can. We must recover a full reflection value for // the iteration. - v := reflect.NewValue(unsafe.Unreflect(mtyp, unsafe.Pointer((p)))).(*reflect.MapValue) + v := reflect.NewValue(unsafe.Unreflect(mtyp, unsafe.Pointer(p))).(*reflect.MapValue) n := int(state.decodeUint()) for i := 0; i < n; i++ { key := decodeIntoValue(state, keyOp, keyIndir, reflect.MakeZero(mtyp.Key()), ovfl) @@ -547,21 +595,24 @@ func (dec *Decoder) decodeMap(mtyp *reflect.MapType, state *decodeState, p uintp } } -func (dec *Decoder) ignoreArrayHelper(state *decodeState, elemOp decOp, length int) { +// ignoreArrayHelper does the work for discarding arrays and slices. 
+func (dec *Decoder) ignoreArrayHelper(state *decoderState, elemOp decOp, length int) { instr := &decInstr{elemOp, 0, 0, 0, os.ErrorString("no error")} for i := 0; i < length; i++ { elemOp(instr, state, nil) } } -func (dec *Decoder) ignoreArray(state *decodeState, elemOp decOp, length int) { +// ignoreArray discards the data for an array value with no destination. +func (dec *Decoder) ignoreArray(state *decoderState, elemOp decOp, length int) { if n := state.decodeUint(); n != uint64(length) { errorf("gob: length mismatch in ignoreArray") } dec.ignoreArrayHelper(state, elemOp, length) } -func (dec *Decoder) ignoreMap(state *decodeState, keyOp, elemOp decOp) { +// ignoreMap discards the data for a map value with no destination. +func (dec *Decoder) ignoreMap(state *decoderState, keyOp, elemOp decOp) { n := int(state.decodeUint()) keyInstr := &decInstr{keyOp, 0, 0, 0, os.ErrorString("no error")} elemInstr := &decInstr{elemOp, 0, 0, 0, os.ErrorString("no error")} @@ -571,7 +622,9 @@ func (dec *Decoder) ignoreMap(state *decodeState, keyOp, elemOp decOp) { } } -func (dec *Decoder) decodeSlice(atyp *reflect.SliceType, state *decodeState, p uintptr, elemOp decOp, elemWid uintptr, indir, elemIndir int, ovfl os.ErrorString) { +// decodeSlice decodes a slice and stores the slice header through p. +// Slices are encoded as an unsigned length followed by the elements. +func (dec *Decoder) decodeSlice(atyp *reflect.SliceType, state *decoderState, p uintptr, elemOp decOp, elemWid uintptr, indir, elemIndir int, ovfl os.ErrorString) { n := int(uintptr(state.decodeUint())) if indir > 0 { up := unsafe.Pointer(p) @@ -590,7 +643,8 @@ func (dec *Decoder) decodeSlice(atyp *reflect.SliceType, state *decodeState, p u dec.decodeArrayHelper(state, hdrp.Data, elemOp, elemWid, n, elemIndir, ovfl) } -func (dec *Decoder) ignoreSlice(state *decodeState, elemOp decOp) { +// ignoreSlice skips over the data for a slice value with no destination. 
+func (dec *Decoder) ignoreSlice(state *decoderState, elemOp decOp) { dec.ignoreArrayHelper(state, elemOp, int(state.decodeUint())) } @@ -609,9 +663,10 @@ func setInterfaceValue(ivalue *reflect.InterfaceValue, value reflect.Value) { ivalue.Set(value) } -// decodeInterface receives the name of a concrete type followed by its value. +// decodeInterface decodes an interface value and stores it through p. +// Interfaces are encoded as the name of a concrete type followed by a value. // If the name is empty, the value is nil and no value is sent. -func (dec *Decoder) decodeInterface(ityp *reflect.InterfaceType, state *decodeState, p uintptr, indir int) { +func (dec *Decoder) decodeInterface(ityp *reflect.InterfaceType, state *decoderState, p uintptr, indir int) { // Create an interface reflect.Value. We need one even for the nil case. ivalue := reflect.MakeZero(ityp).(*reflect.InterfaceValue) // Read the name of the concrete type. @@ -655,7 +710,8 @@ func (dec *Decoder) decodeInterface(ityp *reflect.InterfaceType, state *decodeSt *(*[2]uintptr)(unsafe.Pointer(p)) = ivalue.Get() } -func (dec *Decoder) ignoreInterface(state *decodeState) { +// ignoreInterface discards the data for an interface value with no destination. +func (dec *Decoder) ignoreInterface(state *decoderState) { // Read the name of the concrete type. b := make([]byte, state.decodeUint()) _, err := state.b.Read(b) @@ -670,8 +726,34 @@ func (dec *Decoder) ignoreInterface(state *decodeState) { state.b.Next(int(state.decodeUint())) } +// decodeGobDecoder decodes something implementing the GobDecoder interface. +// The data is encoded as a byte slice. +func (dec *Decoder) decodeGobDecoder(state *decoderState, v reflect.Value, index int) { + // Read the bytes for the value. + b := make([]byte, state.decodeUint()) + _, err := state.b.Read(b) + if err != nil { + error(err) + } + // We know it's a GobDecoder, so just call the method directly. 
+ err = v.Interface().(GobDecoder).GobDecode(b) + if err != nil { + error(err) + } +} + +// ignoreGobDecoder discards the data for a GobDecoder value with no destination. +func (dec *Decoder) ignoreGobDecoder(state *decoderState) { + // Read the bytes for the value. + b := make([]byte, state.decodeUint()) + _, err := state.b.Read(b) + if err != nil { + error(err) + } +} + // Index by Go types. -var decOpMap = []decOp{ +var decOpTable = [...]decOp{ reflect.Bool: decBool, reflect.Int8: decInt8, reflect.Int16: decInt16, @@ -699,37 +781,49 @@ var decIgnoreOpMap = map[typeId]decOp{ tComplex: ignoreTwoUints, } -// Return the decoding op for the base type under rt and +// decOpFor returns the decoding op for the base type under rt and // the indirection count to reach it. -func (dec *Decoder) decOpFor(wireId typeId, rt reflect.Type, name string) (decOp, int) { - typ, indir := indirect(rt) +func (dec *Decoder) decOpFor(wireId typeId, rt reflect.Type, name string, inProgress map[reflect.Type]*decOp) (*decOp, int) { + ut := userType(rt) + // If the type implements GobEncoder, we handle it without further processing. + if ut.isGobDecoder { + return dec.gobDecodeOpFor(ut) + } + // If this type is already in progress, it's a recursive type (e.g. map[string]*T). + // Return the pointer to the op we're already building. 
+ if opPtr := inProgress[rt]; opPtr != nil { + return opPtr, ut.indir + } + typ := ut.base + indir := ut.indir var op decOp k := typ.Kind() - if int(k) < len(decOpMap) { - op = decOpMap[k] + if int(k) < len(decOpTable) { + op = decOpTable[k] } if op == nil { + inProgress[rt] = &op // Special cases switch t := typ.(type) { case *reflect.ArrayType: name = "element of " + name elemId := dec.wireType[wireId].ArrayT.Elem - elemOp, elemIndir := dec.decOpFor(elemId, t.Elem(), name) + elemOp, elemIndir := dec.decOpFor(elemId, t.Elem(), name, inProgress) ovfl := overflow(name) - op = func(i *decInstr, state *decodeState, p unsafe.Pointer) { - state.dec.decodeArray(t, state, uintptr(p), elemOp, t.Elem().Size(), t.Len(), i.indir, elemIndir, ovfl) + op = func(i *decInstr, state *decoderState, p unsafe.Pointer) { + state.dec.decodeArray(t, state, uintptr(p), *elemOp, t.Elem().Size(), t.Len(), i.indir, elemIndir, ovfl) } case *reflect.MapType: name = "element of " + name keyId := dec.wireType[wireId].MapT.Key elemId := dec.wireType[wireId].MapT.Elem - keyOp, keyIndir := dec.decOpFor(keyId, t.Key(), name) - elemOp, elemIndir := dec.decOpFor(elemId, t.Elem(), name) + keyOp, keyIndir := dec.decOpFor(keyId, t.Key(), name, inProgress) + elemOp, elemIndir := dec.decOpFor(elemId, t.Elem(), name, inProgress) ovfl := overflow(name) - op = func(i *decInstr, state *decodeState, p unsafe.Pointer) { + op = func(i *decInstr, state *decoderState, p unsafe.Pointer) { up := unsafe.Pointer(p) - state.dec.decodeMap(t, state, uintptr(up), keyOp, elemOp, i.indir, keyIndir, elemIndir, ovfl) + state.dec.decodeMap(t, state, uintptr(up), *keyOp, *elemOp, i.indir, keyIndir, elemIndir, ovfl) } case *reflect.SliceType: @@ -744,46 +838,46 @@ func (dec *Decoder) decOpFor(wireId typeId, rt reflect.Type, name string) (decOp } else { elemId = dec.wireType[wireId].SliceT.Elem } - elemOp, elemIndir := dec.decOpFor(elemId, t.Elem(), name) + elemOp, elemIndir := dec.decOpFor(elemId, t.Elem(), name, inProgress) ovfl 
:= overflow(name) - op = func(i *decInstr, state *decodeState, p unsafe.Pointer) { - state.dec.decodeSlice(t, state, uintptr(p), elemOp, t.Elem().Size(), i.indir, elemIndir, ovfl) + op = func(i *decInstr, state *decoderState, p unsafe.Pointer) { + state.dec.decodeSlice(t, state, uintptr(p), *elemOp, t.Elem().Size(), i.indir, elemIndir, ovfl) } case *reflect.StructType: // Generate a closure that calls out to the engine for the nested type. - enginePtr, err := dec.getDecEnginePtr(wireId, typ) + enginePtr, err := dec.getDecEnginePtr(wireId, userType(typ)) if err != nil { error(err) } - op = func(i *decInstr, state *decodeState, p unsafe.Pointer) { - // indirect through enginePtr to delay evaluation for recursive structs - err = dec.decodeStruct(*enginePtr, t, uintptr(p), i.indir) + op = func(i *decInstr, state *decoderState, p unsafe.Pointer) { + // indirect through enginePtr to delay evaluation for recursive structs. + err = dec.decodeStruct(*enginePtr, userType(typ), uintptr(p), i.indir) if err != nil { error(err) } } case *reflect.InterfaceType: - op = func(i *decInstr, state *decodeState, p unsafe.Pointer) { - dec.decodeInterface(t, state, uintptr(p), i.indir) + op = func(i *decInstr, state *decoderState, p unsafe.Pointer) { + state.dec.decodeInterface(t, state, uintptr(p), i.indir) } } } if op == nil { errorf("gob: decode can't handle type %s", rt.String()) } - return op, indir + return &op, indir } -// Return the decoding op for a field that has no destination. +// decIgnoreOpFor returns the decoding op for a field that has no destination. func (dec *Decoder) decIgnoreOpFor(wireId typeId) decOp { op, ok := decIgnoreOpMap[wireId] if !ok { if wireId == tInterface { // Special case because it's a method: the ignored item might // define types and we need to record their state in the decoder. 
- op = func(i *decInstr, state *decodeState, p unsafe.Pointer) { - dec.ignoreInterface(state) + op = func(i *decInstr, state *decoderState, p unsafe.Pointer) { + state.dec.ignoreInterface(state) } return op } @@ -795,7 +889,7 @@ func (dec *Decoder) decIgnoreOpFor(wireId typeId) decOp { case wire.ArrayT != nil: elemId := wire.ArrayT.Elem elemOp := dec.decIgnoreOpFor(elemId) - op = func(i *decInstr, state *decodeState, p unsafe.Pointer) { + op = func(i *decInstr, state *decoderState, p unsafe.Pointer) { state.dec.ignoreArray(state, elemOp, wire.ArrayT.Len) } @@ -804,14 +898,14 @@ func (dec *Decoder) decIgnoreOpFor(wireId typeId) decOp { elemId := dec.wireType[wireId].MapT.Elem keyOp := dec.decIgnoreOpFor(keyId) elemOp := dec.decIgnoreOpFor(elemId) - op = func(i *decInstr, state *decodeState, p unsafe.Pointer) { + op = func(i *decInstr, state *decoderState, p unsafe.Pointer) { state.dec.ignoreMap(state, keyOp, elemOp) } case wire.SliceT != nil: elemId := wire.SliceT.Elem elemOp := dec.decIgnoreOpFor(elemId) - op = func(i *decInstr, state *decodeState, p unsafe.Pointer) { + op = func(i *decInstr, state *decoderState, p unsafe.Pointer) { state.dec.ignoreSlice(state, elemOp) } @@ -821,10 +915,15 @@ func (dec *Decoder) decIgnoreOpFor(wireId typeId) decOp { if err != nil { error(err) } - op = func(i *decInstr, state *decodeState, p unsafe.Pointer) { + op = func(i *decInstr, state *decoderState, p unsafe.Pointer) { // indirect through enginePtr to delay evaluation for recursive structs state.dec.ignoreStruct(*enginePtr) } + + case wire.GobEncoderT != nil: + op = func(i *decInstr, state *decoderState, p unsafe.Pointer) { + state.dec.ignoreGobDecoder(state) + } } } if op == nil { @@ -833,14 +932,58 @@ func (dec *Decoder) decIgnoreOpFor(wireId typeId) decOp { return op } -// Are these two gob Types compatible? -// Answers the question for basic types, arrays, and slices. +// gobDecodeOpFor returns the op for a type that is known to implement +// GobDecoder. 
+func (dec *Decoder) gobDecodeOpFor(ut *userTypeInfo) (*decOp, int) { + rt := ut.user + if ut.decIndir != 0 { + errorf("gob: TODO: can't handle indirection to reach GobDecoder") + } + index := -1 + for i := 0; i < rt.NumMethod(); i++ { + if rt.Method(i).Name == gobDecodeMethodName { + index = i + break + } + } + if index < 0 { + panic("can't find GobDecode method") + } + var op decOp + op = func(i *decInstr, state *decoderState, p unsafe.Pointer) { + // Allocate the underlying data, but hold on to the address we have, + // since it's known to be the receiver's address. + // TODO: fix this up when decIndir can be non-zero. + allocate(ut.base, uintptr(p), ut.indir) + v := reflect.NewValue(unsafe.Unreflect(rt, p)) + state.dec.decodeGobDecoder(state, v, index) + } + return &op, int(ut.decIndir) + +} + +// compatibleType asks: Are these two gob Types compatible? +// Answers the question for basic types, arrays, maps and slices, plus +// GobEncoder/Decoder pairs. // Structs are considered ok; fields will be checked later. -func (dec *Decoder) compatibleType(fr reflect.Type, fw typeId) bool { - fr, _ = indirect(fr) - switch t := fr.(type) { +func (dec *Decoder) compatibleType(fr reflect.Type, fw typeId, inProgress map[reflect.Type]typeId) bool { + if rhs, ok := inProgress[fr]; ok { + return rhs == fw + } + inProgress[fr] = fw + ut := userType(fr) + wire, ok := dec.wireType[fw] + // If fr is a GobDecoder, the wire type must be GobEncoder. + // And if fr is not a GobDecoder, the wire type must not be either. + if ut.isGobDecoder != (ok && wire.GobEncoderT != nil) { // the parentheses look odd but are correct. + return false + } + if ut.isGobDecoder { // This test trumps all others. + return true + } + switch t := ut.base.(type) { default: - // map, chan, etc: cannot handle. + // chan, etc: cannot handle. 
return false case *reflect.BoolType: return fw == tBool @@ -857,19 +1000,17 @@ func (dec *Decoder) compatibleType(fr reflect.Type, fw typeId) bool { case *reflect.InterfaceType: return fw == tInterface case *reflect.ArrayType: - wire, ok := dec.wireType[fw] if !ok || wire.ArrayT == nil { return false } array := wire.ArrayT - return t.Len() == array.Len && dec.compatibleType(t.Elem(), array.Elem) + return t.Len() == array.Len && dec.compatibleType(t.Elem(), array.Elem, inProgress) case *reflect.MapType: - wire, ok := dec.wireType[fw] if !ok || wire.MapT == nil { return false } MapType := wire.MapT - return dec.compatibleType(t.Key(), MapType.Key) && dec.compatibleType(t.Elem(), MapType.Elem) + return dec.compatibleType(t.Key(), MapType.Key, inProgress) && dec.compatibleType(t.Elem(), MapType.Elem, inProgress) case *reflect.SliceType: // Is it an array of bytes? if t.Elem().Kind() == reflect.Uint8 { @@ -882,8 +1023,8 @@ func (dec *Decoder) compatibleType(fr reflect.Type, fw typeId) bool { } else { sw = dec.wireType[fw].SliceT } - elem, _ := indirect(t.Elem()) - return sw != nil && dec.compatibleType(elem, sw.Elem) + elem := userType(t.Elem()).base + return sw != nil && dec.compatibleType(elem, sw.Elem, inProgress) case *reflect.StructType: return true } @@ -899,21 +1040,27 @@ func (dec *Decoder) typeString(remoteId typeId) string { return dec.wireType[remoteId].string() } - -func (dec *Decoder) compileSingle(remoteId typeId, rt reflect.Type) (engine *decEngine, err os.Error) { +// compileSingle compiles the decoder engine for a non-struct top-level value, including +// GobDecoders. 
+func (dec *Decoder) compileSingle(remoteId typeId, ut *userTypeInfo) (engine *decEngine, err os.Error) { + rt := ut.base + if ut.isGobDecoder { + rt = ut.user + } engine = new(decEngine) engine.instr = make([]decInstr, 1) // one item name := rt.String() // best we can do - if !dec.compatibleType(rt, remoteId) { + if !dec.compatibleType(rt, remoteId, make(map[reflect.Type]typeId)) { return nil, os.ErrorString("gob: wrong type received for local value " + name + ": " + dec.typeString(remoteId)) } - op, indir := dec.decOpFor(remoteId, rt, name) + op, indir := dec.decOpFor(remoteId, rt, name, make(map[reflect.Type]*decOp)) ovfl := os.ErrorString(`value for "` + name + `" out of range`) - engine.instr[singletonField] = decInstr{op, singletonField, indir, 0, ovfl} + engine.instr[singletonField] = decInstr{*op, singletonField, indir, 0, ovfl} engine.numInstr = 1 return } +// compileIgnoreSingle compiles the decoder engine for a non-struct top-level value that will be discarded. func (dec *Decoder) compileIgnoreSingle(remoteId typeId) (engine *decEngine, err os.Error) { engine = new(decEngine) engine.instr = make([]decInstr, 1) // one item @@ -924,17 +1071,13 @@ func (dec *Decoder) compileIgnoreSingle(remoteId typeId) (engine *decEngine, err return } -// Is this an exported - upper case - name? -func isExported(name string) bool { - rune, _ := utf8.DecodeRuneInString(name) - return unicode.IsUpper(rune) -} - -func (dec *Decoder) compileDec(remoteId typeId, rt reflect.Type) (engine *decEngine, err os.Error) { - defer catchError(&err) +// compileDec compiles the decoder engine for a value. If the value is not a struct, +// it calls out to compileSingle. 
+func (dec *Decoder) compileDec(remoteId typeId, ut *userTypeInfo) (engine *decEngine, err os.Error) { + rt := ut.base srt, ok := rt.(*reflect.StructType) - if !ok { - return dec.compileSingle(remoteId, rt) + if !ok || ut.isGobDecoder { + return dec.compileSingle(remoteId, ut) } var wireStruct *structType // Builtin types can come from global pool; the rest must be defined by the decoder. @@ -953,6 +1096,7 @@ func (dec *Decoder) compileDec(remoteId typeId, rt reflect.Type) (engine *decEng } engine = new(decEngine) engine.instr = make([]decInstr, len(wireStruct.Field)) + seen := make(map[reflect.Type]*decOp) // Loop over the fields of the wire type. for fieldnum := 0; fieldnum < len(wireStruct.Field); fieldnum++ { wireField := wireStruct.Field[fieldnum] @@ -968,17 +1112,19 @@ func (dec *Decoder) compileDec(remoteId typeId, rt reflect.Type) (engine *decEng engine.instr[fieldnum] = decInstr{op, fieldnum, 0, 0, ovfl} continue } - if !dec.compatibleType(localField.Type, wireField.Id) { + if !dec.compatibleType(localField.Type, wireField.Id, make(map[reflect.Type]typeId)) { errorf("gob: wrong type (%s) for received field %s.%s", localField.Type, wireStruct.Name, wireField.Name) } - op, indir := dec.decOpFor(wireField.Id, localField.Type, localField.Name) - engine.instr[fieldnum] = decInstr{op, fieldnum, indir, uintptr(localField.Offset), ovfl} + op, indir := dec.decOpFor(wireField.Id, localField.Type, localField.Name, seen) + engine.instr[fieldnum] = decInstr{*op, fieldnum, indir, uintptr(localField.Offset), ovfl} engine.numInstr++ } return } -func (dec *Decoder) getDecEnginePtr(remoteId typeId, rt reflect.Type) (enginePtr **decEngine, err os.Error) { +// getDecEnginePtr returns the engine for the specified type. 
+func (dec *Decoder) getDecEnginePtr(remoteId typeId, ut *userTypeInfo) (enginePtr **decEngine, err os.Error) { + rt := ut.base decoderMap, ok := dec.decoderCache[rt] if !ok { decoderMap = make(map[typeId]**decEngine) @@ -988,7 +1134,7 @@ func (dec *Decoder) getDecEnginePtr(remoteId typeId, rt reflect.Type) (enginePtr // To handle recursive types, mark this engine as underway before compiling. enginePtr = new(*decEngine) decoderMap[remoteId] = enginePtr - *enginePtr, err = dec.compileDec(remoteId, rt) + *enginePtr, err = dec.compileDec(remoteId, ut) if err != nil { decoderMap[remoteId] = nil, false } @@ -996,11 +1142,12 @@ func (dec *Decoder) getDecEnginePtr(remoteId typeId, rt reflect.Type) (enginePtr return } -// When ignoring struct data, in effect we compile it into this type +// emptyStruct is the type we compile into when ignoring a struct value. type emptyStruct struct{} var emptyStructType = reflect.Typeof(emptyStruct{}) +// getDecEnginePtr returns the engine for the specified type when the value is to be discarded. func (dec *Decoder) getIgnoreEnginePtr(wireId typeId) (enginePtr **decEngine, err os.Error) { var ok bool if enginePtr, ok = dec.ignorerCache[wireId]; !ok { @@ -1009,7 +1156,7 @@ func (dec *Decoder) getIgnoreEnginePtr(wireId typeId) (enginePtr **decEngine, er dec.ignorerCache[wireId] = enginePtr wire := dec.wireType[wireId] if wire != nil && wire.StructT != nil { - *enginePtr, err = dec.compileDec(wireId, emptyStructType) + *enginePtr, err = dec.compileDec(wireId, userType(emptyStructType)) } else { *enginePtr, err = dec.compileIgnoreSingle(wireId) } @@ -1020,28 +1167,39 @@ func (dec *Decoder) getIgnoreEnginePtr(wireId typeId) (enginePtr **decEngine, er return } -func (dec *Decoder) decodeValue(wireId typeId, val reflect.Value) os.Error { +// decodeValue decodes the data stream representing a value and stores it in val. 
+func (dec *Decoder) decodeValue(wireId typeId, val reflect.Value) (err os.Error) { + defer catchError(&err) // If the value is nil, it means we should just ignore this item. if val == nil { return dec.decodeIgnoredValue(wireId) } // Dereference down to the underlying struct type. - rt, indir := indirect(val.Type()) - enginePtr, err := dec.getDecEnginePtr(wireId, rt) + ut := userType(val.Type()) + base := ut.base + indir := ut.indir + if ut.isGobDecoder { + indir = int(ut.decIndir) + if indir != 0 { + errorf("TODO: can't handle indirection in GobDecoder value") + } + } + enginePtr, err := dec.getDecEnginePtr(wireId, ut) if err != nil { return err } engine := *enginePtr - if st, ok := rt.(*reflect.StructType); ok { + if st, ok := base.(*reflect.StructType); ok && !ut.isGobDecoder { if engine.numInstr == 0 && st.NumField() > 0 && len(dec.wireType[wireId].StructT.Field) > 0 { - name := rt.Name() + name := base.Name() return os.ErrorString("gob: type mismatch: no fields matched compiling decoder for " + name) } - return dec.decodeStruct(engine, st, uintptr(val.Addr()), indir) + return dec.decodeStruct(engine, ut, uintptr(val.UnsafeAddr()), indir) } - return dec.decodeSingle(engine, rt, uintptr(val.Addr()), indir) + return dec.decodeSingle(engine, ut, uintptr(val.UnsafeAddr())) } +// decodeIgnoredValue decodes the data stream representing a value of the specified type and discards it. 
func (dec *Decoder) decodeIgnoredValue(wireId typeId) os.Error { enginePtr, err := dec.getIgnoreEnginePtr(wireId) if err != nil { @@ -1066,8 +1224,8 @@ func init() { default: panic("gob: unknown size of int/uint") } - decOpMap[reflect.Int] = iop - decOpMap[reflect.Uint] = uop + decOpTable[reflect.Int] = iop + decOpTable[reflect.Uint] = uop // Finally uintptr switch reflect.Typeof(uintptr(0)).Bits() { @@ -1078,5 +1236,5 @@ func init() { default: panic("gob: unknown size of uintptr") } - decOpMap[reflect.Uintptr] = uop + decOpTable[reflect.Uintptr] = uop } diff --git a/src/pkg/gob/decoder.go b/src/pkg/gob/decoder.go index f7c994ffa..719274583 100644 --- a/src/pkg/gob/decoder.go +++ b/src/pkg/gob/decoder.go @@ -21,7 +21,7 @@ type Decoder struct { wireType map[typeId]*wireType // map from remote ID to local description decoderCache map[reflect.Type]map[typeId]**decEngine // cache of compiled engines ignorerCache map[typeId]**decEngine // ditto for ignored objects - countState *decodeState // reads counts from wire + countState *decoderState // reads counts from wire countBuf []byte // used for decoding integers while parsing messages tmp []byte // temporary storage for i/o; saves reallocating err os.Error diff --git a/src/pkg/gob/encode.go b/src/pkg/gob/encode.go index 2e5ba2487..9190d9203 100644 --- a/src/pkg/gob/encode.go +++ b/src/pkg/gob/encode.go @@ -15,7 +15,7 @@ import ( const uint64Size = unsafe.Sizeof(uint64(0)) -// The global execution state of an instance of the encoder. +// encoderState is the global execution state of an instance of the encoder. // Field numbers are delta encoded and always increase. The field // number is initialized to -1 so 0 comes out as delta(1). A delta of // 0 terminates the structure. @@ -72,6 +72,7 @@ func (state *encoderState) encodeInt(i int64) { state.encodeUint(uint64(x)) } +// encOp is the signature of an encoding operator for a given type. 
type encOp func(i *encInstr, state *encoderState, p unsafe.Pointer) // The 'instructions' of the encoding machine @@ -82,8 +83,8 @@ type encInstr struct { offset uintptr // offset in the structure of the field to encode } -// Emit a field number and update the state to record its value for delta encoding. -// If the instruction pointer is nil, do nothing +// update emits a field number and updates the state to record its value for delta encoding. +// If the instruction pointer is nil, it does nothing func (state *encoderState) update(instr *encInstr) { if instr != nil { state.encodeUint(uint64(instr.field - state.fieldnum)) @@ -97,6 +98,7 @@ func (state *encoderState) update(instr *encInstr) { // Otherwise, the output (for a scalar) is the field number, as an encoded integer, // followed by the field data in its appropriate format. +// encIndirect dereferences p indir times and returns the result. func encIndirect(p unsafe.Pointer, indir int) unsafe.Pointer { for ; indir > 0; indir-- { p = *(*unsafe.Pointer)(p) @@ -107,6 +109,7 @@ func encIndirect(p unsafe.Pointer, indir int) unsafe.Pointer { return p } +// encBool encodes the bool with address p as an unsigned 0 or 1. func encBool(i *encInstr, state *encoderState, p unsafe.Pointer) { b := *(*bool)(p) if b || state.sendZero { @@ -119,6 +122,7 @@ func encBool(i *encInstr, state *encoderState, p unsafe.Pointer) { } } +// encInt encodes the int with address p. func encInt(i *encInstr, state *encoderState, p unsafe.Pointer) { v := int64(*(*int)(p)) if v != 0 || state.sendZero { @@ -127,6 +131,7 @@ func encInt(i *encInstr, state *encoderState, p unsafe.Pointer) { } } +// encUint encodes the uint with address p. func encUint(i *encInstr, state *encoderState, p unsafe.Pointer) { v := uint64(*(*uint)(p)) if v != 0 || state.sendZero { @@ -135,6 +140,7 @@ func encUint(i *encInstr, state *encoderState, p unsafe.Pointer) { } } +// encInt8 encodes the int8 with address p. 
func encInt8(i *encInstr, state *encoderState, p unsafe.Pointer) { v := int64(*(*int8)(p)) if v != 0 || state.sendZero { @@ -143,6 +149,7 @@ func encInt8(i *encInstr, state *encoderState, p unsafe.Pointer) { } } +// encUint8 encodes the uint8 with address p. func encUint8(i *encInstr, state *encoderState, p unsafe.Pointer) { v := uint64(*(*uint8)(p)) if v != 0 || state.sendZero { @@ -151,6 +158,7 @@ func encUint8(i *encInstr, state *encoderState, p unsafe.Pointer) { } } +// encInt16 encodes the int16 with address p. func encInt16(i *encInstr, state *encoderState, p unsafe.Pointer) { v := int64(*(*int16)(p)) if v != 0 || state.sendZero { @@ -159,6 +167,7 @@ func encInt16(i *encInstr, state *encoderState, p unsafe.Pointer) { } } +// encUint16 encodes the uint16 with address p. func encUint16(i *encInstr, state *encoderState, p unsafe.Pointer) { v := uint64(*(*uint16)(p)) if v != 0 || state.sendZero { @@ -167,6 +176,7 @@ func encUint16(i *encInstr, state *encoderState, p unsafe.Pointer) { } } +// encInt32 encodes the int32 with address p. func encInt32(i *encInstr, state *encoderState, p unsafe.Pointer) { v := int64(*(*int32)(p)) if v != 0 || state.sendZero { @@ -175,6 +185,7 @@ func encInt32(i *encInstr, state *encoderState, p unsafe.Pointer) { } } +// encUint encodes the uint32 with address p. func encUint32(i *encInstr, state *encoderState, p unsafe.Pointer) { v := uint64(*(*uint32)(p)) if v != 0 || state.sendZero { @@ -183,6 +194,7 @@ func encUint32(i *encInstr, state *encoderState, p unsafe.Pointer) { } } +// encInt64 encodes the int64 with address p. func encInt64(i *encInstr, state *encoderState, p unsafe.Pointer) { v := *(*int64)(p) if v != 0 || state.sendZero { @@ -191,6 +203,7 @@ func encInt64(i *encInstr, state *encoderState, p unsafe.Pointer) { } } +// encInt64 encodes the uint64 with address p. 
func encUint64(i *encInstr, state *encoderState, p unsafe.Pointer) { v := *(*uint64)(p) if v != 0 || state.sendZero { @@ -199,6 +212,7 @@ func encUint64(i *encInstr, state *encoderState, p unsafe.Pointer) { } } +// encUintptr encodes the uintptr with address p. func encUintptr(i *encInstr, state *encoderState, p unsafe.Pointer) { v := uint64(*(*uintptr)(p)) if v != 0 || state.sendZero { @@ -207,6 +221,7 @@ func encUintptr(i *encInstr, state *encoderState, p unsafe.Pointer) { } } +// floatBits returns a uint64 holding the bits of a floating-point number. // Floating-point numbers are transmitted as uint64s holding the bits // of the underlying representation. They are sent byte-reversed, with // the exponent end coming out first, so integer floating point numbers @@ -223,6 +238,7 @@ func floatBits(f float64) uint64 { return v } +// encFloat32 encodes the float32 with address p. func encFloat32(i *encInstr, state *encoderState, p unsafe.Pointer) { f := *(*float32)(p) if f != 0 || state.sendZero { @@ -232,6 +248,7 @@ func encFloat32(i *encInstr, state *encoderState, p unsafe.Pointer) { } } +// encFloat64 encodes the float64 with address p. func encFloat64(i *encInstr, state *encoderState, p unsafe.Pointer) { f := *(*float64)(p) if f != 0 || state.sendZero { @@ -241,6 +258,7 @@ func encFloat64(i *encInstr, state *encoderState, p unsafe.Pointer) { } } +// encComplex64 encodes the complex64 with address p. // Complex numbers are just a pair of floating-point numbers, real part first. func encComplex64(i *encInstr, state *encoderState, p unsafe.Pointer) { c := *(*complex64)(p) @@ -253,6 +271,7 @@ func encComplex64(i *encInstr, state *encoderState, p unsafe.Pointer) { } } +// encComplex128 encodes the complex128 with address p. 
func encComplex128(i *encInstr, state *encoderState, p unsafe.Pointer) { c := *(*complex128)(p) if c != 0+0i || state.sendZero { @@ -264,6 +283,7 @@ func encComplex128(i *encInstr, state *encoderState, p unsafe.Pointer) { } } +// encUint8Array encodes the byte slice whose header has address p. // Byte arrays are encoded as an unsigned count followed by the raw bytes. func encUint8Array(i *encInstr, state *encoderState, p unsafe.Pointer) { b := *(*[]byte)(p) @@ -274,6 +294,7 @@ func encUint8Array(i *encInstr, state *encoderState, p unsafe.Pointer) { } } +// encString encodes the string whose header has address p. // Strings are encoded as an unsigned count followed by the raw bytes. func encString(i *encInstr, state *encoderState, p unsafe.Pointer) { s := *(*string)(p) @@ -284,14 +305,15 @@ func encString(i *encInstr, state *encoderState, p unsafe.Pointer) { } } -// The end of a struct is marked by a delta field number of 0. +// encStructTerminator encodes the end of an encoded struct +// as delta field number of 0. func encStructTerminator(i *encInstr, state *encoderState, p unsafe.Pointer) { state.encodeUint(0) } // Execution engine -// The encoder engine is an array of instructions indexed by field number of the encoding +// encEngine an array of instructions indexed by field number of the encoding // data, typically a struct. It is executed top to bottom, walking the struct. type encEngine struct { instr []encInstr @@ -299,6 +321,7 @@ type encEngine struct { const singletonField = 0 +// encodeSingle encodes a single top-level non-struct value. func (enc *Encoder) encodeSingle(b *bytes.Buffer, engine *encEngine, basep uintptr) { state := newEncoderState(enc, b) state.fieldnum = singletonField @@ -315,6 +338,7 @@ func (enc *Encoder) encodeSingle(b *bytes.Buffer, engine *encEngine, basep uintp instr.op(instr, state, p) } +// encodeStruct encodes a single struct value. 
func (enc *Encoder) encodeStruct(b *bytes.Buffer, engine *encEngine, basep uintptr) { state := newEncoderState(enc, b) state.fieldnum = -1 @@ -330,6 +354,7 @@ func (enc *Encoder) encodeStruct(b *bytes.Buffer, engine *encEngine, basep uintp } } +// encodeArray encodes the array whose 0th element is at p. func (enc *Encoder) encodeArray(b *bytes.Buffer, p uintptr, op encOp, elemWid uintptr, elemIndir int, length int) { state := newEncoderState(enc, b) state.fieldnum = -1 @@ -349,6 +374,7 @@ func (enc *Encoder) encodeArray(b *bytes.Buffer, p uintptr, op encOp, elemWid ui } } +// encodeReflectValue is a helper for maps. It encodes the value v. func encodeReflectValue(state *encoderState, v reflect.Value, op encOp, indir int) { for i := 0; i < indir && v != nil; i++ { v = reflect.Indirect(v) @@ -356,9 +382,12 @@ func encodeReflectValue(state *encoderState, v reflect.Value, op encOp, indir in if v == nil { errorf("gob: encodeReflectValue: nil element") } - op(nil, state, unsafe.Pointer(v.Addr())) + op(nil, state, unsafe.Pointer(v.UnsafeAddr())) } +// encodeMap encodes a map as unsigned count followed by key:value pairs. +// Because map internals are not exposed, we must use reflection rather than +// addresses. func (enc *Encoder) encodeMap(b *bytes.Buffer, mv *reflect.MapValue, keyOp, elemOp encOp, keyIndir, elemIndir int) { state := newEncoderState(enc, b) state.fieldnum = -1 @@ -371,6 +400,7 @@ func (enc *Encoder) encodeMap(b *bytes.Buffer, mv *reflect.MapValue, keyOp, elem } } +// encodeInterface encodes the interface value iv. // To send an interface, we send a string identifying the concrete type, followed // by the type identifier (which might require defining that type right now), followed // by the concrete value. 
A nil value gets sent as the empty string for the name, @@ -384,10 +414,10 @@ func (enc *Encoder) encodeInterface(b *bytes.Buffer, iv *reflect.InterfaceValue) return } - typ, _ := indirect(iv.Elem().Type()) - name, ok := concreteTypeToName[typ] + ut := userType(iv.Elem().Type()) + name, ok := concreteTypeToName[ut.base] if !ok { - errorf("gob: type not registered for interface: %s", typ) + errorf("gob: type not registered for interface: %s", ut.base) } // Send the name. state.encodeUint(uint64(len(name))) @@ -396,14 +426,14 @@ func (enc *Encoder) encodeInterface(b *bytes.Buffer, iv *reflect.InterfaceValue) error(err) } // Define the type id if necessary. - enc.sendTypeDescriptor(enc.writer(), state, typ) + enc.sendTypeDescriptor(enc.writer(), state, ut) // Send the type id. - enc.sendTypeId(state, typ) + enc.sendTypeId(state, ut) // Encode the value into a new buffer. Any nested type definitions // should be written to b, before the encoded value. enc.pushWriter(b) data := new(bytes.Buffer) - err = enc.encode(data, iv.Elem()) + err = enc.encode(data, iv.Elem(), ut) if err != nil { error(err) } @@ -414,7 +444,22 @@ func (enc *Encoder) encodeInterface(b *bytes.Buffer, iv *reflect.InterfaceValue) } } -var encOpMap = []encOp{ +// encGobEncoder encodes a value that implements the GobEncoder interface. +// The data is sent as a byte array. +func (enc *Encoder) encodeGobEncoder(b *bytes.Buffer, v reflect.Value, index int) { + // TODO: should we catch panics from the called method? + // We know it's a GobEncoder, so just call the method directly. 
+ data, err := v.Interface().(GobEncoder).GobEncode() + if err != nil { + error(err) + } + state := newEncoderState(enc, b) + state.fieldnum = -1 + state.encodeUint(uint64(len(data))) + state.b.Write(data) +} + +var encOpTable = [...]encOp{ reflect.Bool: encBool, reflect.Int: encInt, reflect.Int8: encInt8, @@ -434,16 +479,28 @@ var encOpMap = []encOp{ reflect.String: encString, } -// Return the encoding op for the base type under rt and +// encOpFor returns (a pointer to) the encoding op for the base type under rt and // the indirection count to reach it. -func (enc *Encoder) encOpFor(rt reflect.Type) (encOp, int) { - typ, indir := indirect(rt) - var op encOp +func (enc *Encoder) encOpFor(rt reflect.Type, inProgress map[reflect.Type]*encOp) (*encOp, int) { + ut := userType(rt) + // If the type implements GobEncoder, we handle it without further processing. + if ut.isGobEncoder { + return enc.gobEncodeOpFor(ut) + } + // If this type is already in progress, it's a recursive type (e.g. map[string]*T). + // Return the pointer to the op we're already building. + if opPtr := inProgress[rt]; opPtr != nil { + return opPtr, ut.indir + } + typ := ut.base + indir := ut.indir k := typ.Kind() - if int(k) < len(encOpMap) { - op = encOpMap[k] + var op encOp + if int(k) < len(encOpTable) { + op = encOpTable[k] } if op == nil { + inProgress[rt] = &op // Special cases switch t := typ.(type) { case *reflect.SliceType: @@ -452,40 +509,40 @@ func (enc *Encoder) encOpFor(rt reflect.Type) (encOp, int) { break } // Slices have a header; we decode it to find the underlying array. 
- elemOp, indir := enc.encOpFor(t.Elem()) + elemOp, indir := enc.encOpFor(t.Elem(), inProgress) op = func(i *encInstr, state *encoderState, p unsafe.Pointer) { slice := (*reflect.SliceHeader)(p) if !state.sendZero && slice.Len == 0 { return } state.update(i) - state.enc.encodeArray(state.b, slice.Data, elemOp, t.Elem().Size(), indir, int(slice.Len)) + state.enc.encodeArray(state.b, slice.Data, *elemOp, t.Elem().Size(), indir, int(slice.Len)) } case *reflect.ArrayType: // True arrays have size in the type. - elemOp, indir := enc.encOpFor(t.Elem()) + elemOp, indir := enc.encOpFor(t.Elem(), inProgress) op = func(i *encInstr, state *encoderState, p unsafe.Pointer) { state.update(i) - state.enc.encodeArray(state.b, uintptr(p), elemOp, t.Elem().Size(), indir, t.Len()) + state.enc.encodeArray(state.b, uintptr(p), *elemOp, t.Elem().Size(), indir, t.Len()) } case *reflect.MapType: - keyOp, keyIndir := enc.encOpFor(t.Key()) - elemOp, elemIndir := enc.encOpFor(t.Elem()) + keyOp, keyIndir := enc.encOpFor(t.Key(), inProgress) + elemOp, elemIndir := enc.encOpFor(t.Elem(), inProgress) op = func(i *encInstr, state *encoderState, p unsafe.Pointer) { // Maps cannot be accessed by moving addresses around the way // that slices etc. can. We must recover a full reflection value for // the iteration. - v := reflect.NewValue(unsafe.Unreflect(t, unsafe.Pointer((p)))) + v := reflect.NewValue(unsafe.Unreflect(t, unsafe.Pointer(p))) mv := reflect.Indirect(v).(*reflect.MapValue) if !state.sendZero && mv.Len() == 0 { return } state.update(i) - state.enc.encodeMap(state.b, mv, keyOp, elemOp, keyIndir, elemIndir) + state.enc.encodeMap(state.b, mv, *keyOp, *elemOp, keyIndir, elemIndir) } case *reflect.StructType: // Generate a closure that calls out to the engine for the nested type. 
- enc.getEncEngine(typ) + enc.getEncEngine(userType(typ)) info := mustGetTypeInfo(typ) op = func(i *encInstr, state *encoderState, p unsafe.Pointer) { state.update(i) @@ -496,7 +553,7 @@ func (enc *Encoder) encOpFor(rt reflect.Type) (encOp, int) { op = func(i *encInstr, state *encoderState, p unsafe.Pointer) { // Interfaces transmit the name and contents of the concrete // value they contain. - v := reflect.NewValue(unsafe.Unreflect(t, unsafe.Pointer((p)))) + v := reflect.NewValue(unsafe.Unreflect(t, unsafe.Pointer(p))) iv := reflect.Indirect(v).(*reflect.InterfaceValue) if !state.sendZero && (iv == nil || iv.IsNil()) { return @@ -509,21 +566,54 @@ func (enc *Encoder) encOpFor(rt reflect.Type) (encOp, int) { if op == nil { errorf("gob enc: can't happen: encode type %s", rt.String()) } - return op, indir + return &op, indir } -// The local Type was compiled from the actual value, so we know it's compatible. -func (enc *Encoder) compileEnc(rt reflect.Type) *encEngine { - srt, isStruct := rt.(*reflect.StructType) +// gobEncodeOpFor returns the op for a type that is known to implement +// GobEncoder. +func (enc *Encoder) gobEncodeOpFor(ut *userTypeInfo) (*encOp, int) { + rt := ut.user + if ut.encIndir != 0 { + errorf("gob: TODO: can't handle indirection to reach GobEncoder") + } + index := -1 + for i := 0; i < rt.NumMethod(); i++ { + if rt.Method(i).Name == gobEncodeMethodName { + index = i + break + } + } + if index < 0 { + panic("can't find GobEncode method") + } + var op encOp + op = func(i *encInstr, state *encoderState, p unsafe.Pointer) { + // TODO: this will need fixing when ut.encIndr != 0. + v := reflect.NewValue(unsafe.Unreflect(rt, p)) + state.update(i) + state.enc.encodeGobEncoder(state.b, v, index) + } + return &op, int(ut.encIndir) +} + +// compileEnc returns the engine to compile the type. 
+func (enc *Encoder) compileEnc(ut *userTypeInfo) *encEngine { + srt, isStruct := ut.base.(*reflect.StructType) engine := new(encEngine) - if isStruct { - for fieldNum := 0; fieldNum < srt.NumField(); fieldNum++ { + seen := make(map[reflect.Type]*encOp) + rt := ut.base + if ut.isGobEncoder { + rt = ut.user + } + if !ut.isGobEncoder && isStruct { + for fieldNum, wireFieldNum := 0, 0; fieldNum < srt.NumField(); fieldNum++ { f := srt.Field(fieldNum) if !isExported(f.Name) { continue } - op, indir := enc.encOpFor(f.Type) - engine.instr = append(engine.instr, encInstr{op, fieldNum, indir, uintptr(f.Offset)}) + op, indir := enc.encOpFor(f.Type, seen) + engine.instr = append(engine.instr, encInstr{*op, wireFieldNum, indir, uintptr(f.Offset)}) + wireFieldNum++ } if srt.NumField() > 0 && len(engine.instr) == 0 { errorf("type %s has no exported fields", rt) @@ -531,46 +621,52 @@ func (enc *Encoder) compileEnc(rt reflect.Type) *encEngine { engine.instr = append(engine.instr, encInstr{encStructTerminator, 0, 0, 0}) } else { engine.instr = make([]encInstr, 1) - op, indir := enc.encOpFor(rt) - engine.instr[0] = encInstr{op, singletonField, indir, 0} // offset is zero + op, indir := enc.encOpFor(rt, seen) + engine.instr[0] = encInstr{*op, singletonField, indir, 0} // offset is zero } return engine } +// getEncEngine returns the engine to compile the type. // typeLock must be held (or we're in initialization and guaranteed single-threaded). -// The reflection type must have all its indirections processed out. -func (enc *Encoder) getEncEngine(rt reflect.Type) *encEngine { - info, err1 := getTypeInfo(rt) +func (enc *Encoder) getEncEngine(ut *userTypeInfo) *encEngine { + info, err1 := getTypeInfo(ut) if err1 != nil { error(err1) } if info.encoder == nil { // mark this engine as underway before compiling to handle recursive types. 
info.encoder = new(encEngine) - info.encoder = enc.compileEnc(rt) + info.encoder = enc.compileEnc(ut) } return info.encoder } -// Put this in a function so we can hold the lock only while compiling, not when encoding. -func (enc *Encoder) lockAndGetEncEngine(rt reflect.Type) *encEngine { +// lockAndGetEncEngine is a function that locks and compiles. +// This lets us hold the lock only while compiling, not when encoding. +func (enc *Encoder) lockAndGetEncEngine(ut *userTypeInfo) *encEngine { typeLock.Lock() defer typeLock.Unlock() - return enc.getEncEngine(rt) + return enc.getEncEngine(ut) } -func (enc *Encoder) encode(b *bytes.Buffer, value reflect.Value) (err os.Error) { +func (enc *Encoder) encode(b *bytes.Buffer, value reflect.Value, ut *userTypeInfo) (err os.Error) { defer catchError(&err) - // Dereference down to the underlying object. - rt, indir := indirect(value.Type()) + engine := enc.lockAndGetEncEngine(ut) + indir := ut.indir + if ut.isGobEncoder { + indir = int(ut.encIndir) + if indir != 0 { + errorf("TODO: can't handle indirection in GobEncoder value") + } + } for i := 0; i < indir; i++ { value = reflect.Indirect(value) } - engine := enc.lockAndGetEncEngine(rt) - if value.Type().Kind() == reflect.Struct { - enc.encodeStruct(b, engine, value.Addr()) + if !ut.isGobEncoder && value.Type().Kind() == reflect.Struct { + enc.encodeStruct(b, engine, value.UnsafeAddr()) } else { - enc.encodeSingle(b, engine, value.Addr()) + enc.encodeSingle(b, engine, value.UnsafeAddr()) } return nil } diff --git a/src/pkg/gob/encoder.go b/src/pkg/gob/encoder.go index 29ba44057..4bfcf15c7 100644 --- a/src/pkg/gob/encoder.go +++ b/src/pkg/gob/encoder.go @@ -78,11 +78,57 @@ func (enc *Encoder) writeMessage(w io.Writer, b *bytes.Buffer) { } } +// sendActualType sends the requested type, without further investigation, unless +// it's been sent before. 
+func (enc *Encoder) sendActualType(w io.Writer, state *encoderState, ut *userTypeInfo, actual reflect.Type) (sent bool) { + if _, alreadySent := enc.sent[actual]; alreadySent { + return false + } + typeLock.Lock() + info, err := getTypeInfo(ut) + typeLock.Unlock() + if err != nil { + enc.setError(err) + return + } + // Send the pair (-id, type) + // Id: + state.encodeInt(-int64(info.id)) + // Type: + enc.encode(state.b, reflect.NewValue(info.wire), wireTypeUserInfo) + enc.writeMessage(w, state.b) + if enc.err != nil { + return + } + + // Remember we've sent this type, both what the user gave us and the base type. + enc.sent[ut.base] = info.id + if ut.user != ut.base { + enc.sent[ut.user] = info.id + } + // Now send the inner types + switch st := actual.(type) { + case *reflect.StructType: + for i := 0; i < st.NumField(); i++ { + enc.sendType(w, state, st.Field(i).Type) + } + case reflect.ArrayOrSliceType: + enc.sendType(w, state, st.Elem()) + } + return true +} + +// sendType sends the type info to the other side, if necessary. func (enc *Encoder) sendType(w io.Writer, state *encoderState, origt reflect.Type) (sent bool) { - // Drill down to the base type. - rt, _ := indirect(origt) + ut := userType(origt) + if ut.isGobEncoder { + // The rules are different: regardless of the underlying type's representation, + // we need to tell the other side that this exact type is a GobEncoder. + return enc.sendActualType(w, state, ut, ut.user) + } - switch rt := rt.(type) { + // It's a concrete value, so drill down to the base type. + switch rt := ut.base.(type) { default: // Basic types and interfaces do not need to be described. return @@ -108,43 +154,7 @@ func (enc *Encoder) sendType(w io.Writer, state *encoderState, origt reflect.Typ return } - // Have we already sent this type? This time we ask about the base type. - if _, alreadySent := enc.sent[rt]; alreadySent { - return - } - - // Need to send it. 
- typeLock.Lock() - info, err := getTypeInfo(rt) - typeLock.Unlock() - if err != nil { - enc.setError(err) - return - } - // Send the pair (-id, type) - // Id: - state.encodeInt(-int64(info.id)) - // Type: - enc.encode(state.b, reflect.NewValue(info.wire)) - enc.writeMessage(w, state.b) - if enc.err != nil { - return - } - - // Remember we've sent this type. - enc.sent[rt] = info.id - // Remember we've sent the top-level, possibly indirect type too. - enc.sent[origt] = info.id - // Now send the inner types - switch st := rt.(type) { - case *reflect.StructType: - for i := 0; i < st.NumField(); i++ { - enc.sendType(w, state, st.Field(i).Type) - } - case reflect.ArrayOrSliceType: - enc.sendType(w, state, st.Elem()) - } - return true + return enc.sendActualType(w, state, ut, ut.base) } // Encode transmits the data item represented by the empty interface value, @@ -153,12 +163,19 @@ func (enc *Encoder) Encode(e interface{}) os.Error { return enc.EncodeValue(reflect.NewValue(e)) } -// sendTypeId makes sure the remote side knows about this type. +// sendTypeDescriptor makes sure the remote side knows about this type. // It will send a descriptor if this is the first time the type has been // sent. -func (enc *Encoder) sendTypeDescriptor(w io.Writer, state *encoderState, rt reflect.Type) { +func (enc *Encoder) sendTypeDescriptor(w io.Writer, state *encoderState, ut *userTypeInfo) { // Make sure the type is known to the other side. // First, have we already sent this type? + rt := ut.base + if ut.isGobEncoder { + rt = ut.user + if ut.encIndir != 0 { + panic("TODO: can't handle non-zero encIndir") + } + } if _, alreadySent := enc.sent[rt]; !alreadySent { // No, so send it. sent := enc.sendType(w, state, rt) @@ -170,7 +187,7 @@ func (enc *Encoder) sendTypeDescriptor(w io.Writer, state *encoderState, rt refl // need to send the type info but we do need to update enc.sent. 
if !sent { typeLock.Lock() - info, err := getTypeInfo(rt) + info, err := getTypeInfo(ut) typeLock.Unlock() if err != nil { enc.setError(err) @@ -182,9 +199,9 @@ func (enc *Encoder) sendTypeDescriptor(w io.Writer, state *encoderState, rt refl } // sendTypeId sends the id, which must have already been defined. -func (enc *Encoder) sendTypeId(state *encoderState, rt reflect.Type) { +func (enc *Encoder) sendTypeId(state *encoderState, ut *userTypeInfo) { // Identify the type of this top-level value. - state.encodeInt(int64(enc.sent[rt])) + state.encodeInt(int64(enc.sent[ut.base])) } // EncodeValue transmits the data item represented by the reflection value, @@ -198,19 +215,22 @@ func (enc *Encoder) EncodeValue(value reflect.Value) os.Error { // Remove any nested writers remaining due to previous errors. enc.w = enc.w[0:1] - enc.err = nil - rt, _ := indirect(value.Type()) + ut, err := validUserType(value.Type()) + if err != nil { + return err + } + enc.err = nil state := newEncoderState(enc, new(bytes.Buffer)) - enc.sendTypeDescriptor(enc.writer(), state, rt) - enc.sendTypeId(state, rt) + enc.sendTypeDescriptor(enc.writer(), state, ut) + enc.sendTypeId(state, ut) if enc.err != nil { return enc.err } // Encode the object. 
- err := enc.encode(state.b, value) + err = enc.encode(state.b, value, ut) if err != nil { enc.setError(err) } else { diff --git a/src/pkg/gob/encoder_test.go b/src/pkg/gob/encoder_test.go index 3e06db727..a0c713b81 100644 --- a/src/pkg/gob/encoder_test.go +++ b/src/pkg/gob/encoder_test.go @@ -249,6 +249,24 @@ func TestArray(t *testing.T) { } } +func TestRecursiveMapType(t *testing.T) { + type recursiveMap map[string]recursiveMap + r1 := recursiveMap{"A": recursiveMap{"B": nil, "C": nil}, "D": nil} + r2 := make(recursiveMap) + if err := encAndDec(r1, &r2); err != nil { + t.Error(err) + } +} + +func TestRecursiveSliceType(t *testing.T) { + type recursiveSlice []recursiveSlice + r1 := recursiveSlice{0: recursiveSlice{0: nil}, 1: nil} + r2 := make(recursiveSlice, 0) + if err := encAndDec(r1, &r2); err != nil { + t.Error(err) + } +} + // Regression test for bug: must send zero values inside arrays func TestDefaultsInArray(t *testing.T) { type Type7 struct { diff --git a/src/pkg/gob/gobencdec_test.go b/src/pkg/gob/gobencdec_test.go new file mode 100644 index 000000000..82ca68170 --- /dev/null +++ b/src/pkg/gob/gobencdec_test.go @@ -0,0 +1,331 @@ +// Copyright 20011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file contains tests of the GobEncoder/GobDecoder support. + +package gob + +import ( + "bytes" + "fmt" + "os" + "strings" + "testing" +) + +// Types that implement the GobEncoder/Decoder interfaces. + +type ByteStruct struct { + a byte // not an exported field +} + +type StringStruct struct { + s string // not an exported field +} + +type Gobber int + +type ValueGobber string // encodes with a value, decodes with a pointer. 
+ +// The relevant methods + +func (g *ByteStruct) GobEncode() ([]byte, os.Error) { + b := make([]byte, 3) + b[0] = g.a + b[1] = g.a + 1 + b[2] = g.a + 2 + return b, nil +} + +func (g *ByteStruct) GobDecode(data []byte) os.Error { + if g == nil { + return os.ErrorString("NIL RECEIVER") + } + // Expect N sequential-valued bytes. + if len(data) == 0 { + return os.EOF + } + g.a = data[0] + for i, c := range data { + if c != g.a+byte(i) { + return os.ErrorString("invalid data sequence") + } + } + return nil +} + +func (g *StringStruct) GobEncode() ([]byte, os.Error) { + return []byte(g.s), nil +} + +func (g *StringStruct) GobDecode(data []byte) os.Error { + // Expect N sequential-valued bytes. + if len(data) == 0 { + return os.EOF + } + a := data[0] + for i, c := range data { + if c != a+byte(i) { + return os.ErrorString("invalid data sequence") + } + } + g.s = string(data) + return nil +} + +func (g *Gobber) GobEncode() ([]byte, os.Error) { + return []byte(fmt.Sprintf("VALUE=%d", *g)), nil +} + +func (g *Gobber) GobDecode(data []byte) os.Error { + _, err := fmt.Sscanf(string(data), "VALUE=%d", (*int)(g)) + return err +} + +func (v ValueGobber) GobEncode() ([]byte, os.Error) { + return []byte(fmt.Sprintf("VALUE=%s", v)), nil +} + +func (v *ValueGobber) GobDecode(data []byte) os.Error { + _, err := fmt.Sscanf(string(data), "VALUE=%s", (*string)(v)) + return err +} + +// Structs that include GobEncodable fields. 
+ +type GobTest0 struct { + X int // guarantee we have something in common with GobTest* + G *ByteStruct +} + +type GobTest1 struct { + X int // guarantee we have something in common with GobTest* + G *StringStruct +} + +type GobTest2 struct { + X int // guarantee we have something in common with GobTest* + G string // not a GobEncoder - should give us errors +} + +type GobTest3 struct { + X int // guarantee we have something in common with GobTest* + G *Gobber // TODO: should be able to satisfy interface without a pointer +} + +type GobTest4 struct { + X int // guarantee we have something in common with GobTest* + V ValueGobber +} + +type GobTest5 struct { + X int // guarantee we have something in common with GobTest* + V *ValueGobber +} + +type GobTestIgnoreEncoder struct { + X int // guarantee we have something in common with GobTest* +} + +func TestGobEncoderField(t *testing.T) { + b := new(bytes.Buffer) + // First a field that's a structure. + enc := NewEncoder(b) + err := enc.Encode(GobTest0{17, &ByteStruct{'A'}}) + if err != nil { + t.Fatal("encode error:", err) + } + dec := NewDecoder(b) + x := new(GobTest0) + err = dec.Decode(x) + if err != nil { + t.Fatal("decode error:", err) + } + if x.G.a != 'A' { + t.Errorf("expected 'A' got %c", x.G.a) + } + // Now a field that's not a structure. + b.Reset() + gobber := Gobber(23) + err = enc.Encode(GobTest3{17, &gobber}) + if err != nil { + t.Fatal("encode error:", err) + } + y := new(GobTest3) + err = dec.Decode(y) + if err != nil { + t.Fatal("decode error:", err) + } + if *y.G != 23 { + t.Errorf("expected '23 got %d", *y.G) + } +} + +// As long as the fields have the same name and implement the +// interface, we can cross-connect them. Not sure it's useful +// and may even be bad but it works and it's hard to prevent +// without exposing the contents of the object, which would +// defeat the purpose. 
+func TestGobEncoderFieldsOfDifferentType(t *testing.T) { + // first, string in field to byte in field + b := new(bytes.Buffer) + enc := NewEncoder(b) + err := enc.Encode(GobTest1{17, &StringStruct{"ABC"}}) + if err != nil { + t.Fatal("encode error:", err) + } + dec := NewDecoder(b) + x := new(GobTest0) + err = dec.Decode(x) + if err != nil { + t.Fatal("decode error:", err) + } + if x.G.a != 'A' { + t.Errorf("expected 'A' got %c", x.G.a) + } + // now the other direction, byte in field to string in field + b.Reset() + err = enc.Encode(GobTest0{17, &ByteStruct{'X'}}) + if err != nil { + t.Fatal("encode error:", err) + } + y := new(GobTest1) + err = dec.Decode(y) + if err != nil { + t.Fatal("decode error:", err) + } + if y.G.s != "XYZ" { + t.Fatalf("expected `XYZ` got %c", y.G.s) + } +} + +// Test that we can encode a value and decode into a pointer. +func TestGobEncoderValueEncoder(t *testing.T) { + // first, string in field to byte in field + b := new(bytes.Buffer) + enc := NewEncoder(b) + err := enc.Encode(GobTest4{17, ValueGobber("hello")}) + if err != nil { + t.Fatal("encode error:", err) + } + dec := NewDecoder(b) + x := new(GobTest5) + err = dec.Decode(x) + if err != nil { + t.Fatal("decode error:", err) + } + if *x.V != "hello" { + t.Errorf("expected `hello` got %s", x.V) + } +} + +func TestGobEncoderFieldTypeError(t *testing.T) { + // GobEncoder to non-decoder: error + b := new(bytes.Buffer) + enc := NewEncoder(b) + err := enc.Encode(GobTest1{17, &StringStruct{"ABC"}}) + if err != nil { + t.Fatal("encode error:", err) + } + dec := NewDecoder(b) + x := &GobTest2{} + err = dec.Decode(x) + if err == nil { + t.Fatal("expected decode error for mismatched fields (encoder to non-decoder)") + } + if strings.Index(err.String(), "type") < 0 { + t.Fatal("expected type error; got", err) + } + // Non-encoder to GobDecoder: error + b.Reset() + err = enc.Encode(GobTest2{17, "ABC"}) + if err != nil { + t.Fatal("encode error:", err) + } + y := &GobTest1{} + err = 
dec.Decode(y) + if err == nil { + t.Fatal("expected decode error for mistmatched fields (non-encoder to decoder)") + } + if strings.Index(err.String(), "type") < 0 { + t.Fatal("expected type error; got", err) + } +} + +// Even though ByteStruct is a struct, it's treated as a singleton at the top level. +func TestGobEncoderStructSingleton(t *testing.T) { + b := new(bytes.Buffer) + enc := NewEncoder(b) + err := enc.Encode(&ByteStruct{'A'}) + if err != nil { + t.Fatal("encode error:", err) + } + dec := NewDecoder(b) + x := new(ByteStruct) + err = dec.Decode(x) + if err != nil { + t.Fatal("decode error:", err) + } + if x.a != 'A' { + t.Errorf("expected 'A' got %c", x.a) + } +} + +func TestGobEncoderNonStructSingleton(t *testing.T) { + b := new(bytes.Buffer) + enc := NewEncoder(b) + g := Gobber(1234) // TODO: shouldn't need to take the address here. + err := enc.Encode(&g) + if err != nil { + t.Fatal("encode error:", err) + } + dec := NewDecoder(b) + var x Gobber + err = dec.Decode(&x) + if err != nil { + t.Fatal("decode error:", err) + } + if x != 1234 { + t.Errorf("expected 1234 got %c", x) + } +} + +func TestGobEncoderIgnoreStructField(t *testing.T) { + b := new(bytes.Buffer) + // First a field that's a structure. + enc := NewEncoder(b) + err := enc.Encode(GobTest0{17, &ByteStruct{'A'}}) + if err != nil { + t.Fatal("encode error:", err) + } + dec := NewDecoder(b) + x := new(GobTestIgnoreEncoder) + err = dec.Decode(x) + if err != nil { + t.Fatal("decode error:", err) + } + if x.X != 17 { + t.Errorf("expected 17 got %c", x.X) + } +} + +func TestGobEncoderIgnoreNonStructField(t *testing.T) { + b := new(bytes.Buffer) + // First a field that's a structure. 
+ enc := NewEncoder(b) + gobber := Gobber(23) + err := enc.Encode(GobTest3{17, &gobber}) + if err != nil { + t.Fatal("encode error:", err) + } + dec := NewDecoder(b) + x := new(GobTestIgnoreEncoder) + err = dec.Decode(x) + if err != nil { + t.Fatal("decode error:", err) + } + if x.X != 17 { + t.Errorf("expected 17 got %c", x.X) + } +} diff --git a/src/pkg/gob/type.go b/src/pkg/gob/type.go index f613f6e8a..a43813941 100644 --- a/src/pkg/gob/type.go +++ b/src/pkg/gob/type.go @@ -9,15 +9,157 @@ import ( "os" "reflect" "sync" + "unicode" + "utf8" ) -// Reflection types are themselves interface values holding structs -// describing the type. Each type has a different struct so that struct can -// be the kind. For example, if typ is the reflect type for an int8, typ is -// a pointer to a reflect.Int8Type struct; if typ is the reflect type for a -// function, typ is a pointer to a reflect.FuncType struct; we use the type -// of that pointer as the kind. +// userTypeInfo stores the information associated with a type the user has handed +// to the package. It's computed once and stored in a map keyed by reflection +// type. +type userTypeInfo struct { + user reflect.Type // the type the user handed us + base reflect.Type // the base type after all indirections + indir int // number of indirections to reach the base type + isGobEncoder bool // does the type implement GobEncoder? + isGobDecoder bool // does the type implement GobDecoder? + encIndir int8 // number of indirections to reach the receiver type; may be negative + decIndir int8 // number of indirections to reach the receiver type; may be negative +} + +var ( + // Protected by an RWMutex because we read it a lot and write + // it only when we see a new type, typically when compiling. + userTypeLock sync.RWMutex + userTypeCache = make(map[reflect.Type]*userTypeInfo) +) + +// validType returns, and saves, the information associated with user-provided type rt. +// If the user type is not valid, err will be non-nil. 
To be used when the error handler +// is not set up. +func validUserType(rt reflect.Type) (ut *userTypeInfo, err os.Error) { + userTypeLock.RLock() + ut = userTypeCache[rt] + userTypeLock.RUnlock() + if ut != nil { + return + } + // Now set the value under the write lock. + userTypeLock.Lock() + defer userTypeLock.Unlock() + if ut = userTypeCache[rt]; ut != nil { + // Lost the race; not a problem. + return + } + ut = new(userTypeInfo) + ut.base = rt + ut.user = rt + // A type that is just a cycle of pointers (such as type T *T) cannot + // be represented in gobs, which need some concrete data. We use a + // cycle detection algorithm from Knuth, Vol 2, Section 3.1, Ex 6, + // pp 539-540. As we step through indirections, run another type at + // half speed. If they meet up, there's a cycle. + slowpoke := ut.base // walks half as fast as ut.base + for { + pt, ok := ut.base.(*reflect.PtrType) + if !ok { + break + } + ut.base = pt.Elem() + if ut.base == slowpoke { // ut.base lapped slowpoke + // recursive pointer type. + return nil, os.ErrorString("can't represent recursive pointer type " + ut.base.String()) + } + if ut.indir%2 == 0 { + slowpoke = slowpoke.(*reflect.PtrType).Elem() + } + ut.indir++ + } + ut.isGobEncoder, ut.encIndir = implementsGobEncoder(ut.user) + ut.isGobDecoder, ut.decIndir = implementsGobDecoder(ut.user) + userTypeCache[rt] = ut + if ut.encIndir != 0 || ut.decIndir != 0 { + // There are checks in lots of other places, but putting this here means we won't even + // attempt to encode/decode this type. + // TODO: make it possible to handle types that are indirect to the implementation, + // such as a structure field of type T when *T implements GobDecoder. + return nil, os.ErrorString("TODO: gob can't handle indirections to GobEncoder/Decoder") + } + return +} + +const ( + gobEncodeMethodName = "GobEncode" + gobDecodeMethodName = "GobDecode" +) + +// implementsGobEncoder reports whether the type implements the interface. 
It also +// returns the number of indirections required to get to the implementation. +// TODO: when reflection makes it possible, should also be prepared to climb up +// one level if we're not on a pointer (implementation could be on *T for our T). +// That will mean that indir could be < 0, which is sure to cause problems, but +// we ignore them now as indir is always >= 0 now. +func implementsGobEncoder(rt reflect.Type) (implements bool, indir int8) { + if rt == nil { + return + } + // The type might be a pointer, or it might not, and we need to keep + // dereferencing to the base type until we find an implementation. + for { + if rt.NumMethod() > 0 { // avoid allocations etc. unless there's some chance + if _, ok := reflect.MakeZero(rt).Interface().(GobEncoder); ok { + return true, indir + } + } + if p, ok := rt.(*reflect.PtrType); ok { + indir++ + if indir > 100 { // insane number of indirections + return false, 0 + } + rt = p.Elem() + continue + } + break + } + return false, 0 +} + +// implementsGobDecoder reports whether the type implements the interface. It also +// returns the number of indirections required to get to the implementation. +// TODO: see comment on implementsGobEncoder. +func implementsGobDecoder(rt reflect.Type) (implements bool, indir int8) { + if rt == nil { + return + } + // The type might be a pointer, or it might not, and we need to keep + // dereferencing to the base type until we find an implementation. + for { + if rt.NumMethod() > 0 { // avoid allocations etc. unless there's some chance + if _, ok := reflect.MakeZero(rt).Interface().(GobDecoder); ok { + return true, indir + } + } + if p, ok := rt.(*reflect.PtrType); ok { + indir++ + if indir > 100 { // insane number of indirections + return false, 0 + } + rt = p.Elem() + continue + } + break + } + return false, 0 +} +// userType returns, and saves, the information associated with user-provided type rt. +// If the user type is not valid, it calls error. 
+func userType(rt reflect.Type) *userTypeInfo { + ut, err := validUserType(rt) + if err != nil { + error(err) + } + return ut +} // A typeId represents a gob Type as an integer that can be passed on the wire. // Internally, typeIds are used as keys to a map to recover the underlying type info. type typeId int32 @@ -110,6 +252,7 @@ var ( // Predefined because it's needed by the Decoder var tWireType = mustGetTypeInfo(reflect.Typeof(wireType{})).id +var wireTypeUserInfo *userTypeInfo // userTypeInfo of (*wireType) func init() { // Some magic numbers to make sure there are no surprises. @@ -133,6 +276,7 @@ func init() { } nextId = firstUserId registerBasics() + wireTypeUserInfo = userType(reflect.Typeof((*wireType)(nil))) } // Array type @@ -142,12 +286,18 @@ type arrayType struct { Len int } -func newArrayType(name string, elem gobType, length int) *arrayType { - a := &arrayType{CommonType{Name: name}, elem.id(), length} - setTypeId(a) +func newArrayType(name string) *arrayType { + a := &arrayType{CommonType{Name: name}, 0, 0} return a } +func (a *arrayType) init(elem gobType, len int) { + // Set our type id before evaluating the element's, in case it's our own. 
+ setTypeId(a) + a.Elem = elem.id() + a.Len = len +} + func (a *arrayType) safeString(seen map[typeId]bool) string { if seen[a.Id] { return a.Name @@ -158,6 +308,23 @@ func (a *arrayType) safeString(seen map[typeId]bool) string { func (a *arrayType) string() string { return a.safeString(make(map[typeId]bool)) } +// GobEncoder type (something that implements the GobEncoder interface) +type gobEncoderType struct { + CommonType +} + +func newGobEncoderType(name string) *gobEncoderType { + g := &gobEncoderType{CommonType{Name: name}} + setTypeId(g) + return g +} + +func (g *gobEncoderType) safeString(seen map[typeId]bool) string { + return g.Name +} + +func (g *gobEncoderType) string() string { return g.Name } + // Map type type mapType struct { CommonType @@ -165,12 +332,18 @@ type mapType struct { Elem typeId } -func newMapType(name string, key, elem gobType) *mapType { - m := &mapType{CommonType{Name: name}, key.id(), elem.id()} - setTypeId(m) +func newMapType(name string) *mapType { + m := &mapType{CommonType{Name: name}, 0, 0} return m } +func (m *mapType) init(key, elem gobType) { + // Set our type id before evaluating the element's, in case it's our own. + setTypeId(m) + m.Key = key.id() + m.Elem = elem.id() +} + func (m *mapType) safeString(seen map[typeId]bool) string { if seen[m.Id] { return m.Name @@ -189,12 +362,17 @@ type sliceType struct { Elem typeId } -func newSliceType(name string, elem gobType) *sliceType { - s := &sliceType{CommonType{Name: name}, elem.id()} - setTypeId(s) +func newSliceType(name string) *sliceType { + s := &sliceType{CommonType{Name: name}, 0} return s } +func (s *sliceType) init(elem gobType) { + // Set our type id before evaluating the element's, in case it's our own. 
+ setTypeId(s) + s.Elem = elem.id() +} + func (s *sliceType) safeString(seen map[typeId]bool) string { if seen[s.Id] { return s.Name @@ -236,26 +414,31 @@ func (s *structType) string() string { return s.safeString(make(map[typeId]bool) func newStructType(name string) *structType { s := &structType{CommonType{Name: name}, nil} + // For historical reasons we set the id here rather than init. + // Se the comment in newTypeObject for details. setTypeId(s) return s } -// Step through the indirections on a type to discover the base type. -// Return the base type and the number of indirections. -func indirect(t reflect.Type) (rt reflect.Type, count int) { - rt = t - for { - pt, ok := rt.(*reflect.PtrType) - if !ok { - break - } - rt = pt.Elem() - count++ +// newTypeObject allocates a gobType for the reflection type rt. +// Unless ut represents a GobEncoder, rt should be the base type +// of ut. +// This is only called from the encoding side. The decoding side +// works through typeIds and userTypeInfos alone. +func newTypeObject(name string, ut *userTypeInfo, rt reflect.Type) (gobType, os.Error) { + // Does this type implement GobEncoder? + if ut.isGobEncoder { + return newGobEncoderType(name), nil } - return -} - -func newTypeObject(name string, rt reflect.Type) (gobType, os.Error) { + var err os.Error + var type0, type1 gobType + defer func() { + if err != nil { + types[rt] = nil, false + } + }() + // Install the top-level type before the subtypes (e.g. struct before + // fields) so recursive types can be constructed safely. switch t := rt.(type) { // All basic types are easy: they are predefined. 
case *reflect.BoolType: @@ -280,57 +463,73 @@ func newTypeObject(name string, rt reflect.Type) (gobType, os.Error) { return tInterface.gobType(), nil case *reflect.ArrayType: - gt, err := getType("", t.Elem()) + at := newArrayType(name) + types[rt] = at + type0, err = getBaseType("", t.Elem()) if err != nil { return nil, err } - return newArrayType(name, gt, t.Len()), nil + // Historical aside: + // For arrays, maps, and slices, we set the type id after the elements + // are constructed. This is to retain the order of type id allocation after + // a fix made to handle recursive types, which changed the order in + // which types are built. Delaying the setting in this way preserves + // type ids while allowing recursive types to be described. Structs, + // done below, were already handling recursion correctly so they + // assign the top-level id before those of the field. + at.init(type0, t.Len()) + return at, nil case *reflect.MapType: - kt, err := getType("", t.Key()) + mt := newMapType(name) + types[rt] = mt + type0, err = getBaseType("", t.Key()) if err != nil { return nil, err } - vt, err := getType("", t.Elem()) + type1, err = getBaseType("", t.Elem()) if err != nil { return nil, err } - return newMapType(name, kt, vt), nil + mt.init(type0, type1) + return mt, nil case *reflect.SliceType: // []byte == []uint8 is a special case if t.Elem().Kind() == reflect.Uint8 { return tBytes.gobType(), nil } - gt, err := getType(t.Elem().Name(), t.Elem()) + st := newSliceType(name) + types[rt] = st + type0, err = getBaseType(t.Elem().Name(), t.Elem()) if err != nil { return nil, err } - return newSliceType(name, gt), nil + st.init(type0) + return st, nil case *reflect.StructType: - // Install the struct type itself before the fields so recursive - // structures can be constructed safely. 
- strType := newStructType(name) - types[rt] = strType - idToType[strType.id()] = strType - field := make([]*fieldType, t.NumField()) + st := newStructType(name) + types[rt] = st + idToType[st.id()] = st for i := 0; i < t.NumField(); i++ { f := t.Field(i) - typ, _ := indirect(f.Type) + if !isExported(f.Name) { + continue + } + typ := userType(f.Type).base tname := typ.Name() if tname == "" { - t, _ := indirect(f.Type) + t := userType(f.Type).base tname = t.String() } - gt, err := getType(tname, f.Type) + gt, err := getBaseType(tname, f.Type) if err != nil { return nil, err } - field[i] = &fieldType{f.Name, gt.id()} + st.Field = append(st.Field, &fieldType{f.Name, gt.id()}) } - strType.Field = field - return strType, nil + return st, nil default: return nil, os.ErrorString("gob NewTypeObject can't handle type: " + rt.String()) @@ -338,15 +537,30 @@ func newTypeObject(name string, rt reflect.Type) (gobType, os.Error) { return nil, nil } +// isExported reports whether this is an exported - upper case - name. +func isExported(name string) bool { + rune, _ := utf8.DecodeRuneInString(name) + return unicode.IsUpper(rune) +} + +// getBaseType returns the Gob type describing the given reflect.Type's base type. +// typeLock must be held. +func getBaseType(name string, rt reflect.Type) (gobType, os.Error) { + ut := userType(rt) + return getType(name, ut, ut.base) +} + // getType returns the Gob type describing the given reflect.Type. +// Should be called only when handling GobEncoders/Decoders, +// which may be pointers. All other types are handled through the +// base type, never a pointer. // typeLock must be held. 
-func getType(name string, rt reflect.Type) (gobType, os.Error) { - rt, _ = indirect(rt) +func getType(name string, ut *userTypeInfo, rt reflect.Type) (gobType, os.Error) { typ, present := types[rt] if present { return typ, nil } - typ, err := newTypeObject(name, rt) + typ, err := newTypeObject(name, ut, rt) if err == nil { types[rt] = typ } @@ -371,6 +585,7 @@ func bootstrapType(name string, e interface{}, expect typeId) typeId { types[rt] = typ setTypeId(typ) checkId(expect, nextId) + userType(rt) // might as well cache it now return nextId } @@ -381,15 +596,16 @@ func bootstrapType(name string, e interface{}, expect typeId) typeId { // For bootstrapping purposes, we assume that the recipient knows how // to decode a wireType; it is exactly the wireType struct here, interpreted // using the gob rules for sending a structure, except that we assume the -// ids for wireType and structType are known. The relevant pieces +// ids for wireType and structType etc. are known. The relevant pieces // are built in encode.go's init() function. // To maintain binary compatibility, if you extend this type, always put // the new fields last. type wireType struct { - ArrayT *arrayType - SliceT *sliceType - StructT *structType - MapT *mapType + ArrayT *arrayType + SliceT *sliceType + StructT *structType + MapT *mapType + GobEncoderT *gobEncoderType } func (w *wireType) string() string { @@ -406,6 +622,8 @@ func (w *wireType) string() string { return w.StructT.Name case w.MapT != nil: return w.MapT.Name + case w.GobEncoderT != nil: + return w.GobEncoderT.Name } return unknown } @@ -418,49 +636,96 @@ type typeInfo struct { var typeInfoMap = make(map[reflect.Type]*typeInfo) // protected by typeLock -// The reflection type must have all its indirections processed out. // typeLock must be held. 
-func getTypeInfo(rt reflect.Type) (*typeInfo, os.Error) { - if rt.Kind() == reflect.Ptr { - panic("pointer type in getTypeInfo: " + rt.String()) +func getTypeInfo(ut *userTypeInfo) (*typeInfo, os.Error) { + rt := ut.base + if ut.isGobEncoder { + // We want the user type, not the base type. + rt = ut.user } info, ok := typeInfoMap[rt] - if !ok { - info = new(typeInfo) - name := rt.Name() - gt, err := getType(name, rt) + if ok { + return info, nil + } + info = new(typeInfo) + gt, err := getBaseType(rt.Name(), rt) + if err != nil { + return nil, err + } + info.id = gt.id() + + if ut.isGobEncoder { + userType, err := getType(rt.Name(), ut, rt) if err != nil { return nil, err } - info.id = gt.id() - t := info.id.gobType() - switch typ := rt.(type) { - case *reflect.ArrayType: - info.wire = &wireType{ArrayT: t.(*arrayType)} - case *reflect.MapType: - info.wire = &wireType{MapT: t.(*mapType)} - case *reflect.SliceType: - // []byte == []uint8 is a special case handled separately - if typ.Elem().Kind() != reflect.Uint8 { - info.wire = &wireType{SliceT: t.(*sliceType)} - } - case *reflect.StructType: - info.wire = &wireType{StructT: t.(*structType)} + info.wire = &wireType{GobEncoderT: userType.id().gobType().(*gobEncoderType)} + typeInfoMap[ut.user] = info + return info, nil + } + + t := info.id.gobType() + switch typ := rt.(type) { + case *reflect.ArrayType: + info.wire = &wireType{ArrayT: t.(*arrayType)} + case *reflect.MapType: + info.wire = &wireType{MapT: t.(*mapType)} + case *reflect.SliceType: + // []byte == []uint8 is a special case handled separately + if typ.Elem().Kind() != reflect.Uint8 { + info.wire = &wireType{SliceT: t.(*sliceType)} } - typeInfoMap[rt] = info + case *reflect.StructType: + info.wire = &wireType{StructT: t.(*structType)} } + typeInfoMap[rt] = info return info, nil } // Called only when a panic is acceptable and unexpected. 
func mustGetTypeInfo(rt reflect.Type) *typeInfo { - t, err := getTypeInfo(rt) + t, err := getTypeInfo(userType(rt)) if err != nil { panic("getTypeInfo: " + err.String()) } return t } +// GobEncoder is the interface describing data that provides its own +// representation for encoding values for transmission to a GobDecoder. +// A type that implements GobEncoder and GobDecoder has complete +// control over the representation of its data and may therefore +// contain things such as private fields, channels, and functions, +// which are not usually transmissable in gob streams. +// +// Note: Since gobs can be stored permanently, It is good design +// to guarantee the encoding used by a GobEncoder is stable as the +// software evolves. For instance, it might make sense for GobEncode +// to include a version number in the encoding. +// +// Note: At the moment, the type implementing GobEncoder must +// be exactly the type passed to Encode. For example, if *T implements +// GobEncoder, the data item must be of type *T, not T or **T. +type GobEncoder interface { + // GobEncode returns a byte slice representing the encoding of the + // receiver for transmission to a GobDecoder, usually of the same + // concrete type. + GobEncode() ([]byte, os.Error) +} + +// GobDecoder is the interface describing data that provides its own +// routine for decoding transmitted values sent by a GobEncoder. +// +// Note: At the moment, the type implementing GobDecoder must +// be exactly the type passed to Decode. For example, if *T implements +// GobDecoder, the data item must be of type *T, not T or **T. +type GobDecoder interface { + // GobDecode overwrites the receiver, which must be a pointer, + // with the value represented by the byte slice, which was written + // by GobEncode, usually for the same concrete type. 
+ GobDecode([]byte) os.Error +} + var ( nameToConcreteType = make(map[string]reflect.Type) concreteTypeToName = make(map[reflect.Type]string) @@ -473,18 +738,18 @@ func RegisterName(name string, value interface{}) { // reserved for nil panic("attempt to register empty name") } - rt, _ := indirect(reflect.Typeof(value)) + base := userType(reflect.Typeof(value)).base // Check for incompatible duplicates. - if t, ok := nameToConcreteType[name]; ok && t != rt { + if t, ok := nameToConcreteType[name]; ok && t != base { panic("gob: registering duplicate types for " + name) } - if n, ok := concreteTypeToName[rt]; ok && n != name { - panic("gob: registering duplicate names for " + rt.String()) + if n, ok := concreteTypeToName[base]; ok && n != name { + panic("gob: registering duplicate names for " + base.String()) } // Store the name and type provided by the user.... nameToConcreteType[name] = reflect.Typeof(value) // but the flattened type in the type table, since that's what decode needs. - concreteTypeToName[rt] = name + concreteTypeToName[base] = name } // Register records a type, identified by a value for that type, under its diff --git a/src/pkg/gob/type_test.go b/src/pkg/gob/type_test.go index 5aecde103..ffd1345e5 100644 --- a/src/pkg/gob/type_test.go +++ b/src/pkg/gob/type_test.go @@ -26,7 +26,7 @@ var basicTypes = []typeT{ func getTypeUnlocked(name string, rt reflect.Type) gobType { typeLock.Lock() defer typeLock.Unlock() - t, err := getType(name, rt) + t, err := getBaseType(name, rt) if err != nil { panic("getTypeUnlocked: " + err.String()) } @@ -126,27 +126,27 @@ func TestMapType(t *testing.T) { } type Bar struct { - x string + X string } // This structure has pointers and refers to itself, making it a good test case. 
type Foo struct { - a int - b int32 // will become int - c string - d []byte - e *float64 // will become float64 - f ****float64 // will become float64 - g *Bar - h *Bar // should not interpolate the definition of Bar again - i *Foo // will not explode + A int + B int32 // will become int + C string + D []byte + E *float64 // will become float64 + F ****float64 // will become float64 + G *Bar + H *Bar // should not interpolate the definition of Bar again + I *Foo // will not explode } func TestStructType(t *testing.T) { sstruct := getTypeUnlocked("Foo", reflect.Typeof(Foo{})) str := sstruct.string() // If we can print it correctly, we built it correctly. - expected := "Foo = struct { a int; b int; c string; d bytes; e float; f float; g Bar = struct { x string; }; h Bar; i Foo; }" + expected := "Foo = struct { A int; B int; C string; D bytes; E float; F float; G Bar = struct { X string; }; H Bar; I Foo; }" if str != expected { t.Errorf("struct printed as %q; expected %q", str, expected) } diff --git a/src/pkg/html/doc.go b/src/pkg/html/doc.go index c5338d078..4f5dee72d 100644 --- a/src/pkg/html/doc.go +++ b/src/pkg/html/doc.go @@ -69,6 +69,9 @@ call to Next. For example, to extract an HTML page's anchor text: } } +A Tokenizer typically skips over HTML comments. To return comment tokens, set +Tokenizer.ReturnComments to true before looping over calls to Next. + Parsing is done by calling Parse with an io.Reader, which returns the root of the parse tree (the document element) as a *Node. It is the caller's responsibility to ensure that the Reader provides UTF-8 encoded HTML. For diff --git a/src/pkg/html/token.go b/src/pkg/html/token.go index d63883850..ad03241ed 100644 --- a/src/pkg/html/token.go +++ b/src/pkg/html/token.go @@ -25,6 +25,8 @@ const ( EndTagToken // A SelfClosingTagToken tag looks like
. SelfClosingTagToken + // A CommentToken looks like . + CommentToken ) // String returns a string representation of the TokenType. @@ -40,6 +42,8 @@ func (t TokenType) String() string { return "EndTag" case SelfClosingTagToken: return "SelfClosingTag" + case CommentToken: + return "Comment" } return "Invalid(" + strconv.Itoa(int(t)) + ")" } @@ -52,8 +56,8 @@ type Attribute struct { } // A Token consists of a TokenType and some Data (tag name for start and end -// tags, content for text). A tag Token may also contain a slice of Attributes. -// Data is unescaped for both tag and text Tokens (it looks like "a" case SelfClosingTagToken: return "<" + t.tagString() + "/>" + case CommentToken: + return "" } return "Invalid(" + strconv.Itoa(int(t.Type)) + ")" } // A Tokenizer returns a stream of HTML Tokens. type Tokenizer struct { + // If ReturnComments is set, Next returns comment tokens; + // otherwise it skips over comments (default). + ReturnComments bool + // r is the source of the HTML text. r io.Reader // tt is the TokenType of the most recently read token. If tt == Error @@ -176,6 +186,39 @@ func (z *Tokenizer) readTo(x uint8) os.Error { panic("unreachable") } +// nextMarkupDeclaration returns the next TokenType starting with ", don't just assume that it's a comment. + for i := 0; i < 2; i++ { + c, err := z.readByte() + if err != nil { + return TextToken, err + } + if c != '-' { + return z.nextText(), nil + } + } + // is a valid comment. + for dashCount := 2; ; { + c, err := z.readByte() + if err != nil { + return TextToken, err + } + switch c { + case '-': + dashCount++ + case '>': + if dashCount >= 2 { + return CommentToken, nil + } + fallthrough + default: + dashCount = 0 + } + } + panic("unreachable") +} + // nextTag returns the next TokenType starting from the tag open state. 
func (z *Tokenizer) nextTag() (tt TokenType, err os.Error) { c, err := z.readByte() @@ -189,7 +232,7 @@ func (z *Tokenizer) nextTag() (tt TokenType, err os.Error) { case 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z': tt = StartTagToken case c == '!': - return ErrorToken, os.NewError("html: TODO(nigeltao): implement comments") + return z.nextMarkupDeclaration() case c == '?': return ErrorToken, os.NewError("html: TODO(nigeltao): implement XML processing instructions") default: @@ -221,22 +264,8 @@ func (z *Tokenizer) nextTag() (tt TokenType, err os.Error) { panic("unreachable") } -// Next scans the next token and returns its type. -func (z *Tokenizer) Next() TokenType { - if z.err != nil { - z.tt = ErrorToken - return z.tt - } - z.p0 = z.p1 - c, err := z.readByte() - if err != nil { - z.tt, z.err = ErrorToken, err - return z.tt - } - if c == '<' { - z.tt, z.err = z.nextTag() - return z.tt - } +// nextText reads all text up until an '<'. +func (z *Tokenizer) nextText() TokenType { for { c, err := z.readByte() if err != nil { @@ -255,6 +284,31 @@ func (z *Tokenizer) Next() TokenType { panic("unreachable") } +// Next scans the next token and returns its type. +func (z *Tokenizer) Next() TokenType { + for { + if z.err != nil { + z.tt = ErrorToken + return z.tt + } + z.p0 = z.p1 + c, err := z.readByte() + if err != nil { + z.tt, z.err = ErrorToken, err + return z.tt + } + if c == '<' { + z.tt, z.err = z.nextTag() + if z.tt == CommentToken && !z.ReturnComments { + continue + } + return z.tt + } + return z.nextText() + } + panic("unreachable") +} + // trim returns the largest j such that z.buf[i:j] contains only white space, // or only white space plus the final ">" or "/>" of the raw data. func (z *Tokenizer) trim(i int) int { @@ -299,18 +353,33 @@ loop: return z.buf[i0:i], z.trim(i) } -// Text returns the raw data after unescaping. +// Text returns the unescaped text of a TextToken or a CommentToken. 
// The contents of the returned slice may change on the next call to Next. func (z *Tokenizer) Text() []byte { - s := unescape(z.Raw()) - z.p0 = z.p1 - return s + switch z.tt { + case TextToken: + s := unescape(z.Raw()) + z.p0 = z.p1 + return s + case CommentToken: + // We trim the "" from the right. + // "" is a valid comment, so the adjusted endpoints might overlap. + i0 := z.p0 + 4 + i1 := z.p1 - 3 + z.p0 = z.p1 + var s []byte + if i0 < i1 { + s = unescape(z.buf[i0:i1]) + } + return s + } + return nil } // TagName returns the lower-cased name of a tag token (the `img` out of -// ``), and whether the tag has attributes. +// ``) and whether the tag has attributes. // The contents of the returned slice may change on the next call to Next. -func (z *Tokenizer) TagName() (name []byte, remaining bool) { +func (z *Tokenizer) TagName() (name []byte, hasAttr bool) { i := z.p0 + 1 if i >= z.p1 { z.p0 = z.p1 @@ -320,14 +389,14 @@ func (z *Tokenizer) TagName() (name []byte, remaining bool) { i++ } name, z.p0 = z.lower(i) - remaining = z.p0 != z.p1 + hasAttr = z.p0 != z.p1 return } // TagAttr returns the lower-cased key and unescaped value of the next unparsed -// attribute for the current tag token, and whether there are more attributes. +// attribute for the current tag token and whether there are more attributes. // The contents of the returned slices may change on the next call to Next. -func (z *Tokenizer) TagAttr() (key, val []byte, remaining bool) { +func (z *Tokenizer) TagAttr() (key, val []byte, moreAttr bool) { key, i := z.lower(z.p0) // Get past the "=\"". 
if i == z.p1 || z.buf[i] != '=' { @@ -363,7 +432,7 @@ loop: } } val, z.p0 = z.buf[i:dst], z.trim(src) - remaining = z.p0 != z.p1 + moreAttr = z.p0 != z.p1 return } @@ -372,14 +441,14 @@ loop: func (z *Tokenizer) Token() Token { t := Token{Type: z.tt} switch z.tt { - case TextToken: + case TextToken, CommentToken: t.Data = string(z.Text()) case StartTagToken, EndTagToken, SelfClosingTagToken: var attr []Attribute - name, remaining := z.TagName() - for remaining { + name, moreAttr := z.TagName() + for moreAttr { var key, val []byte - key, val, remaining = z.TagAttr() + key, val, moreAttr = z.TagAttr() attr = append(attr, Attribute{string(key), string(val)}) } t.Data = string(name) diff --git a/src/pkg/html/token_test.go b/src/pkg/html/token_test.go index e07999ca5..5cf1f6dac 100644 --- a/src/pkg/html/token_test.go +++ b/src/pkg/html/token_test.go @@ -7,6 +7,7 @@ package html import ( "bytes" "os" + "strings" "testing" ) @@ -15,8 +16,8 @@ type tokenTest struct { desc string // The HTML to parse. html string - // The string representations of the expected tokens. - tokens []string + // The string representations of the expected tokens, joined by '$'. + golden string } var tokenTests = []tokenTest{ @@ -25,61 +26,86 @@ var tokenTests = []tokenTest{ { "text", "foo bar", - []string{ - "foo bar", - }, + "foo bar", }, // An entity. { "entity", "one < two", - []string{ - "one < two", - }, + "one < two", }, // A start, self-closing and end tag. The tokenizer does not care if the start // and end tokens don't match; that is the job of the parser. { "tags", "bd", - []string{ - "", - "b", - "", - "d", - "", - }, + "$b$$d$", + }, + // Comments. + { + "comment0", + "abcdef", + "abc$$$def", + }, + { + "comment1", + "az", + "a$z", + }, + { + "comment2", + "az", + "a$z", + }, + { + "comment3", + "az", + "a$z", + }, + { + "comment4", + "az", + "a$z", + }, + { + "comment5", + "az", + "a$<!>z", + }, + { + "comment6", + "az", + "a$<!->z", + }, + { + "comment7", + "a