diff options
author | Michael Stapelberg <michael@stapelberg.de> | 2013-03-23 11:28:53 +0100 |
---|---|---|
committer | Michael Stapelberg <michael@stapelberg.de> | 2013-03-23 11:28:53 +0100 |
commit | b39e15dde5ec7b96c15da9faf4ab5892501c1aae (patch) | |
tree | 718cede1f6ca97d082c6c40b7dc3f4f6148253c0 /src/pkg/encoding | |
parent | 04b08da9af0c450d645ab7389d1467308cfc2db8 (diff) | |
download | golang-upstream/1.1_hg20130323.tar.gz |
Imported Upstream version 1.1~hg20130323upstream/1.1_hg20130323
Diffstat (limited to 'src/pkg/encoding')
-rw-r--r-- | src/pkg/encoding/ascii85/ascii85.go | 1 | ||||
-rw-r--r-- | src/pkg/encoding/asn1/marshal.go | 1 | ||||
-rw-r--r-- | src/pkg/encoding/base32/base32.go | 87 | ||||
-rw-r--r-- | src/pkg/encoding/base32/base32_test.go | 106 | ||||
-rw-r--r-- | src/pkg/encoding/base64/base64.go | 63 | ||||
-rw-r--r-- | src/pkg/encoding/base64/base64_test.go | 71 | ||||
-rw-r--r-- | src/pkg/encoding/binary/varint.go | 1 | ||||
-rw-r--r-- | src/pkg/encoding/csv/reader.go | 3 | ||||
-rw-r--r-- | src/pkg/encoding/gob/codec_test.go | 6 | ||||
-rw-r--r-- | src/pkg/encoding/gob/decode.go | 1 | ||||
-rw-r--r-- | src/pkg/encoding/gob/timing_test.go | 9 | ||||
-rw-r--r-- | src/pkg/encoding/gob/type.go | 1 | ||||
-rw-r--r-- | src/pkg/encoding/json/decode.go | 13 | ||||
-rw-r--r-- | src/pkg/encoding/json/decode_test.go | 13 | ||||
-rw-r--r-- | src/pkg/encoding/xml/marshal.go | 75 | ||||
-rw-r--r-- | src/pkg/encoding/xml/marshal_test.go | 38 | ||||
-rw-r--r-- | src/pkg/encoding/xml/read.go | 6 | ||||
-rw-r--r-- | src/pkg/encoding/xml/read_test.go | 222 | ||||
-rw-r--r-- | src/pkg/encoding/xml/typeinfo.go | 41 | ||||
-rw-r--r-- | src/pkg/encoding/xml/xml.go | 28 | ||||
-rw-r--r-- | src/pkg/encoding/xml/xml_test.go | 31 |
21 files changed, 693 insertions, 124 deletions
diff --git a/src/pkg/encoding/ascii85/ascii85.go b/src/pkg/encoding/ascii85/ascii85.go index 705022792..e2afc5871 100644 --- a/src/pkg/encoding/ascii85/ascii85.go +++ b/src/pkg/encoding/ascii85/ascii85.go @@ -296,5 +296,4 @@ func (d *decoder) Read(p []byte) (n int, err error) { nn, d.readErr = d.r.Read(d.buf[d.nbuf:]) d.nbuf += nn } - panic("unreachable") } diff --git a/src/pkg/encoding/asn1/marshal.go b/src/pkg/encoding/asn1/marshal.go index 0c216fdb3..adaf80dcd 100644 --- a/src/pkg/encoding/asn1/marshal.go +++ b/src/pkg/encoding/asn1/marshal.go @@ -460,7 +460,6 @@ func marshalBody(out *forkableWriter, value reflect.Value, params fieldParameter default: return marshalUTF8String(out, v.String()) } - return } return StructuralError{"unknown Go type"} diff --git a/src/pkg/encoding/base32/base32.go b/src/pkg/encoding/base32/base32.go index dbefc48fa..fe17b7322 100644 --- a/src/pkg/encoding/base32/base32.go +++ b/src/pkg/encoding/base32/base32.go @@ -6,8 +6,10 @@ package base32 import ( + "bytes" "io" "strconv" + "strings" ) /* @@ -48,6 +50,13 @@ var StdEncoding = NewEncoding(encodeStd) // It is typically used in DNS. var HexEncoding = NewEncoding(encodeHex) +var removeNewlinesMapper = func(r rune) rune { + if r == '\r' || r == '\n' { + return -1 + } + return r +} + /* * Encoder */ @@ -228,40 +237,47 @@ func (e CorruptInputError) Error() string { // decode is like Decode but returns an additional 'end' value, which // indicates if end-of-message padding was encountered and thus any -// additional data is an error. +// additional data is an error. This method assumes that src has been +// stripped of all supported whitespace ('\r' and '\n'). func (enc *Encoding) decode(dst, src []byte) (n int, end bool, err error) { - osrc := src + olen := len(src) for len(src) > 0 && !end { // Decode quantum using the base32 alphabet var dbuf [8]byte dlen := 8 - // do the top bytes contain any data? for j := 0; j < 8; { if len(src) == 0 { - return n, false, CorruptInputError(len(osrc) - len(src) - j) + return n, false, CorruptInputError(olen - len(src) - j) } in := src[0] src = src[1:] - if in == '\r' || in == '\n' { - // Ignore this character. - continue - } if in == '=' && j >= 2 && len(src) < 8 { - // We've reached the end and there's - // padding, the rest should be padded - for k := 0; k < 8-j-1; k++ { + // We've reached the end and there's padding + if len(src)+j < 8-1 { + // not enough padding + return n, false, CorruptInputError(olen) + } + for k := 0; k < 8-1-j; k++ { if len(src) > k && src[k] != '=' { - return n, false, CorruptInputError(len(osrc) - len(src) + k - 1) + // incorrect padding + return n, false, CorruptInputError(olen - len(src) + k - 1) } } - dlen = j - end = true + dlen, end = j, true + // 7, 5 and 2 are not valid padding lengths, and so 1, 3 and 6 are not + // valid dlen values. See RFC 4648 Section 6 "Base 32 Encoding" listing + // the five valid padding lengths, and Section 9 "Illustrations and + // Examples" for an illustration for how the the 1st, 3rd and 6th base32 + // src bytes do not yield enough information to decode a dst byte. + if dlen == 1 || dlen == 3 || dlen == 6 { + return n, false, CorruptInputError(olen - len(src) - 1) + } break } dbuf[j] = enc.decodeMap[in] if dbuf[j] == 0xFF { - return n, false, CorruptInputError(len(osrc) - len(src) - 1) + return n, false, CorruptInputError(olen - len(src) - 1) } j++ } @@ -269,16 +285,16 @@ func (enc *Encoding) decode(dst, src []byte) (n int, end bool, err error) { // Pack 8x 5-bit source blocks into 5 byte destination // quantum switch dlen { - case 7, 8: + case 8: dst[4] = dbuf[6]<<5 | dbuf[7] fallthrough - case 6, 5: + case 7: dst[3] = dbuf[4]<<7 | dbuf[5]<<2 | dbuf[6]>>3 fallthrough - case 4: + case 5: dst[2] = dbuf[3]<<4 | dbuf[4]>>1 fallthrough - case 3: + case 4: dst[1] = dbuf[1]<<6 | dbuf[2]<<1 | dbuf[3]>>4 fallthrough case 2: @@ -288,11 +304,11 @@ func (enc *Encoding) decode(dst, src []byte) (n int, end bool, err error) { switch dlen { case 2: n += 1 - case 3, 4: + case 4: n += 2 case 5: n += 3 - case 6, 7: + case 7: n += 4 case 8: n += 5 @@ -307,12 +323,14 @@ func (enc *Encoding) decode(dst, src []byte) (n int, end bool, err error) { // number of bytes successfully written and CorruptInputError. // New line characters (\r and \n) are ignored. func (enc *Encoding) Decode(dst, src []byte) (n int, err error) { + src = bytes.Map(removeNewlinesMapper, src) n, _, err = enc.decode(dst, src) return } // DecodeString returns the bytes represented by the base32 string s. func (enc *Encoding) DecodeString(s string) ([]byte, error) { + s = strings.Map(removeNewlinesMapper, s) dbuf := make([]byte, enc.DecodedLen(len(s))) n, err := enc.Decode(dbuf, []byte(s)) return dbuf[:n], err @@ -377,9 +395,34 @@ func (d *decoder) Read(p []byte) (n int, err error) { return n, d.err } +type newlineFilteringReader struct { + wrapped io.Reader +} + +func (r *newlineFilteringReader) Read(p []byte) (int, error) { + n, err := r.wrapped.Read(p) + for n > 0 { + offset := 0 + for i, b := range p[0:n] { + if b != '\r' && b != '\n' { + if i != offset { + p[offset] = b + } + offset++ + } + } + if offset > 0 { + return offset, err + } + // Previous buffer entirely whitespace, read again + n, err = r.wrapped.Read(p) + } + return n, err +} + // NewDecoder constructs a new base32 stream decoder. func NewDecoder(enc *Encoding, r io.Reader) io.Reader { - return &decoder{enc: enc, r: r} + return &decoder{enc: enc, r: &newlineFilteringReader{r}} } // DecodedLen returns the maximum length in bytes of the decoded data diff --git a/src/pkg/encoding/base32/base32_test.go b/src/pkg/encoding/base32/base32_test.go index 98365e18c..63298d1c9 100644 --- a/src/pkg/encoding/base32/base32_test.go +++ b/src/pkg/encoding/base32/base32_test.go @@ -8,6 +8,7 @@ import ( "bytes" "io" "io/ioutil" + "strings" "testing" ) @@ -137,27 +138,48 @@ func TestDecoderBuffering(t *testing.T) { } func TestDecodeCorrupt(t *testing.T) { - type corrupt struct { - e string - p int - } - examples := []corrupt{ + testCases := []struct { + input string + offset int // -1 means no corruption. + }{ + {"", -1}, {"!!!!", 0}, {"x===", 0}, {"AA=A====", 2}, {"AAA=AAAA", 3}, {"MMMMMMMMM", 8}, {"MMMMMM", 0}, + {"A=", 1}, + {"AA=", 3}, + {"AA==", 4}, + {"AA===", 5}, + {"AAAA=", 5}, + {"AAAA==", 6}, + {"AAAAA=", 6}, + {"AAAAA==", 7}, + {"A=======", 1}, + {"AA======", -1}, + {"AAA=====", 3}, + {"AAAA====", -1}, + {"AAAAA===", -1}, + {"AAAAAA==", 6}, + {"AAAAAAA=", -1}, + {"AAAAAAAA", -1}, } - - for _, e := range examples { - dbuf := make([]byte, StdEncoding.DecodedLen(len(e.e))) - _, err := StdEncoding.Decode(dbuf, []byte(e.e)) + for _, tc := range testCases { + dbuf := make([]byte, StdEncoding.DecodedLen(len(tc.input))) + _, err := StdEncoding.Decode(dbuf, []byte(tc.input)) + if tc.offset == -1 { + if err != nil { + t.Error("Decoder wrongly detected coruption in", tc.input) + } + continue + } switch err := err.(type) { case CorruptInputError: - testEqual(t, "Corruption in %q at offset %v, want %v", e.e, int(err), e.p) + testEqual(t, "Corruption in %q at offset %v, want %v", tc.input, int(err), tc.offset) default: - t.Error("Decoder failed to detect corruption in", e) + t.Error("Decoder failed to detect corruption in", tc) } } } @@ -195,9 +217,21 @@ func TestBig(t *testing.T) { } } +func testStringEncoding(t *testing.T, expected string, examples []string) { + for _, e := range examples { + buf, err := StdEncoding.DecodeString(e) + if err != nil { + t.Errorf("Decode(%q) failed: %v", e, err) + continue + } + if s := string(buf); s != expected { + t.Errorf("Decode(%q) = %q, want %q", e, s, expected) + } + } +} + func TestNewLineCharacters(t *testing.T) { // Each of these should decode to the string "sure", without errors. - const expected = "sure" examples := []string{ "ON2XEZI=", "ON2XEZI=\r", @@ -209,14 +243,44 @@ func TestNewLineCharacters(t *testing.T) { "ON2XEZ\nI=", "ON2XEZI\n=", } - for _, e := range examples { - buf, err := StdEncoding.DecodeString(e) - if err != nil { - t.Errorf("Decode(%q) failed: %v", e, err) - continue - } - if s := string(buf); s != expected { - t.Errorf("Decode(%q) = %q, want %q", e, s, expected) - } + testStringEncoding(t, "sure", examples) + + // Each of these should decode to the string "foobar", without errors. + examples = []string{ + "MZXW6YTBOI======", + "MZXW6YTBOI=\r\n=====", + } + testStringEncoding(t, "foobar", examples) +} + +func TestDecoderIssue4779(t *testing.T) { + encoded := `JRXXEZLNEBUXA43VNUQGI33MN5ZCA43JOQQGC3LFOQWCAY3PNZZWKY3UMV2HK4 +RAMFSGS4DJONUWG2LOM4QGK3DJOQWCA43FMQQGI3YKMVUXK43NN5SCA5DFNVYG64RANFXGG2LENFSH +K3TUEB2XIIDMMFRG64TFEBSXIIDEN5WG64TFEBWWCZ3OMEQGC3DJOF2WCLRAKV2CAZLONFWQUYLEEB +WWS3TJNUQHMZLONFQW2LBAOF2WS4ZANZXXG5DSOVSCAZLYMVZGG2LUMF2GS33OEB2WY3DBNVRW6IDM +MFRG64TJOMQG42LTNEQHK5AKMFWGS4LVNFYCAZLYEBSWCIDDN5WW233EN4QGG33OONSXC5LBOQXCAR +DVNFZSAYLVORSSA2LSOVZGKIDEN5WG64RANFXAU4TFOBZGK2DFNZSGK4TJOQQGS3RAOZXWY5LQORQX +IZJAOZSWY2LUEBSXG43FEBRWS3DMOVWSAZDPNRXXEZJAMV2SAZTVM5UWC5BANZ2WY3DBBJYGC4TJMF +2HK4ROEBCXQY3FOB2GK5LSEBZWS3TUEBXWGY3BMVRWC5BAMN2XA2LEMF2GC5BANZXW4IDQOJXWSZDF +NZ2CYIDTOVXHIIDJNYFGG5LMOBQSA4LVNEQG6ZTGNFRWSYJAMRSXGZLSOVXHIIDNN5WGY2LUEBQW42 +LNEBUWIIDFON2CA3DBMJXXE5LNFY== +====` + encodedShort := strings.Replace(encoded, "\n", "", -1) + + dec := NewDecoder(StdEncoding, bytes.NewBufferString(encoded)) + res1, err := ioutil.ReadAll(dec) + if err != nil { + t.Errorf("ReadAll failed: %v", err) + } + + dec = NewDecoder(StdEncoding, bytes.NewBufferString(encodedShort)) + var res2 []byte + res2, err = ioutil.ReadAll(dec) + if err != nil { + t.Errorf("ReadAll failed: %v", err) + } + + if !bytes.Equal(res1, res2) { + t.Error("Decoded results not equal") } } diff --git a/src/pkg/encoding/base64/base64.go b/src/pkg/encoding/base64/base64.go index e66672a1c..85e398fd0 100644 --- a/src/pkg/encoding/base64/base64.go +++ b/src/pkg/encoding/base64/base64.go @@ -6,8 +6,10 @@ package base64 import ( + "bytes" "io" "strconv" + "strings" ) /* @@ -49,6 +51,13 @@ var StdEncoding = NewEncoding(encodeStd) // It is typically used in URLs and file names. var URLEncoding = NewEncoding(encodeURL) +var removeNewlinesMapper = func(r rune) rune { + if r == '\r' || r == '\n' { + return -1 + } + return r +} + /* * Encoder */ @@ -208,9 +217,10 @@ func (e CorruptInputError) Error() string { // decode is like Decode but returns an additional 'end' value, which // indicates if end-of-message padding was encountered and thus any -// additional data is an error. +// additional data is an error. This method assumes that src has been +// stripped of all supported whitespace ('\r' and '\n'). func (enc *Encoding) decode(dst, src []byte) (n int, end bool, err error) { - osrc := src + olen := len(src) for len(src) > 0 && !end { // Decode quantum using the base64 alphabet var dbuf [4]byte @@ -218,32 +228,26 @@ func (enc *Encoding) decode(dst, src []byte) (n int, end bool, err error) { for j := 0; j < 4; { if len(src) == 0 { - return n, false, CorruptInputError(len(osrc) - len(src) - j) + return n, false, CorruptInputError(olen - len(src) - j) } in := src[0] src = src[1:] - if in == '\r' || in == '\n' { - // Ignore this character. - continue - } if in == '=' && j >= 2 && len(src) < 4 { - // We've reached the end and there's - // padding - if len(src) == 0 && j == 2 { + // We've reached the end and there's padding + if len(src)+j < 4-1 { // not enough padding - return n, false, CorruptInputError(len(osrc)) + return n, false, CorruptInputError(olen) } if len(src) > 0 && src[0] != '=' { // incorrect padding - return n, false, CorruptInputError(len(osrc) - len(src) - 1) + return n, false, CorruptInputError(olen - len(src) - 1) } - dlen = j - end = true + dlen, end = j, true break } dbuf[j] = enc.decodeMap[in] if dbuf[j] == 0xFF { - return n, false, CorruptInputError(len(osrc) - len(src) - 1) + return n, false, CorruptInputError(olen - len(src) - 1) } j++ } @@ -273,12 +277,14 @@ func (enc *Encoding) decode(dst, src []byte) (n int, end bool, err error) { // number of bytes successfully written and CorruptInputError. // New line characters (\r and \n) are ignored. func (enc *Encoding) Decode(dst, src []byte) (n int, err error) { + src = bytes.Map(removeNewlinesMapper, src) n, _, err = enc.decode(dst, src) return } // DecodeString returns the bytes represented by the base64 string s. func (enc *Encoding) DecodeString(s string) ([]byte, error) { + s = strings.Map(removeNewlinesMapper, s) dbuf := make([]byte, enc.DecodedLen(len(s))) n, err := enc.Decode(dbuf, []byte(s)) return dbuf[:n], err @@ -343,9 +349,34 @@ func (d *decoder) Read(p []byte) (n int, err error) { return n, d.err } +type newlineFilteringReader struct { + wrapped io.Reader +} + +func (r *newlineFilteringReader) Read(p []byte) (int, error) { + n, err := r.wrapped.Read(p) + for n > 0 { + offset := 0 + for i, b := range p[0:n] { + if b != '\r' && b != '\n' { + if i != offset { + p[offset] = b + } + offset++ + } + } + if offset > 0 { + return offset, err + } + // Previous buffer entirely whitespace, read again + n, err = r.wrapped.Read(p) + } + return n, err +} + // NewDecoder constructs a new base64 stream decoder. func NewDecoder(enc *Encoding, r io.Reader) io.Reader { - return &decoder{enc: enc, r: r} + return &decoder{enc: enc, r: &newlineFilteringReader{r}} } // DecodedLen returns the maximum length in bytes of the decoded data diff --git a/src/pkg/encoding/base64/base64_test.go b/src/pkg/encoding/base64/base64_test.go index 2166abd7a..579591a88 100644 --- a/src/pkg/encoding/base64/base64_test.go +++ b/src/pkg/encoding/base64/base64_test.go @@ -9,6 +9,7 @@ import ( "errors" "io" "io/ioutil" + "strings" "testing" "time" ) @@ -142,11 +143,11 @@ func TestDecoderBuffering(t *testing.T) { } func TestDecodeCorrupt(t *testing.T) { - type corrupt struct { - e string - p int - } - examples := []corrupt{ + testCases := []struct { + input string + offset int // -1 means no corruption. + }{ + {"", -1}, {"!!!!", 0}, {"x===", 1}, {"AA=A", 2}, @@ -154,18 +155,27 @@ func TestDecodeCorrupt(t *testing.T) { {"AAAAA", 4}, {"AAAAAA", 4}, {"A=", 1}, + {"A==", 1}, {"AA=", 3}, + {"AA==", -1}, + {"AAA=", -1}, + {"AAAA", -1}, {"AAAAAA=", 7}, } - - for _, e := range examples { - dbuf := make([]byte, StdEncoding.DecodedLen(len(e.e))) - _, err := StdEncoding.Decode(dbuf, []byte(e.e)) + for _, tc := range testCases { + dbuf := make([]byte, StdEncoding.DecodedLen(len(tc.input))) + _, err := StdEncoding.Decode(dbuf, []byte(tc.input)) + if tc.offset == -1 { + if err != nil { + t.Error("Decoder wrongly detected coruption in", tc.input) + } + continue + } switch err := err.(type) { case CorruptInputError: - testEqual(t, "Corruption in %q at offset %v, want %v", e.e, int(err), e.p) + testEqual(t, "Corruption in %q at offset %v, want %v", tc.input, int(err), tc.offset) default: - t.Error("Decoder failed to detect corruption in", e) + t.Error("Decoder failed to detect corruption in", tc) } } } @@ -216,6 +226,8 @@ func TestNewLineCharacters(t *testing.T) { "c3V\nyZ\rQ==", "c3VyZ\nQ==", "c3VyZQ\n==", + "c3VyZQ=\n=", + "c3VyZQ=\r\n\r\n=", } for _, e := range examples { buf, err := StdEncoding.DecodeString(e) @@ -276,3 +288,40 @@ func TestDecoderIssue3577(t *testing.T) { t.Errorf("timeout; Decoder blocked without returning an error") } } + +func TestDecoderIssue4779(t *testing.T) { + encoded := `CP/EAT8AAAEF +AQEBAQEBAAAAAAAAAAMAAQIEBQYHCAkKCwEAAQUBAQEBAQEAAAAAAAAAAQACAwQFBgcICQoLEAAB +BAEDAgQCBQcGCAUDDDMBAAIRAwQhEjEFQVFhEyJxgTIGFJGhsUIjJBVSwWIzNHKC0UMHJZJT8OHx +Y3M1FqKygyZEk1RkRcKjdDYX0lXiZfKzhMPTdePzRieUpIW0lcTU5PSltcXV5fVWZnaGlqa2xtbm +9jdHV2d3h5ent8fX5/cRAAICAQIEBAMEBQYHBwYFNQEAAhEDITESBEFRYXEiEwUygZEUobFCI8FS +0fAzJGLhcoKSQ1MVY3M08SUGFqKygwcmNcLSRJNUoxdkRVU2dGXi8rOEw9N14/NGlKSFtJXE1OT0 +pbXF1eX1VmZ2hpamtsbW5vYnN0dXZ3eHl6e3x//aAAwDAQACEQMRAD8A9VSSSSUpJJJJSkkkJ+Tj +1kiy1jCJJDnAcCTykpKkuQ6p/jN6FgmxlNduXawwAzaGH+V6jn/R/wCt71zdn+N/qL3kVYFNYB4N +ji6PDVjWpKp9TSXnvTf8bFNjg3qOEa2n6VlLpj/rT/pf567DpX1i6L1hs9Py67X8mqdtg/rUWbbf ++gkp0kkkklKSSSSUpJJJJT//0PVUkkklKVLq3WMDpGI7KzrNjADtYNXvI/Mqr/Pd/q9W3vaxjnvM +NaCXE9gNSvGPrf8AWS3qmba5jjsJhoB0DAf0NDf6sevf+/lf8Hj0JJATfWT6/dV6oXU1uOLQeKKn +EQP+Hubtfe/+R7Mf/g7f5xcocp++Z11JMCJPgFBxOg7/AOuqDx8I/ikpkXkmSdU8mJIJA/O8EMAy +j+mSARB/17pKVXYWHXjsj7yIex0PadzXMO1zT5KHoNA3HT8ietoGhgjsfA+CSnvvqh/jJtqsrwOv +2b6NGNzXfTYexzJ+nU7/ALkf4P8Awv6P9KvTQQ4AgyDqCF85Pho3CTB7eHwXoH+LT65uZbX9X+o2 +bqbPb06551Y4 +` + encodedShort := strings.Replace(encoded, "\n", "", -1) + + dec := NewDecoder(StdEncoding, bytes.NewBufferString(encoded)) + res1, err := ioutil.ReadAll(dec) + if err != nil { + t.Errorf("ReadAll failed: %v", err) + } + + dec = NewDecoder(StdEncoding, bytes.NewBufferString(encodedShort)) + var res2 []byte + res2, err = ioutil.ReadAll(dec) + if err != nil { + t.Errorf("ReadAll failed: %v", err) + } + + if !bytes.Equal(res1, res2) { + t.Error("Decoded results not equal") + } +} diff --git a/src/pkg/encoding/binary/varint.go b/src/pkg/encoding/binary/varint.go index 7035529f2..3a2dfa3c7 100644 --- a/src/pkg/encoding/binary/varint.go +++ b/src/pkg/encoding/binary/varint.go @@ -120,7 +120,6 @@ func ReadUvarint(r io.ByteReader) (uint64, error) { x |= uint64(b&0x7f) << s s += 7 } - panic("unreachable") } // ReadVarint reads an encoded signed integer from r and returns it as an int64. diff --git a/src/pkg/encoding/csv/reader.go b/src/pkg/encoding/csv/reader.go index db4d98852..b099caf60 100644 --- a/src/pkg/encoding/csv/reader.go +++ b/src/pkg/encoding/csv/reader.go @@ -171,7 +171,6 @@ func (r *Reader) ReadAll() (records [][]string, err error) { } records = append(records, record) } - panic("unreachable") } // readRune reads one rune from r, folding \r\n to \n and keeping track @@ -213,7 +212,6 @@ func (r *Reader) skip(delim rune) error { return nil } } - panic("unreachable") } // parseRecord reads and parses a single csv record from r. @@ -250,7 +248,6 @@ func (r *Reader) parseRecord() (fields []string, err error) { return nil, err } } - panic("unreachable") } // parseField parses the next field in the record. The read field is diff --git a/src/pkg/encoding/gob/codec_test.go b/src/pkg/encoding/gob/codec_test.go index 482212b74..9e38e31d5 100644 --- a/src/pkg/encoding/gob/codec_test.go +++ b/src/pkg/encoding/gob/codec_test.go @@ -1191,10 +1191,8 @@ func TestInterface(t *testing.T) { if v1 != nil || v2 != nil { t.Errorf("item %d inconsistent nils", i) } - continue - if v1.Square() != v2.Square() { - t.Errorf("item %d inconsistent values: %v %v", i, v1, v2) - } + } else if v1.Square() != v2.Square() { + t.Errorf("item %d inconsistent values: %v %v", i, v1, v2) } } } diff --git a/src/pkg/encoding/gob/decode.go b/src/pkg/encoding/gob/decode.go index a80d9f919..7cc756540 100644 --- a/src/pkg/encoding/gob/decode.go +++ b/src/pkg/encoding/gob/decode.go @@ -1066,7 +1066,6 @@ func (dec *Decoder) compatibleType(fr reflect.Type, fw typeId, inProgress map[re case reflect.Struct: return true } - return true } // typeString returns a human-readable description of the type identified by remoteId. diff --git a/src/pkg/encoding/gob/timing_test.go b/src/pkg/encoding/gob/timing_test.go index 13eb11925..f589675dd 100644 --- a/src/pkg/encoding/gob/timing_test.go +++ b/src/pkg/encoding/gob/timing_test.go @@ -9,6 +9,7 @@ import ( "fmt" "io" "os" + "runtime" "testing" ) @@ -49,6 +50,10 @@ func BenchmarkEndToEndByteBuffer(b *testing.B) { } func TestCountEncodeMallocs(t *testing.T) { + if runtime.GOMAXPROCS(0) > 1 { + t.Skip("skipping; GOMAXPROCS>1") + } + const N = 1000 var buf bytes.Buffer @@ -65,6 +70,10 @@ func TestCountEncodeMallocs(t *testing.T) { } func TestCountDecodeMallocs(t *testing.T) { + if runtime.GOMAXPROCS(0) > 1 { + t.Skip("skipping; GOMAXPROCS>1") + } + const N = 1000 var buf bytes.Buffer diff --git a/src/pkg/encoding/gob/type.go b/src/pkg/encoding/gob/type.go index ea0db4eac..7fa0b499f 100644 --- a/src/pkg/encoding/gob/type.go +++ b/src/pkg/encoding/gob/type.go @@ -526,7 +526,6 @@ func newTypeObject(name string, ut *userTypeInfo, rt reflect.Type) (gobType, err default: return nil, errors.New("gob NewTypeObject can't handle type: " + rt.String()) } - return nil, nil } // isExported reports whether this is an exported - upper case - name. diff --git a/src/pkg/encoding/json/decode.go b/src/pkg/encoding/json/decode.go index f2ec9cb67..62ac294b8 100644 --- a/src/pkg/encoding/json/decode.go +++ b/src/pkg/encoding/json/decode.go @@ -261,6 +261,16 @@ func (d *decodeState) value(v reflect.Value) { } d.scan.step(&d.scan, '"') d.scan.step(&d.scan, '"') + + n := len(d.scan.parseState) + if n > 0 && d.scan.parseState[n-1] == parseObjectKey { + // d.scan thinks we just read an object key; finish the object + d.scan.step(&d.scan, ':') + d.scan.step(&d.scan, '"') + d.scan.step(&d.scan, '"') + d.scan.step(&d.scan, '}') + } + return } @@ -739,6 +749,7 @@ func (d *decodeState) valueInterface() interface{} { switch d.scanWhile(scanSkipSpace) { default: d.error(errPhase) + panic("unreachable") case scanBeginArray: return d.arrayInterface() case scanBeginObject: @@ -746,7 +757,6 @@ func (d *decodeState) valueInterface() interface{} { case scanBeginLiteral: return d.literalInterface() } - panic("unreachable") } // arrayInterface is like array but returns []interface{}. @@ -858,7 +868,6 @@ func (d *decodeState) literalInterface() interface{} { } return n } - panic("unreachable") } // getu4 decodes \uXXXX from the beginning of s, returning the hex value, diff --git a/src/pkg/encoding/json/decode_test.go b/src/pkg/encoding/json/decode_test.go index e1bd918dd..037c5b236 100644 --- a/src/pkg/encoding/json/decode_test.go +++ b/src/pkg/encoding/json/decode_test.go @@ -1178,3 +1178,16 @@ func TestUnmarshalJSONLiteralError(t *testing.T) { t.Errorf("got err = %v; want out of range error", err) } } + +// Test that extra object elements in an array do not result in a +// "data changing underfoot" error. +// Issue 3717 +func TestSkipArrayObjects(t *testing.T) { + json := `[{}]` + var dest [0]interface{} + + err := Unmarshal([]byte(json), &dest) + if err != nil { + t.Errorf("got error %q, want nil", err) + } +} diff --git a/src/pkg/encoding/xml/marshal.go b/src/pkg/encoding/xml/marshal.go index ea58ce254..47b001763 100644 --- a/src/pkg/encoding/xml/marshal.go +++ b/src/pkg/encoding/xml/marshal.go @@ -120,11 +120,76 @@ func (enc *Encoder) Encode(v interface{}) error { type printer struct { *bufio.Writer + seq int indent string prefix string depth int indentedIn bool putNewline bool + attrNS map[string]string // map prefix -> name space + attrPrefix map[string]string // map name space -> prefix +} + +// createAttrPrefix finds the name space prefix attribute to use for the given name space, +// defining a new prefix if necessary. It returns the prefix and whether it is new. +func (p *printer) createAttrPrefix(url string) (prefix string, isNew bool) { + if prefix = p.attrPrefix[url]; prefix != "" { + return prefix, false + } + + // The "http://www.w3.org/XML/1998/namespace" name space is predefined as "xml" + // and must be referred to that way. + // (The "http://www.w3.org/2000/xmlns/" name space is also predefined as "xmlns", + // but users should not be trying to use that one directly - that's our job.) + if url == xmlURL { + return "xml", false + } + + // Need to define a new name space. + if p.attrPrefix == nil { + p.attrPrefix = make(map[string]string) + p.attrNS = make(map[string]string) + } + + // Pick a name. We try to use the final element of the path + // but fall back to _. + prefix = strings.TrimRight(url, "/") + if i := strings.LastIndex(prefix, "/"); i >= 0 { + prefix = prefix[i+1:] + } + if prefix == "" || !isName([]byte(prefix)) || strings.Contains(prefix, ":") { + prefix = "_" + } + if strings.HasPrefix(prefix, "xml") { + // xmlanything is reserved. + prefix = "_" + prefix + } + if p.attrNS[prefix] != "" { + // Name is taken. Find a better one. + for p.seq++; ; p.seq++ { + if id := prefix + "_" + strconv.Itoa(p.seq); p.attrNS[id] == "" { + prefix = id + break + } + } + } + + p.attrPrefix[url] = prefix + p.attrNS[prefix] = url + + p.WriteString(`xmlns:`) + p.WriteString(prefix) + p.WriteString(`="`) + EscapeText(p, []byte(url)) + p.WriteString(`" `) + + return prefix, true +} + +// deleteAttrPrefix removes an attribute name space prefix. +func (p *printer) deleteAttrPrefix(prefix string) { + delete(p.attrPrefix, p.attrNS[prefix]) + delete(p.attrNS, prefix) } // marshalValue writes one or more XML elements representing val. @@ -210,6 +275,14 @@ func (p *printer) marshalValue(val reflect.Value, finfo *fieldInfo) error { continue } p.WriteByte(' ') + if finfo.xmlns != "" { + prefix, created := p.createAttrPrefix(finfo.xmlns) + if created { + defer p.deleteAttrPrefix(prefix) + } + p.WriteString(prefix) + p.WriteByte(':') + } p.WriteString(finfo.name) p.WriteString(`="`) if err := p.marshalSimple(fv.Type(), fv); err != nil { @@ -286,7 +359,7 @@ func (p *printer) marshalStruct(tinfo *typeInfo, val reflect.Value) error { s := parentStack{printer: p} for i := range tinfo.fields { finfo := &tinfo.fields[i] - if finfo.flags&(fAttr) != 0 { + if finfo.flags&fAttr != 0 { continue } vf := finfo.value(val) diff --git a/src/pkg/encoding/xml/marshal_test.go b/src/pkg/encoding/xml/marshal_test.go index 3a190def6..ca14a1e53 100644 --- a/src/pkg/encoding/xml/marshal_test.go +++ b/src/pkg/encoding/xml/marshal_test.go @@ -266,6 +266,16 @@ type Plain struct { V interface{} } +type MyInt int + +type EmbedInt struct { + MyInt +} + +type Strings struct { + X []string `xml:"A>B,omitempty"` +} + // Unless explicitly stated as such (or *Plain), all of the // tests below are two-way tests. When introducing new tests, // please try to make them two-way as well to ensure that @@ -790,6 +800,17 @@ var marshalTests = []struct { }, UnmarshalOnly: true, }, + { + ExpectXML: `<EmbedInt><MyInt>42</MyInt></EmbedInt>`, + Value: &EmbedInt{ + MyInt: 42, + }, + }, + // Test omitempty with parent chain; see golang.org/issue/4168. + { + ExpectXML: `<Strings><A></A></Strings>`, + Value: &Strings{}, + }, } func TestMarshal(t *testing.T) { @@ -812,6 +833,10 @@ func TestMarshal(t *testing.T) { } } +type AttrParent struct { + X string `xml:"X>Y,attr"` +} + var marshalErrorTests = []struct { Value interface{} Err string @@ -839,6 +864,11 @@ var marshalErrorTests = []struct { Value: &Domain{Comment: []byte("f--bar")}, Err: `xml: comments must not contain "--"`, }, + // Reject parent chain with attr, never worked; see golang.org/issue/5033. + { + Value: &AttrParent{}, + Err: `xml: X>Y chain not valid with attr flag`, + }, } var marshalIndentTests = []struct { @@ -861,8 +891,12 @@ var marshalIndentTests = []struct { func TestMarshalErrors(t *testing.T) { for idx, test := range marshalErrorTests { - _, err := Marshal(test.Value) - if err == nil || err.Error() != test.Err { + data, err := Marshal(test.Value) + if err == nil { + t.Errorf("#%d: marshal(%#v) = [success] %q, want error %v", idx, test.Value, data, test.Err) + continue + } + if err.Error() != test.Err { t.Errorf("#%d: marshal(%#v) = [error] %v, want %v", idx, test.Value, err, test.Err) } if test.Kind != reflect.Invalid { diff --git a/src/pkg/encoding/xml/read.go b/src/pkg/encoding/xml/read.go index 344ab514e..a7a2a9655 100644 --- a/src/pkg/encoding/xml/read.go +++ b/src/pkg/encoding/xml/read.go @@ -263,7 +263,7 @@ func (p *Decoder) unmarshal(val reflect.Value, start *StartElement) error { strv := finfo.value(sv) // Look for attribute. for _, a := range start.Attr { - if a.Name.Local == finfo.name { + if a.Name.Local == finfo.name && (finfo.xmlns == "" || finfo.xmlns == a.Name.Space) { copyValue(strv, []byte(a.Value)) break } @@ -441,7 +441,7 @@ func (p *Decoder) unmarshalPath(tinfo *typeInfo, sv reflect.Value, parents []str Loop: for i := range tinfo.fields { finfo := &tinfo.fields[i] - if finfo.flags&fElement == 0 || len(finfo.parents) < len(parents) { + if finfo.flags&fElement == 0 || len(finfo.parents) < len(parents) || finfo.xmlns != "" && finfo.xmlns != start.Name.Space { continue } for j := range parents { @@ -493,7 +493,6 @@ Loop: return true, nil } } - panic("unreachable") } // Skip reads tokens until it has consumed the end element @@ -517,5 +516,4 @@ func (d *Decoder) Skip() error { return nil } } - panic("unreachable") } diff --git a/src/pkg/encoding/xml/read_test.go b/src/pkg/encoding/xml/read_test.go index b45e2f0e6..7d28c5d7d 100644 --- a/src/pkg/encoding/xml/read_test.go +++ b/src/pkg/encoding/xml/read_test.go @@ -6,6 +6,7 @@ package xml import ( "reflect" + "strings" "testing" "time" ) @@ -399,3 +400,224 @@ func TestUnmarshalAttr(t *testing.T) { t.Fatalf("Unmarshal with %s failed:\nhave %#v,\n want %#v", x, p3.Int, 1) } } + +type Tables struct { + HTable string `xml:"http://www.w3.org/TR/html4/ table"` + FTable string `xml:"http://www.w3schools.com/furniture table"` +} + +var tables = []struct { + xml string + tab Tables + ns string +}{ + { + xml: `<Tables>` + + `<table xmlns="http://www.w3.org/TR/html4/">hello</table>` + + `<table xmlns="http://www.w3schools.com/furniture">world</table>` + + `</Tables>`, + tab: Tables{"hello", "world"}, + }, + { + xml: `<Tables>` + + `<table xmlns="http://www.w3schools.com/furniture">world</table>` + + `<table xmlns="http://www.w3.org/TR/html4/">hello</table>` + + `</Tables>`, + tab: Tables{"hello", "world"}, + }, + { + xml: `<Tables xmlns:f="http://www.w3schools.com/furniture" xmlns:h="http://www.w3.org/TR/html4/">` + + `<f:table>world</f:table>` + + `<h:table>hello</h:table>` + + `</Tables>`, + tab: Tables{"hello", "world"}, + }, + { + xml: `<Tables>` + + `<table>bogus</table>` + + `</Tables>`, + tab: Tables{}, + }, + { + xml: `<Tables>` + + `<table>only</table>` + + `</Tables>`, + tab: Tables{HTable: "only"}, + ns: "http://www.w3.org/TR/html4/", + }, + { + xml: `<Tables>` + + `<table>only</table>` + + `</Tables>`, + tab: Tables{FTable: "only"}, + ns: "http://www.w3schools.com/furniture", + }, + { + xml: `<Tables>` + + `<table>only</table>` + + `</Tables>`, + tab: Tables{}, + ns: "something else entirely", + }, +} + +func TestUnmarshalNS(t *testing.T) { + for i, tt := range tables { + var dst Tables + var err error + if tt.ns != "" { + d := NewDecoder(strings.NewReader(tt.xml)) + d.DefaultSpace = tt.ns + err = d.Decode(&dst) + } else { + err = Unmarshal([]byte(tt.xml), &dst) + } + if err != nil { + t.Errorf("#%d: Unmarshal: %v", i, err) + continue + } + want := tt.tab + if dst != want { + t.Errorf("#%d: dst=%+v, want %+v", i, dst, want) + } + } +} + +func TestMarshalNS(t *testing.T) { + dst := Tables{"hello", "world"} + data, err := Marshal(&dst) + if err != nil { + t.Fatalf("Marshal: %v", err) + } + want := `<Tables><table xmlns="http://www.w3.org/TR/html4/">hello</table><table xmlns="http://www.w3schools.com/furniture">world</table></Tables>` + str := string(data) + if str != want { + t.Errorf("have: %q\nwant: %q\n", str, want) + } +} + +type TableAttrs struct { + TAttr TAttr +} + +type TAttr struct { + HTable string `xml:"http://www.w3.org/TR/html4/ table,attr"` + FTable string `xml:"http://www.w3schools.com/furniture table,attr"` + Lang string `xml:"http://www.w3.org/XML/1998/namespace lang,attr,omitempty"` + Other1 string `xml:"http://golang.org/xml/ other,attr,omitempty"` + Other2 string `xml:"http://golang.org/xmlfoo/ other,attr,omitempty"` + Other3 string `xml:"http://golang.org/json/ other,attr,omitempty"` + Other4 string `xml:"http://golang.org/2/json/ other,attr,omitempty"` +} + +var tableAttrs = []struct { + xml string + tab TableAttrs + ns string +}{ + { + xml: `<TableAttrs xmlns:f="http://www.w3schools.com/furniture" xmlns:h="http://www.w3.org/TR/html4/"><TAttr ` + + `h:table="hello" f:table="world" ` + + `/></TableAttrs>`, + tab: TableAttrs{TAttr{HTable: "hello", FTable: "world"}}, + }, + { + xml: `<TableAttrs><TAttr xmlns:f="http://www.w3schools.com/furniture" xmlns:h="http://www.w3.org/TR/html4/" ` + + `h:table="hello" f:table="world" ` + + `/></TableAttrs>`, + tab: TableAttrs{TAttr{HTable: "hello", FTable: "world"}}, + }, + { + xml: `<TableAttrs><TAttr ` + + `h:table="hello" f:table="world" xmlns:f="http://www.w3schools.com/furniture" xmlns:h="http://www.w3.org/TR/html4/" ` + + `/></TableAttrs>`, + tab: TableAttrs{TAttr{HTable: "hello", FTable: "world"}}, + }, + { + // Default space does not apply to attribute names. + xml: `<TableAttrs xmlns="http://www.w3schools.com/furniture" xmlns:h="http://www.w3.org/TR/html4/"><TAttr ` + + `h:table="hello" table="world" ` + + `/></TableAttrs>`, + tab: TableAttrs{TAttr{HTable: "hello", FTable: ""}}, + }, + { + // Default space does not apply to attribute names. + xml: `<TableAttrs xmlns:f="http://www.w3schools.com/furniture"><TAttr xmlns="http://www.w3.org/TR/html4/" ` + + `table="hello" f:table="world" ` + + `/></TableAttrs>`, + tab: TableAttrs{TAttr{HTable: "", FTable: "world"}}, + }, + { + xml: `<TableAttrs><TAttr ` + + `table="bogus" ` + + `/></TableAttrs>`, + tab: TableAttrs{}, + }, + { + // Default space does not apply to attribute names. + xml: `<TableAttrs xmlns:h="http://www.w3.org/TR/html4/"><TAttr ` + + `h:table="hello" table="world" ` + + `/></TableAttrs>`, + tab: TableAttrs{TAttr{HTable: "hello", FTable: ""}}, + ns: "http://www.w3schools.com/furniture", + }, + { + // Default space does not apply to attribute names. + xml: `<TableAttrs xmlns:f="http://www.w3schools.com/furniture"><TAttr ` + + `table="hello" f:table="world" ` + + `/></TableAttrs>`, + tab: TableAttrs{TAttr{HTable: "", FTable: "world"}}, + ns: "http://www.w3.org/TR/html4/", + }, + { + xml: `<TableAttrs><TAttr ` + + `table="bogus" ` + + `/></TableAttrs>`, + tab: TableAttrs{}, + ns: "something else entirely", + }, +} + +func TestUnmarshalNSAttr(t *testing.T) { + for i, tt := range tableAttrs { + var dst TableAttrs + var err error + if tt.ns != "" { + d := NewDecoder(strings.NewReader(tt.xml)) + d.DefaultSpace = tt.ns + err = d.Decode(&dst) + } else { + err = Unmarshal([]byte(tt.xml), &dst) + } + if err != nil { + t.Errorf("#%d: Unmarshal: %v", i, err) + continue + } + want := tt.tab + if dst != want { + t.Errorf("#%d: dst=%+v, want %+v", i, dst, want) + } + } +} + +func TestMarshalNSAttr(t *testing.T) { + src := TableAttrs{TAttr{"hello", "world", "en_US", "other1", "other2", "other3", "other4"}} + data, err := Marshal(&src) + if err != nil { + t.Fatalf("Marshal: %v", err) + } + want := `<TableAttrs><TAttr xmlns:html4="http://www.w3.org/TR/html4/" html4:table="hello" xmlns:furniture="http://www.w3schools.com/furniture" furniture:table="world" xml:lang="en_US" xmlns:_xml="http://golang.org/xml/" _xml:other="other1" xmlns:_xmlfoo="http://golang.org/xmlfoo/" _xmlfoo:other="other2" xmlns:json="http://golang.org/json/" json:other="other3" xmlns:json_1="http://golang.org/2/json/" json_1:other="other4"></TAttr></TableAttrs>` + str := string(data) + if str != want { + t.Errorf("Marshal:\nhave: %#q\nwant: %#q\n", str, want) + } + + var dst TableAttrs + if err := Unmarshal(data, &dst); err != nil { + t.Errorf("Unmarshal: %v", err) + } + + if dst != src { + t.Errorf("Unmarshal = %q, want %q", dst, src) + } +} diff --git a/src/pkg/encoding/xml/typeinfo.go b/src/pkg/encoding/xml/typeinfo.go index bbeb28d87..83e65402c 100644 --- a/src/pkg/encoding/xml/typeinfo.go +++ b/src/pkg/encoding/xml/typeinfo.go @@ -70,20 +70,19 @@ func getTypeInfo(typ reflect.Type) (*typeInfo, error) { if t.Kind() == reflect.Ptr { t = t.Elem() } - if t.Kind() != reflect.Struct { - continue - } - inner, err := getTypeInfo(t) - if err != nil { - return nil, err - } - for _, finfo := range inner.fields { - finfo.idx = append([]int{i}, finfo.idx...) - if err := addFieldInfo(typ, tinfo, &finfo); err != nil { + if t.Kind() == reflect.Struct { + inner, err := getTypeInfo(t) + if err != nil { return nil, err } + for _, finfo := range inner.fields { + finfo.idx = append([]int{i}, finfo.idx...) + if err := addFieldInfo(typ, tinfo, &finfo); err != nil { + return nil, err + } + } + continue } - continue } finfo, err := structFieldInfo(typ, &f) @@ -193,16 +192,19 @@ func structFieldInfo(typ reflect.Type, f *reflect.StructField) (*fieldInfo, erro } // Prepare field name and parents. - tokens = strings.Split(tag, ">") - if tokens[0] == "" { - tokens[0] = f.Name + parents := strings.Split(tag, ">") + if parents[0] == "" { + parents[0] = f.Name } - if tokens[len(tokens)-1] == "" { + if parents[len(parents)-1] == "" { return nil, fmt.Errorf("xml: trailing '>' in field %s of type %s", f.Name, typ) } - finfo.name = tokens[len(tokens)-1] - if len(tokens) > 1 { - finfo.parents = tokens[:len(tokens)-1] + finfo.name = parents[len(parents)-1] + if len(parents) > 1 { + if (finfo.flags & fElement) == 0 { + return nil, fmt.Errorf("xml: %s chain not valid with %s flag", tag, strings.Join(tokens[1:], ",")) + } + finfo.parents = parents[:len(parents)-1] } // If the field type has an XMLName field, the names must match @@ -268,6 +270,9 @@ Loop: if oldf.flags&fMode != newf.flags&fMode { continue } + if oldf.xmlns != "" && newf.xmlns != "" && oldf.xmlns != newf.xmlns { + continue + } minl := min(len(newf.parents), len(oldf.parents)) for p := 0; p < minl; p++ { if oldf.parents[p] != newf.parents[p] { diff --git a/src/pkg/encoding/xml/xml.go b/src/pkg/encoding/xml/xml.go index 143fec554..021f7e47d 100644 --- a/src/pkg/encoding/xml/xml.go +++ b/src/pkg/encoding/xml/xml.go @@ -169,6 +169,11 @@ type Decoder struct { // the CharsetReader's result values must be non-nil. CharsetReader func(charset string, input io.Reader) (io.Reader, error) + // DefaultSpace sets the default name space used for unadorned tags, + // as if the entire XML stream were wrapped in an element containing + // the attribute xmlns="DefaultSpace". + DefaultSpace string + r io.ByteReader buf bytes.Buffer saved *bytes.Buffer @@ -268,6 +273,8 @@ func (d *Decoder) Token() (t Token, err error) { return } +const xmlURL = "http://www.w3.org/XML/1998/namespace" + // Apply name space translation to name n. // The default name space (for Space=="") // applies only to element names, not to attribute names. @@ -277,11 +284,15 @@ func (d *Decoder) translate(n *Name, isElementName bool) { return case n.Space == "" && !isElementName: return + case n.Space == "xml": + n.Space = xmlURL case n.Space == "" && n.Local == "xmlns": return } if v, ok := d.ns[n.Space]; ok { n.Space = v + } else if n.Space == "" { + n.Space = d.DefaultSpace } } @@ -956,7 +967,7 @@ Input: b0, b1 = 0, 0 continue Input } - ent := string(d.buf.Bytes()[before]) + ent := string(d.buf.Bytes()[before:]) if ent[len(ent)-1] != ';' { ent += " (no semicolon)" } @@ -1718,6 +1729,7 @@ var ( esc_tab = []byte("	") esc_nl = []byte("
") esc_cr = []byte("
") + esc_fffd = []byte("\uFFFD") // Unicode replacement character ) // EscapeText writes to w the properly escaped XML equivalent @@ -1725,8 +1737,10 @@ var ( func EscapeText(w io.Writer, s []byte) error { var esc []byte last := 0 - for i, c := range s { - switch c { + for i := 0; i < len(s); { + r, width := utf8.DecodeRune(s[i:]) + i += width + switch r { case '"': esc = esc_quot case '\'': @@ -1744,15 +1758,19 @@ func EscapeText(w io.Writer, s []byte) error { case '\r': esc = esc_cr default: + if !isInCharacterRange(r) { + esc = esc_fffd + break + } continue } - if _, err := w.Write(s[last:i]); err != nil { + if _, err := w.Write(s[last : i-width]); err != nil { return err } if _, err := w.Write(esc); err != nil { return err } - last = i + 1 + last = i } if _, err := w.Write(s[last:]); err != nil { return err diff --git a/src/pkg/encoding/xml/xml_test.go b/src/pkg/encoding/xml/xml_test.go index 54dab5484..eeedbe575 100644 --- a/src/pkg/encoding/xml/xml_test.go +++ b/src/pkg/encoding/xml/xml_test.go @@ -5,6 +5,7 @@ package xml import ( + "bytes" "fmt" "io" "reflect" @@ -595,13 +596,6 @@ func TestEntityInsideCDATA(t *testing.T) { } } -// The last three tests (respectively one for characters in attribute -// names and two for character entities) pass not because of code -// changed for issue 1259, but instead pass with the given messages -// from other parts of xml.Decoder. I provide these to note the -// current behavior of situations where one might think that character -// range checking would detect the error, but it does not in fact. - var characterTests = []struct { in string err string @@ -611,8 +605,10 @@ var characterTests = []struct { {"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"}, {"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"}, {"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"}, + {"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"}, {"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"}, - {"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity & (no semicolon)"}, + {"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"}, + {"<doc>&hello;</doc>", "invalid character entity &hello;"}, } func TestDisallowedCharacters(t *testing.T) { @@ -629,7 +625,7 @@ func TestDisallowedCharacters(t *testing.T) { t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err) } if synerr.Msg != tt.err { - t.Fatalf("input %d synerr.Msg wrong: want '%s', got '%s'", i, tt.err, synerr.Msg) + t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg) } } } @@ -700,6 +696,21 @@ func TestEscapeTextIOErrors(t *testing.T) { err := EscapeText(errWriter{}, []byte{'A'}) if err == nil || err.Error() != expectErr { - t.Errorf("EscapeTest = [error] %v, want %v", err, expectErr) + t.Errorf("have %v, want %v", err, expectErr) + } +} + +func TestEscapeTextInvalidChar(t *testing.T) { + input := []byte("A \x00 terminated string.") + expected := "A \uFFFD terminated string." + + buff := new(bytes.Buffer) + if err := EscapeText(buff, input); err != nil { + t.Fatalf("have %v, want nil", err) + } + text := buff.String() + + if text != expected { + t.Errorf("have %v, want %v", text, expected) } } |