diff options
Diffstat (limited to 'src/pkg/encoding/xml')
-rw-r--r-- | src/pkg/encoding/xml/example_test.go | 40 | ||||
-rw-r--r-- | src/pkg/encoding/xml/marshal.go | 102 | ||||
-rw-r--r-- | src/pkg/encoding/xml/marshal_test.go | 208 | ||||
-rw-r--r-- | src/pkg/encoding/xml/read.go | 74 | ||||
-rw-r--r-- | src/pkg/encoding/xml/read_test.go | 44 | ||||
-rw-r--r-- | src/pkg/encoding/xml/typeinfo.go | 30 | ||||
-rw-r--r-- | src/pkg/encoding/xml/xml.go | 290 | ||||
-rw-r--r-- | src/pkg/encoding/xml/xml_test.go | 133 |
8 files changed, 739 insertions, 182 deletions
diff --git a/src/pkg/encoding/xml/example_test.go b/src/pkg/encoding/xml/example_test.go index 97c8c0b0d..becedd583 100644 --- a/src/pkg/encoding/xml/example_test.go +++ b/src/pkg/encoding/xml/example_test.go @@ -50,6 +50,46 @@ func ExampleMarshalIndent() { // </person> } +func ExampleEncoder() { + type Address struct { + City, State string + } + type Person struct { + XMLName xml.Name `xml:"person"` + Id int `xml:"id,attr"` + FirstName string `xml:"name>first"` + LastName string `xml:"name>last"` + Age int `xml:"age"` + Height float32 `xml:"height,omitempty"` + Married bool + Address + Comment string `xml:",comment"` + } + + v := &Person{Id: 13, FirstName: "John", LastName: "Doe", Age: 42} + v.Comment = " Need more details. " + v.Address = Address{"Hanga Roa", "Easter Island"} + + enc := xml.NewEncoder(os.Stdout) + enc.Indent(" ", " ") + if err := enc.Encode(v); err != nil { + fmt.Printf("error: %v\n", err) + } + + // Output: + // <person id="13"> + // <name> + // <first>John</first> + // <last>Doe</last> + // </name> + // <age>42</age> + // <Married>false</Married> + // <City>Hanga Roa</City> + // <State>Easter Island</State> + // <!-- Need more details. --> + // </person> +} + // This example demonstrates unmarshaling an XML excerpt into a value with // some preset fields. Note that the Phone field isn't modified and that // the XML <Company> element is ignored. Also, the Groups field is assigned diff --git a/src/pkg/encoding/xml/marshal.go b/src/pkg/encoding/xml/marshal.go index 6c3170bdd..ea58ce254 100644 --- a/src/pkg/encoding/xml/marshal.go +++ b/src/pkg/encoding/xml/marshal.go @@ -45,7 +45,7 @@ const ( // - a field with tag "name,attr" becomes an attribute with // the given name in the XML element. // - a field with tag ",attr" becomes an attribute with the -// field name in the in the XML element. +// field name in the XML element. // - a field with tag ",chardata" is written as character data, // not as an XML element. 
// - a field with tag ",innerxml" is written verbatim, not subject @@ -57,8 +57,8 @@ const ( // if the field value is empty. The empty values are false, 0, any // nil pointer or interface value, and any array, slice, map, or // string of length zero. -// - a non-pointer anonymous struct field is handled as if the -// fields of its value were part of the outer struct. +// - an anonymous struct field is handled as if the fields of its +// value were part of the outer struct. // // If a field uses a tag "a>b>c", then the element c will be nested inside // parent elements a and b. Fields that appear next to each other that name @@ -81,11 +81,8 @@ func Marshal(v interface{}) ([]byte, error) { func MarshalIndent(v interface{}, prefix, indent string) ([]byte, error) { var b bytes.Buffer enc := NewEncoder(&b) - enc.prefix = prefix - enc.indent = indent - err := enc.marshalValue(reflect.ValueOf(v), nil) - enc.Flush() - if err != nil { + enc.Indent(prefix, indent) + if err := enc.Encode(v); err != nil { return nil, err } return b.Bytes(), nil @@ -101,14 +98,24 @@ func NewEncoder(w io.Writer) *Encoder { return &Encoder{printer{Writer: bufio.NewWriter(w)}} } +// Indent sets the encoder to generate XML in which each element +// begins on a new indented line that starts with prefix and is followed by +// one or more copies of indent according to the nesting depth. +func (enc *Encoder) Indent(prefix, indent string) { + enc.prefix = prefix + enc.indent = indent +} + // Encode writes the XML encoding of v to the stream. // // See the documentation for Marshal for details about the conversion // of Go values to XML. func (enc *Encoder) Encode(v interface{}) error { err := enc.marshalValue(reflect.ValueOf(v), nil) - enc.Flush() - return err + if err != nil { + return err + } + return enc.Flush() } type printer struct { @@ -117,6 +124,7 @@ type printer struct { prefix string depth int indentedIn bool + putNewline bool } // marshalValue writes one or more XML elements representing val. 
@@ -164,7 +172,7 @@ func (p *printer) marshalValue(val reflect.Value, finfo *fieldInfo) error { xmlname := tinfo.xmlname if xmlname.name != "" { xmlns, name = xmlname.xmlns, xmlname.name - } else if v, ok := val.FieldByIndex(xmlname.idx).Interface().(Name); ok && v.Local != "" { + } else if v, ok := xmlname.value(val).Interface().(Name); ok && v.Local != "" { xmlns, name = v.Space, v.Local } } @@ -185,7 +193,9 @@ func (p *printer) marshalValue(val reflect.Value, finfo *fieldInfo) error { if xmlns != "" { p.WriteString(` xmlns="`) // TODO: EscapeString, to avoid the allocation. - Escape(p, []byte(xmlns)) + if err := EscapeText(p, []byte(xmlns)); err != nil { + return err + } p.WriteByte('"') } @@ -195,7 +205,7 @@ func (p *printer) marshalValue(val reflect.Value, finfo *fieldInfo) error { if finfo.flags&fAttr == 0 { continue } - fv := val.FieldByIndex(finfo.idx) + fv := finfo.value(val) if finfo.flags&fOmitEmpty != 0 && isEmptyValue(fv) { continue } @@ -224,7 +234,7 @@ func (p *printer) marshalValue(val reflect.Value, finfo *fieldInfo) error { p.WriteString(name) p.WriteByte('>') - return nil + return p.cachedWriteError() } var timeType = reflect.TypeOf(time.Time{}) @@ -241,50 +251,70 @@ func (p *printer) marshalSimple(typ reflect.Type, val reflect.Value) error { case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: p.WriteString(strconv.FormatUint(val.Uint(), 10)) case reflect.Float32, reflect.Float64: - p.WriteString(strconv.FormatFloat(val.Float(), 'g', -1, 64)) + p.WriteString(strconv.FormatFloat(val.Float(), 'g', -1, val.Type().Bits())) case reflect.String: // TODO: Add EscapeString. 
- Escape(p, []byte(val.String())) + EscapeText(p, []byte(val.String())) case reflect.Bool: p.WriteString(strconv.FormatBool(val.Bool())) case reflect.Array: // will be [...]byte - bytes := make([]byte, val.Len()) - for i := range bytes { - bytes[i] = val.Index(i).Interface().(byte) + var bytes []byte + if val.CanAddr() { + bytes = val.Slice(0, val.Len()).Bytes() + } else { + bytes = make([]byte, val.Len()) + reflect.Copy(reflect.ValueOf(bytes), val) } - Escape(p, bytes) + EscapeText(p, bytes) case reflect.Slice: // will be []byte - Escape(p, val.Bytes()) + EscapeText(p, val.Bytes()) default: return &UnsupportedTypeError{typ} } - return nil + return p.cachedWriteError() } var ddBytes = []byte("--") func (p *printer) marshalStruct(tinfo *typeInfo, val reflect.Value) error { if val.Type() == timeType { - p.WriteString(val.Interface().(time.Time).Format(time.RFC3339Nano)) - return nil + _, err := p.WriteString(val.Interface().(time.Time).Format(time.RFC3339Nano)) + return err } s := parentStack{printer: p} for i := range tinfo.fields { finfo := &tinfo.fields[i] - if finfo.flags&(fAttr|fAny) != 0 { + if finfo.flags&(fAttr) != 0 { continue } - vf := val.FieldByIndex(finfo.idx) + vf := finfo.value(val) switch finfo.flags & fMode { case fCharData: + var scratch [64]byte switch vf.Kind() { + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + Escape(p, strconv.AppendInt(scratch[:0], vf.Int(), 10)) + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + Escape(p, strconv.AppendUint(scratch[:0], vf.Uint(), 10)) + case reflect.Float32, reflect.Float64: + Escape(p, strconv.AppendFloat(scratch[:0], vf.Float(), 'g', -1, vf.Type().Bits())) + case reflect.Bool: + Escape(p, strconv.AppendBool(scratch[:0], vf.Bool())) case reflect.String: - Escape(p, []byte(vf.String())) + if err := EscapeText(p, []byte(vf.String())); err != nil { + return err + } case reflect.Slice: if elem, ok := vf.Interface().([]byte); ok { 
- Escape(p, elem) + if err := EscapeText(p, elem); err != nil { + return err + } + } + case reflect.Struct: + if vf.Type() == timeType { + Escape(p, []byte(vf.Interface().(time.Time).Format(time.RFC3339Nano))) } } continue @@ -340,7 +370,7 @@ func (p *printer) marshalStruct(tinfo *typeInfo, val reflect.Value) error { continue } - case fElement: + case fElement, fElement | fAny: s.trim(finfo.parents) if len(finfo.parents) > len(s.stack) { if vf.Kind() != reflect.Ptr && vf.Kind() != reflect.Interface || !vf.IsNil() { @@ -353,7 +383,13 @@ func (p *printer) marshalStruct(tinfo *typeInfo, val reflect.Value) error { } } s.trim(nil) - return nil + return p.cachedWriteError() +} + +// return the bufio Writer's cached write error +func (p *printer) cachedWriteError() error { + _, err := p.Write(nil) + return err } func (p *printer) writeIndent(depthDelta int) { @@ -368,7 +404,11 @@ func (p *printer) writeIndent(depthDelta int) { } p.indentedIn = false } - p.WriteByte('\n') + if p.putNewline { + p.WriteByte('\n') + } else { + p.putNewline = true + } if len(p.prefix) > 0 { p.WriteString(p.prefix) } diff --git a/src/pkg/encoding/xml/marshal_test.go b/src/pkg/encoding/xml/marshal_test.go index b6978a1e6..3a190def6 100644 --- a/src/pkg/encoding/xml/marshal_test.go +++ b/src/pkg/encoding/xml/marshal_test.go @@ -5,6 +5,10 @@ package xml import ( + "bytes" + "errors" + "fmt" + "io" "reflect" "strconv" "strings" @@ -56,6 +60,36 @@ type Book struct { Title string `xml:",chardata"` } +type Event struct { + XMLName struct{} `xml:"event"` + Year int `xml:",chardata"` +} + +type Movie struct { + XMLName struct{} `xml:"movie"` + Length uint `xml:",chardata"` +} + +type Pi struct { + XMLName struct{} `xml:"pi"` + Approximation float32 `xml:",chardata"` +} + +type Universe struct { + XMLName struct{} `xml:"universe"` + Visible float64 `xml:",chardata"` +} + +type Particle struct { + XMLName struct{} `xml:"particle"` + HasMass bool `xml:",chardata"` +} + +type Departure struct { + XMLName 
struct{} `xml:"departure"` + When time.Time `xml:",chardata"` +} + type SecretAgent struct { XMLName struct{} `xml:"agent"` Handle string `xml:"handle,attr"` @@ -108,7 +142,7 @@ type EmbedA struct { type EmbedB struct { FieldB string - EmbedC + *EmbedC } type EmbedC struct { @@ -185,6 +219,18 @@ type AnyTest struct { AnyField AnyHolder `xml:",any"` } +type AnyOmitTest struct { + XMLName struct{} `xml:"a"` + Nested string `xml:"nested>value"` + AnyField *AnyHolder `xml:",any,omitempty"` +} + +type AnySliceTest struct { + XMLName struct{} `xml:"a"` + Nested string `xml:"nested>value"` + AnyField []AnyHolder `xml:",any"` +} + type AnyHolder struct { XMLName Name XML string `xml:",innerxml"` @@ -330,6 +376,12 @@ var marshalTests = []struct { {Value: &Domain{Name: []byte("google.com&friends")}, ExpectXML: `<domain>google.com&friends</domain>`}, {Value: &Domain{Name: []byte("google.com"), Comment: []byte(" &friends ")}, ExpectXML: `<domain>google.com<!-- &friends --></domain>`}, {Value: &Book{Title: "Pride & Prejudice"}, ExpectXML: `<book>Pride & Prejudice</book>`}, + {Value: &Event{Year: -3114}, ExpectXML: `<event>-3114</event>`}, + {Value: &Movie{Length: 13440}, ExpectXML: `<movie>13440</movie>`}, + {Value: &Pi{Approximation: 3.14159265}, ExpectXML: `<pi>3.1415927</pi>`}, + {Value: &Universe{Visible: 9.3e13}, ExpectXML: `<universe>9.3e+13</universe>`}, + {Value: &Particle{HasMass: true}, ExpectXML: `<particle>true</particle>`}, + {Value: &Departure{When: ParseTime("2013-01-09T00:15:00-09:00")}, ExpectXML: `<departure>2013-01-09T00:15:00-09:00</departure>`}, {Value: atomValue, ExpectXML: atomXml}, { Value: &Ship{ @@ -493,7 +545,7 @@ var marshalTests = []struct { }, EmbedB: EmbedB{ FieldB: "A.B.B", - EmbedC: EmbedC{ + EmbedC: &EmbedC{ FieldA1: "A.B.C.A1", FieldA2: "A.B.C.A2", FieldB: "", // Shadowed by A.B.B @@ -649,12 +701,43 @@ var marshalTests = []struct { XML: "<sub>unknown</sub>", }, }, - UnmarshalOnly: true, }, { - Value: &AnyTest{Nested: "known", AnyField: 
AnyHolder{XML: "<unknown/>"}}, - ExpectXML: `<a><nested><value>known</value></nested></a>`, - MarshalOnly: true, + Value: &AnyTest{Nested: "known", + AnyField: AnyHolder{ + XML: "<unknown/>", + XMLName: Name{Local: "AnyField"}, + }, + }, + ExpectXML: `<a><nested><value>known</value></nested><AnyField><unknown/></AnyField></a>`, + }, + { + ExpectXML: `<a><nested><value>b</value></nested></a>`, + Value: &AnyOmitTest{ + Nested: "b", + }, + }, + { + ExpectXML: `<a><nested><value>b</value></nested><c><d>e</d></c><g xmlns="f"><h>i</h></g></a>`, + Value: &AnySliceTest{ + Nested: "b", + AnyField: []AnyHolder{ + { + XMLName: Name{Local: "c"}, + XML: "<d>e</d>", + }, + { + XMLName: Name{Space: "f", Local: "g"}, + XML: "<h>i</h>", + }, + }, + }, + }, + { + ExpectXML: `<a><nested><value>b</value></nested></a>`, + Value: &AnySliceTest{ + Nested: "b", + }, }, // Test recursive types. @@ -684,6 +767,29 @@ var marshalTests = []struct { Value: &IgnoreTest{}, UnmarshalOnly: true, }, + + // Test escaping. + { + ExpectXML: `<a><nested><value>dquote: "; squote: '; ampersand: &; less: <; greater: >;</value></nested><empty></empty></a>`, + Value: &AnyTest{ + Nested: `dquote: "; squote: '; ampersand: &; less: <; greater: >;`, + AnyField: AnyHolder{XMLName: Name{Local: "empty"}}, + }, + }, + { + ExpectXML: `<a><nested><value>newline: 
; cr: 
; tab: 	;</value></nested><AnyField></AnyField></a>`, + Value: &AnyTest{ + Nested: "newline: \n; cr: \r; tab: \t;", + AnyField: AnyHolder{XMLName: Name{Local: "AnyField"}}, + }, + }, + { + ExpectXML: "<a><nested><value>1\r2\r\n3\n\r4\n5</value></nested></a>", + Value: &AnyTest{ + Nested: "1\n2\n3\n\n4\n5", + }, + UnmarshalOnly: true, + }, } func TestMarshal(t *testing.T) { @@ -735,6 +841,24 @@ var marshalErrorTests = []struct { }, } +var marshalIndentTests = []struct { + Value interface{} + Prefix string + Indent string + ExpectXML string +}{ + { + Value: &SecretAgent{ + Handle: "007", + Identity: "James Bond", + Obfuscate: "<redacted/>", + }, + Prefix: "", + Indent: "\t", + ExpectXML: fmt.Sprintf("<agent handle=\"007\">\n\t<Identity>James Bond</Identity><redacted/>\n</agent>"), + }, +} + func TestMarshalErrors(t *testing.T) { for idx, test := range marshalErrorTests { _, err := Marshal(test.Value) @@ -779,6 +903,78 @@ func TestUnmarshal(t *testing.T) { } } +func TestMarshalIndent(t *testing.T) { + for i, test := range marshalIndentTests { + data, err := MarshalIndent(test.Value, test.Prefix, test.Indent) + if err != nil { + t.Errorf("#%d: Error: %s", i, err) + continue + } + if got, want := string(data), test.ExpectXML; got != want { + t.Errorf("#%d: MarshalIndent:\nGot:%s\nWant:\n%s", i, got, want) + } + } +} + +type limitedBytesWriter struct { + w io.Writer + remain int // until writes fail +} + +func (lw *limitedBytesWriter) Write(p []byte) (n int, err error) { + if lw.remain <= 0 { + println("error") + return 0, errors.New("write limit hit") + } + if len(p) > lw.remain { + p = p[:lw.remain] + n, _ = lw.w.Write(p) + lw.remain = 0 + return n, errors.New("write limit hit") + } + n, err = lw.w.Write(p) + lw.remain -= n + return n, err +} + +func TestMarshalWriteErrors(t *testing.T) { + var buf bytes.Buffer + const writeCap = 1024 + w := &limitedBytesWriter{&buf, writeCap} + enc := NewEncoder(w) + var err error + var i int + const n = 4000 + for i = 1; i <= n; i++ 
{ + err = enc.Encode(&Passenger{ + Name: []string{"Alice", "Bob"}, + Weight: 5, + }) + if err != nil { + break + } + } + if err == nil { + t.Error("expected an error") + } + if i == n { + t.Errorf("expected to fail before the end") + } + if buf.Len() != writeCap { + t.Errorf("buf.Len() = %d; want %d", buf.Len(), writeCap) + } +} + +func TestMarshalWriteIOErrors(t *testing.T) { + enc := NewEncoder(errWriter{}) + + expectErr := "unwritable" + err := enc.Encode(&Passenger{}) + if err == nil || err.Error() != expectErr { + t.Errorf("EscapeTest = [error] %v, want %v", err, expectErr) + } +} + func BenchmarkMarshal(b *testing.B) { for i := 0; i < b.N; i++ { Marshal(atomValue) diff --git a/src/pkg/encoding/xml/read.go b/src/pkg/encoding/xml/read.go index c21682420..344ab514e 100644 --- a/src/pkg/encoding/xml/read.go +++ b/src/pkg/encoding/xml/read.go @@ -81,8 +81,8 @@ import ( // of the above rules and the struct has a field with tag ",any", // unmarshal maps the sub-element to that struct field. // -// * A non-pointer anonymous struct field is handled as if the -// fields of its value were part of the outer struct. +// * An anonymous struct field is handled as if the fields of its +// value were part of the outer struct. // // * A struct field with tag "-" is never unmarshalled into. // @@ -248,7 +248,7 @@ func (p *Decoder) unmarshal(val reflect.Value, start *StartElement) error { } return UnmarshalError(e) } - fv := sv.FieldByIndex(finfo.idx) + fv := finfo.value(sv) if _, ok := fv.Interface().(Name); ok { fv.Set(reflect.ValueOf(start.Name)) } @@ -260,7 +260,7 @@ func (p *Decoder) unmarshal(val reflect.Value, start *StartElement) error { finfo := &tinfo.fields[i] switch finfo.flags & fMode { case fAttr: - strv := sv.FieldByIndex(finfo.idx) + strv := finfo.value(sv) // Look for attribute. 
for _, a := range start.Attr { @@ -271,22 +271,22 @@ func (p *Decoder) unmarshal(val reflect.Value, start *StartElement) error { case fCharData: if !saveData.IsValid() { - saveData = sv.FieldByIndex(finfo.idx) + saveData = finfo.value(sv) } case fComment: if !saveComment.IsValid() { - saveComment = sv.FieldByIndex(finfo.idx) + saveComment = finfo.value(sv) } - case fAny: + case fAny, fAny | fElement: if !saveAny.IsValid() { - saveAny = sv.FieldByIndex(finfo.idx) + saveAny = finfo.value(sv) } case fInnerXml: if !saveXML.IsValid() { - saveXML = sv.FieldByIndex(finfo.idx) + saveXML = finfo.value(sv) if p.saved == nil { saveXMLIndex = 0 p.saved = new(bytes.Buffer) @@ -374,68 +374,58 @@ Loop: } func copyValue(dst reflect.Value, src []byte) (err error) { - // Helper functions for integer and unsigned integer conversions - var itmp int64 - getInt64 := func() bool { - itmp, err = strconv.ParseInt(string(src), 10, 64) - // TODO: should check sizes - return err == nil - } - var utmp uint64 - getUint64 := func() bool { - utmp, err = strconv.ParseUint(string(src), 10, 64) - // TODO: check for overflow? - return err == nil - } - var ftmp float64 - getFloat64 := func() bool { - ftmp, err = strconv.ParseFloat(string(src), 64) - // TODO: check for overflow? - return err == nil + if dst.Kind() == reflect.Ptr { + if dst.IsNil() { + dst.Set(reflect.New(dst.Type().Elem())) + } + dst = dst.Elem() } // Save accumulated data. - switch t := dst; t.Kind() { + switch dst.Kind() { case reflect.Invalid: - // Probably a comment. + // Probably a comment. 
default: - return errors.New("cannot happen: unknown type " + t.Type().String()) + return errors.New("cannot happen: unknown type " + dst.Type().String()) case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - if !getInt64() { + itmp, err := strconv.ParseInt(string(src), 10, dst.Type().Bits()) + if err != nil { return err } - t.SetInt(itmp) + dst.SetInt(itmp) case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: - if !getUint64() { + utmp, err := strconv.ParseUint(string(src), 10, dst.Type().Bits()) + if err != nil { return err } - t.SetUint(utmp) + dst.SetUint(utmp) case reflect.Float32, reflect.Float64: - if !getFloat64() { + ftmp, err := strconv.ParseFloat(string(src), dst.Type().Bits()) + if err != nil { return err } - t.SetFloat(ftmp) + dst.SetFloat(ftmp) case reflect.Bool: value, err := strconv.ParseBool(strings.TrimSpace(string(src))) if err != nil { return err } - t.SetBool(value) + dst.SetBool(value) case reflect.String: - t.SetString(string(src)) + dst.SetString(string(src)) case reflect.Slice: if len(src) == 0 { // non-nil to flag presence src = []byte{} } - t.SetBytes(src) + dst.SetBytes(src) case reflect.Struct: - if t.Type() == timeType { + if dst.Type() == timeType { tv, err := time.Parse(time.RFC3339, string(src)) if err != nil { return err } - t.Set(reflect.ValueOf(tv)) + dst.Set(reflect.ValueOf(tv)) } } return nil @@ -461,7 +451,7 @@ Loop: } if len(finfo.parents) == len(parents) && finfo.name == start.Name.Local { // It's a perfect match, unmarshal the field. - return true, p.unmarshal(sv.FieldByIndex(finfo.idx), start) + return true, p.unmarshal(finfo.value(sv), start) } if len(finfo.parents) > len(parents) && finfo.parents[len(parents)] == start.Name.Local { // It's a prefix for the field. 
Break and recurse diff --git a/src/pkg/encoding/xml/read_test.go b/src/pkg/encoding/xml/read_test.go index 8df09b3cc..b45e2f0e6 100644 --- a/src/pkg/encoding/xml/read_test.go +++ b/src/pkg/encoding/xml/read_test.go @@ -355,3 +355,47 @@ func TestUnmarshalWithoutNameType(t *testing.T) { t.Fatalf("have %v\nwant %v", x.Attr, OK) } } + +func TestUnmarshalAttr(t *testing.T) { + type ParamVal struct { + Int int `xml:"int,attr"` + } + + type ParamPtr struct { + Int *int `xml:"int,attr"` + } + + type ParamStringPtr struct { + Int *string `xml:"int,attr"` + } + + x := []byte(`<Param int="1" />`) + + p1 := &ParamPtr{} + if err := Unmarshal(x, p1); err != nil { + t.Fatalf("Unmarshal: %s", err) + } + if p1.Int == nil { + t.Fatalf("Unmarshal failed in to *int field") + } else if *p1.Int != 1 { + t.Fatalf("Unmarshal with %s failed:\nhave %#v,\n want %#v", x, p1.Int, 1) + } + + p2 := &ParamVal{} + if err := Unmarshal(x, p2); err != nil { + t.Fatalf("Unmarshal: %s", err) + } + if p2.Int != 1 { + t.Fatalf("Unmarshal with %s failed:\nhave %#v,\n want %#v", x, p2.Int, 1) + } + + p3 := &ParamStringPtr{} + if err := Unmarshal(x, p3); err != nil { + t.Fatalf("Unmarshal: %s", err) + } + if p3.Int == nil { + t.Fatalf("Unmarshal failed in to *string field") + } else if *p3.Int != "1" { + t.Fatalf("Unmarshal with %s failed:\nhave %#v,\n want %#v", x, p3.Int, 1) + } +} diff --git a/src/pkg/encoding/xml/typeinfo.go b/src/pkg/encoding/xml/typeinfo.go index 8e2e4508b..bbeb28d87 100644 --- a/src/pkg/encoding/xml/typeinfo.go +++ b/src/pkg/encoding/xml/typeinfo.go @@ -66,10 +66,14 @@ func getTypeInfo(typ reflect.Type) (*typeInfo, error) { // For embedded structs, embed its fields. 
if f.Anonymous { - if f.Type.Kind() != reflect.Struct { + t := f.Type + if t.Kind() == reflect.Ptr { + t = t.Elem() + } + if t.Kind() != reflect.Struct { continue } - inner, err := getTypeInfo(f.Type) + inner, err := getTypeInfo(t) if err != nil { return nil, err } @@ -150,6 +154,9 @@ func structFieldInfo(typ reflect.Type, f *reflect.StructField) (*fieldInfo, erro // This will also catch multiple modes in a single field. valid = false } + if finfo.flags&fMode == fAny { + finfo.flags |= fElement + } if finfo.flags&fOmitEmpty != 0 && finfo.flags&(fElement|fAttr) == 0 { valid = false } @@ -327,3 +334,22 @@ type TagPathError struct { func (e *TagPathError) Error() string { return fmt.Sprintf("%s field %q with tag %q conflicts with field %q with tag %q", e.Struct, e.Field1, e.Tag1, e.Field2, e.Tag2) } + +// value returns v's field value corresponding to finfo. +// It's equivalent to v.FieldByIndex(finfo.idx), but initializes +// and dereferences pointers as necessary. +func (finfo *fieldInfo) value(v reflect.Value) reflect.Value { + for i, x := range finfo.idx { + if i > 0 { + t := v.Type() + if t.Kind() == reflect.Ptr && t.Elem().Kind() == reflect.Struct { + if v.IsNil() { + v.Set(reflect.New(v.Type().Elem())) + } + v = v.Elem() + } + } + v = v.Field(x) + } + return v +} diff --git a/src/pkg/encoding/xml/xml.go b/src/pkg/encoding/xml/xml.go index 5066f5c01..143fec554 100644 --- a/src/pkg/encoding/xml/xml.go +++ b/src/pkg/encoding/xml/xml.go @@ -181,7 +181,6 @@ type Decoder struct { ns map[string]string err error line int - tmp [32]byte } // NewDecoder creates a new XML parser reading from r. @@ -584,6 +583,7 @@ func (d *Decoder) RawToken() (Token, error) { if inquote == 0 && b == '>' && depth == 0 { break } + HandleB: d.buf.WriteByte(b) switch { case b == inquote: @@ -599,7 +599,35 @@ func (d *Decoder) RawToken() (Token, error) { depth-- case b == '<' && inquote == 0: - depth++ + // Look for <!-- to begin comment. 
+ s := "!--" + for i := 0; i < len(s); i++ { + if b, ok = d.mustgetc(); !ok { + return nil, d.err + } + if b != s[i] { + for j := 0; j < i; j++ { + d.buf.WriteByte(s[j]) + } + depth++ + goto HandleB + } + } + + // Remove < that was written above. + d.buf.Truncate(d.buf.Len() - 1) + + // Look for terminator. + var b0, b1 byte + for { + if b, ok = d.mustgetc(); !ok { + return nil, d.err + } + if b0 == '-' && b1 == '-' && b == '>' { + break + } + b0, b1 = b1, b + } } } return Directive(d.buf.Bytes()), nil @@ -848,78 +876,103 @@ Input: // XML in all its glory allows a document to define and use // its own character names with <!ENTITY ...> directives. // Parsers are required to recognize lt, gt, amp, apos, and quot - // even if they have not been declared. That's all we allow. - var i int - for i = 0; i < len(d.tmp); i++ { - var ok bool - d.tmp[i], ok = d.getc() - if !ok { - if d.err == io.EOF { - d.err = d.syntaxError("unexpected EOF") - } + // even if they have not been declared. + before := d.buf.Len() + d.buf.WriteByte('&') + var ok bool + var text string + var haveText bool + if b, ok = d.mustgetc(); !ok { + return nil + } + if b == '#' { + d.buf.WriteByte(b) + if b, ok = d.mustgetc(); !ok { return nil } - c := d.tmp[i] - if c == ';' { - break - } - if 'a' <= c && c <= 'z' || - 'A' <= c && c <= 'Z' || - '0' <= c && c <= '9' || - c == '_' || c == '#' { - continue + base := 10 + if b == 'x' { + base = 16 + d.buf.WriteByte(b) + if b, ok = d.mustgetc(); !ok { + return nil + } } - d.ungetc(c) - break - } - s := string(d.tmp[0:i]) - if i >= len(d.tmp) { - if !d.Strict { - b0, b1 = 0, 0 - d.buf.WriteByte('&') - d.buf.Write(d.tmp[0:i]) - continue Input + start := d.buf.Len() + for '0' <= b && b <= '9' || + base == 16 && 'a' <= b && b <= 'f' || + base == 16 && 'A' <= b && b <= 'F' { + d.buf.WriteByte(b) + if b, ok = d.mustgetc(); !ok { + return nil + } } - d.err = d.syntaxError("character entity expression &" + s + "... 
too long") - return nil - } - var haveText bool - var text string - if i >= 2 && s[0] == '#' { - var n uint64 - var err error - if i >= 3 && s[1] == 'x' { - n, err = strconv.ParseUint(s[2:], 16, 64) + if b != ';' { + d.ungetc(b) } else { - n, err = strconv.ParseUint(s[1:], 10, 64) - } - if err == nil && n <= unicode.MaxRune { - text = string(n) - haveText = true + s := string(d.buf.Bytes()[start:]) + d.buf.WriteByte(';') + n, err := strconv.ParseUint(s, base, 64) + if err == nil && n <= unicode.MaxRune { + text = string(n) + haveText = true + } } } else { - if r, ok := entity[s]; ok { - text = string(r) - haveText = true - } else if d.Entity != nil { - text, haveText = d.Entity[s] + d.ungetc(b) + if !d.readName() { + if d.err != nil { + return nil + } + ok = false } - } - if !haveText { - if !d.Strict { - b0, b1 = 0, 0 - d.buf.WriteByte('&') - d.buf.Write(d.tmp[0:i]) - continue Input + if b, ok = d.mustgetc(); !ok { + return nil } - d.err = d.syntaxError("invalid character entity &" + s + ";") - return nil + if b != ';' { + d.ungetc(b) + } else { + name := d.buf.Bytes()[before+1:] + d.buf.WriteByte(';') + if isName(name) { + s := string(name) + if r, ok := entity[s]; ok { + text = string(r) + haveText = true + } else if d.Entity != nil { + text, haveText = d.Entity[s] + } + } + } + } + + if haveText { + d.buf.Truncate(before) + d.buf.Write([]byte(text)) + b0, b1 = 0, 0 + continue Input } - d.buf.Write([]byte(text)) - b0, b1 = 0, 0 - continue Input + if !d.Strict { + b0, b1 = 0, 0 + continue Input + } + ent := string(d.buf.Bytes()[before]) + if ent[len(ent)-1] != ';' { + ent += " (no semicolon)" + } + d.err = d.syntaxError("invalid character entity " + ent) + return nil } - d.buf.WriteByte(b) + + // We must rewrite unescaped \r and \r\n into \n. + if b == '\r' { + d.buf.WriteByte('\n') + } else if b1 == '\r' && b == '\n' { + // Skip \r\n--we already wrote \n. 
+ } else { + d.buf.WriteByte(b) + } + b0, b1 = b1, b } data := d.buf.Bytes() @@ -940,20 +993,7 @@ Input: } } - // Must rewrite \r and \r\n into \n. - w := 0 - for r := 0; r < len(data); r++ { - b := data[r] - if b == '\r' { - if r+1 < len(data) && data[r+1] == '\n' { - continue - } - b = '\n' - } - data[w] = b - w++ - } - return data[0:w] + return data } // Decide whether the given rune is in the XML Character Range, per @@ -989,18 +1029,34 @@ func (d *Decoder) nsname() (name Name, ok bool) { // Do not set d.err if the name is missing (unless unexpected EOF is received): // let the caller provide better context. func (d *Decoder) name() (s string, ok bool) { + d.buf.Reset() + if !d.readName() { + return "", false + } + + // Now we check the characters. + s = d.buf.String() + if !isName([]byte(s)) { + d.err = d.syntaxError("invalid XML name: " + s) + return "", false + } + return s, true +} + +// Read a name and append its bytes to d.buf. +// The name is delimited by any single-byte character not valid in names. +// All multi-byte characters are accepted; the caller must check their validity. +func (d *Decoder) readName() (ok bool) { var b byte if b, ok = d.mustgetc(); !ok { return } - - // As a first approximation, we gather the bytes [A-Za-z_:.-\x80-\xFF]* if b < utf8.RuneSelf && !isNameByte(b) { d.ungetc(b) - return "", false + return false } - d.buf.Reset() d.buf.WriteByte(b) + for { if b, ok = d.mustgetc(); !ok { return @@ -1011,16 +1067,7 @@ func (d *Decoder) name() (s string, ok bool) { } d.buf.WriteByte(b) } - - // Then we check the characters. - s = d.buf.String() - for i, c := range s { - if !unicode.Is(first, c) && (i == 0 || !unicode.Is(second, c)) { - d.err = d.syntaxError("invalid XML name: " + s) - return "", false - } - } - return s, true + return true } func isNameByte(c byte) bool { @@ -1030,6 +1077,30 @@ func isNameByte(c byte) bool { c == '_' || c == ':' || c == '.' 
|| c == '-' } +func isName(s []byte) bool { + if len(s) == 0 { + return false + } + c, n := utf8.DecodeRune(s) + if c == utf8.RuneError && n == 1 { + return false + } + if !unicode.Is(first, c) { + return false + } + for n < len(s) { + s = s[n:] + c, n = utf8.DecodeRune(s) + if c == utf8.RuneError && n == 1 { + return false + } + if !unicode.Is(first, c) && !unicode.Is(second, c) { + return false + } + } + return true +} + // These tables were generated by cut and paste from Appendix B of // the XML spec at http://www.xml.com/axml/testaxml.htm // and then reformatting. First corresponds to (Letter | '_' | ':') @@ -1621,7 +1692,7 @@ var HTMLAutoClose = htmlAutoClose var htmlAutoClose = []string{ /* hget http://www.w3.org/TR/html4/loose.dtd | - 9 sed -n 's/<!ELEMENT (.*) - O EMPTY.+/ "\1",/p' | tr A-Z a-z + 9 sed -n 's/<!ELEMENT ([^ ]*) +- O EMPTY.+/ "\1",/p' | tr A-Z a-z */ "basefont", "br", @@ -1631,7 +1702,7 @@ var htmlAutoClose = []string{ "param", "hr", "input", - "col ", + "col", "frame", "isindex", "base", @@ -1644,11 +1715,14 @@ var ( esc_amp = []byte("&") esc_lt = []byte("<") esc_gt = []byte(">") + esc_tab = []byte("	") + esc_nl = []byte("
") + esc_cr = []byte("
") ) -// Escape writes to w the properly escaped XML equivalent +// EscapeText writes to w the properly escaped XML equivalent // of the plain text data s. -func Escape(w io.Writer, s []byte) { +func EscapeText(w io.Writer, s []byte) error { var esc []byte last := 0 for i, c := range s { @@ -1663,14 +1737,34 @@ func Escape(w io.Writer, s []byte) { esc = esc_lt case '>': esc = esc_gt + case '\t': + esc = esc_tab + case '\n': + esc = esc_nl + case '\r': + esc = esc_cr default: continue } - w.Write(s[last:i]) - w.Write(esc) + if _, err := w.Write(s[last:i]); err != nil { + return err + } + if _, err := w.Write(esc); err != nil { + return err + } last = i + 1 } - w.Write(s[last:]) + if _, err := w.Write(s[last:]); err != nil { + return err + } + return nil +} + +// Escape is like EscapeText but omits the error return value. +// It is provided for backwards compatibility with Go 1.0. +// Code targeting Go 1.1 or later should use EscapeText. +func Escape(w io.Writer, s []byte) { + EscapeText(w, s) } // procInstEncoding parses the `encoding="..."` or `encoding='...'` diff --git a/src/pkg/encoding/xml/xml_test.go b/src/pkg/encoding/xml/xml_test.go index 1d0696ce0..54dab5484 100644 --- a/src/pkg/encoding/xml/xml_test.go +++ b/src/pkg/encoding/xml/xml_test.go @@ -5,6 +5,7 @@ package xml import ( + "fmt" "io" "reflect" "strings" @@ -18,6 +19,7 @@ const testInput = ` <body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` + "\r\n\t" + ` > <hello lang="en">World <>'" 白鵬翔</hello> + <query>&何; &is-it;</query> <goodbye /> <outer foo:attr="value" xmlns:tag="ns4"> <inner/> @@ -27,6 +29,8 @@ const testInput = ` </tag:name> </body><!-- missing final newline -->` +var testEntity = map[string]string{"何": "What", "is-it": "is it?"} + var rawTokens = []Token{ CharData("\n"), ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, @@ -40,6 +44,10 @@ var rawTokens = []Token{ CharData("World <>'\" 白鵬翔"), EndElement{Name{"", "hello"}}, CharData("\n "), + StartElement{Name{"", "query"}, 
[]Attr{}}, + CharData("What is it?"), + EndElement{Name{"", "query"}}, + CharData("\n "), StartElement{Name{"", "goodbye"}, []Attr{}}, EndElement{Name{"", "goodbye"}}, CharData("\n "), @@ -73,6 +81,10 @@ var cookedTokens = []Token{ CharData("World <>'\" 白鵬翔"), EndElement{Name{"ns2", "hello"}}, CharData("\n "), + StartElement{Name{"ns2", "query"}, []Attr{}}, + CharData("What is it?"), + EndElement{Name{"ns2", "query"}}, + CharData("\n "), StartElement{Name{"ns2", "goodbye"}, []Attr{}}, EndElement{Name{"ns2", "goodbye"}}, CharData("\n "), @@ -155,9 +167,65 @@ var xmlInput = []string{ func TestRawToken(t *testing.T) { d := NewDecoder(strings.NewReader(testInput)) + d.Entity = testEntity testRawToken(t, d, rawTokens) } +const nonStrictInput = ` +<tag>non&entity</tag> +<tag>&unknown;entity</tag> +<tag>{</tag> +<tag>&#zzz;</tag> +<tag>&なまえ3;</tag> +<tag><-gt;</tag> +<tag>&;</tag> +<tag>&0a;</tag> +` + +var nonStringEntity = map[string]string{"": "oops!", "0a": "oops!"} + +var nonStrictTokens = []Token{ + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("non&entity"), + EndElement{Name{"", "tag"}}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("&unknown;entity"), + EndElement{Name{"", "tag"}}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("{"), + EndElement{Name{"", "tag"}}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("&#zzz;"), + EndElement{Name{"", "tag"}}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("&なまえ3;"), + EndElement{Name{"", "tag"}}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("<-gt;"), + EndElement{Name{"", "tag"}}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("&;"), + EndElement{Name{"", "tag"}}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("&0a;"), + EndElement{Name{"", "tag"}}, + CharData("\n"), +} + +func TestNonStrictRawToken(t *testing.T) { + d := 
NewDecoder(strings.NewReader(nonStrictInput)) + d.Strict = false + testRawToken(t, d, nonStrictTokens) +} + type downCaser struct { t *testing.T r io.ByteReader @@ -219,7 +287,18 @@ func testRawToken(t *testing.T, d *Decoder, rawTokens []Token) { t.Fatalf("token %d: unexpected error: %s", i, err) } if !reflect.DeepEqual(have, want) { - t.Errorf("token %d = %#v want %#v", i, have, want) + var shave, swant string + if _, ok := have.(CharData); ok { + shave = fmt.Sprintf("CharData(%q)", have) + } else { + shave = fmt.Sprintf("%#v", have) + } + if _, ok := want.(CharData); ok { + swant = fmt.Sprintf("CharData(%q)", want) + } else { + swant = fmt.Sprintf("%#v", want) + } + t.Errorf("token %d = %s, want %s", i, shave, swant) } } } @@ -272,6 +351,7 @@ func TestNestedDirectives(t *testing.T) { func TestToken(t *testing.T) { d := NewDecoder(strings.NewReader(testInput)) + d.Entity = testEntity for i, want := range cookedTokens { have, err := d.Token() @@ -531,8 +611,8 @@ var characterTests = []struct { {"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"}, {"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"}, {"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"}, - {"<doc>&\x01;</doc>", "invalid character entity &;"}, - {"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &;"}, + {"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"}, + {"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity & (no semicolon)"}, } func TestDisallowedCharacters(t *testing.T) { @@ -576,3 +656,50 @@ func TestProcInstEncoding(t *testing.T) { } } } + +// Ensure that directives with comments include the complete +// text of any nested directives. 
+ +var directivesWithCommentsInput = ` +<!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]> +<!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]> +<!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]> +` + +var directivesWithCommentsTokens = []Token{ + CharData("\n"), + Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`), + CharData("\n"), + Directive(`DOCTYPE [<!ENTITY go "Golang">]`), + CharData("\n"), + Directive(`DOCTYPE <!-> <!> [<!ENTITY go "Golang">]`), + CharData("\n"), +} + +func TestDirectivesWithComments(t *testing.T) { + d := NewDecoder(strings.NewReader(directivesWithCommentsInput)) + + for i, want := range directivesWithCommentsTokens { + have, err := d.Token() + if err != nil { + t.Fatalf("token %d: unexpected error: %s", i, err) + } + if !reflect.DeepEqual(have, want) { + t.Errorf("token %d = %#v want %#v", i, have, want) + } + } +} + +// Writer whose Write method always returns an error. +type errWriter struct{} + +func (errWriter) Write(p []byte) (n int, err error) { return 0, fmt.Errorf("unwritable") } + +func TestEscapeTextIOErrors(t *testing.T) { + expectErr := "unwritable" + err := EscapeText(errWriter{}, []byte{'A'}) + + if err == nil || err.Error() != expectErr { + t.Errorf("EscapeTest = [error] %v, want %v", err, expectErr) + } +} |