diff options
Diffstat (limited to 'src/pkg/encoding/xml/read.go')
-rw-r--r-- | src/pkg/encoding/xml/read.go | 531 |
1 files changed, 531 insertions, 0 deletions
diff --git a/src/pkg/encoding/xml/read.go b/src/pkg/encoding/xml/read.go new file mode 100644 index 000000000..c21682420 --- /dev/null +++ b/src/pkg/encoding/xml/read.go @@ -0,0 +1,531 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package xml + +import ( + "bytes" + "errors" + "reflect" + "strconv" + "strings" + "time" +) + +// BUG(rsc): Mapping between XML elements and data structures is inherently flawed: +// an XML element is an order-dependent collection of anonymous +// values, while a data structure is an order-independent collection +// of named values. +// See package json for a textual representation more suitable +// to data structures. + +// Unmarshal parses the XML-encoded data and stores the result in +// the value pointed to by v, which must be an arbitrary struct, +// slice, or string. Well-formed data that does not fit into v is +// discarded. +// +// Because Unmarshal uses the reflect package, it can only assign +// to exported (upper case) fields. Unmarshal uses a case-sensitive +// comparison to match XML element names to tag values and struct +// field names. +// +// Unmarshal maps an XML element to a struct using the following rules. +// In the rules, the tag of a field refers to the value associated with the +// key 'xml' in the struct field's tag (see the example above). +// +// * If the struct has a field of type []byte or string with tag +// ",innerxml", Unmarshal accumulates the raw XML nested inside the +// element in that field. The rest of the rules still apply. +// +// * If the struct has a field named XMLName of type xml.Name, +// Unmarshal records the element name in that field. +// +// * If the XMLName field has an associated tag of the form +// "name" or "namespace-URL name", the XML element must have +// the given name (and, optionally, name space) or else Unmarshal +// returns an error. +// +// * If the XML element has an attribute whose name matches a +// struct field name with an associated tag containing ",attr" or +// the explicit name in a struct field tag of the form "name,attr", +// Unmarshal records the attribute value in that field. +// +// * If the XML element contains character data, that data is +// accumulated in the first struct field that has tag "chardata". +// The struct field may have type []byte or string. +// If there is no such field, the character data is discarded. +// +// * If the XML element contains comments, they are accumulated in +// the first struct field that has tag ",comments". The struct +// field may have type []byte or string. If there is no such +// field, the comments are discarded. +// +// * If the XML element contains a sub-element whose name matches +// the prefix of a tag formatted as "a" or "a>b>c", unmarshal +// will descend into the XML structure looking for elements with the +// given names, and will map the innermost elements to that struct +// field. A tag starting with ">" is equivalent to one starting +// with the field name followed by ">". +// +// * If the XML element contains a sub-element whose name matches +// a struct field's XMLName tag and the struct field has no +// explicit name tag as per the previous rule, unmarshal maps +// the sub-element to that struct field. +// +// * If the XML element contains a sub-element whose name matches a +// field without any mode flags (",attr", ",chardata", etc), Unmarshal +// maps the sub-element to that struct field. +// +// * If the XML element contains a sub-element that hasn't matched any +// of the above rules and the struct has a field with tag ",any", +// unmarshal maps the sub-element to that struct field. +// +// * A non-pointer anonymous struct field is handled as if the +// fields of its value were part of the outer struct. +// +// * A struct field with tag "-" is never unmarshalled into. +// +// Unmarshal maps an XML element to a string or []byte by saving the +// concatenation of that element's character data in the string or +// []byte. The saved []byte is never nil. +// +// Unmarshal maps an attribute value to a string or []byte by saving +// the value in the string or slice. +// +// Unmarshal maps an XML element to a slice by extending the length of +// the slice and mapping the element to the newly created value. +// +// Unmarshal maps an XML element or attribute value to a bool by +// setting it to the boolean value represented by the string. +// +// Unmarshal maps an XML element or attribute value to an integer or +// floating-point field by setting the field to the result of +// interpreting the string value in decimal. There is no check for +// overflow. +// +// Unmarshal maps an XML element to an xml.Name by recording the +// element name. +// +// Unmarshal maps an XML element to a pointer by setting the pointer +// to a freshly allocated value and then mapping the element to that value. +// +func Unmarshal(data []byte, v interface{}) error { + return NewDecoder(bytes.NewBuffer(data)).Decode(v) +} + +// Decode works like xml.Unmarshal, except it reads the decoder +// stream to find the start element. +func (d *Decoder) Decode(v interface{}) error { + return d.DecodeElement(v, nil) +} + +// DecodeElement works like xml.Unmarshal except that it takes +// a pointer to the start XML element to decode into v. +// It is useful when a client reads some raw XML tokens itself +// but also wants to defer to Unmarshal for some elements. +func (d *Decoder) DecodeElement(v interface{}, start *StartElement) error { + val := reflect.ValueOf(v) + if val.Kind() != reflect.Ptr { + return errors.New("non-pointer passed to Unmarshal") + } + return d.unmarshal(val.Elem(), start) +} + +// An UnmarshalError represents an error in the unmarshalling process. +type UnmarshalError string + +func (e UnmarshalError) Error() string { return string(e) } + +// Unmarshal a single XML element into val. +func (p *Decoder) unmarshal(val reflect.Value, start *StartElement) error { + // Find start element if we need it. + if start == nil { + for { + tok, err := p.Token() + if err != nil { + return err + } + if t, ok := tok.(StartElement); ok { + start = &t + break + } + } + } + + if pv := val; pv.Kind() == reflect.Ptr { + if pv.IsNil() { + pv.Set(reflect.New(pv.Type().Elem())) + } + val = pv.Elem() + } + + var ( + data []byte + saveData reflect.Value + comment []byte + saveComment reflect.Value + saveXML reflect.Value + saveXMLIndex int + saveXMLData []byte + saveAny reflect.Value + sv reflect.Value + tinfo *typeInfo + err error + ) + + switch v := val; v.Kind() { + default: + return errors.New("unknown type " + v.Type().String()) + + case reflect.Interface: + // TODO: For now, simply ignore the field. In the near + // future we may choose to unmarshal the start + // element on it, if not nil. + return p.Skip() + + case reflect.Slice: + typ := v.Type() + if typ.Elem().Kind() == reflect.Uint8 { + // []byte + saveData = v + break + } + + // Slice of element values. + // Grow slice. + n := v.Len() + if n >= v.Cap() { + ncap := 2 * n + if ncap < 4 { + ncap = 4 + } + new := reflect.MakeSlice(typ, n, ncap) + reflect.Copy(new, v) + v.Set(new) + } + v.SetLen(n + 1) + + // Recur to read element into slice. + if err := p.unmarshal(v.Index(n), start); err != nil { + v.SetLen(n) + return err + } + return nil + + case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, reflect.String: + saveData = v + + case reflect.Struct: + typ := v.Type() + if typ == nameType { + v.Set(reflect.ValueOf(start.Name)) + break + } + if typ == timeType { + saveData = v + break + } + + sv = v + tinfo, err = getTypeInfo(typ) + if err != nil { + return err + } + + // Validate and assign element name. + if tinfo.xmlname != nil { + finfo := tinfo.xmlname + if finfo.name != "" && finfo.name != start.Name.Local { + return UnmarshalError("expected element type <" + finfo.name + "> but have <" + start.Name.Local + ">") + } + if finfo.xmlns != "" && finfo.xmlns != start.Name.Space { + e := "expected element <" + finfo.name + "> in name space " + finfo.xmlns + " but have " + if start.Name.Space == "" { + e += "no name space" + } else { + e += start.Name.Space + } + return UnmarshalError(e) + } + fv := sv.FieldByIndex(finfo.idx) + if _, ok := fv.Interface().(Name); ok { + fv.Set(reflect.ValueOf(start.Name)) + } + } + + // Assign attributes. + // Also, determine whether we need to save character data or comments. + for i := range tinfo.fields { + finfo := &tinfo.fields[i] + switch finfo.flags & fMode { + case fAttr: + strv := sv.FieldByIndex(finfo.idx) + // Look for attribute. + for _, a := range start.Attr { + if a.Name.Local == finfo.name { + copyValue(strv, []byte(a.Value)) + break + } + } + + case fCharData: + if !saveData.IsValid() { + saveData = sv.FieldByIndex(finfo.idx) + } + + case fComment: + if !saveComment.IsValid() { + saveComment = sv.FieldByIndex(finfo.idx) + } + + case fAny: + if !saveAny.IsValid() { + saveAny = sv.FieldByIndex(finfo.idx) + } + + case fInnerXml: + if !saveXML.IsValid() { + saveXML = sv.FieldByIndex(finfo.idx) + if p.saved == nil { + saveXMLIndex = 0 + p.saved = new(bytes.Buffer) + } else { + saveXMLIndex = p.savedOffset() + } + } + } + } + } + + // Find end element. + // Process sub-elements along the way. +Loop: + for { + var savedOffset int + if saveXML.IsValid() { + savedOffset = p.savedOffset() + } + tok, err := p.Token() + if err != nil { + return err + } + switch t := tok.(type) { + case StartElement: + consumed := false + if sv.IsValid() { + consumed, err = p.unmarshalPath(tinfo, sv, nil, &t) + if err != nil { + return err + } + if !consumed && saveAny.IsValid() { + consumed = true + if err := p.unmarshal(saveAny, &t); err != nil { + return err + } + } + } + if !consumed { + if err := p.Skip(); err != nil { + return err + } + } + + case EndElement: + if saveXML.IsValid() { + saveXMLData = p.saved.Bytes()[saveXMLIndex:savedOffset] + if saveXMLIndex == 0 { + p.saved = nil + } + } + break Loop + + case CharData: + if saveData.IsValid() { + data = append(data, t...) + } + + case Comment: + if saveComment.IsValid() { + comment = append(comment, t...) + } + } + } + + if err := copyValue(saveData, data); err != nil { + return err + } + + switch t := saveComment; t.Kind() { + case reflect.String: + t.SetString(string(comment)) + case reflect.Slice: + t.Set(reflect.ValueOf(comment)) + } + + switch t := saveXML; t.Kind() { + case reflect.String: + t.SetString(string(saveXMLData)) + case reflect.Slice: + t.Set(reflect.ValueOf(saveXMLData)) + } + + return nil +} + +func copyValue(dst reflect.Value, src []byte) (err error) { + // Helper functions for integer and unsigned integer conversions + var itmp int64 + getInt64 := func() bool { + itmp, err = strconv.ParseInt(string(src), 10, 64) + // TODO: should check sizes + return err == nil + } + var utmp uint64 + getUint64 := func() bool { + utmp, err = strconv.ParseUint(string(src), 10, 64) + // TODO: check for overflow? + return err == nil + } + var ftmp float64 + getFloat64 := func() bool { + ftmp, err = strconv.ParseFloat(string(src), 64) + // TODO: check for overflow? + return err == nil + } + + // Save accumulated data. + switch t := dst; t.Kind() { + case reflect.Invalid: + // Probably a comment. + default: + return errors.New("cannot happen: unknown type " + t.Type().String()) + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + if !getInt64() { + return err + } + t.SetInt(itmp) + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + if !getUint64() { + return err + } + t.SetUint(utmp) + case reflect.Float32, reflect.Float64: + if !getFloat64() { + return err + } + t.SetFloat(ftmp) + case reflect.Bool: + value, err := strconv.ParseBool(strings.TrimSpace(string(src))) + if err != nil { + return err + } + t.SetBool(value) + case reflect.String: + t.SetString(string(src)) + case reflect.Slice: + if len(src) == 0 { + // non-nil to flag presence + src = []byte{} + } + t.SetBytes(src) + case reflect.Struct: + if t.Type() == timeType { + tv, err := time.Parse(time.RFC3339, string(src)) + if err != nil { + return err + } + t.Set(reflect.ValueOf(tv)) + } + } + return nil +} + +// unmarshalPath walks down an XML structure looking for wanted +// paths, and calls unmarshal on them. +// The consumed result tells whether XML elements have been consumed +// from the Decoder until start's matching end element, or if it's +// still untouched because start is uninteresting for sv's fields. +func (p *Decoder) unmarshalPath(tinfo *typeInfo, sv reflect.Value, parents []string, start *StartElement) (consumed bool, err error) { + recurse := false +Loop: + for i := range tinfo.fields { + finfo := &tinfo.fields[i] + if finfo.flags&fElement == 0 || len(finfo.parents) < len(parents) { + continue + } + for j := range parents { + if parents[j] != finfo.parents[j] { + continue Loop + } + } + if len(finfo.parents) == len(parents) && finfo.name == start.Name.Local { + // It's a perfect match, unmarshal the field. + return true, p.unmarshal(sv.FieldByIndex(finfo.idx), start) + } + if len(finfo.parents) > len(parents) && finfo.parents[len(parents)] == start.Name.Local { + // It's a prefix for the field. Break and recurse + // since it's not ok for one field path to be itself + // the prefix for another field path. + recurse = true + + // We can reuse the same slice as long as we + // don't try to append to it. + parents = finfo.parents[:len(parents)+1] + break + } + } + if !recurse { + // We have no business with this element. + return false, nil + } + // The element is not a perfect match for any field, but one + // or more fields have the path to this element as a parent + // prefix. Recurse and attempt to match these. + for { + var tok Token + tok, err = p.Token() + if err != nil { + return true, err + } + switch t := tok.(type) { + case StartElement: + consumed2, err := p.unmarshalPath(tinfo, sv, parents, &t) + if err != nil { + return true, err + } + if !consumed2 { + if err := p.Skip(); err != nil { + return true, err + } + } + case EndElement: + return true, nil + } + } + panic("unreachable") +} + +// Skip reads tokens until it has consumed the end element +// matching the most recent start element already consumed. +// It recurs if it encounters a start element, so it can be used to +// skip nested structures. +// It returns nil if it finds an end element matching the start +// element; otherwise it returns an error describing the problem. +func (d *Decoder) Skip() error { + for { + tok, err := d.Token() + if err != nil { + return err + } + switch tok.(type) { + case StartElement: + if err := d.Skip(); err != nil { + return err + } + case EndElement: + return nil + } + } + panic("unreachable") +} |