diff options
| author | Ondřej Surý <ondrej@sury.org> | 2011-04-28 10:35:15 +0200 | 
|---|---|---|
| committer | Ondřej Surý <ondrej@sury.org> | 2011-04-28 10:35:15 +0200 | 
| commit | c1ba1a0fec4aed430709030f98a3bdb90bfeea16 (patch) | |
| tree | 3df18657e50a0313ed6defcda30e4474cb28a467 /src/pkg/xml/xml.go | |
| parent | 7b15ed9ef455b6b66c6b376898a88aef5d6a9970 (diff) | |
| download | golang-c1ba1a0fec4aed430709030f98a3bdb90bfeea16.tar.gz | |
Imported Upstream version 2011.04.27 (tag: upstream/2011.04.27)
Diffstat (limited to 'src/pkg/xml/xml.go')
| -rw-r--r-- | src/pkg/xml/xml.go | 73 | 
1 files changed, 62 insertions, 11 deletions
| diff --git a/src/pkg/xml/xml.go b/src/pkg/xml/xml.go index f92abe825..42d8b986e 100644 --- a/src/pkg/xml/xml.go +++ b/src/pkg/xml/xml.go @@ -163,6 +163,13 @@ type Parser struct {  	//	"quot": `"`,  	Entity map[string]string +	// CharsetReader, if non-nil, defines a function to generate +	// charset-conversion readers, converting from the provided +	// non-UTF-8 charset into UTF-8. If CharsetReader is nil or +	// returns an error, parsing stops with an error. One of the +	// the CharsetReader's result values must be non-nil. +	CharsetReader func(charset string, input io.Reader) (io.Reader, os.Error) +  	r         io.ByteReader  	buf       bytes.Buffer  	saved     *bytes.Buffer @@ -186,17 +193,7 @@ func NewParser(r io.Reader) *Parser {  		line:     1,  		Strict:   true,  	} - -	// Get efficient byte at a time reader. -	// Assume that if reader has its own -	// ReadByte, it's efficient enough. -	// Otherwise, use bufio. -	if rb, ok := r.(io.ByteReader); ok { -		p.r = rb -	} else { -		p.r = bufio.NewReader(r) -	} - +	p.switchToReader(r)  	return p  } @@ -290,6 +287,18 @@ func (p *Parser) translate(n *Name, isElementName bool) {  	}  } +func (p *Parser) switchToReader(r io.Reader) { +	// Get efficient byte at a time reader. +	// Assume that if reader has its own +	// ReadByte, it's efficient enough. +	// Otherwise, use bufio. +	if rb, ok := r.(io.ByteReader); ok { +		p.r = rb +	} else { +		p.r = bufio.NewReader(r) +	} +} +  // Parsing state - stack holds old name space translations  // and the current set of open elements.  
The translations to pop when  // ending a given tag are *below* it on the stack, which is @@ -487,6 +496,25 @@ func (p *Parser) RawToken() (Token, os.Error) {  		}  		data := p.buf.Bytes()  		data = data[0 : len(data)-2] // chop ?> + +		if target == "xml" { +			enc := procInstEncoding(string(data)) +			if enc != "" && enc != "utf-8" && enc != "UTF-8" { +				if p.CharsetReader == nil { +					p.err = fmt.Errorf("xml: encoding %q declared but Parser.CharsetReader is nil", enc) +					return nil, p.err +				} +				newr, err := p.CharsetReader(enc, p.r.(io.Reader)) +				if err != nil { +					p.err = fmt.Errorf("xml: opening charset %q: %v", enc, err) +					return nil, p.err +				} +				if newr == nil { +					panic("CharsetReader returned a nil Reader for charset " + enc) +				} +				p.switchToReader(newr) +			} +		}  		return ProcInst{target, data}, nil  	case '!': @@ -1633,3 +1661,26 @@ func Escape(w io.Writer, s []byte) {  	}  	w.Write(s[last:])  } + +// procInstEncoding parses the `encoding="..."` or `encoding='...'` +// value out of the provided string, returning "" if not found. +func procInstEncoding(s string) string { +	// TODO: this parsing is somewhat lame and not exact. +	// It works for all actual cases, though. +	idx := strings.Index(s, "encoding=") +	if idx == -1 { +		return "" +	} +	v := s[idx+len("encoding="):] +	if v == "" { +		return "" +	} +	if v[0] != '\'' && v[0] != '"' { +		return "" +	} +	idx = strings.IndexRune(v[1:], int(v[0])) +	if idx == -1 { +		return "" +	} +	return v[1 : idx+1] +} | 
