diff options
author | Amrut Joshi <amrut.joshi@gmail.com> | 2010-02-18 23:32:55 -0800 |
---|---|---|
committer | Amrut Joshi <amrut.joshi@gmail.com> | 2010-02-18 23:32:55 -0800 |
commit | 8644a7179ce64f0ffc827eb0991f3064b11efb4d (patch) | |
tree | 4309d21eeea4de79b2926c3c50b81caf64a640bb /src/pkg/xml/xml.go | |
parent | e1bc70d1ddfb8a925aa0f572081b6f681e3dee96 (diff) | |
download | golang-8644a7179ce64f0ffc827eb0991f3064b11efb4d.tar.gz |
xml: allow unquoted attribute values in non-Strict mode
HTML4 standard supports unquoted attribute values in certain cases
(http://www.w3.org/TR/REC-html40/intro/sgmltut.html#h-3.2.2).
R=rsc
CC=golang-dev
http://codereview.appspot.com/207095
Committer: Russ Cox <rsc@golang.org>
Diffstat (limited to 'src/pkg/xml/xml.go')
-rw-r--r-- | src/pkg/xml/xml.go | 43 |
1 files changed, 35 insertions, 8 deletions
diff --git a/src/pkg/xml/xml.go b/src/pkg/xml/xml.go index 33a86a255..67cbb824f 100644 --- a/src/pkg/xml/xml.go +++ b/src/pkg/xml/xml.go @@ -589,14 +589,7 @@ func (p *Parser) RawToken() (Token, os.Error) { return nil, p.err } p.space() - if b, ok = p.mustgetc(); !ok { - return nil, p.err - } - if b != '"' && b != '\'' { - p.err = SyntaxError("unquoted or missing attribute value in element") - return nil, p.err - } - data := p.text(int(b), false) + data := p.attrval() if data == nil { return nil, p.err } @@ -610,6 +603,40 @@ func (p *Parser) RawToken() (Token, os.Error) { return StartElement{name, attr}, nil } +func (p *Parser) attrval() []byte { + b, ok := p.mustgetc() + if !ok { + return nil + } + // Handle quoted attribute values + if b == '"' || b == '\'' { + return p.text(int(b), false) + } + // Handle unquoted attribute values for strict parsers + if p.Strict { + p.err = SyntaxError("unquoted or missing attribute value in element") + return nil + } + // Handle unquoted attribute values for unstrict parsers + p.ungetc(b) + p.buf.Reset() + for { + b, ok = p.mustgetc() + if !ok { + return nil + } + // http://www.w3.org/TR/REC-html40/intro/sgmltut.html#h-3.2.2 + if 'a' <= b && b <= 'z' || 'A' <= b && b <= 'Z' || + '0' <= b && b <= '9' || b == '_' || b == ':' || b == '-' { + p.buf.WriteByte(b) + } else { + p.ungetc(b) + break + } + } + return p.buf.Bytes() +} + // Skip spaces if any func (p *Parser) space() { for { |