summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAmrut Joshi <amrut.joshi@gmail.com>2010-02-18 23:32:55 -0800
committerAmrut Joshi <amrut.joshi@gmail.com>2010-02-18 23:32:55 -0800
commit8644a7179ce64f0ffc827eb0991f3064b11efb4d (patch)
tree4309d21eeea4de79b2926c3c50b81caf64a640bb /src
parente1bc70d1ddfb8a925aa0f572081b6f681e3dee96 (diff)
downloadgolang-8644a7179ce64f0ffc827eb0991f3064b11efb4d.tar.gz
xml: allow unquoted attribute values in non-Strict mode
HTML4 standard supports unquoted attribute values in certain cases (http://www.w3.org/TR/REC-html40/intro/sgmltut.html#h-3.2.2). R=rsc CC=golang-dev http://codereview.appspot.com/207095 Committer: Russ Cox <rsc@golang.org>
Diffstat (limited to 'src')
-rw-r--r--src/pkg/xml/xml.go43
-rw-r--r--src/pkg/xml/xml_test.go20
2 files changed, 55 insertions, 8 deletions
diff --git a/src/pkg/xml/xml.go b/src/pkg/xml/xml.go
index 33a86a255..67cbb824f 100644
--- a/src/pkg/xml/xml.go
+++ b/src/pkg/xml/xml.go
@@ -589,14 +589,7 @@ func (p *Parser) RawToken() (Token, os.Error) {
return nil, p.err
}
p.space()
- if b, ok = p.mustgetc(); !ok {
- return nil, p.err
- }
- if b != '"' && b != '\'' {
- p.err = SyntaxError("unquoted or missing attribute value in element")
- return nil, p.err
- }
- data := p.text(int(b), false)
+ data := p.attrval()
if data == nil {
return nil, p.err
}
@@ -610,6 +603,40 @@ func (p *Parser) RawToken() (Token, os.Error) {
return StartElement{name, attr}, nil
}
+func (p *Parser) attrval() []byte { // attrval reads one attribute value; a nil result signals failure to the caller
+ b, ok := p.mustgetc()
+ if !ok {
+ return nil
+ }
+ // Quoted value: b is the opening quote and is handed to p.text (presumably as the closing delimiter).
+ if b == '"' || b == '\'' {
+ return p.text(int(b), false)
+ }
+ // Strict parsers reject any value that does not start with a quote.
+ if p.Strict {
+ p.err = SyntaxError("unquoted or missing attribute value in element")
+ return nil
+ }
+ // Non-strict parsers: push b back and collect the unquoted value into p.buf.
+ p.ungetc(b)
+ p.buf.Reset()
+ for {
+ b, ok = p.mustgetc()
+ if !ok {
+ return nil
+ }
+ // Accepted bytes follow http://www.w3.org/TR/REC-html40/intro/sgmltut.html#h-3.2.2 -- NOTE(review): that section also permits '.', which is not accepted here; TODO confirm whether intentional.
+ if 'a' <= b && b <= 'z' || 'A' <= b && b <= 'Z' ||
+ '0' <= b && b <= '9' || b == '_' || b == ':' || b == '-' {
+ p.buf.WriteByte(b)
+ } else {
+ p.ungetc(b) // the first byte outside the accepted set ends the value and is left unread
+ break
+ }
+ }
+ return p.buf.Bytes()
+}
+
// Skip spaces if any
func (p *Parser) space() {
for {
diff --git a/src/pkg/xml/xml_test.go b/src/pkg/xml/xml_test.go
index fa1949500..a4c55b73d 100644
--- a/src/pkg/xml/xml_test.go
+++ b/src/pkg/xml/xml_test.go
@@ -298,3 +298,23 @@ func TestIssue569(t *testing.T) {
t.Fatalf("Expecting abcd")
}
}
+
+func TestUnquotedAttrs(t *testing.T) { // checks that a non-strict parser accepts an unquoted attribute value
+ data := "<tag attr=azAZ09:-_\t>" // the tab is outside the accepted byte set, so it is expected to end the value
+ p := NewParser(StringReader(data))
+ p.Strict = false
+ token, err := p.Token()
+ if _, ok := err.(SyntaxError); ok { // NOTE(review): only SyntaxError is reported; other error types are not checked here
+ t.Errorf("Unexpected error: %v", err)
+ }
+ if token.(StartElement).Name.Local != "tag" { // unchecked type assertion: panics if token is not a StartElement
+ t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
+ }
+ attr := token.(StartElement).Attr[0] // indexes without a length check: panics if no attribute was parsed
+ if attr.Value != "azAZ09:-_" {
+ t.Errorf("Unexpected attribute value: %v", attr.Value)
+ }
+ if attr.Name.Local != "attr" {
+ t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
+ }
+}