diff options
-rw-r--r-- | src/pkg/xml/xml.go | 68 | ||||
-rw-r--r-- | src/pkg/xml/xml_test.go | 63 |
2 files changed, 104 insertions, 27 deletions
diff --git a/src/pkg/xml/xml.go b/src/pkg/xml/xml.go index 1a8d89ab8..360a39863 100644 --- a/src/pkg/xml/xml.go +++ b/src/pkg/xml/xml.go @@ -409,7 +409,7 @@ func (p *Parser) RawToken() (Token, os.Error) { return CharData(data), nil; } - if b, ok = p.getc(); !ok { + if b, ok = p.mustgetc(); !ok { return nil, p.err } switch b { @@ -423,7 +423,7 @@ func (p *Parser) RawToken() (Token, os.Error) { return nil, p.err; } p.space(); - if b, ok = p.getc(); !ok { + if b, ok = p.mustgetc(); !ok { return nil, p.err } if b != '>' { @@ -438,17 +438,17 @@ func (p *Parser) RawToken() (Token, os.Error) { // the version is 1.0 and the encoding is UTF-8. var target string; if target, ok = p.name(); !ok { - return nil, p.err + if p.err == nil { + p.err = SyntaxError("expected target name after <?") + } + return nil, p.err; } p.space(); p.buf.Reset(); var b0 byte; for { - if b, ok = p.getc(); !ok { - if p.err == os.EOF { - p.err = SyntaxError("unterminated <? directive") - } - return nil, p.err; + if b, ok = p.mustgetc(); !ok { + return nil, p.err } p.buf.WriteByte(b); if b0 == '?' && b == '>' { @@ -462,13 +462,13 @@ func (p *Parser) RawToken() (Token, os.Error) { case '!': // <!: Maybe comment, maybe CDATA. - if b, ok = p.getc(); !ok { + if b, ok = p.mustgetc(); !ok { return nil, p.err } switch b { case '-': // <!- // Probably <!-- for a comment. - if b, ok = p.getc(); !ok { + if b, ok = p.mustgetc(); !ok { return nil, p.err } if b != '-' { @@ -479,11 +479,8 @@ func (p *Parser) RawToken() (Token, os.Error) { p.buf.Reset(); var b0, b1 byte; for { - if b, ok = p.getc(); !ok { - if p.err == os.EOF { - p.err = SyntaxError("unterminated <!-- comment") - } - return nil, p.err; + if b, ok = p.mustgetc(); !ok { + return nil, p.err } p.buf.WriteByte(b); if b0 == '-' && b1 == '-' && b == '>' { @@ -498,7 +495,7 @@ func (p *Parser) RawToken() (Token, os.Error) { case '[': // <![ // Probably <![CDATA[. for i := 0; i < 6; i++ { - if b, ok = p.getc(); !ok { + if b, ok = p.mustgetc(); !ok { return nil, p.err } if b != "CDATA["[i] { @@ -519,7 +516,7 @@ func (p *Parser) RawToken() (Token, os.Error) { p.buf.Reset(); p.buf.WriteByte(b); for { - if b, ok = p.getc(); !ok { + if b, ok = p.mustgetc(); !ok { return nil, p.err } if b == '>' { @@ -548,12 +545,12 @@ func (p *Parser) RawToken() (Token, os.Error) { attr = make([]Attr, 0, 4); for { p.space(); - if b, ok = p.getc(); !ok { + if b, ok = p.mustgetc(); !ok { return nil, p.err } if b == '/' { empty = true; - if b, ok = p.getc(); !ok { + if b, ok = p.mustgetc(); !ok { return nil, p.err } if b != '>' { @@ -584,7 +581,7 @@ func (p *Parser) RawToken() (Token, os.Error) { return nil, p.err; } p.space(); - if b, ok = p.getc(); !ok { + if b, ok = p.mustgetc(); !ok { return nil, p.err } if b != '=' { @@ -592,7 +589,7 @@ func (p *Parser) RawToken() (Token, os.Error) { return nil, p.err; } p.space(); - if b, ok = p.getc(); !ok { + if b, ok = p.mustgetc(); !ok { return nil, p.err } if b != '"' && b != '\'' { @@ -652,6 +649,19 @@ func (p *Parser) getc() (b byte, ok bool) { return b, true; } +// Must read a single byte. +// If there is no byte to read, +// set p.err to SyntaxError("unexpected EOF") +// and return ok==false +func (p *Parser) mustgetc() (b byte, ok bool) { + if b, ok = p.getc(); !ok { + if p.err == os.EOF { + p.err = SyntaxError("unexpected EOF") + } + } + return; +} + // Unread a single byte. func (p *Parser) ungetc(b byte) { if b == '\n' { @@ -678,7 +688,7 @@ func (p *Parser) text(quote int, cdata bool) []byte { p.buf.Reset(); Input: for { - b, ok := p.getc(); + b, ok := p.mustgetc(); if !ok { return nil } @@ -717,7 +727,10 @@ Input: for i = 0; i < len(p.tmp); i++ { p.tmp[i], p.err = p.r.ReadByte(); if p.err != nil { - return nil + if p.err == os.EOF { + p.err = SyntaxError("unexpected EOF") + } + return nil; } c := p.tmp[i]; if c == ';' { @@ -819,22 +832,23 @@ func (p *Parser) nsname() (name Name, ok bool) { } // Get name: /first(first|second)*/ -// Do not set p.err if the name is missing: let the caller provide better context. +// Do not set p.err if the name is missing (unless unexpected EOF is received): +// let the caller provide better context. func (p *Parser) name() (s string, ok bool) { var b byte; - if b, ok = p.getc(); !ok { + if b, ok = p.mustgetc(); !ok { return } // As a first approximation, we gather the bytes [A-Za-z_:.-\x80-\xFF]* if b < utf8.RuneSelf && !isNameByte(b) { p.ungetc(b); - return; + return "", false; } p.buf.Reset(); p.buf.WriteByte(b); for { - if b, ok = p.getc(); !ok { + if b, ok = p.mustgetc(); !ok { return } if b < utf8.RuneSelf && !isNameByte(b) { diff --git a/src/pkg/xml/xml_test.go b/src/pkg/xml/xml_test.go index 837385b6b..e689949af 100644 --- a/src/pkg/xml/xml_test.go +++ b/src/pkg/xml/xml_test.go @@ -94,6 +94,57 @@ var cookedTokens = []Token{ Comment(strings.Bytes(" missing final newline ")), } +var xmlInput = []string{ + // unexpected EOF cases + "<", + "<t", + "<t ", + "<t/", + "<t/>c", + "<!", + "<!-", + "<!--", + "<!--c-", + "<!--c--", + "<!d", + "<t></", + "<t></t", + "<?", + "<?p", + "<t a", + "<t a=", + "<t a='", + "<t a=''", + "<t/><![", + "<t/><![C", + "<t/><![CDATA[d", + "<t/><![CDATA[d]", + "<t/><![CDATA[d]]", + + // other Syntax errors + " ", + ">", + "<>", + "<t/a", + "<0 />", + "<?0 >", + // "<!0 >", // let the Token() caller handle + "</0>", + "<t 0=''>", + "<t a='&'>", + "<t a='<'>", + "<t> c;</t>", + "<t a>", + "<t a=>", + "<t a=v>", + // "<![CDATA[d]]>", // let the Token() caller handle + "cdata", + "<t></e>", + "<t></>", + "<t></t!", + "<t>cdata]]></t>", +} + type stringReader struct { s string; off int; @@ -149,3 +200,15 @@ func TestToken(t *testing.T) { } } } + +func TestSyntax(t *testing.T) { + for i := range xmlInput { + p := NewParser(StringReader(xmlInput[i])); + var err os.Error; + for _, err = p.Token(); err == nil; _, err = p.Token() { + } + if _, ok := err.(SyntaxError); !ok { + t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i]) + } + } +} |