diff options
Diffstat (limited to 'src/pkg/html')
49 files changed, 6620 insertions, 327 deletions
diff --git a/src/pkg/html/Makefile b/src/pkg/html/Makefile index 00e1c0550..28dc1a3f5 100644 --- a/src/pkg/html/Makefile +++ b/src/pkg/html/Makefile @@ -6,9 +6,11 @@ include ../../Make.inc TARG=html GOFILES=\ + const.go\ doc.go\ entity.go\ escape.go\ + node.go\ parse.go\ token.go\ diff --git a/src/pkg/html/const.go b/src/pkg/html/const.go new file mode 100644 index 000000000..9078d2601 --- /dev/null +++ b/src/pkg/html/const.go @@ -0,0 +1,90 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package html + +// Section 11.2.3.2 of the HTML5 specification says "The following elements +// have varying levels of special parsing rules". +// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements +var isSpecialElement = map[string]bool{ + "address": true, + "applet": true, + "area": true, + "article": true, + "aside": true, + "base": true, + "basefont": true, + "bgsound": true, + "blockquote": true, + "body": true, + "br": true, + "button": true, + "caption": true, + "center": true, + "col": true, + "colgroup": true, + "command": true, + "dd": true, + "details": true, + "dir": true, + "div": true, + "dl": true, + "dt": true, + "embed": true, + "fieldset": true, + "figcaption": true, + "figure": true, + "footer": true, + "form": true, + "frame": true, + "frameset": true, + "h1": true, + "h2": true, + "h3": true, + "h4": true, + "h5": true, + "h6": true, + "head": true, + "header": true, + "hgroup": true, + "hr": true, + "html": true, + "iframe": true, + "img": true, + "input": true, + "isindex": true, + "li": true, + "link": true, + "listing": true, + "marquee": true, + "menu": true, + "meta": true, + "nav": true, + "noembed": true, + "noframes": true, + "noscript": true, + "object": true, + "ol": true, + "p": true, + "param": true, + "plaintext": true, + "pre": true, + "script": true, + "section": true, + 
"select": true, + "style": true, + "summary": true, + "table": true, + "tbody": true, + "td": true, + "textarea": true, + "tfoot": true, + "th": true, + "thead": true, + "title": true, + "tr": true, + "ul": true, + "wbr": true, + "xmp": true, +} diff --git a/src/pkg/html/entity.go b/src/pkg/html/entity.go index 1530290cb..21263e22d 100644 --- a/src/pkg/html/entity.go +++ b/src/pkg/html/entity.go @@ -4,6 +4,9 @@ package html +// All entities that do not end with ';' are 6 or fewer bytes long. +const longestEntityWithoutSemicolon = 6 + // entity is a map from HTML entity names to their values. The semicolon matters: // http://www.whatwg.org/specs/web-apps/current-work/multipage/named-character-references.html // lists both "amp" and "amp;" as two separate entries. diff --git a/src/pkg/html/entity_test.go b/src/pkg/html/entity_test.go index a1eb4d4f0..2cf49d61d 100644 --- a/src/pkg/html/entity_test.go +++ b/src/pkg/html/entity_test.go @@ -17,6 +17,9 @@ func TestEntityLength(t *testing.T) { if 1+len(k) < utf8.RuneLen(v) { t.Error("escaped entity &" + k + " is shorter than its UTF-8 encoding " + string(v)) } + if len(k) > longestEntityWithoutSemicolon && k[len(k)-1] != ';' { + t.Errorf("entity name %s is %d characters, but longestEntityWithoutSemicolon=%d", k, len(k), longestEntityWithoutSemicolon) + } } for k, v := range entity2 { if 1+len(k) < utf8.RuneLen(v[0])+utf8.RuneLen(v[1]) { diff --git a/src/pkg/html/escape.go b/src/pkg/html/escape.go index 2799f6908..0de97c5ac 100644 --- a/src/pkg/html/escape.go +++ b/src/pkg/html/escape.go @@ -53,7 +53,8 @@ var replacementTable = [...]int{ // unescapeEntity reads an entity like "<" from b[src:] and writes the // corresponding "<" to b[dst:], returning the incremented dst and src cursors. // Precondition: b[src] == '&' && dst <= src. -func unescapeEntity(b []byte, dst, src int) (dst1, src1 int) { +// attribute should be true if parsing an attribute value. 
+func unescapeEntity(b []byte, dst, src int, attribute bool) (dst1, src1 int) { // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#consume-a-character-reference // i starts at 1 because we already know that s[0] == '&'. @@ -121,12 +122,11 @@ func unescapeEntity(b []byte, dst, src int) (dst1, src1 int) { // Consume the maximum number of characters possible, with the // consumed characters matching one of the named references. - // TODO(nigeltao): unescape("&notit;") should be "¬it;" for i < len(s) { c := s[i] i++ // Lower-cased characters are more common in entities, so we check for them first. - if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' { + if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' { continue } if c != ';' { @@ -136,11 +136,25 @@ func unescapeEntity(b []byte, dst, src int) (dst1, src1 int) { } entityName := string(s[1:i]) - if x := entity[entityName]; x != 0 { + if entityName == "" { + // No-op. + } else if attribute && entityName[len(entityName)-1] != ';' && len(s) > i && s[i] == '=' { + // No-op. + } else if x := entity[entityName]; x != 0 { return dst + utf8.EncodeRune(b[dst:], x), src + i - } else if x := entity2[entityName]; x[0] != 0 { // Check if it's a two-character entity. 
+ } else if x := entity2[entityName]; x[0] != 0 { dst1 := dst + utf8.EncodeRune(b[dst:], x[0]) return dst1 + utf8.EncodeRune(b[dst1:], x[1]), src + i + } else if !attribute { + maxLen := len(entityName) - 1 + if maxLen > longestEntityWithoutSemicolon { + maxLen = longestEntityWithoutSemicolon + } + for j := maxLen; j > 1; j-- { + if x := entity[entityName[:j]]; x != 0 { + return dst + utf8.EncodeRune(b[dst:], x), src + j + 1 + } + } } dst1, src1 = dst+i, src+i @@ -152,11 +166,11 @@ func unescapeEntity(b []byte, dst, src int) (dst1, src1 int) { func unescape(b []byte) []byte { for i, c := range b { if c == '&' { - dst, src := unescapeEntity(b, i, i) + dst, src := unescapeEntity(b, i, i, false) for src < len(b) { c := b[src] if c == '&' { - dst, src = unescapeEntity(b, dst, src) + dst, src = unescapeEntity(b, dst, src, false) } else { b[dst] = c dst, src = dst+1, src+1 diff --git a/src/pkg/html/node.go b/src/pkg/html/node.go new file mode 100644 index 000000000..595afd569 --- /dev/null +++ b/src/pkg/html/node.go @@ -0,0 +1,146 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package html + +// A NodeType is the type of a Node. +type NodeType int + +const ( + ErrorNode NodeType = iota + TextNode + DocumentNode + ElementNode + CommentNode + scopeMarkerNode +) + +// Section 11.2.3.3 says "scope markers are inserted when entering applet +// elements, buttons, object elements, marquees, table cells, and table +// captions, and are used to prevent formatting from 'leaking'". +var scopeMarker = Node{Type: scopeMarkerNode} + +// A Node consists of a NodeType and some Data (tag name for element nodes, +// content for text) and are part of a tree of Nodes. Element nodes may also +// contain a slice of Attributes. Data is unescaped, so that it looks like +// "a<b" rather than "a&lt;b". 
+type Node struct { + Parent *Node + Child []*Node + Type NodeType + Data string + Attr []Attribute +} + +// Add adds a node as a child of n. +// It will panic if the child's parent is not nil. +func (n *Node) Add(child *Node) { + if child.Parent != nil { + panic("html: Node.Add called for a child Node that already has a parent") + } + child.Parent = n + n.Child = append(n.Child, child) +} + +// Remove removes a node as a child of n. +// It will panic if the child's parent is not n. +func (n *Node) Remove(child *Node) { + if child.Parent == n { + child.Parent = nil + for i, m := range n.Child { + if m == child { + copy(n.Child[i:], n.Child[i+1:]) + j := len(n.Child) - 1 + n.Child[j] = nil + n.Child = n.Child[:j] + return + } + } + } + panic("html: Node.Remove called for a non-child Node") +} + +// reparentChildren reparents all of src's child nodes to dst. +func reparentChildren(dst, src *Node) { + for _, n := range src.Child { + if n.Parent != src { + panic("html: nodes have an inconsistent parent/child relationship") + } + n.Parent = dst + } + dst.Child = append(dst.Child, src.Child...) + src.Child = nil +} + +// clone returns a new node with the same type, data and attributes. +// The clone has no parent and no children. +func (n *Node) clone() *Node { + m := &Node{ + Type: n.Type, + Data: n.Data, + Attr: make([]Attribute, len(n.Attr)), + } + copy(m.Attr, n.Attr) + return m +} + +// nodeStack is a stack of nodes. +type nodeStack []*Node + +// pop pops the stack. It will panic if s is empty. +func (s *nodeStack) pop() *Node { + i := len(*s) + n := (*s)[i-1] + *s = (*s)[:i-1] + return n +} + +// top returns the most recently pushed node, or nil if s is empty. +func (s *nodeStack) top() *Node { + if i := len(*s); i > 0 { + return (*s)[i-1] + } + return nil +} + +// index returns the index of the top-most occurrence of n in the stack, or -1 +// if n is not present. 
+func (s *nodeStack) index(n *Node) int { + for i := len(*s) - 1; i >= 0; i-- { + if (*s)[i] == n { + return i + } + } + return -1 +} + +// insert inserts a node at the given index. +func (s *nodeStack) insert(i int, n *Node) { + (*s) = append(*s, nil) + copy((*s)[i+1:], (*s)[i:]) + (*s)[i] = n +} + +// remove removes a node from the stack. It is a no-op if n is not present. +func (s *nodeStack) remove(n *Node) { + i := s.index(n) + if i == -1 { + return + } + copy((*s)[i:], (*s)[i+1:]) + j := len(*s) - 1 + (*s)[j] = nil + *s = (*s)[:j] +} + +// forTag returns the top-most element node with the given tag. +func (s *nodeStack) forTag(tag string) *Node { + for i := len(*s) - 1; i >= 0; i-- { + n := (*s)[i] + if n.Type == ElementNode && n.Data == tag { + return n + } + } + return nil +} diff --git a/src/pkg/html/parse.go b/src/pkg/html/parse.go index 6a2bc1ea6..980c47069 100644 --- a/src/pkg/html/parse.go +++ b/src/pkg/html/parse.go @@ -9,29 +9,6 @@ import ( "os" ) -// A NodeType is the type of a Node. -type NodeType int - -const ( - ErrorNode NodeType = iota - TextNode - DocumentNode - ElementNode - CommentNode -) - -// A Node consists of a NodeType and some Data (tag name for element nodes, -// content for text) and are part of a tree of Nodes. Element nodes may also -// contain a slice of Attributes. Data is unescaped, so that it looks like -// "a<b" rather than "a&lt;b". -type Node struct { - Parent *Node - Child []*Node - Type NodeType - Data string - Attr []Attribute -} - // A parser implements the HTML5 parsing algorithm: // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#tree-construction type parser struct { @@ -45,38 +22,23 @@ type parser struct { hasSelfClosingToken bool // doc is the document root element. doc *Node - // The stack of open elements (section 10.2.3.2). - stack []*Node - // Element pointers (section 10.2.3.4). + // The stack of open elements (section 11.2.3.2) and active formatting + // elements (section 11.2.3.3). 
+ oe, afe nodeStack + // Element pointers (section 11.2.3.4). head, form *Node - // Other parsing state flags (section 10.2.3.5). + // Other parsing state flags (section 11.2.3.5). scripting, framesetOK bool } -// push pushes onto the stack of open elements. -func (p *parser) push(n *Node) { - p.stack = append(p.stack, n) -} - -// top returns the top of the stack of open elements. -// This is also known as the current node. func (p *parser) top() *Node { - if n := len(p.stack); n > 0 { - return p.stack[n-1] + if n := p.oe.top(); n != nil { + return n } return p.doc } -// pop pops the top of the stack of open elements. -// It will panic if the stack is empty. -func (p *parser) pop() *Node { - n := len(p.stack) - ret := p.stack[n-1] - p.stack = p.stack[:n-1] - return ret -} - -// stopTags for use in popUntil. These come from section 10.2.3.2. +// stopTags for use in popUntil. These come from section 11.2.3.2. var ( defaultScopeStopTags = []string{"applet", "caption", "html", "table", "td", "th", "marquee", "object"} listItemScopeStopTags = []string{"applet", "caption", "html", "table", "td", "th", "marquee", "object", "ol", "ul"} @@ -102,11 +64,11 @@ var ( // popUntil([]string{"html, "table"}, "table") would return true and leave: // ["html", "body", "font"] func (p *parser) popUntil(stopTags []string, matchTags ...string) bool { - for i := len(p.stack) - 1; i >= 0; i-- { - tag := p.stack[i].Data + for i := len(p.oe) - 1; i >= 0; i-- { + tag := p.oe[i].Data for _, t := range matchTags { if t == tag { - p.stack = p.stack[:i] + p.oe = p.oe[:i] return true } } @@ -122,10 +84,9 @@ func (p *parser) popUntil(stopTags []string, matchTags ...string) bool { // addChild adds a child node n to the top element, and pushes n if it is an // element node (text nodes are not part of the stack of open elements). 
func (p *parser) addChild(n *Node) { - m := p.top() - m.Child = append(m.Child, n) + p.top().Add(n) if n.Type == ElementNode { - p.push(n) + p.oe = append(p.oe, n) } } @@ -148,15 +109,50 @@ func (p *parser) addElement(tag string, attr []Attribute) { }) } -// Section 10.2.3.3. +// Section 11.2.3.3. func (p *parser) addFormattingElement(tag string, attr []Attribute) { p.addElement(tag, attr) + p.afe = append(p.afe, p.top()) // TODO. } -// Section 10.2.3.3. +// Section 11.2.3.3. +func (p *parser) clearActiveFormattingElements() { + for { + n := p.afe.pop() + if len(p.afe) == 0 || n.Type == scopeMarkerNode { + return + } + } +} + +// Section 11.2.3.3. func (p *parser) reconstructActiveFormattingElements() { - // TODO. + n := p.afe.top() + if n == nil { + return + } + if n.Type == scopeMarkerNode || p.oe.index(n) != -1 { + return + } + i := len(p.afe) - 1 + for n.Type != scopeMarkerNode && p.oe.index(n) == -1 { + if i == 0 { + i = -1 + break + } + i-- + n = p.afe[i] + } + for { + i++ + n = p.afe[i] + p.addChild(n.clone()) + p.afe[i] = n + if i == len(p.afe)-1 { + break + } + } } // read reads the next token. This is usually from the tokenizer, but it may @@ -180,12 +176,12 @@ func (p *parser) read() os.Error { return nil } -// Section 10.2.4. +// Section 11.2.4. func (p *parser) acknowledgeSelfClosingTag() { p.hasSelfClosingToken = false } -// An insertion mode (section 10.2.3.1) is the state transition function from +// An insertion mode (section 11.2.3.1) is the state transition function from // a particular state in the HTML5 parser's state machine. It updates the // parser's fields depending on parser.token (where ErrorToken means EOF). In // addition to returning the next insertionMode state, it also returns whether @@ -194,7 +190,7 @@ type insertionMode func(*parser) (insertionMode, bool) // useTheRulesFor runs the delegate insertionMode over p, returning the actual // insertionMode unless the delegate caused a state transition. 
-// Section 10.2.3.1, "using the rules for". +// Section 11.2.3.1, "using the rules for". func useTheRulesFor(p *parser, actual, delegate insertionMode) (insertionMode, bool) { im, consumed := delegate(p) if im != delegate { @@ -203,13 +199,13 @@ func useTheRulesFor(p *parser, actual, delegate insertionMode) (insertionMode, b return actual, consumed } -// Section 10.2.5.4. +// Section 11.2.5.4.1. func initialIM(p *parser) (insertionMode, bool) { // TODO: check p.tok for DOCTYPE. return beforeHTMLIM, false } -// Section 10.2.5.5. +// Section 11.2.5.4.2. func beforeHTMLIM(p *parser) (insertionMode, bool) { var ( add bool @@ -243,7 +239,7 @@ func beforeHTMLIM(p *parser) (insertionMode, bool) { return beforeHeadIM, !implied } -// Section 10.2.5.6. +// Section 11.2.5.4.3. func beforeHeadIM(p *parser) (insertionMode, bool) { var ( add bool @@ -280,7 +276,7 @@ func beforeHeadIM(p *parser) (insertionMode, bool) { return inHeadIM, !implied } -// Section 10.2.5.7. +// Section 11.2.5.4.4. func inHeadIM(p *parser) (insertionMode, bool) { var ( pop bool @@ -305,7 +301,7 @@ func inHeadIM(p *parser) (insertionMode, bool) { // TODO. } if pop || implied { - n := p.pop() + n := p.oe.pop() if n.Data != "head" { panic("html: bad parser state") } @@ -314,7 +310,7 @@ func inHeadIM(p *parser) (insertionMode, bool) { return inHeadIM, !implied } -// Section 10.2.5.9. +// Section 11.2.5.4.6. func afterHeadIM(p *parser) (insertionMode, bool) { var ( add bool @@ -354,17 +350,18 @@ func afterHeadIM(p *parser) (insertionMode, bool) { return inBodyIM, !implied } -// Section 10.2.5.10. +// Section 11.2.5.4.7. 
func inBodyIM(p *parser) (insertionMode, bool) { var endP bool switch p.tok.Type { case TextToken: + p.reconstructActiveFormattingElements() p.addText(p.tok.Data) p.framesetOK = false case StartTagToken: switch p.tok.Data { case "address", "article", "aside", "blockquote", "center", "details", "dir", "div", "dl", "fieldset", "figcaption", "figure", "footer", "header", "hgroup", "menu", "nav", "ol", "p", "section", "summary", "ul": - // TODO: Do the proper "does the stack of open elements has a p element in button scope" algorithm in section 10.2.3.2. + // TODO: Do the proper "does the stack of open elements has a p element in button scope" algorithm in section 11.2.3.2. n := p.top() if n.Type == ElementNode && n.Data == "p" { endP = true @@ -375,16 +372,24 @@ func inBodyIM(p *parser) (insertionMode, bool) { // TODO: auto-insert </p> if necessary. switch n := p.top(); n.Data { case "h1", "h2", "h3", "h4", "h5", "h6": - p.pop() + p.oe.pop() } p.addElement(p.tok.Data, p.tok.Attr) + case "a": + if n := p.afe.forTag("a"); n != nil { + p.inBodyEndTagFormatting("a") + p.oe.remove(n) + p.afe.remove(n) + } + p.reconstructActiveFormattingElements() + p.addFormattingElement(p.tok.Data, p.tok.Attr) case "b", "big", "code", "em", "font", "i", "s", "small", "strike", "strong", "tt", "u": p.reconstructActiveFormattingElements() p.addFormattingElement(p.tok.Data, p.tok.Attr) case "area", "br", "embed", "img", "input", "keygen", "wbr": p.reconstructActiveFormattingElements() p.addElement(p.tok.Data, p.tok.Attr) - p.pop() + p.oe.pop() p.acknowledgeSelfClosingTag() p.framesetOK = false case "table": @@ -395,7 +400,7 @@ func inBodyIM(p *parser) (insertionMode, bool) { case "hr": // TODO: auto-insert </p> if necessary. p.addElement(p.tok.Data, p.tok.Attr) - p.pop() + p.oe.pop() p.acknowledgeSelfClosingTag() p.framesetOK = false default: @@ -408,21 +413,17 @@ func inBodyIM(p *parser) (insertionMode, bool) { // TODO: autoclose the stack of open elements. 
return afterBodyIM, true case "a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u": - // TODO: implement the "adoption agency" algorithm: - // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#adoptionAgency - if p.tok.Data == p.top().Data { - p.pop() - } + p.inBodyEndTagFormatting(p.tok.Data) default: // TODO: any other end tag if p.tok.Data == p.top().Data { - p.pop() + p.oe.pop() } } } if endP { // TODO: do the proper algorithm. - n := p.pop() + n := p.oe.pop() if n.Type != ElementNode || n.Data != "p" { panic("unreachable") } @@ -430,7 +431,123 @@ func inBodyIM(p *parser) (insertionMode, bool) { return inBodyIM, !endP } -// Section 10.2.5.12. +func (p *parser) inBodyEndTagFormatting(tag string) { + // This is the "adoption agency" algorithm, described at + // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#adoptionAgency + + // TODO: this is a fairly literal line-by-line translation of that algorithm. + // Once the code successfully parses the comprehensive test suite, we should + // refactor this code to be more idiomatic. + + // Steps 1-3. The outer loop. + for i := 0; i < 8; i++ { + // Step 4. Find the formatting element. + var formattingElement *Node + for j := len(p.afe) - 1; j >= 0; j-- { + if p.afe[j].Type == scopeMarkerNode { + break + } + if p.afe[j].Data == tag { + formattingElement = p.afe[j] + break + } + } + if formattingElement == nil { + return + } + feIndex := p.oe.index(formattingElement) + if feIndex == -1 { + p.afe.remove(formattingElement) + return + } + + // Steps 5-6. Find the furthest block. + var furthestBlock *Node + for _, e := range p.oe[feIndex:] { + if isSpecialElement[e.Data] { + furthestBlock = e + break + } + } + if furthestBlock == nil { + e := p.oe.pop() + for e != formattingElement { + e = p.oe.pop() + } + p.afe.remove(e) + return + } + + // Steps 7-8. Find the common ancestor and bookmark node. 
+ commonAncestor := p.oe[feIndex-1] + bookmark := p.afe.index(formattingElement) + + // Step 9. The inner loop. Find the lastNode to reparent. + lastNode := furthestBlock + node := furthestBlock + x := p.oe.index(node) + // Steps 9.1-9.3. + for j := 0; j < 3; j++ { + // Step 9.4. + x-- + node = p.oe[x] + // Step 9.5. + if p.afe.index(node) == -1 { + p.oe.remove(node) + continue + } + // Step 9.6. + if node == formattingElement { + break + } + // Step 9.7. + clone := node.clone() + p.afe[p.afe.index(node)] = clone + p.oe[p.oe.index(node)] = clone + node = clone + // Step 9.8. + if lastNode == furthestBlock { + bookmark = p.afe.index(node) + 1 + } + // Step 9.9. + if lastNode.Parent != nil { + lastNode.Parent.Remove(lastNode) + } + node.Add(lastNode) + // Step 9.10. + lastNode = node + } + + // Step 10. Reparent lastNode to the common ancestor, + // or for misnested table nodes, to the foster parent. + if lastNode.Parent != nil { + lastNode.Parent.Remove(lastNode) + } + switch commonAncestor.Data { + case "table", "tbody", "tfoot", "thead", "tr": + // TODO: fix up misnested table nodes; find the foster parent. + fallthrough + default: + commonAncestor.Add(lastNode) + } + + // Steps 11-13. Reparent nodes from the furthest block's children + // to a clone of the formatting element. + clone := formattingElement.clone() + reparentChildren(clone, furthestBlock) + furthestBlock.Add(clone) + + // Step 14. Fix up the list of active formatting elements. + p.afe.remove(formattingElement) + p.afe.insert(bookmark, clone) + + // Step 15. Fix up the stack of open elements. + p.oe.remove(formattingElement) + p.oe.insert(p.oe.index(furthestBlock)+1, clone) + } +} + +// Section 11.2.5.4.9. func inTableIM(p *parser) (insertionMode, bool) { var ( add bool @@ -461,7 +578,7 @@ func inTableIM(p *parser) (insertionMode, bool) { switch p.tok.Data { case "table": if p.popUntil(tableScopeStopTags, "table") { - // TODO: "reset the insertion mode appropriately" as per 10.2.3.1. 
+ // TODO: "reset the insertion mode appropriately" as per 11.2.3.1. return inBodyIM, false } // Ignore the token. @@ -480,7 +597,7 @@ func inTableIM(p *parser) (insertionMode, bool) { return inTableIM, true } -// Section 10.2.5.16. +// Section 11.2.5.4.13. func inTableBodyIM(p *parser) (insertionMode, bool) { var ( add bool @@ -528,7 +645,7 @@ func inTableBodyIM(p *parser) (insertionMode, bool) { return useTheRulesFor(p, inTableBodyIM, inTableIM) } -// Section 10.2.5.17. +// Section 11.2.5.4.14. func inRowIM(p *parser) (insertionMode, bool) { switch p.tok.Type { case ErrorToken: @@ -540,7 +657,7 @@ func inRowIM(p *parser) (insertionMode, bool) { case "td", "th": // TODO: clear the stack back to a table row context. p.addElement(p.tok.Data, p.tok.Attr) - // TODO: insert a marker at the end of the list of active formatting elements. + p.afe = append(p.afe, &scopeMarker) return inCellIM, true default: // TODO. @@ -567,7 +684,7 @@ func inRowIM(p *parser) (insertionMode, bool) { return useTheRulesFor(p, inRowIM, inTableIM) } -// Section 10.2.5.18. +// Section 11.2.5.4.15. func inCellIM(p *parser) (insertionMode, bool) { var ( closeTheCellAndReprocess bool @@ -592,14 +709,14 @@ func inCellIM(p *parser) (insertionMode, bool) { } if closeTheCellAndReprocess { if p.popUntil(tableScopeStopTags, "td") || p.popUntil(tableScopeStopTags, "th") { - // TODO: clear the list of active formatting elements up to the last marker. + p.clearActiveFormattingElements() return inRowIM, false } } return useTheRulesFor(p, inCellIM, inBodyIM) } -// Section 10.2.5.22. +// Section 11.2.5.4.18. func afterBodyIM(p *parser) (insertionMode, bool) { switch p.tok.Type { case ErrorToken: @@ -620,7 +737,7 @@ func afterBodyIM(p *parser) (insertionMode, bool) { return afterBodyIM, true } -// Section 10.2.5.25. +// Section 11.2.5.4.21. 
func afterAfterBodyIM(p *parser) (insertionMode, bool) { switch p.tok.Type { case ErrorToken: diff --git a/src/pkg/html/parse_test.go b/src/pkg/html/parse_test.go index 3fa35d5db..f22fa277b 100644 --- a/src/pkg/html/parse_test.go +++ b/src/pkg/html/parse_test.go @@ -85,6 +85,8 @@ func dumpLevel(w io.Writer, n *Node, level int) os.Error { fmt.Fprintf(w, "%q", EscapeString(n.Data)) case CommentNode: return os.NewError("COMMENT") + case scopeMarkerNode: + return os.NewError("unexpected scopeMarkerNode") default: return os.NewError("unknown node type") } @@ -119,7 +121,7 @@ func TestParser(t *testing.T) { rc := make(chan io.Reader) go readDat(filename, rc) // TODO(nigeltao): Process all test cases, not just a subset. - for i := 0; i < 22; i++ { + for i := 0; i < 23; i++ { // Parse the #data section. b, err := ioutil.ReadAll(<-rc) if err != nil { diff --git a/src/pkg/html/testdata/webkit/adoption01.dat b/src/pkg/html/testdata/webkit/adoption01.dat new file mode 100644 index 000000000..787e1b01e --- /dev/null +++ b/src/pkg/html/testdata/webkit/adoption01.dat @@ -0,0 +1,194 @@ +#data +<a><p></a></p> +#errors +#document +| <html> +| <head> +| <body> +| <a> +| <p> +| <a> + +#data +<a>1<p>2</a>3</p> +#errors +#document +| <html> +| <head> +| <body> +| <a> +| "1" +| <p> +| <a> +| "2" +| "3" + +#data +<a>1<button>2</a>3</button> +#errors +#document +| <html> +| <head> +| <body> +| <a> +| "1" +| <button> +| <a> +| "2" +| "3" + +#data +<a>1<b>2</a>3</b> +#errors +#document +| <html> +| <head> +| <body> +| <a> +| "1" +| <b> +| "2" +| <b> +| "3" + +#data +<a>1<div>2<div>3</a>4</div>5</div> +#errors +#document +| <html> +| <head> +| <body> +| <a> +| "1" +| <div> +| <a> +| "2" +| <div> +| <a> +| "3" +| "4" +| "5" + +#data +<table><a>1<p>2</a>3</p> +#errors +#document +| <html> +| <head> +| <body> +| <a> +| "1" +| <p> +| <a> +| "2" +| "3" +| <table> + +#data +<b><b><a><p></a> +#errors +#document +| <html> +| <head> +| <body> +| <b> +| <b> +| <a> +| <p> +| <a> + +#data 
+<b><a><b><p></a> +#errors +#document +| <html> +| <head> +| <body> +| <b> +| <a> +| <b> +| <b> +| <p> +| <a> + +#data +<a><b><b><p></a> +#errors +#document +| <html> +| <head> +| <body> +| <a> +| <b> +| <b> +| <b> +| <b> +| <p> +| <a> + +#data +<p>1<s id="A">2<b id="B">3</p>4</s>5</b> +#errors +#document +| <html> +| <head> +| <body> +| <p> +| "1" +| <s> +| id="A" +| "2" +| <b> +| id="B" +| "3" +| <s> +| id="A" +| <b> +| id="B" +| "4" +| <b> +| id="B" +| "5" + +#data +<table><a>1<td>2</td>3</table> +#errors +#document +| <html> +| <head> +| <body> +| <a> +| "1" +| <a> +| "3" +| <table> +| <tbody> +| <tr> +| <td> +| "2" + +#data +<table>A<td>B</td>C</table> +#errors +#document +| <html> +| <head> +| <body> +| "AC" +| <table> +| <tbody> +| <tr> +| <td> +| "B" + +#data +<a><svg><tr><input></a> +#errors +#document +| <html> +| <head> +| <body> +| <a> +| <svg svg> +| <svg tr> +| <svg input> diff --git a/src/pkg/html/testdata/webkit/adoption02.dat b/src/pkg/html/testdata/webkit/adoption02.dat new file mode 100644 index 000000000..d18151b44 --- /dev/null +++ b/src/pkg/html/testdata/webkit/adoption02.dat @@ -0,0 +1,31 @@ +#data +<b>1<i>2<p>3</b>4 +#errors +#document +| <html> +| <head> +| <body> +| <b> +| "1" +| <i> +| "2" +| <i> +| <p> +| <b> +| "3" +| "4" + +#data +<a><div><style></style><address><a> +#errors +#document +| <html> +| <head> +| <body> +| <a> +| <div> +| <a> +| <style> +| <address> +| <a> +| <a> diff --git a/src/pkg/html/testdata/webkit/comments01.dat b/src/pkg/html/testdata/webkit/comments01.dat index 388d95287..44f187683 100644 --- a/src/pkg/html/testdata/webkit/comments01.dat +++ b/src/pkg/html/testdata/webkit/comments01.dat @@ -28,8 +28,7 @@ FOO<!-- BAR -- >BAZ | <head> | <body> | "FOO" -| <!-- BAR -- --> -| "BAZ" +| <!-- BAR -- >BAZ --> #data FOO<!-- BAR -- <QUX> -- MUX -->BAZ @@ -61,8 +60,7 @@ FOO<!-- BAR -- <QUX> -- MUX -- >BAZ | <head> | <body> | "FOO" -| <!-- BAR -- <QUX> -- MUX -- --> -| "BAZ" +| <!-- BAR -- <QUX> -- MUX -- >BAZ --> #data 
FOO<!---->BAZ @@ -124,3 +122,14 @@ FOO<!-->BAZ | <html> | <head> | <body> + +#data +FOO<!----->BAZ +#errors +#document +| <html> +| <head> +| <body> +| "FOO" +| <!-- - --> +| "BAZ" diff --git a/src/pkg/html/testdata/webkit/doctype01.dat b/src/pkg/html/testdata/webkit/doctype01.dat index 575129c14..ae457328a 100644 --- a/src/pkg/html/testdata/webkit/doctype01.dat +++ b/src/pkg/html/testdata/webkit/doctype01.dat @@ -132,7 +132,7 @@ <!DOCTYPE potato SYSTEM 'taco"'>Hello #errors #document -| <!DOCTYPE potato> +| <!DOCTYPE potato "" "taco""> | <html> | <head> | <body> @@ -142,7 +142,7 @@ <!DOCTYPE potato SYSTEM "taco">Hello #errors #document -| <!DOCTYPE potato> +| <!DOCTYPE potato "" "taco"> | <html> | <head> | <body> @@ -152,7 +152,7 @@ <!DOCTYPE potato SYSTEM "tai'co">Hello #errors #document -| <!DOCTYPE potato> +| <!DOCTYPE potato "" "tai'co"> | <html> | <head> | <body> @@ -222,7 +222,7 @@ <!DOCTYPE potato PUBLIC "go'of">Hello #errors #document -| <!DOCTYPE potato> +| <!DOCTYPE potato "go'of" ""> | <html> | <head> | <body> @@ -232,7 +232,7 @@ <!DOCTYPE potato PUBLIC 'go'of'>Hello #errors #document -| <!DOCTYPE potato> +| <!DOCTYPE potato "go" ""> | <html> | <head> | <body> @@ -242,7 +242,7 @@ <!DOCTYPE potato PUBLIC 'go:hh of' >Hello #errors #document -| <!DOCTYPE potato> +| <!DOCTYPE potato "go:hh of" ""> | <html> | <head> | <body> @@ -252,7 +252,7 @@ <!DOCTYPE potato PUBLIC "W3C-//dfdf" SYSTEM ggg>Hello #errors #document -| <!DOCTYPE potato> +| <!DOCTYPE potato "W3C-//dfdf" ""> | <html> | <head> | <body> @@ -263,7 +263,7 @@ "http://www.w3.org/TR/html4/strict.dtd">Hello #errors #document -| <!DOCTYPE html> +| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> | <html> | <head> | <body> @@ -284,7 +284,7 @@ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> #errors #document -| <!DOCTYPE html> +| <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> | <html> | <head> 
| <body> @@ -294,7 +294,7 @@ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"> #errors #document -| <!DOCTYPE html> +| <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Frameset//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"> | <html> | <head> | <body> @@ -309,8 +309,7 @@ | <html> | <head> | <body> -| " -]>" +| "]>" #data <!DOCTYPE html PUBLIC @@ -318,7 +317,7 @@ "http://www.wapforum.org/DTD/xhtml-mobile10.dtd"> #errors #document -| <!DOCTYPE html> +| <!DOCTYPE html "-//WAPFORUM//DTD XHTML Mobile 1.0//EN" "http://www.wapforum.org/DTD/xhtml-mobile10.dtd"> | <html> | <head> | <body> @@ -327,9 +326,45 @@ <!DOCTYPE HTML SYSTEM "http://www.w3.org/DTD/HTML4-strict.dtd"><body><b>Mine!</b></body> #errors #document -| <!DOCTYPE html> +| <!DOCTYPE html "" "http://www.w3.org/DTD/HTML4-strict.dtd"> | <html> | <head> | <body> | <b> | "Mine!" + +#data +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN""http://www.w3.org/TR/html4/strict.dtd"> +#errors +#document +| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +| <html> +| <head> +| <body> + +#data +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'> +#errors +#document +| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +| <html> +| <head> +| <body> + +#data +<!DOCTYPE HTML PUBLIC"-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'> +#errors +#document +| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +| <html> +| <head> +| <body> + +#data +<!DOCTYPE HTML PUBLIC'-//W3C//DTD HTML 4.01//EN''http://www.w3.org/TR/html4/strict.dtd'> +#errors +#document +| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +| <html> +| <head> +| <body> diff --git a/src/pkg/html/testdata/webkit/dom2string.js b/src/pkg/html/testdata/webkit/dom2string.js deleted file mode 100644 index 45897fda4..000000000 --- a/src/pkg/html/testdata/webkit/dom2string.js +++ 
/dev/null @@ -1,135 +0,0 @@ -String.prototype.toAsciiLowerCase = function () { - var output = ""; - for (var i = 0, len = this.length; i < len; ++i) { - if (this.charCodeAt(i) >= 0x41 && this.charCodeAt(i) <= 0x5A) { - output += String.fromCharCode(this.charCodeAt(i) + 0x20) - } else { - output += this.charAt(i); - } - } - return output; -} - -function indent(ancestors) { - var str = ""; - if (ancestors > 0) { - while (ancestors--) - str += " "; - } - return str; -} - -function dom2string(node, ancestors) { - var str = ""; - if (typeof ancestors == "undefined") - var ancestors = 0; - if (!node.firstChild) - return "| "; - var parent = node; - var current = node.firstChild; - var next = null; - var misnested = null; - for (;;) { - str += "\n| " + indent(ancestors); - switch (current.nodeType) { - case 10: - str += '<!DOCTYPE ' + current.nodeName + '>'; - break; - case 8: - try { - str += '<!-- ' + current.nodeValue + ' -->'; - } catch (e) { - str += '<!-- -->'; - } - if (parent != current.parentNode) { - return str += ' (misnested... aborting)'; - } - break; - case 7: - str += '<?' + current.nodeName + current.nodeValue + '>'; - break; - case 4: - str += '<![CDATA[ ' + current.nodeValue + ' ]]>'; - break; - case 3: - str += '"' + current.nodeValue + '"'; - if (parent != current.parentNode) { - return str += ' (misnested... aborting)'; - } - break; - case 1: - str += "<"; - switch (current.namespaceURI) { - case "http://www.w3.org/2000/svg": - str += "svg "; - break; - case "http://www.w3.org/1998/Math/MathML": - str += "math "; - break; - } - if (current.localName && current.namespaceURI && current.namespaceURI != null) { - str += current.localName; - } else { - str += current.nodeName.toAsciiLowerCase(); - } - str += '>'; - if (parent != current.parentNode) { - return str += ' (misnested... 
aborting)'; - } else { - if (current.attributes) { - var attrNames = []; - var attrPos = {}; - for (var j = 0; j < current.attributes.length; j += 1) { - if (current.attributes[j].specified) { - var name = ""; - switch (current.attributes[j].namespaceURI) { - case "http://www.w3.org/XML/1998/namespace": - name += "xml "; - break; - case "http://www.w3.org/2000/xmlns/": - name += "xmlns "; - break; - case "http://www.w3.org/1999/xlink": - name += "xlink "; - break; - } - if (current.attributes[j].localName) { - name += current.attributes[j].localName; - } else { - name += current.attributes[j].nodeName; - } - attrNames.push(name); - attrPos[name] = j; - } - } - if (attrNames.length > 0) { - attrNames.sort(); - for (var j = 0; j < attrNames.length; j += 1) { - str += "\n| " + indent(1 + ancestors) + attrNames[j]; - str += '="' + current.attributes[attrPos[attrNames[j]]].nodeValue + '"'; - } - } - } - if (next = current.firstChild) { - parent = current; - current = next; - ancestors++; - continue; - } - } - break; - } - for (;;) { - if (next = current.nextSibling) { - current = next; - break; - } - current = current.parentNode; - parent = parent.parentNode; - ancestors--; - if (current == node) { - return str.substring(1); - } - } - } -} diff --git a/src/pkg/html/testdata/webkit/entities01.dat b/src/pkg/html/testdata/webkit/entities01.dat index 926642e2e..c8073b781 100644 --- a/src/pkg/html/testdata/webkit/entities01.dat +++ b/src/pkg/html/testdata/webkit/entities01.dat @@ -189,15 +189,6 @@ FOO�ZOO | "FOO�ZOO" #data -FOO
ZOO -#errors -#document -| <html> -| <head> -| <body> -| "FOO
ZOO" - -#data FOOxZOO #errors #document diff --git a/src/pkg/html/testdata/webkit/entities02.dat b/src/pkg/html/testdata/webkit/entities02.dat index 0b4dd6681..e2fb42a07 100644 --- a/src/pkg/html/testdata/webkit/entities02.dat +++ b/src/pkg/html/testdata/webkit/entities02.dat @@ -127,3 +127,123 @@ | <body> | <div> | bar="ZZ>" + +#data +<div bar="ZZ£_id=23"></div> +#errors +#document +| <html> +| <head> +| <body> +| <div> +| bar="ZZ£_id=23" + +#data +<div bar="ZZ&prod_id=23"></div> +#errors +#document +| <html> +| <head> +| <body> +| <div> +| bar="ZZ&prod_id=23" + +#data +<div bar="ZZ£_id=23"></div> +#errors +#document +| <html> +| <head> +| <body> +| <div> +| bar="ZZ£_id=23" + +#data +<div bar="ZZ∏_id=23"></div> +#errors +#document +| <html> +| <head> +| <body> +| <div> +| bar="ZZ∏_id=23" + +#data +<div bar="ZZ£=23"></div> +#errors +#document +| <html> +| <head> +| <body> +| <div> +| bar="ZZ£=23" + +#data +<div bar="ZZ&prod=23"></div> +#errors +#document +| <html> +| <head> +| <body> +| <div> +| bar="ZZ&prod=23" + +#data +<div>ZZ£_id=23</div> +#errors +#document +| <html> +| <head> +| <body> +| <div> +| "ZZ£_id=23" + +#data +<div>ZZ&prod_id=23</div> +#errors +#document +| <html> +| <head> +| <body> +| <div> +| "ZZ&prod_id=23" + +#data +<div>ZZ£_id=23</div> +#errors +#document +| <html> +| <head> +| <body> +| <div> +| "ZZ£_id=23" + +#data +<div>ZZ∏_id=23</div> +#errors +#document +| <html> +| <head> +| <body> +| <div> +| "ZZ∏_id=23" + +#data +<div>ZZ£=23</div> +#errors +#document +| <html> +| <head> +| <body> +| <div> +| "ZZ£=23" + +#data +<div>ZZ&prod=23</div> +#errors +#document +| <html> +| <head> +| <body> +| <div> +| "ZZ&prod=23" diff --git a/src/pkg/html/testdata/webkit/html5test-com.dat b/src/pkg/html/testdata/webkit/html5test-com.dat new file mode 100644 index 000000000..d7cb71db0 --- /dev/null +++ b/src/pkg/html/testdata/webkit/html5test-com.dat @@ -0,0 +1,246 @@ +#data +<div<div> +#errors +#document +| <html> +| <head> +| <body> +| <div<div> + +#data +<div 
foo<bar=''> +#errors +#document +| <html> +| <head> +| <body> +| <div> +| foo<bar="" + +#data +<div foo=`bar`> +#errors +#document +| <html> +| <head> +| <body> +| <div> +| foo="`bar`" + +#data +<div \"foo=''> +#errors +#document +| <html> +| <head> +| <body> +| <div> +| \"foo="" + +#data +<a href='\nbar'></a> +#errors +#document +| <html> +| <head> +| <body> +| <a> +| href="\nbar" + +#data +<!DOCTYPE html> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> + +#data +⟨⟩ +#errors +#document +| <html> +| <head> +| <body> +| "⟨⟩" + +#data +' +#errors +#document +| <html> +| <head> +| <body> +| "'" + +#data +ⅈ +#errors +#document +| <html> +| <head> +| <body> +| "ⅈ" + +#data +𝕂 +#errors +#document +| <html> +| <head> +| <body> +| "𝕂" + +#data +∉ +#errors +#document +| <html> +| <head> +| <body> +| "∉" + +#data +<?import namespace="foo" implementation="#bar"> +#errors +#document +| <!-- ?import namespace="foo" implementation="#bar" --> +| <html> +| <head> +| <body> + +#data +<!--foo--bar--> +#errors +#document +| <!-- foo--bar --> +| <html> +| <head> +| <body> + +#data +<![CDATA[x]]> +#errors +#document +| <!-- [CDATA[x]] --> +| <html> +| <head> +| <body> + +#data +<textarea><!--</textarea>--></textarea> +#errors +#document +| <html> +| <head> +| <body> +| <textarea> +| "<!--" +| "-->" + +#data +<textarea><!--</textarea>--> +#errors +#document +| <html> +| <head> +| <body> +| <textarea> +| "<!--" +| "-->" + +#data +<style><!--</style>--></style> +#errors +#document +| <html> +| <head> +| <style> +| "<!--" +| <body> +| "-->" + +#data +<style><!--</style>--> +#errors +#document +| <html> +| <head> +| <style> +| "<!--" +| <body> +| "-->" + +#data +<ul><li>A </li> <li>B</li></ul> +#errors +#document +| <html> +| <head> +| <body> +| <ul> +| <li> +| "A " +| " " +| <li> +| "B" + +#data +<table><form><input type=hidden><input></form><div></div></table> +#errors +#document +| <html> +| <head> +| <body> +| <input> +| <div> +| <table> +| <form> +| <input> +| 
type="hidden" + +#data +<i>A<b>B<p></i>C</b>D +#errors +#document +| <html> +| <head> +| <body> +| <i> +| "A" +| <b> +| "B" +| <b> +| <p> +| <b> +| <i> +| "C" +| "D" + +#data +<div></div> +#errors +#document +| <html> +| <head> +| <body> +| <div> + +#data +<svg></svg> +#errors +#document +| <html> +| <head> +| <body> +| <svg svg> + +#data +<math></math> +#errors +#document +| <html> +| <head> +| <body> +| <math math> diff --git a/src/pkg/html/testdata/webkit/inbody01.dat b/src/pkg/html/testdata/webkit/inbody01.dat new file mode 100644 index 000000000..3f2bd374c --- /dev/null +++ b/src/pkg/html/testdata/webkit/inbody01.dat @@ -0,0 +1,43 @@ +#data +<button>1</foo> +#errors +#document +| <html> +| <head> +| <body> +| <button> +| "1" + +#data +<foo>1<p>2</foo> +#errors +#document +| <html> +| <head> +| <body> +| <foo> +| "1" +| <p> +| "2" + +#data +<dd>1</foo> +#errors +#document +| <html> +| <head> +| <body> +| <dd> +| "1" + +#data +<foo>1<dd>2</foo> +#errors +#document +| <html> +| <head> +| <body> +| <foo> +| "1" +| <dd> +| "2" diff --git a/src/pkg/html/testdata/webkit/isindex.dat b/src/pkg/html/testdata/webkit/isindex.dat new file mode 100644 index 000000000..88325ffe6 --- /dev/null +++ b/src/pkg/html/testdata/webkit/isindex.dat @@ -0,0 +1,40 @@ +#data +<isindex> +#errors +#document +| <html> +| <head> +| <body> +| <form> +| <hr> +| <label> +| "This is a searchable index. 
Enter search keywords: " +| <input> +| name="isindex" +| <hr> + +#data +<isindex name="A" action="B" prompt="C" foo="D"> +#errors +#document +| <html> +| <head> +| <body> +| <form> +| action="B" +| <hr> +| <label> +| "C" +| <input> +| foo="D" +| name="isindex" +| <hr> + +#data +<form><isindex> +#errors +#document +| <html> +| <head> +| <body> +| <form> diff --git a/src/pkg/html/testdata/webkit/pending-spec-changes-plain-text-unsafe.dat b/src/pkg/html/testdata/webkit/pending-spec-changes-plain-text-unsafe.dat Binary files differnew file mode 100644 index 000000000..a5ebb1eb2 --- /dev/null +++ b/src/pkg/html/testdata/webkit/pending-spec-changes-plain-text-unsafe.dat diff --git a/src/pkg/html/testdata/webkit/pending-spec-changes.dat b/src/pkg/html/testdata/webkit/pending-spec-changes.dat new file mode 100644 index 000000000..e00ee85d3 --- /dev/null +++ b/src/pkg/html/testdata/webkit/pending-spec-changes.dat @@ -0,0 +1,28 @@ +#data +<input type="hidden"><frameset> +#errors +21: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”. +31: “frameset” start tag seen. +31: End of file seen and there were open elements. +#document +| <html> +| <head> +| <frameset> + +#data +<!DOCTYPE html><table><caption><svg>foo</table>bar +#errors +47: End tag “table” did not match the name of the current open element (“svg”). +47: “table” closed but “caption” was still open. +47: End tag “table” seen, but there were open elements. +36: Unclosed element “svg”. +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <table> +| <caption> +| <svg svg> +| "foo" +| "bar" diff --git a/src/pkg/html/testdata/webkit/plain-text-unsafe.dat b/src/pkg/html/testdata/webkit/plain-text-unsafe.dat new file mode 100644 index 000000000..2f40e83ba --- /dev/null +++ b/src/pkg/html/testdata/webkit/plain-text-unsafe.dat @@ -0,0 +1,8 @@ +#data +FOO
ZOO +#errors +#document +| <html> +| <head> +| <body> +| "FOO
ZOO" diff --git a/src/pkg/html/testdata/webkit/scripted/adoption01.dat b/src/pkg/html/testdata/webkit/scripted/adoption01.dat new file mode 100644 index 000000000..4e08d0e84 --- /dev/null +++ b/src/pkg/html/testdata/webkit/scripted/adoption01.dat @@ -0,0 +1,15 @@ +#data +<p><b id="A"><script>document.getElementById("A").id = "B"</script></p>TEXT</b> +#errors +#document +| <html> +| <head> +| <body> +| <p> +| <b> +| id="B" +| <script> +| "document.getElementById("A").id = "B"" +| <b> +| id="A" +| "TEXT" diff --git a/src/pkg/html/testdata/webkit/scripted/webkit01.dat b/src/pkg/html/testdata/webkit/scripted/webkit01.dat new file mode 100644 index 000000000..ef4a41ca0 --- /dev/null +++ b/src/pkg/html/testdata/webkit/scripted/webkit01.dat @@ -0,0 +1,28 @@ +#data +1<script>document.write("2")</script>3 +#errors +#document +| <html> +| <head> +| <body> +| "1" +| <script> +| "document.write("2")" +| "23" + +#data +1<script>document.write("<script>document.write('2')</scr"+ "ipt><script>document.write('3')</scr" + "ipt>")</script>4 +#errors +#document +| <html> +| <head> +| <body> +| "1" +| <script> +| "document.write("<script>document.write('2')</scr"+ "ipt><script>document.write('3')</scr" + "ipt>")" +| <script> +| "document.write('2')" +| "2" +| <script> +| "document.write('3')" +| "34" diff --git a/src/pkg/html/testdata/webkit/tables01.dat b/src/pkg/html/testdata/webkit/tables01.dat new file mode 100644 index 000000000..88ef1fe2e --- /dev/null +++ b/src/pkg/html/testdata/webkit/tables01.dat @@ -0,0 +1,197 @@ +#data +<table><th> +#errors +#document +| <html> +| <head> +| <body> +| <table> +| <tbody> +| <tr> +| <th> + +#data +<table><td> +#errors +#document +| <html> +| <head> +| <body> +| <table> +| <tbody> +| <tr> +| <td> + +#data +<table><col foo='bar'> +#errors +#document +| <html> +| <head> +| <body> +| <table> +| <colgroup> +| <col> +| foo="bar" + +#data +<table><colgroup></html>foo +#errors +#document +| <html> +| <head> +| <body> +| "foo" +| <table> +| <colgroup> 
+ +#data +<table></table><p>foo +#errors +#document +| <html> +| <head> +| <body> +| <table> +| <p> +| "foo" + +#data +<table></body></caption></col></colgroup></html></tbody></td></tfoot></th></thead></tr><td> +#errors +#document +| <html> +| <head> +| <body> +| <table> +| <tbody> +| <tr> +| <td> + +#data +<table><select><option>3</select></table> +#errors +#document +| <html> +| <head> +| <body> +| <select> +| <option> +| "3" +| <table> + +#data +<table><select><table></table></select></table> +#errors +#document +| <html> +| <head> +| <body> +| <select> +| <table> +| <table> + +#data +<table><select></table> +#errors +#document +| <html> +| <head> +| <body> +| <select> +| <table> + +#data +<table><select><option>A<tr><td>B</td></tr></table> +#errors +#document +| <html> +| <head> +| <body> +| <select> +| <option> +| "A" +| <table> +| <tbody> +| <tr> +| <td> +| "B" + +#data +<table><td></body></caption></col></colgroup></html>foo +#errors +#document +| <html> +| <head> +| <body> +| <table> +| <tbody> +| <tr> +| <td> +| "foo" + +#data +<table><td>A</table>B +#errors +#document +| <html> +| <head> +| <body> +| <table> +| <tbody> +| <tr> +| <td> +| "A" +| "B" + +#data +<table><tr><caption> +#errors +#document +| <html> +| <head> +| <body> +| <table> +| <tbody> +| <tr> +| <caption> + +#data +<table><tr></body></caption></col></colgroup></html></td></th><td>foo +#errors +#document +| <html> +| <head> +| <body> +| <table> +| <tbody> +| <tr> +| <td> +| "foo" + +#data +<table><td><tr> +#errors +#document +| <html> +| <head> +| <body> +| <table> +| <tbody> +| <tr> +| <td> +| <tr> + +#data +<table><td><button><td> +#errors +#document +| <html> +| <head> +| <body> +| <table> +| <tbody> +| <tr> +| <td> +| <button> +| <td> diff --git a/src/pkg/html/testdata/webkit/tests1.dat b/src/pkg/html/testdata/webkit/tests1.dat index ad58d314f..cbf8bdda6 100644 --- a/src/pkg/html/testdata/webkit/tests1.dat +++ b/src/pkg/html/testdata/webkit/tests1.dat @@ -259,7 +259,7 @@ Line: 1 Col: 24 
End tag (a) violates step 1, paragraph 1 of the adoption agency | "Z" #data -<b><button></b></button></b> +<b><button>foo</b>bar #errors Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE. Line: 1 Col: 15 End tag (b) violates step 1, paragraph 1 of the adoption agency algorithm. @@ -268,7 +268,23 @@ Line: 1 Col: 15 End tag (b) violates step 1, paragraph 1 of the adoption agency | <head> | <body> | <b> +| <button> +| <b> +| "foo" +| "bar" + +#data +<!DOCTYPE html><span><button>foo</span>bar +#errors +39: End tag “span” seen but there were unclosed elements. +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <span> | <button> +| "foobar" #data <p><b><div><marquee></p></b></div>X @@ -818,32 +834,6 @@ Line: 1 Col: 22 Expected closing tag. Unexpected end of file. | "D" #data -<cite><b><cite><i><cite><i><cite><i><div>X</b>TEST -#errors -Line: 1 Col: 6 Unexpected start tag (cite). Expected DOCTYPE. -Line: 1 Col: 46 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm. -Line: 1 Col: 50 Expected closing tag. Unexpected end of file. -#document -| <html> -| <head> -| <body> -| <cite> -| <b> -| <cite> -| <i> -| <cite> -| <i> -| <cite> -| <i> -| <i> -| <i> -| <i> -| <div> -| <b> -| "X" -| "TEST" - -#data #errors Line: 1 Col: 0 Unexpected End of file. Expected DOCTYPE. @@ -1246,6 +1236,18 @@ Line: 1 Col: 49 Unexpected end tag (code). Ignored. | <strike> #data +<!DOCTYPE html><spacer>foo +#errors +26: End of file seen and there were open elements. +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <spacer> +| "foo" + +#data <title><meta></title><link><title><meta></title> #errors Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE. 
@@ -1474,7 +1476,8 @@ Line: 1 Col: 15 End tag (b) violates step 1, paragraph 1 of the adoption agency | <head> | <body> | <b> -| <button> +| <button> +| <b> #data <p><b><div><marquee></p></b></div> diff --git a/src/pkg/html/testdata/webkit/tests10.dat b/src/pkg/html/testdata/webkit/tests10.dat index 877c9a3d7..4f8df86f2 100644 --- a/src/pkg/html/testdata/webkit/tests10.dat +++ b/src/pkg/html/testdata/webkit/tests10.dat @@ -9,6 +9,18 @@ | <svg svg> #data +<!DOCTYPE html><svg></svg><![CDATA[a]]> +#errors +29: Bogus comment +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <svg svg> +| <!-- [CDATA[a]] --> + +#data <!DOCTYPE html><body><svg></svg> #errors #document @@ -428,3 +440,360 @@ | xlink href="foo" | xml lang="en" | "bar" + +#data +<svg></path> +#errors +#document +| <html> +| <head> +| <body> +| <svg svg> + +#data +<div><svg></div>a +#errors +#document +| <html> +| <head> +| <body> +| <div> +| <svg svg> +| "a" + +#data +<div><svg><path></div>a +#errors +#document +| <html> +| <head> +| <body> +| <div> +| <svg svg> +| <svg path> +| "a" + +#data +<div><svg><path></svg><path> +#errors +#document +| <html> +| <head> +| <body> +| <div> +| <svg svg> +| <svg path> +| <path> + +#data +<div><svg><path><foreignObject><math></div>a +#errors +#document +| <html> +| <head> +| <body> +| <div> +| <svg svg> +| <svg path> +| <svg foreignObject> +| <math math> +| "a" + +#data +<div><svg><path><foreignObject><p></div>a +#errors +#document +| <html> +| <head> +| <body> +| <div> +| <svg svg> +| <svg path> +| <svg foreignObject> +| <p> +| "a" + +#data +<!DOCTYPE html><svg><desc><div><svg><ul>a +#errors +40: HTML start tag “ul” in a foreign namespace context. +41: End of file in a foreign namespace context. +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <svg svg> +| <svg desc> +| <div> +| <svg svg> +| <ul> +| "a" + +#data +<!DOCTYPE html><svg><desc><svg><ul>a +#errors +35: HTML start tag “ul” in a foreign namespace context. 
+36: End of file in a foreign namespace context. +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <svg svg> +| <svg desc> +| <svg svg> +| <ul> +| "a" + +#data +<!DOCTYPE html><p><svg><desc><p> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <svg svg> +| <svg desc> +| <p> + +#data +<!DOCTYPE html><p><svg><title><p> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <svg svg> +| <svg title> +| <p> + +#data +<div><svg><path><foreignObject><p></foreignObject><p> +#errors +#document +| <html> +| <head> +| <body> +| <div> +| <svg svg> +| <svg path> +| <svg foreignObject> +| <p> +| <p> + +#data +<math><mi><div><object><div><span></span></div></object></div></mi><mi> +#errors +#document +| <html> +| <head> +| <body> +| <math math> +| <math mi> +| <div> +| <object> +| <div> +| <span> +| <math mi> + +#data +<math><mi><svg><foreignObject><div><div></div></div></foreignObject></svg></mi><mi> +#errors +#document +| <html> +| <head> +| <body> +| <math math> +| <math mi> +| <svg svg> +| <svg foreignObject> +| <div> +| <div> +| <math mi> + +#data +<svg><script></script><path> +#errors +#document +| <html> +| <head> +| <body> +| <svg svg> +| <svg script> +| <svg path> + +#data +<table><svg></svg><tr> +#errors +#document +| <html> +| <head> +| <body> +| <svg svg> +| <table> +| <tbody> +| <tr> + +#data +<math><mi><mglyph> +#errors +#document +| <html> +| <head> +| <body> +| <math math> +| <math mi> +| <math mglyph> + +#data +<math><mi><malignmark> +#errors +#document +| <html> +| <head> +| <body> +| <math math> +| <math mi> +| <math malignmark> + +#data +<math><mo><mglyph> +#errors +#document +| <html> +| <head> +| <body> +| <math math> +| <math mo> +| <math mglyph> + +#data +<math><mo><malignmark> +#errors +#document +| <html> +| <head> +| <body> +| <math math> +| <math mo> +| <math malignmark> + +#data +<math><mn><mglyph> +#errors +#document +| <html> +| <head> +| <body> +| <math math> +| <math mn> +| <math 
mglyph> + +#data +<math><mn><malignmark> +#errors +#document +| <html> +| <head> +| <body> +| <math math> +| <math mn> +| <math malignmark> + +#data +<math><ms><mglyph> +#errors +#document +| <html> +| <head> +| <body> +| <math math> +| <math ms> +| <math mglyph> + +#data +<math><ms><malignmark> +#errors +#document +| <html> +| <head> +| <body> +| <math math> +| <math ms> +| <math malignmark> + +#data +<math><mtext><mglyph> +#errors +#document +| <html> +| <head> +| <body> +| <math math> +| <math mtext> +| <math mglyph> + +#data +<math><mtext><malignmark> +#errors +#document +| <html> +| <head> +| <body> +| <math math> +| <math mtext> +| <math malignmark> + +#data +<math><annotation-xml><svg></svg></annotation-xml><mi> +#errors +#document +| <html> +| <head> +| <body> +| <math math> +| <math annotation-xml> +| <svg svg> +| <math mi> + +#data +<math><annotation-xml><svg><foreignObject><div><math><mi></mi></math><span></span></div></foreignObject><path></path></svg></annotation-xml><mi> +#errors +#document +| <html> +| <head> +| <body> +| <math math> +| <math annotation-xml> +| <svg svg> +| <svg foreignObject> +| <div> +| <math math> +| <math mi> +| <span> +| <svg path> +| <math mi> + +#data +<math><annotation-xml><svg><foreignObject><math><mi><svg></svg></mi><mo></mo></math><span></span></foreignObject><path></path></svg></annotation-xml><mi> +#errors +#document +| <html> +| <head> +| <body> +| <math math> +| <math annotation-xml> +| <svg svg> +| <svg foreignObject> +| <math math> +| <math mi> +| <svg svg> +| <math mo> +| <span> +| <svg path> +| <math mi> diff --git a/src/pkg/html/testdata/webkit/tests13.dat b/src/pkg/html/testdata/webkit/tests13.dat deleted file mode 100644 index d180e8e90..000000000 --- a/src/pkg/html/testdata/webkit/tests13.dat +++ /dev/null @@ -1,9 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN"> -<html><head> -<title>404 Not Found</title> -</head><body> -<h1>Not Found</h1> -<p>The requested URL /html5lib-tests/data/tests13.dat was 
not found on this server.</p> -<p>Additionally, a 404 Not Found -error was encountered while trying to use an ErrorDocument to handle the request.</p> -</body></html> diff --git a/src/pkg/html/testdata/webkit/tests14.dat b/src/pkg/html/testdata/webkit/tests14.dat index 72f8015f6..b8713f885 100644 --- a/src/pkg/html/testdata/webkit/tests14.dat +++ b/src/pkg/html/testdata/webkit/tests14.dat @@ -71,4 +71,4 @@ | <html> | <head> | <body> -| 789="012"
\ No newline at end of file +| 789="012" diff --git a/src/pkg/html/testdata/webkit/tests15.dat b/src/pkg/html/testdata/webkit/tests15.dat index 7f016cae3..6ce1c0d16 100644 --- a/src/pkg/html/testdata/webkit/tests15.dat +++ b/src/pkg/html/testdata/webkit/tests15.dat @@ -205,4 +205,4 @@ XXX: These errors are wrong, please fix me! | <html> | <head> | <body> -| <object>
\ No newline at end of file +| <object> diff --git a/src/pkg/html/testdata/webkit/tests17.dat b/src/pkg/html/testdata/webkit/tests17.dat new file mode 100644 index 000000000..7b555f888 --- /dev/null +++ b/src/pkg/html/testdata/webkit/tests17.dat @@ -0,0 +1,153 @@ +#data +<!doctype html><table><tbody><select><tr> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <select> +| <table> +| <tbody> +| <tr> + +#data +<!doctype html><table><tr><select><td> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <select> +| <table> +| <tbody> +| <tr> +| <td> + +#data +<!doctype html><table><tr><td><select><td> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <table> +| <tbody> +| <tr> +| <td> +| <select> +| <td> + +#data +<!doctype html><table><tr><th><select><td> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <table> +| <tbody> +| <tr> +| <th> +| <select> +| <td> + +#data +<!doctype html><table><caption><select><tr> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <table> +| <caption> +| <select> +| <tbody> +| <tr> + +#data +<!doctype html><select><tr> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <select> + +#data +<!doctype html><select><td> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <select> + +#data +<!doctype html><select><th> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <select> + +#data +<!doctype html><select><tbody> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <select> + +#data +<!doctype html><select><thead> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <select> + +#data +<!doctype html><select><tfoot> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <select> + +#data +<!doctype html><select><caption> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <select> + +#data +<!doctype 
html><table><tr></table>a +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <table> +| <tbody> +| <tr> +| "a" diff --git a/src/pkg/html/testdata/webkit/tests18.dat b/src/pkg/html/testdata/webkit/tests18.dat new file mode 100644 index 000000000..680e1f068 --- /dev/null +++ b/src/pkg/html/testdata/webkit/tests18.dat @@ -0,0 +1,269 @@ +#data +<!doctype html><plaintext></plaintext> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <plaintext> +| "</plaintext>" + +#data +<!doctype html><table><plaintext></plaintext> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <plaintext> +| "</plaintext>" +| <table> + +#data +<!doctype html><table><tbody><plaintext></plaintext> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <plaintext> +| "</plaintext>" +| <table> +| <tbody> + +#data +<!doctype html><table><tbody><tr><plaintext></plaintext> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <plaintext> +| "</plaintext>" +| <table> +| <tbody> +| <tr> + +#data +<!doctype html><table><tbody><tr><plaintext></plaintext> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <plaintext> +| "</plaintext>" +| <table> +| <tbody> +| <tr> + +#data +<!doctype html><table><td><plaintext></plaintext> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <table> +| <tbody> +| <tr> +| <td> +| <plaintext> +| "</plaintext>" + +#data +<!doctype html><table><caption><plaintext></plaintext> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <table> +| <caption> +| <plaintext> +| "</plaintext>" + +#data +<!doctype html><table><tr><style></script></style>abc +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| "abc" +| <table> +| <tbody> +| <tr> +| <style> +| "</script>" + +#data +<!doctype html><table><tr><script></style></script>abc +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| "abc" +| 
<table> +| <tbody> +| <tr> +| <script> +| "</style>" + +#data +<!doctype html><table><caption><style></script></style>abc +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <table> +| <caption> +| <style> +| "</script>" +| "abc" + +#data +<!doctype html><table><td><style></script></style>abc +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <table> +| <tbody> +| <tr> +| <td> +| <style> +| "</script>" +| "abc" + +#data +<!doctype html><select><script></style></script>abc +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <select> +| <script> +| "</style>" +| "abc" + +#data +<!doctype html><table><select><script></style></script>abc +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <select> +| <script> +| "</style>" +| "abc" +| <table> + +#data +<!doctype html><table><tr><select><script></style></script>abc +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <select> +| <script> +| "</style>" +| "abc" +| <table> +| <tbody> +| <tr> + +#data +<!doctype html><frameset></frameset><noframes>abc +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <frameset> +| <noframes> +| "abc" + +#data +<!doctype html><frameset></frameset><noframes>abc</noframes><!--abc--> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <frameset> +| <noframes> +| "abc" +| <!-- abc --> + +#data +<!doctype html><frameset></frameset></html><noframes>abc +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <frameset> +| <noframes> +| "abc" + +#data +<!doctype html><frameset></frameset></html><noframes>abc</noframes><!--abc--> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <frameset> +| <noframes> +| "abc" +| <!-- abc --> + +#data +<!doctype html><table><tr></tbody><tfoot> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <table> +| <tbody> +| <tr> +| <tfoot> + +#data +<!doctype html><table><td><svg></svg>abc<td> +#errors +#document 
+| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <table> +| <tbody> +| <tr> +| <td> +| <svg svg> +| "abc" +| <td> diff --git a/src/pkg/html/testdata/webkit/tests19.dat b/src/pkg/html/testdata/webkit/tests19.dat new file mode 100644 index 000000000..06222f5b9 --- /dev/null +++ b/src/pkg/html/testdata/webkit/tests19.dat @@ -0,0 +1,1220 @@ +#data +<!doctype html><math><mn DefinitionUrl="foo"> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <math math> +| <math mn> +| definitionURL="foo" + +#data +<!doctype html><html></p><!--foo--> +#errors +#document +| <!DOCTYPE html> +| <html> +| <!-- foo --> +| <head> +| <body> + +#data +<!doctype html><head></head></p><!--foo--> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <!-- foo --> +| <body> + +#data +<!doctype html><body><p><pre> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <pre> + +#data +<!doctype html><body><p><listing> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <listing> + +#data +<!doctype html><p><plaintext> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <plaintext> + +#data +<!doctype html><p><h1> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <h1> + +#data +<!doctype html><form><isindex> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <form> + +#data +<!doctype html><isindex action="POST"> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <form> +| action="POST" +| <hr> +| <label> +| "This is a searchable index. 
Enter search keywords: " +| <input> +| name="isindex" +| <hr> + +#data +<!doctype html><isindex prompt="this is isindex"> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <form> +| <hr> +| <label> +| "this is isindex" +| <input> +| name="isindex" +| <hr> + +#data +<!doctype html><isindex type="hidden"> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <form> +| <hr> +| <label> +| "This is a searchable index. Enter search keywords: " +| <input> +| name="isindex" +| type="hidden" +| <hr> + +#data +<!doctype html><isindex name="foo"> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <form> +| <hr> +| <label> +| "This is a searchable index. Enter search keywords: " +| <input> +| name="isindex" +| <hr> + +#data +<!doctype html><ruby><p><rp> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <ruby> +| <p> +| <rp> + +#data +<!doctype html><ruby><div><span><rp> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <ruby> +| <div> +| <span> +| <rp> + +#data +<!doctype html><ruby><div><p><rp> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <ruby> +| <div> +| <p> +| <rp> + +#data +<!doctype html><ruby><p><rt> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <ruby> +| <p> +| <rt> + +#data +<!doctype html><ruby><div><span><rt> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <ruby> +| <div> +| <span> +| <rt> + +#data +<!doctype html><ruby><div><p><rt> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <ruby> +| <div> +| <p> +| <rt> + +#data +<!doctype html><math/><foo> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <math math> +| <foo> + +#data +<!doctype html><svg/><foo> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <svg svg> +| <foo> + +#data +<!doctype html><div></body><!--foo--> +#errors +#document +| <!DOCTYPE html> +| <html> +| 
<head> +| <body> +| <div> +| <!-- foo --> + +#data +<!doctype html><h1><div><h3><span></h1>foo +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <h1> +| <div> +| <h3> +| <span> +| "foo" + +#data +<!doctype html><p></h3>foo +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| "foo" + +#data +<!doctype html><h3><li>abc</h2>foo +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <h3> +| <li> +| "abc" +| "foo" + +#data +<!doctype html><table>abc<!--foo--> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| "abc" +| <table> +| <!-- foo --> + +#data +<!doctype html><table> <!--foo--> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <table> +| " " +| <!-- foo --> + +#data +<!doctype html><table> b <!--foo--> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| " b " +| <table> +| <!-- foo --> + +#data +<!doctype html><select><option><option> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <select> +| <option> +| <option> + +#data +<!doctype html><select><option></optgroup> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <select> +| <option> + +#data +<!doctype html><select><option></optgroup> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <select> +| <option> + +#data +<!doctype html><p><math><mi><p><h1> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <math math> +| <math mi> +| <p> +| <h1> + +#data +<!doctype html><p><math><mo><p><h1> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <math math> +| <math mo> +| <p> +| <h1> + +#data +<!doctype html><p><math><mn><p><h1> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <math math> +| <math mn> +| <p> +| <h1> + +#data +<!doctype html><p><math><ms><p><h1> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <math 
math> +| <math ms> +| <p> +| <h1> + +#data +<!doctype html><p><math><mtext><p><h1> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <math math> +| <math mtext> +| <p> +| <h1> + +#data +<!doctype html><frameset></noframes> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <frameset> + +#data +<!doctype html><html c=d><body></html><html a=b> +#errors +#document +| <!DOCTYPE html> +| <html> +| a="b" +| c="d" +| <head> +| <body> + +#data +<!doctype html><html c=d><frameset></frameset></html><html a=b> +#errors +#document +| <!DOCTYPE html> +| <html> +| a="b" +| c="d" +| <head> +| <frameset> + +#data +<!doctype html><html><frameset></frameset></html><!--foo--> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <frameset> +| <!-- foo --> + +#data +<!doctype html><html><frameset></frameset></html> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <frameset> +| " " + +#data +<!doctype html><html><frameset></frameset></html>abc +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <frameset> + +#data +<!doctype html><html><frameset></frameset></html><p> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <frameset> + +#data +<!doctype html><html><frameset></frameset></html></p> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <frameset> + +#data +<html><frameset></frameset></html><!doctype html> +#errors +#document +| <html> +| <head> +| <frameset> + +#data +<!doctype html><body><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> + +#data +<!doctype html><p><frameset><frame> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <frameset> +| <frame> + +#data +<!doctype html><p>a<frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| "a" + +#data +<!doctype html><p> <frameset><frame> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <frameset> +| <frame> + +#data +<!doctype html><pre><frameset> 
+#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <pre> + +#data +<!doctype html><listing><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <listing> + +#data +<!doctype html><li><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <li> + +#data +<!doctype html><dd><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <dd> + +#data +<!doctype html><dt><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <dt> + +#data +<!doctype html><button><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <button> + +#data +<!doctype html><applet><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <applet> + +#data +<!doctype html><marquee><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <marquee> + +#data +<!doctype html><object><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <object> + +#data +<!doctype html><table><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <table> + +#data +<!doctype html><area><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <area> + +#data +<!doctype html><basefont><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <basefont> +| <frameset> + +#data +<!doctype html><bgsound><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <bgsound> +| <frameset> + +#data +<!doctype html><br><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <br> + +#data +<!doctype html><embed><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <embed> + +#data +<!doctype html><img><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <img> + +#data +<!doctype html><input><frameset> +#errors +#document +| 
<!DOCTYPE html> +| <html> +| <head> +| <body> +| <input> + +#data +<!doctype html><keygen><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <keygen> + +#data +<!doctype html><wbr><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <wbr> + +#data +<!doctype html><hr><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <hr> + +#data +<!doctype html><textarea></textarea><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <textarea> + +#data +<!doctype html><xmp></xmp><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <xmp> + +#data +<!doctype html><iframe></iframe><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <iframe> + +#data +<!doctype html><select></select><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <select> + +#data +<!doctype html><svg></svg><frameset><frame> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <frameset> +| <frame> + +#data +<!doctype html><math></math><frameset><frame> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <frameset> +| <frame> + +#data +<!doctype html><svg><foreignObject><div> <frameset><frame> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <frameset> +| <frame> + +#data +<!doctype html><svg>a</svg><frameset><frame> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <svg svg> +| "a" + +#data +<!doctype html><svg> </svg><frameset><frame> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <frameset> +| <frame> + +#data +<html>aaa<frameset></frameset> +#errors +#document +| <html> +| <head> +| <body> +| "aaa" + +#data +<html> a <frameset></frameset> +#errors +#document +| <html> +| <head> +| <body> +| "a " + +#data +<!doctype html><div><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <frameset> + +#data +<!doctype 
html><div><body><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <div> + +#data +<!doctype html><p><math></p>a +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <math math> +| "a" + +#data +<!doctype html><p><math><mn><span></p>a +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <math math> +| <math mn> +| <span> +| <p> +| "a" + +#data +<!doctype html><math></html> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <math math> + +#data +<!doctype html><meta charset="ascii"> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <meta> +| charset="ascii" +| <body> + +#data +<!doctype html><meta http-equiv="content-type" content="text/html;charset=ascii"> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <meta> +| content="text/html;charset=ascii" +| http-equiv="content-type" +| <body> + +#data +<!doctype html><head><!--aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa--><meta charset="utf8"> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <!-- 
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa --> +| <meta> +| charset="utf8" +| <body> + +#data +<!doctype html><html a=b><head></head><html c=d> +#errors +#document +| <!DOCTYPE html> +| <html> +| a="b" +| c="d" +| <head> +| <body> + +#data +<!doctype html><image/> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <img> + +#data +<!doctype html>a<i>b<table>c<b>d</i>e</b>f +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| "a" +| <i> +| "bc" +| <b> +| "de" +| "f" +| <table> + +#data +<!doctype html><table><i>a<b>b<div>c<a>d</i>e</b>f +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <i> +| "a" +| <b> +| "b" +| <b> +| <div> +| <b> +| <i> +| "c" +| <a> +| "d" +| <a> +| "e" +| <a> +| "f" +| <table> + +#data +<!doctype html><i>a<b>b<div>c<a>d</i>e</b>f +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <i> +| "a" +| <b> +| "b" +| <b> +| <div> +| <b> +| <i> +| "c" +| <a> +| "d" +| <a> +| "e" +| <a> +| "f" + +#data +<!doctype html><table><i>a<b>b<div>c</i> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <i> +| "a" +| <b> +| "b" +| <b> +| <div> +| <i> +| "c" +| <table> + +#data +<!doctype html><table><i>a<b>b<div>c<a>d</i>e</b>f +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <i> +| "a" +| <b> +| "b" +| <b> +| <div> +| <b> +| <i> +| "c" +| <a> +| "d" +| <a> +| "e" +| <a> +| "f" +| <table> + +#data +<!doctype html><table><i>a<div>b<tr>c<b>d</i>e +#errors +#document +| <!DOCTYPE 
html> +| <html> +| <head> +| <body> +| <i> +| "a" +| <div> +| "b" +| <i> +| "c" +| <b> +| "d" +| <b> +| "e" +| <table> +| <tbody> +| <tr> + +#data +<!doctype html><table><td><table><i>a<div>b<b>c</i>d +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <table> +| <tbody> +| <tr> +| <td> +| <i> +| "a" +| <div> +| <i> +| "b" +| <b> +| "c" +| <b> +| "d" +| <table> + +#data +<!doctype html><body><bgsound> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <bgsound> + +#data +<!doctype html><body><basefont> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <basefont> + +#data +<!doctype html><a><b></a><basefont> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <a> +| <b> +| <basefont> + +#data +<!doctype html><a><b></a><bgsound> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <a> +| <b> +| <bgsound> + +#data +<!doctype html><figcaption><article></figcaption>a +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <figcaption> +| <article> +| "a" + +#data +<!doctype html><summary><article></summary>a +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <summary> +| <article> +| "a" + +#data +<!doctype html><p><a><plaintext>b +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <a> +| <plaintext> +| <a> +| "b" diff --git a/src/pkg/html/testdata/webkit/tests2.dat b/src/pkg/html/testdata/webkit/tests2.dat index d33996e0c..60d859221 100644 --- a/src/pkg/html/testdata/webkit/tests2.dat +++ b/src/pkg/html/testdata/webkit/tests2.dat @@ -461,6 +461,19 @@ Line: 1 Col: 51 Expected closing tag. Unexpected end of file. 
| <optgroup> #data +<!DOCTYPE html><datalist><option>foo</datalist>bar +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <datalist> +| <option> +| "foo" +| "bar" + +#data <!DOCTYPE html><font><input><input></font> #errors #document @@ -515,7 +528,7 @@ Line: 1 Col: 23 Unexpected start tag isindex. Don't use it! | <form> | <hr> | <label> -| "This is a searchable index. Insert your search keywords here: " +| "This is a searchable index. Enter search keywords: " | <input> | name="isindex" | test="x" @@ -736,3 +749,15 @@ Line: 1 Col: 35 Unexpected character in comment found. | ">" | <!-- <!--x --> | "-->" + +#data +<!doctype html><div><form></form><div></div></div> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <div> +| <form> +| <div> diff --git a/src/pkg/html/testdata/webkit/tests20.dat b/src/pkg/html/testdata/webkit/tests20.dat new file mode 100644 index 000000000..6bd825608 --- /dev/null +++ b/src/pkg/html/testdata/webkit/tests20.dat @@ -0,0 +1,455 @@ +#data +<!doctype html><p><button><button> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <button> +| <button> + +#data +<!doctype html><p><button><address> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <button> +| <address> + +#data +<!doctype html><p><button><blockquote> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <button> +| <blockquote> + +#data +<!doctype html><p><button><menu> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <button> +| <menu> + +#data +<!doctype html><p><button><p> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <button> +| <p> + +#data +<!doctype html><p><button><ul> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <button> +| <ul> + +#data +<!doctype html><p><button><h1> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <button> +| 
<h1> + +#data +<!doctype html><p><button><h6> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <button> +| <h6> + +#data +<!doctype html><p><button><listing> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <button> +| <listing> + +#data +<!doctype html><p><button><pre> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <button> +| <pre> + +#data +<!doctype html><p><button><form> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <button> +| <form> + +#data +<!doctype html><p><button><li> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <button> +| <li> + +#data +<!doctype html><p><button><dd> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <button> +| <dd> + +#data +<!doctype html><p><button><dt> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <button> +| <dt> + +#data +<!doctype html><p><button><plaintext> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <button> +| <plaintext> + +#data +<!doctype html><p><button><table> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <button> +| <table> + +#data +<!doctype html><p><button><hr> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <button> +| <hr> + +#data +<!doctype html><p><button><xmp> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <button> +| <xmp> + +#data +<!doctype html><p><button></p> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <button> +| <p> + +#data +<!doctype html><address><button></address>a +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <address> +| <button> +| "a" + +#data +<!doctype html><address><button></address>a +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <address> +| <button> +| "a" + 
+#data +<p><table></p> +#errors +#document +| <html> +| <head> +| <body> +| <p> +| <p> +| <table> + +#data +<!doctype html><svg> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <svg svg> + +#data +<!doctype html><p><figcaption> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <figcaption> + +#data +<!doctype html><p><summary> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <summary> + +#data +<!doctype html><form><table><form> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <form> +| <table> + +#data +<!doctype html><table><form><form> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <table> +| <form> + +#data +<!doctype html><table><form></table><form> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <table> +| <form> + +#data +<!doctype html><svg><foreignObject><p> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <svg svg> +| <svg foreignObject> +| <p> + +#data +<!doctype html><svg><title>abc +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <svg svg> +| <svg title> +| "abc" + +#data +<option><span><option> +#errors +#document +| <html> +| <head> +| <body> +| <option> +| <span> +| <option> + +#data +<option><option> +#errors +#document +| <html> +| <head> +| <body> +| <option> +| <option> + +#data +<math><annotation-xml><div> +#errors +#document +| <html> +| <head> +| <body> +| <math math> +| <math annotation-xml> +| <div> + +#data +<math><annotation-xml encoding="application/svg+xml"><div> +#errors +#document +| <html> +| <head> +| <body> +| <math math> +| <math annotation-xml> +| encoding="application/svg+xml" +| <div> + +#data +<math><annotation-xml encoding="application/xhtml+xml"><div> +#errors +#document +| <html> +| <head> +| <body> +| <math math> +| <math annotation-xml> +| encoding="application/xhtml+xml" +| <div> + +#data +<math><annotation-xml 
encoding="aPPlication/xhtmL+xMl"><div> +#errors +#document +| <html> +| <head> +| <body> +| <math math> +| <math annotation-xml> +| encoding="aPPlication/xhtmL+xMl" +| <div> + +#data +<math><annotation-xml encoding="text/html"><div> +#errors +#document +| <html> +| <head> +| <body> +| <math math> +| <math annotation-xml> +| encoding="text/html" +| <div> + +#data +<math><annotation-xml encoding="Text/htmL"><div> +#errors +#document +| <html> +| <head> +| <body> +| <math math> +| <math annotation-xml> +| encoding="Text/htmL" +| <div> + +#data +<math><annotation-xml encoding=" text/html "><div> +#errors +#document +| <html> +| <head> +| <body> +| <math math> +| <math annotation-xml> +| encoding=" text/html " +| <div> diff --git a/src/pkg/html/testdata/webkit/tests21.dat b/src/pkg/html/testdata/webkit/tests21.dat new file mode 100644 index 000000000..1260ec03e --- /dev/null +++ b/src/pkg/html/testdata/webkit/tests21.dat @@ -0,0 +1,221 @@ +#data +<svg><![CDATA[foo]]> +#errors +#document +| <html> +| <head> +| <body> +| <svg svg> +| "foo" + +#data +<math><![CDATA[foo]]> +#errors +#document +| <html> +| <head> +| <body> +| <math math> +| "foo" + +#data +<div><![CDATA[foo]]> +#errors +#document +| <html> +| <head> +| <body> +| <div> +| <!-- [CDATA[foo]] --> + +#data +<svg><![CDATA[foo +#errors +#document +| <html> +| <head> +| <body> +| <svg svg> +| "foo" + +#data +<svg><![CDATA[foo +#errors +#document +| <html> +| <head> +| <body> +| <svg svg> +| "foo" + +#data +<svg><![CDATA[ +#errors +#document +| <html> +| <head> +| <body> +| <svg svg> + +#data +<svg><![CDATA[]]> +#errors +#document +| <html> +| <head> +| <body> +| <svg svg> + +#data +<svg><![CDATA[]] >]]> +#errors +#document +| <html> +| <head> +| <body> +| <svg svg> +| "]] >" + +#data +<svg><![CDATA[]] >]]> +#errors +#document +| <html> +| <head> +| <body> +| <svg svg> +| "]] >" + +#data +<svg><![CDATA[]] +#errors +#document +| <html> +| <head> +| <body> +| <svg svg> +| "]]" + +#data +<svg><![CDATA[] +#errors 
+#document +| <html> +| <head> +| <body> +| <svg svg> +| "]" + +#data +<svg><![CDATA[]>a +#errors +#document +| <html> +| <head> +| <body> +| <svg svg> +| "]>a" + +#data +<svg><foreignObject><div><![CDATA[foo]]> +#errors +#document +| <html> +| <head> +| <body> +| <svg svg> +| <svg foreignObject> +| <div> +| <!-- [CDATA[foo]] --> + +#data +<svg><![CDATA[<svg>]]> +#errors +#document +| <html> +| <head> +| <body> +| <svg svg> +| "<svg>" + +#data +<svg><![CDATA[</svg>a]]> +#errors +#document +| <html> +| <head> +| <body> +| <svg svg> +| "</svg>a" + +#data +<svg><![CDATA[<svg>a +#errors +#document +| <html> +| <head> +| <body> +| <svg svg> +| "<svg>a" + +#data +<svg><![CDATA[</svg>a +#errors +#document +| <html> +| <head> +| <body> +| <svg svg> +| "</svg>a" + +#data +<svg><![CDATA[<svg>]]><path> +#errors +#document +| <html> +| <head> +| <body> +| <svg svg> +| "<svg>" +| <svg path> + +#data +<svg><![CDATA[<svg>]]></path> +#errors +#document +| <html> +| <head> +| <body> +| <svg svg> +| "<svg>" + +#data +<svg><![CDATA[<svg>]]><!--path--> +#errors +#document +| <html> +| <head> +| <body> +| <svg svg> +| "<svg>" +| <!-- path --> + +#data +<svg><![CDATA[<svg>]]>path +#errors +#document +| <html> +| <head> +| <body> +| <svg svg> +| "<svg>path" + +#data +<svg><![CDATA[<!--svg-->]]> +#errors +#document +| <html> +| <head> +| <body> +| <svg svg> +| "<!--svg-->" diff --git a/src/pkg/html/testdata/webkit/tests22.dat b/src/pkg/html/testdata/webkit/tests22.dat new file mode 100644 index 000000000..aab27b2e9 --- /dev/null +++ b/src/pkg/html/testdata/webkit/tests22.dat @@ -0,0 +1,157 @@ +#data +<a><b><big><em><strong><div>X</a> +#errors +#document +| <html> +| <head> +| <body> +| <a> +| <b> +| <big> +| <em> +| <strong> +| <big> +| <em> +| <strong> +| <div> +| <a> +| "X" + +#data +<a><b><div id=1><div id=2><div id=3><div id=4><div id=5><div id=6><div id=7><div id=8>A</a> +#errors +#document +| <html> +| <head> +| <body> +| <a> +| <b> +| <b> +| <div> +| id="1" +| <a> +| <div> +| 
id="2" +| <a> +| <div> +| id="3" +| <a> +| <div> +| id="4" +| <a> +| <div> +| id="5" +| <a> +| <div> +| id="6" +| <a> +| <div> +| id="7" +| <a> +| <div> +| id="8" +| <a> +| "A" + +#data +<a><b><div id=1><div id=2><div id=3><div id=4><div id=5><div id=6><div id=7><div id=8><div id=9>A</a> +#errors +#document +| <html> +| <head> +| <body> +| <a> +| <b> +| <b> +| <div> +| id="1" +| <a> +| <div> +| id="2" +| <a> +| <div> +| id="3" +| <a> +| <div> +| id="4" +| <a> +| <div> +| id="5" +| <a> +| <div> +| id="6" +| <a> +| <div> +| id="7" +| <a> +| <div> +| id="8" +| <a> +| <div> +| id="9" +| "A" + +#data +<a><b><div id=1><div id=2><div id=3><div id=4><div id=5><div id=6><div id=7><div id=8><div id=9><div id=10>A</a> +#errors +#document +| <html> +| <head> +| <body> +| <a> +| <b> +| <b> +| <div> +| id="1" +| <a> +| <div> +| id="2" +| <a> +| <div> +| id="3" +| <a> +| <div> +| id="4" +| <a> +| <div> +| id="5" +| <a> +| <div> +| id="6" +| <a> +| <div> +| id="7" +| <a> +| <div> +| id="8" +| <a> +| <div> +| id="9" +| <div> +| id="10" +| "A" + +#data +<cite><b><cite><i><cite><i><cite><i><div>X</b>TEST +#errors +Line: 1 Col: 6 Unexpected start tag (cite). Expected DOCTYPE. +Line: 1 Col: 46 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm. +Line: 1 Col: 50 Expected closing tag. Unexpected end of file. +#document +| <html> +| <head> +| <body> +| <cite> +| <b> +| <cite> +| <i> +| <cite> +| <i> +| <cite> +| <i> +| <i> +| <i> +| <div> +| <b> +| "X" +| "TEST" diff --git a/src/pkg/html/testdata/webkit/tests23.dat b/src/pkg/html/testdata/webkit/tests23.dat new file mode 100644 index 000000000..34d2a73f1 --- /dev/null +++ b/src/pkg/html/testdata/webkit/tests23.dat @@ -0,0 +1,155 @@ +#data +<p><font size=4><font color=red><font size=4><font size=4><font size=4><font size=4><font size=4><font color=red><p>X +#errors +3: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”. +116: Unclosed elements. 
+117: End of file seen and there were open elements. +#document +| <html> +| <head> +| <body> +| <p> +| <font> +| size="4" +| <font> +| color="red" +| <font> +| size="4" +| <font> +| size="4" +| <font> +| size="4" +| <font> +| size="4" +| <font> +| size="4" +| <font> +| color="red" +| <p> +| <font> +| color="red" +| <font> +| size="4" +| <font> +| size="4" +| <font> +| size="4" +| <font> +| color="red" +| "X" + +#data +<p><font size=4><font size=4><font size=4><font size=4><p>X +#errors +#document +| <html> +| <head> +| <body> +| <p> +| <font> +| size="4" +| <font> +| size="4" +| <font> +| size="4" +| <font> +| size="4" +| <p> +| <font> +| size="4" +| <font> +| size="4" +| <font> +| size="4" +| "X" + +#data +<p><font size=4><font size=4><font size=4><font size="5"><font size=4><p>X +#errors +#document +| <html> +| <head> +| <body> +| <p> +| <font> +| size="4" +| <font> +| size="4" +| <font> +| size="4" +| <font> +| size="5" +| <font> +| size="4" +| <p> +| <font> +| size="4" +| <font> +| size="4" +| <font> +| size="5" +| <font> +| size="4" +| "X" + +#data +<p><font size=4 id=a><font size=4 id=b><font size=4><font size=4><p>X +#errors +#document +| <html> +| <head> +| <body> +| <p> +| <font> +| id="a" +| size="4" +| <font> +| id="b" +| size="4" +| <font> +| size="4" +| <font> +| size="4" +| <p> +| <font> +| id="a" +| size="4" +| <font> +| id="b" +| size="4" +| <font> +| size="4" +| <font> +| size="4" +| "X" + +#data +<p><b id=a><b id=a><b id=a><b><object><b id=a><b id=a>X</object><p>Y +#errors +#document +| <html> +| <head> +| <body> +| <p> +| <b> +| id="a" +| <b> +| id="a" +| <b> +| id="a" +| <b> +| <object> +| <b> +| id="a" +| <b> +| id="a" +| "X" +| <p> +| <b> +| id="a" +| <b> +| id="a" +| <b> +| id="a" +| <b> +| "Y" diff --git a/src/pkg/html/testdata/webkit/tests24.dat b/src/pkg/html/testdata/webkit/tests24.dat new file mode 100644 index 000000000..f6dc7eb48 --- /dev/null +++ b/src/pkg/html/testdata/webkit/tests24.dat @@ -0,0 +1,79 @@ +#data +<!DOCTYPE html>≂̸ 
+#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| "≂̸" + +#data +<!DOCTYPE html>≂̸A +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| "≂̸A" + +#data +<!DOCTYPE html>   +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| " " + +#data +<!DOCTYPE html>  A +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| " A" + +#data +<!DOCTYPE html>⊂⃒ +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| "⊂⃒" + +#data +<!DOCTYPE html>⊂⃒A +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| "⊂⃒A" + +#data +<!DOCTYPE html>𝔾 +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| "𝔾" + +#data +<!DOCTYPE html>𝔾A +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| "𝔾A" diff --git a/src/pkg/html/testdata/webkit/tests25.dat b/src/pkg/html/testdata/webkit/tests25.dat new file mode 100644 index 000000000..00de7295b --- /dev/null +++ b/src/pkg/html/testdata/webkit/tests25.dat @@ -0,0 +1,219 @@ +#data +<!DOCTYPE html><body><foo>A +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <foo> +| "A" + +#data +<!DOCTYPE html><body><area>A +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <area> +| "A" + +#data +<!DOCTYPE html><body><base>A +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <base> +| "A" + +#data +<!DOCTYPE html><body><basefont>A +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <basefont> +| "A" + +#data +<!DOCTYPE html><body><bgsound>A +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <bgsound> +| "A" + +#data +<!DOCTYPE html><body><br>A +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <br> +| "A" + +#data +<!DOCTYPE html><body><col>A +#errors +26: Stray start tag “col”. 
+#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| "A" + +#data +<!DOCTYPE html><body><command>A +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <command> +| "A" + +#data +<!DOCTYPE html><body><embed>A +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <embed> +| "A" + +#data +<!DOCTYPE html><body><frame>A +#errors +26: Stray start tag “frame”. +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| "A" + +#data +<!DOCTYPE html><body><hr>A +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <hr> +| "A" + +#data +<!DOCTYPE html><body><img>A +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <img> +| "A" + +#data +<!DOCTYPE html><body><input>A +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <input> +| "A" + +#data +<!DOCTYPE html><body><keygen>A +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <keygen> +| "A" + +#data +<!DOCTYPE html><body><link>A +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <link> +| "A" + +#data +<!DOCTYPE html><body><meta>A +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <meta> +| "A" + +#data +<!DOCTYPE html><body><param>A +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <param> +| "A" + +#data +<!DOCTYPE html><body><source>A +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <source> +| "A" + +#data +<!DOCTYPE html><body><track>A +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <track> +| "A" + +#data +<!DOCTYPE html><body><wbr>A +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <wbr> +| "A" diff --git a/src/pkg/html/testdata/webkit/tests26.dat b/src/pkg/html/testdata/webkit/tests26.dat new file mode 100644 index 000000000..da128e779 --- /dev/null +++ b/src/pkg/html/testdata/webkit/tests26.dat @@ -0,0 +1,195 @@ +#data +<!DOCTYPE html><body><a 
href='#1'><nobr>1<nobr></a><br><a href='#2'><nobr>2<nobr></a><br><a href='#3'><nobr>3<nobr></a> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <a> +| href="#1" +| <nobr> +| "1" +| <nobr> +| <nobr> +| <br> +| <a> +| href="#2" +| <a> +| href="#2" +| <nobr> +| "2" +| <nobr> +| <nobr> +| <br> +| <a> +| href="#3" +| <a> +| href="#3" +| <nobr> +| "3" +| <nobr> + +#data +<!DOCTYPE html><body><b><nobr>1<nobr></b><i><nobr>2<nobr></i>3 +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <b> +| <nobr> +| "1" +| <nobr> +| <nobr> +| <i> +| <i> +| <nobr> +| "2" +| <nobr> +| <nobr> +| "3" + +#data +<!DOCTYPE html><body><b><nobr>1<table><nobr></b><i><nobr>2<nobr></i>3 +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <b> +| <nobr> +| "1" +| <nobr> +| <i> +| <i> +| <nobr> +| "2" +| <nobr> +| <nobr> +| "3" +| <table> + +#data +<!DOCTYPE html><body><b><nobr>1<table><tr><td><nobr></b><i><nobr>2<nobr></i>3 +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <b> +| <nobr> +| "1" +| <table> +| <tbody> +| <tr> +| <td> +| <nobr> +| <i> +| <i> +| <nobr> +| "2" +| <nobr> +| <nobr> +| "3" + +#data +<!DOCTYPE html><body><b><nobr>1<div><nobr></b><i><nobr>2<nobr></i>3 +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <b> +| <nobr> +| "1" +| <div> +| <b> +| <nobr> +| <nobr> +| <nobr> +| <i> +| <i> +| <nobr> +| "2" +| <nobr> +| <nobr> +| "3" + +#data +<!DOCTYPE html><body><b><nobr>1<nobr></b><div><i><nobr>2<nobr></i>3 +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <b> +| <nobr> +| "1" +| <nobr> +| <div> +| <nobr> +| <i> +| <i> +| <nobr> +| "2" +| <nobr> +| <nobr> +| "3" + +#data +<!DOCTYPE html><body><b><nobr>1<nobr><ins></b><i><nobr> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <b> +| <nobr> +| "1" +| <nobr> +| <ins> +| <nobr> +| <i> +| <i> +| <nobr> + +#data +<!DOCTYPE html><body><b><nobr>1<ins><nobr></b><i>2 +#errors +#document +| 
<!DOCTYPE html> +| <html> +| <head> +| <body> +| <b> +| <nobr> +| "1" +| <ins> +| <nobr> +| <nobr> +| <i> +| "2" + +#data +<!DOCTYPE html><body><b>1<nobr></b><i><nobr>2</i> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <b> +| "1" +| <nobr> +| <nobr> +| <i> +| <i> +| <nobr> +| "2" diff --git a/src/pkg/html/testdata/webkit/tests3.dat b/src/pkg/html/testdata/webkit/tests3.dat index b0781a87e..38dc501be 100644 --- a/src/pkg/html/testdata/webkit/tests3.dat +++ b/src/pkg/html/testdata/webkit/tests3.dat @@ -144,6 +144,18 @@ Line: 2 Col: 7 End tag (pre) seen too early. Expected other end tag. y" #data +<!DOCTYPE html><pre>

A</pre> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <pre> +| " +A" + +#data <!DOCTYPE html><HTML><META><HEAD></HEAD></HTML> #errors Line: 1 Col: 33 Unexpected start tag head in existing head. Ignored. diff --git a/src/pkg/html/testdata/webkit/tests6.dat b/src/pkg/html/testdata/webkit/tests6.dat index 2fb79967f..f28ece4fb 100644 --- a/src/pkg/html/testdata/webkit/tests6.dat +++ b/src/pkg/html/testdata/webkit/tests6.dat @@ -631,6 +631,16 @@ Line: 1 Col: 17 Unexpected start tag (frameset). | <frameset> #data +<track><frameset></frameset> +#errors +Line: 1 Col: 7 Unexpected start tag (track). Expected DOCTYPE. +Line: 1 Col: 17 Unexpected start tag (frameset). +#document +| <html> +| <head> +| <frameset> + +#data </html><frameset></frameset> #errors 7: End tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”. diff --git a/src/pkg/html/testdata/webkit/tests9.dat b/src/pkg/html/testdata/webkit/tests9.dat index 2b715f83d..554e27aec 100644 --- a/src/pkg/html/testdata/webkit/tests9.dat +++ b/src/pkg/html/testdata/webkit/tests9.dat @@ -19,6 +19,33 @@ | <math math> #data +<!DOCTYPE html><math><mi> +#errors +25: End of file in a foreign namespace context. +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <math math> +| <math mi> + +#data +<!DOCTYPE html><math><annotation-xml><svg><u> +#errors +45: HTML start tag “u” in a foreign namespace context. +45: End of file seen and there were open elements. +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <math math> +| <math annotation-xml> +| <svg svg> +| <u> + +#data <!DOCTYPE html><body><select><math></math></select> #errors Line: 1 Col: 35 Unexpected start tag token (math) in the select phase. Ignored. 
diff --git a/src/pkg/html/testdata/webkit/tests_innerHTML_1.dat b/src/pkg/html/testdata/webkit/tests_innerHTML_1.dat new file mode 100644 index 000000000..052fac7d5 --- /dev/null +++ b/src/pkg/html/testdata/webkit/tests_innerHTML_1.dat @@ -0,0 +1,733 @@ +#data +<body><span> +#errors +#document-fragment +body +#document +| <span> + +#data +<span><body> +#errors +#document-fragment +body +#document +| <span> + +#data +<span><body> +#errors +#document-fragment +div +#document +| <span> + +#data +<body><span> +#errors +#document-fragment +html +#document +| <head> +| <body> +| <span> + +#data +<frameset><span> +#errors +#document-fragment +body +#document +| <span> + +#data +<span><frameset> +#errors +#document-fragment +body +#document +| <span> + +#data +<span><frameset> +#errors +#document-fragment +div +#document +| <span> + +#data +<frameset><span> +#errors +#document-fragment +html +#document +| <head> +| <frameset> + +#data +<table><tr> +#errors +#document-fragment +table +#document +| <tbody> +| <tr> + +#data +</table><tr> +#errors +#document-fragment +table +#document +| <tbody> +| <tr> + +#data +<a> +#errors +#document-fragment +table +#document +| <a> + +#data +<a> +#errors +#document-fragment +table +#document +| <a> + +#data +<a><caption>a +#errors +#document-fragment +table +#document +| <a> +| <caption> +| "a" + +#data +<a><colgroup><col> +#errors +#document-fragment +table +#document +| <a> +| <colgroup> +| <col> + +#data +<a><tbody><tr> +#errors +#document-fragment +table +#document +| <a> +| <tbody> +| <tr> + +#data +<a><tfoot><tr> +#errors +#document-fragment +table +#document +| <a> +| <tfoot> +| <tr> + +#data +<a><thead><tr> +#errors +#document-fragment +table +#document +| <a> +| <thead> +| <tr> + +#data +<a><tr> +#errors +#document-fragment +table +#document +| <a> +| <tbody> +| <tr> + +#data +<a><th> +#errors +#document-fragment +table +#document +| <a> +| <tbody> +| <tr> +| <th> + +#data +<a><td> +#errors +#document-fragment +table +#document 
+| <a> +| <tbody> +| <tr> +| <td> + +#data +<table></table><tbody> +#errors +#document-fragment +caption +#document +| <table> + +#data +</table><span> +#errors +#document-fragment +caption +#document +| <span> + +#data +<span></table> +#errors +#document-fragment +caption +#document +| <span> + +#data +</caption><span> +#errors +#document-fragment +caption +#document +| <span> + +#data +<span></caption><span> +#errors +#document-fragment +caption +#document +| <span> +| <span> + +#data +<span><caption><span> +#errors +#document-fragment +caption +#document +| <span> +| <span> + +#data +<span><col><span> +#errors +#document-fragment +caption +#document +| <span> +| <span> + +#data +<span><colgroup><span> +#errors +#document-fragment +caption +#document +| <span> +| <span> + +#data +<span><html><span> +#errors +#document-fragment +caption +#document +| <span> +| <span> + +#data +<span><tbody><span> +#errors +#document-fragment +caption +#document +| <span> +| <span> + +#data +<span><td><span> +#errors +#document-fragment +caption +#document +| <span> +| <span> + +#data +<span><tfoot><span> +#errors +#document-fragment +caption +#document +| <span> +| <span> + +#data +<span><thead><span> +#errors +#document-fragment +caption +#document +| <span> +| <span> + +#data +<span><th><span> +#errors +#document-fragment +caption +#document +| <span> +| <span> + +#data +<span><tr><span> +#errors +#document-fragment +caption +#document +| <span> +| <span> + +#data +<span></table><span> +#errors +#document-fragment +caption +#document +| <span> +| <span> + +#data +</colgroup><col> +#errors +#document-fragment +colgroup +#document +| <col> + +#data +<a><col> +#errors +#document-fragment +colgroup +#document +| <col> + +#data +<caption><a> +#errors +#document-fragment +tbody +#document +| <a> + +#data +<col><a> +#errors +#document-fragment +tbody +#document +| <a> + +#data +<colgroup><a> +#errors +#document-fragment +tbody +#document +| <a> + +#data +<tbody><a> +#errors 
+#document-fragment +tbody +#document +| <a> + +#data +<tfoot><a> +#errors +#document-fragment +tbody +#document +| <a> + +#data +<thead><a> +#errors +#document-fragment +tbody +#document +| <a> + +#data +</table><a> +#errors +#document-fragment +tbody +#document +| <a> + +#data +<a><tr> +#errors +#document-fragment +tbody +#document +| <a> +| <tr> + +#data +<a><td> +#errors +#document-fragment +tbody +#document +| <a> +| <tr> +| <td> + +#data +<a><td> +#errors +#document-fragment +tbody +#document +| <a> +| <tr> +| <td> + +#data +<a><td> +#errors +#document-fragment +tbody +#document +| <a> +| <tr> +| <td> + +#data +<td><table><tbody><a><tr> +#errors +#document-fragment +tbody +#document +| <tr> +| <td> +| <a> +| <table> +| <tbody> +| <tr> + +#data +</tr><td> +#errors +#document-fragment +tr +#document +| <td> + +#data +<td><table><a><tr></tr><tr> +#errors +#document-fragment +tr +#document +| <td> +| <a> +| <table> +| <tbody> +| <tr> +| <tr> + +#data +<caption><td> +#errors +#document-fragment +tr +#document +| <td> + +#data +<col><td> +#errors +#document-fragment +tr +#document +| <td> + +#data +<colgroup><td> +#errors +#document-fragment +tr +#document +| <td> + +#data +<tbody><td> +#errors +#document-fragment +tr +#document +| <td> + +#data +<tfoot><td> +#errors +#document-fragment +tr +#document +| <td> + +#data +<thead><td> +#errors +#document-fragment +tr +#document +| <td> + +#data +<tr><td> +#errors +#document-fragment +tr +#document +| <td> + +#data +</table><td> +#errors +#document-fragment +tr +#document +| <td> + +#data +<td><table></table><td> +#errors +#document-fragment +tr +#document +| <td> +| <table> +| <td> + +#data +<td><table></table><td> +#errors +#document-fragment +tr +#document +| <td> +| <table> +| <td> + +#data +<caption><a> +#errors +#document-fragment +td +#document +| <a> + +#data +<col><a> +#errors +#document-fragment +td +#document +| <a> + +#data +<colgroup><a> +#errors +#document-fragment +td +#document +| <a> + +#data 
+<tbody><a> +#errors +#document-fragment +td +#document +| <a> + +#data +<tfoot><a> +#errors +#document-fragment +td +#document +| <a> + +#data +<th><a> +#errors +#document-fragment +td +#document +| <a> + +#data +<thead><a> +#errors +#document-fragment +td +#document +| <a> + +#data +<tr><a> +#errors +#document-fragment +td +#document +| <a> + +#data +</table><a> +#errors +#document-fragment +td +#document +| <a> + +#data +</tbody><a> +#errors +#document-fragment +td +#document +| <a> + +#data +</td><a> +#errors +#document-fragment +td +#document +| <a> + +#data +</tfoot><a> +#errors +#document-fragment +td +#document +| <a> + +#data +</thead><a> +#errors +#document-fragment +td +#document +| <a> + +#data +</th><a> +#errors +#document-fragment +td +#document +| <a> + +#data +</tr><a> +#errors +#document-fragment +td +#document +| <a> + +#data +<table><td><td> +#errors +#document-fragment +td +#document +| <table> +| <tbody> +| <tr> +| <td> +| <td> + +#data +</select><option> +#errors +#document-fragment +select +#document +| <option> + +#data +<input><option> +#errors +#document-fragment +select +#document +| <option> + +#data +<keygen><option> +#errors +#document-fragment +select +#document +| <option> + +#data +<textarea><option> +#errors +#document-fragment +select +#document +| <option> + +#data +</html><!--abc--> +#errors +#document-fragment +html +#document +| <head> +| <body> +| <!-- abc --> + +#data +</frameset><frame> +#errors +#document-fragment +frameset +#document +| <frame> diff --git a/src/pkg/html/testdata/webkit/tricky01.dat b/src/pkg/html/testdata/webkit/tricky01.dat new file mode 100644 index 000000000..084199244 --- /dev/null +++ b/src/pkg/html/testdata/webkit/tricky01.dat @@ -0,0 +1,261 @@ +#data +<b><p>Bold </b> Not bold</p> +Also not bold. +#errors +#document +| <html> +| <head> +| <body> +| <b> +| <p> +| <b> +| "Bold " +| " Not bold" +| " +Also not bold." 
+ +#data +<html> +<font color=red><i>Italic and Red<p>Italic and Red </font> Just italic.</p> Italic only.</i> Plain +<p>I should not be red. <font color=red>Red. <i>Italic and red.</p> +<p>Italic and red. </i> Red.</font> I should not be red.</p> +<b>Bold <i>Bold and italic</b> Only Italic </i> Plain +#errors +#document +| <html> +| <head> +| <body> +| <font> +| color="red" +| <i> +| "Italic and Red" +| <i> +| <p> +| <font> +| color="red" +| "Italic and Red " +| " Just italic." +| " Italic only." +| " Plain +" +| <p> +| "I should not be red. " +| <font> +| color="red" +| "Red. " +| <i> +| "Italic and red." +| <font> +| color="red" +| <i> +| " +" +| <p> +| <font> +| color="red" +| <i> +| "Italic and red. " +| " Red." +| " I should not be red." +| " +" +| <b> +| "Bold " +| <i> +| "Bold and italic" +| <i> +| " Only Italic " +| " Plain" + +#data +<html><body> +<p><font size="7">First paragraph.</p> +<p>Second paragraph.</p></font> +<b><p><i>Bold and Italic</b> Italic</p> +#errors +#document +| <html> +| <head> +| <body> +| " +" +| <p> +| <font> +| size="7" +| "First paragraph." +| <font> +| size="7" +| " +" +| <p> +| "Second paragraph." +| " +" +| <b> +| <p> +| <b> +| <i> +| "Bold and Italic" +| <i> +| " Italic" + +#data +<html> +<dl> +<dt><b>Boo +<dd>Goo? +</dl> +</html> +#errors +#document +| <html> +| <head> +| <body> +| <dl> +| " +" +| <dt> +| <b> +| "Boo +" +| <dd> +| <b> +| "Goo? +" +| <b> +| " +" + +#data +<html><body> +<label><a><div>Hello<div>World</div></a></label> +</body></html> +#errors +#document +| <html> +| <head> +| <body> +| " +" +| <label> +| <a> +| <div> +| <a> +| "Hello" +| <div> +| "World" +| " +" + +#data +<table><center> <font>a</center> <img> <tr><td> </td> </tr> </table> +#errors +#document +| <html> +| <head> +| <body> +| <center> +| " " +| <font> +| "a" +| <font> +| <img> +| " " +| <table> +| " " +| <tbody> +| <tr> +| <td> +| " " +| " " +| " " + +#data +<table><tr><p><a><p>You should see this text. 
+#errors +#document +| <html> +| <head> +| <body> +| <p> +| <a> +| <p> +| <a> +| "You should see this text." +| <table> +| <tbody> +| <tr> + +#data +<TABLE> +<TR> +<CENTER><CENTER><TD></TD></TR><TR> +<FONT> +<TABLE><tr></tr></TABLE> +</P> +<a></font><font></a> +This page contains an insanely badly-nested tag sequence. +#errors +#document +| <html> +| <head> +| <body> +| <center> +| <center> +| <font> +| " +" +| <table> +| " +" +| <tbody> +| <tr> +| " +" +| <td> +| <tr> +| " +" +| <table> +| <tbody> +| <tr> +| <font> +| " +" +| <p> +| " +" +| <a> +| <a> +| <font> +| <font> +| " +This page contains an insanely badly-nested tag sequence." + +#data +<html> +<body> +<b><nobr><div>This text is in a div inside a nobr</nobr>More text that should not be in the nobr, i.e., the +nobr should have closed the div inside it implicitly. </b><pre>A pre tag outside everything else.</pre> +</body> +</html> +#errors +#document +| <html> +| <head> +| <body> +| " +" +| <b> +| <nobr> +| <div> +| <b> +| <nobr> +| "This text is in a div inside a nobr" +| "More text that should not be in the nobr, i.e., the +nobr should have closed the div inside it implicitly. " +| <pre> +| "A pre tag outside everything else." 
+| " + +" diff --git a/src/pkg/html/testdata/webkit/webkit01.dat b/src/pkg/html/testdata/webkit/webkit01.dat index 544da9e8a..4101b216e 100644 --- a/src/pkg/html/testdata/webkit/webkit01.dat +++ b/src/pkg/html/testdata/webkit/webkit01.dat @@ -129,35 +129,6 @@ console.log("FOO<span>BAR</span>BAZ"); | <potato> #data -1<script>document.write("2")</script>3 -#errors -#document -| <html> -| <head> -| <body> -| "1" -| <script> -| "document.write("2")" -| "23" - -#data -1<script>document.write("<script>document.write('2')</scr"+ "ipt><script>document.write('3')</scr" + "ipt>")</script>4 -#errors -#document -| <html> -| <head> -| <body> -| "1" -| <script> -| "document.write("<script>document.write('2')</scr"+ "ipt><script>document.write('3')</scr" + "ipt>")" -| <script> -| "document.write('2')" -| "2" -| <script> -| "document.write('3')" -| "34" - -#data </ tttt> #errors #document @@ -186,8 +157,7 @@ console.log("FOO<span>BAR</span>BAZ"); | <head> | <body> | <p> -| "Test" -| "Test2" +| "TestTest2" #data <rdar://problem/6869687> @@ -209,3 +179,431 @@ console.log("FOO<span>BAR</span>BAZ"); | <body> | <a> | "test< /A>" + +#data +< +#errors +#document +| <html> +| <head> +| <body> +| "<" + +#data +<body foo='bar'><body foo='baz' yo='mama'> +#errors +#document +| <html> +| <head> +| <body> +| foo="bar" +| yo="mama" + +#data +<body></br foo="bar"></body> +#errors +#document +| <html> +| <head> +| <body> +| <br> + +#data +<bdy><br foo="bar"></body> +#errors +#document +| <html> +| <head> +| <body> +| <bdy> +| <br> +| foo="bar" + +#data +<body></body></br foo="bar"> +#errors +#document +| <html> +| <head> +| <body> +| <br> + +#data +<bdy></body><br foo="bar"> +#errors +#document +| <html> +| <head> +| <body> +| <bdy> +| <br> +| foo="bar" + +#data +<html><body></body></html><!-- Hi there --> +#errors +#document +| <html> +| <head> +| <body> +| <!-- Hi there --> + +#data +<html><body></body></html>x<!-- Hi there --> +#errors +#document +| <html> +| <head> +| <body> +| "x" +| <!-- Hi 
there --> + +#data +<html><body></body></html>x<!-- Hi there --></html><!-- Again --> +#errors +#document +| <html> +| <head> +| <body> +| "x" +| <!-- Hi there --> +| <!-- Again --> + +#data +<html><body></body></html>x<!-- Hi there --></body></html><!-- Again --> +#errors +#document +| <html> +| <head> +| <body> +| "x" +| <!-- Hi there --> +| <!-- Again --> + +#data +<html><body><ruby><div><rp>xx</rp></div></ruby></body></html> +#errors +#document +| <html> +| <head> +| <body> +| <ruby> +| <div> +| <rp> +| "xx" + +#data +<html><body><ruby><div><rt>xx</rt></div></ruby></body></html> +#errors +#document +| <html> +| <head> +| <body> +| <ruby> +| <div> +| <rt> +| "xx" + +#data +<html><frameset><!--1--><noframes>A</noframes><!--2--></frameset><!--3--><noframes>B</noframes><!--4--></html><!--5--><noframes>C</noframes><!--6--> +#errors +#document +| <html> +| <head> +| <frameset> +| <!-- 1 --> +| <noframes> +| "A" +| <!-- 2 --> +| <!-- 3 --> +| <noframes> +| "B" +| <!-- 4 --> +| <noframes> +| "C" +| <!-- 5 --> +| <!-- 6 --> + +#data +<select><option>A<select><option>B<select><option>C<select><option>D<select><option>E<select><option>F<select><option>G<select> +#errors +#document +| <html> +| <head> +| <body> +| <select> +| <option> +| "A" +| <option> +| "B" +| <select> +| <option> +| "C" +| <option> +| "D" +| <select> +| <option> +| "E" +| <option> +| "F" +| <select> +| <option> +| "G" + +#data +<dd><dd><dt><dt><dd><li><li> +#errors +#document +| <html> +| <head> +| <body> +| <dd> +| <dd> +| <dt> +| <dt> +| <dd> +| <li> +| <li> + +#data +<div><b></div><div><nobr>a<nobr> +#errors +#document +| <html> +| <head> +| <body> +| <div> +| <b> +| <div> +| <b> +| <nobr> +| "a" +| <nobr> + +#data +<head></head> +<body></body> +#errors +#document +| <html> +| <head> +| " +" +| <body> + +#data +<head></head> <style></style>ddd +#errors +#document +| <html> +| <head> +| <style> +| " " +| <body> +| "ddd" + +#data +<kbd><table></kbd><col><select><tr> +#errors +#document +| <html> +| 
<head> +| <body> +| <kbd> +| <select> +| <table> +| <colgroup> +| <col> +| <tbody> +| <tr> + +#data +<kbd><table></kbd><col><select><tr></table><div> +#errors +#document +| <html> +| <head> +| <body> +| <kbd> +| <select> +| <table> +| <colgroup> +| <col> +| <tbody> +| <tr> +| <div> + +#data +<a><li><style></style><title></title></a> +#errors +#document +| <html> +| <head> +| <body> +| <a> +| <li> +| <a> +| <style> +| <title> + +#data +<font></p><p><meta><title></title></font> +#errors +#document +| <html> +| <head> +| <body> +| <font> +| <p> +| <p> +| <font> +| <meta> +| <title> + +#data +<a><center><title></title><a> +#errors +#document +| <html> +| <head> +| <body> +| <a> +| <center> +| <a> +| <title> +| <a> + +#data +<svg><title><div> +#errors +#document +| <html> +| <head> +| <body> +| <svg svg> +| <svg title> +| <div> + +#data +<svg><title><rect><div> +#errors +#document +| <html> +| <head> +| <body> +| <svg svg> +| <svg title> +| <rect> +| <div> + +#data +<svg><title><svg><div> +#errors +#document +| <html> +| <head> +| <body> +| <svg svg> +| <svg title> +| <svg svg> +| <div> + +#data +<img <="" FAIL> +#errors +#document +| <html> +| <head> +| <body> +| <img> +| <="" +| fail="" + +#data +<ul><li><div id='foo'/>A</li><li>B<div>C</div></li></ul> +#errors +#document +| <html> +| <head> +| <body> +| <ul> +| <li> +| <div> +| id="foo" +| "A" +| <li> +| "B" +| <div> +| "C" + +#data +<svg><em><desc></em> +#errors +#document +| <html> +| <head> +| <body> +| <svg svg> +| <em> +| <desc> + +#data +<table><tr><td><svg><desc><td></desc><circle> +#errors +#document +| <html> +| <head> +| <body> +| <table> +| <tbody> +| <tr> +| <td> +| <svg svg> +| <svg desc> +| <svg circle> + +#data +<svg><tfoot></mi><td> +#errors +#document +| <html> +| <head> +| <body> +| <svg svg> +| <svg tfoot> +| <svg td> + +#data +<math><mrow><mrow><mn>1</mn></mrow><mi>a</mi></mrow></math> +#errors +#document +| <html> +| <head> +| <body> +| <math math> +| <math mrow> +| <math mrow> +| <math mn> +| 
"1" +| <math mi> +| "a" + +#data +<!doctype html><input type="hidden"><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <frameset> + +#data +<!doctype html><input type="button"><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <input> +| type="button" diff --git a/src/pkg/html/testdata/webkit/webkit02.dat b/src/pkg/html/testdata/webkit/webkit02.dat new file mode 100644 index 000000000..2218f4298 --- /dev/null +++ b/src/pkg/html/testdata/webkit/webkit02.dat @@ -0,0 +1,104 @@ +#data +<foo bar=qux/> +#errors +#document +| <html> +| <head> +| <body> +| <foo> +| bar="qux/" + +#data +<p id="status"><noscript><strong>A</strong></noscript><span>B</span></p> +#errors +#document +| <html> +| <head> +| <body> +| <p> +| id="status" +| <noscript> +| "<strong>A</strong>" +| <span> +| "B" + +#data +<div><sarcasm><div></div></sarcasm></div> +#errors +#document +| <html> +| <head> +| <body> +| <div> +| <sarcasm> +| <div> + +#data +<html><body><img src="" border="0" alt="><div>A</div></body></html> +#errors +#document +| <html> +| <head> +| <body> + +#data +<table><td></tbody>A +#errors +#document +| <html> +| <head> +| <body> +| "A" +| <table> +| <tbody> +| <tr> +| <td> + +#data +<table><td></thead>A +#errors +#document +| <html> +| <head> +| <body> +| <table> +| <tbody> +| <tr> +| <td> +| "A" + +#data +<table><td></tfoot>A +#errors +#document +| <html> +| <head> +| <body> +| <table> +| <tbody> +| <tr> +| <td> +| "A" + +#data +<table><thead><td></tbody>A +#errors +#document +| <html> +| <head> +| <body> +| <table> +| <thead> +| <tr> +| <td> +| "A" + +#data +<legend>test</legend> +#errors +#document +| <html> +| <head> +| <body> +| <legend> +| "test" diff --git a/src/pkg/html/token.go b/src/pkg/html/token.go index 23c95ece6..5c6ed1666 100644 --- a/src/pkg/html/token.go +++ b/src/pkg/html/token.go @@ -459,7 +459,7 @@ loop: src++ break loop case '&': - dst, src = unescapeEntity(z.buf, dst, src) + dst, src = unescapeEntity(z.buf, 
dst, src, true) case '\\': if src == z.p1 { z.buf[dst] = '\\' diff --git a/src/pkg/html/token_test.go b/src/pkg/html/token_test.go index c794612ab..c8dcc8864 100644 --- a/src/pkg/html/token_test.go +++ b/src/pkg/html/token_test.go @@ -107,6 +107,16 @@ var tokenTests = []tokenTest{ `<a b="c&noSuchEntity;d"><&alsoDoesntExist;&`, `<a b="c&noSuchEntity;d">$<&alsoDoesntExist;&`, }, + { + "entity without semicolon", + `¬it;∉<a b="q=z&=5¬ice=hello¬=world">`, + `¬it;∉$<a b="q=z&amp=5&notice=hello¬=world">`, + }, + { + "entity with digits", + "½", + "½", + }, // Attribute tests: // http://dev.w3.org/html5/spec/Overview.html#attributes-0 |