summaryrefslogtreecommitdiff
path: root/src/pkg/html
diff options
context:
space:
mode:
Diffstat (limited to 'src/pkg/html')
-rw-r--r--src/pkg/html/Makefile5
-rw-r--r--src/pkg/html/const.go90
-rw-r--r--src/pkg/html/doc.go110
-rw-r--r--src/pkg/html/entity.go4
-rw-r--r--src/pkg/html/entity_test.go2
-rw-r--r--src/pkg/html/escape.go41
-rw-r--r--src/pkg/html/node.go147
-rw-r--r--src/pkg/html/parse.go800
-rw-r--r--src/pkg/html/parse_test.go156
-rw-r--r--src/pkg/html/template/Makefile23
-rw-r--r--src/pkg/html/template/attr.go175
-rw-r--r--src/pkg/html/template/clone.go90
-rw-r--r--src/pkg/html/template/clone_test.go92
-rw-r--r--src/pkg/html/template/content.go113
-rw-r--r--src/pkg/html/template/content_test.go221
-rw-r--r--src/pkg/html/template/context.go339
-rw-r--r--src/pkg/html/template/css.go268
-rw-r--r--src/pkg/html/template/css_test.go281
-rw-r--r--src/pkg/html/template/doc.go186
-rw-r--r--src/pkg/html/template/error.go197
-rw-r--r--src/pkg/html/template/escape.go753
-rw-r--r--src/pkg/html/template/escape_test.go1643
-rw-r--r--src/pkg/html/template/html.go257
-rw-r--r--src/pkg/html/template/html_test.go94
-rw-r--r--src/pkg/html/template/js.go362
-rw-r--r--src/pkg/html/template/js_test.go401
-rw-r--r--src/pkg/html/template/template.go280
-rw-r--r--src/pkg/html/template/transition.go553
-rw-r--r--src/pkg/html/template/url.go105
-rw-r--r--src/pkg/html/template/url_test.go112
-rw-r--r--src/pkg/html/testdata/webkit/README28
-rw-r--r--src/pkg/html/testdata/webkit/adoption01.dat194
-rw-r--r--src/pkg/html/testdata/webkit/adoption02.dat31
-rw-r--r--src/pkg/html/testdata/webkit/comments01.dat135
-rw-r--r--src/pkg/html/testdata/webkit/doctype01.dat370
-rw-r--r--src/pkg/html/testdata/webkit/entities01.dat603
-rw-r--r--src/pkg/html/testdata/webkit/entities02.dat249
-rw-r--r--src/pkg/html/testdata/webkit/html5test-com.dat246
-rw-r--r--src/pkg/html/testdata/webkit/inbody01.dat43
-rw-r--r--src/pkg/html/testdata/webkit/isindex.dat40
-rw-r--r--src/pkg/html/testdata/webkit/pending-spec-changes-plain-text-unsafe.datbin115 -> 0 bytes
-rw-r--r--src/pkg/html/testdata/webkit/pending-spec-changes.dat28
-rw-r--r--src/pkg/html/testdata/webkit/plain-text-unsafe.dat8
-rw-r--r--src/pkg/html/testdata/webkit/scriptdata01.dat308
-rw-r--r--src/pkg/html/testdata/webkit/scripted/adoption01.dat15
-rw-r--r--src/pkg/html/testdata/webkit/scripted/webkit01.dat28
-rw-r--r--src/pkg/html/testdata/webkit/tables01.dat197
-rw-r--r--src/pkg/html/testdata/webkit/tests1.dat1952
-rw-r--r--src/pkg/html/testdata/webkit/tests10.dat799
-rw-r--r--src/pkg/html/testdata/webkit/tests11.dat482
-rw-r--r--src/pkg/html/testdata/webkit/tests12.dat62
-rw-r--r--src/pkg/html/testdata/webkit/tests14.dat74
-rw-r--r--src/pkg/html/testdata/webkit/tests15.dat208
-rw-r--r--src/pkg/html/testdata/webkit/tests16.dat2277
-rw-r--r--src/pkg/html/testdata/webkit/tests17.dat153
-rw-r--r--src/pkg/html/testdata/webkit/tests18.dat269
-rw-r--r--src/pkg/html/testdata/webkit/tests19.dat1220
-rw-r--r--src/pkg/html/testdata/webkit/tests2.dat763
-rw-r--r--src/pkg/html/testdata/webkit/tests20.dat455
-rw-r--r--src/pkg/html/testdata/webkit/tests21.dat221
-rw-r--r--src/pkg/html/testdata/webkit/tests22.dat157
-rw-r--r--src/pkg/html/testdata/webkit/tests23.dat155
-rw-r--r--src/pkg/html/testdata/webkit/tests24.dat79
-rw-r--r--src/pkg/html/testdata/webkit/tests25.dat219
-rw-r--r--src/pkg/html/testdata/webkit/tests26.dat195
-rw-r--r--src/pkg/html/testdata/webkit/tests3.dat305
-rw-r--r--src/pkg/html/testdata/webkit/tests4.dat59
-rw-r--r--src/pkg/html/testdata/webkit/tests5.dat191
-rw-r--r--src/pkg/html/testdata/webkit/tests6.dat663
-rw-r--r--src/pkg/html/testdata/webkit/tests7.dat390
-rw-r--r--src/pkg/html/testdata/webkit/tests8.dat148
-rw-r--r--src/pkg/html/testdata/webkit/tests9.dat457
-rw-r--r--src/pkg/html/testdata/webkit/tests_innerHTML_1.dat733
-rw-r--r--src/pkg/html/testdata/webkit/tricky01.dat261
-rw-r--r--src/pkg/html/testdata/webkit/webkit01.dat609
-rw-r--r--src/pkg/html/testdata/webkit/webkit02.dat104
-rw-r--r--src/pkg/html/token.go575
-rw-r--r--src/pkg/html/token_test.go340
78 files changed, 6578 insertions, 18420 deletions
diff --git a/src/pkg/html/Makefile b/src/pkg/html/Makefile
index 28dc1a3f5..1b18358a7 100644
--- a/src/pkg/html/Makefile
+++ b/src/pkg/html/Makefile
@@ -6,12 +6,7 @@ include ../../Make.inc
TARG=html
GOFILES=\
- const.go\
- doc.go\
entity.go\
escape.go\
- node.go\
- parse.go\
- token.go\
include ../../Make.pkg
diff --git a/src/pkg/html/const.go b/src/pkg/html/const.go
deleted file mode 100644
index 9078d2601..000000000
--- a/src/pkg/html/const.go
+++ /dev/null
@@ -1,90 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package html
-
-// Section 11.2.3.2 of the HTML5 specification says "The following elements
-// have varying levels of special parsing rules".
-// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements
-var isSpecialElement = map[string]bool{
- "address": true,
- "applet": true,
- "area": true,
- "article": true,
- "aside": true,
- "base": true,
- "basefont": true,
- "bgsound": true,
- "blockquote": true,
- "body": true,
- "br": true,
- "button": true,
- "caption": true,
- "center": true,
- "col": true,
- "colgroup": true,
- "command": true,
- "dd": true,
- "details": true,
- "dir": true,
- "div": true,
- "dl": true,
- "dt": true,
- "embed": true,
- "fieldset": true,
- "figcaption": true,
- "figure": true,
- "footer": true,
- "form": true,
- "frame": true,
- "frameset": true,
- "h1": true,
- "h2": true,
- "h3": true,
- "h4": true,
- "h5": true,
- "h6": true,
- "head": true,
- "header": true,
- "hgroup": true,
- "hr": true,
- "html": true,
- "iframe": true,
- "img": true,
- "input": true,
- "isindex": true,
- "li": true,
- "link": true,
- "listing": true,
- "marquee": true,
- "menu": true,
- "meta": true,
- "nav": true,
- "noembed": true,
- "noframes": true,
- "noscript": true,
- "object": true,
- "ol": true,
- "p": true,
- "param": true,
- "plaintext": true,
- "pre": true,
- "script": true,
- "section": true,
- "select": true,
- "style": true,
- "summary": true,
- "table": true,
- "tbody": true,
- "td": true,
- "textarea": true,
- "tfoot": true,
- "th": true,
- "thead": true,
- "title": true,
- "tr": true,
- "ul": true,
- "wbr": true,
- "xmp": true,
-}
diff --git a/src/pkg/html/doc.go b/src/pkg/html/doc.go
deleted file mode 100644
index 5bc063086..000000000
--- a/src/pkg/html/doc.go
+++ /dev/null
@@ -1,110 +0,0 @@
-// Copyright 2010 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-/*
-Package html implements an HTML5-compliant tokenizer and parser.
-INCOMPLETE.
-
-Tokenization is done by creating a Tokenizer for an io.Reader r. It is the
-caller's responsibility to ensure that r provides UTF-8 encoded HTML.
-
- z := html.NewTokenizer(r)
-
-Given a Tokenizer z, the HTML is tokenized by repeatedly calling z.Next(),
-which parses the next token and returns its type, or an error:
-
- for {
- tt := z.Next()
- if tt == html.ErrorToken {
- // ...
- return ...
- }
- // Process the current token.
- }
-
-There are two APIs for retrieving the current token. The high-level API is to
-call Token; the low-level API is to call Text or TagName / TagAttr. Both APIs
-allow optionally calling Raw after Next but before Token, Text, TagName, or
-TagAttr. In EBNF notation, the valid call sequence per token is:
-
- Next {Raw} [ Token | Text | TagName {TagAttr} ]
-
-Token returns an independent data structure that completely describes a token.
-Entities (such as "&lt;") are unescaped, tag names and attribute keys are
-lower-cased, and attributes are collected into a []Attribute. For example:
-
- for {
- if z.Next() == html.ErrorToken {
- // Returning os.EOF indicates success.
- return z.Error()
- }
- emitToken(z.Token())
- }
-
-The low-level API performs fewer allocations and copies, but the contents of
-the []byte values returned by Text, TagName and TagAttr may change on the next
-call to Next. For example, to extract an HTML page's anchor text:
-
- depth := 0
- for {
- tt := z.Next()
- switch tt {
- case ErrorToken:
- return z.Error()
- case TextToken:
- if depth > 0 {
- // emitBytes should copy the []byte it receives,
- // if it doesn't process it immediately.
- emitBytes(z.Text())
- }
- case StartTagToken, EndTagToken:
- tn, _ := z.TagName()
- if len(tn) == 1 && tn[0] == 'a' {
- if tt == StartTagToken {
- depth++
- } else {
- depth--
- }
- }
- }
- }
-
-A Tokenizer typically skips over HTML comments. To return comment tokens, set
-Tokenizer.ReturnComments to true before looping over calls to Next.
-
-Parsing is done by calling Parse with an io.Reader, which returns the root of
-the parse tree (the document element) as a *Node. It is the caller's
-responsibility to ensure that the Reader provides UTF-8 encoded HTML. For
-example, to process each anchor node in depth-first order:
-
- doc, err := html.Parse(r)
- if err != nil {
- // ...
- }
- var f func(*html.Node)
- f = func(n *html.Node) {
- if n.Type == html.ElementNode && n.Data == "a" {
- // Do something with n...
- }
- for _, c := range n.Child {
- f(c)
- }
- }
- f(doc)
-
-The relevant specifications include:
-http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html and
-http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html
-*/
-package html
-
-// The tokenization algorithm implemented by this package is not a line-by-line
-// transliteration of the relatively verbose state-machine in the WHATWG
-// specification. A more direct approach is used instead, where the program
-// counter implies the state, such as whether it is tokenizing a tag or a text
-// node. Specification compliance is verified by checking expected and actual
-// outputs over a test suite rather than aiming for algorithmic fidelity.
-
-// TODO(nigeltao): Does a DOM API belong in this package or a separate one?
-// TODO(nigeltao): How does parsing interact with a JavaScript engine?
diff --git a/src/pkg/html/entity.go b/src/pkg/html/entity.go
index 21263e22d..bd8307523 100644
--- a/src/pkg/html/entity.go
+++ b/src/pkg/html/entity.go
@@ -13,7 +13,7 @@ const longestEntityWithoutSemicolon = 6
//
// Note that the HTML5 list is larger than the HTML4 list at
// http://www.w3.org/TR/html4/sgml/entities.html
-var entity = map[string]int{
+var entity = map[string]rune{
"AElig;": '\U000000C6',
"AMP;": '\U00000026',
"Aacute;": '\U000000C1',
@@ -2155,7 +2155,7 @@ var entity = map[string]int{
}
// HTML entities that are two unicode codepoints.
-var entity2 = map[string][2]int{
+var entity2 = map[string][2]rune{
// TODO(nigeltao): Handle replacements that are wider than their names.
// "nLt;": {'\u226A', '\u20D2'},
// "nGt;": {'\u226B', '\u20D2'},
diff --git a/src/pkg/html/entity_test.go b/src/pkg/html/entity_test.go
index 2cf49d61d..b53f866fa 100644
--- a/src/pkg/html/entity_test.go
+++ b/src/pkg/html/entity_test.go
@@ -6,7 +6,7 @@ package html
import (
"testing"
- "utf8"
+ "unicode/utf8"
)
func TestEntityLength(t *testing.T) {
diff --git a/src/pkg/html/escape.go b/src/pkg/html/escape.go
index 0de97c5ac..c0b5262af 100644
--- a/src/pkg/html/escape.go
+++ b/src/pkg/html/escape.go
@@ -7,13 +7,17 @@ package html
import (
"bytes"
"strings"
- "utf8"
+ "unicode/utf8"
)
+type writer interface {
+ WriteString(string) (int, error)
+}
+
// These replacements permit compatibility with old numeric entities that
// assumed Windows-1252 encoding.
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#consume-a-character-reference
-var replacementTable = [...]int{
+var replacementTable = [...]rune{
'\u20AC', // First entry is what 0x80 should be replaced with.
'\u0081',
'\u201A',
@@ -78,23 +82,23 @@ func unescapeEntity(b []byte, dst, src int, attribute bool) (dst1, src1 int) {
i++
}
- x := 0
+ x := '\x00'
for i < len(s) {
c = s[i]
i++
if hex {
if '0' <= c && c <= '9' {
- x = 16*x + int(c) - '0'
+ x = 16*x + rune(c) - '0'
continue
} else if 'a' <= c && c <= 'f' {
- x = 16*x + int(c) - 'a' + 10
+ x = 16*x + rune(c) - 'a' + 10
continue
} else if 'A' <= c && c <= 'F' {
- x = 16*x + int(c) - 'A' + 10
+ x = 16*x + rune(c) - 'A' + 10
continue
}
} else if '0' <= c && c <= '9' {
- x = 10*x + int(c) - '0'
+ x = 10*x + rune(c) - '0'
continue
}
if c != ';' {
@@ -182,12 +186,24 @@ func unescape(b []byte) []byte {
return b
}
+// lower lower-cases the A-Z bytes in b in-place, so that "aBc" becomes "abc".
+func lower(b []byte) []byte {
+ for i, c := range b {
+ if 'A' <= c && c <= 'Z' {
+ b[i] = c + 'a' - 'A'
+ }
+ }
+ return b
+}
+
const escapedChars = `&'<>"`
-func escape(buf *bytes.Buffer, s string) {
+func escape(w writer, s string) error {
i := strings.IndexAny(s, escapedChars)
for i != -1 {
- buf.WriteString(s[0:i])
+ if _, err := w.WriteString(s[:i]); err != nil {
+ return err
+ }
var esc string
switch s[i] {
case '&':
@@ -204,10 +220,13 @@ func escape(buf *bytes.Buffer, s string) {
panic("unrecognized escape character")
}
s = s[i+1:]
- buf.WriteString(esc)
+ if _, err := w.WriteString(esc); err != nil {
+ return err
+ }
i = strings.IndexAny(s, escapedChars)
}
- buf.WriteString(s)
+ _, err := w.WriteString(s)
+ return err
}
// EscapeString escapes special characters like "<" to become "&lt;". It
diff --git a/src/pkg/html/node.go b/src/pkg/html/node.go
deleted file mode 100644
index 4ecfd6ca2..000000000
--- a/src/pkg/html/node.go
+++ /dev/null
@@ -1,147 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package html
-
-// A NodeType is the type of a Node.
-type NodeType int
-
-const (
- ErrorNode NodeType = iota
- TextNode
- DocumentNode
- ElementNode
- CommentNode
- DoctypeNode
- scopeMarkerNode
-)
-
-// Section 11.2.3.3 says "scope markers are inserted when entering applet
-// elements, buttons, object elements, marquees, table cells, and table
-// captions, and are used to prevent formatting from 'leaking'".
-var scopeMarker = Node{Type: scopeMarkerNode}
-
-// A Node consists of a NodeType and some Data (tag name for element nodes,
-// content for text) and are part of a tree of Nodes. Element nodes may also
-// contain a slice of Attributes. Data is unescaped, so that it looks like
-// "a<b" rather than "a&lt;b".
-type Node struct {
- Parent *Node
- Child []*Node
- Type NodeType
- Data string
- Attr []Attribute
-}
-
-// Add adds a node as a child of n.
-// It will panic if the child's parent is not nil.
-func (n *Node) Add(child *Node) {
- if child.Parent != nil {
- panic("html: Node.Add called for a child Node that already has a parent")
- }
- child.Parent = n
- n.Child = append(n.Child, child)
-}
-
-// Remove removes a node as a child of n.
-// It will panic if the child's parent is not n.
-func (n *Node) Remove(child *Node) {
- if child.Parent == n {
- child.Parent = nil
- for i, m := range n.Child {
- if m == child {
- copy(n.Child[i:], n.Child[i+1:])
- j := len(n.Child) - 1
- n.Child[j] = nil
- n.Child = n.Child[:j]
- return
- }
- }
- }
- panic("html: Node.Remove called for a non-child Node")
-}
-
-// reparentChildren reparents all of src's child nodes to dst.
-func reparentChildren(dst, src *Node) {
- for _, n := range src.Child {
- if n.Parent != src {
- panic("html: nodes have an inconsistent parent/child relationship")
- }
- n.Parent = dst
- }
- dst.Child = append(dst.Child, src.Child...)
- src.Child = nil
-}
-
-// clone returns a new node with the same type, data and attributes.
-// The clone has no parent and no children.
-func (n *Node) clone() *Node {
- m := &Node{
- Type: n.Type,
- Data: n.Data,
- Attr: make([]Attribute, len(n.Attr)),
- }
- copy(m.Attr, n.Attr)
- return m
-}
-
-// nodeStack is a stack of nodes.
-type nodeStack []*Node
-
-// pop pops the stack. It will panic if s is empty.
-func (s *nodeStack) pop() *Node {
- i := len(*s)
- n := (*s)[i-1]
- *s = (*s)[:i-1]
- return n
-}
-
-// top returns the most recently pushed node, or nil if s is empty.
-func (s *nodeStack) top() *Node {
- if i := len(*s); i > 0 {
- return (*s)[i-1]
- }
- return nil
-}
-
-// index returns the index of the top-most occurrence of n in the stack, or -1
-// if n is not present.
-func (s *nodeStack) index(n *Node) int {
- for i := len(*s) - 1; i >= 0; i-- {
- if (*s)[i] == n {
- return i
- }
- }
- return -1
-}
-
-// insert inserts a node at the given index.
-func (s *nodeStack) insert(i int, n *Node) {
- (*s) = append(*s, nil)
- copy((*s)[i+1:], (*s)[i:])
- (*s)[i] = n
-}
-
-// remove removes a node from the stack. It is a no-op if n is not present.
-func (s *nodeStack) remove(n *Node) {
- i := s.index(n)
- if i == -1 {
- return
- }
- copy((*s)[i:], (*s)[i+1:])
- j := len(*s) - 1
- (*s)[j] = nil
- *s = (*s)[:j]
-}
-
-// forTag returns the top-most element node with the given tag.
-func (s *nodeStack) forTag(tag string) *Node {
- for i := len(*s) - 1; i >= 0; i-- {
- n := (*s)[i]
- if n.Type == ElementNode && n.Data == tag {
- return n
- }
- }
- return nil
-}
diff --git a/src/pkg/html/parse.go b/src/pkg/html/parse.go
deleted file mode 100644
index 519ebe587..000000000
--- a/src/pkg/html/parse.go
+++ /dev/null
@@ -1,800 +0,0 @@
-// Copyright 2010 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package html
-
-import (
- "io"
- "os"
-)
-
-// A parser implements the HTML5 parsing algorithm:
-// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#tree-construction
-type parser struct {
- // tokenizer provides the tokens for the parser.
- tokenizer *Tokenizer
- // tok is the most recently read token.
- tok Token
- // Self-closing tags like <hr/> are re-interpreted as a two-token sequence:
- // <hr> followed by </hr>. hasSelfClosingToken is true if we have just read
- // the synthetic start tag and the next one due is the matching end tag.
- hasSelfClosingToken bool
- // doc is the document root element.
- doc *Node
- // The stack of open elements (section 11.2.3.2) and active formatting
- // elements (section 11.2.3.3).
- oe, afe nodeStack
- // Element pointers (section 11.2.3.4).
- head, form *Node
- // Other parsing state flags (section 11.2.3.5).
- scripting, framesetOK bool
-}
-
-func (p *parser) top() *Node {
- if n := p.oe.top(); n != nil {
- return n
- }
- return p.doc
-}
-
-// stopTags for use in popUntil. These come from section 11.2.3.2.
-var (
- defaultScopeStopTags = []string{"applet", "caption", "html", "table", "td", "th", "marquee", "object"}
- listItemScopeStopTags = []string{"applet", "caption", "html", "table", "td", "th", "marquee", "object", "ol", "ul"}
- buttonScopeStopTags = []string{"applet", "caption", "html", "table", "td", "th", "marquee", "object", "button"}
- tableScopeStopTags = []string{"html", "table"}
-)
-
-// popUntil pops the stack of open elements at the highest element whose tag
-// is in matchTags, provided there is no higher element in stopTags. It returns
-// whether or not there was such an element. If there was not, popUntil leaves
-// the stack unchanged.
-//
-// For example, if the stack was:
-// ["html", "body", "font", "table", "b", "i", "u"]
-// then popUntil([]string{"html", "table"}, "font") would return false, but
-// popUntil([]string{"html", "table"}, "i") would return true and the resultant
-// stack would be:
-// ["html", "body", "font", "table", "b"]
-//
-// If an element's tag is in both stopTags and matchTags, then the stack will
-// be popped and the function returns true (provided, of course, there was no
-// higher element in the stack that was also in stopTags). For example,
-// popUntil([]string{"html", "table"}, "table") would return true and leave:
-// ["html", "body", "font"]
-func (p *parser) popUntil(stopTags []string, matchTags ...string) bool {
- for i := len(p.oe) - 1; i >= 0; i-- {
- tag := p.oe[i].Data
- for _, t := range matchTags {
- if t == tag {
- p.oe = p.oe[:i]
- return true
- }
- }
- for _, t := range stopTags {
- if t == tag {
- return false
- }
- }
- }
- return false
-}
-
-// addChild adds a child node n to the top element, and pushes n onto the stack
-// of open elements if it is an element node.
-func (p *parser) addChild(n *Node) {
- p.top().Add(n)
- if n.Type == ElementNode {
- p.oe = append(p.oe, n)
- }
-}
-
-// addText adds text to the preceding node if it is a text node, or else it
-// calls addChild with a new text node.
-func (p *parser) addText(text string) {
- // TODO: distinguish whitespace text from others.
- t := p.top()
- if i := len(t.Child); i > 0 && t.Child[i-1].Type == TextNode {
- t.Child[i-1].Data += text
- return
- }
- p.addChild(&Node{
- Type: TextNode,
- Data: text,
- })
-}
-
-// addElement calls addChild with an element node.
-func (p *parser) addElement(tag string, attr []Attribute) {
- p.addChild(&Node{
- Type: ElementNode,
- Data: tag,
- Attr: attr,
- })
-}
-
-// Section 11.2.3.3.
-func (p *parser) addFormattingElement(tag string, attr []Attribute) {
- p.addElement(tag, attr)
- p.afe = append(p.afe, p.top())
- // TODO.
-}
-
-// Section 11.2.3.3.
-func (p *parser) clearActiveFormattingElements() {
- for {
- n := p.afe.pop()
- if len(p.afe) == 0 || n.Type == scopeMarkerNode {
- return
- }
- }
-}
-
-// Section 11.2.3.3.
-func (p *parser) reconstructActiveFormattingElements() {
- n := p.afe.top()
- if n == nil {
- return
- }
- if n.Type == scopeMarkerNode || p.oe.index(n) != -1 {
- return
- }
- i := len(p.afe) - 1
- for n.Type != scopeMarkerNode && p.oe.index(n) == -1 {
- if i == 0 {
- i = -1
- break
- }
- i--
- n = p.afe[i]
- }
- for {
- i++
- n = p.afe[i]
- p.addChild(n.clone())
- p.afe[i] = n
- if i == len(p.afe)-1 {
- break
- }
- }
-}
-
-// read reads the next token. This is usually from the tokenizer, but it may
-// be the synthesized end tag implied by a self-closing tag.
-func (p *parser) read() os.Error {
- if p.hasSelfClosingToken {
- p.hasSelfClosingToken = false
- p.tok.Type = EndTagToken
- p.tok.Attr = nil
- return nil
- }
- p.tokenizer.Next()
- p.tok = p.tokenizer.Token()
- switch p.tok.Type {
- case ErrorToken:
- return p.tokenizer.Error()
- case SelfClosingTagToken:
- p.hasSelfClosingToken = true
- p.tok.Type = StartTagToken
- }
- return nil
-}
-
-// Section 11.2.4.
-func (p *parser) acknowledgeSelfClosingTag() {
- p.hasSelfClosingToken = false
-}
-
-// An insertion mode (section 11.2.3.1) is the state transition function from
-// a particular state in the HTML5 parser's state machine. It updates the
-// parser's fields depending on parser.tok (where ErrorToken means EOF). In
-// addition to returning the next insertionMode state, it also returns whether
-// the token was consumed.
-type insertionMode func(*parser) (insertionMode, bool)
-
-// useTheRulesFor runs the delegate insertionMode over p, returning the actual
-// insertionMode unless the delegate caused a state transition.
-// Section 11.2.3.1, "using the rules for".
-func useTheRulesFor(p *parser, actual, delegate insertionMode) (insertionMode, bool) {
- im, consumed := delegate(p)
- if im != delegate {
- return im, consumed
- }
- return actual, consumed
-}
-
-// Section 11.2.5.4.1.
-func initialIM(p *parser) (insertionMode, bool) {
- if p.tok.Type == DoctypeToken {
- p.addChild(&Node{
- Type: DoctypeNode,
- Data: p.tok.Data,
- })
- return beforeHTMLIM, true
- }
- // TODO: set "quirks mode"? It's defined in the DOM spec instead of HTML5 proper,
- // and so switching on "quirks mode" might belong in a different package.
- return beforeHTMLIM, false
-}
-
-// Section 11.2.5.4.2.
-func beforeHTMLIM(p *parser) (insertionMode, bool) {
- var (
- add bool
- attr []Attribute
- implied bool
- )
- switch p.tok.Type {
- case ErrorToken:
- implied = true
- case TextToken:
- // TODO: distinguish whitespace text from others.
- implied = true
- case StartTagToken:
- if p.tok.Data == "html" {
- add = true
- attr = p.tok.Attr
- } else {
- implied = true
- }
- case EndTagToken:
- switch p.tok.Data {
- case "head", "body", "html", "br":
- implied = true
- default:
- // Ignore the token.
- }
- }
- if add || implied {
- p.addElement("html", attr)
- }
- return beforeHeadIM, !implied
-}
-
-// Section 11.2.5.4.3.
-func beforeHeadIM(p *parser) (insertionMode, bool) {
- var (
- add bool
- attr []Attribute
- implied bool
- )
- switch p.tok.Type {
- case ErrorToken:
- implied = true
- case TextToken:
- // TODO: distinguish whitespace text from others.
- implied = true
- case StartTagToken:
- switch p.tok.Data {
- case "head":
- add = true
- attr = p.tok.Attr
- case "html":
- return useTheRulesFor(p, beforeHeadIM, inBodyIM)
- default:
- implied = true
- }
- case EndTagToken:
- switch p.tok.Data {
- case "head", "body", "html", "br":
- implied = true
- default:
- // Ignore the token.
- }
- }
- if add || implied {
- p.addElement("head", attr)
- }
- return inHeadIM, !implied
-}
-
-// Section 11.2.5.4.4.
-func inHeadIM(p *parser) (insertionMode, bool) {
- var (
- pop bool
- implied bool
- )
- switch p.tok.Type {
- case ErrorToken, TextToken:
- implied = true
- case StartTagToken:
- switch p.tok.Data {
- case "meta":
- // TODO.
- case "script":
- // TODO.
- default:
- implied = true
- }
- case EndTagToken:
- if p.tok.Data == "head" {
- pop = true
- }
- // TODO.
- }
- if pop || implied {
- n := p.oe.pop()
- if n.Data != "head" {
- panic("html: bad parser state")
- }
- return afterHeadIM, !implied
- }
- return inHeadIM, !implied
-}
-
-// Section 11.2.5.4.6.
-func afterHeadIM(p *parser) (insertionMode, bool) {
- var (
- add bool
- attr []Attribute
- framesetOK bool
- implied bool
- )
- switch p.tok.Type {
- case ErrorToken, TextToken:
- implied = true
- framesetOK = true
- case StartTagToken:
- switch p.tok.Data {
- case "html":
- // TODO.
- case "body":
- add = true
- attr = p.tok.Attr
- framesetOK = false
- case "frameset":
- // TODO.
- case "base", "basefont", "bgsound", "link", "meta", "noframes", "script", "style", "title":
- // TODO.
- case "head":
- // TODO.
- default:
- implied = true
- framesetOK = true
- }
- case EndTagToken:
- // TODO.
- }
- if add || implied {
- p.addElement("body", attr)
- p.framesetOK = framesetOK
- }
- return inBodyIM, !implied
-}
-
-// Section 11.2.5.4.7.
-func inBodyIM(p *parser) (insertionMode, bool) {
- var endP bool
- switch p.tok.Type {
- case TextToken:
- p.reconstructActiveFormattingElements()
- p.addText(p.tok.Data)
- p.framesetOK = false
- case StartTagToken:
- switch p.tok.Data {
- case "address", "article", "aside", "blockquote", "center", "details", "dir", "div", "dl", "fieldset", "figcaption", "figure", "footer", "header", "hgroup", "menu", "nav", "ol", "p", "section", "summary", "ul":
- // TODO: Do the proper "does the stack of open elements have a p element in button scope" algorithm in section 11.2.3.2.
- n := p.top()
- if n.Type == ElementNode && n.Data == "p" {
- endP = true
- } else {
- p.addElement(p.tok.Data, p.tok.Attr)
- }
- case "h1", "h2", "h3", "h4", "h5", "h6":
- // TODO: auto-insert </p> if necessary.
- switch n := p.top(); n.Data {
- case "h1", "h2", "h3", "h4", "h5", "h6":
- p.oe.pop()
- }
- p.addElement(p.tok.Data, p.tok.Attr)
- case "a":
- if n := p.afe.forTag("a"); n != nil {
- p.inBodyEndTagFormatting("a")
- p.oe.remove(n)
- p.afe.remove(n)
- }
- p.reconstructActiveFormattingElements()
- p.addFormattingElement(p.tok.Data, p.tok.Attr)
- case "b", "big", "code", "em", "font", "i", "s", "small", "strike", "strong", "tt", "u":
- p.reconstructActiveFormattingElements()
- p.addFormattingElement(p.tok.Data, p.tok.Attr)
- case "area", "br", "embed", "img", "input", "keygen", "wbr":
- p.reconstructActiveFormattingElements()
- p.addElement(p.tok.Data, p.tok.Attr)
- p.oe.pop()
- p.acknowledgeSelfClosingTag()
- p.framesetOK = false
- case "table":
- // TODO: auto-insert </p> if necessary, depending on quirks mode.
- p.addElement(p.tok.Data, p.tok.Attr)
- p.framesetOK = false
- return inTableIM, true
- case "hr":
- // TODO: auto-insert </p> if necessary.
- p.addElement(p.tok.Data, p.tok.Attr)
- p.oe.pop()
- p.acknowledgeSelfClosingTag()
- p.framesetOK = false
- default:
- // TODO.
- p.addElement(p.tok.Data, p.tok.Attr)
- }
- case EndTagToken:
- switch p.tok.Data {
- case "body":
- // TODO: autoclose the stack of open elements.
- return afterBodyIM, true
- case "a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u":
- p.inBodyEndTagFormatting(p.tok.Data)
- default:
- // TODO: any other end tag
- if p.tok.Data == p.top().Data {
- p.oe.pop()
- }
- }
- }
- if endP {
- // TODO: do the proper algorithm.
- n := p.oe.pop()
- if n.Type != ElementNode || n.Data != "p" {
- panic("unreachable")
- }
- }
- return inBodyIM, !endP
-}
-
-func (p *parser) inBodyEndTagFormatting(tag string) {
- // This is the "adoption agency" algorithm, described at
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#adoptionAgency
-
- // TODO: this is a fairly literal line-by-line translation of that algorithm.
- // Once the code successfully parses the comprehensive test suite, we should
- // refactor this code to be more idiomatic.
-
- // Steps 1-3. The outer loop.
- for i := 0; i < 8; i++ {
- // Step 4. Find the formatting element.
- var formattingElement *Node
- for j := len(p.afe) - 1; j >= 0; j-- {
- if p.afe[j].Type == scopeMarkerNode {
- break
- }
- if p.afe[j].Data == tag {
- formattingElement = p.afe[j]
- break
- }
- }
- if formattingElement == nil {
- return
- }
- feIndex := p.oe.index(formattingElement)
- if feIndex == -1 {
- p.afe.remove(formattingElement)
- return
- }
-
- // Steps 5-6. Find the furthest block.
- var furthestBlock *Node
- for _, e := range p.oe[feIndex:] {
- if isSpecialElement[e.Data] {
- furthestBlock = e
- break
- }
- }
- if furthestBlock == nil {
- e := p.oe.pop()
- for e != formattingElement {
- e = p.oe.pop()
- }
- p.afe.remove(e)
- return
- }
-
- // Steps 7-8. Find the common ancestor and bookmark node.
- commonAncestor := p.oe[feIndex-1]
- bookmark := p.afe.index(formattingElement)
-
- // Step 9. The inner loop. Find the lastNode to reparent.
- lastNode := furthestBlock
- node := furthestBlock
- x := p.oe.index(node)
- // Steps 9.1-9.3.
- for j := 0; j < 3; j++ {
- // Step 9.4.
- x--
- node = p.oe[x]
- // Step 9.5.
- if p.afe.index(node) == -1 {
- p.oe.remove(node)
- continue
- }
- // Step 9.6.
- if node == formattingElement {
- break
- }
- // Step 9.7.
- clone := node.clone()
- p.afe[p.afe.index(node)] = clone
- p.oe[p.oe.index(node)] = clone
- node = clone
- // Step 9.8.
- if lastNode == furthestBlock {
- bookmark = p.afe.index(node) + 1
- }
- // Step 9.9.
- if lastNode.Parent != nil {
- lastNode.Parent.Remove(lastNode)
- }
- node.Add(lastNode)
- // Step 9.10.
- lastNode = node
- }
-
- // Step 10. Reparent lastNode to the common ancestor,
- // or for misnested table nodes, to the foster parent.
- if lastNode.Parent != nil {
- lastNode.Parent.Remove(lastNode)
- }
- switch commonAncestor.Data {
- case "table", "tbody", "tfoot", "thead", "tr":
- // TODO: fix up misnested table nodes; find the foster parent.
- fallthrough
- default:
- commonAncestor.Add(lastNode)
- }
-
- // Steps 11-13. Reparent nodes from the furthest block's children
- // to a clone of the formatting element.
- clone := formattingElement.clone()
- reparentChildren(clone, furthestBlock)
- furthestBlock.Add(clone)
-
- // Step 14. Fix up the list of active formatting elements.
- p.afe.remove(formattingElement)
- p.afe.insert(bookmark, clone)
-
- // Step 15. Fix up the stack of open elements.
- p.oe.remove(formattingElement)
- p.oe.insert(p.oe.index(furthestBlock)+1, clone)
- }
-}
-
-// Section 11.2.5.4.9.
-func inTableIM(p *parser) (insertionMode, bool) {
- var (
- add bool
- data string
- attr []Attribute
- consumed bool
- )
- switch p.tok.Type {
- case ErrorToken:
- // Stop parsing.
- return nil, true
- case TextToken:
- // TODO.
- case StartTagToken:
- switch p.tok.Data {
- case "tbody", "tfoot", "thead":
- add = true
- data = p.tok.Data
- attr = p.tok.Attr
- consumed = true
- case "td", "th", "tr":
- add = true
- data = "tbody"
- default:
- // TODO.
- }
- case EndTagToken:
- switch p.tok.Data {
- case "table":
- if p.popUntil(tableScopeStopTags, "table") {
- // TODO: "reset the insertion mode appropriately" as per 11.2.3.1.
- return inBodyIM, false
- }
- // Ignore the token.
- return inTableIM, true
- case "body", "caption", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr":
- // Ignore the token.
- return inTableIM, true
- }
- }
- if add {
- // TODO: clear the stack back to a table context.
- p.addElement(data, attr)
- return inTableBodyIM, consumed
- }
- // TODO: return useTheRulesFor(inTableIM, inBodyIM, p) unless etc. etc. foster parenting.
- return inTableIM, true
-}
-
-// Section 11.2.5.4.13.
-func inTableBodyIM(p *parser) (insertionMode, bool) {
- var (
- add bool
- data string
- attr []Attribute
- consumed bool
- )
- switch p.tok.Type {
- case ErrorToken:
- // TODO.
- case TextToken:
- // TODO.
- case StartTagToken:
- switch p.tok.Data {
- case "tr":
- add = true
- data = p.tok.Data
- attr = p.tok.Attr
- consumed = true
- case "td", "th":
- add = true
- data = "tr"
- consumed = false
- default:
- // TODO.
- }
- case EndTagToken:
- switch p.tok.Data {
- case "table":
- if p.popUntil(tableScopeStopTags, "tbody", "thead", "tfoot") {
- return inTableIM, false
- }
- // Ignore the token.
- return inTableBodyIM, true
- case "body", "caption", "col", "colgroup", "html", "td", "th", "tr":
- // Ignore the token.
- return inTableBodyIM, true
- }
- }
- if add {
- // TODO: clear the stack back to a table body context.
- p.addElement(data, attr)
- return inRowIM, consumed
- }
- return useTheRulesFor(p, inTableBodyIM, inTableIM)
-}
-
-// Section 11.2.5.4.14.
-func inRowIM(p *parser) (insertionMode, bool) {
- switch p.tok.Type {
- case ErrorToken:
- // TODO.
- case TextToken:
- // TODO.
- case StartTagToken:
- switch p.tok.Data {
- case "td", "th":
- // TODO: clear the stack back to a table row context.
- p.addElement(p.tok.Data, p.tok.Attr)
- p.afe = append(p.afe, &scopeMarker)
- return inCellIM, true
- default:
- // TODO.
- }
- case EndTagToken:
- switch p.tok.Data {
- case "tr":
- // TODO.
- case "table":
- if p.popUntil(tableScopeStopTags, "tr") {
- return inTableBodyIM, false
- }
- // Ignore the token.
- return inRowIM, true
- case "tbody", "tfoot", "thead":
- // TODO.
- case "body", "caption", "col", "colgroup", "html", "td", "th":
- // Ignore the token.
- return inRowIM, true
- default:
- // TODO.
- }
- }
- return useTheRulesFor(p, inRowIM, inTableIM)
-}
-
-// Section 11.2.5.4.15.
-func inCellIM(p *parser) (insertionMode, bool) {
- var (
- closeTheCellAndReprocess bool
- )
- switch p.tok.Type {
- case StartTagToken:
- switch p.tok.Data {
- case "caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr":
- // TODO: check for "td" or "th" in table scope.
- closeTheCellAndReprocess = true
- }
- case EndTagToken:
- switch p.tok.Data {
- case "td", "th":
- // TODO.
- case "body", "caption", "col", "colgroup", "html":
- // TODO.
- case "table", "tbody", "tfoot", "thead", "tr":
- // TODO: check for matching element in table scope.
- closeTheCellAndReprocess = true
- }
- }
- if closeTheCellAndReprocess {
- if p.popUntil(tableScopeStopTags, "td") || p.popUntil(tableScopeStopTags, "th") {
- p.clearActiveFormattingElements()
- return inRowIM, false
- }
- }
- return useTheRulesFor(p, inCellIM, inBodyIM)
-}
-
-// Section 11.2.5.4.18.
-func afterBodyIM(p *parser) (insertionMode, bool) {
- switch p.tok.Type {
- case ErrorToken:
- // TODO.
- case TextToken:
- // TODO.
- case StartTagToken:
- // TODO.
- case EndTagToken:
- switch p.tok.Data {
- case "html":
- // TODO: autoclose the stack of open elements.
- return afterAfterBodyIM, true
- default:
- // TODO.
- }
- }
- return afterBodyIM, true
-}
-
-// Section 11.2.5.4.21.
-func afterAfterBodyIM(p *parser) (insertionMode, bool) {
- switch p.tok.Type {
- case ErrorToken:
- // Stop parsing.
- return nil, true
- case TextToken:
- // TODO.
- case StartTagToken:
- if p.tok.Data == "html" {
- return useTheRulesFor(p, afterAfterBodyIM, inBodyIM)
- }
- }
- return inBodyIM, false
-}
-
-// Parse returns the parse tree for the HTML from the given Reader.
-// The input is assumed to be UTF-8 encoded.
-func Parse(r io.Reader) (*Node, os.Error) {
- p := &parser{
- tokenizer: NewTokenizer(r),
- doc: &Node{
- Type: DocumentNode,
- },
- scripting: true,
- framesetOK: true,
- }
- // Iterate until EOF. Any other error will cause an early return.
- im, consumed := initialIM, true
- for {
- if consumed {
- if err := p.read(); err != nil {
- if err == os.EOF {
- break
- }
- return nil, err
- }
- }
- im, consumed = im(p)
- }
- // Loop until the final token (the ErrorToken signifying EOF) is consumed.
- for {
- if im, consumed = im(p); consumed {
- break
- }
- }
- return p.doc, nil
-}
diff --git a/src/pkg/html/parse_test.go b/src/pkg/html/parse_test.go
deleted file mode 100644
index 7d918d250..000000000
--- a/src/pkg/html/parse_test.go
+++ /dev/null
@@ -1,156 +0,0 @@
-// Copyright 2010 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package html
-
-import (
- "bufio"
- "bytes"
- "fmt"
- "io"
- "io/ioutil"
- "os"
- "strings"
- "testing"
-)
-
-func pipeErr(err os.Error) io.Reader {
- pr, pw := io.Pipe()
- pw.CloseWithError(err)
- return pr
-}
-
-func readDat(filename string, c chan io.Reader) {
- f, err := os.Open("testdata/webkit/" + filename)
- if err != nil {
- c <- pipeErr(err)
- return
- }
- defer f.Close()
-
- // Loop through the lines of the file. Each line beginning with "#" denotes
- // a new section, which is returned as a separate io.Reader.
- r := bufio.NewReader(f)
- var pw *io.PipeWriter
- for {
- line, err := r.ReadSlice('\n')
- if err != nil {
- if pw != nil {
- pw.CloseWithError(err)
- pw = nil
- } else {
- c <- pipeErr(err)
- }
- return
- }
- if len(line) == 0 {
- continue
- }
- if line[0] == '#' {
- if pw != nil {
- pw.Close()
- }
- var pr *io.PipeReader
- pr, pw = io.Pipe()
- c <- pr
- continue
- }
- if line[0] != '|' {
- // Strip the trailing '\n'.
- line = line[:len(line)-1]
- }
- if pw != nil {
- if _, err := pw.Write(line); err != nil {
- pw.CloseWithError(err)
- pw = nil
- }
- }
- }
-}
-
-func dumpLevel(w io.Writer, n *Node, level int) os.Error {
- io.WriteString(w, "| ")
- for i := 0; i < level; i++ {
- io.WriteString(w, " ")
- }
- switch n.Type {
- case ErrorNode:
- return os.NewError("unexpected ErrorNode")
- case DocumentNode:
- return os.NewError("unexpected DocumentNode")
- case ElementNode:
- fmt.Fprintf(w, "<%s>", EscapeString(n.Data))
- case TextNode:
- fmt.Fprintf(w, "%q", EscapeString(n.Data))
- case CommentNode:
- return os.NewError("COMMENT")
- case DoctypeNode:
- fmt.Fprintf(w, "<!DOCTYPE %s>", EscapeString(n.Data))
- case scopeMarkerNode:
- return os.NewError("unexpected scopeMarkerNode")
- default:
- return os.NewError("unknown node type")
- }
- io.WriteString(w, "\n")
- for _, c := range n.Child {
- if err := dumpLevel(w, c, level+1); err != nil {
- return err
- }
- }
- return nil
-}
-
-func dump(n *Node) (string, os.Error) {
- if n == nil || len(n.Child) == 0 {
- return "", nil
- }
- b := bytes.NewBuffer(nil)
- for _, child := range n.Child {
- if err := dumpLevel(b, child, 0); err != nil {
- return "", err
- }
- }
- return b.String(), nil
-}
-
-func TestParser(t *testing.T) {
- // TODO(nigeltao): Process all the .dat files, not just the first one.
- filenames := []string{
- "tests1.dat",
- }
- for _, filename := range filenames {
- rc := make(chan io.Reader)
- go readDat(filename, rc)
- // TODO(nigeltao): Process all test cases, not just a subset.
- for i := 0; i < 25; i++ {
- // Parse the #data section.
- b, err := ioutil.ReadAll(<-rc)
- if err != nil {
- t.Fatal(err)
- }
- text := string(b)
- doc, err := Parse(strings.NewReader(text))
- if err != nil {
- t.Fatal(err)
- }
- actual, err := dump(doc)
- if err != nil {
- t.Fatal(err)
- }
- // Skip the #error section.
- if _, err := io.Copy(ioutil.Discard, <-rc); err != nil {
- t.Fatal(err)
- }
- // Compare the parsed tree to the #document section.
- b, err = ioutil.ReadAll(<-rc)
- if err != nil {
- t.Fatal(err)
- }
- expected := string(b)
- if actual != expected {
- t.Errorf("%s test #%d %q, actual vs expected:\n----\n%s----\n%s----", filename, i, text, actual, expected)
- }
- }
- }
-}
diff --git a/src/pkg/html/template/Makefile b/src/pkg/html/template/Makefile
new file mode 100644
index 000000000..d27601a33
--- /dev/null
+++ b/src/pkg/html/template/Makefile
@@ -0,0 +1,23 @@
+# Copyright 2011 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+include ../../../Make.inc
+
+TARG=html/template
+GOFILES=\
+ attr.go\
+ clone.go\
+ content.go\
+ context.go\
+ css.go\
+ doc.go\
+ error.go\
+ escape.go\
+ html.go\
+ js.go\
+ template.go\
+ transition.go\
+ url.go\
+
+include ../../../Make.pkg
diff --git a/src/pkg/html/template/attr.go b/src/pkg/html/template/attr.go
new file mode 100644
index 000000000..3ea02880d
--- /dev/null
+++ b/src/pkg/html/template/attr.go
@@ -0,0 +1,175 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package template
+
+import (
+ "strings"
+)
+
+// attrTypeMap[n] describes the value of the given attribute.
+// If an attribute affects (or can mask) the encoding or interpretation of
+// other content, or affects the contents, idempotency, or credentials of a
+// network message, then the value in this map is contentTypeUnsafe.
+// This map is derived from HTML5, specifically
+// http://www.w3.org/TR/html5/Overview.html#attributes-1
+// as well as "%URI"-typed attributes from
+// http://www.w3.org/TR/html4/index/attributes.html
+var attrTypeMap = map[string]contentType{
+ "accept": contentTypePlain,
+ "accept-charset": contentTypeUnsafe,
+ "action": contentTypeURL,
+ "alt": contentTypePlain,
+ "archive": contentTypeURL,
+ "async": contentTypeUnsafe,
+ "autocomplete": contentTypePlain,
+ "autofocus": contentTypePlain,
+ "autoplay": contentTypePlain,
+ "background": contentTypeURL,
+ "border": contentTypePlain,
+ "checked": contentTypePlain,
+ "cite": contentTypeURL,
+ "challenge": contentTypeUnsafe,
+ "charset": contentTypeUnsafe,
+ "class": contentTypePlain,
+ "classid": contentTypeURL,
+ "codebase": contentTypeURL,
+ "cols": contentTypePlain,
+ "colspan": contentTypePlain,
+ "content": contentTypeUnsafe,
+ "contenteditable": contentTypePlain,
+ "contextmenu": contentTypePlain,
+ "controls": contentTypePlain,
+ "coords": contentTypePlain,
+ "crossorigin": contentTypeUnsafe,
+ "data": contentTypeURL,
+ "datetime": contentTypePlain,
+ "default": contentTypePlain,
+ "defer": contentTypeUnsafe,
+ "dir": contentTypePlain,
+ "dirname": contentTypePlain,
+ "disabled": contentTypePlain,
+ "draggable": contentTypePlain,
+ "dropzone": contentTypePlain,
+ "enctype": contentTypeUnsafe,
+ "for": contentTypePlain,
+ "form": contentTypeUnsafe,
+ "formaction": contentTypeURL,
+ "formenctype": contentTypeUnsafe,
+ "formmethod": contentTypeUnsafe,
+ "formnovalidate": contentTypeUnsafe,
+ "formtarget": contentTypePlain,
+ "headers": contentTypePlain,
+ "height": contentTypePlain,
+ "hidden": contentTypePlain,
+ "high": contentTypePlain,
+ "href": contentTypeURL,
+ "hreflang": contentTypePlain,
+ "http-equiv": contentTypeUnsafe,
+ "icon": contentTypeURL,
+ "id": contentTypePlain,
+ "ismap": contentTypePlain,
+ "keytype": contentTypeUnsafe,
+ "kind": contentTypePlain,
+ "label": contentTypePlain,
+ "lang": contentTypePlain,
+ "language": contentTypeUnsafe,
+ "list": contentTypePlain,
+ "longdesc": contentTypeURL,
+ "loop": contentTypePlain,
+ "low": contentTypePlain,
+ "manifest": contentTypeURL,
+ "max": contentTypePlain,
+ "maxlength": contentTypePlain,
+ "media": contentTypePlain,
+ "mediagroup": contentTypePlain,
+ "method": contentTypeUnsafe,
+ "min": contentTypePlain,
+ "multiple": contentTypePlain,
+ "name": contentTypePlain,
+ "novalidate": contentTypeUnsafe,
+ // Skip handler names from
+ // http://www.w3.org/TR/html5/Overview.html#event-handlers-on-elements-document-objects-and-window-objects
+ // since we have special handling in attrType.
+ "open": contentTypePlain,
+ "optimum": contentTypePlain,
+ "pattern": contentTypeUnsafe,
+ "placeholder": contentTypePlain,
+ "poster": contentTypeURL,
+ "profile": contentTypeURL,
+ "preload": contentTypePlain,
+ "pubdate": contentTypePlain,
+ "radiogroup": contentTypePlain,
+ "readonly": contentTypePlain,
+ "rel": contentTypeUnsafe,
+ "required": contentTypePlain,
+ "reversed": contentTypePlain,
+ "rows": contentTypePlain,
+ "rowspan": contentTypePlain,
+ "sandbox": contentTypeUnsafe,
+ "spellcheck": contentTypePlain,
+ "scope": contentTypePlain,
+ "scoped": contentTypePlain,
+ "seamless": contentTypePlain,
+ "selected": contentTypePlain,
+ "shape": contentTypePlain,
+ "size": contentTypePlain,
+ "sizes": contentTypePlain,
+ "span": contentTypePlain,
+ "src": contentTypeURL,
+ "srcdoc": contentTypeHTML,
+ "srclang": contentTypePlain,
+ "start": contentTypePlain,
+ "step": contentTypePlain,
+ "style": contentTypeCSS,
+ "tabindex": contentTypePlain,
+ "target": contentTypePlain,
+ "title": contentTypePlain,
+ "type": contentTypeUnsafe,
+ "usemap": contentTypeURL,
+ "value": contentTypeUnsafe,
+ "width": contentTypePlain,
+ "wrap": contentTypePlain,
+ "xmlns": contentTypeURL,
+}
+
+// attrType returns a conservative (upper-bound on authority) guess at the
+// type of the named attribute.
+func attrType(name string) contentType {
+ name = strings.ToLower(name)
+ if strings.HasPrefix(name, "data-") {
+ // Strip data- so that custom attribute heuristics below are
+ // widely applied.
+ // Treat data-action as URL below.
+ name = name[5:]
+ } else if colon := strings.IndexRune(name, ':'); colon != -1 {
+ if name[:colon] == "xmlns" {
+ return contentTypeURL
+ }
+ // Treat svg:href and xlink:href as href below.
+ name = name[colon+1:]
+ }
+ if t, ok := attrTypeMap[name]; ok {
+ return t
+ }
+ // Treat partial event handler names as script.
+ if strings.HasPrefix(name, "on") {
+ return contentTypeJS
+ }
+
+ // Heuristics to prevent "javascript:..." injection in custom
+ // data attributes and custom attributes like g:tweetUrl.
+ // http://www.w3.org/TR/html5/elements.html#embedding-custom-non-visible-data-with-the-data-attributes:
+ // "Custom data attributes are intended to store custom data
+ // private to the page or application, for which there are no
+ // more appropriate attributes or elements."
+ // Developers seem to store URL content in data attributes whose names
+ // start or end with "URI" or "URL".
+ if strings.Contains(name, "src") ||
+ strings.Contains(name, "uri") ||
+ strings.Contains(name, "url") {
+ return contentTypeURL
+ }
+ return contentTypePlain
+}
diff --git a/src/pkg/html/template/clone.go b/src/pkg/html/template/clone.go
new file mode 100644
index 000000000..d0d8ea467
--- /dev/null
+++ b/src/pkg/html/template/clone.go
@@ -0,0 +1,90 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package template
+
+import (
+ "text/template/parse"
+)
+
+// clone clones a template Node.
+func clone(n parse.Node) parse.Node {
+ switch t := n.(type) {
+ case *parse.ActionNode:
+ return cloneAction(t)
+ case *parse.IfNode:
+ b := new(parse.IfNode)
+ copyBranch(&b.BranchNode, &t.BranchNode)
+ return b
+ case *parse.ListNode:
+ return cloneList(t)
+ case *parse.RangeNode:
+ b := new(parse.RangeNode)
+ copyBranch(&b.BranchNode, &t.BranchNode)
+ return b
+ case *parse.TemplateNode:
+ return cloneTemplate(t)
+ case *parse.TextNode:
+ return cloneText(t)
+ case *parse.WithNode:
+ b := new(parse.WithNode)
+ copyBranch(&b.BranchNode, &t.BranchNode)
+ return b
+ }
+ panic("cloning " + n.String() + " is unimplemented")
+}
+
+// cloneAction returns a deep clone of n.
+func cloneAction(n *parse.ActionNode) *parse.ActionNode {
+ // We use keyless fields because they won't compile if a field is added.
+ return &parse.ActionNode{n.NodeType, n.Line, clonePipe(n.Pipe)}
+}
+
+// cloneList returns a deep clone of n.
+func cloneList(n *parse.ListNode) *parse.ListNode {
+ if n == nil {
+ return nil
+ }
+ // We use keyless fields because they won't compile if a field is added.
+ c := parse.ListNode{n.NodeType, make([]parse.Node, len(n.Nodes))}
+ for i, child := range n.Nodes {
+ c.Nodes[i] = clone(child)
+ }
+ return &c
+}
+
+// clonePipe returns a shallow clone of n.
+// The escaper does not modify pipe descendants in place so there's no need to
+// clone deeply.
+func clonePipe(n *parse.PipeNode) *parse.PipeNode {
+ if n == nil {
+ return nil
+ }
+ // We use keyless fields because they won't compile if a field is added.
+ return &parse.PipeNode{n.NodeType, n.Line, n.Decl, n.Cmds}
+}
+
+// cloneTemplate returns a deep clone of n.
+func cloneTemplate(n *parse.TemplateNode) *parse.TemplateNode {
+ // We use keyless fields because they won't compile if a field is added.
+ return &parse.TemplateNode{n.NodeType, n.Line, n.Name, clonePipe(n.Pipe)}
+}
+
+// cloneText clones the given node sharing its []byte.
+func cloneText(n *parse.TextNode) *parse.TextNode {
+ // We use keyless fields because they won't compile if a field is added.
+ return &parse.TextNode{n.NodeType, n.Text}
+}
+
+// copyBranch clones src into dst.
+func copyBranch(dst, src *parse.BranchNode) {
+ // We use keyless fields because they won't compile if a field is added.
+ *dst = parse.BranchNode{
+ src.NodeType,
+ src.Line,
+ clonePipe(src.Pipe),
+ cloneList(src.List),
+ cloneList(src.ElseList),
+ }
+}
diff --git a/src/pkg/html/template/clone_test.go b/src/pkg/html/template/clone_test.go
new file mode 100644
index 000000000..39788173b
--- /dev/null
+++ b/src/pkg/html/template/clone_test.go
@@ -0,0 +1,92 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package template
+
+import (
+ "bytes"
+ "testing"
+)
+
+func TestClone(t *testing.T) {
+ tests := []struct {
+ input, want, wantClone string
+ }{
+ {
+ `Hello, {{if true}}{{"<World>"}}{{end}}!`,
+ "Hello, <World>!",
+ "Hello, &lt;World&gt;!",
+ },
+ {
+ `Hello, {{if false}}{{.X}}{{else}}{{"<World>"}}{{end}}!`,
+ "Hello, <World>!",
+ "Hello, &lt;World&gt;!",
+ },
+ {
+ `Hello, {{with "<World>"}}{{.}}{{end}}!`,
+ "Hello, <World>!",
+ "Hello, &lt;World&gt;!",
+ },
+ {
+ `{{range .}}<p>{{.}}</p>{{end}}`,
+ "<p>foo</p><p><bar></p><p>baz</p>",
+ "<p>foo</p><p>&lt;bar&gt;</p><p>baz</p>",
+ },
+ {
+ `Hello, {{"<World>" | html}}!`,
+ "Hello, &lt;World&gt;!",
+ "Hello, &lt;World&gt;!",
+ },
+ {
+ `Hello{{if 1}}, World{{else}}{{template "d"}}{{end}}!`,
+ "Hello, World!",
+ "Hello, World!",
+ },
+ }
+
+ for _, test := range tests {
+ s, err := New("s").Parse(test.input)
+ if err != nil {
+ t.Errorf("input=%q: unexpected parse error %v", test.input, err)
+ }
+
+ d, _ := New("d").Parse(test.input)
+ // Hack: just replace the root of the tree.
+ d.text.Root = cloneList(s.text.Root)
+
+ if want, got := s.text.Root.String(), d.text.Root.String(); want != got {
+ t.Errorf("want %q, got %q", want, got)
+ }
+
+ err = escapeTemplates(d, "d")
+ if err != nil {
+ t.Errorf("%q: failed to escape: %s", test.input, err)
+ continue
+ }
+
+ if want, got := "s", s.Name(); want != got {
+ t.Errorf("want %q, got %q", want, got)
+ continue
+ }
+ if want, got := "d", d.Name(); want != got {
+ t.Errorf("want %q, got %q", want, got)
+ continue
+ }
+
+ data := []string{"foo", "<bar>", "baz"}
+
+ var b bytes.Buffer
+ d.Execute(&b, data)
+ if got := b.String(); got != test.wantClone {
+ t.Errorf("input=%q: want %q, got %q", test.input, test.wantClone, got)
+ }
+
+ // Make sure escaping d did not affect s.
+ b.Reset()
+ s.text.Execute(&b, data)
+ if got := b.String(); got != test.want {
+ t.Errorf("input=%q: want %q, got %q", test.input, test.want, got)
+ }
+ }
+}
diff --git a/src/pkg/html/template/content.go b/src/pkg/html/template/content.go
new file mode 100644
index 000000000..4de7ccde9
--- /dev/null
+++ b/src/pkg/html/template/content.go
@@ -0,0 +1,113 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package template
+
+import (
+ "fmt"
+ "reflect"
+)
+
+// Strings of content from a trusted source.
+type (
+ // CSS encapsulates known safe content that matches any of:
+ // 1. The CSS3 stylesheet production, such as `p { color: purple }`.
+ // 2. The CSS3 rule production, such as `a[href=~"https:"].foo#bar`.
+ // 3. CSS3 declaration productions, such as `color: red; margin: 2px`.
+ // 4. The CSS3 value production, such as `rgba(0, 0, 255, 127)`.
+ // See http://www.w3.org/TR/css3-syntax/#style
+ CSS string
+
+ // HTML encapsulates a known safe HTML document fragment.
+ // It should not be used for HTML from a third-party, or HTML with
+ // unclosed tags or comments. The outputs of a sound HTML sanitizer
+ // and a template escaped by this package are fine for use with HTML.
+ HTML string
+
+ // HTMLAttr encapsulates an HTML attribute from a trusted source,
+ // for example: ` dir="ltr"`.
+ HTMLAttr string
+
+ // JS encapsulates a known safe EcmaScript5 Expression, for example,
+ // `(x + y * z())`.
+ // Template authors are responsible for ensuring that typed expressions
+ // do not break the intended precedence and that there is no
+ // statement/expression ambiguity as when passing an expression like
+ // "{ foo: bar() }\n['foo']()", which is both a valid Expression and a
+ // valid Program with a very different meaning.
+ JS string
+
+ // JSStr encapsulates a sequence of characters meant to be embedded
+ // between quotes in a JavaScript expression.
+ // The string must match a series of StringCharacters:
+ // StringCharacter :: SourceCharacter but not `\` or LineTerminator
+ // | EscapeSequence
+ // Note that LineContinuations are not allowed.
+ // JSStr("foo\\nbar") is fine, but JSStr("foo\\\nbar") is not.
+ JSStr string
+
+ // URL encapsulates a known safe URL as defined in RFC 3986.
+ // A URL like `javascript:checkThatFormNotEditedBeforeLeavingPage()`
+ // from a trusted source should go in the page, but by default dynamic
+ // `javascript:` URLs are filtered out since they are a frequently
+ // exploited injection vector.
+ URL string
+)
+
+type contentType uint8
+
+const (
+ contentTypePlain contentType = iota
+ contentTypeCSS
+ contentTypeHTML
+ contentTypeHTMLAttr
+ contentTypeJS
+ contentTypeJSStr
+ contentTypeURL
+ // contentTypeUnsafe is used in attr.go for values that affect how
+ // embedded content and network messages are formed, vetted,
+ // or interpreted; or which credentials network messages carry.
+ contentTypeUnsafe
+)
+
+// indirect returns the value, after dereferencing as many times
+// as necessary to reach the base type (or nil).
+func indirect(a interface{}) interface{} {
+ if t := reflect.TypeOf(a); t.Kind() != reflect.Ptr {
+ // Avoid creating a reflect.Value if it's not a pointer.
+ return a
+ }
+ v := reflect.ValueOf(a)
+ for v.Kind() == reflect.Ptr && !v.IsNil() {
+ v = v.Elem()
+ }
+ return v.Interface()
+}
+
+// stringify converts its arguments to a string and returns it with the type of the content.
+// All pointers are dereferenced, as in the text/template package.
+func stringify(args ...interface{}) (string, contentType) {
+ if len(args) == 1 {
+ switch s := indirect(args[0]).(type) {
+ case string:
+ return s, contentTypePlain
+ case CSS:
+ return string(s), contentTypeCSS
+ case HTML:
+ return string(s), contentTypeHTML
+ case HTMLAttr:
+ return string(s), contentTypeHTMLAttr
+ case JS:
+ return string(s), contentTypeJS
+ case JSStr:
+ return string(s), contentTypeJSStr
+ case URL:
+ return string(s), contentTypeURL
+ }
+ }
+ for i, arg := range args {
+ args[i] = indirect(arg)
+ }
+ return fmt.Sprint(args...), contentTypePlain
+}
diff --git a/src/pkg/html/template/content_test.go b/src/pkg/html/template/content_test.go
new file mode 100644
index 000000000..c96a521a5
--- /dev/null
+++ b/src/pkg/html/template/content_test.go
@@ -0,0 +1,221 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package template
+
+import (
+ "bytes"
+ "strings"
+ "testing"
+)
+
+func TestTypedContent(t *testing.T) {
+ data := []interface{}{
+ `<b> "foo%" O'Reilly &bar;`,
+ CSS(`a[href =~ "//example.com"]#foo`),
+ HTML(`Hello, <b>World</b> &amp;tc!`),
+ HTMLAttr(` dir="ltr"`),
+ JS(`c && alert("Hello, World!");`),
+ JSStr(`Hello, World & O'Reilly\x21`),
+ URL(`greeting=H%69&addressee=(World)`),
+ }
+
+ // For each content sensitive escaper, see how it does on
+ // each of the typed strings above.
+ tests := []struct {
+ // A template containing a single {{.}}.
+ input string
+ want []string
+ }{
+ {
+ `<style>{{.}} { color: blue }</style>`,
+ []string{
+ `ZgotmplZ`,
+ // Allowed but not escaped.
+ `a[href =~ "//example.com"]#foo`,
+ `ZgotmplZ`,
+ `ZgotmplZ`,
+ `ZgotmplZ`,
+ `ZgotmplZ`,
+ `ZgotmplZ`,
+ },
+ },
+ {
+ `<div style="{{.}}">`,
+ []string{
+ `ZgotmplZ`,
+ // Allowed and HTML escaped.
+ `a[href =~ &#34;//example.com&#34;]#foo`,
+ `ZgotmplZ`,
+ `ZgotmplZ`,
+ `ZgotmplZ`,
+ `ZgotmplZ`,
+ `ZgotmplZ`,
+ },
+ },
+ {
+ `{{.}}`,
+ []string{
+ `&lt;b&gt; &#34;foo%&#34; O&#39;Reilly &amp;bar;`,
+ `a[href =~ &#34;//example.com&#34;]#foo`,
+ // Not escaped.
+ `Hello, <b>World</b> &amp;tc!`,
+ ` dir=&#34;ltr&#34;`,
+ `c &amp;&amp; alert(&#34;Hello, World!&#34;);`,
+ `Hello, World &amp; O&#39;Reilly\x21`,
+ `greeting=H%69&amp;addressee=(World)`,
+ },
+ },
+ {
+ `<a{{.}}>`,
+ []string{
+ `ZgotmplZ`,
+ `ZgotmplZ`,
+ `ZgotmplZ`,
+ // Allowed but not escaped.
+ ` dir="ltr"`,
+ `ZgotmplZ`,
+ `ZgotmplZ`,
+ `ZgotmplZ`,
+ },
+ },
+ {
+ `<a title={{.}}>`,
+ []string{
+ `&lt;b&gt;&#32;&#34;foo%&#34;&#32;O&#39;Reilly&#32;&amp;bar;`,
+ `a[href&#32;&#61;~&#32;&#34;//example.com&#34;]#foo`,
+ // Tags stripped, spaces escaped, entity not re-escaped.
+ `Hello,&#32;World&#32;&amp;tc!`,
+ `&#32;dir&#61;&#34;ltr&#34;`,
+ `c&#32;&amp;&amp;&#32;alert(&#34;Hello,&#32;World!&#34;);`,
+ `Hello,&#32;World&#32;&amp;&#32;O&#39;Reilly\x21`,
+ `greeting&#61;H%69&amp;addressee&#61;(World)`,
+ },
+ },
+ {
+ `<a title='{{.}}'>`,
+ []string{
+ `&lt;b&gt; &#34;foo%&#34; O&#39;Reilly &amp;bar;`,
+ `a[href =~ &#34;//example.com&#34;]#foo`,
+ // Tags stripped, entity not re-escaped.
+ `Hello, World &amp;tc!`,
+ ` dir=&#34;ltr&#34;`,
+ `c &amp;&amp; alert(&#34;Hello, World!&#34;);`,
+ `Hello, World &amp; O&#39;Reilly\x21`,
+ `greeting=H%69&amp;addressee=(World)`,
+ },
+ },
+ {
+ `<textarea>{{.}}</textarea>`,
+ []string{
+ `&lt;b&gt; &#34;foo%&#34; O&#39;Reilly &amp;bar;`,
+ `a[href =~ &#34;//example.com&#34;]#foo`,
+ // Angle brackets escaped to prevent injection of close tags, entity not re-escaped.
+ `Hello, &lt;b&gt;World&lt;/b&gt; &amp;tc!`,
+ ` dir=&#34;ltr&#34;`,
+ `c &amp;&amp; alert(&#34;Hello, World!&#34;);`,
+ `Hello, World &amp; O&#39;Reilly\x21`,
+ `greeting=H%69&amp;addressee=(World)`,
+ },
+ },
+ {
+ `<script>alert({{.}})</script>`,
+ []string{
+ `"\u003cb\u003e \"foo%\" O'Reilly &bar;"`,
+ `"a[href =~ \"//example.com\"]#foo"`,
+ `"Hello, \u003cb\u003eWorld\u003c/b\u003e &amp;tc!"`,
+ `" dir=\"ltr\""`,
+ // Not escaped.
+ `c && alert("Hello, World!");`,
+ // Escape sequence not over-escaped.
+ `"Hello, World & O'Reilly\x21"`,
+ `"greeting=H%69&addressee=(World)"`,
+ },
+ },
+ {
+ `<button onclick="alert({{.}})">`,
+ []string{
+ `&#34;\u003cb\u003e \&#34;foo%\&#34; O&#39;Reilly &amp;bar;&#34;`,
+ `&#34;a[href =~ \&#34;//example.com\&#34;]#foo&#34;`,
+ `&#34;Hello, \u003cb\u003eWorld\u003c/b\u003e &amp;amp;tc!&#34;`,
+ `&#34; dir=\&#34;ltr\&#34;&#34;`,
+ // Not JS escaped but HTML escaped.
+ `c &amp;&amp; alert(&#34;Hello, World!&#34;);`,
+ // Escape sequence not over-escaped.
+ `&#34;Hello, World &amp; O&#39;Reilly\x21&#34;`,
+ `&#34;greeting=H%69&amp;addressee=(World)&#34;`,
+ },
+ },
+ {
+ `<script>alert("{{.}}")</script>`,
+ []string{
+ `\x3cb\x3e \x22foo%\x22 O\x27Reilly \x26bar;`,
+ `a[href =~ \x22\/\/example.com\x22]#foo`,
+ `Hello, \x3cb\x3eWorld\x3c\/b\x3e \x26amp;tc!`,
+ ` dir=\x22ltr\x22`,
+ `c \x26\x26 alert(\x22Hello, World!\x22);`,
+ // Escape sequence not over-escaped.
+ `Hello, World \x26 O\x27Reilly\x21`,
+ `greeting=H%69\x26addressee=(World)`,
+ },
+ },
+ {
+ `<button onclick='alert("{{.}}")'>`,
+ []string{
+ `\x3cb\x3e \x22foo%\x22 O\x27Reilly \x26bar;`,
+ `a[href =~ \x22\/\/example.com\x22]#foo`,
+ `Hello, \x3cb\x3eWorld\x3c\/b\x3e \x26amp;tc!`,
+ ` dir=\x22ltr\x22`,
+ `c \x26\x26 alert(\x22Hello, World!\x22);`,
+ // Escape sequence not over-escaped.
+ `Hello, World \x26 O\x27Reilly\x21`,
+ `greeting=H%69\x26addressee=(World)`,
+ },
+ },
+ {
+ `<a href="?q={{.}}">`,
+ []string{
+ `%3cb%3e%20%22foo%25%22%20O%27Reilly%20%26bar%3b`,
+ `a%5bhref%20%3d~%20%22%2f%2fexample.com%22%5d%23foo`,
+ `Hello%2c%20%3cb%3eWorld%3c%2fb%3e%20%26amp%3btc%21`,
+ `%20dir%3d%22ltr%22`,
+ `c%20%26%26%20alert%28%22Hello%2c%20World%21%22%29%3b`,
+ `Hello%2c%20World%20%26%20O%27Reilly%5cx21`,
+ // Quotes and parens are escaped but %69 is not over-escaped. HTML escaping is done.
+ `greeting=H%69&amp;addressee=%28World%29`,
+ },
+ },
+ {
+ `<style>body { background: url('?img={{.}}') }</style>`,
+ []string{
+ `%3cb%3e%20%22foo%25%22%20O%27Reilly%20%26bar%3b`,
+ `a%5bhref%20%3d~%20%22%2f%2fexample.com%22%5d%23foo`,
+ `Hello%2c%20%3cb%3eWorld%3c%2fb%3e%20%26amp%3btc%21`,
+ `%20dir%3d%22ltr%22`,
+ `c%20%26%26%20alert%28%22Hello%2c%20World%21%22%29%3b`,
+ `Hello%2c%20World%20%26%20O%27Reilly%5cx21`,
+ // Quotes and parens are escaped but %69 is not over-escaped. HTML escaping is not done.
+ `greeting=H%69&addressee=%28World%29`,
+ },
+ },
+ }
+
+ for _, test := range tests {
+ tmpl := Must(New("x").Parse(test.input))
+ pre := strings.Index(test.input, "{{.}}")
+ post := len(test.input) - (pre + 5)
+ var b bytes.Buffer
+ for i, x := range data {
+ b.Reset()
+ if err := tmpl.Execute(&b, x); err != nil {
+ t.Errorf("%q with %v: %s", test.input, x, err)
+ continue
+ }
+ if want, got := test.want[i], b.String()[pre:b.Len()-post]; want != got {
+ t.Errorf("%q with %v:\nwant\n\t%q,\ngot\n\t%q\n", test.input, x, want, got)
+ continue
+ }
+ }
+ }
+}
diff --git a/src/pkg/html/template/context.go b/src/pkg/html/template/context.go
new file mode 100644
index 000000000..7202221b8
--- /dev/null
+++ b/src/pkg/html/template/context.go
@@ -0,0 +1,339 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package template
+
+import (
+ "fmt"
+)
+
+// context describes the state an HTML parser must be in when it reaches the
+// portion of HTML produced by evaluating a particular template node.
+//
+// The zero value of type context is the start context for a template that
+// produces an HTML fragment as defined at
+// http://www.w3.org/TR/html5/the-end.html#parsing-html-fragments
+// where the context element is null.
+type context struct {
+ state state
+ delim delim
+ urlPart urlPart
+ jsCtx jsCtx
+ attr attr
+ element element
+ err *Error
+}
+
+func (c context) String() string {
+ return fmt.Sprintf("{%v %v %v %v %v %v %v}", c.state, c.delim, c.urlPart, c.jsCtx, c.attr, c.element, c.err)
+}
+
+// eq returns whether two contexts are equal.
+func (c context) eq(d context) bool {
+ return c.state == d.state &&
+ c.delim == d.delim &&
+ c.urlPart == d.urlPart &&
+ c.jsCtx == d.jsCtx &&
+ c.attr == d.attr &&
+ c.element == d.element &&
+ c.err == d.err
+}
+
+// mangle produces an identifier that includes a suffix that distinguishes it
+// from template names mangled with different contexts.
+func (c context) mangle(templateName string) string {
+ // The mangled name for the default context is the input templateName.
+ if c.state == stateText {
+ return templateName
+ }
+ s := templateName + "$htmltemplate_" + c.state.String()
+ if c.delim != 0 {
+ s += "_" + c.delim.String()
+ }
+ if c.urlPart != 0 {
+ s += "_" + c.urlPart.String()
+ }
+ if c.jsCtx != 0 {
+ s += "_" + c.jsCtx.String()
+ }
+ if c.attr != 0 {
+ s += "_" + c.attr.String()
+ }
+ if c.element != 0 {
+ s += "_" + c.element.String()
+ }
+ return s
+}
+
// state is a high-level HTML parser state.
//
// It bounds the top of the element stack, and by extension the HTML insertion
// mode, but it also includes states that do not correspond to anything in the
// HTML5 parsing algorithm, because a single token production in the HTML
// grammar may contain embedded actions in a template. For instance, the quoted
// HTML attribute produced by
//     <div title="Hello {{.World}}">
// is a single token in HTML's grammar but in a template spans several nodes.
type state uint8

const (
	// stateText is parsed character data: the parse position is outside
	// any HTML tag, directive, comment, and special element body.
	stateText state = iota
	// stateTag is the position before an HTML attribute or the end of a
	// tag.
	stateTag
	// stateAttrName is the inside of an attribute name, i.e. between the
	// ^'s in ` ^name^ = value`.
	stateAttrName
	// stateAfterName follows a finished attribute name but precedes any
	// equals sign, i.e. between the ^'s in ` name^ ^= value`.
	stateAfterName
	// stateBeforeValue follows the equals sign but precedes the value,
	// i.e. between the ^'s in ` name =^ ^value`.
	stateBeforeValue
	// stateHTMLCmt is the inside of an <!-- HTML comment -->.
	stateHTMLCmt
	// stateRCDATA is the inside of an RCDATA element (<textarea> or
	// <title>) as described at
	// http://dev.w3.org/html5/spec/syntax.html#elements-0
	stateRCDATA
	// stateAttr is the inside of an HTML attribute whose content is text.
	stateAttr
	// stateURL is the inside of an HTML attribute whose content is a URL.
	stateURL
	// stateJS is the inside of an event handler or script element.
	stateJS
	// stateJSDqStr is the inside of a JavaScript double quoted string.
	stateJSDqStr
	// stateJSSqStr is the inside of a JavaScript single quoted string.
	stateJSSqStr
	// stateJSRegexp is the inside of a JavaScript regexp literal.
	stateJSRegexp
	// stateJSBlockCmt is the inside of a JavaScript /* block comment */.
	stateJSBlockCmt
	// stateJSLineCmt is the inside of a JavaScript // line comment.
	stateJSLineCmt
	// stateCSS is the inside of a <style> element or style attribute.
	stateCSS
	// stateCSSDqStr is the inside of a CSS double quoted string.
	stateCSSDqStr
	// stateCSSSqStr is the inside of a CSS single quoted string.
	stateCSSSqStr
	// stateCSSDqURL is the inside of a CSS double quoted url("...").
	stateCSSDqURL
	// stateCSSSqURL is the inside of a CSS single quoted url('...').
	stateCSSSqURL
	// stateCSSURL is the inside of a CSS unquoted url(...).
	stateCSSURL
	// stateCSSBlockCmt is the inside of a CSS /* block comment */.
	stateCSSBlockCmt
	// stateCSSLineCmt is the inside of a CSS // line comment.
	stateCSSLineCmt
	// stateError is an infectious error state outside any valid
	// HTML/CSS/JS construct.
	stateError
)

// stateNames maps each state constant to its identifier, for String.
var stateNames = [...]string{
	stateText:        "stateText",
	stateTag:         "stateTag",
	stateAttrName:    "stateAttrName",
	stateAfterName:   "stateAfterName",
	stateBeforeValue: "stateBeforeValue",
	stateHTMLCmt:     "stateHTMLCmt",
	stateRCDATA:      "stateRCDATA",
	stateAttr:        "stateAttr",
	stateURL:         "stateURL",
	stateJS:          "stateJS",
	stateJSDqStr:     "stateJSDqStr",
	stateJSSqStr:     "stateJSSqStr",
	stateJSRegexp:    "stateJSRegexp",
	stateJSBlockCmt:  "stateJSBlockCmt",
	stateJSLineCmt:   "stateJSLineCmt",
	stateCSS:         "stateCSS",
	stateCSSDqStr:    "stateCSSDqStr",
	stateCSSSqStr:    "stateCSSSqStr",
	stateCSSDqURL:    "stateCSSDqURL",
	stateCSSSqURL:    "stateCSSSqURL",
	stateCSSURL:      "stateCSSURL",
	stateCSSBlockCmt: "stateCSSBlockCmt",
	stateCSSLineCmt:  "stateCSSLineCmt",
	stateError:       "stateError",
}

// String returns the constant's identifier, or "illegal state N" for a value
// outside the declared range.
func (s state) String() string {
	if int(s) >= len(stateNames) {
		return fmt.Sprintf("illegal state %d", int(s))
	}
	return stateNames[s]
}
+
+// isComment is true for any state that contains content meant for template
+// authors & maintainers, not for end-users or machines.
+func isComment(s state) bool {
+ switch s {
+ case stateHTMLCmt, stateJSBlockCmt, stateJSLineCmt, stateCSSBlockCmt, stateCSSLineCmt:
+ return true
+ }
+ return false
+}
+
+// isInTag return whether s occurs solely inside an HTML tag.
+func isInTag(s state) bool {
+ switch s {
+ case stateTag, stateAttrName, stateAfterName, stateBeforeValue, stateAttr:
+ return true
+ }
+ return false
+}
+
// delim is the delimiter that will end the current HTML attribute.
type delim uint8

const (
	// delimNone is used outside any attribute.
	delimNone delim = iota
	// delimDoubleQuote means a double quote (") closes the attribute.
	delimDoubleQuote
	// delimSingleQuote means a single quote (') closes the attribute.
	delimSingleQuote
	// delimSpaceOrTagEnd means a space or right angle bracket (>) closes
	// the attribute.
	delimSpaceOrTagEnd
)

// delimNames maps each delim constant to its identifier, for String.
var delimNames = [...]string{
	delimNone:          "delimNone",
	delimDoubleQuote:   "delimDoubleQuote",
	delimSingleQuote:   "delimSingleQuote",
	delimSpaceOrTagEnd: "delimSpaceOrTagEnd",
}

// String returns the constant's identifier, or "illegal delim N" for a value
// outside the declared range.
func (d delim) String() string {
	if int(d) >= len(delimNames) {
		return fmt.Sprintf("illegal delim %d", int(d))
	}
	return delimNames[d]
}
+
// urlPart identifies a part in an RFC 3986 hierarchical URL to allow different
// encoding strategies.
type urlPart uint8

const (
	// urlPartNone is used when not in a URL, or possibly at the start:
	// ^ in "^http://auth/path?k=v#frag".
	urlPartNone urlPart = iota
	// urlPartPreQuery covers the scheme, authority, or path; between the
	// ^s in "h^ttp://auth/path^?k=v#frag".
	urlPartPreQuery
	// urlPartQueryOrFrag covers the query portion between the ^s in
	// "http://auth/path?^k=v#frag^".
	urlPartQueryOrFrag
	// urlPartUnknown results from joining contexts both before and after
	// the query separator.
	urlPartUnknown
)

// urlPartNames maps each urlPart constant to its identifier, for String.
var urlPartNames = [...]string{
	urlPartNone:        "urlPartNone",
	urlPartPreQuery:    "urlPartPreQuery",
	urlPartQueryOrFrag: "urlPartQueryOrFrag",
	urlPartUnknown:     "urlPartUnknown",
}

// String returns the constant's identifier, or "illegal urlPart N" for a
// value outside the declared range.
func (u urlPart) String() string {
	if int(u) >= len(urlPartNames) {
		return fmt.Sprintf("illegal urlPart %d", int(u))
	}
	return urlPartNames[u]
}
+
// jsCtx determines whether a '/' starts a regular expression literal or a
// division operator.
type jsCtx uint8

const (
	// jsCtxRegexp is used where a '/' would start a regexp literal.
	jsCtxRegexp jsCtx = iota
	// jsCtxDivOp is used where a '/' would start a division operator.
	jsCtxDivOp
	// jsCtxUnknown is used where a '/' is ambiguous due to context joining.
	jsCtxUnknown
)

// String returns the constant's identifier, or "illegal jsCtx N" for a value
// outside the declared range.
func (c jsCtx) String() string {
	names := [...]string{
		jsCtxRegexp:  "jsCtxRegexp",
		jsCtxDivOp:   "jsCtxDivOp",
		jsCtxUnknown: "jsCtxUnknown",
	}
	if int(c) >= len(names) {
		return fmt.Sprintf("illegal jsCtx %d", int(c))
	}
	return names[c]
}
+
// element identifies the HTML element when inside a start tag or special body.
// Certain HTML elements (for example <script> and <style>) have bodies that
// are treated differently from stateText, so the element type is necessary to
// transition into the correct context at the end of a tag and to identify the
// end delimiter for the body.
type element uint8

const (
	// elementNone is used outside a special tag or special element body.
	elementNone element = iota
	// elementScript corresponds to the raw text <script> element.
	elementScript
	// elementStyle corresponds to the raw text <style> element.
	elementStyle
	// elementTextarea corresponds to the RCDATA <textarea> element.
	elementTextarea
	// elementTitle corresponds to the RCDATA <title> element.
	elementTitle
)

// elementNames maps each element constant to its identifier, for String.
var elementNames = [...]string{
	elementNone:     "elementNone",
	elementScript:   "elementScript",
	elementStyle:    "elementStyle",
	elementTextarea: "elementTextarea",
	elementTitle:    "elementTitle",
}

// String returns the constant's identifier, or "illegal element N" for a
// value outside the declared range.
func (e element) String() string {
	if int(e) >= len(elementNames) {
		return fmt.Sprintf("illegal element %d", int(e))
	}
	return elementNames[e]
}
+
// attr identifies the most recent HTML attribute when inside a start tag.
type attr uint8

const (
	// attrNone corresponds to a normal attribute or no attribute.
	attrNone attr = iota
	// attrScript corresponds to an event handler attribute.
	attrScript
	// attrStyle corresponds to the style attribute whose value is CSS.
	attrStyle
	// attrURL corresponds to an attribute whose value is a URL.
	attrURL
)

// attrNames maps each attr constant to its identifier, for String.
var attrNames = [...]string{
	attrNone:   "attrNone",
	attrScript: "attrScript",
	attrStyle:  "attrStyle",
	attrURL:    "attrURL",
}

// String returns the constant's identifier, or "illegal attr N" for a value
// outside the declared range.
func (a attr) String() string {
	if int(a) >= len(attrNames) {
		return fmt.Sprintf("illegal attr %d", int(a))
	}
	return attrNames[a]
}
diff --git a/src/pkg/html/template/css.go b/src/pkg/html/template/css.go
new file mode 100644
index 000000000..3bcd98498
--- /dev/null
+++ b/src/pkg/html/template/css.go
@@ -0,0 +1,268 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package template
+
+import (
+ "bytes"
+ "fmt"
+ "unicode"
+ "unicode/utf8"
+)
+
+// endsWithCSSKeyword returns whether b ends with an ident that
+// case-insensitively matches the lower-case kw.
+func endsWithCSSKeyword(b []byte, kw string) bool {
+ i := len(b) - len(kw)
+ if i < 0 {
+ // Too short.
+ return false
+ }
+ if i != 0 {
+ r, _ := utf8.DecodeLastRune(b[:i])
+ if isCSSNmchar(r) {
+ // Too long.
+ return false
+ }
+ }
+ // Many CSS keywords, such as "!important" can have characters encoded,
+ // but the URI production does not allow that according to
+ // http://www.w3.org/TR/css3-syntax/#TOK-URI
+ // This does not attempt to recognize encoded keywords. For example,
+ // given "\75\72\6c" and "url" this return false.
+ return string(bytes.ToLower(b[i:])) == kw
+}
+
// isCSSNmchar returns whether r is allowed anywhere in a CSS identifier.
//
// It follows the CSS3 nmchar production but ignores multi-rune escape
// sequences.
// http://www.w3.org/TR/css3-syntax/#SUBTOK-nmchar
func isCSSNmchar(r rune) bool {
	switch {
	case 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z', '0' <= r && r <= '9':
		return true
	case r == '-', r == '_':
		return true
	// Non-ASCII cases below.
	case 0x80 <= r && r <= 0xd7ff,
		0xe000 <= r && r <= 0xfffd,
		0x10000 <= r && r <= 0x10ffff:
		return true
	}
	return false
}
+
+// decodeCSS decodes CSS3 escapes given a sequence of stringchars.
+// If there is no change, it returns the input, otherwise it returns a slice
+// backed by a new array.
+// http://www.w3.org/TR/css3-syntax/#SUBTOK-stringchar defines stringchar.
+func decodeCSS(s []byte) []byte {
+ i := bytes.IndexByte(s, '\\')
+ if i == -1 {
+ return s
+ }
+ // The UTF-8 sequence for a codepoint is never longer than 1 + the
+ // number hex digits need to represent that codepoint, so len(s) is an
+ // upper bound on the output length.
+ b := make([]byte, 0, len(s))
+ for len(s) != 0 {
+ i := bytes.IndexByte(s, '\\')
+ if i == -1 {
+ i = len(s)
+ }
+ b, s = append(b, s[:i]...), s[i:]
+ if len(s) < 2 {
+ break
+ }
+ // http://www.w3.org/TR/css3-syntax/#SUBTOK-escape
+ // escape ::= unicode | '\' [#x20-#x7E#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF]
+ if isHex(s[1]) {
+ // http://www.w3.org/TR/css3-syntax/#SUBTOK-unicode
+ // unicode ::= '\' [0-9a-fA-F]{1,6} wc?
+ j := 2
+ for j < len(s) && j < 7 && isHex(s[j]) {
+ j++
+ }
+ r := hexDecode(s[1:j])
+ if r > unicode.MaxRune {
+ r, j = r/16, j-1
+ }
+ n := utf8.EncodeRune(b[len(b):cap(b)], r)
+ // The optional space at the end allows a hex
+ // sequence to be followed by a literal hex.
+ // string(decodeCSS([]byte(`\A B`))) == "\nB"
+ b, s = b[:len(b)+n], skipCSSSpace(s[j:])
+ } else {
+ // `\\` decodes to `\` and `\"` to `"`.
+ _, n := utf8.DecodeRune(s[1:])
+ b, s = append(b, s[1:1+n]...), s[1+n:]
+ }
+ }
+ return b
+}
+
// isHex returns whether the given character is an ASCII hex digit
// (0-9, a-f or A-F).
func isHex(c byte) bool {
	switch {
	case '0' <= c && c <= '9':
		return true
	case 'a' <= c && c <= 'f':
		return true
	case 'A' <= c && c <= 'F':
		return true
	}
	return false
}
+
// hexDecode decodes a short hex digit sequence: "10" -> 16.
// It panics if s contains a byte that is not a hex digit.
func hexDecode(s []byte) rune {
	var value rune
	for _, c := range s {
		var digit byte
		switch {
		case '0' <= c && c <= '9':
			digit = c - '0'
		case 'a' <= c && c <= 'f':
			digit = c - 'a' + 10
		case 'A' <= c && c <= 'F':
			digit = c - 'A' + 10
		default:
			panic(fmt.Sprintf("Bad hex digit in %q", s))
		}
		value = value<<4 | rune(digit)
	}
	return value
}
+
// skipCSSSpace returns a suffix of c, skipping over a single space.
// A CRLF pair counts as one space.
func skipCSSSpace(c []byte) []byte {
	if len(c) == 0 {
		return c
	}
	skip := 0
	// wc ::= #x9 | #xA | #xC | #xD | #x20
	switch c[0] {
	case '\t', '\n', '\f', ' ':
		skip = 1
	case '\r':
		// This differs from CSS3's wc production because it contains a
		// probable spec error whereby wc contains all the single byte
		// sequences in nl (newline) but not CRLF.
		skip = 1
		if len(c) >= 2 && c[1] == '\n' {
			skip = 2
		}
	}
	return c[skip:]
}
+
// isCSSSpace returns whether b is a CSS space char as defined in wc:
// tab, line feed, form feed, carriage return or space.
func isCSSSpace(b byte) bool {
	return b == '\t' || b == '\n' || b == '\f' || b == '\r' || b == ' '
}
+
+// cssEscaper escapes HTML and CSS special characters using \<hex>+ escapes.
+func cssEscaper(args ...interface{}) string {
+ s, _ := stringify(args...)
+ var b bytes.Buffer
+ written := 0
+ for i, r := range s {
+ var repl string
+ switch r {
+ case 0:
+ repl = `\0`
+ case '\t':
+ repl = `\9`
+ case '\n':
+ repl = `\a`
+ case '\f':
+ repl = `\c`
+ case '\r':
+ repl = `\d`
+ // Encode HTML specials as hex so the output can be embedded
+ // in HTML attributes without further encoding.
+ case '"':
+ repl = `\22`
+ case '&':
+ repl = `\26`
+ case '\'':
+ repl = `\27`
+ case '(':
+ repl = `\28`
+ case ')':
+ repl = `\29`
+ case '+':
+ repl = `\2b`
+ case '/':
+ repl = `\2f`
+ case ':':
+ repl = `\3a`
+ case ';':
+ repl = `\3b`
+ case '<':
+ repl = `\3c`
+ case '>':
+ repl = `\3e`
+ case '\\':
+ repl = `\\`
+ case '{':
+ repl = `\7b`
+ case '}':
+ repl = `\7d`
+ default:
+ continue
+ }
+ b.WriteString(s[written:i])
+ b.WriteString(repl)
+ written = i + utf8.RuneLen(r)
+ if repl != `\\` && (written == len(s) || isHex(s[written]) || isCSSSpace(s[written])) {
+ b.WriteByte(' ')
+ }
+ }
+ if written == 0 {
+ return s
+ }
+ b.WriteString(s[written:])
+ return b.String()
+}
+
+var expressionBytes = []byte("expression")
+var mozBindingBytes = []byte("mozbinding")
+
+// cssValueFilter allows innocuous CSS values in the output including CSS
+// quantities (10px or 25%), ID or class literals (#foo, .bar), keyword values
+// (inherit, blue), and colors (#888).
+// It filters out unsafe values, such as those that affect token boundaries,
+// and anything that might execute scripts.
+func cssValueFilter(args ...interface{}) string {
+ s, t := stringify(args...)
+ if t == contentTypeCSS {
+ return s
+ }
+ b, id := decodeCSS([]byte(s)), make([]byte, 0, 64)
+
+ // CSS3 error handling is specified as honoring string boundaries per
+ // http://www.w3.org/TR/css3-syntax/#error-handling :
+ // Malformed declarations. User agents must handle unexpected
+ // tokens encountered while parsing a declaration by reading until
+ // the end of the declaration, while observing the rules for
+ // matching pairs of (), [], {}, "", and '', and correctly handling
+ // escapes. For example, a malformed declaration may be missing a
+ // property, colon (:) or value.
+ // So we need to make sure that values do not have mismatched bracket
+ // or quote characters to prevent the browser from restarting parsing
+ // inside a string that might embed JavaScript source.
+ for i, c := range b {
+ switch c {
+ case 0, '"', '\'', '(', ')', '/', ';', '@', '[', '\\', ']', '`', '{', '}':
+ return filterFailsafe
+ case '-':
+ // Disallow <!-- or -->.
+ // -- should not appear in valid identifiers.
+ if i != 0 && b[i-1] == '-' {
+ return filterFailsafe
+ }
+ default:
+ if c < 0x80 && isCSSNmchar(rune(c)) {
+ id = append(id, c)
+ }
+ }
+ }
+ id = bytes.ToLower(id)
+ if bytes.Index(id, expressionBytes) != -1 || bytes.Index(id, mozBindingBytes) != -1 {
+ return filterFailsafe
+ }
+ return string(b)
+}
diff --git a/src/pkg/html/template/css_test.go b/src/pkg/html/template/css_test.go
new file mode 100644
index 000000000..a735638b0
--- /dev/null
+++ b/src/pkg/html/template/css_test.go
@@ -0,0 +1,281 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package template
+
+import (
+ "strconv"
+ "strings"
+ "testing"
+)
+
+func TestEndsWithCSSKeyword(t *testing.T) {
+ tests := []struct {
+ css, kw string
+ want bool
+ }{
+ {"", "url", false},
+ {"url", "url", true},
+ {"URL", "url", true},
+ {"Url", "url", true},
+ {"url", "important", false},
+ {"important", "important", true},
+ {"image-url", "url", false},
+ {"imageurl", "url", false},
+ {"image url", "url", true},
+ }
+ for _, test := range tests {
+ got := endsWithCSSKeyword([]byte(test.css), test.kw)
+ if got != test.want {
+ t.Errorf("want %t but got %t for css=%v, kw=%v", test.want, got, test.css, test.kw)
+ }
+ }
+}
+
+func TestIsCSSNmchar(t *testing.T) {
+ tests := []struct {
+ rune rune
+ want bool
+ }{
+ {0, false},
+ {'0', true},
+ {'9', true},
+ {'A', true},
+ {'Z', true},
+ {'a', true},
+ {'z', true},
+ {'_', true},
+ {'-', true},
+ {':', false},
+ {';', false},
+ {' ', false},
+ {0x7f, false},
+ {0x80, true},
+ {0x1234, true},
+ {0xd800, false},
+ {0xdc00, false},
+ {0xfffe, false},
+ {0x10000, true},
+ {0x110000, false},
+ }
+ for _, test := range tests {
+ got := isCSSNmchar(test.rune)
+ if got != test.want {
+ t.Errorf("%q: want %t but got %t", string(test.rune), test.want, got)
+ }
+ }
+}
+
+func TestDecodeCSS(t *testing.T) {
+ tests := []struct {
+ css, want string
+ }{
+ {``, ``},
+ {`foo`, `foo`},
+ {`foo\`, `foo`},
+ {`foo\\`, `foo\`},
+ {`\`, ``},
+ {`\A`, "\n"},
+ {`\a`, "\n"},
+ {`\0a`, "\n"},
+ {`\00000a`, "\n"},
+ {`\000000a`, "\u0000a"},
+ {`\1234 5`, "\u1234" + "5"},
+ {`\1234\20 5`, "\u1234" + " 5"},
+ {`\1234\A 5`, "\u1234" + "\n5"},
+ {"\\1234\t5", "\u1234" + "5"},
+ {"\\1234\n5", "\u1234" + "5"},
+ {"\\1234\r\n5", "\u1234" + "5"},
+ {`\12345`, "\U00012345"},
+ {`\\`, `\`},
+ {`\\ `, `\ `},
+ {`\"`, `"`},
+ {`\'`, `'`},
+ {`\.`, `.`},
+ {`\. .`, `. .`},
+ {
+ `The \3c i\3equick\3c/i\3e,\d\A\3cspan style=\27 color:brown\27\3e brown\3c/span\3e fox jumps\2028over the \3c canine class=\22lazy\22 \3e dog\3c/canine\3e`,
+ "The <i>quick</i>,\r\n<span style='color:brown'>brown</span> fox jumps\u2028over the <canine class=\"lazy\">dog</canine>",
+ },
+ }
+ for _, test := range tests {
+ got1 := string(decodeCSS([]byte(test.css)))
+ if got1 != test.want {
+ t.Errorf("%q: want\n\t%q\nbut got\n\t%q", test.css, test.want, got1)
+ }
+ recoded := cssEscaper(got1)
+ if got2 := string(decodeCSS([]byte(recoded))); got2 != test.want {
+ t.Errorf("%q: escape & decode not dual for %q", test.css, recoded)
+ }
+ }
+}
+
+func TestHexDecode(t *testing.T) {
+ for i := 0; i < 0x200000; i += 101 /* coprime with 16 */ {
+ s := strconv.FormatInt(int64(i), 16)
+ if got := int(hexDecode([]byte(s))); got != i {
+ t.Errorf("%s: want %d but got %d", s, i, got)
+ }
+ s = strings.ToUpper(s)
+ if got := int(hexDecode([]byte(s))); got != i {
+ t.Errorf("%s: want %d but got %d", s, i, got)
+ }
+ }
+}
+
+func TestSkipCSSSpace(t *testing.T) {
+ tests := []struct {
+ css, want string
+ }{
+ {"", ""},
+ {"foo", "foo"},
+ {"\n", ""},
+ {"\r\n", ""},
+ {"\r", ""},
+ {"\t", ""},
+ {" ", ""},
+ {"\f", ""},
+ {" foo", "foo"},
+ {" foo", " foo"},
+ {`\20`, `\20`},
+ }
+ for _, test := range tests {
+ got := string(skipCSSSpace([]byte(test.css)))
+ if got != test.want {
+ t.Errorf("%q: want %q but got %q", test.css, test.want, got)
+ }
+ }
+}
+
+func TestCSSEscaper(t *testing.T) {
+ input := ("\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f" +
+ "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" +
+ ` !"#$%&'()*+,-./` +
+ `0123456789:;<=>?` +
+ `@ABCDEFGHIJKLMNO` +
+ `PQRSTUVWXYZ[\]^_` +
+ "`abcdefghijklmno" +
+ "pqrstuvwxyz{|}~\x7f" +
+ "\u00A0\u0100\u2028\u2029\ufeff\U0001D11E")
+
+ want := ("\\0\x01\x02\x03\x04\x05\x06\x07" +
+ "\x08\\9 \\a\x0b\\c \\d\x0E\x0F" +
+ "\x10\x11\x12\x13\x14\x15\x16\x17" +
+ "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" +
+ ` !\22#$%\26\27\28\29*\2b,-.\2f ` +
+ `0123456789\3a\3b\3c=\3e?` +
+ `@ABCDEFGHIJKLMNO` +
+ `PQRSTUVWXYZ[\\]^_` +
+ "`abcdefghijklmno" +
+ `pqrstuvwxyz\7b|\7d~` + "\u007f" +
+ "\u00A0\u0100\u2028\u2029\ufeff\U0001D11E")
+
+ got := cssEscaper(input)
+ if got != want {
+ t.Errorf("encode: want\n\t%q\nbut got\n\t%q", want, got)
+ }
+
+ got = string(decodeCSS([]byte(got)))
+ if input != got {
+ t.Errorf("decode: want\n\t%q\nbut got\n\t%q", input, got)
+ }
+}
+
+func TestCSSValueFilter(t *testing.T) {
+ tests := []struct {
+ css, want string
+ }{
+ {"", ""},
+ {"foo", "foo"},
+ {"0", "0"},
+ {"0px", "0px"},
+ {"-5px", "-5px"},
+ {"1.25in", "1.25in"},
+ {"+.33em", "+.33em"},
+ {"100%", "100%"},
+ {"12.5%", "12.5%"},
+ {".foo", ".foo"},
+ {"#bar", "#bar"},
+ {"corner-radius", "corner-radius"},
+ {"-moz-corner-radius", "-moz-corner-radius"},
+ {"#000", "#000"},
+ {"#48f", "#48f"},
+ {"#123456", "#123456"},
+ {"U+00-FF, U+980-9FF", "U+00-FF, U+980-9FF"},
+ {"color: red", "color: red"},
+ {"<!--", "ZgotmplZ"},
+ {"-->", "ZgotmplZ"},
+ {"<![CDATA[", "ZgotmplZ"},
+ {"]]>", "ZgotmplZ"},
+ {"</style", "ZgotmplZ"},
+ {`"`, "ZgotmplZ"},
+ {`'`, "ZgotmplZ"},
+ {"`", "ZgotmplZ"},
+ {"\x00", "ZgotmplZ"},
+ {"/* foo */", "ZgotmplZ"},
+ {"//", "ZgotmplZ"},
+ {"[href=~", "ZgotmplZ"},
+ {"expression(alert(1337))", "ZgotmplZ"},
+ {"-expression(alert(1337))", "ZgotmplZ"},
+ {"expression", "ZgotmplZ"},
+ {"Expression", "ZgotmplZ"},
+ {"EXPRESSION", "ZgotmplZ"},
+ {"-moz-binding", "ZgotmplZ"},
+ {"-expr\x00ession(alert(1337))", "ZgotmplZ"},
+ {`-expr\0ession(alert(1337))`, "ZgotmplZ"},
+ {`-express\69on(alert(1337))`, "ZgotmplZ"},
+ {`-express\69 on(alert(1337))`, "ZgotmplZ"},
+ {`-exp\72 ession(alert(1337))`, "ZgotmplZ"},
+ {`-exp\52 ession(alert(1337))`, "ZgotmplZ"},
+ {`-exp\000052 ession(alert(1337))`, "ZgotmplZ"},
+ {`-expre\0000073sion`, "-expre\x073sion"},
+ {`@import url evil.css`, "ZgotmplZ"},
+ }
+ for _, test := range tests {
+ got := cssValueFilter(test.css)
+ if got != test.want {
+ t.Errorf("%q: want %q but got %q", test.css, test.want, got)
+ }
+ }
+}
+
+func BenchmarkCSSEscaper(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ cssEscaper("The <i>quick</i>,\r\n<span style='color:brown'>brown</span> fox jumps\u2028over the <canine class=\"lazy\">dog</canine>")
+ }
+}
+
+func BenchmarkCSSEscaperNoSpecials(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ cssEscaper("The quick, brown fox jumps over the lazy dog.")
+ }
+}
+
+func BenchmarkDecodeCSS(b *testing.B) {
+ s := []byte(`The \3c i\3equick\3c/i\3e,\d\A\3cspan style=\27 color:brown\27\3e brown\3c/span\3e fox jumps\2028over the \3c canine class=\22lazy\22 \3edog\3c/canine\3e`)
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ decodeCSS(s)
+ }
+}
+
+func BenchmarkDecodeCSSNoSpecials(b *testing.B) {
+ s := []byte("The quick, brown fox jumps over the lazy dog.")
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ decodeCSS(s)
+ }
+}
+
+func BenchmarkCSSValueFilter(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ cssValueFilter(` e\78preS\0Sio/**/n(alert(1337))`)
+ }
+}
+
+func BenchmarkCSSValueFilterOk(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ cssValueFilter(`Times New Roman`)
+ }
+}
diff --git a/src/pkg/html/template/doc.go b/src/pkg/html/template/doc.go
new file mode 100644
index 000000000..77a9bf2e2
--- /dev/null
+++ b/src/pkg/html/template/doc.go
@@ -0,0 +1,186 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+Package template (html/template) is a specialization of package text/template
+that automates the construction of HTML output that is safe against code
+injection.
+
+
+Introduction
+
+ This package wraps package text/template so you can use the standard template
+ API to parse and execute templates.
+
+ tmpl, err := template.New("name").Parse(...)
+ // Error checking elided
+ err = tmpl.Execute(out, data)
+
+ If successful, tmpl will now be injection-safe. Otherwise, err is an error
+ defined in the docs for ErrorCode.
+
+HTML templates treat data values as plain text which should be encoded so they
+can be safely embedded in an HTML document. The escaping is contextual, so
+actions can appear within JavaScript, CSS, and URI contexts.
+
+The security model used by this package assumes that template authors are
+trusted, while Execute's data parameter is not. More details are provided below.
+
+Example
+
+ import "text/template"
+ ...
+ t, err := template.New("foo").Parse(`{{define "T"}}Hello, {{.}}!{{end}}`)
+ err = t.ExecuteTemplate(out, "T", "<script>alert('you have been pwned')</script>")
+
+produces
+
+ Hello, <script>alert('you have been pwned')</script>!
+
+but with contextual autoescaping,
+
+ import "html/template"
+ ...
+ t, err := template.New("foo").Parse(`{{define "T"}}Hello, {{.}}!{{end}}`)
+ err = t.ExecuteTemplate(out, "T", "<script>alert('you have been pwned')</script>")
+
+produces safe, escaped HTML output
+
+ Hello, &lt;script&gt;alert(&#39;you have been pwned&#39;)&lt;/script&gt;!
+
+
+Contexts
+
+This package understands HTML, CSS, JavaScript, and URIs. It adds sanitizing
+functions to each simple action pipeline, so given the excerpt
+
+ <a href="/search?q={{.}}">{{.}}</a>
+
+At parse time each {{.}} is overwritten to add escaping functions as necessary.
+In this case it becomes
+
+ <a href="/search?q={{. | urlquery}}">{{. | html}}</a>
+
+
+Errors
+
+See the documentation of ErrorCode for details.
+
+
+A fuller picture
+
+The rest of this package comment may be skipped on first reading; it includes
+details necessary to understand escaping contexts and error messages. Most users
+will not need to understand these details.
+
+
+Contexts
+
+Assuming {{.}} is `O'Reilly: How are <i>you</i>?`, the table below shows
+how {{.}} appears when used in the context to the left.
+
+ Context {{.}} After
+ {{.}} O'Reilly: How are &lt;i&gt;you&lt;/i&gt;?
+ <a title='{{.}}'> O&#39;Reilly: How are you?
+ <a href="/{{.}}"> O&#39;Reilly: How are %3ci%3eyou%3c/i%3e?
+ <a href="?q={{.}}"> O&#39;Reilly%3a%20How%20are%20%3ci%3e...%3f
+ <a onx='f("{{.}}")'> O\x27Reilly: How are \x3ci\x3eyou...?
+ <a onx='f({{.}})'> "O\x27Reilly: How are \x3ci\x3eyou...?"
+ <a onx='pattern = /{{.}}/;'> O\x27Reilly: How are \x3ci\x3eyou...\x3f
+
+If used in an unsafe context, then the value might be filtered out:
+
+ Context {{.}} After
+ <a href="{{.}}"> #ZgotmplZ
+
+since "O'Reilly:" is not an allowed protocol like "http:".
+
+
+If {{.}} is the innocuous word, `left`, then it can appear more widely,
+
+ Context {{.}} After
+ {{.}} left
+ <a title='{{.}}'> left
+ <a href='{{.}}'> left
+ <a href='/{{.}}'> left
+ <a href='?dir={{.}}'> left
+ <a style="border-{{.}}: 4px"> left
+ <a style="align: {{.}}"> left
+ <a style="background: '{{.}}'"> left
+ <a style="background: url('{{.}}')"> left
+ <style>p.{{.}} {color:red}</style> left
+
+Non-string values can be used in JavaScript contexts.
+If {{.}} is
+
+ struct{A,B string}{ "foo", "bar" }
+
+in the escaped template
+
+ <script>var pair = {{.}};</script>
+
+then the template output is
+
+ <script>var pair = {"A": "foo", "B": "bar"};</script>
+
+See package json to understand how non-string content is marshalled for
+embedding in JavaScript contexts.
+
+
+Typed Strings
+
+By default, this package assumes that all pipelines produce a plain text string.
+It adds escaping pipeline stages necessary to correctly and safely embed that
+plain text string in the appropriate context.
+
+When a data value is not plain text, you can make sure it is not over-escaped
+by marking it with its type.
+
+Types HTML, JS, URL, and others from content.go can carry safe content that is
+exempted from escaping.
+
+The template
+
+ Hello, {{.}}!
+
+can be invoked with
+
+ tmpl.Execute(out, HTML(`<b>World</b>`))
+
+to produce
+
+ Hello, <b>World</b>!
+
+instead of the
+
+ Hello, &lt;b&gt;World&lt;/b&gt;!
+
+that would have been produced if {{.}} was a regular string.
+
+
+Security Model
+
+http://js-quasis-libraries-and-repl.googlecode.com/svn/trunk/safetemplate.html#problem_definition defines "safe" as used by this package.
+
+This package assumes that template authors are trusted, that Execute's data
+parameter is not, and seeks to preserve the properties below in the face
+of untrusted data:
+
+Structure Preservation Property
+"... when a template author writes an HTML tag in a safe templating language,
+the browser will interpret the corresponding portion of the output as a tag
+regardless of the values of untrusted data, and similarly for other structures
+such as attribute boundaries and JS and CSS string boundaries."
+
+Code Effect Property
+"... only code specified by the template author should run as a result of
+injecting the template output into a page and all code specified by the
+template author should run as a result of the same."
+
+Least Surprise Property
+"A developer (or code reviewer) familiar with HTML, CSS, and JavaScript, who
+knows that contextual autoescaping happens should be able to look at a {{.}}
+and correctly infer what sanitization happens."
+*/
+package template
diff --git a/src/pkg/html/template/error.go b/src/pkg/html/template/error.go
new file mode 100644
index 000000000..dcac74896
--- /dev/null
+++ b/src/pkg/html/template/error.go
@@ -0,0 +1,197 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package template
+
+import (
+ "fmt"
+)
+
+// Error describes a problem encountered during template Escaping.
+// Its Error method formats it as "html/template:NAME:LINE: DESCRIPTION",
+// dropping LINE when it is 0 and NAME when it is also empty.
+type Error struct {
+ // ErrorCode describes the kind of error.
+ ErrorCode ErrorCode
+ // Name is the name of the template in which the error was encountered.
+ // errorf leaves it empty; it is filled in later by the caller that knows
+ // which template was being escaped.
+ Name string
+ // Line is the line number of the error in the template source or 0.
+ Line int
+ // Description is a human-readable description of the problem.
+ Description string
+}
+
+// ErrorCode is a code for a kind of error.
+// The zero value is OK, which indicates the lack of an error; the remaining
+// values are the ErrXxx constants declared below.
+type ErrorCode int
+
+// We define codes for each error that manifests while escaping templates, but
+// escaped templates may also fail at runtime.
+//
+// Output: "ZgotmplZ"
+// Example:
+// <img src="{{.X}}">
+// where {{.X}} evaluates to `javascript:...`
+// Discussion:
+// "ZgotmplZ" is a special value that indicates that unsafe content reached a
+// CSS or URL context at runtime. The output of the example will be
+// <img src="#ZgotmplZ">
+// If the data comes from a trusted source, use content types to exempt it
+// from filtering: URL(`javascript:...`).
+const (
+ // OK indicates the lack of an error.
+ OK ErrorCode = iota
+
+ // ErrAmbigContext: "... appears in an ambiguous URL context"
+ // Example:
+ // <a href="
+ // {{if .C}}
+ // /path/
+ // {{else}}
+ // /search?q=
+ // {{end}}
+ // {{.X}}
+ // ">
+ // Discussion:
+ // {{.X}} is in an ambiguous URL context since, depending on {{.C}},
+ // it may be either a URL suffix or a query parameter.
+ // Moving {{.X}} into the condition removes the ambiguity:
+ // <a href="{{if .C}}/path/{{.X}}{{else}}/search?q={{.X}}{{end}}">
+ ErrAmbigContext
+
+ // ErrBadHTML: "expected space, attr name, or end of tag, but got ...",
+ // "... in unquoted attr", "... in attribute name"
+ // Example:
+ // <a href = /search?q=foo>
+ // <href=foo>
+ // <form na<e=...>
+ // <option selected<
+ // Discussion:
+ // This is often due to a typo in an HTML element, but some runes
+ // are banned in tag names, attribute names, and unquoted attribute
+ // values because they can tickle parser ambiguities.
+ // Quoting all attributes is the best policy.
+ ErrBadHTML
+
+ // ErrBranchEnd: "{{if}} branches end in different contexts"
+ // Example:
+ // {{if .C}}<a href="{{end}}{{.X}}
+ // Discussion:
+ // Package html/template statically examines each path through an
+ // {{if}}, {{range}}, or {{with}} to escape any following pipelines.
+ // The example is ambiguous since {{.X}} might be an HTML text node,
+ // or a URL prefix in an HTML attribute. The context of {{.X}} is
+ // used to figure out how to escape it, but that context depends on
+ // the run-time value of {{.C}} which is not statically known.
+ //
+ // The problem is usually something like missing quotes or angle
+ // brackets, or can be avoided by refactoring to put the two contexts
+ // into different branches of an if, range or with. If the problem
+ // is in a {{range}} over a collection that should never be empty,
+ // adding a dummy {{else}} can help.
+ ErrBranchEnd
+
+ // ErrEndContext: "... ends in a non-text context: ..."
+ // Examples:
+ // <div
+ // <div title="no close quote>
+ // <script>f()
+ // Discussion:
+ // Executed templates should produce a DocumentFragment of HTML.
+ // Templates that end without closing tags will trigger this error.
+ // Templates that should not be used in an HTML context or that
+ // produce incomplete Fragments should not be executed directly.
+ //
+ // {{define "main"}} <script>{{template "helper"}}</script> {{end}}
+ // {{define "helper"}} document.write(' <div title=" ') {{end}}
+ //
+ // "helper" does not produce a valid document fragment, so should
+ // not be Executed directly.
+ ErrEndContext
+
+ // ErrNoSuchTemplate: "no such template ..."
+ // Examples:
+ // {{define "main"}}<div {{template "attrs"}}>{{end}}
+ // {{define "attrs"}}href="{{.URL}}"{{end}}
+ // Discussion:
+ // Package html/template looks through template calls to compute the
+ // context.
+ // Here the {{.URL}} in "attrs" must be treated as a URL when called
+ // from "main", but you will get this error if "attrs" is not defined
+ // when "main" is parsed.
+ ErrNoSuchTemplate
+
+ // ErrOutputContext: "cannot compute output context for template ..."
+ // Examples:
+ // {{define "t"}}{{if .T}}{{template "t" .T}}{{end}}{{.H}}",{{end}}
+ // Discussion:
+ // A recursive template does not end in the same context in which it
+ // starts, and a reliable output context cannot be computed.
+ // Look for typos in the named template.
+ // If the template should not be called in the named start context,
+ // look for calls to that template in unexpected contexts.
+ // Maybe refactor recursive templates to not be recursive.
+ ErrOutputContext
+
+ // ErrPartialCharset: "unfinished JS regexp charset in ..."
+ // Example:
+ // <script>var pattern = /foo[{{.Chars}}]/</script>
+ // Discussion:
+ // Package html/template does not support interpolation into regular
+ // expression literal character sets.
+ ErrPartialCharset
+
+ // ErrPartialEscape: "unfinished escape sequence in ..."
+ // Example:
+ // <script>alert("\{{.X}}")</script>
+ // Discussion:
+ // Package html/template does not support actions following a
+ // backslash.
+ // This is usually an error and there are better solutions; for
+ // example
+ // <script>alert("{{.X}}")</script>
+ // should work, and if {{.X}} is a partial escape sequence such as
+ // "xA0", mark the whole sequence as safe content: JSStr(`\xA0`)
+ ErrPartialEscape
+
+ // ErrRangeLoopReentry: "on range loop re-entry: ..."
+ // Example:
+ // <script>var x = [{{range .}}'{{.}},{{end}}]</script>
+ // Discussion:
+ // If an iteration through a range would cause it to end in a
+ // different context than an earlier pass, there is no single context.
+ // In the example, there is a missing quote, so it is not clear
+ // whether {{.}} is meant to be inside a JS string or in a JS value
+ // context. The second iteration would produce something like
+ //
+ // <script>var x = ['firstValue,'secondValue]</script>
+ ErrRangeLoopReentry
+
+ // ErrSlashAmbig: '/' could start a division or regexp.
+ // Example:
+ // <script>
+ // {{if .C}}var x = 1{{end}}
+ // /-{{.N}}/i.test(x) ? doThis : doThat();
+ // </script>
+ // Discussion:
+ // The example above could produce `var x = 1/-2/i.test(x)...`
+ // in which the first '/' is a mathematical division operator or it
+ // could produce `/-2/i.test(x)` in which the first '/' starts a
+ // regexp literal.
+ // Look for missing semicolons inside branches, and maybe add
+ // parentheses to make it clear which interpretation you intend.
+ ErrSlashAmbig
+)
+
+// Error renders e as a message prefixed with "html/template:". The template
+// name and line are included when a line is known; only the name is included
+// when the line is 0 but a name is set; otherwise just the description.
+func (e *Error) Error() string {
+	switch {
+	case e.Line != 0:
+		return fmt.Sprintf("html/template:%s:%d: %s", e.Name, e.Line, e.Description)
+	case e.Name != "":
+		return fmt.Sprintf("html/template:%s: %s", e.Name, e.Description)
+	default:
+		return "html/template: " + e.Description
+	}
+}
+
+// errorf builds an *Error of kind k at the given line whose Description is
+// f formatted with args. Name is left empty and still needs to be supplied
+// by the caller that knows which template is being escaped.
+func errorf(k ErrorCode, line int, f string, args ...interface{}) *Error {
+	return &Error{
+		ErrorCode:   k,
+		Line:        line,
+		Description: fmt.Sprintf(f, args...),
+	}
+}
diff --git a/src/pkg/html/template/escape.go b/src/pkg/html/template/escape.go
new file mode 100644
index 000000000..c6f723ae4
--- /dev/null
+++ b/src/pkg/html/template/escape.go
@@ -0,0 +1,753 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package template
+
+import (
+ "bytes"
+ "fmt"
+ "html"
+ "text/template"
+ "text/template/parse"
+)
+
+// escapeTemplates rewrites the named templates, which must be associated
+// with tmpl, so that the output of each is properly escaped. names should
+// list every template that might be Executed; helper templates reached via
+// {{template}} calls need not be listed.
+//
+// On success the accumulated edits are committed and nil is returned. On the
+// first failure the parse trees of all named templates are cleared so that
+// none of them can be executed, and the error is returned.
+func escapeTemplates(tmpl *Template, names ...string) error {
+	e := newEscaper(tmpl)
+	for _, name := range names {
+		c, _ := e.escapeTree(context{}, name, 0)
+		// err stays a nil interface unless one of the cases below fires.
+		var err error
+		switch {
+		case c.err != nil:
+			c.err.Name = name
+			err = c.err
+		case c.state != stateText:
+			err = &Error{ErrEndContext, name, 0, fmt.Sprintf("ends in a non-text context: %v", c)}
+		}
+		if err == nil {
+			tmpl.escaped = true
+			continue
+		}
+		// Prevent execution of unsafe templates.
+		for _, unsafe := range names {
+			if t := tmpl.set[unsafe]; t != nil {
+				t.text.Tree = nil
+			}
+		}
+		return err
+	}
+	e.commit()
+	return nil
+}
+
+// funcMap maps command names to functions that render their inputs safe.
+// The keys are the identifiers that escapeAction schedules for insertion into
+// action pipelines; commit registers the map on every escaped template so
+// that those identifiers resolve when the template runs.
+var funcMap = template.FuncMap{
+ "exp_template_html_attrescaper": attrEscaper,
+ "exp_template_html_commentescaper": commentEscaper,
+ "exp_template_html_cssescaper": cssEscaper,
+ "exp_template_html_cssvaluefilter": cssValueFilter,
+ "exp_template_html_htmlnamefilter": htmlNameFilter,
+ "exp_template_html_htmlescaper": htmlEscaper,
+ "exp_template_html_jsregexpescaper": jsRegexpEscaper,
+ "exp_template_html_jsstrescaper": jsStrEscaper,
+ "exp_template_html_jsvalescaper": jsValEscaper,
+ "exp_template_html_nospaceescaper": htmlNospaceEscaper,
+ "exp_template_html_rcdataescaper": rcdataEscaper,
+ "exp_template_html_urlescaper": urlEscaper,
+ "exp_template_html_urlfilter": urlFilter,
+ "exp_template_html_urlnormalizer": urlNormalizer,
+}
+
+// equivEscapers matches contextual escapers to equivalent template builtins.
+// escFnsEq consults it so that a pipeline which already ends in a builtin
+// such as "html" or "urlquery" is recognized as containing the corresponding
+// contextual escaper. Escapers without an entry are only equal to themselves.
+var equivEscapers = map[string]string{
+ "exp_template_html_attrescaper": "html",
+ "exp_template_html_htmlescaper": "html",
+ "exp_template_html_nospaceescaper": "html",
+ "exp_template_html_rcdataescaper": "html",
+ "exp_template_html_urlescaper": "urlquery",
+ "exp_template_html_urlnormalizer": "urlquery",
+}
+
+// escaper collects type inferences about templates and changes needed to make
+// templates injection safe.
+type escaper struct {
+ // tmpl is the template whose associated templates are being escaped.
+ tmpl *Template
+ // output[templateName] is the output context for a templateName that
+ // has been mangled to include its input context.
+ output map[string]context
+ // derived[c.mangle(name)] maps to a template derived from the template
+ // named name for the start context c.
+ derived map[string]*template.Template
+ // called[templateName] is a set of called mangled template names.
+ called map[string]bool
+ // xxxNodeEdits are the accumulated edits to apply during commit.
+ // Such edits are not applied immediately in case a template set
+ // executes a given template in different escaping contexts.
+ actionNodeEdits map[*parse.ActionNode][]string
+ templateNodeEdits map[*parse.TemplateNode]string
+ textNodeEdits map[*parse.TextNode][]byte
+}
+
+// newEscaper returns an escaper for t with every inference and edit table
+// allocated and empty.
+func newEscaper(t *Template) *escaper {
+	return &escaper{
+		tmpl:              t,
+		output:            make(map[string]context),
+		derived:           make(map[string]*template.Template),
+		called:            make(map[string]bool),
+		actionNodeEdits:   make(map[*parse.ActionNode][]string),
+		templateNodeEdits: make(map[*parse.TemplateNode]string),
+		textNodeEdits:     make(map[*parse.TextNode][]byte),
+	}
+}
+
+// filterFailsafe is an innocuous word that is emitted in place of unsafe values
+// by sanitizer functions. It is not a keyword in any programming language,
+// contains no special characters, is not empty, and when it appears in output
+// it is distinct enough that a developer can find the source of the problem
+// via a search engine.
+// The error code documentation in error.go shows it surfacing in rendered
+// output as, for example, <img src="#ZgotmplZ">.
+const filterFailsafe = "ZgotmplZ"
+
+// escape dispatches on the concrete type of n to the matching escapeXxx
+// method and returns the context that follows n when n starts in context c.
+// It panics for node types it does not know how to escape.
+func (e *escaper) escape(c context, n parse.Node) context {
+	switch node := n.(type) {
+	case *parse.ActionNode:
+		return e.escapeAction(c, node)
+	case *parse.IfNode:
+		return e.escapeBranch(c, &node.BranchNode, "if")
+	case *parse.ListNode:
+		return e.escapeList(c, node)
+	case *parse.RangeNode:
+		return e.escapeBranch(c, &node.BranchNode, "range")
+	case *parse.TemplateNode:
+		return e.escapeTemplate(c, node)
+	case *parse.TextNode:
+		return e.escapeText(c, node)
+	case *parse.WithNode:
+		return e.escapeBranch(c, &node.BranchNode, "with")
+	default:
+		panic("escaping " + n.String() + " is unimplemented")
+	}
+}
+
+// escapeAction escapes an action template node.
+// It chooses the sanitizer functions required by the context c in which the
+// action's output will appear, records them as a pending edit to the action's
+// pipeline, and returns the context that follows the action. An action that
+// declares variables is left untouched and c is returned unchanged.
+func (e *escaper) escapeAction(c context, n *parse.ActionNode) context {
+ if len(n.Pipe.Decl) != 0 {
+ // A local variable assignment, not an interpolation.
+ return c
+ }
+ // Follow empty-string transitions so that c is the context the emitted
+ // value will actually start in.
+ c = nudge(c)
+ // s accumulates the names of the escapers to add, in pipeline order.
+ s := make([]string, 0, 3)
+ switch c.state {
+ case stateError:
+ return c
+ case stateURL, stateCSSDqStr, stateCSSSqStr, stateCSSDqURL, stateCSSSqURL, stateCSSURL:
+ switch c.urlPart {
+ case urlPartNone:
+ // Filter first, then fall through to also apply the escaper
+ // used for the pre-query part.
+ s = append(s, "exp_template_html_urlfilter")
+ fallthrough
+ case urlPartPreQuery:
+ switch c.state {
+ case stateCSSDqStr, stateCSSSqStr:
+ s = append(s, "exp_template_html_cssescaper")
+ default:
+ s = append(s, "exp_template_html_urlnormalizer")
+ }
+ case urlPartQueryOrFrag:
+ s = append(s, "exp_template_html_urlescaper")
+ case urlPartUnknown:
+ // join produces urlPartUnknown when branches disagree on the
+ // URL part, so no single escaper can be chosen here.
+ return context{
+ state: stateError,
+ err: errorf(ErrAmbigContext, n.Line, "%s appears in an ambiguous URL context", n),
+ }
+ default:
+ panic(c.urlPart.String())
+ }
+ case stateJS:
+ s = append(s, "exp_template_html_jsvalescaper")
+ // A slash after a value starts a div operator.
+ c.jsCtx = jsCtxDivOp
+ case stateJSDqStr, stateJSSqStr:
+ s = append(s, "exp_template_html_jsstrescaper")
+ case stateJSRegexp:
+ s = append(s, "exp_template_html_jsregexpescaper")
+ case stateCSS:
+ s = append(s, "exp_template_html_cssvaluefilter")
+ case stateText:
+ s = append(s, "exp_template_html_htmlescaper")
+ case stateRCDATA:
+ s = append(s, "exp_template_html_rcdataescaper")
+ case stateAttr:
+ // Handled below in delim check.
+ case stateAttrName, stateTag:
+ c.state = stateAttrName
+ s = append(s, "exp_template_html_htmlnamefilter")
+ default:
+ if isComment(c.state) {
+ s = append(s, "exp_template_html_commentescaper")
+ } else {
+ panic("unexpected state " + c.state.String())
+ }
+ }
+ // Add a final escaper that depends on how the enclosing attribute value,
+ // if any, is delimited.
+ switch c.delim {
+ case delimNone:
+ // No extra-escaping needed for raw text content.
+ case delimSpaceOrTagEnd:
+ s = append(s, "exp_template_html_nospaceescaper")
+ default:
+ s = append(s, "exp_template_html_attrescaper")
+ }
+ // The pipeline itself is only rewritten later, by commit.
+ e.editActionNode(n, s)
+ return c
+}
+
+// ensurePipelineContains ensures that the pipeline has commands with
+// the identifiers in s in order.
+// If the pipeline already has some of the sanitizers, do not interfere.
+// For example, if p is (.X | html) and s is ["escapeJSVal", "html"] then it
+// has one matching, "html", and one to insert, "escapeJSVal", to produce
+// (.X | escapeJSVal | html).
+//
+// Only the run of identifier commands at the very end of the pipeline is
+// considered for merging; sanitizers are never inserted before an earlier
+// command. A pipeline that contains a "noescape" identifier command not
+// preceded by a non-identifier command is left untouched. p.Cmds is replaced
+// with a freshly allocated slice; the command nodes themselves are reused.
+func ensurePipelineContains(p *parse.PipeNode, s []string) {
+	if len(s) == 0 {
+		return
+	}
+	n := len(p.Cmds)
+	// Find the identifiers at the end of the command chain.
+	// idents is fixed at the first non-identifier command met while scanning
+	// backwards, so that every element of idents is guaranteed to start with
+	// an *parse.IdentifierNode. The scan itself continues so that an earlier
+	// "noescape" is still honored exactly as before.
+	idents := p.Cmds
+	trailing := true
+	for i := n - 1; i >= 0; i-- {
+		if cmd := p.Cmds[i]; len(cmd.Args) != 0 {
+			if id, ok := cmd.Args[0].(*parse.IdentifierNode); ok {
+				if id.Ident == "noescape" {
+					return
+				}
+				continue
+			}
+		}
+		if trailing {
+			idents = p.Cmds[i+1:]
+			trailing = false
+		}
+	}
+	// Count how many of the required sanitizers already appear, in order,
+	// among the trailing identifiers. If all do, there is nothing to add.
+	dups := 0
+	for _, id := range idents {
+		if escFnsEq(s[dups], (id.Args[0].(*parse.IdentifierNode)).Ident) {
+			dups++
+			if dups == len(s) {
+				return
+			}
+		}
+	}
+	// Keep the commands that precede the trailing identifiers as they are.
+	newCmds := make([]*parse.CommandNode, n-len(idents), n+len(s)-dups)
+	copy(newCmds, p.Cmds)
+	// Merge existing identifier commands with the sanitizers needed.
+	for _, id := range idents {
+		i := indexOfStr((id.Args[0].(*parse.IdentifierNode)).Ident, s, escFnsEq)
+		if i != -1 {
+			// Sanitizers that must run before this existing one go first.
+			for _, name := range s[:i] {
+				newCmds = appendCmd(newCmds, newIdentCmd(name))
+			}
+			s = s[i+1:]
+		}
+		newCmds = appendCmd(newCmds, id)
+	}
+	// Create any remaining sanitizers.
+	for _, name := range s {
+		newCmds = appendCmd(newCmds, newIdentCmd(name))
+	}
+	p.Cmds = newCmds
+}
+
+// redundantFuncs[a][b] implies that funcMap[b](funcMap[a](x)) == funcMap[a](x)
+// for all x.
+// appendCmd uses it to avoid appending escaper b directly after escaper a.
+var redundantFuncs = map[string]map[string]bool{
+ "exp_template_html_commentescaper": {
+ "exp_template_html_attrescaper": true,
+ "exp_template_html_nospaceescaper": true,
+ "exp_template_html_htmlescaper": true,
+ },
+ "exp_template_html_cssescaper": {
+ "exp_template_html_attrescaper": true,
+ },
+ "exp_template_html_jsregexpescaper": {
+ "exp_template_html_attrescaper": true,
+ },
+ "exp_template_html_jsstrescaper": {
+ "exp_template_html_attrescaper": true,
+ },
+ "exp_template_html_urlescaper": {
+ "exp_template_html_urlnormalizer": true,
+ },
+}
+
+// appendCmd appends the given command to the end of the command pipeline
+// unless it is redundant with the last command.
+//
+// Redundancy is only considered when both the last command in cmds and cmd
+// start with an identifier; a command that starts with anything else (or has
+// no arguments) is always appended. It returns the possibly extended slice.
+func appendCmd(cmds []*parse.CommandNode, cmd *parse.CommandNode) []*parse.CommandNode {
+	if n := len(cmds); n != 0 && len(cmds[n-1].Args) != 0 && len(cmd.Args) != 0 {
+		last, okLast := cmds[n-1].Args[0].(*parse.IdentifierNode)
+		next, okNext := cmd.Args[0].(*parse.IdentifierNode)
+		// Both assertions must succeed before either Ident is read;
+		// otherwise next (or last) is a nil pointer.
+		if okLast && okNext && redundantFuncs[last.Ident][next.Ident] {
+			return cmds
+		}
+	}
+	return append(cmds, cmd)
+}
+
+// indexOfStr returns the smallest index i for which eq(s, strs[i]) reports
+// true, or -1 when no element of strs matches. eq is always called with s as
+// its first argument.
+func indexOfStr(s string, strs []string, eq func(a, b string) bool) int {
+	for i := 0; i < len(strs); i++ {
+		if eq(s, strs[i]) {
+			return i
+		}
+	}
+	return -1
+}
+
+// escFnsEq reports whether the escaping functions named a and b are
+// equivalent. Each name is first replaced by its builtin equivalent from
+// equivEscapers, when it has one, and the results are compared.
+func escFnsEq(a, b string) bool {
+	canonical := func(name string) string {
+		if builtin := equivEscapers[name]; builtin != "" {
+			return builtin
+		}
+		return name
+	}
+	return canonical(a) == canonical(b)
+}
+
+// newIdentCmd returns a new command node whose only argument is an
+// identifier node for the given identifier.
+func newIdentCmd(identifier string) *parse.CommandNode {
+	ident := parse.NewIdentifier(identifier)
+	cmd := &parse.CommandNode{NodeType: parse.NodeCommand}
+	cmd.Args = []parse.Node{ident}
+	return cmd
+}
+
+// nudge returns the context that would result from following empty string
+// transitions from the input context.
+// For example, parsing:
+//     `<a href=`
+// will end in context{stateBeforeValue, attrURL}, but parsing one extra rune:
+//     `<a href=x`
+// will end in context{stateURL, delimSpaceOrTagEnd, ...}.
+// Two transitions happen when the 'x' is seen:
+// (1) a move from the before-value state to a start-of-value state that
+//     consumes no character, and
+// (2) consumption of 'x', moving past the first value character.
+// nudge produces the context after (1) only. Contexts in any other state
+// are returned unchanged.
+func nudge(c context) context {
+	if c.state == stateTag {
+		// In `<foo {{.}}`, the action should emit an attribute.
+		c.state = stateAttrName
+		return c
+	}
+	if c.state == stateBeforeValue {
+		// In `<foo bar={{.}}`, the action is an undelimited value.
+		// The start state is looked up before c.attr is reset.
+		c.state = attrStartStates[c.attr]
+		c.delim = delimSpaceOrTagEnd
+		c.attr = attrNone
+		return c
+	}
+	if c.state == stateAfterName {
+		// In `<foo bar {{.}}`, the action is an attribute name.
+		c.state = stateAttrName
+		c.attr = attrNone
+	}
+	return c
+}
+
+// join joins the two contexts of a branch template node. The result is an
+// error context if either input is an error context, or if the inputs differ
+// in a way that cannot be reconciled. Contexts that differ only in urlPart
+// or only in jsCtx are reconciled by marking that field unknown, and
+// contexts that become joinable after nudging are joined in nudged form.
+// line and nodeName are only used to describe a resulting ErrBranchEnd.
+func join(a, b context, line int, nodeName string) context {
+	switch {
+	case a.state == stateError:
+		return a
+	case b.state == stateError:
+		return b
+	case a.eq(b):
+		return a
+	}
+
+	// Do the contexts differ only by urlPart?
+	byURLPart := a
+	byURLPart.urlPart = b.urlPart
+	if byURLPart.eq(b) {
+		byURLPart.urlPart = urlPartUnknown
+		return byURLPart
+	}
+
+	// Do the contexts differ only by jsCtx?
+	byJSCtx := a
+	byJSCtx.jsCtx = b.jsCtx
+	if byJSCtx.eq(b) {
+		byJSCtx.jsCtx = jsCtxUnknown
+		return byJSCtx
+	}
+
+	// Allow a nudged context to join with an unnudged one.
+	// This means that
+	//   <p title={{if .C}}{{.}}{{end}}
+	// ends in an unquoted value state even though the else branch
+	// ends in stateBeforeValue.
+	// Recursing is only worthwhile when nudging changed at least one side.
+	na, nb := nudge(a), nudge(b)
+	if !na.eq(a) || !nb.eq(b) {
+		if joined := join(na, nb, line, nodeName); joined.state != stateError {
+			return joined
+		}
+	}
+
+	return context{
+		state: stateError,
+		err:   errorf(ErrBranchEnd, line, "{{%s}} branches end in different contexts: %v, %v", nodeName, a, b),
+	}
+}
+
+// escapeBranch escapes a branch template node ("if", "range" or "with") that
+// starts in context c and returns the join of the contexts in which its body
+// and its else list end. For "range" the body is additionally checked for
+// ending in a context from which it can be entered again.
+func (e *escaper) escapeBranch(c context, n *parse.BranchNode, nodeName string) context {
+	body := e.escapeList(c, n.List)
+	if nodeName == "range" && body.state != stateError {
+		// The "true" branch of a "range" node can execute multiple times.
+		// Check that running n.List a second time, starting where the first
+		// pass ended, ends in the same context. The nil filter means the
+		// second pass never records edits or inferences.
+		again, _ := e.escapeListConditionally(body, n.List, nil)
+		body = join(body, again, n.Line, nodeName)
+		if body.state == stateError {
+			// Make clear that this is a problem on loop re-entry
+			// since developers tend to overlook that branch when
+			// debugging templates.
+			body.err.Line = n.Line
+			body.err.Description = "on range loop re-entry: " + body.err.Description
+			return body
+		}
+	}
+	alt := e.escapeList(c, n.ElseList)
+	return join(body, alt, n.Line, nodeName)
+}
+
+// escapeList escapes each node of n in order, threading the context from one
+// node to the next, and returns the final context. A nil list leaves c
+// unchanged.
+func (e *escaper) escapeList(c context, n *parse.ListNode) context {
+	if n != nil {
+		nodes := n.Nodes
+		for i := range nodes {
+			c = e.escape(c, nodes[i])
+		}
+	}
+	return c
+}
+
+// escapeListConditionally escapes a list node using a scratch escaper and
+// only merges the scratch escaper's edits and inferences into e if filter is
+// non-nil and accepts the scratch escaper and the output context.
+// It returns the best guess at an output context, and whether e was updated.
+func (e *escaper) escapeListConditionally(c context, n *parse.ListNode, filter func(*escaper, context) bool) (context, bool) {
+	scratch := newEscaper(e.tmpl)
+	// Make the output contexts inferred so far available to the scratch run.
+	for name, out := range e.output {
+		scratch.output[name] = out
+	}
+	c = scratch.escapeList(c, n)
+	if filter == nil || !filter(scratch, c) {
+		return c, false
+	}
+	// Copy inferences and edits from the scratch escaper back into e.
+	for name, out := range scratch.output {
+		e.output[name] = out
+	}
+	for name, dt := range scratch.derived {
+		e.derived[name] = dt
+	}
+	for name, wasCalled := range scratch.called {
+		e.called[name] = wasCalled
+	}
+	for node, cmds := range scratch.actionNodeEdits {
+		e.editActionNode(node, cmds)
+	}
+	for node, callee := range scratch.templateNodeEdits {
+		e.editTemplateNode(node, callee)
+	}
+	for node, text := range scratch.textNodeEdits {
+		e.editTextNode(node, text)
+	}
+	return c, true
+}
+
+// escapeTemplate escapes a {{template}} call node. The callee is escaped for
+// the context c of the call site; if that yields a differently named
+// (context-mangled) template, the call is scheduled to be redirected to it.
+// It returns the callee's output context.
+func (e *escaper) escapeTemplate(c context, n *parse.TemplateNode) context {
+	out, callee := e.escapeTree(c, n.Name, n.Line)
+	if callee != n.Name {
+		e.editTemplateNode(n, callee)
+	}
+	return out
+}
+
+// escapeTree escapes the named template starting in the given context as
+// necessary. It returns the template's output context together with the
+// name, mangled with the start context, under which the result is known.
+// line is only used to position a resulting ErrNoSuchTemplate.
+func (e *escaper) escapeTree(c context, name string, line int) (context, string) {
+	// Mangle the template name with the input context to produce a reliable
+	// identifier.
+	dname := c.mangle(name)
+	e.called[dname] = true
+	if out, ok := e.output[dname]; ok {
+		// Already escaped.
+		return out, dname
+	}
+	t := e.template(name)
+	if t == nil {
+		// Two cases: the template has never been mentioned at all, or it
+		// exists but is empty. Distinguish the cases in the error message.
+		format := "no such template %q"
+		if e.tmpl.set[name] != nil {
+			format = "%q is an incomplete or empty template"
+		}
+		return context{
+			state: stateError,
+			err:   errorf(ErrNoSuchTemplate, line, format, name),
+		}, dname
+	}
+	if dname == name {
+		// The start context does not alter the name, so escape t itself.
+		return e.computeOutCtx(c, t), dname
+	}
+	// Use any template derived during an earlier call to escapeTemplate
+	// with different top level templates, or clone if necessary.
+	dt := e.template(dname)
+	if dt == nil {
+		dt = template.New(dname)
+		dt.Tree = &parse.Tree{Name: dname, Root: cloneList(t.Root)}
+		e.derived[dname] = dt
+	}
+	return e.computeOutCtx(c, dt), dname
+}
+
+// computeOutCtx takes a template and its start context and computes the
+// output context while storing any inferences in e. It first assumes that
+// the output context equals c; if that assumption fails it retries once with
+// the first attempt's result as the assumption. If neither attempt is
+// confirmed and the result is not already an error, an ErrOutputContext
+// error context is returned.
+func (e *escaper) computeOutCtx(c context, t *template.Template) context {
+	// Propagate context over the body.
+	out, ok := e.escapeTemplateBody(c, t)
+	if !ok {
+		// Look for a fixed point by assuming out as the output context.
+		if retry, retryOK := e.escapeTemplateBody(out, t); retryOK {
+			out, ok = retry, true
+		}
+		// Otherwise out from the first attempt serves as the error context.
+	}
+	if ok || out.state == stateError {
+		return out
+	}
+	return context{
+		state: stateError,
+		// TODO: Find the first node with a line in t.text.Tree.Root
+		err: errorf(ErrOutputContext, 0, "cannot compute output context for template %s", t.Name()),
+	}
+}
+
+// escapeTemplateBody escapes the body of t starting in context c while
+// assuming that c is also t's output context. It returns the best guess at
+// the actual output context and whether that guess can be trusted, in which
+// case the edits and inferences made along the way have been kept in e.
+func (e *escaper) escapeTemplateBody(c context, t *template.Template) (context, bool) {
+	// We need to assume an output context so that recursive template calls
+	// take the fast path out of escapeTree instead of infinitely recursing.
+	// Naively assuming that the input context is the same as the output
+	// works >90% of the time.
+	e.output[t.Name()] = c
+	accept := func(e1 *escaper, c1 context) bool {
+		switch {
+		case c1.state == stateError:
+			// Do not update the input escaper, e.
+			return false
+		case !e1.called[t.Name()]:
+			// If t is not recursively called, then c1 is an
+			// accurate output context.
+			return true
+		default:
+			// c1 is accurate if it matches our assumed output context.
+			return c.eq(c1)
+		}
+	}
+	return e.escapeListConditionally(c, t.Tree.Root, accept)
+}
+
+// delimEnds maps each delim to a string of characters that terminate it.
+// contextAfterText passes the entry for the current delimiter to
+// bytes.IndexAny to find where an attribute value ends.
+var delimEnds = [...]string{
+ delimDoubleQuote: `"`,
+ delimSingleQuote: "'",
+ // Determined empirically by running the below in various browsers.
+ // var div = document.createElement("DIV");
+ // for (var i = 0; i < 0x10000; ++i) {
+ // div.innerHTML = "<span title=x" + String.fromCharCode(i) + "-bar>";
+ // if (div.getElementsByTagName("SPAN")[0].title.indexOf("bar") < 0)
+ // document.write("<p>U+" + i.toString(16));
+ // }
+ delimSpaceOrTagEnd: " \t\n\f\r>",
+}
+
+// doctypeBytes is the prefix of a document type declaration. escapeText
+// leaves a '<' that begins this exact (case-sensitive) prefix unescaped.
+var doctypeBytes = []byte("<!DOCTYPE")
+
+// escapeText escapes a text template node.
+// It walks the node's text with contextAfterText, starting in context c, and
+// returns the context in which the text ends. Along the way it builds a
+// rewritten copy of the text in which stray '<' characters in text and RCDATA
+// content are replaced with "&lt;" and undelimited comments are dropped or
+// replaced by a single whitespace byte. If anything was rewritten and no
+// error occurred, the rewritten text is recorded as a pending edit to n.
+func (e *escaper) escapeText(c context, n *parse.TextNode) context {
+ // s is the text being scanned, i the scan position, b the rewritten
+ // output, and written the index in s up to which input has already been
+ // accounted for in b (copied, replaced, or deliberately dropped).
+ s, written, i, b := n.Text, 0, 0, new(bytes.Buffer)
+ for i != len(s) {
+ c1, nread := contextAfterText(c, s[i:])
+ i1 := i + nread
+ if c.state == stateText || c.state == stateRCDATA {
+ end := i1
+ if c1.state != c.state {
+ // The state changed within s[i:i1]. Stop before the last
+ // '<' so that it is not escaped by the loop below.
+ for j := end - 1; j >= i; j-- {
+ if s[j] == '<' {
+ end = j
+ break
+ }
+ }
+ }
+ // Escape every remaining '<' that does not start "<!DOCTYPE".
+ for j := i; j < end; j++ {
+ if s[j] == '<' && !bytes.HasPrefix(s[j:], doctypeBytes) {
+ b.Write(s[written:j])
+ b.WriteString("&lt;")
+ written = j + 1
+ }
+ }
+ } else if isComment(c.state) && c.delim == delimNone {
+ switch c.state {
+ case stateJSBlockCmt:
+ // http://es5.github.com/#x7.4:
+ // "Comments behave like white space and are
+ // discarded except that, if a MultiLineComment
+ // contains a line terminator character, then
+ // the entire comment is considered to be a
+ // LineTerminator for purposes of parsing by
+ // the syntactic grammar."
+ if bytes.IndexAny(s[written:i1], "\n\r\u2028\u2029") != -1 {
+ b.WriteByte('\n')
+ } else {
+ b.WriteByte(' ')
+ }
+ case stateCSSBlockCmt:
+ b.WriteByte(' ')
+ }
+ // Skip over the comment content without copying it to b.
+ written = i1
+ }
+ if c.state != c1.state && isComment(c1.state) && c1.delim == delimNone {
+ // Preserve the portion between written and the comment start.
+ cs := i1 - 2
+ if c1.state == stateHTMLCmt {
+ // "<!--" instead of "/*" or "//"
+ cs -= 2
+ }
+ b.Write(s[written:cs])
+ written = i1
+ }
+ // Guard against a transition that neither consumes input nor
+ // changes state, which would otherwise loop forever.
+ if i == i1 && c.state == c1.state {
+ panic(fmt.Sprintf("infinite loop from %v to %v on %q..%q", c, c1, s[:i], s[i:]))
+ }
+ c, i = c1, i1
+ }
+
+ // written is only non-zero if something was rewritten above.
+ if written != 0 && c.state != stateError {
+ if !isComment(c.state) || c.delim != delimNone {
+ // Copy the unmodified tail, unless the text ends inside an
+ // undelimited comment whose content is being dropped.
+ b.Write(n.Text[written:])
+ }
+ e.editTextNode(n, b.Bytes())
+ }
+ return c
+}
+
+// contextAfterText starts in context c, consumes some tokens from the front of
+// s, then returns the context after those tokens and the number of bytes of s
+// that were consumed.
+func contextAfterText(c context, s []byte) (context, int) {
+ if c.delim == delimNone {
+ // No attribute delimiter is active.
+ c1, i := tSpecialTagEnd(c, s)
+ if i == 0 {
+ // A special end tag (`</script>`) has been seen and
+ // all content preceding it has been consumed.
+ return c1, 0
+ }
+ // Consider all content up to any end tag.
+ return transitionFunc[c.state](c, s[:i])
+ }
+
+ // Find the end of the attribute value, or len(s) if it is not in s.
+ i := bytes.IndexAny(s, delimEnds[c.delim])
+ if i == -1 {
+ i = len(s)
+ }
+ if c.delim == delimSpaceOrTagEnd {
+ // http://www.w3.org/TR/html5/tokenization.html#attribute-value-unquoted-state
+ // lists the runes below as error characters.
+ // Error out because HTML parsers may differ on whether
+ // "<a id= onclick=f(" ends inside id's or onclick's value,
+ // "<a class=`foo " ends inside a value,
+ // "<a style=font:'Arial'" needs open-quote fixup.
+ // IE treats '`' as a quotation character.
+ if j := bytes.IndexAny(s[:i], "\"'<=`"); j >= 0 {
+ return context{
+ state: stateError,
+ err: errorf(ErrBadHTML, 0, "%q in unquoted attr: %q", s[j:j+1], s[:i]),
+ }, len(s)
+ }
+ }
+ if i == len(s) {
+ // Remain inside the attribute.
+ // Decode the value so non-HTML rules can easily handle
+ // <button onclick="alert(&quot;Hi!&quot;)">
+ // without having to entity decode token boundaries.
+ // The decoded bytes are fed through the transition functions
+ // until exhausted, but all of the undecoded s is reported as
+ // consumed.
+ for u := []byte(html.UnescapeString(string(s))); len(u) != 0; {
+ c1, i1 := transitionFunc[c.state](c, u)
+ c, u = c1, u[i1:]
+ }
+ return c, len(s)
+ }
+ if c.delim != delimSpaceOrTagEnd {
+ // Consume any quote.
+ i++
+ }
+ // On exiting an attribute, we discard all state information
+ // except the state and element.
+ return context{state: stateTag, element: c.element}, i
+}
+
+// editActionNode schedules cmds as the escapers to ensure on n's pipeline
+// when commit runs. It panics if an edit was already recorded for n.
+func (e *escaper) editActionNode(n *parse.ActionNode, cmds []string) {
+	_, dup := e.actionNodeEdits[n]
+	if dup {
+		panic(fmt.Sprintf("node %s shared between templates", n))
+	}
+	e.actionNodeEdits[n] = cmds
+}
+
+// editTemplateNode schedules callee as the new target name of the
+// {{template}} call n when commit runs. It panics if an edit was already
+// recorded for n.
+func (e *escaper) editTemplateNode(n *parse.TemplateNode, callee string) {
+	_, dup := e.templateNodeEdits[n]
+	if dup {
+		panic(fmt.Sprintf("node %s shared between templates", n))
+	}
+	e.templateNodeEdits[n] = callee
+}
+
+// editTextNode schedules text as the replacement content of the text node n
+// when commit runs. It panics if an edit was already recorded for n.
+func (e *escaper) editTextNode(n *parse.TextNode, text []byte) {
+	_, dup := e.textNodeEdits[n]
+	if dup {
+		panic(fmt.Sprintf("node %s shared between templates", n))
+	}
+	e.textNodeEdits[n] = text
+}
+
+// commit applies the accumulated edits. It registers the escaper functions
+// on every template that has an inferred output context, adds the derived
+// templates to the underlying text template, and then rewrites the action
+// pipelines, {{template}} callee names and text nodes recorded earlier.
+// It panics if a derived template cannot be added.
+func (e *escaper) commit() {
+	for mangled := range e.output {
+		e.template(mangled).Funcs(funcMap)
+	}
+	for _, dt := range e.derived {
+		_, err := e.tmpl.text.AddParseTree(dt.Name(), dt.Tree)
+		if err != nil {
+			panic("error adding derived template")
+		}
+	}
+	for action, escapers := range e.actionNodeEdits {
+		ensurePipelineContains(action.Pipe, escapers)
+	}
+	for call, callee := range e.templateNodeEdits {
+		call.Name = callee
+	}
+	for text, replacement := range e.textNodeEdits {
+		text.Text = replacement
+	}
+}
+
+// template returns the template with the given, possibly mangled, name.
+// Templates already known to the underlying text template take precedence
+// over ones derived by this escaper; nil is returned when neither has it.
+func (e *escaper) template(name string) *template.Template {
+	if t := e.tmpl.text.Lookup(name); t != nil {
+		return t
+	}
+	return e.derived[name]
+}
diff --git a/src/pkg/html/template/escape_test.go b/src/pkg/html/template/escape_test.go
new file mode 100644
index 000000000..0e31674f0
--- /dev/null
+++ b/src/pkg/html/template/escape_test.go
@@ -0,0 +1,1643 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package template
+
+import (
+ "bytes"
+ "encoding/json"
+ "fmt"
+ "strings"
+ "testing"
+ "text/template"
+ "text/template/parse"
+)
+
+ // badMarshaler is a test fixture whose MarshalJSON output is deliberately
+ // not valid JSON.
+ type badMarshaler struct{}
+
+ // MarshalJSON returns an object-like literal with an unquoted key and a
+ // single-quoted string. Valid JSON requires double quotes for both.
+ func (*badMarshaler) MarshalJSON() ([]byte, error) {
+ const malformed = "{ foo: 'not quite valid JSON' }"
+ return []byte(malformed), nil
+ }
+
+ // goodMarshaler is a test fixture whose MarshalJSON output is a well-formed
+ // JSON object containing HTML-significant characters.
+ type goodMarshaler struct{}
+
+ // MarshalJSON returns a fixed one-member JSON object and a nil error.
+ func (*goodMarshaler) MarshalJSON() ([]byte, error) {
+ const wellFormed = `{ "<foo>": "O'Reilly" }`
+ return []byte(wellFormed), nil
+ }
+
+ // TestEscape parses each input as a single template, executes it against a
+ // fixed data value (once by value and once through a pointer) and compares
+ // the output with the expected escaped text.
+ func TestEscape(t *testing.T) {
+ data := struct {
+ F, T bool
+ C, G, H string
+ A, E []string
+ B, M json.Marshaler
+ N int
+ Z *int
+ W HTML
+ }{
+ F: false,
+ T: true,
+ C: "<Cincinatti>",
+ G: "<Goodbye>",
+ H: "<Hello>",
+ A: []string{"<a>", "<b>"},
+ E: []string{},
+ N: 42,
+ B: &badMarshaler{},
+ M: &goodMarshaler{},
+ Z: nil,
+ W: HTML(`&iexcl;<b class="foo">Hello</b>, <textarea>O'World</textarea>!`),
+ }
+ pdata := &data
+
+ tests := []struct {
+ name string
+ input string
+ output string
+ }{
+ {
+ "if",
+ "{{if .T}}Hello{{end}}, {{.C}}!",
+ "Hello, &lt;Cincinatti&gt;!",
+ },
+ {
+ "else",
+ "{{if .F}}{{.H}}{{else}}{{.G}}{{end}}!",
+ "&lt;Goodbye&gt;!",
+ },
+ {
+ "overescaping1",
+ "Hello, {{.C | html}}!",
+ "Hello, &lt;Cincinatti&gt;!",
+ },
+ {
+ "overescaping2",
+ "Hello, {{html .C}}!",
+ "Hello, &lt;Cincinatti&gt;!",
+ },
+ {
+ "overescaping3",
+ "{{with .C}}{{$msg := .}}Hello, {{$msg}}!{{end}}",
+ "Hello, &lt;Cincinatti&gt;!",
+ },
+ {
+ "assignment",
+ "{{if $x := .H}}{{$x}}{{end}}",
+ "&lt;Hello&gt;",
+ },
+ {
+ "withBody",
+ "{{with .H}}{{.}}{{end}}",
+ "&lt;Hello&gt;",
+ },
+ {
+ "withElse",
+ "{{with .E}}{{.}}{{else}}{{.H}}{{end}}",
+ "&lt;Hello&gt;",
+ },
+ {
+ "rangeBody",
+ "{{range .A}}{{.}}{{end}}",
+ "&lt;a&gt;&lt;b&gt;",
+ },
+ {
+ "rangeElse",
+ "{{range .E}}{{.}}{{else}}{{.H}}{{end}}",
+ "&lt;Hello&gt;",
+ },
+ {
+ "nonStringValue",
+ "{{.T}}",
+ "true",
+ },
+ {
+ "constant",
+ `<a href="/search?q={{"'a<b'"}}">`,
+ `<a href="/search?q=%27a%3cb%27">`,
+ },
+ {
+ "multipleAttrs",
+ "<a b=1 c={{.H}}>",
+ "<a b=1 c=&lt;Hello&gt;>",
+ },
+ {
+ "urlStartRel",
+ `<a href='{{"/foo/bar?a=b&c=d"}}'>`,
+ `<a href='/foo/bar?a=b&amp;c=d'>`,
+ },
+ {
+ "urlStartAbsOk",
+ `<a href='{{"http://example.com/foo/bar?a=b&c=d"}}'>`,
+ `<a href='http://example.com/foo/bar?a=b&amp;c=d'>`,
+ },
+ {
+ "protocolRelativeURLStart",
+ `<a href='{{"//example.com:8000/foo/bar?a=b&c=d"}}'>`,
+ `<a href='//example.com:8000/foo/bar?a=b&amp;c=d'>`,
+ },
+ {
+ "pathRelativeURLStart",
+ `<a href="{{"/javascript:80/foo/bar"}}">`,
+ `<a href="/javascript:80/foo/bar">`,
+ },
+ {
+ "dangerousURLStart",
+ `<a href='{{"javascript:alert(%22pwned%22)"}}'>`,
+ `<a href='#ZgotmplZ'>`,
+ },
+ {
+ "dangerousURLStart2",
+ `<a href=' {{"javascript:alert(%22pwned%22)"}}'>`,
+ `<a href=' #ZgotmplZ'>`,
+ },
+ {
+ "nonHierURL",
+ `<a href={{"mailto:Muhammed \"The Greatest\" Ali <m.ali@example.com>"}}>`,
+ `<a href=mailto:Muhammed%20%22The%20Greatest%22%20Ali%20%3cm.ali@example.com%3e>`,
+ },
+ {
+ "urlPath",
+ `<a href='http://{{"javascript:80"}}/foo'>`,
+ `<a href='http://javascript:80/foo'>`,
+ },
+ {
+ "urlQuery",
+ `<a href='/search?q={{.H}}'>`,
+ `<a href='/search?q=%3cHello%3e'>`,
+ },
+ {
+ "urlFragment",
+ `<a href='/faq#{{.H}}'>`,
+ `<a href='/faq#%3cHello%3e'>`,
+ },
+ {
+ "urlBranch",
+ `<a href="{{if .F}}/foo?a=b{{else}}/bar{{end}}">`,
+ `<a href="/bar">`,
+ },
+ {
+ "urlBranchConflictMoot",
+ `<a href="{{if .T}}/foo?a={{else}}/bar#{{end}}{{.C}}">`,
+ `<a href="/foo?a=%3cCincinatti%3e">`,
+ },
+ {
+ "jsStrValue",
+ "<button onclick='alert({{.H}})'>",
+ `<button onclick='alert(&#34;\u003cHello\u003e&#34;)'>`,
+ },
+ {
+ "jsNumericValue",
+ "<button onclick='alert({{.N}})'>",
+ `<button onclick='alert( 42 )'>`,
+ },
+ {
+ "jsBoolValue",
+ "<button onclick='alert({{.T}})'>",
+ `<button onclick='alert( true )'>`,
+ },
+ {
+ "jsNilValue",
+ "<button onclick='alert(typeof{{.Z}})'>",
+ `<button onclick='alert(typeof null )'>`,
+ },
+ {
+ "jsObjValue",
+ "<button onclick='alert({{.A}})'>",
+ `<button onclick='alert([&#34;\u003ca\u003e&#34;,&#34;\u003cb\u003e&#34;])'>`,
+ },
+ {
+ "jsObjValueScript",
+ "<script>alert({{.A}})</script>",
+ `<script>alert(["\u003ca\u003e","\u003cb\u003e"])</script>`,
+ },
+ {
+ "jsObjValueNotOverEscaped",
+ "<button onclick='alert({{.A | html}})'>",
+ `<button onclick='alert([&#34;\u003ca\u003e&#34;,&#34;\u003cb\u003e&#34;])'>`,
+ },
+ {
+ "jsStr",
+ "<button onclick='alert(&quot;{{.H}}&quot;)'>",
+ `<button onclick='alert(&quot;\x3cHello\x3e&quot;)'>`,
+ },
+ {
+ "badMarshaller",
+ `<button onclick='alert(1/{{.B}}in numbers)'>`,
+ `<button onclick='alert(1/ /* json: error calling MarshalJSON for type *template.badMarshaler: invalid character &#39;f&#39; looking for beginning of object key string */null in numbers)'>`,
+ },
+ {
+ "jsMarshaller",
+ `<button onclick='alert({{.M}})'>`,
+ `<button onclick='alert({&#34;&lt;foo&gt;&#34;:&#34;O&#39;Reilly&#34;})'>`,
+ },
+ {
+ "jsStrNotUnderEscaped",
+ "<button onclick='alert({{.C | urlquery}})'>",
+ // URL escaped, then quoted for JS.
+ `<button onclick='alert(&#34;%3CCincinatti%3E&#34;)'>`,
+ },
+ {
+ "jsRe",
+ `<button onclick='alert(/{{"foo+bar"}}/.test(""))'>`,
+ `<button onclick='alert(/foo\x2bbar/.test(""))'>`,
+ },
+ {
+ "jsReBlank",
+ `<script>alert(/{{""}}/.test(""));</script>`,
+ `<script>alert(/(?:)/.test(""));</script>`,
+ },
+ {
+ "jsReAmbigOk",
+ `<script>{{if true}}var x = 1{{end}}</script>`,
+ // The {if} ends in an ambiguous jsCtx but there is
+ // no slash following so we shouldn't care.
+ `<script>var x = 1</script>`,
+ },
+ {
+ "styleBidiKeywordPassed",
+ `<p style="dir: {{"ltr"}}">`,
+ `<p style="dir: ltr">`,
+ },
+ {
+ "styleBidiPropNamePassed",
+ `<p style="border-{{"left"}}: 0; border-{{"right"}}: 1in">`,
+ `<p style="border-left: 0; border-right: 1in">`,
+ },
+ {
+ "styleExpressionBlocked",
+ `<p style="width: {{"expression(alert(1337))"}}">`,
+ `<p style="width: ZgotmplZ">`,
+ },
+ {
+ "styleTagSelectorPassed",
+ `<style>{{"p"}} { color: pink }</style>`,
+ `<style>p { color: pink }</style>`,
+ },
+ {
+ "styleIDPassed",
+ `<style>p{{"#my-ID"}} { font: Arial }</style>`,
+ `<style>p#my-ID { font: Arial }</style>`,
+ },
+ {
+ "styleClassPassed",
+ `<style>p{{".my_class"}} { font: Arial }</style>`,
+ `<style>p.my_class { font: Arial }</style>`,
+ },
+ {
+ "styleQuantityPassed",
+ `<a style="left: {{"2em"}}; top: {{0}}">`,
+ `<a style="left: 2em; top: 0">`,
+ },
+ {
+ "stylePctPassed",
+ `<table style=width:{{"100%"}}>`,
+ `<table style=width:100%>`,
+ },
+ {
+ "styleColorPassed",
+ `<p style="color: {{"#8ff"}}; background: {{"#000"}}">`,
+ `<p style="color: #8ff; background: #000">`,
+ },
+ {
+ "styleObfuscatedExpressionBlocked",
+ `<p style="width: {{" e\\78preS\x00Sio/**/n(alert(1337))"}}">`,
+ `<p style="width: ZgotmplZ">`,
+ },
+ {
+ "styleMozBindingBlocked",
+ `<p style="{{"-moz-binding(alert(1337))"}}: ...">`,
+ `<p style="ZgotmplZ: ...">`,
+ },
+ {
+ "styleObfuscatedMozBindingBlocked",
+ `<p style="{{" -mo\\7a-B\x00I/**/nding(alert(1337))"}}: ...">`,
+ `<p style="ZgotmplZ: ...">`,
+ },
+ {
+ "styleFontNameString",
+ `<p style='font-family: "{{"Times New Roman"}}"'>`,
+ `<p style='font-family: "Times New Roman"'>`,
+ },
+ {
+ // Named distinctly from the single-string case above so that a
+ // failure report identifies which of the two cases failed.
+ "styleFontNameStrings",
+ `<p style='font-family: "{{"Times New Roman"}}", "{{"sans-serif"}}"'>`,
+ `<p style='font-family: "Times New Roman", "sans-serif"'>`,
+ },
+ {
+ "styleFontNameUnquoted",
+ `<p style='font-family: {{"Times New Roman"}}'>`,
+ `<p style='font-family: Times New Roman'>`,
+ },
+ {
+ "styleURLQueryEncoded",
+ `<p style="background: url(/img?name={{"O'Reilly Animal(1)<2>.png"}})">`,
+ `<p style="background: url(/img?name=O%27Reilly%20Animal%281%29%3c2%3e.png)">`,
+ },
+ {
+ "styleQuotedURLQueryEncoded",
+ `<p style="background: url('/img?name={{"O'Reilly Animal(1)<2>.png"}}')">`,
+ `<p style="background: url('/img?name=O%27Reilly%20Animal%281%29%3c2%3e.png')">`,
+ },
+ {
+ "styleStrQueryEncoded",
+ `<p style="background: '/img?name={{"O'Reilly Animal(1)<2>.png"}}'">`,
+ `<p style="background: '/img?name=O%27Reilly%20Animal%281%29%3c2%3e.png'">`,
+ },
+ {
+ "styleURLBadProtocolBlocked",
+ `<a style="background: url('{{"javascript:alert(1337)"}}')">`,
+ `<a style="background: url('#ZgotmplZ')">`,
+ },
+ {
+ "styleStrBadProtocolBlocked",
+ `<a style="background: '{{"vbscript:alert(1337)"}}'">`,
+ `<a style="background: '#ZgotmplZ'">`,
+ },
+ {
+ "styleStrEncodedProtocolEncoded",
+ `<a style="background: '{{"javascript\\3a alert(1337)"}}'">`,
+ // The CSS string 'javascript\\3a alert(1337)' does not contain a colon.
+ `<a style="background: 'javascript\\3a alert\28 1337\29 '">`,
+ },
+ {
+ "styleURLGoodProtocolPassed",
+ `<a style="background: url('{{"http://oreilly.com/O'Reilly Animals(1)<2>;{}.html"}}')">`,
+ `<a style="background: url('http://oreilly.com/O%27Reilly%20Animals%281%29%3c2%3e;%7b%7d.html')">`,
+ },
+ {
+ "styleStrGoodProtocolPassed",
+ `<a style="background: '{{"http://oreilly.com/O'Reilly Animals(1)<2>;{}.html"}}'">`,
+ `<a style="background: 'http\3a\2f\2foreilly.com\2fO\27Reilly Animals\28 1\29\3c 2\3e\3b\7b\7d.html'">`,
+ },
+ {
+ "styleURLEncodedForHTMLInAttr",
+ `<a style="background: url('{{"/search?img=foo&size=icon"}}')">`,
+ `<a style="background: url('/search?img=foo&amp;size=icon')">`,
+ },
+ {
+ "styleURLNotEncodedForHTMLInCdata",
+ `<style>body { background: url('{{"/search?img=foo&size=icon"}}') }</style>`,
+ `<style>body { background: url('/search?img=foo&size=icon') }</style>`,
+ },
+ {
+ "styleURLMixedCase",
+ `<p style="background: URL(#{{.H}})">`,
+ `<p style="background: URL(#%3cHello%3e)">`,
+ },
+ {
+ "stylePropertyPairPassed",
+ `<a style='{{"color: red"}}'>`,
+ `<a style='color: red'>`,
+ },
+ {
+ "styleStrSpecialsEncoded",
+ `<a style="font-family: '{{"/**/'\";:// \\"}}', &quot;{{"/**/'\";:// \\"}}&quot;">`,
+ `<a style="font-family: '\2f**\2f\27\22\3b\3a\2f\2f \\', &quot;\2f**\2f\27\22\3b\3a\2f\2f \\&quot;">`,
+ },
+ {
+ "styleURLSpecialsEncoded",
+ `<a style="border-image: url({{"/**/'\";:// \\"}}), url(&quot;{{"/**/'\";:// \\"}}&quot;), url('{{"/**/'\";:// \\"}}'), 'http://www.example.com/?q={{"/**/'\";:// \\"}}''">`,
+ `<a style="border-image: url(/**/%27%22;://%20%5c), url(&quot;/**/%27%22;://%20%5c&quot;), url('/**/%27%22;://%20%5c'), 'http://www.example.com/?q=%2f%2a%2a%2f%27%22%3b%3a%2f%2f%20%5c''">`,
+ },
+ {
+ "HTML comment",
+ "<b>Hello, <!-- name of world -->{{.C}}</b>",
+ "<b>Hello, &lt;Cincinatti&gt;</b>",
+ },
+ {
+ "HTML comment not first < in text node.",
+ "<<!-- -->!--",
+ "&lt;!--",
+ },
+ {
+ "HTML normalization 1",
+ "a < b",
+ "a &lt; b",
+ },
+ {
+ "HTML normalization 2",
+ "a << b",
+ "a &lt;&lt; b",
+ },
+ {
+ "HTML normalization 3",
+ "a<<!-- --><!-- -->b",
+ "a&lt;b",
+ },
+ {
+ "HTML doctype not normalized",
+ "<!DOCTYPE html>Hello, World!",
+ "<!DOCTYPE html>Hello, World!",
+ },
+ {
+ "No doctype injection",
+ `<!{{"DOCTYPE"}}`,
+ "&lt;!DOCTYPE",
+ },
+ {
+ "Split HTML comment",
+ "<b>Hello, <!-- name of {{if .T}}city -->{{.C}}{{else}}world -->{{.W}}{{end}}</b>",
+ "<b>Hello, &lt;Cincinatti&gt;</b>",
+ },
+ {
+ "JS line comment",
+ "<script>for (;;) { if (c()) break// foo not a label\n" +
+ "foo({{.T}});}</script>",
+ "<script>for (;;) { if (c()) break\n" +
+ "foo( true );}</script>",
+ },
+ {
+ "JS multiline block comment",
+ "<script>for (;;) { if (c()) break/* foo not a label\n" +
+ " */foo({{.T}});}</script>",
+ // Newline separates break from call. If newline
+ // removed, then break will consume label leaving
+ // code invalid.
+ "<script>for (;;) { if (c()) break\n" +
+ "foo( true );}</script>",
+ },
+ {
+ "JS single-line block comment",
+ "<script>for (;;) {\n" +
+ "if (c()) break/* foo a label */foo;" +
+ "x({{.T}});}</script>",
+ // The comment contains no newline, so it is replaced by
+ // a single space and foo directly follows break.
+ "<script>for (;;) {\n" +
+ "if (c()) break foo;" +
+ "x( true );}</script>",
+ },
+ {
+ "JS block comment flush with mathematical division",
+ "<script>var a/*b*//c\nd</script>",
+ "<script>var a /c\nd</script>",
+ },
+ {
+ "JS mixed comments",
+ "<script>var a/*b*///c\nd</script>",
+ "<script>var a \nd</script>",
+ },
+ {
+ "CSS comments",
+ "<style>p// paragraph\n" +
+ `{border: 1px/* color */{{"#00f"}}}</style>`,
+ "<style>p\n" +
+ "{border: 1px #00f}</style>",
+ },
+ {
+ "JS attr block comment",
+ `<a onclick="f(&quot;&quot;); /* alert({{.H}}) */">`,
+ // Attribute comment tests should pass if the comments
+ // are successfully elided.
+ `<a onclick="f(&quot;&quot;); /* alert() */">`,
+ },
+ {
+ "JS attr line comment",
+ `<a onclick="// alert({{.G}})">`,
+ `<a onclick="// alert()">`,
+ },
+ {
+ "CSS attr block comment",
+ `<a style="/* color: {{.H}} */">`,
+ `<a style="/* color: */">`,
+ },
+ {
+ "CSS attr line comment",
+ `<a style="// color: {{.G}}">`,
+ `<a style="// color: ">`,
+ },
+ {
+ "HTML substitution commented out",
+ "<p><!-- {{.H}} --></p>",
+ "<p></p>",
+ },
+ {
+ "Comment ends flush with start",
+ "<!--{{.}}--><script>/*{{.}}*///{{.}}\n</script><style>/*{{.}}*///{{.}}\n</style><a onclick='/*{{.}}*///{{.}}' style='/*{{.}}*///{{.}}'>",
+ "<script> \n</script><style> \n</style><a onclick='/**///' style='/**///'>",
+ },
+ {
+ "typed HTML in text",
+ `{{.W}}`,
+ `&iexcl;<b class="foo">Hello</b>, <textarea>O'World</textarea>!`,
+ },
+ {
+ "typed HTML in attribute",
+ `<div title="{{.W}}">`,
+ `<div title="&iexcl;Hello, O&#39;World!">`,
+ },
+ {
+ "typed HTML in script",
+ `<button onclick="alert({{.W}})">`,
+ `<button onclick="alert(&#34;&amp;iexcl;\u003cb class=\&#34;foo\&#34;\u003eHello\u003c/b\u003e, \u003ctextarea\u003eO&#39;World\u003c/textarea\u003e!&#34;)">`,
+ },
+ {
+ "typed HTML in RCDATA",
+ `<textarea>{{.W}}</textarea>`,
+ `<textarea>&iexcl;&lt;b class=&#34;foo&#34;&gt;Hello&lt;/b&gt;, &lt;textarea&gt;O&#39;World&lt;/textarea&gt;!</textarea>`,
+ },
+ {
+ "range in textarea",
+ "<textarea>{{range .A}}{{.}}{{end}}</textarea>",
+ "<textarea>&lt;a&gt;&lt;b&gt;</textarea>",
+ },
+ {
+ "auditable exemption from escaping",
+ "{{range .A}}{{. | noescape}}{{end}}",
+ "<a><b>",
+ },
+ {
+ "No tag injection",
+ `{{"10$"}}<{{"script src,evil.org/pwnd.js"}}...`,
+ `10$&lt;script src,evil.org/pwnd.js...`,
+ },
+ {
+ "No comment injection",
+ `<{{"!--"}}`,
+ `&lt;!--`,
+ },
+ {
+ "No RCDATA end tag injection",
+ `<textarea><{{"/textarea "}}...</textarea>`,
+ `<textarea>&lt;/textarea ...</textarea>`,
+ },
+ {
+ "optional attrs",
+ `<img class="{{"iconClass"}}"` +
+ `{{if .T}} id="{{"<iconId>"}}"{{end}}` +
+ // Double quotes inside if/else.
+ ` src=` +
+ `{{if .T}}"?{{"<iconPath>"}}"` +
+ `{{else}}"images/cleardot.gif"{{end}}` +
+ // Missing space before title, but it is not a
+ // part of the src attribute.
+ `{{if .T}}title="{{"<title>"}}"{{end}}` +
+ // Quotes outside if/else.
+ ` alt="` +
+ `{{if .T}}{{"<alt>"}}` +
+ `{{else}}{{if .F}}{{"<title>"}}{{end}}` +
+ `{{end}}"` +
+ `>`,
+ `<img class="iconClass" id="&lt;iconId&gt;" src="?%3ciconPath%3e"title="&lt;title&gt;" alt="&lt;alt&gt;">`,
+ },
+ {
+ "conditional valueless attr name",
+ `<input{{if .T}} checked{{end}} name=n>`,
+ `<input checked name=n>`,
+ },
+ {
+ "conditional dynamic valueless attr name 1",
+ `<input{{if .T}} {{"checked"}}{{end}} name=n>`,
+ `<input checked name=n>`,
+ },
+ {
+ "conditional dynamic valueless attr name 2",
+ `<input {{if .T}}{{"checked"}} {{end}}name=n>`,
+ `<input checked name=n>`,
+ },
+ {
+ "dynamic attribute name",
+ `<img on{{"load"}}="alert({{"loaded"}})">`,
+ // Treated as JS since quotes are inserted.
+ `<img onload="alert(&#34;loaded&#34;)">`,
+ },
+ {
+ "bad dynamic attribute name 1",
+ // Allow checked, selected, disabled, but not JS or
+ // CSS attributes.
+ `<input {{"onchange"}}="{{"doEvil()"}}">`,
+ `<input ZgotmplZ="doEvil()">`,
+ },
+ {
+ "bad dynamic attribute name 2",
+ `<div {{"sTyle"}}="{{"color: expression(alert(1337))"}}">`,
+ `<div ZgotmplZ="color: expression(alert(1337))">`,
+ },
+ {
+ "bad dynamic attribute name 3",
+ // Allow title or alt, but not a URL.
+ `<img {{"src"}}="{{"javascript:doEvil()"}}">`,
+ `<img ZgotmplZ="javascript:doEvil()">`,
+ },
+ {
+ "bad dynamic attribute name 4",
+ // Structure preservation requires values to associate
+ // with a consistent attribute.
+ `<input checked {{""}}="Whose value am I?">`,
+ `<input checked ZgotmplZ="Whose value am I?">`,
+ },
+ {
+ "dynamic element name",
+ `<h{{3}}><table><t{{"head"}}>...</h{{3}}>`,
+ `<h3><table><thead>...</h3>`,
+ },
+ {
+ "bad dynamic element name",
+ // Dynamic element names are typically used to switch
+ // between (thead, tfoot, tbody), (ul, ol), (th, td),
+ // and other replaceable sets.
+ // We do not currently easily support (ul, ol).
+ // If we do change to support that, this test should
+ // catch failures to filter out special tag names which
+ // would violate the structure preservation property --
+ // if any special tag name could be substituted, then
+ // the content could be raw text/RCDATA for some inputs
+ // and regular HTML content for others.
+ `<{{"script"}}>{{"doEvil()"}}</{{"script"}}>`,
+ `&lt;script>doEvil()&lt;/script>`,
+ },
+ }
+
+ for _, test := range tests {
+ tmpl := New(test.name)
+ // TODO: Move noescape into template/func.go
+ tmpl.Funcs(FuncMap{
+ "noescape": func(a ...interface{}) string {
+ return fmt.Sprint(a...)
+ },
+ })
+ tmpl = Must(tmpl.Parse(test.input))
+ b := new(bytes.Buffer)
+ if err := tmpl.Execute(b, data); err != nil {
+ t.Errorf("%s: template execution failed: %s", test.name, err)
+ continue
+ }
+ if w, g := test.output, b.String(); w != g {
+ t.Errorf("%s: escaped output: want\n\t%q\ngot\n\t%q", test.name, w, g)
+ continue
+ }
+ // Execute again through a pointer to the same data; the expected
+ // output is the same.
+ b.Reset()
+ if err := tmpl.Execute(b, pdata); err != nil {
+ t.Errorf("%s: template execution failed for pointer: %s", test.name, err)
+ continue
+ }
+ if w, g := test.output, b.String(); w != g {
+ t.Errorf("%s: escaped output for pointer: want\n\t%q\ngot\n\t%q", test.name, w, g)
+ continue
+ }
+ }
+ }
+
+ // TestEscapeSet checks escaping across sets of templates that call one
+ // another. Each case defines its templates by name, executes "main" against
+ // a small tree of dataItem values and compares the output with the expected
+ // text.
+ func TestEscapeSet(t *testing.T) {
+ type dataItem struct {
+ Children []*dataItem
+ X string
+ }
+
+ // tree has two leaf children and a third child that holds one leaf.
+ tree := dataItem{
+ Children: []*dataItem{
+ {X: "foo"},
+ {X: "<bar>"},
+ {
+ Children: []*dataItem{
+ {X: "baz"},
+ },
+ },
+ },
+ }
+
+ cases := []struct {
+ defs map[string]string
+ expect string
+ }{
+ // The trivial set.
+ {
+ map[string]string{
+ "main": ``,
+ },
+ ``,
+ },
+ // A template called in the start context.
+ {
+ map[string]string{
+ "main": `Hello, {{template "helper"}}!`,
+ "helper": `{{"<World>"}}`,
+ },
+ `Hello, &lt;World&gt;!`,
+ },
+ // A template called in a context other than the start.
+ {
+ map[string]string{
+ "main": `<a onclick='a = {{template "helper"}};'>`,
+ // Not a valid top level HTML template.
+ // "<b" is not a full tag.
+ "helper": `{{"<a>"}}<b`,
+ },
+ `<a onclick='a = &#34;\u003ca\u003e&#34;<b;'>`,
+ },
+ // A recursive template that ends in its start context.
+ {
+ map[string]string{
+ "main": `{{range .Children}}{{template "main" .}}{{else}}{{.X}} {{end}}`,
+ },
+ `foo &lt;bar&gt; baz `,
+ },
+ // A recursive helper template that ends in its start context.
+ {
+ map[string]string{
+ "main": `{{template "helper" .}}`,
+ "helper": `{{if .Children}}<ul>{{range .Children}}<li>{{template "main" .}}</li>{{end}}</ul>{{else}}{{.X}}{{end}}`,
+ },
+ `<ul><li>foo</li><li>&lt;bar&gt;</li><li><ul><li>baz</li></ul></li></ul>`,
+ },
+ // Co-recursive templates that end in their start context.
+ {
+ map[string]string{
+ "main": `<blockquote>{{range .Children}}{{template "helper" .}}{{end}}</blockquote>`,
+ "helper": `{{if .Children}}{{template "main" .}}{{else}}{{.X}}<br>{{end}}`,
+ },
+ `<blockquote>foo<br>&lt;bar&gt;<br><blockquote>baz<br></blockquote></blockquote>`,
+ },
+ // A template that is called in two different contexts.
+ {
+ map[string]string{
+ "main": `<button onclick="title='{{template "helper"}}'; ...">{{template "helper"}}</button>`,
+ "helper": `{{11}} of {{"<100>"}}`,
+ },
+ `<button onclick="title='11 of \x3c100\x3e'; ...">11 of &lt;100&gt;</button>`,
+ },
+ // A non-recursive template that ends in a different context.
+ // helper starts in jsCtxRegexp and ends in jsCtxDivOp.
+ {
+ map[string]string{
+ "main": `<script>var x={{template "helper"}}/{{"42"}};</script>`,
+ "helper": "{{126}}",
+ },
+ `<script>var x= 126 /"42";</script>`,
+ },
+ // A recursive template that ends in a similar context.
+ {
+ map[string]string{
+ "main": `<script>var x=[{{template "countdown" 4}}];</script>`,
+ "countdown": `{{.}}{{if .}},{{template "countdown" . | pred}}{{end}}`,
+ },
+ `<script>var x=[ 4 , 3 , 2 , 1 , 0 ];</script>`,
+ },
+ // A recursive template that ends in a different context.
+ /*
+ {
+ map[string]string{
+ "main": `<a href="/foo{{template "helper" .}}">`,
+ "helper": `{{if .Children}}{{range .Children}}{{template "helper" .}}{{end}}{{else}}?x={{.X}}{{end}}`,
+ },
+ `<a href="/foo?x=foo?x=%3cbar%3e?x=baz">`,
+ },
+ */
+ }
+
+ // pred is a template function used by the recursive countdown case. It
+ // returns i-1 for a single positive int argument and an error for any
+ // other input.
+ funcs := FuncMap{"pred": func(a ...interface{}) (interface{}, error) {
+ if len(a) != 1 {
+ return nil, fmt.Errorf("undefined pred(%v)", a)
+ }
+ // A failed assertion leaves i at zero, which is rejected below.
+ i, _ := a[0].(int)
+ if i <= 0 {
+ return nil, fmt.Errorf("undefined pred(%v)", a)
+ }
+ return i - 1, nil
+ }}
+
+ for _, tc := range cases {
+ // Wrap every body in a {{define}} block so that a single Parse call
+ // loads the whole set.
+ var src bytes.Buffer
+ for name, body := range tc.defs {
+ fmt.Fprintf(&src, "{{define %q}}%s{{end}} ", name, body)
+ }
+ source := src.String()
+
+ tmpl, err := New("root").Funcs(funcs).Parse(source)
+ if err != nil {
+ t.Errorf("error parsing %q: %v", source, err)
+ continue
+ }
+
+ out := new(bytes.Buffer)
+ if err := tmpl.ExecuteTemplate(out, "main", tree); err != nil {
+ t.Errorf("%q executing %v", err.Error(), tmpl.Lookup("main"))
+ continue
+ }
+ if got := out.String(); got != tc.expect {
+ t.Errorf("want\n\t%q\ngot\n\t%q", tc.expect, got)
+ }
+ }
+ }
+
+func TestErrors(t *testing.T) {
+ tests := []struct {
+ input string
+ err string
+ }{
+ // Non-error cases.
+ {
+ "{{if .Cond}}<a>{{else}}<b>{{end}}",
+ "",
+ },
+ {
+ "{{if .Cond}}<a>{{end}}",
+ "",
+ },
+ {
+ "{{if .Cond}}{{else}}<b>{{end}}",
+ "",
+ },
+ {
+ "{{with .Cond}}<div>{{end}}",
+ "",
+ },
+ {
+ "{{range .Items}}<a>{{end}}",
+ "",
+ },
+ {
+ "<a href='/foo?{{range .Items}}&{{.K}}={{.V}}{{end}}'>",
+ "",
+ },
+ // Error cases.
+ {
+ "{{if .Cond}}<a{{end}}",
+ "z:1: {{if}} branches",
+ },
+ {
+ "{{if .Cond}}\n{{else}}\n<a{{end}}",
+ "z:1: {{if}} branches",
+ },
+ {
+ // Missing quote in the else branch.
+ `{{if .Cond}}<a href="foo">{{else}}<a href="bar>{{end}}`,
+ "z:1: {{if}} branches",
+ },
+ {
+ // Different kind of attribute: href implies a URL.
+ "<a {{if .Cond}}href='{{else}}title='{{end}}{{.X}}'>",
+ "z:1: {{if}} branches",
+ },
+ {
+ "\n{{with .X}}<a{{end}}",
+ "z:2: {{with}} branches",
+ },
+ {
+ "\n{{with .X}}<a>{{else}}<a{{end}}",
+ "z:2: {{with}} branches",
+ },
+ {
+ "{{range .Items}}<a{{end}}",
+ `z:1: on range loop re-entry: "<" in attribute name: "<a"`,
+ },
+ {
+ "\n{{range .Items}} x='<a{{end}}",
+ "z:2: on range loop re-entry: {{range}} branches",
+ },
+ {
+ "<a b=1 c={{.H}}",
+ "z: ends in a non-text context: {stateAttr delimSpaceOrTagEnd",
+ },
+ {
+ "<script>foo();",
+ "z: ends in a non-text context: {stateJS",
+ },
+ {
+ `<a href="{{if .F}}/foo?a={{else}}/bar/{{end}}{{.H}}">`,
+ "z:1: {{.H}} appears in an ambiguous URL context",
+ },
+ {
+ `<a onclick="alert('Hello \`,
+ `unfinished escape sequence in JS string: "Hello \\"`,
+ },
+ {
+ `<a onclick='alert("Hello\, World\`,
+ `unfinished escape sequence in JS string: "Hello\\, World\\"`,
+ },
+ {
+ `<a onclick='alert(/x+\`,
+ `unfinished escape sequence in JS string: "x+\\"`,
+ },
+ {
+ `<a onclick="/foo[\]/`,
+ `unfinished JS regexp charset: "foo[\\]/"`,
+ },
+ {
+ // It is ambiguous whether 1.5 should be 1\.5 or 1.5.
+ // Either `var x = 1/- 1.5 /i.test(x)`
+ // where `i.test(x)` is a method call of reference i,
+ // or `/-1\.5/i.test(x)` which is a method call on a
+ // case insensitive regular expression.
+ `<script>{{if false}}var x = 1{{end}}/-{{"1.5"}}/i.test(x)</script>`,
+ `'/' could start a division or regexp: "/-"`,
+ },
+ {
+ `{{template "foo"}}`,
+ "z:1: no such template \"foo\"",
+ },
+ {
+ `<div{{template "y"}}>` +
+ // Illegal starting in stateTag but not in stateText.
+ `{{define "y"}} foo<b{{end}}`,
+ `"<" in attribute name: " foo<b"`,
+ },
+ {
+ `<script>reverseList = [{{template "t"}}]</script>` +
+ // Missing " after recursive call.
+ `{{define "t"}}{{if .Tail}}{{template "t" .Tail}}{{end}}{{.Head}}",{{end}}`,
+ `: cannot compute output context for template t$htmltemplate_stateJS_elementScript`,
+ },
+ {
+ `<input type=button value=onclick=>`,
+ `html/template:z: "=" in unquoted attr: "onclick="`,
+ },
+ {
+ `<input type=button value= onclick=>`,
+ `html/template:z: "=" in unquoted attr: "onclick="`,
+ },
+ {
+ `<input type=button value= 1+1=2>`,
+ `html/template:z: "=" in unquoted attr: "1+1=2"`,
+ },
+ {
+ "<a class=`foo>",
+ "html/template:z: \"`\" in unquoted attr: \"`foo\"",
+ },
+ {
+ `<a style=font:'Arial'>`,
+ `html/template:z: "'" in unquoted attr: "font:'Arial'"`,
+ },
+ {
+ `<a=foo>`,
+ `: expected space, attr name, or end of tag, but got "=foo>"`,
+ },
+ }
+
+ for _, test := range tests {
+ buf := new(bytes.Buffer)
+ tmpl, err := New("z").Parse(test.input)
+ if err != nil {
+ t.Errorf("input=%q: unexpected parse error %s\n", test.input, err)
+ continue
+ }
+ err = tmpl.Execute(buf, nil)
+ var got string
+ if err != nil {
+ got = err.Error()
+ }
+ if test.err == "" {
+ if got != "" {
+ t.Errorf("input=%q: unexpected error %q", test.input, got)
+ }
+ continue
+ }
+ if strings.Index(got, test.err) == -1 {
+ t.Errorf("input=%q: error\n\t%q\ndoes not contain expected string\n\t%q", test.input, got, test.err)
+ continue
+ }
+ }
+}
+
+func TestEscapeText(t *testing.T) {
+ tests := []struct {
+ input string
+ output context
+ }{
+ {
+ ``,
+ context{},
+ },
+ {
+ `Hello, World!`,
+ context{},
+ },
+ {
+ // An orphaned "<" is OK.
+ `I <3 Ponies!`,
+ context{},
+ },
+ {
+ `<a`,
+ context{state: stateTag},
+ },
+ {
+ `<a `,
+ context{state: stateTag},
+ },
+ {
+ `<a>`,
+ context{state: stateText},
+ },
+ {
+ `<a href`,
+ context{state: stateAttrName, attr: attrURL},
+ },
+ {
+ `<a on`,
+ context{state: stateAttrName, attr: attrScript},
+ },
+ {
+ `<a href `,
+ context{state: stateAfterName, attr: attrURL},
+ },
+ {
+ `<a style = `,
+ context{state: stateBeforeValue, attr: attrStyle},
+ },
+ {
+ `<a href=`,
+ context{state: stateBeforeValue, attr: attrURL},
+ },
+ {
+ `<a href=x`,
+ context{state: stateURL, delim: delimSpaceOrTagEnd, urlPart: urlPartPreQuery},
+ },
+ {
+ `<a href=x `,
+ context{state: stateTag},
+ },
+ {
+ `<a href=>`,
+ context{state: stateText},
+ },
+ {
+ `<a href=x>`,
+ context{state: stateText},
+ },
+ {
+ `<a href ='`,
+ context{state: stateURL, delim: delimSingleQuote},
+ },
+ {
+ `<a href=''`,
+ context{state: stateTag},
+ },
+ {
+ `<a href= "`,
+ context{state: stateURL, delim: delimDoubleQuote},
+ },
+ {
+ `<a href=""`,
+ context{state: stateTag},
+ },
+ {
+ `<a title="`,
+ context{state: stateAttr, delim: delimDoubleQuote},
+ },
+ {
+ `<a HREF='http:`,
+ context{state: stateURL, delim: delimSingleQuote, urlPart: urlPartPreQuery},
+ },
+ {
+ `<a Href='/`,
+ context{state: stateURL, delim: delimSingleQuote, urlPart: urlPartPreQuery},
+ },
+ {
+ `<a href='"`,
+ context{state: stateURL, delim: delimSingleQuote, urlPart: urlPartPreQuery},
+ },
+ {
+ `<a href="'`,
+ context{state: stateURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery},
+ },
+ {
+ `<a href='&apos;`,
+ context{state: stateURL, delim: delimSingleQuote, urlPart: urlPartPreQuery},
+ },
+ {
+ `<a href="&quot;`,
+ context{state: stateURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery},
+ },
+ {
+ `<a href="&#34;`,
+ context{state: stateURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery},
+ },
+ {
+ `<a href=&quot;`,
+ context{state: stateURL, delim: delimSpaceOrTagEnd, urlPart: urlPartPreQuery},
+ },
+ {
+ `<img alt="1">`,
+ context{state: stateText},
+ },
+ {
+ `<img alt="1>"`,
+ context{state: stateTag},
+ },
+ {
+ `<img alt="1>">`,
+ context{state: stateText},
+ },
+ {
+ `<input checked type="checkbox"`,
+ context{state: stateTag},
+ },
+ {
+ `<a onclick="`,
+ context{state: stateJS, delim: delimDoubleQuote},
+ },
+ {
+ `<a onclick="//foo`,
+ context{state: stateJSLineCmt, delim: delimDoubleQuote},
+ },
+ {
+ "<a onclick='//\n",
+ context{state: stateJS, delim: delimSingleQuote},
+ },
+ {
+ "<a onclick='//\r\n",
+ context{state: stateJS, delim: delimSingleQuote},
+ },
+ {
+ "<a onclick='//\u2028",
+ context{state: stateJS, delim: delimSingleQuote},
+ },
+ {
+ `<a onclick="/*`,
+ context{state: stateJSBlockCmt, delim: delimDoubleQuote},
+ },
+ {
+ `<a onclick="/*/`,
+ context{state: stateJSBlockCmt, delim: delimDoubleQuote},
+ },
+ {
+ `<a onclick="/**/`,
+ context{state: stateJS, delim: delimDoubleQuote},
+ },
+ {
+ `<a onkeypress="&quot;`,
+ context{state: stateJSDqStr, delim: delimDoubleQuote},
+ },
+ {
+ `<a onclick='&quot;foo&quot;`,
+ context{state: stateJS, delim: delimSingleQuote, jsCtx: jsCtxDivOp},
+ },
+ {
+ `<a onclick=&#39;foo&#39;`,
+ context{state: stateJS, delim: delimSpaceOrTagEnd, jsCtx: jsCtxDivOp},
+ },
+ {
+ `<a onclick=&#39;foo`,
+ context{state: stateJSSqStr, delim: delimSpaceOrTagEnd},
+ },
+ {
+ `<a onclick="&quot;foo'`,
+ context{state: stateJSDqStr, delim: delimDoubleQuote},
+ },
+ {
+ `<a onclick="'foo&quot;`,
+ context{state: stateJSSqStr, delim: delimDoubleQuote},
+ },
+ {
+ `<A ONCLICK="'`,
+ context{state: stateJSSqStr, delim: delimDoubleQuote},
+ },
+ {
+ `<a onclick="/`,
+ context{state: stateJSRegexp, delim: delimDoubleQuote},
+ },
+ {
+ `<a onclick="'foo'`,
+ context{state: stateJS, delim: delimDoubleQuote, jsCtx: jsCtxDivOp},
+ },
+ {
+ `<a onclick="'foo\'`,
+ context{state: stateJSSqStr, delim: delimDoubleQuote},
+ },
+ {
+ `<a onclick="'foo\'`,
+ context{state: stateJSSqStr, delim: delimDoubleQuote},
+ },
+ {
+ `<a onclick="/foo/`,
+ context{state: stateJS, delim: delimDoubleQuote, jsCtx: jsCtxDivOp},
+ },
+ {
+ `<script>/foo/ /=`,
+ context{state: stateJS, element: elementScript},
+ },
+ {
+ `<a onclick="1 /foo`,
+ context{state: stateJS, delim: delimDoubleQuote, jsCtx: jsCtxDivOp},
+ },
+ {
+ `<a onclick="1 /*c*/ /foo`,
+ context{state: stateJS, delim: delimDoubleQuote, jsCtx: jsCtxDivOp},
+ },
+ {
+ `<a onclick="/foo[/]`,
+ context{state: stateJSRegexp, delim: delimDoubleQuote},
+ },
+ {
+ `<a onclick="/foo\/`,
+ context{state: stateJSRegexp, delim: delimDoubleQuote},
+ },
+ {
+ `<a onclick="/foo/`,
+ context{state: stateJS, delim: delimDoubleQuote, jsCtx: jsCtxDivOp},
+ },
+ {
+ `<input checked style="`,
+ context{state: stateCSS, delim: delimDoubleQuote},
+ },
+ {
+ `<a style="//`,
+ context{state: stateCSSLineCmt, delim: delimDoubleQuote},
+ },
+ {
+ `<a style="//</script>`,
+ context{state: stateCSSLineCmt, delim: delimDoubleQuote},
+ },
+ {
+ "<a style='//\n",
+ context{state: stateCSS, delim: delimSingleQuote},
+ },
+ {
+ "<a style='//\r",
+ context{state: stateCSS, delim: delimSingleQuote},
+ },
+ {
+ `<a style="/*`,
+ context{state: stateCSSBlockCmt, delim: delimDoubleQuote},
+ },
+ {
+ `<a style="/*/`,
+ context{state: stateCSSBlockCmt, delim: delimDoubleQuote},
+ },
+ {
+ `<a style="/**/`,
+ context{state: stateCSS, delim: delimDoubleQuote},
+ },
+ {
+ `<a style="background: '`,
+ context{state: stateCSSSqStr, delim: delimDoubleQuote},
+ },
+ {
+ `<a style="background: &quot;`,
+ context{state: stateCSSDqStr, delim: delimDoubleQuote},
+ },
+ {
+ `<a style="background: '/foo?img=`,
+ context{state: stateCSSSqStr, delim: delimDoubleQuote, urlPart: urlPartQueryOrFrag},
+ },
+ {
+ `<a style="background: '/`,
+ context{state: stateCSSSqStr, delim: delimDoubleQuote, urlPart: urlPartPreQuery},
+ },
+ {
+ `<a style="background: url(&#x22;/`,
+ context{state: stateCSSDqURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery},
+ },
+ {
+ `<a style="background: url('/`,
+ context{state: stateCSSSqURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery},
+ },
+ {
+ `<a style="background: url('/)`,
+ context{state: stateCSSSqURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery},
+ },
+ {
+ `<a style="background: url('/ `,
+ context{state: stateCSSSqURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery},
+ },
+ {
+ `<a style="background: url(/`,
+ context{state: stateCSSURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery},
+ },
+ {
+ `<a style="background: url( `,
+ context{state: stateCSSURL, delim: delimDoubleQuote},
+ },
+ {
+ `<a style="background: url( /image?name=`,
+ context{state: stateCSSURL, delim: delimDoubleQuote, urlPart: urlPartQueryOrFrag},
+ },
+ {
+ `<a style="background: url(x)`,
+ context{state: stateCSS, delim: delimDoubleQuote},
+ },
+ {
+ `<a style="background: url('x'`,
+ context{state: stateCSS, delim: delimDoubleQuote},
+ },
+ {
+ `<a style="background: url( x `,
+ context{state: stateCSS, delim: delimDoubleQuote},
+ },
+ {
+ `<!-- foo`,
+ context{state: stateHTMLCmt},
+ },
+ {
+ `<!-->`,
+ context{state: stateHTMLCmt},
+ },
+ {
+ `<!--->`,
+ context{state: stateHTMLCmt},
+ },
+ {
+ `<!-- foo -->`,
+ context{state: stateText},
+ },
+ {
+ `<script`,
+ context{state: stateTag, element: elementScript},
+ },
+ {
+ `<script `,
+ context{state: stateTag, element: elementScript},
+ },
+ {
+ `<script src="foo.js" `,
+ context{state: stateTag, element: elementScript},
+ },
+ {
+ `<script src='foo.js' `,
+ context{state: stateTag, element: elementScript},
+ },
+ {
+ `<script type=text/javascript `,
+ context{state: stateTag, element: elementScript},
+ },
+ {
+ `<script>foo`,
+ context{state: stateJS, jsCtx: jsCtxDivOp, element: elementScript},
+ },
+ {
+ `<script>foo</script>`,
+ context{state: stateText},
+ },
+ {
+ `<script>foo</script><!--`,
+ context{state: stateHTMLCmt},
+ },
+ {
+ `<script>document.write("<p>foo</p>");`,
+ context{state: stateJS, element: elementScript},
+ },
+ {
+ `<script>document.write("<p>foo<\/script>");`,
+ context{state: stateJS, element: elementScript},
+ },
+ {
+ `<script>document.write("<script>alert(1)</script>");`,
+ context{state: stateText},
+ },
+ {
+ `<Script>`,
+ context{state: stateJS, element: elementScript},
+ },
+ {
+ `<SCRIPT>foo`,
+ context{state: stateJS, jsCtx: jsCtxDivOp, element: elementScript},
+ },
+ {
+ `<textarea>value`,
+ context{state: stateRCDATA, element: elementTextarea},
+ },
+ {
+ `<textarea>value</TEXTAREA>`,
+ context{state: stateText},
+ },
+ {
+ `<textarea name=html><b`,
+ context{state: stateRCDATA, element: elementTextarea},
+ },
+ {
+ `<title>value`,
+ context{state: stateRCDATA, element: elementTitle},
+ },
+ {
+ `<style>value`,
+ context{state: stateCSS, element: elementStyle},
+ },
+ {
+ `<a xlink:href`,
+ context{state: stateAttrName, attr: attrURL},
+ },
+ {
+ `<a xmlns`,
+ context{state: stateAttrName, attr: attrURL},
+ },
+ {
+ `<a xmlns:foo`,
+ context{state: stateAttrName, attr: attrURL},
+ },
+ {
+ `<a xmlnsxyz`,
+ context{state: stateAttrName},
+ },
+ {
+ `<a data-url`,
+ context{state: stateAttrName, attr: attrURL},
+ },
+ {
+ `<a data-iconUri`,
+ context{state: stateAttrName, attr: attrURL},
+ },
+ {
+ `<a data-urlItem`,
+ context{state: stateAttrName, attr: attrURL},
+ },
+ {
+ `<a g:`,
+ context{state: stateAttrName},
+ },
+ {
+ `<a g:url`,
+ context{state: stateAttrName, attr: attrURL},
+ },
+ {
+ `<a g:iconUri`,
+ context{state: stateAttrName, attr: attrURL},
+ },
+ {
+ `<a g:urlItem`,
+ context{state: stateAttrName, attr: attrURL},
+ },
+ {
+ `<a g:value`,
+ context{state: stateAttrName},
+ },
+ {
+ `<a svg:style='`,
+ context{state: stateCSS, delim: delimSingleQuote},
+ },
+ {
+ `<svg:font-face`,
+ context{state: stateTag},
+ },
+ {
+ `<svg:a svg:onclick="`,
+ context{state: stateJS, delim: delimDoubleQuote},
+ },
+ }
+
+ for _, test := range tests {
+ b, e := []byte(test.input), newEscaper(nil)
+ c := e.escapeText(context{}, &parse.TextNode{parse.NodeText, b})
+ if !test.output.eq(c) {
+ t.Errorf("input %q: want context\n\t%v\ngot\n\t%v", test.input, test.output, c)
+ continue
+ }
+ if test.input != string(b) {
+ t.Errorf("input %q: text node was modified: want %q got %q", test.input, test.input, b)
+ continue
+ }
+ }
+}
+
+func TestEnsurePipelineContains(t *testing.T) {
+ tests := []struct {
+ input, output string
+ ids []string
+ }{
+ {
+ "{{.X}}",
+ ".X",
+ []string{},
+ },
+ {
+ "{{.X | html}}",
+ ".X | html",
+ []string{},
+ },
+ {
+ "{{.X}}",
+ ".X | html",
+ []string{"html"},
+ },
+ {
+ "{{.X | html}}",
+ ".X | html | urlquery",
+ []string{"urlquery"},
+ },
+ {
+ "{{.X | html | urlquery}}",
+ ".X | html | urlquery",
+ []string{"urlquery"},
+ },
+ {
+ "{{.X | html | urlquery}}",
+ ".X | html | urlquery",
+ []string{"html", "urlquery"},
+ },
+ {
+ "{{.X | html | urlquery}}",
+ ".X | html | urlquery",
+ []string{"html"},
+ },
+ {
+ "{{.X | urlquery}}",
+ ".X | html | urlquery",
+ []string{"html", "urlquery"},
+ },
+ {
+ "{{.X | html | print}}",
+ ".X | urlquery | html | print",
+ []string{"urlquery", "html"},
+ },
+ }
+ for i, test := range tests {
+ tmpl := template.Must(template.New("test").Parse(test.input))
+ action, ok := (tmpl.Tree.Root.Nodes[0].(*parse.ActionNode))
+ if !ok {
+ t.Errorf("#%d: First node is not an action: %s", i, test.input)
+ continue
+ }
+ pipe := action.Pipe
+ ensurePipelineContains(pipe, test.ids)
+ got := pipe.String()
+ if got != test.output {
+ t.Errorf("#%d: %s, %v: want\n\t%s\ngot\n\t%s", i, test.input, test.ids, test.output, got)
+ }
+ }
+}
+
+func TestEscapeErrorsNotIgnorable(t *testing.T) {
+ var b bytes.Buffer
+ tmpl, _ := New("dangerous").Parse("<a")
+ err := tmpl.Execute(&b, nil)
+ if err == nil {
+ t.Errorf("Expected error")
+ } else if b.Len() != 0 {
+ t.Errorf("Emitted output despite escaping failure")
+ }
+}
+
+func TestEscapeSetErrorsNotIgnorable(t *testing.T) {
+ var b bytes.Buffer
+ tmpl, err := New("root").Parse(`{{define "t"}}<a{{end}}`)
+ if err != nil {
+ t.Errorf("failed to parse set: %q", err)
+ }
+ err = tmpl.ExecuteTemplate(&b, "t", nil)
+ if err == nil {
+ t.Errorf("Expected error")
+ } else if b.Len() != 0 {
+ t.Errorf("Emitted output despite escaping failure")
+ }
+}
+
+func TestRedundantFuncs(t *testing.T) {
+ inputs := []interface{}{
+ "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f" +
+ "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" +
+ ` !"#$%&'()*+,-./` +
+ `0123456789:;<=>?` +
+ `@ABCDEFGHIJKLMNO` +
+ `PQRSTUVWXYZ[\]^_` +
+ "`abcdefghijklmno" +
+ "pqrstuvwxyz{|}~\x7f" +
+ "\u00A0\u0100\u2028\u2029\ufeff\ufdec\ufffd\uffff\U0001D11E" +
+ "&amp;%22\\",
+ CSS(`a[href =~ "//example.com"]#foo`),
+ HTML(`Hello, <b>World</b> &amp;tc!`),
+ HTMLAttr(` dir="ltr"`),
+ JS(`c && alert("Hello, World!");`),
+ JSStr(`Hello, World & O'Reilly\x21`),
+ URL(`greeting=H%69&addressee=(World)`),
+ }
+
+ for n0, m := range redundantFuncs {
+ f0 := funcMap[n0].(func(...interface{}) string)
+ for n1 := range m {
+ f1 := funcMap[n1].(func(...interface{}) string)
+ for _, input := range inputs {
+ want := f0(input)
+ if got := f1(want); want != got {
+ t.Errorf("%s %s with %T %q: want\n\t%q,\ngot\n\t%q", n0, n1, input, input, want, got)
+ }
+ }
+ }
+ }
+}
+
+func TestIndirectPrint(t *testing.T) {
+ a := 3
+ ap := &a
+ b := "hello"
+ bp := &b
+ bpp := &bp
+ tmpl := Must(New("t").Parse(`{{.}}`))
+ var buf bytes.Buffer
+ err := tmpl.Execute(&buf, ap)
+ if err != nil {
+ t.Errorf("Unexpected error: %s", err)
+ } else if buf.String() != "3" {
+ t.Errorf(`Expected "3"; got %q`, buf.String())
+ }
+ buf.Reset()
+ err = tmpl.Execute(&buf, bpp)
+ if err != nil {
+ t.Errorf("Unexpected error: %s", err)
+ } else if buf.String() != "hello" {
+ t.Errorf(`Expected "hello"; got %q`, buf.String())
+ }
+}
+
+func BenchmarkEscapedExecute(b *testing.B) {
+ tmpl := Must(New("t").Parse(`<a onclick="alert('{{.}}')">{{.}}</a>`))
+ var buf bytes.Buffer
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ tmpl.Execute(&buf, "foo & 'bar' & baz")
+ buf.Reset()
+ }
+}
diff --git a/src/pkg/html/template/html.go b/src/pkg/html/template/html.go
new file mode 100644
index 000000000..7b77d6531
--- /dev/null
+++ b/src/pkg/html/template/html.go
@@ -0,0 +1,257 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package template
+
+import (
+ "bytes"
+ "fmt"
+ "strings"
+ "unicode/utf8"
+)
+
+// htmlNospaceEscaper escapes for inclusion in unquoted attribute values.
+func htmlNospaceEscaper(args ...interface{}) string {
+ s, t := stringify(args...)
+ if t == contentTypeHTML {
+ return htmlReplacer(stripTags(s), htmlNospaceNormReplacementTable, false)
+ }
+ return htmlReplacer(s, htmlNospaceReplacementTable, false)
+}
+
+// attrEscaper escapes for inclusion in quoted attribute values.
+func attrEscaper(args ...interface{}) string {
+ s, t := stringify(args...)
+ if t == contentTypeHTML {
+ return htmlReplacer(stripTags(s), htmlNormReplacementTable, true)
+ }
+ return htmlReplacer(s, htmlReplacementTable, true)
+}
+
+// rcdataEscaper escapes for inclusion in an RCDATA element body.
+func rcdataEscaper(args ...interface{}) string {
+ s, t := stringify(args...)
+ if t == contentTypeHTML {
+ return htmlReplacer(s, htmlNormReplacementTable, true)
+ }
+ return htmlReplacer(s, htmlReplacementTable, true)
+}
+
+// htmlEscaper escapes for inclusion in HTML text.
+func htmlEscaper(args ...interface{}) string {
+ s, t := stringify(args...)
+ if t == contentTypeHTML {
+ return s
+ }
+ return htmlReplacer(s, htmlReplacementTable, true)
+}
+
+// htmlReplacementTable contains the runes that need to be escaped
+// inside a quoted attribute value or in a text node.
+var htmlReplacementTable = []string{
+ // http://www.w3.org/TR/html5/tokenization.html#attribute-value-unquoted-state: "
+ // U+0000 NULL Parse error. Append a U+FFFD REPLACEMENT
+ // CHARACTER character to the current attribute's value.
+ // "
+ // and similarly
+ // http://www.w3.org/TR/html5/tokenization.html#before-attribute-value-state
+ 0: "\uFFFD",
+ '"': "&#34;",
+ '&': "&amp;",
+ '\'': "&#39;",
+ '+': "&#43;",
+ '<': "&lt;",
+ '>': "&gt;",
+}
+
+// htmlNormReplacementTable is like htmlReplacementTable but without '&' to
+// avoid over-encoding existing entities.
+var htmlNormReplacementTable = []string{
+ 0: "\uFFFD",
+ '"': "&#34;",
+ '\'': "&#39;",
+ '+': "&#43;",
+ '<': "&lt;",
+ '>': "&gt;",
+}
+
+// htmlNospaceReplacementTable contains the runes that need to be escaped
+// inside an unquoted attribute value.
+// The set of runes escaped is the union of the HTML specials and
+// those determined by running the JS below in browsers:
+// <div id=d></div>
+// <script>(function () {
+// var a = [], d = document.getElementById("d"), i, c, s;
+// for (i = 0; i < 0x10000; ++i) {
+// c = String.fromCharCode(i);
+// d.innerHTML = "<span title=" + c + "lt" + c + "></span>"
+// s = d.getElementsByTagName("SPAN")[0];
+// if (!s || s.title !== c + "lt" + c) { a.push(i.toString(16)); }
+// }
+// document.write(a.join(", "));
+// })()</script>
+var htmlNospaceReplacementTable = []string{
+ 0: "&#xfffd;",
+ '\t': "&#9;",
+ '\n': "&#10;",
+ '\v': "&#11;",
+ '\f': "&#12;",
+ '\r': "&#13;",
+ ' ': "&#32;",
+ '"': "&#34;",
+ '&': "&amp;",
+ '\'': "&#39;",
+ '+': "&#43;",
+ '<': "&lt;",
+ '=': "&#61;",
+ '>': "&gt;",
+ // A parse error in the attribute value (unquoted) and
+ // before attribute value states.
+ // Treated as a quoting character by IE.
+ '`': "&#96;",
+}
+
+// htmlNospaceNormReplacementTable is like htmlNospaceReplacementTable but
+// without '&' to avoid over-encoding existing entities.
+var htmlNospaceNormReplacementTable = []string{
+ 0: "&#xfffd;",
+ '\t': "&#9;",
+ '\n': "&#10;",
+ '\v': "&#11;",
+ '\f': "&#12;",
+ '\r': "&#13;",
+ ' ': "&#32;",
+ '"': "&#34;",
+ '\'': "&#39;",
+ '+': "&#43;",
+ '<': "&lt;",
+ '=': "&#61;",
+ '>': "&gt;",
+ // A parse error in the attribute value (unquoted) and
+ // before attribute value states.
+ // Treated as a quoting character by IE.
+ '`': "&#96;",
+}
+
// htmlReplacer returns s with runes replaced according to replacementTable.
// A rune r below len(replacementTable) is replaced by replacementTable[r]
// unless that entry is empty. When badRunes is true, all other runes are
// passed through unescaped; when it is false, runes in the ranges
// U+FDD0..U+FDEF and U+FFF0..U+FFFF are written as hexadecimal character
// references. If nothing needs replacing, s itself is returned.
func htmlReplacer(s string, replacementTable []string, badRunes bool) string {
	written, b := 0, new(bytes.Buffer)
	r, w := rune(0), 0
	for i := 0; i < len(s); i += w {
		// Decode explicitly instead of ranging over s so that the width of
		// the input is known. An invalid byte decodes to U+FFFD with a width
		// of 1, whereas utf8.RuneLen(U+FFFD) is 3; advancing by the latter
		// would skip input or run past the end of s.
		r, w = utf8.DecodeRuneInString(s[i:])
		switch {
		case int(r) < len(replacementTable):
			if repl := replacementTable[r]; len(repl) != 0 {
				b.WriteString(s[written:i])
				b.WriteString(repl)
				written = i + w
			}
		case badRunes:
			// Pass the rune through unchanged.
		case 0xfdd0 <= r && r <= 0xfdef || 0xfff0 <= r && r <= 0xffff:
			// IE does not allow these ranges in unquoted attrs.
			fmt.Fprintf(b, "%s&#x%x;", s[written:i], r)
			written = i + w
		}
	}
	if written == 0 {
		return s
	}
	b.WriteString(s[written:])
	return b.String()
}
+
+// stripTags takes a snippet of HTML and returns only the text content: bytes are copied to the output only while the context is in the text or RCDATA state, and input in which nothing was skipped is returned unchanged.
+// For example, `<b>&iexcl;Hi!</b> <script>...</script>` -> `&iexcl;Hi! `.
+func stripTags(html string) string {
+ var b bytes.Buffer
+ s, c, i, allText := []byte(html), context{}, 0, true // allText stays true until some chunk is skipped
+ // Using the transition funcs helps us avoid mangling
+ // `<div title="1>2">` or `I <3 Ponies!`.
+ for i != len(s) {
+ if c.delim == delimNone {
+ st := c.state
+ // Use RCDATA instead of parsing into JS or CSS styles.
+ if c.element != elementNone && !isInTag(st) {
+ st = stateRCDATA
+ }
+ d, nread := transitionFunc[st](c, s[i:]) // d is the context after the nread bytes consumed
+ i1 := i + nread
+ if c.state == stateText || c.state == stateRCDATA {
+ // Emit text up to the start of the tag or comment.
+ j := i1
+ if d.state != c.state {
+ for j1 := j - 1; j1 >= i; j1-- { // scan backwards for the '<' that ended the text run
+ if s[j1] == '<' {
+ j = j1
+ break
+ }
+ }
+ }
+ b.Write(s[i:j])
+ } else {
+ allText = false
+ }
+ c, i = d, i1
+ continue
+ }
+ i1 := i + bytes.IndexAny(s[i:], delimEnds[c.delim]) // position of the byte that ends the delimited value
+ if i1 < i { // IndexAny returned -1: no terminator in the rest of the input
+ break
+ }
+ if c.delim != delimSpaceOrTagEnd {
+ // Consume any quote.
+ i1++
+ }
+ c, i = context{state: stateTag, element: c.element}, i1 // continue in the tag, keeping the element
+ }
+ if allText {
+ return html
+ } else if c.state == stateText || c.state == stateRCDATA {
+ b.Write(s[i:]) // flush what remains when the loop exited early in a text state
+ }
+ return b.String()
+}
+
+// htmlNameFilter accepts valid parts of an HTML attribute or tag name or
+// a known-safe HTML attribute.
+func htmlNameFilter(args ...interface{}) string {
+ s, t := stringify(args...)
+ if t == contentTypeHTMLAttr {
+ return s
+ }
+ if len(s) == 0 {
+ // Avoid violation of structure preservation.
+ // <input checked {{.K}}={{.V}}>.
+ // Without this, if .K is empty then .V is the value of
+ // checked, but otherwise .V is the value of the attribute
+ // named .K.
+ return filterFailsafe
+ }
+ s = strings.ToLower(s)
+ if t := attrType(s); t != contentTypePlain {
+ // TODO: Split attr and element name part filters so we can whitelist
+ // attributes.
+ return filterFailsafe
+ }
+ for _, r := range s {
+ switch {
+ case '0' <= r && r <= '9':
+ case 'a' <= r && r <= 'z':
+ default:
+ return filterFailsafe
+ }
+ }
+ return s
+}
+
// commentEscaper ignores its arguments and always yields the empty string.
// Content inside a comment corresponds neither to parsed structure nor to
// human-readable output, so dropping anything interpolated into a comment is
// the simplest and most secure policy. The policy holds equally whether or
// not static comment content is removed from the template.
func commentEscaper(args ...interface{}) string {
	var dropped string
	return dropped
}
diff --git a/src/pkg/html/template/html_test.go b/src/pkg/html/template/html_test.go
new file mode 100644
index 000000000..b9b970387
--- /dev/null
+++ b/src/pkg/html/template/html_test.go
@@ -0,0 +1,94 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package template
+
+import (
+ "html"
+ "strings"
+ "testing"
+)
+
+func TestHTMLNospaceEscaper(t *testing.T) {
+ input := ("\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f" +
+ "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" +
+ ` !"#$%&'()*+,-./` +
+ `0123456789:;<=>?` +
+ `@ABCDEFGHIJKLMNO` +
+ `PQRSTUVWXYZ[\]^_` +
+ "`abcdefghijklmno" +
+ "pqrstuvwxyz{|}~\x7f" +
+ "\u00A0\u0100\u2028\u2029\ufeff\ufdec\U0001D11E")
+
+ want := ("&#xfffd;\x01\x02\x03\x04\x05\x06\x07" +
+ "\x08&#9;&#10;&#11;&#12;&#13;\x0E\x0F" +
+ "\x10\x11\x12\x13\x14\x15\x16\x17" +
+ "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" +
+ `&#32;!&#34;#$%&amp;&#39;()*&#43;,-./` +
+ `0123456789:;&lt;&#61;&gt;?` +
+ `@ABCDEFGHIJKLMNO` +
+ `PQRSTUVWXYZ[\]^_` +
+ `&#96;abcdefghijklmno` +
+ `pqrstuvwxyz{|}~` + "\u007f" +
+ "\u00A0\u0100\u2028\u2029\ufeff&#xfdec;\U0001D11E")
+
+ got := htmlNospaceEscaper(input)
+ if got != want {
+ t.Errorf("encode: want\n\t%q\nbut got\n\t%q", want, got)
+ }
+
+ got, want = html.UnescapeString(got), strings.Replace(input, "\x00", "\ufffd", 1)
+ if want != got {
+ t.Errorf("decode: want\n\t%q\nbut got\n\t%q", want, got)
+ }
+}
+
+func TestStripTags(t *testing.T) {
+ tests := []struct {
+ input, want string
+ }{
+ {"", ""},
+ {"Hello, World!", "Hello, World!"},
+ {"foo&amp;bar", "foo&amp;bar"},
+ {`Hello <a href="www.example.com/">World</a>!`, "Hello World!"},
+ {"Foo <textarea>Bar</textarea> Baz", "Foo Bar Baz"},
+ {"Foo <!-- Bar --> Baz", "Foo Baz"},
+ {"<", "<"},
+ {"foo < bar", "foo < bar"},
+ {`Foo<script type="text/javascript">alert(1337)</script>Bar`, "FooBar"},
+ {`Foo<div title="1>2">Bar`, "FooBar"},
+ {`I <3 Ponies!`, `I <3 Ponies!`},
+ {`<script>foo()</script>`, ``},
+ }
+
+ for _, test := range tests {
+ if got := stripTags(test.input); got != test.want {
+ t.Errorf("%q: want %q, got %q", test.input, test.want, got)
+ }
+ }
+}
+
+func BenchmarkHTMLNospaceEscaper(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ htmlNospaceEscaper("The <i>quick</i>,\r\n<span style='color:brown'>brown</span> fox jumps\u2028over the <canine class=\"lazy\">dog</canine>")
+ }
+}
+
+func BenchmarkHTMLNospaceEscaperNoSpecials(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ htmlNospaceEscaper("The_quick,_brown_fox_jumps_over_the_lazy_dog.")
+ }
+}
+
+func BenchmarkStripTags(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ stripTags("The <i>quick</i>,\r\n<span style='color:brown'>brown</span> fox jumps\u2028over the <canine class=\"lazy\">dog</canine>")
+ }
+}
+
+func BenchmarkStripTagsNoSpecials(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ stripTags("The quick, brown fox jumps over the lazy dog.")
+ }
+}
diff --git a/src/pkg/html/template/js.go b/src/pkg/html/template/js.go
new file mode 100644
index 000000000..0e632df42
--- /dev/null
+++ b/src/pkg/html/template/js.go
@@ -0,0 +1,362 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package template
+
+import (
+ "bytes"
+ "encoding/json"
+ "fmt"
+ "reflect"
+ "strings"
+ "unicode/utf8"
+)
+
+// nextJSCtx returns the context that determines whether a slash after the
+// given run of tokens starts a regular expression instead of a division
+// operator: / or /=.
+//
+// This assumes that the token run does not include any string tokens, comment
+// tokens, regular expression literal tokens, or division operators.
+//
+// This fails on some valid but nonsensical JavaScript programs like
+// "x = ++/foo/i" which is quite different than "x++/foo/i", but is not known to
+// fail on any known useful programs. It is based on the draft
+// JavaScript 2.0 lexical grammar and requires one token of lookbehind:
+// http://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html
+func nextJSCtx(s []byte, preceding jsCtx) jsCtx {
+ s = bytes.TrimRight(s, "\t\n\f\r \u2028\u2029") // only the last non-space token matters
+ if len(s) == 0 {
+ return preceding // nothing but whitespace: the context is unchanged
+ }
+
+ // All cases below are in the single-byte UTF-8 group.
+ switch c, n := s[len(s)-1], len(s); c {
+ case '+', '-':
+ // ++ and -- are not regexp preceders, but + and - are whether
+ // they are used as infix or prefix operators.
+ start := n - 1
+ // Count the number of adjacent dashes or pluses.
+ for start > 0 && s[start-1] == c {
+ start--
+ }
+ if (n-start)&1 == 1 { // odd-length run: the final sign stands alone
+ // Reached for trailing minus signs since "---" is the
+ // same as "-- -".
+ return jsCtxRegexp
+ }
+ return jsCtxDivOp
+ case '.':
+ // Handle "42."
+ if n != 1 && '0' <= s[n-2] && s[n-2] <= '9' {
+ return jsCtxDivOp
+ }
+ return jsCtxRegexp
+ // Suffixes for all punctuators from section 7.7 of the language spec
+ // that only end binary operators not handled above.
+ case ',', '<', '>', '=', '*', '%', '&', '|', '^', '?':
+ return jsCtxRegexp
+ // Suffixes for all punctuators from section 7.7 of the language spec
+ // that are prefix operators not handled above.
+ case '!', '~':
+ return jsCtxRegexp
+ // Matches all the punctuators from section 7.7 of the language spec
+ // that are open brackets not handled above.
+ case '(', '[':
+ return jsCtxRegexp
+ // Matches all the punctuators from section 7.7 of the language spec
+ // that precede expression starts.
+ case ':', ';', '{':
+ return jsCtxRegexp
+ // CAVEAT: the close punctuators ('}', ']', ')') precede div ops and
+ // are handled in the default except for '}' which can precede a
+ // division op as in
+ // ({ valueOf: function () { return 42 } } / 2
+ // which is valid, but, in practice, developers don't divide object
+ // literals, so our heuristic works well for code like
+ // function () { ... } /foo/.test(x) && sideEffect();
+ // The ')' punctuator can precede a regular expression as in
+ // if (b) /foo/.test(x) && ...
+ // but this is much less likely than
+ // (a + b) / c
+ case '}':
+ return jsCtxRegexp
+ default:
+ // Look for an IdentifierName and see if it is a keyword that
+ // can precede a regular expression.
+ j := n
+ for j > 0 && isJSIdentPart(rune(s[j-1])) { // walk back over the trailing identifier characters
+ j--
+ }
+ if regexpPrecederKeywords[string(s[j:])] {
+ return jsCtxRegexp
+ }
+ }
+ // Otherwise is a punctuator not listed above, or
+ // a string which precedes a div op, or an identifier
+ // which precedes a div op.
+ return jsCtxDivOp
+}
+
+// regexpPrecederKeywords is a set of reserved JS keywords that can precede a
+// regular expression in JS source.
+var regexpPrecederKeywords = map[string]bool{
+ "break": true,
+ "case": true,
+ "continue": true,
+ "delete": true,
+ "do": true,
+ "else": true,
+ "finally": true,
+ "in": true,
+ "instanceof": true,
+ "return": true,
+ "throw": true,
+ "try": true,
+ "typeof": true,
+ "void": true,
+}
+
// jsonMarshalType is the reflect.Type of the json.Marshaler interface.
var jsonMarshalType = reflect.TypeOf((*json.Marshaler)(nil)).Elem()

// indirectToJSONMarshaler returns the value, after dereferencing as many times
// as necessary to reach the base type (or nil) or an implementation of
// json.Marshaler. An untyped nil argument is returned as nil.
func indirectToJSONMarshaler(a interface{}) interface{} {
	// reflect.ValueOf(nil) is the zero Value, on which Type and Interface
	// panic, so untyped nil has to be handled before reflecting.
	if a == nil {
		return nil
	}
	v := reflect.ValueOf(a)
	for !v.Type().Implements(jsonMarshalType) && v.Kind() == reflect.Ptr && !v.IsNil() {
		v = v.Elem()
	}
	return v.Interface()
}
+
+// jsValEscaper escapes its inputs to a JS Expression (section 11.14) that has
+// neither side-effects nor free variables outside (NaN, Infinity).
+func jsValEscaper(args ...interface{}) string {
+ var a interface{}
+ if len(args) == 1 {
+ a = indirectToJSONMarshaler(args[0])
+ switch t := a.(type) {
+ case JS:
+ return string(t)
+ case JSStr:
+ // TODO: normalize quotes.
+ return `"` + string(t) + `"`
+ case json.Marshaler:
+ // Do not treat as a Stringer.
+ case fmt.Stringer:
+ a = t.String()
+ }
+ } else {
+ for i, arg := range args {
+ args[i] = indirectToJSONMarshaler(arg)
+ }
+ a = fmt.Sprint(args...) // zero or several arguments are formatted into a single string
+ }
+ // TODO: detect cycles before calling Marshal which loops infinitely on
+ // cyclic data. This may be an unacceptable DoS risk.
+
+ b, err := json.Marshal(a)
+ if err != nil {
+ // Put a space before comment so that if it is flush against
+ // a division operator it is not turned into a line comment:
+ // x/{{y}}
+ // turning into
+ // x//* error marshalling y:
+ // second line of error message */null
+ return fmt.Sprintf(" /* %s */null ", strings.Replace(err.Error(), "*/", "* /", -1))
+ }
+
+ // TODO: maybe post-process output to prevent it from containing
+ // "<!--", "-->", "<![CDATA[", "]]>", or "</script"
+ // in case custom marshallers produce output containing those.
+
+ // TODO: Maybe abbreviate \u00ab to \xab to produce more compact output.
+ if len(b) == 0 {
+ // In, `x=y/{{.}}*z` a json.Marshaler that produces "" should
+ // not cause the output `x=y/*z`.
+ return " null "
+ }
+ first, _ := utf8.DecodeRune(b)
+ last, _ := utf8.DecodeLastRune(b)
+ var buf bytes.Buffer
+ // Prevent IdentifierNames and NumericLiterals from running into
+ // keywords: in, instanceof, typeof, void
+ pad := isJSIdentPart(first) || isJSIdentPart(last)
+ if pad {
+ buf.WriteByte(' ')
+ }
+ written := 0
+ // Make sure that json.Marshal escapes codepoints U+2028 & U+2029
+ // so it falls within the subset of JSON which is valid JS.
+ for i := 0; i < len(b); {
+ rune, n := utf8.DecodeRune(b[i:])
+ repl := ""
+ if rune == 0x2028 {
+ repl = `\u2028`
+ } else if rune == 0x2029 {
+ repl = `\u2029`
+ }
+ if repl != "" {
+ buf.Write(b[written:i])
+ buf.WriteString(repl)
+ written = i + n
+ }
+ i += n
+ }
+ if buf.Len() != 0 { // buf is non-empty only if padding or a replacement was written
+ buf.Write(b[written:])
+ if pad {
+ buf.WriteByte(' ')
+ }
+ b = buf.Bytes()
+ }
+ return string(b)
+}
+
+// jsStrEscaper produces a string that can be included between quotes in
+// JavaScript source, in JavaScript embedded in an HTML5 <script> element,
+// or in an HTML5 event handler attribute such as onclick.
+func jsStrEscaper(args ...interface{}) string {
+ s, t := stringify(args...)
+ if t == contentTypeJSStr {
+ return replace(s, jsStrNormReplacementTable)
+ }
+ return replace(s, jsStrReplacementTable)
+}
+
+// jsRegexpEscaper behaves like jsStrEscaper but escapes regular expression
+// specials so the result is treated literally when included in a regular
+// expression literal. /foo{{.X}}bar/ matches the string "foo" followed by
+// the literal text of {{.X}} followed by the string "bar".
+func jsRegexpEscaper(args ...interface{}) string {
+ s, _ := stringify(args...)
+ s = replace(s, jsRegexpReplacementTable)
+ if s == "" {
+ // /{{.X}}/ should not produce a line comment when .X == "".
+ return "(?:)"
+ }
+ return s
+}
+
// replace substitutes replacementTable[r] for every rune r of s that indexes
// a non-empty entry of replacementTable. Runes without such an entry are kept,
// except that U+2028 and U+2029 are always rewritten to the raw strings
// `\u2028` and `\u2029`. When nothing is substituted, s itself is returned.
func replace(s string, replacementTable []string) string {
	var out bytes.Buffer
	done := 0 // number of leading bytes of s already accounted for in out
	for pos, c := range s {
		esc := ""
		if int(c) < len(replacementTable) {
			esc = replacementTable[c]
		}
		if esc == "" {
			if c == '\u2028' {
				esc = `\u2028`
			} else if c == '\u2029' {
				esc = `\u2029`
			} else {
				continue
			}
		}
		out.WriteString(s[done:pos])
		out.WriteString(esc)
		done = pos + utf8.RuneLen(c)
	}
	if done == 0 {
		return s
	}
	out.WriteString(s[done:])
	return out.String()
}
+
+var jsStrReplacementTable = []string{
+ 0: `\0`,
+ '\t': `\t`,
+ '\n': `\n`,
+ '\v': `\x0b`, // "\v" == "v" on IE 6.
+ '\f': `\f`,
+ '\r': `\r`,
+ // Encode HTML specials as hex so the output can be embedded
+ // in HTML attributes without further encoding.
+ '"': `\x22`,
+ '&': `\x26`,
+ '\'': `\x27`,
+ '+': `\x2b`,
+ '/': `\/`,
+ '<': `\x3c`,
+ '>': `\x3e`,
+ '\\': `\\`,
+}
+
+// jsStrNormReplacementTable is like jsStrReplacementTable but does not
+// overencode existing escapes since this table has no entry for `\`.
+var jsStrNormReplacementTable = []string{
+ 0: `\0`,
+ '\t': `\t`,
+ '\n': `\n`,
+ '\v': `\x0b`, // "\v" == "v" on IE 6.
+ '\f': `\f`,
+ '\r': `\r`,
+ // Encode HTML specials as hex so the output can be embedded
+ // in HTML attributes without further encoding.
+ '"': `\x22`,
+ '&': `\x26`,
+ '\'': `\x27`,
+ '+': `\x2b`,
+ '/': `\/`,
+ '<': `\x3c`,
+ '>': `\x3e`,
+}
+
+var jsRegexpReplacementTable = []string{
+ 0: `\0`,
+ '\t': `\t`,
+ '\n': `\n`,
+ '\v': `\x0b`, // "\v" == "v" on IE 6.
+ '\f': `\f`,
+ '\r': `\r`,
+ // Encode HTML specials as hex so the output can be embedded
+ // in HTML attributes without further encoding.
+ '"': `\x22`,
+ '$': `\$`,
+ '&': `\x26`,
+ '\'': `\x27`,
+ '(': `\(`,
+ ')': `\)`,
+ '*': `\*`,
+ '+': `\x2b`,
+ '-': `\-`,
+ '.': `\.`,
+ '/': `\/`,
+ '<': `\x3c`,
+ '>': `\x3e`,
+ '?': `\?`,
+ '[': `\[`,
+ '\\': `\\`,
+ ']': `\]`,
+ '^': `\^`,
+ '{': `\{`,
+ '|': `\|`,
+ '}': `\}`,
+}
+
// isJSIdentPart reports whether r is one of '$', '_', an ASCII digit or an
// ASCII letter. Non-Latin letters, joiners and combining marks are not
// recognized, but every codepoint that can occur in a numeric literal or a
// keyword is.
func isJSIdentPart(r rune) bool {
	if r == '$' || r == '_' {
		return true
	}
	isDigit := '0' <= r && r <= '9'
	isUpper := 'A' <= r && r <= 'Z'
	isLower := 'a' <= r && r <= 'z'
	return isDigit || isUpper || isLower
}
diff --git a/src/pkg/html/template/js_test.go b/src/pkg/html/template/js_test.go
new file mode 100644
index 000000000..311e1d2c4
--- /dev/null
+++ b/src/pkg/html/template/js_test.go
@@ -0,0 +1,401 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package template
+
+import (
+ "bytes"
+ "math"
+ "strings"
+ "testing"
+)
+
+// TestNextJsCtx checks that nextJSCtx classifies the token run preceding a
+// '/' correctly. Each table entry must yield the expected context whichever
+// context (regexp or div op) the scanner was in beforehand.
+func TestNextJsCtx(t *testing.T) {
+	tests := []struct {
+		jsCtx jsCtx
+		s     string
+	}{
+		// Statement terminators precede regexps.
+		{jsCtxRegexp, ";"},
+		// This is not airtight.
+		//     ({ valueOf: function () { return 1 } } / 2)
+		// is valid JavaScript but in practice, devs do not do this.
+		// A block followed by a statement starting with a RegExp is
+		// much more common:
+		//     while (x) {...} /foo/.test(x) || panic()
+		{jsCtxRegexp, "}"},
+		// But member, call, grouping, and array expression terminators
+		// precede div ops.
+		{jsCtxDivOp, ")"},
+		{jsCtxDivOp, "]"},
+		// At the start of a primary expression, array, or expression
+		// statement, expect a regexp.
+		{jsCtxRegexp, "("},
+		{jsCtxRegexp, "["},
+		{jsCtxRegexp, "{"},
+		// Assignment operators precede regexps as do all exclusively
+		// prefix and binary operators.
+		{jsCtxRegexp, "="},
+		{jsCtxRegexp, "+="},
+		{jsCtxRegexp, "*="},
+		{jsCtxRegexp, "*"},
+		{jsCtxRegexp, "!"},
+		// Whether the + or - is infix or prefix, it cannot precede a
+		// div op.
+		{jsCtxRegexp, "+"},
+		{jsCtxRegexp, "-"},
+		// An incr/decr op precedes a div operator.
+		// This is not airtight. In (g = ++/h/i) a regexp follows a
+		// pre-increment operator, but in practice devs do not try to
+		// increment or decrement regular expressions.
+		// (g++/h/i) where ++ is a postfix operator on g is much more
+		// common.
+		{jsCtxDivOp, "--"},
+		{jsCtxDivOp, "++"},
+		{jsCtxDivOp, "x--"},
+		// When we have many dashes or pluses, then they are grouped
+		// left to right.
+		{jsCtxRegexp, "x---"}, // A postfix -- then a -.
+		// return followed by a slash returns the regexp literal or the
+		// slash starts a regexp literal in an expression statement that
+		// is dead code.
+		{jsCtxRegexp, "return"},
+		{jsCtxRegexp, "return "},
+		{jsCtxRegexp, "return\t"},
+		{jsCtxRegexp, "return\n"},
+		{jsCtxRegexp, "return\u2028"},
+		// Identifiers can be divided and cannot validly be preceded by
+		// a regular expression. Semicolon insertion cannot happen
+		// between an identifier and a regular expression on a new line
+		// because the one token lookahead for semicolon insertion has
+		// to conclude that it could be a div binary op and treat it as
+		// such.
+		{jsCtxDivOp, "x"},
+		{jsCtxDivOp, "x "},
+		{jsCtxDivOp, "x\t"},
+		{jsCtxDivOp, "x\n"},
+		{jsCtxDivOp, "x\u2028"},
+		{jsCtxDivOp, "preturn"},
+		// Numbers precede div ops.
+		{jsCtxDivOp, "0"},
+		// Dots that are part of a number are div preceders.
+		{jsCtxDivOp, "0."},
+	}
+
+	// Both possible preceding contexts must lead to the same answer.
+	precedingCtxs := []jsCtx{jsCtxRegexp, jsCtxDivOp}
+
+	for _, test := range tests {
+		for _, prev := range precedingCtxs {
+			// Report the value actually returned, not the input, so a
+			// failure shows what went wrong and for which start context.
+			if got := nextJSCtx([]byte(test.s), prev); got != test.jsCtx {
+				t.Errorf("nextJSCtx(%q, %s) = %s, want %s", test.s, prev, got, test.jsCtx)
+			}
+		}
+	}
+
+	// Input made only of blanks must leave the preceding context unchanged.
+	for _, prev := range precedingCtxs {
+		if got := nextJSCtx([]byte(" "), prev); got != prev {
+			t.Errorf("Blank tokens: nextJSCtx(%q, %s) = %s, want %s", " ", prev, got, prev)
+		}
+	}
+}
+
+// TestJSValEscaper checks jsValEscaper against a table of Go values and
+// their expected JavaScript literal text, and then checks each value again
+// wrapped in a one-element slice.
+func TestJSValEscaper(t *testing.T) {
+ tests := []struct {
+ x interface{}
+ js string
+ }{
+ {int(42), " 42 "},
+ {uint(42), " 42 "},
+ {int16(42), " 42 "},
+ {uint16(42), " 42 "},
+ {int32(-42), " -42 "},
+ {uint32(42), " 42 "},
+ {int16(-42), " -42 "},
+ {uint16(42), " 42 "},
+ {int64(-42), " -42 "},
+ {uint64(42), " 42 "},
+ {uint64(1) << 53, " 9007199254740992 "},
+ // ulp(1 << 53) > 1 so this loses precision in JS
+ // but it is still a representable integer literal.
+ {uint64(1)<<53 + 1, " 9007199254740993 "},
+ {float32(1.0), " 1 "},
+ {float32(-1.0), " -1 "},
+ {float32(0.5), " 0.5 "},
+ {float32(-0.5), " -0.5 "},
+ {float32(1.0) / float32(256), " 0.00390625 "},
+ {float32(0), " 0 "},
+ {math.Copysign(0, -1), " -0 "},
+ {float64(1.0), " 1 "},
+ {float64(-1.0), " -1 "},
+ {float64(0.5), " 0.5 "},
+ {float64(-0.5), " -0.5 "},
+ {float64(0), " 0 "},
+ {math.Copysign(0, -1), " -0 "},
+ {"", `""`},
+ {"foo", `"foo"`},
+ // Newlines.
+ {"\r\n\u2028\u2029", `"\r\n\u2028\u2029"`},
+ // "\v" == "v" on IE 6, so a vertical tab must be emitted as a numeric
+ // escape; the expected output here spells it "\u000b".
+ {"\t\x0b", `"\u0009\u000b"`},
+ {struct{ X, Y int }{1, 2}, `{"X":1,"Y":2}`},
+ {[]interface{}{}, "[]"},
+ {[]interface{}{42, "foo", nil}, `[42,"foo",null]`},
+ {[]string{"<!--", "</script>", "-->"}, `["\u003c!--","\u003c/script\u003e","--\u003e"]`},
+ {"<!--", `"\u003c!--"`},
+ {"-->", `"--\u003e"`},
+ {"<![CDATA[", `"\u003c![CDATA["`},
+ {"]]>", `"]]\u003e"`},
+ {"</script", `"\u003c/script"`},
+ {"\U0001D11E", "\"\U0001D11E\""}, // or "\uD834\uDD1E"
+ }
+
+ for _, test := range tests {
+ if js := jsValEscaper(test.x); js != test.js {
+ t.Errorf("%+v: want\n\t%q\ngot\n\t%q", test.x, test.js, js)
+ }
+ // Make sure that escaping corner cases are not broken
+ // by nesting.
+ // The expected nested form is the scalar expectation with its
+ // surrounding spaces trimmed, wrapped in brackets.
+ a := []interface{}{test.x}
+ want := "[" + strings.TrimSpace(test.js) + "]"
+ if js := jsValEscaper(a); js != want {
+ t.Errorf("%+v: want\n\t%q\ngot\n\t%q", a, want, js)
+ }
+ }
+}
+
+// TestJSStrEscaper feeds jsStrEscaper a table of inputs and compares the
+// result with the expected body of a JS string literal.
+func TestJSStrEscaper(t *testing.T) {
+	cases := []struct {
+		in   interface{}
+		want string
+	}{
+		{"", ``},
+		{"foo", `foo`},
+		{"\u0000", `\0`},
+		{"\t", `\t`},
+		{"\n", `\n`},
+		{"\r", `\r`},
+		{"\u2028", `\u2028`},
+		{"\u2029", `\u2029`},
+		{"\\", `\\`},
+		{"\\n", `\\n`},
+		{"foo\r\nbar", `foo\r\nbar`},
+		// Quotes must not be able to close an enclosing attribute.
+		{`"`, `\x22`},
+		{`'`, `\x27`},
+		// The output must be embeddable in HTML as-is.
+		{`&amp;`, `\x26amp;`},
+		// Nothing may break out of a text node or element.
+		{"</script>", `\x3c\/script\x3e`},
+		{"<![CDATA[", `\x3c![CDATA[`},
+		{"]]>", `]]\x3e`},
+		// http://dev.w3.org/html5/markup/aria/syntax.html#escaping-text-span
+		//   "The text in style, script, title, and textarea elements
+		//   must not have an escaping text span start that is not
+		//   followed by an escaping text span end."
+		// Furthermore, spoofing an escaping text span end could lead
+		// to different interpretation of a </script> sequence otherwise
+		// masked by the escaping text span, and spoofing a start could
+		// allow regular text content to be interpreted as script
+		// allowing script execution via a combination of a JS string
+		// injection followed by an HTML text injection.
+		{"<!--", `\x3c!--`},
+		{"-->", `--\x3e`},
+		// From http://code.google.com/p/doctype/wiki/ArticleUtf7
+		{
+			"+ADw-script+AD4-alert(1)+ADw-/script+AD4-",
+			`\x2bADw-script\x2bAD4-alert(1)\x2bADw-\/script\x2bAD4-`,
+		},
+		// Invalid UTF-8 sequence
+		{"foo\xA0bar", "foo\xA0bar"},
+		// Invalid unicode scalar value.
+		{"foo\xed\xa0\x80bar", "foo\xed\xa0\x80bar"},
+	}
+
+	for _, tc := range cases {
+		if got := jsStrEscaper(tc.in); got != tc.want {
+			t.Errorf("%q: want %q got %q", tc.in, tc.want, got)
+		}
+	}
+}
+
+// TestJSRegexpEscaper feeds jsRegexpEscaper a table of inputs and compares
+// the result with the expected body of a JS regular expression literal.
+func TestJSRegexpEscaper(t *testing.T) {
+	cases := []struct {
+		in   interface{}
+		want string
+	}{
+		{"", `(?:)`},
+		{"foo", `foo`},
+		{"\u0000", `\0`},
+		{"\t", `\t`},
+		{"\n", `\n`},
+		{"\r", `\r`},
+		{"\u2028", `\u2028`},
+		{"\u2029", `\u2029`},
+		{"\\", `\\`},
+		{"\\n", `\\n`},
+		{"foo\r\nbar", `foo\r\nbar`},
+		// Quotes must not be able to close an enclosing attribute.
+		{`"`, `\x22`},
+		{`'`, `\x27`},
+		// The output must be embeddable in HTML as-is.
+		{`&amp;`, `\x26amp;`},
+		// Nothing may break out of a text node or element.
+		{"</script>", `\x3c\/script\x3e`},
+		{"<![CDATA[", `\x3c!\[CDATA\[`},
+		{"]]>", `\]\]\x3e`},
+		// Escaping text spans.
+		{"<!--", `\x3c!\-\-`},
+		{"-->", `\-\-\x3e`},
+		// Regexp metacharacters.
+		{"*", `\*`},
+		{"+", `\x2b`},
+		{"?", `\?`},
+		{"[](){}", `\[\]\(\)\{\}`},
+		{"$foo|x.y", `\$foo\|x\.y`},
+		{"x^y", `x\^y`},
+	}
+
+	for _, tc := range cases {
+		if got := jsRegexpEscaper(tc.in); got != tc.want {
+			t.Errorf("%q: want %q got %q", tc.in, tc.want, got)
+		}
+	}
+}
+
+// TestEscapersOnLower7AndSelectHighCodepoints runs jsStrEscaper and
+// jsRegexpEscaper over every 7-bit code point plus a handful of higher code
+// points, once as a single string and once rune by rune, and requires both
+// runs to produce the same expected output.
+func TestEscapersOnLower7AndSelectHighCodepoints(t *testing.T) {
+ input := ("\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f" +
+ "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" +
+ ` !"#$%&'()*+,-./` +
+ `0123456789:;<=>?` +
+ `@ABCDEFGHIJKLMNO` +
+ `PQRSTUVWXYZ[\]^_` +
+ "`abcdefghijklmno" +
+ "pqrstuvwxyz{|}~\x7f" +
+ "\u00A0\u0100\u2028\u2029\ufeff\U0001D11E")
+
+ // Each case names an escaper and the exact output expected for input.
+ tests := []struct {
+ name string
+ escaper func(...interface{}) string
+ escaped string
+ }{
+ {
+ "jsStrEscaper",
+ jsStrEscaper,
+ "\\0\x01\x02\x03\x04\x05\x06\x07" +
+ "\x08\\t\\n\\x0b\\f\\r\x0E\x0F" +
+ "\x10\x11\x12\x13\x14\x15\x16\x17" +
+ "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" +
+ ` !\x22#$%\x26\x27()*\x2b,-.\/` +
+ `0123456789:;\x3c=\x3e?` +
+ `@ABCDEFGHIJKLMNO` +
+ `PQRSTUVWXYZ[\\]^_` +
+ "`abcdefghijklmno" +
+ "pqrstuvwxyz{|}~\x7f" +
+ "\u00A0\u0100\\u2028\\u2029\ufeff\U0001D11E",
+ },
+ {
+ "jsRegexpEscaper",
+ jsRegexpEscaper,
+ "\\0\x01\x02\x03\x04\x05\x06\x07" +
+ "\x08\\t\\n\\x0b\\f\\r\x0E\x0F" +
+ "\x10\x11\x12\x13\x14\x15\x16\x17" +
+ "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" +
+ ` !\x22#\$%\x26\x27\(\)\*\x2b,\-\.\/` +
+ `0123456789:;\x3c=\x3e\?` +
+ `@ABCDEFGHIJKLMNO` +
+ `PQRSTUVWXYZ\[\\\]\^_` +
+ "`abcdefghijklmno" +
+ `pqrstuvwxyz\{\|\}~` + "\u007f" +
+ "\u00A0\u0100\\u2028\\u2029\ufeff\U0001D11E",
+ },
+ }
+
+ for _, test := range tests {
+ // First pass: escape the whole input in one call.
+ if s := test.escaper(input); s != test.escaped {
+ t.Errorf("%s once: want\n\t%q\ngot\n\t%q", test.name, test.escaped, s)
+ continue
+ }
+
+ // Escape it rune by rune to make sure that any
+ // fast-path checking does not break escaping.
+ var buf bytes.Buffer
+ for _, c := range input {
+ buf.WriteString(test.escaper(string(c)))
+ }
+
+ if s := buf.String(); s != test.escaped {
+ t.Errorf("%s rune-wise: want\n\t%q\ngot\n\t%q", test.name, test.escaped, s)
+ continue
+ }
+ }
+}
+
+func BenchmarkJSValEscaperWithNum(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ jsValEscaper(3.141592654)
+ }
+}
+
+func BenchmarkJSValEscaperWithStr(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ jsValEscaper("The <i>quick</i>,\r\n<span style='color:brown'>brown</span> fox jumps\u2028over the <canine class=\"lazy\">dog</canine>")
+ }
+}
+
+func BenchmarkJSValEscaperWithStrNoSpecials(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ jsValEscaper("The quick, brown fox jumps over the lazy dog")
+ }
+}
+
+// BenchmarkJSValEscaperWithObj times jsValEscaper on a struct whose string
+// field contains HTML markup, quotes, CR LF and U+2028.
+func BenchmarkJSValEscaperWithObj(b *testing.B) {
+	obj := struct {
+		S string
+		N int
+	}{
+		S: "The <i>quick</i>,\r\n<span style='color:brown'>brown</span> fox jumps\u2028over the <canine class=\"lazy\">dog</canine>\u2028",
+		N: 42,
+	}
+	for n := b.N; n > 0; n-- {
+		jsValEscaper(obj)
+	}
+}
+
+// BenchmarkJSValEscaperWithObjNoSpecials times jsValEscaper on a struct whose
+// string field is a plain sentence.
+func BenchmarkJSValEscaperWithObjNoSpecials(b *testing.B) {
+	obj := struct {
+		S string
+		N int
+	}{
+		S: "The quick, brown fox jumps over the lazy dog",
+		N: 42,
+	}
+	for n := b.N; n > 0; n-- {
+		jsValEscaper(obj)
+	}
+}
+
+func BenchmarkJSStrEscaperNoSpecials(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ jsStrEscaper("The quick, brown fox jumps over the lazy dog.")
+ }
+}
+
+func BenchmarkJSStrEscaper(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ jsStrEscaper("The <i>quick</i>,\r\n<span style='color:brown'>brown</span> fox jumps\u2028over the <canine class=\"lazy\">dog</canine>")
+ }
+}
+
+func BenchmarkJSRegexpEscaperNoSpecials(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ jsRegexpEscaper("The quick, brown fox jumps over the lazy dog")
+ }
+}
+
+func BenchmarkJSRegexpEscaper(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ jsRegexpEscaper("The <i>quick</i>,\r\n<span style='color:brown'>brown</span> fox jumps\u2028over the <canine class=\"lazy\">dog</canine>")
+ }
+}
diff --git a/src/pkg/html/template/template.go b/src/pkg/html/template/template.go
new file mode 100644
index 000000000..9ffe41413
--- /dev/null
+++ b/src/pkg/html/template/template.go
@@ -0,0 +1,280 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package template
+
+import (
+ "fmt"
+ "io"
+ "io/ioutil"
+ "path/filepath"
+ "sync"
+ "text/template"
+ "text/template/parse"
+)
+
+// Template is a specialized template.Template that produces a safe HTML
+// document fragment.
+type Template struct {
+ // escaped is cleared by Parse and consulted before execution to decide
+ // whether the template still has to be run through escapeTemplates.
+ escaped bool
+ // We could embed the text/template field, but it's safer not to because
+ // we need to keep our version of the name space and the underlying
+ // template's in sync.
+ text *template.Template
+ *nameSpace // common to all associated templates
+}
+
+// nameSpace is the data structure shared by all templates in an association.
+type nameSpace struct {
+ // mu is locked by the Template methods around accesses to set and to
+ // the escaped flag of associated templates.
+ mu sync.Mutex
+ // set maps a template name to the associated Template.
+ set map[string]*Template
+}
+
+// Execute applies a parsed template to the specified data object,
+// writing the output to wr.
+//
+// If the template has not been escaped yet, it is escaped first while the
+// name-space lock is held. An escaping error is returned to the caller and
+// nothing is written to wr in that case.
+func (t *Template) Execute(wr io.Writer, data interface{}) (err error) {
+	t.nameSpace.mu.Lock()
+	if !t.escaped {
+		// Mark the template as escaped only when escaping succeeded.
+		// Setting the flag on failure would let a later call skip
+		// escaping and execute a template that was never made safe.
+		if err = escapeTemplates(t, t.Name()); err == nil {
+			t.escaped = true
+		}
+	}
+	t.nameSpace.mu.Unlock()
+	if err != nil {
+		return err
+	}
+	return t.text.Execute(wr, data)
+}
+
+// ExecuteTemplate runs the template named name, which must be associated
+// with t, against data and writes the result to wr. Any error reported while
+// looking up or escaping that template is returned before execution starts.
+func (t *Template) ExecuteTemplate(wr io.Writer, name string, data interface{}) error {
+	target, lookupErr := t.lookupAndEscapeTemplate(wr, name)
+	if lookupErr == nil {
+		return target.text.Execute(wr, data)
+	}
+	return lookupErr
+}
+
+// lookupAndEscapeTemplate guarantees that the template with the given name
+// is escaped, or returns an error if it cannot be. It returns the named
+// template.
+//
+// If no template with that name is associated with t, it returns a nil
+// template and a non-nil error, so callers never dereference a nil result.
+// It panics if the html/template set and the underlying text/template set
+// disagree about whether the name exists. The wr parameter is unused.
+func (t *Template) lookupAndEscapeTemplate(wr io.Writer, name string) (tmpl *Template, err error) {
+	t.nameSpace.mu.Lock()
+	defer t.nameSpace.mu.Unlock()
+	tmpl = t.set[name]
+	if (tmpl == nil) != (t.text.Lookup(name) == nil) {
+		panic("html/template internal error: template escaping out of sync")
+	}
+	if tmpl == nil {
+		// Report an unknown name instead of handing back (nil, nil).
+		return nil, fmt.Errorf("html/template: %q is undefined", name)
+	}
+	if !tmpl.escaped {
+		err = escapeTemplates(tmpl, name)
+	}
+	return tmpl, err
+}
+
+// Parse parses src into t. Template definitions nested in src become
+// associated with the top-level template t, and Parse may be called
+// repeatedly to add further definitions. It is an error if a resulting
+// template is non-empty (contains content other than template definitions)
+// and would replace a non-empty template with the same name. (Across several
+// calls to Parse on the same receiver, only one call can contain text other
+// than space, comments, and template definitions.)
+func (t *Template) Parse(src string) (*Template, error) {
+	// The definitions are about to change, so t must be escaped again.
+	t.nameSpace.mu.Lock()
+	t.escaped = false
+	t.nameSpace.mu.Unlock()
+
+	parsed, err := t.text.Parse(src)
+	if err != nil {
+		return nil, err
+	}
+
+	// Any named template may have been redefined and new ones may have
+	// appeared in the text/template set; mirror that set in ours.
+	t.nameSpace.mu.Lock()
+	defer t.nameSpace.mu.Unlock()
+	for _, textTmpl := range parsed.Templates() {
+		name := textTmpl.Name()
+		wrapper := t.set[name]
+		if wrapper == nil {
+			wrapper = t.new(name)
+		}
+		wrapper.escaped = false
+		wrapper.text = textTmpl
+	}
+	return t, nil
+}
+
+// AddParseTree is unimplemented.
+// It ignores its arguments and always returns a non-nil error.
+func (t *Template) AddParseTree(name string, tree *parse.Tree) error {
+ return fmt.Errorf("html/template: AddParseTree unimplemented")
+}
+
+// Clone is unimplemented.
+// It ignores its argument and always returns a non-nil error.
+func (t *Template) Clone(name string) error {
+ return fmt.Errorf("html/template: Clone unimplemented")
+}
+
+// New allocates a new HTML template with the given name. The template gets
+// a fresh name space in which it is registered under that name.
+func New(name string) *Template {
+	ns := &nameSpace{set: make(map[string]*Template)}
+	root := &Template{
+		escaped:   false,
+		text:      template.New(name),
+		nameSpace: ns,
+	}
+	ns.set[name] = root
+	return root
+}
+
+// New allocates a new HTML template associated with the given one
+// and with the same delimiters. The association, which is transitive,
+// allows one template to invoke another with a {{template}} action.
+// The name-space lock is held for the duration of the call; the actual
+// work is done by t.new.
+func (t *Template) New(name string) *Template {
+ t.nameSpace.mu.Lock()
+ defer t.nameSpace.mu.Unlock()
+ return t.new(name)
+}
+
+// new does the work of New without taking the name-space lock. The created
+// template wraps t.text.New(name), shares t's name space and is registered
+// in it under name.
+func (t *Template) new(name string) *Template {
+	child := &Template{
+		escaped:   false,
+		text:      t.text.New(name),
+		nameSpace: t.nameSpace,
+	}
+	child.set[name] = child
+	return child
+}
+
+// Name returns the name of the template.
+// The name is read from the underlying text/template value.
+func (t *Template) Name() string {
+ return t.text.Name()
+}
+
+// FuncMap is the type of the map defining the mapping from names to
+// functions. Each function must have either a single return value, or two
+// return values of which the second has type error. In that case, if the
+// second (error) return value evaluates to non-nil during execution,
+// execution terminates and Execute returns that error. FuncMap has the same
+// base type as template.FuncMap, copied here so clients need not import
+// "text/template".
+type FuncMap map[string]interface{}
+
+// Funcs adds the elements of the argument map to the template's function map.
+// It panics if a value in the map is not a function with appropriate return
+// type. However, it is legal to overwrite elements of the map. The return
+// value is the template, so calls can be chained.
+// The map is converted to template.FuncMap and handed to the underlying
+// text/template value.
+func (t *Template) Funcs(funcMap FuncMap) *Template {
+ t.text.Funcs(template.FuncMap(funcMap))
+ return t
+}
+
+// Delims sets the action delimiters to the specified strings, to be used in
+// subsequent calls to Parse, ParseFiles, or ParseGlob. Nested template
+// definitions will inherit the settings. An empty delimiter stands for the
+// corresponding default: {{ or }}.
+// The return value is the template, so calls can be chained.
+// The delimiters are set on the underlying text/template value.
+func (t *Template) Delims(left, right string) *Template {
+ t.text.Delims(left, right)
+ return t
+}
+
+// Lookup returns the template with the given name that is associated with t,
+// or nil if there is no such template.
+// The shared set is read while holding the name-space lock.
+func (t *Template) Lookup(name string) *Template {
+ t.nameSpace.mu.Lock()
+ defer t.nameSpace.mu.Unlock()
+ return t.set[name]
+}
+
+// Must returns t unchanged when err is nil and panics with err otherwise,
+// in the same way as template.Must.
+func Must(t *Template, err error) *Template {
+	if err == nil {
+		return t
+	}
+	panic(err)
+}
+
+// ParseFiles creates a new Template and parses the template definitions from
+// the named files. The returned template's name will have the (base) name and
+// (parsed) contents of the first file. There must be at least one file.
+// If an error occurs, parsing stops and the returned *Template is nil.
+// It calls parseFiles with a nil template.
+func ParseFiles(filenames ...string) (*Template, error) {
+ return parseFiles(nil, filenames...)
+}
+
+// ParseFiles parses the named files and associates the resulting templates with
+// t. If an error occurs, parsing stops and the returned template is nil;
+// otherwise it is t. There must be at least one file.
+// It calls parseFiles with t as the template to extend.
+func (t *Template) ParseFiles(filenames ...string) (*Template, error) {
+ return parseFiles(t, filenames...)
+}
+
+// parseFiles is the helper for the method and function. If the argument
+// template is nil, it is created from the first file.
+func parseFiles(t *Template, filenames ...string) (*Template, error) {
+ if len(filenames) == 0 {
+ // Not really a problem, but be consistent.
+ return nil, fmt.Errorf("template: no files named in call to ParseFiles")
+ }
+ for _, filename := range filenames {
+ b, err := ioutil.ReadFile(filename)
+ if err != nil {
+ return nil, err
+ }
+ s := string(b)
+ name := filepath.Base(filename)
+ // First template becomes return value if not already defined,
+ // and we use that one for subsequent New calls to associate
+ // all the templates together. Also, if this file has the same name
+ // as t, this file becomes the contents of t, so
+ // t, err := New(name).Funcs(xxx).ParseFiles(name)
+ // works. Otherwise we create a new template associated with t.
+ var tmpl *Template
+ if t == nil {
+ t = New(name)
+ }
+ if name == t.Name() {
+ tmpl = t
+ } else {
+ tmpl = t.New(name)
+ }
+ _, err = tmpl.Parse(s)
+ if err != nil {
+ return nil, err
+ }
+ }
+ return t, nil
+}
+
+// ParseGlob creates a new Template and parses the template definitions from the
+// files identified by the pattern, which must match at least one file. The
+// returned template will have the (base) name and (parsed) contents of the
+// first file matched by the pattern. ParseGlob is equivalent to calling
+// ParseFiles with the list of files matched by the pattern.
+// It calls parseGlob with a nil template.
+func ParseGlob(pattern string) (*Template, error) {
+ return parseGlob(nil, pattern)
+}
+
+// ParseGlob parses the template definitions in the files identified by the
+// pattern and associates the resulting templates with t. The pattern is
+// processed by filepath.Glob and must match at least one file. ParseGlob is
+// equivalent to calling t.ParseFiles with the list of files matched by the
+// pattern.
+// It calls parseGlob with t as the template to extend.
+func (t *Template) ParseGlob(pattern string) (*Template, error) {
+ return parseGlob(t, pattern)
+}
+
+// parseGlob implements both the ParseGlob function and method. It expands
+// pattern with filepath.Glob and passes the matches to parseFiles. A bad
+// pattern or a pattern without matches yields a nil template and an error.
+func parseGlob(t *Template, pattern string) (*Template, error) {
+	matches, err := filepath.Glob(pattern)
+	switch {
+	case err != nil:
+		return nil, err
+	case len(matches) == 0:
+		return nil, fmt.Errorf("template: pattern matches no files: %#q", pattern)
+	}
+	return parseFiles(t, matches...)
+}
diff --git a/src/pkg/html/template/transition.go b/src/pkg/html/template/transition.go
new file mode 100644
index 000000000..96a4f6678
--- /dev/null
+++ b/src/pkg/html/template/transition.go
@@ -0,0 +1,553 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package template
+
+import (
+ "bytes"
+ "strings"
+)
+
+// transitionFunc is the array of context transition functions for text nodes.
+// A transition function takes a context and template text input, and returns
+// the updated context and the number of bytes consumed from the front of the
+// input.
+// The array is indexed by state. Several states share one function: the JS
+// string and regexp states use tJSDelimited, all CSS string and URL states
+// use tCSSStr, and the JS and CSS comment states use tBlockCmt or tLineCmt.
+var transitionFunc = [...]func(context, []byte) (context, int){
+ stateText: tText,
+ stateTag: tTag,
+ stateAttrName: tAttrName,
+ stateAfterName: tAfterName,
+ stateBeforeValue: tBeforeValue,
+ stateHTMLCmt: tHTMLCmt,
+ stateRCDATA: tSpecialTagEnd,
+ stateAttr: tAttr,
+ stateURL: tURL,
+ stateJS: tJS,
+ stateJSDqStr: tJSDelimited,
+ stateJSSqStr: tJSDelimited,
+ stateJSRegexp: tJSDelimited,
+ stateJSBlockCmt: tBlockCmt,
+ stateJSLineCmt: tLineCmt,
+ stateCSS: tCSS,
+ stateCSSDqStr: tCSSStr,
+ stateCSSSqStr: tCSSStr,
+ stateCSSDqURL: tCSSStr,
+ stateCSSSqURL: tCSSStr,
+ stateCSSURL: tCSSStr,
+ stateCSSBlockCmt: tBlockCmt,
+ stateCSSLineCmt: tLineCmt,
+ stateError: tError,
+}
+
+// commentStart and commentEnd are the byte sequences that open and close
+// an HTML comment.
+var commentStart = []byte("<!--")
+var commentEnd = []byte("-->")
+
+// tText is the context transition function for the text state.
+// It scans s for '<'. A "<!--" switches to the HTML comment state. A '<' or
+// "</" followed by a tag name switches to the tag state; for an end tag the
+// element is reset to elementNone. A '<' that starts neither is skipped and
+// scanning continues. If nothing is found, all of s is consumed and c is
+// returned unchanged.
+func tText(c context, s []byte) (context, int) {
+ k := 0
+ for {
+ // i < k means IndexByte returned -1: no further '<' in s.
+ i := k + bytes.IndexByte(s[k:], '<')
+ if i < k || i+1 == len(s) {
+ return c, len(s)
+ } else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) {
+ return context{state: stateHTMLCmt}, i + 4
+ }
+ i++
+ end := false
+ if s[i] == '/' {
+ if i+1 == len(s) {
+ return c, len(s)
+ }
+ end, i = true, i+1
+ }
+ j, e := eatTagName(s, i)
+ // j != i means eatTagName consumed at least one byte of tag name.
+ if j != i {
+ if end {
+ e = elementNone
+ }
+ // We've found an HTML tag.
+ return context{state: stateTag, element: e}, j
+ }
+ k = j
+ }
+ panic("unreachable")
+}
+
+// elementContentType maps an element type to the state that tTag enters
+// once the element's start tag has been closed with '>'.
+var elementContentType = [...]state{
+ elementNone: stateText,
+ elementScript: stateJS,
+ elementStyle: stateCSS,
+ elementTextarea: stateRCDATA,
+ elementTitle: stateRCDATA,
+}
+
+// tTag is the context transition function for the tag state. After skipping
+// white space it either closes the tag on '>' and moves to the content state
+// of the current element, or reads an attribute name and moves to the
+// attribute-name or after-name state. Input that is neither yields the
+// error state.
+func tTag(c context, s []byte) (context, int) {
+	start := eatWhiteSpace(s, 0)
+	switch {
+	case start == len(s):
+		// Only white space: stay in the tag.
+		return c, len(s)
+	case s[start] == '>':
+		return context{
+			state:   elementContentType[c.element],
+			element: c.element,
+		}, start + 1
+	}
+
+	// Find the attribute name.
+	end, err := eatAttrName(s, start)
+	if err != nil {
+		return context{state: stateError, err: err}, len(s)
+	}
+	if end == start {
+		return context{
+			state: stateError,
+			err:   errorf(ErrBadHTML, 0, "expected space, attr name, or end of tag, but got %q", s[start:]),
+		}, len(s)
+	}
+
+	kind := attrNone
+	switch attrType(string(s[start:end])) {
+	case contentTypeURL:
+		kind = attrURL
+	case contentTypeCSS:
+		kind = attrStyle
+	case contentTypeJS:
+		kind = attrScript
+	}
+
+	// A name that runs to the end of the input may continue in the next
+	// chunk of text.
+	next := stateAfterName
+	if end == len(s) {
+		next = stateAttrName
+	}
+	return context{state: next, element: c.element, attr: kind}, end
+}
+
+// tAttrName is the context transition function for stateAttrName. It keeps
+// consuming an attribute name and moves to stateAfterName once the name
+// ends before the end of s.
+func tAttrName(c context, s []byte) (context, int) {
+	end, err := eatAttrName(s, 0)
+	if err != nil {
+		return context{state: stateError, err: err}, len(s)
+	}
+	if end != len(s) {
+		c.state = stateAfterName
+	}
+	return c, end
+}
+
+// tAfterName is the context transition function for stateAfterName. It skips
+// white space and then looks for the '=' that introduces a value.
+func tAfterName(c context, s []byte) (context, int) {
+	pos := eatWhiteSpace(s, 0)
+	switch {
+	case pos == len(s):
+		return c, len(s)
+	case s[pos] == '=':
+		// Consume the "=" and expect a value next.
+		c.state = stateBeforeValue
+		return c, pos + 1
+	}
+	// No "=": a tag-ending '>' or a valueless attribute.
+	c.state = stateTag
+	return c, pos
+}
+
+// attrStartStates maps an attribute type to the state that tBeforeValue
+// enters at the start of that attribute's value.
+var attrStartStates = [...]state{
+ attrNone: stateAttr,
+ attrScript: stateJS,
+ attrStyle: stateCSS,
+ attrURL: stateURL,
+}
+
+// tBeforeValue is the context transition function for stateBeforeValue. It
+// skips white space, records how the value is delimited, consumes an opening
+// quote if present, and enters the start state for the attribute type.
+func tBeforeValue(c context, s []byte) (context, int) {
+	pos := eatWhiteSpace(s, 0)
+	if pos == len(s) {
+		return c, len(s)
+	}
+	// An unquoted value runs to the next space or the end of the tag.
+	quote := delimSpaceOrTagEnd
+	if s[pos] == '\'' {
+		quote = delimSingleQuote
+		pos++
+	} else if s[pos] == '"' {
+		quote = delimDoubleQuote
+		pos++
+	}
+	// The start state depends on c.attr, so read it before clearing it.
+	c.state = attrStartStates[c.attr]
+	c.delim = quote
+	c.attr = attrNone
+	return c, pos
+}
+
+// tHTMLCmt is the context transition function for stateHTMLCmt. It consumes
+// input up to and including the first "-->" and returns to the zero
+// context; without a "-->" it consumes everything and stays in the comment.
+func tHTMLCmt(c context, s []byte) (context, int) {
+	end := bytes.Index(s, commentEnd)
+	if end == -1 {
+		return c, len(s)
+	}
+	// Skip the three bytes of "-->".
+	return context{}, end + 3
+}
+
+// specialTagEndMarkers maps element types to the character sequence that
+// case-insensitively signals the end of the special tag body.
+// The markers are written in lower case and omit the closing '>'.
+var specialTagEndMarkers = [...]string{
+ elementScript: "</script",
+ elementStyle: "</style",
+ elementTextarea: "</textarea",
+ elementTitle: "</title",
+}
+
+// tSpecialTagEnd is the context transition function for raw text and RCDATA
+// element states. It looks for the end marker of the current element,
+// ignoring the case of ASCII letters. If the marker is found, the bytes
+// before it are consumed and the zero context is returned; otherwise all of
+// s is consumed and c is returned unchanged.
+func tSpecialTagEnd(c context, s []byte) (context, int) {
+	if c.element != elementNone {
+		// The returned index is used as a byte offset into s, so the
+		// search text must have the same length as s. strings.ToLower
+		// does not guarantee that: some multi-byte runes lower-case to
+		// shorter ones and invalid UTF-8 is replaced by a longer rune.
+		if i := strings.Index(asciiLowerCopy(s), specialTagEndMarkers[c.element]); i != -1 {
+			return context{}, i
+		}
+	}
+	return c, len(s)
+}
+
+// asciiLowerCopy returns s as a string with the bytes 'A'-'Z' replaced by
+// 'a'-'z'. Every other byte is copied unchanged, so offsets into the result
+// are also valid offsets into s.
+func asciiLowerCopy(s []byte) string {
+	lower := make([]byte, len(s))
+	for i, b := range s {
+		if 'A' <= b && b <= 'Z' {
+			b += 'a' - 'A'
+		}
+		lower[i] = b
+	}
+	return string(lower)
+}
+
+// tAttr is the context transition function for the attribute state.
+// It consumes all of s and leaves the context unchanged.
+func tAttr(c context, s []byte) (context, int) {
+ return c, len(s)
+}
+
+// tURL is the context transition function for the URL state. It consumes
+// all of s and records how far into the URL the text has progressed: a '#'
+// or '?' anywhere in s puts the context in the query-or-fragment part, and
+// otherwise any non-space content moves an untouched context to the
+// pre-query part.
+func tURL(c context, s []byte) (context, int) {
+	switch {
+	case bytes.IndexAny(s, "#?") >= 0:
+		c.urlPart = urlPartQueryOrFrag
+	case len(s) != eatWhiteSpace(s, 0) && c.urlPart == urlPartNone:
+		// HTML5 uses "Valid URL potentially surrounded by spaces" for
+		// attrs: http://www.w3.org/TR/html5/index.html#attributes-1
+		c.urlPart = urlPartPreQuery
+	}
+	return c, len(s)
+}
+
+// tJS is the context transition function for the JS state.
+// It scans s for the first quote or slash. The tokens before it update
+// c.jsCtx via nextJSCtx. A quote enters the matching string state. A slash
+// starts a line comment, a block comment, a regexp, or is a division
+// operator, depending on the following byte and on c.jsCtx; if c.jsCtx
+// allows neither regexp nor division the error state is returned.
+func tJS(c context, s []byte) (context, int) {
+ i := bytes.IndexAny(s, `"'/`)
+ if i == -1 {
+ // Entire input is non string, comment, regexp tokens.
+ c.jsCtx = nextJSCtx(s, c.jsCtx)
+ return c, len(s)
+ }
+ c.jsCtx = nextJSCtx(s[:i], c.jsCtx)
+ switch s[i] {
+ case '"':
+ c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
+ case '\'':
+ c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
+ case '/':
+ switch {
+ case i+1 < len(s) && s[i+1] == '/':
+ // i is advanced so that both bytes of "//" are consumed below.
+ c.state, i = stateJSLineCmt, i+1
+ case i+1 < len(s) && s[i+1] == '*':
+ // i is advanced so that both bytes of "/*" are consumed below.
+ c.state, i = stateJSBlockCmt, i+1
+ case c.jsCtx == jsCtxRegexp:
+ c.state = stateJSRegexp
+ case c.jsCtx == jsCtxDivOp:
+ // A division operator: stay in stateJS; a regexp may follow it.
+ c.jsCtx = jsCtxRegexp
+ default:
+ return context{
+ state: stateError,
+ err: errorf(ErrSlashAmbig, 0, "'/' could start a division or regexp: %.32q", s[i:]),
+ }, len(s)
+ }
+ default:
+ panic("unreachable")
+ }
+ return c, i + 1
+}
+
+// tJSDelimited is the context transition function for the JS string and regexp
+// states.
+// It scans for the closing delimiter of the current state, skipping
+// backslash escapes. In the regexp state a '/' inside a [...] charset does
+// not end the literal. On the closing delimiter it returns to stateJS with
+// jsCtxDivOp. An escape cut off by the end of s, or a charset left open at
+// the end of s, yields the error state.
+func tJSDelimited(c context, s []byte) (context, int) {
+ // specials lists the escape character plus the bytes that matter in
+ // the current state; the default is for double-quoted strings.
+ specials := `\"`
+ switch c.state {
+ case stateJSSqStr:
+ specials = `\'`
+ case stateJSRegexp:
+ specials = `\/[]`
+ }
+
+ k, inCharset := 0, false
+ for {
+ // i < k means IndexAny returned -1: no further special byte.
+ i := k + bytes.IndexAny(s[k:], specials)
+ if i < k {
+ break
+ }
+ switch s[i] {
+ case '\\':
+ // Step over the escaped byte.
+ i++
+ if i == len(s) {
+ return context{
+ state: stateError,
+ err: errorf(ErrPartialEscape, 0, "unfinished escape sequence in JS string: %q", s),
+ }, len(s)
+ }
+ case '[':
+ inCharset = true
+ case ']':
+ inCharset = false
+ default:
+ // end delimiter
+ if !inCharset {
+ c.state, c.jsCtx = stateJS, jsCtxDivOp
+ return c, i + 1
+ }
+ }
+ k = i + 1
+ }
+
+ if inCharset {
+ // This can be fixed by making context richer if interpolation
+ // into charsets is desired.
+ return context{
+ state: stateError,
+ err: errorf(ErrPartialCharset, 0, "unfinished JS regexp charset: %q", s),
+ }, len(s)
+ }
+
+ return c, len(s)
+}
+
+// blockCommentEnd is the byte sequence that closes a /*comment*/.
+var blockCommentEnd = []byte("*/")
+
+// tBlockCmt is the context transition function for /*comment*/ states. It
+// consumes input through the first "*/" and returns to the JS or CSS state
+// the comment belongs to; without a "*/" it consumes everything and stays
+// in the comment. It panics when called in any other state.
+func tBlockCmt(c context, s []byte) (context, int) {
+	end := bytes.Index(s, blockCommentEnd)
+	if end == -1 {
+		return c, len(s)
+	}
+	if c.state == stateJSBlockCmt {
+		c.state = stateJS
+	} else if c.state == stateCSSBlockCmt {
+		c.state = stateCSS
+	} else {
+		panic(c.state.String())
+	}
+	// Skip the two bytes of "*/".
+	return c, end + 2
+}
+
+// tLineCmt is the context transition function for //comment states.
+// It consumes input up to, but not including, the first line terminator and
+// returns to the JS or CSS state the comment belongs to. Without a line
+// terminator it consumes all of s and stays in the comment. It panics when
+// called in any other state.
+func tLineCmt(c context, s []byte) (context, int) {
+ var lineTerminators string
+ var endState state
+ switch c.state {
+ case stateJSLineCmt:
+ lineTerminators, endState = "\n\r\u2028\u2029", stateJS
+ case stateCSSLineCmt:
+ lineTerminators, endState = "\n\f\r", stateCSS
+ // Line comments are not part of any published CSS standard but
+ // are supported by the 4 major browsers.
+ // This defines line comments as
+ // LINECOMMENT ::= "//" [^\n\f\r]*
+ // since http://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines
+ // newlines:
+ // nl ::= #xA | #xD #xA | #xD | #xC
+ default:
+ panic(c.state.String())
+ }
+
+ i := bytes.IndexAny(s, lineTerminators)
+ if i == -1 {
+ return c, len(s)
+ }
+ c.state = endState
+ // Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4
+ // "However, the LineTerminator at the end of the line is not
+ // considered to be part of the single-line comment; it is
+ // recognized separately by the lexical grammar and becomes part
+ // of the stream of input elements for the syntactic grammar."
+ return c, i
+}
+
+// tCSS is the context transition function for the CSS state.
+// It scans s for the first construct that changes state: a "url(" token, a
+// "//" or "/*" comment, or a quoted string. If none is found, all of s is
+// consumed and c is returned unchanged.
+func tCSS(c context, s []byte) (context, int) {
+ // CSS quoted strings are almost never used except for:
+ // (1) URLs as in background: "/foo.png"
+ // (2) Multiword font-names as in font-family: "Times New Roman"
+ // (3) List separators in content values as in inline-lists:
+ // <style>
+ // ul.inlineList { list-style: none; padding:0 }
+ // ul.inlineList > li { display: inline }
+ // ul.inlineList > li:before { content: ", " }
+ // ul.inlineList > li:first-child:before { content: "" }
+ // </style>
+ // <ul class=inlineList><li>One<li>Two<li>Three</ul>
+ // (4) Attribute value selectors as in a[href="http://example.com/"]
+ //
+ // We conservatively treat all strings as URLs, but make some
+ // allowances to avoid confusion.
+ //
+ // In (1), our conservative assumption is justified.
+ // In (2), valid font names do not contain ':', '?', or '#', so our
+ // conservative assumption is fine since we will never transition past
+ // urlPartPreQuery.
+ // In (3), our protocol heuristic should not be tripped, and there
+ // should not be non-space content after a '?' or '#', so as long as
+ // we only %-encode RFC 3986 reserved characters we are ok.
+ // In (4), we should URL escape for URL attributes, and for others we
+ // have the attribute name available if our conservative assumption
+ // proves problematic for real code.
+
+ k := 0
+ for {
+ // i < k means IndexAny returned -1: nothing of interest is left.
+ i := k + bytes.IndexAny(s[k:], `("'/`)
+ if i < k {
+ return c, len(s)
+ }
+ switch s[i] {
+ case '(':
+ // Look for url to the left.
+ p := bytes.TrimRight(s[:i], "\t\n\f\r ")
+ if endsWithCSSKeyword(p, "url") {
+ // j is the index of the first non-space byte after the '('.
+ j := len(s) - len(bytes.TrimLeft(s[i+1:], "\t\n\f\r "))
+ switch {
+ case j != len(s) && s[j] == '"':
+ c.state, j = stateCSSDqURL, j+1
+ case j != len(s) && s[j] == '\'':
+ c.state, j = stateCSSSqURL, j+1
+ default:
+ c.state = stateCSSURL
+ }
+ return c, j
+ }
+ case '/':
+ if i+1 < len(s) {
+ switch s[i+1] {
+ case '/':
+ c.state = stateCSSLineCmt
+ return c, i + 2
+ case '*':
+ c.state = stateCSSBlockCmt
+ return c, i + 2
+ }
+ }
+ case '"':
+ c.state = stateCSSDqStr
+ return c, i + 1
+ case '\'':
+ c.state = stateCSSSqStr
+ return c, i + 1
+ }
+ k = i + 1
+ }
+ panic("unreachable")
+}
+
+// tCSSStr is the context transition function for the CSS string and URL states.
+func tCSSStr(c context, s []byte) (context, int) {
+ var endAndEsc string
+ switch c.state {
+ case stateCSSDqStr, stateCSSDqURL:
+ endAndEsc = `\"`
+ case stateCSSSqStr, stateCSSSqURL:
+ endAndEsc = `\'`
+ case stateCSSURL:
+ // Unquoted URLs end with a newline or close parenthesis.
+ // The below includes the wc (whitespace character) and nl.
+ endAndEsc = "\\\t\n\f\r )"
+ default:
+ panic(c.state.String())
+ }
+
+ k := 0
+ for {
+ i := k + bytes.IndexAny(s[k:], endAndEsc)
+ if i < k {
+ c, nread := tURL(c, decodeCSS(s[k:]))
+ return c, k + nread
+ }
+ if s[i] == '\\' {
+ i++
+ if i == len(s) {
+ return context{
+ state: stateError,
+ err: errorf(ErrPartialEscape, 0, "unfinished escape sequence in CSS string: %q", s),
+ }, len(s)
+ }
+ } else {
+ c.state = stateCSS
+ return c, i + 1
+ }
+ c, _ = tURL(c, decodeCSS(s[:i+1]))
+ k = i + 1
+ }
+ panic("unreachable")
+}
+
+// tError is the context transition function for the error state.
+func tError(c context, s []byte) (context, int) {
+ return c, len(s)
+}
+
+// eatAttrName returns the largest j such that s[i:j] is an attribute name.
+// It returns an error if s[i:] does not look like it begins with an
+// attribute name, such as encountering a quote mark without a preceding
+// equals sign.
+func eatAttrName(s []byte, i int) (int, *Error) {
+ for j := i; j < len(s); j++ {
+ switch s[j] {
+ case ' ', '\t', '\n', '\f', '\r', '=', '>':
+ return j, nil
+ case '\'', '"', '<':
+ // These result in a parse warning in HTML5 and are
+ // indicative of serious problems if seen in an attr
+ // name in a template.
+ return -1, errorf(ErrBadHTML, 0, "%q in attribute name: %.32q", s[j:j+1], s)
+ default:
+ // No-op.
+ }
+ }
+ return len(s), nil
+}
+
+var elementNameMap = map[string]element{
+ "script": elementScript,
+ "style": elementStyle,
+ "textarea": elementTextarea,
+ "title": elementTitle,
+}
+
+// asciiAlpha returns whether c is an ASCII letter.
+func asciiAlpha(c byte) bool {
+ return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
+}
+
+// asciiAlphaNum returns whether c is an ASCII letter or digit.
+func asciiAlphaNum(c byte) bool {
+ return asciiAlpha(c) || '0' <= c && c <= '9'
+}
+
+// eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
+func eatTagName(s []byte, i int) (int, element) {
+ if i == len(s) || !asciiAlpha(s[i]) {
+ return i, elementNone
+ }
+ j := i + 1
+ for j < len(s) {
+ x := s[j]
+ if asciiAlphaNum(x) {
+ j++
+ continue
+ }
+ // Allow "x-y" or "x:y" but not "x-", "-y", or "x--y".
+ if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) {
+ j += 2
+ continue
+ }
+ break
+ }
+ return j, elementNameMap[strings.ToLower(string(s[i:j]))]
+}
+
+// eatWhiteSpace returns the largest j such that s[i:j] is white space.
+func eatWhiteSpace(s []byte, i int) int {
+ for j := i; j < len(s); j++ {
+ switch s[j] {
+ case ' ', '\t', '\n', '\f', '\r':
+ // No-op.
+ default:
+ return j
+ }
+ }
+ return len(s)
+}
diff --git a/src/pkg/html/template/url.go b/src/pkg/html/template/url.go
new file mode 100644
index 000000000..454c791ec
--- /dev/null
+++ b/src/pkg/html/template/url.go
@@ -0,0 +1,105 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package template
+
+import (
+ "bytes"
+ "fmt"
+ "strings"
+)
+
+// urlFilter returns its input unless it contains an unsafe protocol in which
+// case it defangs the entire URL.
+func urlFilter(args ...interface{}) string {
+ s, t := stringify(args...)
+ if t == contentTypeURL {
+ return s
+ }
+ if i := strings.IndexRune(s, ':'); i >= 0 && strings.IndexRune(s[:i], '/') < 0 {
+ protocol := strings.ToLower(s[:i])
+ if protocol != "http" && protocol != "https" && protocol != "mailto" {
+ return "#" + filterFailsafe
+ }
+ }
+ return s
+}
+
+// urlEscaper produces an output that can be embedded in a URL query.
+// The output can be embedded in an HTML attribute without further escaping.
+func urlEscaper(args ...interface{}) string {
+ return urlProcessor(false, args...)
+}
+
+// urlNormalizer normalizes URL content so it can be embedded in a quote-delimited
+// string or parenthesis delimited url(...).
+// The normalizer does not encode all HTML specials. Specifically, it does not
+// encode '&' so correct embedding in an HTML attribute requires escaping of
+// '&' to '&amp;'.
+func urlNormalizer(args ...interface{}) string {
+ return urlProcessor(true, args...)
+}
+
+// urlProcessor normalizes (when norm is true) or escapes its input to produce
+// a valid hierarchical or opaque URL part.
+func urlProcessor(norm bool, args ...interface{}) string {
+ s, t := stringify(args...)
+ if t == contentTypeURL {
+ norm = true
+ }
+ var b bytes.Buffer
+ written := 0
+ // The byte loop below assumes that all URLs use UTF-8 as the
+ // content-encoding. This is similar to the URI to IRI encoding scheme
+ // defined in section 3.1 of RFC 3987, and behaves the same as the
+ // EcmaScript builtin encodeURIComponent.
+ // It should not cause any misencoding of URLs in pages with
+ // Content-type: text/html;charset=UTF-8.
+ for i, n := 0, len(s); i < n; i++ {
+ c := s[i]
+ switch c {
+ // Single quote and parens are sub-delims in RFC 3986, but we
+ // escape them so the output can be embedded in single
+ // quoted attributes and unquoted CSS url(...) constructs.
+ // Single quotes are reserved in URLs, but are only used in
+ // the obsolete "mark" rule in an appendix in RFC 3986
+ // so can be safely encoded.
+ case '!', '#', '$', '&', '*', '+', ',', '/', ':', ';', '=', '?', '@', '[', ']':
+ if norm {
+ continue
+ }
+ // Unreserved according to RFC 3986 sec 2.3
+ // "For consistency, percent-encoded octets in the ranges of
+ // ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D),
+ // period (%2E), underscore (%5F), or tilde (%7E) should not be
+ // created by URI producers ..."
+ case '-', '.', '_', '~':
+ continue
+ case '%':
+ // When normalizing do not re-encode valid escapes.
+ if norm && i+2 < len(s) && isHex(s[i+1]) && isHex(s[i+2]) {
+ continue
+ }
+ default:
+ // Unreserved according to RFC 3986 sec 2.3
+ if 'a' <= c && c <= 'z' {
+ continue
+ }
+ if 'A' <= c && c <= 'Z' {
+ continue
+ }
+ if '0' <= c && c <= '9' {
+ continue
+ }
+ }
+ b.WriteString(s[written:i])
+ fmt.Fprintf(&b, "%%%02x", c)
+ written = i + 1
+ }
+ if written == 0 {
+ return s
+ }
+ b.WriteString(s[written:])
+ return b.String()
+}
diff --git a/src/pkg/html/template/url_test.go b/src/pkg/html/template/url_test.go
new file mode 100644
index 000000000..5182e9d79
--- /dev/null
+++ b/src/pkg/html/template/url_test.go
@@ -0,0 +1,112 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package template
+
+import (
+ "testing"
+)
+
+func TestURLNormalizer(t *testing.T) {
+ tests := []struct {
+ url, want string
+ }{
+ {"", ""},
+ {
+ "http://example.com:80/foo/bar?q=foo%20&bar=x+y#frag",
+ "http://example.com:80/foo/bar?q=foo%20&bar=x+y#frag",
+ },
+ {" ", "%20"},
+ {"%7c", "%7c"},
+ {"%7C", "%7C"},
+ {"%2", "%252"},
+ {"%", "%25"},
+ {"%z", "%25z"},
+ {"/foo|bar/%5c\u1234", "/foo%7cbar/%5c%e1%88%b4"},
+ }
+ for _, test := range tests {
+ if got := urlNormalizer(test.url); test.want != got {
+ t.Errorf("%q: want\n\t%q\nbut got\n\t%q", test.url, test.want, got)
+ }
+ if test.want != urlNormalizer(test.want) {
+ t.Errorf("not idempotent: %q", test.want)
+ }
+ }
+}
+
+func TestURLFilters(t *testing.T) {
+ input := ("\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f" +
+ "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" +
+ ` !"#$%&'()*+,-./` +
+ `0123456789:;<=>?` +
+ `@ABCDEFGHIJKLMNO` +
+ `PQRSTUVWXYZ[\]^_` +
+ "`abcdefghijklmno" +
+ "pqrstuvwxyz{|}~\x7f" +
+ "\u00A0\u0100\u2028\u2029\ufeff\U0001D11E")
+
+ tests := []struct {
+ name string
+ escaper func(...interface{}) string
+ escaped string
+ }{
+ {
+ "urlEscaper",
+ urlEscaper,
+ "%00%01%02%03%04%05%06%07%08%09%0a%0b%0c%0d%0e%0f" +
+ "%10%11%12%13%14%15%16%17%18%19%1a%1b%1c%1d%1e%1f" +
+ "%20%21%22%23%24%25%26%27%28%29%2a%2b%2c-.%2f" +
+ "0123456789%3a%3b%3c%3d%3e%3f" +
+ "%40ABCDEFGHIJKLMNO" +
+ "PQRSTUVWXYZ%5b%5c%5d%5e_" +
+ "%60abcdefghijklmno" +
+ "pqrstuvwxyz%7b%7c%7d~%7f" +
+ "%c2%a0%c4%80%e2%80%a8%e2%80%a9%ef%bb%bf%f0%9d%84%9e",
+ },
+ {
+ "urlNormalizer",
+ urlNormalizer,
+ "%00%01%02%03%04%05%06%07%08%09%0a%0b%0c%0d%0e%0f" +
+ "%10%11%12%13%14%15%16%17%18%19%1a%1b%1c%1d%1e%1f" +
+ "%20!%22#$%25&%27%28%29*+,-./" +
+ "0123456789:;%3c=%3e?" +
+ "@ABCDEFGHIJKLMNO" +
+ "PQRSTUVWXYZ[%5c]%5e_" +
+ "%60abcdefghijklmno" +
+ "pqrstuvwxyz%7b%7c%7d~%7f" +
+ "%c2%a0%c4%80%e2%80%a8%e2%80%a9%ef%bb%bf%f0%9d%84%9e",
+ },
+ }
+
+ for _, test := range tests {
+ if s := test.escaper(input); s != test.escaped {
+ t.Errorf("%s: want\n\t%q\ngot\n\t%q", test.name, test.escaped, s)
+ continue
+ }
+ }
+}
+
+func BenchmarkURLEscaper(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ urlEscaper("http://example.com:80/foo?q=bar%20&baz=x+y#frag")
+ }
+}
+
+func BenchmarkURLEscaperNoSpecials(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ urlEscaper("TheQuickBrownFoxJumpsOverTheLazyDog.")
+ }
+}
+
+func BenchmarkURLNormalizer(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ urlNormalizer("The quick brown fox jumps over the lazy dog.\n")
+ }
+}
+
+func BenchmarkURLNormalizerNoSpecials(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ urlNormalizer("http://example.com:80/foo?q=bar%20&baz=x+y#frag")
+ }
+}
diff --git a/src/pkg/html/testdata/webkit/README b/src/pkg/html/testdata/webkit/README
deleted file mode 100644
index 9b4c2d8be..000000000
--- a/src/pkg/html/testdata/webkit/README
+++ /dev/null
@@ -1,28 +0,0 @@
-The *.dat files in this directory are copied from The WebKit Open Source
-Project, specifically $WEBKITROOT/LayoutTests/html5lib/resources.
-WebKit is licensed under a BSD style license.
-http://webkit.org/coding/bsd-license.html says:
-
-Copyright (C) 2009 Apple Inc. All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-1. Redistributions of source code must retain the above copyright notice,
-this list of conditions and the following disclaimer.
-
-2. Redistributions in binary form must reproduce the above copyright notice,
-this list of conditions and the following disclaimer in the documentation
-and/or other materials provided with the distribution.
-
-THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS "AS IS" AND ANY
-EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
diff --git a/src/pkg/html/testdata/webkit/adoption01.dat b/src/pkg/html/testdata/webkit/adoption01.dat
deleted file mode 100644
index 787e1b01e..000000000
--- a/src/pkg/html/testdata/webkit/adoption01.dat
+++ /dev/null
@@ -1,194 +0,0 @@
-#data
-<a><p></a></p>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <a>
-| <p>
-| <a>
-
-#data
-<a>1<p>2</a>3</p>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <a>
-| "1"
-| <p>
-| <a>
-| "2"
-| "3"
-
-#data
-<a>1<button>2</a>3</button>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <a>
-| "1"
-| <button>
-| <a>
-| "2"
-| "3"
-
-#data
-<a>1<b>2</a>3</b>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <a>
-| "1"
-| <b>
-| "2"
-| <b>
-| "3"
-
-#data
-<a>1<div>2<div>3</a>4</div>5</div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <a>
-| "1"
-| <div>
-| <a>
-| "2"
-| <div>
-| <a>
-| "3"
-| "4"
-| "5"
-
-#data
-<table><a>1<p>2</a>3</p>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <a>
-| "1"
-| <p>
-| <a>
-| "2"
-| "3"
-| <table>
-
-#data
-<b><b><a><p></a>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <b>
-| <b>
-| <a>
-| <p>
-| <a>
-
-#data
-<b><a><b><p></a>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <b>
-| <a>
-| <b>
-| <b>
-| <p>
-| <a>
-
-#data
-<a><b><b><p></a>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <a>
-| <b>
-| <b>
-| <b>
-| <b>
-| <p>
-| <a>
-
-#data
-<p>1<s id="A">2<b id="B">3</p>4</s>5</b>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <p>
-| "1"
-| <s>
-| id="A"
-| "2"
-| <b>
-| id="B"
-| "3"
-| <s>
-| id="A"
-| <b>
-| id="B"
-| "4"
-| <b>
-| id="B"
-| "5"
-
-#data
-<table><a>1<td>2</td>3</table>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <a>
-| "1"
-| <a>
-| "3"
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| "2"
-
-#data
-<table>A<td>B</td>C</table>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "AC"
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| "B"
-
-#data
-<a><svg><tr><input></a>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <a>
-| <svg svg>
-| <svg tr>
-| <svg input>
diff --git a/src/pkg/html/testdata/webkit/adoption02.dat b/src/pkg/html/testdata/webkit/adoption02.dat
deleted file mode 100644
index d18151b44..000000000
--- a/src/pkg/html/testdata/webkit/adoption02.dat
+++ /dev/null
@@ -1,31 +0,0 @@
-#data
-<b>1<i>2<p>3</b>4
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <b>
-| "1"
-| <i>
-| "2"
-| <i>
-| <p>
-| <b>
-| "3"
-| "4"
-
-#data
-<a><div><style></style><address><a>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <a>
-| <div>
-| <a>
-| <style>
-| <address>
-| <a>
-| <a>
diff --git a/src/pkg/html/testdata/webkit/comments01.dat b/src/pkg/html/testdata/webkit/comments01.dat
deleted file mode 100644
index 44f187683..000000000
--- a/src/pkg/html/testdata/webkit/comments01.dat
+++ /dev/null
@@ -1,135 +0,0 @@
-#data
-FOO<!-- BAR -->BAZ
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO"
-| <!-- BAR -->
-| "BAZ"
-
-#data
-FOO<!-- BAR --!>BAZ
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO"
-| <!-- BAR -->
-| "BAZ"
-
-#data
-FOO<!-- BAR -- >BAZ
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO"
-| <!-- BAR -- >BAZ -->
-
-#data
-FOO<!-- BAR -- <QUX> -- MUX -->BAZ
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO"
-| <!-- BAR -- <QUX> -- MUX -->
-| "BAZ"
-
-#data
-FOO<!-- BAR -- <QUX> -- MUX --!>BAZ
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO"
-| <!-- BAR -- <QUX> -- MUX -->
-| "BAZ"
-
-#data
-FOO<!-- BAR -- <QUX> -- MUX -- >BAZ
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO"
-| <!-- BAR -- <QUX> -- MUX -- >BAZ -->
-
-#data
-FOO<!---->BAZ
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO"
-| <!-- -->
-| "BAZ"
-
-#data
-FOO<!--->BAZ
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO"
-| <!-- -->
-| "BAZ"
-
-#data
-FOO<!-->BAZ
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO"
-| <!-- -->
-| "BAZ"
-
-#data
-<?xml version="1.0">Hi
-#errors
-#document
-| <!-- ?xml version="1.0" -->
-| <html>
-| <head>
-| <body>
-| "Hi"
-
-#data
-<?xml version="1.0">
-#errors
-#document
-| <!-- ?xml version="1.0" -->
-| <html>
-| <head>
-| <body>
-
-#data
-<?xml version
-#errors
-#document
-| <!-- ?xml version -->
-| <html>
-| <head>
-| <body>
-
-#data
-FOO<!----->BAZ
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO"
-| <!-- - -->
-| "BAZ"
diff --git a/src/pkg/html/testdata/webkit/doctype01.dat b/src/pkg/html/testdata/webkit/doctype01.dat
deleted file mode 100644
index ae457328a..000000000
--- a/src/pkg/html/testdata/webkit/doctype01.dat
+++ /dev/null
@@ -1,370 +0,0 @@
-#data
-<!DOCTYPE html>Hello
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "Hello"
-
-#data
-<!dOctYpE HtMl>Hello
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "Hello"
-
-#data
-<!DOCTYPEhtml>Hello
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "Hello"
-
-#data
-<!DOCTYPE>Hello
-#errors
-#document
-| <!DOCTYPE >
-| <html>
-| <head>
-| <body>
-| "Hello"
-
-#data
-<!DOCTYPE >Hello
-#errors
-#document
-| <!DOCTYPE >
-| <html>
-| <head>
-| <body>
-| "Hello"
-
-#data
-<!DOCTYPE potato>Hello
-#errors
-#document
-| <!DOCTYPE potato>
-| <html>
-| <head>
-| <body>
-| "Hello"
-
-#data
-<!DOCTYPE potato >Hello
-#errors
-#document
-| <!DOCTYPE potato>
-| <html>
-| <head>
-| <body>
-| "Hello"
-
-#data
-<!DOCTYPE potato taco>Hello
-#errors
-#document
-| <!DOCTYPE potato>
-| <html>
-| <head>
-| <body>
-| "Hello"
-
-#data
-<!DOCTYPE potato taco "ddd>Hello
-#errors
-#document
-| <!DOCTYPE potato>
-| <html>
-| <head>
-| <body>
-| "Hello"
-
-#data
-<!DOCTYPE potato sYstEM>Hello
-#errors
-#document
-| <!DOCTYPE potato>
-| <html>
-| <head>
-| <body>
-| "Hello"
-
-#data
-<!DOCTYPE potato sYstEM >Hello
-#errors
-#document
-| <!DOCTYPE potato>
-| <html>
-| <head>
-| <body>
-| "Hello"
-
-#data
-<!DOCTYPE potato sYstEM ggg>Hello
-#errors
-#document
-| <!DOCTYPE potato>
-| <html>
-| <head>
-| <body>
-| "Hello"
-
-#data
-<!DOCTYPE potato SYSTEM taco >Hello
-#errors
-#document
-| <!DOCTYPE potato>
-| <html>
-| <head>
-| <body>
-| "Hello"
-
-#data
-<!DOCTYPE potato SYSTEM 'taco"'>Hello
-#errors
-#document
-| <!DOCTYPE potato "" "taco"">
-| <html>
-| <head>
-| <body>
-| "Hello"
-
-#data
-<!DOCTYPE potato SYSTEM "taco">Hello
-#errors
-#document
-| <!DOCTYPE potato "" "taco">
-| <html>
-| <head>
-| <body>
-| "Hello"
-
-#data
-<!DOCTYPE potato SYSTEM "tai'co">Hello
-#errors
-#document
-| <!DOCTYPE potato "" "tai'co">
-| <html>
-| <head>
-| <body>
-| "Hello"
-
-#data
-<!DOCTYPE potato SYSTEMtaco "ddd">Hello
-#errors
-#document
-| <!DOCTYPE potato>
-| <html>
-| <head>
-| <body>
-| "Hello"
-
-#data
-<!DOCTYPE potato grass SYSTEM taco>Hello
-#errors
-#document
-| <!DOCTYPE potato>
-| <html>
-| <head>
-| <body>
-| "Hello"
-
-#data
-<!DOCTYPE potato pUbLIc>Hello
-#errors
-#document
-| <!DOCTYPE potato>
-| <html>
-| <head>
-| <body>
-| "Hello"
-
-#data
-<!DOCTYPE potato pUbLIc >Hello
-#errors
-#document
-| <!DOCTYPE potato>
-| <html>
-| <head>
-| <body>
-| "Hello"
-
-#data
-<!DOCTYPE potato pUbLIcgoof>Hello
-#errors
-#document
-| <!DOCTYPE potato>
-| <html>
-| <head>
-| <body>
-| "Hello"
-
-#data
-<!DOCTYPE potato PUBLIC goof>Hello
-#errors
-#document
-| <!DOCTYPE potato>
-| <html>
-| <head>
-| <body>
-| "Hello"
-
-#data
-<!DOCTYPE potato PUBLIC "go'of">Hello
-#errors
-#document
-| <!DOCTYPE potato "go'of" "">
-| <html>
-| <head>
-| <body>
-| "Hello"
-
-#data
-<!DOCTYPE potato PUBLIC 'go'of'>Hello
-#errors
-#document
-| <!DOCTYPE potato "go" "">
-| <html>
-| <head>
-| <body>
-| "Hello"
-
-#data
-<!DOCTYPE potato PUBLIC 'go:hh of' >Hello
-#errors
-#document
-| <!DOCTYPE potato "go:hh of" "">
-| <html>
-| <head>
-| <body>
-| "Hello"
-
-#data
-<!DOCTYPE potato PUBLIC "W3C-//dfdf" SYSTEM ggg>Hello
-#errors
-#document
-| <!DOCTYPE potato "W3C-//dfdf" "">
-| <html>
-| <head>
-| <body>
-| "Hello"
-
-#data
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">Hello
-#errors
-#document
-| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
-| <html>
-| <head>
-| <body>
-| "Hello"
-
-#data
-<!DOCTYPE ...>Hello
-#errors
-#document
-| <!DOCTYPE ...>
-| <html>
-| <head>
-| <body>
-| "Hello"
-
-#data
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
-"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
-#errors
-#document
-| <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
-| <html>
-| <head>
-| <body>
-
-#data
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
-"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
-#errors
-#document
-| <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Frameset//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
-| <html>
-| <head>
-| <body>
-
-#data
-<!DOCTYPE root-element [SYSTEM OR PUBLIC FPI] "uri" [
-<!-- internal declarations -->
-]>
-#errors
-#document
-| <!DOCTYPE root-element>
-| <html>
-| <head>
-| <body>
-| "]>"
-
-#data
-<!DOCTYPE html PUBLIC
- "-//WAPFORUM//DTD XHTML Mobile 1.0//EN"
- "http://www.wapforum.org/DTD/xhtml-mobile10.dtd">
-#errors
-#document
-| <!DOCTYPE html "-//WAPFORUM//DTD XHTML Mobile 1.0//EN" "http://www.wapforum.org/DTD/xhtml-mobile10.dtd">
-| <html>
-| <head>
-| <body>
-
-#data
-<!DOCTYPE HTML SYSTEM "http://www.w3.org/DTD/HTML4-strict.dtd"><body><b>Mine!</b></body>
-#errors
-#document
-| <!DOCTYPE html "" "http://www.w3.org/DTD/HTML4-strict.dtd">
-| <html>
-| <head>
-| <body>
-| <b>
-| "Mine!"
-
-#data
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN""http://www.w3.org/TR/html4/strict.dtd">
-#errors
-#document
-| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
-| <html>
-| <head>
-| <body>
-
-#data
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'>
-#errors
-#document
-| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
-| <html>
-| <head>
-| <body>
-
-#data
-<!DOCTYPE HTML PUBLIC"-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'>
-#errors
-#document
-| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
-| <html>
-| <head>
-| <body>
-
-#data
-<!DOCTYPE HTML PUBLIC'-//W3C//DTD HTML 4.01//EN''http://www.w3.org/TR/html4/strict.dtd'>
-#errors
-#document
-| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
-| <html>
-| <head>
-| <body>
diff --git a/src/pkg/html/testdata/webkit/entities01.dat b/src/pkg/html/testdata/webkit/entities01.dat
deleted file mode 100644
index c8073b781..000000000
--- a/src/pkg/html/testdata/webkit/entities01.dat
+++ /dev/null
@@ -1,603 +0,0 @@
-#data
-FOO&gt;BAR
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO>BAR"
-
-#data
-FOO&gtBAR
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO>BAR"
-
-#data
-FOO&gt BAR
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO> BAR"
-
-#data
-FOO&gt;;;BAR
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO>;;BAR"
-
-#data
-I'm &notit; I tell you
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "I'm ¬it; I tell you"
-
-#data
-I'm &notin; I tell you
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "I'm ∉ I tell you"
-
-#data
-FOO& BAR
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO& BAR"
-
-#data
-FOO&<BAR>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO&"
-| <bar>
-
-#data
-FOO&&&&gt;BAR
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO&&&>BAR"
-
-#data
-FOO&#41;BAR
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO)BAR"
-
-#data
-FOO&#x41;BAR
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOOABAR"
-
-#data
-FOO&#X41;BAR
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOOABAR"
-
-#data
-FOO&#BAR
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO&#BAR"
-
-#data
-FOO&#ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO&#ZOO"
-
-#data
-FOO&#xBAR
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOOºR"
-
-#data
-FOO&#xZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO&#xZOO"
-
-#data
-FOO&#XZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO&#XZOO"
-
-#data
-FOO&#41BAR
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO)BAR"
-
-#data
-FOO&#x41BAR
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO䆺R"
-
-#data
-FOO&#x41ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOOAZOO"
-
-#data
-FOO&#x0000;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO�ZOO"
-
-#data
-FOO&#x0078;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOOxZOO"
-
-#data
-FOO&#x0079;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOOyZOO"
-
-#data
-FOO&#x0080;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO€ZOO"
-
-#data
-FOO&#x0081;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOOZOO"
-
-#data
-FOO&#x0082;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO‚ZOO"
-
-#data
-FOO&#x0083;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOOƒZOO"
-
-#data
-FOO&#x0084;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO„ZOO"
-
-#data
-FOO&#x0085;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO…ZOO"
-
-#data
-FOO&#x0086;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO†ZOO"
-
-#data
-FOO&#x0087;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO‡ZOO"
-
-#data
-FOO&#x0088;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOOˆZOO"
-
-#data
-FOO&#x0089;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO‰ZOO"
-
-#data
-FOO&#x008A;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOOŠZOO"
-
-#data
-FOO&#x008B;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO‹ZOO"
-
-#data
-FOO&#x008C;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOOŒZOO"
-
-#data
-FOO&#x008D;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOOZOO"
-
-#data
-FOO&#x008E;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOOŽZOO"
-
-#data
-FOO&#x008F;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOOZOO"
-
-#data
-FOO&#x0090;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOOZOO"
-
-#data
-FOO&#x0091;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO‘ZOO"
-
-#data
-FOO&#x0092;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO’ZOO"
-
-#data
-FOO&#x0093;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO“ZOO"
-
-#data
-FOO&#x0094;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO”ZOO"
-
-#data
-FOO&#x0095;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO•ZOO"
-
-#data
-FOO&#x0096;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO–ZOO"
-
-#data
-FOO&#x0097;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO—ZOO"
-
-#data
-FOO&#x0098;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO˜ZOO"
-
-#data
-FOO&#x0099;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO™ZOO"
-
-#data
-FOO&#x009A;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOOšZOO"
-
-#data
-FOO&#x009B;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO›ZOO"
-
-#data
-FOO&#x009C;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOOœZOO"
-
-#data
-FOO&#x009D;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOOZOO"
-
-#data
-FOO&#x009E;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOOžZOO"
-
-#data
-FOO&#x009F;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOOŸZOO"
-
-#data
-FOO&#x00A0;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO ZOO"
-
-#data
-FOO&#xD7FF;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO퟿ZOO"
-
-#data
-FOO&#xD800;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO�ZOO"
-
-#data
-FOO&#xD801;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO�ZOO"
-
-#data
-FOO&#xDFFE;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO�ZOO"
-
-#data
-FOO&#xDFFF;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO�ZOO"
-
-#data
-FOO&#xE000;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOOZOO"
-
-#data
-FOO&#x10FFFE;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO􏿾ZOO"
-
-#data
-FOO&#x1087D4;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO􈟔ZOO"
-
-#data
-FOO&#x10FFFF;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO􏿿ZOO"
-
-#data
-FOO&#x110000;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO�ZOO"
-
-#data
-FOO&#xFFFFFF;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO�ZOO"
diff --git a/src/pkg/html/testdata/webkit/entities02.dat b/src/pkg/html/testdata/webkit/entities02.dat
deleted file mode 100644
index e2fb42a07..000000000
--- a/src/pkg/html/testdata/webkit/entities02.dat
+++ /dev/null
@@ -1,249 +0,0 @@
-#data
-<div bar="ZZ&gt;YY"></div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| bar="ZZ>YY"
-
-#data
-<div bar="ZZ&"></div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| bar="ZZ&"
-
-#data
-<div bar='ZZ&'></div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| bar="ZZ&"
-
-#data
-<div bar=ZZ&></div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| bar="ZZ&"
-
-#data
-<div bar="ZZ&gt=YY"></div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| bar="ZZ&gt=YY"
-
-#data
-<div bar="ZZ&gt0YY"></div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| bar="ZZ&gt0YY"
-
-#data
-<div bar="ZZ&gt9YY"></div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| bar="ZZ&gt9YY"
-
-#data
-<div bar="ZZ&gtaYY"></div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| bar="ZZ&gtaYY"
-
-#data
-<div bar="ZZ&gtZYY"></div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| bar="ZZ&gtZYY"
-
-#data
-<div bar="ZZ&gt YY"></div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| bar="ZZ> YY"
-
-#data
-<div bar="ZZ&gt"></div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| bar="ZZ>"
-
-#data
-<div bar='ZZ&gt'></div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| bar="ZZ>"
-
-#data
-<div bar=ZZ&gt></div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| bar="ZZ>"
-
-#data
-<div bar="ZZ&pound_id=23"></div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| bar="ZZ£_id=23"
-
-#data
-<div bar="ZZ&prod_id=23"></div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| bar="ZZ&prod_id=23"
-
-#data
-<div bar="ZZ&pound;_id=23"></div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| bar="ZZ£_id=23"
-
-#data
-<div bar="ZZ&prod;_id=23"></div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| bar="ZZ∏_id=23"
-
-#data
-<div bar="ZZ&pound=23"></div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| bar="ZZ&pound=23"
-
-#data
-<div bar="ZZ&prod=23"></div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| bar="ZZ&prod=23"
-
-#data
-<div>ZZ&pound_id=23</div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| "ZZ£_id=23"
-
-#data
-<div>ZZ&prod_id=23</div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| "ZZ&prod_id=23"
-
-#data
-<div>ZZ&pound;_id=23</div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| "ZZ£_id=23"
-
-#data
-<div>ZZ&prod;_id=23</div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| "ZZ∏_id=23"
-
-#data
-<div>ZZ&pound=23</div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| "ZZ£=23"
-
-#data
-<div>ZZ&prod=23</div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| "ZZ&prod=23"
diff --git a/src/pkg/html/testdata/webkit/html5test-com.dat b/src/pkg/html/testdata/webkit/html5test-com.dat
deleted file mode 100644
index d7cb71db0..000000000
--- a/src/pkg/html/testdata/webkit/html5test-com.dat
+++ /dev/null
@@ -1,246 +0,0 @@
-#data
-<div<div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div<div>
-
-#data
-<div foo<bar=''>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| foo<bar=""
-
-#data
-<div foo=`bar`>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| foo="`bar`"
-
-#data
-<div \"foo=''>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| \"foo=""
-
-#data
-<a href='\nbar'></a>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <a>
-| href="\nbar"
-
-#data
-<!DOCTYPE html>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-
-#data
-&lang;&rang;
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "⟨⟩"
-
-#data
-&apos;
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "'"
-
-#data
-&ImaginaryI;
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "ⅈ"
-
-#data
-&Kopf;
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "𝕂"
-
-#data
-&notinva;
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "∉"
-
-#data
-<?import namespace="foo" implementation="#bar">
-#errors
-#document
-| <!-- ?import namespace="foo" implementation="#bar" -->
-| <html>
-| <head>
-| <body>
-
-#data
-<!--foo--bar-->
-#errors
-#document
-| <!-- foo--bar -->
-| <html>
-| <head>
-| <body>
-
-#data
-<![CDATA[x]]>
-#errors
-#document
-| <!-- [CDATA[x]] -->
-| <html>
-| <head>
-| <body>
-
-#data
-<textarea><!--</textarea>--></textarea>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <textarea>
-| "<!--"
-| "-->"
-
-#data
-<textarea><!--</textarea>-->
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <textarea>
-| "<!--"
-| "-->"
-
-#data
-<style><!--</style>--></style>
-#errors
-#document
-| <html>
-| <head>
-| <style>
-| "<!--"
-| <body>
-| "-->"
-
-#data
-<style><!--</style>-->
-#errors
-#document
-| <html>
-| <head>
-| <style>
-| "<!--"
-| <body>
-| "-->"
-
-#data
-<ul><li>A </li> <li>B</li></ul>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <ul>
-| <li>
-| "A "
-| " "
-| <li>
-| "B"
-
-#data
-<table><form><input type=hidden><input></form><div></div></table>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <input>
-| <div>
-| <table>
-| <form>
-| <input>
-| type="hidden"
-
-#data
-<i>A<b>B<p></i>C</b>D
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <i>
-| "A"
-| <b>
-| "B"
-| <b>
-| <p>
-| <b>
-| <i>
-| "C"
-| "D"
-
-#data
-<div></div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-
-#data
-<svg></svg>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <svg svg>
-
-#data
-<math></math>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <math math>
diff --git a/src/pkg/html/testdata/webkit/inbody01.dat b/src/pkg/html/testdata/webkit/inbody01.dat
deleted file mode 100644
index 3f2bd374c..000000000
--- a/src/pkg/html/testdata/webkit/inbody01.dat
+++ /dev/null
@@ -1,43 +0,0 @@
-#data
-<button>1</foo>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <button>
-| "1"
-
-#data
-<foo>1<p>2</foo>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <foo>
-| "1"
-| <p>
-| "2"
-
-#data
-<dd>1</foo>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <dd>
-| "1"
-
-#data
-<foo>1<dd>2</foo>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <foo>
-| "1"
-| <dd>
-| "2"
diff --git a/src/pkg/html/testdata/webkit/isindex.dat b/src/pkg/html/testdata/webkit/isindex.dat
deleted file mode 100644
index 88325ffe6..000000000
--- a/src/pkg/html/testdata/webkit/isindex.dat
+++ /dev/null
@@ -1,40 +0,0 @@
-#data
-<isindex>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <form>
-| <hr>
-| <label>
-| "This is a searchable index. Enter search keywords: "
-| <input>
-| name="isindex"
-| <hr>
-
-#data
-<isindex name="A" action="B" prompt="C" foo="D">
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <form>
-| action="B"
-| <hr>
-| <label>
-| "C"
-| <input>
-| foo="D"
-| name="isindex"
-| <hr>
-
-#data
-<form><isindex>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <form>
diff --git a/src/pkg/html/testdata/webkit/pending-spec-changes-plain-text-unsafe.dat b/src/pkg/html/testdata/webkit/pending-spec-changes-plain-text-unsafe.dat
deleted file mode 100644
index a5ebb1eb2..000000000
--- a/src/pkg/html/testdata/webkit/pending-spec-changes-plain-text-unsafe.dat
+++ /dev/null
Binary files differ
diff --git a/src/pkg/html/testdata/webkit/pending-spec-changes.dat b/src/pkg/html/testdata/webkit/pending-spec-changes.dat
deleted file mode 100644
index e00ee85d3..000000000
--- a/src/pkg/html/testdata/webkit/pending-spec-changes.dat
+++ /dev/null
@@ -1,28 +0,0 @@
-#data
-<input type="hidden"><frameset>
-#errors
-21: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
-31: “frameset” start tag seen.
-31: End of file seen and there were open elements.
-#document
-| <html>
-| <head>
-| <frameset>
-
-#data
-<!DOCTYPE html><table><caption><svg>foo</table>bar
-#errors
-47: End tag “table” did not match the name of the current open element (“svg”).
-47: “table” closed but “caption” was still open.
-47: End tag “table” seen, but there were open elements.
-36: Unclosed element “svg”.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <table>
-| <caption>
-| <svg svg>
-| "foo"
-| "bar"
diff --git a/src/pkg/html/testdata/webkit/plain-text-unsafe.dat b/src/pkg/html/testdata/webkit/plain-text-unsafe.dat
deleted file mode 100644
index 2f40e83ba..000000000
--- a/src/pkg/html/testdata/webkit/plain-text-unsafe.dat
+++ /dev/null
@@ -1,8 +0,0 @@
-#data
-FOO&#x000D;ZOO
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO ZOO"
diff --git a/src/pkg/html/testdata/webkit/scriptdata01.dat b/src/pkg/html/testdata/webkit/scriptdata01.dat
deleted file mode 100644
index 76b67f4ba..000000000
--- a/src/pkg/html/testdata/webkit/scriptdata01.dat
+++ /dev/null
@@ -1,308 +0,0 @@
-#data
-FOO<script>'Hello'</script>BAR
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO"
-| <script>
-| "'Hello'"
-| "BAR"
-
-#data
-FOO<script></script>BAR
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO"
-| <script>
-| "BAR"
-
-#data
-FOO<script></script >BAR
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO"
-| <script>
-| "BAR"
-
-#data
-FOO<script></script/>BAR
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO"
-| <script>
-| "BAR"
-
-#data
-FOO<script></script/ >BAR
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO"
-| <script>
-| "BAR"
-
-#data
-FOO<script type="text/plain"></scriptx>BAR
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO"
-| <script>
-| type="text/plain"
-| "</scriptx>BAR"
-
-#data
-FOO<script></script foo=">" dd>BAR
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO"
-| <script>
-| "BAR"
-
-#data
-FOO<script>'<'</script>BAR
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO"
-| <script>
-| "'<'"
-| "BAR"
-
-#data
-FOO<script>'<!'</script>BAR
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO"
-| <script>
-| "'<!'"
-| "BAR"
-
-#data
-FOO<script>'<!-'</script>BAR
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO"
-| <script>
-| "'<!-'"
-| "BAR"
-
-#data
-FOO<script>'<!--'</script>BAR
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO"
-| <script>
-| "'<!--'"
-| "BAR"
-
-#data
-FOO<script>'<!---'</script>BAR
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO"
-| <script>
-| "'<!---'"
-| "BAR"
-
-#data
-FOO<script>'<!-->'</script>BAR
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO"
-| <script>
-| "'<!-->'"
-| "BAR"
-
-#data
-FOO<script>'<!-->'</script>BAR
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO"
-| <script>
-| "'<!-->'"
-| "BAR"
-
-#data
-FOO<script>'<!-- potato'</script>BAR
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO"
-| <script>
-| "'<!-- potato'"
-| "BAR"
-
-#data
-FOO<script>'<!-- <sCrIpt'</script>BAR
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO"
-| <script>
-| "'<!-- <sCrIpt'"
-| "BAR"
-
-#data
-FOO<script type="text/plain">'<!-- <sCrIpt>'</script>BAR
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO"
-| <script>
-| type="text/plain"
-| "'<!-- <sCrIpt>'</script>BAR"
-
-#data
-FOO<script type="text/plain">'<!-- <sCrIpt> -'</script>BAR
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO"
-| <script>
-| type="text/plain"
-| "'<!-- <sCrIpt> -'</script>BAR"
-
-#data
-FOO<script type="text/plain">'<!-- <sCrIpt> --'</script>BAR
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO"
-| <script>
-| type="text/plain"
-| "'<!-- <sCrIpt> --'</script>BAR"
-
-#data
-FOO<script>'<!-- <sCrIpt> -->'</script>BAR
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO"
-| <script>
-| "'<!-- <sCrIpt> -->'"
-| "BAR"
-
-#data
-FOO<script type="text/plain">'<!-- <sCrIpt> --!>'</script>BAR
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO"
-| <script>
-| type="text/plain"
-| "'<!-- <sCrIpt> --!>'</script>BAR"
-
-#data
-FOO<script type="text/plain">'<!-- <sCrIpt> -- >'</script>BAR
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO"
-| <script>
-| type="text/plain"
-| "'<!-- <sCrIpt> -- >'</script>BAR"
-
-#data
-FOO<script type="text/plain">'<!-- <sCrIpt '</script>BAR
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO"
-| <script>
-| type="text/plain"
-| "'<!-- <sCrIpt '</script>BAR"
-
-#data
-FOO<script type="text/plain">'<!-- <sCrIpt/'</script>BAR
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO"
-| <script>
-| type="text/plain"
-| "'<!-- <sCrIpt/'</script>BAR"
-
-#data
-FOO<script type="text/plain">'<!-- <sCrIpt\'</script>BAR
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO"
-| <script>
-| type="text/plain"
-| "'<!-- <sCrIpt\'"
-| "BAR"
-
-#data
-FOO<script type="text/plain">'<!-- <sCrIpt/'</script>BAR</script>QUX
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "FOO"
-| <script>
-| type="text/plain"
-| "'<!-- <sCrIpt/'</script>BAR"
-| "QUX"
diff --git a/src/pkg/html/testdata/webkit/scripted/adoption01.dat b/src/pkg/html/testdata/webkit/scripted/adoption01.dat
deleted file mode 100644
index 4e08d0e84..000000000
--- a/src/pkg/html/testdata/webkit/scripted/adoption01.dat
+++ /dev/null
@@ -1,15 +0,0 @@
-#data
-<p><b id="A"><script>document.getElementById("A").id = "B"</script></p>TEXT</b>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <p>
-| <b>
-| id="B"
-| <script>
-| "document.getElementById("A").id = "B""
-| <b>
-| id="A"
-| "TEXT"
diff --git a/src/pkg/html/testdata/webkit/scripted/webkit01.dat b/src/pkg/html/testdata/webkit/scripted/webkit01.dat
deleted file mode 100644
index ef4a41ca0..000000000
--- a/src/pkg/html/testdata/webkit/scripted/webkit01.dat
+++ /dev/null
@@ -1,28 +0,0 @@
-#data
-1<script>document.write("2")</script>3
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "1"
-| <script>
-| "document.write("2")"
-| "23"
-
-#data
-1<script>document.write("<script>document.write('2')</scr"+ "ipt><script>document.write('3')</scr" + "ipt>")</script>4
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "1"
-| <script>
-| "document.write("<script>document.write('2')</scr"+ "ipt><script>document.write('3')</scr" + "ipt>")"
-| <script>
-| "document.write('2')"
-| "2"
-| <script>
-| "document.write('3')"
-| "34"
diff --git a/src/pkg/html/testdata/webkit/tables01.dat b/src/pkg/html/testdata/webkit/tables01.dat
deleted file mode 100644
index 88ef1fe2e..000000000
--- a/src/pkg/html/testdata/webkit/tables01.dat
+++ /dev/null
@@ -1,197 +0,0 @@
-#data
-<table><th>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <table>
-| <tbody>
-| <tr>
-| <th>
-
-#data
-<table><td>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <table>
-| <tbody>
-| <tr>
-| <td>
-
-#data
-<table><col foo='bar'>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <table>
-| <colgroup>
-| <col>
-| foo="bar"
-
-#data
-<table><colgroup></html>foo
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "foo"
-| <table>
-| <colgroup>
-
-#data
-<table></table><p>foo
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <table>
-| <p>
-| "foo"
-
-#data
-<table></body></caption></col></colgroup></html></tbody></td></tfoot></th></thead></tr><td>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <table>
-| <tbody>
-| <tr>
-| <td>
-
-#data
-<table><select><option>3</select></table>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <select>
-| <option>
-| "3"
-| <table>
-
-#data
-<table><select><table></table></select></table>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <select>
-| <table>
-| <table>
-
-#data
-<table><select></table>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <select>
-| <table>
-
-#data
-<table><select><option>A<tr><td>B</td></tr></table>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <select>
-| <option>
-| "A"
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| "B"
-
-#data
-<table><td></body></caption></col></colgroup></html>foo
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| "foo"
-
-#data
-<table><td>A</table>B
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| "A"
-| "B"
-
-#data
-<table><tr><caption>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <table>
-| <tbody>
-| <tr>
-| <caption>
-
-#data
-<table><tr></body></caption></col></colgroup></html></td></th><td>foo
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| "foo"
-
-#data
-<table><td><tr>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| <tr>
-
-#data
-<table><td><button><td>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| <button>
-| <td>
diff --git a/src/pkg/html/testdata/webkit/tests1.dat b/src/pkg/html/testdata/webkit/tests1.dat
deleted file mode 100644
index cbf8bdda6..000000000
--- a/src/pkg/html/testdata/webkit/tests1.dat
+++ /dev/null
@@ -1,1952 +0,0 @@
-#data
-Test
-#errors
-Line: 1 Col: 4 Unexpected non-space characters. Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-| "Test"
-
-#data
-<p>One<p>Two
-#errors
-Line: 1 Col: 3 Unexpected start tag (p). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-| <p>
-| "One"
-| <p>
-| "Two"
-
-#data
-Line1<br>Line2<br>Line3<br>Line4
-#errors
-Line: 1 Col: 5 Unexpected non-space characters. Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-| "Line1"
-| <br>
-| "Line2"
-| <br>
-| "Line3"
-| <br>
-| "Line4"
-
-#data
-<html>
-#errors
-Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-
-#data
-<head>
-#errors
-Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-
-#data
-<body>
-#errors
-Line: 1 Col: 6 Unexpected start tag (body). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-
-#data
-<html><head>
-#errors
-Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-
-#data
-<html><head></head>
-#errors
-Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-
-#data
-<html><head></head><body>
-#errors
-Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-
-#data
-<html><head></head><body></body>
-#errors
-Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-
-#data
-<html><head><body></body></html>
-#errors
-Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-
-#data
-<html><head></body></html>
-#errors
-Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
-Line: 1 Col: 19 Unexpected end tag (body).
-Line: 1 Col: 26 Unexpected end tag (html).
-#document
-| <html>
-| <head>
-| <body>
-
-#data
-<html><head><body></html>
-#errors
-Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-
-#data
-<html><body></html>
-#errors
-Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-
-#data
-<body></html>
-#errors
-Line: 1 Col: 6 Unexpected start tag (body). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-
-#data
-<head></html>
-#errors
-Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
-Line: 1 Col: 13 Unexpected end tag (html). Ignored.
-#document
-| <html>
-| <head>
-| <body>
-
-#data
-</head>
-#errors
-Line: 1 Col: 7 Unexpected end tag (head). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-
-#data
-</body>
-#errors
-Line: 1 Col: 7 Unexpected end tag (body). Expected DOCTYPE.
-Line: 1 Col: 7 Unexpected end tag (body) after the (implied) root element.
-#document
-| <html>
-| <head>
-| <body>
-
-#data
-</html>
-#errors
-Line: 1 Col: 7 Unexpected end tag (html). Expected DOCTYPE.
-Line: 1 Col: 7 Unexpected end tag (html) after the (implied) root element.
-#document
-| <html>
-| <head>
-| <body>
-
-#data
-<b><table><td><i></table>
-#errors
-Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
-Line: 1 Col: 14 Unexpected table cell start tag (td) in the table body phase.
-Line: 1 Col: 25 Got table cell end tag (td) while required end tags are missing.
-Line: 1 Col: 25 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <b>
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| <i>
-
-#data
-<b><table><td></b><i></table>X
-#errors
-Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
-Line: 1 Col: 14 Unexpected table cell start tag (td) in the table body phase.
-Line: 1 Col: 18 End tag (b) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 29 Got table cell end tag (td) while required end tags are missing.
-Line: 1 Col: 30 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <b>
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| <i>
-| "X"
-
-#data
-<h1>Hello<h2>World
-#errors
-4: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
-13: Heading cannot be a child of another heading.
-18: End of file seen and there were open elements.
-#document
-| <html>
-| <head>
-| <body>
-| <h1>
-| "Hello"
-| <h2>
-| "World"
-
-#data
-<a><p>X<a>Y</a>Z</p></a>
-#errors
-Line: 1 Col: 3 Unexpected start tag (a). Expected DOCTYPE.
-Line: 1 Col: 10 Unexpected start tag (a) implies end tag (a).
-Line: 1 Col: 10 End tag (a) violates step 1, paragraph 3 of the adoption agency algorithm.
-Line: 1 Col: 24 End tag (a) violates step 1, paragraph 1 of the adoption agency algorithm.
-#document
-| <html>
-| <head>
-| <body>
-| <a>
-| <p>
-| <a>
-| "X"
-| <a>
-| "Y"
-| "Z"
-
-#data
-<b><button>foo</b>bar
-#errors
-Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
-Line: 1 Col: 15 End tag (b) violates step 1, paragraph 1 of the adoption agency algorithm.
-#document
-| <html>
-| <head>
-| <body>
-| <b>
-| <button>
-| <b>
-| "foo"
-| "bar"
-
-#data
-<!DOCTYPE html><span><button>foo</span>bar
-#errors
-39: End tag “span” seen but there were unclosed elements.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <span>
-| <button>
-| "foobar"
-
-#data
-<p><b><div><marquee></p></b></div>X
-#errors
-Line: 1 Col: 3 Unexpected start tag (p). Expected DOCTYPE.
-Line: 1 Col: 11 Unexpected end tag (p). Ignored.
-Line: 1 Col: 24 Unexpected end tag (p). Ignored.
-Line: 1 Col: 28 End tag (b) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 34 End tag (div) seen too early. Expected other end tag.
-Line: 1 Col: 35 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <p>
-| <b>
-| <div>
-| <b>
-| <marquee>
-| <p>
-| "X"
-
-#data
-<script><div></script></div><title><p></title><p><p>
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 28 Unexpected end tag (div). Ignored.
-#document
-| <html>
-| <head>
-| <script>
-| "<div>"
-| <title>
-| "<p>"
-| <body>
-| <p>
-| <p>
-
-#data
-<!--><div>--<!-->
-#errors
-Line: 1 Col: 5 Incorrect comment.
-Line: 1 Col: 10 Unexpected start tag (div). Expected DOCTYPE.
-Line: 1 Col: 17 Incorrect comment.
-Line: 1 Col: 17 Expected closing tag. Unexpected end of file.
-#document
-| <!-- -->
-| <html>
-| <head>
-| <body>
-| <div>
-| "--"
-| <!-- -->
-
-#data
-<p><hr></p>
-#errors
-Line: 1 Col: 3 Unexpected start tag (p). Expected DOCTYPE.
-Line: 1 Col: 11 Unexpected end tag (p). Ignored.
-#document
-| <html>
-| <head>
-| <body>
-| <p>
-| <hr>
-| <p>
-
-#data
-<select><b><option><select><option></b></select>X
-#errors
-Line: 1 Col: 8 Unexpected start tag (select). Expected DOCTYPE.
-Line: 1 Col: 11 Unexpected start tag token (b) in the select phase. Ignored.
-Line: 1 Col: 27 Unexpected select start tag in the select phase treated as select end tag.
-Line: 1 Col: 39 End tag (b) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 48 Unexpected end tag (select). Ignored.
-Line: 1 Col: 49 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <select>
-| <option>
-| <option>
-| "X"
-
-#data
-<a><table><td><a><table></table><a></tr><a></table><b>X</b>C<a>Y
-#errors
-Line: 1 Col: 3 Unexpected start tag (a). Expected DOCTYPE.
-Line: 1 Col: 14 Unexpected table cell start tag (td) in the table body phase.
-Line: 1 Col: 35 Unexpected start tag (a) implies end tag (a).
-Line: 1 Col: 40 Got table cell end tag (td) while required end tags are missing.
-Line: 1 Col: 43 Unexpected start tag (a) in table context caused voodoo mode.
-Line: 1 Col: 43 Unexpected start tag (a) implies end tag (a).
-Line: 1 Col: 43 End tag (a) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 51 Unexpected implied end tag (a) in the table phase.
-Line: 1 Col: 63 Unexpected start tag (a) implies end tag (a).
-Line: 1 Col: 64 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <a>
-| <a>
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| <a>
-| <table>
-| <a>
-| <a>
-| <b>
-| "X"
-| "C"
-| <a>
-| "Y"
-
-#data
-<a X>0<b>1<a Y>2
-#errors
-Line: 1 Col: 5 Unexpected start tag (a). Expected DOCTYPE.
-Line: 1 Col: 15 Unexpected start tag (a) implies end tag (a).
-Line: 1 Col: 15 End tag (a) violates step 1, paragraph 3 of the adoption agency algorithm.
-Line: 1 Col: 16 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <a>
-| x=""
-| "0"
-| <b>
-| "1"
-| <b>
-| <a>
-| y=""
-| "2"
-
-#data
-<!-----><font><div>hello<table>excite!<b>me!<th><i>please!</tr><!--X-->
-#errors
-Line: 1 Col: 7 Unexpected '-' after '--' found in comment.
-Line: 1 Col: 14 Unexpected start tag (font). Expected DOCTYPE.
-Line: 1 Col: 38 Unexpected non-space characters in table context caused voodoo mode.
-Line: 1 Col: 41 Unexpected start tag (b) in table context caused voodoo mode.
-Line: 1 Col: 48 Unexpected implied end tag (b) in the table phase.
-Line: 1 Col: 48 Unexpected table cell start tag (th) in the table body phase.
-Line: 1 Col: 63 Got table cell end tag (th) while required end tags are missing.
-Line: 1 Col: 71 Unexpected end of file. Expected table content.
-#document
-| <!-- - -->
-| <html>
-| <head>
-| <body>
-| <font>
-| <div>
-| "helloexcite!"
-| <b>
-| "me!"
-| <table>
-| <tbody>
-| <tr>
-| <th>
-| <i>
-| "please!"
-| <!-- X -->
-
-#data
-<!DOCTYPE html><li>hello<li>world<ul>how<li>do</ul>you</body><!--do-->
-#errors
-Line: 1 Col: 61 Unexpected end tag (li). Missing end tag (body).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <li>
-| "hello"
-| <li>
-| "world"
-| <ul>
-| "how"
-| <li>
-| "do"
-| "you"
-| <!-- do -->
-
-#data
-<!DOCTYPE html>A<option>B<optgroup>C<select>D</option>E
-#errors
-Line: 1 Col: 54 Unexpected end tag (option) in the select phase. Ignored.
-Line: 1 Col: 55 Expected closing tag. Unexpected end of file.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "A"
-| <option>
-| "B"
-| <optgroup>
-| "C"
-| <select>
-| "DE"
-
-#data
-<
-#errors
-Line: 1 Col: 1 Expected tag name. Got something else instead
-Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-| "<"
-
-#data
-<#
-#errors
-Line: 1 Col: 1 Expected tag name. Got something else instead
-Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-| "<#"
-
-#data
-</
-#errors
-Line: 1 Col: 2 Expected closing tag. Unexpected end of file.
-Line: 1 Col: 2 Unexpected non-space characters. Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-| "</"
-
-#data
-</#
-#errors
-Line: 1 Col: 2 Expected closing tag. Unexpected character '#' found.
-Line: 1 Col: 3 Unexpected End of file. Expected DOCTYPE.
-#document
-| <!-- # -->
-| <html>
-| <head>
-| <body>
-
-#data
-<?
-#errors
-Line: 1 Col: 1 Expected tag name. Got '?' instead. (HTML doesn't support processing instructions.)
-Line: 1 Col: 2 Unexpected End of file. Expected DOCTYPE.
-#document
-| <!-- ? -->
-| <html>
-| <head>
-| <body>
-
-#data
-<?#
-#errors
-Line: 1 Col: 1 Expected tag name. Got '?' instead. (HTML doesn't support processing instructions.)
-Line: 1 Col: 3 Unexpected End of file. Expected DOCTYPE.
-#document
-| <!-- ?# -->
-| <html>
-| <head>
-| <body>
-
-#data
-<!
-#errors
-Line: 1 Col: 2 Expected '--' or 'DOCTYPE'. Not found.
-Line: 1 Col: 2 Unexpected End of file. Expected DOCTYPE.
-#document
-| <!-- -->
-| <html>
-| <head>
-| <body>
-
-#data
-<!#
-#errors
-Line: 1 Col: 3 Expected '--' or 'DOCTYPE'. Not found.
-Line: 1 Col: 3 Unexpected End of file. Expected DOCTYPE.
-#document
-| <!-- # -->
-| <html>
-| <head>
-| <body>
-
-#data
-<?COMMENT?>
-#errors
-Line: 1 Col: 1 Expected tag name. Got '?' instead. (HTML doesn't support processing instructions.)
-Line: 1 Col: 11 Unexpected End of file. Expected DOCTYPE.
-#document
-| <!-- ?COMMENT? -->
-| <html>
-| <head>
-| <body>
-
-#data
-<!COMMENT>
-#errors
-Line: 1 Col: 2 Expected '--' or 'DOCTYPE'. Not found.
-Line: 1 Col: 10 Unexpected End of file. Expected DOCTYPE.
-#document
-| <!-- COMMENT -->
-| <html>
-| <head>
-| <body>
-
-#data
-</ COMMENT >
-#errors
-Line: 1 Col: 2 Expected closing tag. Unexpected character ' ' found.
-Line: 1 Col: 12 Unexpected End of file. Expected DOCTYPE.
-#document
-| <!-- COMMENT -->
-| <html>
-| <head>
-| <body>
-
-#data
-<?COM--MENT?>
-#errors
-Line: 1 Col: 1 Expected tag name. Got '?' instead. (HTML doesn't support processing instructions.)
-Line: 1 Col: 13 Unexpected End of file. Expected DOCTYPE.
-#document
-| <!-- ?COM--MENT? -->
-| <html>
-| <head>
-| <body>
-
-#data
-<!COM--MENT>
-#errors
-Line: 1 Col: 2 Expected '--' or 'DOCTYPE'. Not found.
-Line: 1 Col: 12 Unexpected End of file. Expected DOCTYPE.
-#document
-| <!-- COM--MENT -->
-| <html>
-| <head>
-| <body>
-
-#data
-</ COM--MENT >
-#errors
-Line: 1 Col: 2 Expected closing tag. Unexpected character ' ' found.
-Line: 1 Col: 14 Unexpected End of file. Expected DOCTYPE.
-#document
-| <!-- COM--MENT -->
-| <html>
-| <head>
-| <body>
-
-#data
-<!DOCTYPE html><style> EOF
-#errors
-Line: 1 Col: 26 Unexpected end of file. Expected end tag (style).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <style>
-| " EOF"
-| <body>
-
-#data
-<!DOCTYPE html><script> <!-- </script> --> </script> EOF
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| " <!-- "
-| " "
-| <body>
-| "--> EOF"
-
-#data
-<b><p></b>TEST
-#errors
-Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
-Line: 1 Col: 10 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
-#document
-| <html>
-| <head>
-| <body>
-| <b>
-| <p>
-| <b>
-| "TEST"
-
-#data
-<p id=a><b><p id=b></b>TEST
-#errors
-Line: 1 Col: 8 Unexpected start tag (p). Expected DOCTYPE.
-Line: 1 Col: 19 Unexpected end tag (p). Ignored.
-Line: 1 Col: 23 End tag (b) violates step 1, paragraph 2 of the adoption agency algorithm.
-#document
-| <html>
-| <head>
-| <body>
-| <p>
-| id="a"
-| <b>
-| <p>
-| id="b"
-| "TEST"
-
-#data
-<b id=a><p><b id=b></p></b>TEST
-#errors
-Line: 1 Col: 8 Unexpected start tag (b). Expected DOCTYPE.
-Line: 1 Col: 23 Unexpected end tag (p). Ignored.
-Line: 1 Col: 27 End tag (b) violates step 1, paragraph 2 of the adoption agency algorithm.
-Line: 1 Col: 31 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <b>
-| id="a"
-| <p>
-| <b>
-| id="b"
-| "TEST"
-
-#data
-<!DOCTYPE html><title>U-test</title><body><div><p>Test<u></p></div></body>
-#errors
-Line: 1 Col: 61 Unexpected end tag (p). Ignored.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <title>
-| "U-test"
-| <body>
-| <div>
-| <p>
-| "Test"
-| <u>
-
-#data
-<!DOCTYPE html><font><table></font></table></font>
-#errors
-Line: 1 Col: 35 Unexpected end tag (font) in table context caused voodoo mode.
-Line: 1 Col: 35 End tag (font) violates step 1, paragraph 1 of the adoption agency algorithm.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <font>
-| <table>
-
-#data
-<font><p>hello<b>cruel</font>world
-#errors
-Line: 1 Col: 6 Unexpected start tag (font). Expected DOCTYPE.
-Line: 1 Col: 29 End tag (font) violates step 1, paragraph 3 of the adoption agency algorithm.
-Line: 1 Col: 29 End tag (font) violates step 1, paragraph 3 of the adoption agency algorithm.
-Line: 1 Col: 34 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <font>
-| <p>
-| <font>
-| "hello"
-| <b>
-| "cruel"
-| <b>
-| "world"
-
-#data
-<b>Test</i>Test
-#errors
-Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
-Line: 1 Col: 11 End tag (i) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 15 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <b>
-| "TestTest"
-
-#data
-<b>A<cite>B<div>C
-#errors
-Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
-Line: 1 Col: 17 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <b>
-| "A"
-| <cite>
-| "B"
-| <div>
-| "C"
-
-#data
-<b>A<cite>B<div>C</cite>D
-#errors
-Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
-Line: 1 Col: 24 Unexpected end tag (cite). Ignored.
-Line: 1 Col: 25 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <b>
-| "A"
-| <cite>
-| "B"
-| <div>
-| "CD"
-
-#data
-<b>A<cite>B<div>C</b>D
-#errors
-Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
-Line: 1 Col: 21 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
-Line: 1 Col: 22 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <b>
-| "A"
-| <cite>
-| "B"
-| <div>
-| <b>
-| "C"
-| "D"
-
-#data
-
-#errors
-Line: 1 Col: 0 Unexpected End of file. Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-
-#data
-<DIV>
-#errors
-Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
-Line: 1 Col: 5 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-
-#data
-<DIV> abc
-#errors
-Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
-Line: 1 Col: 9 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| " abc"
-
-#data
-<DIV> abc <B>
-#errors
-Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
-Line: 1 Col: 13 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| " abc "
-| <b>
-
-#data
-<DIV> abc <B> def
-#errors
-Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
-Line: 1 Col: 17 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| " abc "
-| <b>
-| " def"
-
-#data
-<DIV> abc <B> def <I>
-#errors
-Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
-Line: 1 Col: 21 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| " abc "
-| <b>
-| " def "
-| <i>
-
-#data
-<DIV> abc <B> def <I> ghi
-#errors
-Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
-Line: 1 Col: 25 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| " abc "
-| <b>
-| " def "
-| <i>
-| " ghi"
-
-#data
-<DIV> abc <B> def <I> ghi <P>
-#errors
-Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
-Line: 1 Col: 29 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| " abc "
-| <b>
-| " def "
-| <i>
-| " ghi "
-| <p>
-
-#data
-<DIV> abc <B> def <I> ghi <P> jkl
-#errors
-Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
-Line: 1 Col: 33 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| " abc "
-| <b>
-| " def "
-| <i>
-| " ghi "
-| <p>
-| " jkl"
-
-#data
-<DIV> abc <B> def <I> ghi <P> jkl </B>
-#errors
-Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
-Line: 1 Col: 38 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
-Line: 1 Col: 38 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| " abc "
-| <b>
-| " def "
-| <i>
-| " ghi "
-| <i>
-| <p>
-| <b>
-| " jkl "
-
-#data
-<DIV> abc <B> def <I> ghi <P> jkl </B> mno
-#errors
-Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
-Line: 1 Col: 38 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
-Line: 1 Col: 42 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| " abc "
-| <b>
-| " def "
-| <i>
-| " ghi "
-| <i>
-| <p>
-| <b>
-| " jkl "
-| " mno"
-
-#data
-<DIV> abc <B> def <I> ghi <P> jkl </B> mno </I>
-#errors
-Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
-Line: 1 Col: 38 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
-Line: 1 Col: 47 End tag (i) violates step 1, paragraph 3 of the adoption agency algorithm.
-Line: 1 Col: 47 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| " abc "
-| <b>
-| " def "
-| <i>
-| " ghi "
-| <i>
-| <p>
-| <i>
-| <b>
-| " jkl "
-| " mno "
-
-#data
-<DIV> abc <B> def <I> ghi <P> jkl </B> mno </I> pqr
-#errors
-Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
-Line: 1 Col: 38 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
-Line: 1 Col: 47 End tag (i) violates step 1, paragraph 3 of the adoption agency algorithm.
-Line: 1 Col: 51 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| " abc "
-| <b>
-| " def "
-| <i>
-| " ghi "
-| <i>
-| <p>
-| <i>
-| <b>
-| " jkl "
-| " mno "
-| " pqr"
-
-#data
-<DIV> abc <B> def <I> ghi <P> jkl </B> mno </I> pqr </P>
-#errors
-Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
-Line: 1 Col: 38 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
-Line: 1 Col: 47 End tag (i) violates step 1, paragraph 3 of the adoption agency algorithm.
-Line: 1 Col: 56 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| " abc "
-| <b>
-| " def "
-| <i>
-| " ghi "
-| <i>
-| <p>
-| <i>
-| <b>
-| " jkl "
-| " mno "
-| " pqr "
-
-#data
-<DIV> abc <B> def <I> ghi <P> jkl </B> mno </I> pqr </P> stu
-#errors
-Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
-Line: 1 Col: 38 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
-Line: 1 Col: 47 End tag (i) violates step 1, paragraph 3 of the adoption agency algorithm.
-Line: 1 Col: 60 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| " abc "
-| <b>
-| " def "
-| <i>
-| " ghi "
-| <i>
-| <p>
-| <i>
-| <b>
-| " jkl "
-| " mno "
-| " pqr "
-| " stu"
-
-#data
-<test attribute---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------->
-#errors
-Line: 1 Col: 1040 Unexpected start tag (test). Expected DOCTYPE.
-Line: 1 Col: 1040 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <test>
-| attribute----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------=""
-
-#data
-<a href="blah">aba<table><a href="foo">br<tr><td></td></tr>x</table>aoe
-#errors
-Line: 1 Col: 15 Unexpected start tag (a). Expected DOCTYPE.
-Line: 1 Col: 39 Unexpected start tag (a) in table context caused voodoo mode.
-Line: 1 Col: 39 Unexpected start tag (a) implies end tag (a).
-Line: 1 Col: 39 End tag (a) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 45 Unexpected implied end tag (a) in the table phase.
-Line: 1 Col: 68 Unexpected implied end tag (a) in the table phase.
-Line: 1 Col: 71 Expected closing tag. Unexpected end of file.
-
-#document
-| <html>
-| <head>
-| <body>
-| <a>
-| href="blah"
-| "aba"
-| <a>
-| href="foo"
-| "br"
-| <a>
-| href="foo"
-| "x"
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| <a>
-| href="foo"
-| "aoe"
-
-#data
-<a href="blah">aba<table><tr><td><a href="foo">br</td></tr>x</table>aoe
-#errors
-Line: 1 Col: 15 Unexpected start tag (a). Expected DOCTYPE.
-Line: 1 Col: 54 Got table cell end tag (td) while required end tags are missing.
-Line: 1 Col: 60 Unexpected non-space characters in table context caused voodoo mode.
-Line: 1 Col: 71 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <a>
-| href="blah"
-| "abax"
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| <a>
-| href="foo"
-| "br"
-| "aoe"
-
-#data
-<table><a href="blah">aba<tr><td><a href="foo">br</td></tr>x</table>aoe
-#errors
-Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
-Line: 1 Col: 22 Unexpected start tag (a) in table context caused voodoo mode.
-Line: 1 Col: 29 Unexpected implied end tag (a) in the table phase.
-Line: 1 Col: 54 Got table cell end tag (td) while required end tags are missing.
-Line: 1 Col: 68 Unexpected implied end tag (a) in the table phase.
-Line: 1 Col: 71 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <a>
-| href="blah"
-| "aba"
-| <a>
-| href="blah"
-| "x"
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| <a>
-| href="foo"
-| "br"
-| <a>
-| href="blah"
-| "aoe"
-
-#data
-<a href=a>aa<marquee>aa<a href=b>bb</marquee>aa
-#errors
-Line: 1 Col: 10 Unexpected start tag (a). Expected DOCTYPE.
-Line: 1 Col: 45 End tag (marquee) seen too early. Expected other end tag.
-Line: 1 Col: 47 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <a>
-| href="a"
-| "aa"
-| <marquee>
-| "aa"
-| <a>
-| href="b"
-| "bb"
-| "aa"
-
-#data
-<wbr><strike><code></strike><code><strike></code>
-#errors
-Line: 1 Col: 5 Unexpected start tag (wbr). Expected DOCTYPE.
-Line: 1 Col: 28 End tag (strike) violates step 1, paragraph 3 of the adoption agency algorithm.
-Line: 1 Col: 49 Unexpected end tag (code). Ignored.
-#document
-| <html>
-| <head>
-| <body>
-| <wbr>
-| <strike>
-| <code>
-| <code>
-| <code>
-| <strike>
-
-#data
-<!DOCTYPE html><spacer>foo
-#errors
-26: End of file seen and there were open elements.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <spacer>
-| "foo"
-
-#data
-<title><meta></title><link><title><meta></title>
-#errors
-Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <title>
-| "<meta>"
-| <link>
-| <title>
-| "<meta>"
-| <body>
-
-#data
-<style><!--</style><meta><script>--><link></script>
-#errors
-Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
-Line: 1 Col: 51 Unexpected end of file. Expected end tag (style).
-#document
-| <html>
-| <head>
-| <style>
-| "<!--"
-| <meta>
-| <script>
-| "--><link>"
-| <body>
-
-#data
-<head><meta></head><link>
-#errors
-Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
-Line: 1 Col: 25 Unexpected start tag (link) that can be in head. Moved.
-#document
-| <html>
-| <head>
-| <meta>
-| <link>
-| <body>
-
-#data
-<table><tr><tr><td><td><span><th><span>X</table>
-#errors
-Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
-Line: 1 Col: 33 Got table cell end tag (td) while required end tags are missing.
-Line: 1 Col: 48 Got table cell end tag (th) while required end tags are missing.
-#document
-| <html>
-| <head>
-| <body>
-| <table>
-| <tbody>
-| <tr>
-| <tr>
-| <td>
-| <td>
-| <span>
-| <th>
-| <span>
-| "X"
-
-#data
-<body><body><base><link><meta><title><p></title><body><p></body>
-#errors
-Line: 1 Col: 6 Unexpected start tag (body). Expected DOCTYPE.
-Line: 1 Col: 12 Unexpected start tag (body).
-Line: 1 Col: 54 Unexpected start tag (body).
-Line: 1 Col: 64 Unexpected end tag (p). Missing end tag (body).
-#document
-| <html>
-| <head>
-| <body>
-| <base>
-| <link>
-| <meta>
-| <title>
-| "<p>"
-| <p>
-
-#data
-<textarea><p></textarea>
-#errors
-Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-| <textarea>
-| "<p>"
-
-#data
-<p><image></p>
-#errors
-Line: 1 Col: 3 Unexpected start tag (p). Expected DOCTYPE.
-Line: 1 Col: 10 Unexpected start tag (image). Treated as img.
-#document
-| <html>
-| <head>
-| <body>
-| <p>
-| <img>
-
-#data
-<a><table><a></table><p><a><div><a>
-#errors
-Line: 1 Col: 3 Unexpected start tag (a). Expected DOCTYPE.
-Line: 1 Col: 13 Unexpected start tag (a) in table context caused voodoo mode.
-Line: 1 Col: 13 Unexpected start tag (a) implies end tag (a).
-Line: 1 Col: 13 End tag (a) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 21 Unexpected end tag (table). Expected end tag (a).
-Line: 1 Col: 27 Unexpected start tag (a) implies end tag (a).
-Line: 1 Col: 27 End tag (a) violates step 1, paragraph 2 of the adoption agency algorithm.
-Line: 1 Col: 32 Unexpected end tag (p). Ignored.
-Line: 1 Col: 35 Unexpected start tag (a) implies end tag (a).
-Line: 1 Col: 35 End tag (a) violates step 1, paragraph 2 of the adoption agency algorithm.
-Line: 1 Col: 35 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <a>
-| <a>
-| <table>
-| <p>
-| <a>
-| <div>
-| <a>
-
-#data
-<head></p><meta><p>
-#errors
-Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
-Line: 1 Col: 10 Unexpected end tag (p). Ignored.
-#document
-| <html>
-| <head>
-| <meta>
-| <body>
-| <p>
-
-#data
-<head></html><meta><p>
-#errors
-Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
-Line: 1 Col: 19 Unexpected start tag (meta).
-#document
-| <html>
-| <head>
-| <body>
-| <meta>
-| <p>
-
-#data
-<b><table><td><i></table>
-#errors
-Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
-Line: 1 Col: 14 Unexpected table cell start tag (td) in the table body phase.
-Line: 1 Col: 25 Got table cell end tag (td) while required end tags are missing.
-Line: 1 Col: 25 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <b>
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| <i>
-
-#data
-<b><table><td></b><i></table>
-#errors
-Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
-Line: 1 Col: 14 Unexpected table cell start tag (td) in the table body phase.
-Line: 1 Col: 18 End tag (b) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 29 Got table cell end tag (td) while required end tags are missing.
-Line: 1 Col: 29 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <b>
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| <i>
-
-#data
-<h1><h2>
-#errors
-4: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
-8: Heading cannot be a child of another heading.
-8: End of file seen and there were open elements.
-#document
-| <html>
-| <head>
-| <body>
-| <h1>
-| <h2>
-
-#data
-<a><p><a></a></p></a>
-#errors
-Line: 1 Col: 3 Unexpected start tag (a). Expected DOCTYPE.
-Line: 1 Col: 9 Unexpected start tag (a) implies end tag (a).
-Line: 1 Col: 9 End tag (a) violates step 1, paragraph 3 of the adoption agency algorithm.
-Line: 1 Col: 21 End tag (a) violates step 1, paragraph 1 of the adoption agency algorithm.
-#document
-| <html>
-| <head>
-| <body>
-| <a>
-| <p>
-| <a>
-| <a>
-
-#data
-<b><button></b></button></b>
-#errors
-Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
-Line: 1 Col: 15 End tag (b) violates step 1, paragraph 1 of the adoption agency algorithm.
-#document
-| <html>
-| <head>
-| <body>
-| <b>
-| <button>
-| <b>
-
-#data
-<p><b><div><marquee></p></b></div>
-#errors
-Line: 1 Col: 3 Unexpected start tag (p). Expected DOCTYPE.
-Line: 1 Col: 11 Unexpected end tag (p). Ignored.
-Line: 1 Col: 24 Unexpected end tag (p). Ignored.
-Line: 1 Col: 28 End tag (b) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 34 End tag (div) seen too early. Expected other end tag.
-Line: 1 Col: 34 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <p>
-| <b>
-| <div>
-| <b>
-| <marquee>
-| <p>
-
-#data
-<script></script></div><title></title><p><p>
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 23 Unexpected end tag (div). Ignored.
-#document
-| <html>
-| <head>
-| <script>
-| <title>
-| <body>
-| <p>
-| <p>
-
-#data
-<p><hr></p>
-#errors
-Line: 1 Col: 3 Unexpected start tag (p). Expected DOCTYPE.
-Line: 1 Col: 11 Unexpected end tag (p). Ignored.
-#document
-| <html>
-| <head>
-| <body>
-| <p>
-| <hr>
-| <p>
-
-#data
-<select><b><option><select><option></b></select>
-#errors
-Line: 1 Col: 8 Unexpected start tag (select). Expected DOCTYPE.
-Line: 1 Col: 11 Unexpected start tag token (b) in the select phase. Ignored.
-Line: 1 Col: 27 Unexpected select start tag in the select phase treated as select end tag.
-Line: 1 Col: 39 End tag (b) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 48 Unexpected end tag (select). Ignored.
-Line: 1 Col: 48 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <select>
-| <option>
-| <option>
-
-#data
-<html><head><title></title><body></body></html>
-#errors
-Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <title>
-| <body>
-
-#data
-<a><table><td><a><table></table><a></tr><a></table><a>
-#errors
-Line: 1 Col: 3 Unexpected start tag (a). Expected DOCTYPE.
-Line: 1 Col: 14 Unexpected table cell start tag (td) in the table body phase.
-Line: 1 Col: 35 Unexpected start tag (a) implies end tag (a).
-Line: 1 Col: 40 Got table cell end tag (td) while required end tags are missing.
-Line: 1 Col: 43 Unexpected start tag (a) in table context caused voodoo mode.
-Line: 1 Col: 43 Unexpected start tag (a) implies end tag (a).
-Line: 1 Col: 43 End tag (a) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 51 Unexpected implied end tag (a) in the table phase.
-Line: 1 Col: 54 Unexpected start tag (a) implies end tag (a).
-Line: 1 Col: 54 End tag (a) violates step 1, paragraph 2 of the adoption agency algorithm.
-Line: 1 Col: 54 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <a>
-| <a>
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| <a>
-| <table>
-| <a>
-| <a>
-
-#data
-<ul><li></li><div><li></div><li><li><div><li><address><li><b><em></b><li></ul>
-#errors
-Line: 1 Col: 4 Unexpected start tag (ul). Expected DOCTYPE.
-Line: 1 Col: 45 Missing end tag (div, li).
-Line: 1 Col: 58 Missing end tag (address, li).
-Line: 1 Col: 69 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
-#document
-| <html>
-| <head>
-| <body>
-| <ul>
-| <li>
-| <div>
-| <li>
-| <li>
-| <li>
-| <div>
-| <li>
-| <address>
-| <li>
-| <b>
-| <em>
-| <li>
-
-#data
-<ul><li><ul></li><li>a</li></ul></li></ul>
-#errors
-XXX: fix me
-#document
-| <html>
-| <head>
-| <body>
-| <ul>
-| <li>
-| <ul>
-| <li>
-| "a"
-
-#data
-<frameset><frame><frameset><frame></frameset><noframes></noframes></frameset>
-#errors
-Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <frameset>
-| <frame>
-| <frameset>
-| <frame>
-| <noframes>
-
-#data
-<h1><table><td><h3></table><h3></h1>
-#errors
-4: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
-15: “td” start tag in table body.
-27: Unclosed elements.
-31: Heading cannot be a child of another heading.
-36: End tag “h1” seen but there were unclosed elements.
-#document
-| <html>
-| <head>
-| <body>
-| <h1>
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| <h3>
-| <h3>
-
-#data
-<table><colgroup><col><colgroup><col><col><col><colgroup><col><col><thead><tr><td></table>
-#errors
-Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-| <table>
-| <colgroup>
-| <col>
-| <colgroup>
-| <col>
-| <col>
-| <col>
-| <colgroup>
-| <col>
-| <col>
-| <thead>
-| <tr>
-| <td>
-
-#data
-<table><col><tbody><col><tr><col><td><col></table><col>
-#errors
-Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
-Line: 1 Col: 37 Unexpected table cell start tag (td) in the table body phase.
-Line: 1 Col: 55 Unexpected start tag col. Ignored.
-#document
-| <html>
-| <head>
-| <body>
-| <table>
-| <colgroup>
-| <col>
-| <tbody>
-| <colgroup>
-| <col>
-| <tbody>
-| <tr>
-| <colgroup>
-| <col>
-| <tbody>
-| <tr>
-| <td>
-| <colgroup>
-| <col>
-
-#data
-<table><colgroup><tbody><colgroup><tr><colgroup><td><colgroup></table><colgroup>
-#errors
-Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
-Line: 1 Col: 52 Unexpected table cell start tag (td) in the table body phase.
-Line: 1 Col: 80 Unexpected start tag colgroup. Ignored.
-#document
-| <html>
-| <head>
-| <body>
-| <table>
-| <colgroup>
-| <tbody>
-| <colgroup>
-| <tbody>
-| <tr>
-| <colgroup>
-| <tbody>
-| <tr>
-| <td>
-| <colgroup>
-
-#data
-</strong></b></em></i></u></strike></s></blink></tt></pre></big></small></font></select></h1></h2></h3></h4></h5></h6></body></br></a></img></title></span></style></script></table></th></td></tr></frame></area></link></param></hr></input></col></base></meta></basefont></bgsound></embed></spacer></p></dd></dt></caption></colgroup></tbody></tfoot></thead></address></blockquote></center></dir></div></dl></fieldset></listing></menu></ol></ul></li></nobr></wbr></form></button></marquee></object></html></frameset></head></iframe></image></isindex></noembed></noframes></noscript></optgroup></option></plaintext></textarea>
-#errors
-Line: 1 Col: 9 Unexpected end tag (strong). Expected DOCTYPE.
-Line: 1 Col: 9 Unexpected end tag (strong) after the (implied) root element.
-Line: 1 Col: 13 Unexpected end tag (b) after the (implied) root element.
-Line: 1 Col: 18 Unexpected end tag (em) after the (implied) root element.
-Line: 1 Col: 22 Unexpected end tag (i) after the (implied) root element.
-Line: 1 Col: 26 Unexpected end tag (u) after the (implied) root element.
-Line: 1 Col: 35 Unexpected end tag (strike) after the (implied) root element.
-Line: 1 Col: 39 Unexpected end tag (s) after the (implied) root element.
-Line: 1 Col: 47 Unexpected end tag (blink) after the (implied) root element.
-Line: 1 Col: 52 Unexpected end tag (tt) after the (implied) root element.
-Line: 1 Col: 58 Unexpected end tag (pre) after the (implied) root element.
-Line: 1 Col: 64 Unexpected end tag (big) after the (implied) root element.
-Line: 1 Col: 72 Unexpected end tag (small) after the (implied) root element.
-Line: 1 Col: 79 Unexpected end tag (font) after the (implied) root element.
-Line: 1 Col: 88 Unexpected end tag (select) after the (implied) root element.
-Line: 1 Col: 93 Unexpected end tag (h1) after the (implied) root element.
-Line: 1 Col: 98 Unexpected end tag (h2) after the (implied) root element.
-Line: 1 Col: 103 Unexpected end tag (h3) after the (implied) root element.
-Line: 1 Col: 108 Unexpected end tag (h4) after the (implied) root element.
-Line: 1 Col: 113 Unexpected end tag (h5) after the (implied) root element.
-Line: 1 Col: 118 Unexpected end tag (h6) after the (implied) root element.
-Line: 1 Col: 125 Unexpected end tag (body) after the (implied) root element.
-Line: 1 Col: 130 Unexpected end tag (br). Treated as br element.
-Line: 1 Col: 134 End tag (a) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 140 This element (img) has no end tag.
-Line: 1 Col: 148 Unexpected end tag (title). Ignored.
-Line: 1 Col: 155 Unexpected end tag (span). Ignored.
-Line: 1 Col: 163 Unexpected end tag (style). Ignored.
-Line: 1 Col: 172 Unexpected end tag (script). Ignored.
-Line: 1 Col: 180 Unexpected end tag (table). Ignored.
-Line: 1 Col: 185 Unexpected end tag (th). Ignored.
-Line: 1 Col: 190 Unexpected end tag (td). Ignored.
-Line: 1 Col: 195 Unexpected end tag (tr). Ignored.
-Line: 1 Col: 203 This element (frame) has no end tag.
-Line: 1 Col: 210 This element (area) has no end tag.
-Line: 1 Col: 217 Unexpected end tag (link). Ignored.
-Line: 1 Col: 225 This element (param) has no end tag.
-Line: 1 Col: 230 This element (hr) has no end tag.
-Line: 1 Col: 238 This element (input) has no end tag.
-Line: 1 Col: 244 Unexpected end tag (col). Ignored.
-Line: 1 Col: 251 Unexpected end tag (base). Ignored.
-Line: 1 Col: 258 Unexpected end tag (meta). Ignored.
-Line: 1 Col: 269 This element (basefont) has no end tag.
-Line: 1 Col: 279 This element (bgsound) has no end tag.
-Line: 1 Col: 287 This element (embed) has no end tag.
-Line: 1 Col: 296 This element (spacer) has no end tag.
-Line: 1 Col: 300 Unexpected end tag (p). Ignored.
-Line: 1 Col: 305 End tag (dd) seen too early. Expected other end tag.
-Line: 1 Col: 310 End tag (dt) seen too early. Expected other end tag.
-Line: 1 Col: 320 Unexpected end tag (caption). Ignored.
-Line: 1 Col: 331 Unexpected end tag (colgroup). Ignored.
-Line: 1 Col: 339 Unexpected end tag (tbody). Ignored.
-Line: 1 Col: 347 Unexpected end tag (tfoot). Ignored.
-Line: 1 Col: 355 Unexpected end tag (thead). Ignored.
-Line: 1 Col: 365 End tag (address) seen too early. Expected other end tag.
-Line: 1 Col: 378 End tag (blockquote) seen too early. Expected other end tag.
-Line: 1 Col: 387 End tag (center) seen too early. Expected other end tag.
-Line: 1 Col: 393 Unexpected end tag (dir). Ignored.
-Line: 1 Col: 399 End tag (div) seen too early. Expected other end tag.
-Line: 1 Col: 404 End tag (dl) seen too early. Expected other end tag.
-Line: 1 Col: 415 End tag (fieldset) seen too early. Expected other end tag.
-Line: 1 Col: 425 End tag (listing) seen too early. Expected other end tag.
-Line: 1 Col: 432 End tag (menu) seen too early. Expected other end tag.
-Line: 1 Col: 437 End tag (ol) seen too early. Expected other end tag.
-Line: 1 Col: 442 End tag (ul) seen too early. Expected other end tag.
-Line: 1 Col: 447 End tag (li) seen too early. Expected other end tag.
-Line: 1 Col: 454 End tag (nobr) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 460 This element (wbr) has no end tag.
-Line: 1 Col: 476 End tag (button) seen too early. Expected other end tag.
-Line: 1 Col: 486 End tag (marquee) seen too early. Expected other end tag.
-Line: 1 Col: 495 End tag (object) seen too early. Expected other end tag.
-Line: 1 Col: 513 Unexpected end tag (html). Ignored.
-Line: 1 Col: 513 Unexpected end tag (frameset). Ignored.
-Line: 1 Col: 520 Unexpected end tag (head). Ignored.
-Line: 1 Col: 529 Unexpected end tag (iframe). Ignored.
-Line: 1 Col: 537 This element (image) has no end tag.
-Line: 1 Col: 547 This element (isindex) has no end tag.
-Line: 1 Col: 557 Unexpected end tag (noembed). Ignored.
-Line: 1 Col: 568 Unexpected end tag (noframes). Ignored.
-Line: 1 Col: 579 Unexpected end tag (noscript). Ignored.
-Line: 1 Col: 590 Unexpected end tag (optgroup). Ignored.
-Line: 1 Col: 599 Unexpected end tag (option). Ignored.
-Line: 1 Col: 611 Unexpected end tag (plaintext). Ignored.
-Line: 1 Col: 622 Unexpected end tag (textarea). Ignored.
-#document
-| <html>
-| <head>
-| <body>
-| <br>
-| <p>
-
-#data
-<table><tr></strong></b></em></i></u></strike></s></blink></tt></pre></big></small></font></select></h1></h2></h3></h4></h5></h6></body></br></a></img></title></span></style></script></table></th></td></tr></frame></area></link></param></hr></input></col></base></meta></basefont></bgsound></embed></spacer></p></dd></dt></caption></colgroup></tbody></tfoot></thead></address></blockquote></center></dir></div></dl></fieldset></listing></menu></ol></ul></li></nobr></wbr></form></button></marquee></object></html></frameset></head></iframe></image></isindex></noembed></noframes></noscript></optgroup></option></plaintext></textarea>
-#errors
-Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
-Line: 1 Col: 20 Unexpected end tag (strong) in table context caused voodoo mode.
-Line: 1 Col: 20 End tag (strong) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 24 Unexpected end tag (b) in table context caused voodoo mode.
-Line: 1 Col: 24 End tag (b) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 29 Unexpected end tag (em) in table context caused voodoo mode.
-Line: 1 Col: 29 End tag (em) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 33 Unexpected end tag (i) in table context caused voodoo mode.
-Line: 1 Col: 33 End tag (i) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 37 Unexpected end tag (u) in table context caused voodoo mode.
-Line: 1 Col: 37 End tag (u) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 46 Unexpected end tag (strike) in table context caused voodoo mode.
-Line: 1 Col: 46 End tag (strike) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 50 Unexpected end tag (s) in table context caused voodoo mode.
-Line: 1 Col: 50 End tag (s) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 58 Unexpected end tag (blink) in table context caused voodoo mode.
-Line: 1 Col: 58 Unexpected end tag (blink). Ignored.
-Line: 1 Col: 63 Unexpected end tag (tt) in table context caused voodoo mode.
-Line: 1 Col: 63 End tag (tt) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 69 Unexpected end tag (pre) in table context caused voodoo mode.
-Line: 1 Col: 69 End tag (pre) seen too early. Expected other end tag.
-Line: 1 Col: 75 Unexpected end tag (big) in table context caused voodoo mode.
-Line: 1 Col: 75 End tag (big) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 83 Unexpected end tag (small) in table context caused voodoo mode.
-Line: 1 Col: 83 End tag (small) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 90 Unexpected end tag (font) in table context caused voodoo mode.
-Line: 1 Col: 90 End tag (font) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 99 Unexpected end tag (select) in table context caused voodoo mode.
-Line: 1 Col: 99 Unexpected end tag (select). Ignored.
-Line: 1 Col: 104 Unexpected end tag (h1) in table context caused voodoo mode.
-Line: 1 Col: 104 End tag (h1) seen too early. Expected other end tag.
-Line: 1 Col: 109 Unexpected end tag (h2) in table context caused voodoo mode.
-Line: 1 Col: 109 End tag (h2) seen too early. Expected other end tag.
-Line: 1 Col: 114 Unexpected end tag (h3) in table context caused voodoo mode.
-Line: 1 Col: 114 End tag (h3) seen too early. Expected other end tag.
-Line: 1 Col: 119 Unexpected end tag (h4) in table context caused voodoo mode.
-Line: 1 Col: 119 End tag (h4) seen too early. Expected other end tag.
-Line: 1 Col: 124 Unexpected end tag (h5) in table context caused voodoo mode.
-Line: 1 Col: 124 End tag (h5) seen too early. Expected other end tag.
-Line: 1 Col: 129 Unexpected end tag (h6) in table context caused voodoo mode.
-Line: 1 Col: 129 End tag (h6) seen too early. Expected other end tag.
-Line: 1 Col: 136 Unexpected end tag (body) in the table row phase. Ignored.
-Line: 1 Col: 141 Unexpected end tag (br) in table context caused voodoo mode.
-Line: 1 Col: 141 Unexpected end tag (br). Treated as br element.
-Line: 1 Col: 145 Unexpected end tag (a) in table context caused voodoo mode.
-Line: 1 Col: 145 End tag (a) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 151 Unexpected end tag (img) in table context caused voodoo mode.
-Line: 1 Col: 151 This element (img) has no end tag.
-Line: 1 Col: 159 Unexpected end tag (title) in table context caused voodoo mode.
-Line: 1 Col: 159 Unexpected end tag (title). Ignored.
-Line: 1 Col: 166 Unexpected end tag (span) in table context caused voodoo mode.
-Line: 1 Col: 166 Unexpected end tag (span). Ignored.
-Line: 1 Col: 174 Unexpected end tag (style) in table context caused voodoo mode.
-Line: 1 Col: 174 Unexpected end tag (style). Ignored.
-Line: 1 Col: 183 Unexpected end tag (script) in table context caused voodoo mode.
-Line: 1 Col: 183 Unexpected end tag (script). Ignored.
-Line: 1 Col: 196 Unexpected end tag (th). Ignored.
-Line: 1 Col: 201 Unexpected end tag (td). Ignored.
-Line: 1 Col: 206 Unexpected end tag (tr). Ignored.
-Line: 1 Col: 214 This element (frame) has no end tag.
-Line: 1 Col: 221 This element (area) has no end tag.
-Line: 1 Col: 228 Unexpected end tag (link). Ignored.
-Line: 1 Col: 236 This element (param) has no end tag.
-Line: 1 Col: 241 This element (hr) has no end tag.
-Line: 1 Col: 249 This element (input) has no end tag.
-Line: 1 Col: 255 Unexpected end tag (col). Ignored.
-Line: 1 Col: 262 Unexpected end tag (base). Ignored.
-Line: 1 Col: 269 Unexpected end tag (meta). Ignored.
-Line: 1 Col: 280 This element (basefont) has no end tag.
-Line: 1 Col: 290 This element (bgsound) has no end tag.
-Line: 1 Col: 298 This element (embed) has no end tag.
-Line: 1 Col: 307 This element (spacer) has no end tag.
-Line: 1 Col: 311 Unexpected end tag (p). Ignored.
-Line: 1 Col: 316 End tag (dd) seen too early. Expected other end tag.
-Line: 1 Col: 321 End tag (dt) seen too early. Expected other end tag.
-Line: 1 Col: 331 Unexpected end tag (caption). Ignored.
-Line: 1 Col: 342 Unexpected end tag (colgroup). Ignored.
-Line: 1 Col: 350 Unexpected end tag (tbody). Ignored.
-Line: 1 Col: 358 Unexpected end tag (tfoot). Ignored.
-Line: 1 Col: 366 Unexpected end tag (thead). Ignored.
-Line: 1 Col: 376 End tag (address) seen too early. Expected other end tag.
-Line: 1 Col: 389 End tag (blockquote) seen too early. Expected other end tag.
-Line: 1 Col: 398 End tag (center) seen too early. Expected other end tag.
-Line: 1 Col: 404 Unexpected end tag (dir). Ignored.
-Line: 1 Col: 410 End tag (div) seen too early. Expected other end tag.
-Line: 1 Col: 415 End tag (dl) seen too early. Expected other end tag.
-Line: 1 Col: 426 End tag (fieldset) seen too early. Expected other end tag.
-Line: 1 Col: 436 End tag (listing) seen too early. Expected other end tag.
-Line: 1 Col: 443 End tag (menu) seen too early. Expected other end tag.
-Line: 1 Col: 448 End tag (ol) seen too early. Expected other end tag.
-Line: 1 Col: 453 End tag (ul) seen too early. Expected other end tag.
-Line: 1 Col: 458 End tag (li) seen too early. Expected other end tag.
-Line: 1 Col: 465 End tag (nobr) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 471 This element (wbr) has no end tag.
-Line: 1 Col: 487 End tag (button) seen too early. Expected other end tag.
-Line: 1 Col: 497 End tag (marquee) seen too early. Expected other end tag.
-Line: 1 Col: 506 End tag (object) seen too early. Expected other end tag.
-Line: 1 Col: 524 Unexpected end tag (html). Ignored.
-Line: 1 Col: 524 Unexpected end tag (frameset). Ignored.
-Line: 1 Col: 531 Unexpected end tag (head). Ignored.
-Line: 1 Col: 540 Unexpected end tag (iframe). Ignored.
-Line: 1 Col: 548 This element (image) has no end tag.
-Line: 1 Col: 558 This element (isindex) has no end tag.
-Line: 1 Col: 568 Unexpected end tag (noembed). Ignored.
-Line: 1 Col: 579 Unexpected end tag (noframes). Ignored.
-Line: 1 Col: 590 Unexpected end tag (noscript). Ignored.
-Line: 1 Col: 601 Unexpected end tag (optgroup). Ignored.
-Line: 1 Col: 610 Unexpected end tag (option). Ignored.
-Line: 1 Col: 622 Unexpected end tag (plaintext). Ignored.
-Line: 1 Col: 633 Unexpected end tag (textarea). Ignored.
-#document
-| <html>
-| <head>
-| <body>
-| <br>
-| <table>
-| <tbody>
-| <tr>
-| <p>
-
-#data
-<frameset>
-#errors
-Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
-Line: 1 Col: 10 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <frameset>
diff --git a/src/pkg/html/testdata/webkit/tests10.dat b/src/pkg/html/testdata/webkit/tests10.dat
deleted file mode 100644
index 4f8df86f2..000000000
--- a/src/pkg/html/testdata/webkit/tests10.dat
+++ /dev/null
@@ -1,799 +0,0 @@
-#data
-<!DOCTYPE html><svg></svg>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <svg svg>
-
-#data
-<!DOCTYPE html><svg></svg><![CDATA[a]]>
-#errors
-29: Bogus comment
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| <!-- [CDATA[a]] -->
-
-#data
-<!DOCTYPE html><body><svg></svg>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <svg svg>
-
-#data
-<!DOCTYPE html><body><select><svg></svg></select>
-#errors
-35: Stray “svg” start tag.
-42: Stray end tag “svg”
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <select>
-
-#data
-<!DOCTYPE html><body><select><option><svg></svg></option></select>
-#errors
-43: Stray “svg” start tag.
-50: Stray end tag “svg”
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <select>
-| <option>
-
-#data
-<!DOCTYPE html><body><table><svg></svg></table>
-#errors
-34: Start tag “svg” seen in “table”.
-41: Stray end tag “svg”.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| <table>
-
-#data
-<!DOCTYPE html><body><table><svg><g>foo</g></svg></table>
-#errors
-34: Start tag “svg” seen in “table”.
-46: Stray end tag “g”.
-53: Stray end tag “svg”.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| <svg g>
-| "foo"
-| <table>
-
-#data
-<!DOCTYPE html><body><table><svg><g>foo</g><g>bar</g></svg></table>
-#errors
-34: Start tag “svg” seen in “table”.
-46: Stray end tag “g”.
-58: Stray end tag “g”.
-65: Stray end tag “svg”.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| <svg g>
-| "foo"
-| <svg g>
-| "bar"
-| <table>
-
-#data
-<!DOCTYPE html><body><table><tbody><svg><g>foo</g><g>bar</g></svg></tbody></table>
-#errors
-41: Start tag “svg” seen in “table”.
-53: Stray end tag “g”.
-65: Stray end tag “g”.
-72: Stray end tag “svg”.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| <svg g>
-| "foo"
-| <svg g>
-| "bar"
-| <table>
-| <tbody>
-
-#data
-<!DOCTYPE html><body><table><tbody><tr><svg><g>foo</g><g>bar</g></svg></tr></tbody></table>
-#errors
-45: Start tag “svg” seen in “table”.
-57: Stray end tag “g”.
-69: Stray end tag “g”.
-76: Stray end tag “svg”.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| <svg g>
-| "foo"
-| <svg g>
-| "bar"
-| <table>
-| <tbody>
-| <tr>
-
-#data
-<!DOCTYPE html><body><table><tbody><tr><td><svg><g>foo</g><g>bar</g></svg></td></tr></tbody></table>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| <svg svg>
-| <svg g>
-| "foo"
-| <svg g>
-| "bar"
-
-#data
-<!DOCTYPE html><body><table><tbody><tr><td><svg><g>foo</g><g>bar</g></svg><p>baz</td></tr></tbody></table>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| <svg svg>
-| <svg g>
-| "foo"
-| <svg g>
-| "bar"
-| <p>
-| "baz"
-
-#data
-<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g></svg><p>baz</caption></table>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <table>
-| <caption>
-| <svg svg>
-| <svg g>
-| "foo"
-| <svg g>
-| "bar"
-| <p>
-| "baz"
-
-#data
-<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
-#errors
-70: HTML start tag “p” in a foreign namespace context.
-81: “table” closed but “caption” was still open.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <table>
-| <caption>
-| <svg svg>
-| <svg g>
-| "foo"
-| <svg g>
-| "bar"
-| <p>
-| "baz"
-| <p>
-| "quux"
-
-#data
-<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g>baz</table><p>quux
-#errors
-78: “table” closed but “caption” was still open.
-78: Unclosed elements on stack.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <table>
-| <caption>
-| <svg svg>
-| <svg g>
-| "foo"
-| <svg g>
-| "bar"
-| "baz"
-| <p>
-| "quux"
-
-#data
-<!DOCTYPE html><body><table><colgroup><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
-#errors
-44: Start tag “svg” seen in “table”.
-56: Stray end tag “g”.
-68: Stray end tag “g”.
-71: HTML start tag “p” in a foreign namespace context.
-71: Start tag “p” seen in “table”.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| <svg g>
-| "foo"
-| <svg g>
-| "bar"
-| <p>
-| "baz"
-| <table>
-| <colgroup>
-| <p>
-| "quux"
-
-#data
-<!DOCTYPE html><body><table><tr><td><select><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
-#errors
-50: Stray “svg” start tag.
-54: Stray “g” start tag.
-62: Stray end tag “g”
-66: Stray “g” start tag.
-74: Stray end tag “g”
-77: Stray “p” start tag.
-88: “table” end tag with “select” open.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| <select>
-| "foobarbaz"
-| <p>
-| "quux"
-
-#data
-<!DOCTYPE html><body><table><select><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
-#errors
-36: Start tag “select” seen in “table”.
-42: Stray “svg” start tag.
-46: Stray “g” start tag.
-54: Stray end tag “g”
-58: Stray “g” start tag.
-66: Stray end tag “g”
-69: Stray “p” start tag.
-80: “table” end tag with “select” open.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <select>
-| "foobarbaz"
-| <table>
-| <p>
-| "quux"
-
-#data
-<!DOCTYPE html><body></body></html><svg><g>foo</g><g>bar</g><p>baz
-#errors
-41: Stray “svg” start tag.
-68: HTML start tag “p” in a foreign namespace context.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| <svg g>
-| "foo"
-| <svg g>
-| "bar"
-| <p>
-| "baz"
-
-#data
-<!DOCTYPE html><body></body><svg><g>foo</g><g>bar</g><p>baz
-#errors
-34: Stray “svg” start tag.
-61: HTML start tag “p” in a foreign namespace context.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| <svg g>
-| "foo"
-| <svg g>
-| "bar"
-| <p>
-| "baz"
-
-#data
-<!DOCTYPE html><frameset><svg><g></g><g></g><p><span>
-#errors
-31: Stray “svg” start tag.
-35: Stray “g” start tag.
-40: Stray end tag “g”
-44: Stray “g” start tag.
-49: Stray end tag “g”
-52: Stray “p” start tag.
-58: Stray “span” start tag.
-58: End of file seen and there were open elements.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <frameset>
-
-#data
-<!DOCTYPE html><frameset></frameset><svg><g></g><g></g><p><span>
-#errors
-42: Stray “svg” start tag.
-46: Stray “g” start tag.
-51: Stray end tag “g”
-55: Stray “g” start tag.
-60: Stray end tag “g”
-63: Stray “p” start tag.
-69: Stray “span” start tag.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <frameset>
-
-#data
-<!DOCTYPE html><body xlink:href=foo><svg xlink:href=foo></svg>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| xlink:href="foo"
-| <svg svg>
-| xlink href="foo"
-
-#data
-<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo></g></svg>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| xlink:href="foo"
-| xml:lang="en"
-| <svg svg>
-| <svg g>
-| xlink href="foo"
-| xml lang="en"
-
-#data
-<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo /></svg>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| xlink:href="foo"
-| xml:lang="en"
-| <svg svg>
-| <svg g>
-| xlink href="foo"
-| xml lang="en"
-
-#data
-<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo />bar</svg>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| xlink:href="foo"
-| xml:lang="en"
-| <svg svg>
-| <svg g>
-| xlink href="foo"
-| xml lang="en"
-| "bar"
-
-#data
-<svg></path>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <svg svg>
-
-#data
-<div><svg></div>a
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| <svg svg>
-| "a"
-
-#data
-<div><svg><path></div>a
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| <svg svg>
-| <svg path>
-| "a"
-
-#data
-<div><svg><path></svg><path>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| <svg svg>
-| <svg path>
-| <path>
-
-#data
-<div><svg><path><foreignObject><math></div>a
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| <svg svg>
-| <svg path>
-| <svg foreignObject>
-| <math math>
-| "a"
-
-#data
-<div><svg><path><foreignObject><p></div>a
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| <svg svg>
-| <svg path>
-| <svg foreignObject>
-| <p>
-| "a"
-
-#data
-<!DOCTYPE html><svg><desc><div><svg><ul>a
-#errors
-40: HTML start tag “ul” in a foreign namespace context.
-41: End of file in a foreign namespace context.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| <svg desc>
-| <div>
-| <svg svg>
-| <ul>
-| "a"
-
-#data
-<!DOCTYPE html><svg><desc><svg><ul>a
-#errors
-35: HTML start tag “ul” in a foreign namespace context.
-36: End of file in a foreign namespace context.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| <svg desc>
-| <svg svg>
-| <ul>
-| "a"
-
-#data
-<!DOCTYPE html><p><svg><desc><p>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <svg svg>
-| <svg desc>
-| <p>
-
-#data
-<!DOCTYPE html><p><svg><title><p>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <svg svg>
-| <svg title>
-| <p>
-
-#data
-<div><svg><path><foreignObject><p></foreignObject><p>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| <svg svg>
-| <svg path>
-| <svg foreignObject>
-| <p>
-| <p>
-
-#data
-<math><mi><div><object><div><span></span></div></object></div></mi><mi>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <math math>
-| <math mi>
-| <div>
-| <object>
-| <div>
-| <span>
-| <math mi>
-
-#data
-<math><mi><svg><foreignObject><div><div></div></div></foreignObject></svg></mi><mi>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <math math>
-| <math mi>
-| <svg svg>
-| <svg foreignObject>
-| <div>
-| <div>
-| <math mi>
-
-#data
-<svg><script></script><path>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| <svg script>
-| <svg path>
-
-#data
-<table><svg></svg><tr>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| <table>
-| <tbody>
-| <tr>
-
-#data
-<math><mi><mglyph>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <math math>
-| <math mi>
-| <math mglyph>
-
-#data
-<math><mi><malignmark>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <math math>
-| <math mi>
-| <math malignmark>
-
-#data
-<math><mo><mglyph>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <math math>
-| <math mo>
-| <math mglyph>
-
-#data
-<math><mo><malignmark>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <math math>
-| <math mo>
-| <math malignmark>
-
-#data
-<math><mn><mglyph>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <math math>
-| <math mn>
-| <math mglyph>
-
-#data
-<math><mn><malignmark>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <math math>
-| <math mn>
-| <math malignmark>
-
-#data
-<math><ms><mglyph>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <math math>
-| <math ms>
-| <math mglyph>
-
-#data
-<math><ms><malignmark>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <math math>
-| <math ms>
-| <math malignmark>
-
-#data
-<math><mtext><mglyph>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <math math>
-| <math mtext>
-| <math mglyph>
-
-#data
-<math><mtext><malignmark>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <math math>
-| <math mtext>
-| <math malignmark>
-
-#data
-<math><annotation-xml><svg></svg></annotation-xml><mi>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <math math>
-| <math annotation-xml>
-| <svg svg>
-| <math mi>
-
-#data
-<math><annotation-xml><svg><foreignObject><div><math><mi></mi></math><span></span></div></foreignObject><path></path></svg></annotation-xml><mi>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <math math>
-| <math annotation-xml>
-| <svg svg>
-| <svg foreignObject>
-| <div>
-| <math math>
-| <math mi>
-| <span>
-| <svg path>
-| <math mi>
-
-#data
-<math><annotation-xml><svg><foreignObject><math><mi><svg></svg></mi><mo></mo></math><span></span></foreignObject><path></path></svg></annotation-xml><mi>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <math math>
-| <math annotation-xml>
-| <svg svg>
-| <svg foreignObject>
-| <math math>
-| <math mi>
-| <svg svg>
-| <math mo>
-| <span>
-| <svg path>
-| <math mi>
diff --git a/src/pkg/html/testdata/webkit/tests11.dat b/src/pkg/html/testdata/webkit/tests11.dat
deleted file mode 100644
index 638cde479..000000000
--- a/src/pkg/html/testdata/webkit/tests11.dat
+++ /dev/null
@@ -1,482 +0,0 @@
-#data
-<!DOCTYPE html><body><svg attributeName='' attributeType='' baseFrequency='' baseProfile='' calcMode='' clipPathUnits='' contentScriptType='' contentStyleType='' diffuseConstant='' edgeMode='' externalResourcesRequired='' filterRes='' filterUnits='' glyphRef='' gradientTransform='' gradientUnits='' kernelMatrix='' kernelUnitLength='' keyPoints='' keySplines='' keyTimes='' lengthAdjust='' limitingConeAngle='' markerHeight='' markerUnits='' markerWidth='' maskContentUnits='' maskUnits='' numOctaves='' pathLength='' patternContentUnits='' patternTransform='' patternUnits='' pointsAtX='' pointsAtY='' pointsAtZ='' preserveAlpha='' preserveAspectRatio='' primitiveUnits='' refX='' refY='' repeatCount='' repeatDur='' requiredExtensions='' requiredFeatures='' specularConstant='' specularExponent='' spreadMethod='' startOffset='' stdDeviation='' stitchTiles='' surfaceScale='' systemLanguage='' tableValues='' targetX='' targetY='' textLength='' viewBox='' viewTarget='' xChannelSelector='' yChannelSelector='' zoomAndPan=''></svg>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| attributeName=""
-| attributeType=""
-| baseFrequency=""
-| baseProfile=""
-| calcMode=""
-| clipPathUnits=""
-| contentScriptType=""
-| contentStyleType=""
-| diffuseConstant=""
-| edgeMode=""
-| externalResourcesRequired=""
-| filterRes=""
-| filterUnits=""
-| glyphRef=""
-| gradientTransform=""
-| gradientUnits=""
-| kernelMatrix=""
-| kernelUnitLength=""
-| keyPoints=""
-| keySplines=""
-| keyTimes=""
-| lengthAdjust=""
-| limitingConeAngle=""
-| markerHeight=""
-| markerUnits=""
-| markerWidth=""
-| maskContentUnits=""
-| maskUnits=""
-| numOctaves=""
-| pathLength=""
-| patternContentUnits=""
-| patternTransform=""
-| patternUnits=""
-| pointsAtX=""
-| pointsAtY=""
-| pointsAtZ=""
-| preserveAlpha=""
-| preserveAspectRatio=""
-| primitiveUnits=""
-| refX=""
-| refY=""
-| repeatCount=""
-| repeatDur=""
-| requiredExtensions=""
-| requiredFeatures=""
-| specularConstant=""
-| specularExponent=""
-| spreadMethod=""
-| startOffset=""
-| stdDeviation=""
-| stitchTiles=""
-| surfaceScale=""
-| systemLanguage=""
-| tableValues=""
-| targetX=""
-| targetY=""
-| textLength=""
-| viewBox=""
-| viewTarget=""
-| xChannelSelector=""
-| yChannelSelector=""
-| zoomAndPan=""
-
-#data
-<!DOCTYPE html><BODY><SVG ATTRIBUTENAME='' ATTRIBUTETYPE='' BASEFREQUENCY='' BASEPROFILE='' CALCMODE='' CLIPPATHUNITS='' CONTENTSCRIPTTYPE='' CONTENTSTYLETYPE='' DIFFUSECONSTANT='' EDGEMODE='' EXTERNALRESOURCESREQUIRED='' FILTERRES='' FILTERUNITS='' GLYPHREF='' GRADIENTTRANSFORM='' GRADIENTUNITS='' KERNELMATRIX='' KERNELUNITLENGTH='' KEYPOINTS='' KEYSPLINES='' KEYTIMES='' LENGTHADJUST='' LIMITINGCONEANGLE='' MARKERHEIGHT='' MARKERUNITS='' MARKERWIDTH='' MASKCONTENTUNITS='' MASKUNITS='' NUMOCTAVES='' PATHLENGTH='' PATTERNCONTENTUNITS='' PATTERNTRANSFORM='' PATTERNUNITS='' POINTSATX='' POINTSATY='' POINTSATZ='' PRESERVEALPHA='' PRESERVEASPECTRATIO='' PRIMITIVEUNITS='' REFX='' REFY='' REPEATCOUNT='' REPEATDUR='' REQUIREDEXTENSIONS='' REQUIREDFEATURES='' SPECULARCONSTANT='' SPECULAREXPONENT='' SPREADMETHOD='' STARTOFFSET='' STDDEVIATION='' STITCHTILES='' SURFACESCALE='' SYSTEMLANGUAGE='' TABLEVALUES='' TARGETX='' TARGETY='' TEXTLENGTH='' VIEWBOX='' VIEWTARGET='' XCHANNELSELECTOR='' YCHANNELSELECTOR='' ZOOMANDPAN=''></SVG>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| attributeName=""
-| attributeType=""
-| baseFrequency=""
-| baseProfile=""
-| calcMode=""
-| clipPathUnits=""
-| contentScriptType=""
-| contentStyleType=""
-| diffuseConstant=""
-| edgeMode=""
-| externalResourcesRequired=""
-| filterRes=""
-| filterUnits=""
-| glyphRef=""
-| gradientTransform=""
-| gradientUnits=""
-| kernelMatrix=""
-| kernelUnitLength=""
-| keyPoints=""
-| keySplines=""
-| keyTimes=""
-| lengthAdjust=""
-| limitingConeAngle=""
-| markerHeight=""
-| markerUnits=""
-| markerWidth=""
-| maskContentUnits=""
-| maskUnits=""
-| numOctaves=""
-| pathLength=""
-| patternContentUnits=""
-| patternTransform=""
-| patternUnits=""
-| pointsAtX=""
-| pointsAtY=""
-| pointsAtZ=""
-| preserveAlpha=""
-| preserveAspectRatio=""
-| primitiveUnits=""
-| refX=""
-| refY=""
-| repeatCount=""
-| repeatDur=""
-| requiredExtensions=""
-| requiredFeatures=""
-| specularConstant=""
-| specularExponent=""
-| spreadMethod=""
-| startOffset=""
-| stdDeviation=""
-| stitchTiles=""
-| surfaceScale=""
-| systemLanguage=""
-| tableValues=""
-| targetX=""
-| targetY=""
-| textLength=""
-| viewBox=""
-| viewTarget=""
-| xChannelSelector=""
-| yChannelSelector=""
-| zoomAndPan=""
-
-#data
-<!DOCTYPE html><body><svg attributename='' attributetype='' basefrequency='' baseprofile='' calcmode='' clippathunits='' contentscripttype='' contentstyletype='' diffuseconstant='' edgemode='' externalresourcesrequired='' filterres='' filterunits='' glyphref='' gradienttransform='' gradientunits='' kernelmatrix='' kernelunitlength='' keypoints='' keysplines='' keytimes='' lengthadjust='' limitingconeangle='' markerheight='' markerunits='' markerwidth='' maskcontentunits='' maskunits='' numoctaves='' pathlength='' patterncontentunits='' patterntransform='' patternunits='' pointsatx='' pointsaty='' pointsatz='' preservealpha='' preserveaspectratio='' primitiveunits='' refx='' refy='' repeatcount='' repeatdur='' requiredextensions='' requiredfeatures='' specularconstant='' specularexponent='' spreadmethod='' startoffset='' stddeviation='' stitchtiles='' surfacescale='' systemlanguage='' tablevalues='' targetx='' targety='' textlength='' viewbox='' viewtarget='' xchannelselector='' ychannelselector='' zoomandpan=''></svg>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| attributeName=""
-| attributeType=""
-| baseFrequency=""
-| baseProfile=""
-| calcMode=""
-| clipPathUnits=""
-| contentScriptType=""
-| contentStyleType=""
-| diffuseConstant=""
-| edgeMode=""
-| externalResourcesRequired=""
-| filterRes=""
-| filterUnits=""
-| glyphRef=""
-| gradientTransform=""
-| gradientUnits=""
-| kernelMatrix=""
-| kernelUnitLength=""
-| keyPoints=""
-| keySplines=""
-| keyTimes=""
-| lengthAdjust=""
-| limitingConeAngle=""
-| markerHeight=""
-| markerUnits=""
-| markerWidth=""
-| maskContentUnits=""
-| maskUnits=""
-| numOctaves=""
-| pathLength=""
-| patternContentUnits=""
-| patternTransform=""
-| patternUnits=""
-| pointsAtX=""
-| pointsAtY=""
-| pointsAtZ=""
-| preserveAlpha=""
-| preserveAspectRatio=""
-| primitiveUnits=""
-| refX=""
-| refY=""
-| repeatCount=""
-| repeatDur=""
-| requiredExtensions=""
-| requiredFeatures=""
-| specularConstant=""
-| specularExponent=""
-| spreadMethod=""
-| startOffset=""
-| stdDeviation=""
-| stitchTiles=""
-| surfaceScale=""
-| systemLanguage=""
-| tableValues=""
-| targetX=""
-| targetY=""
-| textLength=""
-| viewBox=""
-| viewTarget=""
-| xChannelSelector=""
-| yChannelSelector=""
-| zoomAndPan=""
-
-#data
-<!DOCTYPE html><body><math attributeName='' attributeType='' baseFrequency='' baseProfile='' calcMode='' clipPathUnits='' contentScriptType='' contentStyleType='' diffuseConstant='' edgeMode='' externalResourcesRequired='' filterRes='' filterUnits='' glyphRef='' gradientTransform='' gradientUnits='' kernelMatrix='' kernelUnitLength='' keyPoints='' keySplines='' keyTimes='' lengthAdjust='' limitingConeAngle='' markerHeight='' markerUnits='' markerWidth='' maskContentUnits='' maskUnits='' numOctaves='' pathLength='' patternContentUnits='' patternTransform='' patternUnits='' pointsAtX='' pointsAtY='' pointsAtZ='' preserveAlpha='' preserveAspectRatio='' primitiveUnits='' refX='' refY='' repeatCount='' repeatDur='' requiredExtensions='' requiredFeatures='' specularConstant='' specularExponent='' spreadMethod='' startOffset='' stdDeviation='' stitchTiles='' surfaceScale='' systemLanguage='' tableValues='' targetX='' targetY='' textLength='' viewBox='' viewTarget='' xChannelSelector='' yChannelSelector='' zoomAndPan=''></math>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <math math>
-| attributename=""
-| attributetype=""
-| basefrequency=""
-| baseprofile=""
-| calcmode=""
-| clippathunits=""
-| contentscripttype=""
-| contentstyletype=""
-| diffuseconstant=""
-| edgemode=""
-| externalresourcesrequired=""
-| filterres=""
-| filterunits=""
-| glyphref=""
-| gradienttransform=""
-| gradientunits=""
-| kernelmatrix=""
-| kernelunitlength=""
-| keypoints=""
-| keysplines=""
-| keytimes=""
-| lengthadjust=""
-| limitingconeangle=""
-| markerheight=""
-| markerunits=""
-| markerwidth=""
-| maskcontentunits=""
-| maskunits=""
-| numoctaves=""
-| pathlength=""
-| patterncontentunits=""
-| patterntransform=""
-| patternunits=""
-| pointsatx=""
-| pointsaty=""
-| pointsatz=""
-| preservealpha=""
-| preserveaspectratio=""
-| primitiveunits=""
-| refx=""
-| refy=""
-| repeatcount=""
-| repeatdur=""
-| requiredextensions=""
-| requiredfeatures=""
-| specularconstant=""
-| specularexponent=""
-| spreadmethod=""
-| startoffset=""
-| stddeviation=""
-| stitchtiles=""
-| surfacescale=""
-| systemlanguage=""
-| tablevalues=""
-| targetx=""
-| targety=""
-| textlength=""
-| viewbox=""
-| viewtarget=""
-| xchannelselector=""
-| ychannelselector=""
-| zoomandpan=""
-
-#data
-<!DOCTYPE html><body><svg><altGlyph /><altGlyphDef /><altGlyphItem /><animateColor /><animateMotion /><animateTransform /><clipPath /><feBlend /><feColorMatrix /><feComponentTransfer /><feComposite /><feConvolveMatrix /><feDiffuseLighting /><feDisplacementMap /><feDistantLight /><feFlood /><feFuncA /><feFuncB /><feFuncG /><feFuncR /><feGaussianBlur /><feImage /><feMerge /><feMergeNode /><feMorphology /><feOffset /><fePointLight /><feSpecularLighting /><feSpotLight /><feTile /><feTurbulence /><foreignObject /><glyphRef /><linearGradient /><radialGradient /><textPath /></svg>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| <svg altGlyph>
-| <svg altGlyphDef>
-| <svg altGlyphItem>
-| <svg animateColor>
-| <svg animateMotion>
-| <svg animateTransform>
-| <svg clipPath>
-| <svg feBlend>
-| <svg feColorMatrix>
-| <svg feComponentTransfer>
-| <svg feComposite>
-| <svg feConvolveMatrix>
-| <svg feDiffuseLighting>
-| <svg feDisplacementMap>
-| <svg feDistantLight>
-| <svg feFlood>
-| <svg feFuncA>
-| <svg feFuncB>
-| <svg feFuncG>
-| <svg feFuncR>
-| <svg feGaussianBlur>
-| <svg feImage>
-| <svg feMerge>
-| <svg feMergeNode>
-| <svg feMorphology>
-| <svg feOffset>
-| <svg fePointLight>
-| <svg feSpecularLighting>
-| <svg feSpotLight>
-| <svg feTile>
-| <svg feTurbulence>
-| <svg foreignObject>
-| <svg glyphRef>
-| <svg linearGradient>
-| <svg radialGradient>
-| <svg textPath>
-
-#data
-<!DOCTYPE html><body><svg><altglyph /><altglyphdef /><altglyphitem /><animatecolor /><animatemotion /><animatetransform /><clippath /><feblend /><fecolormatrix /><fecomponenttransfer /><fecomposite /><feconvolvematrix /><fediffuselighting /><fedisplacementmap /><fedistantlight /><feflood /><fefunca /><fefuncb /><fefuncg /><fefuncr /><fegaussianblur /><feimage /><femerge /><femergenode /><femorphology /><feoffset /><fepointlight /><fespecularlighting /><fespotlight /><fetile /><feturbulence /><foreignobject /><glyphref /><lineargradient /><radialgradient /><textpath /></svg>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| <svg altGlyph>
-| <svg altGlyphDef>
-| <svg altGlyphItem>
-| <svg animateColor>
-| <svg animateMotion>
-| <svg animateTransform>
-| <svg clipPath>
-| <svg feBlend>
-| <svg feColorMatrix>
-| <svg feComponentTransfer>
-| <svg feComposite>
-| <svg feConvolveMatrix>
-| <svg feDiffuseLighting>
-| <svg feDisplacementMap>
-| <svg feDistantLight>
-| <svg feFlood>
-| <svg feFuncA>
-| <svg feFuncB>
-| <svg feFuncG>
-| <svg feFuncR>
-| <svg feGaussianBlur>
-| <svg feImage>
-| <svg feMerge>
-| <svg feMergeNode>
-| <svg feMorphology>
-| <svg feOffset>
-| <svg fePointLight>
-| <svg feSpecularLighting>
-| <svg feSpotLight>
-| <svg feTile>
-| <svg feTurbulence>
-| <svg foreignObject>
-| <svg glyphRef>
-| <svg linearGradient>
-| <svg radialGradient>
-| <svg textPath>
-
-#data
-<!DOCTYPE html><BODY><SVG><ALTGLYPH /><ALTGLYPHDEF /><ALTGLYPHITEM /><ANIMATECOLOR /><ANIMATEMOTION /><ANIMATETRANSFORM /><CLIPPATH /><FEBLEND /><FECOLORMATRIX /><FECOMPONENTTRANSFER /><FECOMPOSITE /><FECONVOLVEMATRIX /><FEDIFFUSELIGHTING /><FEDISPLACEMENTMAP /><FEDISTANTLIGHT /><FEFLOOD /><FEFUNCA /><FEFUNCB /><FEFUNCG /><FEFUNCR /><FEGAUSSIANBLUR /><FEIMAGE /><FEMERGE /><FEMERGENODE /><FEMORPHOLOGY /><FEOFFSET /><FEPOINTLIGHT /><FESPECULARLIGHTING /><FESPOTLIGHT /><FETILE /><FETURBULENCE /><FOREIGNOBJECT /><GLYPHREF /><LINEARGRADIENT /><RADIALGRADIENT /><TEXTPATH /></SVG>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| <svg altGlyph>
-| <svg altGlyphDef>
-| <svg altGlyphItem>
-| <svg animateColor>
-| <svg animateMotion>
-| <svg animateTransform>
-| <svg clipPath>
-| <svg feBlend>
-| <svg feColorMatrix>
-| <svg feComponentTransfer>
-| <svg feComposite>
-| <svg feConvolveMatrix>
-| <svg feDiffuseLighting>
-| <svg feDisplacementMap>
-| <svg feDistantLight>
-| <svg feFlood>
-| <svg feFuncA>
-| <svg feFuncB>
-| <svg feFuncG>
-| <svg feFuncR>
-| <svg feGaussianBlur>
-| <svg feImage>
-| <svg feMerge>
-| <svg feMergeNode>
-| <svg feMorphology>
-| <svg feOffset>
-| <svg fePointLight>
-| <svg feSpecularLighting>
-| <svg feSpotLight>
-| <svg feTile>
-| <svg feTurbulence>
-| <svg foreignObject>
-| <svg glyphRef>
-| <svg linearGradient>
-| <svg radialGradient>
-| <svg textPath>
-
-#data
-<!DOCTYPE html><body><math><altGlyph /><altGlyphDef /><altGlyphItem /><animateColor /><animateMotion /><animateTransform /><clipPath /><feBlend /><feColorMatrix /><feComponentTransfer /><feComposite /><feConvolveMatrix /><feDiffuseLighting /><feDisplacementMap /><feDistantLight /><feFlood /><feFuncA /><feFuncB /><feFuncG /><feFuncR /><feGaussianBlur /><feImage /><feMerge /><feMergeNode /><feMorphology /><feOffset /><fePointLight /><feSpecularLighting /><feSpotLight /><feTile /><feTurbulence /><foreignObject /><glyphRef /><linearGradient /><radialGradient /><textPath /></math>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <math math>
-| <math altglyph>
-| <math altglyphdef>
-| <math altglyphitem>
-| <math animatecolor>
-| <math animatemotion>
-| <math animatetransform>
-| <math clippath>
-| <math feblend>
-| <math fecolormatrix>
-| <math fecomponenttransfer>
-| <math fecomposite>
-| <math feconvolvematrix>
-| <math fediffuselighting>
-| <math fedisplacementmap>
-| <math fedistantlight>
-| <math feflood>
-| <math fefunca>
-| <math fefuncb>
-| <math fefuncg>
-| <math fefuncr>
-| <math fegaussianblur>
-| <math feimage>
-| <math femerge>
-| <math femergenode>
-| <math femorphology>
-| <math feoffset>
-| <math fepointlight>
-| <math fespecularlighting>
-| <math fespotlight>
-| <math fetile>
-| <math feturbulence>
-| <math foreignobject>
-| <math glyphref>
-| <math lineargradient>
-| <math radialgradient>
-| <math textpath>
-
-#data
-<!DOCTYPE html><body><svg><solidColor /></svg>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| <svg solidcolor>
diff --git a/src/pkg/html/testdata/webkit/tests12.dat b/src/pkg/html/testdata/webkit/tests12.dat
deleted file mode 100644
index 63107d277..000000000
--- a/src/pkg/html/testdata/webkit/tests12.dat
+++ /dev/null
@@ -1,62 +0,0 @@
-#data
-<!DOCTYPE html><body><p>foo<math><mtext><i>baz</i></mtext><annotation-xml><svg><desc><b>eggs</b></desc><g><foreignObject><P>spam<TABLE><tr><td><img></td></table></foreignObject></g><g>quux</g></svg></annotation-xml></math>bar
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| "foo"
-| <math math>
-| <math mtext>
-| <i>
-| "baz"
-| <math annotation-xml>
-| <svg svg>
-| <svg desc>
-| <b>
-| "eggs"
-| <svg g>
-| <svg foreignObject>
-| <p>
-| "spam"
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| <img>
-| <svg g>
-| "quux"
-| "bar"
-
-#data
-<!DOCTYPE html><body>foo<math><mtext><i>baz</i></mtext><annotation-xml><svg><desc><b>eggs</b></desc><g><foreignObject><P>spam<TABLE><tr><td><img></td></table></foreignObject></g><g>quux</g></svg></annotation-xml></math>bar
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "foo"
-| <math math>
-| <math mtext>
-| <i>
-| "baz"
-| <math annotation-xml>
-| <svg svg>
-| <svg desc>
-| <b>
-| "eggs"
-| <svg g>
-| <svg foreignObject>
-| <p>
-| "spam"
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| <img>
-| <svg g>
-| "quux"
-| "bar"
diff --git a/src/pkg/html/testdata/webkit/tests14.dat b/src/pkg/html/testdata/webkit/tests14.dat
deleted file mode 100644
index b8713f885..000000000
--- a/src/pkg/html/testdata/webkit/tests14.dat
+++ /dev/null
@@ -1,74 +0,0 @@
-#data
-<!DOCTYPE html><html><body><xyz:abc></xyz:abc>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <xyz:abc>
-
-#data
-<!DOCTYPE html><html><body><xyz:abc></xyz:abc><span></span>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <xyz:abc>
-| <span>
-
-#data
-<!DOCTYPE html><html><html abc:def=gh><xyz:abc></xyz:abc>
-#errors
-15: Unexpected start tag html
-#document
-| <!DOCTYPE html>
-| <html>
-| abc:def="gh"
-| <head>
-| <body>
-| <xyz:abc>
-
-#data
-<!DOCTYPE html><html xml:lang=bar><html xml:lang=foo>
-#errors
-15: Unexpected start tag html
-#document
-| <!DOCTYPE html>
-| <html>
-| xml:lang="bar"
-| <head>
-| <body>
-
-#data
-<!DOCTYPE html><html 123=456>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| 123="456"
-| <head>
-| <body>
-
-#data
-<!DOCTYPE html><html 123=456><html 789=012>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| 123="456"
-| 789="012"
-| <head>
-| <body>
-
-#data
-<!DOCTYPE html><html><body 789=012>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| 789="012"
diff --git a/src/pkg/html/testdata/webkit/tests15.dat b/src/pkg/html/testdata/webkit/tests15.dat
deleted file mode 100644
index 6ce1c0d16..000000000
--- a/src/pkg/html/testdata/webkit/tests15.dat
+++ /dev/null
@@ -1,208 +0,0 @@
-#data
-<!DOCTYPE html><p><b><i><u></p> <p>X
-#errors
-Line: 1 Col: 31 Unexpected end tag (p). Ignored.
-Line: 1 Col: 36 Expected closing tag. Unexpected end of file.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <b>
-| <i>
-| <u>
-| <b>
-| <i>
-| <u>
-| " "
-| <p>
-| "X"
-
-#data
-<p><b><i><u></p>
-<p>X
-#errors
-Line: 1 Col: 3 Unexpected start tag (p). Expected DOCTYPE.
-Line: 1 Col: 16 Unexpected end tag (p). Ignored.
-Line: 2 Col: 4 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <p>
-| <b>
-| <i>
-| <u>
-| <b>
-| <i>
-| <u>
-| "
-"
-| <p>
-| "X"
-
-#data
-<!doctype html></html> <head>
-#errors
-Line: 1 Col: 22 Unexpected end tag (html) after the (implied) root element.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| " "
-
-#data
-<!doctype html></body><meta>
-#errors
-Line: 1 Col: 22 Unexpected end tag (body) after the (implied) root element.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <meta>
-
-#data
-<html></html><!-- foo -->
-#errors
-Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
-Line: 1 Col: 13 Unexpected end tag (html) after the (implied) root element.
-#document
-| <html>
-| <head>
-| <body>
-| <!-- foo -->
-
-#data
-<!doctype html></body><title>X</title>
-#errors
-Line: 1 Col: 22 Unexpected end tag (body) after the (implied) root element.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <title>
-| "X"
-
-#data
-<!doctype html><table> X<meta></table>
-#errors
-Line: 1 Col: 24 Unexpected non-space characters in table context caused voodoo mode.
-Line: 1 Col: 30 Unexpected start tag (meta) in table context caused voodoo mode.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| " X"
-| <meta>
-| <table>
-
-#data
-<!doctype html><table> x</table>
-#errors
-Line: 1 Col: 24 Unexpected non-space characters in table context caused voodoo mode.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| " x"
-| <table>
-
-#data
-<!doctype html><table> x </table>
-#errors
-Line: 1 Col: 25 Unexpected non-space characters in table context caused voodoo mode.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| " x "
-| <table>
-
-#data
-<!doctype html><table><tr> x</table>
-#errors
-Line: 1 Col: 28 Unexpected non-space characters in table context caused voodoo mode.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| " x"
-| <table>
-| <tbody>
-| <tr>
-
-#data
-<!doctype html><table>X<style> <tr>x </style> </table>
-#errors
-Line: 1 Col: 23 Unexpected non-space characters in table context caused voodoo mode.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "X"
-| <table>
-| <style>
-| " <tr>x "
-| " "
-
-#data
-<!doctype html><div><table><a>foo</a> <tr><td>bar</td> </tr></table></div>
-#errors
-Line: 1 Col: 30 Unexpected start tag (a) in table context caused voodoo mode.
-Line: 1 Col: 37 Unexpected end tag (a) in table context caused voodoo mode.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <div>
-| <a>
-| "foo"
-| <table>
-| " "
-| <tbody>
-| <tr>
-| <td>
-| "bar"
-| " "
-
-#data
-<frame></frame></frame><frameset><frame><frameset><frame></frameset><noframes></frameset><noframes>
-#errors
-6: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
-13: Stray start tag “frame”.
-21: Stray end tag “frame”.
-29: Stray end tag “frame”.
-39: “frameset” start tag after “body” already open.
-105: End of file seen inside an [R]CDATA element.
-105: End of file seen and there were open elements.
-XXX: These errors are wrong, please fix me!
-#document
-| <html>
-| <head>
-| <frameset>
-| <frame>
-| <frameset>
-| <frame>
-| <noframes>
-| "</frameset><noframes>"
-
-#data
-<!DOCTYPE html><object></html>
-#errors
-1: Expected closing tag. Unexpected end of file
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <object>
diff --git a/src/pkg/html/testdata/webkit/tests16.dat b/src/pkg/html/testdata/webkit/tests16.dat
deleted file mode 100644
index 937dba9f4..000000000
--- a/src/pkg/html/testdata/webkit/tests16.dat
+++ /dev/null
@@ -1,2277 +0,0 @@
-#data
-<!doctype html><script>
-#errors
-Line: 1 Col: 23 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| <body>
-
-#data
-<!doctype html><script>a
-#errors
-Line: 1 Col: 24 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "a"
-| <body>
-
-#data
-<!doctype html><script><
-#errors
-Line: 1 Col: 24 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<"
-| <body>
-
-#data
-<!doctype html><script></
-#errors
-Line: 1 Col: 25 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "</"
-| <body>
-
-#data
-<!doctype html><script></S
-#errors
-Line: 1 Col: 26 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "</S"
-| <body>
-
-#data
-<!doctype html><script></SC
-#errors
-Line: 1 Col: 27 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "</SC"
-| <body>
-
-#data
-<!doctype html><script></SCR
-#errors
-Line: 1 Col: 28 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "</SCR"
-| <body>
-
-#data
-<!doctype html><script></SCRI
-#errors
-Line: 1 Col: 29 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "</SCRI"
-| <body>
-
-#data
-<!doctype html><script></SCRIP
-#errors
-Line: 1 Col: 30 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "</SCRIP"
-| <body>
-
-#data
-<!doctype html><script></SCRIPT
-#errors
-Line: 1 Col: 31 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "</SCRIPT"
-| <body>
-
-#data
-<!doctype html><script></SCRIPT
-#errors
-Line: 1 Col: 32 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| <body>
-
-#data
-<!doctype html><script></s
-#errors
-Line: 1 Col: 26 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "</s"
-| <body>
-
-#data
-<!doctype html><script></sc
-#errors
-Line: 1 Col: 27 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "</sc"
-| <body>
-
-#data
-<!doctype html><script></scr
-#errors
-Line: 1 Col: 28 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "</scr"
-| <body>
-
-#data
-<!doctype html><script></scri
-#errors
-Line: 1 Col: 29 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "</scri"
-| <body>
-
-#data
-<!doctype html><script></scrip
-#errors
-Line: 1 Col: 30 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "</scrip"
-| <body>
-
-#data
-<!doctype html><script></script
-#errors
-Line: 1 Col: 31 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "</script"
-| <body>
-
-#data
-<!doctype html><script></script
-#errors
-Line: 1 Col: 32 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| <body>
-
-#data
-<!doctype html><script><!
-#errors
-Line: 1 Col: 25 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!"
-| <body>
-
-#data
-<!doctype html><script><!a
-#errors
-Line: 1 Col: 26 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!a"
-| <body>
-
-#data
-<!doctype html><script><!-
-#errors
-Line: 1 Col: 26 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!-"
-| <body>
-
-#data
-<!doctype html><script><!-a
-#errors
-Line: 1 Col: 27 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!-a"
-| <body>
-
-#data
-<!doctype html><script><!--
-#errors
-Line: 1 Col: 27 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--"
-| <body>
-
-#data
-<!doctype html><script><!--a
-#errors
-Line: 1 Col: 28 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--a"
-| <body>
-
-#data
-<!doctype html><script><!--<
-#errors
-Line: 1 Col: 28 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<"
-| <body>
-
-#data
-<!doctype html><script><!--<a
-#errors
-Line: 1 Col: 29 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<a"
-| <body>
-
-#data
-<!doctype html><script><!--</
-#errors
-Line: 1 Col: 27 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--</"
-| <body>
-
-#data
-<!doctype html><script><!--</script
-#errors
-Line: 1 Col: 35 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--</script"
-| <body>
-
-#data
-<!doctype html><script><!--</script
-#errors
-Line: 1 Col: 36 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--"
-| <body>
-
-#data
-<!doctype html><script><!--<s
-#errors
-Line: 1 Col: 29 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<s"
-| <body>
-
-#data
-<!doctype html><script><!--<script
-#errors
-Line: 1 Col: 34 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script"
-| <body>
-
-#data
-<!doctype html><script><!--<script
-#errors
-Line: 1 Col: 35 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script "
-| <body>
-
-#data
-<!doctype html><script><!--<script <
-#errors
-Line: 1 Col: 36 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script <"
-| <body>
-
-#data
-<!doctype html><script><!--<script <a
-#errors
-Line: 1 Col: 37 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script <a"
-| <body>
-
-#data
-<!doctype html><script><!--<script </
-#errors
-Line: 1 Col: 37 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script </"
-| <body>
-
-#data
-<!doctype html><script><!--<script </s
-#errors
-Line: 1 Col: 38 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script </s"
-| <body>
-
-#data
-<!doctype html><script><!--<script </script
-#errors
-Line: 1 Col: 43 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script </script"
-| <body>
-
-#data
-<!doctype html><script><!--<script </scripta
-#errors
-Line: 1 Col: 44 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script </scripta"
-| <body>
-
-#data
-<!doctype html><script><!--<script </script
-#errors
-Line: 1 Col: 44 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script </script "
-| <body>
-
-#data
-<!doctype html><script><!--<script </script>
-#errors
-Line: 1 Col: 44 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script </script>"
-| <body>
-
-#data
-<!doctype html><script><!--<script </script/
-#errors
-Line: 1 Col: 44 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script </script/"
-| <body>
-
-#data
-<!doctype html><script><!--<script </script <
-#errors
-Line: 1 Col: 45 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script </script <"
-| <body>
-
-#data
-<!doctype html><script><!--<script </script <a
-#errors
-Line: 1 Col: 46 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script </script <a"
-| <body>
-
-#data
-<!doctype html><script><!--<script </script </
-#errors
-Line: 1 Col: 46 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script </script </"
-| <body>
-
-#data
-<!doctype html><script><!--<script </script </script
-#errors
-Line: 1 Col: 52 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script </script </script"
-| <body>
-
-#data
-<!doctype html><script><!--<script </script </script
-#errors
-Line: 1 Col: 53 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script </script "
-| <body>
-
-#data
-<!doctype html><script><!--<script </script </script/
-#errors
-Line: 1 Col: 53 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script </script "
-| <body>
-
-#data
-<!doctype html><script><!--<script </script </script>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script </script "
-| <body>
-
-#data
-<!doctype html><script><!--<script -
-#errors
-Line: 1 Col: 36 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script -"
-| <body>
-
-#data
-<!doctype html><script><!--<script -a
-#errors
-Line: 1 Col: 37 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script -a"
-| <body>
-
-#data
-<!doctype html><script><!--<script -<
-#errors
-Line: 1 Col: 37 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script -<"
-| <body>
-
-#data
-<!doctype html><script><!--<script --
-#errors
-Line: 1 Col: 37 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script --"
-| <body>
-
-#data
-<!doctype html><script><!--<script --a
-#errors
-Line: 1 Col: 38 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script --a"
-| <body>
-
-#data
-<!doctype html><script><!--<script --<
-#errors
-Line: 1 Col: 38 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script --<"
-| <body>
-
-#data
-<!doctype html><script><!--<script -->
-#errors
-Line: 1 Col: 38 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script -->"
-| <body>
-
-#data
-<!doctype html><script><!--<script --><
-#errors
-Line: 1 Col: 39 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script --><"
-| <body>
-
-#data
-<!doctype html><script><!--<script --></
-#errors
-Line: 1 Col: 40 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script --></"
-| <body>
-
-#data
-<!doctype html><script><!--<script --></script
-#errors
-Line: 1 Col: 46 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script --></script"
-| <body>
-
-#data
-<!doctype html><script><!--<script --></script
-#errors
-Line: 1 Col: 47 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script -->"
-| <body>
-
-#data
-<!doctype html><script><!--<script --></script/
-#errors
-Line: 1 Col: 47 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script -->"
-| <body>
-
-#data
-<!doctype html><script><!--<script --></script>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script -->"
-| <body>
-
-#data
-<!doctype html><script><!--<script><\/script>--></script>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script><\/script>-->"
-| <body>
-
-#data
-<!doctype html><script><!--<script></scr'+'ipt>--></script>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script></scr'+'ipt>-->"
-| <body>
-
-#data
-<!doctype html><script><!--<script></script><script></script></script>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script></script><script></script>"
-| <body>
-
-#data
-<!doctype html><script><!--<script></script><script></script>--><!--</script>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script></script><script></script>--><!--"
-| <body>
-
-#data
-<!doctype html><script><!--<script></script><script></script>-- ></script>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script></script><script></script>-- >"
-| <body>
-
-#data
-<!doctype html><script><!--<script></script><script></script>- -></script>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script></script><script></script>- ->"
-| <body>
-
-#data
-<!doctype html><script><!--<script></script><script></script>- - ></script>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script></script><script></script>- - >"
-| <body>
-
-#data
-<!doctype html><script><!--<script></script><script></script>-></script>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script></script><script></script>->"
-| <body>
-
-#data
-<!doctype html><script><!--<script>--!></script>X
-#errors
-Line: 1 Col: 49 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script>--!></script>X"
-| <body>
-
-#data
-<!doctype html><script><!--<scr'+'ipt></script>--></script>
-#errors
-Line: 1 Col: 59 Unexpected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<scr'+'ipt>"
-| <body>
-| "-->"
-
-#data
-<!doctype html><script><!--<script></scr'+'ipt></script>X
-#errors
-Line: 1 Col: 57 Unexpected end of file. Expected end tag (script).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "<!--<script></scr'+'ipt></script>X"
-| <body>
-
-#data
-<!doctype html><style><!--<style></style>--></style>
-#errors
-Line: 1 Col: 52 Unexpected end tag (style).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <style>
-| "<!--<style>"
-| <body>
-| "-->"
-
-#data
-<!doctype html><style><!--</style>X
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <style>
-| "<!--"
-| <body>
-| "X"
-
-#data
-<!doctype html><style><!--...</style>...--></style>
-#errors
-Line: 1 Col: 51 Unexpected end tag (style).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <style>
-| "<!--..."
-| <body>
-| "...-->"
-
-#data
-<!doctype html><style><!--<br><html xmlns:v="urn:schemas-microsoft-com:vml"><!--[if !mso]><style></style>X
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <style>
-| "<!--<br><html xmlns:v="urn:schemas-microsoft-com:vml"><!--[if !mso]><style>"
-| <body>
-| "X"
-
-#data
-<!doctype html><style><!--...<style><!--...--!></style>--></style>
-#errors
-Line: 1 Col: 66 Unexpected end tag (style).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <style>
-| "<!--...<style><!--...--!>"
-| <body>
-| "-->"
-
-#data
-<!doctype html><style><!--...</style><!-- --><style>@import ...</style>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <style>
-| "<!--..."
-| <!-- -->
-| <style>
-| "@import ..."
-| <body>
-
-#data
-<!doctype html><style>...<style><!--...</style><!-- --></style>
-#errors
-Line: 1 Col: 63 Unexpected end tag (style).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <style>
-| "...<style><!--..."
-| <!-- -->
-| <body>
-
-#data
-<!doctype html><style>...<!--[if IE]><style>...</style>X
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <style>
-| "...<!--[if IE]><style>..."
-| <body>
-| "X"
-
-#data
-<!doctype html><title><!--<title></title>--></title>
-#errors
-Line: 1 Col: 52 Unexpected end tag (title).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <title>
-| "<!--<title>"
-| <body>
-| "-->"
-
-#data
-<!doctype html><title>&lt;/title></title>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <title>
-| "</title>"
-| <body>
-
-#data
-<!doctype html><title>foo/title><link></head><body>X
-#errors
-Line: 1 Col: 52 Unexpected end of file. Expected end tag (title).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <title>
-| "foo/title><link></head><body>X"
-| <body>
-
-#data
-<!doctype html><noscript><!--<noscript></noscript>--></noscript>
-#errors
-Line: 1 Col: 64 Unexpected end tag (noscript).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <noscript>
-| "<!--<noscript>"
-| <body>
-| "-->"
-
-#data
-<!doctype html><noscript><!--</noscript>X<noscript>--></noscript>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <noscript>
-| "<!--"
-| <body>
-| "X"
-| <noscript>
-| "-->"
-
-#data
-<!doctype html><noscript><iframe></noscript>X
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <noscript>
-| "<iframe>"
-| <body>
-| "X"
-
-#data
-<!doctype html><noframes><!--<noframes></noframes>--></noframes>
-#errors
-Line: 1 Col: 64 Unexpected end tag (noframes).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <noframes>
-| "<!--<noframes>"
-| <body>
-| "-->"
-
-#data
-<!doctype html><noframes><body><script><!--...</script></body></noframes></html>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <noframes>
-| "<body><script><!--...</script></body>"
-| <body>
-
-#data
-<!doctype html><textarea><!--<textarea></textarea>--></textarea>
-#errors
-Line: 1 Col: 64 Unexpected end tag (textarea).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <textarea>
-| "<!--<textarea>"
-| "-->"
-
-#data
-<!doctype html><textarea>&lt;/textarea></textarea>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <textarea>
-| "</textarea>"
-
-#data
-<!doctype html><iframe><!--<iframe></iframe>--></iframe>
-#errors
-Line: 1 Col: 56 Unexpected end tag (iframe).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <iframe>
-| "<!--<iframe>"
-| "-->"
-
-#data
-<!doctype html><iframe>...<!--X->...<!--/X->...</iframe>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <iframe>
-| "...<!--X->...<!--/X->..."
-
-#data
-<!doctype html><xmp><!--<xmp></xmp>--></xmp>
-#errors
-Line: 1 Col: 44 Unexpected end tag (xmp).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <xmp>
-| "<!--<xmp>"
-| "-->"
-
-#data
-<!doctype html><noembed><!--<noembed></noembed>--></noembed>
-#errors
-Line: 1 Col: 60 Unexpected end tag (noembed).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <noembed>
-| "<!--<noembed>"
-| "-->"
-
-#data
-<script>
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 8 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| <body>
-
-#data
-<script>a
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 9 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "a"
-| <body>
-
-#data
-<script><
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 9 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<"
-| <body>
-
-#data
-<script></
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 10 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "</"
-| <body>
-
-#data
-<script></S
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 11 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "</S"
-| <body>
-
-#data
-<script></SC
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 12 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "</SC"
-| <body>
-
-#data
-<script></SCR
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 13 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "</SCR"
-| <body>
-
-#data
-<script></SCRI
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 14 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "</SCRI"
-| <body>
-
-#data
-<script></SCRIP
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 15 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "</SCRIP"
-| <body>
-
-#data
-<script></SCRIPT
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 16 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "</SCRIPT"
-| <body>
-
-#data
-<script></SCRIPT
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 17 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| <body>
-
-#data
-<script></s
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 11 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "</s"
-| <body>
-
-#data
-<script></sc
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 12 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "</sc"
-| <body>
-
-#data
-<script></scr
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 13 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "</scr"
-| <body>
-
-#data
-<script></scri
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 14 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "</scri"
-| <body>
-
-#data
-<script></scrip
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 15 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "</scrip"
-| <body>
-
-#data
-<script></script
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 16 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "</script"
-| <body>
-
-#data
-<script></script
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 17 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| <body>
-
-#data
-<script><!
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 10 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!"
-| <body>
-
-#data
-<script><!a
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 11 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!a"
-| <body>
-
-#data
-<script><!-
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 11 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!-"
-| <body>
-
-#data
-<script><!-a
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 12 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!-a"
-| <body>
-
-#data
-<script><!--
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 12 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!--"
-| <body>
-
-#data
-<script><!--a
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 13 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!--a"
-| <body>
-
-#data
-<script><!--<
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 13 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<"
-| <body>
-
-#data
-<script><!--<a
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 14 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<a"
-| <body>
-
-#data
-<script><!--</
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 14 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!--</"
-| <body>
-
-#data
-<script><!--</script
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 20 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!--</script"
-| <body>
-
-#data
-<script><!--</script
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 21 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!--"
-| <body>
-
-#data
-<script><!--<s
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 14 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<s"
-| <body>
-
-#data
-<script><!--<script
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 19 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script"
-| <body>
-
-#data
-<script><!--<script
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 20 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script "
-| <body>
-
-#data
-<script><!--<script <
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 21 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script <"
-| <body>
-
-#data
-<script><!--<script <a
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 22 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script <a"
-| <body>
-
-#data
-<script><!--<script </
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 22 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script </"
-| <body>
-
-#data
-<script><!--<script </s
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 23 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script </s"
-| <body>
-
-#data
-<script><!--<script </script
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 28 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script </script"
-| <body>
-
-#data
-<script><!--<script </scripta
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 29 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script </scripta"
-| <body>
-
-#data
-<script><!--<script </script
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 29 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script </script "
-| <body>
-
-#data
-<script><!--<script </script>
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 29 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script </script>"
-| <body>
-
-#data
-<script><!--<script </script/
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 29 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script </script/"
-| <body>
-
-#data
-<script><!--<script </script <
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 30 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script </script <"
-| <body>
-
-#data
-<script><!--<script </script <a
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 31 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script </script <a"
-| <body>
-
-#data
-<script><!--<script </script </
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 31 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script </script </"
-| <body>
-
-#data
-<script><!--<script </script </script
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 38 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script </script </script"
-| <body>
-
-#data
-<script><!--<script </script </script
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 38 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script </script "
-| <body>
-
-#data
-<script><!--<script </script </script/
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 38 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script </script "
-| <body>
-
-#data
-<script><!--<script </script </script>
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script </script "
-| <body>
-
-#data
-<script><!--<script -
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 21 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script -"
-| <body>
-
-#data
-<script><!--<script -a
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 22 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script -a"
-| <body>
-
-#data
-<script><!--<script --
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 22 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script --"
-| <body>
-
-#data
-<script><!--<script --a
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 23 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script --a"
-| <body>
-
-#data
-<script><!--<script -->
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 23 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script -->"
-| <body>
-
-#data
-<script><!--<script --><
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 24 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script --><"
-| <body>
-
-#data
-<script><!--<script --></
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 25 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script --></"
-| <body>
-
-#data
-<script><!--<script --></script
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 31 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script --></script"
-| <body>
-
-#data
-<script><!--<script --></script
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 32 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script -->"
-| <body>
-
-#data
-<script><!--<script --></script/
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 32 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script -->"
-| <body>
-
-#data
-<script><!--<script --></script>
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script -->"
-| <body>
-
-#data
-<script><!--<script><\/script>--></script>
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script><\/script>-->"
-| <body>
-
-#data
-<script><!--<script></scr'+'ipt>--></script>
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script></scr'+'ipt>-->"
-| <body>
-
-#data
-<script><!--<script></script><script></script></script>
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script></script><script></script>"
-| <body>
-
-#data
-<script><!--<script></script><script></script>--><!--</script>
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script></script><script></script>--><!--"
-| <body>
-
-#data
-<script><!--<script></script><script></script>-- ></script>
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script></script><script></script>-- >"
-| <body>
-
-#data
-<script><!--<script></script><script></script>- -></script>
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script></script><script></script>- ->"
-| <body>
-
-#data
-<script><!--<script></script><script></script>- - ></script>
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script></script><script></script>- - >"
-| <body>
-
-#data
-<script><!--<script></script><script></script>-></script>
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script></script><script></script>->"
-| <body>
-
-#data
-<script><!--<script>--!></script>X
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 34 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script>--!></script>X"
-| <body>
-
-#data
-<script><!--<scr'+'ipt></script>--></script>
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 44 Unexpected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<scr'+'ipt>"
-| <body>
-| "-->"
-
-#data
-<script><!--<script></scr'+'ipt></script>X
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 42 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "<!--<script></scr'+'ipt></script>X"
-| <body>
-
-#data
-<style><!--<style></style>--></style>
-#errors
-Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
-Line: 1 Col: 37 Unexpected end tag (style).
-#document
-| <html>
-| <head>
-| <style>
-| "<!--<style>"
-| <body>
-| "-->"
-
-#data
-<style><!--</style>X
-#errors
-Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <style>
-| "<!--"
-| <body>
-| "X"
-
-#data
-<style><!--...</style>...--></style>
-#errors
-Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
-Line: 1 Col: 36 Unexpected end tag (style).
-#document
-| <html>
-| <head>
-| <style>
-| "<!--..."
-| <body>
-| "...-->"
-
-#data
-<style><!--<br><html xmlns:v="urn:schemas-microsoft-com:vml"><!--[if !mso]><style></style>X
-#errors
-Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <style>
-| "<!--<br><html xmlns:v="urn:schemas-microsoft-com:vml"><!--[if !mso]><style>"
-| <body>
-| "X"
-
-#data
-<style><!--...<style><!--...--!></style>--></style>
-#errors
-Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
-Line: 1 Col: 51 Unexpected end tag (style).
-#document
-| <html>
-| <head>
-| <style>
-| "<!--...<style><!--...--!>"
-| <body>
-| "-->"
-
-#data
-<style><!--...</style><!-- --><style>@import ...</style>
-#errors
-Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <style>
-| "<!--..."
-| <!-- -->
-| <style>
-| "@import ..."
-| <body>
-
-#data
-<style>...<style><!--...</style><!-- --></style>
-#errors
-Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
-Line: 1 Col: 48 Unexpected end tag (style).
-#document
-| <html>
-| <head>
-| <style>
-| "...<style><!--..."
-| <!-- -->
-| <body>
-
-#data
-<style>...<!--[if IE]><style>...</style>X
-#errors
-Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <style>
-| "...<!--[if IE]><style>..."
-| <body>
-| "X"
-
-#data
-<title><!--<title></title>--></title>
-#errors
-Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
-Line: 1 Col: 37 Unexpected end tag (title).
-#document
-| <html>
-| <head>
-| <title>
-| "<!--<title>"
-| <body>
-| "-->"
-
-#data
-<title>&lt;/title></title>
-#errors
-Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <title>
-| "</title>"
-| <body>
-
-#data
-<title>foo/title><link></head><body>X
-#errors
-Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
-Line: 1 Col: 37 Unexpected end of file. Expected end tag (title).
-#document
-| <html>
-| <head>
-| <title>
-| "foo/title><link></head><body>X"
-| <body>
-
-#data
-<noscript><!--<noscript></noscript>--></noscript>
-#errors
-Line: 1 Col: 10 Unexpected start tag (noscript). Expected DOCTYPE.
-Line: 1 Col: 49 Unexpected end tag (noscript).
-#document
-| <html>
-| <head>
-| <noscript>
-| "<!--<noscript>"
-| <body>
-| "-->"
-
-#data
-<noscript><!--</noscript>X<noscript>--></noscript>
-#errors
-Line: 1 Col: 10 Unexpected start tag (noscript). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <noscript>
-| "<!--"
-| <body>
-| "X"
-| <noscript>
-| "-->"
-
-#data
-<noscript><iframe></noscript>X
-#errors
-Line: 1 Col: 10 Unexpected start tag (noscript). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <noscript>
-| "<iframe>"
-| <body>
-| "X"
-
-#data
-<noframes><!--<noframes></noframes>--></noframes>
-#errors
-Line: 1 Col: 10 Unexpected start tag (noframes). Expected DOCTYPE.
-Line: 1 Col: 49 Unexpected end tag (noframes).
-#document
-| <html>
-| <head>
-| <noframes>
-| "<!--<noframes>"
-| <body>
-| "-->"
-
-#data
-<noframes><body><script><!--...</script></body></noframes></html>
-#errors
-Line: 1 Col: 10 Unexpected start tag (noframes). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <noframes>
-| "<body><script><!--...</script></body>"
-| <body>
-
-#data
-<textarea><!--<textarea></textarea>--></textarea>
-#errors
-Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE.
-Line: 1 Col: 49 Unexpected end tag (textarea).
-#document
-| <html>
-| <head>
-| <body>
-| <textarea>
-| "<!--<textarea>"
-| "-->"
-
-#data
-<textarea>&lt;/textarea></textarea>
-#errors
-Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-| <textarea>
-| "</textarea>"
-
-#data
-<iframe><!--<iframe></iframe>--></iframe>
-#errors
-Line: 1 Col: 8 Unexpected start tag (iframe). Expected DOCTYPE.
-Line: 1 Col: 41 Unexpected end tag (iframe).
-#document
-| <html>
-| <head>
-| <body>
-| <iframe>
-| "<!--<iframe>"
-| "-->"
-
-#data
-<iframe>...<!--X->...<!--/X->...</iframe>
-#errors
-Line: 1 Col: 8 Unexpected start tag (iframe). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-| <iframe>
-| "...<!--X->...<!--/X->..."
-
-#data
-<xmp><!--<xmp></xmp>--></xmp>
-#errors
-Line: 1 Col: 5 Unexpected start tag (xmp). Expected DOCTYPE.
-Line: 1 Col: 29 Unexpected end tag (xmp).
-#document
-| <html>
-| <head>
-| <body>
-| <xmp>
-| "<!--<xmp>"
-| "-->"
-
-#data
-<noembed><!--<noembed></noembed>--></noembed>
-#errors
-Line: 1 Col: 9 Unexpected start tag (noembed). Expected DOCTYPE.
-Line: 1 Col: 45 Unexpected end tag (noembed).
-#document
-| <html>
-| <head>
-| <body>
-| <noembed>
-| "<!--<noembed>"
-| "-->"
-
-#data
-<!doctype html><table>
-
-#errors
-Line 2 Col 0 Unexpected end of file. Expected table content.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <table>
-| "
-"
-
-#data
-<!doctype html><table><td><span><font></span><span>
-#errors
-Line 1 Col 26 Unexpected table cell start tag (td) in the table body phase.
-Line 1 Col 45 Unexpected end tag (span).
-Line 1 Col 51 Expected closing tag. Unexpected end of file.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| <span>
-| <font>
-| <font>
-| <span>
-
-#data
-<!doctype html><form><table></form><form></table></form>
-#errors
-35: Stray end tag “form”.
-41: Start tag “form” seen in “table”.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <form>
-| <table>
-| <form>
diff --git a/src/pkg/html/testdata/webkit/tests17.dat b/src/pkg/html/testdata/webkit/tests17.dat
deleted file mode 100644
index 7b555f888..000000000
--- a/src/pkg/html/testdata/webkit/tests17.dat
+++ /dev/null
@@ -1,153 +0,0 @@
-#data
-<!doctype html><table><tbody><select><tr>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <select>
-| <table>
-| <tbody>
-| <tr>
-
-#data
-<!doctype html><table><tr><select><td>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <select>
-| <table>
-| <tbody>
-| <tr>
-| <td>
-
-#data
-<!doctype html><table><tr><td><select><td>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| <select>
-| <td>
-
-#data
-<!doctype html><table><tr><th><select><td>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <table>
-| <tbody>
-| <tr>
-| <th>
-| <select>
-| <td>
-
-#data
-<!doctype html><table><caption><select><tr>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <table>
-| <caption>
-| <select>
-| <tbody>
-| <tr>
-
-#data
-<!doctype html><select><tr>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <select>
-
-#data
-<!doctype html><select><td>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <select>
-
-#data
-<!doctype html><select><th>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <select>
-
-#data
-<!doctype html><select><tbody>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <select>
-
-#data
-<!doctype html><select><thead>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <select>
-
-#data
-<!doctype html><select><tfoot>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <select>
-
-#data
-<!doctype html><select><caption>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <select>
-
-#data
-<!doctype html><table><tr></table>a
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <table>
-| <tbody>
-| <tr>
-| "a"
diff --git a/src/pkg/html/testdata/webkit/tests18.dat b/src/pkg/html/testdata/webkit/tests18.dat
deleted file mode 100644
index 680e1f068..000000000
--- a/src/pkg/html/testdata/webkit/tests18.dat
+++ /dev/null
@@ -1,269 +0,0 @@
-#data
-<!doctype html><plaintext></plaintext>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <plaintext>
-| "</plaintext>"
-
-#data
-<!doctype html><table><plaintext></plaintext>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <plaintext>
-| "</plaintext>"
-| <table>
-
-#data
-<!doctype html><table><tbody><plaintext></plaintext>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <plaintext>
-| "</plaintext>"
-| <table>
-| <tbody>
-
-#data
-<!doctype html><table><tbody><tr><plaintext></plaintext>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <plaintext>
-| "</plaintext>"
-| <table>
-| <tbody>
-| <tr>
-
-#data
-<!doctype html><table><tbody><tr><plaintext></plaintext>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <plaintext>
-| "</plaintext>"
-| <table>
-| <tbody>
-| <tr>
-
-#data
-<!doctype html><table><td><plaintext></plaintext>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| <plaintext>
-| "</plaintext>"
-
-#data
-<!doctype html><table><caption><plaintext></plaintext>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <table>
-| <caption>
-| <plaintext>
-| "</plaintext>"
-
-#data
-<!doctype html><table><tr><style></script></style>abc
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "abc"
-| <table>
-| <tbody>
-| <tr>
-| <style>
-| "</script>"
-
-#data
-<!doctype html><table><tr><script></style></script>abc
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "abc"
-| <table>
-| <tbody>
-| <tr>
-| <script>
-| "</style>"
-
-#data
-<!doctype html><table><caption><style></script></style>abc
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <table>
-| <caption>
-| <style>
-| "</script>"
-| "abc"
-
-#data
-<!doctype html><table><td><style></script></style>abc
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| <style>
-| "</script>"
-| "abc"
-
-#data
-<!doctype html><select><script></style></script>abc
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <select>
-| <script>
-| "</style>"
-| "abc"
-
-#data
-<!doctype html><table><select><script></style></script>abc
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <select>
-| <script>
-| "</style>"
-| "abc"
-| <table>
-
-#data
-<!doctype html><table><tr><select><script></style></script>abc
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <select>
-| <script>
-| "</style>"
-| "abc"
-| <table>
-| <tbody>
-| <tr>
-
-#data
-<!doctype html><frameset></frameset><noframes>abc
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <frameset>
-| <noframes>
-| "abc"
-
-#data
-<!doctype html><frameset></frameset><noframes>abc</noframes><!--abc-->
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <frameset>
-| <noframes>
-| "abc"
-| <!-- abc -->
-
-#data
-<!doctype html><frameset></frameset></html><noframes>abc
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <frameset>
-| <noframes>
-| "abc"
-
-#data
-<!doctype html><frameset></frameset></html><noframes>abc</noframes><!--abc-->
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <frameset>
-| <noframes>
-| "abc"
-| <!-- abc -->
-
-#data
-<!doctype html><table><tr></tbody><tfoot>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <table>
-| <tbody>
-| <tr>
-| <tfoot>
-
-#data
-<!doctype html><table><td><svg></svg>abc<td>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| <svg svg>
-| "abc"
-| <td>
diff --git a/src/pkg/html/testdata/webkit/tests19.dat b/src/pkg/html/testdata/webkit/tests19.dat
deleted file mode 100644
index 06222f5b9..000000000
--- a/src/pkg/html/testdata/webkit/tests19.dat
+++ /dev/null
@@ -1,1220 +0,0 @@
-#data
-<!doctype html><math><mn DefinitionUrl="foo">
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <math math>
-| <math mn>
-| definitionURL="foo"
-
-#data
-<!doctype html><html></p><!--foo-->
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <!-- foo -->
-| <head>
-| <body>
-
-#data
-<!doctype html><head></head></p><!--foo-->
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <!-- foo -->
-| <body>
-
-#data
-<!doctype html><body><p><pre>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <pre>
-
-#data
-<!doctype html><body><p><listing>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <listing>
-
-#data
-<!doctype html><p><plaintext>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <plaintext>
-
-#data
-<!doctype html><p><h1>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <h1>
-
-#data
-<!doctype html><form><isindex>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <form>
-
-#data
-<!doctype html><isindex action="POST">
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <form>
-| action="POST"
-| <hr>
-| <label>
-| "This is a searchable index. Enter search keywords: "
-| <input>
-| name="isindex"
-| <hr>
-
-#data
-<!doctype html><isindex prompt="this is isindex">
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <form>
-| <hr>
-| <label>
-| "this is isindex"
-| <input>
-| name="isindex"
-| <hr>
-
-#data
-<!doctype html><isindex type="hidden">
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <form>
-| <hr>
-| <label>
-| "This is a searchable index. Enter search keywords: "
-| <input>
-| name="isindex"
-| type="hidden"
-| <hr>
-
-#data
-<!doctype html><isindex name="foo">
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <form>
-| <hr>
-| <label>
-| "This is a searchable index. Enter search keywords: "
-| <input>
-| name="isindex"
-| <hr>
-
-#data
-<!doctype html><ruby><p><rp>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <ruby>
-| <p>
-| <rp>
-
-#data
-<!doctype html><ruby><div><span><rp>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <ruby>
-| <div>
-| <span>
-| <rp>
-
-#data
-<!doctype html><ruby><div><p><rp>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <ruby>
-| <div>
-| <p>
-| <rp>
-
-#data
-<!doctype html><ruby><p><rt>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <ruby>
-| <p>
-| <rt>
-
-#data
-<!doctype html><ruby><div><span><rt>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <ruby>
-| <div>
-| <span>
-| <rt>
-
-#data
-<!doctype html><ruby><div><p><rt>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <ruby>
-| <div>
-| <p>
-| <rt>
-
-#data
-<!doctype html><math/><foo>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <math math>
-| <foo>
-
-#data
-<!doctype html><svg/><foo>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| <foo>
-
-#data
-<!doctype html><div></body><!--foo-->
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <div>
-| <!-- foo -->
-
-#data
-<!doctype html><h1><div><h3><span></h1>foo
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <h1>
-| <div>
-| <h3>
-| <span>
-| "foo"
-
-#data
-<!doctype html><p></h3>foo
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| "foo"
-
-#data
-<!doctype html><h3><li>abc</h2>foo
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <h3>
-| <li>
-| "abc"
-| "foo"
-
-#data
-<!doctype html><table>abc<!--foo-->
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "abc"
-| <table>
-| <!-- foo -->
-
-#data
-<!doctype html><table> <!--foo-->
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <table>
-| " "
-| <!-- foo -->
-
-#data
-<!doctype html><table> b <!--foo-->
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| " b "
-| <table>
-| <!-- foo -->
-
-#data
-<!doctype html><select><option><option>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <select>
-| <option>
-| <option>
-
-#data
-<!doctype html><select><option></optgroup>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <select>
-| <option>
-
-#data
-<!doctype html><select><option></optgroup>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <select>
-| <option>
-
-#data
-<!doctype html><p><math><mi><p><h1>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <math math>
-| <math mi>
-| <p>
-| <h1>
-
-#data
-<!doctype html><p><math><mo><p><h1>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <math math>
-| <math mo>
-| <p>
-| <h1>
-
-#data
-<!doctype html><p><math><mn><p><h1>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <math math>
-| <math mn>
-| <p>
-| <h1>
-
-#data
-<!doctype html><p><math><ms><p><h1>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <math math>
-| <math ms>
-| <p>
-| <h1>
-
-#data
-<!doctype html><p><math><mtext><p><h1>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <math math>
-| <math mtext>
-| <p>
-| <h1>
-
-#data
-<!doctype html><frameset></noframes>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <frameset>
-
-#data
-<!doctype html><html c=d><body></html><html a=b>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| a="b"
-| c="d"
-| <head>
-| <body>
-
-#data
-<!doctype html><html c=d><frameset></frameset></html><html a=b>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| a="b"
-| c="d"
-| <head>
-| <frameset>
-
-#data
-<!doctype html><html><frameset></frameset></html><!--foo-->
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <frameset>
-| <!-- foo -->
-
-#data
-<!doctype html><html><frameset></frameset></html>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <frameset>
-| " "
-
-#data
-<!doctype html><html><frameset></frameset></html>abc
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <frameset>
-
-#data
-<!doctype html><html><frameset></frameset></html><p>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <frameset>
-
-#data
-<!doctype html><html><frameset></frameset></html></p>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <frameset>
-
-#data
-<html><frameset></frameset></html><!doctype html>
-#errors
-#document
-| <html>
-| <head>
-| <frameset>
-
-#data
-<!doctype html><body><frameset>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-
-#data
-<!doctype html><p><frameset><frame>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <frameset>
-| <frame>
-
-#data
-<!doctype html><p>a<frameset>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| "a"
-
-#data
-<!doctype html><p> <frameset><frame>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <frameset>
-| <frame>
-
-#data
-<!doctype html><pre><frameset>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <pre>
-
-#data
-<!doctype html><listing><frameset>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <listing>
-
-#data
-<!doctype html><li><frameset>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <li>
-
-#data
-<!doctype html><dd><frameset>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <dd>
-
-#data
-<!doctype html><dt><frameset>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <dt>
-
-#data
-<!doctype html><button><frameset>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <button>
-
-#data
-<!doctype html><applet><frameset>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <applet>
-
-#data
-<!doctype html><marquee><frameset>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <marquee>
-
-#data
-<!doctype html><object><frameset>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <object>
-
-#data
-<!doctype html><table><frameset>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <table>
-
-#data
-<!doctype html><area><frameset>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <area>
-
-#data
-<!doctype html><basefont><frameset>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <basefont>
-| <frameset>
-
-#data
-<!doctype html><bgsound><frameset>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <bgsound>
-| <frameset>
-
-#data
-<!doctype html><br><frameset>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <br>
-
-#data
-<!doctype html><embed><frameset>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <embed>
-
-#data
-<!doctype html><img><frameset>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <img>
-
-#data
-<!doctype html><input><frameset>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <input>
-
-#data
-<!doctype html><keygen><frameset>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <keygen>
-
-#data
-<!doctype html><wbr><frameset>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <wbr>
-
-#data
-<!doctype html><hr><frameset>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <hr>
-
-#data
-<!doctype html><textarea></textarea><frameset>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <textarea>
-
-#data
-<!doctype html><xmp></xmp><frameset>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <xmp>
-
-#data
-<!doctype html><iframe></iframe><frameset>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <iframe>
-
-#data
-<!doctype html><select></select><frameset>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <select>
-
-#data
-<!doctype html><svg></svg><frameset><frame>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <frameset>
-| <frame>
-
-#data
-<!doctype html><math></math><frameset><frame>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <frameset>
-| <frame>
-
-#data
-<!doctype html><svg><foreignObject><div> <frameset><frame>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <frameset>
-| <frame>
-
-#data
-<!doctype html><svg>a</svg><frameset><frame>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| "a"
-
-#data
-<!doctype html><svg> </svg><frameset><frame>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <frameset>
-| <frame>
-
-#data
-<html>aaa<frameset></frameset>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "aaa"
-
-#data
-<html> a <frameset></frameset>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "a "
-
-#data
-<!doctype html><div><frameset>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <frameset>
-
-#data
-<!doctype html><div><body><frameset>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <div>
-
-#data
-<!doctype html><p><math></p>a
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <math math>
-| "a"
-
-#data
-<!doctype html><p><math><mn><span></p>a
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <math math>
-| <math mn>
-| <span>
-| <p>
-| "a"
-
-#data
-<!doctype html><math></html>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <math math>
-
-#data
-<!doctype html><meta charset="ascii">
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <meta>
-| charset="ascii"
-| <body>
-
-#data
-<!doctype html><meta http-equiv="content-type" content="text/html;charset=ascii">
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <meta>
-| content="text/html;charset=ascii"
-| http-equiv="content-type"
-| <body>
-
-#data
-<!doctype html><head><!--aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa--><meta charset="utf8">
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <!-- aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -->
-| <meta>
-| charset="utf8"
-| <body>
-
-#data
-<!doctype html><html a=b><head></head><html c=d>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| a="b"
-| c="d"
-| <head>
-| <body>
-
-#data
-<!doctype html><image/>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <img>
-
-#data
-<!doctype html>a<i>b<table>c<b>d</i>e</b>f
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "a"
-| <i>
-| "bc"
-| <b>
-| "de"
-| "f"
-| <table>
-
-#data
-<!doctype html><table><i>a<b>b<div>c<a>d</i>e</b>f
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <i>
-| "a"
-| <b>
-| "b"
-| <b>
-| <div>
-| <b>
-| <i>
-| "c"
-| <a>
-| "d"
-| <a>
-| "e"
-| <a>
-| "f"
-| <table>
-
-#data
-<!doctype html><i>a<b>b<div>c<a>d</i>e</b>f
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <i>
-| "a"
-| <b>
-| "b"
-| <b>
-| <div>
-| <b>
-| <i>
-| "c"
-| <a>
-| "d"
-| <a>
-| "e"
-| <a>
-| "f"
-
-#data
-<!doctype html><table><i>a<b>b<div>c</i>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <i>
-| "a"
-| <b>
-| "b"
-| <b>
-| <div>
-| <i>
-| "c"
-| <table>
-
-#data
-<!doctype html><table><i>a<b>b<div>c<a>d</i>e</b>f
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <i>
-| "a"
-| <b>
-| "b"
-| <b>
-| <div>
-| <b>
-| <i>
-| "c"
-| <a>
-| "d"
-| <a>
-| "e"
-| <a>
-| "f"
-| <table>
-
-#data
-<!doctype html><table><i>a<div>b<tr>c<b>d</i>e
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <i>
-| "a"
-| <div>
-| "b"
-| <i>
-| "c"
-| <b>
-| "d"
-| <b>
-| "e"
-| <table>
-| <tbody>
-| <tr>
-
-#data
-<!doctype html><table><td><table><i>a<div>b<b>c</i>d
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| <i>
-| "a"
-| <div>
-| <i>
-| "b"
-| <b>
-| "c"
-| <b>
-| "d"
-| <table>
-
-#data
-<!doctype html><body><bgsound>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <bgsound>
-
-#data
-<!doctype html><body><basefont>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <basefont>
-
-#data
-<!doctype html><a><b></a><basefont>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <a>
-| <b>
-| <basefont>
-
-#data
-<!doctype html><a><b></a><bgsound>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <a>
-| <b>
-| <bgsound>
-
-#data
-<!doctype html><figcaption><article></figcaption>a
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <figcaption>
-| <article>
-| "a"
-
-#data
-<!doctype html><summary><article></summary>a
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <summary>
-| <article>
-| "a"
-
-#data
-<!doctype html><p><a><plaintext>b
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <a>
-| <plaintext>
-| <a>
-| "b"
diff --git a/src/pkg/html/testdata/webkit/tests2.dat b/src/pkg/html/testdata/webkit/tests2.dat
deleted file mode 100644
index 60d859221..000000000
--- a/src/pkg/html/testdata/webkit/tests2.dat
+++ /dev/null
@@ -1,763 +0,0 @@
-#data
-<!DOCTYPE html>Test
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "Test"
-
-#data
-<textarea>test</div>test
-#errors
-Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE.
-Line: 1 Col: 24 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <textarea>
-| "test</div>test"
-
-#data
-<table><td>
-#errors
-Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
-Line: 1 Col: 11 Unexpected table cell start tag (td) in the table body phase.
-Line: 1 Col: 11 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <table>
-| <tbody>
-| <tr>
-| <td>
-
-#data
-<table><td>test</tbody></table>
-#errors
-Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
-Line: 1 Col: 11 Unexpected table cell start tag (td) in the table body phase.
-#document
-| <html>
-| <head>
-| <body>
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| "test"
-
-#data
-<frame>test
-#errors
-Line: 1 Col: 7 Unexpected start tag (frame). Expected DOCTYPE.
-Line: 1 Col: 7 Unexpected start tag frame. Ignored.
-#document
-| <html>
-| <head>
-| <body>
-| "test"
-
-#data
-<!DOCTYPE html><frameset>test
-#errors
-Line: 1 Col: 29 Unepxected characters in the frameset phase. Characters ignored.
-Line: 1 Col: 29 Expected closing tag. Unexpected end of file.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <frameset>
-
-#data
-<!DOCTYPE html><frameset><!DOCTYPE html>
-#errors
-Line: 1 Col: 40 Unexpected DOCTYPE. Ignored.
-Line: 1 Col: 40 Expected closing tag. Unexpected end of file.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <frameset>
-
-#data
-<!DOCTYPE html><font><p><b>test</font>
-#errors
-Line: 1 Col: 38 End tag (font) violates step 1, paragraph 3 of the adoption agency algorithm.
-Line: 1 Col: 38 End tag (font) violates step 1, paragraph 3 of the adoption agency algorithm.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <font>
-| <p>
-| <font>
-| <b>
-| "test"
-
-#data
-<!DOCTYPE html><dt><div><dd>
-#errors
-Line: 1 Col: 28 Missing end tag (div, dt).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <dt>
-| <div>
-| <dd>
-
-#data
-<script></x
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-Line: 1 Col: 11 Unexpected end of file. Expected end tag (script).
-#document
-| <html>
-| <head>
-| <script>
-| "</x"
-| <body>
-
-#data
-<table><plaintext><td>
-#errors
-Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
-Line: 1 Col: 18 Unexpected start tag (plaintext) in table context caused voodoo mode.
-Line: 1 Col: 22 Unexpected end of file. Expected table content.
-#document
-| <html>
-| <head>
-| <body>
-| <plaintext>
-| "<td>"
-| <table>
-
-#data
-<plaintext></plaintext>
-#errors
-Line: 1 Col: 11 Unexpected start tag (plaintext). Expected DOCTYPE.
-Line: 1 Col: 23 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <plaintext>
-| "</plaintext>"
-
-#data
-<!DOCTYPE html><table><tr>TEST
-#errors
-Line: 1 Col: 30 Unexpected non-space characters in table context caused voodoo mode.
-Line: 1 Col: 30 Unexpected end of file. Expected table content.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "TEST"
-| <table>
-| <tbody>
-| <tr>
-
-#data
-<!DOCTYPE html><body t1=1><body t2=2><body t3=3 t4=4>
-#errors
-Line: 1 Col: 37 Unexpected start tag (body).
-Line: 1 Col: 53 Unexpected start tag (body).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| t1="1"
-| t2="2"
-| t3="3"
-| t4="4"
-
-#data
-</b test
-#errors
-Line: 1 Col: 8 Unexpected end of file in attribute name.
-Line: 1 Col: 8 End tag contains unexpected attributes.
-Line: 1 Col: 8 Unexpected end tag (b). Expected DOCTYPE.
-Line: 1 Col: 8 Unexpected end tag (b) after the (implied) root element.
-#document
-| <html>
-| <head>
-| <body>
-
-#data
-<!DOCTYPE html></b test<b &=&amp>X
-#errors
-Line: 1 Col: 32 Named entity didn't end with ';'.
-Line: 1 Col: 33 End tag contains unexpected attributes.
-Line: 1 Col: 33 Unexpected end tag (b) after the (implied) root element.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "X"
-
-#data
-<!doctypehtml><scrIPt type=text/x-foobar;baz>X</SCRipt
-#errors
-Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
-Line: 1 Col: 54 Unexpected end of file in the tag name.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| type="text/x-foobar;baz"
-| "X</SCRipt"
-| <body>
-
-#data
-&
-#errors
-Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-| "&"
-
-#data
-&#
-#errors
-Line: 1 Col: 1 Numeric entity expected. Got end of file instead.
-Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-| "&#"
-
-#data
-&#X
-#errors
-Line: 1 Col: 3 Numeric entity expected but none found.
-Line: 1 Col: 3 Unexpected non-space characters. Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-| "&#X"
-
-#data
-&#x
-#errors
-Line: 1 Col: 3 Numeric entity expected but none found.
-Line: 1 Col: 3 Unexpected non-space characters. Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-| "&#x"
-
-#data
-&#45
-#errors
-Line: 1 Col: 4 Numeric entity didn't end with ';'.
-Line: 1 Col: 4 Unexpected non-space characters. Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-| "-"
-
-#data
-&x-test
-#errors
-Line: 1 Col: 1 Named entity expected. Got none.
-Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-| "&x-test"
-
-#data
-<!doctypehtml><p><li>
-#errors
-Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <li>
-
-#data
-<!doctypehtml><p><dt>
-#errors
-Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <dt>
-
-#data
-<!doctypehtml><p><dd>
-#errors
-Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <dd>
-
-#data
-<!doctypehtml><p><form>
-#errors
-Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
-Line: 1 Col: 23 Expected closing tag. Unexpected end of file.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <form>
-
-#data
-<!DOCTYPE html><p></P>X
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| "X"
-
-#data
-&AMP
-#errors
-Line: 1 Col: 4 Named entity didn't end with ';'.
-Line: 1 Col: 4 Unexpected non-space characters. Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-| "&"
-
-#data
-&AMp;
-#errors
-Line: 1 Col: 1 Named entity expected. Got none.
-Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-| "&AMp;"
-
-#data
-<!DOCTYPE html><html><head></head><body><thisISasillyTESTelementNameToMakeSureCrazyTagNamesArePARSEDcorrectLY>
-#errors
-Line: 1 Col: 110 Expected closing tag. Unexpected end of file.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <thisisasillytestelementnametomakesurecrazytagnamesareparsedcorrectly>
-
-#data
-<!DOCTYPE html>X</body>X
-#errors
-Line: 1 Col: 24 Unexpected non-space characters in the after body phase.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "XX"
-
-#data
-<!DOCTYPE html><!-- X
-#errors
-Line: 1 Col: 21 Unexpected end of file in comment.
-#document
-| <!DOCTYPE html>
-| <!-- X -->
-| <html>
-| <head>
-| <body>
-
-#data
-<!DOCTYPE html><table><caption>test TEST</caption><td>test
-#errors
-Line: 1 Col: 54 Unexpected table cell start tag (td) in the table body phase.
-Line: 1 Col: 58 Expected closing tag. Unexpected end of file.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <table>
-| <caption>
-| "test TEST"
-| <tbody>
-| <tr>
-| <td>
-| "test"
-
-#data
-<!DOCTYPE html><select><option><optgroup>
-#errors
-Line: 1 Col: 41 Expected closing tag. Unexpected end of file.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <select>
-| <option>
-| <optgroup>
-
-#data
-<!DOCTYPE html><select><optgroup><option></optgroup><option><select><option>
-#errors
-Line: 1 Col: 68 Unexpected select start tag in the select phase treated as select end tag.
-Line: 1 Col: 76 Expected closing tag. Unexpected end of file.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <select>
-| <optgroup>
-| <option>
-| <option>
-| <option>
-
-#data
-<!DOCTYPE html><select><optgroup><option><optgroup>
-#errors
-Line: 1 Col: 51 Expected closing tag. Unexpected end of file.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <select>
-| <optgroup>
-| <option>
-| <optgroup>
-
-#data
-<!DOCTYPE html><datalist><option>foo</datalist>bar
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <datalist>
-| <option>
-| "foo"
-| "bar"
-
-#data
-<!DOCTYPE html><font><input><input></font>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <font>
-| <input>
-| <input>
-
-#data
-<!DOCTYPE html><!-- XXX - XXX -->
-#errors
-#document
-| <!DOCTYPE html>
-| <!-- XXX - XXX -->
-| <html>
-| <head>
-| <body>
-
-#data
-<!DOCTYPE html><!-- XXX - XXX
-#errors
-Line: 1 Col: 29 Unexpected end of file in comment (-)
-#document
-| <!DOCTYPE html>
-| <!-- XXX - XXX -->
-| <html>
-| <head>
-| <body>
-
-#data
-<!DOCTYPE html><!-- XXX - XXX - XXX -->
-#errors
-#document
-| <!DOCTYPE html>
-| <!-- XXX - XXX - XXX -->
-| <html>
-| <head>
-| <body>
-
-#data
-<isindex test=x name=x>
-#errors
-Line: 1 Col: 23 Unexpected start tag (isindex). Expected DOCTYPE.
-Line: 1 Col: 23 Unexpected start tag isindex. Don't use it!
-#document
-| <html>
-| <head>
-| <body>
-| <form>
-| <hr>
-| <label>
-| "This is a searchable index. Enter search keywords: "
-| <input>
-| name="isindex"
-| test="x"
-| <hr>
-
-#data
-test
-test
-#errors
-Line: 2 Col: 4 Unexpected non-space characters. Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-| "test
-test"
-
-#data
-<!DOCTYPE html><body><title>test</body></title>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <title>
-| "test</body>"
-
-#data
-<!DOCTYPE html><body><title>X</title><meta name=z><link rel=foo><style>
-x { content:"</style" } </style>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <title>
-| "X"
-| <meta>
-| name="z"
-| <link>
-| rel="foo"
-| <style>
-| "
-x { content:"</style" } "
-
-#data
-<!DOCTYPE html><select><optgroup></optgroup></select>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <select>
-| <optgroup>
-
-#data
-
-
-#errors
-Line: 2 Col: 1 Unexpected End of file. Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-
-#data
-<!DOCTYPE html> <html>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-
-#data
-<!DOCTYPE html><script>
-</script> <title>x</title> </head>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <script>
-| "
-"
-| " "
-| <title>
-| "x"
-| " "
-| <body>
-
-#data
-<!DOCTYPE html><html><body><html id=x>
-#errors
-Line: 1 Col: 38 html needs to be the first start tag.
-#document
-| <!DOCTYPE html>
-| <html>
-| id="x"
-| <head>
-| <body>
-
-#data
-<!DOCTYPE html>X</body><html id="x">
-#errors
-Line: 1 Col: 36 Unexpected start tag token (html) in the after body phase.
-Line: 1 Col: 36 html needs to be the first start tag.
-#document
-| <!DOCTYPE html>
-| <html>
-| id="x"
-| <head>
-| <body>
-| "X"
-
-#data
-<!DOCTYPE html><head><html id=x>
-#errors
-Line: 1 Col: 32 html needs to be the first start tag.
-#document
-| <!DOCTYPE html>
-| <html>
-| id="x"
-| <head>
-| <body>
-
-#data
-<!DOCTYPE html>X</html>X
-#errors
-Line: 1 Col: 24 Unexpected non-space characters in the after body phase.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "XX"
-
-#data
-<!DOCTYPE html>X</html>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "X "
-
-#data
-<!DOCTYPE html>X</html><p>X
-#errors
-Line: 1 Col: 26 Unexpected start tag (p).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "X"
-| <p>
-| "X"
-
-#data
-<!DOCTYPE html>X<p/x/y/z>
-#errors
-Line: 1 Col: 19 Expected a > after the /.
-Line: 1 Col: 21 Solidus (/) incorrectly placed in tag.
-Line: 1 Col: 23 Solidus (/) incorrectly placed in tag.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "X"
-| <p>
-| x=""
-| y=""
-| z=""
-
-#data
-<!DOCTYPE html><!--x--
-#errors
-Line: 1 Col: 22 Unexpected end of file in comment (--).
-#document
-| <!DOCTYPE html>
-| <!-- x -->
-| <html>
-| <head>
-| <body>
-
-#data
-<!DOCTYPE html><table><tr><td></p></table>
-#errors
-Line: 1 Col: 34 Unexpected end tag (p). Ignored.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| <p>
-
-#data
-<!DOCTYPE <!DOCTYPE HTML>><!--<!--x-->-->
-#errors
-Line: 1 Col: 20 Expected space or '>'. Got ''
-Line: 1 Col: 25 Erroneous DOCTYPE.
-Line: 1 Col: 35 Unexpected character in comment found.
-#document
-| <!DOCTYPE <!doctype>
-| <html>
-| <head>
-| <body>
-| ">"
-| <!-- <!--x -->
-| "-->"
-
-#data
-<!doctype html><div><form></form><div></div></div>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <div>
-| <form>
-| <div>
diff --git a/src/pkg/html/testdata/webkit/tests20.dat b/src/pkg/html/testdata/webkit/tests20.dat
deleted file mode 100644
index 6bd825608..000000000
--- a/src/pkg/html/testdata/webkit/tests20.dat
+++ /dev/null
@@ -1,455 +0,0 @@
-#data
-<!doctype html><p><button><button>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <button>
-| <button>
-
-#data
-<!doctype html><p><button><address>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <button>
-| <address>
-
-#data
-<!doctype html><p><button><blockquote>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <button>
-| <blockquote>
-
-#data
-<!doctype html><p><button><menu>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <button>
-| <menu>
-
-#data
-<!doctype html><p><button><p>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <button>
-| <p>
-
-#data
-<!doctype html><p><button><ul>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <button>
-| <ul>
-
-#data
-<!doctype html><p><button><h1>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <button>
-| <h1>
-
-#data
-<!doctype html><p><button><h6>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <button>
-| <h6>
-
-#data
-<!doctype html><p><button><listing>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <button>
-| <listing>
-
-#data
-<!doctype html><p><button><pre>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <button>
-| <pre>
-
-#data
-<!doctype html><p><button><form>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <button>
-| <form>
-
-#data
-<!doctype html><p><button><li>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <button>
-| <li>
-
-#data
-<!doctype html><p><button><dd>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <button>
-| <dd>
-
-#data
-<!doctype html><p><button><dt>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <button>
-| <dt>
-
-#data
-<!doctype html><p><button><plaintext>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <button>
-| <plaintext>
-
-#data
-<!doctype html><p><button><table>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <button>
-| <table>
-
-#data
-<!doctype html><p><button><hr>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <button>
-| <hr>
-
-#data
-<!doctype html><p><button><xmp>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <button>
-| <xmp>
-
-#data
-<!doctype html><p><button></p>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <button>
-| <p>
-
-#data
-<!doctype html><address><button></address>a
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <address>
-| <button>
-| "a"
-
-#data
-<!doctype html><address><button></address>a
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <address>
-| <button>
-| "a"
-
-#data
-<p><table></p>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <p>
-| <p>
-| <table>
-
-#data
-<!doctype html><svg>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <svg svg>
-
-#data
-<!doctype html><p><figcaption>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <figcaption>
-
-#data
-<!doctype html><p><summary>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <summary>
-
-#data
-<!doctype html><form><table><form>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <form>
-| <table>
-
-#data
-<!doctype html><table><form><form>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <table>
-| <form>
-
-#data
-<!doctype html><table><form></table><form>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <table>
-| <form>
-
-#data
-<!doctype html><svg><foreignObject><p>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| <svg foreignObject>
-| <p>
-
-#data
-<!doctype html><svg><title>abc
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| <svg title>
-| "abc"
-
-#data
-<option><span><option>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <option>
-| <span>
-| <option>
-
-#data
-<option><option>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <option>
-| <option>
-
-#data
-<math><annotation-xml><div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <math math>
-| <math annotation-xml>
-| <div>
-
-#data
-<math><annotation-xml encoding="application/svg+xml"><div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <math math>
-| <math annotation-xml>
-| encoding="application/svg+xml"
-| <div>
-
-#data
-<math><annotation-xml encoding="application/xhtml+xml"><div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <math math>
-| <math annotation-xml>
-| encoding="application/xhtml+xml"
-| <div>
-
-#data
-<math><annotation-xml encoding="aPPlication/xhtmL+xMl"><div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <math math>
-| <math annotation-xml>
-| encoding="aPPlication/xhtmL+xMl"
-| <div>
-
-#data
-<math><annotation-xml encoding="text/html"><div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <math math>
-| <math annotation-xml>
-| encoding="text/html"
-| <div>
-
-#data
-<math><annotation-xml encoding="Text/htmL"><div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <math math>
-| <math annotation-xml>
-| encoding="Text/htmL"
-| <div>
-
-#data
-<math><annotation-xml encoding=" text/html "><div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <math math>
-| <math annotation-xml>
-| encoding=" text/html "
-| <div>
diff --git a/src/pkg/html/testdata/webkit/tests21.dat b/src/pkg/html/testdata/webkit/tests21.dat
deleted file mode 100644
index 1260ec03e..000000000
--- a/src/pkg/html/testdata/webkit/tests21.dat
+++ /dev/null
@@ -1,221 +0,0 @@
-#data
-<svg><![CDATA[foo]]>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| "foo"
-
-#data
-<math><![CDATA[foo]]>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <math math>
-| "foo"
-
-#data
-<div><![CDATA[foo]]>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| <!-- [CDATA[foo]] -->
-
-#data
-<svg><![CDATA[foo
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| "foo"
-
-#data
-<svg><![CDATA[foo
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| "foo"
-
-#data
-<svg><![CDATA[
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <svg svg>
-
-#data
-<svg><![CDATA[]]>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <svg svg>
-
-#data
-<svg><![CDATA[]] >]]>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| "]] >"
-
-#data
-<svg><![CDATA[]] >]]>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| "]] >"
-
-#data
-<svg><![CDATA[]]
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| "]]"
-
-#data
-<svg><![CDATA[]
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| "]"
-
-#data
-<svg><![CDATA[]>a
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| "]>a"
-
-#data
-<svg><foreignObject><div><![CDATA[foo]]>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| <svg foreignObject>
-| <div>
-| <!-- [CDATA[foo]] -->
-
-#data
-<svg><![CDATA[<svg>]]>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| "<svg>"
-
-#data
-<svg><![CDATA[</svg>a]]>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| "</svg>a"
-
-#data
-<svg><![CDATA[<svg>a
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| "<svg>a"
-
-#data
-<svg><![CDATA[</svg>a
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| "</svg>a"
-
-#data
-<svg><![CDATA[<svg>]]><path>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| "<svg>"
-| <svg path>
-
-#data
-<svg><![CDATA[<svg>]]></path>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| "<svg>"
-
-#data
-<svg><![CDATA[<svg>]]><!--path-->
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| "<svg>"
-| <!-- path -->
-
-#data
-<svg><![CDATA[<svg>]]>path
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| "<svg>path"
-
-#data
-<svg><![CDATA[<!--svg-->]]>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| "<!--svg-->"
diff --git a/src/pkg/html/testdata/webkit/tests22.dat b/src/pkg/html/testdata/webkit/tests22.dat
deleted file mode 100644
index aab27b2e9..000000000
--- a/src/pkg/html/testdata/webkit/tests22.dat
+++ /dev/null
@@ -1,157 +0,0 @@
-#data
-<a><b><big><em><strong><div>X</a>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <a>
-| <b>
-| <big>
-| <em>
-| <strong>
-| <big>
-| <em>
-| <strong>
-| <div>
-| <a>
-| "X"
-
-#data
-<a><b><div id=1><div id=2><div id=3><div id=4><div id=5><div id=6><div id=7><div id=8>A</a>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <a>
-| <b>
-| <b>
-| <div>
-| id="1"
-| <a>
-| <div>
-| id="2"
-| <a>
-| <div>
-| id="3"
-| <a>
-| <div>
-| id="4"
-| <a>
-| <div>
-| id="5"
-| <a>
-| <div>
-| id="6"
-| <a>
-| <div>
-| id="7"
-| <a>
-| <div>
-| id="8"
-| <a>
-| "A"
-
-#data
-<a><b><div id=1><div id=2><div id=3><div id=4><div id=5><div id=6><div id=7><div id=8><div id=9>A</a>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <a>
-| <b>
-| <b>
-| <div>
-| id="1"
-| <a>
-| <div>
-| id="2"
-| <a>
-| <div>
-| id="3"
-| <a>
-| <div>
-| id="4"
-| <a>
-| <div>
-| id="5"
-| <a>
-| <div>
-| id="6"
-| <a>
-| <div>
-| id="7"
-| <a>
-| <div>
-| id="8"
-| <a>
-| <div>
-| id="9"
-| "A"
-
-#data
-<a><b><div id=1><div id=2><div id=3><div id=4><div id=5><div id=6><div id=7><div id=8><div id=9><div id=10>A</a>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <a>
-| <b>
-| <b>
-| <div>
-| id="1"
-| <a>
-| <div>
-| id="2"
-| <a>
-| <div>
-| id="3"
-| <a>
-| <div>
-| id="4"
-| <a>
-| <div>
-| id="5"
-| <a>
-| <div>
-| id="6"
-| <a>
-| <div>
-| id="7"
-| <a>
-| <div>
-| id="8"
-| <a>
-| <div>
-| id="9"
-| <div>
-| id="10"
-| "A"
-
-#data
-<cite><b><cite><i><cite><i><cite><i><div>X</b>TEST
-#errors
-Line: 1 Col: 6 Unexpected start tag (cite). Expected DOCTYPE.
-Line: 1 Col: 46 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
-Line: 1 Col: 50 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <cite>
-| <b>
-| <cite>
-| <i>
-| <cite>
-| <i>
-| <cite>
-| <i>
-| <i>
-| <i>
-| <div>
-| <b>
-| "X"
-| "TEST"
diff --git a/src/pkg/html/testdata/webkit/tests23.dat b/src/pkg/html/testdata/webkit/tests23.dat
deleted file mode 100644
index 34d2a73f1..000000000
--- a/src/pkg/html/testdata/webkit/tests23.dat
+++ /dev/null
@@ -1,155 +0,0 @@
-#data
-<p><font size=4><font color=red><font size=4><font size=4><font size=4><font size=4><font size=4><font color=red><p>X
-#errors
-3: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
-116: Unclosed elements.
-117: End of file seen and there were open elements.
-#document
-| <html>
-| <head>
-| <body>
-| <p>
-| <font>
-| size="4"
-| <font>
-| color="red"
-| <font>
-| size="4"
-| <font>
-| size="4"
-| <font>
-| size="4"
-| <font>
-| size="4"
-| <font>
-| size="4"
-| <font>
-| color="red"
-| <p>
-| <font>
-| color="red"
-| <font>
-| size="4"
-| <font>
-| size="4"
-| <font>
-| size="4"
-| <font>
-| color="red"
-| "X"
-
-#data
-<p><font size=4><font size=4><font size=4><font size=4><p>X
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <p>
-| <font>
-| size="4"
-| <font>
-| size="4"
-| <font>
-| size="4"
-| <font>
-| size="4"
-| <p>
-| <font>
-| size="4"
-| <font>
-| size="4"
-| <font>
-| size="4"
-| "X"
-
-#data
-<p><font size=4><font size=4><font size=4><font size="5"><font size=4><p>X
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <p>
-| <font>
-| size="4"
-| <font>
-| size="4"
-| <font>
-| size="4"
-| <font>
-| size="5"
-| <font>
-| size="4"
-| <p>
-| <font>
-| size="4"
-| <font>
-| size="4"
-| <font>
-| size="5"
-| <font>
-| size="4"
-| "X"
-
-#data
-<p><font size=4 id=a><font size=4 id=b><font size=4><font size=4><p>X
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <p>
-| <font>
-| id="a"
-| size="4"
-| <font>
-| id="b"
-| size="4"
-| <font>
-| size="4"
-| <font>
-| size="4"
-| <p>
-| <font>
-| id="a"
-| size="4"
-| <font>
-| id="b"
-| size="4"
-| <font>
-| size="4"
-| <font>
-| size="4"
-| "X"
-
-#data
-<p><b id=a><b id=a><b id=a><b><object><b id=a><b id=a>X</object><p>Y
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <p>
-| <b>
-| id="a"
-| <b>
-| id="a"
-| <b>
-| id="a"
-| <b>
-| <object>
-| <b>
-| id="a"
-| <b>
-| id="a"
-| "X"
-| <p>
-| <b>
-| id="a"
-| <b>
-| id="a"
-| <b>
-| id="a"
-| <b>
-| "Y"
diff --git a/src/pkg/html/testdata/webkit/tests24.dat b/src/pkg/html/testdata/webkit/tests24.dat
deleted file mode 100644
index f6dc7eb48..000000000
--- a/src/pkg/html/testdata/webkit/tests24.dat
+++ /dev/null
@@ -1,79 +0,0 @@
-#data
-<!DOCTYPE html>&NotEqualTilde;
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "≂̸"
-
-#data
-<!DOCTYPE html>&NotEqualTilde;A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "≂̸A"
-
-#data
-<!DOCTYPE html>&ThickSpace;
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "  "
-
-#data
-<!DOCTYPE html>&ThickSpace;A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "  A"
-
-#data
-<!DOCTYPE html>&NotSubset;
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "⊂⃒"
-
-#data
-<!DOCTYPE html>&NotSubset;A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "⊂⃒A"
-
-#data
-<!DOCTYPE html>&Gopf;
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "𝔾"
-
-#data
-<!DOCTYPE html>&Gopf;A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "𝔾A"
diff --git a/src/pkg/html/testdata/webkit/tests25.dat b/src/pkg/html/testdata/webkit/tests25.dat
deleted file mode 100644
index 00de7295b..000000000
--- a/src/pkg/html/testdata/webkit/tests25.dat
+++ /dev/null
@@ -1,219 +0,0 @@
-#data
-<!DOCTYPE html><body><foo>A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <foo>
-| "A"
-
-#data
-<!DOCTYPE html><body><area>A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <area>
-| "A"
-
-#data
-<!DOCTYPE html><body><base>A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <base>
-| "A"
-
-#data
-<!DOCTYPE html><body><basefont>A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <basefont>
-| "A"
-
-#data
-<!DOCTYPE html><body><bgsound>A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <bgsound>
-| "A"
-
-#data
-<!DOCTYPE html><body><br>A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <br>
-| "A"
-
-#data
-<!DOCTYPE html><body><col>A
-#errors
-26: Stray start tag “col”.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "A"
-
-#data
-<!DOCTYPE html><body><command>A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <command>
-| "A"
-
-#data
-<!DOCTYPE html><body><embed>A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <embed>
-| "A"
-
-#data
-<!DOCTYPE html><body><frame>A
-#errors
-26: Stray start tag “frame”.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "A"
-
-#data
-<!DOCTYPE html><body><hr>A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <hr>
-| "A"
-
-#data
-<!DOCTYPE html><body><img>A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <img>
-| "A"
-
-#data
-<!DOCTYPE html><body><input>A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <input>
-| "A"
-
-#data
-<!DOCTYPE html><body><keygen>A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <keygen>
-| "A"
-
-#data
-<!DOCTYPE html><body><link>A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <link>
-| "A"
-
-#data
-<!DOCTYPE html><body><meta>A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <meta>
-| "A"
-
-#data
-<!DOCTYPE html><body><param>A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <param>
-| "A"
-
-#data
-<!DOCTYPE html><body><source>A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <source>
-| "A"
-
-#data
-<!DOCTYPE html><body><track>A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <track>
-| "A"
-
-#data
-<!DOCTYPE html><body><wbr>A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <wbr>
-| "A"
diff --git a/src/pkg/html/testdata/webkit/tests26.dat b/src/pkg/html/testdata/webkit/tests26.dat
deleted file mode 100644
index da128e779..000000000
--- a/src/pkg/html/testdata/webkit/tests26.dat
+++ /dev/null
@@ -1,195 +0,0 @@
-#data
-<!DOCTYPE html><body><a href='#1'><nobr>1<nobr></a><br><a href='#2'><nobr>2<nobr></a><br><a href='#3'><nobr>3<nobr></a>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <a>
-| href="#1"
-| <nobr>
-| "1"
-| <nobr>
-| <nobr>
-| <br>
-| <a>
-| href="#2"
-| <a>
-| href="#2"
-| <nobr>
-| "2"
-| <nobr>
-| <nobr>
-| <br>
-| <a>
-| href="#3"
-| <a>
-| href="#3"
-| <nobr>
-| "3"
-| <nobr>
-
-#data
-<!DOCTYPE html><body><b><nobr>1<nobr></b><i><nobr>2<nobr></i>3
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <b>
-| <nobr>
-| "1"
-| <nobr>
-| <nobr>
-| <i>
-| <i>
-| <nobr>
-| "2"
-| <nobr>
-| <nobr>
-| "3"
-
-#data
-<!DOCTYPE html><body><b><nobr>1<table><nobr></b><i><nobr>2<nobr></i>3
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <b>
-| <nobr>
-| "1"
-| <nobr>
-| <i>
-| <i>
-| <nobr>
-| "2"
-| <nobr>
-| <nobr>
-| "3"
-| <table>
-
-#data
-<!DOCTYPE html><body><b><nobr>1<table><tr><td><nobr></b><i><nobr>2<nobr></i>3
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <b>
-| <nobr>
-| "1"
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| <nobr>
-| <i>
-| <i>
-| <nobr>
-| "2"
-| <nobr>
-| <nobr>
-| "3"
-
-#data
-<!DOCTYPE html><body><b><nobr>1<div><nobr></b><i><nobr>2<nobr></i>3
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <b>
-| <nobr>
-| "1"
-| <div>
-| <b>
-| <nobr>
-| <nobr>
-| <nobr>
-| <i>
-| <i>
-| <nobr>
-| "2"
-| <nobr>
-| <nobr>
-| "3"
-
-#data
-<!DOCTYPE html><body><b><nobr>1<nobr></b><div><i><nobr>2<nobr></i>3
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <b>
-| <nobr>
-| "1"
-| <nobr>
-| <div>
-| <nobr>
-| <i>
-| <i>
-| <nobr>
-| "2"
-| <nobr>
-| <nobr>
-| "3"
-
-#data
-<!DOCTYPE html><body><b><nobr>1<nobr><ins></b><i><nobr>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <b>
-| <nobr>
-| "1"
-| <nobr>
-| <ins>
-| <nobr>
-| <i>
-| <i>
-| <nobr>
-
-#data
-<!DOCTYPE html><body><b><nobr>1<ins><nobr></b><i>2
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <b>
-| <nobr>
-| "1"
-| <ins>
-| <nobr>
-| <nobr>
-| <i>
-| "2"
-
-#data
-<!DOCTYPE html><body><b>1<nobr></b><i><nobr>2</i>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <b>
-| "1"
-| <nobr>
-| <nobr>
-| <i>
-| <i>
-| <nobr>
-| "2"
diff --git a/src/pkg/html/testdata/webkit/tests3.dat b/src/pkg/html/testdata/webkit/tests3.dat
deleted file mode 100644
index 38dc501be..000000000
--- a/src/pkg/html/testdata/webkit/tests3.dat
+++ /dev/null
@@ -1,305 +0,0 @@
-#data
-<head></head><style></style>
-#errors
-Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
-Line: 1 Col: 20 Unexpected start tag (style) that can be in head. Moved.
-#document
-| <html>
-| <head>
-| <style>
-| <body>
-
-#data
-<head></head><script></script>
-#errors
-Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
-Line: 1 Col: 21 Unexpected start tag (script) that can be in head. Moved.
-#document
-| <html>
-| <head>
-| <script>
-| <body>
-
-#data
-<head></head><!-- --><style></style><!-- --><script></script>
-#errors
-Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
-Line: 1 Col: 28 Unexpected start tag (style) that can be in head. Moved.
-#document
-| <html>
-| <head>
-| <style>
-| <script>
-| <!-- -->
-| <!-- -->
-| <body>
-
-#data
-<head></head><!-- -->x<style></style><!-- --><script></script>
-#errors
-Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <!-- -->
-| <body>
-| "x"
-| <style>
-| <!-- -->
-| <script>
-
-#data
-<!DOCTYPE html><html><head></head><body><pre>
-</pre></body></html>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <pre>
-
-#data
-<!DOCTYPE html><html><head></head><body><pre>
-foo</pre></body></html>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <pre>
-| "foo"
-
-#data
-<!DOCTYPE html><html><head></head><body><pre>
-
-foo</pre></body></html>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <pre>
-| "
-foo"
-
-#data
-<!DOCTYPE html><html><head></head><body><pre>
-foo
-</pre></body></html>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <pre>
-| "foo
-"
-
-#data
-<!DOCTYPE html><html><head></head><body><pre>x</pre><span>
-</span></body></html>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <pre>
-| "x"
-| <span>
-| "
-"
-
-#data
-<!DOCTYPE html><html><head></head><body><pre>x
-y</pre></body></html>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <pre>
-| "x
-y"
-
-#data
-<!DOCTYPE html><html><head></head><body><pre>x<div>
-y</pre></body></html>
-#errors
-Line: 2 Col: 7 End tag (pre) seen too early. Expected other end tag.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <pre>
-| "x"
-| <div>
-| "
-y"
-
-#data
-<!DOCTYPE html><pre>&#x0a;&#x0a;A</pre>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <pre>
-| "
-A"
-
-#data
-<!DOCTYPE html><HTML><META><HEAD></HEAD></HTML>
-#errors
-Line: 1 Col: 33 Unexpected start tag head in existing head. Ignored.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <meta>
-| <body>
-
-#data
-<!DOCTYPE html><HTML><HEAD><head></HEAD></HTML>
-#errors
-Line: 1 Col: 33 Unexpected start tag head in existing head. Ignored.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-
-#data
-<textarea>foo<span>bar</span><i>baz
-#errors
-Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE.
-Line: 1 Col: 35 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <textarea>
-| "foo<span>bar</span><i>baz"
-
-#data
-<title>foo<span>bar</em><i>baz
-#errors
-Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
-Line: 1 Col: 30 Unexpected end of file. Expected end tag (title).
-#document
-| <html>
-| <head>
-| <title>
-| "foo<span>bar</em><i>baz"
-| <body>
-
-#data
-<!DOCTYPE html><textarea>
-</textarea>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <textarea>
-
-#data
-<!DOCTYPE html><textarea>
-foo</textarea>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <textarea>
-| "foo"
-
-#data
-<!DOCTYPE html><textarea>
-
-foo</textarea>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <textarea>
-| "
-foo"
-
-#data
-<!DOCTYPE html><html><head></head><body><ul><li><div><p><li></ul></body></html>
-#errors
-Line: 1 Col: 60 Missing end tag (div, li).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <ul>
-| <li>
-| <div>
-| <p>
-| <li>
-
-#data
-<!doctype html><nobr><nobr><nobr>
-#errors
-Line: 1 Col: 27 Unexpected start tag (nobr) implies end tag (nobr).
-Line: 1 Col: 33 Unexpected start tag (nobr) implies end tag (nobr).
-Line: 1 Col: 33 Expected closing tag. Unexpected end of file.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <nobr>
-| <nobr>
-| <nobr>
-
-#data
-<!doctype html><nobr><nobr></nobr><nobr>
-#errors
-Line: 1 Col: 27 Unexpected start tag (nobr) implies end tag (nobr).
-Line: 1 Col: 40 Expected closing tag. Unexpected end of file.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <nobr>
-| <nobr>
-| <nobr>
-
-#data
-<!doctype html><html><body><p><table></table></body></html>
-#errors
-Not known
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <table>
-
-#data
-<p><table></table>
-#errors
-Not known
-#document
-| <html>
-| <head>
-| <body>
-| <p>
-| <table>
diff --git a/src/pkg/html/testdata/webkit/tests4.dat b/src/pkg/html/testdata/webkit/tests4.dat
deleted file mode 100644
index 3c506326d..000000000
--- a/src/pkg/html/testdata/webkit/tests4.dat
+++ /dev/null
@@ -1,59 +0,0 @@
-#data
-direct div content
-#errors
-#document-fragment
-div
-#document
-| "direct div content"
-
-#data
-direct textarea content
-#errors
-#document-fragment
-textarea
-#document
-| "direct textarea content"
-
-#data
-textarea content with <em>pseudo</em> <foo>markup
-#errors
-#document-fragment
-textarea
-#document
-| "textarea content with <em>pseudo</em> <foo>markup"
-
-#data
-this is &#x0043;DATA inside a <style> element
-#errors
-#document-fragment
-style
-#document
-| "this is &#x0043;DATA inside a <style> element"
-
-#data
-</plaintext>
-#errors
-#document-fragment
-plaintext
-#document
-| "</plaintext>"
-
-#data
-setting html's innerHTML
-#errors
-Line: 1 Col: 24 Unexpected EOF in inner html mode.
-#document-fragment
-html
-#document
-| <head>
-| <body>
-| "setting html's innerHTML"
-
-#data
-<title>setting head's innerHTML</title>
-#errors
-#document-fragment
-head
-#document
-| <title>
-| "setting head's innerHTML"
diff --git a/src/pkg/html/testdata/webkit/tests5.dat b/src/pkg/html/testdata/webkit/tests5.dat
deleted file mode 100644
index d7b5128a4..000000000
--- a/src/pkg/html/testdata/webkit/tests5.dat
+++ /dev/null
@@ -1,191 +0,0 @@
-#data
-<style> <!-- </style>x
-#errors
-Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
-Line: 1 Col: 22 Unexpected end of file. Expected end tag (style).
-#document
-| <html>
-| <head>
-| <style>
-| " <!-- "
-| <body>
-| "x"
-
-#data
-<style> <!-- </style> --> </style>x
-#errors
-Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <style>
-| " <!-- "
-| " "
-| <body>
-| "--> x"
-
-#data
-<style> <!--> </style>x
-#errors
-Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <style>
-| " <!--> "
-| <body>
-| "x"
-
-#data
-<style> <!---> </style>x
-#errors
-Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <style>
-| " <!---> "
-| <body>
-| "x"
-
-#data
-<iframe> <!---> </iframe>x
-#errors
-Line: 1 Col: 8 Unexpected start tag (iframe). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-| <iframe>
-| " <!---> "
-| "x"
-
-#data
-<iframe> <!--- </iframe>->x</iframe> --> </iframe>x
-#errors
-Line: 1 Col: 8 Unexpected start tag (iframe). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-| <iframe>
-| " <!--- "
-| "->x --> x"
-
-#data
-<script> <!-- </script> --> </script>x
-#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <script>
-| " <!-- "
-| " "
-| <body>
-| "--> x"
-
-#data
-<title> <!-- </title> --> </title>x
-#errors
-Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <title>
-| " <!-- "
-| " "
-| <body>
-| "--> x"
-
-#data
-<textarea> <!--- </textarea>->x</textarea> --> </textarea>x
-#errors
-Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-| <textarea>
-| " <!--- "
-| "->x --> x"
-
-#data
-<style> <!</-- </style>x
-#errors
-Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <style>
-| " <!</-- "
-| <body>
-| "x"
-
-#data
-<p><xmp></xmp>
-#errors
-XXX: Unknown
-#document
-| <html>
-| <head>
-| <body>
-| <p>
-| <xmp>
-
-#data
-<xmp> <!-- > --> </xmp>
-#errors
-Line: 1 Col: 5 Unexpected start tag (xmp). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-| <xmp>
-| " <!-- > --> "
-
-#data
-<title>&amp;</title>
-#errors
-Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <title>
-| "&"
-| <body>
-
-#data
-<title><!--&amp;--></title>
-#errors
-Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <title>
-| "<!--&-->"
-| <body>
-
-#data
-<title><!--</title>
-#errors
-Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
-Line: 1 Col: 19 Unexpected end of file. Expected end tag (title).
-#document
-| <html>
-| <head>
-| <title>
-| "<!--"
-| <body>
-
-#data
-<noscript><!--</noscript>--></noscript>
-#errors
-Line: 1 Col: 10 Unexpected start tag (noscript). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <noscript>
-| "<!--"
-| <body>
-| "-->"
diff --git a/src/pkg/html/testdata/webkit/tests6.dat b/src/pkg/html/testdata/webkit/tests6.dat
deleted file mode 100644
index f28ece4fb..000000000
--- a/src/pkg/html/testdata/webkit/tests6.dat
+++ /dev/null
@@ -1,663 +0,0 @@
-#data
-<!doctype html></head> <head>
-#errors
-Line: 1 Col: 29 Unexpected start tag head. Ignored.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| " "
-| <body>
-
-#data
-<!doctype html><form><div></form><div>
-#errors
-33: End tag "form" seen but there were unclosed elements.
-38: End of file seen and there were open elements.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <form>
-| <div>
-| <div>
-
-#data
-<!doctype html><title>&amp;</title>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <title>
-| "&"
-| <body>
-
-#data
-<!doctype html><title><!--&amp;--></title>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <title>
-| "<!--&-->"
-| <body>
-
-#data
-<!doctype>
-#errors
-Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
-Line: 1 Col: 10 Unexpected > character. Expected DOCTYPE name.
-Line: 1 Col: 10 Erroneous DOCTYPE.
-#document
-| <!DOCTYPE >
-| <html>
-| <head>
-| <body>
-
-#data
-<!---x
-#errors
-Line: 1 Col: 6 Unexpected end of file in comment.
-Line: 1 Col: 6 Unexpected End of file. Expected DOCTYPE.
-#document
-| <!-- -x -->
-| <html>
-| <head>
-| <body>
-
-#data
-<body>
-<div>
-#errors
-Line: 1 Col: 6 Unexpected start tag (body).
-Line: 2 Col: 5 Expected closing tag. Unexpected end of file.
-#document-fragment
-div
-#document
-| "
-"
-| <div>
-
-#data
-<frameset></frameset>
-foo
-#errors
-Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
-Line: 2 Col: 3 Unexpected non-space characters in the after frameset phase. Ignored.
-#document
-| <html>
-| <head>
-| <frameset>
-| "
-"
-
-#data
-<frameset></frameset>
-<noframes>
-#errors
-Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
-Line: 2 Col: 10 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <frameset>
-| "
-"
-| <noframes>
-
-#data
-<frameset></frameset>
-<div>
-#errors
-Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
-Line: 2 Col: 5 Unexpected start tag (div) in the after frameset phase. Ignored.
-#document
-| <html>
-| <head>
-| <frameset>
-| "
-"
-
-#data
-<frameset></frameset>
-</html>
-#errors
-Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <frameset>
-| "
-"
-
-#data
-<frameset></frameset>
-</div>
-#errors
-Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
-Line: 2 Col: 6 Unexpected end tag (div) in the after frameset phase. Ignored.
-#document
-| <html>
-| <head>
-| <frameset>
-| "
-"
-
-#data
-<form><form>
-#errors
-Line: 1 Col: 6 Unexpected start tag (form). Expected DOCTYPE.
-Line: 1 Col: 12 Unexpected start tag (form).
-Line: 1 Col: 12 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <form>
-
-#data
-<button><button>
-#errors
-Line: 1 Col: 8 Unexpected start tag (button). Expected DOCTYPE.
-Line: 1 Col: 16 Unexpected start tag (button) implies end tag (button).
-Line: 1 Col: 16 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <button>
-| <button>
-
-#data
-<table><tr><td></th>
-#errors
-Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
-Line: 1 Col: 20 Unexpected end tag (th). Ignored.
-Line: 1 Col: 20 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <table>
-| <tbody>
-| <tr>
-| <td>
-
-#data
-<table><caption><td>
-#errors
-Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
-Line: 1 Col: 20 Unexpected end tag (td). Ignored.
-Line: 1 Col: 20 Unexpected table cell start tag (td) in the table body phase.
-Line: 1 Col: 20 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <table>
-| <caption>
-| <tbody>
-| <tr>
-| <td>
-
-#data
-<table><caption><div>
-#errors
-Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
-Line: 1 Col: 21 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <table>
-| <caption>
-| <div>
-
-#data
-</caption><div>
-#errors
-Line: 1 Col: 10 Unexpected end tag (caption). Ignored.
-Line: 1 Col: 15 Expected closing tag. Unexpected end of file.
-#document-fragment
-caption
-#document
-| <div>
-
-#data
-<table><caption><div></caption>
-#errors
-Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
-Line: 1 Col: 31 Unexpected end tag (caption). Missing end tag (div).
-Line: 1 Col: 31 Unexpected end of file. Expected table content.
-#document
-| <html>
-| <head>
-| <body>
-| <table>
-| <caption>
-| <div>
-
-#data
-<table><caption></table>
-#errors
-Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
-Line: 1 Col: 24 Unexpected end table tag in caption. Generates implied end caption.
-#document
-| <html>
-| <head>
-| <body>
-| <table>
-| <caption>
-
-#data
-</table><div>
-#errors
-Line: 1 Col: 8 Unexpected end table tag in caption. Generates implied end caption.
-Line: 1 Col: 8 Unexpected end tag (caption). Ignored.
-Line: 1 Col: 13 Expected closing tag. Unexpected end of file.
-#document-fragment
-caption
-#document
-| <div>
-
-#data
-<table><caption></body></col></colgroup></html></tbody></td></tfoot></th></thead></tr>
-#errors
-Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
-Line: 1 Col: 23 Unexpected end tag (body). Ignored.
-Line: 1 Col: 29 Unexpected end tag (col). Ignored.
-Line: 1 Col: 40 Unexpected end tag (colgroup). Ignored.
-Line: 1 Col: 47 Unexpected end tag (html). Ignored.
-Line: 1 Col: 55 Unexpected end tag (tbody). Ignored.
-Line: 1 Col: 60 Unexpected end tag (td). Ignored.
-Line: 1 Col: 68 Unexpected end tag (tfoot). Ignored.
-Line: 1 Col: 73 Unexpected end tag (th). Ignored.
-Line: 1 Col: 81 Unexpected end tag (thead). Ignored.
-Line: 1 Col: 86 Unexpected end tag (tr). Ignored.
-Line: 1 Col: 86 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <table>
-| <caption>
-
-#data
-<table><caption><div></div>
-#errors
-Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
-Line: 1 Col: 27 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <table>
-| <caption>
-| <div>
-
-#data
-<table><tr><td></body></caption></col></colgroup></html>
-#errors
-Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
-Line: 1 Col: 22 Unexpected end tag (body). Ignored.
-Line: 1 Col: 32 Unexpected end tag (caption). Ignored.
-Line: 1 Col: 38 Unexpected end tag (col). Ignored.
-Line: 1 Col: 49 Unexpected end tag (colgroup). Ignored.
-Line: 1 Col: 56 Unexpected end tag (html). Ignored.
-Line: 1 Col: 56 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <table>
-| <tbody>
-| <tr>
-| <td>
-
-#data
-</table></tbody></tfoot></thead></tr><div>
-#errors
-Line: 1 Col: 8 Unexpected end tag (table). Ignored.
-Line: 1 Col: 16 Unexpected end tag (tbody). Ignored.
-Line: 1 Col: 24 Unexpected end tag (tfoot). Ignored.
-Line: 1 Col: 32 Unexpected end tag (thead). Ignored.
-Line: 1 Col: 37 Unexpected end tag (tr). Ignored.
-Line: 1 Col: 42 Expected closing tag. Unexpected end of file.
-#document-fragment
-td
-#document
-| <div>
-
-#data
-<table><colgroup>foo
-#errors
-Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
-Line: 1 Col: 20 Unexpected non-space characters in table context caused voodoo mode.
-Line: 1 Col: 20 Unexpected end of file. Expected table content.
-#document
-| <html>
-| <head>
-| <body>
-| "foo"
-| <table>
-| <colgroup>
-
-#data
-foo<col>
-#errors
-Line: 1 Col: 3 Unexpected end tag (colgroup). Ignored.
-#document-fragment
-colgroup
-#document
-| <col>
-
-#data
-<table><colgroup></col>
-#errors
-Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
-Line: 1 Col: 23 This element (col) has no end tag.
-Line: 1 Col: 23 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <table>
-| <colgroup>
-
-#data
-<frameset><div>
-#errors
-Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
-Line: 1 Col: 15 Unexpected start tag token (div) in the frameset phase. Ignored.
-Line: 1 Col: 15 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <frameset>
-
-#data
-</frameset><frame>
-#errors
-Line: 1 Col: 11 Unexpected end tag token (frameset) in the frameset phase (innerHTML).
-#document-fragment
-frameset
-#document
-| <frame>
-
-#data
-<frameset></div>
-#errors
-Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
-Line: 1 Col: 16 Unexpected end tag token (div) in the frameset phase. Ignored.
-Line: 1 Col: 16 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <frameset>
-
-#data
-</body><div>
-#errors
-Line: 1 Col: 7 Unexpected end tag (body). Ignored.
-Line: 1 Col: 12 Expected closing tag. Unexpected end of file.
-#document-fragment
-body
-#document
-| <div>
-
-#data
-<table><tr><div>
-#errors
-Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
-Line: 1 Col: 16 Unexpected start tag (div) in table context caused voodoo mode.
-Line: 1 Col: 16 Unexpected end of file. Expected table content.
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| <table>
-| <tbody>
-| <tr>
-
-#data
-</tr><td>
-#errors
-Line: 1 Col: 5 Unexpected end tag (tr). Ignored.
-#document-fragment
-tr
-#document
-| <td>
-
-#data
-</tbody></tfoot></thead><td>
-#errors
-Line: 1 Col: 8 Unexpected end tag (tbody). Ignored.
-Line: 1 Col: 16 Unexpected end tag (tfoot). Ignored.
-Line: 1 Col: 24 Unexpected end tag (thead). Ignored.
-#document-fragment
-tr
-#document
-| <td>
-
-#data
-<table><tr><div><td>
-#errors
-Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
-Line: 1 Col: 16 Unexpected start tag (div) in table context caused voodoo mode.
-Line: 1 Col: 20 Unexpected implied end tag (div) in the table row phase.
-Line: 1 Col: 20 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| <table>
-| <tbody>
-| <tr>
-| <td>
-
-#data
-<caption><col><colgroup><tbody><tfoot><thead><tr>
-#errors
-Line: 1 Col: 9 Unexpected start tag (caption).
-Line: 1 Col: 14 Unexpected start tag (col).
-Line: 1 Col: 24 Unexpected start tag (colgroup).
-Line: 1 Col: 31 Unexpected start tag (tbody).
-Line: 1 Col: 38 Unexpected start tag (tfoot).
-Line: 1 Col: 45 Unexpected start tag (thead).
-Line: 1 Col: 49 Unexpected end of file. Expected table content.
-#document-fragment
-tbody
-#document
-| <tr>
-
-#data
-<table><tbody></thead>
-#errors
-Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
-Line: 1 Col: 22 Unexpected end tag (thead) in the table body phase. Ignored.
-Line: 1 Col: 22 Unexpected end of file. Expected table content.
-#document
-| <html>
-| <head>
-| <body>
-| <table>
-| <tbody>
-
-#data
-</table><tr>
-#errors
-Line: 1 Col: 8 Unexpected end tag (table). Ignored.
-Line: 1 Col: 12 Unexpected end of file. Expected table content.
-#document-fragment
-tbody
-#document
-| <tr>
-
-#data
-<table><tbody></body></caption></col></colgroup></html></td></th></tr>
-#errors
-Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
-Line: 1 Col: 21 Unexpected end tag (body) in the table body phase. Ignored.
-Line: 1 Col: 31 Unexpected end tag (caption) in the table body phase. Ignored.
-Line: 1 Col: 37 Unexpected end tag (col) in the table body phase. Ignored.
-Line: 1 Col: 48 Unexpected end tag (colgroup) in the table body phase. Ignored.
-Line: 1 Col: 55 Unexpected end tag (html) in the table body phase. Ignored.
-Line: 1 Col: 60 Unexpected end tag (td) in the table body phase. Ignored.
-Line: 1 Col: 65 Unexpected end tag (th) in the table body phase. Ignored.
-Line: 1 Col: 70 Unexpected end tag (tr) in the table body phase. Ignored.
-Line: 1 Col: 70 Unexpected end of file. Expected table content.
-#document
-| <html>
-| <head>
-| <body>
-| <table>
-| <tbody>
-
-#data
-<table><tbody></div>
-#errors
-Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
-Line: 1 Col: 20 Unexpected end tag (div) in table context caused voodoo mode.
-Line: 1 Col: 20 End tag (div) seen too early. Expected other end tag.
-Line: 1 Col: 20 Unexpected end of file. Expected table content.
-#document
-| <html>
-| <head>
-| <body>
-| <table>
-| <tbody>
-
-#data
-<table><table>
-#errors
-Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
-Line: 1 Col: 14 Unexpected start tag (table) implies end tag (table).
-Line: 1 Col: 14 Unexpected end of file. Expected table content.
-#document
-| <html>
-| <head>
-| <body>
-| <table>
-| <table>
-
-#data
-<table></body></caption></col></colgroup></html></tbody></td></tfoot></th></thead></tr>
-#errors
-Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
-Line: 1 Col: 14 Unexpected end tag (body). Ignored.
-Line: 1 Col: 24 Unexpected end tag (caption). Ignored.
-Line: 1 Col: 30 Unexpected end tag (col). Ignored.
-Line: 1 Col: 41 Unexpected end tag (colgroup). Ignored.
-Line: 1 Col: 48 Unexpected end tag (html). Ignored.
-Line: 1 Col: 56 Unexpected end tag (tbody). Ignored.
-Line: 1 Col: 61 Unexpected end tag (td). Ignored.
-Line: 1 Col: 69 Unexpected end tag (tfoot). Ignored.
-Line: 1 Col: 74 Unexpected end tag (th). Ignored.
-Line: 1 Col: 82 Unexpected end tag (thead). Ignored.
-Line: 1 Col: 87 Unexpected end tag (tr). Ignored.
-Line: 1 Col: 87 Unexpected end of file. Expected table content.
-#document
-| <html>
-| <head>
-| <body>
-| <table>
-
-#data
-</table><tr>
-#errors
-Line: 1 Col: 8 Unexpected end tag (table). Ignored.
-Line: 1 Col: 12 Unexpected end of file. Expected table content.
-#document-fragment
-table
-#document
-| <tbody>
-| <tr>
-
-#data
-<body></body></html>
-#errors
-Line: 1 Col: 20 Unexpected html end tag in inner html mode.
-Line: 1 Col: 20 Unexpected EOF in inner html mode.
-#document-fragment
-html
-#document
-| <head>
-| <body>
-
-#data
-<html><frameset></frameset></html>
-#errors
-Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <frameset>
-| " "
-
-#data
-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html></html>
-#errors
-Line: 1 Col: 50 Erroneous DOCTYPE.
-Line: 1 Col: 63 Unexpected end tag (html) after the (implied) root element.
-#document
-| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "">
-| <html>
-| <head>
-| <body>
-
-#data
-<param><frameset></frameset>
-#errors
-Line: 1 Col: 7 Unexpected start tag (param). Expected DOCTYPE.
-Line: 1 Col: 17 Unexpected start tag (frameset).
-#document
-| <html>
-| <head>
-| <frameset>
-
-#data
-<source><frameset></frameset>
-#errors
-Line: 1 Col: 7 Unexpected start tag (source). Expected DOCTYPE.
-Line: 1 Col: 17 Unexpected start tag (frameset).
-#document
-| <html>
-| <head>
-| <frameset>
-
-#data
-<track><frameset></frameset>
-#errors
-Line: 1 Col: 7 Unexpected start tag (track). Expected DOCTYPE.
-Line: 1 Col: 17 Unexpected start tag (frameset).
-#document
-| <html>
-| <head>
-| <frameset>
-
-#data
-</html><frameset></frameset>
-#errors
-7: End tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
-17: Stray “frameset” start tag.
-17: “frameset” start tag seen.
-#document
-| <html>
-| <head>
-| <frameset>
-
-#data
-</body><frameset></frameset>
-#errors
-7: End tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
-17: Stray “frameset” start tag.
-17: “frameset” start tag seen.
-#document
-| <html>
-| <head>
-| <frameset>
diff --git a/src/pkg/html/testdata/webkit/tests7.dat b/src/pkg/html/testdata/webkit/tests7.dat
deleted file mode 100644
index f5193c660..000000000
--- a/src/pkg/html/testdata/webkit/tests7.dat
+++ /dev/null
@@ -1,390 +0,0 @@
-#data
-<!doctype html><body><title>X</title>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <title>
-| "X"
-
-#data
-<!doctype html><table><title>X</title></table>
-#errors
-Line: 1 Col: 29 Unexpected start tag (title) in table context caused voodoo mode.
-Line: 1 Col: 38 Unexpected end tag (title) in table context caused voodoo mode.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <title>
-| "X"
-| <table>
-
-#data
-<!doctype html><head></head><title>X</title>
-#errors
-Line: 1 Col: 35 Unexpected start tag (title) that can be in head. Moved.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <title>
-| "X"
-| <body>
-
-#data
-<!doctype html></head><title>X</title>
-#errors
-Line: 1 Col: 29 Unexpected start tag (title) that can be in head. Moved.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <title>
-| "X"
-| <body>
-
-#data
-<!doctype html><table><meta></table>
-#errors
-Line: 1 Col: 28 Unexpected start tag (meta) in table context caused voodoo mode.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <meta>
-| <table>
-
-#data
-<!doctype html><table>X<tr><td><table> <meta></table></table>
-#errors
-Line: 1 Col: 23 Unexpected non-space characters in table context caused voodoo mode.
-Line: 1 Col: 45 Unexpected start tag (meta) in table context caused voodoo mode.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "X"
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| <meta>
-| <table>
-| " "
-
-#data
-<!doctype html><html> <head>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-
-#data
-<!doctype html> <head>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-
-#data
-<!doctype html><table><style> <tr>x </style> </table>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <table>
-| <style>
-| " <tr>x "
-| " "
-
-#data
-<!doctype html><table><TBODY><script> <tr>x </script> </table>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <table>
-| <tbody>
-| <script>
-| " <tr>x "
-| " "
-
-#data
-<!doctype html><p><applet><p>X</p></applet>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <p>
-| <applet>
-| <p>
-| "X"
-
-#data
-<!doctype html><listing>
-X</listing>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <listing>
-| "X"
-
-#data
-<!doctype html><select><input>X
-#errors
-Line: 1 Col: 30 Unexpected input start tag in the select phase.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <select>
-| <input>
-| "X"
-
-#data
-<!doctype html><select><select>X
-#errors
-Line: 1 Col: 31 Unexpected select start tag in the select phase treated as select end tag.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <select>
-| "X"
-
-#data
-<!doctype html><table><input type=hidDEN></table>
-#errors
-Line: 1 Col: 41 Unexpected input with type hidden in table context.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <table>
-| <input>
-| type="hidDEN"
-
-#data
-<!doctype html><table>X<input type=hidDEN></table>
-#errors
-Line: 1 Col: 23 Unexpected non-space characters in table context caused voodoo mode.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "X"
-| <table>
-| <input>
-| type="hidDEN"
-
-#data
-<!doctype html><table> <input type=hidDEN></table>
-#errors
-Line: 1 Col: 43 Unexpected input with type hidden in table context.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <table>
-| " "
-| <input>
-| type="hidDEN"
-
-#data
-<!doctype html><table> <input type='hidDEN'></table>
-#errors
-Line: 1 Col: 45 Unexpected input with type hidden in table context.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <table>
-| " "
-| <input>
-| type="hidDEN"
-
-#data
-<!doctype html><table><input type=" hidden"><input type=hidDEN></table>
-#errors
-Line: 1 Col: 44 Unexpected start tag (input) in table context caused voodoo mode.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <input>
-| type=" hidden"
-| <table>
-| <input>
-| type="hidDEN"
-
-#data
-<!doctype html><table><select>X<tr>
-#errors
-Line: 1 Col: 30 Unexpected start tag (select) in table context caused voodoo mode.
-Line: 1 Col: 35 Unexpected table element start tag (trs) in the select in table phase.
-Line: 1 Col: 35 Unexpected end of file. Expected table content.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <select>
-| "X"
-| <table>
-| <tbody>
-| <tr>
-
-#data
-<!doctype html><select>X</select>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <select>
-| "X"
-
-#data
-<!DOCTYPE hTmL><html></html>
-#errors
-Line: 1 Col: 28 Unexpected end tag (html) after the (implied) root element.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-
-#data
-<!DOCTYPE HTML><html></html>
-#errors
-Line: 1 Col: 28 Unexpected end tag (html) after the (implied) root element.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-
-#data
-<body>X</body></body>
-#errors
-Line: 1 Col: 21 Unexpected end tag token (body) in the after body phase.
-Line: 1 Col: 21 Unexpected EOF in inner html mode.
-#document-fragment
-html
-#document
-| <head>
-| <body>
-| "X"
-
-#data
-<div><p>a</x> b
-#errors
-Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
-Line: 1 Col: 13 Unexpected end tag (x). Ignored.
-Line: 1 Col: 15 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| <p>
-| "a b"
-
-#data
-<table><tr><td><code></code> </table>
-#errors
-Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| <code>
-| " "
-
-#data
-<table><b><tr><td>aaa</td></tr>bbb</table>ccc
-#errors
-XXX: Fix me
-#document
-| <html>
-| <head>
-| <body>
-| <b>
-| <b>
-| "bbb"
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| "aaa"
-| <b>
-| "ccc"
-
-#data
-A<table><tr> B</tr> B</table>
-#errors
-XXX: Fix me
-#document
-| <html>
-| <head>
-| <body>
-| "A B B"
-| <table>
-| <tbody>
-| <tr>
-
-#data
-A<table><tr> B</tr> </em>C</table>
-#errors
-XXX: Fix me
-#document
-| <html>
-| <head>
-| <body>
-| "A BC"
-| <table>
-| <tbody>
-| <tr>
-| " "
-
-#data
-<select><keygen>
-#errors
-Not known
-#document
-| <html>
-| <head>
-| <body>
-| <select>
-| <keygen>
diff --git a/src/pkg/html/testdata/webkit/tests8.dat b/src/pkg/html/testdata/webkit/tests8.dat
deleted file mode 100644
index 90e6c919e..000000000
--- a/src/pkg/html/testdata/webkit/tests8.dat
+++ /dev/null
@@ -1,148 +0,0 @@
-#data
-<div>
-<div></div>
-</span>x
-#errors
-Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
-Line: 3 Col: 7 Unexpected end tag (span). Ignored.
-Line: 3 Col: 8 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| "
-"
-| <div>
-| "
-x"
-
-#data
-<div>x<div></div>
-</span>x
-#errors
-Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
-Line: 2 Col: 7 Unexpected end tag (span). Ignored.
-Line: 2 Col: 8 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| "x"
-| <div>
-| "
-x"
-
-#data
-<div>x<div></div>x</span>x
-#errors
-Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
-Line: 1 Col: 25 Unexpected end tag (span). Ignored.
-Line: 1 Col: 26 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| "x"
-| <div>
-| "xx"
-
-#data
-<div>x<div></div>y</span>z
-#errors
-Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
-Line: 1 Col: 25 Unexpected end tag (span). Ignored.
-Line: 1 Col: 26 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| "x"
-| <div>
-| "yz"
-
-#data
-<table><div>x<div></div>x</span>x
-#errors
-Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
-Line: 1 Col: 12 Unexpected start tag (div) in table context caused voodoo mode.
-Line: 1 Col: 18 Unexpected start tag (div) in table context caused voodoo mode.
-Line: 1 Col: 24 Unexpected end tag (div) in table context caused voodoo mode.
-Line: 1 Col: 32 Unexpected end tag (span) in table context caused voodoo mode.
-Line: 1 Col: 32 Unexpected end tag (span). Ignored.
-Line: 1 Col: 33 Unexpected end of file. Expected table content.
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| "x"
-| <div>
-| "xx"
-| <table>
-
-#data
-x<table>x
-#errors
-Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
-Line: 1 Col: 9 Unexpected non-space characters in table context caused voodoo mode.
-Line: 1 Col: 9 Unexpected end of file. Expected table content.
-#document
-| <html>
-| <head>
-| <body>
-| "xx"
-| <table>
-
-#data
-x<table><table>x
-#errors
-Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
-Line: 1 Col: 15 Unexpected start tag (table) implies end tag (table).
-Line: 1 Col: 16 Unexpected non-space characters in table context caused voodoo mode.
-Line: 1 Col: 16 Unexpected end of file. Expected table content.
-#document
-| <html>
-| <head>
-| <body>
-| "x"
-| <table>
-| "x"
-| <table>
-
-#data
-<b>a<div></div><div></b>y
-#errors
-Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
-Line: 1 Col: 24 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
-Line: 1 Col: 25 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <b>
-| "a"
-| <div>
-| <div>
-| <b>
-| "y"
-
-#data
-<a><div><p></a>
-#errors
-Line: 1 Col: 3 Unexpected start tag (a). Expected DOCTYPE.
-Line: 1 Col: 15 End tag (a) violates step 1, paragraph 3 of the adoption agency algorithm.
-Line: 1 Col: 15 End tag (a) violates step 1, paragraph 3 of the adoption agency algorithm.
-Line: 1 Col: 15 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-| <head>
-| <body>
-| <a>
-| <div>
-| <a>
-| <p>
-| <a>
diff --git a/src/pkg/html/testdata/webkit/tests9.dat b/src/pkg/html/testdata/webkit/tests9.dat
deleted file mode 100644
index 554e27aec..000000000
--- a/src/pkg/html/testdata/webkit/tests9.dat
+++ /dev/null
@@ -1,457 +0,0 @@
-#data
-<!DOCTYPE html><math></math>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <math math>
-
-#data
-<!DOCTYPE html><body><math></math>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <math math>
-
-#data
-<!DOCTYPE html><math><mi>
-#errors
-25: End of file in a foreign namespace context.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <math math>
-| <math mi>
-
-#data
-<!DOCTYPE html><math><annotation-xml><svg><u>
-#errors
-45: HTML start tag “u” in a foreign namespace context.
-45: End of file seen and there were open elements.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <math math>
-| <math annotation-xml>
-| <svg svg>
-| <u>
-
-#data
-<!DOCTYPE html><body><select><math></math></select>
-#errors
-Line: 1 Col: 35 Unexpected start tag token (math) in the select phase. Ignored.
-Line: 1 Col: 42 Unexpected end tag (math) in the select phase. Ignored.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <select>
-
-#data
-<!DOCTYPE html><body><select><option><math></math></option></select>
-#errors
-Line: 1 Col: 43 Unexpected start tag token (math) in the select phase. Ignored.
-Line: 1 Col: 50 Unexpected end tag (math) in the select phase. Ignored.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <select>
-| <option>
-
-#data
-<!DOCTYPE html><body><table><math></math></table>
-#errors
-Line: 1 Col: 34 Unexpected start tag (math) in table context caused voodoo mode.
-Line: 1 Col: 41 Unexpected end tag (math) in table context caused voodoo mode.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <math math>
-| <table>
-
-#data
-<!DOCTYPE html><body><table><math><mi>foo</mi></math></table>
-#errors
-Line: 1 Col: 34 Unexpected start tag (math) in table context caused voodoo mode.
-Line: 1 Col: 46 Unexpected end tag (mi) in table context caused voodoo mode.
-Line: 1 Col: 53 Unexpected end tag (math) in table context caused voodoo mode.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <math math>
-| <math mi>
-| "foo"
-| <table>
-
-#data
-<!DOCTYPE html><body><table><math><mi>foo</mi><mi>bar</mi></math></table>
-#errors
-Line: 1 Col: 34 Unexpected start tag (math) in table context caused voodoo mode.
-Line: 1 Col: 46 Unexpected end tag (mi) in table context caused voodoo mode.
-Line: 1 Col: 58 Unexpected end tag (mi) in table context caused voodoo mode.
-Line: 1 Col: 65 Unexpected end tag (math) in table context caused voodoo mode.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <math math>
-| <math mi>
-| "foo"
-| <math mi>
-| "bar"
-| <table>
-
-#data
-<!DOCTYPE html><body><table><tbody><math><mi>foo</mi><mi>bar</mi></math></tbody></table>
-#errors
-Line: 1 Col: 41 Unexpected start tag (math) in table context caused voodoo mode.
-Line: 1 Col: 53 Unexpected end tag (mi) in table context caused voodoo mode.
-Line: 1 Col: 65 Unexpected end tag (mi) in table context caused voodoo mode.
-Line: 1 Col: 72 Unexpected end tag (math) in table context caused voodoo mode.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <math math>
-| <math mi>
-| "foo"
-| <math mi>
-| "bar"
-| <table>
-| <tbody>
-
-#data
-<!DOCTYPE html><body><table><tbody><tr><math><mi>foo</mi><mi>bar</mi></math></tr></tbody></table>
-#errors
-Line: 1 Col: 45 Unexpected start tag (math) in table context caused voodoo mode.
-Line: 1 Col: 57 Unexpected end tag (mi) in table context caused voodoo mode.
-Line: 1 Col: 69 Unexpected end tag (mi) in table context caused voodoo mode.
-Line: 1 Col: 76 Unexpected end tag (math) in table context caused voodoo mode.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <math math>
-| <math mi>
-| "foo"
-| <math mi>
-| "bar"
-| <table>
-| <tbody>
-| <tr>
-
-#data
-<!DOCTYPE html><body><table><tbody><tr><td><math><mi>foo</mi><mi>bar</mi></math></td></tr></tbody></table>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| <math math>
-| <math mi>
-| "foo"
-| <math mi>
-| "bar"
-
-#data
-<!DOCTYPE html><body><table><tbody><tr><td><math><mi>foo</mi><mi>bar</mi></math><p>baz</td></tr></tbody></table>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| <math math>
-| <math mi>
-| "foo"
-| <math mi>
-| "bar"
-| <p>
-| "baz"
-
-#data
-<!DOCTYPE html><body><table><caption><math><mi>foo</mi><mi>bar</mi></math><p>baz</caption></table>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <table>
-| <caption>
-| <math math>
-| <math mi>
-| "foo"
-| <math mi>
-| "bar"
-| <p>
-| "baz"
-
-#data
-<!DOCTYPE html><body><table><caption><math><mi>foo</mi><mi>bar</mi><p>baz</table><p>quux
-#errors
-Line: 1 Col: 70 HTML start tag "p" in a foreign namespace context.
-Line: 1 Col: 81 Unexpected end table tag in caption. Generates implied end caption.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <table>
-| <caption>
-| <math math>
-| <math mi>
-| "foo"
-| <math mi>
-| "bar"
-| <p>
-| "baz"
-| <p>
-| "quux"
-
-#data
-<!DOCTYPE html><body><table><caption><math><mi>foo</mi><mi>bar</mi>baz</table><p>quux
-#errors
-Line: 1 Col: 78 Unexpected end table tag in caption. Generates implied end caption.
-Line: 1 Col: 78 Unexpected end tag (caption). Missing end tag (math).
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <table>
-| <caption>
-| <math math>
-| <math mi>
-| "foo"
-| <math mi>
-| "bar"
-| "baz"
-| <p>
-| "quux"
-
-#data
-<!DOCTYPE html><body><table><colgroup><math><mi>foo</mi><mi>bar</mi><p>baz</table><p>quux
-#errors
-Line: 1 Col: 44 Unexpected start tag (math) in table context caused voodoo mode.
-Line: 1 Col: 56 Unexpected end tag (mi) in table context caused voodoo mode.
-Line: 1 Col: 68 Unexpected end tag (mi) in table context caused voodoo mode.
-Line: 1 Col: 71 HTML start tag "p" in a foreign namespace context.
-Line: 1 Col: 71 Unexpected start tag (p) in table context caused voodoo mode.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <math math>
-| <math mi>
-| "foo"
-| <math mi>
-| "bar"
-| <p>
-| "baz"
-| <table>
-| <colgroup>
-| <p>
-| "quux"
-
-#data
-<!DOCTYPE html><body><table><tr><td><select><math><mi>foo</mi><mi>bar</mi><p>baz</table><p>quux
-#errors
-Line: 1 Col: 50 Unexpected start tag token (math) in the select phase. Ignored.
-Line: 1 Col: 54 Unexpected start tag token (mi) in the select phase. Ignored.
-Line: 1 Col: 62 Unexpected end tag (mi) in the select phase. Ignored.
-Line: 1 Col: 66 Unexpected start tag token (mi) in the select phase. Ignored.
-Line: 1 Col: 74 Unexpected end tag (mi) in the select phase. Ignored.
-Line: 1 Col: 77 Unexpected start tag token (p) in the select phase. Ignored.
-Line: 1 Col: 88 Unexpected table element end tag (tables) in the select in table phase.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| <select>
-| "foobarbaz"
-| <p>
-| "quux"
-
-#data
-<!DOCTYPE html><body><table><select><math><mi>foo</mi><mi>bar</mi><p>baz</table><p>quux
-#errors
-Line: 1 Col: 36 Unexpected start tag (select) in table context caused voodoo mode.
-Line: 1 Col: 42 Unexpected start tag token (math) in the select phase. Ignored.
-Line: 1 Col: 46 Unexpected start tag token (mi) in the select phase. Ignored.
-Line: 1 Col: 54 Unexpected end tag (mi) in the select phase. Ignored.
-Line: 1 Col: 58 Unexpected start tag token (mi) in the select phase. Ignored.
-Line: 1 Col: 66 Unexpected end tag (mi) in the select phase. Ignored.
-Line: 1 Col: 69 Unexpected start tag token (p) in the select phase. Ignored.
-Line: 1 Col: 80 Unexpected table element end tag (tables) in the select in table phase.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <select>
-| "foobarbaz"
-| <table>
-| <p>
-| "quux"
-
-#data
-<!DOCTYPE html><body></body></html><math><mi>foo</mi><mi>bar</mi><p>baz
-#errors
-Line: 1 Col: 41 Unexpected start tag (math).
-Line: 1 Col: 68 HTML start tag "p" in a foreign namespace context.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <math math>
-| <math mi>
-| "foo"
-| <math mi>
-| "bar"
-| <p>
-| "baz"
-
-#data
-<!DOCTYPE html><body></body><math><mi>foo</mi><mi>bar</mi><p>baz
-#errors
-Line: 1 Col: 34 Unexpected start tag token (math) in the after body phase.
-Line: 1 Col: 61 HTML start tag "p" in a foreign namespace context.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <math math>
-| <math mi>
-| "foo"
-| <math mi>
-| "bar"
-| <p>
-| "baz"
-
-#data
-<!DOCTYPE html><frameset><math><mi></mi><mi></mi><p><span>
-#errors
-Line: 1 Col: 31 Unexpected start tag token (math) in the frameset phase. Ignored.
-Line: 1 Col: 35 Unexpected start tag token (mi) in the frameset phase. Ignored.
-Line: 1 Col: 40 Unexpected end tag token (mi) in the frameset phase. Ignored.
-Line: 1 Col: 44 Unexpected start tag token (mi) in the frameset phase. Ignored.
-Line: 1 Col: 49 Unexpected end tag token (mi) in the frameset phase. Ignored.
-Line: 1 Col: 52 Unexpected start tag token (p) in the frameset phase. Ignored.
-Line: 1 Col: 58 Unexpected start tag token (span) in the frameset phase. Ignored.
-Line: 1 Col: 58 Expected closing tag. Unexpected end of file.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <frameset>
-
-#data
-<!DOCTYPE html><frameset></frameset><math><mi></mi><mi></mi><p><span>
-#errors
-Line: 1 Col: 42 Unexpected start tag (math) in the after frameset phase. Ignored.
-Line: 1 Col: 46 Unexpected start tag (mi) in the after frameset phase. Ignored.
-Line: 1 Col: 51 Unexpected end tag (mi) in the after frameset phase. Ignored.
-Line: 1 Col: 55 Unexpected start tag (mi) in the after frameset phase. Ignored.
-Line: 1 Col: 60 Unexpected end tag (mi) in the after frameset phase. Ignored.
-Line: 1 Col: 63 Unexpected start tag (p) in the after frameset phase. Ignored.
-Line: 1 Col: 69 Unexpected start tag (span) in the after frameset phase. Ignored.
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <frameset>
-
-#data
-<!DOCTYPE html><body xlink:href=foo><math xlink:href=foo></math>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| xlink:href="foo"
-| <math math>
-| xlink href="foo"
-
-#data
-<!DOCTYPE html><body xlink:href=foo xml:lang=en><math><mi xml:lang=en xlink:href=foo></mi></math>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| xlink:href="foo"
-| xml:lang="en"
-| <math math>
-| <math mi>
-| xlink href="foo"
-| xml lang="en"
-
-#data
-<!DOCTYPE html><body xlink:href=foo xml:lang=en><math><mi xml:lang=en xlink:href=foo /></math>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| xlink:href="foo"
-| xml:lang="en"
-| <math math>
-| <math mi>
-| xlink href="foo"
-| xml lang="en"
-
-#data
-<!DOCTYPE html><body xlink:href=foo xml:lang=en><math><mi xml:lang=en xlink:href=foo />bar</math>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| xlink:href="foo"
-| xml:lang="en"
-| <math math>
-| <math mi>
-| xlink href="foo"
-| xml lang="en"
-| "bar"
diff --git a/src/pkg/html/testdata/webkit/tests_innerHTML_1.dat b/src/pkg/html/testdata/webkit/tests_innerHTML_1.dat
deleted file mode 100644
index 052fac7d5..000000000
--- a/src/pkg/html/testdata/webkit/tests_innerHTML_1.dat
+++ /dev/null
@@ -1,733 +0,0 @@
-#data
-<body><span>
-#errors
-#document-fragment
-body
-#document
-| <span>
-
-#data
-<span><body>
-#errors
-#document-fragment
-body
-#document
-| <span>
-
-#data
-<span><body>
-#errors
-#document-fragment
-div
-#document
-| <span>
-
-#data
-<body><span>
-#errors
-#document-fragment
-html
-#document
-| <head>
-| <body>
-| <span>
-
-#data
-<frameset><span>
-#errors
-#document-fragment
-body
-#document
-| <span>
-
-#data
-<span><frameset>
-#errors
-#document-fragment
-body
-#document
-| <span>
-
-#data
-<span><frameset>
-#errors
-#document-fragment
-div
-#document
-| <span>
-
-#data
-<frameset><span>
-#errors
-#document-fragment
-html
-#document
-| <head>
-| <frameset>
-
-#data
-<table><tr>
-#errors
-#document-fragment
-table
-#document
-| <tbody>
-| <tr>
-
-#data
-</table><tr>
-#errors
-#document-fragment
-table
-#document
-| <tbody>
-| <tr>
-
-#data
-<a>
-#errors
-#document-fragment
-table
-#document
-| <a>
-
-#data
-<a>
-#errors
-#document-fragment
-table
-#document
-| <a>
-
-#data
-<a><caption>a
-#errors
-#document-fragment
-table
-#document
-| <a>
-| <caption>
-| "a"
-
-#data
-<a><colgroup><col>
-#errors
-#document-fragment
-table
-#document
-| <a>
-| <colgroup>
-| <col>
-
-#data
-<a><tbody><tr>
-#errors
-#document-fragment
-table
-#document
-| <a>
-| <tbody>
-| <tr>
-
-#data
-<a><tfoot><tr>
-#errors
-#document-fragment
-table
-#document
-| <a>
-| <tfoot>
-| <tr>
-
-#data
-<a><thead><tr>
-#errors
-#document-fragment
-table
-#document
-| <a>
-| <thead>
-| <tr>
-
-#data
-<a><tr>
-#errors
-#document-fragment
-table
-#document
-| <a>
-| <tbody>
-| <tr>
-
-#data
-<a><th>
-#errors
-#document-fragment
-table
-#document
-| <a>
-| <tbody>
-| <tr>
-| <th>
-
-#data
-<a><td>
-#errors
-#document-fragment
-table
-#document
-| <a>
-| <tbody>
-| <tr>
-| <td>
-
-#data
-<table></table><tbody>
-#errors
-#document-fragment
-caption
-#document
-| <table>
-
-#data
-</table><span>
-#errors
-#document-fragment
-caption
-#document
-| <span>
-
-#data
-<span></table>
-#errors
-#document-fragment
-caption
-#document
-| <span>
-
-#data
-</caption><span>
-#errors
-#document-fragment
-caption
-#document
-| <span>
-
-#data
-<span></caption><span>
-#errors
-#document-fragment
-caption
-#document
-| <span>
-| <span>
-
-#data
-<span><caption><span>
-#errors
-#document-fragment
-caption
-#document
-| <span>
-| <span>
-
-#data
-<span><col><span>
-#errors
-#document-fragment
-caption
-#document
-| <span>
-| <span>
-
-#data
-<span><colgroup><span>
-#errors
-#document-fragment
-caption
-#document
-| <span>
-| <span>
-
-#data
-<span><html><span>
-#errors
-#document-fragment
-caption
-#document
-| <span>
-| <span>
-
-#data
-<span><tbody><span>
-#errors
-#document-fragment
-caption
-#document
-| <span>
-| <span>
-
-#data
-<span><td><span>
-#errors
-#document-fragment
-caption
-#document
-| <span>
-| <span>
-
-#data
-<span><tfoot><span>
-#errors
-#document-fragment
-caption
-#document
-| <span>
-| <span>
-
-#data
-<span><thead><span>
-#errors
-#document-fragment
-caption
-#document
-| <span>
-| <span>
-
-#data
-<span><th><span>
-#errors
-#document-fragment
-caption
-#document
-| <span>
-| <span>
-
-#data
-<span><tr><span>
-#errors
-#document-fragment
-caption
-#document
-| <span>
-| <span>
-
-#data
-<span></table><span>
-#errors
-#document-fragment
-caption
-#document
-| <span>
-| <span>
-
-#data
-</colgroup><col>
-#errors
-#document-fragment
-colgroup
-#document
-| <col>
-
-#data
-<a><col>
-#errors
-#document-fragment
-colgroup
-#document
-| <col>
-
-#data
-<caption><a>
-#errors
-#document-fragment
-tbody
-#document
-| <a>
-
-#data
-<col><a>
-#errors
-#document-fragment
-tbody
-#document
-| <a>
-
-#data
-<colgroup><a>
-#errors
-#document-fragment
-tbody
-#document
-| <a>
-
-#data
-<tbody><a>
-#errors
-#document-fragment
-tbody
-#document
-| <a>
-
-#data
-<tfoot><a>
-#errors
-#document-fragment
-tbody
-#document
-| <a>
-
-#data
-<thead><a>
-#errors
-#document-fragment
-tbody
-#document
-| <a>
-
-#data
-</table><a>
-#errors
-#document-fragment
-tbody
-#document
-| <a>
-
-#data
-<a><tr>
-#errors
-#document-fragment
-tbody
-#document
-| <a>
-| <tr>
-
-#data
-<a><td>
-#errors
-#document-fragment
-tbody
-#document
-| <a>
-| <tr>
-| <td>
-
-#data
-<a><td>
-#errors
-#document-fragment
-tbody
-#document
-| <a>
-| <tr>
-| <td>
-
-#data
-<a><td>
-#errors
-#document-fragment
-tbody
-#document
-| <a>
-| <tr>
-| <td>
-
-#data
-<td><table><tbody><a><tr>
-#errors
-#document-fragment
-tbody
-#document
-| <tr>
-| <td>
-| <a>
-| <table>
-| <tbody>
-| <tr>
-
-#data
-</tr><td>
-#errors
-#document-fragment
-tr
-#document
-| <td>
-
-#data
-<td><table><a><tr></tr><tr>
-#errors
-#document-fragment
-tr
-#document
-| <td>
-| <a>
-| <table>
-| <tbody>
-| <tr>
-| <tr>
-
-#data
-<caption><td>
-#errors
-#document-fragment
-tr
-#document
-| <td>
-
-#data
-<col><td>
-#errors
-#document-fragment
-tr
-#document
-| <td>
-
-#data
-<colgroup><td>
-#errors
-#document-fragment
-tr
-#document
-| <td>
-
-#data
-<tbody><td>
-#errors
-#document-fragment
-tr
-#document
-| <td>
-
-#data
-<tfoot><td>
-#errors
-#document-fragment
-tr
-#document
-| <td>
-
-#data
-<thead><td>
-#errors
-#document-fragment
-tr
-#document
-| <td>
-
-#data
-<tr><td>
-#errors
-#document-fragment
-tr
-#document
-| <td>
-
-#data
-</table><td>
-#errors
-#document-fragment
-tr
-#document
-| <td>
-
-#data
-<td><table></table><td>
-#errors
-#document-fragment
-tr
-#document
-| <td>
-| <table>
-| <td>
-
-#data
-<td><table></table><td>
-#errors
-#document-fragment
-tr
-#document
-| <td>
-| <table>
-| <td>
-
-#data
-<caption><a>
-#errors
-#document-fragment
-td
-#document
-| <a>
-
-#data
-<col><a>
-#errors
-#document-fragment
-td
-#document
-| <a>
-
-#data
-<colgroup><a>
-#errors
-#document-fragment
-td
-#document
-| <a>
-
-#data
-<tbody><a>
-#errors
-#document-fragment
-td
-#document
-| <a>
-
-#data
-<tfoot><a>
-#errors
-#document-fragment
-td
-#document
-| <a>
-
-#data
-<th><a>
-#errors
-#document-fragment
-td
-#document
-| <a>
-
-#data
-<thead><a>
-#errors
-#document-fragment
-td
-#document
-| <a>
-
-#data
-<tr><a>
-#errors
-#document-fragment
-td
-#document
-| <a>
-
-#data
-</table><a>
-#errors
-#document-fragment
-td
-#document
-| <a>
-
-#data
-</tbody><a>
-#errors
-#document-fragment
-td
-#document
-| <a>
-
-#data
-</td><a>
-#errors
-#document-fragment
-td
-#document
-| <a>
-
-#data
-</tfoot><a>
-#errors
-#document-fragment
-td
-#document
-| <a>
-
-#data
-</thead><a>
-#errors
-#document-fragment
-td
-#document
-| <a>
-
-#data
-</th><a>
-#errors
-#document-fragment
-td
-#document
-| <a>
-
-#data
-</tr><a>
-#errors
-#document-fragment
-td
-#document
-| <a>
-
-#data
-<table><td><td>
-#errors
-#document-fragment
-td
-#document
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| <td>
-
-#data
-</select><option>
-#errors
-#document-fragment
-select
-#document
-| <option>
-
-#data
-<input><option>
-#errors
-#document-fragment
-select
-#document
-| <option>
-
-#data
-<keygen><option>
-#errors
-#document-fragment
-select
-#document
-| <option>
-
-#data
-<textarea><option>
-#errors
-#document-fragment
-select
-#document
-| <option>
-
-#data
-</html><!--abc-->
-#errors
-#document-fragment
-html
-#document
-| <head>
-| <body>
-| <!-- abc -->
-
-#data
-</frameset><frame>
-#errors
-#document-fragment
-frameset
-#document
-| <frame>
diff --git a/src/pkg/html/testdata/webkit/tricky01.dat b/src/pkg/html/testdata/webkit/tricky01.dat
deleted file mode 100644
index 084199244..000000000
--- a/src/pkg/html/testdata/webkit/tricky01.dat
+++ /dev/null
@@ -1,261 +0,0 @@
-#data
-<b><p>Bold </b> Not bold</p>
-Also not bold.
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <b>
-| <p>
-| <b>
-| "Bold "
-| " Not bold"
-| "
-Also not bold."
-
-#data
-<html>
-<font color=red><i>Italic and Red<p>Italic and Red </font> Just italic.</p> Italic only.</i> Plain
-<p>I should not be red. <font color=red>Red. <i>Italic and red.</p>
-<p>Italic and red. </i> Red.</font> I should not be red.</p>
-<b>Bold <i>Bold and italic</b> Only Italic </i> Plain
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <font>
-| color="red"
-| <i>
-| "Italic and Red"
-| <i>
-| <p>
-| <font>
-| color="red"
-| "Italic and Red "
-| " Just italic."
-| " Italic only."
-| " Plain
-"
-| <p>
-| "I should not be red. "
-| <font>
-| color="red"
-| "Red. "
-| <i>
-| "Italic and red."
-| <font>
-| color="red"
-| <i>
-| "
-"
-| <p>
-| <font>
-| color="red"
-| <i>
-| "Italic and red. "
-| " Red."
-| " I should not be red."
-| "
-"
-| <b>
-| "Bold "
-| <i>
-| "Bold and italic"
-| <i>
-| " Only Italic "
-| " Plain"
-
-#data
-<html><body>
-<p><font size="7">First paragraph.</p>
-<p>Second paragraph.</p></font>
-<b><p><i>Bold and Italic</b> Italic</p>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "
-"
-| <p>
-| <font>
-| size="7"
-| "First paragraph."
-| <font>
-| size="7"
-| "
-"
-| <p>
-| "Second paragraph."
-| "
-"
-| <b>
-| <p>
-| <b>
-| <i>
-| "Bold and Italic"
-| <i>
-| " Italic"
-
-#data
-<html>
-<dl>
-<dt><b>Boo
-<dd>Goo?
-</dl>
-</html>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <dl>
-| "
-"
-| <dt>
-| <b>
-| "Boo
-"
-| <dd>
-| <b>
-| "Goo?
-"
-| <b>
-| "
-"
-
-#data
-<html><body>
-<label><a><div>Hello<div>World</div></a></label>
-</body></html>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "
-"
-| <label>
-| <a>
-| <div>
-| <a>
-| "Hello"
-| <div>
-| "World"
-| "
-"
-
-#data
-<table><center> <font>a</center> <img> <tr><td> </td> </tr> </table>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <center>
-| " "
-| <font>
-| "a"
-| <font>
-| <img>
-| " "
-| <table>
-| " "
-| <tbody>
-| <tr>
-| <td>
-| " "
-| " "
-| " "
-
-#data
-<table><tr><p><a><p>You should see this text.
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <p>
-| <a>
-| <p>
-| <a>
-| "You should see this text."
-| <table>
-| <tbody>
-| <tr>
-
-#data
-<TABLE>
-<TR>
-<CENTER><CENTER><TD></TD></TR><TR>
-<FONT>
-<TABLE><tr></tr></TABLE>
-</P>
-<a></font><font></a>
-This page contains an insanely badly-nested tag sequence.
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <center>
-| <center>
-| <font>
-| "
-"
-| <table>
-| "
-"
-| <tbody>
-| <tr>
-| "
-"
-| <td>
-| <tr>
-| "
-"
-| <table>
-| <tbody>
-| <tr>
-| <font>
-| "
-"
-| <p>
-| "
-"
-| <a>
-| <a>
-| <font>
-| <font>
-| "
-This page contains an insanely badly-nested tag sequence."
-
-#data
-<html>
-<body>
-<b><nobr><div>This text is in a div inside a nobr</nobr>More text that should not be in the nobr, i.e., the
-nobr should have closed the div inside it implicitly. </b><pre>A pre tag outside everything else.</pre>
-</body>
-</html>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "
-"
-| <b>
-| <nobr>
-| <div>
-| <b>
-| <nobr>
-| "This text is in a div inside a nobr"
-| "More text that should not be in the nobr, i.e., the
-nobr should have closed the div inside it implicitly. "
-| <pre>
-| "A pre tag outside everything else."
-| "
-
-"
diff --git a/src/pkg/html/testdata/webkit/webkit01.dat b/src/pkg/html/testdata/webkit/webkit01.dat
deleted file mode 100644
index 4101b216e..000000000
--- a/src/pkg/html/testdata/webkit/webkit01.dat
+++ /dev/null
@@ -1,609 +0,0 @@
-#data
-Test
-#errors
-Line: 1 Col: 4 Unexpected non-space characters. Expected DOCTYPE.
-#document
-| <html>
-| <head>
-| <body>
-| "Test"
-
-#data
-<div></div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-
-#data
-<div>Test</div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| "Test"
-
-#data
-<di
-#errors
-#document
-| <html>
-| <head>
-| <body>
-
-#data
-<div>Hello</div>
-<script>
-console.log("PASS");
-</script>
-<div>Bye</div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| "Hello"
-| "
-"
-| <script>
-| "
-console.log("PASS");
-"
-| "
-"
-| <div>
-| "Bye"
-
-#data
-<div foo="bar">Hello</div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| foo="bar"
-| "Hello"
-
-#data
-<div>Hello</div>
-<script>
-console.log("FOO<span>BAR</span>BAZ");
-</script>
-<div>Bye</div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| "Hello"
-| "
-"
-| <script>
-| "
-console.log("FOO<span>BAR</span>BAZ");
-"
-| "
-"
-| <div>
-| "Bye"
-
-#data
-<foo bar="baz"></foo><potato quack="duck"></potato>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <foo>
-| bar="baz"
-| <potato>
-| quack="duck"
-
-#data
-<foo bar="baz"><potato quack="duck"></potato></foo>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <foo>
-| bar="baz"
-| <potato>
-| quack="duck"
-
-#data
-<foo></foo bar="baz"><potato></potato quack="duck">
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <foo>
-| <potato>
-
-#data
-</ tttt>
-#errors
-#document
-| <!-- tttt -->
-| <html>
-| <head>
-| <body>
-
-#data
-<div FOO ><img><img></div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| foo=""
-| <img>
-| <img>
-
-#data
-<p>Test</p<p>Test2</p>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <p>
-| "TestTest2"
-
-#data
-<rdar://problem/6869687>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <rdar:>
-| 6869687=""
-| problem=""
-
-#data
-<A>test< /A>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <a>
-| "test< /A>"
-
-#data
-&lt;
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "<"
-
-#data
-<body foo='bar'><body foo='baz' yo='mama'>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| foo="bar"
-| yo="mama"
-
-#data
-<body></br foo="bar"></body>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <br>
-
-#data
-<bdy><br foo="bar"></body>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <bdy>
-| <br>
-| foo="bar"
-
-#data
-<body></body></br foo="bar">
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <br>
-
-#data
-<bdy></body><br foo="bar">
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <bdy>
-| <br>
-| foo="bar"
-
-#data
-<html><body></body></html><!-- Hi there -->
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <!-- Hi there -->
-
-#data
-<html><body></body></html>x<!-- Hi there -->
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "x"
-| <!-- Hi there -->
-
-#data
-<html><body></body></html>x<!-- Hi there --></html><!-- Again -->
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "x"
-| <!-- Hi there -->
-| <!-- Again -->
-
-#data
-<html><body></body></html>x<!-- Hi there --></body></html><!-- Again -->
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "x"
-| <!-- Hi there -->
-| <!-- Again -->
-
-#data
-<html><body><ruby><div><rp>xx</rp></div></ruby></body></html>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <ruby>
-| <div>
-| <rp>
-| "xx"
-
-#data
-<html><body><ruby><div><rt>xx</rt></div></ruby></body></html>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <ruby>
-| <div>
-| <rt>
-| "xx"
-
-#data
-<html><frameset><!--1--><noframes>A</noframes><!--2--></frameset><!--3--><noframes>B</noframes><!--4--></html><!--5--><noframes>C</noframes><!--6-->
-#errors
-#document
-| <html>
-| <head>
-| <frameset>
-| <!-- 1 -->
-| <noframes>
-| "A"
-| <!-- 2 -->
-| <!-- 3 -->
-| <noframes>
-| "B"
-| <!-- 4 -->
-| <noframes>
-| "C"
-| <!-- 5 -->
-| <!-- 6 -->
-
-#data
-<select><option>A<select><option>B<select><option>C<select><option>D<select><option>E<select><option>F<select><option>G<select>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <select>
-| <option>
-| "A"
-| <option>
-| "B"
-| <select>
-| <option>
-| "C"
-| <option>
-| "D"
-| <select>
-| <option>
-| "E"
-| <option>
-| "F"
-| <select>
-| <option>
-| "G"
-
-#data
-<dd><dd><dt><dt><dd><li><li>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <dd>
-| <dd>
-| <dt>
-| <dt>
-| <dd>
-| <li>
-| <li>
-
-#data
-<div><b></div><div><nobr>a<nobr>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| <b>
-| <div>
-| <b>
-| <nobr>
-| "a"
-| <nobr>
-
-#data
-<head></head>
-<body></body>
-#errors
-#document
-| <html>
-| <head>
-| "
-"
-| <body>
-
-#data
-<head></head> <style></style>ddd
-#errors
-#document
-| <html>
-| <head>
-| <style>
-| " "
-| <body>
-| "ddd"
-
-#data
-<kbd><table></kbd><col><select><tr>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <kbd>
-| <select>
-| <table>
-| <colgroup>
-| <col>
-| <tbody>
-| <tr>
-
-#data
-<kbd><table></kbd><col><select><tr></table><div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <kbd>
-| <select>
-| <table>
-| <colgroup>
-| <col>
-| <tbody>
-| <tr>
-| <div>
-
-#data
-<a><li><style></style><title></title></a>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <a>
-| <li>
-| <a>
-| <style>
-| <title>
-
-#data
-<font></p><p><meta><title></title></font>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <font>
-| <p>
-| <p>
-| <font>
-| <meta>
-| <title>
-
-#data
-<a><center><title></title><a>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <a>
-| <center>
-| <a>
-| <title>
-| <a>
-
-#data
-<svg><title><div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| <svg title>
-| <div>
-
-#data
-<svg><title><rect><div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| <svg title>
-| <rect>
-| <div>
-
-#data
-<svg><title><svg><div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| <svg title>
-| <svg svg>
-| <div>
-
-#data
-<img <="" FAIL>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <img>
-| <=""
-| fail=""
-
-#data
-<ul><li><div id='foo'/>A</li><li>B<div>C</div></li></ul>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <ul>
-| <li>
-| <div>
-| id="foo"
-| "A"
-| <li>
-| "B"
-| <div>
-| "C"
-
-#data
-<svg><em><desc></em>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| <em>
-| <desc>
-
-#data
-<table><tr><td><svg><desc><td></desc><circle>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| <svg svg>
-| <svg desc>
-| <svg circle>
-
-#data
-<svg><tfoot></mi><td>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <svg svg>
-| <svg tfoot>
-| <svg td>
-
-#data
-<math><mrow><mrow><mn>1</mn></mrow><mi>a</mi></mrow></math>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <math math>
-| <math mrow>
-| <math mrow>
-| <math mn>
-| "1"
-| <math mi>
-| "a"
-
-#data
-<!doctype html><input type="hidden"><frameset>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <frameset>
-
-#data
-<!doctype html><input type="button"><frameset>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <input>
-| type="button"
diff --git a/src/pkg/html/testdata/webkit/webkit02.dat b/src/pkg/html/testdata/webkit/webkit02.dat
deleted file mode 100644
index 2218f4298..000000000
--- a/src/pkg/html/testdata/webkit/webkit02.dat
+++ /dev/null
@@ -1,104 +0,0 @@
-#data
-<foo bar=qux/>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <foo>
-| bar="qux/"
-
-#data
-<p id="status"><noscript><strong>A</strong></noscript><span>B</span></p>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <p>
-| id="status"
-| <noscript>
-| "<strong>A</strong>"
-| <span>
-| "B"
-
-#data
-<div><sarcasm><div></div></sarcasm></div>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <div>
-| <sarcasm>
-| <div>
-
-#data
-<html><body><img src="" border="0" alt="><div>A</div></body></html>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-
-#data
-<table><td></tbody>A
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| "A"
-| <table>
-| <tbody>
-| <tr>
-| <td>
-
-#data
-<table><td></thead>A
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| "A"
-
-#data
-<table><td></tfoot>A
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <table>
-| <tbody>
-| <tr>
-| <td>
-| "A"
-
-#data
-<table><thead><td></tbody>A
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <table>
-| <thead>
-| <tr>
-| <td>
-| "A"
-
-#data
-<legend>test</legend>
-#errors
-#document
-| <html>
-| <head>
-| <body>
-| <legend>
-| "test"
diff --git a/src/pkg/html/token.go b/src/pkg/html/token.go
deleted file mode 100644
index d266b3a30..000000000
--- a/src/pkg/html/token.go
+++ /dev/null
@@ -1,575 +0,0 @@
-// Copyright 2010 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package html
-
-import (
- "bytes"
- "io"
- "os"
- "strconv"
-)
-
-// A TokenType is the type of a Token.
-type TokenType int
-
-const (
- // ErrorToken means that an error occurred during tokenization.
- ErrorToken TokenType = iota
- // TextToken means a text node.
- TextToken
- // A StartTagToken looks like <a>.
- StartTagToken
- // An EndTagToken looks like </a>.
- EndTagToken
- // A SelfClosingTagToken tag looks like <br/>.
- SelfClosingTagToken
- // A CommentToken looks like <!--x-->.
- CommentToken
- // A DoctypeToken looks like <!DOCTYPE x>
- DoctypeToken
-)
-
-// String returns a string representation of the TokenType.
-func (t TokenType) String() string {
- switch t {
- case ErrorToken:
- return "Error"
- case TextToken:
- return "Text"
- case StartTagToken:
- return "StartTag"
- case EndTagToken:
- return "EndTag"
- case SelfClosingTagToken:
- return "SelfClosingTag"
- case CommentToken:
- return "Comment"
- case DoctypeToken:
- return "Doctype"
- }
- return "Invalid(" + strconv.Itoa(int(t)) + ")"
-}
-
-// An Attribute is an attribute key-value pair. Key is alphabetic (and hence
-// does not contain escapable characters like '&', '<' or '>'), and Val is
-// unescaped (it looks like "a<b" rather than "a&lt;b").
-type Attribute struct {
- Key, Val string
-}
-
-// A Token consists of a TokenType and some Data (tag name for start and end
-// tags, content for text, comments and doctypes). A tag Token may also contain
-// a slice of Attributes. Data is unescaped for all Tokens (it looks like "a<b"
-// rather than "a&lt;b").
-type Token struct {
- Type TokenType
- Data string
- Attr []Attribute
-}
-
-// tagString returns a string representation of a tag Token's Data and Attr.
-func (t Token) tagString() string {
- if len(t.Attr) == 0 {
- return t.Data
- }
- buf := bytes.NewBuffer(nil)
- buf.WriteString(t.Data)
- for _, a := range t.Attr {
- buf.WriteByte(' ')
- buf.WriteString(a.Key)
- buf.WriteString(`="`)
- escape(buf, a.Val)
- buf.WriteByte('"')
- }
- return buf.String()
-}
-
-// String returns a string representation of the Token.
-func (t Token) String() string {
- switch t.Type {
- case ErrorToken:
- return ""
- case TextToken:
- return EscapeString(t.Data)
- case StartTagToken:
- return "<" + t.tagString() + ">"
- case EndTagToken:
- return "</" + t.tagString() + ">"
- case SelfClosingTagToken:
- return "<" + t.tagString() + "/>"
- case CommentToken:
- return "<!--" + EscapeString(t.Data) + "-->"
- case DoctypeToken:
- return "<!DOCTYPE " + EscapeString(t.Data) + ">"
- }
- return "Invalid(" + strconv.Itoa(int(t.Type)) + ")"
-}
-
-// A Tokenizer returns a stream of HTML Tokens.
-type Tokenizer struct {
- // If ReturnComments is set, Next returns comment tokens;
- // otherwise it skips over comments (default).
- ReturnComments bool
-
- // r is the source of the HTML text.
- r io.Reader
- // tt is the TokenType of the most recently read token.
- tt TokenType
- // err is the first error encountered during tokenization. It is possible
- // for tt != Error && err != nil to hold: this means that Next returned a
- // valid token but the subsequent Next call will return an error token.
- // For example, if the HTML text input was just "plain", then the first
- // Next call would set z.err to os.EOF but return a TextToken, and all
- // subsequent Next calls would return an ErrorToken.
- // err is never reset. Once it becomes non-nil, it stays non-nil.
- err os.Error
- // buf[p0:p1] holds the raw data of the most recent token.
- // buf[p1:] is buffered input that will yield future tokens.
- p0, p1 int
- buf []byte
-}
-
-// Error returns the error associated with the most recent ErrorToken token.
-// This is typically os.EOF, meaning the end of tokenization.
-func (z *Tokenizer) Error() os.Error {
- if z.tt != ErrorToken {
- return nil
- }
- return z.err
-}
-
-// Raw returns the unmodified text of the current token. Calling Next, Token,
-// Text, TagName or TagAttr may change the contents of the returned slice.
-func (z *Tokenizer) Raw() []byte {
- return z.buf[z.p0:z.p1]
-}
-
-// readByte returns the next byte from the input stream, doing a buffered read
-// from z.r into z.buf if necessary. z.buf[z.p0:z.p1] remains a contiguous byte
-// slice that holds all the bytes read so far for the current token.
-// It sets z.err if the underlying reader returns an error.
-// Pre-condition: z.err == nil.
-func (z *Tokenizer) readByte() byte {
- if z.p1 >= len(z.buf) {
- // Our buffer is exhausted and we have to read from z.r.
- // We copy z.buf[z.p0:z.p1] to the beginning of z.buf. If the length
- // z.p1 - z.p0 is more than half the capacity of z.buf, then we
- // allocate a new buffer before the copy.
- c := cap(z.buf)
- d := z.p1 - z.p0
- var buf1 []byte
- if 2*d > c {
- buf1 = make([]byte, d, 2*c)
- } else {
- buf1 = z.buf[:d]
- }
- copy(buf1, z.buf[z.p0:z.p1])
- z.p0, z.p1, z.buf = 0, d, buf1[:d]
- // Now that we have copied the live bytes to the start of the buffer,
- // we read from z.r into the remainder.
- n, err := z.r.Read(buf1[d:cap(buf1)])
- if err != nil {
- z.err = err
- return 0
- }
- z.buf = buf1[:d+n]
- }
- x := z.buf[z.p1]
- z.p1++
- return x
-}
-
-// readTo keeps reading bytes until x is found or a read error occurs. If an
-// error does occur, z.err is set to that error.
-// Pre-condition: z.err == nil.
-func (z *Tokenizer) readTo(x uint8) {
- for {
- c := z.readByte()
- if z.err != nil {
- return
- }
- switch c {
- case x:
- return
- case '\\':
- z.readByte()
- if z.err != nil {
- return
- }
- }
- }
-}
-
-// nextComment reads the next token starting with "<!--".
-// The opening "<!--" has already been consumed.
-// Pre-condition: z.tt == TextToken && z.err == nil && z.p0 + 4 <= z.p1.
-func (z *Tokenizer) nextComment() {
- // <!--> is a valid comment.
- for dashCount := 2; ; {
- c := z.readByte()
- if z.err != nil {
- return
- }
- switch c {
- case '-':
- dashCount++
- case '>':
- if dashCount >= 2 {
- z.tt = CommentToken
- return
- }
- dashCount = 0
- default:
- dashCount = 0
- }
- }
-}
-
-// nextMarkupDeclaration reads the next token starting with "<!".
-// It might be a "<!--comment-->", a "<!DOCTYPE foo>", or "<!malformed text".
-// The opening "<!" has already been consumed.
-// Pre-condition: z.tt == TextToken && z.err == nil && z.p0 + 2 <= z.p1.
-func (z *Tokenizer) nextMarkupDeclaration() {
- var c [2]byte
- for i := 0; i < 2; i++ {
- c[i] = z.readByte()
- if z.err != nil {
- return
- }
- }
- if c[0] == '-' && c[1] == '-' {
- z.nextComment()
- return
- }
- z.p1 -= 2
- const s = "DOCTYPE "
- for i := 0; ; i++ {
- c := z.readByte()
- if z.err != nil {
- return
- }
- // Capitalize c.
- if 'a' <= c && c <= 'z' {
- c = 'A' + (c - 'a')
- }
- if i < len(s) && c != s[i] {
- z.nextText()
- return
- }
- if c == '>' {
- if i >= len(s) {
- z.tt = DoctypeToken
- }
- return
- }
- }
-}
-
-// nextTag reads the next token starting with "<". It might be a "<startTag>",
-// an "</endTag>", a "<!markup declaration>", or "<malformed text".
-// The opening "<" has already been consumed.
-// Pre-condition: z.tt == TextToken && z.err == nil && z.p0 + 1 <= z.p1.
-func (z *Tokenizer) nextTag() {
- c := z.readByte()
- if z.err != nil {
- return
- }
- switch {
- case c == '/':
- z.tt = EndTagToken
- // Lower-cased characters are more common in tag names, so we check for them first.
- case 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z':
- z.tt = StartTagToken
- case c == '!':
- z.nextMarkupDeclaration()
- return
- case c == '?':
- z.tt, z.err = ErrorToken, os.NewError("html: TODO: implement XML processing instructions")
- return
- default:
- z.tt, z.err = ErrorToken, os.NewError("html: TODO: handle malformed tags")
- return
- }
- for {
- c := z.readByte()
- if z.err != nil {
- return
- }
- switch c {
- case '"', '\'':
- z.readTo(c)
- if z.err != nil {
- return
- }
- case '>':
- if z.buf[z.p1-2] == '/' && z.tt == StartTagToken {
- z.tt = SelfClosingTagToken
- }
- return
- }
- }
-}
-
-// nextText reads all text up until an '<'.
-// Pre-condition: z.tt == TextToken && z.err == nil && z.p0 + 1 <= z.p1.
-func (z *Tokenizer) nextText() {
- for {
- c := z.readByte()
- if z.err != nil {
- return
- }
- if c == '<' {
- z.p1--
- return
- }
- }
-}
-
-// Next scans the next token and returns its type.
-func (z *Tokenizer) Next() TokenType {
- for {
- if z.err != nil {
- z.tt = ErrorToken
- return z.tt
- }
- z.p0 = z.p1
- c := z.readByte()
- if z.err != nil {
- z.tt = ErrorToken
- return z.tt
- }
- // We assume that the next token is text unless proven otherwise.
- z.tt = TextToken
- if c != '<' {
- z.nextText()
- } else {
- z.nextTag()
- if z.tt == CommentToken && !z.ReturnComments {
- continue
- }
- }
- return z.tt
- }
- panic("unreachable")
-}
-
-// trim returns the largest j such that z.buf[i:j] contains only white space,
-// or only white space plus the final ">" or "/>" of the raw data.
-func (z *Tokenizer) trim(i int) int {
- k := z.p1
- for ; i < k; i++ {
- switch z.buf[i] {
- case ' ', '\n', '\t', '\f':
- continue
- case '>':
- if i == k-1 {
- return k
- }
- case '/':
- if i == k-2 {
- return k
- }
- }
- return i
- }
- return k
-}
-
-// tagName finds the tag name at the start of z.buf[i:] and returns that name
-// lower-cased, as well as the trimmed cursor location afterwards.
-func (z *Tokenizer) tagName(i int) ([]byte, int) {
- i0 := i
-loop:
- for ; i < z.p1; i++ {
- c := z.buf[i]
- switch c {
- case ' ', '\n', '\t', '\f', '/', '>':
- break loop
- }
- if 'A' <= c && c <= 'Z' {
- z.buf[i] = c + 'a' - 'A'
- }
- }
- return z.buf[i0:i], z.trim(i)
-}
-
-// unquotedAttrVal finds the unquoted attribute value at the start of z.buf[i:]
-// and returns that value, as well as the trimmed cursor location afterwards.
-func (z *Tokenizer) unquotedAttrVal(i int) ([]byte, int) {
- i0 := i
-loop:
- for ; i < z.p1; i++ {
- switch z.buf[i] {
- case ' ', '\n', '\t', '\f', '>':
- break loop
- case '&':
- // TODO: unescape the entity.
- }
- }
- return z.buf[i0:i], z.trim(i)
-}
-
-// attrName finds the largest attribute name at the start
-// of z.buf[i:] and returns it lower-cased, as well
-// as the trimmed cursor location after that name.
-//
-// http://dev.w3.org/html5/spec/Overview.html#syntax-attribute-name
-// TODO: unicode characters
-func (z *Tokenizer) attrName(i int) ([]byte, int) {
- for z.buf[i] == '/' {
- i++
- if z.buf[i] == '>' {
- return nil, z.trim(i)
- }
- }
- i0 := i
-loop:
- for ; i < z.p1; i++ {
- c := z.buf[i]
- switch c {
- case '>', '/', '=':
- break loop
- }
- switch {
- case 'A' <= c && c <= 'Z':
- z.buf[i] = c + 'a' - 'A'
- case c > ' ' && c < 0x7f:
- // No-op.
- default:
- break loop
- }
- }
- return z.buf[i0:i], z.trim(i)
-}
-
-// Text returns the unescaped text of a text, comment or doctype token. The
-// contents of the returned slice may change on the next call to Next.
-func (z *Tokenizer) Text() []byte {
- var i0, i1 int
- switch z.tt {
- case TextToken:
- i0 = z.p0
- i1 = z.p1
- case CommentToken:
- // Trim the "<!--" from the left and the "-->" from the right.
- // "<!-->" is a valid comment, so the adjusted endpoints might overlap.
- i0 = z.p0 + 4
- i1 = z.p1 - 3
- case DoctypeToken:
- // Trim the "<!DOCTYPE " from the left and the ">" from the right.
- i0 = z.p0 + 10
- i1 = z.p1 - 1
- default:
- return nil
- }
- z.p0 = z.p1
- if i0 < i1 {
- return unescape(z.buf[i0:i1])
- }
- return nil
-}
-
-// TagName returns the lower-cased name of a tag token (the `img` out of
-// `<IMG SRC="foo">`) and whether the tag has attributes.
-// The contents of the returned slice may change on the next call to Next.
-func (z *Tokenizer) TagName() (name []byte, hasAttr bool) {
- i := z.p0 + 1
- if i >= z.p1 {
- z.p0 = z.p1
- return nil, false
- }
- if z.buf[i] == '/' {
- i++
- }
- name, z.p0 = z.tagName(i)
- hasAttr = z.p0 != z.p1
- return
-}
-
-// TagAttr returns the lower-cased key and unescaped value of the next unparsed
-// attribute for the current tag token and whether there are more attributes.
-// The contents of the returned slices may change on the next call to Next.
-func (z *Tokenizer) TagAttr() (key, val []byte, moreAttr bool) {
- key, i := z.attrName(z.p0)
- // Check for an empty attribute value.
- if i == z.p1 {
- z.p0 = i
- return
- }
- // Get past the equals and quote characters.
- if z.buf[i] != '=' {
- z.p0, moreAttr = i, true
- return
- }
- i = z.trim(i + 1)
- if i == z.p1 {
- z.p0 = i
- return
- }
- closeQuote := z.buf[i]
- if closeQuote != '\'' && closeQuote != '"' {
- val, z.p0 = z.unquotedAttrVal(i)
- moreAttr = z.p0 != z.p1
- return
- }
- i = z.trim(i + 1)
- // Copy and unescape everything up to the closing quote.
- dst, src := i, i
-loop:
- for src < z.p1 {
- c := z.buf[src]
- switch c {
- case closeQuote:
- src++
- break loop
- case '&':
- dst, src = unescapeEntity(z.buf, dst, src, true)
- case '\\':
- if src == z.p1 {
- z.buf[dst] = '\\'
- dst++
- } else {
- z.buf[dst] = z.buf[src+1]
- dst, src = dst+1, src+2
- }
- default:
- z.buf[dst] = c
- dst, src = dst+1, src+1
- }
- }
- val, z.p0 = z.buf[i:dst], z.trim(src)
- moreAttr = z.p0 != z.p1
- return
-}
-
-// Token returns the next Token. The result's Data and Attr values remain valid
-// after subsequent Next calls.
-func (z *Tokenizer) Token() Token {
- t := Token{Type: z.tt}
- switch z.tt {
- case TextToken, CommentToken, DoctypeToken:
- t.Data = string(z.Text())
- case StartTagToken, EndTagToken, SelfClosingTagToken:
- var attr []Attribute
- name, moreAttr := z.TagName()
- for moreAttr {
- var key, val []byte
- key, val, moreAttr = z.TagAttr()
- attr = append(attr, Attribute{string(key), string(val)})
- }
- t.Data = string(name)
- t.Attr = attr
- }
- return t
-}
-
-// NewTokenizer returns a new HTML Tokenizer for the given Reader.
-// The input is assumed to be UTF-8 encoded.
-func NewTokenizer(r io.Reader) *Tokenizer {
- return &Tokenizer{
- r: r,
- buf: make([]byte, 0, 4096),
- }
-}
diff --git a/src/pkg/html/token_test.go b/src/pkg/html/token_test.go
deleted file mode 100644
index 0a0beb201..000000000
--- a/src/pkg/html/token_test.go
+++ /dev/null
@@ -1,340 +0,0 @@
-// Copyright 2010 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package html
-
-import (
- "bytes"
- "os"
- "strings"
- "testing"
-)
-
-type tokenTest struct {
- // A short description of the test case.
- desc string
- // The HTML to parse.
- html string
- // The string representations of the expected tokens, joined by '$'.
- golden string
-}
-
-var tokenTests = []tokenTest{
- // A single text node. The tokenizer should not break text nodes on whitespace,
- // nor should it normalize whitespace within a text node.
- {
- "text",
- "foo bar",
- "foo bar",
- },
- // An entity.
- {
- "entity",
- "one &lt; two",
- "one &lt; two",
- },
- // A start, self-closing and end tag. The tokenizer does not care if the start
- // and end tokens don't match; that is the job of the parser.
- {
- "tags",
- "<a>b<c/>d</e>",
- "<a>$b$<c/>$d$</e>",
- },
- // Some malformed tags that are missing a '>'.
- {
- "malformed tag #0",
- `<p</p>`,
- `<p< p="">`,
- },
- {
- "malformed tag #1",
- `<p </p>`,
- `<p <="" p="">`,
- },
- {
- "malformed tag #2",
- `<p id=0</p>`,
- `<p id="0&lt;/p">`,
- },
- {
- "malformed tag #3",
- `<p id="0</p>`,
- `<p id="0&lt;/p&gt;">`,
- },
- {
- "malformed tag #4",
- `<p id="0"</p>`,
- `<p id="0" <="" p="">`,
- },
- // Comments.
- {
- "comment0",
- "abc<b><!-- skipme --></b>def",
- "abc$<b>$</b>$def",
- },
- {
- "comment1",
- "a<!-->z",
- "a$z",
- },
- {
- "comment2",
- "a<!--->z",
- "a$z",
- },
- {
- "comment3",
- "a<!--x>-->z",
- "a$z",
- },
- {
- "comment4",
- "a<!--x->-->z",
- "a$z",
- },
- {
- "comment5",
- "a<!>z",
- "a$&lt;!&gt;z",
- },
- {
- "comment6",
- "a<!->z",
- "a$&lt;!-&gt;z",
- },
- {
- "comment7",
- "a<!---<>z",
- "a$&lt;!---&lt;&gt;z",
- },
- {
- "comment8",
- "a<!--z",
- "a$&lt;!--z",
- },
- // An attribute with a backslash.
- {
- "backslash",
- `<p id="a\"b">`,
- `<p id="a&quot;b">`,
- },
- // Entities, tag name and attribute key lower-casing, and whitespace
- // normalization within a tag.
- {
- "tricky",
- "<p \t\n iD=\"a&quot;B\" foo=\"bar\"><EM>te&lt;&amp;;xt</em></p>",
- `<p id="a&quot;B" foo="bar">$<em>$te&lt;&amp;;xt$</em>$</p>`,
- },
- // A nonexistent entity. Tokenizing and converting back to a string should
- // escape the "&" to become "&amp;".
- {
- "noSuchEntity",
- `<a b="c&noSuchEntity;d">&lt;&alsoDoesntExist;&`,
- `<a b="c&amp;noSuchEntity;d">$&lt;&amp;alsoDoesntExist;&amp;`,
- },
- {
- "entity without semicolon",
- `&notit;&notin;<a b="q=z&amp=5&notice=hello&not;=world">`,
- `¬it;∉$<a b="q=z&amp;amp=5&amp;notice=hello¬=world">`,
- },
- {
- "entity with digits",
- "&frac12;",
- "½",
- },
- // Attribute tests:
- // http://dev.w3.org/html5/spec/Overview.html#attributes-0
- {
- "Empty attribute",
- `<input disabled FOO>`,
- `<input disabled="" foo="">`,
- },
- {
- "Empty attribute, whitespace",
- `<input disabled FOO >`,
- `<input disabled="" foo="">`,
- },
- {
- "Unquoted attribute value",
- `<input value=yes FOO=BAR>`,
- `<input value="yes" foo="BAR">`,
- },
- {
- "Unquoted attribute value, spaces",
- `<input value = yes FOO = BAR>`,
- `<input value="yes" foo="BAR">`,
- },
- {
- "Unquoted attribute value, trailing space",
- `<input value=yes FOO=BAR >`,
- `<input value="yes" foo="BAR">`,
- },
- {
- "Single-quoted attribute value",
- `<input value='yes' FOO='BAR'>`,
- `<input value="yes" foo="BAR">`,
- },
- {
- "Single-quoted attribute value, trailing space",
- `<input value='yes' FOO='BAR' >`,
- `<input value="yes" foo="BAR">`,
- },
- {
- "Double-quoted attribute value",
- `<input value="I'm an attribute" FOO="BAR">`,
- `<input value="I&apos;m an attribute" foo="BAR">`,
- },
- {
- "Attribute name characters",
- `<meta http-equiv="content-type">`,
- `<meta http-equiv="content-type">`,
- },
-}
-
-func TestTokenizer(t *testing.T) {
-loop:
- for _, tt := range tokenTests {
- z := NewTokenizer(bytes.NewBuffer([]byte(tt.html)))
- for i, s := range strings.Split(tt.golden, "$") {
- if z.Next() == ErrorToken {
- t.Errorf("%s token %d: want %q got error %v", tt.desc, i, s, z.Error())
- continue loop
- }
- actual := z.Token().String()
- if s != actual {
- t.Errorf("%s token %d: want %q got %q", tt.desc, i, s, actual)
- continue loop
- }
- }
- z.Next()
- if z.Error() != os.EOF {
- t.Errorf("%s: want EOF got %q", tt.desc, z.Token().String())
- }
- }
-}
-
-type unescapeTest struct {
- // A short description of the test case.
- desc string
- // The HTML text.
- html string
- // The unescaped text.
- unescaped string
-}
-
-var unescapeTests = []unescapeTest{
- // Handle no entities.
- {
- "copy",
- "A\ttext\nstring",
- "A\ttext\nstring",
- },
- // Handle simple named entities.
- {
- "simple",
- "&amp; &gt; &lt;",
- "& > <",
- },
- // Handle hitting the end of the string.
- {
- "stringEnd",
- "&amp &amp",
- "& &",
- },
- // Handle entities with two codepoints.
- {
- "multiCodepoint",
- "text &gesl; blah",
- "text \u22db\ufe00 blah",
- },
- // Handle decimal numeric entities.
- {
- "decimalEntity",
- "Delta = &#916; ",
- "Delta = Δ ",
- },
- // Handle hexadecimal numeric entities.
- {
- "hexadecimalEntity",
- "Lambda = &#x3bb; = &#X3Bb ",
- "Lambda = λ = λ ",
- },
- // Handle numeric early termination.
- {
- "numericEnds",
- "&# &#x &#128;43 &copy = &#169f = &#xa9",
- "&# &#x €43 © = ©f = ©",
- },
- // Handle numeric ISO-8859-1 entity replacements.
- {
- "numericReplacements",
- "Footnote&#x87;",
- "Footnote‡",
- },
-}
-
-func TestUnescape(t *testing.T) {
- for _, tt := range unescapeTests {
- unescaped := UnescapeString(tt.html)
- if unescaped != tt.unescaped {
- t.Errorf("TestUnescape %s: want %q, got %q", tt.desc, tt.unescaped, unescaped)
- }
- }
-}
-
-func TestUnescapeEscape(t *testing.T) {
- ss := []string{
- ``,
- `abc def`,
- `a & b`,
- `a&amp;b`,
- `a &amp b`,
- `&quot;`,
- `"`,
- `"<&>"`,
- `&quot;&lt;&amp;&gt;&quot;`,
- `3&5==1 && 0<1, "0&lt;1", a+acute=&aacute;`,
- }
- for _, s := range ss {
- if s != UnescapeString(EscapeString(s)) {
- t.Errorf("s != UnescapeString(EscapeString(s)), s=%q", s)
- }
- }
-}
-
-func TestBufAPI(t *testing.T) {
- s := "0<a>1</a>2<b>3<a>4<a>5</a>6</b>7</a>8<a/>9"
- z := NewTokenizer(bytes.NewBuffer([]byte(s)))
- result := bytes.NewBuffer(nil)
- depth := 0
-loop:
- for {
- tt := z.Next()
- switch tt {
- case ErrorToken:
- if z.Error() != os.EOF {
- t.Error(z.Error())
- }
- break loop
- case TextToken:
- if depth > 0 {
- result.Write(z.Text())
- }
- case StartTagToken, EndTagToken:
- tn, _ := z.TagName()
- if len(tn) == 1 && tn[0] == 'a' {
- if tt == StartTagToken {
- depth++
- } else {
- depth--
- }
- }
- }
- }
- u := "14567"
- v := string(result.Bytes())
- if u != v {
- t.Errorf("TestBufAPI: want %q got %q", u, v)
- }
-}