summaryrefslogtreecommitdiff
path: root/src/pkg/exp/html/parse_test.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/pkg/exp/html/parse_test.go')
-rw-r--r--src/pkg/exp/html/parse_test.go276
1 files changed, 276 insertions, 0 deletions
diff --git a/src/pkg/exp/html/parse_test.go b/src/pkg/exp/html/parse_test.go
new file mode 100644
index 000000000..1528dffaa
--- /dev/null
+++ b/src/pkg/exp/html/parse_test.go
@@ -0,0 +1,276 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+ "bufio"
+ "bytes"
+ "errors"
+ "fmt"
+ "io"
+ "os"
+ "strings"
+ "testing"
+)
+
+// readParseTest reads a single test case from r.
+func readParseTest(r *bufio.Reader) (text, want, context string, err error) {
+ line, err := r.ReadSlice('\n')
+ if err != nil {
+ return "", "", "", err
+ }
+ var b []byte
+
+ // Read the HTML.
+ if string(line) != "#data\n" {
+ return "", "", "", fmt.Errorf(`got %q want "#data\n"`, line)
+ }
+ for {
+ line, err = r.ReadSlice('\n')
+ if err != nil {
+ return "", "", "", err
+ }
+ if line[0] == '#' {
+ break
+ }
+ b = append(b, line...)
+ }
+ text = strings.TrimRight(string(b), "\n")
+ b = b[:0]
+
+ // Skip the error list.
+ if string(line) != "#errors\n" {
+ return "", "", "", fmt.Errorf(`got %q want "#errors\n"`, line)
+ }
+ for {
+ line, err = r.ReadSlice('\n')
+ if err != nil {
+ return "", "", "", err
+ }
+ if line[0] == '#' {
+ break
+ }
+ }
+
+ if string(line) == "#document-fragment\n" {
+ line, err = r.ReadSlice('\n')
+ if err != nil {
+ return "", "", "", err
+ }
+ context = strings.TrimSpace(string(line))
+ line, err = r.ReadSlice('\n')
+ if err != nil {
+ return "", "", "", err
+ }
+ }
+
+ // Read the dump of what the parse tree should be.
+ if string(line) != "#document\n" {
+ return "", "", "", fmt.Errorf(`got %q want "#document\n"`, line)
+ }
+ for {
+ line, err = r.ReadSlice('\n')
+ if err != nil && err != io.EOF {
+ return "", "", "", err
+ }
+ if len(line) == 0 || len(line) == 1 && line[0] == '\n' {
+ break
+ }
+ b = append(b, line...)
+ }
+ return text, string(b), context, nil
+}
+
+func dumpIndent(w io.Writer, level int) {
+ io.WriteString(w, "| ")
+ for i := 0; i < level; i++ {
+ io.WriteString(w, " ")
+ }
+}
+
+func dumpLevel(w io.Writer, n *Node, level int) error {
+ dumpIndent(w, level)
+ switch n.Type {
+ case ErrorNode:
+ return errors.New("unexpected ErrorNode")
+ case DocumentNode:
+ return errors.New("unexpected DocumentNode")
+ case ElementNode:
+ if n.Namespace != "" {
+ fmt.Fprintf(w, "<%s %s>", n.Namespace, n.Data)
+ } else {
+ fmt.Fprintf(w, "<%s>", n.Data)
+ }
+ attr := n.Attr
+ if len(attr) == 2 && attr[0].Namespace == "xml" && attr[1].Namespace == "xlink" {
+ // Some of the test cases in tests10.dat change the order of adjusted
+ // foreign attributes, but that behavior is not in the spec, and could
+ // simply be an implementation detail of html5lib's python map ordering.
+ attr[0], attr[1] = attr[1], attr[0]
+ }
+ for _, a := range attr {
+ io.WriteString(w, "\n")
+ dumpIndent(w, level+1)
+ if a.Namespace != "" {
+ fmt.Fprintf(w, `%s %s="%s"`, a.Namespace, a.Key, a.Val)
+ } else {
+ fmt.Fprintf(w, `%s="%s"`, a.Key, a.Val)
+ }
+ }
+ case TextNode:
+ fmt.Fprintf(w, `"%s"`, n.Data)
+ case CommentNode:
+ fmt.Fprintf(w, "<!-- %s -->", n.Data)
+ case DoctypeNode:
+ fmt.Fprintf(w, "<!DOCTYPE %s", n.Data)
+ if n.Attr != nil {
+ var p, s string
+ for _, a := range n.Attr {
+ switch a.Key {
+ case "public":
+ p = a.Val
+ case "system":
+ s = a.Val
+ }
+ }
+ if p != "" || s != "" {
+ fmt.Fprintf(w, ` "%s"`, p)
+ fmt.Fprintf(w, ` "%s"`, s)
+ }
+ }
+ io.WriteString(w, ">")
+ case scopeMarkerNode:
+ return errors.New("unexpected scopeMarkerNode")
+ default:
+ return errors.New("unknown node type")
+ }
+ io.WriteString(w, "\n")
+ for _, c := range n.Child {
+ if err := dumpLevel(w, c, level+1); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+func dump(n *Node) (string, error) {
+ if n == nil || len(n.Child) == 0 {
+ return "", nil
+ }
+ b := bytes.NewBuffer(nil)
+ for _, child := range n.Child {
+ if err := dumpLevel(b, child, 0); err != nil {
+ return "", err
+ }
+ }
+ return b.String(), nil
+}
+
+func TestParser(t *testing.T) {
+ testFiles := []struct {
+ filename string
+ // n is the number of test cases to run from that file.
+ // -1 means all test cases.
+ n int
+ }{
+ // TODO(nigeltao): Process all the test cases from all the .dat files.
+ {"adoption01.dat", -1},
+ {"doctype01.dat", -1},
+ {"tests1.dat", -1},
+ {"tests2.dat", -1},
+ {"tests3.dat", -1},
+ {"tests4.dat", -1},
+ {"tests5.dat", -1},
+ {"tests6.dat", -1},
+ {"tests10.dat", 35},
+ }
+ for _, tf := range testFiles {
+ f, err := os.Open("testdata/webkit/" + tf.filename)
+ if err != nil {
+ t.Fatal(err)
+ }
+ defer f.Close()
+ r := bufio.NewReader(f)
+ for i := 0; i != tf.n; i++ {
+ text, want, context, err := readParseTest(r)
+ if err == io.EOF && tf.n == -1 {
+ break
+ }
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ var doc *Node
+ if context == "" {
+ doc, err = Parse(strings.NewReader(text))
+ if err != nil {
+ t.Fatal(err)
+ }
+ } else {
+ contextNode := &Node{
+ Type: ElementNode,
+ Data: context,
+ }
+ nodes, err := ParseFragment(strings.NewReader(text), contextNode)
+ if err != nil {
+ t.Fatal(err)
+ }
+ doc = &Node{
+ Type: DocumentNode,
+ }
+ for _, n := range nodes {
+ doc.Add(n)
+ }
+ }
+
+ got, err := dump(doc)
+ if err != nil {
+ t.Fatal(err)
+ }
+ // Compare the parsed tree to the #document section.
+ if got != want {
+ t.Errorf("%s test #%d %q, got vs want:\n----\n%s----\n%s----", tf.filename, i, text, got, want)
+ continue
+ }
+ if renderTestBlacklist[text] || context != "" {
+ continue
+ }
+ // Check that rendering and re-parsing results in an identical tree.
+ pr, pw := io.Pipe()
+ go func() {
+ pw.CloseWithError(Render(pw, doc))
+ }()
+ doc1, err := Parse(pr)
+ if err != nil {
+ t.Fatal(err)
+ }
+ got1, err := dump(doc1)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if got != got1 {
+ t.Errorf("%s test #%d %q, got vs got1:\n----\n%s----\n%s----", tf.filename, i, text, got, got1)
+ continue
+ }
+ }
+ }
+}
+
+// Some test input result in parse trees are not 'well-formed' despite
+// following the HTML5 recovery algorithms. Rendering and re-parsing such a
+// tree will not result in an exact clone of that tree. We blacklist such
+// inputs from the render test.
+var renderTestBlacklist = map[string]bool{
+ // The second <a> will be reparented to the first <table>'s parent. This
+ // results in an <a> whose parent is an <a>, which is not 'well-formed'.
+ `<a><table><td><a><table></table><a></tr><a></table><b>X</b>C<a>Y`: true,
+ // More cases of <a> being reparented:
+ `<a href="blah">aba<table><a href="foo">br<tr><td></td></tr>x</table>aoe`: true,
+ `<a><table><a></table><p><a><div><a>`: true,
+ `<a><table><td><a><table></table><a></tr><a></table><a>`: true,
+ // A <plaintext> element is reparented, putting it before a table.
+ // A <plaintext> element can't have anything after it in HTML.
+ `<table><plaintext><td>`: true,
+}