summaryrefslogtreecommitdiff
path: root/src/pkg/html/token_test.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/pkg/html/token_test.go')
-rw-r--r--src/pkg/html/token_test.go86
1 files changed, 56 insertions, 30 deletions
diff --git a/src/pkg/html/token_test.go b/src/pkg/html/token_test.go
index e07999ca5..5cf1f6dac 100644
--- a/src/pkg/html/token_test.go
+++ b/src/pkg/html/token_test.go
@@ -7,6 +7,7 @@ package html
import (
"bytes"
"os"
+ "strings"
"testing"
)
@@ -15,8 +16,8 @@ type tokenTest struct {
desc string
// The HTML to parse.
html string
- // The string representations of the expected tokens.
- tokens []string
+ // The string representations of the expected tokens, joined by '$'.
+ golden string
}
var tokenTests = []tokenTest{
@@ -25,61 +26,86 @@ var tokenTests = []tokenTest{
{
"text",
"foo bar",
- []string{
- "foo bar",
- },
+ "foo bar",
},
// An entity.
{
"entity",
"one < two",
- []string{
- "one < two",
- },
+ "one < two",
},
// A start, self-closing and end tag. The tokenizer does not care if the start
// and end tokens don't match; that is the job of the parser.
{
"tags",
"<a>b<c/>d</e>",
- []string{
- "<a>",
- "b",
- "<c/>",
- "d",
- "</e>",
- },
+ "<a>$b$<c/>$d$</e>",
+ },
+ // Comments.
+ {
+ "comment0",
+ "abc<b><!-- skipme --></b>def",
+ "abc$<b>$</b>$def",
+ },
+ {
+ "comment1",
+ "a<!-->z",
+ "a$z",
+ },
+ {
+ "comment2",
+ "a<!--->z",
+ "a$z",
+ },
+ {
+ "comment3",
+ "a<!--x>-->z",
+ "a$z",
+ },
+ {
+ "comment4",
+ "a<!--x->-->z",
+ "a$z",
+ },
+ {
+ "comment5",
+ "a<!>z",
+ "a$&lt;!&gt;z",
+ },
+ {
+ "comment6",
+ "a<!->z",
+ "a$&lt;!-&gt;z",
+ },
+ {
+ "comment7",
+ "a<!---<>z",
+ "a$&lt;!---&lt;&gt;z",
+ },
+ {
+ "comment8",
+ "a<!--z",
+ "a$&lt;!--z",
},
// An attribute with a backslash.
{
"backslash",
`<p id="a\"b">`,
- []string{
- `<p id="a&quot;b">`,
- },
+ `<p id="a&quot;b">`,
},
// Entities, tag name and attribute key lower-casing, and whitespace
// normalization within a tag.
{
"tricky",
"<p \t\n iD=\"a&quot;B\" foo=\"bar\"><EM>te&lt;&amp;;xt</em></p>",
- []string{
- `<p id="a&quot;B" foo="bar">`,
- "<em>",
- "te&lt;&amp;;xt",
- "</em>",
- "</p>",
- },
+ `<p id="a&quot;B" foo="bar">$<em>$te&lt;&amp;;xt$</em>$</p>`,
},
// A non-existant entity. Tokenizing and converting back to a string should
// escape the "&" to become "&amp;".
{
"noSuchEntity",
`<a b="c&noSuchEntity;d">&lt;&alsoDoesntExist;&`,
- []string{
- `<a b="c&amp;noSuchEntity;d">`,
- "&lt;&amp;alsoDoesntExist;&amp;",
- },
+ `<a b="c&amp;noSuchEntity;d">$&lt;&amp;alsoDoesntExist;&amp;`,
},
}
@@ -87,7 +113,7 @@ func TestTokenizer(t *testing.T) {
loop:
for _, tt := range tokenTests {
z := NewTokenizer(bytes.NewBuffer([]byte(tt.html)))
- for i, s := range tt.tokens {
+ for i, s := range strings.Split(tt.golden, "$", -1) {
if z.Next() == ErrorToken {
t.Errorf("%s token %d: want %q got error %v", tt.desc, i, s, z.Error())
continue loop