diff options
Diffstat (limited to 'src/pkg/html/token_test.go')
-rw-r--r-- | src/pkg/html/token_test.go | 86 |
1 files changed, 56 insertions, 30 deletions
diff --git a/src/pkg/html/token_test.go b/src/pkg/html/token_test.go index e07999ca5..5cf1f6dac 100644 --- a/src/pkg/html/token_test.go +++ b/src/pkg/html/token_test.go @@ -7,6 +7,7 @@ package html import ( "bytes" "os" + "strings" "testing" ) @@ -15,8 +16,8 @@ type tokenTest struct { desc string // The HTML to parse. html string - // The string representations of the expected tokens. - tokens []string + // The string representations of the expected tokens, joined by '$'. + golden string } var tokenTests = []tokenTest{ @@ -25,61 +26,86 @@ var tokenTests = []tokenTest{ { "text", "foo bar", - []string{ - "foo bar", - }, + "foo bar", }, // An entity. { "entity", "one < two", - []string{ - "one < two", - }, + "one < two", }, // A start, self-closing and end tag. The tokenizer does not care if the start // and end tokens don't match; that is the job of the parser. { "tags", "<a>b<c/>d</e>", - []string{ - "<a>", - "b", - "<c/>", - "d", - "</e>", - }, + "<a>$b$<c/>$d$</e>", + }, + // Comments. + { + "comment0", + "abc<b><!-- skipme --></b>def", + "abc$<b>$</b>$def", + }, + { + "comment1", + "a<!-->z", + "a$z", + }, + { + "comment2", + "a<!--->z", + "a$z", + }, + { + "comment3", + "a<!--x>-->z", + "a$z", + }, + { + "comment4", + "a<!--x->-->z", + "a$z", + }, + { + "comment5", + "a<!>z", + "a$<!>z", + }, + { + "comment6", + "a<!->z", + "a$<!->z", + }, + { + "comment7", + "a<!---<>z", + "a$<!---<>z", + }, + { + "comment8", + "a<!--z", + "a$<!--z", }, // An attribute with a backslash. { "backslash", `<p id="a\"b">`, - []string{ - `<p id="a"b">`, - }, + `<p id="a"b">`, }, // Entities, tag name and attribute key lower-casing, and whitespace // normalization within a tag. { "tricky", "<p \t\n iD=\"a"B\" foo=\"bar\"><EM>te<&;xt</em></p>", - []string{ - `<p id="a"B" foo="bar">`, - "<em>", - "te<&;xt", - "</em>", - "</p>", - }, + `<p id="a"B" foo="bar">$<em>$te<&;xt$</em>$</p>`, }, // A non-existant entity. 
Tokenizing and converting back to a string should // escape the "&" to become "&amp;". { "noSuchEntity", `<a b="c&noSuchEntity;d"><&alsoDoesntExist;&`, - []string{ - `<a b="c&noSuchEntity;d">`, - "<&alsoDoesntExist;&", - }, + `<a b="c&noSuchEntity;d">$<&alsoDoesntExist;&`, }, } @@ -87,7 +113,7 @@ func TestTokenizer(t *testing.T) { loop: for _, tt := range tokenTests { z := NewTokenizer(bytes.NewBuffer([]byte(tt.html))) - for i, s := range tt.tokens { + for i, s := range strings.Split(tt.golden, "$", -1) { if z.Next() == ErrorToken { t.Errorf("%s token %d: want %q got error %v", tt.desc, i, s, z.Error()) continue loop |