1 files changed, 56 insertions, 30 deletions
diff --git a/src/pkg/html/token_test.go b/src/pkg/html/token_test.go
index e07999ca5..5cf1f6dac 100644
--- a/src/pkg/html/token_test.go
+++ b/src/pkg/html/token_test.go
@@ -7,6 +7,7 @@ package html
 import (
 	"bytes"
 	"os"
+	"strings"
 	"testing"
 )
 
@@ -15,8 +16,8 @@ type tokenTest struct {
 	desc string
 	// The HTML to parse.
 	html string
-	// The string representations of the expected tokens.
-	tokens []string
+	// The string representations of the expected tokens, joined by '$'.
+	golden string
 }
 
 var tokenTests = []tokenTest{
@@ -25,61 +26,86 @@ var tokenTests = []tokenTest{
 	{
 		"text",
 		"foo  bar",
-		[]string{
-			"foo  bar",
-		},
+		"foo  bar",
 	},
 	// An entity.
 	{
 		"entity",
 		"one &lt; two",
-		[]string{
-			"one &lt; two",
-		},
+		"one &lt; two",
 	},
 	// A start, self-closing and end tag. The tokenizer does not care if the start
 	// and end tokens don't match; that is the job of the parser.
 	{
 		"tags",
 		"<a>b<c/>d</e>",
-		[]string{
-			"<a>",
-			"b",
-			"<c/>",
-			"d",
-			"</e>",
-		},
+		"<a>$b$<c/>$d$</e>",
+	},
+	// Comments.
+	{
+		"comment0",
+		"abc<b><!-- skipme --></b>def",
+		"abc$<b>$</b>$def",
+	},
+	{
+		"comment1",
+		"a<!-->z",
+		"a$z",
+	},
+	{
+		"comment2",
+		"a<!--->z",
+		"a$z",
+	},
+	{
+		"comment3",
+		"a<!--x>-->z",
+		"a$z",
+	},
+	{
+		"comment4",
+		"a<!--x->-->z",
+		"a$z",
+	},
+	{
+		"comment5",
+		"a<!>z",
+		"a$&lt;!&gt;z",
+	},
+	{
+		"comment6",
+		"a<!->z",
+		"a$&lt;!-&gt;z",
+	},
+	{
+		"comment7",
+		"a<!---<>z",
+		"a$&lt;!---&lt;&gt;z",
+	},
+	{
+		"comment8",
+		"a<!--z",
+		"a$&lt;!--z",
 	},
 	// An attribute with a backslash.
 	{
 		"backslash",
 		`<p id="a\"b">`,
-		[]string{
-			`<p id="a&quot;b">`,
-		},
+		`<p id="a&quot;b">`,
 	},
 	// Entities, tag name and attribute key lower-casing, and whitespace
 	// normalization within a tag.
 	{
 		"tricky",
 		"<p \t\n iD=\"a&quot;B\"  foo=\"bar\"><EM>te&lt;&amp;;xt</em></p>",
-		[]string{
-			`<p id="a&quot;B" foo="bar">`,
-			"<em>",
-			"te&lt;&amp;;xt",
-			"</em>",
-			"</p>",
-		},
+		`<p id="a&quot;B" foo="bar">$<em>$te&lt;&amp;;xt$</em>$</p>`,
 	},
 	// A non-existant entity. Tokenizing and converting back to a string should
 	// escape the "&" to become "&amp;".
 	{
 		"noSuchEntity",
 		`<a b="c&noSuchEntity;d">&lt;&alsoDoesntExist;&`,
-		[]string{
-			`<a b="c&amp;noSuchEntity;d">`,
-			"&lt;&amp;alsoDoesntExist;&amp;",
-		},
+		`<a b="c&amp;noSuchEntity;d">$&lt;&amp;alsoDoesntExist;&amp;`,
 	},
 }
 
@@ -87,7 +113,7 @@ func TestTokenizer(t *testing.T) {
 loop:
 	for _, tt := range tokenTests {
 		z := NewTokenizer(bytes.NewBuffer([]byte(tt.html)))
-		for i, s := range tt.tokens {
+		for i, s := range strings.Split(tt.golden, "$", -1) {
 			if z.Next() == ErrorToken {
 				t.Errorf("%s token %d: want %q got error %v", tt.desc, i, s, z.Error())
 				continue loop