// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"bytes"
"os"
"strings"
"testing"
)
// tokenTest describes one tokenizer test case: an input HTML string
// and the string form of the token stream it is expected to produce.
type tokenTest struct {
	// A short description of the test case.
	desc string
	// The HTML to parse.
	html string
	// The string representations of the expected tokens, joined by '$'.
	golden string
}
var tokenTests = []tokenTest{
// A single text node. The tokenizer should not break text nodes on whitespace,
// nor should it normalize whitespace within a text node.
{
"text",
"foo bar",
"foo bar",
},
// An entity.
{
"entity",
"one < two",
"one < two",
},
// A start, self-closing and end tag. The tokenizer does not care if the start
// and end tokens don't match; that is the job of the parser.
{
"tags",
"bd",
"$b$$d$",
},
// Comments.
{
"comment0",
"abcdef",
"abc$$$def",
},
{
"comment1",
"az",
"a$z",
},
{
"comment2",
"az",
"a$z",
},
{
"comment3",
"az",
"a$z",
},
{
"comment4",
"az",
"a$z",
},
{
"comment5",
"az",
"a$<!>z",
},
{
"comment6",
"az",
"a$<!->z",
},
{
"comment7",
"a