diff options
Diffstat (limited to 'src/pkg/html/token_test.go')
-rw-r--r-- | src/pkg/html/token_test.go | 305 |
1 files changed, 0 insertions, 305 deletions
diff --git a/src/pkg/html/token_test.go b/src/pkg/html/token_test.go deleted file mode 100644 index c794612ab..000000000 --- a/src/pkg/html/token_test.go +++ /dev/null @@ -1,305 +0,0 @@ -// Copyright 2010 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package html - -import ( - "bytes" - "os" - "strings" - "testing" -) - -type tokenTest struct { - // A short description of the test case. - desc string - // The HTML to parse. - html string - // The string representations of the expected tokens, joined by '$'. - golden string -} - -var tokenTests = []tokenTest{ - // A single text node. The tokenizer should not break text nodes on whitespace, - // nor should it normalize whitespace within a text node. - { - "text", - "foo bar", - "foo bar", - }, - // An entity. - { - "entity", - "one < two", - "one < two", - }, - // A start, self-closing and end tag. The tokenizer does not care if the start - // and end tokens don't match; that is the job of the parser. - { - "tags", - "<a>b<c/>d</e>", - "<a>$b$<c/>$d$</e>", - }, - // Comments. - { - "comment0", - "abc<b><!-- skipme --></b>def", - "abc$<b>$</b>$def", - }, - { - "comment1", - "a<!-->z", - "a$z", - }, - { - "comment2", - "a<!--->z", - "a$z", - }, - { - "comment3", - "a<!--x>-->z", - "a$z", - }, - { - "comment4", - "a<!--x->-->z", - "a$z", - }, - { - "comment5", - "a<!>z", - "a$<!>z", - }, - { - "comment6", - "a<!->z", - "a$<!->z", - }, - { - "comment7", - "a<!---<>z", - "a$<!---<>z", - }, - { - "comment8", - "a<!--z", - "a$<!--z", - }, - // An attribute with a backslash. - { - "backslash", - `<p id="a\"b">`, - `<p id="a"b">`, - }, - // Entities, tag name and attribute key lower-casing, and whitespace - // normalization within a tag. - { - "tricky", - "<p \t\n iD=\"a"B\" foo=\"bar\"><EM>te<&;xt</em></p>", - `<p id="a"B" foo="bar">$<em>$te<&;xt$</em>$</p>`, - }, - // A nonexistent entity. Tokenizing and converting back to a string should - // escape the "&" to become "&". - { - "noSuchEntity", - `<a b="c&noSuchEntity;d"><&alsoDoesntExist;&`, - `<a b="c&noSuchEntity;d">$<&alsoDoesntExist;&`, - }, - - // Attribute tests: - // http://dev.w3.org/html5/spec/Overview.html#attributes-0 - { - "Empty attribute", - `<input disabled FOO>`, - `<input disabled="" foo="">`, - }, - { - "Empty attribute, whitespace", - `<input disabled FOO >`, - `<input disabled="" foo="">`, - }, - { - "Unquoted attribute value", - `<input value=yes FOO=BAR>`, - `<input value="yes" foo="BAR">`, - }, - { - "Unquoted attribute value, spaces", - `<input value = yes FOO = BAR>`, - `<input value="yes" foo="BAR">`, - }, - { - "Unquoted attribute value, trailing space", - `<input value=yes FOO=BAR >`, - `<input value="yes" foo="BAR">`, - }, - { - "Single-quoted attribute value", - `<input value='yes' FOO='BAR'>`, - `<input value="yes" foo="BAR">`, - }, - { - "Single-quoted attribute value, trailing space", - `<input value='yes' FOO='BAR' >`, - `<input value="yes" foo="BAR">`, - }, - { - "Double-quoted attribute value", - `<input value="I'm an attribute" FOO="BAR">`, - `<input value="I'm an attribute" foo="BAR">`, - }, - { - "Attribute name characters", - `<meta http-equiv="content-type">`, - `<meta http-equiv="content-type">`, - }, -} - -func TestTokenizer(t *testing.T) { -loop: - for _, tt := range tokenTests { - z := NewTokenizer(bytes.NewBuffer([]byte(tt.html))) - for i, s := range strings.Split(tt.golden, "$") { - if z.Next() == ErrorToken { - t.Errorf("%s token %d: want %q got error %v", tt.desc, i, s, z.Error()) - continue loop - } - actual := z.Token().String() - if s != actual { - t.Errorf("%s token %d: want %q got %q", tt.desc, i, s, actual) - continue loop - } - } - z.Next() - if z.Error() != os.EOF { - t.Errorf("%s: want EOF got %q", tt.desc, z.Token().String()) - } - } -} - -type unescapeTest struct { - // A short description of the test case. - desc string - // The HTML text. - html string - // The unescaped text. - unescaped string -} - -var unescapeTests = []unescapeTest{ - // Handle no entities. - { - "copy", - "A\ttext\nstring", - "A\ttext\nstring", - }, - // Handle simple named entities. - { - "simple", - "& > <", - "& > <", - }, - // Handle hitting the end of the string. - { - "stringEnd", - "& &", - "& &", - }, - // Handle entities with two codepoints. - { - "multiCodepoint", - "text ⋛︀ blah", - "text \u22db\ufe00 blah", - }, - // Handle decimal numeric entities. - { - "decimalEntity", - "Delta = Δ ", - "Delta = Δ ", - }, - // Handle hexadecimal numeric entities. - { - "hexadecimalEntity", - "Lambda = λ = λ ", - "Lambda = λ = λ ", - }, - // Handle numeric early termination. - { - "numericEnds", - "&# &#x €43 © = ©f = ©", - "&# &#x €43 © = ©f = ©", - }, - // Handle numeric ISO-8859-1 entity replacements. - { - "numericReplacements", - "Footnote‡", - "Footnote‡", - }, -} - -func TestUnescape(t *testing.T) { - for _, tt := range unescapeTests { - unescaped := UnescapeString(tt.html) - if unescaped != tt.unescaped { - t.Errorf("TestUnescape %s: want %q, got %q", tt.desc, tt.unescaped, unescaped) - } - } -} - -func TestUnescapeEscape(t *testing.T) { - ss := []string{ - ``, - `abc def`, - `a & b`, - `a&b`, - `a & b`, - `"`, - `"`, - `"<&>"`, - `"<&>"`, - `3&5==1 && 0<1, "0<1", a+acute=á`, - } - for _, s := range ss { - if s != UnescapeString(EscapeString(s)) { - t.Errorf("s != UnescapeString(EscapeString(s)), s=%q", s) - } - } -} - -func TestBufAPI(t *testing.T) { - s := "0<a>1</a>2<b>3<a>4<a>5</a>6</b>7</a>8<a/>9" - z := NewTokenizer(bytes.NewBuffer([]byte(s))) - result := bytes.NewBuffer(nil) - depth := 0 -loop: - for { - tt := z.Next() - switch tt { - case ErrorToken: - if z.Error() != os.EOF { - t.Error(z.Error()) - } - break loop - case TextToken: - if depth > 0 { - result.Write(z.Text()) - } - case StartTagToken, EndTagToken: - tn, _ := z.TagName() - if len(tn) == 1 && tn[0] == 'a' { - if tt == StartTagToken { - depth++ - } else { - depth-- - } - } - } - } - u := "14567" - v := string(result.Bytes()) - if u != v { - t.Errorf("TestBufAPI: want %q got %q", u, v) - } -} |