summaryrefslogtreecommitdiff
path: root/src/pkg/regexp
diff options
context:
space:
mode:
authorTianon Gravi <admwiggin@gmail.com>2015-01-15 11:54:00 -0700
committerTianon Gravi <admwiggin@gmail.com>2015-01-15 11:54:00 -0700
commitf154da9e12608589e8d5f0508f908a0c3e88a1bb (patch)
treef8255d51e10c6f1e0ed69702200b966c9556a431 /src/pkg/regexp
parent8d8329ed5dfb9622c82a9fbec6fd99a580f9c9f6 (diff)
downloadgolang-upstream/1.4.tar.gz
Imported Upstream version 1.4upstream/1.4
Diffstat (limited to 'src/pkg/regexp')
-rw-r--r--src/pkg/regexp/all_test.go645
-rw-r--r--src/pkg/regexp/example_test.go148
-rw-r--r--src/pkg/regexp/exec.go452
-rw-r--r--src/pkg/regexp/exec2_test.go20
-rw-r--r--src/pkg/regexp/exec_test.go715
-rw-r--r--src/pkg/regexp/find_test.go498
-rw-r--r--src/pkg/regexp/onepass.go582
-rw-r--r--src/pkg/regexp/onepass_test.go208
-rw-r--r--src/pkg/regexp/regexp.go1120
-rw-r--r--src/pkg/regexp/syntax/compile.go289
-rw-r--r--src/pkg/regexp/syntax/doc.go131
-rwxr-xr-xsrc/pkg/regexp/syntax/make_perl_groups.pl107
-rw-r--r--src/pkg/regexp/syntax/parse.go1863
-rw-r--r--src/pkg/regexp/syntax/parse_test.go559
-rw-r--r--src/pkg/regexp/syntax/perl_groups.go134
-rw-r--r--src/pkg/regexp/syntax/prog.go345
-rw-r--r--src/pkg/regexp/syntax/prog_test.go114
-rw-r--r--src/pkg/regexp/syntax/regexp.go319
-rw-r--r--src/pkg/regexp/syntax/simplify.go151
-rw-r--r--src/pkg/regexp/syntax/simplify_test.go151
-rw-r--r--src/pkg/regexp/testdata/README23
-rw-r--r--src/pkg/regexp/testdata/basic.dat221
-rw-r--r--src/pkg/regexp/testdata/nullsubexpr.dat79
-rw-r--r--src/pkg/regexp/testdata/re2-exhaustive.txt.bz2bin394016 -> 0 bytes
-rw-r--r--src/pkg/regexp/testdata/re2-search.txt3667
-rw-r--r--src/pkg/regexp/testdata/repetition.dat163
-rw-r--r--src/pkg/regexp/testdata/testregex.c2286
27 files changed, 0 insertions, 14990 deletions
diff --git a/src/pkg/regexp/all_test.go b/src/pkg/regexp/all_test.go
deleted file mode 100644
index 301a1dfcd..000000000
--- a/src/pkg/regexp/all_test.go
+++ /dev/null
@@ -1,645 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package regexp
-
-import (
- "reflect"
- "strings"
- "testing"
-)
-
-var good_re = []string{
- ``,
- `.`,
- `^.$`,
- `a`,
- `a*`,
- `a+`,
- `a?`,
- `a|b`,
- `a*|b*`,
- `(a*|b)(c*|d)`,
- `[a-z]`,
- `[a-abc-c\-\]\[]`,
- `[a-z]+`,
- `[abc]`,
- `[^1234]`,
- `[^\n]`,
- `\!\\`,
-}
-
-type stringError struct {
- re string
- err string
-}
-
-var bad_re = []stringError{
- {`*`, "missing argument to repetition operator: `*`"},
- {`+`, "missing argument to repetition operator: `+`"},
- {`?`, "missing argument to repetition operator: `?`"},
- {`(abc`, "missing closing ): `(abc`"},
- {`abc)`, "unexpected ): `abc)`"},
- {`x[a-z`, "missing closing ]: `[a-z`"},
- {`[z-a]`, "invalid character class range: `z-a`"},
- {`abc\`, "trailing backslash at end of expression"},
- {`a**`, "invalid nested repetition operator: `**`"},
- {`a*+`, "invalid nested repetition operator: `*+`"},
- {`\x`, "invalid escape sequence: `\\x`"},
-}
-
-func compileTest(t *testing.T, expr string, error string) *Regexp {
- re, err := Compile(expr)
- if error == "" && err != nil {
- t.Error("compiling `", expr, "`; unexpected error: ", err.Error())
- }
- if error != "" && err == nil {
- t.Error("compiling `", expr, "`; missing error")
- } else if error != "" && !strings.Contains(err.Error(), error) {
- t.Error("compiling `", expr, "`; wrong error: ", err.Error(), "; want ", error)
- }
- return re
-}
-
-func TestGoodCompile(t *testing.T) {
- for i := 0; i < len(good_re); i++ {
- compileTest(t, good_re[i], "")
- }
-}
-
-func TestBadCompile(t *testing.T) {
- for i := 0; i < len(bad_re); i++ {
- compileTest(t, bad_re[i].re, bad_re[i].err)
- }
-}
-
-func matchTest(t *testing.T, test *FindTest) {
- re := compileTest(t, test.pat, "")
- if re == nil {
- return
- }
- m := re.MatchString(test.text)
- if m != (len(test.matches) > 0) {
- t.Errorf("MatchString failure on %s: %t should be %t", test, m, len(test.matches) > 0)
- }
- // now try bytes
- m = re.Match([]byte(test.text))
- if m != (len(test.matches) > 0) {
- t.Errorf("Match failure on %s: %t should be %t", test, m, len(test.matches) > 0)
- }
-}
-
-func TestMatch(t *testing.T) {
- for _, test := range findTests {
- matchTest(t, &test)
- }
-}
-
-func matchFunctionTest(t *testing.T, test *FindTest) {
- m, err := MatchString(test.pat, test.text)
- if err == nil {
- return
- }
- if m != (len(test.matches) > 0) {
- t.Errorf("Match failure on %s: %t should be %t", test, m, len(test.matches) > 0)
- }
-}
-
-func TestMatchFunction(t *testing.T) {
- for _, test := range findTests {
- matchFunctionTest(t, &test)
- }
-}
-
-type ReplaceTest struct {
- pattern, replacement, input, output string
-}
-
-var replaceTests = []ReplaceTest{
- // Test empty input and/or replacement, with pattern that matches the empty string.
- {"", "", "", ""},
- {"", "x", "", "x"},
- {"", "", "abc", "abc"},
- {"", "x", "abc", "xaxbxcx"},
-
- // Test empty input and/or replacement, with pattern that does not match the empty string.
- {"b", "", "", ""},
- {"b", "x", "", ""},
- {"b", "", "abc", "ac"},
- {"b", "x", "abc", "axc"},
- {"y", "", "", ""},
- {"y", "x", "", ""},
- {"y", "", "abc", "abc"},
- {"y", "x", "abc", "abc"},
-
- // Multibyte characters -- verify that we don't try to match in the middle
- // of a character.
- {"[a-c]*", "x", "\u65e5", "x\u65e5x"},
- {"[^\u65e5]", "x", "abc\u65e5def", "xxx\u65e5xxx"},
-
- // Start and end of a string.
- {"^[a-c]*", "x", "abcdabc", "xdabc"},
- {"[a-c]*$", "x", "abcdabc", "abcdx"},
- {"^[a-c]*$", "x", "abcdabc", "abcdabc"},
- {"^[a-c]*", "x", "abc", "x"},
- {"[a-c]*$", "x", "abc", "x"},
- {"^[a-c]*$", "x", "abc", "x"},
- {"^[a-c]*", "x", "dabce", "xdabce"},
- {"[a-c]*$", "x", "dabce", "dabcex"},
- {"^[a-c]*$", "x", "dabce", "dabce"},
- {"^[a-c]*", "x", "", "x"},
- {"[a-c]*$", "x", "", "x"},
- {"^[a-c]*$", "x", "", "x"},
-
- {"^[a-c]+", "x", "abcdabc", "xdabc"},
- {"[a-c]+$", "x", "abcdabc", "abcdx"},
- {"^[a-c]+$", "x", "abcdabc", "abcdabc"},
- {"^[a-c]+", "x", "abc", "x"},
- {"[a-c]+$", "x", "abc", "x"},
- {"^[a-c]+$", "x", "abc", "x"},
- {"^[a-c]+", "x", "dabce", "dabce"},
- {"[a-c]+$", "x", "dabce", "dabce"},
- {"^[a-c]+$", "x", "dabce", "dabce"},
- {"^[a-c]+", "x", "", ""},
- {"[a-c]+$", "x", "", ""},
- {"^[a-c]+$", "x", "", ""},
-
- // Other cases.
- {"abc", "def", "abcdefg", "defdefg"},
- {"bc", "BC", "abcbcdcdedef", "aBCBCdcdedef"},
- {"abc", "", "abcdabc", "d"},
- {"x", "xXx", "xxxXxxx", "xXxxXxxXxXxXxxXxxXx"},
- {"abc", "d", "", ""},
- {"abc", "d", "abc", "d"},
- {".+", "x", "abc", "x"},
- {"[a-c]*", "x", "def", "xdxexfx"},
- {"[a-c]+", "x", "abcbcdcdedef", "xdxdedef"},
- {"[a-c]*", "x", "abcbcdcdedef", "xdxdxexdxexfx"},
-
- // Substitutions
- {"a+", "($0)", "banana", "b(a)n(a)n(a)"},
- {"a+", "(${0})", "banana", "b(a)n(a)n(a)"},
- {"a+", "(${0})$0", "banana", "b(a)an(a)an(a)a"},
- {"a+", "(${0})$0", "banana", "b(a)an(a)an(a)a"},
- {"hello, (.+)", "goodbye, ${1}", "hello, world", "goodbye, world"},
- {"hello, (.+)", "goodbye, $1x", "hello, world", "goodbye, "},
- {"hello, (.+)", "goodbye, ${1}x", "hello, world", "goodbye, worldx"},
- {"hello, (.+)", "<$0><$1><$2><$3>", "hello, world", "<hello, world><world><><>"},
- {"hello, (?P<noun>.+)", "goodbye, $noun!", "hello, world", "goodbye, world!"},
- {"hello, (?P<noun>.+)", "goodbye, ${noun}", "hello, world", "goodbye, world"},
- {"(?P<x>hi)|(?P<x>bye)", "$x$x$x", "hi", "hihihi"},
- {"(?P<x>hi)|(?P<x>bye)", "$x$x$x", "bye", "byebyebye"},
- {"(?P<x>hi)|(?P<x>bye)", "$xyz", "hi", ""},
- {"(?P<x>hi)|(?P<x>bye)", "${x}yz", "hi", "hiyz"},
- {"(?P<x>hi)|(?P<x>bye)", "hello $$x", "hi", "hello $x"},
- {"a+", "${oops", "aaa", "${oops"},
- {"a+", "$$", "aaa", "$"},
- {"a+", "$", "aaa", "$"},
-
- // Substitution when subexpression isn't found
- {"(x)?", "$1", "123", "123"},
- {"abc", "$1", "123", "123"},
-}
-
-var replaceLiteralTests = []ReplaceTest{
- // Substitutions
- {"a+", "($0)", "banana", "b($0)n($0)n($0)"},
- {"a+", "(${0})", "banana", "b(${0})n(${0})n(${0})"},
- {"a+", "(${0})$0", "banana", "b(${0})$0n(${0})$0n(${0})$0"},
- {"a+", "(${0})$0", "banana", "b(${0})$0n(${0})$0n(${0})$0"},
- {"hello, (.+)", "goodbye, ${1}", "hello, world", "goodbye, ${1}"},
- {"hello, (?P<noun>.+)", "goodbye, $noun!", "hello, world", "goodbye, $noun!"},
- {"hello, (?P<noun>.+)", "goodbye, ${noun}", "hello, world", "goodbye, ${noun}"},
- {"(?P<x>hi)|(?P<x>bye)", "$x$x$x", "hi", "$x$x$x"},
- {"(?P<x>hi)|(?P<x>bye)", "$x$x$x", "bye", "$x$x$x"},
- {"(?P<x>hi)|(?P<x>bye)", "$xyz", "hi", "$xyz"},
- {"(?P<x>hi)|(?P<x>bye)", "${x}yz", "hi", "${x}yz"},
- {"(?P<x>hi)|(?P<x>bye)", "hello $$x", "hi", "hello $$x"},
- {"a+", "${oops", "aaa", "${oops"},
- {"a+", "$$", "aaa", "$$"},
- {"a+", "$", "aaa", "$"},
-}
-
-type ReplaceFuncTest struct {
- pattern string
- replacement func(string) string
- input, output string
-}
-
-var replaceFuncTests = []ReplaceFuncTest{
- {"[a-c]", func(s string) string { return "x" + s + "y" }, "defabcdef", "defxayxbyxcydef"},
- {"[a-c]+", func(s string) string { return "x" + s + "y" }, "defabcdef", "defxabcydef"},
- {"[a-c]*", func(s string) string { return "x" + s + "y" }, "defabcdef", "xydxyexyfxabcydxyexyfxy"},
-}
-
-func TestReplaceAll(t *testing.T) {
- for _, tc := range replaceTests {
- re, err := Compile(tc.pattern)
- if err != nil {
- t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err)
- continue
- }
- actual := re.ReplaceAllString(tc.input, tc.replacement)
- if actual != tc.output {
- t.Errorf("%q.ReplaceAllString(%q,%q) = %q; want %q",
- tc.pattern, tc.input, tc.replacement, actual, tc.output)
- }
- // now try bytes
- actual = string(re.ReplaceAll([]byte(tc.input), []byte(tc.replacement)))
- if actual != tc.output {
- t.Errorf("%q.ReplaceAll(%q,%q) = %q; want %q",
- tc.pattern, tc.input, tc.replacement, actual, tc.output)
- }
- }
-}
-
-func TestReplaceAllLiteral(t *testing.T) {
- // Run ReplaceAll tests that do not have $ expansions.
- for _, tc := range replaceTests {
- if strings.Contains(tc.replacement, "$") {
- continue
- }
- re, err := Compile(tc.pattern)
- if err != nil {
- t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err)
- continue
- }
- actual := re.ReplaceAllLiteralString(tc.input, tc.replacement)
- if actual != tc.output {
- t.Errorf("%q.ReplaceAllLiteralString(%q,%q) = %q; want %q",
- tc.pattern, tc.input, tc.replacement, actual, tc.output)
- }
- // now try bytes
- actual = string(re.ReplaceAllLiteral([]byte(tc.input), []byte(tc.replacement)))
- if actual != tc.output {
- t.Errorf("%q.ReplaceAllLiteral(%q,%q) = %q; want %q",
- tc.pattern, tc.input, tc.replacement, actual, tc.output)
- }
- }
-
- // Run literal-specific tests.
- for _, tc := range replaceLiteralTests {
- re, err := Compile(tc.pattern)
- if err != nil {
- t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err)
- continue
- }
- actual := re.ReplaceAllLiteralString(tc.input, tc.replacement)
- if actual != tc.output {
- t.Errorf("%q.ReplaceAllLiteralString(%q,%q) = %q; want %q",
- tc.pattern, tc.input, tc.replacement, actual, tc.output)
- }
- // now try bytes
- actual = string(re.ReplaceAllLiteral([]byte(tc.input), []byte(tc.replacement)))
- if actual != tc.output {
- t.Errorf("%q.ReplaceAllLiteral(%q,%q) = %q; want %q",
- tc.pattern, tc.input, tc.replacement, actual, tc.output)
- }
- }
-}
-
-func TestReplaceAllFunc(t *testing.T) {
- for _, tc := range replaceFuncTests {
- re, err := Compile(tc.pattern)
- if err != nil {
- t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err)
- continue
- }
- actual := re.ReplaceAllStringFunc(tc.input, tc.replacement)
- if actual != tc.output {
- t.Errorf("%q.ReplaceFunc(%q,fn) = %q; want %q",
- tc.pattern, tc.input, actual, tc.output)
- }
- // now try bytes
- actual = string(re.ReplaceAllFunc([]byte(tc.input), func(s []byte) []byte { return []byte(tc.replacement(string(s))) }))
- if actual != tc.output {
- t.Errorf("%q.ReplaceFunc(%q,fn) = %q; want %q",
- tc.pattern, tc.input, actual, tc.output)
- }
- }
-}
-
-type MetaTest struct {
- pattern, output, literal string
- isLiteral bool
-}
-
-var metaTests = []MetaTest{
- {``, ``, ``, true},
- {`foo`, `foo`, `foo`, true},
- {`foo\.\$`, `foo\\\.\\\$`, `foo.$`, true}, // has meta but no operator
- {`foo.\$`, `foo\.\\\$`, `foo`, false}, // has escaped operators and real operators
- {`!@#$%^&*()_+-=[{]}\|,<.>/?~`, `!@#\$%\^&\*\(\)_\+-=\[\{\]\}\\\|,<\.>/\?~`, `!@#`, false},
-}
-
-func TestQuoteMeta(t *testing.T) {
- for _, tc := range metaTests {
- // Verify that QuoteMeta returns the expected string.
- quoted := QuoteMeta(tc.pattern)
- if quoted != tc.output {
- t.Errorf("QuoteMeta(`%s`) = `%s`; want `%s`",
- tc.pattern, quoted, tc.output)
- continue
- }
-
- // Verify that the quoted string is in fact treated as expected
- // by Compile -- i.e. that it matches the original, unquoted string.
- if tc.pattern != "" {
- re, err := Compile(quoted)
- if err != nil {
- t.Errorf("Unexpected error compiling QuoteMeta(`%s`): %v", tc.pattern, err)
- continue
- }
- src := "abc" + tc.pattern + "def"
- repl := "xyz"
- replaced := re.ReplaceAllString(src, repl)
- expected := "abcxyzdef"
- if replaced != expected {
- t.Errorf("QuoteMeta(`%s`).Replace(`%s`,`%s`) = `%s`; want `%s`",
- tc.pattern, src, repl, replaced, expected)
- }
- }
- }
-}
-
-func TestLiteralPrefix(t *testing.T) {
- for _, tc := range metaTests {
- // Literal method needs to scan the pattern.
- re := MustCompile(tc.pattern)
- str, complete := re.LiteralPrefix()
- if complete != tc.isLiteral {
- t.Errorf("LiteralPrefix(`%s`) = %t; want %t", tc.pattern, complete, tc.isLiteral)
- }
- if str != tc.literal {
- t.Errorf("LiteralPrefix(`%s`) = `%s`; want `%s`", tc.pattern, str, tc.literal)
- }
- }
-}
-
-type subexpCase struct {
- input string
- num int
- names []string
-}
-
-var subexpCases = []subexpCase{
- {``, 0, nil},
- {`.*`, 0, nil},
- {`abba`, 0, nil},
- {`ab(b)a`, 1, []string{"", ""}},
- {`ab(.*)a`, 1, []string{"", ""}},
- {`(.*)ab(.*)a`, 2, []string{"", "", ""}},
- {`(.*)(ab)(.*)a`, 3, []string{"", "", "", ""}},
- {`(.*)((a)b)(.*)a`, 4, []string{"", "", "", "", ""}},
- {`(.*)(\(ab)(.*)a`, 3, []string{"", "", "", ""}},
- {`(.*)(\(a\)b)(.*)a`, 3, []string{"", "", "", ""}},
- {`(?P<foo>.*)(?P<bar>(a)b)(?P<foo>.*)a`, 4, []string{"", "foo", "bar", "", "foo"}},
-}
-
-func TestSubexp(t *testing.T) {
- for _, c := range subexpCases {
- re := MustCompile(c.input)
- n := re.NumSubexp()
- if n != c.num {
- t.Errorf("%q: NumSubexp = %d, want %d", c.input, n, c.num)
- continue
- }
- names := re.SubexpNames()
- if len(names) != 1+n {
- t.Errorf("%q: len(SubexpNames) = %d, want %d", c.input, len(names), n)
- continue
- }
- if c.names != nil {
- for i := 0; i < 1+n; i++ {
- if names[i] != c.names[i] {
- t.Errorf("%q: SubexpNames[%d] = %q, want %q", c.input, i, names[i], c.names[i])
- }
- }
- }
- }
-}
-
-var splitTests = []struct {
- s string
- r string
- n int
- out []string
-}{
- {"foo:and:bar", ":", -1, []string{"foo", "and", "bar"}},
- {"foo:and:bar", ":", 1, []string{"foo:and:bar"}},
- {"foo:and:bar", ":", 2, []string{"foo", "and:bar"}},
- {"foo:and:bar", "foo", -1, []string{"", ":and:bar"}},
- {"foo:and:bar", "bar", -1, []string{"foo:and:", ""}},
- {"foo:and:bar", "baz", -1, []string{"foo:and:bar"}},
- {"baabaab", "a", -1, []string{"b", "", "b", "", "b"}},
- {"baabaab", "a*", -1, []string{"b", "b", "b"}},
- {"baabaab", "ba*", -1, []string{"", "", "", ""}},
- {"foobar", "f*b*", -1, []string{"", "o", "o", "a", "r"}},
- {"foobar", "f+.*b+", -1, []string{"", "ar"}},
- {"foobooboar", "o{2}", -1, []string{"f", "b", "boar"}},
- {"a,b,c,d,e,f", ",", 3, []string{"a", "b", "c,d,e,f"}},
- {"a,b,c,d,e,f", ",", 0, nil},
- {",", ",", -1, []string{"", ""}},
- {",,,", ",", -1, []string{"", "", "", ""}},
- {"", ",", -1, []string{""}},
- {"", ".*", -1, []string{""}},
- {"", ".+", -1, []string{""}},
- {"", "", -1, []string{}},
- {"foobar", "", -1, []string{"f", "o", "o", "b", "a", "r"}},
- {"abaabaccadaaae", "a*", 5, []string{"", "b", "b", "c", "cadaaae"}},
- {":x:y:z:", ":", -1, []string{"", "x", "y", "z", ""}},
-}
-
-func TestSplit(t *testing.T) {
- for i, test := range splitTests {
- re, err := Compile(test.r)
- if err != nil {
- t.Errorf("#%d: %q: compile error: %s", i, test.r, err.Error())
- continue
- }
-
- split := re.Split(test.s, test.n)
- if !reflect.DeepEqual(split, test.out) {
- t.Errorf("#%d: %q: got %q; want %q", i, test.r, split, test.out)
- }
-
- if QuoteMeta(test.r) == test.r {
- strsplit := strings.SplitN(test.s, test.r, test.n)
- if !reflect.DeepEqual(split, strsplit) {
- t.Errorf("#%d: Split(%q, %q, %d): regexp vs strings mismatch\nregexp=%q\nstrings=%q", i, test.s, test.r, test.n, split, strsplit)
- }
- }
- }
-}
-
-// This ran out of stack before issue 7608 was fixed.
-func TestOnePassCutoff(t *testing.T) {
- MustCompile(`^(?:x{1,1000}){1,1000}$`)
-}
-
-func BenchmarkLiteral(b *testing.B) {
- x := strings.Repeat("x", 50) + "y"
- b.StopTimer()
- re := MustCompile("y")
- b.StartTimer()
- for i := 0; i < b.N; i++ {
- if !re.MatchString(x) {
- b.Fatalf("no match!")
- }
- }
-}
-
-func BenchmarkNotLiteral(b *testing.B) {
- x := strings.Repeat("x", 50) + "y"
- b.StopTimer()
- re := MustCompile(".y")
- b.StartTimer()
- for i := 0; i < b.N; i++ {
- if !re.MatchString(x) {
- b.Fatalf("no match!")
- }
- }
-}
-
-func BenchmarkMatchClass(b *testing.B) {
- b.StopTimer()
- x := strings.Repeat("xxxx", 20) + "w"
- re := MustCompile("[abcdw]")
- b.StartTimer()
- for i := 0; i < b.N; i++ {
- if !re.MatchString(x) {
- b.Fatalf("no match!")
- }
- }
-}
-
-func BenchmarkMatchClass_InRange(b *testing.B) {
- b.StopTimer()
- // 'b' is between 'a' and 'c', so the charclass
- // range checking is no help here.
- x := strings.Repeat("bbbb", 20) + "c"
- re := MustCompile("[ac]")
- b.StartTimer()
- for i := 0; i < b.N; i++ {
- if !re.MatchString(x) {
- b.Fatalf("no match!")
- }
- }
-}
-
-func BenchmarkReplaceAll(b *testing.B) {
- x := "abcdefghijklmnopqrstuvwxyz"
- b.StopTimer()
- re := MustCompile("[cjrw]")
- b.StartTimer()
- for i := 0; i < b.N; i++ {
- re.ReplaceAllString(x, "")
- }
-}
-
-func BenchmarkAnchoredLiteralShortNonMatch(b *testing.B) {
- b.StopTimer()
- x := []byte("abcdefghijklmnopqrstuvwxyz")
- re := MustCompile("^zbc(d|e)")
- b.StartTimer()
- for i := 0; i < b.N; i++ {
- re.Match(x)
- }
-}
-
-func BenchmarkAnchoredLiteralLongNonMatch(b *testing.B) {
- b.StopTimer()
- x := []byte("abcdefghijklmnopqrstuvwxyz")
- for i := 0; i < 15; i++ {
- x = append(x, x...)
- }
- re := MustCompile("^zbc(d|e)")
- b.StartTimer()
- for i := 0; i < b.N; i++ {
- re.Match(x)
- }
-}
-
-func BenchmarkAnchoredShortMatch(b *testing.B) {
- b.StopTimer()
- x := []byte("abcdefghijklmnopqrstuvwxyz")
- re := MustCompile("^.bc(d|e)")
- b.StartTimer()
- for i := 0; i < b.N; i++ {
- re.Match(x)
- }
-}
-
-func BenchmarkAnchoredLongMatch(b *testing.B) {
- b.StopTimer()
- x := []byte("abcdefghijklmnopqrstuvwxyz")
- for i := 0; i < 15; i++ {
- x = append(x, x...)
- }
- re := MustCompile("^.bc(d|e)")
- b.StartTimer()
- for i := 0; i < b.N; i++ {
- re.Match(x)
- }
-}
-
-func BenchmarkOnePassShortA(b *testing.B) {
- b.StopTimer()
- x := []byte("abcddddddeeeededd")
- re := MustCompile("^.bc(d|e)*$")
- b.StartTimer()
- for i := 0; i < b.N; i++ {
- re.Match(x)
- }
-}
-
-func BenchmarkNotOnePassShortA(b *testing.B) {
- b.StopTimer()
- x := []byte("abcddddddeeeededd")
- re := MustCompile(".bc(d|e)*$")
- b.StartTimer()
- for i := 0; i < b.N; i++ {
- re.Match(x)
- }
-}
-
-func BenchmarkOnePassShortB(b *testing.B) {
- b.StopTimer()
- x := []byte("abcddddddeeeededd")
- re := MustCompile("^.bc(?:d|e)*$")
- b.StartTimer()
- for i := 0; i < b.N; i++ {
- re.Match(x)
- }
-}
-
-func BenchmarkNotOnePassShortB(b *testing.B) {
- b.StopTimer()
- x := []byte("abcddddddeeeededd")
- re := MustCompile(".bc(?:d|e)*$")
- b.StartTimer()
- for i := 0; i < b.N; i++ {
- re.Match(x)
- }
-}
-
-func BenchmarkOnePassLongPrefix(b *testing.B) {
- b.StopTimer()
- x := []byte("abcdefghijklmnopqrstuvwxyz")
- re := MustCompile("^abcdefghijklmnopqrstuvwxyz.*$")
- b.StartTimer()
- for i := 0; i < b.N; i++ {
- re.Match(x)
- }
-}
-
-func BenchmarkOnePassLongNotPrefix(b *testing.B) {
- b.StopTimer()
- x := []byte("abcdefghijklmnopqrstuvwxyz")
- re := MustCompile("^.bcdefghijklmnopqrstuvwxyz.*$")
- b.StartTimer()
- for i := 0; i < b.N; i++ {
- re.Match(x)
- }
-}
diff --git a/src/pkg/regexp/example_test.go b/src/pkg/regexp/example_test.go
deleted file mode 100644
index a4e0da8ea..000000000
--- a/src/pkg/regexp/example_test.go
+++ /dev/null
@@ -1,148 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package regexp_test
-
-import (
- "fmt"
- "regexp"
-)
-
-func Example() {
- // Compile the expression once, usually at init time.
- // Use raw strings to avoid having to quote the backslashes.
- var validID = regexp.MustCompile(`^[a-z]+\[[0-9]+\]$`)
-
- fmt.Println(validID.MatchString("adam[23]"))
- fmt.Println(validID.MatchString("eve[7]"))
- fmt.Println(validID.MatchString("Job[48]"))
- fmt.Println(validID.MatchString("snakey"))
- // Output:
- // true
- // true
- // false
- // false
-}
-
-func ExampleMatchString() {
- matched, err := regexp.MatchString("foo.*", "seafood")
- fmt.Println(matched, err)
- matched, err = regexp.MatchString("bar.*", "seafood")
- fmt.Println(matched, err)
- matched, err = regexp.MatchString("a(b", "seafood")
- fmt.Println(matched, err)
- // Output:
- // true <nil>
- // false <nil>
- // false error parsing regexp: missing closing ): `a(b`
-}
-
-func ExampleRegexp_FindString() {
- re := regexp.MustCompile("fo.?")
- fmt.Printf("%q\n", re.FindString("seafood"))
- fmt.Printf("%q\n", re.FindString("meat"))
- // Output:
- // "foo"
- // ""
-}
-
-func ExampleRegexp_FindStringIndex() {
- re := regexp.MustCompile("ab?")
- fmt.Println(re.FindStringIndex("tablett"))
- fmt.Println(re.FindStringIndex("foo") == nil)
- // Output:
- // [1 3]
- // true
-}
-
-func ExampleRegexp_FindStringSubmatch() {
- re := regexp.MustCompile("a(x*)b(y|z)c")
- fmt.Printf("%q\n", re.FindStringSubmatch("-axxxbyc-"))
- fmt.Printf("%q\n", re.FindStringSubmatch("-abzc-"))
- // Output:
- // ["axxxbyc" "xxx" "y"]
- // ["abzc" "" "z"]
-}
-
-func ExampleRegexp_FindAllString() {
- re := regexp.MustCompile("a.")
- fmt.Println(re.FindAllString("paranormal", -1))
- fmt.Println(re.FindAllString("paranormal", 2))
- fmt.Println(re.FindAllString("graal", -1))
- fmt.Println(re.FindAllString("none", -1))
- // Output:
- // [ar an al]
- // [ar an]
- // [aa]
- // []
-}
-
-func ExampleRegexp_FindAllStringSubmatch() {
- re := regexp.MustCompile("a(x*)b")
- fmt.Printf("%q\n", re.FindAllStringSubmatch("-ab-", -1))
- fmt.Printf("%q\n", re.FindAllStringSubmatch("-axxb-", -1))
- fmt.Printf("%q\n", re.FindAllStringSubmatch("-ab-axb-", -1))
- fmt.Printf("%q\n", re.FindAllStringSubmatch("-axxb-ab-", -1))
- // Output:
- // [["ab" ""]]
- // [["axxb" "xx"]]
- // [["ab" ""] ["axb" "x"]]
- // [["axxb" "xx"] ["ab" ""]]
-}
-
-func ExampleRegexp_FindAllStringSubmatchIndex() {
- re := regexp.MustCompile("a(x*)b")
- // Indices:
- // 01234567 012345678
- // -ab-axb- -axxb-ab-
- fmt.Println(re.FindAllStringSubmatchIndex("-ab-", -1))
- fmt.Println(re.FindAllStringSubmatchIndex("-axxb-", -1))
- fmt.Println(re.FindAllStringSubmatchIndex("-ab-axb-", -1))
- fmt.Println(re.FindAllStringSubmatchIndex("-axxb-ab-", -1))
- fmt.Println(re.FindAllStringSubmatchIndex("-foo-", -1))
- // Output:
- // [[1 3 2 2]]
- // [[1 5 2 4]]
- // [[1 3 2 2] [4 7 5 6]]
- // [[1 5 2 4] [6 8 7 7]]
- // []
-}
-
-func ExampleRegexp_ReplaceAllLiteralString() {
- re := regexp.MustCompile("a(x*)b")
- fmt.Println(re.ReplaceAllLiteralString("-ab-axxb-", "T"))
- fmt.Println(re.ReplaceAllLiteralString("-ab-axxb-", "$1"))
- fmt.Println(re.ReplaceAllLiteralString("-ab-axxb-", "${1}"))
- // Output:
- // -T-T-
- // -$1-$1-
- // -${1}-${1}-
-}
-
-func ExampleRegexp_ReplaceAllString() {
- re := regexp.MustCompile("a(x*)b")
- fmt.Println(re.ReplaceAllString("-ab-axxb-", "T"))
- fmt.Println(re.ReplaceAllString("-ab-axxb-", "$1"))
- fmt.Println(re.ReplaceAllString("-ab-axxb-", "$1W"))
- fmt.Println(re.ReplaceAllString("-ab-axxb-", "${1}W"))
- // Output:
- // -T-T-
- // --xx-
- // ---
- // -W-xxW-
-}
-
-func ExampleRegexp_SubexpNames() {
- re := regexp.MustCompile("(?P<first>[a-zA-Z]+) (?P<last>[a-zA-Z]+)")
- fmt.Println(re.MatchString("Alan Turing"))
- fmt.Printf("%q\n", re.SubexpNames())
- reversed := fmt.Sprintf("${%s} ${%s}", re.SubexpNames()[2], re.SubexpNames()[1])
- fmt.Println(reversed)
- fmt.Println(re.ReplaceAllString("Alan Turing", reversed))
- // Output:
- // true
- // ["" "first" "last"]
- // ${last} ${first}
- // Turing Alan
-}
diff --git a/src/pkg/regexp/exec.go b/src/pkg/regexp/exec.go
deleted file mode 100644
index c4cb201f6..000000000
--- a/src/pkg/regexp/exec.go
+++ /dev/null
@@ -1,452 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package regexp
-
-import (
- "io"
- "regexp/syntax"
-)
-
-// A queue is a 'sparse array' holding pending threads of execution.
-// See http://research.swtch.com/2008/03/using-uninitialized-memory-for-fun-and.html
-type queue struct {
- sparse []uint32
- dense []entry
-}
-
-// A entry is an entry on a queue.
-// It holds both the instruction pc and the actual thread.
-// Some queue entries are just place holders so that the machine
-// knows it has considered that pc. Such entries have t == nil.
-type entry struct {
- pc uint32
- t *thread
-}
-
-// A thread is the state of a single path through the machine:
-// an instruction and a corresponding capture array.
-// See http://swtch.com/~rsc/regexp/regexp2.html
-type thread struct {
- inst *syntax.Inst
- cap []int
-}
-
-// A machine holds all the state during an NFA simulation for p.
-type machine struct {
- re *Regexp // corresponding Regexp
- p *syntax.Prog // compiled program
- op *onePassProg // compiled onepass program, or notOnePass
- q0, q1 queue // two queues for runq, nextq
- pool []*thread // pool of available threads
- matched bool // whether a match was found
- matchcap []int // capture information for the match
-
- // cached inputs, to avoid allocation
- inputBytes inputBytes
- inputString inputString
- inputReader inputReader
-}
-
-func (m *machine) newInputBytes(b []byte) input {
- m.inputBytes.str = b
- return &m.inputBytes
-}
-
-func (m *machine) newInputString(s string) input {
- m.inputString.str = s
- return &m.inputString
-}
-
-func (m *machine) newInputReader(r io.RuneReader) input {
- m.inputReader.r = r
- m.inputReader.atEOT = false
- m.inputReader.pos = 0
- return &m.inputReader
-}
-
-// progMachine returns a new machine running the prog p.
-func progMachine(p *syntax.Prog, op *onePassProg) *machine {
- m := &machine{p: p, op: op}
- n := len(m.p.Inst)
- m.q0 = queue{make([]uint32, n), make([]entry, 0, n)}
- m.q1 = queue{make([]uint32, n), make([]entry, 0, n)}
- ncap := p.NumCap
- if ncap < 2 {
- ncap = 2
- }
- m.matchcap = make([]int, ncap)
- return m
-}
-
-func (m *machine) init(ncap int) {
- for _, t := range m.pool {
- t.cap = t.cap[:ncap]
- }
- m.matchcap = m.matchcap[:ncap]
-}
-
-// alloc allocates a new thread with the given instruction.
-// It uses the free pool if possible.
-func (m *machine) alloc(i *syntax.Inst) *thread {
- var t *thread
- if n := len(m.pool); n > 0 {
- t = m.pool[n-1]
- m.pool = m.pool[:n-1]
- } else {
- t = new(thread)
- t.cap = make([]int, len(m.matchcap), cap(m.matchcap))
- }
- t.inst = i
- return t
-}
-
-// free returns t to the free pool.
-func (m *machine) free(t *thread) {
- m.inputBytes.str = nil
- m.inputString.str = ""
- m.inputReader.r = nil
- m.pool = append(m.pool, t)
-}
-
-// match runs the machine over the input starting at pos.
-// It reports whether a match was found.
-// If so, m.matchcap holds the submatch information.
-func (m *machine) match(i input, pos int) bool {
- startCond := m.re.cond
- if startCond == ^syntax.EmptyOp(0) { // impossible
- return false
- }
- m.matched = false
- for i := range m.matchcap {
- m.matchcap[i] = -1
- }
- runq, nextq := &m.q0, &m.q1
- r, r1 := endOfText, endOfText
- width, width1 := 0, 0
- r, width = i.step(pos)
- if r != endOfText {
- r1, width1 = i.step(pos + width)
- }
- var flag syntax.EmptyOp
- if pos == 0 {
- flag = syntax.EmptyOpContext(-1, r)
- } else {
- flag = i.context(pos)
- }
- for {
- if len(runq.dense) == 0 {
- if startCond&syntax.EmptyBeginText != 0 && pos != 0 {
- // Anchored match, past beginning of text.
- break
- }
- if m.matched {
- // Have match; finished exploring alternatives.
- break
- }
- if len(m.re.prefix) > 0 && r1 != m.re.prefixRune && i.canCheckPrefix() {
- // Match requires literal prefix; fast search for it.
- advance := i.index(m.re, pos)
- if advance < 0 {
- break
- }
- pos += advance
- r, width = i.step(pos)
- r1, width1 = i.step(pos + width)
- }
- }
- if !m.matched {
- if len(m.matchcap) > 0 {
- m.matchcap[0] = pos
- }
- m.add(runq, uint32(m.p.Start), pos, m.matchcap, flag, nil)
- }
- flag = syntax.EmptyOpContext(r, r1)
- m.step(runq, nextq, pos, pos+width, r, flag)
- if width == 0 {
- break
- }
- if len(m.matchcap) == 0 && m.matched {
- // Found a match and not paying attention
- // to where it is, so any match will do.
- break
- }
- pos += width
- r, width = r1, width1
- if r != endOfText {
- r1, width1 = i.step(pos + width)
- }
- runq, nextq = nextq, runq
- }
- m.clear(nextq)
- return m.matched
-}
-
-// clear frees all threads on the thread queue.
-func (m *machine) clear(q *queue) {
- for _, d := range q.dense {
- if d.t != nil {
- // m.free(d.t)
- m.pool = append(m.pool, d.t)
- }
- }
- q.dense = q.dense[:0]
-}
-
-// step executes one step of the machine, running each of the threads
-// on runq and appending new threads to nextq.
-// The step processes the rune c (which may be endOfText),
-// which starts at position pos and ends at nextPos.
-// nextCond gives the setting for the empty-width flags after c.
-func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond syntax.EmptyOp) {
- longest := m.re.longest
- for j := 0; j < len(runq.dense); j++ {
- d := &runq.dense[j]
- t := d.t
- if t == nil {
- continue
- }
- if longest && m.matched && len(t.cap) > 0 && m.matchcap[0] < t.cap[0] {
- // m.free(t)
- m.pool = append(m.pool, t)
- continue
- }
- i := t.inst
- add := false
- switch i.Op {
- default:
- panic("bad inst")
-
- case syntax.InstMatch:
- if len(t.cap) > 0 && (!longest || !m.matched || m.matchcap[1] < pos) {
- t.cap[1] = pos
- copy(m.matchcap, t.cap)
- }
- if !longest {
- // First-match mode: cut off all lower-priority threads.
- for _, d := range runq.dense[j+1:] {
- if d.t != nil {
- // m.free(d.t)
- m.pool = append(m.pool, d.t)
- }
- }
- runq.dense = runq.dense[:0]
- }
- m.matched = true
-
- case syntax.InstRune:
- add = i.MatchRune(c)
- case syntax.InstRune1:
- add = c == i.Rune[0]
- case syntax.InstRuneAny:
- add = true
- case syntax.InstRuneAnyNotNL:
- add = c != '\n'
- }
- if add {
- t = m.add(nextq, i.Out, nextPos, t.cap, nextCond, t)
- }
- if t != nil {
- // m.free(t)
- m.pool = append(m.pool, t)
- }
- }
- runq.dense = runq.dense[:0]
-}
-
-// add adds an entry to q for pc, unless the q already has such an entry.
-// It also recursively adds an entry for all instructions reachable from pc by following
-// empty-width conditions satisfied by cond. pos gives the current position
-// in the input.
-func (m *machine) add(q *queue, pc uint32, pos int, cap []int, cond syntax.EmptyOp, t *thread) *thread {
- if pc == 0 {
- return t
- }
- if j := q.sparse[pc]; j < uint32(len(q.dense)) && q.dense[j].pc == pc {
- return t
- }
-
- j := len(q.dense)
- q.dense = q.dense[:j+1]
- d := &q.dense[j]
- d.t = nil
- d.pc = pc
- q.sparse[pc] = uint32(j)
-
- i := &m.p.Inst[pc]
- switch i.Op {
- default:
- panic("unhandled")
- case syntax.InstFail:
- // nothing
- case syntax.InstAlt, syntax.InstAltMatch:
- t = m.add(q, i.Out, pos, cap, cond, t)
- t = m.add(q, i.Arg, pos, cap, cond, t)
- case syntax.InstEmptyWidth:
- if syntax.EmptyOp(i.Arg)&^cond == 0 {
- t = m.add(q, i.Out, pos, cap, cond, t)
- }
- case syntax.InstNop:
- t = m.add(q, i.Out, pos, cap, cond, t)
- case syntax.InstCapture:
- if int(i.Arg) < len(cap) {
- opos := cap[i.Arg]
- cap[i.Arg] = pos
- m.add(q, i.Out, pos, cap, cond, nil)
- cap[i.Arg] = opos
- } else {
- t = m.add(q, i.Out, pos, cap, cond, t)
- }
- case syntax.InstMatch, syntax.InstRune, syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL:
- if t == nil {
- t = m.alloc(i)
- } else {
- t.inst = i
- }
- if len(cap) > 0 && &t.cap[0] != &cap[0] {
- copy(t.cap, cap)
- }
- d.t = t
- t = nil
- }
- return t
-}
-
-// onepass runs the machine over the input starting at pos.
-// It reports whether a match was found.
-// If so, m.matchcap holds the submatch information.
-func (m *machine) onepass(i input, pos int) bool {
- startCond := m.re.cond
- if startCond == ^syntax.EmptyOp(0) { // impossible
- return false
- }
- m.matched = false
- for i := range m.matchcap {
- m.matchcap[i] = -1
- }
- r, r1 := endOfText, endOfText
- width, width1 := 0, 0
- r, width = i.step(pos)
- if r != endOfText {
- r1, width1 = i.step(pos + width)
- }
- var flag syntax.EmptyOp
- if pos == 0 {
- flag = syntax.EmptyOpContext(-1, r)
- } else {
- flag = i.context(pos)
- }
- pc := m.op.Start
- inst := m.op.Inst[pc]
- // If there is a simple literal prefix, skip over it.
- if pos == 0 && syntax.EmptyOp(inst.Arg)&^flag == 0 &&
- len(m.re.prefix) > 0 && i.canCheckPrefix() {
- // Match requires literal prefix; fast search for it.
- if i.hasPrefix(m.re) {
- pos += len(m.re.prefix)
- r, width = i.step(pos)
- r1, width1 = i.step(pos + width)
- flag = i.context(pos)
- pc = int(m.re.prefixEnd)
- } else {
- return m.matched
- }
- }
- for {
- inst = m.op.Inst[pc]
- pc = int(inst.Out)
- switch inst.Op {
- default:
- panic("bad inst")
- case syntax.InstMatch:
- m.matched = true
- if len(m.matchcap) > 0 {
- m.matchcap[0] = 0
- m.matchcap[1] = pos
- }
- return m.matched
- case syntax.InstRune:
- if !inst.MatchRune(r) {
- return m.matched
- }
- case syntax.InstRune1:
- if r != inst.Rune[0] {
- return m.matched
- }
- case syntax.InstRuneAny:
- // Nothing
- case syntax.InstRuneAnyNotNL:
- if r == '\n' {
- return m.matched
- }
- // peek at the input rune to see which branch of the Alt to take
- case syntax.InstAlt, syntax.InstAltMatch:
- pc = int(onePassNext(&inst, r))
- continue
- case syntax.InstFail:
- return m.matched
- case syntax.InstNop:
- continue
- case syntax.InstEmptyWidth:
- if syntax.EmptyOp(inst.Arg)&^flag != 0 {
- return m.matched
- }
- continue
- case syntax.InstCapture:
- if int(inst.Arg) < len(m.matchcap) {
- m.matchcap[inst.Arg] = pos
- }
- continue
- }
- if width == 0 {
- break
- }
- flag = syntax.EmptyOpContext(r, r1)
- pos += width
- r, width = r1, width1
- if r != endOfText {
- r1, width1 = i.step(pos + width)
- }
- }
- return m.matched
-}
-
-// empty is a non-nil 0-element slice,
-// so doExecute can avoid an allocation
-// when 0 captures are requested from a successful match.
-var empty = make([]int, 0)
-
-// doExecute finds the leftmost match in the input and returns
-// the position of its subexpressions.
-func (re *Regexp) doExecute(r io.RuneReader, b []byte, s string, pos int, ncap int) []int {
- m := re.get()
- var i input
- if r != nil {
- i = m.newInputReader(r)
- } else if b != nil {
- i = m.newInputBytes(b)
- } else {
- i = m.newInputString(s)
- }
- if m.op != notOnePass {
- if !m.onepass(i, pos) {
- re.put(m)
- return nil
- }
- } else {
- m.init(ncap)
- if !m.match(i, pos) {
- re.put(m)
- return nil
- }
- }
- if ncap == 0 {
- re.put(m)
- return empty // empty but not nil
- }
- cap := make([]int, len(m.matchcap))
- copy(cap, m.matchcap)
- re.put(m)
- return cap
-}
diff --git a/src/pkg/regexp/exec2_test.go b/src/pkg/regexp/exec2_test.go
deleted file mode 100644
index 7b86b4115..000000000
--- a/src/pkg/regexp/exec2_test.go
+++ /dev/null
@@ -1,20 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build !race
-
-package regexp
-
-import (
- "testing"
-)
-
-// This test is excluded when running under the race detector because
-// it is a very expensive test and takes too long.
-func TestRE2Exhaustive(t *testing.T) {
- if testing.Short() {
- t.Skip("skipping TestRE2Exhaustive during short test")
- }
- testRE2(t, "testdata/re2-exhaustive.txt.bz2")
-}
diff --git a/src/pkg/regexp/exec_test.go b/src/pkg/regexp/exec_test.go
deleted file mode 100644
index 70d069c06..000000000
--- a/src/pkg/regexp/exec_test.go
+++ /dev/null
@@ -1,715 +0,0 @@
-// Copyright 2010 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package regexp
-
-import (
- "bufio"
- "compress/bzip2"
- "fmt"
- "io"
- "os"
- "path/filepath"
- "regexp/syntax"
- "strconv"
- "strings"
- "testing"
- "unicode/utf8"
-)
-
-// TestRE2 tests this package's regexp API against test cases
-// considered during RE2's exhaustive tests, which run all possible
-// regexps over a given set of atoms and operators, up to a given
-// complexity, over all possible strings over a given alphabet,
-// up to a given size. Rather than try to link with RE2, we read a
-// log file containing the test cases and the expected matches.
-// The log file, re2.txt, is generated by running 'make exhaustive-log'
-// in the open source RE2 distribution. http://code.google.com/p/re2/
-//
-// The test file format is a sequence of stanzas like:
-//
-// strings
-// "abc"
-// "123x"
-// regexps
-// "[a-z]+"
-// 0-3;0-3
-// -;-
-// "([0-9])([0-9])([0-9])"
-// -;-
-// -;0-3 0-1 1-2 2-3
-//
-// The stanza begins by defining a set of strings, quoted
-// using Go double-quote syntax, one per line. Then the
-// regexps section gives a sequence of regexps to run on
-// the strings. In the block that follows a regexp, each line
-// gives the semicolon-separated match results of running
-// the regexp on the corresponding string.
-// Each match result is either a single -, meaning no match, or a
-// space-separated sequence of pairs giving the match and
-// submatch indices. An unmatched subexpression formats
-// its pair as a single - (not illustrated above). For now
-// each regexp run produces two match results, one for a
-// ``full match'' that restricts the regexp to matching the entire
-// string or nothing, and one for a ``partial match'' that gives
-// the leftmost first match found in the string.
-//
-// Lines beginning with # are comments. Lines beginning with
-// a capital letter are test names printed during RE2's test suite
-// and are echoed into t but otherwise ignored.
-//
-// At time of writing, re2.txt is 32 MB but compresses to 760 kB,
-// so we store re2.txt.gz in the repository and decompress it on the fly.
-//
-func TestRE2Search(t *testing.T) {
- testRE2(t, "testdata/re2-search.txt")
-}
-
-func testRE2(t *testing.T, file string) {
- f, err := os.Open(file)
- if err != nil {
- t.Fatal(err)
- }
- defer f.Close()
- var txt io.Reader
- if strings.HasSuffix(file, ".bz2") {
- z := bzip2.NewReader(f)
- txt = z
- file = file[:len(file)-len(".bz2")] // for error messages
- } else {
- txt = f
- }
- lineno := 0
- scanner := bufio.NewScanner(txt)
- var (
- str []string
- input []string
- inStrings bool
- re *Regexp
- refull *Regexp
- nfail int
- ncase int
- )
- for lineno := 1; scanner.Scan(); lineno++ {
- line := scanner.Text()
- switch {
- case line == "":
- t.Fatalf("%s:%d: unexpected blank line", file, lineno)
- case line[0] == '#':
- continue
- case 'A' <= line[0] && line[0] <= 'Z':
- // Test name.
- t.Logf("%s\n", line)
- continue
- case line == "strings":
- str = str[:0]
- inStrings = true
- case line == "regexps":
- inStrings = false
- case line[0] == '"':
- q, err := strconv.Unquote(line)
- if err != nil {
- // Fatal because we'll get out of sync.
- t.Fatalf("%s:%d: unquote %s: %v", file, lineno, line, err)
- }
- if inStrings {
- str = append(str, q)
- continue
- }
- // Is a regexp.
- if len(input) != 0 {
- t.Fatalf("%s:%d: out of sync: have %d strings left before %#q", file, lineno, len(input), q)
- }
- re, err = tryCompile(q)
- if err != nil {
- if err.Error() == "error parsing regexp: invalid escape sequence: `\\C`" {
- // We don't and likely never will support \C; keep going.
- continue
- }
- t.Errorf("%s:%d: compile %#q: %v", file, lineno, q, err)
- if nfail++; nfail >= 100 {
- t.Fatalf("stopping after %d errors", nfail)
- }
- continue
- }
- full := `\A(?:` + q + `)\z`
- refull, err = tryCompile(full)
- if err != nil {
- // Fatal because q worked, so this should always work.
- t.Fatalf("%s:%d: compile full %#q: %v", file, lineno, full, err)
- }
- input = str
- case line[0] == '-' || '0' <= line[0] && line[0] <= '9':
- // A sequence of match results.
- ncase++
- if re == nil {
- // Failed to compile: skip results.
- continue
- }
- if len(input) == 0 {
- t.Fatalf("%s:%d: out of sync: no input remaining", file, lineno)
- }
- var text string
- text, input = input[0], input[1:]
- if !isSingleBytes(text) && strings.Contains(re.String(), `\B`) {
- // RE2's \B considers every byte position,
- // so it sees 'not word boundary' in the
- // middle of UTF-8 sequences. This package
- // only considers the positions between runes,
- // so it disagrees. Skip those cases.
- continue
- }
- res := strings.Split(line, ";")
- if len(res) != len(run) {
- t.Fatalf("%s:%d: have %d test results, want %d", file, lineno, len(res), len(run))
- }
- for i := range res {
- have, suffix := run[i](re, refull, text)
- want := parseResult(t, file, lineno, res[i])
- if !same(have, want) {
- t.Errorf("%s:%d: %#q%s.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, re, suffix, text, have, want)
- if nfail++; nfail >= 100 {
- t.Fatalf("stopping after %d errors", nfail)
- }
- continue
- }
- b, suffix := match[i](re, refull, text)
- if b != (want != nil) {
- t.Errorf("%s:%d: %#q%s.MatchString(%#q) = %v, want %v", file, lineno, re, suffix, text, b, !b)
- if nfail++; nfail >= 100 {
- t.Fatalf("stopping after %d errors", nfail)
- }
- continue
- }
- }
-
- default:
- t.Fatalf("%s:%d: out of sync: %s\n", file, lineno, line)
- }
- }
- if err := scanner.Err(); err != nil {
- t.Fatalf("%s:%d: %v", file, lineno, err)
- }
- if len(input) != 0 {
- t.Fatalf("%s:%d: out of sync: have %d strings left at EOF", file, lineno, len(input))
- }
- t.Logf("%d cases tested", ncase)
-}
-
-var run = []func(*Regexp, *Regexp, string) ([]int, string){
- runFull,
- runPartial,
- runFullLongest,
- runPartialLongest,
-}
-
-func runFull(re, refull *Regexp, text string) ([]int, string) {
- refull.longest = false
- return refull.FindStringSubmatchIndex(text), "[full]"
-}
-
-func runPartial(re, refull *Regexp, text string) ([]int, string) {
- re.longest = false
- return re.FindStringSubmatchIndex(text), ""
-}
-
-func runFullLongest(re, refull *Regexp, text string) ([]int, string) {
- refull.longest = true
- return refull.FindStringSubmatchIndex(text), "[full,longest]"
-}
-
-func runPartialLongest(re, refull *Regexp, text string) ([]int, string) {
- re.longest = true
- return re.FindStringSubmatchIndex(text), "[longest]"
-}
-
-var match = []func(*Regexp, *Regexp, string) (bool, string){
- matchFull,
- matchPartial,
- matchFullLongest,
- matchPartialLongest,
-}
-
-func matchFull(re, refull *Regexp, text string) (bool, string) {
- refull.longest = false
- return refull.MatchString(text), "[full]"
-}
-
-func matchPartial(re, refull *Regexp, text string) (bool, string) {
- re.longest = false
- return re.MatchString(text), ""
-}
-
-func matchFullLongest(re, refull *Regexp, text string) (bool, string) {
- refull.longest = true
- return refull.MatchString(text), "[full,longest]"
-}
-
-func matchPartialLongest(re, refull *Regexp, text string) (bool, string) {
- re.longest = true
- return re.MatchString(text), "[longest]"
-}
-
-func isSingleBytes(s string) bool {
- for _, c := range s {
- if c >= utf8.RuneSelf {
- return false
- }
- }
- return true
-}
-
-func tryCompile(s string) (re *Regexp, err error) {
- // Protect against panic during Compile.
- defer func() {
- if r := recover(); r != nil {
- err = fmt.Errorf("panic: %v", r)
- }
- }()
- return Compile(s)
-}
-
-func parseResult(t *testing.T, file string, lineno int, res string) []int {
- // A single - indicates no match.
- if res == "-" {
- return nil
- }
- // Otherwise, a space-separated list of pairs.
- n := 1
- for j := 0; j < len(res); j++ {
- if res[j] == ' ' {
- n++
- }
- }
- out := make([]int, 2*n)
- i := 0
- n = 0
- for j := 0; j <= len(res); j++ {
- if j == len(res) || res[j] == ' ' {
- // Process a single pair. - means no submatch.
- pair := res[i:j]
- if pair == "-" {
- out[n] = -1
- out[n+1] = -1
- } else {
- k := strings.Index(pair, "-")
- if k < 0 {
- t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
- }
- lo, err1 := strconv.Atoi(pair[:k])
- hi, err2 := strconv.Atoi(pair[k+1:])
- if err1 != nil || err2 != nil || lo > hi {
- t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
- }
- out[n] = lo
- out[n+1] = hi
- }
- n += 2
- i = j + 1
- }
- }
- return out
-}
-
-func same(x, y []int) bool {
- if len(x) != len(y) {
- return false
- }
- for i, xi := range x {
- if xi != y[i] {
- return false
- }
- }
- return true
-}
-
-// TestFowler runs this package's regexp API against the
-// POSIX regular expression tests collected by Glenn Fowler
-// at http://www2.research.att.com/~gsf/testregex/.
-func TestFowler(t *testing.T) {
- files, err := filepath.Glob("testdata/*.dat")
- if err != nil {
- t.Fatal(err)
- }
- for _, file := range files {
- t.Log(file)
- testFowler(t, file)
- }
-}
-
-var notab = MustCompilePOSIX(`[^\t]+`)
-
-func testFowler(t *testing.T, file string) {
- f, err := os.Open(file)
- if err != nil {
- t.Error(err)
- return
- }
- defer f.Close()
- b := bufio.NewReader(f)
- lineno := 0
- lastRegexp := ""
-Reading:
- for {
- lineno++
- line, err := b.ReadString('\n')
- if err != nil {
- if err != io.EOF {
- t.Errorf("%s:%d: %v", file, lineno, err)
- }
- break Reading
- }
-
- // http://www2.research.att.com/~gsf/man/man1/testregex.html
- //
- // INPUT FORMAT
- // Input lines may be blank, a comment beginning with #, or a test
- // specification. A specification is five fields separated by one
- // or more tabs. NULL denotes the empty string and NIL denotes the
- // 0 pointer.
- if line[0] == '#' || line[0] == '\n' {
- continue Reading
- }
- line = line[:len(line)-1]
- field := notab.FindAllString(line, -1)
- for i, f := range field {
- if f == "NULL" {
- field[i] = ""
- }
- if f == "NIL" {
- t.Logf("%s:%d: skip: %s", file, lineno, line)
- continue Reading
- }
- }
- if len(field) == 0 {
- continue Reading
- }
-
- // Field 1: the regex(3) flags to apply, one character per REG_feature
- // flag. The test is skipped if REG_feature is not supported by the
- // implementation. If the first character is not [BEASKLP] then the
- // specification is a global control line. One or more of [BEASKLP] may be
- // specified; the test will be repeated for each mode.
- //
- // B basic BRE (grep, ed, sed)
- // E REG_EXTENDED ERE (egrep)
- // A REG_AUGMENTED ARE (egrep with negation)
- // S REG_SHELL SRE (sh glob)
- // K REG_SHELL|REG_AUGMENTED KRE (ksh glob)
- // L REG_LITERAL LRE (fgrep)
- //
- // a REG_LEFT|REG_RIGHT implicit ^...$
- // b REG_NOTBOL lhs does not match ^
- // c REG_COMMENT ignore space and #...\n
- // d REG_SHELL_DOT explicit leading . match
- // e REG_NOTEOL rhs does not match $
- // f REG_MULTIPLE multiple \n separated patterns
- // g FNM_LEADING_DIR testfnmatch only -- match until /
- // h REG_MULTIREF multiple digit backref
- // i REG_ICASE ignore case
- // j REG_SPAN . matches \n
- // k REG_ESCAPE \ to ecape [...] delimiter
- // l REG_LEFT implicit ^...
- // m REG_MINIMAL minimal match
- // n REG_NEWLINE explicit \n match
- // o REG_ENCLOSED (|&) magic inside [@|&](...)
- // p REG_SHELL_PATH explicit / match
- // q REG_DELIMITED delimited pattern
- // r REG_RIGHT implicit ...$
- // s REG_SHELL_ESCAPED \ not special
- // t REG_MUSTDELIM all delimiters must be specified
- // u standard unspecified behavior -- errors not counted
- // v REG_CLASS_ESCAPE \ special inside [...]
- // w REG_NOSUB no subexpression match array
- // x REG_LENIENT let some errors slide
- // y REG_LEFT regexec() implicit ^...
- // z REG_NULL NULL subexpressions ok
- // $ expand C \c escapes in fields 2 and 3
- // / field 2 is a regsubcomp() expression
- // = field 3 is a regdecomp() expression
- //
- // Field 1 control lines:
- //
- // C set LC_COLLATE and LC_CTYPE to locale in field 2
- //
- // ?test ... output field 5 if passed and != EXPECTED, silent otherwise
- // &test ... output field 5 if current and previous passed
- // |test ... output field 5 if current passed and previous failed
- // ; ... output field 2 if previous failed
- // {test ... skip if failed until }
- // } end of skip
- //
- // : comment comment copied as output NOTE
- // :comment:test :comment: ignored
- // N[OTE] comment comment copied as output NOTE
- // T[EST] comment comment
- //
- // number use number for nmatch (20 by default)
- flag := field[0]
- switch flag[0] {
- case '?', '&', '|', ';', '{', '}':
- // Ignore all the control operators.
- // Just run everything.
- flag = flag[1:]
- if flag == "" {
- continue Reading
- }
- case ':':
- i := strings.Index(flag[1:], ":")
- if i < 0 {
- t.Logf("skip: %s", line)
- continue Reading
- }
- flag = flag[1+i+1:]
- case 'C', 'N', 'T', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
- t.Logf("skip: %s", line)
- continue Reading
- }
-
- // Can check field count now that we've handled the myriad comment formats.
- if len(field) < 4 {
- t.Errorf("%s:%d: too few fields: %s", file, lineno, line)
- continue Reading
- }
-
- // Expand C escapes (a.k.a. Go escapes).
- if strings.Contains(flag, "$") {
- f := `"` + field[1] + `"`
- if field[1], err = strconv.Unquote(f); err != nil {
- t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
- }
- f = `"` + field[2] + `"`
- if field[2], err = strconv.Unquote(f); err != nil {
- t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
- }
- }
-
- // Field 2: the regular expression pattern; SAME uses the pattern from
- // the previous specification.
- //
- if field[1] == "SAME" {
- field[1] = lastRegexp
- }
- lastRegexp = field[1]
-
- // Field 3: the string to match.
- text := field[2]
-
- // Field 4: the test outcome...
- ok, shouldCompile, shouldMatch, pos := parseFowlerResult(field[3])
- if !ok {
- t.Errorf("%s:%d: cannot parse result %#q", file, lineno, field[3])
- continue Reading
- }
-
- // Field 5: optional comment appended to the report.
-
- Testing:
- // Run test once for each specified capital letter mode that we support.
- for _, c := range flag {
- pattern := field[1]
- syn := syntax.POSIX | syntax.ClassNL
- switch c {
- default:
- continue Testing
- case 'E':
- // extended regexp (what we support)
- case 'L':
- // literal
- pattern = QuoteMeta(pattern)
- }
-
- for _, c := range flag {
- switch c {
- case 'i':
- syn |= syntax.FoldCase
- }
- }
-
- re, err := compile(pattern, syn, true)
- if err != nil {
- if shouldCompile {
- t.Errorf("%s:%d: %#q did not compile", file, lineno, pattern)
- }
- continue Testing
- }
- if !shouldCompile {
- t.Errorf("%s:%d: %#q should not compile", file, lineno, pattern)
- continue Testing
- }
- match := re.MatchString(text)
- if match != shouldMatch {
- t.Errorf("%s:%d: %#q.Match(%#q) = %v, want %v", file, lineno, pattern, text, match, shouldMatch)
- continue Testing
- }
- have := re.FindStringSubmatchIndex(text)
- if (len(have) > 0) != match {
- t.Errorf("%s:%d: %#q.Match(%#q) = %v, but %#q.FindSubmatchIndex(%#q) = %v", file, lineno, pattern, text, match, pattern, text, have)
- continue Testing
- }
- if len(have) > len(pos) {
- have = have[:len(pos)]
- }
- if !same(have, pos) {
- t.Errorf("%s:%d: %#q.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, pattern, text, have, pos)
- }
- }
- }
-}
-
-func parseFowlerResult(s string) (ok, compiled, matched bool, pos []int) {
- // Field 4: the test outcome. This is either one of the posix error
- // codes (with REG_ omitted) or the match array, a list of (m,n)
- // entries with m and n being first and last+1 positions in the
- // field 3 string, or NULL if REG_NOSUB is in effect and success
- // is expected. BADPAT is acceptable in place of any regcomp(3)
- // error code. The match[] array is initialized to (-2,-2) before
- // each test. All array elements from 0 to nmatch-1 must be specified
- // in the outcome. Unspecified endpoints (offset -1) are denoted by ?.
- // Unset endpoints (offset -2) are denoted by X. {x}(o:n) denotes a
- // matched (?{...}) expression, where x is the text enclosed by {...},
- // o is the expression ordinal counting from 1, and n is the length of
- // the unmatched portion of the subject string. If x starts with a
- // number then that is the return value of re_execf(), otherwise 0 is
- // returned.
- switch {
- case s == "":
- // Match with no position information.
- ok = true
- compiled = true
- matched = true
- return
- case s == "NOMATCH":
- // Match failure.
- ok = true
- compiled = true
- matched = false
- return
- case 'A' <= s[0] && s[0] <= 'Z':
- // All the other error codes are compile errors.
- ok = true
- compiled = false
- return
- }
- compiled = true
-
- var x []int
- for s != "" {
- var end byte = ')'
- if len(x)%2 == 0 {
- if s[0] != '(' {
- ok = false
- return
- }
- s = s[1:]
- end = ','
- }
- i := 0
- for i < len(s) && s[i] != end {
- i++
- }
- if i == 0 || i == len(s) {
- ok = false
- return
- }
- var v = -1
- var err error
- if s[:i] != "?" {
- v, err = strconv.Atoi(s[:i])
- if err != nil {
- ok = false
- return
- }
- }
- x = append(x, v)
- s = s[i+1:]
- }
- if len(x)%2 != 0 {
- ok = false
- return
- }
- ok = true
- matched = true
- pos = x
- return
-}
-
-var text []byte
-
-func makeText(n int) []byte {
- if len(text) >= n {
- return text[:n]
- }
- text = make([]byte, n)
- x := ^uint32(0)
- for i := range text {
- x += x
- x ^= 1
- if int32(x) < 0 {
- x ^= 0x88888eef
- }
- if x%31 == 0 {
- text[i] = '\n'
- } else {
- text[i] = byte(x%(0x7E+1-0x20) + 0x20)
- }
- }
- return text
-}
-
-func benchmark(b *testing.B, re string, n int) {
- r := MustCompile(re)
- t := makeText(n)
- b.ResetTimer()
- b.SetBytes(int64(n))
- for i := 0; i < b.N; i++ {
- if r.Match(t) {
- b.Fatal("match!")
- }
- }
-}
-
-const (
- easy0 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
- easy1 = "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"
- medium = "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
- hard = "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
- parens = "([ -~])*(A)(B)(C)(D)(E)(F)(G)(H)(I)(J)(K)(L)(M)" +
- "(N)(O)(P)(Q)(R)(S)(T)(U)(V)(W)(X)(Y)(Z)$"
-)
-
-func BenchmarkMatchEasy0_32(b *testing.B) { benchmark(b, easy0, 32<<0) }
-func BenchmarkMatchEasy0_1K(b *testing.B) { benchmark(b, easy0, 1<<10) }
-func BenchmarkMatchEasy0_32K(b *testing.B) { benchmark(b, easy0, 32<<10) }
-func BenchmarkMatchEasy0_1M(b *testing.B) { benchmark(b, easy0, 1<<20) }
-func BenchmarkMatchEasy0_32M(b *testing.B) { benchmark(b, easy0, 32<<20) }
-func BenchmarkMatchEasy1_32(b *testing.B) { benchmark(b, easy1, 32<<0) }
-func BenchmarkMatchEasy1_1K(b *testing.B) { benchmark(b, easy1, 1<<10) }
-func BenchmarkMatchEasy1_32K(b *testing.B) { benchmark(b, easy1, 32<<10) }
-func BenchmarkMatchEasy1_1M(b *testing.B) { benchmark(b, easy1, 1<<20) }
-func BenchmarkMatchEasy1_32M(b *testing.B) { benchmark(b, easy1, 32<<20) }
-func BenchmarkMatchMedium_32(b *testing.B) { benchmark(b, medium, 32<<0) }
-func BenchmarkMatchMedium_1K(b *testing.B) { benchmark(b, medium, 1<<10) }
-func BenchmarkMatchMedium_32K(b *testing.B) { benchmark(b, medium, 32<<10) }
-func BenchmarkMatchMedium_1M(b *testing.B) { benchmark(b, medium, 1<<20) }
-func BenchmarkMatchMedium_32M(b *testing.B) { benchmark(b, medium, 32<<20) }
-func BenchmarkMatchHard_32(b *testing.B) { benchmark(b, hard, 32<<0) }
-func BenchmarkMatchHard_1K(b *testing.B) { benchmark(b, hard, 1<<10) }
-func BenchmarkMatchHard_32K(b *testing.B) { benchmark(b, hard, 32<<10) }
-func BenchmarkMatchHard_1M(b *testing.B) { benchmark(b, hard, 1<<20) }
-func BenchmarkMatchHard_32M(b *testing.B) { benchmark(b, hard, 32<<20) }
-
-func TestLongest(t *testing.T) {
- re, err := Compile(`a(|b)`)
- if err != nil {
- t.Fatal(err)
- }
- if g, w := re.FindString("ab"), "a"; g != w {
- t.Errorf("first match was %q, want %q", g, w)
- }
- re.Longest()
- if g, w := re.FindString("ab"), "ab"; g != w {
- t.Errorf("longest match was %q, want %q", g, w)
- }
-}
diff --git a/src/pkg/regexp/find_test.go b/src/pkg/regexp/find_test.go
deleted file mode 100644
index e07eb7d5c..000000000
--- a/src/pkg/regexp/find_test.go
+++ /dev/null
@@ -1,498 +0,0 @@
-// Copyright 2010 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package regexp
-
-import (
- "fmt"
- "strings"
- "testing"
-)
-
-// For each pattern/text pair, what is the expected output of each function?
-// We can derive the textual results from the indexed results, the non-submatch
-// results from the submatched results, the single results from the 'all' results,
-// and the byte results from the string results. Therefore the table includes
-// only the FindAllStringSubmatchIndex result.
-type FindTest struct {
- pat string
- text string
- matches [][]int
-}
-
-func (t FindTest) String() string {
- return fmt.Sprintf("pat: %#q text: %#q", t.pat, t.text)
-}
-
-var findTests = []FindTest{
- {``, ``, build(1, 0, 0)},
- {`^abcdefg`, "abcdefg", build(1, 0, 7)},
- {`a+`, "baaab", build(1, 1, 4)},
- {"abcd..", "abcdef", build(1, 0, 6)},
- {`a`, "a", build(1, 0, 1)},
- {`x`, "y", nil},
- {`b`, "abc", build(1, 1, 2)},
- {`.`, "a", build(1, 0, 1)},
- {`.*`, "abcdef", build(1, 0, 6)},
- {`^`, "abcde", build(1, 0, 0)},
- {`$`, "abcde", build(1, 5, 5)},
- {`^abcd$`, "abcd", build(1, 0, 4)},
- {`^bcd'`, "abcdef", nil},
- {`^abcd$`, "abcde", nil},
- {`a+`, "baaab", build(1, 1, 4)},
- {`a*`, "baaab", build(3, 0, 0, 1, 4, 5, 5)},
- {`[a-z]+`, "abcd", build(1, 0, 4)},
- {`[^a-z]+`, "ab1234cd", build(1, 2, 6)},
- {`[a\-\]z]+`, "az]-bcz", build(2, 0, 4, 6, 7)},
- {`[^\n]+`, "abcd\n", build(1, 0, 4)},
- {`[日本語]+`, "日本語日本語", build(1, 0, 18)},
- {`日本語+`, "日本語", build(1, 0, 9)},
- {`日本語+`, "日本語語語語", build(1, 0, 18)},
- {`()`, "", build(1, 0, 0, 0, 0)},
- {`(a)`, "a", build(1, 0, 1, 0, 1)},
- {`(.)(.)`, "æ—¥a", build(1, 0, 4, 0, 3, 3, 4)},
- {`(.*)`, "", build(1, 0, 0, 0, 0)},
- {`(.*)`, "abcd", build(1, 0, 4, 0, 4)},
- {`(..)(..)`, "abcd", build(1, 0, 4, 0, 2, 2, 4)},
- {`(([^xyz]*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 3, 4)},
- {`((a|b|c)*(d))`, "abcd", build(1, 0, 4, 0, 4, 2, 3, 3, 4)},
- {`(((a|b|c)*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 2, 3, 3, 4)},
- {`\a\f\n\r\t\v`, "\a\f\n\r\t\v", build(1, 0, 6)},
- {`[\a\f\n\r\t\v]+`, "\a\f\n\r\t\v", build(1, 0, 6)},
-
- {`a*(|(b))c*`, "aacc", build(1, 0, 4, 2, 2, -1, -1)},
- {`(.*).*`, "ab", build(1, 0, 2, 0, 2)},
- {`[.]`, ".", build(1, 0, 1)},
- {`/$`, "/abc/", build(1, 4, 5)},
- {`/$`, "/abc", nil},
-
- // multiple matches
- {`.`, "abc", build(3, 0, 1, 1, 2, 2, 3)},
- {`(.)`, "abc", build(3, 0, 1, 0, 1, 1, 2, 1, 2, 2, 3, 2, 3)},
- {`.(.)`, "abcd", build(2, 0, 2, 1, 2, 2, 4, 3, 4)},
- {`ab*`, "abbaab", build(3, 0, 3, 3, 4, 4, 6)},
- {`a(b*)`, "abbaab", build(3, 0, 3, 1, 3, 3, 4, 4, 4, 4, 6, 5, 6)},
-
- // fixed bugs
- {`ab$`, "cab", build(1, 1, 3)},
- {`axxb$`, "axxcb", nil},
- {`data`, "daXY data", build(1, 5, 9)},
- {`da(.)a$`, "daXY data", build(1, 5, 9, 7, 8)},
- {`zx+`, "zzx", build(1, 1, 3)},
- {`ab$`, "abcab", build(1, 3, 5)},
- {`(aa)*$`, "a", build(1, 1, 1, -1, -1)},
- {`(?:.|(?:.a))`, "", nil},
- {`(?:A(?:A|a))`, "Aa", build(1, 0, 2)},
- {`(?:A|(?:A|a))`, "a", build(1, 0, 1)},
- {`(a){0}`, "", build(1, 0, 0, -1, -1)},
- {`(?-s)(?:(?:^).)`, "\n", nil},
- {`(?s)(?:(?:^).)`, "\n", build(1, 0, 1)},
- {`(?:(?:^).)`, "\n", nil},
- {`\b`, "x", build(2, 0, 0, 1, 1)},
- {`\b`, "xx", build(2, 0, 0, 2, 2)},
- {`\b`, "x y", build(4, 0, 0, 1, 1, 2, 2, 3, 3)},
- {`\b`, "xx yy", build(4, 0, 0, 2, 2, 3, 3, 5, 5)},
- {`\B`, "x", nil},
- {`\B`, "xx", build(1, 1, 1)},
- {`\B`, "x y", nil},
- {`\B`, "xx yy", build(2, 1, 1, 4, 4)},
-
- // RE2 tests
- {`[^\S\s]`, "abcd", nil},
- {`[^\S[:space:]]`, "abcd", nil},
- {`[^\D\d]`, "abcd", nil},
- {`[^\D[:digit:]]`, "abcd", nil},
- {`(?i)\W`, "x", nil},
- {`(?i)\W`, "k", nil},
- {`(?i)\W`, "s", nil},
-
- // can backslash-escape any punctuation
- {`\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~`,
- `!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)},
- {`[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~]+`,
- `!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)},
- {"\\`", "`", build(1, 0, 1)},
- {"[\\`]+", "`", build(1, 0, 1)},
-
- // long set of matches (longer than startSize)
- {
- ".",
- "qwertyuiopasdfghjklzxcvbnm1234567890",
- build(36, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10,
- 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20,
- 20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30,
- 30, 31, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36),
- },
-}
-
-// build is a helper to construct a [][]int by extracting n sequences from x.
-// This represents n matches with len(x)/n submatches each.
-func build(n int, x ...int) [][]int {
- ret := make([][]int, n)
- runLength := len(x) / n
- j := 0
- for i := range ret {
- ret[i] = make([]int, runLength)
- copy(ret[i], x[j:])
- j += runLength
- if j > len(x) {
- panic("invalid build entry")
- }
- }
- return ret
-}
-
-// First the simple cases.
-
-func TestFind(t *testing.T) {
- for _, test := range findTests {
- re := MustCompile(test.pat)
- if re.String() != test.pat {
- t.Errorf("String() = `%s`; should be `%s`", re.String(), test.pat)
- }
- result := re.Find([]byte(test.text))
- switch {
- case len(test.matches) == 0 && len(result) == 0:
- // ok
- case test.matches == nil && result != nil:
- t.Errorf("expected no match; got one: %s", test)
- case test.matches != nil && result == nil:
- t.Errorf("expected match; got none: %s", test)
- case test.matches != nil && result != nil:
- expect := test.text[test.matches[0][0]:test.matches[0][1]]
- if expect != string(result) {
- t.Errorf("expected %q got %q: %s", expect, result, test)
- }
- }
- }
-}
-
-func TestFindString(t *testing.T) {
- for _, test := range findTests {
- result := MustCompile(test.pat).FindString(test.text)
- switch {
- case len(test.matches) == 0 && len(result) == 0:
- // ok
- case test.matches == nil && result != "":
- t.Errorf("expected no match; got one: %s", test)
- case test.matches != nil && result == "":
- // Tricky because an empty result has two meanings: no match or empty match.
- if test.matches[0][0] != test.matches[0][1] {
- t.Errorf("expected match; got none: %s", test)
- }
- case test.matches != nil && result != "":
- expect := test.text[test.matches[0][0]:test.matches[0][1]]
- if expect != result {
- t.Errorf("expected %q got %q: %s", expect, result, test)
- }
- }
- }
-}
-
-func testFindIndex(test *FindTest, result []int, t *testing.T) {
- switch {
- case len(test.matches) == 0 && len(result) == 0:
- // ok
- case test.matches == nil && result != nil:
- t.Errorf("expected no match; got one: %s", test)
- case test.matches != nil && result == nil:
- t.Errorf("expected match; got none: %s", test)
- case test.matches != nil && result != nil:
- expect := test.matches[0]
- if expect[0] != result[0] || expect[1] != result[1] {
- t.Errorf("expected %v got %v: %s", expect, result, test)
- }
- }
-}
-
-func TestFindIndex(t *testing.T) {
- for _, test := range findTests {
- testFindIndex(&test, MustCompile(test.pat).FindIndex([]byte(test.text)), t)
- }
-}
-
-func TestFindStringIndex(t *testing.T) {
- for _, test := range findTests {
- testFindIndex(&test, MustCompile(test.pat).FindStringIndex(test.text), t)
- }
-}
-
-func TestFindReaderIndex(t *testing.T) {
- for _, test := range findTests {
- testFindIndex(&test, MustCompile(test.pat).FindReaderIndex(strings.NewReader(test.text)), t)
- }
-}
-
-// Now come the simple All cases.
-
-func TestFindAll(t *testing.T) {
- for _, test := range findTests {
- result := MustCompile(test.pat).FindAll([]byte(test.text), -1)
- switch {
- case test.matches == nil && result == nil:
- // ok
- case test.matches == nil && result != nil:
- t.Errorf("expected no match; got one: %s", test)
- case test.matches != nil && result == nil:
- t.Fatalf("expected match; got none: %s", test)
- case test.matches != nil && result != nil:
- if len(test.matches) != len(result) {
- t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
- continue
- }
- for k, e := range test.matches {
- expect := test.text[e[0]:e[1]]
- if expect != string(result[k]) {
- t.Errorf("match %d: expected %q got %q: %s", k, expect, result[k], test)
- }
- }
- }
- }
-}
-
-func TestFindAllString(t *testing.T) {
- for _, test := range findTests {
- result := MustCompile(test.pat).FindAllString(test.text, -1)
- switch {
- case test.matches == nil && result == nil:
- // ok
- case test.matches == nil && result != nil:
- t.Errorf("expected no match; got one: %s", test)
- case test.matches != nil && result == nil:
- t.Errorf("expected match; got none: %s", test)
- case test.matches != nil && result != nil:
- if len(test.matches) != len(result) {
- t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
- continue
- }
- for k, e := range test.matches {
- expect := test.text[e[0]:e[1]]
- if expect != result[k] {
- t.Errorf("expected %q got %q: %s", expect, result, test)
- }
- }
- }
- }
-}
-
-func testFindAllIndex(test *FindTest, result [][]int, t *testing.T) {
- switch {
- case test.matches == nil && result == nil:
- // ok
- case test.matches == nil && result != nil:
- t.Errorf("expected no match; got one: %s", test)
- case test.matches != nil && result == nil:
- t.Errorf("expected match; got none: %s", test)
- case test.matches != nil && result != nil:
- if len(test.matches) != len(result) {
- t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
- return
- }
- for k, e := range test.matches {
- if e[0] != result[k][0] || e[1] != result[k][1] {
- t.Errorf("match %d: expected %v got %v: %s", k, e, result[k], test)
- }
- }
- }
-}
-
-func TestFindAllIndex(t *testing.T) {
- for _, test := range findTests {
- testFindAllIndex(&test, MustCompile(test.pat).FindAllIndex([]byte(test.text), -1), t)
- }
-}
-
-func TestFindAllStringIndex(t *testing.T) {
- for _, test := range findTests {
- testFindAllIndex(&test, MustCompile(test.pat).FindAllStringIndex(test.text, -1), t)
- }
-}
-
-// Now come the Submatch cases.
-
-func testSubmatchBytes(test *FindTest, n int, submatches []int, result [][]byte, t *testing.T) {
- if len(submatches) != len(result)*2 {
- t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test)
- return
- }
- for k := 0; k < len(submatches); k += 2 {
- if submatches[k] == -1 {
- if result[k/2] != nil {
- t.Errorf("match %d: expected nil got %q: %s", n, result, test)
- }
- continue
- }
- expect := test.text[submatches[k]:submatches[k+1]]
- if expect != string(result[k/2]) {
- t.Errorf("match %d: expected %q got %q: %s", n, expect, result, test)
- return
- }
- }
-}
-
-func TestFindSubmatch(t *testing.T) {
- for _, test := range findTests {
- result := MustCompile(test.pat).FindSubmatch([]byte(test.text))
- switch {
- case test.matches == nil && result == nil:
- // ok
- case test.matches == nil && result != nil:
- t.Errorf("expected no match; got one: %s", test)
- case test.matches != nil && result == nil:
- t.Errorf("expected match; got none: %s", test)
- case test.matches != nil && result != nil:
- testSubmatchBytes(&test, 0, test.matches[0], result, t)
- }
- }
-}
-
-func testSubmatchString(test *FindTest, n int, submatches []int, result []string, t *testing.T) {
- if len(submatches) != len(result)*2 {
- t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test)
- return
- }
- for k := 0; k < len(submatches); k += 2 {
- if submatches[k] == -1 {
- if result[k/2] != "" {
- t.Errorf("match %d: expected nil got %q: %s", n, result, test)
- }
- continue
- }
- expect := test.text[submatches[k]:submatches[k+1]]
- if expect != result[k/2] {
- t.Errorf("match %d: expected %q got %q: %s", n, expect, result, test)
- return
- }
- }
-}
-
-func TestFindStringSubmatch(t *testing.T) {
- for _, test := range findTests {
- result := MustCompile(test.pat).FindStringSubmatch(test.text)
- switch {
- case test.matches == nil && result == nil:
- // ok
- case test.matches == nil && result != nil:
- t.Errorf("expected no match; got one: %s", test)
- case test.matches != nil && result == nil:
- t.Errorf("expected match; got none: %s", test)
- case test.matches != nil && result != nil:
- testSubmatchString(&test, 0, test.matches[0], result, t)
- }
- }
-}
-
-func testSubmatchIndices(test *FindTest, n int, expect, result []int, t *testing.T) {
- if len(expect) != len(result) {
- t.Errorf("match %d: expected %d matches; got %d: %s", n, len(expect)/2, len(result)/2, test)
- return
- }
- for k, e := range expect {
- if e != result[k] {
- t.Errorf("match %d: submatch error: expected %v got %v: %s", n, expect, result, test)
- }
- }
-}
-
-func testFindSubmatchIndex(test *FindTest, result []int, t *testing.T) {
- switch {
- case test.matches == nil && result == nil:
- // ok
- case test.matches == nil && result != nil:
- t.Errorf("expected no match; got one: %s", test)
- case test.matches != nil && result == nil:
- t.Errorf("expected match; got none: %s", test)
- case test.matches != nil && result != nil:
- testSubmatchIndices(test, 0, test.matches[0], result, t)
- }
-}
-
-func TestFindSubmatchIndex(t *testing.T) {
- for _, test := range findTests {
- testFindSubmatchIndex(&test, MustCompile(test.pat).FindSubmatchIndex([]byte(test.text)), t)
- }
-}
-
-func TestFindStringSubmatchIndex(t *testing.T) {
- for _, test := range findTests {
- testFindSubmatchIndex(&test, MustCompile(test.pat).FindStringSubmatchIndex(test.text), t)
- }
-}
-
-func TestFindReaderSubmatchIndex(t *testing.T) {
- for _, test := range findTests {
- testFindSubmatchIndex(&test, MustCompile(test.pat).FindReaderSubmatchIndex(strings.NewReader(test.text)), t)
- }
-}
-
-// Now come the monster AllSubmatch cases.
-
-func TestFindAllSubmatch(t *testing.T) {
- for _, test := range findTests {
- result := MustCompile(test.pat).FindAllSubmatch([]byte(test.text), -1)
- switch {
- case test.matches == nil && result == nil:
- // ok
- case test.matches == nil && result != nil:
- t.Errorf("expected no match; got one: %s", test)
- case test.matches != nil && result == nil:
- t.Errorf("expected match; got none: %s", test)
- case len(test.matches) != len(result):
- t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
- case test.matches != nil && result != nil:
- for k, match := range test.matches {
- testSubmatchBytes(&test, k, match, result[k], t)
- }
- }
- }
-}
-
-func TestFindAllStringSubmatch(t *testing.T) {
- for _, test := range findTests {
- result := MustCompile(test.pat).FindAllStringSubmatch(test.text, -1)
- switch {
- case test.matches == nil && result == nil:
- // ok
- case test.matches == nil && result != nil:
- t.Errorf("expected no match; got one: %s", test)
- case test.matches != nil && result == nil:
- t.Errorf("expected match; got none: %s", test)
- case len(test.matches) != len(result):
- t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
- case test.matches != nil && result != nil:
- for k, match := range test.matches {
- testSubmatchString(&test, k, match, result[k], t)
- }
- }
- }
-}
-
-func testFindAllSubmatchIndex(test *FindTest, result [][]int, t *testing.T) {
- switch {
- case test.matches == nil && result == nil:
- // ok
- case test.matches == nil && result != nil:
- t.Errorf("expected no match; got one: %s", test)
- case test.matches != nil && result == nil:
- t.Errorf("expected match; got none: %s", test)
- case len(test.matches) != len(result):
- t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
- case test.matches != nil && result != nil:
- for k, match := range test.matches {
- testSubmatchIndices(test, k, match, result[k], t)
- }
- }
-}
-
-func TestFindAllSubmatchIndex(t *testing.T) {
- for _, test := range findTests {
- testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllSubmatchIndex([]byte(test.text), -1), t)
- }
-}
-
-func TestFindAllStringSubmatchIndex(t *testing.T) {
- for _, test := range findTests {
- testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllStringSubmatchIndex(test.text, -1), t)
- }
-}
diff --git a/src/pkg/regexp/onepass.go b/src/pkg/regexp/onepass.go
deleted file mode 100644
index 501fb28af..000000000
--- a/src/pkg/regexp/onepass.go
+++ /dev/null
@@ -1,582 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-
-package regexp
-
-import (
- "bytes"
- "regexp/syntax"
- "sort"
- "unicode"
-)
-
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// "One-pass" regexp execution.
-// Some regexps can be analyzed to determine that they never need
-// backtracking: they are guaranteed to run in one pass over the string
-// without bothering to save all the usual NFA state.
-// Detect those and execute them more quickly.
-
-// A onePassProg is a compiled one-pass regular expression program.
-// It is the same as syntax.Prog except for the use of onePassInst.
-type onePassProg struct {
- Inst []onePassInst
- Start int // index of start instruction
- NumCap int // number of InstCapture insts in re
-}
-
-// A onePassInst is a single instruction in a one-pass regular expression program.
-// It is the same as syntax.Inst except for the new 'Next' field.
-type onePassInst struct {
- syntax.Inst
- Next []uint32
-}
-
-// OnePassPrefix returns a literal string that all matches for the
-// regexp must start with. Complete is true if the prefix
-// is the entire match. Pc is the index of the last rune instruction
-// in the string. The OnePassPrefix skips over the mandatory
-// EmptyBeginText
-func onePassPrefix(p *syntax.Prog) (prefix string, complete bool, pc uint32) {
- i := &p.Inst[p.Start]
- if i.Op != syntax.InstEmptyWidth || (syntax.EmptyOp(i.Arg))&syntax.EmptyBeginText == 0 {
- return "", i.Op == syntax.InstMatch, uint32(p.Start)
- }
- pc = i.Out
- i = &p.Inst[pc]
- for i.Op == syntax.InstNop {
- pc = i.Out
- i = &p.Inst[pc]
- }
- // Avoid allocation of buffer if prefix is empty.
- if iop(i) != syntax.InstRune || len(i.Rune) != 1 {
- return "", i.Op == syntax.InstMatch, uint32(p.Start)
- }
-
- // Have prefix; gather characters.
- var buf bytes.Buffer
- for iop(i) == syntax.InstRune && len(i.Rune) == 1 && syntax.Flags(i.Arg)&syntax.FoldCase == 0 {
- buf.WriteRune(i.Rune[0])
- pc, i = i.Out, &p.Inst[i.Out]
- }
- return buf.String(), i.Op == syntax.InstEmptyWidth && (syntax.EmptyOp(i.Arg))&syntax.EmptyBeginText != 0, pc
-}
-
-// OnePassNext selects the next actionable state of the prog, based on the input character.
-// It should only be called when i.Op == InstAlt or InstAltMatch, and from the one-pass machine.
-// One of the alternates may ultimately lead without input to end of line. If the instruction
-// is InstAltMatch the path to the InstMatch is in i.Out, the normal node in i.Next.
-func onePassNext(i *onePassInst, r rune) uint32 {
- next := i.MatchRunePos(r)
- if next >= 0 {
- return i.Next[next]
- }
- if i.Op == syntax.InstAltMatch {
- return i.Out
- }
- return 0
-}
-
-func iop(i *syntax.Inst) syntax.InstOp {
- op := i.Op
- switch op {
- case syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL:
- op = syntax.InstRune
- }
- return op
-}
-
-// Sparse Array implementation is used as a queueOnePass.
-type queueOnePass struct {
- sparse []uint32
- dense []uint32
- size, nextIndex uint32
-}
-
-func (q *queueOnePass) empty() bool {
- return q.nextIndex >= q.size
-}
-
-func (q *queueOnePass) next() (n uint32) {
- n = q.dense[q.nextIndex]
- q.nextIndex++
- return
-}
-
-func (q *queueOnePass) clear() {
- q.size = 0
- q.nextIndex = 0
-}
-
-func (q *queueOnePass) reset() {
- q.nextIndex = 0
-}
-
-func (q *queueOnePass) contains(u uint32) bool {
- if u >= uint32(len(q.sparse)) {
- return false
- }
- return q.sparse[u] < q.size && q.dense[q.sparse[u]] == u
-}
-
-func (q *queueOnePass) insert(u uint32) {
- if !q.contains(u) {
- q.insertNew(u)
- }
-}
-
-func (q *queueOnePass) insertNew(u uint32) {
- if u >= uint32(len(q.sparse)) {
- return
- }
- q.sparse[u] = q.size
- q.dense[q.size] = u
- q.size++
-}
-
-func newQueue(size int) (q *queueOnePass) {
- return &queueOnePass{
- sparse: make([]uint32, size),
- dense: make([]uint32, size),
- }
-}
-
-// mergeRuneSets merges two non-intersecting runesets, and returns the merged result,
-// and a NextIp array. The idea is that if a rune matches the OnePassRunes at index
-// i, NextIp[i/2] is the target. If the input sets intersect, an empty runeset and a
-// NextIp array with the single element mergeFailed is returned.
-// The code assumes that both inputs contain ordered and non-intersecting rune pairs.
-const mergeFailed = uint32(0xffffffff)
-
-var (
- noRune = []rune{}
- noNext = []uint32{mergeFailed}
-)
-
-func mergeRuneSets(leftRunes, rightRunes *[]rune, leftPC, rightPC uint32) ([]rune, []uint32) {
- leftLen := len(*leftRunes)
- rightLen := len(*rightRunes)
- if leftLen&0x1 != 0 || rightLen&0x1 != 0 {
- panic("mergeRuneSets odd length []rune")
- }
- var (
- lx, rx int
- )
- merged := make([]rune, 0)
- next := make([]uint32, 0)
- ok := true
- defer func() {
- if !ok {
- merged = nil
- next = nil
- }
- }()
-
- ix := -1
- extend := func(newLow *int, newArray *[]rune, pc uint32) bool {
- if ix > 0 && (*newArray)[*newLow] <= merged[ix] {
- return false
- }
- merged = append(merged, (*newArray)[*newLow], (*newArray)[*newLow+1])
- *newLow += 2
- ix += 2
- next = append(next, pc)
- return true
- }
-
- for lx < leftLen || rx < rightLen {
- switch {
- case rx >= rightLen:
- ok = extend(&lx, leftRunes, leftPC)
- case lx >= leftLen:
- ok = extend(&rx, rightRunes, rightPC)
- case (*rightRunes)[rx] < (*leftRunes)[lx]:
- ok = extend(&rx, rightRunes, rightPC)
- default:
- ok = extend(&lx, leftRunes, leftPC)
- }
- if !ok {
- return noRune, noNext
- }
- }
- return merged, next
-}
-
-// cleanupOnePass drops working memory, and restores certain shortcut instructions.
-func cleanupOnePass(prog *onePassProg, original *syntax.Prog) {
- for ix, instOriginal := range original.Inst {
- switch instOriginal.Op {
- case syntax.InstAlt, syntax.InstAltMatch, syntax.InstRune:
- case syntax.InstCapture, syntax.InstEmptyWidth, syntax.InstNop, syntax.InstMatch, syntax.InstFail:
- prog.Inst[ix].Next = nil
- case syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL:
- prog.Inst[ix].Next = nil
- prog.Inst[ix] = onePassInst{Inst: instOriginal}
- }
- }
-}
-
-// onePassCopy creates a copy of the original Prog, as we'll be modifying it
-func onePassCopy(prog *syntax.Prog) *onePassProg {
- p := &onePassProg{
- Start: prog.Start,
- NumCap: prog.NumCap,
- }
- for _, inst := range prog.Inst {
- p.Inst = append(p.Inst, onePassInst{Inst: inst})
- }
-
- // rewrites one or more common Prog constructs that enable some otherwise
- // non-onepass Progs to be onepass. A:BD (for example) means an InstAlt at
- // ip A, that points to ips B & C.
- // A:BC + B:DA => A:BC + B:CD
- // A:BC + B:DC => A:DC + B:DC
- for pc := range p.Inst {
- switch p.Inst[pc].Op {
- default:
- continue
- case syntax.InstAlt, syntax.InstAltMatch:
- // A:Bx + B:Ay
- p_A_Other := &p.Inst[pc].Out
- p_A_Alt := &p.Inst[pc].Arg
- // make sure a target is another Alt
- instAlt := p.Inst[*p_A_Alt]
- if !(instAlt.Op == syntax.InstAlt || instAlt.Op == syntax.InstAltMatch) {
- p_A_Alt, p_A_Other = p_A_Other, p_A_Alt
- instAlt = p.Inst[*p_A_Alt]
- if !(instAlt.Op == syntax.InstAlt || instAlt.Op == syntax.InstAltMatch) {
- continue
- }
- }
- instOther := p.Inst[*p_A_Other]
- // Analyzing both legs pointing to Alts is for another day
- if instOther.Op == syntax.InstAlt || instOther.Op == syntax.InstAltMatch {
- // too complicated
- continue
- }
- // simple empty transition loop
- // A:BC + B:DA => A:BC + B:DC
- p_B_Alt := &p.Inst[*p_A_Alt].Out
- p_B_Other := &p.Inst[*p_A_Alt].Arg
- patch := false
- if instAlt.Out == uint32(pc) {
- patch = true
- } else if instAlt.Arg == uint32(pc) {
- patch = true
- p_B_Alt, p_B_Other = p_B_Other, p_B_Alt
- }
- if patch {
- *p_B_Alt = *p_A_Other
- }
-
- // empty transition to common target
- // A:BC + B:DC => A:DC + B:DC
- if *p_A_Other == *p_B_Alt {
- *p_A_Alt = *p_B_Other
- }
- }
- }
- return p
-}
-
-// runeSlice exists to permit sorting the case-folded rune sets.
-type runeSlice []rune
-
-func (p runeSlice) Len() int { return len(p) }
-func (p runeSlice) Less(i, j int) bool { return p[i] < p[j] }
-func (p runeSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
-
-// Sort is a convenience method.
-func (p runeSlice) Sort() {
- sort.Sort(p)
-}
-
-var anyRuneNotNL = []rune{0, '\n' - 1, '\n' + 1, unicode.MaxRune}
-var anyRune = []rune{0, unicode.MaxRune}
-
-// makeOnePass creates a onepass Prog, if possible. It is possible if at any alt,
-// the match engine can always tell which branch to take. The routine may modify
-// p if it is turned into a onepass Prog. If it isn't possible for this to be a
-// onepass Prog, the Prog notOnePass is returned. makeOnePass is recursive
-// to the size of the Prog.
-func makeOnePass(p *onePassProg) *onePassProg {
- // If the machine is very long, it's not worth the time to check if we can use one pass.
- if len(p.Inst) >= 1000 {
- return notOnePass
- }
-
- var (
- instQueue = newQueue(len(p.Inst))
- visitQueue = newQueue(len(p.Inst))
- build func(uint32, *queueOnePass)
- check func(uint32, map[uint32]bool) bool
- onePassRunes = make([][]rune, len(p.Inst))
- )
- build = func(pc uint32, q *queueOnePass) {
- if q.contains(pc) {
- return
- }
- inst := p.Inst[pc]
- switch inst.Op {
- case syntax.InstAlt, syntax.InstAltMatch:
- q.insert(inst.Out)
- build(inst.Out, q)
- q.insert(inst.Arg)
- case syntax.InstMatch, syntax.InstFail:
- default:
- q.insert(inst.Out)
- }
- }
-
- // check that paths from Alt instructions are unambiguous, and rebuild the new
- // program as a onepass program
- check = func(pc uint32, m map[uint32]bool) (ok bool) {
- ok = true
- inst := &p.Inst[pc]
- if visitQueue.contains(pc) {
- return
- }
- visitQueue.insert(pc)
- switch inst.Op {
- case syntax.InstAlt, syntax.InstAltMatch:
- ok = check(inst.Out, m) && check(inst.Arg, m)
- // check no-input paths to InstMatch
- matchOut := m[inst.Out]
- matchArg := m[inst.Arg]
- if matchOut && matchArg {
- ok = false
- break
- }
- // Match on empty goes in inst.Out
- if matchArg {
- inst.Out, inst.Arg = inst.Arg, inst.Out
- matchOut, matchArg = matchArg, matchOut
- }
- if matchOut {
- m[pc] = true
- inst.Op = syntax.InstAltMatch
- }
-
- // build a dispatch operator from the two legs of the alt.
- onePassRunes[pc], inst.Next = mergeRuneSets(
- &onePassRunes[inst.Out], &onePassRunes[inst.Arg], inst.Out, inst.Arg)
- if len(inst.Next) > 0 && inst.Next[0] == mergeFailed {
- ok = false
- break
- }
- case syntax.InstCapture, syntax.InstNop:
- ok = check(inst.Out, m)
- m[pc] = m[inst.Out]
- // pass matching runes back through these no-ops.
- onePassRunes[pc] = append([]rune{}, onePassRunes[inst.Out]...)
- inst.Next = []uint32{}
- for i := len(onePassRunes[pc]) / 2; i >= 0; i-- {
- inst.Next = append(inst.Next, inst.Out)
- }
- case syntax.InstEmptyWidth:
- ok = check(inst.Out, m)
- m[pc] = m[inst.Out]
- onePassRunes[pc] = append([]rune{}, onePassRunes[inst.Out]...)
- inst.Next = []uint32{}
- for i := len(onePassRunes[pc]) / 2; i >= 0; i-- {
- inst.Next = append(inst.Next, inst.Out)
- }
- case syntax.InstMatch, syntax.InstFail:
- m[pc] = inst.Op == syntax.InstMatch
- break
- case syntax.InstRune:
- ok = check(inst.Out, m)
- m[pc] = false
- if len(inst.Next) > 0 {
- break
- }
- if len(inst.Rune) == 0 {
- onePassRunes[pc] = []rune{}
- inst.Next = []uint32{inst.Out}
- break
- }
- runes := make([]rune, 0)
- if len(inst.Rune) == 1 && syntax.Flags(inst.Arg)&syntax.FoldCase != 0 {
- r0 := inst.Rune[0]
- runes = append(runes, r0, r0)
- for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) {
- runes = append(runes, r1, r1)
- }
- sort.Sort(runeSlice(runes))
- } else {
- runes = append(runes, inst.Rune...)
- }
- onePassRunes[pc] = runes
- inst.Next = []uint32{}
- for i := len(onePassRunes[pc]) / 2; i >= 0; i-- {
- inst.Next = append(inst.Next, inst.Out)
- }
- inst.Op = syntax.InstRune
- case syntax.InstRune1:
- ok = check(inst.Out, m)
- m[pc] = false
- if len(inst.Next) > 0 {
- break
- }
- runes := []rune{}
- // expand case-folded runes
- if syntax.Flags(inst.Arg)&syntax.FoldCase != 0 {
- r0 := inst.Rune[0]
- runes = append(runes, r0, r0)
- for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) {
- runes = append(runes, r1, r1)
- }
- sort.Sort(runeSlice(runes))
- } else {
- runes = append(runes, inst.Rune[0], inst.Rune[0])
- }
- onePassRunes[pc] = runes
- inst.Next = []uint32{}
- for i := len(onePassRunes[pc]) / 2; i >= 0; i-- {
- inst.Next = append(inst.Next, inst.Out)
- }
- inst.Op = syntax.InstRune
- case syntax.InstRuneAny:
- ok = check(inst.Out, m)
- m[pc] = false
- if len(inst.Next) > 0 {
- break
- }
- onePassRunes[pc] = append([]rune{}, anyRune...)
- inst.Next = []uint32{inst.Out}
- case syntax.InstRuneAnyNotNL:
- ok = check(inst.Out, m)
- m[pc] = false
- if len(inst.Next) > 0 {
- break
- }
- onePassRunes[pc] = append([]rune{}, anyRuneNotNL...)
- inst.Next = []uint32{}
- for i := len(onePassRunes[pc]) / 2; i >= 0; i-- {
- inst.Next = append(inst.Next, inst.Out)
- }
- }
- return
- }
-
- instQueue.clear()
- instQueue.insert(uint32(p.Start))
- m := make(map[uint32]bool, len(p.Inst))
- for !instQueue.empty() {
- pc := instQueue.next()
- inst := p.Inst[pc]
- visitQueue.clear()
- if !check(uint32(pc), m) {
- p = notOnePass
- break
- }
- switch inst.Op {
- case syntax.InstAlt, syntax.InstAltMatch:
- instQueue.insert(inst.Out)
- instQueue.insert(inst.Arg)
- case syntax.InstCapture, syntax.InstEmptyWidth, syntax.InstNop:
- instQueue.insert(inst.Out)
- case syntax.InstMatch:
- case syntax.InstFail:
- case syntax.InstRune, syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL:
- default:
- }
- }
- if p != notOnePass {
- for i, _ := range p.Inst {
- p.Inst[i].Rune = onePassRunes[i]
- }
- }
- return p
-}
-
-// walk visits each Inst in the prog once, and applies the argument
-// function(ip, next), in pre-order.
-func walk(prog *syntax.Prog, funcs ...func(ip, next uint32)) {
- var walk1 func(uint32)
- progQueue := newQueue(len(prog.Inst))
- walk1 = func(ip uint32) {
- if progQueue.contains(ip) {
- return
- }
- progQueue.insert(ip)
- inst := prog.Inst[ip]
- switch inst.Op {
- case syntax.InstAlt, syntax.InstAltMatch:
- for _, f := range funcs {
- f(ip, inst.Out)
- f(ip, inst.Arg)
- }
- walk1(inst.Out)
- walk1(inst.Arg)
- default:
- for _, f := range funcs {
- f(ip, inst.Out)
- }
- walk1(inst.Out)
- }
- }
- walk1(uint32(prog.Start))
-}
-
-// find returns the Insts that match the argument predicate function
-func find(prog *syntax.Prog, f func(*syntax.Prog, int) bool) (matches []uint32) {
- matches = []uint32{}
-
- for ip := range prog.Inst {
- if f(prog, ip) {
- matches = append(matches, uint32(ip))
- }
- }
- return
-}
-
-var notOnePass *onePassProg = nil
-
-// compileOnePass returns a new *syntax.Prog suitable for onePass execution if the original Prog
-// can be recharacterized as a one-pass regexp program, or syntax.notOnePass if the
-// Prog cannot be converted. For a one pass prog, the fundamental condition that must
-// be true is: at any InstAlt, there must be no ambiguity about what branch to take.
-func compileOnePass(prog *syntax.Prog) (p *onePassProg) {
- if prog.Start == 0 {
- return notOnePass
- }
- // onepass regexp is anchored
- if prog.Inst[prog.Start].Op != syntax.InstEmptyWidth ||
- syntax.EmptyOp(prog.Inst[prog.Start].Arg)&syntax.EmptyBeginText != syntax.EmptyBeginText {
- return notOnePass
- }
- // every instruction leading to InstMatch must be EmptyEndText
- for _, inst := range prog.Inst {
- opOut := prog.Inst[inst.Out].Op
- switch inst.Op {
- default:
- if opOut == syntax.InstMatch {
- return notOnePass
- }
- case syntax.InstAlt, syntax.InstAltMatch:
- if opOut == syntax.InstMatch || prog.Inst[inst.Arg].Op == syntax.InstMatch {
- return notOnePass
- }
- case syntax.InstEmptyWidth:
- if opOut == syntax.InstMatch {
- if syntax.EmptyOp(inst.Arg)&syntax.EmptyEndText == syntax.EmptyEndText {
- continue
- }
- return notOnePass
- }
- }
- }
- // Creates a slightly optimized copy of the original Prog
- // that cleans up some Prog idioms that block valid onepass programs
- p = onePassCopy(prog)
-
- // checkAmbiguity on InstAlts, build onepass Prog if possible
- p = makeOnePass(p)
-
- if p != notOnePass {
- cleanupOnePass(p, prog)
- }
- return p
-}
diff --git a/src/pkg/regexp/onepass_test.go b/src/pkg/regexp/onepass_test.go
deleted file mode 100644
index 7b2beea67..000000000
--- a/src/pkg/regexp/onepass_test.go
+++ /dev/null
@@ -1,208 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package regexp
-
-import (
- "reflect"
- "regexp/syntax"
- "testing"
-)
-
-var runeMergeTests = []struct {
- left, right, merged []rune
- next []uint32
- leftPC, rightPC uint32
-}{
- {
- // empty rhs
- []rune{69, 69},
- []rune{},
- []rune{69, 69},
- []uint32{1},
- 1, 2,
- },
- {
- // identical runes, identical targets
- []rune{69, 69},
- []rune{69, 69},
- []rune{},
- []uint32{mergeFailed},
- 1, 1,
- },
- {
- // identical runes, different targets
- []rune{69, 69},
- []rune{69, 69},
- []rune{},
- []uint32{mergeFailed},
- 1, 2,
- },
- {
- // append right-first
- []rune{69, 69},
- []rune{71, 71},
- []rune{69, 69, 71, 71},
- []uint32{1, 2},
- 1, 2,
- },
- {
- // append, left-first
- []rune{71, 71},
- []rune{69, 69},
- []rune{69, 69, 71, 71},
- []uint32{2, 1},
- 1, 2,
- },
- {
- // successful interleave
- []rune{60, 60, 71, 71, 101, 101},
- []rune{69, 69, 88, 88},
- []rune{60, 60, 69, 69, 71, 71, 88, 88, 101, 101},
- []uint32{1, 2, 1, 2, 1},
- 1, 2,
- },
- {
- // left surrounds right
- []rune{69, 74},
- []rune{71, 71},
- []rune{},
- []uint32{mergeFailed},
- 1, 2,
- },
- {
- // right surrounds left
- []rune{69, 74},
- []rune{68, 75},
- []rune{},
- []uint32{mergeFailed},
- 1, 2,
- },
- {
- // overlap at interval begin
- []rune{69, 74},
- []rune{74, 75},
- []rune{},
- []uint32{mergeFailed},
- 1, 2,
- },
- {
- // overlap at interval end
- []rune{69, 74},
- []rune{65, 69},
- []rune{},
- []uint32{mergeFailed},
- 1, 2,
- },
- {
- // overlap from above
- []rune{69, 74},
- []rune{71, 74},
- []rune{},
- []uint32{mergeFailed},
- 1, 2,
- },
- {
- // overlap from below
- []rune{69, 74},
- []rune{65, 71},
- []rune{},
- []uint32{mergeFailed},
- 1, 2,
- },
- {
- // out of order []rune
- []rune{69, 74, 60, 65},
- []rune{66, 67},
- []rune{},
- []uint32{mergeFailed},
- 1, 2,
- },
-}
-
-func TestMergeRuneSet(t *testing.T) {
- for ix, test := range runeMergeTests {
- merged, next := mergeRuneSets(&test.left, &test.right, test.leftPC, test.rightPC)
- if !reflect.DeepEqual(merged, test.merged) {
- t.Errorf("mergeRuneSet :%d (%v, %v) merged\n have\n%v\nwant\n%v", ix, test.left, test.right, merged, test.merged)
- }
- if !reflect.DeepEqual(next, test.next) {
- t.Errorf("mergeRuneSet :%d(%v, %v) next\n have\n%v\nwant\n%v", ix, test.left, test.right, next, test.next)
- }
- }
-}
-
-const noStr = `!`
-
-var onePass = &onePassProg{}
-
-var onePassTests = []struct {
- re string
- onePass *onePassProg
- prog string
-}{
- {`^(?:a|(?:a*))$`, notOnePass, noStr},
- {`^(?:(a)|(?:a*))$`, notOnePass, noStr},
- {`^(?:(?:(?:.(?:$))?))$`, onePass, `a`},
- {`^abcd$`, onePass, `abcd`},
- {`^abcd$`, onePass, `abcde`},
- {`^(?:(?:a{0,})*?)$`, onePass, `a`},
- {`^(?:(?:a+)*)$`, onePass, ``},
- {`^(?:(?:a|(?:aa)))$`, onePass, ``},
- {`^(?:[^\s\S])$`, onePass, ``},
- {`^(?:(?:a{3,4}){0,})$`, notOnePass, `aaaaaa`},
- {`^(?:(?:a+)*)$`, onePass, `a`},
- {`^(?:(?:(?:a*)+))$`, onePass, noStr},
- {`^(?:(?:a+)*)$`, onePass, ``},
- {`^[a-c]+$`, onePass, `abc`},
- {`^[a-c]*$`, onePass, `abcdabc`},
- {`^(?:a*)$`, onePass, `aaaaaaa`},
- {`^(?:(?:aa)|a)$`, onePass, `a`},
- {`^[a-c]*`, notOnePass, `abcdabc`},
- {`^[a-c]*$`, onePass, `abc`},
- {`^...$`, onePass, ``},
- {`^(?:a|(?:aa))$`, onePass, `a`},
- {`^[a-c]*`, notOnePass, `abcabc`},
- {`^a((b))c$`, onePass, noStr},
- {`^a.[l-nA-Cg-j]?e$`, onePass, noStr},
- {`^a((b))$`, onePass, noStr},
- {`^a(?:(b)|(c))c$`, onePass, noStr},
- {`^a(?:(b*)|(c))c$`, notOnePass, noStr},
- {`^a(?:b|c)$`, onePass, noStr},
- {`^a(?:b?|c)$`, onePass, noStr},
- {`^a(?:b?|c?)$`, notOnePass, noStr},
- {`^a(?:b?|c+)$`, onePass, noStr},
- {`^a(?:b+|(bc))d$`, notOnePass, noStr},
- {`^a(?:bc)+$`, onePass, noStr},
- {`^a(?:[bcd])+$`, onePass, noStr},
- {`^a((?:[bcd])+)$`, onePass, noStr},
- {`^a(:?b|c)*d$`, onePass, `abbbccbbcbbd"`},
- {`^.bc(d|e)*$`, onePass, `abcddddddeeeededd`},
- {`^(?:(?:aa)|.)$`, notOnePass, `a`},
- {`^(?:(?:a{1,2}){1,2})$`, notOnePass, `aaaa`},
-}
-
-func TestCompileOnePass(t *testing.T) {
- var (
- p *syntax.Prog
- re *syntax.Regexp
- err error
- )
- for _, test := range onePassTests {
- if re, err = syntax.Parse(test.re, syntax.Perl); err != nil {
- t.Errorf("Parse(%q) got err:%s, want success", test.re, err)
- continue
- }
- // needs to be done before compile...
- re = re.Simplify()
- if p, err = syntax.Compile(re); err != nil {
- t.Errorf("Compile(%q) got err:%s, want success", test.re, err)
- continue
- }
- onePass = compileOnePass(p)
- if (onePass == notOnePass) != (test.onePass == notOnePass) {
- t.Errorf("CompileOnePass(%q) got %v, expected %v", test.re, onePass, test.onePass)
- }
- }
-}
diff --git a/src/pkg/regexp/regexp.go b/src/pkg/regexp/regexp.go
deleted file mode 100644
index 0b8336a04..000000000
--- a/src/pkg/regexp/regexp.go
+++ /dev/null
@@ -1,1120 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package regexp implements regular expression search.
-//
-// The syntax of the regular expressions accepted is the same
-// general syntax used by Perl, Python, and other languages.
-// More precisely, it is the syntax accepted by RE2 and described at
-// http://code.google.com/p/re2/wiki/Syntax, except for \C.
-// For an overview of the syntax, run
-// godoc regexp/syntax
-//
-// The regexp implementation provided by this package is
-// guaranteed to run in time linear in the size of the input.
-// (This is a property not guaranteed by most open source
-// implementations of regular expressions.) For more information
-// about this property, see
-// http://swtch.com/~rsc/regexp/regexp1.html
-// or any book about automata theory.
-//
-// All characters are UTF-8-encoded code points.
-//
-// There are 16 methods of Regexp that match a regular expression and identify
-// the matched text. Their names are matched by this regular expression:
-//
-// Find(All)?(String)?(Submatch)?(Index)?
-//
-// If 'All' is present, the routine matches successive non-overlapping
-// matches of the entire expression. Empty matches abutting a preceding
-// match are ignored. The return value is a slice containing the successive
-// return values of the corresponding non-'All' routine. These routines take
-// an extra integer argument, n; if n >= 0, the function returns at most n
-// matches/submatches.
-//
-// If 'String' is present, the argument is a string; otherwise it is a slice
-// of bytes; return values are adjusted as appropriate.
-//
-// If 'Submatch' is present, the return value is a slice identifying the
-// successive submatches of the expression. Submatches are matches of
-// parenthesized subexpressions (also known as capturing groups) within the
-// regular expression, numbered from left to right in order of opening
-// parenthesis. Submatch 0 is the match of the entire expression, submatch 1
-// the match of the first parenthesized subexpression, and so on.
-//
-// If 'Index' is present, matches and submatches are identified by byte index
-// pairs within the input string: result[2*n:2*n+2] identifies the indexes of
-// the nth submatch. The pair for n==0 identifies the match of the entire
-// expression. If 'Index' is not present, the match is identified by the
-// text of the match/submatch. If an index is negative, it means that
-// subexpression did not match any string in the input.
-//
-// There is also a subset of the methods that can be applied to text read
-// from a RuneReader:
-//
-// MatchReader, FindReaderIndex, FindReaderSubmatchIndex
-//
-// This set may grow. Note that regular expression matches may need to
-// examine text beyond the text returned by a match, so the methods that
-// match text from a RuneReader may read arbitrarily far into the input
-// before returning.
-//
-// (There are a few other methods that do not match this pattern.)
-//
-package regexp
-
-import (
- "bytes"
- "io"
- "regexp/syntax"
- "strconv"
- "strings"
- "sync"
- "unicode"
- "unicode/utf8"
-)
-
-var debug = false
-
-// Regexp is the representation of a compiled regular expression.
-// A Regexp is safe for concurrent use by multiple goroutines.
-type Regexp struct {
- // read-only after Compile
- expr string // as passed to Compile
- prog *syntax.Prog // compiled program
- onepass *onePassProg // onepass program or nil
- prefix string // required prefix in unanchored matches
- prefixBytes []byte // prefix, as a []byte
- prefixComplete bool // prefix is the entire regexp
- prefixRune rune // first rune in prefix
- prefixEnd uint32 // pc for last rune in prefix
- cond syntax.EmptyOp // empty-width conditions required at start of match
- numSubexp int
- subexpNames []string
- longest bool
-
- // cache of machines for running regexp
- mu sync.Mutex
- machine []*machine
-}
-
-// String returns the source text used to compile the regular expression.
-func (re *Regexp) String() string {
- return re.expr
-}
-
-// Compile parses a regular expression and returns, if successful,
-// a Regexp object that can be used to match against text.
-//
-// When matching against text, the regexp returns a match that
-// begins as early as possible in the input (leftmost), and among those
-// it chooses the one that a backtracking search would have found first.
-// This so-called leftmost-first matching is the same semantics
-// that Perl, Python, and other implementations use, although this
-// package implements it without the expense of backtracking.
-// For POSIX leftmost-longest matching, see CompilePOSIX.
-func Compile(expr string) (*Regexp, error) {
- return compile(expr, syntax.Perl, false)
-}
-
-// CompilePOSIX is like Compile but restricts the regular expression
-// to POSIX ERE (egrep) syntax and changes the match semantics to
-// leftmost-longest.
-//
-// That is, when matching against text, the regexp returns a match that
-// begins as early as possible in the input (leftmost), and among those
-// it chooses a match that is as long as possible.
-// This so-called leftmost-longest matching is the same semantics
-// that early regular expression implementations used and that POSIX
-// specifies.
-//
-// However, there can be multiple leftmost-longest matches, with different
-// submatch choices, and here this package diverges from POSIX.
-// Among the possible leftmost-longest matches, this package chooses
-// the one that a backtracking search would have found first, while POSIX
-// specifies that the match be chosen to maximize the length of the first
-// subexpression, then the second, and so on from left to right.
-// The POSIX rule is computationally prohibitive and not even well-defined.
-// See http://swtch.com/~rsc/regexp/regexp2.html#posix for details.
-func CompilePOSIX(expr string) (*Regexp, error) {
- return compile(expr, syntax.POSIX, true)
-}
-
-// Longest makes future searches prefer the leftmost-longest match.
-// That is, when matching against text, the regexp returns a match that
-// begins as early as possible in the input (leftmost), and among those
-// it chooses a match that is as long as possible.
-func (re *Regexp) Longest() {
- re.longest = true
-}
-
-func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, error) {
- re, err := syntax.Parse(expr, mode)
- if err != nil {
- return nil, err
- }
- maxCap := re.MaxCap()
- capNames := re.CapNames()
-
- re = re.Simplify()
- prog, err := syntax.Compile(re)
- if err != nil {
- return nil, err
- }
- regexp := &Regexp{
- expr: expr,
- prog: prog,
- onepass: compileOnePass(prog),
- numSubexp: maxCap,
- subexpNames: capNames,
- cond: prog.StartCond(),
- longest: longest,
- }
- if regexp.onepass == notOnePass {
- regexp.prefix, regexp.prefixComplete = prog.Prefix()
- } else {
- regexp.prefix, regexp.prefixComplete, regexp.prefixEnd = onePassPrefix(prog)
- }
- if regexp.prefix != "" {
- // TODO(rsc): Remove this allocation by adding
- // IndexString to package bytes.
- regexp.prefixBytes = []byte(regexp.prefix)
- regexp.prefixRune, _ = utf8.DecodeRuneInString(regexp.prefix)
- }
- return regexp, nil
-}
-
-// get returns a machine to use for matching re.
-// It uses the re's machine cache if possible, to avoid
-// unnecessary allocation.
-func (re *Regexp) get() *machine {
- re.mu.Lock()
- if n := len(re.machine); n > 0 {
- z := re.machine[n-1]
- re.machine = re.machine[:n-1]
- re.mu.Unlock()
- return z
- }
- re.mu.Unlock()
- z := progMachine(re.prog, re.onepass)
- z.re = re
- return z
-}
-
-// put returns a machine to the re's machine cache.
-// There is no attempt to limit the size of the cache, so it will
-// grow to the maximum number of simultaneous matches
-// run using re. (The cache empties when re gets garbage collected.)
-func (re *Regexp) put(z *machine) {
- re.mu.Lock()
- re.machine = append(re.machine, z)
- re.mu.Unlock()
-}
-
-// MustCompile is like Compile but panics if the expression cannot be parsed.
-// It simplifies safe initialization of global variables holding compiled regular
-// expressions.
-func MustCompile(str string) *Regexp {
- regexp, error := Compile(str)
- if error != nil {
- panic(`regexp: Compile(` + quote(str) + `): ` + error.Error())
- }
- return regexp
-}
-
-// MustCompilePOSIX is like CompilePOSIX but panics if the expression cannot be parsed.
-// It simplifies safe initialization of global variables holding compiled regular
-// expressions.
-func MustCompilePOSIX(str string) *Regexp {
- regexp, error := CompilePOSIX(str)
- if error != nil {
- panic(`regexp: CompilePOSIX(` + quote(str) + `): ` + error.Error())
- }
- return regexp
-}
-
-func quote(s string) string {
- if strconv.CanBackquote(s) {
- return "`" + s + "`"
- }
- return strconv.Quote(s)
-}
-
-// NumSubexp returns the number of parenthesized subexpressions in this Regexp.
-func (re *Regexp) NumSubexp() int {
- return re.numSubexp
-}
-
-// SubexpNames returns the names of the parenthesized subexpressions
-// in this Regexp. The name for the first sub-expression is names[1],
-// so that if m is a match slice, the name for m[i] is SubexpNames()[i].
-// Since the Regexp as a whole cannot be named, names[0] is always
-// the empty string. The slice should not be modified.
-func (re *Regexp) SubexpNames() []string {
- return re.subexpNames
-}
-
-const endOfText rune = -1
-
-// input abstracts different representations of the input text. It provides
-// one-character lookahead.
-type input interface {
- step(pos int) (r rune, width int) // advance one rune
- canCheckPrefix() bool // can we look ahead without losing info?
- hasPrefix(re *Regexp) bool
- index(re *Regexp, pos int) int
- context(pos int) syntax.EmptyOp
-}
-
-// inputString scans a string.
-type inputString struct {
- str string
-}
-
-func (i *inputString) step(pos int) (rune, int) {
- if pos < len(i.str) {
- c := i.str[pos]
- if c < utf8.RuneSelf {
- return rune(c), 1
- }
- return utf8.DecodeRuneInString(i.str[pos:])
- }
- return endOfText, 0
-}
-
-func (i *inputString) canCheckPrefix() bool {
- return true
-}
-
-func (i *inputString) hasPrefix(re *Regexp) bool {
- return strings.HasPrefix(i.str, re.prefix)
-}
-
-func (i *inputString) index(re *Regexp, pos int) int {
- return strings.Index(i.str[pos:], re.prefix)
-}
-
-func (i *inputString) context(pos int) syntax.EmptyOp {
- r1, r2 := endOfText, endOfText
- if pos > 0 && pos <= len(i.str) {
- r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
- }
- if pos < len(i.str) {
- r2, _ = utf8.DecodeRuneInString(i.str[pos:])
- }
- return syntax.EmptyOpContext(r1, r2)
-}
-
-// inputBytes scans a byte slice.
-type inputBytes struct {
- str []byte
-}
-
-func (i *inputBytes) step(pos int) (rune, int) {
- if pos < len(i.str) {
- c := i.str[pos]
- if c < utf8.RuneSelf {
- return rune(c), 1
- }
- return utf8.DecodeRune(i.str[pos:])
- }
- return endOfText, 0
-}
-
-func (i *inputBytes) canCheckPrefix() bool {
- return true
-}
-
-func (i *inputBytes) hasPrefix(re *Regexp) bool {
- return bytes.HasPrefix(i.str, re.prefixBytes)
-}
-
-func (i *inputBytes) index(re *Regexp, pos int) int {
- return bytes.Index(i.str[pos:], re.prefixBytes)
-}
-
-func (i *inputBytes) context(pos int) syntax.EmptyOp {
- r1, r2 := endOfText, endOfText
- if pos > 0 && pos <= len(i.str) {
- r1, _ = utf8.DecodeLastRune(i.str[:pos])
- }
- if pos < len(i.str) {
- r2, _ = utf8.DecodeRune(i.str[pos:])
- }
- return syntax.EmptyOpContext(r1, r2)
-}
-
-// inputReader scans a RuneReader.
-type inputReader struct {
- r io.RuneReader
- atEOT bool
- pos int
-}
-
-func (i *inputReader) step(pos int) (rune, int) {
- if !i.atEOT && pos != i.pos {
- return endOfText, 0
-
- }
- r, w, err := i.r.ReadRune()
- if err != nil {
- i.atEOT = true
- return endOfText, 0
- }
- i.pos += w
- return r, w
-}
-
-func (i *inputReader) canCheckPrefix() bool {
- return false
-}
-
-func (i *inputReader) hasPrefix(re *Regexp) bool {
- return false
-}
-
-func (i *inputReader) index(re *Regexp, pos int) int {
- return -1
-}
-
-func (i *inputReader) context(pos int) syntax.EmptyOp {
- return 0
-}
-
-// LiteralPrefix returns a literal string that must begin any match
-// of the regular expression re. It returns the boolean true if the
-// literal string comprises the entire regular expression.
-func (re *Regexp) LiteralPrefix() (prefix string, complete bool) {
- return re.prefix, re.prefixComplete
-}
-
-// MatchReader reports whether the Regexp matches the text read by the
-// RuneReader.
-func (re *Regexp) MatchReader(r io.RuneReader) bool {
- return re.doExecute(r, nil, "", 0, 0) != nil
-}
-
-// MatchString reports whether the Regexp matches the string s.
-func (re *Regexp) MatchString(s string) bool {
- return re.doExecute(nil, nil, s, 0, 0) != nil
-}
-
-// Match reports whether the Regexp matches the byte slice b.
-func (re *Regexp) Match(b []byte) bool {
- return re.doExecute(nil, b, "", 0, 0) != nil
-}
-
-// MatchReader checks whether a textual regular expression matches the text
-// read by the RuneReader. More complicated queries need to use Compile and
-// the full Regexp interface.
-func MatchReader(pattern string, r io.RuneReader) (matched bool, err error) {
- re, err := Compile(pattern)
- if err != nil {
- return false, err
- }
- return re.MatchReader(r), nil
-}
-
-// MatchString checks whether a textual regular expression
-// matches a string. More complicated queries need
-// to use Compile and the full Regexp interface.
-func MatchString(pattern string, s string) (matched bool, err error) {
- re, err := Compile(pattern)
- if err != nil {
- return false, err
- }
- return re.MatchString(s), nil
-}
-
-// Match checks whether a textual regular expression
-// matches a byte slice. More complicated queries need
-// to use Compile and the full Regexp interface.
-func Match(pattern string, b []byte) (matched bool, err error) {
- re, err := Compile(pattern)
- if err != nil {
- return false, err
- }
- return re.Match(b), nil
-}
-
-// ReplaceAllString returns a copy of src, replacing matches of the Regexp
-// with the replacement string repl. Inside repl, $ signs are interpreted as
-// in Expand, so for instance $1 represents the text of the first submatch.
-func (re *Regexp) ReplaceAllString(src, repl string) string {
- n := 2
- if strings.Index(repl, "$") >= 0 {
- n = 2 * (re.numSubexp + 1)
- }
- b := re.replaceAll(nil, src, n, func(dst []byte, match []int) []byte {
- return re.expand(dst, repl, nil, src, match)
- })
- return string(b)
-}
-
-// ReplaceAllLiteralString returns a copy of src, replacing matches of the Regexp
-// with the replacement string repl. The replacement repl is substituted directly,
-// without using Expand.
-func (re *Regexp) ReplaceAllLiteralString(src, repl string) string {
- return string(re.replaceAll(nil, src, 2, func(dst []byte, match []int) []byte {
- return append(dst, repl...)
- }))
-}
-
-// ReplaceAllStringFunc returns a copy of src in which all matches of the
-// Regexp have been replaced by the return value of function repl applied
-// to the matched substring. The replacement returned by repl is substituted
-// directly, without using Expand.
-func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string {
- b := re.replaceAll(nil, src, 2, func(dst []byte, match []int) []byte {
- return append(dst, repl(src[match[0]:match[1]])...)
- })
- return string(b)
-}
-
-func (re *Regexp) replaceAll(bsrc []byte, src string, nmatch int, repl func(dst []byte, m []int) []byte) []byte {
- lastMatchEnd := 0 // end position of the most recent match
- searchPos := 0 // position where we next look for a match
- var buf []byte
- var endPos int
- if bsrc != nil {
- endPos = len(bsrc)
- } else {
- endPos = len(src)
- }
- for searchPos <= endPos {
- a := re.doExecute(nil, bsrc, src, searchPos, nmatch)
- if len(a) == 0 {
- break // no more matches
- }
-
- // Copy the unmatched characters before this match.
- if bsrc != nil {
- buf = append(buf, bsrc[lastMatchEnd:a[0]]...)
- } else {
- buf = append(buf, src[lastMatchEnd:a[0]]...)
- }
-
- // Now insert a copy of the replacement string, but not for a
- // match of the empty string immediately after another match.
- // (Otherwise, we get double replacement for patterns that
- // match both empty and nonempty strings.)
- if a[1] > lastMatchEnd || a[0] == 0 {
- buf = repl(buf, a)
- }
- lastMatchEnd = a[1]
-
- // Advance past this match; always advance at least one character.
- var width int
- if bsrc != nil {
- _, width = utf8.DecodeRune(bsrc[searchPos:])
- } else {
- _, width = utf8.DecodeRuneInString(src[searchPos:])
- }
- if searchPos+width > a[1] {
- searchPos += width
- } else if searchPos+1 > a[1] {
- // This clause is only needed at the end of the input
- // string. In that case, DecodeRuneInString returns width=0.
- searchPos++
- } else {
- searchPos = a[1]
- }
- }
-
- // Copy the unmatched characters after the last match.
- if bsrc != nil {
- buf = append(buf, bsrc[lastMatchEnd:]...)
- } else {
- buf = append(buf, src[lastMatchEnd:]...)
- }
-
- return buf
-}
-
-// ReplaceAll returns a copy of src, replacing matches of the Regexp
-// with the replacement text repl. Inside repl, $ signs are interpreted as
-// in Expand, so for instance $1 represents the text of the first submatch.
-func (re *Regexp) ReplaceAll(src, repl []byte) []byte {
- n := 2
- if bytes.IndexByte(repl, '$') >= 0 {
- n = 2 * (re.numSubexp + 1)
- }
- srepl := ""
- b := re.replaceAll(src, "", n, func(dst []byte, match []int) []byte {
- if len(srepl) != len(repl) {
- srepl = string(repl)
- }
- return re.expand(dst, srepl, src, "", match)
- })
- return b
-}
-
-// ReplaceAllLiteral returns a copy of src, replacing matches of the Regexp
-// with the replacement bytes repl. The replacement repl is substituted directly,
-// without using Expand.
-func (re *Regexp) ReplaceAllLiteral(src, repl []byte) []byte {
- return re.replaceAll(src, "", 2, func(dst []byte, match []int) []byte {
- return append(dst, repl...)
- })
-}
-
-// ReplaceAllFunc returns a copy of src in which all matches of the
-// Regexp have been replaced by the return value of function repl applied
-// to the matched byte slice. The replacement returned by repl is substituted
-// directly, without using Expand.
-func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte {
- return re.replaceAll(src, "", 2, func(dst []byte, match []int) []byte {
- return append(dst, repl(src[match[0]:match[1]])...)
- })
-}
-
-var specialBytes = []byte(`\.+*?()|[]{}^$`)
-
-func special(b byte) bool {
- return bytes.IndexByte(specialBytes, b) >= 0
-}
-
-// QuoteMeta returns a string that quotes all regular expression metacharacters
-// inside the argument text; the returned string is a regular expression matching
-// the literal text. For example, QuoteMeta(`[foo]`) returns `\[foo\]`.
-func QuoteMeta(s string) string {
- b := make([]byte, 2*len(s))
-
- // A byte loop is correct because all metacharacters are ASCII.
- j := 0
- for i := 0; i < len(s); i++ {
- if special(s[i]) {
- b[j] = '\\'
- j++
- }
- b[j] = s[i]
- j++
- }
- return string(b[0:j])
-}
-
-// The number of capture values in the program may correspond
-// to fewer capturing expressions than are in the regexp.
-// For example, "(a){0}" turns into an empty program, so the
-// maximum capture in the program is 0 but we need to return
-// an expression for \1. Pad appends -1s to the slice a as needed.
-func (re *Regexp) pad(a []int) []int {
- if a == nil {
- // No match.
- return nil
- }
- n := (1 + re.numSubexp) * 2
- for len(a) < n {
- a = append(a, -1)
- }
- return a
-}
-
-// Find matches in slice b if b is non-nil, otherwise find matches in string s.
-func (re *Regexp) allMatches(s string, b []byte, n int, deliver func([]int)) {
- var end int
- if b == nil {
- end = len(s)
- } else {
- end = len(b)
- }
-
- for pos, i, prevMatchEnd := 0, 0, -1; i < n && pos <= end; {
- matches := re.doExecute(nil, b, s, pos, re.prog.NumCap)
- if len(matches) == 0 {
- break
- }
-
- accept := true
- if matches[1] == pos {
- // We've found an empty match.
- if matches[0] == prevMatchEnd {
- // We don't allow an empty match right
- // after a previous match, so ignore it.
- accept = false
- }
- var width int
- // TODO: use step()
- if b == nil {
- _, width = utf8.DecodeRuneInString(s[pos:end])
- } else {
- _, width = utf8.DecodeRune(b[pos:end])
- }
- if width > 0 {
- pos += width
- } else {
- pos = end + 1
- }
- } else {
- pos = matches[1]
- }
- prevMatchEnd = matches[1]
-
- if accept {
- deliver(re.pad(matches))
- i++
- }
- }
-}
-
-// Find returns a slice holding the text of the leftmost match in b of the regular expression.
-// A return value of nil indicates no match.
-func (re *Regexp) Find(b []byte) []byte {
- a := re.doExecute(nil, b, "", 0, 2)
- if a == nil {
- return nil
- }
- return b[a[0]:a[1]]
-}
-
-// FindIndex returns a two-element slice of integers defining the location of
-// the leftmost match in b of the regular expression. The match itself is at
-// b[loc[0]:loc[1]].
-// A return value of nil indicates no match.
-func (re *Regexp) FindIndex(b []byte) (loc []int) {
- a := re.doExecute(nil, b, "", 0, 2)
- if a == nil {
- return nil
- }
- return a[0:2]
-}
-
-// FindString returns a string holding the text of the leftmost match in s of the regular
-// expression. If there is no match, the return value is an empty string,
-// but it will also be empty if the regular expression successfully matches
-// an empty string. Use FindStringIndex or FindStringSubmatch if it is
-// necessary to distinguish these cases.
-func (re *Regexp) FindString(s string) string {
- a := re.doExecute(nil, nil, s, 0, 2)
- if a == nil {
- return ""
- }
- return s[a[0]:a[1]]
-}
-
-// FindStringIndex returns a two-element slice of integers defining the
-// location of the leftmost match in s of the regular expression. The match
-// itself is at s[loc[0]:loc[1]].
-// A return value of nil indicates no match.
-func (re *Regexp) FindStringIndex(s string) (loc []int) {
- a := re.doExecute(nil, nil, s, 0, 2)
- if a == nil {
- return nil
- }
- return a[0:2]
-}
-
-// FindReaderIndex returns a two-element slice of integers defining the
-// location of the leftmost match of the regular expression in text read from
-// the RuneReader. The match text was found in the input stream at
-// byte offset loc[0] through loc[1]-1.
-// A return value of nil indicates no match.
-func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int) {
- a := re.doExecute(r, nil, "", 0, 2)
- if a == nil {
- return nil
- }
- return a[0:2]
-}
-
-// FindSubmatch returns a slice of slices holding the text of the leftmost
-// match of the regular expression in b and the matches, if any, of its
-// subexpressions, as defined by the 'Submatch' descriptions in the package
-// comment.
-// A return value of nil indicates no match.
-func (re *Regexp) FindSubmatch(b []byte) [][]byte {
- a := re.doExecute(nil, b, "", 0, re.prog.NumCap)
- if a == nil {
- return nil
- }
- ret := make([][]byte, 1+re.numSubexp)
- for i := range ret {
- if 2*i < len(a) && a[2*i] >= 0 {
- ret[i] = b[a[2*i]:a[2*i+1]]
- }
- }
- return ret
-}
-
-// Expand appends template to dst and returns the result; during the
-// append, Expand replaces variables in the template with corresponding
-// matches drawn from src. The match slice should have been returned by
-// FindSubmatchIndex.
-//
-// In the template, a variable is denoted by a substring of the form
-// $name or ${name}, where name is a non-empty sequence of letters,
-// digits, and underscores. A purely numeric name like $1 refers to
-// the submatch with the corresponding index; other names refer to
-// capturing parentheses named with the (?P<name>...) syntax. A
-// reference to an out of range or unmatched index or a name that is not
-// present in the regular expression is replaced with an empty slice.
-//
-// In the $name form, name is taken to be as long as possible: $1x is
-// equivalent to ${1x}, not ${1}x, and $10 is equivalent to ${10}, not ${1}0.
-//
-// To insert a literal $ in the output, use $$ in the template.
-func (re *Regexp) Expand(dst []byte, template []byte, src []byte, match []int) []byte {
- return re.expand(dst, string(template), src, "", match)
-}
-
-// ExpandString is like Expand but the template and source are strings.
-// It appends to and returns a byte slice in order to give the calling
-// code control over allocation.
-func (re *Regexp) ExpandString(dst []byte, template string, src string, match []int) []byte {
- return re.expand(dst, template, nil, src, match)
-}
-
-func (re *Regexp) expand(dst []byte, template string, bsrc []byte, src string, match []int) []byte {
- for len(template) > 0 {
- i := strings.Index(template, "$")
- if i < 0 {
- break
- }
- dst = append(dst, template[:i]...)
- template = template[i:]
- if len(template) > 1 && template[1] == '$' {
- // Treat $$ as $.
- dst = append(dst, '$')
- template = template[2:]
- continue
- }
- name, num, rest, ok := extract(template)
- if !ok {
- // Malformed; treat $ as raw text.
- dst = append(dst, '$')
- template = template[1:]
- continue
- }
- template = rest
- if num >= 0 {
- if 2*num+1 < len(match) && match[2*num] >= 0 {
- if bsrc != nil {
- dst = append(dst, bsrc[match[2*num]:match[2*num+1]]...)
- } else {
- dst = append(dst, src[match[2*num]:match[2*num+1]]...)
- }
- }
- } else {
- for i, namei := range re.subexpNames {
- if name == namei && 2*i+1 < len(match) && match[2*i] >= 0 {
- if bsrc != nil {
- dst = append(dst, bsrc[match[2*i]:match[2*i+1]]...)
- } else {
- dst = append(dst, src[match[2*i]:match[2*i+1]]...)
- }
- break
- }
- }
- }
- }
- dst = append(dst, template...)
- return dst
-}
-
-// extract returns the name from a leading "$name" or "${name}" in str.
-// If it is a number, extract returns num set to that number; otherwise num = -1.
-func extract(str string) (name string, num int, rest string, ok bool) {
- if len(str) < 2 || str[0] != '$' {
- return
- }
- brace := false
- if str[1] == '{' {
- brace = true
- str = str[2:]
- } else {
- str = str[1:]
- }
- i := 0
- for i < len(str) {
- rune, size := utf8.DecodeRuneInString(str[i:])
- if !unicode.IsLetter(rune) && !unicode.IsDigit(rune) && rune != '_' {
- break
- }
- i += size
- }
- if i == 0 {
- // empty name is not okay
- return
- }
- name = str[:i]
- if brace {
- if i >= len(str) || str[i] != '}' {
- // missing closing brace
- return
- }
- i++
- }
-
- // Parse number.
- num = 0
- for i := 0; i < len(name); i++ {
- if name[i] < '0' || '9' < name[i] || num >= 1e8 {
- num = -1
- break
- }
- num = num*10 + int(name[i]) - '0'
- }
- // Disallow leading zeros.
- if name[0] == '0' && len(name) > 1 {
- num = -1
- }
-
- rest = str[i:]
- ok = true
- return
-}
-
-// FindSubmatchIndex returns a slice holding the index pairs identifying the
-// leftmost match of the regular expression in b and the matches, if any, of
-// its subexpressions, as defined by the 'Submatch' and 'Index' descriptions
-// in the package comment.
-// A return value of nil indicates no match.
-func (re *Regexp) FindSubmatchIndex(b []byte) []int {
- return re.pad(re.doExecute(nil, b, "", 0, re.prog.NumCap))
-}
-
-// FindStringSubmatch returns a slice of strings holding the text of the
-// leftmost match of the regular expression in s and the matches, if any, of
-// its subexpressions, as defined by the 'Submatch' description in the
-// package comment.
-// A return value of nil indicates no match.
-func (re *Regexp) FindStringSubmatch(s string) []string {
- a := re.doExecute(nil, nil, s, 0, re.prog.NumCap)
- if a == nil {
- return nil
- }
- ret := make([]string, 1+re.numSubexp)
- for i := range ret {
- if 2*i < len(a) && a[2*i] >= 0 {
- ret[i] = s[a[2*i]:a[2*i+1]]
- }
- }
- return ret
-}
-
-// FindStringSubmatchIndex returns a slice holding the index pairs
-// identifying the leftmost match of the regular expression in s and the
-// matches, if any, of its subexpressions, as defined by the 'Submatch' and
-// 'Index' descriptions in the package comment.
-// A return value of nil indicates no match.
-func (re *Regexp) FindStringSubmatchIndex(s string) []int {
- return re.pad(re.doExecute(nil, nil, s, 0, re.prog.NumCap))
-}
-
-// FindReaderSubmatchIndex returns a slice holding the index pairs
-// identifying the leftmost match of the regular expression of text read by
-// the RuneReader, and the matches, if any, of its subexpressions, as defined
-// by the 'Submatch' and 'Index' descriptions in the package comment. A
-// return value of nil indicates no match.
-func (re *Regexp) FindReaderSubmatchIndex(r io.RuneReader) []int {
- return re.pad(re.doExecute(r, nil, "", 0, re.prog.NumCap))
-}
-
-const startSize = 10 // The size at which to start a slice in the 'All' routines.
-
-// FindAll is the 'All' version of Find; it returns a slice of all successive
-// matches of the expression, as defined by the 'All' description in the
-// package comment.
-// A return value of nil indicates no match.
-func (re *Regexp) FindAll(b []byte, n int) [][]byte {
- if n < 0 {
- n = len(b) + 1
- }
- result := make([][]byte, 0, startSize)
- re.allMatches("", b, n, func(match []int) {
- result = append(result, b[match[0]:match[1]])
- })
- if len(result) == 0 {
- return nil
- }
- return result
-}
-
-// FindAllIndex is the 'All' version of FindIndex; it returns a slice of all
-// successive matches of the expression, as defined by the 'All' description
-// in the package comment.
-// A return value of nil indicates no match.
-func (re *Regexp) FindAllIndex(b []byte, n int) [][]int {
- if n < 0 {
- n = len(b) + 1
- }
- result := make([][]int, 0, startSize)
- re.allMatches("", b, n, func(match []int) {
- result = append(result, match[0:2])
- })
- if len(result) == 0 {
- return nil
- }
- return result
-}
-
-// FindAllString is the 'All' version of FindString; it returns a slice of all
-// successive matches of the expression, as defined by the 'All' description
-// in the package comment.
-// A return value of nil indicates no match.
-func (re *Regexp) FindAllString(s string, n int) []string {
- if n < 0 {
- n = len(s) + 1
- }
- result := make([]string, 0, startSize)
- re.allMatches(s, nil, n, func(match []int) {
- result = append(result, s[match[0]:match[1]])
- })
- if len(result) == 0 {
- return nil
- }
- return result
-}
-
-// FindAllStringIndex is the 'All' version of FindStringIndex; it returns a
-// slice of all successive matches of the expression, as defined by the 'All'
-// description in the package comment.
-// A return value of nil indicates no match.
-func (re *Regexp) FindAllStringIndex(s string, n int) [][]int {
- if n < 0 {
- n = len(s) + 1
- }
- result := make([][]int, 0, startSize)
- re.allMatches(s, nil, n, func(match []int) {
- result = append(result, match[0:2])
- })
- if len(result) == 0 {
- return nil
- }
- return result
-}
-
-// FindAllSubmatch is the 'All' version of FindSubmatch; it returns a slice
-// of all successive matches of the expression, as defined by the 'All'
-// description in the package comment.
-// A return value of nil indicates no match.
-func (re *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte {
- if n < 0 {
- n = len(b) + 1
- }
- result := make([][][]byte, 0, startSize)
- re.allMatches("", b, n, func(match []int) {
- slice := make([][]byte, len(match)/2)
- for j := range slice {
- if match[2*j] >= 0 {
- slice[j] = b[match[2*j]:match[2*j+1]]
- }
- }
- result = append(result, slice)
- })
- if len(result) == 0 {
- return nil
- }
- return result
-}
-
-// FindAllSubmatchIndex is the 'All' version of FindSubmatchIndex; it returns
-// a slice of all successive matches of the expression, as defined by the
-// 'All' description in the package comment.
-// A return value of nil indicates no match.
-func (re *Regexp) FindAllSubmatchIndex(b []byte, n int) [][]int {
- if n < 0 {
- n = len(b) + 1
- }
- result := make([][]int, 0, startSize)
- re.allMatches("", b, n, func(match []int) {
- result = append(result, match)
- })
- if len(result) == 0 {
- return nil
- }
- return result
-}
-
-// FindAllStringSubmatch is the 'All' version of FindStringSubmatch; it
-// returns a slice of all successive matches of the expression, as defined by
-// the 'All' description in the package comment.
-// A return value of nil indicates no match.
-func (re *Regexp) FindAllStringSubmatch(s string, n int) [][]string {
- if n < 0 {
- n = len(s) + 1
- }
- result := make([][]string, 0, startSize)
- re.allMatches(s, nil, n, func(match []int) {
- slice := make([]string, len(match)/2)
- for j := range slice {
- if match[2*j] >= 0 {
- slice[j] = s[match[2*j]:match[2*j+1]]
- }
- }
- result = append(result, slice)
- })
- if len(result) == 0 {
- return nil
- }
- return result
-}
-
-// FindAllStringSubmatchIndex is the 'All' version of
-// FindStringSubmatchIndex; it returns a slice of all successive matches of
-// the expression, as defined by the 'All' description in the package
-// comment.
-// A return value of nil indicates no match.
-func (re *Regexp) FindAllStringSubmatchIndex(s string, n int) [][]int {
- if n < 0 {
- n = len(s) + 1
- }
- result := make([][]int, 0, startSize)
- re.allMatches(s, nil, n, func(match []int) {
- result = append(result, match)
- })
- if len(result) == 0 {
- return nil
- }
- return result
-}
-
-// Split slices s into substrings separated by the expression and returns a slice of
-// the substrings between those expression matches.
-//
-// The slice returned by this method consists of all the substrings of s
-// not contained in the slice returned by FindAllString. When called on an expression
-// that contains no metacharacters, it is equivalent to strings.SplitN.
-//
-// Example:
-// s := regexp.MustCompile("a*").Split("abaabaccadaaae", 5)
-// // s: ["", "b", "b", "c", "cadaaae"]
-//
-// The count determines the number of substrings to return:
-// n > 0: at most n substrings; the last substring will be the unsplit remainder.
-// n == 0: the result is nil (zero substrings)
-// n < 0: all substrings
-func (re *Regexp) Split(s string, n int) []string {
-
- if n == 0 {
- return nil
- }
-
- if len(re.expr) > 0 && len(s) == 0 {
- return []string{""}
- }
-
- matches := re.FindAllStringIndex(s, n)
- strings := make([]string, 0, len(matches))
-
- beg := 0
- end := 0
- for _, match := range matches {
- if n > 0 && len(strings) >= n-1 {
- break
- }
-
- end = match[0]
- if match[1] != 0 {
- strings = append(strings, s[beg:end])
- }
- beg = match[1]
- }
-
- if end != len(s) {
- strings = append(strings, s[beg:])
- }
-
- return strings
-}
diff --git a/src/pkg/regexp/syntax/compile.go b/src/pkg/regexp/syntax/compile.go
deleted file mode 100644
index 95f6f1569..000000000
--- a/src/pkg/regexp/syntax/compile.go
+++ /dev/null
@@ -1,289 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package syntax
-
-import "unicode"
-
-// A patchList is a list of instruction pointers that need to be filled in (patched).
-// Because the pointers haven't been filled in yet, we can reuse their storage
-// to hold the list. It's kind of sleazy, but works well in practice.
-// See http://swtch.com/~rsc/regexp/regexp1.html for inspiration.
-//
-// These aren't really pointers: they're integers, so we can reinterpret them
-// this way without using package unsafe. A value l denotes
-// p.inst[l>>1].Out (l&1==0) or .Arg (l&1==1).
-// l == 0 denotes the empty list, okay because we start every program
-// with a fail instruction, so we'll never want to point at its output link.
-type patchList uint32
-
-func (l patchList) next(p *Prog) patchList {
- i := &p.Inst[l>>1]
- if l&1 == 0 {
- return patchList(i.Out)
- }
- return patchList(i.Arg)
-}
-
-func (l patchList) patch(p *Prog, val uint32) {
- for l != 0 {
- i := &p.Inst[l>>1]
- if l&1 == 0 {
- l = patchList(i.Out)
- i.Out = val
- } else {
- l = patchList(i.Arg)
- i.Arg = val
- }
- }
-}
-
-func (l1 patchList) append(p *Prog, l2 patchList) patchList {
- if l1 == 0 {
- return l2
- }
- if l2 == 0 {
- return l1
- }
-
- last := l1
- for {
- next := last.next(p)
- if next == 0 {
- break
- }
- last = next
- }
-
- i := &p.Inst[last>>1]
- if last&1 == 0 {
- i.Out = uint32(l2)
- } else {
- i.Arg = uint32(l2)
- }
- return l1
-}
-
-// A frag represents a compiled program fragment.
-type frag struct {
- i uint32 // index of first instruction
- out patchList // where to record end instruction
-}
-
-type compiler struct {
- p *Prog
-}
-
-// Compile compiles the regexp into a program to be executed.
-// The regexp should have been simplified already (returned from re.Simplify).
-func Compile(re *Regexp) (*Prog, error) {
- var c compiler
- c.init()
- f := c.compile(re)
- f.out.patch(c.p, c.inst(InstMatch).i)
- c.p.Start = int(f.i)
- return c.p, nil
-}
-
-func (c *compiler) init() {
- c.p = new(Prog)
- c.p.NumCap = 2 // implicit ( and ) for whole match $0
- c.inst(InstFail)
-}
-
-var anyRuneNotNL = []rune{0, '\n' - 1, '\n' + 1, unicode.MaxRune}
-var anyRune = []rune{0, unicode.MaxRune}
-
-func (c *compiler) compile(re *Regexp) frag {
- switch re.Op {
- case OpNoMatch:
- return c.fail()
- case OpEmptyMatch:
- return c.nop()
- case OpLiteral:
- if len(re.Rune) == 0 {
- return c.nop()
- }
- var f frag
- for j := range re.Rune {
- f1 := c.rune(re.Rune[j:j+1], re.Flags)
- if j == 0 {
- f = f1
- } else {
- f = c.cat(f, f1)
- }
- }
- return f
- case OpCharClass:
- return c.rune(re.Rune, re.Flags)
- case OpAnyCharNotNL:
- return c.rune(anyRuneNotNL, 0)
- case OpAnyChar:
- return c.rune(anyRune, 0)
- case OpBeginLine:
- return c.empty(EmptyBeginLine)
- case OpEndLine:
- return c.empty(EmptyEndLine)
- case OpBeginText:
- return c.empty(EmptyBeginText)
- case OpEndText:
- return c.empty(EmptyEndText)
- case OpWordBoundary:
- return c.empty(EmptyWordBoundary)
- case OpNoWordBoundary:
- return c.empty(EmptyNoWordBoundary)
- case OpCapture:
- bra := c.cap(uint32(re.Cap << 1))
- sub := c.compile(re.Sub[0])
- ket := c.cap(uint32(re.Cap<<1 | 1))
- return c.cat(c.cat(bra, sub), ket)
- case OpStar:
- return c.star(c.compile(re.Sub[0]), re.Flags&NonGreedy != 0)
- case OpPlus:
- return c.plus(c.compile(re.Sub[0]), re.Flags&NonGreedy != 0)
- case OpQuest:
- return c.quest(c.compile(re.Sub[0]), re.Flags&NonGreedy != 0)
- case OpConcat:
- if len(re.Sub) == 0 {
- return c.nop()
- }
- var f frag
- for i, sub := range re.Sub {
- if i == 0 {
- f = c.compile(sub)
- } else {
- f = c.cat(f, c.compile(sub))
- }
- }
- return f
- case OpAlternate:
- var f frag
- for _, sub := range re.Sub {
- f = c.alt(f, c.compile(sub))
- }
- return f
- }
- panic("regexp: unhandled case in compile")
-}
-
-func (c *compiler) inst(op InstOp) frag {
- // TODO: impose length limit
- f := frag{i: uint32(len(c.p.Inst))}
- c.p.Inst = append(c.p.Inst, Inst{Op: op})
- return f
-}
-
-func (c *compiler) nop() frag {
- f := c.inst(InstNop)
- f.out = patchList(f.i << 1)
- return f
-}
-
-func (c *compiler) fail() frag {
- return frag{}
-}
-
-func (c *compiler) cap(arg uint32) frag {
- f := c.inst(InstCapture)
- f.out = patchList(f.i << 1)
- c.p.Inst[f.i].Arg = arg
-
- if c.p.NumCap < int(arg)+1 {
- c.p.NumCap = int(arg) + 1
- }
- return f
-}
-
-func (c *compiler) cat(f1, f2 frag) frag {
- // concat of failure is failure
- if f1.i == 0 || f2.i == 0 {
- return frag{}
- }
-
- // TODO: elide nop
-
- f1.out.patch(c.p, f2.i)
- return frag{f1.i, f2.out}
-}
-
-func (c *compiler) alt(f1, f2 frag) frag {
- // alt of failure is other
- if f1.i == 0 {
- return f2
- }
- if f2.i == 0 {
- return f1
- }
-
- f := c.inst(InstAlt)
- i := &c.p.Inst[f.i]
- i.Out = f1.i
- i.Arg = f2.i
- f.out = f1.out.append(c.p, f2.out)
- return f
-}
-
-func (c *compiler) quest(f1 frag, nongreedy bool) frag {
- f := c.inst(InstAlt)
- i := &c.p.Inst[f.i]
- if nongreedy {
- i.Arg = f1.i
- f.out = patchList(f.i << 1)
- } else {
- i.Out = f1.i
- f.out = patchList(f.i<<1 | 1)
- }
- f.out = f.out.append(c.p, f1.out)
- return f
-}
-
-func (c *compiler) star(f1 frag, nongreedy bool) frag {
- f := c.inst(InstAlt)
- i := &c.p.Inst[f.i]
- if nongreedy {
- i.Arg = f1.i
- f.out = patchList(f.i << 1)
- } else {
- i.Out = f1.i
- f.out = patchList(f.i<<1 | 1)
- }
- f1.out.patch(c.p, f.i)
- return f
-}
-
-func (c *compiler) plus(f1 frag, nongreedy bool) frag {
- return frag{f1.i, c.star(f1, nongreedy).out}
-}
-
-func (c *compiler) empty(op EmptyOp) frag {
- f := c.inst(InstEmptyWidth)
- c.p.Inst[f.i].Arg = uint32(op)
- f.out = patchList(f.i << 1)
- return f
-}
-
-func (c *compiler) rune(r []rune, flags Flags) frag {
- f := c.inst(InstRune)
- i := &c.p.Inst[f.i]
- i.Rune = r
- flags &= FoldCase // only relevant flag is FoldCase
- if len(r) != 1 || unicode.SimpleFold(r[0]) == r[0] {
- // and sometimes not even that
- flags &^= FoldCase
- }
- i.Arg = uint32(flags)
- f.out = patchList(f.i << 1)
-
- // Special cases for exec machine.
- switch {
- case flags&FoldCase == 0 && (len(r) == 1 || len(r) == 2 && r[0] == r[1]):
- i.Op = InstRune1
- case len(r) == 2 && r[0] == 0 && r[1] == unicode.MaxRune:
- i.Op = InstRuneAny
- case len(r) == 4 && r[0] == 0 && r[1] == '\n'-1 && r[2] == '\n'+1 && r[3] == unicode.MaxRune:
- i.Op = InstRuneAnyNotNL
- }
-
- return f
-}
diff --git a/src/pkg/regexp/syntax/doc.go b/src/pkg/regexp/syntax/doc.go
deleted file mode 100644
index 8e72c90d3..000000000
--- a/src/pkg/regexp/syntax/doc.go
+++ /dev/null
@@ -1,131 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// DO NOT EDIT. This file is generated by mksyntaxgo from the RE2 distribution.
-
-/*
-Package syntax parses regular expressions into parse trees and compiles
-parse trees into programs. Most clients of regular expressions will use the
-facilities of package regexp (such as Compile and Match) instead of this package.
-
-Syntax
-
-The regular expression syntax understood by this package when parsing with the Perl flag is as follows.
-Parts of the syntax can be disabled by passing alternate flags to Parse.
-
-
-Single characters:
- . any character, possibly including newline (flag s=true)
- [xyz] character class
- [^xyz] negated character class
- \d Perl character class
- \D negated Perl character class
- [:alpha:] ASCII character class
- [:^alpha:] negated ASCII character class
- \pN Unicode character class (one-letter name)
- \p{Greek} Unicode character class
- \PN negated Unicode character class (one-letter name)
- \P{Greek} negated Unicode character class
-
-Composites:
- xy x followed by y
- x|y x or y (prefer x)
-
-Repetitions:
- x* zero or more x, prefer more
- x+ one or more x, prefer more
- x? zero or one x, prefer one
- x{n,m} n or n+1 or ... or m x, prefer more
- x{n,} n or more x, prefer more
- x{n} exactly n x
- x*? zero or more x, prefer fewer
- x+? one or more x, prefer fewer
- x?? zero or one x, prefer zero
- x{n,m}? n or n+1 or ... or m x, prefer fewer
- x{n,}? n or more x, prefer fewer
- x{n}? exactly n x
-
-Implementation restriction: The counting forms x{n} etc. (but not the other
-forms x* etc.) have an upper limit of n=1000. Negative or higher explicit
-counts yield the parse error ErrInvalidRepeatSize.
-
-Grouping:
- (re) numbered capturing group (submatch)
- (?P<name>re) named & numbered capturing group (submatch)
- (?:re) non-capturing group (submatch)
- (?flags) set flags within current group; non-capturing
- (?flags:re) set flags during re; non-capturing
-
- Flag syntax is xyz (set) or -xyz (clear) or xy-z (set xy, clear z). The flags are:
-
- i case-insensitive (default false)
- m multi-line mode: ^ and $ match begin/end line in addition to begin/end text (default false)
- s let . match \n (default false)
- U ungreedy: swap meaning of x* and x*?, x+ and x+?, etc (default false)
-
-Empty strings:
- ^ at beginning of text or line (flag m=true)
- $ at end of text (like \z not \Z) or line (flag m=true)
- \A at beginning of text
- \b at ASCII word boundary (\w on one side and \W, \A, or \z on the other)
- \B not an ASCII word boundary
- \z at end of text
-
-Escape sequences:
- \a bell (== \007)
- \f form feed (== \014)
- \t horizontal tab (== \011)
- \n newline (== \012)
- \r carriage return (== \015)
- \v vertical tab character (== \013)
- \* literal *, for any punctuation character *
- \123 octal character code (up to three digits)
- \x7F hex character code (exactly two digits)
- \x{10FFFF} hex character code
- \Q...\E literal text ... even if ... has punctuation
-
-Character class elements:
- x single character
- A-Z character range (inclusive)
- \d Perl character class
- [:foo:] ASCII character class foo
- \p{Foo} Unicode character class Foo
- \pF Unicode character class F (one-letter name)
-
-Named character classes as character class elements:
- [\d] digits (== \d)
- [^\d] not digits (== \D)
- [\D] not digits (== \D)
- [^\D] not not digits (== \d)
- [[:name:]] named ASCII class inside character class (== [:name:])
- [^[:name:]] named ASCII class inside negated character class (== [:^name:])
- [\p{Name}] named Unicode property inside character class (== \p{Name})
- [^\p{Name}] named Unicode property inside negated character class (== \P{Name})
-
-Perl character classes:
- \d digits (== [0-9])
- \D not digits (== [^0-9])
- \s whitespace (== [\t\n\f\r ])
- \S not whitespace (== [^\t\n\f\r ])
- \w ASCII word characters (== [0-9A-Za-z_])
- \W not ASCII word characters (== [^0-9A-Za-z_])
-
-ASCII character classes:
- [:alnum:] alphanumeric (== [0-9A-Za-z])
- [:alpha:] alphabetic (== [A-Za-z])
- [:ascii:] ASCII (== [\x00-\x7F])
- [:blank:] blank (== [\t ])
- [:cntrl:] control (== [\x00-\x1F\x7F])
- [:digit:] digits (== [0-9])
- [:graph:] graphical (== [!-~] == [A-Za-z0-9!"#$%&'()*+,\-./:;<=>?@[\\\]^_`{|}~])
- [:lower:] lower case (== [a-z])
- [:print:] printable (== [ -~] == [ [:graph:]])
- [:punct:] punctuation (== [!-/:-@[-`{-~])
- [:space:] whitespace (== [\t\n\v\f\r ])
- [:upper:] upper case (== [A-Z])
- [:word:] word characters (== [0-9A-Za-z_])
- [:xdigit:] hex digit (== [0-9A-Fa-f])
-
-*/
-package syntax
diff --git a/src/pkg/regexp/syntax/make_perl_groups.pl b/src/pkg/regexp/syntax/make_perl_groups.pl
deleted file mode 100755
index 90040fcb4..000000000
--- a/src/pkg/regexp/syntax/make_perl_groups.pl
+++ /dev/null
@@ -1,107 +0,0 @@
-#!/usr/bin/perl
-# Copyright 2008 The Go Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style
-# license that can be found in the LICENSE file.
-
-# Modified version of RE2's make_perl_groups.pl.
-
-# Generate table entries giving character ranges
-# for POSIX/Perl character classes. Rather than
-# figure out what the definition is, it is easier to ask
-# Perl about each letter from 0-128 and write down
-# its answer.
-
-@posixclasses = (
- "[:alnum:]",
- "[:alpha:]",
- "[:ascii:]",
- "[:blank:]",
- "[:cntrl:]",
- "[:digit:]",
- "[:graph:]",
- "[:lower:]",
- "[:print:]",
- "[:punct:]",
- "[:space:]",
- "[:upper:]",
- "[:word:]",
- "[:xdigit:]",
-);
-
-@perlclasses = (
- "\\d",
- "\\s",
- "\\w",
-);
-
-sub ComputeClass($) {
- my @ranges;
- my ($class) = @_;
- my $regexp = "[$class]";
- my $start = -1;
- for (my $i=0; $i<=129; $i++) {
- if ($i == 129) { $i = 256; }
- if ($i <= 128 && chr($i) =~ $regexp) {
- if ($start < 0) {
- $start = $i;
- }
- } else {
- if ($start >= 0) {
- push @ranges, [$start, $i-1];
- }
- $start = -1;
- }
- }
- return @ranges;
-}
-
-sub PrintClass($$@) {
- my ($cname, $name, @ranges) = @_;
- print "var code$cname = []rune{ /* $name */\n";
- for (my $i=0; $i<@ranges; $i++) {
- my @a = @{$ranges[$i]};
- printf "\t0x%x, 0x%x,\n", $a[0], $a[1];
- }
- print "}\n\n";
- my $n = @ranges;
- $negname = $name;
- if ($negname =~ /:/) {
- $negname =~ s/:/:^/;
- } else {
- $negname =~ y/a-z/A-Z/;
- }
- return "\t`$name`: {+1, code$cname},\n" .
- "\t`$negname`: {-1, code$cname},\n";
-}
-
-my $gen = 0;
-
-sub PrintClasses($@) {
- my ($cname, @classes) = @_;
- my @entries;
- foreach my $cl (@classes) {
- my @ranges = ComputeClass($cl);
- push @entries, PrintClass(++$gen, $cl, @ranges);
- }
- print "var ${cname}Group = map[string]charGroup{\n";
- foreach my $e (@entries) {
- print $e;
- }
- print "}\n";
- my $count = @entries;
-}
-
-print <<EOF;
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// GENERATED BY make_perl_groups.pl; DO NOT EDIT.
-// make_perl_groups.pl >perl_groups.go
-
-package syntax
-
-EOF
-
-PrintClasses("perl", @perlclasses);
-PrintClasses("posix", @posixclasses);
diff --git a/src/pkg/regexp/syntax/parse.go b/src/pkg/regexp/syntax/parse.go
deleted file mode 100644
index cb25dca39..000000000
--- a/src/pkg/regexp/syntax/parse.go
+++ /dev/null
@@ -1,1863 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package syntax
-
-import (
- "sort"
- "strings"
- "unicode"
- "unicode/utf8"
-)
-
-// An Error describes a failure to parse a regular expression
-// and gives the offending expression.
-type Error struct {
- Code ErrorCode
- Expr string
-}
-
-func (e *Error) Error() string {
- return "error parsing regexp: " + e.Code.String() + ": `" + e.Expr + "`"
-}
-
-// An ErrorCode describes a failure to parse a regular expression.
-type ErrorCode string
-
-const (
- // Unexpected error
- ErrInternalError ErrorCode = "regexp/syntax: internal error"
-
- // Parse errors
- ErrInvalidCharClass ErrorCode = "invalid character class"
- ErrInvalidCharRange ErrorCode = "invalid character class range"
- ErrInvalidEscape ErrorCode = "invalid escape sequence"
- ErrInvalidNamedCapture ErrorCode = "invalid named capture"
- ErrInvalidPerlOp ErrorCode = "invalid or unsupported Perl syntax"
- ErrInvalidRepeatOp ErrorCode = "invalid nested repetition operator"
- ErrInvalidRepeatSize ErrorCode = "invalid repeat count"
- ErrInvalidUTF8 ErrorCode = "invalid UTF-8"
- ErrMissingBracket ErrorCode = "missing closing ]"
- ErrMissingParen ErrorCode = "missing closing )"
- ErrMissingRepeatArgument ErrorCode = "missing argument to repetition operator"
- ErrTrailingBackslash ErrorCode = "trailing backslash at end of expression"
- ErrUnexpectedParen ErrorCode = "unexpected )"
-)
-
-func (e ErrorCode) String() string {
- return string(e)
-}
-
-// Flags control the behavior of the parser and record information about regexp context.
-type Flags uint16
-
-const (
- FoldCase Flags = 1 << iota // case-insensitive match
- Literal // treat pattern as literal string
- ClassNL // allow character classes like [^a-z] and [[:space:]] to match newline
- DotNL // allow . to match newline
- OneLine // treat ^ and $ as only matching at beginning and end of text
- NonGreedy // make repetition operators default to non-greedy
- PerlX // allow Perl extensions
- UnicodeGroups // allow \p{Han}, \P{Han} for Unicode group and negation
- WasDollar // regexp OpEndText was $, not \z
- Simple // regexp contains no counted repetition
-
- MatchNL = ClassNL | DotNL
-
- Perl = ClassNL | OneLine | PerlX | UnicodeGroups // as close to Perl as possible
- POSIX Flags = 0 // POSIX syntax
-)
-
-// Pseudo-ops for parsing stack.
-const (
- opLeftParen = opPseudo + iota
- opVerticalBar
-)
-
-type parser struct {
- flags Flags // parse mode flags
- stack []*Regexp // stack of parsed expressions
- free *Regexp
- numCap int // number of capturing groups seen
- wholeRegexp string
- tmpClass []rune // temporary char class work space
-}
-
-func (p *parser) newRegexp(op Op) *Regexp {
- re := p.free
- if re != nil {
- p.free = re.Sub0[0]
- *re = Regexp{}
- } else {
- re = new(Regexp)
- }
- re.Op = op
- return re
-}
-
-func (p *parser) reuse(re *Regexp) {
- re.Sub0[0] = p.free
- p.free = re
-}
-
-// Parse stack manipulation.
-
-// push pushes the regexp re onto the parse stack and returns the regexp.
-func (p *parser) push(re *Regexp) *Regexp {
- if re.Op == OpCharClass && len(re.Rune) == 2 && re.Rune[0] == re.Rune[1] {
- // Single rune.
- if p.maybeConcat(re.Rune[0], p.flags&^FoldCase) {
- return nil
- }
- re.Op = OpLiteral
- re.Rune = re.Rune[:1]
- re.Flags = p.flags &^ FoldCase
- } else if re.Op == OpCharClass && len(re.Rune) == 4 &&
- re.Rune[0] == re.Rune[1] && re.Rune[2] == re.Rune[3] &&
- unicode.SimpleFold(re.Rune[0]) == re.Rune[2] &&
- unicode.SimpleFold(re.Rune[2]) == re.Rune[0] ||
- re.Op == OpCharClass && len(re.Rune) == 2 &&
- re.Rune[0]+1 == re.Rune[1] &&
- unicode.SimpleFold(re.Rune[0]) == re.Rune[1] &&
- unicode.SimpleFold(re.Rune[1]) == re.Rune[0] {
- // Case-insensitive rune like [Aa] or [Δδ].
- if p.maybeConcat(re.Rune[0], p.flags|FoldCase) {
- return nil
- }
-
- // Rewrite as (case-insensitive) literal.
- re.Op = OpLiteral
- re.Rune = re.Rune[:1]
- re.Flags = p.flags | FoldCase
- } else {
- // Incremental concatenation.
- p.maybeConcat(-1, 0)
- }
-
- p.stack = append(p.stack, re)
- return re
-}
-
-// maybeConcat implements incremental concatenation
-// of literal runes into string nodes. The parser calls this
-// before each push, so only the top fragment of the stack
-// might need processing. Since this is called before a push,
-// the topmost literal is no longer subject to operators like *
-// (Otherwise ab* would turn into (ab)*.)
-// If r >= 0 and there's a node left over, maybeConcat uses it
-// to push r with the given flags.
-// maybeConcat reports whether r was pushed.
-func (p *parser) maybeConcat(r rune, flags Flags) bool {
- n := len(p.stack)
- if n < 2 {
- return false
- }
-
- re1 := p.stack[n-1]
- re2 := p.stack[n-2]
- if re1.Op != OpLiteral || re2.Op != OpLiteral || re1.Flags&FoldCase != re2.Flags&FoldCase {
- return false
- }
-
- // Push re1 into re2.
- re2.Rune = append(re2.Rune, re1.Rune...)
-
- // Reuse re1 if possible.
- if r >= 0 {
- re1.Rune = re1.Rune0[:1]
- re1.Rune[0] = r
- re1.Flags = flags
- return true
- }
-
- p.stack = p.stack[:n-1]
- p.reuse(re1)
- return false // did not push r
-}
-
-// newLiteral returns a new OpLiteral Regexp with the given flags
-func (p *parser) newLiteral(r rune, flags Flags) *Regexp {
- re := p.newRegexp(OpLiteral)
- re.Flags = flags
- if flags&FoldCase != 0 {
- r = minFoldRune(r)
- }
- re.Rune0[0] = r
- re.Rune = re.Rune0[:1]
- return re
-}
-
-// minFoldRune returns the minimum rune fold-equivalent to r.
-func minFoldRune(r rune) rune {
- if r < minFold || r > maxFold {
- return r
- }
- min := r
- r0 := r
- for r = unicode.SimpleFold(r); r != r0; r = unicode.SimpleFold(r) {
- if min > r {
- min = r
- }
- }
- return min
-}
-
-// literal pushes a literal regexp for the rune r on the stack
-// and returns that regexp.
-func (p *parser) literal(r rune) {
- p.push(p.newLiteral(r, p.flags))
-}
-
-// op pushes a regexp with the given op onto the stack
-// and returns that regexp.
-func (p *parser) op(op Op) *Regexp {
- re := p.newRegexp(op)
- re.Flags = p.flags
- return p.push(re)
-}
-
-// repeat replaces the top stack element with itself repeated according to op, min, max.
-// before is the regexp suffix starting at the repetition operator.
-// after is the regexp suffix following after the repetition operator.
-// repeat returns an updated 'after' and an error, if any.
-func (p *parser) repeat(op Op, min, max int, before, after, lastRepeat string) (string, error) {
- flags := p.flags
- if p.flags&PerlX != 0 {
- if len(after) > 0 && after[0] == '?' {
- after = after[1:]
- flags ^= NonGreedy
- }
- if lastRepeat != "" {
- // In Perl it is not allowed to stack repetition operators:
- // a** is a syntax error, not a doubled star, and a++ means
- // something else entirely, which we don't support!
- return "", &Error{ErrInvalidRepeatOp, lastRepeat[:len(lastRepeat)-len(after)]}
- }
- }
- n := len(p.stack)
- if n == 0 {
- return "", &Error{ErrMissingRepeatArgument, before[:len(before)-len(after)]}
- }
- sub := p.stack[n-1]
- if sub.Op >= opPseudo {
- return "", &Error{ErrMissingRepeatArgument, before[:len(before)-len(after)]}
- }
- re := p.newRegexp(op)
- re.Min = min
- re.Max = max
- re.Flags = flags
- re.Sub = re.Sub0[:1]
- re.Sub[0] = sub
- p.stack[n-1] = re
- return after, nil
-}
-
-// concat replaces the top of the stack (above the topmost '|' or '(') with its concatenation.
-func (p *parser) concat() *Regexp {
- p.maybeConcat(-1, 0)
-
- // Scan down to find pseudo-operator | or (.
- i := len(p.stack)
- for i > 0 && p.stack[i-1].Op < opPseudo {
- i--
- }
- subs := p.stack[i:]
- p.stack = p.stack[:i]
-
- // Empty concatenation is special case.
- if len(subs) == 0 {
- return p.push(p.newRegexp(OpEmptyMatch))
- }
-
- return p.push(p.collapse(subs, OpConcat))
-}
-
-// alternate replaces the top of the stack (above the topmost '(') with its alternation.
-func (p *parser) alternate() *Regexp {
- // Scan down to find pseudo-operator (.
- // There are no | above (.
- i := len(p.stack)
- for i > 0 && p.stack[i-1].Op < opPseudo {
- i--
- }
- subs := p.stack[i:]
- p.stack = p.stack[:i]
-
- // Make sure top class is clean.
- // All the others already are (see swapVerticalBar).
- if len(subs) > 0 {
- cleanAlt(subs[len(subs)-1])
- }
-
- // Empty alternate is special case
- // (shouldn't happen but easy to handle).
- if len(subs) == 0 {
- return p.push(p.newRegexp(OpNoMatch))
- }
-
- return p.push(p.collapse(subs, OpAlternate))
-}
-
-// cleanAlt cleans re for eventual inclusion in an alternation.
-func cleanAlt(re *Regexp) {
- switch re.Op {
- case OpCharClass:
- re.Rune = cleanClass(&re.Rune)
- if len(re.Rune) == 2 && re.Rune[0] == 0 && re.Rune[1] == unicode.MaxRune {
- re.Rune = nil
- re.Op = OpAnyChar
- return
- }
- if len(re.Rune) == 4 && re.Rune[0] == 0 && re.Rune[1] == '\n'-1 && re.Rune[2] == '\n'+1 && re.Rune[3] == unicode.MaxRune {
- re.Rune = nil
- re.Op = OpAnyCharNotNL
- return
- }
- if cap(re.Rune)-len(re.Rune) > 100 {
- // re.Rune will not grow any more.
- // Make a copy or inline to reclaim storage.
- re.Rune = append(re.Rune0[:0], re.Rune...)
- }
- }
-}
-
-// collapse returns the result of applying op to sub.
-// If sub contains op nodes, they all get hoisted up
-// so that there is never a concat of a concat or an
-// alternate of an alternate.
-func (p *parser) collapse(subs []*Regexp, op Op) *Regexp {
- if len(subs) == 1 {
- return subs[0]
- }
- re := p.newRegexp(op)
- re.Sub = re.Sub0[:0]
- for _, sub := range subs {
- if sub.Op == op {
- re.Sub = append(re.Sub, sub.Sub...)
- p.reuse(sub)
- } else {
- re.Sub = append(re.Sub, sub)
- }
- }
- if op == OpAlternate {
- re.Sub = p.factor(re.Sub, re.Flags)
- if len(re.Sub) == 1 {
- old := re
- re = re.Sub[0]
- p.reuse(old)
- }
- }
- return re
-}
-
-// factor factors common prefixes from the alternation list sub.
-// It returns a replacement list that reuses the same storage and
-// frees (passes to p.reuse) any removed *Regexps.
-//
-// For example,
-// ABC|ABD|AEF|BCX|BCY
-// simplifies by literal prefix extraction to
-// A(B(C|D)|EF)|BC(X|Y)
-// which simplifies by character class introduction to
-// A(B[CD]|EF)|BC[XY]
-// Four rounds run in order, each rewriting sub in place: literal prefixes, leading sub-expressions, single-character merging, empty-match dedup. The flags argument is not read in this body.
-func (p *parser) factor(sub []*Regexp, flags Flags) []*Regexp {
- if len(sub) < 2 { // nothing to factor with fewer than two branches
- return sub
- }
-
- // Round 1: Factor out common literal prefixes.
- var str []rune
- var strflags Flags
- start := 0
- out := sub[:0]
- for i := 0; i <= len(sub); i++ { // the i == len(sub) pass only flushes the final run
- // Invariant: the Regexps that were in sub[0:start] have been
- // used or marked for reuse, and the slice space has been reused
- // for out (len(out) <= start).
- //
- // Invariant: sub[start:i] consists of regexps that all begin
- // with str as modified by strflags.
- var istr []rune
- var iflags Flags
- if i < len(sub) {
- istr, iflags = p.leadingString(sub[i])
- if iflags == strflags {
- same := 0
- for same < len(str) && same < len(istr) && str[same] == istr[same] {
- same++
- }
- if same > 0 {
- // Matches at least one rune in current range.
- // Keep going around.
- str = str[:same]
- continue
- }
- }
- }
-
- // Found end of a run with common leading literal string:
- // sub[start:i] all begin with str[0:len(str)], but sub[i]
- // does not even begin with str[0].
- //
- // Factor out common string and append factored expression to out.
- if i == start {
- // Nothing to do - run of length 0.
- } else if i == start+1 {
- // Just one: don't bother factoring.
- out = append(out, sub[start])
- } else {
- // Construct factored form: prefix(suffix1|suffix2|...)
- prefix := p.newRegexp(OpLiteral)
- prefix.Flags = strflags
- prefix.Rune = append(prefix.Rune[:0], str...) // copy: str aliases storage inside sub[start], which is trimmed below
-
- for j := start; j < i; j++ {
- sub[j] = p.removeLeadingString(sub[j], len(str))
- }
- suffix := p.collapse(sub[start:i], OpAlternate) // recurse
-
- re := p.newRegexp(OpConcat)
- re.Sub = append(re.Sub[:0], prefix, suffix)
- out = append(out, re)
- }
-
- // Prepare for next iteration.
- start = i
- str = istr
- strflags = iflags
- }
- sub = out
-
- // Round 2: Factor out common complex prefixes,
- // just the first piece of each concatenation,
- // whatever it is. This is good enough a lot of the time.
- start = 0
- out = sub[:0]
- var first *Regexp
- for i := 0; i <= len(sub); i++ { // again, the last pass flushes the trailing run
- // Invariant: the Regexps that were in sub[0:start] have been
- // used or marked for reuse, and the slice space has been reused
- // for out (len(out) <= start).
- //
- // Invariant: sub[start:i] consists of regexps that all begin with ifirst.
- var ifirst *Regexp
- if i < len(sub) {
- ifirst = p.leadingRegexp(sub[i])
- if first != nil && first.Equal(ifirst) {
- continue
- }
- }
-
- // Found end of a run with common leading regexp:
- // sub[start:i] all begin with first but sub[i] does not.
- //
- // Factor out common regexp and append factored expression to out.
- if i == start {
- // Nothing to do - run of length 0.
- } else if i == start+1 {
- // Just one: don't bother factoring.
- out = append(out, sub[start])
- } else {
- // Construct factored form: prefix(suffix1|suffix2|...)
- prefix := first
- for j := start; j < i; j++ {
- reuse := j != start // prefix came from sub[start]
- sub[j] = p.removeLeadingRegexp(sub[j], reuse)
- }
- suffix := p.collapse(sub[start:i], OpAlternate) // recurse
-
- re := p.newRegexp(OpConcat)
- re.Sub = append(re.Sub[:0], prefix, suffix)
- out = append(out, re)
- }
-
- // Prepare for next iteration.
- start = i
- first = ifirst
- }
- sub = out
-
- // Round 3: Collapse runs of single literals into character classes.
- start = 0
- out = sub[:0]
- for i := 0; i <= len(sub); i++ {
- // Invariant: the Regexps that were in sub[0:start] have been
- // used or marked for reuse, and the slice space has been reused
- // for out (len(out) <= start).
- //
- // Invariant: sub[start:i] consists of regexps that are either
- // literal runes or character classes.
- if i < len(sub) && isCharClass(sub[i]) {
- continue
- }
-
- // sub[i] is not a char or char class;
- // emit char class for sub[start:i]...
- if i == start {
- // Nothing to do - run of length 0.
- } else if i == start+1 {
- out = append(out, sub[start])
- } else {
- // Make new char class.
- // Start with most complex regexp in sub[start].
- max := start
- for j := start + 1; j < i; j++ {
- if sub[max].Op < sub[j].Op || sub[max].Op == sub[j].Op && len(sub[max].Rune) < len(sub[j].Rune) {
- max = j
- }
- }
- sub[start], sub[max] = sub[max], sub[start] // the most complex node becomes the merge destination
-
- for j := start + 1; j < i; j++ {
- mergeCharClass(sub[start], sub[j])
- p.reuse(sub[j])
- }
- cleanAlt(sub[start])
- out = append(out, sub[start])
- }
-
- // ... and then emit sub[i].
- if i < len(sub) {
- out = append(out, sub[i])
- }
- start = i + 1 // sub[i] was emitted above, so the next run begins after it
- }
- sub = out
-
- // Round 4: Collapse runs of empty matches into a single empty match.
- start = 0 // assigned for symmetry with the other rounds; not read again below
- out = sub[:0]
- for i := range sub {
- if i+1 < len(sub) && sub[i].Op == OpEmptyMatch && sub[i+1].Op == OpEmptyMatch { // drop all but the last of a run
- continue
- }
- out = append(out, sub[i])
- }
- sub = out
-
- return sub
-}
-
-// leadingString returns the leading literal string that re begins with.
-// The string refers to storage in re or its children.
-func (p *parser) leadingString(re *Regexp) ([]rune, Flags) {
- if re.Op == OpConcat && len(re.Sub) > 0 {
- re = re.Sub[0]
- }
- if re.Op != OpLiteral {
- return nil, 0
- }
- return re.Rune, re.Flags & FoldCase
-}
-
-// removeLeadingString removes the first n leading runes
-// from the beginning of re. It returns the replacement for re.
-func (p *parser) removeLeadingString(re *Regexp, n int) *Regexp {
- if re.Op == OpConcat && len(re.Sub) > 0 {
- // Removing a leading string in a concatenation
- // might simplify the concatenation.
- sub := re.Sub[0]
- sub = p.removeLeadingString(sub, n)
- re.Sub[0] = sub
- if sub.Op == OpEmptyMatch {
- p.reuse(sub)
- switch len(re.Sub) {
- case 0, 1:
- // Impossible but handle.
- re.Op = OpEmptyMatch
- re.Sub = nil
- case 2:
- old := re
- re = re.Sub[1]
- p.reuse(old)
- default:
- copy(re.Sub, re.Sub[1:])
- re.Sub = re.Sub[:len(re.Sub)-1]
- }
- }
- return re
- }
-
- if re.Op == OpLiteral {
- re.Rune = re.Rune[:copy(re.Rune, re.Rune[n:])]
- if len(re.Rune) == 0 {
- re.Op = OpEmptyMatch
- }
- }
- return re
-}
-
-// leadingRegexp returns the leading regexp that re begins with.
-// The regexp refers to storage in re or its children.
-func (p *parser) leadingRegexp(re *Regexp) *Regexp {
- if re.Op == OpEmptyMatch {
- return nil
- }
- if re.Op == OpConcat && len(re.Sub) > 0 {
- sub := re.Sub[0]
- if sub.Op == OpEmptyMatch {
- return nil
- }
- return sub
- }
- return re
-}
-
-// removeLeadingRegexp removes the leading regexp in re.
-// It returns the replacement for re.
-// If reuse is true, it passes the removed regexp (if no longer needed) to p.reuse.
-func (p *parser) removeLeadingRegexp(re *Regexp, reuse bool) *Regexp {
- if re.Op == OpConcat && len(re.Sub) > 0 {
- if reuse {
- p.reuse(re.Sub[0])
- }
- re.Sub = re.Sub[:copy(re.Sub, re.Sub[1:])]
- switch len(re.Sub) {
- case 0:
- re.Op = OpEmptyMatch
- re.Sub = nil
- case 1:
- old := re
- re = re.Sub[0]
- p.reuse(old)
- }
- return re
- }
- if reuse {
- p.reuse(re)
- }
- return p.newRegexp(OpEmptyMatch)
-}
-
-func literalRegexp(s string, flags Flags) *Regexp {
- re := &Regexp{Op: OpLiteral}
- re.Flags = flags
- re.Rune = re.Rune0[:0] // use local storage for small strings
- for _, c := range s {
- if len(re.Rune) >= cap(re.Rune) {
- // string is too long to fit in Rune0. let Go handle it
- re.Rune = []rune(s)
- break
- }
- re.Rune = append(re.Rune, c)
- }
- return re
-}
-
-// Parsing.
-
-// Parse parses a regular expression string s, controlled by the specified
-// Flags, and returns a regular expression parse tree. The syntax is
-// described in the top-level comment.
-func Parse(s string, flags Flags) (*Regexp, error) {
- if flags&Literal != 0 {
- // Trivial parser for literal string.
- if err := checkUTF8(s); err != nil {
- return nil, err
- }
- return literalRegexp(s, flags), nil
- }
-
- // Otherwise, must do real work.
- var (
- p parser
- err error
- c rune
- op Op
- lastRepeat string
- )
- p.flags = flags
- p.wholeRegexp = s
- t := s
- for t != "" {
- repeat := ""
- BigSwitch:
- switch t[0] {
- default:
- if c, t, err = nextRune(t); err != nil {
- return nil, err
- }
- p.literal(c)
-
- case '(':
- if p.flags&PerlX != 0 && len(t) >= 2 && t[1] == '?' {
- // Flag changes and non-capturing groups.
- if t, err = p.parsePerlFlags(t); err != nil {
- return nil, err
- }
- break
- }
- p.numCap++
- p.op(opLeftParen).Cap = p.numCap
- t = t[1:]
- case '|':
- if err = p.parseVerticalBar(); err != nil {
- return nil, err
- }
- t = t[1:]
- case ')':
- if err = p.parseRightParen(); err != nil {
- return nil, err
- }
- t = t[1:]
- case '^':
- if p.flags&OneLine != 0 {
- p.op(OpBeginText)
- } else {
- p.op(OpBeginLine)
- }
- t = t[1:]
- case '$':
- if p.flags&OneLine != 0 {
- p.op(OpEndText).Flags |= WasDollar
- } else {
- p.op(OpEndLine)
- }
- t = t[1:]
- case '.':
- if p.flags&DotNL != 0 {
- p.op(OpAnyChar)
- } else {
- p.op(OpAnyCharNotNL)
- }
- t = t[1:]
- case '[':
- if t, err = p.parseClass(t); err != nil {
- return nil, err
- }
- case '*', '+', '?':
- before := t
- switch t[0] {
- case '*':
- op = OpStar
- case '+':
- op = OpPlus
- case '?':
- op = OpQuest
- }
- after := t[1:]
- if after, err = p.repeat(op, 0, 0, before, after, lastRepeat); err != nil {
- return nil, err
- }
- repeat = before
- t = after
- case '{':
- op = OpRepeat
- before := t
- min, max, after, ok := p.parseRepeat(t)
- if !ok {
- // If the repeat cannot be parsed, { is a literal.
- p.literal('{')
- t = t[1:]
- break
- }
- if min < 0 || min > 1000 || max > 1000 || max >= 0 && min > max {
- // Numbers were too big, or max is present and min > max.
- return nil, &Error{ErrInvalidRepeatSize, before[:len(before)-len(after)]}
- }
- if after, err = p.repeat(op, min, max, before, after, lastRepeat); err != nil {
- return nil, err
- }
- repeat = before
- t = after
- case '\\':
- if p.flags&PerlX != 0 && len(t) >= 2 {
- switch t[1] {
- case 'A':
- p.op(OpBeginText)
- t = t[2:]
- break BigSwitch
- case 'b':
- p.op(OpWordBoundary)
- t = t[2:]
- break BigSwitch
- case 'B':
- p.op(OpNoWordBoundary)
- t = t[2:]
- break BigSwitch
- case 'C':
- // any byte; not supported
- return nil, &Error{ErrInvalidEscape, t[:2]}
- case 'Q':
- // \Q ... \E: the ... is always literals
- var lit string
- if i := strings.Index(t, `\E`); i < 0 {
- lit = t[2:]
- t = ""
- } else {
- lit = t[2:i]
- t = t[i+2:]
- }
- p.push(literalRegexp(lit, p.flags))
- break BigSwitch
- case 'z':
- p.op(OpEndText)
- t = t[2:]
- break BigSwitch
- }
- }
-
- re := p.newRegexp(OpCharClass)
- re.Flags = p.flags
-
- // Look for Unicode character group like \p{Han}
- if len(t) >= 2 && (t[1] == 'p' || t[1] == 'P') {
- r, rest, err := p.parseUnicodeClass(t, re.Rune0[:0])
- if err != nil {
- return nil, err
- }
- if r != nil {
- re.Rune = r
- t = rest
- p.push(re)
- break BigSwitch
- }
- }
-
- // Perl character class escape.
- if r, rest := p.parsePerlClassEscape(t, re.Rune0[:0]); r != nil {
- re.Rune = r
- t = rest
- p.push(re)
- break BigSwitch
- }
- p.reuse(re)
-
- // Ordinary single-character escape.
- if c, t, err = p.parseEscape(t); err != nil {
- return nil, err
- }
- p.literal(c)
- }
- lastRepeat = repeat
- }
-
- p.concat()
- if p.swapVerticalBar() {
- // pop vertical bar
- p.stack = p.stack[:len(p.stack)-1]
- }
- p.alternate()
-
- n := len(p.stack)
- if n != 1 {
- return nil, &Error{ErrMissingParen, s}
- }
- return p.stack[0], nil
-}
-
-// parseRepeat parses {min} (max=min) or {min,} (max=-1) or {min,max}.
-// If s is not of that form, it returns ok == false.
-// If s has the right form but the values are too big, it returns min == -1, ok == true.
-func (p *parser) parseRepeat(s string) (min, max int, rest string, ok bool) {
- if s == "" || s[0] != '{' {
- return
- }
- s = s[1:]
- var ok1 bool
- if min, s, ok1 = p.parseInt(s); !ok1 {
- return
- }
- if s == "" {
- return
- }
- if s[0] != ',' {
- max = min
- } else {
- s = s[1:]
- if s == "" {
- return
- }
- if s[0] == '}' {
- max = -1
- } else if max, s, ok1 = p.parseInt(s); !ok1 {
- return
- } else if max < 0 {
- // parseInt found too big a number
- min = -1
- }
- }
- if s == "" || s[0] != '}' {
- return
- }
- rest = s[1:]
- ok = true
- return
-}
-
-// parsePerlFlags parses a Perl flag setting or non-capturing group or both,
-// like (?i) or (?: or (?i:. It removes the prefix from s and updates the parse state.
-// The caller must have ensured that s begins with "(?". It also handles the named capture form (?P<name>.
-func (p *parser) parsePerlFlags(s string) (rest string, err error) {
- t := s
-
- // Check for named captures, first introduced in Python's regexp library.
- // As usual, there are three slightly different syntaxes:
- //
- // (?P<name>expr) the original, introduced by Python
- // (?<name>expr) the .NET alteration, adopted by Perl 5.10
- // (?'name'expr) another .NET alteration, adopted by Perl 5.10
- //
- // Perl 5.10 gave in and implemented the Python version too,
- // but they claim that the last two are the preferred forms.
- // PCRE and languages based on it (specifically, PHP and Ruby)
- // support all three as well. EcmaScript 4 uses only the Python form.
- //
- // In both the open source world (via Code Search) and the
- // Google source tree, (?P<name>expr) is the dominant form,
- // so that's the one we implement. One is enough.
- if len(t) > 4 && t[2] == 'P' && t[3] == '<' {
- // Pull out name.
- end := strings.IndexRune(t, '>')
- if end < 0 {
- if err = checkUTF8(t); err != nil { // prefer reporting bad UTF-8 over a bad capture
- return "", err
- }
- return "", &Error{ErrInvalidNamedCapture, s}
- }
-
- capture := t[:end+1] // "(?P<name>"
- name := t[4:end] // "name"
- if err = checkUTF8(name); err != nil {
- return "", err
- }
- if !isValidCaptureName(name) {
- return "", &Error{ErrInvalidNamedCapture, capture}
- }
-
- // Like ordinary capture, but named.
- p.numCap++
- re := p.op(opLeftParen)
- re.Cap = p.numCap
- re.Name = name
- return t[end+1:], nil
- }
-
- // Non-capturing group. Might also twiddle Perl flags.
- var c rune
- t = t[2:] // skip (?
- flags := p.flags // working copy; committed to p.flags only on ':' or ')'
- sign := +1
- sawFlag := false
-Loop:
- for t != "" {
- if c, t, err = nextRune(t); err != nil {
- return "", err
- }
- switch c {
- default:
- break Loop
-
- // Flags.
- case 'i':
- flags |= FoldCase
- sawFlag = true
- case 'm':
- flags &^= OneLine
- sawFlag = true
- case 's':
- flags |= DotNL
- sawFlag = true
- case 'U':
- flags |= NonGreedy
- sawFlag = true
-
- // Switch to negation.
- case '-':
- if sign < 0 { // a second '-' is an error
- break Loop
- }
- sign = -1
- // Invert flags so that | above turn into &^ and vice versa.
- // We'll invert flags again before using it below.
- flags = ^flags
- sawFlag = false // at least one flag must follow the '-'
-
- // End of flags, starting group or not.
- case ':', ')':
- if sign < 0 {
- if !sawFlag {
- break Loop
- }
- flags = ^flags
- }
- if c == ':' {
- // Open new group
- p.op(opLeftParen)
- }
- p.flags = flags
- return t, nil
- }
- }
-
- return "", &Error{ErrInvalidPerlOp, s[:len(s)-len(t)]} // Expr is the text consumed so far
-}
-
-// isValidCaptureName reports whether name
-// is a valid capture name: [A-Za-z0-9_]+.
-// PCRE limits names to 32 bytes.
-// Python rejects names starting with digits.
-// We don't enforce either of those.
-func isValidCaptureName(name string) bool {
- if name == "" {
- return false
- }
- for _, c := range name {
- if c != '_' && !isalnum(c) {
- return false
- }
- }
- return true
-}
-
-// parseInt parses a decimal integer.
-func (p *parser) parseInt(s string) (n int, rest string, ok bool) {
- if s == "" || s[0] < '0' || '9' < s[0] {
- return
- }
- // Disallow leading zeros.
- if len(s) >= 2 && s[0] == '0' && '0' <= s[1] && s[1] <= '9' {
- return
- }
- t := s
- for s != "" && '0' <= s[0] && s[0] <= '9' {
- s = s[1:]
- }
- rest = s
- ok = true
- // Have digits, compute value.
- t = t[:len(t)-len(s)]
- for i := 0; i < len(t); i++ {
- // Avoid overflow.
- if n >= 1e8 {
- n = -1
- break
- }
- n = n*10 + int(t[i]) - '0'
- }
- return
-}
-
-// can this be represented as a character class?
-// single-rune literal string, char class, ., and .|\n.
-func isCharClass(re *Regexp) bool {
- return re.Op == OpLiteral && len(re.Rune) == 1 ||
- re.Op == OpCharClass ||
- re.Op == OpAnyCharNotNL ||
- re.Op == OpAnyChar
-}
-
-// does re match r?
-func matchRune(re *Regexp, r rune) bool {
- switch re.Op {
- case OpLiteral:
- return len(re.Rune) == 1 && re.Rune[0] == r
- case OpCharClass:
- for i := 0; i < len(re.Rune); i += 2 {
- if re.Rune[i] <= r && r <= re.Rune[i+1] {
- return true
- }
- }
- return false
- case OpAnyCharNotNL:
- return r != '\n'
- case OpAnyChar:
- return true
- }
- return false
-}
-
-// parseVerticalBar handles a | in the input.
-func (p *parser) parseVerticalBar() error {
- p.concat()
-
- // The concatenation we just parsed is on top of the stack.
- // If it sits above an opVerticalBar, swap it below
- // (things below an opVerticalBar become an alternation).
- // Otherwise, push a new vertical bar.
- if !p.swapVerticalBar() {
- p.op(opVerticalBar)
- }
-
- return nil
-}
-
-// mergeCharClass makes dst = dst|src.
-// The caller must ensure that dst.Op >= src.Op,
-// to reduce the amount of copying.
-func mergeCharClass(dst, src *Regexp) {
- switch dst.Op {
- case OpAnyChar:
- // src doesn't add anything.
- case OpAnyCharNotNL:
- // src might add \n
- if matchRune(src, '\n') {
- dst.Op = OpAnyChar
- }
- case OpCharClass:
- // src is simpler, so either literal or char class
- if src.Op == OpLiteral {
- dst.Rune = appendLiteral(dst.Rune, src.Rune[0], src.Flags)
- } else {
- dst.Rune = appendClass(dst.Rune, src.Rune)
- }
- case OpLiteral:
- // both literal
- if src.Rune[0] == dst.Rune[0] && src.Flags == dst.Flags {
- break
- }
- dst.Op = OpCharClass
- dst.Rune = appendLiteral(dst.Rune[:0], dst.Rune[0], dst.Flags)
- dst.Rune = appendLiteral(dst.Rune, src.Rune[0], src.Flags)
- }
-}
-
-// If the top of the stack is an element followed by an opVerticalBar
-// swapVerticalBar swaps the two and returns true.
-// Otherwise it returns false.
-func (p *parser) swapVerticalBar() bool {
- // If above and below vertical bar are literal or char class,
- // can merge into a single char class.
- n := len(p.stack)
- if n >= 3 && p.stack[n-2].Op == opVerticalBar && isCharClass(p.stack[n-1]) && isCharClass(p.stack[n-3]) {
- re1 := p.stack[n-1]
- re3 := p.stack[n-3]
- // Make re3 the more complex of the two.
- if re1.Op > re3.Op {
- re1, re3 = re3, re1
- p.stack[n-3] = re3
- }
- mergeCharClass(re3, re1)
- p.reuse(re1)
- p.stack = p.stack[:n-1]
- return true
- }
-
- if n >= 2 {
- re1 := p.stack[n-1]
- re2 := p.stack[n-2]
- if re2.Op == opVerticalBar {
- if n >= 3 {
- // Now out of reach.
- // Clean opportunistically.
- cleanAlt(p.stack[n-3])
- }
- p.stack[n-2] = re1
- p.stack[n-1] = re2
- return true
- }
- }
- return false
-}
-
-// parseRightParen handles a ) in the input.
-func (p *parser) parseRightParen() error {
- p.concat()
- if p.swapVerticalBar() {
- // pop vertical bar
- p.stack = p.stack[:len(p.stack)-1]
- }
- p.alternate()
-
- n := len(p.stack)
- if n < 2 {
- return &Error{ErrUnexpectedParen, p.wholeRegexp}
- }
- re1 := p.stack[n-1]
- re2 := p.stack[n-2]
- p.stack = p.stack[:n-2]
- if re2.Op != opLeftParen {
- return &Error{ErrUnexpectedParen, p.wholeRegexp}
- }
- // Restore flags at time of paren.
- p.flags = re2.Flags
- if re2.Cap == 0 {
- // Just for grouping.
- p.push(re1)
- } else {
- re2.Op = OpCapture
- re2.Sub = re2.Sub0[:1]
- re2.Sub[0] = re1
- p.push(re2)
- }
- return nil
-}
-
-// parseEscape parses an escape sequence at the beginning of s
-// and returns the rune and the remaining text. Failures are ErrTrailingBackslash, ErrInvalidEscape, or an invalid-UTF-8 error from nextRune.
-func (p *parser) parseEscape(s string) (r rune, rest string, err error) {
- t := s[1:] // skip the first byte; the callers visible in this file pass s beginning with a backslash
- if t == "" {
- return 0, "", &Error{ErrTrailingBackslash, ""}
- }
- c, t, err := nextRune(t)
- if err != nil {
- return 0, "", err
- }
-
-Switch:
- switch c {
- default:
- if c < utf8.RuneSelf && !isalnum(c) {
- // Escaped non-word characters are always themselves.
- // PCRE is not quite so rigorous: it accepts things like
- // \q, but we don't. We once rejected \_, but too many
- // programs and people insist on using it, so allow \_.
- return c, t, nil
- }
-
- // Octal escapes.
- case '1', '2', '3', '4', '5', '6', '7':
- // Single non-zero digit is a backreference; not supported
- if t == "" || t[0] < '0' || t[0] > '7' {
- break
- }
- fallthrough
- case '0':
- // Consume up to three octal digits; already have one.
- r = c - '0'
- for i := 1; i < 3; i++ {
- if t == "" || t[0] < '0' || t[0] > '7' {
- break
- }
- r = r*8 + rune(t[0]) - '0'
- t = t[1:]
- }
- return r, t, nil
-
- // Hexadecimal escapes.
- case 'x':
- if t == "" {
- break
- }
- if c, t, err = nextRune(t); err != nil {
- return 0, "", err
- }
- if c == '{' {
- // Any number of digits in braces.
- // Perl accepts any text at all; it ignores all text
- // after the first non-hex digit. We require only hex digits,
- // and at least one.
- nhex := 0
- r = 0
- for {
- if t == "" {
- break Switch
- }
- if c, t, err = nextRune(t); err != nil {
- return 0, "", err
- }
- if c == '}' {
- break
- }
- v := unhex(c)
- if v < 0 {
- break Switch
- }
- r = r*16 + v
- if r > unicode.MaxRune { // also stops r from growing without bound
- break Switch
- }
- nhex++
- }
- if nhex == 0 {
- break Switch
- }
- return r, t, nil
- }
-
- // Easy case: two hex digits.
- x := unhex(c)
- if c, t, err = nextRune(t); err != nil {
- return 0, "", err
- }
- y := unhex(c)
- if x < 0 || y < 0 {
- break
- }
- return x*16 + y, t, nil
-
- // C escapes. There is no case 'b', to avoid misparsing
- // the Perl word-boundary \b as the C backspace \b
- // when in POSIX mode. In Perl, /\b/ means word-boundary
- // but /[\b]/ means backspace. We don't support that.
- // If you want a backspace, embed a literal backspace
- // character or use \x08.
- case 'a':
- return '\a', t, err // err is nil here: the non-nil case returned above
- case 'f':
- return '\f', t, err
- case 'n':
- return '\n', t, err
- case 'r':
- return '\r', t, err
- case 't':
- return '\t', t, err
- case 'v':
- return '\v', t, err
- }
- return 0, "", &Error{ErrInvalidEscape, s[:len(s)-len(t)]} // every break above lands here; Expr is the text consumed
-}
-
-// parseClassChar parses a character class character at the beginning of s
-// and returns it.
-func (p *parser) parseClassChar(s, wholeClass string) (r rune, rest string, err error) {
- if s == "" {
- return 0, "", &Error{Code: ErrMissingBracket, Expr: wholeClass}
- }
-
- // Allow regular escape sequences even though
- // many need not be escaped in this context.
- if s[0] == '\\' {
- return p.parseEscape(s)
- }
-
- return nextRune(s)
-}
-
-type charGroup struct {
- sign int // negative: appendGroup appends the negation of class; zero is what the group lookups treat as "no such group"
- class []rune // lo,hi rune pairs, consumed by appendClass / appendFoldedClass / appendNegatedClass
-}
-
-// parsePerlClassEscape parses a leading Perl character class escape like \d
-// from the beginning of s. If one is present, it appends the characters to r
-// and returns the new slice r and the remainder of the string.
-func (p *parser) parsePerlClassEscape(s string, r []rune) (out []rune, rest string) {
- if p.flags&PerlX == 0 || len(s) < 2 || s[0] != '\\' {
- return
- }
- g := perlGroup[s[0:2]]
- if g.sign == 0 {
- return
- }
- return p.appendGroup(r, g), s[2:]
-}
-
-// parseNamedClass parses a leading POSIX named character class like [:alnum:]
-// from the beginning of s. If one is present, it appends the characters to r
-// and returns the new slice r and the remainder of the string.
-func (p *parser) parseNamedClass(s string, r []rune) (out []rune, rest string, err error) {
- if len(s) < 2 || s[0] != '[' || s[1] != ':' {
- return
- }
-
- i := strings.Index(s[2:], ":]")
- if i < 0 {
- return
- }
- i += 2
- name, s := s[0:i+2], s[i+2:]
- g := posixGroup[name]
- if g.sign == 0 {
- return nil, "", &Error{ErrInvalidCharRange, name}
- }
- return p.appendGroup(r, g), s, nil
-}
-
-func (p *parser) appendGroup(r []rune, g charGroup) []rune {
- if p.flags&FoldCase == 0 {
- if g.sign < 0 {
- r = appendNegatedClass(r, g.class)
- } else {
- r = appendClass(r, g.class)
- }
- } else {
- tmp := p.tmpClass[:0]
- tmp = appendFoldedClass(tmp, g.class)
- p.tmpClass = tmp
- tmp = cleanClass(&p.tmpClass)
- if g.sign < 0 {
- r = appendNegatedClass(r, tmp)
- } else {
- r = appendClass(r, tmp)
- }
- }
- return r
-}
-
// anyTable is a range table covering every code point from 0 through
// unicode.MaxRune.
var anyTable = &unicode.RangeTable{
	R16: []unicode.Range16{{Lo: 0x0000, Hi: 0xFFFF, Stride: 1}},
	R32: []unicode.Range32{{Lo: 0x10000, Hi: unicode.MaxRune, Stride: 1}},
}

// unicodeTable looks up the unicode.RangeTable called name and also
// returns the table of extra code points that are fold-equivalent to it
// (which may be nil). Categories are consulted before scripts. The name
// "Any" denotes every code point. Unknown names yield nil, nil.
func unicodeTable(name string) (*unicode.RangeTable, *unicode.RangeTable) {
	// Special case: "Any" means any.
	if name == "Any" {
		return anyTable, anyTable
	}

	tab, fold := unicode.Categories[name], unicode.FoldCategory[name]
	if tab == nil {
		tab, fold = unicode.Scripts[name], unicode.FoldScript[name]
	}
	if tab == nil {
		return nil, nil
	}
	return tab, fold
}
-
-// parseUnicodeClass parses a leading Unicode character class like \p{Han}
-// from the beginning of s. If one is present, it appends the characters to r
-// and returns the new slice r and the remainder of the string. If s does not start with \p or \P, or UnicodeGroups is off, it returns nil, "", nil.
-func (p *parser) parseUnicodeClass(s string, r []rune) (out []rune, rest string, err error) {
- if p.flags&UnicodeGroups == 0 || len(s) < 2 || s[0] != '\\' || s[1] != 'p' && s[1] != 'P' {
- return
- }
-
- // Committed to parse or return error.
- sign := +1
- if s[1] == 'P' {
- sign = -1
- }
- t := s[2:]
- c, t, err := nextRune(t)
- if err != nil {
- return
- }
- var seq, name string // seq is the whole escape (used in errors); name is the table name inside it
- if c != '{' {
- // Single-letter name.
- seq = s[:len(s)-len(t)]
- name = seq[2:]
- } else {
- // Name is in braces.
- end := strings.IndexRune(s, '}')
- if end < 0 {
- if err = checkUTF8(s); err != nil { // prefer reporting bad UTF-8 over a bad range
- return
- }
- return nil, "", &Error{ErrInvalidCharRange, s}
- }
- seq, t = s[:end+1], s[end+1:]
- name = s[3:end]
- if err = checkUTF8(name); err != nil {
- return
- }
- }
-
- // Group can have leading negation too. \p{^Han} == \P{Han}, \P{^Han} == \p{Han}.
- if name != "" && name[0] == '^' {
- sign = -sign
- name = name[1:]
- }
-
- tab, fold := unicodeTable(name)
- if tab == nil { // unknown table name
- return nil, "", &Error{ErrInvalidCharRange, seq}
- }
-
- if p.flags&FoldCase == 0 || fold == nil {
- if sign > 0 {
- r = appendTable(r, tab)
- } else {
- r = appendNegatedTable(r, tab)
- }
- } else {
- // Merge and clean tab and fold in a temporary buffer.
- // This is necessary for the negative case and just tidy
- // for the positive case.
- tmp := p.tmpClass[:0]
- tmp = appendTable(tmp, tab)
- tmp = appendTable(tmp, fold)
- p.tmpClass = tmp // store the grown buffer so its capacity is kept for later calls
- tmp = cleanClass(&p.tmpClass)
- if sign > 0 {
- r = appendClass(r, tmp)
- } else {
- r = appendNegatedClass(r, tmp)
- }
- }
- return r, t, nil
-}
-
-// parseClass parses a character class at the beginning of s
-// and pushes it onto the parse stack. It returns the text after the closing ']', or an error for a missing bracket or a bad range.
-func (p *parser) parseClass(s string) (rest string, err error) {
- t := s[1:] // chop [
- re := p.newRegexp(OpCharClass)
- re.Flags = p.flags
- re.Rune = re.Rune0[:0]
-
- sign := +1
- if t != "" && t[0] == '^' {
- sign = -1
- t = t[1:]
-
- // If character class does not match \n, add it here,
- // so that negation later will do the right thing.
- if p.flags&ClassNL == 0 {
- re.Rune = append(re.Rune, '\n', '\n')
- }
- }
-
- class := re.Rune
- first := true // ] and - are okay as first char in class
- for t == "" || t[0] != ']' || first { // an empty t still enters the loop so parseClassChar can report the missing bracket
- // POSIX: - is only okay unescaped as first or last in class.
- // Perl: - is okay anywhere.
- if t != "" && t[0] == '-' && p.flags&PerlX == 0 && !first && (len(t) == 1 || t[1] != ']') {
- _, size := utf8.DecodeRuneInString(t[1:])
- return "", &Error{Code: ErrInvalidCharRange, Expr: t[:1+size]}
- }
- first = false
-
- // Look for POSIX [:alnum:] etc.
- if len(t) > 2 && t[0] == '[' && t[1] == ':' {
- nclass, nt, err := p.parseNamedClass(t, class)
- if err != nil {
- return "", err
- }
- if nclass != nil {
- class, t = nclass, nt
- continue
- }
- }
-
- // Look for Unicode character group like \p{Han}.
- nclass, nt, err := p.parseUnicodeClass(t, class)
- if err != nil {
- return "", err
- }
- if nclass != nil {
- class, t = nclass, nt
- continue
- }
-
- // Look for Perl character class symbols (extension).
- if nclass, nt := p.parsePerlClassEscape(t, class); nclass != nil {
- class, t = nclass, nt
- continue
- }
-
- // Single character or simple range.
- rng := t // start of this item, kept for the error message below
- var lo, hi rune
- if lo, t, err = p.parseClassChar(t, s); err != nil {
- return "", err
- }
- hi = lo
- // [a-] means (a|-) so check for final ].
- if len(t) >= 2 && t[0] == '-' && t[1] != ']' {
- t = t[1:]
- if hi, t, err = p.parseClassChar(t, s); err != nil {
- return "", err
- }
- if hi < lo {
- rng = rng[:len(rng)-len(t)]
- return "", &Error{Code: ErrInvalidCharRange, Expr: rng}
- }
- }
- if p.flags&FoldCase == 0 {
- class = appendRange(class, lo, hi)
- } else {
- class = appendFoldedRange(class, lo, hi)
- }
- }
- t = t[1:] // chop ]
-
- // Use &re.Rune instead of &class to avoid allocation.
- re.Rune = class
- class = cleanClass(&re.Rune)
- if sign < 0 {
- class = negateClass(class)
- }
- re.Rune = class
- p.push(re)
- return t, nil
-}
-
-// cleanClass sorts the ranges (pairs of elements of r),
-// merges them, and eliminates duplicates.
-func cleanClass(rp *[]rune) []rune {
-
- // Sort by lo increasing, hi decreasing to break ties.
- sort.Sort(ranges{rp})
-
- r := *rp
- if len(r) < 2 {
- return r
- }
-
- // Merge abutting, overlapping.
- w := 2 // write index
- for i := 2; i < len(r); i += 2 {
- lo, hi := r[i], r[i+1]
- if lo <= r[w-1]+1 {
- // merge with previous range
- if hi > r[w-1] {
- r[w-1] = hi
- }
- continue
- }
- // new disjoint range
- r[w] = lo
- r[w+1] = hi
- w += 2
- }
-
- return r[:w]
-}
-
-// appendLiteral returns the result of appending the literal x to the class r.
-func appendLiteral(r []rune, x rune, flags Flags) []rune {
- if flags&FoldCase != 0 {
- return appendFoldedRange(r, x, x)
- }
- return appendRange(r, x, x)
-}
-
// appendRange adds the range lo-hi to the class r and returns the result.
//
// Before growing r it tries to widen one of the last two ranges already
// present, if the new range overlaps or touches it. Looking at two ranges
// rather than one pays off when case-folded alphabets are appended, since
// one range can keep extending A-Z while the other extends a-z.
func appendRange(r []rune, lo, hi rune) []rune {
	for _, back := range [...]int{2, 4} {
		at := len(r) - back
		if at < 0 {
			break
		}
		prevLo, prevHi := r[at], r[at+1]
		if prevHi+1 < lo || hi+1 < prevLo {
			// Neither overlapping nor adjacent.
			continue
		}
		if lo < prevLo {
			r[at] = lo
		}
		if hi > prevHi {
			r[at+1] = hi
		}
		return r
	}
	return append(r, lo, hi)
}
-
-const (
- // minimum and maximum runes involved in folding.
- // checked during test.
- minFold = 0x0041
- maxFold = 0x1044f
-)
-
-// appendFoldedRange returns the result of appending the range lo-hi
-// and its case folding-equivalent runes to the class r.
-func appendFoldedRange(r []rune, lo, hi rune) []rune {
- // Optimizations.
- if lo <= minFold && hi >= maxFold {
- // Range is full: folding can't add more.
- return appendRange(r, lo, hi)
- }
- if hi < minFold || lo > maxFold {
- // Range is outside folding possibilities.
- return appendRange(r, lo, hi)
- }
- if lo < minFold {
- // [lo, minFold-1] needs no folding.
- r = appendRange(r, lo, minFold-1)
- lo = minFold
- }
- if hi > maxFold {
- // [maxFold+1, hi] needs no folding.
- r = appendRange(r, maxFold+1, hi)
- hi = maxFold
- }
-
- // Brute force. Depend on appendRange to coalesce ranges on the fly.
- for c := lo; c <= hi; c++ {
- r = appendRange(r, c, c)
- f := unicode.SimpleFold(c)
- for f != c {
- r = appendRange(r, f, f)
- f = unicode.SimpleFold(f)
- }
- }
- return r
-}
-
-// appendClass returns the result of appending the class x to the class r.
-// It assume x is clean.
-func appendClass(r []rune, x []rune) []rune {
- for i := 0; i < len(x); i += 2 {
- r = appendRange(r, x[i], x[i+1])
- }
- return r
-}
-
-// appendFolded returns the result of appending the case folding of the class x to the class r.
-func appendFoldedClass(r []rune, x []rune) []rune {
- for i := 0; i < len(x); i += 2 {
- r = appendFoldedRange(r, x[i], x[i+1])
- }
- return r
-}
-
-// appendNegatedClass returns the result of appending the negation of the class x to the class r.
-// It assumes x is clean.
-func appendNegatedClass(r []rune, x []rune) []rune {
- nextLo := '\u0000'
- for i := 0; i < len(x); i += 2 {
- lo, hi := x[i], x[i+1]
- if nextLo <= lo-1 {
- r = appendRange(r, nextLo, lo-1)
- }
- nextLo = hi + 1
- }
- if nextLo <= unicode.MaxRune {
- r = appendRange(r, nextLo, unicode.MaxRune)
- }
- return r
-}
-
-// appendTable returns the result of appending x to the class r.
-func appendTable(r []rune, x *unicode.RangeTable) []rune {
- for _, xr := range x.R16 {
- lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
- if stride == 1 {
- r = appendRange(r, lo, hi)
- continue
- }
- for c := lo; c <= hi; c += stride {
- r = appendRange(r, c, c)
- }
- }
- for _, xr := range x.R32 {
- lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
- if stride == 1 {
- r = appendRange(r, lo, hi)
- continue
- }
- for c := lo; c <= hi; c += stride {
- r = appendRange(r, c, c)
- }
- }
- return r
-}
-
-// appendNegatedTable returns the result of appending the negation of x to the class r.
-func appendNegatedTable(r []rune, x *unicode.RangeTable) []rune {
- nextLo := '\u0000' // lo end of next class to add
- for _, xr := range x.R16 {
- lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
- if stride == 1 {
- if nextLo <= lo-1 {
- r = appendRange(r, nextLo, lo-1)
- }
- nextLo = hi + 1
- continue
- }
- for c := lo; c <= hi; c += stride {
- if nextLo <= c-1 {
- r = appendRange(r, nextLo, c-1)
- }
- nextLo = c + 1
- }
- }
- for _, xr := range x.R32 {
- lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
- if stride == 1 {
- if nextLo <= lo-1 {
- r = appendRange(r, nextLo, lo-1)
- }
- nextLo = hi + 1
- continue
- }
- for c := lo; c <= hi; c += stride {
- if nextLo <= c-1 {
- r = appendRange(r, nextLo, c-1)
- }
- nextLo = c + 1
- }
- }
- if nextLo <= unicode.MaxRune {
- r = appendRange(r, nextLo, unicode.MaxRune)
- }
- return r
-}
-
// negateClass replaces the class r with its complement over the runes 0
// through unicode.MaxRune, writing into r's own storage, and returns the
// resulting slice. It assumes r is already clean.
func negateClass(r []rune) []rune {
	// Output is written over the front of r. Each input pair yields at
	// most one output pair and is read before anything is written at or
	// beyond its position, so the appends below never reallocate or
	// clobber unread input.
	out := r[:0]
	gapStart := '\u0000' // first rune not yet accounted for
	for i := 0; i < len(r); i += 2 {
		lo, hi := r[i], r[i+1]
		if lo-1 >= gapStart {
			out = append(out, gapStart, lo-1)
		}
		gapStart = hi + 1
	}
	if gapStart <= unicode.MaxRune {
		// The complement can hold one more range than the original,
		// so this append may grow the slice.
		out = append(out, gapStart, unicode.MaxRune)
	}
	return out
}
-
// ranges adapts a []rune holding lo,hi pairs to sort.Interface, treating
// each pair as one element. It wraps a pointer to the slice, which looks
// odd, but callers already hold a *[]rune and this spares an allocation.
type ranges struct {
	p *[]rune
}

// Len returns the number of lo,hi pairs.
func (ra ranges) Len() int {
	return len(*ra.p) / 2
}

// Less orders pairs by ascending lo, breaking ties by descending hi.
func (ra ranges) Less(i, j int) bool {
	pairs := *ra.p
	a, b := 2*i, 2*j
	if pairs[a] != pairs[b] {
		return pairs[a] < pairs[b]
	}
	return pairs[a+1] > pairs[b+1]
}

// Swap exchanges pairs i and j.
func (ra ranges) Swap(i, j int) {
	pairs := *ra.p
	a, b := 2*i, 2*j
	pairs[a], pairs[b] = pairs[b], pairs[a]
	pairs[a+1], pairs[b+1] = pairs[b+1], pairs[a+1]
}
-
-func checkUTF8(s string) error {
- for s != "" {
- rune, size := utf8.DecodeRuneInString(s)
- if rune == utf8.RuneError && size == 1 {
- return &Error{Code: ErrInvalidUTF8, Expr: s}
- }
- s = s[size:]
- }
- return nil
-}
-
-func nextRune(s string) (c rune, t string, err error) {
- c, size := utf8.DecodeRuneInString(s)
- if c == utf8.RuneError && size == 1 {
- return 0, "", &Error{Code: ErrInvalidUTF8, Expr: s}
- }
- return c, s[size:], nil
-}
-
// isalnum reports whether c is an ASCII digit or an ASCII letter.
func isalnum(c rune) bool {
	switch {
	case '0' <= c && c <= '9',
		'A' <= c && c <= 'Z',
		'a' <= c && c <= 'z':
		return true
	}
	return false
}
-
// unhex returns the value of the hexadecimal digit c (either case),
// or -1 if c is not a hexadecimal digit.
func unhex(c rune) rune {
	switch {
	case '0' <= c && c <= '9':
		return c - '0'
	case 'a' <= c && c <= 'f':
		return c - 'a' + 10
	case 'A' <= c && c <= 'F':
		return c - 'A' + 10
	}
	return -1
}
diff --git a/src/pkg/regexp/syntax/parse_test.go b/src/pkg/regexp/syntax/parse_test.go
deleted file mode 100644
index f3089294c..000000000
--- a/src/pkg/regexp/syntax/parse_test.go
+++ /dev/null
@@ -1,559 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package syntax
-
-import (
- "bytes"
- "fmt"
- "testing"
- "unicode"
-)
-
// parseTest pairs a regular expression with the structural dump that
// parsing it is expected to produce.
type parseTest struct {
	Regexp string // pattern handed to Parse
	Dump   string // expected result of dump on the parsed tree
}
-
-var parseTests = []parseTest{
- // Base cases
- {`a`, `lit{a}`},
- {`a.`, `cat{lit{a}dot{}}`},
- {`a.b`, `cat{lit{a}dot{}lit{b}}`},
- {`ab`, `str{ab}`},
- {`a.b.c`, `cat{lit{a}dot{}lit{b}dot{}lit{c}}`},
- {`abc`, `str{abc}`},
- {`a|^`, `alt{lit{a}bol{}}`},
- {`a|b`, `cc{0x61-0x62}`},
- {`(a)`, `cap{lit{a}}`},
- {`(a)|b`, `alt{cap{lit{a}}lit{b}}`},
- {`a*`, `star{lit{a}}`},
- {`a+`, `plus{lit{a}}`},
- {`a?`, `que{lit{a}}`},
- {`a{2}`, `rep{2,2 lit{a}}`},
- {`a{2,3}`, `rep{2,3 lit{a}}`},
- {`a{2,}`, `rep{2,-1 lit{a}}`},
- {`a*?`, `nstar{lit{a}}`},
- {`a+?`, `nplus{lit{a}}`},
- {`a??`, `nque{lit{a}}`},
- {`a{2}?`, `nrep{2,2 lit{a}}`},
- {`a{2,3}?`, `nrep{2,3 lit{a}}`},
- {`a{2,}?`, `nrep{2,-1 lit{a}}`},
- // Malformed { } are treated as literals.
- {`x{1001`, `str{x{1001}`},
- {`x{9876543210`, `str{x{9876543210}`},
- {`x{9876543210,`, `str{x{9876543210,}`},
- {`x{2,1`, `str{x{2,1}`},
- {`x{1,9876543210`, `str{x{1,9876543210}`},
- {``, `emp{}`},
- {`|`, `emp{}`}, // alt{emp{}emp{}} but got factored
- {`|x|`, `alt{emp{}lit{x}emp{}}`},
- {`.`, `dot{}`},
- {`^`, `bol{}`},
- {`$`, `eol{}`},
- {`\|`, `lit{|}`},
- {`\(`, `lit{(}`},
- {`\)`, `lit{)}`},
- {`\*`, `lit{*}`},
- {`\+`, `lit{+}`},
- {`\?`, `lit{?}`},
- {`{`, `lit{{}`},
- {`}`, `lit{}}`},
- {`\.`, `lit{.}`},
- {`\^`, `lit{^}`},
- {`\$`, `lit{$}`},
- {`\\`, `lit{\}`},
- {`[ace]`, `cc{0x61 0x63 0x65}`},
- {`[abc]`, `cc{0x61-0x63}`},
- {`[a-z]`, `cc{0x61-0x7a}`},
- {`[a]`, `lit{a}`},
- {`\-`, `lit{-}`},
- {`-`, `lit{-}`},
- {`\_`, `lit{_}`},
- {`abc`, `str{abc}`},
- {`abc|def`, `alt{str{abc}str{def}}`},
- {`abc|def|ghi`, `alt{str{abc}str{def}str{ghi}}`},
-
- // Posix and Perl extensions
- {`[[:lower:]]`, `cc{0x61-0x7a}`},
- {`[a-z]`, `cc{0x61-0x7a}`},
- {`[^[:lower:]]`, `cc{0x0-0x60 0x7b-0x10ffff}`},
- {`[[:^lower:]]`, `cc{0x0-0x60 0x7b-0x10ffff}`},
- {`(?i)[[:lower:]]`, `cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}`},
- {`(?i)[a-z]`, `cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}`},
- {`(?i)[^[:lower:]]`, `cc{0x0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}`},
- {`(?i)[[:^lower:]]`, `cc{0x0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}`},
- {`\d`, `cc{0x30-0x39}`},
- {`\D`, `cc{0x0-0x2f 0x3a-0x10ffff}`},
- {`\s`, `cc{0x9-0xa 0xc-0xd 0x20}`},
- {`\S`, `cc{0x0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}`},
- {`\w`, `cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a}`},
- {`\W`, `cc{0x0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x10ffff}`},
- {`(?i)\w`, `cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a 0x17f 0x212a}`},
- {`(?i)\W`, `cc{0x0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}`},
- {`[^\\]`, `cc{0x0-0x5b 0x5d-0x10ffff}`},
- // { `\C`, `byte{}` }, // probably never
-
- // Unicode, negatives, and a double negative.
- {`\p{Braille}`, `cc{0x2800-0x28ff}`},
- {`\P{Braille}`, `cc{0x0-0x27ff 0x2900-0x10ffff}`},
- {`\p{^Braille}`, `cc{0x0-0x27ff 0x2900-0x10ffff}`},
- {`\P{^Braille}`, `cc{0x2800-0x28ff}`},
- {`\pZ`, `cc{0x20 0xa0 0x1680 0x2000-0x200a 0x2028-0x2029 0x202f 0x205f 0x3000}`},
- {`[\p{Braille}]`, `cc{0x2800-0x28ff}`},
- {`[\P{Braille}]`, `cc{0x0-0x27ff 0x2900-0x10ffff}`},
- {`[\p{^Braille}]`, `cc{0x0-0x27ff 0x2900-0x10ffff}`},
- {`[\P{^Braille}]`, `cc{0x2800-0x28ff}`},
- {`[\pZ]`, `cc{0x20 0xa0 0x1680 0x2000-0x200a 0x2028-0x2029 0x202f 0x205f 0x3000}`},
- {`\p{Lu}`, mkCharClass(unicode.IsUpper)},
- {`[\p{Lu}]`, mkCharClass(unicode.IsUpper)},
- {`(?i)[\p{Lu}]`, mkCharClass(isUpperFold)},
- {`\p{Any}`, `dot{}`},
- {`\p{^Any}`, `cc{}`},
-
- // Hex, octal.
- {`[\012-\234]\141`, `cat{cc{0xa-0x9c}lit{a}}`},
- {`[\x{41}-\x7a]\x61`, `cat{cc{0x41-0x7a}lit{a}}`},
-
- // More interesting regular expressions.
- {`a{,2}`, `str{a{,2}}`},
- {`\.\^\$\\`, `str{.^$\}`},
- {`[a-zABC]`, `cc{0x41-0x43 0x61-0x7a}`},
- {`[^a]`, `cc{0x0-0x60 0x62-0x10ffff}`},
- {`[α-ε☺]`, `cc{0x3b1-0x3b5 0x263a}`}, // utf-8
- {`a*{`, `cat{star{lit{a}}lit{{}}`},
-
- // Test precedences
- {`(?:ab)*`, `star{str{ab}}`},
- {`(ab)*`, `star{cap{str{ab}}}`},
- {`ab|cd`, `alt{str{ab}str{cd}}`},
- {`a(b|c)d`, `cat{lit{a}cap{cc{0x62-0x63}}lit{d}}`},
-
- // Test flattening.
- {`(?:a)`, `lit{a}`},
- {`(?:ab)(?:cd)`, `str{abcd}`},
- {`(?:a+b+)(?:c+d+)`, `cat{plus{lit{a}}plus{lit{b}}plus{lit{c}}plus{lit{d}}}`},
- {`(?:a+|b+)|(?:c+|d+)`, `alt{plus{lit{a}}plus{lit{b}}plus{lit{c}}plus{lit{d}}}`},
- {`(?:a|b)|(?:c|d)`, `cc{0x61-0x64}`},
- {`a|.`, `dot{}`},
- {`.|a`, `dot{}`},
- {`(?:[abc]|A|Z|hello|world)`, `alt{cc{0x41 0x5a 0x61-0x63}str{hello}str{world}}`},
- {`(?:[abc]|A|Z)`, `cc{0x41 0x5a 0x61-0x63}`},
-
- // Test Perl quoted literals
- {`\Q+|*?{[\E`, `str{+|*?{[}`},
- {`\Q+\E+`, `plus{lit{+}}`},
- {`\Q\\E`, `lit{\}`},
- {`\Q\\\E`, `str{\\}`},
-
- // Test Perl \A and \z
- {`(?m)^`, `bol{}`},
- {`(?m)$`, `eol{}`},
- {`(?-m)^`, `bot{}`},
- {`(?-m)$`, `eot{}`},
- {`(?m)\A`, `bot{}`},
- {`(?m)\z`, `eot{\z}`},
- {`(?-m)\A`, `bot{}`},
- {`(?-m)\z`, `eot{\z}`},
-
- // Test named captures
- {`(?P<name>a)`, `cap{name:lit{a}}`},
-
- // Case-folded literals
- {`[Aa]`, `litfold{A}`},
- {`[\x{100}\x{101}]`, `litfold{Ā}`},
- {`[Δδ]`, `litfold{Δ}`},
-
- // Strings
- {`abcde`, `str{abcde}`},
- {`[Aa][Bb]cd`, `cat{strfold{AB}str{cd}}`},
-
- // Factoring.
- {`abc|abd|aef|bcx|bcy`, `alt{cat{lit{a}alt{cat{lit{b}cc{0x63-0x64}}str{ef}}}cat{str{bc}cc{0x78-0x79}}}`},
- {`ax+y|ax+z|ay+w`, `cat{lit{a}alt{cat{plus{lit{x}}cc{0x79-0x7a}}cat{plus{lit{y}}lit{w}}}}`},
-
- // Bug fixes.
- {`(?:.)`, `dot{}`},
- {`(?:x|(?:xa))`, `cat{lit{x}alt{emp{}lit{a}}}`},
- {`(?:.|(?:.a))`, `cat{dot{}alt{emp{}lit{a}}}`},
- {`(?:A(?:A|a))`, `cat{lit{A}litfold{A}}`},
- {`(?:A|a)`, `litfold{A}`},
- {`A|(?:A|a)`, `litfold{A}`},
- {`(?s).`, `dot{}`},
- {`(?-s).`, `dnl{}`},
- {`(?:(?:^).)`, `cat{bol{}dot{}}`},
- {`(?-s)(?:(?:^).)`, `cat{bol{}dnl{}}`},
-
- // RE2 prefix_tests
- {`abc|abd`, `cat{str{ab}cc{0x63-0x64}}`},
- {`a(?:b)c|abd`, `cat{str{ab}cc{0x63-0x64}}`},
- {`abc|abd|aef|bcx|bcy`,
- `alt{cat{lit{a}alt{cat{lit{b}cc{0x63-0x64}}str{ef}}}` +
- `cat{str{bc}cc{0x78-0x79}}}`},
- {`abc|x|abd`, `alt{str{abc}lit{x}str{abd}}`},
- {`(?i)abc|ABD`, `cat{strfold{AB}cc{0x43-0x44 0x63-0x64}}`},
- {`[ab]c|[ab]d`, `cat{cc{0x61-0x62}cc{0x63-0x64}}`},
- {`(?:xx|yy)c|(?:xx|yy)d`,
- `cat{alt{str{xx}str{yy}}cc{0x63-0x64}}`},
- {`x{2}|x{2}[0-9]`,
- `cat{rep{2,2 lit{x}}alt{emp{}cc{0x30-0x39}}}`},
- {`x{2}y|x{2}[0-9]y`,
- `cat{rep{2,2 lit{x}}alt{lit{y}cat{cc{0x30-0x39}lit{y}}}}`},
-}
-
// testFlags is the flag set under which the parseTests table is parsed.
const testFlags = MatchNL | PerlX | UnicodeGroups

// TestParseSimple checks every parseTests entry against its expected dump
// when parsed with testFlags.
func TestParseSimple(t *testing.T) {
	testParseDump(t, parseTests, testFlags)
}
-
-var foldcaseTests = []parseTest{
- {`AbCdE`, `strfold{ABCDE}`},
- {`[Aa]`, `litfold{A}`},
- {`a`, `litfold{A}`},
-
- // 0x17F is an old English long s (looks like an f) and folds to s.
- // 0x212A is the Kelvin symbol and folds to k.
- {`A[F-g]`, `cat{litfold{A}cc{0x41-0x7a 0x17f 0x212a}}`}, // [Aa][A-z...]
- {`[[:upper:]]`, `cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}`},
- {`[[:lower:]]`, `cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}`},
-}
-
// TestParseFoldCase checks the foldcaseTests table with only the FoldCase
// flag set.
func TestParseFoldCase(t *testing.T) {
	testParseDump(t, foldcaseTests, FoldCase)
}
-
-var literalTests = []parseTest{
- {"(|)^$.[*+?]{5,10},\\", "str{(|)^$.[*+?]{5,10},\\}"},
-}
-
// TestParseLiteral checks the literalTests table with only the Literal
// flag set.
func TestParseLiteral(t *testing.T) {
	testParseDump(t, literalTests, Literal)
}
-
-var matchnlTests = []parseTest{
- {`.`, `dot{}`},
- {"\n", "lit{\n}"},
- {`[^a]`, `cc{0x0-0x60 0x62-0x10ffff}`},
- {`[a\n]`, `cc{0xa 0x61}`},
-}
-
// TestParseMatchNL checks the matchnlTests table with only the MatchNL
// flag set.
func TestParseMatchNL(t *testing.T) {
	testParseDump(t, matchnlTests, MatchNL)
}
-
-var nomatchnlTests = []parseTest{
- {`.`, `dnl{}`},
- {"\n", "lit{\n}"},
- {`[^a]`, `cc{0x0-0x9 0xb-0x60 0x62-0x10ffff}`},
- {`[a\n]`, `cc{0xa 0x61}`},
-}
-
// TestParseNoMatchNL checks the nomatchnlTests table with no flags set.
func TestParseNoMatchNL(t *testing.T) {
	testParseDump(t, nomatchnlTests, 0)
}
-
-// Test Parse -> Dump.
-func testParseDump(t *testing.T, tests []parseTest, flags Flags) {
- for _, tt := range tests {
- re, err := Parse(tt.Regexp, flags)
- if err != nil {
- t.Errorf("Parse(%#q): %v", tt.Regexp, err)
- continue
- }
- d := dump(re)
- if d != tt.Dump {
- t.Errorf("Parse(%#q).Dump() = %#q want %#q", tt.Regexp, d, tt.Dump)
- }
- }
-}
-
-// dump prints a string representation of the regexp showing
-// the structure explicitly.
-func dump(re *Regexp) string {
- var b bytes.Buffer
- dumpRegexp(&b, re)
- return b.String()
-}
-
-var opNames = []string{
- OpNoMatch: "no",
- OpEmptyMatch: "emp",
- OpLiteral: "lit",
- OpCharClass: "cc",
- OpAnyCharNotNL: "dnl",
- OpAnyChar: "dot",
- OpBeginLine: "bol",
- OpEndLine: "eol",
- OpBeginText: "bot",
- OpEndText: "eot",
- OpWordBoundary: "wb",
- OpNoWordBoundary: "nwb",
- OpCapture: "cap",
- OpStar: "star",
- OpPlus: "plus",
- OpQuest: "que",
- OpRepeat: "rep",
- OpConcat: "cat",
- OpAlternate: "alt",
-}
-
// dumpRegexp writes an encoding of the syntax tree for the regexp re to b.
// It is used during testing to distinguish between parses that might print
// the same using re's String method.
//
// Each node is written as an operator name followed by its operands
// enclosed in braces; sub-expressions are dumped recursively.
func dumpRegexp(b *bytes.Buffer, re *Regexp) {
	// First the node name.
	if int(re.Op) >= len(opNames) || opNames[re.Op] == "" {
		// No name registered for this operator: fall back to its number.
		fmt.Fprintf(b, "op%d", re.Op)
	} else {
		switch re.Op {
		default:
			b.WriteString(opNames[re.Op])
		case OpStar, OpPlus, OpQuest, OpRepeat:
			// Non-greedy repetitions get an "n" prefix (nstar, nplus, ...).
			if re.Flags&NonGreedy != 0 {
				b.WriteByte('n')
			}
			b.WriteString(opNames[re.Op])
		case OpLiteral:
			// Multi-rune literals are shown as "str", single runes as "lit".
			if len(re.Rune) > 1 {
				b.WriteString("str")
			} else {
				b.WriteString("lit")
			}
			// The "fold" suffix is added only when FoldCase is set and at
			// least one rune has a case-folding partner.
			if re.Flags&FoldCase != 0 {
				for _, r := range re.Rune {
					if unicode.SimpleFold(r) != r {
						b.WriteString("fold")
						break
					}
				}
			}
		}
	}
	// Then the operands, in braces.
	b.WriteByte('{')
	switch re.Op {
	case OpEndText:
		// An end-of-text that was not written as $ is marked with \z.
		if re.Flags&WasDollar == 0 {
			b.WriteString(`\z`)
		}
	case OpLiteral:
		for _, r := range re.Rune {
			b.WriteRune(r)
		}
	case OpConcat, OpAlternate:
		for _, sub := range re.Sub {
			dumpRegexp(b, sub)
		}
	case OpStar, OpPlus, OpQuest:
		dumpRegexp(b, re.Sub[0])
	case OpRepeat:
		fmt.Fprintf(b, "%d,%d ", re.Min, re.Max)
		dumpRegexp(b, re.Sub[0])
	case OpCapture:
		// Named groups are written as "name:" before the sub-expression.
		if re.Name != "" {
			b.WriteString(re.Name)
			b.WriteByte(':')
		}
		dumpRegexp(b, re.Sub[0])
	case OpCharClass:
		// re.Rune holds (lo, hi) pairs; print each as "lo-hi", or just
		// "lo" for a single-rune range, separated by spaces.
		sep := ""
		for i := 0; i < len(re.Rune); i += 2 {
			b.WriteString(sep)
			sep = " "
			lo, hi := re.Rune[i], re.Rune[i+1]
			if lo == hi {
				fmt.Fprintf(b, "%#x", lo)
			} else {
				fmt.Fprintf(b, "%#x-%#x", lo, hi)
			}
		}
	}
	b.WriteByte('}')
}
-
-func mkCharClass(f func(rune) bool) string {
- re := &Regexp{Op: OpCharClass}
- lo := rune(-1)
- for i := rune(0); i <= unicode.MaxRune; i++ {
- if f(i) {
- if lo < 0 {
- lo = i
- }
- } else {
- if lo >= 0 {
- re.Rune = append(re.Rune, lo, i-1)
- lo = -1
- }
- }
- }
- if lo >= 0 {
- re.Rune = append(re.Rune, lo, unicode.MaxRune)
- }
- return dump(re)
-}
-
// isUpperFold reports whether r, or any rune reachable from r by repeated
// simple case folding, is an upper case letter.
func isUpperFold(r rune) bool {
	if unicode.IsUpper(r) {
		return true
	}
	// SimpleFold cycles through the fold orbit and eventually returns to r.
	for c := unicode.SimpleFold(r); c != r; c = unicode.SimpleFold(c) {
		if unicode.IsUpper(c) {
			return true
		}
	}
	return false
}
-
-func TestFoldConstants(t *testing.T) {
- last := rune(-1)
- for i := rune(0); i <= unicode.MaxRune; i++ {
- if unicode.SimpleFold(i) == i {
- continue
- }
- if last == -1 && minFold != i {
- t.Errorf("minFold=%#U should be %#U", minFold, i)
- }
- last = i
- }
- if maxFold != last {
- t.Errorf("maxFold=%#U should be %#U", maxFold, last)
- }
-}
-
-func TestAppendRangeCollapse(t *testing.T) {
- // AppendRange should collapse each of the new ranges
- // into the earlier ones (it looks back two ranges), so that
- // the slice never grows very large.
- // Note that we are not calling cleanClass.
- var r []rune
- for i := rune('A'); i <= 'Z'; i++ {
- r = appendRange(r, i, i)
- r = appendRange(r, i+'a'-'A', i+'a'-'A')
- }
- if string(r) != "AZaz" {
- t.Errorf("appendRange interlaced A-Z a-z = %s, want AZaz", string(r))
- }
-}
-
-var invalidRegexps = []string{
- `(`,
- `)`,
- `(a`,
- `a)`,
- `(a))`,
- `(a|b|`,
- `a|b|)`,
- `(a|b|))`,
- `(a|b`,
- `a|b)`,
- `(a|b))`,
- `[a-z`,
- `([a-z)`,
- `[a-z)`,
- `([a-z]))`,
- `x{1001}`,
- `x{9876543210}`,
- `x{2,1}`,
- `x{1,9876543210}`,
- "\xff", // Invalid UTF-8
- "[\xff]",
- "[\\\xff]",
- "\\\xff",
- `(?P<name>a`,
- `(?P<name>`,
- `(?P<name`,
- `(?P<x y>a)`,
- `(?P<>a)`,
- `[a-Z]`,
- `(?i)[a-Z]`,
- `a{100000}`,
- `a{100000,}`,
-}
-
-var onlyPerl = []string{
- `[a-b-c]`,
- `\Qabc\E`,
- `\Q*+?{[\E`,
- `\Q\\E`,
- `\Q\\\E`,
- `\Q\\\\E`,
- `\Q\\\\\E`,
- `(?:a)`,
- `(?P<name>a)`,
-}
-
-var onlyPOSIX = []string{
- "a++",
- "a**",
- "a?*",
- "a+*",
- "a{1}*",
- ".{1}{2}.{3}",
-}
-
// TestParseInvalidRegexps checks three tables against both the Perl and
// the POSIX flag sets: invalidRegexps must be rejected by both, onlyPerl
// must parse under Perl and fail under POSIX, and onlyPOSIX must fail
// under Perl and parse under POSIX.
func TestParseInvalidRegexps(t *testing.T) {
	// Rejected under both syntaxes.
	for _, regexp := range invalidRegexps {
		if re, err := Parse(regexp, Perl); err == nil {
			t.Errorf("Parse(%#q, Perl) = %s, should have failed", regexp, dump(re))
		}
		if re, err := Parse(regexp, POSIX); err == nil {
			t.Errorf("Parse(%#q, POSIX) = %s, should have failed", regexp, dump(re))
		}
	}
	// Accepted by Perl, rejected by POSIX.
	for _, regexp := range onlyPerl {
		if _, err := Parse(regexp, Perl); err != nil {
			t.Errorf("Parse(%#q, Perl): %v", regexp, err)
		}
		if re, err := Parse(regexp, POSIX); err == nil {
			t.Errorf("Parse(%#q, POSIX) = %s, should have failed", regexp, dump(re))
		}
	}
	// Rejected by Perl, accepted by POSIX.
	for _, regexp := range onlyPOSIX {
		if re, err := Parse(regexp, Perl); err == nil {
			t.Errorf("Parse(%#q, Perl) = %s, should have failed", regexp, dump(re))
		}
		if _, err := Parse(regexp, POSIX); err != nil {
			t.Errorf("Parse(%#q, POSIX): %v", regexp, err)
		}
	}
}
-
// TestToStringEquivalentParse checks that String round-trips: for every
// parseTests entry whose String output differs from the original pattern,
// re-parsing that output must give the same dump, and calling String on
// the re-parsed tree must give the same text again.
func TestToStringEquivalentParse(t *testing.T) {
	for _, tt := range parseTests {
		re, err := Parse(tt.Regexp, testFlags)
		if err != nil {
			t.Errorf("Parse(%#q): %v", tt.Regexp, err)
			continue
		}
		d := dump(re)
		if d != tt.Dump {
			t.Errorf("Parse(%#q).Dump() = %#q want %#q", tt.Regexp, d, tt.Dump)
			continue
		}

		s := re.String()
		if s != tt.Regexp {
			// If ToString didn't return the original regexp,
			// it must have found one with fewer parens.
			// Unfortunately we can't check the length here, because
			// ToString produces "\\{" for a literal brace,
			// but "{" is a shorter equivalent in some contexts.
			nre, err := Parse(s, testFlags)
			if err != nil {
				t.Errorf("Parse(%#q.String() = %#q): %v", tt.Regexp, s, err)
				continue
			}
			// The re-parsed tree must have the same structure.
			nd := dump(nre)
			if d != nd {
				t.Errorf("Parse(%#q) -> %#q; %#q vs %#q", tt.Regexp, s, d, nd)
			}

			// String must be a fixed point after one round trip.
			ns := nre.String()
			if s != ns {
				t.Errorf("Parse(%#q) -> %#q -> %#q", tt.Regexp, s, ns)
			}
		}
	}
}
diff --git a/src/pkg/regexp/syntax/perl_groups.go b/src/pkg/regexp/syntax/perl_groups.go
deleted file mode 100644
index effe4e686..000000000
--- a/src/pkg/regexp/syntax/perl_groups.go
+++ /dev/null
@@ -1,134 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// GENERATED BY make_perl_groups.pl; DO NOT EDIT.
-// make_perl_groups.pl >perl_groups.go
-
-package syntax
-
-var code1 = []rune{ /* \d */
- 0x30, 0x39,
-}
-
-var code2 = []rune{ /* \s */
- 0x9, 0xa,
- 0xc, 0xd,
- 0x20, 0x20,
-}
-
-var code3 = []rune{ /* \w */
- 0x30, 0x39,
- 0x41, 0x5a,
- 0x5f, 0x5f,
- 0x61, 0x7a,
-}
-
-var perlGroup = map[string]charGroup{
- `\d`: {+1, code1},
- `\D`: {-1, code1},
- `\s`: {+1, code2},
- `\S`: {-1, code2},
- `\w`: {+1, code3},
- `\W`: {-1, code3},
-}
-var code4 = []rune{ /* [:alnum:] */
- 0x30, 0x39,
- 0x41, 0x5a,
- 0x61, 0x7a,
-}
-
-var code5 = []rune{ /* [:alpha:] */
- 0x41, 0x5a,
- 0x61, 0x7a,
-}
-
-var code6 = []rune{ /* [:ascii:] */
- 0x0, 0x7f,
-}
-
-var code7 = []rune{ /* [:blank:] */
- 0x9, 0x9,
- 0x20, 0x20,
-}
-
-var code8 = []rune{ /* [:cntrl:] */
- 0x0, 0x1f,
- 0x7f, 0x7f,
-}
-
-var code9 = []rune{ /* [:digit:] */
- 0x30, 0x39,
-}
-
-var code10 = []rune{ /* [:graph:] */
- 0x21, 0x7e,
-}
-
-var code11 = []rune{ /* [:lower:] */
- 0x61, 0x7a,
-}
-
-var code12 = []rune{ /* [:print:] */
- 0x20, 0x7e,
-}
-
-var code13 = []rune{ /* [:punct:] */
- 0x21, 0x2f,
- 0x3a, 0x40,
- 0x5b, 0x60,
- 0x7b, 0x7e,
-}
-
-var code14 = []rune{ /* [:space:] */
- 0x9, 0xd,
- 0x20, 0x20,
-}
-
-var code15 = []rune{ /* [:upper:] */
- 0x41, 0x5a,
-}
-
-var code16 = []rune{ /* [:word:] */
- 0x30, 0x39,
- 0x41, 0x5a,
- 0x5f, 0x5f,
- 0x61, 0x7a,
-}
-
-var code17 = []rune{ /* [:xdigit:] */
- 0x30, 0x39,
- 0x41, 0x46,
- 0x61, 0x66,
-}
-
-var posixGroup = map[string]charGroup{
- `[:alnum:]`: {+1, code4},
- `[:^alnum:]`: {-1, code4},
- `[:alpha:]`: {+1, code5},
- `[:^alpha:]`: {-1, code5},
- `[:ascii:]`: {+1, code6},
- `[:^ascii:]`: {-1, code6},
- `[:blank:]`: {+1, code7},
- `[:^blank:]`: {-1, code7},
- `[:cntrl:]`: {+1, code8},
- `[:^cntrl:]`: {-1, code8},
- `[:digit:]`: {+1, code9},
- `[:^digit:]`: {-1, code9},
- `[:graph:]`: {+1, code10},
- `[:^graph:]`: {-1, code10},
- `[:lower:]`: {+1, code11},
- `[:^lower:]`: {-1, code11},
- `[:print:]`: {+1, code12},
- `[:^print:]`: {-1, code12},
- `[:punct:]`: {+1, code13},
- `[:^punct:]`: {-1, code13},
- `[:space:]`: {+1, code14},
- `[:^space:]`: {-1, code14},
- `[:upper:]`: {+1, code15},
- `[:^upper:]`: {-1, code15},
- `[:word:]`: {+1, code16},
- `[:^word:]`: {-1, code16},
- `[:xdigit:]`: {+1, code17},
- `[:^xdigit:]`: {-1, code17},
-}
diff --git a/src/pkg/regexp/syntax/prog.go b/src/pkg/regexp/syntax/prog.go
deleted file mode 100644
index 29bd282d0..000000000
--- a/src/pkg/regexp/syntax/prog.go
+++ /dev/null
@@ -1,345 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package syntax
-
-import (
- "bytes"
- "strconv"
- "unicode"
-)
-
-// Compiled program.
-// May not belong in this package, but convenient for now.
-
// A Prog is a compiled regular expression program.
// Instructions refer to each other by their index in Inst.
type Prog struct {
	Inst   []Inst
	Start  int // index of start instruction
	NumCap int // number of InstCapture insts in re
}
-
// An InstOp is an instruction opcode.
type InstOp uint8

const (
	InstAlt InstOp = iota
	InstAltMatch
	InstCapture
	InstEmptyWidth
	InstMatch
	InstFail
	InstNop
	InstRune
	InstRune1
	InstRuneAny
	InstRuneAnyNotNL
)

// instOpNames maps each opcode to its constant's name.
var instOpNames = []string{
	InstAlt:          "InstAlt",
	InstAltMatch:     "InstAltMatch",
	InstCapture:      "InstCapture",
	InstEmptyWidth:   "InstEmptyWidth",
	InstMatch:        "InstMatch",
	InstFail:         "InstFail",
	InstNop:          "InstNop",
	InstRune:         "InstRune",
	InstRune1:        "InstRune1",
	InstRuneAny:      "InstRuneAny",
	InstRuneAnyNotNL: "InstRuneAnyNotNL",
}

// String returns the name of the opcode's constant, or the empty string
// for a value that is not one of the declared opcodes.
func (i InstOp) String() string {
	if int(i) < len(instOpNames) {
		return instOpNames[i]
	}
	return ""
}
-
// An EmptyOp specifies a kind or mixture of zero-width assertions.
// Each constant is a separate bit, so several can be OR'ed together.
type EmptyOp uint8

const (
	EmptyBeginLine      EmptyOp = 1 << iota // position follows '\n' or is at the beginning of the text
	EmptyEndLine                            // position precedes '\n' or is at the end of the text
	EmptyBeginText                          // position is at the beginning of the text
	EmptyEndText                            // position is at the end of the text
	EmptyWordBoundary                       // exactly one neighbouring rune is a word character
	EmptyNoWordBoundary                     // both or neither neighbouring runes are word characters
)
-
-// EmptyOpContext returns the zero-width assertions
-// satisfied at the position between the runes r1 and r2.
-// Passing r1 == -1 indicates that the position is
-// at the beginning of the text.
-// Passing r2 == -1 indicates that the position is
-// at the end of the text.
-func EmptyOpContext(r1, r2 rune) EmptyOp {
- var op EmptyOp = EmptyNoWordBoundary
- var boundary byte
- switch {
- case IsWordChar(r1):
- boundary = 1
- case r1 == '\n':
- op |= EmptyBeginLine
- case r1 < 0:
- op |= EmptyBeginText | EmptyBeginLine
- }
- switch {
- case IsWordChar(r2):
- boundary ^= 1
- case r2 == '\n':
- op |= EmptyEndLine
- case r2 < 0:
- op |= EmptyEndText | EmptyEndLine
- }
- if boundary != 0 { // IsWordChar(r1) != IsWordChar(r2)
- op ^= (EmptyWordBoundary | EmptyNoWordBoundary)
- }
- return op
-}
-
// IsWordChar reports whether r is considered a ``word character''
// during the evaluation of the \b and \B zero-width assertions.
// These assertions are ASCII-only: the word characters are [A-Za-z0-9_].
func IsWordChar(r rune) bool {
	switch {
	case r == '_':
		return true
	case 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z', '0' <= r && r <= '9':
		return true
	}
	return false
}
-
// An Inst is a single instruction in a regular expression program.
// Out and Arg are interpreted according to Op, as noted on each field.
type Inst struct {
	Op   InstOp
	Out  uint32 // all but InstMatch, InstFail
	Arg  uint32 // InstAlt, InstAltMatch, InstCapture, InstEmptyWidth
	Rune []rune
}
-
-func (p *Prog) String() string {
- var b bytes.Buffer
- dumpProg(&b, p)
- return b.String()
-}
-
-// skipNop follows any no-op or capturing instructions
-// and returns the resulting pc.
-func (p *Prog) skipNop(pc uint32) (*Inst, uint32) {
- i := &p.Inst[pc]
- for i.Op == InstNop || i.Op == InstCapture {
- pc = i.Out
- i = &p.Inst[pc]
- }
- return i, pc
-}
-
-// op returns i.Op but merges all the Rune special cases into InstRune
-func (i *Inst) op() InstOp {
- op := i.Op
- switch op {
- case InstRune1, InstRuneAny, InstRuneAnyNotNL:
- op = InstRune
- }
- return op
-}
-
-// Prefix returns a literal string that all matches for the
-// regexp must start with. Complete is true if the prefix
-// is the entire match.
-func (p *Prog) Prefix() (prefix string, complete bool) {
- i, _ := p.skipNop(uint32(p.Start))
-
- // Avoid allocation of buffer if prefix is empty.
- if i.op() != InstRune || len(i.Rune) != 1 {
- return "", i.Op == InstMatch
- }
-
- // Have prefix; gather characters.
- var buf bytes.Buffer
- for i.op() == InstRune && len(i.Rune) == 1 && Flags(i.Arg)&FoldCase == 0 {
- buf.WriteRune(i.Rune[0])
- i, _ = p.skipNop(i.Out)
- }
- return buf.String(), i.Op == InstMatch
-}
-
-// StartCond returns the leading empty-width conditions that must
-// be true in any match. It returns ^EmptyOp(0) if no matches are possible.
-func (p *Prog) StartCond() EmptyOp {
- var flag EmptyOp
- pc := uint32(p.Start)
- i := &p.Inst[pc]
-Loop:
- for {
- switch i.Op {
- case InstEmptyWidth:
- flag |= EmptyOp(i.Arg)
- case InstFail:
- return ^EmptyOp(0)
- case InstCapture, InstNop:
- // skip
- default:
- break Loop
- }
- pc = i.Out
- i = &p.Inst[pc]
- }
- return flag
-}
-
-const noMatch = -1
-
-// MatchRune returns true if the instruction matches (and consumes) r.
-// It should only be called when i.Op == InstRune.
-func (i *Inst) MatchRune(r rune) bool {
- return i.MatchRunePos(r) != noMatch
-}
-
-// MatchRunePos checks whether the instruction matches (and consumes) r.
-// If so, MatchRunePos returns the index of the matching rune pair
-// (or, when len(i.Rune) == 1, rune singleton).
-// If not, MatchRunePos returns -1.
-// MatchRunePos should only be called when i.Op == InstRune.
-func (i *Inst) MatchRunePos(r rune) int {
- rune := i.Rune
-
- // Special case: single-rune slice is from literal string, not char class.
- if len(rune) == 1 {
- r0 := rune[0]
- if r == r0 {
- return 0
- }
- if Flags(i.Arg)&FoldCase != 0 {
- for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) {
- if r == r1 {
- return 0
- }
- }
- }
- return noMatch
- }
-
- // Peek at the first few pairs.
- // Should handle ASCII well.
- for j := 0; j < len(rune) && j <= 8; j += 2 {
- if r < rune[j] {
- return noMatch
- }
- if r <= rune[j+1] {
- return j / 2
- }
- }
-
- // Otherwise binary search.
- lo := 0
- hi := len(rune) / 2
- for lo < hi {
- m := lo + (hi-lo)/2
- if c := rune[2*m]; c <= r {
- if r <= rune[2*m+1] {
- return m
- }
- lo = m + 1
- } else {
- hi = m
- }
- }
- return noMatch
-}
-
// wordRune reports whether r is an ASCII word character: a letter, a
// digit or an underscore. It follows re2's Prog::IsWordChar.
// Since it operates on runes, Unicode support would be easy to add here.
func wordRune(r rune) bool {
	if r == '_' {
		return true
	}
	upper := r >= 'A' && r <= 'Z'
	lower := r >= 'a' && r <= 'z'
	digit := r >= '0' && r <= '9'
	return upper || lower || digit
}
-
-// MatchEmptyWidth returns true if the instruction matches
-// an empty string between the runes before and after.
-// It should only be called when i.Op == InstEmptyWidth.
-func (i *Inst) MatchEmptyWidth(before rune, after rune) bool {
- switch EmptyOp(i.Arg) {
- case EmptyBeginLine:
- return before == '\n' || before == -1
- case EmptyEndLine:
- return after == '\n' || after == -1
- case EmptyBeginText:
- return before == -1
- case EmptyEndText:
- return after == -1
- case EmptyWordBoundary:
- return wordRune(before) != wordRune(after)
- case EmptyNoWordBoundary:
- return wordRune(before) == wordRune(after)
- }
- panic("unknown empty width arg")
-}
-
-func (i *Inst) String() string {
- var b bytes.Buffer
- dumpInst(&b, i)
- return b.String()
-}
-
// bw writes each of args to b, in order, with nothing between them.
func bw(b *bytes.Buffer, args ...string) {
	for k := range args {
		b.WriteString(args[k])
	}
}
-
-func dumpProg(b *bytes.Buffer, p *Prog) {
- for j := range p.Inst {
- i := &p.Inst[j]
- pc := strconv.Itoa(j)
- if len(pc) < 3 {
- b.WriteString(" "[len(pc):])
- }
- if j == p.Start {
- pc += "*"
- }
- bw(b, pc, "\t")
- dumpInst(b, i)
- bw(b, "\n")
- }
-}
-
// u32 formats i as a decimal string.
func u32(i uint32) string {
	// Every uint32 fits in an int64, so the signed formatter gives the
	// same digits.
	return strconv.FormatInt(int64(i), 10)
}
-
-func dumpInst(b *bytes.Buffer, i *Inst) {
- switch i.Op {
- case InstAlt:
- bw(b, "alt -> ", u32(i.Out), ", ", u32(i.Arg))
- case InstAltMatch:
- bw(b, "altmatch -> ", u32(i.Out), ", ", u32(i.Arg))
- case InstCapture:
- bw(b, "cap ", u32(i.Arg), " -> ", u32(i.Out))
- case InstEmptyWidth:
- bw(b, "empty ", u32(i.Arg), " -> ", u32(i.Out))
- case InstMatch:
- bw(b, "match")
- case InstFail:
- bw(b, "fail")
- case InstNop:
- bw(b, "nop -> ", u32(i.Out))
- case InstRune:
- if i.Rune == nil {
- // shouldn't happen
- bw(b, "rune <nil>")
- }
- bw(b, "rune ", strconv.QuoteToASCII(string(i.Rune)))
- if Flags(i.Arg)&FoldCase != 0 {
- bw(b, "/i")
- }
- bw(b, " -> ", u32(i.Out))
- case InstRune1:
- bw(b, "rune1 ", strconv.QuoteToASCII(string(i.Rune)), " -> ", u32(i.Out))
- case InstRuneAny:
- bw(b, "any -> ", u32(i.Out))
- case InstRuneAnyNotNL:
- bw(b, "anynotnl -> ", u32(i.Out))
- }
-}
diff --git a/src/pkg/regexp/syntax/prog_test.go b/src/pkg/regexp/syntax/prog_test.go
deleted file mode 100644
index 50bfa3d4b..000000000
--- a/src/pkg/regexp/syntax/prog_test.go
+++ /dev/null
@@ -1,114 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package syntax
-
-import "testing"
-
-var compileTests = []struct {
- Regexp string
- Prog string
-}{
- {"a", ` 0 fail
- 1* rune1 "a" -> 2
- 2 match
-`},
- {"[A-M][n-z]", ` 0 fail
- 1* rune "AM" -> 2
- 2 rune "nz" -> 3
- 3 match
-`},
- {"", ` 0 fail
- 1* nop -> 2
- 2 match
-`},
- {"a?", ` 0 fail
- 1 rune1 "a" -> 3
- 2* alt -> 1, 3
- 3 match
-`},
- {"a??", ` 0 fail
- 1 rune1 "a" -> 3
- 2* alt -> 3, 1
- 3 match
-`},
- {"a+", ` 0 fail
- 1* rune1 "a" -> 2
- 2 alt -> 1, 3
- 3 match
-`},
- {"a+?", ` 0 fail
- 1* rune1 "a" -> 2
- 2 alt -> 3, 1
- 3 match
-`},
- {"a*", ` 0 fail
- 1 rune1 "a" -> 2
- 2* alt -> 1, 3
- 3 match
-`},
- {"a*?", ` 0 fail
- 1 rune1 "a" -> 2
- 2* alt -> 3, 1
- 3 match
-`},
- {"a+b+", ` 0 fail
- 1* rune1 "a" -> 2
- 2 alt -> 1, 3
- 3 rune1 "b" -> 4
- 4 alt -> 3, 5
- 5 match
-`},
- {"(a+)(b+)", ` 0 fail
- 1* cap 2 -> 2
- 2 rune1 "a" -> 3
- 3 alt -> 2, 4
- 4 cap 3 -> 5
- 5 cap 4 -> 6
- 6 rune1 "b" -> 7
- 7 alt -> 6, 8
- 8 cap 5 -> 9
- 9 match
-`},
- {"a+|b+", ` 0 fail
- 1 rune1 "a" -> 2
- 2 alt -> 1, 6
- 3 rune1 "b" -> 4
- 4 alt -> 3, 6
- 5* alt -> 1, 3
- 6 match
-`},
- {"A[Aa]", ` 0 fail
- 1* rune1 "A" -> 2
- 2 rune "A"/i -> 3
- 3 match
-`},
- {"(?:(?:^).)", ` 0 fail
- 1* empty 4 -> 2
- 2 anynotnl -> 3
- 3 match
-`},
-}
-
-func TestCompile(t *testing.T) {
- for _, tt := range compileTests {
- re, _ := Parse(tt.Regexp, Perl)
- p, _ := Compile(re)
- s := p.String()
- if s != tt.Prog {
- t.Errorf("compiled %#q:\n--- have\n%s---\n--- want\n%s---", tt.Regexp, s, tt.Prog)
- }
- }
-}
-
-func BenchmarkEmptyOpContext(b *testing.B) {
- for i := 0; i < b.N; i++ {
- var r1 rune = -1
- for _, r2 := range "foo, bar, baz\nsome input text.\n" {
- EmptyOpContext(r1, r2)
- r1 = r2
- }
- EmptyOpContext(r1, -1)
- }
-}
diff --git a/src/pkg/regexp/syntax/regexp.go b/src/pkg/regexp/syntax/regexp.go
deleted file mode 100644
index 329a90e01..000000000
--- a/src/pkg/regexp/syntax/regexp.go
+++ /dev/null
@@ -1,319 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package syntax
-
-// Note to implementers:
-// In this package, re is always a *Regexp and r is always a rune.
-
-import (
- "bytes"
- "strconv"
- "strings"
- "unicode"
-)
-
-// A Regexp is a node in a regular expression syntax tree.
-type Regexp struct {
- Op Op // operator
- Flags Flags
- Sub []*Regexp // subexpressions, if any
- Sub0 [1]*Regexp // storage for short Sub
- Rune []rune // matched runes, for OpLiteral, OpCharClass
- Rune0 [2]rune // storage for short Rune
- Min, Max int // min, max for OpRepeat
- Cap int // capturing index, for OpCapture
- Name string // capturing name, for OpCapture
-}
-
-// An Op is a single regular expression operator.
-type Op uint8
-
-// Operators are listed in precedence order, tightest binding to weakest.
-// Character class operators are listed simplest to most complex
-// (OpLiteral, OpCharClass, OpAnyCharNotNL, OpAnyChar).
-
-const (
- OpNoMatch Op = 1 + iota // matches no strings
- OpEmptyMatch // matches empty string
- OpLiteral // matches Runes sequence
- OpCharClass // matches Runes interpreted as range pair list
- OpAnyCharNotNL // matches any character except newline
- OpAnyChar // matches any character
- OpBeginLine // matches empty string at beginning of line
- OpEndLine // matches empty string at end of line
- OpBeginText // matches empty string at beginning of text
- OpEndText // matches empty string at end of text
- OpWordBoundary // matches word boundary `\b`
- OpNoWordBoundary // matches word non-boundary `\B`
- OpCapture // capturing subexpression with index Cap, optional name Name
- OpStar // matches Sub[0] zero or more times
- OpPlus // matches Sub[0] one or more times
- OpQuest // matches Sub[0] zero or one times
- OpRepeat // matches Sub[0] at least Min times, at most Max (Max == -1 is no limit)
- OpConcat // matches concatenation of Subs
- OpAlternate // matches alternation of Subs
-)
-
-const opPseudo Op = 128 // where pseudo-ops start
-
-// Equal returns true if x and y have identical structure.
-func (x *Regexp) Equal(y *Regexp) bool {
- if x == nil || y == nil {
- return x == y
- }
- if x.Op != y.Op {
- return false
- }
- switch x.Op {
- case OpEndText:
- // The parse flags remember whether this is \z or \Z.
- if x.Flags&WasDollar != y.Flags&WasDollar {
- return false
- }
-
- case OpLiteral, OpCharClass:
- if len(x.Rune) != len(y.Rune) {
- return false
- }
- for i, r := range x.Rune {
- if r != y.Rune[i] {
- return false
- }
- }
-
- case OpAlternate, OpConcat:
- if len(x.Sub) != len(y.Sub) {
- return false
- }
- for i, sub := range x.Sub {
- if !sub.Equal(y.Sub[i]) {
- return false
- }
- }
-
- case OpStar, OpPlus, OpQuest:
- if x.Flags&NonGreedy != y.Flags&NonGreedy || !x.Sub[0].Equal(y.Sub[0]) {
- return false
- }
-
- case OpRepeat:
- if x.Flags&NonGreedy != y.Flags&NonGreedy || x.Min != y.Min || x.Max != y.Max || !x.Sub[0].Equal(y.Sub[0]) {
- return false
- }
-
- case OpCapture:
- if x.Cap != y.Cap || x.Name != y.Name || !x.Sub[0].Equal(y.Sub[0]) {
- return false
- }
- }
- return true
-}
-
-// writeRegexp writes the Perl syntax for the regular expression re to b.
-func writeRegexp(b *bytes.Buffer, re *Regexp) {
- switch re.Op {
- default:
- b.WriteString("<invalid op" + strconv.Itoa(int(re.Op)) + ">")
- case OpNoMatch:
- b.WriteString(`[^\x00-\x{10FFFF}]`)
- case OpEmptyMatch:
- b.WriteString(`(?:)`)
- case OpLiteral:
- if re.Flags&FoldCase != 0 {
- b.WriteString(`(?i:`)
- }
- for _, r := range re.Rune {
- escape(b, r, false)
- }
- if re.Flags&FoldCase != 0 {
- b.WriteString(`)`)
- }
- case OpCharClass:
- if len(re.Rune)%2 != 0 {
- b.WriteString(`[invalid char class]`)
- break
- }
- b.WriteRune('[')
- if len(re.Rune) == 0 {
- b.WriteString(`^\x00-\x{10FFFF}`)
- } else if re.Rune[0] == 0 && re.Rune[len(re.Rune)-1] == unicode.MaxRune {
- // Contains 0 and MaxRune. Probably a negated class.
- // Print the gaps.
- b.WriteRune('^')
- for i := 1; i < len(re.Rune)-1; i += 2 {
- lo, hi := re.Rune[i]+1, re.Rune[i+1]-1
- escape(b, lo, lo == '-')
- if lo != hi {
- b.WriteRune('-')
- escape(b, hi, hi == '-')
- }
- }
- } else {
- for i := 0; i < len(re.Rune); i += 2 {
- lo, hi := re.Rune[i], re.Rune[i+1]
- escape(b, lo, lo == '-')
- if lo != hi {
- b.WriteRune('-')
- escape(b, hi, hi == '-')
- }
- }
- }
- b.WriteRune(']')
- case OpAnyCharNotNL:
- b.WriteString(`(?-s:.)`)
- case OpAnyChar:
- b.WriteString(`(?s:.)`)
- case OpBeginLine:
- b.WriteRune('^')
- case OpEndLine:
- b.WriteRune('$')
- case OpBeginText:
- b.WriteString(`\A`)
- case OpEndText:
- if re.Flags&WasDollar != 0 {
- b.WriteString(`(?-m:$)`)
- } else {
- b.WriteString(`\z`)
- }
- case OpWordBoundary:
- b.WriteString(`\b`)
- case OpNoWordBoundary:
- b.WriteString(`\B`)
- case OpCapture:
- if re.Name != "" {
- b.WriteString(`(?P<`)
- b.WriteString(re.Name)
- b.WriteRune('>')
- } else {
- b.WriteRune('(')
- }
- if re.Sub[0].Op != OpEmptyMatch {
- writeRegexp(b, re.Sub[0])
- }
- b.WriteRune(')')
- case OpStar, OpPlus, OpQuest, OpRepeat:
- if sub := re.Sub[0]; sub.Op > OpCapture || sub.Op == OpLiteral && len(sub.Rune) > 1 {
- b.WriteString(`(?:`)
- writeRegexp(b, sub)
- b.WriteString(`)`)
- } else {
- writeRegexp(b, sub)
- }
- switch re.Op {
- case OpStar:
- b.WriteRune('*')
- case OpPlus:
- b.WriteRune('+')
- case OpQuest:
- b.WriteRune('?')
- case OpRepeat:
- b.WriteRune('{')
- b.WriteString(strconv.Itoa(re.Min))
- if re.Max != re.Min {
- b.WriteRune(',')
- if re.Max >= 0 {
- b.WriteString(strconv.Itoa(re.Max))
- }
- }
- b.WriteRune('}')
- }
- if re.Flags&NonGreedy != 0 {
- b.WriteRune('?')
- }
- case OpConcat:
- for _, sub := range re.Sub {
- if sub.Op == OpAlternate {
- b.WriteString(`(?:`)
- writeRegexp(b, sub)
- b.WriteString(`)`)
- } else {
- writeRegexp(b, sub)
- }
- }
- case OpAlternate:
- for i, sub := range re.Sub {
- if i > 0 {
- b.WriteRune('|')
- }
- writeRegexp(b, sub)
- }
- }
-}
-
-func (re *Regexp) String() string {
- var b bytes.Buffer
- writeRegexp(&b, re)
- return b.String()
-}
-
-const meta = `\.+*?()|[]{}^$`
-
-func escape(b *bytes.Buffer, r rune, force bool) {
- if unicode.IsPrint(r) {
- if strings.IndexRune(meta, r) >= 0 || force {
- b.WriteRune('\\')
- }
- b.WriteRune(r)
- return
- }
-
- switch r {
- case '\a':
- b.WriteString(`\a`)
- case '\f':
- b.WriteString(`\f`)
- case '\n':
- b.WriteString(`\n`)
- case '\r':
- b.WriteString(`\r`)
- case '\t':
- b.WriteString(`\t`)
- case '\v':
- b.WriteString(`\v`)
- default:
- if r < 0x100 {
- b.WriteString(`\x`)
- s := strconv.FormatInt(int64(r), 16)
- if len(s) == 1 {
- b.WriteRune('0')
- }
- b.WriteString(s)
- break
- }
- b.WriteString(`\x{`)
- b.WriteString(strconv.FormatInt(int64(r), 16))
- b.WriteString(`}`)
- }
-}
-
-// MaxCap walks the regexp to find the maximum capture index.
-func (re *Regexp) MaxCap() int {
- m := 0
- if re.Op == OpCapture {
- m = re.Cap
- }
- for _, sub := range re.Sub {
- if n := sub.MaxCap(); m < n {
- m = n
- }
- }
- return m
-}
-
-// CapNames walks the regexp to find the names of capturing groups.
-func (re *Regexp) CapNames() []string {
- names := make([]string, re.MaxCap()+1)
- re.capNames(names)
- return names
-}
-
-func (re *Regexp) capNames(names []string) {
- if re.Op == OpCapture {
- names[re.Cap] = re.Name
- }
- for _, sub := range re.Sub {
- sub.capNames(names)
- }
-}
diff --git a/src/pkg/regexp/syntax/simplify.go b/src/pkg/regexp/syntax/simplify.go
deleted file mode 100644
index 72390417b..000000000
--- a/src/pkg/regexp/syntax/simplify.go
+++ /dev/null
@@ -1,151 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package syntax
-
-// Simplify returns a regexp equivalent to re but without counted repetitions
-// and with various other simplifications, such as rewriting /(?:a+)+/ to /a+/.
-// The resulting regexp will execute correctly but its string representation
-// will not produce the same parse tree, because capturing parentheses
-// may have been duplicated or removed. For example, the simplified form
-// for /(x){1,2}/ is /(x)(x)?/ but both parentheses capture as $1.
-// The returned regexp may share structure with or be the original.
-func (re *Regexp) Simplify() *Regexp {
- if re == nil {
- return nil
- }
- switch re.Op {
- case OpCapture, OpConcat, OpAlternate:
- // Simplify children, building new Regexp if children change.
- nre := re
- for i, sub := range re.Sub {
- nsub := sub.Simplify()
- if nre == re && nsub != sub {
- // Start a copy.
- nre = new(Regexp)
- *nre = *re
- nre.Rune = nil
- nre.Sub = append(nre.Sub0[:0], re.Sub[:i]...)
- }
- if nre != re {
- nre.Sub = append(nre.Sub, nsub)
- }
- }
- return nre
-
- case OpStar, OpPlus, OpQuest:
- sub := re.Sub[0].Simplify()
- return simplify1(re.Op, re.Flags, sub, re)
-
- case OpRepeat:
- // Special special case: x{0} matches the empty string
- // and doesn't even need to consider x.
- if re.Min == 0 && re.Max == 0 {
- return &Regexp{Op: OpEmptyMatch}
- }
-
- // The fun begins.
- sub := re.Sub[0].Simplify()
-
- // x{n,} means at least n matches of x.
- if re.Max == -1 {
- // Special case: x{0,} is x*.
- if re.Min == 0 {
- return simplify1(OpStar, re.Flags, sub, nil)
- }
-
- // Special case: x{1,} is x+.
- if re.Min == 1 {
- return simplify1(OpPlus, re.Flags, sub, nil)
- }
-
- // General case: x{4,} is xxxx+.
- nre := &Regexp{Op: OpConcat}
- nre.Sub = nre.Sub0[:0]
- for i := 0; i < re.Min-1; i++ {
- nre.Sub = append(nre.Sub, sub)
- }
- nre.Sub = append(nre.Sub, simplify1(OpPlus, re.Flags, sub, nil))
- return nre
- }
-
- // Special case x{0} handled above.
-
- // Special case: x{1} is just x.
- if re.Min == 1 && re.Max == 1 {
- return sub
- }
-
- // General case: x{n,m} means n copies of x and m-n copies of x?
- // The machine will do less work if we nest the final m copies,
- // so that x{2,5} = xx(x(x(x)?)?)?
-
- // Build leading prefix: xx.
- var prefix *Regexp
- if re.Min > 0 {
- prefix = &Regexp{Op: OpConcat}
- prefix.Sub = prefix.Sub0[:0]
- for i := 0; i < re.Min; i++ {
- prefix.Sub = append(prefix.Sub, sub)
- }
- }
-
- // Build and attach suffix: (x(x(x)?)?)?
- if re.Max > re.Min {
- suffix := simplify1(OpQuest, re.Flags, sub, nil)
- for i := re.Min + 1; i < re.Max; i++ {
- nre2 := &Regexp{Op: OpConcat}
- nre2.Sub = append(nre2.Sub0[:0], sub, suffix)
- suffix = simplify1(OpQuest, re.Flags, nre2, nil)
- }
- if prefix == nil {
- return suffix
- }
- prefix.Sub = append(prefix.Sub, suffix)
- }
- if prefix != nil {
- return prefix
- }
-
- // Some degenerate case like min > max or min < max < 0.
- // Handle as impossible match.
- return &Regexp{Op: OpNoMatch}
- }
-
- return re
-}
-
-// simplify1 implements Simplify for the unary OpStar,
-// OpPlus, and OpQuest operators. It returns the simple regexp
-// equivalent to
-//
-// Regexp{Op: op, Flags: flags, Sub: {sub}}
-//
-// under the assumption that sub is already simple, and
-// without first allocating that structure. If the regexp
-// to be returned turns out to be equivalent to re, simplify1
-// returns re instead.
-//
-// simplify1 is factored out of Simplify because the implementation
-// for other operators generates these unary expressions.
-// Letting them call simplify1 makes sure the expressions they
-// generate are simple.
-func simplify1(op Op, flags Flags, sub, re *Regexp) *Regexp {
- // Special case: repeat the empty string as much as
- // you want, but it's still the empty string.
- if sub.Op == OpEmptyMatch {
- return sub
- }
- // The operators are idempotent if the flags match.
- if op == sub.Op && flags&NonGreedy == sub.Flags&NonGreedy {
- return sub
- }
- if re != nil && re.Op == op && re.Flags&NonGreedy == flags&NonGreedy && sub == re.Sub[0] {
- return re
- }
-
- re = &Regexp{Op: op, Flags: flags}
- re.Sub = append(re.Sub0[:0], sub)
- return re
-}
diff --git a/src/pkg/regexp/syntax/simplify_test.go b/src/pkg/regexp/syntax/simplify_test.go
deleted file mode 100644
index 879eff5be..000000000
--- a/src/pkg/regexp/syntax/simplify_test.go
+++ /dev/null
@@ -1,151 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package syntax
-
-import "testing"
-
-var simplifyTests = []struct {
- Regexp string
- Simple string
-}{
- // Already-simple constructs
- {`a`, `a`},
- {`ab`, `ab`},
- {`a|b`, `[a-b]`},
- {`ab|cd`, `ab|cd`},
- {`(ab)*`, `(ab)*`},
- {`(ab)+`, `(ab)+`},
- {`(ab)?`, `(ab)?`},
- {`.`, `(?s:.)`},
- {`^`, `^`},
- {`$`, `$`},
- {`[ac]`, `[ac]`},
- {`[^ac]`, `[^ac]`},
-
- // Posix character classes
- {`[[:alnum:]]`, `[0-9A-Za-z]`},
- {`[[:alpha:]]`, `[A-Za-z]`},
- {`[[:blank:]]`, `[\t ]`},
- {`[[:cntrl:]]`, `[\x00-\x1f\x7f]`},
- {`[[:digit:]]`, `[0-9]`},
- {`[[:graph:]]`, `[!-~]`},
- {`[[:lower:]]`, `[a-z]`},
- {`[[:print:]]`, `[ -~]`},
- {`[[:punct:]]`, "[!-/:-@\\[-`\\{-~]"},
- {`[[:space:]]`, `[\t-\r ]`},
- {`[[:upper:]]`, `[A-Z]`},
- {`[[:xdigit:]]`, `[0-9A-Fa-f]`},
-
- // Perl character classes
- {`\d`, `[0-9]`},
- {`\s`, `[\t-\n\f-\r ]`},
- {`\w`, `[0-9A-Z_a-z]`},
- {`\D`, `[^0-9]`},
- {`\S`, `[^\t-\n\f-\r ]`},
- {`\W`, `[^0-9A-Z_a-z]`},
- {`[\d]`, `[0-9]`},
- {`[\s]`, `[\t-\n\f-\r ]`},
- {`[\w]`, `[0-9A-Z_a-z]`},
- {`[\D]`, `[^0-9]`},
- {`[\S]`, `[^\t-\n\f-\r ]`},
- {`[\W]`, `[^0-9A-Z_a-z]`},
-
- // Posix repetitions
- {`a{1}`, `a`},
- {`a{2}`, `aa`},
- {`a{5}`, `aaaaa`},
- {`a{0,1}`, `a?`},
- // The next three are illegible because Simplify inserts (?:)
- // parens instead of () parens to avoid creating extra
- // captured subexpressions. The comments show a version with fewer parens.
- {`(a){0,2}`, `(?:(a)(a)?)?`}, // (aa?)?
- {`(a){0,4}`, `(?:(a)(?:(a)(?:(a)(a)?)?)?)?`}, // (a(a(aa?)?)?)?
- {`(a){2,6}`, `(a)(a)(?:(a)(?:(a)(?:(a)(a)?)?)?)?`}, // aa(a(a(aa?)?)?)?
- {`a{0,2}`, `(?:aa?)?`}, // (aa?)?
- {`a{0,4}`, `(?:a(?:a(?:aa?)?)?)?`}, // (a(a(aa?)?)?)?
- {`a{2,6}`, `aa(?:a(?:a(?:aa?)?)?)?`}, // aa(a(a(aa?)?)?)?
- {`a{0,}`, `a*`},
- {`a{1,}`, `a+`},
- {`a{2,}`, `aa+`},
- {`a{5,}`, `aaaaa+`},
-
- // Test that operators simplify their arguments.
- {`(?:a{1,}){1,}`, `a+`},
- {`(a{1,}b{1,})`, `(a+b+)`},
- {`a{1,}|b{1,}`, `a+|b+`},
- {`(?:a{1,})*`, `(?:a+)*`},
- {`(?:a{1,})+`, `a+`},
- {`(?:a{1,})?`, `(?:a+)?`},
- {``, `(?:)`},
- {`a{0}`, `(?:)`},
-
- // Character class simplification
- {`[ab]`, `[a-b]`},
- {`[a-za-za-z]`, `[a-z]`},
- {`[A-Za-zA-Za-z]`, `[A-Za-z]`},
- {`[ABCDEFGH]`, `[A-H]`},
- {`[AB-CD-EF-GH]`, `[A-H]`},
- {`[W-ZP-XE-R]`, `[E-Z]`},
- {`[a-ee-gg-m]`, `[a-m]`},
- {`[a-ea-ha-m]`, `[a-m]`},
- {`[a-ma-ha-e]`, `[a-m]`},
- {`[a-zA-Z0-9 -~]`, `[ -~]`},
-
- // Empty character classes
- {`[^[:cntrl:][:^cntrl:]]`, `[^\x00-\x{10FFFF}]`},
-
- // Full character classes
- {`[[:cntrl:][:^cntrl:]]`, `(?s:.)`},
-
- // Unicode case folding.
- {`(?i)A`, `(?i:A)`},
- {`(?i)a`, `(?i:A)`},
- {`(?i)[A]`, `(?i:A)`},
- {`(?i)[a]`, `(?i:A)`},
- {`(?i)K`, `(?i:K)`},
- {`(?i)k`, `(?i:K)`},
- {`(?i)\x{212a}`, "(?i:K)"},
- {`(?i)[K]`, "[Kk\u212A]"},
- {`(?i)[k]`, "[Kk\u212A]"},
- {`(?i)[\x{212a}]`, "[Kk\u212A]"},
- {`(?i)[a-z]`, "[A-Za-z\u017F\u212A]"},
- {`(?i)[\x00-\x{FFFD}]`, "[\\x00-\uFFFD]"},
- {`(?i)[\x00-\x{10FFFF}]`, `(?s:.)`},
-
- // Empty string as a regular expression.
- // The empty string must be preserved inside parens in order
- // to make submatches work right, so these tests are less
- // interesting than they might otherwise be. String inserts
- // explicit (?:) in place of non-parenthesized empty strings,
- // to make them easier to spot for other parsers.
- {`(a|b|)`, `([a-b]|(?:))`},
- {`(|)`, `()`},
- {`a()`, `a()`},
- {`(()|())`, `(()|())`},
- {`(a|)`, `(a|(?:))`},
- {`ab()cd()`, `ab()cd()`},
- {`()`, `()`},
- {`()*`, `()*`},
- {`()+`, `()+`},
- {`()?`, `()?`},
- {`(){0}`, `(?:)`},
- {`(){1}`, `()`},
- {`(){1,}`, `()+`},
- {`(){0,2}`, `(?:()()?)?`},
-}
-
-func TestSimplify(t *testing.T) {
- for _, tt := range simplifyTests {
- re, err := Parse(tt.Regexp, MatchNL|Perl&^OneLine)
- if err != nil {
- t.Errorf("Parse(%#q) = error %v", tt.Regexp, err)
- continue
- }
- s := re.Simplify().String()
- if s != tt.Simple {
- t.Errorf("Simplify(%#q) = %#q, want %#q", tt.Regexp, s, tt.Simple)
- }
- }
-}
diff --git a/src/pkg/regexp/testdata/README b/src/pkg/regexp/testdata/README
deleted file mode 100644
index b1b301be8..000000000
--- a/src/pkg/regexp/testdata/README
+++ /dev/null
@@ -1,23 +0,0 @@
-AT&T POSIX Test Files
-See testregex.c for copyright + license.
-
-testregex.c http://www2.research.att.com/~gsf/testregex/testregex.c
-basic.dat http://www2.research.att.com/~gsf/testregex/basic.dat
-nullsubexpr.dat http://www2.research.att.com/~gsf/testregex/nullsubexpr.dat
-repetition.dat http://www2.research.att.com/~gsf/testregex/repetition.dat
-
-The test data has been edited to reflect RE2/Go differences:
- * In a star of a possibly empty match like (a*)* matching x,
- the no match case runs the starred subexpression zero times,
- not once. This is consistent with (a*)* matching a, which
- runs the starred subexpression one time, not twice.
- * The submatch choice is first match, not the POSIX rule.
-
-Such changes are marked with 'RE2/Go'.
-
-
-RE2 Test Files
-
-re2-exhaustive.txt.bz2 and re2-search.txt are built by running
-'make log' in the RE2 distribution. http://code.google.com/p/re2/.
-The exhaustive file is compressed because it is huge.
diff --git a/src/pkg/regexp/testdata/basic.dat b/src/pkg/regexp/testdata/basic.dat
deleted file mode 100644
index 7859290ba..000000000
--- a/src/pkg/regexp/testdata/basic.dat
+++ /dev/null
@@ -1,221 +0,0 @@
-NOTE all standard compliant implementations should pass these : 2002-05-31
-
-BE abracadabra$ abracadabracadabra (7,18)
-BE a...b abababbb (2,7)
-BE XXXXXX ..XXXXXX (2,8)
-E \) () (1,2)
-BE a] a]a (0,2)
-B } } (0,1)
-E \} } (0,1)
-BE \] ] (0,1)
-B ] ] (0,1)
-E ] ] (0,1)
-B { { (0,1)
-B } } (0,1)
-BE ^a ax (0,1)
-BE \^a a^a (1,3)
-BE a\^ a^ (0,2)
-BE a$ aa (1,2)
-BE a\$ a$ (0,2)
-BE ^$ NULL (0,0)
-E $^ NULL (0,0)
-E a($) aa (1,2)(2,2)
-E a*(^a) aa (0,1)(0,1)
-E (..)*(...)* a (0,0)
-E (..)*(...)* abcd (0,4)(2,4)
-E (ab|a)(bc|c) abc (0,3)(0,2)(2,3)
-E (ab)c|abc abc (0,3)(0,2)
-E a{0}b ab (1,2)
-E (a*)(b?)(b+)b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7)
-E (a*)(b{0,1})(b{1,})b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7)
-E a{9876543210} NULL BADBR
-E ((a|a)|a) a (0,1)(0,1)(0,1)
-E (a*)(a|aa) aaaa (0,4)(0,3)(3,4)
-E a*(a.|aa) aaaa (0,4)(2,4)
-E a(b)|c(d)|a(e)f aef (0,3)(?,?)(?,?)(1,2)
-E (a|b)?.* b (0,1)(0,1)
-E (a|b)c|a(b|c) ac (0,2)(0,1)
-E (a|b)c|a(b|c) ab (0,2)(?,?)(1,2)
-E (a|b)*c|(a|ab)*c abc (0,3)(1,2)
-E (a|b)*c|(a|ab)*c xc (1,2)
-E (.a|.b).*|.*(.a|.b) xa (0,2)(0,2)
-E a?(ab|ba)ab abab (0,4)(0,2)
-E a?(ac{0}b|ba)ab abab (0,4)(0,2)
-E ab|abab abbabab (0,2)
-E aba|bab|bba baaabbbaba (5,8)
-E aba|bab baaabbbaba (6,9)
-E (aa|aaa)*|(a|aaaaa) aa (0,2)(0,2)
-E (a.|.a.)*|(a|.a...) aa (0,2)(0,2)
-E ab|a xabc (1,3)
-E ab|a xxabc (2,4)
-Ei (Ab|cD)* aBcD (0,4)(2,4)
-BE [^-] --a (2,3)
-BE [a-]* --a (0,3)
-BE [a-m-]* --amoma-- (0,4)
-E :::1:::0:|:::1:1:0: :::0:::1:::1:::0: (8,17)
-E :::1:::0:|:::1:1:1: :::0:::1:::1:::0: (8,17)
-{E [[:upper:]] A (0,1) [[<element>]] not supported
-E [[:lower:]]+ `az{ (1,3)
-E [[:upper:]]+ @AZ[ (1,3)
-# No collation in Go
-#BE [[-]] [[-]] (2,4)
-#BE [[.NIL.]] NULL ECOLLATE
-#BE [[=aleph=]] NULL ECOLLATE
-}
-BE$ \n \n (0,1)
-BEn$ \n \n (0,1)
-BE$ [^a] \n (0,1)
-BE$ \na \na (0,2)
-E (a)(b)(c) abc (0,3)(0,1)(1,2)(2,3)
-BE xxx xxx (0,3)
-E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 6, (0,6)
-E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) 2/7 (0,3)
-E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 1,Feb 6 (5,11)
-E3 ((((((((((((((((((((((((((((((x)))))))))))))))))))))))))))))) x (0,1)(0,1)(0,1)
-E3 ((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))* xx (0,2)(1,2)(1,2)
-E a?(ab|ba)* ababababababababababababababababababababababababababababababababababababababababa (0,81)(79,81)
-E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabbbbaa (18,25)
-E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabaa (18,22)
-E aaac|aabc|abac|abbc|baac|babc|bbac|bbbc baaabbbabac (7,11)
-BE$ .* \x01\xff (0,2)
-E aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa (53,57)
-L aaaa\nbbbb\ncccc\nddddd\neeeeee\nfffffff\ngggg\nhhhh\niiiii\njjjjj\nkkkkk\nllll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa NOMATCH
-E a*a*a*a*a*b aaaaaaaaab (0,10)
-BE ^ NULL (0,0)
-BE $ NULL (0,0)
-BE ^$ NULL (0,0)
-BE ^a$ a (0,1)
-BE abc abc (0,3)
-BE abc xabcy (1,4)
-BE abc ababc (2,5)
-BE ab*c abc (0,3)
-BE ab*bc abc (0,3)
-BE ab*bc abbc (0,4)
-BE ab*bc abbbbc (0,6)
-E ab+bc abbc (0,4)
-E ab+bc abbbbc (0,6)
-E ab?bc abbc (0,4)
-E ab?bc abc (0,3)
-E ab?c abc (0,3)
-BE ^abc$ abc (0,3)
-BE ^abc abcc (0,3)
-BE abc$ aabc (1,4)
-BE ^ abc (0,0)
-BE $ abc (3,3)
-BE a.c abc (0,3)
-BE a.c axc (0,3)
-BE a.*c axyzc (0,5)
-BE a[bc]d abd (0,3)
-BE a[b-d]e ace (0,3)
-BE a[b-d] aac (1,3)
-BE a[-b] a- (0,2)
-BE a[b-] a- (0,2)
-BE a] a] (0,2)
-BE a[]]b a]b (0,3)
-BE a[^bc]d aed (0,3)
-BE a[^-b]c adc (0,3)
-BE a[^]b]c adc (0,3)
-E ab|cd abc (0,2)
-E ab|cd abcd (0,2)
-E a\(b a(b (0,3)
-E a\(*b ab (0,2)
-E a\(*b a((b (0,4)
-E ((a)) abc (0,1)(0,1)(0,1)
-E (a)b(c) abc (0,3)(0,1)(2,3)
-E a+b+c aabbabc (4,7)
-E a* aaa (0,3)
-#E (a*)* - (0,0)(0,0)
-E (a*)* - (0,0)(?,?) RE2/Go
-E (a*)+ - (0,0)(0,0)
-#E (a*|b)* - (0,0)(0,0)
-E (a*|b)* - (0,0)(?,?) RE2/Go
-E (a+|b)* ab (0,2)(1,2)
-E (a+|b)+ ab (0,2)(1,2)
-E (a+|b)? ab (0,1)(0,1)
-BE [^ab]* cde (0,3)
-#E (^)* - (0,0)(0,0)
-E (^)* - (0,0)(?,?) RE2/Go
-BE a* NULL (0,0)
-E ([abc])*d abbbcd (0,6)(4,5)
-E ([abc])*bcd abcd (0,4)(0,1)
-E a|b|c|d|e e (0,1)
-E (a|b|c|d|e)f ef (0,2)(0,1)
-#E ((a*|b))* - (0,0)(0,0)(0,0)
-E ((a*|b))* - (0,0)(?,?)(?,?) RE2/Go
-BE abcd*efg abcdefg (0,7)
-BE ab* xabyabbbz (1,3)
-BE ab* xayabbbz (1,2)
-E (ab|cd)e abcde (2,5)(2,4)
-BE [abhgefdc]ij hij (0,3)
-E (a|b)c*d abcd (1,4)(1,2)
-E (ab|ab*)bc abc (0,3)(0,1)
-E a([bc]*)c* abc (0,3)(1,3)
-E a([bc]*)(c*d) abcd (0,4)(1,3)(3,4)
-E a([bc]+)(c*d) abcd (0,4)(1,3)(3,4)
-E a([bc]*)(c+d) abcd (0,4)(1,2)(2,4)
-E a[bcd]*dcdcde adcdcde (0,7)
-E (ab|a)b*c abc (0,3)(0,2)
-E ((a)(b)c)(d) abcd (0,4)(0,3)(0,1)(1,2)(3,4)
-BE [A-Za-z_][A-Za-z0-9_]* alpha (0,5)
-E ^a(bc+|b[eh])g|.h$ abh (1,3)
-E (bc+d$|ef*g.|h?i(j|k)) effgz (0,5)(0,5)
-E (bc+d$|ef*g.|h?i(j|k)) ij (0,2)(0,2)(1,2)
-E (bc+d$|ef*g.|h?i(j|k)) reffgz (1,6)(1,6)
-E (((((((((a))))))))) a (0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)
-BE multiple words multiple words yeah (0,14)
-E (.*)c(.*) abcde (0,5)(0,2)(3,5)
-BE abcd abcd (0,4)
-E a(bc)d abcd (0,4)(1,3)
-E a[-]?c ac (0,3)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qaddafi (0,15)(?,?)(10,12)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mo'ammar Gadhafi (0,16)(?,?)(11,13)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Kaddafi (0,15)(?,?)(10,12)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qadhafi (0,15)(?,?)(10,12)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gadafi (0,14)(?,?)(10,11)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadafi (0,15)(?,?)(11,12)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moamar Gaddafi (0,14)(?,?)(9,11)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadhdhafi (0,18)(?,?)(13,15)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Khaddafi (0,16)(?,?)(11,13)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafy (0,16)(?,?)(11,13)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghadafi (0,15)(?,?)(11,12)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafi (0,16)(?,?)(11,13)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muamar Kaddafi (0,14)(?,?)(9,11)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Quathafi (0,16)(?,?)(11,13)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gheddafi (0,16)(?,?)(11,13)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Khadafy (0,15)(?,?)(11,12)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Qudhafi (0,15)(?,?)(10,12)
-E a+(b|c)*d+ aabcdd (0,6)(3,4)
-E ^.+$ vivi (0,4)
-E ^(.+)$ vivi (0,4)(0,4)
-E ^([^!.]+).att.com!(.+)$ gryphon.att.com!eby (0,19)(0,7)(16,19)
-E ^([^!]+!)?([^!]+)$ bas (0,3)(?,?)(0,3)
-E ^([^!]+!)?([^!]+)$ bar!bas (0,7)(0,4)(4,7)
-E ^([^!]+!)?([^!]+)$ foo!bas (0,7)(0,4)(4,7)
-E ^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(4,8)(8,11)
-E ((foo)|(bar))!bas bar!bas (0,7)(0,3)(?,?)(0,3)
-E ((foo)|(bar))!bas foo!bar!bas (4,11)(4,7)(?,?)(4,7)
-E ((foo)|(bar))!bas foo!bas (0,7)(0,3)(0,3)
-E ((foo)|bar)!bas bar!bas (0,7)(0,3)
-E ((foo)|bar)!bas foo!bar!bas (4,11)(4,7)
-E ((foo)|bar)!bas foo!bas (0,7)(0,3)(0,3)
-E (foo|(bar))!bas bar!bas (0,7)(0,3)(0,3)
-E (foo|(bar))!bas foo!bar!bas (4,11)(4,7)(4,7)
-E (foo|(bar))!bas foo!bas (0,7)(0,3)
-E (foo|bar)!bas bar!bas (0,7)(0,3)
-E (foo|bar)!bas foo!bar!bas (4,11)(4,7)
-E (foo|bar)!bas foo!bas (0,7)(0,3)
-E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
-E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bas (0,3)(?,?)(0,3)
-E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bar!bas (0,7)(0,4)(4,7)
-E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(?,?)(?,?)(4,8)(8,11)
-E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bas (0,7)(0,4)(4,7)
-E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bas (0,3)(0,3)(?,?)(0,3)
-E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bar!bas (0,7)(0,7)(0,4)(4,7)
-E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
-E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bas (0,7)(0,7)(0,4)(4,7)
-E .*(/XXX).* /XXX (0,4)(0,4)
-E .*(\\XXX).* \XXX (0,4)(0,4)
-E \\XXX \XXX (0,4)
-E .*(/000).* /000 (0,4)(0,4)
-E .*(\\000).* \000 (0,4)(0,4)
-E \\000 \000 (0,4)
diff --git a/src/pkg/regexp/testdata/nullsubexpr.dat b/src/pkg/regexp/testdata/nullsubexpr.dat
deleted file mode 100644
index 2e18fbb91..000000000
--- a/src/pkg/regexp/testdata/nullsubexpr.dat
+++ /dev/null
@@ -1,79 +0,0 @@
-NOTE null subexpression matches : 2002-06-06
-
-E (a*)* a (0,1)(0,1)
-#E SAME x (0,0)(0,0)
-E SAME x (0,0)(?,?) RE2/Go
-E SAME aaaaaa (0,6)(0,6)
-E SAME aaaaaax (0,6)(0,6)
-E (a*)+ a (0,1)(0,1)
-E SAME x (0,0)(0,0)
-E SAME aaaaaa (0,6)(0,6)
-E SAME aaaaaax (0,6)(0,6)
-E (a+)* a (0,1)(0,1)
-E SAME x (0,0)
-E SAME aaaaaa (0,6)(0,6)
-E SAME aaaaaax (0,6)(0,6)
-E (a+)+ a (0,1)(0,1)
-E SAME x NOMATCH
-E SAME aaaaaa (0,6)(0,6)
-E SAME aaaaaax (0,6)(0,6)
-
-E ([a]*)* a (0,1)(0,1)
-#E SAME x (0,0)(0,0)
-E SAME x (0,0)(?,?) RE2/Go
-E SAME aaaaaa (0,6)(0,6)
-E SAME aaaaaax (0,6)(0,6)
-E ([a]*)+ a (0,1)(0,1)
-E SAME x (0,0)(0,0)
-E SAME aaaaaa (0,6)(0,6)
-E SAME aaaaaax (0,6)(0,6)
-E ([^b]*)* a (0,1)(0,1)
-#E SAME b (0,0)(0,0)
-E SAME b (0,0)(?,?) RE2/Go
-E SAME aaaaaa (0,6)(0,6)
-E SAME aaaaaab (0,6)(0,6)
-E ([ab]*)* a (0,1)(0,1)
-E SAME aaaaaa (0,6)(0,6)
-E SAME ababab (0,6)(0,6)
-E SAME bababa (0,6)(0,6)
-E SAME b (0,1)(0,1)
-E SAME bbbbbb (0,6)(0,6)
-E SAME aaaabcde (0,5)(0,5)
-E ([^a]*)* b (0,1)(0,1)
-E SAME bbbbbb (0,6)(0,6)
-#E SAME aaaaaa (0,0)(0,0)
-E SAME aaaaaa (0,0)(?,?) RE2/Go
-E ([^ab]*)* ccccxx (0,6)(0,6)
-#E SAME ababab (0,0)(0,0)
-E SAME ababab (0,0)(?,?) RE2/Go
-
-E ((z)+|a)* zabcde (0,2)(1,2)
-
-#{E a+? aaaaaa (0,1) no *? +? minimal match ops
-#E (a) aaa (0,1)(0,1)
-#E (a*?) aaa (0,0)(0,0)
-#E (a)*? aaa (0,0)
-#E (a*?)*? aaa (0,0)
-#}
-
-B \(a*\)*\(x\) x (0,1)(0,0)(0,1)
-B \(a*\)*\(x\) ax (0,2)(0,1)(1,2)
-B \(a*\)*\(x\) axa (0,2)(0,1)(1,2)
-B \(a*\)*\(x\)\(\1\) x (0,1)(0,0)(0,1)(1,1)
-B \(a*\)*\(x\)\(\1\) ax (0,2)(1,1)(1,2)(2,2)
-B \(a*\)*\(x\)\(\1\) axa (0,3)(0,1)(1,2)(2,3)
-B \(a*\)*\(x\)\(\1\)\(x\) axax (0,4)(0,1)(1,2)(2,3)(3,4)
-B \(a*\)*\(x\)\(\1\)\(x\) axxa (0,3)(1,1)(1,2)(2,2)(2,3)
-
-#E (a*)*(x) x (0,1)(0,0)(0,1)
-E (a*)*(x) x (0,1)(?,?)(0,1) RE2/Go
-E (a*)*(x) ax (0,2)(0,1)(1,2)
-E (a*)*(x) axa (0,2)(0,1)(1,2)
-
-E (a*)+(x) x (0,1)(0,0)(0,1)
-E (a*)+(x) ax (0,2)(0,1)(1,2)
-E (a*)+(x) axa (0,2)(0,1)(1,2)
-
-E (a*){2}(x) x (0,1)(0,0)(0,1)
-E (a*){2}(x) ax (0,2)(1,1)(1,2)
-E (a*){2}(x) axa (0,2)(1,1)(1,2)
diff --git a/src/pkg/regexp/testdata/re2-exhaustive.txt.bz2 b/src/pkg/regexp/testdata/re2-exhaustive.txt.bz2
deleted file mode 100644
index a357f2801..000000000
--- a/src/pkg/regexp/testdata/re2-exhaustive.txt.bz2
+++ /dev/null
Binary files differ
diff --git a/src/pkg/regexp/testdata/re2-search.txt b/src/pkg/regexp/testdata/re2-search.txt
deleted file mode 100644
index f648e5527..000000000
--- a/src/pkg/regexp/testdata/re2-search.txt
+++ /dev/null
@@ -1,3667 +0,0 @@
-# RE2 basic search tests built by make log
-# Thu Sep 8 13:43:43 EDT 2011
-Regexp.SearchTests
-strings
-""
-"a"
-regexps
-"a"
--;-;-;-
-0-1;0-1;0-1;0-1
-"^(?:a)$"
--;-;-;-
-0-1;0-1;0-1;0-1
-"^(?:a)"
--;-;-;-
-0-1;0-1;0-1;0-1
-"(?:a)$"
--;-;-;-
-0-1;0-1;0-1;0-1
-strings
-""
-"zyzzyva"
-regexps
-"a"
--;-;-;-
--;6-7;-;6-7
-"^(?:a)$"
--;-;-;-
--;-;-;-
-"^(?:a)"
--;-;-;-
--;-;-;-
-"(?:a)$"
--;-;-;-
--;6-7;-;6-7
-strings
-""
-"aa"
-regexps
-"a+"
--;-;-;-
-0-2;0-2;0-2;0-2
-"^(?:a+)$"
--;-;-;-
-0-2;0-2;0-2;0-2
-"^(?:a+)"
--;-;-;-
-0-2;0-2;0-2;0-2
-"(?:a+)$"
--;-;-;-
-0-2;0-2;0-2;0-2
-strings
-""
-"ab"
-regexps
-"(a+|b)+"
--;-;-;-
-0-2 1-2;0-2 1-2;0-2 1-2;0-2 1-2
-"^(?:(a+|b)+)$"
--;-;-;-
-0-2 1-2;0-2 1-2;0-2 1-2;0-2 1-2
-"^(?:(a+|b)+)"
--;-;-;-
-0-2 1-2;0-2 1-2;0-2 1-2;0-2 1-2
-"(?:(a+|b)+)$"
--;-;-;-
-0-2 1-2;0-2 1-2;0-2 1-2;0-2 1-2
-strings
-""
-"xabcdx"
-regexps
-"ab|cd"
--;-;-;-
--;1-3;-;1-3
-"^(?:ab|cd)$"
--;-;-;-
--;-;-;-
-"^(?:ab|cd)"
--;-;-;-
--;-;-;-
-"(?:ab|cd)$"
--;-;-;-
--;-;-;-
-strings
-""
-"hello\ngoodbye\n"
-regexps
-"h.*od?"
--;-;-;-
--;0-5;-;0-5
-"^(?:h.*od?)$"
--;-;-;-
--;-;-;-
-"^(?:h.*od?)"
--;-;-;-
--;0-5;-;0-5
-"(?:h.*od?)$"
--;-;-;-
--;-;-;-
-strings
-""
-"hello\ngoodbye\n"
-regexps
-"h.*o"
--;-;-;-
--;0-5;-;0-5
-"^(?:h.*o)$"
--;-;-;-
--;-;-;-
-"^(?:h.*o)"
--;-;-;-
--;0-5;-;0-5
-"(?:h.*o)$"
--;-;-;-
--;-;-;-
-strings
-""
-"goodbye\nhello\n"
-regexps
-"h.*o"
--;-;-;-
--;8-13;-;8-13
-"^(?:h.*o)$"
--;-;-;-
--;-;-;-
-"^(?:h.*o)"
--;-;-;-
--;-;-;-
-"(?:h.*o)$"
--;-;-;-
--;-;-;-
-strings
-""
-"hello world"
-regexps
-"h.*o"
--;-;-;-
--;0-8;-;0-8
-"^(?:h.*o)$"
--;-;-;-
--;-;-;-
-"^(?:h.*o)"
--;-;-;-
--;0-8;-;0-8
-"(?:h.*o)$"
--;-;-;-
--;-;-;-
-strings
-""
-"othello, world"
-regexps
-"h.*o"
--;-;-;-
--;2-11;-;2-11
-"^(?:h.*o)$"
--;-;-;-
--;-;-;-
-"^(?:h.*o)"
--;-;-;-
--;-;-;-
-"(?:h.*o)$"
--;-;-;-
--;-;-;-
-strings
-""
-"aaaaaaa"
-regexps
-"[^\\s\\S]"
--;-;-;-
--;-;-;-
-"^(?:[^\\s\\S])$"
--;-;-;-
--;-;-;-
-"^(?:[^\\s\\S])"
--;-;-;-
--;-;-;-
-"(?:[^\\s\\S])$"
--;-;-;-
--;-;-;-
-strings
-""
-"aaaaaaa"
-regexps
-"a"
--;-;-;-
--;0-1;-;0-1
-"^(?:a)$"
--;-;-;-
--;-;-;-
-"^(?:a)"
--;-;-;-
--;0-1;-;0-1
-"(?:a)$"
--;-;-;-
--;6-7;-;6-7
-strings
-""
-"aaaaaaa"
-regexps
-"a*"
-0-0;0-0;0-0;0-0
-0-7;0-7;0-7;0-7
-"^(?:a*)$"
-0-0;0-0;0-0;0-0
-0-7;0-7;0-7;0-7
-"^(?:a*)"
-0-0;0-0;0-0;0-0
-0-7;0-7;0-7;0-7
-"(?:a*)$"
-0-0;0-0;0-0;0-0
-0-7;0-7;0-7;0-7
-strings
-""
-""
-regexps
-"a*"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"^(?:a*)$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"^(?:a*)"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"(?:a*)$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-strings
-""
-""
-regexps
-"a*"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"^(?:a*)$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"^(?:a*)"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"(?:a*)$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-strings
-""
-"xabcdx"
-regexps
-"ab|cd"
--;-;-;-
--;1-3;-;1-3
-"^(?:ab|cd)$"
--;-;-;-
--;-;-;-
-"^(?:ab|cd)"
--;-;-;-
--;-;-;-
-"(?:ab|cd)$"
--;-;-;-
--;-;-;-
-strings
-""
-"cab"
-regexps
-"a"
--;-;-;-
--;1-2;-;1-2
-"^(?:a)$"
--;-;-;-
--;-;-;-
-"^(?:a)"
--;-;-;-
--;-;-;-
-"(?:a)$"
--;-;-;-
--;-;-;-
-strings
-""
-"cab"
-regexps
-"a*b"
--;-;-;-
--;1-3;-;1-3
-"^(?:a*b)$"
--;-;-;-
--;-;-;-
-"^(?:a*b)"
--;-;-;-
--;-;-;-
-"(?:a*b)$"
--;-;-;-
--;1-3;-;1-3
-strings
-""
-"x"
-regexps
-"((((((((((((((((((((x))))))))))))))))))))"
--;-;-;-
-0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1;0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1;0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1;0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1
-"^(?:((((((((((((((((((((x)))))))))))))))))))))$"
--;-;-;-
-0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1;0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1;0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1;0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1
-"^(?:((((((((((((((((((((x)))))))))))))))))))))"
--;-;-;-
-0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1;0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1;0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1;0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1
-"(?:((((((((((((((((((((x)))))))))))))))))))))$"
--;-;-;-
-0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1;0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1;0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1;0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1
-strings
-""
-"xxxabcdxxx"
-regexps
-"[abcd]"
--;-;-;-
--;3-4;-;3-4
-"^(?:[abcd])$"
--;-;-;-
--;-;-;-
-"^(?:[abcd])"
--;-;-;-
--;-;-;-
-"(?:[abcd])$"
--;-;-;-
--;-;-;-
-strings
-""
-"xxxabcdxxx"
-regexps
-"[^x]"
--;-;-;-
--;3-4;-;3-4
-"^(?:[^x])$"
--;-;-;-
--;-;-;-
-"^(?:[^x])"
--;-;-;-
--;-;-;-
-"(?:[^x])$"
--;-;-;-
--;-;-;-
-strings
-""
-"xxxabcdxxx"
-regexps
-"[abcd]+"
--;-;-;-
--;3-7;-;3-7
-"^(?:[abcd]+)$"
--;-;-;-
--;-;-;-
-"^(?:[abcd]+)"
--;-;-;-
--;-;-;-
-"(?:[abcd]+)$"
--;-;-;-
--;-;-;-
-strings
-""
-"xxxabcdxxx"
-regexps
-"[^x]+"
--;-;-;-
--;3-7;-;3-7
-"^(?:[^x]+)$"
--;-;-;-
--;-;-;-
-"^(?:[^x]+)"
--;-;-;-
--;-;-;-
-"(?:[^x]+)$"
--;-;-;-
--;-;-;-
-strings
-""
-"fo"
-regexps
-"(fo|foo)"
--;-;-;-
-0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
-"^(?:(fo|foo))$"
--;-;-;-
-0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
-"^(?:(fo|foo))"
--;-;-;-
-0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
-"(?:(fo|foo))$"
--;-;-;-
-0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
-strings
-""
-"foo"
-regexps
-"(foo|fo)"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"^(?:(foo|fo))$"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"^(?:(foo|fo))"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"(?:(foo|fo))$"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-strings
-""
-"aA"
-regexps
-"aa"
--;-;-;-
--;-;-;-
-"^(?:aa)$"
--;-;-;-
--;-;-;-
-"^(?:aa)"
--;-;-;-
--;-;-;-
-"(?:aa)$"
--;-;-;-
--;-;-;-
-strings
-""
-"Aa"
-regexps
-"a"
--;-;-;-
--;1-2;-;1-2
-"^(?:a)$"
--;-;-;-
--;-;-;-
-"^(?:a)"
--;-;-;-
--;-;-;-
-"(?:a)$"
--;-;-;-
--;1-2;-;1-2
-strings
-""
-"A"
-regexps
-"a"
--;-;-;-
--;-;-;-
-"^(?:a)$"
--;-;-;-
--;-;-;-
-"^(?:a)"
--;-;-;-
--;-;-;-
-"(?:a)$"
--;-;-;-
--;-;-;-
-strings
-""
-"abc"
-regexps
-"ABC"
--;-;-;-
--;-;-;-
-"^(?:ABC)$"
--;-;-;-
--;-;-;-
-"^(?:ABC)"
--;-;-;-
--;-;-;-
-"(?:ABC)$"
--;-;-;-
--;-;-;-
-strings
-""
-"XABCY"
-regexps
-"abc"
--;-;-;-
--;-;-;-
-"^(?:abc)$"
--;-;-;-
--;-;-;-
-"^(?:abc)"
--;-;-;-
--;-;-;-
-"(?:abc)$"
--;-;-;-
--;-;-;-
-strings
-""
-"xabcy"
-regexps
-"ABC"
--;-;-;-
--;-;-;-
-"^(?:ABC)$"
--;-;-;-
--;-;-;-
-"^(?:ABC)"
--;-;-;-
--;-;-;-
-"(?:ABC)$"
--;-;-;-
--;-;-;-
-strings
-""
-"foo"
-regexps
-"foo|bar|[A-Z]"
--;-;-;-
-0-3;0-3;0-3;0-3
-"^(?:foo|bar|[A-Z])$"
--;-;-;-
-0-3;0-3;0-3;0-3
-"^(?:foo|bar|[A-Z])"
--;-;-;-
-0-3;0-3;0-3;0-3
-"(?:foo|bar|[A-Z])$"
--;-;-;-
-0-3;0-3;0-3;0-3
-strings
-""
-"foo"
-regexps
-"^(foo|bar|[A-Z])"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"^(?:^(foo|bar|[A-Z]))$"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"^(?:^(foo|bar|[A-Z]))"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"(?:^(foo|bar|[A-Z]))$"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-strings
-""
-"foo\n"
-regexps
-"(foo|bar|[A-Z])$"
--;-;-;-
--;-;-;-
-"^(?:(foo|bar|[A-Z])$)$"
--;-;-;-
--;-;-;-
-"^(?:(foo|bar|[A-Z])$)"
--;-;-;-
--;-;-;-
-"(?:(foo|bar|[A-Z])$)$"
--;-;-;-
--;-;-;-
-strings
-""
-"foo"
-regexps
-"(foo|bar|[A-Z])$"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"^(?:(foo|bar|[A-Z])$)$"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"^(?:(foo|bar|[A-Z])$)"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"(?:(foo|bar|[A-Z])$)$"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-strings
-""
-"foo\n"
-regexps
-"^(foo|bar|[A-Z])$"
--;-;-;-
--;-;-;-
-"^(?:^(foo|bar|[A-Z])$)$"
--;-;-;-
--;-;-;-
-"^(?:^(foo|bar|[A-Z])$)"
--;-;-;-
--;-;-;-
-"(?:^(foo|bar|[A-Z])$)$"
--;-;-;-
--;-;-;-
-strings
-""
-"foo"
-regexps
-"^(foo|bar|[A-Z])$"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"^(?:^(foo|bar|[A-Z])$)$"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"^(?:^(foo|bar|[A-Z])$)"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"(?:^(foo|bar|[A-Z])$)$"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-strings
-""
-"bar"
-regexps
-"^(foo|bar|[A-Z])$"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"^(?:^(foo|bar|[A-Z])$)$"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"^(?:^(foo|bar|[A-Z])$)"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"(?:^(foo|bar|[A-Z])$)$"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-strings
-""
-"X"
-regexps
-"^(foo|bar|[A-Z])$"
--;-;-;-
-0-1 0-1;0-1 0-1;0-1 0-1;0-1 0-1
-"^(?:^(foo|bar|[A-Z])$)$"
--;-;-;-
-0-1 0-1;0-1 0-1;0-1 0-1;0-1 0-1
-"^(?:^(foo|bar|[A-Z])$)"
--;-;-;-
-0-1 0-1;0-1 0-1;0-1 0-1;0-1 0-1
-"(?:^(foo|bar|[A-Z])$)$"
--;-;-;-
-0-1 0-1;0-1 0-1;0-1 0-1;0-1 0-1
-strings
-""
-"XY"
-regexps
-"^(foo|bar|[A-Z])$"
--;-;-;-
--;-;-;-
-"^(?:^(foo|bar|[A-Z])$)$"
--;-;-;-
--;-;-;-
-"^(?:^(foo|bar|[A-Z])$)"
--;-;-;-
--;-;-;-
-"(?:^(foo|bar|[A-Z])$)$"
--;-;-;-
--;-;-;-
-strings
-""
-"fo"
-regexps
-"^(fo|foo)$"
--;-;-;-
-0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
-"^(?:^(fo|foo)$)$"
--;-;-;-
-0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
-"^(?:^(fo|foo)$)"
--;-;-;-
-0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
-"(?:^(fo|foo)$)$"
--;-;-;-
-0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
-strings
-""
-"foo"
-regexps
-"^(fo|foo)$"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"^(?:^(fo|foo)$)$"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"^(?:^(fo|foo)$)"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"(?:^(fo|foo)$)$"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-strings
-""
-"fo"
-regexps
-"^^(fo|foo)$"
--;-;-;-
-0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
-"^(?:^^(fo|foo)$)$"
--;-;-;-
-0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
-"^(?:^^(fo|foo)$)"
--;-;-;-
-0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
-"(?:^^(fo|foo)$)$"
--;-;-;-
-0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
-strings
-""
-"foo"
-regexps
-"^^(fo|foo)$"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"^(?:^^(fo|foo)$)$"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"^(?:^^(fo|foo)$)"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"(?:^^(fo|foo)$)$"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-strings
-""
-""
-regexps
-"^$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"^(?:^$)$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"^(?:^$)"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"(?:^$)$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-strings
-""
-"x"
-regexps
-"^$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:^$)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:^$)"
-0-0;0-0;0-0;0-0
--;-;-;-
-"(?:^$)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-strings
-""
-""
-regexps
-"^^$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"^(?:^^$)$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"^(?:^^$)"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"(?:^^$)$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-strings
-""
-""
-regexps
-"^$$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"^(?:^$$)$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"^(?:^$$)"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"(?:^$$)$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-strings
-""
-"x"
-regexps
-"^^$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:^^$)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:^^$)"
-0-0;0-0;0-0;0-0
--;-;-;-
-"(?:^^$)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-strings
-""
-"x"
-regexps
-"^$$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:^$$)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:^$$)"
-0-0;0-0;0-0;0-0
--;-;-;-
-"(?:^$$)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-strings
-""
-""
-regexps
-"^^$$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"^(?:^^$$)$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"^(?:^^$$)"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"(?:^^$$)$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-strings
-""
-"x"
-regexps
-"^^$$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:^^$$)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:^^$$)"
-0-0;0-0;0-0;0-0
--;-;-;-
-"(?:^^$$)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-strings
-""
-""
-regexps
-"^^^^^^^^$$$$$$$$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"^(?:^^^^^^^^$$$$$$$$)$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"^(?:^^^^^^^^$$$$$$$$)"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"(?:^^^^^^^^$$$$$$$$)$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-strings
-""
-"x"
-regexps
-"^"
-0-0;0-0;0-0;0-0
--;0-0;-;0-0
-"^(?:^)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:^)"
-0-0;0-0;0-0;0-0
--;0-0;-;0-0
-"(?:^)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-strings
-""
-"x"
-regexps
-"$"
-0-0;0-0;0-0;0-0
--;1-1;-;1-1
-"^(?:$)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:$)"
-0-0;0-0;0-0;0-0
--;-;-;-
-"(?:$)$"
-0-0;0-0;0-0;0-0
--;1-1;-;1-1
-strings
-""
-"nofoo foo that"
-regexps
-"\\bfoo\\b"
--;-;-;-
--;6-9;-;6-9
-"^(?:\\bfoo\\b)$"
--;-;-;-
--;-;-;-
-"^(?:\\bfoo\\b)"
--;-;-;-
--;-;-;-
-"(?:\\bfoo\\b)$"
--;-;-;-
--;-;-;-
-strings
-""
-"faoa x"
-regexps
-"a\\b"
--;-;-;-
--;3-4;-;3-4
-"^(?:a\\b)$"
--;-;-;-
--;-;-;-
-"^(?:a\\b)"
--;-;-;-
--;-;-;-
-"(?:a\\b)$"
--;-;-;-
--;-;-;-
-strings
-""
-"bar x"
-regexps
-"\\bbar"
--;-;-;-
--;0-3;-;0-3
-"^(?:\\bbar)$"
--;-;-;-
--;-;-;-
-"^(?:\\bbar)"
--;-;-;-
--;0-3;-;0-3
-"(?:\\bbar)$"
--;-;-;-
--;-;-;-
-strings
-""
-"foo\nbar x"
-regexps
-"\\bbar"
--;-;-;-
--;4-7;-;4-7
-"^(?:\\bbar)$"
--;-;-;-
--;-;-;-
-"^(?:\\bbar)"
--;-;-;-
--;-;-;-
-"(?:\\bbar)$"
--;-;-;-
--;-;-;-
-strings
-""
-"foobar"
-regexps
-"bar\\b"
--;-;-;-
--;3-6;-;3-6
-"^(?:bar\\b)$"
--;-;-;-
--;-;-;-
-"^(?:bar\\b)"
--;-;-;-
--;-;-;-
-"(?:bar\\b)$"
--;-;-;-
--;3-6;-;3-6
-strings
-""
-"foobar\nxxx"
-regexps
-"bar\\b"
--;-;-;-
--;3-6;-;3-6
-"^(?:bar\\b)$"
--;-;-;-
--;-;-;-
-"^(?:bar\\b)"
--;-;-;-
--;-;-;-
-"(?:bar\\b)$"
--;-;-;-
--;-;-;-
-strings
-""
-"foo"
-regexps
-"(foo|bar|[A-Z])\\b"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"^(?:(foo|bar|[A-Z])\\b)$"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"^(?:(foo|bar|[A-Z])\\b)"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"(?:(foo|bar|[A-Z])\\b)$"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-strings
-""
-"foo\n"
-regexps
-"(foo|bar|[A-Z])\\b"
--;-;-;-
--;0-3 0-3;-;0-3 0-3
-"^(?:(foo|bar|[A-Z])\\b)$"
--;-;-;-
--;-;-;-
-"^(?:(foo|bar|[A-Z])\\b)"
--;-;-;-
--;0-3 0-3;-;0-3 0-3
-"(?:(foo|bar|[A-Z])\\b)$"
--;-;-;-
--;-;-;-
-strings
-""
-""
-regexps
-"\\b"
--;-;-;-
--;-;-;-
-"^(?:\\b)$"
--;-;-;-
--;-;-;-
-"^(?:\\b)"
--;-;-;-
--;-;-;-
-"(?:\\b)$"
--;-;-;-
--;-;-;-
-strings
-""
-"x"
-regexps
-"\\b"
--;-;-;-
--;0-0;-;0-0
-"^(?:\\b)$"
--;-;-;-
--;-;-;-
-"^(?:\\b)"
--;-;-;-
--;0-0;-;0-0
-"(?:\\b)$"
--;-;-;-
--;1-1;-;1-1
-strings
-""
-"foo"
-regexps
-"\\b(foo|bar|[A-Z])"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"^(?:\\b(foo|bar|[A-Z]))$"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"^(?:\\b(foo|bar|[A-Z]))"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"(?:\\b(foo|bar|[A-Z]))$"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-strings
-""
-"X"
-regexps
-"\\b(foo|bar|[A-Z])\\b"
--;-;-;-
-0-1 0-1;0-1 0-1;0-1 0-1;0-1 0-1
-"^(?:\\b(foo|bar|[A-Z])\\b)$"
--;-;-;-
-0-1 0-1;0-1 0-1;0-1 0-1;0-1 0-1
-"^(?:\\b(foo|bar|[A-Z])\\b)"
--;-;-;-
-0-1 0-1;0-1 0-1;0-1 0-1;0-1 0-1
-"(?:\\b(foo|bar|[A-Z])\\b)$"
--;-;-;-
-0-1 0-1;0-1 0-1;0-1 0-1;0-1 0-1
-strings
-""
-"XY"
-regexps
-"\\b(foo|bar|[A-Z])\\b"
--;-;-;-
--;-;-;-
-"^(?:\\b(foo|bar|[A-Z])\\b)$"
--;-;-;-
--;-;-;-
-"^(?:\\b(foo|bar|[A-Z])\\b)"
--;-;-;-
--;-;-;-
-"(?:\\b(foo|bar|[A-Z])\\b)$"
--;-;-;-
--;-;-;-
-strings
-""
-"bar"
-regexps
-"\\b(foo|bar|[A-Z])\\b"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"^(?:\\b(foo|bar|[A-Z])\\b)$"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"^(?:\\b(foo|bar|[A-Z])\\b)"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"(?:\\b(foo|bar|[A-Z])\\b)$"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-strings
-""
-"foo"
-regexps
-"\\b(foo|bar|[A-Z])\\b"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"^(?:\\b(foo|bar|[A-Z])\\b)$"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"^(?:\\b(foo|bar|[A-Z])\\b)"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"(?:\\b(foo|bar|[A-Z])\\b)$"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-strings
-""
-"foo\n"
-regexps
-"\\b(foo|bar|[A-Z])\\b"
--;-;-;-
--;0-3 0-3;-;0-3 0-3
-"^(?:\\b(foo|bar|[A-Z])\\b)$"
--;-;-;-
--;-;-;-
-"^(?:\\b(foo|bar|[A-Z])\\b)"
--;-;-;-
--;0-3 0-3;-;0-3 0-3
-"(?:\\b(foo|bar|[A-Z])\\b)$"
--;-;-;-
--;-;-;-
-strings
-""
-"ffoo bbar N x"
-regexps
-"\\b(foo|bar|[A-Z])\\b"
--;-;-;-
--;10-11 10-11;-;10-11 10-11
-"^(?:\\b(foo|bar|[A-Z])\\b)$"
--;-;-;-
--;-;-;-
-"^(?:\\b(foo|bar|[A-Z])\\b)"
--;-;-;-
--;-;-;-
-"(?:\\b(foo|bar|[A-Z])\\b)$"
--;-;-;-
--;-;-;-
-strings
-""
-"fo"
-regexps
-"\\b(fo|foo)\\b"
--;-;-;-
-0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
-"^(?:\\b(fo|foo)\\b)$"
--;-;-;-
-0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
-"^(?:\\b(fo|foo)\\b)"
--;-;-;-
-0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
-"(?:\\b(fo|foo)\\b)$"
--;-;-;-
-0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
-strings
-""
-"foo"
-regexps
-"\\b(fo|foo)\\b"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"^(?:\\b(fo|foo)\\b)$"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"^(?:\\b(fo|foo)\\b)"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"(?:\\b(fo|foo)\\b)$"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-strings
-""
-""
-regexps
-"\\b\\b"
--;-;-;-
--;-;-;-
-"^(?:\\b\\b)$"
--;-;-;-
--;-;-;-
-"^(?:\\b\\b)"
--;-;-;-
--;-;-;-
-"(?:\\b\\b)$"
--;-;-;-
--;-;-;-
-strings
-""
-"x"
-regexps
-"\\b\\b"
--;-;-;-
--;0-0;-;0-0
-"^(?:\\b\\b)$"
--;-;-;-
--;-;-;-
-"^(?:\\b\\b)"
--;-;-;-
--;0-0;-;0-0
-"(?:\\b\\b)$"
--;-;-;-
--;1-1;-;1-1
-strings
-""
-""
-regexps
-"\\b$"
--;-;-;-
--;-;-;-
-"^(?:\\b$)$"
--;-;-;-
--;-;-;-
-"^(?:\\b$)"
--;-;-;-
--;-;-;-
-"(?:\\b$)$"
--;-;-;-
--;-;-;-
-strings
-""
-"x"
-regexps
-"\\b$"
--;-;-;-
--;1-1;-;1-1
-"^(?:\\b$)$"
--;-;-;-
--;-;-;-
-"^(?:\\b$)"
--;-;-;-
--;-;-;-
-"(?:\\b$)$"
--;-;-;-
--;1-1;-;1-1
-strings
-""
-"y x"
-regexps
-"\\b$"
--;-;-;-
--;3-3;-;3-3
-"^(?:\\b$)$"
--;-;-;-
--;-;-;-
-"^(?:\\b$)"
--;-;-;-
--;-;-;-
-"(?:\\b$)$"
--;-;-;-
--;3-3;-;3-3
-strings
-""
-"x"
-regexps
-"\\b.$"
--;-;-;-
-0-1;0-1;0-1;0-1
-"^(?:\\b.$)$"
--;-;-;-
-0-1;0-1;0-1;0-1
-"^(?:\\b.$)"
--;-;-;-
-0-1;0-1;0-1;0-1
-"(?:\\b.$)$"
--;-;-;-
-0-1;0-1;0-1;0-1
-strings
-""
-"fo"
-regexps
-"^\\b(fo|foo)\\b"
--;-;-;-
-0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
-"^(?:^\\b(fo|foo)\\b)$"
--;-;-;-
-0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
-"^(?:^\\b(fo|foo)\\b)"
--;-;-;-
-0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
-"(?:^\\b(fo|foo)\\b)$"
--;-;-;-
-0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
-strings
-""
-"foo"
-regexps
-"^\\b(fo|foo)\\b"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"^(?:^\\b(fo|foo)\\b)$"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"^(?:^\\b(fo|foo)\\b)"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"(?:^\\b(fo|foo)\\b)$"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-strings
-""
-""
-regexps
-"^\\b"
--;-;-;-
--;-;-;-
-"^(?:^\\b)$"
--;-;-;-
--;-;-;-
-"^(?:^\\b)"
--;-;-;-
--;-;-;-
-"(?:^\\b)$"
--;-;-;-
--;-;-;-
-strings
-""
-"x"
-regexps
-"^\\b"
--;-;-;-
--;0-0;-;0-0
-"^(?:^\\b)$"
--;-;-;-
--;-;-;-
-"^(?:^\\b)"
--;-;-;-
--;0-0;-;0-0
-"(?:^\\b)$"
--;-;-;-
--;-;-;-
-strings
-""
-""
-regexps
-"^\\b\\b"
--;-;-;-
--;-;-;-
-"^(?:^\\b\\b)$"
--;-;-;-
--;-;-;-
-"^(?:^\\b\\b)"
--;-;-;-
--;-;-;-
-"(?:^\\b\\b)$"
--;-;-;-
--;-;-;-
-strings
-""
-"x"
-regexps
-"^\\b\\b"
--;-;-;-
--;0-0;-;0-0
-"^(?:^\\b\\b)$"
--;-;-;-
--;-;-;-
-"^(?:^\\b\\b)"
--;-;-;-
--;0-0;-;0-0
-"(?:^\\b\\b)$"
--;-;-;-
--;-;-;-
-strings
-""
-""
-regexps
-"^\\b$"
--;-;-;-
--;-;-;-
-"^(?:^\\b$)$"
--;-;-;-
--;-;-;-
-"^(?:^\\b$)"
--;-;-;-
--;-;-;-
-"(?:^\\b$)$"
--;-;-;-
--;-;-;-
-strings
-""
-"x"
-regexps
-"^\\b$"
--;-;-;-
--;-;-;-
-"^(?:^\\b$)$"
--;-;-;-
--;-;-;-
-"^(?:^\\b$)"
--;-;-;-
--;-;-;-
-"(?:^\\b$)$"
--;-;-;-
--;-;-;-
-strings
-""
-"x"
-regexps
-"^\\b.$"
--;-;-;-
-0-1;0-1;0-1;0-1
-"^(?:^\\b.$)$"
--;-;-;-
-0-1;0-1;0-1;0-1
-"^(?:^\\b.$)"
--;-;-;-
-0-1;0-1;0-1;0-1
-"(?:^\\b.$)$"
--;-;-;-
-0-1;0-1;0-1;0-1
-strings
-""
-"x"
-regexps
-"^\\b.\\b$"
--;-;-;-
-0-1;0-1;0-1;0-1
-"^(?:^\\b.\\b$)$"
--;-;-;-
-0-1;0-1;0-1;0-1
-"^(?:^\\b.\\b$)"
--;-;-;-
-0-1;0-1;0-1;0-1
-"(?:^\\b.\\b$)$"
--;-;-;-
-0-1;0-1;0-1;0-1
-strings
-""
-""
-regexps
-"^^^^^^^^\\b$$$$$$$"
--;-;-;-
--;-;-;-
-"^(?:^^^^^^^^\\b$$$$$$$)$"
--;-;-;-
--;-;-;-
-"^(?:^^^^^^^^\\b$$$$$$$)"
--;-;-;-
--;-;-;-
-"(?:^^^^^^^^\\b$$$$$$$)$"
--;-;-;-
--;-;-;-
-strings
-""
-"x"
-regexps
-"^^^^^^^^\\b.$$$$$$"
--;-;-;-
-0-1;0-1;0-1;0-1
-"^(?:^^^^^^^^\\b.$$$$$$)$"
--;-;-;-
-0-1;0-1;0-1;0-1
-"^(?:^^^^^^^^\\b.$$$$$$)"
--;-;-;-
-0-1;0-1;0-1;0-1
-"(?:^^^^^^^^\\b.$$$$$$)$"
--;-;-;-
-0-1;0-1;0-1;0-1
-strings
-""
-"x"
-regexps
-"^^^^^^^^\\b$$$$$$$"
--;-;-;-
--;-;-;-
-"^(?:^^^^^^^^\\b$$$$$$$)$"
--;-;-;-
--;-;-;-
-"^(?:^^^^^^^^\\b$$$$$$$)"
--;-;-;-
--;-;-;-
-"(?:^^^^^^^^\\b$$$$$$$)$"
--;-;-;-
--;-;-;-
-strings
-""
-"n foo xfoox that"
-regexps
-"\\Bfoo\\B"
--;-;-;-
--;7-10;-;7-10
-"^(?:\\Bfoo\\B)$"
--;-;-;-
--;-;-;-
-"^(?:\\Bfoo\\B)"
--;-;-;-
--;-;-;-
-"(?:\\Bfoo\\B)$"
--;-;-;-
--;-;-;-
-strings
-""
-"faoa x"
-regexps
-"a\\B"
--;-;-;-
--;1-2;-;1-2
-"^(?:a\\B)$"
--;-;-;-
--;-;-;-
-"^(?:a\\B)"
--;-;-;-
--;-;-;-
-"(?:a\\B)$"
--;-;-;-
--;-;-;-
-strings
-""
-"bar x"
-regexps
-"\\Bbar"
--;-;-;-
--;-;-;-
-"^(?:\\Bbar)$"
--;-;-;-
--;-;-;-
-"^(?:\\Bbar)"
--;-;-;-
--;-;-;-
-"(?:\\Bbar)$"
--;-;-;-
--;-;-;-
-strings
-""
-"foo\nbar x"
-regexps
-"\\Bbar"
--;-;-;-
--;-;-;-
-"^(?:\\Bbar)$"
--;-;-;-
--;-;-;-
-"^(?:\\Bbar)"
--;-;-;-
--;-;-;-
-"(?:\\Bbar)$"
--;-;-;-
--;-;-;-
-strings
-""
-"foobar"
-regexps
-"bar\\B"
--;-;-;-
--;-;-;-
-"^(?:bar\\B)$"
--;-;-;-
--;-;-;-
-"^(?:bar\\B)"
--;-;-;-
--;-;-;-
-"(?:bar\\B)$"
--;-;-;-
--;-;-;-
-strings
-""
-"foobar\nxxx"
-regexps
-"bar\\B"
--;-;-;-
--;-;-;-
-"^(?:bar\\B)$"
--;-;-;-
--;-;-;-
-"^(?:bar\\B)"
--;-;-;-
--;-;-;-
-"(?:bar\\B)$"
--;-;-;-
--;-;-;-
-strings
-""
-"foox"
-regexps
-"(foo|bar|[A-Z])\\B"
--;-;-;-
--;0-3 0-3;-;0-3 0-3
-"^(?:(foo|bar|[A-Z])\\B)$"
--;-;-;-
--;-;-;-
-"^(?:(foo|bar|[A-Z])\\B)"
--;-;-;-
--;0-3 0-3;-;0-3 0-3
-"(?:(foo|bar|[A-Z])\\B)$"
--;-;-;-
--;-;-;-
-strings
-""
-"foo\n"
-regexps
-"(foo|bar|[A-Z])\\B"
--;-;-;-
--;-;-;-
-"^(?:(foo|bar|[A-Z])\\B)$"
--;-;-;-
--;-;-;-
-"^(?:(foo|bar|[A-Z])\\B)"
--;-;-;-
--;-;-;-
-"(?:(foo|bar|[A-Z])\\B)$"
--;-;-;-
--;-;-;-
-strings
-""
-""
-regexps
-"\\B"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"^(?:\\B)$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"^(?:\\B)"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"(?:\\B)$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-strings
-""
-"x"
-regexps
-"\\B"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:\\B)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:\\B)"
-0-0;0-0;0-0;0-0
--;-;-;-
-"(?:\\B)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-strings
-""
-"foo"
-regexps
-"\\B(foo|bar|[A-Z])"
--;-;-;-
--;-;-;-
-"^(?:\\B(foo|bar|[A-Z]))$"
--;-;-;-
--;-;-;-
-"^(?:\\B(foo|bar|[A-Z]))"
--;-;-;-
--;-;-;-
-"(?:\\B(foo|bar|[A-Z]))$"
--;-;-;-
--;-;-;-
-strings
-""
-"xXy"
-regexps
-"\\B(foo|bar|[A-Z])\\B"
--;-;-;-
--;1-2 1-2;-;1-2 1-2
-"^(?:\\B(foo|bar|[A-Z])\\B)$"
--;-;-;-
--;-;-;-
-"^(?:\\B(foo|bar|[A-Z])\\B)"
--;-;-;-
--;-;-;-
-"(?:\\B(foo|bar|[A-Z])\\B)$"
--;-;-;-
--;-;-;-
-strings
-""
-"XY"
-regexps
-"\\B(foo|bar|[A-Z])\\B"
--;-;-;-
--;-;-;-
-"^(?:\\B(foo|bar|[A-Z])\\B)$"
--;-;-;-
--;-;-;-
-"^(?:\\B(foo|bar|[A-Z])\\B)"
--;-;-;-
--;-;-;-
-"(?:\\B(foo|bar|[A-Z])\\B)$"
--;-;-;-
--;-;-;-
-strings
-""
-"XYZ"
-regexps
-"\\B(foo|bar|[A-Z])\\B"
--;-;-;-
--;1-2 1-2;-;1-2 1-2
-"^(?:\\B(foo|bar|[A-Z])\\B)$"
--;-;-;-
--;-;-;-
-"^(?:\\B(foo|bar|[A-Z])\\B)"
--;-;-;-
--;-;-;-
-"(?:\\B(foo|bar|[A-Z])\\B)$"
--;-;-;-
--;-;-;-
-strings
-""
-"abara"
-regexps
-"\\B(foo|bar|[A-Z])\\B"
--;-;-;-
--;1-4 1-4;-;1-4 1-4
-"^(?:\\B(foo|bar|[A-Z])\\B)$"
--;-;-;-
--;-;-;-
-"^(?:\\B(foo|bar|[A-Z])\\B)"
--;-;-;-
--;-;-;-
-"(?:\\B(foo|bar|[A-Z])\\B)$"
--;-;-;-
--;-;-;-
-strings
-""
-"xfoo_"
-regexps
-"\\B(foo|bar|[A-Z])\\B"
--;-;-;-
--;1-4 1-4;-;1-4 1-4
-"^(?:\\B(foo|bar|[A-Z])\\B)$"
--;-;-;-
--;-;-;-
-"^(?:\\B(foo|bar|[A-Z])\\B)"
--;-;-;-
--;-;-;-
-"(?:\\B(foo|bar|[A-Z])\\B)$"
--;-;-;-
--;-;-;-
-strings
-""
-"xfoo\n"
-regexps
-"\\B(foo|bar|[A-Z])\\B"
--;-;-;-
--;-;-;-
-"^(?:\\B(foo|bar|[A-Z])\\B)$"
--;-;-;-
--;-;-;-
-"^(?:\\B(foo|bar|[A-Z])\\B)"
--;-;-;-
--;-;-;-
-"(?:\\B(foo|bar|[A-Z])\\B)$"
--;-;-;-
--;-;-;-
-strings
-""
-"foo bar vNx"
-regexps
-"\\B(foo|bar|[A-Z])\\B"
--;-;-;-
--;9-10 9-10;-;9-10 9-10
-"^(?:\\B(foo|bar|[A-Z])\\B)$"
--;-;-;-
--;-;-;-
-"^(?:\\B(foo|bar|[A-Z])\\B)"
--;-;-;-
--;-;-;-
-"(?:\\B(foo|bar|[A-Z])\\B)$"
--;-;-;-
--;-;-;-
-strings
-""
-"xfoo"
-regexps
-"\\B(fo|foo)\\B"
--;-;-;-
--;1-3 1-3;-;1-3 1-3
-"^(?:\\B(fo|foo)\\B)$"
--;-;-;-
--;-;-;-
-"^(?:\\B(fo|foo)\\B)"
--;-;-;-
--;-;-;-
-"(?:\\B(fo|foo)\\B)$"
--;-;-;-
--;-;-;-
-strings
-""
-"xfooo"
-regexps
-"\\B(foo|fo)\\B"
--;-;-;-
--;1-4 1-4;-;1-4 1-4
-"^(?:\\B(foo|fo)\\B)$"
--;-;-;-
--;-;-;-
-"^(?:\\B(foo|fo)\\B)"
--;-;-;-
--;-;-;-
-"(?:\\B(foo|fo)\\B)$"
--;-;-;-
--;-;-;-
-strings
-""
-""
-regexps
-"\\B\\B"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"^(?:\\B\\B)$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"^(?:\\B\\B)"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"(?:\\B\\B)$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-strings
-""
-"x"
-regexps
-"\\B\\B"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:\\B\\B)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:\\B\\B)"
-0-0;0-0;0-0;0-0
--;-;-;-
-"(?:\\B\\B)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-strings
-""
-""
-regexps
-"\\B$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"^(?:\\B$)$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"^(?:\\B$)"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"(?:\\B$)$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-strings
-""
-"x"
-regexps
-"\\B$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:\\B$)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:\\B$)"
-0-0;0-0;0-0;0-0
--;-;-;-
-"(?:\\B$)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-strings
-""
-"y x"
-regexps
-"\\B$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:\\B$)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:\\B$)"
-0-0;0-0;0-0;0-0
--;-;-;-
-"(?:\\B$)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-strings
-""
-"x"
-regexps
-"\\B.$"
--;-;-;-
--;-;-;-
-"^(?:\\B.$)$"
--;-;-;-
--;-;-;-
-"^(?:\\B.$)"
--;-;-;-
--;-;-;-
-"(?:\\B.$)$"
--;-;-;-
--;-;-;-
-strings
-""
-"fo"
-regexps
-"^\\B(fo|foo)\\B"
--;-;-;-
--;-;-;-
-"^(?:^\\B(fo|foo)\\B)$"
--;-;-;-
--;-;-;-
-"^(?:^\\B(fo|foo)\\B)"
--;-;-;-
--;-;-;-
-"(?:^\\B(fo|foo)\\B)$"
--;-;-;-
--;-;-;-
-strings
-""
-"foo"
-regexps
-"^\\B(fo|foo)\\B"
--;-;-;-
--;-;-;-
-"^(?:^\\B(fo|foo)\\B)$"
--;-;-;-
--;-;-;-
-"^(?:^\\B(fo|foo)\\B)"
--;-;-;-
--;-;-;-
-"(?:^\\B(fo|foo)\\B)$"
--;-;-;-
--;-;-;-
-strings
-""
-""
-regexps
-"^\\B"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"^(?:^\\B)$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"^(?:^\\B)"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"(?:^\\B)$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-strings
-""
-"x"
-regexps
-"^\\B"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:^\\B)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:^\\B)"
-0-0;0-0;0-0;0-0
--;-;-;-
-"(?:^\\B)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-strings
-""
-""
-regexps
-"^\\B\\B"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"^(?:^\\B\\B)$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"^(?:^\\B\\B)"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"(?:^\\B\\B)$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-strings
-""
-"x"
-regexps
-"^\\B\\B"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:^\\B\\B)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:^\\B\\B)"
-0-0;0-0;0-0;0-0
--;-;-;-
-"(?:^\\B\\B)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-strings
-""
-""
-regexps
-"^\\B$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"^(?:^\\B$)$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"^(?:^\\B$)"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"(?:^\\B$)$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-strings
-""
-"x"
-regexps
-"^\\B$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:^\\B$)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:^\\B$)"
-0-0;0-0;0-0;0-0
--;-;-;-
-"(?:^\\B$)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-strings
-""
-"x"
-regexps
-"^\\B.$"
--;-;-;-
--;-;-;-
-"^(?:^\\B.$)$"
--;-;-;-
--;-;-;-
-"^(?:^\\B.$)"
--;-;-;-
--;-;-;-
-"(?:^\\B.$)$"
--;-;-;-
--;-;-;-
-strings
-""
-"x"
-regexps
-"^\\B.\\B$"
--;-;-;-
--;-;-;-
-"^(?:^\\B.\\B$)$"
--;-;-;-
--;-;-;-
-"^(?:^\\B.\\B$)"
--;-;-;-
--;-;-;-
-"(?:^\\B.\\B$)$"
--;-;-;-
--;-;-;-
-strings
-""
-""
-regexps
-"^^^^^^^^\\B$$$$$$$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"^(?:^^^^^^^^\\B$$$$$$$)$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"^(?:^^^^^^^^\\B$$$$$$$)"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"(?:^^^^^^^^\\B$$$$$$$)$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-strings
-""
-"x"
-regexps
-"^^^^^^^^\\B.$$$$$$"
--;-;-;-
--;-;-;-
-"^(?:^^^^^^^^\\B.$$$$$$)$"
--;-;-;-
--;-;-;-
-"^(?:^^^^^^^^\\B.$$$$$$)"
--;-;-;-
--;-;-;-
-"(?:^^^^^^^^\\B.$$$$$$)$"
--;-;-;-
--;-;-;-
-strings
-""
-"x"
-regexps
-"^^^^^^^^\\B$$$$$$$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:^^^^^^^^\\B$$$$$$$)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:^^^^^^^^\\B$$$$$$$)"
-0-0;0-0;0-0;0-0
--;-;-;-
-"(?:^^^^^^^^\\B$$$$$$$)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-strings
-""
-"x"
-regexps
-"\\bx\\b"
--;-;-;-
-0-1;0-1;0-1;0-1
-"^(?:\\bx\\b)$"
--;-;-;-
-0-1;0-1;0-1;0-1
-"^(?:\\bx\\b)"
--;-;-;-
-0-1;0-1;0-1;0-1
-"(?:\\bx\\b)$"
--;-;-;-
-0-1;0-1;0-1;0-1
-strings
-""
-"x>"
-regexps
-"\\bx\\b"
--;-;-;-
--;0-1;-;0-1
-"^(?:\\bx\\b)$"
--;-;-;-
--;-;-;-
-"^(?:\\bx\\b)"
--;-;-;-
--;0-1;-;0-1
-"(?:\\bx\\b)$"
--;-;-;-
--;-;-;-
-strings
-""
-"<x"
-regexps
-"\\bx\\b"
--;-;-;-
--;1-2;-;1-2
-"^(?:\\bx\\b)$"
--;-;-;-
--;-;-;-
-"^(?:\\bx\\b)"
--;-;-;-
--;-;-;-
-"(?:\\bx\\b)$"
--;-;-;-
--;1-2;-;1-2
-strings
-""
-"<x>"
-regexps
-"\\bx\\b"
--;-;-;-
--;1-2;-;1-2
-"^(?:\\bx\\b)$"
--;-;-;-
--;-;-;-
-"^(?:\\bx\\b)"
--;-;-;-
--;-;-;-
-"(?:\\bx\\b)$"
--;-;-;-
--;-;-;-
-strings
-""
-"ax"
-regexps
-"\\bx\\b"
--;-;-;-
--;-;-;-
-"^(?:\\bx\\b)$"
--;-;-;-
--;-;-;-
-"^(?:\\bx\\b)"
--;-;-;-
--;-;-;-
-"(?:\\bx\\b)$"
--;-;-;-
--;-;-;-
-strings
-""
-"xb"
-regexps
-"\\bx\\b"
--;-;-;-
--;-;-;-
-"^(?:\\bx\\b)$"
--;-;-;-
--;-;-;-
-"^(?:\\bx\\b)"
--;-;-;-
--;-;-;-
-"(?:\\bx\\b)$"
--;-;-;-
--;-;-;-
-strings
-""
-"axb"
-regexps
-"\\bx\\b"
--;-;-;-
--;-;-;-
-"^(?:\\bx\\b)$"
--;-;-;-
--;-;-;-
-"^(?:\\bx\\b)"
--;-;-;-
--;-;-;-
-"(?:\\bx\\b)$"
--;-;-;-
--;-;-;-
-strings
-""
-"«x"
-regexps
-"\\bx\\b"
--;-;-;-
--;2-3;-;2-3
-"^(?:\\bx\\b)$"
--;-;-;-
--;-;-;-
-"^(?:\\bx\\b)"
--;-;-;-
--;-;-;-
-"(?:\\bx\\b)$"
--;-;-;-
--;2-3;-;2-3
-strings
-""
-"x»"
-regexps
-"\\bx\\b"
--;-;-;-
--;0-1;-;0-1
-"^(?:\\bx\\b)$"
--;-;-;-
--;-;-;-
-"^(?:\\bx\\b)"
--;-;-;-
--;0-1;-;0-1
-"(?:\\bx\\b)$"
--;-;-;-
--;-;-;-
-strings
-""
-"«x»"
-regexps
-"\\bx\\b"
--;-;-;-
--;2-3;-;2-3
-"^(?:\\bx\\b)$"
--;-;-;-
--;-;-;-
-"^(?:\\bx\\b)"
--;-;-;-
--;-;-;-
-"(?:\\bx\\b)$"
--;-;-;-
--;-;-;-
-strings
-""
-"axb"
-regexps
-"\\bx\\b"
--;-;-;-
--;-;-;-
-"^(?:\\bx\\b)$"
--;-;-;-
--;-;-;-
-"^(?:\\bx\\b)"
--;-;-;-
--;-;-;-
-"(?:\\bx\\b)$"
--;-;-;-
--;-;-;-
-strings
-""
-"áxβ"
-regexps
-"\\bx\\b"
--;-;-;-
--;2-3;-;2-3
-"^(?:\\bx\\b)$"
--;-;-;-
--;-;-;-
-"^(?:\\bx\\b)"
--;-;-;-
--;-;-;-
-"(?:\\bx\\b)$"
--;-;-;-
--;-;-;-
-strings
-""
-"axb"
-regexps
-"\\Bx\\B"
--;-;-;-
--;1-2;-;1-2
-"^(?:\\Bx\\B)$"
--;-;-;-
--;-;-;-
-"^(?:\\Bx\\B)"
--;-;-;-
--;-;-;-
-"(?:\\Bx\\B)$"
--;-;-;-
--;-;-;-
-strings
-""
-"áxβ"
-regexps
-"\\Bx\\B"
--;-;-;-
--;-;-;-
-"^(?:\\Bx\\B)$"
--;-;-;-
--;-;-;-
-"^(?:\\Bx\\B)"
--;-;-;-
--;-;-;-
-"(?:\\Bx\\B)$"
--;-;-;-
--;-;-;-
-strings
-""
-""
-regexps
-"^$^$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"^(?:^$^$)$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"^(?:^$^$)"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"(?:^$^$)$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-strings
-""
-""
-regexps
-"^$^"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"^(?:^$^)$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"^(?:^$^)"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"(?:^$^)$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-strings
-""
-""
-regexps
-"$^$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"^(?:$^$)$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"^(?:$^$)"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-"(?:$^$)$"
-0-0;0-0;0-0;0-0
-0-0;0-0;0-0;0-0
-strings
-""
-"x"
-regexps
-"^$^$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:^$^$)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:^$^$)"
-0-0;0-0;0-0;0-0
--;-;-;-
-"(?:^$^$)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-strings
-""
-"x"
-regexps
-"^$^"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:^$^)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:^$^)"
-0-0;0-0;0-0;0-0
--;-;-;-
-"(?:^$^)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-strings
-""
-"x"
-regexps
-"$^$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:$^$)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:$^$)"
-0-0;0-0;0-0;0-0
--;-;-;-
-"(?:$^$)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-strings
-""
-"x\ny"
-regexps
-"^$^$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:^$^$)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:^$^$)"
-0-0;0-0;0-0;0-0
--;-;-;-
-"(?:^$^$)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-strings
-""
-"x\ny"
-regexps
-"^$^"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:^$^)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:^$^)"
-0-0;0-0;0-0;0-0
--;-;-;-
-"(?:^$^)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-strings
-""
-"x\ny"
-regexps
-"$^$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:$^$)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:$^$)"
-0-0;0-0;0-0;0-0
--;-;-;-
-"(?:$^$)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-strings
-""
-"x\n\ny"
-regexps
-"^$^$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:^$^$)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:^$^$)"
-0-0;0-0;0-0;0-0
--;-;-;-
-"(?:^$^$)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-strings
-""
-"x\n\ny"
-regexps
-"^$^"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:^$^)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:^$^)"
-0-0;0-0;0-0;0-0
--;-;-;-
-"(?:^$^)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-strings
-""
-"x\n\ny"
-regexps
-"$^$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:$^$)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:$^$)"
-0-0;0-0;0-0;0-0
--;-;-;-
-"(?:$^$)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-strings
-""
-"foo$bar"
-regexps
-"^(foo\\$)$"
--;-;-;-
--;-;-;-
-"^(?:^(foo\\$)$)$"
--;-;-;-
--;-;-;-
-"^(?:^(foo\\$)$)"
--;-;-;-
--;-;-;-
-"(?:^(foo\\$)$)$"
--;-;-;-
--;-;-;-
-strings
-""
-"foo$bar"
-regexps
-"(foo\\$)"
--;-;-;-
--;0-4 0-4;-;0-4 0-4
-"^(?:(foo\\$))$"
--;-;-;-
--;-;-;-
-"^(?:(foo\\$))"
--;-;-;-
--;0-4 0-4;-;0-4 0-4
-"(?:(foo\\$))$"
--;-;-;-
--;-;-;-
-strings
-""
-"abc"
-regexps
-"^...$"
--;-;-;-
-0-3;0-3;0-3;0-3
-"^(?:^...$)$"
--;-;-;-
-0-3;0-3;0-3;0-3
-"^(?:^...$)"
--;-;-;-
-0-3;0-3;0-3;0-3
-"(?:^...$)$"
--;-;-;-
-0-3;0-3;0-3;0-3
-strings
-""
-"本"
-regexps
-"^本$"
--;-;-;-
-0-3;0-3;0-3;0-3
-"^(?:^本$)$"
--;-;-;-
-0-3;0-3;0-3;0-3
-"^(?:^本$)"
--;-;-;-
-0-3;0-3;0-3;0-3
-"(?:^本$)$"
--;-;-;-
-0-3;0-3;0-3;0-3
-strings
-""
-"日本語"
-regexps
-"^...$"
--;-;-;-
-0-9;0-9;0-9;0-9
-"^(?:^...$)$"
--;-;-;-
-0-9;0-9;0-9;0-9
-"^(?:^...$)"
--;-;-;-
-0-9;0-9;0-9;0-9
-"(?:^...$)$"
--;-;-;-
-0-9;0-9;0-9;0-9
-strings
-""
-".本."
-regexps
-"^...$"
--;-;-;-
-0-5;0-5;0-5;0-5
-"^(?:^...$)$"
--;-;-;-
-0-5;0-5;0-5;0-5
-"^(?:^...$)"
--;-;-;-
-0-5;0-5;0-5;0-5
-"(?:^...$)$"
--;-;-;-
-0-5;0-5;0-5;0-5
-strings
-""
-"本"
-regexps
-"^\\C\\C\\C$"
--;-;-;-
-0-3;0-3;0-3;0-3
-"^(?:^\\C\\C\\C$)$"
--;-;-;-
-0-3;0-3;0-3;0-3
-"^(?:^\\C\\C\\C$)"
--;-;-;-
-0-3;0-3;0-3;0-3
-"(?:^\\C\\C\\C$)$"
--;-;-;-
-0-3;0-3;0-3;0-3
-strings
-""
-"本"
-regexps
-"^\\C$"
--;-;-;-
--;-;-;-
-"^(?:^\\C$)$"
--;-;-;-
--;-;-;-
-"^(?:^\\C$)"
--;-;-;-
--;-;-;-
-"(?:^\\C$)$"
--;-;-;-
--;-;-;-
-strings
-""
-"日本語"
-regexps
-"^\\C\\C\\C$"
--;-;-;-
--;-;-;-
-"^(?:^\\C\\C\\C$)$"
--;-;-;-
--;-;-;-
-"^(?:^\\C\\C\\C$)"
--;-;-;-
--;-;-;-
-"(?:^\\C\\C\\C$)$"
--;-;-;-
--;-;-;-
-strings
-""
-"日本語"
-regexps
-"^...$"
--;-;-;-
-0-9;0-9;0-9;0-9
-"^(?:^...$)$"
--;-;-;-
-0-9;0-9;0-9;0-9
-"^(?:^...$)"
--;-;-;-
-0-9;0-9;0-9;0-9
-"(?:^...$)$"
--;-;-;-
-0-9;0-9;0-9;0-9
-strings
-""
-"日本語"
-regexps
-"^.........$"
--;-;-;-
--;-;-;-
-"^(?:^.........$)$"
--;-;-;-
--;-;-;-
-"^(?:^.........$)"
--;-;-;-
--;-;-;-
-"(?:^.........$)$"
--;-;-;-
--;-;-;-
-strings
-""
-".本."
-regexps
-"^...$"
--;-;-;-
-0-5;0-5;0-5;0-5
-"^(?:^...$)$"
--;-;-;-
-0-5;0-5;0-5;0-5
-"^(?:^...$)"
--;-;-;-
-0-5;0-5;0-5;0-5
-"(?:^...$)$"
--;-;-;-
-0-5;0-5;0-5;0-5
-strings
-""
-".本."
-regexps
-"^.....$"
--;-;-;-
--;-;-;-
-"^(?:^.....$)$"
--;-;-;-
--;-;-;-
-"^(?:^.....$)"
--;-;-;-
--;-;-;-
-"(?:^.....$)$"
--;-;-;-
--;-;-;-
-strings
-""
-"xfooo"
-regexps
-"\\B(fo|foo)\\B"
--;-;-;-
--;1-3 1-3;-;1-4 1-4
-"^(?:\\B(fo|foo)\\B)$"
--;-;-;-
--;-;-;-
-"^(?:\\B(fo|foo)\\B)"
--;-;-;-
--;-;-;-
-"(?:\\B(fo|foo)\\B)$"
--;-;-;-
--;-;-;-
-strings
-""
-"foo"
-regexps
-"(fo|foo)"
--;-;-;-
-0-3 0-3;0-2 0-2;0-3 0-3;0-3 0-3
-"^(?:(fo|foo))$"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-"^(?:(fo|foo))"
--;-;-;-
-0-3 0-3;0-2 0-2;0-3 0-3;0-3 0-3
-"(?:(fo|foo))$"
--;-;-;-
-0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
-strings
-""
-"a"
-regexps
-"\\141"
--;-;-;-
-0-1;0-1;0-1;0-1
-"^(?:\\141)$"
--;-;-;-
-0-1;0-1;0-1;0-1
-"^(?:\\141)"
--;-;-;-
-0-1;0-1;0-1;0-1
-"(?:\\141)$"
--;-;-;-
-0-1;0-1;0-1;0-1
-strings
-""
-"0"
-regexps
-"\\060"
--;-;-;-
-0-1;0-1;0-1;0-1
-"^(?:\\060)$"
--;-;-;-
-0-1;0-1;0-1;0-1
-"^(?:\\060)"
--;-;-;-
-0-1;0-1;0-1;0-1
-"(?:\\060)$"
--;-;-;-
-0-1;0-1;0-1;0-1
-strings
-""
-"00"
-regexps
-"\\0600"
--;-;-;-
-0-2;0-2;0-2;0-2
-"^(?:\\0600)$"
--;-;-;-
-0-2;0-2;0-2;0-2
-"^(?:\\0600)"
--;-;-;-
-0-2;0-2;0-2;0-2
-"(?:\\0600)$"
--;-;-;-
-0-2;0-2;0-2;0-2
-strings
-""
-"08"
-regexps
-"\\608"
--;-;-;-
-0-2;0-2;0-2;0-2
-"^(?:\\608)$"
--;-;-;-
-0-2;0-2;0-2;0-2
-"^(?:\\608)"
--;-;-;-
-0-2;0-2;0-2;0-2
-"(?:\\608)$"
--;-;-;-
-0-2;0-2;0-2;0-2
-strings
-""
-""
-regexps
-"\\01"
--;-;-;-
-0-1;0-1;0-1;0-1
-"^(?:\\01)$"
--;-;-;-
-0-1;0-1;0-1;0-1
-"^(?:\\01)"
--;-;-;-
-0-1;0-1;0-1;0-1
-"(?:\\01)$"
--;-;-;-
-0-1;0-1;0-1;0-1
-strings
-""
-"8"
-regexps
-"\\018"
--;-;-;-
-0-2;0-2;0-2;0-2
-"^(?:\\018)$"
--;-;-;-
-0-2;0-2;0-2;0-2
-"^(?:\\018)"
--;-;-;-
-0-2;0-2;0-2;0-2
-"(?:\\018)$"
--;-;-;-
-0-2;0-2;0-2;0-2
-strings
-""
-"a"
-regexps
-"\\x{61}"
--;-;-;-
-0-1;0-1;0-1;0-1
-"^(?:\\x{61})$"
--;-;-;-
-0-1;0-1;0-1;0-1
-"^(?:\\x{61})"
--;-;-;-
-0-1;0-1;0-1;0-1
-"(?:\\x{61})$"
--;-;-;-
-0-1;0-1;0-1;0-1
-strings
-""
-"a"
-regexps
-"\\x61"
--;-;-;-
-0-1;0-1;0-1;0-1
-"^(?:\\x61)$"
--;-;-;-
-0-1;0-1;0-1;0-1
-"^(?:\\x61)"
--;-;-;-
-0-1;0-1;0-1;0-1
-"(?:\\x61)$"
--;-;-;-
-0-1;0-1;0-1;0-1
-strings
-""
-"a"
-regexps
-"\\x{00000061}"
--;-;-;-
-0-1;0-1;0-1;0-1
-"^(?:\\x{00000061})$"
--;-;-;-
-0-1;0-1;0-1;0-1
-"^(?:\\x{00000061})"
--;-;-;-
-0-1;0-1;0-1;0-1
-"(?:\\x{00000061})$"
--;-;-;-
-0-1;0-1;0-1;0-1
-strings
-""
-"aαβb"
-regexps
-"\\p{Greek}+"
--;-;-;-
--;1-5;-;1-5
-"^(?:\\p{Greek}+)$"
--;-;-;-
--;-;-;-
-"^(?:\\p{Greek}+)"
--;-;-;-
--;-;-;-
-"(?:\\p{Greek}+)$"
--;-;-;-
--;-;-;-
-strings
-""
-"aαβb"
-regexps
-"\\P{Greek}+"
--;-;-;-
--;0-1;-;0-1
-"^(?:\\P{Greek}+)$"
--;-;-;-
--;-;-;-
-"^(?:\\P{Greek}+)"
--;-;-;-
--;0-1;-;0-1
-"(?:\\P{Greek}+)$"
--;-;-;-
--;5-6;-;5-6
-strings
-""
-"aαβb"
-regexps
-"\\p{^Greek}+"
--;-;-;-
--;0-1;-;0-1
-"^(?:\\p{^Greek}+)$"
--;-;-;-
--;-;-;-
-"^(?:\\p{^Greek}+)"
--;-;-;-
--;0-1;-;0-1
-"(?:\\p{^Greek}+)$"
--;-;-;-
--;5-6;-;5-6
-strings
-""
-"aαβb"
-regexps
-"\\P{^Greek}+"
--;-;-;-
--;1-5;-;1-5
-"^(?:\\P{^Greek}+)$"
--;-;-;-
--;-;-;-
-"^(?:\\P{^Greek}+)"
--;-;-;-
--;-;-;-
-"(?:\\P{^Greek}+)$"
--;-;-;-
--;-;-;-
-strings
-""
-"abc123"
-regexps
-"[^0-9]+"
--;-;-;-
--;0-3;-;0-3
-"^(?:[^0-9]+)$"
--;-;-;-
--;-;-;-
-"^(?:[^0-9]+)"
--;-;-;-
--;0-3;-;0-3
-"(?:[^0-9]+)$"
--;-;-;-
--;-;-;-
-strings
-""
-"abc123²³¼½¾₀₉"
-regexps
-"\\p{Nd}+"
--;-;-;-
--;3-6;-;3-6
-"^(?:\\p{Nd}+)$"
--;-;-;-
--;-;-;-
-"^(?:\\p{Nd}+)"
--;-;-;-
--;-;-;-
-"(?:\\p{Nd}+)$"
--;-;-;-
--;-;-;-
-strings
-""
-"abc123²³¼½¾₀₉"
-regexps
-"\\p{^Nd}+"
--;-;-;-
--;0-3;-;0-3
-"^(?:\\p{^Nd}+)$"
--;-;-;-
--;-;-;-
-"^(?:\\p{^Nd}+)"
--;-;-;-
--;0-3;-;0-3
-"(?:\\p{^Nd}+)$"
--;-;-;-
--;6-22;-;6-22
-strings
-""
-"abc123²³¼½¾₀₉"
-regexps
-"\\P{Nd}+"
--;-;-;-
--;0-3;-;0-3
-"^(?:\\P{Nd}+)$"
--;-;-;-
--;-;-;-
-"^(?:\\P{Nd}+)"
--;-;-;-
--;0-3;-;0-3
-"(?:\\P{Nd}+)$"
--;-;-;-
--;6-22;-;6-22
-strings
-""
-"abc123²³¼½¾₀₉"
-regexps
-"\\P{^Nd}+"
--;-;-;-
--;3-6;-;3-6
-"^(?:\\P{^Nd}+)$"
--;-;-;-
--;-;-;-
-"^(?:\\P{^Nd}+)"
--;-;-;-
--;-;-;-
-"(?:\\P{^Nd}+)$"
--;-;-;-
--;-;-;-
-strings
-""
-"abc123²³¼½¾₀₉"
-regexps
-"\\pN+"
--;-;-;-
--;3-22;-;3-22
-"^(?:\\pN+)$"
--;-;-;-
--;-;-;-
-"^(?:\\pN+)"
--;-;-;-
--;-;-;-
-"(?:\\pN+)$"
--;-;-;-
--;3-22;-;3-22
-strings
-""
-"abc123²³¼½¾₀₉"
-regexps
-"\\p{N}+"
--;-;-;-
--;3-22;-;3-22
-"^(?:\\p{N}+)$"
--;-;-;-
--;-;-;-
-"^(?:\\p{N}+)"
--;-;-;-
--;-;-;-
-"(?:\\p{N}+)$"
--;-;-;-
--;3-22;-;3-22
-strings
-""
-"abc123²³¼½¾₀₉"
-regexps
-"\\p{^N}+"
--;-;-;-
--;0-3;-;0-3
-"^(?:\\p{^N}+)$"
--;-;-;-
--;-;-;-
-"^(?:\\p{^N}+)"
--;-;-;-
--;0-3;-;0-3
-"(?:\\p{^N}+)$"
--;-;-;-
--;-;-;-
-strings
-""
-"abc123"
-regexps
-"\\p{Any}+"
--;-;-;-
-0-6;0-6;0-6;0-6
-"^(?:\\p{Any}+)$"
--;-;-;-
-0-6;0-6;0-6;0-6
-"^(?:\\p{Any}+)"
--;-;-;-
-0-6;0-6;0-6;0-6
-"(?:\\p{Any}+)$"
--;-;-;-
-0-6;0-6;0-6;0-6
-strings
-""
-"@AaB"
-regexps
-"(?i)[@-A]+"
--;-;-;-
--;0-3;-;0-3
-"^(?:(?i)[@-A]+)$"
--;-;-;-
--;-;-;-
-"^(?:(?i)[@-A]+)"
--;-;-;-
--;0-3;-;0-3
-"(?:(?i)[@-A]+)$"
--;-;-;-
--;-;-;-
-strings
-""
-"aAzZ"
-regexps
-"(?i)[A-Z]+"
--;-;-;-
-0-4;0-4;0-4;0-4
-"^(?:(?i)[A-Z]+)$"
--;-;-;-
-0-4;0-4;0-4;0-4
-"^(?:(?i)[A-Z]+)"
--;-;-;-
-0-4;0-4;0-4;0-4
-"(?:(?i)[A-Z]+)$"
--;-;-;-
-0-4;0-4;0-4;0-4
-strings
-""
-"Aa\\"
-regexps
-"(?i)[^\\\\]+"
--;-;-;-
--;0-2;-;0-2
-"^(?:(?i)[^\\\\]+)$"
--;-;-;-
--;-;-;-
-"^(?:(?i)[^\\\\]+)"
--;-;-;-
--;0-2;-;0-2
-"(?:(?i)[^\\\\]+)$"
--;-;-;-
--;-;-;-
-strings
-""
-"acegikmoqsuwyACEGIKMOQSUWY"
-regexps
-"(?i)[acegikmoqsuwy]+"
--;-;-;-
-0-26;0-26;0-26;0-26
-"^(?:(?i)[acegikmoqsuwy]+)$"
--;-;-;-
-0-26;0-26;0-26;0-26
-"^(?:(?i)[acegikmoqsuwy]+)"
--;-;-;-
-0-26;0-26;0-26;0-26
-"(?:(?i)[acegikmoqsuwy]+)$"
--;-;-;-
-0-26;0-26;0-26;0-26
-strings
-""
-"@AaB"
-regexps
-"[@-A]+"
--;-;-;-
--;0-2;-;0-2
-"^(?:[@-A]+)$"
--;-;-;-
--;-;-;-
-"^(?:[@-A]+)"
--;-;-;-
--;0-2;-;0-2
-"(?:[@-A]+)$"
--;-;-;-
--;-;-;-
-strings
-""
-"aAzZ"
-regexps
-"[A-Z]+"
--;-;-;-
--;1-2;-;1-2
-"^(?:[A-Z]+)$"
--;-;-;-
--;-;-;-
-"^(?:[A-Z]+)"
--;-;-;-
--;-;-;-
-"(?:[A-Z]+)$"
--;-;-;-
--;3-4;-;3-4
-strings
-""
-"Aa\\"
-regexps
-"[^\\\\]+"
--;-;-;-
--;0-2;-;0-2
-"^(?:[^\\\\]+)$"
--;-;-;-
--;-;-;-
-"^(?:[^\\\\]+)"
--;-;-;-
--;0-2;-;0-2
-"(?:[^\\\\]+)$"
--;-;-;-
--;-;-;-
-strings
-""
-"acegikmoqsuwyACEGIKMOQSUWY"
-regexps
-"[acegikmoqsuwy]+"
--;-;-;-
--;0-13;-;0-13
-"^(?:[acegikmoqsuwy]+)$"
--;-;-;-
--;-;-;-
-"^(?:[acegikmoqsuwy]+)"
--;-;-;-
--;0-13;-;0-13
-"(?:[acegikmoqsuwy]+)$"
--;-;-;-
--;-;-;-
-strings
-""
-"abcdef"
-regexps
-"^abc"
--;-;-;-
--;0-3;-;0-3
-"^(?:^abc)$"
--;-;-;-
--;-;-;-
-"^(?:^abc)"
--;-;-;-
--;0-3;-;0-3
-"(?:^abc)$"
--;-;-;-
--;-;-;-
-strings
-""
-"aabcdef"
-regexps
-"^abc"
--;-;-;-
--;-;-;-
-"^(?:^abc)$"
--;-;-;-
--;-;-;-
-"^(?:^abc)"
--;-;-;-
--;-;-;-
-"(?:^abc)$"
--;-;-;-
--;-;-;-
-strings
-""
-"abcdef"
-regexps
-"^[ay]*[bx]+c"
--;-;-;-
--;0-3;-;0-3
-"^(?:^[ay]*[bx]+c)$"
--;-;-;-
--;-;-;-
-"^(?:^[ay]*[bx]+c)"
--;-;-;-
--;0-3;-;0-3
-"(?:^[ay]*[bx]+c)$"
--;-;-;-
--;-;-;-
-strings
-""
-"aabcdef"
-regexps
-"^[ay]*[bx]+c"
--;-;-;-
--;0-4;-;0-4
-"^(?:^[ay]*[bx]+c)$"
--;-;-;-
--;-;-;-
-"^(?:^[ay]*[bx]+c)"
--;-;-;-
--;0-4;-;0-4
-"(?:^[ay]*[bx]+c)$"
--;-;-;-
--;-;-;-
-strings
-""
-"abcdef"
-regexps
-"def$"
--;-;-;-
--;3-6;-;3-6
-"^(?:def$)$"
--;-;-;-
--;-;-;-
-"^(?:def$)"
--;-;-;-
--;-;-;-
-"(?:def$)$"
--;-;-;-
--;3-6;-;3-6
-strings
-""
-"abcdeff"
-regexps
-"def$"
--;-;-;-
--;-;-;-
-"^(?:def$)$"
--;-;-;-
--;-;-;-
-"^(?:def$)"
--;-;-;-
--;-;-;-
-"(?:def$)$"
--;-;-;-
--;-;-;-
-strings
-""
-"abcdef"
-regexps
-"d[ex][fy]$"
--;-;-;-
--;3-6;-;3-6
-"^(?:d[ex][fy]$)$"
--;-;-;-
--;-;-;-
-"^(?:d[ex][fy]$)"
--;-;-;-
--;-;-;-
-"(?:d[ex][fy]$)$"
--;-;-;-
--;3-6;-;3-6
-strings
-""
-"abcdeff"
-regexps
-"d[ex][fy]$"
--;-;-;-
--;-;-;-
-"^(?:d[ex][fy]$)$"
--;-;-;-
--;-;-;-
-"^(?:d[ex][fy]$)"
--;-;-;-
--;-;-;-
-"(?:d[ex][fy]$)$"
--;-;-;-
--;-;-;-
-strings
-""
-"abcdef"
-regexps
-"[dz][ex][fy]$"
--;-;-;-
--;3-6;-;3-6
-"^(?:[dz][ex][fy]$)$"
--;-;-;-
--;-;-;-
-"^(?:[dz][ex][fy]$)"
--;-;-;-
--;-;-;-
-"(?:[dz][ex][fy]$)$"
--;-;-;-
--;3-6;-;3-6
-strings
-""
-"abcdeff"
-regexps
-"[dz][ex][fy]$"
--;-;-;-
--;-;-;-
-"^(?:[dz][ex][fy]$)$"
--;-;-;-
--;-;-;-
-"^(?:[dz][ex][fy]$)"
--;-;-;-
--;-;-;-
-"(?:[dz][ex][fy]$)$"
--;-;-;-
--;-;-;-
-strings
-""
-"abcdef"
-regexps
-"(?m)^abc"
--;-;-;-
--;0-3;-;0-3
-"^(?:(?m)^abc)$"
--;-;-;-
--;-;-;-
-"^(?:(?m)^abc)"
--;-;-;-
--;0-3;-;0-3
-"(?:(?m)^abc)$"
--;-;-;-
--;-;-;-
-strings
-""
-"aabcdef"
-regexps
-"(?m)^abc"
--;-;-;-
--;-;-;-
-"^(?:(?m)^abc)$"
--;-;-;-
--;-;-;-
-"^(?:(?m)^abc)"
--;-;-;-
--;-;-;-
-"(?:(?m)^abc)$"
--;-;-;-
--;-;-;-
-strings
-""
-"abcdef"
-regexps
-"(?m)^[ay]*[bx]+c"
--;-;-;-
--;0-3;-;0-3
-"^(?:(?m)^[ay]*[bx]+c)$"
--;-;-;-
--;-;-;-
-"^(?:(?m)^[ay]*[bx]+c)"
--;-;-;-
--;0-3;-;0-3
-"(?:(?m)^[ay]*[bx]+c)$"
--;-;-;-
--;-;-;-
-strings
-""
-"aabcdef"
-regexps
-"(?m)^[ay]*[bx]+c"
--;-;-;-
--;0-4;-;0-4
-"^(?:(?m)^[ay]*[bx]+c)$"
--;-;-;-
--;-;-;-
-"^(?:(?m)^[ay]*[bx]+c)"
--;-;-;-
--;0-4;-;0-4
-"(?:(?m)^[ay]*[bx]+c)$"
--;-;-;-
--;-;-;-
-strings
-""
-"abcdef"
-regexps
-"(?m)def$"
--;-;-;-
--;3-6;-;3-6
-"^(?:(?m)def$)$"
--;-;-;-
--;-;-;-
-"^(?:(?m)def$)"
--;-;-;-
--;-;-;-
-"(?:(?m)def$)$"
--;-;-;-
--;3-6;-;3-6
-strings
-""
-"abcdeff"
-regexps
-"(?m)def$"
--;-;-;-
--;-;-;-
-"^(?:(?m)def$)$"
--;-;-;-
--;-;-;-
-"^(?:(?m)def$)"
--;-;-;-
--;-;-;-
-"(?:(?m)def$)$"
--;-;-;-
--;-;-;-
-strings
-""
-"abcdef"
-regexps
-"(?m)d[ex][fy]$"
--;-;-;-
--;3-6;-;3-6
-"^(?:(?m)d[ex][fy]$)$"
--;-;-;-
--;-;-;-
-"^(?:(?m)d[ex][fy]$)"
--;-;-;-
--;-;-;-
-"(?:(?m)d[ex][fy]$)$"
--;-;-;-
--;3-6;-;3-6
-strings
-""
-"abcdeff"
-regexps
-"(?m)d[ex][fy]$"
--;-;-;-
--;-;-;-
-"^(?:(?m)d[ex][fy]$)$"
--;-;-;-
--;-;-;-
-"^(?:(?m)d[ex][fy]$)"
--;-;-;-
--;-;-;-
-"(?:(?m)d[ex][fy]$)$"
--;-;-;-
--;-;-;-
-strings
-""
-"abcdef"
-regexps
-"(?m)[dz][ex][fy]$"
--;-;-;-
--;3-6;-;3-6
-"^(?:(?m)[dz][ex][fy]$)$"
--;-;-;-
--;-;-;-
-"^(?:(?m)[dz][ex][fy]$)"
--;-;-;-
--;-;-;-
-"(?:(?m)[dz][ex][fy]$)$"
--;-;-;-
--;3-6;-;3-6
-strings
-""
-"abcdeff"
-regexps
-"(?m)[dz][ex][fy]$"
--;-;-;-
--;-;-;-
-"^(?:(?m)[dz][ex][fy]$)$"
--;-;-;-
--;-;-;-
-"^(?:(?m)[dz][ex][fy]$)"
--;-;-;-
--;-;-;-
-"(?:(?m)[dz][ex][fy]$)$"
--;-;-;-
--;-;-;-
-strings
-""
-"a"
-regexps
-"^"
-0-0;0-0;0-0;0-0
--;0-0;-;0-0
-"^(?:^)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:^)"
-0-0;0-0;0-0;0-0
--;0-0;-;0-0
-"(?:^)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-strings
-""
-"a"
-regexps
-"^^"
-0-0;0-0;0-0;0-0
--;0-0;-;0-0
-"^(?:^^)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-"^(?:^^)"
-0-0;0-0;0-0;0-0
--;0-0;-;0-0
-"(?:^^)$"
-0-0;0-0;0-0;0-0
--;-;-;-
-strings
-""
-"a"
-regexps
-"a"
--;-;-;-
-0-1;0-1;0-1;0-1
-"^(?:a)$"
--;-;-;-
-0-1;0-1;0-1;0-1
-"^(?:a)"
--;-;-;-
-0-1;0-1;0-1;0-1
-"(?:a)$"
--;-;-;-
-0-1;0-1;0-1;0-1
-strings
-""
-"a"
-regexps
-"ab*"
--;-;-;-
-0-1;0-1;0-1;0-1
-"^(?:ab*)$"
--;-;-;-
-0-1;0-1;0-1;0-1
-"^(?:ab*)"
--;-;-;-
-0-1;0-1;0-1;0-1
-"(?:ab*)$"
--;-;-;-
-0-1;0-1;0-1;0-1
-strings
-""
-"a"
-regexps
-"a\\C*"
--;-;-;-
-0-1;0-1;0-1;0-1
-"^(?:a\\C*)$"
--;-;-;-
-0-1;0-1;0-1;0-1
-"^(?:a\\C*)"
--;-;-;-
-0-1;0-1;0-1;0-1
-"(?:a\\C*)$"
--;-;-;-
-0-1;0-1;0-1;0-1
-strings
-""
-"baba"
-regexps
-"a\\C*|ba\\C"
--;-;-;-
--;0-3;-;0-3
-"^(?:a\\C*|ba\\C)$"
--;-;-;-
--;-;-;-
-"^(?:a\\C*|ba\\C)"
--;-;-;-
--;0-3;-;0-3
-"(?:a\\C*|ba\\C)$"
--;-;-;-
--;1-4;-;1-4
diff --git a/src/pkg/regexp/testdata/repetition.dat b/src/pkg/regexp/testdata/repetition.dat
deleted file mode 100644
index e6361f51a..000000000
--- a/src/pkg/regexp/testdata/repetition.dat
+++ /dev/null
@@ -1,163 +0,0 @@
-NOTE implicit vs. explicit repetitions : 2009-02-02
-
-# Glenn Fowler <gsf@research.att.com>
-# conforming matches (column 4) must match one of the following BREs
-# NOMATCH
-# (0,.)\((\(.\),\(.\))(?,?)(\2,\3)\)*
-# (0,.)\((\(.\),\(.\))(\2,\3)(?,?)\)*
-# i.e., each 3-tuple has two identical elements and one (?,?)
-
-E ((..)|(.)) NULL NOMATCH
-E ((..)|(.))((..)|(.)) NULL NOMATCH
-E ((..)|(.))((..)|(.))((..)|(.)) NULL NOMATCH
-
-E ((..)|(.)){1} NULL NOMATCH
-E ((..)|(.)){2} NULL NOMATCH
-E ((..)|(.)){3} NULL NOMATCH
-
-E ((..)|(.))* NULL (0,0)
-
-E ((..)|(.)) a (0,1)(0,1)(?,?)(0,1)
-E ((..)|(.))((..)|(.)) a NOMATCH
-E ((..)|(.))((..)|(.))((..)|(.)) a NOMATCH
-
-E ((..)|(.)){1} a (0,1)(0,1)(?,?)(0,1)
-E ((..)|(.)){2} a NOMATCH
-E ((..)|(.)){3} a NOMATCH
-
-E ((..)|(.))* a (0,1)(0,1)(?,?)(0,1)
-
-E ((..)|(.)) aa (0,2)(0,2)(0,2)(?,?)
-E ((..)|(.))((..)|(.)) aa (0,2)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)
-E ((..)|(.))((..)|(.))((..)|(.)) aa NOMATCH
-
-E ((..)|(.)){1} aa (0,2)(0,2)(0,2)(?,?)
-E ((..)|(.)){2} aa (0,2)(1,2)(?,?)(1,2)
-E ((..)|(.)){3} aa NOMATCH
-
-E ((..)|(.))* aa (0,2)(0,2)(0,2)(?,?)
-
-E ((..)|(.)) aaa (0,2)(0,2)(0,2)(?,?)
-E ((..)|(.))((..)|(.)) aaa (0,3)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)
-E ((..)|(.))((..)|(.))((..)|(.)) aaa (0,3)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)(2,3)(?,?)(2,3)
-
-E ((..)|(.)){1} aaa (0,2)(0,2)(0,2)(?,?)
-#E ((..)|(.)){2} aaa (0,3)(2,3)(?,?)(2,3)
-E ((..)|(.)){2} aaa (0,3)(2,3)(0,2)(2,3) RE2/Go
-E ((..)|(.)){3} aaa (0,3)(2,3)(?,?)(2,3)
-
-#E ((..)|(.))* aaa (0,3)(2,3)(?,?)(2,3)
-E ((..)|(.))* aaa (0,3)(2,3)(0,2)(2,3) RE2/Go
-
-E ((..)|(.)) aaaa (0,2)(0,2)(0,2)(?,?)
-E ((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
-E ((..)|(.))((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)(3,4)(?,?)(3,4)
-
-E ((..)|(.)){1} aaaa (0,2)(0,2)(0,2)(?,?)
-E ((..)|(.)){2} aaaa (0,4)(2,4)(2,4)(?,?)
-#E ((..)|(.)){3} aaaa (0,4)(3,4)(?,?)(3,4)
-E ((..)|(.)){3} aaaa (0,4)(3,4)(0,2)(3,4) RE2/Go
-
-E ((..)|(.))* aaaa (0,4)(2,4)(2,4)(?,?)
-
-E ((..)|(.)) aaaaa (0,2)(0,2)(0,2)(?,?)
-E ((..)|(.))((..)|(.)) aaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
-E ((..)|(.))((..)|(.))((..)|(.)) aaaaa (0,5)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,5)(?,?)(4,5)
-
-E ((..)|(.)){1} aaaaa (0,2)(0,2)(0,2)(?,?)
-E ((..)|(.)){2} aaaaa (0,4)(2,4)(2,4)(?,?)
-#E ((..)|(.)){3} aaaaa (0,5)(4,5)(?,?)(4,5)
-E ((..)|(.)){3} aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go
-
-#E ((..)|(.))* aaaaa (0,5)(4,5)(?,?)(4,5)
-E ((..)|(.))* aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go
-
-E ((..)|(.)) aaaaaa (0,2)(0,2)(0,2)(?,?)
-E ((..)|(.))((..)|(.)) aaaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
-E ((..)|(.))((..)|(.))((..)|(.)) aaaaaa (0,6)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,6)(4,6)(?,?)
-
-E ((..)|(.)){1} aaaaaa (0,2)(0,2)(0,2)(?,?)
-E ((..)|(.)){2} aaaaaa (0,4)(2,4)(2,4)(?,?)
-E ((..)|(.)){3} aaaaaa (0,6)(4,6)(4,6)(?,?)
-
-E ((..)|(.))* aaaaaa (0,6)(4,6)(4,6)(?,?)
-
-NOTE additional repetition tests graciously provided by Chris Kuklewicz www.haskell.org 2009-02-02
-
-# These test a bug in OS X / FreeBSD / NetBSD, and libtree.
-# Linux/GLIBC gets the {8,} and {8,8} wrong.
-
-:HA#100:E X(.?){0,}Y X1234567Y (0,9)(7,8)
-:HA#101:E X(.?){1,}Y X1234567Y (0,9)(7,8)
-:HA#102:E X(.?){2,}Y X1234567Y (0,9)(7,8)
-:HA#103:E X(.?){3,}Y X1234567Y (0,9)(7,8)
-:HA#104:E X(.?){4,}Y X1234567Y (0,9)(7,8)
-:HA#105:E X(.?){5,}Y X1234567Y (0,9)(7,8)
-:HA#106:E X(.?){6,}Y X1234567Y (0,9)(7,8)
-:HA#107:E X(.?){7,}Y X1234567Y (0,9)(7,8)
-:HA#108:E X(.?){8,}Y X1234567Y (0,9)(8,8)
-#:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(7,8)
-:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(8,8) RE2/Go
-#:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(7,8)
-:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(8,8) RE2/Go
-#:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(7,8)
-:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(8,8) RE2/Go
-#:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(7,8)
-:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(8,8) RE2/Go
-#:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(7,8)
-:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(8,8) RE2/Go
-#:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(7,8)
-:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(8,8) RE2/Go
-#:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(7,8)
-:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(8,8) RE2/Go
-#:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(7,8)
-:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(8,8) RE2/Go
-:HA#118:E X(.?){8,8}Y X1234567Y (0,9)(8,8)
-
-# These test a fixed bug in my regex-tdfa that did not keep the expanded
-# form properly grouped, so right association did the wrong thing with
-# these ambiguous patterns (crafted just to test my code when I became
-# suspicious of my implementation). The first subexpression should use
-# "ab" then "a" then "bcd".
-
-# OS X / FreeBSD / NetBSD badly fail many of these, with impossible
-# results like (0,6)(4,5)(6,6).
-
-:HA#260:E (a|ab|c|bcd){0,}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#261:E (a|ab|c|bcd){1,}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#262:E (a|ab|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#263:E (a|ab|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#264:E (a|ab|c|bcd){4,}(d*) ababcd NOMATCH
-:HA#265:E (a|ab|c|bcd){0,10}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#266:E (a|ab|c|bcd){1,10}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#267:E (a|ab|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#268:E (a|ab|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#269:E (a|ab|c|bcd){4,10}(d*) ababcd NOMATCH
-:HA#270:E (a|ab|c|bcd)*(d*) ababcd (0,6)(3,6)(6,6)
-:HA#271:E (a|ab|c|bcd)+(d*) ababcd (0,6)(3,6)(6,6)
-
-# The above worked on Linux/GLIBC but the following often fail.
-# They also trip up OS X / FreeBSD / NetBSD:
-
-#:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
-#:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
-#:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
-#:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
-:HA#284:E (ab|a|c|bcd){4,}(d*) ababcd NOMATCH
-#:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
-#:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
-#:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
-#:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
-:HA#289:E (ab|a|c|bcd){4,10}(d*) ababcd NOMATCH
-#:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(3,6)(6,6)
-:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
-#:HA#291:E (ab|a|c|bcd)+(d*) ababcd (0,6)(3,6)(6,6)
-:HA#291:E (ab|a|c|bcd)+(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
diff --git a/src/pkg/regexp/testdata/testregex.c b/src/pkg/regexp/testdata/testregex.c
deleted file mode 100644
index 37545d057..000000000
--- a/src/pkg/regexp/testdata/testregex.c
+++ /dev/null
@@ -1,2286 +0,0 @@
-#pragma prototyped noticed
-
-/*
- * regex(3) test harness
- *
- * build: cc -o testregex testregex.c
- * help: testregex --man
- * note: REG_* features are detected by #ifdef; if REG_* are enums
- * then supply #define REG_foo REG_foo for each enum REG_foo
- *
- * Glenn Fowler <gsf@research.att.com>
- * AT&T Research
- *
- * PLEASE: publish your tests so everyone can benefit
- *
- * The following license covers testregex.c and all associated test data.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of THIS SOFTWARE FILE (the "Software"), to deal in the Software
- * without restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, and/or sell copies of the
- * Software, and to permit persons to whom the Software is furnished to do
- * so, subject to the following disclaimer:
- *
- * THIS SOFTWARE IS PROVIDED BY AT&T ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL AT&T BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-static const char id[] = "\n@(#)$Id: testregex (AT&T Research) 2010-06-10 $\0\n";
-
-#if _PACKAGE_ast
-#include <ast.h>
-#else
-#include <sys/types.h>
-#endif
-
-#include <stdio.h>
-#include <regex.h>
-#include <ctype.h>
-#include <setjmp.h>
-#include <signal.h>
-#include <string.h>
-#include <unistd.h>
-
-#ifdef __STDC__
-#include <stdlib.h>
-#include <locale.h>
-#endif
-
-#ifndef RE_DUP_MAX
-#define RE_DUP_MAX 32767
-#endif
-
-#if !_PACKAGE_ast
-#undef REG_DISCIPLINE
-#endif
-
-#ifndef REG_DELIMITED
-#undef _REG_subcomp
-#endif
-
-#define TEST_ARE 0x00000001
-#define TEST_BRE 0x00000002
-#define TEST_ERE 0x00000004
-#define TEST_KRE 0x00000008
-#define TEST_LRE 0x00000010
-#define TEST_SRE 0x00000020
-
-#define TEST_EXPAND 0x00000100
-#define TEST_LENIENT 0x00000200
-
-#define TEST_QUERY 0x00000400
-#define TEST_SUB 0x00000800
-#define TEST_UNSPECIFIED 0x00001000
-#define TEST_VERIFY 0x00002000
-#define TEST_AND 0x00004000
-#define TEST_OR 0x00008000
-
-#define TEST_DELIMIT 0x00010000
-#define TEST_OK 0x00020000
-#define TEST_SAME 0x00040000
-
-#define TEST_ACTUAL 0x00100000
-#define TEST_BASELINE 0x00200000
-#define TEST_FAIL 0x00400000
-#define TEST_PASS 0x00800000
-#define TEST_SUMMARY 0x01000000
-
-#define TEST_IGNORE_ERROR 0x02000000
-#define TEST_IGNORE_OVER 0x04000000
-#define TEST_IGNORE_POSITION 0x08000000
-
-#define TEST_CATCH 0x10000000
-#define TEST_VERBOSE 0x20000000
-
-#define TEST_DECOMP 0x40000000
-
-#define TEST_GLOBAL (TEST_ACTUAL|TEST_AND|TEST_BASELINE|TEST_CATCH|TEST_FAIL|TEST_IGNORE_ERROR|TEST_IGNORE_OVER|TEST_IGNORE_POSITION|TEST_OR|TEST_PASS|TEST_SUMMARY|TEST_VERBOSE)
-
-#ifdef REG_DISCIPLINE
-
-
-#include <stk.h>
-
-typedef struct Disc_s
-{
- regdisc_t disc;
- int ordinal;
- Sfio_t* sp;
-} Disc_t;
-
-static void*
-compf(const regex_t* re, const char* xstr, size_t xlen, regdisc_t* disc)
-{
- Disc_t* dp = (Disc_t*)disc;
-
- return (void*)((char*)0 + ++dp->ordinal);
-}
-
-static int
-execf(const regex_t* re, void* data, const char* xstr, size_t xlen, const char* sstr, size_t slen, char** snxt, regdisc_t* disc)
-{
- Disc_t* dp = (Disc_t*)disc;
-
- sfprintf(dp->sp, "{%-.*s}(%lu:%d)", xlen, xstr, (char*)data - (char*)0, slen);
- return atoi(xstr);
-}
-
-static void*
-resizef(void* handle, void* data, size_t size)
-{
- if (!size)
- return 0;
- return stkalloc((Sfio_t*)handle, size);
-}
-
-#endif
-
-#ifndef NiL
-#ifdef __STDC__
-#define NiL 0
-#else
-#define NiL (char*)0
-#endif
-#endif
-
-#define H(x) do{if(html)fprintf(stderr,x);}while(0)
-#define T(x) fprintf(stderr,x)
-
-static void
-help(int html)
-{
-H("<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML//EN\">\n");
-H("<HTML>\n");
-H("<HEAD>\n");
-H("<TITLE>testregex man document</TITLE>\n");
-H("</HEAD>\n");
-H("<BODY bgcolor=white>\n");
-H("<PRE>\n");
-T("NAME\n");
-T(" testregex - regex(3) test harness\n");
-T("\n");
-T("SYNOPSIS\n");
-T(" testregex [ options ]\n");
-T("\n");
-T("DESCRIPTION\n");
-T(" testregex reads regex(3) test specifications, one per line, from the\n");
-T(" standard input and writes one output line for each failed test. A\n");
-T(" summary line is written after all tests are done. Each successful\n");
-T(" test is run again with REG_NOSUB. Unsupported features are noted\n");
-T(" before the first test, and tests requiring these features are\n");
-T(" silently ignored.\n");
-T("\n");
-T("OPTIONS\n");
-T(" -c catch signals and non-terminating calls\n");
-T(" -e ignore error return mismatches\n");
-T(" -h list help on standard error\n");
-T(" -n do not repeat successful tests with regnexec()\n");
-T(" -o ignore match[] overrun errors\n");
-T(" -p ignore negative position mismatches\n");
-T(" -s use stack instead of malloc\n");
-T(" -x do not repeat successful tests with REG_NOSUB\n");
-T(" -v list each test line\n");
-T(" -A list failed test lines with actual answers\n");
-T(" -B list all test lines with actual answers\n");
-T(" -F list failed test lines\n");
-T(" -P list passed test lines\n");
-T(" -S output one summary line\n");
-T("\n");
-T("INPUT FORMAT\n");
-T(" Input lines may be blank, a comment beginning with #, or a test\n");
-T(" specification. A specification is five fields separated by one\n");
-T(" or more tabs. NULL denotes the empty string and NIL denotes the\n");
-T(" 0 pointer.\n");
-T("\n");
-T(" Field 1: the regex(3) flags to apply, one character per REG_feature\n");
-T(" flag. The test is skipped if REG_feature is not supported by the\n");
-T(" implementation. If the first character is not [BEASKLP] then the\n");
-T(" specification is a global control line. One or more of [BEASKLP] may be\n");
-T(" specified; the test will be repeated for each mode.\n");
-T("\n");
-T(" B basic BRE (grep, ed, sed)\n");
-T(" E REG_EXTENDED ERE (egrep)\n");
-T(" A REG_AUGMENTED ARE (egrep with negation)\n");
-T(" S REG_SHELL SRE (sh glob)\n");
-T(" K REG_SHELL|REG_AUGMENTED KRE (ksh glob)\n");
-T(" L REG_LITERAL LRE (fgrep)\n");
-T("\n");
-T(" a REG_LEFT|REG_RIGHT implicit ^...$\n");
-T(" b REG_NOTBOL lhs does not match ^\n");
-T(" c REG_COMMENT ignore space and #...\\n\n");
-T(" d REG_SHELL_DOT explicit leading . match\n");
-T(" e REG_NOTEOL rhs does not match $\n");
-T(" f REG_MULTIPLE multiple \\n separated patterns\n");
-T(" g FNM_LEADING_DIR testfnmatch only -- match until /\n");
-T(" h REG_MULTIREF multiple digit backref\n");
-T(" i REG_ICASE ignore case\n");
-T(" j REG_SPAN . matches \\n\n");
-T(" k REG_ESCAPE \\ to ecape [...] delimiter\n");
-T(" l REG_LEFT implicit ^...\n");
-T(" m REG_MINIMAL minimal match\n");
-T(" n REG_NEWLINE explicit \\n match\n");
-T(" o REG_ENCLOSED (|&) magic inside [@|&](...)\n");
-T(" p REG_SHELL_PATH explicit / match\n");
-T(" q REG_DELIMITED delimited pattern\n");
-T(" r REG_RIGHT implicit ...$\n");
-T(" s REG_SHELL_ESCAPED \\ not special\n");
-T(" t REG_MUSTDELIM all delimiters must be specified\n");
-T(" u standard unspecified behavior -- errors not counted\n");
-T(" v REG_CLASS_ESCAPE \\ special inside [...]\n");
-T(" w REG_NOSUB no subexpression match array\n");
-T(" x REG_LENIENT let some errors slide\n");
-T(" y REG_LEFT regexec() implicit ^...\n");
-T(" z REG_NULL NULL subexpressions ok\n");
-T(" $ expand C \\c escapes in fields 2 and 3\n");
-T(" / field 2 is a regsubcomp() expression\n");
-T(" = field 3 is a regdecomp() expression\n");
-T("\n");
-T(" Field 1 control lines:\n");
-T("\n");
-T(" C set LC_COLLATE and LC_CTYPE to locale in field 2\n");
-T("\n");
-T(" ?test ... output field 5 if passed and != EXPECTED, silent otherwise\n");
-T(" &test ... output field 5 if current and previous passed\n");
-T(" |test ... output field 5 if current passed and previous failed\n");
-T(" ; ... output field 2 if previous failed\n");
-T(" {test ... skip if failed until }\n");
-T(" } end of skip\n");
-T("\n");
-T(" : comment comment copied as output NOTE\n");
-T(" :comment:test :comment: ignored\n");
-T(" N[OTE] comment comment copied as output NOTE\n");
-T(" T[EST] comment comment\n");
-T("\n");
-T(" number use number for nmatch (20 by default)\n");
-T("\n");
-T(" Field 2: the regular expression pattern; SAME uses the pattern from\n");
-T(" the previous specification. RE_DUP_MAX inside {...} expands to the\n");
-T(" value from <limits.h>.\n");
-T("\n");
-T(" Field 3: the string to match. X...{RE_DUP_MAX} expands to RE_DUP_MAX\n");
-T(" copies of X.\n");
-T("\n");
-T(" Field 4: the test outcome. This is either one of the posix error\n");
-T(" codes (with REG_ omitted) or the match array, a list of (m,n)\n");
-T(" entries with m and n being first and last+1 positions in the\n");
-T(" field 3 string, or NULL if REG_NOSUB is in effect and success\n");
-T(" is expected. BADPAT is acceptable in place of any regcomp(3)\n");
-T(" error code. The match[] array is initialized to (-2,-2) before\n");
-T(" each test. All array elements from 0 to nmatch-1 must be specified\n");
-T(" in the outcome. Unspecified endpoints (offset -1) are denoted by ?.\n");
-T(" Unset endpoints (offset -2) are denoted by X. {x}(o:n) denotes a\n");
-T(" matched (?{...}) expression, where x is the text enclosed by {...},\n");
-T(" o is the expression ordinal counting from 1, and n is the length of\n");
-T(" the unmatched portion of the subject string. If x starts with a\n");
-T(" number then that is the return value of re_execf(), otherwise 0 is\n");
-T(" returned. RE_DUP_MAX[-+]N expands to the <limits.h> value -+N.\n");
-T("\n");
-T(" Field 5: optional comment appended to the report.\n");
-T("\n");
-T("CAVEAT\n");
-T(" If a regex implementation misbehaves with memory then all bets are off.\n");
-T("\n");
-T("CONTRIBUTORS\n");
-T(" Glenn Fowler gsf@research.att.com (ksh strmatch, regex extensions)\n");
-T(" David Korn dgk@research.att.com (ksh glob matcher)\n");
-T(" Doug McIlroy mcilroy@dartmouth.edu (ast regex/testre in C++)\n");
-T(" Tom Lord lord@regexps.com (rx tests)\n");
-T(" Henry Spencer henry@zoo.toronto.edu (original public regex)\n");
-T(" Andrew Hume andrew@research.att.com (gre tests)\n");
-T(" John Maddock John_Maddock@compuserve.com (regex++ tests)\n");
-T(" Philip Hazel ph10@cam.ac.uk (pcre tests)\n");
-T(" Ville Laurikari vl@iki.fi (libtre tests)\n");
-H("</PRE>\n");
-H("</BODY>\n");
-H("</HTML>\n");
-}
-
-#ifndef elementsof
-#define elementsof(x) (sizeof(x)/sizeof(x[0]))
-#endif
-
-#ifndef streq
-#define streq(a,b) (*(a)==*(b)&&!strcmp(a,b))
-#endif
-
-#define HUNG 2
-#define NOTEST (~0)
-
-#ifndef REG_TEST_DEFAULT
-#define REG_TEST_DEFAULT 0
-#endif
-
-#ifndef REG_EXEC_DEFAULT
-#define REG_EXEC_DEFAULT 0
-#endif
-
-static const char* unsupported[] =
-{
- "BASIC",
-#ifndef REG_EXTENDED
- "EXTENDED",
-#endif
-#ifndef REG_AUGMENTED
- "AUGMENTED",
-#endif
-#ifndef REG_SHELL
- "SHELL",
-#endif
-
-#ifndef REG_CLASS_ESCAPE
- "CLASS_ESCAPE",
-#endif
-#ifndef REG_COMMENT
- "COMMENT",
-#endif
-#ifndef REG_DELIMITED
- "DELIMITED",
-#endif
-#ifndef REG_DISCIPLINE
- "DISCIPLINE",
-#endif
-#ifndef REG_ESCAPE
- "ESCAPE",
-#endif
-#ifndef REG_ICASE
- "ICASE",
-#endif
-#ifndef REG_LEFT
- "LEFT",
-#endif
-#ifndef REG_LENIENT
- "LENIENT",
-#endif
-#ifndef REG_LITERAL
- "LITERAL",
-#endif
-#ifndef REG_MINIMAL
- "MINIMAL",
-#endif
-#ifndef REG_MULTIPLE
- "MULTIPLE",
-#endif
-#ifndef REG_MULTIREF
- "MULTIREF",
-#endif
-#ifndef REG_MUSTDELIM
- "MUSTDELIM",
-#endif
-#ifndef REG_NEWLINE
- "NEWLINE",
-#endif
-#ifndef REG_NOTBOL
- "NOTBOL",
-#endif
-#ifndef REG_NOTEOL
- "NOTEOL",
-#endif
-#ifndef REG_NULL
- "NULL",
-#endif
-#ifndef REG_RIGHT
- "RIGHT",
-#endif
-#ifndef REG_SHELL_DOT
- "SHELL_DOT",
-#endif
-#ifndef REG_SHELL_ESCAPED
- "SHELL_ESCAPED",
-#endif
-#ifndef REG_SHELL_GROUP
- "SHELL_GROUP",
-#endif
-#ifndef REG_SHELL_PATH
- "SHELL_PATH",
-#endif
-#ifndef REG_SPAN
- "SPAN",
-#endif
-#if REG_NOSUB & REG_TEST_DEFAULT
- "SUBMATCH",
-#endif
-#if !_REG_nexec
- "regnexec",
-#endif
-#if !_REG_subcomp
- "regsubcomp",
-#endif
-#if !_REG_decomp
- "redecomp",
-#endif
- 0
-};
-
-#ifndef REG_CLASS_ESCAPE
-#define REG_CLASS_ESCAPE NOTEST
-#endif
-#ifndef REG_COMMENT
-#define REG_COMMENT NOTEST
-#endif
-#ifndef REG_DELIMITED
-#define REG_DELIMITED NOTEST
-#endif
-#ifndef REG_ESCAPE
-#define REG_ESCAPE NOTEST
-#endif
-#ifndef REG_ICASE
-#define REG_ICASE NOTEST
-#endif
-#ifndef REG_LEFT
-#define REG_LEFT NOTEST
-#endif
-#ifndef REG_LENIENT
-#define REG_LENIENT 0
-#endif
-#ifndef REG_MINIMAL
-#define REG_MINIMAL NOTEST
-#endif
-#ifndef REG_MULTIPLE
-#define REG_MULTIPLE NOTEST
-#endif
-#ifndef REG_MULTIREF
-#define REG_MULTIREF NOTEST
-#endif
-#ifndef REG_MUSTDELIM
-#define REG_MUSTDELIM NOTEST
-#endif
-#ifndef REG_NEWLINE
-#define REG_NEWLINE NOTEST
-#endif
-#ifndef REG_NOTBOL
-#define REG_NOTBOL NOTEST
-#endif
-#ifndef REG_NOTEOL
-#define REG_NOTEOL NOTEST
-#endif
-#ifndef REG_NULL
-#define REG_NULL NOTEST
-#endif
-#ifndef REG_RIGHT
-#define REG_RIGHT NOTEST
-#endif
-#ifndef REG_SHELL_DOT
-#define REG_SHELL_DOT NOTEST
-#endif
-#ifndef REG_SHELL_ESCAPED
-#define REG_SHELL_ESCAPED NOTEST
-#endif
-#ifndef REG_SHELL_GROUP
-#define REG_SHELL_GROUP NOTEST
-#endif
-#ifndef REG_SHELL_PATH
-#define REG_SHELL_PATH NOTEST
-#endif
-#ifndef REG_SPAN
-#define REG_SPAN NOTEST
-#endif
-
-#define REG_UNKNOWN (-1)
-
-#ifndef REG_ENEWLINE
-#define REG_ENEWLINE (REG_UNKNOWN-1)
-#endif
-#ifndef REG_ENULL
-#ifndef REG_EMPTY
-#define REG_ENULL (REG_UNKNOWN-2)
-#else
-#define REG_ENULL REG_EMPTY
-#endif
-#endif
-#ifndef REG_ECOUNT
-#define REG_ECOUNT (REG_UNKNOWN-3)
-#endif
-#ifndef REG_BADESC
-#define REG_BADESC (REG_UNKNOWN-4)
-#endif
-#ifndef REG_EMEM
-#define REG_EMEM (REG_UNKNOWN-5)
-#endif
-#ifndef REG_EHUNG
-#define REG_EHUNG (REG_UNKNOWN-6)
-#endif
-#ifndef REG_EBUS
-#define REG_EBUS (REG_UNKNOWN-7)
-#endif
-#ifndef REG_EFAULT
-#define REG_EFAULT (REG_UNKNOWN-8)
-#endif
-#ifndef REG_EFLAGS
-#define REG_EFLAGS (REG_UNKNOWN-9)
-#endif
-#ifndef REG_EDELIM
-#define REG_EDELIM (REG_UNKNOWN-9)
-#endif
-
-static const struct { int code; char* name; } codes[] =
-{
- REG_UNKNOWN, "UNKNOWN",
- REG_NOMATCH, "NOMATCH",
- REG_BADPAT, "BADPAT",
- REG_ECOLLATE, "ECOLLATE",
- REG_ECTYPE, "ECTYPE",
- REG_EESCAPE, "EESCAPE",
- REG_ESUBREG, "ESUBREG",
- REG_EBRACK, "EBRACK",
- REG_EPAREN, "EPAREN",
- REG_EBRACE, "EBRACE",
- REG_BADBR, "BADBR",
- REG_ERANGE, "ERANGE",
- REG_ESPACE, "ESPACE",
- REG_BADRPT, "BADRPT",
- REG_ENEWLINE, "ENEWLINE",
- REG_ENULL, "ENULL",
- REG_ECOUNT, "ECOUNT",
- REG_BADESC, "BADESC",
- REG_EMEM, "EMEM",
- REG_EHUNG, "EHUNG",
- REG_EBUS, "EBUS",
- REG_EFAULT, "EFAULT",
- REG_EFLAGS, "EFLAGS",
- REG_EDELIM, "EDELIM",
-};
-
-static struct
-{
- regmatch_t NOMATCH;
- int errors;
- int extracted;
- int ignored;
- int lineno;
- int passed;
- int signals;
- int unspecified;
- int verify;
- int warnings;
- char* file;
- char* stack;
- char* which;
- jmp_buf gotcha;
-#ifdef REG_DISCIPLINE
- Disc_t disc;
-#endif
-} state;
-
-static void
-quote(char* s, int len, unsigned long test)
-{
- unsigned char* u = (unsigned char*)s;
- unsigned char* e;
- int c;
-#ifdef MB_CUR_MAX
- int w;
-#endif
-
- if (!u)
- printf("NIL");
- else if (!*u && len <= 1)
- printf("NULL");
- else if (test & TEST_EXPAND)
- {
- if (len < 0)
- len = strlen((char*)u);
- e = u + len;
- if (test & TEST_DELIMIT)
- printf("\"");
- while (u < e)
- switch (c = *u++)
- {
- case '\\':
- printf("\\\\");
- break;
- case '"':
- if (test & TEST_DELIMIT)
- printf("\\\"");
- else
- printf("\"");
- break;
- case '\a':
- printf("\\a");
- break;
- case '\b':
- printf("\\b");
- break;
- case 033:
- printf("\\e");
- break;
- case '\f':
- printf("\\f");
- break;
- case '\n':
- printf("\\n");
- break;
- case '\r':
- printf("\\r");
- break;
- case '\t':
- printf("\\t");
- break;
- case '\v':
- printf("\\v");
- break;
- default:
-#ifdef MB_CUR_MAX
- s = (char*)u - 1;
- if ((w = mblen(s, (char*)e - s)) > 1)
- {
- u += w - 1;
- fwrite(s, 1, w, stdout);
- }
- else
-#endif
- if (!iscntrl(c) && isprint(c))
- putchar(c);
- else
- printf("\\x%02x", c);
- break;
- }
- if (test & TEST_DELIMIT)
- printf("\"");
- }
- else
- printf("%s", s);
-}
-
-static void
-report(char* comment, char* fun, char* re, char* s, int len, char* msg, int flags, unsigned long test)
-{
- if (state.file)
- printf("%s:", state.file);
- printf("%d:", state.lineno);
- if (re)
- {
- printf(" ");
- quote(re, -1, test|TEST_DELIMIT);
- if (s)
- {
- printf(" versus ");
- quote(s, len, test|TEST_DELIMIT);
- }
- }
- if (test & TEST_UNSPECIFIED)
- {
- state.unspecified++;
- printf(" unspecified behavior");
- }
- else
- state.errors++;
- if (state.which)
- printf(" %s", state.which);
- if (flags & REG_NOSUB)
- printf(" NOSUB");
- if (fun)
- printf(" %s", fun);
- if (comment[strlen(comment)-1] == '\n')
- printf(" %s", comment);
- else
- {
- printf(" %s: ", comment);
- if (msg)
- printf("%s: ", msg);
- }
-}
-
-static void
-error(regex_t* preg, int code)
-{
- char* msg;
- char buf[256];
-
- switch (code)
- {
- case REG_EBUS:
- msg = "bus error";
- break;
- case REG_EFAULT:
- msg = "memory fault";
- break;
- case REG_EHUNG:
- msg = "did not terminate";
- break;
- default:
- regerror(code, preg, msg = buf, sizeof buf);
- break;
- }
- printf("%s\n", msg);
-}
-
-static void
-bad(char* comment, char* re, char* s, int len, unsigned long test)
-{
- printf("bad test case ");
- report(comment, NiL, re, s, len, NiL, 0, test);
- exit(1);
-}
-
-static int
-escape(char* s)
-{
- char* b;
- char* t;
- char* q;
- char* e;
- int c;
-
- for (b = t = s; *t = *s; s++, t++)
- if (*s == '\\')
- switch (*++s)
- {
- case '\\':
- break;
- case 'a':
- *t = '\a';
- break;
- case 'b':
- *t = '\b';
- break;
- case 'c':
- if (*t = *++s)
- *t &= 037;
- else
- s--;
- break;
- case 'e':
- case 'E':
- *t = 033;
- break;
- case 'f':
- *t = '\f';
- break;
- case 'n':
- *t = '\n';
- break;
- case 'r':
- *t = '\r';
- break;
- case 's':
- *t = ' ';
- break;
- case 't':
- *t = '\t';
- break;
- case 'v':
- *t = '\v';
- break;
- case 'u':
- case 'x':
- c = 0;
- q = c == 'u' ? (s + 5) : (char*)0;
- e = s + 1;
- while (!e || !q || s < q)
- {
- switch (*++s)
- {
- case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
- c = (c << 4) + *s - 'a' + 10;
- continue;
- case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
- c = (c << 4) + *s - 'A' + 10;
- continue;
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- c = (c << 4) + *s - '0';
- continue;
- case '{':
- case '[':
- if (s != e)
- {
- s--;
- break;
- }
- e = 0;
- continue;
- case '}':
- case ']':
- if (e)
- s--;
- break;
- default:
- s--;
- break;
- }
- break;
- }
- *t = c;
- break;
- case '0': case '1': case '2': case '3':
- case '4': case '5': case '6': case '7':
- c = *s - '0';
- q = s + 2;
- while (s < q)
- {
- switch (*++s)
- {
- case '0': case '1': case '2': case '3':
- case '4': case '5': case '6': case '7':
- c = (c << 3) + *s - '0';
- break;
- default:
- q = --s;
- break;
- }
- }
- *t = c;
- break;
- default:
- *(s + 1) = 0;
- bad("invalid C \\ escape\n", s - 1, NiL, 0, 0);
- }
- return t - b;
-}
-
-static void
-matchoffprint(int off)
-{
- switch (off)
- {
- case -2:
- printf("X");
- break;
- case -1:
- printf("?");
- break;
- default:
- printf("%d", off);
- break;
- }
-}
-
-static void
-matchprint(regmatch_t* match, int nmatch, int nsub, char* ans, unsigned long test)
-{
- int i;
-
- for (; nmatch > nsub + 1; nmatch--)
- if ((match[nmatch-1].rm_so != -1 || match[nmatch-1].rm_eo != -1) && (!(test & TEST_IGNORE_POSITION) || match[nmatch-1].rm_so >= 0 && match[nmatch-1].rm_eo >= 0))
- break;
- for (i = 0; i < nmatch; i++)
- {
- printf("(");
- matchoffprint(match[i].rm_so);
- printf(",");
- matchoffprint(match[i].rm_eo);
- printf(")");
- }
- if (!(test & (TEST_ACTUAL|TEST_BASELINE)))
- {
- if (ans)
- printf(" expected: %s", ans);
- printf("\n");
- }
-}
-
-static int
-matchcheck(regmatch_t* match, int nmatch, int nsub, char* ans, char* re, char* s, int len, int flags, unsigned long test)
-{
- char* p;
- int i;
- int m;
- int n;
-
- if (streq(ans, "OK"))
- return test & (TEST_BASELINE|TEST_PASS|TEST_VERIFY);
- for (i = 0, p = ans; i < nmatch && *p; i++)
- {
- if (*p == '{')
- {
-#ifdef REG_DISCIPLINE
- char* x;
-
- if (!(x = sfstruse(state.disc.sp)))
- bad("out of space [discipline string]\n", NiL, NiL, 0, 0);
- if (strcmp(p, x))
- {
- if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))
- return 0;
- report("callout failed", NiL, re, s, len, NiL, flags, test);
- quote(p, -1, test);
- printf(" expected, ");
- quote(x, -1, test);
- printf(" returned\n");
- }
-#endif
- break;
- }
- if (*p++ != '(')
- bad("improper answer\n", re, s, -1, test);
- if (*p == '?')
- {
- m = -1;
- p++;
- }
- else if (*p == 'R' && !memcmp(p, "RE_DUP_MAX", 10))
- {
- m = RE_DUP_MAX;
- p += 10;
- if (*p == '+' || *p == '-')
- m += strtol(p, &p, 10);
- }
- else
- m = strtol(p, &p, 10);
- if (*p++ != ',')
- bad("improper answer\n", re, s, -1, test);
- if (*p == '?')
- {
- n = -1;
- p++;
- }
- else if (*p == 'R' && !memcmp(p, "RE_DUP_MAX", 10))
- {
- n = RE_DUP_MAX;
- p += 10;
- if (*p == '+' || *p == '-')
- n += strtol(p, &p, 10);
- }
- else
- n = strtol(p, &p, 10);
- if (*p++ != ')')
- bad("improper answer\n", re, s, -1, test);
- if (m!=match[i].rm_so || n!=match[i].rm_eo)
- {
- if (!(test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY)))
- {
- report("failed: match was", NiL, re, s, len, NiL, flags, test);
- matchprint(match, nmatch, nsub, ans, test);
- }
- return 0;
- }
- }
- for (; i < nmatch; i++)
- {
- if (match[i].rm_so!=-1 || match[i].rm_eo!=-1)
- {
- if (!(test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_VERIFY)))
- {
- if ((test & TEST_IGNORE_POSITION) && (match[i].rm_so<0 || match[i].rm_eo<0))
- {
- state.ignored++;
- return 0;
- }
- if (!(test & TEST_SUMMARY))
- {
- report("failed: match was", NiL, re, s, len, NiL, flags, test);
- matchprint(match, nmatch, nsub, ans, test);
- }
- }
- return 0;
- }
- }
- if (!(test & TEST_IGNORE_OVER) && match[nmatch].rm_so != state.NOMATCH.rm_so)
- {
- if (!(test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY)))
- {
- report("failed: overran match array", NiL, re, s, len, NiL, flags, test);
- matchprint(match, nmatch + 1, nsub, NiL, test);
- }
- return 0;
- }
- return 1;
-}
-
-static void
-sigunblock(int s)
-{
-#ifdef SIG_SETMASK
- int op;
- sigset_t mask;
-
- sigemptyset(&mask);
- if (s)
- {
- sigaddset(&mask, s);
- op = SIG_UNBLOCK;
- }
- else op = SIG_SETMASK;
- sigprocmask(op, &mask, NiL);
-#else
-#ifdef sigmask
- sigsetmask(s ? (sigsetmask(0L) & ~sigmask(s)) : 0L);
-#endif
-#endif
-}
-
-static void
-gotcha(int sig)
-{
- int ret;
-
- signal(sig, gotcha);
- alarm(0);
- state.signals++;
- switch (sig)
- {
- case SIGALRM:
- ret = REG_EHUNG;
- break;
- case SIGBUS:
- ret = REG_EBUS;
- break;
- default:
- ret = REG_EFAULT;
- break;
- }
- sigunblock(sig);
- longjmp(state.gotcha, ret);
-}
-
-static char*
-getline(FILE* fp)
-{
- static char buf[32 * 1024];
-
- register char* s = buf;
- register char* e = &buf[sizeof(buf)];
- register char* b;
-
- for (;;)
- {
- if (!(b = fgets(s, e - s, fp)))
- return 0;
- state.lineno++;
- s += strlen(s);
- if (s == b || *--s != '\n' || s == b || *(s - 1) != '\\')
- {
- *s = 0;
- break;
- }
- s--;
- }
- return buf;
-}
-
-static unsigned long
-note(unsigned long level, char* msg, unsigned long skip, unsigned long test)
-{
- if (!(test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_SUMMARY)) && !skip)
- {
- printf("NOTE\t");
- if (msg)
- printf("%s: ", msg);
- printf("skipping lines %d", state.lineno);
- }
- return skip | level;
-}
-
-#define TABS(n) &ts[7-((n)&7)]
-
-static char ts[] = "\t\t\t\t\t\t\t";
-
-static unsigned long
-extract(int* tabs, char* spec, char* re, char* s, char* ans, char* msg, char* accept, regmatch_t* match, int nmatch, int nsub, unsigned long skip, unsigned long level, unsigned long test)
-{
- if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_OK|TEST_PASS|TEST_SUMMARY))
- {
- state.extracted = 1;
- if (test & TEST_OK)
- {
- state.passed++;
- if ((test & TEST_VERIFY) && !(test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_SUMMARY)))
- {
- if (msg && strcmp(msg, "EXPECTED"))
- printf("NOTE\t%s\n", msg);
- return skip;
- }
- test &= ~(TEST_PASS|TEST_QUERY);
- }
- if (test & (TEST_QUERY|TEST_VERIFY))
- {
- if (test & TEST_BASELINE)
- test &= ~(TEST_BASELINE|TEST_PASS);
- else
- test |= TEST_PASS;
- skip |= level;
- }
- if (!(test & TEST_OK))
- {
- if (test & TEST_UNSPECIFIED)
- state.unspecified++;
- else
- state.errors++;
- }
- if (test & (TEST_PASS|TEST_SUMMARY))
- return skip;
- test &= ~TEST_DELIMIT;
- printf("%s%s", spec, TABS(*tabs++));
- if ((test & (TEST_BASELINE|TEST_SAME)) == (TEST_BASELINE|TEST_SAME))
- printf("SAME");
- else
- quote(re, -1, test);
- printf("%s", TABS(*tabs++));
- quote(s, -1, test);
- printf("%s", TABS(*tabs++));
- if (!(test & (TEST_ACTUAL|TEST_BASELINE)) || !accept && !match)
- printf("%s", ans);
- else if (accept)
- printf("%s", accept);
- else
- matchprint(match, nmatch, nsub, NiL, test);
- if (msg)
- printf("%s%s", TABS(*tabs++), msg);
- putchar('\n');
- }
- else if (test & TEST_QUERY)
- skip = note(level, msg, skip, test);
- else if (test & TEST_VERIFY)
- state.extracted = 1;
- return skip;
-}
-
-static int
-catchfree(regex_t* preg, int flags, int* tabs, char* spec, char* re, char* s, char* ans, char* msg, char* accept, regmatch_t* match, int nmatch, int nsub, unsigned long skip, unsigned long level, unsigned long test)
-{
- int eret;
-
- if (!(test & TEST_CATCH))
- {
- regfree(preg);
- eret = 0;
- }
- else if (!(eret = setjmp(state.gotcha)))
- {
- alarm(HUNG);
- regfree(preg);
- alarm(0);
- }
- else if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))
- extract(tabs, spec, re, s, ans, msg, NiL, NiL, 0, 0, skip, level, test);
- else
- {
- report("failed", "regfree", re, NiL, -1, msg, flags, test);
- error(preg, eret);
- }
- return eret;
-}
-
-static char*
-expand(char* os, char* ot)
-{
- char* s = os;
- char* t;
- int n = 0;
- int r;
- long m;
-
- for (;;)
- {
- switch (*s++)
- {
- case 0:
- break;
- case '{':
- n++;
- continue;
- case '}':
- n--;
- continue;
- case 'R':
- if (n == 1 && !memcmp(s, "E_DUP_MAX", 9))
- {
- s--;
- for (t = ot; os < s; *t++ = *os++);
- r = ((t - ot) >= 5 && t[-1] == '{' && t[-2] == '.' && t[-3] == '.' && t[-4] == '.') ? t[-5] : 0;
- os = ot;
- m = RE_DUP_MAX;
- if (*(s += 10) == '+' || *s == '-')
- m += strtol(s, &s, 10);
- if (r)
- {
- t -= 5;
- while (m-- > 0)
- *t++ = r;
- while (*s && *s++ != '}');
- }
- else
- t += snprintf(t, 32, "%ld", m);
- while (*t = *s++)
- t++;
- break;
- }
- continue;
- default:
- continue;
- }
- break;
- }
- return os;
-}
-
-int
-main(int argc, char** argv)
-{
- int flags;
- int cflags;
- int eflags;
- int nmatch;
- int nexec;
- int nstr;
- int cret;
- int eret;
- int nsub;
- int i;
- int j;
- int expected;
- int got;
- int locale;
- int subunitlen;
- int testno;
- unsigned long level;
- unsigned long skip;
- char* p;
- char* line;
- char* spec;
- char* re;
- char* s;
- char* ans;
- char* msg;
- char* fun;
- char* ppat;
- char* subunit;
- char* version;
- char* field[6];
- char* delim[6];
- FILE* fp;
- int tabs[6];
- char unit[64];
- regmatch_t match[100];
- regex_t preg;
-
- static char pat[32 * 1024];
- static char patbuf[32 * 1024];
- static char strbuf[32 * 1024];
-
- int nonosub = REG_NOSUB == 0;
- int nonexec = 0;
-
- unsigned long test = 0;
-
- static char* filter[] = { "-", 0 };
-
- state.NOMATCH.rm_so = state.NOMATCH.rm_eo = -2;
- p = unit;
- version = (char*)id + 10;
- while (p < &unit[sizeof(unit)-1] && (*p = *version++) && !isspace(*p))
- p++;
- *p = 0;
- while ((p = *++argv) && *p == '-')
- for (;;)
- {
- switch (*++p)
- {
- case 0:
- break;
- case 'c':
- test |= TEST_CATCH;
- continue;
- case 'e':
- test |= TEST_IGNORE_ERROR;
- continue;
- case 'h':
- case '?':
- help(0);
- return 2;
- case '-':
- help(p[1] == 'h');
- return 2;
- case 'n':
- nonexec = 1;
- continue;
- case 'o':
- test |= TEST_IGNORE_OVER;
- continue;
- case 'p':
- test |= TEST_IGNORE_POSITION;
- continue;
- case 's':
-#ifdef REG_DISCIPLINE
- if (!(state.stack = stkalloc(stkstd, 0)))
- fprintf(stderr, "%s: out of space [stack]", unit);
- state.disc.disc.re_resizef = resizef;
- state.disc.disc.re_resizehandle = (void*)stkstd;
-#endif
- continue;
- case 'x':
- nonosub = 1;
- continue;
- case 'v':
- test |= TEST_VERBOSE;
- continue;
- case 'A':
- test |= TEST_ACTUAL;
- continue;
- case 'B':
- test |= TEST_BASELINE;
- continue;
- case 'F':
- test |= TEST_FAIL;
- continue;
- case 'P':
- test |= TEST_PASS;
- continue;
- case 'S':
- test |= TEST_SUMMARY;
- continue;
- default:
- fprintf(stderr, "%s: %c: invalid option\n", unit, *p);
- return 2;
- }
- break;
- }
- if (!*argv)
- argv = filter;
- locale = 0;
- while (state.file = *argv++)
- {
- if (streq(state.file, "-") || streq(state.file, "/dev/stdin") || streq(state.file, "/dev/fd/0"))
- {
- state.file = 0;
- fp = stdin;
- }
- else if (!(fp = fopen(state.file, "r")))
- {
- fprintf(stderr, "%s: %s: cannot read\n", unit, state.file);
- return 2;
- }
- testno = state.errors = state.ignored = state.lineno = state.passed =
- state.signals = state.unspecified = state.warnings = 0;
- skip = 0;
- level = 1;
- if (!(test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_SUMMARY)))
- {
- printf("TEST\t%s ", unit);
- if (s = state.file)
- {
- subunit = p = 0;
- for (;;)
- {
- switch (*s++)
- {
- case 0:
- break;
- case '/':
- subunit = s;
- continue;
- case '.':
- p = s - 1;
- continue;
- default:
- continue;
- }
- break;
- }
- if (!subunit)
- subunit = state.file;
- if (p < subunit)
- p = s - 1;
- subunitlen = p - subunit;
- printf("%-.*s ", subunitlen, subunit);
- }
- else
- subunit = 0;
- for (s = version; *s && (*s != ' ' || *(s + 1) != '$'); s++)
- putchar(*s);
- if (test & TEST_CATCH)
- printf(", catch");
- if (test & TEST_IGNORE_ERROR)
- printf(", ignore error code mismatches");
- if (test & TEST_IGNORE_POSITION)
- printf(", ignore negative position mismatches");
-#ifdef REG_DISCIPLINE
- if (state.stack)
- printf(", stack");
-#endif
- if (test & TEST_VERBOSE)
- printf(", verbose");
- printf("\n");
-#ifdef REG_VERSIONID
- if (regerror(REG_VERSIONID, NiL, pat, sizeof(pat)) > 0)
- s = pat;
- else
-#endif
-#ifdef REG_TEST_VERSION
- s = REG_TEST_VERSION;
-#else
- s = "regex";
-#endif
- printf("NOTE\t%s\n", s);
- if (elementsof(unsupported) > 1)
- {
-#if (REG_TEST_DEFAULT & (REG_AUGMENTED|REG_EXTENDED|REG_SHELL)) || !defined(REG_EXTENDED)
- i = 0;
-#else
- i = REG_EXTENDED != 0;
-#endif
- for (got = 0; i < elementsof(unsupported) - 1; i++)
- {
- if (!got)
- {
- got = 1;
- printf("NOTE\tunsupported: %s", unsupported[i]);
- }
- else
- printf(",%s", unsupported[i]);
- }
- if (got)
- printf("\n");
- }
- }
-#ifdef REG_DISCIPLINE
- state.disc.disc.re_version = REG_VERSION;
- state.disc.disc.re_compf = compf;
- state.disc.disc.re_execf = execf;
- if (!(state.disc.sp = sfstropen()))
- bad("out of space [discipline string stream]\n", NiL, NiL, 0, 0);
- preg.re_disc = &state.disc.disc;
-#endif
- if (test & TEST_CATCH)
- {
- signal(SIGALRM, gotcha);
- signal(SIGBUS, gotcha);
- signal(SIGSEGV, gotcha);
- }
- while (p = getline(fp))
- {
-
- /* parse: */
-
- line = p;
- if (*p == ':' && !isspace(*(p + 1)))
- {
- while (*++p && *p != ':');
- if (!*p++)
- {
- if (test & TEST_BASELINE)
- printf("%s\n", line);
- continue;
- }
- }
- while (isspace(*p))
- p++;
- if (*p == 0 || *p == '#' || *p == 'T')
- {
- if (test & TEST_BASELINE)
- printf("%s\n", line);
- continue;
- }
- if (*p == ':' || *p == 'N')
- {
- if (test & TEST_BASELINE)
- printf("%s\n", line);
- else if (!(test & (TEST_ACTUAL|TEST_FAIL|TEST_PASS|TEST_SUMMARY)))
- {
- while (*++p && !isspace(*p));
- while (isspace(*p))
- p++;
- printf("NOTE %s\n", p);
- }
- continue;
- }
- j = 0;
- i = 0;
- field[i++] = p;
- for (;;)
- {
- switch (*p++)
- {
- case 0:
- p--;
- j = 0;
- goto checkfield;
- case '\t':
- *(delim[i] = p - 1) = 0;
- j = 1;
- checkfield:
- s = field[i - 1];
- if (streq(s, "NIL"))
- field[i - 1] = 0;
- else if (streq(s, "NULL"))
- *s = 0;
- while (*p == '\t')
- {
- p++;
- j++;
- }
- tabs[i - 1] = j;
- if (!*p)
- break;
- if (i >= elementsof(field))
- bad("too many fields\n", NiL, NiL, 0, 0);
- field[i++] = p;
- /*FALLTHROUGH*/
- default:
- continue;
- }
- break;
- }
- if (!(spec = field[0]))
- bad("NIL spec\n", NiL, NiL, 0, 0);
-
- /* interpret: */
-
- cflags = REG_TEST_DEFAULT;
- eflags = REG_EXEC_DEFAULT;
- test &= TEST_GLOBAL;
- state.extracted = 0;
- nmatch = 20;
- nsub = -1;
- for (p = spec; *p; p++)
- {
- if (isdigit(*p))
- {
- nmatch = strtol(p, &p, 10);
- if (nmatch >= elementsof(match))
- bad("nmatch must be < 100\n", NiL, NiL, 0, 0);
- p--;
- continue;
- }
- switch (*p)
- {
- case 'A':
- test |= TEST_ARE;
- continue;
- case 'B':
- test |= TEST_BRE;
- continue;
- case 'C':
- if (!(test & TEST_QUERY) && !(skip & level))
- bad("locale must be nested\n", NiL, NiL, 0, 0);
- test &= ~TEST_QUERY;
- if (locale)
- bad("locale nesting not supported\n", NiL, NiL, 0, 0);
- if (i != 2)
- bad("locale field expected\n", NiL, NiL, 0, 0);
- if (!(skip & level))
- {
-#if defined(LC_COLLATE) && defined(LC_CTYPE)
- s = field[1];
- if (!s || streq(s, "POSIX"))
- s = "C";
- if ((ans = setlocale(LC_COLLATE, s)) && streq(ans, "POSIX"))
- ans = "C";
- if (!ans || !streq(ans, s) && streq(s, "C"))
- ans = 0;
- else if ((ans = setlocale(LC_CTYPE, s)) && streq(ans, "POSIX"))
- ans = "C";
- if (!ans || !streq(ans, s) && streq(s, "C"))
- skip = note(level, s, skip, test);
- else
- {
- if (!(test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_SUMMARY)))
- printf("NOTE \"%s\" locale\n", s);
- locale = level;
- }
-#else
- skip = note(level, skip, test, "locales not supported");
-#endif
- }
- cflags = NOTEST;
- continue;
- case 'E':
- test |= TEST_ERE;
- continue;
- case 'K':
- test |= TEST_KRE;
- continue;
- case 'L':
- test |= TEST_LRE;
- continue;
- case 'S':
- test |= TEST_SRE;
- continue;
-
- case 'a':
- cflags |= REG_LEFT|REG_RIGHT;
- continue;
- case 'b':
- eflags |= REG_NOTBOL;
- continue;
- case 'c':
- cflags |= REG_COMMENT;
- continue;
- case 'd':
- cflags |= REG_SHELL_DOT;
- continue;
- case 'e':
- eflags |= REG_NOTEOL;
- continue;
- case 'f':
- cflags |= REG_MULTIPLE;
- continue;
- case 'g':
- cflags |= NOTEST;
- continue;
- case 'h':
- cflags |= REG_MULTIREF;
- continue;
- case 'i':
- cflags |= REG_ICASE;
- continue;
- case 'j':
- cflags |= REG_SPAN;
- continue;
- case 'k':
- cflags |= REG_ESCAPE;
- continue;
- case 'l':
- cflags |= REG_LEFT;
- continue;
- case 'm':
- cflags |= REG_MINIMAL;
- continue;
- case 'n':
- cflags |= REG_NEWLINE;
- continue;
- case 'o':
- cflags |= REG_SHELL_GROUP;
- continue;
- case 'p':
- cflags |= REG_SHELL_PATH;
- continue;
- case 'q':
- cflags |= REG_DELIMITED;
- continue;
- case 'r':
- cflags |= REG_RIGHT;
- continue;
- case 's':
- cflags |= REG_SHELL_ESCAPED;
- continue;
- case 't':
- cflags |= REG_MUSTDELIM;
- continue;
- case 'u':
- test |= TEST_UNSPECIFIED;
- continue;
- case 'v':
- cflags |= REG_CLASS_ESCAPE;
- continue;
- case 'w':
- cflags |= REG_NOSUB;
- continue;
- case 'x':
- if (REG_LENIENT)
- cflags |= REG_LENIENT;
- else
- test |= TEST_LENIENT;
- continue;
- case 'y':
- eflags |= REG_LEFT;
- continue;
- case 'z':
- cflags |= REG_NULL;
- continue;
-
- case '$':
- test |= TEST_EXPAND;
- continue;
-
- case '/':
- test |= TEST_SUB;
- continue;
-
- case '=':
- test |= TEST_DECOMP;
- continue;
-
- case '?':
- test |= TEST_VERIFY;
- test &= ~(TEST_AND|TEST_OR);
- state.verify = state.passed;
- continue;
- case '&':
- test |= TEST_VERIFY|TEST_AND;
- test &= ~TEST_OR;
- continue;
- case '|':
- test |= TEST_VERIFY|TEST_OR;
- test &= ~TEST_AND;
- continue;
- case ';':
- test |= TEST_OR;
- test &= ~TEST_AND;
- continue;
-
- case '{':
- level <<= 1;
- if (skip & (level >> 1))
- {
- skip |= level;
- cflags = NOTEST;
- }
- else
- {
- skip &= ~level;
- test |= TEST_QUERY;
- }
- continue;
- case '}':
- if (level == 1)
- bad("invalid {...} nesting\n", NiL, NiL, 0, 0);
- if ((skip & level) && !(skip & (level>>1)))
- {
- if (!(test & (TEST_BASELINE|TEST_SUMMARY)))
- {
- if (test & (TEST_ACTUAL|TEST_FAIL))
- printf("}\n");
- else if (!(test & TEST_PASS))
- printf("-%d\n", state.lineno);
- }
- }
-#if defined(LC_COLLATE) && defined(LC_CTYPE)
- else if (locale & level)
- {
- locale = 0;
- if (!(skip & level))
- {
- s = "C";
- setlocale(LC_COLLATE, s);
- setlocale(LC_CTYPE, s);
- if (!(test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_SUMMARY)))
- printf("NOTE \"%s\" locale\n", s);
- else if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_PASS))
- printf("}\n");
- }
- else if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL))
- printf("}\n");
- }
-#endif
- level >>= 1;
- cflags = NOTEST;
- continue;
-
- default:
- bad("bad spec\n", spec, NiL, 0, test);
- break;
-
- }
- break;
- }
- if ((cflags|eflags) == NOTEST || (skip & level) && (test & TEST_BASELINE))
- {
- if (test & TEST_BASELINE)
- {
- while (i > 1)
- *delim[--i] = '\t';
- printf("%s\n", line);
- }
- continue;
- }
- if (test & TEST_OR)
- {
- if (!(test & TEST_VERIFY))
- {
- test &= ~TEST_OR;
- if (state.passed == state.verify && i > 1)
- printf("NOTE\t%s\n", field[1]);
- continue;
- }
- else if (state.passed > state.verify)
- continue;
- }
- else if (test & TEST_AND)
- {
- if (state.passed == state.verify)
- continue;
- state.passed = state.verify;
- }
- if (i < ((test & TEST_DECOMP) ? 3 : 4))
- bad("too few fields\n", NiL, NiL, 0, test);
- while (i < elementsof(field))
- field[i++] = 0;
- if (re = field[1])
- {
- if (streq(re, "SAME"))
- {
- re = ppat;
- test |= TEST_SAME;
- }
- else
- {
- if (test & TEST_EXPAND)
- escape(re);
- re = expand(re, patbuf);
- strcpy(ppat = pat, re);
- }
- }
- else
- ppat = 0;
- nstr = -1;
- if (s = field[2])
- {
- s = expand(s, strbuf);
- if (test & TEST_EXPAND)
- {
- nstr = escape(s);
-#if _REG_nexec
- if (nstr != strlen(s))
- nexec = nstr;
-#endif
- }
- }
- if (!(ans = field[(test & TEST_DECOMP) ? 2 : 3]))
- bad("NIL answer\n", NiL, NiL, 0, test);
- msg = field[4];
- fflush(stdout);
- if (test & TEST_SUB)
-#if _REG_subcomp
- cflags |= REG_DELIMITED;
-#else
- continue;
-#endif
-#if !_REG_decomp
- if (test & TEST_DECOMP)
- continue;
-#endif
-
- compile:
-
- if (state.extracted || (skip & level))
- continue;
-#if !(REG_TEST_DEFAULT & (REG_AUGMENTED|REG_EXTENDED|REG_SHELL))
-#ifdef REG_EXTENDED
- if (REG_EXTENDED != 0 && (test & TEST_BRE))
-#else
- if (test & TEST_BRE)
-#endif
- {
- test &= ~TEST_BRE;
- flags = cflags;
- state.which = "BRE";
- }
- else
-#endif
-#ifdef REG_EXTENDED
- if (test & TEST_ERE)
- {
- test &= ~TEST_ERE;
- flags = cflags | REG_EXTENDED;
- state.which = "ERE";
- }
- else
-#endif
-#ifdef REG_AUGMENTED
- if (test & TEST_ARE)
- {
- test &= ~TEST_ARE;
- flags = cflags | REG_AUGMENTED;
- state.which = "ARE";
- }
- else
-#endif
-#ifdef REG_LITERAL
- if (test & TEST_LRE)
- {
- test &= ~TEST_LRE;
- flags = cflags | REG_LITERAL;
- state.which = "LRE";
- }
- else
-#endif
-#ifdef REG_SHELL
- if (test & TEST_SRE)
- {
- test &= ~TEST_SRE;
- flags = cflags | REG_SHELL;
- state.which = "SRE";
- }
- else
-#ifdef REG_AUGMENTED
- if (test & TEST_KRE)
- {
- test &= ~TEST_KRE;
- flags = cflags | REG_SHELL | REG_AUGMENTED;
- state.which = "KRE";
- }
- else
-#endif
-#endif
- {
- if (test & (TEST_BASELINE|TEST_PASS|TEST_VERIFY))
- extract(tabs, line, re, s, ans, msg, NiL, NiL, 0, 0, skip, level, test|TEST_OK);
- continue;
- }
- if ((test & (TEST_QUERY|TEST_VERBOSE|TEST_VERIFY)) == TEST_VERBOSE)
- {
- printf("test %-3d %s ", state.lineno, state.which);
- quote(re, -1, test|TEST_DELIMIT);
- printf(" ");
- quote(s, nstr, test|TEST_DELIMIT);
- printf("\n");
- }
-
- nosub:
- fun = "regcomp";
-#if _REG_nexec
- if (nstr >= 0 && nstr != strlen(s))
- nexec = nstr;
-
- else
-#endif
- nexec = -1;
- if (state.extracted || (skip & level))
- continue;
- if (!(test & TEST_QUERY))
- testno++;
-#ifdef REG_DISCIPLINE
- if (state.stack)
- stkset(stkstd, state.stack, 0);
- flags |= REG_DISCIPLINE;
- state.disc.ordinal = 0;
- sfstrseek(state.disc.sp, 0, SEEK_SET);
-#endif
- if (!(test & TEST_CATCH))
- cret = regcomp(&preg, re, flags);
- else if (!(cret = setjmp(state.gotcha)))
- {
- alarm(HUNG);
- cret = regcomp(&preg, re, flags);
- alarm(0);
- }
-#if _REG_subcomp
- if (!cret && (test & TEST_SUB))
- {
- fun = "regsubcomp";
- p = re + preg.re_npat;
- if (!(test & TEST_CATCH))
- cret = regsubcomp(&preg, p, NiL, 0, 0);
- else if (!(cret = setjmp(state.gotcha)))
- {
- alarm(HUNG);
- cret = regsubcomp(&preg, p, NiL, 0, 0);
- alarm(0);
- }
- if (!cret && *(p += preg.re_npat) && !(preg.re_sub->re_flags & REG_SUB_LAST))
- {
- if (catchfree(&preg, flags, tabs, line, re, s, ans, msg, NiL, NiL, 0, 0, skip, level, test))
- continue;
- cret = REG_EFLAGS;
- }
- }
-#endif
-#if _REG_decomp
- if (!cret && (test & TEST_DECOMP))
- {
- char buf[128];
-
- if ((j = nmatch) > sizeof(buf))
- j = sizeof(buf);
- fun = "regdecomp";
- p = re + preg.re_npat;
- if (!(test & TEST_CATCH))
- i = regdecomp(&preg, -1, buf, j);
- else if (!(cret = setjmp(state.gotcha)))
- {
- alarm(HUNG);
- i = regdecomp(&preg, -1, buf, j);
- alarm(0);
- }
- if (!cret)
- {
- catchfree(&preg, flags, tabs, line, re, s, ans, msg, NiL, NiL, 0, 0, skip, level, test);
- if (i > j)
- {
- if (i != (strlen(ans) + 1))
- {
- report("failed", fun, re, s, nstr, msg, flags, test);
- printf(" %d byte buffer supplied, %d byte buffer required\n", j, i);
- }
- }
- else if (strcmp(buf, ans))
- {
- report("failed", fun, re, s, nstr, msg, flags, test);
- quote(ans, -1, test|TEST_DELIMIT);
- printf(" expected, ");
- quote(buf, -1, test|TEST_DELIMIT);
- printf(" returned\n");
- }
- continue;
- }
- }
-#endif
- if (!cret)
- {
- if (!(flags & REG_NOSUB) && nsub < 0 && *ans == '(')
- {
- for (p = ans; *p; p++)
- if (*p == '(')
- nsub++;
- else if (*p == '{')
- nsub--;
- if (nsub >= 0)
- {
- if (test & TEST_IGNORE_OVER)
- {
- if (nmatch > nsub)
- nmatch = nsub + 1;
- }
- else if (nsub != preg.re_nsub)
- {
- if (nsub > preg.re_nsub)
- {
- if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))
- skip = extract(tabs, line, re, s, ans, msg, "OK", NiL, 0, 0, skip, level, test|TEST_DELIMIT);
- else
- {
- report("re_nsub incorrect", fun, re, NiL, -1, msg, flags, test);
- printf("at least %d expected, %d returned\n", nsub, preg.re_nsub);
- state.errors++;
- }
- }
- else
- nsub = preg.re_nsub;
- }
- }
- }
- if (!(test & (TEST_DECOMP|TEST_SUB)) && *ans && *ans != '(' && !streq(ans, "OK") && !streq(ans, "NOMATCH"))
- {
- if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))
- skip = extract(tabs, line, re, s, ans, msg, "OK", NiL, 0, 0, skip, level, test|TEST_DELIMIT);
- else if (!(test & TEST_LENIENT))
- {
- report("failed", fun, re, NiL, -1, msg, flags, test);
- printf("%s expected, OK returned\n", ans);
- }
- catchfree(&preg, flags, tabs, line, re, s, ans, msg, NiL, NiL, 0, 0, skip, level, test);
- continue;
- }
- }
- else
- {
- if (test & TEST_LENIENT)
- /* we'll let it go this time */;
- else if (!*ans || ans[0]=='(' || cret == REG_BADPAT && streq(ans, "NOMATCH"))
- {
- got = 0;
- for (i = 1; i < elementsof(codes); i++)
- if (cret==codes[i].code)
- got = i;
- if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))
- skip = extract(tabs, line, re, s, ans, msg, codes[got].name, NiL, 0, 0, skip, level, test|TEST_DELIMIT);
- else
- {
- report("failed", fun, re, NiL, -1, msg, flags, test);
- printf("%s returned: ", codes[got].name);
- error(&preg, cret);
- }
- }
- else
- {
- expected = got = 0;
- for (i = 1; i < elementsof(codes); i++)
- {
- if (streq(ans, codes[i].name))
- expected = i;
- if (cret==codes[i].code)
- got = i;
- }
- if (!expected)
- {
- if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))
- skip = extract(tabs, line, re, s, ans, msg, codes[got].name, NiL, 0, 0, skip, level, test|TEST_DELIMIT);
- else
- {
- report("failed: invalid error code", NiL, re, NiL, -1, msg, flags, test);
- printf("%s expected, %s returned\n", ans, codes[got].name);
- }
- }
- else if (cret != codes[expected].code && cret != REG_BADPAT)
- {
- if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))
- skip = extract(tabs, line, re, s, ans, msg, codes[got].name, NiL, 0, 0, skip, level, test|TEST_DELIMIT);
- else if (test & TEST_IGNORE_ERROR)
- state.ignored++;
- else
- {
- report("should fail and did", fun, re, NiL, -1, msg, flags, test);
- printf("%s expected, %s returned: ", ans, codes[got].name);
- state.errors--;
- state.warnings++;
- error(&preg, cret);
- }
- }
- }
- goto compile;
- }
-
-#if _REG_nexec
- execute:
- if (nexec >= 0)
- fun = "regnexec";
- else
-#endif
- fun = "regexec";
-
- for (i = 0; i < elementsof(match); i++)
- match[i] = state.NOMATCH;
-
-#if _REG_nexec
- if (nexec >= 0)
- {
- eret = regnexec(&preg, s, nexec, nmatch, match, eflags);
- s[nexec] = 0;
- }
- else
-#endif
- {
- if (!(test & TEST_CATCH))
- eret = regexec(&preg, s, nmatch, match, eflags);
- else if (!(eret = setjmp(state.gotcha)))
- {
- alarm(HUNG);
- eret = regexec(&preg, s, nmatch, match, eflags);
- alarm(0);
- }
- }
-#if _REG_subcomp
- if ((test & TEST_SUB) && !eret)
- {
- fun = "regsubexec";
- if (!(test & TEST_CATCH))
- eret = regsubexec(&preg, s, nmatch, match);
- else if (!(eret = setjmp(state.gotcha)))
- {
- alarm(HUNG);
- eret = regsubexec(&preg, s, nmatch, match);
- alarm(0);
- }
- }
-#endif
- if (flags & REG_NOSUB)
- {
- if (eret)
- {
- if (eret != REG_NOMATCH || !streq(ans, "NOMATCH"))
- {
- if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))
- skip = extract(tabs, line, re, s, ans, msg, "NOMATCH", NiL, 0, 0, skip, level, test|TEST_DELIMIT);
- else
- {
- report("REG_NOSUB failed", fun, re, s, nstr, msg, flags, test);
- error(&preg, eret);
- }
- }
- }
- else if (streq(ans, "NOMATCH"))
- {
- if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))
- skip = extract(tabs, line, re, s, ans, msg, NiL, match, nmatch, nsub, skip, level, test|TEST_DELIMIT);
- else
- {
- report("should fail and didn't", fun, re, s, nstr, msg, flags, test);
- error(&preg, eret);
- }
- }
- }
- else if (eret)
- {
- if (eret != REG_NOMATCH || !streq(ans, "NOMATCH"))
- {
- if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))
- skip = extract(tabs, line, re, s, ans, msg, "NOMATCH", NiL, 0, nsub, skip, level, test|TEST_DELIMIT);
- else
- {
- report("failed", fun, re, s, nstr, msg, flags, test);
- if (eret != REG_NOMATCH)
- error(&preg, eret);
- else if (*ans)
- printf("expected: %s\n", ans);
- else
- printf("\n");
- }
- }
- }
- else if (streq(ans, "NOMATCH"))
- {
- if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))
- skip = extract(tabs, line, re, s, ans, msg, NiL, match, nmatch, nsub, skip, level, test|TEST_DELIMIT);
- else
- {
- report("should fail and didn't", fun, re, s, nstr, msg, flags, test);
- matchprint(match, nmatch, nsub, NiL, test);
- }
- }
-#if _REG_subcomp
- else if (test & TEST_SUB)
- {
- p = preg.re_sub->re_buf;
- if (strcmp(p, ans))
- {
- report("failed", fun, re, s, nstr, msg, flags, test);
- quote(ans, -1, test|TEST_DELIMIT);
- printf(" expected, ");
- quote(p, -1, test|TEST_DELIMIT);
- printf(" returned\n");
- }
- }
-#endif
- else if (!*ans)
- {
- if (match[0].rm_so != state.NOMATCH.rm_so)
- {
- if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))
- skip = extract(tabs, line, re, s, ans, msg, NiL, NiL, 0, 0, skip, level, test);
- else
- {
- report("failed: no match but match array assigned", NiL, re, s, nstr, msg, flags, test);
- matchprint(match, nmatch, nsub, NiL, test);
- }
- }
- }
- else if (matchcheck(match, nmatch, nsub, ans, re, s, nstr, flags, test))
- {
-#if _REG_nexec
- if (nexec < 0 && !nonexec)
- {
- nexec = nstr >= 0 ? nstr : strlen(s);
- s[nexec] = '\n';
- testno++;
- goto execute;
- }
-#endif
- if (!(test & (TEST_DECOMP|TEST_SUB|TEST_VERIFY)) && !nonosub)
- {
- if (catchfree(&preg, flags, tabs, line, re, s, ans, msg, NiL, NiL, 0, 0, skip, level, test))
- continue;
- flags |= REG_NOSUB;
- goto nosub;
- }
- if (test & (TEST_BASELINE|TEST_PASS|TEST_VERIFY))
- skip = extract(tabs, line, re, s, ans, msg, NiL, match, nmatch, nsub, skip, level, test|TEST_OK);
- }
- else if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))
- skip = extract(tabs, line, re, s, ans, msg, NiL, match, nmatch, nsub, skip, level, test|TEST_DELIMIT);
- if (catchfree(&preg, flags, tabs, line, re, s, ans, msg, NiL, NiL, 0, 0, skip, level, test))
- continue;
- goto compile;
- }
- if (test & TEST_SUMMARY)
- printf("tests=%-4d errors=%-4d warnings=%-2d ignored=%-2d unspecified=%-2d signals=%d\n", testno, state.errors, state.warnings, state.ignored, state.unspecified, state.signals);
- else if (!(test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS)))
- {
- printf("TEST\t%s", unit);
- if (subunit)
- printf(" %-.*s", subunitlen, subunit);
- printf(", %d test%s", testno, testno == 1 ? "" : "s");
- if (state.ignored)
- printf(", %d ignored mismatche%s", state.ignored, state.ignored == 1 ? "" : "s");
- if (state.warnings)
- printf(", %d warning%s", state.warnings, state.warnings == 1 ? "" : "s");
- if (state.unspecified)
- printf(", %d unspecified difference%s", state.unspecified, state.unspecified == 1 ? "" : "s");
- if (state.signals)
- printf(", %d signal%s", state.signals, state.signals == 1 ? "" : "s");
- printf(", %d error%s\n", state.errors, state.errors == 1 ? "" : "s");
- }
- if (fp != stdin)
- fclose(fp);
- }
- return 0;
-}