diff options
author | Tianon Gravi <admwiggin@gmail.com> | 2015-01-15 11:54:00 -0700 |
---|---|---|
committer | Tianon Gravi <admwiggin@gmail.com> | 2015-01-15 11:54:00 -0700 |
commit | f154da9e12608589e8d5f0508f908a0c3e88a1bb (patch) | |
tree | f8255d51e10c6f1e0ed69702200b966c9556a431 /src/pkg/regexp | |
parent | 8d8329ed5dfb9622c82a9fbec6fd99a580f9c9f6 (diff) | |
download | golang-upstream/1.4.tar.gz |
Imported Upstream version 1.4 (tag: upstream/1.4)
Diffstat (limited to 'src/pkg/regexp')
27 files changed, 0 insertions, 14990 deletions
diff --git a/src/pkg/regexp/all_test.go b/src/pkg/regexp/all_test.go deleted file mode 100644 index 301a1dfcd..000000000 --- a/src/pkg/regexp/all_test.go +++ /dev/null @@ -1,645 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package regexp - -import ( - "reflect" - "strings" - "testing" -) - -var good_re = []string{ - ``, - `.`, - `^.$`, - `a`, - `a*`, - `a+`, - `a?`, - `a|b`, - `a*|b*`, - `(a*|b)(c*|d)`, - `[a-z]`, - `[a-abc-c\-\]\[]`, - `[a-z]+`, - `[abc]`, - `[^1234]`, - `[^\n]`, - `\!\\`, -} - -type stringError struct { - re string - err string -} - -var bad_re = []stringError{ - {`*`, "missing argument to repetition operator: `*`"}, - {`+`, "missing argument to repetition operator: `+`"}, - {`?`, "missing argument to repetition operator: `?`"}, - {`(abc`, "missing closing ): `(abc`"}, - {`abc)`, "unexpected ): `abc)`"}, - {`x[a-z`, "missing closing ]: `[a-z`"}, - {`[z-a]`, "invalid character class range: `z-a`"}, - {`abc\`, "trailing backslash at end of expression"}, - {`a**`, "invalid nested repetition operator: `**`"}, - {`a*+`, "invalid nested repetition operator: `*+`"}, - {`\x`, "invalid escape sequence: `\\x`"}, -} - -func compileTest(t *testing.T, expr string, error string) *Regexp { - re, err := Compile(expr) - if error == "" && err != nil { - t.Error("compiling `", expr, "`; unexpected error: ", err.Error()) - } - if error != "" && err == nil { - t.Error("compiling `", expr, "`; missing error") - } else if error != "" && !strings.Contains(err.Error(), error) { - t.Error("compiling `", expr, "`; wrong error: ", err.Error(), "; want ", error) - } - return re -} - -func TestGoodCompile(t *testing.T) { - for i := 0; i < len(good_re); i++ { - compileTest(t, good_re[i], "") - } -} - -func TestBadCompile(t *testing.T) { - for i := 0; i < len(bad_re); i++ { - compileTest(t, bad_re[i].re, bad_re[i].err) - } -} - -func matchTest(t 
*testing.T, test *FindTest) { - re := compileTest(t, test.pat, "") - if re == nil { - return - } - m := re.MatchString(test.text) - if m != (len(test.matches) > 0) { - t.Errorf("MatchString failure on %s: %t should be %t", test, m, len(test.matches) > 0) - } - // now try bytes - m = re.Match([]byte(test.text)) - if m != (len(test.matches) > 0) { - t.Errorf("Match failure on %s: %t should be %t", test, m, len(test.matches) > 0) - } -} - -func TestMatch(t *testing.T) { - for _, test := range findTests { - matchTest(t, &test) - } -} - -func matchFunctionTest(t *testing.T, test *FindTest) { - m, err := MatchString(test.pat, test.text) - if err == nil { - return - } - if m != (len(test.matches) > 0) { - t.Errorf("Match failure on %s: %t should be %t", test, m, len(test.matches) > 0) - } -} - -func TestMatchFunction(t *testing.T) { - for _, test := range findTests { - matchFunctionTest(t, &test) - } -} - -type ReplaceTest struct { - pattern, replacement, input, output string -} - -var replaceTests = []ReplaceTest{ - // Test empty input and/or replacement, with pattern that matches the empty string. - {"", "", "", ""}, - {"", "x", "", "x"}, - {"", "", "abc", "abc"}, - {"", "x", "abc", "xaxbxcx"}, - - // Test empty input and/or replacement, with pattern that does not match the empty string. - {"b", "", "", ""}, - {"b", "x", "", ""}, - {"b", "", "abc", "ac"}, - {"b", "x", "abc", "axc"}, - {"y", "", "", ""}, - {"y", "x", "", ""}, - {"y", "", "abc", "abc"}, - {"y", "x", "abc", "abc"}, - - // Multibyte characters -- verify that we don't try to match in the middle - // of a character. - {"[a-c]*", "x", "\u65e5", "x\u65e5x"}, - {"[^\u65e5]", "x", "abc\u65e5def", "xxx\u65e5xxx"}, - - // Start and end of a string. 
- {"^[a-c]*", "x", "abcdabc", "xdabc"}, - {"[a-c]*$", "x", "abcdabc", "abcdx"}, - {"^[a-c]*$", "x", "abcdabc", "abcdabc"}, - {"^[a-c]*", "x", "abc", "x"}, - {"[a-c]*$", "x", "abc", "x"}, - {"^[a-c]*$", "x", "abc", "x"}, - {"^[a-c]*", "x", "dabce", "xdabce"}, - {"[a-c]*$", "x", "dabce", "dabcex"}, - {"^[a-c]*$", "x", "dabce", "dabce"}, - {"^[a-c]*", "x", "", "x"}, - {"[a-c]*$", "x", "", "x"}, - {"^[a-c]*$", "x", "", "x"}, - - {"^[a-c]+", "x", "abcdabc", "xdabc"}, - {"[a-c]+$", "x", "abcdabc", "abcdx"}, - {"^[a-c]+$", "x", "abcdabc", "abcdabc"}, - {"^[a-c]+", "x", "abc", "x"}, - {"[a-c]+$", "x", "abc", "x"}, - {"^[a-c]+$", "x", "abc", "x"}, - {"^[a-c]+", "x", "dabce", "dabce"}, - {"[a-c]+$", "x", "dabce", "dabce"}, - {"^[a-c]+$", "x", "dabce", "dabce"}, - {"^[a-c]+", "x", "", ""}, - {"[a-c]+$", "x", "", ""}, - {"^[a-c]+$", "x", "", ""}, - - // Other cases. - {"abc", "def", "abcdefg", "defdefg"}, - {"bc", "BC", "abcbcdcdedef", "aBCBCdcdedef"}, - {"abc", "", "abcdabc", "d"}, - {"x", "xXx", "xxxXxxx", "xXxxXxxXxXxXxxXxxXx"}, - {"abc", "d", "", ""}, - {"abc", "d", "abc", "d"}, - {".+", "x", "abc", "x"}, - {"[a-c]*", "x", "def", "xdxexfx"}, - {"[a-c]+", "x", "abcbcdcdedef", "xdxdedef"}, - {"[a-c]*", "x", "abcbcdcdedef", "xdxdxexdxexfx"}, - - // Substitutions - {"a+", "($0)", "banana", "b(a)n(a)n(a)"}, - {"a+", "(${0})", "banana", "b(a)n(a)n(a)"}, - {"a+", "(${0})$0", "banana", "b(a)an(a)an(a)a"}, - {"a+", "(${0})$0", "banana", "b(a)an(a)an(a)a"}, - {"hello, (.+)", "goodbye, ${1}", "hello, world", "goodbye, world"}, - {"hello, (.+)", "goodbye, $1x", "hello, world", "goodbye, "}, - {"hello, (.+)", "goodbye, ${1}x", "hello, world", "goodbye, worldx"}, - {"hello, (.+)", "<$0><$1><$2><$3>", "hello, world", "<hello, world><world><><>"}, - {"hello, (?P<noun>.+)", "goodbye, $noun!", "hello, world", "goodbye, world!"}, - {"hello, (?P<noun>.+)", "goodbye, ${noun}", "hello, world", "goodbye, world"}, - {"(?P<x>hi)|(?P<x>bye)", "$x$x$x", "hi", "hihihi"}, - {"(?P<x>hi)|(?P<x>bye)", 
"$x$x$x", "bye", "byebyebye"}, - {"(?P<x>hi)|(?P<x>bye)", "$xyz", "hi", ""}, - {"(?P<x>hi)|(?P<x>bye)", "${x}yz", "hi", "hiyz"}, - {"(?P<x>hi)|(?P<x>bye)", "hello $$x", "hi", "hello $x"}, - {"a+", "${oops", "aaa", "${oops"}, - {"a+", "$$", "aaa", "$"}, - {"a+", "$", "aaa", "$"}, - - // Substitution when subexpression isn't found - {"(x)?", "$1", "123", "123"}, - {"abc", "$1", "123", "123"}, -} - -var replaceLiteralTests = []ReplaceTest{ - // Substitutions - {"a+", "($0)", "banana", "b($0)n($0)n($0)"}, - {"a+", "(${0})", "banana", "b(${0})n(${0})n(${0})"}, - {"a+", "(${0})$0", "banana", "b(${0})$0n(${0})$0n(${0})$0"}, - {"a+", "(${0})$0", "banana", "b(${0})$0n(${0})$0n(${0})$0"}, - {"hello, (.+)", "goodbye, ${1}", "hello, world", "goodbye, ${1}"}, - {"hello, (?P<noun>.+)", "goodbye, $noun!", "hello, world", "goodbye, $noun!"}, - {"hello, (?P<noun>.+)", "goodbye, ${noun}", "hello, world", "goodbye, ${noun}"}, - {"(?P<x>hi)|(?P<x>bye)", "$x$x$x", "hi", "$x$x$x"}, - {"(?P<x>hi)|(?P<x>bye)", "$x$x$x", "bye", "$x$x$x"}, - {"(?P<x>hi)|(?P<x>bye)", "$xyz", "hi", "$xyz"}, - {"(?P<x>hi)|(?P<x>bye)", "${x}yz", "hi", "${x}yz"}, - {"(?P<x>hi)|(?P<x>bye)", "hello $$x", "hi", "hello $$x"}, - {"a+", "${oops", "aaa", "${oops"}, - {"a+", "$$", "aaa", "$$"}, - {"a+", "$", "aaa", "$"}, -} - -type ReplaceFuncTest struct { - pattern string - replacement func(string) string - input, output string -} - -var replaceFuncTests = []ReplaceFuncTest{ - {"[a-c]", func(s string) string { return "x" + s + "y" }, "defabcdef", "defxayxbyxcydef"}, - {"[a-c]+", func(s string) string { return "x" + s + "y" }, "defabcdef", "defxabcydef"}, - {"[a-c]*", func(s string) string { return "x" + s + "y" }, "defabcdef", "xydxyexyfxabcydxyexyfxy"}, -} - -func TestReplaceAll(t *testing.T) { - for _, tc := range replaceTests { - re, err := Compile(tc.pattern) - if err != nil { - t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err) - continue - } - actual := re.ReplaceAllString(tc.input, tc.replacement) - 
if actual != tc.output { - t.Errorf("%q.ReplaceAllString(%q,%q) = %q; want %q", - tc.pattern, tc.input, tc.replacement, actual, tc.output) - } - // now try bytes - actual = string(re.ReplaceAll([]byte(tc.input), []byte(tc.replacement))) - if actual != tc.output { - t.Errorf("%q.ReplaceAll(%q,%q) = %q; want %q", - tc.pattern, tc.input, tc.replacement, actual, tc.output) - } - } -} - -func TestReplaceAllLiteral(t *testing.T) { - // Run ReplaceAll tests that do not have $ expansions. - for _, tc := range replaceTests { - if strings.Contains(tc.replacement, "$") { - continue - } - re, err := Compile(tc.pattern) - if err != nil { - t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err) - continue - } - actual := re.ReplaceAllLiteralString(tc.input, tc.replacement) - if actual != tc.output { - t.Errorf("%q.ReplaceAllLiteralString(%q,%q) = %q; want %q", - tc.pattern, tc.input, tc.replacement, actual, tc.output) - } - // now try bytes - actual = string(re.ReplaceAllLiteral([]byte(tc.input), []byte(tc.replacement))) - if actual != tc.output { - t.Errorf("%q.ReplaceAllLiteral(%q,%q) = %q; want %q", - tc.pattern, tc.input, tc.replacement, actual, tc.output) - } - } - - // Run literal-specific tests. 
- for _, tc := range replaceLiteralTests { - re, err := Compile(tc.pattern) - if err != nil { - t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err) - continue - } - actual := re.ReplaceAllLiteralString(tc.input, tc.replacement) - if actual != tc.output { - t.Errorf("%q.ReplaceAllLiteralString(%q,%q) = %q; want %q", - tc.pattern, tc.input, tc.replacement, actual, tc.output) - } - // now try bytes - actual = string(re.ReplaceAllLiteral([]byte(tc.input), []byte(tc.replacement))) - if actual != tc.output { - t.Errorf("%q.ReplaceAllLiteral(%q,%q) = %q; want %q", - tc.pattern, tc.input, tc.replacement, actual, tc.output) - } - } -} - -func TestReplaceAllFunc(t *testing.T) { - for _, tc := range replaceFuncTests { - re, err := Compile(tc.pattern) - if err != nil { - t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err) - continue - } - actual := re.ReplaceAllStringFunc(tc.input, tc.replacement) - if actual != tc.output { - t.Errorf("%q.ReplaceFunc(%q,fn) = %q; want %q", - tc.pattern, tc.input, actual, tc.output) - } - // now try bytes - actual = string(re.ReplaceAllFunc([]byte(tc.input), func(s []byte) []byte { return []byte(tc.replacement(string(s))) })) - if actual != tc.output { - t.Errorf("%q.ReplaceFunc(%q,fn) = %q; want %q", - tc.pattern, tc.input, actual, tc.output) - } - } -} - -type MetaTest struct { - pattern, output, literal string - isLiteral bool -} - -var metaTests = []MetaTest{ - {``, ``, ``, true}, - {`foo`, `foo`, `foo`, true}, - {`foo\.\$`, `foo\\\.\\\$`, `foo.$`, true}, // has meta but no operator - {`foo.\$`, `foo\.\\\$`, `foo`, false}, // has escaped operators and real operators - {`!@#$%^&*()_+-=[{]}\|,<.>/?~`, `!@#\$%\^&\*\(\)_\+-=\[\{\]\}\\\|,<\.>/\?~`, `!@#`, false}, -} - -func TestQuoteMeta(t *testing.T) { - for _, tc := range metaTests { - // Verify that QuoteMeta returns the expected string. 
- quoted := QuoteMeta(tc.pattern) - if quoted != tc.output { - t.Errorf("QuoteMeta(`%s`) = `%s`; want `%s`", - tc.pattern, quoted, tc.output) - continue - } - - // Verify that the quoted string is in fact treated as expected - // by Compile -- i.e. that it matches the original, unquoted string. - if tc.pattern != "" { - re, err := Compile(quoted) - if err != nil { - t.Errorf("Unexpected error compiling QuoteMeta(`%s`): %v", tc.pattern, err) - continue - } - src := "abc" + tc.pattern + "def" - repl := "xyz" - replaced := re.ReplaceAllString(src, repl) - expected := "abcxyzdef" - if replaced != expected { - t.Errorf("QuoteMeta(`%s`).Replace(`%s`,`%s`) = `%s`; want `%s`", - tc.pattern, src, repl, replaced, expected) - } - } - } -} - -func TestLiteralPrefix(t *testing.T) { - for _, tc := range metaTests { - // Literal method needs to scan the pattern. - re := MustCompile(tc.pattern) - str, complete := re.LiteralPrefix() - if complete != tc.isLiteral { - t.Errorf("LiteralPrefix(`%s`) = %t; want %t", tc.pattern, complete, tc.isLiteral) - } - if str != tc.literal { - t.Errorf("LiteralPrefix(`%s`) = `%s`; want `%s`", tc.pattern, str, tc.literal) - } - } -} - -type subexpCase struct { - input string - num int - names []string -} - -var subexpCases = []subexpCase{ - {``, 0, nil}, - {`.*`, 0, nil}, - {`abba`, 0, nil}, - {`ab(b)a`, 1, []string{"", ""}}, - {`ab(.*)a`, 1, []string{"", ""}}, - {`(.*)ab(.*)a`, 2, []string{"", "", ""}}, - {`(.*)(ab)(.*)a`, 3, []string{"", "", "", ""}}, - {`(.*)((a)b)(.*)a`, 4, []string{"", "", "", "", ""}}, - {`(.*)(\(ab)(.*)a`, 3, []string{"", "", "", ""}}, - {`(.*)(\(a\)b)(.*)a`, 3, []string{"", "", "", ""}}, - {`(?P<foo>.*)(?P<bar>(a)b)(?P<foo>.*)a`, 4, []string{"", "foo", "bar", "", "foo"}}, -} - -func TestSubexp(t *testing.T) { - for _, c := range subexpCases { - re := MustCompile(c.input) - n := re.NumSubexp() - if n != c.num { - t.Errorf("%q: NumSubexp = %d, want %d", c.input, n, c.num) - continue - } - names := re.SubexpNames() - if 
len(names) != 1+n { - t.Errorf("%q: len(SubexpNames) = %d, want %d", c.input, len(names), n) - continue - } - if c.names != nil { - for i := 0; i < 1+n; i++ { - if names[i] != c.names[i] { - t.Errorf("%q: SubexpNames[%d] = %q, want %q", c.input, i, names[i], c.names[i]) - } - } - } - } -} - -var splitTests = []struct { - s string - r string - n int - out []string -}{ - {"foo:and:bar", ":", -1, []string{"foo", "and", "bar"}}, - {"foo:and:bar", ":", 1, []string{"foo:and:bar"}}, - {"foo:and:bar", ":", 2, []string{"foo", "and:bar"}}, - {"foo:and:bar", "foo", -1, []string{"", ":and:bar"}}, - {"foo:and:bar", "bar", -1, []string{"foo:and:", ""}}, - {"foo:and:bar", "baz", -1, []string{"foo:and:bar"}}, - {"baabaab", "a", -1, []string{"b", "", "b", "", "b"}}, - {"baabaab", "a*", -1, []string{"b", "b", "b"}}, - {"baabaab", "ba*", -1, []string{"", "", "", ""}}, - {"foobar", "f*b*", -1, []string{"", "o", "o", "a", "r"}}, - {"foobar", "f+.*b+", -1, []string{"", "ar"}}, - {"foobooboar", "o{2}", -1, []string{"f", "b", "boar"}}, - {"a,b,c,d,e,f", ",", 3, []string{"a", "b", "c,d,e,f"}}, - {"a,b,c,d,e,f", ",", 0, nil}, - {",", ",", -1, []string{"", ""}}, - {",,,", ",", -1, []string{"", "", "", ""}}, - {"", ",", -1, []string{""}}, - {"", ".*", -1, []string{""}}, - {"", ".+", -1, []string{""}}, - {"", "", -1, []string{}}, - {"foobar", "", -1, []string{"f", "o", "o", "b", "a", "r"}}, - {"abaabaccadaaae", "a*", 5, []string{"", "b", "b", "c", "cadaaae"}}, - {":x:y:z:", ":", -1, []string{"", "x", "y", "z", ""}}, -} - -func TestSplit(t *testing.T) { - for i, test := range splitTests { - re, err := Compile(test.r) - if err != nil { - t.Errorf("#%d: %q: compile error: %s", i, test.r, err.Error()) - continue - } - - split := re.Split(test.s, test.n) - if !reflect.DeepEqual(split, test.out) { - t.Errorf("#%d: %q: got %q; want %q", i, test.r, split, test.out) - } - - if QuoteMeta(test.r) == test.r { - strsplit := strings.SplitN(test.s, test.r, test.n) - if !reflect.DeepEqual(split, strsplit) { - 
t.Errorf("#%d: Split(%q, %q, %d): regexp vs strings mismatch\nregexp=%q\nstrings=%q", i, test.s, test.r, test.n, split, strsplit) - } - } - } -} - -// This ran out of stack before issue 7608 was fixed. -func TestOnePassCutoff(t *testing.T) { - MustCompile(`^(?:x{1,1000}){1,1000}$`) -} - -func BenchmarkLiteral(b *testing.B) { - x := strings.Repeat("x", 50) + "y" - b.StopTimer() - re := MustCompile("y") - b.StartTimer() - for i := 0; i < b.N; i++ { - if !re.MatchString(x) { - b.Fatalf("no match!") - } - } -} - -func BenchmarkNotLiteral(b *testing.B) { - x := strings.Repeat("x", 50) + "y" - b.StopTimer() - re := MustCompile(".y") - b.StartTimer() - for i := 0; i < b.N; i++ { - if !re.MatchString(x) { - b.Fatalf("no match!") - } - } -} - -func BenchmarkMatchClass(b *testing.B) { - b.StopTimer() - x := strings.Repeat("xxxx", 20) + "w" - re := MustCompile("[abcdw]") - b.StartTimer() - for i := 0; i < b.N; i++ { - if !re.MatchString(x) { - b.Fatalf("no match!") - } - } -} - -func BenchmarkMatchClass_InRange(b *testing.B) { - b.StopTimer() - // 'b' is between 'a' and 'c', so the charclass - // range checking is no help here. - x := strings.Repeat("bbbb", 20) + "c" - re := MustCompile("[ac]") - b.StartTimer() - for i := 0; i < b.N; i++ { - if !re.MatchString(x) { - b.Fatalf("no match!") - } - } -} - -func BenchmarkReplaceAll(b *testing.B) { - x := "abcdefghijklmnopqrstuvwxyz" - b.StopTimer() - re := MustCompile("[cjrw]") - b.StartTimer() - for i := 0; i < b.N; i++ { - re.ReplaceAllString(x, "") - } -} - -func BenchmarkAnchoredLiteralShortNonMatch(b *testing.B) { - b.StopTimer() - x := []byte("abcdefghijklmnopqrstuvwxyz") - re := MustCompile("^zbc(d|e)") - b.StartTimer() - for i := 0; i < b.N; i++ { - re.Match(x) - } -} - -func BenchmarkAnchoredLiteralLongNonMatch(b *testing.B) { - b.StopTimer() - x := []byte("abcdefghijklmnopqrstuvwxyz") - for i := 0; i < 15; i++ { - x = append(x, x...) 
- } - re := MustCompile("^zbc(d|e)") - b.StartTimer() - for i := 0; i < b.N; i++ { - re.Match(x) - } -} - -func BenchmarkAnchoredShortMatch(b *testing.B) { - b.StopTimer() - x := []byte("abcdefghijklmnopqrstuvwxyz") - re := MustCompile("^.bc(d|e)") - b.StartTimer() - for i := 0; i < b.N; i++ { - re.Match(x) - } -} - -func BenchmarkAnchoredLongMatch(b *testing.B) { - b.StopTimer() - x := []byte("abcdefghijklmnopqrstuvwxyz") - for i := 0; i < 15; i++ { - x = append(x, x...) - } - re := MustCompile("^.bc(d|e)") - b.StartTimer() - for i := 0; i < b.N; i++ { - re.Match(x) - } -} - -func BenchmarkOnePassShortA(b *testing.B) { - b.StopTimer() - x := []byte("abcddddddeeeededd") - re := MustCompile("^.bc(d|e)*$") - b.StartTimer() - for i := 0; i < b.N; i++ { - re.Match(x) - } -} - -func BenchmarkNotOnePassShortA(b *testing.B) { - b.StopTimer() - x := []byte("abcddddddeeeededd") - re := MustCompile(".bc(d|e)*$") - b.StartTimer() - for i := 0; i < b.N; i++ { - re.Match(x) - } -} - -func BenchmarkOnePassShortB(b *testing.B) { - b.StopTimer() - x := []byte("abcddddddeeeededd") - re := MustCompile("^.bc(?:d|e)*$") - b.StartTimer() - for i := 0; i < b.N; i++ { - re.Match(x) - } -} - -func BenchmarkNotOnePassShortB(b *testing.B) { - b.StopTimer() - x := []byte("abcddddddeeeededd") - re := MustCompile(".bc(?:d|e)*$") - b.StartTimer() - for i := 0; i < b.N; i++ { - re.Match(x) - } -} - -func BenchmarkOnePassLongPrefix(b *testing.B) { - b.StopTimer() - x := []byte("abcdefghijklmnopqrstuvwxyz") - re := MustCompile("^abcdefghijklmnopqrstuvwxyz.*$") - b.StartTimer() - for i := 0; i < b.N; i++ { - re.Match(x) - } -} - -func BenchmarkOnePassLongNotPrefix(b *testing.B) { - b.StopTimer() - x := []byte("abcdefghijklmnopqrstuvwxyz") - re := MustCompile("^.bcdefghijklmnopqrstuvwxyz.*$") - b.StartTimer() - for i := 0; i < b.N; i++ { - re.Match(x) - } -} diff --git a/src/pkg/regexp/example_test.go b/src/pkg/regexp/example_test.go deleted file mode 100644 index a4e0da8ea..000000000 --- 
a/src/pkg/regexp/example_test.go +++ /dev/null @@ -1,148 +0,0 @@ -// Copyright 2013 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package regexp_test - -import ( - "fmt" - "regexp" -) - -func Example() { - // Compile the expression once, usually at init time. - // Use raw strings to avoid having to quote the backslashes. - var validID = regexp.MustCompile(`^[a-z]+\[[0-9]+\]$`) - - fmt.Println(validID.MatchString("adam[23]")) - fmt.Println(validID.MatchString("eve[7]")) - fmt.Println(validID.MatchString("Job[48]")) - fmt.Println(validID.MatchString("snakey")) - // Output: - // true - // true - // false - // false -} - -func ExampleMatchString() { - matched, err := regexp.MatchString("foo.*", "seafood") - fmt.Println(matched, err) - matched, err = regexp.MatchString("bar.*", "seafood") - fmt.Println(matched, err) - matched, err = regexp.MatchString("a(b", "seafood") - fmt.Println(matched, err) - // Output: - // true <nil> - // false <nil> - // false error parsing regexp: missing closing ): `a(b` -} - -func ExampleRegexp_FindString() { - re := regexp.MustCompile("fo.?") - fmt.Printf("%q\n", re.FindString("seafood")) - fmt.Printf("%q\n", re.FindString("meat")) - // Output: - // "foo" - // "" -} - -func ExampleRegexp_FindStringIndex() { - re := regexp.MustCompile("ab?") - fmt.Println(re.FindStringIndex("tablett")) - fmt.Println(re.FindStringIndex("foo") == nil) - // Output: - // [1 3] - // true -} - -func ExampleRegexp_FindStringSubmatch() { - re := regexp.MustCompile("a(x*)b(y|z)c") - fmt.Printf("%q\n", re.FindStringSubmatch("-axxxbyc-")) - fmt.Printf("%q\n", re.FindStringSubmatch("-abzc-")) - // Output: - // ["axxxbyc" "xxx" "y"] - // ["abzc" "" "z"] -} - -func ExampleRegexp_FindAllString() { - re := regexp.MustCompile("a.") - fmt.Println(re.FindAllString("paranormal", -1)) - fmt.Println(re.FindAllString("paranormal", 2)) - fmt.Println(re.FindAllString("graal", -1)) - 
fmt.Println(re.FindAllString("none", -1)) - // Output: - // [ar an al] - // [ar an] - // [aa] - // [] -} - -func ExampleRegexp_FindAllStringSubmatch() { - re := regexp.MustCompile("a(x*)b") - fmt.Printf("%q\n", re.FindAllStringSubmatch("-ab-", -1)) - fmt.Printf("%q\n", re.FindAllStringSubmatch("-axxb-", -1)) - fmt.Printf("%q\n", re.FindAllStringSubmatch("-ab-axb-", -1)) - fmt.Printf("%q\n", re.FindAllStringSubmatch("-axxb-ab-", -1)) - // Output: - // [["ab" ""]] - // [["axxb" "xx"]] - // [["ab" ""] ["axb" "x"]] - // [["axxb" "xx"] ["ab" ""]] -} - -func ExampleRegexp_FindAllStringSubmatchIndex() { - re := regexp.MustCompile("a(x*)b") - // Indices: - // 01234567 012345678 - // -ab-axb- -axxb-ab- - fmt.Println(re.FindAllStringSubmatchIndex("-ab-", -1)) - fmt.Println(re.FindAllStringSubmatchIndex("-axxb-", -1)) - fmt.Println(re.FindAllStringSubmatchIndex("-ab-axb-", -1)) - fmt.Println(re.FindAllStringSubmatchIndex("-axxb-ab-", -1)) - fmt.Println(re.FindAllStringSubmatchIndex("-foo-", -1)) - // Output: - // [[1 3 2 2]] - // [[1 5 2 4]] - // [[1 3 2 2] [4 7 5 6]] - // [[1 5 2 4] [6 8 7 7]] - // [] -} - -func ExampleRegexp_ReplaceAllLiteralString() { - re := regexp.MustCompile("a(x*)b") - fmt.Println(re.ReplaceAllLiteralString("-ab-axxb-", "T")) - fmt.Println(re.ReplaceAllLiteralString("-ab-axxb-", "$1")) - fmt.Println(re.ReplaceAllLiteralString("-ab-axxb-", "${1}")) - // Output: - // -T-T- - // -$1-$1- - // -${1}-${1}- -} - -func ExampleRegexp_ReplaceAllString() { - re := regexp.MustCompile("a(x*)b") - fmt.Println(re.ReplaceAllString("-ab-axxb-", "T")) - fmt.Println(re.ReplaceAllString("-ab-axxb-", "$1")) - fmt.Println(re.ReplaceAllString("-ab-axxb-", "$1W")) - fmt.Println(re.ReplaceAllString("-ab-axxb-", "${1}W")) - // Output: - // -T-T- - // --xx- - // --- - // -W-xxW- -} - -func ExampleRegexp_SubexpNames() { - re := regexp.MustCompile("(?P<first>[a-zA-Z]+) (?P<last>[a-zA-Z]+)") - fmt.Println(re.MatchString("Alan Turing")) - fmt.Printf("%q\n", re.SubexpNames()) - 
reversed := fmt.Sprintf("${%s} ${%s}", re.SubexpNames()[2], re.SubexpNames()[1]) - fmt.Println(reversed) - fmt.Println(re.ReplaceAllString("Alan Turing", reversed)) - // Output: - // true - // ["" "first" "last"] - // ${last} ${first} - // Turing Alan -} diff --git a/src/pkg/regexp/exec.go b/src/pkg/regexp/exec.go deleted file mode 100644 index c4cb201f6..000000000 --- a/src/pkg/regexp/exec.go +++ /dev/null @@ -1,452 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package regexp - -import ( - "io" - "regexp/syntax" -) - -// A queue is a 'sparse array' holding pending threads of execution. -// See http://research.swtch.com/2008/03/using-uninitialized-memory-for-fun-and.html -type queue struct { - sparse []uint32 - dense []entry -} - -// A entry is an entry on a queue. -// It holds both the instruction pc and the actual thread. -// Some queue entries are just place holders so that the machine -// knows it has considered that pc. Such entries have t == nil. -type entry struct { - pc uint32 - t *thread -} - -// A thread is the state of a single path through the machine: -// an instruction and a corresponding capture array. -// See http://swtch.com/~rsc/regexp/regexp2.html -type thread struct { - inst *syntax.Inst - cap []int -} - -// A machine holds all the state during an NFA simulation for p. 
-type machine struct { - re *Regexp // corresponding Regexp - p *syntax.Prog // compiled program - op *onePassProg // compiled onepass program, or notOnePass - q0, q1 queue // two queues for runq, nextq - pool []*thread // pool of available threads - matched bool // whether a match was found - matchcap []int // capture information for the match - - // cached inputs, to avoid allocation - inputBytes inputBytes - inputString inputString - inputReader inputReader -} - -func (m *machine) newInputBytes(b []byte) input { - m.inputBytes.str = b - return &m.inputBytes -} - -func (m *machine) newInputString(s string) input { - m.inputString.str = s - return &m.inputString -} - -func (m *machine) newInputReader(r io.RuneReader) input { - m.inputReader.r = r - m.inputReader.atEOT = false - m.inputReader.pos = 0 - return &m.inputReader -} - -// progMachine returns a new machine running the prog p. -func progMachine(p *syntax.Prog, op *onePassProg) *machine { - m := &machine{p: p, op: op} - n := len(m.p.Inst) - m.q0 = queue{make([]uint32, n), make([]entry, 0, n)} - m.q1 = queue{make([]uint32, n), make([]entry, 0, n)} - ncap := p.NumCap - if ncap < 2 { - ncap = 2 - } - m.matchcap = make([]int, ncap) - return m -} - -func (m *machine) init(ncap int) { - for _, t := range m.pool { - t.cap = t.cap[:ncap] - } - m.matchcap = m.matchcap[:ncap] -} - -// alloc allocates a new thread with the given instruction. -// It uses the free pool if possible. -func (m *machine) alloc(i *syntax.Inst) *thread { - var t *thread - if n := len(m.pool); n > 0 { - t = m.pool[n-1] - m.pool = m.pool[:n-1] - } else { - t = new(thread) - t.cap = make([]int, len(m.matchcap), cap(m.matchcap)) - } - t.inst = i - return t -} - -// free returns t to the free pool. -func (m *machine) free(t *thread) { - m.inputBytes.str = nil - m.inputString.str = "" - m.inputReader.r = nil - m.pool = append(m.pool, t) -} - -// match runs the machine over the input starting at pos. -// It reports whether a match was found. 
-// If so, m.matchcap holds the submatch information. -func (m *machine) match(i input, pos int) bool { - startCond := m.re.cond - if startCond == ^syntax.EmptyOp(0) { // impossible - return false - } - m.matched = false - for i := range m.matchcap { - m.matchcap[i] = -1 - } - runq, nextq := &m.q0, &m.q1 - r, r1 := endOfText, endOfText - width, width1 := 0, 0 - r, width = i.step(pos) - if r != endOfText { - r1, width1 = i.step(pos + width) - } - var flag syntax.EmptyOp - if pos == 0 { - flag = syntax.EmptyOpContext(-1, r) - } else { - flag = i.context(pos) - } - for { - if len(runq.dense) == 0 { - if startCond&syntax.EmptyBeginText != 0 && pos != 0 { - // Anchored match, past beginning of text. - break - } - if m.matched { - // Have match; finished exploring alternatives. - break - } - if len(m.re.prefix) > 0 && r1 != m.re.prefixRune && i.canCheckPrefix() { - // Match requires literal prefix; fast search for it. - advance := i.index(m.re, pos) - if advance < 0 { - break - } - pos += advance - r, width = i.step(pos) - r1, width1 = i.step(pos + width) - } - } - if !m.matched { - if len(m.matchcap) > 0 { - m.matchcap[0] = pos - } - m.add(runq, uint32(m.p.Start), pos, m.matchcap, flag, nil) - } - flag = syntax.EmptyOpContext(r, r1) - m.step(runq, nextq, pos, pos+width, r, flag) - if width == 0 { - break - } - if len(m.matchcap) == 0 && m.matched { - // Found a match and not paying attention - // to where it is, so any match will do. - break - } - pos += width - r, width = r1, width1 - if r != endOfText { - r1, width1 = i.step(pos + width) - } - runq, nextq = nextq, runq - } - m.clear(nextq) - return m.matched -} - -// clear frees all threads on the thread queue. -func (m *machine) clear(q *queue) { - for _, d := range q.dense { - if d.t != nil { - // m.free(d.t) - m.pool = append(m.pool, d.t) - } - } - q.dense = q.dense[:0] -} - -// step executes one step of the machine, running each of the threads -// on runq and appending new threads to nextq. 
-// The step processes the rune c (which may be endOfText), -// which starts at position pos and ends at nextPos. -// nextCond gives the setting for the empty-width flags after c. -func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond syntax.EmptyOp) { - longest := m.re.longest - for j := 0; j < len(runq.dense); j++ { - d := &runq.dense[j] - t := d.t - if t == nil { - continue - } - if longest && m.matched && len(t.cap) > 0 && m.matchcap[0] < t.cap[0] { - // m.free(t) - m.pool = append(m.pool, t) - continue - } - i := t.inst - add := false - switch i.Op { - default: - panic("bad inst") - - case syntax.InstMatch: - if len(t.cap) > 0 && (!longest || !m.matched || m.matchcap[1] < pos) { - t.cap[1] = pos - copy(m.matchcap, t.cap) - } - if !longest { - // First-match mode: cut off all lower-priority threads. - for _, d := range runq.dense[j+1:] { - if d.t != nil { - // m.free(d.t) - m.pool = append(m.pool, d.t) - } - } - runq.dense = runq.dense[:0] - } - m.matched = true - - case syntax.InstRune: - add = i.MatchRune(c) - case syntax.InstRune1: - add = c == i.Rune[0] - case syntax.InstRuneAny: - add = true - case syntax.InstRuneAnyNotNL: - add = c != '\n' - } - if add { - t = m.add(nextq, i.Out, nextPos, t.cap, nextCond, t) - } - if t != nil { - // m.free(t) - m.pool = append(m.pool, t) - } - } - runq.dense = runq.dense[:0] -} - -// add adds an entry to q for pc, unless the q already has such an entry. -// It also recursively adds an entry for all instructions reachable from pc by following -// empty-width conditions satisfied by cond. pos gives the current position -// in the input. 
-func (m *machine) add(q *queue, pc uint32, pos int, cap []int, cond syntax.EmptyOp, t *thread) *thread { - if pc == 0 { - return t - } - if j := q.sparse[pc]; j < uint32(len(q.dense)) && q.dense[j].pc == pc { - return t - } - - j := len(q.dense) - q.dense = q.dense[:j+1] - d := &q.dense[j] - d.t = nil - d.pc = pc - q.sparse[pc] = uint32(j) - - i := &m.p.Inst[pc] - switch i.Op { - default: - panic("unhandled") - case syntax.InstFail: - // nothing - case syntax.InstAlt, syntax.InstAltMatch: - t = m.add(q, i.Out, pos, cap, cond, t) - t = m.add(q, i.Arg, pos, cap, cond, t) - case syntax.InstEmptyWidth: - if syntax.EmptyOp(i.Arg)&^cond == 0 { - t = m.add(q, i.Out, pos, cap, cond, t) - } - case syntax.InstNop: - t = m.add(q, i.Out, pos, cap, cond, t) - case syntax.InstCapture: - if int(i.Arg) < len(cap) { - opos := cap[i.Arg] - cap[i.Arg] = pos - m.add(q, i.Out, pos, cap, cond, nil) - cap[i.Arg] = opos - } else { - t = m.add(q, i.Out, pos, cap, cond, t) - } - case syntax.InstMatch, syntax.InstRune, syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL: - if t == nil { - t = m.alloc(i) - } else { - t.inst = i - } - if len(cap) > 0 && &t.cap[0] != &cap[0] { - copy(t.cap, cap) - } - d.t = t - t = nil - } - return t -} - -// onepass runs the machine over the input starting at pos. -// It reports whether a match was found. -// If so, m.matchcap holds the submatch information. -func (m *machine) onepass(i input, pos int) bool { - startCond := m.re.cond - if startCond == ^syntax.EmptyOp(0) { // impossible - return false - } - m.matched = false - for i := range m.matchcap { - m.matchcap[i] = -1 - } - r, r1 := endOfText, endOfText - width, width1 := 0, 0 - r, width = i.step(pos) - if r != endOfText { - r1, width1 = i.step(pos + width) - } - var flag syntax.EmptyOp - if pos == 0 { - flag = syntax.EmptyOpContext(-1, r) - } else { - flag = i.context(pos) - } - pc := m.op.Start - inst := m.op.Inst[pc] - // If there is a simple literal prefix, skip over it. 
- if pos == 0 && syntax.EmptyOp(inst.Arg)&^flag == 0 && - len(m.re.prefix) > 0 && i.canCheckPrefix() { - // Match requires literal prefix; fast search for it. - if i.hasPrefix(m.re) { - pos += len(m.re.prefix) - r, width = i.step(pos) - r1, width1 = i.step(pos + width) - flag = i.context(pos) - pc = int(m.re.prefixEnd) - } else { - return m.matched - } - } - for { - inst = m.op.Inst[pc] - pc = int(inst.Out) - switch inst.Op { - default: - panic("bad inst") - case syntax.InstMatch: - m.matched = true - if len(m.matchcap) > 0 { - m.matchcap[0] = 0 - m.matchcap[1] = pos - } - return m.matched - case syntax.InstRune: - if !inst.MatchRune(r) { - return m.matched - } - case syntax.InstRune1: - if r != inst.Rune[0] { - return m.matched - } - case syntax.InstRuneAny: - // Nothing - case syntax.InstRuneAnyNotNL: - if r == '\n' { - return m.matched - } - // peek at the input rune to see which branch of the Alt to take - case syntax.InstAlt, syntax.InstAltMatch: - pc = int(onePassNext(&inst, r)) - continue - case syntax.InstFail: - return m.matched - case syntax.InstNop: - continue - case syntax.InstEmptyWidth: - if syntax.EmptyOp(inst.Arg)&^flag != 0 { - return m.matched - } - continue - case syntax.InstCapture: - if int(inst.Arg) < len(m.matchcap) { - m.matchcap[inst.Arg] = pos - } - continue - } - if width == 0 { - break - } - flag = syntax.EmptyOpContext(r, r1) - pos += width - r, width = r1, width1 - if r != endOfText { - r1, width1 = i.step(pos + width) - } - } - return m.matched -} - -// empty is a non-nil 0-element slice, -// so doExecute can avoid an allocation -// when 0 captures are requested from a successful match. -var empty = make([]int, 0) - -// doExecute finds the leftmost match in the input and returns -// the position of its subexpressions. 
-func (re *Regexp) doExecute(r io.RuneReader, b []byte, s string, pos int, ncap int) []int { - m := re.get() - var i input - if r != nil { - i = m.newInputReader(r) - } else if b != nil { - i = m.newInputBytes(b) - } else { - i = m.newInputString(s) - } - if m.op != notOnePass { - if !m.onepass(i, pos) { - re.put(m) - return nil - } - } else { - m.init(ncap) - if !m.match(i, pos) { - re.put(m) - return nil - } - } - if ncap == 0 { - re.put(m) - return empty // empty but not nil - } - cap := make([]int, len(m.matchcap)) - copy(cap, m.matchcap) - re.put(m) - return cap -} diff --git a/src/pkg/regexp/exec2_test.go b/src/pkg/regexp/exec2_test.go deleted file mode 100644 index 7b86b4115..000000000 --- a/src/pkg/regexp/exec2_test.go +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright 2013 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !race - -package regexp - -import ( - "testing" -) - -// This test is excluded when running under the race detector because -// it is a very expensive test and takes too long. -func TestRE2Exhaustive(t *testing.T) { - if testing.Short() { - t.Skip("skipping TestRE2Exhaustive during short test") - } - testRE2(t, "testdata/re2-exhaustive.txt.bz2") -} diff --git a/src/pkg/regexp/exec_test.go b/src/pkg/regexp/exec_test.go deleted file mode 100644 index 70d069c06..000000000 --- a/src/pkg/regexp/exec_test.go +++ /dev/null @@ -1,715 +0,0 @@ -// Copyright 2010 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -package regexp - -import ( - "bufio" - "compress/bzip2" - "fmt" - "io" - "os" - "path/filepath" - "regexp/syntax" - "strconv" - "strings" - "testing" - "unicode/utf8" -) - -// TestRE2 tests this package's regexp API against test cases -// considered during RE2's exhaustive tests, which run all possible -// regexps over a given set of atoms and operators, up to a given -// complexity, over all possible strings over a given alphabet, -// up to a given size. Rather than try to link with RE2, we read a -// log file containing the test cases and the expected matches. -// The log file, re2.txt, is generated by running 'make exhaustive-log' -// in the open source RE2 distribution. http://code.google.com/p/re2/ -// -// The test file format is a sequence of stanzas like: -// -// strings -// "abc" -// "123x" -// regexps -// "[a-z]+" -// 0-3;0-3 -// -;- -// "([0-9])([0-9])([0-9])" -// -;- -// -;0-3 0-1 1-2 2-3 -// -// The stanza begins by defining a set of strings, quoted -// using Go double-quote syntax, one per line. Then the -// regexps section gives a sequence of regexps to run on -// the strings. In the block that follows a regexp, each line -// gives the semicolon-separated match results of running -// the regexp on the corresponding string. -// Each match result is either a single -, meaning no match, or a -// space-separated sequence of pairs giving the match and -// submatch indices. An unmatched subexpression formats -// its pair as a single - (not illustrated above). For now -// each regexp run produces two match results, one for a -// ``full match'' that restricts the regexp to matching the entire -// string or nothing, and one for a ``partial match'' that gives -// the leftmost first match found in the string. -// -// Lines beginning with # are comments. Lines beginning with -// a capital letter are test names printed during RE2's test suite -// and are echoed into t but otherwise ignored. 
-// -// At time of writing, re2.txt is 32 MB but compresses to 760 kB, -// so we store re2.txt.gz in the repository and decompress it on the fly. -// -func TestRE2Search(t *testing.T) { - testRE2(t, "testdata/re2-search.txt") -} - -func testRE2(t *testing.T, file string) { - f, err := os.Open(file) - if err != nil { - t.Fatal(err) - } - defer f.Close() - var txt io.Reader - if strings.HasSuffix(file, ".bz2") { - z := bzip2.NewReader(f) - txt = z - file = file[:len(file)-len(".bz2")] // for error messages - } else { - txt = f - } - lineno := 0 - scanner := bufio.NewScanner(txt) - var ( - str []string - input []string - inStrings bool - re *Regexp - refull *Regexp - nfail int - ncase int - ) - for lineno := 1; scanner.Scan(); lineno++ { - line := scanner.Text() - switch { - case line == "": - t.Fatalf("%s:%d: unexpected blank line", file, lineno) - case line[0] == '#': - continue - case 'A' <= line[0] && line[0] <= 'Z': - // Test name. - t.Logf("%s\n", line) - continue - case line == "strings": - str = str[:0] - inStrings = true - case line == "regexps": - inStrings = false - case line[0] == '"': - q, err := strconv.Unquote(line) - if err != nil { - // Fatal because we'll get out of sync. - t.Fatalf("%s:%d: unquote %s: %v", file, lineno, line, err) - } - if inStrings { - str = append(str, q) - continue - } - // Is a regexp. - if len(input) != 0 { - t.Fatalf("%s:%d: out of sync: have %d strings left before %#q", file, lineno, len(input), q) - } - re, err = tryCompile(q) - if err != nil { - if err.Error() == "error parsing regexp: invalid escape sequence: `\\C`" { - // We don't and likely never will support \C; keep going. - continue - } - t.Errorf("%s:%d: compile %#q: %v", file, lineno, q, err) - if nfail++; nfail >= 100 { - t.Fatalf("stopping after %d errors", nfail) - } - continue - } - full := `\A(?:` + q + `)\z` - refull, err = tryCompile(full) - if err != nil { - // Fatal because q worked, so this should always work. 
- t.Fatalf("%s:%d: compile full %#q: %v", file, lineno, full, err) - } - input = str - case line[0] == '-' || '0' <= line[0] && line[0] <= '9': - // A sequence of match results. - ncase++ - if re == nil { - // Failed to compile: skip results. - continue - } - if len(input) == 0 { - t.Fatalf("%s:%d: out of sync: no input remaining", file, lineno) - } - var text string - text, input = input[0], input[1:] - if !isSingleBytes(text) && strings.Contains(re.String(), `\B`) { - // RE2's \B considers every byte position, - // so it sees 'not word boundary' in the - // middle of UTF-8 sequences. This package - // only considers the positions between runes, - // so it disagrees. Skip those cases. - continue - } - res := strings.Split(line, ";") - if len(res) != len(run) { - t.Fatalf("%s:%d: have %d test results, want %d", file, lineno, len(res), len(run)) - } - for i := range res { - have, suffix := run[i](re, refull, text) - want := parseResult(t, file, lineno, res[i]) - if !same(have, want) { - t.Errorf("%s:%d: %#q%s.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, re, suffix, text, have, want) - if nfail++; nfail >= 100 { - t.Fatalf("stopping after %d errors", nfail) - } - continue - } - b, suffix := match[i](re, refull, text) - if b != (want != nil) { - t.Errorf("%s:%d: %#q%s.MatchString(%#q) = %v, want %v", file, lineno, re, suffix, text, b, !b) - if nfail++; nfail >= 100 { - t.Fatalf("stopping after %d errors", nfail) - } - continue - } - } - - default: - t.Fatalf("%s:%d: out of sync: %s\n", file, lineno, line) - } - } - if err := scanner.Err(); err != nil { - t.Fatalf("%s:%d: %v", file, lineno, err) - } - if len(input) != 0 { - t.Fatalf("%s:%d: out of sync: have %d strings left at EOF", file, lineno, len(input)) - } - t.Logf("%d cases tested", ncase) -} - -var run = []func(*Regexp, *Regexp, string) ([]int, string){ - runFull, - runPartial, - runFullLongest, - runPartialLongest, -} - -func runFull(re, refull *Regexp, text string) ([]int, string) { - refull.longest = 
false - return refull.FindStringSubmatchIndex(text), "[full]" -} - -func runPartial(re, refull *Regexp, text string) ([]int, string) { - re.longest = false - return re.FindStringSubmatchIndex(text), "" -} - -func runFullLongest(re, refull *Regexp, text string) ([]int, string) { - refull.longest = true - return refull.FindStringSubmatchIndex(text), "[full,longest]" -} - -func runPartialLongest(re, refull *Regexp, text string) ([]int, string) { - re.longest = true - return re.FindStringSubmatchIndex(text), "[longest]" -} - -var match = []func(*Regexp, *Regexp, string) (bool, string){ - matchFull, - matchPartial, - matchFullLongest, - matchPartialLongest, -} - -func matchFull(re, refull *Regexp, text string) (bool, string) { - refull.longest = false - return refull.MatchString(text), "[full]" -} - -func matchPartial(re, refull *Regexp, text string) (bool, string) { - re.longest = false - return re.MatchString(text), "" -} - -func matchFullLongest(re, refull *Regexp, text string) (bool, string) { - refull.longest = true - return refull.MatchString(text), "[full,longest]" -} - -func matchPartialLongest(re, refull *Regexp, text string) (bool, string) { - re.longest = true - return re.MatchString(text), "[longest]" -} - -func isSingleBytes(s string) bool { - for _, c := range s { - if c >= utf8.RuneSelf { - return false - } - } - return true -} - -func tryCompile(s string) (re *Regexp, err error) { - // Protect against panic during Compile. - defer func() { - if r := recover(); r != nil { - err = fmt.Errorf("panic: %v", r) - } - }() - return Compile(s) -} - -func parseResult(t *testing.T, file string, lineno int, res string) []int { - // A single - indicates no match. - if res == "-" { - return nil - } - // Otherwise, a space-separated list of pairs. - n := 1 - for j := 0; j < len(res); j++ { - if res[j] == ' ' { - n++ - } - } - out := make([]int, 2*n) - i := 0 - n = 0 - for j := 0; j <= len(res); j++ { - if j == len(res) || res[j] == ' ' { - // Process a single pair. 
- means no submatch. - pair := res[i:j] - if pair == "-" { - out[n] = -1 - out[n+1] = -1 - } else { - k := strings.Index(pair, "-") - if k < 0 { - t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair) - } - lo, err1 := strconv.Atoi(pair[:k]) - hi, err2 := strconv.Atoi(pair[k+1:]) - if err1 != nil || err2 != nil || lo > hi { - t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair) - } - out[n] = lo - out[n+1] = hi - } - n += 2 - i = j + 1 - } - } - return out -} - -func same(x, y []int) bool { - if len(x) != len(y) { - return false - } - for i, xi := range x { - if xi != y[i] { - return false - } - } - return true -} - -// TestFowler runs this package's regexp API against the -// POSIX regular expression tests collected by Glenn Fowler -// at http://www2.research.att.com/~gsf/testregex/. -func TestFowler(t *testing.T) { - files, err := filepath.Glob("testdata/*.dat") - if err != nil { - t.Fatal(err) - } - for _, file := range files { - t.Log(file) - testFowler(t, file) - } -} - -var notab = MustCompilePOSIX(`[^\t]+`) - -func testFowler(t *testing.T, file string) { - f, err := os.Open(file) - if err != nil { - t.Error(err) - return - } - defer f.Close() - b := bufio.NewReader(f) - lineno := 0 - lastRegexp := "" -Reading: - for { - lineno++ - line, err := b.ReadString('\n') - if err != nil { - if err != io.EOF { - t.Errorf("%s:%d: %v", file, lineno, err) - } - break Reading - } - - // http://www2.research.att.com/~gsf/man/man1/testregex.html - // - // INPUT FORMAT - // Input lines may be blank, a comment beginning with #, or a test - // specification. A specification is five fields separated by one - // or more tabs. NULL denotes the empty string and NIL denotes the - // 0 pointer. 
- if line[0] == '#' || line[0] == '\n' { - continue Reading - } - line = line[:len(line)-1] - field := notab.FindAllString(line, -1) - for i, f := range field { - if f == "NULL" { - field[i] = "" - } - if f == "NIL" { - t.Logf("%s:%d: skip: %s", file, lineno, line) - continue Reading - } - } - if len(field) == 0 { - continue Reading - } - - // Field 1: the regex(3) flags to apply, one character per REG_feature - // flag. The test is skipped if REG_feature is not supported by the - // implementation. If the first character is not [BEASKLP] then the - // specification is a global control line. One or more of [BEASKLP] may be - // specified; the test will be repeated for each mode. - // - // B basic BRE (grep, ed, sed) - // E REG_EXTENDED ERE (egrep) - // A REG_AUGMENTED ARE (egrep with negation) - // S REG_SHELL SRE (sh glob) - // K REG_SHELL|REG_AUGMENTED KRE (ksh glob) - // L REG_LITERAL LRE (fgrep) - // - // a REG_LEFT|REG_RIGHT implicit ^...$ - // b REG_NOTBOL lhs does not match ^ - // c REG_COMMENT ignore space and #...\n - // d REG_SHELL_DOT explicit leading . match - // e REG_NOTEOL rhs does not match $ - // f REG_MULTIPLE multiple \n separated patterns - // g FNM_LEADING_DIR testfnmatch only -- match until / - // h REG_MULTIREF multiple digit backref - // i REG_ICASE ignore case - // j REG_SPAN . matches \n - // k REG_ESCAPE \ to ecape [...] delimiter - // l REG_LEFT implicit ^... - // m REG_MINIMAL minimal match - // n REG_NEWLINE explicit \n match - // o REG_ENCLOSED (|&) magic inside [@|&](...) - // p REG_SHELL_PATH explicit / match - // q REG_DELIMITED delimited pattern - // r REG_RIGHT implicit ...$ - // s REG_SHELL_ESCAPED \ not special - // t REG_MUSTDELIM all delimiters must be specified - // u standard unspecified behavior -- errors not counted - // v REG_CLASS_ESCAPE \ special inside [...] - // w REG_NOSUB no subexpression match array - // x REG_LENIENT let some errors slide - // y REG_LEFT regexec() implicit ^... 
- // z REG_NULL NULL subexpressions ok - // $ expand C \c escapes in fields 2 and 3 - // / field 2 is a regsubcomp() expression - // = field 3 is a regdecomp() expression - // - // Field 1 control lines: - // - // C set LC_COLLATE and LC_CTYPE to locale in field 2 - // - // ?test ... output field 5 if passed and != EXPECTED, silent otherwise - // &test ... output field 5 if current and previous passed - // |test ... output field 5 if current passed and previous failed - // ; ... output field 2 if previous failed - // {test ... skip if failed until } - // } end of skip - // - // : comment comment copied as output NOTE - // :comment:test :comment: ignored - // N[OTE] comment comment copied as output NOTE - // T[EST] comment comment - // - // number use number for nmatch (20 by default) - flag := field[0] - switch flag[0] { - case '?', '&', '|', ';', '{', '}': - // Ignore all the control operators. - // Just run everything. - flag = flag[1:] - if flag == "" { - continue Reading - } - case ':': - i := strings.Index(flag[1:], ":") - if i < 0 { - t.Logf("skip: %s", line) - continue Reading - } - flag = flag[1+i+1:] - case 'C', 'N', 'T', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - t.Logf("skip: %s", line) - continue Reading - } - - // Can check field count now that we've handled the myriad comment formats. - if len(field) < 4 { - t.Errorf("%s:%d: too few fields: %s", file, lineno, line) - continue Reading - } - - // Expand C escapes (a.k.a. Go escapes). - if strings.Contains(flag, "$") { - f := `"` + field[1] + `"` - if field[1], err = strconv.Unquote(f); err != nil { - t.Errorf("%s:%d: cannot unquote %s", file, lineno, f) - } - f = `"` + field[2] + `"` - if field[2], err = strconv.Unquote(f); err != nil { - t.Errorf("%s:%d: cannot unquote %s", file, lineno, f) - } - } - - // Field 2: the regular expression pattern; SAME uses the pattern from - // the previous specification. 
- // - if field[1] == "SAME" { - field[1] = lastRegexp - } - lastRegexp = field[1] - - // Field 3: the string to match. - text := field[2] - - // Field 4: the test outcome... - ok, shouldCompile, shouldMatch, pos := parseFowlerResult(field[3]) - if !ok { - t.Errorf("%s:%d: cannot parse result %#q", file, lineno, field[3]) - continue Reading - } - - // Field 5: optional comment appended to the report. - - Testing: - // Run test once for each specified capital letter mode that we support. - for _, c := range flag { - pattern := field[1] - syn := syntax.POSIX | syntax.ClassNL - switch c { - default: - continue Testing - case 'E': - // extended regexp (what we support) - case 'L': - // literal - pattern = QuoteMeta(pattern) - } - - for _, c := range flag { - switch c { - case 'i': - syn |= syntax.FoldCase - } - } - - re, err := compile(pattern, syn, true) - if err != nil { - if shouldCompile { - t.Errorf("%s:%d: %#q did not compile", file, lineno, pattern) - } - continue Testing - } - if !shouldCompile { - t.Errorf("%s:%d: %#q should not compile", file, lineno, pattern) - continue Testing - } - match := re.MatchString(text) - if match != shouldMatch { - t.Errorf("%s:%d: %#q.Match(%#q) = %v, want %v", file, lineno, pattern, text, match, shouldMatch) - continue Testing - } - have := re.FindStringSubmatchIndex(text) - if (len(have) > 0) != match { - t.Errorf("%s:%d: %#q.Match(%#q) = %v, but %#q.FindSubmatchIndex(%#q) = %v", file, lineno, pattern, text, match, pattern, text, have) - continue Testing - } - if len(have) > len(pos) { - have = have[:len(pos)] - } - if !same(have, pos) { - t.Errorf("%s:%d: %#q.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, pattern, text, have, pos) - } - } - } -} - -func parseFowlerResult(s string) (ok, compiled, matched bool, pos []int) { - // Field 4: the test outcome. 
This is either one of the posix error - // codes (with REG_ omitted) or the match array, a list of (m,n) - // entries with m and n being first and last+1 positions in the - // field 3 string, or NULL if REG_NOSUB is in effect and success - // is expected. BADPAT is acceptable in place of any regcomp(3) - // error code. The match[] array is initialized to (-2,-2) before - // each test. All array elements from 0 to nmatch-1 must be specified - // in the outcome. Unspecified endpoints (offset -1) are denoted by ?. - // Unset endpoints (offset -2) are denoted by X. {x}(o:n) denotes a - // matched (?{...}) expression, where x is the text enclosed by {...}, - // o is the expression ordinal counting from 1, and n is the length of - // the unmatched portion of the subject string. If x starts with a - // number then that is the return value of re_execf(), otherwise 0 is - // returned. - switch { - case s == "": - // Match with no position information. - ok = true - compiled = true - matched = true - return - case s == "NOMATCH": - // Match failure. - ok = true - compiled = true - matched = false - return - case 'A' <= s[0] && s[0] <= 'Z': - // All the other error codes are compile errors. - ok = true - compiled = false - return - } - compiled = true - - var x []int - for s != "" { - var end byte = ')' - if len(x)%2 == 0 { - if s[0] != '(' { - ok = false - return - } - s = s[1:] - end = ',' - } - i := 0 - for i < len(s) && s[i] != end { - i++ - } - if i == 0 || i == len(s) { - ok = false - return - } - var v = -1 - var err error - if s[:i] != "?" 
{ - v, err = strconv.Atoi(s[:i]) - if err != nil { - ok = false - return - } - } - x = append(x, v) - s = s[i+1:] - } - if len(x)%2 != 0 { - ok = false - return - } - ok = true - matched = true - pos = x - return -} - -var text []byte - -func makeText(n int) []byte { - if len(text) >= n { - return text[:n] - } - text = make([]byte, n) - x := ^uint32(0) - for i := range text { - x += x - x ^= 1 - if int32(x) < 0 { - x ^= 0x88888eef - } - if x%31 == 0 { - text[i] = '\n' - } else { - text[i] = byte(x%(0x7E+1-0x20) + 0x20) - } - } - return text -} - -func benchmark(b *testing.B, re string, n int) { - r := MustCompile(re) - t := makeText(n) - b.ResetTimer() - b.SetBytes(int64(n)) - for i := 0; i < b.N; i++ { - if r.Match(t) { - b.Fatal("match!") - } - } -} - -const ( - easy0 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ$" - easy1 = "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$" - medium = "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$" - hard = "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$" - parens = "([ -~])*(A)(B)(C)(D)(E)(F)(G)(H)(I)(J)(K)(L)(M)" + - "(N)(O)(P)(Q)(R)(S)(T)(U)(V)(W)(X)(Y)(Z)$" -) - -func BenchmarkMatchEasy0_32(b *testing.B) { benchmark(b, easy0, 32<<0) } -func BenchmarkMatchEasy0_1K(b *testing.B) { benchmark(b, easy0, 1<<10) } -func BenchmarkMatchEasy0_32K(b *testing.B) { benchmark(b, easy0, 32<<10) } -func BenchmarkMatchEasy0_1M(b *testing.B) { benchmark(b, easy0, 1<<20) } -func BenchmarkMatchEasy0_32M(b *testing.B) { benchmark(b, easy0, 32<<20) } -func BenchmarkMatchEasy1_32(b *testing.B) { benchmark(b, easy1, 32<<0) } -func BenchmarkMatchEasy1_1K(b *testing.B) { benchmark(b, easy1, 1<<10) } -func BenchmarkMatchEasy1_32K(b *testing.B) { benchmark(b, easy1, 32<<10) } -func BenchmarkMatchEasy1_1M(b *testing.B) { benchmark(b, easy1, 1<<20) } -func BenchmarkMatchEasy1_32M(b *testing.B) { benchmark(b, easy1, 32<<20) } -func BenchmarkMatchMedium_32(b *testing.B) { benchmark(b, medium, 32<<0) } -func BenchmarkMatchMedium_1K(b *testing.B) { benchmark(b, medium, 1<<10) } -func 
BenchmarkMatchMedium_32K(b *testing.B) { benchmark(b, medium, 32<<10) } -func BenchmarkMatchMedium_1M(b *testing.B) { benchmark(b, medium, 1<<20) } -func BenchmarkMatchMedium_32M(b *testing.B) { benchmark(b, medium, 32<<20) } -func BenchmarkMatchHard_32(b *testing.B) { benchmark(b, hard, 32<<0) } -func BenchmarkMatchHard_1K(b *testing.B) { benchmark(b, hard, 1<<10) } -func BenchmarkMatchHard_32K(b *testing.B) { benchmark(b, hard, 32<<10) } -func BenchmarkMatchHard_1M(b *testing.B) { benchmark(b, hard, 1<<20) } -func BenchmarkMatchHard_32M(b *testing.B) { benchmark(b, hard, 32<<20) } - -func TestLongest(t *testing.T) { - re, err := Compile(`a(|b)`) - if err != nil { - t.Fatal(err) - } - if g, w := re.FindString("ab"), "a"; g != w { - t.Errorf("first match was %q, want %q", g, w) - } - re.Longest() - if g, w := re.FindString("ab"), "ab"; g != w { - t.Errorf("longest match was %q, want %q", g, w) - } -} diff --git a/src/pkg/regexp/find_test.go b/src/pkg/regexp/find_test.go deleted file mode 100644 index e07eb7d5c..000000000 --- a/src/pkg/regexp/find_test.go +++ /dev/null @@ -1,498 +0,0 @@ -// Copyright 2010 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package regexp - -import ( - "fmt" - "strings" - "testing" -) - -// For each pattern/text pair, what is the expected output of each function? -// We can derive the textual results from the indexed results, the non-submatch -// results from the submatched results, the single results from the 'all' results, -// and the byte results from the string results. Therefore the table includes -// only the FindAllStringSubmatchIndex result. 
-type FindTest struct { - pat string - text string - matches [][]int -} - -func (t FindTest) String() string { - return fmt.Sprintf("pat: %#q text: %#q", t.pat, t.text) -} - -var findTests = []FindTest{ - {``, ``, build(1, 0, 0)}, - {`^abcdefg`, "abcdefg", build(1, 0, 7)}, - {`a+`, "baaab", build(1, 1, 4)}, - {"abcd..", "abcdef", build(1, 0, 6)}, - {`a`, "a", build(1, 0, 1)}, - {`x`, "y", nil}, - {`b`, "abc", build(1, 1, 2)}, - {`.`, "a", build(1, 0, 1)}, - {`.*`, "abcdef", build(1, 0, 6)}, - {`^`, "abcde", build(1, 0, 0)}, - {`$`, "abcde", build(1, 5, 5)}, - {`^abcd$`, "abcd", build(1, 0, 4)}, - {`^bcd'`, "abcdef", nil}, - {`^abcd$`, "abcde", nil}, - {`a+`, "baaab", build(1, 1, 4)}, - {`a*`, "baaab", build(3, 0, 0, 1, 4, 5, 5)}, - {`[a-z]+`, "abcd", build(1, 0, 4)}, - {`[^a-z]+`, "ab1234cd", build(1, 2, 6)}, - {`[a\-\]z]+`, "az]-bcz", build(2, 0, 4, 6, 7)}, - {`[^\n]+`, "abcd\n", build(1, 0, 4)}, - {`[日本語]+`, "日本語日本語", build(1, 0, 18)}, - {`日本語+`, "日本語", build(1, 0, 9)}, - {`日本語+`, "日本語語語語", build(1, 0, 18)}, - {`()`, "", build(1, 0, 0, 0, 0)}, - {`(a)`, "a", build(1, 0, 1, 0, 1)}, - {`(.)(.)`, "日a", build(1, 0, 4, 0, 3, 3, 4)}, - {`(.*)`, "", build(1, 0, 0, 0, 0)}, - {`(.*)`, "abcd", build(1, 0, 4, 0, 4)}, - {`(..)(..)`, "abcd", build(1, 0, 4, 0, 2, 2, 4)}, - {`(([^xyz]*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 3, 4)}, - {`((a|b|c)*(d))`, "abcd", build(1, 0, 4, 0, 4, 2, 3, 3, 4)}, - {`(((a|b|c)*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 2, 3, 3, 4)}, - {`\a\f\n\r\t\v`, "\a\f\n\r\t\v", build(1, 0, 6)}, - {`[\a\f\n\r\t\v]+`, "\a\f\n\r\t\v", build(1, 0, 6)}, - - {`a*(|(b))c*`, "aacc", build(1, 0, 4, 2, 2, -1, -1)}, - {`(.*).*`, "ab", build(1, 0, 2, 0, 2)}, - {`[.]`, ".", build(1, 0, 1)}, - {`/$`, "/abc/", build(1, 4, 5)}, - {`/$`, "/abc", nil}, - - // multiple matches - {`.`, "abc", build(3, 0, 1, 1, 2, 2, 3)}, - {`(.)`, "abc", build(3, 0, 1, 0, 1, 1, 2, 1, 2, 2, 3, 2, 3)}, - {`.(.)`, "abcd", build(2, 0, 2, 1, 2, 2, 4, 3, 4)}, - {`ab*`, "abbaab", build(3, 0, 3, 3, 
4, 4, 6)}, - {`a(b*)`, "abbaab", build(3, 0, 3, 1, 3, 3, 4, 4, 4, 4, 6, 5, 6)}, - - // fixed bugs - {`ab$`, "cab", build(1, 1, 3)}, - {`axxb$`, "axxcb", nil}, - {`data`, "daXY data", build(1, 5, 9)}, - {`da(.)a$`, "daXY data", build(1, 5, 9, 7, 8)}, - {`zx+`, "zzx", build(1, 1, 3)}, - {`ab$`, "abcab", build(1, 3, 5)}, - {`(aa)*$`, "a", build(1, 1, 1, -1, -1)}, - {`(?:.|(?:.a))`, "", nil}, - {`(?:A(?:A|a))`, "Aa", build(1, 0, 2)}, - {`(?:A|(?:A|a))`, "a", build(1, 0, 1)}, - {`(a){0}`, "", build(1, 0, 0, -1, -1)}, - {`(?-s)(?:(?:^).)`, "\n", nil}, - {`(?s)(?:(?:^).)`, "\n", build(1, 0, 1)}, - {`(?:(?:^).)`, "\n", nil}, - {`\b`, "x", build(2, 0, 0, 1, 1)}, - {`\b`, "xx", build(2, 0, 0, 2, 2)}, - {`\b`, "x y", build(4, 0, 0, 1, 1, 2, 2, 3, 3)}, - {`\b`, "xx yy", build(4, 0, 0, 2, 2, 3, 3, 5, 5)}, - {`\B`, "x", nil}, - {`\B`, "xx", build(1, 1, 1)}, - {`\B`, "x y", nil}, - {`\B`, "xx yy", build(2, 1, 1, 4, 4)}, - - // RE2 tests - {`[^\S\s]`, "abcd", nil}, - {`[^\S[:space:]]`, "abcd", nil}, - {`[^\D\d]`, "abcd", nil}, - {`[^\D[:digit:]]`, "abcd", nil}, - {`(?i)\W`, "x", nil}, - {`(?i)\W`, "k", nil}, - {`(?i)\W`, "s", nil}, - - // can backslash-escape any punctuation - {`\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~`, - `!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)}, - {`[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~]+`, - `!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)}, - {"\\`", "`", build(1, 0, 1)}, - {"[\\`]+", "`", build(1, 0, 1)}, - - // long set of matches (longer than startSize) - { - ".", - "qwertyuiopasdfghjklzxcvbnm1234567890", - build(36, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, - 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, - 20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30, - 30, 31, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36), - }, -} - -// build is a helper to construct a [][]int by extracting n sequences from x. 
-// This represents n matches with len(x)/n submatches each. -func build(n int, x ...int) [][]int { - ret := make([][]int, n) - runLength := len(x) / n - j := 0 - for i := range ret { - ret[i] = make([]int, runLength) - copy(ret[i], x[j:]) - j += runLength - if j > len(x) { - panic("invalid build entry") - } - } - return ret -} - -// First the simple cases. - -func TestFind(t *testing.T) { - for _, test := range findTests { - re := MustCompile(test.pat) - if re.String() != test.pat { - t.Errorf("String() = `%s`; should be `%s`", re.String(), test.pat) - } - result := re.Find([]byte(test.text)) - switch { - case len(test.matches) == 0 && len(result) == 0: - // ok - case test.matches == nil && result != nil: - t.Errorf("expected no match; got one: %s", test) - case test.matches != nil && result == nil: - t.Errorf("expected match; got none: %s", test) - case test.matches != nil && result != nil: - expect := test.text[test.matches[0][0]:test.matches[0][1]] - if expect != string(result) { - t.Errorf("expected %q got %q: %s", expect, result, test) - } - } - } -} - -func TestFindString(t *testing.T) { - for _, test := range findTests { - result := MustCompile(test.pat).FindString(test.text) - switch { - case len(test.matches) == 0 && len(result) == 0: - // ok - case test.matches == nil && result != "": - t.Errorf("expected no match; got one: %s", test) - case test.matches != nil && result == "": - // Tricky because an empty result has two meanings: no match or empty match. 
- if test.matches[0][0] != test.matches[0][1] { - t.Errorf("expected match; got none: %s", test) - } - case test.matches != nil && result != "": - expect := test.text[test.matches[0][0]:test.matches[0][1]] - if expect != result { - t.Errorf("expected %q got %q: %s", expect, result, test) - } - } - } -} - -func testFindIndex(test *FindTest, result []int, t *testing.T) { - switch { - case len(test.matches) == 0 && len(result) == 0: - // ok - case test.matches == nil && result != nil: - t.Errorf("expected no match; got one: %s", test) - case test.matches != nil && result == nil: - t.Errorf("expected match; got none: %s", test) - case test.matches != nil && result != nil: - expect := test.matches[0] - if expect[0] != result[0] || expect[1] != result[1] { - t.Errorf("expected %v got %v: %s", expect, result, test) - } - } -} - -func TestFindIndex(t *testing.T) { - for _, test := range findTests { - testFindIndex(&test, MustCompile(test.pat).FindIndex([]byte(test.text)), t) - } -} - -func TestFindStringIndex(t *testing.T) { - for _, test := range findTests { - testFindIndex(&test, MustCompile(test.pat).FindStringIndex(test.text), t) - } -} - -func TestFindReaderIndex(t *testing.T) { - for _, test := range findTests { - testFindIndex(&test, MustCompile(test.pat).FindReaderIndex(strings.NewReader(test.text)), t) - } -} - -// Now come the simple All cases. 
- -func TestFindAll(t *testing.T) { - for _, test := range findTests { - result := MustCompile(test.pat).FindAll([]byte(test.text), -1) - switch { - case test.matches == nil && result == nil: - // ok - case test.matches == nil && result != nil: - t.Errorf("expected no match; got one: %s", test) - case test.matches != nil && result == nil: - t.Fatalf("expected match; got none: %s", test) - case test.matches != nil && result != nil: - if len(test.matches) != len(result) { - t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) - continue - } - for k, e := range test.matches { - expect := test.text[e[0]:e[1]] - if expect != string(result[k]) { - t.Errorf("match %d: expected %q got %q: %s", k, expect, result[k], test) - } - } - } - } -} - -func TestFindAllString(t *testing.T) { - for _, test := range findTests { - result := MustCompile(test.pat).FindAllString(test.text, -1) - switch { - case test.matches == nil && result == nil: - // ok - case test.matches == nil && result != nil: - t.Errorf("expected no match; got one: %s", test) - case test.matches != nil && result == nil: - t.Errorf("expected match; got none: %s", test) - case test.matches != nil && result != nil: - if len(test.matches) != len(result) { - t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) - continue - } - for k, e := range test.matches { - expect := test.text[e[0]:e[1]] - if expect != result[k] { - t.Errorf("expected %q got %q: %s", expect, result, test) - } - } - } - } -} - -func testFindAllIndex(test *FindTest, result [][]int, t *testing.T) { - switch { - case test.matches == nil && result == nil: - // ok - case test.matches == nil && result != nil: - t.Errorf("expected no match; got one: %s", test) - case test.matches != nil && result == nil: - t.Errorf("expected match; got none: %s", test) - case test.matches != nil && result != nil: - if len(test.matches) != len(result) { - t.Errorf("expected %d matches; got %d: %s", len(test.matches), 
len(result), test) - return - } - for k, e := range test.matches { - if e[0] != result[k][0] || e[1] != result[k][1] { - t.Errorf("match %d: expected %v got %v: %s", k, e, result[k], test) - } - } - } -} - -func TestFindAllIndex(t *testing.T) { - for _, test := range findTests { - testFindAllIndex(&test, MustCompile(test.pat).FindAllIndex([]byte(test.text), -1), t) - } -} - -func TestFindAllStringIndex(t *testing.T) { - for _, test := range findTests { - testFindAllIndex(&test, MustCompile(test.pat).FindAllStringIndex(test.text, -1), t) - } -} - -// Now come the Submatch cases. - -func testSubmatchBytes(test *FindTest, n int, submatches []int, result [][]byte, t *testing.T) { - if len(submatches) != len(result)*2 { - t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test) - return - } - for k := 0; k < len(submatches); k += 2 { - if submatches[k] == -1 { - if result[k/2] != nil { - t.Errorf("match %d: expected nil got %q: %s", n, result, test) - } - continue - } - expect := test.text[submatches[k]:submatches[k+1]] - if expect != string(result[k/2]) { - t.Errorf("match %d: expected %q got %q: %s", n, expect, result, test) - return - } - } -} - -func TestFindSubmatch(t *testing.T) { - for _, test := range findTests { - result := MustCompile(test.pat).FindSubmatch([]byte(test.text)) - switch { - case test.matches == nil && result == nil: - // ok - case test.matches == nil && result != nil: - t.Errorf("expected no match; got one: %s", test) - case test.matches != nil && result == nil: - t.Errorf("expected match; got none: %s", test) - case test.matches != nil && result != nil: - testSubmatchBytes(&test, 0, test.matches[0], result, t) - } - } -} - -func testSubmatchString(test *FindTest, n int, submatches []int, result []string, t *testing.T) { - if len(submatches) != len(result)*2 { - t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test) - return - } - for k := 0; k < len(submatches); 
k += 2 { - if submatches[k] == -1 { - if result[k/2] != "" { - t.Errorf("match %d: expected nil got %q: %s", n, result, test) - } - continue - } - expect := test.text[submatches[k]:submatches[k+1]] - if expect != result[k/2] { - t.Errorf("match %d: expected %q got %q: %s", n, expect, result, test) - return - } - } -} - -func TestFindStringSubmatch(t *testing.T) { - for _, test := range findTests { - result := MustCompile(test.pat).FindStringSubmatch(test.text) - switch { - case test.matches == nil && result == nil: - // ok - case test.matches == nil && result != nil: - t.Errorf("expected no match; got one: %s", test) - case test.matches != nil && result == nil: - t.Errorf("expected match; got none: %s", test) - case test.matches != nil && result != nil: - testSubmatchString(&test, 0, test.matches[0], result, t) - } - } -} - -func testSubmatchIndices(test *FindTest, n int, expect, result []int, t *testing.T) { - if len(expect) != len(result) { - t.Errorf("match %d: expected %d matches; got %d: %s", n, len(expect)/2, len(result)/2, test) - return - } - for k, e := range expect { - if e != result[k] { - t.Errorf("match %d: submatch error: expected %v got %v: %s", n, expect, result, test) - } - } -} - -func testFindSubmatchIndex(test *FindTest, result []int, t *testing.T) { - switch { - case test.matches == nil && result == nil: - // ok - case test.matches == nil && result != nil: - t.Errorf("expected no match; got one: %s", test) - case test.matches != nil && result == nil: - t.Errorf("expected match; got none: %s", test) - case test.matches != nil && result != nil: - testSubmatchIndices(test, 0, test.matches[0], result, t) - } -} - -func TestFindSubmatchIndex(t *testing.T) { - for _, test := range findTests { - testFindSubmatchIndex(&test, MustCompile(test.pat).FindSubmatchIndex([]byte(test.text)), t) - } -} - -func TestFindStringSubmatchIndex(t *testing.T) { - for _, test := range findTests { - testFindSubmatchIndex(&test, 
MustCompile(test.pat).FindStringSubmatchIndex(test.text), t) - } -} - -func TestFindReaderSubmatchIndex(t *testing.T) { - for _, test := range findTests { - testFindSubmatchIndex(&test, MustCompile(test.pat).FindReaderSubmatchIndex(strings.NewReader(test.text)), t) - } -} - -// Now come the monster AllSubmatch cases. - -func TestFindAllSubmatch(t *testing.T) { - for _, test := range findTests { - result := MustCompile(test.pat).FindAllSubmatch([]byte(test.text), -1) - switch { - case test.matches == nil && result == nil: - // ok - case test.matches == nil && result != nil: - t.Errorf("expected no match; got one: %s", test) - case test.matches != nil && result == nil: - t.Errorf("expected match; got none: %s", test) - case len(test.matches) != len(result): - t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) - case test.matches != nil && result != nil: - for k, match := range test.matches { - testSubmatchBytes(&test, k, match, result[k], t) - } - } - } -} - -func TestFindAllStringSubmatch(t *testing.T) { - for _, test := range findTests { - result := MustCompile(test.pat).FindAllStringSubmatch(test.text, -1) - switch { - case test.matches == nil && result == nil: - // ok - case test.matches == nil && result != nil: - t.Errorf("expected no match; got one: %s", test) - case test.matches != nil && result == nil: - t.Errorf("expected match; got none: %s", test) - case len(test.matches) != len(result): - t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) - case test.matches != nil && result != nil: - for k, match := range test.matches { - testSubmatchString(&test, k, match, result[k], t) - } - } - } -} - -func testFindAllSubmatchIndex(test *FindTest, result [][]int, t *testing.T) { - switch { - case test.matches == nil && result == nil: - // ok - case test.matches == nil && result != nil: - t.Errorf("expected no match; got one: %s", test) - case test.matches != nil && result == nil: - t.Errorf("expected match; 
got none: %s", test) - case len(test.matches) != len(result): - t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) - case test.matches != nil && result != nil: - for k, match := range test.matches { - testSubmatchIndices(test, k, match, result[k], t) - } - } -} - -func TestFindAllSubmatchIndex(t *testing.T) { - for _, test := range findTests { - testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllSubmatchIndex([]byte(test.text), -1), t) - } -} - -func TestFindAllStringSubmatchIndex(t *testing.T) { - for _, test := range findTests { - testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllStringSubmatchIndex(test.text, -1), t) - } -} diff --git a/src/pkg/regexp/onepass.go b/src/pkg/regexp/onepass.go deleted file mode 100644 index 501fb28af..000000000 --- a/src/pkg/regexp/onepass.go +++ /dev/null @@ -1,582 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. - -package regexp - -import ( - "bytes" - "regexp/syntax" - "sort" - "unicode" -) - -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// "One-pass" regexp execution. -// Some regexps can be analyzed to determine that they never need -// backtracking: they are guaranteed to run in one pass over the string -// without bothering to save all the usual NFA state. -// Detect those and execute them more quickly. - -// A onePassProg is a compiled one-pass regular expression program. -// It is the same as syntax.Prog except for the use of onePassInst. -type onePassProg struct { - Inst []onePassInst - Start int // index of start instruction - NumCap int // number of InstCapture insts in re -} - -// A onePassInst is a single instruction in a one-pass regular expression program. -// It is the same as syntax.Inst except for the new 'Next' field. -type onePassInst struct { - syntax.Inst - Next []uint32 -} - -// OnePassPrefix returns a literal string that all matches for the -// regexp must start with. 
Complete is true if the prefix -// is the entire match. Pc is the index of the last rune instruction -// in the string. The OnePassPrefix skips over the mandatory -// EmptyBeginText -func onePassPrefix(p *syntax.Prog) (prefix string, complete bool, pc uint32) { - i := &p.Inst[p.Start] - if i.Op != syntax.InstEmptyWidth || (syntax.EmptyOp(i.Arg))&syntax.EmptyBeginText == 0 { - return "", i.Op == syntax.InstMatch, uint32(p.Start) - } - pc = i.Out - i = &p.Inst[pc] - for i.Op == syntax.InstNop { - pc = i.Out - i = &p.Inst[pc] - } - // Avoid allocation of buffer if prefix is empty. - if iop(i) != syntax.InstRune || len(i.Rune) != 1 { - return "", i.Op == syntax.InstMatch, uint32(p.Start) - } - - // Have prefix; gather characters. - var buf bytes.Buffer - for iop(i) == syntax.InstRune && len(i.Rune) == 1 && syntax.Flags(i.Arg)&syntax.FoldCase == 0 { - buf.WriteRune(i.Rune[0]) - pc, i = i.Out, &p.Inst[i.Out] - } - return buf.String(), i.Op == syntax.InstEmptyWidth && (syntax.EmptyOp(i.Arg))&syntax.EmptyBeginText != 0, pc -} - -// OnePassNext selects the next actionable state of the prog, based on the input character. -// It should only be called when i.Op == InstAlt or InstAltMatch, and from the one-pass machine. -// One of the alternates may ultimately lead without input to end of line. If the instruction -// is InstAltMatch the path to the InstMatch is in i.Out, the normal node in i.Next. -func onePassNext(i *onePassInst, r rune) uint32 { - next := i.MatchRunePos(r) - if next >= 0 { - return i.Next[next] - } - if i.Op == syntax.InstAltMatch { - return i.Out - } - return 0 -} - -func iop(i *syntax.Inst) syntax.InstOp { - op := i.Op - switch op { - case syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL: - op = syntax.InstRune - } - return op -} - -// Sparse Array implementation is used as a queueOnePass. 
// queueOnePass holds a set of uint32 values in insertion order.
// sparse[u] records the slot in dense where u was stored; a value u is a
// member only if that slot is below size and dense holds u there, so
// clearing the queue does not require touching either slice.
type queueOnePass struct {
	sparse    []uint32
	dense     []uint32
	size      uint32 // number of live entries in dense
	nextIndex uint32 // read cursor into dense
}

// empty reports whether the read cursor has consumed every live entry.
func (q *queueOnePass) empty() bool {
	return q.size <= q.nextIndex
}

// next returns the entry under the read cursor and advances the cursor.
func (q *queueOnePass) next() (n uint32) {
	head := q.nextIndex
	n = q.dense[head]
	q.nextIndex = head + 1
	return n
}

// clear discards all entries and rewinds the read cursor.
func (q *queueOnePass) clear() {
	q.nextIndex = 0
	q.size = 0
}

// reset rewinds the read cursor while keeping the entries.
func (q *queueOnePass) reset() {
	q.nextIndex = 0
}

// contains reports whether u is currently a member of the queue.
// Values outside the range of the sparse slice are never members.
func (q *queueOnePass) contains(u uint32) bool {
	if limit := uint32(len(q.sparse)); u >= limit {
		return false
	}
	slot := q.sparse[u]
	return slot < q.size && q.dense[slot] == u
}

// insert adds u unless it is already a member.
func (q *queueOnePass) insert(u uint32) {
	if q.contains(u) {
		return
	}
	q.insertNew(u)
}

// insertNew appends u without checking for membership.
// Values outside the range of the sparse slice are silently ignored.
func (q *queueOnePass) insertNew(u uint32) {
	if u < uint32(len(q.sparse)) {
		slot := q.size
		q.sparse[u] = slot
		q.dense[slot] = u
		q.size = slot + 1
	}
}

// newQueue returns an empty queue able to hold the values 0 through size-1.
func newQueue(size int) (q *queueOnePass) {
	q = &queueOnePass{
		sparse: make([]uint32, size),
		dense:  make([]uint32, size),
	}
	return q
}

// mergeRuneSets merges two non-intersecting runesets, and returns the merged result,
// and a NextIp array. The idea is that if a rune matches the OnePassRunes at index
// i, NextIp[i/2] is the target. If the input sets intersect, an empty runeset and a
// NextIp array with the single element mergeFailed is returned.
// The code assumes that both inputs contain ordered and non-intersecting rune pairs.
// mergeFailed is the sentinel stored as the only element of the next-pc
// slice when mergeRuneSets detects overlapping or unordered input.
const mergeFailed = uint32(0xffffffff)

// Shared results returned by mergeRuneSets on failure.
var (
	noRune = []rune{}
	noNext = []uint32{mergeFailed}
)

// mergeRuneSets interleaves the [lo, hi] rune pairs of *leftRunes and
// *rightRunes into one ascending list. For every pair placed in the merged
// list, the matching element of the second result is leftPC or rightPC,
// according to which input the pair came from.
//
// If a pair does not start strictly after the end of the previously merged
// pair, the merge fails and noRune, noNext are returned. It panics if either
// input has odd length.
func mergeRuneSets(leftRunes, rightRunes *[]rune, leftPC, rightPC uint32) ([]rune, []uint32) {
	left, right := *leftRunes, *rightRunes
	if len(left)%2 != 0 || len(right)%2 != 0 {
		panic("mergeRuneSets odd length []rune")
	}

	// On success every input pair is copied exactly once, so the final
	// sizes are known up front.
	merged := make([]rune, 0, len(left)+len(right))
	next := make([]uint32, 0, (len(left)+len(right))/2)

	// appendPair moves the pair at src[*pos] to the end of merged and
	// records pc for it. It reports false, leaving everything untouched,
	// when the pair does not begin after the last merged pair ends.
	appendPair := func(src []rune, pos *int, pc uint32) bool {
		lo, hi := src[*pos], src[*pos+1]
		if n := len(merged); n > 0 && lo <= merged[n-1] {
			return false
		}
		merged = append(merged, lo, hi)
		next = append(next, pc)
		*pos += 2
		return true
	}

	lx, rx := 0, 0
	for lx < len(left) || rx < len(right) {
		// Take from the right when the left is exhausted or the right
		// pair starts lower; ties go to the left.
		takeRight := rx < len(right) && (lx >= len(left) || right[rx] < left[lx])

		var ok bool
		if takeRight {
			ok = appendPair(right, &rx, rightPC)
		} else {
			ok = appendPair(left, &lx, leftPC)
		}
		if !ok {
			return noRune, noNext
		}
	}
	return merged, next
}

// cleanupOnePass drops working memory, and restores certain shortcut instructions.
-func cleanupOnePass(prog *onePassProg, original *syntax.Prog) { - for ix, instOriginal := range original.Inst { - switch instOriginal.Op { - case syntax.InstAlt, syntax.InstAltMatch, syntax.InstRune: - case syntax.InstCapture, syntax.InstEmptyWidth, syntax.InstNop, syntax.InstMatch, syntax.InstFail: - prog.Inst[ix].Next = nil - case syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL: - prog.Inst[ix].Next = nil - prog.Inst[ix] = onePassInst{Inst: instOriginal} - } - } -} - -// onePassCopy creates a copy of the original Prog, as we'll be modifying it -func onePassCopy(prog *syntax.Prog) *onePassProg { - p := &onePassProg{ - Start: prog.Start, - NumCap: prog.NumCap, - } - for _, inst := range prog.Inst { - p.Inst = append(p.Inst, onePassInst{Inst: inst}) - } - - // rewrites one or more common Prog constructs that enable some otherwise - // non-onepass Progs to be onepass. A:BD (for example) means an InstAlt at - // ip A, that points to ips B & C. - // A:BC + B:DA => A:BC + B:CD - // A:BC + B:DC => A:DC + B:DC - for pc := range p.Inst { - switch p.Inst[pc].Op { - default: - continue - case syntax.InstAlt, syntax.InstAltMatch: - // A:Bx + B:Ay - p_A_Other := &p.Inst[pc].Out - p_A_Alt := &p.Inst[pc].Arg - // make sure a target is another Alt - instAlt := p.Inst[*p_A_Alt] - if !(instAlt.Op == syntax.InstAlt || instAlt.Op == syntax.InstAltMatch) { - p_A_Alt, p_A_Other = p_A_Other, p_A_Alt - instAlt = p.Inst[*p_A_Alt] - if !(instAlt.Op == syntax.InstAlt || instAlt.Op == syntax.InstAltMatch) { - continue - } - } - instOther := p.Inst[*p_A_Other] - // Analyzing both legs pointing to Alts is for another day - if instOther.Op == syntax.InstAlt || instOther.Op == syntax.InstAltMatch { - // too complicated - continue - } - // simple empty transition loop - // A:BC + B:DA => A:BC + B:DC - p_B_Alt := &p.Inst[*p_A_Alt].Out - p_B_Other := &p.Inst[*p_A_Alt].Arg - patch := false - if instAlt.Out == uint32(pc) { - patch = true - } else if instAlt.Arg == uint32(pc) { - patch 
= true - p_B_Alt, p_B_Other = p_B_Other, p_B_Alt - } - if patch { - *p_B_Alt = *p_A_Other - } - - // empty transition to common target - // A:BC + B:DC => A:DC + B:DC - if *p_A_Other == *p_B_Alt { - *p_A_Alt = *p_B_Other - } - } - } - return p -} - -// runeSlice exists to permit sorting the case-folded rune sets. -type runeSlice []rune - -func (p runeSlice) Len() int { return len(p) } -func (p runeSlice) Less(i, j int) bool { return p[i] < p[j] } -func (p runeSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } - -// Sort is a convenience method. -func (p runeSlice) Sort() { - sort.Sort(p) -} - -var anyRuneNotNL = []rune{0, '\n' - 1, '\n' + 1, unicode.MaxRune} -var anyRune = []rune{0, unicode.MaxRune} - -// makeOnePass creates a onepass Prog, if possible. It is possible if at any alt, -// the match engine can always tell which branch to take. The routine may modify -// p if it is turned into a onepass Prog. If it isn't possible for this to be a -// onepass Prog, the Prog notOnePass is returned. makeOnePass is recursive -// to the size of the Prog. -func makeOnePass(p *onePassProg) *onePassProg { - // If the machine is very long, it's not worth the time to check if we can use one pass. 
- if len(p.Inst) >= 1000 { - return notOnePass - } - - var ( - instQueue = newQueue(len(p.Inst)) - visitQueue = newQueue(len(p.Inst)) - build func(uint32, *queueOnePass) - check func(uint32, map[uint32]bool) bool - onePassRunes = make([][]rune, len(p.Inst)) - ) - build = func(pc uint32, q *queueOnePass) { - if q.contains(pc) { - return - } - inst := p.Inst[pc] - switch inst.Op { - case syntax.InstAlt, syntax.InstAltMatch: - q.insert(inst.Out) - build(inst.Out, q) - q.insert(inst.Arg) - case syntax.InstMatch, syntax.InstFail: - default: - q.insert(inst.Out) - } - } - - // check that paths from Alt instructions are unambiguous, and rebuild the new - // program as a onepass program - check = func(pc uint32, m map[uint32]bool) (ok bool) { - ok = true - inst := &p.Inst[pc] - if visitQueue.contains(pc) { - return - } - visitQueue.insert(pc) - switch inst.Op { - case syntax.InstAlt, syntax.InstAltMatch: - ok = check(inst.Out, m) && check(inst.Arg, m) - // check no-input paths to InstMatch - matchOut := m[inst.Out] - matchArg := m[inst.Arg] - if matchOut && matchArg { - ok = false - break - } - // Match on empty goes in inst.Out - if matchArg { - inst.Out, inst.Arg = inst.Arg, inst.Out - matchOut, matchArg = matchArg, matchOut - } - if matchOut { - m[pc] = true - inst.Op = syntax.InstAltMatch - } - - // build a dispatch operator from the two legs of the alt. - onePassRunes[pc], inst.Next = mergeRuneSets( - &onePassRunes[inst.Out], &onePassRunes[inst.Arg], inst.Out, inst.Arg) - if len(inst.Next) > 0 && inst.Next[0] == mergeFailed { - ok = false - break - } - case syntax.InstCapture, syntax.InstNop: - ok = check(inst.Out, m) - m[pc] = m[inst.Out] - // pass matching runes back through these no-ops. - onePassRunes[pc] = append([]rune{}, onePassRunes[inst.Out]...) 
- inst.Next = []uint32{} - for i := len(onePassRunes[pc]) / 2; i >= 0; i-- { - inst.Next = append(inst.Next, inst.Out) - } - case syntax.InstEmptyWidth: - ok = check(inst.Out, m) - m[pc] = m[inst.Out] - onePassRunes[pc] = append([]rune{}, onePassRunes[inst.Out]...) - inst.Next = []uint32{} - for i := len(onePassRunes[pc]) / 2; i >= 0; i-- { - inst.Next = append(inst.Next, inst.Out) - } - case syntax.InstMatch, syntax.InstFail: - m[pc] = inst.Op == syntax.InstMatch - break - case syntax.InstRune: - ok = check(inst.Out, m) - m[pc] = false - if len(inst.Next) > 0 { - break - } - if len(inst.Rune) == 0 { - onePassRunes[pc] = []rune{} - inst.Next = []uint32{inst.Out} - break - } - runes := make([]rune, 0) - if len(inst.Rune) == 1 && syntax.Flags(inst.Arg)&syntax.FoldCase != 0 { - r0 := inst.Rune[0] - runes = append(runes, r0, r0) - for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) { - runes = append(runes, r1, r1) - } - sort.Sort(runeSlice(runes)) - } else { - runes = append(runes, inst.Rune...) 
- } - onePassRunes[pc] = runes - inst.Next = []uint32{} - for i := len(onePassRunes[pc]) / 2; i >= 0; i-- { - inst.Next = append(inst.Next, inst.Out) - } - inst.Op = syntax.InstRune - case syntax.InstRune1: - ok = check(inst.Out, m) - m[pc] = false - if len(inst.Next) > 0 { - break - } - runes := []rune{} - // expand case-folded runes - if syntax.Flags(inst.Arg)&syntax.FoldCase != 0 { - r0 := inst.Rune[0] - runes = append(runes, r0, r0) - for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) { - runes = append(runes, r1, r1) - } - sort.Sort(runeSlice(runes)) - } else { - runes = append(runes, inst.Rune[0], inst.Rune[0]) - } - onePassRunes[pc] = runes - inst.Next = []uint32{} - for i := len(onePassRunes[pc]) / 2; i >= 0; i-- { - inst.Next = append(inst.Next, inst.Out) - } - inst.Op = syntax.InstRune - case syntax.InstRuneAny: - ok = check(inst.Out, m) - m[pc] = false - if len(inst.Next) > 0 { - break - } - onePassRunes[pc] = append([]rune{}, anyRune...) - inst.Next = []uint32{inst.Out} - case syntax.InstRuneAnyNotNL: - ok = check(inst.Out, m) - m[pc] = false - if len(inst.Next) > 0 { - break - } - onePassRunes[pc] = append([]rune{}, anyRuneNotNL...) 
- inst.Next = []uint32{} - for i := len(onePassRunes[pc]) / 2; i >= 0; i-- { - inst.Next = append(inst.Next, inst.Out) - } - } - return - } - - instQueue.clear() - instQueue.insert(uint32(p.Start)) - m := make(map[uint32]bool, len(p.Inst)) - for !instQueue.empty() { - pc := instQueue.next() - inst := p.Inst[pc] - visitQueue.clear() - if !check(uint32(pc), m) { - p = notOnePass - break - } - switch inst.Op { - case syntax.InstAlt, syntax.InstAltMatch: - instQueue.insert(inst.Out) - instQueue.insert(inst.Arg) - case syntax.InstCapture, syntax.InstEmptyWidth, syntax.InstNop: - instQueue.insert(inst.Out) - case syntax.InstMatch: - case syntax.InstFail: - case syntax.InstRune, syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL: - default: - } - } - if p != notOnePass { - for i, _ := range p.Inst { - p.Inst[i].Rune = onePassRunes[i] - } - } - return p -} - -// walk visits each Inst in the prog once, and applies the argument -// function(ip, next), in pre-order. -func walk(prog *syntax.Prog, funcs ...func(ip, next uint32)) { - var walk1 func(uint32) - progQueue := newQueue(len(prog.Inst)) - walk1 = func(ip uint32) { - if progQueue.contains(ip) { - return - } - progQueue.insert(ip) - inst := prog.Inst[ip] - switch inst.Op { - case syntax.InstAlt, syntax.InstAltMatch: - for _, f := range funcs { - f(ip, inst.Out) - f(ip, inst.Arg) - } - walk1(inst.Out) - walk1(inst.Arg) - default: - for _, f := range funcs { - f(ip, inst.Out) - } - walk1(inst.Out) - } - } - walk1(uint32(prog.Start)) -} - -// find returns the Insts that match the argument predicate function -func find(prog *syntax.Prog, f func(*syntax.Prog, int) bool) (matches []uint32) { - matches = []uint32{} - - for ip := range prog.Inst { - if f(prog, ip) { - matches = append(matches, uint32(ip)) - } - } - return -} - -var notOnePass *onePassProg = nil - -// compileOnePass returns a new *syntax.Prog suitable for onePass execution if the original Prog -// can be recharacterized as a one-pass regexp program, or 
syntax.notOnePass if the -// Prog cannot be converted. For a one pass prog, the fundamental condition that must -// be true is: at any InstAlt, there must be no ambiguity about what branch to take. -func compileOnePass(prog *syntax.Prog) (p *onePassProg) { - if prog.Start == 0 { - return notOnePass - } - // onepass regexp is anchored - if prog.Inst[prog.Start].Op != syntax.InstEmptyWidth || - syntax.EmptyOp(prog.Inst[prog.Start].Arg)&syntax.EmptyBeginText != syntax.EmptyBeginText { - return notOnePass - } - // every instruction leading to InstMatch must be EmptyEndText - for _, inst := range prog.Inst { - opOut := prog.Inst[inst.Out].Op - switch inst.Op { - default: - if opOut == syntax.InstMatch { - return notOnePass - } - case syntax.InstAlt, syntax.InstAltMatch: - if opOut == syntax.InstMatch || prog.Inst[inst.Arg].Op == syntax.InstMatch { - return notOnePass - } - case syntax.InstEmptyWidth: - if opOut == syntax.InstMatch { - if syntax.EmptyOp(inst.Arg)&syntax.EmptyEndText == syntax.EmptyEndText { - continue - } - return notOnePass - } - } - } - // Creates a slightly optimized copy of the original Prog - // that cleans up some Prog idioms that block valid onepass programs - p = onePassCopy(prog) - - // checkAmbiguity on InstAlts, build onepass Prog if possible - p = makeOnePass(p) - - if p != notOnePass { - cleanupOnePass(p, prog) - } - return p -} diff --git a/src/pkg/regexp/onepass_test.go b/src/pkg/regexp/onepass_test.go deleted file mode 100644 index 7b2beea67..000000000 --- a/src/pkg/regexp/onepass_test.go +++ /dev/null @@ -1,208 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -package regexp - -import ( - "reflect" - "regexp/syntax" - "testing" -) - -var runeMergeTests = []struct { - left, right, merged []rune - next []uint32 - leftPC, rightPC uint32 -}{ - { - // empty rhs - []rune{69, 69}, - []rune{}, - []rune{69, 69}, - []uint32{1}, - 1, 2, - }, - { - // identical runes, identical targets - []rune{69, 69}, - []rune{69, 69}, - []rune{}, - []uint32{mergeFailed}, - 1, 1, - }, - { - // identical runes, different targets - []rune{69, 69}, - []rune{69, 69}, - []rune{}, - []uint32{mergeFailed}, - 1, 2, - }, - { - // append right-first - []rune{69, 69}, - []rune{71, 71}, - []rune{69, 69, 71, 71}, - []uint32{1, 2}, - 1, 2, - }, - { - // append, left-first - []rune{71, 71}, - []rune{69, 69}, - []rune{69, 69, 71, 71}, - []uint32{2, 1}, - 1, 2, - }, - { - // successful interleave - []rune{60, 60, 71, 71, 101, 101}, - []rune{69, 69, 88, 88}, - []rune{60, 60, 69, 69, 71, 71, 88, 88, 101, 101}, - []uint32{1, 2, 1, 2, 1}, - 1, 2, - }, - { - // left surrounds right - []rune{69, 74}, - []rune{71, 71}, - []rune{}, - []uint32{mergeFailed}, - 1, 2, - }, - { - // right surrounds left - []rune{69, 74}, - []rune{68, 75}, - []rune{}, - []uint32{mergeFailed}, - 1, 2, - }, - { - // overlap at interval begin - []rune{69, 74}, - []rune{74, 75}, - []rune{}, - []uint32{mergeFailed}, - 1, 2, - }, - { - // overlap ar interval end - []rune{69, 74}, - []rune{65, 69}, - []rune{}, - []uint32{mergeFailed}, - 1, 2, - }, - { - // overlap from above - []rune{69, 74}, - []rune{71, 74}, - []rune{}, - []uint32{mergeFailed}, - 1, 2, - }, - { - // overlap from below - []rune{69, 74}, - []rune{65, 71}, - []rune{}, - []uint32{mergeFailed}, - 1, 2, - }, - { - // out of order []rune - []rune{69, 74, 60, 65}, - []rune{66, 67}, - []rune{}, - []uint32{mergeFailed}, - 1, 2, - }, -} - -func TestMergeRuneSet(t *testing.T) { - for ix, test := range runeMergeTests { - merged, next := mergeRuneSets(&test.left, &test.right, test.leftPC, test.rightPC) - if !reflect.DeepEqual(merged, 
test.merged) { - t.Errorf("mergeRuneSet :%d (%v, %v) merged\n have\n%v\nwant\n%v", ix, test.left, test.right, merged, test.merged) - } - if !reflect.DeepEqual(next, test.next) { - t.Errorf("mergeRuneSet :%d(%v, %v) next\n have\n%v\nwant\n%v", ix, test.left, test.right, next, test.next) - } - } -} - -const noStr = `!` - -var onePass = &onePassProg{} - -var onePassTests = []struct { - re string - onePass *onePassProg - prog string -}{ - {`^(?:a|(?:a*))$`, notOnePass, noStr}, - {`^(?:(a)|(?:a*))$`, notOnePass, noStr}, - {`^(?:(?:(?:.(?:$))?))$`, onePass, `a`}, - {`^abcd$`, onePass, `abcd`}, - {`^abcd$`, onePass, `abcde`}, - {`^(?:(?:a{0,})*?)$`, onePass, `a`}, - {`^(?:(?:a+)*)$`, onePass, ``}, - {`^(?:(?:a|(?:aa)))$`, onePass, ``}, - {`^(?:[^\s\S])$`, onePass, ``}, - {`^(?:(?:a{3,4}){0,})$`, notOnePass, `aaaaaa`}, - {`^(?:(?:a+)*)$`, onePass, `a`}, - {`^(?:(?:(?:a*)+))$`, onePass, noStr}, - {`^(?:(?:a+)*)$`, onePass, ``}, - {`^[a-c]+$`, onePass, `abc`}, - {`^[a-c]*$`, onePass, `abcdabc`}, - {`^(?:a*)$`, onePass, `aaaaaaa`}, - {`^(?:(?:aa)|a)$`, onePass, `a`}, - {`^[a-c]*`, notOnePass, `abcdabc`}, - {`^[a-c]*$`, onePass, `abc`}, - {`^...$`, onePass, ``}, - {`^(?:a|(?:aa))$`, onePass, `a`}, - {`^[a-c]*`, notOnePass, `abcabc`}, - {`^a((b))c$`, onePass, noStr}, - {`^a.[l-nA-Cg-j]?e$`, onePass, noStr}, - {`^a((b))$`, onePass, noStr}, - {`^a(?:(b)|(c))c$`, onePass, noStr}, - {`^a(?:(b*)|(c))c$`, notOnePass, noStr}, - {`^a(?:b|c)$`, onePass, noStr}, - {`^a(?:b?|c)$`, onePass, noStr}, - {`^a(?:b?|c?)$`, notOnePass, noStr}, - {`^a(?:b?|c+)$`, onePass, noStr}, - {`^a(?:b+|(bc))d$`, notOnePass, noStr}, - {`^a(?:bc)+$`, onePass, noStr}, - {`^a(?:[bcd])+$`, onePass, noStr}, - {`^a((?:[bcd])+)$`, onePass, noStr}, - {`^a(:?b|c)*d$`, onePass, `abbbccbbcbbd"`}, - {`^.bc(d|e)*$`, onePass, `abcddddddeeeededd`}, - {`^(?:(?:aa)|.)$`, notOnePass, `a`}, - {`^(?:(?:a{1,2}){1,2})$`, notOnePass, `aaaa`}, -} - -func TestCompileOnePass(t *testing.T) { - var ( - p *syntax.Prog - re *syntax.Regexp 
- err error - ) - for _, test := range onePassTests { - if re, err = syntax.Parse(test.re, syntax.Perl); err != nil { - t.Errorf("Parse(%q) got err:%s, want success", test.re, err) - continue - } - // needs to be done before compile... - re = re.Simplify() - if p, err = syntax.Compile(re); err != nil { - t.Errorf("Compile(%q) got err:%s, want success", test.re, err) - continue - } - onePass = compileOnePass(p) - if (onePass == notOnePass) != (test.onePass == notOnePass) { - t.Errorf("CompileOnePass(%q) got %v, expected %v", test.re, onePass, test.onePass) - } - } -} diff --git a/src/pkg/regexp/regexp.go b/src/pkg/regexp/regexp.go deleted file mode 100644 index 0b8336a04..000000000 --- a/src/pkg/regexp/regexp.go +++ /dev/null @@ -1,1120 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package regexp implements regular expression search. -// -// The syntax of the regular expressions accepted is the same -// general syntax used by Perl, Python, and other languages. -// More precisely, it is the syntax accepted by RE2 and described at -// http://code.google.com/p/re2/wiki/Syntax, except for \C. -// For an overview of the syntax, run -// godoc regexp/syntax -// -// The regexp implementation provided by this package is -// guaranteed to run in time linear in the size of the input. -// (This is a property not guaranteed by most open source -// implementations of regular expressions.) For more information -// about this property, see -// http://swtch.com/~rsc/regexp/regexp1.html -// or any book about automata theory. -// -// All characters are UTF-8-encoded code points. -// -// There are 16 methods of Regexp that match a regular expression and identify -// the matched text. Their names are matched by this regular expression: -// -// Find(All)?(String)?(Submatch)?(Index)? 
-// -// If 'All' is present, the routine matches successive non-overlapping -// matches of the entire expression. Empty matches abutting a preceding -// match are ignored. The return value is a slice containing the successive -// return values of the corresponding non-'All' routine. These routines take -// an extra integer argument, n; if n >= 0, the function returns at most n -// matches/submatches. -// -// If 'String' is present, the argument is a string; otherwise it is a slice -// of bytes; return values are adjusted as appropriate. -// -// If 'Submatch' is present, the return value is a slice identifying the -// successive submatches of the expression. Submatches are matches of -// parenthesized subexpressions (also known as capturing groups) within the -// regular expression, numbered from left to right in order of opening -// parenthesis. Submatch 0 is the match of the entire expression, submatch 1 -// the match of the first parenthesized subexpression, and so on. -// -// If 'Index' is present, matches and submatches are identified by byte index -// pairs within the input string: result[2*n:2*n+2] identifies the indexes of -// the nth submatch. The pair for n==0 identifies the match of the entire -// expression. If 'Index' is not present, the match is identified by the -// text of the match/submatch. If an index is negative, it means that -// subexpression did not match any string in the input. -// -// There is also a subset of the methods that can be applied to text read -// from a RuneReader: -// -// MatchReader, FindReaderIndex, FindReaderSubmatchIndex -// -// This set may grow. Note that regular expression matches may need to -// examine text beyond the text returned by a match, so the methods that -// match text from a RuneReader may read arbitrarily far into the input -// before returning. -// -// (There are a few other methods that do not match this pattern.) 
//
package regexp

import (
	"bytes"
	"io"
	"regexp/syntax"
	"strconv"
	"strings"
	"sync"
	"unicode"
	"unicode/utf8"
)

var debug = false

// Regexp is the representation of a compiled regular expression.
// A Regexp is safe for concurrent use by multiple goroutines.
type Regexp struct {
	// read-only after Compile
	expr           string         // as passed to Compile
	prog           *syntax.Prog   // compiled program
	onepass        *onePassProg   // onepass program or nil
	prefix         string         // required prefix in unanchored matches
	prefixBytes    []byte         // prefix, as a []byte
	prefixComplete bool           // prefix is the entire regexp
	prefixRune     rune           // first rune in prefix
	prefixEnd      uint32         // pc for last rune in prefix
	cond           syntax.EmptyOp // empty-width conditions required at start of match
	numSubexp      int            // MaxCap() of the parsed expression
	subexpNames    []string       // CapNames() of the parsed expression
	longest        bool           // prefer leftmost-longest; set by CompilePOSIX and Longest

	// cache of machines for running regexp
	mu      sync.Mutex // guards machine
	machine []*machine
}

// String returns the source text used to compile the regular expression.
func (re *Regexp) String() string {
	return re.expr
}

// Compile parses a regular expression and returns, if successful,
// a Regexp object that can be used to match against text.
//
// When matching against text, the regexp returns a match that
// begins as early as possible in the input (leftmost), and among those
// it chooses the one that a backtracking search would have found first.
// This so-called leftmost-first matching is the same semantics
// that Perl, Python, and other implementations use, although this
// package implements it without the expense of backtracking.
// For POSIX leftmost-longest matching, see CompilePOSIX.
func Compile(expr string) (*Regexp, error) {
	return compile(expr, syntax.Perl, false)
}

// CompilePOSIX is like Compile but restricts the regular expression
// to POSIX ERE (egrep) syntax and changes the match semantics to
// leftmost-longest.
//
// That is, when matching against text, the regexp returns a match that
// begins as early as possible in the input (leftmost), and among those
// it chooses a match that is as long as possible.
// This so-called leftmost-longest matching is the same semantics
// that early regular expression implementations used and that POSIX
// specifies.
//
// However, there can be multiple leftmost-longest matches, with different
// submatch choices, and here this package diverges from POSIX.
// Among the possible leftmost-longest matches, this package chooses
// the one that a backtracking search would have found first, while POSIX
// specifies that the match be chosen to maximize the length of the first
// subexpression, then the second, and so on from left to right.
// The POSIX rule is computationally prohibitive and not even well-defined.
// See http://swtch.com/~rsc/regexp/regexp2.html#posix for details.
func CompilePOSIX(expr string) (*Regexp, error) {
	return compile(expr, syntax.POSIX, true)
}

// Longest makes future searches prefer the leftmost-longest match.
// That is, when matching against text, the regexp returns a match that
// begins as early as possible in the input (leftmost), and among those
// it chooses a match that is as long as possible.
func (re *Regexp) Longest() {
	re.longest = true
}

// compile is the shared implementation of Compile and CompilePOSIX.
// It parses expr using the syntax flags in mode, simplifies and compiles
// the result, and fills in a Regexp; longest is stored as the Regexp's
// leftmost-longest preference. A parse or compile error is returned
// unchanged together with a nil *Regexp.
func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, error) {
	re, err := syntax.Parse(expr, mode)
	if err != nil {
		return nil, err
	}
	// The capture count and names are read from the parsed expression
	// before Simplify is applied to it.
	maxCap := re.MaxCap()
	capNames := re.CapNames()

	re = re.Simplify()
	prog, err := syntax.Compile(re)
	if err != nil {
		return nil, err
	}
	regexp := &Regexp{
		expr:        expr,
		prog:        prog,
		onepass:     compileOnePass(prog),
		numSubexp:   maxCap,
		subexpNames: capNames,
		cond:        prog.StartCond(),
		longest:     longest,
	}
	// The literal prefix comes from the program itself when no one-pass
	// program is available, and from onePassPrefix (which also yields
	// prefixEnd) otherwise.
	if regexp.onepass == notOnePass {
		regexp.prefix, regexp.prefixComplete = prog.Prefix()
	} else {
		regexp.prefix, regexp.prefixComplete, regexp.prefixEnd = onePassPrefix(prog)
	}
	if regexp.prefix != "" {
		// TODO(rsc): Remove this allocation by adding
		// IndexString to package bytes.
		regexp.prefixBytes = []byte(regexp.prefix)
		regexp.prefixRune, _ = utf8.DecodeRuneInString(regexp.prefix)
	}
	return regexp, nil
}

// get returns a machine to use for matching re.
// It uses the re's machine cache if possible, to avoid
// unnecessary allocation.
func (re *Regexp) get() *machine {
	re.mu.Lock()
	if n := len(re.machine); n > 0 {
		// Pop the most recently cached machine.
		z := re.machine[n-1]
		re.machine = re.machine[:n-1]
		re.mu.Unlock()
		return z
	}
	// Cache is empty: release the lock before building a new machine.
	re.mu.Unlock()
	z := progMachine(re.prog, re.onepass)
	z.re = re
	return z
}

// put returns a machine to the re's machine cache.
// There is no attempt to limit the size of the cache, so it will
// grow to the maximum number of simultaneous matches
// run using re. (The cache empties when re gets garbage collected.)
func (re *Regexp) put(z *machine) {
	re.mu.Lock()
	re.machine = append(re.machine, z)
	re.mu.Unlock()
}

// MustCompile is like Compile but panics if the expression cannot be parsed.
// It simplifies safe initialization of global variables holding compiled regular
// expressions.
-func MustCompile(str string) *Regexp { - regexp, error := Compile(str) - if error != nil { - panic(`regexp: Compile(` + quote(str) + `): ` + error.Error()) - } - return regexp -} - -// MustCompilePOSIX is like CompilePOSIX but panics if the expression cannot be parsed. -// It simplifies safe initialization of global variables holding compiled regular -// expressions. -func MustCompilePOSIX(str string) *Regexp { - regexp, error := CompilePOSIX(str) - if error != nil { - panic(`regexp: CompilePOSIX(` + quote(str) + `): ` + error.Error()) - } - return regexp -} - -func quote(s string) string { - if strconv.CanBackquote(s) { - return "`" + s + "`" - } - return strconv.Quote(s) -} - -// NumSubexp returns the number of parenthesized subexpressions in this Regexp. -func (re *Regexp) NumSubexp() int { - return re.numSubexp -} - -// SubexpNames returns the names of the parenthesized subexpressions -// in this Regexp. The name for the first sub-expression is names[1], -// so that if m is a match slice, the name for m[i] is SubexpNames()[i]. -// Since the Regexp as a whole cannot be named, names[0] is always -// the empty string. The slice should not be modified. -func (re *Regexp) SubexpNames() []string { - return re.subexpNames -} - -const endOfText rune = -1 - -// input abstracts different representations of the input text. It provides -// one-character lookahead. -type input interface { - step(pos int) (r rune, width int) // advance one rune - canCheckPrefix() bool // can we look ahead without losing info? - hasPrefix(re *Regexp) bool - index(re *Regexp, pos int) int - context(pos int) syntax.EmptyOp -} - -// inputString scans a string. 
-type inputString struct { - str string -} - -func (i *inputString) step(pos int) (rune, int) { - if pos < len(i.str) { - c := i.str[pos] - if c < utf8.RuneSelf { - return rune(c), 1 - } - return utf8.DecodeRuneInString(i.str[pos:]) - } - return endOfText, 0 -} - -func (i *inputString) canCheckPrefix() bool { - return true -} - -func (i *inputString) hasPrefix(re *Regexp) bool { - return strings.HasPrefix(i.str, re.prefix) -} - -func (i *inputString) index(re *Regexp, pos int) int { - return strings.Index(i.str[pos:], re.prefix) -} - -func (i *inputString) context(pos int) syntax.EmptyOp { - r1, r2 := endOfText, endOfText - if pos > 0 && pos <= len(i.str) { - r1, _ = utf8.DecodeLastRuneInString(i.str[:pos]) - } - if pos < len(i.str) { - r2, _ = utf8.DecodeRuneInString(i.str[pos:]) - } - return syntax.EmptyOpContext(r1, r2) -} - -// inputBytes scans a byte slice. -type inputBytes struct { - str []byte -} - -func (i *inputBytes) step(pos int) (rune, int) { - if pos < len(i.str) { - c := i.str[pos] - if c < utf8.RuneSelf { - return rune(c), 1 - } - return utf8.DecodeRune(i.str[pos:]) - } - return endOfText, 0 -} - -func (i *inputBytes) canCheckPrefix() bool { - return true -} - -func (i *inputBytes) hasPrefix(re *Regexp) bool { - return bytes.HasPrefix(i.str, re.prefixBytes) -} - -func (i *inputBytes) index(re *Regexp, pos int) int { - return bytes.Index(i.str[pos:], re.prefixBytes) -} - -func (i *inputBytes) context(pos int) syntax.EmptyOp { - r1, r2 := endOfText, endOfText - if pos > 0 && pos <= len(i.str) { - r1, _ = utf8.DecodeLastRune(i.str[:pos]) - } - if pos < len(i.str) { - r2, _ = utf8.DecodeRune(i.str[pos:]) - } - return syntax.EmptyOpContext(r1, r2) -} - -// inputReader scans a RuneReader. 
-type inputReader struct { - r io.RuneReader - atEOT bool - pos int -} - -func (i *inputReader) step(pos int) (rune, int) { - if !i.atEOT && pos != i.pos { - return endOfText, 0 - - } - r, w, err := i.r.ReadRune() - if err != nil { - i.atEOT = true - return endOfText, 0 - } - i.pos += w - return r, w -} - -func (i *inputReader) canCheckPrefix() bool { - return false -} - -func (i *inputReader) hasPrefix(re *Regexp) bool { - return false -} - -func (i *inputReader) index(re *Regexp, pos int) int { - return -1 -} - -func (i *inputReader) context(pos int) syntax.EmptyOp { - return 0 -} - -// LiteralPrefix returns a literal string that must begin any match -// of the regular expression re. It returns the boolean true if the -// literal string comprises the entire regular expression. -func (re *Regexp) LiteralPrefix() (prefix string, complete bool) { - return re.prefix, re.prefixComplete -} - -// MatchReader reports whether the Regexp matches the text read by the -// RuneReader. -func (re *Regexp) MatchReader(r io.RuneReader) bool { - return re.doExecute(r, nil, "", 0, 0) != nil -} - -// MatchString reports whether the Regexp matches the string s. -func (re *Regexp) MatchString(s string) bool { - return re.doExecute(nil, nil, s, 0, 0) != nil -} - -// Match reports whether the Regexp matches the byte slice b. -func (re *Regexp) Match(b []byte) bool { - return re.doExecute(nil, b, "", 0, 0) != nil -} - -// MatchReader checks whether a textual regular expression matches the text -// read by the RuneReader. More complicated queries need to use Compile and -// the full Regexp interface. -func MatchReader(pattern string, r io.RuneReader) (matched bool, err error) { - re, err := Compile(pattern) - if err != nil { - return false, err - } - return re.MatchReader(r), nil -} - -// MatchString checks whether a textual regular expression -// matches a string. More complicated queries need -// to use Compile and the full Regexp interface. 
-func MatchString(pattern string, s string) (matched bool, err error) { - re, err := Compile(pattern) - if err != nil { - return false, err - } - return re.MatchString(s), nil -} - -// Match checks whether a textual regular expression -// matches a byte slice. More complicated queries need -// to use Compile and the full Regexp interface. -func Match(pattern string, b []byte) (matched bool, err error) { - re, err := Compile(pattern) - if err != nil { - return false, err - } - return re.Match(b), nil -} - -// ReplaceAllString returns a copy of src, replacing matches of the Regexp -// with the replacement string repl. Inside repl, $ signs are interpreted as -// in Expand, so for instance $1 represents the text of the first submatch. -func (re *Regexp) ReplaceAllString(src, repl string) string { - n := 2 - if strings.Index(repl, "$") >= 0 { - n = 2 * (re.numSubexp + 1) - } - b := re.replaceAll(nil, src, n, func(dst []byte, match []int) []byte { - return re.expand(dst, repl, nil, src, match) - }) - return string(b) -} - -// ReplaceAllStringLiteral returns a copy of src, replacing matches of the Regexp -// with the replacement string repl. The replacement repl is substituted directly, -// without using Expand. -func (re *Regexp) ReplaceAllLiteralString(src, repl string) string { - return string(re.replaceAll(nil, src, 2, func(dst []byte, match []int) []byte { - return append(dst, repl...) - })) -} - -// ReplaceAllStringFunc returns a copy of src in which all matches of the -// Regexp have been replaced by the return value of function repl applied -// to the matched substring. The replacement returned by repl is substituted -// directly, without using Expand. -func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string { - b := re.replaceAll(nil, src, 2, func(dst []byte, match []int) []byte { - return append(dst, repl(src[match[0]:match[1]])...) 
- }) - return string(b) -} - -func (re *Regexp) replaceAll(bsrc []byte, src string, nmatch int, repl func(dst []byte, m []int) []byte) []byte { - lastMatchEnd := 0 // end position of the most recent match - searchPos := 0 // position where we next look for a match - var buf []byte - var endPos int - if bsrc != nil { - endPos = len(bsrc) - } else { - endPos = len(src) - } - for searchPos <= endPos { - a := re.doExecute(nil, bsrc, src, searchPos, nmatch) - if len(a) == 0 { - break // no more matches - } - - // Copy the unmatched characters before this match. - if bsrc != nil { - buf = append(buf, bsrc[lastMatchEnd:a[0]]...) - } else { - buf = append(buf, src[lastMatchEnd:a[0]]...) - } - - // Now insert a copy of the replacement string, but not for a - // match of the empty string immediately after another match. - // (Otherwise, we get double replacement for patterns that - // match both empty and nonempty strings.) - if a[1] > lastMatchEnd || a[0] == 0 { - buf = repl(buf, a) - } - lastMatchEnd = a[1] - - // Advance past this match; always advance at least one character. - var width int - if bsrc != nil { - _, width = utf8.DecodeRune(bsrc[searchPos:]) - } else { - _, width = utf8.DecodeRuneInString(src[searchPos:]) - } - if searchPos+width > a[1] { - searchPos += width - } else if searchPos+1 > a[1] { - // This clause is only needed at the end of the input - // string. In that case, DecodeRuneInString returns width=0. - searchPos++ - } else { - searchPos = a[1] - } - } - - // Copy the unmatched characters after the last match. - if bsrc != nil { - buf = append(buf, bsrc[lastMatchEnd:]...) - } else { - buf = append(buf, src[lastMatchEnd:]...) - } - - return buf -} - -// ReplaceAll returns a copy of src, replacing matches of the Regexp -// with the replacement text repl. Inside repl, $ signs are interpreted as -// in Expand, so for instance $1 represents the text of the first submatch. 
-func (re *Regexp) ReplaceAll(src, repl []byte) []byte { - n := 2 - if bytes.IndexByte(repl, '$') >= 0 { - n = 2 * (re.numSubexp + 1) - } - srepl := "" - b := re.replaceAll(src, "", n, func(dst []byte, match []int) []byte { - if len(srepl) != len(repl) { - srepl = string(repl) - } - return re.expand(dst, srepl, src, "", match) - }) - return b -} - -// ReplaceAllLiteral returns a copy of src, replacing matches of the Regexp -// with the replacement bytes repl. The replacement repl is substituted directly, -// without using Expand. -func (re *Regexp) ReplaceAllLiteral(src, repl []byte) []byte { - return re.replaceAll(src, "", 2, func(dst []byte, match []int) []byte { - return append(dst, repl...) - }) -} - -// ReplaceAllFunc returns a copy of src in which all matches of the -// Regexp have been replaced by the return value of function repl applied -// to the matched byte slice. The replacement returned by repl is substituted -// directly, without using Expand. -func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte { - return re.replaceAll(src, "", 2, func(dst []byte, match []int) []byte { - return append(dst, repl(src[match[0]:match[1]])...) - }) -} - -var specialBytes = []byte(`\.+*?()|[]{}^$`) - -func special(b byte) bool { - return bytes.IndexByte(specialBytes, b) >= 0 -} - -// QuoteMeta returns a string that quotes all regular expression metacharacters -// inside the argument text; the returned string is a regular expression matching -// the literal text. For example, QuoteMeta(`[foo]`) returns `\[foo\]`. -func QuoteMeta(s string) string { - b := make([]byte, 2*len(s)) - - // A byte loop is correct because all metacharacters are ASCII. - j := 0 - for i := 0; i < len(s); i++ { - if special(s[i]) { - b[j] = '\\' - j++ - } - b[j] = s[i] - j++ - } - return string(b[0:j]) -} - -// The number of capture values in the program may correspond -// to fewer capturing expressions than are in the regexp. 
// For example, "(a){0}" turns into an empty program, so the
// maximum capture in the program is 0 but we need to return
// an expression for \1. Pad appends -1s to the slice a as needed.
// The padded slice has (1+numSubexp)*2 entries; a nil slice (no match)
// is returned as nil.
func (re *Regexp) pad(a []int) []int {
	if a == nil {
		// No match.
		return nil
	}
	n := (1 + re.numSubexp) * 2
	for len(a) < n {
		a = append(a, -1)
	}
	return a
}

// Find matches in slice b if b is non-nil, otherwise find matches in string s.
// Each accepted match is padded with pad and handed to deliver; at most n
// matches are delivered.
func (re *Regexp) allMatches(s string, b []byte, n int, deliver func([]int)) {
	var end int
	if b == nil {
		end = len(s)
	} else {
		end = len(b)
	}

	// pos is where the next search starts, i counts delivered matches,
	// and prevMatchEnd is the end of the previous match (-1 before any).
	for pos, i, prevMatchEnd := 0, 0, -1; i < n && pos <= end; {
		matches := re.doExecute(nil, b, s, pos, re.prog.NumCap)
		if len(matches) == 0 {
			break
		}

		accept := true
		if matches[1] == pos {
			// We've found an empty match.
			if matches[0] == prevMatchEnd {
				// We don't allow an empty match right
				// after a previous match, so ignore it.
				accept = false
			}
			var width int
			// TODO: use step()
			if b == nil {
				_, width = utf8.DecodeRuneInString(s[pos:end])
			} else {
				_, width = utf8.DecodeRune(b[pos:end])
			}
			// Step over one rune so the search makes progress; a zero
			// width moves pos past end, which ends the loop.
			if width > 0 {
				pos += width
			} else {
				pos = end + 1
			}
		} else {
			pos = matches[1]
		}
		prevMatchEnd = matches[1]

		if accept {
			deliver(re.pad(matches))
			i++
		}
	}
}

// Find returns a slice holding the text of the leftmost match in b of the regular expression.
// A return value of nil indicates no match.
func (re *Regexp) Find(b []byte) []byte {
	a := re.doExecute(nil, b, "", 0, 2)
	if a == nil {
		return nil
	}
	return b[a[0]:a[1]]
}

// FindIndex returns a two-element slice of integers defining the location of
// the leftmost match in b of the regular expression. The match itself is at
// b[loc[0]:loc[1]].
// A return value of nil indicates no match.
-func (re *Regexp) FindIndex(b []byte) (loc []int) { - a := re.doExecute(nil, b, "", 0, 2) - if a == nil { - return nil - } - return a[0:2] -} - -// FindString returns a string holding the text of the leftmost match in s of the regular -// expression. If there is no match, the return value is an empty string, -// but it will also be empty if the regular expression successfully matches -// an empty string. Use FindStringIndex or FindStringSubmatch if it is -// necessary to distinguish these cases. -func (re *Regexp) FindString(s string) string { - a := re.doExecute(nil, nil, s, 0, 2) - if a == nil { - return "" - } - return s[a[0]:a[1]] -} - -// FindStringIndex returns a two-element slice of integers defining the -// location of the leftmost match in s of the regular expression. The match -// itself is at s[loc[0]:loc[1]]. -// A return value of nil indicates no match. -func (re *Regexp) FindStringIndex(s string) (loc []int) { - a := re.doExecute(nil, nil, s, 0, 2) - if a == nil { - return nil - } - return a[0:2] -} - -// FindReaderIndex returns a two-element slice of integers defining the -// location of the leftmost match of the regular expression in text read from -// the RuneReader. The match text was found in the input stream at -// byte offset loc[0] through loc[1]-1. -// A return value of nil indicates no match. -func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int) { - a := re.doExecute(r, nil, "", 0, 2) - if a == nil { - return nil - } - return a[0:2] -} - -// FindSubmatch returns a slice of slices holding the text of the leftmost -// match of the regular expression in b and the matches, if any, of its -// subexpressions, as defined by the 'Submatch' descriptions in the package -// comment. -// A return value of nil indicates no match. 
func (re *Regexp) FindSubmatch(b []byte) [][]byte {
	a := re.doExecute(nil, b, "", 0, re.prog.NumCap)
	if a == nil {
		return nil
	}
	ret := make([][]byte, 1+re.numSubexp)
	for i := range ret {
		// Entries beyond the end of a, or with a negative start index,
		// are left nil.
		if 2*i < len(a) && a[2*i] >= 0 {
			ret[i] = b[a[2*i]:a[2*i+1]]
		}
	}
	return ret
}

// Expand appends template to dst and returns the result; during the
// append, Expand replaces variables in the template with corresponding
// matches drawn from src. The match slice should have been returned by
// FindSubmatchIndex.
//
// In the template, a variable is denoted by a substring of the form
// $name or ${name}, where name is a non-empty sequence of letters,
// digits, and underscores. A purely numeric name like $1 refers to
// the submatch with the corresponding index; other names refer to
// capturing parentheses named with the (?P<name>...) syntax. A
// reference to an out of range or unmatched index or a name that is not
// present in the regular expression is replaced with an empty slice.
//
// In the $name form, name is taken to be as long as possible: $1x is
// equivalent to ${1x}, not ${1}x, and, $10 is equivalent to ${10}, not ${1}0.
//
// To insert a literal $ in the output, use $$ in the template.
func (re *Regexp) Expand(dst []byte, template []byte, src []byte, match []int) []byte {
	return re.expand(dst, string(template), src, "", match)
}

// ExpandString is like Expand but the template and source are strings.
// It appends to and returns a byte slice in order to give the calling
// code control over allocation.
func (re *Regexp) ExpandString(dst []byte, template string, src string, match []int) []byte {
	return re.expand(dst, template, nil, src, match)
}

// expand implements Expand and ExpandString. Matched text is taken from
// bsrc when bsrc is non-nil and from src otherwise. The template is
// consumed from the left: literal text up to each '$' is copied to dst,
// then the '$' construct is handled, and whatever remains once no '$' is
// left is copied at the end.
func (re *Regexp) expand(dst []byte, template string, bsrc []byte, src string, match []int) []byte {
	for len(template) > 0 {
		i := strings.Index(template, "$")
		if i < 0 {
			break
		}
		dst = append(dst, template[:i]...)
		template = template[i:]
		if len(template) > 1 && template[1] == '$' {
			// Treat $$ as $.
			dst = append(dst, '$')
			template = template[2:]
			continue
		}
		name, num, rest, ok := extract(template)
		if !ok {
			// Malformed; treat $ as raw text.
			dst = append(dst, '$')
			template = template[1:]
			continue
		}
		template = rest
		if num >= 0 {
			// Numeric reference: copy the submatch only if its index pair
			// is within match and the submatch took part in the match.
			if 2*num+1 < len(match) && match[2*num] >= 0 {
				if bsrc != nil {
					dst = append(dst, bsrc[match[2*num]:match[2*num+1]]...)
				} else {
					dst = append(dst, src[match[2*num]:match[2*num+1]]...)
				}
			}
		} else {
			// Named reference: use the first subexpression with this name
			// whose index pair is within match and non-negative.
			for i, namei := range re.subexpNames {
				if name == namei && 2*i+1 < len(match) && match[2*i] >= 0 {
					if bsrc != nil {
						dst = append(dst, bsrc[match[2*i]:match[2*i+1]]...)
					} else {
						dst = append(dst, src[match[2*i]:match[2*i+1]]...)
					}
					break
				}
			}
		}
	}
	dst = append(dst, template...)
	return dst
}

// extract returns the name from a leading "$name" or "${name}" in str.
// If it is a number, extract returns num set to that number; otherwise num = -1.
// rest is the text following the variable. ok is false when str does not
// begin with a well-formed variable; the other results should not be used
// in that case.
func extract(str string) (name string, num int, rest string, ok bool) {
	if len(str) < 2 || str[0] != '$' {
		return
	}
	brace := false
	if str[1] == '{' {
		brace = true
		str = str[2:]
	} else {
		str = str[1:]
	}
	// Scan the longest run of letters, digits and underscores.
	i := 0
	for i < len(str) {
		rune, size := utf8.DecodeRuneInString(str[i:])
		if !unicode.IsLetter(rune) && !unicode.IsDigit(rune) && rune != '_' {
			break
		}
		i += size
	}
	if i == 0 {
		// empty name is not okay
		return
	}
	name = str[:i]
	if brace {
		if i >= len(str) || str[i] != '}' {
			// missing closing brace
			return
		}
		i++
	}

	// Parse number.
	// This loop declares its own i, leaving the outer i (the end of the
	// variable within str) untouched for the computation of rest below.
	num = 0
	for i := 0; i < len(name); i++ {
		if name[i] < '0' || '9' < name[i] || num >= 1e8 {
			num = -1
			break
		}
		num = num*10 + int(name[i]) - '0'
	}
	// Disallow leading zeros.
	if name[0] == '0' && len(name) > 1 {
		num = -1
	}

	rest = str[i:]
	ok = true
	return
}

// FindSubmatchIndex returns a slice holding the index pairs identifying the
// leftmost match of the regular expression in b and the matches, if any, of
// its subexpressions, as defined by the 'Submatch' and 'Index' descriptions
// in the package comment.
// A return value of nil indicates no match.
func (re *Regexp) FindSubmatchIndex(b []byte) []int {
	return re.pad(re.doExecute(nil, b, "", 0, re.prog.NumCap))
}

// FindStringSubmatch returns a slice of strings holding the text of the
// leftmost match of the regular expression in s and the matches, if any, of
// its subexpressions, as defined by the 'Submatch' description in the
// package comment.
// A return value of nil indicates no match.
func (re *Regexp) FindStringSubmatch(s string) []string {
	a := re.doExecute(nil, nil, s, 0, re.prog.NumCap)
	if a == nil {
		return nil
	}
	ret := make([]string, 1+re.numSubexp)
	for i := range ret {
		// Entries beyond the end of a, or with a negative start index,
		// are left as the empty string.
		if 2*i < len(a) && a[2*i] >= 0 {
			ret[i] = s[a[2*i]:a[2*i+1]]
		}
	}
	return ret
}

// FindStringSubmatchIndex returns a slice holding the index pairs
// identifying the leftmost match of the regular expression in s and the
// matches, if any, of its subexpressions, as defined by the 'Submatch' and
// 'Index' descriptions in the package comment.
// A return value of nil indicates no match.
func (re *Regexp) FindStringSubmatchIndex(s string) []int {
	return re.pad(re.doExecute(nil, nil, s, 0, re.prog.NumCap))
}

// FindReaderSubmatchIndex returns a slice holding the index pairs
// identifying the leftmost match of the regular expression of text read by
// the RuneReader, and the matches, if any, of its subexpressions, as defined
// by the 'Submatch' and 'Index' descriptions in the package comment. A
// return value of nil indicates no match.
-func (re *Regexp) FindReaderSubmatchIndex(r io.RuneReader) []int { - return re.pad(re.doExecute(r, nil, "", 0, re.prog.NumCap)) -} - -const startSize = 10 // The size at which to start a slice in the 'All' routines. - -// FindAll is the 'All' version of Find; it returns a slice of all successive -// matches of the expression, as defined by the 'All' description in the -// package comment. -// A return value of nil indicates no match. -func (re *Regexp) FindAll(b []byte, n int) [][]byte { - if n < 0 { - n = len(b) + 1 - } - result := make([][]byte, 0, startSize) - re.allMatches("", b, n, func(match []int) { - result = append(result, b[match[0]:match[1]]) - }) - if len(result) == 0 { - return nil - } - return result -} - -// FindAllIndex is the 'All' version of FindIndex; it returns a slice of all -// successive matches of the expression, as defined by the 'All' description -// in the package comment. -// A return value of nil indicates no match. -func (re *Regexp) FindAllIndex(b []byte, n int) [][]int { - if n < 0 { - n = len(b) + 1 - } - result := make([][]int, 0, startSize) - re.allMatches("", b, n, func(match []int) { - result = append(result, match[0:2]) - }) - if len(result) == 0 { - return nil - } - return result -} - -// FindAllString is the 'All' version of FindString; it returns a slice of all -// successive matches of the expression, as defined by the 'All' description -// in the package comment. -// A return value of nil indicates no match. -func (re *Regexp) FindAllString(s string, n int) []string { - if n < 0 { - n = len(s) + 1 - } - result := make([]string, 0, startSize) - re.allMatches(s, nil, n, func(match []int) { - result = append(result, s[match[0]:match[1]]) - }) - if len(result) == 0 { - return nil - } - return result -} - -// FindAllStringIndex is the 'All' version of FindStringIndex; it returns a -// slice of all successive matches of the expression, as defined by the 'All' -// description in the package comment. 
-// A return value of nil indicates no match. -func (re *Regexp) FindAllStringIndex(s string, n int) [][]int { - if n < 0 { - n = len(s) + 1 - } - result := make([][]int, 0, startSize) - re.allMatches(s, nil, n, func(match []int) { - result = append(result, match[0:2]) - }) - if len(result) == 0 { - return nil - } - return result -} - -// FindAllSubmatch is the 'All' version of FindSubmatch; it returns a slice -// of all successive matches of the expression, as defined by the 'All' -// description in the package comment. -// A return value of nil indicates no match. -func (re *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte { - if n < 0 { - n = len(b) + 1 - } - result := make([][][]byte, 0, startSize) - re.allMatches("", b, n, func(match []int) { - slice := make([][]byte, len(match)/2) - for j := range slice { - if match[2*j] >= 0 { - slice[j] = b[match[2*j]:match[2*j+1]] - } - } - result = append(result, slice) - }) - if len(result) == 0 { - return nil - } - return result -} - -// FindAllSubmatchIndex is the 'All' version of FindSubmatchIndex; it returns -// a slice of all successive matches of the expression, as defined by the -// 'All' description in the package comment. -// A return value of nil indicates no match. -func (re *Regexp) FindAllSubmatchIndex(b []byte, n int) [][]int { - if n < 0 { - n = len(b) + 1 - } - result := make([][]int, 0, startSize) - re.allMatches("", b, n, func(match []int) { - result = append(result, match) - }) - if len(result) == 0 { - return nil - } - return result -} - -// FindAllStringSubmatch is the 'All' version of FindStringSubmatch; it -// returns a slice of all successive matches of the expression, as defined by -// the 'All' description in the package comment. -// A return value of nil indicates no match. 
-func (re *Regexp) FindAllStringSubmatch(s string, n int) [][]string { - if n < 0 { - n = len(s) + 1 - } - result := make([][]string, 0, startSize) - re.allMatches(s, nil, n, func(match []int) { - slice := make([]string, len(match)/2) - for j := range slice { - if match[2*j] >= 0 { - slice[j] = s[match[2*j]:match[2*j+1]] - } - } - result = append(result, slice) - }) - if len(result) == 0 { - return nil - } - return result -} - -// FindAllStringSubmatchIndex is the 'All' version of -// FindStringSubmatchIndex; it returns a slice of all successive matches of -// the expression, as defined by the 'All' description in the package -// comment. -// A return value of nil indicates no match. -func (re *Regexp) FindAllStringSubmatchIndex(s string, n int) [][]int { - if n < 0 { - n = len(s) + 1 - } - result := make([][]int, 0, startSize) - re.allMatches(s, nil, n, func(match []int) { - result = append(result, match) - }) - if len(result) == 0 { - return nil - } - return result -} - -// Split slices s into substrings separated by the expression and returns a slice of -// the substrings between those expression matches. -// -// The slice returned by this method consists of all the substrings of s -// not contained in the slice returned by FindAllString. When called on an expression -// that contains no metacharacters, it is equivalent to strings.SplitN. -// -// Example: -// s := regexp.MustCompile("a*").Split("abaabaccadaaae", 5) -// // s: ["", "b", "b", "c", "cadaaae"] -// -// The count determines the number of substrings to return: -// n > 0: at most n substrings; the last substring will be the unsplit remainder. 
//   n == 0: the result is nil (zero substrings)
//   n < 0: all substrings
func (re *Regexp) Split(s string, n int) []string {

	if n == 0 {
		return nil
	}

	// A non-empty expression applied to an empty string yields a single
	// empty substring.
	if len(re.expr) > 0 && len(s) == 0 {
		return []string{""}
	}

	matches := re.FindAllStringIndex(s, n)
	// NOTE: within this function the local name strings hides the
	// imported strings package.
	strings := make([]string, 0, len(matches))

	// beg is the start of the substring being collected; end is the start
	// of the most recent match examined.
	beg := 0
	end := 0
	for _, match := range matches {
		// With a positive n, stop one short so that the final element can
		// hold the unsplit remainder.
		if n > 0 && len(strings) >= n-1 {
			break
		}

		end = match[0]
		// A match ending at offset 0 contributes no substring.
		if match[1] != 0 {
			strings = append(strings, s[beg:end])
		}
		beg = match[1]
	}

	if end != len(s) {
		strings = append(strings, s[beg:])
	}

	return strings
}
diff --git a/src/pkg/regexp/syntax/compile.go b/src/pkg/regexp/syntax/compile.go deleted file mode 100644 index 95f6f1569..000000000 --- a/src/pkg/regexp/syntax/compile.go +++ /dev/null @@ -1,289 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package syntax

import "unicode"

// A patchList is a list of instruction pointers that need to be filled in (patched).
// Because the pointers haven't been filled in yet, we can reuse their storage
// to hold the list. It's kind of sleazy, but works well in practice.
// See http://swtch.com/~rsc/regexp/regexp1.html for inspiration.
//
// These aren't really pointers: they're integers, so we can reinterpret them
// this way without using package unsafe. A value l denotes
// p.inst[l>>1].Out (l&1==0) or .Arg (l&1==1).
// l == 0 denotes the empty list, okay because we start every program
// with a fail instruction, so we'll never want to point at its output link.
type patchList uint32

// next returns the list entry that follows l: the value currently stored
// in the Out field (even l) or Arg field (odd l) of instruction l>>1.
func (l patchList) next(p *Prog) patchList {
	i := &p.Inst[l>>1]
	if l&1 == 0 {
		return patchList(i.Out)
	}
	return patchList(i.Arg)
}

// patch walks the list starting at l and stores val in every field on it.
func (l patchList) patch(p *Prog, val uint32) {
	for l != 0 {
		i := &p.Inst[l>>1]
		// The field holds the link to the next entry, so that link is
		// read before the field is overwritten with val.
		if l&1 == 0 {
			l = patchList(i.Out)
			i.Out = val
		} else {
			l = patchList(i.Arg)
			i.Arg = val
		}
	}
}

// append joins l2 onto the end of l1 and returns the head of the combined
// list. If either list is empty the other is returned unchanged.
func (l1 patchList) append(p *Prog, l2 patchList) patchList {
	if l1 == 0 {
		return l2
	}
	if l2 == 0 {
		return l1
	}

	// Find the last entry of l1.
	last := l1
	for {
		next := last.next(p)
		if next == 0 {
			break
		}
		last = next
	}

	// Link the last entry of l1 to the head of l2.
	i := &p.Inst[last>>1]
	if last&1 == 0 {
		i.Out = uint32(l2)
	} else {
		i.Arg = uint32(l2)
	}
	return l1
}

// A frag represents a compiled program fragment.
type frag struct {
	i   uint32    // index of first instruction
	out patchList // where to record end instruction
}

// compiler holds the program being built.
type compiler struct {
	p *Prog
}

// Compile compiles the regexp into a program to be executed.
// The regexp should have been simplified already (returned from re.Simplify).
-func Compile(re *Regexp) (*Prog, error) { - var c compiler - c.init() - f := c.compile(re) - f.out.patch(c.p, c.inst(InstMatch).i) - c.p.Start = int(f.i) - return c.p, nil -} - -func (c *compiler) init() { - c.p = new(Prog) - c.p.NumCap = 2 // implicit ( and ) for whole match $0 - c.inst(InstFail) -} - -var anyRuneNotNL = []rune{0, '\n' - 1, '\n' + 1, unicode.MaxRune} -var anyRune = []rune{0, unicode.MaxRune} - -func (c *compiler) compile(re *Regexp) frag { - switch re.Op { - case OpNoMatch: - return c.fail() - case OpEmptyMatch: - return c.nop() - case OpLiteral: - if len(re.Rune) == 0 { - return c.nop() - } - var f frag - for j := range re.Rune { - f1 := c.rune(re.Rune[j:j+1], re.Flags) - if j == 0 { - f = f1 - } else { - f = c.cat(f, f1) - } - } - return f - case OpCharClass: - return c.rune(re.Rune, re.Flags) - case OpAnyCharNotNL: - return c.rune(anyRuneNotNL, 0) - case OpAnyChar: - return c.rune(anyRune, 0) - case OpBeginLine: - return c.empty(EmptyBeginLine) - case OpEndLine: - return c.empty(EmptyEndLine) - case OpBeginText: - return c.empty(EmptyBeginText) - case OpEndText: - return c.empty(EmptyEndText) - case OpWordBoundary: - return c.empty(EmptyWordBoundary) - case OpNoWordBoundary: - return c.empty(EmptyNoWordBoundary) - case OpCapture: - bra := c.cap(uint32(re.Cap << 1)) - sub := c.compile(re.Sub[0]) - ket := c.cap(uint32(re.Cap<<1 | 1)) - return c.cat(c.cat(bra, sub), ket) - case OpStar: - return c.star(c.compile(re.Sub[0]), re.Flags&NonGreedy != 0) - case OpPlus: - return c.plus(c.compile(re.Sub[0]), re.Flags&NonGreedy != 0) - case OpQuest: - return c.quest(c.compile(re.Sub[0]), re.Flags&NonGreedy != 0) - case OpConcat: - if len(re.Sub) == 0 { - return c.nop() - } - var f frag - for i, sub := range re.Sub { - if i == 0 { - f = c.compile(sub) - } else { - f = c.cat(f, c.compile(sub)) - } - } - return f - case OpAlternate: - var f frag - for _, sub := range re.Sub { - f = c.alt(f, c.compile(sub)) - } - return f - } - panic("regexp: unhandled case in 
compile") -} - -func (c *compiler) inst(op InstOp) frag { - // TODO: impose length limit - f := frag{i: uint32(len(c.p.Inst))} - c.p.Inst = append(c.p.Inst, Inst{Op: op}) - return f -} - -func (c *compiler) nop() frag { - f := c.inst(InstNop) - f.out = patchList(f.i << 1) - return f -} - -func (c *compiler) fail() frag { - return frag{} -} - -func (c *compiler) cap(arg uint32) frag { - f := c.inst(InstCapture) - f.out = patchList(f.i << 1) - c.p.Inst[f.i].Arg = arg - - if c.p.NumCap < int(arg)+1 { - c.p.NumCap = int(arg) + 1 - } - return f -} - -func (c *compiler) cat(f1, f2 frag) frag { - // concat of failure is failure - if f1.i == 0 || f2.i == 0 { - return frag{} - } - - // TODO: elide nop - - f1.out.patch(c.p, f2.i) - return frag{f1.i, f2.out} -} - -func (c *compiler) alt(f1, f2 frag) frag { - // alt of failure is other - if f1.i == 0 { - return f2 - } - if f2.i == 0 { - return f1 - } - - f := c.inst(InstAlt) - i := &c.p.Inst[f.i] - i.Out = f1.i - i.Arg = f2.i - f.out = f1.out.append(c.p, f2.out) - return f -} - -func (c *compiler) quest(f1 frag, nongreedy bool) frag { - f := c.inst(InstAlt) - i := &c.p.Inst[f.i] - if nongreedy { - i.Arg = f1.i - f.out = patchList(f.i << 1) - } else { - i.Out = f1.i - f.out = patchList(f.i<<1 | 1) - } - f.out = f.out.append(c.p, f1.out) - return f -} - -func (c *compiler) star(f1 frag, nongreedy bool) frag { - f := c.inst(InstAlt) - i := &c.p.Inst[f.i] - if nongreedy { - i.Arg = f1.i - f.out = patchList(f.i << 1) - } else { - i.Out = f1.i - f.out = patchList(f.i<<1 | 1) - } - f1.out.patch(c.p, f.i) - return f -} - -func (c *compiler) plus(f1 frag, nongreedy bool) frag { - return frag{f1.i, c.star(f1, nongreedy).out} -} - -func (c *compiler) empty(op EmptyOp) frag { - f := c.inst(InstEmptyWidth) - c.p.Inst[f.i].Arg = uint32(op) - f.out = patchList(f.i << 1) - return f -} - -func (c *compiler) rune(r []rune, flags Flags) frag { - f := c.inst(InstRune) - i := &c.p.Inst[f.i] - i.Rune = r - flags &= FoldCase // only relevant flag is 
FoldCase - if len(r) != 1 || unicode.SimpleFold(r[0]) == r[0] { - // and sometimes not even that - flags &^= FoldCase - } - i.Arg = uint32(flags) - f.out = patchList(f.i << 1) - - // Special cases for exec machine. - switch { - case flags&FoldCase == 0 && (len(r) == 1 || len(r) == 2 && r[0] == r[1]): - i.Op = InstRune1 - case len(r) == 2 && r[0] == 0 && r[1] == unicode.MaxRune: - i.Op = InstRuneAny - case len(r) == 4 && r[0] == 0 && r[1] == '\n'-1 && r[2] == '\n'+1 && r[3] == unicode.MaxRune: - i.Op = InstRuneAnyNotNL - } - - return f -} diff --git a/src/pkg/regexp/syntax/doc.go b/src/pkg/regexp/syntax/doc.go deleted file mode 100644 index 8e72c90d3..000000000 --- a/src/pkg/regexp/syntax/doc.go +++ /dev/null @@ -1,131 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// DO NOT EDIT. This file is generated by mksyntaxgo from the RE2 distribution. - -/* -Package syntax parses regular expressions into parse trees and compiles -parse trees into programs. Most clients of regular expressions will use the -facilities of package regexp (such as Compile and Match) instead of this package. - -Syntax - -The regular expression syntax understood by this package when parsing with the Perl flag is as follows. -Parts of the syntax can be disabled by passing alternate flags to Parse. - - -Single characters: - . 
any character, possibly including newline (flag s=true) - [xyz] character class - [^xyz] negated character class - \d Perl character class - \D negated Perl character class - [:alpha:] ASCII character class - [:^alpha:] negated ASCII character class - \pN Unicode character class (one-letter name) - \p{Greek} Unicode character class - \PN negated Unicode character class (one-letter name) - \P{Greek} negated Unicode character class - -Composites: - xy x followed by y - x|y x or y (prefer x) - -Repetitions: - x* zero or more x, prefer more - x+ one or more x, prefer more - x? zero or one x, prefer one - x{n,m} n or n+1 or ... or m x, prefer more - x{n,} n or more x, prefer more - x{n} exactly n x - x*? zero or more x, prefer fewer - x+? one or more x, prefer fewer - x?? zero or one x, prefer zero - x{n,m}? n or n+1 or ... or m x, prefer fewer - x{n,}? n or more x, prefer fewer - x{n}? exactly n x - -Implementation restriction: The counting forms x{n} etc. (but not the other -forms x* etc.) have an upper limit of n=1000. Negative or higher explicit -counts yield the parse error ErrInvalidRepeatSize. - -Grouping: - (re) numbered capturing group (submatch) - (?P<name>re) named & numbered capturing group (submatch) - (?:re) non-capturing group (submatch) - (?flags) set flags within current group; non-capturing - (?flags:re) set flags during re; non-capturing - - Flag syntax is xyz (set) or -xyz (clear) or xy-z (set xy, clear z). The flags are: - - i case-insensitive (default false) - m multi-line mode: ^ and $ match begin/end line in addition to begin/end text (default false) - s let . 
match \n (default false) - U ungreedy: swap meaning of x* and x*?, x+ and x+?, etc (default false) - -Empty strings: - ^ at beginning of text or line (flag m=true) - $ at end of text (like \z not \Z) or line (flag m=true) - \A at beginning of text - \b at ASCII word boundary (\w on one side and \W, \A, or \z on the other) - \B not an ASCII word boundary - \z at end of text - -Escape sequences: - \a bell (== \007) - \f form feed (== \014) - \t horizontal tab (== \011) - \n newline (== \012) - \r carriage return (== \015) - \v vertical tab character (== \013) - \* literal *, for any punctuation character * - \123 octal character code (up to three digits) - \x7F hex character code (exactly two digits) - \x{10FFFF} hex character code - \Q...\E literal text ... even if ... has punctuation - -Character class elements: - x single character - A-Z character range (inclusive) - \d Perl character class - [:foo:] ASCII character class foo - \p{Foo} Unicode character class Foo - \pF Unicode character class F (one-letter name) - -Named character classes as character class elements: - [\d] digits (== \d) - [^\d] not digits (== \D) - [\D] not digits (== \D) - [^\D] not not digits (== \d) - [[:name:]] named ASCII class inside character class (== [:name:]) - [^[:name:]] named ASCII class inside negated character class (== [:^name:]) - [\p{Name}] named Unicode property inside character class (== \p{Name}) - [^\p{Name}] named Unicode property inside negated character class (== \P{Name}) - -Perl character classes: - \d digits (== [0-9]) - \D not digits (== [^0-9]) - \s whitespace (== [\t\n\f\r ]) - \S not whitespace (== [^\t\n\f\r ]) - \w ASCII word characters (== [0-9A-Za-z_]) - \W not ASCII word characters (== [^0-9A-Za-z_]) - -ASCII character classes: - [:alnum:] alphanumeric (== [0-9A-Za-z]) - [:alpha:] alphabetic (== [A-Za-z]) - [:ascii:] ASCII (== [\x00-\x7F]) - [:blank:] blank (== [\t ]) - [:cntrl:] control (== [\x00-\x1F\x7F]) - [:digit:] digits (== [0-9]) - [:graph:] graphical 
(== [!-~] == [A-Za-z0-9!"#$%&'()*+,\-./:;<=>?@[\\\]^_`{|}~]) - [:lower:] lower case (== [a-z]) - [:print:] printable (== [ -~] == [ [:graph:]]) - [:punct:] punctuation (== [!-/:-@[-`{-~]) - [:space:] whitespace (== [\t\n\v\f\r ]) - [:upper:] upper case (== [A-Z]) - [:word:] word characters (== [0-9A-Za-z_]) - [:xdigit:] hex digit (== [0-9A-Fa-f]) - -*/ -package syntax diff --git a/src/pkg/regexp/syntax/make_perl_groups.pl b/src/pkg/regexp/syntax/make_perl_groups.pl deleted file mode 100755 index 90040fcb4..000000000 --- a/src/pkg/regexp/syntax/make_perl_groups.pl +++ /dev/null @@ -1,107 +0,0 @@ -#!/usr/bin/perl -# Copyright 2008 The Go Authors. All rights reserved. -# Use of this source code is governed by a BSD-style -# license that can be found in the LICENSE file. - -# Modified version of RE2's make_perl_groups.pl. - -# Generate table entries giving character ranges -# for POSIX/Perl character classes. Rather than -# figure out what the definition is, it is easier to ask -# Perl about each letter from 0-128 and write down -# its answer. 
- -@posixclasses = ( - "[:alnum:]", - "[:alpha:]", - "[:ascii:]", - "[:blank:]", - "[:cntrl:]", - "[:digit:]", - "[:graph:]", - "[:lower:]", - "[:print:]", - "[:punct:]", - "[:space:]", - "[:upper:]", - "[:word:]", - "[:xdigit:]", -); - -@perlclasses = ( - "\\d", - "\\s", - "\\w", -); - -sub ComputeClass($) { - my @ranges; - my ($class) = @_; - my $regexp = "[$class]"; - my $start = -1; - for (my $i=0; $i<=129; $i++) { - if ($i == 129) { $i = 256; } - if ($i <= 128 && chr($i) =~ $regexp) { - if ($start < 0) { - $start = $i; - } - } else { - if ($start >= 0) { - push @ranges, [$start, $i-1]; - } - $start = -1; - } - } - return @ranges; -} - -sub PrintClass($$@) { - my ($cname, $name, @ranges) = @_; - print "var code$cname = []rune{ /* $name */\n"; - for (my $i=0; $i<@ranges; $i++) { - my @a = @{$ranges[$i]}; - printf "\t0x%x, 0x%x,\n", $a[0], $a[1]; - } - print "}\n\n"; - my $n = @ranges; - $negname = $name; - if ($negname =~ /:/) { - $negname =~ s/:/:^/; - } else { - $negname =~ y/a-z/A-Z/; - } - return "\t`$name`: {+1, code$cname},\n" . - "\t`$negname`: {-1, code$cname},\n"; -} - -my $gen = 0; - -sub PrintClasses($@) { - my ($cname, @classes) = @_; - my @entries; - foreach my $cl (@classes) { - my @ranges = ComputeClass($cl); - push @entries, PrintClass(++$gen, $cl, @ranges); - } - print "var ${cname}Group = map[string]charGroup{\n"; - foreach my $e (@entries) { - print $e; - } - print "}\n"; - my $count = @entries; -} - -print <<EOF; -// Copyright 2013 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// GENERATED BY make_perl_groups.pl; DO NOT EDIT. 
-// make_perl_groups.pl >perl_groups.go - -package syntax - -EOF - -PrintClasses("perl", @perlclasses); -PrintClasses("posix", @posixclasses); diff --git a/src/pkg/regexp/syntax/parse.go b/src/pkg/regexp/syntax/parse.go deleted file mode 100644 index cb25dca39..000000000 --- a/src/pkg/regexp/syntax/parse.go +++ /dev/null @@ -1,1863 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package syntax - -import ( - "sort" - "strings" - "unicode" - "unicode/utf8" -) - -// An Error describes a failure to parse a regular expression -// and gives the offending expression. -type Error struct { - Code ErrorCode - Expr string -} - -func (e *Error) Error() string { - return "error parsing regexp: " + e.Code.String() + ": `" + e.Expr + "`" -} - -// An ErrorCode describes a failure to parse a regular expression. -type ErrorCode string - -const ( - // Unexpected error - ErrInternalError ErrorCode = "regexp/syntax: internal error" - - // Parse errors - ErrInvalidCharClass ErrorCode = "invalid character class" - ErrInvalidCharRange ErrorCode = "invalid character class range" - ErrInvalidEscape ErrorCode = "invalid escape sequence" - ErrInvalidNamedCapture ErrorCode = "invalid named capture" - ErrInvalidPerlOp ErrorCode = "invalid or unsupported Perl syntax" - ErrInvalidRepeatOp ErrorCode = "invalid nested repetition operator" - ErrInvalidRepeatSize ErrorCode = "invalid repeat count" - ErrInvalidUTF8 ErrorCode = "invalid UTF-8" - ErrMissingBracket ErrorCode = "missing closing ]" - ErrMissingParen ErrorCode = "missing closing )" - ErrMissingRepeatArgument ErrorCode = "missing argument to repetition operator" - ErrTrailingBackslash ErrorCode = "trailing backslash at end of expression" - ErrUnexpectedParen ErrorCode = "unexpected )" -) - -func (e ErrorCode) String() string { - return string(e) -} - -// Flags control the behavior of the parser and record information 
about regexp context. -type Flags uint16 - -const ( - FoldCase Flags = 1 << iota // case-insensitive match - Literal // treat pattern as literal string - ClassNL // allow character classes like [^a-z] and [[:space:]] to match newline - DotNL // allow . to match newline - OneLine // treat ^ and $ as only matching at beginning and end of text - NonGreedy // make repetition operators default to non-greedy - PerlX // allow Perl extensions - UnicodeGroups // allow \p{Han}, \P{Han} for Unicode group and negation - WasDollar // regexp OpEndText was $, not \z - Simple // regexp contains no counted repetition - - MatchNL = ClassNL | DotNL - - Perl = ClassNL | OneLine | PerlX | UnicodeGroups // as close to Perl as possible - POSIX Flags = 0 // POSIX syntax -) - -// Pseudo-ops for parsing stack. -const ( - opLeftParen = opPseudo + iota - opVerticalBar -) - -type parser struct { - flags Flags // parse mode flags - stack []*Regexp // stack of parsed expressions - free *Regexp - numCap int // number of capturing groups seen - wholeRegexp string - tmpClass []rune // temporary char class work space -} - -func (p *parser) newRegexp(op Op) *Regexp { - re := p.free - if re != nil { - p.free = re.Sub0[0] - *re = Regexp{} - } else { - re = new(Regexp) - } - re.Op = op - return re -} - -func (p *parser) reuse(re *Regexp) { - re.Sub0[0] = p.free - p.free = re -} - -// Parse stack manipulation. - -// push pushes the regexp re onto the parse stack and returns the regexp. -func (p *parser) push(re *Regexp) *Regexp { - if re.Op == OpCharClass && len(re.Rune) == 2 && re.Rune[0] == re.Rune[1] { - // Single rune. 
- if p.maybeConcat(re.Rune[0], p.flags&^FoldCase) { - return nil - } - re.Op = OpLiteral - re.Rune = re.Rune[:1] - re.Flags = p.flags &^ FoldCase - } else if re.Op == OpCharClass && len(re.Rune) == 4 && - re.Rune[0] == re.Rune[1] && re.Rune[2] == re.Rune[3] && - unicode.SimpleFold(re.Rune[0]) == re.Rune[2] && - unicode.SimpleFold(re.Rune[2]) == re.Rune[0] || - re.Op == OpCharClass && len(re.Rune) == 2 && - re.Rune[0]+1 == re.Rune[1] && - unicode.SimpleFold(re.Rune[0]) == re.Rune[1] && - unicode.SimpleFold(re.Rune[1]) == re.Rune[0] { - // Case-insensitive rune like [Aa] or [Δδ]. - if p.maybeConcat(re.Rune[0], p.flags|FoldCase) { - return nil - } - - // Rewrite as (case-insensitive) literal. - re.Op = OpLiteral - re.Rune = re.Rune[:1] - re.Flags = p.flags | FoldCase - } else { - // Incremental concatenation. - p.maybeConcat(-1, 0) - } - - p.stack = append(p.stack, re) - return re -} - -// maybeConcat implements incremental concatenation -// of literal runes into string nodes. The parser calls this -// before each push, so only the top fragment of the stack -// might need processing. Since this is called before a push, -// the topmost literal is no longer subject to operators like * -// (Otherwise ab* would turn into (ab)*.) -// If r >= 0 and there's a node left over, maybeConcat uses it -// to push r with the given flags. -// maybeConcat reports whether r was pushed. -func (p *parser) maybeConcat(r rune, flags Flags) bool { - n := len(p.stack) - if n < 2 { - return false - } - - re1 := p.stack[n-1] - re2 := p.stack[n-2] - if re1.Op != OpLiteral || re2.Op != OpLiteral || re1.Flags&FoldCase != re2.Flags&FoldCase { - return false - } - - // Push re1 into re2. - re2.Rune = append(re2.Rune, re1.Rune...) - - // Reuse re1 if possible. 
- if r >= 0 { - re1.Rune = re1.Rune0[:1] - re1.Rune[0] = r - re1.Flags = flags - return true - } - - p.stack = p.stack[:n-1] - p.reuse(re1) - return false // did not push r -} - -// newLiteral returns a new OpLiteral Regexp with the given flags -func (p *parser) newLiteral(r rune, flags Flags) *Regexp { - re := p.newRegexp(OpLiteral) - re.Flags = flags - if flags&FoldCase != 0 { - r = minFoldRune(r) - } - re.Rune0[0] = r - re.Rune = re.Rune0[:1] - return re -} - -// minFoldRune returns the minimum rune fold-equivalent to r. -func minFoldRune(r rune) rune { - if r < minFold || r > maxFold { - return r - } - min := r - r0 := r - for r = unicode.SimpleFold(r); r != r0; r = unicode.SimpleFold(r) { - if min > r { - min = r - } - } - return min -} - -// literal pushes a literal regexp for the rune r on the stack -// and returns that regexp. -func (p *parser) literal(r rune) { - p.push(p.newLiteral(r, p.flags)) -} - -// op pushes a regexp with the given op onto the stack -// and returns that regexp. -func (p *parser) op(op Op) *Regexp { - re := p.newRegexp(op) - re.Flags = p.flags - return p.push(re) -} - -// repeat replaces the top stack element with itself repeated according to op, min, max. -// before is the regexp suffix starting at the repetition operator. -// after is the regexp suffix following after the repetition operator. -// repeat returns an updated 'after' and an error, if any. -func (p *parser) repeat(op Op, min, max int, before, after, lastRepeat string) (string, error) { - flags := p.flags - if p.flags&PerlX != 0 { - if len(after) > 0 && after[0] == '?' { - after = after[1:] - flags ^= NonGreedy - } - if lastRepeat != "" { - // In Perl it is not allowed to stack repetition operators: - // a** is a syntax error, not a doubled star, and a++ means - // something else entirely, which we don't support! 
- return "", &Error{ErrInvalidRepeatOp, lastRepeat[:len(lastRepeat)-len(after)]} - } - } - n := len(p.stack) - if n == 0 { - return "", &Error{ErrMissingRepeatArgument, before[:len(before)-len(after)]} - } - sub := p.stack[n-1] - if sub.Op >= opPseudo { - return "", &Error{ErrMissingRepeatArgument, before[:len(before)-len(after)]} - } - re := p.newRegexp(op) - re.Min = min - re.Max = max - re.Flags = flags - re.Sub = re.Sub0[:1] - re.Sub[0] = sub - p.stack[n-1] = re - return after, nil -} - -// concat replaces the top of the stack (above the topmost '|' or '(') with its concatenation. -func (p *parser) concat() *Regexp { - p.maybeConcat(-1, 0) - - // Scan down to find pseudo-operator | or (. - i := len(p.stack) - for i > 0 && p.stack[i-1].Op < opPseudo { - i-- - } - subs := p.stack[i:] - p.stack = p.stack[:i] - - // Empty concatenation is special case. - if len(subs) == 0 { - return p.push(p.newRegexp(OpEmptyMatch)) - } - - return p.push(p.collapse(subs, OpConcat)) -} - -// alternate replaces the top of the stack (above the topmost '(') with its alternation. -func (p *parser) alternate() *Regexp { - // Scan down to find pseudo-operator (. - // There are no | above (. - i := len(p.stack) - for i > 0 && p.stack[i-1].Op < opPseudo { - i-- - } - subs := p.stack[i:] - p.stack = p.stack[:i] - - // Make sure top class is clean. - // All the others already are (see swapVerticalBar). - if len(subs) > 0 { - cleanAlt(subs[len(subs)-1]) - } - - // Empty alternate is special case - // (shouldn't happen but easy to handle). - if len(subs) == 0 { - return p.push(p.newRegexp(OpNoMatch)) - } - - return p.push(p.collapse(subs, OpAlternate)) -} - -// cleanAlt cleans re for eventual inclusion in an alternation. 
-func cleanAlt(re *Regexp) { - switch re.Op { - case OpCharClass: - re.Rune = cleanClass(&re.Rune) - if len(re.Rune) == 2 && re.Rune[0] == 0 && re.Rune[1] == unicode.MaxRune { - re.Rune = nil - re.Op = OpAnyChar - return - } - if len(re.Rune) == 4 && re.Rune[0] == 0 && re.Rune[1] == '\n'-1 && re.Rune[2] == '\n'+1 && re.Rune[3] == unicode.MaxRune { - re.Rune = nil - re.Op = OpAnyCharNotNL - return - } - if cap(re.Rune)-len(re.Rune) > 100 { - // re.Rune will not grow any more. - // Make a copy or inline to reclaim storage. - re.Rune = append(re.Rune0[:0], re.Rune...) - } - } -} - -// collapse returns the result of applying op to sub. -// If sub contains op nodes, they all get hoisted up -// so that there is never a concat of a concat or an -// alternate of an alternate. -func (p *parser) collapse(subs []*Regexp, op Op) *Regexp { - if len(subs) == 1 { - return subs[0] - } - re := p.newRegexp(op) - re.Sub = re.Sub0[:0] - for _, sub := range subs { - if sub.Op == op { - re.Sub = append(re.Sub, sub.Sub...) - p.reuse(sub) - } else { - re.Sub = append(re.Sub, sub) - } - } - if op == OpAlternate { - re.Sub = p.factor(re.Sub, re.Flags) - if len(re.Sub) == 1 { - old := re - re = re.Sub[0] - p.reuse(old) - } - } - return re -} - -// factor factors common prefixes from the alternation list sub. -// It returns a replacement list that reuses the same storage and -// frees (passes to p.reuse) any removed *Regexps. -// -// For example, -// ABC|ABD|AEF|BCX|BCY -// simplifies by literal prefix extraction to -// A(B(C|D)|EF)|BC(X|Y) -// which simplifies by character class introduction to -// A(B[CD]|EF)|BC[XY] -// -func (p *parser) factor(sub []*Regexp, flags Flags) []*Regexp { - if len(sub) < 2 { - return sub - } - - // Round 1: Factor out common literal prefixes. 
- var str []rune - var strflags Flags - start := 0 - out := sub[:0] - for i := 0; i <= len(sub); i++ { - // Invariant: the Regexps that were in sub[0:start] have been - // used or marked for reuse, and the slice space has been reused - // for out (len(out) <= start). - // - // Invariant: sub[start:i] consists of regexps that all begin - // with str as modified by strflags. - var istr []rune - var iflags Flags - if i < len(sub) { - istr, iflags = p.leadingString(sub[i]) - if iflags == strflags { - same := 0 - for same < len(str) && same < len(istr) && str[same] == istr[same] { - same++ - } - if same > 0 { - // Matches at least one rune in current range. - // Keep going around. - str = str[:same] - continue - } - } - } - - // Found end of a run with common leading literal string: - // sub[start:i] all begin with str[0:len(str)], but sub[i] - // does not even begin with str[0]. - // - // Factor out common string and append factored expression to out. - if i == start { - // Nothing to do - run of length 0. - } else if i == start+1 { - // Just one: don't bother factoring. - out = append(out, sub[start]) - } else { - // Construct factored form: prefix(suffix1|suffix2|...) - prefix := p.newRegexp(OpLiteral) - prefix.Flags = strflags - prefix.Rune = append(prefix.Rune[:0], str...) - - for j := start; j < i; j++ { - sub[j] = p.removeLeadingString(sub[j], len(str)) - } - suffix := p.collapse(sub[start:i], OpAlternate) // recurse - - re := p.newRegexp(OpConcat) - re.Sub = append(re.Sub[:0], prefix, suffix) - out = append(out, re) - } - - // Prepare for next iteration. - start = i - str = istr - strflags = iflags - } - sub = out - - // Round 2: Factor out common complex prefixes, - // just the first piece of each concatenation, - // whatever it is. This is good enough a lot of the time. 
- start = 0 - out = sub[:0] - var first *Regexp - for i := 0; i <= len(sub); i++ { - // Invariant: the Regexps that were in sub[0:start] have been - // used or marked for reuse, and the slice space has been reused - // for out (len(out) <= start). - // - // Invariant: sub[start:i] consists of regexps that all begin with ifirst. - var ifirst *Regexp - if i < len(sub) { - ifirst = p.leadingRegexp(sub[i]) - if first != nil && first.Equal(ifirst) { - continue - } - } - - // Found end of a run with common leading regexp: - // sub[start:i] all begin with first but sub[i] does not. - // - // Factor out common regexp and append factored expression to out. - if i == start { - // Nothing to do - run of length 0. - } else if i == start+1 { - // Just one: don't bother factoring. - out = append(out, sub[start]) - } else { - // Construct factored form: prefix(suffix1|suffix2|...) - prefix := first - for j := start; j < i; j++ { - reuse := j != start // prefix came from sub[start] - sub[j] = p.removeLeadingRegexp(sub[j], reuse) - } - suffix := p.collapse(sub[start:i], OpAlternate) // recurse - - re := p.newRegexp(OpConcat) - re.Sub = append(re.Sub[:0], prefix, suffix) - out = append(out, re) - } - - // Prepare for next iteration. - start = i - first = ifirst - } - sub = out - - // Round 3: Collapse runs of single literals into character classes. - start = 0 - out = sub[:0] - for i := 0; i <= len(sub); i++ { - // Invariant: the Regexps that were in sub[0:start] have been - // used or marked for reuse, and the slice space has been reused - // for out (len(out) <= start). - // - // Invariant: sub[start:i] consists of regexps that are either - // literal runes or character classes. - if i < len(sub) && isCharClass(sub[i]) { - continue - } - - // sub[i] is not a char or char class; - // emit char class for sub[start:i]... - if i == start { - // Nothing to do - run of length 0. - } else if i == start+1 { - out = append(out, sub[start]) - } else { - // Make new char class. 
- // Start with most complex regexp in sub[start]. - max := start - for j := start + 1; j < i; j++ { - if sub[max].Op < sub[j].Op || sub[max].Op == sub[j].Op && len(sub[max].Rune) < len(sub[j].Rune) { - max = j - } - } - sub[start], sub[max] = sub[max], sub[start] - - for j := start + 1; j < i; j++ { - mergeCharClass(sub[start], sub[j]) - p.reuse(sub[j]) - } - cleanAlt(sub[start]) - out = append(out, sub[start]) - } - - // ... and then emit sub[i]. - if i < len(sub) { - out = append(out, sub[i]) - } - start = i + 1 - } - sub = out - - // Round 4: Collapse runs of empty matches into a single empty match. - start = 0 - out = sub[:0] - for i := range sub { - if i+1 < len(sub) && sub[i].Op == OpEmptyMatch && sub[i+1].Op == OpEmptyMatch { - continue - } - out = append(out, sub[i]) - } - sub = out - - return sub -} - -// leadingString returns the leading literal string that re begins with. -// The string refers to storage in re or its children. -func (p *parser) leadingString(re *Regexp) ([]rune, Flags) { - if re.Op == OpConcat && len(re.Sub) > 0 { - re = re.Sub[0] - } - if re.Op != OpLiteral { - return nil, 0 - } - return re.Rune, re.Flags & FoldCase -} - -// removeLeadingString removes the first n leading runes -// from the beginning of re. It returns the replacement for re. -func (p *parser) removeLeadingString(re *Regexp, n int) *Regexp { - if re.Op == OpConcat && len(re.Sub) > 0 { - // Removing a leading string in a concatenation - // might simplify the concatenation. - sub := re.Sub[0] - sub = p.removeLeadingString(sub, n) - re.Sub[0] = sub - if sub.Op == OpEmptyMatch { - p.reuse(sub) - switch len(re.Sub) { - case 0, 1: - // Impossible but handle. 
- re.Op = OpEmptyMatch - re.Sub = nil - case 2: - old := re - re = re.Sub[1] - p.reuse(old) - default: - copy(re.Sub, re.Sub[1:]) - re.Sub = re.Sub[:len(re.Sub)-1] - } - } - return re - } - - if re.Op == OpLiteral { - re.Rune = re.Rune[:copy(re.Rune, re.Rune[n:])] - if len(re.Rune) == 0 { - re.Op = OpEmptyMatch - } - } - return re -} - -// leadingRegexp returns the leading regexp that re begins with. -// The regexp refers to storage in re or its children. -func (p *parser) leadingRegexp(re *Regexp) *Regexp { - if re.Op == OpEmptyMatch { - return nil - } - if re.Op == OpConcat && len(re.Sub) > 0 { - sub := re.Sub[0] - if sub.Op == OpEmptyMatch { - return nil - } - return sub - } - return re -} - -// removeLeadingRegexp removes the leading regexp in re. -// It returns the replacement for re. -// If reuse is true, it passes the removed regexp (if no longer needed) to p.reuse. -func (p *parser) removeLeadingRegexp(re *Regexp, reuse bool) *Regexp { - if re.Op == OpConcat && len(re.Sub) > 0 { - if reuse { - p.reuse(re.Sub[0]) - } - re.Sub = re.Sub[:copy(re.Sub, re.Sub[1:])] - switch len(re.Sub) { - case 0: - re.Op = OpEmptyMatch - re.Sub = nil - case 1: - old := re - re = re.Sub[0] - p.reuse(old) - } - return re - } - if reuse { - p.reuse(re) - } - return p.newRegexp(OpEmptyMatch) -} - -func literalRegexp(s string, flags Flags) *Regexp { - re := &Regexp{Op: OpLiteral} - re.Flags = flags - re.Rune = re.Rune0[:0] // use local storage for small strings - for _, c := range s { - if len(re.Rune) >= cap(re.Rune) { - // string is too long to fit in Rune0. let Go handle it - re.Rune = []rune(s) - break - } - re.Rune = append(re.Rune, c) - } - return re -} - -// Parsing. - -// Parse parses a regular expression string s, controlled by the specified -// Flags, and returns a regular expression parse tree. The syntax is -// described in the top-level comment. -func Parse(s string, flags Flags) (*Regexp, error) { - if flags&Literal != 0 { - // Trivial parser for literal string. 
- if err := checkUTF8(s); err != nil { - return nil, err - } - return literalRegexp(s, flags), nil - } - - // Otherwise, must do real work. - var ( - p parser - err error - c rune - op Op - lastRepeat string - ) - p.flags = flags - p.wholeRegexp = s - t := s - for t != "" { - repeat := "" - BigSwitch: - switch t[0] { - default: - if c, t, err = nextRune(t); err != nil { - return nil, err - } - p.literal(c) - - case '(': - if p.flags&PerlX != 0 && len(t) >= 2 && t[1] == '?' { - // Flag changes and non-capturing groups. - if t, err = p.parsePerlFlags(t); err != nil { - return nil, err - } - break - } - p.numCap++ - p.op(opLeftParen).Cap = p.numCap - t = t[1:] - case '|': - if err = p.parseVerticalBar(); err != nil { - return nil, err - } - t = t[1:] - case ')': - if err = p.parseRightParen(); err != nil { - return nil, err - } - t = t[1:] - case '^': - if p.flags&OneLine != 0 { - p.op(OpBeginText) - } else { - p.op(OpBeginLine) - } - t = t[1:] - case '$': - if p.flags&OneLine != 0 { - p.op(OpEndText).Flags |= WasDollar - } else { - p.op(OpEndLine) - } - t = t[1:] - case '.': - if p.flags&DotNL != 0 { - p.op(OpAnyChar) - } else { - p.op(OpAnyCharNotNL) - } - t = t[1:] - case '[': - if t, err = p.parseClass(t); err != nil { - return nil, err - } - case '*', '+', '?': - before := t - switch t[0] { - case '*': - op = OpStar - case '+': - op = OpPlus - case '?': - op = OpQuest - } - after := t[1:] - if after, err = p.repeat(op, 0, 0, before, after, lastRepeat); err != nil { - return nil, err - } - repeat = before - t = after - case '{': - op = OpRepeat - before := t - min, max, after, ok := p.parseRepeat(t) - if !ok { - // If the repeat cannot be parsed, { is a literal. - p.literal('{') - t = t[1:] - break - } - if min < 0 || min > 1000 || max > 1000 || max >= 0 && min > max { - // Numbers were too big, or max is present and min > max. 
- return nil, &Error{ErrInvalidRepeatSize, before[:len(before)-len(after)]} - } - if after, err = p.repeat(op, min, max, before, after, lastRepeat); err != nil { - return nil, err - } - repeat = before - t = after - case '\\': - if p.flags&PerlX != 0 && len(t) >= 2 { - switch t[1] { - case 'A': - p.op(OpBeginText) - t = t[2:] - break BigSwitch - case 'b': - p.op(OpWordBoundary) - t = t[2:] - break BigSwitch - case 'B': - p.op(OpNoWordBoundary) - t = t[2:] - break BigSwitch - case 'C': - // any byte; not supported - return nil, &Error{ErrInvalidEscape, t[:2]} - case 'Q': - // \Q ... \E: the ... is always literals - var lit string - if i := strings.Index(t, `\E`); i < 0 { - lit = t[2:] - t = "" - } else { - lit = t[2:i] - t = t[i+2:] - } - p.push(literalRegexp(lit, p.flags)) - break BigSwitch - case 'z': - p.op(OpEndText) - t = t[2:] - break BigSwitch - } - } - - re := p.newRegexp(OpCharClass) - re.Flags = p.flags - - // Look for Unicode character group like \p{Han} - if len(t) >= 2 && (t[1] == 'p' || t[1] == 'P') { - r, rest, err := p.parseUnicodeClass(t, re.Rune0[:0]) - if err != nil { - return nil, err - } - if r != nil { - re.Rune = r - t = rest - p.push(re) - break BigSwitch - } - } - - // Perl character class escape. - if r, rest := p.parsePerlClassEscape(t, re.Rune0[:0]); r != nil { - re.Rune = r - t = rest - p.push(re) - break BigSwitch - } - p.reuse(re) - - // Ordinary single-character escape. - if c, t, err = p.parseEscape(t); err != nil { - return nil, err - } - p.literal(c) - } - lastRepeat = repeat - } - - p.concat() - if p.swapVerticalBar() { - // pop vertical bar - p.stack = p.stack[:len(p.stack)-1] - } - p.alternate() - - n := len(p.stack) - if n != 1 { - return nil, &Error{ErrMissingParen, s} - } - return p.stack[0], nil -} - -// parseRepeat parses {min} (max=min) or {min,} (max=-1) or {min,max}. -// If s is not of that form, it returns ok == false. -// If s has the right form but the values are too big, it returns min == -1, ok == true. 
-func (p *parser) parseRepeat(s string) (min, max int, rest string, ok bool) { - if s == "" || s[0] != '{' { - return - } - s = s[1:] - var ok1 bool - if min, s, ok1 = p.parseInt(s); !ok1 { - return - } - if s == "" { - return - } - if s[0] != ',' { - max = min - } else { - s = s[1:] - if s == "" { - return - } - if s[0] == '}' { - max = -1 - } else if max, s, ok1 = p.parseInt(s); !ok1 { - return - } else if max < 0 { - // parseInt found too big a number - min = -1 - } - } - if s == "" || s[0] != '}' { - return - } - rest = s[1:] - ok = true - return -} - -// parsePerlFlags parses a Perl flag setting or non-capturing group or both, -// like (?i) or (?: or (?i:. It removes the prefix from s and updates the parse state. -// The caller must have ensured that s begins with "(?". -func (p *parser) parsePerlFlags(s string) (rest string, err error) { - t := s - - // Check for named captures, first introduced in Python's regexp library. - // As usual, there are three slightly different syntaxes: - // - // (?P<name>expr) the original, introduced by Python - // (?<name>expr) the .NET alteration, adopted by Perl 5.10 - // (?'name'expr) another .NET alteration, adopted by Perl 5.10 - // - // Perl 5.10 gave in and implemented the Python version too, - // but they claim that the last two are the preferred forms. - // PCRE and languages based on it (specifically, PHP and Ruby) - // support all three as well. EcmaScript 4 uses only the Python form. - // - // In both the open source world (via Code Search) and the - // Google source tree, (?P<expr>name) is the dominant form, - // so that's the one we implement. One is enough. - if len(t) > 4 && t[2] == 'P' && t[3] == '<' { - // Pull out name. 
- end := strings.IndexRune(t, '>') - if end < 0 { - if err = checkUTF8(t); err != nil { - return "", err - } - return "", &Error{ErrInvalidNamedCapture, s} - } - - capture := t[:end+1] // "(?P<name>" - name := t[4:end] // "name" - if err = checkUTF8(name); err != nil { - return "", err - } - if !isValidCaptureName(name) { - return "", &Error{ErrInvalidNamedCapture, capture} - } - - // Like ordinary capture, but named. - p.numCap++ - re := p.op(opLeftParen) - re.Cap = p.numCap - re.Name = name - return t[end+1:], nil - } - - // Non-capturing group. Might also twiddle Perl flags. - var c rune - t = t[2:] // skip (? - flags := p.flags - sign := +1 - sawFlag := false -Loop: - for t != "" { - if c, t, err = nextRune(t); err != nil { - return "", err - } - switch c { - default: - break Loop - - // Flags. - case 'i': - flags |= FoldCase - sawFlag = true - case 'm': - flags &^= OneLine - sawFlag = true - case 's': - flags |= DotNL - sawFlag = true - case 'U': - flags |= NonGreedy - sawFlag = true - - // Switch to negation. - case '-': - if sign < 0 { - break Loop - } - sign = -1 - // Invert flags so that | above turn into &^ and vice versa. - // We'll invert flags again before using it below. - flags = ^flags - sawFlag = false - - // End of flags, starting group or not. - case ':', ')': - if sign < 0 { - if !sawFlag { - break Loop - } - flags = ^flags - } - if c == ':' { - // Open new group - p.op(opLeftParen) - } - p.flags = flags - return t, nil - } - } - - return "", &Error{ErrInvalidPerlOp, s[:len(s)-len(t)]} -} - -// isValidCaptureName reports whether name -// is a valid capture name: [A-Za-z0-9_]+. -// PCRE limits names to 32 bytes. -// Python rejects names starting with digits. -// We don't enforce either of those. -func isValidCaptureName(name string) bool { - if name == "" { - return false - } - for _, c := range name { - if c != '_' && !isalnum(c) { - return false - } - } - return true -} - -// parseInt parses a decimal integer. 
-func (p *parser) parseInt(s string) (n int, rest string, ok bool) { - if s == "" || s[0] < '0' || '9' < s[0] { - return - } - // Disallow leading zeros. - if len(s) >= 2 && s[0] == '0' && '0' <= s[1] && s[1] <= '9' { - return - } - t := s - for s != "" && '0' <= s[0] && s[0] <= '9' { - s = s[1:] - } - rest = s - ok = true - // Have digits, compute value. - t = t[:len(t)-len(s)] - for i := 0; i < len(t); i++ { - // Avoid overflow. - if n >= 1e8 { - n = -1 - break - } - n = n*10 + int(t[i]) - '0' - } - return -} - -// can this be represented as a character class? -// single-rune literal string, char class, ., and .|\n. -func isCharClass(re *Regexp) bool { - return re.Op == OpLiteral && len(re.Rune) == 1 || - re.Op == OpCharClass || - re.Op == OpAnyCharNotNL || - re.Op == OpAnyChar -} - -// does re match r? -func matchRune(re *Regexp, r rune) bool { - switch re.Op { - case OpLiteral: - return len(re.Rune) == 1 && re.Rune[0] == r - case OpCharClass: - for i := 0; i < len(re.Rune); i += 2 { - if re.Rune[i] <= r && r <= re.Rune[i+1] { - return true - } - } - return false - case OpAnyCharNotNL: - return r != '\n' - case OpAnyChar: - return true - } - return false -} - -// parseVerticalBar handles a | in the input. -func (p *parser) parseVerticalBar() error { - p.concat() - - // The concatenation we just parsed is on top of the stack. - // If it sits above an opVerticalBar, swap it below - // (things below an opVerticalBar become an alternation). - // Otherwise, push a new vertical bar. - if !p.swapVerticalBar() { - p.op(opVerticalBar) - } - - return nil -} - -// mergeCharClass makes dst = dst|src. -// The caller must ensure that dst.Op >= src.Op, -// to reduce the amount of copying. -func mergeCharClass(dst, src *Regexp) { - switch dst.Op { - case OpAnyChar: - // src doesn't add anything. 
- case OpAnyCharNotNL: - // src might add \n - if matchRune(src, '\n') { - dst.Op = OpAnyChar - } - case OpCharClass: - // src is simpler, so either literal or char class - if src.Op == OpLiteral { - dst.Rune = appendLiteral(dst.Rune, src.Rune[0], src.Flags) - } else { - dst.Rune = appendClass(dst.Rune, src.Rune) - } - case OpLiteral: - // both literal - if src.Rune[0] == dst.Rune[0] && src.Flags == dst.Flags { - break - } - dst.Op = OpCharClass - dst.Rune = appendLiteral(dst.Rune[:0], dst.Rune[0], dst.Flags) - dst.Rune = appendLiteral(dst.Rune, src.Rune[0], src.Flags) - } -} - -// If the top of the stack is an element followed by an opVerticalBar -// swapVerticalBar swaps the two and returns true. -// Otherwise it returns false. -func (p *parser) swapVerticalBar() bool { - // If above and below vertical bar are literal or char class, - // can merge into a single char class. - n := len(p.stack) - if n >= 3 && p.stack[n-2].Op == opVerticalBar && isCharClass(p.stack[n-1]) && isCharClass(p.stack[n-3]) { - re1 := p.stack[n-1] - re3 := p.stack[n-3] - // Make re3 the more complex of the two. - if re1.Op > re3.Op { - re1, re3 = re3, re1 - p.stack[n-3] = re3 - } - mergeCharClass(re3, re1) - p.reuse(re1) - p.stack = p.stack[:n-1] - return true - } - - if n >= 2 { - re1 := p.stack[n-1] - re2 := p.stack[n-2] - if re2.Op == opVerticalBar { - if n >= 3 { - // Now out of reach. - // Clean opportunistically. - cleanAlt(p.stack[n-3]) - } - p.stack[n-2] = re1 - p.stack[n-1] = re2 - return true - } - } - return false -} - -// parseRightParen handles a ) in the input. 
-func (p *parser) parseRightParen() error { - p.concat() - if p.swapVerticalBar() { - // pop vertical bar - p.stack = p.stack[:len(p.stack)-1] - } - p.alternate() - - n := len(p.stack) - if n < 2 { - return &Error{ErrUnexpectedParen, p.wholeRegexp} - } - re1 := p.stack[n-1] - re2 := p.stack[n-2] - p.stack = p.stack[:n-2] - if re2.Op != opLeftParen { - return &Error{ErrUnexpectedParen, p.wholeRegexp} - } - // Restore flags at time of paren. - p.flags = re2.Flags - if re2.Cap == 0 { - // Just for grouping. - p.push(re1) - } else { - re2.Op = OpCapture - re2.Sub = re2.Sub0[:1] - re2.Sub[0] = re1 - p.push(re2) - } - return nil -} - -// parseEscape parses an escape sequence at the beginning of s -// and returns the rune. -func (p *parser) parseEscape(s string) (r rune, rest string, err error) { - t := s[1:] - if t == "" { - return 0, "", &Error{ErrTrailingBackslash, ""} - } - c, t, err := nextRune(t) - if err != nil { - return 0, "", err - } - -Switch: - switch c { - default: - if c < utf8.RuneSelf && !isalnum(c) { - // Escaped non-word characters are always themselves. - // PCRE is not quite so rigorous: it accepts things like - // \q, but we don't. We once rejected \_, but too many - // programs and people insist on using it, so allow \_. - return c, t, nil - } - - // Octal escapes. - case '1', '2', '3', '4', '5', '6', '7': - // Single non-zero digit is a backreference; not supported - if t == "" || t[0] < '0' || t[0] > '7' { - break - } - fallthrough - case '0': - // Consume up to three octal digits; already have one. - r = c - '0' - for i := 1; i < 3; i++ { - if t == "" || t[0] < '0' || t[0] > '7' { - break - } - r = r*8 + rune(t[0]) - '0' - t = t[1:] - } - return r, t, nil - - // Hexadecimal escapes. - case 'x': - if t == "" { - break - } - if c, t, err = nextRune(t); err != nil { - return 0, "", err - } - if c == '{' { - // Any number of digits in braces. - // Perl accepts any text at all; it ignores all text - // after the first non-hex digit. 
We require only hex digits, - // and at least one. - nhex := 0 - r = 0 - for { - if t == "" { - break Switch - } - if c, t, err = nextRune(t); err != nil { - return 0, "", err - } - if c == '}' { - break - } - v := unhex(c) - if v < 0 { - break Switch - } - r = r*16 + v - if r > unicode.MaxRune { - break Switch - } - nhex++ - } - if nhex == 0 { - break Switch - } - return r, t, nil - } - - // Easy case: two hex digits. - x := unhex(c) - if c, t, err = nextRune(t); err != nil { - return 0, "", err - } - y := unhex(c) - if x < 0 || y < 0 { - break - } - return x*16 + y, t, nil - - // C escapes. There is no case 'b', to avoid misparsing - // the Perl word-boundary \b as the C backspace \b - // when in POSIX mode. In Perl, /\b/ means word-boundary - // but /[\b]/ means backspace. We don't support that. - // If you want a backspace, embed a literal backspace - // character or use \x08. - case 'a': - return '\a', t, err - case 'f': - return '\f', t, err - case 'n': - return '\n', t, err - case 'r': - return '\r', t, err - case 't': - return '\t', t, err - case 'v': - return '\v', t, err - } - return 0, "", &Error{ErrInvalidEscape, s[:len(s)-len(t)]} -} - -// parseClassChar parses a character class character at the beginning of s -// and returns it. -func (p *parser) parseClassChar(s, wholeClass string) (r rune, rest string, err error) { - if s == "" { - return 0, "", &Error{Code: ErrMissingBracket, Expr: wholeClass} - } - - // Allow regular escape sequences even though - // many need not be escaped in this context. - if s[0] == '\\' { - return p.parseEscape(s) - } - - return nextRune(s) -} - -type charGroup struct { - sign int - class []rune -} - -// parsePerlClassEscape parses a leading Perl character class escape like \d -// from the beginning of s. If one is present, it appends the characters to r -// and returns the new slice r and the remainder of the string. 
-func (p *parser) parsePerlClassEscape(s string, r []rune) (out []rune, rest string) { - if p.flags&PerlX == 0 || len(s) < 2 || s[0] != '\\' { - return - } - g := perlGroup[s[0:2]] - if g.sign == 0 { - return - } - return p.appendGroup(r, g), s[2:] -} - -// parseNamedClass parses a leading POSIX named character class like [:alnum:] -// from the beginning of s. If one is present, it appends the characters to r -// and returns the new slice r and the remainder of the string. -func (p *parser) parseNamedClass(s string, r []rune) (out []rune, rest string, err error) { - if len(s) < 2 || s[0] != '[' || s[1] != ':' { - return - } - - i := strings.Index(s[2:], ":]") - if i < 0 { - return - } - i += 2 - name, s := s[0:i+2], s[i+2:] - g := posixGroup[name] - if g.sign == 0 { - return nil, "", &Error{ErrInvalidCharRange, name} - } - return p.appendGroup(r, g), s, nil -} - -func (p *parser) appendGroup(r []rune, g charGroup) []rune { - if p.flags&FoldCase == 0 { - if g.sign < 0 { - r = appendNegatedClass(r, g.class) - } else { - r = appendClass(r, g.class) - } - } else { - tmp := p.tmpClass[:0] - tmp = appendFoldedClass(tmp, g.class) - p.tmpClass = tmp - tmp = cleanClass(&p.tmpClass) - if g.sign < 0 { - r = appendNegatedClass(r, tmp) - } else { - r = appendClass(r, tmp) - } - } - return r -} - -var anyTable = &unicode.RangeTable{ - R16: []unicode.Range16{{Lo: 0, Hi: 1<<16 - 1, Stride: 1}}, - R32: []unicode.Range32{{Lo: 1 << 16, Hi: unicode.MaxRune, Stride: 1}}, -} - -// unicodeTable returns the unicode.RangeTable identified by name -// and the table of additional fold-equivalent code points. -func unicodeTable(name string) (*unicode.RangeTable, *unicode.RangeTable) { - // Special case: "Any" means any. 
- if name == "Any" { - return anyTable, anyTable - } - if t := unicode.Categories[name]; t != nil { - return t, unicode.FoldCategory[name] - } - if t := unicode.Scripts[name]; t != nil { - return t, unicode.FoldScript[name] - } - return nil, nil -} - -// parseUnicodeClass parses a leading Unicode character class like \p{Han} -// from the beginning of s. If one is present, it appends the characters to r -// and returns the new slice r and the remainder of the string. -func (p *parser) parseUnicodeClass(s string, r []rune) (out []rune, rest string, err error) { - if p.flags&UnicodeGroups == 0 || len(s) < 2 || s[0] != '\\' || s[1] != 'p' && s[1] != 'P' { - return - } - - // Committed to parse or return error. - sign := +1 - if s[1] == 'P' { - sign = -1 - } - t := s[2:] - c, t, err := nextRune(t) - if err != nil { - return - } - var seq, name string - if c != '{' { - // Single-letter name. - seq = s[:len(s)-len(t)] - name = seq[2:] - } else { - // Name is in braces. - end := strings.IndexRune(s, '}') - if end < 0 { - if err = checkUTF8(s); err != nil { - return - } - return nil, "", &Error{ErrInvalidCharRange, s} - } - seq, t = s[:end+1], s[end+1:] - name = s[3:end] - if err = checkUTF8(name); err != nil { - return - } - } - - // Group can have leading negation too. \p{^Han} == \P{Han}, \P{^Han} == \p{Han}. - if name != "" && name[0] == '^' { - sign = -sign - name = name[1:] - } - - tab, fold := unicodeTable(name) - if tab == nil { - return nil, "", &Error{ErrInvalidCharRange, seq} - } - - if p.flags&FoldCase == 0 || fold == nil { - if sign > 0 { - r = appendTable(r, tab) - } else { - r = appendNegatedTable(r, tab) - } - } else { - // Merge and clean tab and fold in a temporary buffer. - // This is necessary for the negative case and just tidy - // for the positive case. 
- tmp := p.tmpClass[:0] - tmp = appendTable(tmp, tab) - tmp = appendTable(tmp, fold) - p.tmpClass = tmp - tmp = cleanClass(&p.tmpClass) - if sign > 0 { - r = appendClass(r, tmp) - } else { - r = appendNegatedClass(r, tmp) - } - } - return r, t, nil -} - -// parseClass parses a character class at the beginning of s -// and pushes it onto the parse stack. -func (p *parser) parseClass(s string) (rest string, err error) { - t := s[1:] // chop [ - re := p.newRegexp(OpCharClass) - re.Flags = p.flags - re.Rune = re.Rune0[:0] - - sign := +1 - if t != "" && t[0] == '^' { - sign = -1 - t = t[1:] - - // If character class does not match \n, add it here, - // so that negation later will do the right thing. - if p.flags&ClassNL == 0 { - re.Rune = append(re.Rune, '\n', '\n') - } - } - - class := re.Rune - first := true // ] and - are okay as first char in class - for t == "" || t[0] != ']' || first { - // POSIX: - is only okay unescaped as first or last in class. - // Perl: - is okay anywhere. - if t != "" && t[0] == '-' && p.flags&PerlX == 0 && !first && (len(t) == 1 || t[1] != ']') { - _, size := utf8.DecodeRuneInString(t[1:]) - return "", &Error{Code: ErrInvalidCharRange, Expr: t[:1+size]} - } - first = false - - // Look for POSIX [:alnum:] etc. - if len(t) > 2 && t[0] == '[' && t[1] == ':' { - nclass, nt, err := p.parseNamedClass(t, class) - if err != nil { - return "", err - } - if nclass != nil { - class, t = nclass, nt - continue - } - } - - // Look for Unicode character group like \p{Han}. - nclass, nt, err := p.parseUnicodeClass(t, class) - if err != nil { - return "", err - } - if nclass != nil { - class, t = nclass, nt - continue - } - - // Look for Perl character class symbols (extension). - if nclass, nt := p.parsePerlClassEscape(t, class); nclass != nil { - class, t = nclass, nt - continue - } - - // Single character or simple range. 
- rng := t - var lo, hi rune - if lo, t, err = p.parseClassChar(t, s); err != nil { - return "", err - } - hi = lo - // [a-] means (a|-) so check for final ]. - if len(t) >= 2 && t[0] == '-' && t[1] != ']' { - t = t[1:] - if hi, t, err = p.parseClassChar(t, s); err != nil { - return "", err - } - if hi < lo { - rng = rng[:len(rng)-len(t)] - return "", &Error{Code: ErrInvalidCharRange, Expr: rng} - } - } - if p.flags&FoldCase == 0 { - class = appendRange(class, lo, hi) - } else { - class = appendFoldedRange(class, lo, hi) - } - } - t = t[1:] // chop ] - - // Use &re.Rune instead of &class to avoid allocation. - re.Rune = class - class = cleanClass(&re.Rune) - if sign < 0 { - class = negateClass(class) - } - re.Rune = class - p.push(re) - return t, nil -} - -// cleanClass sorts the ranges (pairs of elements of r), -// merges them, and eliminates duplicates. -func cleanClass(rp *[]rune) []rune { - - // Sort by lo increasing, hi decreasing to break ties. - sort.Sort(ranges{rp}) - - r := *rp - if len(r) < 2 { - return r - } - - // Merge abutting, overlapping. - w := 2 // write index - for i := 2; i < len(r); i += 2 { - lo, hi := r[i], r[i+1] - if lo <= r[w-1]+1 { - // merge with previous range - if hi > r[w-1] { - r[w-1] = hi - } - continue - } - // new disjoint range - r[w] = lo - r[w+1] = hi - w += 2 - } - - return r[:w] -} - -// appendLiteral returns the result of appending the literal x to the class r. -func appendLiteral(r []rune, x rune, flags Flags) []rune { - if flags&FoldCase != 0 { - return appendFoldedRange(r, x, x) - } - return appendRange(r, x, x) -} - -// appendRange returns the result of appending the range lo-hi to the class r. -func appendRange(r []rune, lo, hi rune) []rune { - // Expand last range or next to last range if it overlaps or abuts. - // Checking two ranges helps when appending case-folded - // alphabets, so that one range can be expanding A-Z and the - // other expanding a-z. 
- n := len(r) - for i := 2; i <= 4; i += 2 { // twice, using i=2, i=4 - if n >= i { - rlo, rhi := r[n-i], r[n-i+1] - if lo <= rhi+1 && rlo <= hi+1 { - if lo < rlo { - r[n-i] = lo - } - if hi > rhi { - r[n-i+1] = hi - } - return r - } - } - } - - return append(r, lo, hi) -} - -const ( - // minimum and maximum runes involved in folding. - // checked during test. - minFold = 0x0041 - maxFold = 0x1044f -) - -// appendFoldedRange returns the result of appending the range lo-hi -// and its case folding-equivalent runes to the class r. -func appendFoldedRange(r []rune, lo, hi rune) []rune { - // Optimizations. - if lo <= minFold && hi >= maxFold { - // Range is full: folding can't add more. - return appendRange(r, lo, hi) - } - if hi < minFold || lo > maxFold { - // Range is outside folding possibilities. - return appendRange(r, lo, hi) - } - if lo < minFold { - // [lo, minFold-1] needs no folding. - r = appendRange(r, lo, minFold-1) - lo = minFold - } - if hi > maxFold { - // [maxFold+1, hi] needs no folding. - r = appendRange(r, maxFold+1, hi) - hi = maxFold - } - - // Brute force. Depend on appendRange to coalesce ranges on the fly. - for c := lo; c <= hi; c++ { - r = appendRange(r, c, c) - f := unicode.SimpleFold(c) - for f != c { - r = appendRange(r, f, f) - f = unicode.SimpleFold(f) - } - } - return r -} - -// appendClass returns the result of appending the class x to the class r. -// It assume x is clean. -func appendClass(r []rune, x []rune) []rune { - for i := 0; i < len(x); i += 2 { - r = appendRange(r, x[i], x[i+1]) - } - return r -} - -// appendFolded returns the result of appending the case folding of the class x to the class r. -func appendFoldedClass(r []rune, x []rune) []rune { - for i := 0; i < len(x); i += 2 { - r = appendFoldedRange(r, x[i], x[i+1]) - } - return r -} - -// appendNegatedClass returns the result of appending the negation of the class x to the class r. -// It assumes x is clean. 
-func appendNegatedClass(r []rune, x []rune) []rune { - nextLo := '\u0000' - for i := 0; i < len(x); i += 2 { - lo, hi := x[i], x[i+1] - if nextLo <= lo-1 { - r = appendRange(r, nextLo, lo-1) - } - nextLo = hi + 1 - } - if nextLo <= unicode.MaxRune { - r = appendRange(r, nextLo, unicode.MaxRune) - } - return r -} - -// appendTable returns the result of appending x to the class r. -func appendTable(r []rune, x *unicode.RangeTable) []rune { - for _, xr := range x.R16 { - lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride) - if stride == 1 { - r = appendRange(r, lo, hi) - continue - } - for c := lo; c <= hi; c += stride { - r = appendRange(r, c, c) - } - } - for _, xr := range x.R32 { - lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride) - if stride == 1 { - r = appendRange(r, lo, hi) - continue - } - for c := lo; c <= hi; c += stride { - r = appendRange(r, c, c) - } - } - return r -} - -// appendNegatedTable returns the result of appending the negation of x to the class r. -func appendNegatedTable(r []rune, x *unicode.RangeTable) []rune { - nextLo := '\u0000' // lo end of next class to add - for _, xr := range x.R16 { - lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride) - if stride == 1 { - if nextLo <= lo-1 { - r = appendRange(r, nextLo, lo-1) - } - nextLo = hi + 1 - continue - } - for c := lo; c <= hi; c += stride { - if nextLo <= c-1 { - r = appendRange(r, nextLo, c-1) - } - nextLo = c + 1 - } - } - for _, xr := range x.R32 { - lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride) - if stride == 1 { - if nextLo <= lo-1 { - r = appendRange(r, nextLo, lo-1) - } - nextLo = hi + 1 - continue - } - for c := lo; c <= hi; c += stride { - if nextLo <= c-1 { - r = appendRange(r, nextLo, c-1) - } - nextLo = c + 1 - } - } - if nextLo <= unicode.MaxRune { - r = appendRange(r, nextLo, unicode.MaxRune) - } - return r -} - -// negateClass overwrites r and returns r's negation. -// It assumes the class r is already clean. 
-func negateClass(r []rune) []rune { - nextLo := '\u0000' // lo end of next class to add - w := 0 // write index - for i := 0; i < len(r); i += 2 { - lo, hi := r[i], r[i+1] - if nextLo <= lo-1 { - r[w] = nextLo - r[w+1] = lo - 1 - w += 2 - } - nextLo = hi + 1 - } - r = r[:w] - if nextLo <= unicode.MaxRune { - // It's possible for the negation to have one more - // range - this one - than the original class, so use append. - r = append(r, nextLo, unicode.MaxRune) - } - return r -} - -// ranges implements sort.Interface on a []rune. -// The choice of receiver type definition is strange -// but avoids an allocation since we already have -// a *[]rune. -type ranges struct { - p *[]rune -} - -func (ra ranges) Less(i, j int) bool { - p := *ra.p - i *= 2 - j *= 2 - return p[i] < p[j] || p[i] == p[j] && p[i+1] > p[j+1] -} - -func (ra ranges) Len() int { - return len(*ra.p) / 2 -} - -func (ra ranges) Swap(i, j int) { - p := *ra.p - i *= 2 - j *= 2 - p[i], p[i+1], p[j], p[j+1] = p[j], p[j+1], p[i], p[i+1] -} - -func checkUTF8(s string) error { - for s != "" { - rune, size := utf8.DecodeRuneInString(s) - if rune == utf8.RuneError && size == 1 { - return &Error{Code: ErrInvalidUTF8, Expr: s} - } - s = s[size:] - } - return nil -} - -func nextRune(s string) (c rune, t string, err error) { - c, size := utf8.DecodeRuneInString(s) - if c == utf8.RuneError && size == 1 { - return 0, "", &Error{Code: ErrInvalidUTF8, Expr: s} - } - return c, s[size:], nil -} - -func isalnum(c rune) bool { - return '0' <= c && c <= '9' || 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' -} - -func unhex(c rune) rune { - if '0' <= c && c <= '9' { - return c - '0' - } - if 'a' <= c && c <= 'f' { - return c - 'a' + 10 - } - if 'A' <= c && c <= 'F' { - return c - 'A' + 10 - } - return -1 -} diff --git a/src/pkg/regexp/syntax/parse_test.go b/src/pkg/regexp/syntax/parse_test.go deleted file mode 100644 index f3089294c..000000000 --- a/src/pkg/regexp/syntax/parse_test.go +++ /dev/null @@ -1,559 +0,0 @@ -// 
Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package syntax - -import ( - "bytes" - "fmt" - "testing" - "unicode" -) - -type parseTest struct { - Regexp string - Dump string -} - -var parseTests = []parseTest{ - // Base cases - {`a`, `lit{a}`}, - {`a.`, `cat{lit{a}dot{}}`}, - {`a.b`, `cat{lit{a}dot{}lit{b}}`}, - {`ab`, `str{ab}`}, - {`a.b.c`, `cat{lit{a}dot{}lit{b}dot{}lit{c}}`}, - {`abc`, `str{abc}`}, - {`a|^`, `alt{lit{a}bol{}}`}, - {`a|b`, `cc{0x61-0x62}`}, - {`(a)`, `cap{lit{a}}`}, - {`(a)|b`, `alt{cap{lit{a}}lit{b}}`}, - {`a*`, `star{lit{a}}`}, - {`a+`, `plus{lit{a}}`}, - {`a?`, `que{lit{a}}`}, - {`a{2}`, `rep{2,2 lit{a}}`}, - {`a{2,3}`, `rep{2,3 lit{a}}`}, - {`a{2,}`, `rep{2,-1 lit{a}}`}, - {`a*?`, `nstar{lit{a}}`}, - {`a+?`, `nplus{lit{a}}`}, - {`a??`, `nque{lit{a}}`}, - {`a{2}?`, `nrep{2,2 lit{a}}`}, - {`a{2,3}?`, `nrep{2,3 lit{a}}`}, - {`a{2,}?`, `nrep{2,-1 lit{a}}`}, - // Malformed { } are treated as literals. 
- {`x{1001`, `str{x{1001}`}, - {`x{9876543210`, `str{x{9876543210}`}, - {`x{9876543210,`, `str{x{9876543210,}`}, - {`x{2,1`, `str{x{2,1}`}, - {`x{1,9876543210`, `str{x{1,9876543210}`}, - {``, `emp{}`}, - {`|`, `emp{}`}, // alt{emp{}emp{}} but got factored - {`|x|`, `alt{emp{}lit{x}emp{}}`}, - {`.`, `dot{}`}, - {`^`, `bol{}`}, - {`$`, `eol{}`}, - {`\|`, `lit{|}`}, - {`\(`, `lit{(}`}, - {`\)`, `lit{)}`}, - {`\*`, `lit{*}`}, - {`\+`, `lit{+}`}, - {`\?`, `lit{?}`}, - {`{`, `lit{{}`}, - {`}`, `lit{}}`}, - {`\.`, `lit{.}`}, - {`\^`, `lit{^}`}, - {`\$`, `lit{$}`}, - {`\\`, `lit{\}`}, - {`[ace]`, `cc{0x61 0x63 0x65}`}, - {`[abc]`, `cc{0x61-0x63}`}, - {`[a-z]`, `cc{0x61-0x7a}`}, - {`[a]`, `lit{a}`}, - {`\-`, `lit{-}`}, - {`-`, `lit{-}`}, - {`\_`, `lit{_}`}, - {`abc`, `str{abc}`}, - {`abc|def`, `alt{str{abc}str{def}}`}, - {`abc|def|ghi`, `alt{str{abc}str{def}str{ghi}}`}, - - // Posix and Perl extensions - {`[[:lower:]]`, `cc{0x61-0x7a}`}, - {`[a-z]`, `cc{0x61-0x7a}`}, - {`[^[:lower:]]`, `cc{0x0-0x60 0x7b-0x10ffff}`}, - {`[[:^lower:]]`, `cc{0x0-0x60 0x7b-0x10ffff}`}, - {`(?i)[[:lower:]]`, `cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}`}, - {`(?i)[a-z]`, `cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}`}, - {`(?i)[^[:lower:]]`, `cc{0x0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}`}, - {`(?i)[[:^lower:]]`, `cc{0x0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}`}, - {`\d`, `cc{0x30-0x39}`}, - {`\D`, `cc{0x0-0x2f 0x3a-0x10ffff}`}, - {`\s`, `cc{0x9-0xa 0xc-0xd 0x20}`}, - {`\S`, `cc{0x0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}`}, - {`\w`, `cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a}`}, - {`\W`, `cc{0x0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x10ffff}`}, - {`(?i)\w`, `cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a 0x17f 0x212a}`}, - {`(?i)\W`, `cc{0x0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}`}, - {`[^\\]`, `cc{0x0-0x5b 0x5d-0x10ffff}`}, - // { `\C`, `byte{}` }, // probably never - - // Unicode, negatives, and a double negative. 
- {`\p{Braille}`, `cc{0x2800-0x28ff}`}, - {`\P{Braille}`, `cc{0x0-0x27ff 0x2900-0x10ffff}`}, - {`\p{^Braille}`, `cc{0x0-0x27ff 0x2900-0x10ffff}`}, - {`\P{^Braille}`, `cc{0x2800-0x28ff}`}, - {`\pZ`, `cc{0x20 0xa0 0x1680 0x2000-0x200a 0x2028-0x2029 0x202f 0x205f 0x3000}`}, - {`[\p{Braille}]`, `cc{0x2800-0x28ff}`}, - {`[\P{Braille}]`, `cc{0x0-0x27ff 0x2900-0x10ffff}`}, - {`[\p{^Braille}]`, `cc{0x0-0x27ff 0x2900-0x10ffff}`}, - {`[\P{^Braille}]`, `cc{0x2800-0x28ff}`}, - {`[\pZ]`, `cc{0x20 0xa0 0x1680 0x2000-0x200a 0x2028-0x2029 0x202f 0x205f 0x3000}`}, - {`\p{Lu}`, mkCharClass(unicode.IsUpper)}, - {`[\p{Lu}]`, mkCharClass(unicode.IsUpper)}, - {`(?i)[\p{Lu}]`, mkCharClass(isUpperFold)}, - {`\p{Any}`, `dot{}`}, - {`\p{^Any}`, `cc{}`}, - - // Hex, octal. - {`[\012-\234]\141`, `cat{cc{0xa-0x9c}lit{a}}`}, - {`[\x{41}-\x7a]\x61`, `cat{cc{0x41-0x7a}lit{a}}`}, - - // More interesting regular expressions. - {`a{,2}`, `str{a{,2}}`}, - {`\.\^\$\\`, `str{.^$\}`}, - {`[a-zABC]`, `cc{0x41-0x43 0x61-0x7a}`}, - {`[^a]`, `cc{0x0-0x60 0x62-0x10ffff}`}, - {`[α-ε☺]`, `cc{0x3b1-0x3b5 0x263a}`}, // utf-8 - {`a*{`, `cat{star{lit{a}}lit{{}}`}, - - // Test precedences - {`(?:ab)*`, `star{str{ab}}`}, - {`(ab)*`, `star{cap{str{ab}}}`}, - {`ab|cd`, `alt{str{ab}str{cd}}`}, - {`a(b|c)d`, `cat{lit{a}cap{cc{0x62-0x63}}lit{d}}`}, - - // Test flattening. 
- {`(?:a)`, `lit{a}`}, - {`(?:ab)(?:cd)`, `str{abcd}`}, - {`(?:a+b+)(?:c+d+)`, `cat{plus{lit{a}}plus{lit{b}}plus{lit{c}}plus{lit{d}}}`}, - {`(?:a+|b+)|(?:c+|d+)`, `alt{plus{lit{a}}plus{lit{b}}plus{lit{c}}plus{lit{d}}}`}, - {`(?:a|b)|(?:c|d)`, `cc{0x61-0x64}`}, - {`a|.`, `dot{}`}, - {`.|a`, `dot{}`}, - {`(?:[abc]|A|Z|hello|world)`, `alt{cc{0x41 0x5a 0x61-0x63}str{hello}str{world}}`}, - {`(?:[abc]|A|Z)`, `cc{0x41 0x5a 0x61-0x63}`}, - - // Test Perl quoted literals - {`\Q+|*?{[\E`, `str{+|*?{[}`}, - {`\Q+\E+`, `plus{lit{+}}`}, - {`\Q\\E`, `lit{\}`}, - {`\Q\\\E`, `str{\\}`}, - - // Test Perl \A and \z - {`(?m)^`, `bol{}`}, - {`(?m)$`, `eol{}`}, - {`(?-m)^`, `bot{}`}, - {`(?-m)$`, `eot{}`}, - {`(?m)\A`, `bot{}`}, - {`(?m)\z`, `eot{\z}`}, - {`(?-m)\A`, `bot{}`}, - {`(?-m)\z`, `eot{\z}`}, - - // Test named captures - {`(?P<name>a)`, `cap{name:lit{a}}`}, - - // Case-folded literals - {`[Aa]`, `litfold{A}`}, - {`[\x{100}\x{101}]`, `litfold{Ā}`}, - {`[Δδ]`, `litfold{Δ}`}, - - // Strings - {`abcde`, `str{abcde}`}, - {`[Aa][Bb]cd`, `cat{strfold{AB}str{cd}}`}, - - // Factoring. - {`abc|abd|aef|bcx|bcy`, `alt{cat{lit{a}alt{cat{lit{b}cc{0x63-0x64}}str{ef}}}cat{str{bc}cc{0x78-0x79}}}`}, - {`ax+y|ax+z|ay+w`, `cat{lit{a}alt{cat{plus{lit{x}}cc{0x79-0x7a}}cat{plus{lit{y}}lit{w}}}}`}, - - // Bug fixes. 
- {`(?:.)`, `dot{}`}, - {`(?:x|(?:xa))`, `cat{lit{x}alt{emp{}lit{a}}}`}, - {`(?:.|(?:.a))`, `cat{dot{}alt{emp{}lit{a}}}`}, - {`(?:A(?:A|a))`, `cat{lit{A}litfold{A}}`}, - {`(?:A|a)`, `litfold{A}`}, - {`A|(?:A|a)`, `litfold{A}`}, - {`(?s).`, `dot{}`}, - {`(?-s).`, `dnl{}`}, - {`(?:(?:^).)`, `cat{bol{}dot{}}`}, - {`(?-s)(?:(?:^).)`, `cat{bol{}dnl{}}`}, - - // RE2 prefix_tests - {`abc|abd`, `cat{str{ab}cc{0x63-0x64}}`}, - {`a(?:b)c|abd`, `cat{str{ab}cc{0x63-0x64}}`}, - {`abc|abd|aef|bcx|bcy`, - `alt{cat{lit{a}alt{cat{lit{b}cc{0x63-0x64}}str{ef}}}` + - `cat{str{bc}cc{0x78-0x79}}}`}, - {`abc|x|abd`, `alt{str{abc}lit{x}str{abd}}`}, - {`(?i)abc|ABD`, `cat{strfold{AB}cc{0x43-0x44 0x63-0x64}}`}, - {`[ab]c|[ab]d`, `cat{cc{0x61-0x62}cc{0x63-0x64}}`}, - {`(?:xx|yy)c|(?:xx|yy)d`, - `cat{alt{str{xx}str{yy}}cc{0x63-0x64}}`}, - {`x{2}|x{2}[0-9]`, - `cat{rep{2,2 lit{x}}alt{emp{}cc{0x30-0x39}}}`}, - {`x{2}y|x{2}[0-9]y`, - `cat{rep{2,2 lit{x}}alt{lit{y}cat{cc{0x30-0x39}lit{y}}}}`}, -} - -const testFlags = MatchNL | PerlX | UnicodeGroups - -func TestParseSimple(t *testing.T) { - testParseDump(t, parseTests, testFlags) -} - -var foldcaseTests = []parseTest{ - {`AbCdE`, `strfold{ABCDE}`}, - {`[Aa]`, `litfold{A}`}, - {`a`, `litfold{A}`}, - - // 0x17F is an old English long s (looks like an f) and folds to s. - // 0x212A is the Kelvin symbol and folds to k. - {`A[F-g]`, `cat{litfold{A}cc{0x41-0x7a 0x17f 0x212a}}`}, // [Aa][A-z...] 
- {`[[:upper:]]`, `cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}`}, - {`[[:lower:]]`, `cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}`}, -} - -func TestParseFoldCase(t *testing.T) { - testParseDump(t, foldcaseTests, FoldCase) -} - -var literalTests = []parseTest{ - {"(|)^$.[*+?]{5,10},\\", "str{(|)^$.[*+?]{5,10},\\}"}, -} - -func TestParseLiteral(t *testing.T) { - testParseDump(t, literalTests, Literal) -} - -var matchnlTests = []parseTest{ - {`.`, `dot{}`}, - {"\n", "lit{\n}"}, - {`[^a]`, `cc{0x0-0x60 0x62-0x10ffff}`}, - {`[a\n]`, `cc{0xa 0x61}`}, -} - -func TestParseMatchNL(t *testing.T) { - testParseDump(t, matchnlTests, MatchNL) -} - -var nomatchnlTests = []parseTest{ - {`.`, `dnl{}`}, - {"\n", "lit{\n}"}, - {`[^a]`, `cc{0x0-0x9 0xb-0x60 0x62-0x10ffff}`}, - {`[a\n]`, `cc{0xa 0x61}`}, -} - -func TestParseNoMatchNL(t *testing.T) { - testParseDump(t, nomatchnlTests, 0) -} - -// Test Parse -> Dump. -func testParseDump(t *testing.T, tests []parseTest, flags Flags) { - for _, tt := range tests { - re, err := Parse(tt.Regexp, flags) - if err != nil { - t.Errorf("Parse(%#q): %v", tt.Regexp, err) - continue - } - d := dump(re) - if d != tt.Dump { - t.Errorf("Parse(%#q).Dump() = %#q want %#q", tt.Regexp, d, tt.Dump) - } - } -} - -// dump prints a string representation of the regexp showing -// the structure explicitly. -func dump(re *Regexp) string { - var b bytes.Buffer - dumpRegexp(&b, re) - return b.String() -} - -var opNames = []string{ - OpNoMatch: "no", - OpEmptyMatch: "emp", - OpLiteral: "lit", - OpCharClass: "cc", - OpAnyCharNotNL: "dnl", - OpAnyChar: "dot", - OpBeginLine: "bol", - OpEndLine: "eol", - OpBeginText: "bot", - OpEndText: "eot", - OpWordBoundary: "wb", - OpNoWordBoundary: "nwb", - OpCapture: "cap", - OpStar: "star", - OpPlus: "plus", - OpQuest: "que", - OpRepeat: "rep", - OpConcat: "cat", - OpAlternate: "alt", -} - -// dumpRegexp writes an encoding of the syntax tree for the regexp re to b. 
-// It is used during testing to distinguish between parses that might print -// the same using re's String method. -func dumpRegexp(b *bytes.Buffer, re *Regexp) { - if int(re.Op) >= len(opNames) || opNames[re.Op] == "" { - fmt.Fprintf(b, "op%d", re.Op) - } else { - switch re.Op { - default: - b.WriteString(opNames[re.Op]) - case OpStar, OpPlus, OpQuest, OpRepeat: - if re.Flags&NonGreedy != 0 { - b.WriteByte('n') - } - b.WriteString(opNames[re.Op]) - case OpLiteral: - if len(re.Rune) > 1 { - b.WriteString("str") - } else { - b.WriteString("lit") - } - if re.Flags&FoldCase != 0 { - for _, r := range re.Rune { - if unicode.SimpleFold(r) != r { - b.WriteString("fold") - break - } - } - } - } - } - b.WriteByte('{') - switch re.Op { - case OpEndText: - if re.Flags&WasDollar == 0 { - b.WriteString(`\z`) - } - case OpLiteral: - for _, r := range re.Rune { - b.WriteRune(r) - } - case OpConcat, OpAlternate: - for _, sub := range re.Sub { - dumpRegexp(b, sub) - } - case OpStar, OpPlus, OpQuest: - dumpRegexp(b, re.Sub[0]) - case OpRepeat: - fmt.Fprintf(b, "%d,%d ", re.Min, re.Max) - dumpRegexp(b, re.Sub[0]) - case OpCapture: - if re.Name != "" { - b.WriteString(re.Name) - b.WriteByte(':') - } - dumpRegexp(b, re.Sub[0]) - case OpCharClass: - sep := "" - for i := 0; i < len(re.Rune); i += 2 { - b.WriteString(sep) - sep = " " - lo, hi := re.Rune[i], re.Rune[i+1] - if lo == hi { - fmt.Fprintf(b, "%#x", lo) - } else { - fmt.Fprintf(b, "%#x-%#x", lo, hi) - } - } - } - b.WriteByte('}') -} - -func mkCharClass(f func(rune) bool) string { - re := &Regexp{Op: OpCharClass} - lo := rune(-1) - for i := rune(0); i <= unicode.MaxRune; i++ { - if f(i) { - if lo < 0 { - lo = i - } - } else { - if lo >= 0 { - re.Rune = append(re.Rune, lo, i-1) - lo = -1 - } - } - } - if lo >= 0 { - re.Rune = append(re.Rune, lo, unicode.MaxRune) - } - return dump(re) -} - -func isUpperFold(r rune) bool { - if unicode.IsUpper(r) { - return true - } - c := unicode.SimpleFold(r) - for c != r { - if 
unicode.IsUpper(c) { - return true - } - c = unicode.SimpleFold(c) - } - return false -} - -func TestFoldConstants(t *testing.T) { - last := rune(-1) - for i := rune(0); i <= unicode.MaxRune; i++ { - if unicode.SimpleFold(i) == i { - continue - } - if last == -1 && minFold != i { - t.Errorf("minFold=%#U should be %#U", minFold, i) - } - last = i - } - if maxFold != last { - t.Errorf("maxFold=%#U should be %#U", maxFold, last) - } -} - -func TestAppendRangeCollapse(t *testing.T) { - // AppendRange should collapse each of the new ranges - // into the earlier ones (it looks back two ranges), so that - // the slice never grows very large. - // Note that we are not calling cleanClass. - var r []rune - for i := rune('A'); i <= 'Z'; i++ { - r = appendRange(r, i, i) - r = appendRange(r, i+'a'-'A', i+'a'-'A') - } - if string(r) != "AZaz" { - t.Errorf("appendRange interlaced A-Z a-z = %s, want AZaz", string(r)) - } -} - -var invalidRegexps = []string{ - `(`, - `)`, - `(a`, - `a)`, - `(a))`, - `(a|b|`, - `a|b|)`, - `(a|b|))`, - `(a|b`, - `a|b)`, - `(a|b))`, - `[a-z`, - `([a-z)`, - `[a-z)`, - `([a-z]))`, - `x{1001}`, - `x{9876543210}`, - `x{2,1}`, - `x{1,9876543210}`, - "\xff", // Invalid UTF-8 - "[\xff]", - "[\\\xff]", - "\\\xff", - `(?P<name>a`, - `(?P<name>`, - `(?P<name`, - `(?P<x y>a)`, - `(?P<>a)`, - `[a-Z]`, - `(?i)[a-Z]`, - `a{100000}`, - `a{100000,}`, -} - -var onlyPerl = []string{ - `[a-b-c]`, - `\Qabc\E`, - `\Q*+?{[\E`, - `\Q\\E`, - `\Q\\\E`, - `\Q\\\\E`, - `\Q\\\\\E`, - `(?:a)`, - `(?P<name>a)`, -} - -var onlyPOSIX = []string{ - "a++", - "a**", - "a?*", - "a+*", - "a{1}*", - ".{1}{2}.{3}", -} - -func TestParseInvalidRegexps(t *testing.T) { - for _, regexp := range invalidRegexps { - if re, err := Parse(regexp, Perl); err == nil { - t.Errorf("Parse(%#q, Perl) = %s, should have failed", regexp, dump(re)) - } - if re, err := Parse(regexp, POSIX); err == nil { - t.Errorf("Parse(%#q, POSIX) = %s, should have failed", regexp, dump(re)) - } - } - for _, regexp := range 
onlyPerl { - if _, err := Parse(regexp, Perl); err != nil { - t.Errorf("Parse(%#q, Perl): %v", regexp, err) - } - if re, err := Parse(regexp, POSIX); err == nil { - t.Errorf("Parse(%#q, POSIX) = %s, should have failed", regexp, dump(re)) - } - } - for _, regexp := range onlyPOSIX { - if re, err := Parse(regexp, Perl); err == nil { - t.Errorf("Parse(%#q, Perl) = %s, should have failed", regexp, dump(re)) - } - if _, err := Parse(regexp, POSIX); err != nil { - t.Errorf("Parse(%#q, POSIX): %v", regexp, err) - } - } -} - -func TestToStringEquivalentParse(t *testing.T) { - for _, tt := range parseTests { - re, err := Parse(tt.Regexp, testFlags) - if err != nil { - t.Errorf("Parse(%#q): %v", tt.Regexp, err) - continue - } - d := dump(re) - if d != tt.Dump { - t.Errorf("Parse(%#q).Dump() = %#q want %#q", tt.Regexp, d, tt.Dump) - continue - } - - s := re.String() - if s != tt.Regexp { - // If ToString didn't return the original regexp, - // it must have found one with fewer parens. - // Unfortunately we can't check the length here, because - // ToString produces "\\{" for a literal brace, - // but "{" is a shorter equivalent in some contexts. - nre, err := Parse(s, testFlags) - if err != nil { - t.Errorf("Parse(%#q.String() = %#q): %v", tt.Regexp, s, err) - continue - } - nd := dump(nre) - if d != nd { - t.Errorf("Parse(%#q) -> %#q; %#q vs %#q", tt.Regexp, s, d, nd) - } - - ns := nre.String() - if s != ns { - t.Errorf("Parse(%#q) -> %#q -> %#q", tt.Regexp, s, ns) - } - } - } -} diff --git a/src/pkg/regexp/syntax/perl_groups.go b/src/pkg/regexp/syntax/perl_groups.go deleted file mode 100644 index effe4e686..000000000 --- a/src/pkg/regexp/syntax/perl_groups.go +++ /dev/null @@ -1,134 +0,0 @@ -// Copyright 2013 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// GENERATED BY make_perl_groups.pl; DO NOT EDIT. 
-// make_perl_groups.pl >perl_groups.go - -package syntax - -var code1 = []rune{ /* \d */ - 0x30, 0x39, -} - -var code2 = []rune{ /* \s */ - 0x9, 0xa, - 0xc, 0xd, - 0x20, 0x20, -} - -var code3 = []rune{ /* \w */ - 0x30, 0x39, - 0x41, 0x5a, - 0x5f, 0x5f, - 0x61, 0x7a, -} - -var perlGroup = map[string]charGroup{ - `\d`: {+1, code1}, - `\D`: {-1, code1}, - `\s`: {+1, code2}, - `\S`: {-1, code2}, - `\w`: {+1, code3}, - `\W`: {-1, code3}, -} -var code4 = []rune{ /* [:alnum:] */ - 0x30, 0x39, - 0x41, 0x5a, - 0x61, 0x7a, -} - -var code5 = []rune{ /* [:alpha:] */ - 0x41, 0x5a, - 0x61, 0x7a, -} - -var code6 = []rune{ /* [:ascii:] */ - 0x0, 0x7f, -} - -var code7 = []rune{ /* [:blank:] */ - 0x9, 0x9, - 0x20, 0x20, -} - -var code8 = []rune{ /* [:cntrl:] */ - 0x0, 0x1f, - 0x7f, 0x7f, -} - -var code9 = []rune{ /* [:digit:] */ - 0x30, 0x39, -} - -var code10 = []rune{ /* [:graph:] */ - 0x21, 0x7e, -} - -var code11 = []rune{ /* [:lower:] */ - 0x61, 0x7a, -} - -var code12 = []rune{ /* [:print:] */ - 0x20, 0x7e, -} - -var code13 = []rune{ /* [:punct:] */ - 0x21, 0x2f, - 0x3a, 0x40, - 0x5b, 0x60, - 0x7b, 0x7e, -} - -var code14 = []rune{ /* [:space:] */ - 0x9, 0xd, - 0x20, 0x20, -} - -var code15 = []rune{ /* [:upper:] */ - 0x41, 0x5a, -} - -var code16 = []rune{ /* [:word:] */ - 0x30, 0x39, - 0x41, 0x5a, - 0x5f, 0x5f, - 0x61, 0x7a, -} - -var code17 = []rune{ /* [:xdigit:] */ - 0x30, 0x39, - 0x41, 0x46, - 0x61, 0x66, -} - -var posixGroup = map[string]charGroup{ - `[:alnum:]`: {+1, code4}, - `[:^alnum:]`: {-1, code4}, - `[:alpha:]`: {+1, code5}, - `[:^alpha:]`: {-1, code5}, - `[:ascii:]`: {+1, code6}, - `[:^ascii:]`: {-1, code6}, - `[:blank:]`: {+1, code7}, - `[:^blank:]`: {-1, code7}, - `[:cntrl:]`: {+1, code8}, - `[:^cntrl:]`: {-1, code8}, - `[:digit:]`: {+1, code9}, - `[:^digit:]`: {-1, code9}, - `[:graph:]`: {+1, code10}, - `[:^graph:]`: {-1, code10}, - `[:lower:]`: {+1, code11}, - `[:^lower:]`: {-1, code11}, - `[:print:]`: {+1, code12}, - `[:^print:]`: {-1, code12}, - `[:punct:]`: 
{+1, code13}, - `[:^punct:]`: {-1, code13}, - `[:space:]`: {+1, code14}, - `[:^space:]`: {-1, code14}, - `[:upper:]`: {+1, code15}, - `[:^upper:]`: {-1, code15}, - `[:word:]`: {+1, code16}, - `[:^word:]`: {-1, code16}, - `[:xdigit:]`: {+1, code17}, - `[:^xdigit:]`: {-1, code17}, -} diff --git a/src/pkg/regexp/syntax/prog.go b/src/pkg/regexp/syntax/prog.go deleted file mode 100644 index 29bd282d0..000000000 --- a/src/pkg/regexp/syntax/prog.go +++ /dev/null @@ -1,345 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package syntax - -import ( - "bytes" - "strconv" - "unicode" -) - -// Compiled program. -// May not belong in this package, but convenient for now. - -// A Prog is a compiled regular expression program. -type Prog struct { - Inst []Inst - Start int // index of start instruction - NumCap int // number of InstCapture insts in re -} - -// An InstOp is an instruction opcode. -type InstOp uint8 - -const ( - InstAlt InstOp = iota - InstAltMatch - InstCapture - InstEmptyWidth - InstMatch - InstFail - InstNop - InstRune - InstRune1 - InstRuneAny - InstRuneAnyNotNL -) - -var instOpNames = []string{ - "InstAlt", - "InstAltMatch", - "InstCapture", - "InstEmptyWidth", - "InstMatch", - "InstFail", - "InstNop", - "InstRune", - "InstRune1", - "InstRuneAny", - "InstRuneAnyNotNL", -} - -func (i InstOp) String() string { - if uint(i) >= uint(len(instOpNames)) { - return "" - } - return instOpNames[i] -} - -// An EmptyOp specifies a kind or mixture of zero-width assertions. -type EmptyOp uint8 - -const ( - EmptyBeginLine EmptyOp = 1 << iota - EmptyEndLine - EmptyBeginText - EmptyEndText - EmptyWordBoundary - EmptyNoWordBoundary -) - -// EmptyOpContext returns the zero-width assertions -// satisfied at the position between the runes r1 and r2. -// Passing r1 == -1 indicates that the position is -// at the beginning of the text. 
-// Passing r2 == -1 indicates that the position is -// at the end of the text. -func EmptyOpContext(r1, r2 rune) EmptyOp { - var op EmptyOp = EmptyNoWordBoundary - var boundary byte - switch { - case IsWordChar(r1): - boundary = 1 - case r1 == '\n': - op |= EmptyBeginLine - case r1 < 0: - op |= EmptyBeginText | EmptyBeginLine - } - switch { - case IsWordChar(r2): - boundary ^= 1 - case r2 == '\n': - op |= EmptyEndLine - case r2 < 0: - op |= EmptyEndText | EmptyEndLine - } - if boundary != 0 { // IsWordChar(r1) != IsWordChar(r2) - op ^= (EmptyWordBoundary | EmptyNoWordBoundary) - } - return op -} - -// IsWordChar reports whether r is consider a ``word character'' -// during the evaluation of the \b and \B zero-width assertions. -// These assertions are ASCII-only: the word characters are [A-Za-z0-9_]. -func IsWordChar(r rune) bool { - return 'A' <= r && r <= 'Z' || 'a' <= r && r <= 'z' || '0' <= r && r <= '9' || r == '_' -} - -// An Inst is a single instruction in a regular expression program. -type Inst struct { - Op InstOp - Out uint32 // all but InstMatch, InstFail - Arg uint32 // InstAlt, InstAltMatch, InstCapture, InstEmptyWidth - Rune []rune -} - -func (p *Prog) String() string { - var b bytes.Buffer - dumpProg(&b, p) - return b.String() -} - -// skipNop follows any no-op or capturing instructions -// and returns the resulting pc. -func (p *Prog) skipNop(pc uint32) (*Inst, uint32) { - i := &p.Inst[pc] - for i.Op == InstNop || i.Op == InstCapture { - pc = i.Out - i = &p.Inst[pc] - } - return i, pc -} - -// op returns i.Op but merges all the Rune special cases into InstRune -func (i *Inst) op() InstOp { - op := i.Op - switch op { - case InstRune1, InstRuneAny, InstRuneAnyNotNL: - op = InstRune - } - return op -} - -// Prefix returns a literal string that all matches for the -// regexp must start with. Complete is true if the prefix -// is the entire match. 
-func (p *Prog) Prefix() (prefix string, complete bool) { - i, _ := p.skipNop(uint32(p.Start)) - - // Avoid allocation of buffer if prefix is empty. - if i.op() != InstRune || len(i.Rune) != 1 { - return "", i.Op == InstMatch - } - - // Have prefix; gather characters. - var buf bytes.Buffer - for i.op() == InstRune && len(i.Rune) == 1 && Flags(i.Arg)&FoldCase == 0 { - buf.WriteRune(i.Rune[0]) - i, _ = p.skipNop(i.Out) - } - return buf.String(), i.Op == InstMatch -} - -// StartCond returns the leading empty-width conditions that must -// be true in any match. It returns ^EmptyOp(0) if no matches are possible. -func (p *Prog) StartCond() EmptyOp { - var flag EmptyOp - pc := uint32(p.Start) - i := &p.Inst[pc] -Loop: - for { - switch i.Op { - case InstEmptyWidth: - flag |= EmptyOp(i.Arg) - case InstFail: - return ^EmptyOp(0) - case InstCapture, InstNop: - // skip - default: - break Loop - } - pc = i.Out - i = &p.Inst[pc] - } - return flag -} - -const noMatch = -1 - -// MatchRune returns true if the instruction matches (and consumes) r. -// It should only be called when i.Op == InstRune. -func (i *Inst) MatchRune(r rune) bool { - return i.MatchRunePos(r) != noMatch -} - -// MatchRunePos checks whether the instruction matches (and consumes) r. -// If so, MatchRunePos returns the index of the matching rune pair -// (or, when len(i.Rune) == 1, rune singleton). -// If not, MatchRunePos returns -1. -// MatchRunePos should only be called when i.Op == InstRune. -func (i *Inst) MatchRunePos(r rune) int { - rune := i.Rune - - // Special case: single-rune slice is from literal string, not char class. - if len(rune) == 1 { - r0 := rune[0] - if r == r0 { - return 0 - } - if Flags(i.Arg)&FoldCase != 0 { - for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) { - if r == r1 { - return 0 - } - } - } - return noMatch - } - - // Peek at the first few pairs. - // Should handle ASCII well. 
- for j := 0; j < len(rune) && j <= 8; j += 2 { - if r < rune[j] { - return noMatch - } - if r <= rune[j+1] { - return j / 2 - } - } - - // Otherwise binary search. - lo := 0 - hi := len(rune) / 2 - for lo < hi { - m := lo + (hi-lo)/2 - if c := rune[2*m]; c <= r { - if r <= rune[2*m+1] { - return m - } - lo = m + 1 - } else { - hi = m - } - } - return noMatch -} - -// As per re2's Prog::IsWordChar. Determines whether rune is an ASCII word char. -// Since we act on runes, it would be easy to support Unicode here. -func wordRune(r rune) bool { - return r == '_' || - ('A' <= r && r <= 'Z') || - ('a' <= r && r <= 'z') || - ('0' <= r && r <= '9') -} - -// MatchEmptyWidth returns true if the instruction matches -// an empty string between the runes before and after. -// It should only be called when i.Op == InstEmptyWidth. -func (i *Inst) MatchEmptyWidth(before rune, after rune) bool { - switch EmptyOp(i.Arg) { - case EmptyBeginLine: - return before == '\n' || before == -1 - case EmptyEndLine: - return after == '\n' || after == -1 - case EmptyBeginText: - return before == -1 - case EmptyEndText: - return after == -1 - case EmptyWordBoundary: - return wordRune(before) != wordRune(after) - case EmptyNoWordBoundary: - return wordRune(before) == wordRune(after) - } - panic("unknown empty width arg") -} - -func (i *Inst) String() string { - var b bytes.Buffer - dumpInst(&b, i) - return b.String() -} - -func bw(b *bytes.Buffer, args ...string) { - for _, s := range args { - b.WriteString(s) - } -} - -func dumpProg(b *bytes.Buffer, p *Prog) { - for j := range p.Inst { - i := &p.Inst[j] - pc := strconv.Itoa(j) - if len(pc) < 3 { - b.WriteString(" "[len(pc):]) - } - if j == p.Start { - pc += "*" - } - bw(b, pc, "\t") - dumpInst(b, i) - bw(b, "\n") - } -} - -func u32(i uint32) string { - return strconv.FormatUint(uint64(i), 10) -} - -func dumpInst(b *bytes.Buffer, i *Inst) { - switch i.Op { - case InstAlt: - bw(b, "alt -> ", u32(i.Out), ", ", u32(i.Arg)) - case InstAltMatch: - 
bw(b, "altmatch -> ", u32(i.Out), ", ", u32(i.Arg)) - case InstCapture: - bw(b, "cap ", u32(i.Arg), " -> ", u32(i.Out)) - case InstEmptyWidth: - bw(b, "empty ", u32(i.Arg), " -> ", u32(i.Out)) - case InstMatch: - bw(b, "match") - case InstFail: - bw(b, "fail") - case InstNop: - bw(b, "nop -> ", u32(i.Out)) - case InstRune: - if i.Rune == nil { - // shouldn't happen - bw(b, "rune <nil>") - } - bw(b, "rune ", strconv.QuoteToASCII(string(i.Rune))) - if Flags(i.Arg)&FoldCase != 0 { - bw(b, "/i") - } - bw(b, " -> ", u32(i.Out)) - case InstRune1: - bw(b, "rune1 ", strconv.QuoteToASCII(string(i.Rune)), " -> ", u32(i.Out)) - case InstRuneAny: - bw(b, "any -> ", u32(i.Out)) - case InstRuneAnyNotNL: - bw(b, "anynotnl -> ", u32(i.Out)) - } -} diff --git a/src/pkg/regexp/syntax/prog_test.go b/src/pkg/regexp/syntax/prog_test.go deleted file mode 100644 index 50bfa3d4b..000000000 --- a/src/pkg/regexp/syntax/prog_test.go +++ /dev/null @@ -1,114 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -package syntax - -import "testing" - -var compileTests = []struct { - Regexp string - Prog string -}{ - {"a", ` 0 fail - 1* rune1 "a" -> 2 - 2 match -`}, - {"[A-M][n-z]", ` 0 fail - 1* rune "AM" -> 2 - 2 rune "nz" -> 3 - 3 match -`}, - {"", ` 0 fail - 1* nop -> 2 - 2 match -`}, - {"a?", ` 0 fail - 1 rune1 "a" -> 3 - 2* alt -> 1, 3 - 3 match -`}, - {"a??", ` 0 fail - 1 rune1 "a" -> 3 - 2* alt -> 3, 1 - 3 match -`}, - {"a+", ` 0 fail - 1* rune1 "a" -> 2 - 2 alt -> 1, 3 - 3 match -`}, - {"a+?", ` 0 fail - 1* rune1 "a" -> 2 - 2 alt -> 3, 1 - 3 match -`}, - {"a*", ` 0 fail - 1 rune1 "a" -> 2 - 2* alt -> 1, 3 - 3 match -`}, - {"a*?", ` 0 fail - 1 rune1 "a" -> 2 - 2* alt -> 3, 1 - 3 match -`}, - {"a+b+", ` 0 fail - 1* rune1 "a" -> 2 - 2 alt -> 1, 3 - 3 rune1 "b" -> 4 - 4 alt -> 3, 5 - 5 match -`}, - {"(a+)(b+)", ` 0 fail - 1* cap 2 -> 2 - 2 rune1 "a" -> 3 - 3 alt -> 2, 4 - 4 cap 3 -> 5 - 5 cap 4 -> 6 - 6 rune1 "b" -> 7 - 7 alt -> 6, 8 - 8 cap 5 -> 9 - 9 match -`}, - {"a+|b+", ` 0 fail - 1 rune1 "a" -> 2 - 2 alt -> 1, 6 - 3 rune1 "b" -> 4 - 4 alt -> 3, 6 - 5* alt -> 1, 3 - 6 match -`}, - {"A[Aa]", ` 0 fail - 1* rune1 "A" -> 2 - 2 rune "A"/i -> 3 - 3 match -`}, - {"(?:(?:^).)", ` 0 fail - 1* empty 4 -> 2 - 2 anynotnl -> 3 - 3 match -`}, -} - -func TestCompile(t *testing.T) { - for _, tt := range compileTests { - re, _ := Parse(tt.Regexp, Perl) - p, _ := Compile(re) - s := p.String() - if s != tt.Prog { - t.Errorf("compiled %#q:\n--- have\n%s---\n--- want\n%s---", tt.Regexp, s, tt.Prog) - } - } -} - -func BenchmarkEmptyOpContext(b *testing.B) { - for i := 0; i < b.N; i++ { - var r1 rune = -1 - for _, r2 := range "foo, bar, baz\nsome input text.\n" { - EmptyOpContext(r1, r2) - r1 = r2 - } - EmptyOpContext(r1, -1) - } -} diff --git a/src/pkg/regexp/syntax/regexp.go b/src/pkg/regexp/syntax/regexp.go deleted file mode 100644 index 329a90e01..000000000 --- a/src/pkg/regexp/syntax/regexp.go +++ /dev/null @@ -1,319 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. 
-// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package syntax - -// Note to implementers: -// In this package, re is always a *Regexp and r is always a rune. - -import ( - "bytes" - "strconv" - "strings" - "unicode" -) - -// A Regexp is a node in a regular expression syntax tree. -type Regexp struct { - Op Op // operator - Flags Flags - Sub []*Regexp // subexpressions, if any - Sub0 [1]*Regexp // storage for short Sub - Rune []rune // matched runes, for OpLiteral, OpCharClass - Rune0 [2]rune // storage for short Rune - Min, Max int // min, max for OpRepeat - Cap int // capturing index, for OpCapture - Name string // capturing name, for OpCapture -} - -// An Op is a single regular expression operator. -type Op uint8 - -// Operators are listed in precedence order, tightest binding to weakest. -// Character class operators are listed simplest to most complex -// (OpLiteral, OpCharClass, OpAnyCharNotNL, OpAnyChar). - -const ( - OpNoMatch Op = 1 + iota // matches no strings - OpEmptyMatch // matches empty string - OpLiteral // matches Runes sequence - OpCharClass // matches Runes interpreted as range pair list - OpAnyCharNotNL // matches any character - OpAnyChar // matches any character - OpBeginLine // matches empty string at beginning of line - OpEndLine // matches empty string at end of line - OpBeginText // matches empty string at beginning of text - OpEndText // matches empty string at end of text - OpWordBoundary // matches word boundary `\b` - OpNoWordBoundary // matches word non-boundary `\B` - OpCapture // capturing subexpression with index Cap, optional name Name - OpStar // matches Sub[0] zero or more times - OpPlus // matches Sub[0] one or more times - OpQuest // matches Sub[0] zero or one times - OpRepeat // matches Sub[0] at least Min times, at most Max (Max == -1 is no limit) - OpConcat // matches concatenation of Subs - OpAlternate // matches alternation of Subs -) - -const opPseudo Op = 128 
// where pseudo-ops start - -// Equal returns true if x and y have identical structure. -func (x *Regexp) Equal(y *Regexp) bool { - if x == nil || y == nil { - return x == y - } - if x.Op != y.Op { - return false - } - switch x.Op { - case OpEndText: - // The parse flags remember whether this is \z or \Z. - if x.Flags&WasDollar != y.Flags&WasDollar { - return false - } - - case OpLiteral, OpCharClass: - if len(x.Rune) != len(y.Rune) { - return false - } - for i, r := range x.Rune { - if r != y.Rune[i] { - return false - } - } - - case OpAlternate, OpConcat: - if len(x.Sub) != len(y.Sub) { - return false - } - for i, sub := range x.Sub { - if !sub.Equal(y.Sub[i]) { - return false - } - } - - case OpStar, OpPlus, OpQuest: - if x.Flags&NonGreedy != y.Flags&NonGreedy || !x.Sub[0].Equal(y.Sub[0]) { - return false - } - - case OpRepeat: - if x.Flags&NonGreedy != y.Flags&NonGreedy || x.Min != y.Min || x.Max != y.Max || !x.Sub[0].Equal(y.Sub[0]) { - return false - } - - case OpCapture: - if x.Cap != y.Cap || x.Name != y.Name || !x.Sub[0].Equal(y.Sub[0]) { - return false - } - } - return true -} - -// writeRegexp writes the Perl syntax for the regular expression re to b. -func writeRegexp(b *bytes.Buffer, re *Regexp) { - switch re.Op { - default: - b.WriteString("<invalid op" + strconv.Itoa(int(re.Op)) + ">") - case OpNoMatch: - b.WriteString(`[^\x00-\x{10FFFF}]`) - case OpEmptyMatch: - b.WriteString(`(?:)`) - case OpLiteral: - if re.Flags&FoldCase != 0 { - b.WriteString(`(?i:`) - } - for _, r := range re.Rune { - escape(b, r, false) - } - if re.Flags&FoldCase != 0 { - b.WriteString(`)`) - } - case OpCharClass: - if len(re.Rune)%2 != 0 { - b.WriteString(`[invalid char class]`) - break - } - b.WriteRune('[') - if len(re.Rune) == 0 { - b.WriteString(`^\x00-\x{10FFFF}`) - } else if re.Rune[0] == 0 && re.Rune[len(re.Rune)-1] == unicode.MaxRune { - // Contains 0 and MaxRune. Probably a negated class. - // Print the gaps. 
- b.WriteRune('^') - for i := 1; i < len(re.Rune)-1; i += 2 { - lo, hi := re.Rune[i]+1, re.Rune[i+1]-1 - escape(b, lo, lo == '-') - if lo != hi { - b.WriteRune('-') - escape(b, hi, hi == '-') - } - } - } else { - for i := 0; i < len(re.Rune); i += 2 { - lo, hi := re.Rune[i], re.Rune[i+1] - escape(b, lo, lo == '-') - if lo != hi { - b.WriteRune('-') - escape(b, hi, hi == '-') - } - } - } - b.WriteRune(']') - case OpAnyCharNotNL: - b.WriteString(`(?-s:.)`) - case OpAnyChar: - b.WriteString(`(?s:.)`) - case OpBeginLine: - b.WriteRune('^') - case OpEndLine: - b.WriteRune('$') - case OpBeginText: - b.WriteString(`\A`) - case OpEndText: - if re.Flags&WasDollar != 0 { - b.WriteString(`(?-m:$)`) - } else { - b.WriteString(`\z`) - } - case OpWordBoundary: - b.WriteString(`\b`) - case OpNoWordBoundary: - b.WriteString(`\B`) - case OpCapture: - if re.Name != "" { - b.WriteString(`(?P<`) - b.WriteString(re.Name) - b.WriteRune('>') - } else { - b.WriteRune('(') - } - if re.Sub[0].Op != OpEmptyMatch { - writeRegexp(b, re.Sub[0]) - } - b.WriteRune(')') - case OpStar, OpPlus, OpQuest, OpRepeat: - if sub := re.Sub[0]; sub.Op > OpCapture || sub.Op == OpLiteral && len(sub.Rune) > 1 { - b.WriteString(`(?:`) - writeRegexp(b, sub) - b.WriteString(`)`) - } else { - writeRegexp(b, sub) - } - switch re.Op { - case OpStar: - b.WriteRune('*') - case OpPlus: - b.WriteRune('+') - case OpQuest: - b.WriteRune('?') - case OpRepeat: - b.WriteRune('{') - b.WriteString(strconv.Itoa(re.Min)) - if re.Max != re.Min { - b.WriteRune(',') - if re.Max >= 0 { - b.WriteString(strconv.Itoa(re.Max)) - } - } - b.WriteRune('}') - } - if re.Flags&NonGreedy != 0 { - b.WriteRune('?') - } - case OpConcat: - for _, sub := range re.Sub { - if sub.Op == OpAlternate { - b.WriteString(`(?:`) - writeRegexp(b, sub) - b.WriteString(`)`) - } else { - writeRegexp(b, sub) - } - } - case OpAlternate: - for i, sub := range re.Sub { - if i > 0 { - b.WriteRune('|') - } - writeRegexp(b, sub) - } - } -} - -func (re *Regexp) String() 
string { - var b bytes.Buffer - writeRegexp(&b, re) - return b.String() -} - -const meta = `\.+*?()|[]{}^$` - -func escape(b *bytes.Buffer, r rune, force bool) { - if unicode.IsPrint(r) { - if strings.IndexRune(meta, r) >= 0 || force { - b.WriteRune('\\') - } - b.WriteRune(r) - return - } - - switch r { - case '\a': - b.WriteString(`\a`) - case '\f': - b.WriteString(`\f`) - case '\n': - b.WriteString(`\n`) - case '\r': - b.WriteString(`\r`) - case '\t': - b.WriteString(`\t`) - case '\v': - b.WriteString(`\v`) - default: - if r < 0x100 { - b.WriteString(`\x`) - s := strconv.FormatInt(int64(r), 16) - if len(s) == 1 { - b.WriteRune('0') - } - b.WriteString(s) - break - } - b.WriteString(`\x{`) - b.WriteString(strconv.FormatInt(int64(r), 16)) - b.WriteString(`}`) - } -} - -// MaxCap walks the regexp to find the maximum capture index. -func (re *Regexp) MaxCap() int { - m := 0 - if re.Op == OpCapture { - m = re.Cap - } - for _, sub := range re.Sub { - if n := sub.MaxCap(); m < n { - m = n - } - } - return m -} - -// CapNames walks the regexp to find the names of capturing groups. -func (re *Regexp) CapNames() []string { - names := make([]string, re.MaxCap()+1) - re.capNames(names) - return names -} - -func (re *Regexp) capNames(names []string) { - if re.Op == OpCapture { - names[re.Cap] = re.Name - } - for _, sub := range re.Sub { - sub.capNames(names) - } -} diff --git a/src/pkg/regexp/syntax/simplify.go b/src/pkg/regexp/syntax/simplify.go deleted file mode 100644 index 72390417b..000000000 --- a/src/pkg/regexp/syntax/simplify.go +++ /dev/null @@ -1,151 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package syntax - -// Simplify returns a regexp equivalent to re but without counted repetitions -// and with various other simplifications, such as rewriting /(?:a+)+/ to /a+/. 
-// The resulting regexp will execute correctly but its string representation -// will not produce the same parse tree, because capturing parentheses -// may have been duplicated or removed. For example, the simplified form -// for /(x){1,2}/ is /(x)(x)?/ but both parentheses capture as $1. -// The returned regexp may share structure with or be the original. -func (re *Regexp) Simplify() *Regexp { - if re == nil { - return nil - } - switch re.Op { - case OpCapture, OpConcat, OpAlternate: - // Simplify children, building new Regexp if children change. - nre := re - for i, sub := range re.Sub { - nsub := sub.Simplify() - if nre == re && nsub != sub { - // Start a copy. - nre = new(Regexp) - *nre = *re - nre.Rune = nil - nre.Sub = append(nre.Sub0[:0], re.Sub[:i]...) - } - if nre != re { - nre.Sub = append(nre.Sub, nsub) - } - } - return nre - - case OpStar, OpPlus, OpQuest: - sub := re.Sub[0].Simplify() - return simplify1(re.Op, re.Flags, sub, re) - - case OpRepeat: - // Special special case: x{0} matches the empty string - // and doesn't even need to consider x. - if re.Min == 0 && re.Max == 0 { - return &Regexp{Op: OpEmptyMatch} - } - - // The fun begins. - sub := re.Sub[0].Simplify() - - // x{n,} means at least n matches of x. - if re.Max == -1 { - // Special case: x{0,} is x*. - if re.Min == 0 { - return simplify1(OpStar, re.Flags, sub, nil) - } - - // Special case: x{1,} is x+. - if re.Min == 1 { - return simplify1(OpPlus, re.Flags, sub, nil) - } - - // General case: x{4,} is xxxx+. - nre := &Regexp{Op: OpConcat} - nre.Sub = nre.Sub0[:0] - for i := 0; i < re.Min-1; i++ { - nre.Sub = append(nre.Sub, sub) - } - nre.Sub = append(nre.Sub, simplify1(OpPlus, re.Flags, sub, nil)) - return nre - } - - // Special case x{0} handled above. - - // Special case: x{1} is just x. - if re.Min == 1 && re.Max == 1 { - return sub - } - - // General case: x{n,m} means n copies of x and m copies of x? 
- // The machine will do less work if we nest the final m copies, - // so that x{2,5} = xx(x(x(x)?)?)? - - // Build leading prefix: xx. - var prefix *Regexp - if re.Min > 0 { - prefix = &Regexp{Op: OpConcat} - prefix.Sub = prefix.Sub0[:0] - for i := 0; i < re.Min; i++ { - prefix.Sub = append(prefix.Sub, sub) - } - } - - // Build and attach suffix: (x(x(x)?)?)? - if re.Max > re.Min { - suffix := simplify1(OpQuest, re.Flags, sub, nil) - for i := re.Min + 1; i < re.Max; i++ { - nre2 := &Regexp{Op: OpConcat} - nre2.Sub = append(nre2.Sub0[:0], sub, suffix) - suffix = simplify1(OpQuest, re.Flags, nre2, nil) - } - if prefix == nil { - return suffix - } - prefix.Sub = append(prefix.Sub, suffix) - } - if prefix != nil { - return prefix - } - - // Some degenerate case like min > max or min < max < 0. - // Handle as impossible match. - return &Regexp{Op: OpNoMatch} - } - - return re -} - -// simplify1 implements Simplify for the unary OpStar, -// OpPlus, and OpQuest operators. It returns the simple regexp -// equivalent to -// -// Regexp{Op: op, Flags: flags, Sub: {sub}} -// -// under the assumption that sub is already simple, and -// without first allocating that structure. If the regexp -// to be returned turns out to be equivalent to re, simplify1 -// returns re instead. -// -// simplify1 is factored out of Simplify because the implementation -// for other operators generates these unary expressions. -// Letting them call simplify1 makes sure the expressions they -// generate are simple. -func simplify1(op Op, flags Flags, sub, re *Regexp) *Regexp { - // Special case: repeat the empty string as much as - // you want, but it's still the empty string. - if sub.Op == OpEmptyMatch { - return sub - } - // The operators are idempotent if the flags match. 
- if op == sub.Op && flags&NonGreedy == sub.Flags&NonGreedy { - return sub - } - if re != nil && re.Op == op && re.Flags&NonGreedy == flags&NonGreedy && sub == re.Sub[0] { - return re - } - - re = &Regexp{Op: op, Flags: flags} - re.Sub = append(re.Sub0[:0], sub) - return re -} diff --git a/src/pkg/regexp/syntax/simplify_test.go b/src/pkg/regexp/syntax/simplify_test.go deleted file mode 100644 index 879eff5be..000000000 --- a/src/pkg/regexp/syntax/simplify_test.go +++ /dev/null @@ -1,151 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package syntax - -import "testing" - -var simplifyTests = []struct { - Regexp string - Simple string -}{ - // Already-simple constructs - {`a`, `a`}, - {`ab`, `ab`}, - {`a|b`, `[a-b]`}, - {`ab|cd`, `ab|cd`}, - {`(ab)*`, `(ab)*`}, - {`(ab)+`, `(ab)+`}, - {`(ab)?`, `(ab)?`}, - {`.`, `(?s:.)`}, - {`^`, `^`}, - {`$`, `$`}, - {`[ac]`, `[ac]`}, - {`[^ac]`, `[^ac]`}, - - // Posix character classes - {`[[:alnum:]]`, `[0-9A-Za-z]`}, - {`[[:alpha:]]`, `[A-Za-z]`}, - {`[[:blank:]]`, `[\t ]`}, - {`[[:cntrl:]]`, `[\x00-\x1f\x7f]`}, - {`[[:digit:]]`, `[0-9]`}, - {`[[:graph:]]`, `[!-~]`}, - {`[[:lower:]]`, `[a-z]`}, - {`[[:print:]]`, `[ -~]`}, - {`[[:punct:]]`, "[!-/:-@\\[-`\\{-~]"}, - {`[[:space:]]`, `[\t-\r ]`}, - {`[[:upper:]]`, `[A-Z]`}, - {`[[:xdigit:]]`, `[0-9A-Fa-f]`}, - - // Perl character classes - {`\d`, `[0-9]`}, - {`\s`, `[\t-\n\f-\r ]`}, - {`\w`, `[0-9A-Z_a-z]`}, - {`\D`, `[^0-9]`}, - {`\S`, `[^\t-\n\f-\r ]`}, - {`\W`, `[^0-9A-Z_a-z]`}, - {`[\d]`, `[0-9]`}, - {`[\s]`, `[\t-\n\f-\r ]`}, - {`[\w]`, `[0-9A-Z_a-z]`}, - {`[\D]`, `[^0-9]`}, - {`[\S]`, `[^\t-\n\f-\r ]`}, - {`[\W]`, `[^0-9A-Z_a-z]`}, - - // Posix repetitions - {`a{1}`, `a`}, - {`a{2}`, `aa`}, - {`a{5}`, `aaaaa`}, - {`a{0,1}`, `a?`}, - // The next three are illegible because Simplify inserts (?:) - // parens instead of () parens to avoid creating extra - // 
captured subexpressions. The comments show a version with fewer parens. - {`(a){0,2}`, `(?:(a)(a)?)?`}, // (aa?)? - {`(a){0,4}`, `(?:(a)(?:(a)(?:(a)(a)?)?)?)?`}, // (a(a(aa?)?)?)? - {`(a){2,6}`, `(a)(a)(?:(a)(?:(a)(?:(a)(a)?)?)?)?`}, // aa(a(a(aa?)?)?)? - {`a{0,2}`, `(?:aa?)?`}, // (aa?)? - {`a{0,4}`, `(?:a(?:a(?:aa?)?)?)?`}, // (a(a(aa?)?)?)? - {`a{2,6}`, `aa(?:a(?:a(?:aa?)?)?)?`}, // aa(a(a(aa?)?)?)? - {`a{0,}`, `a*`}, - {`a{1,}`, `a+`}, - {`a{2,}`, `aa+`}, - {`a{5,}`, `aaaaa+`}, - - // Test that operators simplify their arguments. - {`(?:a{1,}){1,}`, `a+`}, - {`(a{1,}b{1,})`, `(a+b+)`}, - {`a{1,}|b{1,}`, `a+|b+`}, - {`(?:a{1,})*`, `(?:a+)*`}, - {`(?:a{1,})+`, `a+`}, - {`(?:a{1,})?`, `(?:a+)?`}, - {``, `(?:)`}, - {`a{0}`, `(?:)`}, - - // Character class simplification - {`[ab]`, `[a-b]`}, - {`[a-za-za-z]`, `[a-z]`}, - {`[A-Za-zA-Za-z]`, `[A-Za-z]`}, - {`[ABCDEFGH]`, `[A-H]`}, - {`[AB-CD-EF-GH]`, `[A-H]`}, - {`[W-ZP-XE-R]`, `[E-Z]`}, - {`[a-ee-gg-m]`, `[a-m]`}, - {`[a-ea-ha-m]`, `[a-m]`}, - {`[a-ma-ha-e]`, `[a-m]`}, - {`[a-zA-Z0-9 -~]`, `[ -~]`}, - - // Empty character classes - {`[^[:cntrl:][:^cntrl:]]`, `[^\x00-\x{10FFFF}]`}, - - // Full character classes - {`[[:cntrl:][:^cntrl:]]`, `(?s:.)`}, - - // Unicode case folding. - {`(?i)A`, `(?i:A)`}, - {`(?i)a`, `(?i:A)`}, - {`(?i)[A]`, `(?i:A)`}, - {`(?i)[a]`, `(?i:A)`}, - {`(?i)K`, `(?i:K)`}, - {`(?i)k`, `(?i:K)`}, - {`(?i)\x{212a}`, "(?i:K)"}, - {`(?i)[K]`, "[Kk\u212A]"}, - {`(?i)[k]`, "[Kk\u212A]"}, - {`(?i)[\x{212a}]`, "[Kk\u212A]"}, - {`(?i)[a-z]`, "[A-Za-z\u017F\u212A]"}, - {`(?i)[\x00-\x{FFFD}]`, "[\\x00-\uFFFD]"}, - {`(?i)[\x00-\x{10FFFF}]`, `(?s:.)`}, - - // Empty string as a regular expression. - // The empty string must be preserved inside parens in order - // to make submatches work right, so these tests are less - // interesting than they might otherwise be. String inserts - // explicit (?:) in place of non-parenthesized empty strings, - // to make them easier to spot for other parsers. 
- {`(a|b|)`, `([a-b]|(?:))`}, - {`(|)`, `()`}, - {`a()`, `a()`}, - {`(()|())`, `(()|())`}, - {`(a|)`, `(a|(?:))`}, - {`ab()cd()`, `ab()cd()`}, - {`()`, `()`}, - {`()*`, `()*`}, - {`()+`, `()+`}, - {`()?`, `()?`}, - {`(){0}`, `(?:)`}, - {`(){1}`, `()`}, - {`(){1,}`, `()+`}, - {`(){0,2}`, `(?:()()?)?`}, -} - -func TestSimplify(t *testing.T) { - for _, tt := range simplifyTests { - re, err := Parse(tt.Regexp, MatchNL|Perl&^OneLine) - if err != nil { - t.Errorf("Parse(%#q) = error %v", tt.Regexp, err) - continue - } - s := re.Simplify().String() - if s != tt.Simple { - t.Errorf("Simplify(%#q) = %#q, want %#q", tt.Regexp, s, tt.Simple) - } - } -} diff --git a/src/pkg/regexp/testdata/README b/src/pkg/regexp/testdata/README deleted file mode 100644 index b1b301be8..000000000 --- a/src/pkg/regexp/testdata/README +++ /dev/null @@ -1,23 +0,0 @@ -AT&T POSIX Test Files -See textregex.c for copyright + license. - -testregex.c http://www2.research.att.com/~gsf/testregex/testregex.c -basic.dat http://www2.research.att.com/~gsf/testregex/basic.dat -nullsubexpr.dat http://www2.research.att.com/~gsf/testregex/nullsubexpr.dat -repetition.dat http://www2.research.att.com/~gsf/testregex/repetition.dat - -The test data has been edited to reflect RE2/Go differences: - * In a star of a possibly empty match like (a*)* matching x, - the no match case runs the starred subexpression zero times, - not once. This is consistent with (a*)* matching a, which - runs the starred subexpression one time, not twice. - * The submatch choice is first match, not the POSIX rule. - -Such changes are marked with 'RE2/Go'. - - -RE2 Test Files - -re2-exhaustive.txt.bz2 and re2-search.txt are built by running -'make log' in the RE2 distribution. http://code.google.com/p/re2/. -The exhaustive file is compressed because it is huge. 
diff --git a/src/pkg/regexp/testdata/basic.dat b/src/pkg/regexp/testdata/basic.dat deleted file mode 100644 index 7859290ba..000000000 --- a/src/pkg/regexp/testdata/basic.dat +++ /dev/null @@ -1,221 +0,0 @@ -NOTE all standard compliant implementations should pass these : 2002-05-31 - -BE abracadabra$ abracadabracadabra (7,18) -BE a...b abababbb (2,7) -BE XXXXXX ..XXXXXX (2,8) -E \) () (1,2) -BE a] a]a (0,2) -B } } (0,1) -E \} } (0,1) -BE \] ] (0,1) -B ] ] (0,1) -E ] ] (0,1) -B { { (0,1) -B } } (0,1) -BE ^a ax (0,1) -BE \^a a^a (1,3) -BE a\^ a^ (0,2) -BE a$ aa (1,2) -BE a\$ a$ (0,2) -BE ^$ NULL (0,0) -E $^ NULL (0,0) -E a($) aa (1,2)(2,2) -E a*(^a) aa (0,1)(0,1) -E (..)*(...)* a (0,0) -E (..)*(...)* abcd (0,4)(2,4) -E (ab|a)(bc|c) abc (0,3)(0,2)(2,3) -E (ab)c|abc abc (0,3)(0,2) -E a{0}b ab (1,2) -E (a*)(b?)(b+)b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7) -E (a*)(b{0,1})(b{1,})b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7) -E a{9876543210} NULL BADBR -E ((a|a)|a) a (0,1)(0,1)(0,1) -E (a*)(a|aa) aaaa (0,4)(0,3)(3,4) -E a*(a.|aa) aaaa (0,4)(2,4) -E a(b)|c(d)|a(e)f aef (0,3)(?,?)(?,?)(1,2) -E (a|b)?.* b (0,1)(0,1) -E (a|b)c|a(b|c) ac (0,2)(0,1) -E (a|b)c|a(b|c) ab (0,2)(?,?)(1,2) -E (a|b)*c|(a|ab)*c abc (0,3)(1,2) -E (a|b)*c|(a|ab)*c xc (1,2) -E (.a|.b).*|.*(.a|.b) xa (0,2)(0,2) -E a?(ab|ba)ab abab (0,4)(0,2) -E a?(ac{0}b|ba)ab abab (0,4)(0,2) -E ab|abab abbabab (0,2) -E aba|bab|bba baaabbbaba (5,8) -E aba|bab baaabbbaba (6,9) -E (aa|aaa)*|(a|aaaaa) aa (0,2)(0,2) -E (a.|.a.)*|(a|.a...) 
aa (0,2)(0,2) -E ab|a xabc (1,3) -E ab|a xxabc (2,4) -Ei (Ab|cD)* aBcD (0,4)(2,4) -BE [^-] --a (2,3) -BE [a-]* --a (0,3) -BE [a-m-]* --amoma-- (0,4) -E :::1:::0:|:::1:1:0: :::0:::1:::1:::0: (8,17) -E :::1:::0:|:::1:1:1: :::0:::1:::1:::0: (8,17) -{E [[:upper:]] A (0,1) [[<element>]] not supported -E [[:lower:]]+ `az{ (1,3) -E [[:upper:]]+ @AZ[ (1,3) -# No collation in Go -#BE [[-]] [[-]] (2,4) -#BE [[.NIL.]] NULL ECOLLATE -#BE [[=aleph=]] NULL ECOLLATE -} -BE$ \n \n (0,1) -BEn$ \n \n (0,1) -BE$ [^a] \n (0,1) -BE$ \na \na (0,2) -E (a)(b)(c) abc (0,3)(0,1)(1,2)(2,3) -BE xxx xxx (0,3) -E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 6, (0,6) -E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) 2/7 (0,3) -E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 1,Feb 6 (5,11) -E3 ((((((((((((((((((((((((((((((x)))))))))))))))))))))))))))))) x (0,1)(0,1)(0,1) -E3 ((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))* xx (0,2)(1,2)(1,2) -E a?(ab|ba)* ababababababababababababababababababababababababababababababababababababababababa (0,81)(79,81) -E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabbbbaa (18,25) -E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabaa (18,22) -E aaac|aabc|abac|abbc|baac|babc|bbac|bbbc baaabbbabac (7,11) -BE$ .* \x01\xff (0,2) -E aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa (53,57) -L aaaa\nbbbb\ncccc\nddddd\neeeeee\nfffffff\ngggg\nhhhh\niiiii\njjjjj\nkkkkk\nllll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa NOMATCH -E a*a*a*a*a*b aaaaaaaaab (0,10) -BE ^ NULL (0,0) -BE $ NULL (0,0) -BE ^$ NULL (0,0) -BE ^a$ a (0,1) -BE abc abc (0,3) -BE abc xabcy (1,4) -BE abc ababc (2,5) -BE ab*c abc (0,3) -BE ab*bc abc (0,3) -BE ab*bc abbc (0,4) -BE ab*bc abbbbc (0,6) -E ab+bc abbc (0,4) -E ab+bc abbbbc (0,6) -E ab?bc abbc (0,4) -E ab?bc abc (0,3) -E ab?c abc (0,3) -BE ^abc$ abc (0,3) -BE ^abc abcc (0,3) -BE abc$ aabc (1,4) -BE 
^ abc (0,0) -BE $ abc (3,3) -BE a.c abc (0,3) -BE a.c axc (0,3) -BE a.*c axyzc (0,5) -BE a[bc]d abd (0,3) -BE a[b-d]e ace (0,3) -BE a[b-d] aac (1,3) -BE a[-b] a- (0,2) -BE a[b-] a- (0,2) -BE a] a] (0,2) -BE a[]]b a]b (0,3) -BE a[^bc]d aed (0,3) -BE a[^-b]c adc (0,3) -BE a[^]b]c adc (0,3) -E ab|cd abc (0,2) -E ab|cd abcd (0,2) -E a\(b a(b (0,3) -E a\(*b ab (0,2) -E a\(*b a((b (0,4) -E ((a)) abc (0,1)(0,1)(0,1) -E (a)b(c) abc (0,3)(0,1)(2,3) -E a+b+c aabbabc (4,7) -E a* aaa (0,3) -#E (a*)* - (0,0)(0,0) -E (a*)* - (0,0)(?,?) RE2/Go -E (a*)+ - (0,0)(0,0) -#E (a*|b)* - (0,0)(0,0) -E (a*|b)* - (0,0)(?,?) RE2/Go -E (a+|b)* ab (0,2)(1,2) -E (a+|b)+ ab (0,2)(1,2) -E (a+|b)? ab (0,1)(0,1) -BE [^ab]* cde (0,3) -#E (^)* - (0,0)(0,0) -E (^)* - (0,0)(?,?) RE2/Go -BE a* NULL (0,0) -E ([abc])*d abbbcd (0,6)(4,5) -E ([abc])*bcd abcd (0,4)(0,1) -E a|b|c|d|e e (0,1) -E (a|b|c|d|e)f ef (0,2)(0,1) -#E ((a*|b))* - (0,0)(0,0)(0,0) -E ((a*|b))* - (0,0)(?,?)(?,?) RE2/Go -BE abcd*efg abcdefg (0,7) -BE ab* xabyabbbz (1,3) -BE ab* xayabbbz (1,2) -E (ab|cd)e abcde (2,5)(2,4) -BE [abhgefdc]ij hij (0,3) -E (a|b)c*d abcd (1,4)(1,2) -E (ab|ab*)bc abc (0,3)(0,1) -E a([bc]*)c* abc (0,3)(1,3) -E a([bc]*)(c*d) abcd (0,4)(1,3)(3,4) -E a([bc]+)(c*d) abcd (0,4)(1,3)(3,4) -E a([bc]*)(c+d) abcd (0,4)(1,2)(2,4) -E a[bcd]*dcdcde adcdcde (0,7) -E (ab|a)b*c abc (0,3)(0,2) -E ((a)(b)c)(d) abcd (0,4)(0,3)(0,1)(1,2)(3,4) -BE [A-Za-z_][A-Za-z0-9_]* alpha (0,5) -E ^a(bc+|b[eh])g|.h$ abh (1,3) -E (bc+d$|ef*g.|h?i(j|k)) effgz (0,5)(0,5) -E (bc+d$|ef*g.|h?i(j|k)) ij (0,2)(0,2)(1,2) -E (bc+d$|ef*g.|h?i(j|k)) reffgz (1,6)(1,6) -E (((((((((a))))))))) a (0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1) -BE multiple words multiple words yeah (0,14) -E (.*)c(.*) abcde (0,5)(0,2)(3,5) -BE abcd abcd (0,4) -E a(bc)d abcd (0,4)(1,3) -E a[-]?c ac (0,3) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qaddafi (0,15)(?,?)(10,12) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] 
Mo'ammar Gadhafi (0,16)(?,?)(11,13) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Kaddafi (0,15)(?,?)(10,12) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qadhafi (0,15)(?,?)(10,12) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gadafi (0,14)(?,?)(10,11) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadafi (0,15)(?,?)(11,12) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moamar Gaddafi (0,14)(?,?)(9,11) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadhdhafi (0,18)(?,?)(13,15) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Khaddafi (0,16)(?,?)(11,13) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafy (0,16)(?,?)(11,13) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghadafi (0,15)(?,?)(11,12) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafi (0,16)(?,?)(11,13) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muamar Kaddafi (0,14)(?,?)(9,11) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Quathafi (0,16)(?,?)(11,13) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gheddafi (0,16)(?,?)(11,13) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Khadafy (0,15)(?,?)(11,12) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Qudhafi (0,15)(?,?)(10,12) -E a+(b|c)*d+ aabcdd (0,6)(3,4) -E ^.+$ vivi (0,4) -E ^(.+)$ vivi (0,4)(0,4) -E ^([^!.]+).att.com!(.+)$ gryphon.att.com!eby (0,19)(0,7)(16,19) -E ^([^!]+!)?([^!]+)$ bas (0,3)(?,?)(0,3) -E ^([^!]+!)?([^!]+)$ bar!bas (0,7)(0,4)(4,7) -E ^([^!]+!)?([^!]+)$ foo!bas (0,7)(0,4)(4,7) -E ^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(4,8)(8,11) -E ((foo)|(bar))!bas bar!bas (0,7)(0,3)(?,?)(0,3) -E 
((foo)|(bar))!bas foo!bar!bas (4,11)(4,7)(?,?)(4,7) -E ((foo)|(bar))!bas foo!bas (0,7)(0,3)(0,3) -E ((foo)|bar)!bas bar!bas (0,7)(0,3) -E ((foo)|bar)!bas foo!bar!bas (4,11)(4,7) -E ((foo)|bar)!bas foo!bas (0,7)(0,3)(0,3) -E (foo|(bar))!bas bar!bas (0,7)(0,3)(0,3) -E (foo|(bar))!bas foo!bar!bas (4,11)(4,7)(4,7) -E (foo|(bar))!bas foo!bas (0,7)(0,3) -E (foo|bar)!bas bar!bas (0,7)(0,3) -E (foo|bar)!bas foo!bar!bas (4,11)(4,7) -E (foo|bar)!bas foo!bas (0,7)(0,3) -E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11) -E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bas (0,3)(?,?)(0,3) -E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bar!bas (0,7)(0,4)(4,7) -E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(?,?)(?,?)(4,8)(8,11) -E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bas (0,7)(0,4)(4,7) -E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bas (0,3)(0,3)(?,?)(0,3) -E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bar!bas (0,7)(0,7)(0,4)(4,7) -E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11) -E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bas (0,7)(0,7)(0,4)(4,7) -E .*(/XXX).* /XXX (0,4)(0,4) -E .*(\\XXX).* \XXX (0,4)(0,4) -E \\XXX \XXX (0,4) -E .*(/000).* /000 (0,4)(0,4) -E .*(\\000).* \000 (0,4)(0,4) -E \\000 \000 (0,4) diff --git a/src/pkg/regexp/testdata/nullsubexpr.dat b/src/pkg/regexp/testdata/nullsubexpr.dat deleted file mode 100644 index 2e18fbb91..000000000 --- a/src/pkg/regexp/testdata/nullsubexpr.dat +++ /dev/null @@ -1,79 +0,0 @@ -NOTE null subexpression matches : 2002-06-06 - -E (a*)* a (0,1)(0,1) -#E SAME x (0,0)(0,0) -E SAME x (0,0)(?,?) 
RE2/Go -E SAME aaaaaa (0,6)(0,6) -E SAME aaaaaax (0,6)(0,6) -E (a*)+ a (0,1)(0,1) -E SAME x (0,0)(0,0) -E SAME aaaaaa (0,6)(0,6) -E SAME aaaaaax (0,6)(0,6) -E (a+)* a (0,1)(0,1) -E SAME x (0,0) -E SAME aaaaaa (0,6)(0,6) -E SAME aaaaaax (0,6)(0,6) -E (a+)+ a (0,1)(0,1) -E SAME x NOMATCH -E SAME aaaaaa (0,6)(0,6) -E SAME aaaaaax (0,6)(0,6) - -E ([a]*)* a (0,1)(0,1) -#E SAME x (0,0)(0,0) -E SAME x (0,0)(?,?) RE2/Go -E SAME aaaaaa (0,6)(0,6) -E SAME aaaaaax (0,6)(0,6) -E ([a]*)+ a (0,1)(0,1) -E SAME x (0,0)(0,0) -E SAME aaaaaa (0,6)(0,6) -E SAME aaaaaax (0,6)(0,6) -E ([^b]*)* a (0,1)(0,1) -#E SAME b (0,0)(0,0) -E SAME b (0,0)(?,?) RE2/Go -E SAME aaaaaa (0,6)(0,6) -E SAME aaaaaab (0,6)(0,6) -E ([ab]*)* a (0,1)(0,1) -E SAME aaaaaa (0,6)(0,6) -E SAME ababab (0,6)(0,6) -E SAME bababa (0,6)(0,6) -E SAME b (0,1)(0,1) -E SAME bbbbbb (0,6)(0,6) -E SAME aaaabcde (0,5)(0,5) -E ([^a]*)* b (0,1)(0,1) -E SAME bbbbbb (0,6)(0,6) -#E SAME aaaaaa (0,0)(0,0) -E SAME aaaaaa (0,0)(?,?) RE2/Go -E ([^ab]*)* ccccxx (0,6)(0,6) -#E SAME ababab (0,0)(0,0) -E SAME ababab (0,0)(?,?) RE2/Go - -E ((z)+|a)* zabcde (0,2)(1,2) - -#{E a+? aaaaaa (0,1) no *? +? mimimal match ops -#E (a) aaa (0,1)(0,1) -#E (a*?) aaa (0,0)(0,0) -#E (a)*? aaa (0,0) -#E (a*?)*? 
aaa (0,0) -#} - -B \(a*\)*\(x\) x (0,1)(0,0)(0,1) -B \(a*\)*\(x\) ax (0,2)(0,1)(1,2) -B \(a*\)*\(x\) axa (0,2)(0,1)(1,2) -B \(a*\)*\(x\)\(\1\) x (0,1)(0,0)(0,1)(1,1) -B \(a*\)*\(x\)\(\1\) ax (0,2)(1,1)(1,2)(2,2) -B \(a*\)*\(x\)\(\1\) axa (0,3)(0,1)(1,2)(2,3) -B \(a*\)*\(x\)\(\1\)\(x\) axax (0,4)(0,1)(1,2)(2,3)(3,4) -B \(a*\)*\(x\)\(\1\)\(x\) axxa (0,3)(1,1)(1,2)(2,2)(2,3) - -#E (a*)*(x) x (0,1)(0,0)(0,1) -E (a*)*(x) x (0,1)(?,?)(0,1) RE2/Go -E (a*)*(x) ax (0,2)(0,1)(1,2) -E (a*)*(x) axa (0,2)(0,1)(1,2) - -E (a*)+(x) x (0,1)(0,0)(0,1) -E (a*)+(x) ax (0,2)(0,1)(1,2) -E (a*)+(x) axa (0,2)(0,1)(1,2) - -E (a*){2}(x) x (0,1)(0,0)(0,1) -E (a*){2}(x) ax (0,2)(1,1)(1,2) -E (a*){2}(x) axa (0,2)(1,1)(1,2) diff --git a/src/pkg/regexp/testdata/re2-exhaustive.txt.bz2 b/src/pkg/regexp/testdata/re2-exhaustive.txt.bz2 Binary files differdeleted file mode 100644 index a357f2801..000000000 --- a/src/pkg/regexp/testdata/re2-exhaustive.txt.bz2 +++ /dev/null diff --git a/src/pkg/regexp/testdata/re2-search.txt b/src/pkg/regexp/testdata/re2-search.txt deleted file mode 100644 index f648e5527..000000000 --- a/src/pkg/regexp/testdata/re2-search.txt +++ /dev/null @@ -1,3667 +0,0 @@ -# RE2 basic search tests built by make log -# Thu Sep 8 13:43:43 EDT 2011 -Regexp.SearchTests -strings -"" -"a" -regexps -"a" --;-;-;- -0-1;0-1;0-1;0-1 -"^(?:a)$" --;-;-;- -0-1;0-1;0-1;0-1 -"^(?:a)" --;-;-;- -0-1;0-1;0-1;0-1 -"(?:a)$" --;-;-;- -0-1;0-1;0-1;0-1 -strings -"" -"zyzzyva" -regexps -"a" --;-;-;- --;6-7;-;6-7 -"^(?:a)$" --;-;-;- --;-;-;- -"^(?:a)" --;-;-;- --;-;-;- -"(?:a)$" --;-;-;- --;6-7;-;6-7 -strings -"" -"aa" -regexps -"a+" --;-;-;- -0-2;0-2;0-2;0-2 -"^(?:a+)$" --;-;-;- -0-2;0-2;0-2;0-2 -"^(?:a+)" --;-;-;- -0-2;0-2;0-2;0-2 -"(?:a+)$" --;-;-;- -0-2;0-2;0-2;0-2 -strings -"" -"ab" -regexps -"(a+|b)+" --;-;-;- -0-2 1-2;0-2 1-2;0-2 1-2;0-2 1-2 -"^(?:(a+|b)+)$" --;-;-;- -0-2 1-2;0-2 1-2;0-2 1-2;0-2 1-2 -"^(?:(a+|b)+)" --;-;-;- -0-2 1-2;0-2 1-2;0-2 1-2;0-2 1-2 -"(?:(a+|b)+)$" --;-;-;- -0-2 1-2;0-2 
1-2;0-2 1-2;0-2 1-2 -strings -"" -"xabcdx" -regexps -"ab|cd" --;-;-;- --;1-3;-;1-3 -"^(?:ab|cd)$" --;-;-;- --;-;-;- -"^(?:ab|cd)" --;-;-;- --;-;-;- -"(?:ab|cd)$" --;-;-;- --;-;-;- -strings -"" -"hello\ngoodbye\n" -regexps -"h.*od?" --;-;-;- --;0-5;-;0-5 -"^(?:h.*od?)$" --;-;-;- --;-;-;- -"^(?:h.*od?)" --;-;-;- --;0-5;-;0-5 -"(?:h.*od?)$" --;-;-;- --;-;-;- -strings -"" -"hello\ngoodbye\n" -regexps -"h.*o" --;-;-;- --;0-5;-;0-5 -"^(?:h.*o)$" --;-;-;- --;-;-;- -"^(?:h.*o)" --;-;-;- --;0-5;-;0-5 -"(?:h.*o)$" --;-;-;- --;-;-;- -strings -"" -"goodbye\nhello\n" -regexps -"h.*o" --;-;-;- --;8-13;-;8-13 -"^(?:h.*o)$" --;-;-;- --;-;-;- -"^(?:h.*o)" --;-;-;- --;-;-;- -"(?:h.*o)$" --;-;-;- --;-;-;- -strings -"" -"hello world" -regexps -"h.*o" --;-;-;- --;0-8;-;0-8 -"^(?:h.*o)$" --;-;-;- --;-;-;- -"^(?:h.*o)" --;-;-;- --;0-8;-;0-8 -"(?:h.*o)$" --;-;-;- --;-;-;- -strings -"" -"othello, world" -regexps -"h.*o" --;-;-;- --;2-11;-;2-11 -"^(?:h.*o)$" --;-;-;- --;-;-;- -"^(?:h.*o)" --;-;-;- --;-;-;- -"(?:h.*o)$" --;-;-;- --;-;-;- -strings -"" -"aaaaaaa" -regexps -"[^\\s\\S]" --;-;-;- --;-;-;- -"^(?:[^\\s\\S])$" --;-;-;- --;-;-;- -"^(?:[^\\s\\S])" --;-;-;- --;-;-;- -"(?:[^\\s\\S])$" --;-;-;- --;-;-;- -strings -"" -"aaaaaaa" -regexps -"a" --;-;-;- --;0-1;-;0-1 -"^(?:a)$" --;-;-;- --;-;-;- -"^(?:a)" --;-;-;- --;0-1;-;0-1 -"(?:a)$" --;-;-;- --;6-7;-;6-7 -strings -"" -"aaaaaaa" -regexps -"a*" -0-0;0-0;0-0;0-0 -0-7;0-7;0-7;0-7 -"^(?:a*)$" -0-0;0-0;0-0;0-0 -0-7;0-7;0-7;0-7 -"^(?:a*)" -0-0;0-0;0-0;0-0 -0-7;0-7;0-7;0-7 -"(?:a*)$" -0-0;0-0;0-0;0-0 -0-7;0-7;0-7;0-7 -strings -"" -"" -regexps -"a*" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"^(?:a*)$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"^(?:a*)" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"(?:a*)$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -strings -"" -"" -regexps -"a*" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"^(?:a*)$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"^(?:a*)" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"(?:a*)$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -strings -"" -"xabcdx" -regexps 
-"ab|cd" --;-;-;- --;1-3;-;1-3 -"^(?:ab|cd)$" --;-;-;- --;-;-;- -"^(?:ab|cd)" --;-;-;- --;-;-;- -"(?:ab|cd)$" --;-;-;- --;-;-;- -strings -"" -"cab" -regexps -"a" --;-;-;- --;1-2;-;1-2 -"^(?:a)$" --;-;-;- --;-;-;- -"^(?:a)" --;-;-;- --;-;-;- -"(?:a)$" --;-;-;- --;-;-;- -strings -"" -"cab" -regexps -"a*b" --;-;-;- --;1-3;-;1-3 -"^(?:a*b)$" --;-;-;- --;-;-;- -"^(?:a*b)" --;-;-;- --;-;-;- -"(?:a*b)$" --;-;-;- --;1-3;-;1-3 -strings -"" -"x" -regexps -"((((((((((((((((((((x))))))))))))))))))))" --;-;-;- -0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1;0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1;0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1;0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 -"^(?:((((((((((((((((((((x)))))))))))))))))))))$" --;-;-;- -0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1;0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1;0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1;0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 -"^(?:((((((((((((((((((((x)))))))))))))))))))))" --;-;-;- -0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1;0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1;0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1;0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 -"(?:((((((((((((((((((((x)))))))))))))))))))))$" --;-;-;- -0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1;0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1;0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1;0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 
0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 -strings -"" -"xxxabcdxxx" -regexps -"[abcd]" --;-;-;- --;3-4;-;3-4 -"^(?:[abcd])$" --;-;-;- --;-;-;- -"^(?:[abcd])" --;-;-;- --;-;-;- -"(?:[abcd])$" --;-;-;- --;-;-;- -strings -"" -"xxxabcdxxx" -regexps -"[^x]" --;-;-;- --;3-4;-;3-4 -"^(?:[^x])$" --;-;-;- --;-;-;- -"^(?:[^x])" --;-;-;- --;-;-;- -"(?:[^x])$" --;-;-;- --;-;-;- -strings -"" -"xxxabcdxxx" -regexps -"[abcd]+" --;-;-;- --;3-7;-;3-7 -"^(?:[abcd]+)$" --;-;-;- --;-;-;- -"^(?:[abcd]+)" --;-;-;- --;-;-;- -"(?:[abcd]+)$" --;-;-;- --;-;-;- -strings -"" -"xxxabcdxxx" -regexps -"[^x]+" --;-;-;- --;3-7;-;3-7 -"^(?:[^x]+)$" --;-;-;- --;-;-;- -"^(?:[^x]+)" --;-;-;- --;-;-;- -"(?:[^x]+)$" --;-;-;- --;-;-;- -strings -"" -"fo" -regexps -"(fo|foo)" --;-;-;- -0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2 -"^(?:(fo|foo))$" --;-;-;- -0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2 -"^(?:(fo|foo))" --;-;-;- -0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2 -"(?:(fo|foo))$" --;-;-;- -0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2 -strings -"" -"foo" -regexps -"(foo|fo)" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"^(?:(foo|fo))$" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"^(?:(foo|fo))" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"(?:(foo|fo))$" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -strings -"" -"aA" -regexps -"aa" --;-;-;- --;-;-;- -"^(?:aa)$" --;-;-;- --;-;-;- -"^(?:aa)" --;-;-;- --;-;-;- -"(?:aa)$" --;-;-;- --;-;-;- -strings -"" -"Aa" -regexps -"a" --;-;-;- --;1-2;-;1-2 -"^(?:a)$" --;-;-;- --;-;-;- -"^(?:a)" --;-;-;- --;-;-;- -"(?:a)$" --;-;-;- --;1-2;-;1-2 -strings -"" -"A" -regexps -"a" --;-;-;- --;-;-;- -"^(?:a)$" --;-;-;- --;-;-;- -"^(?:a)" --;-;-;- --;-;-;- -"(?:a)$" --;-;-;- --;-;-;- -strings -"" -"abc" -regexps -"ABC" --;-;-;- --;-;-;- -"^(?:ABC)$" --;-;-;- --;-;-;- -"^(?:ABC)" --;-;-;- --;-;-;- -"(?:ABC)$" --;-;-;- --;-;-;- -strings -"" -"XABCY" -regexps -"abc" --;-;-;- --;-;-;- -"^(?:abc)$" --;-;-;- --;-;-;- -"^(?:abc)" --;-;-;- --;-;-;- -"(?:abc)$" --;-;-;- --;-;-;- -strings -"" -"xabcy" -regexps -"ABC" --;-;-;- --;-;-;- -"^(?:ABC)$" 
--;-;-;- --;-;-;- -"^(?:ABC)" --;-;-;- --;-;-;- -"(?:ABC)$" --;-;-;- --;-;-;- -strings -"" -"foo" -regexps -"foo|bar|[A-Z]" --;-;-;- -0-3;0-3;0-3;0-3 -"^(?:foo|bar|[A-Z])$" --;-;-;- -0-3;0-3;0-3;0-3 -"^(?:foo|bar|[A-Z])" --;-;-;- -0-3;0-3;0-3;0-3 -"(?:foo|bar|[A-Z])$" --;-;-;- -0-3;0-3;0-3;0-3 -strings -"" -"foo" -regexps -"^(foo|bar|[A-Z])" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"^(?:^(foo|bar|[A-Z]))$" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"^(?:^(foo|bar|[A-Z]))" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"(?:^(foo|bar|[A-Z]))$" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -strings -"" -"foo\n" -regexps -"(foo|bar|[A-Z])$" --;-;-;- --;-;-;- -"^(?:(foo|bar|[A-Z])$)$" --;-;-;- --;-;-;- -"^(?:(foo|bar|[A-Z])$)" --;-;-;- --;-;-;- -"(?:(foo|bar|[A-Z])$)$" --;-;-;- --;-;-;- -strings -"" -"foo" -regexps -"(foo|bar|[A-Z])$" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"^(?:(foo|bar|[A-Z])$)$" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"^(?:(foo|bar|[A-Z])$)" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"(?:(foo|bar|[A-Z])$)$" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -strings -"" -"foo\n" -regexps -"^(foo|bar|[A-Z])$" --;-;-;- --;-;-;- -"^(?:^(foo|bar|[A-Z])$)$" --;-;-;- --;-;-;- -"^(?:^(foo|bar|[A-Z])$)" --;-;-;- --;-;-;- -"(?:^(foo|bar|[A-Z])$)$" --;-;-;- --;-;-;- -strings -"" -"foo" -regexps -"^(foo|bar|[A-Z])$" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"^(?:^(foo|bar|[A-Z])$)$" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"^(?:^(foo|bar|[A-Z])$)" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"(?:^(foo|bar|[A-Z])$)$" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -strings -"" -"bar" -regexps -"^(foo|bar|[A-Z])$" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"^(?:^(foo|bar|[A-Z])$)$" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"^(?:^(foo|bar|[A-Z])$)" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"(?:^(foo|bar|[A-Z])$)$" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -strings -"" -"X" -regexps -"^(foo|bar|[A-Z])$" --;-;-;- -0-1 0-1;0-1 0-1;0-1 0-1;0-1 0-1 -"^(?:^(foo|bar|[A-Z])$)$" 
--;-;-;- -0-1 0-1;0-1 0-1;0-1 0-1;0-1 0-1 -"^(?:^(foo|bar|[A-Z])$)" --;-;-;- -0-1 0-1;0-1 0-1;0-1 0-1;0-1 0-1 -"(?:^(foo|bar|[A-Z])$)$" --;-;-;- -0-1 0-1;0-1 0-1;0-1 0-1;0-1 0-1 -strings -"" -"XY" -regexps -"^(foo|bar|[A-Z])$" --;-;-;- --;-;-;- -"^(?:^(foo|bar|[A-Z])$)$" --;-;-;- --;-;-;- -"^(?:^(foo|bar|[A-Z])$)" --;-;-;- --;-;-;- -"(?:^(foo|bar|[A-Z])$)$" --;-;-;- --;-;-;- -strings -"" -"fo" -regexps -"^(fo|foo)$" --;-;-;- -0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2 -"^(?:^(fo|foo)$)$" --;-;-;- -0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2 -"^(?:^(fo|foo)$)" --;-;-;- -0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2 -"(?:^(fo|foo)$)$" --;-;-;- -0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2 -strings -"" -"foo" -regexps -"^(fo|foo)$" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"^(?:^(fo|foo)$)$" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"^(?:^(fo|foo)$)" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"(?:^(fo|foo)$)$" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -strings -"" -"fo" -regexps -"^^(fo|foo)$" --;-;-;- -0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2 -"^(?:^^(fo|foo)$)$" --;-;-;- -0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2 -"^(?:^^(fo|foo)$)" --;-;-;- -0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2 -"(?:^^(fo|foo)$)$" --;-;-;- -0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2 -strings -"" -"foo" -regexps -"^^(fo|foo)$" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"^(?:^^(fo|foo)$)$" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"^(?:^^(fo|foo)$)" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"(?:^^(fo|foo)$)$" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -strings -"" -"" -regexps -"^$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"^(?:^$)$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"^(?:^$)" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"(?:^$)$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -strings -"" -"x" -regexps -"^$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:^$)$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:^$)" -0-0;0-0;0-0;0-0 --;-;-;- -"(?:^$)$" -0-0;0-0;0-0;0-0 --;-;-;- -strings -"" -"" -regexps -"^^$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"^(?:^^$)$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"^(?:^^$)" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 
-"(?:^^$)$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -strings -"" -"" -regexps -"^$$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"^(?:^$$)$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"^(?:^$$)" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"(?:^$$)$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -strings -"" -"x" -regexps -"^^$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:^^$)$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:^^$)" -0-0;0-0;0-0;0-0 --;-;-;- -"(?:^^$)$" -0-0;0-0;0-0;0-0 --;-;-;- -strings -"" -"x" -regexps -"^$$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:^$$)$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:^$$)" -0-0;0-0;0-0;0-0 --;-;-;- -"(?:^$$)$" -0-0;0-0;0-0;0-0 --;-;-;- -strings -"" -"" -regexps -"^^$$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"^(?:^^$$)$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"^(?:^^$$)" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"(?:^^$$)$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -strings -"" -"x" -regexps -"^^$$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:^^$$)$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:^^$$)" -0-0;0-0;0-0;0-0 --;-;-;- -"(?:^^$$)$" -0-0;0-0;0-0;0-0 --;-;-;- -strings -"" -"" -regexps -"^^^^^^^^$$$$$$$$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"^(?:^^^^^^^^$$$$$$$$)$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"^(?:^^^^^^^^$$$$$$$$)" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"(?:^^^^^^^^$$$$$$$$)$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -strings -"" -"x" -regexps -"^" -0-0;0-0;0-0;0-0 --;0-0;-;0-0 -"^(?:^)$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:^)" -0-0;0-0;0-0;0-0 --;0-0;-;0-0 -"(?:^)$" -0-0;0-0;0-0;0-0 --;-;-;- -strings -"" -"x" -regexps -"$" -0-0;0-0;0-0;0-0 --;1-1;-;1-1 -"^(?:$)$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:$)" -0-0;0-0;0-0;0-0 --;-;-;- -"(?:$)$" -0-0;0-0;0-0;0-0 --;1-1;-;1-1 -strings -"" -"nofoo foo that" -regexps -"\\bfoo\\b" --;-;-;- --;6-9;-;6-9 -"^(?:\\bfoo\\b)$" --;-;-;- --;-;-;- -"^(?:\\bfoo\\b)" --;-;-;- --;-;-;- -"(?:\\bfoo\\b)$" --;-;-;- --;-;-;- -strings -"" -"faoa x" -regexps -"a\\b" --;-;-;- --;3-4;-;3-4 -"^(?:a\\b)$" --;-;-;- --;-;-;- -"^(?:a\\b)" --;-;-;- --;-;-;- -"(?:a\\b)$" --;-;-;- --;-;-;- -strings -"" -"bar x" -regexps -"\\bbar" --;-;-;- 
--;0-3;-;0-3 -"^(?:\\bbar)$" --;-;-;- --;-;-;- -"^(?:\\bbar)" --;-;-;- --;0-3;-;0-3 -"(?:\\bbar)$" --;-;-;- --;-;-;- -strings -"" -"foo\nbar x" -regexps -"\\bbar" --;-;-;- --;4-7;-;4-7 -"^(?:\\bbar)$" --;-;-;- --;-;-;- -"^(?:\\bbar)" --;-;-;- --;-;-;- -"(?:\\bbar)$" --;-;-;- --;-;-;- -strings -"" -"foobar" -regexps -"bar\\b" --;-;-;- --;3-6;-;3-6 -"^(?:bar\\b)$" --;-;-;- --;-;-;- -"^(?:bar\\b)" --;-;-;- --;-;-;- -"(?:bar\\b)$" --;-;-;- --;3-6;-;3-6 -strings -"" -"foobar\nxxx" -regexps -"bar\\b" --;-;-;- --;3-6;-;3-6 -"^(?:bar\\b)$" --;-;-;- --;-;-;- -"^(?:bar\\b)" --;-;-;- --;-;-;- -"(?:bar\\b)$" --;-;-;- --;-;-;- -strings -"" -"foo" -regexps -"(foo|bar|[A-Z])\\b" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"^(?:(foo|bar|[A-Z])\\b)$" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"^(?:(foo|bar|[A-Z])\\b)" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"(?:(foo|bar|[A-Z])\\b)$" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -strings -"" -"foo\n" -regexps -"(foo|bar|[A-Z])\\b" --;-;-;- --;0-3 0-3;-;0-3 0-3 -"^(?:(foo|bar|[A-Z])\\b)$" --;-;-;- --;-;-;- -"^(?:(foo|bar|[A-Z])\\b)" --;-;-;- --;0-3 0-3;-;0-3 0-3 -"(?:(foo|bar|[A-Z])\\b)$" --;-;-;- --;-;-;- -strings -"" -"" -regexps -"\\b" --;-;-;- --;-;-;- -"^(?:\\b)$" --;-;-;- --;-;-;- -"^(?:\\b)" --;-;-;- --;-;-;- -"(?:\\b)$" --;-;-;- --;-;-;- -strings -"" -"x" -regexps -"\\b" --;-;-;- --;0-0;-;0-0 -"^(?:\\b)$" --;-;-;- --;-;-;- -"^(?:\\b)" --;-;-;- --;0-0;-;0-0 -"(?:\\b)$" --;-;-;- --;1-1;-;1-1 -strings -"" -"foo" -regexps -"\\b(foo|bar|[A-Z])" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"^(?:\\b(foo|bar|[A-Z]))$" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"^(?:\\b(foo|bar|[A-Z]))" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"(?:\\b(foo|bar|[A-Z]))$" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -strings -"" -"X" -regexps -"\\b(foo|bar|[A-Z])\\b" --;-;-;- -0-1 0-1;0-1 0-1;0-1 0-1;0-1 0-1 -"^(?:\\b(foo|bar|[A-Z])\\b)$" --;-;-;- -0-1 0-1;0-1 0-1;0-1 0-1;0-1 0-1 -"^(?:\\b(foo|bar|[A-Z])\\b)" --;-;-;- -0-1 0-1;0-1 0-1;0-1 0-1;0-1 0-1 
-"(?:\\b(foo|bar|[A-Z])\\b)$" --;-;-;- -0-1 0-1;0-1 0-1;0-1 0-1;0-1 0-1 -strings -"" -"XY" -regexps -"\\b(foo|bar|[A-Z])\\b" --;-;-;- --;-;-;- -"^(?:\\b(foo|bar|[A-Z])\\b)$" --;-;-;- --;-;-;- -"^(?:\\b(foo|bar|[A-Z])\\b)" --;-;-;- --;-;-;- -"(?:\\b(foo|bar|[A-Z])\\b)$" --;-;-;- --;-;-;- -strings -"" -"bar" -regexps -"\\b(foo|bar|[A-Z])\\b" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"^(?:\\b(foo|bar|[A-Z])\\b)$" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"^(?:\\b(foo|bar|[A-Z])\\b)" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"(?:\\b(foo|bar|[A-Z])\\b)$" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -strings -"" -"foo" -regexps -"\\b(foo|bar|[A-Z])\\b" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"^(?:\\b(foo|bar|[A-Z])\\b)$" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"^(?:\\b(foo|bar|[A-Z])\\b)" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"(?:\\b(foo|bar|[A-Z])\\b)$" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -strings -"" -"foo\n" -regexps -"\\b(foo|bar|[A-Z])\\b" --;-;-;- --;0-3 0-3;-;0-3 0-3 -"^(?:\\b(foo|bar|[A-Z])\\b)$" --;-;-;- --;-;-;- -"^(?:\\b(foo|bar|[A-Z])\\b)" --;-;-;- --;0-3 0-3;-;0-3 0-3 -"(?:\\b(foo|bar|[A-Z])\\b)$" --;-;-;- --;-;-;- -strings -"" -"ffoo bbar N x" -regexps -"\\b(foo|bar|[A-Z])\\b" --;-;-;- --;10-11 10-11;-;10-11 10-11 -"^(?:\\b(foo|bar|[A-Z])\\b)$" --;-;-;- --;-;-;- -"^(?:\\b(foo|bar|[A-Z])\\b)" --;-;-;- --;-;-;- -"(?:\\b(foo|bar|[A-Z])\\b)$" --;-;-;- --;-;-;- -strings -"" -"fo" -regexps -"\\b(fo|foo)\\b" --;-;-;- -0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2 -"^(?:\\b(fo|foo)\\b)$" --;-;-;- -0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2 -"^(?:\\b(fo|foo)\\b)" --;-;-;- -0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2 -"(?:\\b(fo|foo)\\b)$" --;-;-;- -0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2 -strings -"" -"foo" -regexps -"\\b(fo|foo)\\b" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"^(?:\\b(fo|foo)\\b)$" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"^(?:\\b(fo|foo)\\b)" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"(?:\\b(fo|foo)\\b)$" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -strings -"" -"" -regexps 
-"\\b\\b" --;-;-;- --;-;-;- -"^(?:\\b\\b)$" --;-;-;- --;-;-;- -"^(?:\\b\\b)" --;-;-;- --;-;-;- -"(?:\\b\\b)$" --;-;-;- --;-;-;- -strings -"" -"x" -regexps -"\\b\\b" --;-;-;- --;0-0;-;0-0 -"^(?:\\b\\b)$" --;-;-;- --;-;-;- -"^(?:\\b\\b)" --;-;-;- --;0-0;-;0-0 -"(?:\\b\\b)$" --;-;-;- --;1-1;-;1-1 -strings -"" -"" -regexps -"\\b$" --;-;-;- --;-;-;- -"^(?:\\b$)$" --;-;-;- --;-;-;- -"^(?:\\b$)" --;-;-;- --;-;-;- -"(?:\\b$)$" --;-;-;- --;-;-;- -strings -"" -"x" -regexps -"\\b$" --;-;-;- --;1-1;-;1-1 -"^(?:\\b$)$" --;-;-;- --;-;-;- -"^(?:\\b$)" --;-;-;- --;-;-;- -"(?:\\b$)$" --;-;-;- --;1-1;-;1-1 -strings -"" -"y x" -regexps -"\\b$" --;-;-;- --;3-3;-;3-3 -"^(?:\\b$)$" --;-;-;- --;-;-;- -"^(?:\\b$)" --;-;-;- --;-;-;- -"(?:\\b$)$" --;-;-;- --;3-3;-;3-3 -strings -"" -"x" -regexps -"\\b.$" --;-;-;- -0-1;0-1;0-1;0-1 -"^(?:\\b.$)$" --;-;-;- -0-1;0-1;0-1;0-1 -"^(?:\\b.$)" --;-;-;- -0-1;0-1;0-1;0-1 -"(?:\\b.$)$" --;-;-;- -0-1;0-1;0-1;0-1 -strings -"" -"fo" -regexps -"^\\b(fo|foo)\\b" --;-;-;- -0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2 -"^(?:^\\b(fo|foo)\\b)$" --;-;-;- -0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2 -"^(?:^\\b(fo|foo)\\b)" --;-;-;- -0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2 -"(?:^\\b(fo|foo)\\b)$" --;-;-;- -0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2 -strings -"" -"foo" -regexps -"^\\b(fo|foo)\\b" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"^(?:^\\b(fo|foo)\\b)$" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"^(?:^\\b(fo|foo)\\b)" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"(?:^\\b(fo|foo)\\b)$" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -strings -"" -"" -regexps -"^\\b" --;-;-;- --;-;-;- -"^(?:^\\b)$" --;-;-;- --;-;-;- -"^(?:^\\b)" --;-;-;- --;-;-;- -"(?:^\\b)$" --;-;-;- --;-;-;- -strings -"" -"x" -regexps -"^\\b" --;-;-;- --;0-0;-;0-0 -"^(?:^\\b)$" --;-;-;- --;-;-;- -"^(?:^\\b)" --;-;-;- --;0-0;-;0-0 -"(?:^\\b)$" --;-;-;- --;-;-;- -strings -"" -"" -regexps -"^\\b\\b" --;-;-;- --;-;-;- -"^(?:^\\b\\b)$" --;-;-;- --;-;-;- -"^(?:^\\b\\b)" --;-;-;- --;-;-;- -"(?:^\\b\\b)$" --;-;-;- --;-;-;- -strings -"" -"x" -regexps 
-"^\\b\\b" --;-;-;- --;0-0;-;0-0 -"^(?:^\\b\\b)$" --;-;-;- --;-;-;- -"^(?:^\\b\\b)" --;-;-;- --;0-0;-;0-0 -"(?:^\\b\\b)$" --;-;-;- --;-;-;- -strings -"" -"" -regexps -"^\\b$" --;-;-;- --;-;-;- -"^(?:^\\b$)$" --;-;-;- --;-;-;- -"^(?:^\\b$)" --;-;-;- --;-;-;- -"(?:^\\b$)$" --;-;-;- --;-;-;- -strings -"" -"x" -regexps -"^\\b$" --;-;-;- --;-;-;- -"^(?:^\\b$)$" --;-;-;- --;-;-;- -"^(?:^\\b$)" --;-;-;- --;-;-;- -"(?:^\\b$)$" --;-;-;- --;-;-;- -strings -"" -"x" -regexps -"^\\b.$" --;-;-;- -0-1;0-1;0-1;0-1 -"^(?:^\\b.$)$" --;-;-;- -0-1;0-1;0-1;0-1 -"^(?:^\\b.$)" --;-;-;- -0-1;0-1;0-1;0-1 -"(?:^\\b.$)$" --;-;-;- -0-1;0-1;0-1;0-1 -strings -"" -"x" -regexps -"^\\b.\\b$" --;-;-;- -0-1;0-1;0-1;0-1 -"^(?:^\\b.\\b$)$" --;-;-;- -0-1;0-1;0-1;0-1 -"^(?:^\\b.\\b$)" --;-;-;- -0-1;0-1;0-1;0-1 -"(?:^\\b.\\b$)$" --;-;-;- -0-1;0-1;0-1;0-1 -strings -"" -"" -regexps -"^^^^^^^^\\b$$$$$$$" --;-;-;- --;-;-;- -"^(?:^^^^^^^^\\b$$$$$$$)$" --;-;-;- --;-;-;- -"^(?:^^^^^^^^\\b$$$$$$$)" --;-;-;- --;-;-;- -"(?:^^^^^^^^\\b$$$$$$$)$" --;-;-;- --;-;-;- -strings -"" -"x" -regexps -"^^^^^^^^\\b.$$$$$$" --;-;-;- -0-1;0-1;0-1;0-1 -"^(?:^^^^^^^^\\b.$$$$$$)$" --;-;-;- -0-1;0-1;0-1;0-1 -"^(?:^^^^^^^^\\b.$$$$$$)" --;-;-;- -0-1;0-1;0-1;0-1 -"(?:^^^^^^^^\\b.$$$$$$)$" --;-;-;- -0-1;0-1;0-1;0-1 -strings -"" -"x" -regexps -"^^^^^^^^\\b$$$$$$$" --;-;-;- --;-;-;- -"^(?:^^^^^^^^\\b$$$$$$$)$" --;-;-;- --;-;-;- -"^(?:^^^^^^^^\\b$$$$$$$)" --;-;-;- --;-;-;- -"(?:^^^^^^^^\\b$$$$$$$)$" --;-;-;- --;-;-;- -strings -"" -"n foo xfoox that" -regexps -"\\Bfoo\\B" --;-;-;- --;7-10;-;7-10 -"^(?:\\Bfoo\\B)$" --;-;-;- --;-;-;- -"^(?:\\Bfoo\\B)" --;-;-;- --;-;-;- -"(?:\\Bfoo\\B)$" --;-;-;- --;-;-;- -strings -"" -"faoa x" -regexps -"a\\B" --;-;-;- --;1-2;-;1-2 -"^(?:a\\B)$" --;-;-;- --;-;-;- -"^(?:a\\B)" --;-;-;- --;-;-;- -"(?:a\\B)$" --;-;-;- --;-;-;- -strings -"" -"bar x" -regexps -"\\Bbar" --;-;-;- --;-;-;- -"^(?:\\Bbar)$" --;-;-;- --;-;-;- -"^(?:\\Bbar)" --;-;-;- --;-;-;- -"(?:\\Bbar)$" --;-;-;- --;-;-;- -strings -"" -"foo\nbar x" 
-regexps -"\\Bbar" --;-;-;- --;-;-;- -"^(?:\\Bbar)$" --;-;-;- --;-;-;- -"^(?:\\Bbar)" --;-;-;- --;-;-;- -"(?:\\Bbar)$" --;-;-;- --;-;-;- -strings -"" -"foobar" -regexps -"bar\\B" --;-;-;- --;-;-;- -"^(?:bar\\B)$" --;-;-;- --;-;-;- -"^(?:bar\\B)" --;-;-;- --;-;-;- -"(?:bar\\B)$" --;-;-;- --;-;-;- -strings -"" -"foobar\nxxx" -regexps -"bar\\B" --;-;-;- --;-;-;- -"^(?:bar\\B)$" --;-;-;- --;-;-;- -"^(?:bar\\B)" --;-;-;- --;-;-;- -"(?:bar\\B)$" --;-;-;- --;-;-;- -strings -"" -"foox" -regexps -"(foo|bar|[A-Z])\\B" --;-;-;- --;0-3 0-3;-;0-3 0-3 -"^(?:(foo|bar|[A-Z])\\B)$" --;-;-;- --;-;-;- -"^(?:(foo|bar|[A-Z])\\B)" --;-;-;- --;0-3 0-3;-;0-3 0-3 -"(?:(foo|bar|[A-Z])\\B)$" --;-;-;- --;-;-;- -strings -"" -"foo\n" -regexps -"(foo|bar|[A-Z])\\B" --;-;-;- --;-;-;- -"^(?:(foo|bar|[A-Z])\\B)$" --;-;-;- --;-;-;- -"^(?:(foo|bar|[A-Z])\\B)" --;-;-;- --;-;-;- -"(?:(foo|bar|[A-Z])\\B)$" --;-;-;- --;-;-;- -strings -"" -"" -regexps -"\\B" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"^(?:\\B)$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"^(?:\\B)" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"(?:\\B)$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -strings -"" -"x" -regexps -"\\B" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:\\B)$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:\\B)" -0-0;0-0;0-0;0-0 --;-;-;- -"(?:\\B)$" -0-0;0-0;0-0;0-0 --;-;-;- -strings -"" -"foo" -regexps -"\\B(foo|bar|[A-Z])" --;-;-;- --;-;-;- -"^(?:\\B(foo|bar|[A-Z]))$" --;-;-;- --;-;-;- -"^(?:\\B(foo|bar|[A-Z]))" --;-;-;- --;-;-;- -"(?:\\B(foo|bar|[A-Z]))$" --;-;-;- --;-;-;- -strings -"" -"xXy" -regexps -"\\B(foo|bar|[A-Z])\\B" --;-;-;- --;1-2 1-2;-;1-2 1-2 -"^(?:\\B(foo|bar|[A-Z])\\B)$" --;-;-;- --;-;-;- -"^(?:\\B(foo|bar|[A-Z])\\B)" --;-;-;- --;-;-;- -"(?:\\B(foo|bar|[A-Z])\\B)$" --;-;-;- --;-;-;- -strings -"" -"XY" -regexps -"\\B(foo|bar|[A-Z])\\B" --;-;-;- --;-;-;- -"^(?:\\B(foo|bar|[A-Z])\\B)$" --;-;-;- --;-;-;- -"^(?:\\B(foo|bar|[A-Z])\\B)" --;-;-;- --;-;-;- -"(?:\\B(foo|bar|[A-Z])\\B)$" --;-;-;- --;-;-;- -strings -"" -"XYZ" -regexps -"\\B(foo|bar|[A-Z])\\B" --;-;-;- --;1-2 
1-2;-;1-2 1-2 -"^(?:\\B(foo|bar|[A-Z])\\B)$" --;-;-;- --;-;-;- -"^(?:\\B(foo|bar|[A-Z])\\B)" --;-;-;- --;-;-;- -"(?:\\B(foo|bar|[A-Z])\\B)$" --;-;-;- --;-;-;- -strings -"" -"abara" -regexps -"\\B(foo|bar|[A-Z])\\B" --;-;-;- --;1-4 1-4;-;1-4 1-4 -"^(?:\\B(foo|bar|[A-Z])\\B)$" --;-;-;- --;-;-;- -"^(?:\\B(foo|bar|[A-Z])\\B)" --;-;-;- --;-;-;- -"(?:\\B(foo|bar|[A-Z])\\B)$" --;-;-;- --;-;-;- -strings -"" -"xfoo_" -regexps -"\\B(foo|bar|[A-Z])\\B" --;-;-;- --;1-4 1-4;-;1-4 1-4 -"^(?:\\B(foo|bar|[A-Z])\\B)$" --;-;-;- --;-;-;- -"^(?:\\B(foo|bar|[A-Z])\\B)" --;-;-;- --;-;-;- -"(?:\\B(foo|bar|[A-Z])\\B)$" --;-;-;- --;-;-;- -strings -"" -"xfoo\n" -regexps -"\\B(foo|bar|[A-Z])\\B" --;-;-;- --;-;-;- -"^(?:\\B(foo|bar|[A-Z])\\B)$" --;-;-;- --;-;-;- -"^(?:\\B(foo|bar|[A-Z])\\B)" --;-;-;- --;-;-;- -"(?:\\B(foo|bar|[A-Z])\\B)$" --;-;-;- --;-;-;- -strings -"" -"foo bar vNx" -regexps -"\\B(foo|bar|[A-Z])\\B" --;-;-;- --;9-10 9-10;-;9-10 9-10 -"^(?:\\B(foo|bar|[A-Z])\\B)$" --;-;-;- --;-;-;- -"^(?:\\B(foo|bar|[A-Z])\\B)" --;-;-;- --;-;-;- -"(?:\\B(foo|bar|[A-Z])\\B)$" --;-;-;- --;-;-;- -strings -"" -"xfoo" -regexps -"\\B(fo|foo)\\B" --;-;-;- --;1-3 1-3;-;1-3 1-3 -"^(?:\\B(fo|foo)\\B)$" --;-;-;- --;-;-;- -"^(?:\\B(fo|foo)\\B)" --;-;-;- --;-;-;- -"(?:\\B(fo|foo)\\B)$" --;-;-;- --;-;-;- -strings -"" -"xfooo" -regexps -"\\B(foo|fo)\\B" --;-;-;- --;1-4 1-4;-;1-4 1-4 -"^(?:\\B(foo|fo)\\B)$" --;-;-;- --;-;-;- -"^(?:\\B(foo|fo)\\B)" --;-;-;- --;-;-;- -"(?:\\B(foo|fo)\\B)$" --;-;-;- --;-;-;- -strings -"" -"" -regexps -"\\B\\B" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"^(?:\\B\\B)$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"^(?:\\B\\B)" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"(?:\\B\\B)$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -strings -"" -"x" -regexps -"\\B\\B" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:\\B\\B)$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:\\B\\B)" -0-0;0-0;0-0;0-0 --;-;-;- -"(?:\\B\\B)$" -0-0;0-0;0-0;0-0 --;-;-;- -strings -"" -"" -regexps -"\\B$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"^(?:\\B$)$" -0-0;0-0;0-0;0-0 
-0-0;0-0;0-0;0-0 -"^(?:\\B$)" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"(?:\\B$)$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -strings -"" -"x" -regexps -"\\B$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:\\B$)$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:\\B$)" -0-0;0-0;0-0;0-0 --;-;-;- -"(?:\\B$)$" -0-0;0-0;0-0;0-0 --;-;-;- -strings -"" -"y x" -regexps -"\\B$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:\\B$)$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:\\B$)" -0-0;0-0;0-0;0-0 --;-;-;- -"(?:\\B$)$" -0-0;0-0;0-0;0-0 --;-;-;- -strings -"" -"x" -regexps -"\\B.$" --;-;-;- --;-;-;- -"^(?:\\B.$)$" --;-;-;- --;-;-;- -"^(?:\\B.$)" --;-;-;- --;-;-;- -"(?:\\B.$)$" --;-;-;- --;-;-;- -strings -"" -"fo" -regexps -"^\\B(fo|foo)\\B" --;-;-;- --;-;-;- -"^(?:^\\B(fo|foo)\\B)$" --;-;-;- --;-;-;- -"^(?:^\\B(fo|foo)\\B)" --;-;-;- --;-;-;- -"(?:^\\B(fo|foo)\\B)$" --;-;-;- --;-;-;- -strings -"" -"foo" -regexps -"^\\B(fo|foo)\\B" --;-;-;- --;-;-;- -"^(?:^\\B(fo|foo)\\B)$" --;-;-;- --;-;-;- -"^(?:^\\B(fo|foo)\\B)" --;-;-;- --;-;-;- -"(?:^\\B(fo|foo)\\B)$" --;-;-;- --;-;-;- -strings -"" -"" -regexps -"^\\B" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"^(?:^\\B)$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"^(?:^\\B)" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"(?:^\\B)$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -strings -"" -"x" -regexps -"^\\B" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:^\\B)$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:^\\B)" -0-0;0-0;0-0;0-0 --;-;-;- -"(?:^\\B)$" -0-0;0-0;0-0;0-0 --;-;-;- -strings -"" -"" -regexps -"^\\B\\B" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"^(?:^\\B\\B)$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"^(?:^\\B\\B)" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"(?:^\\B\\B)$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -strings -"" -"x" -regexps -"^\\B\\B" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:^\\B\\B)$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:^\\B\\B)" -0-0;0-0;0-0;0-0 --;-;-;- -"(?:^\\B\\B)$" -0-0;0-0;0-0;0-0 --;-;-;- -strings -"" -"" -regexps -"^\\B$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"^(?:^\\B$)$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"^(?:^\\B$)" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"(?:^\\B$)$" 
-0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -strings -"" -"x" -regexps -"^\\B$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:^\\B$)$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:^\\B$)" -0-0;0-0;0-0;0-0 --;-;-;- -"(?:^\\B$)$" -0-0;0-0;0-0;0-0 --;-;-;- -strings -"" -"x" -regexps -"^\\B.$" --;-;-;- --;-;-;- -"^(?:^\\B.$)$" --;-;-;- --;-;-;- -"^(?:^\\B.$)" --;-;-;- --;-;-;- -"(?:^\\B.$)$" --;-;-;- --;-;-;- -strings -"" -"x" -regexps -"^\\B.\\B$" --;-;-;- --;-;-;- -"^(?:^\\B.\\B$)$" --;-;-;- --;-;-;- -"^(?:^\\B.\\B$)" --;-;-;- --;-;-;- -"(?:^\\B.\\B$)$" --;-;-;- --;-;-;- -strings -"" -"" -regexps -"^^^^^^^^\\B$$$$$$$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"^(?:^^^^^^^^\\B$$$$$$$)$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"^(?:^^^^^^^^\\B$$$$$$$)" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"(?:^^^^^^^^\\B$$$$$$$)$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -strings -"" -"x" -regexps -"^^^^^^^^\\B.$$$$$$" --;-;-;- --;-;-;- -"^(?:^^^^^^^^\\B.$$$$$$)$" --;-;-;- --;-;-;- -"^(?:^^^^^^^^\\B.$$$$$$)" --;-;-;- --;-;-;- -"(?:^^^^^^^^\\B.$$$$$$)$" --;-;-;- --;-;-;- -strings -"" -"x" -regexps -"^^^^^^^^\\B$$$$$$$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:^^^^^^^^\\B$$$$$$$)$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:^^^^^^^^\\B$$$$$$$)" -0-0;0-0;0-0;0-0 --;-;-;- -"(?:^^^^^^^^\\B$$$$$$$)$" -0-0;0-0;0-0;0-0 --;-;-;- -strings -"" -"x" -regexps -"\\bx\\b" --;-;-;- -0-1;0-1;0-1;0-1 -"^(?:\\bx\\b)$" --;-;-;- -0-1;0-1;0-1;0-1 -"^(?:\\bx\\b)" --;-;-;- -0-1;0-1;0-1;0-1 -"(?:\\bx\\b)$" --;-;-;- -0-1;0-1;0-1;0-1 -strings -"" -"x>" -regexps -"\\bx\\b" --;-;-;- --;0-1;-;0-1 -"^(?:\\bx\\b)$" --;-;-;- --;-;-;- -"^(?:\\bx\\b)" --;-;-;- --;0-1;-;0-1 -"(?:\\bx\\b)$" --;-;-;- --;-;-;- -strings -"" -"<x" -regexps -"\\bx\\b" --;-;-;- --;1-2;-;1-2 -"^(?:\\bx\\b)$" --;-;-;- --;-;-;- -"^(?:\\bx\\b)" --;-;-;- --;-;-;- -"(?:\\bx\\b)$" --;-;-;- --;1-2;-;1-2 -strings -"" -"<x>" -regexps -"\\bx\\b" --;-;-;- --;1-2;-;1-2 -"^(?:\\bx\\b)$" --;-;-;- --;-;-;- -"^(?:\\bx\\b)" --;-;-;- --;-;-;- -"(?:\\bx\\b)$" --;-;-;- --;-;-;- -strings -"" -"ax" -regexps -"\\bx\\b" --;-;-;- --;-;-;- 
-"^(?:\\bx\\b)$" --;-;-;- --;-;-;- -"^(?:\\bx\\b)" --;-;-;- --;-;-;- -"(?:\\bx\\b)$" --;-;-;- --;-;-;- -strings -"" -"xb" -regexps -"\\bx\\b" --;-;-;- --;-;-;- -"^(?:\\bx\\b)$" --;-;-;- --;-;-;- -"^(?:\\bx\\b)" --;-;-;- --;-;-;- -"(?:\\bx\\b)$" --;-;-;- --;-;-;- -strings -"" -"axb" -regexps -"\\bx\\b" --;-;-;- --;-;-;- -"^(?:\\bx\\b)$" --;-;-;- --;-;-;- -"^(?:\\bx\\b)" --;-;-;- --;-;-;- -"(?:\\bx\\b)$" --;-;-;- --;-;-;- -strings -"" -"«x" -regexps -"\\bx\\b" --;-;-;- --;2-3;-;2-3 -"^(?:\\bx\\b)$" --;-;-;- --;-;-;- -"^(?:\\bx\\b)" --;-;-;- --;-;-;- -"(?:\\bx\\b)$" --;-;-;- --;2-3;-;2-3 -strings -"" -"x»" -regexps -"\\bx\\b" --;-;-;- --;0-1;-;0-1 -"^(?:\\bx\\b)$" --;-;-;- --;-;-;- -"^(?:\\bx\\b)" --;-;-;- --;0-1;-;0-1 -"(?:\\bx\\b)$" --;-;-;- --;-;-;- -strings -"" -"«x»" -regexps -"\\bx\\b" --;-;-;- --;2-3;-;2-3 -"^(?:\\bx\\b)$" --;-;-;- --;-;-;- -"^(?:\\bx\\b)" --;-;-;- --;-;-;- -"(?:\\bx\\b)$" --;-;-;- --;-;-;- -strings -"" -"axb" -regexps -"\\bx\\b" --;-;-;- --;-;-;- -"^(?:\\bx\\b)$" --;-;-;- --;-;-;- -"^(?:\\bx\\b)" --;-;-;- --;-;-;- -"(?:\\bx\\b)$" --;-;-;- --;-;-;- -strings -"" -"áxβ" -regexps -"\\bx\\b" --;-;-;- --;2-3;-;2-3 -"^(?:\\bx\\b)$" --;-;-;- --;-;-;- -"^(?:\\bx\\b)" --;-;-;- --;-;-;- -"(?:\\bx\\b)$" --;-;-;- --;-;-;- -strings -"" -"axb" -regexps -"\\Bx\\B" --;-;-;- --;1-2;-;1-2 -"^(?:\\Bx\\B)$" --;-;-;- --;-;-;- -"^(?:\\Bx\\B)" --;-;-;- --;-;-;- -"(?:\\Bx\\B)$" --;-;-;- --;-;-;- -strings -"" -"áxβ" -regexps -"\\Bx\\B" --;-;-;- --;-;-;- -"^(?:\\Bx\\B)$" --;-;-;- --;-;-;- -"^(?:\\Bx\\B)" --;-;-;- --;-;-;- -"(?:\\Bx\\B)$" --;-;-;- --;-;-;- -strings -"" -"" -regexps -"^$^$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"^(?:^$^$)$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"^(?:^$^$)" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"(?:^$^$)$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -strings -"" -"" -regexps -"^$^" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"^(?:^$^)$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"^(?:^$^)" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"(?:^$^)$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 
-strings -"" -"" -regexps -"$^$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"^(?:$^$)$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"^(?:$^$)" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -"(?:$^$)$" -0-0;0-0;0-0;0-0 -0-0;0-0;0-0;0-0 -strings -"" -"x" -regexps -"^$^$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:^$^$)$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:^$^$)" -0-0;0-0;0-0;0-0 --;-;-;- -"(?:^$^$)$" -0-0;0-0;0-0;0-0 --;-;-;- -strings -"" -"x" -regexps -"^$^" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:^$^)$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:^$^)" -0-0;0-0;0-0;0-0 --;-;-;- -"(?:^$^)$" -0-0;0-0;0-0;0-0 --;-;-;- -strings -"" -"x" -regexps -"$^$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:$^$)$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:$^$)" -0-0;0-0;0-0;0-0 --;-;-;- -"(?:$^$)$" -0-0;0-0;0-0;0-0 --;-;-;- -strings -"" -"x\ny" -regexps -"^$^$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:^$^$)$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:^$^$)" -0-0;0-0;0-0;0-0 --;-;-;- -"(?:^$^$)$" -0-0;0-0;0-0;0-0 --;-;-;- -strings -"" -"x\ny" -regexps -"^$^" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:^$^)$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:^$^)" -0-0;0-0;0-0;0-0 --;-;-;- -"(?:^$^)$" -0-0;0-0;0-0;0-0 --;-;-;- -strings -"" -"x\ny" -regexps -"$^$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:$^$)$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:$^$)" -0-0;0-0;0-0;0-0 --;-;-;- -"(?:$^$)$" -0-0;0-0;0-0;0-0 --;-;-;- -strings -"" -"x\n\ny" -regexps -"^$^$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:^$^$)$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:^$^$)" -0-0;0-0;0-0;0-0 --;-;-;- -"(?:^$^$)$" -0-0;0-0;0-0;0-0 --;-;-;- -strings -"" -"x\n\ny" -regexps -"^$^" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:^$^)$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:^$^)" -0-0;0-0;0-0;0-0 --;-;-;- -"(?:^$^)$" -0-0;0-0;0-0;0-0 --;-;-;- -strings -"" -"x\n\ny" -regexps -"$^$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:$^$)$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:$^$)" -0-0;0-0;0-0;0-0 --;-;-;- -"(?:$^$)$" -0-0;0-0;0-0;0-0 --;-;-;- -strings -"" -"foo$bar" -regexps -"^(foo\\$)$" --;-;-;- --;-;-;- -"^(?:^(foo\\$)$)$" --;-;-;- --;-;-;- -"^(?:^(foo\\$)$)" --;-;-;- --;-;-;- -"(?:^(foo\\$)$)$" --;-;-;- --;-;-;- 
-strings -"" -"foo$bar" -regexps -"(foo\\$)" --;-;-;- --;0-4 0-4;-;0-4 0-4 -"^(?:(foo\\$))$" --;-;-;- --;-;-;- -"^(?:(foo\\$))" --;-;-;- --;0-4 0-4;-;0-4 0-4 -"(?:(foo\\$))$" --;-;-;- --;-;-;- -strings -"" -"abc" -regexps -"^...$" --;-;-;- -0-3;0-3;0-3;0-3 -"^(?:^...$)$" --;-;-;- -0-3;0-3;0-3;0-3 -"^(?:^...$)" --;-;-;- -0-3;0-3;0-3;0-3 -"(?:^...$)$" --;-;-;- -0-3;0-3;0-3;0-3 -strings -"" -"本" -regexps -"^本$" --;-;-;- -0-3;0-3;0-3;0-3 -"^(?:^本$)$" --;-;-;- -0-3;0-3;0-3;0-3 -"^(?:^本$)" --;-;-;- -0-3;0-3;0-3;0-3 -"(?:^本$)$" --;-;-;- -0-3;0-3;0-3;0-3 -strings -"" -"日本語" -regexps -"^...$" --;-;-;- -0-9;0-9;0-9;0-9 -"^(?:^...$)$" --;-;-;- -0-9;0-9;0-9;0-9 -"^(?:^...$)" --;-;-;- -0-9;0-9;0-9;0-9 -"(?:^...$)$" --;-;-;- -0-9;0-9;0-9;0-9 -strings -"" -".本." -regexps -"^...$" --;-;-;- -0-5;0-5;0-5;0-5 -"^(?:^...$)$" --;-;-;- -0-5;0-5;0-5;0-5 -"^(?:^...$)" --;-;-;- -0-5;0-5;0-5;0-5 -"(?:^...$)$" --;-;-;- -0-5;0-5;0-5;0-5 -strings -"" -"本" -regexps -"^\\C\\C\\C$" --;-;-;- -0-3;0-3;0-3;0-3 -"^(?:^\\C\\C\\C$)$" --;-;-;- -0-3;0-3;0-3;0-3 -"^(?:^\\C\\C\\C$)" --;-;-;- -0-3;0-3;0-3;0-3 -"(?:^\\C\\C\\C$)$" --;-;-;- -0-3;0-3;0-3;0-3 -strings -"" -"本" -regexps -"^\\C$" --;-;-;- --;-;-;- -"^(?:^\\C$)$" --;-;-;- --;-;-;- -"^(?:^\\C$)" --;-;-;- --;-;-;- -"(?:^\\C$)$" --;-;-;- --;-;-;- -strings -"" -"日本語" -regexps -"^\\C\\C\\C$" --;-;-;- --;-;-;- -"^(?:^\\C\\C\\C$)$" --;-;-;- --;-;-;- -"^(?:^\\C\\C\\C$)" --;-;-;- --;-;-;- -"(?:^\\C\\C\\C$)$" --;-;-;- --;-;-;- -strings -"" -"日本語" -regexps -"^...$" --;-;-;- -0-9;0-9;0-9;0-9 -"^(?:^...$)$" --;-;-;- -0-9;0-9;0-9;0-9 -"^(?:^...$)" --;-;-;- -0-9;0-9;0-9;0-9 -"(?:^...$)$" --;-;-;- -0-9;0-9;0-9;0-9 -strings -"" -"日本語" -regexps -"^.........$" --;-;-;- --;-;-;- -"^(?:^.........$)$" --;-;-;- --;-;-;- -"^(?:^.........$)" --;-;-;- --;-;-;- -"(?:^.........$)$" --;-;-;- --;-;-;- -strings -"" -".本." 
-regexps -"^...$" --;-;-;- -0-5;0-5;0-5;0-5 -"^(?:^...$)$" --;-;-;- -0-5;0-5;0-5;0-5 -"^(?:^...$)" --;-;-;- -0-5;0-5;0-5;0-5 -"(?:^...$)$" --;-;-;- -0-5;0-5;0-5;0-5 -strings -"" -".本." -regexps -"^.....$" --;-;-;- --;-;-;- -"^(?:^.....$)$" --;-;-;- --;-;-;- -"^(?:^.....$)" --;-;-;- --;-;-;- -"(?:^.....$)$" --;-;-;- --;-;-;- -strings -"" -"xfooo" -regexps -"\\B(fo|foo)\\B" --;-;-;- --;1-3 1-3;-;1-4 1-4 -"^(?:\\B(fo|foo)\\B)$" --;-;-;- --;-;-;- -"^(?:\\B(fo|foo)\\B)" --;-;-;- --;-;-;- -"(?:\\B(fo|foo)\\B)$" --;-;-;- --;-;-;- -strings -"" -"foo" -regexps -"(fo|foo)" --;-;-;- -0-3 0-3;0-2 0-2;0-3 0-3;0-3 0-3 -"^(?:(fo|foo))$" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -"^(?:(fo|foo))" --;-;-;- -0-3 0-3;0-2 0-2;0-3 0-3;0-3 0-3 -"(?:(fo|foo))$" --;-;-;- -0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3 -strings -"" -"a" -regexps -"\\141" --;-;-;- -0-1;0-1;0-1;0-1 -"^(?:\\141)$" --;-;-;- -0-1;0-1;0-1;0-1 -"^(?:\\141)" --;-;-;- -0-1;0-1;0-1;0-1 -"(?:\\141)$" --;-;-;- -0-1;0-1;0-1;0-1 -strings -"" -"0" -regexps -"\\060" --;-;-;- -0-1;0-1;0-1;0-1 -"^(?:\\060)$" --;-;-;- -0-1;0-1;0-1;0-1 -"^(?:\\060)" --;-;-;- -0-1;0-1;0-1;0-1 -"(?:\\060)$" --;-;-;- -0-1;0-1;0-1;0-1 -strings -"" -"00" -regexps -"\\0600" --;-;-;- -0-2;0-2;0-2;0-2 -"^(?:\\0600)$" --;-;-;- -0-2;0-2;0-2;0-2 -"^(?:\\0600)" --;-;-;- -0-2;0-2;0-2;0-2 -"(?:\\0600)$" --;-;-;- -0-2;0-2;0-2;0-2 -strings -"" -"08" -regexps -"\\608" --;-;-;- -0-2;0-2;0-2;0-2 -"^(?:\\608)$" --;-;-;- -0-2;0-2;0-2;0-2 -"^(?:\\608)" --;-;-;- -0-2;0-2;0-2;0-2 -"(?:\\608)$" --;-;-;- -0-2;0-2;0-2;0-2 -strings -"" -"" -regexps -"\\01" --;-;-;- -0-1;0-1;0-1;0-1 -"^(?:\\01)$" --;-;-;- -0-1;0-1;0-1;0-1 -"^(?:\\01)" --;-;-;- -0-1;0-1;0-1;0-1 -"(?:\\01)$" --;-;-;- -0-1;0-1;0-1;0-1 -strings -"" -"8" -regexps -"\\018" --;-;-;- -0-2;0-2;0-2;0-2 -"^(?:\\018)$" --;-;-;- -0-2;0-2;0-2;0-2 -"^(?:\\018)" --;-;-;- -0-2;0-2;0-2;0-2 -"(?:\\018)$" --;-;-;- -0-2;0-2;0-2;0-2 -strings -"" -"a" -regexps -"\\x{61}" --;-;-;- -0-1;0-1;0-1;0-1 -"^(?:\\x{61})$" --;-;-;- -0-1;0-1;0-1;0-1 
-"^(?:\\x{61})" --;-;-;- -0-1;0-1;0-1;0-1 -"(?:\\x{61})$" --;-;-;- -0-1;0-1;0-1;0-1 -strings -"" -"a" -regexps -"\\x61" --;-;-;- -0-1;0-1;0-1;0-1 -"^(?:\\x61)$" --;-;-;- -0-1;0-1;0-1;0-1 -"^(?:\\x61)" --;-;-;- -0-1;0-1;0-1;0-1 -"(?:\\x61)$" --;-;-;- -0-1;0-1;0-1;0-1 -strings -"" -"a" -regexps -"\\x{00000061}" --;-;-;- -0-1;0-1;0-1;0-1 -"^(?:\\x{00000061})$" --;-;-;- -0-1;0-1;0-1;0-1 -"^(?:\\x{00000061})" --;-;-;- -0-1;0-1;0-1;0-1 -"(?:\\x{00000061})$" --;-;-;- -0-1;0-1;0-1;0-1 -strings -"" -"aαβb" -regexps -"\\p{Greek}+" --;-;-;- --;1-5;-;1-5 -"^(?:\\p{Greek}+)$" --;-;-;- --;-;-;- -"^(?:\\p{Greek}+)" --;-;-;- --;-;-;- -"(?:\\p{Greek}+)$" --;-;-;- --;-;-;- -strings -"" -"aαβb" -regexps -"\\P{Greek}+" --;-;-;- --;0-1;-;0-1 -"^(?:\\P{Greek}+)$" --;-;-;- --;-;-;- -"^(?:\\P{Greek}+)" --;-;-;- --;0-1;-;0-1 -"(?:\\P{Greek}+)$" --;-;-;- --;5-6;-;5-6 -strings -"" -"aαβb" -regexps -"\\p{^Greek}+" --;-;-;- --;0-1;-;0-1 -"^(?:\\p{^Greek}+)$" --;-;-;- --;-;-;- -"^(?:\\p{^Greek}+)" --;-;-;- --;0-1;-;0-1 -"(?:\\p{^Greek}+)$" --;-;-;- --;5-6;-;5-6 -strings -"" -"aαβb" -regexps -"\\P{^Greek}+" --;-;-;- --;1-5;-;1-5 -"^(?:\\P{^Greek}+)$" --;-;-;- --;-;-;- -"^(?:\\P{^Greek}+)" --;-;-;- --;-;-;- -"(?:\\P{^Greek}+)$" --;-;-;- --;-;-;- -strings -"" -"abc123" -regexps -"[^0-9]+" --;-;-;- --;0-3;-;0-3 -"^(?:[^0-9]+)$" --;-;-;- --;-;-;- -"^(?:[^0-9]+)" --;-;-;- --;0-3;-;0-3 -"(?:[^0-9]+)$" --;-;-;- --;-;-;- -strings -"" -"abc123²³¼½¾₀₉" -regexps -"\\p{Nd}+" --;-;-;- --;3-6;-;3-6 -"^(?:\\p{Nd}+)$" --;-;-;- --;-;-;- -"^(?:\\p{Nd}+)" --;-;-;- --;-;-;- -"(?:\\p{Nd}+)$" --;-;-;- --;-;-;- -strings -"" -"abc123²³¼½¾₀₉" -regexps -"\\p{^Nd}+" --;-;-;- --;0-3;-;0-3 -"^(?:\\p{^Nd}+)$" --;-;-;- --;-;-;- -"^(?:\\p{^Nd}+)" --;-;-;- --;0-3;-;0-3 -"(?:\\p{^Nd}+)$" --;-;-;- --;6-22;-;6-22 -strings -"" -"abc123²³¼½¾₀₉" -regexps -"\\P{Nd}+" --;-;-;- --;0-3;-;0-3 -"^(?:\\P{Nd}+)$" --;-;-;- --;-;-;- -"^(?:\\P{Nd}+)" --;-;-;- --;0-3;-;0-3 -"(?:\\P{Nd}+)$" --;-;-;- --;6-22;-;6-22 -strings -"" -"abc123²³¼½¾₀₉" 
-regexps -"\\P{^Nd}+" --;-;-;- --;3-6;-;3-6 -"^(?:\\P{^Nd}+)$" --;-;-;- --;-;-;- -"^(?:\\P{^Nd}+)" --;-;-;- --;-;-;- -"(?:\\P{^Nd}+)$" --;-;-;- --;-;-;- -strings -"" -"abc123²³¼½¾₀₉" -regexps -"\\pN+" --;-;-;- --;3-22;-;3-22 -"^(?:\\pN+)$" --;-;-;- --;-;-;- -"^(?:\\pN+)" --;-;-;- --;-;-;- -"(?:\\pN+)$" --;-;-;- --;3-22;-;3-22 -strings -"" -"abc123²³¼½¾₀₉" -regexps -"\\p{N}+" --;-;-;- --;3-22;-;3-22 -"^(?:\\p{N}+)$" --;-;-;- --;-;-;- -"^(?:\\p{N}+)" --;-;-;- --;-;-;- -"(?:\\p{N}+)$" --;-;-;- --;3-22;-;3-22 -strings -"" -"abc123²³¼½¾₀₉" -regexps -"\\p{^N}+" --;-;-;- --;0-3;-;0-3 -"^(?:\\p{^N}+)$" --;-;-;- --;-;-;- -"^(?:\\p{^N}+)" --;-;-;- --;0-3;-;0-3 -"(?:\\p{^N}+)$" --;-;-;- --;-;-;- -strings -"" -"abc123" -regexps -"\\p{Any}+" --;-;-;- -0-6;0-6;0-6;0-6 -"^(?:\\p{Any}+)$" --;-;-;- -0-6;0-6;0-6;0-6 -"^(?:\\p{Any}+)" --;-;-;- -0-6;0-6;0-6;0-6 -"(?:\\p{Any}+)$" --;-;-;- -0-6;0-6;0-6;0-6 -strings -"" -"@AaB" -regexps -"(?i)[@-A]+" --;-;-;- --;0-3;-;0-3 -"^(?:(?i)[@-A]+)$" --;-;-;- --;-;-;- -"^(?:(?i)[@-A]+)" --;-;-;- --;0-3;-;0-3 -"(?:(?i)[@-A]+)$" --;-;-;- --;-;-;- -strings -"" -"aAzZ" -regexps -"(?i)[A-Z]+" --;-;-;- -0-4;0-4;0-4;0-4 -"^(?:(?i)[A-Z]+)$" --;-;-;- -0-4;0-4;0-4;0-4 -"^(?:(?i)[A-Z]+)" --;-;-;- -0-4;0-4;0-4;0-4 -"(?:(?i)[A-Z]+)$" --;-;-;- -0-4;0-4;0-4;0-4 -strings -"" -"Aa\\" -regexps -"(?i)[^\\\\]+" --;-;-;- --;0-2;-;0-2 -"^(?:(?i)[^\\\\]+)$" --;-;-;- --;-;-;- -"^(?:(?i)[^\\\\]+)" --;-;-;- --;0-2;-;0-2 -"(?:(?i)[^\\\\]+)$" --;-;-;- --;-;-;- -strings -"" -"acegikmoqsuwyACEGIKMOQSUWY" -regexps -"(?i)[acegikmoqsuwy]+" --;-;-;- -0-26;0-26;0-26;0-26 -"^(?:(?i)[acegikmoqsuwy]+)$" --;-;-;- -0-26;0-26;0-26;0-26 -"^(?:(?i)[acegikmoqsuwy]+)" --;-;-;- -0-26;0-26;0-26;0-26 -"(?:(?i)[acegikmoqsuwy]+)$" --;-;-;- -0-26;0-26;0-26;0-26 -strings -"" -"@AaB" -regexps -"[@-A]+" --;-;-;- --;0-2;-;0-2 -"^(?:[@-A]+)$" --;-;-;- --;-;-;- -"^(?:[@-A]+)" --;-;-;- --;0-2;-;0-2 -"(?:[@-A]+)$" --;-;-;- --;-;-;- -strings -"" -"aAzZ" -regexps -"[A-Z]+" --;-;-;- --;1-2;-;1-2 
-"^(?:[A-Z]+)$" --;-;-;- --;-;-;- -"^(?:[A-Z]+)" --;-;-;- --;-;-;- -"(?:[A-Z]+)$" --;-;-;- --;3-4;-;3-4 -strings -"" -"Aa\\" -regexps -"[^\\\\]+" --;-;-;- --;0-2;-;0-2 -"^(?:[^\\\\]+)$" --;-;-;- --;-;-;- -"^(?:[^\\\\]+)" --;-;-;- --;0-2;-;0-2 -"(?:[^\\\\]+)$" --;-;-;- --;-;-;- -strings -"" -"acegikmoqsuwyACEGIKMOQSUWY" -regexps -"[acegikmoqsuwy]+" --;-;-;- --;0-13;-;0-13 -"^(?:[acegikmoqsuwy]+)$" --;-;-;- --;-;-;- -"^(?:[acegikmoqsuwy]+)" --;-;-;- --;0-13;-;0-13 -"(?:[acegikmoqsuwy]+)$" --;-;-;- --;-;-;- -strings -"" -"abcdef" -regexps -"^abc" --;-;-;- --;0-3;-;0-3 -"^(?:^abc)$" --;-;-;- --;-;-;- -"^(?:^abc)" --;-;-;- --;0-3;-;0-3 -"(?:^abc)$" --;-;-;- --;-;-;- -strings -"" -"aabcdef" -regexps -"^abc" --;-;-;- --;-;-;- -"^(?:^abc)$" --;-;-;- --;-;-;- -"^(?:^abc)" --;-;-;- --;-;-;- -"(?:^abc)$" --;-;-;- --;-;-;- -strings -"" -"abcdef" -regexps -"^[ay]*[bx]+c" --;-;-;- --;0-3;-;0-3 -"^(?:^[ay]*[bx]+c)$" --;-;-;- --;-;-;- -"^(?:^[ay]*[bx]+c)" --;-;-;- --;0-3;-;0-3 -"(?:^[ay]*[bx]+c)$" --;-;-;- --;-;-;- -strings -"" -"aabcdef" -regexps -"^[ay]*[bx]+c" --;-;-;- --;0-4;-;0-4 -"^(?:^[ay]*[bx]+c)$" --;-;-;- --;-;-;- -"^(?:^[ay]*[bx]+c)" --;-;-;- --;0-4;-;0-4 -"(?:^[ay]*[bx]+c)$" --;-;-;- --;-;-;- -strings -"" -"abcdef" -regexps -"def$" --;-;-;- --;3-6;-;3-6 -"^(?:def$)$" --;-;-;- --;-;-;- -"^(?:def$)" --;-;-;- --;-;-;- -"(?:def$)$" --;-;-;- --;3-6;-;3-6 -strings -"" -"abcdeff" -regexps -"def$" --;-;-;- --;-;-;- -"^(?:def$)$" --;-;-;- --;-;-;- -"^(?:def$)" --;-;-;- --;-;-;- -"(?:def$)$" --;-;-;- --;-;-;- -strings -"" -"abcdef" -regexps -"d[ex][fy]$" --;-;-;- --;3-6;-;3-6 -"^(?:d[ex][fy]$)$" --;-;-;- --;-;-;- -"^(?:d[ex][fy]$)" --;-;-;- --;-;-;- -"(?:d[ex][fy]$)$" --;-;-;- --;3-6;-;3-6 -strings -"" -"abcdeff" -regexps -"d[ex][fy]$" --;-;-;- --;-;-;- -"^(?:d[ex][fy]$)$" --;-;-;- --;-;-;- -"^(?:d[ex][fy]$)" --;-;-;- --;-;-;- -"(?:d[ex][fy]$)$" --;-;-;- --;-;-;- -strings -"" -"abcdef" -regexps -"[dz][ex][fy]$" --;-;-;- --;3-6;-;3-6 -"^(?:[dz][ex][fy]$)$" --;-;-;- --;-;-;- 
-"^(?:[dz][ex][fy]$)" --;-;-;- --;-;-;- -"(?:[dz][ex][fy]$)$" --;-;-;- --;3-6;-;3-6 -strings -"" -"abcdeff" -regexps -"[dz][ex][fy]$" --;-;-;- --;-;-;- -"^(?:[dz][ex][fy]$)$" --;-;-;- --;-;-;- -"^(?:[dz][ex][fy]$)" --;-;-;- --;-;-;- -"(?:[dz][ex][fy]$)$" --;-;-;- --;-;-;- -strings -"" -"abcdef" -regexps -"(?m)^abc" --;-;-;- --;0-3;-;0-3 -"^(?:(?m)^abc)$" --;-;-;- --;-;-;- -"^(?:(?m)^abc)" --;-;-;- --;0-3;-;0-3 -"(?:(?m)^abc)$" --;-;-;- --;-;-;- -strings -"" -"aabcdef" -regexps -"(?m)^abc" --;-;-;- --;-;-;- -"^(?:(?m)^abc)$" --;-;-;- --;-;-;- -"^(?:(?m)^abc)" --;-;-;- --;-;-;- -"(?:(?m)^abc)$" --;-;-;- --;-;-;- -strings -"" -"abcdef" -regexps -"(?m)^[ay]*[bx]+c" --;-;-;- --;0-3;-;0-3 -"^(?:(?m)^[ay]*[bx]+c)$" --;-;-;- --;-;-;- -"^(?:(?m)^[ay]*[bx]+c)" --;-;-;- --;0-3;-;0-3 -"(?:(?m)^[ay]*[bx]+c)$" --;-;-;- --;-;-;- -strings -"" -"aabcdef" -regexps -"(?m)^[ay]*[bx]+c" --;-;-;- --;0-4;-;0-4 -"^(?:(?m)^[ay]*[bx]+c)$" --;-;-;- --;-;-;- -"^(?:(?m)^[ay]*[bx]+c)" --;-;-;- --;0-4;-;0-4 -"(?:(?m)^[ay]*[bx]+c)$" --;-;-;- --;-;-;- -strings -"" -"abcdef" -regexps -"(?m)def$" --;-;-;- --;3-6;-;3-6 -"^(?:(?m)def$)$" --;-;-;- --;-;-;- -"^(?:(?m)def$)" --;-;-;- --;-;-;- -"(?:(?m)def$)$" --;-;-;- --;3-6;-;3-6 -strings -"" -"abcdeff" -regexps -"(?m)def$" --;-;-;- --;-;-;- -"^(?:(?m)def$)$" --;-;-;- --;-;-;- -"^(?:(?m)def$)" --;-;-;- --;-;-;- -"(?:(?m)def$)$" --;-;-;- --;-;-;- -strings -"" -"abcdef" -regexps -"(?m)d[ex][fy]$" --;-;-;- --;3-6;-;3-6 -"^(?:(?m)d[ex][fy]$)$" --;-;-;- --;-;-;- -"^(?:(?m)d[ex][fy]$)" --;-;-;- --;-;-;- -"(?:(?m)d[ex][fy]$)$" --;-;-;- --;3-6;-;3-6 -strings -"" -"abcdeff" -regexps -"(?m)d[ex][fy]$" --;-;-;- --;-;-;- -"^(?:(?m)d[ex][fy]$)$" --;-;-;- --;-;-;- -"^(?:(?m)d[ex][fy]$)" --;-;-;- --;-;-;- -"(?:(?m)d[ex][fy]$)$" --;-;-;- --;-;-;- -strings -"" -"abcdef" -regexps -"(?m)[dz][ex][fy]$" --;-;-;- --;3-6;-;3-6 -"^(?:(?m)[dz][ex][fy]$)$" --;-;-;- --;-;-;- -"^(?:(?m)[dz][ex][fy]$)" --;-;-;- --;-;-;- -"(?:(?m)[dz][ex][fy]$)$" --;-;-;- --;3-6;-;3-6 -strings -"" 
-"abcdeff" -regexps -"(?m)[dz][ex][fy]$" --;-;-;- --;-;-;- -"^(?:(?m)[dz][ex][fy]$)$" --;-;-;- --;-;-;- -"^(?:(?m)[dz][ex][fy]$)" --;-;-;- --;-;-;- -"(?:(?m)[dz][ex][fy]$)$" --;-;-;- --;-;-;- -strings -"" -"a" -regexps -"^" -0-0;0-0;0-0;0-0 --;0-0;-;0-0 -"^(?:^)$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:^)" -0-0;0-0;0-0;0-0 --;0-0;-;0-0 -"(?:^)$" -0-0;0-0;0-0;0-0 --;-;-;- -strings -"" -"a" -regexps -"^^" -0-0;0-0;0-0;0-0 --;0-0;-;0-0 -"^(?:^^)$" -0-0;0-0;0-0;0-0 --;-;-;- -"^(?:^^)" -0-0;0-0;0-0;0-0 --;0-0;-;0-0 -"(?:^^)$" -0-0;0-0;0-0;0-0 --;-;-;- -strings -"" -"a" -regexps -"a" --;-;-;- -0-1;0-1;0-1;0-1 -"^(?:a)$" --;-;-;- -0-1;0-1;0-1;0-1 -"^(?:a)" --;-;-;- -0-1;0-1;0-1;0-1 -"(?:a)$" --;-;-;- -0-1;0-1;0-1;0-1 -strings -"" -"a" -regexps -"ab*" --;-;-;- -0-1;0-1;0-1;0-1 -"^(?:ab*)$" --;-;-;- -0-1;0-1;0-1;0-1 -"^(?:ab*)" --;-;-;- -0-1;0-1;0-1;0-1 -"(?:ab*)$" --;-;-;- -0-1;0-1;0-1;0-1 -strings -"" -"a" -regexps -"a\\C*" --;-;-;- -0-1;0-1;0-1;0-1 -"^(?:a\\C*)$" --;-;-;- -0-1;0-1;0-1;0-1 -"^(?:a\\C*)" --;-;-;- -0-1;0-1;0-1;0-1 -"(?:a\\C*)$" --;-;-;- -0-1;0-1;0-1;0-1 -strings -"" -"baba" -regexps -"a\\C*|ba\\C" --;-;-;- --;0-3;-;0-3 -"^(?:a\\C*|ba\\C)$" --;-;-;- --;-;-;- -"^(?:a\\C*|ba\\C)" --;-;-;- --;0-3;-;0-3 -"(?:a\\C*|ba\\C)$" --;-;-;- --;1-4;-;1-4 diff --git a/src/pkg/regexp/testdata/repetition.dat b/src/pkg/regexp/testdata/repetition.dat deleted file mode 100644 index e6361f51a..000000000 --- a/src/pkg/regexp/testdata/repetition.dat +++ /dev/null @@ -1,163 +0,0 @@ -NOTE implicit vs. explicit repetitions : 2009-02-02 - -# Glenn Fowler <gsf@research.att.com> -# conforming matches (column 4) must match one of the following BREs -# NOMATCH -# (0,.)\((\(.\),\(.\))(?,?)(\2,\3)\)* -# (0,.)\((\(.\),\(.\))(\2,\3)(?,?)\)* -# i.e., each 3-tuple has two identical elements and one (?,?) 
- -E ((..)|(.)) NULL NOMATCH -E ((..)|(.))((..)|(.)) NULL NOMATCH -E ((..)|(.))((..)|(.))((..)|(.)) NULL NOMATCH - -E ((..)|(.)){1} NULL NOMATCH -E ((..)|(.)){2} NULL NOMATCH -E ((..)|(.)){3} NULL NOMATCH - -E ((..)|(.))* NULL (0,0) - -E ((..)|(.)) a (0,1)(0,1)(?,?)(0,1) -E ((..)|(.))((..)|(.)) a NOMATCH -E ((..)|(.))((..)|(.))((..)|(.)) a NOMATCH - -E ((..)|(.)){1} a (0,1)(0,1)(?,?)(0,1) -E ((..)|(.)){2} a NOMATCH -E ((..)|(.)){3} a NOMATCH - -E ((..)|(.))* a (0,1)(0,1)(?,?)(0,1) - -E ((..)|(.)) aa (0,2)(0,2)(0,2)(?,?) -E ((..)|(.))((..)|(.)) aa (0,2)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2) -E ((..)|(.))((..)|(.))((..)|(.)) aa NOMATCH - -E ((..)|(.)){1} aa (0,2)(0,2)(0,2)(?,?) -E ((..)|(.)){2} aa (0,2)(1,2)(?,?)(1,2) -E ((..)|(.)){3} aa NOMATCH - -E ((..)|(.))* aa (0,2)(0,2)(0,2)(?,?) - -E ((..)|(.)) aaa (0,2)(0,2)(0,2)(?,?) -E ((..)|(.))((..)|(.)) aaa (0,3)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3) -E ((..)|(.))((..)|(.))((..)|(.)) aaa (0,3)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)(2,3)(?,?)(2,3) - -E ((..)|(.)){1} aaa (0,2)(0,2)(0,2)(?,?) -#E ((..)|(.)){2} aaa (0,3)(2,3)(?,?)(2,3) -E ((..)|(.)){2} aaa (0,3)(2,3)(0,2)(2,3) RE2/Go -E ((..)|(.)){3} aaa (0,3)(2,3)(?,?)(2,3) - -#E ((..)|(.))* aaa (0,3)(2,3)(?,?)(2,3) -E ((..)|(.))* aaa (0,3)(2,3)(0,2)(2,3) RE2/Go - -E ((..)|(.)) aaaa (0,2)(0,2)(0,2)(?,?) -E ((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?) -E ((..)|(.))((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)(3,4)(?,?)(3,4) - -E ((..)|(.)){1} aaaa (0,2)(0,2)(0,2)(?,?) -E ((..)|(.)){2} aaaa (0,4)(2,4)(2,4)(?,?) -#E ((..)|(.)){3} aaaa (0,4)(3,4)(?,?)(3,4) -E ((..)|(.)){3} aaaa (0,4)(3,4)(0,2)(3,4) RE2/Go - -E ((..)|(.))* aaaa (0,4)(2,4)(2,4)(?,?) - -E ((..)|(.)) aaaaa (0,2)(0,2)(0,2)(?,?) -E ((..)|(.))((..)|(.)) aaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?) -E ((..)|(.))((..)|(.))((..)|(.)) aaaaa (0,5)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,5)(?,?)(4,5) - -E ((..)|(.)){1} aaaaa (0,2)(0,2)(0,2)(?,?) -E ((..)|(.)){2} aaaaa (0,4)(2,4)(2,4)(?,?) 
-#E ((..)|(.)){3} aaaaa (0,5)(4,5)(?,?)(4,5) -E ((..)|(.)){3} aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go - -#E ((..)|(.))* aaaaa (0,5)(4,5)(?,?)(4,5) -E ((..)|(.))* aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go - -E ((..)|(.)) aaaaaa (0,2)(0,2)(0,2)(?,?) -E ((..)|(.))((..)|(.)) aaaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?) -E ((..)|(.))((..)|(.))((..)|(.)) aaaaaa (0,6)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,6)(4,6)(?,?) - -E ((..)|(.)){1} aaaaaa (0,2)(0,2)(0,2)(?,?) -E ((..)|(.)){2} aaaaaa (0,4)(2,4)(2,4)(?,?) -E ((..)|(.)){3} aaaaaa (0,6)(4,6)(4,6)(?,?) - -E ((..)|(.))* aaaaaa (0,6)(4,6)(4,6)(?,?) - -NOTE additional repetition tests graciously provided by Chris Kuklewicz www.haskell.org 2009-02-02 - -# These test a bug in OS X / FreeBSD / NetBSD, and libtree. -# Linux/GLIBC gets the {8,} and {8,8} wrong. - -:HA#100:E X(.?){0,}Y X1234567Y (0,9)(7,8) -:HA#101:E X(.?){1,}Y X1234567Y (0,9)(7,8) -:HA#102:E X(.?){2,}Y X1234567Y (0,9)(7,8) -:HA#103:E X(.?){3,}Y X1234567Y (0,9)(7,8) -:HA#104:E X(.?){4,}Y X1234567Y (0,9)(7,8) -:HA#105:E X(.?){5,}Y X1234567Y (0,9)(7,8) -:HA#106:E X(.?){6,}Y X1234567Y (0,9)(7,8) -:HA#107:E X(.?){7,}Y X1234567Y (0,9)(7,8) -:HA#108:E X(.?){8,}Y X1234567Y (0,9)(8,8) -#:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(7,8) -:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(8,8) RE2/Go -#:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(7,8) -:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(8,8) RE2/Go -#:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(7,8) -:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(8,8) RE2/Go -#:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(7,8) -:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(8,8) RE2/Go -#:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(7,8) -:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(8,8) RE2/Go -#:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(7,8) -:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(8,8) RE2/Go -#:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(7,8) -:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(8,8) RE2/Go -#:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(7,8) -:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(8,8) RE2/Go -:HA#118:E X(.?){8,8}Y X1234567Y (0,9)(8,8) - -# These 
test a fixed bug in my regex-tdfa that did not keep the expanded -# form properly grouped, so right association did the wrong thing with -# these ambiguous patterns (crafted just to test my code when I became -# suspicious of my implementation). The first subexpression should use -# "ab" then "a" then "bcd". - -# OS X / FreeBSD / NetBSD badly fail many of these, with impossible -# results like (0,6)(4,5)(6,6). - -:HA#260:E (a|ab|c|bcd){0,}(d*) ababcd (0,6)(3,6)(6,6) -:HA#261:E (a|ab|c|bcd){1,}(d*) ababcd (0,6)(3,6)(6,6) -:HA#262:E (a|ab|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6) -:HA#263:E (a|ab|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6) -:HA#264:E (a|ab|c|bcd){4,}(d*) ababcd NOMATCH -:HA#265:E (a|ab|c|bcd){0,10}(d*) ababcd (0,6)(3,6)(6,6) -:HA#266:E (a|ab|c|bcd){1,10}(d*) ababcd (0,6)(3,6)(6,6) -:HA#267:E (a|ab|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6) -:HA#268:E (a|ab|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6) -:HA#269:E (a|ab|c|bcd){4,10}(d*) ababcd NOMATCH -:HA#270:E (a|ab|c|bcd)*(d*) ababcd (0,6)(3,6)(6,6) -:HA#271:E (a|ab|c|bcd)+(d*) ababcd (0,6)(3,6)(6,6) - -# The above worked on Linux/GLIBC but the following often fail. 
-# They also trip up OS X / FreeBSD / NetBSD: - -#:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(3,6)(6,6) -:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go -#:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(3,6)(6,6) -:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go -#:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6) -:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go -#:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6) -:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go -:HA#284:E (ab|a|c|bcd){4,}(d*) ababcd NOMATCH -#:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(3,6)(6,6) -:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go -#:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(3,6)(6,6) -:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go -#:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6) -:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go -#:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6) -:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go -:HA#289:E (ab|a|c|bcd){4,10}(d*) ababcd NOMATCH -#:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(3,6)(6,6) -:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(4,5)(5,6) RE2/Go -#:HA#291:E (ab|a|c|bcd)+(d*) ababcd (0,6)(3,6)(6,6) -:HA#291:E (ab|a|c|bcd)+(d*) ababcd (0,6)(4,5)(5,6) RE2/Go diff --git a/src/pkg/regexp/testdata/testregex.c b/src/pkg/regexp/testdata/testregex.c deleted file mode 100644 index 37545d057..000000000 --- a/src/pkg/regexp/testdata/testregex.c +++ /dev/null @@ -1,2286 +0,0 @@ -#pragma prototyped noticed - -/* - * regex(3) test harness - * - * build: cc -o testregex testregex.c - * help: testregex --man - * note: REG_* features are detected by #ifdef; if REG_* are enums - * then supply #define REG_foo REG_foo for each enum REG_foo - * - * Glenn Fowler <gsf@research.att.com> - * AT&T Research - * - * PLEASE: publish your tests so everyone can benefit - * - * The following license covers testregex.c and all associated test 
data. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of THIS SOFTWARE FILE (the "Software"), to deal in the Software - * without restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, and/or sell copies of the - * Software, and to permit persons to whom the Software is furnished to do - * so, subject to the following disclaimer: - * - * THIS SOFTWARE IS PROVIDED BY AT&T ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL AT&T BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -static const char id[] = "\n@(#)$Id: testregex (AT&T Research) 2010-06-10 $\0\n"; - -#if _PACKAGE_ast -#include <ast.h> -#else -#include <sys/types.h> -#endif - -#include <stdio.h> -#include <regex.h> -#include <ctype.h> -#include <setjmp.h> -#include <signal.h> -#include <string.h> -#include <unistd.h> - -#ifdef __STDC__ -#include <stdlib.h> -#include <locale.h> -#endif - -#ifndef RE_DUP_MAX -#define RE_DUP_MAX 32767 -#endif - -#if !_PACKAGE_ast -#undef REG_DISCIPLINE -#endif - -#ifndef REG_DELIMITED -#undef _REG_subcomp -#endif - -#define TEST_ARE 0x00000001 -#define TEST_BRE 0x00000002 -#define TEST_ERE 0x00000004 -#define TEST_KRE 0x00000008 -#define TEST_LRE 0x00000010 -#define TEST_SRE 0x00000020 - -#define TEST_EXPAND 0x00000100 -#define TEST_LENIENT 0x00000200 - -#define TEST_QUERY 0x00000400 -#define TEST_SUB 0x00000800 -#define TEST_UNSPECIFIED 0x00001000 -#define TEST_VERIFY 0x00002000 -#define TEST_AND 0x00004000 -#define TEST_OR 0x00008000 - -#define TEST_DELIMIT 0x00010000 -#define TEST_OK 0x00020000 -#define TEST_SAME 0x00040000 - -#define TEST_ACTUAL 0x00100000 -#define TEST_BASELINE 0x00200000 -#define TEST_FAIL 0x00400000 -#define TEST_PASS 0x00800000 -#define TEST_SUMMARY 0x01000000 - -#define TEST_IGNORE_ERROR 0x02000000 -#define TEST_IGNORE_OVER 0x04000000 -#define TEST_IGNORE_POSITION 0x08000000 - -#define TEST_CATCH 0x10000000 -#define TEST_VERBOSE 0x20000000 - -#define TEST_DECOMP 0x40000000 - -#define TEST_GLOBAL (TEST_ACTUAL|TEST_AND|TEST_BASELINE|TEST_CATCH|TEST_FAIL|TEST_IGNORE_ERROR|TEST_IGNORE_OVER|TEST_IGNORE_POSITION|TEST_OR|TEST_PASS|TEST_SUMMARY|TEST_VERBOSE) - -#ifdef REG_DISCIPLINE - - -#include <stk.h> - -typedef struct Disc_s -{ - regdisc_t disc; - int ordinal; - Sfio_t* sp; -} Disc_t; - -static void* -compf(const regex_t* re, const char* xstr, size_t xlen, regdisc_t* disc) -{ - Disc_t* dp = (Disc_t*)disc; - - return (void*)((char*)0 + ++dp->ordinal); -} - -static int -execf(const regex_t* re, void* data, const char* 
xstr, size_t xlen, const char* sstr, size_t slen, char** snxt, regdisc_t* disc) -{ - Disc_t* dp = (Disc_t*)disc; - - sfprintf(dp->sp, "{%-.*s}(%lu:%d)", xlen, xstr, (char*)data - (char*)0, slen); - return atoi(xstr); -} - -static void* -resizef(void* handle, void* data, size_t size) -{ - if (!size) - return 0; - return stkalloc((Sfio_t*)handle, size); -} - -#endif - -#ifndef NiL -#ifdef __STDC__ -#define NiL 0 -#else -#define NiL (char*)0 -#endif -#endif - -#define H(x) do{if(html)fprintf(stderr,x);}while(0) -#define T(x) fprintf(stderr,x) - -static void -help(int html) -{ -H("<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML//EN\">\n"); -H("<HTML>\n"); -H("<HEAD>\n"); -H("<TITLE>testregex man document</TITLE>\n"); -H("</HEAD>\n"); -H("<BODY bgcolor=white>\n"); -H("<PRE>\n"); -T("NAME\n"); -T(" testregex - regex(3) test harness\n"); -T("\n"); -T("SYNOPSIS\n"); -T(" testregex [ options ]\n"); -T("\n"); -T("DESCRIPTION\n"); -T(" testregex reads regex(3) test specifications, one per line, from the\n"); -T(" standard input and writes one output line for each failed test. A\n"); -T(" summary line is written after all tests are done. Each successful\n"); -T(" test is run again with REG_NOSUB. 
Unsupported features are noted\n"); -T(" before the first test, and tests requiring these features are\n"); -T(" silently ignored.\n"); -T("\n"); -T("OPTIONS\n"); -T(" -c catch signals and non-terminating calls\n"); -T(" -e ignore error return mismatches\n"); -T(" -h list help on standard error\n"); -T(" -n do not repeat successful tests with regnexec()\n"); -T(" -o ignore match[] overrun errors\n"); -T(" -p ignore negative position mismatches\n"); -T(" -s use stack instead of malloc\n"); -T(" -x do not repeat successful tests with REG_NOSUB\n"); -T(" -v list each test line\n"); -T(" -A list failed test lines with actual answers\n"); -T(" -B list all test lines with actual answers\n"); -T(" -F list failed test lines\n"); -T(" -P list passed test lines\n"); -T(" -S output one summary line\n"); -T("\n"); -T("INPUT FORMAT\n"); -T(" Input lines may be blank, a comment beginning with #, or a test\n"); -T(" specification. A specification is five fields separated by one\n"); -T(" or more tabs. NULL denotes the empty string and NIL denotes the\n"); -T(" 0 pointer.\n"); -T("\n"); -T(" Field 1: the regex(3) flags to apply, one character per REG_feature\n"); -T(" flag. The test is skipped if REG_feature is not supported by the\n"); -T(" implementation. If the first character is not [BEASKLP] then the\n"); -T(" specification is a global control line. One or more of [BEASKLP] may be\n"); -T(" specified; the test will be repeated for each mode.\n"); -T("\n"); -T(" B basic BRE (grep, ed, sed)\n"); -T(" E REG_EXTENDED ERE (egrep)\n"); -T(" A REG_AUGMENTED ARE (egrep with negation)\n"); -T(" S REG_SHELL SRE (sh glob)\n"); -T(" K REG_SHELL|REG_AUGMENTED KRE (ksh glob)\n"); -T(" L REG_LITERAL LRE (fgrep)\n"); -T("\n"); -T(" a REG_LEFT|REG_RIGHT implicit ^...$\n"); -T(" b REG_NOTBOL lhs does not match ^\n"); -T(" c REG_COMMENT ignore space and #...\\n\n"); -T(" d REG_SHELL_DOT explicit leading . 
match\n"); -T(" e REG_NOTEOL rhs does not match $\n"); -T(" f REG_MULTIPLE multiple \\n separated patterns\n"); -T(" g FNM_LEADING_DIR testfnmatch only -- match until /\n"); -T(" h REG_MULTIREF multiple digit backref\n"); -T(" i REG_ICASE ignore case\n"); -T(" j REG_SPAN . matches \\n\n"); -T(" k REG_ESCAPE \\ to ecape [...] delimiter\n"); -T(" l REG_LEFT implicit ^...\n"); -T(" m REG_MINIMAL minimal match\n"); -T(" n REG_NEWLINE explicit \\n match\n"); -T(" o REG_ENCLOSED (|&) magic inside [@|&](...)\n"); -T(" p REG_SHELL_PATH explicit / match\n"); -T(" q REG_DELIMITED delimited pattern\n"); -T(" r REG_RIGHT implicit ...$\n"); -T(" s REG_SHELL_ESCAPED \\ not special\n"); -T(" t REG_MUSTDELIM all delimiters must be specified\n"); -T(" u standard unspecified behavior -- errors not counted\n"); -T(" v REG_CLASS_ESCAPE \\ special inside [...]\n"); -T(" w REG_NOSUB no subexpression match array\n"); -T(" x REG_LENIENT let some errors slide\n"); -T(" y REG_LEFT regexec() implicit ^...\n"); -T(" z REG_NULL NULL subexpressions ok\n"); -T(" $ expand C \\c escapes in fields 2 and 3\n"); -T(" / field 2 is a regsubcomp() expression\n"); -T(" = field 3 is a regdecomp() expression\n"); -T("\n"); -T(" Field 1 control lines:\n"); -T("\n"); -T(" C set LC_COLLATE and LC_CTYPE to locale in field 2\n"); -T("\n"); -T(" ?test ... output field 5 if passed and != EXPECTED, silent otherwise\n"); -T(" &test ... output field 5 if current and previous passed\n"); -T(" |test ... output field 5 if current passed and previous failed\n"); -T(" ; ... output field 2 if previous failed\n"); -T(" {test ... 
skip if failed until }\n"); -T(" } end of skip\n"); -T("\n"); -T(" : comment comment copied as output NOTE\n"); -T(" :comment:test :comment: ignored\n"); -T(" N[OTE] comment comment copied as output NOTE\n"); -T(" T[EST] comment comment\n"); -T("\n"); -T(" number use number for nmatch (20 by default)\n"); -T("\n"); -T(" Field 2: the regular expression pattern; SAME uses the pattern from\n"); -T(" the previous specification. RE_DUP_MAX inside {...} expands to the\n"); -T(" value from <limits.h>.\n"); -T("\n"); -T(" Field 3: the string to match. X...{RE_DUP_MAX} expands to RE_DUP_MAX\n"); -T(" copies of X.\n"); -T("\n"); -T(" Field 4: the test outcome. This is either one of the posix error\n"); -T(" codes (with REG_ omitted) or the match array, a list of (m,n)\n"); -T(" entries with m and n being first and last+1 positions in the\n"); -T(" field 3 string, or NULL if REG_NOSUB is in effect and success\n"); -T(" is expected. BADPAT is acceptable in place of any regcomp(3)\n"); -T(" error code. The match[] array is initialized to (-2,-2) before\n"); -T(" each test. All array elements from 0 to nmatch-1 must be specified\n"); -T(" in the outcome. Unspecified endpoints (offset -1) are denoted by ?.\n"); -T(" Unset endpoints (offset -2) are denoted by X. {x}(o:n) denotes a\n"); -T(" matched (?{...}) expression, where x is the text enclosed by {...},\n"); -T(" o is the expression ordinal counting from 1, and n is the length of\n"); -T(" the unmatched portion of the subject string. If x starts with a\n"); -T(" number then that is the return value of re_execf(), otherwise 0 is\n"); -T(" returned. 
RE_DUP_MAX[-+]N expands to the <limits.h> value -+N.\n"); -T("\n"); -T(" Field 5: optional comment appended to the report.\n"); -T("\n"); -T("CAVEAT\n"); -T(" If a regex implementation misbehaves with memory then all bets are off.\n"); -T("\n"); -T("CONTRIBUTORS\n"); -T(" Glenn Fowler gsf@research.att.com (ksh strmatch, regex extensions)\n"); -T(" David Korn dgk@research.att.com (ksh glob matcher)\n"); -T(" Doug McIlroy mcilroy@dartmouth.edu (ast regex/testre in C++)\n"); -T(" Tom Lord lord@regexps.com (rx tests)\n"); -T(" Henry Spencer henry@zoo.toronto.edu (original public regex)\n"); -T(" Andrew Hume andrew@research.att.com (gre tests)\n"); -T(" John Maddock John_Maddock@compuserve.com (regex++ tests)\n"); -T(" Philip Hazel ph10@cam.ac.uk (pcre tests)\n"); -T(" Ville Laurikari vl@iki.fi (libtre tests)\n"); -H("</PRE>\n"); -H("</BODY>\n"); -H("</HTML>\n"); -} - -#ifndef elementsof -#define elementsof(x) (sizeof(x)/sizeof(x[0])) -#endif - -#ifndef streq -#define streq(a,b) (*(a)==*(b)&&!strcmp(a,b)) -#endif - -#define HUNG 2 -#define NOTEST (~0) - -#ifndef REG_TEST_DEFAULT -#define REG_TEST_DEFAULT 0 -#endif - -#ifndef REG_EXEC_DEFAULT -#define REG_EXEC_DEFAULT 0 -#endif - -static const char* unsupported[] = -{ - "BASIC", -#ifndef REG_EXTENDED - "EXTENDED", -#endif -#ifndef REG_AUGMENTED - "AUGMENTED", -#endif -#ifndef REG_SHELL - "SHELL", -#endif - -#ifndef REG_CLASS_ESCAPE - "CLASS_ESCAPE", -#endif -#ifndef REG_COMMENT - "COMMENT", -#endif -#ifndef REG_DELIMITED - "DELIMITED", -#endif -#ifndef REG_DISCIPLINE - "DISCIPLINE", -#endif -#ifndef REG_ESCAPE - "ESCAPE", -#endif -#ifndef REG_ICASE - "ICASE", -#endif -#ifndef REG_LEFT - "LEFT", -#endif -#ifndef REG_LENIENT - "LENIENT", -#endif -#ifndef REG_LITERAL - "LITERAL", -#endif -#ifndef REG_MINIMAL - "MINIMAL", -#endif -#ifndef REG_MULTIPLE - "MULTIPLE", -#endif -#ifndef REG_MULTIREF - "MULTIREF", -#endif -#ifndef REG_MUSTDELIM - "MUSTDELIM", -#endif -#ifndef REG_NEWLINE - "NEWLINE", -#endif -#ifndef REG_NOTBOL - 
"NOTBOL", -#endif -#ifndef REG_NOTEOL - "NOTEOL", -#endif -#ifndef REG_NULL - "NULL", -#endif -#ifndef REG_RIGHT - "RIGHT", -#endif -#ifndef REG_SHELL_DOT - "SHELL_DOT", -#endif -#ifndef REG_SHELL_ESCAPED - "SHELL_ESCAPED", -#endif -#ifndef REG_SHELL_GROUP - "SHELL_GROUP", -#endif -#ifndef REG_SHELL_PATH - "SHELL_PATH", -#endif -#ifndef REG_SPAN - "SPAN", -#endif -#if REG_NOSUB & REG_TEST_DEFAULT - "SUBMATCH", -#endif -#if !_REG_nexec - "regnexec", -#endif -#if !_REG_subcomp - "regsubcomp", -#endif -#if !_REG_decomp - "redecomp", -#endif - 0 -}; - -#ifndef REG_CLASS_ESCAPE -#define REG_CLASS_ESCAPE NOTEST -#endif -#ifndef REG_COMMENT -#define REG_COMMENT NOTEST -#endif -#ifndef REG_DELIMITED -#define REG_DELIMITED NOTEST -#endif -#ifndef REG_ESCAPE -#define REG_ESCAPE NOTEST -#endif -#ifndef REG_ICASE -#define REG_ICASE NOTEST -#endif -#ifndef REG_LEFT -#define REG_LEFT NOTEST -#endif -#ifndef REG_LENIENT -#define REG_LENIENT 0 -#endif -#ifndef REG_MINIMAL -#define REG_MINIMAL NOTEST -#endif -#ifndef REG_MULTIPLE -#define REG_MULTIPLE NOTEST -#endif -#ifndef REG_MULTIREF -#define REG_MULTIREF NOTEST -#endif -#ifndef REG_MUSTDELIM -#define REG_MUSTDELIM NOTEST -#endif -#ifndef REG_NEWLINE -#define REG_NEWLINE NOTEST -#endif -#ifndef REG_NOTBOL -#define REG_NOTBOL NOTEST -#endif -#ifndef REG_NOTEOL -#define REG_NOTEOL NOTEST -#endif -#ifndef REG_NULL -#define REG_NULL NOTEST -#endif -#ifndef REG_RIGHT -#define REG_RIGHT NOTEST -#endif -#ifndef REG_SHELL_DOT -#define REG_SHELL_DOT NOTEST -#endif -#ifndef REG_SHELL_ESCAPED -#define REG_SHELL_ESCAPED NOTEST -#endif -#ifndef REG_SHELL_GROUP -#define REG_SHELL_GROUP NOTEST -#endif -#ifndef REG_SHELL_PATH -#define REG_SHELL_PATH NOTEST -#endif -#ifndef REG_SPAN -#define REG_SPAN NOTEST -#endif - -#define REG_UNKNOWN (-1) - -#ifndef REG_ENEWLINE -#define REG_ENEWLINE (REG_UNKNOWN-1) -#endif -#ifndef REG_ENULL -#ifndef REG_EMPTY -#define REG_ENULL (REG_UNKNOWN-2) -#else -#define REG_ENULL REG_EMPTY -#endif -#endif -#ifndef 
REG_ECOUNT -#define REG_ECOUNT (REG_UNKNOWN-3) -#endif -#ifndef REG_BADESC -#define REG_BADESC (REG_UNKNOWN-4) -#endif -#ifndef REG_EMEM -#define REG_EMEM (REG_UNKNOWN-5) -#endif -#ifndef REG_EHUNG -#define REG_EHUNG (REG_UNKNOWN-6) -#endif -#ifndef REG_EBUS -#define REG_EBUS (REG_UNKNOWN-7) -#endif -#ifndef REG_EFAULT -#define REG_EFAULT (REG_UNKNOWN-8) -#endif -#ifndef REG_EFLAGS -#define REG_EFLAGS (REG_UNKNOWN-9) -#endif -#ifndef REG_EDELIM -#define REG_EDELIM (REG_UNKNOWN-9) -#endif - -static const struct { int code; char* name; } codes[] = -{ - REG_UNKNOWN, "UNKNOWN", - REG_NOMATCH, "NOMATCH", - REG_BADPAT, "BADPAT", - REG_ECOLLATE, "ECOLLATE", - REG_ECTYPE, "ECTYPE", - REG_EESCAPE, "EESCAPE", - REG_ESUBREG, "ESUBREG", - REG_EBRACK, "EBRACK", - REG_EPAREN, "EPAREN", - REG_EBRACE, "EBRACE", - REG_BADBR, "BADBR", - REG_ERANGE, "ERANGE", - REG_ESPACE, "ESPACE", - REG_BADRPT, "BADRPT", - REG_ENEWLINE, "ENEWLINE", - REG_ENULL, "ENULL", - REG_ECOUNT, "ECOUNT", - REG_BADESC, "BADESC", - REG_EMEM, "EMEM", - REG_EHUNG, "EHUNG", - REG_EBUS, "EBUS", - REG_EFAULT, "EFAULT", - REG_EFLAGS, "EFLAGS", - REG_EDELIM, "EDELIM", -}; - -static struct -{ - regmatch_t NOMATCH; - int errors; - int extracted; - int ignored; - int lineno; - int passed; - int signals; - int unspecified; - int verify; - int warnings; - char* file; - char* stack; - char* which; - jmp_buf gotcha; -#ifdef REG_DISCIPLINE - Disc_t disc; -#endif -} state; - -static void -quote(char* s, int len, unsigned long test) -{ - unsigned char* u = (unsigned char*)s; - unsigned char* e; - int c; -#ifdef MB_CUR_MAX - int w; -#endif - - if (!u) - printf("NIL"); - else if (!*u && len <= 1) - printf("NULL"); - else if (test & TEST_EXPAND) - { - if (len < 0) - len = strlen((char*)u); - e = u + len; - if (test & TEST_DELIMIT) - printf("\""); - while (u < e) - switch (c = *u++) - { - case '\\': - printf("\\\\"); - break; - case '"': - if (test & TEST_DELIMIT) - printf("\\\""); - else - printf("\""); - break; - case '\a': - 
printf("\\a"); - break; - case '\b': - printf("\\b"); - break; - case 033: - printf("\\e"); - break; - case '\f': - printf("\\f"); - break; - case '\n': - printf("\\n"); - break; - case '\r': - printf("\\r"); - break; - case '\t': - printf("\\t"); - break; - case '\v': - printf("\\v"); - break; - default: -#ifdef MB_CUR_MAX - s = (char*)u - 1; - if ((w = mblen(s, (char*)e - s)) > 1) - { - u += w - 1; - fwrite(s, 1, w, stdout); - } - else -#endif - if (!iscntrl(c) && isprint(c)) - putchar(c); - else - printf("\\x%02x", c); - break; - } - if (test & TEST_DELIMIT) - printf("\""); - } - else - printf("%s", s); -} - -static void -report(char* comment, char* fun, char* re, char* s, int len, char* msg, int flags, unsigned long test) -{ - if (state.file) - printf("%s:", state.file); - printf("%d:", state.lineno); - if (re) - { - printf(" "); - quote(re, -1, test|TEST_DELIMIT); - if (s) - { - printf(" versus "); - quote(s, len, test|TEST_DELIMIT); - } - } - if (test & TEST_UNSPECIFIED) - { - state.unspecified++; - printf(" unspecified behavior"); - } - else - state.errors++; - if (state.which) - printf(" %s", state.which); - if (flags & REG_NOSUB) - printf(" NOSUB"); - if (fun) - printf(" %s", fun); - if (comment[strlen(comment)-1] == '\n') - printf(" %s", comment); - else - { - printf(" %s: ", comment); - if (msg) - printf("%s: ", msg); - } -} - -static void -error(regex_t* preg, int code) -{ - char* msg; - char buf[256]; - - switch (code) - { - case REG_EBUS: - msg = "bus error"; - break; - case REG_EFAULT: - msg = "memory fault"; - break; - case REG_EHUNG: - msg = "did not terminate"; - break; - default: - regerror(code, preg, msg = buf, sizeof buf); - break; - } - printf("%s\n", msg); -} - -static void -bad(char* comment, char* re, char* s, int len, unsigned long test) -{ - printf("bad test case "); - report(comment, NiL, re, s, len, NiL, 0, test); - exit(1); -} - -static int -escape(char* s) -{ - char* b; - char* t; - char* q; - char* e; - int c; - - for (b = t = s; *t 
= *s; s++, t++) - if (*s == '\\') - switch (*++s) - { - case '\\': - break; - case 'a': - *t = '\a'; - break; - case 'b': - *t = '\b'; - break; - case 'c': - if (*t = *++s) - *t &= 037; - else - s--; - break; - case 'e': - case 'E': - *t = 033; - break; - case 'f': - *t = '\f'; - break; - case 'n': - *t = '\n'; - break; - case 'r': - *t = '\r'; - break; - case 's': - *t = ' '; - break; - case 't': - *t = '\t'; - break; - case 'v': - *t = '\v'; - break; - case 'u': - case 'x': - c = 0; - q = c == 'u' ? (s + 5) : (char*)0; - e = s + 1; - while (!e || !q || s < q) - { - switch (*++s) - { - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': - c = (c << 4) + *s - 'a' + 10; - continue; - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - c = (c << 4) + *s - 'A' + 10; - continue; - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - c = (c << 4) + *s - '0'; - continue; - case '{': - case '[': - if (s != e) - { - s--; - break; - } - e = 0; - continue; - case '}': - case ']': - if (e) - s--; - break; - default: - s--; - break; - } - break; - } - *t = c; - break; - case '0': case '1': case '2': case '3': - case '4': case '5': case '6': case '7': - c = *s - '0'; - q = s + 2; - while (s < q) - { - switch (*++s) - { - case '0': case '1': case '2': case '3': - case '4': case '5': case '6': case '7': - c = (c << 3) + *s - '0'; - break; - default: - q = --s; - break; - } - } - *t = c; - break; - default: - *(s + 1) = 0; - bad("invalid C \\ escape\n", s - 1, NiL, 0, 0); - } - return t - b; -} - -static void -matchoffprint(int off) -{ - switch (off) - { - case -2: - printf("X"); - break; - case -1: - printf("?"); - break; - default: - printf("%d", off); - break; - } -} - -static void -matchprint(regmatch_t* match, int nmatch, int nsub, char* ans, unsigned long test) -{ - int i; - - for (; nmatch > nsub + 1; nmatch--) - if ((match[nmatch-1].rm_so != -1 || match[nmatch-1].rm_eo != -1) && (!(test & 
TEST_IGNORE_POSITION) || match[nmatch-1].rm_so >= 0 && match[nmatch-1].rm_eo >= 0)) - break; - for (i = 0; i < nmatch; i++) - { - printf("("); - matchoffprint(match[i].rm_so); - printf(","); - matchoffprint(match[i].rm_eo); - printf(")"); - } - if (!(test & (TEST_ACTUAL|TEST_BASELINE))) - { - if (ans) - printf(" expected: %s", ans); - printf("\n"); - } -} - -static int -matchcheck(regmatch_t* match, int nmatch, int nsub, char* ans, char* re, char* s, int len, int flags, unsigned long test) -{ - char* p; - int i; - int m; - int n; - - if (streq(ans, "OK")) - return test & (TEST_BASELINE|TEST_PASS|TEST_VERIFY); - for (i = 0, p = ans; i < nmatch && *p; i++) - { - if (*p == '{') - { -#ifdef REG_DISCIPLINE - char* x; - - if (!(x = sfstruse(state.disc.sp))) - bad("out of space [discipline string]\n", NiL, NiL, 0, 0); - if (strcmp(p, x)) - { - if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY)) - return 0; - report("callout failed", NiL, re, s, len, NiL, flags, test); - quote(p, -1, test); - printf(" expected, "); - quote(x, -1, test); - printf(" returned\n"); - } -#endif - break; - } - if (*p++ != '(') - bad("improper answer\n", re, s, -1, test); - if (*p == '?') - { - m = -1; - p++; - } - else if (*p == 'R' && !memcmp(p, "RE_DUP_MAX", 10)) - { - m = RE_DUP_MAX; - p += 10; - if (*p == '+' || *p == '-') - m += strtol(p, &p, 10); - } - else - m = strtol(p, &p, 10); - if (*p++ != ',') - bad("improper answer\n", re, s, -1, test); - if (*p == '?') - { - n = -1; - p++; - } - else if (*p == 'R' && !memcmp(p, "RE_DUP_MAX", 10)) - { - n = RE_DUP_MAX; - p += 10; - if (*p == '+' || *p == '-') - n += strtol(p, &p, 10); - } - else - n = strtol(p, &p, 10); - if (*p++ != ')') - bad("improper answer\n", re, s, -1, test); - if (m!=match[i].rm_so || n!=match[i].rm_eo) - { - if (!(test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))) - { - report("failed: match was", NiL, re, s, len, NiL, flags, test); - 
matchprint(match, nmatch, nsub, ans, test); - } - return 0; - } - } - for (; i < nmatch; i++) - { - if (match[i].rm_so!=-1 || match[i].rm_eo!=-1) - { - if (!(test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_VERIFY))) - { - if ((test & TEST_IGNORE_POSITION) && (match[i].rm_so<0 || match[i].rm_eo<0)) - { - state.ignored++; - return 0; - } - if (!(test & TEST_SUMMARY)) - { - report("failed: match was", NiL, re, s, len, NiL, flags, test); - matchprint(match, nmatch, nsub, ans, test); - } - } - return 0; - } - } - if (!(test & TEST_IGNORE_OVER) && match[nmatch].rm_so != state.NOMATCH.rm_so) - { - if (!(test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))) - { - report("failed: overran match array", NiL, re, s, len, NiL, flags, test); - matchprint(match, nmatch + 1, nsub, NiL, test); - } - return 0; - } - return 1; -} - -static void -sigunblock(int s) -{ -#ifdef SIG_SETMASK - int op; - sigset_t mask; - - sigemptyset(&mask); - if (s) - { - sigaddset(&mask, s); - op = SIG_UNBLOCK; - } - else op = SIG_SETMASK; - sigprocmask(op, &mask, NiL); -#else -#ifdef sigmask - sigsetmask(s ? 
(sigsetmask(0L) & ~sigmask(s)) : 0L); -#endif -#endif -} - -static void -gotcha(int sig) -{ - int ret; - - signal(sig, gotcha); - alarm(0); - state.signals++; - switch (sig) - { - case SIGALRM: - ret = REG_EHUNG; - break; - case SIGBUS: - ret = REG_EBUS; - break; - default: - ret = REG_EFAULT; - break; - } - sigunblock(sig); - longjmp(state.gotcha, ret); -} - -static char* -getline(FILE* fp) -{ - static char buf[32 * 1024]; - - register char* s = buf; - register char* e = &buf[sizeof(buf)]; - register char* b; - - for (;;) - { - if (!(b = fgets(s, e - s, fp))) - return 0; - state.lineno++; - s += strlen(s); - if (s == b || *--s != '\n' || s == b || *(s - 1) != '\\') - { - *s = 0; - break; - } - s--; - } - return buf; -} - -static unsigned long -note(unsigned long level, char* msg, unsigned long skip, unsigned long test) -{ - if (!(test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_SUMMARY)) && !skip) - { - printf("NOTE\t"); - if (msg) - printf("%s: ", msg); - printf("skipping lines %d", state.lineno); - } - return skip | level; -} - -#define TABS(n) &ts[7-((n)&7)] - -static char ts[] = "\t\t\t\t\t\t\t"; - -static unsigned long -extract(int* tabs, char* spec, char* re, char* s, char* ans, char* msg, char* accept, regmatch_t* match, int nmatch, int nsub, unsigned long skip, unsigned long level, unsigned long test) -{ - if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_OK|TEST_PASS|TEST_SUMMARY)) - { - state.extracted = 1; - if (test & TEST_OK) - { - state.passed++; - if ((test & TEST_VERIFY) && !(test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_SUMMARY))) - { - if (msg && strcmp(msg, "EXPECTED")) - printf("NOTE\t%s\n", msg); - return skip; - } - test &= ~(TEST_PASS|TEST_QUERY); - } - if (test & (TEST_QUERY|TEST_VERIFY)) - { - if (test & TEST_BASELINE) - test &= ~(TEST_BASELINE|TEST_PASS); - else - test |= TEST_PASS; - skip |= level; - } - if (!(test & TEST_OK)) - { - if (test & TEST_UNSPECIFIED) - state.unspecified++; - else - state.errors++; - } 
- if (test & (TEST_PASS|TEST_SUMMARY)) - return skip; - test &= ~TEST_DELIMIT; - printf("%s%s", spec, TABS(*tabs++)); - if ((test & (TEST_BASELINE|TEST_SAME)) == (TEST_BASELINE|TEST_SAME)) - printf("SAME"); - else - quote(re, -1, test); - printf("%s", TABS(*tabs++)); - quote(s, -1, test); - printf("%s", TABS(*tabs++)); - if (!(test & (TEST_ACTUAL|TEST_BASELINE)) || !accept && !match) - printf("%s", ans); - else if (accept) - printf("%s", accept); - else - matchprint(match, nmatch, nsub, NiL, test); - if (msg) - printf("%s%s", TABS(*tabs++), msg); - putchar('\n'); - } - else if (test & TEST_QUERY) - skip = note(level, msg, skip, test); - else if (test & TEST_VERIFY) - state.extracted = 1; - return skip; -} - -static int -catchfree(regex_t* preg, int flags, int* tabs, char* spec, char* re, char* s, char* ans, char* msg, char* accept, regmatch_t* match, int nmatch, int nsub, unsigned long skip, unsigned long level, unsigned long test) -{ - int eret; - - if (!(test & TEST_CATCH)) - { - regfree(preg); - eret = 0; - } - else if (!(eret = setjmp(state.gotcha))) - { - alarm(HUNG); - regfree(preg); - alarm(0); - } - else if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY)) - extract(tabs, spec, re, s, ans, msg, NiL, NiL, 0, 0, skip, level, test); - else - { - report("failed", "regfree", re, NiL, -1, msg, flags, test); - error(preg, eret); - } - return eret; -} - -static char* -expand(char* os, char* ot) -{ - char* s = os; - char* t; - int n = 0; - int r; - long m; - - for (;;) - { - switch (*s++) - { - case 0: - break; - case '{': - n++; - continue; - case '}': - n--; - continue; - case 'R': - if (n == 1 && !memcmp(s, "E_DUP_MAX", 9)) - { - s--; - for (t = ot; os < s; *t++ = *os++); - r = ((t - ot) >= 5 && t[-1] == '{' && t[-2] == '.' && t[-3] == '.' && t[-4] == '.') ? 
t[-5] : 0; - os = ot; - m = RE_DUP_MAX; - if (*(s += 10) == '+' || *s == '-') - m += strtol(s, &s, 10); - if (r) - { - t -= 5; - while (m-- > 0) - *t++ = r; - while (*s && *s++ != '}'); - } - else - t += snprintf(t, 32, "%ld", m); - while (*t = *s++) - t++; - break; - } - continue; - default: - continue; - } - break; - } - return os; -} - -int -main(int argc, char** argv) -{ - int flags; - int cflags; - int eflags; - int nmatch; - int nexec; - int nstr; - int cret; - int eret; - int nsub; - int i; - int j; - int expected; - int got; - int locale; - int subunitlen; - int testno; - unsigned long level; - unsigned long skip; - char* p; - char* line; - char* spec; - char* re; - char* s; - char* ans; - char* msg; - char* fun; - char* ppat; - char* subunit; - char* version; - char* field[6]; - char* delim[6]; - FILE* fp; - int tabs[6]; - char unit[64]; - regmatch_t match[100]; - regex_t preg; - - static char pat[32 * 1024]; - static char patbuf[32 * 1024]; - static char strbuf[32 * 1024]; - - int nonosub = REG_NOSUB == 0; - int nonexec = 0; - - unsigned long test = 0; - - static char* filter[] = { "-", 0 }; - - state.NOMATCH.rm_so = state.NOMATCH.rm_eo = -2; - p = unit; - version = (char*)id + 10; - while (p < &unit[sizeof(unit)-1] && (*p = *version++) && !isspace(*p)) - p++; - *p = 0; - while ((p = *++argv) && *p == '-') - for (;;) - { - switch (*++p) - { - case 0: - break; - case 'c': - test |= TEST_CATCH; - continue; - case 'e': - test |= TEST_IGNORE_ERROR; - continue; - case 'h': - case '?': - help(0); - return 2; - case '-': - help(p[1] == 'h'); - return 2; - case 'n': - nonexec = 1; - continue; - case 'o': - test |= TEST_IGNORE_OVER; - continue; - case 'p': - test |= TEST_IGNORE_POSITION; - continue; - case 's': -#ifdef REG_DISCIPLINE - if (!(state.stack = stkalloc(stkstd, 0))) - fprintf(stderr, "%s: out of space [stack]", unit); - state.disc.disc.re_resizef = resizef; - state.disc.disc.re_resizehandle = (void*)stkstd; -#endif - continue; - case 'x': - nonosub = 1; 
- continue; - case 'v': - test |= TEST_VERBOSE; - continue; - case 'A': - test |= TEST_ACTUAL; - continue; - case 'B': - test |= TEST_BASELINE; - continue; - case 'F': - test |= TEST_FAIL; - continue; - case 'P': - test |= TEST_PASS; - continue; - case 'S': - test |= TEST_SUMMARY; - continue; - default: - fprintf(stderr, "%s: %c: invalid option\n", unit, *p); - return 2; - } - break; - } - if (!*argv) - argv = filter; - locale = 0; - while (state.file = *argv++) - { - if (streq(state.file, "-") || streq(state.file, "/dev/stdin") || streq(state.file, "/dev/fd/0")) - { - state.file = 0; - fp = stdin; - } - else if (!(fp = fopen(state.file, "r"))) - { - fprintf(stderr, "%s: %s: cannot read\n", unit, state.file); - return 2; - } - testno = state.errors = state.ignored = state.lineno = state.passed = - state.signals = state.unspecified = state.warnings = 0; - skip = 0; - level = 1; - if (!(test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_SUMMARY))) - { - printf("TEST\t%s ", unit); - if (s = state.file) - { - subunit = p = 0; - for (;;) - { - switch (*s++) - { - case 0: - break; - case '/': - subunit = s; - continue; - case '.': - p = s - 1; - continue; - default: - continue; - } - break; - } - if (!subunit) - subunit = state.file; - if (p < subunit) - p = s - 1; - subunitlen = p - subunit; - printf("%-.*s ", subunitlen, subunit); - } - else - subunit = 0; - for (s = version; *s && (*s != ' ' || *(s + 1) != '$'); s++) - putchar(*s); - if (test & TEST_CATCH) - printf(", catch"); - if (test & TEST_IGNORE_ERROR) - printf(", ignore error code mismatches"); - if (test & TEST_IGNORE_POSITION) - printf(", ignore negative position mismatches"); -#ifdef REG_DISCIPLINE - if (state.stack) - printf(", stack"); -#endif - if (test & TEST_VERBOSE) - printf(", verbose"); - printf("\n"); -#ifdef REG_VERSIONID - if (regerror(REG_VERSIONID, NiL, pat, sizeof(pat)) > 0) - s = pat; - else -#endif -#ifdef REG_TEST_VERSION - s = REG_TEST_VERSION; -#else - s = "regex"; -#endif - 
printf("NOTE\t%s\n", s); - if (elementsof(unsupported) > 1) - { -#if (REG_TEST_DEFAULT & (REG_AUGMENTED|REG_EXTENDED|REG_SHELL)) || !defined(REG_EXTENDED) - i = 0; -#else - i = REG_EXTENDED != 0; -#endif - for (got = 0; i < elementsof(unsupported) - 1; i++) - { - if (!got) - { - got = 1; - printf("NOTE\tunsupported: %s", unsupported[i]); - } - else - printf(",%s", unsupported[i]); - } - if (got) - printf("\n"); - } - } -#ifdef REG_DISCIPLINE - state.disc.disc.re_version = REG_VERSION; - state.disc.disc.re_compf = compf; - state.disc.disc.re_execf = execf; - if (!(state.disc.sp = sfstropen())) - bad("out of space [discipline string stream]\n", NiL, NiL, 0, 0); - preg.re_disc = &state.disc.disc; -#endif - if (test & TEST_CATCH) - { - signal(SIGALRM, gotcha); - signal(SIGBUS, gotcha); - signal(SIGSEGV, gotcha); - } - while (p = getline(fp)) - { - - /* parse: */ - - line = p; - if (*p == ':' && !isspace(*(p + 1))) - { - while (*++p && *p != ':'); - if (!*p++) - { - if (test & TEST_BASELINE) - printf("%s\n", line); - continue; - } - } - while (isspace(*p)) - p++; - if (*p == 0 || *p == '#' || *p == 'T') - { - if (test & TEST_BASELINE) - printf("%s\n", line); - continue; - } - if (*p == ':' || *p == 'N') - { - if (test & TEST_BASELINE) - printf("%s\n", line); - else if (!(test & (TEST_ACTUAL|TEST_FAIL|TEST_PASS|TEST_SUMMARY))) - { - while (*++p && !isspace(*p)); - while (isspace(*p)) - p++; - printf("NOTE %s\n", p); - } - continue; - } - j = 0; - i = 0; - field[i++] = p; - for (;;) - { - switch (*p++) - { - case 0: - p--; - j = 0; - goto checkfield; - case '\t': - *(delim[i] = p - 1) = 0; - j = 1; - checkfield: - s = field[i - 1]; - if (streq(s, "NIL")) - field[i - 1] = 0; - else if (streq(s, "NULL")) - *s = 0; - while (*p == '\t') - { - p++; - j++; - } - tabs[i - 1] = j; - if (!*p) - break; - if (i >= elementsof(field)) - bad("too many fields\n", NiL, NiL, 0, 0); - field[i++] = p; - /*FALLTHROUGH*/ - default: - continue; - } - break; - } - if (!(spec = field[0])) - 
bad("NIL spec\n", NiL, NiL, 0, 0); - - /* interpret: */ - - cflags = REG_TEST_DEFAULT; - eflags = REG_EXEC_DEFAULT; - test &= TEST_GLOBAL; - state.extracted = 0; - nmatch = 20; - nsub = -1; - for (p = spec; *p; p++) - { - if (isdigit(*p)) - { - nmatch = strtol(p, &p, 10); - if (nmatch >= elementsof(match)) - bad("nmatch must be < 100\n", NiL, NiL, 0, 0); - p--; - continue; - } - switch (*p) - { - case 'A': - test |= TEST_ARE; - continue; - case 'B': - test |= TEST_BRE; - continue; - case 'C': - if (!(test & TEST_QUERY) && !(skip & level)) - bad("locale must be nested\n", NiL, NiL, 0, 0); - test &= ~TEST_QUERY; - if (locale) - bad("locale nesting not supported\n", NiL, NiL, 0, 0); - if (i != 2) - bad("locale field expected\n", NiL, NiL, 0, 0); - if (!(skip & level)) - { -#if defined(LC_COLLATE) && defined(LC_CTYPE) - s = field[1]; - if (!s || streq(s, "POSIX")) - s = "C"; - if ((ans = setlocale(LC_COLLATE, s)) && streq(ans, "POSIX")) - ans = "C"; - if (!ans || !streq(ans, s) && streq(s, "C")) - ans = 0; - else if ((ans = setlocale(LC_CTYPE, s)) && streq(ans, "POSIX")) - ans = "C"; - if (!ans || !streq(ans, s) && streq(s, "C")) - skip = note(level, s, skip, test); - else - { - if (!(test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_SUMMARY))) - printf("NOTE \"%s\" locale\n", s); - locale = level; - } -#else - skip = note(level, skip, test, "locales not supported"); -#endif - } - cflags = NOTEST; - continue; - case 'E': - test |= TEST_ERE; - continue; - case 'K': - test |= TEST_KRE; - continue; - case 'L': - test |= TEST_LRE; - continue; - case 'S': - test |= TEST_SRE; - continue; - - case 'a': - cflags |= REG_LEFT|REG_RIGHT; - continue; - case 'b': - eflags |= REG_NOTBOL; - continue; - case 'c': - cflags |= REG_COMMENT; - continue; - case 'd': - cflags |= REG_SHELL_DOT; - continue; - case 'e': - eflags |= REG_NOTEOL; - continue; - case 'f': - cflags |= REG_MULTIPLE; - continue; - case 'g': - cflags |= NOTEST; - continue; - case 'h': - cflags |= REG_MULTIREF; 
- continue; - case 'i': - cflags |= REG_ICASE; - continue; - case 'j': - cflags |= REG_SPAN; - continue; - case 'k': - cflags |= REG_ESCAPE; - continue; - case 'l': - cflags |= REG_LEFT; - continue; - case 'm': - cflags |= REG_MINIMAL; - continue; - case 'n': - cflags |= REG_NEWLINE; - continue; - case 'o': - cflags |= REG_SHELL_GROUP; - continue; - case 'p': - cflags |= REG_SHELL_PATH; - continue; - case 'q': - cflags |= REG_DELIMITED; - continue; - case 'r': - cflags |= REG_RIGHT; - continue; - case 's': - cflags |= REG_SHELL_ESCAPED; - continue; - case 't': - cflags |= REG_MUSTDELIM; - continue; - case 'u': - test |= TEST_UNSPECIFIED; - continue; - case 'v': - cflags |= REG_CLASS_ESCAPE; - continue; - case 'w': - cflags |= REG_NOSUB; - continue; - case 'x': - if (REG_LENIENT) - cflags |= REG_LENIENT; - else - test |= TEST_LENIENT; - continue; - case 'y': - eflags |= REG_LEFT; - continue; - case 'z': - cflags |= REG_NULL; - continue; - - case '$': - test |= TEST_EXPAND; - continue; - - case '/': - test |= TEST_SUB; - continue; - - case '=': - test |= TEST_DECOMP; - continue; - - case '?': - test |= TEST_VERIFY; - test &= ~(TEST_AND|TEST_OR); - state.verify = state.passed; - continue; - case '&': - test |= TEST_VERIFY|TEST_AND; - test &= ~TEST_OR; - continue; - case '|': - test |= TEST_VERIFY|TEST_OR; - test &= ~TEST_AND; - continue; - case ';': - test |= TEST_OR; - test &= ~TEST_AND; - continue; - - case '{': - level <<= 1; - if (skip & (level >> 1)) - { - skip |= level; - cflags = NOTEST; - } - else - { - skip &= ~level; - test |= TEST_QUERY; - } - continue; - case '}': - if (level == 1) - bad("invalid {...} nesting\n", NiL, NiL, 0, 0); - if ((skip & level) && !(skip & (level>>1))) - { - if (!(test & (TEST_BASELINE|TEST_SUMMARY))) - { - if (test & (TEST_ACTUAL|TEST_FAIL)) - printf("}\n"); - else if (!(test & TEST_PASS)) - printf("-%d\n", state.lineno); - } - } -#if defined(LC_COLLATE) && defined(LC_CTYPE) - else if (locale & level) - { - locale = 0; - if (!(skip 
& level)) - { - s = "C"; - setlocale(LC_COLLATE, s); - setlocale(LC_CTYPE, s); - if (!(test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_SUMMARY))) - printf("NOTE \"%s\" locale\n", s); - else if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_PASS)) - printf("}\n"); - } - else if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL)) - printf("}\n"); - } -#endif - level >>= 1; - cflags = NOTEST; - continue; - - default: - bad("bad spec\n", spec, NiL, 0, test); - break; - - } - break; - } - if ((cflags|eflags) == NOTEST || (skip & level) && (test & TEST_BASELINE)) - { - if (test & TEST_BASELINE) - { - while (i > 1) - *delim[--i] = '\t'; - printf("%s\n", line); - } - continue; - } - if (test & TEST_OR) - { - if (!(test & TEST_VERIFY)) - { - test &= ~TEST_OR; - if (state.passed == state.verify && i > 1) - printf("NOTE\t%s\n", field[1]); - continue; - } - else if (state.passed > state.verify) - continue; - } - else if (test & TEST_AND) - { - if (state.passed == state.verify) - continue; - state.passed = state.verify; - } - if (i < ((test & TEST_DECOMP) ? 3 : 4)) - bad("too few fields\n", NiL, NiL, 0, test); - while (i < elementsof(field)) - field[i++] = 0; - if (re = field[1]) - { - if (streq(re, "SAME")) - { - re = ppat; - test |= TEST_SAME; - } - else - { - if (test & TEST_EXPAND) - escape(re); - re = expand(re, patbuf); - strcpy(ppat = pat, re); - } - } - else - ppat = 0; - nstr = -1; - if (s = field[2]) - { - s = expand(s, strbuf); - if (test & TEST_EXPAND) - { - nstr = escape(s); -#if _REG_nexec - if (nstr != strlen(s)) - nexec = nstr; -#endif - } - } - if (!(ans = field[(test & TEST_DECOMP) ? 
2 : 3])) - bad("NIL answer\n", NiL, NiL, 0, test); - msg = field[4]; - fflush(stdout); - if (test & TEST_SUB) -#if _REG_subcomp - cflags |= REG_DELIMITED; -#else - continue; -#endif -#if !_REG_decomp - if (test & TEST_DECOMP) - continue; -#endif - - compile: - - if (state.extracted || (skip & level)) - continue; -#if !(REG_TEST_DEFAULT & (REG_AUGMENTED|REG_EXTENDED|REG_SHELL)) -#ifdef REG_EXTENDED - if (REG_EXTENDED != 0 && (test & TEST_BRE)) -#else - if (test & TEST_BRE) -#endif - { - test &= ~TEST_BRE; - flags = cflags; - state.which = "BRE"; - } - else -#endif -#ifdef REG_EXTENDED - if (test & TEST_ERE) - { - test &= ~TEST_ERE; - flags = cflags | REG_EXTENDED; - state.which = "ERE"; - } - else -#endif -#ifdef REG_AUGMENTED - if (test & TEST_ARE) - { - test &= ~TEST_ARE; - flags = cflags | REG_AUGMENTED; - state.which = "ARE"; - } - else -#endif -#ifdef REG_LITERAL - if (test & TEST_LRE) - { - test &= ~TEST_LRE; - flags = cflags | REG_LITERAL; - state.which = "LRE"; - } - else -#endif -#ifdef REG_SHELL - if (test & TEST_SRE) - { - test &= ~TEST_SRE; - flags = cflags | REG_SHELL; - state.which = "SRE"; - } - else -#ifdef REG_AUGMENTED - if (test & TEST_KRE) - { - test &= ~TEST_KRE; - flags = cflags | REG_SHELL | REG_AUGMENTED; - state.which = "KRE"; - } - else -#endif -#endif - { - if (test & (TEST_BASELINE|TEST_PASS|TEST_VERIFY)) - extract(tabs, line, re, s, ans, msg, NiL, NiL, 0, 0, skip, level, test|TEST_OK); - continue; - } - if ((test & (TEST_QUERY|TEST_VERBOSE|TEST_VERIFY)) == TEST_VERBOSE) - { - printf("test %-3d %s ", state.lineno, state.which); - quote(re, -1, test|TEST_DELIMIT); - printf(" "); - quote(s, nstr, test|TEST_DELIMIT); - printf("\n"); - } - - nosub: - fun = "regcomp"; -#if _REG_nexec - if (nstr >= 0 && nstr != strlen(s)) - nexec = nstr; - - else -#endif - nexec = -1; - if (state.extracted || (skip & level)) - continue; - if (!(test & TEST_QUERY)) - testno++; -#ifdef REG_DISCIPLINE - if (state.stack) - stkset(stkstd, state.stack, 0); - flags |= 
REG_DISCIPLINE; - state.disc.ordinal = 0; - sfstrseek(state.disc.sp, 0, SEEK_SET); -#endif - if (!(test & TEST_CATCH)) - cret = regcomp(&preg, re, flags); - else if (!(cret = setjmp(state.gotcha))) - { - alarm(HUNG); - cret = regcomp(&preg, re, flags); - alarm(0); - } -#if _REG_subcomp - if (!cret && (test & TEST_SUB)) - { - fun = "regsubcomp"; - p = re + preg.re_npat; - if (!(test & TEST_CATCH)) - cret = regsubcomp(&preg, p, NiL, 0, 0); - else if (!(cret = setjmp(state.gotcha))) - { - alarm(HUNG); - cret = regsubcomp(&preg, p, NiL, 0, 0); - alarm(0); - } - if (!cret && *(p += preg.re_npat) && !(preg.re_sub->re_flags & REG_SUB_LAST)) - { - if (catchfree(&preg, flags, tabs, line, re, s, ans, msg, NiL, NiL, 0, 0, skip, level, test)) - continue; - cret = REG_EFLAGS; - } - } -#endif -#if _REG_decomp - if (!cret && (test & TEST_DECOMP)) - { - char buf[128]; - - if ((j = nmatch) > sizeof(buf)) - j = sizeof(buf); - fun = "regdecomp"; - p = re + preg.re_npat; - if (!(test & TEST_CATCH)) - i = regdecomp(&preg, -1, buf, j); - else if (!(cret = setjmp(state.gotcha))) - { - alarm(HUNG); - i = regdecomp(&preg, -1, buf, j); - alarm(0); - } - if (!cret) - { - catchfree(&preg, flags, tabs, line, re, s, ans, msg, NiL, NiL, 0, 0, skip, level, test); - if (i > j) - { - if (i != (strlen(ans) + 1)) - { - report("failed", fun, re, s, nstr, msg, flags, test); - printf(" %d byte buffer supplied, %d byte buffer required\n", j, i); - } - } - else if (strcmp(buf, ans)) - { - report("failed", fun, re, s, nstr, msg, flags, test); - quote(ans, -1, test|TEST_DELIMIT); - printf(" expected, "); - quote(buf, -1, test|TEST_DELIMIT); - printf(" returned\n"); - } - continue; - } - } -#endif - if (!cret) - { - if (!(flags & REG_NOSUB) && nsub < 0 && *ans == '(') - { - for (p = ans; *p; p++) - if (*p == '(') - nsub++; - else if (*p == '{') - nsub--; - if (nsub >= 0) - { - if (test & TEST_IGNORE_OVER) - { - if (nmatch > nsub) - nmatch = nsub + 1; - } - else if (nsub != preg.re_nsub) - { - if (nsub > 
preg.re_nsub) - { - if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY)) - skip = extract(tabs, line, re, s, ans, msg, "OK", NiL, 0, 0, skip, level, test|TEST_DELIMIT); - else - { - report("re_nsub incorrect", fun, re, NiL, -1, msg, flags, test); - printf("at least %d expected, %d returned\n", nsub, preg.re_nsub); - state.errors++; - } - } - else - nsub = preg.re_nsub; - } - } - } - if (!(test & (TEST_DECOMP|TEST_SUB)) && *ans && *ans != '(' && !streq(ans, "OK") && !streq(ans, "NOMATCH")) - { - if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY)) - skip = extract(tabs, line, re, s, ans, msg, "OK", NiL, 0, 0, skip, level, test|TEST_DELIMIT); - else if (!(test & TEST_LENIENT)) - { - report("failed", fun, re, NiL, -1, msg, flags, test); - printf("%s expected, OK returned\n", ans); - } - catchfree(&preg, flags, tabs, line, re, s, ans, msg, NiL, NiL, 0, 0, skip, level, test); - continue; - } - } - else - { - if (test & TEST_LENIENT) - /* we'll let it go this time */; - else if (!*ans || ans[0]=='(' || cret == REG_BADPAT && streq(ans, "NOMATCH")) - { - got = 0; - for (i = 1; i < elementsof(codes); i++) - if (cret==codes[i].code) - got = i; - if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY)) - skip = extract(tabs, line, re, s, ans, msg, codes[got].name, NiL, 0, 0, skip, level, test|TEST_DELIMIT); - else - { - report("failed", fun, re, NiL, -1, msg, flags, test); - printf("%s returned: ", codes[got].name); - error(&preg, cret); - } - } - else - { - expected = got = 0; - for (i = 1; i < elementsof(codes); i++) - { - if (streq(ans, codes[i].name)) - expected = i; - if (cret==codes[i].code) - got = i; - } - if (!expected) - { - if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY)) - skip = extract(tabs, line, re, s, ans, msg, codes[got].name, NiL, 0, 0, skip, level, test|TEST_DELIMIT); - else - { - 
report("failed: invalid error code", NiL, re, NiL, -1, msg, flags, test); - printf("%s expected, %s returned\n", ans, codes[got].name); - } - } - else if (cret != codes[expected].code && cret != REG_BADPAT) - { - if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY)) - skip = extract(tabs, line, re, s, ans, msg, codes[got].name, NiL, 0, 0, skip, level, test|TEST_DELIMIT); - else if (test & TEST_IGNORE_ERROR) - state.ignored++; - else - { - report("should fail and did", fun, re, NiL, -1, msg, flags, test); - printf("%s expected, %s returned: ", ans, codes[got].name); - state.errors--; - state.warnings++; - error(&preg, cret); - } - } - } - goto compile; - } - -#if _REG_nexec - execute: - if (nexec >= 0) - fun = "regnexec"; - else -#endif - fun = "regexec"; - - for (i = 0; i < elementsof(match); i++) - match[i] = state.NOMATCH; - -#if _REG_nexec - if (nexec >= 0) - { - eret = regnexec(&preg, s, nexec, nmatch, match, eflags); - s[nexec] = 0; - } - else -#endif - { - if (!(test & TEST_CATCH)) - eret = regexec(&preg, s, nmatch, match, eflags); - else if (!(eret = setjmp(state.gotcha))) - { - alarm(HUNG); - eret = regexec(&preg, s, nmatch, match, eflags); - alarm(0); - } - } -#if _REG_subcomp - if ((test & TEST_SUB) && !eret) - { - fun = "regsubexec"; - if (!(test & TEST_CATCH)) - eret = regsubexec(&preg, s, nmatch, match); - else if (!(eret = setjmp(state.gotcha))) - { - alarm(HUNG); - eret = regsubexec(&preg, s, nmatch, match); - alarm(0); - } - } -#endif - if (flags & REG_NOSUB) - { - if (eret) - { - if (eret != REG_NOMATCH || !streq(ans, "NOMATCH")) - { - if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY)) - skip = extract(tabs, line, re, s, ans, msg, "NOMATCH", NiL, 0, 0, skip, level, test|TEST_DELIMIT); - else - { - report("REG_NOSUB failed", fun, re, s, nstr, msg, flags, test); - error(&preg, eret); - } - } - } - else if (streq(ans, "NOMATCH")) - { - if (test & 
(TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY)) - skip = extract(tabs, line, re, s, ans, msg, NiL, match, nmatch, nsub, skip, level, test|TEST_DELIMIT); - else - { - report("should fail and didn't", fun, re, s, nstr, msg, flags, test); - error(&preg, eret); - } - } - } - else if (eret) - { - if (eret != REG_NOMATCH || !streq(ans, "NOMATCH")) - { - if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY)) - skip = extract(tabs, line, re, s, ans, msg, "NOMATCH", NiL, 0, nsub, skip, level, test|TEST_DELIMIT); - else - { - report("failed", fun, re, s, nstr, msg, flags, test); - if (eret != REG_NOMATCH) - error(&preg, eret); - else if (*ans) - printf("expected: %s\n", ans); - else - printf("\n"); - } - } - } - else if (streq(ans, "NOMATCH")) - { - if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY)) - skip = extract(tabs, line, re, s, ans, msg, NiL, match, nmatch, nsub, skip, level, test|TEST_DELIMIT); - else - { - report("should fail and didn't", fun, re, s, nstr, msg, flags, test); - matchprint(match, nmatch, nsub, NiL, test); - } - } -#if _REG_subcomp - else if (test & TEST_SUB) - { - p = preg.re_sub->re_buf; - if (strcmp(p, ans)) - { - report("failed", fun, re, s, nstr, msg, flags, test); - quote(ans, -1, test|TEST_DELIMIT); - printf(" expected, "); - quote(p, -1, test|TEST_DELIMIT); - printf(" returned\n"); - } - } -#endif - else if (!*ans) - { - if (match[0].rm_so != state.NOMATCH.rm_so) - { - if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY)) - skip = extract(tabs, line, re, s, ans, msg, NiL, NiL, 0, 0, skip, level, test); - else - { - report("failed: no match but match array assigned", NiL, re, s, nstr, msg, flags, test); - matchprint(match, nmatch, nsub, NiL, test); - } - } - } - else if (matchcheck(match, nmatch, nsub, ans, re, s, nstr, flags, test)) - { -#if _REG_nexec - if (nexec < 0 && !nonexec) - 
{ - nexec = nstr >= 0 ? nstr : strlen(s); - s[nexec] = '\n'; - testno++; - goto execute; - } -#endif - if (!(test & (TEST_DECOMP|TEST_SUB|TEST_VERIFY)) && !nonosub) - { - if (catchfree(&preg, flags, tabs, line, re, s, ans, msg, NiL, NiL, 0, 0, skip, level, test)) - continue; - flags |= REG_NOSUB; - goto nosub; - } - if (test & (TEST_BASELINE|TEST_PASS|TEST_VERIFY)) - skip = extract(tabs, line, re, s, ans, msg, NiL, match, nmatch, nsub, skip, level, test|TEST_OK); - } - else if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY)) - skip = extract(tabs, line, re, s, ans, msg, NiL, match, nmatch, nsub, skip, level, test|TEST_DELIMIT); - if (catchfree(&preg, flags, tabs, line, re, s, ans, msg, NiL, NiL, 0, 0, skip, level, test)) - continue; - goto compile; - } - if (test & TEST_SUMMARY) - printf("tests=%-4d errors=%-4d warnings=%-2d ignored=%-2d unspecified=%-2d signals=%d\n", testno, state.errors, state.warnings, state.ignored, state.unspecified, state.signals); - else if (!(test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS))) - { - printf("TEST\t%s", unit); - if (subunit) - printf(" %-.*s", subunitlen, subunit); - printf(", %d test%s", testno, testno == 1 ? "" : "s"); - if (state.ignored) - printf(", %d ignored mismatche%s", state.ignored, state.ignored == 1 ? "" : "s"); - if (state.warnings) - printf(", %d warning%s", state.warnings, state.warnings == 1 ? "" : "s"); - if (state.unspecified) - printf(", %d unspecified difference%s", state.unspecified, state.unspecified == 1 ? "" : "s"); - if (state.signals) - printf(", %d signal%s", state.signals, state.signals == 1 ? "" : "s"); - printf(", %d error%s\n", state.errors, state.errors == 1 ? "" : "s"); - } - if (fp != stdin) - fclose(fp); - } - return 0; -} |