diff options
Diffstat (limited to 'src/pkg/regexp')
-rw-r--r-- | src/pkg/regexp/all_test.go | 105 | ||||
-rw-r--r-- | src/pkg/regexp/example_test.go | 144 | ||||
-rw-r--r-- | src/pkg/regexp/exec_test.go | 50 | ||||
-rw-r--r-- | src/pkg/regexp/regexp.go | 74 | ||||
-rw-r--r-- | src/pkg/regexp/syntax/compile.go | 4 | ||||
-rw-r--r-- | src/pkg/regexp/syntax/doc.go | 127 | ||||
-rw-r--r-- | src/pkg/regexp/syntax/parse.go | 14 |
7 files changed, 453 insertions, 65 deletions
diff --git a/src/pkg/regexp/all_test.go b/src/pkg/regexp/all_test.go index f7b41a674..9c4d64f58 100644 --- a/src/pkg/regexp/all_test.go +++ b/src/pkg/regexp/all_test.go @@ -5,6 +5,7 @@ package regexp import ( + "reflect" "strings" "testing" ) @@ -29,53 +30,52 @@ var good_re = []string{ `\!\\`, } -/* type stringError struct { re string - err error + err string } var bad_re = []stringError{ - {`*`, ErrBareClosure}, - {`+`, ErrBareClosure}, - {`?`, ErrBareClosure}, - {`(abc`, ErrUnmatchedLpar}, - {`abc)`, ErrUnmatchedRpar}, - {`x[a-z`, ErrUnmatchedLbkt}, - {`abc]`, ErrUnmatchedRbkt}, - {`[z-a]`, ErrBadRange}, - {`abc\`, ErrExtraneousBackslash}, - {`a**`, ErrBadClosure}, - {`a*+`, ErrBadClosure}, - {`a??`, ErrBadClosure}, - {`\x`, ErrBadBackslash}, -} -*/ - -func compileTest(t *testing.T, expr string, error error) *Regexp { + {`*`, "missing argument to repetition operator: `*`"}, + {`+`, "missing argument to repetition operator: `+`"}, + {`?`, "missing argument to repetition operator: `?`"}, + {`(abc`, "missing closing ): `(abc`"}, + {`abc)`, "unexpected ): `abc)`"}, + {`x[a-z`, "missing closing ]: `[a-z`"}, + {`[z-a]`, "invalid character class range: `z-a`"}, + {`abc\`, "trailing backslash at end of expression"}, + {`a**`, "invalid nested repetition operator: `**`"}, + {`a*+`, "invalid nested repetition operator: `*+`"}, + {`\x`, "invalid escape sequence: `\\x`"}, +} + +func compileTest(t *testing.T, expr string, error string) *Regexp { re, err := Compile(expr) - if err != error { + if error == "" && err != nil { t.Error("compiling `", expr, "`; unexpected error: ", err.Error()) } + if error != "" && err == nil { + t.Error("compiling `", expr, "`; missing error") + } else if error != "" && !strings.Contains(err.Error(), error) { + t.Error("compiling `", expr, "`; wrong error: ", err.Error(), "; want ", error) + } return re } func TestGoodCompile(t *testing.T) { for i := 0; i < len(good_re); i++ { - compileTest(t, good_re[i], nil) + compileTest(t, good_re[i], "") } } -/* func TestBadCompile(t *testing.T) { for i := 0; i < len(bad_re); i++ { compileTest(t, bad_re[i].re, bad_re[i].err) } } -*/ func matchTest(t *testing.T, test *FindTest) { - re := compileTest(t, test.pat, nil) + re := compileTest(t, test.pat, "") if re == nil { return } @@ -196,6 +196,10 @@ var replaceTests = []ReplaceTest{ {"a+", "${oops", "aaa", "${oops"}, {"a+", "$$", "aaa", "$"}, {"a+", "$", "aaa", "$"}, + + // Substitution when subexpression isn't found + {"(x)?", "$1", "123", "123"}, + {"abc", "$1", "123", "123"}, } var replaceLiteralTests = []ReplaceTest{ @@ -416,6 +420,59 @@ func TestSubexp(t *testing.T) { } } +var splitTests = []struct { + s string + r string + n int + out []string +}{ + {"foo:and:bar", ":", -1, []string{"foo", "and", "bar"}}, + {"foo:and:bar", ":", 1, []string{"foo:and:bar"}}, + {"foo:and:bar", ":", 2, []string{"foo", "and:bar"}}, + {"foo:and:bar", "foo", -1, []string{"", ":and:bar"}}, + {"foo:and:bar", "bar", -1, []string{"foo:and:", ""}}, + {"foo:and:bar", "baz", -1, []string{"foo:and:bar"}}, + {"baabaab", "a", -1, []string{"b", "", "b", "", "b"}}, + {"baabaab", "a*", -1, []string{"b", "b", "b"}}, + {"baabaab", "ba*", -1, []string{"", "", "", ""}}, + {"foobar", "f*b*", -1, []string{"", "o", "o", "a", "r"}}, + {"foobar", "f+.*b+", -1, []string{"", "ar"}}, + {"foobooboar", "o{2}", -1, []string{"f", "b", "boar"}}, + {"a,b,c,d,e,f", ",", 3, []string{"a", "b", "c,d,e,f"}}, + {"a,b,c,d,e,f", ",", 0, nil}, + {",", ",", -1, []string{"", ""}}, + {",,,", ",", -1, []string{"", "", "", ""}}, + {"", ",", -1, []string{""}}, + {"", ".*", -1, []string{""}}, + {"", ".+", -1, []string{""}}, + {"", "", -1, []string{}}, + {"foobar", "", -1, []string{"f", "o", "o", "b", "a", "r"}}, + {"abaabaccadaaae", "a*", 5, []string{"", "b", "b", "c", "cadaaae"}}, + {":x:y:z:", ":", -1, []string{"", "x", "y", "z", ""}}, +} + +func TestSplit(t *testing.T) { + for i, test := range splitTests { + re, err := Compile(test.r) + if err != nil { + t.Errorf("#%d: %q: compile error: %s", i, test.r, err.Error()) + continue + } + + split := re.Split(test.s, test.n) + if !reflect.DeepEqual(split, test.out) { + t.Errorf("#%d: %q: got %q; want %q", i, test.r, split, test.out) + } + + if QuoteMeta(test.r) == test.r { + strsplit := strings.SplitN(test.s, test.r, test.n) + if !reflect.DeepEqual(split, strsplit) { + t.Errorf("#%d: Split(%q, %q, %d): regexp vs strings mismatch\nregexp=%q\nstrings=%q", i, test.s, test.r, test.n, split, strsplit) + } + } + } +} + func BenchmarkLiteral(b *testing.B) { x := strings.Repeat("x", 50) + "y" b.StopTimer() diff --git a/src/pkg/regexp/example_test.go b/src/pkg/regexp/example_test.go new file mode 100644 index 000000000..b0ad9d340 --- /dev/null +++ b/src/pkg/regexp/example_test.go @@ -0,0 +1,144 @@ +package regexp_test + +import ( + "fmt" + "regexp" +) + +func Example() { + // Compile the expression once, usually at init time. + // Use raw strings to avoid having to quote the backslashes. + var validID = regexp.MustCompile(`^[a-z]+\[[0-9]+\]$`) + + fmt.Println(validID.MatchString("adam[23]")) + fmt.Println(validID.MatchString("eve[7]")) + fmt.Println(validID.MatchString("Job[48]")) + fmt.Println(validID.MatchString("snakey")) + // Output: + // true + // true + // false + // false +} + +func ExampleMatchString() { + matched, err := regexp.MatchString("foo.*", "seafood") + fmt.Println(matched, err) + matched, err = regexp.MatchString("bar.*", "seafood") + fmt.Println(matched, err) + matched, err = regexp.MatchString("a(b", "seafood") + fmt.Println(matched, err) + // Output: + // true <nil> + // false <nil> + // false error parsing regexp: missing closing ): `a(b` +} + +func ExampleRegexp_FindString() { + re := regexp.MustCompile("fo.?") + fmt.Printf("%q\n", re.FindString("seafood")) + fmt.Printf("%q\n", re.FindString("meat")) + // Output: + // "foo" + // "" +} + +func ExampleRegexp_FindStringIndex() { + re := regexp.MustCompile("ab?") + fmt.Println(re.FindStringIndex("tablett")) + fmt.Println(re.FindStringIndex("foo") == nil) + // Output: + // [1 3] + // true +} + +func ExampleRegexp_FindStringSubmatch() { + re := regexp.MustCompile("a(x*)b(y|z)c") + fmt.Printf("%q\n", re.FindStringSubmatch("-axxxbyc-")) + fmt.Printf("%q\n", re.FindStringSubmatch("-abzc-")) + // Output: + // ["axxxbyc" "xxx" "y"] + // ["abzc" "" "z"] +} + +func ExampleRegexp_FindAllString() { + re := regexp.MustCompile("a.") + fmt.Println(re.FindAllString("paranormal", -1)) + fmt.Println(re.FindAllString("paranormal", 2)) + fmt.Println(re.FindAllString("graal", -1)) + fmt.Println(re.FindAllString("none", -1)) + // Output: + // [ar an al] + // [ar an] + // [aa] + // [] +} + +func ExampleRegexp_FindAllStringSubmatch() { + re := regexp.MustCompile("a(x*)b") + fmt.Printf("%q\n", re.FindAllStringSubmatch("-ab-", -1)) + fmt.Printf("%q\n", re.FindAllStringSubmatch("-axxb-", -1)) + fmt.Printf("%q\n", re.FindAllStringSubmatch("-ab-axb-", -1)) + fmt.Printf("%q\n", re.FindAllStringSubmatch("-axxb-ab-", -1)) + // Output: + // [["ab" ""]] + // [["axxb" "xx"]] + // [["ab" ""] ["axb" "x"]] + // [["axxb" "xx"] ["ab" ""]] +} + +func ExampleRegexp_FindAllStringSubmatchIndex() { + re := regexp.MustCompile("a(x*)b") + // Indices: + // 01234567 012345678 + // -ab-axb- -axxb-ab- + fmt.Println(re.FindAllStringSubmatchIndex("-ab-", -1)) + fmt.Println(re.FindAllStringSubmatchIndex("-axxb-", -1)) + fmt.Println(re.FindAllStringSubmatchIndex("-ab-axb-", -1)) + fmt.Println(re.FindAllStringSubmatchIndex("-axxb-ab-", -1)) + fmt.Println(re.FindAllStringSubmatchIndex("-foo-", -1)) + // Output: + // [[1 3 2 2]] + // [[1 5 2 4]] + // [[1 3 2 2] [4 7 5 6]] + // [[1 5 2 4] [6 8 7 7]] + // [] +} + +func ExampleRegexp_ReplaceAllLiteralString() { + re := regexp.MustCompile("a(x*)b") + fmt.Println(re.ReplaceAllLiteralString("-ab-axxb-", "T")) + fmt.Println(re.ReplaceAllLiteralString("-ab-axxb-", "$1")) + fmt.Println(re.ReplaceAllLiteralString("-ab-axxb-", "${1}")) + // Output: + // -T-T- + // -$1-$1- + // -${1}-${1}- +} + +func ExampleRegexp_ReplaceAllString() { + re := regexp.MustCompile("a(x*)b") + fmt.Println(re.ReplaceAllString("-ab-axxb-", "T")) + fmt.Println(re.ReplaceAllString("-ab-axxb-", "$1")) + fmt.Println(re.ReplaceAllString("-ab-axxb-", "$1W")) + fmt.Println(re.ReplaceAllString("-ab-axxb-", "${1}W")) + // Output: + // -T-T- + // --xx- + // --- + // -W-xxW- +} + +func ExampleRegexp_SubexpNames() { + re := regexp.MustCompile("(?P<first>[a-zA-Z]+) (?P<last>[a-zA-Z]+)") + fmt.Println(re.MatchString("Alan Turing")) + fmt.Printf("%q\n", re.SubexpNames()) + reversed := fmt.Sprintf("${%s} ${%s}", re.SubexpNames()[2], re.SubexpNames()[1]) + fmt.Println(reversed) + fmt.Println(re.ReplaceAllString("Alan Turing", reversed)) + // Output: + // true + // ["" "first" "last"] + // ${last} ${first} + // Turing Alan +} diff --git a/src/pkg/regexp/exec_test.go b/src/pkg/regexp/exec_test.go index e668574a5..9dfaed713 100644 --- a/src/pkg/regexp/exec_test.go +++ b/src/pkg/regexp/exec_test.go @@ -69,8 +69,7 @@ func TestRE2Search(t *testing.T) { func TestRE2Exhaustive(t *testing.T) { if testing.Short() { - t.Log("skipping TestRE2Exhaustive during short test") - return + t.Skip("skipping TestRE2Exhaustive during short test") } testRE2(t, "testdata/re2-exhaustive.txt.bz2") } @@ -90,7 +89,7 @@ func testRE2(t *testing.T, file string) { txt = f } lineno := 0 - r := bufio.NewReader(txt) + scanner := bufio.NewScanner(txt) var ( str []string input []string @@ -100,16 +99,8 @@ func testRE2(t *testing.T, file string) { nfail int ncase int ) - for { - line, err := r.ReadString('\n') - if err != nil { - if err == io.EOF { - break - } - t.Fatalf("%s:%d: %v", file, lineno, err) - } - line = line[:len(line)-1] // chop \n - lineno++ + for lineno := 1; scanner.Scan(); lineno++ { + line := scanner.Text() switch { case line == "": t.Fatalf("%s:%d: unexpected blank line", file, lineno) @@ -205,6 +196,9 @@ func testRE2(t *testing.T, file string) { t.Fatalf("%s:%d: out of sync: %s\n", file, lineno, line) } } + if err := scanner.Err(); err != nil { + t.Fatalf("%s:%d: %v", file, lineno, err) + } if len(input) != 0 { t.Fatalf("%s:%d: out of sync: have %d strings left at EOF", file, lineno, len(input)) } @@ -405,14 +399,14 @@ Reading: // implementation. If the first character is not [BEASKLP] then the // specification is a global control line. One or more of [BEASKLP] may be // specified; the test will be repeated for each mode. - // + // // B basic BRE (grep, ed, sed) // E REG_EXTENDED ERE (egrep) // A REG_AUGMENTED ARE (egrep with negation) // S REG_SHELL SRE (sh glob) // K REG_SHELL|REG_AUGMENTED KRE (ksh glob) // L REG_LITERAL LRE (fgrep) - // + // // a REG_LEFT|REG_RIGHT implicit ^...$ // b REG_NOTBOL lhs does not match ^ // c REG_COMMENT ignore space and #...\n @@ -442,23 +436,23 @@ Reading: // $ expand C \c escapes in fields 2 and 3 // / field 2 is a regsubcomp() expression // = field 3 is a regdecomp() expression - // + // // Field 1 control lines: - // + // // C set LC_COLLATE and LC_CTYPE to locale in field 2 - // + // // ?test ... output field 5 if passed and != EXPECTED, silent otherwise // &test ... output field 5 if current and previous passed // |test ... output field 5 if current passed and previous failed // ; ... output field 2 if previous failed // {test ... skip if failed until } // } end of skip - // + // // : comment comment copied as output NOTE // :comment:test :comment: ignored // N[OTE] comment comment copied as output NOTE // T[EST] comment comment - // + // // number use number for nmatch (20 by default) flag := field[0] switch flag[0] { @@ -501,7 +495,7 @@ Reading: // Field 2: the regular expression pattern; SAME uses the pattern from // the previous specification. - // + // if field[1] == "SAME" { field[1] = lastRegexp } @@ -707,3 +701,17 @@ func BenchmarkMatchHard_1K(b *testing.B) { benchmark(b, hard, 1<<10) } func BenchmarkMatchHard_32K(b *testing.B) { benchmark(b, hard, 32<<10) } func BenchmarkMatchHard_1M(b *testing.B) { benchmark(b, hard, 1<<20) } func BenchmarkMatchHard_32M(b *testing.B) { benchmark(b, hard, 32<<20) } + +func TestLongest(t *testing.T) { + re, err := Compile(`a(|b)`) + if err != nil { + t.Fatal(err) + } + if g, w := re.FindString("ab"), "a"; g != w { + t.Errorf("first match was %q, want %q", g, w) + } + re.Longest() + if g, w := re.FindString("ab"), "ab"; g != w { + t.Errorf("longest match was %q, want %q", g, w) + } +} diff --git a/src/pkg/regexp/regexp.go b/src/pkg/regexp/regexp.go index 87e6b1c61..3aa16dec6 100644 --- a/src/pkg/regexp/regexp.go +++ b/src/pkg/regexp/regexp.go @@ -130,6 +130,14 @@ func CompilePOSIX(expr string) (*Regexp, error) { return compile(expr, syntax.POSIX, true) } +// Longest makes future searches prefer the leftmost-longest match. +// That is, when matching against text, the regexp returns a match that +// begins as early as possible in the input (leftmost), and among those +// it chooses a match that is as long as possible. +func (re *Regexp) Longest() { + re.longest = true +} + func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, error) { re, err := syntax.Parse(expr, mode) if err != nil { @@ -441,7 +449,7 @@ func (re *Regexp) ReplaceAllLiteralString(src, repl string) string { } // ReplaceAllStringFunc returns a copy of src in which all matches of the -// Regexp have been replaced by the return value of of function repl applied +// Regexp have been replaced by the return value of function repl applied // to the matched substring. The replacement returned by repl is substituted // directly, without using Expand. func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string { @@ -539,7 +547,7 @@ func (re *Regexp) ReplaceAllLiteral(src, repl []byte) []byte { } // ReplaceAllFunc returns a copy of src in which all matches of the -// Regexp have been replaced by the return value of of function repl applied +// Regexp have been replaced by the return value of function repl applied // to the matched byte slice. The replacement returned by repl is substituted // directly, without using Expand. func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte { @@ -686,8 +694,9 @@ func (re *Regexp) FindStringIndex(s string) (loc []int) { // FindReaderIndex returns a two-element slice of integers defining the // location of the leftmost match of the regular expression in text read from -// the RuneReader. The match itself is at s[loc[0]:loc[1]]. A return -// value of nil indicates no match. +// the RuneReader. The match text was found in the input stream at +// byte offset loc[0] through loc[1]-1. +// A return value of nil indicates no match. func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int) { a := re.doExecute(r, nil, "", 0, 2) if a == nil { @@ -719,7 +728,7 @@ func (re *Regexp) FindSubmatch(b []byte) [][]byte { // append, Expand replaces variables in the template with corresponding // matches drawn from src. The match slice should have been returned by // FindSubmatchIndex. -// +// // In the template, a variable is denoted by a substring of the form // $name or ${name}, where name is a non-empty sequence of letters, // digits, and underscores. A purely numeric name like $1 refers to @@ -727,10 +736,10 @@ func (re *Regexp) FindSubmatch(b []byte) [][]byte { // capturing parentheses named with the (?P<name>...) syntax. A // reference to an out of range or unmatched index or a name that is not // present in the regular expression is replaced with an empty slice. -// +// // In the $name form, name is taken to be as long as possible: $1x is // equivalent to ${1x}, not ${1}x, and, $10 is equivalent to ${10}, not ${1}0. -// +// // To insert a literal $ in the output, use $$ in the template. func (re *Regexp) Expand(dst []byte, template []byte, src []byte, match []int) []byte { return re.expand(dst, string(template), src, "", match) @@ -766,7 +775,7 @@ func (re *Regexp) expand(dst []byte, template string, bsrc []byte, src string, m } template = rest if num >= 0 { - if 2*num+1 < len(match) { + if 2*num+1 < len(match) && match[2*num] >= 0 { if bsrc != nil { dst = append(dst, bsrc[match[2*num]:match[2*num+1]]...) } else { @@ -1047,3 +1056,52 @@ func (re *Regexp) FindAllStringSubmatchIndex(s string, n int) [][]int { } return result } + +// Split slices s into substrings separated by the expression and returns a slice of +// the substrings between those expression matches. +// +// The slice returned by this method consists of all the substrings of s +// not contained in the slice returned by FindAllString. When called on an expression +// that contains no metacharacters, it is equivalent to strings.SplitN. +// +// Example: +// s := regexp.MustCompile("a*").Split("abaabaccadaaae", 5) +// // s: ["", "b", "b", "c", "cadaaae"] +// +// The count determines the number of substrings to return: +// n > 0: at most n substrings; the last substring will be the unsplit remainder. +// n == 0: the result is nil (zero substrings) +// n < 0: all substrings +func (re *Regexp) Split(s string, n int) []string { + + if n == 0 { + return nil + } + + if len(re.expr) > 0 && len(s) == 0 { + return []string{""} + } + + matches := re.FindAllStringIndex(s, n) + strings := make([]string, 0, len(matches)) + + beg := 0 + end := 0 + for _, match := range matches { + if n > 0 && len(strings) >= n-1 { + break + } + + end = match[0] + if match[1] != 0 { + strings = append(strings, s[beg:end]) + } + beg = match[1] + } + + if end != len(s) { + strings = append(strings, s[beg:]) + } + + return strings +} diff --git a/src/pkg/regexp/syntax/compile.go b/src/pkg/regexp/syntax/compile.go index 41955bfc2..95f6f1569 100644 --- a/src/pkg/regexp/syntax/compile.go +++ b/src/pkg/regexp/syntax/compile.go @@ -10,10 +10,10 @@ import "unicode" // Because the pointers haven't been filled in yet, we can reuse their storage // to hold the list. It's kind of sleazy, but works well in practice. // See http://swtch.com/~rsc/regexp/regexp1.html for inspiration. -// +// // These aren't really pointers: they're integers, so we can reinterpret them // this way without using package unsafe. A value l denotes -// p.inst[l>>1].Out (l&1==0) or .Arg (l&1==1). +// p.inst[l>>1].Out (l&1==0) or .Arg (l&1==1). // l == 0 denotes the empty list, okay because we start every program // with a fail instruction, so we'll never want to point at its output link. type patchList uint32 diff --git a/src/pkg/regexp/syntax/doc.go b/src/pkg/regexp/syntax/doc.go new file mode 100644 index 000000000..843a6f6a4 --- /dev/null +++ b/src/pkg/regexp/syntax/doc.go @@ -0,0 +1,127 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// DO NOT EDIT. This file is generated by mksyntaxgo from the RE2 distribution. + +/* +Package syntax parses regular expressions into parse trees and compiles +parse trees into programs. Most clients of regular expressions will use the +facilities of package regexp (such as Compile and Match) instead of this package. + +Syntax + +The regular expression syntax understood by this package when parsing with the Perl flag is as follows. +Parts of the syntax can be disabled by passing alternate flags to Parse. + + +Single characters: + . any character, possibly including newline (flag s=true) + [xyz] character class + [^xyz] negated character class + \d Perl character class + \D negated Perl character class + [:alpha:] ASCII character class + [:^alpha:] negated ASCII character class + \pN Unicode character class (one-letter name) + \p{Greek} Unicode character class + \PN negated Unicode character class (one-letter name) + \P{Greek} negated Unicode character class + +Composites: + xy x followed by y + x|y x or y (prefer x) + +Repetitions: + x* zero or more x, prefer more + x+ one or more x, prefer more + x? zero or one x, prefer one + x{n,m} n or n+1 or ... or m x, prefer more + x{n,} n or more x, prefer more + x{n} exactly n x + x*? zero or more x, prefer fewer + x+? one or more x, prefer fewer + x?? zero or one x, prefer zero + x{n,m}? n or n+1 or ... or m x, prefer fewer + x{n,}? n or more x, prefer fewer + x{n}? exactly n x + +Grouping: + (re) numbered capturing group + (?P<name>re) named & numbered capturing group + (?:re) non-capturing group + (?flags) set flags within current group; non-capturing + (?flags:re) set flags during re; non-capturing + + Flag syntax is xyz (set) or -xyz (clear) or xy-z (set xy, clear z). The flags are: + + i case-insensitive (default false) + m multi-line mode: ^ and $ match begin/end line in addition to begin/end text (default false) + s let . match \n (default false) + U ungreedy: swap meaning of x* and x*?, x+ and x+?, etc (default false) + +Empty strings: + ^ at beginning of text or line (flag m=true) + $ at end of text (like \z not \Z) or line (flag m=true) + \A at beginning of text + \b at word boundary (\w on one side and \W, \A, or \z on the other) + \B not a word boundary + \z at end of text + +Escape sequences: + \a bell (== \007) + \f form feed (== \014) + \t horizontal tab (== \011) + \n newline (== \012) + \r carriage return (== \015) + \v vertical tab character (== \013) + \* literal *, for any punctuation character * + \123 octal character code (up to three digits) + \x7F hex character code (exactly two digits) + \x{10FFFF} hex character code + \Q...\E literal text ... even if ... has punctuation + +Character class elements: + x single character + A-Z character range (inclusive) + \d Perl character class + [:foo:] ASCII character class foo + \p{Foo} Unicode character class Foo + \pF Unicode character class F (one-letter name) + +Named character classes as character class elements: + [\d] digits (== \d) + [^\d] not digits (== \D) + [\D] not digits (== \D) + [^\D] not not digits (== \d) + [[:name:]] named ASCII class inside character class (== [:name:]) + [^[:name:]] named ASCII class inside negated character class (== [:^name:]) + [\p{Name}] named Unicode property inside character class (== \p{Name}) + [^\p{Name}] named Unicode property inside negated character class (== \P{Name}) + +Perl character classes: + \d digits (== [0-9]) + \D not digits (== [^0-9]) + \s whitespace (== [\t\n\f\r ]) + \S not whitespace (== [^\t\n\f\r ]) + \w word characters (== [0-9A-Za-z_]) + \W not word characters (== [^0-9A-Za-z_]) + +ASCII character classes: + [:alnum:] alphanumeric (== [0-9A-Za-z]) + [:alpha:] alphabetic (== [A-Za-z]) + [:ascii:] ASCII (== [\x00-\x7F]) + [:blank:] blank (== [\t ]) + [:cntrl:] control (== [\x00-\x1F\x7F]) + [:digit:] digits (== [0-9]) + [:graph:] graphical (== [!-~] == [A-Za-z0-9!"#$%&'()*+,\-./:;<=>?@[\\\]^_`{|}~]) + [:lower:] lower case (== [a-z]) + [:print:] printable (== [ -~] == [ [:graph:]]) + [:punct:] punctuation (== [!-/:-@[-`{-~]) + [:space:] whitespace (== [\t\n\v\f\r ]) + [:upper:] upper case (== [A-Z]) + [:word:] word characters (== [0-9A-Za-z_]) + [:xdigit:] hex digit (== [0-9A-Fa-f]) + +*/ +package syntax diff --git a/src/pkg/regexp/syntax/parse.go b/src/pkg/regexp/syntax/parse.go index 4924e9453..30e0e8b7f 100644 --- a/src/pkg/regexp/syntax/parse.go +++ b/src/pkg/regexp/syntax/parse.go @@ -2,10 +2,6 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// Package syntax parses regular expressions into parse trees and compiles -// parse trees into programs. Most clients of regular expressions will use -// the facilities of package regexp (such as Compile and Match) instead of -// this package. package syntax import ( @@ -46,11 +42,9 @@ const ( ErrMissingParen ErrorCode = "missing closing )" ErrMissingRepeatArgument ErrorCode = "missing argument to repetition operator" ErrTrailingBackslash ErrorCode = "trailing backslash at end of expression" + ErrUnexpectedParen ErrorCode = "unexpected )" ) -// TODO: Export for Go 1.1. -const errUnexpectedParen ErrorCode = "unexpected )" - func (e ErrorCode) String() string { return string(e) } @@ -470,7 +464,7 @@ func (p *parser) factor(sub []*Regexp, flags Flags) []*Regexp { // Construct factored form: prefix(suffix1|suffix2|...) prefix := first for j := start; j < i; j++ { - reuse := j != start // prefix came from sub[start] + reuse := j != start // prefix came from sub[start] sub[j] = p.removeLeadingRegexp(sub[j], reuse) } suffix := p.collapse(sub[start:i], OpAlternate) // recurse @@ -1171,13 +1165,13 @@ func (p *parser) parseRightParen() error { n := len(p.stack) if n < 2 { - return &Error{errUnexpectedParen, p.wholeRegexp} + return &Error{ErrUnexpectedParen, p.wholeRegexp} } re1 := p.stack[n-1] re2 := p.stack[n-2] p.stack = p.stack[:n-2] if re2.Op != opLeftParen { - return &Error{errUnexpectedParen, p.wholeRegexp} + return &Error{ErrUnexpectedParen, p.wholeRegexp} } // Restore flags at time of paren. p.flags = re2.Flags |