summary refs log tree commit diff
path: root/src/pkg/regexp
diff options
context:
space:
mode:
Diffstat (limited to 'src/pkg/regexp')
-rw-r--r-- src/pkg/regexp/all_test.go 105
-rw-r--r-- src/pkg/regexp/example_test.go 144
-rw-r--r-- src/pkg/regexp/exec_test.go 50
-rw-r--r-- src/pkg/regexp/regexp.go 74
-rw-r--r-- src/pkg/regexp/syntax/compile.go 4
-rw-r--r-- src/pkg/regexp/syntax/doc.go 127
-rw-r--r-- src/pkg/regexp/syntax/parse.go 14
7 files changed, 453 insertions, 65 deletions
diff --git a/src/pkg/regexp/all_test.go b/src/pkg/regexp/all_test.go
index f7b41a674..9c4d64f58 100644
--- a/src/pkg/regexp/all_test.go
+++ b/src/pkg/regexp/all_test.go
@@ -5,6 +5,7 @@
package regexp
import (
+ "reflect"
"strings"
"testing"
)
@@ -29,53 +30,52 @@ var good_re = []string{
`\!\\`,
}
-/*
type stringError struct {
re string
- err error
+ err string
}
var bad_re = []stringError{
- {`*`, ErrBareClosure},
- {`+`, ErrBareClosure},
- {`?`, ErrBareClosure},
- {`(abc`, ErrUnmatchedLpar},
- {`abc)`, ErrUnmatchedRpar},
- {`x[a-z`, ErrUnmatchedLbkt},
- {`abc]`, ErrUnmatchedRbkt},
- {`[z-a]`, ErrBadRange},
- {`abc\`, ErrExtraneousBackslash},
- {`a**`, ErrBadClosure},
- {`a*+`, ErrBadClosure},
- {`a??`, ErrBadClosure},
- {`\x`, ErrBadBackslash},
-}
-*/
-
-func compileTest(t *testing.T, expr string, error error) *Regexp {
+ {`*`, "missing argument to repetition operator: `*`"},
+ {`+`, "missing argument to repetition operator: `+`"},
+ {`?`, "missing argument to repetition operator: `?`"},
+ {`(abc`, "missing closing ): `(abc`"},
+ {`abc)`, "unexpected ): `abc)`"},
+ {`x[a-z`, "missing closing ]: `[a-z`"},
+ {`[z-a]`, "invalid character class range: `z-a`"},
+ {`abc\`, "trailing backslash at end of expression"},
+ {`a**`, "invalid nested repetition operator: `**`"},
+ {`a*+`, "invalid nested repetition operator: `*+`"},
+ {`\x`, "invalid escape sequence: `\\x`"},
+}
+
+func compileTest(t *testing.T, expr string, error string) *Regexp {
re, err := Compile(expr)
- if err != error {
+ if error == "" && err != nil {
t.Error("compiling `", expr, "`; unexpected error: ", err.Error())
}
+ if error != "" && err == nil {
+ t.Error("compiling `", expr, "`; missing error")
+ } else if error != "" && !strings.Contains(err.Error(), error) {
+ t.Error("compiling `", expr, "`; wrong error: ", err.Error(), "; want ", error)
+ }
return re
}
func TestGoodCompile(t *testing.T) {
for i := 0; i < len(good_re); i++ {
- compileTest(t, good_re[i], nil)
+ compileTest(t, good_re[i], "")
}
}
-/*
func TestBadCompile(t *testing.T) {
for i := 0; i < len(bad_re); i++ {
compileTest(t, bad_re[i].re, bad_re[i].err)
}
}
-*/
func matchTest(t *testing.T, test *FindTest) {
- re := compileTest(t, test.pat, nil)
+ re := compileTest(t, test.pat, "")
if re == nil {
return
}
@@ -196,6 +196,10 @@ var replaceTests = []ReplaceTest{
{"a+", "${oops", "aaa", "${oops"},
{"a+", "$$", "aaa", "$"},
{"a+", "$", "aaa", "$"},
+
+ // Substitution when subexpression isn't found
+ {"(x)?", "$1", "123", "123"},
+ {"abc", "$1", "123", "123"},
}
var replaceLiteralTests = []ReplaceTest{
@@ -416,6 +420,59 @@ func TestSubexp(t *testing.T) {
}
}
+var splitTests = []struct {
+ s string
+ r string
+ n int
+ out []string
+}{
+ {"foo:and:bar", ":", -1, []string{"foo", "and", "bar"}},
+ {"foo:and:bar", ":", 1, []string{"foo:and:bar"}},
+ {"foo:and:bar", ":", 2, []string{"foo", "and:bar"}},
+ {"foo:and:bar", "foo", -1, []string{"", ":and:bar"}},
+ {"foo:and:bar", "bar", -1, []string{"foo:and:", ""}},
+ {"foo:and:bar", "baz", -1, []string{"foo:and:bar"}},
+ {"baabaab", "a", -1, []string{"b", "", "b", "", "b"}},
+ {"baabaab", "a*", -1, []string{"b", "b", "b"}},
+ {"baabaab", "ba*", -1, []string{"", "", "", ""}},
+ {"foobar", "f*b*", -1, []string{"", "o", "o", "a", "r"}},
+ {"foobar", "f+.*b+", -1, []string{"", "ar"}},
+ {"foobooboar", "o{2}", -1, []string{"f", "b", "boar"}},
+ {"a,b,c,d,e,f", ",", 3, []string{"a", "b", "c,d,e,f"}},
+ {"a,b,c,d,e,f", ",", 0, nil},
+ {",", ",", -1, []string{"", ""}},
+ {",,,", ",", -1, []string{"", "", "", ""}},
+ {"", ",", -1, []string{""}},
+ {"", ".*", -1, []string{""}},
+ {"", ".+", -1, []string{""}},
+ {"", "", -1, []string{}},
+ {"foobar", "", -1, []string{"f", "o", "o", "b", "a", "r"}},
+ {"abaabaccadaaae", "a*", 5, []string{"", "b", "b", "c", "cadaaae"}},
+ {":x:y:z:", ":", -1, []string{"", "x", "y", "z", ""}},
+}
+
+func TestSplit(t *testing.T) {
+ for i, test := range splitTests {
+ re, err := Compile(test.r)
+ if err != nil {
+ t.Errorf("#%d: %q: compile error: %s", i, test.r, err.Error())
+ continue
+ }
+
+ split := re.Split(test.s, test.n)
+ if !reflect.DeepEqual(split, test.out) {
+ t.Errorf("#%d: %q: got %q; want %q", i, test.r, split, test.out)
+ }
+
+ if QuoteMeta(test.r) == test.r {
+ strsplit := strings.SplitN(test.s, test.r, test.n)
+ if !reflect.DeepEqual(split, strsplit) {
+ t.Errorf("#%d: Split(%q, %q, %d): regexp vs strings mismatch\nregexp=%q\nstrings=%q", i, test.s, test.r, test.n, split, strsplit)
+ }
+ }
+ }
+}
+
func BenchmarkLiteral(b *testing.B) {
x := strings.Repeat("x", 50) + "y"
b.StopTimer()
diff --git a/src/pkg/regexp/example_test.go b/src/pkg/regexp/example_test.go
new file mode 100644
index 000000000..b0ad9d340
--- /dev/null
+++ b/src/pkg/regexp/example_test.go
@@ -0,0 +1,144 @@
+package regexp_test
+
+import (
+ "fmt"
+ "regexp"
+)
+
+func Example() {
+ // Compile the expression once, usually at init time.
+ // Use raw strings to avoid having to quote the backslashes.
+ var validID = regexp.MustCompile(`^[a-z]+\[[0-9]+\]$`)
+
+ fmt.Println(validID.MatchString("adam[23]"))
+ fmt.Println(validID.MatchString("eve[7]"))
+ fmt.Println(validID.MatchString("Job[48]"))
+ fmt.Println(validID.MatchString("snakey"))
+ // Output:
+ // true
+ // true
+ // false
+ // false
+}
+
+func ExampleMatchString() {
+ matched, err := regexp.MatchString("foo.*", "seafood")
+ fmt.Println(matched, err)
+ matched, err = regexp.MatchString("bar.*", "seafood")
+ fmt.Println(matched, err)
+ matched, err = regexp.MatchString("a(b", "seafood")
+ fmt.Println(matched, err)
+ // Output:
+ // true <nil>
+ // false <nil>
+ // false error parsing regexp: missing closing ): `a(b`
+}
+
+func ExampleRegexp_FindString() {
+ re := regexp.MustCompile("fo.?")
+ fmt.Printf("%q\n", re.FindString("seafood"))
+ fmt.Printf("%q\n", re.FindString("meat"))
+ // Output:
+ // "foo"
+ // ""
+}
+
+func ExampleRegexp_FindStringIndex() {
+ re := regexp.MustCompile("ab?")
+ fmt.Println(re.FindStringIndex("tablett"))
+ fmt.Println(re.FindStringIndex("foo") == nil)
+ // Output:
+ // [1 3]
+ // true
+}
+
+func ExampleRegexp_FindStringSubmatch() {
+ re := regexp.MustCompile("a(x*)b(y|z)c")
+ fmt.Printf("%q\n", re.FindStringSubmatch("-axxxbyc-"))
+ fmt.Printf("%q\n", re.FindStringSubmatch("-abzc-"))
+ // Output:
+ // ["axxxbyc" "xxx" "y"]
+ // ["abzc" "" "z"]
+}
+
+func ExampleRegexp_FindAllString() {
+ re := regexp.MustCompile("a.")
+ fmt.Println(re.FindAllString("paranormal", -1))
+ fmt.Println(re.FindAllString("paranormal", 2))
+ fmt.Println(re.FindAllString("graal", -1))
+ fmt.Println(re.FindAllString("none", -1))
+ // Output:
+ // [ar an al]
+ // [ar an]
+ // [aa]
+ // []
+}
+
+func ExampleRegexp_FindAllStringSubmatch() {
+ re := regexp.MustCompile("a(x*)b")
+ fmt.Printf("%q\n", re.FindAllStringSubmatch("-ab-", -1))
+ fmt.Printf("%q\n", re.FindAllStringSubmatch("-axxb-", -1))
+ fmt.Printf("%q\n", re.FindAllStringSubmatch("-ab-axb-", -1))
+ fmt.Printf("%q\n", re.FindAllStringSubmatch("-axxb-ab-", -1))
+ // Output:
+ // [["ab" ""]]
+ // [["axxb" "xx"]]
+ // [["ab" ""] ["axb" "x"]]
+ // [["axxb" "xx"] ["ab" ""]]
+}
+
+func ExampleRegexp_FindAllStringSubmatchIndex() {
+ re := regexp.MustCompile("a(x*)b")
+ // Indices:
+ // 01234567 012345678
+ // -ab-axb- -axxb-ab-
+ fmt.Println(re.FindAllStringSubmatchIndex("-ab-", -1))
+ fmt.Println(re.FindAllStringSubmatchIndex("-axxb-", -1))
+ fmt.Println(re.FindAllStringSubmatchIndex("-ab-axb-", -1))
+ fmt.Println(re.FindAllStringSubmatchIndex("-axxb-ab-", -1))
+ fmt.Println(re.FindAllStringSubmatchIndex("-foo-", -1))
+ // Output:
+ // [[1 3 2 2]]
+ // [[1 5 2 4]]
+ // [[1 3 2 2] [4 7 5 6]]
+ // [[1 5 2 4] [6 8 7 7]]
+ // []
+}
+
+func ExampleRegexp_ReplaceAllLiteralString() {
+ re := regexp.MustCompile("a(x*)b")
+ fmt.Println(re.ReplaceAllLiteralString("-ab-axxb-", "T"))
+ fmt.Println(re.ReplaceAllLiteralString("-ab-axxb-", "$1"))
+ fmt.Println(re.ReplaceAllLiteralString("-ab-axxb-", "${1}"))
+ // Output:
+ // -T-T-
+ // -$1-$1-
+ // -${1}-${1}-
+}
+
+func ExampleRegexp_ReplaceAllString() {
+ re := regexp.MustCompile("a(x*)b")
+ fmt.Println(re.ReplaceAllString("-ab-axxb-", "T"))
+ fmt.Println(re.ReplaceAllString("-ab-axxb-", "$1"))
+ fmt.Println(re.ReplaceAllString("-ab-axxb-", "$1W"))
+ fmt.Println(re.ReplaceAllString("-ab-axxb-", "${1}W"))
+ // Output:
+ // -T-T-
+ // --xx-
+ // ---
+ // -W-xxW-
+}
+
+func ExampleRegexp_SubexpNames() {
+ re := regexp.MustCompile("(?P<first>[a-zA-Z]+) (?P<last>[a-zA-Z]+)")
+ fmt.Println(re.MatchString("Alan Turing"))
+ fmt.Printf("%q\n", re.SubexpNames())
+ reversed := fmt.Sprintf("${%s} ${%s}", re.SubexpNames()[2], re.SubexpNames()[1])
+ fmt.Println(reversed)
+ fmt.Println(re.ReplaceAllString("Alan Turing", reversed))
+ // Output:
+ // true
+ // ["" "first" "last"]
+ // ${last} ${first}
+ // Turing Alan
+}
diff --git a/src/pkg/regexp/exec_test.go b/src/pkg/regexp/exec_test.go
index e668574a5..9dfaed713 100644
--- a/src/pkg/regexp/exec_test.go
+++ b/src/pkg/regexp/exec_test.go
@@ -69,8 +69,7 @@ func TestRE2Search(t *testing.T) {
func TestRE2Exhaustive(t *testing.T) {
if testing.Short() {
- t.Log("skipping TestRE2Exhaustive during short test")
- return
+ t.Skip("skipping TestRE2Exhaustive during short test")
}
testRE2(t, "testdata/re2-exhaustive.txt.bz2")
}
@@ -90,7 +89,7 @@ func testRE2(t *testing.T, file string) {
txt = f
}
lineno := 0
- r := bufio.NewReader(txt)
+ scanner := bufio.NewScanner(txt)
var (
str []string
input []string
@@ -100,16 +99,8 @@ func testRE2(t *testing.T, file string) {
nfail int
ncase int
)
- for {
- line, err := r.ReadString('\n')
- if err != nil {
- if err == io.EOF {
- break
- }
- t.Fatalf("%s:%d: %v", file, lineno, err)
- }
- line = line[:len(line)-1] // chop \n
- lineno++
+ for lineno := 1; scanner.Scan(); lineno++ {
+ line := scanner.Text()
switch {
case line == "":
t.Fatalf("%s:%d: unexpected blank line", file, lineno)
@@ -205,6 +196,9 @@ func testRE2(t *testing.T, file string) {
t.Fatalf("%s:%d: out of sync: %s\n", file, lineno, line)
}
}
+ if err := scanner.Err(); err != nil {
+ t.Fatalf("%s:%d: %v", file, lineno, err)
+ }
if len(input) != 0 {
t.Fatalf("%s:%d: out of sync: have %d strings left at EOF", file, lineno, len(input))
}
@@ -405,14 +399,14 @@ Reading:
// implementation. If the first character is not [BEASKLP] then the
// specification is a global control line. One or more of [BEASKLP] may be
// specified; the test will be repeated for each mode.
- //
+ //
// B basic BRE (grep, ed, sed)
// E REG_EXTENDED ERE (egrep)
// A REG_AUGMENTED ARE (egrep with negation)
// S REG_SHELL SRE (sh glob)
// K REG_SHELL|REG_AUGMENTED KRE (ksh glob)
// L REG_LITERAL LRE (fgrep)
- //
+ //
// a REG_LEFT|REG_RIGHT implicit ^...$
// b REG_NOTBOL lhs does not match ^
// c REG_COMMENT ignore space and #...\n
@@ -442,23 +436,23 @@ Reading:
// $ expand C \c escapes in fields 2 and 3
// / field 2 is a regsubcomp() expression
// = field 3 is a regdecomp() expression
- //
+ //
// Field 1 control lines:
- //
+ //
// C set LC_COLLATE and LC_CTYPE to locale in field 2
- //
+ //
// ?test ... output field 5 if passed and != EXPECTED, silent otherwise
// &test ... output field 5 if current and previous passed
// |test ... output field 5 if current passed and previous failed
// ; ... output field 2 if previous failed
// {test ... skip if failed until }
// } end of skip
- //
+ //
// : comment comment copied as output NOTE
// :comment:test :comment: ignored
// N[OTE] comment comment copied as output NOTE
// T[EST] comment comment
- //
+ //
// number use number for nmatch (20 by default)
flag := field[0]
switch flag[0] {
@@ -501,7 +495,7 @@ Reading:
// Field 2: the regular expression pattern; SAME uses the pattern from
// the previous specification.
- //
+ //
if field[1] == "SAME" {
field[1] = lastRegexp
}
@@ -707,3 +701,17 @@ func BenchmarkMatchHard_1K(b *testing.B) { benchmark(b, hard, 1<<10) }
func BenchmarkMatchHard_32K(b *testing.B) { benchmark(b, hard, 32<<10) }
func BenchmarkMatchHard_1M(b *testing.B) { benchmark(b, hard, 1<<20) }
func BenchmarkMatchHard_32M(b *testing.B) { benchmark(b, hard, 32<<20) }
+
+func TestLongest(t *testing.T) {
+ re, err := Compile(`a(|b)`)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if g, w := re.FindString("ab"), "a"; g != w {
+ t.Errorf("first match was %q, want %q", g, w)
+ }
+ re.Longest()
+ if g, w := re.FindString("ab"), "ab"; g != w {
+ t.Errorf("longest match was %q, want %q", g, w)
+ }
+}
diff --git a/src/pkg/regexp/regexp.go b/src/pkg/regexp/regexp.go
index 87e6b1c61..3aa16dec6 100644
--- a/src/pkg/regexp/regexp.go
+++ b/src/pkg/regexp/regexp.go
@@ -130,6 +130,14 @@ func CompilePOSIX(expr string) (*Regexp, error) {
return compile(expr, syntax.POSIX, true)
}
+// Longest makes future searches prefer the leftmost-longest match.
+// That is, when matching against text, the regexp returns a match that
+// begins as early as possible in the input (leftmost), and among those
+// it chooses a match that is as long as possible.
+func (re *Regexp) Longest() {
+ re.longest = true
+}
+
func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, error) {
re, err := syntax.Parse(expr, mode)
if err != nil {
@@ -441,7 +449,7 @@ func (re *Regexp) ReplaceAllLiteralString(src, repl string) string {
}
// ReplaceAllStringFunc returns a copy of src in which all matches of the
-// Regexp have been replaced by the return value of of function repl applied
+// Regexp have been replaced by the return value of function repl applied
// to the matched substring. The replacement returned by repl is substituted
// directly, without using Expand.
func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string {
@@ -539,7 +547,7 @@ func (re *Regexp) ReplaceAllLiteral(src, repl []byte) []byte {
}
// ReplaceAllFunc returns a copy of src in which all matches of the
-// Regexp have been replaced by the return value of of function repl applied
+// Regexp have been replaced by the return value of function repl applied
// to the matched byte slice. The replacement returned by repl is substituted
// directly, without using Expand.
func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte {
@@ -686,8 +694,9 @@ func (re *Regexp) FindStringIndex(s string) (loc []int) {
// FindReaderIndex returns a two-element slice of integers defining the
// location of the leftmost match of the regular expression in text read from
-// the RuneReader. The match itself is at s[loc[0]:loc[1]]. A return
-// value of nil indicates no match.
+// the RuneReader. The match text was found in the input stream at
+// byte offset loc[0] through loc[1]-1.
+// A return value of nil indicates no match.
func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int) {
a := re.doExecute(r, nil, "", 0, 2)
if a == nil {
@@ -719,7 +728,7 @@ func (re *Regexp) FindSubmatch(b []byte) [][]byte {
// append, Expand replaces variables in the template with corresponding
// matches drawn from src. The match slice should have been returned by
// FindSubmatchIndex.
-//
+//
// In the template, a variable is denoted by a substring of the form
// $name or ${name}, where name is a non-empty sequence of letters,
// digits, and underscores. A purely numeric name like $1 refers to
@@ -727,10 +736,10 @@ func (re *Regexp) FindSubmatch(b []byte) [][]byte {
// capturing parentheses named with the (?P<name>...) syntax. A
// reference to an out of range or unmatched index or a name that is not
// present in the regular expression is replaced with an empty slice.
-//
+//
// In the $name form, name is taken to be as long as possible: $1x is
// equivalent to ${1x}, not ${1}x, and, $10 is equivalent to ${10}, not ${1}0.
-//
+//
// To insert a literal $ in the output, use $$ in the template.
func (re *Regexp) Expand(dst []byte, template []byte, src []byte, match []int) []byte {
return re.expand(dst, string(template), src, "", match)
@@ -766,7 +775,7 @@ func (re *Regexp) expand(dst []byte, template string, bsrc []byte, src string, m
}
template = rest
if num >= 0 {
- if 2*num+1 < len(match) {
+ if 2*num+1 < len(match) && match[2*num] >= 0 {
if bsrc != nil {
dst = append(dst, bsrc[match[2*num]:match[2*num+1]]...)
} else {
@@ -1047,3 +1056,52 @@ func (re *Regexp) FindAllStringSubmatchIndex(s string, n int) [][]int {
}
return result
}
+
+// Split slices s into substrings separated by the expression and returns a slice of
+// the substrings between those expression matches.
+//
+// The slice returned by this method consists of all the substrings of s
+// not contained in the slice returned by FindAllString. When called on an expression
+// that contains no metacharacters, it is equivalent to strings.SplitN.
+//
+// Example:
+// s := regexp.MustCompile("a*").Split("abaabaccadaaae", 5)
+// // s: ["", "b", "b", "c", "cadaaae"]
+//
+// The count determines the number of substrings to return:
+// n > 0: at most n substrings; the last substring will be the unsplit remainder.
+// n == 0: the result is nil (zero substrings)
+// n < 0: all substrings
+func (re *Regexp) Split(s string, n int) []string {
+
+ if n == 0 {
+ return nil
+ }
+
+ if len(re.expr) > 0 && len(s) == 0 {
+ return []string{""}
+ }
+
+ matches := re.FindAllStringIndex(s, n)
+ strings := make([]string, 0, len(matches))
+
+ beg := 0
+ end := 0
+ for _, match := range matches {
+ if n > 0 && len(strings) >= n-1 {
+ break
+ }
+
+ end = match[0]
+ if match[1] != 0 {
+ strings = append(strings, s[beg:end])
+ }
+ beg = match[1]
+ }
+
+ if end != len(s) {
+ strings = append(strings, s[beg:])
+ }
+
+ return strings
+}
diff --git a/src/pkg/regexp/syntax/compile.go b/src/pkg/regexp/syntax/compile.go
index 41955bfc2..95f6f1569 100644
--- a/src/pkg/regexp/syntax/compile.go
+++ b/src/pkg/regexp/syntax/compile.go
@@ -10,10 +10,10 @@ import "unicode"
// Because the pointers haven't been filled in yet, we can reuse their storage
// to hold the list. It's kind of sleazy, but works well in practice.
// See http://swtch.com/~rsc/regexp/regexp1.html for inspiration.
-//
+//
// These aren't really pointers: they're integers, so we can reinterpret them
// this way without using package unsafe. A value l denotes
-// p.inst[l>>1].Out (l&1==0) or .Arg (l&1==1).
+// p.inst[l>>1].Out (l&1==0) or .Arg (l&1==1).
// l == 0 denotes the empty list, okay because we start every program
// with a fail instruction, so we'll never want to point at its output link.
type patchList uint32
diff --git a/src/pkg/regexp/syntax/doc.go b/src/pkg/regexp/syntax/doc.go
new file mode 100644
index 000000000..843a6f6a4
--- /dev/null
+++ b/src/pkg/regexp/syntax/doc.go
@@ -0,0 +1,127 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// DO NOT EDIT. This file is generated by mksyntaxgo from the RE2 distribution.
+
+/*
+Package syntax parses regular expressions into parse trees and compiles
+parse trees into programs. Most clients of regular expressions will use the
+facilities of package regexp (such as Compile and Match) instead of this package.
+
+Syntax
+
+The regular expression syntax understood by this package when parsing with the Perl flag is as follows.
+Parts of the syntax can be disabled by passing alternate flags to Parse.
+
+
+Single characters:
+ . any character, possibly including newline (flag s=true)
+ [xyz] character class
+ [^xyz] negated character class
+ \d Perl character class
+ \D negated Perl character class
+ [:alpha:] ASCII character class
+ [:^alpha:] negated ASCII character class
+ \pN Unicode character class (one-letter name)
+ \p{Greek} Unicode character class
+ \PN negated Unicode character class (one-letter name)
+ \P{Greek} negated Unicode character class
+
+Composites:
+ xy x followed by y
+ x|y x or y (prefer x)
+
+Repetitions:
+ x* zero or more x, prefer more
+ x+ one or more x, prefer more
+ x? zero or one x, prefer one
+ x{n,m} n or n+1 or ... or m x, prefer more
+ x{n,} n or more x, prefer more
+ x{n} exactly n x
+ x*? zero or more x, prefer fewer
+ x+? one or more x, prefer fewer
+ x?? zero or one x, prefer zero
+ x{n,m}? n or n+1 or ... or m x, prefer fewer
+ x{n,}? n or more x, prefer fewer
+ x{n}? exactly n x
+
+Grouping:
+ (re) numbered capturing group
+ (?P<name>re) named & numbered capturing group
+ (?:re) non-capturing group
+ (?flags) set flags within current group; non-capturing
+ (?flags:re) set flags during re; non-capturing
+
+ Flag syntax is xyz (set) or -xyz (clear) or xy-z (set xy, clear z). The flags are:
+
+ i case-insensitive (default false)
+ m multi-line mode: ^ and $ match begin/end line in addition to begin/end text (default false)
+ s let . match \n (default false)
+ U ungreedy: swap meaning of x* and x*?, x+ and x+?, etc (default false)
+
+Empty strings:
+ ^ at beginning of text or line (flag m=true)
+ $ at end of text (like \z not \Z) or line (flag m=true)
+ \A at beginning of text
+ \b at word boundary (\w on one side and \W, \A, or \z on the other)
+ \B not a word boundary
+ \z at end of text
+
+Escape sequences:
+ \a bell (== \007)
+ \f form feed (== \014)
+ \t horizontal tab (== \011)
+ \n newline (== \012)
+ \r carriage return (== \015)
+ \v vertical tab character (== \013)
+ \* literal *, for any punctuation character *
+ \123 octal character code (up to three digits)
+ \x7F hex character code (exactly two digits)
+ \x{10FFFF} hex character code
+ \Q...\E literal text ... even if ... has punctuation
+
+Character class elements:
+ x single character
+ A-Z character range (inclusive)
+ \d Perl character class
+ [:foo:] ASCII character class foo
+ \p{Foo} Unicode character class Foo
+ \pF Unicode character class F (one-letter name)
+
+Named character classes as character class elements:
+ [\d] digits (== \d)
+ [^\d] not digits (== \D)
+ [\D] not digits (== \D)
+ [^\D] not not digits (== \d)
+ [[:name:]] named ASCII class inside character class (== [:name:])
+ [^[:name:]] named ASCII class inside negated character class (== [:^name:])
+ [\p{Name}] named Unicode property inside character class (== \p{Name})
+ [^\p{Name}] named Unicode property inside negated character class (== \P{Name})
+
+Perl character classes:
+ \d digits (== [0-9])
+ \D not digits (== [^0-9])
+ \s whitespace (== [\t\n\f\r ])
+ \S not whitespace (== [^\t\n\f\r ])
+ \w word characters (== [0-9A-Za-z_])
+ \W not word characters (== [^0-9A-Za-z_])
+
+ASCII character classes:
+ [:alnum:] alphanumeric (== [0-9A-Za-z])
+ [:alpha:] alphabetic (== [A-Za-z])
+ [:ascii:] ASCII (== [\x00-\x7F])
+ [:blank:] blank (== [\t ])
+ [:cntrl:] control (== [\x00-\x1F\x7F])
+ [:digit:] digits (== [0-9])
+ [:graph:] graphical (== [!-~] == [A-Za-z0-9!"#$%&'()*+,\-./:;<=>?@[\\\]^_`{|}~])
+ [:lower:] lower case (== [a-z])
+ [:print:] printable (== [ -~] == [ [:graph:]])
+ [:punct:] punctuation (== [!-/:-@[-`{-~])
+ [:space:] whitespace (== [\t\n\v\f\r ])
+ [:upper:] upper case (== [A-Z])
+ [:word:] word characters (== [0-9A-Za-z_])
+ [:xdigit:] hex digit (== [0-9A-Fa-f])
+
+*/
+package syntax
diff --git a/src/pkg/regexp/syntax/parse.go b/src/pkg/regexp/syntax/parse.go
index 4924e9453..30e0e8b7f 100644
--- a/src/pkg/regexp/syntax/parse.go
+++ b/src/pkg/regexp/syntax/parse.go
@@ -2,10 +2,6 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// Package syntax parses regular expressions into parse trees and compiles
-// parse trees into programs. Most clients of regular expressions will use
-// the facilities of package regexp (such as Compile and Match) instead of
-// this package.
package syntax
import (
@@ -46,11 +42,9 @@ const (
ErrMissingParen ErrorCode = "missing closing )"
ErrMissingRepeatArgument ErrorCode = "missing argument to repetition operator"
ErrTrailingBackslash ErrorCode = "trailing backslash at end of expression"
+ ErrUnexpectedParen ErrorCode = "unexpected )"
)
-// TODO: Export for Go 1.1.
-const errUnexpectedParen ErrorCode = "unexpected )"
-
func (e ErrorCode) String() string {
return string(e)
}
@@ -470,7 +464,7 @@ func (p *parser) factor(sub []*Regexp, flags Flags) []*Regexp {
// Construct factored form: prefix(suffix1|suffix2|...)
prefix := first
for j := start; j < i; j++ {
- reuse := j != start // prefix came from sub[start]
+ reuse := j != start // prefix came from sub[start]
sub[j] = p.removeLeadingRegexp(sub[j], reuse)
}
suffix := p.collapse(sub[start:i], OpAlternate) // recurse
@@ -1171,13 +1165,13 @@ func (p *parser) parseRightParen() error {
n := len(p.stack)
if n < 2 {
- return &Error{errUnexpectedParen, p.wholeRegexp}
+ return &Error{ErrUnexpectedParen, p.wholeRegexp}
}
re1 := p.stack[n-1]
re2 := p.stack[n-2]
p.stack = p.stack[:n-2]
if re2.Op != opLeftParen {
- return &Error{errUnexpectedParen, p.wholeRegexp}
+ return &Error{ErrUnexpectedParen, p.wholeRegexp}
}
// Restore flags at time of paren.
p.flags = re2.Flags