Imported Upstream version 2011.01.12upstream/2011.01.12

author: Ondřej Surý <ondrej@sury.org> 2011-01-17 12:40:45 +0100
committer: Ondřej Surý <ondrej@sury.org> 2011-01-17 12:40:45 +0100
commit: 3e45412327a2654a77944249962b3652e6142299 (patch)
tree: bc3bf69452afa055423cbe0c5cfa8ca357df6ccf /src/pkg/regexp
parent: c533680039762cacbc37db8dc7eed074c3e497be (diff)
download: golang-3e45412327a2654a77944249962b3652e6142299.tar.gz
4 files changed, 1145 insertions, 745 deletions
diff --git a/src/pkg/regexp/Makefile b/src/pkg/regexp/Makefile
index 9f91c8e7a..9024e66da 100644
--- a/src/pkg/regexp/Makefile
+++ b/src/pkg/regexp/Makefile
@@ -2,7 +2,7 @@
 # Use of this source code is governed by a BSD-style
 # license that can be found in the LICENSE file.
 
-include ../../Make.$(GOARCH)
+include ../../Make.inc
 
 TARG=regexp
 GOFILES=\
diff --git a/src/pkg/regexp/all_test.go b/src/pkg/regexp/all_test.go
index 4bdd6c67e..3b2c489bc 100644
--- a/src/pkg/regexp/all_test.go
+++ b/src/pkg/regexp/all_test.go
@@ -37,80 +37,18 @@ type stringError struct {
 }
 
 var bad_re = []stringError{
-	stringError{`*`, ErrBareClosure},
-	stringError{`(abc`, ErrUnmatchedLpar},
-	stringError{`abc)`, ErrUnmatchedRpar},
-	stringError{`x[a-z`, ErrUnmatchedLbkt},
-	stringError{`abc]`, ErrUnmatchedRbkt},
-	stringError{`[z-a]`, ErrBadRange},
-	stringError{`abc\`, ErrExtraneousBackslash},
-	stringError{`a**`, ErrBadClosure},
-	stringError{`a*+`, ErrBadClosure},
-	stringError{`a??`, ErrBadClosure},
-	stringError{`*`, ErrBareClosure},
-	stringError{`\x`, ErrBadBackslash},
-}
-
-type vec []int
-
-type tester struct {
-	re    string
-	text  string
-	match vec
-}
-
-var matches = []tester{
-	tester{`^abcdefg`, "abcdefg", vec{0, 7}},
-	tester{`a+`, "baaab", vec{1, 4}},
-	tester{"abcd..", "abcdef", vec{0, 6}},
-	tester{``, "", vec{0, 0}},
-	tester{`a`, "a", vec{0, 1}},
-	tester{`x`, "y", vec{}},
-	tester{`b`, "abc", vec{1, 2}},
-	tester{`.`, "a", vec{0, 1}},
-	tester{`.*`, "abcdef", vec{0, 6}},
-	tester{`^`, "abcde", vec{0, 0}},
-	tester{`$`, "abcde", vec{5, 5}},
-	tester{`^abcd$`, "abcd", vec{0, 4}},
-	tester{`^bcd'`, "abcdef", vec{}},
-	tester{`^abcd$`, "abcde", vec{}},
-	tester{`a+`, "baaab", vec{1, 4}},
-	tester{`a*`, "baaab", vec{0, 0}},
-	tester{`[a-z]+`, "abcd", vec{0, 4}},
-	tester{`[^a-z]+`, "ab1234cd", vec{2, 6}},
-	tester{`[a\-\]z]+`, "az]-bcz", vec{0, 4}},
-	tester{`[^\n]+`, "abcd\n", vec{0, 4}},
-	tester{`[日本語]+`, "日本語日本語", vec{0, 18}},
-	tester{`日本語+`, "日本語", vec{0, 9}},
-	tester{`日本語+`, "日本語語語語", vec{0, 18}},
-	tester{`()`, "", vec{0, 0, 0, 0}},
-	tester{`(a)`, "a", vec{0, 1, 0, 1}},
-	tester{`(.)(.)`, "日a", vec{0, 4, 0, 3, 3, 4}},
-	tester{`(.*)`, "", vec{0, 0, 0, 0}},
-	tester{`(.*)`, "abcd", vec{0, 4, 0, 4}},
-	tester{`(..)(..)`, "abcd", vec{0, 4, 0, 2, 2, 4}},
-	tester{`(([^xyz]*)(d))`, "abcd", vec{0, 4, 0, 4, 0, 3, 3, 4}},
-	tester{`((a|b|c)*(d))`, "abcd", vec{0, 4, 0, 4, 2, 3, 3, 4}},
-	tester{`(((a|b|c)*)(d))`, "abcd", vec{0, 4, 0, 4, 0, 3, 2, 3, 3, 4}},
-	tester{`a*(|(b))c*`, "aacc", vec{0, 4, 2, 2, -1, -1}},
-	tester{`(.*).*`, "ab", vec{0, 2, 0, 2}},
-	tester{`[.]`, ".", vec{0, 1}},
-	tester{`/$`, "/abc/", vec{4, 5}},
-	tester{`/$`, "/abc", vec{}},
-
-	// fixed bugs
-	tester{`ab$`, "cab", vec{1, 3}},
-	tester{`axxb$`, "axxcb", vec{}},
-	tester{`data`, "daXY data", vec{5, 9}},
-	tester{`da(.)a$`, "daXY data", vec{5, 9, 7, 8}},
-
-	// can backslash-escape any punctuation
-	tester{`\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~`,
-		`!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, vec{0, 31}},
-	tester{`[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~]+`,
-		`!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, vec{0, 31}},
-	tester{"\\`", "`", vec{0, 1}},
-	tester{"[\\`]+", "`", vec{0, 1}},
+	{`*`, ErrBareClosure},
+	{`(abc`, ErrUnmatchedLpar},
+	{`abc)`, ErrUnmatchedRpar},
+	{`x[a-z`, ErrUnmatchedLbkt},
+	{`abc]`, ErrUnmatchedRbkt},
+	{`[z-a]`, ErrBadRange},
+	{`abc\`, ErrExtraneousBackslash},
+	{`a**`, ErrBadClosure},
+	{`a*+`, ErrBadClosure},
+	{`a??`, ErrBadClosure},
+	{`*`, ErrBareClosure},
+	{`\x`, ErrBadBackslash},
 }
 
 func compileTest(t *testing.T, expr string, error os.Error) *Regexp {
@@ -121,66 +59,6 @@ func compileTest(t *testing.T, expr string, error os.Error) *Regexp {
 	return re
 }
 
-func printVec(t *testing.T, m []int) {
-	l := len(m)
-	if l == 0 {
-		t.Log("\t<no match>")
-	} else {
-		if m[len(m)-1] == -1 {
-			m = m[0 : len(m)-2]
-		}
-		t.Log("\t", m)
-	}
-}
-
-func equal(m1, m2 []int) bool {
-	l := len(m1)
-	if l != len(m2) {
-		return false
-	}
-	for i := 0; i < l; i++ {
-		if m1[i] != m2[i] {
-			return false
-		}
-	}
-	return true
-}
-
-func equalStrings(m1, m2 []string) bool {
-	l := len(m1)
-	if l != len(m2) {
-		return false
-	}
-	for i := 0; i < l; i++ {
-		if m1[i] != m2[i] {
-			return false
-		}
-	}
-	return true
-}
-
-func executeTest(t *testing.T, expr string, str string, match []int) {
-	re := compileTest(t, expr, nil)
-	if re == nil {
-		return
-	}
-	m := re.ExecuteString(str)
-	if !equal(m, match) {
-		t.Errorf("ExecuteString failure on %#q matching %q:", expr, str)
-		printVec(t, m)
-		t.Log("should be:")
-		printVec(t, match)
-	}
-	// now try bytes
-	m = re.Execute([]byte(str))
-	if !equal(m, match) {
-		t.Errorf("Execute failure on %#q matching %q:", expr, str)
-		printVec(t, m)
-		t.Log("should be:")
-		printVec(t, match)
-	}
-}
-
 func TestGoodCompile(t *testing.T) {
 	for i := 0; i < len(good_re); i++ {
 		compileTest(t, good_re[i], nil)
@@ -193,57 +71,41 @@ func TestBadCompile(t *testing.T) {
 	}
 }
 
-func TestExecute(t *testing.T) {
-	for i := 0; i < len(matches); i++ {
-		test := &matches[i]
-		executeTest(t, test.re, test.text, test.match)
-	}
-}
-
-func matchTest(t *testing.T, expr string, str string, match []int) {
-	re := compileTest(t, expr, nil)
+func matchTest(t *testing.T, test *FindTest) {
+	re := compileTest(t, test.pat, nil)
 	if re == nil {
 		return
 	}
-	m := re.MatchString(str)
-	if m != (len(match) > 0) {
-		t.Errorf("MatchString failure on %#q matching %q: %t should be %t", expr, str, m, len(match) > 0)
+	m := re.MatchString(test.text)
+	if m != (len(test.matches) > 0) {
+		t.Errorf("MatchString failure on %s: %t should be %t", test, m, len(test.matches) > 0)
 	}
 	// now try bytes
-	m = re.Match([]byte(str))
-	if m != (len(match) > 0) {
-		t.Errorf("Match failure on %#q matching %q: %t should be %t", expr, str, m, len(match) > 0)
+	m = re.Match([]byte(test.text))
+	if m != (len(test.matches) > 0) {
+		t.Errorf("Match failure on %s: %t should be %t", test, m, len(test.matches) > 0)
 	}
 }
 
 func TestMatch(t *testing.T) {
-	for i := 0; i < len(matches); i++ {
-		test := &matches[i]
-		matchTest(t, test.re, test.text, test.match)
+	for _, test := range findTests {
+		matchTest(t, &test)
 	}
 }
 
-func TestMatchStrings(t *testing.T) {
-	for i := 0; i < len(matches); i++ {
-		test := &matches[i]
-		matchTest(t, test.re, test.text, test.match)
-	}
-}
-
-func matchFunctionTest(t *testing.T, expr string, str string, match []int) {
-	m, err := MatchString(expr, str)
+func matchFunctionTest(t *testing.T, test *FindTest) {
+	m, err := MatchString(test.pat, test.text)
 	if err == nil {
 		return
 	}
-	if m != (len(match) > 0) {
-		t.Errorf("Match failure on %#q matching %q: %d should be %d", expr, str, m, len(match) > 0)
+	if m != (len(test.matches) > 0) {
+		t.Errorf("Match failure on %s: %t should be %t", test, m, len(test.matches) > 0)
 	}
 }
 
 func TestMatchFunction(t *testing.T) {
-	for i := 0; i < len(matches); i++ {
-		test := &matches[i]
-		matchFunctionTest(t, test.re, test.text, test.match)
+	for _, test := range findTests {
+		matchFunctionTest(t, &test)
 	}
 }
 
@@ -253,64 +115,64 @@ type ReplaceTest struct {
 
 var replaceTests = []ReplaceTest{
 	// Test empty input and/or replacement, with pattern that matches the empty string.
-	ReplaceTest{"", "", "", ""},
-	ReplaceTest{"", "x", "", "x"},
-	ReplaceTest{"", "", "abc", "abc"},
-	ReplaceTest{"", "x", "abc", "xaxbxcx"},
+	{"", "", "", ""},
+	{"", "x", "", "x"},
+	{"", "", "abc", "abc"},
+	{"", "x", "abc", "xaxbxcx"},
 
 	// Test empty input and/or replacement, with pattern that does not match the empty string.
-	ReplaceTest{"b", "", "", ""},
-	ReplaceTest{"b", "x", "", ""},
-	ReplaceTest{"b", "", "abc", "ac"},
-	ReplaceTest{"b", "x", "abc", "axc"},
-	ReplaceTest{"y", "", "", ""},
-	ReplaceTest{"y", "x", "", ""},
-	ReplaceTest{"y", "", "abc", "abc"},
-	ReplaceTest{"y", "x", "abc", "abc"},
+	{"b", "", "", ""},
+	{"b", "x", "", ""},
+	{"b", "", "abc", "ac"},
+	{"b", "x", "abc", "axc"},
+	{"y", "", "", ""},
+	{"y", "x", "", ""},
+	{"y", "", "abc", "abc"},
+	{"y", "x", "abc", "abc"},
 
 	// Multibyte characters -- verify that we don't try to match in the middle
 	// of a character.
-	ReplaceTest{"[a-c]*", "x", "\u65e5", "x\u65e5x"},
-	ReplaceTest{"[^\u65e5]", "x", "abc\u65e5def", "xxx\u65e5xxx"},
+	{"[a-c]*", "x", "\u65e5", "x\u65e5x"},
+	{"[^\u65e5]", "x", "abc\u65e5def", "xxx\u65e5xxx"},
 
 	// Start and end of a string.
-	ReplaceTest{"^[a-c]*", "x", "abcdabc", "xdabc"},
-	ReplaceTest{"[a-c]*$", "x", "abcdabc", "abcdx"},
-	ReplaceTest{"^[a-c]*$", "x", "abcdabc", "abcdabc"},
-	ReplaceTest{"^[a-c]*", "x", "abc", "x"},
-	ReplaceTest{"[a-c]*$", "x", "abc", "x"},
-	ReplaceTest{"^[a-c]*$", "x", "abc", "x"},
-	ReplaceTest{"^[a-c]*", "x", "dabce", "xdabce"},
-	ReplaceTest{"[a-c]*$", "x", "dabce", "dabcex"},
-	ReplaceTest{"^[a-c]*$", "x", "dabce", "dabce"},
-	ReplaceTest{"^[a-c]*", "x", "", "x"},
-	ReplaceTest{"[a-c]*$", "x", "", "x"},
-	ReplaceTest{"^[a-c]*$", "x", "", "x"},
-
-	ReplaceTest{"^[a-c]+", "x", "abcdabc", "xdabc"},
-	ReplaceTest{"[a-c]+$", "x", "abcdabc", "abcdx"},
-	ReplaceTest{"^[a-c]+$", "x", "abcdabc", "abcdabc"},
-	ReplaceTest{"^[a-c]+", "x", "abc", "x"},
-	ReplaceTest{"[a-c]+$", "x", "abc", "x"},
-	ReplaceTest{"^[a-c]+$", "x", "abc", "x"},
-	ReplaceTest{"^[a-c]+", "x", "dabce", "dabce"},
-	ReplaceTest{"[a-c]+$", "x", "dabce", "dabce"},
-	ReplaceTest{"^[a-c]+$", "x", "dabce", "dabce"},
-	ReplaceTest{"^[a-c]+", "x", "", ""},
-	ReplaceTest{"[a-c]+$", "x", "", ""},
-	ReplaceTest{"^[a-c]+$", "x", "", ""},
+	{"^[a-c]*", "x", "abcdabc", "xdabc"},
+	{"[a-c]*$", "x", "abcdabc", "abcdx"},
+	{"^[a-c]*$", "x", "abcdabc", "abcdabc"},
+	{"^[a-c]*", "x", "abc", "x"},
+	{"[a-c]*$", "x", "abc", "x"},
+	{"^[a-c]*$", "x", "abc", "x"},
+	{"^[a-c]*", "x", "dabce", "xdabce"},
+	{"[a-c]*$", "x", "dabce", "dabcex"},
+	{"^[a-c]*$", "x", "dabce", "dabce"},
+	{"^[a-c]*", "x", "", "x"},
+	{"[a-c]*$", "x", "", "x"},
+	{"^[a-c]*$", "x", "", "x"},
+
+	{"^[a-c]+", "x", "abcdabc", "xdabc"},
+	{"[a-c]+$", "x", "abcdabc", "abcdx"},
+	{"^[a-c]+$", "x", "abcdabc", "abcdabc"},
+	{"^[a-c]+", "x", "abc", "x"},
+	{"[a-c]+$", "x", "abc", "x"},
+	{"^[a-c]+$", "x", "abc", "x"},
+	{"^[a-c]+", "x", "dabce", "dabce"},
+	{"[a-c]+$", "x", "dabce", "dabce"},
+	{"^[a-c]+$", "x", "dabce", "dabce"},
+	{"^[a-c]+", "x", "", ""},
+	{"[a-c]+$", "x", "", ""},
+	{"^[a-c]+$", "x", "", ""},
 
 	// Other cases.
-	ReplaceTest{"abc", "def", "abcdefg", "defdefg"},
-	ReplaceTest{"bc", "BC", "abcbcdcdedef", "aBCBCdcdedef"},
-	ReplaceTest{"abc", "", "abcdabc", "d"},
-	ReplaceTest{"x", "xXx", "xxxXxxx", "xXxxXxxXxXxXxxXxxXx"},
-	ReplaceTest{"abc", "d", "", ""},
-	ReplaceTest{"abc", "d", "abc", "d"},
-	ReplaceTest{".+", "x", "abc", "x"},
-	ReplaceTest{"[a-c]*", "x", "def", "xdxexfx"},
-	ReplaceTest{"[a-c]+", "x", "abcbcdcdedef", "xdxdedef"},
-	ReplaceTest{"[a-c]*", "x", "abcbcdcdedef", "xdxdxexdxexfx"},
+	{"abc", "def", "abcdefg", "defdefg"},
+	{"bc", "BC", "abcbcdcdedef", "aBCBCdcdedef"},
+	{"abc", "", "abcdabc", "d"},
+	{"x", "xXx", "xxxXxxx", "xXxxXxxXxXxXxxXxxXx"},
+	{"abc", "d", "", ""},
+	{"abc", "d", "abc", "d"},
+	{".+", "x", "abc", "x"},
+	{"[a-c]*", "x", "def", "xdxexfx"},
+	{"[a-c]+", "x", "abcbcdcdedef", "xdxdedef"},
+	{"[a-c]*", "x", "abcbcdcdedef", "xdxdxexdxexfx"},
 }
 
 type ReplaceFuncTest struct {
@@ -320,9 +182,9 @@ type ReplaceFuncTest struct {
 }
 
 var replaceFuncTests = []ReplaceFuncTest{
-	ReplaceFuncTest{"[a-c]", func(s string) string { return "x" + s + "y" }, "defabcdef", "defxayxbyxcydef"},
-	ReplaceFuncTest{"[a-c]+", func(s string) string { return "x" + s + "y" }, "defabcdef", "defxabcydef"},
-	ReplaceFuncTest{"[a-c]*", func(s string) string { return "x" + s + "y" }, "defabcdef", "xydxyexyfxabcydxyexyfxy"},
+	{"[a-c]", func(s string) string { return "x" + s + "y" }, "defabcdef", "defxayxbyxcydef"},
+	{"[a-c]+", func(s string) string { return "x" + s + "y" }, "defabcdef", "defxabcydef"},
+	{"[a-c]*", func(s string) string { return "x" + s + "y" }, "defabcdef", "xydxyexyfxabcydxyexyfxy"},
 }
 
 func TestReplaceAll(t *testing.T) {
@@ -367,18 +229,21 @@ func TestReplaceAllFunc(t *testing.T) {
 	}
 }
 
-type QuoteMetaTest struct {
-	pattern, output string
+type MetaTest struct {
+	pattern, output, literal string
+	isLiteral                bool
 }
 
-var quoteMetaTests = []QuoteMetaTest{
-	QuoteMetaTest{``, ``},
-	QuoteMetaTest{`foo`, `foo`},
-	QuoteMetaTest{`!@#$%^&*()_+-=[{]}\|,<.>/?~`, `!@#\$%\^&\*\(\)_\+-=\[{\]}\\\|,<\.>/\?~`},
+var metaTests = []MetaTest{
+	{``, ``, ``, true},
+	{`foo`, `foo`, `foo`, true},
+	{`foo\.\$`, `foo\\\.\\\$`, `foo.$`, true}, // has meta but no operator
+	{`foo.\$`, `foo\.\\\$`, `foo`, false},     // has escaped operators and real operators
+	{`!@#$%^&*()_+-=[{]}\|,<.>/?~`, `!@#\$%\^&\*\(\)_\+-=\[{\]}\\\|,<\.>/\?~`, `!@#`, false},
 }
 
 func TestQuoteMeta(t *testing.T) {
-	for _, tc := range quoteMetaTests {
+	for _, tc := range metaTests {
 		// Verify that QuoteMeta returns the expected string.
 		quoted := QuoteMeta(tc.pattern)
 		if quoted != tc.output {
@@ -407,96 +272,16 @@ func TestQuoteMeta(t *testing.T) {
 	}
 }
 
-type matchCase struct {
-	matchfunc string
-	input     string
-	n         int
-	regexp    string
-	expected  []string
-}
-
-var matchCases = []matchCase{
-	matchCase{"match", " aa b", 0, "[^ ]+", []string{"aa", "b"}},
-	matchCase{"match", " aa b", 0, "[^ ]*", []string{"", "aa", "b"}},
-	matchCase{"match", "a b c", 0, "[^ ]*", []string{"a", "b", "c"}},
-	matchCase{"match", "a:a: a:", 0, "^.:", []string{"a:"}},
-	matchCase{"match", "", 0, "[^ ]*", []string{""}},
-	matchCase{"match", "", 0, "", []string{""}},
-	matchCase{"match", "a", 0, "", []string{"", ""}},
-	matchCase{"match", "ab", 0, "^", []string{""}},
-	matchCase{"match", "ab", 0, "$", []string{""}},
-	matchCase{"match", "ab", 0, "X*", []string{"", "", ""}},
-	matchCase{"match", "aX", 0, "X*", []string{"", "X"}},
-	matchCase{"match", "XabX", 0, "X*", []string{"X", "", "X"}},
-
-	matchCase{"matchit", "", 0, ".", []string{}},
-	matchCase{"matchit", "abc", 2, ".", []string{"a", "b"}},
-	matchCase{"matchit", "abc", 0, ".", []string{"a", "b", "c"}},
-}
-
-func printStringSlice(t *testing.T, s []string) {
-	t.Logf("%#v", s)
-}
-
-func TestAllMatches(t *testing.T) {
-	ch := make(chan matchCase)
-	go func() {
-		for _, c := range matchCases {
-			ch <- c
-			stringCase := matchCase{
-				"string" + c.matchfunc,
-				c.input,
-				c.n,
-				c.regexp,
-				c.expected,
-			}
-			ch <- stringCase
-		}
-		close(ch)
-	}()
-
-	for c := range ch {
-		var result []string
-		re, _ := Compile(c.regexp)
-
-		switch c.matchfunc {
-		case "matchit":
-			result = make([]string, len(c.input)+1)
-			i := 0
-			b := []byte(c.input)
-			for match := range re.AllMatchesIter(b, c.n) {
-				result[i] = string(match)
-				i++
-			}
-			result = result[0:i]
-		case "stringmatchit":
-			result = make([]string, len(c.input)+1)
-			i := 0
-			for match := range re.AllMatchesStringIter(c.input, c.n) {
-				result[i] = match
-				i++
-			}
-			result = result[0:i]
-		case "match":
-			result = make([]string, len(c.input)+1)
-			b := []byte(c.input)
-			i := 0
-			for _, match := range re.AllMatches(b, c.n) {
-				result[i] = string(match)
-				i++
-			}
-			result = result[0:i]
-		case "stringmatch":
-			result = re.AllMatchesString(c.input, c.n)
+func TestLiteralPrefix(t *testing.T) {
+	for _, tc := range metaTests {
+		// Literal method needs to scan the pattern.
+		re := MustCompile(tc.pattern)
+		str, complete := re.LiteralPrefix()
+		if complete != tc.isLiteral {
+			t.Errorf("LiteralPrefix(`%s`) = %t; want %t", tc.pattern, complete, tc.isLiteral)
 		}
-
-		if !equalStrings(result, c.expected) {
-			t.Errorf("testing '%s'.%s('%s', %d), expected: ",
-				c.regexp, c.matchfunc, c.input, c.n)
-			printStringSlice(t, c.expected)
-			t.Log("got: ")
-			printStringSlice(t, result)
-			t.Log("\n")
+		if str != tc.literal {
+			t.Errorf("LiteralPrefix(`%s`) = `%s`; want `%s`", tc.pattern, str, tc.literal)
 		}
 	}
 }
@@ -507,16 +292,16 @@ type numSubexpCase struct {
 }
 
 var numSubexpCases = []numSubexpCase{
-	numSubexpCase{``, 0},
-	numSubexpCase{`.*`, 0},
-	numSubexpCase{`abba`, 0},
-	numSubexpCase{`ab(b)a`, 1},
-	numSubexpCase{`ab(.*)a`, 1},
-	numSubexpCase{`(.*)ab(.*)a`, 2},
-	numSubexpCase{`(.*)(ab)(.*)a`, 3},
-	numSubexpCase{`(.*)((a)b)(.*)a`, 4},
-	numSubexpCase{`(.*)(\(ab)(.*)a`, 3},
-	numSubexpCase{`(.*)(\(a\)b)(.*)a`, 3},
+	{``, 0},
+	{`.*`, 0},
+	{`abba`, 0},
+	{`ab(b)a`, 1},
+	{`ab(.*)a`, 1},
+	{`(.*)ab(.*)a`, 2},
+	{`(.*)(ab)(.*)a`, 3},
+	{`(.*)((a)b)(.*)a`, 4},
+	{`(.*)(\(ab)(.*)a`, 3},
+	{`(.*)(\(a\)b)(.*)a`, 3},
 }
 
 func TestNumSubexp(t *testing.T) {
@@ -592,3 +377,49 @@ func BenchmarkReplaceAll(b *testing.B) {
 		re.ReplaceAllString(x, "")
 	}
 }
+
+func BenchmarkAnchoredLiteralShortNonMatch(b *testing.B) {
+	b.StopTimer()
+	x := []byte("abcdefghijklmnopqrstuvwxyz")
+	re := MustCompile("^zbc(d|e)")
+	b.StartTimer()
+	for i := 0; i < b.N; i++ {
+		re.Match(x)
+	}
+}
+
+func BenchmarkAnchoredLiteralLongNonMatch(b *testing.B) {
+	b.StopTimer()
+	x := []byte("abcdefghijklmnopqrstuvwxyz")
+	for i := 0; i < 15; i++ {
+		x = append(x, x...)
+	}
+	re := MustCompile("^zbc(d|e)")
+	b.StartTimer()
+	for i := 0; i < b.N; i++ {
+		re.Match(x)
+	}
+}
+
+func BenchmarkAnchoredShortMatch(b *testing.B) {
+	b.StopTimer()
+	x := []byte("abcdefghijklmnopqrstuvwxyz")
+	re := MustCompile("^.bc(d|e)")
+	b.StartTimer()
+	for i := 0; i < b.N; i++ {
+		re.Match(x)
+	}
+}
+
+func BenchmarkAnchoredLongMatch(b *testing.B) {
+	b.StopTimer()
+	x := []byte("abcdefghijklmnopqrstuvwxyz")
+	for i := 0; i < 15; i++ {
+		x = append(x, x...)
+	}
+	re := MustCompile("^.bc(d|e)")
+	b.StartTimer()
+	for i := 0; i < b.N; i++ {
+		re.Match(x)
+	}
+}
diff --git a/src/pkg/regexp/find_test.go b/src/pkg/regexp/find_test.go
new file mode 100644
index 000000000..1690711dd
--- /dev/null
+++ b/src/pkg/regexp/find_test.go
@@ -0,0 +1,459 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package regexp
+
+import (
+	"fmt"
+	"testing"
+)
+
+// For each pattern/text pair, what is the expected output of each function?
+// We can derive the textual results from the indexed results, the non-submatch
+// results from the submatched results, the single results from the 'all' results,
+// and the byte results from the string results. Therefore the table includes
+// only the FindAllStringSubmatchIndex result.
+type FindTest struct {
+	pat     string
+	text    string
+	matches [][]int
+}
+
+func (t FindTest) String() string {
+	return fmt.Sprintf("pat: %#q text: %#q", t.pat, t.text)
+}
+
+var findTests = []FindTest{
+	{``, ``, build(1, 0, 0)},
+	{`^abcdefg`, "abcdefg", build(1, 0, 7)},
+	{`a+`, "baaab", build(1, 1, 4)},
+	{"abcd..", "abcdef", build(1, 0, 6)},
+	{`a`, "a", build(1, 0, 1)},
+	{`x`, "y", nil},
+	{`b`, "abc", build(1, 1, 2)},
+	{`.`, "a", build(1, 0, 1)},
+	{`.*`, "abcdef", build(1, 0, 6)},
+	{`^`, "abcde", build(1, 0, 0)},
+	{`$`, "abcde", build(1, 5, 5)},
+	{`^abcd$`, "abcd", build(1, 0, 4)},
+	{`^bcd'`, "abcdef", nil},
+	{`^abcd$`, "abcde", nil},
+	{`a+`, "baaab", build(1, 1, 4)},
+	{`a*`, "baaab", build(3, 0, 0, 1, 4, 5, 5)},
+	{`[a-z]+`, "abcd", build(1, 0, 4)},
+	{`[^a-z]+`, "ab1234cd", build(1, 2, 6)},
+	{`[a\-\]z]+`, "az]-bcz", build(2, 0, 4, 6, 7)},
+	{`[^\n]+`, "abcd\n", build(1, 0, 4)},
+	{`[日本語]+`, "日本語日本語", build(1, 0, 18)},
+	{`日本語+`, "日本語", build(1, 0, 9)},
+	{`日本語+`, "日本語語語語", build(1, 0, 18)},
+	{`()`, "", build(1, 0, 0, 0, 0)},
+	{`(a)`, "a", build(1, 0, 1, 0, 1)},
+	{`(.)(.)`, "日a", build(1, 0, 4, 0, 3, 3, 4)},
+	{`(.*)`, "", build(1, 0, 0, 0, 0)},
+	{`(.*)`, "abcd", build(1, 0, 4, 0, 4)},
+	{`(..)(..)`, "abcd", build(1, 0, 4, 0, 2, 2, 4)},
+	{`(([^xyz]*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 3, 4)},
+	{`((a|b|c)*(d))`, "abcd", build(1, 0, 4, 0, 4, 2, 3, 3, 4)},
+	{`(((a|b|c)*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 2, 3, 3, 4)},
+	{`\a\b\f\n\r\t\v`, "\a\b\f\n\r\t\v", build(1, 0, 7)},
+	{`[\a\b\f\n\r\t\v]+`, "\a\b\f\n\r\t\v", build(1, 0, 7)},
+
+	{`a*(|(b))c*`, "aacc", build(1, 0, 4, 2, 2, -1, -1)},
+	{`(.*).*`, "ab", build(1, 0, 2, 0, 2)},
+	{`[.]`, ".", build(1, 0, 1)},
+	{`/$`, "/abc/", build(1, 4, 5)},
+	{`/$`, "/abc", nil},
+
+	// multiple matches
+	{`.`, "abc", build(3, 0, 1, 1, 2, 2, 3)},
+	{`(.)`, "abc", build(3, 0, 1, 0, 1, 1, 2, 1, 2, 2, 3, 2, 3)},
+	{`.(.)`, "abcd", build(2, 0, 2, 1, 2, 2, 4, 3, 4)},
+	{`ab*`, "abbaab", build(3, 0, 3, 3, 4, 4, 6)},
+	{`a(b*)`, "abbaab", build(3, 0, 3, 1, 3, 3, 4, 4, 4, 4, 6, 5, 6)},
+
+	// fixed bugs
+	{`ab$`, "cab", build(1, 1, 3)},
+	{`axxb$`, "axxcb", nil},
+	{`data`, "daXY data", build(1, 5, 9)},
+	{`da(.)a$`, "daXY data", build(1, 5, 9, 7, 8)},
+	{`zx+`, "zzx", build(1, 1, 3)},
+
+	// can backslash-escape any punctuation
+	{`\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~`,
+		`!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)},
+	{`[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~]+`,
+		`!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)},
+	{"\\`", "`", build(1, 0, 1)},
+	{"[\\`]+", "`", build(1, 0, 1)},
+
+	// long set of matches (longer than startSize)
+	{
+		".",
+		"qwertyuiopasdfghjklzxcvbnm1234567890",
+		build(36, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10,
+			10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20,
+			20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30,
+			30, 31, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36),
+	},
+}
+
+// build is a helper to construct a [][]int by extracting n sequences from x.
+// This represents n matches with len(x)/n submatches each.
+func build(n int, x ...int) [][]int {
+	ret := make([][]int, n)
+	runLength := len(x) / n
+	j := 0
+	for i := range ret {
+		ret[i] = make([]int, runLength)
+		copy(ret[i], x[j:])
+		j += runLength
+		if j > len(x) {
+			panic("invalid build entry")
+		}
+	}
+	return ret
+}
+
+// First the simple cases.
+
+func TestFind(t *testing.T) {
+	for _, test := range findTests {
+		re := MustCompile(test.pat)
+		if re.String() != test.pat {
+			t.Errorf("String() = `%s`; should be `%s`", re.String(), test.pat)
+		}
+		result := re.Find([]byte(test.text))
+		switch {
+		case len(test.matches) == 0 && len(result) == 0:
+			// ok
+		case test.matches == nil && result != nil:
+			t.Errorf("expected no match; got one: %s", test)
+		case test.matches != nil && result == nil:
+			t.Errorf("expected match; got none: %s", test)
+		case test.matches != nil && result != nil:
+			expect := test.text[test.matches[0][0]:test.matches[0][1]]
+			if expect != string(result) {
+				t.Errorf("expected %q got %q: %s", expect, result, test)
+			}
+		}
+	}
+}
+
+func TestFindString(t *testing.T) {
+	for _, test := range findTests {
+		result := MustCompile(test.pat).FindString(test.text)
+		switch {
+		case len(test.matches) == 0 && len(result) == 0:
+			// ok
+		case test.matches == nil && result != "":
+			t.Errorf("expected no match; got one: %s", test)
+		case test.matches != nil && result == "":
+			// Tricky because an empty result has two meanings: no match or empty match.
+			if test.matches[0][0] != test.matches[0][1] {
+				t.Errorf("expected match; got none: %s", test)
+			}
+		case test.matches != nil && result != "":
+			expect := test.text[test.matches[0][0]:test.matches[0][1]]
+			if expect != result {
+				t.Errorf("expected %q got %q: %s", expect, result, test)
+			}
+		}
+	}
+}
+
+func testFindIndex(test *FindTest, result []int, t *testing.T) {
+	switch {
+	case len(test.matches) == 0 && len(result) == 0:
+		// ok
+	case test.matches == nil && result != nil:
+		t.Errorf("expected no match; got one: %s", test)
+	case test.matches != nil && result == nil:
+		t.Errorf("expected match; got none: %s", test)
+	case test.matches != nil && result != nil:
+		expect := test.matches[0]
+		if expect[0] != result[0] || expect[1] != result[1] {
+			t.Errorf("expected %v got %v: %s", expect, result, test)
+		}
+	}
+}
+
+func TestFindIndex(t *testing.T) {
+	for _, test := range findTests {
+		testFindIndex(&test, MustCompile(test.pat).FindIndex([]byte(test.text)), t)
+	}
+}
+
+func TestFindStringIndex(t *testing.T) {
+	for _, test := range findTests {
+		testFindIndex(&test, MustCompile(test.pat).FindStringIndex(test.text), t)
+	}
+}
+
+// Now come the simple All cases.
+
+func TestFindAll(t *testing.T) {
+	for _, test := range findTests {
+		result := MustCompile(test.pat).FindAll([]byte(test.text), -1)
+		switch {
+		case test.matches == nil && result == nil:
+			// ok
+		case test.matches == nil && result != nil:
+			t.Errorf("expected no match; got one: %s", test)
+		case test.matches != nil && result == nil:
+			t.Errorf("expected match; got none: %s", test)
+		case test.matches != nil && result != nil:
+			if len(test.matches) != len(result) {
+				t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
+				continue
+			}
+			for k, e := range test.matches {
+				expect := test.text[e[0]:e[1]]
+				if expect != string(result[k]) {
+					t.Errorf("match %d: expected %q got %q: %s", k, expect, result[k], test)
+				}
+			}
+		}
+	}
+}
+
+func TestFindAllString(t *testing.T) {
+	for _, test := range findTests {
+		result := MustCompile(test.pat).FindAllString(test.text, -1)
+		switch {
+		case test.matches == nil && result == nil:
+			// ok
+		case test.matches == nil && result != nil:
+			t.Errorf("expected no match; got one: %s", test)
+		case test.matches != nil && result == nil:
+			t.Errorf("expected match; got none: %s", test)
+		case test.matches != nil && result != nil:
+			if len(test.matches) != len(result) {
+				t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
+				continue
+			}
+			for k, e := range test.matches {
+				expect := test.text[e[0]:e[1]]
+				if expect != result[k] {
+					t.Errorf("expected %q got %q: %s", expect, result, test)
+				}
+			}
+		}
+	}
+}
+
+func testFindAllIndex(test *FindTest, result [][]int, t *testing.T) {
+	switch {
+	case test.matches == nil && result == nil:
+		// ok
+	case test.matches == nil && result != nil:
+		t.Errorf("expected no match; got one: %s", test)
+	case test.matches != nil && result == nil:
+		t.Errorf("expected match; got none: %s", test)
+	case test.matches != nil && result != nil:
+		if len(test.matches) != len(result) {
+			t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
+			return
+		}
+		for k, e := range test.matches {
+			if e[0] != result[k][0] || e[1] != result[k][1] {
+				t.Errorf("match %d: expected %v got %v: %s", k, e, result[k], test)
+			}
+		}
+	}
+}
+
+func TestFindAllIndex(t *testing.T) {
+	for _, test := range findTests {
+		testFindAllIndex(&test, MustCompile(test.pat).FindAllIndex([]byte(test.text), -1), t)
+	}
+}
+
+func TestFindAllStringIndex(t *testing.T) {
+	for _, test := range findTests {
+		testFindAllIndex(&test, MustCompile(test.pat).FindAllStringIndex(test.text, -1), t)
+	}
+}
+
+// Now come the Submatch cases.
+
+func testSubmatchBytes(test *FindTest, n int, submatches []int, result [][]byte, t *testing.T) {
+	if len(submatches) != len(result)*2 {
+		t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test)
+		return
+	}
+	for k := 0; k < len(submatches); k += 2 {
+		if submatches[k] == -1 {
+			if result[k/2] != nil {
+				t.Errorf("match %d: expected nil got %q: %s", n, result, test)
+			}
+			continue
+		}
+		expect := test.text[submatches[k]:submatches[k+1]]
+		if expect != string(result[k/2]) {
+			t.Errorf("match %d: expected %q got %q: %s", n, expect, result, test)
+			return
+		}
+	}
+}
+
+func TestFindSubmatch(t *testing.T) {
+	for _, test := range findTests {
+		result := MustCompile(test.pat).FindSubmatch([]byte(test.text))
+		switch {
+		case test.matches == nil && result == nil:
+			// ok
+		case test.matches == nil && result != nil:
+			t.Errorf("expected no match; got one: %s", test)
+		case test.matches != nil && result == nil:
+			t.Errorf("expected match; got none: %s", test)
+		case test.matches != nil && result != nil:
+			testSubmatchBytes(&test, 0, test.matches[0], result, t)
+		}
+	}
+}
+
+func testSubmatchString(test *FindTest, n int, submatches []int, result []string, t *testing.T) {
+	if len(submatches) != len(result)*2 {
+		t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test)
+		return
+	}
+	for k := 0; k < len(submatches); k += 2 {
+		if submatches[k] == -1 {
+			if result[k/2] != "" {
+				t.Errorf("match %d: expected nil got %q: %s", n, result, test)
+			}
+			continue
+		}
+		expect := test.text[submatches[k]:submatches[k+1]]
+		if expect != result[k/2] {
+			t.Errorf("match %d: expected %q got %q: %s", n, expect, result, test)
+			return
+		}
+	}
+}
+
+func TestFindStringSubmatch(t *testing.T) {
+	for _, test := range findTests {
+		result := MustCompile(test.pat).FindStringSubmatch(test.text)
+		switch {
+		case test.matches == nil && result == nil:
+			// ok
+		case test.matches == nil && result != nil:
+			t.Errorf("expected no match; got one: %s", test)
+		case test.matches != nil && result == nil:
+			t.Errorf("expected match; got none: %s", test)
+		case test.matches != nil && result != nil:
+			testSubmatchString(&test, 0, test.matches[0], result, t)
+		}
+	}
+}
+
+func testSubmatchIndices(test *FindTest, n int, expect, result []int, t *testing.T) {
+	if len(expect) != len(result) {
+		t.Errorf("match %d: expected %d matches; got %d: %s", n, len(expect)/2, len(result)/2, test)
+		return
+	}
+	for k, e := range expect {
+		if e != result[k] {
+			t.Errorf("match %d: submatch error: expected %v got %v: %s", n, expect, result, test)
+		}
+	}
+}
+
+func testFindSubmatchIndex(test *FindTest, result []int, t *testing.T) {
+	switch {
+	case test.matches == nil && result == nil:
+		// ok
+	case test.matches == nil && result != nil:
+		t.Errorf("expected no match; got one: %s", test)
+	case test.matches != nil && result == nil:
+		t.Errorf("expected match; got none: %s", test)
+	case test.matches != nil && result != nil:
+		testSubmatchIndices(test, 0, test.matches[0], result, t)
+	}
+}
+
+func TestFindSubmatchIndex(t *testing.T) {
+	for _, test := range findTests {
+		testFindSubmatchIndex(&test, MustCompile(test.pat).FindSubmatchIndex([]byte(test.text)), t)
+	}
+}
+
+func TestFindStringSubmatchndex(t *testing.T) {
+	for _, test := range findTests {
+		testFindSubmatchIndex(&test, MustCompile(test.pat).FindStringSubmatchIndex(test.text), t)
+	}
+}
+
+// Now come the monster AllSubmatch cases.
+
+func TestFindAllSubmatch(t *testing.T) {
+	for _, test := range findTests {
+		result := MustCompile(test.pat).FindAllSubmatch([]byte(test.text), -1)
+		switch {
+		case test.matches == nil && result == nil:
+			// ok
+		case test.matches == nil && result != nil:
+			t.Errorf("expected no match; got one: %s", test)
+		case test.matches != nil && result == nil:
+			t.Errorf("expected match; got none: %s", test)
+		case len(test.matches) != len(result):
+			t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
+		case test.matches != nil && result != nil:
+			for k, match := range test.matches {
+				testSubmatchBytes(&test, k, match, result[k], t)
+			}
+		}
+	}
+}
+
+func TestFindAllStringSubmatch(t *testing.T) {
+	for _, test := range findTests {
+		result := MustCompile(test.pat).FindAllStringSubmatch(test.text, -1)
+		switch {
+		case test.matches == nil && result == nil:
+			// ok
+		case test.matches == nil && result != nil:
+			t.Errorf("expected no match; got one: %s", test)
+		case test.matches != nil && result == nil:
+			t.Errorf("expected match; got none: %s", test)
+		case len(test.matches) != len(result):
+			t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
+		case test.matches != nil && result != nil:
+			for k, match := range test.matches {
+				testSubmatchString(&test, k, match, result[k], t)
+			}
+		}
+	}
+}
+
+func testFindAllSubmatchIndex(test *FindTest, result [][]int, t *testing.T) {
+	switch {
+	case test.matches == nil && result == nil:
+		// ok
+	case test.matches == nil && result != nil:
+		t.Errorf("expected no match; got one: %s", test)
+	case test.matches != nil && result == nil:
+		t.Errorf("expected match; got none: %s", test)
+	case len(test.matches) != len(result):
+		t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
+	case test.matches != nil && result != nil:
+		for k, match := range test.matches {
+			testSubmatchIndices(test, k, match, result[k], t)
+		}
+	}
+}
+
+func TestFindAllSubmatchIndex(t *testing.T) {
+	for _, test := range findTests {
+		testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllSubmatchIndex([]byte(test.text), -1), t)
+	}
+}
+
+func TestFindAllStringSubmatchndex(t *testing.T) {
+	for _, test := range findTests {
+		testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllStringSubmatchIndex(test.text, -1), t)
+	}
+}
diff --git a/src/pkg/regexp/regexp.go b/src/pkg/regexp/regexp.go
index 4dd430ea6..2e03b798a 100644
--- a/src/pkg/regexp/regexp.go
+++ b/src/pkg/regexp/regexp.go
@@ -16,14 +16,50 @@
 //		'$'
 //		'.'
 //		character
-//		'[' [ '^' ] character-ranges ']'
+//		'[' [ '^' ] { character-range } ']'
 //		'(' regexp ')'
+//	character-range:
+//		character [ '-' character ]
+//
+// All characters are UTF-8-encoded code points.  Backslashes escape special
+// characters, including inside character classes.  The standard Go character
+// escapes are also recognized: \a \b \f \n \r \t \v.
+//
+// There are 16 methods of Regexp that match a regular expression and identify
+// the matched text.  Their names are matched by this regular expression:
+//
+//	Find(All)?(String)?(Submatch)?(Index)?
+//
+// If 'All' is present, the routine matches successive non-overlapping
+// matches of the entire expression.  Empty matches abutting a preceding
+// match are ignored.  The return value is a slice containing the successive
+// return values of the corresponding non-'All' routine.  These routines take
+// an extra integer argument, n; if n >= 0, the function returns at most n
+// matches/submatches.
+//
+// If 'String' is present, the argument is a string; otherwise it is a slice
+// of bytes; return values are adjusted as appropriate.
+//
+// If 'Submatch' is present, the return value is a slice identifying the
+// successive submatches of the expression.  Submatches are matches of
+// parenthesized subexpressions within the regular expression, numbered from
+// left to right in order of opening parenthesis.  Submatch 0 is the match of
+// the entire expression, submatch 1 the match of the first parenthesized
+// subexpression, and so on.
+//
+// If 'Index' is present, matches and submatches are identified by byte index
+// pairs within the input string: result[2*n:2*n+1] identifies the indexes of
+// the nth submatch.  The pair for n==0 identifies the match of the entire
+// expression.  If 'Index' is not present, the match is identified by the
+// text of the match/submatch.  If an index is negative, it means that
+// subexpression did not match any string in the input.
+//
+// (There are a few other methods that do not match this pattern.)
 //
 package regexp
 
 import (
 	"bytes"
-	"container/vector"
 	"io"
 	"os"
 	"strings"
@@ -53,121 +89,90 @@ var (
 	ErrBadBackslash        = Error("illegal backslash escape")
 )
 
-// An instruction executed by the NFA
-type instr interface {
-	kind() int   // the type of this instruction: _CHAR, _ANY, etc.
-	next() instr // the instruction to execute after this one
-	setNext(i instr)
-	index() int
-	setIndex(i int)
-	print()
-}
+const (
+	iStart     = iota // beginning of program
+	iEnd              // end of program: success
+	iBOT              // '^' beginning of text
+	iEOT              // '$' end of text
+	iChar             // 'a' regular character
+	iCharClass        // [a-z] character class
+	iAny              // '.' any character including newline
+	iNotNL            // [^\n] special case: any character but newline
+	iBra              // '(' parenthesized expression: 2*braNum for left, 2*braNum+1 for right
+	iAlt              // '|' alternation
+	iNop              // do nothing; makes it easy to link without patching
+)
 
-// Fields and methods common to all instructions
-type common struct {
-	_next  instr
-	_index int
+// An instruction executed by the NFA
+type instr struct {
+	kind  int    // the type of this instruction: iChar, iAny, etc.
+	index int    // used only in debugging; could be eliminated
+	next  *instr // the instruction to execute after this one
+	// Special fields valid only for some items.
+	char   int        // iChar
+	braNum int        // iBra, iEbra
+	cclass *charClass // iCharClass
+	left   *instr     // iAlt, other branch
+}
+
+func (i *instr) print() {
+	switch i.kind {
+	case iStart:
+		print("start")
+	case iEnd:
+		print("end")
+	case iBOT:
+		print("bot")
+	case iEOT:
+		print("eot")
+	case iChar:
+		print("char ", string(i.char))
+	case iCharClass:
+		i.cclass.print()
+	case iAny:
+		print("any")
+	case iNotNL:
+		print("notnl")
+	case iBra:
+		if i.braNum&1 == 0 {
+			print("bra", i.braNum/2)
+		} else {
+			print("ebra", i.braNum/2)
+		}
+	case iAlt:
+		print("alt(", i.left.index, ")")
+	case iNop:
+		print("nop")
+	}
 }
 
-func (c *common) next() instr     { return c._next }
-func (c *common) setNext(i instr) { c._next = i }
-func (c *common) index() int      { return c._index }
-func (c *common) setIndex(i int)  { c._index = i }
-
 // Regexp is the representation of a compiled regular expression.
 // The public interface is entirely through methods.
 type Regexp struct {
 	expr        string // the original expression
 	prefix      string // initial plain text string
 	prefixBytes []byte // initial plain text bytes
-	inst        *vector.Vector
-	start       instr // first instruction of machine
-	prefixStart instr // where to start if there is a prefix
-	nbra        int   // number of brackets in expression, for subexpressions
-}
-
-const (
-	_START     = iota // beginning of program
-	_END              // end of program: success
-	_BOT              // '^' beginning of text
-	_EOT              // '$' end of text
-	_CHAR             // 'a' regular character
-	_CHARCLASS        // [a-z] character class
-	_ANY              // '.' any character including newline
-	_NOTNL            // [^\n] special case: any character but newline
-	_BRA              // '(' parenthesized expression
-	_EBRA             // ')'; end of '(' parenthesized expression
-	_ALT              // '|' alternation
-	_NOP              // do nothing; makes it easy to link without patching
-)
-
-// --- START start of program
-type _Start struct {
-	common
+	inst        []*instr
+	start       *instr // first instruction of machine
+	prefixStart *instr // where to start if there is a prefix
+	nbra        int    // number of brackets in expression, for subexpressions
 }
 
-func (start *_Start) kind() int { return _START }
-func (start *_Start) print()    { print("start") }
-
-// --- END end of program
-type _End struct {
-	common
-}
-
-func (end *_End) kind() int { return _END }
-func (end *_End) print()    { print("end") }
-
-// --- BOT beginning of text
-type _Bot struct {
-	common
-}
-
-func (bot *_Bot) kind() int { return _BOT }
-func (bot *_Bot) print()    { print("bot") }
-
-// --- EOT end of text
-type _Eot struct {
-	common
-}
-
-func (eot *_Eot) kind() int { return _EOT }
-func (eot *_Eot) print()    { print("eot") }
-
-// --- CHAR a regular character
-type _Char struct {
-	common
-	char int
-}
-
-func (char *_Char) kind() int { return _CHAR }
-func (char *_Char) print()    { print("char ", string(char.char)) }
-
-func newChar(char int) *_Char {
-	c := new(_Char)
-	c.char = char
-	return c
-}
-
-// --- CHARCLASS [a-z]
-
-type _CharClass struct {
-	common
+type charClass struct {
 	negate bool // is character class negated? ([^a-z])
-	// vector of int, stored pairwise: [a-z] is (a,z); x is (x,x):
-	ranges     *vector.IntVector
+	// slice of int, stored pairwise: [a-z] is (a,z); x is (x,x):
+	ranges     []int
 	cmin, cmax int
 }
 
-func (cclass *_CharClass) kind() int { return _CHARCLASS }
-
-func (cclass *_CharClass) print() {
+func (cclass *charClass) print() {
 	print("charclass")
 	if cclass.negate {
 		print(" (negated)")
 	}
-	for i := 0; i < cclass.ranges.Len(); i += 2 {
-		l := cclass.ranges.At(i)
-		r := cclass.ranges.At(i + 1)
+	for i := 0; i < len(cclass.ranges); i += 2 {
+		l := cclass.ranges[i]
+		r := cclass.ranges[i+1]
 		if l == r {
 			print(" [", string(l), "]")
 		} else {
@@ -176,10 +181,9 @@ func (cclass *_CharClass) print() {
 	}
 }
 
-func (cclass *_CharClass) addRange(a, b int) {
+func (cclass *charClass) addRange(a, b int) {
 	// range is a through b inclusive
-	cclass.ranges.Push(a)
-	cclass.ranges.Push(b)
+	cclass.ranges = append(cclass.ranges, a, b)
 	if a < cclass.cmin {
 		cclass.cmin = a
 	}
@@ -188,11 +192,11 @@ func (cclass *_CharClass) addRange(a, b int) {
 	}
 }
 
-func (cclass *_CharClass) matches(c int) bool {
+func (cclass *charClass) matches(c int) bool {
 	if c < cclass.cmin || c > cclass.cmax {
 		return cclass.negate
 	}
-	ranges := []int(*cclass.ranges)
+	ranges := cclass.ranges
 	for i := 0; i < len(ranges); i = i + 2 {
 		if ranges[i] <= c && c <= ranges[i+1] {
 			return !cclass.negate
@@ -201,68 +205,18 @@ func (cclass *_CharClass) matches(c int) bool {
 	return cclass.negate
 }
 
-func newCharClass() *_CharClass {
-	c := new(_CharClass)
-	c.ranges = new(vector.IntVector)
-	c.cmin = 0x10FFFF + 1 // MaxRune + 1
-	c.cmax = -1
-	return c
-}
-
-// --- ANY any character
-type _Any struct {
-	common
-}
-
-func (any *_Any) kind() int { return _ANY }
-func (any *_Any) print()    { print("any") }
-
-// --- NOTNL any character but newline
-type _NotNl struct {
-	common
-}
-
-func (notnl *_NotNl) kind() int { return _NOTNL }
-func (notnl *_NotNl) print()    { print("notnl") }
-
-// --- BRA parenthesized expression
-type _Bra struct {
-	common
-	n int // subexpression number
-}
-
-func (bra *_Bra) kind() int { return _BRA }
-func (bra *_Bra) print()    { print("bra", bra.n) }
-
-// --- EBRA end of parenthesized expression
-type _Ebra struct {
-	common
-	n int // subexpression number
-}
-
-func (ebra *_Ebra) kind() int { return _EBRA }
-func (ebra *_Ebra) print()    { print("ebra ", ebra.n) }
-
-// --- ALT alternation
-type _Alt struct {
-	common
-	left instr // other branch
-}
-
-func (alt *_Alt) kind() int { return _ALT }
-func (alt *_Alt) print()    { print("alt(", alt.left.index(), ")") }
-
-// --- NOP no operation
-type _Nop struct {
-	common
+func newCharClass() *instr {
+	i := &instr{kind: iCharClass}
+	i.cclass = new(charClass)
+	i.cclass.ranges = make([]int, 0, 4)
+	i.cclass.cmin = 0x10FFFF + 1 // MaxRune + 1
+	i.cclass.cmax = -1
+	return i
 }
 
-func (nop *_Nop) kind() int { return _NOP }
-func (nop *_Nop) print()    { print("nop") }
-
-func (re *Regexp) add(i instr) instr {
-	i.setIndex(re.inst.Len())
-	re.inst.Push(i)
+func (re *Regexp) add(i *instr) *instr {
+	i.index = len(re.inst)
+	re.inst = append(re.inst, i)
 	return i
 }
 
@@ -317,8 +271,21 @@ func ispunct(c int) bool {
 	return false
 }
 
-func (p *parser) charClass() instr {
-	cc := newCharClass()
+var escapes = []byte("abfnrtv")
+var escaped = []byte("\a\b\f\n\r\t\v")
+
+func escape(c int) int {
+	for i, b := range escapes {
+		if int(b) == c {
+			return i
+		}
+	}
+	return -1
+}
+
+func (p *parser) charClass() *instr {
+	i := newCharClass()
+	cc := i.cclass
 	if p.c() == '^' {
 		cc.negate = true
 		p.nextc()
@@ -331,20 +298,20 @@ func (p *parser) charClass() instr {
 				p.error(ErrBadRange)
 			}
 			// Is it [^\n]?
-			if cc.negate && cc.ranges.Len() == 2 &&
-				cc.ranges.At(0) == '\n' && cc.ranges.At(1) == '\n' {
-				nl := new(_NotNl)
+			if cc.negate && len(cc.ranges) == 2 &&
+				cc.ranges[0] == '\n' && cc.ranges[1] == '\n' {
+				nl := &instr{kind: iNotNL}
 				p.re.add(nl)
 				return nl
 			}
 			// Special common case: "[a]" -> "a"
-			if !cc.negate && cc.ranges.Len() == 2 && cc.ranges.At(0) == cc.ranges.At(1) {
-				c := newChar(cc.ranges.At(0))
+			if !cc.negate && len(cc.ranges) == 2 && cc.ranges[0] == cc.ranges[1] {
+				c := &instr{kind: iChar, char: cc.ranges[0]}
 				p.re.add(c)
 				return c
 			}
-			p.re.add(cc)
-			return cc
+			p.re.add(i)
+			return i
 		case '-': // do this before backslash processing
 			p.error(ErrBadRange)
 		case '\\':
@@ -352,10 +319,10 @@ func (p *parser) charClass() instr {
 			switch {
 			case c == endOfFile:
 				p.error(ErrExtraneousBackslash)
-			case c == 'n':
-				c = '\n'
 			case ispunct(c):
 				// c is as delivered
+			case escape(c) >= 0:
+				c = int(escaped[escape(c)])
 			default:
 				p.error(ErrBadBackslash)
 			}
@@ -381,7 +348,7 @@ func (p *parser) charClass() instr {
 	return nil
 }
 
-func (p *parser) term() (start, end instr) {
+func (p *parser) term() (start, end *instr) {
 	switch c := p.c(); c {
 	case '|', endOfFile:
 		return nil, nil
@@ -396,15 +363,15 @@ func (p *parser) term() (start, end instr) {
 		p.error(ErrUnmatchedRbkt)
 	case '^':
 		p.nextc()
-		start = p.re.add(new(_Bot))
+		start = p.re.add(&instr{kind: iBOT})
 		return start, start
 	case '$':
 		p.nextc()
-		start = p.re.add(new(_Eot))
+		start = p.re.add(&instr{kind: iEOT})
 		return start, start
 	case '.':
 		p.nextc()
-		start = p.re.add(new(_Any))
+		start = p.re.add(&instr{kind: iAny})
 		return start, start
 	case '[':
 		p.nextc()
@@ -425,12 +392,10 @@ func (p *parser) term() (start, end instr) {
 		}
 		p.nlpar--
 		p.nextc()
-		bra := new(_Bra)
+		bra := &instr{kind: iBra, braNum: 2 * nbra}
 		p.re.add(bra)
-		ebra := new(_Ebra)
+		ebra := &instr{kind: iBra, braNum: 2*nbra + 1}
 		p.re.add(ebra)
-		bra.n = nbra
-		ebra.n = nbra
 		if start == nil {
 			if end == nil {
 				p.error(ErrInternal)
@@ -438,33 +403,33 @@ func (p *parser) term() (start, end instr) {
 			}
 			start = ebra
 		} else {
-			end.setNext(ebra)
+			end.next = ebra
 		}
-		bra.setNext(start)
+		bra.next = start
 		return bra, ebra
 	case '\\':
 		c = p.nextc()
 		switch {
 		case c == endOfFile:
 			p.error(ErrExtraneousBackslash)
-		case c == 'n':
-			c = '\n'
 		case ispunct(c):
 			// c is as delivered
+		case escape(c) >= 0:
+			c = int(escaped[escape(c)])
 		default:
 			p.error(ErrBadBackslash)
 		}
 		fallthrough
 	default:
 		p.nextc()
-		start = newChar(c)
+		start = &instr{kind: iChar, char: c}
 		p.re.add(start)
 		return start, start
 	}
 	panic("unreachable")
 }
 
-func (p *parser) closure() (start, end instr) {
+func (p *parser) closure() (start, end *instr) {
 	start, end = p.term()
 	if start == nil {
 		return
@@ -472,28 +437,28 @@ func (p *parser) closure() (start, end instr) {
 	switch p.c() {
 	case '*':
 		// (start,end)*:
-		alt := new(_Alt)
+		alt := &instr{kind: iAlt}
 		p.re.add(alt)
-		end.setNext(alt) // after end, do alt
+		end.next = alt   // after end, do alt
 		alt.left = start // alternate brach: return to start
 		start = alt      // alt becomes new (start, end)
 		end = alt
 	case '+':
 		// (start,end)+:
-		alt := new(_Alt)
+		alt := &instr{kind: iAlt}
 		p.re.add(alt)
-		end.setNext(alt) // after end, do alt
+		end.next = alt   // after end, do alt
 		alt.left = start // alternate brach: return to start
 		end = alt        // start is unchanged; end is alt
 	case '?':
 		// (start,end)?:
-		alt := new(_Alt)
+		alt := &instr{kind: iAlt}
 		p.re.add(alt)
-		nop := new(_Nop)
+		nop := &instr{kind: iNop}
 		p.re.add(nop)
 		alt.left = start // alternate branch is start
-		alt.setNext(nop) // follow on to nop
-		end.setNext(nop) // after end, go to nop
+		alt.next = nop   // follow on to nop
+		end.next = nop   // after end, go to nop
 		start = alt      // start is now alt
 		end = nop        // end is nop pointed to by both branches
 	default:
@@ -506,27 +471,27 @@ func (p *parser) closure() (start, end instr) {
 	return
 }
 
-func (p *parser) concatenation() (start, end instr) {
+func (p *parser) concatenation() (start, end *instr) {
 	for {
 		nstart, nend := p.closure()
 		switch {
 		case nstart == nil: // end of this concatenation
 			if start == nil { // this is the empty string
-				nop := p.re.add(new(_Nop))
+				nop := p.re.add(&instr{kind: iNop})
 				return nop, nop
 			}
 			return
 		case start == nil: // this is first element of concatenation
 			start, end = nstart, nend
 		default:
-			end.setNext(nstart)
+			end.next = nstart
 			end = nend
 		}
 	}
 	panic("unreachable")
 }
 
-func (p *parser) regexp() (start, end instr) {
+func (p *parser) regexp() (start, end *instr) {
 	start, end = p.concatenation()
 	for {
 		switch p.c() {
@@ -535,49 +500,46 @@ func (p *parser) regexp() (start, end instr) {
 		case '|':
 			p.nextc()
 			nstart, nend := p.concatenation()
-			alt := new(_Alt)
+			alt := &instr{kind: iAlt}
 			p.re.add(alt)
 			alt.left = start
-			alt.setNext(nstart)
-			nop := new(_Nop)
+			alt.next = nstart
+			nop := &instr{kind: iNop}
 			p.re.add(nop)
-			end.setNext(nop)
-			nend.setNext(nop)
+			end.next = nop
+			nend.next = nop
 			start, end = alt, nop
 		}
 	}
 	panic("unreachable")
 }
 
-func unNop(i instr) instr {
-	for i.kind() == _NOP {
-		i = i.next()
+func unNop(i *instr) *instr {
+	for i.kind == iNop {
+		i = i.next
 	}
 	return i
 }
 
 func (re *Regexp) eliminateNops() {
-	for i := 0; i < re.inst.Len(); i++ {
-		inst := re.inst.At(i).(instr)
-		if inst.kind() == _END {
+	for _, inst := range re.inst {
+		if inst.kind == iEnd {
 			continue
 		}
-		inst.setNext(unNop(inst.next()))
-		if inst.kind() == _ALT {
-			alt := inst.(*_Alt)
-			alt.left = unNop(alt.left)
+		inst.next = unNop(inst.next)
+		if inst.kind == iAlt {
+			inst.left = unNop(inst.left)
 		}
 	}
 }
 
 func (re *Regexp) dump() {
 	print("prefix <", re.prefix, ">\n")
-	for i := 0; i < re.inst.Len(); i++ {
-		inst := re.inst.At(i).(instr)
-		print(inst.index(), ": ")
+	for _, inst := range re.inst {
+		print(inst.index, ": ")
 		inst.print()
-		if inst.kind() != _END {
-			print(" -> ", inst.next().index())
+		if inst.kind != iEnd {
+			print(" -> ", inst.next.index)
 		}
 		print("\n")
 	}
@@ -585,12 +547,12 @@ func (re *Regexp) dump() {
 
 func (re *Regexp) doParse() {
 	p := newParser(re)
-	start := new(_Start)
+	start := &instr{kind: iStart}
 	re.add(start)
 	s, e := p.regexp()
-	start.setNext(s)
+	start.next = s
 	re.start = start
-	e.setNext(re.add(new(_End)))
+	e.next = re.add(&instr{kind: iEnd})
 
 	if debug {
 		re.dump()
@@ -609,36 +571,44 @@ func (re *Regexp) doParse() {
 	}
 }
 
-// Extract regular text from the beginning of the pattern.
+// Extract regular text from the beginning of the pattern,
+// possibly after a leading iBOT.
 // That text can be used by doExecute to speed up matching.
 func (re *Regexp) setPrefix() {
 	var b []byte
 	var utf = make([]byte, utf8.UTFMax)
-	// First instruction is start; skip that.
-	i := re.inst.At(0).(instr).next().index()
+	var inst *instr
+	// First instruction is start; skip that.  Also skip any initial iBOT.
+	inst = re.inst[0].next
+	for inst.kind == iBOT {
+		inst = inst.next
+	}
 Loop:
-	for i < re.inst.Len() {
-		inst := re.inst.At(i).(instr)
+	for ; inst.kind != iEnd; inst = inst.next {
 		// stop if this is not a char
-		if inst.kind() != _CHAR {
+		if inst.kind != iChar {
 			break
 		}
 		// stop if this char can be followed by a match for an empty string,
 		// which includes closures, ^, and $.
-		switch re.inst.At(inst.next().index()).(instr).kind() {
-		case _BOT, _EOT, _ALT:
+		switch inst.next.kind {
+		case iBOT, iEOT, iAlt:
 			break Loop
 		}
-		n := utf8.EncodeRune(inst.(*_Char).char, utf)
-		b = bytes.Add(b, utf[0:n])
-		i = inst.next().index()
+		n := utf8.EncodeRune(utf, inst.char)
+		b = append(b, utf[0:n]...)
 	}
 	// point prefixStart instruction to first non-CHAR after prefix
-	re.prefixStart = re.inst.At(i).(instr)
+	re.prefixStart = inst
 	re.prefixBytes = b
 	re.prefix = string(b)
 }
 
+// String returns the source text used to compile the regular expression.
+func (re *Regexp) String() string {
+	return re.expr
+}
+
 // Compile parses a regular expression and returns, if successful, a Regexp
 // object that can be used to match against text.
 func Compile(str string) (regexp *Regexp, error os.Error) {
@@ -651,7 +621,7 @@ func Compile(str string) (regexp *Regexp, error os.Error) {
 		}
 	}()
 	regexp.expr = str
-	regexp.inst = new(vector.Vector)
+	regexp.inst = make([]*instr, 0, 10)
 	regexp.doParse()
 	return
 }
@@ -727,60 +697,45 @@ func (a *matchArena) noMatch() *matchVec {
 }
 
 type state struct {
-	inst     instr // next instruction to execute
-	prefixed bool  // this match began with a fixed prefix
+	inst     *instr // next instruction to execute
+	prefixed bool   // this match began with a fixed prefix
 	match    *matchVec
 }
 
 // Append new state to to-do list.  Leftmost-longest wins so avoid
 // adding a state that's already active.  The matchVec will be inc-ref'ed
 // if it is assigned to a state.
-func (a *matchArena) addState(s []state, inst instr, prefixed bool, match *matchVec, pos, end int) []state {
-	switch inst.kind() {
-	case _BOT:
+func (a *matchArena) addState(s []state, inst *instr, prefixed bool, match *matchVec, pos, end int) []state {
+	switch inst.kind {
+	case iBOT:
 		if pos == 0 {
-			s = a.addState(s, inst.next(), prefixed, match, pos, end)
+			s = a.addState(s, inst.next, prefixed, match, pos, end)
 		}
 		return s
-	case _EOT:
+	case iEOT:
 		if pos == end {
-			s = a.addState(s, inst.next(), prefixed, match, pos, end)
+			s = a.addState(s, inst.next, prefixed, match, pos, end)
 		}
 		return s
-	case _BRA:
-		n := inst.(*_Bra).n
-		match.m[2*n] = pos
-		s = a.addState(s, inst.next(), prefixed, match, pos, end)
-		return s
-	case _EBRA:
-		n := inst.(*_Ebra).n
-		match.m[2*n+1] = pos
-		s = a.addState(s, inst.next(), prefixed, match, pos, end)
+	case iBra:
+		match.m[inst.braNum] = pos
+		s = a.addState(s, inst.next, prefixed, match, pos, end)
 		return s
 	}
-	index := inst.index()
 	l := len(s)
 	// States are inserted in order so it's sufficient to see if we have the same
 	// instruction; no need to see if existing match is earlier (it is).
 	for i := 0; i < l; i++ {
-		if s[i].inst.index() == index {
+		if s[i].inst == inst {
 			return s
 		}
 	}
-	if l == cap(s) {
-		s1 := make([]state, 2*l)[0:l]
-		copy(s1, s)
-		s = s1
-	}
-	s = s[0 : l+1]
-	s[l].inst = inst
-	s[l].prefixed = prefixed
-	s[l].match = match
+	s = append(s, state{inst, prefixed, match})
 	match.ref++
-	if inst.kind() == _ALT {
-		s = a.addState(s, inst.(*_Alt).left, prefixed, a.copy(match), pos, end)
+	if inst.kind == iAlt {
+		s = a.addState(s, inst.left, prefixed, a.copy(match), pos, end)
 		// give other branch a copy of this match vector
-		s = a.addState(s, inst.next(), prefixed, a.copy(match), pos, end)
+		s = a.addState(s, inst.next, prefixed, a.copy(match), pos, end)
 	}
 	return s
 }
@@ -789,8 +744,8 @@ func (a *matchArena) addState(s []state, inst instr, prefixed bool, match *match
 // If bytes == nil, scan str.
 func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int {
 	var s [2][]state
-	s[0] = make([]state, 10)[0:0]
-	s[1] = make([]state, 10)[0:0]
+	s[0] = make([]state, 0, 10)
+	s[1] = make([]state, 0, 10)
 	in, out := 0, 1
 	var final state
 	found := false
@@ -798,34 +753,46 @@ func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int {
 	if bytestr != nil {
 		end = len(bytestr)
 	}
+	anchored := re.inst[0].next.kind == iBOT
+	if anchored && pos > 0 {
+		return nil
+	}
 	// fast check for initial plain substring
-	prefixed := false // has this iteration begun by skipping a prefix?
 	if re.prefix != "" {
-		var advance int
-		if bytestr == nil {
-			advance = strings.Index(str[pos:], re.prefix)
+		advance := 0
+		if anchored {
+			if bytestr == nil {
+				if !strings.HasPrefix(str, re.prefix) {
+					return nil
+				}
+			} else {
+				if !bytes.HasPrefix(bytestr, re.prefixBytes) {
+					return nil
+				}
+			}
 		} else {
-			advance = bytes.Index(bytestr[pos:], re.prefixBytes)
+			if bytestr == nil {
+				advance = strings.Index(str[pos:], re.prefix)
+			} else {
+				advance = bytes.Index(bytestr[pos:], re.prefixBytes)
+			}
 		}
 		if advance == -1 {
-			return []int{}
+			return nil
 		}
-		pos += advance + len(re.prefix)
-		prefixed = true
+		pos += advance
 	}
 	arena := &matchArena{nil, 2 * (re.nbra + 1)}
-	for pos <= end {
-		if !found {
+	for startPos := pos; pos <= end; {
+		if !found && (pos == startPos || !anchored) {
 			// prime the pump if we haven't seen a match yet
 			match := arena.noMatch()
 			match.m[0] = pos
-			if prefixed {
-				s[out] = arena.addState(s[out], re.prefixStart, true, match, pos, end)
-				prefixed = false // next iteration should start at beginning of machine.
-			} else {
-				s[out] = arena.addState(s[out], re.start.next(), false, match, pos, end)
-			}
+			s[out] = arena.addState(s[out], re.start.next, false, match, pos, end)
 			arena.free(match) // if addState saved it, ref was incremented
+		} else if len(s[out]) == 0 {
+			// machine has completed
+			break
 		}
 		in, out = out, in // old out state is new in state
 		// clear out old state
@@ -834,10 +801,6 @@ func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int {
 			arena.free(state.match)
 		}
 		s[out] = old[0:0] // truncate state vector
-		if found && len(s[in]) == 0 {
-			// machine has completed
-			break
-		}
 		charwidth := 1
 		c := endOfFile
 		if pos < end {
@@ -849,29 +812,28 @@ func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int {
 		}
 		pos += charwidth
 		for _, st := range s[in] {
-			switch st.inst.kind() {
-			case _BOT:
-			case _EOT:
-			case _CHAR:
-				if c == st.inst.(*_Char).char {
-					s[out] = arena.addState(s[out], st.inst.next(), st.prefixed, st.match, pos, end)
+			switch st.inst.kind {
+			case iBOT:
+			case iEOT:
+			case iChar:
+				if c == st.inst.char {
+					s[out] = arena.addState(s[out], st.inst.next, st.prefixed, st.match, pos, end)
 				}
-			case _CHARCLASS:
-				if st.inst.(*_CharClass).matches(c) {
-					s[out] = arena.addState(s[out], st.inst.next(), st.prefixed, st.match, pos, end)
+			case iCharClass:
+				if st.inst.cclass.matches(c) {
+					s[out] = arena.addState(s[out], st.inst.next, st.prefixed, st.match, pos, end)
 				}
-			case _ANY:
+			case iAny:
 				if c != endOfFile {
-					s[out] = arena.addState(s[out], st.inst.next(), st.prefixed, st.match, pos, end)
+					s[out] = arena.addState(s[out], st.inst.next, st.prefixed, st.match, pos, end)
 				}
-			case _NOTNL:
+			case iNotNL:
 				if c != endOfFile && c != '\n' {
-					s[out] = arena.addState(s[out], st.inst.next(), st.prefixed, st.match, pos, end)
+					s[out] = arena.addState(s[out], st.inst.next, st.prefixed, st.match, pos, end)
 				}
-			case _BRA:
-			case _EBRA:
-			case _ALT:
-			case _END:
+			case iBra:
+			case iAlt:
+			case iEnd:
 				// choose leftmost longest
 				if !found || // first
 					st.match.m[0] < final.match.m[0] || // leftmost
@@ -900,77 +862,33 @@ func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int {
 	return final.match.m
 }
 
-
-// ExecuteString matches the Regexp against the string s.
-// The return value is an array of integers, in pairs, identifying the positions of
-// substrings matched by the expression.
-//    s[a[0]:a[1]] is the substring matched by the entire expression.
-//    s[a[2*i]:a[2*i+1]] for i > 0 is the substring matched by the ith parenthesized subexpression.
-// A negative value means the subexpression did not match any element of the string.
-// An empty array means "no match".
-func (re *Regexp) ExecuteString(s string) (a []int) {
-	return re.doExecute(s, nil, 0)
+// LiteralPrefix returns a literal string that must begin any match
+// of the regular expression re.  It returns the boolean true if the
+// literal string comprises the entire regular expression.
+func (re *Regexp) LiteralPrefix() (prefix string, complete bool) {
+	c := make([]int, len(re.inst)-2) // minus start and end.
+	// First instruction is start; skip that.
+	i := 0
+	for inst := re.inst[0].next; inst.kind != iEnd; inst = inst.next {
+		// stop if this is not a char
+		if inst.kind != iChar {
+			return string(c[:i]), false
+		}
+		c[i] = inst.char
+		i++
+	}
+	return string(c[:i]), true
 }
 
-
-// Execute matches the Regexp against the byte slice b.
-// The return value is an array of integers, in pairs, identifying the positions of
-// subslices matched by the expression.
-//    b[a[0]:a[1]] is the subslice matched by the entire expression.
-//    b[a[2*i]:a[2*i+1]] for i > 0 is the subslice matched by the ith parenthesized subexpression.
-// A negative value means the subexpression did not match any element of the slice.
-// An empty array means "no match".
-func (re *Regexp) Execute(b []byte) (a []int) { return re.doExecute("", b, 0) }
-
-
 // MatchString returns whether the Regexp matches the string s.
 // The return value is a boolean: true for match, false for no match.
 func (re *Regexp) MatchString(s string) bool { return len(re.doExecute(s, nil, 0)) > 0 }
 
-
 // Match returns whether the Regexp matches the byte slice b.
 // The return value is a boolean: true for match, false for no match.
 func (re *Regexp) Match(b []byte) bool { return len(re.doExecute("", b, 0)) > 0 }
 
 
-// MatchStrings matches the Regexp against the string s.
-// The return value is an array of strings matched by the expression.
-//    a[0] is the substring matched by the entire expression.
-//    a[i] for i > 0 is the substring matched by the ith parenthesized subexpression.
-// An empty array means ``no match''.
-func (re *Regexp) MatchStrings(s string) (a []string) {
-	r := re.doExecute(s, nil, 0)
-	if r == nil {
-		return nil
-	}
-	a = make([]string, len(r)/2)
-	for i := 0; i < len(r); i += 2 {
-		if r[i] != -1 { // -1 means no match for this subexpression
-			a[i/2] = s[r[i]:r[i+1]]
-		}
-	}
-	return
-}
-
-// MatchSlices matches the Regexp against the byte slice b.
-// The return value is an array of subslices matched by the expression.
-//    a[0] is the subslice matched by the entire expression.
-//    a[i] for i > 0 is the subslice matched by the ith parenthesized subexpression.
-// An empty array means ``no match''.
-func (re *Regexp) MatchSlices(b []byte) (a [][]byte) {
-	r := re.doExecute("", b, 0)
-	if r == nil {
-		return nil
-	}
-	a = make([][]byte, len(r)/2)
-	for i := 0; i < len(r); i += 2 {
-		if r[i] != -1 { // -1 means no match for this subexpression
-			a[i/2] = b[r[i]:r[i+1]]
-		}
-	}
-	return
-}
-
 // MatchString checks whether a textual regular expression
 // matches a string.  More complicated queries need
 // to use Compile and the full Regexp interface.
@@ -1117,7 +1035,7 @@ func QuoteMeta(s string) string {
 }
 
 // Find matches in slice b if b is non-nil, otherwise find matches in string s.
-func (re *Regexp) allMatches(s string, b []byte, n int, deliver func(int, int)) {
+func (re *Regexp) allMatches(s string, b []byte, n int, deliver func([]int)) {
 	var end int
 	if b == nil {
 		end = len(s)
@@ -1156,78 +1074,270 @@ func (re *Regexp) allMatches(s string, b []byte, n int, deliver func(int, int))
 		prevMatchEnd = matches[1]
 
 		if accept {
-			deliver(matches[0], matches[1])
+			deliver(matches)
 			i++
 		}
 	}
 }
 
-// AllMatches slices the byte slice b into substrings that are successive
-// matches of the Regexp within b. If n > 0, the function returns at most n
-// matches. Text that does not match the expression will be skipped. Empty
-// matches abutting a preceding match are ignored. The function returns a slice
-// containing the matching substrings.
-func (re *Regexp) AllMatches(b []byte, n int) [][]byte {
-	if n <= 0 {
+// Find returns a slice holding the text of the leftmost match in b of the regular expression.
+// A return value of nil indicates no match.
+func (re *Regexp) Find(b []byte) []byte {
+	a := re.doExecute("", b, 0)
+	if a == nil {
+		return nil
+	}
+	return b[a[0]:a[1]]
+}
+
+// FindIndex returns a two-element slice of integers defining the location of
+// the leftmost match in b of the regular expression.  The match itself is at
+// b[loc[0]:loc[1]].
+// A return value of nil indicates no match.
+func (re *Regexp) FindIndex(b []byte) (loc []int) {
+	a := re.doExecute("", b, 0)
+	if a == nil {
+		return nil
+	}
+	return a[0:2]
+}
+
+// FindString returns a string holding the text of the leftmost match in s of the regular
+// expression.  If there is no match, the return value is an empty string,
+// but it will also be empty if the regular expression successfully matches
+// an empty string.  Use FindStringIndex or FindStringSubmatch if it is
+// necessary to distinguish these cases.
+func (re *Regexp) FindString(s string) string {
+	a := re.doExecute(s, nil, 0)
+	if a == nil {
+		return ""
+	}
+	return s[a[0]:a[1]]
+}
+
+// FindStringIndex returns a two-element slice of integers defining the
+// location of the leftmost match in s of the regular expression.  The match
+// itself is at s[loc[0]:loc[1]].
+// A return value of nil indicates no match.
+func (re *Regexp) FindStringIndex(s string) []int {
+	a := re.doExecute(s, nil, 0)
+	if a == nil {
+		return nil
+	}
+	return a[0:2]
+}
+
+// FindSubmatch returns a slice of slices holding the text of the leftmost
+// match of the regular expression in b and the matches, if any, of its
+// subexpressions, as defined by the 'Submatch' descriptions in the package
+// comment.
+// A return value of nil indicates no match.
+func (re *Regexp) FindSubmatch(b []byte) [][]byte {
+	a := re.doExecute("", b, 0)
+	if a == nil {
+		return nil
+	}
+	ret := make([][]byte, len(a)/2)
+	for i := range ret {
+		if a[2*i] >= 0 {
+			ret[i] = b[a[2*i]:a[2*i+1]]
+		}
+	}
+	return ret
+}
+
+// FindSubmatchIndex returns a slice holding the index pairs identifying the
+// leftmost match of the regular expression in b and the matches, if any, of
+// its subexpressions, as defined by the 'Submatch' and 'Index' descriptions
+// in the package comment.
+// A return value of nil indicates no match.
+func (re *Regexp) FindSubmatchIndex(b []byte) []int {
+	return re.doExecute("", b, 0)
+}
+
+// FindStringSubmatch returns a slice of strings holding the text of the
+// leftmost match of the regular expression in s and the matches, if any, of
+// its subexpressions, as defined by the 'Submatch' description in the
+// package comment.
+// A return value of nil indicates no match.
+func (re *Regexp) FindStringSubmatch(s string) []string {
+	a := re.doExecute(s, nil, 0)
+	if a == nil {
+		return nil
+	}
+	ret := make([]string, len(a)/2)
+	for i := range ret {
+		if a[2*i] >= 0 {
+			ret[i] = s[a[2*i]:a[2*i+1]]
+		}
+	}
+	return ret
+}
+
+// FindStringSubmatchIndex returns a slice holding the index pairs
+// identifying the leftmost match of the regular expression in s and the
+// matches, if any, of its subexpressions, as defined by the 'Submatch' and
+// 'Index' descriptions in the package comment.
+// A return value of nil indicates no match.
+func (re *Regexp) FindStringSubmatchIndex(s string) []int {
+	return re.doExecute(s, nil, 0)
+}
+
+const startSize = 10 // The size at which to start a slice in the 'All' routines.
+
+// FindAll is the 'All' version of Find; it returns a slice of all successive
+// matches of the expression, as defined by the 'All' description in the
+// package comment.
+// A return value of nil indicates no match.
+func (re *Regexp) FindAll(b []byte, n int) [][]byte {
+	if n < 0 {
 		n = len(b) + 1
 	}
-	result := make([][]byte, n)
-	i := 0
-	re.allMatches("", b, n, func(start, end int) {
-		result[i] = b[start:end]
-		i++
+	result := make([][]byte, 0, startSize)
+	re.allMatches("", b, n, func(match []int) {
+		result = append(result, b[match[0]:match[1]])
 	})
-	return result[0:i]
+	if len(result) == 0 {
+		return nil
+	}
+	return result
+}
+
+// FindAllIndex is the 'All' version of FindIndex; it returns a slice of all
+// successive matches of the expression, as defined by the 'All' description
+// in the package comment.
+// A return value of nil indicates no match.
+func (re *Regexp) FindAllIndex(b []byte, n int) [][]int {
+	if n < 0 {
+		n = len(b) + 1
+	}
+	result := make([][]int, 0, startSize)
+	re.allMatches("", b, n, func(match []int) {
+		result = append(result, match[0:2])
+	})
+	if len(result) == 0 {
+		return nil
+	}
+	return result
 }
 
-// AllMatchesString slices the string s into substrings that are successive
-// matches of the Regexp within s. If n > 0, the function returns at most n
-// matches. Text that does not match the expression will be skipped. Empty
-// matches abutting a preceding match are ignored. The function returns a slice
-// containing the matching substrings.
-func (re *Regexp) AllMatchesString(s string, n int) []string {
-	if n <= 0 {
+// FindAllString is the 'All' version of FindString; it returns a slice of all
+// successive matches of the expression, as defined by the 'All' description
+// in the package comment.
+// A return value of nil indicates no match.
+func (re *Regexp) FindAllString(s string, n int) []string {
+	if n < 0 {
 		n = len(s) + 1
 	}
-	result := make([]string, n)
-	i := 0
-	re.allMatches(s, nil, n, func(start, end int) {
-		result[i] = s[start:end]
-		i++
+	result := make([]string, 0, startSize)
+	re.allMatches(s, nil, n, func(match []int) {
+		result = append(result, s[match[0]:match[1]])
 	})
-	return result[0:i]
+	if len(result) == 0 {
+		return nil
+	}
+	return result
 }
 
-// AllMatchesIter slices the byte slice b into substrings that are successive
-// matches of the Regexp within b. If n > 0, the function returns at most n
-// matches. Text that does not match the expression will be skipped. Empty
-// matches abutting a preceding match are ignored. The function returns a
-// channel that iterates over the matching substrings.
-func (re *Regexp) AllMatchesIter(b []byte, n int) <-chan []byte {
-	if n <= 0 {
+// FindAllStringIndex is the 'All' version of FindStringIndex; it returns a
+// slice of all successive matches of the expression, as defined by the 'All'
+// description in the package comment.
+// A return value of nil indicates no match.
+func (re *Regexp) FindAllStringIndex(s string, n int) [][]int {
+	if n < 0 {
+		n = len(s) + 1
+	}
+	result := make([][]int, 0, startSize)
+	re.allMatches(s, nil, n, func(match []int) {
+		result = append(result, match[0:2])
+	})
+	if len(result) == 0 {
+		return nil
+	}
+	return result
+}
+
+// FindAllSubmatch is the 'All' version of FindSubmatch; it returns a slice
+// of all successive matches of the expression, as defined by the 'All'
+// description in the package comment.
+// A return value of nil indicates no match.
+func (re *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte {
+	if n < 0 {
 		n = len(b) + 1
 	}
-	c := make(chan []byte, 10)
-	go func() {
-		re.allMatches("", b, n, func(start, end int) { c <- b[start:end] })
-		close(c)
-	}()
-	return c
+	result := make([][][]byte, 0, startSize)
+	re.allMatches("", b, n, func(match []int) {
+		slice := make([][]byte, len(match)/2)
+		for j := range slice {
+			if match[2*j] >= 0 {
+				slice[j] = b[match[2*j]:match[2*j+1]]
+			}
+		}
+		result = append(result, slice)
+	})
+	if len(result) == 0 {
+		return nil
+	}
+	return result
+}
+
+// FindAllSubmatchIndex is the 'All' version of FindSubmatchIndex; it returns
+// a slice of all successive matches of the expression, as defined by the
+// 'All' description in the package comment.
+// A return value of nil indicates no match.
+func (re *Regexp) FindAllSubmatchIndex(b []byte, n int) [][]int {
+	if n < 0 {
+		n = len(b) + 1
+	}
+	result := make([][]int, 0, startSize)
+	re.allMatches("", b, n, func(match []int) {
+		result = append(result, match)
+	})
+	if len(result) == 0 {
+		return nil
+	}
+	return result
 }
 
-// AllMatchesStringIter slices the string s into substrings that are successive
-// matches of the Regexp within s. If n > 0, the function returns at most n
-// matches. Text that does not match the expression will be skipped. Empty
-// matches abutting a preceding match are ignored. The function returns a
-// channel that iterates over the matching substrings.
-func (re *Regexp) AllMatchesStringIter(s string, n int) <-chan string {
-	if n <= 0 {
+// FindAllStringSubmatch is the 'All' version of FindStringSubmatch; it
+// returns a slice of all successive matches of the expression, as defined by
+// the 'All' description in the package comment.
+// A return value of nil indicates no match.
+func (re *Regexp) FindAllStringSubmatch(s string, n int) [][]string {
+	if n < 0 {
 		n = len(s) + 1
 	}
-	c := make(chan string, 10)
-	go func() {
-		re.allMatches(s, nil, n, func(start, end int) { c <- s[start:end] })
-		close(c)
-	}()
-	return c
+	result := make([][]string, 0, startSize)
+	re.allMatches(s, nil, n, func(match []int) {
+		slice := make([]string, len(match)/2)
+		for j := range slice {
+			if match[2*j] >= 0 {
+				slice[j] = s[match[2*j]:match[2*j+1]]
+			}
+		}
+		result = append(result, slice)
+	})
+	if len(result) == 0 {
+		return nil
+	}
+	return result
+}
+
+// FindAllStringSubmatchIndex is the 'All' version of
+// FindStringSubmatchIndex; it returns a slice of all successive matches of
+// the expression, as defined by the 'All' description in the package
+// comment.
+// A return value of nil indicates no match.
+func (re *Regexp) FindAllStringSubmatchIndex(s string, n int) [][]int {
+	if n < 0 {
+		n = len(s) + 1
+	}
+	result := make([][]int, 0, startSize)
+	re.allMatches(s, nil, n, func(match []int) {
+		result = append(result, match)
+	})
+	if len(result) == 0 {
+		return nil
+	}
+	return result
 }
author	Ondřej Surý <ondrej@sury.org>	2011-01-17 12:40:45 +0100
committer	Ondřej Surý <ondrej@sury.org>	2011-01-17 12:40:45 +0100
commit	3e45412327a2654a77944249962b3652e6142299 (patch)
tree	bc3bf69452afa055423cbe0c5cfa8ca357df6ccf /src/pkg/regexp
parent	c533680039762cacbc37db8dc7eed074c3e497be (diff)
download	golang-3e45412327a2654a77944249962b3652e6142299.tar.gz