summaryrefslogtreecommitdiff
path: root/src/pkg/regexp/syntax
diff options
context:
space:
mode:
Diffstat (limited to 'src/pkg/regexp/syntax')
-rw-r--r--src/pkg/regexp/syntax/doc.go4
-rwxr-xr-xsrc/pkg/regexp/syntax/make_perl_groups.pl4
-rw-r--r--src/pkg/regexp/syntax/parse.go3
-rw-r--r--src/pkg/regexp/syntax/parse_test.go4
-rw-r--r--src/pkg/regexp/syntax/perl_groups.go4
-rw-r--r--src/pkg/regexp/syntax/prog.go54
-rw-r--r--src/pkg/regexp/syntax/prog_test.go4
7 files changed, 59 insertions, 18 deletions
diff --git a/src/pkg/regexp/syntax/doc.go b/src/pkg/regexp/syntax/doc.go
index e52632ef7..8e72c90d3 100644
--- a/src/pkg/regexp/syntax/doc.go
+++ b/src/pkg/regexp/syntax/doc.go
@@ -46,6 +46,10 @@ Repetitions:
x{n,}? n or more x, prefer fewer
x{n}? exactly n x
+Implementation restriction: The counting forms x{n} etc. (but not the other
+forms x* etc.) have an upper limit of n=1000. Negative or higher explicit
+counts yield the parse error ErrInvalidRepeatSize.
+
Grouping:
(re) numbered capturing group (submatch)
(?P<name>re) named & numbered capturing group (submatch)
diff --git a/src/pkg/regexp/syntax/make_perl_groups.pl b/src/pkg/regexp/syntax/make_perl_groups.pl
index d024f5090..90040fcb4 100755
--- a/src/pkg/regexp/syntax/make_perl_groups.pl
+++ b/src/pkg/regexp/syntax/make_perl_groups.pl
@@ -92,6 +92,10 @@ sub PrintClasses($@) {
}
print <<EOF;
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
// GENERATED BY make_perl_groups.pl; DO NOT EDIT.
// make_perl_groups.pl >perl_groups.go
diff --git a/src/pkg/regexp/syntax/parse.go b/src/pkg/regexp/syntax/parse.go
index 42d0bf4a1..cb25dca39 100644
--- a/src/pkg/regexp/syntax/parse.go
+++ b/src/pkg/regexp/syntax/parse.go
@@ -668,7 +668,6 @@ func Parse(s string, flags Flags) (*Regexp, error) {
c rune
op Op
lastRepeat string
- min, max int
)
p.flags = flags
p.wholeRegexp = s
@@ -740,7 +739,7 @@ func Parse(s string, flags Flags) (*Regexp, error) {
op = OpQuest
}
after := t[1:]
- if after, err = p.repeat(op, min, max, before, after, lastRepeat); err != nil {
+ if after, err = p.repeat(op, 0, 0, before, after, lastRepeat); err != nil {
return nil, err
}
repeat = before
diff --git a/src/pkg/regexp/syntax/parse_test.go b/src/pkg/regexp/syntax/parse_test.go
index 269d6c3b8..f3089294c 100644
--- a/src/pkg/regexp/syntax/parse_test.go
+++ b/src/pkg/regexp/syntax/parse_test.go
@@ -100,12 +100,12 @@ var parseTests = []parseTest{
{`\P{Braille}`, `cc{0x0-0x27ff 0x2900-0x10ffff}`},
{`\p{^Braille}`, `cc{0x0-0x27ff 0x2900-0x10ffff}`},
{`\P{^Braille}`, `cc{0x2800-0x28ff}`},
- {`\pZ`, `cc{0x20 0xa0 0x1680 0x180e 0x2000-0x200a 0x2028-0x2029 0x202f 0x205f 0x3000}`},
+ {`\pZ`, `cc{0x20 0xa0 0x1680 0x2000-0x200a 0x2028-0x2029 0x202f 0x205f 0x3000}`},
{`[\p{Braille}]`, `cc{0x2800-0x28ff}`},
{`[\P{Braille}]`, `cc{0x0-0x27ff 0x2900-0x10ffff}`},
{`[\p{^Braille}]`, `cc{0x0-0x27ff 0x2900-0x10ffff}`},
{`[\P{^Braille}]`, `cc{0x2800-0x28ff}`},
- {`[\pZ]`, `cc{0x20 0xa0 0x1680 0x180e 0x2000-0x200a 0x2028-0x2029 0x202f 0x205f 0x3000}`},
+ {`[\pZ]`, `cc{0x20 0xa0 0x1680 0x2000-0x200a 0x2028-0x2029 0x202f 0x205f 0x3000}`},
{`\p{Lu}`, mkCharClass(unicode.IsUpper)},
{`[\p{Lu}]`, mkCharClass(unicode.IsUpper)},
{`(?i)[\p{Lu}]`, mkCharClass(isUpperFold)},
diff --git a/src/pkg/regexp/syntax/perl_groups.go b/src/pkg/regexp/syntax/perl_groups.go
index 1a11ca62f..effe4e686 100644
--- a/src/pkg/regexp/syntax/perl_groups.go
+++ b/src/pkg/regexp/syntax/perl_groups.go
@@ -1,3 +1,7 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
// GENERATED BY make_perl_groups.pl; DO NOT EDIT.
// make_perl_groups.pl >perl_groups.go
diff --git a/src/pkg/regexp/syntax/prog.go b/src/pkg/regexp/syntax/prog.go
index a482a82f2..29bd282d0 100644
--- a/src/pkg/regexp/syntax/prog.go
+++ b/src/pkg/regexp/syntax/prog.go
@@ -37,6 +37,27 @@ const (
InstRuneAnyNotNL
)
+var instOpNames = []string{
+ "InstAlt",
+ "InstAltMatch",
+ "InstCapture",
+ "InstEmptyWidth",
+ "InstMatch",
+ "InstFail",
+ "InstNop",
+ "InstRune",
+ "InstRune1",
+ "InstRuneAny",
+ "InstRuneAnyNotNL",
+}
+
+func (i InstOp) String() string {
+ if uint(i) >= uint(len(instOpNames)) {
+ return ""
+ }
+ return instOpNames[i]
+}
+
// An EmptyOp specifies a kind or mixture of zero-width assertions.
type EmptyOp uint8
@@ -103,13 +124,13 @@ func (p *Prog) String() string {
// skipNop follows any no-op or capturing instructions
// and returns the resulting pc.
-func (p *Prog) skipNop(pc uint32) *Inst {
+func (p *Prog) skipNop(pc uint32) (*Inst, uint32) {
i := &p.Inst[pc]
for i.Op == InstNop || i.Op == InstCapture {
pc = i.Out
i = &p.Inst[pc]
}
- return i
+ return i, pc
}
// op returns i.Op but merges all the Rune special cases into InstRune
@@ -126,7 +147,7 @@ func (i *Inst) op() InstOp {
// regexp must start with. Complete is true if the prefix
// is the entire match.
func (p *Prog) Prefix() (prefix string, complete bool) {
- i := p.skipNop(uint32(p.Start))
+ i, _ := p.skipNop(uint32(p.Start))
// Avoid allocation of buffer if prefix is empty.
if i.op() != InstRune || len(i.Rune) != 1 {
@@ -137,7 +158,7 @@ func (p *Prog) Prefix() (prefix string, complete bool) {
var buf bytes.Buffer
for i.op() == InstRune && len(i.Rune) == 1 && Flags(i.Arg)&FoldCase == 0 {
buf.WriteRune(i.Rune[0])
- i = p.skipNop(i.Out)
+ i, _ = p.skipNop(i.Out)
}
return buf.String(), i.Op == InstMatch
}
@@ -166,35 +187,46 @@ Loop:
return flag
}
+const noMatch = -1
+
// MatchRune returns true if the instruction matches (and consumes) r.
// It should only be called when i.Op == InstRune.
func (i *Inst) MatchRune(r rune) bool {
+ return i.MatchRunePos(r) != noMatch
+}
+
+// MatchRunePos checks whether the instruction matches (and consumes) r.
+// If so, MatchRunePos returns the index of the matching rune pair
+// (or, when len(i.Rune) == 1, rune singleton).
+// If not, MatchRunePos returns -1.
+// MatchRunePos should only be called when i.Op == InstRune.
+func (i *Inst) MatchRunePos(r rune) int {
rune := i.Rune
// Special case: single-rune slice is from literal string, not char class.
if len(rune) == 1 {
r0 := rune[0]
if r == r0 {
- return true
+ return 0
}
if Flags(i.Arg)&FoldCase != 0 {
for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) {
if r == r1 {
- return true
+ return 0
}
}
}
- return false
+ return noMatch
}
// Peek at the first few pairs.
// Should handle ASCII well.
for j := 0; j < len(rune) && j <= 8; j += 2 {
if r < rune[j] {
- return false
+ return noMatch
}
if r <= rune[j+1] {
- return true
+ return j / 2
}
}
@@ -205,14 +237,14 @@ func (i *Inst) MatchRune(r rune) bool {
m := lo + (hi-lo)/2
if c := rune[2*m]; c <= r {
if r <= rune[2*m+1] {
- return true
+ return m
}
lo = m + 1
} else {
hi = m
}
}
- return false
+ return noMatch
}
// As per re2's Prog::IsWordChar. Determines whether rune is an ASCII word char.
diff --git a/src/pkg/regexp/syntax/prog_test.go b/src/pkg/regexp/syntax/prog_test.go
index cd71abc2a..50bfa3d4b 100644
--- a/src/pkg/regexp/syntax/prog_test.go
+++ b/src/pkg/regexp/syntax/prog_test.go
@@ -4,9 +4,7 @@
package syntax
-import (
- "testing"
-)
+import "testing"
var compileTests = []struct {
Regexp string