diff options
author | Michael Stapelberg <stapelberg@debian.org> | 2014-06-19 09:22:53 +0200 |
---|---|---|
committer | Michael Stapelberg <stapelberg@debian.org> | 2014-06-19 09:22:53 +0200 |
commit | 8a39ee361feb9bf46d728ff1ba4f07ca1d9610b1 (patch) | |
tree | 4449f2036cccf162e8417cc5841a35815b3e7ac5 /src/pkg/regexp/syntax | |
parent | c8bf49ef8a92e2337b69c14b9b88396efe498600 (diff) | |
download | golang-upstream/1.3.tar.gz |
Imported Upstream version 1.3 (upstream/1.3)
Diffstat (limited to 'src/pkg/regexp/syntax')
-rw-r--r-- | src/pkg/regexp/syntax/doc.go | 4 | ||||
-rwxr-xr-x | src/pkg/regexp/syntax/make_perl_groups.pl | 4 | ||||
-rw-r--r-- | src/pkg/regexp/syntax/parse.go | 3 | ||||
-rw-r--r-- | src/pkg/regexp/syntax/parse_test.go | 4 | ||||
-rw-r--r-- | src/pkg/regexp/syntax/perl_groups.go | 4 | ||||
-rw-r--r-- | src/pkg/regexp/syntax/prog.go | 54 | ||||
-rw-r--r-- | src/pkg/regexp/syntax/prog_test.go | 4 |
7 files changed, 59 insertions, 18 deletions
diff --git a/src/pkg/regexp/syntax/doc.go b/src/pkg/regexp/syntax/doc.go index e52632ef7..8e72c90d3 100644 --- a/src/pkg/regexp/syntax/doc.go +++ b/src/pkg/regexp/syntax/doc.go @@ -46,6 +46,10 @@ Repetitions: x{n,}? n or more x, prefer fewer x{n}? exactly n x +Implementation restriction: The counting forms x{n} etc. (but not the other +forms x* etc.) have an upper limit of n=1000. Negative or higher explicit +counts yield the parse error ErrInvalidRepeatSize. + Grouping: (re) numbered capturing group (submatch) (?P<name>re) named & numbered capturing group (submatch) diff --git a/src/pkg/regexp/syntax/make_perl_groups.pl b/src/pkg/regexp/syntax/make_perl_groups.pl index d024f5090..90040fcb4 100755 --- a/src/pkg/regexp/syntax/make_perl_groups.pl +++ b/src/pkg/regexp/syntax/make_perl_groups.pl @@ -92,6 +92,10 @@ sub PrintClasses($@) { } print <<EOF; +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + // GENERATED BY make_perl_groups.pl; DO NOT EDIT. 
// make_perl_groups.pl >perl_groups.go diff --git a/src/pkg/regexp/syntax/parse.go b/src/pkg/regexp/syntax/parse.go index 42d0bf4a1..cb25dca39 100644 --- a/src/pkg/regexp/syntax/parse.go +++ b/src/pkg/regexp/syntax/parse.go @@ -668,7 +668,6 @@ func Parse(s string, flags Flags) (*Regexp, error) { c rune op Op lastRepeat string - min, max int ) p.flags = flags p.wholeRegexp = s @@ -740,7 +739,7 @@ func Parse(s string, flags Flags) (*Regexp, error) { op = OpQuest } after := t[1:] - if after, err = p.repeat(op, min, max, before, after, lastRepeat); err != nil { + if after, err = p.repeat(op, 0, 0, before, after, lastRepeat); err != nil { return nil, err } repeat = before diff --git a/src/pkg/regexp/syntax/parse_test.go b/src/pkg/regexp/syntax/parse_test.go index 269d6c3b8..f3089294c 100644 --- a/src/pkg/regexp/syntax/parse_test.go +++ b/src/pkg/regexp/syntax/parse_test.go @@ -100,12 +100,12 @@ var parseTests = []parseTest{ {`\P{Braille}`, `cc{0x0-0x27ff 0x2900-0x10ffff}`}, {`\p{^Braille}`, `cc{0x0-0x27ff 0x2900-0x10ffff}`}, {`\P{^Braille}`, `cc{0x2800-0x28ff}`}, - {`\pZ`, `cc{0x20 0xa0 0x1680 0x180e 0x2000-0x200a 0x2028-0x2029 0x202f 0x205f 0x3000}`}, + {`\pZ`, `cc{0x20 0xa0 0x1680 0x2000-0x200a 0x2028-0x2029 0x202f 0x205f 0x3000}`}, {`[\p{Braille}]`, `cc{0x2800-0x28ff}`}, {`[\P{Braille}]`, `cc{0x0-0x27ff 0x2900-0x10ffff}`}, {`[\p{^Braille}]`, `cc{0x0-0x27ff 0x2900-0x10ffff}`}, {`[\P{^Braille}]`, `cc{0x2800-0x28ff}`}, - {`[\pZ]`, `cc{0x20 0xa0 0x1680 0x180e 0x2000-0x200a 0x2028-0x2029 0x202f 0x205f 0x3000}`}, + {`[\pZ]`, `cc{0x20 0xa0 0x1680 0x2000-0x200a 0x2028-0x2029 0x202f 0x205f 0x3000}`}, {`\p{Lu}`, mkCharClass(unicode.IsUpper)}, {`[\p{Lu}]`, mkCharClass(unicode.IsUpper)}, {`(?i)[\p{Lu}]`, mkCharClass(isUpperFold)}, diff --git a/src/pkg/regexp/syntax/perl_groups.go b/src/pkg/regexp/syntax/perl_groups.go index 1a11ca62f..effe4e686 100644 --- a/src/pkg/regexp/syntax/perl_groups.go +++ b/src/pkg/regexp/syntax/perl_groups.go @@ -1,3 +1,7 @@ +// Copyright 2013 The Go 
Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + // GENERATED BY make_perl_groups.pl; DO NOT EDIT. // make_perl_groups.pl >perl_groups.go diff --git a/src/pkg/regexp/syntax/prog.go b/src/pkg/regexp/syntax/prog.go index a482a82f2..29bd282d0 100644 --- a/src/pkg/regexp/syntax/prog.go +++ b/src/pkg/regexp/syntax/prog.go @@ -37,6 +37,27 @@ const ( InstRuneAnyNotNL ) +var instOpNames = []string{ + "InstAlt", + "InstAltMatch", + "InstCapture", + "InstEmptyWidth", + "InstMatch", + "InstFail", + "InstNop", + "InstRune", + "InstRune1", + "InstRuneAny", + "InstRuneAnyNotNL", +} + +func (i InstOp) String() string { + if uint(i) >= uint(len(instOpNames)) { + return "" + } + return instOpNames[i] +} + // An EmptyOp specifies a kind or mixture of zero-width assertions. type EmptyOp uint8 @@ -103,13 +124,13 @@ func (p *Prog) String() string { // skipNop follows any no-op or capturing instructions // and returns the resulting pc. -func (p *Prog) skipNop(pc uint32) *Inst { +func (p *Prog) skipNop(pc uint32) (*Inst, uint32) { i := &p.Inst[pc] for i.Op == InstNop || i.Op == InstCapture { pc = i.Out i = &p.Inst[pc] } - return i + return i, pc } // op returns i.Op but merges all the Rune special cases into InstRune @@ -126,7 +147,7 @@ func (i *Inst) op() InstOp { // regexp must start with. Complete is true if the prefix // is the entire match. func (p *Prog) Prefix() (prefix string, complete bool) { - i := p.skipNop(uint32(p.Start)) + i, _ := p.skipNop(uint32(p.Start)) // Avoid allocation of buffer if prefix is empty. 
if i.op() != InstRune || len(i.Rune) != 1 { @@ -137,7 +158,7 @@ func (p *Prog) Prefix() (prefix string, complete bool) { var buf bytes.Buffer for i.op() == InstRune && len(i.Rune) == 1 && Flags(i.Arg)&FoldCase == 0 { buf.WriteRune(i.Rune[0]) - i = p.skipNop(i.Out) + i, _ = p.skipNop(i.Out) } return buf.String(), i.Op == InstMatch } @@ -166,35 +187,46 @@ Loop: return flag } +const noMatch = -1 + // MatchRune returns true if the instruction matches (and consumes) r. // It should only be called when i.Op == InstRune. func (i *Inst) MatchRune(r rune) bool { + return i.MatchRunePos(r) != noMatch +} + +// MatchRunePos checks whether the instruction matches (and consumes) r. +// If so, MatchRunePos returns the index of the matching rune pair +// (or, when len(i.Rune) == 1, rune singleton). +// If not, MatchRunePos returns -1. +// MatchRunePos should only be called when i.Op == InstRune. +func (i *Inst) MatchRunePos(r rune) int { rune := i.Rune // Special case: single-rune slice is from literal string, not char class. if len(rune) == 1 { r0 := rune[0] if r == r0 { - return true + return 0 } if Flags(i.Arg)&FoldCase != 0 { for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) { if r == r1 { - return true + return 0 } } } - return false + return noMatch } // Peek at the first few pairs. // Should handle ASCII well. for j := 0; j < len(rune) && j <= 8; j += 2 { if r < rune[j] { - return false + return noMatch } if r <= rune[j+1] { - return true + return j / 2 } } @@ -205,14 +237,14 @@ func (i *Inst) MatchRune(r rune) bool { m := lo + (hi-lo)/2 if c := rune[2*m]; c <= r { if r <= rune[2*m+1] { - return true + return m } lo = m + 1 } else { hi = m } } - return false + return noMatch } // As per re2's Prog::IsWordChar. Determines whether rune is an ASCII word char. 
diff --git a/src/pkg/regexp/syntax/prog_test.go b/src/pkg/regexp/syntax/prog_test.go index cd71abc2a..50bfa3d4b 100644 --- a/src/pkg/regexp/syntax/prog_test.go +++ b/src/pkg/regexp/syntax/prog_test.go @@ -4,9 +4,7 @@ package syntax -import ( - "testing" -) +import "testing" var compileTests = []struct { Regexp string |