summaryrefslogtreecommitdiff
path: root/src/pkg/testing/regexp.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/pkg/testing/regexp.go')
-rw-r--r--src/pkg/testing/regexp.go288
1 files changed, 171 insertions, 117 deletions
diff --git a/src/pkg/testing/regexp.go b/src/pkg/testing/regexp.go
index e5b5eac4f..7e6539e9e 100644
--- a/src/pkg/testing/regexp.go
+++ b/src/pkg/testing/regexp.go
@@ -29,28 +29,28 @@ import (
"utf8";
)
-var debug = false;
+var debug = false
// Error codes returned by failures to parse an expression.
var (
- ErrInternal = "internal error";
- ErrUnmatchedLpar = "unmatched ''";
- ErrUnmatchedRpar = "unmatched ''";
- ErrUnmatchedLbkt = "unmatched '['";
- ErrUnmatchedRbkt = "unmatched ']'";
- ErrBadRange = "bad range in character class";
- ErrExtraneousBackslash = "extraneous backslash";
- ErrBadClosure = "repeated closure **, ++, etc.";
- ErrBareClosure = "closure applies to nothing";
- ErrBadBackslash = "illegal backslash escape";
+ ErrInternal = "internal error";
+ ErrUnmatchedLpar = "unmatched ''";
+ ErrUnmatchedRpar = "unmatched ''";
+ ErrUnmatchedLbkt = "unmatched '['";
+ ErrUnmatchedRbkt = "unmatched ']'";
+ ErrBadRange = "bad range in character class";
+ ErrExtraneousBackslash = "extraneous backslash";
+ ErrBadClosure = "repeated closure **, ++, etc.";
+ ErrBareClosure = "closure applies to nothing";
+ ErrBadBackslash = "illegal backslash escape";
)
// An instruction executed by the NFA
type instr interface {
- kind() int; // the type of this instruction: _CHAR, _ANY, etc.
- next() instr; // the instruction to execute after this one
+ kind() int; // the type of this instruction: _CHAR, _ANY, etc.
+ next() instr; // the instruction to execute after this one
setNext(i instr);
- index() int;
+ index() int;
setIndex(i int);
print();
}
@@ -61,69 +61,93 @@ type common struct {
_index int;
}
-func (c *common) next() instr { return c._next }
-func (c *common) setNext(i instr) { c._next = i }
-func (c *common) index() int { return c._index }
-func (c *common) setIndex(i int) { c._index = i }
+func (c *common) next() instr {
+ return c._next;
+}
+func (c *common) setNext(i instr) {
+ c._next = i;
+}
+func (c *common) index() int {
+ return c._index;
+}
+func (c *common) setIndex(i int) {
+ c._index = i;
+}
// The representation of a compiled regular expression.
// The public interface is entirely through methods.
type Regexp struct {
- expr string; // the original expression
+ expr string; // the original expression
ch chan<- *Regexp; // reply channel when we're done
- error string; // compile- or run-time error; nil if OK
+ error string; // compile- or run-time error; nil if OK
inst []instr;
start instr;
nbra int; // number of brackets in expression, for subexpressions
}
const (
- _START // beginning of program
- = iota;
+ _START = // beginning of program
+ iota;
_END; // end of program: success
_BOT; // '^' beginning of text
_EOT; // '$' end of text
- _CHAR; // 'a' regular character
+ _CHAR; // 'a' regular character
_CHARCLASS; // [a-z] character class
_ANY; // '.' any character including newline
_NOTNL; // [^\n] special case: any character but newline
_BRA; // '(' parenthesized expression
- _EBRA; // ')'; end of '(' parenthesized expression
+ _EBRA; // ')'; end of '(' parenthesized expression
_ALT; // '|' alternation
_NOP; // do nothing; makes it easy to link without patching
)
// --- START start of program
type _Start struct {
- common
+ common;
}
-func (start *_Start) kind() int { return _START }
-func (start *_Start) print() { print("start") }
+func (start *_Start) kind() int {
+ return _START;
+}
+func (start *_Start) print() {
+ print("start");
+}
// --- END end of program
type _End struct {
- common
+ common;
}
-func (end *_End) kind() int { return _END }
-func (end *_End) print() { print("end") }
+func (end *_End) kind() int {
+ return _END;
+}
+func (end *_End) print() {
+ print("end");
+}
// --- BOT beginning of text
type _Bot struct {
- common
+ common;
}
-func (bot *_Bot) kind() int { return _BOT }
-func (bot *_Bot) print() { print("bot") }
+func (bot *_Bot) kind() int {
+ return _BOT;
+}
+func (bot *_Bot) print() {
+ print("bot");
+}
// --- EOT end of text
type _Eot struct {
- common
+ common;
}
-func (eot *_Eot) kind() int { return _EOT }
-func (eot *_Eot) print() { print("eot") }
+func (eot *_Eot) kind() int {
+ return _EOT;
+}
+func (eot *_Eot) print() {
+ print("eot");
+}
// --- CHAR a regular character
type _Char struct {
@@ -131,8 +155,12 @@ type _Char struct {
char int;
}
-func (char *_Char) kind() int { return _CHAR }
-func (char *_Char) print() { print("char ", string(char.char)) }
+func (char *_Char) kind() int {
+ return _CHAR;
+}
+func (char *_Char) print() {
+ print("char ", string(char.char));
+}
func newChar(char int) *_Char {
c := new(_Char);
@@ -150,7 +178,9 @@ type _CharClass struct {
ranges []int;
}
-func (cclass *_CharClass) kind() int { return _CHARCLASS }
+func (cclass *_CharClass) kind() int {
+ return _CHARCLASS;
+}
func (cclass *_CharClass) print() {
print("charclass");
@@ -174,11 +204,11 @@ func (cclass *_CharClass) addRange(a, b int) {
if n >= cap(cclass.ranges) {
nr := make([]int, n, 2*n);
for i, j := range nr {
- nr[i] = j
+ nr[i] = j;
}
cclass.ranges = nr;
}
- cclass.ranges = cclass.ranges[0:n+2];
+ cclass.ranges = cclass.ranges[0 : n+2];
cclass.ranges[n] = a;
n++;
cclass.ranges[n] = b;
@@ -190,10 +220,10 @@ func (cclass *_CharClass) matches(c int) bool {
min := cclass.ranges[i];
max := cclass.ranges[i+1];
if min <= c && c <= max {
- return !cclass.negate
+ return !cclass.negate;
}
}
- return cclass.negate
+ return cclass.negate;
}
func newCharClass() *_CharClass {
@@ -204,19 +234,27 @@ func newCharClass() *_CharClass {
// --- ANY any character
type _Any struct {
- common
+ common;
}
-func (any *_Any) kind() int { return _ANY }
-func (any *_Any) print() { print("any") }
+func (any *_Any) kind() int {
+ return _ANY;
+}
+func (any *_Any) print() {
+ print("any");
+}
// --- NOTNL any character but newline
type _NotNl struct {
- common
+ common;
}
-func (notnl *_NotNl) kind() int { return _NOTNL }
-func (notnl *_NotNl) print() { print("notnl") }
+func (notnl *_NotNl) kind() int {
+ return _NOTNL;
+}
+func (notnl *_NotNl) print() {
+ print("notnl");
+}
// --- BRA parenthesized expression
type _Bra struct {
@@ -224,8 +262,12 @@ type _Bra struct {
n int; // subexpression number
}
-func (bra *_Bra) kind() int { return _BRA }
-func (bra *_Bra) print() { print("bra", bra.n); }
+func (bra *_Bra) kind() int {
+ return _BRA;
+}
+func (bra *_Bra) print() {
+ print("bra", bra.n);
+}
// --- EBRA end of parenthesized expression
type _Ebra struct {
@@ -233,8 +275,12 @@ type _Ebra struct {
n int; // subexpression number
}
-func (ebra *_Ebra) kind() int { return _EBRA }
-func (ebra *_Ebra) print() { print("ebra ", ebra.n); }
+func (ebra *_Ebra) kind() int {
+ return _EBRA;
+}
+func (ebra *_Ebra) print() {
+ print("ebra ", ebra.n);
+}
// --- ALT alternation
type _Alt struct {
@@ -242,16 +288,24 @@ type _Alt struct {
left instr; // other branch
}
-func (alt *_Alt) kind() int { return _ALT }
-func (alt *_Alt) print() { print("alt(", alt.left.index(), ")"); }
+func (alt *_Alt) kind() int {
+ return _ALT;
+}
+func (alt *_Alt) print() {
+ print("alt(", alt.left.index(), ")");
+}
// --- NOP no operation
type _Nop struct {
- common
+ common;
}
-func (nop *_Nop) kind() int { return _NOP }
-func (nop *_Nop) print() { print("nop") }
+func (nop *_Nop) kind() int {
+ return _NOP;
+}
+func (nop *_Nop) print() {
+ print("nop");
+}
// report error and exit compiling/executing goroutine
func (re *Regexp) setError(err string) {
@@ -266,11 +320,11 @@ func (re *Regexp) add(i instr) instr {
if n >= cap(re.inst) {
ni := make([]instr, n, 2*n);
for i, j := range re.inst {
- ni[i] = j
+ ni[i] = j;
}
re.inst = ni;
}
- re.inst = re.inst[0:n+1];
+ re.inst = re.inst[0 : n+1];
re.inst[n] = i;
return i;
}
@@ -290,9 +344,9 @@ func (p *parser) c() int {
func (p *parser) nextc() int {
if p.pos >= len(p.re.expr) {
- p.ch = endOfFile
+ p.ch = endOfFile;
} else {
- c, w := utf8.DecodeRuneInString(p.re.expr[p.pos:len(p.re.expr)]);
+ c, w := utf8.DecodeRuneInString(p.re.expr[p.pos : len(p.re.expr)]);
p.ch = c;
p.pos += w;
}
@@ -312,20 +366,20 @@ func special(c int) bool {
s := `\.+*?()|[]^$`;
for i := 0; i < len(s); i++ {
if c == int(s[i]) {
- return true
+ return true;
}
}
- return false
+ return false;
}
func specialcclass(c int) bool {
s := `\-[]`;
for i := 0; i < len(s); i++ {
if c == int(s[i]) {
- return true
+ return true;
}
}
- return false
+ return false;
}
func (p *parser) charClass() instr {
@@ -360,7 +414,7 @@ func (p *parser) charClass() instr {
case c == 'n':
c = '\n';
case specialcclass(c):
- // c is as delivered
+ // c is as delivered
default:
p.re.setError(ErrBadBackslash);
}
@@ -383,7 +437,7 @@ func (p *parser) charClass() instr {
}
}
}
- return iNULL
+ return iNULL;
}
func (p *parser) term() (start, end instr) {
@@ -438,9 +492,9 @@ func (p *parser) term() (start, end instr) {
ebra.n = nbra;
if start == iNULL {
if end == iNULL {
- p.re.setError(ErrInternal)
+ p.re.setError(ErrInternal);
}
- start = ebra
+ start = ebra;
} else {
end.setNext(ebra);
}
@@ -454,7 +508,7 @@ func (p *parser) term() (start, end instr) {
case c == 'n':
c = '\n';
case special(c):
- // c is as delivered
+ // c is as delivered
default:
p.re.setError(ErrBadBackslash);
}
@@ -463,7 +517,7 @@ func (p *parser) term() (start, end instr) {
p.nextc();
start = newChar(c);
p.re.add(start);
- return start, start
+ return start, start;
}
panic("unreachable");
}
@@ -471,7 +525,7 @@ func (p *parser) term() (start, end instr) {
func (p *parser) closure() (start, end instr) {
start, end = p.term();
if start == iNULL {
- return
+ return;
}
switch p.c() {
case '*':
@@ -480,7 +534,7 @@ func (p *parser) closure() (start, end instr) {
p.re.add(alt);
end.setNext(alt); // after end, do alt
alt.left = start; // alternate brach: return to start
- start = alt; // alt becomes new (start, end)
+ start = alt; // alt becomes new (start, end)
end = alt;
case '+':
// (start,end)+:
@@ -488,7 +542,7 @@ func (p *parser) closure() (start, end instr) {
p.re.add(alt);
end.setNext(alt); // after end, do alt
alt.left = start; // alternate brach: return to start
- end = alt; // start is unchanged; end is alt
+ end = alt; // start is unchanged; end is alt
case '?':
// (start,end)?:
alt := new(_Alt);
@@ -498,16 +552,16 @@ func (p *parser) closure() (start, end instr) {
alt.left = start; // alternate branch is start
alt.setNext(nop); // follow on to nop
end.setNext(nop); // after end, go to nop
- start = alt; // start is now alt
- end = nop; // end is nop pointed to by both branches
+ start = alt; // start is now alt
+ end = nop; // end is nop pointed to by both branches
default:
- return
+ return;
}
switch p.nextc() {
case '*', '+', '?':
p.re.setError(ErrBadClosure);
}
- return
+ return;
}
func (p *parser) concatenation() (start, end instr) {
@@ -556,16 +610,16 @@ func (p *parser) regexp() (start, end instr) {
func unNop(i instr) instr {
for i.kind() == _NOP {
- i = i.next()
+ i = i.next();
}
- return i
+ return i;
}
func (re *Regexp) eliminateNops() {
for i := 0; i < len(re.inst); i++ {
inst := re.inst[i];
if inst.kind() == _END {
- continue
+ continue;
}
inst.setNext(unNop(inst.next()));
if inst.kind() == _ALT {
@@ -581,7 +635,7 @@ func (re *Regexp) dump() {
print(inst.index(), ": ");
inst.print();
if inst.kind() != _END {
- print(" -> ", inst.next().index())
+ print(" -> ", inst.next().index());
}
print("\n");
}
@@ -626,7 +680,7 @@ func CompileRegexp(str string) (regexp *Regexp, error string) {
ch := make(chan *Regexp);
go compiler(str, ch);
re := <-ch;
- return re, re.error
+ return re, re.error;
}
type state struct {
@@ -643,10 +697,10 @@ func addState(s []state, inst instr, match []int) []state {
// TODO: Once the state is a vector and we can do insert, have inputs always
// go in order correctly and this "earlier" test is never necessary,
for i := 0; i < l; i++ {
- if s[i].inst.index() == index && // same instruction
- s[i].match[0] < pos { // earlier match already going; lefmost wins
- return s
- }
+ if s[i].inst.index() == index && // same instruction
+ s[i].match[0] < pos { // earlier match already going; lefmost wins
+ return s;
+ }
}
if l == cap(s) {
s1 := make([]state, 2*l)[0:l];
@@ -655,7 +709,7 @@ func addState(s []state, inst instr, match []int) []state {
}
s = s1;
}
- s = s[0:l+1];
+ s = s[0 : l+1];
s[l].inst = inst;
s[l].match = match;
return s;
@@ -672,16 +726,16 @@ func (re *Regexp) doExecute(str string, bytes []byte, pos int) []int {
found := false;
end := len(str);
if bytes != nil {
- end = len(bytes)
+ end = len(bytes);
}
for pos <= end {
if !found {
// prime the pump if we haven't seen a match yet
- match := make([]int, 2*(re.nbra+1));
+ match := make([]int, 2*(re.nbra + 1));
for i := 0; i < len(match); i++ {
match[i] = -1; // no match seen; catches cases like "a(b)?c" on "ac"
}
- match[0] = pos;
+ match[0] = pos;
s[out] = addState(s[out], re.start.next(), match);
}
in, out = out, in; // old out state is new in state
@@ -704,27 +758,27 @@ func (re *Regexp) doExecute(str string, bytes []byte, pos int) []int {
switch s[in][i].inst.kind() {
case _BOT:
if pos == 0 {
- s[in] = addState(s[in], st.inst.next(), st.match)
+ s[in] = addState(s[in], st.inst.next(), st.match);
}
case _EOT:
if pos == end {
- s[in] = addState(s[in], st.inst.next(), st.match)
+ s[in] = addState(s[in], st.inst.next(), st.match);
}
case _CHAR:
if c == st.inst.(*_Char).char {
- s[out] = addState(s[out], st.inst.next(), st.match)
+ s[out] = addState(s[out], st.inst.next(), st.match);
}
case _CHARCLASS:
if st.inst.(*_CharClass).matches(c) {
- s[out] = addState(s[out], st.inst.next(), st.match)
+ s[out] = addState(s[out], st.inst.next(), st.match);
}
case _ANY:
if c != endOfFile {
- s[out] = addState(s[out], st.inst.next(), st.match)
+ s[out] = addState(s[out], st.inst.next(), st.match);
}
case _NOTNL:
if c != endOfFile && c != '\n' {
- s[out] = addState(s[out], st.inst.next(), st.match)
+ s[out] = addState(s[out], st.inst.next(), st.match);
}
case _BRA:
n := st.inst.(*_Bra).n;
@@ -732,21 +786,21 @@ func (re *Regexp) doExecute(str string, bytes []byte, pos int) []int {
s[in] = addState(s[in], st.inst.next(), st.match);
case _EBRA:
n := st.inst.(*_Ebra).n;
- st.match[2*n+1] = pos;
+ st.match[2*n + 1] = pos;
s[in] = addState(s[in], st.inst.next(), st.match);
case _ALT:
s[in] = addState(s[in], st.inst.(*_Alt).left, st.match);
// give other branch a copy of this match vector
- s1 := make([]int, 2*(re.nbra+1));
+ s1 := make([]int, 2*(re.nbra + 1));
for i := 0; i < len(s1); i++ {
- s1[i] = st.match[i]
+ s1[i] = st.match[i];
}
s[in] = addState(s[in], st.inst.next(), s1);
case _END:
// choose leftmost longest
if !found || // first
- st.match[0] < final.match[0] || // leftmost
- (st.match[0] == final.match[0] && pos > final.match[1]) { // longest
+ st.match[0] < final.match[0] || // leftmost
+ (st.match[0] == final.match[0] && pos > final.match[1]) { // longest
final = st;
final.match[1] = pos;
}
@@ -770,7 +824,7 @@ func (re *Regexp) doExecute(str string, bytes []byte, pos int) []int {
// A negative value means the subexpression did not match any element of the string.
// An empty array means "no match".
func (re *Regexp) ExecuteString(s string) (a []int) {
- return re.doExecute(s, nil, 0)
+ return re.doExecute(s, nil, 0);
}
@@ -782,21 +836,21 @@ func (re *Regexp) ExecuteString(s string) (a []int) {
// A negative value means the subexpression did not match any element of the slice.
// An empty array means "no match".
func (re *Regexp) Execute(b []byte) (a []int) {
- return re.doExecute("", b, 0)
+ return re.doExecute("", b, 0);
}
// MatchString returns whether the Regexp matches the string s.
// The return value is a boolean: true for match, false for no match.
func (re *Regexp) MatchString(s string) bool {
- return len(re.doExecute(s, nil, 0)) > 0
+ return len(re.doExecute(s, nil, 0)) > 0;
}
// Match returns whether the Regexp matches the byte slice b.
// The return value is a boolean: true for match, false for no match.
func (re *Regexp) Match(b []byte) bool {
- return len(re.doExecute("", b, 0)) > 0
+ return len(re.doExecute("", b, 0)) > 0;
}
@@ -808,15 +862,15 @@ func (re *Regexp) Match(b []byte) bool {
func (re *Regexp) MatchStrings(s string) (a []string) {
r := re.doExecute(s, nil, 0);
if r == nil {
- return nil
+ return nil;
}
a = make([]string, len(r)/2);
for i := 0; i < len(r); i += 2 {
if r[i] != -1 { // -1 means no match for this subexpression
- a[i/2] = s[r[i] : r[i+1]]
+ a[i/2] = s[r[i]:r[i+1]];
}
}
- return
+ return;
}
// MatchSlices matches the Regexp against the byte slice b.
@@ -827,15 +881,15 @@ func (re *Regexp) MatchStrings(s string) (a []string) {
func (re *Regexp) MatchSlices(b []byte) (a [][]byte) {
r := re.doExecute("", b, 0);
if r == nil {
- return nil
+ return nil;
}
a = make([][]byte, len(r)/2);
for i := 0; i < len(r); i += 2 {
if r[i] != -1 { // -1 means no match for this subexpression
- a[i/2] = b[r[i] : r[i+1]]
+ a[i/2] = b[r[i]:r[i+1]];
}
}
- return
+ return;
}
// MatchString checks whether a textual regular expression
@@ -844,9 +898,9 @@ func (re *Regexp) MatchSlices(b []byte) (a [][]byte) {
func MatchString(pattern string, s string) (matched bool, error string) {
re, err := CompileRegexp(pattern);
if err != "" {
- return false, err
+ return false, err;
}
- return re.MatchString(s), ""
+ return re.MatchString(s), "";
}
// Match checks whether a textual regular expression
@@ -855,7 +909,7 @@ func MatchString(pattern string, s string) (matched bool, error string) {
func Match(pattern string, b []byte) (matched bool, error string) {
re, err := CompileRegexp(pattern);
if err != "" {
- return false, err
+ return false, err;
}
- return re.Match(b), ""
+ return re.Match(b), "";
}