diff options
author | Rob Pike <r@golang.org> | 2009-01-15 17:22:15 -0800 |
---|---|---|
committer | Rob Pike <r@golang.org> | 2009-01-15 17:22:15 -0800 |
commit | 42ee3a44717f5a30b7f710d68538088a525151ea (patch) | |
tree | f1fdfdc10fd1fb6a2f7555f49f51f8deed1ba1e2 /src/lib/regexp | |
parent | 2b9167a5e4c38f0b5de416bf5154e8e013dbe1b3 (diff) | |
download | golang-42ee3a44717f5a30b7f710d68538088a525151ea.tar.gz |
casify lib/regexp
R=rsc
DELTA=259 (0 added, 0 deleted, 259 changed)
OCL=22897
CL=22900
Diffstat (limited to 'src/lib/regexp')
-rw-r--r-- | src/lib/regexp/all_test.go | 96 | ||||
-rw-r--r-- | src/lib/regexp/regexp.go | 374 |
2 files changed, 235 insertions, 235 deletions
diff --git a/src/lib/regexp/all_test.go b/src/lib/regexp/all_test.go index 143d81380..f8e030d67 100644 --- a/src/lib/regexp/all_test.go +++ b/src/lib/regexp/all_test.go @@ -30,59 +30,59 @@ var good_re = []string{ } // TODO: nice to do this with a map -type StringError struct { +type stringError struct { re string; err *os.Error; } -var bad_re = []StringError{ - StringError{ `*`, regexp.ErrBareClosure }, - StringError{ `(abc`, regexp.ErrUnmatchedLpar }, - StringError{ `abc)`, regexp.ErrUnmatchedRpar }, - StringError{ `x[a-z`, regexp.ErrUnmatchedLbkt }, - StringError{ `abc]`, regexp.ErrUnmatchedRbkt }, - StringError{ `[z-a]`, regexp.ErrBadRange }, - StringError{ `abc\`, regexp.ErrExtraneousBackslash }, - StringError{ `a**`, regexp.ErrBadClosure }, - StringError{ `a*+`, regexp.ErrBadClosure }, - StringError{ `a??`, regexp.ErrBadClosure }, - StringError{ `*`, regexp.ErrBareClosure }, - StringError{ `\x`, regexp.ErrBadBackslash }, +var bad_re = []stringError{ + stringError{ `*`, regexp.ErrBareClosure }, + stringError{ `(abc`, regexp.ErrUnmatchedLpar }, + stringError{ `abc)`, regexp.ErrUnmatchedRpar }, + stringError{ `x[a-z`, regexp.ErrUnmatchedLbkt }, + stringError{ `abc]`, regexp.ErrUnmatchedRbkt }, + stringError{ `[z-a]`, regexp.ErrBadRange }, + stringError{ `abc\`, regexp.ErrExtraneousBackslash }, + stringError{ `a**`, regexp.ErrBadClosure }, + stringError{ `a*+`, regexp.ErrBadClosure }, + stringError{ `a??`, regexp.ErrBadClosure }, + stringError{ `*`, regexp.ErrBareClosure }, + stringError{ `\x`, regexp.ErrBadBackslash }, } -type Vec []int; +type vec []int; -type Tester struct { +type tester struct { re string; text string; - match Vec; -} - -var matches = []Tester { - Tester{ ``, "", Vec{0,0} }, - Tester{ `a`, "a", Vec{0,1} }, - Tester{ `x`, "y", Vec{} }, - Tester{ `b`, "abc", Vec{1,2} }, - Tester{ `.`, "a", Vec{0,1} }, - Tester{ `.*`, "abcdef", Vec{0,6} }, - Tester{ `^abcd$`, "abcd", Vec{0,4} }, - Tester{ `^bcd'`, "abcdef", Vec{} }, - Tester{ `^abcd$`, "abcde", Vec{} }, - Tester{ `a+`, "baaab", Vec{1,4} }, - Tester{ `a*`, "baaab", Vec{0,0} }, - Tester{ `[a-z]+`, "abcd", Vec{0,4} }, - Tester{ `[^a-z]+`, "ab1234cd", Vec{2,6} }, - Tester{ `[a\-\]z]+`, "az]-bcz", Vec{0,4} }, - Tester{ `[日本語]+`, "日本語日本語", Vec{0,18} }, - Tester{ `()`, "", Vec{0,0, 0,0} }, - Tester{ `(a)`, "a", Vec{0,1, 0,1} }, - Tester{ `(.)(.)`, "日a", Vec{0,4, 0,3, 3,4} }, - Tester{ `(.*)`, "", Vec{0,0, 0,0} }, - Tester{ `(.*)`, "abcd", Vec{0,4, 0,4} }, - Tester{ `(..)(..)`, "abcd", Vec{0,4, 0,2, 2,4} }, - Tester{ `(([^xyz]*)(d))`, "abcd", Vec{0,4, 0,4, 0,3, 3,4} }, - Tester{ `((a|b|c)*(d))`, "abcd", Vec{0,4, 0,4, 2,3, 3,4} }, - Tester{ `(((a|b|c)*)(d))`, "abcd", Vec{0,4, 0,4, 0,3, 2,3, 3,4} }, - Tester{ `a*(|(b))c*`, "aacc", Vec{0,4, 2,2, -1,-1} }, + match vec; +} + +var matches = []tester { + tester{ ``, "", vec{0,0} }, + tester{ `a`, "a", vec{0,1} }, + tester{ `x`, "y", vec{} }, + tester{ `b`, "abc", vec{1,2} }, + tester{ `.`, "a", vec{0,1} }, + tester{ `.*`, "abcdef", vec{0,6} }, + tester{ `^abcd$`, "abcd", vec{0,4} }, + tester{ `^bcd'`, "abcdef", vec{} }, + tester{ `^abcd$`, "abcde", vec{} }, + tester{ `a+`, "baaab", vec{1,4} }, + tester{ `a*`, "baaab", vec{0,0} }, + tester{ `[a-z]+`, "abcd", vec{0,4} }, + tester{ `[^a-z]+`, "ab1234cd", vec{2,6} }, + tester{ `[a\-\]z]+`, "az]-bcz", vec{0,4} }, + tester{ `[日本語]+`, "日本語日本語", vec{0,18} }, + tester{ `()`, "", vec{0,0, 0,0} }, + tester{ `(a)`, "a", vec{0,1, 0,1} }, + tester{ `(.)(.)`, "日a", vec{0,4, 0,3, 3,4} }, + tester{ `(.*)`, "", vec{0,0, 0,0} }, + tester{ `(.*)`, "abcd", vec{0,4, 0,4} }, + tester{ `(..)(..)`, "abcd", vec{0,4, 0,2, 2,4} }, + tester{ `(([^xyz]*)(d))`, "abcd", vec{0,4, 0,4, 0,3, 3,4} }, + tester{ `((a|b|c)*(d))`, "abcd", vec{0,4, 0,4, 2,3, 3,4} }, + tester{ `(((a|b|c)*)(d))`, "abcd", vec{0,4, 0,4, 0,3, 2,3, 3,4} }, + tester{ `a*(|(b))c*`, "aacc", vec{0,4, 2,2, -1,-1} }, } func CompileTest(t *testing.T, expr string, error *os.Error) regexp.Regexp { @@ -93,7 +93,7 @@ func CompileTest(t *testing.T, expr string, error *os.Error) regexp.Regexp { return re } -func PrintVec(t *testing.T, m []int) { +func Printvec(t *testing.T, m []int) { l := len(m); if l == 0 { t.Log("\t<no match>"); @@ -149,9 +149,9 @@ func ExecuteTest(t *testing.T, expr string, str string, match []int) { m := re.Execute(str); if !Equal(m, match) { t.Error("Execute failure on `", expr, "` matching `", str, "`:"); - PrintVec(t, m); + Printvec(t, m); t.Log("should be:"); - PrintVec(t, match); + Printvec(t, match); } } diff --git a/src/lib/regexp/regexp.go b/src/lib/regexp/regexp.go index 49f971610..32fa113c7 100644 --- a/src/lib/regexp/regexp.go +++ b/src/lib/regexp/regexp.go @@ -11,7 +11,7 @@ import ( "array"; ) -export var debug = false; +var debug = false; export var ErrInternal = os.NewError("internal error"); @@ -26,110 +26,110 @@ export var ErrBareClosure = os.NewError("closure applies to nothing"); export var ErrBadBackslash = os.NewError("illegal backslash escape"); // An instruction executed by the NFA -type Inst interface { - Type() int; // the type of this instruction: CHAR, ANY, etc. - Next() Inst; // the instruction to execute after this one - SetNext(i Inst); +type instr interface { + Type() int; // the type of this instruction: cCHAR, cANY, etc. + Next() instr; // the instruction to execute after this one + SetNext(i instr); Index() int; SetIndex(i int); Print(); } // Fields and methods common to all instructions -type Common struct { - next Inst; +type iCommon struct { + next instr; index int; } -func (c *Common) Next() Inst { return c.next } -func (c *Common) SetNext(i Inst) { c.next = i } -func (c *Common) Index() int { return c.index } -func (c *Common) SetIndex(i int) { c.index = i } +func (c *iCommon) Next() instr { return c.next } +func (c *iCommon) SetNext(i instr) { c.next = i } +func (c *iCommon) Index() int { return c.index } +func (c *iCommon) SetIndex(i int) { c.index = i } -type RE struct { +type regExp struct { expr string; // the original expression - ch chan<- *RE; // reply channel when we're done + ch chan<- *regExp; // reply channel when we're done error *os.Error; // compile- or run-time error; nil if OK inst *array.Array; - start Inst; + start instr; nbra int; // number of brackets in expression, for subexpressions } const ( - START // beginning of program + cSTART // beginning of program = iota; - END; // end of program: success - BOT; // '^' beginning of text - EOT; // '$' end of text - CHAR; // 'a' regular character - CHARCLASS; // [a-z] character class - ANY; // '.' any character - BRA; // '(' parenthesized expression - EBRA; // ')'; end of '(' parenthesized expression - ALT; // '|' alternation - NOP; // do nothing; makes it easy to link without patching + cEND; // end of program: success + cBOT; // '^' beginning of text + cEOT; // '$' end of text + cCHAR; // 'a' regular character + cCHARCLASS; // [a-z] character class + cANY; // '.' any character + cBRA; // '(' parenthesized expression + cEBRA; // ')'; end of '(' parenthesized expression + cALT; // '|' alternation + cNOP; // do nothing; makes it easy to link without patching ) // --- START start of program -type Start struct { - Common +type iStart struct { + iCommon } -func (start *Start) Type() int { return START } -func (start *Start) Print() { print("start") } +func (start *iStart) Type() int { return cSTART } +func (start *iStart) Print() { print("start") } // --- END end of program -type End struct { - Common +type iEnd struct { + iCommon } -func (end *End) Type() int { return END } -func (end *End) Print() { print("end") } +func (end *iEnd) Type() int { return cEND } +func (end *iEnd) Print() { print("end") } // --- BOT beginning of text -type Bot struct { - Common +type iBot struct { + iCommon } -func (bot *Bot) Type() int { return BOT } -func (bot *Bot) Print() { print("bot") } +func (bot *iBot) Type() int { return cBOT } +func (bot *iBot) Print() { print("bot") } // --- EOT end of text -type Eot struct { - Common +type iEot struct { + iCommon } -func (eot *Eot) Type() int { return EOT } -func (eot *Eot) Print() { print("eot") } +func (eot *iEot) Type() int { return cEOT } +func (eot *iEot) Print() { print("eot") } // --- CHAR a regular character -type Char struct { - Common; +type iChar struct { + iCommon; char int; } -func (char *Char) Type() int { return CHAR } -func (char *Char) Print() { print("char ", string(char.char)) } +func (char *iChar) Type() int { return cCHAR } +func (char *iChar) Print() { print("char ", string(char.char)) } -func NewChar(char int) *Char { - c := new(Char); +func newChar(char int) *iChar { + c := new(iChar); c.char = char; return c; } // --- CHARCLASS [a-z] -type CharClass struct { - Common; +type iCharClass struct { + iCommon; char int; negate bool; // is character class negated? ([^a-z]) // array of int, stored pairwise: [a-z] is (a,z); x is (x,x): ranges *array.IntArray; } -func (cclass *CharClass) Type() int { return CHARCLASS } +func (cclass *iCharClass) Type() int { return cCHARCLASS } -func (cclass *CharClass) Print() { +func (cclass *iCharClass) Print() { print("charclass"); if cclass.negate { print(" (negated)"); @@ -145,13 +145,13 @@ func (cclass *CharClass) Print() { } } -func (cclass *CharClass) AddRange(a, b int) { +func (cclass *iCharClass) AddRange(a, b int) { // range is a through b inclusive cclass.ranges.Push(a); cclass.ranges.Push(b); } -func (cclass *CharClass) Matches(c int) bool { +func (cclass *iCharClass) Matches(c int) bool { for i := 0; i < cclass.ranges.Len(); i = i+2 { min := cclass.ranges.At(i); max := cclass.ranges.At(i+1); @@ -162,84 +162,84 @@ func (cclass *CharClass) Matches(c int) bool { return cclass.negate } -func NewCharClass() *CharClass { - c := new(CharClass); +func newCharClass() *iCharClass { + c := new(iCharClass); c.ranges = array.NewIntArray(0); return c; } // --- ANY any character -type Any struct { - Common +type iAny struct { + iCommon } -func (any *Any) Type() int { return ANY } -func (any *Any) Print() { print("any") } +func (any *iAny) Type() int { return cANY } +func (any *iAny) Print() { print("any") } // --- BRA parenthesized expression -type Bra struct { - Common; +type iBra struct { + iCommon; n int; // subexpression number } -func (bra *Bra) Type() int { return BRA } -func (bra *Bra) Print() { print("bra", bra.n); } +func (bra *iBra) Type() int { return cBRA } +func (bra *iBra) Print() { print("bra", bra.n); } // --- EBRA end of parenthesized expression -type Ebra struct { - Common; +type iEbra struct { + iCommon; n int; // subexpression number } -func (ebra *Ebra) Type() int { return EBRA } -func (ebra *Ebra) Print() { print("ebra ", ebra.n); } +func (ebra *iEbra) Type() int { return cEBRA } +func (ebra *iEbra) Print() { print("ebra ", ebra.n); } // --- ALT alternation -type Alt struct { - Common; - left Inst; // other branch +type iAlt struct { + iCommon; + left instr; // other branch } -func (alt *Alt) Type() int { return ALT } -func (alt *Alt) Print() { print("alt(", alt.left.Index(), ")"); } +func (alt *iAlt) Type() int { return cALT } +func (alt *iAlt) Print() { print("alt(", alt.left.Index(), ")"); } // --- NOP no operation -type Nop struct { - Common +type iNop struct { + iCommon } -func (nop *Nop) Type() int { return NOP } -func (nop *Nop) Print() { print("nop") } +func (nop *iNop) Type() int { return cNOP } +func (nop *iNop) Print() { print("nop") } // report error and exit compiling/executing goroutine -func (re *RE) Error(err *os.Error) { +func (re *regExp) Error(err *os.Error) { re.error = err; re.ch <- re; sys.goexit(); } -func (re *RE) Add(i Inst) Inst { +func (re *regExp) Add(i instr) instr { i.SetIndex(re.inst.Len()); re.inst.Push(i); return i; } -type Parser struct { - re *RE; +type parser struct { + re *regExp; nlpar int; // number of unclosed lpars pos int; ch int; } -const EOF = -1 +const endOfFile = -1 -func (p *Parser) c() int { +func (p *parser) c() int { return p.ch; } -func (p *Parser) nextc() int { +func (p *parser) nextc() int { if p.pos >= len(p.re.expr) { - p.ch = EOF + p.ch = endOfFile } else { c, w := sys.stringtorune(p.re.expr, p.pos); p.ch = c; @@ -248,11 +248,11 @@ func (p *Parser) nextc() int { return p.ch; } -func NewParser(re *RE) *Parser { - parser := new(Parser); - parser.re = re; - parser.nextc(); // load p.ch - return parser; +func newParser(re *regExp) *parser { + p := new(parser); + p.re = re; + p.nextc(); // load p.ch + return p; } /* @@ -274,9 +274,9 @@ Grammar: */ -func (p *Parser) Regexp() (start, end Inst) +func (p *parser) Regexp() (start, end instr) -var NULL Inst +var iNULL instr func special(c int) bool { s := `\.+*?()|[]`; @@ -298,8 +298,8 @@ func specialcclass(c int) bool { return false } -func (p *Parser) CharClass() Inst { - cc := NewCharClass(); +func (p *parser) CharClass() instr { + cc := newCharClass(); p.re.Add(cc); if p.c() == '^' { cc.negate = true; @@ -308,7 +308,7 @@ func (p *Parser) CharClass() Inst { left := -1; for { switch c := p.c(); c { - case ']', EOF: + case ']', endOfFile: if left >= 0 { p.re.Error(ErrBadRange); } @@ -318,7 +318,7 @@ func (p *Parser) CharClass() Inst { case '\\': c = p.nextc(); switch { - case c == EOF: + case c == endOfFile: p.re.Error(ErrExtraneousBackslash); case c == 'n': c = '\n'; @@ -346,33 +346,33 @@ func (p *Parser) CharClass() Inst { } } } - return NULL + return iNULL } -func (p *Parser) Term() (start, end Inst) { +func (p *parser) Term() (start, end instr) { switch c := p.c(); c { - case '|', EOF: - return NULL, NULL; + case '|', endOfFile: + return iNULL, iNULL; case '*', '+': p.re.Error(ErrBareClosure); case ')': if p.nlpar == 0 { p.re.Error(ErrUnmatchedRpar); } - return NULL, NULL; + return iNULL, iNULL; case ']': p.re.Error(ErrUnmatchedRbkt); case '^': p.nextc(); - start = p.re.Add(new(Bot)); + start = p.re.Add(new(iBot)); return start, start; case '$': p.nextc(); - start = p.re.Add(new(Eot)); + start = p.re.Add(new(iEot)); return start, start; case '.': p.nextc(); - start = p.re.Add(new(Any)); + start = p.re.Add(new(iAny)); return start, start; case '[': p.nextc(); @@ -393,14 +393,14 @@ func (p *Parser) Term() (start, end Inst) { } p.nlpar--; p.nextc(); - bra := new(Bra); + bra := new(iBra); p.re.Add(bra); - ebra := new(Ebra); + ebra := new(iEbra); p.re.Add(ebra); bra.n = nbra; ebra.n = nbra; - if start == NULL { - if end == NULL { p.re.Error(ErrInternal) } + if start == iNULL { + if end == iNULL { p.re.Error(ErrInternal) } start = ebra } else { end.SetNext(ebra); @@ -410,7 +410,7 @@ func (p *Parser) Term() (start, end Inst) { case '\\': c = p.nextc(); switch { - case c == EOF: + case c == endOfFile: p.re.Error(ErrExtraneousBackslash); case c == 'n': c = '\n'; @@ -422,22 +422,22 @@ func (p *Parser) Term() (start, end Inst) { fallthrough; default: p.nextc(); - start = NewChar(c); + start = newChar(c); p.re.Add(start); return start, start } panic("unreachable"); } -func (p *Parser) Closure() (start, end Inst) { +func (p *parser) Closure() (start, end instr) { start, end = p.Term(); - if start == NULL { + if start == iNULL { return } switch p.c() { case '*': // (start,end)*: - alt := new(Alt); + alt := new(iAlt); p.re.Add(alt); end.SetNext(alt); // after end, do alt alt.left = start; // alternate brach: return to start @@ -445,16 +445,16 @@ func (p *Parser) Closure() (start, end Inst) { end = alt; case '+': // (start,end)+: - alt := new(Alt); + alt := new(iAlt); p.re.Add(alt); end.SetNext(alt); // after end, do alt alt.left = start; // alternate brach: return to start end = alt; // start is unchanged; end is alt case '?': // (start,end)?: - alt := new(Alt); + alt := new(iAlt); p.re.Add(alt); - nop := new(Nop); + nop := new(iNop); p.re.Add(nop); alt.left = start; // alternate branch is start alt.next = nop; // follow on to nop @@ -471,18 +471,18 @@ func (p *Parser) Closure() (start, end Inst) { return } -func (p *Parser) Concatenation() (start, end Inst) { - start, end = NULL, NULL; +func (p *parser) Concatenation() (start, end instr) { + start, end = iNULL, iNULL; for { nstart, nend := p.Closure(); switch { - case nstart == NULL: // end of this concatenation - if start == NULL { // this is the empty string - nop := p.re.Add(new(Nop)); + case nstart == iNULL: // end of this concatenation + if start == iNULL { // this is the empty string + nop := p.re.Add(new(iNop)); return nop, nop; } return; - case start == NULL: // this is first element of concatenation + case start == iNULL: // this is first element of concatenation start, end = nstart, nend; default: end.SetNext(nstart); @@ -492,7 +492,7 @@ func (p *Parser) Concatenation() (start, end Inst) { panic("unreachable"); } -func (p *Parser) Regexp() (start, end Inst) { +func (p *parser) Regexp() (start, end instr) { start, end = p.Concatenation(); for { switch p.c() { @@ -501,11 +501,11 @@ func (p *Parser) Regexp() (start, end Inst) { case '|': p.nextc(); nstart, nend := p.Concatenation(); - alt := new(Alt); + alt := new(iAlt); p.re.Add(alt); alt.left = start; alt.next = nstart; - nop := new(Nop); + nop := new(iNop); p.re.Add(nop); end.SetNext(nop); nend.SetNext(nop); @@ -515,47 +515,47 @@ func (p *Parser) Regexp() (start, end Inst) { panic("unreachable"); } -func UnNop(i Inst) Inst { - for i.Type() == NOP { +func UnNop(i instr) instr { + for i.Type() == cNOP { i = i.Next() } return i } -func (re *RE) EliminateNops() { +func (re *regExp) EliminateNops() { for i := 0; i < re.inst.Len(); i++ { - inst := re.inst.At(i).(Inst); - if inst.Type() == END { + inst := re.inst.At(i).(instr); + if inst.Type() == cEND { continue } inst.SetNext(UnNop(inst.Next())); - if inst.Type() == ALT { - alt := inst.(*Alt); + if inst.Type() == cALT { + alt := inst.(*iAlt); alt.left = UnNop(alt.left); } } } -func (re *RE) Dump() { +func (re *regExp) Dump() { for i := 0; i < re.inst.Len(); i++ { - inst := re.inst.At(i).(Inst); + inst := re.inst.At(i).(instr); print(inst.Index(), ": "); inst.Print(); - if inst.Type() != END { + if inst.Type() != cEND { print(" -> ", inst.Next().Index()) } print("\n"); } } -func (re *RE) DoParse() { - parser := NewParser(re); - start := new(Start); +func (re *regExp) DoParse() { + p := newParser(re); + start := new(iStart); re.Add(start); - s, e := parser.Regexp(); + s, e := p.Regexp(); start.next = s; re.start = start; - e.SetNext(re.Add(new(End))); + e.SetNext(re.Add(new(iEnd))); if debug { re.Dump(); @@ -571,8 +571,8 @@ func (re *RE) DoParse() { } -func Compiler(str string, ch chan *RE) { - re := new(RE); +func Compiler(str string, ch chan *regExp) { + re := new(regExp); re.expr = str; re.inst = array.New(0); re.ch = ch; @@ -589,20 +589,20 @@ export type Regexp interface { // Compile in separate goroutine; wait for result export func Compile(str string) (regexp Regexp, error *os.Error) { - ch := make(chan *RE); + ch := make(chan *regExp); go Compiler(str, ch); re := <-ch; return re, re.error } -type State struct { - inst Inst; // next instruction to execute +type state struct { + inst instr; // next instruction to execute match []int; // pairs of bracketing submatches. 0th is start,end } // Append new state to to-do list. Leftmost-longest wins so avoid // adding a state that's already active. -func AddState(s []State, inst Inst, match []int) []State { +func addState(s []state, inst instr, match []int) []state { index := inst.Index(); l := len(s); pos := match[0]; @@ -615,7 +615,7 @@ func AddState(s []State, inst Inst, match []int) []State { } } if l == cap(s) { - s1 := make([]State, 2*l)[0:l]; + s1 := make([]state, 2*l)[0:l]; for i := 0; i < l; i++ { s1[i] = s[i]; } @@ -627,12 +627,12 @@ func AddState(s []State, inst Inst, match []int) []State { return s; } -func (re *RE) DoExecute(str string, pos int) []int { - var s [2][]State; // TODO: use a vector when State values (not ptrs) can be vector elements - s[0] = make([]State, 10)[0:0]; - s[1] = make([]State, 10)[0:0]; +func (re *regExp) DoExecute(str string, pos int) []int { + var s [2][]state; // TODO: use a vector when state values (not ptrs) can be vector elements + s[0] = make([]state, 10)[0:0]; + s[1] = make([]state, 10)[0:0]; in, out := 0, 1; - var final State; + var final state; found := false; for pos <= len(str) { if !found { @@ -642,7 +642,7 @@ func (re *RE) DoExecute(str string, pos int) []int { match[i] = -1; // no match seen; catches cases like "a(b)?c" on "ac" } match[0] = pos; - s[out] = AddState(s[out], re.start.Next(), match); + s[out] = addState(s[out], re.start.Next(), match); } in, out = out, in; // old out state is new in state s[out] = s[out][0:0]; // clear out state @@ -651,60 +651,60 @@ func (re *RE) DoExecute(str string, pos int) []int { break; } charwidth := 1; - c := EOF; + c := endOfFile; if pos < len(str) { c, charwidth = sys.stringtorune(str, pos); } for i := 0; i < len(s[in]); i++ { - state := s[in][i]; + st := s[in][i]; switch s[in][i].inst.Type() { - case BOT: + case cBOT: if pos == 0 { - s[in] = AddState(s[in], state.inst.Next(), state.match) + s[in] = addState(s[in], st.inst.Next(), st.match) } - case EOT: + case cEOT: if pos == len(str) { - s[in] = AddState(s[in], state.inst.Next(), state.match) + s[in] = addState(s[in], st.inst.Next(), st.match) } - case CHAR: - if c == state.inst.(*Char).char { - s[out] = AddState(s[out], state.inst.Next(), state.match) + case cCHAR: + if c == st.inst.(*iChar).char { + s[out] = addState(s[out], st.inst.Next(), st.match) } - case CHARCLASS: - if state.inst.(*CharClass).Matches(c) { - s[out] = AddState(s[out], state.inst.Next(), state.match) + case cCHARCLASS: + if st.inst.(*iCharClass).Matches(c) { + s[out] = addState(s[out], st.inst.Next(), st.match) } - case ANY: - if c != EOF { - s[out] = AddState(s[out], state.inst.Next(), state.match) + case cANY: + if c != endOfFile { + s[out] = addState(s[out], st.inst.Next(), st.match) } - case BRA: - n := state.inst.(*Bra).n; - state.match[2*n] = pos; - s[in] = AddState(s[in], state.inst.Next(), state.match); - case EBRA: - n := state.inst.(*Ebra).n; - state.match[2*n+1] = pos; - s[in] = AddState(s[in], state.inst.Next(), state.match); - case ALT: - s[in] = AddState(s[in], state.inst.(*Alt).left, state.match); + case cBRA: + n := st.inst.(*iBra).n; + st.match[2*n] = pos; + s[in] = addState(s[in], st.inst.Next(), st.match); + case cEBRA: + n := st.inst.(*iEbra).n; + st.match[2*n+1] = pos; + s[in] = addState(s[in], st.inst.Next(), st.match); + case cALT: + s[in] = addState(s[in], st.inst.(*iAlt).left, st.match); // give other branch a copy of this match vector s1 := make([]int, 2*(re.nbra+1)); for i := 0; i < len(s1); i++ { - s1[i] = state.match[i] + s1[i] = st.match[i] } - s[in] = AddState(s[in], state.inst.Next(), s1); - case END: + s[in] = addState(s[in], st.inst.Next(), s1); + case cEND: // choose leftmost longest if !found || // first - state.match[0] < final.match[0] || // leftmost - (state.match[0] == final.match[0] && pos > final.match[1]) { // longest - final = state; + st.match[0] < final.match[0] || // leftmost + (st.match[0] == final.match[0] && pos > final.match[1]) { // longest + final = st; final.match[1] = pos; } found = true; default: - state.inst.Print(); + st.inst.Print(); panic("unknown instruction in execute"); } } @@ -714,17 +714,17 @@ func (re *RE) DoExecute(str string, pos int) []int { } -func (re *RE) Execute(s string) []int { +func (re *regExp) Execute(s string) []int { return re.DoExecute(s, 0) } -func (re *RE) Match(s string) bool { +func (re *regExp) Match(s string) bool { return len(re.DoExecute(s, 0)) > 0 } -func (re *RE) MatchStrings(s string) []string { +func (re *regExp) MatchStrings(s string) []string { r := re.DoExecute(s, 0); if r == nil { return nil |