diff options
author | Rob Pike <r@golang.org> | 2008-10-14 19:22:17 -0700 |
---|---|---|
committer | Rob Pike <r@golang.org> | 2008-10-14 19:22:17 -0700 |
commit | e1033402ca1125d3517e79f1dbabae9b763ec90c (patch) | |
tree | f06f7331ad0775154f3e460e2785d9ad897120e1 /usr/r | |
parent | e36cd17c697c44d238ae9bd15dfafda2b8a4d209 (diff) | |
download | golang-e1033402ca1125d3517e79f1dbabae9b763ec90c.tar.gz |
move regexp to lib
next cl will update names and add to build
R=rsc
DELTA=1876 (938 added, 938 deleted, 0 changed)
OCL=17149
CL=17166
Diffstat (limited to 'usr/r')
-rw-r--r-- | usr/r/regexp/Makefile | 20 | ||||
-rw-r--r-- | usr/r/regexp/main.go | 160 | ||||
-rw-r--r-- | usr/r/regexp/regexp.go | 767 |
3 files changed, 0 insertions, 947 deletions
diff --git a/usr/r/regexp/Makefile b/usr/r/regexp/Makefile deleted file mode 100644 index ac466a0f1..000000000 --- a/usr/r/regexp/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright 2009 The Go Authors. All rights reserved. -# Use of this source code is governed by a BSD-style -# license that can be found in the LICENSE file. - -A=6 -G=$(A)g -L=$(A)l - -all: main - -main: main.6 - $L -o main main.6 - -main.6: regexp.6 - -clean: - rm -f *.6 main - -%.6: %.go - $G $< diff --git a/usr/r/regexp/main.go b/usr/r/regexp/main.go deleted file mode 100644 index c89f9b557..000000000 --- a/usr/r/regexp/main.go +++ /dev/null @@ -1,160 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package main - -import ( - "os"; - "regexp"; -) - -var good_re = []string{ - ``, - `.`, - `^.$`, - `a`, - `a*`, - `a+`, - `a?`, - `a|b`, - `a*|b*`, - `(a*|b)(c*|d)`, - `[a-z]`, - `[a-abc-c\-\]\[]`, - `[a-z]+`, - `[]`, - `[abc]`, - `[^1234]`, -} - -// TODO: nice to do this with a map but we don't have an iterator -type StringError struct { - re string; - err *os.Error; -} -var bad_re = []StringError{ - StringError{ `*`, regexp.ErrBareClosure }, - StringError{ `(abc`, regexp.ErrUnmatchedLpar }, - StringError{ `abc)`, regexp.ErrUnmatchedRpar }, - StringError{ `x[a-z`, regexp.ErrUnmatchedLbkt }, - StringError{ `abc]`, regexp.ErrUnmatchedRbkt }, - StringError{ `[z-a]`, regexp.ErrBadRange }, - StringError{ `abc\`, regexp.ErrExtraneousBackslash }, - StringError{ `a**`, regexp.ErrBadClosure }, - StringError{ `a*+`, regexp.ErrBadClosure }, - StringError{ `a??`, regexp.ErrBadClosure }, - StringError{ `*`, regexp.ErrBareClosure }, - StringError{ `\x`, regexp.ErrBadBackslash }, -} - -type Vec [20]int; - -type Tester struct { - re string; - text string; - match Vec; -} - -const END = -1000 - -var matches = []Tester { - Tester{ ``, "", Vec{0,0, END} }, - Tester{ `a`, "a", Vec{0,1, END} }, - Tester{ `b`, "abc", Vec{1,2, END} }, - Tester{ `.`, "a", Vec{0,1, END} }, - Tester{ `.*`, "abcdef", Vec{0,6, END} }, - Tester{ `^abcd$`, "abcd", Vec{0,4, END} }, - Tester{ `^bcd'`, "abcdef", Vec{END} }, - Tester{ `^abcd$`, "abcde", Vec{END} }, - Tester{ `a+`, "baaab", Vec{1,4, END} }, - Tester{ `a*`, "baaab", Vec{0,0, END} }, - Tester{ `[a-z]+`, "abcd", Vec{0,4, END} }, - Tester{ `[^a-z]+`, "ab1234cd", Vec{2,6, END} }, - Tester{ `[a\-\]z]+`, "az]-bcz", Vec{0,4, END} }, - Tester{ `[日本語]+`, "日本語日本語", Vec{0,18, END} }, - Tester{ `()`, "", Vec{0,0, 0,0, END} }, - Tester{ `(a)`, "a", Vec{0,1, 0,1, END} }, - Tester{ `(.)(.)`, "日a", Vec{0,4, 0,3, 3,4, END} }, - Tester{ `(.*)`, "", Vec{0,0, 0,0, END} }, - Tester{ `(.*)`, "abcd", Vec{0,4, 0,4, END} }, - Tester{ `(..)(..)`, "abcd", Vec{0,4, 0,2, 2,4, END} }, - Tester{ `(([^xyz]*)(d))`, "abcd", Vec{0,4, 0,4, 0,3, 3,4, END} }, - Tester{ `((a|b|c)*(d))`, "abcd", Vec{0,4, 0,4, 2,3, 3,4, END} }, - Tester{ `(((a|b|c)*)(d))`, "abcd", Vec{0,4, 0,4, 0,3, 2,3, 3,4, END} }, - Tester{ `a*(|(b))c*`, "aacc", Vec{0,4, 2,2, -1,-1, END} }, -} - -func Compile(expr string, error *os.Error) regexp.Regexp { - re, err := regexp.Compile(expr); - if err != error { - print("compiling `", expr, "`; unexpected error: ", err.String(), "\n"); - sys.exit(1); - } - return re -} - -func MarkedLen(m *[] int) int { - if m == nil { - return 0 - } - var i int; - for i = 0; i < len(m) && m[i] != END; i = i+2 { - } - return i -} - -func PrintVec(m *[] int) { - l := MarkedLen(m); - if l == 0 { - print("<no match>"); - } else { - for i := 0; i < l && m[i] != END; i = i+2 { - print(m[i], ",", m[i+1], " ") - } - } -} - -func Equal(m1, m2 *[]int) bool { - l := MarkedLen(m1); - if l != MarkedLen(m2) { - return false - } - for i := 0; i < l; i++ { - if m1[i] != m2[i] { - return false - } - } - return true -} - -func Match(expr string, str string, match *[]int) { - re := Compile(expr, nil); - m := re.Execute(str); - if !Equal(m, match) { - print("failure on `", expr, "` matching `", str, "`:\n"); - PrintVec(m); - print("\nshould be:\n"); - PrintVec(match); - print("\n"); - sys.exit(1); - } -} - -func main() { - //regexp.debug = true; - if sys.argc() > 1 { - Compile(sys.argv(1), nil); - sys.exit(0); - } - for i := 0; i < len(good_re); i++ { - Compile(good_re[i], nil); - } - for i := 0; i < len(bad_re); i++ { - Compile(bad_re[i].re, bad_re[i].err) - } - for i := 0; i < len(matches); i++ { - t := &matches[i]; - Match(t.re, t.text, &t.match) - } -} diff --git a/usr/r/regexp/regexp.go b/usr/r/regexp/regexp.go deleted file mode 100644 index 6535e6ef4..000000000 --- a/usr/r/regexp/regexp.go +++ /dev/null @@ -1,767 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Regular expression library. - -package regexp - -import ( - "os"; - "vector"; -) - -export var debug = false; - - -export var ErrUnimplemented = os.NewError("unimplemented"); -export var ErrInternal = os.NewError("internal error"); -export var ErrUnmatchedLpar = os.NewError("unmatched '('"); -export var ErrUnmatchedRpar = os.NewError("unmatched ')'"); -export var ErrUnmatchedLbkt = os.NewError("unmatched '['"); -export var ErrUnmatchedRbkt = os.NewError("unmatched ']'"); -export var ErrBadRange = os.NewError("bad range in character class"); -export var ErrExtraneousBackslash = os.NewError("extraneous backslash"); -export var ErrBadClosure = os.NewError("repeated closure (**, ++, etc.)"); -export var ErrBareClosure = os.NewError("closure applies to nothing"); -export var ErrBadBackslash = os.NewError("illegal backslash escape"); - -// An instruction executed by the NFA -type Inst interface { - Type() int; // the type of this instruction: CHAR, ANY, etc. - Next() Inst; // the instruction to execute after this one - SetNext(i Inst); - Index() int; - SetIndex(i int); - Print(); -} - -type RE struct { - expr string; // the original expression - ch *chan<- *RE; // reply channel when we're done - error *os.Error; // compile- or run-time error; nil if OK - inst *vector.Vector; - start Inst; - nbra int; // number of brackets in expression, for subexpressions -} - -const ( - START // beginning of program - = iota; - END; // end of program: success - BOT; // '^' beginning of text - EOT; // '$' end of text - CHAR; // 'a' regular character - CHARCLASS; // [a-z] character class - ANY; // '.' any character - BRA; // '(' parenthesized expression - EBRA; // ')'; end of '(' parenthesized expression - ALT; // '|' alternation - NOP; // do nothing; makes it easy to link without patching -) - -// --- START start of program -type Start struct { - next Inst; - index int; -} - -func (start *Start) Type() int { return START } -func (start *Start) Next() Inst { return start.next } -func (start *Start) SetNext(i Inst) { start.next = i } -func (start *Start) Index() int { return start.index } -func (start *Start) SetIndex(i int) { start.index = i } -func (start *Start) Print() { print("start") } - -// --- END end of program -type End struct { - next Inst; - index int; -} - -func (end *End) Type() int { return END } -func (end *End) Next() Inst { return end.next } -func (end *End) SetNext(i Inst) { end.next = i } -func (end *End) Index() int { return end.index } -func (end *End) SetIndex(i int) { end.index = i } -func (end *End) Print() { print("end") } - -// --- BOT beginning of text -type Bot struct { - next Inst; - index int; -} - -func (bot *Bot) Type() int { return BOT } -func (bot *Bot) Next() Inst { return bot.next } -func (bot *Bot) SetNext(i Inst) { bot.next = i } -func (bot *Bot) Index() int { return bot.index } -func (bot *Bot) SetIndex(i int) { bot.index = i } -func (bot *Bot) Print() { print("bot") } - -// --- EOT end of text -type Eot struct { - next Inst; - index int; -} - -func (eot *Eot) Type() int { return EOT } -func (eot *Eot) Next() Inst { return eot.next } -func (eot *Eot) SetNext(i Inst) { eot.next = i } -func (eot *Eot) Index() int { return eot.index } -func (eot *Eot) SetIndex(i int) { eot.index = i } -func (eot *Eot) Print() { print("eot") } - -// --- CHAR a regular character -type Char struct { - next Inst; - index int; - char int; -} - -func (char *Char) Type() int { return CHAR } -func (char *Char) Next() Inst { return char.next } -func (char *Char) SetNext(i Inst) { char.next = i } -func (char *Char) Index() int { return char.index } -func (char *Char) SetIndex(i int) { char.index = i } -func (char *Char) Print() { print("char ", string(char.char)) } - -func NewChar(char int) *Char { - c := new(Char); - c.char = char; - return c; -} - -// --- CHARCLASS [a-z] - -type CClassChar int; // BUG: Shouldn't be necessary but 6g won't put ints into vectors - -type CharClass struct { - next Inst; - index int; - char int; - negate bool; // is character class negated? ([^a-z]) - // Vector of CClassChar, stored pairwise: [a-z] is (a,z); x is (x,x): - ranges *vector.Vector; -} - -func (cclass *CharClass) Type() int { return CHARCLASS } -func (cclass *CharClass) Next() Inst { return cclass.next } -func (cclass *CharClass) SetNext(i Inst) { cclass.next = i } -func (cclass *CharClass) Index() int { return cclass.index } -func (cclass *CharClass) SetIndex(i int) { cclass.index = i } -func (cclass *CharClass) Print() { - print("charclass"); - if cclass.negate { - print(" (negated)"); - } - for i := 0; i < cclass.ranges.Len(); i += 2 { - l := cclass.ranges.At(i).(CClassChar); - r := cclass.ranges.At(i+1).(CClassChar); - if l == r { - print(" [", string(l), "]"); - } else { - print(" [", string(l), "-", string(r), "]"); - } - } -} - -func (cclass *CharClass) AddRange(a, b CClassChar) { - // range is a through b inclusive - cclass.ranges.Append(a); - cclass.ranges.Append(b); -} - -func (cclass *CharClass) Matches(c int) bool { - for i := 0; i < cclass.ranges.Len(); i = i+2 { - min := cclass.ranges.At(i).(CClassChar); - max := cclass.ranges.At(i+1).(CClassChar); - if min <= c && c <= max { - return !cclass.negate - } - } - return cclass.negate -} - -func NewCharClass() *CharClass { - c := new(CharClass); - c.ranges = vector.New(); - return c; -} - -// --- ANY any character -type Any struct { - next Inst; - index int; -} - -func (any *Any) Type() int { return ANY } -func (any *Any) Next() Inst { return any.next } -func (any *Any) SetNext(i Inst) { any.next = i } -func (any *Any) Index() int { return any.index } -func (any *Any) SetIndex(i int) { any.index = i } -func (any *Any) Print() { print("any") } - -// --- BRA parenthesized expression -type Bra struct { - next Inst; - index int; - n int; // subexpression number -} - -func (bra *Bra) Type() int { return BRA } -func (bra *Bra) Next() Inst { return bra.next } -func (bra *Bra) SetNext(i Inst) { bra.next = i } -func (bra *Bra) Index() int { return bra.index } -func (bra *Bra) SetIndex(i int) { bra.index = i } -func (bra *Bra) Print() { print("bra"); } - -// --- EBRA end of parenthesized expression -type Ebra struct { - next Inst; - index int; - n int; // subexpression number -} - -func (ebra *Ebra) Type() int { return EBRA } -func (ebra *Ebra) Next() Inst { return ebra.next } -func (ebra *Ebra) SetNext(i Inst) { ebra.next = i } -func (ebra *Ebra) Index() int { return ebra.index } -func (ebra *Ebra) SetIndex(i int) { ebra.index = i } -func (ebra *Ebra) Print() { print("ebra ", ebra.n); } - -// --- ALT alternation -type Alt struct { - next Inst; - index int; - left Inst; // other branch -} - -func (alt *Alt) Type() int { return ALT } -func (alt *Alt) Next() Inst { return alt.next } -func (alt *Alt) SetNext(i Inst) { alt.next = i } -func (alt *Alt) Index() int { return alt.index } -func (alt *Alt) SetIndex(i int) { alt.index = i } -func (alt *Alt) Print() { print("alt(", alt.left.Index(), ")"); } - -// --- NOP no operation -type Nop struct { - next Inst; - index int; -} - -func (nop *Nop) Type() int { return NOP } -func (nop *Nop) Next() Inst { return nop.next } -func (nop *Nop) SetNext(i Inst) { nop.next = i } -func (nop *Nop) Index() int { return nop.index } -func (nop *Nop) SetIndex(i int) { nop.index = i } -func (nop *Nop) Print() { print("nop") } - -// report error and exit compiling/executing goroutine -func (re *RE) Error(err *os.Error) { - re.error = err; - re.ch <- re; - sys.goexit(); -} - -func (re *RE) Add(i Inst) Inst { - i.SetIndex(re.inst.Len()); - re.inst.Append(i); - return i; -} - -type Parser struct { - re *RE; - nlpar int; // number of unclosed lpars - pos int; - ch int; -} - -const EOF = -1 - -func (p *Parser) c() int { - return p.ch; -} - -func (p *Parser) nextc() int { - if p.pos >= len(p.re.expr) { - p.ch = EOF - } else { - c, w := sys.stringtorune(p.re.expr, p.pos); - p.ch = c; - p.pos += w; - } - return p.ch; -} - -func NewParser(re *RE) *Parser { - parser := new(Parser); - parser.re = re; - parser.nextc(); // load p.ch - return parser; -} - -/* - -Grammar: - regexp: - concatenation { '|' concatenation } - concatenation: - { closure } - closure: - term [ '*' | '+' | '?' ] - term: - '^' - '$' - '.' - character - '[' character-ranges ']' - '(' regexp ')' - -*/ - -func (p *Parser) Regexp() (start, end Inst) - -var NULL Inst -type BUGinter interface{} - -func special(c int) bool { - s := `\.+*?()|[]`; - for i := 0; i < len(s); i++ { - if c == int(s[i]) { - return true - } - } - return false -} - -func specialcclass(c int) bool { - s := `\-[]`; - for i := 0; i < len(s); i++ { - if c == int(s[i]) { - return true - } - } - return false -} - -func (p *Parser) CharClass() Inst { - cc := NewCharClass(); - p.re.Add(cc); - if p.c() == '^' { - cc.negate = true; - p.nextc(); - } - left := -1; - for { - switch c := p.c(); c { - case ']', EOF: - if left >= 0 { - p.re.Error(ErrBadRange); - } - return cc; - case '-': // do this before backslash processing - p.re.Error(ErrBadRange); - case '\\': - c = p.nextc(); - switch { - case c == EOF: - p.re.Error(ErrExtraneousBackslash); - case c == 'n': - c = '\n'; - case specialcclass(c): - // c is as delivered - default: - p.re.Error(ErrBadBackslash); - } - fallthrough; - default: - p.nextc(); - switch { - case left < 0: // first of pair - if p.c() == '-' { // range - p.nextc(); - left = c; - } else { // single char - cc.AddRange(c, c); - } - case left <= c: // second of pair - cc.AddRange(left, c); - left = -1; - default: - p.re.Error(ErrBadRange); - } - } - } - return NULL -} - -func (p *Parser) Term() (start, end Inst) { - switch c := p.c(); c { - case '|', EOF: - return NULL, NULL; - case '*', '+', '|': - p.re.Error(ErrBareClosure); - case ')': - if p.nlpar == 0 { - p.re.Error(ErrUnmatchedRpar); - } - return NULL, NULL; - case ']': - p.re.Error(ErrUnmatchedRbkt); - case '^': - p.nextc(); - start = p.re.Add(new(Bot)); - return start, start; - case '$': - p.nextc(); - start = p.re.Add(new(Eot)); - return start, start; - case '.': - p.nextc(); - start = p.re.Add(new(Any)); - return start, start; - case '[': - p.nextc(); - start = p.CharClass(); - if p.c() != ']' { - p.re.Error(ErrUnmatchedLbkt); - } - p.nextc(); - return start, start; - case '(': - p.nextc(); - p.nlpar++; - p.re.nbra++; // increment first so first subexpr is \1 - nbra := p.re.nbra; - start, end = p.Regexp(); - if p.c() != ')' { - p.re.Error(ErrUnmatchedLpar); - } - p.nlpar--; - p.nextc(); - bra := new(Bra); - p.re.Add(bra); - ebra := new(Ebra); - p.re.Add(ebra); - bra.n = nbra; - ebra.n = nbra; - if start == NULL { - if end == NULL { p.re.Error(ErrInternal) } - start = ebra - } else { - end.SetNext(ebra); - } - bra.SetNext(start); - return bra, ebra; - case '\\': - c = p.nextc(); - switch { - case c == EOF: - p.re.Error(ErrExtraneousBackslash); - case c == 'n': - c = '\n'; - case special(c): - // c is as delivered - default: - p.re.Error(ErrBadBackslash); - } - fallthrough; - default: - p.nextc(); - start = NewChar(c); - p.re.Add(start); - return start, start - } - panic("unreachable"); -} - -func (p *Parser) Closure() (start, end Inst) { - start, end = p.Term(); - if start == NULL { - return - } - switch p.c() { - case '*': - // (start,end)*: - alt := new(Alt); - p.re.Add(alt); - end.SetNext(alt); // after end, do alt - alt.left = start; // alternate brach: return to start - start = alt; // alt becomes new (start, end) - end = alt; - case '+': - // (start,end)+: - alt := new(Alt); - p.re.Add(alt); - end.SetNext(alt); // after end, do alt - alt.left = start; // alternate brach: return to start - end = alt; // start is unchanged; end is alt - case '?': - // (start,end)?: - alt := new(Alt); - p.re.Add(alt); - nop := new(Nop); - p.re.Add(nop); - alt.left = start; // alternate branch is start - alt.next = nop; // follow on to nop - end.SetNext(nop); // after end, go to nop - start = alt; // start is now alt - end = nop; // end is nop pointed to by both branches - default: - return - } - switch p.nextc() { - case '*', '+', '?': - p.re.Error(ErrBadClosure); - } - return -} - -func (p *Parser) Concatenation() (start, end Inst) { - start, end = NULL, NULL; - for { - nstart, nend := p.Closure(); - switch { - case nstart == NULL: // end of this concatenation - if start == NULL { // this is the empty string - nop := p.re.Add(new(Nop)); - return nop, nop; - } - return; - case start == NULL: // this is first element of concatenation - start, end = nstart, nend; - default: - end.SetNext(nstart); - end = nend; - } - } - panic("unreachable"); -} - -func (p *Parser) Regexp() (start, end Inst) { - start, end = p.Concatenation(); - for { - switch p.c() { - default: - return; - case '|': - p.nextc(); - nstart, nend := p.Concatenation(); - alt := new(Alt); - p.re.Add(alt); - alt.left = start; - alt.next = nstart; - nop := new(Nop); - p.re.Add(nop); - end.SetNext(nop); - nend.SetNext(nop); - start, end = alt, nop; - } - } - panic("unreachable"); -} - -func UnNop(i Inst) Inst { - for i.Type() == NOP { - i = i.Next() - } - return i -} - -func (re *RE) EliminateNops() { - for i := 0; i < re.inst.Len(); i++ { - inst := re.inst.At(i).(Inst); - if inst.Type() == END { - continue - } - inst.SetNext(UnNop(inst.Next())); - if inst.Type() == ALT { - alt := inst.(*Alt); - alt.left = UnNop(alt.left); - } - } -} - -func (re *RE) Dump() { - for i := 0; i < re.inst.Len(); i++ { - inst := re.inst.At(i).(Inst); - print(inst.Index(), ": "); - inst.Print(); - if inst.Type() != END { - print(" -> ", inst.Next().Index()) - } - print("\n"); - } -} - -func (re *RE) DoParse() { - parser := NewParser(re); - start := new(Start); - re.Add(start); - s, e := parser.Regexp(); - start.next = s; - re.start = start; - e.SetNext(re.Add(new(End))); - - if debug { - re.Dump(); - println(); - } - - re.EliminateNops(); - - if debug { - re.Dump(); - println(); - } -} - - -func Compiler(str string, ch *chan *RE) { - re := new(RE); - re.expr = str; - re.inst = vector.New(); - re.ch = ch; - re.DoParse(); - ch <- re; -} - -// Public interface has only execute functionality (not yet implemented) -export type Regexp interface { - Execute(s string) *[]int -} - -// Compile in separate goroutine; wait for result -export func Compile(str string) (regexp Regexp, error *os.Error) { - ch := new(chan *RE); - go Compiler(str, ch); - re := <-ch; - return re, re.error -} - -type State struct { - inst Inst; // next instruction to execute - match *[]int; // pairs of bracketing submatches. 0th is start,end -} - -// Append new state to to-do list. Leftmost-longest wins so avoid -// adding a state that's already active. -func AddState(s *[]State, inst Inst, match *[]int) *[]State { - index := inst.Index(); - l := len(s); - pos := match[0]; - // TODO: Once the state is a vector and we can do insert, have inputs always - // go in order correctly and this "earlier" test is never necessary, - for i := 0; i < l; i++ { - if s[i].inst.Index() == index && // same instruction - s[i].match[0] < pos { // earlier match already going; lefmost wins - return s - } - } - if l == cap(s) { - s1 := new([]State, 2*l)[0:l]; - for i := 0; i < l; i++ { - s1[i] = s[i]; - } - s = s1; - } - s = s[0:l+1]; - s[l].inst = inst; - s[l].match = match; - return s; -} - -func (re *RE) DoExecute(str string, pos int) *[]int { - var s [2]*[]State; // TODO: use a vector when State values (not ptrs) can be vector elements - s[0] = new([]State, 10)[0:0]; - s[1] = new([]State, 10)[0:0]; - in, out := 0, 1; - var final State; - found := false; - for pos <= len(str) { - if !found { - // prime the pump if we haven't seen a match yet - match := new([]int, 2*(re.nbra+1)); - for i := 0; i < len(match); i++ { - match[i] = -1; // no match seen; catches cases like "a(b)?c" on "ac" - } - match[0] = pos; - s[out] = AddState(s[out], re.start.Next(), match); - } - in, out = out, in; // old out state is new in state - s[out] = s[out][0:0]; // clear out state - if len(s[in]) == 0 { - // machine has completed - break; - } - charwidth := 1; - c := EOF; - if pos < len(str) { - c, charwidth = sys.stringtorune(str, pos); - } - for i := 0; i < len(s[in]); i++ { - state := s[in][i]; - switch s[in][i].inst.Type() { - case BOT: - if pos == 0 { - s[in] = AddState(s[in], state.inst.Next(), state.match) - } - case EOT: - if pos == len(str) { - s[in] = AddState(s[in], state.inst.Next(), state.match) - } - case CHAR: - if c == state.inst.(*Char).char { - s[out] = AddState(s[out], state.inst.Next(), state.match) - } - case CHARCLASS: - if state.inst.(*CharClass).Matches(c) { - s[out] = AddState(s[out], state.inst.Next(), state.match) - } - case ANY: - if c != EOF { - s[out] = AddState(s[out], state.inst.Next(), state.match) - } - case BRA: - n := state.inst.(*Bra).n; - state.match[2*n] = pos; - s[in] = AddState(s[in], state.inst.Next(), state.match); - case EBRA: - n := state.inst.(*Ebra).n; - state.match[2*n+1] = pos; - s[in] = AddState(s[in], state.inst.Next(), state.match); - case ALT: - s[in] = AddState(s[in], state.inst.(*Alt).left, state.match); - // give other branch a copy of this match vector - s1 := new([]int, 2*(re.nbra+1)); - for i := 0; i < len(s1); i++ { - s1[i] = state.match[i] - } - s[in] = AddState(s[in], state.inst.Next(), s1); - case END: - // choose leftmost longest - if !found || // first - state.match[0] < final.match[0] || // leftmost - (state.match[0] == final.match[0] && pos > final.match[1]) { // longest - final = state; - final.match[1] = pos; - } - found = true; - default: - state.inst.Print(); - panic("unknown instruction in execute"); - } - } - pos += charwidth; - } - if !found { - return nil - } - return final.match; -} - - -func (re *RE) Execute(s string) *[]int { - return re.DoExecute(s, 0) -} |