summaryrefslogtreecommitdiff
path: root/usr/r
diff options
context:
space:
mode:
authorRob Pike <r@golang.org>2008-10-14 19:22:17 -0700
committerRob Pike <r@golang.org>2008-10-14 19:22:17 -0700
commite1033402ca1125d3517e79f1dbabae9b763ec90c (patch)
treef06f7331ad0775154f3e460e2785d9ad897120e1 /usr/r
parente36cd17c697c44d238ae9bd15dfafda2b8a4d209 (diff)
downloadgolang-e1033402ca1125d3517e79f1dbabae9b763ec90c.tar.gz
move regexp to lib
next cl will update names and add to build R=rsc DELTA=1876 (938 added, 938 deleted, 0 changed) OCL=17149 CL=17166
Diffstat (limited to 'usr/r')
-rw-r--r--usr/r/regexp/Makefile20
-rw-r--r--usr/r/regexp/main.go160
-rw-r--r--usr/r/regexp/regexp.go767
3 files changed, 0 insertions, 947 deletions
diff --git a/usr/r/regexp/Makefile b/usr/r/regexp/Makefile
deleted file mode 100644
index ac466a0f1..000000000
--- a/usr/r/regexp/Makefile
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright 2009 The Go Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style
-# license that can be found in the LICENSE file.
-
-A=6
-G=$(A)g
-L=$(A)l
-
-all: main
-
-main: main.6
- $L -o main main.6
-
-main.6: regexp.6
-
-clean:
- rm -f *.6 main
-
-%.6: %.go
- $G $<
diff --git a/usr/r/regexp/main.go b/usr/r/regexp/main.go
deleted file mode 100644
index c89f9b557..000000000
--- a/usr/r/regexp/main.go
+++ /dev/null
@@ -1,160 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package main
-
-import (
- "os";
- "regexp";
-)
-
-var good_re = []string{
- ``,
- `.`,
- `^.$`,
- `a`,
- `a*`,
- `a+`,
- `a?`,
- `a|b`,
- `a*|b*`,
- `(a*|b)(c*|d)`,
- `[a-z]`,
- `[a-abc-c\-\]\[]`,
- `[a-z]+`,
- `[]`,
- `[abc]`,
- `[^1234]`,
-}
-
-// TODO: nice to do this with a map but we don't have an iterator
-type StringError struct {
- re string;
- err *os.Error;
-}
-var bad_re = []StringError{
- StringError{ `*`, regexp.ErrBareClosure },
- StringError{ `(abc`, regexp.ErrUnmatchedLpar },
- StringError{ `abc)`, regexp.ErrUnmatchedRpar },
- StringError{ `x[a-z`, regexp.ErrUnmatchedLbkt },
- StringError{ `abc]`, regexp.ErrUnmatchedRbkt },
- StringError{ `[z-a]`, regexp.ErrBadRange },
- StringError{ `abc\`, regexp.ErrExtraneousBackslash },
- StringError{ `a**`, regexp.ErrBadClosure },
- StringError{ `a*+`, regexp.ErrBadClosure },
- StringError{ `a??`, regexp.ErrBadClosure },
- StringError{ `*`, regexp.ErrBareClosure },
- StringError{ `\x`, regexp.ErrBadBackslash },
-}
-
-type Vec [20]int;
-
-type Tester struct {
- re string;
- text string;
- match Vec;
-}
-
-const END = -1000
-
-var matches = []Tester {
- Tester{ ``, "", Vec{0,0, END} },
- Tester{ `a`, "a", Vec{0,1, END} },
- Tester{ `b`, "abc", Vec{1,2, END} },
- Tester{ `.`, "a", Vec{0,1, END} },
- Tester{ `.*`, "abcdef", Vec{0,6, END} },
- Tester{ `^abcd$`, "abcd", Vec{0,4, END} },
- Tester{ `^bcd'`, "abcdef", Vec{END} },
- Tester{ `^abcd$`, "abcde", Vec{END} },
- Tester{ `a+`, "baaab", Vec{1,4, END} },
- Tester{ `a*`, "baaab", Vec{0,0, END} },
- Tester{ `[a-z]+`, "abcd", Vec{0,4, END} },
- Tester{ `[^a-z]+`, "ab1234cd", Vec{2,6, END} },
- Tester{ `[a\-\]z]+`, "az]-bcz", Vec{0,4, END} },
- Tester{ `[日本語]+`, "日本語日本語", Vec{0,18, END} },
- Tester{ `()`, "", Vec{0,0, 0,0, END} },
- Tester{ `(a)`, "a", Vec{0,1, 0,1, END} },
- Tester{ `(.)(.)`, "日a", Vec{0,4, 0,3, 3,4, END} },
- Tester{ `(.*)`, "", Vec{0,0, 0,0, END} },
- Tester{ `(.*)`, "abcd", Vec{0,4, 0,4, END} },
- Tester{ `(..)(..)`, "abcd", Vec{0,4, 0,2, 2,4, END} },
- Tester{ `(([^xyz]*)(d))`, "abcd", Vec{0,4, 0,4, 0,3, 3,4, END} },
- Tester{ `((a|b|c)*(d))`, "abcd", Vec{0,4, 0,4, 2,3, 3,4, END} },
- Tester{ `(((a|b|c)*)(d))`, "abcd", Vec{0,4, 0,4, 0,3, 2,3, 3,4, END} },
- Tester{ `a*(|(b))c*`, "aacc", Vec{0,4, 2,2, -1,-1, END} },
-}
-
-func Compile(expr string, error *os.Error) regexp.Regexp {
- re, err := regexp.Compile(expr);
- if err != error {
- print("compiling `", expr, "`; unexpected error: ", err.String(), "\n");
- sys.exit(1);
- }
- return re
-}
-
-func MarkedLen(m *[] int) int {
- if m == nil {
- return 0
- }
- var i int;
- for i = 0; i < len(m) && m[i] != END; i = i+2 {
- }
- return i
-}
-
-func PrintVec(m *[] int) {
- l := MarkedLen(m);
- if l == 0 {
- print("<no match>");
- } else {
- for i := 0; i < l && m[i] != END; i = i+2 {
- print(m[i], ",", m[i+1], " ")
- }
- }
-}
-
-func Equal(m1, m2 *[]int) bool {
- l := MarkedLen(m1);
- if l != MarkedLen(m2) {
- return false
- }
- for i := 0; i < l; i++ {
- if m1[i] != m2[i] {
- return false
- }
- }
- return true
-}
-
-func Match(expr string, str string, match *[]int) {
- re := Compile(expr, nil);
- m := re.Execute(str);
- if !Equal(m, match) {
- print("failure on `", expr, "` matching `", str, "`:\n");
- PrintVec(m);
- print("\nshould be:\n");
- PrintVec(match);
- print("\n");
- sys.exit(1);
- }
-}
-
-func main() {
- //regexp.debug = true;
- if sys.argc() > 1 {
- Compile(sys.argv(1), nil);
- sys.exit(0);
- }
- for i := 0; i < len(good_re); i++ {
- Compile(good_re[i], nil);
- }
- for i := 0; i < len(bad_re); i++ {
- Compile(bad_re[i].re, bad_re[i].err)
- }
- for i := 0; i < len(matches); i++ {
- t := &matches[i];
- Match(t.re, t.text, &t.match)
- }
-}
diff --git a/usr/r/regexp/regexp.go b/usr/r/regexp/regexp.go
deleted file mode 100644
index 6535e6ef4..000000000
--- a/usr/r/regexp/regexp.go
+++ /dev/null
@@ -1,767 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Regular expression library.
-
-package regexp
-
-import (
- "os";
- "vector";
-)
-
-export var debug = false;
-
-
-export var ErrUnimplemented = os.NewError("unimplemented");
-export var ErrInternal = os.NewError("internal error");
-export var ErrUnmatchedLpar = os.NewError("unmatched '('");
-export var ErrUnmatchedRpar = os.NewError("unmatched ')'");
-export var ErrUnmatchedLbkt = os.NewError("unmatched '['");
-export var ErrUnmatchedRbkt = os.NewError("unmatched ']'");
-export var ErrBadRange = os.NewError("bad range in character class");
-export var ErrExtraneousBackslash = os.NewError("extraneous backslash");
-export var ErrBadClosure = os.NewError("repeated closure (**, ++, etc.)");
-export var ErrBareClosure = os.NewError("closure applies to nothing");
-export var ErrBadBackslash = os.NewError("illegal backslash escape");
-
-// An instruction executed by the NFA
-type Inst interface {
- Type() int; // the type of this instruction: CHAR, ANY, etc.
- Next() Inst; // the instruction to execute after this one
- SetNext(i Inst);
- Index() int;
- SetIndex(i int);
- Print();
-}
-
-type RE struct {
- expr string; // the original expression
- ch *chan<- *RE; // reply channel when we're done
- error *os.Error; // compile- or run-time error; nil if OK
- inst *vector.Vector;
- start Inst;
- nbra int; // number of brackets in expression, for subexpressions
-}
-
-const (
- START // beginning of program
- = iota;
- END; // end of program: success
- BOT; // '^' beginning of text
- EOT; // '$' end of text
- CHAR; // 'a' regular character
- CHARCLASS; // [a-z] character class
- ANY; // '.' any character
- BRA; // '(' parenthesized expression
- EBRA; // ')'; end of '(' parenthesized expression
- ALT; // '|' alternation
- NOP; // do nothing; makes it easy to link without patching
-)
-
-// --- START start of program
-type Start struct {
- next Inst;
- index int;
-}
-
-func (start *Start) Type() int { return START }
-func (start *Start) Next() Inst { return start.next }
-func (start *Start) SetNext(i Inst) { start.next = i }
-func (start *Start) Index() int { return start.index }
-func (start *Start) SetIndex(i int) { start.index = i }
-func (start *Start) Print() { print("start") }
-
-// --- END end of program
-type End struct {
- next Inst;
- index int;
-}
-
-func (end *End) Type() int { return END }
-func (end *End) Next() Inst { return end.next }
-func (end *End) SetNext(i Inst) { end.next = i }
-func (end *End) Index() int { return end.index }
-func (end *End) SetIndex(i int) { end.index = i }
-func (end *End) Print() { print("end") }
-
-// --- BOT beginning of text
-type Bot struct {
- next Inst;
- index int;
-}
-
-func (bot *Bot) Type() int { return BOT }
-func (bot *Bot) Next() Inst { return bot.next }
-func (bot *Bot) SetNext(i Inst) { bot.next = i }
-func (bot *Bot) Index() int { return bot.index }
-func (bot *Bot) SetIndex(i int) { bot.index = i }
-func (bot *Bot) Print() { print("bot") }
-
-// --- EOT end of text
-type Eot struct {
- next Inst;
- index int;
-}
-
-func (eot *Eot) Type() int { return EOT }
-func (eot *Eot) Next() Inst { return eot.next }
-func (eot *Eot) SetNext(i Inst) { eot.next = i }
-func (eot *Eot) Index() int { return eot.index }
-func (eot *Eot) SetIndex(i int) { eot.index = i }
-func (eot *Eot) Print() { print("eot") }
-
-// --- CHAR a regular character
-type Char struct {
- next Inst;
- index int;
- char int;
-}
-
-func (char *Char) Type() int { return CHAR }
-func (char *Char) Next() Inst { return char.next }
-func (char *Char) SetNext(i Inst) { char.next = i }
-func (char *Char) Index() int { return char.index }
-func (char *Char) SetIndex(i int) { char.index = i }
-func (char *Char) Print() { print("char ", string(char.char)) }
-
-func NewChar(char int) *Char {
- c := new(Char);
- c.char = char;
- return c;
-}
-
-// --- CHARCLASS [a-z]
-
-type CClassChar int; // BUG: Shouldn't be necessary but 6g won't put ints into vectors
-
-type CharClass struct {
- next Inst;
- index int;
- char int;
- negate bool; // is character class negated? ([^a-z])
- // Vector of CClassChar, stored pairwise: [a-z] is (a,z); x is (x,x):
- ranges *vector.Vector;
-}
-
-func (cclass *CharClass) Type() int { return CHARCLASS }
-func (cclass *CharClass) Next() Inst { return cclass.next }
-func (cclass *CharClass) SetNext(i Inst) { cclass.next = i }
-func (cclass *CharClass) Index() int { return cclass.index }
-func (cclass *CharClass) SetIndex(i int) { cclass.index = i }
-func (cclass *CharClass) Print() {
- print("charclass");
- if cclass.negate {
- print(" (negated)");
- }
- for i := 0; i < cclass.ranges.Len(); i += 2 {
- l := cclass.ranges.At(i).(CClassChar);
- r := cclass.ranges.At(i+1).(CClassChar);
- if l == r {
- print(" [", string(l), "]");
- } else {
- print(" [", string(l), "-", string(r), "]");
- }
- }
-}
-
-func (cclass *CharClass) AddRange(a, b CClassChar) {
- // range is a through b inclusive
- cclass.ranges.Append(a);
- cclass.ranges.Append(b);
-}
-
-func (cclass *CharClass) Matches(c int) bool {
- for i := 0; i < cclass.ranges.Len(); i = i+2 {
- min := cclass.ranges.At(i).(CClassChar);
- max := cclass.ranges.At(i+1).(CClassChar);
- if min <= c && c <= max {
- return !cclass.negate
- }
- }
- return cclass.negate
-}
-
-func NewCharClass() *CharClass {
- c := new(CharClass);
- c.ranges = vector.New();
- return c;
-}
-
-// --- ANY any character
-type Any struct {
- next Inst;
- index int;
-}
-
-func (any *Any) Type() int { return ANY }
-func (any *Any) Next() Inst { return any.next }
-func (any *Any) SetNext(i Inst) { any.next = i }
-func (any *Any) Index() int { return any.index }
-func (any *Any) SetIndex(i int) { any.index = i }
-func (any *Any) Print() { print("any") }
-
-// --- BRA parenthesized expression
-type Bra struct {
- next Inst;
- index int;
- n int; // subexpression number
-}
-
-func (bra *Bra) Type() int { return BRA }
-func (bra *Bra) Next() Inst { return bra.next }
-func (bra *Bra) SetNext(i Inst) { bra.next = i }
-func (bra *Bra) Index() int { return bra.index }
-func (bra *Bra) SetIndex(i int) { bra.index = i }
-func (bra *Bra) Print() { print("bra"); }
-
-// --- EBRA end of parenthesized expression
-type Ebra struct {
- next Inst;
- index int;
- n int; // subexpression number
-}
-
-func (ebra *Ebra) Type() int { return EBRA }
-func (ebra *Ebra) Next() Inst { return ebra.next }
-func (ebra *Ebra) SetNext(i Inst) { ebra.next = i }
-func (ebra *Ebra) Index() int { return ebra.index }
-func (ebra *Ebra) SetIndex(i int) { ebra.index = i }
-func (ebra *Ebra) Print() { print("ebra ", ebra.n); }
-
-// --- ALT alternation
-type Alt struct {
- next Inst;
- index int;
- left Inst; // other branch
-}
-
-func (alt *Alt) Type() int { return ALT }
-func (alt *Alt) Next() Inst { return alt.next }
-func (alt *Alt) SetNext(i Inst) { alt.next = i }
-func (alt *Alt) Index() int { return alt.index }
-func (alt *Alt) SetIndex(i int) { alt.index = i }
-func (alt *Alt) Print() { print("alt(", alt.left.Index(), ")"); }
-
-// --- NOP no operation
-type Nop struct {
- next Inst;
- index int;
-}
-
-func (nop *Nop) Type() int { return NOP }
-func (nop *Nop) Next() Inst { return nop.next }
-func (nop *Nop) SetNext(i Inst) { nop.next = i }
-func (nop *Nop) Index() int { return nop.index }
-func (nop *Nop) SetIndex(i int) { nop.index = i }
-func (nop *Nop) Print() { print("nop") }
-
-// report error and exit compiling/executing goroutine
-func (re *RE) Error(err *os.Error) {
- re.error = err;
- re.ch <- re;
- sys.goexit();
-}
-
-func (re *RE) Add(i Inst) Inst {
- i.SetIndex(re.inst.Len());
- re.inst.Append(i);
- return i;
-}
-
-type Parser struct {
- re *RE;
- nlpar int; // number of unclosed lpars
- pos int;
- ch int;
-}
-
-const EOF = -1
-
-func (p *Parser) c() int {
- return p.ch;
-}
-
-func (p *Parser) nextc() int {
- if p.pos >= len(p.re.expr) {
- p.ch = EOF
- } else {
- c, w := sys.stringtorune(p.re.expr, p.pos);
- p.ch = c;
- p.pos += w;
- }
- return p.ch;
-}
-
-func NewParser(re *RE) *Parser {
- parser := new(Parser);
- parser.re = re;
- parser.nextc(); // load p.ch
- return parser;
-}
-
-/*
-
-Grammar:
- regexp:
- concatenation { '|' concatenation }
- concatenation:
- { closure }
- closure:
- term [ '*' | '+' | '?' ]
- term:
- '^'
- '$'
- '.'
- character
- '[' character-ranges ']'
- '(' regexp ')'
-
-*/
-
-func (p *Parser) Regexp() (start, end Inst)
-
-var NULL Inst
-type BUGinter interface{}
-
-func special(c int) bool {
- s := `\.+*?()|[]`;
- for i := 0; i < len(s); i++ {
- if c == int(s[i]) {
- return true
- }
- }
- return false
-}
-
-func specialcclass(c int) bool {
- s := `\-[]`;
- for i := 0; i < len(s); i++ {
- if c == int(s[i]) {
- return true
- }
- }
- return false
-}
-
-func (p *Parser) CharClass() Inst {
- cc := NewCharClass();
- p.re.Add(cc);
- if p.c() == '^' {
- cc.negate = true;
- p.nextc();
- }
- left := -1;
- for {
- switch c := p.c(); c {
- case ']', EOF:
- if left >= 0 {
- p.re.Error(ErrBadRange);
- }
- return cc;
- case '-': // do this before backslash processing
- p.re.Error(ErrBadRange);
- case '\\':
- c = p.nextc();
- switch {
- case c == EOF:
- p.re.Error(ErrExtraneousBackslash);
- case c == 'n':
- c = '\n';
- case specialcclass(c):
- // c is as delivered
- default:
- p.re.Error(ErrBadBackslash);
- }
- fallthrough;
- default:
- p.nextc();
- switch {
- case left < 0: // first of pair
- if p.c() == '-' { // range
- p.nextc();
- left = c;
- } else { // single char
- cc.AddRange(c, c);
- }
- case left <= c: // second of pair
- cc.AddRange(left, c);
- left = -1;
- default:
- p.re.Error(ErrBadRange);
- }
- }
- }
- return NULL
-}
-
-func (p *Parser) Term() (start, end Inst) {
- switch c := p.c(); c {
- case '|', EOF:
- return NULL, NULL;
- case '*', '+', '|':
- p.re.Error(ErrBareClosure);
- case ')':
- if p.nlpar == 0 {
- p.re.Error(ErrUnmatchedRpar);
- }
- return NULL, NULL;
- case ']':
- p.re.Error(ErrUnmatchedRbkt);
- case '^':
- p.nextc();
- start = p.re.Add(new(Bot));
- return start, start;
- case '$':
- p.nextc();
- start = p.re.Add(new(Eot));
- return start, start;
- case '.':
- p.nextc();
- start = p.re.Add(new(Any));
- return start, start;
- case '[':
- p.nextc();
- start = p.CharClass();
- if p.c() != ']' {
- p.re.Error(ErrUnmatchedLbkt);
- }
- p.nextc();
- return start, start;
- case '(':
- p.nextc();
- p.nlpar++;
- p.re.nbra++; // increment first so first subexpr is \1
- nbra := p.re.nbra;
- start, end = p.Regexp();
- if p.c() != ')' {
- p.re.Error(ErrUnmatchedLpar);
- }
- p.nlpar--;
- p.nextc();
- bra := new(Bra);
- p.re.Add(bra);
- ebra := new(Ebra);
- p.re.Add(ebra);
- bra.n = nbra;
- ebra.n = nbra;
- if start == NULL {
- if end == NULL { p.re.Error(ErrInternal) }
- start = ebra
- } else {
- end.SetNext(ebra);
- }
- bra.SetNext(start);
- return bra, ebra;
- case '\\':
- c = p.nextc();
- switch {
- case c == EOF:
- p.re.Error(ErrExtraneousBackslash);
- case c == 'n':
- c = '\n';
- case special(c):
- // c is as delivered
- default:
- p.re.Error(ErrBadBackslash);
- }
- fallthrough;
- default:
- p.nextc();
- start = NewChar(c);
- p.re.Add(start);
- return start, start
- }
- panic("unreachable");
-}
-
-func (p *Parser) Closure() (start, end Inst) {
- start, end = p.Term();
- if start == NULL {
- return
- }
- switch p.c() {
- case '*':
- // (start,end)*:
- alt := new(Alt);
- p.re.Add(alt);
- end.SetNext(alt); // after end, do alt
- alt.left = start; // alternate brach: return to start
- start = alt; // alt becomes new (start, end)
- end = alt;
- case '+':
- // (start,end)+:
- alt := new(Alt);
- p.re.Add(alt);
- end.SetNext(alt); // after end, do alt
- alt.left = start; // alternate brach: return to start
- end = alt; // start is unchanged; end is alt
- case '?':
- // (start,end)?:
- alt := new(Alt);
- p.re.Add(alt);
- nop := new(Nop);
- p.re.Add(nop);
- alt.left = start; // alternate branch is start
- alt.next = nop; // follow on to nop
- end.SetNext(nop); // after end, go to nop
- start = alt; // start is now alt
- end = nop; // end is nop pointed to by both branches
- default:
- return
- }
- switch p.nextc() {
- case '*', '+', '?':
- p.re.Error(ErrBadClosure);
- }
- return
-}
-
-func (p *Parser) Concatenation() (start, end Inst) {
- start, end = NULL, NULL;
- for {
- nstart, nend := p.Closure();
- switch {
- case nstart == NULL: // end of this concatenation
- if start == NULL { // this is the empty string
- nop := p.re.Add(new(Nop));
- return nop, nop;
- }
- return;
- case start == NULL: // this is first element of concatenation
- start, end = nstart, nend;
- default:
- end.SetNext(nstart);
- end = nend;
- }
- }
- panic("unreachable");
-}
-
-func (p *Parser) Regexp() (start, end Inst) {
- start, end = p.Concatenation();
- for {
- switch p.c() {
- default:
- return;
- case '|':
- p.nextc();
- nstart, nend := p.Concatenation();
- alt := new(Alt);
- p.re.Add(alt);
- alt.left = start;
- alt.next = nstart;
- nop := new(Nop);
- p.re.Add(nop);
- end.SetNext(nop);
- nend.SetNext(nop);
- start, end = alt, nop;
- }
- }
- panic("unreachable");
-}
-
-func UnNop(i Inst) Inst {
- for i.Type() == NOP {
- i = i.Next()
- }
- return i
-}
-
-func (re *RE) EliminateNops() {
- for i := 0; i < re.inst.Len(); i++ {
- inst := re.inst.At(i).(Inst);
- if inst.Type() == END {
- continue
- }
- inst.SetNext(UnNop(inst.Next()));
- if inst.Type() == ALT {
- alt := inst.(*Alt);
- alt.left = UnNop(alt.left);
- }
- }
-}
-
-func (re *RE) Dump() {
- for i := 0; i < re.inst.Len(); i++ {
- inst := re.inst.At(i).(Inst);
- print(inst.Index(), ": ");
- inst.Print();
- if inst.Type() != END {
- print(" -> ", inst.Next().Index())
- }
- print("\n");
- }
-}
-
-func (re *RE) DoParse() {
- parser := NewParser(re);
- start := new(Start);
- re.Add(start);
- s, e := parser.Regexp();
- start.next = s;
- re.start = start;
- e.SetNext(re.Add(new(End)));
-
- if debug {
- re.Dump();
- println();
- }
-
- re.EliminateNops();
-
- if debug {
- re.Dump();
- println();
- }
-}
-
-
-func Compiler(str string, ch *chan *RE) {
- re := new(RE);
- re.expr = str;
- re.inst = vector.New();
- re.ch = ch;
- re.DoParse();
- ch <- re;
-}
-
-// Public interface has only execute functionality (not yet implemented)
-export type Regexp interface {
- Execute(s string) *[]int
-}
-
-// Compile in separate goroutine; wait for result
-export func Compile(str string) (regexp Regexp, error *os.Error) {
- ch := new(chan *RE);
- go Compiler(str, ch);
- re := <-ch;
- return re, re.error
-}
-
-type State struct {
- inst Inst; // next instruction to execute
- match *[]int; // pairs of bracketing submatches. 0th is start,end
-}
-
-// Append new state to to-do list. Leftmost-longest wins so avoid
-// adding a state that's already active.
-func AddState(s *[]State, inst Inst, match *[]int) *[]State {
- index := inst.Index();
- l := len(s);
- pos := match[0];
- // TODO: Once the state is a vector and we can do insert, have inputs always
- // go in order correctly and this "earlier" test is never necessary,
- for i := 0; i < l; i++ {
- if s[i].inst.Index() == index && // same instruction
- s[i].match[0] < pos { // earlier match already going; lefmost wins
- return s
- }
- }
- if l == cap(s) {
- s1 := new([]State, 2*l)[0:l];
- for i := 0; i < l; i++ {
- s1[i] = s[i];
- }
- s = s1;
- }
- s = s[0:l+1];
- s[l].inst = inst;
- s[l].match = match;
- return s;
-}
-
-func (re *RE) DoExecute(str string, pos int) *[]int {
- var s [2]*[]State; // TODO: use a vector when State values (not ptrs) can be vector elements
- s[0] = new([]State, 10)[0:0];
- s[1] = new([]State, 10)[0:0];
- in, out := 0, 1;
- var final State;
- found := false;
- for pos <= len(str) {
- if !found {
- // prime the pump if we haven't seen a match yet
- match := new([]int, 2*(re.nbra+1));
- for i := 0; i < len(match); i++ {
- match[i] = -1; // no match seen; catches cases like "a(b)?c" on "ac"
- }
- match[0] = pos;
- s[out] = AddState(s[out], re.start.Next(), match);
- }
- in, out = out, in; // old out state is new in state
- s[out] = s[out][0:0]; // clear out state
- if len(s[in]) == 0 {
- // machine has completed
- break;
- }
- charwidth := 1;
- c := EOF;
- if pos < len(str) {
- c, charwidth = sys.stringtorune(str, pos);
- }
- for i := 0; i < len(s[in]); i++ {
- state := s[in][i];
- switch s[in][i].inst.Type() {
- case BOT:
- if pos == 0 {
- s[in] = AddState(s[in], state.inst.Next(), state.match)
- }
- case EOT:
- if pos == len(str) {
- s[in] = AddState(s[in], state.inst.Next(), state.match)
- }
- case CHAR:
- if c == state.inst.(*Char).char {
- s[out] = AddState(s[out], state.inst.Next(), state.match)
- }
- case CHARCLASS:
- if state.inst.(*CharClass).Matches(c) {
- s[out] = AddState(s[out], state.inst.Next(), state.match)
- }
- case ANY:
- if c != EOF {
- s[out] = AddState(s[out], state.inst.Next(), state.match)
- }
- case BRA:
- n := state.inst.(*Bra).n;
- state.match[2*n] = pos;
- s[in] = AddState(s[in], state.inst.Next(), state.match);
- case EBRA:
- n := state.inst.(*Ebra).n;
- state.match[2*n+1] = pos;
- s[in] = AddState(s[in], state.inst.Next(), state.match);
- case ALT:
- s[in] = AddState(s[in], state.inst.(*Alt).left, state.match);
- // give other branch a copy of this match vector
- s1 := new([]int, 2*(re.nbra+1));
- for i := 0; i < len(s1); i++ {
- s1[i] = state.match[i]
- }
- s[in] = AddState(s[in], state.inst.Next(), s1);
- case END:
- // choose leftmost longest
- if !found || // first
- state.match[0] < final.match[0] || // leftmost
- (state.match[0] == final.match[0] && pos > final.match[1]) { // longest
- final = state;
- final.match[1] = pos;
- }
- found = true;
- default:
- state.inst.Print();
- panic("unknown instruction in execute");
- }
- }
- pos += charwidth;
- }
- if !found {
- return nil
- }
- return final.match;
-}
-
-
-func (re *RE) Execute(s string) *[]int {
- return re.DoExecute(s, 0)
-}