summary refs log tree commit diff
path: root/usr/r/regexp/regexp.go
diff options
context:
space:
mode:
author Rob Pike <r@golang.org> 2008-10-14 17:45:49 -0700
committer Rob Pike <r@golang.org> 2008-10-14 17:45:49 -0700
commit a6063ecf5d8425df5338705b533c495591df02ef (patch)
tree 0452d030340bd20aaec1dbe743dac5105c4a5164 /usr/r/regexp/regexp.go
parent 3f9f0d637b367c08ea008d68dfd454ea0738263b (diff)
download golang-a6063ecf5d8425df5338705b533c495591df02ef.tar.gz
add some tests
fix some bugs in () ordering and rune processing

R=rsc
DELTA=72 (27 added, 5 deleted, 40 changed)
OCL=17147
CL=17147
Diffstat (limited to 'usr/r/regexp/regexp.go')
-rw-r--r-- usr/r/regexp/regexp.go 29
1 file changed, 15 insertions, 14 deletions
diff --git a/usr/r/regexp/regexp.go b/usr/r/regexp/regexp.go
index 0a6fd3113..6535e6ef4 100644
--- a/usr/r/regexp/regexp.go
+++ b/usr/r/regexp/regexp.go
@@ -287,7 +287,6 @@ func (p *Parser) nextc() int {
if p.pos >= len(p.re.expr) {
p.ch = EOF
} else {
- // TODO: stringotorune should take a string*
c, w := sys.stringtorune(p.re.expr, p.pos);
p.ch = c;
p.pos += w;
@@ -433,6 +432,8 @@ func (p *Parser) Term() (start, end Inst) {
case '(':
p.nextc();
p.nlpar++;
+ p.re.nbra++; // increment first so first subexpr is \1
+ nbra := p.re.nbra;
start, end = p.Regexp();
if p.c() != ')' {
p.re.Error(ErrUnmatchedLpar);
@@ -443,9 +444,8 @@ func (p *Parser) Term() (start, end Inst) {
p.re.Add(bra);
ebra := new(Ebra);
p.re.Add(ebra);
- p.re.nbra++; // increment first so first subexpr is \1
- bra.n = p.re.nbra;
- ebra.n = p.re.nbra;
+ bra.n = nbra;
+ ebra.n = nbra;
if start == NULL {
if end == NULL { p.re.Error(ErrInternal) }
start = ebra
@@ -479,7 +479,7 @@ func (p *Parser) Term() (start, end Inst) {
func (p *Parser) Closure() (start, end Inst) {
start, end = p.Term();
if start == NULL {
- return start, end
+ return
}
switch p.c() {
case '*':
@@ -509,13 +509,13 @@ func (p *Parser) Closure() (start, end Inst) {
start = alt; // start is now alt
end = nop; // end is nop pointed to by both branches
default:
- return start, end;
+ return
}
switch p.nextc() {
case '*', '+', '?':
p.re.Error(ErrBadClosure);
}
- return start, end;
+ return
}
func (p *Parser) Concatenation() (start, end Inst) {
@@ -528,7 +528,7 @@ func (p *Parser) Concatenation() (start, end Inst) {
nop := p.re.Add(new(Nop));
return nop, nop;
}
- return start, end;
+ return;
case start == NULL: // this is first element of concatenation
start, end = nstart, nend;
default:
@@ -544,7 +544,7 @@ func (p *Parser) Regexp() (start, end Inst) {
for {
switch p.c() {
default:
- return start, end;
+ return;
case '|':
p.nextc();
nstart, nend := p.Concatenation();
@@ -683,6 +683,9 @@ func (re *RE) DoExecute(str string, pos int) *[]int {
if !found {
// prime the pump if we haven't seen a match yet
match := new([]int, 2*(re.nbra+1));
+ for i := 0; i < len(match); i++ {
+ match[i] = -1; // no match seen; catches cases like "a(b)?c" on "ac"
+ }
match[0] = pos;
s[out] = AddState(s[out], re.start.Next(), match);
}
@@ -692,14 +695,13 @@ func (re *RE) DoExecute(str string, pos int) *[]int {
// machine has completed
break;
}
+ charwidth := 1;
c := EOF;
if pos < len(str) {
- c = int(str[pos])
+ c, charwidth = sys.stringtorune(str, pos);
}
-//println("position ", pos, "char", string(c), "in", in, "out", out, "len in", len(s[in]));
for i := 0; i < len(s[in]); i++ {
state := s[in][i];
-//state.inst.Print(); print("\n");
switch s[in][i].inst.Type() {
case BOT:
if pos == 0 {
@@ -751,12 +753,11 @@ func (re *RE) DoExecute(str string, pos int) *[]int {
panic("unknown instruction in execute");
}
}
- pos++;
+ pos += charwidth;
}
if !found {
return nil
}
-//if found { println("found: from ", final.match[0], "to", final.match[1] )}
return final.match;
}