summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRob Pike <r@golang.org>2010-06-22 16:02:14 -0700
committerRob Pike <r@golang.org>2010-06-22 16:02:14 -0700
commit6514206eca362d767c87f8f8e4045b7ce40e714b (patch)
treeb39fa10c3bb8146f135b78be0aa3131ce5fa07f6
parent9d205d3b9f5cda77ff1dfd0e5dc9320923cd0470 (diff)
downloadgolang-6514206eca362d767c87f8f8e4045b7ce40e714b.tar.gz
regexp: bug fix: need to track whether match begins with fixed prefix.
Fixes issue 872. R=rsc CC=golang-dev http://codereview.appspot.com/1731043
-rw-r--r--src/pkg/regexp/all_test.go3
-rw-r--r--src/pkg/regexp/regexp.go34
2 files changed, 20 insertions, 17 deletions
diff --git a/src/pkg/regexp/all_test.go b/src/pkg/regexp/all_test.go
index fd7ee2acb..9936d4f45 100644
--- a/src/pkg/regexp/all_test.go
+++ b/src/pkg/regexp/all_test.go
@@ -100,7 +100,8 @@ var matches = []tester{
// fixed bugs
tester{`ab$`, "cab", vec{1, 3}},
- tester{`axxb$`, "axxcb", vec{}},
+ tester{`data`, "daXY data", vec{5, 9}},
+ tester{`da(.)a$`, "daXY data", vec{5, 9, 7, 8}},
// can backslash-escape any punctuation
tester{`\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~`,
diff --git a/src/pkg/regexp/regexp.go b/src/pkg/regexp/regexp.go
index edf91531d..4dd430ea6 100644
--- a/src/pkg/regexp/regexp.go
+++ b/src/pkg/regexp/regexp.go
@@ -727,34 +727,35 @@ func (a *matchArena) noMatch() *matchVec {
}
type state struct {
- inst instr // next instruction to execute
- match *matchVec
+ inst instr // next instruction to execute
+ prefixed bool // this match began with a fixed prefix
+ match *matchVec
}
// Append new state to to-do list. Leftmost-longest wins so avoid
// adding a state that's already active. The matchVec will be inc-ref'ed
// if it is assigned to a state.
-func (a *matchArena) addState(s []state, inst instr, match *matchVec, pos, end int) []state {
+func (a *matchArena) addState(s []state, inst instr, prefixed bool, match *matchVec, pos, end int) []state {
switch inst.kind() {
case _BOT:
if pos == 0 {
- s = a.addState(s, inst.next(), match, pos, end)
+ s = a.addState(s, inst.next(), prefixed, match, pos, end)
}
return s
case _EOT:
if pos == end {
- s = a.addState(s, inst.next(), match, pos, end)
+ s = a.addState(s, inst.next(), prefixed, match, pos, end)
}
return s
case _BRA:
n := inst.(*_Bra).n
match.m[2*n] = pos
- s = a.addState(s, inst.next(), match, pos, end)
+ s = a.addState(s, inst.next(), prefixed, match, pos, end)
return s
case _EBRA:
n := inst.(*_Ebra).n
match.m[2*n+1] = pos
- s = a.addState(s, inst.next(), match, pos, end)
+ s = a.addState(s, inst.next(), prefixed, match, pos, end)
return s
}
index := inst.index()
@@ -773,12 +774,13 @@ func (a *matchArena) addState(s []state, inst instr, match *matchVec, pos, end i
}
s = s[0 : l+1]
s[l].inst = inst
+ s[l].prefixed = prefixed
s[l].match = match
match.ref++
if inst.kind() == _ALT {
- s = a.addState(s, inst.(*_Alt).left, a.copy(match), pos, end)
+ s = a.addState(s, inst.(*_Alt).left, prefixed, a.copy(match), pos, end)
// give other branch a copy of this match vector
- s = a.addState(s, inst.next(), a.copy(match), pos, end)
+ s = a.addState(s, inst.next(), prefixed, a.copy(match), pos, end)
}
return s
}
@@ -818,10 +820,10 @@ func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int {
match := arena.noMatch()
match.m[0] = pos
if prefixed {
- s[out] = arena.addState(s[out], re.prefixStart, match, pos, end)
+ s[out] = arena.addState(s[out], re.prefixStart, true, match, pos, end)
prefixed = false // next iteration should start at beginning of machine.
} else {
- s[out] = arena.addState(s[out], re.start.next(), match, pos, end)
+ s[out] = arena.addState(s[out], re.start.next(), false, match, pos, end)
}
arena.free(match) // if addState saved it, ref was incremented
}
@@ -852,19 +854,19 @@ func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int {
case _EOT:
case _CHAR:
if c == st.inst.(*_Char).char {
- s[out] = arena.addState(s[out], st.inst.next(), st.match, pos, end)
+ s[out] = arena.addState(s[out], st.inst.next(), st.prefixed, st.match, pos, end)
}
case _CHARCLASS:
if st.inst.(*_CharClass).matches(c) {
- s[out] = arena.addState(s[out], st.inst.next(), st.match, pos, end)
+ s[out] = arena.addState(s[out], st.inst.next(), st.prefixed, st.match, pos, end)
}
case _ANY:
if c != endOfFile {
- s[out] = arena.addState(s[out], st.inst.next(), st.match, pos, end)
+ s[out] = arena.addState(s[out], st.inst.next(), st.prefixed, st.match, pos, end)
}
case _NOTNL:
if c != endOfFile && c != '\n' {
- s[out] = arena.addState(s[out], st.inst.next(), st.match, pos, end)
+ s[out] = arena.addState(s[out], st.inst.next(), st.prefixed, st.match, pos, end)
}
case _BRA:
case _EBRA:
@@ -892,7 +894,7 @@ func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int {
return nil
}
// if match found, back up start of match by width of prefix.
- if re.prefix != "" && len(final.match.m) > 0 {
+ if final.prefixed && len(final.match.m) > 0 {
final.match.m[0] -= len(re.prefix)
}
return final.match.m