summaryrefslogtreecommitdiff
path: root/src/pkg/regexp/regexp.go
diff options
context:
space:
mode:
authorKyle Consalus <consalus@gmail.com>2010-06-02 23:04:44 -0700
committerKyle Consalus <consalus@gmail.com>2010-06-02 23:04:44 -0700
commit782ef98ee931b10e8454928455f93cf5859ac361 (patch)
treef60b39fd63f570b1ad794fdcea8d6ac315820c64 /src/pkg/regexp/regexp.go
parent164ca008324c94078c26d32252bf314b5c933936 (diff)
downloadgolang-782ef98ee931b10e8454928455f93cf5859ac361.tar.gz
Optimization to regexp _CharClass: keep track of overall range of
charclass to avoid unnecessarily iterating over ranges. Also, use the fact that IntVector is an []int to avoid method calls. On my machine, this brings us from ~27500 ns/op to ~17500 ns/op in the benchmark I've added (it is also faster in the case where a range check doesn't help, added a benchmark for this too.) I'd also like to propose that "[]", and "[^]" be disallowed. They aren't useful as far as I can tell, they aren't widely supported, and they make reasoning about character classes a bit more complicated. R=r CC=golang-dev http://codereview.appspot.com/1495041 Committer: Rob Pike <r@golang.org>
Diffstat (limited to 'src/pkg/regexp/regexp.go')
-rw-r--r--src/pkg/regexp/regexp.go22
1 files changed, 16 insertions, 6 deletions
diff --git a/src/pkg/regexp/regexp.go b/src/pkg/regexp/regexp.go
index cdd5cacdd..edf91531d 100644
--- a/src/pkg/regexp/regexp.go
+++ b/src/pkg/regexp/regexp.go
@@ -152,10 +152,10 @@ func newChar(char int) *_Char {
type _CharClass struct {
common
- char int
negate bool // is character class negated? ([^a-z])
// vector of int, stored pairwise: [a-z] is (a,z); x is (x,x):
- ranges *vector.IntVector
+ ranges *vector.IntVector
+ cmin, cmax int
}
func (cclass *_CharClass) kind() int { return _CHARCLASS }
@@ -180,13 +180,21 @@ func (cclass *_CharClass) addRange(a, b int) {
// range is a through b inclusive
cclass.ranges.Push(a)
cclass.ranges.Push(b)
+ if a < cclass.cmin {
+ cclass.cmin = a
+ }
+ if b > cclass.cmax {
+ cclass.cmax = b
+ }
}
func (cclass *_CharClass) matches(c int) bool {
- for i := 0; i < cclass.ranges.Len(); i = i + 2 {
- min := cclass.ranges.At(i)
- max := cclass.ranges.At(i + 1)
- if min <= c && c <= max {
+ if c < cclass.cmin || c > cclass.cmax {
+ return cclass.negate
+ }
+ ranges := []int(*cclass.ranges)
+ for i := 0; i < len(ranges); i = i + 2 {
+ if ranges[i] <= c && c <= ranges[i+1] {
return !cclass.negate
}
}
@@ -196,6 +204,8 @@ func (cclass *_CharClass) matches(c int) bool {
func newCharClass() *_CharClass {
c := new(_CharClass)
c.ranges = new(vector.IntVector)
+ c.cmin = 0x10FFFF + 1 // MaxRune + 1
+ c.cmax = -1
return c
}