diff options
author | Kyle Consalus <consalus@gmail.com> | 2010-06-02 23:04:44 -0700 |
---|---|---|
committer | Kyle Consalus <consalus@gmail.com> | 2010-06-02 23:04:44 -0700 |
commit | 782ef98ee931b10e8454928455f93cf5859ac361 (patch) | |
tree | f60b39fd63f570b1ad794fdcea8d6ac315820c64 /src/pkg/regexp/regexp.go | |
parent | 164ca008324c94078c26d32252bf314b5c933936 (diff) | |
download | golang-782ef98ee931b10e8454928455f93cf5859ac361.tar.gz |
Optimization to regexp _CharClass: keep track of overall range of
charclass to avoid unnecessarily iterating over ranges.
Also, use the fact that IntVector is an []int to avoid method calls.
On my machine, this brings us from ~27500 ns/op to ~17500 ns/op in the benchmark I've added (it is also faster in the case where a range check
doesn't help, added a benchmark for this too.)
I'd also like to propose that "[]", and "[^]" be disallowed. They aren't useful as far as I can tell, they aren't widely supported, and they make reasoning about character classes a bit more complicated.
R=r
CC=golang-dev
http://codereview.appspot.com/1495041
Committer: Rob Pike <r@golang.org>
Diffstat (limited to 'src/pkg/regexp/regexp.go')
-rw-r--r-- | src/pkg/regexp/regexp.go | 22 |
1 files changed, 16 insertions, 6 deletions
diff --git a/src/pkg/regexp/regexp.go b/src/pkg/regexp/regexp.go index cdd5cacdd..edf91531d 100644 --- a/src/pkg/regexp/regexp.go +++ b/src/pkg/regexp/regexp.go @@ -152,10 +152,10 @@ func newChar(char int) *_Char { type _CharClass struct { common - char int negate bool // is character class negated? ([^a-z]) // vector of int, stored pairwise: [a-z] is (a,z); x is (x,x): - ranges *vector.IntVector + ranges *vector.IntVector + cmin, cmax int } func (cclass *_CharClass) kind() int { return _CHARCLASS } @@ -180,13 +180,21 @@ func (cclass *_CharClass) addRange(a, b int) { // range is a through b inclusive cclass.ranges.Push(a) cclass.ranges.Push(b) + if a < cclass.cmin { + cclass.cmin = a + } + if b > cclass.cmax { + cclass.cmax = b + } } func (cclass *_CharClass) matches(c int) bool { - for i := 0; i < cclass.ranges.Len(); i = i + 2 { - min := cclass.ranges.At(i) - max := cclass.ranges.At(i + 1) - if min <= c && c <= max { + if c < cclass.cmin || c > cclass.cmax { + return cclass.negate + } + ranges := []int(*cclass.ranges) + for i := 0; i < len(ranges); i = i + 2 { + if ranges[i] <= c && c <= ranges[i+1] { return !cclass.negate } } @@ -196,6 +204,8 @@ func (cclass *_CharClass) matches(c int) bool { func newCharClass() *_CharClass { c := new(_CharClass) c.ranges = new(vector.IntVector) + c.cmin = 0x10FFFF + 1 // MaxRune + 1 + c.cmax = -1 return c } |