diff options
| author | Kyle Consalus <consalus@gmail.com> | 2010-06-02 23:04:44 -0700 |
|---|---|---|
| committer | Kyle Consalus <consalus@gmail.com> | 2010-06-02 23:04:44 -0700 |
| commit | 782ef98ee931b10e8454928455f93cf5859ac361 (patch) | |
| tree | f60b39fd63f570b1ad794fdcea8d6ac315820c64 /src/pkg/regexp/all_test.go | |
| parent | 164ca008324c94078c26d32252bf314b5c933936 (diff) | |
| download | golang-782ef98ee931b10e8454928455f93cf5859ac361.tar.gz | |
Optimization to regexp _CharClass: keep track of overall range of
charclass to avoid unnecessarily iterating over ranges.
Also, use the fact that IntVector is an []int to avoid method calls.
On my machine, this brings us from ~27500 ns/op to ~17500 ns/op in the benchmark I've added (it is also faster in the case where a range check
doesn't help, added a benchmark for this too.)
I'd also like to propose that "[]", and "[^]" be disallowed. They aren't useful as far as I can tell, they aren't widely supported, and they make reasoning about character classes a bit more complicated.
R=r
CC=golang-dev
http://codereview.appspot.com/1495041
Committer: Rob Pike <r@golang.org>
Diffstat (limited to 'src/pkg/regexp/all_test.go')
| -rw-r--r-- | src/pkg/regexp/all_test.go | 36 |
1 files changed, 32 insertions, 4 deletions
diff --git a/src/pkg/regexp/all_test.go b/src/pkg/regexp/all_test.go index 62dad3aa0..fd7ee2acb 100644 --- a/src/pkg/regexp/all_test.go +++ b/src/pkg/regexp/all_test.go @@ -519,7 +519,7 @@ var numSubexpCases = []numSubexpCase{ func TestNumSubexp(t *testing.T) { for _, c := range numSubexpCases { - re, _ := Compile(c.input) + re := MustCompile(c.input) n := re.NumSubexp() if n != c.expected { t.Errorf("NumSubexp for %q returned %d, expected %d", c.input, n, c.expected) @@ -530,7 +530,7 @@ func TestNumSubexp(t *testing.T) { func BenchmarkLiteral(b *testing.B) { x := strings.Repeat("x", 50) b.StopTimer() - re, _ := Compile(x) + re := MustCompile(x) b.StartTimer() for i := 0; i < b.N; i++ { if !re.MatchString(x) { @@ -543,7 +543,35 @@ func BenchmarkLiteral(b *testing.B) { func BenchmarkNotLiteral(b *testing.B) { x := strings.Repeat("x", 49) b.StopTimer() - re, _ := Compile("^" + x) + re := MustCompile("^" + x) + b.StartTimer() + for i := 0; i < b.N; i++ { + if !re.MatchString(x) { + println("no match!") + break + } + } +} + +func BenchmarkMatchClass(b *testing.B) { + b.StopTimer() + x := strings.Repeat("xxxx", 20) + "w" + re := MustCompile("[abcdw]") + b.StartTimer() + for i := 0; i < b.N; i++ { + if !re.MatchString(x) { + println("no match!") + break + } + } +} + +func BenchmarkMatchClass_InRange(b *testing.B) { + b.StopTimer() + // 'b' is betwen 'a' and 'c', so the charclass + // range checking is no help here. + x := strings.Repeat("bbbb", 20) + "c" + re := MustCompile("[ac]") b.StartTimer() for i := 0; i < b.N; i++ { if !re.MatchString(x) { @@ -556,7 +584,7 @@ func BenchmarkNotLiteral(b *testing.B) { func BenchmarkReplaceAll(b *testing.B) { x := "abcdefghijklmnopqrstuvwxyz" b.StopTimer() - re, _ := Compile("[cjrw]") + re := MustCompile("[cjrw]") b.StartTimer() for i := 0; i < b.N; i++ { re.ReplaceAllString(x, "") |
