summaryrefslogtreecommitdiff
path: root/src/pkg/unicode/letter_test.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/pkg/unicode/letter_test.go')
-rw-r--r--src/pkg/unicode/letter_test.go118
1 files changed, 118 insertions, 0 deletions
diff --git a/src/pkg/unicode/letter_test.go b/src/pkg/unicode/letter_test.go
index 2d8056218..e4d5572a0 100644
--- a/src/pkg/unicode/letter_test.go
+++ b/src/pkg/unicode/letter_test.go
@@ -5,6 +5,10 @@
package unicode_test
import (
+ "flag"
+ "fmt"
+ "runtime"
+ "sort"
"testing"
. "unicode"
)
@@ -427,3 +431,117 @@ func TestSimpleFold(t *testing.T) {
}
}
}
+
+// Running 'go test -calibrate' runs the calibration to find a plausible
+// cutoff point for linear search of a range list vs. binary search.
+// We create a fake table and then time how long it takes to do a
+// sequence of searches within that table, for all possible inputs
+// relative to the ranges (something before all, in each, between each, after all).
+// This assumes that all possible runes are equally likely.
+// In practice most runes are ASCII so this is a conservative estimate
+// of an effective cutoff value. In practice we could probably set it higher
+// than what this function recommends.
+
+var calibrate = flag.Bool("calibrate", false, "compute crossover for linear vs. binary search")
+
+func TestCalibrate(t *testing.T) {
+ if !*calibrate {
+ return
+ }
+
+ if runtime.GOARCH == "amd64" {
+ fmt.Printf("warning: running calibration on %s\n", runtime.GOARCH)
+ }
+
+ // Find the point where binary search wins by more than 10%.
+ // The 10% bias gives linear search an edge when they're close,
+ // because on predominantly ASCII inputs linear search is even
+ // better than our benchmarks measure.
+ n := sort.Search(64, func(n int) bool {
+ tab := fakeTable(n)
+ blinear := func(b *testing.B) {
+ tab := tab
+ max := n*5 + 20
+ for i := 0; i < b.N; i++ {
+ for j := 0; j <= max; j++ {
+ linear(tab, uint16(j))
+ }
+ }
+ }
+ bbinary := func(b *testing.B) {
+ tab := tab
+ max := n*5 + 20
+ for i := 0; i < b.N; i++ {
+ for j := 0; j <= max; j++ {
+ binary(tab, uint16(j))
+ }
+ }
+ }
+ bmlinear := testing.Benchmark(blinear)
+ bmbinary := testing.Benchmark(bbinary)
+ fmt.Printf("n=%d: linear=%d binary=%d\n", n, bmlinear.NsPerOp(), bmbinary.NsPerOp())
+ return bmlinear.NsPerOp()*100 > bmbinary.NsPerOp()*110
+ })
+ fmt.Printf("calibration: linear cutoff = %d\n", n)
+}
+
+func fakeTable(n int) []Range16 {
+ var r16 []Range16
+ for i := 0; i < n; i++ {
+ r16 = append(r16, Range16{uint16(i*5 + 10), uint16(i*5 + 12), 1})
+ }
+ return r16
+}
+
+func linear(ranges []Range16, r uint16) bool {
+ for i := range ranges {
+ range_ := &ranges[i]
+ if r < range_.Lo {
+ return false
+ }
+ if r <= range_.Hi {
+ return (r-range_.Lo)%range_.Stride == 0
+ }
+ }
+ return false
+}
+
+func binary(ranges []Range16, r uint16) bool {
+ // binary search over ranges
+ lo := 0
+ hi := len(ranges)
+ for lo < hi {
+ m := lo + (hi-lo)/2
+ range_ := &ranges[m]
+ if range_.Lo <= r && r <= range_.Hi {
+ return (r-range_.Lo)%range_.Stride == 0
+ }
+ if r < range_.Lo {
+ hi = m
+ } else {
+ lo = m + 1
+ }
+ }
+ return false
+}
+
+func TestLatinOffset(t *testing.T) {
+ var maps = []map[string]*RangeTable{
+ Categories,
+ FoldCategory,
+ FoldScript,
+ Properties,
+ Scripts,
+ }
+ for _, m := range maps {
+ for name, tab := range m {
+ i := 0
+ for i < len(tab.R16) && tab.R16[i].Hi <= MaxLatin1 {
+ i++
+ }
+ if tab.LatinOffset != i {
+ t.Errorf("%s: LatinOffset=%d, want %d", name, tab.LatinOffset, i)
+ }
+ }
+ }
+}