diff options
Diffstat (limited to 'src/pkg/unicode/letter_test.go')
-rw-r--r-- | src/pkg/unicode/letter_test.go | 118 |
1 files changed, 118 insertions, 0 deletions
diff --git a/src/pkg/unicode/letter_test.go b/src/pkg/unicode/letter_test.go index 2d8056218..e4d5572a0 100644 --- a/src/pkg/unicode/letter_test.go +++ b/src/pkg/unicode/letter_test.go @@ -5,6 +5,10 @@ package unicode_test import ( + "flag" + "fmt" + "runtime" + "sort" "testing" . "unicode" ) @@ -427,3 +431,117 @@ func TestSimpleFold(t *testing.T) { } } } + +// Running 'go test -calibrate' runs the calibration to find a plausible +// cutoff point for linear search of a range list vs. binary search. +// We create a fake table and then time how long it takes to do a +// sequence of searches within that table, for all possible inputs +// relative to the ranges (something before all, in each, between each, after all). +// This assumes that all possible runes are equally likely. +// In practice most runes are ASCII so this is a conservative estimate +// of an effective cutoff value. In practice we could probably set it higher +// than what this function recommends. + +var calibrate = flag.Bool("calibrate", false, "compute crossover for linear vs. binary search") + +func TestCalibrate(t *testing.T) { + if !*calibrate { + return + } + + if runtime.GOARCH == "amd64" { + fmt.Printf("warning: running calibration on %s\n", runtime.GOARCH) + } + + // Find the point where binary search wins by more than 10%. + // The 10% bias gives linear search an edge when they're close, + // because on predominantly ASCII inputs linear search is even + // better than our benchmarks measure. + n := sort.Search(64, func(n int) bool { + tab := fakeTable(n) + blinear := func(b *testing.B) { + tab := tab + max := n*5 + 20 + for i := 0; i < b.N; i++ { + for j := 0; j <= max; j++ { + linear(tab, uint16(j)) + } + } + } + bbinary := func(b *testing.B) { + tab := tab + max := n*5 + 20 + for i := 0; i < b.N; i++ { + for j := 0; j <= max; j++ { + binary(tab, uint16(j)) + } + } + } + bmlinear := testing.Benchmark(blinear) + bmbinary := testing.Benchmark(bbinary) + fmt.Printf("n=%d: linear=%d binary=%d\n", n, bmlinear.NsPerOp(), bmbinary.NsPerOp()) + return bmlinear.NsPerOp()*100 > bmbinary.NsPerOp()*110 + }) + fmt.Printf("calibration: linear cutoff = %d\n", n) +} + +func fakeTable(n int) []Range16 { + var r16 []Range16 + for i := 0; i < n; i++ { + r16 = append(r16, Range16{uint16(i*5 + 10), uint16(i*5 + 12), 1}) + } + return r16 +} + +func linear(ranges []Range16, r uint16) bool { + for i := range ranges { + range_ := &ranges[i] + if r < range_.Lo { + return false + } + if r <= range_.Hi { + return (r-range_.Lo)%range_.Stride == 0 + } + } + return false +} + +func binary(ranges []Range16, r uint16) bool { + // binary search over ranges + lo := 0 + hi := len(ranges) + for lo < hi { + m := lo + (hi-lo)/2 + range_ := &ranges[m] + if range_.Lo <= r && r <= range_.Hi { + return (r-range_.Lo)%range_.Stride == 0 + } + if r < range_.Lo { + hi = m + } else { + lo = m + 1 + } + } + return false +} + +func TestLatinOffset(t *testing.T) { + var maps = []map[string]*RangeTable{ + Categories, + FoldCategory, + FoldScript, + Properties, + Scripts, + } + for _, m := range maps { + for name, tab := range m { + i := 0 + for i < len(tab.R16) && tab.R16[i].Hi <= MaxLatin1 { + i++ + } + if tab.LatinOffset != i { + t.Errorf("%s: LatinOffset=%d, want %d", name, tab.LatinOffset, i) + } + } + } +} |