summaryrefslogtreecommitdiff
path: root/src/pkg/exp/norm/trie.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/pkg/exp/norm/trie.go')
-rw-r--r--src/pkg/exp/norm/trie.go232
1 files changed, 232 insertions, 0 deletions
diff --git a/src/pkg/exp/norm/trie.go b/src/pkg/exp/norm/trie.go
new file mode 100644
index 000000000..82267a8d3
--- /dev/null
+++ b/src/pkg/exp/norm/trie.go
@@ -0,0 +1,232 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package norm
+
+type valueRange struct {
+ value uint16 // header: value:stride
+ lo, hi byte // header: lo:n
+}
+
+type trie struct {
+ index []uint8
+ values []uint16
+ sparse []valueRange
+ sparseOffset []uint16
+ cutoff uint8 // indices >= cutoff are sparse
+}
+
+// lookupValue determines the type of block n and looks up the value for b.
+// For n < t.cutoff, the block is a simple lookup table. Otherwise, the block
+// is a list of ranges with an accompanying value. Given a matching range r,
+// the value for b is by r.value + (b - r.lo) * stride.
+func (t *trie) lookupValue(n uint8, b byte) uint16 {
+ if n < t.cutoff {
+ return t.values[uint16(n)<<6+uint16(b)]
+ }
+ offset := t.sparseOffset[n-t.cutoff]
+ header := t.sparse[offset]
+ lo := offset + 1
+ hi := lo + uint16(header.lo)
+ for lo < hi {
+ m := lo + (hi-lo)/2
+ r := t.sparse[m]
+ if r.lo <= b && b <= r.hi {
+ return r.value + uint16(b-r.lo)*header.value
+ }
+ if b < r.lo {
+ hi = m
+ } else {
+ lo = m + 1
+ }
+ }
+ return 0
+}
+
+const (
+ t1 = 0x00 // 0000 0000
+ tx = 0x80 // 1000 0000
+ t2 = 0xC0 // 1100 0000
+ t3 = 0xE0 // 1110 0000
+ t4 = 0xF0 // 1111 0000
+ t5 = 0xF8 // 1111 1000
+ t6 = 0xFC // 1111 1100
+ te = 0xFE // 1111 1110
+)
+
+// lookup returns the trie value for the first UTF-8 encoding in s and
+// the width in bytes of this encoding. The size will be 0 if s does not
+// hold enough bytes to complete the encoding. len(s) must be greater than 0.
+func (t *trie) lookup(s []byte) (v uint16, sz int) {
+ c0 := s[0]
+ switch {
+ case c0 < tx:
+ return t.values[c0], 1
+ case c0 < t2:
+ return 0, 1
+ case c0 < t3:
+ if len(s) < 2 {
+ return 0, 0
+ }
+ i := t.index[c0]
+ c1 := s[1]
+ if c1 < tx || t2 <= c1 {
+ return 0, 1
+ }
+ return t.lookupValue(i, c1), 2
+ case c0 < t4:
+ if len(s) < 3 {
+ return 0, 0
+ }
+ i := t.index[c0]
+ c1 := s[1]
+ if c1 < tx || t2 <= c1 {
+ return 0, 1
+ }
+ o := uint16(i)<<6 + uint16(c1)
+ i = t.index[o]
+ c2 := s[2]
+ if c2 < tx || t2 <= c2 {
+ return 0, 2
+ }
+ return t.lookupValue(i, c2), 3
+ case c0 < t5:
+ if len(s) < 4 {
+ return 0, 0
+ }
+ i := t.index[c0]
+ c1 := s[1]
+ if c1 < tx || t2 <= c1 {
+ return 0, 1
+ }
+ o := uint16(i)<<6 + uint16(c1)
+ i = t.index[o]
+ c2 := s[2]
+ if c2 < tx || t2 <= c2 {
+ return 0, 2
+ }
+ o = uint16(i)<<6 + uint16(c2)
+ i = t.index[o]
+ c3 := s[3]
+ if c3 < tx || t2 <= c3 {
+ return 0, 3
+ }
+ return t.lookupValue(i, c3), 4
+ }
+ // Illegal rune
+ return 0, 1
+}
+
+// lookupString returns the trie value for the first UTF-8 encoding in s and
+// the width in bytes of this encoding. The size will be 0 if s does not
+// hold enough bytes to complete the encoding. len(s) must be greater than 0.
+func (t *trie) lookupString(s string) (v uint16, sz int) {
+ c0 := s[0]
+ switch {
+ case c0 < tx:
+ return t.values[c0], 1
+ case c0 < t2:
+ return 0, 1
+ case c0 < t3:
+ if len(s) < 2 {
+ return 0, 0
+ }
+ i := t.index[c0]
+ c1 := s[1]
+ if c1 < tx || t2 <= c1 {
+ return 0, 1
+ }
+ return t.lookupValue(i, c1), 2
+ case c0 < t4:
+ if len(s) < 3 {
+ return 0, 0
+ }
+ i := t.index[c0]
+ c1 := s[1]
+ if c1 < tx || t2 <= c1 {
+ return 0, 1
+ }
+ o := uint16(i)<<6 + uint16(c1)
+ i = t.index[o]
+ c2 := s[2]
+ if c2 < tx || t2 <= c2 {
+ return 0, 2
+ }
+ return t.lookupValue(i, c2), 3
+ case c0 < t5:
+ if len(s) < 4 {
+ return 0, 0
+ }
+ i := t.index[c0]
+ c1 := s[1]
+ if c1 < tx || t2 <= c1 {
+ return 0, 1
+ }
+ o := uint16(i)<<6 + uint16(c1)
+ i = t.index[o]
+ c2 := s[2]
+ if c2 < tx || t2 <= c2 {
+ return 0, 2
+ }
+ o = uint16(i)<<6 + uint16(c2)
+ i = t.index[o]
+ c3 := s[3]
+ if c3 < tx || t2 <= c3 {
+ return 0, 3
+ }
+ return t.lookupValue(i, c3), 4
+ }
+ // Illegal rune
+ return 0, 1
+}
+
+// lookupUnsafe returns the trie value for the first UTF-8 encoding in s.
+// s must hold a full encoding.
+func (t *trie) lookupUnsafe(s []byte) uint16 {
+ c0 := s[0]
+ if c0 < tx {
+ return t.values[c0]
+ }
+ if c0 < t2 {
+ return 0
+ }
+ i := t.index[c0]
+ if c0 < t3 {
+ return t.lookupValue(i, s[1])
+ }
+ i = t.index[uint16(i)<<6+uint16(s[1])]
+ if c0 < t4 {
+ return t.lookupValue(i, s[2])
+ }
+ i = t.index[uint16(i)<<6+uint16(s[2])]
+ if c0 < t5 {
+ return t.lookupValue(i, s[3])
+ }
+ return 0
+}
+
+// lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s.
+// s must hold a full encoding.
+func (t *trie) lookupStringUnsafe(s string) uint16 {
+ c0 := s[0]
+ if c0 < tx {
+ return t.values[c0]
+ }
+ if c0 < t2 {
+ return 0
+ }
+ i := t.index[c0]
+ if c0 < t3 {
+ return t.lookupValue(i, s[1])
+ }
+ i = t.index[uint16(i)<<6+uint16(s[1])]
+ if c0 < t4 {
+ return t.lookupValue(i, s[2])
+ }
+ i = t.index[uint16(i)<<6+uint16(s[2])]
+ if c0 < t5 {
+ return t.lookupValue(i, s[3])
+ }
+ return 0
+}