diff options
Diffstat (limited to 'src/pkg/exp/norm/trie.go')
-rw-r--r-- | src/pkg/exp/norm/trie.go | 232 |
1 files changed, 232 insertions, 0 deletions
diff --git a/src/pkg/exp/norm/trie.go b/src/pkg/exp/norm/trie.go new file mode 100644 index 000000000..82267a8d3 --- /dev/null +++ b/src/pkg/exp/norm/trie.go @@ -0,0 +1,232 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package norm + +type valueRange struct { + value uint16 // header: value:stride + lo, hi byte // header: lo:n +} + +type trie struct { + index []uint8 + values []uint16 + sparse []valueRange + sparseOffset []uint16 + cutoff uint8 // indices >= cutoff are sparse +} + +// lookupValue determines the type of block n and looks up the value for b. +// For n < t.cutoff, the block is a simple lookup table. Otherwise, the block +// is a list of ranges with an accompanying value. Given a matching range r, +// the value for b is by r.value + (b - r.lo) * stride. +func (t *trie) lookupValue(n uint8, b byte) uint16 { + if n < t.cutoff { + return t.values[uint16(n)<<6+uint16(b)] + } + offset := t.sparseOffset[n-t.cutoff] + header := t.sparse[offset] + lo := offset + 1 + hi := lo + uint16(header.lo) + for lo < hi { + m := lo + (hi-lo)/2 + r := t.sparse[m] + if r.lo <= b && b <= r.hi { + return r.value + uint16(b-r.lo)*header.value + } + if b < r.lo { + hi = m + } else { + lo = m + 1 + } + } + return 0 +} + +const ( + t1 = 0x00 // 0000 0000 + tx = 0x80 // 1000 0000 + t2 = 0xC0 // 1100 0000 + t3 = 0xE0 // 1110 0000 + t4 = 0xF0 // 1111 0000 + t5 = 0xF8 // 1111 1000 + t6 = 0xFC // 1111 1100 + te = 0xFE // 1111 1110 +) + +// lookup returns the trie value for the first UTF-8 encoding in s and +// the width in bytes of this encoding. The size will be 0 if s does not +// hold enough bytes to complete the encoding. len(s) must be greater than 0. +func (t *trie) lookup(s []byte) (v uint16, sz int) { + c0 := s[0] + switch { + case c0 < tx: + return t.values[c0], 1 + case c0 < t2: + return 0, 1 + case c0 < t3: + if len(s) < 2 { + return 0, 0 + } + i := t.index[c0] + c1 := s[1] + if c1 < tx || t2 <= c1 { + return 0, 1 + } + return t.lookupValue(i, c1), 2 + case c0 < t4: + if len(s) < 3 { + return 0, 0 + } + i := t.index[c0] + c1 := s[1] + if c1 < tx || t2 <= c1 { + return 0, 1 + } + o := uint16(i)<<6 + uint16(c1) + i = t.index[o] + c2 := s[2] + if c2 < tx || t2 <= c2 { + return 0, 2 + } + return t.lookupValue(i, c2), 3 + case c0 < t5: + if len(s) < 4 { + return 0, 0 + } + i := t.index[c0] + c1 := s[1] + if c1 < tx || t2 <= c1 { + return 0, 1 + } + o := uint16(i)<<6 + uint16(c1) + i = t.index[o] + c2 := s[2] + if c2 < tx || t2 <= c2 { + return 0, 2 + } + o = uint16(i)<<6 + uint16(c2) + i = t.index[o] + c3 := s[3] + if c3 < tx || t2 <= c3 { + return 0, 3 + } + return t.lookupValue(i, c3), 4 + } + // Illegal rune + return 0, 1 +} + +// lookupString returns the trie value for the first UTF-8 encoding in s and +// the width in bytes of this encoding. The size will be 0 if s does not +// hold enough bytes to complete the encoding. len(s) must be greater than 0. +func (t *trie) lookupString(s string) (v uint16, sz int) { + c0 := s[0] + switch { + case c0 < tx: + return t.values[c0], 1 + case c0 < t2: + return 0, 1 + case c0 < t3: + if len(s) < 2 { + return 0, 0 + } + i := t.index[c0] + c1 := s[1] + if c1 < tx || t2 <= c1 { + return 0, 1 + } + return t.lookupValue(i, c1), 2 + case c0 < t4: + if len(s) < 3 { + return 0, 0 + } + i := t.index[c0] + c1 := s[1] + if c1 < tx || t2 <= c1 { + return 0, 1 + } + o := uint16(i)<<6 + uint16(c1) + i = t.index[o] + c2 := s[2] + if c2 < tx || t2 <= c2 { + return 0, 2 + } + return t.lookupValue(i, c2), 3 + case c0 < t5: + if len(s) < 4 { + return 0, 0 + } + i := t.index[c0] + c1 := s[1] + if c1 < tx || t2 <= c1 { + return 0, 1 + } + o := uint16(i)<<6 + uint16(c1) + i = t.index[o] + c2 := s[2] + if c2 < tx || t2 <= c2 { + return 0, 2 + } + o = uint16(i)<<6 + uint16(c2) + i = t.index[o] + c3 := s[3] + if c3 < tx || t2 <= c3 { + return 0, 3 + } + return t.lookupValue(i, c3), 4 + } + // Illegal rune + return 0, 1 +} + +// lookupUnsafe returns the trie value for the first UTF-8 encoding in s. +// s must hold a full encoding. +func (t *trie) lookupUnsafe(s []byte) uint16 { + c0 := s[0] + if c0 < tx { + return t.values[c0] + } + if c0 < t2 { + return 0 + } + i := t.index[c0] + if c0 < t3 { + return t.lookupValue(i, s[1]) + } + i = t.index[uint16(i)<<6+uint16(s[1])] + if c0 < t4 { + return t.lookupValue(i, s[2]) + } + i = t.index[uint16(i)<<6+uint16(s[2])] + if c0 < t5 { + return t.lookupValue(i, s[3]) + } + return 0 +} + +// lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s. +// s must hold a full encoding. +func (t *trie) lookupStringUnsafe(s string) uint16 { + c0 := s[0] + if c0 < tx { + return t.values[c0] + } + if c0 < t2 { + return 0 + } + i := t.index[c0] + if c0 < t3 { + return t.lookupValue(i, s[1]) + } + i = t.index[uint16(i)<<6+uint16(s[1])] + if c0 < t4 { + return t.lookupValue(i, s[2]) + } + i = t.index[uint16(i)<<6+uint16(s[2])] + if c0 < t5 { + return t.lookupValue(i, s[3]) + } + return 0 +} |