// Copyright 2011 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package norm type valueRange struct { value uint16 // header: value:stride lo, hi byte // header: lo:n } type trie struct { index []uint8 values []uint16 sparse []valueRange sparseOffset []uint16 cutoff uint8 // indices >= cutoff are sparse } // lookupValue determines the type of block n and looks up the value for b. // For n < t.cutoff, the block is a simple lookup table. Otherwise, the block // is a list of ranges with an accompanying value. Given a matching range r, // the value for b is by r.value + (b - r.lo) * stride. func (t *trie) lookupValue(n uint8, b byte) uint16 { if n < t.cutoff { return t.values[uint16(n)<<6+uint16(b&maskx)] } offset := t.sparseOffset[n-t.cutoff] header := t.sparse[offset] lo := offset + 1 hi := lo + uint16(header.lo) for lo < hi { m := lo + (hi-lo)/2 r := t.sparse[m] if r.lo <= b && b <= r.hi { return r.value + uint16(b-r.lo)*header.value } if b < r.lo { hi = m } else { lo = m + 1 } } return 0 } const ( t1 = 0x00 // 0000 0000 tx = 0x80 // 1000 0000 t2 = 0xC0 // 1100 0000 t3 = 0xE0 // 1110 0000 t4 = 0xF0 // 1111 0000 t5 = 0xF8 // 1111 1000 t6 = 0xFC // 1111 1100 te = 0xFE // 1111 1110 maskx = 0x3F // 0011 1111 mask2 = 0x1F // 0001 1111 mask3 = 0x0F // 0000 1111 mask4 = 0x07 // 0000 0111 ) // lookup returns the trie value for the first UTF-8 encoding in s and // the width in bytes of this encoding. The size will be 0 if s does not // hold enough bytes to complete the encoding. len(s) must be greater than 0. func (t *trie) lookup(s []byte) (v uint16, sz int) { c0 := s[0] switch { case c0 < tx: return t.values[c0], 1 case c0 < t2: return 0, 1 case c0 < t3: if len(s) < 2 { return 0, 0 } i := t.index[c0] c1 := s[1] if c1 < tx || t2 <= c1 { return 0, 1 } return t.lookupValue(i, c1), 2 case c0 < t4: if len(s) < 3 { return 0, 0 } i := t.index[c0] c1 := s[1] if c1 < tx || t2 <= c1 { return 0, 1 } o := uint16(i)<<6 + uint16(c1)&maskx i = t.index[o] c2 := s[2] if c2 < tx || t2 <= c2 { return 0, 2 } return t.lookupValue(i, c2), 3 case c0 < t5: if len(s) < 4 { return 0, 0 } i := t.index[c0] c1 := s[1] if c1 < tx || t2 <= c1 { return 0, 1 } o := uint16(i)<<6 + uint16(c1)&maskx i = t.index[o] c2 := s[2] if c2 < tx || t2 <= c2 { return 0, 2 } o = uint16(i)<<6 + uint16(c2)&maskx i = t.index[o] c3 := s[3] if c3 < tx || t2 <= c3 { return 0, 3 } return t.lookupValue(i, c3), 4 } // Illegal rune return 0, 1 } // lookupString returns the trie value for the first UTF-8 encoding in s and // the width in bytes of this encoding. The size will be 0 if s does not // hold enough bytes to complete the encoding. len(s) must be greater than 0. func (t *trie) lookupString(s string) (v uint16, sz int) { c0 := s[0] switch { case c0 < tx: return t.values[c0], 1 case c0 < t2: return 0, 1 case c0 < t3: if len(s) < 2 { return 0, 0 } i := t.index[c0] c1 := s[1] if c1 < tx || t2 <= c1 { return 0, 1 } return t.lookupValue(i, c1), 2 case c0 < t4: if len(s) < 3 { return 0, 0 } i := t.index[c0] c1 := s[1] if c1 < tx || t2 <= c1 { return 0, 1 } o := uint16(i)<<6 + uint16(c1)&maskx i = t.index[o] c2 := s[2] if c2 < tx || t2 <= c2 { return 0, 2 } return t.lookupValue(i, c2), 3 case c0 < t5: if len(s) < 4 { return 0, 0 } i := t.index[c0] c1 := s[1] if c1 < tx || t2 <= c1 { return 0, 1 } o := uint16(i)<<6 + uint16(c1)&maskx i = t.index[o] c2 := s[2] if c2 < tx || t2 <= c2 { return 0, 2 } o = uint16(i)<<6 + uint16(c2)&maskx i = t.index[o] c3 := s[3] if c3 < tx || t2 <= c3 { return 0, 3 } return t.lookupValue(i, c3), 4 } // Illegal rune return 0, 1 } // lookupUnsafe returns the trie value for the first UTF-8 encoding in s. // s must hold a full encoding. func (t *trie) lookupUnsafe(s []byte) uint16 { c0 := s[0] if c0 < tx { return t.values[c0] } if c0 < t2 { return 0 } i := t.index[c0] if c0 < t3 { return t.lookupValue(i, s[1]) } i = t.index[uint16(i)<<6+uint16(s[1])&maskx] if c0 < t4 { return t.lookupValue(i, s[2]) } i = t.index[uint16(i)<<6+uint16(s[2])&maskx] if c0 < t5 { return t.lookupValue(i, s[3]) } return 0 } // lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s. // s must hold a full encoding. func (t *trie) lookupStringUnsafe(s string) uint16 { c0 := s[0] if c0 < tx { return t.values[c0] } if c0 < t2 { return 0 } i := t.index[c0] if c0 < t3 { return t.lookupValue(i, s[1]) } i = t.index[uint16(i)<<6+uint16(s[1])&maskx] if c0 < t4 { return t.lookupValue(i, s[2]) } i = t.index[uint16(i)<<6+uint16(s[2])&maskx] if c0 < t5 { return t.lookupValue(i, s[3]) } return 0 }