summaryrefslogtreecommitdiff
path: root/src/pkg/unicode
diff options
context:
space:
mode:
Diffstat (limited to 'src/pkg/unicode')
-rw-r--r--src/pkg/unicode/digit.go2
-rw-r--r--src/pkg/unicode/graphic.go32
-rw-r--r--src/pkg/unicode/letter.go80
-rw-r--r--src/pkg/unicode/letter_test.go118
-rw-r--r--src/pkg/unicode/maketables.go75
-rw-r--r--src/pkg/unicode/script_test.go11
-rw-r--r--src/pkg/unicode/tables.go783
-rw-r--r--src/pkg/unicode/utf8/example_test.go192
-rw-r--r--src/pkg/unicode/utf8/utf8.go56
-rw-r--r--src/pkg/unicode/utf8/utf8_test.go101
10 files changed, 1141 insertions, 309 deletions
diff --git a/src/pkg/unicode/digit.go b/src/pkg/unicode/digit.go
index 4800bd6ea..53171b396 100644
--- a/src/pkg/unicode/digit.go
+++ b/src/pkg/unicode/digit.go
@@ -9,5 +9,5 @@ func IsDigit(r rune) bool {
if r <= MaxLatin1 {
return '0' <= r && r <= '9'
}
- return Is(Digit, r)
+ return isExcludingLatin(Digit, r)
}
diff --git a/src/pkg/unicode/graphic.go b/src/pkg/unicode/graphic.go
index 0de90ebd8..5b995fcd0 100644
--- a/src/pkg/unicode/graphic.go
+++ b/src/pkg/unicode/graphic.go
@@ -6,15 +6,17 @@ package unicode
// Bit masks for each code point under U+0100, for fast lookup.
const (
- pC = 1 << iota // a control character.
- pP // a punctuation character.
- pN // a numeral.
- pS // a symbolic character.
- pZ // a spacing character.
- pLu // an upper-case letter.
- pLl // a lower-case letter.
- pp // a printable character according to Go's definition.
- pg = pp | pZ // a graphical character according to the Unicode definition.
+ pC = 1 << iota // a control character.
+ pP // a punctuation character.
+ pN // a numeral.
+ pS // a symbolic character.
+ pZ // a spacing character.
+ pLu // an upper-case letter.
+ pLl // a lower-case letter.
+ pp // a printable character according to Go's definition.
+ pg = pp | pZ // a graphical character according to the Unicode definition.
+ pLo = pLl | pLu // a letter that is neither upper nor lower case.
+ pLmask = pLo
)
// GraphicRanges defines the set of graphic characters according to Unicode.
@@ -76,15 +78,15 @@ func IsControl(r rune) bool {
// IsLetter reports whether the rune is a letter (category L).
func IsLetter(r rune) bool {
if uint32(r) <= MaxLatin1 {
- return properties[uint8(r)]&(pLu|pLl) != 0
+ return properties[uint8(r)]&(pLmask) != 0
}
- return Is(Letter, r)
+ return isExcludingLatin(Letter, r)
}
// IsMark reports whether the rune is a mark character (category M).
func IsMark(r rune) bool {
// There are no mark characters in Latin-1.
- return Is(Mark, r)
+ return isExcludingLatin(Mark, r)
}
// IsNumber reports whether the rune is a number (category N).
@@ -92,7 +94,7 @@ func IsNumber(r rune) bool {
if uint32(r) <= MaxLatin1 {
return properties[uint8(r)]&pN != 0
}
- return Is(Number, r)
+ return isExcludingLatin(Number, r)
}
// IsPunct reports whether the rune is a Unicode punctuation character
@@ -119,7 +121,7 @@ func IsSpace(r rune) bool {
}
return false
}
- return Is(White_Space, r)
+ return isExcludingLatin(White_Space, r)
}
// IsSymbol reports whether the rune is a symbolic character.
@@ -127,5 +129,5 @@ func IsSymbol(r rune) bool {
if uint32(r) <= MaxLatin1 {
return properties[uint8(r)]&pS != 0
}
- return Is(Symbol, r)
+ return isExcludingLatin(Symbol, r)
}
diff --git a/src/pkg/unicode/letter.go b/src/pkg/unicode/letter.go
index be484553d..8d56363df 100644
--- a/src/pkg/unicode/letter.go
+++ b/src/pkg/unicode/letter.go
@@ -19,8 +19,9 @@ const (
// The two slices must be in sorted order and non-overlapping.
// Also, R32 should contain only values >= 0x10000 (1<<16).
type RangeTable struct {
- R16 []Range16
- R32 []Range32
+ R16 []Range16
+ R32 []Range32
+ LatinOffset int // number of entries in R16 with Hi <= MaxLatin1
}
// Range16 represents of a range of 16-bit Unicode code points. The range runs from Lo to Hi
@@ -80,14 +81,31 @@ const (
UpperLower = MaxRune + 1 // (Cannot be a valid delta.)
)
-// is16 uses binary search to test whether rune is in the specified slice of 16-bit ranges.
+// linearMax is the largest table size for which a non-Latin1 rune is looked up by linear rather than binary search.
+// Derived by running 'go test -calibrate'.
+const linearMax = 18
+
+// is16 reports whether r is in the sorted slice of 16-bit ranges.
func is16(ranges []Range16, r uint16) bool {
+ if len(ranges) <= linearMax || r <= MaxLatin1 {
+ for i := range ranges {
+ range_ := &ranges[i]
+ if r < range_.Lo {
+ return false
+ }
+ if r <= range_.Hi {
+ return (r-range_.Lo)%range_.Stride == 0
+ }
+ }
+ return false
+ }
+
// binary search over ranges
lo := 0
hi := len(ranges)
for lo < hi {
m := lo + (hi-lo)/2
- range_ := ranges[m]
+ range_ := &ranges[m]
if range_.Lo <= r && r <= range_.Hi {
return (r-range_.Lo)%range_.Stride == 0
}
@@ -100,8 +118,21 @@ func is16(ranges []Range16, r uint16) bool {
return false
}
-// is32 uses binary search to test whether rune is in the specified slice of 32-bit ranges.
+// is32 reports whether r is in the sorted slice of 32-bit ranges.
func is32(ranges []Range32, r uint32) bool {
+ if len(ranges) <= linearMax {
+ for i := range ranges {
+ range_ := &ranges[i]
+ if r < range_.Lo {
+ return false
+ }
+ if r <= range_.Hi {
+ return (r-range_.Lo)%range_.Stride == 0
+ }
+ }
+ return false
+ }
+
// binary search over ranges
lo := 0
hi := len(ranges)
@@ -122,21 +153,6 @@ func is32(ranges []Range32, r uint32) bool {
// Is tests whether rune is in the specified table of ranges.
func Is(rangeTab *RangeTable, r rune) bool {
- // common case: rune is ASCII or Latin-1.
- if uint32(r) <= MaxLatin1 {
- // Only need to check R16, since R32 is always >= 1<<16.
- r16 := uint16(r)
- for _, r := range rangeTab.R16 {
- if r16 > r.Hi {
- continue
- }
- if r16 < r.Lo {
- return false
- }
- return (r16-r.Lo)%r.Stride == 0
- }
- return false
- }
r16 := rangeTab.R16
if len(r16) > 0 && r <= rune(r16[len(r16)-1].Hi) {
return is16(r16, uint16(r))
@@ -148,22 +164,34 @@ func Is(rangeTab *RangeTable, r rune) bool {
return false
}
+// isExcludingLatin reports whether r is in rangeTab, skipping the first
+// rangeTab.LatinOffset entries of R16 (the entries with Hi <= MaxLatin1).
+// It is meant for callers that have already dealt with r <= MaxLatin1:
+// a Latin-1 rune covered only by a skipped entry is reported as absent.
+func isExcludingLatin(rangeTab *RangeTable, r rune) bool {
+	r16 := rangeTab.R16
+	// Compare as uint32 so that a negative rune is not mistaken for a 16-bit
+	// value and then truncated into range by the uint16 conversion below.
+	if off := rangeTab.LatinOffset; len(r16) > off && uint32(r) <= uint32(r16[len(r16)-1].Hi) {
+		return is16(r16[off:], uint16(r))
+	}
+	// A negative rune also fails this signed lower-bound check, so it falls
+	// through to the final return false.
+	r32 := rangeTab.R32
+	if len(r32) > 0 && r >= rune(r32[0].Lo) {
+		return is32(r32, uint32(r))
+	}
+	return false
+}
+
// IsUpper reports whether the rune is an upper case letter.
func IsUpper(r rune) bool {
// See comment in IsGraphic.
if uint32(r) <= MaxLatin1 {
- return properties[uint8(r)]&pLu != 0
+ return properties[uint8(r)]&pLmask == pLu
}
- return Is(Upper, r)
+ return isExcludingLatin(Upper, r)
}
// IsLower reports whether the rune is a lower case letter.
func IsLower(r rune) bool {
// See comment in IsGraphic.
if uint32(r) <= MaxLatin1 {
- return properties[uint8(r)]&pLl != 0
+ return properties[uint8(r)]&pLmask == pLl
}
- return Is(Lower, r)
+ return isExcludingLatin(Lower, r)
}
// IsTitle reports whether the rune is a title case letter.
@@ -171,7 +199,7 @@ func IsTitle(r rune) bool {
if r <= MaxLatin1 {
return false
}
- return Is(Title, r)
+ return isExcludingLatin(Title, r)
}
// to maps the rune using the specified case mapping.
@@ -288,7 +316,7 @@ type foldPair struct {
// SimpleFold iterates over Unicode code points equivalent under
// the Unicode-defined simple case folding. Among the code points
// equivalent to rune (including rune itself), SimpleFold returns the
-// smallest rune >= r if one exists, or else the smallest rune >= 0.
+// smallest rune >= r if one exists, or else the smallest rune >= 0.
//
// For example:
// SimpleFold('A') = 'a'
diff --git a/src/pkg/unicode/letter_test.go b/src/pkg/unicode/letter_test.go
index 2d8056218..e4d5572a0 100644
--- a/src/pkg/unicode/letter_test.go
+++ b/src/pkg/unicode/letter_test.go
@@ -5,6 +5,10 @@
package unicode_test
import (
+ "flag"
+ "fmt"
+ "runtime"
+ "sort"
"testing"
. "unicode"
)
@@ -427,3 +431,117 @@ func TestSimpleFold(t *testing.T) {
}
}
}
+
+// Running 'go test -calibrate' runs the calibration to find a plausible
+// cutoff point for linear search of a range list vs. binary search.
+// We create a fake table and then time how long it takes to do a
+// sequence of searches within that table, for all possible inputs
+// relative to the ranges (something before all, in each, between each, after all).
+// This assumes that all possible runes are equally likely.
+// In practice most runes are ASCII so this is a conservative estimate
+// of an effective cutoff value. In practice we could probably set it higher
+// than what this function recommends.
+
+var calibrate = flag.Bool("calibrate", false, "compute crossover for linear vs. binary search")
+
+// TestCalibrate does nothing unless the -calibrate flag is set. When it is,
+// the test benchmarks linear against binary on fake tables of n ranges and
+// prints the smallest n below 64 at which binary search is more than 10%
+// faster (64 if there is no such n). sort.Search assumes the predicate is
+// monotonic in n; timing noise can violate that, so the result is approximate.
+func TestCalibrate(t *testing.T) {
+ if !*calibrate {
+ return
+ }
+
+ // NOTE(review): the warning is printed only when GOARCH is amd64; the
+ // reason is not stated here - confirm that this condition is intended.
+ if runtime.GOARCH == "amd64" {
+ fmt.Printf("warning: running calibration on %s\n", runtime.GOARCH)
+ }
+
+ // Find the point where binary search wins by more than 10%.
+ // The 10% bias gives linear search an edge when they're close,
+ // because on predominantly ASCII inputs linear search is even
+ // better than our benchmarks measure.
+ n := sort.Search(64, func(n int) bool {
+ tab := fakeTable(n)
+ // blinear times one linear lookup for every probe value 0..max.
+ blinear := func(b *testing.B) {
+ // Copy the captured slice header into a local of the closure.
+ tab := tab
+ // fakeTable(n) covers 10 .. n*5+7, so 0..max probes values before,
+ // inside, between and after all of its ranges.
+ max := n*5 + 20
+ for i := 0; i < b.N; i++ {
+ for j := 0; j <= max; j++ {
+ linear(tab, uint16(j))
+ }
+ }
+ }
+ // bbinary times the same probe sequence using binary search.
+ bbinary := func(b *testing.B) {
+ tab := tab
+ max := n*5 + 20
+ for i := 0; i < b.N; i++ {
+ for j := 0; j <= max; j++ {
+ binary(tab, uint16(j))
+ }
+ }
+ }
+ bmlinear := testing.Benchmark(blinear)
+ bmbinary := testing.Benchmark(bbinary)
+ fmt.Printf("n=%d: linear=%d binary=%d\n", n, bmlinear.NsPerOp(), bmbinary.NsPerOp())
+ // True when linear costs more than 1.1x binary, in integer arithmetic.
+ return bmlinear.NsPerOp()*100 > bmbinary.NsPerOp()*110
+ })
+ fmt.Printf("calibration: linear cutoff = %d\n", n)
+}
+
+// fakeTable builds a synthetic table of n ranges for calibration. Range i is
+// [5*i+10, 5*i+12] with stride 1, so consecutive ranges are separated by a
+// two-value gap. The result is nil when n <= 0.
+func fakeTable(n int) []Range16 {
+	var tab []Range16
+	for lo := 10; len(tab) < n; lo += 5 {
+		tab = append(tab, Range16{Lo: uint16(lo), Hi: uint16(lo + 2), Stride: 1})
+	}
+	return tab
+}
+
+// linear reports whether r is in ranges, which must be sorted, by scanning
+// from the start. It repeats the linear-search loop of is16 in letter.go so
+// that TestCalibrate can time it; keep the two in step.
+func linear(ranges []Range16, r uint16) bool {
+ for i := range ranges {
+ range_ := &ranges[i]
+ // The ranges are sorted, so once r is below Lo no later range can hold it.
+ if r < range_.Lo {
+ return false
+ }
+ if r <= range_.Hi {
+ // r lies within [Lo, Hi]; it is a member only if it falls on the stride.
+ return (r-range_.Lo)%range_.Stride == 0
+ }
+ }
+ return false
+}
+
+// binary reports whether r is in ranges, which must be sorted, using binary
+// search. It repeats the binary-search loop of is16 in letter.go so that
+// TestCalibrate can time it; keep the two in step.
+func binary(ranges []Range16, r uint16) bool {
+ // binary search over ranges
+ lo := 0
+ hi := len(ranges)
+ for lo < hi {
+ // Midpoint written as lo + (hi-lo)/2 rather than (lo+hi)/2.
+ m := lo + (hi-lo)/2
+ range_ := &ranges[m]
+ if range_.Lo <= r && r <= range_.Hi {
+ // r lies within [Lo, Hi]; it is a member only if it falls on the stride.
+ return (r-range_.Lo)%range_.Stride == 0
+ }
+ if r < range_.Lo {
+ hi = m
+ } else {
+ lo = m + 1
+ }
+ }
+ return false
+}
+
+// TestLatinOffset checks, for every table in the exported table maps, that
+// LatinOffset equals the number of leading R16 entries with Hi <= MaxLatin1.
+func TestLatinOffset(t *testing.T) {
+	tableSets := []map[string]*RangeTable{
+		Categories,
+		FoldCategory,
+		FoldScript,
+		Properties,
+		Scripts,
+	}
+	for _, set := range tableSets {
+		for name, tab := range set {
+			// want is the index of the first entry reaching past Latin-1,
+			// or len(tab.R16) when every entry stays within it.
+			want := len(tab.R16)
+			for i := range tab.R16 {
+				if tab.R16[i].Hi > MaxLatin1 {
+					want = i
+					break
+				}
+			}
+			if got := tab.LatinOffset; got != want {
+				t.Errorf("%s: LatinOffset=%d, want %d", name, got, want)
+			}
+		}
+	}
+}
diff --git a/src/pkg/unicode/maketables.go b/src/pkg/unicode/maketables.go
index fcd14fc73..53d8b967e 100644
--- a/src/pkg/unicode/maketables.go
+++ b/src/pkg/unicode/maketables.go
@@ -13,7 +13,6 @@ import (
"bufio"
"flag"
"fmt"
- "io"
"log"
"net/http"
"os"
@@ -41,7 +40,7 @@ func main() {
var dataURL = flag.String("data", "", "full URL for UnicodeData.txt; defaults to --url/UnicodeData.txt")
var casefoldingURL = flag.String("casefolding", "", "full URL for CaseFolding.txt; defaults to --url/CaseFolding.txt")
var url = flag.String("url",
- "http://www.unicode.org/Public/6.0.0/ucd/",
+ "http://www.unicode.org/Public/6.2.0/ucd/",
"URL of Unicode database directory")
var tablelist = flag.String("tables",
"all",
@@ -321,16 +320,11 @@ func loadChars() {
flag.Set("data", *url+"UnicodeData.txt")
}
input := open(*dataURL)
+ defer input.close()
+ scanner := bufio.NewScanner(input)
var first rune = 0
- for {
- line, err := input.ReadString('\n')
- if err != nil {
- if err == io.EOF {
- break
- }
- logger.Fatal(err)
- }
- switch parseCategory(line[0 : len(line)-1]) {
+ for scanner.Scan() {
+ switch parseCategory(scanner.Text()) {
case SNormal:
if first != 0 {
logger.Fatalf("bad state normal at %U", lastChar)
@@ -351,7 +345,9 @@ func loadChars() {
first = 0
}
}
- input.close()
+ if scanner.Err() != nil {
+ logger.Fatal(scanner.Err())
+ }
}
func loadCasefold() {
@@ -359,15 +355,11 @@ func loadCasefold() {
flag.Set("casefolding", *url+"CaseFolding.txt")
}
input := open(*casefoldingURL)
- for {
- line, err := input.ReadString('\n')
- if err != nil {
- if err == io.EOF {
- break
- }
- logger.Fatal(err)
- }
- if line[0] == '#' {
+ defer input.close()
+ scanner := bufio.NewScanner(input)
+ for scanner.Scan() {
+ line := scanner.Text()
+ if len(line) == 0 || line[0] == '#' || len(strings.TrimSpace(line)) == 0 {
continue
}
field := strings.Split(line, "; ")
@@ -389,7 +381,9 @@ func loadCasefold() {
}
chars[p1].foldCase = rune(p2)
}
- input.close()
+ if scanner.Err() != nil {
+ logger.Fatal(scanner.Err())
+ }
}
const progHeader = `// Generated by running
@@ -503,6 +497,7 @@ const format = "\t\t{0x%04x, 0x%04x, %d},\n"
func dumpRange(header string, inCategory Op) {
fmt.Print(header)
next := rune(0)
+ latinOffset := 0
fmt.Print("\tR16: []Range16{\n")
// one Range for each iteration
count := &range16Count
@@ -546,11 +541,17 @@ func dumpRange(header string, inCategory Op) {
break
}
}
+ if uint32(hi) <= unicode.MaxLatin1 {
+ latinOffset++
+ }
size, count = printRange(uint32(lo), uint32(hi), uint32(stride), size, count)
// next range: start looking where this range ends
next = hi + 1
}
fmt.Print("\t},\n")
+ if latinOffset > 0 {
+ fmt.Printf("\tLatinOffset: %d,\n", latinOffset)
+ }
fmt.Print("}\n\n")
}
@@ -703,15 +704,12 @@ func printScriptOrProperty(doProps bool) {
return
}
input := open(*url + file)
- for {
- line, err := input.ReadString('\n')
- if err != nil {
- if err == io.EOF {
- break
- }
- logger.Fatal(err)
- }
- parseScript(line[0:len(line)-1], table)
+ scanner := bufio.NewScanner(input)
+ for scanner.Scan() {
+ parseScript(scanner.Text(), table)
+ }
+ if scanner.Err() != nil {
+ logger.Fatal(scanner.Err())
}
input.close()
@@ -760,14 +758,17 @@ func printScriptOrProperty(doProps bool) {
}
ndecl++
fmt.Printf("var _%s = &RangeTable {\n", name)
- fmt.Print("\tR16: []Range16{\n")
ranges := foldAdjacent(table[name])
+ fmt.Print("\tR16: []Range16{\n")
size := 16
count := &range16Count
for _, s := range ranges {
size, count = printRange(s.Lo, s.Hi, s.Stride, size, count)
}
fmt.Print("\t},\n")
+ if off := findLatinOffset(ranges); off > 0 {
+ fmt.Printf("\tLatinOffset: %d,\n", off)
+ }
fmt.Print("}\n\n")
}
decl.Sort()
@@ -779,6 +780,14 @@ func printScriptOrProperty(doProps bool) {
fmt.Print(")\n\n")
}
+// findLatinOffset returns the number of leading entries of ranges whose Hi is
+// at most unicode.MaxLatin1: the index of the first range that extends beyond
+// Latin-1, or len(ranges) if there is none.
+func findLatinOffset(ranges []unicode.Range32) int {
+	for i, r := range ranges {
+		if r.Hi > unicode.MaxLatin1 {
+			return i
+		}
+	}
+	return len(ranges)
+}
+
const (
CaseUpper = 1 << iota
CaseLower
@@ -1022,6 +1031,8 @@ func printLatinProperties() {
property = "0"
case "Ll":
property = "pLl | pp"
+ case "Lo":
+ property = "pLo | pp"
case "Lu":
property = "pLu | pp"
case "Nd", "No":
diff --git a/src/pkg/unicode/script_test.go b/src/pkg/unicode/script_test.go
index 1c5b80142..395cc71a0 100644
--- a/src/pkg/unicode/script_test.go
+++ b/src/pkg/unicode/script_test.go
@@ -14,7 +14,7 @@ type T struct {
script string
}
-// Hand-chosen tests from Unicode 5.1.0 & 6.0..0, mostly to discover when new
+// Hand-chosen tests from Unicode 5.1.0, 6.0.0 and 6.2.0, mostly to discover when new
// scripts and categories arise.
var inTest = []T{
{0x06e2, "Arabic"},
@@ -31,6 +31,7 @@ var inTest = []T{
{0x11011, "Brahmi"},
{0x156d, "Canadian_Aboriginal"},
{0x102a9, "Carian"},
+ {0x11111, "Chakma"},
{0xaa4d, "Cham"},
{0x13c2, "Cherokee"},
{0x0020, "Common"},
@@ -76,6 +77,9 @@ var inTest = []T{
{0x0d42, "Malayalam"},
{0x0843, "Mandaic"},
{0xabd0, "Meetei_Mayek"},
+ {0x1099f, "Meroitic_Hieroglyphs"},
+ {0x109a0, "Meroitic_Cursive"},
+ {0x16f00, "Miao"},
{0x1822, "Mongolian"},
{0x104c, "Myanmar"},
{0x19c3, "New_Tai_Lue"},
@@ -94,8 +98,10 @@ var inTest = []T{
{0x16c0, "Runic"},
{0x081d, "Samaritan"},
{0xa892, "Saurashtra"},
+ {0x111a0, "Sharada"},
{0x10463, "Shavian"},
{0x0dbd, "Sinhala"},
+ {0x110d0, "Sora_Sompeng"},
{0x1ba3, "Sundanese"},
{0xa803, "Syloti_Nagri"},
{0x070f, "Syriac"},
@@ -104,6 +110,7 @@ var inTest = []T{
{0x1972, "Tai_Le"},
{0x1a62, "Tai_Tham"},
{0xaadc, "Tai_Viet"},
+ {0x116c9, "Takri"},
{0x0bbf, "Tamil"},
{0x0c55, "Telugu"},
{0x07a7, "Thaana"},
@@ -121,7 +128,7 @@ var outTest = []T{ // not really worth being thorough
var inCategoryTest = []T{
{0x0081, "Cc"},
- {0x17b4, "Cf"},
+ {0x200B, "Cf"},
{0xf0000, "Co"},
{0xdb80, "Cs"},
{0x0236, "Ll"},
diff --git a/src/pkg/unicode/tables.go b/src/pkg/unicode/tables.go
index ebd169b09..36b5a3115 100644
--- a/src/pkg/unicode/tables.go
+++ b/src/pkg/unicode/tables.go
@@ -1,11 +1,11 @@
// Generated by running
-// maketables --tables=all --data=http://www.unicode.org/Public/6.0.0/ucd/UnicodeData.txt --casefolding=http://www.unicode.org/Public/6.0.0/ucd/CaseFolding.txt
+// maketables --tables=all --data=http://www.unicode.org/Public/6.2.0/ucd/UnicodeData.txt --casefolding=http://www.unicode.org/Public/6.2.0/ucd/CaseFolding.txt
// DO NOT EDIT
package unicode
// Version is the Unicode edition from which the tables are derived.
-const Version = "6.0.0"
+const Version = "6.2.0"
// Categories is the set of Unicode category tables.
var Categories = map[string]*RangeTable{
@@ -52,9 +52,8 @@ var _C = &RangeTable{
{0x0001, 0x001f, 1},
{0x007f, 0x009f, 1},
{0x00ad, 0x0600, 1363},
- {0x0601, 0x0603, 1},
+ {0x0601, 0x0604, 1},
{0x06dd, 0x070f, 50},
- {0x17b4, 0x17b5, 1},
{0x200b, 0x200f, 1},
{0x202a, 0x202e, 1},
{0x2060, 0x2064, 1},
@@ -71,6 +70,7 @@ var _C = &RangeTable{
{0xf0000, 0xffffd, 1},
{0x100000, 0x10fffd, 1},
},
+ LatinOffset: 2,
}
var _Cc = &RangeTable{
@@ -78,14 +78,14 @@ var _Cc = &RangeTable{
{0x0001, 0x001f, 1},
{0x007f, 0x009f, 1},
},
+ LatinOffset: 2,
}
var _Cf = &RangeTable{
R16: []Range16{
{0x00ad, 0x0600, 1363},
- {0x0601, 0x0603, 1},
+ {0x0601, 0x0604, 1},
{0x06dd, 0x070f, 50},
- {0x17b4, 0x17b5, 1},
{0x200b, 0x200f, 1},
{0x202a, 0x202e, 1},
{0x2060, 0x2064, 1},
@@ -162,6 +162,8 @@ var _L = &RangeTable{
{0x081a, 0x0824, 10},
{0x0828, 0x0840, 24},
{0x0841, 0x0858, 1},
+ {0x08a0, 0x08a2, 2},
+ {0x08a3, 0x08ac, 1},
{0x0904, 0x0939, 1},
{0x093d, 0x0950, 19},
{0x0958, 0x0961, 1},
@@ -259,8 +261,9 @@ var _L = &RangeTable{
{0x0ebd, 0x0ec0, 3},
{0x0ec1, 0x0ec4, 1},
{0x0ec6, 0x0edc, 22},
- {0x0edd, 0x0f00, 35},
- {0x0f40, 0x0f47, 1},
+ {0x0edd, 0x0edf, 1},
+ {0x0f00, 0x0f40, 64},
+ {0x0f41, 0x0f47, 1},
{0x0f49, 0x0f6c, 1},
{0x0f88, 0x0f8c, 1},
{0x1000, 0x102a, 1},
@@ -273,9 +276,9 @@ var _L = &RangeTable{
{0x1075, 0x1081, 1},
{0x108e, 0x10a0, 18},
{0x10a1, 0x10c5, 1},
+ {0x10c7, 0x10cd, 6},
{0x10d0, 0x10fa, 1},
- {0x10fc, 0x1100, 4},
- {0x1101, 0x1248, 1},
+ {0x10fc, 0x1248, 1},
{0x124a, 0x124d, 1},
{0x1250, 0x1256, 1},
{0x1258, 0x125a, 2},
@@ -321,12 +324,13 @@ var _L = &RangeTable{
{0x1b45, 0x1b4b, 1},
{0x1b83, 0x1ba0, 1},
{0x1bae, 0x1baf, 1},
- {0x1bc0, 0x1be5, 1},
+ {0x1bba, 0x1be5, 1},
{0x1c00, 0x1c23, 1},
{0x1c4d, 0x1c4f, 1},
{0x1c5a, 0x1c7d, 1},
{0x1ce9, 0x1cec, 1},
{0x1cee, 0x1cf1, 1},
+ {0x1cf5, 0x1cf6, 1},
{0x1d00, 0x1dbf, 1},
{0x1e00, 0x1f15, 1},
{0x1f18, 0x1f1d, 1},
@@ -362,8 +366,10 @@ var _L = &RangeTable{
{0x2c30, 0x2c5e, 1},
{0x2c60, 0x2ce4, 1},
{0x2ceb, 0x2cee, 1},
+ {0x2cf2, 0x2cf3, 1},
{0x2d00, 0x2d25, 1},
- {0x2d30, 0x2d65, 1},
+ {0x2d27, 0x2d2d, 6},
+ {0x2d30, 0x2d67, 1},
{0x2d6f, 0x2d80, 17},
{0x2d81, 0x2d96, 1},
{0x2da0, 0x2da6, 1},
@@ -387,7 +393,7 @@ var _L = &RangeTable{
{0x31a0, 0x31ba, 1},
{0x31f0, 0x31ff, 1},
{0x3400, 0x4db5, 1},
- {0x4e00, 0x9fcb, 1},
+ {0x4e00, 0x9fcc, 1},
{0xa000, 0xa48c, 1},
{0xa4d0, 0xa4fd, 1},
{0xa500, 0xa60c, 1},
@@ -399,9 +405,9 @@ var _L = &RangeTable{
{0xa717, 0xa71f, 1},
{0xa722, 0xa788, 1},
{0xa78b, 0xa78e, 1},
- {0xa790, 0xa791, 1},
- {0xa7a0, 0xa7a9, 1},
- {0xa7fa, 0xa801, 1},
+ {0xa790, 0xa793, 1},
+ {0xa7a0, 0xa7aa, 1},
+ {0xa7f8, 0xa801, 1},
{0xa803, 0xa805, 1},
{0xa807, 0xa80a, 1},
{0xa80c, 0xa822, 1},
@@ -425,6 +431,8 @@ var _L = &RangeTable{
{0xaaba, 0xaabd, 1},
{0xaac0, 0xaac2, 2},
{0xaadb, 0xaadd, 1},
+ {0xaae0, 0xaaea, 1},
+ {0xaaf2, 0xaaf4, 1},
{0xab01, 0xab06, 1},
{0xab09, 0xab0e, 1},
{0xab11, 0xab16, 1},
@@ -434,8 +442,7 @@ var _L = &RangeTable{
{0xac00, 0xd7a3, 1},
{0xd7b0, 0xd7c6, 1},
{0xd7cb, 0xd7fb, 1},
- {0xf900, 0xfa2d, 1},
- {0xfa30, 0xfa6d, 1},
+ {0xf900, 0xfa6d, 1},
{0xfa70, 0xfad9, 1},
{0xfb00, 0xfb06, 1},
{0xfb13, 0xfb17, 1},
@@ -486,6 +493,8 @@ var _L = &RangeTable{
{0x10840, 0x10855, 1},
{0x10900, 0x10915, 1},
{0x10920, 0x10939, 1},
+ {0x10980, 0x109b7, 1},
+ {0x109be, 0x109bf, 1},
{0x10a00, 0x10a10, 16},
{0x10a11, 0x10a13, 1},
{0x10a15, 0x10a17, 1},
@@ -497,9 +506,17 @@ var _L = &RangeTable{
{0x10c00, 0x10c48, 1},
{0x11003, 0x11037, 1},
{0x11083, 0x110af, 1},
+ {0x110d0, 0x110e8, 1},
+ {0x11103, 0x11126, 1},
+ {0x11183, 0x111b2, 1},
+ {0x111c1, 0x111c4, 1},
+ {0x11680, 0x116aa, 1},
{0x12000, 0x1236e, 1},
{0x13000, 0x1342e, 1},
{0x16800, 0x16a38, 1},
+ {0x16f00, 0x16f44, 1},
+ {0x16f50, 0x16f93, 67},
+ {0x16f94, 0x16f9f, 1},
{0x1b000, 0x1b001, 1},
{0x1d400, 0x1d454, 1},
{0x1d456, 0x1d49c, 1},
@@ -531,18 +548,42 @@ var _L = &RangeTable{
{0x1d78a, 0x1d7a8, 1},
{0x1d7aa, 0x1d7c2, 1},
{0x1d7c4, 0x1d7cb, 1},
+ {0x1ee00, 0x1ee03, 1},
+ {0x1ee05, 0x1ee1f, 1},
+ {0x1ee21, 0x1ee22, 1},
+ {0x1ee24, 0x1ee27, 3},
+ {0x1ee29, 0x1ee32, 1},
+ {0x1ee34, 0x1ee37, 1},
+ {0x1ee39, 0x1ee3b, 2},
+ {0x1ee42, 0x1ee47, 5},
+ {0x1ee49, 0x1ee4d, 2},
+ {0x1ee4e, 0x1ee4f, 1},
+ {0x1ee51, 0x1ee52, 1},
+ {0x1ee54, 0x1ee57, 3},
+ {0x1ee59, 0x1ee61, 2},
+ {0x1ee62, 0x1ee64, 2},
+ {0x1ee67, 0x1ee6a, 1},
+ {0x1ee6c, 0x1ee72, 1},
+ {0x1ee74, 0x1ee77, 1},
+ {0x1ee79, 0x1ee7c, 1},
+ {0x1ee7e, 0x1ee80, 2},
+ {0x1ee81, 0x1ee89, 1},
+ {0x1ee8b, 0x1ee9b, 1},
+ {0x1eea1, 0x1eea3, 1},
+ {0x1eea5, 0x1eea9, 1},
+ {0x1eeab, 0x1eebb, 1},
{0x20000, 0x2a6d6, 1},
{0x2a700, 0x2b734, 1},
{0x2b740, 0x2b81d, 1},
{0x2f800, 0x2fa1d, 1},
},
+ LatinOffset: 6,
}
var _Ll = &RangeTable{
R16: []Range16{
{0x0061, 0x007a, 1},
- {0x00aa, 0x00b5, 11},
- {0x00ba, 0x00df, 37},
+ {0x00b5, 0x00df, 42},
{0x00e0, 0x00f6, 1},
{0x00f8, 0x00ff, 1},
{0x0101, 0x0137, 2},
@@ -593,7 +634,7 @@ var _Ll = &RangeTable{
{0x04cf, 0x0527, 2},
{0x0561, 0x0587, 1},
{0x1d00, 0x1d2b, 1},
- {0x1d62, 0x1d77, 1},
+ {0x1d6b, 0x1d77, 1},
{0x1d79, 0x1d9a, 1},
{0x1e01, 0x1e95, 2},
{0x1e96, 0x1e9d, 1},
@@ -630,11 +671,12 @@ var _Ll = &RangeTable{
{0x2c66, 0x2c6c, 2},
{0x2c71, 0x2c73, 2},
{0x2c74, 0x2c76, 2},
- {0x2c77, 0x2c7c, 1},
+ {0x2c77, 0x2c7b, 1},
{0x2c81, 0x2ce3, 2},
{0x2ce4, 0x2cec, 8},
- {0x2cee, 0x2d00, 18},
- {0x2d01, 0x2d25, 1},
+ {0x2cee, 0x2cf3, 5},
+ {0x2d00, 0x2d25, 1},
+ {0x2d27, 0x2d2d, 6},
{0xa641, 0xa66d, 2},
{0xa681, 0xa697, 2},
{0xa723, 0xa72f, 2},
@@ -644,8 +686,8 @@ var _Ll = &RangeTable{
{0xa77a, 0xa77c, 2},
{0xa77f, 0xa787, 2},
{0xa78c, 0xa78e, 2},
- {0xa791, 0xa7a1, 16},
- {0xa7a3, 0xa7a9, 2},
+ {0xa791, 0xa793, 2},
+ {0xa7a1, 0xa7a9, 2},
{0xa7fa, 0xfb00, 21254},
{0xfb01, 0xfb06, 1},
{0xfb13, 0xfb17, 1},
@@ -682,6 +724,7 @@ var _Ll = &RangeTable{
{0x1d7c4, 0x1d7c9, 1},
{0x1d7cb, 0x1d7cb, 1},
},
+ LatinOffset: 4,
}
var _Lm = &RangeTable{
@@ -701,14 +744,15 @@ var _Lm = &RangeTable{
{0x17d7, 0x1843, 108},
{0x1aa7, 0x1c78, 465},
{0x1c79, 0x1c7d, 1},
- {0x1d2c, 0x1d61, 1},
+ {0x1d2c, 0x1d6a, 1},
{0x1d78, 0x1d9b, 35},
{0x1d9c, 0x1dbf, 1},
{0x2071, 0x207f, 14},
{0x2090, 0x209c, 1},
- {0x2c7d, 0x2d6f, 242},
- {0x2e2f, 0x3005, 470},
- {0x3031, 0x3035, 1},
+ {0x2c7c, 0x2c7d, 1},
+ {0x2d6f, 0x2e2f, 192},
+ {0x3005, 0x3031, 44},
+ {0x3032, 0x3035, 1},
{0x303b, 0x309d, 98},
{0x309e, 0x30fc, 94},
{0x30fd, 0x30fe, 1},
@@ -717,14 +761,20 @@ var _Lm = &RangeTable{
{0xa60c, 0xa67f, 115},
{0xa717, 0xa71f, 1},
{0xa770, 0xa788, 24},
+ {0xa7f8, 0xa7f9, 1},
{0xa9cf, 0xaa70, 161},
- {0xaadd, 0xff70, 21651},
+ {0xaadd, 0xaaf3, 22},
+ {0xaaf4, 0xff70, 21628},
{0xff9e, 0xff9f, 1},
},
+ R32: []Range32{
+ {0x16f93, 0x16f9f, 1},
+ },
}
var _Lo = &RangeTable{
R16: []Range16{
+ {0x00aa, 0x00ba, 16},
{0x01bb, 0x01c0, 5},
{0x01c1, 0x01c3, 1},
{0x0294, 0x05d0, 828},
@@ -744,6 +794,8 @@ var _Lo = &RangeTable{
{0x07cb, 0x07ea, 1},
{0x0800, 0x0815, 1},
{0x0840, 0x0858, 1},
+ {0x08a0, 0x08a2, 2},
+ {0x08a3, 0x08ac, 1},
{0x0904, 0x0939, 1},
{0x093d, 0x0950, 19},
{0x0958, 0x0961, 1},
@@ -840,7 +892,7 @@ var _Lo = &RangeTable{
{0x0eb2, 0x0eb3, 1},
{0x0ebd, 0x0ec0, 3},
{0x0ec1, 0x0ec4, 1},
- {0x0edc, 0x0edd, 1},
+ {0x0edc, 0x0edf, 1},
{0x0f00, 0x0f40, 64},
{0x0f41, 0x0f47, 1},
{0x0f49, 0x0f6c, 1},
@@ -855,7 +907,7 @@ var _Lo = &RangeTable{
{0x1075, 0x1081, 1},
{0x108e, 0x10d0, 66},
{0x10d1, 0x10fa, 1},
- {0x1100, 0x1248, 1},
+ {0x10fd, 0x1248, 1},
{0x124a, 0x124d, 1},
{0x1250, 0x1256, 1},
{0x1258, 0x125a, 2},
@@ -901,14 +953,15 @@ var _Lo = &RangeTable{
{0x1b45, 0x1b4b, 1},
{0x1b83, 0x1ba0, 1},
{0x1bae, 0x1baf, 1},
- {0x1bc0, 0x1be5, 1},
+ {0x1bba, 0x1be5, 1},
{0x1c00, 0x1c23, 1},
{0x1c4d, 0x1c4f, 1},
{0x1c5a, 0x1c77, 1},
{0x1ce9, 0x1cec, 1},
{0x1cee, 0x1cf1, 1},
+ {0x1cf5, 0x1cf6, 1},
{0x2135, 0x2138, 1},
- {0x2d30, 0x2d65, 1},
+ {0x2d30, 0x2d67, 1},
{0x2d80, 0x2d96, 1},
{0x2da0, 0x2da6, 1},
{0x2da8, 0x2dae, 1},
@@ -928,7 +981,7 @@ var _Lo = &RangeTable{
{0x31a0, 0x31ba, 1},
{0x31f0, 0x31ff, 1},
{0x3400, 0x4db5, 1},
- {0x4e00, 0x9fcb, 1},
+ {0x4e00, 0x9fcc, 1},
{0xa000, 0xa014, 1},
{0xa016, 0xa48c, 1},
{0xa4d0, 0xa4f7, 1},
@@ -961,7 +1014,9 @@ var _Lo = &RangeTable{
{0xaaba, 0xaabd, 1},
{0xaac0, 0xaac2, 2},
{0xaadb, 0xaadc, 1},
- {0xab01, 0xab06, 1},
+ {0xaae0, 0xaaea, 1},
+ {0xaaf2, 0xab01, 15},
+ {0xab02, 0xab06, 1},
{0xab09, 0xab0e, 1},
{0xab11, 0xab16, 1},
{0xab20, 0xab26, 1},
@@ -970,8 +1025,7 @@ var _Lo = &RangeTable{
{0xac00, 0xd7a3, 1},
{0xd7b0, 0xd7c6, 1},
{0xd7cb, 0xd7fb, 1},
- {0xf900, 0xfa2d, 1},
- {0xfa30, 0xfa6d, 1},
+ {0xf900, 0xfa6d, 1},
{0xfa70, 0xfad9, 1},
{0xfb1d, 0xfb1f, 2},
{0xfb20, 0xfb28, 1},
@@ -1020,6 +1074,8 @@ var _Lo = &RangeTable{
{0x10840, 0x10855, 1},
{0x10900, 0x10915, 1},
{0x10920, 0x10939, 1},
+ {0x10980, 0x109b7, 1},
+ {0x109be, 0x109bf, 1},
{0x10a00, 0x10a10, 16},
{0x10a11, 0x10a13, 1},
{0x10a15, 0x10a17, 1},
@@ -1031,15 +1087,47 @@ var _Lo = &RangeTable{
{0x10c00, 0x10c48, 1},
{0x11003, 0x11037, 1},
{0x11083, 0x110af, 1},
+ {0x110d0, 0x110e8, 1},
+ {0x11103, 0x11126, 1},
+ {0x11183, 0x111b2, 1},
+ {0x111c1, 0x111c4, 1},
+ {0x11680, 0x116aa, 1},
{0x12000, 0x1236e, 1},
{0x13000, 0x1342e, 1},
{0x16800, 0x16a38, 1},
- {0x1b000, 0x1b001, 1},
+ {0x16f00, 0x16f44, 1},
+ {0x16f50, 0x1b000, 16560},
+ {0x1b001, 0x1ee00, 15871},
+ {0x1ee01, 0x1ee03, 1},
+ {0x1ee05, 0x1ee1f, 1},
+ {0x1ee21, 0x1ee22, 1},
+ {0x1ee24, 0x1ee27, 3},
+ {0x1ee29, 0x1ee32, 1},
+ {0x1ee34, 0x1ee37, 1},
+ {0x1ee39, 0x1ee3b, 2},
+ {0x1ee42, 0x1ee47, 5},
+ {0x1ee49, 0x1ee4d, 2},
+ {0x1ee4e, 0x1ee4f, 1},
+ {0x1ee51, 0x1ee52, 1},
+ {0x1ee54, 0x1ee57, 3},
+ {0x1ee59, 0x1ee61, 2},
+ {0x1ee62, 0x1ee64, 2},
+ {0x1ee67, 0x1ee6a, 1},
+ {0x1ee6c, 0x1ee72, 1},
+ {0x1ee74, 0x1ee77, 1},
+ {0x1ee79, 0x1ee7c, 1},
+ {0x1ee7e, 0x1ee80, 2},
+ {0x1ee81, 0x1ee89, 1},
+ {0x1ee8b, 0x1ee9b, 1},
+ {0x1eea1, 0x1eea3, 1},
+ {0x1eea5, 0x1eea9, 1},
+ {0x1eeab, 0x1eebb, 1},
{0x20000, 0x2a6d6, 1},
{0x2a700, 0x2b734, 1},
{0x2b740, 0x2b81d, 1},
{0x2f800, 0x2fa1d, 1},
},
+ LatinOffset: 1,
}
var _Lt = &RangeTable{
@@ -1109,6 +1197,7 @@ var _Lu = &RangeTable{
{0x04d0, 0x0526, 2},
{0x0531, 0x0556, 1},
{0x10a0, 0x10c5, 1},
+ {0x10c7, 0x10cd, 6},
{0x1e00, 0x1e94, 2},
{0x1e9e, 0x1efe, 2},
{0x1f08, 0x1f0f, 1},
@@ -1142,15 +1231,16 @@ var _Lu = &RangeTable{
{0x2c7e, 0x2c80, 1},
{0x2c82, 0x2ce2, 2},
{0x2ceb, 0x2ced, 2},
- {0xa640, 0xa66c, 2},
+ {0x2cf2, 0xa640, 31054},
+ {0xa642, 0xa66c, 2},
{0xa680, 0xa696, 2},
{0xa722, 0xa72e, 2},
{0xa732, 0xa76e, 2},
{0xa779, 0xa77d, 2},
{0xa77e, 0xa786, 2},
{0xa78b, 0xa78d, 2},
- {0xa790, 0xa7a0, 16},
- {0xa7a2, 0xa7a8, 2},
+ {0xa790, 0xa792, 2},
+ {0xa7a0, 0xa7aa, 2},
{0xff21, 0xff3a, 1},
},
R32: []Range32{
@@ -1186,6 +1276,7 @@ var _Lu = &RangeTable{
{0x1d790, 0x1d7a8, 1},
{0x1d7ca, 0x1d7ca, 1},
},
+ LatinOffset: 3,
}
var _M = &RangeTable{
@@ -1212,6 +1303,7 @@ var _M = &RangeTable{
{0x0825, 0x0827, 1},
{0x0829, 0x082d, 1},
{0x0859, 0x085b, 1},
+ {0x08e4, 0x08fe, 1},
{0x0900, 0x0903, 1},
{0x093a, 0x093c, 1},
{0x093e, 0x094f, 1},
@@ -1302,7 +1394,7 @@ var _M = &RangeTable{
{0x1732, 0x1734, 1},
{0x1752, 0x1753, 1},
{0x1772, 0x1773, 1},
- {0x17b6, 0x17d3, 1},
+ {0x17b4, 0x17d3, 1},
{0x17dd, 0x180b, 46},
{0x180c, 0x180d, 1},
{0x18a9, 0x1920, 119},
@@ -1318,12 +1410,13 @@ var _M = &RangeTable{
{0x1b34, 0x1b44, 1},
{0x1b6b, 0x1b73, 1},
{0x1b80, 0x1b82, 1},
- {0x1ba1, 0x1baa, 1},
+ {0x1ba1, 0x1bad, 1},
{0x1be6, 0x1bf3, 1},
{0x1c24, 0x1c37, 1},
{0x1cd0, 0x1cd2, 1},
{0x1cd4, 0x1ce8, 1},
{0x1ced, 0x1cf2, 5},
+ {0x1cf3, 0x1cf4, 1},
{0x1dc0, 0x1de6, 1},
{0x1dfc, 0x1dff, 1},
{0x20d0, 0x20f0, 1},
@@ -1333,11 +1426,11 @@ var _M = &RangeTable{
{0x302a, 0x302f, 1},
{0x3099, 0x309a, 1},
{0xa66f, 0xa672, 1},
- {0xa67c, 0xa67d, 1},
- {0xa6f0, 0xa6f1, 1},
- {0xa802, 0xa806, 4},
- {0xa80b, 0xa823, 24},
- {0xa824, 0xa827, 1},
+ {0xa674, 0xa67d, 1},
+ {0xa69f, 0xa6f0, 81},
+ {0xa6f1, 0xa802, 273},
+ {0xa806, 0xa80b, 5},
+ {0xa823, 0xa827, 1},
{0xa880, 0xa881, 1},
{0xa8b4, 0xa8c4, 1},
{0xa8e0, 0xa8f1, 1},
@@ -1352,8 +1445,10 @@ var _M = &RangeTable{
{0xaab3, 0xaab4, 1},
{0xaab7, 0xaab8, 1},
{0xaabe, 0xaabf, 1},
- {0xaac1, 0xabe3, 290},
- {0xabe4, 0xabea, 1},
+ {0xaac1, 0xaaeb, 42},
+ {0xaaec, 0xaaef, 1},
+ {0xaaf5, 0xaaf6, 1},
+ {0xabe3, 0xabea, 1},
{0xabec, 0xabed, 1},
{0xfb1e, 0xfe00, 738},
{0xfe01, 0xfe0f, 1},
@@ -1370,6 +1465,13 @@ var _M = &RangeTable{
{0x11038, 0x11046, 1},
{0x11080, 0x11082, 1},
{0x110b0, 0x110ba, 1},
+ {0x11100, 0x11102, 1},
+ {0x11127, 0x11134, 1},
+ {0x11180, 0x11182, 1},
+ {0x111b3, 0x111c0, 1},
+ {0x116ab, 0x116b7, 1},
+ {0x16f51, 0x16f7e, 1},
+ {0x16f8f, 0x16f92, 1},
{0x1d165, 0x1d169, 1},
{0x1d16d, 0x1d172, 1},
{0x1d17b, 0x1d182, 1},
@@ -1454,17 +1556,19 @@ var _Mc = &RangeTable{
{0x1b43, 0x1b44, 1},
{0x1b82, 0x1ba1, 31},
{0x1ba6, 0x1ba7, 1},
- {0x1baa, 0x1be7, 61},
+ {0x1baa, 0x1bac, 2},
+ {0x1bad, 0x1be7, 58},
{0x1bea, 0x1bec, 1},
{0x1bee, 0x1bf2, 4},
{0x1bf3, 0x1c24, 49},
{0x1c25, 0x1c2b, 1},
{0x1c34, 0x1c35, 1},
{0x1ce1, 0x1cf2, 17},
- {0xa823, 0xa824, 1},
- {0xa827, 0xa880, 89},
- {0xa881, 0xa8b4, 51},
- {0xa8b5, 0xa8c3, 1},
+ {0x1cf3, 0x302e, 4923},
+ {0x302f, 0xa823, 30708},
+ {0xa824, 0xa827, 3},
+ {0xa880, 0xa881, 1},
+ {0xa8b4, 0xa8c3, 1},
{0xa952, 0xa953, 1},
{0xa983, 0xa9b4, 49},
{0xa9b5, 0xa9ba, 5},
@@ -1473,6 +1577,8 @@ var _Mc = &RangeTable{
{0xaa2f, 0xaa30, 1},
{0xaa33, 0xaa34, 1},
{0xaa4d, 0xaa7b, 46},
+ {0xaaeb, 0xaaee, 3},
+ {0xaaef, 0xaaf5, 6},
{0xabe3, 0xabe4, 1},
{0xabe6, 0xabe7, 1},
{0xabe9, 0xabea, 1},
@@ -1483,6 +1589,12 @@ var _Mc = &RangeTable{
{0x11002, 0x11082, 128},
{0x110b0, 0x110b2, 1},
{0x110b7, 0x110b8, 1},
+ {0x1112c, 0x11182, 86},
+ {0x111b3, 0x111b5, 1},
+ {0x111bf, 0x111c0, 1},
+ {0x116ac, 0x116ae, 2},
+ {0x116af, 0x116b6, 7},
+ {0x16f51, 0x16f7e, 1},
{0x1d165, 0x1d166, 1},
{0x1d16d, 0x1d172, 1},
},
@@ -1521,6 +1633,7 @@ var _Mn = &RangeTable{
{0x0825, 0x0827, 1},
{0x0829, 0x082d, 1},
{0x0859, 0x085b, 1},
+ {0x08e4, 0x08fe, 1},
{0x0900, 0x0902, 1},
{0x093a, 0x093c, 2},
{0x0941, 0x0948, 1},
@@ -1592,6 +1705,7 @@ var _Mn = &RangeTable{
{0x1732, 0x1734, 1},
{0x1752, 0x1753, 1},
{0x1772, 0x1773, 1},
+ {0x17b4, 0x17b5, 1},
{0x17b7, 0x17bd, 1},
{0x17c6, 0x17c9, 3},
{0x17ca, 0x17d3, 1},
@@ -1617,16 +1731,17 @@ var _Mn = &RangeTable{
{0x1b80, 0x1b81, 1},
{0x1ba2, 0x1ba5, 1},
{0x1ba8, 0x1ba9, 1},
- {0x1be6, 0x1be8, 2},
- {0x1be9, 0x1bed, 4},
- {0x1bef, 0x1bf1, 1},
+ {0x1bab, 0x1be6, 59},
+ {0x1be8, 0x1be9, 1},
+ {0x1bed, 0x1bef, 2},
+ {0x1bf0, 0x1bf1, 1},
{0x1c2c, 0x1c33, 1},
{0x1c36, 0x1c37, 1},
{0x1cd0, 0x1cd2, 1},
{0x1cd4, 0x1ce0, 1},
{0x1ce2, 0x1ce8, 1},
- {0x1ced, 0x1dc0, 211},
- {0x1dc1, 0x1de6, 1},
+ {0x1ced, 0x1cf4, 7},
+ {0x1dc0, 0x1de6, 1},
{0x1dfc, 0x1dff, 1},
{0x20d0, 0x20dc, 1},
{0x20e1, 0x20e5, 4},
@@ -1634,10 +1749,11 @@ var _Mn = &RangeTable{
{0x2cef, 0x2cf1, 1},
{0x2d7f, 0x2de0, 97},
{0x2de1, 0x2dff, 1},
- {0x302a, 0x302f, 1},
+ {0x302a, 0x302d, 1},
{0x3099, 0x309a, 1},
- {0xa66f, 0xa67c, 13},
- {0xa67d, 0xa6f0, 115},
+ {0xa66f, 0xa674, 5},
+ {0xa675, 0xa67d, 1},
+ {0xa69f, 0xa6f0, 81},
{0xa6f1, 0xa802, 273},
{0xa806, 0xa80b, 5},
{0xa825, 0xa826, 1},
@@ -1657,10 +1773,11 @@ var _Mn = &RangeTable{
{0xaab3, 0xaab4, 1},
{0xaab7, 0xaab8, 1},
{0xaabe, 0xaabf, 1},
- {0xaac1, 0xabe5, 292},
- {0xabe8, 0xabed, 5},
- {0xfb1e, 0xfe00, 738},
- {0xfe01, 0xfe0f, 1},
+ {0xaac1, 0xaaec, 43},
+ {0xaaed, 0xaaf6, 9},
+ {0xabe5, 0xabe8, 3},
+ {0xabed, 0xfb1e, 20273},
+ {0xfe00, 0xfe0f, 1},
{0xfe20, 0xfe26, 1},
},
R32: []Range32{
@@ -1674,6 +1791,15 @@ var _Mn = &RangeTable{
{0x11080, 0x11081, 1},
{0x110b3, 0x110b6, 1},
{0x110b9, 0x110ba, 1},
+ {0x11100, 0x11102, 1},
+ {0x11127, 0x1112b, 1},
+ {0x1112d, 0x11134, 1},
+ {0x11180, 0x11181, 1},
+ {0x111b6, 0x111be, 1},
+ {0x116ab, 0x116ad, 2},
+ {0x116b0, 0x116b5, 1},
+ {0x116b7, 0x16f8f, 22744},
+ {0x16f90, 0x16f92, 1},
{0x1d167, 0x1d169, 1},
{0x1d17b, 0x1d182, 1},
{0x1d185, 0x1d18b, 1},
@@ -1735,6 +1861,7 @@ var _N = &RangeTable{
{0x3038, 0x303a, 1},
{0x3192, 0x3195, 1},
{0x3220, 0x3229, 1},
+ {0x3248, 0x324f, 1},
{0x3251, 0x325f, 1},
{0x3280, 0x3289, 1},
{0x32b1, 0x32bf, 1},
@@ -1764,11 +1891,16 @@ var _N = &RangeTable{
{0x10b78, 0x10b7f, 1},
{0x10e60, 0x10e7e, 1},
{0x11052, 0x1106f, 1},
+ {0x110f0, 0x110f9, 1},
+ {0x11136, 0x1113f, 1},
+ {0x111d0, 0x111d9, 1},
+ {0x116c0, 0x116c9, 1},
{0x12400, 0x12462, 1},
{0x1d360, 0x1d371, 1},
{0x1d7ce, 0x1d7ff, 1},
{0x1f100, 0x1f10a, 1},
},
+ LatinOffset: 4,
}
var _Nd = &RangeTable{
@@ -1812,8 +1944,13 @@ var _Nd = &RangeTable{
R32: []Range32{
{0x104a0, 0x104a9, 1},
{0x11066, 0x1106f, 1},
+ {0x110f0, 0x110f9, 1},
+ {0x11136, 0x1113f, 1},
+ {0x111d0, 0x111d9, 1},
+ {0x116c0, 0x116c9, 1},
{0x1d7ce, 0x1d7ff, 1},
},
+ LatinOffset: 1,
}
var _Nl = &RangeTable{
@@ -1858,6 +1995,7 @@ var _No = &RangeTable{
{0x2cfd, 0x3192, 1173},
{0x3193, 0x3195, 1},
{0x3220, 0x3229, 1},
+ {0x3248, 0x324f, 1},
{0x3251, 0x325f, 1},
{0x3280, 0x3289, 1},
{0x32b1, 0x32bf, 1},
@@ -1879,6 +2017,7 @@ var _No = &RangeTable{
{0x1d360, 0x1d371, 1},
{0x1f100, 0x1f10a, 1},
},
+ LatinOffset: 3,
}
var _P = &RangeTable{
@@ -1891,7 +2030,8 @@ var _P = &RangeTable{
{0x005b, 0x005d, 1},
{0x005f, 0x007b, 28},
{0x007d, 0x00a1, 36},
- {0x00ab, 0x00b7, 12},
+ {0x00a7, 0x00ab, 4},
+ {0x00b6, 0x00b7, 1},
{0x00bb, 0x00bf, 4},
{0x037e, 0x0387, 9},
{0x055a, 0x055f, 1},
@@ -1910,16 +2050,18 @@ var _P = &RangeTable{
{0x0830, 0x083e, 1},
{0x085e, 0x0964, 262},
{0x0965, 0x0970, 11},
- {0x0df4, 0x0e4f, 91},
- {0x0e5a, 0x0e5b, 1},
- {0x0f04, 0x0f12, 1},
- {0x0f3a, 0x0f3d, 1},
+ {0x0af0, 0x0df4, 772},
+ {0x0e4f, 0x0e5a, 11},
+ {0x0e5b, 0x0f04, 169},
+ {0x0f05, 0x0f12, 1},
+ {0x0f14, 0x0f3a, 38},
+ {0x0f3b, 0x0f3d, 1},
{0x0f85, 0x0fd0, 75},
{0x0fd1, 0x0fd4, 1},
{0x0fd9, 0x0fda, 1},
{0x104a, 0x104f, 1},
- {0x10fb, 0x1361, 614},
- {0x1362, 0x1368, 1},
+ {0x10fb, 0x1360, 613},
+ {0x1361, 0x1368, 1},
{0x1400, 0x166d, 621},
{0x166e, 0x169b, 45},
{0x169c, 0x16eb, 79},
@@ -1936,6 +2078,7 @@ var _P = &RangeTable{
{0x1bfc, 0x1bff, 1},
{0x1c3b, 0x1c3f, 1},
{0x1c7e, 0x1c7f, 1},
+ {0x1cc0, 0x1cc7, 1},
{0x1cd3, 0x2010, 829},
{0x2011, 0x2027, 1},
{0x2030, 0x2043, 1},
@@ -1954,7 +2097,7 @@ var _P = &RangeTable{
{0x2cfe, 0x2cff, 1},
{0x2d70, 0x2e00, 144},
{0x2e01, 0x2e2e, 1},
- {0x2e30, 0x2e31, 1},
+ {0x2e30, 0x2e3b, 1},
{0x3001, 0x3003, 1},
{0x3008, 0x3011, 1},
{0x3014, 0x301f, 1},
@@ -1973,6 +2116,7 @@ var _P = &RangeTable{
{0xa9de, 0xa9df, 1},
{0xaa5c, 0xaa5f, 1},
{0xaade, 0xaadf, 1},
+ {0xaaf0, 0xaaf1, 1},
{0xabeb, 0xfd3e, 20819},
{0xfd3f, 0xfe10, 209},
{0xfe11, 0xfe19, 1},
@@ -1991,7 +2135,7 @@ var _P = &RangeTable{
{0xff60, 0xff65, 1},
},
R32: []Range32{
- {0x10100, 0x10101, 1},
+ {0x10100, 0x10102, 1},
{0x1039f, 0x103d0, 49},
{0x10857, 0x1091f, 200},
{0x1093f, 0x10a50, 273},
@@ -2001,8 +2145,11 @@ var _P = &RangeTable{
{0x11047, 0x1104d, 1},
{0x110bb, 0x110bc, 1},
{0x110be, 0x110c1, 1},
+ {0x11140, 0x11143, 1},
+ {0x111c5, 0x111c8, 1},
{0x12470, 0x12473, 1},
},
+ LatinOffset: 11,
}
var _Pc = &RangeTable{
@@ -2022,6 +2169,7 @@ var _Pd = &RangeTable{
{0x1806, 0x2010, 2058},
{0x2011, 0x2015, 1},
{0x2e17, 0x2e1a, 3},
+ {0x2e3a, 0x2e3b, 1},
{0x301c, 0x3030, 20},
{0x30a0, 0xfe31, 52625},
{0xfe32, 0xfe58, 38},
@@ -2053,6 +2201,7 @@ var _Pe = &RangeTable{
{0xff09, 0xff3d, 52},
{0xff5d, 0xff63, 3},
},
+ LatinOffset: 1,
}
var _Pf = &RangeTable{
@@ -2084,7 +2233,8 @@ var _Po = &RangeTable{
{0x002f, 0x003a, 11},
{0x003b, 0x003f, 4},
{0x0040, 0x005c, 28},
- {0x00a1, 0x00b7, 22},
+ {0x00a1, 0x00a7, 6},
+ {0x00b6, 0x00b7, 1},
{0x00bf, 0x037e, 703},
{0x0387, 0x055a, 467},
{0x055b, 0x055f, 1},
@@ -2102,15 +2252,16 @@ var _Po = &RangeTable{
{0x0830, 0x083e, 1},
{0x085e, 0x0964, 262},
{0x0965, 0x0970, 11},
- {0x0df4, 0x0e4f, 91},
- {0x0e5a, 0x0e5b, 1},
- {0x0f04, 0x0f12, 1},
- {0x0f85, 0x0fd0, 75},
- {0x0fd1, 0x0fd4, 1},
+ {0x0af0, 0x0df4, 772},
+ {0x0e4f, 0x0e5a, 11},
+ {0x0e5b, 0x0f04, 169},
+ {0x0f05, 0x0f12, 1},
+ {0x0f14, 0x0f85, 113},
+ {0x0fd0, 0x0fd4, 1},
{0x0fd9, 0x0fda, 1},
{0x104a, 0x104f, 1},
- {0x10fb, 0x1361, 614},
- {0x1362, 0x1368, 1},
+ {0x10fb, 0x1360, 613},
+ {0x1361, 0x1368, 1},
{0x166d, 0x166e, 1},
{0x16eb, 0x16ed, 1},
{0x1735, 0x1736, 1},
@@ -2126,6 +2277,7 @@ var _Po = &RangeTable{
{0x1bfc, 0x1bff, 1},
{0x1c3b, 0x1c3f, 1},
{0x1c7e, 0x1c7f, 1},
+ {0x1cc0, 0x1cc7, 1},
{0x1cd3, 0x2016, 835},
{0x2017, 0x2020, 9},
{0x2021, 0x2027, 1},
@@ -2146,7 +2298,7 @@ var _Po = &RangeTable{
{0x2e1b, 0x2e1e, 3},
{0x2e1f, 0x2e2a, 11},
{0x2e2b, 0x2e2e, 1},
- {0x2e30, 0x2e31, 1},
+ {0x2e30, 0x2e39, 1},
{0x3001, 0x3003, 1},
{0x303d, 0x30fb, 190},
{0xa4fe, 0xa4ff, 1},
@@ -2162,6 +2314,7 @@ var _Po = &RangeTable{
{0xa9de, 0xa9df, 1},
{0xaa5c, 0xaa5f, 1},
{0xaade, 0xaadf, 1},
+ {0xaaf0, 0xaaf1, 1},
{0xabeb, 0xfe10, 21029},
{0xfe11, 0xfe16, 1},
{0xfe19, 0xfe30, 23},
@@ -2183,17 +2336,21 @@ var _Po = &RangeTable{
},
R32: []Range32{
{0x10100, 0x10100, 1},
- {0x10101, 0x1039f, 670},
- {0x103d0, 0x10857, 1159},
- {0x1091f, 0x1093f, 32},
- {0x10a50, 0x10a58, 1},
+ {0x10101, 0x10102, 1},
+ {0x1039f, 0x103d0, 49},
+ {0x10857, 0x1091f, 200},
+ {0x1093f, 0x10a50, 273},
+ {0x10a51, 0x10a58, 1},
{0x10a7f, 0x10b39, 186},
{0x10b3a, 0x10b3f, 1},
{0x11047, 0x1104d, 1},
{0x110bb, 0x110bc, 1},
{0x110be, 0x110c1, 1},
+ {0x11140, 0x11143, 1},
+ {0x111c5, 0x111c8, 1},
{0x12470, 0x12473, 1},
},
+ LatinOffset: 8,
}
var _Ps = &RangeTable{
@@ -2222,6 +2379,7 @@ var _Ps = &RangeTable{
{0xff5b, 0xff5f, 4},
{0xff62, 0xff62, 1},
},
+ LatinOffset: 1,
}
var _S = &RangeTable{
@@ -2230,10 +2388,11 @@ var _S = &RangeTable{
{0x003c, 0x003e, 1},
{0x005e, 0x0060, 2},
{0x007c, 0x007e, 2},
- {0x00a2, 0x00a9, 1},
+ {0x00a2, 0x00a6, 1},
+ {0x00a8, 0x00a9, 1},
{0x00ac, 0x00ae, 2},
{0x00af, 0x00b1, 1},
- {0x00b4, 0x00b8, 2},
+ {0x00b4, 0x00b8, 4},
{0x00d7, 0x00f7, 32},
{0x02c2, 0x02c5, 1},
{0x02d2, 0x02df, 1},
@@ -2242,8 +2401,8 @@ var _S = &RangeTable{
{0x02f0, 0x02ff, 1},
{0x0375, 0x0384, 15},
{0x0385, 0x03f6, 113},
- {0x0482, 0x0606, 388},
- {0x0607, 0x0608, 1},
+ {0x0482, 0x058f, 269},
+ {0x0606, 0x0608, 1},
{0x060b, 0x060e, 3},
{0x060f, 0x06de, 207},
{0x06e9, 0x06fd, 20},
@@ -2255,7 +2414,8 @@ var _S = &RangeTable{
{0x0c7f, 0x0d79, 250},
{0x0e3f, 0x0f01, 194},
{0x0f02, 0x0f03, 1},
- {0x0f13, 0x0f17, 1},
+ {0x0f13, 0x0f15, 2},
+ {0x0f16, 0x0f17, 1},
{0x0f1a, 0x0f1f, 1},
{0x0f34, 0x0f38, 2},
{0x0fbe, 0x0fc5, 1},
@@ -2263,8 +2423,7 @@ var _S = &RangeTable{
{0x0fce, 0x0fcf, 1},
{0x0fd5, 0x0fd8, 1},
{0x109e, 0x109f, 1},
- {0x1360, 0x1390, 48},
- {0x1391, 0x1399, 1},
+ {0x1390, 0x1399, 1},
{0x17db, 0x1940, 357},
{0x19de, 0x19ff, 1},
{0x1b61, 0x1b6a, 1},
@@ -2278,7 +2437,7 @@ var _S = &RangeTable{
{0x2044, 0x2052, 14},
{0x207a, 0x207c, 1},
{0x208a, 0x208c, 1},
- {0x20a0, 0x20b9, 1},
+ {0x20a0, 0x20ba, 1},
{0x2100, 0x2101, 1},
{0x2103, 0x2106, 1},
{0x2108, 0x2109, 1},
@@ -2299,9 +2458,7 @@ var _S = &RangeTable{
{0x2500, 0x26ff, 1},
{0x2701, 0x2767, 1},
{0x2794, 0x27c4, 1},
- {0x27c7, 0x27ca, 1},
- {0x27cc, 0x27ce, 2},
- {0x27cf, 0x27e5, 1},
+ {0x27c7, 0x27e5, 1},
{0x27f0, 0x2982, 1},
{0x2999, 0x29d7, 1},
{0x29dc, 0x29fb, 1},
@@ -2321,8 +2478,9 @@ var _S = &RangeTable{
{0x3196, 0x319f, 1},
{0x31c0, 0x31e3, 1},
{0x3200, 0x321e, 1},
- {0x322a, 0x3250, 1},
- {0x3260, 0x327f, 1},
+ {0x322a, 0x3247, 1},
+ {0x3250, 0x3260, 16},
+ {0x3261, 0x327f, 1},
{0x328a, 0x32b0, 1},
{0x32c0, 0x32fe, 1},
{0x3300, 0x33ff, 1},
@@ -2349,8 +2507,7 @@ var _S = &RangeTable{
{0xfffc, 0xfffd, 1},
},
R32: []Range32{
- {0x10102, 0x10137, 53},
- {0x10138, 0x1013f, 1},
+ {0x10137, 0x1013f, 1},
{0x10179, 0x10189, 1},
{0x10190, 0x1019b, 1},
{0x101d0, 0x101fc, 1},
@@ -2369,6 +2526,7 @@ var _S = &RangeTable{
{0x1d735, 0x1d74f, 26},
{0x1d76f, 0x1d789, 26},
{0x1d7a9, 0x1d7c3, 26},
+ {0x1eef0, 0x1eef1, 1},
{0x1f000, 0x1f02b, 1},
{0x1f030, 0x1f093, 1},
{0x1f0a0, 0x1f0ae, 1},
@@ -2376,7 +2534,7 @@ var _S = &RangeTable{
{0x1f0c1, 0x1f0cf, 1},
{0x1f0d1, 0x1f0df, 1},
{0x1f110, 0x1f12e, 1},
- {0x1f130, 0x1f169, 1},
+ {0x1f130, 0x1f16b, 1},
{0x1f170, 0x1f19a, 1},
{0x1f1e6, 0x1f202, 1},
{0x1f210, 0x1f23a, 1},
@@ -2394,37 +2552,32 @@ var _S = &RangeTable{
{0x1f443, 0x1f4f7, 1},
{0x1f4f9, 0x1f4fc, 1},
{0x1f500, 0x1f53d, 1},
+ {0x1f540, 0x1f543, 1},
{0x1f550, 0x1f567, 1},
- {0x1f5fb, 0x1f5ff, 1},
- {0x1f601, 0x1f610, 1},
- {0x1f612, 0x1f614, 1},
- {0x1f616, 0x1f61c, 2},
- {0x1f61d, 0x1f61e, 1},
- {0x1f620, 0x1f625, 1},
- {0x1f628, 0x1f62b, 1},
- {0x1f62d, 0x1f630, 3},
- {0x1f631, 0x1f633, 1},
- {0x1f635, 0x1f640, 1},
+ {0x1f5fb, 0x1f640, 1},
{0x1f645, 0x1f64f, 1},
{0x1f680, 0x1f6c5, 1},
{0x1f700, 0x1f773, 1},
},
+ LatinOffset: 10,
}
var _Sc = &RangeTable{
R16: []Range16{
{0x0024, 0x00a2, 126},
{0x00a3, 0x00a5, 1},
- {0x060b, 0x09f2, 999},
- {0x09f3, 0x09fb, 8},
- {0x0af1, 0x0bf9, 264},
- {0x0e3f, 0x17db, 2460},
- {0x20a0, 0x20b9, 1},
+ {0x058f, 0x060b, 124},
+ {0x09f2, 0x09f3, 1},
+ {0x09fb, 0x0af1, 246},
+ {0x0bf9, 0x0e3f, 582},
+ {0x17db, 0x20a0, 2245},
+ {0x20a1, 0x20ba, 1},
{0xa838, 0xfdfc, 21956},
{0xfe69, 0xff04, 155},
{0xffe0, 0xffe1, 1},
{0xffe5, 0xffe6, 1},
},
+ LatinOffset: 2,
}
var _Sk = &RangeTable{
@@ -2452,6 +2605,7 @@ var _Sk = &RangeTable{
{0xff3e, 0xff40, 2},
{0xffe3, 0xffe3, 1},
},
+ LatinOffset: 3,
}
var _Sm = &RangeTable{
@@ -2485,9 +2639,7 @@ var _Sm = &RangeTable{
{0x25f8, 0x25ff, 1},
{0x266f, 0x27c0, 337},
{0x27c1, 0x27c4, 1},
- {0x27c7, 0x27ca, 1},
- {0x27cc, 0x27ce, 2},
- {0x27cf, 0x27e5, 1},
+ {0x27c7, 0x27e5, 1},
{0x27f0, 0x27ff, 1},
{0x2900, 0x2982, 1},
{0x2999, 0x29d7, 1},
@@ -2509,14 +2661,15 @@ var _Sm = &RangeTable{
{0x1d735, 0x1d74f, 26},
{0x1d76f, 0x1d789, 26},
{0x1d7a9, 0x1d7c3, 26},
+ {0x1eef0, 0x1eef1, 1},
},
+ LatinOffset: 5,
}
var _So = &RangeTable{
R16: []Range16{
- {0x00a6, 0x00a7, 1},
- {0x00a9, 0x00ae, 5},
- {0x00b0, 0x00b6, 6},
+ {0x00a6, 0x00a9, 3},
+ {0x00ae, 0x00b0, 2},
{0x0482, 0x060e, 396},
{0x060f, 0x06de, 207},
{0x06e9, 0x06fd, 20},
@@ -2526,7 +2679,8 @@ var _So = &RangeTable{
{0x0bfa, 0x0c7f, 133},
{0x0d79, 0x0f01, 392},
{0x0f02, 0x0f03, 1},
- {0x0f13, 0x0f17, 1},
+ {0x0f13, 0x0f15, 2},
+ {0x0f16, 0x0f17, 1},
{0x0f1a, 0x0f1f, 1},
{0x0f34, 0x0f38, 2},
{0x0fbe, 0x0fc5, 1},
@@ -2534,8 +2688,7 @@ var _So = &RangeTable{
{0x0fce, 0x0fcf, 1},
{0x0fd5, 0x0fd8, 1},
{0x109e, 0x109f, 1},
- {0x1360, 0x1390, 48},
- {0x1391, 0x1399, 1},
+ {0x1390, 0x1399, 1},
{0x1940, 0x19de, 158},
{0x19df, 0x19ff, 1},
{0x1b61, 0x1b6a, 1},
@@ -2594,8 +2747,9 @@ var _So = &RangeTable{
{0x3196, 0x319f, 1},
{0x31c0, 0x31e3, 1},
{0x3200, 0x321e, 1},
- {0x322a, 0x3250, 1},
- {0x3260, 0x327f, 1},
+ {0x322a, 0x3247, 1},
+ {0x3250, 0x3260, 16},
+ {0x3261, 0x327f, 1},
{0x328a, 0x32b0, 1},
{0x32c0, 0x32fe, 1},
{0x3300, 0x33ff, 1},
@@ -2611,8 +2765,8 @@ var _So = &RangeTable{
{0xfffd, 0xfffd, 1},
},
R32: []Range32{
- {0x10102, 0x10102, 1},
- {0x10137, 0x1013f, 1},
+ {0x10137, 0x10137, 1},
+ {0x10138, 0x1013f, 1},
{0x10179, 0x10189, 1},
{0x10190, 0x1019b, 1},
{0x101d0, 0x101fc, 1},
@@ -2633,7 +2787,7 @@ var _So = &RangeTable{
{0x1f0c1, 0x1f0cf, 1},
{0x1f0d1, 0x1f0df, 1},
{0x1f110, 0x1f12e, 1},
- {0x1f130, 0x1f169, 1},
+ {0x1f130, 0x1f16b, 1},
{0x1f170, 0x1f19a, 1},
{0x1f1e6, 0x1f202, 1},
{0x1f210, 0x1f23a, 1},
@@ -2651,21 +2805,14 @@ var _So = &RangeTable{
{0x1f443, 0x1f4f7, 1},
{0x1f4f9, 0x1f4fc, 1},
{0x1f500, 0x1f53d, 1},
+ {0x1f540, 0x1f543, 1},
{0x1f550, 0x1f567, 1},
- {0x1f5fb, 0x1f5ff, 1},
- {0x1f601, 0x1f610, 1},
- {0x1f612, 0x1f614, 1},
- {0x1f616, 0x1f61c, 2},
- {0x1f61d, 0x1f61e, 1},
- {0x1f620, 0x1f625, 1},
- {0x1f628, 0x1f62b, 1},
- {0x1f62d, 0x1f630, 3},
- {0x1f631, 0x1f633, 1},
- {0x1f635, 0x1f640, 1},
+ {0x1f5fb, 0x1f640, 1},
{0x1f645, 0x1f64f, 1},
{0x1f680, 0x1f6c5, 1},
{0x1f700, 0x1f773, 1},
},
+ LatinOffset: 2,
}
var _Z = &RangeTable{
@@ -2677,6 +2824,7 @@ var _Z = &RangeTable{
{0x202f, 0x205f, 48},
{0x3000, 0x3000, 1},
},
+ LatinOffset: 1,
}
var _Zl = &RangeTable{
@@ -2699,6 +2847,7 @@ var _Zs = &RangeTable{
{0x202f, 0x205f, 48},
{0x3000, 0x3000, 1},
},
+ LatinOffset: 1,
}
// These variables have type *RangeTable.
@@ -2753,7 +2902,7 @@ var (
)
// Generated by running
-// maketables --scripts=all --url=http://www.unicode.org/Public/6.0.0/ucd/
+// maketables --scripts=all --url=http://www.unicode.org/Public/6.2.0/ucd/
// DO NOT EDIT
// Scripts is the set of Unicode script tables.
@@ -2772,6 +2921,7 @@ var Scripts = map[string]*RangeTable{
"Buhid": Buhid,
"Canadian_Aboriginal": Canadian_Aboriginal,
"Carian": Carian,
+ "Chakma": Chakma,
"Cham": Cham,
"Cherokee": Cherokee,
"Common": Common,
@@ -2816,6 +2966,9 @@ var Scripts = map[string]*RangeTable{
"Malayalam": Malayalam,
"Mandaic": Mandaic,
"Meetei_Mayek": Meetei_Mayek,
+ "Meroitic_Cursive": Meroitic_Cursive,
+ "Meroitic_Hieroglyphs": Meroitic_Hieroglyphs,
+ "Miao": Miao,
"Mongolian": Mongolian,
"Myanmar": Myanmar,
"New_Tai_Lue": New_Tai_Lue,
@@ -2834,8 +2987,10 @@ var Scripts = map[string]*RangeTable{
"Runic": Runic,
"Samaritan": Samaritan,
"Saurashtra": Saurashtra,
+ "Sharada": Sharada,
"Shavian": Shavian,
"Sinhala": Sinhala,
+ "Sora_Sompeng": Sora_Sompeng,
"Sundanese": Sundanese,
"Syloti_Nagri": Syloti_Nagri,
"Syriac": Syriac,
@@ -2844,6 +2999,7 @@ var Scripts = map[string]*RangeTable{
"Tai_Le": Tai_Le,
"Tai_Tham": Tai_Tham,
"Tai_Viet": Tai_Viet,
+ "Takri": Takri,
"Tamil": Tamil,
"Telugu": Telugu,
"Thaana": Thaana,
@@ -2857,17 +3013,20 @@ var Scripts = map[string]*RangeTable{
var _Arabic = &RangeTable{
R16: []Range16{
- {0x0600, 0x0603, 1},
+ {0x0600, 0x0604, 1},
{0x0606, 0x060b, 1},
{0x060d, 0x061a, 1},
{0x061e, 0x061e, 1},
{0x0620, 0x063f, 1},
{0x0641, 0x064a, 1},
- {0x0656, 0x065e, 1},
+ {0x0656, 0x065f, 1},
{0x066a, 0x066f, 1},
{0x0671, 0x06dc, 1},
{0x06de, 0x06ff, 1},
{0x0750, 0x077f, 1},
+ {0x08a0, 0x08a0, 1},
+ {0x08a2, 0x08ac, 1},
+ {0x08e4, 0x08fe, 1},
{0xfb50, 0xfbc1, 1},
{0xfbd3, 0xfd3d, 1},
{0xfd50, 0xfd8f, 1},
@@ -2878,6 +3037,40 @@ var _Arabic = &RangeTable{
},
R32: []Range32{
{0x10e60, 0x10e7e, 1},
+ {0x1ee00, 0x1ee03, 1},
+ {0x1ee05, 0x1ee1f, 1},
+ {0x1ee21, 0x1ee22, 1},
+ {0x1ee24, 0x1ee24, 1},
+ {0x1ee27, 0x1ee27, 1},
+ {0x1ee29, 0x1ee32, 1},
+ {0x1ee34, 0x1ee37, 1},
+ {0x1ee39, 0x1ee39, 1},
+ {0x1ee3b, 0x1ee3b, 1},
+ {0x1ee42, 0x1ee42, 1},
+ {0x1ee47, 0x1ee47, 1},
+ {0x1ee49, 0x1ee49, 1},
+ {0x1ee4b, 0x1ee4b, 1},
+ {0x1ee4d, 0x1ee4f, 1},
+ {0x1ee51, 0x1ee52, 1},
+ {0x1ee54, 0x1ee54, 1},
+ {0x1ee57, 0x1ee57, 1},
+ {0x1ee59, 0x1ee59, 1},
+ {0x1ee5b, 0x1ee5b, 1},
+ {0x1ee5d, 0x1ee5d, 1},
+ {0x1ee5f, 0x1ee5f, 1},
+ {0x1ee61, 0x1ee62, 1},
+ {0x1ee64, 0x1ee64, 1},
+ {0x1ee67, 0x1ee6a, 1},
+ {0x1ee6c, 0x1ee72, 1},
+ {0x1ee74, 0x1ee77, 1},
+ {0x1ee79, 0x1ee7c, 1},
+ {0x1ee7e, 0x1ee7e, 1},
+ {0x1ee80, 0x1ee89, 1},
+ {0x1ee8b, 0x1ee9b, 1},
+ {0x1eea1, 0x1eea3, 1},
+ {0x1eea5, 0x1eea9, 1},
+ {0x1eeab, 0x1eebb, 1},
+ {0x1eef0, 0x1eef1, 1},
},
}
@@ -2887,6 +3080,7 @@ var _Armenian = &RangeTable{
{0x0559, 0x055f, 1},
{0x0561, 0x0587, 1},
{0x058a, 0x058a, 1},
+ {0x058f, 0x058f, 1},
{0xfb13, 0xfb17, 1},
},
}
@@ -2990,6 +3184,14 @@ var _Carian = &RangeTable{
},
}
+var _Chakma = &RangeTable{
+ R16: []Range16{},
+ R32: []Range32{
+ {0x11100, 0x11134, 1},
+ {0x11136, 0x11143, 1},
+ },
+}
+
var _Cham = &RangeTable{
R16: []Range16{
{0xaa00, 0xaa36, 1},
@@ -3029,7 +3231,6 @@ var _Common = &RangeTable{
{0x0660, 0x0669, 1},
{0x06dd, 0x06dd, 1},
{0x0964, 0x0965, 1},
- {0x0970, 0x0970, 1},
{0x0e3f, 0x0e3f, 1},
{0x0fd5, 0x0fd8, 1},
{0x10fb, 0x10fb, 1},
@@ -3040,13 +3241,14 @@ var _Common = &RangeTable{
{0x1cd3, 0x1cd3, 1},
{0x1ce1, 0x1ce1, 1},
{0x1ce9, 0x1cec, 1},
- {0x1cee, 0x1cf2, 1},
+ {0x1cee, 0x1cf3, 1},
+ {0x1cf5, 0x1cf6, 1},
{0x2000, 0x200b, 1},
{0x200e, 0x2064, 1},
{0x206a, 0x2070, 1},
{0x2074, 0x207e, 1},
{0x2080, 0x208e, 1},
- {0x20a0, 0x20b9, 1},
+ {0x20a0, 0x20ba, 1},
{0x2100, 0x2125, 1},
{0x2127, 0x2129, 1},
{0x212c, 0x2131, 1},
@@ -3057,12 +3259,10 @@ var _Common = &RangeTable{
{0x2400, 0x2426, 1},
{0x2440, 0x244a, 1},
{0x2460, 0x26ff, 1},
- {0x2701, 0x27ca, 1},
- {0x27cc, 0x27cc, 1},
- {0x27ce, 0x27ff, 1},
+ {0x2701, 0x27ff, 1},
{0x2900, 0x2b4c, 1},
{0x2b50, 0x2b59, 1},
- {0x2e00, 0x2e31, 1},
+ {0x2e00, 0x2e3b, 1},
{0x2ff0, 0x2ffb, 1},
{0x3000, 0x3004, 1},
{0x3006, 0x3006, 1},
@@ -3141,7 +3341,7 @@ var _Common = &RangeTable{
{0x1f0d1, 0x1f0df, 1},
{0x1f100, 0x1f10a, 1},
{0x1f110, 0x1f12e, 1},
- {0x1f130, 0x1f169, 1},
+ {0x1f130, 0x1f16b, 1},
{0x1f170, 0x1f19a, 1},
{0x1f1e6, 0x1f1ff, 1},
{0x1f201, 0x1f202, 1},
@@ -3160,31 +3360,22 @@ var _Common = &RangeTable{
{0x1f442, 0x1f4f7, 1},
{0x1f4f9, 0x1f4fc, 1},
{0x1f500, 0x1f53d, 1},
+ {0x1f540, 0x1f543, 1},
{0x1f550, 0x1f567, 1},
- {0x1f5fb, 0x1f5ff, 1},
- {0x1f601, 0x1f610, 1},
- {0x1f612, 0x1f614, 1},
- {0x1f616, 0x1f616, 1},
- {0x1f618, 0x1f618, 1},
- {0x1f61a, 0x1f61a, 1},
- {0x1f61c, 0x1f61e, 1},
- {0x1f620, 0x1f625, 1},
- {0x1f628, 0x1f62b, 1},
- {0x1f62d, 0x1f62d, 1},
- {0x1f630, 0x1f633, 1},
- {0x1f635, 0x1f640, 1},
+ {0x1f5fb, 0x1f640, 1},
{0x1f645, 0x1f64f, 1},
{0x1f680, 0x1f6c5, 1},
{0x1f700, 0x1f773, 1},
{0xe0001, 0xe0001, 1},
{0xe0020, 0xe007f, 1},
},
+ LatinOffset: 7,
}
var _Coptic = &RangeTable{
R16: []Range16{
{0x03e2, 0x03ef, 1},
- {0x2c80, 0x2cf1, 1},
+ {0x2c80, 0x2cf3, 1},
{0x2cf9, 0x2cff, 1},
},
}
@@ -3217,8 +3408,8 @@ var _Cyrillic = &RangeTable{
{0x1d2b, 0x1d2b, 1},
{0x1d78, 0x1d78, 1},
{0x2de0, 0x2dff, 1},
- {0xa640, 0xa673, 1},
- {0xa67c, 0xa697, 1},
+ {0xa640, 0xa697, 1},
+ {0xa69f, 0xa69f, 1},
},
}
@@ -3233,8 +3424,7 @@ var _Devanagari = &RangeTable{
R16: []Range16{
{0x0900, 0x0950, 1},
{0x0953, 0x0963, 1},
- {0x0966, 0x096f, 1},
- {0x0971, 0x0977, 1},
+ {0x0966, 0x0977, 1},
{0x0979, 0x097f, 1},
{0xa8e0, 0xa8fb, 1},
},
@@ -3287,9 +3477,13 @@ var _Ethiopic = &RangeTable{
var _Georgian = &RangeTable{
R16: []Range16{
{0x10a0, 0x10c5, 1},
+ {0x10c7, 0x10c7, 1},
+ {0x10cd, 0x10cd, 1},
{0x10d0, 0x10fa, 1},
- {0x10fc, 0x10fc, 1},
+ {0x10fc, 0x10ff, 1},
{0x2d00, 0x2d25, 1},
+ {0x2d27, 0x2d27, 1},
+ {0x2d2d, 0x2d2d, 1},
},
}
@@ -3361,8 +3555,7 @@ var _Gujarati = &RangeTable{
{0x0acb, 0x0acd, 1},
{0x0ad0, 0x0ad0, 1},
{0x0ae0, 0x0ae3, 1},
- {0x0ae6, 0x0aef, 1},
- {0x0af1, 0x0af1, 1},
+ {0x0ae6, 0x0af1, 1},
},
}
@@ -3397,9 +3590,8 @@ var _Han = &RangeTable{
{0x3021, 0x3029, 1},
{0x3038, 0x303b, 1},
{0x3400, 0x4db5, 1},
- {0x4e00, 0x9fcb, 1},
- {0xf900, 0xfa2d, 1},
- {0xfa30, 0xfa6d, 1},
+ {0x4e00, 0x9fcc, 1},
+ {0xf900, 0xfa6d, 1},
{0xfa70, 0xfad9, 1},
},
R32: []Range32{
@@ -3473,13 +3665,13 @@ var _Inherited = &RangeTable{
{0x0300, 0x036f, 1},
{0x0485, 0x0486, 1},
{0x064b, 0x0655, 1},
- {0x065f, 0x065f, 1},
{0x0670, 0x0670, 1},
{0x0951, 0x0952, 1},
{0x1cd0, 0x1cd2, 1},
{0x1cd4, 0x1ce0, 1},
{0x1ce2, 0x1ce8, 1},
{0x1ced, 0x1ced, 1},
+ {0x1cf4, 0x1cf4, 1},
{0x1dc0, 0x1de6, 1},
{0x1dfc, 0x1dff, 1},
{0x200c, 0x200d, 1},
@@ -3612,7 +3804,7 @@ var _Lao = &RangeTable{
{0x0ec6, 0x0ec6, 1},
{0x0ec8, 0x0ecd, 1},
{0x0ed0, 0x0ed9, 1},
- {0x0edc, 0x0edd, 1},
+ {0x0edc, 0x0edf, 1},
},
}
@@ -3642,13 +3834,14 @@ var _Latin = &RangeTable{
{0x2c60, 0x2c7f, 1},
{0xa722, 0xa787, 1},
{0xa78b, 0xa78e, 1},
- {0xa790, 0xa791, 1},
- {0xa7a0, 0xa7a9, 1},
- {0xa7fa, 0xa7ff, 1},
+ {0xa790, 0xa793, 1},
+ {0xa7a0, 0xa7aa, 1},
+ {0xa7f8, 0xa7ff, 1},
{0xfb00, 0xfb06, 1},
{0xff21, 0xff3a, 1},
{0xff41, 0xff5a, 1},
},
+ LatinOffset: 6,
}
var _Lepcha = &RangeTable{
@@ -3728,11 +3921,36 @@ var _Mandaic = &RangeTable{
var _Meetei_Mayek = &RangeTable{
R16: []Range16{
+ {0xaae0, 0xaaf6, 1},
{0xabc0, 0xabed, 1},
{0xabf0, 0xabf9, 1},
},
}
+var _Meroitic_Cursive = &RangeTable{
+ R16: []Range16{},
+ R32: []Range32{
+ {0x109a0, 0x109b7, 1},
+ {0x109be, 0x109bf, 1},
+ },
+}
+
+var _Meroitic_Hieroglyphs = &RangeTable{
+ R16: []Range16{},
+ R32: []Range32{
+ {0x10980, 0x1099f, 1},
+ },
+}
+
+var _Miao = &RangeTable{
+ R16: []Range16{},
+ R32: []Range32{
+ {0x16f00, 0x16f44, 1},
+ {0x16f50, 0x16f7e, 1},
+ {0x16f8f, 0x16f9f, 1},
+ },
+}
+
var _Mongolian = &RangeTable{
R16: []Range16{
{0x1800, 0x1801, 1},
@@ -3877,6 +4095,14 @@ var _Saurashtra = &RangeTable{
},
}
+var _Sharada = &RangeTable{
+ R16: []Range16{},
+ R32: []Range32{
+ {0x11180, 0x111c8, 1},
+ {0x111d0, 0x111d9, 1},
+ },
+}
+
var _Shavian = &RangeTable{
R16: []Range16{},
R32: []Range32{
@@ -3900,10 +4126,18 @@ var _Sinhala = &RangeTable{
},
}
+var _Sora_Sompeng = &RangeTable{
+ R16: []Range16{},
+ R32: []Range32{
+ {0x110d0, 0x110e8, 1},
+ {0x110f0, 0x110f9, 1},
+ },
+}
+
var _Sundanese = &RangeTable{
R16: []Range16{
- {0x1b80, 0x1baa, 1},
- {0x1bae, 0x1bb9, 1},
+ {0x1b80, 0x1bbf, 1},
+ {0x1cc0, 0x1cc7, 1},
},
}
@@ -3960,6 +4194,14 @@ var _Tai_Viet = &RangeTable{
},
}
+var _Takri = &RangeTable{
+ R16: []Range16{},
+ R32: []Range32{
+ {0x11680, 0x116b7, 1},
+ {0x116c0, 0x116c9, 1},
+ },
+}
+
var _Tamil = &RangeTable{
R16: []Range16{
{0x0b82, 0x0b83, 1},
@@ -4027,7 +4269,7 @@ var _Tibetan = &RangeTable{
var _Tifinagh = &RangeTable{
R16: []Range16{
- {0x2d30, 0x2d65, 1},
+ {0x2d30, 0x2d67, 1},
{0x2d6f, 0x2d70, 1},
{0x2d7f, 0x2d7f, 1},
},
@@ -4070,6 +4312,7 @@ var (
Buhid = _Buhid // Buhid is the set of Unicode characters in script Buhid.
Canadian_Aboriginal = _Canadian_Aboriginal // Canadian_Aboriginal is the set of Unicode characters in script Canadian_Aboriginal.
Carian = _Carian // Carian is the set of Unicode characters in script Carian.
+ Chakma = _Chakma // Chakma is the set of Unicode characters in script Chakma.
Cham = _Cham // Cham is the set of Unicode characters in script Cham.
Cherokee = _Cherokee // Cherokee is the set of Unicode characters in script Cherokee.
Common = _Common // Common is the set of Unicode characters in script Common.
@@ -4114,6 +4357,9 @@ var (
Malayalam = _Malayalam // Malayalam is the set of Unicode characters in script Malayalam.
Mandaic = _Mandaic // Mandaic is the set of Unicode characters in script Mandaic.
Meetei_Mayek = _Meetei_Mayek // Meetei_Mayek is the set of Unicode characters in script Meetei_Mayek.
+ Meroitic_Cursive = _Meroitic_Cursive // Meroitic_Cursive is the set of Unicode characters in script Meroitic_Cursive.
+ Meroitic_Hieroglyphs = _Meroitic_Hieroglyphs // Meroitic_Hieroglyphs is the set of Unicode characters in script Meroitic_Hieroglyphs.
+ Miao = _Miao // Miao is the set of Unicode characters in script Miao.
Mongolian = _Mongolian // Mongolian is the set of Unicode characters in script Mongolian.
Myanmar = _Myanmar // Myanmar is the set of Unicode characters in script Myanmar.
New_Tai_Lue = _New_Tai_Lue // New_Tai_Lue is the set of Unicode characters in script New_Tai_Lue.
@@ -4132,8 +4378,10 @@ var (
Runic = _Runic // Runic is the set of Unicode characters in script Runic.
Samaritan = _Samaritan // Samaritan is the set of Unicode characters in script Samaritan.
Saurashtra = _Saurashtra // Saurashtra is the set of Unicode characters in script Saurashtra.
+ Sharada = _Sharada // Sharada is the set of Unicode characters in script Sharada.
Shavian = _Shavian // Shavian is the set of Unicode characters in script Shavian.
Sinhala = _Sinhala // Sinhala is the set of Unicode characters in script Sinhala.
+ Sora_Sompeng = _Sora_Sompeng // Sora_Sompeng is the set of Unicode characters in script Sora_Sompeng.
Sundanese = _Sundanese // Sundanese is the set of Unicode characters in script Sundanese.
Syloti_Nagri = _Syloti_Nagri // Syloti_Nagri is the set of Unicode characters in script Syloti_Nagri.
Syriac = _Syriac // Syriac is the set of Unicode characters in script Syriac.
@@ -4142,6 +4390,7 @@ var (
Tai_Le = _Tai_Le // Tai_Le is the set of Unicode characters in script Tai_Le.
Tai_Tham = _Tai_Tham // Tai_Tham is the set of Unicode characters in script Tai_Tham.
Tai_Viet = _Tai_Viet // Tai_Viet is the set of Unicode characters in script Tai_Viet.
+ Takri = _Takri // Takri is the set of Unicode characters in script Takri.
Tamil = _Tamil // Tamil is the set of Unicode characters in script Tamil.
Telugu = _Telugu // Telugu is the set of Unicode characters in script Telugu.
Thaana = _Thaana // Thaana is the set of Unicode characters in script Thaana.
@@ -4154,7 +4403,7 @@ var (
)
// Generated by running
-// maketables --props=all --url=http://www.unicode.org/Public/6.0.0/ucd/
+// maketables --props=all --url=http://www.unicode.org/Public/6.2.0/ucd/
// DO NOT EDIT
// Properties is the set of Unicode property tables.
@@ -4199,6 +4448,7 @@ var _ASCII_Hex_Digit = &RangeTable{
{0x0041, 0x0046, 1},
{0x0061, 0x0066, 1},
},
+ LatinOffset: 3,
}
var _Bidi_Control = &RangeTable{
@@ -4222,6 +4472,7 @@ var _Dash = &RangeTable{
{0x2212, 0x2212, 1},
{0x2e17, 0x2e17, 1},
{0x2e1a, 0x2e1a, 1},
+ {0x2e3a, 0x2e3b, 1},
{0x301c, 0x301c, 1},
{0x3030, 0x3030, 1},
{0x30a0, 0x30a0, 1},
@@ -4230,6 +4481,7 @@ var _Dash = &RangeTable{
{0xfe63, 0xfe63, 1},
{0xff0d, 0xff0d, 1},
},
+ LatinOffset: 1,
}
var _Deprecated = &RangeTable{
@@ -4278,6 +4530,7 @@ var _Diacritic = &RangeTable{
{0x07a6, 0x07b0, 1},
{0x07eb, 0x07f5, 1},
{0x0818, 0x0819, 1},
+ {0x08e4, 0x08fe, 1},
{0x093c, 0x093c, 1},
{0x094d, 0x094d, 1},
{0x0951, 0x0954, 1},
@@ -4320,11 +4573,12 @@ var _Diacritic = &RangeTable{
{0x1b34, 0x1b34, 1},
{0x1b44, 0x1b44, 1},
{0x1b6b, 0x1b73, 1},
- {0x1baa, 0x1baa, 1},
+ {0x1baa, 0x1bab, 1},
{0x1c36, 0x1c37, 1},
{0x1c78, 0x1c7d, 1},
{0x1cd0, 0x1ce8, 1},
{0x1ced, 0x1ced, 1},
+ {0x1cf4, 0x1cf4, 1},
{0x1d2c, 0x1d6a, 1},
{0x1dc4, 0x1dcf, 1},
{0x1dfd, 0x1dff, 1},
@@ -4345,6 +4599,7 @@ var _Diacritic = &RangeTable{
{0xa6f0, 0xa6f1, 1},
{0xa717, 0xa721, 1},
{0xa788, 0xa788, 1},
+ {0xa7f8, 0xa7f9, 1},
{0xa8c4, 0xa8c4, 1},
{0xa8e0, 0xa8f1, 1},
{0xa92b, 0xa92e, 1},
@@ -4353,6 +4608,7 @@ var _Diacritic = &RangeTable{
{0xa9c0, 0xa9c0, 1},
{0xaa7b, 0xaa7b, 1},
{0xaabf, 0xaac2, 1},
+ {0xaaf6, 0xaaf6, 1},
{0xabec, 0xabed, 1},
{0xfb1e, 0xfb1e, 1},
{0xfe20, 0xfe26, 1},
@@ -4364,12 +4620,17 @@ var _Diacritic = &RangeTable{
},
R32: []Range32{
{0x110b9, 0x110ba, 1},
+ {0x11133, 0x11134, 1},
+ {0x111c0, 0x111c0, 1},
+ {0x116b6, 0x116b7, 1},
+ {0x16f8f, 0x16f9f, 1},
{0x1d167, 0x1d169, 1},
{0x1d16d, 0x1d172, 1},
{0x1d17b, 0x1d182, 1},
{0x1d185, 0x1d18b, 1},
{0x1d1aa, 0x1d1ad, 1},
},
+ LatinOffset: 6,
}
var _Extender = &RangeTable{
@@ -4380,6 +4641,7 @@ var _Extender = &RangeTable{
{0x07fa, 0x07fa, 1},
{0x0e46, 0x0e46, 1},
{0x0ec6, 0x0ec6, 1},
+ {0x180a, 0x180a, 1},
{0x1843, 0x1843, 1},
{0x1aa7, 0x1aa7, 1},
{0x1c36, 0x1c36, 1},
@@ -4393,8 +4655,10 @@ var _Extender = &RangeTable{
{0xa9cf, 0xa9cf, 1},
{0xaa70, 0xaa70, 1},
{0xaadd, 0xaadd, 1},
+ {0xaaf3, 0xaaf4, 1},
{0xff70, 0xff70, 1},
},
+ LatinOffset: 1,
}
var _Hex_Digit = &RangeTable{
@@ -4406,6 +4670,7 @@ var _Hex_Digit = &RangeTable{
{0xff21, 0xff26, 1},
{0xff41, 0xff46, 1},
},
+ LatinOffset: 3,
}
var _Hyphen = &RangeTable{
@@ -4421,6 +4686,7 @@ var _Hyphen = &RangeTable{
{0xff0d, 0xff0d, 1},
{0xff65, 0xff65, 1},
},
+ LatinOffset: 2,
}
var _IDS_Binary_Operator = &RangeTable{
@@ -4442,9 +4708,8 @@ var _Ideographic = &RangeTable{
{0x3021, 0x3029, 1},
{0x3038, 0x303a, 1},
{0x3400, 0x4db5, 1},
- {0x4e00, 0x9fcb, 1},
- {0xf900, 0xfa2d, 1},
- {0xfa30, 0xfa6d, 1},
+ {0x4e00, 0x9fcc, 1},
+ {0xf900, 0xfa6d, 1},
{0xfa70, 0xfad9, 1},
},
R32: []Range32{
@@ -4519,6 +4784,8 @@ var _Other_Alphabetic = &RangeTable{
{0x081b, 0x0823, 1},
{0x0825, 0x0827, 1},
{0x0829, 0x082c, 1},
+ {0x08e4, 0x08e9, 1},
+ {0x08f0, 0x08fe, 1},
{0x0900, 0x0903, 1},
{0x093a, 0x093b, 1},
{0x093e, 0x094c, 1},
@@ -4615,11 +4882,14 @@ var _Other_Alphabetic = &RangeTable{
{0x1b35, 0x1b43, 1},
{0x1b80, 0x1b82, 1},
{0x1ba1, 0x1ba9, 1},
+ {0x1bac, 0x1bad, 1},
{0x1be7, 0x1bf1, 1},
{0x1c24, 0x1c35, 1},
- {0x1cf2, 0x1cf2, 1},
+ {0x1cf2, 0x1cf3, 1},
{0x24b6, 0x24e9, 1},
{0x2de0, 0x2dff, 1},
+ {0xa674, 0xa67b, 1},
+ {0xa69f, 0xa69f, 1},
{0xa823, 0xa827, 1},
{0xa880, 0xa881, 1},
{0xa8b4, 0xa8c3, 1},
@@ -4634,6 +4904,8 @@ var _Other_Alphabetic = &RangeTable{
{0xaab2, 0xaab4, 1},
{0xaab7, 0xaab8, 1},
{0xaabe, 0xaabe, 1},
+ {0xaaeb, 0xaaef, 1},
+ {0xaaf5, 0xaaf5, 1},
{0xabe3, 0xabea, 1},
{0xfb1e, 0xfb1e, 1},
},
@@ -4645,6 +4917,12 @@ var _Other_Alphabetic = &RangeTable{
{0x11038, 0x11045, 1},
{0x11082, 0x11082, 1},
{0x110b0, 0x110b8, 1},
+ {0x11100, 0x11102, 1},
+ {0x11127, 0x11132, 1},
+ {0x11180, 0x11182, 1},
+ {0x111b3, 0x111bf, 1},
+ {0x116ab, 0x116b5, 1},
+ {0x16f51, 0x16f7e, 1},
},
}
@@ -4652,6 +4930,7 @@ var _Other_Default_Ignorable_Code_Point = &RangeTable{
R16: []Range16{
{0x034f, 0x034f, 1},
{0x115f, 0x1160, 1},
+ {0x17b4, 0x17b5, 1},
{0x2065, 0x2069, 1},
{0x3164, 0x3164, 1},
{0xffa0, 0xffa0, 1},
@@ -4680,6 +4959,7 @@ var _Other_Grapheme_Extend = &RangeTable{
{0x0dcf, 0x0dcf, 1},
{0x0ddf, 0x0ddf, 1},
{0x200c, 0x200d, 1},
+ {0x302e, 0x302f, 1},
{0xff9e, 0xff9f, 1},
},
R32: []Range32{
@@ -4695,6 +4975,7 @@ var _Other_ID_Continue = &RangeTable{
{0x1369, 0x1371, 1},
{0x19da, 0x19da, 1},
},
+ LatinOffset: 1,
}
var _Other_ID_Start = &RangeTable{
@@ -4707,20 +4988,26 @@ var _Other_ID_Start = &RangeTable{
var _Other_Lowercase = &RangeTable{
R16: []Range16{
+ {0x00aa, 0x00aa, 1},
+ {0x00ba, 0x00ba, 1},
{0x02b0, 0x02b8, 1},
{0x02c0, 0x02c1, 1},
{0x02e0, 0x02e4, 1},
{0x0345, 0x0345, 1},
{0x037a, 0x037a, 1},
- {0x1d2c, 0x1d61, 1},
+ {0x1d2c, 0x1d6a, 1},
{0x1d78, 0x1d78, 1},
{0x1d9b, 0x1dbf, 1},
- {0x2090, 0x2094, 1},
+ {0x2071, 0x2071, 1},
+ {0x207f, 0x207f, 1},
+ {0x2090, 0x209c, 1},
{0x2170, 0x217f, 1},
{0x24d0, 0x24e9, 1},
- {0x2c7d, 0x2c7d, 1},
+ {0x2c7c, 0x2c7d, 1},
{0xa770, 0xa770, 1},
+ {0xa7f8, 0xa7f9, 1},
},
+ LatinOffset: 2,
}
var _Other_Math = &RangeTable{
@@ -4827,7 +5114,41 @@ var _Other_Math = &RangeTable{
{0x1d7aa, 0x1d7c2, 1},
{0x1d7c4, 0x1d7cb, 1},
{0x1d7ce, 0x1d7ff, 1},
- },
+ {0x1ee00, 0x1ee03, 1},
+ {0x1ee05, 0x1ee1f, 1},
+ {0x1ee21, 0x1ee22, 1},
+ {0x1ee24, 0x1ee24, 1},
+ {0x1ee27, 0x1ee27, 1},
+ {0x1ee29, 0x1ee32, 1},
+ {0x1ee34, 0x1ee37, 1},
+ {0x1ee39, 0x1ee39, 1},
+ {0x1ee3b, 0x1ee3b, 1},
+ {0x1ee42, 0x1ee42, 1},
+ {0x1ee47, 0x1ee47, 1},
+ {0x1ee49, 0x1ee49, 1},
+ {0x1ee4b, 0x1ee4b, 1},
+ {0x1ee4d, 0x1ee4f, 1},
+ {0x1ee51, 0x1ee52, 1},
+ {0x1ee54, 0x1ee54, 1},
+ {0x1ee57, 0x1ee57, 1},
+ {0x1ee59, 0x1ee59, 1},
+ {0x1ee5b, 0x1ee5b, 1},
+ {0x1ee5d, 0x1ee5d, 1},
+ {0x1ee5f, 0x1ee5f, 1},
+ {0x1ee61, 0x1ee62, 1},
+ {0x1ee64, 0x1ee64, 1},
+ {0x1ee67, 0x1ee6a, 1},
+ {0x1ee6c, 0x1ee72, 1},
+ {0x1ee74, 0x1ee77, 1},
+ {0x1ee79, 0x1ee7c, 1},
+ {0x1ee7e, 0x1ee7e, 1},
+ {0x1ee80, 0x1ee89, 1},
+ {0x1ee8b, 0x1ee9b, 1},
+ {0x1eea1, 0x1eea3, 1},
+ {0x1eea5, 0x1eea9, 1},
+ {0x1eeab, 0x1eebb, 1},
+ },
+ LatinOffset: 1,
}
var _Other_Uppercase = &RangeTable{
@@ -4868,6 +5189,7 @@ var _Pattern_Syntax = &RangeTable{
{0xfd3e, 0xfd3f, 1},
{0xfe45, 0xfe46, 1},
},
+ LatinOffset: 15,
}
var _Pattern_White_Space = &RangeTable{
@@ -4878,6 +5200,7 @@ var _Pattern_White_Space = &RangeTable{
{0x200e, 0x200f, 1},
{0x2028, 0x2029, 1},
},
+ LatinOffset: 3,
}
var _Quotation_Mark = &RangeTable{
@@ -4895,6 +5218,7 @@ var _Quotation_Mark = &RangeTable{
{0xff07, 0xff07, 1},
{0xff62, 0xff63, 1},
},
+ LatinOffset: 4,
}
var _Radical = &RangeTable{
@@ -4944,6 +5268,7 @@ var _STerm = &RangeTable{
{0xa92f, 0xa92f, 1},
{0xa9c8, 0xa9c9, 1},
{0xaa5d, 0xaa5f, 1},
+ {0xaaf0, 0xaaf1, 1},
{0xabeb, 0xabeb, 1},
{0xfe52, 0xfe52, 1},
{0xfe56, 0xfe57, 1},
@@ -4956,7 +5281,10 @@ var _STerm = &RangeTable{
{0x10a56, 0x10a57, 1},
{0x11047, 0x11048, 1},
{0x110be, 0x110c1, 1},
+ {0x11141, 0x11143, 1},
+ {0x111c5, 0x111c6, 1},
},
+ LatinOffset: 3,
}
var _Soft_Dotted = &RangeTable{
@@ -4995,6 +5323,7 @@ var _Soft_Dotted = &RangeTable{
{0x1d65e, 0x1d65f, 1},
{0x1d692, 0x1d693, 1},
},
+ LatinOffset: 1,
}
var _Terminal_Punctuation = &RangeTable{
@@ -5048,6 +5377,7 @@ var _Terminal_Punctuation = &RangeTable{
{0xa9c7, 0xa9c9, 1},
{0xaa5d, 0xaa5f, 1},
{0xaadf, 0xaadf, 1},
+ {0xaaf0, 0xaaf1, 1},
{0xabeb, 0xabeb, 1},
{0xfe50, 0xfe52, 1},
{0xfe54, 0xfe57, 1},
@@ -5067,14 +5397,17 @@ var _Terminal_Punctuation = &RangeTable{
{0x10b3a, 0x10b3f, 1},
{0x11047, 0x1104d, 1},
{0x110be, 0x110c1, 1},
+ {0x11141, 0x11143, 1},
+ {0x111c5, 0x111c6, 1},
{0x12470, 0x12473, 1},
},
+ LatinOffset: 5,
}
var _Unified_Ideograph = &RangeTable{
R16: []Range16{
{0x3400, 0x4db5, 1},
- {0x4e00, 0x9fcb, 1},
+ {0x4e00, 0x9fcc, 1},
{0xfa0e, 0xfa0f, 1},
{0xfa11, 0xfa11, 1},
{0xfa13, 0xfa14, 1},
@@ -5114,6 +5447,7 @@ var _White_Space = &RangeTable{
{0x205f, 0x205f, 1},
{0x3000, 0x3000, 1},
},
+ LatinOffset: 4,
}
// These variables have type *RangeTable.
@@ -5153,7 +5487,7 @@ var (
)
// Generated by running
-// maketables --data=http://www.unicode.org/Public/6.0.0/ucd/UnicodeData.txt --casefolding=http://www.unicode.org/Public/6.0.0/ucd/CaseFolding.txt
+// maketables --data=http://www.unicode.org/Public/6.2.0/ucd/UnicodeData.txt --casefolding=http://www.unicode.org/Public/6.2.0/ucd/CaseFolding.txt
// DO NOT EDIT
// CaseRanges is the table describing case mappings for all letters with
@@ -5254,6 +5588,7 @@ var _CaseRanges = []CaseRange{
{0x0260, 0x0260, d{-205, 0, -205}},
{0x0263, 0x0263, d{-207, 0, -207}},
{0x0265, 0x0265, d{42280, 0, 42280}},
+ {0x0266, 0x0266, d{42308, 0, 42308}},
{0x0268, 0x0268, d{-209, 0, -209}},
{0x0269, 0x0269, d{-211, 0, -211}},
{0x026B, 0x026B, d{10743, 0, 10743}},
@@ -5315,6 +5650,8 @@ var _CaseRanges = []CaseRange{
{0x0531, 0x0556, d{0, 48, 0}},
{0x0561, 0x0586, d{-48, 0, -48}},
{0x10A0, 0x10C5, d{0, 7264, 0}},
+ {0x10C7, 0x10C7, d{0, 7264, 0}},
+ {0x10CD, 0x10CD, d{0, 7264, 0}},
{0x1D79, 0x1D79, d{35332, 0, 35332}},
{0x1D7D, 0x1D7D, d{3814, 0, 3814}},
{0x1E00, 0x1E95, d{UpperLower, UpperLower, UpperLower}},
@@ -5402,7 +5739,10 @@ var _CaseRanges = []CaseRange{
{0x2C7E, 0x2C7F, d{0, -10815, 0}},
{0x2C80, 0x2CE3, d{UpperLower, UpperLower, UpperLower}},
{0x2CEB, 0x2CEE, d{UpperLower, UpperLower, UpperLower}},
+ {0x2CF2, 0x2CF3, d{UpperLower, UpperLower, UpperLower}},
{0x2D00, 0x2D25, d{-7264, 0, -7264}},
+ {0x2D27, 0x2D27, d{-7264, 0, -7264}},
+ {0x2D2D, 0x2D2D, d{-7264, 0, -7264}},
{0xA640, 0xA66D, d{UpperLower, UpperLower, UpperLower}},
{0xA680, 0xA697, d{UpperLower, UpperLower, UpperLower}},
{0xA722, 0xA72F, d{UpperLower, UpperLower, UpperLower}},
@@ -5412,8 +5752,9 @@ var _CaseRanges = []CaseRange{
{0xA77E, 0xA787, d{UpperLower, UpperLower, UpperLower}},
{0xA78B, 0xA78C, d{UpperLower, UpperLower, UpperLower}},
{0xA78D, 0xA78D, d{0, -42280, 0}},
- {0xA790, 0xA791, d{UpperLower, UpperLower, UpperLower}},
+ {0xA790, 0xA793, d{UpperLower, UpperLower, UpperLower}},
{0xA7A0, 0xA7A9, d{UpperLower, UpperLower, UpperLower}},
+ {0xA7AA, 0xA7AA, d{0, -42308, 0}},
{0xFF21, 0xFF3A, d{0, 32, 0}},
{0xFF41, 0xFF5A, d{-32, 0, -32}},
{0x10400, 0x10427, d{0, 40, 0}},
@@ -5587,10 +5928,10 @@ var properties = [MaxLatin1 + 1]uint8{
0xA4: pS | pp, // '¤'
0xA5: pS | pp, // '¥'
0xA6: pS | pp, // '¦'
- 0xA7: pS | pp, // '§'
+ 0xA7: pP | pp, // '§'
0xA8: pS | pp, // '¨'
0xA9: pS | pp, // '©'
- 0xAA: pLl | pp, // 'ª'
+ 0xAA: pLo | pp, // 'ª'
0xAB: pP | pp, // '«'
0xAC: pS | pp, // '¬'
0xAD: 0, // '\u00ad'
@@ -5602,11 +5943,11 @@ var properties = [MaxLatin1 + 1]uint8{
0xB3: pN | pp, // '³'
0xB4: pS | pp, // '´'
0xB5: pLl | pp, // 'µ'
- 0xB6: pS | pp, // '¶'
+ 0xB6: pP | pp, // '¶'
0xB7: pP | pp, // '·'
0xB8: pS | pp, // '¸'
0xB9: pN | pp, // '¹'
- 0xBA: pLl | pp, // 'º'
+ 0xBA: pLo | pp, // 'º'
0xBB: pP | pp, // '»'
0xBC: pN | pp, // '¼'
0xBD: pN | pp, // '½'
@@ -5844,6 +6185,7 @@ var foldLl = &RangeTable{
{0x04d0, 0x0526, 2},
{0x0531, 0x0556, 1},
{0x10a0, 0x10c5, 1},
+ {0x10c7, 0x10cd, 6},
{0x1e00, 0x1e94, 2},
{0x1e9e, 0x1efe, 2},
{0x1f08, 0x1f0f, 1},
@@ -5873,20 +6215,22 @@ var foldLl = &RangeTable{
{0x2c7e, 0x2c80, 1},
{0x2c82, 0x2ce2, 2},
{0x2ceb, 0x2ced, 2},
- {0xa640, 0xa66c, 2},
+ {0x2cf2, 0xa640, 31054},
+ {0xa642, 0xa66c, 2},
{0xa680, 0xa696, 2},
{0xa722, 0xa72e, 2},
{0xa732, 0xa76e, 2},
{0xa779, 0xa77d, 2},
{0xa77e, 0xa786, 2},
{0xa78b, 0xa78d, 2},
- {0xa790, 0xa7a0, 16},
- {0xa7a2, 0xa7a8, 2},
+ {0xa790, 0xa792, 2},
+ {0xa7a0, 0xa7aa, 2},
{0xff21, 0xff3a, 1},
},
R32: []Range32{
{0x10400, 0x10427, 1},
},
+ LatinOffset: 3,
}
var foldLt = &RangeTable{
@@ -5941,11 +6285,12 @@ var foldLu = &RangeTable{
{0x0256, 0x0257, 1},
{0x0259, 0x025b, 2},
{0x0260, 0x0263, 3},
- {0x0265, 0x0268, 3},
- {0x0269, 0x026b, 2},
- {0x026f, 0x0271, 2},
- {0x0272, 0x0275, 3},
- {0x027d, 0x0283, 3},
+ {0x0265, 0x0266, 1},
+ {0x0268, 0x0269, 1},
+ {0x026b, 0x026f, 4},
+ {0x0271, 0x0272, 1},
+ {0x0275, 0x027d, 8},
+ {0x0280, 0x0283, 3},
{0x0288, 0x028c, 1},
{0x0292, 0x0345, 179},
{0x0371, 0x0373, 2},
@@ -5987,7 +6332,9 @@ var foldLu = &RangeTable{
{0x2c73, 0x2c76, 3},
{0x2c81, 0x2ce3, 2},
{0x2cec, 0x2cee, 2},
- {0x2d00, 0x2d25, 1},
+ {0x2cf3, 0x2d00, 13},
+ {0x2d01, 0x2d25, 1},
+ {0x2d27, 0x2d2d, 6},
{0xa641, 0xa66d, 2},
{0xa681, 0xa697, 2},
{0xa723, 0xa72f, 2},
@@ -5995,12 +6342,14 @@ var foldLu = &RangeTable{
{0xa77a, 0xa77c, 2},
{0xa77f, 0xa787, 2},
{0xa78c, 0xa791, 5},
- {0xa7a1, 0xa7a9, 2},
+ {0xa793, 0xa7a1, 14},
+ {0xa7a3, 0xa7a9, 2},
{0xff41, 0xff5a, 1},
},
R32: []Range32{
{0x10428, 0x1044f, 1},
},
+ LatinOffset: 4,
}
var foldM = &RangeTable{
@@ -6023,7 +6372,7 @@ var foldMn = &RangeTable{
// If there is no entry for a script name, there are no such points.
var FoldScript = map[string]*RangeTable{}
-// Range entries: 3391 16-bit, 659 32-bit, 4050 total.
-// Range bytes: 20346 16-bit, 7908 32-bit, 28254 total.
+// Range entries: 3462 16-bit, 832 32-bit, 4294 total.
+// Range bytes: 20772 16-bit, 9984 32-bit, 30756 total.
// Fold orbit bytes: 63 pairs, 252 bytes
diff --git a/src/pkg/unicode/utf8/example_test.go b/src/pkg/unicode/utf8/example_test.go
new file mode 100644
index 000000000..fe2037336
--- /dev/null
+++ b/src/pkg/unicode/utf8/example_test.go
@@ -0,0 +1,192 @@
+package utf8_test
+
+import (
+ "fmt"
+ "unicode/utf8"
+)
+
+func ExampleDecodeLastRune() { // decodes "Hello, 世界" from the end, printing each rune and its byte width
+ b := []byte("Hello, 世界")
+
+ for len(b) > 0 {
+ r, size := utf8.DecodeLastRune(b)
+ fmt.Printf("%c %v\n", r, size) // the ' ' rune prints as two spaces followed by its size
+
+ b = b[:len(b)-size] // drop the rune just decoded from the tail
+ }
+ // Output:
+ // 界 3
+ // 世 3
+ //   1
+ // , 1
+ // o 1
+ // l 1
+ // l 1
+ // e 1
+ // H 1
+}
+
+func ExampleDecodeLastRuneInString() { // string counterpart: decodes "Hello, 世界" from the end
+ str := "Hello, 世界"
+
+ for len(str) > 0 {
+ r, size := utf8.DecodeLastRuneInString(str)
+ fmt.Printf("%c %v\n", r, size) // the ' ' rune prints as two spaces followed by its size
+
+ str = str[:len(str)-size] // drop the rune just decoded from the tail
+ }
+ // Output:
+ // 界 3
+ // 世 3
+ //   1
+ // , 1
+ // o 1
+ // l 1
+ // l 1
+ // e 1
+ // H 1
+
+}
+
+func ExampleDecodeRune() { // decodes "Hello, 世界" front to back, printing each rune and its byte width
+ b := []byte("Hello, 世界")
+
+ for len(b) > 0 {
+ r, size := utf8.DecodeRune(b)
+ fmt.Printf("%c %v\n", r, size) // the ' ' rune prints as two spaces followed by its size
+
+ b = b[size:] // advance past the rune just decoded
+ }
+ // Output:
+ // H 1
+ // e 1
+ // l 1
+ // l 1
+ // o 1
+ // , 1
+ //   1
+ // 世 3
+ // 界 3
+}
+
+func ExampleDecodeRuneInString() { // string counterpart: decodes "Hello, 世界" front to back
+ str := "Hello, 世界"
+
+ for len(str) > 0 {
+ r, size := utf8.DecodeRuneInString(str)
+ fmt.Printf("%c %v\n", r, size) // the ' ' rune prints as two spaces followed by its size
+
+ str = str[size:] // advance past the rune just decoded
+ }
+ // Output:
+ // H 1
+ // e 1
+ // l 1
+ // l 1
+ // o 1
+ // , 1
+ //   1
+ // 世 3
+ // 界 3
+}
+
+func ExampleEncodeRune() { // encodes one rune into a byte buffer, then prints the buffer and the returned count
+ r := '世'
+ buf := make([]byte, 3) // three bytes: the expected output shows the encoding fills all of them
+
+ n := utf8.EncodeRune(buf, r) // n is the value printed last
+
+ fmt.Println(buf)
+ fmt.Println(n)
+ // Output:
+ // [228 184 150]
+ // 3
+}
+
+func ExampleFullRune() { // a complete three-byte encoding reports true; the same bytes cut to two report false
+ buf := []byte{228, 184, 150} // 世
+ fmt.Println(utf8.FullRune(buf))
+ fmt.Println(utf8.FullRune(buf[:2]))
+ // Output:
+ // true
+ // false
+}
+
+func ExampleFullRuneInString() { // string counterpart: the whole string reports true, its first two bytes report false
+ str := "世"
+ fmt.Println(utf8.FullRuneInString(str))
+ fmt.Println(utf8.FullRuneInString(str[:2]))
+ // Output:
+ // true
+ // false
+}
+
+func ExampleRuneCount() { // contrasts the byte length of a slice with the number of runes it holds
+ buf := []byte("Hello, 世界")
+ fmt.Println("bytes =", len(buf)) // len counts bytes, not runes
+ fmt.Println("runes =", utf8.RuneCount(buf))
+ // Output:
+ // bytes = 13
+ // runes = 9
+}
+
+func ExampleRuneCountInString() { // string counterpart: byte length versus rune count
+ str := "Hello, 世界"
+ fmt.Println("bytes =", len(str)) // len counts bytes, not runes
+ fmt.Println("runes =", utf8.RuneCountInString(str))
+ // Output:
+ // bytes = 13
+ // runes = 9
+}
+
+func ExampleRuneLen() { // prints the encoded length of an ASCII rune and of a CJK rune
+ fmt.Println(utf8.RuneLen('a'))
+ fmt.Println(utf8.RuneLen('界'))
+ // Output:
+ // 1
+ // 3
+}
+
+func ExampleRuneStart() { // prints RuneStart for the first three bytes of "a界"
+ buf := []byte("a界")
+ fmt.Println(utf8.RuneStart(buf[0])) // 'a', a single-byte rune
+ fmt.Println(utf8.RuneStart(buf[1])) // first byte of 界
+ fmt.Println(utf8.RuneStart(buf[2]))
+ // Output:
+ // true
+ // true
+ // false
+}
+
+func ExampleValid() { // checks one well-formed byte slice and one the example expects to be rejected
+ valid := []byte("Hello, 世界")
+ invalid := []byte{0xff, 0xfe, 0xfd} // expected to report false
+
+ fmt.Println(utf8.Valid(valid))
+ fmt.Println(utf8.Valid(invalid))
+ // Output:
+ // true
+ // false
+}
+
+func ExampleValidRune() { // checks an ordinary rune and a value the example expects to be rejected
+ valid := 'a'
+ invalid := rune(0xfffffff) // expected to report false
+
+ fmt.Println(utf8.ValidRune(valid))
+ fmt.Println(utf8.ValidRune(invalid))
+ // Output:
+ // true
+ // false
+}
+
+func ExampleValidString() { // string counterpart of the byte-slice validity check
+ valid := "Hello, 世界"
+ invalid := string([]byte{0xff, 0xfe, 0xfd}) // same bytes the byte-slice example rejects, held in a string
+
+ fmt.Println(utf8.ValidString(valid))
+ fmt.Println(utf8.ValidString(invalid))
+ // Output:
+ // true
+ // false
+}
diff --git a/src/pkg/unicode/utf8/utf8.go b/src/pkg/unicode/utf8/utf8.go
index 57ea19e96..93d0be5e0 100644
--- a/src/pkg/unicode/utf8/utf8.go
+++ b/src/pkg/unicode/utf8/utf8.go
@@ -18,6 +18,12 @@ const (
UTFMax = 4 // maximum number of bytes of a UTF-8 encoded Unicode character.
)
+// Code points in the surrogate range are not valid for UTF-8.
+const (
+ surrogateMin = 0xD800
+ surrogateMax = 0xDFFF
+)
+
const (
t1 = 0x00 // 0000 0000
tx = 0x80 // 1000 0000
@@ -34,7 +40,6 @@ const (
rune1Max = 1<<7 - 1
rune2Max = 1<<11 - 1
rune3Max = 1<<16 - 1
- rune4Max = 1<<21 - 1
)
func decodeRuneInternal(p []byte) (r rune, size int, short bool) {
@@ -87,6 +92,9 @@ func decodeRuneInternal(p []byte) (r rune, size int, short bool) {
if r <= rune2Max {
return RuneError, 1, false
}
+ if surrogateMin <= r && r <= surrogateMax {
+ return RuneError, 1, false
+ }
return r, 3, false
}
@@ -102,7 +110,7 @@ func decodeRuneInternal(p []byte) (r rune, size int, short bool) {
// 4-byte, 21-bit sequence?
if c0 < t5 {
r = rune(c0&mask4)<<18 | rune(c1&maskx)<<12 | rune(c2&maskx)<<6 | rune(c3&maskx)
- if r <= rune3Max {
+ if r <= rune3Max || MaxRune < r {
return RuneError, 1, false
}
return r, 4, false
@@ -162,6 +170,9 @@ func decodeRuneInStringInternal(s string) (r rune, size int, short bool) {
if r <= rune2Max {
return RuneError, 1, false
}
+ if surrogateMin <= r && r <= surrogateMax {
+ return RuneError, 1, false
+ }
return r, 3, false
}
@@ -177,7 +188,7 @@ func decodeRuneInStringInternal(s string) (r rune, size int, short bool) {
// 4-byte, 21-bit sequence?
if c0 < t5 {
r = rune(c0&mask4)<<18 | rune(c1&maskx)<<12 | rune(c2&maskx)<<6 | rune(c3&maskx)
- if r <= rune3Max {
+ if r <= rune3Max || MaxRune < r {
return RuneError, 1, false
}
return r, 4, false
@@ -202,6 +213,9 @@ func FullRuneInString(s string) bool {
// DecodeRune unpacks the first UTF-8 encoding in p and returns the rune and its width in bytes.
// If the encoding is invalid, it returns (RuneError, 1), an impossible result for correct UTF-8.
+// An encoding is invalid if it is incorrect UTF-8, encodes a rune that is
+// out of range, or is not the shortest possible UTF-8 encoding for the
+// value. No other validation is performed.
func DecodeRune(p []byte) (r rune, size int) {
r, size, _ = decodeRuneInternal(p)
return
@@ -209,6 +223,9 @@ func DecodeRune(p []byte) (r rune, size int) {
// DecodeRuneInString is like DecodeRune but its input is a string.
// If the encoding is invalid, it returns (RuneError, 1), an impossible result for correct UTF-8.
+// An encoding is invalid if it is incorrect UTF-8, encodes a rune that is
+// out of range, or is not the shortest possible UTF-8 encoding for the
+// value. No other validation is performed.
func DecodeRuneInString(s string) (r rune, size int) {
r, size, _ = decodeRuneInStringInternal(s)
return
@@ -216,6 +233,9 @@ func DecodeRuneInString(s string) (r rune, size int) {
// DecodeLastRune unpacks the last UTF-8 encoding in p and returns the rune and its width in bytes.
// If the encoding is invalid, it returns (RuneError, 1), an impossible result for correct UTF-8.
+// An encoding is invalid if it is incorrect UTF-8, encodes a rune that is
+// out of range, or is not the shortest possible UTF-8 encoding for the
+// value. No other validation is performed.
func DecodeLastRune(p []byte) (r rune, size int) {
end := len(p)
if end == 0 {
@@ -250,6 +270,9 @@ func DecodeLastRune(p []byte) (r rune, size int) {
// DecodeLastRuneInString is like DecodeLastRune but its input is a string.
// If the encoding is invalid, it returns (RuneError, 1), an impossible result for correct UTF-8.
+// An encoding is invalid if it is incorrect UTF-8, encodes a rune that is
+// out of range, or is not the shortest possible UTF-8 encoding for the
+// value. No other validation is performed.
func DecodeLastRuneInString(s string) (r rune, size int) {
end := len(s)
if end == 0 {
@@ -283,15 +306,20 @@ func DecodeLastRuneInString(s string) (r rune, size int) {
}
// RuneLen returns the number of bytes required to encode the rune.
+// It returns -1 if the rune is not a valid value to encode in UTF-8.
func RuneLen(r rune) int {
switch {
+ case r < 0:
+ return -1
case r <= rune1Max:
return 1
case r <= rune2Max:
return 2
+ case surrogateMin <= r && r <= surrogateMax:
+ return -1
case r <= rune3Max:
return 3
- case r <= rune4Max:
+ case r <= MaxRune:
return 4
}
return -1
@@ -316,6 +344,10 @@ func EncodeRune(p []byte, r rune) int {
r = RuneError
}
+ if surrogateMin <= r && r <= surrogateMax {
+ r = RuneError
+ }
+
if uint32(r) <= rune3Max {
p[0] = t3 | byte(r>>12)
p[1] = tx | byte(r>>6)&maskx
@@ -368,7 +400,7 @@ func Valid(p []byte) bool {
} else {
_, size := DecodeRune(p[i:])
if size == 1 {
- // All valid runes of size of 1 (those
+ // All valid runes of size 1 (those
// below RuneSelf) were handled above.
// This must be a RuneError.
return false
@@ -395,3 +427,17 @@ func ValidString(s string) bool {
}
return true
}
+
+// ValidRune reports whether r can be legally encoded as UTF-8.
+// Code points that are out of range or a surrogate half are illegal.
+func ValidRune(r rune) bool {
+ switch {
+ case r < 0: // negative values are out of range
+ return false
+ case surrogateMin <= r && r <= surrogateMax: // surrogate halves, both bounds inclusive
+ return false
+ case r > MaxRune: // above the largest rune accepted
+ return false
+ }
+ return true
+}
diff --git a/src/pkg/unicode/utf8/utf8_test.go b/src/pkg/unicode/utf8/utf8_test.go
index 4f73c8fb8..c516871c9 100644
--- a/src/pkg/unicode/utf8/utf8_test.go
+++ b/src/pkg/unicode/utf8/utf8_test.go
@@ -56,6 +56,8 @@ var utf8map = []Utf8Map{
{0x07ff, "\xdf\xbf"},
{0x0800, "\xe0\xa0\x80"},
{0x0801, "\xe0\xa0\x81"},
+ {0xd7ff, "\xed\x9f\xbf"}, // last code point before surrogate half.
+ {0xe000, "\xee\x80\x80"}, // first code point after surrogate half.
{0xfffe, "\xef\xbf\xbe"},
{0xffff, "\xef\xbf\xbf"},
{0x10000, "\xf0\x90\x80\x80"},
@@ -65,6 +67,11 @@ var utf8map = []Utf8Map{
{0xFFFD, "\xef\xbf\xbd"},
}
+var surrogateMap = []Utf8Map{
+ {0xd800, "\xed\xa0\x80"}, // surrogate min decodes to (RuneError, 1)
+ {0xdfff, "\xed\xbf\xbf"}, // surrogate max decodes to (RuneError, 1)
+}
+
var testStrings = []string{
"",
"abcd",
@@ -75,8 +82,7 @@ var testStrings = []string{
}
func TestFullRune(t *testing.T) {
- for i := 0; i < len(utf8map); i++ {
- m := utf8map[i]
+ for _, m := range utf8map {
b := []byte(m.str)
if !FullRune(b) {
t.Errorf("FullRune(%q) (%U) = false, want true", b, m.r)
@@ -97,8 +103,7 @@ func TestFullRune(t *testing.T) {
}
func TestEncodeRune(t *testing.T) {
- for i := 0; i < len(utf8map); i++ {
- m := utf8map[i]
+ for _, m := range utf8map {
b := []byte(m.str)
var buf [10]byte
n := EncodeRune(buf[0:], m.r)
@@ -110,8 +115,7 @@ func TestEncodeRune(t *testing.T) {
}
func TestDecodeRune(t *testing.T) {
- for i := 0; i < len(utf8map); i++ {
- m := utf8map[i]
+ for _, m := range utf8map {
b := []byte(m.str)
r, size := DecodeRune(b)
if r != m.r || size != len(b) {
@@ -168,6 +172,21 @@ func TestDecodeRune(t *testing.T) {
}
}
+func TestDecodeSurrogateRune(t *testing.T) { // both decoders must return (RuneError, 1) for every surrogateMap entry
+ for _, m := range surrogateMap {
+ b := []byte(m.str)
+ r, size := DecodeRune(b)
+ if r != RuneError || size != 1 {
+ t.Errorf("DecodeRune(%q) = %x, %d want %x, %d", b, r, size, RuneError, 1)
+ }
+ s := m.str
+ r, size = DecodeRuneInString(s)
+ if r != RuneError || size != 1 {
+ t.Errorf("DecodeRuneInString(%q) = %x, %d want %x, %d", s, r, size, RuneError, 1) // name the function and input actually under test
+ }
+ }
+}
+
// Check that DecodeRune and DecodeLastRune correspond to
// the equivalent range loop.
func TestSequencing(t *testing.T) {
@@ -284,8 +303,7 @@ var runecounttests = []RuneCountTest{
}
func TestRuneCount(t *testing.T) {
- for i := 0; i < len(runecounttests); i++ {
- tt := runecounttests[i]
+ for _, tt := range runecounttests {
if out := RuneCountInString(tt.in); out != tt.out {
t.Errorf("RuneCountInString(%q) = %d, want %d", tt.in, out, tt.out)
}
@@ -295,6 +313,32 @@ func TestRuneCount(t *testing.T) {
}
}
+type RuneLenTest struct { // one RuneLen case: an input rune and the expected result
+ r rune
+ size int // expected RuneLen result; the table uses -1 for runes it expects to be rejected
+}
+
+var runelentests = []RuneLenTest{ // cases iterated by TestRuneLen
+ {0, 1},
+ {'e', 1},
+ {'é', 2},
+ {'☺', 3},
+ {RuneError, 3},
+ {MaxRune, 4},
+ {0xD800, -1}, // first surrogate
+ {0xDFFF, -1}, // last surrogate
+ {MaxRune + 1, -1}, // one past MaxRune
+ {-1, -1}, // negative
+}
+
+func TestRuneLen(t *testing.T) { // compares RuneLen against every entry of runelentests
+ for _, tt := range runelentests {
+ if size := RuneLen(tt.r); size != tt.size {
+ t.Errorf("RuneLen(%#U) = %d, want %d", tt.r, size, tt.size)
+ }
+ }
+}
+
type ValidTest struct {
in string
out bool
@@ -311,15 +355,50 @@ var validTests = []ValidTest{
{string([]byte{66, 250}), false},
{string([]byte{66, 250, 67}), false},
{"a\uFFFDb", true},
+ {string("\xF4\x8F\xBF\xBF"), true}, // U+10FFFF
+ {string("\xF4\x90\x80\x80"), false}, // U+10FFFF+1; out of range
+ {string("\xF7\xBF\xBF\xBF"), false}, // 0x1FFFFF; out of range
+ {string("\xFB\xBF\xBF\xBF\xBF"), false}, // 0x3FFFFFF; out of range
+ {string("\xc0\x80"), false}, // U+0000 encoded in two bytes: incorrect
+ {string("\xed\xa0\x80"), false}, // U+D800 high surrogate (sic)
+ {string("\xed\xbf\xbf"), false}, // U+DFFF low surrogate (sic)
}
func TestValid(t *testing.T) {
- for i, tt := range validTests {
+ for _, tt := range validTests {
if Valid([]byte(tt.in)) != tt.out {
- t.Errorf("%d. Valid(%q) = %v; want %v", i, tt.in, !tt.out, tt.out)
+ t.Errorf("Valid(%q) = %v; want %v", tt.in, !tt.out, tt.out)
}
if ValidString(tt.in) != tt.out {
- t.Errorf("%d. ValidString(%q) = %v; want %v", i, tt.in, !tt.out, tt.out)
+ t.Errorf("ValidString(%q) = %v; want %v", tt.in, !tt.out, tt.out)
+ }
+ }
+}
+
+type ValidRuneTest struct { // one ValidRune case: an input rune and the expected verdict
+ r rune
+ ok bool // expected ValidRune result
+}
+
+var validrunetests = []ValidRuneTest{ // cases iterated by TestValidRune
+ {0, true},
+ {'e', true},
+ {'é', true},
+ {'☺', true},
+ {RuneError, true},
+ {MaxRune, true},
+ {0xD7FF, true}, // last value below the surrogate range
+ {0xD800, false}, // first surrogate
+ {0xDFFF, false}, // last surrogate
+ {0xE000, true}, // first value above the surrogate range
+ {MaxRune + 1, false}, // one past MaxRune
+ {-1, false}, // negative
+}
+
+func TestValidRune(t *testing.T) { // compares ValidRune against every entry of validrunetests
+ for _, tt := range validrunetests {
+ if ok := ValidRune(tt.r); ok != tt.ok {
+ t.Errorf("ValidRune(%#U) = %t, want %t", tt.r, ok, tt.ok)
}
}
}