diff options
-rw-r--r-- | src/pkg/unicode/maketables.go | 91 | ||||
-rw-r--r-- | src/pkg/unicode/script_test.go | 29 | ||||
-rw-r--r-- | src/pkg/unicode/tables.go | 349 |
3 files changed, 179 insertions, 290 deletions
diff --git a/src/pkg/unicode/maketables.go b/src/pkg/unicode/maketables.go index 1cdfed679..d1e599c40 100644 --- a/src/pkg/unicode/maketables.go +++ b/src/pkg/unicode/maketables.go @@ -14,6 +14,7 @@ import ( "http"; "log"; "os"; + "sort"; "strconv"; "strings"; "regexp"; @@ -235,7 +236,7 @@ func printCategories() { die.Log(err); } if resp.StatusCode != 200 { - die.Log("bad GET status for UnicodeData.txt", resp.StatusCode); + die.Log("bad GET status for UnicodeData.txt", resp.Status); } input := bufio.NewReader(resp.Body); for { @@ -279,6 +280,8 @@ func printCategories() { fmt.Printf("}\n\n"); } + decl := make(sort.StringArray, len(list)); + ndecl := 0; for _, name := range list { if _, ok := category[name]; !ok { die.Log("unknown category", name); @@ -286,56 +289,52 @@ func printCategories() { // We generate an UpperCase name to serve as concise documentation and an _UnderScored // name to store the data. This stops godoc dumping all the tables but keeps them // available to clients. - if name == "letter" { // special case - dumpRange( - "\n// Letter is the set of Unicode letters.\n" - "var Letter = letter\n" - "var letter = []Range {\n", - letterOp, - "}\n" - ); - continue; - } // Cases deserving special comments + varDecl := ""; switch name { + case "letter": + varDecl = "\tLetter = letter; // Letter is the set of Unicode letters.\n"; case "Nd": - fmt.Printf( - "\n// Digit is the set of Unicode characters with the \"decimal digit\" property.\n" - "var Digit = Nd\n\n" - ) + varDecl = "\tDigit = _Nd; // Digit is the set of Unicode characters with the \"decimal digit\" property.\n"; case "Lu": - fmt.Printf( - "\n// Upper is the set of Unicode upper case letters.\n" - "var Upper = Lu\n\n" - ) + varDecl = "\tUpper = _Lu; // Upper is the set of Unicode upper case letters.\n"; case "Ll": - fmt.Printf( - "\n// Lower is the set of Unicode lower case letters.\n" - "var Lower = Ll\n\n" - ) + varDecl = "\tLower = _Ll; // Lower is the set of Unicode lower case letters.\n"; case "Lt": - fmt.Printf( - "\n// Title is the set of Unicode title case letters.\n" - "var Title = Lt\n\n" - ) + varDecl = "\tTitle = _Lt; // Title is the set of Unicode title case letters.\n"; + } + if name != "letter" { + varDecl += fmt.Sprintf( + "\t%s = _%s; // %s is the set of Unicode characters in category %s.\n", + name, name, name, name + ); + } + decl[ndecl] = varDecl; + ndecl++; + if name == "letter" { // special case + dumpRange( + "var letter = []Range {\n", + letterOp + ); + continue; } dumpRange( - fmt.Sprintf( - "// %s is the set of Unicode characters in category %s.\n" - "var %s = _%s\n" - "var _%s = []Range {\n", - name, name, name, name, name - ), - func(code int) bool { return chars[code].category == name }, - "}\n\n" + fmt.Sprintf("var _%s = []Range {\n", name), + func(code int) bool { return chars[code].category == name } ); } + decl.Sort(); + fmt.Println("var ("); + for _, d := range decl { + fmt.Print(d); + } + fmt.Println(")\n"); } type Op func(code int) bool const format = "\tRange{0x%04x, 0x%04x, %d},\n"; -func dumpRange(header string, inCategory Op, trailer string) { +func dumpRange(header string, inCategory Op) { fmt.Print(header); next := 0; // one Range for each iteration @@ -382,7 +381,7 @@ func dumpRange(header string, inCategory Op, trailer string) { // next range: start looking where this range ends next = hi + 1; } - fmt.Print(trailer); + fmt.Print("}\n\n"); } func fullCategoryTest(list []string) { @@ -510,19 +509,27 @@ func printScripts() { fmt.Printf("}\n\n"); } + decl := make(sort.StringArray, len(list)); + ndecl := 0; for _, name := range list { - fmt.Printf( - "// %s is the set of Unicode characters in script %s.\n" - "var %s = _%s\n" - "var _%s = []Range {\n", - name, name, name, name, name + decl[ndecl] = fmt.Sprintf( + "\t%s = _%s;\t// %s is the set of Unicode characters in script %s.\n", + name, name, name, name ); + ndecl++; + fmt.Printf("var _%s = []Range {\n", name); ranges := foldAdjacent(scripts[name]); for _, s := range ranges { fmt.Printf(format, s.Lo, s.Hi, s.Stride); } fmt.Printf("}\n\n"); } + decl.Sort(); + fmt.Println("var ("); + for _, d := range decl { + fmt.Print(d); + } + fmt.Println(")\n"); } // The script tables have a lot of adjacent elements. Fold them together. diff --git a/src/pkg/unicode/script_test.go b/src/pkg/unicode/script_test.go index f0c5fa6b9..390e47b36 100644 --- a/src/pkg/unicode/script_test.go +++ b/src/pkg/unicode/script_test.go @@ -132,43 +132,38 @@ var inCategoryTest = []T { } func TestScripts(t *testing.T) { + notTested := make(map[string] bool); + for k := range Scripts { + notTested[k] = true + } for i, test := range inTest { if !Is(Scripts[test.script], test.rune) { t.Errorf("IsScript(%#x, %s) = false, want true\n", test.rune, test.script); } + notTested[test.script] = false, false } for i, test := range outTest { if Is(Scripts[test.script], test.rune) { t.Errorf("IsScript(%#x, %s) = true, want false\n", test.rune, test.script); } } - tested := make(map[string] bool); - for k := range Scripts { - tested[k] = true - } - for _, test := range inTest { - tested[test.script] = false, false - } - for k := range tested { + for k := range notTested { t.Error("not tested:", k) } } - func TestCategories(t *testing.T) { + notTested := make(map[string] bool); + for k := range Categories { + notTested[k] = true + } for i, test := range inCategoryTest { if !Is(Categories[test.script], test.rune) { t.Errorf("IsCategory(%#x, %s) = false, want true\n", test.rune, test.script); } + notTested[test.script] = false, false } - tested := make(map[string] bool); - for k := range Categories { - tested[k] = true - } - for _, test := range inCategoryTest { - tested[test.script] = false, false - } - for k := range tested { + for k := range notTested { t.Error("not tested:", k) } } diff --git a/src/pkg/unicode/tables.go b/src/pkg/unicode/tables.go index 767c88df8..a795799a6 100644 --- a/src/pkg/unicode/tables.go +++ b/src/pkg/unicode/tables.go @@ -41,8 +41,6 @@ var Categories = map[string] []Range { "Lo": Lo, } -// Lm is the set of Unicode characters in category Lm. -var Lm = _Lm var _Lm = []Range { Range{0x02b0, 0x02c1, 1}, Range{0x02c6, 0x02d1, 1}, @@ -75,12 +73,6 @@ var _Lm = []Range { Range{0xff9f, 0xff9f, 1}, } - -// Lower is the set of Unicode lower case letters. -var Lower = Ll - -// Ll is the set of Unicode characters in category Ll. -var Ll = _Ll var _Ll = []Range { Range{0x0061, 0x007a, 1}, Range{0x00aa, 0x00b5, 11}, @@ -221,8 +213,6 @@ var _Ll = []Range { Range{0x1d7cb, 0x1d7cb, 1}, } -// Me is the set of Unicode characters in category Me. -var Me = _Me var _Me = []Range { Range{0x0488, 0x0489, 1}, Range{0x06de, 0x20dd, 6655}, @@ -231,8 +221,6 @@ var _Me = []Range { Range{0xa670, 0xa672, 1}, } -// Mc is the set of Unicode characters in category Mc. -var Mc = _Mc var _Mc = []Range { Range{0x0903, 0x093e, 59}, Range{0x093f, 0x0940, 1}, @@ -314,8 +302,6 @@ var _Mc = []Range { Range{0x1d16e, 0x1d172, 1}, } -// Mn is the set of Unicode characters in category Mn. -var Mn = _Mn var _Mn = []Range { Range{0x0300, 0x036f, 1}, Range{0x0483, 0x0487, 1}, @@ -461,15 +447,10 @@ var _Mn = []Range { Range{0xe0100, 0xe01ef, 1}, } -// Zl is the set of Unicode characters in category Zl. -var Zl = _Zl var _Zl = []Range { Range{0x2028, 0x2028, 1}, } - -// Letter is the set of Unicode letters. -var Letter = letter var letter = []Range { Range{0x0041, 0x005a, 1}, Range{0x0061, 0x007a, 1}, @@ -843,14 +824,11 @@ var letter = []Range { Range{0x20000, 0x2a6d6, 42710}, Range{0x2f800, 0x2fa1d, 1}, } -// Zp is the set of Unicode characters in category Zp. -var Zp = _Zp + var _Zp = []Range { Range{0x2029, 0x2029, 1}, } -// Zs is the set of Unicode characters in category Zs. -var Zs = _Zs var _Zs = []Range { Range{0x0020, 0x00a0, 128}, Range{0x1680, 0x180e, 398}, @@ -859,24 +837,18 @@ var _Zs = []Range { Range{0x3000, 0x3000, 1}, } -// Cs is the set of Unicode characters in category Cs. -var Cs = _Cs var _Cs = []Range { Range{0xd800, 0xdb7f, 895}, Range{0xdb80, 0xdbff, 127}, Range{0xdc00, 0xdfff, 1023}, } -// Co is the set of Unicode characters in category Co. -var Co = _Co var _Co = []Range { Range{0xe000, 0xf8ff, 6399}, Range{0xf0000, 0xffffd, 65533}, Range{0x100000, 0x10fffd, 65533}, } -// Cf is the set of Unicode characters in category Cf. -var Cf = _Cf var _Cf = []Range { Range{0x00ad, 0x0600, 1363}, Range{0x0601, 0x0603, 1}, @@ -893,15 +865,11 @@ var _Cf = []Range { Range{0xe0021, 0xe007f, 1}, } -// Cc is the set of Unicode characters in category Cc. -var Cc = _Cc var _Cc = []Range { Range{0x0001, 0x001f, 1}, Range{0x007f, 0x009f, 1}, } -// Po is the set of Unicode characters in category Po. -var Po = _Po var _Po = []Range { Range{0x0021, 0x0023, 1}, Range{0x0025, 0x0027, 1}, @@ -999,8 +967,6 @@ var _Po = []Range { Range{0x12470, 0x12473, 1}, } -// Pi is the set of Unicode characters in category Pi. -var Pi = _Pi var _Pi = []Range { Range{0x00ab, 0x2018, 8045}, Range{0x201b, 0x201c, 1}, @@ -1010,8 +976,6 @@ var _Pi = []Range { Range{0x2e1c, 0x2e20, 4}, } -// Pf is the set of Unicode characters in category Pf. -var Pf = _Pf var _Pf = []Range { Range{0x00bb, 0x2019, 8030}, Range{0x201d, 0x203a, 29}, @@ -1020,8 +984,6 @@ var _Pf = []Range { Range{0x2e1d, 0x2e21, 4}, } -// Pe is the set of Unicode characters in category Pe. -var Pe = _Pe var _Pe = []Range { Range{0x0029, 0x005d, 52}, Range{0x007d, 0x0f3b, 3774}, @@ -1046,8 +1008,6 @@ var _Pe = []Range { Range{0xff5d, 0xff63, 3}, } -// Pd is the set of Unicode characters in category Pd. -var Pd = _Pd var _Pd = []Range { Range{0x002d, 0x058a, 1373}, Range{0x05be, 0x1806, 4680}, @@ -1059,8 +1019,6 @@ var _Pd = []Range { Range{0xfe63, 0xff0d, 170}, } -// Pc is the set of Unicode characters in category Pc. -var Pc = _Pc var _Pc = []Range { Range{0x005f, 0x203f, 8160}, Range{0x2040, 0x2054, 20}, @@ -1069,8 +1027,6 @@ var _Pc = []Range { Range{0xff3f, 0xff3f, 1}, } -// Ps is the set of Unicode characters in category Ps. -var Ps = _Ps var _Ps = []Range { Range{0x0028, 0x005b, 51}, Range{0x007b, 0x0f3a, 3775}, @@ -1097,12 +1053,6 @@ var _Ps = []Range { Range{0xff62, 0xff62, 1}, } - -// Digit is the set of Unicode characters with the "decimal digit" property. -var Digit = Nd - -// Nd is the set of Unicode characters in category Nd. -var Nd = _Nd var _Nd = []Range { Range{0x0030, 0x0039, 1}, Range{0x0660, 0x0669, 1}, @@ -1139,8 +1089,6 @@ var _Nd = []Range { Range{0x1d7ce, 0x1d7ff, 1}, } -// Nl is the set of Unicode characters in category Nl. -var Nl = _Nl var _Nl = []Range { Range{0x16ee, 0x16f0, 1}, Range{0x2160, 0x2182, 1}, @@ -1154,8 +1102,6 @@ var _Nl = []Range { Range{0x12400, 0x12462, 1}, } -// No is the set of Unicode characters in category No. -var No = _No var _No = []Range { Range{0x00b2, 0x00b3, 1}, Range{0x00b9, 0x00bc, 3}, @@ -1189,8 +1135,6 @@ var _No = []Range { Range{0x1d360, 0x1d371, 1}, } -// So is the set of Unicode characters in category So. -var So = _So var _So = []Range { Range{0x00a6, 0x00a7, 1}, Range{0x00a9, 0x00ae, 5}, @@ -1315,8 +1259,6 @@ var _So = []Range { Range{0x1f030, 0x1f093, 1}, } -// Sm is the set of Unicode characters in category Sm. -var Sm = _Sm var _Sm = []Range { Range{0x002b, 0x003c, 17}, Range{0x003d, 0x003e, 1}, @@ -1370,8 +1312,6 @@ var _Sm = []Range { Range{0x1d7a9, 0x1d7c3, 26}, } -// Sk is the set of Unicode characters in category Sk. -var Sk = _Sk var _Sk = []Range { Range{0x005e, 0x0060, 2}, Range{0x00a8, 0x00af, 7}, @@ -1396,8 +1336,6 @@ var _Sk = []Range { Range{0xffe3, 0xffe3, 1}, } -// Sc is the set of Unicode characters in category Sc. -var Sc = _Sc var _Sc = []Range { Range{0x0024, 0x00a2, 126}, Range{0x00a3, 0x00a5, 1}, @@ -1412,12 +1350,6 @@ var _Sc = []Range { Range{0xffe6, 0xffe6, 1}, } - -// Upper is the set of Unicode upper case letters. -var Upper = Lu - -// Lu is the set of Unicode characters in category Lu. -var Lu = _Lu var _Lu = []Range { Range{0x0041, 0x005a, 1}, Range{0x00c0, 0x00d6, 1}, @@ -1545,12 +1477,6 @@ var _Lu = []Range { Range{0x1d7ca, 0x1d7ca, 1}, } - -// Title is the set of Unicode title case letters. -var Title = Lt - -// Lt is the set of Unicode characters in category Lt. -var Lt = _Lt var _Lt = []Range { Range{0x01c5, 0x01cb, 3}, Range{0x01f2, 0x1f88, 7574}, @@ -1561,8 +1487,6 @@ var _Lt = []Range { Range{0x1ffc, 0x1ffc, 1}, } -// Lo is the set of Unicode characters in category Lo. -var Lo = _Lo var _Lo = []Range { Range{0x01bb, 0x01c0, 5}, Range{0x01c1, 0x01c3, 1}, @@ -1838,6 +1762,43 @@ var _Lo = []Range { Range{0x2f800, 0x2fa1d, 1}, } +var ( + Cc = _Cc; // Cc is the set of Unicode characters in category Cc. + Cf = _Cf; // Cf is the set of Unicode characters in category Cf. + Co = _Co; // Co is the set of Unicode characters in category Co. + Cs = _Cs; // Cs is the set of Unicode characters in category Cs. + Digit = _Nd; // Digit is the set of Unicode characters with the "decimal digit" property. + Nd = _Nd; // Nd is the set of Unicode characters in category Nd. + Letter = letter; // Letter is the set of Unicode letters. + Lm = _Lm; // Lm is the set of Unicode characters in category Lm. + Lo = _Lo; // Lo is the set of Unicode characters in category Lo. + Lower = _Ll; // Lower is the set of Unicode lower case letters. + Ll = _Ll; // Ll is the set of Unicode characters in category Ll. + Mc = _Mc; // Mc is the set of Unicode characters in category Mc. + Me = _Me; // Me is the set of Unicode characters in category Me. + Mn = _Mn; // Mn is the set of Unicode characters in category Mn. + Nl = _Nl; // Nl is the set of Unicode characters in category Nl. + No = _No; // No is the set of Unicode characters in category No. + Pc = _Pc; // Pc is the set of Unicode characters in category Pc. + Pd = _Pd; // Pd is the set of Unicode characters in category Pd. + Pe = _Pe; // Pe is the set of Unicode characters in category Pe. + Pf = _Pf; // Pf is the set of Unicode characters in category Pf. + Pi = _Pi; // Pi is the set of Unicode characters in category Pi. + Po = _Po; // Po is the set of Unicode characters in category Po. + Ps = _Ps; // Ps is the set of Unicode characters in category Ps. + Sc = _Sc; // Sc is the set of Unicode characters in category Sc. + Sk = _Sk; // Sk is the set of Unicode characters in category Sk. + Sm = _Sm; // Sm is the set of Unicode characters in category Sm. + So = _So; // So is the set of Unicode characters in category So. + Title = _Lt; // Title is the set of Unicode title case letters. + Lt = _Lt; // Lt is the set of Unicode characters in category Lt. + Upper = _Lu; // Upper is the set of Unicode upper case letters. + Lu = _Lu; // Lu is the set of Unicode characters in category Lu. + Zl = _Zl; // Zl is the set of Unicode characters in category Zl. + Zp = _Zp; // Zp is the set of Unicode characters in category Zp. + Zs = _Zs; // Zs is the set of Unicode characters in category Zs. +) + // Generated by running // maketables --scripts=all --url=http://www.unicode.org/Public/5.1.0/ucd/ // DO NOT EDIT @@ -1923,8 +1884,6 @@ var Scripts = map[string] []Range { "Gothic": Gothic, } -// Katakana is the set of Unicode characters in script Katakana. -var Katakana = _Katakana var _Katakana = []Range { Range{0x30a1, 0x30fa, 1}, Range{0x30fd, 0x30ff, 1}, @@ -1935,8 +1894,6 @@ var _Katakana = []Range { Range{0xff71, 0xff9d, 1}, } -// Malayalam is the set of Unicode characters in script Malayalam. -var Malayalam = _Malayalam var _Malayalam = []Range { Range{0x0d02, 0x0d03, 1}, Range{0x0d05, 0x0d0c, 1}, @@ -1952,14 +1909,10 @@ var _Malayalam = []Range { Range{0x0d79, 0x0d7f, 1}, } -// Phags_Pa is the set of Unicode characters in script Phags_Pa. -var Phags_Pa = _Phags_Pa var _Phags_Pa = []Range { Range{0xa840, 0xa877, 1}, } -// Latin is the set of Unicode characters in script Latin. -var Latin = _Latin var _Latin = []Range { Range{0x0041, 0x005a, 1}, Range{0x0061, 0x007a, 1}, @@ -1992,15 +1945,11 @@ var _Latin = []Range { Range{0xff41, 0xff5a, 1}, } -// Osmanya is the set of Unicode characters in script Osmanya. -var Osmanya = _Osmanya var _Osmanya = []Range { Range{0x10480, 0x1049d, 1}, Range{0x104a0, 0x104a9, 1}, } -// Khmer is the set of Unicode characters in script Khmer. -var Khmer = _Khmer var _Khmer = []Range { Range{0x1780, 0x17dd, 1}, Range{0x17e0, 0x17e9, 1}, @@ -2008,8 +1957,6 @@ var _Khmer = []Range { Range{0x19e0, 0x19ff, 1}, } -// Inherited is the set of Unicode characters in script Inherited. -var Inherited = _Inherited var _Inherited = []Range { Range{0x0300, 0x036f, 1}, Range{0x064b, 0x0655, 1}, @@ -2031,8 +1978,6 @@ var _Inherited = []Range { Range{0xe0100, 0xe01ef, 1}, } -// Telugu is the set of Unicode characters in script Telugu. -var Telugu = _Telugu var _Telugu = []Range { Range{0x0c01, 0x0c03, 1}, Range{0x0c05, 0x0c0c, 1}, @@ -2050,21 +1995,15 @@ var _Telugu = []Range { Range{0x0c78, 0x0c7f, 1}, } -// Bopomofo is the set of Unicode characters in script Bopomofo. -var Bopomofo = _Bopomofo var _Bopomofo = []Range { Range{0x3105, 0x312d, 1}, Range{0x31a0, 0x31b7, 1}, } -// Kayah_Li is the set of Unicode characters in script Kayah_Li. -var Kayah_Li = _Kayah_Li var _Kayah_Li = []Range { Range{0xa900, 0xa92f, 1}, } -// New_Tai_Lue is the set of Unicode characters in script New_Tai_Lue. -var New_Tai_Lue = _New_Tai_Lue var _New_Tai_Lue = []Range { Range{0x1980, 0x19a9, 1}, Range{0x19b0, 0x19c9, 1}, @@ -2072,15 +2011,11 @@ var _New_Tai_Lue = []Range { Range{0x19de, 0x19df, 1}, } -// Tai_Le is the set of Unicode characters in script Tai_Le. -var Tai_Le = _Tai_Le var _Tai_Le = []Range { Range{0x1950, 0x196d, 1}, Range{0x1970, 0x1974, 1}, } -// Kharoshthi is the set of Unicode characters in script Kharoshthi. -var Kharoshthi = _Kharoshthi var _Kharoshthi = []Range { Range{0x10a00, 0x10a03, 1}, Range{0x10a05, 0x10a06, 1}, @@ -2092,8 +2027,6 @@ var _Kharoshthi = []Range { Range{0x10a50, 0x10a58, 1}, } -// Common is the set of Unicode characters in script Common. -var Common = _Common var _Common = []Range { Range{0x0000, 0x0040, 1}, Range{0x005b, 0x0060, 1}, @@ -2234,8 +2167,6 @@ var _Common = []Range { Range{0xe0020, 0xe007f, 1}, } -// Kannada is the set of Unicode characters in script Kannada. -var Kannada = _Kannada var _Kannada = []Range { Range{0x0c82, 0x0c83, 1}, Range{0x0c85, 0x0c8c, 1}, @@ -2252,8 +2183,6 @@ var _Kannada = []Range { Range{0x0ce6, 0x0cef, 1}, } -// Tamil is the set of Unicode characters in script Tamil. -var Tamil = _Tamil var _Tamil = []Range { Range{0x0b82, 0x0b83, 1}, Range{0x0b85, 0x0b8a, 1}, @@ -2273,15 +2202,11 @@ var _Tamil = []Range { Range{0x0be6, 0x0bfa, 1}, } -// Tagalog is the set of Unicode characters in script Tagalog. -var Tagalog = _Tagalog var _Tagalog = []Range { Range{0x1700, 0x170c, 1}, Range{0x170e, 0x1714, 1}, } -// Arabic is the set of Unicode characters in script Arabic. -var Arabic = _Arabic var _Arabic = []Range { Range{0x0606, 0x060b, 1}, Range{0x060d, 0x061a, 1}, @@ -2302,22 +2227,16 @@ var _Arabic = []Range { Range{0xfe76, 0xfefc, 1}, } -// Tagbanwa is the set of Unicode characters in script Tagbanwa. -var Tagbanwa = _Tagbanwa var _Tagbanwa = []Range { Range{0x1760, 0x176c, 1}, Range{0x176e, 0x1770, 1}, Range{0x1772, 0x1773, 1}, } -// Canadian_Aboriginal is the set of Unicode characters in script Canadian_Aboriginal. -var Canadian_Aboriginal = _Canadian_Aboriginal var _Canadian_Aboriginal = []Range { Range{0x1401, 0x1676, 1}, } -// Tibetan is the set of Unicode characters in script Tibetan. -var Tibetan = _Tibetan var _Tibetan = []Range { Range{0x0f00, 0x0f47, 1}, Range{0x0f49, 0x0f6c, 1}, @@ -2328,23 +2247,17 @@ var _Tibetan = []Range { Range{0x0fce, 0x0fd4, 1}, } -// Coptic is the set of Unicode characters in script Coptic. -var Coptic = _Coptic var _Coptic = []Range { Range{0x03e2, 0x03ef, 1}, Range{0x2c80, 0x2cea, 1}, Range{0x2cf9, 0x2cff, 1}, } -// Hiragana is the set of Unicode characters in script Hiragana. -var Hiragana = _Hiragana var _Hiragana = []Range { Range{0x3041, 0x3096, 1}, Range{0x309d, 0x309f, 1}, } -// Limbu is the set of Unicode characters in script Limbu. -var Limbu = _Limbu var _Limbu = []Range { Range{0x1900, 0x191c, 1}, Range{0x1920, 0x192b, 1}, @@ -2353,15 +2266,11 @@ var _Limbu = []Range { Range{0x1944, 0x194f, 1}, } -// Myanmar is the set of Unicode characters in script Myanmar. -var Myanmar = _Myanmar var _Myanmar = []Range { Range{0x1000, 0x1099, 1}, Range{0x109e, 0x109f, 1}, } -// Armenian is the set of Unicode characters in script Armenian. -var Armenian = _Armenian var _Armenian = []Range { Range{0x0531, 0x0556, 1}, Range{0x0559, 0x055f, 1}, @@ -2370,8 +2279,6 @@ var _Armenian = []Range { Range{0xfb13, 0xfb17, 1}, } -// Sinhala is the set of Unicode characters in script Sinhala. -var Sinhala = _Sinhala var _Sinhala = []Range { Range{0x0d82, 0x0d83, 1}, Range{0x0d85, 0x0d96, 1}, @@ -2386,8 +2293,6 @@ var _Sinhala = []Range { Range{0x0df2, 0x0df4, 1}, } -// Bengali is the set of Unicode characters in script Bengali. -var Bengali = _Bengali var _Bengali = []Range { Range{0x0981, 0x0983, 1}, Range{0x0985, 0x098c, 1}, @@ -2405,8 +2310,6 @@ var _Bengali = []Range { Range{0x09e6, 0x09fa, 1}, } -// Greek is the set of Unicode characters in script Greek. -var Greek = _Greek var _Greek = []Range { Range{0x0370, 0x0373, 1}, Range{0x0375, 0x0377, 1}, @@ -2443,8 +2346,6 @@ var _Greek = []Range { Range{0x1d200, 0x1d245, 1}, } -// Cham is the set of Unicode characters in script Cham. -var Cham = _Cham var _Cham = []Range { Range{0xaa00, 0xaa36, 1}, Range{0xaa40, 0xaa4d, 1}, @@ -2452,8 +2353,6 @@ var _Cham = []Range { Range{0xaa5c, 0xaa5f, 1}, } -// Hebrew is the set of Unicode characters in script Hebrew. -var Hebrew = _Hebrew var _Hebrew = []Range { Range{0x0591, 0x05c7, 1}, Range{0x05d0, 0x05ea, 1}, @@ -2466,15 +2365,11 @@ var _Hebrew = []Range { Range{0xfb46, 0xfb4f, 1}, } -// Saurashtra is the set of Unicode characters in script Saurashtra. -var Saurashtra = _Saurashtra var _Saurashtra = []Range { Range{0xa880, 0xa8c4, 1}, Range{0xa8ce, 0xa8d9, 1}, } -// Hangul is the set of Unicode characters in script Hangul. -var Hangul = _Hangul var _Hangul = []Range { Range{0x1100, 0x1159, 1}, Range{0x115f, 0x11a2, 1}, @@ -2490,35 +2385,25 @@ var _Hangul = []Range { Range{0xffda, 0xffdc, 1}, } -// Runic is the set of Unicode characters in script Runic. -var Runic = _Runic var _Runic = []Range { Range{0x16a0, 0x16ea, 1}, Range{0x16ee, 0x16f0, 1}, } -// Deseret is the set of Unicode characters in script Deseret. -var Deseret = _Deseret var _Deseret = []Range { Range{0x10400, 0x1044f, 1}, } -// Sundanese is the set of Unicode characters in script Sundanese. -var Sundanese = _Sundanese var _Sundanese = []Range { Range{0x1b80, 0x1baa, 1}, Range{0x1bae, 0x1bb9, 1}, } -// Glagolitic is the set of Unicode characters in script Glagolitic. -var Glagolitic = _Glagolitic var _Glagolitic = []Range { Range{0x2c00, 0x2c2e, 1}, Range{0x2c30, 0x2c5e, 1}, } -// Oriya is the set of Unicode characters in script Oriya. -var Oriya = _Oriya var _Oriya = []Range { Range{0x0b01, 0x0b03, 1}, Range{0x0b05, 0x0b0c, 1}, @@ -2536,14 +2421,10 @@ var _Oriya = []Range { Range{0x0b66, 0x0b71, 1}, } -// Buhid is the set of Unicode characters in script Buhid. -var Buhid = _Buhid var _Buhid = []Range { Range{0x1740, 0x1753, 1}, } -// Ethiopic is the set of Unicode characters in script Ethiopic. -var Ethiopic = _Ethiopic var _Ethiopic = []Range { Range{0x1200, 0x1248, 1}, Range{0x124a, 0x124d, 1}, @@ -2574,40 +2455,28 @@ var _Ethiopic = []Range { Range{0x2dd8, 0x2dde, 1}, } -// Syloti_Nagri is the set of Unicode characters in script Syloti_Nagri. -var Syloti_Nagri = _Syloti_Nagri var _Syloti_Nagri = []Range { Range{0xa800, 0xa82b, 1}, } -// Vai is the set of Unicode characters in script Vai. -var Vai = _Vai var _Vai = []Range { Range{0xa500, 0xa62b, 1}, } -// Cherokee is the set of Unicode characters in script Cherokee. -var Cherokee = _Cherokee var _Cherokee = []Range { Range{0x13a0, 0x13f4, 1}, } -// Ogham is the set of Unicode characters in script Ogham. -var Ogham = _Ogham var _Ogham = []Range { Range{0x1680, 0x169c, 1}, } -// Syriac is the set of Unicode characters in script Syriac. -var Syriac = _Syriac var _Syriac = []Range { Range{0x0700, 0x070d, 1}, Range{0x070f, 0x074a, 1}, Range{0x074d, 0x074f, 1}, } -// Gurmukhi is the set of Unicode characters in script Gurmukhi. -var Gurmukhi = _Gurmukhi var _Gurmukhi = []Range { Range{0x0a01, 0x0a03, 1}, Range{0x0a05, 0x0a0a, 1}, @@ -2627,14 +2496,10 @@ var _Gurmukhi = []Range { Range{0x0a66, 0x0a75, 1}, } -// Ol_Chiki is the set of Unicode characters in script Ol_Chiki. -var Ol_Chiki = _Ol_Chiki var _Ol_Chiki = []Range { Range{0x1c50, 0x1c7f, 1}, } -// Mongolian is the set of Unicode characters in script Mongolian. -var Mongolian = _Mongolian var _Mongolian = []Range { Range{0x1800, 0x1801, 1}, Range{0x1804, 0x1804, 1}, @@ -2644,14 +2509,10 @@ var _Mongolian = []Range { Range{0x1880, 0x18aa, 1}, } -// Hanunoo is the set of Unicode characters in script Hanunoo. -var Hanunoo = _Hanunoo var _Hanunoo = []Range { Range{0x1720, 0x1734, 1}, } -// Cypriot is the set of Unicode characters in script Cypriot. -var Cypriot = _Cypriot var _Cypriot = []Range { Range{0x10800, 0x10805, 1}, Range{0x10808, 0x10808, 1}, @@ -2661,51 +2522,37 @@ var _Cypriot = []Range { Range{0x1083f, 0x1083f, 1}, } -// Buginese is the set of Unicode characters in script Buginese. -var Buginese = _Buginese var _Buginese = []Range { Range{0x1a00, 0x1a1b, 1}, Range{0x1a1e, 0x1a1f, 1}, } -// Lepcha is the set of Unicode characters in script Lepcha. -var Lepcha = _Lepcha var _Lepcha = []Range { Range{0x1c00, 0x1c37, 1}, Range{0x1c3b, 0x1c49, 1}, Range{0x1c4d, 0x1c4f, 1}, } -// Thaana is the set of Unicode characters in script Thaana. -var Thaana = _Thaana var _Thaana = []Range { Range{0x0780, 0x07b1, 1}, } -// Old_Persian is the set of Unicode characters in script Old_Persian. -var Old_Persian = _Old_Persian var _Old_Persian = []Range { Range{0x103a0, 0x103c3, 1}, Range{0x103c8, 0x103d5, 1}, } -// Cuneiform is the set of Unicode characters in script Cuneiform. -var Cuneiform = _Cuneiform var _Cuneiform = []Range { Range{0x12000, 0x1236e, 1}, Range{0x12400, 0x12462, 1}, Range{0x12470, 0x12473, 1}, } -// Rejang is the set of Unicode characters in script Rejang. -var Rejang = _Rejang var _Rejang = []Range { Range{0xa930, 0xa953, 1}, Range{0xa95f, 0xa95f, 1}, } -// Georgian is the set of Unicode characters in script Georgian. -var Georgian = _Georgian var _Georgian = []Range { Range{0x10a0, 0x10c5, 1}, Range{0x10d0, 0x10fa, 1}, @@ -2713,33 +2560,23 @@ var _Georgian = []Range { Range{0x2d00, 0x2d25, 1}, } -// Shavian is the set of Unicode characters in script Shavian. -var Shavian = _Shavian var _Shavian = []Range { Range{0x10450, 0x1047f, 1}, } -// Lycian is the set of Unicode characters in script Lycian. -var Lycian = _Lycian var _Lycian = []Range { Range{0x10280, 0x1029c, 1}, } -// Nko is the set of Unicode characters in script Nko. -var Nko = _Nko var _Nko = []Range { Range{0x07c0, 0x07fa, 1}, } -// Yi is the set of Unicode characters in script Yi. -var Yi = _Yi var _Yi = []Range { Range{0xa000, 0xa48c, 1}, Range{0xa490, 0xa4c6, 1}, } -// Lao is the set of Unicode characters in script Lao. -var Lao = _Lao var _Lao = []Range { Range{0x0e81, 0x0e82, 1}, Range{0x0e84, 0x0e84, 1}, @@ -2761,8 +2598,6 @@ var _Lao = []Range { Range{0x0edc, 0x0edd, 1}, } -// Linear_B is the set of Unicode characters in script Linear_B. -var Linear_B = _Linear_B var _Linear_B = []Range { Range{0x10000, 0x1000b, 1}, Range{0x1000d, 0x10026, 1}, @@ -2773,15 +2608,11 @@ var _Linear_B = []Range { Range{0x10080, 0x100fa, 1}, } -// Old_Italic is the set of Unicode characters in script Old_Italic. -var Old_Italic = _Old_Italic var _Old_Italic = []Range { Range{0x10300, 0x1031e, 1}, Range{0x10320, 0x10323, 1}, } -// Devanagari is the set of Unicode characters in script Devanagari. -var Devanagari = _Devanagari var _Devanagari = []Range { Range{0x0901, 0x0939, 1}, Range{0x093c, 0x094d, 1}, @@ -2793,36 +2624,26 @@ var _Devanagari = []Range { Range{0x097b, 0x097f, 1}, } -// Lydian is the set of Unicode characters in script Lydian. -var Lydian = _Lydian var _Lydian = []Range { Range{0x10920, 0x10939, 1}, Range{0x1093f, 0x1093f, 1}, } -// Tifinagh is the set of Unicode characters in script Tifinagh. -var Tifinagh = _Tifinagh var _Tifinagh = []Range { Range{0x2d30, 0x2d65, 1}, Range{0x2d6f, 0x2d6f, 1}, } -// Ugaritic is the set of Unicode characters in script Ugaritic. -var Ugaritic = _Ugaritic var _Ugaritic = []Range { Range{0x10380, 0x1039d, 1}, Range{0x1039f, 0x1039f, 1}, } -// Thai is the set of Unicode characters in script Thai. -var Thai = _Thai var _Thai = []Range { Range{0x0e01, 0x0e3a, 1}, Range{0x0e40, 0x0e5b, 1}, } -// Cyrillic is the set of Unicode characters in script Cyrillic. -var Cyrillic = _Cyrillic var _Cyrillic = []Range { Range{0x0400, 0x0523, 1}, Range{0x1d2b, 0x1d2b, 1}, @@ -2833,8 +2654,6 @@ var _Cyrillic = []Range { Range{0xa67c, 0xa697, 1}, } -// Gujarati is the set of Unicode characters in script Gujarati. -var Gujarati = _Gujarati var _Gujarati = []Range { Range{0x0a81, 0x0a83, 1}, Range{0x0a85, 0x0a8d, 1}, @@ -2852,34 +2671,24 @@ var _Gujarati = []Range { Range{0x0af1, 0x0af1, 1}, } -// Carian is the set of Unicode characters in script Carian. -var Carian = _Carian var _Carian = []Range { Range{0x102a0, 0x102d0, 1}, } -// Phoenician is the set of Unicode characters in script Phoenician. -var Phoenician = _Phoenician var _Phoenician = []Range { Range{0x10900, 0x10919, 1}, Range{0x1091f, 0x1091f, 1}, } -// Balinese is the set of Unicode characters in script Balinese. -var Balinese = _Balinese var _Balinese = []Range { Range{0x1b00, 0x1b4b, 1}, Range{0x1b50, 0x1b7c, 1}, } -// Braille is the set of Unicode characters in script Braille. -var Braille = _Braille var _Braille = []Range { Range{0x2800, 0x28ff, 1}, } -// Han is the set of Unicode characters in script Han. -var Han = _Han var _Han = []Range { Range{0x2e80, 0x2e99, 1}, Range{0x2e9b, 0x2ef3, 1}, @@ -2897,9 +2706,87 @@ var _Han = []Range { Range{0x2f800, 0x2fa1d, 1}, } -// Gothic is the set of Unicode characters in script Gothic. -var Gothic = _Gothic var _Gothic = []Range { Range{0x10330, 0x1034a, 1}, } +var ( + Arabic = _Arabic; // Arabic is the set of Unicode characters in script Arabic. + Armenian = _Armenian; // Armenian is the set of Unicode characters in script Armenian. + Balinese = _Balinese; // Balinese is the set of Unicode characters in script Balinese. + Bengali = _Bengali; // Bengali is the set of Unicode characters in script Bengali. + Bopomofo = _Bopomofo; // Bopomofo is the set of Unicode characters in script Bopomofo. + Braille = _Braille; // Braille is the set of Unicode characters in script Braille. + Buginese = _Buginese; // Buginese is the set of Unicode characters in script Buginese. + Buhid = _Buhid; // Buhid is the set of Unicode characters in script Buhid. + Canadian_Aboriginal = _Canadian_Aboriginal; // Canadian_Aboriginal is the set of Unicode characters in script Canadian_Aboriginal. + Carian = _Carian; // Carian is the set of Unicode characters in script Carian. + Cham = _Cham; // Cham is the set of Unicode characters in script Cham. + Cherokee = _Cherokee; // Cherokee is the set of Unicode characters in script Cherokee. + Common = _Common; // Common is the set of Unicode characters in script Common. + Coptic = _Coptic; // Coptic is the set of Unicode characters in script Coptic. + Cuneiform = _Cuneiform; // Cuneiform is the set of Unicode characters in script Cuneiform. + Cypriot = _Cypriot; // Cypriot is the set of Unicode characters in script Cypriot. + Cyrillic = _Cyrillic; // Cyrillic is the set of Unicode characters in script Cyrillic. + Deseret = _Deseret; // Deseret is the set of Unicode characters in script Deseret. + Devanagari = _Devanagari; // Devanagari is the set of Unicode characters in script Devanagari. + Ethiopic = _Ethiopic; // Ethiopic is the set of Unicode characters in script Ethiopic. + Georgian = _Georgian; // Georgian is the set of Unicode characters in script Georgian. + Glagolitic = _Glagolitic; // Glagolitic is the set of Unicode characters in script Glagolitic. + Gothic = _Gothic; // Gothic is the set of Unicode characters in script Gothic. + Greek = _Greek; // Greek is the set of Unicode characters in script Greek. + Gujarati = _Gujarati; // Gujarati is the set of Unicode characters in script Gujarati. + Gurmukhi = _Gurmukhi; // Gurmukhi is the set of Unicode characters in script Gurmukhi. + Han = _Han; // Han is the set of Unicode characters in script Han. + Hangul = _Hangul; // Hangul is the set of Unicode characters in script Hangul. + Hanunoo = _Hanunoo; // Hanunoo is the set of Unicode characters in script Hanunoo. + Hebrew = _Hebrew; // Hebrew is the set of Unicode characters in script Hebrew. + Hiragana = _Hiragana; // Hiragana is the set of Unicode characters in script Hiragana. + Inherited = _Inherited; // Inherited is the set of Unicode characters in script Inherited. + Kannada = _Kannada; // Kannada is the set of Unicode characters in script Kannada. + Katakana = _Katakana; // Katakana is the set of Unicode characters in script Katakana. + Kayah_Li = _Kayah_Li; // Kayah_Li is the set of Unicode characters in script Kayah_Li. + Kharoshthi = _Kharoshthi; // Kharoshthi is the set of Unicode characters in script Kharoshthi. + Khmer = _Khmer; // Khmer is the set of Unicode characters in script Khmer. + Lao = _Lao; // Lao is the set of Unicode characters in script Lao. + Latin = _Latin; // Latin is the set of Unicode characters in script Latin. + Lepcha = _Lepcha; // Lepcha is the set of Unicode characters in script Lepcha. + Limbu = _Limbu; // Limbu is the set of Unicode characters in script Limbu. + Linear_B = _Linear_B; // Linear_B is the set of Unicode characters in script Linear_B. + Lycian = _Lycian; // Lycian is the set of Unicode characters in script Lycian. + Lydian = _Lydian; // Lydian is the set of Unicode characters in script Lydian. + Malayalam = _Malayalam; // Malayalam is the set of Unicode characters in script Malayalam. + Mongolian = _Mongolian; // Mongolian is the set of Unicode characters in script Mongolian. + Myanmar = _Myanmar; // Myanmar is the set of Unicode characters in script Myanmar. + New_Tai_Lue = _New_Tai_Lue; // New_Tai_Lue is the set of Unicode characters in script New_Tai_Lue. + Nko = _Nko; // Nko is the set of Unicode characters in script Nko. + Ogham = _Ogham; // Ogham is the set of Unicode characters in script Ogham. + Ol_Chiki = _Ol_Chiki; // Ol_Chiki is the set of Unicode characters in script Ol_Chiki. + Old_Italic = _Old_Italic; // Old_Italic is the set of Unicode characters in script Old_Italic. + Old_Persian = _Old_Persian; // Old_Persian is the set of Unicode characters in script Old_Persian. + Oriya = _Oriya; // Oriya is the set of Unicode characters in script Oriya. + Osmanya = _Osmanya; // Osmanya is the set of Unicode characters in script Osmanya. + Phags_Pa = _Phags_Pa; // Phags_Pa is the set of Unicode characters in script Phags_Pa. + Phoenician = _Phoenician; // Phoenician is the set of Unicode characters in script Phoenician. + Rejang = _Rejang; // Rejang is the set of Unicode characters in script Rejang. + Runic = _Runic; // Runic is the set of Unicode characters in script Runic. + Saurashtra = _Saurashtra; // Saurashtra is the set of Unicode characters in script Saurashtra. + Shavian = _Shavian; // Shavian is the set of Unicode characters in script Shavian. + Sinhala = _Sinhala; // Sinhala is the set of Unicode characters in script Sinhala. + Sundanese = _Sundanese; // Sundanese is the set of Unicode characters in script Sundanese. + Syloti_Nagri = _Syloti_Nagri; // Syloti_Nagri is the set of Unicode characters in script Syloti_Nagri. + Syriac = _Syriac; // Syriac is the set of Unicode characters in script Syriac. + Tagalog = _Tagalog; // Tagalog is the set of Unicode characters in script Tagalog. + Tagbanwa = _Tagbanwa; // Tagbanwa is the set of Unicode characters in script Tagbanwa. + Tai_Le = _Tai_Le; // Tai_Le is the set of Unicode characters in script Tai_Le. + Tamil = _Tamil; // Tamil is the set of Unicode characters in script Tamil. + Telugu = _Telugu; // Telugu is the set of Unicode characters in script Telugu. + Thaana = _Thaana; // Thaana is the set of Unicode characters in script Thaana. + Thai = _Thai; // Thai is the set of Unicode characters in script Thai. + Tibetan = _Tibetan; // Tibetan is the set of Unicode characters in script Tibetan. + Tifinagh = _Tifinagh; // Tifinagh is the set of Unicode characters in script Tifinagh. + Ugaritic = _Ugaritic; // Ugaritic is the set of Unicode characters in script Ugaritic. + Vai = _Vai; // Vai is the set of Unicode characters in script Vai. + Yi = _Yi; // Yi is the set of Unicode characters in script Yi. +) + |