diff options
Diffstat (limited to 'src/pkg/unicode')
| -rw-r--r-- | src/pkg/unicode/digit.go | 4 | ||||
| -rw-r--r-- | src/pkg/unicode/digit_test.go | 4 | ||||
| -rw-r--r-- | src/pkg/unicode/letter.go | 104 | ||||
| -rw-r--r-- | src/pkg/unicode/letter_test.go | 16 | ||||
| -rw-r--r-- | src/pkg/unicode/maketables.go | 456 | ||||
| -rw-r--r-- | src/pkg/unicode/script_test.go | 22 | ||||
| -rw-r--r-- | src/pkg/unicode/tables.go | 316 |
7 files changed, 461 insertions, 461 deletions
diff --git a/src/pkg/unicode/digit.go b/src/pkg/unicode/digit.go index b12ada2d6..471c4dfdc 100644 --- a/src/pkg/unicode/digit.go +++ b/src/pkg/unicode/digit.go @@ -6,8 +6,8 @@ package unicode // IsDigit reports whether the rune is a decimal digit. func IsDigit(rune int) bool { - if rune < 0x100 { // quick ASCII (Latin-1, really) check + if rune < 0x100 { // quick ASCII (Latin-1, really) check return '0' <= rune && rune <= '9' } - return Is(Digit, rune); + return Is(Digit, rune) } diff --git a/src/pkg/unicode/digit_test.go b/src/pkg/unicode/digit_test.go index 3031eafc8..57a625b02 100644 --- a/src/pkg/unicode/digit_test.go +++ b/src/pkg/unicode/digit_test.go @@ -5,8 +5,8 @@ package unicode_test import ( - "testing"; - . "unicode"; + "testing" + . "unicode" ) var testDigit = []int{ diff --git a/src/pkg/unicode/letter.go b/src/pkg/unicode/letter.go index b249f2389..8020cd0cf 100644 --- a/src/pkg/unicode/letter.go +++ b/src/pkg/unicode/letter.go @@ -6,17 +6,17 @@ package unicode const ( - MaxRune = 0x10FFFF; // Maximum valid Unicode code point. - ReplacementChar = 0xFFFD; // Represents invalid code points. + MaxRune = 0x10FFFF // Maximum valid Unicode code point. + ReplacementChar = 0xFFFD // Represents invalid code points. ) // The representation of a range of Unicode code points. The range runs from Lo to Hi // inclusive and has the specified stride. type Range struct { - Lo int; - Hi int; - Stride int; + Lo int + Hi int + Stride int } // The representation of a range of Unicode code points for case conversion. @@ -29,26 +29,26 @@ type Range struct { // {UpperLower, UpperLower, UpperLower} // The constant UpperLower has an otherwise impossible delta value. type CaseRange struct { - Lo int; - Hi int; - Delta d; + Lo int + Hi int + Delta d } // Indices into the Delta arrays inside CaseRanges for case mapping. const ( - UpperCase = iota; - LowerCase; - TitleCase; - MaxCase; + UpperCase = iota + LowerCase + TitleCase + MaxCase ) -type d [MaxCase]int32 // to make the CaseRanges text shorter +type d [MaxCase]int32 // to make the CaseRanges text shorter // If the Delta field of a CaseRange is UpperLower or LowerUpper, it means // this CaseRange represents a sequence of the form (say) // Upper Lower Upper Lower. const ( - UpperLower = MaxRune + 1; // (Cannot be a valid delta.) + UpperLower = MaxRune + 1 // (Cannot be a valid delta.) ) // Is tests whether rune is in the specified table of ranges. @@ -62,17 +62,17 @@ func Is(ranges []Range, rune int) bool { if rune < r.Lo { return false } - return (rune-r.Lo)%r.Stride == 0; + return (rune-r.Lo)%r.Stride == 0 } - return false; + return false } // binary search over ranges - lo := 0; - hi := len(ranges); + lo := 0 + hi := len(ranges) for lo < hi { - m := lo + (hi-lo)/2; - r := ranges[m]; + m := lo + (hi-lo)/2 + r := ranges[m] if r.Lo <= rune && rune <= r.Hi { return (rune-r.Lo)%r.Stride == 0 } @@ -82,67 +82,67 @@ func Is(ranges []Range, rune int) bool { lo = m + 1 } } - return false; + return false } // IsUpper reports whether the rune is an upper case letter. func IsUpper(rune int) bool { - if rune < 0x80 { // quick ASCII check + if rune < 0x80 { // quick ASCII check return 'A' <= rune && rune <= 'Z' } - return Is(Upper, rune); + return Is(Upper, rune) } // IsLower reports whether the rune is a lower case letter. func IsLower(rune int) bool { - if rune < 0x80 { // quick ASCII check + if rune < 0x80 { // quick ASCII check return 'a' <= rune && rune <= 'z' } - return Is(Lower, rune); + return Is(Lower, rune) } // IsTitle reports whether the rune is a title case letter. func IsTitle(rune int) bool { - if rune < 0x80 { // quick ASCII check + if rune < 0x80 { // quick ASCII check return false } - return Is(Title, rune); + return Is(Title, rune) } // IsLetter reports whether the rune is a letter. func IsLetter(rune int) bool { - if rune < 0x80 { // quick ASCII check - rune &^= 'a' - 'A'; - return 'A' <= rune && rune <= 'Z'; + if rune < 0x80 { // quick ASCII check + rune &^= 'a' - 'A' + return 'A' <= rune && rune <= 'Z' } - return Is(Letter, rune); + return Is(Letter, rune) } // IsSpace reports whether the rune is a white space character. func IsSpace(rune int) bool { - if rune <= 0xFF { // quick Latin-1 check + if rune <= 0xFF { // quick Latin-1 check switch rune { case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0: return true } - return false; + return false } - return Is(White_Space, rune); + return Is(White_Space, rune) } // To maps the rune to the specified case: UpperCase, LowerCase, or TitleCase func To(_case int, rune int) int { if _case < 0 || MaxCase <= _case { - return ReplacementChar // as reasonable an error as any + return ReplacementChar // as reasonable an error as any } // binary search over ranges - lo := 0; - hi := len(CaseRanges); + lo := 0 + hi := len(CaseRanges) for lo < hi { - m := lo + (hi-lo)/2; - r := CaseRanges[m]; + m := lo + (hi-lo)/2 + r := CaseRanges[m] if r.Lo <= rune && rune <= r.Hi { - delta := int(r.Delta[_case]); + delta := int(r.Delta[_case]) if delta > MaxRune { // In an Upper-Lower sequence, which always starts with // an UpperCase letter, the real deltas always look like: @@ -156,7 +156,7 @@ func To(_case int, rune int) int { // is odd so we take the low bit from _case. return r.Lo + ((rune-r.Lo)&^1 | _case&1) } - return rune + delta; + return rune + delta } if rune < r.Lo { hi = m @@ -164,38 +164,38 @@ func To(_case int, rune int) int { lo = m + 1 } } - return rune; + return rune } // ToUpper maps the rune to upper case func ToUpper(rune int) int { - if rune < 0x80 { // quick ASCII check + if rune < 0x80 { // quick ASCII check if 'a' <= rune && rune <= 'z' { rune -= 'a' - 'A' } - return rune; + return rune } - return To(UpperCase, rune); + return To(UpperCase, rune) } // ToLower maps the rune to lower case func ToLower(rune int) int { - if rune < 0x80 { // quick ASCII check + if rune < 0x80 { // quick ASCII check if 'A' <= rune && rune <= 'Z' { rune += 'a' - 'A' } - return rune; + return rune } - return To(LowerCase, rune); + return To(LowerCase, rune) } // ToTitle maps the rune to title case func ToTitle(rune int) int { - if rune < 0x80 { // quick ASCII check - if 'a' <= rune && rune <= 'z' { // title case is upper case for ASCII + if rune < 0x80 { // quick ASCII check + if 'a' <= rune && rune <= 'z' { // title case is upper case for ASCII rune -= 'a' - 'A' } - return rune; + return rune } - return To(TitleCase, rune); + return To(TitleCase, rune) } diff --git a/src/pkg/unicode/letter_test.go b/src/pkg/unicode/letter_test.go index 8e4f5373e..f39fced66 100644 --- a/src/pkg/unicode/letter_test.go +++ b/src/pkg/unicode/letter_test.go @@ -5,8 +5,8 @@ package unicode_test import ( - "testing"; - . "unicode"; + "testing" + . "unicode" ) var upperTest = []int{ @@ -107,7 +107,7 @@ var spaceTest = []int{ } type caseT struct { - cas, in, out int; + cas, in, out int } var caseTest = []caseT{ @@ -258,12 +258,12 @@ func caseString(c int) string { case TitleCase: return "TitleCase" } - return "ErrorCase"; + return "ErrorCase" } func TestTo(t *testing.T) { for _, c := range caseTest { - r := To(c.cas, c.in); + r := To(c.cas, c.in) if c.out != r { t.Errorf("To(U+%04X, %s) = U+%04X want U+%04X\n", c.in, caseString(c.cas), r, c.out) } @@ -275,7 +275,7 @@ func TestToUpperCase(t *testing.T) { if c.cas != UpperCase { continue } - r := ToUpper(c.in); + r := ToUpper(c.in) if c.out != r { t.Errorf("ToUpper(U+%04X) = U+%04X want U+%04X\n", c.in, r, c.out) } @@ -287,7 +287,7 @@ func TestToLowerCase(t *testing.T) { if c.cas != LowerCase { continue } - r := ToLower(c.in); + r := ToLower(c.in) if c.out != r { t.Errorf("ToLower(U+%04X) = U+%04X want U+%04X\n", c.in, r, c.out) } @@ -299,7 +299,7 @@ func TestToTitleCase(t *testing.T) { if c.cas != TitleCase { continue } - r := ToTitle(c.in); + r := ToTitle(c.in) if c.out != r { t.Errorf("ToTitle(U+%04X) = U+%04X want U+%04X\n", c.in, r, c.out) } diff --git a/src/pkg/unicode/maketables.go b/src/pkg/unicode/maketables.go index 486742e64..75d16418e 100644 --- a/src/pkg/unicode/maketables.go +++ b/src/pkg/unicode/maketables.go @@ -8,26 +8,26 @@ package main import ( - "bufio"; - "flag"; - "fmt"; - "http"; - "log"; - "os"; - "sort"; - "strconv"; - "strings"; - "regexp"; - "unicode"; + "bufio" + "flag" + "fmt" + "http" + "log" + "os" + "sort" + "strconv" + "strings" + "regexp" + "unicode" ) func main() { - flag.Parse(); - loadChars(); // always needed - printCategories(); - printScriptOrProperty(false); - printScriptOrProperty(true); - printCases(); + flag.Parse() + loadChars() // always needed + printCategories() + printScriptOrProperty(false) + printScriptOrProperty(true) + printCases() } var dataURL = flag.String("data", "", "full URL for UnicodeData.txt; defaults to --url/UnicodeData.txt") @@ -53,7 +53,7 @@ var test = flag.Bool("test", var scriptRe = regexp.MustCompile(`([0-9A-F]+)(\.\.[0-9A-F]+)? *; ([A-Za-z_]+)`) var die = log.New(os.Stderr, nil, "", log.Lexit|log.Lshortfile) -var category = map[string]bool{"letter": true} // Nd Lu etc. letter is a special case +var category = map[string]bool{"letter": true} // Nd Lu etc. letter is a special case // UnicodeData.txt has form: // 0037;DIGIT SEVEN;Nd;0;EN;;7;7;7;N;;;;; @@ -61,24 +61,24 @@ var category = map[string]bool{"letter": true} // Nd Lu etc. letter is a special // See http://www.unicode.org/Public/5.1.0/ucd/UCD.html for full explanation // The fields: const ( - FCodePoint = iota; - FName; - FGeneralCategory; - FCanonicalCombiningClass; - FBidiClass; - FDecompositionType; - FDecompositionMapping; - FNumericType; - FNumericValue; - FBidiMirrored; - FUnicode1Name; - FISOComment; - FSimpleUppercaseMapping; - FSimpleLowercaseMapping; - FSimpleTitlecaseMapping; - NumField; - - MaxChar = 0x10FFFF; // anything above this shouldn't exist + FCodePoint = iota + FName + FGeneralCategory + FCanonicalCombiningClass + FBidiClass + FDecompositionType + FDecompositionMapping + FNumericType + FNumericValue + FBidiMirrored + FUnicode1Name + FISOComment + FSimpleUppercaseMapping + FSimpleLowercaseMapping + FSimpleTitlecaseMapping + NumField + + MaxChar = 0x10FFFF // anything above this shouldn't exist ) var fieldName = []string{ @@ -101,12 +101,12 @@ var fieldName = []string{ // This contains only the properties we're interested in. type Char struct { - field []string; // debugging only; could be deleted if we take out char.dump() - codePoint uint32; // if zero, this index is not a valid code point. - category string; - upperCase int; - lowerCase int; - titleCase int; + field []string // debugging only; could be deleted if we take out char.dump() + codePoint uint32 // if zero, this index is not a valid code point. + category string + upperCase int + lowerCase int + titleCase int } // Scripts.txt has form: @@ -115,13 +115,13 @@ type Char struct { // See http://www.unicode.org/Public/5.1.0/ucd/UCD.html for full explanation type Script struct { - lo, hi uint32; // range of code points - script string; + lo, hi uint32 // range of code points + script string } var chars = make([]Char, MaxChar+1) var scripts = make(map[string][]Script) -var props = make(map[string][]Script) // a property looks like a script; can share the format +var props = make(map[string][]Script) // a property looks like a script; can share the format var lastChar uint32 = 0 @@ -132,40 +132,40 @@ var lastChar uint32 = 0 type State int const ( - SNormal State = iota; // known to be zero for the type - SFirst; - SLast; - SMissing; + SNormal State = iota // known to be zero for the type + SFirst + SLast + SMissing ) func parseCategory(line string) (state State) { - field := strings.Split(line, ";", -1); + field := strings.Split(line, ";", -1) if len(field) != NumField { die.Logf("%5s: %d fields (expected %d)\n", line, len(field), NumField) } - point, err := strconv.Btoui64(field[FCodePoint], 16); + point, err := strconv.Btoui64(field[FCodePoint], 16) if err != nil { die.Log("%.5s...:", err) } - lastChar = uint32(point); + lastChar = uint32(point) if point == 0 { - return // not interesting and we use 0 as unset + return // not interesting and we use 0 as unset } if point > MaxChar { return } - char := &chars[point]; - char.field = field; + char := &chars[point] + char.field = field if char.codePoint != 0 { die.Logf("point U+%04x reused\n") } - char.codePoint = lastChar; - char.category = field[FGeneralCategory]; - category[char.category] = true; + char.codePoint = lastChar + char.category = field[FGeneralCategory] + category[char.category] = true switch char.category { case "Nd": // Decimal digit - _, err := strconv.Atoi(field[FNumericValue]); + _, err := strconv.Atoi(field[FNumericValue]) if err != nil { die.Log("U+%04x: bad numeric field: %s", point, err) } @@ -184,66 +184,66 @@ func parseCategory(line string) (state State) { case strings.Index(field[FName], ", Last>") > 0: state = SLast } - return; + return } func (char *Char) dump(s string) { - fmt.Print(s, " "); + fmt.Print(s, " ") for i := 0; i < len(char.field); i++ { fmt.Printf("%s:%q ", fieldName[i], char.field[i]) } - fmt.Print("\n"); + fmt.Print("\n") } func (char *Char) letter(u, l, t string) { - char.upperCase = char.letterValue(u, "U"); - char.lowerCase = char.letterValue(l, "L"); - char.titleCase = char.letterValue(t, "T"); + char.upperCase = char.letterValue(u, "U") + char.lowerCase = char.letterValue(l, "L") + char.titleCase = char.letterValue(t, "T") } func (char *Char) letterValue(s string, cas string) int { if s == "" { return 0 } - v, err := strconv.Btoui64(s, 16); + v, err := strconv.Btoui64(s, 16) if err != nil { - char.dump(cas); - die.Logf("U+%04x: bad letter(%s): %s", char.codePoint, s, err); + char.dump(cas) + die.Logf("U+%04x: bad letter(%s): %s", char.codePoint, s, err) } - return int(v); + return int(v) } func allCategories() []string { - a := make([]string, len(category)); - i := 0; + a := make([]string, len(category)) + i := 0 for k := range category { - a[i] = k; - i++; + a[i] = k + i++ } - return a; + return a } func all(scripts map[string][]Script) []string { - a := make([]string, len(scripts)); - i := 0; + a := make([]string, len(scripts)) + i := 0 for k := range scripts { - a[i] = k; - i++; + a[i] = k + i++ } - return a; + return a } // Extract the version number from the URL func version() string { // Break on slashes and look for the first numeric field - fields := strings.Split(*url, "/", 0); + fields := strings.Split(*url, "/", 0) for _, f := range fields { if len(f) > 0 && '0' <= f[0] && f[0] <= '9' { return f } } - die.Log("unknown version"); - return "Unknown"; + die.Log("unknown version") + return "Unknown" } func letterOp(code int) bool { @@ -251,29 +251,29 @@ func letterOp(code int) bool { case "Lu", "Ll", "Lt", "Lm", "Lo": return true } - return false; + return false } func loadChars() { if *dataURL == "" { flag.Set("data", *url+"UnicodeData.txt") } - resp, _, err := http.Get(*dataURL); + resp, _, err := http.Get(*dataURL) if err != nil { die.Log(err) } if resp.StatusCode != 200 { die.Log("bad GET status for UnicodeData.txt", resp.Status) } - input := bufio.NewReader(resp.Body); - var first uint32 = 0; + input := bufio.NewReader(resp.Body) + var first uint32 = 0 for { - line, err := input.ReadString('\n'); + line, err := input.ReadString('\n') if err != nil { if err == os.EOF { break } - die.Log(err); + die.Log(err) } switch parseCategory(line[0 : len(line)-1]) { case SNormal: @@ -284,19 +284,19 @@ func loadChars() { if first != 0 { die.Logf("bad state first at U+%04X", lastChar) } - first = lastChar; + first = lastChar case SLast: if first == 0 { die.Logf("bad state last at U+%04X", lastChar) } for i := first + 1; i <= lastChar; i++ { - chars[i] = chars[first]; - chars[i].codePoint = i; + chars[i] = chars[first] + chars[i].codePoint = i } - first = 0; + first = 0 } } - resp.Body.Close(); + resp.Body.Close() } func printCategories() { @@ -304,13 +304,13 @@ func printCategories() { return } // Find out which categories to dump - list := strings.Split(*tablelist, ",", 0); + list := strings.Split(*tablelist, ",", 0) if *tablelist == "all" { list = allCategories() } if *test { - fullCategoryTest(list); - return; + fullCategoryTest(list) + return } fmt.Printf( "// Generated by running\n"+ @@ -318,22 +318,22 @@ func printCategories() { "// DO NOT EDIT\n\n"+ "package unicode\n\n", *tablelist, - *dataURL); + *dataURL) - fmt.Println("// Version is the Unicode edition from which the tables are derived."); - fmt.Printf("const Version = %q\n\n", version()); + fmt.Println("// Version is the Unicode edition from which the tables are derived.") + fmt.Printf("const Version = %q\n\n", version()) if *tablelist == "all" { - fmt.Println("// Categories is the set of Unicode data tables."); - fmt.Println("var Categories = map[string] []Range {"); + fmt.Println("// Categories is the set of Unicode data tables.") + fmt.Println("var Categories = map[string] []Range {") for k, _ := range category { fmt.Printf("\t%q: %s,\n", k, k) } - fmt.Printf("}\n\n"); + fmt.Printf("}\n\n") } - decl := make(sort.StringArray, len(list)); - ndecl := 0; + decl := make(sort.StringArray, len(list)) + ndecl := 0 for _, name := range list { if _, ok := category[name]; !ok { die.Log("unknown category", name) @@ -342,7 +342,7 @@ func printCategories() { // name to store the data. This stops godoc dumping all the tables but keeps them // available to clients. // Cases deserving special comments - varDecl := ""; + varDecl := "" switch name { case "letter": varDecl = "\tLetter = letter; // Letter is the set of Unicode letters.\n" @@ -360,24 +360,24 @@ func printCategories() { "\t%s = _%s; // %s is the set of Unicode characters in category %s.\n", name, name, name, name) } - decl[ndecl] = varDecl; - ndecl++; - if name == "letter" { // special case + decl[ndecl] = varDecl + ndecl++ + if name == "letter" { // special case dumpRange( "var letter = []Range {\n", - letterOp); - continue; + letterOp) + continue } dumpRange( fmt.Sprintf("var _%s = []Range {\n", name), - func(code int) bool { return chars[code].category == name }); + func(code int) bool { return chars[code].category == name }) } - decl.Sort(); - fmt.Println("var ("); + decl.Sort() + fmt.Println("var (") for _, d := range decl { fmt.Print(d) } - fmt.Println(")\n"); + fmt.Println(")\n") } type Op func(code int) bool @@ -385,8 +385,8 @@ type Op func(code int) bool const format = "\tRange{0x%04x, 0x%04x, %d},\n" func dumpRange(header string, inCategory Op) { - fmt.Print(header); - next := 0; + fmt.Print(header) + next := 0 // one Range for each iteration for { // look for start of range @@ -399,22 +399,22 @@ func dumpRange(header string, inCategory Op) { } // start of range - lo := next; - hi := next; - stride := 1; + lo := next + hi := next + stride := 1 // accept lo - next++; + next++ // look for another character to set the stride for next < len(chars) && !inCategory(next) { next++ } if next >= len(chars) { // no more characters - fmt.Printf(format, lo, hi, stride); - break; + fmt.Printf(format, lo, hi, stride) + break } // set stride - stride = next - lo; + stride = next - lo // check for length of run. next points to first jump in stride for i := next; i < len(chars); i++ { if inCategory(i) == (((i - lo) % stride) == 0) { @@ -427,11 +427,11 @@ func dumpRange(header string, inCategory Op) { break } } - fmt.Printf(format, lo, hi, stride); + fmt.Printf(format, lo, hi, stride) // next range: start looking where this range ends - next = hi + 1; + next = hi + 1 } - fmt.Print("}\n\n"); + fmt.Print("}\n\n") } func fullCategoryTest(list []string) { @@ -439,7 +439,7 @@ func fullCategoryTest(list []string) { if _, ok := category[name]; !ok { die.Log("unknown category", name) } - r, ok := unicode.Categories[name]; + r, ok := unicode.Categories[name] if !ok { die.Log("unknown table", name) } @@ -456,8 +456,8 @@ func fullCategoryTest(list []string) { func verifyRange(name string, inCategory Op, table []unicode.Range) { for i := range chars { - web := inCategory(i); - pkg := unicode.Is(table, i); + web := inCategory(i) + pkg := unicode.Is(table, i) if web != pkg { fmt.Fprintf(os.Stderr, "%s: U+%04X: web=%t pkg=%t\n", name, i, web, pkg) } @@ -465,61 +465,61 @@ func verifyRange(name string, inCategory Op, table []unicode.Range) { } func parseScript(line string, scripts map[string][]Script) { - comment := strings.Index(line, "#"); + comment := strings.Index(line, "#") if comment >= 0 { line = line[0:comment] } - line = strings.TrimSpace(line); + line = strings.TrimSpace(line) if len(line) == 0 { return } - field := strings.Split(line, ";", -1); + field := strings.Split(line, ";", -1) if len(field) != 2 { die.Logf("%s: %d fields (expected 2)\n", line, len(field)) } - matches := scriptRe.MatchStrings(line); + matches := scriptRe.MatchStrings(line) if len(matches) != 4 { die.Logf("%s: %d matches (expected 3)\n", line, len(matches)) } - lo, err := strconv.Btoui64(matches[1], 16); + lo, err := strconv.Btoui64(matches[1], 16) if err != nil { die.Log("%.5s...:", err) } - hi := lo; - if len(matches[2]) > 2 { // ignore leading .. - hi, err = strconv.Btoui64(matches[2][2:], 16); + hi := lo + if len(matches[2]) > 2 { // ignore leading .. + hi, err = strconv.Btoui64(matches[2][2:], 16) if err != nil { die.Log("%.5s...:", err) } } - name := matches[3]; - s, ok := scripts[name]; + name := matches[3] + s, ok := scripts[name] if !ok || len(s) == cap(s) { - ns := make([]Script, len(s), len(s)+100); + ns := make([]Script, len(s), len(s)+100) for i, sc := range s { ns[i] = sc } - s = ns; + s = ns } - s = s[0 : len(s)+1]; - s[len(s)-1] = Script{uint32(lo), uint32(hi), name}; - scripts[name] = s; + s = s[0 : len(s)+1] + s[len(s)-1] = Script{uint32(lo), uint32(hi), name} + scripts[name] = s } // The script tables have a lot of adjacent elements. Fold them together. func foldAdjacent(r []Script) []unicode.Range { - s := make([]unicode.Range, 0, len(r)); - j := 0; + s := make([]unicode.Range, 0, len(r)) + j := 0 for i := 0; i < len(r); i++ { if j > 0 && int(r[i].lo) == s[j-1].Hi+1 { s[j-1].Hi = int(r[i].hi) } else { - s = s[0 : j+1]; - s[j] = unicode.Range{int(r[i].lo), int(r[i].hi), 1}; - j++; + s = s[0 : j+1] + s[j] = unicode.Range{int(r[i].lo), int(r[i].hi), 1} + j++ } } - return s; + return s } func fullScriptTest(list []string, installed map[string][]unicode.Range, scripts map[string][]Script) { @@ -527,7 +527,7 @@ func fullScriptTest(list []string, installed map[string][]unicode.Range, scripts if _, ok := scripts[name]; !ok { die.Log("unknown script", name) } - _, ok := installed[name]; + _, ok := installed[name] if !ok { die.Log("unknown table", name) } @@ -543,50 +543,50 @@ func fullScriptTest(list []string, installed map[string][]unicode.Range, scripts // PropList.txt has the same format as Scripts.txt so we can share its parser. func printScriptOrProperty(doProps bool) { - flag := "scripts"; - flaglist := *scriptlist; - file := "Scripts.txt"; - table := scripts; - installed := unicode.Scripts; + flag := "scripts" + flaglist := *scriptlist + file := "Scripts.txt" + table := scripts + installed := unicode.Scripts if doProps { - flag = "props"; - flaglist = *proplist; - file = "PropList.txt"; - table = props; - installed = unicode.Properties; + flag = "props" + flaglist = *proplist + file = "PropList.txt" + table = props + installed = unicode.Properties } if flaglist == "" { return } - var err os.Error; - resp, _, err := http.Get(*url + file); + var err os.Error + resp, _, err := http.Get(*url + file) if err != nil { die.Log(err) } if resp.StatusCode != 200 { die.Log("bad GET status for ", file, ":", resp.Status) } - input := bufio.NewReader(resp.Body); + input := bufio.NewReader(resp.Body) for { - line, err := input.ReadString('\n'); + line, err := input.ReadString('\n') if err != nil { if err == os.EOF { break } - die.Log(err); + die.Log(err) } - parseScript(line[0:len(line)-1], table); + parseScript(line[0:len(line)-1], table) } - resp.Body.Close(); + resp.Body.Close() // Find out which scripts to dump - list := strings.Split(flaglist, ",", 0); + list := strings.Split(flaglist, ",", 0) if flaglist == "all" { list = all(table) } if *test { - fullScriptTest(list, installed, table); - return; + fullScriptTest(list, installed, table) + return } fmt.Printf( @@ -595,23 +595,23 @@ func printScriptOrProperty(doProps bool) { "// DO NOT EDIT\n\n", flag, flaglist, - *url); + *url) if flaglist == "all" { if doProps { - fmt.Println("// Properties is the set of Unicode property tables."); - fmt.Println("var Properties = map[string] []Range {"); + fmt.Println("// Properties is the set of Unicode property tables.") + fmt.Println("var Properties = map[string] []Range {") } else { - fmt.Println("// Scripts is the set of Unicode script tables."); - fmt.Println("var Scripts = map[string] []Range {"); + fmt.Println("// Scripts is the set of Unicode script tables.") + fmt.Println("var Scripts = map[string] []Range {") } for k, _ := range table { fmt.Printf("\t%q: %s,\n", k, k) } - fmt.Printf("}\n\n"); + fmt.Printf("}\n\n") } - decl := make(sort.StringArray, len(list)); - ndecl := 0; + decl := make(sort.StringArray, len(list)) + ndecl := 0 for _, name := range list { if doProps { decl[ndecl] = fmt.Sprintf( @@ -622,36 +622,36 @@ func printScriptOrProperty(doProps bool) { "\t%s = _%s;\t// %s is the set of Unicode characters in script %s.\n", name, name, name, name) } - ndecl++; - fmt.Printf("var _%s = []Range {\n", name); - ranges := foldAdjacent(table[name]); + ndecl++ + fmt.Printf("var _%s = []Range {\n", name) + ranges := foldAdjacent(table[name]) for _, s := range ranges { fmt.Printf(format, s.Lo, s.Hi, s.Stride) } - fmt.Printf("}\n\n"); + fmt.Printf("}\n\n") } - decl.Sort(); - fmt.Println("var ("); + decl.Sort() + fmt.Println("var (") for _, d := range decl { fmt.Print(d) } - fmt.Println(")\n"); + fmt.Println(")\n") } const ( - CaseUpper = 1 << iota; - CaseLower; - CaseTitle; - CaseNone = 0; // must be zero - CaseMissing = -1; // character not present; not a valid case state + CaseUpper = 1 << iota + CaseLower + CaseTitle + CaseNone = 0 // must be zero + CaseMissing = -1 // character not present; not a valid case state ) type caseState struct { - point int; - _case int; - deltaToUpper int; - deltaToLower int; - deltaToTitle int; + point int + _case int + deltaToUpper int + deltaToLower int + deltaToTitle int } // Is d a continuation of the state of c? @@ -660,9 +660,9 @@ func (c *caseState) adjacent(d *caseState) bool { c, d = d, c } switch { - case d.point != c.point+1: // code points not adjacent (shouldn't happen) + case d.point != c.point+1: // code points not adjacent (shouldn't happen) return false - case d._case != c._case: // different cases + case d._case != c._case: // different cases return c.upperLowerAdjacent(d) case c._case == CaseNone: return false @@ -675,7 +675,7 @@ func (c *caseState) adjacent(d *caseState) bool { case d.deltaToTitle != c.deltaToTitle: return false } - return true; + return true } // Is d the same as c, but opposite in upper/lower case? this would make it @@ -709,7 +709,7 @@ func (c *caseState) upperLowerAdjacent(d *caseState) bool { case d.deltaToTitle != -1: return false } - return true; + return true } // Does this character start an UpperLower sequence? @@ -724,7 +724,7 @@ func (c *caseState) isUpperLower() bool { case c.deltaToTitle != 0: return false } - return true; + return true } // Does this character start a LowerUpper sequence? @@ -739,16 +739,16 @@ func (c *caseState) isLowerUpper() bool { case c.deltaToTitle != -1: return false } - return true; + return true } func getCaseState(i int) (c *caseState) { - c = &caseState{point: i, _case: CaseNone}; - ch := &chars[i]; + c = &caseState{point: i, _case: CaseNone} + ch := &chars[i] switch int(ch.codePoint) { case 0: - c._case = CaseMissing; // Will get NUL wrong but that doesn't matter - return; + c._case = CaseMissing // Will get NUL wrong but that doesn't matter + return case ch.upperCase: c._case = CaseUpper case ch.lowerCase: @@ -765,7 +765,7 @@ func getCaseState(i int) (c *caseState) { if ch.titleCase != 0 { c.deltaToTitle = ch.titleCase - i } - return; + return } func printCases() { @@ -773,8 +773,8 @@ func printCases() { return } if *test { - fullCaseTest(); - return; + fullCaseTest() + return } fmt.Printf( "// Generated by running\n"+ @@ -784,25 +784,25 @@ func printCases() { "// non-self mappings.\n"+ "var CaseRanges = _CaseRanges\n"+ "var _CaseRanges = []CaseRange {\n", - *dataURL); + *dataURL) - var startState *caseState; // the start of a run; nil for not active - var prevState = &caseState{}; // the state of the previous character + var startState *caseState // the start of a run; nil for not active + var prevState = &caseState{} // the state of the previous character for i := range chars { - state := getCaseState(i); + state := getCaseState(i) if state.adjacent(prevState) { - prevState = state; - continue; + prevState = state + continue } // end of run (possibly) - printCaseRange(startState, prevState); - startState = nil; + printCaseRange(startState, prevState) + startState = nil if state._case != CaseMissing && state._case != CaseNone { startState = state } - prevState = state; + prevState = state } - fmt.Printf("}\n"); + fmt.Printf("}\n") } func printCaseRange(lo, hi *caseState) { @@ -818,9 +818,9 @@ func printCaseRange(lo, hi *caseState) { fmt.Printf("\tCaseRange{0x%04X, 0x%04X, d{UpperLower, UpperLower, UpperLower}},\n", lo.point, hi.point) case hi.point > lo.point && lo.isLowerUpper(): - die.Log("LowerUpper sequence: should not happen: U+%04X. If it's real, need to fix To()", lo.point); + die.Log("LowerUpper sequence: should not happen: U+%04X. If it's real, need to fix To()", lo.point) fmt.Printf("\tCaseRange{0x%04X, 0x%04X, d{LowerUpper, LowerUpper, LowerUpper}},\n", - lo.point, hi.point); + lo.point, hi.point) default: fmt.Printf("\tCaseRange{0x%04X, 0x%04X, d{%d, %d, %d}},\n", lo.point, hi.point, @@ -833,23 +833,23 @@ func caseIt(rune, cased int) int { if cased == 0 { return rune } - return cased; + return cased } func fullCaseTest() { for i, c := range chars { - lower := unicode.ToLower(i); - want := caseIt(i, c.lowerCase); + lower := unicode.ToLower(i) + want := caseIt(i, c.lowerCase) if lower != want { fmt.Fprintf(os.Stderr, "lower U+%04X should be U+%04X is U+%04X\n", i, want, lower) } - upper := unicode.ToUpper(i); - want = caseIt(i, c.upperCase); + upper := unicode.ToUpper(i) + want = caseIt(i, c.upperCase) if upper != want { fmt.Fprintf(os.Stderr, "upper U+%04X should be U+%04X is U+%04X\n", i, want, upper) } - title := unicode.ToTitle(i); - want = caseIt(i, c.titleCase); + title := unicode.ToTitle(i) + want = caseIt(i, c.titleCase) if title != want { fmt.Fprintf(os.Stderr, "title U+%04X should be U+%04X is U+%04X\n", i, want, title) } diff --git a/src/pkg/unicode/script_test.go b/src/pkg/unicode/script_test.go index 316de2d25..b3e980b1c 100644 --- a/src/pkg/unicode/script_test.go +++ b/src/pkg/unicode/script_test.go @@ -5,13 +5,13 @@ package unicode_test import ( - "testing"; - . "unicode"; + "testing" + . "unicode" ) type T struct { - rune int; - script string; + rune int + script string } // Hand-chosen tests from Unicode 5.1.0, mostly to discover when new @@ -112,7 +112,7 @@ var inTest = []T{ T{0xa216, "Yi"}, } -var outTest = []T{ // not really worth being thorough +var outTest = []T{ // not really worth being thorough T{0x20, "Telugu"}, } @@ -185,7 +185,7 @@ var inPropTest = []T{ } func TestScripts(t *testing.T) { - notTested := make(map[string]bool); + notTested := make(map[string]bool) for k := range Scripts { notTested[k] = true } @@ -196,7 +196,7 @@ func TestScripts(t *testing.T) { if !Is(Scripts[test.script], test.rune) { t.Errorf("IsScript(%#x, %s) = false, want true\n", test.rune, test.script) } - notTested[test.script] = false, false; + notTested[test.script] = false, false } for _, test := range outTest { if Is(Scripts[test.script], test.rune) { @@ -209,7 +209,7 @@ func TestScripts(t *testing.T) { } func TestCategories(t *testing.T) { - notTested := make(map[string]bool); + notTested := make(map[string]bool) for k := range Categories { notTested[k] = true } @@ -220,7 +220,7 @@ func TestCategories(t *testing.T) { if !Is(Categories[test.script], test.rune) { t.Errorf("IsCategory(%#x, %s) = false, want true\n", test.rune, test.script) } - notTested[test.script] = false, false; + notTested[test.script] = false, false } for k := range notTested { t.Error("not tested:", k) @@ -228,7 +228,7 @@ func TestCategories(t *testing.T) { } func TestProperties(t *testing.T) { - notTested := make(map[string]bool); + notTested := make(map[string]bool) for k := range Properties { notTested[k] = true } @@ -239,7 +239,7 @@ func TestProperties(t *testing.T) { if !Is(Properties[test.script], test.rune) { t.Errorf("IsCategory(%#x, %s) = false, want true\n", test.rune, test.script) } - notTested[test.script] = false, false; + notTested[test.script] = false, false } for k := range notTested { t.Error("not tested:", k) diff --git a/src/pkg/unicode/tables.go b/src/pkg/unicode/tables.go index 4a4dbe02b..10e4fae6d 100644 --- a/src/pkg/unicode/tables.go +++ b/src/pkg/unicode/tables.go @@ -1921,40 +1921,40 @@ var _Lo = []Range{ } var ( - Cc = _Cc; // Cc is the set of Unicode characters in category Cc. - Cf = _Cf; // Cf is the set of Unicode characters in category Cf. - Co = _Co; // Co is the set of Unicode characters in category Co. - Cs = _Cs; // Cs is the set of Unicode characters in category Cs. - Digit = _Nd; // Digit is the set of Unicode characters with the "decimal digit" property. - Nd = _Nd; // Nd is the set of Unicode characters in category Nd. - Letter = letter; // Letter is the set of Unicode letters. - Lm = _Lm; // Lm is the set of Unicode characters in category Lm. - Lo = _Lo; // Lo is the set of Unicode characters in category Lo. - Lower = _Ll; // Lower is the set of Unicode lower case letters. - Ll = _Ll; // Ll is the set of Unicode characters in category Ll. - Mc = _Mc; // Mc is the set of Unicode characters in category Mc. - Me = _Me; // Me is the set of Unicode characters in category Me. - Mn = _Mn; // Mn is the set of Unicode characters in category Mn. - Nl = _Nl; // Nl is the set of Unicode characters in category Nl. - No = _No; // No is the set of Unicode characters in category No. - Pc = _Pc; // Pc is the set of Unicode characters in category Pc. - Pd = _Pd; // Pd is the set of Unicode characters in category Pd. - Pe = _Pe; // Pe is the set of Unicode characters in category Pe. - Pf = _Pf; // Pf is the set of Unicode characters in category Pf. - Pi = _Pi; // Pi is the set of Unicode characters in category Pi. - Po = _Po; // Po is the set of Unicode characters in category Po. - Ps = _Ps; // Ps is the set of Unicode characters in category Ps. - Sc = _Sc; // Sc is the set of Unicode characters in category Sc. - Sk = _Sk; // Sk is the set of Unicode characters in category Sk. - Sm = _Sm; // Sm is the set of Unicode characters in category Sm. - So = _So; // So is the set of Unicode characters in category So. - Title = _Lt; // Title is the set of Unicode title case letters. - Lt = _Lt; // Lt is the set of Unicode characters in category Lt. - Upper = _Lu; // Upper is the set of Unicode upper case letters. - Lu = _Lu; // Lu is the set of Unicode characters in category Lu. - Zl = _Zl; // Zl is the set of Unicode characters in category Zl. - Zp = _Zp; // Zp is the set of Unicode characters in category Zp. - Zs = _Zs; // Zs is the set of Unicode characters in category Zs. + Cc = _Cc // Cc is the set of Unicode characters in category Cc. + Cf = _Cf // Cf is the set of Unicode characters in category Cf. + Co = _Co // Co is the set of Unicode characters in category Co. + Cs = _Cs // Cs is the set of Unicode characters in category Cs. + Digit = _Nd // Digit is the set of Unicode characters with the "decimal digit" property. + Nd = _Nd // Nd is the set of Unicode characters in category Nd. + Letter = letter // Letter is the set of Unicode letters. + Lm = _Lm // Lm is the set of Unicode characters in category Lm. + Lo = _Lo // Lo is the set of Unicode characters in category Lo. + Lower = _Ll // Lower is the set of Unicode lower case letters. + Ll = _Ll // Ll is the set of Unicode characters in category Ll. + Mc = _Mc // Mc is the set of Unicode characters in category Mc. + Me = _Me // Me is the set of Unicode characters in category Me. + Mn = _Mn // Mn is the set of Unicode characters in category Mn. + Nl = _Nl // Nl is the set of Unicode characters in category Nl. + No = _No // No is the set of Unicode characters in category No. + Pc = _Pc // Pc is the set of Unicode characters in category Pc. + Pd = _Pd // Pd is the set of Unicode characters in category Pd. + Pe = _Pe // Pe is the set of Unicode characters in category Pe. + Pf = _Pf // Pf is the set of Unicode characters in category Pf. + Pi = _Pi // Pi is the set of Unicode characters in category Pi. + Po = _Po // Po is the set of Unicode characters in category Po. + Ps = _Ps // Ps is the set of Unicode characters in category Ps. + Sc = _Sc // Sc is the set of Unicode characters in category Sc. + Sk = _Sk // Sk is the set of Unicode characters in category Sk. + Sm = _Sm // Sm is the set of Unicode characters in category Sm. + So = _So // So is the set of Unicode characters in category So. + Title = _Lt // Title is the set of Unicode title case letters. + Lt = _Lt // Lt is the set of Unicode characters in category Lt. + Upper = _Lu // Upper is the set of Unicode upper case letters. + Lu = _Lu // Lu is the set of Unicode characters in category Lu. + Zl = _Zl // Zl is the set of Unicode characters in category Zl. + Zp = _Zp // Zp is the set of Unicode characters in category Zp. + Zs = _Zs // Zs is the set of Unicode characters in category Zs. ) // Generated by running @@ -2990,98 +2990,98 @@ var _Gothic = []Range{ } var ( - Arabic = _Arabic; // Arabic is the set of Unicode characters in script Arabic. - Armenian = _Armenian; // Armenian is the set of Unicode characters in script Armenian. - Avestan = _Avestan; // Avestan is the set of Unicode characters in script Avestan. - Balinese = _Balinese; // Balinese is the set of Unicode characters in script Balinese. - Bamum = _Bamum; // Bamum is the set of Unicode characters in script Bamum. - Bengali = _Bengali; // Bengali is the set of Unicode characters in script Bengali. - Bopomofo = _Bopomofo; // Bopomofo is the set of Unicode characters in script Bopomofo. - Braille = _Braille; // Braille is the set of Unicode characters in script Braille. - Buginese = _Buginese; // Buginese is the set of Unicode characters in script Buginese. - Buhid = _Buhid; // Buhid is the set of Unicode characters in script Buhid. - Canadian_Aboriginal = _Canadian_Aboriginal; // Canadian_Aboriginal is the set of Unicode characters in script Canadian_Aboriginal. - Carian = _Carian; // Carian is the set of Unicode characters in script Carian. - Cham = _Cham; // Cham is the set of Unicode characters in script Cham. - Cherokee = _Cherokee; // Cherokee is the set of Unicode characters in script Cherokee. - Common = _Common; // Common is the set of Unicode characters in script Common. - Coptic = _Coptic; // Coptic is the set of Unicode characters in script Coptic. - Cuneiform = _Cuneiform; // Cuneiform is the set of Unicode characters in script Cuneiform. - Cypriot = _Cypriot; // Cypriot is the set of Unicode characters in script Cypriot. - Cyrillic = _Cyrillic; // Cyrillic is the set of Unicode characters in script Cyrillic. - Deseret = _Deseret; // Deseret is the set of Unicode characters in script Deseret. - Devanagari = _Devanagari; // Devanagari is the set of Unicode characters in script Devanagari. - Egyptian_Hieroglyphs = _Egyptian_Hieroglyphs; // Egyptian_Hieroglyphs is the set of Unicode characters in script Egyptian_Hieroglyphs. - Ethiopic = _Ethiopic; // Ethiopic is the set of Unicode characters in script Ethiopic. - Georgian = _Georgian; // Georgian is the set of Unicode characters in script Georgian. - Glagolitic = _Glagolitic; // Glagolitic is the set of Unicode characters in script Glagolitic. - Gothic = _Gothic; // Gothic is the set of Unicode characters in script Gothic. - Greek = _Greek; // Greek is the set of Unicode characters in script Greek. - Gujarati = _Gujarati; // Gujarati is the set of Unicode characters in script Gujarati. - Gurmukhi = _Gurmukhi; // Gurmukhi is the set of Unicode characters in script Gurmukhi. - Han = _Han; // Han is the set of Unicode characters in script Han. - Hangul = _Hangul; // Hangul is the set of Unicode characters in script Hangul. - Hanunoo = _Hanunoo; // Hanunoo is the set of Unicode characters in script Hanunoo. - Hebrew = _Hebrew; // Hebrew is the set of Unicode characters in script Hebrew. - Hiragana = _Hiragana; // Hiragana is the set of Unicode characters in script Hiragana. - Imperial_Aramaic = _Imperial_Aramaic; // Imperial_Aramaic is the set of Unicode characters in script Imperial_Aramaic. - Inherited = _Inherited; // Inherited is the set of Unicode characters in script Inherited. - Inscriptional_Pahlavi = _Inscriptional_Pahlavi; // Inscriptional_Pahlavi is the set of Unicode characters in script Inscriptional_Pahlavi. - Inscriptional_Parthian = _Inscriptional_Parthian; // Inscriptional_Parthian is the set of Unicode characters in script Inscriptional_Parthian. - Javanese = _Javanese; // Javanese is the set of Unicode characters in script Javanese. - Kaithi = _Kaithi; // Kaithi is the set of Unicode characters in script Kaithi. - Kannada = _Kannada; // Kannada is the set of Unicode characters in script Kannada. - Katakana = _Katakana; // Katakana is the set of Unicode characters in script Katakana. - Kayah_Li = _Kayah_Li; // Kayah_Li is the set of Unicode characters in script Kayah_Li. - Kharoshthi = _Kharoshthi; // Kharoshthi is the set of Unicode characters in script Kharoshthi. - Khmer = _Khmer; // Khmer is the set of Unicode characters in script Khmer. - Lao = _Lao; // Lao is the set of Unicode characters in script Lao. - Latin = _Latin; // Latin is the set of Unicode characters in script Latin. - Lepcha = _Lepcha; // Lepcha is the set of Unicode characters in script Lepcha. - Limbu = _Limbu; // Limbu is the set of Unicode characters in script Limbu. - Linear_B = _Linear_B; // Linear_B is the set of Unicode characters in script Linear_B. - Lisu = _Lisu; // Lisu is the set of Unicode characters in script Lisu. - Lycian = _Lycian; // Lycian is the set of Unicode characters in script Lycian. - Lydian = _Lydian; // Lydian is the set of Unicode characters in script Lydian. - Malayalam = _Malayalam; // Malayalam is the set of Unicode characters in script Malayalam. - Meetei_Mayek = _Meetei_Mayek; // Meetei_Mayek is the set of Unicode characters in script Meetei_Mayek. - Mongolian = _Mongolian; // Mongolian is the set of Unicode characters in script Mongolian. - Myanmar = _Myanmar; // Myanmar is the set of Unicode characters in script Myanmar. - New_Tai_Lue = _New_Tai_Lue; // New_Tai_Lue is the set of Unicode characters in script New_Tai_Lue. - Nko = _Nko; // Nko is the set of Unicode characters in script Nko. - Ogham = _Ogham; // Ogham is the set of Unicode characters in script Ogham. - Ol_Chiki = _Ol_Chiki; // Ol_Chiki is the set of Unicode characters in script Ol_Chiki. - Old_Italic = _Old_Italic; // Old_Italic is the set of Unicode characters in script Old_Italic. - Old_Persian = _Old_Persian; // Old_Persian is the set of Unicode characters in script Old_Persian. - Old_South_Arabian = _Old_South_Arabian; // Old_South_Arabian is the set of Unicode characters in script Old_South_Arabian. - Old_Turkic = _Old_Turkic; // Old_Turkic is the set of Unicode characters in script Old_Turkic. - Oriya = _Oriya; // Oriya is the set of Unicode characters in script Oriya. - Osmanya = _Osmanya; // Osmanya is the set of Unicode characters in script Osmanya. - Phags_Pa = _Phags_Pa; // Phags_Pa is the set of Unicode characters in script Phags_Pa. - Phoenician = _Phoenician; // Phoenician is the set of Unicode characters in script Phoenician. - Rejang = _Rejang; // Rejang is the set of Unicode characters in script Rejang. - Runic = _Runic; // Runic is the set of Unicode characters in script Runic. - Samaritan = _Samaritan; // Samaritan is the set of Unicode characters in script Samaritan. - Saurashtra = _Saurashtra; // Saurashtra is the set of Unicode characters in script Saurashtra. - Shavian = _Shavian; // Shavian is the set of Unicode characters in script Shavian. - Sinhala = _Sinhala; // Sinhala is the set of Unicode characters in script Sinhala. - Sundanese = _Sundanese; // Sundanese is the set of Unicode characters in script Sundanese. - Syloti_Nagri = _Syloti_Nagri; // Syloti_Nagri is the set of Unicode characters in script Syloti_Nagri. - Syriac = _Syriac; // Syriac is the set of Unicode characters in script Syriac. - Tagalog = _Tagalog; // Tagalog is the set of Unicode characters in script Tagalog. - Tagbanwa = _Tagbanwa; // Tagbanwa is the set of Unicode characters in script Tagbanwa. - Tai_Le = _Tai_Le; // Tai_Le is the set of Unicode characters in script Tai_Le. - Tai_Tham = _Tai_Tham; // Tai_Tham is the set of Unicode characters in script Tai_Tham. - Tai_Viet = _Tai_Viet; // Tai_Viet is the set of Unicode characters in script Tai_Viet. - Tamil = _Tamil; // Tamil is the set of Unicode characters in script Tamil. - Telugu = _Telugu; // Telugu is the set of Unicode characters in script Telugu. - Thaana = _Thaana; // Thaana is the set of Unicode characters in script Thaana. - Thai = _Thai; // Thai is the set of Unicode characters in script Thai. - Tibetan = _Tibetan; // Tibetan is the set of Unicode characters in script Tibetan. - Tifinagh = _Tifinagh; // Tifinagh is the set of Unicode characters in script Tifinagh. - Ugaritic = _Ugaritic; // Ugaritic is the set of Unicode characters in script Ugaritic. - Vai = _Vai; // Vai is the set of Unicode characters in script Vai. - Yi = _Yi; // Yi is the set of Unicode characters in script Yi. + Arabic = _Arabic // Arabic is the set of Unicode characters in script Arabic. + Armenian = _Armenian // Armenian is the set of Unicode characters in script Armenian. + Avestan = _Avestan // Avestan is the set of Unicode characters in script Avestan. + Balinese = _Balinese // Balinese is the set of Unicode characters in script Balinese. + Bamum = _Bamum // Bamum is the set of Unicode characters in script Bamum. + Bengali = _Bengali // Bengali is the set of Unicode characters in script Bengali. + Bopomofo = _Bopomofo // Bopomofo is the set of Unicode characters in script Bopomofo. + Braille = _Braille // Braille is the set of Unicode characters in script Braille. + Buginese = _Buginese // Buginese is the set of Unicode characters in script Buginese. + Buhid = _Buhid // Buhid is the set of Unicode characters in script Buhid. + Canadian_Aboriginal = _Canadian_Aboriginal // Canadian_Aboriginal is the set of Unicode characters in script Canadian_Aboriginal. + Carian = _Carian // Carian is the set of Unicode characters in script Carian. + Cham = _Cham // Cham is the set of Unicode characters in script Cham. + Cherokee = _Cherokee // Cherokee is the set of Unicode characters in script Cherokee. + Common = _Common // Common is the set of Unicode characters in script Common. + Coptic = _Coptic // Coptic is the set of Unicode characters in script Coptic. + Cuneiform = _Cuneiform // Cuneiform is the set of Unicode characters in script Cuneiform. + Cypriot = _Cypriot // Cypriot is the set of Unicode characters in script Cypriot. + Cyrillic = _Cyrillic // Cyrillic is the set of Unicode characters in script Cyrillic. + Deseret = _Deseret // Deseret is the set of Unicode characters in script Deseret. + Devanagari = _Devanagari // Devanagari is the set of Unicode characters in script Devanagari. + Egyptian_Hieroglyphs = _Egyptian_Hieroglyphs // Egyptian_Hieroglyphs is the set of Unicode characters in script Egyptian_Hieroglyphs. + Ethiopic = _Ethiopic // Ethiopic is the set of Unicode characters in script Ethiopic. + Georgian = _Georgian // Georgian is the set of Unicode characters in script Georgian. + Glagolitic = _Glagolitic // Glagolitic is the set of Unicode characters in script Glagolitic. + Gothic = _Gothic // Gothic is the set of Unicode characters in script Gothic. + Greek = _Greek // Greek is the set of Unicode characters in script Greek. + Gujarati = _Gujarati // Gujarati is the set of Unicode characters in script Gujarati. + Gurmukhi = _Gurmukhi // Gurmukhi is the set of Unicode characters in script Gurmukhi. + Han = _Han // Han is the set of Unicode characters in script Han. + Hangul = _Hangul // Hangul is the set of Unicode characters in script Hangul. + Hanunoo = _Hanunoo // Hanunoo is the set of Unicode characters in script Hanunoo. + Hebrew = _Hebrew // Hebrew is the set of Unicode characters in script Hebrew. + Hiragana = _Hiragana // Hiragana is the set of Unicode characters in script Hiragana. + Imperial_Aramaic = _Imperial_Aramaic // Imperial_Aramaic is the set of Unicode characters in script Imperial_Aramaic. + Inherited = _Inherited // Inherited is the set of Unicode characters in script Inherited. + Inscriptional_Pahlavi = _Inscriptional_Pahlavi // Inscriptional_Pahlavi is the set of Unicode characters in script Inscriptional_Pahlavi. + Inscriptional_Parthian = _Inscriptional_Parthian // Inscriptional_Parthian is the set of Unicode characters in script Inscriptional_Parthian. + Javanese = _Javanese // Javanese is the set of Unicode characters in script Javanese. + Kaithi = _Kaithi // Kaithi is the set of Unicode characters in script Kaithi. + Kannada = _Kannada // Kannada is the set of Unicode characters in script Kannada. + Katakana = _Katakana // Katakana is the set of Unicode characters in script Katakana. + Kayah_Li = _Kayah_Li // Kayah_Li is the set of Unicode characters in script Kayah_Li. + Kharoshthi = _Kharoshthi // Kharoshthi is the set of Unicode characters in script Kharoshthi. + Khmer = _Khmer // Khmer is the set of Unicode characters in script Khmer. + Lao = _Lao // Lao is the set of Unicode characters in script Lao. + Latin = _Latin // Latin is the set of Unicode characters in script Latin. + Lepcha = _Lepcha // Lepcha is the set of Unicode characters in script Lepcha. + Limbu = _Limbu // Limbu is the set of Unicode characters in script Limbu. + Linear_B = _Linear_B // Linear_B is the set of Unicode characters in script Linear_B. + Lisu = _Lisu // Lisu is the set of Unicode characters in script Lisu. + Lycian = _Lycian // Lycian is the set of Unicode characters in script Lycian. + Lydian = _Lydian // Lydian is the set of Unicode characters in script Lydian. + Malayalam = _Malayalam // Malayalam is the set of Unicode characters in script Malayalam. + Meetei_Mayek = _Meetei_Mayek // Meetei_Mayek is the set of Unicode characters in script Meetei_Mayek. + Mongolian = _Mongolian // Mongolian is the set of Unicode characters in script Mongolian. + Myanmar = _Myanmar // Myanmar is the set of Unicode characters in script Myanmar. + New_Tai_Lue = _New_Tai_Lue // New_Tai_Lue is the set of Unicode characters in script New_Tai_Lue. + Nko = _Nko // Nko is the set of Unicode characters in script Nko. + Ogham = _Ogham // Ogham is the set of Unicode characters in script Ogham. + Ol_Chiki = _Ol_Chiki // Ol_Chiki is the set of Unicode characters in script Ol_Chiki. + Old_Italic = _Old_Italic // Old_Italic is the set of Unicode characters in script Old_Italic. + Old_Persian = _Old_Persian // Old_Persian is the set of Unicode characters in script Old_Persian. + Old_South_Arabian = _Old_South_Arabian // Old_South_Arabian is the set of Unicode characters in script Old_South_Arabian. + Old_Turkic = _Old_Turkic // Old_Turkic is the set of Unicode characters in script Old_Turkic. + Oriya = _Oriya // Oriya is the set of Unicode characters in script Oriya. + Osmanya = _Osmanya // Osmanya is the set of Unicode characters in script Osmanya. + Phags_Pa = _Phags_Pa // Phags_Pa is the set of Unicode characters in script Phags_Pa. + Phoenician = _Phoenician // Phoenician is the set of Unicode characters in script Phoenician. + Rejang = _Rejang // Rejang is the set of Unicode characters in script Rejang. + Runic = _Runic // Runic is the set of Unicode characters in script Runic. + Samaritan = _Samaritan // Samaritan is the set of Unicode characters in script Samaritan. + Saurashtra = _Saurashtra // Saurashtra is the set of Unicode characters in script Saurashtra. + Shavian = _Shavian // Shavian is the set of Unicode characters in script Shavian. + Sinhala = _Sinhala // Sinhala is the set of Unicode characters in script Sinhala. + Sundanese = _Sundanese // Sundanese is the set of Unicode characters in script Sundanese. + Syloti_Nagri = _Syloti_Nagri // Syloti_Nagri is the set of Unicode characters in script Syloti_Nagri. + Syriac = _Syriac // Syriac is the set of Unicode characters in script Syriac. + Tagalog = _Tagalog // Tagalog is the set of Unicode characters in script Tagalog. + Tagbanwa = _Tagbanwa // Tagbanwa is the set of Unicode characters in script Tagbanwa. + Tai_Le = _Tai_Le // Tai_Le is the set of Unicode characters in script Tai_Le. + Tai_Tham = _Tai_Tham // Tai_Tham is the set of Unicode characters in script Tai_Tham. + Tai_Viet = _Tai_Viet // Tai_Viet is the set of Unicode characters in script Tai_Viet. + Tamil = _Tamil // Tamil is the set of Unicode characters in script Tamil. + Telugu = _Telugu // Telugu is the set of Unicode characters in script Telugu. + Thaana = _Thaana // Thaana is the set of Unicode characters in script Thaana. + Thai = _Thai // Thai is the set of Unicode characters in script Thai. + Tibetan = _Tibetan // Tibetan is the set of Unicode characters in script Tibetan. + Tifinagh = _Tifinagh // Tifinagh is the set of Unicode characters in script Tifinagh. + Ugaritic = _Ugaritic // Ugaritic is the set of Unicode characters in script Ugaritic. + Vai = _Vai // Vai is the set of Unicode characters in script Vai. + Yi = _Yi // Yi is the set of Unicode characters in script Yi. ) // Generated by running @@ -3943,38 +3943,38 @@ var _White_Space = []Range{ } var ( - ASCII_Hex_Digit = _ASCII_Hex_Digit; // ASCII_Hex_Digit is the set of Unicode characters with property ASCII_Hex_Digit. - Bidi_Control = _Bidi_Control; // Bidi_Control is the set of Unicode characters with property Bidi_Control. - Dash = _Dash; // Dash is the set of Unicode characters with property Dash. - Deprecated = _Deprecated; // Deprecated is the set of Unicode characters with property Deprecated. - Diacritic = _Diacritic; // Diacritic is the set of Unicode characters with property Diacritic. - Extender = _Extender; // Extender is the set of Unicode characters with property Extender. - Hex_Digit = _Hex_Digit; // Hex_Digit is the set of Unicode characters with property Hex_Digit. - Hyphen = _Hyphen; // Hyphen is the set of Unicode characters with property Hyphen. - IDS_Binary_Operator = _IDS_Binary_Operator; // IDS_Binary_Operator is the set of Unicode characters with property IDS_Binary_Operator. - IDS_Trinary_Operator = _IDS_Trinary_Operator; // IDS_Trinary_Operator is the set of Unicode characters with property IDS_Trinary_Operator. - Ideographic = _Ideographic; // Ideographic is the set of Unicode characters with property Ideographic. - Join_Control = _Join_Control; // Join_Control is the set of Unicode characters with property Join_Control. - Logical_Order_Exception = _Logical_Order_Exception; // Logical_Order_Exception is the set of Unicode characters with property Logical_Order_Exception. - Noncharacter_Code_Point = _Noncharacter_Code_Point; // Noncharacter_Code_Point is the set of Unicode characters with property Noncharacter_Code_Point. - Other_Alphabetic = _Other_Alphabetic; // Other_Alphabetic is the set of Unicode characters with property Other_Alphabetic. - Other_Default_Ignorable_Code_Point = _Other_Default_Ignorable_Code_Point; // Other_Default_Ignorable_Code_Point is the set of Unicode characters with property Other_Default_Ignorable_Code_Point. - Other_Grapheme_Extend = _Other_Grapheme_Extend; // Other_Grapheme_Extend is the set of Unicode characters with property Other_Grapheme_Extend. - Other_ID_Continue = _Other_ID_Continue; // Other_ID_Continue is the set of Unicode characters with property Other_ID_Continue. - Other_ID_Start = _Other_ID_Start; // Other_ID_Start is the set of Unicode characters with property Other_ID_Start. - Other_Lowercase = _Other_Lowercase; // Other_Lowercase is the set of Unicode characters with property Other_Lowercase. - Other_Math = _Other_Math; // Other_Math is the set of Unicode characters with property Other_Math. - Other_Uppercase = _Other_Uppercase; // Other_Uppercase is the set of Unicode characters with property Other_Uppercase. - Pattern_Syntax = _Pattern_Syntax; // Pattern_Syntax is the set of Unicode characters with property Pattern_Syntax. - Pattern_White_Space = _Pattern_White_Space; // Pattern_White_Space is the set of Unicode characters with property Pattern_White_Space. - Quotation_Mark = _Quotation_Mark; // Quotation_Mark is the set of Unicode characters with property Quotation_Mark. - Radical = _Radical; // Radical is the set of Unicode characters with property Radical. - STerm = _STerm; // STerm is the set of Unicode characters with property STerm. - Soft_Dotted = _Soft_Dotted; // Soft_Dotted is the set of Unicode characters with property Soft_Dotted. - Terminal_Punctuation = _Terminal_Punctuation; // Terminal_Punctuation is the set of Unicode characters with property Terminal_Punctuation. - Unified_Ideograph = _Unified_Ideograph; // Unified_Ideograph is the set of Unicode characters with property Unified_Ideograph. - Variation_Selector = _Variation_Selector; // Variation_Selector is the set of Unicode characters with property Variation_Selector. - White_Space = _White_Space; // White_Space is the set of Unicode characters with property White_Space. + ASCII_Hex_Digit = _ASCII_Hex_Digit // ASCII_Hex_Digit is the set of Unicode characters with property ASCII_Hex_Digit. + Bidi_Control = _Bidi_Control // Bidi_Control is the set of Unicode characters with property Bidi_Control. + Dash = _Dash // Dash is the set of Unicode characters with property Dash. + Deprecated = _Deprecated // Deprecated is the set of Unicode characters with property Deprecated. + Diacritic = _Diacritic // Diacritic is the set of Unicode characters with property Diacritic. + Extender = _Extender // Extender is the set of Unicode characters with property Extender. + Hex_Digit = _Hex_Digit // Hex_Digit is the set of Unicode characters with property Hex_Digit. + Hyphen = _Hyphen // Hyphen is the set of Unicode characters with property Hyphen. + IDS_Binary_Operator = _IDS_Binary_Operator // IDS_Binary_Operator is the set of Unicode characters with property IDS_Binary_Operator. + IDS_Trinary_Operator = _IDS_Trinary_Operator // IDS_Trinary_Operator is the set of Unicode characters with property IDS_Trinary_Operator. + Ideographic = _Ideographic // Ideographic is the set of Unicode characters with property Ideographic. + Join_Control = _Join_Control // Join_Control is the set of Unicode characters with property Join_Control. + Logical_Order_Exception = _Logical_Order_Exception // Logical_Order_Exception is the set of Unicode characters with property Logical_Order_Exception. + Noncharacter_Code_Point = _Noncharacter_Code_Point // Noncharacter_Code_Point is the set of Unicode characters with property Noncharacter_Code_Point. + Other_Alphabetic = _Other_Alphabetic // Other_Alphabetic is the set of Unicode characters with property Other_Alphabetic. + Other_Default_Ignorable_Code_Point = _Other_Default_Ignorable_Code_Point // Other_Default_Ignorable_Code_Point is the set of Unicode characters with property Other_Default_Ignorable_Code_Point. + Other_Grapheme_Extend = _Other_Grapheme_Extend // Other_Grapheme_Extend is the set of Unicode characters with property Other_Grapheme_Extend. + Other_ID_Continue = _Other_ID_Continue // Other_ID_Continue is the set of Unicode characters with property Other_ID_Continue. + Other_ID_Start = _Other_ID_Start // Other_ID_Start is the set of Unicode characters with property Other_ID_Start. + Other_Lowercase = _Other_Lowercase // Other_Lowercase is the set of Unicode characters with property Other_Lowercase. + Other_Math = _Other_Math // Other_Math is the set of Unicode characters with property Other_Math. + Other_Uppercase = _Other_Uppercase // Other_Uppercase is the set of Unicode characters with property Other_Uppercase. + Pattern_Syntax = _Pattern_Syntax // Pattern_Syntax is the set of Unicode characters with property Pattern_Syntax. + Pattern_White_Space = _Pattern_White_Space // Pattern_White_Space is the set of Unicode characters with property Pattern_White_Space. + Quotation_Mark = _Quotation_Mark // Quotation_Mark is the set of Unicode characters with property Quotation_Mark. + Radical = _Radical // Radical is the set of Unicode characters with property Radical. + STerm = _STerm // STerm is the set of Unicode characters with property STerm. + Soft_Dotted = _Soft_Dotted // Soft_Dotted is the set of Unicode characters with property Soft_Dotted. + Terminal_Punctuation = _Terminal_Punctuation // Terminal_Punctuation is the set of Unicode characters with property Terminal_Punctuation. + Unified_Ideograph = _Unified_Ideograph // Unified_Ideograph is the set of Unicode characters with property Unified_Ideograph. + Variation_Selector = _Variation_Selector // Variation_Selector is the set of Unicode characters with property Variation_Selector. + White_Space = _White_Space // White_Space is the set of Unicode characters with property White_Space. ) // Generated by running |
