diff options
Diffstat (limited to 'src/pkg/unicode/maketables.go')
-rw-r--r-- | src/pkg/unicode/maketables.go | 114 |
1 files changed, 82 insertions, 32 deletions
diff --git a/src/pkg/unicode/maketables.go b/src/pkg/unicode/maketables.go index 33a826862..c3cf32b48 100644 --- a/src/pkg/unicode/maketables.go +++ b/src/pkg/unicode/maketables.go @@ -28,6 +28,7 @@ func main() { printScriptOrProperty(false) printScriptOrProperty(true) printCases() + printSizes() } var dataURL = flag.String("data", "", "full URL for UnicodeData.txt; defaults to --url/UnicodeData.txt") @@ -278,16 +279,16 @@ func loadChars() { switch parseCategory(line[0 : len(line)-1]) { case SNormal: if first != 0 { - logger.Fatalf("bad state normal at U+%04X", lastChar) + logger.Fatalf("bad state normal at %U", lastChar) } case SFirst: if first != 0 { - logger.Fatalf("bad state first at U+%04X", lastChar) + logger.Fatalf("bad state first at %U", lastChar) } first = lastChar case SLast: if first == 0 { - logger.Fatalf("bad state last at U+%04X", lastChar) + logger.Fatalf("bad state last at %U", lastChar) } for i := first + 1; i <= lastChar; i++ { chars[i] = chars[first] @@ -299,6 +300,15 @@ func loadChars() { resp.Body.Close() } +const progHeader = `// Generated by running +// maketables --tables=%s --data=%s +// DO NOT EDIT + +package unicode + +` + + func printCategories() { if *tablelist == "" { return @@ -312,20 +322,14 @@ func printCategories() { fullCategoryTest(list) return } - fmt.Printf( - "// Generated by running\n"+ - "// maketables --tables=%s --data=%s\n"+ - "// DO NOT EDIT\n\n"+ - "package unicode\n\n", - *tablelist, - *dataURL) + fmt.Printf(progHeader, *tablelist, *dataURL) fmt.Println("// Version is the Unicode edition from which the tables are derived.") fmt.Printf("const Version = %q\n\n", version()) if *tablelist == "all" { fmt.Println("// Categories is the set of Unicode data tables.") - fmt.Println("var Categories = map[string] []Range {") + fmt.Println("var Categories = map[string] *RangeTable {") for k := range category { fmt.Printf("\t%q: %s,\n", k, k) } @@ -364,12 +368,12 @@ func printCategories() { ndecl++ if name == "letter" { // special case dumpRange( - "var letter = []Range {\n", + "var letter = &RangeTable{\n", letterOp) continue } dumpRange( - fmt.Sprintf("var _%s = []Range {\n", name), + fmt.Sprintf("var _%s = &RangeTable{\n", name), func(code int) bool { return chars[code].category == name }) } decl.Sort() @@ -382,12 +386,15 @@ func printCategories() { type Op func(code int) bool -const format = "\t{0x%04x, 0x%04x, %d},\n" +const format = "\t\t{0x%04x, 0x%04x, %d},\n" func dumpRange(header string, inCategory Op) { fmt.Print(header) next := 0 + fmt.Print("\tR16: []Range16{\n") // one Range for each iteration + count := &range16Count + size := 16 for { // look for start of range for next < len(chars) && !inCategory(next) { @@ -427,13 +434,38 @@ func dumpRange(header string, inCategory Op) { break } } - fmt.Printf(format, lo, hi, stride) + size, count = printRange(uint32(lo), uint32(hi), uint32(stride), size, count) // next range: start looking where this range ends next = hi + 1 } + fmt.Print("\t},\n") fmt.Print("}\n\n") } +func printRange(lo, hi, stride uint32, size int, count *int) (int, *int) { + if size == 16 && hi >= 1<<16 { + if lo < 1<<16 { + if lo+stride != hi { + log.Fatalf("unexpected straddle: %U %U %d", lo, hi, stride) + } + // No range contains U+FFFF as an instance, so split + // the range into two entries. That way we can maintain + // the invariant that R32 contains only >= 1<<16. + fmt.Printf(format, lo, lo, 1) + lo = hi + stride = 1 + *count++ + } + fmt.Print("\t},\n") + fmt.Print("\tR32: []Range32{\n") + size = 32 + count = &range32Count + } + fmt.Printf(format, lo, hi, stride) + *count++ + return size, count +} + func fullCategoryTest(list []string) { for _, name := range list { if _, ok := category[name]; !ok { @@ -454,12 +486,12 @@ func fullCategoryTest(list []string) { } } -func verifyRange(name string, inCategory Op, table []unicode.Range) { +func verifyRange(name string, inCategory Op, table *unicode.RangeTable) { for i := range chars { web := inCategory(i) pkg := unicode.Is(table, i) if web != pkg { - fmt.Fprintf(os.Stderr, "%s: U+%04X: web=%t pkg=%t\n", name, i, web, pkg) + fmt.Fprintf(os.Stderr, "%s: %U: web=%t pkg=%t\n", name, i, web, pkg) } } } @@ -497,22 +529,22 @@ func parseScript(line string, scripts map[string][]Script) { } // The script tables have a lot of adjacent elements. Fold them together. -func foldAdjacent(r []Script) []unicode.Range { - s := make([]unicode.Range, 0, len(r)) +func foldAdjacent(r []Script) []unicode.Range32 { + s := make([]unicode.Range32, 0, len(r)) j := 0 for i := 0; i < len(r); i++ { - if j > 0 && int(r[i].lo) == s[j-1].Hi+1 { - s[j-1].Hi = int(r[i].hi) + if j > 0 && r[i].lo == s[j-1].Hi+1 { + s[j-1].Hi = r[i].hi } else { s = s[0 : j+1] - s[j] = unicode.Range{int(r[i].lo), int(r[i].hi), 1} + s[j] = unicode.Range32{uint32(r[i].lo), uint32(r[i].hi), 1} j++ } } return s } -func fullScriptTest(list []string, installed map[string][]unicode.Range, scripts map[string][]Script) { +func fullScriptTest(list []string, installed map[string]*unicode.RangeTable, scripts map[string][]Script) { for _, name := range list { if _, ok := scripts[name]; !ok { logger.Fatal("unknown script", name) @@ -524,7 +556,7 @@ func fullScriptTest(list []string, installed map[string][]unicode.Range, scripts for _, script := range scripts[name] { for r := script.lo; r <= script.hi; r++ { if !unicode.Is(installed[name], int(r)) { - fmt.Fprintf(os.Stderr, "U+%04X: not in script %s\n", r, name) + fmt.Fprintf(os.Stderr, "%U: not in script %s\n", r, name) } } } @@ -589,10 +621,10 @@ func printScriptOrProperty(doProps bool) { if flaglist == "all" { if doProps { fmt.Println("// Properties is the set of Unicode property tables.") - fmt.Println("var Properties = map[string] []Range {") + fmt.Println("var Properties = map[string] *RangeTable{") } else { fmt.Println("// Scripts is the set of Unicode script tables.") - fmt.Println("var Scripts = map[string] []Range {") + fmt.Println("var Scripts = map[string] *RangeTable{") } for k := range table { fmt.Printf("\t%q: %s,\n", k, k) @@ -613,11 +645,15 @@ func printScriptOrProperty(doProps bool) { name, name, name, name) } ndecl++ - fmt.Printf("var _%s = []Range {\n", name) + fmt.Printf("var _%s = &RangeTable {\n", name) + fmt.Print("\tR16: []Range16{\n") ranges := foldAdjacent(table[name]) + size := 16 + count := &range16Count for _, s := range ranges { - fmt.Printf(format, s.Lo, s.Hi, s.Stride) + size, count = printRange(s.Lo, s.Hi, s.Stride, size, count) } + fmt.Print("\t},\n") fmt.Print("}\n\n") } decl.Sort() @@ -808,7 +844,7 @@ func printCaseRange(lo, hi *caseState) { fmt.Printf("\t{0x%04X, 0x%04X, d{UpperLower, UpperLower, UpperLower}},\n", lo.point, hi.point) case hi.point > lo.point && lo.isLowerUpper(): - logger.Fatalf("LowerUpper sequence: should not happen: U+%04X. If it's real, need to fix To()", lo.point) + logger.Fatalf("LowerUpper sequence: should not happen: %U. If it's real, need to fix To()", lo.point) fmt.Printf("\t{0x%04X, 0x%04X, d{LowerUpper, LowerUpper, LowerUpper}},\n", lo.point, hi.point) default: @@ -831,17 +867,31 @@ func fullCaseTest() { lower := unicode.ToLower(i) want := caseIt(i, c.lowerCase) if lower != want { - fmt.Fprintf(os.Stderr, "lower U+%04X should be U+%04X is U+%04X\n", i, want, lower) + fmt.Fprintf(os.Stderr, "lower %U should be %U is %U\n", i, want, lower) } upper := unicode.ToUpper(i) want = caseIt(i, c.upperCase) if upper != want { - fmt.Fprintf(os.Stderr, "upper U+%04X should be U+%04X is U+%04X\n", i, want, upper) + fmt.Fprintf(os.Stderr, "upper %U should be %U is %U\n", i, want, upper) } title := unicode.ToTitle(i) want = caseIt(i, c.titleCase) if title != want { - fmt.Fprintf(os.Stderr, "title U+%04X should be U+%04X is U+%04X\n", i, want, title) + fmt.Fprintf(os.Stderr, "title %U should be %U is %U\n", i, want, title) } } } + +var range16Count = 0 // Number of entries in the 16-bit range tables. +var range32Count = 0 // Number of entries in the 32-bit range tables. + +func printSizes() { + if *test { + return + } + fmt.Println() + fmt.Printf("// Range entries: %d 16-bit, %d 32-bit, %d total.\n", range16Count, range32Count, range16Count+range32Count) + range16Bytes := range16Count * 3 * 2 + range32Bytes := range32Count * 3 * 4 + fmt.Printf("// Range bytes: %d 16-bit, %d 32-bit, %d total.\n", range16Bytes, range32Bytes, range16Bytes+range32Bytes) +} |