summary refs log tree commit diff
path: root/src/pkg/unicode/maketables.go
diff options
context:
space:
mode:
author Ondřej Surý <ondrej@sury.org> 2011-06-03 11:31:24 +0200
committer Ondřej Surý <ondrej@sury.org> 2011-06-03 11:38:02 +0200
commit 13f4fcd5bf09c70942b6c85a2b919ffa1ca0c6a8 (patch)
tree 717e1ceeef3a60af29e7897c7629d1502b797838 /src/pkg/unicode/maketables.go
parent 6bf52070ef1028f7fcc98fad1e73795a7efd7ce7 (diff)
download golang-13f4fcd5bf09c70942b6c85a2b919ffa1ca0c6a8.tar.gz
Imported Upstream version 2011.06.02
Diffstat (limited to 'src/pkg/unicode/maketables.go')
-rw-r--r-- src/pkg/unicode/maketables.go 114
1 files changed, 82 insertions, 32 deletions
diff --git a/src/pkg/unicode/maketables.go b/src/pkg/unicode/maketables.go
index 33a826862..c3cf32b48 100644
--- a/src/pkg/unicode/maketables.go
+++ b/src/pkg/unicode/maketables.go
@@ -28,6 +28,7 @@ func main() {
printScriptOrProperty(false)
printScriptOrProperty(true)
printCases()
+ printSizes()
}
var dataURL = flag.String("data", "", "full URL for UnicodeData.txt; defaults to --url/UnicodeData.txt")
@@ -278,16 +279,16 @@ func loadChars() {
switch parseCategory(line[0 : len(line)-1]) {
case SNormal:
if first != 0 {
- logger.Fatalf("bad state normal at U+%04X", lastChar)
+ logger.Fatalf("bad state normal at %U", lastChar)
}
case SFirst:
if first != 0 {
- logger.Fatalf("bad state first at U+%04X", lastChar)
+ logger.Fatalf("bad state first at %U", lastChar)
}
first = lastChar
case SLast:
if first == 0 {
- logger.Fatalf("bad state last at U+%04X", lastChar)
+ logger.Fatalf("bad state last at %U", lastChar)
}
for i := first + 1; i <= lastChar; i++ {
chars[i] = chars[first]
@@ -299,6 +300,15 @@ func loadChars() {
resp.Body.Close()
}
+const progHeader = `// Generated by running
+// maketables --tables=%s --data=%s
+// DO NOT EDIT
+
+package unicode
+
+`
+
+
func printCategories() {
if *tablelist == "" {
return
@@ -312,20 +322,14 @@ func printCategories() {
fullCategoryTest(list)
return
}
- fmt.Printf(
- "// Generated by running\n"+
- "// maketables --tables=%s --data=%s\n"+
- "// DO NOT EDIT\n\n"+
- "package unicode\n\n",
- *tablelist,
- *dataURL)
+ fmt.Printf(progHeader, *tablelist, *dataURL)
fmt.Println("// Version is the Unicode edition from which the tables are derived.")
fmt.Printf("const Version = %q\n\n", version())
if *tablelist == "all" {
fmt.Println("// Categories is the set of Unicode data tables.")
- fmt.Println("var Categories = map[string] []Range {")
+ fmt.Println("var Categories = map[string] *RangeTable {")
for k := range category {
fmt.Printf("\t%q: %s,\n", k, k)
}
@@ -364,12 +368,12 @@ func printCategories() {
ndecl++
if name == "letter" { // special case
dumpRange(
- "var letter = []Range {\n",
+ "var letter = &RangeTable{\n",
letterOp)
continue
}
dumpRange(
- fmt.Sprintf("var _%s = []Range {\n", name),
+ fmt.Sprintf("var _%s = &RangeTable{\n", name),
func(code int) bool { return chars[code].category == name })
}
decl.Sort()
@@ -382,12 +386,15 @@ func printCategories() {
type Op func(code int) bool
-const format = "\t{0x%04x, 0x%04x, %d},\n"
+const format = "\t\t{0x%04x, 0x%04x, %d},\n"
func dumpRange(header string, inCategory Op) {
fmt.Print(header)
next := 0
+ fmt.Print("\tR16: []Range16{\n")
// one Range for each iteration
+ count := &range16Count
+ size := 16
for {
// look for start of range
for next < len(chars) && !inCategory(next) {
@@ -427,13 +434,38 @@ func dumpRange(header string, inCategory Op) {
break
}
}
- fmt.Printf(format, lo, hi, stride)
+ size, count = printRange(uint32(lo), uint32(hi), uint32(stride), size, count)
// next range: start looking where this range ends
next = hi + 1
}
+ fmt.Print("\t},\n")
fmt.Print("}\n\n")
}
+func printRange(lo, hi, stride uint32, size int, count *int) (int, *int) {
+ if size == 16 && hi >= 1<<16 {
+ if lo < 1<<16 {
+ if lo+stride != hi {
+ log.Fatalf("unexpected straddle: %U %U %d", lo, hi, stride)
+ }
+ // No range contains U+FFFF as an instance, so split
+ // the range into two entries. That way we can maintain
+ // the invariant that R32 contains only >= 1<<16.
+ fmt.Printf(format, lo, lo, 1)
+ lo = hi
+ stride = 1
+ *count++
+ }
+ fmt.Print("\t},\n")
+ fmt.Print("\tR32: []Range32{\n")
+ size = 32
+ count = &range32Count
+ }
+ fmt.Printf(format, lo, hi, stride)
+ *count++
+ return size, count
+}
+
func fullCategoryTest(list []string) {
for _, name := range list {
if _, ok := category[name]; !ok {
@@ -454,12 +486,12 @@ func fullCategoryTest(list []string) {
}
}
-func verifyRange(name string, inCategory Op, table []unicode.Range) {
+func verifyRange(name string, inCategory Op, table *unicode.RangeTable) {
for i := range chars {
web := inCategory(i)
pkg := unicode.Is(table, i)
if web != pkg {
- fmt.Fprintf(os.Stderr, "%s: U+%04X: web=%t pkg=%t\n", name, i, web, pkg)
+ fmt.Fprintf(os.Stderr, "%s: %U: web=%t pkg=%t\n", name, i, web, pkg)
}
}
}
@@ -497,22 +529,22 @@ func parseScript(line string, scripts map[string][]Script) {
}
// The script tables have a lot of adjacent elements. Fold them together.
-func foldAdjacent(r []Script) []unicode.Range {
- s := make([]unicode.Range, 0, len(r))
+func foldAdjacent(r []Script) []unicode.Range32 {
+ s := make([]unicode.Range32, 0, len(r))
j := 0
for i := 0; i < len(r); i++ {
- if j > 0 && int(r[i].lo) == s[j-1].Hi+1 {
- s[j-1].Hi = int(r[i].hi)
+ if j > 0 && r[i].lo == s[j-1].Hi+1 {
+ s[j-1].Hi = r[i].hi
} else {
s = s[0 : j+1]
- s[j] = unicode.Range{int(r[i].lo), int(r[i].hi), 1}
+ s[j] = unicode.Range32{uint32(r[i].lo), uint32(r[i].hi), 1}
j++
}
}
return s
}
-func fullScriptTest(list []string, installed map[string][]unicode.Range, scripts map[string][]Script) {
+func fullScriptTest(list []string, installed map[string]*unicode.RangeTable, scripts map[string][]Script) {
for _, name := range list {
if _, ok := scripts[name]; !ok {
logger.Fatal("unknown script", name)
@@ -524,7 +556,7 @@ func fullScriptTest(list []string, installed map[string][]unicode.Range, scripts
for _, script := range scripts[name] {
for r := script.lo; r <= script.hi; r++ {
if !unicode.Is(installed[name], int(r)) {
- fmt.Fprintf(os.Stderr, "U+%04X: not in script %s\n", r, name)
+ fmt.Fprintf(os.Stderr, "%U: not in script %s\n", r, name)
}
}
}
@@ -589,10 +621,10 @@ func printScriptOrProperty(doProps bool) {
if flaglist == "all" {
if doProps {
fmt.Println("// Properties is the set of Unicode property tables.")
- fmt.Println("var Properties = map[string] []Range {")
+ fmt.Println("var Properties = map[string] *RangeTable{")
} else {
fmt.Println("// Scripts is the set of Unicode script tables.")
- fmt.Println("var Scripts = map[string] []Range {")
+ fmt.Println("var Scripts = map[string] *RangeTable{")
}
for k := range table {
fmt.Printf("\t%q: %s,\n", k, k)
@@ -613,11 +645,15 @@ func printScriptOrProperty(doProps bool) {
name, name, name, name)
}
ndecl++
- fmt.Printf("var _%s = []Range {\n", name)
+ fmt.Printf("var _%s = &RangeTable {\n", name)
+ fmt.Print("\tR16: []Range16{\n")
ranges := foldAdjacent(table[name])
+ size := 16
+ count := &range16Count
for _, s := range ranges {
- fmt.Printf(format, s.Lo, s.Hi, s.Stride)
+ size, count = printRange(s.Lo, s.Hi, s.Stride, size, count)
}
+ fmt.Print("\t},\n")
fmt.Print("}\n\n")
}
decl.Sort()
@@ -808,7 +844,7 @@ func printCaseRange(lo, hi *caseState) {
fmt.Printf("\t{0x%04X, 0x%04X, d{UpperLower, UpperLower, UpperLower}},\n",
lo.point, hi.point)
case hi.point > lo.point && lo.isLowerUpper():
- logger.Fatalf("LowerUpper sequence: should not happen: U+%04X. If it's real, need to fix To()", lo.point)
+ logger.Fatalf("LowerUpper sequence: should not happen: %U. If it's real, need to fix To()", lo.point)
fmt.Printf("\t{0x%04X, 0x%04X, d{LowerUpper, LowerUpper, LowerUpper}},\n",
lo.point, hi.point)
default:
@@ -831,17 +867,31 @@ func fullCaseTest() {
lower := unicode.ToLower(i)
want := caseIt(i, c.lowerCase)
if lower != want {
- fmt.Fprintf(os.Stderr, "lower U+%04X should be U+%04X is U+%04X\n", i, want, lower)
+ fmt.Fprintf(os.Stderr, "lower %U should be %U is %U\n", i, want, lower)
}
upper := unicode.ToUpper(i)
want = caseIt(i, c.upperCase)
if upper != want {
- fmt.Fprintf(os.Stderr, "upper U+%04X should be U+%04X is U+%04X\n", i, want, upper)
+ fmt.Fprintf(os.Stderr, "upper %U should be %U is %U\n", i, want, upper)
}
title := unicode.ToTitle(i)
want = caseIt(i, c.titleCase)
if title != want {
- fmt.Fprintf(os.Stderr, "title U+%04X should be U+%04X is U+%04X\n", i, want, title)
+ fmt.Fprintf(os.Stderr, "title %U should be %U is %U\n", i, want, title)
}
}
}
+
+var range16Count = 0 // Number of entries in the 16-bit range tables.
+var range32Count = 0 // Number of entries in the 32-bit range tables.
+
+func printSizes() {
+ if *test {
+ return
+ }
+ fmt.Println()
+ fmt.Printf("// Range entries: %d 16-bit, %d 32-bit, %d total.\n", range16Count, range32Count, range16Count+range32Count)
+ range16Bytes := range16Count * 3 * 2
+ range32Bytes := range32Count * 3 * 4
+ fmt.Printf("// Range bytes: %d 16-bit, %d 32-bit, %d total.\n", range16Bytes, range32Bytes, range16Bytes+range32Bytes)
+}