diff options
Diffstat (limited to 'src/pkg/unicode/maketables.go')
-rw-r--r-- | src/pkg/unicode/maketables.go | 75 |
1 files changed, 43 insertions, 32 deletions
diff --git a/src/pkg/unicode/maketables.go b/src/pkg/unicode/maketables.go index fcd14fc73..53d8b967e 100644 --- a/src/pkg/unicode/maketables.go +++ b/src/pkg/unicode/maketables.go @@ -13,7 +13,6 @@ import ( "bufio" "flag" "fmt" - "io" "log" "net/http" "os" @@ -41,7 +40,7 @@ func main() { var dataURL = flag.String("data", "", "full URL for UnicodeData.txt; defaults to --url/UnicodeData.txt") var casefoldingURL = flag.String("casefolding", "", "full URL for CaseFolding.txt; defaults to --url/CaseFolding.txt") var url = flag.String("url", - "http://www.unicode.org/Public/6.0.0/ucd/", + "http://www.unicode.org/Public/6.2.0/ucd/", "URL of Unicode database directory") var tablelist = flag.String("tables", "all", @@ -321,16 +320,11 @@ func loadChars() { flag.Set("data", *url+"UnicodeData.txt") } input := open(*dataURL) + defer input.close() + scanner := bufio.NewScanner(input) var first rune = 0 - for { - line, err := input.ReadString('\n') - if err != nil { - if err == io.EOF { - break - } - logger.Fatal(err) - } - switch parseCategory(line[0 : len(line)-1]) { + for scanner.Scan() { + switch parseCategory(scanner.Text()) { case SNormal: if first != 0 { logger.Fatalf("bad state normal at %U", lastChar) @@ -351,7 +345,9 @@ func loadChars() { first = 0 } } - input.close() + if scanner.Err() != nil { + logger.Fatal(scanner.Err()) + } } func loadCasefold() { @@ -359,15 +355,11 @@ func loadCasefold() { flag.Set("casefolding", *url+"CaseFolding.txt") } input := open(*casefoldingURL) - for { - line, err := input.ReadString('\n') - if err != nil { - if err == io.EOF { - break - } - logger.Fatal(err) - } - if line[0] == '#' { + defer input.close() + scanner := bufio.NewScanner(input) + for scanner.Scan() { + line := scanner.Text() + if len(line) == 0 || line[0] == '#' || len(strings.TrimSpace(line)) == 0 { continue } field := strings.Split(line, "; ") @@ -389,7 +381,9 @@ func loadCasefold() { } chars[p1].foldCase = rune(p2) } - input.close() + if scanner.Err() != nil { + logger.Fatal(scanner.Err()) + } } const progHeader = `// Generated by running @@ -503,6 +497,7 @@ const format = "\t\t{0x%04x, 0x%04x, %d},\n" func dumpRange(header string, inCategory Op) { fmt.Print(header) next := rune(0) + latinOffset := 0 fmt.Print("\tR16: []Range16{\n") // one Range for each iteration count := &range16Count @@ -546,11 +541,17 @@ func dumpRange(header string, inCategory Op) { break } } + if uint32(hi) <= unicode.MaxLatin1 { + latinOffset++ + } size, count = printRange(uint32(lo), uint32(hi), uint32(stride), size, count) // next range: start looking where this range ends next = hi + 1 } fmt.Print("\t},\n") + if latinOffset > 0 { + fmt.Printf("\tLatinOffset: %d,\n", latinOffset) + } fmt.Print("}\n\n") } @@ -703,15 +704,12 @@ func printScriptOrProperty(doProps bool) { return } input := open(*url + file) - for { - line, err := input.ReadString('\n') - if err != nil { - if err == io.EOF { - break - } - logger.Fatal(err) - } - parseScript(line[0:len(line)-1], table) + scanner := bufio.NewScanner(input) + for scanner.Scan() { + parseScript(scanner.Text(), table) + } + if scanner.Err() != nil { + logger.Fatal(scanner.Err()) } input.close() @@ -760,14 +758,17 @@ func printScriptOrProperty(doProps bool) { } ndecl++ fmt.Printf("var _%s = &RangeTable {\n", name) - fmt.Print("\tR16: []Range16{\n") ranges := foldAdjacent(table[name]) + fmt.Print("\tR16: []Range16{\n") size := 16 count := &range16Count for _, s := range ranges { size, count = printRange(s.Lo, s.Hi, s.Stride, size, count) } fmt.Print("\t},\n") + if off := findLatinOffset(ranges); off > 0 { + fmt.Printf("\tLatinOffset: %d,\n", off) + } fmt.Print("}\n\n") } decl.Sort() @@ -779,6 +780,14 @@ func printScriptOrProperty(doProps bool) { fmt.Print(")\n\n") } +func findLatinOffset(ranges []unicode.Range32) int { + i := 0 + for i < len(ranges) && ranges[i].Hi <= unicode.MaxLatin1 { + i++ + } + return i +} + const ( CaseUpper = 1 << iota CaseLower @@ -1022,6 +1031,8 @@ func printLatinProperties() { property = "0" case "Ll": property = "pLl | pp" + case "Lo": + property = "pLo | pp" case "Lu": property = "pLu | pp" case "Nd", "No": |