summary refs log tree commit diff
path: root/src/pkg/unicode/maketables.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/pkg/unicode/maketables.go')
-rw-r--r-- src/pkg/unicode/maketables.go 75
1 files changed, 43 insertions, 32 deletions
diff --git a/src/pkg/unicode/maketables.go b/src/pkg/unicode/maketables.go
index fcd14fc73..53d8b967e 100644
--- a/src/pkg/unicode/maketables.go
+++ b/src/pkg/unicode/maketables.go
@@ -13,7 +13,6 @@ import (
"bufio"
"flag"
"fmt"
- "io"
"log"
"net/http"
"os"
@@ -41,7 +40,7 @@ func main() {
var dataURL = flag.String("data", "", "full URL for UnicodeData.txt; defaults to --url/UnicodeData.txt")
var casefoldingURL = flag.String("casefolding", "", "full URL for CaseFolding.txt; defaults to --url/CaseFolding.txt")
var url = flag.String("url",
- "http://www.unicode.org/Public/6.0.0/ucd/",
+ "http://www.unicode.org/Public/6.2.0/ucd/",
"URL of Unicode database directory")
var tablelist = flag.String("tables",
"all",
@@ -321,16 +320,11 @@ func loadChars() {
flag.Set("data", *url+"UnicodeData.txt")
}
input := open(*dataURL)
+ defer input.close()
+ scanner := bufio.NewScanner(input)
var first rune = 0
- for {
- line, err := input.ReadString('\n')
- if err != nil {
- if err == io.EOF {
- break
- }
- logger.Fatal(err)
- }
- switch parseCategory(line[0 : len(line)-1]) {
+ for scanner.Scan() {
+ switch parseCategory(scanner.Text()) {
case SNormal:
if first != 0 {
logger.Fatalf("bad state normal at %U", lastChar)
@@ -351,7 +345,9 @@ func loadChars() {
first = 0
}
}
- input.close()
+ if scanner.Err() != nil {
+ logger.Fatal(scanner.Err())
+ }
}
func loadCasefold() {
@@ -359,15 +355,11 @@ func loadCasefold() {
flag.Set("casefolding", *url+"CaseFolding.txt")
}
input := open(*casefoldingURL)
- for {
- line, err := input.ReadString('\n')
- if err != nil {
- if err == io.EOF {
- break
- }
- logger.Fatal(err)
- }
- if line[0] == '#' {
+ defer input.close()
+ scanner := bufio.NewScanner(input)
+ for scanner.Scan() {
+ line := scanner.Text()
+ if len(line) == 0 || line[0] == '#' || len(strings.TrimSpace(line)) == 0 {
continue
}
field := strings.Split(line, "; ")
@@ -389,7 +381,9 @@ func loadCasefold() {
}
chars[p1].foldCase = rune(p2)
}
- input.close()
+ if scanner.Err() != nil {
+ logger.Fatal(scanner.Err())
+ }
}
const progHeader = `// Generated by running
@@ -503,6 +497,7 @@ const format = "\t\t{0x%04x, 0x%04x, %d},\n"
func dumpRange(header string, inCategory Op) {
fmt.Print(header)
next := rune(0)
+ latinOffset := 0
fmt.Print("\tR16: []Range16{\n")
// one Range for each iteration
count := &range16Count
@@ -546,11 +541,17 @@ func dumpRange(header string, inCategory Op) {
break
}
}
+ if uint32(hi) <= unicode.MaxLatin1 {
+ latinOffset++
+ }
size, count = printRange(uint32(lo), uint32(hi), uint32(stride), size, count)
// next range: start looking where this range ends
next = hi + 1
}
fmt.Print("\t},\n")
+ if latinOffset > 0 {
+ fmt.Printf("\tLatinOffset: %d,\n", latinOffset)
+ }
fmt.Print("}\n\n")
}
@@ -703,15 +704,12 @@ func printScriptOrProperty(doProps bool) {
return
}
input := open(*url + file)
- for {
- line, err := input.ReadString('\n')
- if err != nil {
- if err == io.EOF {
- break
- }
- logger.Fatal(err)
- }
- parseScript(line[0:len(line)-1], table)
+ scanner := bufio.NewScanner(input)
+ for scanner.Scan() {
+ parseScript(scanner.Text(), table)
+ }
+ if scanner.Err() != nil {
+ logger.Fatal(scanner.Err())
}
input.close()
@@ -760,14 +758,17 @@ func printScriptOrProperty(doProps bool) {
}
ndecl++
fmt.Printf("var _%s = &RangeTable {\n", name)
- fmt.Print("\tR16: []Range16{\n")
ranges := foldAdjacent(table[name])
+ fmt.Print("\tR16: []Range16{\n")
size := 16
count := &range16Count
for _, s := range ranges {
size, count = printRange(s.Lo, s.Hi, s.Stride, size, count)
}
fmt.Print("\t},\n")
+ if off := findLatinOffset(ranges); off > 0 {
+ fmt.Printf("\tLatinOffset: %d,\n", off)
+ }
fmt.Print("}\n\n")
}
decl.Sort()
@@ -779,6 +780,14 @@ func printScriptOrProperty(doProps bool) {
fmt.Print(")\n\n")
}
+func findLatinOffset(ranges []unicode.Range32) int {
+ i := 0
+ for i < len(ranges) && ranges[i].Hi <= unicode.MaxLatin1 {
+ i++
+ }
+ return i
+}
+
const (
CaseUpper = 1 << iota
CaseLower
@@ -1022,6 +1031,8 @@ func printLatinProperties() {
property = "0"
case "Ll":
property = "pLl | pp"
+ case "Lo":
+ property = "pLo | pp"
case "Lu":
property = "pLu | pp"
case "Nd", "No":