diff options
Diffstat (limited to 'src/pkg/unicode/maketables.go')
-rw-r--r-- | src/pkg/unicode/maketables.go | 85 |
1 files changed, 55 insertions, 30 deletions
// test selects the mode that checks the existing tables instead of printing
// new ones (see the flag's usage string).
var test = flag.Bool("test",
	false,
	"test existing tables; can be used to compare web data with package data")

// localFiles makes open read each data file from the current directory
// instead of fetching it over HTTP.
var localFiles = flag.Bool("local",
	false,
	"data files have been copied to current directory; for debugging only")

var scriptRe = regexp.MustCompile(`^([0-9A-F]+)(\.\.[0-9A-F]+)? *; ([A-Za-z_]+)$`)

var logger = log.New(os.Stderr, "", log.Lshortfile)

// reader is a buffered reader over one data file. Exactly one of fd and
// resp is non-nil, depending on whether the data came from a local file or
// from an HTTP response; close releases whichever one is set.
type reader struct {
	*bufio.Reader
	fd   *os.File
	resp *http.Response
}

// open returns a reader for the data file named by url.
//
// When the -local flag is set, only the last path element of url is used and
// the file of that name is opened in the current directory. Otherwise url is
// fetched with an HTTP GET. Any failure, including a non-200 response, is
// fatal: it is logged and the program exits.
func open(url string) *reader {
	file := filepath.Base(url)
	if *localFiles {
		fd, err := os.Open(file)
		if err != nil {
			logger.Fatal(err)
		}
		return &reader{bufio.NewReader(fd), fd, nil}
	}
	// Fetch the URL that was asked for; every data file goes through open,
	// not just UnicodeData.txt.
	resp, err := http.Get(url)
	if err != nil {
		logger.Fatal(err)
	}
	if resp.StatusCode != 200 {
		// resp.Status is the textual status line (e.g. "404 Not Found").
		logger.Fatalf("bad GET status for %s: %s", file, resp.Status)
	}
	return &reader{bufio.NewReader(resp.Body), nil, resp}
}

// close releases the file or HTTP response body underlying r. The data is
// only ever read, so an error from Close is not reported.
func (r *reader) close() {
	if r.fd != nil {
		r.fd.Close()
		return
	}
	r.resp.Body.Close()
}
// We use one-character names to identify merged categories @@ -192,7 +230,7 @@ func parseCategory(line string) (state State) { char.letter(field[FSimpleUppercaseMapping], field[FCodePoint], field[FSimpleTitlecaseMapping]) case "Lt": char.letter(field[FSimpleUppercaseMapping], field[FSimpleLowercaseMapping], field[FCodePoint]) - case "Lm", "Lo": + default: char.letter(field[FSimpleUppercaseMapping], field[FSimpleLowercaseMapping], field[FSimpleTitlecaseMapping]) } switch { @@ -272,14 +310,7 @@ func loadChars() { if *dataURL == "" { flag.Set("data", *url+"UnicodeData.txt") } - resp, err := http.Get(*dataURL) - if err != nil { - logger.Fatal(err) - } - if resp.StatusCode != 200 { - logger.Fatal("bad GET status for UnicodeData.txt", resp.Status) - } - input := bufio.NewReader(resp.Body) + input := open(*dataURL) var first uint32 = 0 for { line, err := input.ReadString('\n') @@ -310,21 +341,14 @@ func loadChars() { first = 0 } } - resp.Body.Close() + input.close() } func loadCasefold() { if *casefoldingURL == "" { flag.Set("casefolding", *url+"CaseFolding.txt") } - resp, err := http.Get(*casefoldingURL) - if err != nil { - logger.Fatal(err) - } - if resp.StatusCode != 200 { - logger.Fatal("bad GET status for CaseFolding.txt", resp.Status) - } - input := bufio.NewReader(resp.Body) + input := open(*casefoldingURL) for { line, err := input.ReadString('\n') if err != nil { @@ -355,7 +379,7 @@ func loadCasefold() { } chars[p1].foldCase = int(p2) } - resp.Body.Close() + input.close() } const progHeader = `// Generated by running @@ -366,7 +390,6 @@ package unicode ` - func printCategories() { if *tablelist == "" { return @@ -663,15 +686,7 @@ func printScriptOrProperty(doProps bool) { if flaglist == "" { return } - var err os.Error - resp, err := http.Get(*url + file) - if err != nil { - logger.Fatal(err) - } - if resp.StatusCode != 200 { - logger.Fatal("bad GET status for ", file, ":", resp.Status) - } - input := bufio.NewReader(resp.Body) + input := open(*url + file) for { 
line, err := input.ReadString('\n') if err != nil { @@ -682,7 +697,7 @@ func printScriptOrProperty(doProps bool) { } parseScript(line[0:len(line)-1], table) } - resp.Body.Close() + input.close() // Find out which scripts to dump list := strings.Split(flaglist, ",") @@ -865,6 +880,16 @@ func getCaseState(i int) (c *caseState) { case ch.titleCase: c._case = CaseTitle } + // Some things such as roman numeral U+2161 don't describe themselves + // as upper case, but have a lower case. Second-guess them. + if c._case == CaseNone && ch.lowerCase != 0 { + c._case = CaseUpper + } + // Same in the other direction. + if c._case == CaseNone && ch.upperCase != 0 { + c._case = CaseLower + } + if ch.upperCase != 0 { c.deltaToUpper = ch.upperCase - i } |