summaryrefslogtreecommitdiff
path: root/src/pkg/unicode/maketables.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/pkg/unicode/maketables.go')
-rw-r--r--src/pkg/unicode/maketables.go85
1 files changed, 55 insertions, 30 deletions
diff --git a/src/pkg/unicode/maketables.go b/src/pkg/unicode/maketables.go
index 07b931d7e..b586bc655 100644
--- a/src/pkg/unicode/maketables.go
+++ b/src/pkg/unicode/maketables.go
@@ -14,6 +14,7 @@ import (
"http"
"log"
"os"
+ "path/filepath"
"sort"
"strconv"
"strings"
@@ -54,10 +55,47 @@ var cases = flag.Bool("cases",
var test = flag.Bool("test",
false,
"test existing tables; can be used to compare web data with package data")
+var localFiles = flag.Bool("local",
+ false,
+ "data files have been copied to current directory; for debugging only")
var scriptRe = regexp.MustCompile(`^([0-9A-F]+)(\.\.[0-9A-F]+)? *; ([A-Za-z_]+)$`)
var logger = log.New(os.Stderr, "", log.Lshortfile)
+type reader struct { // reader is what open returns: buffered input plus the resource close must release.
+ *bufio.Reader // embedded, so callers can call ReadString directly on a *reader
+ fd *os.File // set by open when the data is read from a local file; nil for HTTP
+ resp *http.Response // set by open when the data is fetched over HTTP; nil for local files
+}
+
+// open returns a buffered reader for the data file named by url.
+//
+// When the -local flag is set, the file is opened from the current
+// directory using the base name of url; otherwise url is fetched with an
+// HTTP GET. Any failure (open error, GET error, non-200 status) is reported
+// through logger.Fatal/Fatalf, so open does not return on error.
+// The caller should call close on the result when finished reading.
+func open(url string) *reader {
+	file := filepath.Base(url)
+	if *localFiles {
+		fd, err := os.Open(file)
+		if err != nil {
+			logger.Fatal(err)
+		}
+		return &reader{bufio.NewReader(fd), fd, nil}
+	}
+	// Fetch the URL that was passed in, not *dataURL: open is shared by
+	// loadChars, loadCasefold and printScriptOrProperty, each of which
+	// needs its own data file.
+	resp, err := http.Get(url)
+	if err != nil {
+		logger.Fatal(err)
+	}
+	if resp.StatusCode != 200 {
+		// resp.Status is a string (e.g. "404 Not Found"), so format it with %s.
+		logger.Fatalf("bad GET status for %s: %s", file, resp.Status)
+	}
+	return &reader{bufio.NewReader(resp.Body), nil, resp}
+}
+
+// close releases the resource behind r: the local file when r.fd is set,
+// otherwise the body of the HTTP response. The result of Close is ignored.
+func (r *reader) close() {
+	if r.fd == nil {
+		r.resp.Body.Close()
+		return
+	}
+	r.fd.Close()
+}
+
var category = map[string]bool{
// Nd Lu etc.
// We use one-character names to identify merged categories
@@ -192,7 +230,7 @@ func parseCategory(line string) (state State) {
char.letter(field[FSimpleUppercaseMapping], field[FCodePoint], field[FSimpleTitlecaseMapping])
case "Lt":
char.letter(field[FSimpleUppercaseMapping], field[FSimpleLowercaseMapping], field[FCodePoint])
- case "Lm", "Lo":
+ default:
char.letter(field[FSimpleUppercaseMapping], field[FSimpleLowercaseMapping], field[FSimpleTitlecaseMapping])
}
switch {
@@ -272,14 +310,7 @@ func loadChars() {
if *dataURL == "" {
flag.Set("data", *url+"UnicodeData.txt")
}
- resp, err := http.Get(*dataURL)
- if err != nil {
- logger.Fatal(err)
- }
- if resp.StatusCode != 200 {
- logger.Fatal("bad GET status for UnicodeData.txt", resp.Status)
- }
- input := bufio.NewReader(resp.Body)
+ input := open(*dataURL)
var first uint32 = 0
for {
line, err := input.ReadString('\n')
@@ -310,21 +341,14 @@ func loadChars() {
first = 0
}
}
- resp.Body.Close()
+ input.close()
}
func loadCasefold() {
if *casefoldingURL == "" {
flag.Set("casefolding", *url+"CaseFolding.txt")
}
- resp, err := http.Get(*casefoldingURL)
- if err != nil {
- logger.Fatal(err)
- }
- if resp.StatusCode != 200 {
- logger.Fatal("bad GET status for CaseFolding.txt", resp.Status)
- }
- input := bufio.NewReader(resp.Body)
+ input := open(*casefoldingURL)
for {
line, err := input.ReadString('\n')
if err != nil {
@@ -355,7 +379,7 @@ func loadCasefold() {
}
chars[p1].foldCase = int(p2)
}
- resp.Body.Close()
+ input.close()
}
const progHeader = `// Generated by running
@@ -366,7 +390,6 @@ package unicode
`
-
func printCategories() {
if *tablelist == "" {
return
@@ -663,15 +686,7 @@ func printScriptOrProperty(doProps bool) {
if flaglist == "" {
return
}
- var err os.Error
- resp, err := http.Get(*url + file)
- if err != nil {
- logger.Fatal(err)
- }
- if resp.StatusCode != 200 {
- logger.Fatal("bad GET status for ", file, ":", resp.Status)
- }
- input := bufio.NewReader(resp.Body)
+ input := open(*url + file)
for {
line, err := input.ReadString('\n')
if err != nil {
@@ -682,7 +697,7 @@ func printScriptOrProperty(doProps bool) {
}
parseScript(line[0:len(line)-1], table)
}
- resp.Body.Close()
+ input.close()
// Find out which scripts to dump
list := strings.Split(flaglist, ",")
@@ -865,6 +880,16 @@ func getCaseState(i int) (c *caseState) {
case ch.titleCase:
c._case = CaseTitle
}
+ // Some things such as roman numeral U+2161 don't describe themselves
+ // as upper case, but have a lower case. Second-guess them.
+ if c._case == CaseNone && ch.lowerCase != 0 {
+ c._case = CaseUpper
+ }
+ // Same in the other direction.
+ if c._case == CaseNone && ch.upperCase != 0 {
+ c._case = CaseLower
+ }
+
if ch.upperCase != 0 {
c.deltaToUpper = ch.upperCase - i
}