diff options
Diffstat (limited to 'src/pkg/unicode/letter.go')
| -rw-r--r-- | src/pkg/unicode/letter.go | 90 |
1 files changed, 87 insertions, 3 deletions
diff --git a/src/pkg/unicode/letter.go b/src/pkg/unicode/letter.go index c625e9068..087c4ad3a 100644 --- a/src/pkg/unicode/letter.go +++ b/src/pkg/unicode/letter.go @@ -9,11 +9,39 @@ package unicode // The representation of a range of Unicode code points. The range runs from Lo to Hi // inclusive and has the specified stride. type Range struct { - Lo int; - Hi int; - Stride int; + Lo int; + Hi int; + Stride int; } +// The representation of a range of Unicode code points for case conversion. +// The range runs from Lo to Hi inclusive, with a fixed stride of 1. Deltas +// are the number to add to the code point to reach the code point for a +// different case for that character. They may be negative. If zero, it +// means the character is in the corresponding case. +type CaseRange struct { + Lo int; + Hi int; + Delta d; +} + +// Indices into the Delta arrays inside CaseRanges for case mapping. +const ( + UpperCase = iota; + LowerCase; + TitleCase; + MaxCase; +) +type d [MaxCase]int32 // to make the CaseRanges text shorter + +// If the Delta field of a CaseRange is UpperLower or LowerUpper, it means +// this CaseRange represents a sequence of the form (say) +// Upper Lower Upper Lower. +const ( + UpperLower = 1; + LowerUpper = -1; +) + // Is tests whether rune is in the specified table of ranges. func Is(ranges []Range, rune int) bool { // common case: rune is ASCII or Latin-1 @@ -80,3 +108,59 @@ func IsLetter(rune int) bool { } return Is(Letter, rune); } + +// To maps the rune to the specified case, UpperCase, LowerCase, or TitleCase +func To(_case int, rune int) int { + if _case < 0 || MaxCase <= _case { + return 0xFFFD // as reasonable an error as any + } + // binary search over ranges + lo := 0; + hi := len(CaseRanges); + for lo < hi { + m := lo + (hi - lo)/2; + r := CaseRanges[m]; + if r.Lo <= rune && rune <= r.Hi { + return rune + int(r.Delta[_case]); + } + if rune < r.Lo { + hi = m; + } else { + lo = m+1; + } + } + return rune; +} + +// ToUpper maps the rune to upper case +func ToUpper(rune int) int { + if rune < 0x80 { // quick ASCII check + if 'a' <= rune && rune <= 'z' { + rune &^= ' ' + } + return rune + } + return To(UpperCase, rune); +} + +// ToLower maps the rune to lower case +func ToLower(rune int) int { + if rune < 0x80 { // quick ASCII check + if 'A' <= rune && rune <= 'Z' { + rune |= ' ' + } + return rune + } + return To(LowerCase, rune); +} + +// ToTitle maps the rune to title case +func ToTitle(rune int) int { + if rune < 0x80 { // quick ASCII check + if 'a' <= rune && rune <= 'z' { // title case is upper case for ASCII + rune &^= ' ' + } + return rune + } + return To(TitleCase, rune); +} |
