summaryrefslogtreecommitdiff
path: root/src/unicode/graphic.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/unicode/graphic.go')
-rw-r--r--src/unicode/graphic.go144
1 files changed, 144 insertions, 0 deletions
diff --git a/src/unicode/graphic.go b/src/unicode/graphic.go
new file mode 100644
index 000000000..ba90b4e51
--- /dev/null
+++ b/src/unicode/graphic.go
@@ -0,0 +1,144 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unicode
+
+// Bit masks for each code point under U+0100, for fast lookup.
+const (
+ pC = 1 << iota // a control character.
+ pP // a punctuation character.
+ pN // a numeral.
+ pS // a symbolic character.
+ pZ // a spacing character.
+ pLu // an upper-case letter.
+ pLl // a lower-case letter.
+ pp // a printable character according to Go's definition.
+ pg = pp | pZ // a graphical character according to the Unicode definition.
+ pLo = pLl | pLu // a letter that is neither upper nor lower case.
+ pLmask = pLo
+)
+
+// GraphicRanges defines the set of graphic characters according to Unicode.
+var GraphicRanges = []*RangeTable{
+ L, M, N, P, S, Zs,
+}
+
+// PrintRanges defines the set of printable characters according to Go.
+// ASCII space, U+0020, is handled separately.
+var PrintRanges = []*RangeTable{
+ L, M, N, P, S,
+}
+
+// IsGraphic reports whether the rune is defined as a Graphic by Unicode.
+// Such characters include letters, marks, numbers, punctuation, symbols, and
+// spaces, from categories L, M, N, P, S, Zs.
+func IsGraphic(r rune) bool {
+ // We convert to uint32 to avoid the extra test for negative,
+ // and in the index we convert to uint8 to avoid the range check.
+ if uint32(r) <= MaxLatin1 {
+ return properties[uint8(r)]&pg != 0
+ }
+ return In(r, GraphicRanges...)
+}
+
+// IsPrint reports whether the rune is defined as printable by Go. Such
+// characters include letters, marks, numbers, punctuation, symbols, and the
+// ASCII space character, from categories L, M, N, P, S and the ASCII space
+// character. This categorization is the same as IsGraphic except that the
+// only spacing character is ASCII space, U+0020.
+func IsPrint(r rune) bool {
+ if uint32(r) <= MaxLatin1 {
+ return properties[uint8(r)]&pp != 0
+ }
+ return In(r, PrintRanges...)
+}
+
+// IsOneOf reports whether the rune is a member of one of the ranges.
+// The function "In" provides a nicer signature and should be used in preference to IsOneOf.
+func IsOneOf(ranges []*RangeTable, r rune) bool {
+ for _, inside := range ranges {
+ if Is(inside, r) {
+ return true
+ }
+ }
+ return false
+}
+
+// In reports whether the rune is a member of one of the ranges.
+func In(r rune, ranges ...*RangeTable) bool {
+ for _, inside := range ranges {
+ if Is(inside, r) {
+ return true
+ }
+ }
+ return false
+}
+
+// IsControl reports whether the rune is a control character.
+// The C (Other) Unicode category includes more code points
+// such as surrogates; use Is(C, r) to test for them.
+func IsControl(r rune) bool {
+ if uint32(r) <= MaxLatin1 {
+ return properties[uint8(r)]&pC != 0
+ }
+ // All control characters are < Latin1Max.
+ return false
+}
+
+// IsLetter reports whether the rune is a letter (category L).
+func IsLetter(r rune) bool {
+ if uint32(r) <= MaxLatin1 {
+ return properties[uint8(r)]&(pLmask) != 0
+ }
+ return isExcludingLatin(Letter, r)
+}
+
+// IsMark reports whether the rune is a mark character (category M).
+func IsMark(r rune) bool {
+ // There are no mark characters in Latin-1.
+ return isExcludingLatin(Mark, r)
+}
+
+// IsNumber reports whether the rune is a number (category N).
+func IsNumber(r rune) bool {
+ if uint32(r) <= MaxLatin1 {
+ return properties[uint8(r)]&pN != 0
+ }
+ return isExcludingLatin(Number, r)
+}
+
+// IsPunct reports whether the rune is a Unicode punctuation character
+// (category P).
+func IsPunct(r rune) bool {
+ if uint32(r) <= MaxLatin1 {
+ return properties[uint8(r)]&pP != 0
+ }
+ return Is(Punct, r)
+}
+
+// IsSpace reports whether the rune is a space character as defined
+// by Unicode's White Space property; in the Latin-1 space
+// this is
+// '\t', '\n', '\v', '\f', '\r', ' ', U+0085 (NEL), U+00A0 (NBSP).
+// Other definitions of spacing characters are set by category
+// Z and property Pattern_White_Space.
+func IsSpace(r rune) bool {
+ // This property isn't the same as Z; special-case it.
+ if uint32(r) <= MaxLatin1 {
+ switch r {
+ case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0:
+ return true
+ }
+ return false
+ }
+ return isExcludingLatin(White_Space, r)
+}
+
+// IsSymbol reports whether the rune is a symbolic character.
+func IsSymbol(r rune) bool {
+ if uint32(r) <= MaxLatin1 {
+ return properties[uint8(r)]&pS != 0
+ }
+ return isExcludingLatin(Symbol, r)
+}