1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
|
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package unicode
// Per-code-point property bits for every code point under U+0100, used for fast lookup.
const (
	// Single-bit flags; iota gives each successive name the next higher bit.
	pC  = 1 << iota // control character
	pP              // punctuation character
	pN              // numeral
	pS              // symbolic character
	pZ              // spacing character
	pLu             // upper-case letter
	pLl             // lower-case letter
	pp              // printable character, per Go's definition

	// Composite masks assembled from the single-bit flags above.
	pg     = pp | pZ   // graphical character, per the Unicode definition
	pLo    = pLu | pLl // letter that is neither upper nor lower case (both case bits set)
	pLmask = pLu | pLl // mask covering every letter bit
)
// GraphicRanges lists the range tables that together make up the graphic
// characters as Unicode defines them: categories L, M, N, P, S and Zs.
var GraphicRanges = []*RangeTable{L, M, N, P, S, Zs}
// PrintRanges lists the range tables that together make up the printable
// characters as Go defines them: categories L, M, N, P and S.
// ASCII space, U+0020, is not in these tables and is handled separately.
var PrintRanges = []*RangeTable{L, M, N, P, S}
// IsGraphic reports whether the rune is a Graphic character as Unicode
// defines it. Graphic characters are the letters, marks, numbers,
// punctuation, symbols, and spaces of categories L, M, N, P, S, Zs.
func IsGraphic(r rune) bool {
	// Comparing as uint32 folds the negative-rune test into the upper-bound
	// test: a negative rune becomes a large value and takes the slow path.
	if uint32(r) > MaxLatin1 {
		return In(r, GraphicRanges...)
	}
	// Indexing with uint8 lets the compiler skip the range check.
	return properties[uint8(r)]&pg != 0
}
// IsPrint reports whether the rune is printable as Go defines it. Printable
// characters are the letters, marks, numbers, punctuation, and symbols of
// categories L, M, N, P, S, plus the ASCII space character. The result
// matches IsGraphic except that ASCII space, U+0020, is the only spacing
// character accepted.
func IsPrint(r rune) bool {
	if uint32(r) <= MaxLatin1 {
		// Latin-1 fast path: consult the per-code-point property table.
		flags := properties[uint8(r)]
		return flags&pp != 0
	}
	return In(r, PrintRanges...)
}
// IsOneOf reports whether the rune belongs to at least one of the given ranges.
// Prefer the function "In", which offers a nicer signature for the same check.
func IsOneOf(ranges []*RangeTable, r rune) bool {
	for i := 0; i < len(ranges); i++ {
		if Is(ranges[i], r) {
			return true
		}
	}
	// No table matched; this is also the result for an empty list.
	return false
}
// In reports whether the rune belongs to at least one of the given ranges.
func In(r rune, ranges ...*RangeTable) bool {
	for _, table := range ranges {
		if !Is(table, r) {
			continue
		}
		// First matching table settles the answer.
		return true
	}
	return false
}
// IsControl reports whether the rune is a control character.
// The Unicode C (Other) category covers additional code points,
// such as surrogates; test for those with Is(C, r).
func IsControl(r rune) bool {
	// Every control character lies below MaxLatin1, so a rune outside
	// Latin-1 (including a negative one, after the uint32 conversion)
	// can never qualify.
	return uint32(r) <= MaxLatin1 && properties[uint8(r)]&pC != 0
}
// IsLetter reports whether the rune is a letter, i.e. in category L.
func IsLetter(r rune) bool {
	if uint32(r) > MaxLatin1 {
		// Outside Latin-1: search the Letter table.
		return isExcludingLatin(Letter, r)
	}
	// Latin-1: any letter bit set in the property table means a letter.
	return properties[uint8(r)]&pLmask != 0
}
// IsMark reports whether the rune is a mark character (category M).
// It defers entirely to isExcludingLatin on the Mark table.
func IsMark(r rune) bool {
// There are no mark characters in Latin-1, so unlike the other
// predicates in this file no Latin-1 property-table lookup is done.
return isExcludingLatin(Mark, r)
}
// IsNumber reports whether the rune is a number, i.e. in category N.
func IsNumber(r rune) bool {
	switch {
	case uint32(r) <= MaxLatin1:
		// Latin-1 fast path via the property table.
		return properties[uint8(r)]&pN != 0
	default:
		return isExcludingLatin(Number, r)
	}
}
// IsPunct reports whether the rune is a Unicode punctuation character,
// i.e. in category P.
func IsPunct(r rune) bool {
	if uint32(r) > MaxLatin1 {
		// Outside Latin-1: search the Punct table.
		return Is(Punct, r)
	}
	return properties[uint8(r)]&pP != 0
}
// IsSpace reports whether the rune is a space character according to
// Unicode's White Space property. Within Latin-1 the space characters are
//	'\t', '\n', '\v', '\f', '\r', ' ', U+0085 (NEL), U+00A0 (NBSP).
// Category Z and property Pattern_White_Space give other definitions
// of spacing characters.
func IsSpace(r rune) bool {
	if uint32(r) > MaxLatin1 {
		return isExcludingLatin(White_Space, r)
	}
	// White Space is not the same set as category Z, so the Latin-1
	// members are enumerated explicitly rather than read from the pZ bit.
	switch r {
	case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0:
		return true
	default:
		return false
	}
}
// IsSymbol reports whether the rune is a symbolic character.
func IsSymbol(r rune) bool {
	inLatin1 := uint32(r) <= MaxLatin1
	if !inLatin1 {
		// Outside Latin-1: search the Symbol table.
		return isExcludingLatin(Symbol, r)
	}
	return properties[uint8(r)]&pS != 0
}
|