diff options
author | Robert Griesemer <gri@golang.org> | 2009-03-10 17:08:05 -0700 |
---|---|---|
committer | Robert Griesemer <gri@golang.org> | 2009-03-10 17:08:05 -0700 |
commit | afd59f83d837d03770516ad2d0b7aa7b2bcde8b4 (patch) | |
tree | 83719ec0f501d778d5edc4e4a68f9402078c786b /src/lib/go | |
parent | 7b0b557b34b7285c840f941cfe22c9ed07342314 (diff) | |
download | golang-afd59f83d837d03770516ad2d0b7aa7b2bcde8b4.tar.gz |
- allow unicode digits in identifiers
- fixed a bug with character escapes (before: allowed arbitrary long sequences)
R=r
DELTA=63 (33 added, 19 deleted, 11 changed)
OCL=26010
CL=26070
Diffstat (limited to 'src/lib/go')
-rw-r--r-- | src/lib/go/scanner.go | 63 | ||||
-rw-r--r-- | src/lib/go/scanner_test.go | 7 |
2 files changed, 42 insertions, 28 deletions
diff --git a/src/lib/go/scanner.go b/src/lib/go/scanner.go index 94391c044..90078e0dd 100644 --- a/src/lib/go/scanner.go +++ b/src/lib/go/scanner.go @@ -60,25 +60,6 @@ type Scanner struct { } -func isLetter(ch int) bool { - return - 'a' <= ch && ch <= 'z' || - 'A' <= ch && ch <= 'Z' || - ch == '_' || - ch >= 0x80 && unicode.IsLetter(ch); -} - - -func digitVal(ch int) int { - switch { - case '0' <= ch && ch <= '9': return ch - '0'; - case 'a' <= ch && ch <= 'f': return ch - 'a' + 10; - case 'A' <= ch && ch <= 'F': return ch - 'A' + 10; - } - return 16; // larger than any legal digit val -} - - // Read the next Unicode char into S.ch. // S.ch < 0 means end-of-file. func (S *Scanner) next() { @@ -195,9 +176,25 @@ func (S *Scanner) scanComment() []byte { } +func isLetter(ch int) bool { + return + 'a' <= ch && ch <= 'z' || + 'A' <= ch && ch <= 'Z' || + ch == '_' || + ch >= 0x80 && unicode.IsLetter(ch); +} + + +func isDigit(ch int) bool { + return + '0' <= ch && ch <= '9' || + ch >= 0x80 && unicode.IsDecimalDigit(ch); +} + + func (S *Scanner) scanIdentifier() (tok int, lit []byte) { pos := S.chpos; - for isLetter(S.ch) || digitVal(S.ch) < 10 { + for isLetter(S.ch) || isDigit(S.ch) { S.next(); } lit = S.src[pos : S.chpos]; @@ -205,6 +202,16 @@ func (S *Scanner) scanIdentifier() (tok int, lit []byte) { } +func digitVal(ch int) int { + switch { + case '0' <= ch && ch <= '9': return ch - '0'; + case 'a' <= ch && ch <= 'f': return ch - 'a' + 10; + case 'A' <= ch && ch <= 'F': return ch - 'A' + 10; + } + return 16; // larger than any legal digit val +} + + func (S *Scanner) scanMantissa(base int) { for digitVal(S.ch) < base { S.next(); @@ -270,12 +277,12 @@ exit: } -func (S *Scanner) scanDigits(n int, base int) { - for digitVal(S.ch) < base { +func (S *Scanner) scanDigits(base, length int) { + for length > 0 && digitVal(S.ch) < base { S.next(); - n--; + length--; } - if n > 0 { + if length > 0 { S.error(S.chpos, "illegal char escape"); } } @@ -289,13 +296,13 @@ func (S *Scanner) scanEscape(quote int) { case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote: // nothing to do case '0', '1', '2', '3', '4', '5', '6', '7': - S.scanDigits(3 - 1, 8); // 1 char read already + S.scanDigits(8, 3 - 1); // 1 char read already case 'x': - S.scanDigits(2, 16); + S.scanDigits(16, 2); case 'u': - S.scanDigits(4, 16); + S.scanDigits(16, 4); case 'U': - S.scanDigits(8, 16); + S.scanDigits(16, 8); default: S.error(pos, "illegal char escape"); } diff --git a/src/lib/go/scanner_test.go b/src/lib/go/scanner_test.go index 136677cd0..9cad23914 100644 --- a/src/lib/go/scanner_test.go +++ b/src/lib/go/scanner_test.go @@ -45,6 +45,9 @@ var tokens = [...]elt{ // Identifiers and basic type literals elt{ 0, token.IDENT, "foobar", literal }, + elt{ 0, token.IDENT, "a۰۱۸", literal }, + elt{ 0, token.IDENT, "foo६४", literal }, + elt{ 0, token.IDENT, "bar9876", literal }, elt{ 0, token.INT, "0", literal }, elt{ 0, token.INT, "01234567", literal }, elt{ 0, token.INT, "0xcafebabe", literal }, @@ -56,6 +59,10 @@ var tokens = [...]elt{ elt{ 0, token.FLOAT, "1e-100", literal }, elt{ 0, token.FLOAT, "2.71828e-1000", literal }, elt{ 0, token.CHAR, "'a'", literal }, + elt{ 0, token.CHAR, "'\\000'", literal }, + elt{ 0, token.CHAR, "'\\xFF'", literal }, + elt{ 0, token.CHAR, "'\\uff16'", literal }, + elt{ 0, token.CHAR, "'\\U0000ff16'", literal }, elt{ 0, token.STRING, "`foobar`", literal }, // Operators and delimitors |