summaryrefslogtreecommitdiff
path: root/src/lib/go
diff options
context:
space:
mode:
authorRobert Griesemer <gri@golang.org>2009-03-10 17:08:05 -0700
committerRobert Griesemer <gri@golang.org>2009-03-10 17:08:05 -0700
commitafd59f83d837d03770516ad2d0b7aa7b2bcde8b4 (patch)
tree83719ec0f501d778d5edc4e4a68f9402078c786b /src/lib/go
parent7b0b557b34b7285c840f941cfe22c9ed07342314 (diff)
downloadgolang-afd59f83d837d03770516ad2d0b7aa7b2bcde8b4.tar.gz
- allow unicode digits in identifiers
- fixed a bug with character escapes (before: allowed arbitrary long sequences) R=r DELTA=63 (33 added, 19 deleted, 11 changed) OCL=26010 CL=26070
Diffstat (limited to 'src/lib/go')
-rw-r--r--src/lib/go/scanner.go63
-rw-r--r--src/lib/go/scanner_test.go7
2 files changed, 42 insertions, 28 deletions
diff --git a/src/lib/go/scanner.go b/src/lib/go/scanner.go
index 94391c044..90078e0dd 100644
--- a/src/lib/go/scanner.go
+++ b/src/lib/go/scanner.go
@@ -60,25 +60,6 @@ type Scanner struct {
}
-func isLetter(ch int) bool {
- return
- 'a' <= ch && ch <= 'z' ||
- 'A' <= ch && ch <= 'Z' ||
- ch == '_' ||
- ch >= 0x80 && unicode.IsLetter(ch);
-}
-
-
-func digitVal(ch int) int {
- switch {
- case '0' <= ch && ch <= '9': return ch - '0';
- case 'a' <= ch && ch <= 'f': return ch - 'a' + 10;
- case 'A' <= ch && ch <= 'F': return ch - 'A' + 10;
- }
- return 16; // larger than any legal digit val
-}
-
-
// Read the next Unicode char into S.ch.
// S.ch < 0 means end-of-file.
func (S *Scanner) next() {
@@ -195,9 +176,25 @@ func (S *Scanner) scanComment() []byte {
}
+func isLetter(ch int) bool {
+ return
+ 'a' <= ch && ch <= 'z' ||
+ 'A' <= ch && ch <= 'Z' ||
+ ch == '_' ||
+ ch >= 0x80 && unicode.IsLetter(ch);
+}
+
+
+func isDigit(ch int) bool {
+ return
+ '0' <= ch && ch <= '9' ||
+ ch >= 0x80 && unicode.IsDecimalDigit(ch);
+}
+
+
func (S *Scanner) scanIdentifier() (tok int, lit []byte) {
pos := S.chpos;
- for isLetter(S.ch) || digitVal(S.ch) < 10 {
+ for isLetter(S.ch) || isDigit(S.ch) {
S.next();
}
lit = S.src[pos : S.chpos];
@@ -205,6 +202,16 @@ func (S *Scanner) scanIdentifier() (tok int, lit []byte) {
}
+func digitVal(ch int) int {
+ switch {
+ case '0' <= ch && ch <= '9': return ch - '0';
+ case 'a' <= ch && ch <= 'f': return ch - 'a' + 10;
+ case 'A' <= ch && ch <= 'F': return ch - 'A' + 10;
+ }
+ return 16; // larger than any legal digit val
+}
+
+
func (S *Scanner) scanMantissa(base int) {
for digitVal(S.ch) < base {
S.next();
@@ -270,12 +277,12 @@ exit:
}
-func (S *Scanner) scanDigits(n int, base int) {
- for digitVal(S.ch) < base {
+func (S *Scanner) scanDigits(base, length int) {
+ for length > 0 && digitVal(S.ch) < base {
S.next();
- n--;
+ length--;
}
- if n > 0 {
+ if length > 0 {
S.error(S.chpos, "illegal char escape");
}
}
@@ -289,13 +296,13 @@ func (S *Scanner) scanEscape(quote int) {
case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
// nothing to do
case '0', '1', '2', '3', '4', '5', '6', '7':
- S.scanDigits(3 - 1, 8); // 1 char read already
+ S.scanDigits(8, 3 - 1); // 1 char read already
case 'x':
- S.scanDigits(2, 16);
+ S.scanDigits(16, 2);
case 'u':
- S.scanDigits(4, 16);
+ S.scanDigits(16, 4);
case 'U':
- S.scanDigits(8, 16);
+ S.scanDigits(16, 8);
default:
S.error(pos, "illegal char escape");
}
diff --git a/src/lib/go/scanner_test.go b/src/lib/go/scanner_test.go
index 136677cd0..9cad23914 100644
--- a/src/lib/go/scanner_test.go
+++ b/src/lib/go/scanner_test.go
@@ -45,6 +45,9 @@ var tokens = [...]elt{
// Identifiers and basic type literals
elt{ 0, token.IDENT, "foobar", literal },
+ elt{ 0, token.IDENT, "a۰۱۸", literal },
+ elt{ 0, token.IDENT, "foo६४", literal },
+ elt{ 0, token.IDENT, "bar9876", literal },
elt{ 0, token.INT, "0", literal },
elt{ 0, token.INT, "01234567", literal },
elt{ 0, token.INT, "0xcafebabe", literal },
@@ -56,6 +59,10 @@ var tokens = [...]elt{
elt{ 0, token.FLOAT, "1e-100", literal },
elt{ 0, token.FLOAT, "2.71828e-1000", literal },
elt{ 0, token.CHAR, "'a'", literal },
+ elt{ 0, token.CHAR, "'\\000'", literal },
+ elt{ 0, token.CHAR, "'\\xFF'", literal },
+ elt{ 0, token.CHAR, "'\\uff16'", literal },
+ elt{ 0, token.CHAR, "'\\U0000ff16'", literal },
elt{ 0, token.STRING, "`foobar`", literal },
// Operators and delimitors