Diffstat (limited to 'src/lib/go')
-rw-r--r--   src/lib/go/Makefile          66
-rw-r--r--   src/lib/go/scanner.go       475
-rw-r--r--   src/lib/go/scanner_test.go  202
-rw-r--r--   src/lib/go/token.go         296
4 files changed, 1039 insertions(+), 0 deletions(-)
diff --git a/src/lib/go/Makefile b/src/lib/go/Makefile
new file mode 100644
index 000000000..58b562171
--- /dev/null
+++ b/src/lib/go/Makefile
@@ -0,0 +1,66 @@
+# Copyright 2009 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# DO NOT EDIT. Automatically generated by gobuild.
+# gobuild -m >Makefile
+O=6
+GC=$(O)g
+CC=$(O)c -w
+AS=$(O)a
+AR=$(O)ar
+
+default: packages
+
+clean:
+ rm -f *.$O *.a $O.out
+
+test: packages
+ gotest
+
+coverage: packages
+ gotest
+ 6cov -g `pwd` | grep -v '_test\.go:'
+
+%.$O: %.go
+ $(GC) $*.go
+
+%.$O: %.c
+ $(CC) $*.c
+
+%.$O: %.s
+ $(AS) $*.s
+
+O1=\
+ token.$O\
+
+O2=\
+ scanner.$O\
+
+scanner.a: a1 a2
+token.a: a1 a2
+
+a1: $(O1)
+ $(AR) grc token.a token.$O
+ rm -f $(O1)
+
+a2: $(O2)
+ $(AR) grc scanner.a scanner.$O
+ rm -f $(O2)
+
+newpkg: clean
+ $(AR) grc scanner.a
+ $(AR) grc token.a
+
+$(O1): newpkg
+$(O2): a1
+
+nuke: clean
+ rm -f $(GOROOT)/pkg/scanner.a $(GOROOT)/pkg/token.a
+
+packages: scanner.a token.a
+
+install: packages
+ cp scanner.a $(GOROOT)/pkg/scanner.a
+ cp token.a $(GOROOT)/pkg/token.a
+
diff --git a/src/lib/go/scanner.go b/src/lib/go/scanner.go
new file mode 100644
index 000000000..ad7f80b5b
--- /dev/null
+++ b/src/lib/go/scanner.go
@@ -0,0 +1,475 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package scanner
+
+// A Go scanner. Takes a []byte as source which can then be
+// tokenized through repeated calls to the Scan() function.
+//
+// Sample use:
+//
+// import "token"
+// import "scanner"
+//
+// func tokenize(src []byte) {
+// var s scanner.Scanner;
+// s.Init(src, nil /* no error handler */, false /* ignore comments */);
+// for {
+// pos, tok, lit := s.Scan();
+//		if tok == token.EOF {
+// return;
+// }
+// println(pos, token.TokenString(tok), string(lit));
+// }
+// }
+
+import (
+ "utf8";
+ "unicode";
+ "strconv";
+ "token";
+)
+
+
+// An ErrorHandler may be provided to the Scanner: if a syntax error is
+// encountered and a handler was installed, Error() is called with the
+// exact token position (the byte position of the token in the source)
+// and the error message.
+
+type ErrorHandler interface {
+ Error(pos int, msg string);
+}
+
+
+type Scanner struct {
+ // immutable state
+ src []byte; // source
+ err ErrorHandler; // error reporting
+ scan_comments bool; // if set, comments are reported as tokens
+
+ // scanning state
+ pos int; // current reading position
+ ch int; // one char look-ahead
+ chpos int; // position of ch
+}
+
+
+func isLetter(ch int) bool {
+ return
+ 'a' <= ch && ch <= 'z' ||
+ 'A' <= ch && ch <= 'Z' ||
+ ch == '_' ||
+ ch >= 0x80 && unicode.IsLetter(ch);
+}
+
+
+func digitVal(ch int) int {
+ switch {
+ case '0' <= ch && ch <= '9': return ch - '0';
+ case 'a' <= ch && ch <= 'f': return ch - 'a' + 10;
+ case 'A' <= ch && ch <= 'F': return ch - 'A' + 10;
+ }
+ return 16; // larger than any legal digit val
+}
+
+
+// Read the next Unicode char into S.ch.
+// S.ch < 0 means end-of-file.
+func (S *Scanner) next() {
+ if S.pos < len(S.src) {
+ // assume ASCII
+ r, w := int(S.src[S.pos]), 1;
+ if r >= 0x80 {
+ // not ASCII
+ r, w = utf8.DecodeRune(S.src[S.pos : len(S.src)]);
+ }
+ S.ch = r;
+ S.chpos = S.pos;
+ S.pos += w;
+ } else {
+ S.ch = -1; // eof
+ S.chpos = len(S.src);
+ }
+}
+
+
+// Initialize the scanner.
+//
+// The error handler (err) is called when an illegal token is encountered.
+// If scan_comments is set to true, newline characters ('\n') and comments
+// are recognized as token.COMMENT, otherwise they are treated as white
+// space and ignored.
+
+func (S *Scanner) Init(src []byte, err ErrorHandler, scan_comments bool) {
+ S.src = src;
+ S.err = err;
+ S.scan_comments = scan_comments;
+ S.next();
+}
+
+
+func charString(ch int) string {
+ s := string(ch);
+ switch ch {
+ case '\a': s = `\a`;
+ case '\b': s = `\b`;
+ case '\f': s = `\f`;
+ case '\n': s = `\n`;
+ case '\r': s = `\r`;
+ case '\t': s = `\t`;
+ case '\v': s = `\v`;
+ case '\\': s = `\\`;
+ case '\'': s = `\'`;
+ }
+ return "'" + s + "' (U+" + strconv.Itob(ch, 16) + ")";
+}
+
+
+func (S *Scanner) error(pos int, msg string) {
+	if S.err != nil { S.err.Error(pos, msg); }  // a nil handler (see sample use) suppresses error reporting
+}
+
+
+func (S *Scanner) expect(ch int) {
+ if S.ch != ch {
+ S.error(S.chpos, "expected " + charString(ch) + ", found " + charString(S.ch));
+ }
+ S.next(); // always make progress
+}
+
+
+func (S *Scanner) skipWhitespace() {
+ for {
+ switch S.ch {
+ case '\t', '\r', ' ':
+ // nothing to do
+ case '\n':
+ if S.scan_comments {
+ return;
+ }
+ default:
+ return;
+ }
+ S.next();
+ }
+ panic("UNREACHABLE");
+}
+
+
+func (S *Scanner) scanComment() []byte {
+ // first '/' already consumed
+ pos := S.chpos - 1;
+
+ if S.ch == '/' {
+ //-style comment
+ for S.ch >= 0 {
+ S.next();
+ if S.ch == '\n' {
+ // '\n' terminates comment but we do not include
+ // it in the comment (otherwise we don't see the
+ // start of a newline in skipWhitespace()).
+ return S.src[pos : S.chpos];
+ }
+ }
+
+ } else {
+ /*-style comment */
+ S.expect('*');
+ for S.ch >= 0 {
+ ch := S.ch;
+ S.next();
+ if ch == '*' && S.ch == '/' {
+ S.next();
+ return S.src[pos : S.chpos];
+ }
+ }
+ }
+
+ S.error(pos, "comment not terminated");
+ return S.src[pos : S.chpos];
+}
+
+
+func (S *Scanner) scanIdentifier() (tok int, lit []byte) {
+ pos := S.chpos;
+ for isLetter(S.ch) || digitVal(S.ch) < 10 {
+ S.next();
+ }
+ lit = S.src[pos : S.chpos];
+ return token.Lookup(lit), lit;
+}
+
+
+func (S *Scanner) scanMantissa(base int) {
+ for digitVal(S.ch) < base {
+ S.next();
+ }
+}
+
+
+func (S *Scanner) scanNumber(seen_decimal_point bool) (tok int, lit []byte) {
+ pos := S.chpos;
+ tok = token.INT;
+
+ if seen_decimal_point {
+ tok = token.FLOAT;
+ pos--; // '.' is one byte
+ S.scanMantissa(10);
+ goto exponent;
+ }
+
+ if S.ch == '0' {
+ // int or float
+ S.next();
+ if S.ch == 'x' || S.ch == 'X' {
+ // hexadecimal int
+ S.next();
+ S.scanMantissa(16);
+ } else {
+ // octal int or float
+ S.scanMantissa(8);
+ if digitVal(S.ch) < 10 || S.ch == '.' || S.ch == 'e' || S.ch == 'E' {
+ // float
+ tok = token.FLOAT;
+ goto mantissa;
+ }
+ // octal int
+ }
+ goto exit;
+ }
+
+mantissa:
+ // decimal int or float
+ S.scanMantissa(10);
+
+ if S.ch == '.' {
+ // float
+ tok = token.FLOAT;
+ S.next();
+		S.scanMantissa(10);
+ }
+
+exponent:
+ if S.ch == 'e' || S.ch == 'E' {
+ // float
+ tok = token.FLOAT;
+ S.next();
+ if S.ch == '-' || S.ch == '+' {
+ S.next();
+ }
+ S.scanMantissa(10);
+ }
+
+exit:
+ return tok, S.src[pos : S.chpos];
+}
+
+
+func (S *Scanner) scanDigits(n int, base int) {
+ for digitVal(S.ch) < base {
+ S.next();
+ n--;
+ }
+ if n > 0 {
+ S.error(S.chpos, "illegal char escape");
+ }
+}
+
+
+func (S *Scanner) scanEscape(quote int) {
+ ch := S.ch;
+ pos := S.chpos;
+ S.next();
+ switch ch {
+ case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
+ // nothing to do
+ case '0', '1', '2', '3', '4', '5', '6', '7':
+ S.scanDigits(3 - 1, 8); // 1 char read already
+ case 'x':
+ S.scanDigits(2, 16);
+ case 'u':
+ S.scanDigits(4, 16);
+ case 'U':
+ S.scanDigits(8, 16);
+ default:
+ S.error(pos, "illegal char escape");
+ }
+}
+
+
+func (S *Scanner) scanChar() []byte {
+ // '\'' already consumed
+
+ pos := S.chpos - 1;
+ ch := S.ch;
+ S.next();
+ if ch == '\\' {
+ S.scanEscape('\'');
+ }
+
+ S.expect('\'');
+ return S.src[pos : S.chpos];
+}
+
+
+func (S *Scanner) scanString() []byte {
+ // '"' already consumed
+
+ pos := S.chpos - 1;
+ for S.ch != '"' {
+ ch := S.ch;
+ S.next();
+ if ch == '\n' || ch < 0 {
+ S.error(pos, "string not terminated");
+ break;
+ }
+ if ch == '\\' {
+ S.scanEscape('"');
+ }
+ }
+
+ S.next();
+ return S.src[pos : S.chpos];
+}
+
+
+func (S *Scanner) scanRawString() []byte {
+ // '`' already consumed
+
+ pos := S.chpos - 1;
+ for S.ch != '`' {
+ ch := S.ch;
+ S.next();
+ if ch == '\n' || ch < 0 {
+ S.error(pos, "string not terminated");
+ break;
+ }
+ }
+
+ S.next();
+ return S.src[pos : S.chpos];
+}
+
+
+// Helper functions for scanning multi-byte tokens such as >> += >>= .
+// Each routine looks at the character(s) that follow the first one
+// and picks the token accordingly: '=' yields tok1 (or tok3 if it
+// follows ch2); ch2 alone yields tok2; any other character leaves
+// the single-character token tok0.
+
+func (S *Scanner) switch2(tok0, tok1 int) int {
+ if S.ch == '=' {
+ S.next();
+ return tok1;
+ }
+ return tok0;
+}
+
+
+func (S *Scanner) switch3(tok0, tok1, ch2, tok2 int) int {
+ if S.ch == '=' {
+ S.next();
+ return tok1;
+ }
+ if S.ch == ch2 {
+ S.next();
+ return tok2;
+ }
+ return tok0;
+}
+
+
+func (S *Scanner) switch4(tok0, tok1, ch2, tok2, tok3 int) int {
+ if S.ch == '=' {
+ S.next();
+ return tok1;
+ }
+ if S.ch == ch2 {
+ S.next();
+ if S.ch == '=' {
+ S.next();
+ return tok3;
+ }
+ return tok2;
+ }
+ return tok0;
+}
+
+
+// Scans the next token. Returns the token byte position in the source,
+// its token value, and the corresponding literal text if the token is
+// an identifier or basic type literal (token.IsLiteral(tok) == true).
+
+func (S *Scanner) Scan() (pos, tok int, lit []byte) {
+scan_again:
+ S.skipWhitespace();
+
+ pos, tok = S.chpos, token.ILLEGAL;
+
+ switch ch := S.ch; {
+ case isLetter(ch):
+ tok, lit = S.scanIdentifier();
+ case digitVal(ch) < 10:
+ tok, lit = S.scanNumber(false);
+ default:
+ S.next(); // always make progress
+ switch ch {
+ case -1 : tok = token.EOF;
+ case '\n': tok, lit = token.COMMENT, []byte{'\n'};
+ case '"' : tok, lit = token.STRING, S.scanString();
+ case '\'': tok, lit = token.CHAR, S.scanChar();
+ case '`' : tok, lit = token.STRING, S.scanRawString();
+ case ':' : tok = S.switch2(token.COLON, token.DEFINE);
+ case '.' :
+ if digitVal(S.ch) < 10 {
+ tok, lit = S.scanNumber(true);
+ } else if S.ch == '.' {
+ S.next();
+ if S.ch == '.' {
+ S.next();
+ tok = token.ELLIPSIS;
+ }
+ } else {
+ tok = token.PERIOD;
+ }
+ case ',': tok = token.COMMA;
+ case ';': tok = token.SEMICOLON;
+ case '(': tok = token.LPAREN;
+ case ')': tok = token.RPAREN;
+ case '[': tok = token.LBRACK;
+ case ']': tok = token.RBRACK;
+ case '{': tok = token.LBRACE;
+ case '}': tok = token.RBRACE;
+ case '+': tok = S.switch3(token.ADD, token.ADD_ASSIGN, '+', token.INC);
+ case '-': tok = S.switch3(token.SUB, token.SUB_ASSIGN, '-', token.DEC);
+ case '*': tok = S.switch2(token.MUL, token.MUL_ASSIGN);
+ case '/':
+ if S.ch == '/' || S.ch == '*' {
+ tok, lit = token.COMMENT, S.scanComment();
+ if !S.scan_comments {
+ goto scan_again;
+ }
+ } else {
+ tok = S.switch2(token.QUO, token.QUO_ASSIGN);
+ }
+ case '%': tok = S.switch2(token.REM, token.REM_ASSIGN);
+ case '^': tok = S.switch2(token.XOR, token.XOR_ASSIGN);
+ case '<':
+ if S.ch == '-' {
+ S.next();
+ tok = token.ARROW;
+ } else {
+ tok = S.switch4(token.LSS, token.LEQ, '<', token.SHL, token.SHL_ASSIGN);
+ }
+ case '>': tok = S.switch4(token.GTR, token.GEQ, '>', token.SHR, token.SHR_ASSIGN);
+ case '=': tok = S.switch2(token.ASSIGN, token.EQL);
+ case '!': tok = S.switch2(token.NOT, token.NEQ);
+ case '&': tok = S.switch3(token.AND, token.AND_ASSIGN, '&', token.LAND);
+ case '|': tok = S.switch3(token.OR, token.OR_ASSIGN, '|', token.LOR);
+ default: S.error(pos, "illegal character " + charString(ch));
+ }
+ }
+
+ return pos, tok, lit;
+}
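
For reference, a complete driver for this scanner might look like the sketch below. It assumes the "token" and "scanner" packages from this change are installed (e.g. via the Makefile's install target); the ErrHandler type and the tokenize/main functions are illustrative names only, and io.StringBytes is used the same way as in scanner_test.go.

package main

import (
	"io";
	"token";
	"scanner";
)

// ErrHandler reports syntax errors with their byte offset.
// (Illustrative; any type implementing scanner.ErrorHandler works.)
type ErrHandler struct{}

func (h *ErrHandler) Error(pos int, msg string) {
	println("error at", pos, ":", msg);
}

func tokenize(src []byte) {
	var s scanner.Scanner;
	s.Init(src, &ErrHandler{}, true /* report comments */);
	for {
		pos, tok, lit := s.Scan();
		if tok == token.EOF {
			return;
		}
		println(pos, token.TokenString(tok), string(lit));
	}
}

func main() {
	tokenize(io.StringBytes("const pi = 3.14 // not quite"));
}

Setting the third Init argument to true makes comments and newlines come back as token.COMMENT tokens, as described in the Init comment above.
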
diff --git a/src/lib/go/scanner_test.go b/src/lib/go/scanner_test.go
new file mode 100644
index 000000000..136677cd0
--- /dev/null
+++ b/src/lib/go/scanner_test.go
@@ -0,0 +1,202 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package scanner
+
+import (
+ "io";
+ "token";
+ "scanner";
+ "testing";
+)
+
+
+const /* class */ (
+ special = iota;
+ literal;
+ operator;
+ keyword;
+)
+
+
+func tokenclass(tok int) int {
+ switch {
+ case token.IsLiteral(tok): return literal;
+ case token.IsOperator(tok): return operator;
+ case token.IsKeyword(tok): return keyword;
+ }
+ return special;
+}
+
+
+type elt struct {
+ pos int;
+ tok int;
+ lit string;
+ class int;
+}
+
+
+var tokens = [...]elt{
+ // Special tokens
+ elt{ 0, token.COMMENT, "/* a comment */", special },
+ elt{ 0, token.COMMENT, "\n", special },
+
+ // Identifiers and basic type literals
+ elt{ 0, token.IDENT, "foobar", literal },
+ elt{ 0, token.INT, "0", literal },
+ elt{ 0, token.INT, "01234567", literal },
+ elt{ 0, token.INT, "0xcafebabe", literal },
+ elt{ 0, token.FLOAT, "0.", literal },
+ elt{ 0, token.FLOAT, ".0", literal },
+ elt{ 0, token.FLOAT, "3.14159265", literal },
+ elt{ 0, token.FLOAT, "1e0", literal },
+ elt{ 0, token.FLOAT, "1e+100", literal },
+ elt{ 0, token.FLOAT, "1e-100", literal },
+ elt{ 0, token.FLOAT, "2.71828e-1000", literal },
+ elt{ 0, token.CHAR, "'a'", literal },
+ elt{ 0, token.STRING, "`foobar`", literal },
+
+	// Operators and delimiters
+ elt{ 0, token.ADD, "+", operator },
+ elt{ 0, token.SUB, "-", operator },
+ elt{ 0, token.MUL, "*", operator },
+ elt{ 0, token.QUO, "/", operator },
+ elt{ 0, token.REM, "%", operator },
+
+ elt{ 0, token.AND, "&", operator },
+ elt{ 0, token.OR, "|", operator },
+ elt{ 0, token.XOR, "^", operator },
+ elt{ 0, token.SHL, "<<", operator },
+ elt{ 0, token.SHR, ">>", operator },
+
+ elt{ 0, token.ADD_ASSIGN, "+=", operator },
+ elt{ 0, token.SUB_ASSIGN, "-=", operator },
+ elt{ 0, token.MUL_ASSIGN, "*=", operator },
+ elt{ 0, token.QUO_ASSIGN, "/=", operator },
+ elt{ 0, token.REM_ASSIGN, "%=", operator },
+
+ elt{ 0, token.AND_ASSIGN, "&=", operator },
+ elt{ 0, token.OR_ASSIGN, "|=", operator },
+ elt{ 0, token.XOR_ASSIGN, "^=", operator },
+ elt{ 0, token.SHL_ASSIGN, "<<=", operator },
+ elt{ 0, token.SHR_ASSIGN, ">>=", operator },
+
+ elt{ 0, token.LAND, "&&", operator },
+ elt{ 0, token.LOR, "||", operator },
+ elt{ 0, token.ARROW, "<-", operator },
+ elt{ 0, token.INC, "++", operator },
+ elt{ 0, token.DEC, "--", operator },
+
+ elt{ 0, token.EQL, "==", operator },
+ elt{ 0, token.LSS, "<", operator },
+ elt{ 0, token.GTR, ">", operator },
+ elt{ 0, token.ASSIGN, "=", operator },
+ elt{ 0, token.NOT, "!", operator },
+
+ elt{ 0, token.NEQ, "!=", operator },
+ elt{ 0, token.LEQ, "<=", operator },
+ elt{ 0, token.GEQ, ">=", operator },
+ elt{ 0, token.DEFINE, ":=", operator },
+ elt{ 0, token.ELLIPSIS, "...", operator },
+
+ elt{ 0, token.LPAREN, "(", operator },
+ elt{ 0, token.LBRACK, "[", operator },
+ elt{ 0, token.LBRACE, "{", operator },
+ elt{ 0, token.COMMA, ",", operator },
+ elt{ 0, token.PERIOD, ".", operator },
+
+ elt{ 0, token.RPAREN, ")", operator },
+ elt{ 0, token.RBRACK, "]", operator },
+ elt{ 0, token.RBRACE, "}", operator },
+ elt{ 0, token.SEMICOLON, ";", operator },
+ elt{ 0, token.COLON, ":", operator },
+
+ // Keywords
+ elt{ 0, token.BREAK, "break", keyword },
+ elt{ 0, token.CASE, "case", keyword },
+ elt{ 0, token.CHAN, "chan", keyword },
+ elt{ 0, token.CONST, "const", keyword },
+ elt{ 0, token.CONTINUE, "continue", keyword },
+
+ elt{ 0, token.DEFAULT, "default", keyword },
+ elt{ 0, token.DEFER, "defer", keyword },
+ elt{ 0, token.ELSE, "else", keyword },
+ elt{ 0, token.FALLTHROUGH, "fallthrough", keyword },
+ elt{ 0, token.FOR, "for", keyword },
+
+ elt{ 0, token.FUNC, "func", keyword },
+ elt{ 0, token.GO, "go", keyword },
+ elt{ 0, token.GOTO, "goto", keyword },
+ elt{ 0, token.IF, "if", keyword },
+ elt{ 0, token.IMPORT, "import", keyword },
+
+ elt{ 0, token.INTERFACE, "interface", keyword },
+ elt{ 0, token.MAP, "map", keyword },
+ elt{ 0, token.PACKAGE, "package", keyword },
+ elt{ 0, token.RANGE, "range", keyword },
+ elt{ 0, token.RETURN, "return", keyword },
+
+ elt{ 0, token.SELECT, "select", keyword },
+ elt{ 0, token.STRUCT, "struct", keyword },
+ elt{ 0, token.SWITCH, "switch", keyword },
+ elt{ 0, token.TYPE, "type", keyword },
+ elt{ 0, token.VAR, "var", keyword },
+}
+
+
+func init() {
+ // set pos fields
+ pos := 0;
+ for i := 0; i < len(tokens); i++ {
+ tokens[i].pos = pos;
+ pos += len(tokens[i].lit) + 1; // + 1 for space in between
+ }
+}
+
+
+type TestErrorHandler struct {
+ t *testing.T
+}
+
+func (h *TestErrorHandler) Error(pos int, msg string) {
+ h.t.Errorf("Error() called (pos = %d, msg = %s)", pos, msg);
+}
+
+
+func Test(t *testing.T) {
+ // make source
+ var src string;
+	for _, e := range tokens {
+ src += e.lit + " ";
+ }
+
+ // set up scanner
+ var s scanner.Scanner;
+ s.Init(io.StringBytes(src), &TestErrorHandler{t}, true);
+
+ // verify scan
+	for _, e := range tokens {
+ pos, tok, lit := s.Scan();
+ if pos != e.pos {
+ t.Errorf("bad position for %s: got %d, expected %d", e.lit, pos, e.pos);
+ }
+ if tok != e.tok {
+ t.Errorf("bad token for %s: got %s, expected %s", e.lit, token.TokenString(tok), token.TokenString(e.tok));
+ }
+ if token.IsLiteral(e.tok) && string(lit) != e.lit {
+ t.Errorf("bad literal for %s: got %s, expected %s", e.lit, string(lit), e.lit);
+ }
+ if tokenclass(tok) != e.class {
+ t.Errorf("bad class for %s: got %d, expected %d", e.lit, tokenclass(tok), e.class);
+ }
+ }
+	_, tok, _ := s.Scan();
+ if tok != token.EOF {
+ t.Errorf("bad token at eof: got %s, expected EOF", token.TokenString(tok));
+ }
+ if tokenclass(tok) != special {
+ t.Errorf("bad class at eof: got %d, expected %d", tokenclass(tok), special);
+ }
+}
diff --git a/src/lib/go/token.go b/src/lib/go/token.go
new file mode 100644
index 000000000..7691bac63
--- /dev/null
+++ b/src/lib/go/token.go
@@ -0,0 +1,296 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package token
+
+// Defines Go tokens and basic token operations.
+
+import "strconv"
+
+const (
+ // Special tokens
+ ILLEGAL = iota;
+ EOF;
+ COMMENT;
+
+ // Identifiers and basic type literals
+ // (these tokens stand for classes of literals)
+ literal_beg;
+ IDENT;
+ INT;
+ FLOAT;
+ CHAR;
+ STRING;
+ literal_end;
+
+ // Operators and delimiters
+ operator_beg;
+ ADD;
+ SUB;
+ MUL;
+ QUO;
+ REM;
+
+ AND;
+ OR;
+ XOR;
+ SHL;
+ SHR;
+
+ ADD_ASSIGN;
+ SUB_ASSIGN;
+ MUL_ASSIGN;
+ QUO_ASSIGN;
+ REM_ASSIGN;
+
+ AND_ASSIGN;
+ OR_ASSIGN;
+ XOR_ASSIGN;
+ SHL_ASSIGN;
+ SHR_ASSIGN;
+
+ LAND;
+ LOR;
+ ARROW;
+ INC;
+ DEC;
+
+ EQL;
+ LSS;
+ GTR;
+ ASSIGN;
+ NOT;
+
+ NEQ;
+ LEQ;
+ GEQ;
+ DEFINE;
+ ELLIPSIS;
+
+ LPAREN;
+ LBRACK;
+ LBRACE;
+ COMMA;
+ PERIOD;
+
+ RPAREN;
+ RBRACK;
+ RBRACE;
+ SEMICOLON;
+ COLON;
+ operator_end;
+
+ // Keywords
+ keyword_beg;
+ BREAK;
+ CASE;
+ CHAN;
+ CONST;
+ CONTINUE;
+
+ DEFAULT;
+ DEFER;
+ ELSE;
+ FALLTHROUGH;
+ FOR;
+
+ FUNC;
+ GO;
+ GOTO;
+ IF;
+ IMPORT;
+
+ INTERFACE;
+ MAP;
+ PACKAGE;
+ RANGE;
+ RETURN;
+
+ SELECT;
+ STRUCT;
+ SWITCH;
+ TYPE;
+ VAR;
+ keyword_end;
+)
+
+
+// At the moment we have no array literal syntax that lets us describe
+// the index for each element - use a map for now to make sure they are
+// in sync.
+var tokens = map [int] string {
+ ILLEGAL : "ILLEGAL",
+
+ EOF : "EOF",
+ COMMENT : "COMMENT",
+
+ IDENT : "IDENT",
+ INT : "INT",
+ FLOAT : "FLOAT",
+ CHAR : "CHAR",
+ STRING : "STRING",
+
+ ADD : "+",
+ SUB : "-",
+ MUL : "*",
+ QUO : "/",
+ REM : "%",
+
+ AND : "&",
+ OR : "|",
+ XOR : "^",
+ SHL : "<<",
+ SHR : ">>",
+
+ ADD_ASSIGN : "+=",
+ SUB_ASSIGN : "-=",
+	MUL_ASSIGN : "*=",
+ QUO_ASSIGN : "/=",
+ REM_ASSIGN : "%=",
+
+ AND_ASSIGN : "&=",
+ OR_ASSIGN : "|=",
+ XOR_ASSIGN : "^=",
+ SHL_ASSIGN : "<<=",
+ SHR_ASSIGN : ">>=",
+
+ LAND : "&&",
+ LOR : "||",
+ ARROW : "<-",
+ INC : "++",
+ DEC : "--",
+
+ EQL : "==",
+ LSS : "<",
+ GTR : ">",
+ ASSIGN : "=",
+ NOT : "!",
+
+ NEQ : "!=",
+ LEQ : "<=",
+ GEQ : ">=",
+ DEFINE : ":=",
+ ELLIPSIS : "...",
+
+ LPAREN : "(",
+ LBRACK : "[",
+ LBRACE : "{",
+ COMMA : ",",
+ PERIOD : ".",
+
+ RPAREN : ")",
+ RBRACK : "]",
+ RBRACE : "}",
+ SEMICOLON : ";",
+ COLON : ":",
+
+ BREAK : "break",
+ CASE : "case",
+ CHAN : "chan",
+ CONST : "const",
+ CONTINUE : "continue",
+
+ DEFAULT : "default",
+ DEFER : "defer",
+ ELSE : "else",
+ FALLTHROUGH : "fallthrough",
+ FOR : "for",
+
+ FUNC : "func",
+ GO : "go",
+ GOTO : "goto",
+ IF : "if",
+ IMPORT : "import",
+
+ INTERFACE : "interface",
+ MAP : "map",
+ PACKAGE : "package",
+ RANGE : "range",
+ RETURN : "return",
+
+ SELECT : "select",
+ STRUCT : "struct",
+ SWITCH : "switch",
+ TYPE : "type",
+ VAR : "var",
+}
+
+func TokenString(tok int) string {
+ if str, exists := tokens[tok]; exists {
+ return str;
+ }
+ return "token(" + strconv.Itoa(tok) + ")";
+}
+
+
+// A set of constants for precedence-based expression parsing.
+// Non-operators have lowest precedence, followed by operators
+// starting with precedence 0 up to unary operators; the highest
+// precedence is used for tokens appearing in selectors, etc.
+
+const (
+ LowestPrec = -1; // non-operators
+ UnaryPrec = 7;
+ HighestPrec = 8;
+)
+
+// Returns precedence of a token. Returns LowestPrec
+// if the token is not an operator.
+func Precedence(tok int) int {
+ switch tok {
+ case COLON:
+ return 0;
+ case LOR:
+ return 1;
+ case LAND:
+ return 2;
+ case ARROW:
+ return 3;
+ case EQL, NEQ, LSS, LEQ, GTR, GEQ:
+ return 4;
+ case ADD, SUB, OR, XOR:
+ return 5;
+ case MUL, QUO, REM, SHL, SHR, AND:
+ return 6;
+ }
+ return LowestPrec;
+}
+
+
+var keywords map [string] int;
+
+func init() {
+ keywords = make(map [string] int);
+ for i := keyword_beg + 1; i < keyword_end; i++ {
+ keywords[tokens[i]] = i;
+ }
+}
+
+
+// Map an identifier to its keyword token or IDENT (if not a keyword).
+func Lookup(ident []byte) int {
+ // TODO Maps with []byte key are illegal because []byte does not
+ // support == . Should find a more efficient solution eventually.
+ if tok, is_keyword := keywords[string(ident)]; is_keyword {
+ return tok;
+ }
+ return IDENT;
+}
+
+
+// Predicates
+
+// Identifiers and basic type literals
+func IsLiteral(tok int) bool {
+ return literal_beg < tok && tok < literal_end;
+}
+
+// Operators and delimiters
+func IsOperator(tok int) bool {
+ return operator_beg < tok && tok < operator_end;
+}
+
+func IsKeyword(tok int) bool {
+ return keyword_beg < tok && tok < keyword_end;
+}
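
The Precedence function above is intended for precedence-based expression parsing. The following self-contained sketch (illustrative only, not part of this change; parseBinaryExpr, next, and errh are made-up names) shows the usual precedence-climbing loop driven by token.Precedence, using the scanner from this change and producing a fully parenthesized string instead of a syntax tree.

package main

import (
	"io";
	"token";
	"scanner";
)

// errh discards errors; illustrative only.
type errh struct{}
func (e *errh) Error(pos int, msg string) {}

var s scanner.Scanner;
var tok int;
var lit []byte;

func next() {
	_, tok, lit = s.Scan();  // ignore the position for this example
}

// parseBinaryExpr is the classic precedence-climbing loop over
// token.Precedence. Operands are single identifiers or numbers;
// the result is a fully parenthesized rendering of the input.
func parseBinaryExpr(prec1 int) string {
	x := string(lit);  // operand
	next();
	for prec := token.Precedence(tok); prec >= prec1; prec-- {
		for token.Precedence(tok) == prec {
			op := token.TokenString(tok);
			next();
			y := parseBinaryExpr(prec + 1);
			x = "(" + x + " " + op + " " + y + ")";
		}
	}
	return x;
}

func main() {
	s.Init(io.StringBytes("a + b*c - d"), &errh{}, false);
	next();
	println(parseBinaryExpr(0));  // prints ((a + (b * c)) - d)
}

Each precedence level is consumed by the inner loop before the outer loop moves on to the next weaker level, which is why b*c binds before the surrounding + and - in the example.
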