Diffstat (limited to 'src/lib/go')
-rw-r--r-- | src/lib/go/Makefile        |  66
-rw-r--r-- | src/lib/go/scanner.go      | 475
-rw-r--r-- | src/lib/go/scanner_test.go | 202
-rw-r--r-- | src/lib/go/token.go        | 296
4 files changed, 1039 insertions, 0 deletions
diff --git a/src/lib/go/Makefile b/src/lib/go/Makefile
new file mode 100644
index 000000000..58b562171
--- /dev/null
+++ b/src/lib/go/Makefile
@@ -0,0 +1,66 @@
+# Copyright 2009 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# DO NOT EDIT. Automatically generated by gobuild.
+# gobuild -m >Makefile
+O=6
+GC=$(O)g
+CC=$(O)c -w
+AS=$(O)a
+AR=$(O)ar
+
+default: packages
+
+clean:
+	rm -f *.$O *.a $O.out
+
+test: packages
+	gotest
+
+coverage: packages
+	gotest
+	6cov -g `pwd` | grep -v '_test\.go:'
+
+%.$O: %.go
+	$(GC) $*.go
+
+%.$O: %.c
+	$(CC) $*.c
+
+%.$O: %.s
+	$(AS) $*.s
+
+O1=\
+	token.$O\
+
+O2=\
+	scanner.$O\
+
+scanner.a: a1 a2
+token.a: a1 a2
+
+a1: $(O1)
+	$(AR) grc token.a token.$O
+	rm -f $(O1)
+
+a2: $(O2)
+	$(AR) grc scanner.a scanner.$O
+	rm -f $(O2)
+
+newpkg: clean
+	$(AR) grc scanner.a
+	$(AR) grc token.a
+
+$(O1): newpkg
+$(O2): a1
+
+nuke: clean
+	rm -f $(GOROOT)/pkg/scanner.a $(GOROOT)/pkg/token.a
+
+packages: scanner.a token.a
+
+install: packages
+	cp scanner.a $(GOROOT)/pkg/scanner.a
+	cp token.a $(GOROOT)/pkg/token.a
+
diff --git a/src/lib/go/scanner.go b/src/lib/go/scanner.go
new file mode 100644
index 000000000..ad7f80b5b
--- /dev/null
+++ b/src/lib/go/scanner.go
@@ -0,0 +1,475 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package scanner
+
+// A Go scanner. Takes a []byte as source which can then be
+// tokenized through repeated calls to the Scan() function.
+//
+// Sample use:
+//
+//	import "token"
+//	import "scanner"
+//
+//	func tokenize(src []byte) {
+//		var s scanner.Scanner;
+//		s.Init(src, nil /* no error handler */, false /* ignore comments */);
+//		for {
+//			pos, tok, lit := s.Scan();
+//			if tok == token.EOF {
+//				return;
+//			}
+//			println(pos, token.TokenString(tok), string(lit));
+//		}
+//	}
+
+import (
+	"utf8";
+	"unicode";
+	"strconv";
+	"token";
+)
+
+
+// An implementation of an ErrorHandler must be provided to the Scanner.
+// If a syntax error is encountered, Error() is called with the exact
+// token position (the byte position of the token in the source) and the
+// error message.
+
+type ErrorHandler interface {
+	Error(pos int, msg string);
+}
+
+
+type Scanner struct {
+	// immutable state
+	src []byte;  // source
+	err ErrorHandler;  // error reporting
+	scan_comments bool;  // if set, comments are reported as tokens
+
+	// scanning state
+	pos int;  // current reading position
+	ch int;  // one char look-ahead
+	chpos int;  // position of ch
+}
+
+
+func isLetter(ch int) bool {
+	return
+		'a' <= ch && ch <= 'z' ||
+		'A' <= ch && ch <= 'Z' ||
+		ch == '_' ||
+		ch >= 0x80 && unicode.IsLetter(ch);
+}
+
+
+func digitVal(ch int) int {
+	switch {
+	case '0' <= ch && ch <= '9': return ch - '0';
+	case 'a' <= ch && ch <= 'f': return ch - 'a' + 10;
+	case 'A' <= ch && ch <= 'F': return ch - 'A' + 10;
+	}
+	return 16;  // larger than any legal digit val
+}
+
+
+// Read the next Unicode char into S.ch.
+// S.ch < 0 means end-of-file.
+func (S *Scanner) next() {
+	if S.pos < len(S.src) {
+		// assume ASCII
+		r, w := int(S.src[S.pos]), 1;
+		if r >= 0x80 {
+			// not ASCII
+			r, w = utf8.DecodeRune(S.src[S.pos : len(S.src)]);
+		}
+		S.ch = r;
+		S.chpos = S.pos;
+		S.pos += w;
+	} else {
+		S.ch = -1;  // eof
+		S.chpos = len(S.src);
+	}
+}
+
+
+// Initialize the scanner.
+//
+// The error handler (err) is called when an illegal token is encountered.
+// If scan_comments is set to true, newline characters ('\n') and comments
+// are recognized as token.COMMENT, otherwise they are treated as white
+// space and ignored.
+
+func (S *Scanner) Init(src []byte, err ErrorHandler, scan_comments bool) {
+	S.src = src;
+	S.err = err;
+	S.scan_comments = scan_comments;
+	S.next();
+}
+
+
+func charString(ch int) string {
+	s := string(ch);
+	switch ch {
+	case '\a': s = `\a`;
+	case '\b': s = `\b`;
+	case '\f': s = `\f`;
+	case '\n': s = `\n`;
+	case '\r': s = `\r`;
+	case '\t': s = `\t`;
+	case '\v': s = `\v`;
+	case '\\': s = `\\`;
+	case '\'': s = `\'`;
+	}
+	return "'" + s + "' (U+" + strconv.Itob(ch, 16) + ")";
+}
+
+
+func (S *Scanner) error(pos int, msg string) {
+	S.err.Error(pos, msg);
+}
+
+
+func (S *Scanner) expect(ch int) {
+	if S.ch != ch {
+		S.error(S.chpos, "expected " + charString(ch) + ", found " + charString(S.ch));
+	}
+	S.next();  // always make progress
+}
+
+
+func (S *Scanner) skipWhitespace() {
+	for {
+		switch S.ch {
+		case '\t', '\r', ' ':
+			// nothing to do
+		case '\n':
+			if S.scan_comments {
+				return;
+			}
+		default:
+			return;
+		}
+		S.next();
+	}
+	panic("UNREACHABLE");
+}
+
+
+func (S *Scanner) scanComment() []byte {
+	// first '/' already consumed
+	pos := S.chpos - 1;
+
+	if S.ch == '/' {
+		//-style comment
+		for S.ch >= 0 {
+			S.next();
+			if S.ch == '\n' {
+				// '\n' terminates comment but we do not include
+				// it in the comment (otherwise we don't see the
+				// start of a newline in skipWhitespace()).
+				return S.src[pos : S.chpos];
+			}
+		}
+
+	} else {
+		/*-style comment */
+		S.expect('*');
+		for S.ch >= 0 {
+			ch := S.ch;
+			S.next();
+			if ch == '*' && S.ch == '/' {
+				S.next();
+				return S.src[pos : S.chpos];
+			}
+		}
+	}
+
+	S.error(pos, "comment not terminated");
+	return S.src[pos : S.chpos];
+}
+
+
+func (S *Scanner) scanIdentifier() (tok int, lit []byte) {
+	pos := S.chpos;
+	for isLetter(S.ch) || digitVal(S.ch) < 10 {
+		S.next();
+	}
+	lit = S.src[pos : S.chpos];
+	return token.Lookup(lit), lit;
+}
+
+
+func (S *Scanner) scanMantissa(base int) {
+	for digitVal(S.ch) < base {
+		S.next();
+	}
+}
+
+
+func (S *Scanner) scanNumber(seen_decimal_point bool) (tok int, lit []byte) {
+	pos := S.chpos;
+	tok = token.INT;
+
+	if seen_decimal_point {
+		tok = token.FLOAT;
+		pos--;  // '.' is one byte
+		S.scanMantissa(10);
+		goto exponent;
+	}
+
+	if S.ch == '0' {
+		// int or float
+		S.next();
+		if S.ch == 'x' || S.ch == 'X' {
+			// hexadecimal int
+			S.next();
+			S.scanMantissa(16);
+		} else {
+			// octal int or float
+			S.scanMantissa(8);
+			if digitVal(S.ch) < 10 || S.ch == '.' || S.ch == 'e' || S.ch == 'E' {
+				// float
+				tok = token.FLOAT;
+				goto mantissa;
+			}
+			// octal int
+		}
+		goto exit;
+	}
+
+mantissa:
+	// decimal int or float
+	S.scanMantissa(10);
+
+	if S.ch == '.' {
+		// float
+		tok = token.FLOAT;
+		S.next();
+		S.scanMantissa(10)
+	}
+
+exponent:
+	if S.ch == 'e' || S.ch == 'E' {
+		// float
+		tok = token.FLOAT;
+		S.next();
+		if S.ch == '-' || S.ch == '+' {
+			S.next();
+		}
+		S.scanMantissa(10);
+	}
+
+exit:
+	return tok, S.src[pos : S.chpos];
+}
+
+
+func (S *Scanner) scanDigits(n int, base int) {
+	for digitVal(S.ch) < base {
+		S.next();
+		n--;
+	}
+	if n > 0 {
+		S.error(S.chpos, "illegal char escape");
+	}
+}
+
+
+func (S *Scanner) scanEscape(quote int) {
+	ch := S.ch;
+	pos := S.chpos;
+	S.next();
+	switch ch {
+	case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
+		// nothing to do
+	case '0', '1', '2', '3', '4', '5', '6', '7':
+		S.scanDigits(3 - 1, 8);  // 1 char read already
+	case 'x':
+		S.scanDigits(2, 16);
+	case 'u':
+		S.scanDigits(4, 16);
+	case 'U':
+		S.scanDigits(8, 16);
+	default:
+		S.error(pos, "illegal char escape");
+	}
+}
+
+
+func (S *Scanner) scanChar() []byte {
+	// '\'' already consumed
+
+	pos := S.chpos - 1;
+	ch := S.ch;
+	S.next();
+	if ch == '\\' {
+		S.scanEscape('\'');
+	}
+
+	S.expect('\'');
+	return S.src[pos : S.chpos];
+}
+
+
+func (S *Scanner) scanString() []byte {
+	// '"' already consumed
+
+	pos := S.chpos - 1;
+	for S.ch != '"' {
+		ch := S.ch;
+		S.next();
+		if ch == '\n' || ch < 0 {
+			S.error(pos, "string not terminated");
+			break;
+		}
+		if ch == '\\' {
+			S.scanEscape('"');
+		}
+	}
+
+	S.next();
+	return S.src[pos : S.chpos];
+}
+
+
+func (S *Scanner) scanRawString() []byte {
+	// '`' already consumed
+
+	pos := S.chpos - 1;
+	for S.ch != '`' {
+		ch := S.ch;
+		S.next();
+		if ch == '\n' || ch < 0 {
+			S.error(pos, "string not terminated");
+			break;
+		}
+	}
+
+	S.next();
+	return S.src[pos : S.chpos];
+}
+
+
+// Helper functions for scanning multi-byte tokens such as >> += >>= .
+// Different routines recognize different length tok_i based on matches
+// of ch_i. If a token ends in '=', the result is tok1 or tok3
+// respectively. Otherwise, the result is tok0 if there was no other
+// matching character, or tok2 if the matching character was ch2.
+
+func (S *Scanner) switch2(tok0, tok1 int) int {
+	if S.ch == '=' {
+		S.next();
+		return tok1;
+	}
+	return tok0;
+}
+
+
+func (S *Scanner) switch3(tok0, tok1, ch2, tok2 int) int {
+	if S.ch == '=' {
+		S.next();
+		return tok1;
+	}
+	if S.ch == ch2 {
+		S.next();
+		return tok2;
+	}
+	return tok0;
+}
+
+
+func (S *Scanner) switch4(tok0, tok1, ch2, tok2, tok3 int) int {
+	if S.ch == '=' {
+		S.next();
+		return tok1;
+	}
+	if S.ch == ch2 {
+		S.next();
+		if S.ch == '=' {
+			S.next();
+			return tok3;
+		}
+		return tok2;
+	}
+	return tok0;
+}
+
+
+// Scans the next token. Returns the token byte position in the source,
+// its token value, and the corresponding literal text if the token is
+// an identifier or basic type literal (token.IsLiteral(tok) == true).
+
+func (S *Scanner) Scan() (pos, tok int, lit []byte) {
+scan_again:
+	S.skipWhitespace();
+
+	pos, tok = S.chpos, token.ILLEGAL;
+
+	switch ch := S.ch; {
+	case isLetter(ch):
+		tok, lit = S.scanIdentifier();
+	case digitVal(ch) < 10:
+		tok, lit = S.scanNumber(false);
+	default:
+		S.next();  // always make progress
+		switch ch {
+		case -1 : tok = token.EOF;
+		case '\n': tok, lit = token.COMMENT, []byte{'\n'};
+		case '"' : tok, lit = token.STRING, S.scanString();
+		case '\'': tok, lit = token.CHAR, S.scanChar();
+		case '`' : tok, lit = token.STRING, S.scanRawString();
+		case ':' : tok = S.switch2(token.COLON, token.DEFINE);
+		case '.' :
+			if digitVal(S.ch) < 10 {
+				tok, lit = S.scanNumber(true);
+			} else if S.ch == '.' {
+				S.next();
+				if S.ch == '.' {
+					S.next();
+					tok = token.ELLIPSIS;
+				}
+			} else {
+				tok = token.PERIOD;
+			}
+		case ',': tok = token.COMMA;
+		case ';': tok = token.SEMICOLON;
+		case '(': tok = token.LPAREN;
+		case ')': tok = token.RPAREN;
+		case '[': tok = token.LBRACK;
+		case ']': tok = token.RBRACK;
+		case '{': tok = token.LBRACE;
+		case '}': tok = token.RBRACE;
+		case '+': tok = S.switch3(token.ADD, token.ADD_ASSIGN, '+', token.INC);
+		case '-': tok = S.switch3(token.SUB, token.SUB_ASSIGN, '-', token.DEC);
+		case '*': tok = S.switch2(token.MUL, token.MUL_ASSIGN);
+		case '/':
+			if S.ch == '/' || S.ch == '*' {
+				tok, lit = token.COMMENT, S.scanComment();
+				if !S.scan_comments {
+					goto scan_again;
+				}
+			} else {
+				tok = S.switch2(token.QUO, token.QUO_ASSIGN);
+			}
+		case '%': tok = S.switch2(token.REM, token.REM_ASSIGN);
+		case '^': tok = S.switch2(token.XOR, token.XOR_ASSIGN);
+		case '<':
+			if S.ch == '-' {
+				S.next();
+				tok = token.ARROW;
+			} else {
+				tok = S.switch4(token.LSS, token.LEQ, '<', token.SHL, token.SHL_ASSIGN);
+			}
+		case '>': tok = S.switch4(token.GTR, token.GEQ, '>', token.SHR, token.SHR_ASSIGN);
+		case '=': tok = S.switch2(token.ASSIGN, token.EQL);
+		case '!': tok = S.switch2(token.NOT, token.NEQ);
+		case '&': tok = S.switch3(token.AND, token.AND_ASSIGN, '&', token.LAND);
+		case '|': tok = S.switch3(token.OR, token.OR_ASSIGN, '|', token.LOR);
+		default: S.error(pos, "illegal character " + charString(ch));
+		}
+	}
+
+	return pos, tok, lit;
+}
diff --git a/src/lib/go/scanner_test.go b/src/lib/go/scanner_test.go
new file mode 100644
index 000000000..136677cd0
--- /dev/null
+++ b/src/lib/go/scanner_test.go
@@ -0,0 +1,202 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package scanner
+
+import (
+	"io";
+	"token";
+	"scanner";
+	"testing";
+)
+
+
+const /* class */ (
+	special = iota;
+	literal;
+	operator;
+	keyword;
+)
+
+
+func tokenclass(tok int) int {
+	switch {
+	case token.IsLiteral(tok): return literal;
+	case token.IsOperator(tok): return operator;
+	case token.IsKeyword(tok): return keyword;
+	}
+	return special;
+}
+
+
+type elt struct {
+	pos int;
+	tok int;
+	lit string;
+	class int;
+}
+
+
+var tokens = [...]elt{
+	// Special tokens
+	elt{ 0, token.COMMENT, "/* a comment */", special },
+	elt{ 0, token.COMMENT, "\n", special },
+
+	// Identifiers and basic type literals
+	elt{ 0, token.IDENT, "foobar", literal },
+	elt{ 0, token.INT, "0", literal },
+	elt{ 0, token.INT, "01234567", literal },
+	elt{ 0, token.INT, "0xcafebabe", literal },
+	elt{ 0, token.FLOAT, "0.", literal },
+	elt{ 0, token.FLOAT, ".0", literal },
+	elt{ 0, token.FLOAT, "3.14159265", literal },
+	elt{ 0, token.FLOAT, "1e0", literal },
+	elt{ 0, token.FLOAT, "1e+100", literal },
+	elt{ 0, token.FLOAT, "1e-100", literal },
+	elt{ 0, token.FLOAT, "2.71828e-1000", literal },
+	elt{ 0, token.CHAR, "'a'", literal },
+	elt{ 0, token.STRING, "`foobar`", literal },
+
+	// Operators and delimiters
+	elt{ 0, token.ADD, "+", operator },
+	elt{ 0, token.SUB, "-", operator },
+	elt{ 0, token.MUL, "*", operator },
+	elt{ 0, token.QUO, "/", operator },
+	elt{ 0, token.REM, "%", operator },
+
+	elt{ 0, token.AND, "&", operator },
+	elt{ 0, token.OR, "|", operator },
+	elt{ 0, token.XOR, "^", operator },
+	elt{ 0, token.SHL, "<<", operator },
+	elt{ 0, token.SHR, ">>", operator },
+
+	elt{ 0, token.ADD_ASSIGN, "+=", operator },
+	elt{ 0, token.SUB_ASSIGN, "-=", operator },
+	elt{ 0, token.MUL_ASSIGN, "*=", operator },
+	elt{ 0, token.QUO_ASSIGN, "/=", operator },
+	elt{ 0, token.REM_ASSIGN, "%=", operator },
+
+	elt{ 0, token.AND_ASSIGN, "&=", operator },
+	elt{ 0, token.OR_ASSIGN, "|=", operator },
+	elt{ 0, token.XOR_ASSIGN, "^=", operator },
+	elt{ 0, token.SHL_ASSIGN, "<<=", operator },
+	elt{ 0, token.SHR_ASSIGN, ">>=", operator },
+
+	elt{ 0, token.LAND, "&&", operator },
+	elt{ 0, token.LOR, "||", operator },
+	elt{ 0, token.ARROW, "<-", operator },
+	elt{ 0, token.INC, "++", operator },
+	elt{ 0, token.DEC, "--", operator },
+
+	elt{ 0, token.EQL, "==", operator },
+	elt{ 0, token.LSS, "<", operator },
+	elt{ 0, token.GTR, ">", operator },
+	elt{ 0, token.ASSIGN, "=", operator },
+	elt{ 0, token.NOT, "!", operator },
+
+	elt{ 0, token.NEQ, "!=", operator },
+	elt{ 0, token.LEQ, "<=", operator },
+	elt{ 0, token.GEQ, ">=", operator },
+	elt{ 0, token.DEFINE, ":=", operator },
+	elt{ 0, token.ELLIPSIS, "...", operator },
+
+	elt{ 0, token.LPAREN, "(", operator },
+	elt{ 0, token.LBRACK, "[", operator },
+	elt{ 0, token.LBRACE, "{", operator },
+	elt{ 0, token.COMMA, ",", operator },
+	elt{ 0, token.PERIOD, ".", operator },
+
+	elt{ 0, token.RPAREN, ")", operator },
+	elt{ 0, token.RBRACK, "]", operator },
+	elt{ 0, token.RBRACE, "}", operator },
+	elt{ 0, token.SEMICOLON, ";", operator },
+	elt{ 0, token.COLON, ":", operator },
+
+	// Keywords
+	elt{ 0, token.BREAK, "break", keyword },
+	elt{ 0, token.CASE, "case", keyword },
+	elt{ 0, token.CHAN, "chan", keyword },
+	elt{ 0, token.CONST, "const", keyword },
+	elt{ 0, token.CONTINUE, "continue", keyword },
+
+	elt{ 0, token.DEFAULT, "default", keyword },
+	elt{ 0, token.DEFER, "defer", keyword },
+	elt{ 0, token.ELSE, "else", keyword },
+	elt{ 0, token.FALLTHROUGH, "fallthrough", keyword },
+	elt{ 0, token.FOR, "for", keyword },
+
+	elt{ 0, token.FUNC, "func", keyword },
+	elt{ 0, token.GO, "go", keyword },
+	elt{ 0, token.GOTO, "goto", keyword },
+	elt{ 0, token.IF, "if", keyword },
+	elt{ 0, token.IMPORT, "import", keyword },
+
+	elt{ 0, token.INTERFACE, "interface", keyword },
+	elt{ 0, token.MAP, "map", keyword },
+	elt{ 0, token.PACKAGE, "package", keyword },
+	elt{ 0, token.RANGE, "range", keyword },
+	elt{ 0, token.RETURN, "return", keyword },
+
+	elt{ 0, token.SELECT, "select", keyword },
+	elt{ 0, token.STRUCT, "struct", keyword },
+	elt{ 0, token.SWITCH, "switch", keyword },
+	elt{ 0, token.TYPE, "type", keyword },
+	elt{ 0, token.VAR, "var", keyword },
+}
+
+
+func init() {
+	// set pos fields
+	pos := 0;
+	for i := 0; i < len(tokens); i++ {
+		tokens[i].pos = pos;
+		pos += len(tokens[i].lit) + 1;  // + 1 for space in between
+	}
+}
+
+
+type TestErrorHandler struct {
+	t *testing.T
+}
+
+func (h *TestErrorHandler) Error(pos int, msg string) {
+	h.t.Errorf("Error() called (pos = %d, msg = %s)", pos, msg);
+}
+
+
+func Test(t *testing.T) {
+	// make source
+	var src string;
+	for i, e := range tokens {
+		src += e.lit + " ";
+	}
+
+	// set up scanner
+	var s scanner.Scanner;
+	s.Init(io.StringBytes(src), &TestErrorHandler{t}, true);
+
+	// verify scan
+	for i, e := range tokens {
+		pos, tok, lit := s.Scan();
+		if pos != e.pos {
+			t.Errorf("bad position for %s: got %d, expected %d", e.lit, pos, e.pos);
+		}
+		if tok != e.tok {
+			t.Errorf("bad token for %s: got %s, expected %s", e.lit, token.TokenString(tok), token.TokenString(e.tok));
+		}
+		if token.IsLiteral(e.tok) && string(lit) != e.lit {
+			t.Errorf("bad literal for %s: got %s, expected %s", e.lit, string(lit), e.lit);
+		}
+		if tokenclass(tok) != e.class {
+			t.Errorf("bad class for %s: got %d, expected %d", e.lit, tokenclass(tok), e.class);
+		}
+	}
+	pos, tok, lit := s.Scan();
+	if tok != token.EOF {
+		t.Errorf("bad token at eof: got %s, expected EOF", token.TokenString(tok));
+	}
+	if tokenclass(tok) != special {
+		t.Errorf("bad class at eof: got %d, expected %d", tokenclass(tok), special);
+	}
+}
diff --git a/src/lib/go/token.go b/src/lib/go/token.go
new file mode 100644
index 000000000..7691bac63
--- /dev/null
+++ b/src/lib/go/token.go
@@ -0,0 +1,296 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package token
+
+// Defines Go tokens and basic token operations.
+
+import "strconv"
+
+const (
+	// Special tokens
+	ILLEGAL = iota;
+	EOF;
+	COMMENT;
+
+	// Identifiers and basic type literals
+	// (these tokens stand for classes of literals)
+	literal_beg;
+	IDENT;
+	INT;
+	FLOAT;
+	CHAR;
+	STRING;
+	literal_end;
+
+	// Operators and delimiters
+	operator_beg;
+	ADD;
+	SUB;
+	MUL;
+	QUO;
+	REM;
+
+	AND;
+	OR;
+	XOR;
+	SHL;
+	SHR;
+
+	ADD_ASSIGN;
+	SUB_ASSIGN;
+	MUL_ASSIGN;
+	QUO_ASSIGN;
+	REM_ASSIGN;
+
+	AND_ASSIGN;
+	OR_ASSIGN;
+	XOR_ASSIGN;
+	SHL_ASSIGN;
+	SHR_ASSIGN;
+
+	LAND;
+	LOR;
+	ARROW;
+	INC;
+	DEC;
+
+	EQL;
+	LSS;
+	GTR;
+	ASSIGN;
+	NOT;
+
+	NEQ;
+	LEQ;
+	GEQ;
+	DEFINE;
+	ELLIPSIS;
+
+	LPAREN;
+	LBRACK;
+	LBRACE;
+	COMMA;
+	PERIOD;
+
+	RPAREN;
+	RBRACK;
+	RBRACE;
+	SEMICOLON;
+	COLON;
+	operator_end;
+
+	// Keywords
+	keyword_beg;
+	BREAK;
+	CASE;
+	CHAN;
+	CONST;
+	CONTINUE;
+
+	DEFAULT;
+	DEFER;
+	ELSE;
+	FALLTHROUGH;
+	FOR;
+
+	FUNC;
+	GO;
+	GOTO;
+	IF;
+	IMPORT;
+
+	INTERFACE;
+	MAP;
+	PACKAGE;
+	RANGE;
+	RETURN;
+
+	SELECT;
+	STRUCT;
+	SWITCH;
+	TYPE;
+	VAR;
+	keyword_end;
+)
+
+
+// At the moment we have no array literal syntax that lets us describe
+// the index for each element - use a map for now to make sure they are
+// in sync.
+var tokens = map [int] string {
+	ILLEGAL : "ILLEGAL",
+
+	EOF : "EOF",
+	COMMENT : "COMMENT",
+
+	IDENT : "IDENT",
+	INT : "INT",
+	FLOAT : "FLOAT",
+	CHAR : "CHAR",
+	STRING : "STRING",
+
+	ADD : "+",
+	SUB : "-",
+	MUL : "*",
+	QUO : "/",
+	REM : "%",
+
+	AND : "&",
+	OR : "|",
+	XOR : "^",
+	SHL : "<<",
+	SHR : ">>",
+
+	ADD_ASSIGN : "+=",
+	SUB_ASSIGN : "-=",
+	MUL_ASSIGN : "*=",
+	QUO_ASSIGN : "/=",
+	REM_ASSIGN : "%=",
+
+	AND_ASSIGN : "&=",
+	OR_ASSIGN : "|=",
+	XOR_ASSIGN : "^=",
+	SHL_ASSIGN : "<<=",
+	SHR_ASSIGN : ">>=",
+
+	LAND : "&&",
+	LOR : "||",
+	ARROW : "<-",
+	INC : "++",
+	DEC : "--",
+
+	EQL : "==",
+	LSS : "<",
+	GTR : ">",
+	ASSIGN : "=",
+	NOT : "!",
+
+	NEQ : "!=",
+	LEQ : "<=",
+	GEQ : ">=",
+	DEFINE : ":=",
+	ELLIPSIS : "...",
+
+	LPAREN : "(",
+	LBRACK : "[",
+	LBRACE : "{",
+	COMMA : ",",
+	PERIOD : ".",
+
+	RPAREN : ")",
+	RBRACK : "]",
+	RBRACE : "}",
+	SEMICOLON : ";",
+	COLON : ":",
+
+	BREAK : "break",
+	CASE : "case",
+	CHAN : "chan",
+	CONST : "const",
+	CONTINUE : "continue",
+
+	DEFAULT : "default",
+	DEFER : "defer",
+	ELSE : "else",
+	FALLTHROUGH : "fallthrough",
+	FOR : "for",
+
+	FUNC : "func",
+	GO : "go",
+	GOTO : "goto",
+	IF : "if",
+	IMPORT : "import",
+
+	INTERFACE : "interface",
+	MAP : "map",
+	PACKAGE : "package",
+	RANGE : "range",
+	RETURN : "return",
+
+	SELECT : "select",
+	STRUCT : "struct",
+	SWITCH : "switch",
+	TYPE : "type",
+	VAR : "var",
+}
+
+func TokenString(tok int) string {
+	if str, exists := tokens[tok]; exists {
+		return str;
+	}
+	return "token(" + strconv.Itoa(tok) + ")";
+}
+
+
+// A set of constants for precedence-based expression parsing.
+// Non-operators have lowest precedence, followed by operators
+// starting with precedence 0 up to unary operators and finally
+// the highest precedence used for tokens used in selectors, etc.
+
+const (
+	LowestPrec = -1;  // non-operators
+	UnaryPrec = 7;
+	HighestPrec = 8;
+)
+
+// Returns precedence of a token. Returns LowestPrec
+// if the token is not an operator.
+func Precedence(tok int) int {
+	switch tok {
+	case COLON:
+		return 0;
+	case LOR:
+		return 1;
+	case LAND:
+		return 2;
+	case ARROW:
+		return 3;
+	case EQL, NEQ, LSS, LEQ, GTR, GEQ:
+		return 4;
+	case ADD, SUB, OR, XOR:
+		return 5;
+	case MUL, QUO, REM, SHL, SHR, AND:
+		return 6;
+	}
+	return LowestPrec;
+}
+
+
+var keywords map [string] int;
+
+func init() {
+	keywords = make(map [string] int);
+	for i := keyword_beg + 1; i < keyword_end; i++ {
+		keywords[tokens[i]] = i;
+	}
+}
+
+
+// Map an identifier to its keyword token or IDENT (if not a keyword).
+func Lookup(ident []byte) int {
+	// TODO Maps with []byte key are illegal because []byte does not
+	// support == . Should find a more efficient solution eventually.
+	if tok, is_keyword := keywords[string(ident)]; is_keyword {
+		return tok;
+	}
+	return IDENT;
+}
+
+
+// Predicates
+
+// Identifiers and basic type literals
+func IsLiteral(tok int) bool {
+	return literal_beg < tok && tok < literal_end;
+}
+
+// Operators and delimiters
+func IsOperator(tok int) bool {
+	return operator_beg < tok && tok < operator_end;
+}
+
+func IsKeyword(tok int) bool {
+	return keyword_beg < tok && tok < keyword_end;
+}
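
For illustration, a minimal driver for the scanner added above, written in the same pre-release Go syntax used throughout this tree (semicolon-terminated statements, int-valued tokens, io.StringBytes). The printingErrorHandler type, the tokenize function, and the sample input are hypothetical and not part of this change; they only sketch how Init, Scan, and the ErrorHandler interface fit together when comments are scanned as tokens.

	package main

	import (
		"io";
		"scanner";
		"token";
	)

	// Hypothetical handler: counts and prints every syntax error reported by the scanner.
	type printingErrorHandler struct {
		count int;
	}

	func (h *printingErrorHandler) Error(pos int, msg string) {
		h.count++;
		println("error at byte", pos, ":", msg);
	}

	// Tokenize src, reporting newlines and comments as token.COMMENT (scan_comments = true).
	func tokenize(src []byte) {
		var h printingErrorHandler;
		var s scanner.Scanner;
		s.Init(src, &h, true);
		for {
			pos, tok, lit := s.Scan();
			if tok == token.EOF {
				break;
			}
			println(pos, token.TokenString(tok), string(lit));
		}
	}

	func main() {
		tokenize(io.StringBytes("x := 1 << 3;  // a comment"));
	}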
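
The switch2, switch3, and switch4 helpers implement longest-match selection for multi-character operators: after Scan consumes the first character, they peek at the following one or two characters to decide between, say, >, >=, >>, and >>=. A small sketch of that behavior, assuming the same imports as scanner_test.go ("io", "scanner", "token"); the function name and input string are made up for this example.

	// Hypothetical demo: scanning ">>= >> >= >" yields SHR_ASSIGN, SHR, GEQ, GTR,
	// because switch4 consumes as many matching characters as possible.
	func demoMaximalMunch() {
		var s scanner.Scanner;
		// A nil error handler is fine here only because this input produces no errors,
		// mirroring the package comment's own sample.
		s.Init(io.StringBytes(">>= >> >= >"), nil, false);
		for {
			pos, tok, lit := s.Scan();
			if tok == token.EOF {
				break;
			}
			println(pos, token.TokenString(tok), string(lit));
		}
	}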
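
The LowestPrec/UnaryPrec/HighestPrec constants and the Precedence function are intended for precedence-based expression parsing, but no parser is included in this change. The fragment below is a rough sketch of the loop they are meant to drive; the Expr and parser types, the next and parseOperand methods, and newBinaryExpr are placeholders invented for this example, and operands are deliberately oversimplified to single tokens.

	// Placeholders for this sketch only.
	type Expr interface {}

	type binary struct {
		x Expr;
		op int;
		y Expr;
	}

	func newBinaryExpr(x Expr, op int, y Expr) Expr {
		return binary{x, op, y};
	}

	type parser struct {
		s scanner.Scanner;
		pos int;
		tok int;
		lit []byte;
	}

	// Advance to the next token via Scanner.Scan.
	func (p *parser) next() {
		p.pos, p.tok, p.lit = p.s.Scan();
	}

	// Oversimplified operand: accept the current token's literal text.
	func (p *parser) parseOperand() Expr {
		x := string(p.lit);
		p.next();
		return x;
	}

	// Parse a binary expression whose operators all have precedence >= prec1;
	// token.Precedence groups operands so that higher precedence binds tighter,
	// and token.LowestPrec (-1) ends the loop at a non-operator.
	func (p *parser) parseBinaryExpr(prec1 int) Expr {
		x := p.parseOperand();
		for prec := token.Precedence(p.tok); prec >= prec1; prec-- {
			for token.Precedence(p.tok) == prec {
				op := p.tok;
				p.next();
				y := p.parseBinaryExpr(prec + 1);
				x = newBinaryExpr(x, op, y);
			}
		}
		return x;
	}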