From 86feedebc83fd6371d5ad6989fe05a543e5dd538 Mon Sep 17 00:00:00 2001 From: Robert Griesemer Date: Mon, 9 Mar 2009 12:41:53 -0700 Subject: - directory rename lang -> go R=rsc DELTA=2070 (1035 added, 1035 deleted, 0 changed) OCL=25939 CL=25939 --- src/lib/Makefile | 4 +- src/lib/go/Makefile | 66 ++++++ src/lib/go/scanner.go | 475 +++++++++++++++++++++++++++++++++++++++++++ src/lib/go/scanner_test.go | 202 ++++++++++++++++++ src/lib/go/token.go | 296 +++++++++++++++++++++++++++ src/lib/lang/Makefile | 66 ------ src/lib/lang/scanner.go | 475 ------------------------------------------- src/lib/lang/scanner_test.go | 202 ------------------ src/lib/lang/token.go | 296 --------------------------- src/run.bash | 2 +- 10 files changed, 1042 insertions(+), 1042 deletions(-) create mode 100644 src/lib/go/Makefile create mode 100644 src/lib/go/scanner.go create mode 100644 src/lib/go/scanner_test.go create mode 100644 src/lib/go/token.go delete mode 100644 src/lib/lang/Makefile delete mode 100644 src/lib/lang/scanner.go delete mode 100644 src/lib/lang/scanner_test.go delete mode 100644 src/lib/lang/token.go (limited to 'src') diff --git a/src/lib/Makefile b/src/lib/Makefile index 3df20b7a1..0bdaf709e 100644 --- a/src/lib/Makefile +++ b/src/lib/Makefile @@ -9,11 +9,11 @@ GC=6g DIRS=\ container\ fmt\ + go\ hash\ http\ io\ json\ - lang\ math\ net\ os\ @@ -99,12 +99,12 @@ strings.6: utf8.install testing.6: flag.install fmt.dirinstall fmt.dirinstall: io.dirinstall reflect.dirinstall strconv.dirinstall +go.dirinstall: strconv.dirinstall utf8.install unicode.dirinstall hash.dirinstall: os.dirinstall http.dirinstall: bufio.install io.dirinstall net.dirinstall os.dirinstall strings.install log.install io.dirinstall: os.dirinstall sync.dirinstall json.dirinstall: container.dirinstall fmt.dirinstall io.dirinstall math.dirinstall \ strconv.dirinstall strings.install utf8.install -lang.dirinstall: strconv.dirinstall utf8.install unicode.dirinstall # TODO(rsc): net is not supposed to 
depend on fmt or strings or strconv net.dirinstall: fmt.dirinstall once.install os.dirinstall strconv.dirinstall strings.install os.dirinstall: syscall.dirinstall once.install diff --git a/src/lib/go/Makefile b/src/lib/go/Makefile new file mode 100644 index 000000000..58b562171 --- /dev/null +++ b/src/lib/go/Makefile @@ -0,0 +1,66 @@ +# Copyright 2009 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +# DO NOT EDIT. Automatically generated by gobuild. +# gobuild -m >Makefile +O=6 +GC=$(O)g +CC=$(O)c -w +AS=$(O)a +AR=$(O)ar + +default: packages + +clean: + rm -f *.$O *.a $O.out + +test: packages + gotest + +coverage: packages + gotest + 6cov -g `pwd` | grep -v '_test\.go:' + +%.$O: %.go + $(GC) $*.go + +%.$O: %.c + $(CC) $*.c + +%.$O: %.s + $(AS) $*.s + +O1=\ + token.$O\ + +O2=\ + scanner.$O\ + +scanner.a: a1 a2 +token.a: a1 a2 + +a1: $(O1) + $(AR) grc token.a token.$O + rm -f $(O1) + +a2: $(O2) + $(AR) grc scanner.a scanner.$O + rm -f $(O2) + +newpkg: clean + $(AR) grc scanner.a + $(AR) grc token.a + +$(O1): newpkg +$(O2): a1 + +nuke: clean + rm -f $(GOROOT)/pkg/scanner.a $(GOROOT)/pkg/token.a + +packages: scanner.a token.a + +install: packages + cp scanner.a $(GOROOT)/pkg/scanner.a + cp token.a $(GOROOT)/pkg/token.a + diff --git a/src/lib/go/scanner.go b/src/lib/go/scanner.go new file mode 100644 index 000000000..ad7f80b5b --- /dev/null +++ b/src/lib/go/scanner.go @@ -0,0 +1,475 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package scanner + +// A Go scanner. Takes a []byte as source which can then be +// tokenized through repeated calls to the Scan() function. 
+// +// Sample use: +// +// import "token" +// import "scanner" +// +// func tokenize(src []byte) { +// var s scanner.Scanner; +// s.Init(src, nil /* no error handler */, false /* ignore comments */); +// for { +// pos, tok, lit := s.Scan(); +// if tok == token.EOF { +// return; +// } +// println(pos, token.TokenString(tok), string(lit)); +// } +// } + +import ( + "utf8"; + "unicode"; + "strconv"; + "token"; +) + + +// An implementation of an ErrorHandler must be provided to the Scanner. +// If a syntax error is encountered, Error() is called with the exact +// token position (the byte position of the token in the source) and the +// error message. + +type ErrorHandler interface { + Error(pos int, msg string); +} + + +type Scanner struct { + // immutable state + src []byte; // source + err ErrorHandler; // error reporting + scan_comments bool; // if set, comments are reported as tokens + + // scanning state + pos int; // current reading position + ch int; // one char look-ahead + chpos int; // position of ch +} + + +func isLetter(ch int) bool { + return + 'a' <= ch && ch <= 'z' || + 'A' <= ch && ch <= 'Z' || + ch == '_' || + ch >= 0x80 && unicode.IsLetter(ch); +} + + +func digitVal(ch int) int { + switch { + case '0' <= ch && ch <= '9': return ch - '0'; + case 'a' <= ch && ch <= 'f': return ch - 'a' + 10; + case 'A' <= ch && ch <= 'F': return ch - 'A' + 10; + } + return 16; // larger than any legal digit val +} + + +// Read the next Unicode char into S.ch. +// S.ch < 0 means end-of-file. +func (S *Scanner) next() { + if S.pos < len(S.src) { + // assume ASCII + r, w := int(S.src[S.pos]), 1; + if r >= 0x80 { + // not ASCII + r, w = utf8.DecodeRune(S.src[S.pos : len(S.src)]); + } + S.ch = r; + S.chpos = S.pos; + S.pos += w; + } else { + S.ch = -1; // eof + S.chpos = len(S.src); + } +} + + +// Initialize the scanner. +// +// The error handler (err) is called when an illegal token is encountered. 
+// If scan_comments is set to true, newline characters ('\n') and comments +// are recognized as token.COMMENT, otherwise they are treated as white +// space and ignored. + +func (S *Scanner) Init(src []byte, err ErrorHandler, scan_comments bool) { + S.src = src; + S.err = err; + S.scan_comments = scan_comments; + S.next(); +} + + +func charString(ch int) string { + s := string(ch); + switch ch { + case '\a': s = `\a`; + case '\b': s = `\b`; + case '\f': s = `\f`; + case '\n': s = `\n`; + case '\r': s = `\r`; + case '\t': s = `\t`; + case '\v': s = `\v`; + case '\\': s = `\\`; + case '\'': s = `\'`; + } + return "'" + s + "' (U+" + strconv.Itob(ch, 16) + ")"; +} + + +func (S *Scanner) error(pos int, msg string) { + S.err.Error(pos, msg); +} + + +func (S *Scanner) expect(ch int) { + if S.ch != ch { + S.error(S.chpos, "expected " + charString(ch) + ", found " + charString(S.ch)); + } + S.next(); // always make progress +} + + +func (S *Scanner) skipWhitespace() { + for { + switch S.ch { + case '\t', '\r', ' ': + // nothing to do + case '\n': + if S.scan_comments { + return; + } + default: + return; + } + S.next(); + } + panic("UNREACHABLE"); +} + + +func (S *Scanner) scanComment() []byte { + // first '/' already consumed + pos := S.chpos - 1; + + if S.ch == '/' { + //-style comment + for S.ch >= 0 { + S.next(); + if S.ch == '\n' { + // '\n' terminates comment but we do not include + // it in the comment (otherwise we don't see the + // start of a newline in skipWhitespace()). 
+ return S.src[pos : S.chpos]; + } + } + + } else { + /*-style comment */ + S.expect('*'); + for S.ch >= 0 { + ch := S.ch; + S.next(); + if ch == '*' && S.ch == '/' { + S.next(); + return S.src[pos : S.chpos]; + } + } + } + + S.error(pos, "comment not terminated"); + return S.src[pos : S.chpos]; +} + + +func (S *Scanner) scanIdentifier() (tok int, lit []byte) { + pos := S.chpos; + for isLetter(S.ch) || digitVal(S.ch) < 10 { + S.next(); + } + lit = S.src[pos : S.chpos]; + return token.Lookup(lit), lit; +} + + +func (S *Scanner) scanMantissa(base int) { + for digitVal(S.ch) < base { + S.next(); + } +} + + +func (S *Scanner) scanNumber(seen_decimal_point bool) (tok int, lit []byte) { + pos := S.chpos; + tok = token.INT; + + if seen_decimal_point { + tok = token.FLOAT; + pos--; // '.' is one byte + S.scanMantissa(10); + goto exponent; + } + + if S.ch == '0' { + // int or float + S.next(); + if S.ch == 'x' || S.ch == 'X' { + // hexadecimal int + S.next(); + S.scanMantissa(16); + } else { + // octal int or float + S.scanMantissa(8); + if digitVal(S.ch) < 10 || S.ch == '.' || S.ch == 'e' || S.ch == 'E' { + // float + tok = token.FLOAT; + goto mantissa; + } + // octal int + } + goto exit; + } + +mantissa: + // decimal int or float + S.scanMantissa(10); + + if S.ch == '.' 
{ + // float + tok = token.FLOAT; + S.next(); + S.scanMantissa(10) + } + +exponent: + if S.ch == 'e' || S.ch == 'E' { + // float + tok = token.FLOAT; + S.next(); + if S.ch == '-' || S.ch == '+' { + S.next(); + } + S.scanMantissa(10); + } + +exit: + return tok, S.src[pos : S.chpos]; +} + + +func (S *Scanner) scanDigits(n int, base int) { + for digitVal(S.ch) < base { + S.next(); + n--; + } + if n > 0 { + S.error(S.chpos, "illegal char escape"); + } +} + + +func (S *Scanner) scanEscape(quote int) { + ch := S.ch; + pos := S.chpos; + S.next(); + switch ch { + case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote: + // nothing to do + case '0', '1', '2', '3', '4', '5', '6', '7': + S.scanDigits(3 - 1, 8); // 1 char read already + case 'x': + S.scanDigits(2, 16); + case 'u': + S.scanDigits(4, 16); + case 'U': + S.scanDigits(8, 16); + default: + S.error(pos, "illegal char escape"); + } +} + + +func (S *Scanner) scanChar() []byte { + // '\'' already consumed + + pos := S.chpos - 1; + ch := S.ch; + S.next(); + if ch == '\\' { + S.scanEscape('\''); + } + + S.expect('\''); + return S.src[pos : S.chpos]; +} + + +func (S *Scanner) scanString() []byte { + // '"' already consumed + + pos := S.chpos - 1; + for S.ch != '"' { + ch := S.ch; + S.next(); + if ch == '\n' || ch < 0 { + S.error(pos, "string not terminated"); + break; + } + if ch == '\\' { + S.scanEscape('"'); + } + } + + S.next(); + return S.src[pos : S.chpos]; +} + + +func (S *Scanner) scanRawString() []byte { + // '`' already consumed + + pos := S.chpos - 1; + for S.ch != '`' { + ch := S.ch; + S.next(); + if ch == '\n' || ch < 0 { + S.error(pos, "string not terminated"); + break; + } + } + + S.next(); + return S.src[pos : S.chpos]; +} + + +// Helper functions for scanning multi-byte tokens such as >> += >>= . +// Different routines recognize different length tok_i based on matches +// of ch_i. If a token ends in '=', the result is tok1 or tok3 +// respectively. 
Otherwise, the result is tok0 if there was no other +// matching character, or tok2 if the matching character was ch2. + +func (S *Scanner) switch2(tok0, tok1 int) int { + if S.ch == '=' { + S.next(); + return tok1; + } + return tok0; +} + + +func (S *Scanner) switch3(tok0, tok1, ch2, tok2 int) int { + if S.ch == '=' { + S.next(); + return tok1; + } + if S.ch == ch2 { + S.next(); + return tok2; + } + return tok0; +} + + +func (S *Scanner) switch4(tok0, tok1, ch2, tok2, tok3 int) int { + if S.ch == '=' { + S.next(); + return tok1; + } + if S.ch == ch2 { + S.next(); + if S.ch == '=' { + S.next(); + return tok3; + } + return tok2; + } + return tok0; +} + + +// Scans the next token. Returns the token byte position in the source, +// its token value, and the corresponding literal text if the token is +// an identifier or basic type literal (token.IsLiteral(tok) == true). + +func (S *Scanner) Scan() (pos, tok int, lit []byte) { +scan_again: + S.skipWhitespace(); + + pos, tok = S.chpos, token.ILLEGAL; + + switch ch := S.ch; { + case isLetter(ch): + tok, lit = S.scanIdentifier(); + case digitVal(ch) < 10: + tok, lit = S.scanNumber(false); + default: + S.next(); // always make progress + switch ch { + case -1 : tok = token.EOF; + case '\n': tok, lit = token.COMMENT, []byte{'\n'}; + case '"' : tok, lit = token.STRING, S.scanString(); + case '\'': tok, lit = token.CHAR, S.scanChar(); + case '`' : tok, lit = token.STRING, S.scanRawString(); + case ':' : tok = S.switch2(token.COLON, token.DEFINE); + case '.' : + if digitVal(S.ch) < 10 { + tok, lit = S.scanNumber(true); + } else if S.ch == '.' { + S.next(); + if S.ch == '.' 
{ + S.next(); + tok = token.ELLIPSIS; + } + } else { + tok = token.PERIOD; + } + case ',': tok = token.COMMA; + case ';': tok = token.SEMICOLON; + case '(': tok = token.LPAREN; + case ')': tok = token.RPAREN; + case '[': tok = token.LBRACK; + case ']': tok = token.RBRACK; + case '{': tok = token.LBRACE; + case '}': tok = token.RBRACE; + case '+': tok = S.switch3(token.ADD, token.ADD_ASSIGN, '+', token.INC); + case '-': tok = S.switch3(token.SUB, token.SUB_ASSIGN, '-', token.DEC); + case '*': tok = S.switch2(token.MUL, token.MUL_ASSIGN); + case '/': + if S.ch == '/' || S.ch == '*' { + tok, lit = token.COMMENT, S.scanComment(); + if !S.scan_comments { + goto scan_again; + } + } else { + tok = S.switch2(token.QUO, token.QUO_ASSIGN); + } + case '%': tok = S.switch2(token.REM, token.REM_ASSIGN); + case '^': tok = S.switch2(token.XOR, token.XOR_ASSIGN); + case '<': + if S.ch == '-' { + S.next(); + tok = token.ARROW; + } else { + tok = S.switch4(token.LSS, token.LEQ, '<', token.SHL, token.SHL_ASSIGN); + } + case '>': tok = S.switch4(token.GTR, token.GEQ, '>', token.SHR, token.SHR_ASSIGN); + case '=': tok = S.switch2(token.ASSIGN, token.EQL); + case '!': tok = S.switch2(token.NOT, token.NEQ); + case '&': tok = S.switch3(token.AND, token.AND_ASSIGN, '&', token.LAND); + case '|': tok = S.switch3(token.OR, token.OR_ASSIGN, '|', token.LOR); + default: S.error(pos, "illegal character " + charString(ch)); + } + } + + return pos, tok, lit; +} diff --git a/src/lib/go/scanner_test.go b/src/lib/go/scanner_test.go new file mode 100644 index 000000000..136677cd0 --- /dev/null +++ b/src/lib/go/scanner_test.go @@ -0,0 +1,202 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package scanner + +import ( + "io"; + "token"; + "scanner"; + "testing"; +) + + +const /* class */ ( + special = iota; + literal; + operator; + keyword; +) + + +func tokenclass(tok int) int { + switch { + case token.IsLiteral(tok): return literal; + case token.IsOperator(tok): return operator; + case token.IsKeyword(tok): return keyword; + } + return special; +} + + +type elt struct { + pos int; + tok int; + lit string; + class int; +} + + +var tokens = [...]elt{ + // Special tokens + elt{ 0, token.COMMENT, "/* a comment */", special }, + elt{ 0, token.COMMENT, "\n", special }, + + // Identifiers and basic type literals + elt{ 0, token.IDENT, "foobar", literal }, + elt{ 0, token.INT, "0", literal }, + elt{ 0, token.INT, "01234567", literal }, + elt{ 0, token.INT, "0xcafebabe", literal }, + elt{ 0, token.FLOAT, "0.", literal }, + elt{ 0, token.FLOAT, ".0", literal }, + elt{ 0, token.FLOAT, "3.14159265", literal }, + elt{ 0, token.FLOAT, "1e0", literal }, + elt{ 0, token.FLOAT, "1e+100", literal }, + elt{ 0, token.FLOAT, "1e-100", literal }, + elt{ 0, token.FLOAT, "2.71828e-1000", literal }, + elt{ 0, token.CHAR, "'a'", literal }, + elt{ 0, token.STRING, "`foobar`", literal }, + + // Operators and delimitors + elt{ 0, token.ADD, "+", operator }, + elt{ 0, token.SUB, "-", operator }, + elt{ 0, token.MUL, "*", operator }, + elt{ 0, token.QUO, "/", operator }, + elt{ 0, token.REM, "%", operator }, + + elt{ 0, token.AND, "&", operator }, + elt{ 0, token.OR, "|", operator }, + elt{ 0, token.XOR, "^", operator }, + elt{ 0, token.SHL, "<<", operator }, + elt{ 0, token.SHR, ">>", operator }, + + elt{ 0, token.ADD_ASSIGN, "+=", operator }, + elt{ 0, token.SUB_ASSIGN, "-=", operator }, + elt{ 0, token.MUL_ASSIGN, "*=", operator }, + elt{ 0, token.QUO_ASSIGN, "/=", operator }, + elt{ 0, token.REM_ASSIGN, "%=", operator }, + + elt{ 0, token.AND_ASSIGN, "&=", operator }, + elt{ 0, token.OR_ASSIGN, "|=", operator }, + elt{ 0, token.XOR_ASSIGN, "^=", operator }, + elt{ 0, 
token.SHL_ASSIGN, "<<=", operator }, + elt{ 0, token.SHR_ASSIGN, ">>=", operator }, + + elt{ 0, token.LAND, "&&", operator }, + elt{ 0, token.LOR, "||", operator }, + elt{ 0, token.ARROW, "<-", operator }, + elt{ 0, token.INC, "++", operator }, + elt{ 0, token.DEC, "--", operator }, + + elt{ 0, token.EQL, "==", operator }, + elt{ 0, token.LSS, "<", operator }, + elt{ 0, token.GTR, ">", operator }, + elt{ 0, token.ASSIGN, "=", operator }, + elt{ 0, token.NOT, "!", operator }, + + elt{ 0, token.NEQ, "!=", operator }, + elt{ 0, token.LEQ, "<=", operator }, + elt{ 0, token.GEQ, ">=", operator }, + elt{ 0, token.DEFINE, ":=", operator }, + elt{ 0, token.ELLIPSIS, "...", operator }, + + elt{ 0, token.LPAREN, "(", operator }, + elt{ 0, token.LBRACK, "[", operator }, + elt{ 0, token.LBRACE, "{", operator }, + elt{ 0, token.COMMA, ",", operator }, + elt{ 0, token.PERIOD, ".", operator }, + + elt{ 0, token.RPAREN, ")", operator }, + elt{ 0, token.RBRACK, "]", operator }, + elt{ 0, token.RBRACE, "}", operator }, + elt{ 0, token.SEMICOLON, ";", operator }, + elt{ 0, token.COLON, ":", operator }, + + // Keywords + elt{ 0, token.BREAK, "break", keyword }, + elt{ 0, token.CASE, "case", keyword }, + elt{ 0, token.CHAN, "chan", keyword }, + elt{ 0, token.CONST, "const", keyword }, + elt{ 0, token.CONTINUE, "continue", keyword }, + + elt{ 0, token.DEFAULT, "default", keyword }, + elt{ 0, token.DEFER, "defer", keyword }, + elt{ 0, token.ELSE, "else", keyword }, + elt{ 0, token.FALLTHROUGH, "fallthrough", keyword }, + elt{ 0, token.FOR, "for", keyword }, + + elt{ 0, token.FUNC, "func", keyword }, + elt{ 0, token.GO, "go", keyword }, + elt{ 0, token.GOTO, "goto", keyword }, + elt{ 0, token.IF, "if", keyword }, + elt{ 0, token.IMPORT, "import", keyword }, + + elt{ 0, token.INTERFACE, "interface", keyword }, + elt{ 0, token.MAP, "map", keyword }, + elt{ 0, token.PACKAGE, "package", keyword }, + elt{ 0, token.RANGE, "range", keyword }, + elt{ 0, token.RETURN, "return", keyword }, + + elt{ 
0, token.SELECT, "select", keyword }, + elt{ 0, token.STRUCT, "struct", keyword }, + elt{ 0, token.SWITCH, "switch", keyword }, + elt{ 0, token.TYPE, "type", keyword }, + elt{ 0, token.VAR, "var", keyword }, +} + + +func init() { + // set pos fields + pos := 0; + for i := 0; i < len(tokens); i++ { + tokens[i].pos = pos; + pos += len(tokens[i].lit) + 1; // + 1 for space in between + } +} + + +type TestErrorHandler struct { + t *testing.T +} + +func (h *TestErrorHandler) Error(pos int, msg string) { + h.t.Errorf("Error() called (pos = %d, msg = %s)", pos, msg); +} + + +func Test(t *testing.T) { + // make source + var src string; + for i, e := range tokens { + src += e.lit + " "; + } + + // set up scanner + var s scanner.Scanner; + s.Init(io.StringBytes(src), &TestErrorHandler{t}, true); + + // verify scan + for i, e := range tokens { + pos, tok, lit := s.Scan(); + if pos != e.pos { + t.Errorf("bad position for %s: got %d, expected %d", e.lit, pos, e.pos); + } + if tok != e.tok { + t.Errorf("bad token for %s: got %s, expected %s", e.lit, token.TokenString(tok), token.TokenString(e.tok)); + } + if token.IsLiteral(e.tok) && string(lit) != e.lit { + t.Errorf("bad literal for %s: got %s, expected %s", e.lit, string(lit), e.lit); + } + if tokenclass(tok) != e.class { + t.Errorf("bad class for %s: got %d, expected %d", e.lit, tokenclass(tok), e.class); + } + } + pos, tok, lit := s.Scan(); + if tok != token.EOF { + t.Errorf("bad token at eof: got %s, expected EOF", token.TokenString(tok)); + } + if tokenclass(tok) != special { + t.Errorf("bad class at eof: got %d, expected %d", tokenclass(tok), special); + } +} diff --git a/src/lib/go/token.go b/src/lib/go/token.go new file mode 100644 index 000000000..7691bac63 --- /dev/null +++ b/src/lib/go/token.go @@ -0,0 +1,296 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package token + +// Defines Go tokens and basic token operations. + +import "strconv" + +const ( + // Special tokens + ILLEGAL = iota; + EOF; + COMMENT; + + // Identifiers and basic type literals + // (these tokens stand for classes of literals) + literal_beg; + IDENT; + INT; + FLOAT; + CHAR; + STRING; + literal_end; + + // Operators and delimiters + operator_beg; + ADD; + SUB; + MUL; + QUO; + REM; + + AND; + OR; + XOR; + SHL; + SHR; + + ADD_ASSIGN; + SUB_ASSIGN; + MUL_ASSIGN; + QUO_ASSIGN; + REM_ASSIGN; + + AND_ASSIGN; + OR_ASSIGN; + XOR_ASSIGN; + SHL_ASSIGN; + SHR_ASSIGN; + + LAND; + LOR; + ARROW; + INC; + DEC; + + EQL; + LSS; + GTR; + ASSIGN; + NOT; + + NEQ; + LEQ; + GEQ; + DEFINE; + ELLIPSIS; + + LPAREN; + LBRACK; + LBRACE; + COMMA; + PERIOD; + + RPAREN; + RBRACK; + RBRACE; + SEMICOLON; + COLON; + operator_end; + + // Keywords + keyword_beg; + BREAK; + CASE; + CHAN; + CONST; + CONTINUE; + + DEFAULT; + DEFER; + ELSE; + FALLTHROUGH; + FOR; + + FUNC; + GO; + GOTO; + IF; + IMPORT; + + INTERFACE; + MAP; + PACKAGE; + RANGE; + RETURN; + + SELECT; + STRUCT; + SWITCH; + TYPE; + VAR; + keyword_end; +) + + +// At the moment we have no array literal syntax that lets us describe +// the index for each element - use a map for now to make sure they are +// in sync. 
+var tokens = map [int] string { + ILLEGAL : "ILLEGAL", + + EOF : "EOF", + COMMENT : "COMMENT", + + IDENT : "IDENT", + INT : "INT", + FLOAT : "FLOAT", + CHAR : "CHAR", + STRING : "STRING", + + ADD : "+", + SUB : "-", + MUL : "*", + QUO : "/", + REM : "%", + + AND : "&", + OR : "|", + XOR : "^", + SHL : "<<", + SHR : ">>", + + ADD_ASSIGN : "+=", + SUB_ASSIGN : "-=", + MUL_ASSIGN : "*=", + QUO_ASSIGN : "/=", + REM_ASSIGN : "%=", + + AND_ASSIGN : "&=", + OR_ASSIGN : "|=", + XOR_ASSIGN : "^=", + SHL_ASSIGN : "<<=", + SHR_ASSIGN : ">>=", + + LAND : "&&", + LOR : "||", + ARROW : "<-", + INC : "++", + DEC : "--", + + EQL : "==", + LSS : "<", + GTR : ">", + ASSIGN : "=", + NOT : "!", + + NEQ : "!=", + LEQ : "<=", + GEQ : ">=", + DEFINE : ":=", + ELLIPSIS : "...", + + LPAREN : "(", + LBRACK : "[", + LBRACE : "{", + COMMA : ",", + PERIOD : ".", + + RPAREN : ")", + RBRACK : "]", + RBRACE : "}", + SEMICOLON : ";", + COLON : ":", + + BREAK : "break", + CASE : "case", + CHAN : "chan", + CONST : "const", + CONTINUE : "continue", + + DEFAULT : "default", + DEFER : "defer", + ELSE : "else", + FALLTHROUGH : "fallthrough", + FOR : "for", + + FUNC : "func", + GO : "go", + GOTO : "goto", + IF : "if", + IMPORT : "import", + + INTERFACE : "interface", + MAP : "map", + PACKAGE : "package", + RANGE : "range", + RETURN : "return", + + SELECT : "select", + STRUCT : "struct", + SWITCH : "switch", + TYPE : "type", + VAR : "var", +} + +func TokenString(tok int) string { + if str, exists := tokens[tok]; exists { + return str; + } + return "token(" + strconv.Itoa(tok) + ")"; +} + + +// A set of constants for precedence-based expression parsing. +// Non-operators have lowest precedence, followed by operators +// starting with precedence 0 up to unary operators and finally +// the highest precedence used for tokens used in selectors, etc. + +const ( + LowestPrec = -1; // non-operators + UnaryPrec = 7; + HighestPrec = 8; +) + +// Returns precedence of a token. 
Returns LowestPrec +// if the token is not an operator. +func Precedence(tok int) int { + switch tok { + case COLON: + return 0; + case LOR: + return 1; + case LAND: + return 2; + case ARROW: + return 3; + case EQL, NEQ, LSS, LEQ, GTR, GEQ: + return 4; + case ADD, SUB, OR, XOR: + return 5; + case MUL, QUO, REM, SHL, SHR, AND: + return 6; + } + return LowestPrec; +} + + +var keywords map [string] int; + +func init() { + keywords = make(map [string] int); + for i := keyword_beg + 1; i < keyword_end; i++ { + keywords[tokens[i]] = i; + } +} + + +// Map an identifier to its keyword token or IDENT (if not a keyword). +func Lookup(ident []byte) int { + // TODO Maps with []byte key are illegal because []byte does not + // support == . Should find a more efficient solution eventually. + if tok, is_keyword := keywords[string(ident)]; is_keyword { + return tok; + } + return IDENT; +} + + +// Predicates + +// Identifiers and basic type literals +func IsLiteral(tok int) bool { + return literal_beg < tok && tok < literal_end; +} + +// Operators and delimiters +func IsOperator(tok int) bool { + return operator_beg < tok && tok < operator_end; +} + +func IsKeyword(tok int) bool { + return keyword_beg < tok && tok < keyword_end; +} diff --git a/src/lib/lang/Makefile b/src/lib/lang/Makefile deleted file mode 100644 index 58b562171..000000000 --- a/src/lib/lang/Makefile +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright 2009 The Go Authors. All rights reserved. -# Use of this source code is governed by a BSD-style -# license that can be found in the LICENSE file. - -# DO NOT EDIT. Automatically generated by gobuild. 
-# gobuild -m >Makefile -O=6 -GC=$(O)g -CC=$(O)c -w -AS=$(O)a -AR=$(O)ar - -default: packages - -clean: - rm -f *.$O *.a $O.out - -test: packages - gotest - -coverage: packages - gotest - 6cov -g `pwd` | grep -v '_test\.go:' - -%.$O: %.go - $(GC) $*.go - -%.$O: %.c - $(CC) $*.c - -%.$O: %.s - $(AS) $*.s - -O1=\ - token.$O\ - -O2=\ - scanner.$O\ - -scanner.a: a1 a2 -token.a: a1 a2 - -a1: $(O1) - $(AR) grc token.a token.$O - rm -f $(O1) - -a2: $(O2) - $(AR) grc scanner.a scanner.$O - rm -f $(O2) - -newpkg: clean - $(AR) grc scanner.a - $(AR) grc token.a - -$(O1): newpkg -$(O2): a1 - -nuke: clean - rm -f $(GOROOT)/pkg/scanner.a $(GOROOT)/pkg/token.a - -packages: scanner.a token.a - -install: packages - cp scanner.a $(GOROOT)/pkg/scanner.a - cp token.a $(GOROOT)/pkg/token.a - diff --git a/src/lib/lang/scanner.go b/src/lib/lang/scanner.go deleted file mode 100644 index ad7f80b5b..000000000 --- a/src/lib/lang/scanner.go +++ /dev/null @@ -1,475 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package scanner - -// A Go scanner. Takes a []byte as source which can then be -// tokenized through repeated calls to the Scan() function. -// -// Sample use: -// -// import "token" -// import "scanner" -// -// func tokenize(src []byte) { -// var s scanner.Scanner; -// s.Init(src, nil /* no error handler */, false /* ignore comments */); -// for { -// pos, tok, lit := s.Scan(); -// if tok == Scanner.EOF { -// return; -// } -// println(pos, token.TokenString(tok), string(lit)); -// } -// } - -import ( - "utf8"; - "unicode"; - "strconv"; - "token"; -) - - -// An implementation of an ErrorHandler must be provided to the Scanner. -// If a syntax error is encountered, Error() is called with the exact -// token position (the byte position of the token in the source) and the -// error message. 
- -type ErrorHandler interface { - Error(pos int, msg string); -} - - -type Scanner struct { - // immutable state - src []byte; // source - err ErrorHandler; // error reporting - scan_comments bool; // if set, comments are reported as tokens - - // scanning state - pos int; // current reading position - ch int; // one char look-ahead - chpos int; // position of ch -} - - -func isLetter(ch int) bool { - return - 'a' <= ch && ch <= 'z' || - 'A' <= ch && ch <= 'Z' || - ch == '_' || - ch >= 0x80 && unicode.IsLetter(ch); -} - - -func digitVal(ch int) int { - switch { - case '0' <= ch && ch <= '9': return ch - '0'; - case 'a' <= ch && ch <= 'f': return ch - 'a' + 10; - case 'A' <= ch && ch <= 'F': return ch - 'A' + 10; - } - return 16; // larger than any legal digit val -} - - -// Read the next Unicode char into S.ch. -// S.ch < 0 means end-of-file. -func (S *Scanner) next() { - if S.pos < len(S.src) { - // assume ASCII - r, w := int(S.src[S.pos]), 1; - if r >= 0x80 { - // not ASCII - r, w = utf8.DecodeRune(S.src[S.pos : len(S.src)]); - } - S.ch = r; - S.chpos = S.pos; - S.pos += w; - } else { - S.ch = -1; // eof - S.chpos = len(S.src); - } -} - - -// Initialize the scanner. -// -// The error handler (err) is called when an illegal token is encountered. -// If scan_comments is set to true, newline characters ('\n') and comments -// are recognized as token.COMMENT, otherwise they are treated as white -// space and ignored. 
- -func (S *Scanner) Init(src []byte, err ErrorHandler, scan_comments bool) { - S.src = src; - S.err = err; - S.scan_comments = scan_comments; - S.next(); -} - - -func charString(ch int) string { - s := string(ch); - switch ch { - case '\a': s = `\a`; - case '\b': s = `\b`; - case '\f': s = `\f`; - case '\n': s = `\n`; - case '\r': s = `\r`; - case '\t': s = `\t`; - case '\v': s = `\v`; - case '\\': s = `\\`; - case '\'': s = `\'`; - } - return "'" + s + "' (U+" + strconv.Itob(ch, 16) + ")"; -} - - -func (S *Scanner) error(pos int, msg string) { - S.err.Error(pos, msg); -} - - -func (S *Scanner) expect(ch int) { - if S.ch != ch { - S.error(S.chpos, "expected " + charString(ch) + ", found " + charString(S.ch)); - } - S.next(); // always make progress -} - - -func (S *Scanner) skipWhitespace() { - for { - switch S.ch { - case '\t', '\r', ' ': - // nothing to do - case '\n': - if S.scan_comments { - return; - } - default: - return; - } - S.next(); - } - panic("UNREACHABLE"); -} - - -func (S *Scanner) scanComment() []byte { - // first '/' already consumed - pos := S.chpos - 1; - - if S.ch == '/' { - //-style comment - for S.ch >= 0 { - S.next(); - if S.ch == '\n' { - // '\n' terminates comment but we do not include - // it in the comment (otherwise we don't see the - // start of a newline in skipWhitespace()). 
- return S.src[pos : S.chpos]; - } - } - - } else { - /*-style comment */ - S.expect('*'); - for S.ch >= 0 { - ch := S.ch; - S.next(); - if ch == '*' && S.ch == '/' { - S.next(); - return S.src[pos : S.chpos]; - } - } - } - - S.error(pos, "comment not terminated"); - return S.src[pos : S.chpos]; -} - - -func (S *Scanner) scanIdentifier() (tok int, lit []byte) { - pos := S.chpos; - for isLetter(S.ch) || digitVal(S.ch) < 10 { - S.next(); - } - lit = S.src[pos : S.chpos]; - return token.Lookup(lit), lit; -} - - -func (S *Scanner) scanMantissa(base int) { - for digitVal(S.ch) < base { - S.next(); - } -} - - -func (S *Scanner) scanNumber(seen_decimal_point bool) (tok int, lit []byte) { - pos := S.chpos; - tok = token.INT; - - if seen_decimal_point { - tok = token.FLOAT; - pos--; // '.' is one byte - S.scanMantissa(10); - goto exponent; - } - - if S.ch == '0' { - // int or float - S.next(); - if S.ch == 'x' || S.ch == 'X' { - // hexadecimal int - S.next(); - S.scanMantissa(16); - } else { - // octal int or float - S.scanMantissa(8); - if digitVal(S.ch) < 10 || S.ch == '.' || S.ch == 'e' || S.ch == 'E' { - // float - tok = token.FLOAT; - goto mantissa; - } - // octal int - } - goto exit; - } - -mantissa: - // decimal int or float - S.scanMantissa(10); - - if S.ch == '.' 
{ - // float - tok = token.FLOAT; - S.next(); - S.scanMantissa(10) - } - -exponent: - if S.ch == 'e' || S.ch == 'E' { - // float - tok = token.FLOAT; - S.next(); - if S.ch == '-' || S.ch == '+' { - S.next(); - } - S.scanMantissa(10); - } - -exit: - return tok, S.src[pos : S.chpos]; -} - - -func (S *Scanner) scanDigits(n int, base int) { - for digitVal(S.ch) < base { - S.next(); - n--; - } - if n > 0 { - S.error(S.chpos, "illegal char escape"); - } -} - - -func (S *Scanner) scanEscape(quote int) { - ch := S.ch; - pos := S.chpos; - S.next(); - switch ch { - case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote: - // nothing to do - case '0', '1', '2', '3', '4', '5', '6', '7': - S.scanDigits(3 - 1, 8); // 1 char read already - case 'x': - S.scanDigits(2, 16); - case 'u': - S.scanDigits(4, 16); - case 'U': - S.scanDigits(8, 16); - default: - S.error(pos, "illegal char escape"); - } -} - - -func (S *Scanner) scanChar() []byte { - // '\'' already consumed - - pos := S.chpos - 1; - ch := S.ch; - S.next(); - if ch == '\\' { - S.scanEscape('\''); - } - - S.expect('\''); - return S.src[pos : S.chpos]; -} - - -func (S *Scanner) scanString() []byte { - // '"' already consumed - - pos := S.chpos - 1; - for S.ch != '"' { - ch := S.ch; - S.next(); - if ch == '\n' || ch < 0 { - S.error(pos, "string not terminated"); - break; - } - if ch == '\\' { - S.scanEscape('"'); - } - } - - S.next(); - return S.src[pos : S.chpos]; -} - - -func (S *Scanner) scanRawString() []byte { - // '`' already consumed - - pos := S.chpos - 1; - for S.ch != '`' { - ch := S.ch; - S.next(); - if ch == '\n' || ch < 0 { - S.error(pos, "string not terminated"); - break; - } - } - - S.next(); - return S.src[pos : S.chpos]; -} - - -// Helper functions for scanning multi-byte tokens such as >> += >>= . -// Different routines recognize different length tok_i based on matches -// of ch_i. If a token ends in '=', the result is tok1 or tok3 -// respectively. 
Otherwise, the result is tok0 if there was no other -// matching character, or tok2 if the matching character was ch2. - -func (S *Scanner) switch2(tok0, tok1 int) int { - if S.ch == '=' { - S.next(); - return tok1; - } - return tok0; -} - - -func (S *Scanner) switch3(tok0, tok1, ch2, tok2 int) int { - if S.ch == '=' { - S.next(); - return tok1; - } - if S.ch == ch2 { - S.next(); - return tok2; - } - return tok0; -} - - -func (S *Scanner) switch4(tok0, tok1, ch2, tok2, tok3 int) int { - if S.ch == '=' { - S.next(); - return tok1; - } - if S.ch == ch2 { - S.next(); - if S.ch == '=' { - S.next(); - return tok3; - } - return tok2; - } - return tok0; -} - - -// Scans the next token. Returns the token byte position in the source, -// its token value, and the corresponding literal text if the token is -// an identifier or basic type literal (token.IsLiteral(tok) == true). - -func (S *Scanner) Scan() (pos, tok int, lit []byte) { -scan_again: - S.skipWhitespace(); - - pos, tok = S.chpos, token.ILLEGAL; - - switch ch := S.ch; { - case isLetter(ch): - tok, lit = S.scanIdentifier(); - case digitVal(ch) < 10: - tok, lit = S.scanNumber(false); - default: - S.next(); // always make progress - switch ch { - case -1 : tok = token.EOF; - case '\n': tok, lit = token.COMMENT, []byte{'\n'}; - case '"' : tok, lit = token.STRING, S.scanString(); - case '\'': tok, lit = token.CHAR, S.scanChar(); - case '`' : tok, lit = token.STRING, S.scanRawString(); - case ':' : tok = S.switch2(token.COLON, token.DEFINE); - case '.' : - if digitVal(S.ch) < 10 { - tok, lit = S.scanNumber(true); - } else if S.ch == '.' { - S.next(); - if S.ch == '.' 
{ - S.next(); - tok = token.ELLIPSIS; - } - } else { - tok = token.PERIOD; - } - case ',': tok = token.COMMA; - case ';': tok = token.SEMICOLON; - case '(': tok = token.LPAREN; - case ')': tok = token.RPAREN; - case '[': tok = token.LBRACK; - case ']': tok = token.RBRACK; - case '{': tok = token.LBRACE; - case '}': tok = token.RBRACE; - case '+': tok = S.switch3(token.ADD, token.ADD_ASSIGN, '+', token.INC); - case '-': tok = S.switch3(token.SUB, token.SUB_ASSIGN, '-', token.DEC); - case '*': tok = S.switch2(token.MUL, token.MUL_ASSIGN); - case '/': - if S.ch == '/' || S.ch == '*' { - tok, lit = token.COMMENT, S.scanComment(); - if !S.scan_comments { - goto scan_again; - } - } else { - tok = S.switch2(token.QUO, token.QUO_ASSIGN); - } - case '%': tok = S.switch2(token.REM, token.REM_ASSIGN); - case '^': tok = S.switch2(token.XOR, token.XOR_ASSIGN); - case '<': - if S.ch == '-' { - S.next(); - tok = token.ARROW; - } else { - tok = S.switch4(token.LSS, token.LEQ, '<', token.SHL, token.SHL_ASSIGN); - } - case '>': tok = S.switch4(token.GTR, token.GEQ, '>', token.SHR, token.SHR_ASSIGN); - case '=': tok = S.switch2(token.ASSIGN, token.EQL); - case '!': tok = S.switch2(token.NOT, token.NEQ); - case '&': tok = S.switch3(token.AND, token.AND_ASSIGN, '&', token.LAND); - case '|': tok = S.switch3(token.OR, token.OR_ASSIGN, '|', token.LOR); - default: S.error(pos, "illegal character " + charString(ch)); - } - } - - return pos, tok, lit; -} diff --git a/src/lib/lang/scanner_test.go b/src/lib/lang/scanner_test.go deleted file mode 100644 index 136677cd0..000000000 --- a/src/lib/lang/scanner_test.go +++ /dev/null @@ -1,202 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -package scanner - -import ( - "io"; - "token"; - "scanner"; - "testing"; -) - - -const /* class */ ( - special = iota; - literal; - operator; - keyword; -) - - -func tokenclass(tok int) int { - switch { - case token.IsLiteral(tok): return literal; - case token.IsOperator(tok): return operator; - case token.IsKeyword(tok): return keyword; - } - return special; -} - - -type elt struct { - pos int; - tok int; - lit string; - class int; -} - - -var tokens = [...]elt{ - // Special tokens - elt{ 0, token.COMMENT, "/* a comment */", special }, - elt{ 0, token.COMMENT, "\n", special }, - - // Identifiers and basic type literals - elt{ 0, token.IDENT, "foobar", literal }, - elt{ 0, token.INT, "0", literal }, - elt{ 0, token.INT, "01234567", literal }, - elt{ 0, token.INT, "0xcafebabe", literal }, - elt{ 0, token.FLOAT, "0.", literal }, - elt{ 0, token.FLOAT, ".0", literal }, - elt{ 0, token.FLOAT, "3.14159265", literal }, - elt{ 0, token.FLOAT, "1e0", literal }, - elt{ 0, token.FLOAT, "1e+100", literal }, - elt{ 0, token.FLOAT, "1e-100", literal }, - elt{ 0, token.FLOAT, "2.71828e-1000", literal }, - elt{ 0, token.CHAR, "'a'", literal }, - elt{ 0, token.STRING, "`foobar`", literal }, - - // Operators and delimitors - elt{ 0, token.ADD, "+", operator }, - elt{ 0, token.SUB, "-", operator }, - elt{ 0, token.MUL, "*", operator }, - elt{ 0, token.QUO, "/", operator }, - elt{ 0, token.REM, "%", operator }, - - elt{ 0, token.AND, "&", operator }, - elt{ 0, token.OR, "|", operator }, - elt{ 0, token.XOR, "^", operator }, - elt{ 0, token.SHL, "<<", operator }, - elt{ 0, token.SHR, ">>", operator }, - - elt{ 0, token.ADD_ASSIGN, "+=", operator }, - elt{ 0, token.SUB_ASSIGN, "-=", operator }, - elt{ 0, token.MUL_ASSIGN, "*=", operator }, - elt{ 0, token.QUO_ASSIGN, "/=", operator }, - elt{ 0, token.REM_ASSIGN, "%=", operator }, - - elt{ 0, token.AND_ASSIGN, "&=", operator }, - elt{ 0, token.OR_ASSIGN, "|=", operator }, - elt{ 0, token.XOR_ASSIGN, "^=", operator }, - elt{ 0, 
token.SHL_ASSIGN, "<<=", operator }, - elt{ 0, token.SHR_ASSIGN, ">>=", operator }, - - elt{ 0, token.LAND, "&&", operator }, - elt{ 0, token.LOR, "||", operator }, - elt{ 0, token.ARROW, "<-", operator }, - elt{ 0, token.INC, "++", operator }, - elt{ 0, token.DEC, "--", operator }, - - elt{ 0, token.EQL, "==", operator }, - elt{ 0, token.LSS, "<", operator }, - elt{ 0, token.GTR, ">", operator }, - elt{ 0, token.ASSIGN, "=", operator }, - elt{ 0, token.NOT, "!", operator }, - - elt{ 0, token.NEQ, "!=", operator }, - elt{ 0, token.LEQ, "<=", operator }, - elt{ 0, token.GEQ, ">=", operator }, - elt{ 0, token.DEFINE, ":=", operator }, - elt{ 0, token.ELLIPSIS, "...", operator }, - - elt{ 0, token.LPAREN, "(", operator }, - elt{ 0, token.LBRACK, "[", operator }, - elt{ 0, token.LBRACE, "{", operator }, - elt{ 0, token.COMMA, ",", operator }, - elt{ 0, token.PERIOD, ".", operator }, - - elt{ 0, token.RPAREN, ")", operator }, - elt{ 0, token.RBRACK, "]", operator }, - elt{ 0, token.RBRACE, "}", operator }, - elt{ 0, token.SEMICOLON, ";", operator }, - elt{ 0, token.COLON, ":", operator }, - - // Keywords - elt{ 0, token.BREAK, "break", keyword }, - elt{ 0, token.CASE, "case", keyword }, - elt{ 0, token.CHAN, "chan", keyword }, - elt{ 0, token.CONST, "const", keyword }, - elt{ 0, token.CONTINUE, "continue", keyword }, - - elt{ 0, token.DEFAULT, "default", keyword }, - elt{ 0, token.DEFER, "defer", keyword }, - elt{ 0, token.ELSE, "else", keyword }, - elt{ 0, token.FALLTHROUGH, "fallthrough", keyword }, - elt{ 0, token.FOR, "for", keyword }, - - elt{ 0, token.FUNC, "func", keyword }, - elt{ 0, token.GO, "go", keyword }, - elt{ 0, token.GOTO, "goto", keyword }, - elt{ 0, token.IF, "if", keyword }, - elt{ 0, token.IMPORT, "import", keyword }, - - elt{ 0, token.INTERFACE, "interface", keyword }, - elt{ 0, token.MAP, "map", keyword }, - elt{ 0, token.PACKAGE, "package", keyword }, - elt{ 0, token.RANGE, "range", keyword }, - elt{ 0, token.RETURN, "return", keyword }, - - elt{ 
0, token.SELECT, "select", keyword }, - elt{ 0, token.STRUCT, "struct", keyword }, - elt{ 0, token.SWITCH, "switch", keyword }, - elt{ 0, token.TYPE, "type", keyword }, - elt{ 0, token.VAR, "var", keyword }, -} - - -func init() { - // set pos fields - pos := 0; - for i := 0; i < len(tokens); i++ { - tokens[i].pos = pos; - pos += len(tokens[i].lit) + 1; // + 1 for space in between - } -} - - -type TestErrorHandler struct { - t *testing.T -} - -func (h *TestErrorHandler) Error(pos int, msg string) { - h.t.Errorf("Error() called (pos = %d, msg = %s)", pos, msg); -} - - -func Test(t *testing.T) { - // make source - var src string; - for i, e := range tokens { - src += e.lit + " "; - } - - // set up scanner - var s scanner.Scanner; - s.Init(io.StringBytes(src), &TestErrorHandler{t}, true); - - // verify scan - for i, e := range tokens { - pos, tok, lit := s.Scan(); - if pos != e.pos { - t.Errorf("bad position for %s: got %d, expected %d", e.lit, pos, e.pos); - } - if tok != e.tok { - t.Errorf("bad token for %s: got %s, expected %s", e.lit, token.TokenString(tok), token.TokenString(e.tok)); - } - if token.IsLiteral(e.tok) && string(lit) != e.lit { - t.Errorf("bad literal for %s: got %s, expected %s", e.lit, string(lit), e.lit); - } - if tokenclass(tok) != e.class { - t.Errorf("bad class for %s: got %d, expected %d", e.lit, tokenclass(tok), e.class); - } - } - pos, tok, lit := s.Scan(); - if tok != token.EOF { - t.Errorf("bad token at eof: got %s, expected EOF", token.TokenString(tok)); - } - if tokenclass(tok) != special { - t.Errorf("bad class at eof: got %d, expected %d", tokenclass(tok), special); - } -} diff --git a/src/lib/lang/token.go b/src/lib/lang/token.go deleted file mode 100644 index 7691bac63..000000000 --- a/src/lib/lang/token.go +++ /dev/null @@ -1,296 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -package token - -// Defines Go tokens and basic token operations. - -import "strconv" - -const ( - // Special tokens - ILLEGAL = iota; - EOF; - COMMENT; - - // Identifiers and basic type literals - // (these tokens stand for classes of literals) - literal_beg; - IDENT; - INT; - FLOAT; - CHAR; - STRING; - literal_end; - - // Operators and delimiters - operator_beg; - ADD; - SUB; - MUL; - QUO; - REM; - - AND; - OR; - XOR; - SHL; - SHR; - - ADD_ASSIGN; - SUB_ASSIGN; - MUL_ASSIGN; - QUO_ASSIGN; - REM_ASSIGN; - - AND_ASSIGN; - OR_ASSIGN; - XOR_ASSIGN; - SHL_ASSIGN; - SHR_ASSIGN; - - LAND; - LOR; - ARROW; - INC; - DEC; - - EQL; - LSS; - GTR; - ASSIGN; - NOT; - - NEQ; - LEQ; - GEQ; - DEFINE; - ELLIPSIS; - - LPAREN; - LBRACK; - LBRACE; - COMMA; - PERIOD; - - RPAREN; - RBRACK; - RBRACE; - SEMICOLON; - COLON; - operator_end; - - // Keywords - keyword_beg; - BREAK; - CASE; - CHAN; - CONST; - CONTINUE; - - DEFAULT; - DEFER; - ELSE; - FALLTHROUGH; - FOR; - - FUNC; - GO; - GOTO; - IF; - IMPORT; - - INTERFACE; - MAP; - PACKAGE; - RANGE; - RETURN; - - SELECT; - STRUCT; - SWITCH; - TYPE; - VAR; - keyword_end; -) - - -// At the moment we have no array literal syntax that lets us describe -// the index for each element - use a map for now to make sure they are -// in sync. 
-var tokens = map [int] string { - ILLEGAL : "ILLEGAL", - - EOF : "EOF", - COMMENT : "COMMENT", - - IDENT : "IDENT", - INT : "INT", - FLOAT : "FLOAT", - CHAR : "CHAR", - STRING : "STRING", - - ADD : "+", - SUB : "-", - MUL : "*", - QUO : "/", - REM : "%", - - AND : "&", - OR : "|", - XOR : "^", - SHL : "<<", - SHR : ">>", - - ADD_ASSIGN : "+=", - SUB_ASSIGN : "-=", - MUL_ASSIGN : "+=", - QUO_ASSIGN : "/=", - REM_ASSIGN : "%=", - - AND_ASSIGN : "&=", - OR_ASSIGN : "|=", - XOR_ASSIGN : "^=", - SHL_ASSIGN : "<<=", - SHR_ASSIGN : ">>=", - - LAND : "&&", - LOR : "||", - ARROW : "<-", - INC : "++", - DEC : "--", - - EQL : "==", - LSS : "<", - GTR : ">", - ASSIGN : "=", - NOT : "!", - - NEQ : "!=", - LEQ : "<=", - GEQ : ">=", - DEFINE : ":=", - ELLIPSIS : "...", - - LPAREN : "(", - LBRACK : "[", - LBRACE : "{", - COMMA : ",", - PERIOD : ".", - - RPAREN : ")", - RBRACK : "]", - RBRACE : "}", - SEMICOLON : ";", - COLON : ":", - - BREAK : "break", - CASE : "case", - CHAN : "chan", - CONST : "const", - CONTINUE : "continue", - - DEFAULT : "default", - DEFER : "defer", - ELSE : "else", - FALLTHROUGH : "fallthrough", - FOR : "for", - - FUNC : "func", - GO : "go", - GOTO : "goto", - IF : "if", - IMPORT : "import", - - INTERFACE : "interface", - MAP : "map", - PACKAGE : "package", - RANGE : "range", - RETURN : "return", - - SELECT : "select", - STRUCT : "struct", - SWITCH : "switch", - TYPE : "type", - VAR : "var", -} - -func TokenString(tok int) string { - if str, exists := tokens[tok]; exists { - return str; - } - return "token(" + strconv.Itoa(tok) + ")"; -} - - -// A set of constants for precedence-based expression parsing. -// Non-operators have lowest precedence, followed by operators -// starting with precedence 0 up to unary operators and finally -// the highest precedence used for tokens used in selectors, etc. - -const ( - LowestPrec = -1; // non-operators - UnaryPrec = 7; - HighestPrec = 8; -) - -// Returns precedence of a token. 
Returns LowestPrec -// if the token is not an operator. -func Precedence(tok int) int { - switch tok { - case COLON: - return 0; - case LOR: - return 1; - case LAND: - return 2; - case ARROW: - return 3; - case EQL, NEQ, LSS, LEQ, GTR, GEQ: - return 4; - case ADD, SUB, OR, XOR: - return 5; - case MUL, QUO, REM, SHL, SHR, AND: - return 6; - } - return LowestPrec; -} - - -var keywords map [string] int; - -func init() { - keywords = make(map [string] int); - for i := keyword_beg + 1; i < keyword_end; i++ { - keywords[tokens[i]] = i; - } -} - - -// Map an identifier to its keyword token or IDENT (if not a keyword). -func Lookup(ident []byte) int { - // TODO Maps with []byte key are illegal because []byte does not - // support == . Should find a more efficient solution eventually. - if tok, is_keyword := keywords[string(ident)]; is_keyword { - return tok; - } - return IDENT; -} - - -// Predicates - -// Identifiers and basic type literals -func IsLiteral(tok int) bool { - return literal_beg < tok && tok < literal_end; -} - -// Operators and delimiters -func IsOperator(tok int) bool { - return operator_beg < tok && tok < operator_end; -} - -func IsKeyword(tok int) bool { - return keyword_beg < tok && tok < keyword_end; -} diff --git a/src/run.bash b/src/run.bash index a2fffebf6..73b2ef83f 100755 --- a/src/run.bash +++ b/src/run.bash @@ -25,10 +25,10 @@ maketest() { maketest \ lib/fmt\ + lib/go\ lib/hash\ lib/io\ lib/json\ - lib/lang\ lib/math\ lib/net\ lib/os\ -- cgit v1.2.3