summary refs log tree commit diff
path: root/usr/gri/pretty/scanner.go
diff options
context:
space:
mode:
Diffstat (limited to 'usr/gri/pretty/scanner.go')
-rw-r--r-- usr/gri/pretty/scanner.go 792
1 files changed, 792 insertions, 0 deletions
diff --git a/usr/gri/pretty/scanner.go b/usr/gri/pretty/scanner.go
new file mode 100644
index 000000000..1e2645cb2
--- /dev/null
+++ b/usr/gri/pretty/scanner.go
@@ -0,0 +1,792 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package Scanner
+
+import Platform "platform"
+import Utils "utils"
+
+
+// Token codes produced by the scanner. ILLEGAL is 0 and every following
+// name takes the next iota value, so the declaration order below fixes
+// the numeric codes. TokenName maps a code back to its printable form.
+export const (
+ // special tokens and literals
+ ILLEGAL = iota;
+ EOF;
+ INT;
+ FLOAT;
+ STRING;
+
+ // separators
+ COMMA;
+ COLON;
+ SEMICOLON;
+ PERIOD;
+
+ // brackets
+ LPAREN;
+ RPAREN;
+ LBRACK;
+ RBRACK;
+ LBRACE;
+ RBRACE;
+
+ // assignment and declaration
+ ASSIGN;
+ DEFINE;
+
+ // unary operators
+ INC;
+ DEC;
+ NOT;
+
+ // bitwise operators
+ AND;
+ OR;
+ XOR;
+
+ // arithmetic operators
+ ADD;
+ SUB;
+ MUL;
+ QUO;
+ REM;
+
+ // comparison operators
+ EQL;
+ NEQ;
+ LSS;
+ LEQ;
+ GTR;
+ GEQ;
+
+ // shift operators
+ SHL;
+ SHR;
+
+ ARROW;
+ HASH;
+
+ // arithmetic operators combined with assignment
+ ADD_ASSIGN;
+ SUB_ASSIGN;
+ MUL_ASSIGN;
+ QUO_ASSIGN;
+ REM_ASSIGN;
+
+ // bitwise operators combined with assignment
+ AND_ASSIGN;
+ OR_ASSIGN;
+ XOR_ASSIGN;
+
+ // shift operators combined with assignment
+ SHL_ASSIGN;
+ SHR_ASSIGN;
+
+ // logical operators
+ LAND;
+ LOR;
+
+ // IDENT must be immediately before keywords
+ IDENT;
+
+ // keywords
+ // KEYWORDS_BEG and KEYWORDS_END are markers around the keyword codes,
+ // not keywords themselves; init loops over this range to fill Keywords.
+ KEYWORDS_BEG;
+ BREAK;
+ CASE;
+ CHAN;
+ CONST;
+ CONTINUE;
+ DEFAULT;
+ ELSE;
+ EXPORT;
+ FALLTHROUGH;
+ FOR;
+ FUNC;
+ GO;
+ GOTO;
+ IF;
+ IMPORT;
+ INTERFACE;
+ MAP;
+ PACKAGE;
+ RANGE;
+ RETURN;
+ SELECT;
+ STRUCT;
+ SWITCH;
+ TYPE;
+ VAR;
+ KEYWORDS_END;
+)
+
+
+// Keywords maps a keyword spelling to its token code. It is allocated and
+// filled by init and consulted by ScanIdentifier.
+var Keywords *map [string] int;
+// VerboseMsgs is set by init; when true, Error prints the column as well
+// as the line of the error position.
+var VerboseMsgs bool; // error message customization
+
+
+// TokenName returns the printable form of the token code tok: a fixed
+// description for the special/literal tokens, the operator or separator
+// text for punctuation, and the keyword spelling for keywords.
+// Codes without a case below (this includes the KEYWORDS_BEG and
+// KEYWORDS_END markers) yield "???".
+export func TokenName(tok int) string {
+	switch tok {
+	case ILLEGAL: return "illegal";
+	case EOF: return "eof";
+	case INT: return "int";
+	case FLOAT: return "float";
+	case STRING: return "string";
+
+	case COMMA: return ",";
+	case COLON: return ":";
+	case SEMICOLON: return ";";
+	case PERIOD: return ".";
+
+	case LPAREN: return "(";
+	case RPAREN: return ")";
+	case LBRACK: return "[";
+	case RBRACK: return "]";
+	// NOTE(review): unlike the other brackets these two return the token
+	// name rather than the symbol; kept as-is since it may be deliberate.
+	case LBRACE: return "LBRACE";
+	case RBRACE: return "RBRACE";
+
+	case ASSIGN: return "=";
+	case DEFINE: return ":=";
+
+	case INC: return "++";
+	case DEC: return "--";
+	case NOT: return "!";
+
+	case AND: return "&";
+	case OR: return "|";
+	case XOR: return "^";
+
+	case ADD: return "+";
+	case SUB: return "-";
+	case MUL: return "*";
+	case QUO: return "/";
+	case REM: return "%";
+
+	case EQL: return "==";
+	case NEQ: return "!=";
+	case LSS: return "<";
+	case LEQ: return "<=";
+	case GTR: return ">";
+	case GEQ: return ">=";
+
+	case SHL: return "<<";
+	case SHR: return ">>";
+
+	case ARROW: return "<-";
+	case HASH: return "#";
+
+	case ADD_ASSIGN: return "+=";
+	case SUB_ASSIGN: return "-=";
+	// was "+=", which duplicated the ADD_ASSIGN spelling
+	case MUL_ASSIGN: return "*=";
+	case QUO_ASSIGN: return "/=";
+	case REM_ASSIGN: return "%=";
+
+	case AND_ASSIGN: return "&=";
+	case OR_ASSIGN: return "|=";
+	case XOR_ASSIGN: return "^=";
+
+	case SHL_ASSIGN: return "<<=";
+	case SHR_ASSIGN: return ">>=";
+
+	case LAND: return "&&";
+	case LOR: return "||";
+
+	case IDENT: return "ident";
+
+	case BREAK: return "break";
+	case CASE: return "case";
+	case CHAN: return "chan";
+	case CONST: return "const";
+	case CONTINUE: return "continue";
+	case DEFAULT: return "default";
+	case ELSE: return "else";
+	case EXPORT: return "export";
+	case FALLTHROUGH: return "fallthrough";
+	case FOR: return "for";
+	case FUNC: return "func";
+	case GO: return "go";
+	case GOTO: return "goto";
+	case IF: return "if";
+	case IMPORT: return "import";
+	case INTERFACE: return "interface";
+	case MAP: return "map";
+	case PACKAGE: return "package";
+	case RANGE: return "range";
+	case RETURN: return "return";
+	case SELECT: return "select";
+	case STRUCT: return "struct";
+	case SWITCH: return "switch";
+	case TYPE: return "type";
+	case VAR: return "var";
+	}
+
+	return "???";
+}
+
+
+// init allocates the keyword table, enters every keyword under the
+// spelling TokenName reports for it, and decides whether error messages
+// carry column information.
+func init() {
+	Keywords = new(map [string] int);
+
+	// KEYWORDS_BEG and KEYWORDS_END only mark the range; TokenName has no
+	// case for them and returns "???", so visiting them would add a bogus
+	// "???" entry. Iterate strictly between the two markers.
+	for i := KEYWORDS_BEG + 1; i < KEYWORDS_END; i++ {
+		Keywords[TokenName(i)] = i;
+	}
+
+	// Provide column information in error messages for gri only...
+	VerboseMsgs = Platform.USER == "gri";
+}
+
+
+// is_whitespace reports whether ch is a blank, carriage return,
+// newline or tab.
+func is_whitespace(ch int) bool {
+	switch ch {
+	case ' ', '\r', '\n', '\t':
+		return true;
+	}
+	return false;
+}
+
+
+// is_letter reports whether ch may appear in an identifier as a letter:
+// an ASCII letter, an underscore, or any char with a code of 128 or more.
+func is_letter(ch int) bool {
+	switch {
+	case 'a' <= ch && ch <= 'z':
+		return true;
+	case 'A' <= ch && ch <= 'Z':
+		return true;
+	case ch == '_':
+		return true;
+	case ch >= 128:
+		return true;
+	}
+	return false;
+}
+
+
+// digit_val returns the numeric value of ch read as a digit: 0-9 for
+// '0'-'9' and 10-15 for 'a'-'f' or 'A'-'F'. Every other char yields 16,
+// which is larger than any legal digit value, so callers can test
+// digit_val(ch) < base.
+func digit_val(ch int) int {
+	switch {
+	case '0' <= ch && ch <= '9':
+		return ch - '0';
+	case 'a' <= ch && ch <= 'f':
+		return ch - 'a' + 10;
+	case 'A' <= ch && ch <= 'F':
+		return ch - 'A' + 10;
+	}
+	return 16;
+}
+
+
+// Scanner holds the state for scanning one source string. Open sets it
+// up and reads the first char; Next advances the one-char look-ahead;
+// Scan returns successive tokens.
+export type Scanner struct {
+ filename string; // error reporting only
+ nerrors int; // number of errors
+ errpos int; // last error position
+
+ src string; // scanned source
+ pos int; // current reading position
+ ch int; // one char look-ahead
+ chpos int; // position of ch
+}
+
+
+// Read the next Unicode char into S.ch.
+// S.ch < 0 means end-of-file.
+//
+// The char is decoded from the UTF-8 bytes of S.src starting at S.pos.
+// 1-, 2- and 3-byte sequences are decoded; anything else (including the
+// lead byte of a longer sequence, a truncated sequence, a stray
+// continuation byte or an overlong encoding) yields Bad (0xFFFD).
+// S.chpos is set to the offset where the char starts. On success S.pos
+// moves past the whole sequence; on a bad encoding it moves past one
+// byte only; at end-of-file it is left unchanged.
+//
+func (S *Scanner) Next() {
+ const (
+ Bit1 = 7;
+ Bitx = 6;
+ Bit2 = 5;
+ Bit3 = 4;
+ Bit4 = 3;
+
+ T1 = (1 << (Bit1 + 1) - 1) ^ 0xFF; // 0000 0000
+ Tx = (1 << (Bitx + 1) - 1) ^ 0xFF; // 1000 0000
+ T2 = (1 << (Bit2 + 1) - 1) ^ 0xFF; // 1100 0000
+ T3 = (1 << (Bit3 + 1) - 1) ^ 0xFF; // 1110 0000
+ T4 = (1 << (Bit4 + 1) - 1) ^ 0xFF; // 1111 0000
+
+ Rune1 = 1 << (Bit1 + 0*Bitx) - 1; // 0000 0000 0111 1111
+ Rune2 = 1 << (Bit2 + 1*Bitx) - 1; // 0000 0111 1111 1111
+ Rune3 = 1 << (Bit3 + 2*Bitx) - 1; // 1111 1111 1111 1111
+
+ Maskx = 0x3F; // 1 << Bitx - 1; // 0011 1111
+ Testx = 0xC0; // Maskx ^ 0xFF; // 1100 0000
+
+ Bad = 0xFFFD; // Runeerror
+ );
+
+ src := S.src;
+ lim := len(src);
+ pos := S.pos;
+
+ // 1-byte sequence
+ // 0000-007F => T1
+ if pos >= lim {
+ S.ch = -1; // end of file
+ S.chpos = lim;
+ return;
+ }
+ c0 := int(src[pos]);
+ pos++;
+ if c0 < Tx {
+ S.ch = c0;
+ S.chpos = S.pos;
+ S.pos = pos;
+ return;
+ }
+
+ // 2-byte sequence
+ // 0080-07FF => T2 Tx
+ if pos >= lim {
+ goto bad;
+ }
+ // XOR with Tx clears the 10 prefix of a continuation byte; if any of
+ // the top two bits is still set, the byte was not a continuation byte.
+ c1 := int(src[pos]) ^ Tx;
+ pos++;
+ if c1 & Testx != 0 {
+ goto bad;
+ }
+ if c0 < T3 {
+ // c0 below T2 is itself a continuation byte and cannot start a sequence
+ if c0 < T2 {
+ goto bad;
+ }
+ r := (c0 << Bitx | c1) & Rune2;
+ // a value that fits in one byte must not be encoded in two
+ if r <= Rune1 {
+ goto bad;
+ }
+ S.ch = r;
+ S.chpos = S.pos;
+ S.pos = pos;
+ return;
+ }
+
+ // 3-byte sequence
+ // 0800-FFFF => T3 Tx Tx
+ if pos >= lim {
+ goto bad;
+ }
+ c2 := int(src[pos]) ^ Tx;
+ pos++;
+ if c2 & Testx != 0 {
+ goto bad;
+ }
+ // lead bytes at or above T4 are not decoded and fall through to bad
+ if c0 < T4 {
+ r := (((c0 << Bitx | c1) << Bitx) | c2) & Rune3;
+ // a value that fits in two bytes must not be encoded in three
+ if r <= Rune2 {
+ goto bad;
+ }
+ S.ch = r;
+ S.chpos = S.pos;
+ S.pos = pos;
+ return;
+ }
+
+ // bad encoding
+ // Only the offending first byte is consumed; the local pos is discarded.
+bad:
+ S.ch = Bad;
+ S.chpos = S.pos;
+ S.pos += 1;
+ return;
+}
+
+
+// Compute (line, column) information for a given source position.
+// pos is a byte offset into S.src; a value past the end is clamped to
+// len(S.src). Lines and columns both start at 1, and the column is
+// counted in bytes from the start of the line.
+func (S *Scanner) LineCol(pos int) (line, col int) {
+	src := S.src;
+	if pos > len(src) {
+		pos = len(src);
+	}
+
+	line = 1;
+	// lpos is the offset of the newline that precedes the current line.
+	// Starting at -1 (an imaginary newline before offset 0) makes columns
+	// on the first line 1-based like on every later line; starting at 0
+	// reported the first char of line 1 as column 0.
+	lpos := -1;
+	for i := 0; i < pos; i++ {
+		if src[i] == '\n' {
+			line++;
+			lpos = i;
+		}
+	}
+
+	return line, pos - lpos;
+}
+
+
+// Error prints msg, prefixed by the file name and (for pos >= 0) the
+// line of pos, plus the column when VerboseMsgs is set. The first error
+// is always printed; a later one is printed only if it lies more than
+// errdist positions away from the last printed error. The process exits
+// with status 1 once 10 errors have been printed.
+func (S *Scanner) Error(pos int, msg string) {
+	const errdist = 10;
+
+	// distance to the last printed error, made non-negative
+	dist := pos - S.errpos;
+	if dist < 0 {
+		dist = -dist;
+	}
+
+	report := dist > errdist || S.nerrors == 0;
+	if report {
+		print(S.filename);
+		if pos >= 0 {
+			line, col := S.LineCol(pos);
+			if !VerboseMsgs {
+				print(":", line);
+			} else {
+				print(":", line, ":", col);
+			}
+		}
+		print(": ", msg, "\n");
+		S.errpos = pos;
+		S.nerrors++;
+	}
+
+	if S.nerrors >= 10 {
+		sys.exit(1);
+	}
+}
+
+
+// Open prepares S for scanning src. filename is recorded for error
+// messages, the error bookkeeping is reset, and the first char of src is
+// read into the look-ahead.
+func (S *Scanner) Open(filename, src string) {
+	// source text and reading position
+	S.src = src;
+	S.pos = 0;
+
+	// error bookkeeping
+	S.filename = filename;
+	S.errpos = 0;
+	S.nerrors = 0;
+
+	// load the look-ahead char
+	S.Next();
+}
+
+
+func CharString(ch int) string {
+ s := string(ch);
+ switch ch {
+ case '\a': s = `\a`;
+ case '\b': s = `\b`;
+ case '\f': s = `\f`;
+ case '\n': s = `\n`;
+ case '\r': s = `\r`;
+ case '\t': s = `\t`;
+ case '\v': s = `\v`;
+ case '\\': s = `\\`;
+ case '\'': s = `\'`;
+ }
+ return "'" + s + "' (U+" + Utils.IntToString(ch, 16) + ")";
+}
+
+
+// Expect checks that the look-ahead char is ch and reports an error at
+// its position if it is not. The look-ahead char is consumed either way
+// so that scanning always advances.
+func (S *Scanner) Expect(ch int) {
+	found := S.ch;
+	if found != ch {
+		S.Error(S.chpos, "expected " + CharString(ch) + ", found " + CharString(found));
+	}
+	S.Next();
+}
+
+
+// SkipWhitespace advances until the look-ahead char is not whitespace
+// (as defined by is_whitespace).
+func (S *Scanner) SkipWhitespace() {
+	for {
+		if !is_whitespace(S.ch) {
+			break;
+		}
+		S.Next();
+	}
+}
+
+
+// SkipComment skips a comment whose leading '/' has already been
+// consumed. A line comment is skipped up to, but not including, the
+// newline (or end of file). A block comment is skipped through its
+// closing "*/"; if the source ends first, an error is reported at the
+// position of the comment's leading '/'.
+func (S *Scanner) SkipComment() {
+	if S.ch != '/' {
+		// block comment
+		start := S.chpos - 1;  // offset of the leading '/'
+		S.Expect('*');
+		for S.ch >= 0 {
+			prev := S.ch;
+			S.Next();
+			if prev == '*' && S.ch == '/' {
+				S.Next();
+				return;
+			}
+		}
+		S.Error(start, "comment not terminated");
+		return;
+	}
+
+	// line comment: consume the second '/' and the rest of the line
+	S.Next();
+	for S.ch >= 0 && S.ch != '\n' {
+		S.Next();
+	}
+}
+
+
+// ScanIdentifier consumes a run of letters and decimal digits starting
+// at the look-ahead char and returns its text as val. tok is the keyword
+// code if val is found in Keywords, and IDENT otherwise.
+func (S *Scanner) ScanIdentifier() (tok int, val string) {
+	start := S.chpos;
+	for digit_val(S.ch) < 10 || is_letter(S.ch) {
+		S.Next();
+	}
+	val = S.src[start : S.chpos];
+
+	var is_keyword bool;
+	tok, is_keyword = Keywords[val];
+	if is_keyword {
+		return tok, val;
+	}
+	return IDENT, val;
+}
+
+
+// ScanMantissa consumes chars for as long as the look-ahead char is a
+// digit in the given base.
+func (S *Scanner) ScanMantissa(base int) {
+	for {
+		if digit_val(S.ch) >= base {
+			break;
+		}
+		S.Next();
+	}
+}
+
+
+// ScanNumber scans a number and returns its token code (INT or FLOAT)
+// together with its source text.
+// If seen_decimal_point is set, the caller has already consumed a
+// leading '.'; the result is then a FLOAT whose text includes that '.'.
+// Otherwise scanning starts at the look-ahead char: "0x"/"0X" starts a
+// hexadecimal int, a leading "0" an octal int or a float, anything else
+// a decimal int or float.
+// NOTE(review): as written, a leading 0 followed by the digit 8 or 9
+// (e.g. "089") is returned as FLOAT even without '.' or exponent, and
+// "0x" with no hex digits or an exponent with no digits is accepted
+// without an error - confirm whether that is intended.
+func (S *Scanner) ScanNumber(seen_decimal_point bool) (tok int, val string) {
+ pos := S.chpos;
+ tok = INT;
+
+ if seen_decimal_point {
+ tok = FLOAT;
+ pos--; // '.' is one byte
+ S.ScanMantissa(10);
+ goto exponent;
+ }
+
+ if S.ch == '0' {
+ // int or float
+ S.Next();
+ if S.ch == 'x' || S.ch == 'X' {
+ // hexadecimal int
+ S.Next();
+ S.ScanMantissa(16);
+ } else {
+ // octal int or float
+ S.ScanMantissa(8);
+ // what follows the octal digits decides: a further decimal digit,
+ // a '.', or an exponent marker means this is a float after all
+ if digit_val(S.ch) < 10 || S.ch == '.' || S.ch == 'e' || S.ch == 'E' {
+ // float
+ tok = FLOAT;
+ goto mantissa;
+ }
+ // octal int
+ }
+ goto exit;
+ }
+
+mantissa:
+ // decimal int or float
+ S.ScanMantissa(10);
+
+ if S.ch == '.' {
+ // float
+ tok = FLOAT;
+ S.Next();
+ S.ScanMantissa(10)
+ }
+
+exponent:
+ if S.ch == 'e' || S.ch == 'E' {
+ // float
+ tok = FLOAT;
+ S.Next();
+ if S.ch == '-' || S.ch == '+' {
+ S.Next();
+ }
+ S.ScanMantissa(10);
+ }
+
+exit:
+ // the text runs from the start (including a caller-consumed '.') up to
+ // the look-ahead char
+ return tok, S.src[pos : S.chpos];
+}
+
+
+// ScanDigits consumes at most n digits of the given base, as required
+// by a fixed-width char escape. If fewer than n digits are present, an
+// "illegal char escape" error is reported at the look-ahead position.
+func (S *Scanner) ScanDigits(n int, base int) {
+	// Stop after n digits: without the n > 0 test the loop swallowed every
+	// following digit, so over-long escapes were accepted silently and
+	// digits after the escape were taken as part of it.
+	for n > 0 && digit_val(S.ch) < base {
+		S.Next();
+		n--;
+	}
+	if n > 0 {
+		S.Error(S.chpos, "illegal char escape");
+	}
+}
+
+
+// ScanEscape scans the part of an escape sequence that follows the
+// backslash; quote is the quote char of the enclosing literal, which is
+// accepted as an escaped char. An unknown escape char is reported as an
+// error at its position.
+// The result is the escape char itself for the single-char escapes and
+// for the quote, and "" in all other cases.
+// TODO: fix this routine (the numeric escapes do not produce a value yet)
+func (S *Scanner) ScanEscape(quote int) string {
+	start := S.chpos;
+	esc := S.ch;
+	S.Next();
+
+	switch esc {
+	case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\':
+		return string(esc);
+
+	case '0', '1', '2', '3', '4', '5', '6', '7':
+		// octal escape: 3 digits, the first of which is already read
+		S.ScanDigits(3 - 1, 8);
+
+	case 'x':
+		S.ScanDigits(2, 16);
+
+	case 'u':
+		S.ScanDigits(4, 16);
+
+	case 'U':
+		S.ScanDigits(8, 16);
+
+	default:
+		// quote is not a constant, so it is tested here rather than in a case
+		if esc == quote {
+			return string(quote);
+		}
+		S.Error(start, "illegal char escape");
+	}
+
+	return "";  // TODO fix this
+}
+
+
+// ScanChar scans a char literal whose opening quote has already been
+// consumed and returns the literal's source text, starting at that
+// opening quote.
+func (S *Scanner) ScanChar() string {
+	start := S.chpos - 1;  // offset of the opening '\''
+
+	first := S.ch;
+	S.Next();
+	if first == '\\' {
+		S.ScanEscape('\'');
+	}
+	S.Expect('\'');
+
+	lit := S.src[start : S.chpos];
+	return lit;
+}
+
+
+// ScanString scans a double-quoted string whose opening quote has
+// already been consumed and returns the source text, starting at that
+// opening quote. A newline or the end of the source before the closing
+// quote is reported as "string not terminated".
+func (S *Scanner) ScanString() string {
+	start := S.chpos - 1;  // offset of the opening '"'
+
+	for S.ch != '"' {
+		c := S.ch;
+		S.Next();
+		if c == '\\' {
+			S.ScanEscape('"');
+		}
+		if c < 0 || c == '\n' {
+			S.Error(start, "string not terminated");
+			break;
+		}
+	}
+	S.Next();
+
+	lit := S.src[start : S.chpos];
+	return lit;
+}
+
+
+// ScanRawString scans a back-quoted string whose opening quote has
+// already been consumed and returns the source text, starting at that
+// opening quote. No escapes are interpreted. A newline or the end of the
+// source before the closing quote is reported as "string not terminated".
+func (S *Scanner) ScanRawString() string {
+	start := S.chpos - 1;  // offset of the opening '`'
+
+	for {
+		if S.ch == '`' {
+			break;
+		}
+		c := S.ch;
+		S.Next();
+		if c < 0 || c == '\n' {
+			S.Error(start, "string not terminated");
+			break;
+		}
+	}
+	S.Next();
+
+	lit := S.src[start : S.chpos];
+	return lit;
+}
+
+
+// Select2 picks between an operator and its '=' form: if the look-ahead
+// char is '=', it is consumed and tok1 is returned; otherwise tok0.
+func (S *Scanner) Select2(tok0, tok1 int) int {
+	if S.ch != '=' {
+		return tok0;
+	}
+	S.Next();
+	return tok1;
+}
+
+
+// Select3 picks among three tokens by the look-ahead char: '=' is
+// consumed and gives tok1, ch2 is consumed and gives tok2, and anything
+// else gives tok0 with nothing consumed. '=' is tested first.
+func (S *Scanner) Select3(tok0, tok1, ch2, tok2 int) int {
+	switch {
+	case S.ch == '=':
+		S.Next();
+		return tok1;
+	case S.ch == ch2:
+		S.Next();
+		return tok2;
+	}
+	return tok0;
+}
+
+
+// Select4 picks among four tokens by up to two look-ahead chars:
+// '=' gives tok1; ch2 followed by '=' gives tok3; ch2 alone gives tok2;
+// anything else gives tok0. Only the chars that matched are consumed.
+func (S *Scanner) Select4(tok0, tok1, ch2, tok2, tok3 int) int {
+	if S.ch == '=' {
+		S.Next();
+		return tok1;
+	}
+	if S.ch != ch2 {
+		return tok0;
+	}
+
+	// ch2 seen; an '=' right after it selects the assignment form
+	S.Next();
+	if S.ch != '=' {
+		return tok2;
+	}
+	S.Next();
+	return tok3;
+}
+
+
+// Scan skips whitespace and comments and returns the next token.
+// tok is the token code, pos the source position of the token's first
+// char, and val the token's source text for identifiers, keywords,
+// numbers, strings and char literals (val is left empty for all other
+// tokens). A char literal is returned with the code INT. An unexpected
+// char is reported through Error and returned as ILLEGAL; at the end of
+// the source the result is EOF.
+func (S *Scanner) Scan() (tok, pos int, val string) {
+ S.SkipWhitespace();
+
+ ch := S.ch;
+ tok = ILLEGAL;
+ pos = S.chpos;
+
+ switch {
+ case is_letter(ch): tok, val = S.ScanIdentifier();
+ case digit_val(ch) < 10: tok, val = S.ScanNumber(false);
+ default:
+ // ch holds the char just consumed; S.ch is now the char after it
+ S.Next(); // always make progress
+ switch ch {
+ case -1: tok = EOF;
+ case '"': tok, val = STRING, S.ScanString();
+ case '\'': tok, val = INT, S.ScanChar();
+ case '`': tok, val = STRING, S.ScanRawString();
+ case ':': tok = S.Select2(COLON, DEFINE);
+ case '.':
+ // a '.' directly followed by a digit starts a float
+ if digit_val(S.ch) < 10 {
+ tok, val = S.ScanNumber(true);
+ } else {
+ tok = PERIOD;
+ }
+ case ',': tok = COMMA;
+ case ';': tok = SEMICOLON;
+ case '(': tok = LPAREN;
+ case ')': tok = RPAREN;
+ case '[': tok = LBRACK;
+ case ']': tok = RBRACK;
+ case '{': tok = LBRACE;
+ case '}': tok = RBRACE;
+ case '+': tok = S.Select3(ADD, ADD_ASSIGN, '+', INC);
+ case '-': tok = S.Select3(SUB, SUB_ASSIGN, '-', DEC);
+ case '*': tok = S.Select2(MUL, MUL_ASSIGN);
+ case '/':
+ // a comment yields no token: skip it and scan again from there
+ if S.ch == '/' || S.ch == '*' {
+ S.SkipComment();
+ // cannot simply return because of 6g bug
+ tok, pos, val = S.Scan();
+ return tok, pos, val;
+ }
+ tok = S.Select2(QUO, QUO_ASSIGN);
+ case '%': tok = S.Select2(REM, REM_ASSIGN);
+ case '^': tok = S.Select2(XOR, XOR_ASSIGN);
+ case '<':
+ // "<-" is tested before the "<", "<=", "<<", "<<=" family
+ if S.ch == '-' {
+ S.Next();
+ tok = ARROW;
+ } else {
+ tok = S.Select4(LSS, LEQ, '<', SHL, SHL_ASSIGN);
+ }
+ case '>': tok = S.Select4(GTR, GEQ, '>', SHR, SHR_ASSIGN);
+ case '=': tok = S.Select2(ASSIGN, EQL);
+ case '!': tok = S.Select2(NOT, NEQ);
+ case '&': tok = S.Select3(AND, AND_ASSIGN, '&', LAND);
+ case '|': tok = S.Select3(OR, OR_ASSIGN, '|', LOR);
+ case '#': tok = HASH;
+ default:
+ S.Error(pos, "illegal character " + CharString(ch));
+ tok = ILLEGAL;
+ }
+ }
+
+ return tok, pos, val;
+}
+
+
+// Token bundles the three results of one Scan call; TokenStream sends
+// values of this type over its channel.
+export type Token struct {
+ pos int; // source position of the token, as returned by Scan
+ tok int; // token code
+ val string; // token text, as returned by Scan
+}
+
+
+// TokenStream creates a channel, starts a goroutine that calls S.Scan
+// repeatedly and sends each result on the channel as a freshly allocated
+// Token, and returns the channel for receiving. The goroutine ends after
+// it has sent the EOF token; the channel is not closed.
+func (S *Scanner) TokenStream() *<-chan *Token {
+ ch := new(chan *Token);
+ go func(S *Scanner, ch *chan <- *Token) {
+ for {
+ t := new(Token);
+ t.tok, t.pos, t.val = S.Scan();
+ ch <- t;
+ // the EOF token is delivered too, then scanning stops
+ if t.tok == EOF {
+ break;
+ }
+ }
+ }(S, ch);
+ return ch;
+}