diff options
Diffstat (limited to 'usr/gri/src/scanner.go')
-rw-r--r-- | usr/gri/src/scanner.go | 777 |
1 files changed, 0 insertions, 777 deletions
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// These declarations form package Scanner: a hand-written lexical scanner
// for Go source text that is held in memory as a UTF-8 encoded string.
//
// Typical use: call Open on a Scanner value, then call Scan repeatedly until
// it returns EOF.

// Token classes returned by Scanner.Scan.
//
// The numeric values follow declaration order. All keywords live in one
// contiguous run bracketed by the sentinels KEYWORDS_BEG and KEYWORDS_END
// (which are not tokens themselves) so that Init can enumerate them.
const (
	ILLEGAL = iota
	EOF
	IDENT
	STRING
	NUMBER

	COMMA
	COLON
	SEMICOLON
	PERIOD

	LPAREN
	RPAREN
	LBRACK
	RBRACK
	LBRACE
	RBRACE

	ASSIGN
	DEFINE

	INC
	DEC
	NOT

	AND
	OR
	XOR

	ADD
	SUB
	MUL
	QUO
	REM

	EQL
	NEQ
	LSS
	LEQ
	GTR
	GEQ

	SHL
	SHR

	ADD_ASSIGN
	SUB_ASSIGN
	MUL_ASSIGN
	QUO_ASSIGN
	REM_ASSIGN

	AND_ASSIGN
	OR_ASSIGN
	XOR_ASSIGN

	SHL_ASSIGN
	SHR_ASSIGN

	LAND
	LOR

	// keywords
	KEYWORDS_BEG
	BREAK
	CASE
	CHAN
	CONST
	CONTINUE
	DEFAULT
	ELSE
	EXPORT
	FALLTHROUGH
	FALSE
	FOR
	FUNC
	GO
	GOTO
	IF
	IMPORT
	INTERFACE
	IOTA
	MAP
	NEW
	NIL
	PACKAGE
	RANGE
	RETURN
	SELECT
	STRUCT
	SWITCH
	TRUE
	TYPE
	VAR
	KEYWORDS_END
)

// Keywords maps the spelling of every keyword to its token value.
// It is nil until Init has run; Open calls Init on first use.
var Keywords map[string]int

// token_names holds the printable form of every token, indexed by token
// value. Entries left empty (the keyword sentinels) have no printable form.
var token_names = [...]string{
	ILLEGAL: "illegal",
	EOF:     "eof",
	IDENT:   "ident",
	STRING:  "string",
	NUMBER:  "number",

	COMMA:     ",",
	COLON:     ":",
	SEMICOLON: ";",
	PERIOD:    ".",

	LPAREN: "(",
	RPAREN: ")",
	LBRACK: "[",
	RBRACK: "]",
	LBRACE: "{",
	RBRACE: "}",

	ASSIGN: "=",
	DEFINE: ":=",

	INC: "++",
	DEC: "--",
	NOT: "!",

	AND: "&",
	OR:  "|",
	XOR: "^",

	ADD: "+",
	SUB: "-",
	MUL: "*",
	QUO: "/",
	REM: "%",

	EQL: "==",
	NEQ: "!=",
	LSS: "<",
	LEQ: "<=",
	GTR: ">",
	GEQ: ">=",

	SHL: "<<",
	SHR: ">>",

	ADD_ASSIGN: "+=",
	SUB_ASSIGN: "-=",
	MUL_ASSIGN: "*=",
	QUO_ASSIGN: "/=",
	REM_ASSIGN: "%=",

	AND_ASSIGN: "&=",
	OR_ASSIGN:  "|=",
	XOR_ASSIGN: "^=",

	SHL_ASSIGN: "<<=",
	SHR_ASSIGN: ">>=",

	LAND: "&&",
	LOR:  "||",

	BREAK:       "break",
	CASE:        "case",
	CHAN:        "chan",
	CONST:       "const",
	CONTINUE:    "continue",
	DEFAULT:     "default",
	ELSE:        "else",
	EXPORT:      "export",
	FALLTHROUGH: "fallthrough",
	FALSE:       "false",
	FOR:         "for",
	FUNC:        "func",
	GO:          "go",
	GOTO:        "goto",
	IF:          "if",
	IMPORT:      "import",
	INTERFACE:   "interface",
	IOTA:        "iota",
	MAP:         "map",
	NEW:         "new",
	NIL:         "nil",
	PACKAGE:     "package",
	RANGE:       "range",
	RETURN:      "return",
	SELECT:      "select",
	STRUCT:      "struct",
	SWITCH:      "switch",
	TRUE:        "true",
	TYPE:        "type",
	VAR:         "var",
}

// TokenName returns the printable form of tok: the symbol for operators and
// delimiters, the spelling for keywords, and a lower-case class name for
// ILLEGAL, EOF, IDENT, STRING and NUMBER. Any other value, including the
// keyword sentinels, yields "???".
func TokenName(tok int) string {
	if 0 <= tok && tok < len(token_names) {
		if name := token_names[tok]; name != "" {
			return name
		}
	}
	return "???"
}

// is_whitespace reports whether ch is a blank, carriage return, newline or tab.
func is_whitespace(ch int) bool {
	return ch == ' ' || ch == '\r' || ch == '\n' || ch == '\t'
}

// is_letter reports whether ch may appear in an identifier: an ASCII letter,
// an underscore, or any character outside the ASCII range.
func is_letter(ch int) bool {
	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 128
}

// digit_val returns the numeric value of the hexadecimal digit ch, or 16 if
// ch is not a hexadecimal digit. Because 16 is larger than any legal digit
// value, "digit_val(ch) < base" tests for a digit of the given base.
func digit_val(ch int) int {
	switch {
	case '0' <= ch && ch <= '9':
		return ch - '0'
	case 'a' <= ch && ch <= 'f':
		return ch - 'a' + 10
	case 'A' <= ch && ch <= 'F':
		return ch - 'A' + 10
	}
	return 16 // larger than any legal digit val
}

// Scanner holds the scanning state for one source text.
// It must be initialized with Open before use.
type Scanner struct {
	filename string // error reporting only
	nerrors  int    // number of errors
	errpos   int    // last error position

	src   string
	pos   int // current reading position
	ch    int // one char look-ahead
	chpos int // position of ch
}

// decode_rune decodes the UTF-8 sequence at the start of s, which must not
// be empty. It returns the character and the number of bytes it occupies.
// Malformed input (stray continuation byte, truncated sequence, overlong
// encoding, value above U+10FFFF) yields U+FFFD with a width of one byte so
// that the caller always makes progress.
func decode_rune(s string) (r, width int) {
	const (
		tx       = 0x80     // 10xx xxxx: continuation byte
		t2       = 0xC0     // 110x xxxx: first byte of a 2-byte sequence
		t3       = 0xE0     // 1110 xxxx: first byte of a 3-byte sequence
		t4       = 0xF0     // 1111 0xxx: first byte of a 4-byte sequence
		t5       = 0xF8     // smallest byte value that never starts a sequence
		maskx    = 0x3F     // payload bits of a continuation byte
		testx    = 0xC0     // marker bits of a continuation byte
		rune_max = 0x10FFFF // largest valid code point
		bad      = 0xFFFD   // replacement character
	)

	c0 := int(s[0])
	if c0 < tx {
		return c0, 1 // ASCII
	}

	// n is the sequence length announced by the first byte and lo the
	// smallest value that genuinely needs that many bytes.
	var n, lo int
	switch {
	case c0 < t2, c0 >= t5:
		return bad, 1
	case c0 < t3:
		n, lo, r = 2, 0x80, c0&0x1F
	case c0 < t4:
		n, lo, r = 3, 0x800, c0&0x0F
	default:
		n, lo, r = 4, 0x10000, c0&0x07
	}
	if len(s) < n {
		return bad, 1
	}
	for i := 1; i < n; i++ {
		c := int(s[i])
		if c&testx != tx {
			return bad, 1
		}
		r = r<<6 | c&maskx
	}
	if r < lo || r > rune_max {
		return bad, 1
	}
	return r, n
}

// Next reads the next Unicode char into S.ch and records its byte offset in
// S.chpos. S.ch < 0 means end-of-file; in that case S.chpos is the length of
// the source and the reading position is left unchanged.
func (S *Scanner) Next() {
	src := S.src
	pos := S.pos
	if pos >= len(src) {
		S.ch = -1 // end of file
		S.chpos = len(src)
		return
	}
	r, width := decode_rune(src[pos:])
	S.ch = r
	S.chpos = pos
	S.pos = pos + width
}

// Init builds the Keywords table from the tokens that lie strictly between
// the KEYWORDS_BEG and KEYWORDS_END sentinels.
func Init() {
	Keywords = make(map[string]int, KEYWORDS_END-KEYWORDS_BEG-1)
	for tok := KEYWORDS_BEG + 1; tok < KEYWORDS_END; tok++ {
		Keywords[TokenName(tok)] = tok
	}
}

// LineCol computes (line, column) information for a given source position.
// Both results are 1-based and the column counts bytes, not characters.
// Positions outside the source are clamped to its bounds.
func (S *Scanner) LineCol(pos int) (line, col int) {
	src := S.src
	if pos > len(src) {
		pos = len(src)
	}
	if pos < 0 {
		pos = 0
	}

	line = 1
	start := 0 // offset of the first byte of the current line
	for i := 0; i < pos; i++ {
		if src[i] == '\n' {
			line++
			start = i + 1
		}
	}
	return line, pos - start + 1
}

// Error reports msg for source position pos as "filename:line:col: msg"
// using the built-in print. To avoid cascades, an error is only reported if
// it is the first one or lies more than errdist bytes past the previously
// reported one; suppressed errors are not counted in S.nerrors.
func (S *Scanner) Error(pos int, msg string) {
	const errdist = 10
	if pos > S.errpos+errdist || S.nerrors == 0 {
		line, col := S.LineCol(pos)
		print(S.filename, ":", line, ":", col, ": ", msg, "\n")
		S.nerrors++
		S.errpos = pos
	}
}

// Open prepares S for scanning src and reads the first character.
// filename is used for error reporting only. The keyword table is built on
// first use.
func (S *Scanner) Open(filename, src string) {
	if Keywords == nil {
		Init()
	}

	S.filename = filename
	S.nerrors = 0
	S.errpos = 0

	S.src = src
	S.pos = 0
	S.Next()
}

// IntString formats x in the given base (2 through 16) using upper-case
// digits and a leading '-' for negative values. It panics if base is outside
// that range.
//
// TODO this needs to go elsewhere
func IntString(x, base int) string {
	if base < 2 || base > 16 {
		panic("IntString: base out of range")
	}
	const digits = "0123456789ABCDEF"

	// Work on the magnitude as an unsigned value so that the smallest int,
	// whose negation does not fit in an int, is handled as well.
	neg := x < 0
	u := uint(x)
	if neg {
		u = -u
	}

	var buf [65]byte // 64 binary digits plus sign
	i := len(buf)
	for {
		i--
		buf[i] = digits[u%uint(base)]
		u /= uint(base)
		if u == 0 {
			break
		}
	}
	if neg {
		i--
		buf[i] = '-'
	}
	return string(buf[i:])
}

// CharString returns a description of ch for error messages: the character
// in single quotes (control characters, backslash and single quote are shown
// as escapes) followed by its code point in hexadecimal, e.g. 'a' (U+61).
// A negative ch, which the scanner uses for end-of-file, is shown as EOF.
func CharString(ch int) string {
	if ch < 0 {
		return "EOF"
	}
	var s string
	switch ch {
	case '\a':
		s = `\a`
	case '\b':
		s = `\b`
	case '\f':
		s = `\f`
	case '\n':
		s = `\n`
	case '\r':
		s = `\r`
	case '\t':
		s = `\t`
	case '\v':
		s = `\v`
	case '\\':
		s = `\\`
	case '\'':
		s = `\'`
	default:
		s = string(rune(ch))
	}
	return "'" + s + "' (U+" + IntString(ch, 16) + ")"
}

// Expect reports an error if the current character is not ch. The current
// character is consumed either way so that the scanner always makes progress.
func (S *Scanner) Expect(ch int) {
	if S.ch != ch {
		S.Error(S.chpos, "expected "+CharString(ch)+", found "+CharString(S.ch))
	}
	S.Next() // make always progress
}

// SkipWhitespace advances past blanks, tabs, carriage returns and newlines.
func (S *Scanner) SkipWhitespace() {
	for is_whitespace(S.ch) {
		S.Next()
	}
}

// SkipComment skips a comment whose leading '/' has already been consumed.
// A line comment ends before the next newline or at end-of-file. A general
// comment ends after the closing "*/"; if that is missing, an error is
// reported at the start of the comment.
func (S *Scanner) SkipComment() {
	if S.ch == '/' {
		// line comment
		S.Next()
		for S.ch != '\n' && S.ch >= 0 {
			S.Next()
		}
		return
	}

	// general comment
	pos := S.chpos - 1 // position of the leading '/'
	S.Expect('*')
	for S.ch >= 0 {
		ch := S.ch
		S.Next()
		if ch == '*' && S.ch == '/' {
			S.Next()
			return
		}
	}
	S.Error(pos, "comment not terminated")
}

// ScanIdentifier scans the identifier starting at the current character and
// returns its keyword token, or IDENT if it is not a keyword.
func (S *Scanner) ScanIdentifier() int {
	// Use the offset of the look-ahead character rather than S.pos - 1:
	// characters may occupy several bytes, and S.pos does not move once the
	// end of the source is reached.
	beg := S.chpos
	for is_letter(S.ch) || digit_val(S.ch) < 10 {
		S.Next()
	}
	end := S.chpos

	if tok, present := Keywords[S.src[beg:end]]; present {
		return tok
	}
	return IDENT
}

// ScanMantissa skips a (possibly empty) run of digits of the given base.
func (S *Scanner) ScanMantissa(base int) {
	for digit_val(S.ch) < base {
		S.Next()
	}
}

// ScanNumber scans an integer or floating-point literal and returns NUMBER.
// seen_decimal_point must be true if the caller has already consumed a
// leading '.', in which case scanning resumes with the fraction digits.
func (S *Scanner) ScanNumber(seen_decimal_point bool) int {
	if !seen_decimal_point {
		if S.ch == '0' {
			// int or float
			S.Next()
			if S.ch == 'x' || S.ch == 'X' {
				// hexadecimal int
				S.Next()
				S.ScanMantissa(16)
				return NUMBER
			}
			// octal int or float
			S.ScanMantissa(8)
			if !(digit_val(S.ch) < 10 || S.ch == '.' || S.ch == 'e' || S.ch == 'E') {
				// octal int
				return NUMBER
			}
			// float: continue with the remaining integer digits
		}

		// decimal int or float
		S.ScanMantissa(10)
		if S.ch == '.' {
			// float
			S.Next()
			seen_decimal_point = true
		}
	}

	if seen_decimal_point {
		// fraction
		S.ScanMantissa(10)
	}

	if S.ch == 'e' || S.ch == 'E' {
		// exponent
		S.Next()
		if S.ch == '-' || S.ch == '+' {
			S.Next()
		}
		S.ScanMantissa(10)
	}
	return NUMBER
}

// ScanDigits consumes up to n digits of the given base and reports an error
// if fewer than n were found.
func (S *Scanner) ScanDigits(n int, base int) {
	for n > 0 && digit_val(S.ch) < base {
		S.Next()
		n--
	}
	if n > 0 {
		S.Error(S.chpos, "illegal char escape")
	}
}

// ScanEscape scans an escape sequence whose leading backslash has already
// been consumed and reports an error for an unknown escape.
//
// TODO: fix this routine. The result is not yet the escaped value: simple
// escapes return the escape letter itself and numeric escapes return "".
func (S *Scanner) ScanEscape() string {
	ch := S.ch
	pos := S.chpos
	S.Next()
	switch ch {
	case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '\'', '"':
		return string(rune(ch))

	case '0', '1', '2', '3', '4', '5', '6', '7':
		S.ScanDigits(3-1, 8) // 1 char already read
		return ""            // TODO fix this

	case 'x':
		S.ScanDigits(2, 16)
		return "" // TODO fix this

	case 'u':
		S.ScanDigits(4, 16)
		return "" // TODO fix this

	case 'U':
		S.ScanDigits(8, 16)
		return "" // TODO fix this
	}

	S.Error(pos, "illegal char escape")
	return ""
}

// ScanChar scans a character literal whose opening quote has already been
// consumed. Character literals are reported as NUMBER tokens.
func (S *Scanner) ScanChar() int {
	ch := S.ch
	S.Next()
	if ch == '\\' {
		S.ScanEscape()
	}

	S.Expect('\'')
	return NUMBER
}

// ScanString scans an interpreted string literal whose opening '"' has
// already been consumed and returns STRING. A newline or end-of-file before
// the closing quote is reported as an error at the start of the literal; the
// token then ends right after the newline.
func (S *Scanner) ScanString() int {
	pos := S.chpos - 1 // position of the opening quote
	for S.ch != '"' {
		ch := S.ch
		S.Next()
		if ch == '\n' || ch < 0 {
			S.Error(pos, "string not terminated")
			// There is no closing quote to consume; return here so that
			// the character after the newline is not swallowed.
			return STRING
		}
		if ch == '\\' {
			S.ScanEscape()
		}
	}

	S.Next() // closing quote
	return STRING
}

// ScanRawString scans a raw string literal whose opening '`' has already
// been consumed and returns STRING. A newline or end-of-file before the
// closing back quote is reported as an error at the start of the literal; the
// token then ends right after the newline.
func (S *Scanner) ScanRawString() int {
	pos := S.chpos - 1 // position of the opening back quote
	for S.ch != '`' {
		ch := S.ch
		S.Next()
		if ch == '\n' || ch < 0 {
			S.Error(pos, "string not terminated")
			// See ScanString: do not swallow the following character.
			return STRING
		}
	}

	S.Next() // closing back quote
	return STRING
}

// Select2 returns tok1 and consumes the current character if it is '=';
// otherwise it returns tok0.
func (S *Scanner) Select2(tok0, tok1 int) int {
	if S.ch == '=' {
		S.Next()
		return tok1
	}
	return tok0
}

// Select3 returns tok1 if the current character is '=', tok2 if it is ch2
// (consuming the character in both cases), and tok0 otherwise.
func (S *Scanner) Select3(tok0, tok1, ch2, tok2 int) int {
	if S.ch == '=' {
		S.Next()
		return tok1
	}
	if S.ch == ch2 {
		S.Next()
		return tok2
	}
	return tok0
}

// Select4 extends Select3: after ch2, a following '=' selects tok3 instead
// of tok2.
func (S *Scanner) Select4(tok0, tok1, ch2, tok2, tok3 int) int {
	if S.ch == '=' {
		S.Next()
		return tok1
	}
	if S.ch == ch2 {
		S.Next()
		if S.ch == '=' {
			S.Next()
			return tok3
		}
		return tok2
	}
	return tok0
}

// Scan skips white space and comments and scans the next token. It returns
// the token together with the byte offsets of its first character (beg) and
// of the first character after it (end). At end-of-file it returns EOF; an
// unrecognized character is reported as an error and returned as ILLEGAL.
func (S *Scanner) Scan() (tok, beg, end int) {
	S.SkipWhitespace()

	ch := S.ch
	tok = ILLEGAL
	beg = S.chpos

	switch {
	case is_letter(ch):
		tok = S.ScanIdentifier()
	case digit_val(ch) < 10:
		tok = S.ScanNumber(false)
	default:
		S.Next() // always make progress
		switch ch {
		case -1:
			tok = EOF
		case '"':
			tok = S.ScanString()
		case '\'':
			tok = S.ScanChar()
		case '`':
			tok = S.ScanRawString()
		case ':':
			tok = S.Select2(COLON, DEFINE)
		case '.':
			if digit_val(S.ch) < 10 {
				tok = S.ScanNumber(true)
			} else {
				tok = PERIOD
			}
		case ',':
			tok = COMMA
		case ';':
			tok = SEMICOLON
		case '(':
			tok = LPAREN
		case ')':
			tok = RPAREN
		case '[':
			tok = LBRACK
		case ']':
			tok = RBRACK
		case '{':
			tok = LBRACE
		case '}':
			tok = RBRACE
		case '+':
			tok = S.Select3(ADD, ADD_ASSIGN, '+', INC)
		case '-':
			tok = S.Select3(SUB, SUB_ASSIGN, '-', DEC)
		case '*':
			tok = S.Select2(MUL, MUL_ASSIGN)
		case '/':
			if S.ch == '/' || S.ch == '*' {
				// A comment is not a token: skip it and scan again.
				S.SkipComment()
				return S.Scan()
			}
			tok = S.Select2(QUO, QUO_ASSIGN)
		case '%':
			tok = S.Select2(REM, REM_ASSIGN)
		case '^':
			tok = S.Select2(XOR, XOR_ASSIGN)
		case '<':
			tok = S.Select4(LSS, LEQ, '<', SHL, SHL_ASSIGN)
		case '>':
			tok = S.Select4(GTR, GEQ, '>', SHR, SHR_ASSIGN)
		case '=':
			tok = S.Select2(ASSIGN, EQL)
		case '!':
			tok = S.Select2(NOT, NEQ)
		case '&':
			tok = S.Select3(AND, AND_ASSIGN, '&', LAND)
		case '|':
			tok = S.Select3(OR, OR_ASSIGN, '|', LOR)
		default:
			S.Error(beg, "illegal character "+CharString(ch))
			tok = ILLEGAL
		}
	}

	return tok, beg, S.chpos
}