implemented InsertSemis mode for go/scanner

R=rsc http://codereview.appspot.com/175047
author: Robert Griesemer <gri@golang.org> 2009-12-10 15:31:02 -0800
committer: Robert Griesemer <gri@golang.org> 2009-12-10 15:31:02 -0800
commit: 28ebedc0d488ac0a8db3d1ef33e646653bdd153a (patch)
tree: 0ebeea1759152ab73bfeddcd8d7bba0fda3ee167 /src/pkg/go/scanner/scanner.go
parent: 1bd24d1205268dac1da5dfa4ee83adc126de1161 (diff)
download: golang-28ebedc0d488ac0a8db3d1ef33e646653bdd153a.tar.gz
1 files changed, 79 insertions, 17 deletions
diff --git a/src/pkg/go/scanner/scanner.go b/src/pkg/go/scanner/scanner.go
index 177fe0f19..386cdb0e9 100644
--- a/src/pkg/go/scanner/scanner.go
+++ b/src/pkg/go/scanner/scanner.go
@@ -29,9 +29,11 @@ type Scanner struct {
 	mode	uint;		// scanning mode
 
 	// scanning state
-	pos	token.Position;	// previous reading position (position before ch)
-	offset	int;		// current reading offset (position after ch)
-	ch	int;		// one char look-ahead
+	pos		token.Position;	// previous reading position (position before ch)
+	offset		int;		// current reading offset (position after ch)
+	ch		int;		// one char look-ahead
+	insertSemi	bool;		// insert a semicolon before next newline
+	pendingComment	token.Position;	// valid if pendingComment.Line > 0
 
 	// public state - ok to modify
 	ErrorCount	int;	// number of errors encountered
@@ -69,6 +71,7 @@ func (S *Scanner) next() {
 const (
 	ScanComments		= 1 << iota;	// return comments as COMMENT tokens
 	AllowIllegalChars;	// do not report an error for illegal chars
+	InsertSemis;		// automatically insert semicolons
 )
 
 
@@ -420,6 +423,8 @@ func (S *Scanner) switch4(tok0, tok1 token.Token, ch2 int, tok2, tok3 token.Toke
 }
 
 
+var semicolon = []byte{';'}
+
 // Scan scans the next token and returns the token position pos,
 // the token tok, and the literal text lit corresponding to the
 // token. The source end is indicated by token.EOF.
@@ -432,40 +437,63 @@ func (S *Scanner) switch4(tok0, tok1 token.Token, ch2 int, tok2, tok3 token.Toke
 // of the error handler, if there was one installed.
 //
 func (S *Scanner) Scan() (pos token.Position, tok token.Token, lit []byte) {
-scan_again:
+	if S.pendingComment.Line > 0 {
+		// "consume" pending comment
+		S.pos = S.pendingComment;
+		S.offset = S.pos.Offset + 1;
+		S.ch = '/';
+		S.pendingComment.Line = 0;
+	}
+
+scanAgain:
 	// skip white space
-	for S.ch == ' ' || S.ch == '\t' || S.ch == '\n' || S.ch == '\r' {
+	for S.ch == ' ' || S.ch == '\t' || S.ch == '\n' && !S.insertSemi || S.ch == '\r' {
 		S.next()
 	}
 
 	// current token start
+	insertSemi := false;
 	pos, tok = S.pos, token.ILLEGAL;
 
 	// determine token value
 	switch ch := S.ch; {
 	case isLetter(ch):
-		tok = S.scanIdentifier()
+		tok = S.scanIdentifier();
+		switch tok {
+		case token.IDENT, token.BREAK, token.CONTINUE, token.FALLTHROUGH, token.RETURN:
+			insertSemi = true
+		default:
+			insertSemi = false
+		}
 	case digitVal(ch) < 10:
-		tok = S.scanNumber(false)
+		insertSemi = true;
+		tok = S.scanNumber(false);
 	default:
 		S.next();	// always make progress
 		switch ch {
 		case -1:
 			tok = token.EOF
+		case '\n':
+			S.insertSemi = false;
+			return pos, token.SEMICOLON, semicolon;
 		case '"':
+			insertSemi = true;
 			tok = token.STRING;
 			S.scanString(pos);
 		case '\'':
+			insertSemi = true;
 			tok = token.CHAR;
 			S.scanChar(pos);
 		case '`':
+			insertSemi = true;
 			tok = token.STRING;
 			S.scanRawString(pos);
 		case ':':
 			tok = S.switch2(token.COLON, token.DEFINE)
 		case '.':
 			if digitVal(S.ch) < 10 {
-				tok = S.scanNumber(true)
+				insertSemi = true;
+				tok = S.scanNumber(true);
 			} else if S.ch == '.' {
 				S.next();
 				if S.ch == '.' {
@@ -482,27 +510,57 @@ scan_again:
 		case '(':
 			tok = token.LPAREN
 		case ')':
-			tok = token.RPAREN
+			insertSemi = true;
+			tok = token.RPAREN;
 		case '[':
 			tok = token.LBRACK
 		case ']':
-			tok = token.RBRACK
+			insertSemi = true;
+			tok = token.RBRACK;
 		case '{':
 			tok = token.LBRACE
 		case '}':
-			tok = token.RBRACE
+			insertSemi = true;
+			tok = token.RBRACE;
 		case '+':
-			tok = S.switch3(token.ADD, token.ADD_ASSIGN, '+', token.INC)
+			tok = S.switch3(token.ADD, token.ADD_ASSIGN, '+', token.INC);
+			if tok == token.INC {
+				insertSemi = true
+			}
 		case '-':
-			tok = S.switch3(token.SUB, token.SUB_ASSIGN, '-', token.DEC)
+			tok = S.switch3(token.SUB, token.SUB_ASSIGN, '-', token.DEC);
+			if tok == token.DEC {
+				insertSemi = true
+			}
 		case '*':
 			tok = S.switch2(token.MUL, token.MUL_ASSIGN)
 		case '/':
 			if S.ch == '/' || S.ch == '*' {
-				S.scanComment(pos);
-				tok = token.COMMENT;
-				if S.mode&ScanComments == 0 {
-					goto scan_again
+				// comment
+				newline := false;
+				if S.insertSemi {
+					if S.ch == '/' {
+						// a line comment acts like a newline
+						newline = true
+					} else {
+						// a general comment may act like a newline
+						S.scanComment(pos);
+						newline = pos.Line < S.pos.Line;
+					}
+				} else {
+					S.scanComment(pos)
+				}
+				if newline {
+					// insert a semicolon and retain pending comment
+					S.insertSemi = false;
+					S.pendingComment = pos;
+					return pos, token.SEMICOLON, semicolon;
+				} else if S.mode&ScanComments == 0 {
+					// skip comment
+					goto scanAgain
+				} else {
+					insertSemi = S.insertSemi;	// preserve insertSemi info
+					tok = token.COMMENT;
 				}
 			} else {
 				tok = S.switch2(token.QUO, token.QUO_ASSIGN)
@@ -537,9 +595,13 @@ scan_again:
 			if S.mode&AllowIllegalChars == 0 {
 				S.error(pos, "illegal character "+charString(ch))
 			}
+			insertSemi = S.insertSemi;	// preserve insertSemi info
 		}
 	}
 
+	if S.mode&InsertSemis != 0 {
+		S.insertSemi = insertSemi
+	}
 	return pos, tok, S.src[pos.Offset:S.pos.Offset];
 }
author	Robert Griesemer <gri@golang.org>	2009-12-10 15:31:02 -0800
committer	Robert Griesemer <gri@golang.org>	2009-12-10 15:31:02 -0800
commit	28ebedc0d488ac0a8db3d1ef33e646653bdd153a (patch)
tree	0ebeea1759152ab73bfeddcd8d7bba0fda3ee167 /src/pkg/go/scanner/scanner.go
parent	1bd24d1205268dac1da5dfa4ee83adc126de1161 (diff)
download	golang-28ebedc0d488ac0a8db3d1ef33e646653bdd153a.tar.gz