3 files changed, 164 insertions, 111 deletions
diff --git a/src/pkg/go/scanner/errors.go b/src/pkg/go/scanner/errors.go
index a0927e416..cd9620b87 100644
--- a/src/pkg/go/scanner/errors.go
+++ b/src/pkg/go/scanner/errors.go
@@ -8,7 +8,6 @@ import (
 	"fmt"
 	"go/token"
 	"io"
-	"os"
 	"sort"
 )
 
@@ -49,7 +48,7 @@ type Error struct {
 	Msg string
 }
 
-func (e *Error) String() string {
+func (e *Error) Error() string {
 	if e.Pos.Filename != "" || e.Pos.IsValid() {
 		// don't print "<unknown position>"
 		// TODO(gri) reconsider the semantics of Position.IsValid
@@ -85,14 +84,14 @@ func (p ErrorList) Less(i, j int) bool {
 	return false
 }
 
-func (p ErrorList) String() string {
+func (p ErrorList) Error() string {
 	switch len(p) {
 	case 0:
 		return "unspecified error"
 	case 1:
-		return p[0].String()
+		return p[0].Error()
 	}
-	return fmt.Sprintf("%s (and %d more errors)", p[0].String(), len(p)-1)
+	return fmt.Sprintf("%s (and %d more errors)", p[0], len(p)-1)
 }
 
 // These constants control the construction of the ErrorList
@@ -136,11 +135,11 @@ func (h *ErrorVector) GetErrorList(mode int) ErrorList {
 	return list
 }
 
-// GetError is like GetErrorList, but it returns an os.Error instead
-// so that a nil result can be assigned to an os.Error variable and
+// GetError is like GetErrorList, but it returns an error instead
+// so that a nil result can be assigned to an error variable and
 // remains nil.
 //
-func (h *ErrorVector) GetError(mode int) os.Error {
+func (h *ErrorVector) GetError(mode int) error {
 	if len(h.errors) == 0 {
 		return nil
 	}
@@ -157,7 +156,7 @@ func (h *ErrorVector) Error(pos token.Position, msg string) {
 // one error per line, if the err parameter is an ErrorList. Otherwise
 // it prints the err string.
 //
-func PrintError(w io.Writer, err os.Error) {
+func PrintError(w io.Writer, err error) {
 	if list, ok := err.(ErrorList); ok {
 		for _, e := range list {
 			fmt.Fprintf(w, "%s\n", e)
diff --git a/src/pkg/go/scanner/scanner.go b/src/pkg/go/scanner/scanner.go
index 7f3dd2373..7c72c0a46 100644
--- a/src/pkg/go/scanner/scanner.go
+++ b/src/pkg/go/scanner/scanner.go
@@ -6,7 +6,7 @@
 // source which can then be tokenized through repeated calls to the Scan
 // function. Typical use:
 //
-//	var s Scanner
+//	var s scanner.Scanner
 //	fset := token.NewFileSet()  // position information is relative to fset
 //      file := fset.AddFile(filename, fset.Base(), len(src))  // register file
 //	s.Init(file, src, nil /* no error handler */, 0)
@@ -27,7 +27,7 @@ import (
 	"path/filepath"
 	"strconv"
 	"unicode"
-	"utf8"
+	"unicode/utf8"
 )
 
 // A Scanner holds the scanner's internal state while processing
@@ -40,10 +40,10 @@ type Scanner struct {
 	dir  string       // directory portion of file.Name()
 	src  []byte       // source
 	err  ErrorHandler // error reporting; or nil
-	mode uint         // scanning mode
+	mode Mode         // scanning mode
 
 	// scanning state
-	ch         int  // current character
+	ch         rune // current character
 	offset     int  // character offset
 	rdOffset   int  // reading offset (position after current character)
 	lineOffset int  // current line offset
@@ -63,7 +63,7 @@ func (S *Scanner) next() {
 			S.lineOffset = S.offset
 			S.file.AddLine(S.offset)
 		}
-		r, w := int(S.src[S.rdOffset]), 1
+		r, w := rune(S.src[S.rdOffset]), 1
 		switch {
 		case r == 0:
 			S.error(S.offset, "illegal character NUL")
@@ -86,13 +86,14 @@ func (S *Scanner) next() {
 	}
 }
 
-// The mode parameter to the Init function is a set of flags (or 0).
+// A mode value is set of flags (or 0).
 // They control scanner behavior.
 //
+type Mode uint
+
 const (
-	ScanComments      = 1 << iota // return comments as COMMENT tokens
-	AllowIllegalChars             // do not report an error for illegal chars
-	InsertSemis                   // automatically insert semicolons
+	ScanComments    Mode = 1 << iota // return comments as COMMENT tokens
+	dontInsertSemis                  // do not automatically insert semicolons - for testing only
 )
 
 // Init prepares the scanner S to tokenize the text src by setting the
@@ -105,12 +106,12 @@ const (
 // Calls to Scan will use the error handler err if they encounter a
 // syntax error and err is not nil. Also, for each error encountered,
 // the Scanner field ErrorCount is incremented by one. The mode parameter
-// determines how comments, illegal characters, and semicolons are handled.
+// determines how comments are handled.
 //
 // Note that Init may call err if there is an error in the first character
 // of the file.
 //
-func (S *Scanner) Init(file *token.File, src []byte, err ErrorHandler, mode uint) {
+func (S *Scanner) Init(file *token.File, src []byte, err ErrorHandler, mode Mode) {
 	// Explicitly initialize all fields since a scanner may be reused.
 	if file.Size() != len(src) {
 		panic("file size does not match src len")
@@ -152,13 +153,13 @@ func (S *Scanner) interpretLineComment(text []byte) {
 					filename = filepath.Join(S.dir, filename)
 				}
 				// update scanner position
-				S.file.AddLineInfo(S.lineOffset, filename, line-1) // -1 since comment applies to next line
+				S.file.AddLineInfo(S.lineOffset+len(text)+1, filename, line) // +len(text)+1 since comment applies to next line
 			}
 		}
 	}
 }
 
-func (S *Scanner) scanComment() {
+func (S *Scanner) scanComment() string {
 	// initial '/' already consumed; S.ch == '/' || S.ch == '*'
 	offs := S.offset - 1 // position of initial '/'
 
@@ -172,7 +173,7 @@ func (S *Scanner) scanComment() {
 			// comment starts at the beginning of the current line
 			S.interpretLineComment(S.src[offs:S.offset])
 		}
-		return
+		goto exit
 	}
 
 	/*-style comment */
@@ -182,11 +183,14 @@ func (S *Scanner) scanComment() {
 		S.next()
 		if ch == '*' && S.ch == '/' {
 			S.next()
-			return
+			goto exit
 		}
 	}
 
 	S.error(offs, "comment not terminated")
+
+exit:
+	return string(S.src[offs:S.offset])
 }
 
 func (S *Scanner) findLineEnd() bool {
@@ -233,30 +237,30 @@ func (S *Scanner) findLineEnd() bool {
 	return false
 }
 
-func isLetter(ch int) bool {
+func isLetter(ch rune) bool {
 	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch)
 }
 
-func isDigit(ch int) bool {
+func isDigit(ch rune) bool {
 	return '0' <= ch && ch <= '9' || ch >= 0x80 && unicode.IsDigit(ch)
 }
 
-func (S *Scanner) scanIdentifier() token.Token {
+func (S *Scanner) scanIdentifier() string {
 	offs := S.offset
 	for isLetter(S.ch) || isDigit(S.ch) {
 		S.next()
 	}
-	return token.Lookup(S.src[offs:S.offset])
+	return string(S.src[offs:S.offset])
 }
 
-func digitVal(ch int) int {
+func digitVal(ch rune) int {
 	switch {
 	case '0' <= ch && ch <= '9':
-		return ch - '0'
+		return int(ch - '0')
 	case 'a' <= ch && ch <= 'f':
-		return ch - 'a' + 10
+		return int(ch - 'a' + 10)
 	case 'A' <= ch && ch <= 'F':
-		return ch - 'A' + 10
+		return int(ch - 'A' + 10)
 	}
 	return 16 // larger than any legal digit val
 }
@@ -267,11 +271,13 @@ func (S *Scanner) scanMantissa(base int) {
 	}
 }
 
-func (S *Scanner) scanNumber(seenDecimalPoint bool) token.Token {
+func (S *Scanner) scanNumber(seenDecimalPoint bool) (token.Token, string) {
 	// digitVal(S.ch) < 10
+	offs := S.offset
 	tok := token.INT
 
 	if seenDecimalPoint {
+		offs--
 		tok = token.FLOAT
 		S.scanMantissa(10)
 		goto exponent
@@ -335,10 +341,10 @@ exponent:
 	}
 
 exit:
-	return tok
+	return tok, string(S.src[offs:S.offset])
 }
 
-func (S *Scanner) scanEscape(quote int) {
+func (S *Scanner) scanEscape(quote rune) {
 	offs := S.offset
 
 	var i, base, max uint32
@@ -382,7 +388,7 @@ func (S *Scanner) scanEscape(quote int) {
 	}
 }
 
-func (S *Scanner) scanChar() {
+func (S *Scanner) scanChar() string {
 	// '\'' opening already consumed
 	offs := S.offset - 1
 
@@ -406,9 +412,11 @@ func (S *Scanner) scanChar() {
 	if n != 1 {
 		S.error(offs, "illegal character literal")
 	}
+
+	return string(S.src[offs:S.offset])
 }
 
-func (S *Scanner) scanString() {
+func (S *Scanner) scanString() string {
 	// '"' opening already consumed
 	offs := S.offset - 1
 
@@ -425,15 +433,33 @@ func (S *Scanner) scanString() {
 	}
 
 	S.next()
+
+	return string(S.src[offs:S.offset])
+}
+
+func stripCR(b []byte) []byte {
+	c := make([]byte, len(b))
+	i := 0
+	for _, ch := range b {
+		if ch != '\r' {
+			c[i] = ch
+			i++
+		}
+	}
+	return c[:i]
 }
 
-func (S *Scanner) scanRawString() {
+func (S *Scanner) scanRawString() string {
 	// '`' opening already consumed
 	offs := S.offset - 1
 
+	hasCR := false
 	for S.ch != '`' {
 		ch := S.ch
 		S.next()
+		if ch == '\r' {
+			hasCR = true
+		}
 		if ch < 0 {
 			S.error(offs, "string not terminated")
 			break
@@ -441,6 +467,13 @@ func (S *Scanner) scanRawString() {
 	}
 
 	S.next()
+
+	lit := S.src[offs:S.offset]
+	if hasCR {
+		lit = stripCR(lit)
+	}
+
+	return string(lit)
 }
 
 func (S *Scanner) skipWhitespace() {
@@ -463,7 +496,7 @@ func (S *Scanner) switch2(tok0, tok1 token.Token) token.Token {
 	return tok0
 }
 
-func (S *Scanner) switch3(tok0, tok1 token.Token, ch2 int, tok2 token.Token) token.Token {
+func (S *Scanner) switch3(tok0, tok1 token.Token, ch2 rune, tok2 token.Token) token.Token {
 	if S.ch == '=' {
 		S.next()
 		return tok1
@@ -475,7 +508,7 @@ func (S *Scanner) switch3(tok0, tok1 token.Token, ch2 int, tok2 token.Token) tok
 	return tok0
 }
 
-func (S *Scanner) switch4(tok0, tok1 token.Token, ch2 int, tok2, tok3 token.Token) token.Token {
+func (S *Scanner) switch4(tok0, tok1 token.Token, ch2 rune, tok2, tok3 token.Token) token.Token {
 	if S.ch == '=' {
 		S.next()
 		return tok1
@@ -491,15 +524,24 @@ func (S *Scanner) switch4(tok0, tok1 token.Token, ch2 int, tok2, tok3 token.Toke
 	return tok0
 }
 
-// Scan scans the next token and returns the token position,
-// the token, and the literal string corresponding to the
-// token. The source end is indicated by token.EOF.
+// Scan scans the next token and returns the token position, the token,
+// and its literal string if applicable. The source end is indicated by
+// token.EOF.
+//
+// If the returned token is a literal (token.IDENT, token.INT, token.FLOAT,
+// token.IMAG, token.CHAR, token.STRING) or token.COMMENT, the literal string
+// has the corresponding value.
 //
 // If the returned token is token.SEMICOLON, the corresponding
 // literal string is ";" if the semicolon was present in the source,
 // and "\n" if the semicolon was inserted because of a newline or
 // at EOF.
 //
+// If the returned token is token.ILLEGAL, the literal string is the
+// offending character.
+//
+// In all other cases, Scan returns an empty literal string.
+//
 // For more tolerant parsing, Scan will return a valid token if
 // possible even if a syntax error was encountered. Thus, even
 // if the resulting token sequence contains no illegal tokens,
@@ -511,33 +553,33 @@ func (S *Scanner) switch4(tok0, tok1 token.Token, ch2 int, tok2, tok3 token.Toke
 // set with Init. Token positions are relative to that file
 // and thus relative to the file set.
 //
-func (S *Scanner) Scan() (token.Pos, token.Token, string) {
+func (S *Scanner) Scan() (pos token.Pos, tok token.Token, lit string) {
 scanAgain:
 	S.skipWhitespace()
 
 	// current token start
-	insertSemi := false
-	offs := S.offset
-	tok := token.ILLEGAL
+	pos = S.file.Pos(S.offset)
 
 	// determine token value
+	insertSemi := false
 	switch ch := S.ch; {
 	case isLetter(ch):
-		tok = S.scanIdentifier()
+		lit = S.scanIdentifier()
+		tok = token.Lookup(lit)
 		switch tok {
 		case token.IDENT, token.BREAK, token.CONTINUE, token.FALLTHROUGH, token.RETURN:
 			insertSemi = true
 		}
 	case digitVal(ch) < 10:
 		insertSemi = true
-		tok = S.scanNumber(false)
+		tok, lit = S.scanNumber(false)
 	default:
 		S.next() // always make progress
 		switch ch {
 		case -1:
 			if S.insertSemi {
 				S.insertSemi = false // EOF consumed
-				return S.file.Pos(offs), token.SEMICOLON, "\n"
+				return pos, token.SEMICOLON, "\n"
 			}
 			tok = token.EOF
 		case '\n':
@@ -545,25 +587,25 @@ scanAgain:
 			// set in the first place and exited early
 			// from S.skipWhitespace()
 			S.insertSemi = false // newline consumed
-			return S.file.Pos(offs), token.SEMICOLON, "\n"
+			return pos, token.SEMICOLON, "\n"
 		case '"':
 			insertSemi = true
 			tok = token.STRING
-			S.scanString()
+			lit = S.scanString()
 		case '\'':
 			insertSemi = true
 			tok = token.CHAR
-			S.scanChar()
+			lit = S.scanChar()
 		case '`':
 			insertSemi = true
 			tok = token.STRING
-			S.scanRawString()
+			lit = S.scanRawString()
 		case ':':
 			tok = S.switch2(token.COLON, token.DEFINE)
 		case '.':
 			if digitVal(S.ch) < 10 {
 				insertSemi = true
-				tok = S.scanNumber(true)
+				tok, lit = S.scanNumber(true)
 			} else if S.ch == '.' {
 				S.next()
 				if S.ch == '.' {
@@ -577,6 +619,7 @@ scanAgain:
 			tok = token.COMMA
 		case ';':
 			tok = token.SEMICOLON
+			lit = ";"
 		case '(':
 			tok = token.LPAREN
 		case ')':
@@ -610,12 +653,12 @@ scanAgain:
 				if S.insertSemi && S.findLineEnd() {
 					// reset position to the beginning of the comment
 					S.ch = '/'
-					S.offset = offs
-					S.rdOffset = offs + 1
+					S.offset = S.file.Offset(pos)
+					S.rdOffset = S.offset + 1
 					S.insertSemi = false // newline consumed
-					return S.file.Pos(offs), token.SEMICOLON, "\n"
+					return pos, token.SEMICOLON, "\n"
 				}
-				S.scanComment()
+				lit = S.scanComment()
 				if S.mode&ScanComments == 0 {
 					// skip comment
 					S.insertSemi = false // newline consumed
@@ -652,19 +695,15 @@ scanAgain:
 		case '|':
 			tok = S.switch3(token.OR, token.OR_ASSIGN, '|', token.LOR)
 		default:
-			if S.mode&AllowIllegalChars == 0 {
-				S.error(offs, fmt.Sprintf("illegal character %#U", ch))
-			}
+			S.error(S.file.Offset(pos), fmt.Sprintf("illegal character %#U", ch))
 			insertSemi = S.insertSemi // preserve insertSemi info
+			tok = token.ILLEGAL
+			lit = string(ch)
 		}
 	}
-
-	if S.mode&InsertSemis != 0 {
+	if S.mode&dontInsertSemis == 0 {
 		S.insertSemi = insertSemi
 	}
 
-	// TODO(gri): The scanner API should change such that the literal string
-	//            is only valid if an actual literal was scanned. This will
-	//            permit a more efficient implementation.
-	return S.file.Pos(offs), tok, string(S.src[offs:S.offset])
+	return
 }
diff --git a/src/pkg/go/scanner/scanner_test.go b/src/pkg/go/scanner/scanner_test.go
index eb9e1cb81..af45bc5b1 100644
--- a/src/pkg/go/scanner/scanner_test.go
+++ b/src/pkg/go/scanner/scanner_test.go
@@ -83,6 +83,8 @@ var tokens = [...]elt{
 		"`",
 		literal,
 	},
+	{token.STRING, "`\r`", literal},
+	{token.STRING, "`foo\r\nbar`", literal},
 
 	// Operators and delimiters
 	{token.ADD, "+", operator},
@@ -175,6 +177,15 @@ var tokens = [...]elt{
 
 const whitespace = "  \t  \n\n\n" // to separate tokens
 
+var source = func() []byte {
+	var src []byte
+	for _, t := range tokens {
+		src = append(src, t.lit...)
+		src = append(src, whitespace...)
+	}
+	return src
+}()
+
 type testErrorHandler struct {
 	t *testing.T
 }
@@ -212,20 +223,20 @@ func checkPos(t *testing.T, lit string, p token.Pos, expected token.Position) {
 // Verify that calling Scan() provides the correct results.
 func TestScan(t *testing.T) {
 	// make source
-	var src string
-	for _, e := range tokens {
-		src += e.lit + whitespace
-	}
-	src_linecount := newlineCount(src)
+	src_linecount := newlineCount(string(source))
 	whitespace_linecount := newlineCount(whitespace)
 
 	// verify scan
 	var s Scanner
-	s.Init(fset.AddFile("", fset.Base(), len(src)), []byte(src), &testErrorHandler{t}, ScanComments)
+	s.Init(fset.AddFile("", fset.Base(), len(source)), source, &testErrorHandler{t}, ScanComments|dontInsertSemis)
 	index := 0
 	epos := token.Position{"", 0, 1, 1} // expected position
 	for {
 		pos, tok, lit := s.Scan()
+		if lit == "" {
+			// no literal value for non-literal tokens
+			lit = tok.String()
+		}
 		e := elt{token.EOF, "", special}
 		if index < len(tokens) {
 			e = tokens[index]
@@ -237,10 +248,18 @@ func TestScan(t *testing.T) {
 		}
 		checkPos(t, lit, pos, epos)
 		if tok != e.tok {
-			t.Errorf("bad token for %q: got %s, expected %s", lit, tok.String(), e.tok.String())
+			t.Errorf("bad token for %q: got %s, expected %s", lit, tok, e.tok)
 		}
-		if e.tok.IsLiteral() && lit != e.lit {
-			t.Errorf("bad literal for %q: got %q, expected %q", lit, lit, e.lit)
+		if e.tok.IsLiteral() {
+			// no CRs in raw string literals
+			elit := e.lit
+			if elit[0] == '`' {
+				elit = string(stripCR([]byte(elit)))
+				epos.Offset += len(e.lit) - len(lit) // correct position
+			}
+			if lit != elit {
+				t.Errorf("bad literal for %q: got %q, expected %q", lit, lit, elit)
+			}
 		}
 		if tokenclass(tok) != e.class {
 			t.Errorf("bad class for %q: got %d, expected %d", lit, tokenclass(tok), e.class)
@@ -262,7 +281,7 @@ func TestScan(t *testing.T) {
 	}
 }
 
-func checkSemi(t *testing.T, line string, mode uint) {
+func checkSemi(t *testing.T, line string, mode Mode) {
 	var S Scanner
 	file := fset.AddFile("TestSemis", fset.Base(), len(line))
 	S.Init(file, []byte(line), nil, mode)
@@ -286,7 +305,7 @@ func checkSemi(t *testing.T, line string, mode uint) {
 				}
 				checkPos(t, line, pos, semiPos)
 			} else {
-				t.Errorf("bad token for %q: got %s, expected ;", line, tok.String())
+				t.Errorf("bad token for %q: got %s, expected ;", line, tok)
 			}
 		} else if tok == token.SEMICOLON {
 			t.Errorf("bad token for %q: got ;, expected no ;", line)
@@ -420,14 +439,14 @@ var lines = []string{
 
 func TestSemis(t *testing.T) {
 	for _, line := range lines {
-		checkSemi(t, line, AllowIllegalChars|InsertSemis)
-		checkSemi(t, line, AllowIllegalChars|InsertSemis|ScanComments)
+		checkSemi(t, line, 0)
+		checkSemi(t, line, ScanComments)
 
 		// if the input ended in newlines, the input must tokenize the
 		// same with or without those newlines
 		for i := len(line) - 1; i >= 0 && line[i] == '\n'; i-- {
-			checkSemi(t, line[0:i], AllowIllegalChars|InsertSemis)
-			checkSemi(t, line[0:i], AllowIllegalChars|InsertSemis|ScanComments)
+			checkSemi(t, line[0:i], 0)
+			checkSemi(t, line[0:i], ScanComments)
 		}
 	}
 }
@@ -482,7 +501,7 @@ func TestLineComments(t *testing.T) {
 	// verify scan
 	var S Scanner
 	file := fset.AddFile(filepath.Join("dir", "TestLineComments"), fset.Base(), len(src))
-	S.Init(file, []byte(src), nil, 0)
+	S.Init(file, []byte(src), nil, dontInsertSemis)
 	for _, s := range segs {
 		p, _, lit := S.Scan()
 		pos := file.Position(p)
@@ -501,7 +520,7 @@ func TestInit(t *testing.T) {
 	// 1st init
 	src1 := "if true { }"
 	f1 := fset.AddFile("src1", fset.Base(), len(src1))
-	s.Init(f1, []byte(src1), nil, 0)
+	s.Init(f1, []byte(src1), nil, dontInsertSemis)
 	if f1.Size() != len(src1) {
 		t.Errorf("bad file size: got %d, expected %d", f1.Size(), len(src1))
 	}
@@ -509,40 +528,19 @@ func TestInit(t *testing.T) {
 	s.Scan()              // true
 	_, tok, _ := s.Scan() // {
 	if tok != token.LBRACE {
-		t.Errorf("bad token: got %s, expected %s", tok.String(), token.LBRACE)
+		t.Errorf("bad token: got %s, expected %s", tok, token.LBRACE)
 	}
 
 	// 2nd init
 	src2 := "go true { ]"
 	f2 := fset.AddFile("src2", fset.Base(), len(src2))
-	s.Init(f2, []byte(src2), nil, 0)
+	s.Init(f2, []byte(src2), nil, dontInsertSemis)
 	if f2.Size() != len(src2) {
 		t.Errorf("bad file size: got %d, expected %d", f2.Size(), len(src2))
 	}
 	_, tok, _ = s.Scan() // go
 	if tok != token.GO {
-		t.Errorf("bad token: got %s, expected %s", tok.String(), token.GO)
-	}
-
-	if s.ErrorCount != 0 {
-		t.Errorf("found %d errors", s.ErrorCount)
-	}
-}
-
-func TestIllegalChars(t *testing.T) {
-	var s Scanner
-
-	const src = "*?*$*@*"
-	file := fset.AddFile("", fset.Base(), len(src))
-	s.Init(file, []byte(src), &testErrorHandler{t}, AllowIllegalChars)
-	for offs, ch := range src {
-		pos, tok, lit := s.Scan()
-		if poffs := file.Offset(pos); poffs != offs {
-			t.Errorf("bad position for %s: got %d, expected %d", lit, poffs, offs)
-		}
-		if tok == token.ILLEGAL && lit != string(ch) {
-			t.Errorf("bad token: got %s, expected %s", lit, string(ch))
-		}
+		t.Errorf("bad token: got %s, expected %s", tok, token.GO)
 	}
 
 	if s.ErrorCount != 0 {
@@ -562,7 +560,7 @@ func TestStdErrorHander(t *testing.T) {
 
 	v := new(ErrorVector)
 	var s Scanner
-	s.Init(fset.AddFile("File1", fset.Base(), len(src)), []byte(src), v, 0)
+	s.Init(fset.AddFile("File1", fset.Base(), len(src)), []byte(src), v, dontInsertSemis)
 	for {
 		if _, tok, _ := s.Scan(); tok == token.EOF {
 			break
@@ -607,7 +605,7 @@ func (h *errorCollector) Error(pos token.Position, msg string) {
 func checkError(t *testing.T, src string, tok token.Token, pos int, err string) {
 	var s Scanner
 	var h errorCollector
-	s.Init(fset.AddFile("", fset.Base(), len(src)), []byte(src), &h, ScanComments)
+	s.Init(fset.AddFile("", fset.Base(), len(src)), []byte(src), &h, ScanComments|dontInsertSemis)
 	_, tok0, _ := s.Scan()
 	_, tok1, _ := s.Scan()
 	if tok0 != tok {
@@ -670,3 +668,20 @@ func TestScanErrors(t *testing.T) {
 		checkError(t, e.src, e.tok, e.pos, e.err)
 	}
 }
+
+func BenchmarkScan(b *testing.B) {
+	b.StopTimer()
+	fset := token.NewFileSet()
+	file := fset.AddFile("", fset.Base(), len(source))
+	var s Scanner
+	b.StartTimer()
+	for i := b.N - 1; i >= 0; i-- {
+		s.Init(file, source, nil, ScanComments)
+		for {
+			_, tok, _ := s.Scan()
+			if tok == token.EOF {
+				break
+			}
+		}
+	}
+}