Diffstat (limited to 'src/pkg/go/scanner')

 src/pkg/go/scanner/Makefile        |   2
 src/pkg/go/scanner/scanner.go      | 336
 src/pkg/go/scanner/scanner_test.go | 496

 3 files changed, 455 insertions, 379 deletions
diff --git a/src/pkg/go/scanner/Makefile b/src/pkg/go/scanner/Makefile index 70d21a972..453faac00 100644 --- a/src/pkg/go/scanner/Makefile +++ b/src/pkg/go/scanner/Makefile @@ -2,7 +2,7 @@ # Use of this source code is governed by a BSD-style # license that can be found in the LICENSE file. -include ../../../Make.$(GOARCH) +include ../../../Make.inc TARG=go/scanner GOFILES=\ diff --git a/src/pkg/go/scanner/scanner.go b/src/pkg/go/scanner/scanner.go index e5ac9d772..6ce846cd8 100644 --- a/src/pkg/go/scanner/scanner.go +++ b/src/pkg/go/scanner/scanner.go @@ -4,13 +4,25 @@ // A scanner for Go source text. Takes a []byte as source which can // then be tokenized through repeated calls to the Scan function. -// For a sample use of a scanner, see the implementation of Tokenize. +// Typical use: +// +// var s Scanner +// fset := token.NewFileSet() // position information is relative to fset +// s.Init(fset, filename, src, nil /* no error handler */, 0) +// for { +// pos, tok, lit := s.Scan() +// if tok == token.EOF { +// break +// } +// // do something here with pos, tok, and lit +// } // package scanner import ( "bytes" "go/token" + "path" "strconv" "unicode" "utf8" @@ -19,20 +31,22 @@ import ( // A Scanner holds the scanner's internal state while processing // a given text. It can be allocated as part of another data -// structure but must be initialized via Init before use. For -// a sample use, see the implementation of Tokenize. +// structure but must be initialized via Init before use. // type Scanner struct { // immutable state + file *token.File // source file handle + dir string // directory portion of file.Name() src []byte // source err ErrorHandler // error reporting; or nil mode uint // scanning mode // scanning state - pos token.Position // previous reading position (position before ch) - offset int // current reading offset (position after ch) - ch int // one char look-ahead - insertSemi bool // insert a semicolon before next newline + ch int // current character + offset int // character offset + rdOffset int // reading offset (position after current character) + lineOffset int // current line offset + insertSemi bool // insert a semicolon before next newline // public state - ok to modify ErrorCount int // number of errors encountered @@ -43,29 +57,31 @@ type Scanner struct { // S.ch < 0 means end-of-file. // func (S *Scanner) next() { - if S.offset < len(S.src) { - S.pos.Offset = S.offset - S.pos.Column++ + if S.rdOffset < len(S.src) { + S.offset = S.rdOffset if S.ch == '\n' { - // next character starts a new line - S.pos.Line++ - S.pos.Column = 1 + S.lineOffset = S.offset + S.file.AddLine(S.offset) } - r, w := int(S.src[S.offset]), 1 + r, w := int(S.src[S.rdOffset]), 1 switch { case r == 0: - S.error(S.pos, "illegal character NUL") + S.error(S.offset, "illegal character NUL") case r >= 0x80: // not ASCII - r, w = utf8.DecodeRune(S.src[S.offset:]) + r, w = utf8.DecodeRune(S.src[S.rdOffset:]) if r == utf8.RuneError && w == 1 { - S.error(S.pos, "illegal UTF-8 encoding") + S.error(S.offset, "illegal UTF-8 encoding") } } - S.offset += w + S.rdOffset += w S.ch = r } else { - S.pos.Offset = len(S.src) + S.offset = len(S.src) + if S.ch == '\n' { + S.lineOffset = S.offset + S.file.AddLine(S.offset) + } S.ch = -1 // eof } } @@ -80,24 +96,38 @@ const ( InsertSemis // automatically insert semicolons ) +// TODO(gri) Would it be better to simply provide *token.File to Init +// instead of fset, and filename, and then return the file? 
+// It could cause an error/panic if the provided file.Size() +// doesn't match len(src). -// Init prepares the scanner S to tokenize the text src. Calls to Scan -// will use the error handler err if they encounter a syntax error and -// err is not nil. Also, for each error encountered, the Scanner field -// ErrorCount is incremented by one. The filename parameter is used as -// filename in the token.Position returned by Scan for each token. The -// mode parameter determines how comments and illegal characters are -// handled. +// Init prepares the scanner S to tokenize the text src. It sets the +// scanner at the beginning of the source text, adds a new file with +// the given filename to the file set fset, and returns that file. +// +// Calls to Scan will use the error handler err if they encounter a +// syntax error and err is not nil. Also, for each error encountered, +// the Scanner field ErrorCount is incremented by one. The mode parameter +// determines how comments, illegal characters, and semicolons are handled. // -func (S *Scanner) Init(filename string, src []byte, err ErrorHandler, mode uint) { +func (S *Scanner) Init(fset *token.FileSet, filename string, src []byte, err ErrorHandler, mode uint) *token.File { // Explicitly initialize all fields since a scanner may be reused. + S.file = fset.AddFile(filename, fset.Base(), len(src)) + S.dir, _ = path.Split(filename) S.src = src S.err = err S.mode = mode - S.pos = token.Position{filename, 0, 1, 0} + + S.ch = ' ' S.offset = 0 + S.rdOffset = 0 + S.lineOffset = 0 + S.insertSemi = false S.ErrorCount = 0 + S.next() + + return S.file } @@ -131,111 +161,109 @@ func charString(ch int) string { } -func (S *Scanner) error(pos token.Position, msg string) { +func (S *Scanner) error(offs int, msg string) { if S.err != nil { - S.err.Error(pos, msg) + S.err.Error(S.file.Position(S.file.Pos(offs)), msg) } S.ErrorCount++ } -func (S *Scanner) expect(ch int) { - if S.ch != ch { - S.error(S.pos, "expected "+charString(ch)+", found "+charString(S.ch)) +var prefix = []byte("//line ") + +func (S *Scanner) interpretLineComment(text []byte) { + if bytes.HasPrefix(text, prefix) { + // get filename and line number, if any + if i := bytes.Index(text, []byte{':'}); i > 0 { + if line, err := strconv.Atoi(string(text[i+1:])); err == nil && line > 0 { + // valid //line filename:line comment; + filename := path.Clean(string(text[len(prefix):i])) + if filename[0] != '/' { + // make filename relative to current directory + filename = path.Join(S.dir, filename) + } + // update scanner position + S.file.AddLineInfo(S.lineOffset, filename, line-1) // -1 since comment applies to next line + } + } } - S.next() // always make progress } -var prefix = []byte("line ") - -func (S *Scanner) scanComment(pos token.Position) { - // first '/' already consumed +func (S *Scanner) scanComment() { + // initial '/' already consumed; S.ch == '/' || S.ch == '*' + offs := S.offset - 1 // position of initial '/' if S.ch == '/' { //-style comment - for S.ch >= 0 { + S.next() + for S.ch != '\n' && S.ch >= 0 { S.next() - if S.ch == '\n' { - // '\n' is not part of the comment for purposes of scanning - // (the comment ends on the same line where it started) - if pos.Column == 1 { - text := S.src[pos.Offset+2 : S.pos.Offset] - if bytes.HasPrefix(text, prefix) { - // comment starts at beginning of line with "//line "; - // get filename and line number, if any - i := bytes.Index(text, []byte{':'}) - if i >= 0 { - if line, err := strconv.Atoi(string(text[i+1:])); err == nil && line > 0 { - // 
valid //line filename:line comment; - // update scanner position - S.pos.Filename = string(text[len(prefix):i]) - S.pos.Line = line - 1 // -1 since the '\n' has not been consumed yet - } - } - } - } - return - } } + if offs == S.lineOffset { + // comment starts at the beginning of the current line + S.interpretLineComment(S.src[offs:S.offset]) + } + return + } - } else { - /*-style comment */ - S.expect('*') - for S.ch >= 0 { - ch := S.ch + /*-style comment */ + S.next() + for S.ch >= 0 { + ch := S.ch + S.next() + if ch == '*' && S.ch == '/' { S.next() - if ch == '*' && S.ch == '/' { - S.next() - return - } + return } } - S.error(pos, "comment not terminated") + S.error(offs, "comment not terminated") } -func (S *Scanner) findNewline(pos token.Position) bool { - // first '/' already consumed; assume S.ch == '/' || S.ch == '*' +func (S *Scanner) findLineEnd() bool { + // initial '/' already consumed + + defer func(offs int) { + // reset scanner state to where it was upon calling findLineEnd + S.ch = '/' + S.offset = offs + S.rdOffset = offs + 1 + S.next() // consume initial '/' again + }(S.offset - 1) - // read ahead until a newline or non-comment token is found - newline := false - for pos1 := pos; S.ch >= 0; { + // read ahead until a newline, EOF, or non-comment token is found + for S.ch == '/' || S.ch == '*' { if S.ch == '/' { //-style comment always contains a newline - newline = true - break + return true } - S.scanComment(pos1) - if pos1.Line < S.pos.Line { - /*-style comment contained a newline */ - newline = true - break + /*-style comment: look for newline */ + S.next() + for S.ch >= 0 { + ch := S.ch + if ch == '\n' { + return true + } + S.next() + if ch == '*' && S.ch == '/' { + S.next() + break + } } S.skipWhitespace() // S.insertSemi is set - if S.ch == '\n' { - newline = true - break + if S.ch < 0 || S.ch == '\n' { + return true } if S.ch != '/' { // non-comment token - break - } - pos1 = S.pos - S.next() - if S.ch != '/' && S.ch != '*' { - // non-comment token - break + return false } + S.next() // consume '/' } - // reset position to where it was upon calling findNewline - S.pos = pos - S.offset = pos.Offset + 1 - S.next() - - return newline + return false } @@ -250,11 +278,11 @@ func isDigit(ch int) bool { func (S *Scanner) scanIdentifier() token.Token { - pos := S.pos.Offset + offs := S.offset for isLetter(S.ch) || isDigit(S.ch) { S.next() } - return token.Lookup(S.src[pos:S.pos.Offset]) + return token.Lookup(S.src[offs:S.offset]) } @@ -278,7 +306,7 @@ func (S *Scanner) scanMantissa(base int) { } -func (S *Scanner) scanNumber(pos token.Position, seenDecimalPoint bool) token.Token { +func (S *Scanner) scanNumber(seenDecimalPoint bool) token.Token { // digitVal(S.ch) < 10 tok := token.INT @@ -290,6 +318,7 @@ func (S *Scanner) scanNumber(pos token.Position, seenDecimalPoint bool) token.To if S.ch == '0' { // int or float + offs := S.offset S.next() if S.ch == 'x' || S.ch == 'X' { // hexadecimal int @@ -309,7 +338,7 @@ func (S *Scanner) scanNumber(pos token.Position, seenDecimalPoint bool) token.To } // octal int if seenDecimalDigit { - S.error(pos, "illegal octal number") + S.error(offs, "illegal octal number") } } goto exit @@ -346,7 +375,7 @@ exit: func (S *Scanner) scanEscape(quote int) { - pos := S.pos + offs := S.offset var i, base, max uint32 switch S.ch { @@ -366,28 +395,33 @@ func (S *Scanner) scanEscape(quote int) { i, base, max = 8, 16, unicode.MaxRune default: S.next() // always make progress - S.error(pos, "unknown escape sequence") + S.error(offs, "unknown escape 
sequence") return } var x uint32 - for ; i > 0; i-- { + for ; i > 0 && S.ch != quote && S.ch >= 0; i-- { d := uint32(digitVal(S.ch)) - if d > base { - S.error(S.pos, "illegal character in escape sequence") - return + if d >= base { + S.error(S.offset, "illegal character in escape sequence") + break } x = x*base + d S.next() } + // in case of an error, consume remaining chars + for ; i > 0 && S.ch != quote && S.ch >= 0; i-- { + S.next() + } if x > max || 0xd800 <= x && x < 0xe000 { - S.error(pos, "escape sequence is invalid Unicode code point") + S.error(offs, "escape sequence is invalid Unicode code point") } } -func (S *Scanner) scanChar(pos token.Position) { - // '\'' already consumed +func (S *Scanner) scanChar() { + // '\'' opening already consumed + offs := S.offset - 1 n := 0 for S.ch != '\'' { @@ -395,7 +429,7 @@ func (S *Scanner) scanChar(pos token.Position) { n++ S.next() if ch == '\n' || ch < 0 { - S.error(pos, "character literal not terminated") + S.error(offs, "character literal not terminated") n = 1 break } @@ -407,19 +441,20 @@ func (S *Scanner) scanChar(pos token.Position) { S.next() if n != 1 { - S.error(pos, "illegal character literal") + S.error(offs, "illegal character literal") } } -func (S *Scanner) scanString(pos token.Position) { - // '"' already consumed +func (S *Scanner) scanString() { + // '"' opening already consumed + offs := S.offset - 1 for S.ch != '"' { ch := S.ch S.next() if ch == '\n' || ch < 0 { - S.error(pos, "string not terminated") + S.error(offs, "string not terminated") break } if ch == '\\' { @@ -431,14 +466,15 @@ func (S *Scanner) scanString(pos token.Position) { } -func (S *Scanner) scanRawString(pos token.Position) { - // '`' already consumed +func (S *Scanner) scanRawString() { + // '`' opening already consumed + offs := S.offset - 1 for S.ch != '`' { ch := S.ch S.next() if ch < 0 { - S.error(pos, "string not terminated") + S.error(offs, "string not terminated") break } } @@ -499,12 +535,17 @@ func (S *Scanner) switch4(tok0, tok1 token.Token, ch2 int, tok2, tok3 token.Toke } -var semicolon = []byte{';'} +var newline = []byte{'\n'} // Scan scans the next token and returns the token position pos, // the token tok, and the literal text lit corresponding to the // token. The source end is indicated by token.EOF. // +// If the returned token is token.SEMICOLON, the corresponding +// literal value is ";" if the semicolon was present in the source, +// and "\n" if the semicolon was inserted because of a newline or +// at EOF. +// // For more tolerant parsing, Scan will return a valid token if // possible even if a syntax error was encountered. Thus, even // if the resulting token sequence contains no illegal tokens, @@ -512,13 +553,18 @@ var semicolon = []byte{';'} // must check the scanner's ErrorCount or the number of calls // of the error handler, if there was one installed. // -func (S *Scanner) Scan() (pos token.Position, tok token.Token, lit []byte) { +// Scan adds line information to the file added to the file +// set with Init. Token positions are relative to that file +// and thus relative to the file set. 
+// +func (S *Scanner) Scan() (token.Pos, token.Token, []byte) { scanAgain: S.skipWhitespace() // current token start insertSemi := false - pos, tok = S.pos, token.ILLEGAL + offs := S.offset + tok := token.ILLEGAL // determine token value switch ch := S.ch; { @@ -530,36 +576,40 @@ scanAgain: } case digitVal(ch) < 10: insertSemi = true - tok = S.scanNumber(pos, false) + tok = S.scanNumber(false) default: S.next() // always make progress switch ch { case -1: + if S.insertSemi { + S.insertSemi = false // EOF consumed + return S.file.Pos(offs), token.SEMICOLON, newline + } tok = token.EOF case '\n': // we only reach here if S.insertSemi was // set in the first place and exited early // from S.skipWhitespace() S.insertSemi = false // newline consumed - return pos, token.SEMICOLON, semicolon + return S.file.Pos(offs), token.SEMICOLON, newline case '"': insertSemi = true tok = token.STRING - S.scanString(pos) + S.scanString() case '\'': insertSemi = true tok = token.CHAR - S.scanChar(pos) + S.scanChar() case '`': insertSemi = true tok = token.STRING - S.scanRawString(pos) + S.scanRawString() case ':': tok = S.switch2(token.COLON, token.DEFINE) case '.': if digitVal(S.ch) < 10 { insertSemi = true - tok = S.scanNumber(pos, true) + tok = S.scanNumber(true) } else if S.ch == '.' { S.next() if S.ch == '.' { @@ -603,15 +653,15 @@ scanAgain: case '/': if S.ch == '/' || S.ch == '*' { // comment - if S.insertSemi && S.findNewline(pos) { + if S.insertSemi && S.findLineEnd() { // reset position to the beginning of the comment - S.pos = pos - S.offset = pos.Offset + 1 S.ch = '/' + S.offset = offs + S.rdOffset = offs + 1 S.insertSemi = false // newline consumed - return pos, token.SEMICOLON, semicolon + return S.file.Pos(offs), token.SEMICOLON, newline } - S.scanComment(pos) + S.scanComment() if S.mode&ScanComments == 0 { // skip comment S.insertSemi = false // newline consumed @@ -649,7 +699,7 @@ scanAgain: tok = S.switch3(token.OR, token.OR_ASSIGN, '|', token.LOR) default: if S.mode&AllowIllegalChars == 0 { - S.error(pos, "illegal character "+charString(ch)) + S.error(offs, "illegal character "+charString(ch)) } insertSemi = S.insertSemi // preserve insertSemi info } @@ -658,21 +708,5 @@ scanAgain: if S.mode&InsertSemis != 0 { S.insertSemi = insertSemi } - return pos, tok, S.src[pos.Offset:S.pos.Offset] -} - - -// Tokenize calls a function f with the token position, token value, and token -// text for each token in the source src. The other parameters have the same -// meaning as for the Init function. Tokenize keeps scanning until f returns -// false (usually when the token value is token.EOF). The result is the number -// of errors encountered. 
-// -func Tokenize(filename string, src []byte, err ErrorHandler, mode uint, f func(pos token.Position, tok token.Token, lit []byte) bool) int { - var s Scanner - s.Init(filename, src, err, mode) - for f(s.Scan()) { - // action happens in f - } - return s.ErrorCount + return S.file.Pos(offs), tok, S.src[offs:S.offset] } diff --git a/src/pkg/go/scanner/scanner_test.go b/src/pkg/go/scanner/scanner_test.go index 002a81dd9..b1004f89d 100644 --- a/src/pkg/go/scanner/scanner_test.go +++ b/src/pkg/go/scanner/scanner_test.go @@ -11,6 +11,9 @@ import ( ) +var fset = token.NewFileSet() + + const /* class */ ( special = iota literal @@ -41,136 +44,136 @@ type elt struct { var tokens = [...]elt{ // Special tokens - elt{token.COMMENT, "/* a comment */", special}, - elt{token.COMMENT, "// a comment \n", special}, + {token.COMMENT, "/* a comment */", special}, + {token.COMMENT, "// a comment \n", special}, // Identifiers and basic type literals - elt{token.IDENT, "foobar", literal}, - elt{token.IDENT, "a۰۱۸", literal}, - elt{token.IDENT, "foo६४", literal}, - elt{token.IDENT, "bar9876", literal}, - elt{token.INT, "0", literal}, - elt{token.INT, "1", literal}, - elt{token.INT, "123456789012345678890", literal}, - elt{token.INT, "01234567", literal}, - elt{token.INT, "0xcafebabe", literal}, - elt{token.FLOAT, "0.", literal}, - elt{token.FLOAT, ".0", literal}, - elt{token.FLOAT, "3.14159265", literal}, - elt{token.FLOAT, "1e0", literal}, - elt{token.FLOAT, "1e+100", literal}, - elt{token.FLOAT, "1e-100", literal}, - elt{token.FLOAT, "2.71828e-1000", literal}, - elt{token.IMAG, "0i", literal}, - elt{token.IMAG, "1i", literal}, - elt{token.IMAG, "012345678901234567889i", literal}, - elt{token.IMAG, "123456789012345678890i", literal}, - elt{token.IMAG, "0.i", literal}, - elt{token.IMAG, ".0i", literal}, - elt{token.IMAG, "3.14159265i", literal}, - elt{token.IMAG, "1e0i", literal}, - elt{token.IMAG, "1e+100i", literal}, - elt{token.IMAG, "1e-100i", literal}, - elt{token.IMAG, "2.71828e-1000i", literal}, - elt{token.CHAR, "'a'", literal}, - elt{token.CHAR, "'\\000'", literal}, - elt{token.CHAR, "'\\xFF'", literal}, - elt{token.CHAR, "'\\uff16'", literal}, - elt{token.CHAR, "'\\U0000ff16'", literal}, - elt{token.STRING, "`foobar`", literal}, - elt{token.STRING, "`" + `foo + {token.IDENT, "foobar", literal}, + {token.IDENT, "a۰۱۸", literal}, + {token.IDENT, "foo६४", literal}, + {token.IDENT, "bar9876", literal}, + {token.INT, "0", literal}, + {token.INT, "1", literal}, + {token.INT, "123456789012345678890", literal}, + {token.INT, "01234567", literal}, + {token.INT, "0xcafebabe", literal}, + {token.FLOAT, "0.", literal}, + {token.FLOAT, ".0", literal}, + {token.FLOAT, "3.14159265", literal}, + {token.FLOAT, "1e0", literal}, + {token.FLOAT, "1e+100", literal}, + {token.FLOAT, "1e-100", literal}, + {token.FLOAT, "2.71828e-1000", literal}, + {token.IMAG, "0i", literal}, + {token.IMAG, "1i", literal}, + {token.IMAG, "012345678901234567889i", literal}, + {token.IMAG, "123456789012345678890i", literal}, + {token.IMAG, "0.i", literal}, + {token.IMAG, ".0i", literal}, + {token.IMAG, "3.14159265i", literal}, + {token.IMAG, "1e0i", literal}, + {token.IMAG, "1e+100i", literal}, + {token.IMAG, "1e-100i", literal}, + {token.IMAG, "2.71828e-1000i", literal}, + {token.CHAR, "'a'", literal}, + {token.CHAR, "'\\000'", literal}, + {token.CHAR, "'\\xFF'", literal}, + {token.CHAR, "'\\uff16'", literal}, + {token.CHAR, "'\\U0000ff16'", literal}, + {token.STRING, "`foobar`", literal}, + {token.STRING, "`" + `foo bar` + "`", literal, }, 
// Operators and delimitors - elt{token.ADD, "+", operator}, - elt{token.SUB, "-", operator}, - elt{token.MUL, "*", operator}, - elt{token.QUO, "/", operator}, - elt{token.REM, "%", operator}, - - elt{token.AND, "&", operator}, - elt{token.OR, "|", operator}, - elt{token.XOR, "^", operator}, - elt{token.SHL, "<<", operator}, - elt{token.SHR, ">>", operator}, - elt{token.AND_NOT, "&^", operator}, - - elt{token.ADD_ASSIGN, "+=", operator}, - elt{token.SUB_ASSIGN, "-=", operator}, - elt{token.MUL_ASSIGN, "*=", operator}, - elt{token.QUO_ASSIGN, "/=", operator}, - elt{token.REM_ASSIGN, "%=", operator}, - - elt{token.AND_ASSIGN, "&=", operator}, - elt{token.OR_ASSIGN, "|=", operator}, - elt{token.XOR_ASSIGN, "^=", operator}, - elt{token.SHL_ASSIGN, "<<=", operator}, - elt{token.SHR_ASSIGN, ">>=", operator}, - elt{token.AND_NOT_ASSIGN, "&^=", operator}, - - elt{token.LAND, "&&", operator}, - elt{token.LOR, "||", operator}, - elt{token.ARROW, "<-", operator}, - elt{token.INC, "++", operator}, - elt{token.DEC, "--", operator}, - - elt{token.EQL, "==", operator}, - elt{token.LSS, "<", operator}, - elt{token.GTR, ">", operator}, - elt{token.ASSIGN, "=", operator}, - elt{token.NOT, "!", operator}, - - elt{token.NEQ, "!=", operator}, - elt{token.LEQ, "<=", operator}, - elt{token.GEQ, ">=", operator}, - elt{token.DEFINE, ":=", operator}, - elt{token.ELLIPSIS, "...", operator}, - - elt{token.LPAREN, "(", operator}, - elt{token.LBRACK, "[", operator}, - elt{token.LBRACE, "{", operator}, - elt{token.COMMA, ",", operator}, - elt{token.PERIOD, ".", operator}, - - elt{token.RPAREN, ")", operator}, - elt{token.RBRACK, "]", operator}, - elt{token.RBRACE, "}", operator}, - elt{token.SEMICOLON, ";", operator}, - elt{token.COLON, ":", operator}, + {token.ADD, "+", operator}, + {token.SUB, "-", operator}, + {token.MUL, "*", operator}, + {token.QUO, "/", operator}, + {token.REM, "%", operator}, + + {token.AND, "&", operator}, + {token.OR, "|", operator}, + {token.XOR, "^", operator}, + {token.SHL, "<<", operator}, + {token.SHR, ">>", operator}, + {token.AND_NOT, "&^", operator}, + + {token.ADD_ASSIGN, "+=", operator}, + {token.SUB_ASSIGN, "-=", operator}, + {token.MUL_ASSIGN, "*=", operator}, + {token.QUO_ASSIGN, "/=", operator}, + {token.REM_ASSIGN, "%=", operator}, + + {token.AND_ASSIGN, "&=", operator}, + {token.OR_ASSIGN, "|=", operator}, + {token.XOR_ASSIGN, "^=", operator}, + {token.SHL_ASSIGN, "<<=", operator}, + {token.SHR_ASSIGN, ">>=", operator}, + {token.AND_NOT_ASSIGN, "&^=", operator}, + + {token.LAND, "&&", operator}, + {token.LOR, "||", operator}, + {token.ARROW, "<-", operator}, + {token.INC, "++", operator}, + {token.DEC, "--", operator}, + + {token.EQL, "==", operator}, + {token.LSS, "<", operator}, + {token.GTR, ">", operator}, + {token.ASSIGN, "=", operator}, + {token.NOT, "!", operator}, + + {token.NEQ, "!=", operator}, + {token.LEQ, "<=", operator}, + {token.GEQ, ">=", operator}, + {token.DEFINE, ":=", operator}, + {token.ELLIPSIS, "...", operator}, + + {token.LPAREN, "(", operator}, + {token.LBRACK, "[", operator}, + {token.LBRACE, "{", operator}, + {token.COMMA, ",", operator}, + {token.PERIOD, ".", operator}, + + {token.RPAREN, ")", operator}, + {token.RBRACK, "]", operator}, + {token.RBRACE, "}", operator}, + {token.SEMICOLON, ";", operator}, + {token.COLON, ":", operator}, // Keywords - elt{token.BREAK, "break", keyword}, - elt{token.CASE, "case", keyword}, - elt{token.CHAN, "chan", keyword}, - elt{token.CONST, "const", keyword}, - elt{token.CONTINUE, "continue", keyword}, - - 
elt{token.DEFAULT, "default", keyword}, - elt{token.DEFER, "defer", keyword}, - elt{token.ELSE, "else", keyword}, - elt{token.FALLTHROUGH, "fallthrough", keyword}, - elt{token.FOR, "for", keyword}, - - elt{token.FUNC, "func", keyword}, - elt{token.GO, "go", keyword}, - elt{token.GOTO, "goto", keyword}, - elt{token.IF, "if", keyword}, - elt{token.IMPORT, "import", keyword}, - - elt{token.INTERFACE, "interface", keyword}, - elt{token.MAP, "map", keyword}, - elt{token.PACKAGE, "package", keyword}, - elt{token.RANGE, "range", keyword}, - elt{token.RETURN, "return", keyword}, - - elt{token.SELECT, "select", keyword}, - elt{token.STRUCT, "struct", keyword}, - elt{token.SWITCH, "switch", keyword}, - elt{token.TYPE, "type", keyword}, - elt{token.VAR, "var", keyword}, + {token.BREAK, "break", keyword}, + {token.CASE, "case", keyword}, + {token.CHAN, "chan", keyword}, + {token.CONST, "const", keyword}, + {token.CONTINUE, "continue", keyword}, + + {token.DEFAULT, "default", keyword}, + {token.DEFER, "defer", keyword}, + {token.ELSE, "else", keyword}, + {token.FALLTHROUGH, "fallthrough", keyword}, + {token.FOR, "for", keyword}, + + {token.FUNC, "func", keyword}, + {token.GO, "go", keyword}, + {token.GOTO, "goto", keyword}, + {token.IF, "if", keyword}, + {token.IMPORT, "import", keyword}, + + {token.INTERFACE, "interface", keyword}, + {token.MAP, "map", keyword}, + {token.PACKAGE, "package", keyword}, + {token.RANGE, "range", keyword}, + {token.RETURN, "return", keyword}, + + {token.SELECT, "select", keyword}, + {token.STRUCT, "struct", keyword}, + {token.SWITCH, "switch", keyword}, + {token.TYPE, "type", keyword}, + {token.VAR, "var", keyword}, } @@ -196,18 +199,19 @@ func newlineCount(s string) int { } -func checkPos(t *testing.T, lit string, pos, expected token.Position) { +func checkPos(t *testing.T, lit string, p token.Pos, expected token.Position) { + pos := fset.Position(p) if pos.Filename != expected.Filename { - t.Errorf("bad filename for %s: got %s, expected %s", lit, pos.Filename, expected.Filename) + t.Errorf("bad filename for %q: got %s, expected %s", lit, pos.Filename, expected.Filename) } if pos.Offset != expected.Offset { - t.Errorf("bad position for %s: got %d, expected %d", lit, pos.Offset, expected.Offset) + t.Errorf("bad position for %q: got %d, expected %d", lit, pos.Offset, expected.Offset) } if pos.Line != expected.Line { - t.Errorf("bad line for %s: got %d, expected %d", lit, pos.Line, expected.Line) + t.Errorf("bad line for %q: got %d, expected %d", lit, pos.Line, expected.Line) } if pos.Column != expected.Column { - t.Errorf("bad column for %s: got %d, expected %d", lit, pos.Column, expected.Column) + t.Errorf("bad column for %q: got %d, expected %d", lit, pos.Column, expected.Column) } } @@ -219,66 +223,76 @@ func TestScan(t *testing.T) { for _, e := range tokens { src += e.lit + whitespace } - src_linecount := newlineCount(src) + src_linecount := newlineCount(src) + 1 whitespace_linecount := newlineCount(whitespace) // verify scan + var s Scanner + s.Init(fset, "", []byte(src), &testErrorHandler{t}, ScanComments) index := 0 epos := token.Position{"", 0, 1, 1} // expected position - nerrors := Tokenize("", []byte(src), &testErrorHandler{t}, ScanComments, - func(pos token.Position, tok token.Token, litb []byte) bool { - e := elt{token.EOF, "", special} - if index < len(tokens) { - e = tokens[index] - } - lit := string(litb) - if tok == token.EOF { - lit = "<EOF>" - epos.Line = src_linecount - epos.Column = 1 - } - checkPos(t, lit, pos, epos) - if tok != e.tok { - t.Errorf("bad 
token for %q: got %s, expected %s", lit, tok.String(), e.tok.String()) - } - if e.tok.IsLiteral() && lit != e.lit { - t.Errorf("bad literal for %q: got %q, expected %q", lit, lit, e.lit) - } - if tokenclass(tok) != e.class { - t.Errorf("bad class for %q: got %d, expected %d", lit, tokenclass(tok), e.class) - } - epos.Offset += len(lit) + len(whitespace) - epos.Line += newlineCount(lit) + whitespace_linecount - if tok == token.COMMENT && litb[1] == '/' { - // correct for unaccounted '/n' in //-style comment - epos.Offset++ - epos.Line++ - } - index++ - return tok != token.EOF - }) - if nerrors != 0 { - t.Errorf("found %d errors", nerrors) + for { + pos, tok, litb := s.Scan() + e := elt{token.EOF, "", special} + if index < len(tokens) { + e = tokens[index] + } + lit := string(litb) + if tok == token.EOF { + lit = "<EOF>" + epos.Line = src_linecount + epos.Column = 1 + } + checkPos(t, lit, pos, epos) + if tok != e.tok { + t.Errorf("bad token for %q: got %s, expected %s", lit, tok.String(), e.tok.String()) + } + if e.tok.IsLiteral() && lit != e.lit { + t.Errorf("bad literal for %q: got %q, expected %q", lit, lit, e.lit) + } + if tokenclass(tok) != e.class { + t.Errorf("bad class for %q: got %d, expected %d", lit, tokenclass(tok), e.class) + } + epos.Offset += len(lit) + len(whitespace) + epos.Line += newlineCount(lit) + whitespace_linecount + if tok == token.COMMENT && litb[1] == '/' { + // correct for unaccounted '/n' in //-style comment + epos.Offset++ + epos.Line++ + } + index++ + if tok == token.EOF { + break + } + } + if s.ErrorCount != 0 { + t.Errorf("found %d errors", s.ErrorCount) } } func checkSemi(t *testing.T, line string, mode uint) { var S Scanner - S.Init("TestSemis", []byte(line), nil, mode) + file := S.Init(fset, "TestSemis", []byte(line), nil, mode) pos, tok, lit := S.Scan() for tok != token.EOF { if tok == token.ILLEGAL { + // the illegal token literal indicates what + // kind of semicolon literal to expect + semiLit := "\n" + if lit[0] == '#' { + semiLit = ";" + } // next token must be a semicolon - offs := pos.Offset + 1 + semiPos := file.Position(pos) + semiPos.Offset++ + semiPos.Column++ pos, tok, lit = S.Scan() if tok == token.SEMICOLON { - if pos.Offset != offs { - t.Errorf("bad offset for %q: got %d, expected %d", line, pos.Offset, offs) - } - if string(lit) != ";" { - t.Errorf(`bad literal for %q: got %q, expected ";"`, line, lit) + if string(lit) != semiLit { + t.Errorf(`bad literal for %q: got %q, expected %q`, line, lit, semiLit) } + checkPos(t, line, pos, semiPos) } else { t.Errorf("bad token for %q: got %s, expected ;", line, tok.String()) } @@ -291,9 +305,10 @@ func checkSemi(t *testing.T, line string, mode uint) { var lines = []string{ - // the $ character indicates where a semicolon is expected + // # indicates a semicolon present in the source + // $ indicates an automatically inserted semicolon "", - "$;", + "#;", "foo$\n", "123$\n", "1.2$\n", @@ -354,7 +369,7 @@ var lines = []string{ ")$\n", "]$\n", "}$\n", - "$;\n", + "#;\n", ":\n", "break$\n", @@ -388,57 +403,66 @@ var lines = []string{ "var\n", "foo$//comment\n", + "foo$//comment", "foo$/*comment*/\n", "foo$/*\n*/", "foo$/*comment*/ \n", "foo$/*\n*/ ", + "foo $// comment\n", + "foo $// comment", "foo $/*comment*/\n", "foo $/*\n*/", - - "foo $/*comment*/\n", + "foo $/* */ /* \n */ bar$/**/\n", "foo $/*0*/ /*1*/ /*2*/\n", + "foo $/*comment*/ \n", "foo $/*0*/ /*1*/ /*2*/ \n", - "foo $/**/ /*-------------*/ /*----\n*/bar $/* \n*/baa", + "foo $/**/ /*-------------*/ /*----\n*/bar $/* \n*/baa$\n", + "foo $/* 
an EOF terminates a line */", + "foo $/* an EOF terminates a line */ /*", + "foo $/* an EOF terminates a line */ //", "package main$\n\nfunc main() {\n\tif {\n\t\treturn /* */ }$\n}$\n", + "package main$", } func TestSemis(t *testing.T) { for _, line := range lines { checkSemi(t, line, AllowIllegalChars|InsertSemis) - } - for _, line := range lines { checkSemi(t, line, AllowIllegalChars|InsertSemis|ScanComments) + + // if the input ended in newlines, the input must tokenize the + // same with or without those newlines + for i := len(line) - 1; i >= 0 && line[i] == '\n'; i-- { + checkSemi(t, line[0:i], AllowIllegalChars|InsertSemis) + checkSemi(t, line[0:i], AllowIllegalChars|InsertSemis|ScanComments) + } } } -type seg struct { +var segments = []struct { srcline string // a line of source text filename string // filename for current token line int // line number for current token -} - - -var segments = []seg{ +}{ // exactly one token per line since the test consumes one token per segment - seg{" line1", "TestLineComments", 1}, - seg{"\nline2", "TestLineComments", 2}, - seg{"\nline3 //line File1.go:100", "TestLineComments", 3}, // bad line comment, ignored - seg{"\nline4", "TestLineComments", 4}, - seg{"\n//line File1.go:100\n line100", "File1.go", 100}, - seg{"\n//line File2.go:200\n line200", "File2.go", 200}, - seg{"\n//line :1\n line1", "", 1}, - seg{"\n//line foo:42\n line42", "foo", 42}, - seg{"\n //line foo:42\n line44", "foo", 44}, // bad line comment, ignored - seg{"\n//line foo 42\n line46", "foo", 46}, // bad line comment, ignored - seg{"\n//line foo:42 extra text\n line48", "foo", 48}, // bad line comment, ignored - seg{"\n//line foo:42\n line42", "foo", 42}, - seg{"\n//line foo:42\n line42", "foo", 42}, - seg{"\n//line File1.go:100\n line100", "File1.go", 100}, + {" line1", "dir/TestLineComments", 1}, + {"\nline2", "dir/TestLineComments", 2}, + {"\nline3 //line File1.go:100", "dir/TestLineComments", 3}, // bad line comment, ignored + {"\nline4", "dir/TestLineComments", 4}, + {"\n//line File1.go:100\n line100", "dir/File1.go", 100}, + {"\n//line File2.go:200\n line200", "dir/File2.go", 200}, + {"\n//line :1\n line1", "dir", 1}, + {"\n//line foo:42\n line42", "dir/foo", 42}, + {"\n //line foo:42\n line44", "dir/foo", 44}, // bad line comment, ignored + {"\n//line foo 42\n line46", "dir/foo", 46}, // bad line comment, ignored + {"\n//line foo:42 extra text\n line48", "dir/foo", 48}, // bad line comment, ignored + {"\n//line /bar:42\n line42", "/bar", 42}, + {"\n//line ./foo:42\n line42", "dir/foo", 42}, + {"\n//line a/b/c/File1.go:100\n line100", "dir/a/b/c/File1.go", 100}, } @@ -452,10 +476,11 @@ func TestLineComments(t *testing.T) { // verify scan var S Scanner - S.Init("TestLineComments", []byte(src), nil, 0) + file := S.Init(fset, "dir/TestLineComments", []byte(src), nil, 0) for _, s := range segments { - pos, _, lit := S.Scan() - checkPos(t, string(lit), pos, token.Position{s.filename, pos.Offset, s.line, pos.Column}) + p, _, lit := S.Scan() + pos := file.Position(p) + checkPos(t, string(lit), p, token.Position{s.filename, pos.Offset, s.line, pos.Column}) } if S.ErrorCount != 0 { @@ -469,7 +494,11 @@ func TestInit(t *testing.T) { var s Scanner // 1st init - s.Init("", []byte("if true { }"), nil, 0) + src1 := "if true { }" + f1 := s.Init(fset, "", []byte(src1), nil, 0) + if f1.Size() != len(src1) { + t.Errorf("bad file size: got %d, expected %d", f1.Size(), len(src1)) + } s.Scan() // if s.Scan() // true _, tok, _ := s.Scan() // { @@ -478,7 +507,11 @@ func TestInit(t *testing.T) 
{ } // 2nd init - s.Init("", []byte("go true { ]"), nil, 0) + src2 := "go true { ]" + f2 := s.Init(fset, "", []byte(src2), nil, 0) + if f2.Size() != len(src2) { + t.Errorf("bad file size: got %d, expected %d", f2.Size(), len(src2)) + } _, tok, _ = s.Scan() // go if tok != token.GO { t.Errorf("bad token: got %s, expected %s", tok.String(), token.GO) @@ -494,11 +527,11 @@ func TestIllegalChars(t *testing.T) { var s Scanner const src = "*?*$*@*" - s.Init("", []byte(src), &testErrorHandler{t}, AllowIllegalChars) + file := s.Init(fset, "", []byte(src), &testErrorHandler{t}, AllowIllegalChars) for offs, ch := range src { pos, tok, lit := s.Scan() - if pos.Offset != offs { - t.Errorf("bad position for %s: got %d, expected %d", string(lit), pos.Offset, offs) + if poffs := file.Offset(pos); poffs != offs { + t.Errorf("bad position for %s: got %d, expected %d", string(lit), poffs, offs) } if tok == token.ILLEGAL && string(lit) != string(ch) { t.Errorf("bad token: got %s, expected %s", string(lit), string(ch)) @@ -522,10 +555,13 @@ func TestStdErrorHander(t *testing.T) { "@ @ @" // original file, line 1 again v := new(ErrorVector) - nerrors := Tokenize("File1", []byte(src), v, 0, - func(pos token.Position, tok token.Token, litb []byte) bool { - return tok != token.EOF - }) + var s Scanner + s.Init(fset, "File1", []byte(src), v, 0) + for { + if _, tok, _ := s.Scan(); tok == token.EOF { + break + } + } list := v.GetErrorList(Raw) if len(list) != 9 { @@ -545,8 +581,8 @@ func TestStdErrorHander(t *testing.T) { PrintError(os.Stderr, list) } - if v.ErrorCount() != nerrors { - t.Errorf("found %d errors, expected %d", v.ErrorCount(), nerrors) + if v.ErrorCount() != s.ErrorCount { + t.Errorf("found %d errors, expected %d", v.ErrorCount(), s.ErrorCount) } } @@ -568,7 +604,7 @@ func (h *errorCollector) Error(pos token.Position, msg string) { func checkError(t *testing.T, src string, tok token.Token, pos int, err string) { var s Scanner var h errorCollector - s.Init("", []byte(src), &h, ScanComments) + s.Init(fset, "", []byte(src), &h, ScanComments) _, tok0, _ := s.Scan() _, tok1, _ := s.Scan() if tok0 != tok { @@ -593,28 +629,34 @@ func checkError(t *testing.T, src string, tok token.Token, pos int, err string) } -type srcerr struct { +var errors = []struct { src string tok token.Token pos int err string -} - -var errors = []srcerr{ - srcerr{"\"\"", token.STRING, 0, ""}, - srcerr{"\"", token.STRING, 0, "string not terminated"}, - srcerr{"/**/", token.COMMENT, 0, ""}, - srcerr{"/*", token.COMMENT, 0, "comment not terminated"}, - srcerr{"//\n", token.COMMENT, 0, ""}, - srcerr{"//", token.COMMENT, 0, "comment not terminated"}, - srcerr{"077", token.INT, 0, ""}, - srcerr{"078.", token.FLOAT, 0, ""}, - srcerr{"07801234567.", token.FLOAT, 0, ""}, - srcerr{"078e0", token.FLOAT, 0, ""}, - srcerr{"078", token.INT, 0, "illegal octal number"}, - srcerr{"07800000009", token.INT, 0, "illegal octal number"}, - srcerr{"\"abc\x00def\"", token.STRING, 4, "illegal character NUL"}, - srcerr{"\"abc\x80def\"", token.STRING, 4, "illegal UTF-8 encoding"}, +}{ + {`#`, token.ILLEGAL, 0, "illegal character '#' (U+23)"}, + {`' '`, token.CHAR, 0, ""}, + {`''`, token.CHAR, 0, "illegal character literal"}, + {`'\8'`, token.CHAR, 2, "unknown escape sequence"}, + {`'\08'`, token.CHAR, 3, "illegal character in escape sequence"}, + {`'\x0g'`, token.CHAR, 4, "illegal character in escape sequence"}, + {`'\Uffffffff'`, token.CHAR, 2, "escape sequence is invalid Unicode code point"}, + {`'`, token.CHAR, 0, "character literal not terminated"}, + 
{`""`, token.STRING, 0, ""}, + {`"`, token.STRING, 0, "string not terminated"}, + {"``", token.STRING, 0, ""}, + {"`", token.STRING, 0, "string not terminated"}, + {"/**/", token.COMMENT, 0, ""}, + {"/*", token.COMMENT, 0, "comment not terminated"}, + {"077", token.INT, 0, ""}, + {"078.", token.FLOAT, 0, ""}, + {"07801234567.", token.FLOAT, 0, ""}, + {"078e0", token.FLOAT, 0, ""}, + {"078", token.INT, 0, "illegal octal number"}, + {"07800000009", token.INT, 0, "illegal octal number"}, + {"\"abc\x00def\"", token.STRING, 4, "illegal character NUL"}, + {"\"abc\x80def\"", token.STRING, 4, "illegal UTF-8 encoding"}, } |