summaryrefslogtreecommitdiff
path: root/src/pkg/go/scanner
diff options
context:
space:
mode:
Diffstat (limited to 'src/pkg/go/scanner')
-rw-r--r--src/pkg/go/scanner/scanner.go118
-rw-r--r--src/pkg/go/scanner/scanner_test.go103
2 files changed, 137 insertions, 84 deletions
diff --git a/src/pkg/go/scanner/scanner.go b/src/pkg/go/scanner/scanner.go
index 1e259d5ed..cec82ea10 100644
--- a/src/pkg/go/scanner/scanner.go
+++ b/src/pkg/go/scanner/scanner.go
@@ -148,11 +148,14 @@ func (s *Scanner) interpretLineComment(text []byte) {
// get filename and line number, if any
if i := bytes.LastIndex(text, []byte{':'}); i > 0 {
if line, err := strconv.Atoi(string(text[i+1:])); err == nil && line > 0 {
- // valid //line filename:line comment;
- filename := filepath.Clean(string(text[len(prefix):i]))
- if !filepath.IsAbs(filename) {
- // make filename relative to current directory
- filename = filepath.Join(s.dir, filename)
+ // valid //line filename:line comment
+ filename := string(bytes.TrimSpace(text[len(prefix):i]))
+ if filename != "" {
+ filename = filepath.Clean(filename)
+ if !filepath.IsAbs(filename) {
+ // make filename relative to current directory
+ filename = filepath.Join(s.dir, filename)
+ }
}
// update scanner position
s.file.AddLineInfo(s.lineOffset+len(text)+1, filename, line) // +len(text)+1 since comment applies to next line
@@ -358,73 +361,94 @@ exit:
return tok, string(s.src[offs:s.offset])
}
-func (s *Scanner) scanEscape(quote rune) {
+// scanEscape parses an escape sequence where rune is the accepted
+// escaped quote. In case of a syntax error, it stops at the offending
+// character (without consuming it) and returns false. Otherwise
+// it returns true.
+func (s *Scanner) scanEscape(quote rune) bool {
offs := s.offset
- var i, base, max uint32
+ var n int
+ var base, max uint32
switch s.ch {
case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
s.next()
- return
+ return true
case '0', '1', '2', '3', '4', '5', '6', '7':
- i, base, max = 3, 8, 255
+ n, base, max = 3, 8, 255
case 'x':
s.next()
- i, base, max = 2, 16, 255
+ n, base, max = 2, 16, 255
case 'u':
s.next()
- i, base, max = 4, 16, unicode.MaxRune
+ n, base, max = 4, 16, unicode.MaxRune
case 'U':
s.next()
- i, base, max = 8, 16, unicode.MaxRune
+ n, base, max = 8, 16, unicode.MaxRune
default:
- s.next() // always make progress
- s.error(offs, "unknown escape sequence")
- return
+ msg := "unknown escape sequence"
+ if s.ch < 0 {
+ msg = "escape sequence not terminated"
+ }
+ s.error(offs, msg)
+ return false
}
var x uint32
- for ; i > 0 && s.ch != quote && s.ch >= 0; i-- {
+ for n > 0 {
d := uint32(digitVal(s.ch))
if d >= base {
- s.error(s.offset, "illegal character in escape sequence")
- break
+ msg := fmt.Sprintf("illegal character %#U in escape sequence", s.ch)
+ if s.ch < 0 {
+ msg = "escape sequence not terminated"
+ }
+ s.error(s.offset, msg)
+ return false
}
x = x*base + d
s.next()
+ n--
}
- // in case of an error, consume remaining chars
- for ; i > 0 && s.ch != quote && s.ch >= 0; i-- {
- s.next()
- }
+
if x > max || 0xD800 <= x && x < 0xE000 {
s.error(offs, "escape sequence is invalid Unicode code point")
+ return false
}
+
+ return true
}
-func (s *Scanner) scanChar() string {
+func (s *Scanner) scanRune() string {
// '\'' opening already consumed
offs := s.offset - 1
+ valid := true
n := 0
- for s.ch != '\'' {
+ for {
ch := s.ch
- n++
- s.next()
if ch == '\n' || ch < 0 {
- s.error(offs, "character literal not terminated")
- n = 1
+ // only report error if we don't have one already
+ if valid {
+ s.error(offs, "rune literal not terminated")
+ valid = false
+ }
break
}
+ s.next()
+ if ch == '\'' {
+ break
+ }
+ n++
if ch == '\\' {
- s.scanEscape('\'')
+ if !s.scanEscape('\'') {
+ valid = false
+ }
+ // continue to read to closing quote
}
}
- s.next()
-
- if n != 1 {
- s.error(offs, "illegal character literal")
+ if valid && n != 1 {
+ s.error(offs, "illegal rune literal")
}
return string(s.src[offs:s.offset])
@@ -434,11 +458,14 @@ func (s *Scanner) scanString() string {
// '"' opening already consumed
offs := s.offset - 1
- for s.ch != '"' {
+ for {
ch := s.ch
- s.next()
if ch == '\n' || ch < 0 {
- s.error(offs, "string not terminated")
+ s.error(offs, "string literal not terminated")
+ break
+ }
+ s.next()
+ if ch == '"' {
break
}
if ch == '\\' {
@@ -446,8 +473,6 @@ func (s *Scanner) scanString() string {
}
}
- s.next()
-
return string(s.src[offs:s.offset])
}
@@ -468,20 +493,21 @@ func (s *Scanner) scanRawString() string {
offs := s.offset - 1
hasCR := false
- for s.ch != '`' {
+ for {
ch := s.ch
+ if ch < 0 {
+ s.error(offs, "raw string literal not terminated")
+ break
+ }
s.next()
+ if ch == '`' {
+ break
+ }
if ch == '\r' {
hasCR = true
}
- if ch < 0 {
- s.error(offs, "string not terminated")
- break
- }
}
- s.next()
-
lit := s.src[offs:s.offset]
if hasCR {
lit = stripCR(lit)
@@ -617,7 +643,7 @@ scanAgain:
case '\'':
insertSemi = true
tok = token.CHAR
- lit = s.scanChar()
+ lit = s.scanRune()
case '`':
insertSemi = true
tok = token.STRING
diff --git a/src/pkg/go/scanner/scanner_test.go b/src/pkg/go/scanner/scanner_test.go
index 8c64c2b95..fc450d8a6 100644
--- a/src/pkg/go/scanner/scanner_test.go
+++ b/src/pkg/go/scanner/scanner_test.go
@@ -493,9 +493,9 @@ var segments = []segment{
{"\nline3 //line File1.go:100", filepath.Join("dir", "TestLineComments"), 3}, // bad line comment, ignored
{"\nline4", filepath.Join("dir", "TestLineComments"), 4},
{"\n//line File1.go:100\n line100", filepath.Join("dir", "File1.go"), 100},
+ {"\n//line \t :42\n line1", "", 42},
{"\n//line File2.go:200\n line200", filepath.Join("dir", "File2.go"), 200},
- {"\n//line :1\n line1", "dir", 1},
- {"\n//line foo:42\n line42", filepath.Join("dir", "foo"), 42},
+ {"\n//line foo\t:42\n line42", filepath.Join("dir", "foo"), 42},
{"\n //line foo:42\n line44", filepath.Join("dir", "foo"), 44}, // bad line comment, ignored
{"\n//line foo 42\n line46", filepath.Join("dir", "foo"), 46}, // bad line comment, ignored
{"\n//line foo:42 extra text\n line48", filepath.Join("dir", "foo"), 48}, // bad line comment, ignored
@@ -631,7 +631,7 @@ type errorCollector struct {
pos token.Position // last error position encountered
}
-func checkError(t *testing.T, src string, tok token.Token, pos int, err string) {
+func checkError(t *testing.T, src string, tok token.Token, pos int, lit, err string) {
var s Scanner
var h errorCollector
eh := func(pos token.Position, msg string) {
@@ -640,13 +640,12 @@ func checkError(t *testing.T, src string, tok token.Token, pos int, err string)
h.pos = pos
}
s.Init(fset.AddFile("", fset.Base(), len(src)), []byte(src), eh, ScanComments|dontInsertSemis)
- _, tok0, _ := s.Scan()
- _, tok1, _ := s.Scan()
+ _, tok0, lit0 := s.Scan()
if tok0 != tok {
t.Errorf("%q: got %s, expected %s", src, tok0, tok)
}
- if tok1 != token.EOF {
- t.Errorf("%q: got %s, expected EOF", src, tok1)
+ if tok0 != token.ILLEGAL && lit0 != lit {
+ t.Errorf("%q: got literal %q, expected %q", src, lit0, lit)
}
cnt := 0
if err != "" {
@@ -667,43 +666,71 @@ var errors = []struct {
src string
tok token.Token
pos int
+ lit string
err string
}{
- {"\a", token.ILLEGAL, 0, "illegal character U+0007"},
- {`#`, token.ILLEGAL, 0, "illegal character U+0023 '#'"},
- {`…`, token.ILLEGAL, 0, "illegal character U+2026 '…'"},
- {`' '`, token.CHAR, 0, ""},
- {`''`, token.CHAR, 0, "illegal character literal"},
- {`'\8'`, token.CHAR, 2, "unknown escape sequence"},
- {`'\08'`, token.CHAR, 3, "illegal character in escape sequence"},
- {`'\x0g'`, token.CHAR, 4, "illegal character in escape sequence"},
- {`'\Uffffffff'`, token.CHAR, 2, "escape sequence is invalid Unicode code point"},
- {`'`, token.CHAR, 0, "character literal not terminated"},
- {`""`, token.STRING, 0, ""},
- {`"`, token.STRING, 0, "string not terminated"},
- {"``", token.STRING, 0, ""},
- {"`", token.STRING, 0, "string not terminated"},
- {"/**/", token.COMMENT, 0, ""},
- {"/*", token.COMMENT, 0, "comment not terminated"},
- {"077", token.INT, 0, ""},
- {"078.", token.FLOAT, 0, ""},
- {"07801234567.", token.FLOAT, 0, ""},
- {"078e0", token.FLOAT, 0, ""},
- {"078", token.INT, 0, "illegal octal number"},
- {"07800000009", token.INT, 0, "illegal octal number"},
- {"0x", token.INT, 0, "illegal hexadecimal number"},
- {"0X", token.INT, 0, "illegal hexadecimal number"},
- {"\"abc\x00def\"", token.STRING, 4, "illegal character NUL"},
- {"\"abc\x80def\"", token.STRING, 4, "illegal UTF-8 encoding"},
- {"\ufeff\ufeff", token.ILLEGAL, 3, "illegal byte order mark"}, // only first BOM is ignored
- {"//\ufeff", token.COMMENT, 2, "illegal byte order mark"}, // only first BOM is ignored
- {"'\ufeff" + `'`, token.CHAR, 1, "illegal byte order mark"}, // only first BOM is ignored
- {`"` + "abc\ufeffdef" + `"`, token.STRING, 4, "illegal byte order mark"}, // only first BOM is ignored
+ {"\a", token.ILLEGAL, 0, "", "illegal character U+0007"},
+ {`#`, token.ILLEGAL, 0, "", "illegal character U+0023 '#'"},
+ {`…`, token.ILLEGAL, 0, "", "illegal character U+2026 '…'"},
+ {`' '`, token.CHAR, 0, `' '`, ""},
+ {`''`, token.CHAR, 0, `''`, "illegal rune literal"},
+ {`'12'`, token.CHAR, 0, `'12'`, "illegal rune literal"},
+ {`'123'`, token.CHAR, 0, `'123'`, "illegal rune literal"},
+ {`'\0'`, token.CHAR, 3, `'\0'`, "illegal character U+0027 ''' in escape sequence"},
+ {`'\07'`, token.CHAR, 4, `'\07'`, "illegal character U+0027 ''' in escape sequence"},
+ {`'\8'`, token.CHAR, 2, `'\8'`, "unknown escape sequence"},
+ {`'\08'`, token.CHAR, 3, `'\08'`, "illegal character U+0038 '8' in escape sequence"},
+ {`'\x'`, token.CHAR, 3, `'\x'`, "illegal character U+0027 ''' in escape sequence"},
+ {`'\x0'`, token.CHAR, 4, `'\x0'`, "illegal character U+0027 ''' in escape sequence"},
+ {`'\x0g'`, token.CHAR, 4, `'\x0g'`, "illegal character U+0067 'g' in escape sequence"},
+ {`'\u'`, token.CHAR, 3, `'\u'`, "illegal character U+0027 ''' in escape sequence"},
+ {`'\u0'`, token.CHAR, 4, `'\u0'`, "illegal character U+0027 ''' in escape sequence"},
+ {`'\u00'`, token.CHAR, 5, `'\u00'`, "illegal character U+0027 ''' in escape sequence"},
+ {`'\u000'`, token.CHAR, 6, `'\u000'`, "illegal character U+0027 ''' in escape sequence"},
+ {`'\u000`, token.CHAR, 6, `'\u000`, "escape sequence not terminated"},
+ {`'\u0000'`, token.CHAR, 0, `'\u0000'`, ""},
+ {`'\U'`, token.CHAR, 3, `'\U'`, "illegal character U+0027 ''' in escape sequence"},
+ {`'\U0'`, token.CHAR, 4, `'\U0'`, "illegal character U+0027 ''' in escape sequence"},
+ {`'\U00'`, token.CHAR, 5, `'\U00'`, "illegal character U+0027 ''' in escape sequence"},
+ {`'\U000'`, token.CHAR, 6, `'\U000'`, "illegal character U+0027 ''' in escape sequence"},
+ {`'\U0000'`, token.CHAR, 7, `'\U0000'`, "illegal character U+0027 ''' in escape sequence"},
+ {`'\U00000'`, token.CHAR, 8, `'\U00000'`, "illegal character U+0027 ''' in escape sequence"},
+ {`'\U000000'`, token.CHAR, 9, `'\U000000'`, "illegal character U+0027 ''' in escape sequence"},
+ {`'\U0000000'`, token.CHAR, 10, `'\U0000000'`, "illegal character U+0027 ''' in escape sequence"},
+ {`'\U0000000`, token.CHAR, 10, `'\U0000000`, "escape sequence not terminated"},
+ {`'\U00000000'`, token.CHAR, 0, `'\U00000000'`, ""},
+ {`'\Uffffffff'`, token.CHAR, 2, `'\Uffffffff'`, "escape sequence is invalid Unicode code point"},
+ {`'`, token.CHAR, 0, `'`, "rune literal not terminated"},
+ {`'\`, token.CHAR, 2, `'\`, "escape sequence not terminated"},
+ {"'\n", token.CHAR, 0, "'", "rune literal not terminated"},
+ {"'\n ", token.CHAR, 0, "'", "rune literal not terminated"},
+ {`""`, token.STRING, 0, `""`, ""},
+ {`"abc`, token.STRING, 0, `"abc`, "string literal not terminated"},
+ {"\"abc\n", token.STRING, 0, `"abc`, "string literal not terminated"},
+ {"\"abc\n ", token.STRING, 0, `"abc`, "string literal not terminated"},
+ {"``", token.STRING, 0, "``", ""},
+ {"`", token.STRING, 0, "`", "raw string literal not terminated"},
+ {"/**/", token.COMMENT, 0, "/**/", ""},
+ {"/*", token.COMMENT, 0, "/*", "comment not terminated"},
+ {"077", token.INT, 0, "077", ""},
+ {"078.", token.FLOAT, 0, "078.", ""},
+ {"07801234567.", token.FLOAT, 0, "07801234567.", ""},
+ {"078e0", token.FLOAT, 0, "078e0", ""},
+ {"078", token.INT, 0, "078", "illegal octal number"},
+ {"07800000009", token.INT, 0, "07800000009", "illegal octal number"},
+ {"0x", token.INT, 0, "0x", "illegal hexadecimal number"},
+ {"0X", token.INT, 0, "0X", "illegal hexadecimal number"},
+ {"\"abc\x00def\"", token.STRING, 4, "\"abc\x00def\"", "illegal character NUL"},
+ {"\"abc\x80def\"", token.STRING, 4, "\"abc\x80def\"", "illegal UTF-8 encoding"},
+ {"\ufeff\ufeff", token.ILLEGAL, 3, "\ufeff\ufeff", "illegal byte order mark"}, // only first BOM is ignored
+ {"//\ufeff", token.COMMENT, 2, "//\ufeff", "illegal byte order mark"}, // only first BOM is ignored
+ {"'\ufeff" + `'`, token.CHAR, 1, "'\ufeff" + `'`, "illegal byte order mark"}, // only first BOM is ignored
+ {`"` + "abc\ufeffdef" + `"`, token.STRING, 4, `"` + "abc\ufeffdef" + `"`, "illegal byte order mark"}, // only first BOM is ignored
}
func TestScanErrors(t *testing.T) {
for _, e := range errors {
- checkError(t, e.src, e.tok, e.pos, e.err)
+ checkError(t, e.src, e.tok, e.pos, e.lit, e.err)
}
}