diff options
Diffstat (limited to 'src/pkg/go/scanner')
-rw-r--r-- | src/pkg/go/scanner/scanner.go | 118 | ||||
-rw-r--r-- | src/pkg/go/scanner/scanner_test.go | 103 |
2 files changed, 137 insertions, 84 deletions
diff --git a/src/pkg/go/scanner/scanner.go b/src/pkg/go/scanner/scanner.go index 1e259d5ed..cec82ea10 100644 --- a/src/pkg/go/scanner/scanner.go +++ b/src/pkg/go/scanner/scanner.go @@ -148,11 +148,14 @@ func (s *Scanner) interpretLineComment(text []byte) { // get filename and line number, if any if i := bytes.LastIndex(text, []byte{':'}); i > 0 { if line, err := strconv.Atoi(string(text[i+1:])); err == nil && line > 0 { - // valid //line filename:line comment; - filename := filepath.Clean(string(text[len(prefix):i])) - if !filepath.IsAbs(filename) { - // make filename relative to current directory - filename = filepath.Join(s.dir, filename) + // valid //line filename:line comment + filename := string(bytes.TrimSpace(text[len(prefix):i])) + if filename != "" { + filename = filepath.Clean(filename) + if !filepath.IsAbs(filename) { + // make filename relative to current directory + filename = filepath.Join(s.dir, filename) + } } // update scanner position s.file.AddLineInfo(s.lineOffset+len(text)+1, filename, line) // +len(text)+1 since comment applies to next line @@ -358,73 +361,94 @@ exit: return tok, string(s.src[offs:s.offset]) } -func (s *Scanner) scanEscape(quote rune) { +// scanEscape parses an escape sequence where rune is the accepted +// escaped quote. In case of a syntax error, it stops at the offending +// character (without consuming it) and returns false. Otherwise +// it returns true. +func (s *Scanner) scanEscape(quote rune) bool { offs := s.offset - var i, base, max uint32 + var n int + var base, max uint32 switch s.ch { case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote: s.next() - return + return true case '0', '1', '2', '3', '4', '5', '6', '7': - i, base, max = 3, 8, 255 + n, base, max = 3, 8, 255 case 'x': s.next() - i, base, max = 2, 16, 255 + n, base, max = 2, 16, 255 case 'u': s.next() - i, base, max = 4, 16, unicode.MaxRune + n, base, max = 4, 16, unicode.MaxRune case 'U': s.next() - i, base, max = 8, 16, unicode.MaxRune + n, base, max = 8, 16, unicode.MaxRune default: - s.next() // always make progress - s.error(offs, "unknown escape sequence") - return + msg := "unknown escape sequence" + if s.ch < 0 { + msg = "escape sequence not terminated" + } + s.error(offs, msg) + return false } var x uint32 - for ; i > 0 && s.ch != quote && s.ch >= 0; i-- { + for n > 0 { d := uint32(digitVal(s.ch)) if d >= base { - s.error(s.offset, "illegal character in escape sequence") - break + msg := fmt.Sprintf("illegal character %#U in escape sequence", s.ch) + if s.ch < 0 { + msg = "escape sequence not terminated" + } + s.error(s.offset, msg) + return false } x = x*base + d s.next() + n-- } - // in case of an error, consume remaining chars - for ; i > 0 && s.ch != quote && s.ch >= 0; i-- { - s.next() - } + if x > max || 0xD800 <= x && x < 0xE000 { s.error(offs, "escape sequence is invalid Unicode code point") + return false } + + return true } -func (s *Scanner) scanChar() string { +func (s *Scanner) scanRune() string { // '\'' opening already consumed offs := s.offset - 1 + valid := true n := 0 - for s.ch != '\'' { + for { ch := s.ch - n++ - s.next() if ch == '\n' || ch < 0 { - s.error(offs, "character literal not terminated") - n = 1 + // only report error if we don't have one already + if valid { + s.error(offs, "rune literal not terminated") + valid = false + } break } + s.next() + if ch == '\'' { + break + } + n++ if ch == '\\' { - s.scanEscape('\'') + if !s.scanEscape('\'') { + valid = false + } + // continue to read to closing quote } } - s.next() - - if n != 1 { - s.error(offs, "illegal character literal") + if valid && n != 1 { + s.error(offs, "illegal rune literal") } return string(s.src[offs:s.offset]) @@ -434,11 +458,14 @@ func (s *Scanner) scanString() string { // '"' opening already consumed offs := s.offset - 1 - for s.ch != '"' { + for { ch := s.ch - s.next() if ch == '\n' || ch < 0 { - s.error(offs, "string not terminated") + s.error(offs, "string literal not terminated") + break + } + s.next() + if ch == '"' { break } if ch == '\\' { @@ -446,8 +473,6 @@ func (s *Scanner) scanString() string { } } - s.next() - return string(s.src[offs:s.offset]) } @@ -468,20 +493,21 @@ func (s *Scanner) scanRawString() string { offs := s.offset - 1 hasCR := false - for s.ch != '`' { + for { ch := s.ch + if ch < 0 { + s.error(offs, "raw string literal not terminated") + break + } s.next() + if ch == '`' { + break + } if ch == '\r' { hasCR = true } - if ch < 0 { - s.error(offs, "string not terminated") - break - } } - s.next() - lit := s.src[offs:s.offset] if hasCR { lit = stripCR(lit) @@ -617,7 +643,7 @@ scanAgain: case '\'': insertSemi = true tok = token.CHAR - lit = s.scanChar() + lit = s.scanRune() case '`': insertSemi = true tok = token.STRING diff --git a/src/pkg/go/scanner/scanner_test.go b/src/pkg/go/scanner/scanner_test.go index 8c64c2b95..fc450d8a6 100644 --- a/src/pkg/go/scanner/scanner_test.go +++ b/src/pkg/go/scanner/scanner_test.go @@ -493,9 +493,9 @@ var segments = []segment{ {"\nline3 //line File1.go:100", filepath.Join("dir", "TestLineComments"), 3}, // bad line comment, ignored {"\nline4", filepath.Join("dir", "TestLineComments"), 4}, {"\n//line File1.go:100\n line100", filepath.Join("dir", "File1.go"), 100}, + {"\n//line \t :42\n line1", "", 42}, {"\n//line File2.go:200\n line200", filepath.Join("dir", "File2.go"), 200}, - {"\n//line :1\n line1", "dir", 1}, - {"\n//line foo:42\n line42", filepath.Join("dir", "foo"), 42}, + {"\n//line foo\t:42\n line42", filepath.Join("dir", "foo"), 42}, {"\n //line foo:42\n line44", filepath.Join("dir", "foo"), 44}, // bad line comment, ignored {"\n//line foo 42\n line46", filepath.Join("dir", "foo"), 46}, // bad line comment, ignored {"\n//line foo:42 extra text\n line48", filepath.Join("dir", "foo"), 48}, // bad line comment, ignored @@ -631,7 +631,7 @@ type errorCollector struct { pos token.Position // last error position encountered } -func checkError(t *testing.T, src string, tok token.Token, pos int, err string) { +func checkError(t *testing.T, src string, tok token.Token, pos int, lit, err string) { var s Scanner var h errorCollector eh := func(pos token.Position, msg string) { @@ -640,13 +640,12 @@ func checkError(t *testing.T, src string, tok token.Token, pos int, err string) h.pos = pos } s.Init(fset.AddFile("", fset.Base(), len(src)), []byte(src), eh, ScanComments|dontInsertSemis) - _, tok0, _ := s.Scan() - _, tok1, _ := s.Scan() + _, tok0, lit0 := s.Scan() if tok0 != tok { t.Errorf("%q: got %s, expected %s", src, tok0, tok) } - if tok1 != token.EOF { - t.Errorf("%q: got %s, expected EOF", src, tok1) + if tok0 != token.ILLEGAL && lit0 != lit { + t.Errorf("%q: got literal %q, expected %q", src, lit0, lit) } cnt := 0 if err != "" { @@ -667,43 +666,71 @@ var errors = []struct { src string tok token.Token pos int + lit string err string }{ - {"\a", token.ILLEGAL, 0, "illegal character U+0007"}, - {`#`, token.ILLEGAL, 0, "illegal character U+0023 '#'"}, - {`…`, token.ILLEGAL, 0, "illegal character U+2026 '…'"}, - {`' '`, token.CHAR, 0, ""}, - {`''`, token.CHAR, 0, "illegal character literal"}, - {`'\8'`, token.CHAR, 2, "unknown escape sequence"}, - {`'\08'`, token.CHAR, 3, "illegal character in escape sequence"}, - {`'\x0g'`, token.CHAR, 4, "illegal character in escape sequence"}, - {`'\Uffffffff'`, token.CHAR, 2, "escape sequence is invalid Unicode code point"}, - {`'`, token.CHAR, 0, "character literal not terminated"}, - {`""`, token.STRING, 0, ""}, - {`"`, token.STRING, 0, "string not terminated"}, - {"``", token.STRING, 0, ""}, - {"`", token.STRING, 0, "string not terminated"}, - {"/**/", token.COMMENT, 0, ""}, - {"/*", token.COMMENT, 0, "comment not terminated"}, - {"077", token.INT, 0, ""}, - {"078.", token.FLOAT, 0, ""}, - {"07801234567.", token.FLOAT, 0, ""}, - {"078e0", token.FLOAT, 0, ""}, - {"078", token.INT, 0, "illegal octal number"}, - {"07800000009", token.INT, 0, "illegal octal number"}, - {"0x", token.INT, 0, "illegal hexadecimal number"}, - {"0X", token.INT, 0, "illegal hexadecimal number"}, - {"\"abc\x00def\"", token.STRING, 4, "illegal character NUL"}, - {"\"abc\x80def\"", token.STRING, 4, "illegal UTF-8 encoding"}, - {"\ufeff\ufeff", token.ILLEGAL, 3, "illegal byte order mark"}, // only first BOM is ignored - {"//\ufeff", token.COMMENT, 2, "illegal byte order mark"}, // only first BOM is ignored - {"'\ufeff" + `'`, token.CHAR, 1, "illegal byte order mark"}, // only first BOM is ignored - {`"` + "abc\ufeffdef" + `"`, token.STRING, 4, "illegal byte order mark"}, // only first BOM is ignored + {"\a", token.ILLEGAL, 0, "", "illegal character U+0007"}, + {`#`, token.ILLEGAL, 0, "", "illegal character U+0023 '#'"}, + {`…`, token.ILLEGAL, 0, "", "illegal character U+2026 '…'"}, + {`' '`, token.CHAR, 0, `' '`, ""}, + {`''`, token.CHAR, 0, `''`, "illegal rune literal"}, + {`'12'`, token.CHAR, 0, `'12'`, "illegal rune literal"}, + {`'123'`, token.CHAR, 0, `'123'`, "illegal rune literal"}, + {`'\0'`, token.CHAR, 3, `'\0'`, "illegal character U+0027 ''' in escape sequence"}, + {`'\07'`, token.CHAR, 4, `'\07'`, "illegal character U+0027 ''' in escape sequence"}, + {`'\8'`, token.CHAR, 2, `'\8'`, "unknown escape sequence"}, + {`'\08'`, token.CHAR, 3, `'\08'`, "illegal character U+0038 '8' in escape sequence"}, + {`'\x'`, token.CHAR, 3, `'\x'`, "illegal character U+0027 ''' in escape sequence"}, + {`'\x0'`, token.CHAR, 4, `'\x0'`, "illegal character U+0027 ''' in escape sequence"}, + {`'\x0g'`, token.CHAR, 4, `'\x0g'`, "illegal character U+0067 'g' in escape sequence"}, + {`'\u'`, token.CHAR, 3, `'\u'`, "illegal character U+0027 ''' in escape sequence"}, + {`'\u0'`, token.CHAR, 4, `'\u0'`, "illegal character U+0027 ''' in escape sequence"}, + {`'\u00'`, token.CHAR, 5, `'\u00'`, "illegal character U+0027 ''' in escape sequence"}, + {`'\u000'`, token.CHAR, 6, `'\u000'`, "illegal character U+0027 ''' in escape sequence"}, + {`'\u000`, token.CHAR, 6, `'\u000`, "escape sequence not terminated"}, + {`'\u0000'`, token.CHAR, 0, `'\u0000'`, ""}, + {`'\U'`, token.CHAR, 3, `'\U'`, "illegal character U+0027 ''' in escape sequence"}, + {`'\U0'`, token.CHAR, 4, `'\U0'`, "illegal character U+0027 ''' in escape sequence"}, + {`'\U00'`, token.CHAR, 5, `'\U00'`, "illegal character U+0027 ''' in escape sequence"}, + {`'\U000'`, token.CHAR, 6, `'\U000'`, "illegal character U+0027 ''' in escape sequence"}, + {`'\U0000'`, token.CHAR, 7, `'\U0000'`, "illegal character U+0027 ''' in escape sequence"}, + {`'\U00000'`, token.CHAR, 8, `'\U00000'`, "illegal character U+0027 ''' in escape sequence"}, + {`'\U000000'`, token.CHAR, 9, `'\U000000'`, "illegal character U+0027 ''' in escape sequence"}, + {`'\U0000000'`, token.CHAR, 10, `'\U0000000'`, "illegal character U+0027 ''' in escape sequence"}, + {`'\U0000000`, token.CHAR, 10, `'\U0000000`, "escape sequence not terminated"}, + {`'\U00000000'`, token.CHAR, 0, `'\U00000000'`, ""}, + {`'\Uffffffff'`, token.CHAR, 2, `'\Uffffffff'`, "escape sequence is invalid Unicode code point"}, + {`'`, token.CHAR, 0, `'`, "rune literal not terminated"}, + {`'\`, token.CHAR, 2, `'\`, "escape sequence not terminated"}, + {"'\n", token.CHAR, 0, "'", "rune literal not terminated"}, + {"'\n ", token.CHAR, 0, "'", "rune literal not terminated"}, + {`""`, token.STRING, 0, `""`, ""}, + {`"abc`, token.STRING, 0, `"abc`, "string literal not terminated"}, + {"\"abc\n", token.STRING, 0, `"abc`, "string literal not terminated"}, + {"\"abc\n ", token.STRING, 0, `"abc`, "string literal not terminated"}, + {"``", token.STRING, 0, "``", ""}, + {"`", token.STRING, 0, "`", "raw string literal not terminated"}, + {"/**/", token.COMMENT, 0, "/**/", ""}, + {"/*", token.COMMENT, 0, "/*", "comment not terminated"}, + {"077", token.INT, 0, "077", ""}, + {"078.", token.FLOAT, 0, "078.", ""}, + {"07801234567.", token.FLOAT, 0, "07801234567.", ""}, + {"078e0", token.FLOAT, 0, "078e0", ""}, + {"078", token.INT, 0, "078", "illegal octal number"}, + {"07800000009", token.INT, 0, "07800000009", "illegal octal number"}, + {"0x", token.INT, 0, "0x", "illegal hexadecimal number"}, + {"0X", token.INT, 0, "0X", "illegal hexadecimal number"}, + {"\"abc\x00def\"", token.STRING, 4, "\"abc\x00def\"", "illegal character NUL"}, + {"\"abc\x80def\"", token.STRING, 4, "\"abc\x80def\"", "illegal UTF-8 encoding"}, + {"\ufeff\ufeff", token.ILLEGAL, 3, "\ufeff\ufeff", "illegal byte order mark"}, // only first BOM is ignored + {"//\ufeff", token.COMMENT, 2, "//\ufeff", "illegal byte order mark"}, // only first BOM is ignored + {"'\ufeff" + `'`, token.CHAR, 1, "'\ufeff" + `'`, "illegal byte order mark"}, // only first BOM is ignored + {`"` + "abc\ufeffdef" + `"`, token.STRING, 4, `"` + "abc\ufeffdef" + `"`, "illegal byte order mark"}, // only first BOM is ignored } func TestScanErrors(t *testing.T) { for _, e := range errors { - checkError(t, e.src, e.tok, e.pos, e.err) + checkError(t, e.src, e.tok, e.pos, e.lit, e.err) } } |