summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: John Hodge <tpg@mutabah.net> 2017-08-19 22:12:45 +0800
committer: John Hodge <tpg@mutabah.net> 2017-08-19 22:12:45 +0800
commit: 1b66d3257daf5dfb18128e5aaebb43568925587b (patch)
tree: 8ece674953ccc8bf44e9370ed3ce09ceebf8d565
parent: 6056c2e4c59164f13c4ac5d514319a3ac0c7ed4c (diff)
download: mrust-1b66d3257daf5dfb18128e5aaebb43568925587b.tar.gz
Lex - Fix duplication of final token in file.
-rw-r--r-- src/parse/lex.cpp 34
-rw-r--r-- src/parse/tokenstream.cpp 17
2 files changed, 38 insertions, 13 deletions
diff --git a/src/parse/lex.cpp b/src/parse/lex.cpp
index 9388fca4..69d2b1c0 100644
--- a/src/parse/lex.cpp
+++ b/src/parse/lex.cpp
@@ -15,6 +15,8 @@
#include <typeinfo>
#include <algorithm> // std::count
#include <cctype>
+#define TRACE_CHARS
+#define TRACE_RAW_TOKENS
Lexer::Lexer(const ::std::string& filename):
m_path(filename.c_str()),
@@ -191,14 +193,14 @@ signed int Lexer::getSymbol()
// 3. IF: a smaller character or, EOS is hit - Return current best
unsigned ofs = 0;
signed int best = 0;
+ bool hit_eof = false;
for(unsigned i = 0; i < LEN(TOKENMAP); i ++)
{
const char* const chars = TOKENMAP[i].chars;
const size_t len = TOKENMAP[i].len;
if( ofs >= len || static_cast<uint32_t>(chars[ofs]) > ch.v ) {
- this->ungetc();
- return best;
+ break ;
}
while( chars[ofs] && ch == chars[ofs] )
@@ -208,6 +210,8 @@ signed int Lexer::getSymbol()
}
catch(Lexer::EndOfFile) {
ch = 0;
+ // Prevent `ungetc` if EOF was hit
+ hit_eof = true;
}
ofs ++;
}
@@ -217,7 +221,10 @@ signed int Lexer::getSymbol()
}
}
- this->ungetc();
+ if( !hit_eof )
+ {
+ this->ungetc();
+ }
return best;
}
@@ -247,7 +254,9 @@ Token Lexer::realGetToken()
while(true)
{
Token tok = getTokenInt();
- //::std::cout << "getTokenInt: tok = " << tok << ::std::endl;
+#ifdef TRACE_RAW_TOKENS
+ ::std::cout << "getTokenInt: tok = " << tok << ::std::endl;
+#endif
switch(tok.type())
{
case TOK_NEWLINE:
@@ -272,7 +281,7 @@ Token Lexer::getTokenInt()
{
Codepoint ch = this->getc();
- if( ch == '#' && m_line == 1 && m_line_ofs == 1 ) {
+ if( m_line == 1 && m_line_ofs == 1 && ch == '#') {
switch( (ch = this->getc()).v )
{
case '!':
@@ -871,8 +880,8 @@ uint32_t Lexer::parseEscape(char enclosing)
char Lexer::getc_byte()
{
- char rv = m_istream.get();
- if( m_istream.eof() )
+ int rv = m_istream.get();
+ if( rv == EOF || m_istream.eof() )
throw Lexer::EndOfFile();
if( rv == '\n' )
@@ -888,13 +897,18 @@ Codepoint Lexer::getc()
if( m_last_char_valid )
{
m_last_char_valid = false;
+#ifdef TRACE_CHARS
+ ::std::cout << "getc(): U+" << ::std::hex << m_last_char.v << " (cached)" << ::std::endl;
+#endif
}
else
{
m_last_char = this->getc_cp();
m_line_ofs += 1;
+#ifdef TRACE_CHARS
+ ::std::cout << "getc(): U+" << ::std::hex << m_last_char.v << ::std::endl;
+#endif
}
- //::std::cout << "getc(): '" << m_last_char << "'" << ::std::endl;
return m_last_char;
}
@@ -965,7 +979,9 @@ Codepoint Lexer::getc_cp()
void Lexer::ungetc()
{
-// ::std::cout << "ungetc(): " << m_last_char_valid << " '" << m_last_char << "'" << ::std::endl;
+#ifdef TRACE_CHARS
+ ::std::cout << "ungetc(): cache U+" << ::std::hex << m_last_char.v << ::std::endl;
+#endif
assert(!m_last_char_valid);
m_last_char_valid = true;
}
diff --git a/src/parse/tokenstream.cpp b/src/parse/tokenstream.cpp
index 2975a523..7b8fa532 100644
--- a/src/parse/tokenstream.cpp
+++ b/src/parse/tokenstream.cpp
@@ -12,6 +12,7 @@
const bool DEBUG_PRINT_TOKENS = false;
//const bool DEBUG_PRINT_TOKENS = true;
//#define DEBUG_PRINT_TOKENS debug_enabled("Lexer Tokens")
+#define FULL_TRACE
TokenStream::TokenStream():
m_cache_valid(false)
@@ -33,7 +34,9 @@ Token TokenStream::getToken()
{
if( m_cache_valid )
{
- //DEBUG("<<< " << m_cache << " (cache)");
+#ifdef FULL_TRACE
+ DEBUG("<<< " << m_cache << " (cache)");
+#endif
m_cache_valid = false;
return mv$(m_cache);
}
@@ -42,7 +45,9 @@ Token TokenStream::getToken()
Token ret = mv$( m_lookahead.front().first );
m_hygiene = m_lookahead.front().second;
m_lookahead.erase(m_lookahead.begin());
- //DEBUG("<<< " << ret << " (lookahead)");
+#ifdef FULL_TRACE
+ DEBUG("<<< " << ret << " (lookahead)");
+#endif
if( DEBUG_PRINT_TOKENS ) {
::std::cout << "getToken[" << typeid(*this).name() << "] - " << ret.get_pos() << "-" << ret << ::std::endl;
}
@@ -52,7 +57,9 @@ Token TokenStream::getToken()
{
Token ret = this->innerGetToken();
m_hygiene = this->realGetHygiene();
- //DEBUG("<<< " << ret << " (new)");
+#ifdef FULL_TRACE
+ DEBUG("<<< " << ret << " (new)");
+#endif
if( DEBUG_PRINT_TOKENS ) {
::std::cout << "getToken[" << typeid(*this).name() << "] - " << ret.get_pos() << "-" << ret << ::std::endl;
}
@@ -68,7 +75,9 @@ void TokenStream::putback(Token tok)
}
else
{
- //DEBUG(">>> " << tok);
+#ifdef FULL_TRACE
+ DEBUG(">>> " << tok);
+#endif
m_cache_valid = true;
m_cache = mv$(tok);
}