-rw-r--r--  src/parse/lex.cpp          | 34
-rw-r--r--  src/parse/tokenstream.cpp  | 17
2 files changed, 38 insertions, 13 deletions
diff --git a/src/parse/lex.cpp b/src/parse/lex.cpp
index 9388fca4..69d2b1c0 100644
--- a/src/parse/lex.cpp
+++ b/src/parse/lex.cpp
@@ -15,6 +15,8 @@
 #include <typeinfo>
 #include <algorithm>    // std::count
 #include <cctype>
+#define TRACE_CHARS
+#define TRACE_RAW_TOKENS
 
 Lexer::Lexer(const ::std::string& filename):
     m_path(filename.c_str()),
@@ -191,14 +193,14 @@ signed int Lexer::getSymbol()
     // 3. IF: a smaller character or, EOS is hit - Return current best
     unsigned ofs = 0;
     signed int best = 0;
+    bool hit_eof = false;
     for(unsigned i = 0; i < LEN(TOKENMAP); i ++)
     {
         const char* const chars = TOKENMAP[i].chars;
         const size_t len = TOKENMAP[i].len;
 
         if( ofs >= len || static_cast<uint32_t>(chars[ofs]) > ch.v ) {
-            this->ungetc();
-            return best;
+            break ;
         }
 
         while( chars[ofs] && ch == chars[ofs] )
@@ -208,6 +210,8 @@ signed int Lexer::getSymbol()
             }
             catch(Lexer::EndOfFile) {
                 ch = 0;
+                // Prevent `ungetc` if EOF was hit
+                hit_eof = true;
             }
             ofs ++;
         }
@@ -217,7 +221,10 @@ signed int Lexer::getSymbol()
         }
     }
 
-    this->ungetc();
+    if( !hit_eof )
+    {
+        this->ungetc();
+    }
     return best;
 }
 
@@ -247,7 +254,9 @@ Token Lexer::realGetToken()
     while(true)
     {
         Token tok = getTokenInt();
-        //::std::cout << "getTokenInt: tok = " << tok << ::std::endl;
+#ifdef TRACE_RAW_TOKENS
+        ::std::cout << "getTokenInt: tok = " << tok << ::std::endl;
+#endif
         switch(tok.type())
         {
         case TOK_NEWLINE:
@@ -272,7 +281,7 @@ Token Lexer::getTokenInt()
 {
     Codepoint ch = this->getc();
 
-    if( ch == '#' && m_line == 1 && m_line_ofs == 1 ) {
+    if( m_line == 1 && m_line_ofs == 1 && ch == '#') {
         switch( (ch = this->getc()).v )
         {
         case '!':
@@ -871,8 +880,8 @@ uint32_t Lexer::parseEscape(char enclosing)
 
 char Lexer::getc_byte()
 {
-    char rv = m_istream.get();
-    if( m_istream.eof() )
+    int rv = m_istream.get();
+    if( rv == EOF || m_istream.eof() )
         throw Lexer::EndOfFile();
 
     if( rv == '\n' )
@@ -888,13 +897,18 @@ Codepoint Lexer::getc()
     if( m_last_char_valid )
     {
         m_last_char_valid = false;
+#ifdef TRACE_CHARS
+        ::std::cout << "getc(): U+" << ::std::hex << m_last_char.v << " (cached)" << ::std::endl;
+#endif
     }
     else
     {
         m_last_char = this->getc_cp();
         m_line_ofs += 1;
+#ifdef TRACE_CHARS
+        ::std::cout << "getc(): U+" << ::std::hex << m_last_char.v << ::std::endl;
+#endif
     }
-    //::std::cout << "getc(): '" << m_last_char << "'" << ::std::endl;
     return m_last_char;
 }
 
@@ -965,7 +979,9 @@ Codepoint Lexer::getc_cp()
 
 void Lexer::ungetc()
 {
-//  ::std::cout << "ungetc(): " << m_last_char_valid << " '" << m_last_char << "'" << ::std::endl;
+#ifdef TRACE_CHARS
+    ::std::cout << "ungetc(): cache U+" << ::std::hex << m_last_char.v << ::std::endl;
+#endif
     assert(!m_last_char_valid);
     m_last_char_valid = true;
 }
diff --git a/src/parse/tokenstream.cpp b/src/parse/tokenstream.cpp
index 2975a523..7b8fa532 100644
--- a/src/parse/tokenstream.cpp
+++ b/src/parse/tokenstream.cpp
@@ -12,6 +12,7 @@
 const bool DEBUG_PRINT_TOKENS = false;
 //const bool DEBUG_PRINT_TOKENS = true;
 //#define DEBUG_PRINT_TOKENS debug_enabled("Lexer Tokens")
+#define FULL_TRACE
 
 TokenStream::TokenStream():
     m_cache_valid(false)
@@ -33,7 +34,9 @@ Token TokenStream::getToken()
 {
     if( m_cache_valid )
     {
-        //DEBUG("<<< " << m_cache << " (cache)");
+#ifdef FULL_TRACE
+        DEBUG("<<< " << m_cache << " (cache)");
+#endif
         m_cache_valid = false;
         return mv$(m_cache);
     }
@@ -42,7 +45,9 @@ Token TokenStream::getToken()
         Token ret = mv$( m_lookahead.front().first );
         m_hygiene = m_lookahead.front().second;
         m_lookahead.erase(m_lookahead.begin());
-        //DEBUG("<<< " << ret << " (lookahead)");
+#ifdef FULL_TRACE
+        DEBUG("<<< " << ret << " (lookahead)");
+#endif
         if( DEBUG_PRINT_TOKENS ) {
             ::std::cout << "getToken[" << typeid(*this).name() << "] - " << ret.get_pos() << "-" << ret << ::std::endl;
         }
@@ -52,7 +57,9 @@ Token TokenStream::getToken()
     {
         Token ret = this->innerGetToken();
         m_hygiene = this->realGetHygiene();
-        //DEBUG("<<< " << ret << " (new)");
+#ifdef FULL_TRACE
+        DEBUG("<<< " << ret << " (new)");
+#endif
         if( DEBUG_PRINT_TOKENS ) {
             ::std::cout << "getToken[" << typeid(*this).name() << "] - " << ret.get_pos() << "-" << ret << ::std::endl;
         }
@@ -68,7 +75,9 @@ void TokenStream::putback(Token tok)
     }
     else
     {
-        //DEBUG(">>> " << tok);
+#ifdef FULL_TRACE
+        DEBUG(">>> " << tok);
+#endif
         m_cache_valid = true;
         m_cache = mv$(tok);
    }