diff options
Diffstat (limited to 'src/parse/lex.cpp')
-rw-r--r-- | src/parse/lex.cpp | 34 |
1 files changed, 25 insertions, 9 deletions
diff --git a/src/parse/lex.cpp b/src/parse/lex.cpp index 9388fca4..69d2b1c0 100644 --- a/src/parse/lex.cpp +++ b/src/parse/lex.cpp @@ -15,6 +15,8 @@ #include <typeinfo> #include <algorithm> // std::count #include <cctype> +#define TRACE_CHARS +#define TRACE_RAW_TOKENS Lexer::Lexer(const ::std::string& filename): m_path(filename.c_str()), @@ -191,14 +193,14 @@ signed int Lexer::getSymbol() // 3. IF: a smaller character or, EOS is hit - Return current best unsigned ofs = 0; signed int best = 0; + bool hit_eof = false; for(unsigned i = 0; i < LEN(TOKENMAP); i ++) { const char* const chars = TOKENMAP[i].chars; const size_t len = TOKENMAP[i].len; if( ofs >= len || static_cast<uint32_t>(chars[ofs]) > ch.v ) { - this->ungetc(); - return best; + break ; } while( chars[ofs] && ch == chars[ofs] ) @@ -208,6 +210,8 @@ signed int Lexer::getSymbol() } catch(Lexer::EndOfFile) { ch = 0; + // Prevent `ungetc` if EOF was hit + hit_eof = true; } ofs ++; } @@ -217,7 +221,10 @@ signed int Lexer::getSymbol() } } - this->ungetc(); + if( !hit_eof ) + { + this->ungetc(); + } return best; } @@ -247,7 +254,9 @@ Token Lexer::realGetToken() while(true) { Token tok = getTokenInt(); - //::std::cout << "getTokenInt: tok = " << tok << ::std::endl; +#ifdef TRACE_RAW_TOKENS + ::std::cout << "getTokenInt: tok = " << tok << ::std::endl; +#endif switch(tok.type()) { case TOK_NEWLINE: @@ -272,7 +281,7 @@ Token Lexer::getTokenInt() { Codepoint ch = this->getc(); - if( ch == '#' && m_line == 1 && m_line_ofs == 1 ) { + if( m_line == 1 && m_line_ofs == 1 && ch == '#') { switch( (ch = this->getc()).v ) { case '!': @@ -871,8 +880,8 @@ uint32_t Lexer::parseEscape(char enclosing) char Lexer::getc_byte() { - char rv = m_istream.get(); - if( m_istream.eof() ) + int rv = m_istream.get(); + if( rv == EOF || m_istream.eof() ) throw Lexer::EndOfFile(); if( rv == '\n' ) @@ -888,13 +897,18 @@ Codepoint Lexer::getc() if( m_last_char_valid ) { m_last_char_valid = false; +#ifdef TRACE_CHARS + ::std::cout << "getc(): U+" << ::std::hex << m_last_char.v << " (cached)" << ::std::endl; +#endif } else { m_last_char = this->getc_cp(); m_line_ofs += 1; +#ifdef TRACE_CHARS + ::std::cout << "getc(): U+" << ::std::hex << m_last_char.v << ::std::endl; +#endif } - //::std::cout << "getc(): '" << m_last_char << "'" << ::std::endl; return m_last_char; } @@ -965,7 +979,9 @@ Codepoint Lexer::getc_cp() void Lexer::ungetc() { -// ::std::cout << "ungetc(): " << m_last_char_valid << " '" << m_last_char << "'" << ::std::endl; +#ifdef TRACE_CHARS + ::std::cout << "ungetc(): cache U+" << ::std::hex << m_last_char.v << ::std::endl; +#endif assert(!m_last_char_valid); m_last_char_valid = true; } |