summaryrefslogtreecommitdiff
path: root/src/parse/lex.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/parse/lex.cpp')
-rw-r--r--src/parse/lex.cpp34
1 files changed, 25 insertions, 9 deletions
diff --git a/src/parse/lex.cpp b/src/parse/lex.cpp
index 9388fca4..69d2b1c0 100644
--- a/src/parse/lex.cpp
+++ b/src/parse/lex.cpp
@@ -15,6 +15,8 @@
#include <typeinfo>
#include <algorithm> // std::count
#include <cctype>
+#define TRACE_CHARS
+#define TRACE_RAW_TOKENS
Lexer::Lexer(const ::std::string& filename):
m_path(filename.c_str()),
@@ -191,14 +193,14 @@ signed int Lexer::getSymbol()
// 3. IF: a smaller character or, EOS is hit - Return current best
unsigned ofs = 0;
signed int best = 0;
+ bool hit_eof = false;
for(unsigned i = 0; i < LEN(TOKENMAP); i ++)
{
const char* const chars = TOKENMAP[i].chars;
const size_t len = TOKENMAP[i].len;
if( ofs >= len || static_cast<uint32_t>(chars[ofs]) > ch.v ) {
- this->ungetc();
- return best;
+ break ;
}
while( chars[ofs] && ch == chars[ofs] )
@@ -208,6 +210,8 @@ signed int Lexer::getSymbol()
}
catch(Lexer::EndOfFile) {
ch = 0;
+ // Prevent `ungetc` if EOF was hit
+ hit_eof = true;
}
ofs ++;
}
@@ -217,7 +221,10 @@ signed int Lexer::getSymbol()
}
}
- this->ungetc();
+ if( !hit_eof )
+ {
+ this->ungetc();
+ }
return best;
}
@@ -247,7 +254,9 @@ Token Lexer::realGetToken()
while(true)
{
Token tok = getTokenInt();
- //::std::cout << "getTokenInt: tok = " << tok << ::std::endl;
+#ifdef TRACE_RAW_TOKENS
+ ::std::cout << "getTokenInt: tok = " << tok << ::std::endl;
+#endif
switch(tok.type())
{
case TOK_NEWLINE:
@@ -272,7 +281,7 @@ Token Lexer::getTokenInt()
{
Codepoint ch = this->getc();
- if( ch == '#' && m_line == 1 && m_line_ofs == 1 ) {
+ if( m_line == 1 && m_line_ofs == 1 && ch == '#') {
switch( (ch = this->getc()).v )
{
case '!':
@@ -871,8 +880,8 @@ uint32_t Lexer::parseEscape(char enclosing)
char Lexer::getc_byte()
{
- char rv = m_istream.get();
- if( m_istream.eof() )
+ int rv = m_istream.get();
+ if( rv == EOF || m_istream.eof() )
throw Lexer::EndOfFile();
if( rv == '\n' )
@@ -888,13 +897,18 @@ Codepoint Lexer::getc()
if( m_last_char_valid )
{
m_last_char_valid = false;
+#ifdef TRACE_CHARS
+ ::std::cout << "getc(): U+" << ::std::hex << m_last_char.v << " (cached)" << ::std::endl;
+#endif
}
else
{
m_last_char = this->getc_cp();
m_line_ofs += 1;
+#ifdef TRACE_CHARS
+ ::std::cout << "getc(): U+" << ::std::hex << m_last_char.v << ::std::endl;
+#endif
}
- //::std::cout << "getc(): '" << m_last_char << "'" << ::std::endl;
return m_last_char;
}
@@ -965,7 +979,9 @@ Codepoint Lexer::getc_cp()
void Lexer::ungetc()
{
-// ::std::cout << "ungetc(): " << m_last_char_valid << " '" << m_last_char << "'" << ::std::endl;
+#ifdef TRACE_CHARS
+ ::std::cout << "ungetc(): cache U+" << ::std::hex << m_last_char.v << ::std::endl;
+#endif
assert(!m_last_char_valid);
m_last_char_valid = true;
}