diff options
Diffstat (limited to 'tools/standalone_miri/lex.cpp')
-rw-r--r-- | tools/standalone_miri/lex.cpp | 132 |
1 files changed, 123 insertions, 9 deletions
diff --git a/tools/standalone_miri/lex.cpp b/tools/standalone_miri/lex.cpp index 7650ec78..a602128a 100644 --- a/tools/standalone_miri/lex.cpp +++ b/tools/standalone_miri/lex.cpp @@ -31,11 +31,40 @@ double Token::real() const return this->numbers.real_val; } +::std::ostream& operator<<(::std::ostream& os, const Token& x) +{ + switch(x.type) + { + case TokenClass::Eof: + os << "-EOF-"; + break; + case TokenClass::Symbol: + os << "Symbol(" << x.strval << ")"; + break; + case TokenClass::Ident: + os << "Ident(" << x.strval << ")"; + break; + case TokenClass::Integer: + os << "Integer(" << x.numbers.int_val << ")"; + break; + case TokenClass::Real: + os << "Real(" << x.numbers.real_val << ")"; + break; + case TokenClass::String: + os << "\"" << x.strval << "\""; + break; + } + return os; +} + Lexer::Lexer(const ::std::string& path): + m_filename(path), m_if(path) { + m_cur_line = 1; if( !m_if.good() ) { + ::std::cerr << "Unable to open file '" << path << "'" << ::std::endl; throw "ERROR"; } @@ -57,21 +86,21 @@ Token Lexer::consume() void Lexer::check(TokenClass tc) { if( next() != tc ) { - ::std::cerr << "Syntax error: Expected token class #" << int(tc) << " - got '" << next().strval << "'" << ::std::endl; + ::std::cerr << *this << "Syntax error: Expected token class #" << int(tc) << " - got '" << next().strval << "'" << ::std::endl; throw "ERROR"; } } void Lexer::check(char ch) { if( next() != ch ) { - ::std::cerr << "Syntax error: Expected '" << ch << "' - got '" << next().strval << "'" << ::std::endl; + ::std::cerr << *this << "Syntax error: Expected '" << ch << "' - got '" << next().strval << "'" << ::std::endl; throw "ERROR"; } } void Lexer::check(const char* s) { if( next() != s ) { - ::std::cerr << "Syntax error: Expected '" << s << "' - got '" << next().strval << "'" << ::std::endl; + ::std::cerr << *this << "Syntax error: Expected '" << s << "' - got '" << next().strval << "'" << ::std::endl; throw "ERROR"; } } @@ -79,17 +108,83 @@ void Lexer::check(const char* s) void Lexer::advance() { char ch; - while( ::std::isblank(ch = m_if.get()) || ch == '\n' || ch == '\r') - ; + do + { + while( ::std::isblank(ch = m_if.get()) || ch == '\n' || ch == '\r') + { + if(ch == '\n') + m_cur_line ++; + } + if( ch == '/' ) + { + if( m_if.get() == '*' ) + { + unsigned level = 0; + while(1) + { + ch = m_if.get(); + if( ch == '\n' ) + m_cur_line ++; + if( ch == '/' ) { + if( m_if.get() == '*' ) { + level ++; + } + else { + m_if.unget(); + } + } + else if( ch == '*' ) { + if( m_if.get() == '/' ) { + if( level == 0 ) { + break; + } + level --; + } + else { + m_if.unget(); + } + } + } + + continue ; + } + else { + m_if.unget(); + } + } + break; + } while(1); //::std::cout << "ch=" << ch << ::std::endl; + + // Special hack to treat #0 as an ident + if( ch == '#' ) + { + ch = m_if.get(); + if( ::std::isdigit(ch) ) + { + ::std::string val = "#"; + while(::std::isdigit(ch)) + { + val.push_back(ch); + ch = m_if.get(); + } + m_if.unget(); + m_cur = Token { TokenClass::Ident, ::std::move(val) }; + return ; + } + + m_if.unget(); + ch = '#'; + } + if( m_if.eof() ) { m_cur = Token { TokenClass::Eof, "" }; } - else if( ::std::isalpha(ch) ) + else if( ::std::isalpha(ch) || ch == '_' ) { ::std::string val; - while(::std::isalnum(ch) || ch == '_') + while(::std::isalnum(ch) || ch == '_' || ch == '#' || ch == '$' ) // Note '#' and '$' is allowed because mrustc them it internally { val.push_back(ch); ch = m_if.get(); @@ -124,6 +219,7 @@ void Lexer::advance() if( ch == '.' || ch == 'p' ) { // Floats! + ::std::cerr << *this << "TODO - Hex floats" << ::std::endl; throw "TODO"; } m_if.unget(); @@ -148,6 +244,7 @@ void Lexer::advance() if( ch == '.' || ch == 'e' ) { // Floats! + ::std::cerr << *this << "TODO: Parse floating point numbers" << ::std::endl; throw "TODO"; } m_if.unget(); @@ -171,8 +268,9 @@ void Lexer::advance() val.push_back( static_cast<char>(::std::strtol(tmp, nullptr, 16)) ); break; } case '"': val.push_back('"'); break; + case '\\': val.push_back('\\'); break; default: - ::std::cerr << "Unexpected escape sequence '\\" << ch << "'" << ::std::endl; + ::std::cerr << *this << "Unexpected escape sequence '\\" << ch << "'" << ::std::endl; throw "ERROR"; } } @@ -203,6 +301,16 @@ void Lexer::advance() case '.': m_cur = Token { TokenClass::Symbol, "." }; break; case ',': m_cur = Token { TokenClass::Symbol, "," }; break; case '=': m_cur = Token { TokenClass::Symbol, "=" }; break; + case '&': m_cur = Token { TokenClass::Symbol, "&" }; break; + case '*': m_cur = Token { TokenClass::Symbol, "*" }; break; + case '/': m_cur = Token { TokenClass::Symbol, "/" }; break; + case '-': m_cur = Token { TokenClass::Symbol, "-" }; break; + case '+': m_cur = Token { TokenClass::Symbol, "+" }; break; + case '^': m_cur = Token { TokenClass::Symbol, "^" }; break; + case '|': m_cur = Token { TokenClass::Symbol, "|" }; break; + case '!': m_cur = Token { TokenClass::Symbol, "!" }; break; + + case '@': m_cur = Token { TokenClass::Symbol, "@" }; break; case '(': m_cur = Token { TokenClass::Symbol, "(" }; break; case ')': m_cur = Token { TokenClass::Symbol, ")" }; break; @@ -213,8 +321,14 @@ void Lexer::advance() case '{': m_cur = Token { TokenClass::Symbol, "{" }; break; case '}': m_cur = Token { TokenClass::Symbol, "}" }; break; default: - ::std::cerr << "Unexpected chracter '" << ch << "'" << ::std::endl; + ::std::cerr << *this << "Unexpected chracter '" << ch << "'" << ::std::endl; throw "ERROR"; } } } + +::std::ostream& operator<<(::std::ostream& os, const Lexer& x) +{ + os << x.m_filename << ":" << x.m_cur_line << ": "; + return os; +} |