diff options
Diffstat (limited to 'tools/standalone_miri/lex.cpp')
-rw-r--r-- | tools/standalone_miri/lex.cpp | 220 |
1 files changed, 220 insertions, 0 deletions
diff --git a/tools/standalone_miri/lex.cpp b/tools/standalone_miri/lex.cpp new file mode 100644 index 00000000..7650ec78 --- /dev/null +++ b/tools/standalone_miri/lex.cpp @@ -0,0 +1,220 @@ +// +// +// +#include "lex.hpp" +#include <cctype> +#include <iostream> + +bool Token::operator==(TokenClass tc) const +{ + return this->type == tc; +} +bool Token::operator==(char c) const +{ + return this->strval.size() == 1 && this->strval[0] == c; +} +bool Token::operator==(const char* s) const +{ + return this->strval == s; +} + +uint64_t Token::integer() const +{ + if( this->type != TokenClass::Integer ) + throw ""; + return this->numbers.int_val; +} +double Token::real() const +{ + if( this->type != TokenClass::Real ) + throw ""; + return this->numbers.real_val; +} + +Lexer::Lexer(const ::std::string& path): + m_if(path) +{ + if( !m_if.good() ) + { + throw "ERROR"; + } + + advance(); +} + +const Token& Lexer::next() const +{ + return m_cur; +} +Token Lexer::consume() +{ + auto rv = ::std::move(m_cur); + + advance(); + + return rv; +} +void Lexer::check(TokenClass tc) +{ + if( next() != tc ) { + ::std::cerr << "Syntax error: Expected token class #" << int(tc) << " - got '" << next().strval << "'" << ::std::endl; + throw "ERROR"; + } +} +void Lexer::check(char ch) +{ + if( next() != ch ) { + ::std::cerr << "Syntax error: Expected '" << ch << "' - got '" << next().strval << "'" << ::std::endl; + throw "ERROR"; + } +} +void Lexer::check(const char* s) +{ + if( next() != s ) { + ::std::cerr << "Syntax error: Expected '" << s << "' - got '" << next().strval << "'" << ::std::endl; + throw "ERROR"; + } +} + +void Lexer::advance() +{ + char ch; + while( ::std::isblank(ch = m_if.get()) || ch == '\n' || ch == '\r') + ; + //::std::cout << "ch=" << ch << ::std::endl; + if( m_if.eof() ) + { + m_cur = Token { TokenClass::Eof, "" }; + } + else if( ::std::isalpha(ch) ) + { + ::std::string val; + while(::std::isalnum(ch) || ch == '_') + { + val.push_back(ch); + ch = m_if.get(); + } + m_if.unget(); + m_cur = Token { TokenClass::Ident, ::std::move(val) }; + } + else if( ::std::isdigit(ch) ) + { + if( ch == '0' ) + { + ch = m_if.get(); + if( ch == 'x' ) { + ch = m_if.get(); + if( !::std::isxdigit(ch) ) + throw "ERROR"; + + uint64_t rv = 0; + while(::std::isdigit(ch)) + { + rv *= 16; + if( ch <= '9' ) + rv += ch - '0'; + else if( ch <= 'F' ) + rv += ch - 'A' + 10; + else if( ch <= 'f' ) + rv += ch - 'a' + 10; + else + throw ""; + ch = m_if.get(); + } + if( ch == '.' || ch == 'p' ) + { + // Floats! + throw "TODO"; + } + m_if.unget(); + + m_cur = Token { TokenClass::Integer, "" }; + m_cur.numbers.int_val = rv; + return ; + } + else { + m_if.unget(); + ch = '0'; + } + } + + uint64_t rv = 0; + while(::std::isdigit(ch)) + { + rv *= 10; + rv += ch - '0'; + ch = m_if.get(); + } + if( ch == '.' || ch == 'e' ) + { + // Floats! + throw "TODO"; + } + m_if.unget(); + + m_cur = Token { TokenClass::Integer, "" }; + m_cur.numbers.int_val = rv; + } + else if( ch == '"' ) + { + ::std::string val; + while( (ch = m_if.get()) != '"' ) + { + if( ch == '\\' ) + { + switch( (ch = m_if.get()) ) + { + case '0': val.push_back(0); break; + case 'n': val.push_back(10); break; + case 'x': { + char tmp[3] = { static_cast<char>(m_if.get()), static_cast<char>(m_if.get()), 0}; + val.push_back( static_cast<char>(::std::strtol(tmp, nullptr, 16)) ); + break; } + case '"': val.push_back('"'); break; + default: + ::std::cerr << "Unexpected escape sequence '\\" << ch << "'" << ::std::endl; + throw "ERROR"; + } + } + else + { + val.push_back(ch); + } + } + m_cur = Token { TokenClass::String, ::std::move(val) }; + } + else + { + switch(ch) + { + case ':': + switch(m_if.get()) + { + case ':': + m_cur = Token { TokenClass::Symbol, "::" }; + break; + default: + m_if.unget(); + m_cur = Token { TokenClass::Symbol, ":" }; + break; + } + break; + case ';': m_cur = Token { TokenClass::Symbol, ";" }; break; + case '.': m_cur = Token { TokenClass::Symbol, "." }; break; + case ',': m_cur = Token { TokenClass::Symbol, "," }; break; + case '=': m_cur = Token { TokenClass::Symbol, "=" }; break; + + case '(': m_cur = Token { TokenClass::Symbol, "(" }; break; + case ')': m_cur = Token { TokenClass::Symbol, ")" }; break; + case '<': m_cur = Token { TokenClass::Symbol, "<" }; break; + case '>': m_cur = Token { TokenClass::Symbol, ">" }; break; + case '[': m_cur = Token { TokenClass::Symbol, "[" }; break; + case ']': m_cur = Token { TokenClass::Symbol, "]" }; break; + case '{': m_cur = Token { TokenClass::Symbol, "{" }; break; + case '}': m_cur = Token { TokenClass::Symbol, "}" }; break; + default: + ::std::cerr << "Unexpected chracter '" << ch << "'" << ::std::endl; + throw "ERROR"; + } + } +} |