summaryrefslogtreecommitdiff
path: root/tools/standalone_miri/lex.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'tools/standalone_miri/lex.cpp')
-rw-r--r--tools/standalone_miri/lex.cpp132
1 files changed, 123 insertions, 9 deletions
diff --git a/tools/standalone_miri/lex.cpp b/tools/standalone_miri/lex.cpp
index 7650ec78..a602128a 100644
--- a/tools/standalone_miri/lex.cpp
+++ b/tools/standalone_miri/lex.cpp
@@ -31,11 +31,40 @@ double Token::real() const
return this->numbers.real_val;
}
+::std::ostream& operator<<(::std::ostream& os, const Token& x)
+{
+ switch(x.type)
+ {
+ case TokenClass::Eof:
+ os << "-EOF-";
+ break;
+ case TokenClass::Symbol:
+ os << "Symbol(" << x.strval << ")";
+ break;
+ case TokenClass::Ident:
+ os << "Ident(" << x.strval << ")";
+ break;
+ case TokenClass::Integer:
+ os << "Integer(" << x.numbers.int_val << ")";
+ break;
+ case TokenClass::Real:
+ os << "Real(" << x.numbers.real_val << ")";
+ break;
+ case TokenClass::String:
+ os << "\"" << x.strval << "\"";
+ break;
+ }
+ return os;
+}
+
Lexer::Lexer(const ::std::string& path):
+ m_filename(path),
m_if(path)
{
+ m_cur_line = 1;
if( !m_if.good() )
{
+ ::std::cerr << "Unable to open file '" << path << "'" << ::std::endl;
throw "ERROR";
}
@@ -57,21 +86,21 @@ Token Lexer::consume()
void Lexer::check(TokenClass tc)
{
if( next() != tc ) {
- ::std::cerr << "Syntax error: Expected token class #" << int(tc) << " - got '" << next().strval << "'" << ::std::endl;
+ ::std::cerr << *this << "Syntax error: Expected token class #" << int(tc) << " - got '" << next().strval << "'" << ::std::endl;
throw "ERROR";
}
}
void Lexer::check(char ch)
{
if( next() != ch ) {
- ::std::cerr << "Syntax error: Expected '" << ch << "' - got '" << next().strval << "'" << ::std::endl;
+ ::std::cerr << *this << "Syntax error: Expected '" << ch << "' - got '" << next().strval << "'" << ::std::endl;
throw "ERROR";
}
}
void Lexer::check(const char* s)
{
if( next() != s ) {
- ::std::cerr << "Syntax error: Expected '" << s << "' - got '" << next().strval << "'" << ::std::endl;
+ ::std::cerr << *this << "Syntax error: Expected '" << s << "' - got '" << next().strval << "'" << ::std::endl;
throw "ERROR";
}
}
@@ -79,17 +108,83 @@ void Lexer::check(const char* s)
void Lexer::advance()
{
char ch;
- while( ::std::isblank(ch = m_if.get()) || ch == '\n' || ch == '\r')
- ;
+ do
+ {
+ while( ::std::isblank(ch = m_if.get()) || ch == '\n' || ch == '\r')
+ {
+ if(ch == '\n')
+ m_cur_line ++;
+ }
+ if( ch == '/' )
+ {
+ if( m_if.get() == '*' )
+ {
+ unsigned level = 0;
+ while(1)
+ {
+ ch = m_if.get();
+ if( ch == '\n' )
+ m_cur_line ++;
+ if( ch == '/' ) {
+ if( m_if.get() == '*' ) {
+ level ++;
+ }
+ else {
+ m_if.unget();
+ }
+ }
+ else if( ch == '*' ) {
+ if( m_if.get() == '/' ) {
+ if( level == 0 ) {
+ break;
+ }
+ level --;
+ }
+ else {
+ m_if.unget();
+ }
+ }
+ }
+
+ continue ;
+ }
+ else {
+ m_if.unget();
+ }
+ }
+ break;
+ } while(1);
//::std::cout << "ch=" << ch << ::std::endl;
+
+ // Special hack to treat #0 as an ident
+ if( ch == '#' )
+ {
+ ch = m_if.get();
+ if( ::std::isdigit(ch) )
+ {
+ ::std::string val = "#";
+ while(::std::isdigit(ch))
+ {
+ val.push_back(ch);
+ ch = m_if.get();
+ }
+ m_if.unget();
+ m_cur = Token { TokenClass::Ident, ::std::move(val) };
+ return ;
+ }
+
+ m_if.unget();
+ ch = '#';
+ }
+
if( m_if.eof() )
{
m_cur = Token { TokenClass::Eof, "" };
}
- else if( ::std::isalpha(ch) )
+ else if( ::std::isalpha(ch) || ch == '_' )
{
::std::string val;
- while(::std::isalnum(ch) || ch == '_')
+ while(::std::isalnum(ch) || ch == '_' || ch == '#' || ch == '$' ) // Note '#' and '$' is allowed because mrustc them it internally
{
val.push_back(ch);
ch = m_if.get();
@@ -124,6 +219,7 @@ void Lexer::advance()
if( ch == '.' || ch == 'p' )
{
// Floats!
+ ::std::cerr << *this << "TODO - Hex floats" << ::std::endl;
throw "TODO";
}
m_if.unget();
@@ -148,6 +244,7 @@ void Lexer::advance()
if( ch == '.' || ch == 'e' )
{
// Floats!
+ ::std::cerr << *this << "TODO: Parse floating point numbers" << ::std::endl;
throw "TODO";
}
m_if.unget();
@@ -171,8 +268,9 @@ void Lexer::advance()
val.push_back( static_cast<char>(::std::strtol(tmp, nullptr, 16)) );
break; }
case '"': val.push_back('"'); break;
+ case '\\': val.push_back('\\'); break;
default:
- ::std::cerr << "Unexpected escape sequence '\\" << ch << "'" << ::std::endl;
+ ::std::cerr << *this << "Unexpected escape sequence '\\" << ch << "'" << ::std::endl;
throw "ERROR";
}
}
@@ -203,6 +301,16 @@ void Lexer::advance()
case '.': m_cur = Token { TokenClass::Symbol, "." }; break;
case ',': m_cur = Token { TokenClass::Symbol, "," }; break;
case '=': m_cur = Token { TokenClass::Symbol, "=" }; break;
+ case '&': m_cur = Token { TokenClass::Symbol, "&" }; break;
+ case '*': m_cur = Token { TokenClass::Symbol, "*" }; break;
+ case '/': m_cur = Token { TokenClass::Symbol, "/" }; break;
+ case '-': m_cur = Token { TokenClass::Symbol, "-" }; break;
+ case '+': m_cur = Token { TokenClass::Symbol, "+" }; break;
+ case '^': m_cur = Token { TokenClass::Symbol, "^" }; break;
+ case '|': m_cur = Token { TokenClass::Symbol, "|" }; break;
+ case '!': m_cur = Token { TokenClass::Symbol, "!" }; break;
+
+ case '@': m_cur = Token { TokenClass::Symbol, "@" }; break;
case '(': m_cur = Token { TokenClass::Symbol, "(" }; break;
case ')': m_cur = Token { TokenClass::Symbol, ")" }; break;
@@ -213,8 +321,14 @@ void Lexer::advance()
case '{': m_cur = Token { TokenClass::Symbol, "{" }; break;
case '}': m_cur = Token { TokenClass::Symbol, "}" }; break;
default:
- ::std::cerr << "Unexpected chracter '" << ch << "'" << ::std::endl;
+ ::std::cerr << *this << "Unexpected chracter '" << ch << "'" << ::std::endl;
throw "ERROR";
}
}
}
+
+::std::ostream& operator<<(::std::ostream& os, const Lexer& x)
+{
+ os << x.m_filename << ":" << x.m_cur_line << ": ";
+ return os;
+}