summaryrefslogtreecommitdiff
path: root/tools/standalone_miri/lex.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'tools/standalone_miri/lex.cpp')
-rw-r--r--tools/standalone_miri/lex.cpp216
1 files changed, 187 insertions, 29 deletions
diff --git a/tools/standalone_miri/lex.cpp b/tools/standalone_miri/lex.cpp
index a602128a..48a9e0cd 100644
--- a/tools/standalone_miri/lex.cpp
+++ b/tools/standalone_miri/lex.cpp
@@ -53,6 +53,9 @@ double Token::real() const
case TokenClass::String:
os << "\"" << x.strval << "\"";
break;
+ case TokenClass::ByteString:
+ os << "b\"" << x.strval << "\"";
+ break;
}
return os;
}
@@ -75,6 +78,18 @@ const Token& Lexer::next() const
{
return m_cur;
}
+const Token& Lexer::lookahead()
+{
+ if( !m_next_valid )
+ {
+ auto tmp = ::std::move(m_cur);
+ advance();
+ m_next = ::std::move(m_cur);
+ m_cur = ::std::move(tmp);
+ m_next_valid = true;
+ }
+ return m_next;
+}
Token Lexer::consume()
{
auto rv = ::std::move(m_cur);
@@ -107,6 +122,13 @@ void Lexer::check(const char* s)
void Lexer::advance()
{
+ if( m_next_valid )
+ {
+ m_cur = ::std::move(m_next);
+ m_next_valid = false;
+ return ;
+ }
+
char ch;
do
{
@@ -177,6 +199,20 @@ void Lexer::advance()
ch = '#';
}
+ if(ch == 'b')
+ {
+ ch = m_if.get();
+ if( ch == '"' ) {
+ auto val = this->parse_string();
+ m_cur = Token { TokenClass::ByteString, ::std::move(val) };
+ return ;
+ }
+ else {
+ m_if.unget();
+ }
+ ch = 'b';
+ }
+
if( m_if.eof() )
{
m_cur = Token { TokenClass::Eof, "" };
@@ -203,7 +239,7 @@ void Lexer::advance()
throw "ERROR";
uint64_t rv = 0;
- while(::std::isdigit(ch))
+ while(::std::isxdigit(ch))
{
rv *= 16;
if( ch <= '9' )
@@ -218,9 +254,65 @@ void Lexer::advance()
}
if( ch == '.' || ch == 'p' )
{
+ uint64_t frac = 0;
+ if( ch == '.' )
+ {
+ ch = m_if.get();
+ int pos = 0;
+ while(::std::isxdigit(ch))
+ {
+ frac *= 16;
+ if( ch <= '9' )
+ frac += ch - '0';
+ else if( ch <= 'F' )
+ frac += ch - 'A' + 10;
+ else if( ch <= 'f' )
+ frac += ch - 'a' + 10;
+ else
+ throw "";
+ pos ++;
+ ch = m_if.get();
+ }
+ while(pos < 52/4)
+ {
+ frac *= 16;
+ pos ++;
+ }
+ }
+ int exp = 0;
+ if( ch == 'p' )
+ {
+ ch = m_if.get();
+ bool neg = false;
+ if( ch == '-' ) {
+ neg = true;
+ ch = m_if.get();
+ }
+ if( !::std::isdigit(ch) )
+ throw "ERROR";
+ while(::std::isdigit(ch))
+ {
+ exp *= 10;
+ exp += ch - '0';
+ ch = m_if.get();
+ }
+ if(neg)
+ exp = -exp;
+ }
// Floats!
- ::std::cerr << *this << "TODO - Hex floats" << ::std::endl;
- throw "TODO";
+ //::std::cerr << *this << "TODO - Hex floats - " << rv << "." << frac << "p" << exp << ::std::endl;
+ if( rv != 1 ) {
+ ::std::cerr << *this << "Invalid hex float literal, whole component must be 1" << ::std::endl;
+ throw "ERROR";
+ }
+ if( frac >= (1ull << 52) ) {
+ ::std::cerr << *this << "Invalid hex float literal, fractional component is more than 52 bits" << ::std::endl;
+ throw "ERROR";
+ }
+ uint64_t vi = (static_cast<uint64_t>(exp) << 52) | frac;
+ m_cur = Token { TokenClass::Real, "" };
+ m_cur.numbers.real_val = *reinterpret_cast<const double*>(&vi);
+ return ;
}
m_if.unget();
@@ -254,31 +346,7 @@ void Lexer::advance()
}
else if( ch == '"' )
{
- ::std::string val;
- while( (ch = m_if.get()) != '"' )
- {
- if( ch == '\\' )
- {
- switch( (ch = m_if.get()) )
- {
- case '0': val.push_back(0); break;
- case 'n': val.push_back(10); break;
- case 'x': {
- char tmp[3] = { static_cast<char>(m_if.get()), static_cast<char>(m_if.get()), 0};
- val.push_back( static_cast<char>(::std::strtol(tmp, nullptr, 16)) );
- break; }
- case '"': val.push_back('"'); break;
- case '\\': val.push_back('\\'); break;
- default:
- ::std::cerr << *this << "Unexpected escape sequence '\\" << ch << "'" << ::std::endl;
- throw "ERROR";
- }
- }
- else
- {
- val.push_back(ch);
- }
- }
+ auto val = this->parse_string();
m_cur = Token { TokenClass::String, ::std::move(val) };
}
else
@@ -304,6 +372,7 @@ void Lexer::advance()
case '&': m_cur = Token { TokenClass::Symbol, "&" }; break;
case '*': m_cur = Token { TokenClass::Symbol, "*" }; break;
case '/': m_cur = Token { TokenClass::Symbol, "/" }; break;
+ case '%': m_cur = Token { TokenClass::Symbol, "%" }; break;
case '-': m_cur = Token { TokenClass::Symbol, "-" }; break;
case '+': m_cur = Token { TokenClass::Symbol, "+" }; break;
case '^': m_cur = Token { TokenClass::Symbol, "^" }; break;
@@ -314,7 +383,19 @@ void Lexer::advance()
case '(': m_cur = Token { TokenClass::Symbol, "(" }; break;
case ')': m_cur = Token { TokenClass::Symbol, ")" }; break;
- case '<': m_cur = Token { TokenClass::Symbol, "<" }; break;
+ case '<':
+ // Combine << (note, doesn't need to happen for >>)
+ ch = m_if.get();
+ if( ch == '<' )
+ {
+ m_cur = Token { TokenClass::Symbol, "<<" };
+ }
+ else
+ {
+ m_if.unget();
+ m_cur = Token { TokenClass::Symbol, "<" };
+ }
+ break;
case '>': m_cur = Token { TokenClass::Symbol, ">" }; break;
case '[': m_cur = Token { TokenClass::Symbol, "[" }; break;
case ']': m_cur = Token { TokenClass::Symbol, "]" }; break;
@@ -326,6 +407,83 @@ void Lexer::advance()
}
}
}
+::std::string Lexer::parse_string()
+{
+ ::std::string val;
+ char ch;
+ while( (ch = m_if.get()) != '"' )
+ {
+ if( ch == '\\' )
+ {
+ switch( (ch = m_if.get()) )
+ {
+ case '0': val.push_back(0); break;
+ case 'n': val.push_back(10); break;
+ case 'x': {
+ char tmp[3] = { static_cast<char>(m_if.get()), static_cast<char>(m_if.get()), 0};
+ val.push_back( static_cast<char>(::std::strtol(tmp, nullptr, 16)) );
+ } break;
+ case 'u': {
+ ch = m_if.get();
+ if( ch != '{' ) {
+ ::std::cerr << *this << "Unexpected character in unicode escape - '" << ch << "'" << ::std::endl;
+ throw "ERROR";
+ }
+ ch = m_if.get();
+ uint32_t v = 0;
+ do {
+ if( !isxdigit(ch) ) {
+ ::std::cerr << *this << "Unexpected character in unicode escape - '" << ch << "'" << ::std::endl;
+ throw "ERROR";
+ }
+ v *= 16;
+ if( ch <= '9' )
+ v += ch - '0';
+ else if( ch <= 'F' )
+ v += ch - 'A' + 10;
+ else if( ch <= 'f' )
+ v += ch - 'a' + 10;
+ else
+ throw "";
+ ch = m_if.get();
+ } while(ch != '}');
+
+ if( v < 0x80 ) {
+ val.push_back(static_cast<char>(v));
+ }
+ else if( v < (0x1F+1)<<(1*6) ) {
+ val += (char)(0xC0 | ((v >> 6) & 0x1F));
+ val += (char)(0x80 | ((v >> 0) & 0x3F));
+ }
+ else if( v < (0x0F+1)<<(2*6) ) {
+ val += (char)(0xE0 | ((v >> 12) & 0x0F));
+ val += (char)(0x80 | ((v >> 6) & 0x3F));
+ val += (char)(0x80 | ((v >> 0) & 0x3F));
+ }
+ else if( v < (0x07+1)<<(3*6) ) {
+ val += (char)(0xF0 | ((v >> 18) & 0x07));
+ val += (char)(0x80 | ((v >> 12) & 0x3F));
+ val += (char)(0x80 | ((v >> 6) & 0x3F));
+ val += (char)(0x80 | ((v >> 0) & 0x3F));
+ }
+ else {
+ throw "";
+ }
+ } break;
+ case '"': val.push_back('"'); break;
+ case '\\': val.push_back('\\'); break;
+ default:
+ ::std::cerr << *this << "Unexpected escape sequence '\\" << ch << "'" << ::std::endl;
+ throw "ERROR";
+ }
+ }
+ else
+ {
+ val.push_back(ch);
+ }
+ }
+ return val;
+}
::std::ostream& operator<<(::std::ostream& os, const Lexer& x)
{