diff options
author | John Hodge <tpg@mutabah.net> | 2018-01-26 21:04:21 +0800 |
---|---|---|
committer | John Hodge <tpg@mutabah.net> | 2018-01-26 21:04:21 +0800 |
commit | daacad86908557e5914d2d6931f3db6d1b75f1d0 (patch) | |
tree | c5bc5bb1d0caec41194acf97db84d71e0d91c724 | |
parent | 0de427d0acb099f83bcef81e819c629b703a6e24 (diff) | |
download | mrust-daacad86908557e5914d2d6931f3db6d1b75f1d0.tar.gz |
standalone_miri - Lots of work with parser.
-rw-r--r-- | tools/standalone_miri/hir_sim.cpp | 53 | ||||
-rw-r--r-- | tools/standalone_miri/hir_sim.hpp | 5 | ||||
-rw-r--r-- | tools/standalone_miri/lex.cpp | 132 | ||||
-rw-r--r-- | tools/standalone_miri/lex.hpp | 8 | ||||
-rw-r--r-- | tools/standalone_miri/module_tree.cpp | 565 | ||||
-rw-r--r-- | tools/standalone_miri/module_tree.hpp | 1 |
6 files changed, 671 insertions, 93 deletions
diff --git a/tools/standalone_miri/hir_sim.cpp b/tools/standalone_miri/hir_sim.cpp index 4072c323..090e0f6b 100644 --- a/tools/standalone_miri/hir_sim.cpp +++ b/tools/standalone_miri/hir_sim.cpp @@ -136,9 +136,25 @@ namespace HIR { break; case TypeWrapper::Ty::Pointer: os << "*"; + switch(it->size) + { + case 2: os << "move "; break; + case 1: os << "mut "; break; + case 0: os << "const "; break; + default: + break; + } break; case TypeWrapper::Ty::Borrow: os << "&"; + switch(it->size) + { + case 2: os << "move "; break; + case 1: os << "mut "; break; + case 0: os << ""; break; + default: + break; + } break; } } @@ -153,9 +169,10 @@ namespace HIR { case RawType::Unreachable: os << "!"; break; - case RawType::Str: - os << "str"; - break; + case RawType::Bool: os << "bool"; break; + case RawType::Char: os << "char"; break; + case RawType::Str: os << "str"; break; + case RawType::U8: os << "u8"; break; case RawType::I8: os << "i8"; break; case RawType::U16: os << "u16"; break; @@ -166,6 +183,8 @@ namespace HIR { case RawType::I64: os << "i64"; break; case RawType::U128: os << "u128"; break; case RawType::I128: os << "i128"; break; + case RawType::USize: os << "usize"; break; + case RawType::ISize: os << "isize"; break; } for(auto it = x.wrappers.rbegin(); it != x.wrappers.rend(); ++it) { @@ -183,6 +202,15 @@ namespace HIR { } return os; } + ::std::ostream& operator<<(::std::ostream& os, const SimplePath& x) + { + os << "::\"" << x.crate_name << "\""; + for(const auto& e : x.ents) + { + os << "::" << e; + } + return os; + } ::std::ostream& operator<<(::std::ostream& os, const ::HIR::PathParams& x) { if( !x.tys.empty() ) @@ -194,8 +222,27 @@ namespace HIR { } return os; } + ::std::ostream& operator<<(::std::ostream& os, const GenericPath& x) + { + os << x.m_simplepath; + os << x.m_params; + return os; + } ::std::ostream& operator<<(::std::ostream& os, const ::HIR::Path& x) { + if( x.m_name == "" ) + { + os << x.m_trait; + } + else + { + os << "<" << x.m_type; + if( x.m_trait != ::HIR::GenericPath() ) + { + os << " as " << x.m_trait; + } + os << ">::" << x.m_name << x.m_params; + } return os; } }
\ No newline at end of file diff --git a/tools/standalone_miri/hir_sim.hpp b/tools/standalone_miri/hir_sim.hpp index 5a9ddf41..1303f077 100644 --- a/tools/standalone_miri/hir_sim.hpp +++ b/tools/standalone_miri/hir_sim.hpp @@ -21,6 +21,7 @@ struct DataType; enum class RawType { Unreachable, + Function, Unit, Bool, @@ -29,6 +30,7 @@ enum class RawType U32, I32, U64, I64, U128, I128, + USize, ISize, F32, F64, @@ -152,6 +154,7 @@ namespace HIR { __LT(ents); return false; } + friend ::std::ostream& operator<<(::std::ostream& os, const SimplePath& x); }; struct PathParams @@ -183,6 +186,8 @@ namespace HIR { __LT(m_params.tys); return false; } + + friend ::std::ostream& operator<<(::std::ostream& os, const GenericPath& x); }; struct Path { diff --git a/tools/standalone_miri/lex.cpp b/tools/standalone_miri/lex.cpp index 7650ec78..a602128a 100644 --- a/tools/standalone_miri/lex.cpp +++ b/tools/standalone_miri/lex.cpp @@ -31,11 +31,40 @@ double Token::real() const return this->numbers.real_val; } +::std::ostream& operator<<(::std::ostream& os, const Token& x) +{ + switch(x.type) + { + case TokenClass::Eof: + os << "-EOF-"; + break; + case TokenClass::Symbol: + os << "Symbol(" << x.strval << ")"; + break; + case TokenClass::Ident: + os << "Ident(" << x.strval << ")"; + break; + case TokenClass::Integer: + os << "Integer(" << x.numbers.int_val << ")"; + break; + case TokenClass::Real: + os << "Real(" << x.numbers.real_val << ")"; + break; + case TokenClass::String: + os << "\"" << x.strval << "\""; + break; + } + return os; +} + Lexer::Lexer(const ::std::string& path): + m_filename(path), m_if(path) { + m_cur_line = 1; if( !m_if.good() ) { + ::std::cerr << "Unable to open file '" << path << "'" << ::std::endl; throw "ERROR"; } @@ -57,21 +86,21 @@ Token Lexer::consume() void Lexer::check(TokenClass tc) { if( next() != tc ) { - ::std::cerr << "Syntax error: Expected token class #" << int(tc) << " - got '" << next().strval << "'" << ::std::endl; + ::std::cerr << *this << "Syntax error: Expected token class #" << int(tc) << " - got '" << next().strval << "'" << ::std::endl; throw "ERROR"; } } void Lexer::check(char ch) { if( next() != ch ) { - ::std::cerr << "Syntax error: Expected '" << ch << "' - got '" << next().strval << "'" << ::std::endl; + ::std::cerr << *this << "Syntax error: Expected '" << ch << "' - got '" << next().strval << "'" << ::std::endl; throw "ERROR"; } } void Lexer::check(const char* s) { if( next() != s ) { - ::std::cerr << "Syntax error: Expected '" << s << "' - got '" << next().strval << "'" << ::std::endl; + ::std::cerr << *this << "Syntax error: Expected '" << s << "' - got '" << next().strval << "'" << ::std::endl; throw "ERROR"; } } @@ -79,17 +108,83 @@ void Lexer::check(const char* s) void Lexer::advance() { char ch; - while( ::std::isblank(ch = m_if.get()) || ch == '\n' || ch == '\r') - ; + do + { + while( ::std::isblank(ch = m_if.get()) || ch == '\n' || ch == '\r') + { + if(ch == '\n') + m_cur_line ++; + } + if( ch == '/' ) + { + if( m_if.get() == '*' ) + { + unsigned level = 0; + while(1) + { + ch = m_if.get(); + if( ch == '\n' ) + m_cur_line ++; + if( ch == '/' ) { + if( m_if.get() == '*' ) { + level ++; + } + else { + m_if.unget(); + } + } + else if( ch == '*' ) { + if( m_if.get() == '/' ) { + if( level == 0 ) { + break; + } + level --; + } + else { + m_if.unget(); + } + } + } + + continue ; + } + else { + m_if.unget(); + } + } + break; + } while(1); //::std::cout << "ch=" << ch << ::std::endl; + + // Special hack to treat #0 as an ident + if( ch == '#' ) + { + ch = m_if.get(); + if( ::std::isdigit(ch) ) + { + ::std::string val = "#"; + while(::std::isdigit(ch)) + { + val.push_back(ch); + ch = m_if.get(); + } + m_if.unget(); + m_cur = Token { TokenClass::Ident, ::std::move(val) }; + return ; + } + + m_if.unget(); + ch = '#'; + } + if( m_if.eof() ) { m_cur = Token { TokenClass::Eof, "" }; } - else if( ::std::isalpha(ch) ) + else if( ::std::isalpha(ch) || ch == '_' ) { ::std::string val; - while(::std::isalnum(ch) || ch == '_') + while(::std::isalnum(ch) || ch == '_' || ch == '#' || ch == '$' ) // Note '#' and '$' is allowed because mrustc them it internally { val.push_back(ch); ch = m_if.get(); @@ -124,6 +219,7 @@ void Lexer::advance() if( ch == '.' || ch == 'p' ) { // Floats! + ::std::cerr << *this << "TODO - Hex floats" << ::std::endl; throw "TODO"; } m_if.unget(); @@ -148,6 +244,7 @@ void Lexer::advance() if( ch == '.' || ch == 'e' ) { // Floats! + ::std::cerr << *this << "TODO: Parse floating point numbers" << ::std::endl; throw "TODO"; } m_if.unget(); @@ -171,8 +268,9 @@ void Lexer::advance() val.push_back( static_cast<char>(::std::strtol(tmp, nullptr, 16)) ); break; } case '"': val.push_back('"'); break; + case '\\': val.push_back('\\'); break; default: - ::std::cerr << "Unexpected escape sequence '\\" << ch << "'" << ::std::endl; + ::std::cerr << *this << "Unexpected escape sequence '\\" << ch << "'" << ::std::endl; throw "ERROR"; } } @@ -203,6 +301,16 @@ void Lexer::advance() case '.': m_cur = Token { TokenClass::Symbol, "." }; break; case ',': m_cur = Token { TokenClass::Symbol, "," }; break; case '=': m_cur = Token { TokenClass::Symbol, "=" }; break; + case '&': m_cur = Token { TokenClass::Symbol, "&" }; break; + case '*': m_cur = Token { TokenClass::Symbol, "*" }; break; + case '/': m_cur = Token { TokenClass::Symbol, "/" }; break; + case '-': m_cur = Token { TokenClass::Symbol, "-" }; break; + case '+': m_cur = Token { TokenClass::Symbol, "+" }; break; + case '^': m_cur = Token { TokenClass::Symbol, "^" }; break; + case '|': m_cur = Token { TokenClass::Symbol, "|" }; break; + case '!': m_cur = Token { TokenClass::Symbol, "!" }; break; + + case '@': m_cur = Token { TokenClass::Symbol, "@" }; break; case '(': m_cur = Token { TokenClass::Symbol, "(" }; break; case ')': m_cur = Token { TokenClass::Symbol, ")" }; break; @@ -213,8 +321,14 @@ void Lexer::advance() case '{': m_cur = Token { TokenClass::Symbol, "{" }; break; case '}': m_cur = Token { TokenClass::Symbol, "}" }; break; default: - ::std::cerr << "Unexpected chracter '" << ch << "'" << ::std::endl; + ::std::cerr << *this << "Unexpected chracter '" << ch << "'" << ::std::endl; throw "ERROR"; } } } + +::std::ostream& operator<<(::std::ostream& os, const Lexer& x) +{ + os << x.m_filename << ":" << x.m_cur_line << ": "; + return os; +} diff --git a/tools/standalone_miri/lex.hpp b/tools/standalone_miri/lex.hpp index c01a23a4..8c785a5f 100644 --- a/tools/standalone_miri/lex.hpp +++ b/tools/standalone_miri/lex.hpp @@ -33,10 +33,14 @@ struct Token uint64_t integer() const; double real() const; + + friend ::std::ostream& operator<<(::std::ostream& os, const Token& x); }; class Lexer { + ::std::string m_filename; + unsigned m_cur_line; ::std::ifstream m_if; Token m_cur; public: @@ -52,6 +56,8 @@ public: bool consume_if(char ch) { if(next() == ch) { consume(); return true; } return false; } bool consume_if(const char* s) { if(next() == s) { consume(); return true; } return false; } + friend ::std::ostream& operator<<(::std::ostream& os, const Lexer& x); + private: void advance(); -};
\ No newline at end of file +}; diff --git a/tools/standalone_miri/module_tree.cpp b/tools/standalone_miri/module_tree.cpp index 27737ea5..34d0376b 100644 --- a/tools/standalone_miri/module_tree.cpp +++ b/tools/standalone_miri/module_tree.cpp @@ -23,6 +23,7 @@ struct Parser ::MIR::Function parse_body(); ::HIR::Path parse_path(); + ::HIR::PathParams parse_pathparams(); ::HIR::GenericPath parse_genericpath(); ::HIR::SimplePath parse_simplepath(); RawType parse_core_type(); @@ -32,6 +33,8 @@ struct Parser void ModuleTree::load_file(const ::std::string& path) { + ::std::cout << "DEBUG: load_file(" << path << ")" << ::std::endl; + //TRACE_FUNCTION_F(path); auto parse = Parser { *this, path }; while(parse.parse_one()) @@ -42,14 +45,30 @@ void ModuleTree::load_file(const ::std::string& path) // Parse a single item from a .mir file bool Parser::parse_one() { + //::std::cout << "DEBUG: parse_one" << ::std::endl; if( lex.next() == "" ) // EOF? { return false; } - if( lex.consume_if("fn") ) + if( lex.consume_if("crate") ) + { + // Import an external crate + lex.check(TokenClass::String); + auto path = ::std::move(lex.next().strval); + lex.consume(); + //::std::cout << "DEBUG: parse_one - crate '" << path << "'" << ::std::endl; + + lex.check_consume(';'); + + + this->tree.load_file(path); + } + else if( lex.consume_if("fn") ) { auto p = parse_path(); + //::std::cout << "DEBUG:p arse_one - fn " << p << ::std::endl; + lex.check_consume('('); ::std::vector<::HIR::TypeRef> arg_tys; while(lex.next() != ')') @@ -70,6 +89,7 @@ bool Parser::parse_one() else if( lex.consume_if("static") ) { auto p = parse_path(); + //::std::cout << "DEBUG: parse_one - static " << p << ::std::endl; lex.check_consume('='); // TODO: Body? Value? //auto body = parse_body(); @@ -79,15 +99,21 @@ bool Parser::parse_one() else if( lex.consume_if("type") ) { auto p = (lex.consume_if('(')) ? parse_tuple() : parse_genericpath(); + //::std::cout << "DEBUG: parse_one - type " << p << ::std::endl; - auto rv = ::std::make_unique<DataType>(); + auto rv = DataType {}; lex.check_consume('{'); lex.check_consume("SIZE"); - rv->size = lex.consume().integer(); + rv.size = lex.consume().integer(); lex.check_consume(','); lex.check_consume("ALIGN"); - rv->alignment = lex.consume().integer(); + rv.alignment = lex.consume().integer(); + if( rv.alignment == 0 ) + { + ::std::cerr << lex << "Alignment of zero is invalid, " << p << ::std::endl; + throw "ERROR"; + } lex.check_consume(';'); // TODO: DST Meta @@ -108,9 +134,10 @@ bool Parser::parse_one() size_t ofs = lex.consume().integer(); lex.check_consume('='); auto ty = parse_type(); - lex.check_consume(','); + lex.check_consume(';'); + //::std::cout << ofs << " " << ty << ::std::endl; - rv->fields.push_back(::std::make_pair(ofs, ::std::move(ty))); + rv.fields.push_back(::std::make_pair(ofs, ::std::move(ty))); } // Variants while(lex.next() == '[') @@ -137,21 +164,35 @@ bool Parser::parse_one() pos += 8; } lex.consume(); - lex.check_consume(','); + lex.check_consume(';'); - rv->variants.push_back({ base_idx, other_idx, v }); + rv.variants.push_back({ base_idx, other_idx, v }); } lex.check_consume('}'); - auto r = this->tree.data_types.insert(::std::make_pair( ::std::move(p), ::std::move(rv) )); - if( !r.second ) + auto it = this->tree.data_types.find(p); + if( it != this->tree.data_types.end() ) { - // Duplicate definition of a type - throw "ERROR"; + if( it->second->alignment == 0 ) + { + *it->second = ::std::move(rv); + } + else + { + ::std::cerr << lex << "Duplicate definition of " << p << ::std::endl; + // Not really an error, can happen when loading crates + //throw "ERROR"; + } + } + else + { + this->tree.data_types.insert(::std::make_pair( ::std::move(p), ::std::make_unique<DataType>(::std::move(rv)) )); } } else { + ::std::cerr << lex << "Unexpected token at root - " << lex.next() << ::std::endl; + // Unknown item type throw "ERROR"; } @@ -169,50 +210,76 @@ bool Parser::parse_one() static ::std::unique_ptr<::MIR::LValue> make_lvp(::MIR::LValue&& lv) { return ::std::unique_ptr<::MIR::LValue>(new ::MIR::LValue(::std::move(lv))); } - static ::MIR::LValue parse_lvalue(Lexer& lex, ::std::vector<::std::string>& var_names) + // + // Parse a LValue + // + static ::MIR::LValue parse_lvalue(Parser& p, ::std::vector<::std::string>& var_names) { + auto& lex = p.lex; int deref = 0; + // Count up leading derefs while(lex.consume_if('*') ) { deref ++; } ::MIR::LValue lv; if( lex.consume_if('(') ) { - lv = parse_lvalue(lex, var_names); + lv = parse_lvalue(p, var_names); lex.check_consume(')'); } else if( lex.next() == TokenClass::Ident ) { auto name = ::std::move(lex.consume().strval); - //::std::cout << "name=" << name << "\n"; + // TODO: Make arguments have custom names too if( name.substr(0,3) == "arg" ) { - auto idx = static_cast<unsigned>( ::std::stol(name.substr(4)) ); - - lv = ::MIR::LValue::make_Argument({ idx }); + try { + auto idx = static_cast<unsigned>( ::std::stol(name.substr(3)) ); + lv = ::MIR::LValue::make_Argument({ idx }); + } + catch(const ::std::exception& e) { + ::std::cerr << lex << "Invalid argument name - " << name << " - " << e.what() << ::std::endl; + throw "ERROR"; + } } + // Hard-coded "RETURN" lvalue else if( name == "RETURN" ) { lv = ::MIR::LValue::make_Return({}); } + // Otherwise, look up variable names else { auto it = ::std::find(var_names.begin(), var_names.end(), name); if( it == var_names.end() ) { + ::std::cerr << lex << "Cannot find variable named '" << name << "'" << ::std::endl; throw "ERROR"; } lv = ::MIR::LValue::make_Local(static_cast<unsigned>(it - var_names.begin())); } } + else if( lex.next() == "::" || lex.next() == '<' ) + { + auto path = p.parse_path(); + lv = ::MIR::LValue( ::std::move(path) ); + } else { + ::std::cerr << lex << "Unexpected token in LValue - " << lex.next() << ::std::endl; throw "ERROR"; } for(;;) { - if( lex.consume_if('.') ) + if( lex.consume_if('@') ) { + lex.check(TokenClass::Integer); + auto idx = static_cast<unsigned>( lex.consume().integer() ); + lv = ::MIR::LValue::make_Downcast({ make_lvp(::std::move(lv)), idx }); + } + else if( lex.consume_if('.') ) + { + lex.check(TokenClass::Integer); auto idx = static_cast<unsigned>( lex.consume().integer() ); lv = ::MIR::LValue::make_Field({ make_lvp(::std::move(lv)), idx }); } else if( lex.next() == '[' ) { lex.consume(); - auto idx_lv = parse_lvalue(lex, var_names); + auto idx_lv = parse_lvalue(p, var_names); lv = ::MIR::LValue::make_Index({ make_lvp(::std::move(lv)), make_lvp(::std::move(idx_lv)) }); lex.check_consume(']'); } @@ -228,15 +295,44 @@ bool Parser::parse_one() return lv; } - static ::MIR::Param parse_param(Parser& p, ::std::vector<::std::string>& var_names) + static ::MIR::Constant parse_const(Parser& p) { if( p.lex.next() == TokenClass::Integer ) { auto v = p.lex.consume().integer(); auto cty = p.parse_core_type(); - return ::MIR::Constant::make_Int({ static_cast<int64_t>(v), cty }); + return ::MIR::Constant::make_Uint({ static_cast<uint64_t>(v), cty }); + } + else if( p.lex.next() == '+' || p.lex.next() == '-' ) { + bool is_neg = (p.lex.consume() == '-'); + auto v = static_cast<int64_t>(p.lex.consume().integer()); + auto cty = p.parse_core_type(); + return ::MIR::Constant::make_Int({ is_neg ? -v : v, cty }); + } + else if( p.lex.consume_if("true") ) { + return ::MIR::Constant::make_Bool({ true }); + } + else if( p.lex.consume_if("false") ) { + return ::MIR::Constant::make_Bool({ false }); + } + else if( p.lex.consume_if("&") ) { + auto path = p.parse_path(); + + return ::MIR::Constant::make_ItemAddr({ ::std::move(path) }); } else { - return parse_lvalue(p.lex, var_names); + ::std::cerr << p.lex << "BUG? " << p.lex.next() << ::std::endl; + throw "ERROR"; + } + } + + // Parse a "Param" (constant or lvalue) + static ::MIR::Param parse_param(Parser& p, ::std::vector<::std::string>& var_names) + { + if( p.lex.next() == TokenClass::Integer || p.lex.next() == '+' || p.lex.next() == '-' || p.lex.next() == '&' || p.lex.next() == "true" || p.lex.next() == "false" ) { + return parse_const(p); + } + else { + return parse_lvalue(p, var_names); } } }; @@ -248,15 +344,13 @@ bool Parser::parse_one() { lex.consume(); auto name = ::std::move(lex.consume().strval); - if(lex.next() == '=') + if(lex.consume_if('=')) { - lex.consume(); rv.drop_flags.push_back(lex.consume().integer() != 0); drop_flag_names.push_back(::std::move(name)); } - else if(lex.next() == ':') + else if(lex.consume_if(':')) { - lex.consume(); var_names.push_back(::std::move(name)); rv.locals.push_back( parse_type() ); } @@ -273,6 +367,11 @@ bool Parser::parse_one() ::std::vector<::MIR::Statement> stmts; ::MIR::Terminator term; + if( lex.next().integer() != rv.blocks.size() ) + { + // TODO: Error. + } + lex.consume(); lex.check_consume(':'); lex.check_consume('{'); @@ -281,37 +380,197 @@ bool Parser::parse_one() lex.check(TokenClass::Ident); if( lex.consume_if("ASSIGN") ) { - auto dst_val = H::parse_lvalue(lex, var_names); + auto dst_val = H::parse_lvalue(*this, var_names); lex.check_consume('='); ::MIR::RValue src_rval; - if( lex.next() == TokenClass::Integer ) { - auto v = lex.consume().integer(); - auto cty = parse_core_type(); - src_rval = ::MIR::Constant::make_Int({ static_cast<int64_t>(v), cty }); + // Literals + if( lex.next() == TokenClass::Integer || lex.next() == '+' || lex.next() == '-' || lex.next() == "true" || lex.next() == "false" ) { + src_rval = H::parse_const(*this); + } + // LValue (prefixed by =) + else if( lex.consume_if('=') ) { + src_rval = H::parse_lvalue(*this, var_names); + } + else if( lex.consume_if('&') ) { + auto bt = ::HIR::BorrowType::Shared; + if( lex.consume_if("move") ) + bt = ::HIR::BorrowType::Move; + else if( lex.consume_if("mut") ) + bt = ::HIR::BorrowType::Unique; + else + ; + auto val = H::parse_lvalue(*this, var_names); + src_rval = ::MIR::RValue::make_Borrow({ 0, bt, ::std::move(val) }); } + // Composites else if( lex.consume_if('(') ) { ::std::vector<::MIR::Param> vals; while( lex.next() != ')' ) { vals.push_back( H::parse_param(*this, var_names) ); - lex.check_consume(','); + if( !lex.consume_if(',') ) + break ; } - lex.consume(); + lex.check_consume(')'); src_rval = ::MIR::RValue::make_Tuple({ ::std::move(vals) }); } - else if( lex.consume_if("USE") ) { - src_rval = H::parse_lvalue(lex, var_names); + else if( lex.consume_if('[') ) { + ::std::vector<::MIR::Param> vals; + if( lex.consume_if(']') ) + { + // Empty array + src_rval = ::MIR::RValue::make_Array({ ::std::move(vals) }); + } + else + { + vals.push_back( H::parse_param(*this, var_names) ); + if( lex.consume_if(';') ) + { + // Sized array + lex.check(TokenClass::Integer); + auto size_val = static_cast<unsigned>(lex.consume().integer()); + lex.check_consume(']'); + + src_rval = ::MIR::RValue::make_SizedArray({ ::std::move(vals[0]), size_val }); + } + else + { + // List array + if( lex.consume_if(',') ) + { + while( lex.next() != ']' ) + { + vals.push_back( H::parse_param(*this, var_names) ); + if( !lex.consume_if(',') ) + break ; + } + } + lex.check_consume(']'); + src_rval = ::MIR::RValue::make_Array({ ::std::move(vals) }); + } + } + } + else if( lex.consume_if('{') ) { + ::std::vector<::MIR::Param> vals; + while( lex.next() != '}' ) + { + vals.push_back( H::parse_param(*this, var_names) ); + if( !lex.consume_if(',') ) + break ; + } + lex.check_consume('}'); + lex.check_consume(':'); + auto p = parse_genericpath(); + + src_rval = ::MIR::RValue::make_Struct({ ::std::move(p), ::std::move(vals) }); + } + else if( lex.consume_if("VARIANT") ) { + auto path = parse_genericpath(); + //auto idx = static_cast<unsigned>(lex.consume_integer()); + lex.check(TokenClass::Integer); + auto idx = static_cast<unsigned>(lex.consume().integer()); + auto val = H::parse_param(*this, var_names); + + src_rval = ::MIR::RValue::make_Variant({ ::std::move(path), idx, ::std::move(val) }); + } + // Operations + else if( lex.consume_if("CAST") ) { + auto lv = H::parse_lvalue(*this, var_names); + lex.check_consume("as"); + auto ty = parse_type(); + src_rval = ::MIR::RValue::make_Cast({ ::std::move(lv), ::std::move(ty) }); + } + else if( lex.consume_if("UNIOP") ) { + + lex.check(TokenClass::Symbol); + ::MIR::eUniOp op; + if( lex.consume_if('!') ) { + op = ::MIR::eUniOp::INV; + } + else if( lex.consume_if('-') ) { + op = ::MIR::eUniOp::NEG; + } + else { + ::std::cerr << lex << "Unexpected token in uniop - " << lex.next() << ::std::endl; + throw "ERROR"; + } + + auto lv = H::parse_lvalue(*this, var_names); + + src_rval = ::MIR::RValue::make_UniOp({ ::std::move(lv), op }); + } + else if( lex.consume_if("BINOP") ) { + auto lv1 = H::parse_param(*this, var_names); + lex.check(TokenClass::Symbol); + auto t = lex.consume(); + ::MIR::eBinOp op; + switch(t.strval[0]) + { + case '+': op = (lex.consume_if('^') ? ::MIR::eBinOp::ADD_OV : ::MIR::eBinOp::ADD); break; + case '-': op = (lex.consume_if('^') ? ::MIR::eBinOp::SUB_OV : ::MIR::eBinOp::SUB); break; + case '*': op = (lex.consume_if('^') ? ::MIR::eBinOp::MUL_OV : ::MIR::eBinOp::MUL); break; + case '/': op = (lex.consume_if('^') ? ::MIR::eBinOp::DIV_OV : ::MIR::eBinOp::DIV); break; + case '|': op = ::MIR::eBinOp::BIT_OR ; break; + case '&': op = ::MIR::eBinOp::BIT_AND; break; + case '^': op = ::MIR::eBinOp::BIT_XOR; break; + case '<': + if( lex.consume_if('<') ) + op = ::MIR::eBinOp::BIT_SHL; + else if( lex.consume_if('=') ) + op = ::MIR::eBinOp::LE; + else + op = ::MIR::eBinOp::LT; + break; + case '>': + if( lex.consume_if('>') ) + op = ::MIR::eBinOp::BIT_SHR; + else if( lex.consume_if('=') ) + op = ::MIR::eBinOp::GE; + else + op = ::MIR::eBinOp::GT; + break; + case '=': + op = ::MIR::eBinOp::EQ; if(0) + case '!': + op = ::MIR::eBinOp::NE; + lex.check_consume('='); + break; + default: + ::std::cerr << lex << "Unexpected token " << t << " in BINOP" << ::std::endl; + throw "ERROR"; + } + auto lv2 = H::parse_param(*this, var_names); + + src_rval = ::MIR::RValue::make_BinOp({ ::std::move(lv1), op, ::std::move(lv2) }); + } + else if( lex.consume_if("MAKEDST") ) { + auto lv_ptr = H::parse_param(*this, var_names); + lex.check_consume(','); + auto lv_meta = H::parse_param(*this, var_names); + src_rval = ::MIR::RValue::make_MakeDst({ ::std::move(lv_ptr), ::std::move(lv_meta) }); + } + else if( lex.consume_if("DSTPTR") ) { + auto lv = H::parse_lvalue(*this, var_names); + src_rval = ::MIR::RValue::make_DstPtr({ ::std::move(lv) }); + } + else if( lex.consume_if("DSTMETA") ) { + auto lv = H::parse_lvalue(*this, var_names); + src_rval = ::MIR::RValue::make_DstMeta({ ::std::move(lv) }); } else { - throw ""; + ::std::cerr << lex << "Unexpected token in RValue - " << lex.next() << ::std::endl; + throw "ERROR"; } stmts.push_back(::MIR::Statement::make_Assign({ ::std::move(dst_val), ::std::move(src_rval) })); } else if( lex.consume_if("SETFLAG") ) { - auto df_it = ::std::find(drop_flag_names.begin(), drop_flag_names.end(), lex.next().strval); + lex.check(TokenClass::Ident); + auto name = ::std::move(lex.consume().strval); + auto df_it = ::std::find(drop_flag_names.begin(), drop_flag_names.end(), name); if( df_it == drop_flag_names.end() ) { + ::std::cerr << lex << "Unable to find drop flag '" << name << "'" << ::std::endl; throw "ERROR"; } auto df_idx = static_cast<unsigned>( df_it - drop_flag_names.begin() ); @@ -326,8 +585,11 @@ bool Parser::parse_one() inv = true; lex.consume(); } - df_it = ::std::find(drop_flag_names.begin(), drop_flag_names.end(), lex.next().strval); + lex.check(TokenClass::Ident); + auto name = ::std::move(lex.consume().strval); + df_it = ::std::find(drop_flag_names.begin(), drop_flag_names.end(), name); if( df_it == drop_flag_names.end() ) { + ::std::cerr << lex << "Unable to find drop flag '" << name << "'" << ::std::endl; throw "ERROR"; } auto other_idx = static_cast<unsigned>( df_it - drop_flag_names.begin() ); @@ -335,9 +597,28 @@ bool Parser::parse_one() stmts.push_back(::MIR::Statement::make_SetDropFlag({ df_idx, inv, other_idx })); } } - else if(lex.next() == "DROP") + else if(lex.consume_if("DROP") ) { - throw "TODO"; + auto slot = H::parse_lvalue(*this, var_names); + auto kind = ::MIR::eDropKind::DEEP; + if( lex.consume_if("SHALLOW") ) + { + kind = ::MIR::eDropKind::SHALLOW; + } + unsigned flag_idx = ~0u; + if( lex.consume_if("IF") ) + { + lex.check(TokenClass::Ident); + auto name = ::std::move(lex.consume().strval); + auto df_it = ::std::find(drop_flag_names.begin(), drop_flag_names.end(), name); + if( df_it == drop_flag_names.end() ) { + ::std::cerr << lex << "Unable to find drop flag '" << name << "'" << ::std::endl; + throw "ERROR"; + } + flag_idx = static_cast<unsigned>( df_it - drop_flag_names.begin() ); + } + + stmts.push_back(::MIR::Statement::make_Drop({ kind, ::std::move(slot), flag_idx })); } else if(lex.next() == "ASM") { @@ -348,6 +629,7 @@ bool Parser::parse_one() break; } lex.check_consume(';'); + //::std::cout << stmts.back() << ::std::endl; } lex.check(TokenClass::Ident); @@ -355,35 +637,86 @@ bool Parser::parse_one() { term = ::MIR::Terminator::make_Goto(static_cast<unsigned>(lex.consume().integer())); } + else if( lex.consume_if("PANIC") ) + { + term = ::MIR::Terminator::make_Panic({ static_cast<unsigned>(lex.consume().integer()) }); + } else if( lex.consume_if("RETURN") ) { term = ::MIR::Terminator::make_Return({}); } - else if(lex.next() == "IF") + else if( lex.consume_if("DIVERGE") ) + { + term = ::MIR::Terminator::make_Diverge({}); + } + else if( lex.consume_if("IF") ) { - auto val = H::parse_lvalue(lex, var_names); + auto val = H::parse_lvalue(*this, var_names); lex.check_consume("goto"); auto tgt_true = static_cast<unsigned>(lex.consume().integer()); lex.check_consume("else"); auto tgt_false = static_cast<unsigned>(lex.consume().integer()); term = ::MIR::Terminator::make_If({ ::std::move(val), tgt_true, tgt_false }); } - else if(lex.next() == "SWITCH") + else if( lex.consume_if("SWITCH") ) { - auto val = H::parse_lvalue(lex, var_names); - throw "TODO"; + auto val = H::parse_lvalue(*this, var_names); + lex.check_consume('{'); + ::std::vector<unsigned> targets; + while(lex.next() != '{') + { + targets.push_back( static_cast<unsigned>(lex.consume().integer()) ); + if( !lex.consume_if(',') ) + break; + } + lex.check_consume('}'); + + term = ::MIR::Terminator::make_Switch({ ::std::move(val), ::std::move(targets) }); } - else if(lex.next() == "SWITCHVAL") + else if( lex.consume_if("SWITCHVAL") ) { - auto val = H::parse_lvalue(lex, var_names); + auto val = H::parse_lvalue(*this, var_names); throw "TODO"; } - else if(lex.next() == "CALL") + else if( lex.consume_if("CALL") ) { - throw "TODO"; + auto dst = H::parse_lvalue(*this, var_names); + lex.check_consume('='); + ::MIR::CallTarget ct; + if(lex.consume_if('(')) { + ct = H::parse_lvalue(*this, var_names); + lex.check_consume(')'); + } + else if( lex.next() == TokenClass::String ) { + auto name = ::std::move(lex.consume().strval); + auto params = parse_pathparams(); + ct = ::MIR::CallTarget::make_Intrinsic({ ::std::move(name), ::std::move(params) }); + } + else { + ct = parse_path(); + } + lex.check_consume('('); + ::std::vector<::MIR::Param> args; + while(lex.next() != ')') + { + args.push_back(H::parse_param(*this, var_names)); + if( !lex.consume_if(',') ) + break; + } + lex.check_consume(')'); + lex.check_consume("goto"); + //auto tgt_idx = lex.consume_integer(); + lex.check(TokenClass::Integer); + auto tgt_block = static_cast<unsigned>(lex.consume().integer()); + lex.check_consume("else"); + lex.check(TokenClass::Integer); + auto panic_block = static_cast<unsigned>(lex.consume().integer()); + + term = ::MIR::Terminator::make_Call({ tgt_block, panic_block, ::std::move(dst), ::std::move(ct), ::std::move(args) }); } else { + ::std::cerr << lex << "Unexpected token at terminator - " << lex.next() << ::std::endl; throw "ERROR"; } @@ -410,34 +743,35 @@ bool Parser::parse_one() lex.check(TokenClass::Ident); auto item_name = ::std::move(lex.consume().strval); - ::HIR::PathParams params; - if( lex.consume_if('<') ) - { - do - { - params.tys.push_back( parse_type() ); - } while( lex.consume_if(',') ); - lex.check_consume('>'); - } - throw "TODO"; + ::HIR::PathParams params = parse_pathparams(); + + return ::HIR::Path( ::std::move(ty), ::std::move(trait), ::std::move(item_name), ::std::move(params) ); } else { return parse_genericpath(); } } -::HIR::GenericPath Parser::parse_genericpath() +::HIR::PathParams Parser::parse_pathparams() { - ::HIR::GenericPath rv; - rv.m_simplepath = parse_simplepath(); + ::HIR::PathParams params; if( lex.consume_if('<') ) { - do + while(lex.next() != '>') { - rv.m_params.tys.push_back( parse_type() ); - } while( lex.consume_if(',') ); + params.tys.push_back( parse_type() ); + if( !lex.consume_if(',') ) + break ; + } lex.check_consume('>'); } + return params; +} +::HIR::GenericPath Parser::parse_genericpath() +{ + ::HIR::GenericPath rv; + rv.m_simplepath = parse_simplepath(); + rv.m_params = parse_pathparams(); return rv; } ::HIR::SimplePath Parser::parse_simplepath() @@ -462,14 +796,17 @@ bool Parser::parse_one() do { gp.m_params.tys.push_back(parse_type()); - lex.check_consume(','); + if( !lex.consume_if(',') ) + break; } while( lex.next() != ')' ); - lex.consume(); + lex.check_consume(')'); return gp; } RawType Parser::parse_core_type() { + //::std::cout << lex.next() << ::std::endl; + lex.check(TokenClass::Ident); auto tok = lex.consume(); // Primitive type. if( tok == "u8" ) { @@ -487,6 +824,9 @@ RawType Parser::parse_core_type() else if( tok == "u128" ) { return RawType::U128; } + else if( tok == "usize" ) { + return RawType::USize; + } else if( tok == "i8" ) { return RawType::I8; } @@ -502,6 +842,9 @@ RawType Parser::parse_core_type() else if( tok == "i128" ) { return RawType::I128; } + else if( tok == "isize" ) { + return RawType::ISize; + } else if( tok == "f32" ) { return RawType::F32; } @@ -511,10 +854,14 @@ RawType Parser::parse_core_type() else if( tok == "bool" ) { return RawType::Bool; } + else if( tok == "char" ) { + return RawType::Char; + } else if( tok == "str" ) { return RawType::Str; } else { + ::std::cerr << lex << "Unknown core type " << tok << "'" << ::std::endl; throw "ERROR"; } } @@ -529,15 +876,20 @@ RawType Parser::parse_core_type() // Tuples! Should point to a composite ::HIR::GenericPath gp = parse_tuple(); - auto rv = this->tree.data_types.find(gp); - if( rv == this->tree.data_types.end() ) + // Look up this type, then create a TypeRef referring to the type in the datastore + // - May need to create an unpopulated type? + auto it = tree.data_types.find(gp); + if( it == tree.data_types.end() ) { - throw "ERROR"; + // TODO: Later on need to check if the type is valid. + auto v = ::std::make_unique<DataType>(DataType {}); + auto ir = tree.data_types.insert(::std::make_pair( ::std::move(gp), ::std::move(v)) ); + it = ir.first; } - - return ::HIR::TypeRef(rv->second.get()); + // Good. + return ::HIR::TypeRef(it->second.get()); } - else if( lex.next() == '[' ) + else if( lex.consume_if('[') ) { auto rv = parse_type(); if( lex.consume_if(';') ) @@ -554,11 +906,11 @@ RawType Parser::parse_core_type() lex.check_consume(']'); return rv; } - else if( lex.next() == '!' ) + else if( lex.consume_if('!') ) { return ::HIR::TypeRef::diverge(); } - else if( lex.next() == '&' ) + else if( lex.consume_if('&') ) { auto bt = ::HIR::BorrowType::Shared; if( lex.consume_if("move") ) @@ -568,10 +920,10 @@ RawType Parser::parse_core_type() else ; // keep as shared auto rv = parse_type(); - rv.wrappers.insert( rv.wrappers.begin(), { TypeWrapper::Ty::Borrow, 0 }); + rv.wrappers.insert( rv.wrappers.begin(), { TypeWrapper::Ty::Borrow, static_cast<size_t>(bt) }); return rv; } - else if( lex.next() == '*' ) + else if( lex.consume_if('*') ) { auto bt = ::HIR::BorrowType::Shared; if( lex.consume_if("move") ) @@ -583,7 +935,7 @@ RawType Parser::parse_core_type() else throw "ERROR"; auto rv = parse_type(); - rv.wrappers.insert( rv.wrappers.begin(), { TypeWrapper::Ty::Pointer, 0 }); + rv.wrappers.insert( rv.wrappers.begin(), { TypeWrapper::Ty::Pointer, static_cast<size_t>(bt) }); return rv; } else if( lex.next() == "::" ) @@ -591,7 +943,59 @@ RawType Parser::parse_core_type() auto path = parse_genericpath(); // Look up this type, then create a TypeRef referring to the type in the datastore // - May need to create an unpopulated type? - throw "TODO"; + auto it = tree.data_types.find(path); + if( it == tree.data_types.end() ) + { + // TODO: Later on need to check if the type is valid. + auto v = ::std::make_unique<DataType>(DataType {}); + auto ir = tree.data_types.insert(::std::make_pair( ::std::move(path), ::std::move(v)) ); + it = ir.first; + } + // Good. + return ::HIR::TypeRef(it->second.get()); + } + else if( lex.next() == "extern" || lex.next() == "fn" ) + { + ::std::string abi = "Rust"; + if( lex.consume_if("extern") ) + { + // TODO: Save the ABI + lex.check(TokenClass::String); + abi = lex.consume().strval; + } + lex.check_consume("fn"); + lex.check_consume('('); + ::std::vector<::HIR::TypeRef> args; + while( lex.next() != ')' ) + { + args.push_back(parse_type()); + if( !lex.consume_if(',') ) + break; + } + lex.check_consume(')'); + lex.check_consume('-'); + lex.check_consume('>'); + auto ret_ty = parse_type(); + return ::HIR::TypeRef(RawType::Function); + // TODO: Use abi/ret_ty/args as part of that + } + else if( lex.consume_if("dyn") ) + { + lex.consume_if('('); + ::HIR::GenericPath base_trait; + if( lex.next() != '+' ) + { + base_trait = parse_genericpath(); + } + ::std::vector<::HIR::GenericPath> markers; + while(lex.consume_if('+')) + { + markers.push_back(parse_genericpath()); + // TODO: Lifetimes? + } + lex.consume_if(')'); + return ::HIR::TypeRef(RawType::TraitObject); + // TODO: Figure out how to include the traits in this type. } else if( lex.next() == TokenClass::Ident ) { @@ -599,6 +1003,7 @@ RawType Parser::parse_core_type() } else { + ::std::cerr << lex << "Unexpected token in type - " << lex.next() << ::std::endl; throw "ERROR"; } } diff --git a/tools/standalone_miri/module_tree.hpp b/tools/standalone_miri/module_tree.hpp index 7475953b..ecc19625 100644 --- a/tools/standalone_miri/module_tree.hpp +++ b/tools/standalone_miri/module_tree.hpp @@ -37,6 +37,7 @@ public: struct DataType { // TODO: Metadata type! (indicates an unsized wrapper) + // TODO: Drop glue size_t alignment; size_t size; |