From 6d7c84bd9d8d5e2bec8bc86d22d53975bde6e833 Mon Sep 17 00:00:00 2001 From: John Hodge Date: Fri, 6 Mar 2015 19:55:17 +0800 Subject: Bugfixes exposed by fixing formatted output --- src/ast/ast.hpp | 1 + src/ast/expr.cpp | 111 ++-- src/ast/expr.hpp | 83 ++- src/ast/path.cpp | 38 +- src/common.hpp | 115 +--- src/convert/resolve.cpp | 16 +- src/convert/typecheck_expr.cpp | 2 +- src/convert/typecheck_params.cpp | 2 +- src/dump_as_rust.cpp | 268 +++++++- src/include/debug.hpp | 26 + src/include/rustic.hpp | 100 +++ src/main.cpp | 23 +- src/parse/lex.cpp | 1299 +++++++++++++++++++------------------- 13 files changed, 1225 insertions(+), 859 deletions(-) create mode 100644 src/include/debug.hpp create mode 100644 src/include/rustic.hpp (limited to 'src') diff --git a/src/ast/ast.hpp b/src/ast/ast.hpp index 261deb47..58bcd389 100644 --- a/src/ast/ast.hpp +++ b/src/ast/ast.hpp @@ -483,6 +483,7 @@ public: const itemlist_use_t& imports() const { return m_imports; } const ::std::vector >& type_aliases() const { return m_type_aliases; } const itemlist_ext_t& extern_crates() const { return m_extern_crates; } + const ::std::vector& impls() const { return m_impls; } const itemlist_static_t& statics() const { return m_statics; } const ItemList& traits() const { return m_traits; } const itemlist_enum_t& enums () const { return m_enums; } diff --git a/src/ast/expr.cpp b/src/ast/expr.cpp index 91f77633..b4cd9240 100644 --- a/src/ast/expr.cpp +++ b/src/ast/expr.cpp @@ -6,8 +6,16 @@ namespace AST { void Expr::visit_nodes(NodeVisitor& v) { + assert(!!m_node); m_node->visit(v); } +void Expr::visit_nodes(NodeVisitor& v) const +{ + assert(!!m_node); + assert(v.is_const()); + //const_cast(m_node.get())->visit(v); + m_node->visit(v); +} ::std::ostream& operator<<(::std::ostream& os, const Expr& pat) { if( pat.m_node.get() ) @@ -72,7 +80,11 @@ SERIALISE_TYPE(Expr::, "Expr", { ExprNode::~ExprNode() { } -#define NODE(class, serialise, _print) void class::visit(NodeVisitor& nv) { nv.visit(*this); } SERIALISE_TYPE_S(class, serialise) void class::print(::std::ostream& os) const _print +#define NODE(class, serialise, _print)\ + void class::visit(NodeVisitor& nv) { nv.visit(*this); } \ + /*void class::visit(NodeVisitor& nv) const { nv.visit(*this); }*/ \ + void class::print(::std::ostream& os) const _print \ + SERIALISE_TYPE_S(class, serialise) \ ExprNode_Block::~ExprNode_Block() { } @@ -261,127 +273,114 @@ NODE(ExprNode_BinOp, { }) -void NodeVisitor::visit(ExprNode_Block& node) -{ - DEBUG("DEF - ExprNode_Block"); +#define NV(type, actions)\ + void NodeVisitorDef::visit(type& node) { DEBUG("DEF - "#type); actions } +// void NodeVisitorDef::visit(const type& node) { DEBUG("DEF - "#type" (const)"); actions } + +NV(ExprNode_Block, { INDENT(); for( auto& child : node.m_nodes ) visit(child); UNINDENT(); -} -void NodeVisitor::visit(ExprNode_Macro& node) +}) +NV(ExprNode_Macro, { - DEBUG("DEF - ExprNode_Macro"); -} -void NodeVisitor::visit(ExprNode_Return& node) + DEBUG("TODO: Macro"); +}) +NV(ExprNode_Return, { - DEBUG("DEF - ExprNode_Return"); visit(node.m_value); -} -void NodeVisitor::visit(ExprNode_LetBinding& node) +}) +NV(ExprNode_LetBinding, { - DEBUG("DEF - ExprNode_LetBinding"); // TODO: Handle recurse into Let pattern visit(node.m_value); -} -void NodeVisitor::visit(ExprNode_Assign& node) +}) +NV(ExprNode_Assign, { - DEBUG("DEF - ExprNode_Assign"); INDENT(); visit(node.m_slot); visit(node.m_value); UNINDENT(); -} -void NodeVisitor::visit(ExprNode_CallPath& node) +}) +NV(ExprNode_CallPath, { - DEBUG("DEF - ExprNode_CallPath"); INDENT(); for( auto& arg : node.m_args ) visit(arg); UNINDENT(); -} -void NodeVisitor::visit(ExprNode_CallMethod& node) +}) +NV(ExprNode_CallMethod, { - DEBUG("DEF - ExprNode_CallMethod"); INDENT(); visit(node.m_val); for( auto& arg : node.m_args ) visit(arg); UNINDENT(); -} -void NodeVisitor::visit(ExprNode_CallObject& node) +}) +NV(ExprNode_CallObject, { - DEBUG("DEF - ExprNode_CallObject"); INDENT(); visit(node.m_val); for( auto& arg : node.m_args ) visit(arg); UNINDENT(); -} -void NodeVisitor::visit(ExprNode_Match& node) +}) +NV(ExprNode_Match, { - DEBUG("DEF - ExprNode_Match"); INDENT(); visit(node.m_val); for( auto& arm : node.m_arms ) visit(arm.second); UNINDENT(); -} -void NodeVisitor::visit(ExprNode_If& node) +}) +NV(ExprNode_If, { - DEBUG("DEF - ExprNode_If"); INDENT(); visit(node.m_cond); visit(node.m_true); visit(node.m_false); UNINDENT(); -} +}) -void NodeVisitor::visit(ExprNode_Integer& node) +NV(ExprNode_Integer, { - DEBUG("DEF - ExprNode_Integer"); // LEAF -} -void NodeVisitor::visit(ExprNode_StructLiteral& node) +}) +NV(ExprNode_StructLiteral, { - DEBUG("DEF - ExprNode_StructLiteral"); visit(node.m_base_value); for( auto& val : node.m_values ) visit(val.second); -} -void NodeVisitor::visit(ExprNode_Tuple& node) +}) +NV(ExprNode_Tuple, { - DEBUG("DEF - ExprNode_Tuple"); for( auto& val : node.m_values ) visit(val); -} -void NodeVisitor::visit(ExprNode_NamedValue& node) +}) +NV(ExprNode_NamedValue, { - DEBUG("DEF - ExprNode_NamedValue"); // LEAF -} +}) -void NodeVisitor::visit(ExprNode_Field& node) +NV(ExprNode_Field, { - DEBUG("DEF - ExprNode_Field"); visit(node.m_obj); -} -void NodeVisitor::visit(ExprNode_Deref& node) +}) +NV(ExprNode_Deref, { - DEBUG("DEF - ExprNode_Deref"); visit(node.m_value); -} -void NodeVisitor::visit(ExprNode_Cast& node) +}) +NV(ExprNode_Cast, { - DEBUG("DEF - ExprNode_Cast"); visit(node.m_value); -} -void NodeVisitor::visit(ExprNode_BinOp& node) +}) +NV(ExprNode_BinOp, { - DEBUG("DEF - ExprNode_BinOp"); visit(node.m_left); visit(node.m_right); -} +}) +#undef NV }; diff --git a/src/ast/expr.hpp b/src/ast/expr.hpp index 2744b75e..bbb82bde 100644 --- a/src/ast/expr.hpp +++ b/src/ast/expr.hpp @@ -25,6 +25,7 @@ public: virtual ~ExprNode() = 0; virtual void visit(NodeVisitor& nv) = 0; + //virtual void visit(NodeVisitor& nv) const = 0; virtual void print(::std::ostream& os) const = 0; TypeRef& get_res_type() { return m_res_type; } @@ -33,7 +34,11 @@ public: static ::std::unique_ptr from_deserialiser(Deserialiser& d); }; -#define NODE_METHODS() virtual void visit(NodeVisitor& nv) override; virtual void print(::std::ostream& os) const override; SERIALISABLE_PROTOTYPES(); +#define NODE_METHODS() \ + virtual void visit(NodeVisitor& nv) override;\ + virtual void print(::std::ostream& os) const override; \ + SERIALISABLE_PROTOTYPES();/* \ + virtual void visit(NodeVisitor& nv) const override;*/ struct ExprNode_Block: public ExprNode @@ -336,27 +341,65 @@ public: if(cnode.get()) cnode->visit(*this); } + virtual bool is_const() const { return false; } + + #define NT(nt) \ + virtual void visit(nt& node) = 0/*; \ + virtual void visit(const nt& node) = 0*/ + NT(ExprNode_Block); + NT(ExprNode_Macro); + NT(ExprNode_Return); + NT(ExprNode_LetBinding); + NT(ExprNode_Assign); + NT(ExprNode_CallPath); + NT(ExprNode_CallMethod); + NT(ExprNode_CallObject); + NT(ExprNode_Match); + NT(ExprNode_If); - virtual void visit(ExprNode_Block& node); - virtual void visit(ExprNode_Macro& node); - virtual void visit(ExprNode_Return& node); - virtual void visit(ExprNode_LetBinding& node); - virtual void visit(ExprNode_Assign& node); - virtual void visit(ExprNode_CallPath& node); - virtual void visit(ExprNode_CallMethod& node); - virtual void visit(ExprNode_CallObject& node); - virtual void visit(ExprNode_Match& node); - virtual void visit(ExprNode_If& node); + NT(ExprNode_Integer); + NT(ExprNode_StructLiteral); + NT(ExprNode_Tuple); + NT(ExprNode_NamedValue); - virtual void visit(ExprNode_Integer& node); - virtual void visit(ExprNode_StructLiteral& node); - virtual void visit(ExprNode_Tuple& node); - virtual void visit(ExprNode_NamedValue& node); + NT(ExprNode_Field); + NT(ExprNode_Deref); + NT(ExprNode_Cast); + NT(ExprNode_BinOp); + #undef NT +}; +class NodeVisitorDef: + public NodeVisitor +{ +public: + inline void visit(const unique_ptr& cnode) { + if(cnode.get()) + cnode->visit(*this); + } + #define NT(nt) \ + virtual void visit(nt& node) override;/* \ + virtual void visit(const nt& node) override*/ + NT(ExprNode_Block); + NT(ExprNode_Macro); + NT(ExprNode_Return); + NT(ExprNode_LetBinding); + NT(ExprNode_Assign); + NT(ExprNode_CallPath); + NT(ExprNode_CallMethod); + NT(ExprNode_CallObject); + NT(ExprNode_Match); + NT(ExprNode_If); + + NT(ExprNode_Integer); + NT(ExprNode_StructLiteral); + NT(ExprNode_Tuple); + NT(ExprNode_NamedValue); - virtual void visit(ExprNode_Field& node); - virtual void visit(ExprNode_Deref& node); - virtual void visit(ExprNode_Cast& node); - virtual void visit(ExprNode_BinOp& node); + NT(ExprNode_Field); + NT(ExprNode_Deref); + NT(ExprNode_Cast); + NT(ExprNode_BinOp); + #undef NT }; class Expr: @@ -379,8 +422,10 @@ public: bool is_valid() const { return m_node.get() != nullptr; } ExprNode& node() { assert(m_node.get()); return *m_node; } + const ExprNode& node() const { assert(m_node.get()); return *m_node; } ::std::shared_ptr take_node() { assert(m_node.get()); return ::std::move(m_node); } void visit_nodes(NodeVisitor& v); + void visit_nodes(NodeVisitor& v) const; friend ::std::ostream& operator<<(::std::ostream& os, const Expr& pat); diff --git a/src/ast/path.cpp b/src/ast/path.cpp index cd068694..f3fc5e82 100644 --- a/src/ast/path.cpp +++ b/src/ast/path.cpp @@ -30,9 +30,6 @@ bool PathNode::operator==(const PathNode& x) const return m_name == x.m_name && m_params == x.m_params; } ::std::ostream& operator<<(::std::ostream& os, const PathNode& pn) { - #if PRETTY_PATH_PRINT - os << "::"; - #endif os << pn.m_name; if( pn.m_params.size() ) { @@ -66,6 +63,8 @@ void Path::resolve(const Crate& root_crate) throw ParseError::BugCheck("Calling Path::resolve on non-absolute path"); DEBUG("m_crate = '" << m_crate << "'"); + unsigned int slice_from = 0; // Used when rewriting the path to be relative to its crate root + const Module* mod = &root_crate.get_root_module(m_crate); for(unsigned int i = 0; i < m_nodes.size(); i ++ ) { @@ -97,6 +96,8 @@ void Path::resolve(const Crate& root_crate) DEBUG("Extern crate '" << node.name() << "' = '" << it->data << "'"); if( node.args().size() ) throw ParseError::Generic("Generic params applied to extern crate"); + m_crate = it->data; + slice_from = i+1; mod = &root_crate.get_root_module(it->data); continue; } @@ -127,7 +128,7 @@ void Path::resolve(const Crate& root_crate) if( is_last ) { m_binding_type = ALIAS; m_binding.alias_ = &it->data; - return ; + goto ret; } else { throw ParseError::Todo("Path::resolve() type method"); @@ -145,7 +146,7 @@ void Path::resolve(const Crate& root_crate) if( is_last ) { m_binding_type = FUNCTION; m_binding.func_ = &it->data; - return ; + goto ret; } else { throw ParseError::Generic("Import of function, too many extra nodes"); @@ -163,7 +164,7 @@ void Path::resolve(const Crate& root_crate) if( is_last ) { m_binding_type = TRAIT; m_binding.trait_ = &it->data; - return; + goto ret; } else if( is_sec_last ) { throw ParseError::Todo("Path::resolve() trait method"); @@ -182,7 +183,7 @@ void Path::resolve(const Crate& root_crate) DEBUG("Found struct"); if( is_last ) { bind_struct(it->data, node.args()); - return; + goto ret; } else if( is_sec_last ) { throw ParseError::Todo("Path::resolve() struct method"); @@ -201,11 +202,11 @@ void Path::resolve(const Crate& root_crate) DEBUG("Found enum"); if( is_last ) { bind_enum(it->data, node.args()); - return ; + goto ret; } else if( is_sec_last ) { bind_enum_var(it->data, m_nodes[i+1].name(), node.args()); - return ; + goto ret; } else { throw ParseError::Generic("Binding path to enum, too many extra nodes"); @@ -223,7 +224,7 @@ void Path::resolve(const Crate& root_crate) if( node.args().size() ) throw ParseError::Generic("Unexpected generic params on static/const"); bind_static(it->data); - return ; + goto ret; } else { throw ParseError::Generic("Binding path to static, trailing nodes"); @@ -236,6 +237,13 @@ void Path::resolve(const Crate& root_crate) // We only reach here if the path points to a module bind_module(*mod); +ret: + if( slice_from > 0 ) + { + DEBUG("Removing " << slice_from << " nodes to rebase path to crate root"); + m_nodes.erase(m_nodes.begin(), m_nodes.begin()+slice_from); + } + return ; } void Path::bind_module(const Module& mod) { @@ -340,12 +348,22 @@ void Path::print_pretty(::std::ostream& os) const case Path::RELATIVE: os << "self"; for(const auto& n : path.m_nodes) + { + #if PRETTY_PATH_PRINT + os << "::"; + #endif os << n; + } break; case Path::ABSOLUTE: os << "{"< #include -extern int g_debug_indent_level; - #define FMT(ss) (dynamic_cast< ::std::stringstream&>(::std::stringstream() << ss).str()) -#define INDENT() do { g_debug_indent_level += 1; } while(0) -#define UNINDENT() do { g_debug_indent_level -= 1; } while(0) -#define DEBUG(ss) do{ ::std::cerr << ::RepeatLitStr{" ", g_debug_indent_level} << __FUNCTION__ << ": " << ss << ::std::endl; } while(0) - -struct RepeatLitStr -{ - const char *s; - int n; - - friend ::std::ostream& operator<<(::std::ostream& os, const RepeatLitStr& r) { - for(int i = 0; i < r.n; i ++ ) - os << r.s; - return os; - } -}; - -template -class slice -{ - T* m_first; - unsigned int m_len; -public: - slice(::std::vector& v): - m_first(&v[0]), - m_len(v.size()) - {} - - ::std::vector to_vec() const { - return ::std::vector(begin(), end()); - } - - unsigned int size() const { - return m_len; - } - T& operator[](unsigned int i) const { - assert(i < m_len); - return m_first[i]; - } - - T* begin() const { return m_first; } - T* end() const { return m_first + m_len; } -}; - -template -::std::ostream& operator<<(::std::ostream& os, slice s) { - if( s.size() > 0 ) - { - bool is_first = true; - for( const auto& i : s ) - { - if(!is_first) - os << ", "; - is_first = false; - os << i; - } - } - return os; -} - -namespace rust { - -template -class option -{ - bool m_set; - T m_data; -public: - option(T ent): - m_set(true), - m_data( ::std::move(ent) ) - {} - option(): - m_set(false) - {} - - bool is_none() const { return !m_set; } - bool is_some() const { return m_set; } - - const T& unwrap() const { - assert(is_some()); - return m_data; - } -}; -template -class option -{ - T* m_ptr; -public: - option(T& ent): - m_ptr(&ent) - {} - option(): - m_ptr(nullptr) - {} - - bool is_none() const { return m_ptr == nullptr; } - bool is_some() const { return m_ptr != nullptr; } - T& unwrap() const { - assert(is_some()); - return *m_ptr; - } -}; -template -option Some(T data) { - return option( ::std::move(data) ); -} -template -option None() { - return option( ); -} -}; +#include "include/debug.hpp" +#include "include/rustic.hpp" // slice and option namespace std { diff --git a/src/convert/resolve.cpp b/src/convert/resolve.cpp index 3c10a133..cba7d375 100644 --- a/src/convert/resolve.cpp +++ b/src/convert/resolve.cpp @@ -70,7 +70,7 @@ public: void ResolvePaths(AST::Crate& crate); class CResolvePaths_NodeVisitor: - public AST::NodeVisitor + public AST::NodeVisitorDef { CPathResolver& m_res; public: @@ -92,8 +92,8 @@ public: m_res.handle_path(node.m_path, CASTIterator::MODE_EXPR); } void visit(AST::ExprNode_CallPath& node) { - DEBUG("ExprNode_CallPath"); - AST::NodeVisitor::visit(node); + DEBUG("ExprNode_CallPath - " << node); + AST::NodeVisitorDef::visit(node); m_res.handle_path(node.m_path, CASTIterator::MODE_EXPR); } @@ -171,11 +171,13 @@ void CPathResolver::handle_path(AST::Path& path, CASTIterator::PathMode mode) if( path.is_absolute() ) { DEBUG("Absolute - binding"); + INDENT(); // Already absolute, our job is done // - However, if the path isn't bound, bind it if( !path.is_bound() ) { path.resolve(m_crate); } + UNINDENT(); } else if( path.is_relative() ) { @@ -492,17 +494,23 @@ void ResolvePaths(AST::Crate& crate) { DEBUG(" >>>"); // Pre-process external crates to tag all paths + DEBUG(" --- Extern crates"); + INDENT(); for(auto& ec : crate.extern_crates()) { SetCrateName_Mod(crate, ec.first, ec.second.root_module()); } + UNINDENT(); // Handle 'use' statements in an initial parss + DEBUG(" --- Use Statements"); + INDENT(); ResolvePaths_HandleModule_Use(crate, AST::Path(AST::Path::TagAbsolute()), crate.root_module()); - DEBUG(" ---"); + UNINDENT(); // Then do path resolution on all other items CPathResolver pr(crate); + DEBUG(" ---"); pr.handle_module(AST::Path(AST::Path::TagAbsolute()), crate.root_module()); DEBUG(" <<<"); } diff --git a/src/convert/typecheck_expr.cpp b/src/convert/typecheck_expr.cpp index 852f1c12..49a17243 100644 --- a/src/convert/typecheck_expr.cpp +++ b/src/convert/typecheck_expr.cpp @@ -62,7 +62,7 @@ private: void iterate_traits(::std::function fcn); }; class CTC_NodeVisitor: - public AST::NodeVisitor + public AST::NodeVisitorDef { CTypeChecker& m_tc; public: diff --git a/src/convert/typecheck_params.cpp b/src/convert/typecheck_params.cpp index 7a682ce7..109713d4 100644 --- a/src/convert/typecheck_params.cpp +++ b/src/convert/typecheck_params.cpp @@ -26,7 +26,7 @@ private: }; class CNodeVisitor: - public AST::NodeVisitor + public AST::NodeVisitorDef { CGenericParamChecker& m_pc; public: diff --git a/src/dump_as_rust.cpp b/src/dump_as_rust.cpp index 1a0b426d..ade350e8 100644 --- a/src/dump_as_rust.cpp +++ b/src/dump_as_rust.cpp @@ -26,7 +26,193 @@ public: void handle_function(const AST::Item& f); + virtual bool is_const() const override { return true; } + virtual void visit(AST::ExprNode_Block& n) override { + m_os << "{"; + inc_indent(); + bool is_first = true; + for( auto& child : n.m_nodes ) + { + if(is_first) { + is_first = false; + } else { + m_os << ";"; + } + m_os << "\n"; + m_os << indent(); + if( !child.get() ) + m_os << "/* nil */"; + else + AST::NodeVisitor::visit(child); + } + m_os << "\n"; + dec_indent(); + m_os << indent() << "}"; + } + virtual void visit(AST::ExprNode_Macro& n) override { + m_os << n.m_name << "!( /* TODO: Macro TT */ )"; + } + virtual void visit(AST::ExprNode_Return& n) override { + m_os << "return "; + AST::NodeVisitor::visit(n.m_value); + } + virtual void visit(AST::ExprNode_LetBinding& n) override { + m_os << "let "; + print_pattern(n.m_pat); + m_os << " = "; + AST::NodeVisitor::visit(n.m_value); + } + virtual void visit(AST::ExprNode_Assign& n) override { + AST::NodeVisitor::visit(n.m_slot); + m_os << " = "; + AST::NodeVisitor::visit(n.m_value); + } + virtual void visit(AST::ExprNode_CallPath& n) override { + m_os << n.m_path; + m_os << "("; + bool is_first = true; + for( auto& arg : n.m_args ) + { + if(is_first) { + is_first = false; + } else { + m_os << ", "; + } + AST::NodeVisitor::visit(arg); + } + m_os << ")"; + } + virtual void visit(AST::ExprNode_CallMethod& n) override { + m_os << "("; + AST::NodeVisitor::visit(n.m_val); + m_os << ")." << n.m_method; + m_os << "("; + bool is_first = true; + for( auto& arg : n.m_args ) + { + if(is_first) { + is_first = false; + } else { + m_os << ", "; + } + AST::NodeVisitor::visit(arg); + } + m_os << ")"; + } + virtual void visit(AST::ExprNode_CallObject&) override { + throw ::std::runtime_error("unimplemented ExprNode_CallObject"); + } + virtual void visit(AST::ExprNode_Match& n) override { + m_os << "match "; + AST::NodeVisitor::visit(n.m_val); + m_os << " {\n"; + inc_indent(); + + for( auto& arm : n.m_arms ) + { + m_os << indent(); + print_pattern( arm.first ); + m_os << " => "; + AST::NodeVisitor::visit(arm.second); + m_os << ",\n"; + } + + m_os << indent() << "}"; + dec_indent(); + } + virtual void visit(AST::ExprNode_If& n) override { + m_os << "if "; + AST::NodeVisitor::visit(n.m_cond); + m_os << " "; + AST::NodeVisitor::visit(n.m_true); + if(n.m_false.get()) + { + m_os << " else "; + AST::NodeVisitor::visit(n.m_false); + } + } + virtual void visit(AST::ExprNode_Integer& n) override { + switch(n.m_datatype) + { + } + m_os << "0x" << ::std::hex << n.m_value << ::std::dec; + } + virtual void visit(AST::ExprNode_StructLiteral& n) override { + m_os << n.m_path << " {\n"; + inc_indent(); + for( const auto& i : n.m_values ) + { + m_os << indent() << i.first << ": "; + AST::NodeVisitor::visit(i.second); + m_os << ",\n"; + } + if( n.m_base_value.get() ) + { + m_os << indent() << ".. "; + AST::NodeVisitor::visit(n.m_base_value); + m_os << "\n"; + } + m_os << indent() << "}"; + dec_indent(); + } + virtual void visit(AST::ExprNode_Tuple& n) override { + m_os << "("; + for( auto& item : n.m_values ) + { + AST::NodeVisitor::visit(item); + m_os << ", "; + } + m_os << ")"; + } + virtual void visit(AST::ExprNode_NamedValue& n) override { + m_os << n.m_path; + } + virtual void visit(AST::ExprNode_Field& n) override { + m_os << "("; + AST::NodeVisitor::visit(n.m_obj); + m_os << ")." << n.m_name; + } + virtual void visit(AST::ExprNode_Deref&) override { + throw ::std::runtime_error("unimplemented ExprNode_Deref"); + } + virtual void visit(AST::ExprNode_Cast& n) override { + AST::NodeVisitor::visit(n.m_value); + m_os << " as " << n.m_type; + } + virtual void visit(AST::ExprNode_BinOp& n) override { + #define IS(v, c) (dynamic_cast(&v) != 0) + if( IS(*n.m_left, AST::ExprNode_Cast) ) + paren_wrap(n.m_left); + else if( IS(*n.m_left, AST::ExprNode_BinOp) ) + paren_wrap(n.m_left); + else + AST::NodeVisitor::visit(n.m_left); + m_os << " "; + switch(n.m_type) + { + case AST::ExprNode_BinOp::CMPEQU: m_os << "=="; break; + case AST::ExprNode_BinOp::CMPNEQU:m_os << "!="; break; + case AST::ExprNode_BinOp::BITAND: m_os << "&"; break; + case AST::ExprNode_BinOp::BITOR: m_os << "|"; break; + case AST::ExprNode_BinOp::BITXOR: m_os << "^"; break; + case AST::ExprNode_BinOp::SHL: m_os << "<<"; break; + case AST::ExprNode_BinOp::SHR: m_os << ">>"; break; + } + m_os << " "; + if( IS(*n.m_right, AST::ExprNode_BinOp) ) + paren_wrap(n.m_right); + else + AST::NodeVisitor::visit(n.m_right); + } + + private: + void paren_wrap(::std::unique_ptr& node) { + m_os << "("; + AST::NodeVisitor::visit(node); + m_os << ")"; + } + void print_params(const AST::TypeParams& params); void print_bounds(const AST::TypeParams& params); void print_pattern(const AST::Pattern& p); @@ -45,11 +231,32 @@ void Dump_Rust(const char *Filename, const AST::Crate& crate) void RustPrinter::handle_module(const AST::Module& mod) { - m_os << "\n"; + bool need_nl = true; + + for( const auto& i : mod.imports() ) + { + //if(need_nl) { + // m_os << "\n"; + // need_nl = false; + //} + m_os << indent() << (i.is_pub ? "pub " : "") << "use " << i.data; + if( i.name == "" ) + { + m_os << "::*"; + } + else if( i.data.nodes().back().name() != i.name ) + { + m_os << " as " << i.name; + } + m_os << ";\n"; + } + need_nl = true; for( const auto& sm : mod.submods() ) { - m_os << indent() << (sm.second ? "pub " : "") << "mod " << sm.first.name() << " {\n"; + m_os << "\n"; + m_os << indent() << (sm.second ? "pub " : "") << "mod " << sm.first.name() << "\n"; + m_os << indent() << "{\n"; inc_indent(); handle_module(sm.first); dec_indent(); @@ -57,27 +264,47 @@ void RustPrinter::handle_module(const AST::Module& mod) m_os << "\n"; } + for( const auto& i : mod.type_aliases() ) + { + if(need_nl) { + m_os << "\n"; + need_nl = false; + } + m_os << indent() << (i.is_pub ? "pub " : "") << "type " << i.name; + print_params(i.data.params()); + m_os << " = " << i.data.type(); + print_bounds(i.data.params()); + m_os << ";\n"; + } + need_nl = true; for( const auto& i : mod.structs() ) { + m_os << "\n"; m_os << indent() << (i.is_pub ? "pub " : "") << "struct " << i.name; handle_struct(i.data); } for( const auto& i : mod.enums() ) { + m_os << "\n"; m_os << indent() << (i.is_pub ? "pub " : "") << "enum " << i.name; handle_enum(i.data); } for( const auto& i : mod.traits() ) { + m_os << "\n"; m_os << indent() << (i.is_pub ? "pub " : "") << "trait " << i.name; handle_trait(i.data); } for( const auto& i : mod.statics() ) { + if(need_nl) { + m_os << "\n"; + need_nl = false; + } m_os << indent() << (i.is_pub ? "pub " : ""); switch( i.data.s_class() ) { @@ -86,14 +313,41 @@ void RustPrinter::handle_module(const AST::Module& mod) case AST::Static::MUT: m_os << "static mut "; break; } m_os << i.name << ": " << i.data.type() << " = "; - //handle_expr(i.data.value()); + i.data.value().visit_nodes(*this); m_os << ";\n"; } for( const auto& i : mod.functions() ) { + m_os << "\n"; handle_function(i); } + + for( const auto& i : mod.impls() ) + { + m_os << "\n"; + m_os << indent() << "impl"; + print_params(i.params()); + if( i.trait() != TypeRef() ) + { + m_os << " " << i.trait() << " for"; + } + m_os << " " << i.type() << "\n"; + + print_bounds(i.params()); + m_os << indent() << "{\n"; + inc_indent(); + for( const auto& t : i.types() ) + { + m_os << indent() << "type " << t.name << " = " << t.data << ";\n"; + } + for( const auto& t : i.functions() ) + { + handle_function(t); + } + dec_indent(); + m_os << indent() << "}\n"; + } } void RustPrinter::print_params(const AST::TypeParams& params) @@ -230,7 +484,7 @@ void RustPrinter::handle_trait(const AST::Trait& s) for( const auto& i : s.types() ) { - m_os << indent() << "type " << i.name << "\n"; + m_os << indent() << "type " << i.name << ";\n"; } for( const auto& i : s.functions() ) { @@ -244,6 +498,7 @@ void RustPrinter::handle_trait(const AST::Trait& s) void RustPrinter::handle_function(const AST::Item& f) { + m_os << "\n"; m_os << indent() << (f.is_pub ? "pub " : "") << "fn " << f.name; print_params(f.data.params()); m_os << "("; @@ -267,7 +522,10 @@ void RustPrinter::handle_function(const AST::Item& f) m_os << "\n"; print_bounds(f.data.params()); - m_os << indent() << f.data.code() << "\n"; + m_os << indent(); + f.data.code().visit_nodes(*this); + m_os << "\n"; + //m_os << indent() << f.data.code() << "\n"; } else { diff --git a/src/include/debug.hpp b/src/include/debug.hpp new file mode 100644 index 00000000..012ef1bb --- /dev/null +++ b/src/include/debug.hpp @@ -0,0 +1,26 @@ +/* + */ +#pragma once +#include + +extern int g_debug_indent_level; + +#define INDENT() do { g_debug_indent_level += 1; } while(0) +#define UNINDENT() do { g_debug_indent_level -= 1; } while(0) +#define DEBUG(ss) do{ if(debug_enabled()) { debug_output(g_debug_indent_level, __FUNCTION__) << ss << ::std::endl; } } while(0) + +extern bool debug_enabled(); +extern ::std::ostream& debug_output(int indent, const char* function); + +struct RepeatLitStr +{ + const char *s; + int n; + + friend ::std::ostream& operator<<(::std::ostream& os, const RepeatLitStr& r) { + for(int i = 0; i < r.n; i ++ ) + os << r.s; + return os; + } +}; + diff --git a/src/include/rustic.hpp b/src/include/rustic.hpp new file mode 100644 index 00000000..0e46777d --- /dev/null +++ b/src/include/rustic.hpp @@ -0,0 +1,100 @@ +/* + */ +#pragma once + +template +class slice +{ + T* m_first; + unsigned int m_len; +public: + slice(::std::vector& v): + m_first(&v[0]), + m_len(v.size()) + {} + + ::std::vector to_vec() const { + return ::std::vector(begin(), end()); + } + + unsigned int size() const { + return m_len; + } + T& operator[](unsigned int i) const { + assert(i < m_len); + return m_first[i]; + } + + T* begin() const { return m_first; } + T* end() const { return m_first + m_len; } +}; + +template +::std::ostream& operator<<(::std::ostream& os, slice s) { + if( s.size() > 0 ) + { + bool is_first = true; + for( const auto& i : s ) + { + if(!is_first) + os << ", "; + is_first = false; + os << i; + } + } + return os; +} + +namespace rust { + +template +class option +{ + bool m_set; + T m_data; +public: + option(T ent): + m_set(true), + m_data( ::std::move(ent) ) + {} + option(): + m_set(false) + {} + + bool is_none() const { return !m_set; } + bool is_some() const { return m_set; } + + const T& unwrap() const { + assert(is_some()); + return m_data; + } +}; +template +class option +{ + T* m_ptr; +public: + option(T& ent): + m_ptr(&ent) + {} + option(): + m_ptr(nullptr) + {} + + bool is_none() const { return m_ptr == nullptr; } + bool is_some() const { return m_ptr != nullptr; } + T& unwrap() const { + assert(is_some()); + return *m_ptr; + } +}; +template +option Some(T data) { + return option( ::std::move(data) ); +} +template +option None() { + return option( ); +} + +}; diff --git a/src/main.cpp b/src/main.cpp index 9e533139..6aeba895 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -11,6 +11,17 @@ int g_debug_indent_level = 0; +::std::string g_cur_phase; + +bool debug_enabled() +{ + + return true; +} +::std::ostream& debug_output(int indent, const char* function) +{ + return ::std::cout << g_cur_phase << "- " << RepeatLitStr { " ", indent } << function << ": "; +} /// main! int main(int argc, char *argv[]) @@ -79,26 +90,35 @@ int main(int argc, char *argv[]) //Serialiser& s = s_tt; try { + g_cur_phase = "Parse"; AST::Crate crate = Parse_Crate(infile); + g_cur_phase = "PostParse"; crate.post_parse(); //s << crate; // Resolve names to be absolute names (include references to the relevant struct/global/function) + g_cur_phase = "Resolve"; ResolvePaths(crate); //s << crate; // Typecheck / type propagate module (type annotations of all values) // - Check all generic conditions (ensure referenced trait is valid) // > Also mark parameter with applicable traits + #if 0 + g_cur_phase = "TypecheckBounds"; Typecheck_GenericBounds(crate); // - Check all generic parameters match required conditions + g_cur_phase = "TypecheckParams"; Typecheck_GenericParams(crate); // - Typecheck statics and consts // - Typecheck + propagate functions // > Forward pass first - Typecheck_Expr(crate); + //g_cur_phase = "TypecheckExpr"; + //Typecheck_Expr(crate); + #endif + g_cur_phase = "Output"; Dump_Rust( FMT(outfile << ".rs").c_str(), crate ); if( strcmp(emit_type, "ast") == 0 ) @@ -109,6 +129,7 @@ int main(int argc, char *argv[]) return 0; } // Flatten modules into "mangled" set + g_cur_phase = "Flatten"; AST::Flat flat_crate = Convert_Flatten(crate); // Convert structures to C structures / tagged enums diff --git a/src/parse/lex.cpp b/src/parse/lex.cpp index 7dd8eb3c..2d6873d0 100644 --- a/src/parse/lex.cpp +++ b/src/parse/lex.cpp @@ -1,652 +1,653 @@ -/* - * "MRustC" - Primitive rust compiler in C++ - */ -/** - * \file parse/lex.cpp - * \brief Low-level lexer - */ -#include "lex.hpp" -#include "tokentree.hpp" -#include "parseerror.hpp" -#include -#include -#include // strtol -#include - -Lexer::Lexer(::std::string filename): - m_istream(filename.c_str()), - m_last_char_valid(false) -{ - if( !m_istream.is_open() ) - { - throw ::std::runtime_error("Unable to open file"); - } -} - -#define LINECOMMENT -1 -#define BLOCKCOMMENT -2 -#define SINGLEQUOTE -3 -#define DOUBLEQUOTE -4 - -// NOTE: This array must be kept reverse sorted -#define TOKENT(str, sym) {sizeof(str)-1, str, sym} -static const struct { - unsigned char len; - const char* chars; - signed int type; -} TOKENMAP[] = { - TOKENT("!" , TOK_EXCLAM), - TOKENT("!=", TOK_EXCLAM_EQUAL), - TOKENT("\"", DOUBLEQUOTE), - TOKENT("#", 0), - TOKENT("#![",TOK_CATTR_OPEN), - TOKENT("#[", TOK_ATTR_OPEN), - //TOKENT("$", 0), - TOKENT("%" , TOK_PERCENT), - TOKENT("%=", TOK_PERCENT_EQUAL), - TOKENT("&" , TOK_AMP), - TOKENT("&&", TOK_DOUBLE_AMP), - TOKENT("&=", TOK_AMP_EQUAL), - TOKENT("'" , SINGLEQUOTE), - TOKENT("(" , TOK_PAREN_OPEN), - TOKENT(")" , TOK_PAREN_CLOSE), - TOKENT("*" , TOK_STAR), - TOKENT("*=", TOK_STAR_EQUAL), - TOKENT("+" , TOK_PLUS), - TOKENT("+=", TOK_PLUS_EQUAL), - TOKENT("," , TOK_COMMA), - TOKENT("-" , TOK_DASH), - TOKENT("-=", TOK_DASH_EQUAL), - TOKENT("->", TOK_THINARROW), - TOKENT(".", TOK_DOT), - TOKENT("..", TOK_DOUBLE_DOT), - TOKENT("...",TOK_TRIPLE_DOT), - TOKENT("/" , TOK_SLASH), - TOKENT("/*", BLOCKCOMMENT), - TOKENT("//", LINECOMMENT), - TOKENT("/=", TOK_SLASH_EQUAL), - // 0-9 :: Elsewhere - TOKENT(":", TOK_COLON), - TOKENT("::", TOK_DOUBLE_COLON), - TOKENT(";", TOK_SEMICOLON), - TOKENT("<", TOK_LT), - TOKENT("<<", TOK_DOUBLE_LT), - TOKENT("<=", TOK_LTE), - TOKENT("=" , TOK_EQUAL), - TOKENT("==", TOK_DOUBLE_EQUAL), - TOKENT("=>", TOK_FATARROW), - TOKENT(">", TOK_GT), - TOKENT(">>", TOK_DOUBLE_GT), - TOKENT(">=", TOK_GTE), - TOKENT("?", TOK_QMARK), - TOKENT("@", TOK_AT), - // A-Z :: Elsewhere - TOKENT("[", TOK_SQUARE_OPEN), - TOKENT("\\", TOK_BACKSLASH), - TOKENT("]", TOK_SQUARE_CLOSE), - TOKENT("^", TOK_CARET), - TOKENT("`", TOK_BACKTICK), - - TOKENT("{", TOK_BRACE_OPEN), - TOKENT("|", TOK_PIPE), - TOKENT("|=", TOK_PIPE_EQUAL), - TOKENT("||", TOK_DOUBLE_PIPE), - TOKENT("}", TOK_BRACE_CLOSE), - TOKENT("~", TOK_TILDE), -}; -#define LEN(arr) (sizeof(arr)/sizeof(arr[0])) -static const struct { - unsigned char len; - const char* chars; - signed int type; -} RWORDS[] = { - TOKENT("abstract",TOK_RWORD_ABSTRACT), - TOKENT("alignof", TOK_RWORD_ALIGNOF), - TOKENT("as", TOK_RWORD_AS), - TOKENT("be", TOK_RWORD_BE), - TOKENT("box", TOK_RWORD_BOX), - TOKENT("break", TOK_RWORD_BREAK), - TOKENT("const", TOK_RWORD_CONST), - TOKENT("continue",TOK_RWORD_CONTINUE), - TOKENT("crate", TOK_RWORD_CRATE), - TOKENT("do", TOK_RWORD_DO), - TOKENT("else", TOK_RWORD_ELSE), - TOKENT("enum", TOK_RWORD_ENUM), - TOKENT("extern", TOK_RWORD_EXTERN), - TOKENT("false", TOK_RWORD_FALSE), - TOKENT("final", TOK_RWORD_FINAL), - TOKENT("fn", TOK_RWORD_FN), - TOKENT("for", TOK_RWORD_FOR), - TOKENT("if", TOK_RWORD_IF), - TOKENT("impl", TOK_RWORD_IMPL), - TOKENT("in", TOK_RWORD_IN), - TOKENT("let", TOK_RWORD_LET), - TOKENT("loop", TOK_RWORD_LOOP), - TOKENT("match", TOK_RWORD_MATCH), - TOKENT("mod", TOK_RWORD_MOD), - TOKENT("move", TOK_RWORD_MOVE), - TOKENT("mut", TOK_RWORD_MUT), - TOKENT("offsetof",TOK_RWORD_OFFSETOF), - TOKENT("once", TOK_RWORD_ONCE), - TOKENT("override",TOK_RWORD_OVERRIDE), - TOKENT("priv", TOK_RWORD_PRIV), - TOKENT("proc", TOK_RWORD_PROC), - TOKENT("pub", TOK_RWORD_PUB), - TOKENT("pure", TOK_RWORD_PURE), - TOKENT("ref", TOK_RWORD_REF), - TOKENT("return", TOK_RWORD_RETURN), - TOKENT("self", TOK_RWORD_SELF), - TOKENT("sizeof", TOK_RWORD_SIZEOF), - TOKENT("static", TOK_RWORD_STATIC), - TOKENT("struct", TOK_RWORD_STRUCT), - TOKENT("super", TOK_RWORD_SUPER), - TOKENT("trait", TOK_RWORD_TRAIT), - TOKENT("true", TOK_RWORD_TRUE), - TOKENT("type", TOK_RWORD_TYPE), - TOKENT("typeof", TOK_RWORD_TYPEOF), - TOKENT("unsafe", TOK_RWORD_UNSAFE), - TOKENT("unsized", TOK_RWORD_UNSIZED), - TOKENT("use", TOK_RWORD_USE), - TOKENT("virtual", TOK_RWORD_VIRTUAL), - TOKENT("where", TOK_RWORD_WHERE), - TOKENT("while", TOK_RWORD_WHILE), - TOKENT("yield", TOK_RWORD_YIELD), -}; - -signed int Lexer::getSymbol() -{ - char ch = this->getc(); - // 1. lsearch for character - // 2. Consume as many characters as currently match - // 3. IF: a smaller character or, EOS is hit - Return current best - unsigned ofs = 0; - signed int best = 0; - for(unsigned i = 0; i < LEN(TOKENMAP); i ++) - { - const char* const chars = TOKENMAP[i].chars; - const size_t len = TOKENMAP[i].len; - - //::std::cout << "ofs=" << ofs << ", chars[ofs] = " << chars[ofs] << ", ch = " << ch << ", len = " << len << ::std::endl; - - if( ofs >= len || chars[ofs] > ch ) { - this->putback(); - return best; - } - - while( chars[ofs] && chars[ofs] == ch ) - { - ch = this->getc(); - ofs ++; - } - if( chars[ofs] == 0 ) - { - best = TOKENMAP[i].type; - } - } - - this->putback(); - return best; -} - -bool issym(char ch) -{ - if( ::std::isalnum(ch) ) - return true; - if( ch == '_' ) - return true; - if( ch == '$' ) - return true; - return false; -} - -Token Lexer::getToken() -{ - try - { - char ch = this->getc(); - - if( isspace(ch) ) - { - while( isspace(this->getc()) ) - ; - this->putback(); - return Token(TOK_WHITESPACE); - } - this->putback(); - - const signed int sym = this->getSymbol(); - if( sym == 0 ) - { - // No match at all, check for symbol - char ch = this->getc(); - if( isdigit(ch) ) - { - // TODO: handle integers/floats - uint64_t val = 0; - if( ch == '0' ) { - // Octal/hex handling - ch = this->getc(); - if( ch == 'x' ) { - while( isxdigit(ch = this->getc()) ) { - val *= val * 16; - if(ch <= '9') - val += ch - '0'; - else if( ch <= 'F' ) - val += ch - 'A' + 10; - else if( ch <= 'f' ) - val += ch - 'a' + 10; - } - } - else if( isdigit(ch) ) { - throw ParseError::Todo("Lex octal numbers"); - } - else { - val = 0; - } - } - else { - while( isdigit(ch) ) { - val *= val * 10; - val += ch - '0'; - ch = this->getc(); - } - } - - if(ch == 'u' || ch == 'i') { - // Unsigned - throw ParseError::Todo("Lex number suffixes"); - } - else if( ch == '.' ) { - throw ParseError::Todo("Lex floats"); - } - else { - this->putback(); - return Token(val, CORETYPE_ANY); - } - } - else if( issym(ch) ) - { - ::std::string str; - while( issym(ch) ) - { - str.push_back(ch); - ch = this->getc(); - } - - if( ch == '!' ) - { - return Token(TOK_MACRO, str); - } - else - { - this->putback(); - for( unsigned int i = 0; i < LEN(RWORDS); i ++ ) - { - if( str < RWORDS[i].chars ) break; - if( str == RWORDS[i].chars ) return Token((enum eTokenType)RWORDS[i].type); - } - return Token(TOK_IDENT, str); - } - } - else - { - throw ParseError::BadChar(ch); - } - } - else if( sym > 0 ) - { - return Token((enum eTokenType)sym); - } - else - { - switch(sym) - { - case LINECOMMENT: { - // Line comment - ::std::string str; - char ch = this->getc(); - while(ch != '\n' && ch != '\r') - { - str.push_back(ch); - ch = this->getc(); - } - return Token(TOK_COMMENT, str); } - case BLOCKCOMMENT: { - ::std::string str; - while(true) - { - if( ch == '*' ) { - ch = this->getc(); - if( ch == '/' ) break; - this->putback(); - } - str.push_back(ch); - ch = this->getc(); - } - return Token(TOK_COMMENT, str); } - case SINGLEQUOTE: { - char firstchar = this->getc(); - if( firstchar != '\\' ) { - ch = this->getc(); - if( ch == '\'' ) { - // Character constant - return Token((uint64_t)ch, CORETYPE_CHAR); - } - else { - // Lifetime name - ::std::string str; - str.push_back(firstchar); - while( issym(ch) ) - { - str.push_back(ch); - ch = this->getc(); - } - this->putback(); - return Token(TOK_LIFETIME, str); - } - } - else { - // Character constant with an escape code - uint32_t val = this->parseEscape('\''); - if(this->getc() != '\'') { - throw ParseError::Todo("Proper error for lex failures"); - } - return Token((uint64_t)val, CORETYPE_CHAR); - } - break; } - case DOUBLEQUOTE: - throw ParseError::Todo("Strings"); - break; - default: - assert(!"bugcheck"); - } - } - } - catch(const Lexer::EndOfFile& e) - { - return Token(TOK_EOF); - } - //assert(!"bugcheck"); -} - -uint32_t Lexer::parseEscape(char enclosing) -{ - char ch = this->getc(); - switch(ch) - { - case 'u': { - // Unicode (up to six hex digits) - uint32_t val = 0; - ch = this->getc(); - if( !isxdigit(ch) ) - throw ParseError::Todo("Proper lex error for escape sequences"); - while( isxdigit(ch) ) - { - char tmp[2] = {ch, 0}; - val *= 16; - val += ::std::strtol(tmp, NULL, 16); - ch = this->getc(); - } - this->putback(); - return val; } - case '\\': - return '\\'; - default: - throw ParseError::Todo("Proper lex error for escape sequences"); - } -} - -char Lexer::getc() -{ - if( m_last_char_valid ) - { - m_last_char_valid = false; - } - else +/* + * "MRustC" - Primitive rust compiler in C++ + */ +/** + * \file parse/lex.cpp + * \brief Low-level lexer + */ +#include "lex.hpp" +#include "tokentree.hpp" +#include "parseerror.hpp" +#include +#include +#include // strtol +#include + +Lexer::Lexer(::std::string filename): + m_istream(filename.c_str()), + m_last_char_valid(false) +{ + if( !m_istream.is_open() ) + { + throw ::std::runtime_error("Unable to open file"); + } +} + +#define LINECOMMENT -1 +#define BLOCKCOMMENT -2 +#define SINGLEQUOTE -3 +#define DOUBLEQUOTE -4 + +// NOTE: This array must be kept reverse sorted +#define TOKENT(str, sym) {sizeof(str)-1, str, sym} +static const struct { + unsigned char len; + const char* chars; + signed int type; +} TOKENMAP[] = { + TOKENT("!" , TOK_EXCLAM), + TOKENT("!=", TOK_EXCLAM_EQUAL), + TOKENT("\"", DOUBLEQUOTE), + TOKENT("#", 0), + TOKENT("#![",TOK_CATTR_OPEN), + TOKENT("#[", TOK_ATTR_OPEN), + //TOKENT("$", 0), + TOKENT("%" , TOK_PERCENT), + TOKENT("%=", TOK_PERCENT_EQUAL), + TOKENT("&" , TOK_AMP), + TOKENT("&&", TOK_DOUBLE_AMP), + TOKENT("&=", TOK_AMP_EQUAL), + TOKENT("'" , SINGLEQUOTE), + TOKENT("(" , TOK_PAREN_OPEN), + TOKENT(")" , TOK_PAREN_CLOSE), + TOKENT("*" , TOK_STAR), + TOKENT("*=", TOK_STAR_EQUAL), + TOKENT("+" , TOK_PLUS), + TOKENT("+=", TOK_PLUS_EQUAL), + TOKENT("," , TOK_COMMA), + TOKENT("-" , TOK_DASH), + TOKENT("-=", TOK_DASH_EQUAL), + TOKENT("->", TOK_THINARROW), + TOKENT(".", TOK_DOT), + TOKENT("..", TOK_DOUBLE_DOT), + TOKENT("...",TOK_TRIPLE_DOT), + TOKENT("/" , TOK_SLASH), + TOKENT("/*", BLOCKCOMMENT), + TOKENT("//", LINECOMMENT), + TOKENT("/=", TOK_SLASH_EQUAL), + // 0-9 :: Elsewhere + TOKENT(":", TOK_COLON), + TOKENT("::", TOK_DOUBLE_COLON), + TOKENT(";", TOK_SEMICOLON), + TOKENT("<", TOK_LT), + TOKENT("<<", TOK_DOUBLE_LT), + TOKENT("<=", TOK_LTE), + TOKENT("=" , TOK_EQUAL), + TOKENT("==", TOK_DOUBLE_EQUAL), + TOKENT("=>", TOK_FATARROW), + TOKENT(">", TOK_GT), + TOKENT(">>", TOK_DOUBLE_GT), + TOKENT(">=", TOK_GTE), + TOKENT("?", TOK_QMARK), + TOKENT("@", TOK_AT), + // A-Z :: Elsewhere + TOKENT("[", TOK_SQUARE_OPEN), + TOKENT("\\", TOK_BACKSLASH), + TOKENT("]", TOK_SQUARE_CLOSE), + TOKENT("^", TOK_CARET), + TOKENT("`", TOK_BACKTICK), + + TOKENT("{", TOK_BRACE_OPEN), + TOKENT("|", TOK_PIPE), + TOKENT("|=", TOK_PIPE_EQUAL), + TOKENT("||", TOK_DOUBLE_PIPE), + TOKENT("}", TOK_BRACE_CLOSE), + TOKENT("~", TOK_TILDE), +}; +#define LEN(arr) (sizeof(arr)/sizeof(arr[0])) +static const struct { + unsigned char len; + const char* chars; + signed int type; +} RWORDS[] = { + TOKENT("abstract",TOK_RWORD_ABSTRACT), + TOKENT("alignof", TOK_RWORD_ALIGNOF), + TOKENT("as", TOK_RWORD_AS), + TOKENT("be", TOK_RWORD_BE), + TOKENT("box", TOK_RWORD_BOX), + TOKENT("break", TOK_RWORD_BREAK), + TOKENT("const", TOK_RWORD_CONST), + TOKENT("continue",TOK_RWORD_CONTINUE), + TOKENT("crate", TOK_RWORD_CRATE), + TOKENT("do", TOK_RWORD_DO), + TOKENT("else", TOK_RWORD_ELSE), + TOKENT("enum", TOK_RWORD_ENUM), + TOKENT("extern", TOK_RWORD_EXTERN), + TOKENT("false", TOK_RWORD_FALSE), + TOKENT("final", TOK_RWORD_FINAL), + TOKENT("fn", TOK_RWORD_FN), + TOKENT("for", TOK_RWORD_FOR), + TOKENT("if", TOK_RWORD_IF), + TOKENT("impl", TOK_RWORD_IMPL), + TOKENT("in", TOK_RWORD_IN), + TOKENT("let", TOK_RWORD_LET), + TOKENT("loop", TOK_RWORD_LOOP), + TOKENT("match", TOK_RWORD_MATCH), + TOKENT("mod", TOK_RWORD_MOD), + TOKENT("move", TOK_RWORD_MOVE), + TOKENT("mut", TOK_RWORD_MUT), + TOKENT("offsetof",TOK_RWORD_OFFSETOF), + TOKENT("once", TOK_RWORD_ONCE), + TOKENT("override",TOK_RWORD_OVERRIDE), + TOKENT("priv", TOK_RWORD_PRIV), + TOKENT("proc", TOK_RWORD_PROC), + TOKENT("pub", TOK_RWORD_PUB), + TOKENT("pure", TOK_RWORD_PURE), + TOKENT("ref", TOK_RWORD_REF), + TOKENT("return", TOK_RWORD_RETURN), + TOKENT("self", TOK_RWORD_SELF), + TOKENT("sizeof", TOK_RWORD_SIZEOF), + TOKENT("static", TOK_RWORD_STATIC), + TOKENT("struct", TOK_RWORD_STRUCT), + TOKENT("super", TOK_RWORD_SUPER), + TOKENT("trait", TOK_RWORD_TRAIT), + TOKENT("true", TOK_RWORD_TRUE), + TOKENT("type", TOK_RWORD_TYPE), + TOKENT("typeof", TOK_RWORD_TYPEOF), + TOKENT("unsafe", TOK_RWORD_UNSAFE), + TOKENT("unsized", TOK_RWORD_UNSIZED), + TOKENT("use", TOK_RWORD_USE), + TOKENT("virtual", TOK_RWORD_VIRTUAL), + TOKENT("where", TOK_RWORD_WHERE), + TOKENT("while", TOK_RWORD_WHILE), + TOKENT("yield", TOK_RWORD_YIELD), +}; + +signed int Lexer::getSymbol() +{ + char ch = this->getc(); + // 1. lsearch for character + // 2. Consume as many characters as currently match + // 3. IF: a smaller character or, EOS is hit - Return current best + unsigned ofs = 0; + signed int best = 0; + for(unsigned i = 0; i < LEN(TOKENMAP); i ++) + { + const char* const chars = TOKENMAP[i].chars; + const size_t len = TOKENMAP[i].len; + + //::std::cout << "ofs=" << ofs << ", chars[ofs] = " << chars[ofs] << ", ch = " << ch << ", len = " << len << ::std::endl; + + if( ofs >= len || chars[ofs] > ch ) { + this->putback(); + return best; + } + + while( chars[ofs] && chars[ofs] == ch ) + { + ch = this->getc(); + ofs ++; + } + if( chars[ofs] == 0 ) + { + best = TOKENMAP[i].type; + } + } + + this->putback(); + return best; +} + +bool issym(char ch) +{ + if( ::std::isalnum(ch) ) + return true; + if( ch == '_' ) + return true; + if( ch == '$' ) + return true; + return false; +} + +Token Lexer::getToken() +{ + try + { + char ch = this->getc(); + + if( isspace(ch) ) + { + while( isspace(this->getc()) ) + ; + this->putback(); + return Token(TOK_WHITESPACE); + } + this->putback(); + + const signed int sym = this->getSymbol(); + if( sym == 0 ) + { + // No match at all, check for symbol + char ch = this->getc(); + if( isdigit(ch) ) + { + // TODO: handle integers/floats + uint64_t val = 0; + if( ch == '0' ) { + // Octal/hex handling + ch = this->getc(); + if( ch == 'x' ) { + while( isxdigit(ch = this->getc()) ) + { + val *= 16; + if(ch <= '9') + val += ch - '0'; + else if( ch <= 'F' ) + val += ch - 'A' + 10; + else if( ch <= 'f' ) + val += ch - 'a' + 10; + } + } + else if( isdigit(ch) ) { + throw ParseError::Todo("Lex octal numbers"); + } + else { + val = 0; + } + } + else { + while( isdigit(ch) ) { + val *= val * 10; + val += ch - '0'; + ch = this->getc(); + } + } + + if(ch == 'u' || ch == 'i') { + // Unsigned + throw ParseError::Todo("Lex number suffixes"); + } + else if( ch == '.' ) { + throw ParseError::Todo("Lex floats"); + } + else { + this->putback(); + return Token(val, CORETYPE_ANY); + } + } + else if( issym(ch) ) + { + ::std::string str; + while( issym(ch) ) + { + str.push_back(ch); + ch = this->getc(); + } + + if( ch == '!' ) + { + return Token(TOK_MACRO, str); + } + else + { + this->putback(); + for( unsigned int i = 0; i < LEN(RWORDS); i ++ ) + { + if( str < RWORDS[i].chars ) break; + if( str == RWORDS[i].chars ) return Token((enum eTokenType)RWORDS[i].type); + } + return Token(TOK_IDENT, str); + } + } + else + { + throw ParseError::BadChar(ch); + } + } + else if( sym > 0 ) + { + return Token((enum eTokenType)sym); + } + else + { + switch(sym) + { + case LINECOMMENT: { + // Line comment + ::std::string str; + char ch = this->getc(); + while(ch != '\n' && ch != '\r') + { + str.push_back(ch); + ch = this->getc(); + } + return Token(TOK_COMMENT, str); } + case BLOCKCOMMENT: { + ::std::string str; + while(true) + { + if( ch == '*' ) { + ch = this->getc(); + if( ch == '/' ) break; + this->putback(); + } + str.push_back(ch); + ch = this->getc(); + } + return Token(TOK_COMMENT, str); } + case SINGLEQUOTE: { + char firstchar = this->getc(); + if( firstchar != '\\' ) { + ch = this->getc(); + if( ch == '\'' ) { + // Character constant + return Token((uint64_t)ch, CORETYPE_CHAR); + } + else { + // Lifetime name + ::std::string str; + str.push_back(firstchar); + while( issym(ch) ) + { + str.push_back(ch); + ch = this->getc(); + } + this->putback(); + return Token(TOK_LIFETIME, str); + } + } + else { + // Character constant with an escape code + uint32_t val = this->parseEscape('\''); + if(this->getc() != '\'') { + throw ParseError::Todo("Proper error for lex failures"); + } + return Token((uint64_t)val, CORETYPE_CHAR); + } + break; } + case DOUBLEQUOTE: + throw ParseError::Todo("Strings"); + break; + default: + assert(!"bugcheck"); + } + } + } + catch(const Lexer::EndOfFile& e) + { + return Token(TOK_EOF); + } + //assert(!"bugcheck"); +} + +uint32_t Lexer::parseEscape(char enclosing) +{ + char ch = this->getc(); + switch(ch) + { + case 'u': { + // Unicode (up to six hex digits) + uint32_t val = 0; + ch = this->getc(); + if( !isxdigit(ch) ) + throw ParseError::Todo("Proper lex error for escape sequences"); + while( isxdigit(ch) ) + { + char tmp[2] = {ch, 0}; + val *= 16; + val += ::std::strtol(tmp, NULL, 16); + ch = this->getc(); + } + this->putback(); + return val; } + case '\\': + return '\\'; + default: + throw ParseError::Todo("Proper lex error for escape sequences"); + } +} + +char Lexer::getc() +{ + if( m_last_char_valid ) + { + m_last_char_valid = false; + } + else { m_last_char = m_istream.get(); if( m_istream.eof() ) - throw Lexer::EndOfFile(); - } - //::std::cout << "getc(): '" << m_last_char << "'" << ::std::endl; - return m_last_char; -} - -void Lexer::putback() -{ -// ::std::cout << "putback(): " << m_last_char_valid << " '" << m_last_char << "'" << ::std::endl; - assert(!m_last_char_valid); - m_last_char_valid = true; -} - -Token::Token(): - m_type(TOK_NULL), - m_str("") -{ -} -Token::Token(enum eTokenType type): - m_type(type), - m_str("") -{ -} -Token::Token(enum eTokenType type, ::std::string str): - m_type(type), - m_str(str) -{ -} -Token::Token(uint64_t val, enum eCoreType datatype): - m_type(TOK_INTEGER), - m_datatype(datatype), - m_intval(val) -{ -} - -const char* Token::typestr(enum eTokenType type) -{ - switch(type) - { - case TOK_NULL: return "TOK_NULL"; - case TOK_EOF: return "TOK_EOF"; - - case TOK_WHITESPACE: return "TOK_WHITESPACE"; - case TOK_COMMENT: return "TOK_COMMENT"; - - // Value tokens - case TOK_IDENT: return "TOK_IDENT"; - case TOK_MACRO: return "TOK_MACRO"; - case TOK_LIFETIME: return "TOK_LIFETIME"; - case TOK_INTEGER: return "TOK_INTEGER"; - case TOK_CHAR: return "TOK_CHAR"; - case TOK_FLOAT: return "TOK_FLOAT"; - case TOK_STRING: return "TOK_STRING"; - - case TOK_CATTR_OPEN: return "TOK_CATTR_OPEN"; - case TOK_ATTR_OPEN: return "TOK_ATTR_OPEN"; - - // Symbols - case TOK_PAREN_OPEN: return "TOK_PAREN_OPEN"; case TOK_PAREN_CLOSE: return "TOK_PAREN_CLOSE"; - case TOK_BRACE_OPEN: return "TOK_BRACE_OPEN"; case TOK_BRACE_CLOSE: return "TOK_BRACE_CLOSE"; - case TOK_LT: return "TOK_LT"; case TOK_GT: return "TOK_GT"; - case TOK_SQUARE_OPEN: return "TOK_SQUARE_OPEN";case TOK_SQUARE_CLOSE: return "TOK_SQUARE_CLOSE"; - case TOK_COMMA: return "TOK_COMMA"; - case TOK_SEMICOLON: return "TOK_SEMICOLON"; - case TOK_COLON: return "TOK_COLON"; - case TOK_DOUBLE_COLON: return "TOK_DOUBLE_COLON"; - case TOK_STAR: return "TOK_STAR"; case TOK_AMP: return "TOK_AMP"; - case TOK_PIPE: return "TOK_PIPE"; - - case TOK_FATARROW: return "TOK_FATARROW"; // => - case TOK_THINARROW: return "TOK_THINARROW"; // -> - - case TOK_PLUS: return "TOK_PLUS"; case TOK_DASH: return "TOK_DASH"; - case TOK_EXCLAM: return "TOK_EXCLAM"; - case TOK_PERCENT: return "TOK_PERCENT"; - case TOK_SLASH: return "TOK_SLASH"; - - case TOK_DOT: return "TOK_DOT"; - case TOK_DOUBLE_DOT: return "TOK_DOUBLE_DOT"; - case TOK_TRIPLE_DOT: return "TOK_TRIPLE_DOT"; - - case TOK_EQUAL: return "TOK_EQUAL"; - case TOK_PLUS_EQUAL: return "TOK_PLUS_EQUAL"; - case TOK_DASH_EQUAL: return "TOK_DASH_EQUAL"; - case TOK_PERCENT_EQUAL: return "TOK_PERCENT_EQUAL"; - case TOK_SLASH_EQUAL: return "TOK_SLASH_EQUAL"; - case TOK_STAR_EQUAL: return "TOK_STAR_EQUAL"; - case TOK_AMP_EQUAL: return "TOK_AMP_EQUAL"; - case TOK_PIPE_EQUAL: return "TOK_PIPE_EQUAL"; - - case TOK_DOUBLE_EQUAL: return "TOK_DOUBLE_EQUAL"; - case TOK_EXCLAM_EQUAL: return "TOK_EXCLAM_EQUAL"; - case TOK_GTE: return "TOK_GTE"; - case TOK_LTE: return "TOK_LTE"; - - case TOK_DOUBLE_AMP: return "TOK_DOUBLE_AMP"; - case TOK_DOUBLE_PIPE: return "TOK_DOUBLE_PIPE"; - case TOK_DOUBLE_LT: return "TOK_DOUBLE_LT"; - case TOK_DOUBLE_GT: return "TOK_DOUBLE_GT"; - - case TOK_QMARK: return "TOK_QMARK"; - case TOK_AT: return "TOK_AT"; - case TOK_TILDE: return "TOK_TILDE"; - case TOK_BACKSLASH: return "TOK_BACKSLASH"; - case TOK_CARET: return "TOK_CARET"; - case TOK_BACKTICK: return "TOK_BACKTICK"; - - // Reserved Words - case TOK_RWORD_PUB: return "TOK_RWORD_PUB"; - case TOK_RWORD_PRIV: return "TOK_RWORD_PRIV"; - case TOK_RWORD_MUT: return "TOK_RWORD_MUT"; - case TOK_RWORD_CONST: return "TOK_RWORD_CONST"; - case TOK_RWORD_STATIC: return "TOK_RWORD_STATIC"; - case TOK_RWORD_UNSAFE: return "TOK_RWORD_UNSAFE"; - case TOK_RWORD_EXTERN: return "TOK_RWORD_EXTERN"; - - case TOK_RWORD_CRATE: return "TOK_RWORD_CRATE"; - case TOK_RWORD_MOD: return "TOK_RWORD_MOD"; - case TOK_RWORD_STRUCT: return "TOK_RWORD_STRUCT"; - case TOK_RWORD_ENUM: return "TOK_RWORD_ENUM"; - case TOK_RWORD_TRAIT: return "TOK_RWORD_TRAIT"; - case TOK_RWORD_FN: return "TOK_RWORD_FN"; - case TOK_RWORD_USE: return "TOK_RWORD_USE"; - case TOK_RWORD_IMPL: return "TOK_RWORD_IMPL"; - case TOK_RWORD_TYPE: return "TOK_RWORD_TYPE"; - - case TOK_RWORD_WHERE: return "TOK_RWORD_WHERE"; - case TOK_RWORD_AS: return "TOK_RWORD_AS"; - - case TOK_RWORD_LET: return "TOK_RWORD_LET"; - case TOK_RWORD_MATCH: return "TOK_RWORD_MATCH"; - case TOK_RWORD_IF: return "TOK_RWORD_IF"; - case TOK_RWORD_ELSE: return "TOK_RWORD_ELSE"; - case TOK_RWORD_LOOP: return "TOK_RWORD_LOOP"; - case TOK_RWORD_WHILE: return "TOK_RWORD_WHILE"; - case TOK_RWORD_FOR: return "TOK_RWORD_FOR"; - case TOK_RWORD_IN: return "TOK_RWORD_IN"; - case TOK_RWORD_DO: return "TOK_RWORD_DO"; - - case TOK_RWORD_CONTINUE: return "TOK_RWORD_CONTINUE"; - case TOK_RWORD_BREAK: return "TOK_RWORD_BREAK"; - case TOK_RWORD_RETURN: return "TOK_RWORD_RETURN"; - case TOK_RWORD_YIELD: return "TOK_RWORD_YIELD"; - case TOK_RWORD_BOX: return "TOK_RWORD_BOX"; - case TOK_RWORD_REF: return "TOK_RWORD_REF"; - - case TOK_RWORD_FALSE: return "TOK_RWORD_FALSE"; - case TOK_RWORD_TRUE: return "TOK_RWORD_TRUE"; - case TOK_RWORD_SELF: return "TOK_RWORD_SELF"; - case TOK_RWORD_SUPER: return "TOK_RWORD_SUPER"; - - case TOK_RWORD_PROC: return "TOK_RWORD_PROC"; - case TOK_RWORD_MOVE: return "TOK_RWORD_MOVE"; - case TOK_RWORD_ONCE: return "TOK_RWORD_ONCE"; - - case TOK_RWORD_ABSTRACT: return "TOK_RWORD_ABSTRACT"; - case TOK_RWORD_FINAL: return "TOK_RWORD_FINAL"; - case TOK_RWORD_PURE: return "TOK_RWORD_PURE"; - case TOK_RWORD_OVERRIDE: return "TOK_RWORD_OVERRIDE"; - case TOK_RWORD_VIRTUAL: return "TOK_RWORD_VIRTUAL"; - - case TOK_RWORD_ALIGNOF: return "TOK_RWORD_ALIGNOF"; - case TOK_RWORD_OFFSETOF: return "TOK_RWORD_OFFSETOF"; - case TOK_RWORD_SIZEOF: return "TOK_RWORD_SIZEOF"; - case TOK_RWORD_TYPEOF: return "TOK_RWORD_TYPEOF"; - - case TOK_RWORD_BE: return "TOK_RWORD_BE"; - case TOK_RWORD_UNSIZED: return "TOK_RWORD_UNSIZED"; - } - return ">>BUGCHECK: BADTOK<<"; -} - -::std::ostream& operator<<(::std::ostream& os, Token& tok) -{ - os << Token::typestr(tok.type()) << "\"" << tok.str() << "\""; - return os; -} - -TTStream::TTStream(const TokenTree& input_tt): - m_input_tt(input_tt) -{ - m_stack.push_back( ::std::make_pair(0, &input_tt) ); -} -TTStream::~TTStream() -{ -} -Token TTStream::realGetToken() -{ - while(m_stack.size() > 0) - { - // If current index is above TT size, go up - unsigned int& idx = m_stack.back().first; - const TokenTree& tree = *m_stack.back().second; - - if(idx == 0 && tree.size() == 0) { - idx ++; - return tree.tok(); - } - - if(idx < tree.size()) - { - const TokenTree& subtree = tree[idx]; - idx ++; - if( subtree.size() == 0 ) { - return subtree.tok(); - } - else { - m_stack.push_back( ::std::make_pair(0, &subtree ) ); - } - } - else { - m_stack.pop_back(); - } - } - return Token(TOK_EOF); -} - -TokenStream::TokenStream(): - m_cache_valid(false) -{ -} -TokenStream::~TokenStream() -{ -} - -Token TokenStream::getToken() -{ - if( m_cache_valid ) - { - m_cache_valid = false; - return m_cache; - } - else - { - Token ret = this->realGetToken(); - ::std::cout << "getToken[" << typeid(*this).name() << "] - " << ret << ::std::endl; - return ret; - } -} -void TokenStream::putback(Token tok) -{ - m_cache_valid = true; - m_cache = tok; -} + throw Lexer::EndOfFile(); + } + //::std::cout << "getc(): '" << m_last_char << "'" << ::std::endl; + return m_last_char; +} + +void Lexer::putback() +{ +// ::std::cout << "putback(): " << m_last_char_valid << " '" << m_last_char << "'" << ::std::endl; + assert(!m_last_char_valid); + m_last_char_valid = true; +} + +Token::Token(): + m_type(TOK_NULL), + m_str("") +{ +} +Token::Token(enum eTokenType type): + m_type(type), + m_str("") +{ +} +Token::Token(enum eTokenType type, ::std::string str): + m_type(type), + m_str(str) +{ +} +Token::Token(uint64_t val, enum eCoreType datatype): + m_type(TOK_INTEGER), + m_datatype(datatype), + m_intval(val) +{ +} + +const char* Token::typestr(enum eTokenType type) +{ + switch(type) + { + case TOK_NULL: return "TOK_NULL"; + case TOK_EOF: return "TOK_EOF"; + + case TOK_WHITESPACE: return "TOK_WHITESPACE"; + case TOK_COMMENT: return "TOK_COMMENT"; + + // Value tokens + case TOK_IDENT: return "TOK_IDENT"; + case TOK_MACRO: return "TOK_MACRO"; + case TOK_LIFETIME: return "TOK_LIFETIME"; + case TOK_INTEGER: return "TOK_INTEGER"; + case TOK_CHAR: return "TOK_CHAR"; + case TOK_FLOAT: return "TOK_FLOAT"; + case TOK_STRING: return "TOK_STRING"; + + case TOK_CATTR_OPEN: return "TOK_CATTR_OPEN"; + case TOK_ATTR_OPEN: return "TOK_ATTR_OPEN"; + + // Symbols + case TOK_PAREN_OPEN: return "TOK_PAREN_OPEN"; case TOK_PAREN_CLOSE: return "TOK_PAREN_CLOSE"; + case TOK_BRACE_OPEN: return "TOK_BRACE_OPEN"; case TOK_BRACE_CLOSE: return "TOK_BRACE_CLOSE"; + case TOK_LT: return "TOK_LT"; case TOK_GT: return "TOK_GT"; + case TOK_SQUARE_OPEN: return "TOK_SQUARE_OPEN";case TOK_SQUARE_CLOSE: return "TOK_SQUARE_CLOSE"; + case TOK_COMMA: return "TOK_COMMA"; + case TOK_SEMICOLON: return "TOK_SEMICOLON"; + case TOK_COLON: return "TOK_COLON"; + case TOK_DOUBLE_COLON: return "TOK_DOUBLE_COLON"; + case TOK_STAR: return "TOK_STAR"; case TOK_AMP: return "TOK_AMP"; + case TOK_PIPE: return "TOK_PIPE"; + + case TOK_FATARROW: return "TOK_FATARROW"; // => + case TOK_THINARROW: return "TOK_THINARROW"; // -> + + case TOK_PLUS: return "TOK_PLUS"; case TOK_DASH: return "TOK_DASH"; + case TOK_EXCLAM: return "TOK_EXCLAM"; + case TOK_PERCENT: return "TOK_PERCENT"; + case TOK_SLASH: return "TOK_SLASH"; + + case TOK_DOT: return "TOK_DOT"; + case TOK_DOUBLE_DOT: return "TOK_DOUBLE_DOT"; + case TOK_TRIPLE_DOT: return "TOK_TRIPLE_DOT"; + + case TOK_EQUAL: return "TOK_EQUAL"; + case TOK_PLUS_EQUAL: return "TOK_PLUS_EQUAL"; + case TOK_DASH_EQUAL: return "TOK_DASH_EQUAL"; + case TOK_PERCENT_EQUAL: return "TOK_PERCENT_EQUAL"; + case TOK_SLASH_EQUAL: return "TOK_SLASH_EQUAL"; + case TOK_STAR_EQUAL: return "TOK_STAR_EQUAL"; + case TOK_AMP_EQUAL: return "TOK_AMP_EQUAL"; + case TOK_PIPE_EQUAL: return "TOK_PIPE_EQUAL"; + + case TOK_DOUBLE_EQUAL: return "TOK_DOUBLE_EQUAL"; + case TOK_EXCLAM_EQUAL: return "TOK_EXCLAM_EQUAL"; + case TOK_GTE: return "TOK_GTE"; + case TOK_LTE: return "TOK_LTE"; + + case TOK_DOUBLE_AMP: return "TOK_DOUBLE_AMP"; + case TOK_DOUBLE_PIPE: return "TOK_DOUBLE_PIPE"; + case TOK_DOUBLE_LT: return "TOK_DOUBLE_LT"; + case TOK_DOUBLE_GT: return "TOK_DOUBLE_GT"; + + case TOK_QMARK: return "TOK_QMARK"; + case TOK_AT: return "TOK_AT"; + case TOK_TILDE: return "TOK_TILDE"; + case TOK_BACKSLASH: return "TOK_BACKSLASH"; + case TOK_CARET: return "TOK_CARET"; + case TOK_BACKTICK: return "TOK_BACKTICK"; + + // Reserved Words + case TOK_RWORD_PUB: return "TOK_RWORD_PUB"; + case TOK_RWORD_PRIV: return "TOK_RWORD_PRIV"; + case TOK_RWORD_MUT: return "TOK_RWORD_MUT"; + case TOK_RWORD_CONST: return "TOK_RWORD_CONST"; + case TOK_RWORD_STATIC: return "TOK_RWORD_STATIC"; + case TOK_RWORD_UNSAFE: return "TOK_RWORD_UNSAFE"; + case TOK_RWORD_EXTERN: return "TOK_RWORD_EXTERN"; + + case TOK_RWORD_CRATE: return "TOK_RWORD_CRATE"; + case TOK_RWORD_MOD: return "TOK_RWORD_MOD"; + case TOK_RWORD_STRUCT: return "TOK_RWORD_STRUCT"; + case TOK_RWORD_ENUM: return "TOK_RWORD_ENUM"; + case TOK_RWORD_TRAIT: return "TOK_RWORD_TRAIT"; + case TOK_RWORD_FN: return "TOK_RWORD_FN"; + case TOK_RWORD_USE: return "TOK_RWORD_USE"; + case TOK_RWORD_IMPL: return "TOK_RWORD_IMPL"; + case TOK_RWORD_TYPE: return "TOK_RWORD_TYPE"; + + case TOK_RWORD_WHERE: return "TOK_RWORD_WHERE"; + case TOK_RWORD_AS: return "TOK_RWORD_AS"; + + case TOK_RWORD_LET: return "TOK_RWORD_LET"; + case TOK_RWORD_MATCH: return "TOK_RWORD_MATCH"; + case TOK_RWORD_IF: return "TOK_RWORD_IF"; + case TOK_RWORD_ELSE: return "TOK_RWORD_ELSE"; + case TOK_RWORD_LOOP: return "TOK_RWORD_LOOP"; + case TOK_RWORD_WHILE: return "TOK_RWORD_WHILE"; + case TOK_RWORD_FOR: return "TOK_RWORD_FOR"; + case TOK_RWORD_IN: return "TOK_RWORD_IN"; + case TOK_RWORD_DO: return "TOK_RWORD_DO"; + + case TOK_RWORD_CONTINUE: return "TOK_RWORD_CONTINUE"; + case TOK_RWORD_BREAK: return "TOK_RWORD_BREAK"; + case TOK_RWORD_RETURN: return "TOK_RWORD_RETURN"; + case TOK_RWORD_YIELD: return "TOK_RWORD_YIELD"; + case TOK_RWORD_BOX: return "TOK_RWORD_BOX"; + case TOK_RWORD_REF: return "TOK_RWORD_REF"; + + case TOK_RWORD_FALSE: return "TOK_RWORD_FALSE"; + case TOK_RWORD_TRUE: return "TOK_RWORD_TRUE"; + case TOK_RWORD_SELF: return "TOK_RWORD_SELF"; + case TOK_RWORD_SUPER: return "TOK_RWORD_SUPER"; + + case TOK_RWORD_PROC: return "TOK_RWORD_PROC"; + case TOK_RWORD_MOVE: return "TOK_RWORD_MOVE"; + case TOK_RWORD_ONCE: return "TOK_RWORD_ONCE"; + + case TOK_RWORD_ABSTRACT: return "TOK_RWORD_ABSTRACT"; + case TOK_RWORD_FINAL: return "TOK_RWORD_FINAL"; + case TOK_RWORD_PURE: return "TOK_RWORD_PURE"; + case TOK_RWORD_OVERRIDE: return "TOK_RWORD_OVERRIDE"; + case TOK_RWORD_VIRTUAL: return "TOK_RWORD_VIRTUAL"; + + case TOK_RWORD_ALIGNOF: return "TOK_RWORD_ALIGNOF"; + case TOK_RWORD_OFFSETOF: return "TOK_RWORD_OFFSETOF"; + case TOK_RWORD_SIZEOF: return "TOK_RWORD_SIZEOF"; + case TOK_RWORD_TYPEOF: return "TOK_RWORD_TYPEOF"; + + case TOK_RWORD_BE: return "TOK_RWORD_BE"; + case TOK_RWORD_UNSIZED: return "TOK_RWORD_UNSIZED"; + } + return ">>BUGCHECK: BADTOK<<"; +} + +::std::ostream& operator<<(::std::ostream& os, Token& tok) +{ + os << Token::typestr(tok.type()) << "\"" << tok.str() << "\""; + return os; +} + +TTStream::TTStream(const TokenTree& input_tt): + m_input_tt(input_tt) +{ + m_stack.push_back( ::std::make_pair(0, &input_tt) ); +} +TTStream::~TTStream() +{ +} +Token TTStream::realGetToken() +{ + while(m_stack.size() > 0) + { + // If current index is above TT size, go up + unsigned int& idx = m_stack.back().first; + const TokenTree& tree = *m_stack.back().second; + + if(idx == 0 && tree.size() == 0) { + idx ++; + return tree.tok(); + } + + if(idx < tree.size()) + { + const TokenTree& subtree = tree[idx]; + idx ++; + if( subtree.size() == 0 ) { + return subtree.tok(); + } + else { + m_stack.push_back( ::std::make_pair(0, &subtree ) ); + } + } + else { + m_stack.pop_back(); + } + } + return Token(TOK_EOF); +} + +TokenStream::TokenStream(): + m_cache_valid(false) +{ +} +TokenStream::~TokenStream() +{ +} + +Token TokenStream::getToken() +{ + if( m_cache_valid ) + { + m_cache_valid = false; + return m_cache; + } + else + { + Token ret = this->realGetToken(); + ::std::cout << "getToken[" << typeid(*this).name() << "] - " << ret << ::std::endl; + return ret; + } +} +void TokenStream::putback(Token tok) +{ + m_cache_valid = true; + m_cache = tok; +} -- cgit v1.2.3