summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn Hodge <tpg@mutabah.net>2015-03-06 19:55:17 +0800
committerJohn Hodge <tpg@mutabah.net>2015-03-06 19:55:17 +0800
commit6d7c84bd9d8d5e2bec8bc86d22d53975bde6e833 (patch)
treedddee2de5214120a9808d2f7a367288807703b54
parent414883e7b61d14edb242d320dcf0e5dea3e75e85 (diff)
downloadmrust-6d7c84bd9d8d5e2bec8bc86d22d53975bde6e833.tar.gz
Bugfixes exposed by fixing formatted output
-rw-r--r--Makefile4
-rw-r--r--src/ast/ast.hpp1
-rw-r--r--src/ast/expr.cpp111
-rw-r--r--src/ast/expr.hpp83
-rw-r--r--src/ast/path.cpp38
-rw-r--r--src/common.hpp115
-rw-r--r--src/convert/resolve.cpp16
-rw-r--r--src/convert/typecheck_expr.cpp2
-rw-r--r--src/convert/typecheck_params.cpp2
-rw-r--r--src/dump_as_rust.cpp268
-rw-r--r--src/include/debug.hpp26
-rw-r--r--src/include/rustic.hpp100
-rw-r--r--src/main.cpp23
-rw-r--r--src/parse/lex.cpp1299
14 files changed, 1227 insertions, 861 deletions
diff --git a/Makefile b/Makefile
index e2e138dc..e2bec3f9 100644
--- a/Makefile
+++ b/Makefile
@@ -30,8 +30,8 @@ clean:
test: $(BIN) samples/1.rs
mkdir -p output/
- time $(DBG) $(BIN) samples/std.rs --emit ast -o output/std.ast 2>&1 | tee output/ast_dbg.txt
- time $(DBG) $(BIN) samples/1.rs --crate-path output/std.ast -o output/test.c 2>&1 | tee output/1_dbg.txt
+ $(DBG) $(BIN) samples/std.rs --emit ast -o output/std.ast 2>&1 | tee output/ast_dbg.txt
+ $(DBG) $(BIN) samples/1.rs --crate-path output/std.ast -o output/test.c 2>&1 | tee output/1_dbg.txt
$(BIN): $(OBJ)
@mkdir -p $(dir $@)
diff --git a/src/ast/ast.hpp b/src/ast/ast.hpp
index 261deb47..58bcd389 100644
--- a/src/ast/ast.hpp
+++ b/src/ast/ast.hpp
@@ -483,6 +483,7 @@ public:
const itemlist_use_t& imports() const { return m_imports; }
const ::std::vector<Item<TypeAlias> >& type_aliases() const { return m_type_aliases; }
const itemlist_ext_t& extern_crates() const { return m_extern_crates; }
+ const ::std::vector<Impl>& impls() const { return m_impls; }
const itemlist_static_t& statics() const { return m_statics; }
const ItemList<Trait>& traits() const { return m_traits; }
const itemlist_enum_t& enums () const { return m_enums; }
diff --git a/src/ast/expr.cpp b/src/ast/expr.cpp
index 91f77633..b4cd9240 100644
--- a/src/ast/expr.cpp
+++ b/src/ast/expr.cpp
@@ -6,8 +6,16 @@ namespace AST {
void Expr::visit_nodes(NodeVisitor& v)
{
+ assert(!!m_node);
m_node->visit(v);
}
+void Expr::visit_nodes(NodeVisitor& v) const
+{
+ assert(!!m_node);
+ assert(v.is_const());
+ //const_cast<const ExprNode*>(m_node.get())->visit(v);
+ m_node->visit(v);
+}
::std::ostream& operator<<(::std::ostream& os, const Expr& pat)
{
if( pat.m_node.get() )
@@ -72,7 +80,11 @@ SERIALISE_TYPE(Expr::, "Expr", {
ExprNode::~ExprNode() {
}
-#define NODE(class, serialise, _print) void class::visit(NodeVisitor& nv) { nv.visit(*this); } SERIALISE_TYPE_S(class, serialise) void class::print(::std::ostream& os) const _print
+#define NODE(class, serialise, _print)\
+ void class::visit(NodeVisitor& nv) { nv.visit(*this); } \
+ /*void class::visit(NodeVisitor& nv) const { nv.visit(*this); }*/ \
+ void class::print(::std::ostream& os) const _print \
+ SERIALISE_TYPE_S(class, serialise) \
ExprNode_Block::~ExprNode_Block() {
}
@@ -261,127 +273,114 @@ NODE(ExprNode_BinOp, {
})
-void NodeVisitor::visit(ExprNode_Block& node)
-{
- DEBUG("DEF - ExprNode_Block");
+#define NV(type, actions)\
+ void NodeVisitorDef::visit(type& node) { DEBUG("DEF - "#type); actions }
+// void NodeVisitorDef::visit(const type& node) { DEBUG("DEF - "#type" (const)"); actions }
+
+NV(ExprNode_Block, {
INDENT();
for( auto& child : node.m_nodes )
visit(child);
UNINDENT();
-}
-void NodeVisitor::visit(ExprNode_Macro& node)
+})
+NV(ExprNode_Macro,
{
- DEBUG("DEF - ExprNode_Macro");
-}
-void NodeVisitor::visit(ExprNode_Return& node)
+ DEBUG("TODO: Macro");
+})
+NV(ExprNode_Return,
{
- DEBUG("DEF - ExprNode_Return");
visit(node.m_value);
-}
-void NodeVisitor::visit(ExprNode_LetBinding& node)
+})
+NV(ExprNode_LetBinding,
{
- DEBUG("DEF - ExprNode_LetBinding");
// TODO: Handle recurse into Let pattern
visit(node.m_value);
-}
-void NodeVisitor::visit(ExprNode_Assign& node)
+})
+NV(ExprNode_Assign,
{
- DEBUG("DEF - ExprNode_Assign");
INDENT();
visit(node.m_slot);
visit(node.m_value);
UNINDENT();
-}
-void NodeVisitor::visit(ExprNode_CallPath& node)
+})
+NV(ExprNode_CallPath,
{
- DEBUG("DEF - ExprNode_CallPath");
INDENT();
for( auto& arg : node.m_args )
visit(arg);
UNINDENT();
-}
-void NodeVisitor::visit(ExprNode_CallMethod& node)
+})
+NV(ExprNode_CallMethod,
{
- DEBUG("DEF - ExprNode_CallMethod");
INDENT();
visit(node.m_val);
for( auto& arg : node.m_args )
visit(arg);
UNINDENT();
-}
-void NodeVisitor::visit(ExprNode_CallObject& node)
+})
+NV(ExprNode_CallObject,
{
- DEBUG("DEF - ExprNode_CallObject");
INDENT();
visit(node.m_val);
for( auto& arg : node.m_args )
visit(arg);
UNINDENT();
-}
-void NodeVisitor::visit(ExprNode_Match& node)
+})
+NV(ExprNode_Match,
{
- DEBUG("DEF - ExprNode_Match");
INDENT();
visit(node.m_val);
for( auto& arm : node.m_arms )
visit(arm.second);
UNINDENT();
-}
-void NodeVisitor::visit(ExprNode_If& node)
+})
+NV(ExprNode_If,
{
- DEBUG("DEF - ExprNode_If");
INDENT();
visit(node.m_cond);
visit(node.m_true);
visit(node.m_false);
UNINDENT();
-}
+})
-void NodeVisitor::visit(ExprNode_Integer& node)
+NV(ExprNode_Integer,
{
- DEBUG("DEF - ExprNode_Integer");
// LEAF
-}
-void NodeVisitor::visit(ExprNode_StructLiteral& node)
+})
+NV(ExprNode_StructLiteral,
{
- DEBUG("DEF - ExprNode_StructLiteral");
visit(node.m_base_value);
for( auto& val : node.m_values )
visit(val.second);
-}
-void NodeVisitor::visit(ExprNode_Tuple& node)
+})
+NV(ExprNode_Tuple,
{
- DEBUG("DEF - ExprNode_Tuple");
for( auto& val : node.m_values )
visit(val);
-}
-void NodeVisitor::visit(ExprNode_NamedValue& node)
+})
+NV(ExprNode_NamedValue,
{
- DEBUG("DEF - ExprNode_NamedValue");
// LEAF
-}
+})
-void NodeVisitor::visit(ExprNode_Field& node)
+NV(ExprNode_Field,
{
- DEBUG("DEF - ExprNode_Field");
visit(node.m_obj);
-}
-void NodeVisitor::visit(ExprNode_Deref& node)
+})
+NV(ExprNode_Deref,
{
- DEBUG("DEF - ExprNode_Deref");
visit(node.m_value);
-}
-void NodeVisitor::visit(ExprNode_Cast& node)
+})
+NV(ExprNode_Cast,
{
- DEBUG("DEF - ExprNode_Cast");
visit(node.m_value);
-}
-void NodeVisitor::visit(ExprNode_BinOp& node)
+})
+NV(ExprNode_BinOp,
{
- DEBUG("DEF - ExprNode_BinOp");
visit(node.m_left);
visit(node.m_right);
-}
+})
+#undef NV
};
diff --git a/src/ast/expr.hpp b/src/ast/expr.hpp
index 2744b75e..bbb82bde 100644
--- a/src/ast/expr.hpp
+++ b/src/ast/expr.hpp
@@ -25,6 +25,7 @@ public:
virtual ~ExprNode() = 0;
virtual void visit(NodeVisitor& nv) = 0;
+ //virtual void visit(NodeVisitor& nv) const = 0;
virtual void print(::std::ostream& os) const = 0;
TypeRef& get_res_type() { return m_res_type; }
@@ -33,7 +34,11 @@ public:
static ::std::unique_ptr<ExprNode> from_deserialiser(Deserialiser& d);
};
-#define NODE_METHODS() virtual void visit(NodeVisitor& nv) override; virtual void print(::std::ostream& os) const override; SERIALISABLE_PROTOTYPES();
+#define NODE_METHODS() \
+ virtual void visit(NodeVisitor& nv) override;\
+ virtual void print(::std::ostream& os) const override; \
+ SERIALISABLE_PROTOTYPES();/* \
+ virtual void visit(NodeVisitor& nv) const override;*/
struct ExprNode_Block:
public ExprNode
@@ -336,27 +341,65 @@ public:
if(cnode.get())
cnode->visit(*this);
}
+ virtual bool is_const() const { return false; }
+
+ #define NT(nt) \
+ virtual void visit(nt& node) = 0/*; \
+ virtual void visit(const nt& node) = 0*/
+ NT(ExprNode_Block);
+ NT(ExprNode_Macro);
+ NT(ExprNode_Return);
+ NT(ExprNode_LetBinding);
+ NT(ExprNode_Assign);
+ NT(ExprNode_CallPath);
+ NT(ExprNode_CallMethod);
+ NT(ExprNode_CallObject);
+ NT(ExprNode_Match);
+ NT(ExprNode_If);
- virtual void visit(ExprNode_Block& node);
- virtual void visit(ExprNode_Macro& node);
- virtual void visit(ExprNode_Return& node);
- virtual void visit(ExprNode_LetBinding& node);
- virtual void visit(ExprNode_Assign& node);
- virtual void visit(ExprNode_CallPath& node);
- virtual void visit(ExprNode_CallMethod& node);
- virtual void visit(ExprNode_CallObject& node);
- virtual void visit(ExprNode_Match& node);
- virtual void visit(ExprNode_If& node);
+ NT(ExprNode_Integer);
+ NT(ExprNode_StructLiteral);
+ NT(ExprNode_Tuple);
+ NT(ExprNode_NamedValue);
- virtual void visit(ExprNode_Integer& node);
- virtual void visit(ExprNode_StructLiteral& node);
- virtual void visit(ExprNode_Tuple& node);
- virtual void visit(ExprNode_NamedValue& node);
+ NT(ExprNode_Field);
+ NT(ExprNode_Deref);
+ NT(ExprNode_Cast);
+ NT(ExprNode_BinOp);
+ #undef NT
+};
+class NodeVisitorDef:
+ public NodeVisitor
+{
+public:
+ inline void visit(const unique_ptr<ExprNode>& cnode) {
+ if(cnode.get())
+ cnode->visit(*this);
+ }
+ #define NT(nt) \
+ virtual void visit(nt& node) override;/* \
+ virtual void visit(const nt& node) override*/
+ NT(ExprNode_Block);
+ NT(ExprNode_Macro);
+ NT(ExprNode_Return);
+ NT(ExprNode_LetBinding);
+ NT(ExprNode_Assign);
+ NT(ExprNode_CallPath);
+ NT(ExprNode_CallMethod);
+ NT(ExprNode_CallObject);
+ NT(ExprNode_Match);
+ NT(ExprNode_If);
+
+ NT(ExprNode_Integer);
+ NT(ExprNode_StructLiteral);
+ NT(ExprNode_Tuple);
+ NT(ExprNode_NamedValue);
- virtual void visit(ExprNode_Field& node);
- virtual void visit(ExprNode_Deref& node);
- virtual void visit(ExprNode_Cast& node);
- virtual void visit(ExprNode_BinOp& node);
+ NT(ExprNode_Field);
+ NT(ExprNode_Deref);
+ NT(ExprNode_Cast);
+ NT(ExprNode_BinOp);
+ #undef NT
};
class Expr:
@@ -379,8 +422,10 @@ public:
bool is_valid() const { return m_node.get() != nullptr; }
ExprNode& node() { assert(m_node.get()); return *m_node; }
+ const ExprNode& node() const { assert(m_node.get()); return *m_node; }
::std::shared_ptr<ExprNode> take_node() { assert(m_node.get()); return ::std::move(m_node); }
void visit_nodes(NodeVisitor& v);
+ void visit_nodes(NodeVisitor& v) const;
friend ::std::ostream& operator<<(::std::ostream& os, const Expr& pat);
diff --git a/src/ast/path.cpp b/src/ast/path.cpp
index cd068694..f3fc5e82 100644
--- a/src/ast/path.cpp
+++ b/src/ast/path.cpp
@@ -30,9 +30,6 @@ bool PathNode::operator==(const PathNode& x) const
return m_name == x.m_name && m_params == x.m_params;
}
::std::ostream& operator<<(::std::ostream& os, const PathNode& pn) {
- #if PRETTY_PATH_PRINT
- os << "::";
- #endif
os << pn.m_name;
if( pn.m_params.size() )
{
@@ -66,6 +63,8 @@ void Path::resolve(const Crate& root_crate)
throw ParseError::BugCheck("Calling Path::resolve on non-absolute path");
DEBUG("m_crate = '" << m_crate << "'");
+ unsigned int slice_from = 0; // Used when rewriting the path to be relative to its crate root
+
const Module* mod = &root_crate.get_root_module(m_crate);
for(unsigned int i = 0; i < m_nodes.size(); i ++ )
{
@@ -97,6 +96,8 @@ void Path::resolve(const Crate& root_crate)
DEBUG("Extern crate '" << node.name() << "' = '" << it->data << "'");
if( node.args().size() )
throw ParseError::Generic("Generic params applied to extern crate");
+ m_crate = it->data;
+ slice_from = i+1;
mod = &root_crate.get_root_module(it->data);
continue;
}
@@ -127,7 +128,7 @@ void Path::resolve(const Crate& root_crate)
if( is_last ) {
m_binding_type = ALIAS;
m_binding.alias_ = &it->data;
- return ;
+ goto ret;
}
else {
throw ParseError::Todo("Path::resolve() type method");
@@ -145,7 +146,7 @@ void Path::resolve(const Crate& root_crate)
if( is_last ) {
m_binding_type = FUNCTION;
m_binding.func_ = &it->data;
- return ;
+ goto ret;
}
else {
throw ParseError::Generic("Import of function, too many extra nodes");
@@ -163,7 +164,7 @@ void Path::resolve(const Crate& root_crate)
if( is_last ) {
m_binding_type = TRAIT;
m_binding.trait_ = &it->data;
- return;
+ goto ret;
}
else if( is_sec_last ) {
throw ParseError::Todo("Path::resolve() trait method");
@@ -182,7 +183,7 @@ void Path::resolve(const Crate& root_crate)
DEBUG("Found struct");
if( is_last ) {
bind_struct(it->data, node.args());
- return;
+ goto ret;
}
else if( is_sec_last ) {
throw ParseError::Todo("Path::resolve() struct method");
@@ -201,11 +202,11 @@ void Path::resolve(const Crate& root_crate)
DEBUG("Found enum");
if( is_last ) {
bind_enum(it->data, node.args());
- return ;
+ goto ret;
}
else if( is_sec_last ) {
bind_enum_var(it->data, m_nodes[i+1].name(), node.args());
- return ;
+ goto ret;
}
else {
throw ParseError::Generic("Binding path to enum, too many extra nodes");
@@ -223,7 +224,7 @@ void Path::resolve(const Crate& root_crate)
if( node.args().size() )
throw ParseError::Generic("Unexpected generic params on static/const");
bind_static(it->data);
- return ;
+ goto ret;
}
else {
throw ParseError::Generic("Binding path to static, trailing nodes");
@@ -236,6 +237,13 @@ void Path::resolve(const Crate& root_crate)
// We only reach here if the path points to a module
bind_module(*mod);
+ret:
+ if( slice_from > 0 )
+ {
+ DEBUG("Removing " << slice_from << " nodes to rebase path to crate root");
+ m_nodes.erase(m_nodes.begin(), m_nodes.begin()+slice_from);
+ }
+ return ;
}
void Path::bind_module(const Module& mod)
{
@@ -340,12 +348,22 @@ void Path::print_pretty(::std::ostream& os) const
case Path::RELATIVE:
os << "self";
for(const auto& n : path.m_nodes)
+ {
+ #if PRETTY_PATH_PRINT
+ os << "::";
+ #endif
os << n;
+ }
break;
case Path::ABSOLUTE:
os << "{"<<path.m_crate<<"}";
for(const auto& n : path.m_nodes)
+ {
+ #if PRETTY_PATH_PRINT
+ os << "::";
+ #endif
os << n;
+ }
break;
case Path::LOCAL:
os << path.m_nodes[0].name();
diff --git a/src/common.hpp b/src/common.hpp
index 0736f2fc..59035c4b 100644
--- a/src/common.hpp
+++ b/src/common.hpp
@@ -9,121 +9,10 @@
#include <cassert>
#include <sstream>
-extern int g_debug_indent_level;
-
#define FMT(ss) (dynamic_cast< ::std::stringstream&>(::std::stringstream() << ss).str())
-#define INDENT() do { g_debug_indent_level += 1; } while(0)
-#define UNINDENT() do { g_debug_indent_level -= 1; } while(0)
-#define DEBUG(ss) do{ ::std::cerr << ::RepeatLitStr{" ", g_debug_indent_level} << __FUNCTION__ << ": " << ss << ::std::endl; } while(0)
-
-struct RepeatLitStr
-{
- const char *s;
- int n;
-
- friend ::std::ostream& operator<<(::std::ostream& os, const RepeatLitStr& r) {
- for(int i = 0; i < r.n; i ++ )
- os << r.s;
- return os;
- }
-};
-
-template<typename T>
-class slice
-{
- T* m_first;
- unsigned int m_len;
-public:
- slice(::std::vector<T>& v):
- m_first(&v[0]),
- m_len(v.size())
- {}
-
- ::std::vector<T> to_vec() const {
- return ::std::vector<T>(begin(), end());
- }
-
- unsigned int size() const {
- return m_len;
- }
- T& operator[](unsigned int i) const {
- assert(i < m_len);
- return m_first[i];
- }
-
- T* begin() const { return m_first; }
- T* end() const { return m_first + m_len; }
-};
-
-template<typename T>
-::std::ostream& operator<<(::std::ostream& os, slice<T> s) {
- if( s.size() > 0 )
- {
- bool is_first = true;
- for( const auto& i : s )
- {
- if(!is_first)
- os << ", ";
- is_first = false;
- os << i;
- }
- }
- return os;
-}
-
-namespace rust {
-
-template<typename T>
-class option
-{
- bool m_set;
- T m_data;
-public:
- option(T ent):
- m_set(true),
- m_data( ::std::move(ent) )
- {}
- option():
- m_set(false)
- {}
-
- bool is_none() const { return !m_set; }
- bool is_some() const { return m_set; }
-
- const T& unwrap() const {
- assert(is_some());
- return m_data;
- }
-};
-template<typename T>
-class option<T&>
-{
- T* m_ptr;
-public:
- option(T& ent):
- m_ptr(&ent)
- {}
- option():
- m_ptr(nullptr)
- {}
-
- bool is_none() const { return m_ptr == nullptr; }
- bool is_some() const { return m_ptr != nullptr; }
- T& unwrap() const {
- assert(is_some());
- return *m_ptr;
- }
-};
-template<typename T>
-option<T> Some(T data) {
- return option<T>( ::std::move(data) );
-}
-template<typename T>
-option<T> None() {
- return option<T>( );
-}
-};
+#include "include/debug.hpp"
+#include "include/rustic.hpp" // slice and option
namespace std {
diff --git a/src/convert/resolve.cpp b/src/convert/resolve.cpp
index 3c10a133..cba7d375 100644
--- a/src/convert/resolve.cpp
+++ b/src/convert/resolve.cpp
@@ -70,7 +70,7 @@ public:
void ResolvePaths(AST::Crate& crate);
class CResolvePaths_NodeVisitor:
- public AST::NodeVisitor
+ public AST::NodeVisitorDef
{
CPathResolver& m_res;
public:
@@ -92,8 +92,8 @@ public:
m_res.handle_path(node.m_path, CASTIterator::MODE_EXPR);
}
void visit(AST::ExprNode_CallPath& node) {
- DEBUG("ExprNode_CallPath");
- AST::NodeVisitor::visit(node);
+ DEBUG("ExprNode_CallPath - " << node);
+ AST::NodeVisitorDef::visit(node);
m_res.handle_path(node.m_path, CASTIterator::MODE_EXPR);
}
@@ -171,11 +171,13 @@ void CPathResolver::handle_path(AST::Path& path, CASTIterator::PathMode mode)
if( path.is_absolute() )
{
DEBUG("Absolute - binding");
+ INDENT();
// Already absolute, our job is done
// - However, if the path isn't bound, bind it
if( !path.is_bound() ) {
path.resolve(m_crate);
}
+ UNINDENT();
}
else if( path.is_relative() )
{
@@ -492,17 +494,23 @@ void ResolvePaths(AST::Crate& crate)
{
DEBUG(" >>>");
// Pre-process external crates to tag all paths
+ DEBUG(" --- Extern crates");
+ INDENT();
for(auto& ec : crate.extern_crates())
{
SetCrateName_Mod(crate, ec.first, ec.second.root_module());
}
+ UNINDENT();
// Handle 'use' statements in an initial parss
+ DEBUG(" --- Use Statements");
+ INDENT();
ResolvePaths_HandleModule_Use(crate, AST::Path(AST::Path::TagAbsolute()), crate.root_module());
- DEBUG(" ---");
+ UNINDENT();
// Then do path resolution on all other items
CPathResolver pr(crate);
+ DEBUG(" ---");
pr.handle_module(AST::Path(AST::Path::TagAbsolute()), crate.root_module());
DEBUG(" <<<");
}
diff --git a/src/convert/typecheck_expr.cpp b/src/convert/typecheck_expr.cpp
index 852f1c12..49a17243 100644
--- a/src/convert/typecheck_expr.cpp
+++ b/src/convert/typecheck_expr.cpp
@@ -62,7 +62,7 @@ private:
void iterate_traits(::std::function<bool(const TypeRef& trait)> fcn);
};
class CTC_NodeVisitor:
- public AST::NodeVisitor
+ public AST::NodeVisitorDef
{
CTypeChecker& m_tc;
public:
diff --git a/src/convert/typecheck_params.cpp b/src/convert/typecheck_params.cpp
index 7a682ce7..109713d4 100644
--- a/src/convert/typecheck_params.cpp
+++ b/src/convert/typecheck_params.cpp
@@ -26,7 +26,7 @@ private:
};
class CNodeVisitor:
- public AST::NodeVisitor
+ public AST::NodeVisitorDef
{
CGenericParamChecker& m_pc;
public:
diff --git a/src/dump_as_rust.cpp b/src/dump_as_rust.cpp
index 1a0b426d..ade350e8 100644
--- a/src/dump_as_rust.cpp
+++ b/src/dump_as_rust.cpp
@@ -26,7 +26,193 @@ public:
void handle_function(const AST::Item<AST::Function>& f);
+ virtual bool is_const() const override { return true; }
+ virtual void visit(AST::ExprNode_Block& n) override {
+ m_os << "{";
+ inc_indent();
+ bool is_first = true;
+ for( auto& child : n.m_nodes )
+ {
+ if(is_first) {
+ is_first = false;
+ } else {
+ m_os << ";";
+ }
+ m_os << "\n";
+ m_os << indent();
+ if( !child.get() )
+ m_os << "/* nil */";
+ else
+ AST::NodeVisitor::visit(child);
+ }
+ m_os << "\n";
+ dec_indent();
+ m_os << indent() << "}";
+ }
+ virtual void visit(AST::ExprNode_Macro& n) override {
+ m_os << n.m_name << "!( /* TODO: Macro TT */ )";
+ }
+ virtual void visit(AST::ExprNode_Return& n) override {
+ m_os << "return ";
+ AST::NodeVisitor::visit(n.m_value);
+ }
+ virtual void visit(AST::ExprNode_LetBinding& n) override {
+ m_os << "let ";
+ print_pattern(n.m_pat);
+ m_os << " = ";
+ AST::NodeVisitor::visit(n.m_value);
+ }
+ virtual void visit(AST::ExprNode_Assign& n) override {
+ AST::NodeVisitor::visit(n.m_slot);
+ m_os << " = ";
+ AST::NodeVisitor::visit(n.m_value);
+ }
+ virtual void visit(AST::ExprNode_CallPath& n) override {
+ m_os << n.m_path;
+ m_os << "(";
+ bool is_first = true;
+ for( auto& arg : n.m_args )
+ {
+ if(is_first) {
+ is_first = false;
+ } else {
+ m_os << ", ";
+ }
+ AST::NodeVisitor::visit(arg);
+ }
+ m_os << ")";
+ }
+ virtual void visit(AST::ExprNode_CallMethod& n) override {
+ m_os << "(";
+ AST::NodeVisitor::visit(n.m_val);
+ m_os << ")." << n.m_method;
+ m_os << "(";
+ bool is_first = true;
+ for( auto& arg : n.m_args )
+ {
+ if(is_first) {
+ is_first = false;
+ } else {
+ m_os << ", ";
+ }
+ AST::NodeVisitor::visit(arg);
+ }
+ m_os << ")";
+ }
+ virtual void visit(AST::ExprNode_CallObject&) override {
+ throw ::std::runtime_error("unimplemented ExprNode_CallObject");
+ }
+ virtual void visit(AST::ExprNode_Match& n) override {
+ m_os << "match ";
+ AST::NodeVisitor::visit(n.m_val);
+ m_os << " {\n";
+ inc_indent();
+
+ for( auto& arm : n.m_arms )
+ {
+ m_os << indent();
+ print_pattern( arm.first );
+ m_os << " => ";
+ AST::NodeVisitor::visit(arm.second);
+ m_os << ",\n";
+ }
+
+ m_os << indent() << "}";
+ dec_indent();
+ }
+ virtual void visit(AST::ExprNode_If& n) override {
+ m_os << "if ";
+ AST::NodeVisitor::visit(n.m_cond);
+ m_os << " ";
+ AST::NodeVisitor::visit(n.m_true);
+ if(n.m_false.get())
+ {
+ m_os << " else ";
+ AST::NodeVisitor::visit(n.m_false);
+ }
+ }
+ virtual void visit(AST::ExprNode_Integer& n) override {
+ switch(n.m_datatype)
+ {
+ }
+ m_os << "0x" << ::std::hex << n.m_value << ::std::dec;
+ }
+ virtual void visit(AST::ExprNode_StructLiteral& n) override {
+ m_os << n.m_path << " {\n";
+ inc_indent();
+ for( const auto& i : n.m_values )
+ {
+ m_os << indent() << i.first << ": ";
+ AST::NodeVisitor::visit(i.second);
+ m_os << ",\n";
+ }
+ if( n.m_base_value.get() )
+ {
+ m_os << indent() << ".. ";
+ AST::NodeVisitor::visit(n.m_base_value);
+ m_os << "\n";
+ }
+ m_os << indent() << "}";
+ dec_indent();
+ }
+ virtual void visit(AST::ExprNode_Tuple& n) override {
+ m_os << "(";
+ for( auto& item : n.m_values )
+ {
+ AST::NodeVisitor::visit(item);
+ m_os << ", ";
+ }
+ m_os << ")";
+ }
+ virtual void visit(AST::ExprNode_NamedValue& n) override {
+ m_os << n.m_path;
+ }
+ virtual void visit(AST::ExprNode_Field& n) override {
+ m_os << "(";
+ AST::NodeVisitor::visit(n.m_obj);
+ m_os << ")." << n.m_name;
+ }
+ virtual void visit(AST::ExprNode_Deref&) override {
+ throw ::std::runtime_error("unimplemented ExprNode_Deref");
+ }
+ virtual void visit(AST::ExprNode_Cast& n) override {
+ AST::NodeVisitor::visit(n.m_value);
+ m_os << " as " << n.m_type;
+ }
+ virtual void visit(AST::ExprNode_BinOp& n) override {
+ #define IS(v, c) (dynamic_cast<c*>(&v) != 0)
+ if( IS(*n.m_left, AST::ExprNode_Cast) )
+ paren_wrap(n.m_left);
+ else if( IS(*n.m_left, AST::ExprNode_BinOp) )
+ paren_wrap(n.m_left);
+ else
+ AST::NodeVisitor::visit(n.m_left);
+ m_os << " ";
+ switch(n.m_type)
+ {
+ case AST::ExprNode_BinOp::CMPEQU: m_os << "=="; break;
+ case AST::ExprNode_BinOp::CMPNEQU:m_os << "!="; break;
+ case AST::ExprNode_BinOp::BITAND: m_os << "&"; break;
+ case AST::ExprNode_BinOp::BITOR: m_os << "|"; break;
+ case AST::ExprNode_BinOp::BITXOR: m_os << "^"; break;
+ case AST::ExprNode_BinOp::SHL: m_os << "<<"; break;
+ case AST::ExprNode_BinOp::SHR: m_os << ">>"; break;
+ }
+ m_os << " ";
+ if( IS(*n.m_right, AST::ExprNode_BinOp) )
+ paren_wrap(n.m_right);
+ else
+ AST::NodeVisitor::visit(n.m_right);
+ }
+
+
private:
+ void paren_wrap(::std::unique_ptr<AST::ExprNode>& node) {
+ m_os << "(";
+ AST::NodeVisitor::visit(node);
+ m_os << ")";
+ }
+
void print_params(const AST::TypeParams& params);
void print_bounds(const AST::TypeParams& params);
void print_pattern(const AST::Pattern& p);
@@ -45,11 +231,32 @@ void Dump_Rust(const char *Filename, const AST::Crate& crate)
void RustPrinter::handle_module(const AST::Module& mod)
{
- m_os << "\n";
+ bool need_nl = true;
+
+ for( const auto& i : mod.imports() )
+ {
+ //if(need_nl) {
+ // m_os << "\n";
+ // need_nl = false;
+ //}
+ m_os << indent() << (i.is_pub ? "pub " : "") << "use " << i.data;
+ if( i.name == "" )
+ {
+ m_os << "::*";
+ }
+ else if( i.data.nodes().back().name() != i.name )
+ {
+ m_os << " as " << i.name;
+ }
+ m_os << ";\n";
+ }
+ need_nl = true;
for( const auto& sm : mod.submods() )
{
- m_os << indent() << (sm.second ? "pub " : "") << "mod " << sm.first.name() << " {\n";
+ m_os << "\n";
+ m_os << indent() << (sm.second ? "pub " : "") << "mod " << sm.first.name() << "\n";
+ m_os << indent() << "{\n";
inc_indent();
handle_module(sm.first);
dec_indent();
@@ -57,27 +264,47 @@ void RustPrinter::handle_module(const AST::Module& mod)
m_os << "\n";
}
+ for( const auto& i : mod.type_aliases() )
+ {
+ if(need_nl) {
+ m_os << "\n";
+ need_nl = false;
+ }
+ m_os << indent() << (i.is_pub ? "pub " : "") << "type " << i.name;
+ print_params(i.data.params());
+ m_os << " = " << i.data.type();
+ print_bounds(i.data.params());
+ m_os << ";\n";
+ }
+ need_nl = true;
for( const auto& i : mod.structs() )
{
+ m_os << "\n";
m_os << indent() << (i.is_pub ? "pub " : "") << "struct " << i.name;
handle_struct(i.data);
}
for( const auto& i : mod.enums() )
{
+ m_os << "\n";
m_os << indent() << (i.is_pub ? "pub " : "") << "enum " << i.name;
handle_enum(i.data);
}
for( const auto& i : mod.traits() )
{
+ m_os << "\n";
m_os << indent() << (i.is_pub ? "pub " : "") << "trait " << i.name;
handle_trait(i.data);
}
for( const auto& i : mod.statics() )
{
+ if(need_nl) {
+ m_os << "\n";
+ need_nl = false;
+ }
m_os << indent() << (i.is_pub ? "pub " : "");
switch( i.data.s_class() )
{
@@ -86,14 +313,41 @@ void RustPrinter::handle_module(const AST::Module& mod)
case AST::Static::MUT: m_os << "static mut "; break;
}
m_os << i.name << ": " << i.data.type() << " = ";
- //handle_expr(i.data.value());
+ i.data.value().visit_nodes(*this);
m_os << ";\n";
}
for( const auto& i : mod.functions() )
{
+ m_os << "\n";
handle_function(i);
}
+
+ for( const auto& i : mod.impls() )
+ {
+ m_os << "\n";
+ m_os << indent() << "impl";
+ print_params(i.params());
+ if( i.trait() != TypeRef() )
+ {
+ m_os << " " << i.trait() << " for";
+ }
+ m_os << " " << i.type() << "\n";
+
+ print_bounds(i.params());
+ m_os << indent() << "{\n";
+ inc_indent();
+ for( const auto& t : i.types() )
+ {
+ m_os << indent() << "type " << t.name << " = " << t.data << ";\n";
+ }
+ for( const auto& t : i.functions() )
+ {
+ handle_function(t);
+ }
+ dec_indent();
+ m_os << indent() << "}\n";
+ }
}
void RustPrinter::print_params(const AST::TypeParams& params)
@@ -230,7 +484,7 @@ void RustPrinter::handle_trait(const AST::Trait& s)
for( const auto& i : s.types() )
{
- m_os << indent() << "type " << i.name << "\n";
+ m_os << indent() << "type " << i.name << ";\n";
}
for( const auto& i : s.functions() )
{
@@ -244,6 +498,7 @@ void RustPrinter::handle_trait(const AST::Trait& s)
void RustPrinter::handle_function(const AST::Item<AST::Function>& f)
{
+ m_os << "\n";
m_os << indent() << (f.is_pub ? "pub " : "") << "fn " << f.name;
print_params(f.data.params());
m_os << "(";
@@ -267,7 +522,10 @@ void RustPrinter::handle_function(const AST::Item<AST::Function>& f)
m_os << "\n";
print_bounds(f.data.params());
- m_os << indent() << f.data.code() << "\n";
+ m_os << indent();
+ f.data.code().visit_nodes(*this);
+ m_os << "\n";
+ //m_os << indent() << f.data.code() << "\n";
}
else
{
diff --git a/src/include/debug.hpp b/src/include/debug.hpp
new file mode 100644
index 00000000..012ef1bb
--- /dev/null
+++ b/src/include/debug.hpp
@@ -0,0 +1,26 @@
+/*
+ */
+#pragma once
+#include <sstream>
+
+extern int g_debug_indent_level;
+
+#define INDENT() do { g_debug_indent_level += 1; } while(0)
+#define UNINDENT() do { g_debug_indent_level -= 1; } while(0)
+#define DEBUG(ss) do{ if(debug_enabled()) { debug_output(g_debug_indent_level, __FUNCTION__) << ss << ::std::endl; } } while(0)
+
+extern bool debug_enabled();
+extern ::std::ostream& debug_output(int indent, const char* function);
+
+struct RepeatLitStr
+{
+ const char *s;
+ int n;
+
+ friend ::std::ostream& operator<<(::std::ostream& os, const RepeatLitStr& r) {
+ for(int i = 0; i < r.n; i ++ )
+ os << r.s;
+ return os;
+ }
+};
+
diff --git a/src/include/rustic.hpp b/src/include/rustic.hpp
new file mode 100644
index 00000000..0e46777d
--- /dev/null
+++ b/src/include/rustic.hpp
@@ -0,0 +1,100 @@
+/*
+ */
+#pragma once
+
+template<typename T>
+class slice
+{
+ T* m_first;
+ unsigned int m_len;
+public:
+ slice(::std::vector<T>& v):
+ m_first(&v[0]),
+ m_len(v.size())
+ {}
+
+ ::std::vector<T> to_vec() const {
+ return ::std::vector<T>(begin(), end());
+ }
+
+ unsigned int size() const {
+ return m_len;
+ }
+ T& operator[](unsigned int i) const {
+ assert(i < m_len);
+ return m_first[i];
+ }
+
+ T* begin() const { return m_first; }
+ T* end() const { return m_first + m_len; }
+};
+
+template<typename T>
+::std::ostream& operator<<(::std::ostream& os, slice<T> s) {
+ if( s.size() > 0 )
+ {
+ bool is_first = true;
+ for( const auto& i : s )
+ {
+ if(!is_first)
+ os << ", ";
+ is_first = false;
+ os << i;
+ }
+ }
+ return os;
+}
+
+namespace rust {
+
+template<typename T>
+class option
+{
+ bool m_set;
+ T m_data;
+public:
+ option(T ent):
+ m_set(true),
+ m_data( ::std::move(ent) )
+ {}
+ option():
+ m_set(false)
+ {}
+
+ bool is_none() const { return !m_set; }
+ bool is_some() const { return m_set; }
+
+ const T& unwrap() const {
+ assert(is_some());
+ return m_data;
+ }
+};
+template<typename T>
+class option<T&>
+{
+ T* m_ptr;
+public:
+ option(T& ent):
+ m_ptr(&ent)
+ {}
+ option():
+ m_ptr(nullptr)
+ {}
+
+ bool is_none() const { return m_ptr == nullptr; }
+ bool is_some() const { return m_ptr != nullptr; }
+ T& unwrap() const {
+ assert(is_some());
+ return *m_ptr;
+ }
+};
+template<typename T>
+option<T> Some(T data) {
+ return option<T>( ::std::move(data) );
+}
+template<typename T>
+option<T> None() {
+ return option<T>( );
+}
+
+};
diff --git a/src/main.cpp b/src/main.cpp
index 9e533139..6aeba895 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -11,6 +11,17 @@
int g_debug_indent_level = 0;
+::std::string g_cur_phase;
+
+bool debug_enabled()
+{
+
+ return true;
+}
+::std::ostream& debug_output(int indent, const char* function)
+{
+ return ::std::cout << g_cur_phase << "- " << RepeatLitStr { " ", indent } << function << ": ";
+}
/// main!
int main(int argc, char *argv[])
@@ -79,26 +90,35 @@ int main(int argc, char *argv[])
//Serialiser& s = s_tt;
try
{
+ g_cur_phase = "Parse";
AST::Crate crate = Parse_Crate(infile);
+ g_cur_phase = "PostParse";
crate.post_parse();
//s << crate;
// Resolve names to be absolute names (include references to the relevant struct/global/function)
+ g_cur_phase = "Resolve";
ResolvePaths(crate);
//s << crate;
// Typecheck / type propagate module (type annotations of all values)
// - Check all generic conditions (ensure referenced trait is valid)
// > Also mark parameter with applicable traits
+ #if 0
+ g_cur_phase = "TypecheckBounds";
Typecheck_GenericBounds(crate);
// - Check all generic parameters match required conditions
+ g_cur_phase = "TypecheckParams";
Typecheck_GenericParams(crate);
// - Typecheck statics and consts
// - Typecheck + propagate functions
// > Forward pass first
- Typecheck_Expr(crate);
+ //g_cur_phase = "TypecheckExpr";
+ //Typecheck_Expr(crate);
+ #endif
+ g_cur_phase = "Output";
Dump_Rust( FMT(outfile << ".rs").c_str(), crate );
if( strcmp(emit_type, "ast") == 0 )
@@ -109,6 +129,7 @@ int main(int argc, char *argv[])
return 0;
}
// Flatten modules into "mangled" set
+ g_cur_phase = "Flatten";
AST::Flat flat_crate = Convert_Flatten(crate);
// Convert structures to C structures / tagged enums
diff --git a/src/parse/lex.cpp b/src/parse/lex.cpp
index 7dd8eb3c..2d6873d0 100644
--- a/src/parse/lex.cpp
+++ b/src/parse/lex.cpp
@@ -1,652 +1,653 @@
-/*
- * "MRustC" - Primitive rust compiler in C++
- */
-/**
- * \file parse/lex.cpp
- * \brief Low-level lexer
- */
-#include "lex.hpp"
-#include "tokentree.hpp"
-#include "parseerror.hpp"
-#include <cassert>
-#include <iostream>
-#include <cstdlib> // strtol
-#include <typeinfo>
-
-Lexer::Lexer(::std::string filename):
- m_istream(filename.c_str()),
- m_last_char_valid(false)
-{
- if( !m_istream.is_open() )
- {
- throw ::std::runtime_error("Unable to open file");
- }
-}
-
-#define LINECOMMENT -1
-#define BLOCKCOMMENT -2
-#define SINGLEQUOTE -3
-#define DOUBLEQUOTE -4
-
-// NOTE: This array must be kept reverse sorted
-#define TOKENT(str, sym) {sizeof(str)-1, str, sym}
-static const struct {
- unsigned char len;
- const char* chars;
- signed int type;
-} TOKENMAP[] = {
- TOKENT("!" , TOK_EXCLAM),
- TOKENT("!=", TOK_EXCLAM_EQUAL),
- TOKENT("\"", DOUBLEQUOTE),
- TOKENT("#", 0),
- TOKENT("#![",TOK_CATTR_OPEN),
- TOKENT("#[", TOK_ATTR_OPEN),
- //TOKENT("$", 0),
- TOKENT("%" , TOK_PERCENT),
- TOKENT("%=", TOK_PERCENT_EQUAL),
- TOKENT("&" , TOK_AMP),
- TOKENT("&&", TOK_DOUBLE_AMP),
- TOKENT("&=", TOK_AMP_EQUAL),
- TOKENT("'" , SINGLEQUOTE),
- TOKENT("(" , TOK_PAREN_OPEN),
- TOKENT(")" , TOK_PAREN_CLOSE),
- TOKENT("*" , TOK_STAR),
- TOKENT("*=", TOK_STAR_EQUAL),
- TOKENT("+" , TOK_PLUS),
- TOKENT("+=", TOK_PLUS_EQUAL),
- TOKENT("," , TOK_COMMA),
- TOKENT("-" , TOK_DASH),
- TOKENT("-=", TOK_DASH_EQUAL),
- TOKENT("->", TOK_THINARROW),
- TOKENT(".", TOK_DOT),
- TOKENT("..", TOK_DOUBLE_DOT),
- TOKENT("...",TOK_TRIPLE_DOT),
- TOKENT("/" , TOK_SLASH),
- TOKENT("/*", BLOCKCOMMENT),
- TOKENT("//", LINECOMMENT),
- TOKENT("/=", TOK_SLASH_EQUAL),
- // 0-9 :: Elsewhere
- TOKENT(":", TOK_COLON),
- TOKENT("::", TOK_DOUBLE_COLON),
- TOKENT(";", TOK_SEMICOLON),
- TOKENT("<", TOK_LT),
- TOKENT("<<", TOK_DOUBLE_LT),
- TOKENT("<=", TOK_LTE),
- TOKENT("=" , TOK_EQUAL),
- TOKENT("==", TOK_DOUBLE_EQUAL),
- TOKENT("=>", TOK_FATARROW),
- TOKENT(">", TOK_GT),
- TOKENT(">>", TOK_DOUBLE_GT),
- TOKENT(">=", TOK_GTE),
- TOKENT("?", TOK_QMARK),
- TOKENT("@", TOK_AT),
- // A-Z :: Elsewhere
- TOKENT("[", TOK_SQUARE_OPEN),
- TOKENT("\\", TOK_BACKSLASH),
- TOKENT("]", TOK_SQUARE_CLOSE),
- TOKENT("^", TOK_CARET),
- TOKENT("`", TOK_BACKTICK),
-
- TOKENT("{", TOK_BRACE_OPEN),
- TOKENT("|", TOK_PIPE),
- TOKENT("|=", TOK_PIPE_EQUAL),
- TOKENT("||", TOK_DOUBLE_PIPE),
- TOKENT("}", TOK_BRACE_CLOSE),
- TOKENT("~", TOK_TILDE),
-};
-#define LEN(arr) (sizeof(arr)/sizeof(arr[0]))
-static const struct {
- unsigned char len;
- const char* chars;
- signed int type;
-} RWORDS[] = {
- TOKENT("abstract",TOK_RWORD_ABSTRACT),
- TOKENT("alignof", TOK_RWORD_ALIGNOF),
- TOKENT("as", TOK_RWORD_AS),
- TOKENT("be", TOK_RWORD_BE),
- TOKENT("box", TOK_RWORD_BOX),
- TOKENT("break", TOK_RWORD_BREAK),
- TOKENT("const", TOK_RWORD_CONST),
- TOKENT("continue",TOK_RWORD_CONTINUE),
- TOKENT("crate", TOK_RWORD_CRATE),
- TOKENT("do", TOK_RWORD_DO),
- TOKENT("else", TOK_RWORD_ELSE),
- TOKENT("enum", TOK_RWORD_ENUM),
- TOKENT("extern", TOK_RWORD_EXTERN),
- TOKENT("false", TOK_RWORD_FALSE),
- TOKENT("final", TOK_RWORD_FINAL),
- TOKENT("fn", TOK_RWORD_FN),
- TOKENT("for", TOK_RWORD_FOR),
- TOKENT("if", TOK_RWORD_IF),
- TOKENT("impl", TOK_RWORD_IMPL),
- TOKENT("in", TOK_RWORD_IN),
- TOKENT("let", TOK_RWORD_LET),
- TOKENT("loop", TOK_RWORD_LOOP),
- TOKENT("match", TOK_RWORD_MATCH),
- TOKENT("mod", TOK_RWORD_MOD),
- TOKENT("move", TOK_RWORD_MOVE),
- TOKENT("mut", TOK_RWORD_MUT),
- TOKENT("offsetof",TOK_RWORD_OFFSETOF),
- TOKENT("once", TOK_RWORD_ONCE),
- TOKENT("override",TOK_RWORD_OVERRIDE),
- TOKENT("priv", TOK_RWORD_PRIV),
- TOKENT("proc", TOK_RWORD_PROC),
- TOKENT("pub", TOK_RWORD_PUB),
- TOKENT("pure", TOK_RWORD_PURE),
- TOKENT("ref", TOK_RWORD_REF),
- TOKENT("return", TOK_RWORD_RETURN),
- TOKENT("self", TOK_RWORD_SELF),
- TOKENT("sizeof", TOK_RWORD_SIZEOF),
- TOKENT("static", TOK_RWORD_STATIC),
- TOKENT("struct", TOK_RWORD_STRUCT),
- TOKENT("super", TOK_RWORD_SUPER),
- TOKENT("trait", TOK_RWORD_TRAIT),
- TOKENT("true", TOK_RWORD_TRUE),
- TOKENT("type", TOK_RWORD_TYPE),
- TOKENT("typeof", TOK_RWORD_TYPEOF),
- TOKENT("unsafe", TOK_RWORD_UNSAFE),
- TOKENT("unsized", TOK_RWORD_UNSIZED),
- TOKENT("use", TOK_RWORD_USE),
- TOKENT("virtual", TOK_RWORD_VIRTUAL),
- TOKENT("where", TOK_RWORD_WHERE),
- TOKENT("while", TOK_RWORD_WHILE),
- TOKENT("yield", TOK_RWORD_YIELD),
-};
-
-signed int Lexer::getSymbol()
-{
- char ch = this->getc();
- // 1. lsearch for character
- // 2. Consume as many characters as currently match
- // 3. IF: a smaller character or, EOS is hit - Return current best
- unsigned ofs = 0;
- signed int best = 0;
- for(unsigned i = 0; i < LEN(TOKENMAP); i ++)
- {
- const char* const chars = TOKENMAP[i].chars;
- const size_t len = TOKENMAP[i].len;
-
- //::std::cout << "ofs=" << ofs << ", chars[ofs] = " << chars[ofs] << ", ch = " << ch << ", len = " << len << ::std::endl;
-
- if( ofs >= len || chars[ofs] > ch ) {
- this->putback();
- return best;
- }
-
- while( chars[ofs] && chars[ofs] == ch )
- {
- ch = this->getc();
- ofs ++;
- }
- if( chars[ofs] == 0 )
- {
- best = TOKENMAP[i].type;
- }
- }
-
- this->putback();
- return best;
-}
-
-bool issym(char ch)
-{
- if( ::std::isalnum(ch) )
- return true;
- if( ch == '_' )
- return true;
- if( ch == '$' )
- return true;
- return false;
-}
-
-Token Lexer::getToken()
-{
- try
- {
- char ch = this->getc();
-
- if( isspace(ch) )
- {
- while( isspace(this->getc()) )
- ;
- this->putback();
- return Token(TOK_WHITESPACE);
- }
- this->putback();
-
- const signed int sym = this->getSymbol();
- if( sym == 0 )
- {
- // No match at all, check for symbol
- char ch = this->getc();
- if( isdigit(ch) )
- {
- // TODO: handle integers/floats
- uint64_t val = 0;
- if( ch == '0' ) {
- // Octal/hex handling
- ch = this->getc();
- if( ch == 'x' ) {
- while( isxdigit(ch = this->getc()) ) {
- val *= val * 16;
- if(ch <= '9')
- val += ch - '0';
- else if( ch <= 'F' )
- val += ch - 'A' + 10;
- else if( ch <= 'f' )
- val += ch - 'a' + 10;
- }
- }
- else if( isdigit(ch) ) {
- throw ParseError::Todo("Lex octal numbers");
- }
- else {
- val = 0;
- }
- }
- else {
- while( isdigit(ch) ) {
- val *= val * 10;
- val += ch - '0';
- ch = this->getc();
- }
- }
-
- if(ch == 'u' || ch == 'i') {
- // Unsigned
- throw ParseError::Todo("Lex number suffixes");
- }
- else if( ch == '.' ) {
- throw ParseError::Todo("Lex floats");
- }
- else {
- this->putback();
- return Token(val, CORETYPE_ANY);
- }
- }
- else if( issym(ch) )
- {
- ::std::string str;
- while( issym(ch) )
- {
- str.push_back(ch);
- ch = this->getc();
- }
-
- if( ch == '!' )
- {
- return Token(TOK_MACRO, str);
- }
- else
- {
- this->putback();
- for( unsigned int i = 0; i < LEN(RWORDS); i ++ )
- {
- if( str < RWORDS[i].chars ) break;
- if( str == RWORDS[i].chars ) return Token((enum eTokenType)RWORDS[i].type);
- }
- return Token(TOK_IDENT, str);
- }
- }
- else
- {
- throw ParseError::BadChar(ch);
- }
- }
- else if( sym > 0 )
- {
- return Token((enum eTokenType)sym);
- }
- else
- {
- switch(sym)
- {
- case LINECOMMENT: {
- // Line comment
- ::std::string str;
- char ch = this->getc();
- while(ch != '\n' && ch != '\r')
- {
- str.push_back(ch);
- ch = this->getc();
- }
- return Token(TOK_COMMENT, str); }
- case BLOCKCOMMENT: {
- ::std::string str;
- while(true)
- {
- if( ch == '*' ) {
- ch = this->getc();
- if( ch == '/' ) break;
- this->putback();
- }
- str.push_back(ch);
- ch = this->getc();
- }
- return Token(TOK_COMMENT, str); }
- case SINGLEQUOTE: {
- char firstchar = this->getc();
- if( firstchar != '\\' ) {
- ch = this->getc();
- if( ch == '\'' ) {
- // Character constant
- return Token((uint64_t)ch, CORETYPE_CHAR);
- }
- else {
- // Lifetime name
- ::std::string str;
- str.push_back(firstchar);
- while( issym(ch) )
- {
- str.push_back(ch);
- ch = this->getc();
- }
- this->putback();
- return Token(TOK_LIFETIME, str);
- }
- }
- else {
- // Character constant with an escape code
- uint32_t val = this->parseEscape('\'');
- if(this->getc() != '\'') {
- throw ParseError::Todo("Proper error for lex failures");
- }
- return Token((uint64_t)val, CORETYPE_CHAR);
- }
- break; }
- case DOUBLEQUOTE:
- throw ParseError::Todo("Strings");
- break;
- default:
- assert(!"bugcheck");
- }
- }
- }
- catch(const Lexer::EndOfFile& e)
- {
- return Token(TOK_EOF);
- }
- //assert(!"bugcheck");
-}
-
-uint32_t Lexer::parseEscape(char enclosing)
-{
- char ch = this->getc();
- switch(ch)
- {
- case 'u': {
- // Unicode (up to six hex digits)
- uint32_t val = 0;
- ch = this->getc();
- if( !isxdigit(ch) )
- throw ParseError::Todo("Proper lex error for escape sequences");
- while( isxdigit(ch) )
- {
- char tmp[2] = {ch, 0};
- val *= 16;
- val += ::std::strtol(tmp, NULL, 16);
- ch = this->getc();
- }
- this->putback();
- return val; }
- case '\\':
- return '\\';
- default:
- throw ParseError::Todo("Proper lex error for escape sequences");
- }
-}
-
-char Lexer::getc()
-{
- if( m_last_char_valid )
- {
- m_last_char_valid = false;
- }
- else
+/*
+ * "MRustC" - Primitive rust compiler in C++
+ */
+/**
+ * \file parse/lex.cpp
+ * \brief Low-level lexer
+ */
+#include "lex.hpp"
+#include "tokentree.hpp"
+#include "parseerror.hpp"
+#include <cassert>
+#include <iostream>
+#include <cstdlib> // strtol
+#include <typeinfo>
+
+/// Opens `filename` for lexing.
+/// Starts with no pushed-back character.
+/// \throws ::std::runtime_error when the file cannot be opened
+Lexer::Lexer(::std::string filename):
+    m_istream(filename.c_str()),
+    m_last_char_valid(false)
+{
+    const bool opened = m_istream.is_open();
+    if( opened )
+        return;
+    throw ::std::runtime_error("Unable to open file");
+}
+
+#define LINECOMMENT -1
+#define BLOCKCOMMENT -2
+#define SINGLEQUOTE -3
+#define DOUBLEQUOTE -4
+
+// NOTE: This array must be kept sorted in ASCENDING byte order (strcmp order).
+//  Lexer::getSymbol() walks it front to back and gives up as soon as an entry's
+//  next character is greater than the input character, so an entry placed after
+//  a "larger" one can never be matched (this is what used to hide ">=" behind ">>").
+// TOKENT builds one table row: {length of the text, the text, the token type}.
+#define TOKENT(str, sym) {sizeof(str)-1, str, sym}
+static const struct {
+    unsigned char len;  // length of `chars`, excluding the terminator
+    const char* chars;  // symbol text
+    signed int type;    // token type; negative values are the lexer-internal codes above, 0 means "no token"
+} TOKENMAP[] = {
+    TOKENT("!" , TOK_EXCLAM),
+    TOKENT("!=", TOK_EXCLAM_EQUAL),
+    TOKENT("\"", DOUBLEQUOTE),
+    TOKENT("#", 0),
+    TOKENT("#![",TOK_CATTR_OPEN),
+    TOKENT("#[", TOK_ATTR_OPEN),
+    //TOKENT("$", 0),
+    TOKENT("%" , TOK_PERCENT),
+    TOKENT("%=", TOK_PERCENT_EQUAL),
+    TOKENT("&" , TOK_AMP),
+    TOKENT("&&", TOK_DOUBLE_AMP),
+    TOKENT("&=", TOK_AMP_EQUAL),
+    TOKENT("'" , SINGLEQUOTE),
+    TOKENT("(" , TOK_PAREN_OPEN),
+    TOKENT(")" , TOK_PAREN_CLOSE),
+    TOKENT("*" , TOK_STAR),
+    TOKENT("*=", TOK_STAR_EQUAL),
+    TOKENT("+" , TOK_PLUS),
+    TOKENT("+=", TOK_PLUS_EQUAL),
+    TOKENT("," , TOK_COMMA),
+    TOKENT("-" , TOK_DASH),
+    TOKENT("-=", TOK_DASH_EQUAL),
+    TOKENT("->", TOK_THINARROW),
+    TOKENT(".", TOK_DOT),
+    TOKENT("..", TOK_DOUBLE_DOT),
+    TOKENT("...",TOK_TRIPLE_DOT),
+    TOKENT("/" , TOK_SLASH),
+    TOKENT("/*", BLOCKCOMMENT),
+    TOKENT("//", LINECOMMENT),
+    TOKENT("/=", TOK_SLASH_EQUAL),
+    // 0-9 :: Elsewhere
+    TOKENT(":", TOK_COLON),
+    TOKENT("::", TOK_DOUBLE_COLON),
+    TOKENT(";", TOK_SEMICOLON),
+    TOKENT("<", TOK_LT),
+    TOKENT("<<", TOK_DOUBLE_LT),
+    TOKENT("<=", TOK_LTE),
+    TOKENT("=" , TOK_EQUAL),
+    TOKENT("==", TOK_DOUBLE_EQUAL),
+    TOKENT("=>", TOK_FATARROW),
+    TOKENT(">", TOK_GT),
+    // '=' (0x3D) sorts before '>' (0x3E), so ">=" must precede ">>"
+    TOKENT(">=", TOK_GTE),
+    TOKENT(">>", TOK_DOUBLE_GT),
+    TOKENT("?", TOK_QMARK),
+    TOKENT("@", TOK_AT),
+    // A-Z :: Elsewhere
+    TOKENT("[", TOK_SQUARE_OPEN),
+    TOKENT("\\", TOK_BACKSLASH),
+    TOKENT("]", TOK_SQUARE_CLOSE),
+    TOKENT("^", TOK_CARET),
+    TOKENT("`", TOK_BACKTICK),
+
+    TOKENT("{", TOK_BRACE_OPEN),
+    TOKENT("|", TOK_PIPE),
+    TOKENT("|=", TOK_PIPE_EQUAL),
+    TOKENT("||", TOK_DOUBLE_PIPE),
+    TOKENT("}", TOK_BRACE_CLOSE),
+    TOKENT("~", TOK_TILDE),
+};
+#define LEN(arr) (sizeof(arr)/sizeof(arr[0])) // element count of a true array (not valid for pointers)
+static const struct { // reserved-word table; same row layout as TOKENMAP
+ unsigned char len; // length of `chars`, excluding the terminator
+ const char* chars; // keyword text
+ signed int type; // TOK_RWORD_* value returned for this keyword
+} RWORDS[] = { // kept in ascending string order: the lookup in Lexer::getToken() stops at the first entry greater than the identifier
+ TOKENT("abstract",TOK_RWORD_ABSTRACT),
+ TOKENT("alignof", TOK_RWORD_ALIGNOF),
+ TOKENT("as", TOK_RWORD_AS),
+ TOKENT("be", TOK_RWORD_BE),
+ TOKENT("box", TOK_RWORD_BOX),
+ TOKENT("break", TOK_RWORD_BREAK),
+ TOKENT("const", TOK_RWORD_CONST),
+ TOKENT("continue",TOK_RWORD_CONTINUE),
+ TOKENT("crate", TOK_RWORD_CRATE),
+ TOKENT("do", TOK_RWORD_DO),
+ TOKENT("else", TOK_RWORD_ELSE),
+ TOKENT("enum", TOK_RWORD_ENUM),
+ TOKENT("extern", TOK_RWORD_EXTERN),
+ TOKENT("false", TOK_RWORD_FALSE),
+ TOKENT("final", TOK_RWORD_FINAL),
+ TOKENT("fn", TOK_RWORD_FN),
+ TOKENT("for", TOK_RWORD_FOR),
+ TOKENT("if", TOK_RWORD_IF),
+ TOKENT("impl", TOK_RWORD_IMPL),
+ TOKENT("in", TOK_RWORD_IN),
+ TOKENT("let", TOK_RWORD_LET),
+ TOKENT("loop", TOK_RWORD_LOOP),
+ TOKENT("match", TOK_RWORD_MATCH),
+ TOKENT("mod", TOK_RWORD_MOD),
+ TOKENT("move", TOK_RWORD_MOVE),
+ TOKENT("mut", TOK_RWORD_MUT),
+ TOKENT("offsetof",TOK_RWORD_OFFSETOF),
+ TOKENT("once", TOK_RWORD_ONCE),
+ TOKENT("override",TOK_RWORD_OVERRIDE),
+ TOKENT("priv", TOK_RWORD_PRIV),
+ TOKENT("proc", TOK_RWORD_PROC),
+ TOKENT("pub", TOK_RWORD_PUB),
+ TOKENT("pure", TOK_RWORD_PURE),
+ TOKENT("ref", TOK_RWORD_REF),
+ TOKENT("return", TOK_RWORD_RETURN),
+ TOKENT("self", TOK_RWORD_SELF),
+ TOKENT("sizeof", TOK_RWORD_SIZEOF),
+ TOKENT("static", TOK_RWORD_STATIC),
+ TOKENT("struct", TOK_RWORD_STRUCT),
+ TOKENT("super", TOK_RWORD_SUPER),
+ TOKENT("trait", TOK_RWORD_TRAIT),
+ TOKENT("true", TOK_RWORD_TRUE),
+ TOKENT("type", TOK_RWORD_TYPE),
+ TOKENT("typeof", TOK_RWORD_TYPEOF),
+ TOKENT("unsafe", TOK_RWORD_UNSAFE),
+ TOKENT("unsized", TOK_RWORD_UNSIZED),
+ TOKENT("use", TOK_RWORD_USE),
+ TOKENT("virtual", TOK_RWORD_VIRTUAL),
+ TOKENT("where", TOK_RWORD_WHERE),
+ TOKENT("while", TOK_RWORD_WHILE),
+ TOKENT("yield", TOK_RWORD_YIELD),
+};
+
+signed int Lexer::getSymbol() // Matches the upcoming input against TOKENMAP, preferring the longest entry; returns that entry's type, or 0 when nothing matched
+{
+ char ch = this->getc(); // current look-ahead character
+ // 1. lsearch for character
+ // 2. Consume as many characters as currently match
+ // 3. IF: a smaller character or, EOS is hit - Return current best
+ unsigned ofs = 0; // number of input characters matched and consumed so far
+ signed int best = 0; // type of the longest fully matched entry so far
+ for(unsigned i = 0; i < LEN(TOKENMAP); i ++)
+ {
+ const char* const chars = TOKENMAP[i].chars;
+ const size_t len = TOKENMAP[i].len;
+
+ //::std::cout << "ofs=" << ofs << ", chars[ofs] = " << chars[ofs] << ", ch = " << ch << ", len = " << len << ::std::endl;
+
+ if( ofs >= len || chars[ofs] > ch ) { // entry is too short, or its next character already exceeds the input: stop (relies on the table's sort order)
+ this->putback(); // un-read the single look-ahead character
+ return best;
+ }
+
+ while( chars[ofs] && chars[ofs] == ch ) // extend the match while this entry agrees with the input
+ {
+ ch = this->getc();
+ ofs ++;
+ }
+ if( chars[ofs] == 0 ) // reached the entry's terminator: the whole entry matched
+ {
+ best = TOKENMAP[i].type;
+ }
+ }
+
+ this->putback(); // table exhausted; un-read the look-ahead character
+ return best;
+}
+
+/// Returns true when `ch` may appear inside an identifier:
+/// an alphanumeric character (per the current C locale), '_' or '$'.
+bool issym(char ch)
+{
+    // The <cctype> classifiers require a value representable as unsigned char
+    // (or EOF); handing them a negative plain char is undefined behaviour, so
+    // convert first.
+    if( ::std::isalnum(static_cast<unsigned char>(ch)) )
+        return true;
+    return ch == '_' || ch == '$';
+}
+
+/// Reads the next raw token from the input.
+///
+/// Produces whitespace, comment, integer, identifier / reserved-word, macro
+/// name, lifetime, character-literal and symbol tokens. When the input runs
+/// out (Lexer::EndOfFile raised by getc()) it returns TOK_EOF.
+///
+/// \throws ParseError::Todo    for syntax not handled yet (octal literals,
+///                             number suffixes, floats, strings, unknown escapes)
+/// \throws ParseError::BadChar for a character that cannot start any token
+Token Lexer::getToken()
+{
+    try
+    {
+        char ch = this->getc();
+
+        // A run of whitespace collapses into one token
+        if( isspace(ch) )
+        {
+            while( isspace(this->getc()) )
+                ;
+            this->putback();
+            return Token(TOK_WHITESPACE);
+        }
+        this->putback();
+
+        const signed int sym = this->getSymbol();
+        if( sym == 0 )
+        {
+            // No match at all, check for symbol
+            char ch = this->getc();
+            if( isdigit(ch) )
+            {
+                // TODO: handle integers/floats
+                uint64_t val = 0;
+                if( ch == '0' ) {
+                    // Octal/hex handling
+                    ch = this->getc();
+                    if( ch == 'x' ) {
+                        while( isxdigit(ch = this->getc()) )
+                        {
+                            val *= 16;
+                            if(ch <= '9')
+                                val += ch - '0';
+                            else if( ch <= 'F' )
+                                val += ch - 'A' + 10;
+                            else if( ch <= 'f' )
+                                val += ch - 'a' + 10;
+                        }
+                    }
+                    else if( isdigit(ch) ) {
+                        throw ParseError::Todo("Lex octal numbers");
+                    }
+                    else {
+                        val = 0;
+                    }
+                }
+                else {
+                    while( isdigit(ch) ) {
+                        // Shift one decimal place, then add the digit.
+                        // (The previous `val *= val * 10` squared the running value.)
+                        val = val * 10 + (ch - '0');
+                        ch = this->getc();
+                    }
+                }
+
+                // `ch` now holds the first character after the digits
+                if(ch == 'u' || ch == 'i') {
+                    // Unsigned
+                    throw ParseError::Todo("Lex number suffixes");
+                }
+                else if( ch == '.' ) {
+                    throw ParseError::Todo("Lex floats");
+                }
+                else {
+                    this->putback();
+                    return Token(val, CORETYPE_ANY);
+                }
+            }
+            else if( issym(ch) )
+            {
+                ::std::string str;
+                while( issym(ch) )
+                {
+                    str.push_back(ch);
+                    ch = this->getc();
+                }
+
+                // An identifier directly followed by '!' is a macro name (the '!' is consumed)
+                if( ch == '!' )
+                {
+                    return Token(TOK_MACRO, str);
+                }
+                else
+                {
+                    this->putback();
+                    // RWORDS is sorted, so stop at the first entry past `str`
+                    for( unsigned int i = 0; i < LEN(RWORDS); i ++ )
+                    {
+                        if( str < RWORDS[i].chars ) break;
+                        if( str == RWORDS[i].chars ) return Token((enum eTokenType)RWORDS[i].type);
+                    }
+                    return Token(TOK_IDENT, str);
+                }
+            }
+            else
+            {
+                throw ParseError::BadChar(ch);
+            }
+        }
+        else if( sym > 0 )
+        {
+            return Token((enum eTokenType)sym);
+        }
+        else
+        {
+            // Negative codes are lexer-internal markers needing extra work
+            switch(sym)
+            {
+            case LINECOMMENT: {
+                // Line comment
+                ::std::string str;
+                char ch = this->getc();
+                while(ch != '\n' && ch != '\r')
+                {
+                    str.push_back(ch);
+                    ch = this->getc();
+                }
+                return Token(TOK_COMMENT, str); }
+            case BLOCKCOMMENT: {
+                // Collect everything up to the closing "*/".
+                // Start from a freshly read character: the outer `ch` still
+                // holds the '/' that opened the comment.
+                ::std::string str;
+                char cur = this->getc();
+                while(true)
+                {
+                    if( cur == '*' ) {
+                        const char next = this->getc();
+                        if( next == '/' ) break;
+                        // Lone '*': keep it, then re-examine `next`
+                        // (it may itself be the '*' of the terminator)
+                        str.push_back(cur);
+                        cur = next;
+                        continue;
+                    }
+                    str.push_back(cur);
+                    cur = this->getc();
+                }
+                return Token(TOK_COMMENT, str); }
+            case SINGLEQUOTE: {
+                const char firstchar = this->getc();
+                if( firstchar != '\\' ) {
+                    char next = this->getc();
+                    if( next == '\'' ) {
+                        // Character constant: the value is the character between
+                        // the quotes, not the closing quote itself
+                        return Token((uint64_t)(unsigned char)firstchar, CORETYPE_CHAR);
+                    }
+                    else {
+                        // Lifetime name
+                        ::std::string str;
+                        str.push_back(firstchar);
+                        while( issym(next) )
+                        {
+                            str.push_back(next);
+                            next = this->getc();
+                        }
+                        this->putback();
+                        return Token(TOK_LIFETIME, str);
+                    }
+                }
+                else {
+                    // Character constant with an escape code
+                    uint32_t val = this->parseEscape('\'');
+                    if(this->getc() != '\'') {
+                        throw ParseError::Todo("Proper error for lex failures");
+                    }
+                    return Token((uint64_t)val, CORETYPE_CHAR);
+                }
+                break; }
+            case DOUBLEQUOTE:
+                throw ParseError::Todo("Strings");
+                break;
+            default:
+                assert(!"bugcheck");
+            }
+        }
+    }
+    catch(const Lexer::EndOfFile& e)
+    {
+        return Token(TOK_EOF);
+    }
+    //assert(!"bugcheck");
+}
+
+/// Decodes the escape sequence that follows a backslash (already consumed by the caller).
+///
+/// Supported forms: `\\` (yields a backslash) and `u` followed by one or more
+/// hex digits (yields their value). The first non-hex character after a `u`
+/// escape is pushed back.
+/// \param enclosing Quote character of the surrounding literal (currently unused)
+/// \throws ParseError::Todo for any other escape, or a `u` with no hex digit
+uint32_t Lexer::parseEscape(char enclosing)
+{
+    const char kind = this->getc();
+    if( kind == '\\' )
+        return '\\';
+    if( kind != 'u' )
+        throw ParseError::Todo("Proper lex error for escape sequences");
+
+    // Unicode (up to six hex digits)
+    char digit = this->getc();
+    if( !isxdigit(digit) )
+        throw ParseError::Todo("Proper lex error for escape sequences");
+
+    uint32_t codepoint = 0;
+    do
+    {
+        // Convert one digit at a time via a two-character scratch string
+        char one_digit[2] = {digit, 0};
+        codepoint *= 16;
+        codepoint += ::std::strtol(one_digit, NULL, 16);
+        digit = this->getc();
+    } while( isxdigit(digit) );
+    this->putback();
+    return codepoint;
+}
+
+char Lexer::getc()
+{
+ if( m_last_char_valid )
+ {
+ m_last_char_valid = false;
+ }
+ else
{
m_last_char = m_istream.get();
if( m_istream.eof() )
- throw Lexer::EndOfFile();
- }
- //::std::cout << "getc(): '" << m_last_char << "'" << ::std::endl;
- return m_last_char;
-}
-
-void Lexer::putback()
-{
-// ::std::cout << "putback(): " << m_last_char_valid << " '" << m_last_char << "'" << ::std::endl;
- assert(!m_last_char_valid);
- m_last_char_valid = true;
-}
-
-Token::Token():
- m_type(TOK_NULL),
- m_str("")
-{
-}
-Token::Token(enum eTokenType type):
- m_type(type),
- m_str("")
-{
-}
-Token::Token(enum eTokenType type, ::std::string str):
- m_type(type),
- m_str(str)
-{
-}
-Token::Token(uint64_t val, enum eCoreType datatype):
- m_type(TOK_INTEGER),
- m_datatype(datatype),
- m_intval(val)
-{
-}
-
-const char* Token::typestr(enum eTokenType type)
-{
- switch(type)
- {
- case TOK_NULL: return "TOK_NULL";
- case TOK_EOF: return "TOK_EOF";
-
- case TOK_WHITESPACE: return "TOK_WHITESPACE";
- case TOK_COMMENT: return "TOK_COMMENT";
-
- // Value tokens
- case TOK_IDENT: return "TOK_IDENT";
- case TOK_MACRO: return "TOK_MACRO";
- case TOK_LIFETIME: return "TOK_LIFETIME";
- case TOK_INTEGER: return "TOK_INTEGER";
- case TOK_CHAR: return "TOK_CHAR";
- case TOK_FLOAT: return "TOK_FLOAT";
- case TOK_STRING: return "TOK_STRING";
-
- case TOK_CATTR_OPEN: return "TOK_CATTR_OPEN";
- case TOK_ATTR_OPEN: return "TOK_ATTR_OPEN";
-
- // Symbols
- case TOK_PAREN_OPEN: return "TOK_PAREN_OPEN"; case TOK_PAREN_CLOSE: return "TOK_PAREN_CLOSE";
- case TOK_BRACE_OPEN: return "TOK_BRACE_OPEN"; case TOK_BRACE_CLOSE: return "TOK_BRACE_CLOSE";
- case TOK_LT: return "TOK_LT"; case TOK_GT: return "TOK_GT";
- case TOK_SQUARE_OPEN: return "TOK_SQUARE_OPEN";case TOK_SQUARE_CLOSE: return "TOK_SQUARE_CLOSE";
- case TOK_COMMA: return "TOK_COMMA";
- case TOK_SEMICOLON: return "TOK_SEMICOLON";
- case TOK_COLON: return "TOK_COLON";
- case TOK_DOUBLE_COLON: return "TOK_DOUBLE_COLON";
- case TOK_STAR: return "TOK_STAR"; case TOK_AMP: return "TOK_AMP";
- case TOK_PIPE: return "TOK_PIPE";
-
- case TOK_FATARROW: return "TOK_FATARROW"; // =>
- case TOK_THINARROW: return "TOK_THINARROW"; // ->
-
- case TOK_PLUS: return "TOK_PLUS"; case TOK_DASH: return "TOK_DASH";
- case TOK_EXCLAM: return "TOK_EXCLAM";
- case TOK_PERCENT: return "TOK_PERCENT";
- case TOK_SLASH: return "TOK_SLASH";
-
- case TOK_DOT: return "TOK_DOT";
- case TOK_DOUBLE_DOT: return "TOK_DOUBLE_DOT";
- case TOK_TRIPLE_DOT: return "TOK_TRIPLE_DOT";
-
- case TOK_EQUAL: return "TOK_EQUAL";
- case TOK_PLUS_EQUAL: return "TOK_PLUS_EQUAL";
- case TOK_DASH_EQUAL: return "TOK_DASH_EQUAL";
- case TOK_PERCENT_EQUAL: return "TOK_PERCENT_EQUAL";
- case TOK_SLASH_EQUAL: return "TOK_SLASH_EQUAL";
- case TOK_STAR_EQUAL: return "TOK_STAR_EQUAL";
- case TOK_AMP_EQUAL: return "TOK_AMP_EQUAL";
- case TOK_PIPE_EQUAL: return "TOK_PIPE_EQUAL";
-
- case TOK_DOUBLE_EQUAL: return "TOK_DOUBLE_EQUAL";
- case TOK_EXCLAM_EQUAL: return "TOK_EXCLAM_EQUAL";
- case TOK_GTE: return "TOK_GTE";
- case TOK_LTE: return "TOK_LTE";
-
- case TOK_DOUBLE_AMP: return "TOK_DOUBLE_AMP";
- case TOK_DOUBLE_PIPE: return "TOK_DOUBLE_PIPE";
- case TOK_DOUBLE_LT: return "TOK_DOUBLE_LT";
- case TOK_DOUBLE_GT: return "TOK_DOUBLE_GT";
-
- case TOK_QMARK: return "TOK_QMARK";
- case TOK_AT: return "TOK_AT";
- case TOK_TILDE: return "TOK_TILDE";
- case TOK_BACKSLASH: return "TOK_BACKSLASH";
- case TOK_CARET: return "TOK_CARET";
- case TOK_BACKTICK: return "TOK_BACKTICK";
-
- // Reserved Words
- case TOK_RWORD_PUB: return "TOK_RWORD_PUB";
- case TOK_RWORD_PRIV: return "TOK_RWORD_PRIV";
- case TOK_RWORD_MUT: return "TOK_RWORD_MUT";
- case TOK_RWORD_CONST: return "TOK_RWORD_CONST";
- case TOK_RWORD_STATIC: return "TOK_RWORD_STATIC";
- case TOK_RWORD_UNSAFE: return "TOK_RWORD_UNSAFE";
- case TOK_RWORD_EXTERN: return "TOK_RWORD_EXTERN";
-
- case TOK_RWORD_CRATE: return "TOK_RWORD_CRATE";
- case TOK_RWORD_MOD: return "TOK_RWORD_MOD";
- case TOK_RWORD_STRUCT: return "TOK_RWORD_STRUCT";
- case TOK_RWORD_ENUM: return "TOK_RWORD_ENUM";
- case TOK_RWORD_TRAIT: return "TOK_RWORD_TRAIT";
- case TOK_RWORD_FN: return "TOK_RWORD_FN";
- case TOK_RWORD_USE: return "TOK_RWORD_USE";
- case TOK_RWORD_IMPL: return "TOK_RWORD_IMPL";
- case TOK_RWORD_TYPE: return "TOK_RWORD_TYPE";
-
- case TOK_RWORD_WHERE: return "TOK_RWORD_WHERE";
- case TOK_RWORD_AS: return "TOK_RWORD_AS";
-
- case TOK_RWORD_LET: return "TOK_RWORD_LET";
- case TOK_RWORD_MATCH: return "TOK_RWORD_MATCH";
- case TOK_RWORD_IF: return "TOK_RWORD_IF";
- case TOK_RWORD_ELSE: return "TOK_RWORD_ELSE";
- case TOK_RWORD_LOOP: return "TOK_RWORD_LOOP";
- case TOK_RWORD_WHILE: return "TOK_RWORD_WHILE";
- case TOK_RWORD_FOR: return "TOK_RWORD_FOR";
- case TOK_RWORD_IN: return "TOK_RWORD_IN";
- case TOK_RWORD_DO: return "TOK_RWORD_DO";
-
- case TOK_RWORD_CONTINUE: return "TOK_RWORD_CONTINUE";
- case TOK_RWORD_BREAK: return "TOK_RWORD_BREAK";
- case TOK_RWORD_RETURN: return "TOK_RWORD_RETURN";
- case TOK_RWORD_YIELD: return "TOK_RWORD_YIELD";
- case TOK_RWORD_BOX: return "TOK_RWORD_BOX";
- case TOK_RWORD_REF: return "TOK_RWORD_REF";
-
- case TOK_RWORD_FALSE: return "TOK_RWORD_FALSE";
- case TOK_RWORD_TRUE: return "TOK_RWORD_TRUE";
- case TOK_RWORD_SELF: return "TOK_RWORD_SELF";
- case TOK_RWORD_SUPER: return "TOK_RWORD_SUPER";
-
- case TOK_RWORD_PROC: return "TOK_RWORD_PROC";
- case TOK_RWORD_MOVE: return "TOK_RWORD_MOVE";
- case TOK_RWORD_ONCE: return "TOK_RWORD_ONCE";
-
- case TOK_RWORD_ABSTRACT: return "TOK_RWORD_ABSTRACT";
- case TOK_RWORD_FINAL: return "TOK_RWORD_FINAL";
- case TOK_RWORD_PURE: return "TOK_RWORD_PURE";
- case TOK_RWORD_OVERRIDE: return "TOK_RWORD_OVERRIDE";
- case TOK_RWORD_VIRTUAL: return "TOK_RWORD_VIRTUAL";
-
- case TOK_RWORD_ALIGNOF: return "TOK_RWORD_ALIGNOF";
- case TOK_RWORD_OFFSETOF: return "TOK_RWORD_OFFSETOF";
- case TOK_RWORD_SIZEOF: return "TOK_RWORD_SIZEOF";
- case TOK_RWORD_TYPEOF: return "TOK_RWORD_TYPEOF";
-
- case TOK_RWORD_BE: return "TOK_RWORD_BE";
- case TOK_RWORD_UNSIZED: return "TOK_RWORD_UNSIZED";
- }
- return ">>BUGCHECK: BADTOK<<";
-}
-
-::std::ostream& operator<<(::std::ostream& os, Token& tok)
-{
- os << Token::typestr(tok.type()) << "\"" << tok.str() << "\"";
- return os;
-}
-
-TTStream::TTStream(const TokenTree& input_tt):
- m_input_tt(input_tt)
-{
- m_stack.push_back( ::std::make_pair(0, &input_tt) );
-}
-TTStream::~TTStream()
-{
-}
-Token TTStream::realGetToken()
-{
- while(m_stack.size() > 0)
- {
- // If current index is above TT size, go up
- unsigned int& idx = m_stack.back().first;
- const TokenTree& tree = *m_stack.back().second;
-
- if(idx == 0 && tree.size() == 0) {
- idx ++;
- return tree.tok();
- }
-
- if(idx < tree.size())
- {
- const TokenTree& subtree = tree[idx];
- idx ++;
- if( subtree.size() == 0 ) {
- return subtree.tok();
- }
- else {
- m_stack.push_back( ::std::make_pair(0, &subtree ) );
- }
- }
- else {
- m_stack.pop_back();
- }
- }
- return Token(TOK_EOF);
-}
-
-TokenStream::TokenStream():
- m_cache_valid(false)
-{
-}
-TokenStream::~TokenStream()
-{
-}
-
-Token TokenStream::getToken()
-{
- if( m_cache_valid )
- {
- m_cache_valid = false;
- return m_cache;
- }
- else
- {
- Token ret = this->realGetToken();
- ::std::cout << "getToken[" << typeid(*this).name() << "] - " << ret << ::std::endl;
- return ret;
- }
-}
-void TokenStream::putback(Token tok)
-{
- m_cache_valid = true;
- m_cache = tok;
-}
+ throw Lexer::EndOfFile();
+ }
+ //::std::cout << "getc(): '" << m_last_char << "'" << ::std::endl;
+ return m_last_char;
+}
+
+/// Un-reads the character most recently returned by getc(); the next getc()
+/// call hands it out again. Only one character can be pending at a time,
+/// which is enforced by the assertion.
+void Lexer::putback()
+{
+    assert(!m_last_char_valid);
+    m_last_char_valid = true;
+}
+
+/// Default token: TOK_NULL with an empty string payload.
+Token::Token():
+    m_type(TOK_NULL),
+    m_str()
+{}
+/// Payload-free token of the given type.
+Token::Token(enum eTokenType type):
+    m_type(type),
+    m_str()
+{}
+/// Token carrying a string payload (identifier, comment text, lifetime, ...).
+Token::Token(enum eTokenType type, ::std::string str):
+    m_type(type),
+    m_str(str)
+{}
+/// Integer token holding `val`, tagged with the core type `datatype`.
+Token::Token(uint64_t val, enum eCoreType datatype):
+    m_type(TOK_INTEGER),
+    m_datatype(datatype),
+    m_intval(val)
+{}
+
+const char* Token::typestr(enum eTokenType type)
+{
+ switch(type)
+ {
+ case TOK_NULL: return "TOK_NULL";
+ case TOK_EOF: return "TOK_EOF";
+
+ case TOK_WHITESPACE: return "TOK_WHITESPACE";
+ case TOK_COMMENT: return "TOK_COMMENT";
+
+ // Value tokens
+ case TOK_IDENT: return "TOK_IDENT";
+ case TOK_MACRO: return "TOK_MACRO";
+ case TOK_LIFETIME: return "TOK_LIFETIME";
+ case TOK_INTEGER: return "TOK_INTEGER";
+ case TOK_CHAR: return "TOK_CHAR";
+ case TOK_FLOAT: return "TOK_FLOAT";
+ case TOK_STRING: return "TOK_STRING";
+
+ case TOK_CATTR_OPEN: return "TOK_CATTR_OPEN";
+ case TOK_ATTR_OPEN: return "TOK_ATTR_OPEN";
+
+ // Symbols
+ case TOK_PAREN_OPEN: return "TOK_PAREN_OPEN"; case TOK_PAREN_CLOSE: return "TOK_PAREN_CLOSE";
+ case TOK_BRACE_OPEN: return "TOK_BRACE_OPEN"; case TOK_BRACE_CLOSE: return "TOK_BRACE_CLOSE";
+ case TOK_LT: return "TOK_LT"; case TOK_GT: return "TOK_GT";
+ case TOK_SQUARE_OPEN: return "TOK_SQUARE_OPEN";case TOK_SQUARE_CLOSE: return "TOK_SQUARE_CLOSE";
+ case TOK_COMMA: return "TOK_COMMA";
+ case TOK_SEMICOLON: return "TOK_SEMICOLON";
+ case TOK_COLON: return "TOK_COLON";
+ case TOK_DOUBLE_COLON: return "TOK_DOUBLE_COLON";
+ case TOK_STAR: return "TOK_STAR"; case TOK_AMP: return "TOK_AMP";
+ case TOK_PIPE: return "TOK_PIPE";
+
+ case TOK_FATARROW: return "TOK_FATARROW"; // =>
+ case TOK_THINARROW: return "TOK_THINARROW"; // ->
+
+ case TOK_PLUS: return "TOK_PLUS"; case TOK_DASH: return "TOK_DASH";
+ case TOK_EXCLAM: return "TOK_EXCLAM";
+ case TOK_PERCENT: return "TOK_PERCENT";
+ case TOK_SLASH: return "TOK_SLASH";
+
+ case TOK_DOT: return "TOK_DOT";
+ case TOK_DOUBLE_DOT: return "TOK_DOUBLE_DOT";
+ case TOK_TRIPLE_DOT: return "TOK_TRIPLE_DOT";
+
+ case TOK_EQUAL: return "TOK_EQUAL";
+ case TOK_PLUS_EQUAL: return "TOK_PLUS_EQUAL";
+ case TOK_DASH_EQUAL: return "TOK_DASH_EQUAL";
+ case TOK_PERCENT_EQUAL: return "TOK_PERCENT_EQUAL";
+ case TOK_SLASH_EQUAL: return "TOK_SLASH_EQUAL";
+ case TOK_STAR_EQUAL: return "TOK_STAR_EQUAL";
+ case TOK_AMP_EQUAL: return "TOK_AMP_EQUAL";
+ case TOK_PIPE_EQUAL: return "TOK_PIPE_EQUAL";
+
+ case TOK_DOUBLE_EQUAL: return "TOK_DOUBLE_EQUAL";
+ case TOK_EXCLAM_EQUAL: return "TOK_EXCLAM_EQUAL";
+ case TOK_GTE: return "TOK_GTE";
+ case TOK_LTE: return "TOK_LTE";
+
+ case TOK_DOUBLE_AMP: return "TOK_DOUBLE_AMP";
+ case TOK_DOUBLE_PIPE: return "TOK_DOUBLE_PIPE";
+ case TOK_DOUBLE_LT: return "TOK_DOUBLE_LT";
+ case TOK_DOUBLE_GT: return "TOK_DOUBLE_GT";
+
+ case TOK_QMARK: return "TOK_QMARK";
+ case TOK_AT: return "TOK_AT";
+ case TOK_TILDE: return "TOK_TILDE";
+ case TOK_BACKSLASH: return "TOK_BACKSLASH";
+ case TOK_CARET: return "TOK_CARET";
+ case TOK_BACKTICK: return "TOK_BACKTICK";
+
+ // Reserved Words
+ case TOK_RWORD_PUB: return "TOK_RWORD_PUB";
+ case TOK_RWORD_PRIV: return "TOK_RWORD_PRIV";
+ case TOK_RWORD_MUT: return "TOK_RWORD_MUT";
+ case TOK_RWORD_CONST: return "TOK_RWORD_CONST";
+ case TOK_RWORD_STATIC: return "TOK_RWORD_STATIC";
+ case TOK_RWORD_UNSAFE: return "TOK_RWORD_UNSAFE";
+ case TOK_RWORD_EXTERN: return "TOK_RWORD_EXTERN";
+
+ case TOK_RWORD_CRATE: return "TOK_RWORD_CRATE";
+ case TOK_RWORD_MOD: return "TOK_RWORD_MOD";
+ case TOK_RWORD_STRUCT: return "TOK_RWORD_STRUCT";
+ case TOK_RWORD_ENUM: return "TOK_RWORD_ENUM";
+ case TOK_RWORD_TRAIT: return "TOK_RWORD_TRAIT";
+ case TOK_RWORD_FN: return "TOK_RWORD_FN";
+ case TOK_RWORD_USE: return "TOK_RWORD_USE";
+ case TOK_RWORD_IMPL: return "TOK_RWORD_IMPL";
+ case TOK_RWORD_TYPE: return "TOK_RWORD_TYPE";
+
+ case TOK_RWORD_WHERE: return "TOK_RWORD_WHERE";
+ case TOK_RWORD_AS: return "TOK_RWORD_AS";
+
+ case TOK_RWORD_LET: return "TOK_RWORD_LET";
+ case TOK_RWORD_MATCH: return "TOK_RWORD_MATCH";
+ case TOK_RWORD_IF: return "TOK_RWORD_IF";
+ case TOK_RWORD_ELSE: return "TOK_RWORD_ELSE";
+ case TOK_RWORD_LOOP: return "TOK_RWORD_LOOP";
+ case TOK_RWORD_WHILE: return "TOK_RWORD_WHILE";
+ case TOK_RWORD_FOR: return "TOK_RWORD_FOR";
+ case TOK_RWORD_IN: return "TOK_RWORD_IN";
+ case TOK_RWORD_DO: return "TOK_RWORD_DO";
+
+ case TOK_RWORD_CONTINUE: return "TOK_RWORD_CONTINUE";
+ case TOK_RWORD_BREAK: return "TOK_RWORD_BREAK";
+ case TOK_RWORD_RETURN: return "TOK_RWORD_RETURN";
+ case TOK_RWORD_YIELD: return "TOK_RWORD_YIELD";
+ case TOK_RWORD_BOX: return "TOK_RWORD_BOX";
+ case TOK_RWORD_REF: return "TOK_RWORD_REF";
+
+ case TOK_RWORD_FALSE: return "TOK_RWORD_FALSE";
+ case TOK_RWORD_TRUE: return "TOK_RWORD_TRUE";
+ case TOK_RWORD_SELF: return "TOK_RWORD_SELF";
+ case TOK_RWORD_SUPER: return "TOK_RWORD_SUPER";
+
+ case TOK_RWORD_PROC: return "TOK_RWORD_PROC";
+ case TOK_RWORD_MOVE: return "TOK_RWORD_MOVE";
+ case TOK_RWORD_ONCE: return "TOK_RWORD_ONCE";
+
+ case TOK_RWORD_ABSTRACT: return "TOK_RWORD_ABSTRACT";
+ case TOK_RWORD_FINAL: return "TOK_RWORD_FINAL";
+ case TOK_RWORD_PURE: return "TOK_RWORD_PURE";
+ case TOK_RWORD_OVERRIDE: return "TOK_RWORD_OVERRIDE";
+ case TOK_RWORD_VIRTUAL: return "TOK_RWORD_VIRTUAL";
+
+ case TOK_RWORD_ALIGNOF: return "TOK_RWORD_ALIGNOF";
+ case TOK_RWORD_OFFSETOF: return "TOK_RWORD_OFFSETOF";
+ case TOK_RWORD_SIZEOF: return "TOK_RWORD_SIZEOF";
+ case TOK_RWORD_TYPEOF: return "TOK_RWORD_TYPEOF";
+
+ case TOK_RWORD_BE: return "TOK_RWORD_BE";
+ case TOK_RWORD_UNSIZED: return "TOK_RWORD_UNSIZED";
+ }
+ return ">>BUGCHECK: BADTOK<<";
+}
+
+/// Debug-prints a token as its type name immediately followed by its string
+/// payload in double quotes, e.g. TOK_IDENT"foo".
+::std::ostream& operator<<(::std::ostream& os, Token& tok)
+{
+    const char* const type_name = Token::typestr(tok.type());
+    return os << type_name << "\"" << tok.str() << "\"";
+}
+
+/// Begins streaming tokens out of `input_tt`: the traversal stack is seeded
+/// with the root tree at child index 0.
+TTStream::TTStream(const TokenTree& input_tt):
+    m_input_tt(input_tt)
+{
+    m_stack.push_back( ::std::make_pair(0, &input_tt) );
+}
+/// No explicit cleanup is performed.
+TTStream::~TTStream() {}
+/// Yields the next leaf token of the token tree, walking it depth-first.
+///
+/// The stack holds (next child index, tree) pairs. A tree with no children is
+/// a leaf and contributes its own token; once every tree has been exhausted
+/// TOK_EOF is returned.
+Token TTStream::realGetToken()
+{
+    while( m_stack.size() != 0 )
+    {
+        unsigned int& pos = m_stack.back().first;
+        const TokenTree& node = *m_stack.back().second;
+
+        // The tree on top is itself a leaf: emit it once (pos becomes 1 afterwards)
+        if( pos == 0 && node.size() == 0 ) {
+            pos ++;
+            return node.tok();
+        }
+
+        // All children visited: go back up one level
+        if( !(pos < node.size()) ) {
+            m_stack.pop_back();
+            continue;
+        }
+
+        const TokenTree& child = node[pos];
+        pos ++;
+        if( child.size() == 0 ) {
+            return child.tok();
+        }
+        // Non-leaf child: descend into it (`pos` must not be used past this push)
+        m_stack.push_back( ::std::make_pair(0, &child ) );
+    }
+    return Token(TOK_EOF);
+}
+
+/// Starts with no token pushed back.
+TokenStream::TokenStream():
+    m_cache_valid(false)
+{}
+/// No explicit cleanup is performed.
+TokenStream::~TokenStream() {}
+
+/// Returns the next token: the pushed-back one if present, otherwise a fresh
+/// token from realGetToken(). Each freshly read token is also logged to stdout
+/// together with the dynamic type name of the stream.
+Token TokenStream::getToken()
+{
+    if( !m_cache_valid )
+    {
+        Token fresh = this->realGetToken();
+        ::std::cout << "getToken[" << typeid(*this).name() << "] - " << fresh << ::std::endl;
+        return fresh;
+    }
+
+    // Hand out the pushed-back token exactly once
+    m_cache_valid = false;
+    return m_cache;
+}
+/// Pushes `tok` back so that the next getToken() call returns it.
+/// Only one token is remembered; a second putback overwrites the first.
+void TokenStream::putback(Token tok)
+{
+    m_cache_valid = true;
+    m_cache = tok;
+}