From 04dd6b05f945c944c13431baa509ec628ac26f41 Mon Sep 17 00:00:00 2001 From: "John Hodge (sonata)" Date: Tue, 23 Dec 2014 12:06:25 +0800 Subject: Move source files to src/ folder --- src/ast/ast.cpp | 138 ++++++++++ src/ast/ast.hpp | 193 ++++++++++++++ src/ast/path.hpp | 49 ++++ src/common.hpp | 9 + src/convert/flatten.cpp | 8 + src/convert/render.cpp | 52 ++++ src/convert/resolve.cpp | 47 ++++ src/coretypes.hpp | 18 ++ src/macros.cpp | 143 +++++++++++ src/macros.hpp | 94 +++++++ src/main.cpp | 34 +++ src/parse/common.hpp | 50 ++++ src/parse/expr.cpp | 631 +++++++++++++++++++++++++++++++++++++++++++++ src/parse/lex.cpp | 650 +++++++++++++++++++++++++++++++++++++++++++++++ src/parse/lex.hpp | 239 +++++++++++++++++ src/parse/parseerror.cpp | 51 ++++ src/parse/parseerror.hpp | 67 +++++ src/parse/preproc.cpp | 36 +++ src/parse/preproc.hpp | 20 ++ src/parse/root.cpp | 609 ++++++++++++++++++++++++++++++++++++++++++++ src/parse/tokentree.hpp | 11 + src/types.cpp | 8 + src/types.hpp | 37 +++ 23 files changed, 3194 insertions(+) create mode 100644 src/ast/ast.cpp create mode 100644 src/ast/ast.hpp create mode 100644 src/ast/path.hpp create mode 100644 src/common.hpp create mode 100644 src/convert/flatten.cpp create mode 100644 src/convert/render.cpp create mode 100644 src/convert/resolve.cpp create mode 100644 src/coretypes.hpp create mode 100644 src/macros.cpp create mode 100644 src/macros.hpp create mode 100644 src/main.cpp create mode 100644 src/parse/common.hpp create mode 100644 src/parse/expr.cpp create mode 100644 src/parse/lex.cpp create mode 100644 src/parse/lex.hpp create mode 100644 src/parse/parseerror.cpp create mode 100644 src/parse/parseerror.hpp create mode 100644 src/parse/preproc.cpp create mode 100644 src/parse/preproc.hpp create mode 100644 src/parse/root.cpp create mode 100644 src/parse/tokentree.hpp create mode 100644 src/types.cpp create mode 100644 src/types.hpp (limited to 'src') diff --git a/src/ast/ast.cpp b/src/ast/ast.cpp new file mode 100644 
index 00000000..a39bc0d4 --- /dev/null +++ b/src/ast/ast.cpp @@ -0,0 +1,138 @@ +/* + */ +#include "ast.hpp" +#include "../types.hpp" +#include +#include "../parse/parseerror.hpp" + +namespace AST { + +Path::Path() +{ +} +Path::Path(Path::TagAbsolute) +{ +} + + +PathNode::PathNode(::std::string name, ::std::vector args): + m_name(name), + m_params(args) +{ +} +const ::std::string& PathNode::name() const +{ + return m_name; +} +const ::std::vector& PathNode::args() const +{ + return m_params; +} + +Pattern::Pattern(TagMaybeBind, ::std::string name) +{ +} +Pattern::Pattern(TagValue, ExprNode node) +{ +} +Pattern::Pattern(TagEnumVariant, Path path, ::std::vector sub_patterns) +{ +} + + +Function::Function(::std::string name, TypeParams params, Class fcn_class, TypeRef ret_type, ::std::vector args, Expr code) +{ +} + +Impl::Impl(TypeRef impl_type, TypeRef trait_type) +{ +} +void Impl::add_function(bool is_public, Function fcn) +{ +} + +void Crate::iterate_functions(Crate::fcn_visitor_t* visitor) +{ +} + +void Module::add_constant(bool is_public, ::std::string name, TypeRef type, Expr val) +{ + ::std::cout << "add_constant()" << ::std::endl; +} +void Module::add_global(bool is_public, bool is_mut, ::std::string name, TypeRef type, Expr val) +{ + ::std::cout << "add_global()" << ::std::endl; +} +void Module::add_struct(bool is_public, ::std::string name, TypeParams params, ::std::vector items) +{ +} +void Module::add_function(bool is_public, Function func) +{ +} +void Module::add_impl(Impl impl) +{ +} + +void Expr::visit_nodes(const NodeVisitor& v) +{ + throw ParseError::Todo("Expr::visit_nodes"); +} + +ExprNode::ExprNode() +{ + +} +ExprNode::ExprNode(TagBlock, ::std::vector nodes) +{ +} +ExprNode::ExprNode(TagLetBinding, Pattern pat, ExprNode value) +{ +} +ExprNode::ExprNode(TagReturn, ExprNode val) +{ +} +ExprNode::ExprNode(TagCast, ExprNode value, TypeRef dst_type) +{ +} +ExprNode::ExprNode(TagInteger, uint64_t value, enum eCoreType datatype) +{ +} 
+ExprNode::ExprNode(TagStructLiteral, Path path, ExprNode base_value, ::std::vector< ::std::pair< ::std::string,ExprNode> > values ) +{ +} +ExprNode::ExprNode(TagCallPath, Path path, ::std::vector args) +{ +} +ExprNode::ExprNode(TagCallObject, ExprNode val, ::std::vector args) +{ +} +ExprNode::ExprNode(TagMatch, ExprNode val, ::std::vector< ::std::pair > arms) +{ +} +ExprNode::ExprNode(TagIf, ExprNode cond, ExprNode true_code, ExprNode false_code) +{ +} +ExprNode::ExprNode(TagNamedValue, Path path) +{ +} +ExprNode::ExprNode(TagField, ::std::string name) +{ +} +ExprNode::ExprNode(TagBinOp, BinOpType type, ExprNode left, ExprNode right) +{ +} + +TypeParam::TypeParam(bool is_lifetime, ::std::string name) +{ + +} +void TypeParam::addLifetimeBound(::std::string name) +{ + +} +void TypeParam::addTypeBound(TypeRef type) +{ + +} + +} diff --git a/src/ast/ast.hpp b/src/ast/ast.hpp new file mode 100644 index 00000000..9e4728b3 --- /dev/null +++ b/src/ast/ast.hpp @@ -0,0 +1,193 @@ +#ifndef AST_HPP_INCLUDED +#define AST_HPP_INCLUDED + +#include +#include +#include +#include "../coretypes.hpp" +#include + +#include "../types.hpp" + +namespace AST { + +class MetaItem +{ + ::std::string m_name; + ::std::vector m_items; + ::std::string m_str_val; +public: + MetaItem(::std::string name): + m_name(name) + { + } + MetaItem(::std::string name, ::std::vector items): + m_name(name), + m_items(items) + { + } +}; + +class ExprNode; + +class Pattern +{ +public: + Pattern(); + + struct TagMaybeBind {}; + Pattern(TagMaybeBind, ::std::string name); + + struct TagValue {}; + Pattern(TagValue, ExprNode node); + + struct TagEnumVariant {}; + Pattern(TagEnumVariant, Path path, ::std::vector sub_patterns); +}; + +class ExprNode +{ +public: + ExprNode(); + + struct TagBlock {}; + ExprNode(TagBlock, ::std::vector nodes); + + struct TagLetBinding {}; + ExprNode(TagLetBinding, Pattern pat, ExprNode value); + + struct TagReturn {}; + ExprNode(TagReturn, ExprNode val); + + struct TagAssign {}; + 
ExprNode(TagAssign, ExprNode slot, ExprNode value) {} + + struct TagCast {}; + ExprNode(TagCast, ExprNode value, TypeRef dst_type); + + struct TagInteger {}; + ExprNode(TagInteger, uint64_t value, enum eCoreType datatype); + + struct TagStructLiteral {}; + ExprNode(TagStructLiteral, Path path, ExprNode base_value, ::std::vector< ::std::pair< ::std::string,ExprNode> > values ); + + struct TagCallPath {}; + ExprNode(TagCallPath, Path path, ::std::vector args); + + struct TagCallObject {}; + ExprNode(TagCallObject, ExprNode val, ::std::vector args); + + struct TagMatch {}; + ExprNode(TagMatch, ExprNode val, ::std::vector< ::std::pair > arms); + + struct TagIf {}; + ExprNode(TagIf, ExprNode cond, ExprNode true_code, ExprNode false_code); + + struct TagNamedValue {}; + ExprNode(TagNamedValue, Path path); + + struct TagField {}; + ExprNode(TagField, ::std::string name); + + enum BinOpType { + BINOP_CMPEQU, + BINOP_CMPNEQU, + + BINOP_BITAND, + BINOP_BITOR, + BINOP_BITXOR, + + BINOP_SHL, + BINOP_SHR, + }; + struct TagBinOp {}; + ExprNode(TagBinOp, BinOpType type, ExprNode left, ExprNode right); +}; + +class NodeVisitor +{ +public: + virtual void visit(ExprNode::TagBlock, ExprNode& node) {} + virtual void visit(ExprNode::TagNamedValue, ExprNode& node) {} +}; + +class Expr +{ +public: + Expr() {} + Expr(ExprNode node) {} + + void visit_nodes(const NodeVisitor& v); +}; + +class Function +{ +public: + enum Class + { + CLASS_UNBOUND, + CLASS_REFMETHOD, + CLASS_MUTMETHOD, + CLASS_VALMETHOD, + }; + typedef ::std::vector Arglist; + +private: + Expr m_code; + TypeRef m_rettype; + Arglist m_args; +public: + + Function(::std::string name, TypeParams params, Class fcn_class, TypeRef ret_type, Arglist args, Expr code); + + Expr& code() { return m_code; } + const Expr code() const { return m_code; } + + TypeRef& rettype() { return m_rettype; } + + Arglist& args() { return m_args; } +}; + +class Impl +{ +public: + Impl(TypeRef impl_type, TypeRef trait_type); + + void add_function(bool 
is_public, Function fcn); +}; + +class Module +{ + ::std::vector m_functions; +public: + void add_alias(bool is_public, Path path) {} + void add_constant(bool is_public, ::std::string name, TypeRef type, Expr val); + void add_global(bool is_public, bool is_mut, ::std::string name, TypeRef type, Expr val); + void add_struct(bool is_public, ::std::string name, TypeParams params, ::std::vector items); + void add_function(bool is_public, Function func); + void add_impl(Impl impl); +}; + +class Crate +{ + Module m_root_module; +public: + Crate(Module root_module): + m_root_module(root_module) + { + } + + typedef void fcn_visitor_t(const AST::Crate& crate, Function& fcn); + + void iterate_functions( fcn_visitor_t* visitor ); +}; + +class Flat +{ + ::std::vector m_functions; +public: +}; + +} + +#endif // AST_HPP_INCLUDED diff --git a/src/ast/path.hpp b/src/ast/path.hpp new file mode 100644 index 00000000..09e5d9ed --- /dev/null +++ b/src/ast/path.hpp @@ -0,0 +1,49 @@ +/* + */ +#ifndef AST_PATH_HPP_INCLUDED +#define AST_PATH_HPP_INCLUDED + +#include +#include + +class TypeRef; + +namespace AST { + +class TypeParam +{ +public: + TypeParam(bool is_lifetime, ::std::string name); + void addLifetimeBound(::std::string name); + void addTypeBound(TypeRef type); +}; + +typedef ::std::vector TypeParams; +typedef ::std::pair< ::std::string, TypeRef> StructItem; + +class PathNode +{ + ::std::string m_name; + ::std::vector m_params; +public: + PathNode(::std::string name, ::std::vector args); + const ::std::string& name() const; + const ::std::vector& args() const; +}; + +class Path +{ +public: + Path(); + struct TagAbsolute {}; + Path(TagAbsolute); + + void append(PathNode node) {} + size_t length() const {return 0;} + + PathNode& operator[](size_t idx) { throw ::std::out_of_range("Path []"); } +}; + +} // namespace AST + +#endif diff --git a/src/common.hpp b/src/common.hpp new file mode 100644 index 00000000..deaf4ed1 --- /dev/null +++ b/src/common.hpp @@ -0,0 +1,9 @@ +/* + */ 
+#ifndef COMMON_HPP_INCLUDED +#define COMMON_HPP_INCLUDED + +#define FOREACH(basetype, it, src) for(basetype::const_iterator it = src.begin(); it != src.end(); ++ it) +#define FOREACH_M(basetype, it, src) for(basetype::iterator it = src.begin(); it != src.end(); ++ it) + +#endif diff --git a/src/convert/flatten.cpp b/src/convert/flatten.cpp new file mode 100644 index 00000000..a1c89998 --- /dev/null +++ b/src/convert/flatten.cpp @@ -0,0 +1,8 @@ +/* + */ +#include "../ast/ast.hpp" + +AST::Flat Convert_Flattern(const AST::Crate& crate) +{ + throw ParseError::Todo("Flatten"); +} diff --git a/src/convert/render.cpp b/src/convert/render.cpp new file mode 100644 index 00000000..6de8dc57 --- /dev/null +++ b/src/convert/render.cpp @@ -0,0 +1,52 @@ +/* + */ +#include "../ast/ast.hpp" + +void Render_Type(::std::ostream& os, const TypeRef& type, const char *name) +{ + /* + swicth(type.class()) + { + case TYPECLASS_STRUCT: + os << "struct " << type.struct().mangled_name() << " " << name; + break; + } + */ +} + +void Render_CStruct(::std::ostream& os, const AST::CStruct& str) +{ + os << "struct " << str.name() << "{\n"; + FOREACH(::std::vector >, f, str.fields()) + { + os << "\t"; + Render_Type(os, f->second(), f->first().c_str()); + os << ";\n"; + } + os << "}\n" +} + +void Render_Crate(::std::ostream& os, const AST::Flat& crate) +{ + // First off, print forward declarations of all structs + enums + FOREACH(::std::vector, s, crate.structs()) + os << "struct " << s->mangled_name() << ";\n"; + + FOREACH(::std::vector, fcn, crate.functions()) + { + Render_Type(os, fcn->rettype(), nullptr); + os << " " << fcn->name() << "("; + bool is_first = true; + FOREACH(::std::vector >, f, fcn.args()) + { + if( !is_first ) + os << ", "; + is_first = false; + Render_Type(os, f->second(), f->first().c_str()); + } + os << ")\n{\n"; + // Dump expression AST + os << "}\n"; + } +} + diff --git a/src/convert/resolve.cpp b/src/convert/resolve.cpp new file mode 100644 index 00000000..0d79f1cd --- 
/dev/null +++ b/src/convert/resolve.cpp @@ -0,0 +1,47 @@ + +#include "../common.hpp" +#include "../ast/ast.hpp" +#include "../parse/parseerror.hpp" + +// Path resolution checking +void ResolvePaths(AST::Crate& crate); +void ResolvePaths_HandleFunction(const AST::Crate& crate, AST::Function& fcn); + +class CResolvePaths_NodeVisitor: + public AST::NodeVisitor +{ + const AST::Crate& m_crate; +public: + CResolvePaths_NodeVisitor(const AST::Crate& crate): + m_crate(crate) + { + } + + void visit(AST::ExprNode::TagNamedValue, AST::ExprNode& node) { + // TODO: Convert into a real absolute path + throw ParseError::Todo("CResolvePaths_NodeVisitor::visit(TagNamedValue)"); + } +}; + +void ResolvePaths_Type(TypeRef& type) +{ + // TODO: Convert type into absolute + throw ParseError::Todo("ResolvePaths_Type"); +} + +void ResolvePaths_HandleFunction(const AST::Crate& crate, const AST::Module& module, AST::Function& fcn) +{ + fcn.code().visit_nodes( CResolvePaths_NodeVisitor(crate, module) ); + + ResolvePaths_Type(crate, module, fcn.rettype()); + + FOREACH_M(AST::Function::Arglist, arg, fcn.args()) + { + ResolvePaths_Type(arg->second); + } +} + +void ResolvePaths(AST::Crate& crate) +{ + crate.iterate_functions(ResolvePaths_HandleFunction); +} diff --git a/src/coretypes.hpp b/src/coretypes.hpp new file mode 100644 index 00000000..99d574b3 --- /dev/null +++ b/src/coretypes.hpp @@ -0,0 +1,18 @@ +#ifndef CORETYPES_HPP_INCLUDED +#define CORETYPES_HPP_INCLUDED + +enum eCoreType +{ + CORETYPE_INVAL, + CORETYPE_ANY, + CORETYPE_CHAR, + CORETYPE_UINT, CORETYPE_INT, + CORETYPE_U8, CORETYPE_I8, + CORETYPE_U16, CORETYPE_I16, + CORETYPE_U32, CORETYPE_I32, + CORETYPE_U64, CORETYPE_I64, + CORETYPE_F32, + CORETYPE_F64, +}; + +#endif // CORETYPES_HPP_INCLUDED diff --git a/src/macros.cpp b/src/macros.cpp new file mode 100644 index 00000000..4356a399 --- /dev/null +++ b/src/macros.cpp @@ -0,0 +1,143 @@ +/* + */ +#include "common.hpp" +#include "macros.hpp" +#include "parse/parseerror.hpp" +#include 
"parse/tokentree.hpp" +#include "parse/common.hpp" + +typedef ::std::map< ::std::string, MacroRules> t_macro_regs; + +t_macro_regs g_macro_registrations; + +void Macro_InitDefaults() +{ + // try!() macro + { + MacroRule rule; + rule.m_pattern.push_back( MacroPatEnt("val", MacroPatEnt::PAT_EXPR) ); + // match $rule { + rule.m_contents.push_back( MacroRuleEnt(Token(TOK_RWORD_MATCH)) ); + rule.m_contents.push_back( MacroRuleEnt("val") ); + rule.m_contents.push_back( MacroRuleEnt(Token(TOK_BRACE_OPEN)) ); + // Ok(v) => v, + rule.m_contents.push_back( MacroRuleEnt(Token(TOK_IDENT, "Ok")) ); + rule.m_contents.push_back( MacroRuleEnt(Token(TOK_PAREN_OPEN)) ); + rule.m_contents.push_back( MacroRuleEnt(Token(TOK_IDENT, "v")) ); + rule.m_contents.push_back( MacroRuleEnt(Token(TOK_PAREN_CLOSE)) ); + rule.m_contents.push_back( MacroRuleEnt(Token(TOK_FATARROW)) ); + rule.m_contents.push_back( MacroRuleEnt(Token(TOK_IDENT, "v")) ); + rule.m_contents.push_back( MacroRuleEnt(Token(TOK_COMMA)) ); + // Err(e) => return Err(r), + rule.m_contents.push_back( MacroRuleEnt(Token(TOK_IDENT, "Err")) ); + rule.m_contents.push_back( MacroRuleEnt(Token(TOK_PAREN_OPEN)) ); + rule.m_contents.push_back( MacroRuleEnt(Token(TOK_IDENT, "e")) ); + rule.m_contents.push_back( MacroRuleEnt(Token(TOK_PAREN_CLOSE)) ); + rule.m_contents.push_back( MacroRuleEnt(Token(TOK_FATARROW)) ); + rule.m_contents.push_back( MacroRuleEnt(Token(TOK_RWORD_RETURN)) ); + rule.m_contents.push_back( MacroRuleEnt(Token(TOK_IDENT, "Err")) ); + rule.m_contents.push_back( MacroRuleEnt(Token(TOK_PAREN_OPEN)) ); + rule.m_contents.push_back( MacroRuleEnt(Token(TOK_IDENT, "e")) ); + rule.m_contents.push_back( MacroRuleEnt(Token(TOK_PAREN_CLOSE)) ); + rule.m_contents.push_back( MacroRuleEnt(Token(TOK_COMMA)) ); + // } + rule.m_contents.push_back( MacroRuleEnt(Token(TOK_BRACE_CLOSE)) ); + MacroRules rules; + rules.push_back(rule); + g_macro_registrations.insert( make_pair(::std::string("try"), rules)); + } +} + +MacroExpander 
Macro_Invoke(const char* name, TokenTree input) +{ + // XXX: EVIL HACK! - This should be removed when std loading is implemented + if( g_macro_registrations.size() == 0 ) { + Macro_InitDefaults(); + } + // 1. Locate macro with that name + t_macro_regs::iterator macro_reg = g_macro_registrations.find(name); + if( macro_reg != g_macro_registrations.end() ) + { + const MacroRules& rules = macro_reg->second; + // 2. Check input token tree against possible variants + // 3. Bind names + // 4. Return expander + FOREACH(MacroRules, rule_it, rules) + { + Token tok; + // Create token stream for input tree + TTStream lex(input); + if(GET_TOK(tok, lex) == TOK_EOF) { + throw ParseError::Unexpected(tok); + } + ::std::map bound_tts; + // Parse according to rules + bool fail = false; + FOREACH(::std::vector, pat_it, rule_it->m_pattern) + { + TokenTree val; + const MacroPatEnt& pat = *pat_it; + try + { + switch(pat.type) + { + case MacroPatEnt::PAT_TOKEN: + GET_CHECK_TOK(tok, lex, pat.tok.type()); + break; + case MacroPatEnt::PAT_EXPR: + val = Parse_TT_Expr(lex); + if(0) + case MacroPatEnt::PAT_STMT: + val = Parse_TT_Stmt(lex); + bound_tts.insert( std::make_pair(pat.name.c_str(), val) ); + break; + default: + throw ParseError::Todo("macro pattern matching"); + } + } + catch(const ParseError::Base& e) + { + fail = true; + break; + } + } + // TODO: Actually check if the final token is the closer to the first + if( !fail && GET_TOK(tok, lex) == TOK_EOF) { + throw ParseError::Unexpected(tok); + } + if( !fail && lex.getToken().type() == TOK_EOF ) + { + return MacroExpander(rule_it->m_contents, bound_tts); + } + } + throw ParseError::Todo("Error when macro fails to match"); + } + + throw ParseError::Generic( ::std::string("Macro '") + name + "' was not found" ); +} + +Token MacroExpander::realGetToken() +{ + if( m_ttstream.get() ) + { + Token rv = m_ttstream->getToken(); + if( rv.type() != TOK_EOF ) + return rv; + m_ttstream.reset(); + } + if( m_ofs < m_contents.size() ) + { + const 
MacroRuleEnt& ent = m_contents[m_ofs]; + m_ofs ++; + if( ent.name.size() != 0 ) { + // Binding! + m_ttstream.reset( new TTStream(m_mappings.at(ent.name.c_str())) ); + return m_ttstream->getToken(); + } + else { + return ent.tok; + } + throw ParseError::Todo("MacroExpander - realGetToken"); + } + return Token(TOK_EOF); +} diff --git a/src/macros.hpp b/src/macros.hpp new file mode 100644 index 00000000..2618825b --- /dev/null +++ b/src/macros.hpp @@ -0,0 +1,94 @@ +#ifndef MACROS_HPP_INCLUDED +#define MACROS_HPP_INCLUDED + +#include "parse/lex.hpp" +#include +#include +#include + +class MacroExpander; + +class MacroRuleEnt +{ + friend class MacroExpander; + + Token tok; + ::std::string name; +public: + MacroRuleEnt(Token tok): + tok(tok), + name("") + { + } + MacroRuleEnt(::std::string name): + name(name) + { + } +}; +struct MacroPatEnt +{ + Token tok; + ::std::string name; + enum Type { + PAT_TOKEN, + PAT_TT, + PAT_IDENT, + PAT_PATH, + PAT_EXPR, + PAT_STMT, + PAT_BLOCK, + } type; + + MacroPatEnt(::std::string name, Type type): + tok(), + name(name), + type(type) + { + } +}; + +/// A rule within a macro_rules! blcok +class MacroRule +{ +public: + ::std::vector m_pattern; + ::std::vector m_contents; +}; + +/// A sigle 'macro_rules!' 
block +typedef ::std::vector MacroRules; + +struct cmp_str { + bool operator()(const char* a, const char* b) const { + return ::std::strcmp(a, b) < 0; + } +}; + +class MacroExpander: + public TokenStream +{ + typedef ::std::map t_mappings; + const t_mappings m_mappings; + const ::std::vector& m_contents; + size_t m_ofs; + + ::std::auto_ptr m_ttstream; +public: + MacroExpander(const MacroExpander& x): + m_mappings(x.m_mappings), + m_contents(x.m_contents), + m_ofs(0) + { + } + MacroExpander(const ::std::vector& contents, t_mappings mappings): + m_mappings(mappings), + m_contents(contents), + m_ofs(0) + { + } + virtual Token realGetToken(); +}; + +extern MacroExpander Macro_Invoke(const char* name, TokenTree input); + +#endif // MACROS_HPP_INCLUDED diff --git a/src/main.cpp b/src/main.cpp new file mode 100644 index 00000000..f3ea8136 --- /dev/null +++ b/src/main.cpp @@ -0,0 +1,34 @@ +#include +#include +#include "parse/lex.hpp" +#include "parse/parseerror.hpp" +#include "ast/ast.hpp" + +extern AST::Crate Parse_Crate(::std::string mainfile); +extern void ResolvePaths(AST::Crate& crate); +extern AST::Flat Convert_Flatten(const AST::Crate& crate); + +/// main! 
+int main(int argc, char *argv[]) +{ + try + { + AST::Crate crate = Parse_Crate("samples/1.rs"); + + // Resolve names to be absolute names (include references to the relevant struct/global/function) + ResolvePaths(crate); + + // Typecheck / type propagate module (type annotations of all values) + + // Flatten modules into "mangled" set + AST::Flat flat_crate = Convert_Flattern(crate); + + // Convert structures to C structures / tagged enums + //Convert_Render(flat_crate, stdout); + } + catch(const ParseError::Base& e) + { + ::std::cerr << "Parser Error: " << e.what() << ::std::endl; + } + return 0; +} diff --git a/src/parse/common.hpp b/src/parse/common.hpp new file mode 100644 index 00000000..0d6d550b --- /dev/null +++ b/src/parse/common.hpp @@ -0,0 +1,50 @@ +#ifndef PARSE_COMMON_HPP_INCLUDED +#define PARSE_COMMON_HPP_INCLUDED +#include + +#define GET_TOK(tok, lex) ((tok = lex.getToken()).type()) +#define GET_CHECK_TOK(tok, lex, exp) do {\ + if((tok = lex.getToken()).type() != exp) \ + throw ParseError::Unexpected(tok, Token(exp));\ +} while(0) +#define CHECK_TOK(tok, exp) do {\ + if(tok.type() != exp) \ + throw ParseError::Unexpected(tok, Token(exp));\ +} while(0) + +enum eParsePathGenericMode +{ + PATH_GENERIC_NONE, + PATH_GENERIC_EXPR, + PATH_GENERIC_TYPE +}; + +extern AST::Path Parse_Path(TokenStream& lex, bool is_abs, eParsePathGenericMode generic_mode); +extern TypeRef Parse_Type(TokenStream& lex); +extern AST::Expr Parse_Expr(TokenStream& lex, bool const_only); +extern AST::Expr Parse_ExprBlock(TokenStream& lex); + +class TraceLog +{ + static unsigned int depth; + const char* m_tag; +public: + TraceLog(const char* tag): m_tag(tag) { indent(); ::std::cout << ">> " << m_tag << ::std::endl; } + ~TraceLog() { outdent(); ::std::cout << "<< " << m_tag << ::std::endl; } +private: + void indent() + { + for(unsigned int i = 0; i < depth; i ++) + ::std::cout << " "; + depth ++; + } + void outdent() + { + depth --; + for(unsigned int i = 0; i < depth; i ++) + 
::std::cout << " "; + } +}; +#define TRACE_FUNCTION TraceLog _tf_(__func__) + +#endif // PARSE_COMMON_HPP_INCLUDED diff --git a/src/parse/expr.cpp b/src/parse/expr.cpp new file mode 100644 index 00000000..e632d47f --- /dev/null +++ b/src/parse/expr.cpp @@ -0,0 +1,631 @@ +/* + */ +#include "preproc.hpp" +#include "parseerror.hpp" +#include "../ast/ast.hpp" +#include "common.hpp" +#include "../macros.hpp" +#include +#include "tokentree.hpp" + +using AST::ExprNode; + +AST::ExprNode Parse_ExprBlockNode(TokenStream& lex); +AST::ExprNode Parse_Stmt(TokenStream& lex, bool& opt_semicolon); +AST::ExprNode Parse_Expr0(TokenStream& lex); +AST::ExprNode Parse_ExprBlocks(TokenStream& lex); +AST::ExprNode Parse_Expr1(TokenStream& lex); + +AST::Expr Parse_Expr(TokenStream& lex, bool const_only) +{ + return AST::Expr(Parse_Expr0(lex)); +} + +AST::Expr Parse_ExprBlock(TokenStream& lex) +{ + return AST::Expr(Parse_ExprBlockNode(lex)); +} + +AST::Pattern Parse_Pattern(TokenStream& lex) +{ + TRACE_FUNCTION; + + AST::Path path; + Token tok; + tok = lex.getToken(); + if( tok.type() == TOK_RWORD_REF ) + { + throw ParseError::Todo("ref bindings"); + tok = lex.getToken(); + } + switch( tok.type() ) + { + case TOK_IDENT: + // 1. Identifiers could be either a bind or a value + // - If the path resolves to a single node, either a local enum value, or a binding + lex.putback(tok); + path = Parse_Path(lex, false, PATH_GENERIC_EXPR); + if( path.length() == 1 && path[0].args().size() == 0 ) + { + // Could be a name binding, check the next token + GET_TOK(tok, lex); + if(tok.type() != TOK_PAREN_OPEN) { + lex.putback(tok); + return AST::Pattern(AST::Pattern::TagMaybeBind(), path[0].name()); + } + lex.putback(tok); + } + // otherwise, it's a value check + if(0) + case TOK_DOUBLE_COLON: + // 2. 
Paths are enum/struct names + { + path = Parse_Path(lex, true, PATH_GENERIC_EXPR); + } + switch( GET_TOK(tok, lex) ) + { + case TOK_PAREN_OPEN: { + // A list of internal patterns + ::std::vector child_pats; + do { + AST::Pattern pat = Parse_Pattern(lex); + child_pats.push_back(pat); + } while( GET_TOK(tok, lex) == TOK_COMMA ); + CHECK_TOK(tok, TOK_PAREN_CLOSE); + return AST::Pattern(AST::Pattern::TagEnumVariant(), path, child_pats); + } + default: + lex.putback(tok); + return AST::Pattern(AST::Pattern::TagValue(), ExprNode(ExprNode::TagNamedValue(), path)); + } + break; + case TOK_INTEGER: + return AST::Pattern( AST::Pattern::TagValue(), ExprNode(ExprNode::TagInteger(), tok.intval(), tok.datatype()) ); + case TOK_PAREN_OPEN: + throw ParseError::Todo("tuple patterns"); + default: + throw ParseError::Unexpected(tok); + } + throw ParseError::BugCheck("Parse_TT_Stmt should early return"); +} + +ExprNode Parse_ExprBlockNode(TokenStream& lex) +{ + TRACE_FUNCTION; + + ::std::vector nodes; + Token tok; + GET_CHECK_TOK(tok, lex, TOK_BRACE_OPEN); + while( GET_TOK(tok, lex) != TOK_BRACE_CLOSE ) + { + lex.putback(tok); + bool opt_semicolon = false; + // NOTE: This semicolon handling is SHIT. + nodes.push_back(Parse_Stmt(lex, opt_semicolon)); + if( GET_TOK(tok, lex) != TOK_BRACE_CLOSE ) { + if( !opt_semicolon ) + CHECK_TOK(tok, TOK_SEMICOLON); + else + lex.putback(tok); + } + else { + nodes.push_back(ExprNode()); + break; + } + } + return AST::ExprNode(ExprNode::TagBlock(), nodes); +} + +AST::ExprNode Parse_Stmt(TokenStream& lex, bool& opt_semicolon) +{ + TRACE_FUNCTION; + + Token tok; + // 1. Handle 'let' + // 2. Handle new blocks + // 3. 
Handle a sequence of expressions broken by ';' + switch(GET_TOK(tok, lex)) + { + case TOK_BRACE_OPEN: + lex.putback(tok); + opt_semicolon = true; + return Parse_ExprBlockNode(lex); + case TOK_RWORD_LET: { + //ret.append(); + AST::Pattern pat = Parse_Pattern(lex); + GET_CHECK_TOK(tok, lex, TOK_EQUAL); + AST::ExprNode val = Parse_Expr1(lex); + opt_semicolon = false; + return ExprNode(ExprNode::TagLetBinding(), pat, val); + } + case TOK_RWORD_RETURN: + return ExprNode(ExprNode::TagReturn(), Parse_Expr1(lex)); + case TOK_RWORD_LOOP: + throw ParseError::Todo("loop"); + break; + case TOK_RWORD_FOR: + throw ParseError::Todo("for"); + break; + case TOK_RWORD_WHILE: + throw ParseError::Todo("while"); + break; + default: { + lex.putback(tok); + opt_semicolon = true; + return Parse_Expr0(lex); + } + } + +} + +::std::vector Parse_ParenList(TokenStream& lex) +{ + TRACE_FUNCTION; + + ::std::vector rv; + Token tok; + GET_CHECK_TOK(tok, lex, TOK_PAREN_OPEN); + if( (tok = lex.getToken()).type() != TOK_PAREN_CLOSE ) + { + lex.putback(tok); + do { + rv.push_back( Parse_Expr1(lex) ); + } while( (tok = lex.getToken()).type() == TOK_COMMA ); + CHECK_TOK(tok, TOK_PAREN_CLOSE); + } + return rv; +} + +// 0: Assign +AST::ExprNode Parse_Expr0(TokenStream& lex) +{ + TRACE_FUNCTION; + + AST::ExprNode rv = Parse_ExprBlocks(lex); + Token tok = lex.getToken(); + if( tok.type() == TOK_EQUAL ) + { + ExprNode val = Parse_Expr1(lex); + rv = ExprNode(ExprNode::TagAssign(), rv, val); + } + else + { + lex.putback(tok); + } + return rv; +} + +/// Parse an 'if' statement +// Note: TOK_RWORD_IF has already been eaten +AST::ExprNode Parse_IfStmt(TokenStream& lex) +{ + TRACE_FUNCTION; + + Token tok; + ExprNode cond; + if( GET_TOK(tok, lex) == TOK_RWORD_LET ) { + throw ParseError::Todo("if let"); + } + else { + lex.putback(tok); + cond = Parse_Expr0(lex); + } + + // Contents + ExprNode code = Parse_ExprBlockNode(lex); + + // Handle else: + ExprNode altcode; + if( GET_TOK(tok, lex) == TOK_RWORD_ELSE ) + { + // 
Recurse for 'else if' + if( GET_TOK(tok, lex) == TOK_RWORD_IF ) { + altcode = Parse_IfStmt(lex); + } + // - or get block + else { + lex.putback(tok); + altcode = Parse_ExprBlockNode(lex); + } + } + // - or nothing + else { + lex.putback(tok); + altcode = ExprNode(); + } + + return ExprNode(ExprNode::TagIf(), cond, code, altcode); +} + +// 0.5: Blocks +AST::ExprNode Parse_ExprBlocks(TokenStream& lex) +{ + Token tok = lex.getToken(); + switch( tok.type() ) + { + case TOK_RWORD_MATCH: { + // 1. Get expression + AST::ExprNode switch_val = Parse_Expr1(lex); + GET_CHECK_TOK(tok, lex, TOK_BRACE_OPEN); + ::std::vector< ::std::pair > arms; + do { + if( GET_TOK(tok, lex) == TOK_BRACE_CLOSE ) + break; + lex.putback(tok); + AST::Pattern pat = Parse_Pattern(lex); + GET_CHECK_TOK(tok, lex, TOK_FATARROW); + bool opt_semicolon = false; + AST::ExprNode val = Parse_Stmt(lex, opt_semicolon); + arms.push_back( ::std::make_pair(pat, val) ); + } while( GET_TOK(tok, lex) == TOK_COMMA ); + CHECK_TOK(tok, TOK_BRACE_CLOSE); + return AST::ExprNode(ExprNode::TagMatch(), switch_val, arms); + } + case TOK_RWORD_IF: + // TODO: if let + return Parse_IfStmt(lex); + default: + lex.putback(tok); + return Parse_Expr1(lex); + } +} + + +#define LEFTASSOC(cur, _next, cases) \ +AST::ExprNode _next(TokenStream& lex); \ +AST::ExprNode cur(TokenStream& lex) \ +{ \ + AST::ExprNode (*next)(TokenStream&) = _next;\ + AST::ExprNode rv = next(lex); \ + while(true) \ + { \ + Token tok; \ + switch((tok = lex.getToken()).type()) \ + { \ + cases \ + default: \ + ::std::cout << "<<" << #cur << ::std::endl; \ + lex.putback(tok); \ + return rv; \ + } \ + } \ +} +// 1: Bool OR +LEFTASSOC(Parse_Expr1, Parse_Expr2, + case TOK_DOUBLE_PIPE: + throw ParseError::Todo("expr - boolean OR"); +) +// 2: Bool AND +LEFTASSOC(Parse_Expr2, Parse_Expr3, + case TOK_DOUBLE_AMP: + throw ParseError::Todo("expr - boolean AND"); +) +// 3: (In)Equality +LEFTASSOC(Parse_Expr3, Parse_Expr4, + case TOK_DOUBLE_EQUAL: + rv = 
ExprNode(ExprNode::TagBinOp(), ExprNode::BINOP_CMPEQU, rv, next(lex)); + break; + case TOK_EXCLAM_EQUAL: + rv = ExprNode(ExprNode::TagBinOp(), ExprNode::BINOP_CMPNEQU, rv, next(lex)); + break; +) +// 4: Comparisons +LEFTASSOC(Parse_Expr4, Parse_Expr5, + case TOK_LT: + throw ParseError::Todo("expr - less than"); + case TOK_GT: + throw ParseError::Todo("expr - greater than"); + case TOK_LTE: + throw ParseError::Todo("expr - less than or equal"); + case TOK_GTE: + throw ParseError::Todo("expr - greater than or equal"); +) +// 5: Bit OR +LEFTASSOC(Parse_Expr5, Parse_Expr6, + case TOK_PIPE: + rv = ExprNode(ExprNode::TagBinOp(), ExprNode::BINOP_BITOR, rv, next(lex)); + break; +) +// 6: Bit XOR +LEFTASSOC(Parse_Expr6, Parse_Expr7, + case TOK_CARET: + rv = ExprNode(ExprNode::TagBinOp(), ExprNode::BINOP_BITXOR, rv, next(lex)); + break; +) +// 7: Bit AND +LEFTASSOC(Parse_Expr7, Parse_Expr8, + case TOK_AMP: + rv = ExprNode(ExprNode::TagBinOp(), ExprNode::BINOP_BITAND, rv, next(lex)); + break; +) +// 8: Bit Shifts +LEFTASSOC(Parse_Expr8, Parse_Expr9, + case TOK_DOUBLE_LT: + rv = ExprNode(ExprNode::TagBinOp(), ExprNode::BINOP_SHL, rv, next(lex)); + break; + case TOK_DOUBLE_GT: + rv = ExprNode(ExprNode::TagBinOp(), ExprNode::BINOP_SHR, rv, next(lex)); + break; +) +// 9: Add / Subtract +LEFTASSOC(Parse_Expr9, Parse_Expr10, + case TOK_PLUS: + throw ParseError::Todo("expr - add"); + case TOK_DASH: + throw ParseError::Todo("expr - sub"); +) +// 10: Cast +LEFTASSOC(Parse_Expr10, Parse_Expr11, + case TOK_RWORD_AS: + rv = ExprNode(ExprNode::TagCast(), rv, Parse_Type(lex)); + break; +) +// 11: Times / Divide / Modulo +LEFTASSOC(Parse_Expr11, Parse_Expr12, + case TOK_STAR: + throw ParseError::Todo("expr - multiply"); + case TOK_SLASH: + throw ParseError::Todo("expr - divide"); + case TOK_PERCENT: + throw ParseError::Todo("expr - modulo"); +) +// 12: Unaries +AST::ExprNode Parse_ExprFC(TokenStream& lex); +AST::ExprNode Parse_Expr12(TokenStream& lex) +{ + Token tok; + switch((tok = 
lex.getToken()).type()) + { + case TOK_DASH: + throw ParseError::Todo("expr - negate"); + case TOK_EXCLAM: + throw ParseError::Todo("expr - logical negate"); + case TOK_STAR: + throw ParseError::Todo("expr - dereference"); + case TOK_RWORD_BOX: + throw ParseError::Todo("expr - box"); + case TOK_AMP: + throw ParseError::Todo("expr - borrow"); + default: + lex.putback(tok); + return Parse_ExprFC(lex); + } +} + +AST::ExprNode Parse_ExprVal(TokenStream& lex); +AST::ExprNode Parse_ExprFC(TokenStream& lex) +{ + AST::ExprNode val = Parse_ExprVal(lex); + while(true) + { + Token tok; + switch(GET_TOK(tok, lex)) + { + case TOK_PAREN_OPEN: + // Function call + lex.putback(tok); + val = AST::ExprNode(AST::ExprNode::TagCallObject(), val, Parse_ParenList(lex)); + break; + case TOK_DOT: + // Field access + // TODO: What about tuple indexing? + GET_CHECK_TOK(tok, lex, TOK_IDENT); + val = AST::ExprNode(AST::ExprNode::TagField(), tok.str()); + break; + default: + lex.putback(tok); + return val; + } + } +} + +AST::ExprNode Parse_ExprVal(TokenStream& lex) +{ + Token tok; + AST::Path path; + switch((tok = lex.getToken()).type()) + { + case TOK_IDENT: + // Get path + lex.putback(tok); + path = Parse_Path(lex, false, PATH_GENERIC_EXPR); + if(0) + case TOK_DOUBLE_COLON: + path = Parse_Path(lex, true, PATH_GENERIC_EXPR); + switch( GET_TOK(tok, lex) ) + { + case TOK_BRACE_OPEN: { + // Braced structure literal + // - A series of 0 or more pairs of : , + // - '..' 
+ ::std::vector< ::std::pair< ::std::string, AST::ExprNode> > items; + while( GET_TOK(tok, lex) == TOK_IDENT ) + { + ::std::string name = tok.str(); + GET_CHECK_TOK(tok, lex, TOK_COLON); + AST::ExprNode val = Parse_Expr0(lex); + items.push_back( ::std::make_pair(name, val) ); + if( GET_TOK(tok,lex) == TOK_BRACE_CLOSE ) + break; + CHECK_TOK(tok, TOK_COMMA); + } + AST::ExprNode base_val; + if( tok.type() == TOK_DOUBLE_DOT ) + { + // default + base_val = Parse_Expr0(lex); + GET_TOK(tok, lex); + } + CHECK_TOK(tok, TOK_BRACE_CLOSE); + return ExprNode(ExprNode::TagStructLiteral(), path, base_val, items); + } + case TOK_PAREN_OPEN: { + lex.putback(tok); + // Function call + ::std::vector args = Parse_ParenList(lex); + return ExprNode(ExprNode::TagCallPath(), path, args); + } + default: + // Value + lex.putback(tok); + return ExprNode(ExprNode::TagNamedValue(), path); + } + case TOK_INTEGER: + return ExprNode(ExprNode::TagInteger(), tok.intval(), tok.datatype()); + case TOK_FLOAT: + throw ParseError::Todo("Float"); + case TOK_RWORD_SELF: { + AST::Path path; + path.append( AST::PathNode("self", ::std::vector()) ); + return ExprNode(ExprNode::TagNamedValue(), path); + } + case TOK_PAREN_OPEN: { + ExprNode rv = Parse_Expr0(lex); + GET_CHECK_TOK(tok, lex, TOK_PAREN_CLOSE); + return rv; } + case TOK_MACRO: { + // Need to create a token tree, pass to the macro, then pass the result of that to Parse_Expr0 + MacroExpander expanded_macro = Macro_Invoke(tok.str().c_str(), Parse_TT(lex)); + + return Parse_Expr0(expanded_macro); + } + default: + throw ParseError::Unexpected(tok); + } +} + +// Token Tree Parsing +TokenTree Parse_TT(TokenStream& lex) +{ + Token tok = lex.getToken(); + eTokenType closer = TOK_PAREN_CLOSE; + switch(tok.type()) + { + case TOK_PAREN_OPEN: + closer = TOK_PAREN_CLOSE; + break; + case TOK_SQUARE_OPEN: + closer = TOK_SQUARE_CLOSE; + break; + case TOK_BRACE_OPEN: + closer = TOK_BRACE_CLOSE; + break; + default: + return TokenTree(tok); + } + + ::std::vector 
items; + items.push_back(tok); + while(GET_TOK(tok, lex) != closer && tok.type() != TOK_EOF) + { + lex.putback(tok); + items.push_back(Parse_TT(lex)); + } + items.push_back(tok); + return TokenTree(items); +} + +TokenTree Parse_TT_Path(TokenStream& lex) +{ + throw ParseError::Todo("TokenTree path"); +} +/// Parse a token tree path +TokenTree Parse_TT_Val(TokenStream& lex) +{ + Token tok; + ::std::vector ret; + switch(GET_TOK(tok, lex)) + { + case TOK_PAREN_OPEN: + lex.putback(tok); + return Parse_TT(lex); + + case TOK_IDENT: + case TOK_DOUBLE_COLON: { + lex.putback(tok); + TokenTree inner = Parse_TT_Path(lex); + if(GET_TOK(tok, lex) == TOK_BRACE_OPEN) { + lex.putback(tok); + ret.push_back(inner); + ret.push_back(Parse_TT(lex)); + } + else { + lex.putback(tok); + return inner; + } + break; } + case TOK_RWORD_SELF: + return TokenTree(tok); + case TOK_RWORD_MATCH: + ret.push_back(TokenTree(tok)); + ret.push_back(Parse_TT(lex)); + break; + case TOK_RWORD_IF: + ret.push_back(TokenTree(tok)); + ret.push_back(Parse_TT(lex)); + if( GET_TOK(tok, lex) == TOK_RWORD_ELSE ) { + ret.push_back(TokenTree(tok)); + ret.push_back(Parse_TT(lex)); + } + else { + lex.putback(tok); + } + break; + default: + // Oh, fail :( + throw ParseError::Unexpected(tok); + } + return TokenTree(ret); +} +/// Parse a token tree expression +TokenTree Parse_TT_Expr(TokenStream& lex) +{ + Token tok; + ::std::vector ret; + + ret.push_back(Parse_TT_Val(lex)); + // 1. 
Get left associative blocks until nothing matches + bool cont = true; + while(cont) + { + switch(GET_TOK(tok, lex)) + { + case TOK_PLUS: + case TOK_DASH: + ret.push_back(tok); + ret.push_back(Parse_TT_Val(lex)); + break; + case TOK_DOT: + ret.push_back(tok); + GET_CHECK_TOK(tok, lex, TOK_IDENT); + ret.push_back(tok); + switch(GET_TOK(tok, lex)) + { + case TOK_DOUBLE_COLON: + throw ParseError::Todo("Generic type params in TT expr"); + case TOK_PAREN_OPEN: + lex.putback(tok); + ret.push_back(Parse_TT(lex)); + break; + default: + lex.putback(tok); + break; + } + break; + default: + lex.putback(tok); + cont = false; + break; + } + } + return TokenTree(ret); + +} +TokenTree Parse_TT_Stmt(TokenStream& lex) +{ + throw ParseError::Todo("Parse_TT_Stmt"); +} +TokenTree Parse_TT_Block(TokenStream& lex) +{ + throw ParseError::Todo("Parse_TT_Block"); +} diff --git a/src/parse/lex.cpp b/src/parse/lex.cpp new file mode 100644 index 00000000..79db4603 --- /dev/null +++ b/src/parse/lex.cpp @@ -0,0 +1,650 @@ +/* + * "MRustC" - Primitive rust compiler in C++ + */ +/** + * \file parse/lex.cpp + * \brief Low-level lexer + */ +#include "lex.hpp" +#include "parseerror.hpp" +#include +#include +#include // strtol +#include + +Lexer::Lexer(::std::string filename): + m_istream(filename.c_str()), + m_last_char_valid(false) +{ + if( !m_istream.is_open() ) + { + throw ::std::runtime_error("Unable to open file"); + } +} + +#define LINECOMMENT -1 +#define BLOCKCOMMENT -2 +#define SINGLEQUOTE -3 +#define DOUBLEQUOTE -4 + +// NOTE: This array must be kept reverse sorted +#define TOKENT(str, sym) {sizeof(str)-1, str, sym} +static const struct { + unsigned char len; + const char* chars; + signed int type; +} TOKENMAP[] = { + TOKENT("!" 
, TOK_EXCLAM), + TOKENT("!=", TOK_EXCLAM_EQUAL), + TOKENT("\"", DOUBLEQUOTE), + TOKENT("#", 0), + TOKENT("#![",TOK_CATTR_OPEN), + TOKENT("#[", TOK_ATTR_OPEN), + //TOKENT("$", 0), + TOKENT("%" , TOK_PERCENT), + TOKENT("%=", TOK_PERCENT_EQUAL), + TOKENT("&" , TOK_AMP), + TOKENT("&&", TOK_DOUBLE_AMP), + TOKENT("&=", TOK_AMP_EQUAL), + TOKENT("'" , SINGLEQUOTE), + TOKENT("(" , TOK_PAREN_OPEN), + TOKENT(")" , TOK_PAREN_CLOSE), + TOKENT("*" , TOK_STAR), + TOKENT("*=", TOK_STAR_EQUAL), + TOKENT("+" , TOK_PLUS), + TOKENT("+=", TOK_PLUS_EQUAL), + TOKENT("," , TOK_COMMA), + TOKENT("-" , TOK_DASH), + TOKENT("-=", TOK_DASH_EQUAL), + TOKENT("->", TOK_THINARROW), + TOKENT(".", TOK_DOT), + TOKENT("..", TOK_DOUBLE_DOT), + TOKENT("...",TOK_TRIPLE_DOT), + TOKENT("/" , TOK_SLASH), + TOKENT("/*", BLOCKCOMMENT), + TOKENT("//", LINECOMMENT), + TOKENT("/=", TOK_SLASH_EQUAL), + // 0-9 :: Elsewhere + TOKENT(":", TOK_COLON), + TOKENT("::", TOK_DOUBLE_COLON), + TOKENT(";", TOK_SEMICOLON), + TOKENT("<", TOK_LT), + TOKENT("<<", TOK_DOUBLE_LT), + TOKENT("<=", TOK_LTE), + TOKENT("=" , TOK_EQUAL), + TOKENT("==", TOK_DOUBLE_EQUAL), + TOKENT("=>", TOK_FATARROW), + TOKENT(">", TOK_GT), + TOKENT(">=", TOK_GTE), // '=' (0x3D) sorts before '>' (0x3E): getSymbol() stops at the first entry whose next byte exceeds the input, so entries must ascend by byte value + TOKENT(">>", TOK_DOUBLE_GT), + TOKENT("?", TOK_QMARK), + TOKENT("@", TOK_AT), + // A-Z :: Elsewhere + TOKENT("[", TOK_SQUARE_OPEN), + TOKENT("\\", TOK_BACKSLASH), + TOKENT("]", TOK_SQUARE_CLOSE), + TOKENT("^", TOK_CARET), + TOKENT("`", TOK_BACKTICK), + + TOKENT("{", TOK_BRACE_OPEN), + TOKENT("|", TOK_PIPE), + TOKENT("|=", TOK_PIPE_EQUAL), + TOKENT("||", TOK_DOUBLE_PIPE), + TOKENT("}", TOK_BRACE_CLOSE), + TOKENT("~", TOK_TILDE), +}; +#define LEN(arr) (sizeof(arr)/sizeof(arr[0])) +static const struct { + unsigned char len; + const char* chars; + signed int type; +} RWORDS[] = { + TOKENT("abstract",TOK_RWORD_ABSTRACT), + TOKENT("alignof", TOK_RWORD_ALIGNOF), + TOKENT("as", TOK_RWORD_AS), + TOKENT("be", TOK_RWORD_BE), + TOKENT("box", TOK_RWORD_BOX), + TOKENT("break", TOK_RWORD_BREAK), + 
TOKENT("const", TOK_RWORD_CONST), + TOKENT("continue",TOK_RWORD_CONTINUE), + TOKENT("crate", TOK_RWORD_CRATE), + TOKENT("do", TOK_RWORD_DO), + TOKENT("else", TOK_RWORD_ELSE), + TOKENT("enum", TOK_RWORD_ENUM), + TOKENT("extern", TOK_RWORD_EXTERN), + TOKENT("false", TOK_RWORD_FALSE), + TOKENT("final", TOK_RWORD_FINAL), + TOKENT("fn", TOK_RWORD_FN), + TOKENT("for", TOK_RWORD_FOR), + TOKENT("if", TOK_RWORD_IF), + TOKENT("impl", TOK_RWORD_IMPL), + TOKENT("in", TOK_RWORD_IN), + TOKENT("let", TOK_RWORD_LET), + TOKENT("loop", TOK_RWORD_LOOP), + TOKENT("match", TOK_RWORD_MATCH), + TOKENT("mod", TOK_RWORD_MOD), + TOKENT("move", TOK_RWORD_MOVE), + TOKENT("mut", TOK_RWORD_MUT), + TOKENT("offsetof",TOK_RWORD_OFFSETOF), + TOKENT("once", TOK_RWORD_ONCE), + TOKENT("override",TOK_RWORD_OVERRIDE), + TOKENT("priv", TOK_RWORD_PRIV), + TOKENT("proc", TOK_RWORD_PROC), + TOKENT("pub", TOK_RWORD_PUB), + TOKENT("pure", TOK_RWORD_PURE), + TOKENT("ref", TOK_RWORD_REF), + TOKENT("return", TOK_RWORD_RETURN), + TOKENT("self", TOK_RWORD_SELF), + TOKENT("sizeof", TOK_RWORD_SIZEOF), + TOKENT("static", TOK_RWORD_STATIC), + TOKENT("struct", TOK_RWORD_STRUCT), + TOKENT("super", TOK_RWORD_SUPER), + TOKENT("trait", TOK_RWORD_TRAIT), // "trait" < "true": the keyword lookup breaks at the first entry greater than the identifier, so this table must stay in ascending order + TOKENT("true", TOK_RWORD_TRUE), + TOKENT("type", TOK_RWORD_TYPE), + TOKENT("typeof", TOK_RWORD_TYPEOF), + TOKENT("unsafe", TOK_RWORD_UNSAFE), + TOKENT("unsized", TOK_RWORD_UNSIZED), + TOKENT("use", TOK_RWORD_USE), + TOKENT("virtual", TOK_RWORD_VIRTUAL), + TOKENT("where", TOK_RWORD_WHERE), + TOKENT("while", TOK_RWORD_WHILE), + TOKENT("yield", TOK_RWORD_YIELD), +}; + +signed int Lexer::getSymbol() +{ + char ch = this->getc(); + // 1. lsearch for character + // 2. Consume as many characters as currently match + // 3. 
IF: a smaller character or, EOS is hit - Return current best + unsigned ofs = 0; + signed int best = 0; + for(unsigned i = 0; i < LEN(TOKENMAP); i ++) + { + const char* const chars = TOKENMAP[i].chars; + const size_t len = TOKENMAP[i].len; + + //::std::cout << "ofs=" << ofs << ", chars[ofs] = " << chars[ofs] << ", ch = " << ch << ", len = " << len << ::std::endl; + + if( ofs >= len || chars[ofs] > ch ) { + this->putback(); + return best; + } + + while( chars[ofs] && chars[ofs] == ch ) + { + ch = this->getc(); + ofs ++; + } + if( chars[ofs] == 0 ) + { + best = TOKENMAP[i].type; + } + } + + this->putback(); + return best; +} + +bool issym(char ch) +{ + if( ::std::isalnum(ch) ) + return true; + if( ch == '_' ) + return true; + if( ch == '$' ) + return true; + return false; +} + +Token Lexer::getToken() +{ + try + { + char ch = this->getc(); + + if( isspace(ch) ) + { + while( isspace(this->getc()) ) + ; + this->putback(); + return Token(TOK_WHITESPACE); + } + this->putback(); + + const signed int sym = this->getSymbol(); + if( sym == 0 ) + { + // No match at all, check for symbol + char ch = this->getc(); + if( isdigit(ch) ) + { + // TODO: handle integers/floats + uint64_t val = 0; + if( ch == '0' ) { + // Octal/hex handling + ch = this->getc(); + if( ch == 'x' ) { + while( isxdigit(ch = this->getc()) ) { + val *= 16; // shift the accumulator by one hex digit + if(ch <= '9') + val += ch - '0'; + else if( ch <= 'F' ) + val += ch - 'A' + 10; + else if( ch <= 'f' ) + val += ch - 'a' + 10; + } + } + else if( isdigit(ch) ) { + throw ParseError::Todo("Lex octal numbers"); + } + else { + val = 0; + } + } + else { + while( isdigit(ch) ) { + val *= 10; // shift the accumulator by one decimal digit + val += ch - '0'; + ch = this->getc(); + } + } + + if(ch == 'u' || ch == 'i') { + // Unsigned + throw ParseError::Todo("Lex number suffixes"); + } + else if( ch == '.' 
) { + throw ParseError::Todo("Lex floats"); + } + else { + this->putback(); + return Token(val, CORETYPE_ANY); + } + } + else if( issym(ch) ) + { + ::std::string str; + while( issym(ch) ) + { + str.push_back(ch); + ch = this->getc(); + } + + if( ch == '!' ) + { + return Token(TOK_MACRO, str); + } + else + { + this->putback(); + for( unsigned int i = 0; i < LEN(RWORDS); i ++ ) + { + if( str < RWORDS[i].chars ) break; + if( str == RWORDS[i].chars ) return Token((enum eTokenType)RWORDS[i].type); + } + return Token(TOK_IDENT, str); + } + } + else + { + throw ParseError::BadChar(ch); + } + } + else if( sym > 0 ) + { + return Token((enum eTokenType)sym); + } + else + { + switch(sym) + { + case LINECOMMENT: { + // Line comment + ::std::string str; + char ch = this->getc(); + while(ch != '\n' && ch != '\r') + { + str.push_back(ch); + ch = this->getc(); + } + return Token(TOK_COMMENT, str); } + case BLOCKCOMMENT: { + ::std::string str; + while(true) + { + ch = this->getc(); // next character of the comment body (ch previously held the token's leading '/') + while( ch == '*' ) { + ch = this->getc(); + if( ch == '/' ) return Token(TOK_COMMENT, str); + str.push_back('*'); // lone '*': keep it, then re-test the character that followed it + } + str.push_back(ch); + } + } + case SINGLEQUOTE: { + char firstchar = this->getc(); + if( firstchar != '\\' ) { + ch = this->getc(); + if( ch == '\'' ) { + // Character constant + return Token((uint64_t)(unsigned char)firstchar, CORETYPE_CHAR); // value is the quoted character, not the closing quote held in ch + } + else { + // Lifetime name + ::std::string str; + str.push_back(firstchar); + while( issym(ch) ) + { + str.push_back(ch); + ch = this->getc(); + } + this->putback(); + return Token(TOK_LIFETIME, str); + } + } + else { + // Character constant with an escape code + uint32_t val = this->parseEscape('\''); + if(this->getc() != '\'') { + throw ParseError::Todo("Proper error for lex failures"); + } + return Token((uint64_t)val, CORETYPE_CHAR); + } + break; } + case DOUBLEQUOTE: + throw ParseError::Todo("Strings"); + break; + default: + assert(!"bugcheck"); + } + } + } + catch(const Lexer::EndOfFile& e) + { + return Token(TOK_EOF); + } + //assert(!"bugcheck"); +} + 
+uint32_t Lexer::parseEscape(char enclosing) +{ + char ch = this->getc(); + switch(ch) + { + case 'u': { + // Unicode (up to six hex digits) + uint32_t val = 0; + ch = this->getc(); + if( !isxdigit(ch) ) + throw ParseError::Todo("Proper lex error for escape sequences"); + while( isxdigit(ch) ) + { + char tmp[2] = {ch, 0}; + val *= 16; + val += ::std::strtol(tmp, NULL, 16); + ch = this->getc(); + } + this->putback(); + return val; } + case '\\': + return '\\'; + default: + throw ParseError::Todo("Proper lex error for escape sequences"); + } +} + +char Lexer::getc() +{ + if( m_last_char_valid ) + { + m_last_char_valid = false; + } + else + { + m_last_char = m_istream.get(); + if( m_istream.eof() ) + throw Lexer::EndOfFile(); + } + //::std::cout << "getc(): '" << m_last_char << "'" << ::std::endl; + return m_last_char; +} + +void Lexer::putback() +{ +// ::std::cout << "putback(): " << m_last_char_valid << " '" << m_last_char << "'" << ::std::endl; + assert(!m_last_char_valid); + m_last_char_valid = true; +} + +Token::Token(): + m_type(TOK_NULL), + m_str("") +{ +} +Token::Token(enum eTokenType type): + m_type(type), + m_str("") +{ +} +Token::Token(enum eTokenType type, ::std::string str): + m_type(type), + m_str(str) +{ +} +Token::Token(uint64_t val, enum eCoreType datatype): + m_type(TOK_INTEGER), + m_datatype(datatype), + m_intval(val) +{ +} + +const char* Token::typestr(enum eTokenType type) +{ + switch(type) + { + case TOK_NULL: return "TOK_NULL"; + case TOK_EOF: return "TOK_EOF"; + + case TOK_WHITESPACE: return "TOK_WHITESPACE"; + case TOK_COMMENT: return "TOK_COMMENT"; + + // Value tokens + case TOK_IDENT: return "TOK_IDENT"; + case TOK_MACRO: return "TOK_MACRO"; + case TOK_LIFETIME: return "TOK_LIFETIME"; + case TOK_INTEGER: return "TOK_INTEGER"; + case TOK_CHAR: return "TOK_CHAR"; + case TOK_FLOAT: return "TOK_FLOAT"; + + case TOK_CATTR_OPEN: return "TOK_CATTR_OPEN"; + case TOK_ATTR_OPEN: return "TOK_ATTR_OPEN"; + + // Symbols + case TOK_PAREN_OPEN: return 
"TOK_PAREN_OPEN"; case TOK_PAREN_CLOSE: return "TOK_PAREN_CLOSE"; + case TOK_BRACE_OPEN: return "TOK_BRACE_OPEN"; case TOK_BRACE_CLOSE: return "TOK_BRACE_CLOSE"; + case TOK_LT: return "TOK_LT"; case TOK_GT: return "TOK_GT"; + case TOK_SQUARE_OPEN: return "TOK_SQUARE_OPEN";case TOK_SQUARE_CLOSE: return "TOK_SQUARE_CLOSE"; + case TOK_COMMA: return "TOK_COMMA"; + case TOK_SEMICOLON: return "TOK_SEMICOLON"; + case TOK_COLON: return "TOK_COLON"; + case TOK_DOUBLE_COLON: return "TOK_DOUBLE_COLON"; + case TOK_STAR: return "TOK_STAR"; case TOK_AMP: return "TOK_AMP"; + case TOK_PIPE: return "TOK_PIPE"; + + case TOK_FATARROW: return "TOK_FATARROW"; // => + case TOK_THINARROW: return "TOK_THINARROW"; // -> + + case TOK_PLUS: return "TOK_PLUS"; case TOK_DASH: return "TOK_DASH"; + case TOK_EXCLAM: return "TOK_EXCLAM"; + case TOK_PERCENT: return "TOK_PERCENT"; + case TOK_SLASH: return "TOK_SLASH"; + + case TOK_DOT: return "TOK_DOT"; + case TOK_DOUBLE_DOT: return "TOK_DOUBLE_DOT"; + case TOK_TRIPLE_DOT: return "TOK_TRIPLE_DOT"; + + case TOK_EQUAL: return "TOK_EQUAL"; + case TOK_PLUS_EQUAL: return "TOK_PLUS_EQUAL"; + case TOK_DASH_EQUAL: return "TOK_DASH_EQUAL"; + case TOK_PERCENT_EQUAL: return "TOK_PERCENT_EQUAL"; + case TOK_SLASH_EQUAL: return "TOK_SLASH_EQUAL"; + case TOK_STAR_EQUAL: return "TOK_STAR_EQUAL"; + case TOK_AMP_EQUAL: return "TOK_AMP_EQUAL"; + case TOK_PIPE_EQUAL: return "TOK_PIPE_EQUAL"; + + case TOK_DOUBLE_EQUAL: return "TOK_DOUBLE_EQUAL"; + case TOK_EXCLAM_EQUAL: return "TOK_EXCLAM_EQUAL"; + case TOK_GTE: return "TOK_GTE"; + case TOK_LTE: return "TOK_LTE"; + + case TOK_DOUBLE_AMP: return "TOK_DOUBLE_AMP"; + case TOK_DOUBLE_PIPE: return "TOK_DOUBLE_PIPE"; + case TOK_DOUBLE_LT: return "TOK_DOUBLE_LT"; + case TOK_DOUBLE_GT: return "TOK_DOUBLE_GT"; + + case TOK_QMARK: return "TOK_QMARK"; + case TOK_AT: return "TOK_AT"; + case TOK_TILDE: return "TOK_TILDE"; + case TOK_BACKSLASH: return "TOK_BACKSLASH"; + case TOK_CARET: return "TOK_CARET"; + case TOK_BACKTICK: return 
"TOK_BACKTICK"; + + // Reserved Words + case TOK_RWORD_PUB: return "TOK_RWORD_PUB"; + case TOK_RWORD_PRIV: return "TOK_RWORD_PRIV"; + case TOK_RWORD_MUT: return "TOK_RWORD_MUT"; + case TOK_RWORD_CONST: return "TOK_RWORD_CONST"; + case TOK_RWORD_STATIC: return "TOK_RWORD_STATIC"; + case TOK_RWORD_UNSAFE: return "TOK_RWORD_UNSAFE"; + case TOK_RWORD_EXTERN: return "TOK_RWORD_EXTERN"; + + case TOK_RWORD_CRATE: return "TOK_RWORD_CRATE"; + case TOK_RWORD_MOD: return "TOK_RWORD_MOD"; + case TOK_RWORD_STRUCT: return "TOK_RWORD_STRUCT"; + case TOK_RWORD_ENUM: return "TOK_RWORD_ENUM"; + case TOK_RWORD_TRAIT: return "TOK_RWORD_TRAIT"; + case TOK_RWORD_FN: return "TOK_RWORD_FN"; + case TOK_RWORD_USE: return "TOK_RWORD_USE"; + case TOK_RWORD_IMPL: return "TOK_RWORD_IMPL"; + case TOK_RWORD_TYPE: return "TOK_RWORD_TYPE"; + + case TOK_RWORD_WHERE: return "TOK_RWORD_WHERE"; + case TOK_RWORD_AS: return "TOK_RWORD_AS"; + + case TOK_RWORD_LET: return "TOK_RWORD_LET"; + case TOK_RWORD_MATCH: return "TOK_RWORD_MATCH"; + case TOK_RWORD_IF: return "TOK_RWORD_IF"; + case TOK_RWORD_ELSE: return "TOK_RWORD_ELSE"; + case TOK_RWORD_LOOP: return "TOK_RWORD_LOOP"; + case TOK_RWORD_WHILE: return "TOK_RWORD_WHILE"; + case TOK_RWORD_FOR: return "TOK_RWORD_FOR"; + case TOK_RWORD_IN: return "TOK_RWORD_IN"; + case TOK_RWORD_DO: return "TOK_RWORD_DO"; + + case TOK_RWORD_CONTINUE: return "TOK_RWORD_CONTINUE"; + case TOK_RWORD_BREAK: return "TOK_RWORD_BREAK"; + case TOK_RWORD_RETURN: return "TOK_RWORD_RETURN"; + case TOK_RWORD_YIELD: return "TOK_RWORD_YIELD"; + case TOK_RWORD_BOX: return "TOK_RWORD_BOX"; + case TOK_RWORD_REF: return "TOK_RWORD_REF"; + + case TOK_RWORD_FALSE: return "TOK_RWORD_FALSE"; + case TOK_RWORD_TRUE: return "TOK_RWORD_TRUE"; + case TOK_RWORD_SELF: return "TOK_RWORD_SELF"; + case TOK_RWORD_SUPER: return "TOK_RWORD_SUPER"; + + case TOK_RWORD_PROC: return "TOK_RWORD_PROC"; + case TOK_RWORD_MOVE: return "TOK_RWORD_MOVE"; + case TOK_RWORD_ONCE: return "TOK_RWORD_ONCE"; + + case 
TOK_RWORD_ABSTRACT: return "TOK_RWORD_ABSTRACT"; + case TOK_RWORD_FINAL: return "TOK_RWORD_FINAL"; + case TOK_RWORD_PURE: return "TOK_RWORD_PURE"; + case TOK_RWORD_OVERRIDE: return "TOK_RWORD_OVERRIDE"; + case TOK_RWORD_VIRTUAL: return "TOK_RWORD_VIRTUAL"; + + case TOK_RWORD_ALIGNOF: return "TOK_RWORD_ALIGNOF"; + case TOK_RWORD_OFFSETOF: return "TOK_RWORD_OFFSETOF"; + case TOK_RWORD_SIZEOF: return "TOK_RWORD_SIZEOF"; + case TOK_RWORD_TYPEOF: return "TOK_RWORD_TYPEOF"; + + case TOK_RWORD_BE: return "TOK_RWORD_BE"; + case TOK_RWORD_UNSIZED: return "TOK_RWORD_UNSIZED"; + } + return ">>BUGCHECK: BADTOK<<"; +} + +::std::ostream& operator<<(::std::ostream& os, Token& tok) +{ + os << Token::typestr(tok.type()) << "\"" << tok.str() << "\""; + return os; +} + +TTStream::TTStream(const TokenTree& input_tt): + m_input_tt(input_tt) +{ + m_stack.push_back( ::std::make_pair(0, &input_tt) ); +} +TTStream::~TTStream() +{ +} +Token TTStream::realGetToken() +{ + while(m_stack.size() > 0) + { + // If current index is above TT size, go up + unsigned int& idx = m_stack.back().first; + const TokenTree& tree = *m_stack.back().second; + + if(idx == 0 && tree.size() == 0) { + idx ++; + return tree.tok(); + } + + if(idx < tree.size()) + { + const TokenTree& subtree = tree[idx]; + idx ++; + if( subtree.size() == 0 ) { + return subtree.tok(); + } + else { + m_stack.push_back( ::std::make_pair(0, &subtree ) ); + } + } + else { + m_stack.pop_back(); + } + } + return Token(TOK_EOF); +} + +TokenStream::TokenStream(): + m_cache_valid(false) +{ +} +TokenStream::~TokenStream() +{ +} + +Token TokenStream::getToken() +{ + if( m_cache_valid ) + { + m_cache_valid = false; + return m_cache; + } + else + { + Token ret = this->realGetToken(); + ::std::cout << "getToken[" << typeid(*this).name() << "] - " << ret << ::std::endl; + return ret; + } +} +void TokenStream::putback(Token tok) +{ + m_cache_valid = true; + m_cache = tok; +} diff --git a/src/parse/lex.hpp b/src/parse/lex.hpp new file mode 100644 
index 00000000..dbf365a0 --- /dev/null +++ b/src/parse/lex.hpp @@ -0,0 +1,239 @@ +#ifndef LEX_HPP_INCLUDED +#define LEX_HPP_INCLUDED + +#include "../types.hpp" +#include +#include + +enum eTokenType +{ + TOK_NULL, + TOK_EOF, + + TOK_WHITESPACE, + TOK_COMMENT, + + // Value tokens + TOK_IDENT, + TOK_MACRO, + TOK_LIFETIME, + TOK_INTEGER, + TOK_CHAR, + TOK_FLOAT, + + TOK_CATTR_OPEN, + TOK_ATTR_OPEN, + + // Symbols + TOK_PAREN_OPEN, TOK_PAREN_CLOSE, + TOK_BRACE_OPEN, TOK_BRACE_CLOSE, + TOK_LT, TOK_GT, + TOK_SQUARE_OPEN,TOK_SQUARE_CLOSE, + TOK_COMMA, + TOK_SEMICOLON, + TOK_COLON, + TOK_DOUBLE_COLON, + TOK_STAR, TOK_AMP, + TOK_PIPE, + + TOK_FATARROW, // => + TOK_THINARROW, // -> + + TOK_PLUS, TOK_DASH, + TOK_EXCLAM, + TOK_PERCENT, + TOK_SLASH, + + TOK_DOT, + TOK_DOUBLE_DOT, + TOK_TRIPLE_DOT, + + TOK_EQUAL, + TOK_PLUS_EQUAL, + TOK_DASH_EQUAL, + TOK_PERCENT_EQUAL, + TOK_SLASH_EQUAL, + TOK_STAR_EQUAL, + TOK_AMP_EQUAL, + TOK_PIPE_EQUAL, + + TOK_DOUBLE_EQUAL, + TOK_EXCLAM_EQUAL, + TOK_GTE, + TOK_LTE, + + TOK_DOUBLE_AMP, + TOK_DOUBLE_PIPE, + TOK_DOUBLE_LT, + TOK_DOUBLE_GT, + + TOK_QMARK, + TOK_AT, + TOK_TILDE, + TOK_BACKSLASH, + TOK_CARET, + TOK_BACKTICK, + + // Reserved Words + TOK_RWORD_PUB, + TOK_RWORD_PRIV, + TOK_RWORD_MUT, + TOK_RWORD_CONST, + TOK_RWORD_STATIC, + TOK_RWORD_UNSAFE, + TOK_RWORD_EXTERN, + + TOK_RWORD_CRATE, + TOK_RWORD_MOD, + TOK_RWORD_STRUCT, + TOK_RWORD_ENUM, + TOK_RWORD_TRAIT, + TOK_RWORD_FN, + TOK_RWORD_USE, + TOK_RWORD_IMPL, + TOK_RWORD_TYPE, + + TOK_RWORD_WHERE, + TOK_RWORD_AS, + + TOK_RWORD_LET, + TOK_RWORD_MATCH, + TOK_RWORD_IF, + TOK_RWORD_ELSE, + TOK_RWORD_LOOP, + TOK_RWORD_WHILE, + TOK_RWORD_FOR, + TOK_RWORD_IN, + TOK_RWORD_DO, + + TOK_RWORD_CONTINUE, + TOK_RWORD_BREAK, + TOK_RWORD_RETURN, + TOK_RWORD_YIELD, + TOK_RWORD_BOX, + TOK_RWORD_REF, + + TOK_RWORD_FALSE, + TOK_RWORD_TRUE, + TOK_RWORD_SELF, + TOK_RWORD_SUPER, + + TOK_RWORD_PROC, + TOK_RWORD_MOVE, + TOK_RWORD_ONCE, + + TOK_RWORD_ABSTRACT, + TOK_RWORD_FINAL, + TOK_RWORD_PURE, + 
TOK_RWORD_OVERRIDE, + TOK_RWORD_VIRTUAL, + + TOK_RWORD_ALIGNOF, + TOK_RWORD_OFFSETOF, + TOK_RWORD_SIZEOF, + TOK_RWORD_TYPEOF, + + TOK_RWORD_BE, + TOK_RWORD_UNSIZED, +}; + +class Token +{ + enum eTokenType m_type; + ::std::string m_str; + enum eCoreType m_datatype; + union { + uint64_t m_intval; + double m_floatval; + }; +public: + Token(); + Token(enum eTokenType type); + Token(enum eTokenType type, ::std::string str); + Token(uint64_t val, enum eCoreType datatype); + Token(double val, enum eCoreType datatype); + + enum eTokenType type() const { return m_type; } + const ::std::string& str() const { return m_str; } + enum eCoreType datatype() const { return m_datatype; } + uint64_t intval() const { return m_intval; } + double floatval() const { return m_floatval; } + + static const char* typestr(enum eTokenType type); +}; + +extern ::std::ostream& operator<<(::std::ostream& os, Token& tok); + +class TokenStream +{ + bool m_cache_valid; + Token m_cache; +public: + TokenStream(); + virtual ~TokenStream(); + Token getToken(); + void putback(Token tok); +protected: + virtual Token realGetToken() = 0; +}; + +class Lexer +{ + ::std::ifstream m_istream; + bool m_last_char_valid; + char m_last_char; +public: + Lexer(::std::string filename); + + Token getToken(); + +private: + signed int getSymbol(); + uint32_t parseEscape(char enclosing); + + char getc(); + void putback(); + + class EndOfFile {}; +}; + +class TokenTree +{ + Token m_tok; + ::std::vector m_subtrees; +public: + TokenTree() {} + TokenTree(Token tok): + m_tok(tok) + { + } + TokenTree(::std::vector subtrees): + m_subtrees(subtrees) + { + } + + const unsigned int size() const { + return m_subtrees.size(); + } + const TokenTree& operator[](unsigned int idx) const { + return m_subtrees[idx]; + } + const Token& tok() const { + return m_tok; + } +}; + +class TTStream: + public TokenStream +{ + const TokenTree& m_input_tt; + ::std::vector< ::std::pair > m_stack; +public: + TTStream(const TokenTree& input_tt); + 
~TTStream(); + +protected: + virtual Token realGetToken(); +}; + +#endif // LEX_HPP_INCLUDED diff --git a/src/parse/parseerror.cpp b/src/parse/parseerror.cpp new file mode 100644 index 00000000..37beb863 --- /dev/null +++ b/src/parse/parseerror.cpp @@ -0,0 +1,51 @@ +/* + */ +#include "parseerror.hpp" +#include + +ParseError::Base::~Base() throw() +{ +} + +ParseError::Generic::Generic(::std::string message): + m_message(message) +{ + ::std::cout << "Generic(" << message << ")" << ::std::endl; +} + +ParseError::BugCheck::BugCheck(::std::string message): + m_message(message) +{ + ::std::cout << "BugCheck(" << message << ")" << ::std::endl; +} + +ParseError::Todo::Todo(::std::string message): + m_message(message) +{ + ::std::cout << "Todo(" << message << ")" << ::std::endl; +} +ParseError::Todo::~Todo() throw() +{ +} + +ParseError::BadChar::BadChar(char character): + m_char(character) +{ + ::std::cout << "BadChar(" << character << ")" << ::std::endl; +} +ParseError::BadChar::~BadChar() throw() +{ +} + +ParseError::Unexpected::Unexpected(Token tok): + m_tok(tok) +{ + ::std::cout << "Unexpected(" << tok << ")" << ::std::endl; +} +ParseError::Unexpected::Unexpected(Token tok, Token exp): m_tok(tok) // record the offending token, as the one-argument constructor does +{ + ::std::cout << "Unexpected(" << tok << ", " << exp << ")" << ::std::endl; +} +ParseError::Unexpected::~Unexpected() throw() +{ +} diff --git a/src/parse/parseerror.hpp b/src/parse/parseerror.hpp new file mode 100644 index 00000000..11324476 --- /dev/null +++ b/src/parse/parseerror.hpp @@ -0,0 +1,67 @@ +#ifndef PARSEERROR_HPP_INCLUDED +#define PARSEERROR_HPP_INCLUDED + +#include +#include "lex.hpp" + +namespace ParseError { + +class Base: + public ::std::exception +{ +public: + virtual ~Base() throw(); +}; + +class Generic: + public Base +{ + ::std::string m_message; +public: + Generic(::std::string message); + virtual ~Generic() throw () {} +}; + +class BugCheck: + public Base +{ + ::std::string m_message; +public: + BugCheck(::std::string message); + virtual ~BugCheck() throw () {} 
+}; + +class Todo: + public Base +{ + ::std::string m_message; +public: + Todo(::std::string message); + virtual ~Todo() throw (); + +}; + +class BadChar: + public Base +{ + char m_char; +public: + BadChar(char character); + virtual ~BadChar() throw (); + +}; + +class Unexpected: + public Base +{ + Token m_tok; +public: + Unexpected(Token tok); + Unexpected(Token tok, Token exp); + virtual ~Unexpected() throw (); + +}; + +} + +#endif // PARSEERROR_HPP_INCLUDED diff --git a/src/parse/preproc.cpp b/src/parse/preproc.cpp new file mode 100644 index 00000000..3e2865b2 --- /dev/null +++ b/src/parse/preproc.cpp @@ -0,0 +1,36 @@ +#include "preproc.hpp" +#include + +Preproc::Preproc(::std::string path): + m_lex(path) +{ + //ctor +} + +Preproc::~Preproc() +{ + //dtor +} + +Token Preproc::getTokenInt() +{ + while(true) + { + Token tok = m_lex.getToken(); + //::std::cout << "getTokenInt: tok = " << tok << ::std::endl; + switch(tok.type()) + { + case TOK_WHITESPACE: + continue; + case TOK_COMMENT: + continue; + default: + return tok; + } + } +} + +Token Preproc::realGetToken() +{ + return getTokenInt(); +} diff --git a/src/parse/preproc.hpp b/src/parse/preproc.hpp new file mode 100644 index 00000000..bedb3076 --- /dev/null +++ b/src/parse/preproc.hpp @@ -0,0 +1,20 @@ +#ifndef PREPROC_H +#define PREPROC_H + +#include "lex.hpp" + +class Preproc: + public TokenStream +{ + Lexer m_lex; + +public: + Preproc(::std::string path); + ~Preproc(); + + virtual Token realGetToken(); +private: + Token getTokenInt(); +}; + +#endif // PREPROC_H diff --git a/src/parse/root.cpp b/src/parse/root.cpp new file mode 100644 index 00000000..c9fc0401 --- /dev/null +++ b/src/parse/root.cpp @@ -0,0 +1,609 @@ +/* + */ +#include "preproc.hpp" +#include "../ast/ast.hpp" +#include "parseerror.hpp" +#include "common.hpp" +#include + +unsigned int TraceLog::depth = 0; + +::std::vector Parse_Path_GenericList(TokenStream& lex) +{ + TRACE_FUNCTION; + + ::std::vector types; + Token tok; + do { + types.push_back( 
Parse_Type(lex) ); + } while( GET_TOK(tok, lex) == TOK_COMMA ); + // HACK: Split >> into > + if(tok.type() == TOK_DOUBLE_GT) { + lex.putback(Token(TOK_GT)); + } + else { + CHECK_TOK(tok, TOK_GT); + } + return types; +} + +AST::Path Parse_PathFrom(TokenStream& lex, AST::Path path, eParsePathGenericMode generic_mode) +{ + TRACE_FUNCTION; + + Token tok; + + tok = lex.getToken(); + while(true) + { + ::std::vector params; + + CHECK_TOK(tok, TOK_IDENT); + ::std::string component = tok.str(); + + tok = lex.getToken(); + if(generic_mode == PATH_GENERIC_TYPE && tok.type() == TOK_LT) + { + // Type-mode generics "::path::to::Type" + params = Parse_Path_GenericList(lex); + tok = lex.getToken(); + } + if( tok.type() != TOK_DOUBLE_COLON ) { + path.append( AST::PathNode(component, params) ); + break; + } + tok = lex.getToken(); + if( generic_mode == PATH_GENERIC_EXPR && tok.type() == TOK_LT ) + { + // Expr-mode generics "::path::to::function::(arg1, arg2)" + params = Parse_Path_GenericList(lex); + tok = lex.getToken(); + if( tok.type() != TOK_DOUBLE_COLON ) { + path.append( AST::PathNode(component, params) ); + break; + } + } + path.append( AST::PathNode(component, params) ); + } + lex.putback(tok); + return path; +} + +AST::Path Parse_Path(TokenStream& lex, bool is_abs, eParsePathGenericMode generic_mode) +{ + if( is_abs ) + return Parse_PathFrom(lex, AST::Path(AST::Path::TagAbsolute()), generic_mode); + else + return Parse_PathFrom(lex, AST::Path(), generic_mode); +} + +static const struct { + const char* name; + enum eCoreType type; +} CORETYPES[] = { + {"char", CORETYPE_CHAR}, + {"f32", CORETYPE_F32}, + {"f64", CORETYPE_F64}, + {"i16", CORETYPE_I16}, + {"i32", CORETYPE_I32}, + {"i64", CORETYPE_I64}, + {"i8", CORETYPE_I8}, + {"int", CORETYPE_INT}, + {"u16", CORETYPE_U16}, + {"u32", CORETYPE_U32}, + {"u64", CORETYPE_U64}, + {"u8", CORETYPE_U8}, + {"uint", CORETYPE_UINT}, +}; + +TypeRef Parse_Type(TokenStream& lex) +{ + TRACE_FUNCTION; + + Token tok = lex.getToken(); + 
switch(tok.type()) + { + case TOK_IDENT: + // Either a path (with generics) + if( tok.str() == "_" ) + return TypeRef(); + for(unsigned int i = 0; i < sizeof(CORETYPES)/sizeof(CORETYPES[0]); i ++) + { + if( tok.str() < CORETYPES[i].name ) + break; + if( tok.str() == CORETYPES[i].name ) + return TypeRef(TypeRef::TagPrimitive(), CORETYPES[i].type); + } + // or a primitive + lex.putback(tok); + return TypeRef(TypeRef::TagPath(), Parse_Path(lex, false, PATH_GENERIC_TYPE)); // relative path + case TOK_DOUBLE_COLON: + // Path with generics + return TypeRef(TypeRef::TagPath(), Parse_Path(lex, true, PATH_GENERIC_TYPE)); + case TOK_AMP: + // Reference + tok = lex.getToken(); + if( tok.type() == TOK_RWORD_MUT ) { + // Mutable reference + return TypeRef(TypeRef::TagReference(), true, Parse_Type(lex)); + } + else { + lex.putback(tok); + // Immutable reference + return TypeRef(TypeRef::TagReference(), false, Parse_Type(lex)); + } + throw ParseError::BugCheck("Reached end of Parse_Type:AMP"); + case TOK_STAR: + // Pointer + switch( GET_TOK(tok, lex) ) + { + case TOK_RWORD_MUT: + // Mutable pointer + return TypeRef(TypeRef::TagPointer(), true, Parse_Type(lex)); + case TOK_RWORD_CONST: + // Immutable pointer + return TypeRef(TypeRef::TagPointer(), false, Parse_Type(lex)); + default: + throw ParseError::Unexpected(tok, Token(TOK_RWORD_CONST)); + } + throw ParseError::BugCheck("Reached end of Parse_Type:STAR"); + case TOK_SQUARE_OPEN: { + // Array + TypeRef inner = Parse_Type(lex); + tok = lex.getToken(); + if( tok.type() == TOK_COMMA ) { + // Sized array + GET_CHECK_TOK(tok, lex, TOK_DOUBLE_DOT); + AST::Expr array_size = Parse_Expr(lex, true); + GET_CHECK_TOK(tok, lex, TOK_SQUARE_CLOSE); + return TypeRef(TypeRef::TagSizedArray(), inner, array_size); + } + else { + CHECK_TOK(tok, TOK_SQUARE_CLOSE); // tok was already fetched above; only verify it is the closing ']' + return TypeRef(TypeRef::TagUnsizedArray(), inner); + } + throw ParseError::BugCheck("Reached end of Parse_Type:SQUARE"); + } + case TOK_PAREN_OPEN: { + ::std::vector types; + 
if( (tok = lex.getToken()).type() == TOK_PAREN_CLOSE) + return TypeRef(TypeRef::TagTuple(), types); + lex.putback(tok); // not an empty tuple: hand the first element's leading token back to Parse_Type + do + { + types.push_back( Parse_Type(lex) ); + } while( (tok = lex.getToken()).type() == TOK_COMMA ); + CHECK_TOK(tok, TOK_PAREN_CLOSE); // the loop condition already fetched the token that ended the list + return TypeRef(TypeRef::TagTuple(), types); } + case TOK_EXCLAM: + throw ParseError::Todo("noreturn type"); + default: + throw ParseError::Unexpected(tok); + } + throw ParseError::BugCheck("Reached end of Parse_Type"); +} + +AST::TypeParams Parse_TypeParams(TokenStream& lex) +{ + TRACE_FUNCTION; + + AST::TypeParams ret; + Token tok; + do { + bool is_lifetime = false; + tok = lex.getToken(); + switch(tok.type()) + { + case TOK_IDENT: + break; + case TOK_LIFETIME: + is_lifetime = true; + break; + default: + // Oopsie! + throw ParseError::Unexpected(tok); + } + AST::TypeParam param( is_lifetime, tok.str() ); + tok = lex.getToken(); + if( tok.type() == TOK_COLON ) + { + // TODO: Conditions + if( is_lifetime ) + { + throw ParseError::Todo("lifetime param conditions"); + } + + do { + tok = lex.getToken(); + if(tok.type() == TOK_LIFETIME) + param.addLifetimeBound(tok.str()); + else { + lex.putback(tok); + param.addTypeBound(Parse_Type(lex)); + } + tok = lex.getToken(); + } while(tok.type() == TOK_PLUS); + } + ret.push_back(param); + } while( tok.type() == TOK_COMMA ); + lex.putback(tok); + return ret; +} + +void Parse_TypeConds(TokenStream& lex, AST::TypeParams& params) +{ + TRACE_FUNCTION; + throw ParseError::Todo("type param conditions (where)"); +} + +/// Parse a function definition (after the 'fn') +AST::Function Parse_FunctionDef(TokenStream& lex) +{ + TRACE_FUNCTION; + + Token tok; + + // Name + GET_CHECK_TOK(tok, lex, TOK_IDENT); + ::std::string name = tok.str(); + + // Parameters + AST::TypeParams params; + if( GET_TOK(tok, lex) == TOK_LT ) + { + params = Parse_TypeParams(lex); + GET_CHECK_TOK(tok, lex, TOK_GT); + + //if(GET_TOK(tok, lex) == TOK_RWORD_WHERE) + //{ + // Parse_TypeConds(lex, params); 
+ // tok = lex.getToken(); + //} + } + else { + lex.putback(tok); + } + + AST::Function::Class fcn_class = AST::Function::CLASS_UNBOUND; + GET_CHECK_TOK(tok, lex, TOK_PAREN_OPEN); + GET_TOK(tok, lex); + if( tok.type() == TOK_AMP ) + { + // By-reference method + if( GET_TOK(tok, lex) == TOK_LIFETIME ) + { + throw ParseError::Todo("Lifetimes on self in methods"); + } + if( tok.type() == TOK_RWORD_MUT ) + { + GET_CHECK_TOK(tok, lex, TOK_RWORD_SELF); + fcn_class = AST::Function::CLASS_MUTMETHOD; + } + else + { + CHECK_TOK(tok, TOK_RWORD_SELF); + fcn_class = AST::Function::CLASS_REFMETHOD; + } + GET_TOK(tok, lex); + } + else if( tok.type() == TOK_RWORD_SELF ) + { + // By-value method + fcn_class = AST::Function::CLASS_VALMETHOD; + GET_TOK(tok, lex); + throw ParseError::Todo("By-value methods"); + } + else + { + // Unbound method + } + ::std::vector args; + if( tok.type() != TOK_PAREN_CLOSE ) + { + lex.putback(tok); + // Argument list + do { + GET_CHECK_TOK(tok, lex, TOK_IDENT); + ::std::string name = tok.str(); + GET_CHECK_TOK(tok, lex, TOK_COLON); + TypeRef type = Parse_Type(lex); + args.push_back( ::std::make_pair(name, type) ); + tok = lex.getToken(); + } while( tok.type() == TOK_COMMA ); + CHECK_TOK(tok, TOK_PAREN_CLOSE); + } + else { + // Eat 'tok', negative comparison + } + + TypeRef ret_type; + if( GET_TOK(tok, lex) == TOK_THINARROW ) + { + // Return type + ret_type = Parse_Type(lex); + } + else + { + lex.putback(tok); + } + + AST::Expr code = Parse_ExprBlock(lex); + + return AST::Function(name, params, fcn_class, ret_type, args, code); +} + +void Parse_Struct(AST::Module& mod, TokenStream& lex, const bool is_public, const ::std::vector meta_items) +{ + Token tok; + + GET_CHECK_TOK(tok, lex, TOK_IDENT); + ::std::string name = tok.str(); + tok = lex.getToken(); + AST::TypeParams params; + if( tok.type() == TOK_LT ) + { + params = Parse_TypeParams(lex); + GET_CHECK_TOK(tok, lex, TOK_GT); + tok = lex.getToken(); + if(tok.type() == TOK_RWORD_WHERE) + { + 
Parse_TypeConds(lex, params); + tok = lex.getToken(); + } + } + if(tok.type() == TOK_PAREN_OPEN) + { + TypeRef inner = Parse_Type(lex); + tok = lex.getToken(); + if(tok.type() != TOK_PAREN_CLOSE) + { + ::std::vector refs; + refs.push_back(inner); + while( (tok = lex.getToken()).type() == TOK_COMMA ) + { + refs.push_back( Parse_Type(lex) ); + } + if( tok.type() != TOK_PAREN_CLOSE ) + throw ParseError::Unexpected(tok, Token(TOK_PAREN_CLOSE)); + inner = TypeRef(TypeRef::TagTuple(), refs); + } + throw ParseError::Todo("tuple struct"); + } + else if(tok.type() == TOK_SEMICOLON) + { + throw ParseError::Todo("unit-like struct"); + } + else if(tok.type() == TOK_BRACE_OPEN) + { + ::std::vector items; + while( (tok = lex.getToken()).type() != TOK_BRACE_CLOSE ) + { + CHECK_TOK(tok, TOK_IDENT); + ::std::string name = tok.str(); + GET_CHECK_TOK(tok, lex, TOK_COLON); + TypeRef type = Parse_Type(lex); + items.push_back( ::std::make_pair(name, type) ); + tok = lex.getToken(); + if(tok.type() == TOK_BRACE_CLOSE) + break; + if(tok.type() != TOK_COMMA) + throw ParseError::Unexpected(tok, Token(TOK_COMMA)); + } + mod.add_struct(is_public, name, params, items); + } + else + { + throw ParseError::Unexpected(tok); + } +} + +/// Parse a meta-item declaration (either #![ or #[) +AST::MetaItem Parse_MetaItem(TokenStream& lex) +{ + Token tok; + GET_CHECK_TOK(tok, lex, TOK_IDENT); + ::std::string name = tok.str(); + switch(GET_TOK(tok, lex)) + { + case TOK_EQUAL: + throw ParseError::Todo("Meta item key-value"); + case TOK_PAREN_OPEN: { + ::std::vector items; + do { + items.push_back(Parse_MetaItem(lex)); + } while(GET_TOK(tok, lex) == TOK_COMMA); + CHECK_TOK(tok, TOK_PAREN_CLOSE); + return AST::MetaItem(name, items); } + default: + lex.putback(tok); + return AST::MetaItem(name); + } +} + +AST::Impl Parse_Impl(TokenStream& lex) +{ + Token tok; + + AST::TypeParams params; + // 1. 
(optional) type parameters + if( GET_TOK(tok, lex) == TOK_LT ) + { + params = Parse_TypeParams(lex); + GET_CHECK_TOK(tok, lex, TOK_GT); + } + else { + lex.putback(tok); + } + // 2. Either a trait name (with type params), or the type to impl + // - Don't care which at this stage + TypeRef trait_type; + TypeRef impl_type = Parse_Type(lex); + if( GET_TOK(tok, lex) == TOK_RWORD_FOR ) + { + // Implementing a trait for another type, get the target type + trait_type = impl_type; + impl_type = Parse_Type(lex); + } + else { + lex.putback(tok); + } + // Where clause + if( GET_TOK(tok, lex) == TOK_RWORD_WHERE ) + { + Parse_TypeConds(lex, params); + } + else { + lex.putback(tok); + } + GET_CHECK_TOK(tok, lex, TOK_BRACE_OPEN); + + AST::Impl impl(impl_type, trait_type); + + // A sequence of method implementations + while( GET_TOK(tok, lex) != TOK_BRACE_CLOSE ) + { + bool is_public = false; + if(tok.type() == TOK_RWORD_PUB) { + is_public = true; + GET_TOK(tok, lex); + } + switch(tok.type()) + { + case TOK_RWORD_FN: + impl.add_function(is_public, Parse_FunctionDef(lex)); + break; + + default: + throw ParseError::Unexpected(tok); + } + } + + return impl; +} + +AST::Module Parse_ModRoot(const ::std::string& path, Preproc& lex) +{ + Token tok; + + AST::Module mod; + + // Attributes on module/crate (will continue loop) + while( GET_TOK(tok, lex) == TOK_CATTR_OPEN ) + { + AST::MetaItem item = Parse_MetaItem(lex); + GET_CHECK_TOK(tok, lex, TOK_SQUARE_CLOSE); + + throw ParseError::Todo("Parent attrs"); + //mod_attrs.push_back( item ); + } + lex.putback(tok); + + // TODO: Handle known parent attribs if operating on crate root + + for(;;) + { + // Check 1 - End of module (either via a closing brace, or EOF) + switch(GET_TOK(tok, lex)) + { + case TOK_BRACE_CLOSE: + if( path.size() > 0 ) + throw ParseError::Unexpected(tok); + return mod; + case TOK_EOF: + if( path.size() == 0 ) + throw ParseError::Unexpected(tok); + return mod; + default: + lex.putback(tok); + break; + } + + // Attributes on 
the following item + ::std::vector meta_items; + while( GET_TOK(tok, lex) == TOK_ATTR_OPEN ) + { + meta_items.push_back( Parse_MetaItem(lex) ); + GET_CHECK_TOK(tok, lex, TOK_SQUARE_CLOSE); + } + lex.putback(tok); + + // Module visibility + bool is_public = false; + if( GET_TOK(tok, lex) == TOK_RWORD_PUB ) + { + is_public = true; + } + else + { + lex.putback(tok); + } + + // The actual item! + switch( GET_TOK(tok, lex) ) + { + case TOK_RWORD_USE: + // TODO: Do manual path parsing here, as use has its own special set of quirks + mod.add_alias( is_public, Parse_Path(lex, true, PATH_GENERIC_NONE) ); + GET_CHECK_TOK(tok, lex, TOK_SEMICOLON); + break; + + case TOK_RWORD_CONST: { + GET_CHECK_TOK(tok, lex, TOK_IDENT); + ::std::string name = tok.str(); + + GET_CHECK_TOK(tok, lex, TOK_COLON); + TypeRef type = Parse_Type(lex); + GET_CHECK_TOK(tok, lex, TOK_EQUAL); + AST::Expr val = Parse_Expr(lex, true); + GET_CHECK_TOK(tok, lex, TOK_SEMICOLON); + mod.add_constant(is_public, name, type, val); + break; } + case TOK_RWORD_STATIC: { + tok = lex.getToken(); + bool is_mut = false; + if(tok.type() == TOK_RWORD_MUT) { + is_mut = true; + tok = lex.getToken(); + } + CHECK_TOK(tok, TOK_IDENT); + ::std::string name = tok.str(); + + GET_CHECK_TOK(tok, lex, TOK_COLON); + TypeRef type = Parse_Type(lex); + + GET_CHECK_TOK(tok, lex, TOK_EQUAL); + + AST::Expr val = Parse_Expr(lex, true); + + GET_CHECK_TOK(tok, lex, TOK_SEMICOLON); + mod.add_global(is_public, is_mut, name, type, val); + break; } + + case TOK_RWORD_FN: + mod.add_function(is_public, Parse_FunctionDef(lex)); + break; + case TOK_RWORD_STRUCT: + Parse_Struct(mod, lex, is_public, meta_items); + break; + case TOK_RWORD_ENUM: + throw ParseError::Todo("modroot enum"); + case TOK_RWORD_IMPL: + mod.add_impl(Parse_Impl(lex)); + break; + case TOK_RWORD_TRAIT: + throw ParseError::Todo("modroot trait"); + + case TOK_RWORD_MOD: + throw ParseError::Todo("sub-modules"); + + default: + throw ParseError::Unexpected(tok); + } + } +} + +AST::Crate 
Parse_Crate(::std::string mainfile) +{ + Preproc lex(mainfile); + return AST::Crate( Parse_ModRoot(mainfile, lex) ); +} diff --git a/src/parse/tokentree.hpp b/src/parse/tokentree.hpp new file mode 100644 index 00000000..4105897d --- /dev/null +++ b/src/parse/tokentree.hpp @@ -0,0 +1,11 @@ +#ifndef TOKENTREE_HPP_INCLUDED +#define TOKENTREE_HPP_INCLUDED + +#include "lex.hpp" + + +extern TokenTree Parse_TT(TokenStream& lex); +extern TokenTree Parse_TT_Expr(TokenStream& lex); +extern TokenTree Parse_TT_Stmt(TokenStream& lex); + +#endif // TOKENTREE_HPP_INCLUDED diff --git a/src/types.cpp b/src/types.cpp new file mode 100644 index 00000000..4bfb448f --- /dev/null +++ b/src/types.cpp @@ -0,0 +1,8 @@ +/* + */ +#include "types.hpp" +#include "ast/ast.hpp" + +TypeRef::TypeRef(TypeRef::TagSizedArray, TypeRef inner, AST::Expr size_expr) +{ +} diff --git a/src/types.hpp b/src/types.hpp new file mode 100644 index 00000000..18972143 --- /dev/null +++ b/src/types.hpp @@ -0,0 +1,37 @@ +#ifndef TYPES_HPP_INCLUDED +#define TYPES_HPP_INCLUDED + +#include +#include "coretypes.hpp" +#include "ast/path.hpp" + +namespace AST { +class Expr; +} + +class TypeRef +{ +public: + TypeRef() {} + + struct TagUnit {}; // unit maps to a zero-length tuple, just easier to type + TypeRef(TagUnit) {} + + struct TagPrimitive {}; + TypeRef(TagPrimitive, enum eCoreType type) {} + struct TagTuple {}; + TypeRef(TagTuple _, ::std::vector inner_types) {} + struct TagReference {}; + TypeRef(TagReference _, bool is_mut, TypeRef inner_type) {} + struct TagPointer {}; + TypeRef(TagPointer _, bool is_mut, TypeRef inner_type) {} + struct TagSizedArray {}; + TypeRef(TagSizedArray _, TypeRef inner_type, AST::Expr size); + struct TagUnsizedArray {}; + TypeRef(TagUnsizedArray _, TypeRef inner_type) {} + + struct TagPath {}; + TypeRef(TagPath, AST::Path path) {} +}; + +#endif // TYPES_HPP_INCLUDED -- cgit v1.2.3