diff options
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | src/ast/pattern.hpp | 10 | ||||
-rw-r--r-- | src/include/debug.hpp | 7 | ||||
-rw-r--r-- | src/parse/common.hpp | 5 | ||||
-rw-r--r-- | src/parse/expr.cpp | 410 | ||||
-rw-r--r-- | src/parse/lex.cpp | 88 | ||||
-rw-r--r-- | src/parse/lex.hpp | 13 | ||||
-rw-r--r-- | src/parse/preproc.cpp | 51 | ||||
-rw-r--r-- | src/parse/preproc.hpp | 23 | ||||
-rw-r--r-- | src/parse/root.cpp | 170 |
10 files changed, 489 insertions, 290 deletions
@@ -16,7 +16,7 @@ BIN := bin/mrustc$(EXESUF) OBJ := main.o macros.o types.o serialise.o OBJ += ast/ast.o ast/path.o ast/expr.o -OBJ += parse/parseerror.o parse/lex.o parse/preproc.o parse/root.o parse/expr.o +OBJ += parse/parseerror.o parse/lex.o parse/root.o parse/expr.o OBJ += dump_as_rust.o OBJ += convert/ast_iterate.o OBJ += convert/resolve.o convert/typecheck_bounds.o convert/typecheck_params.o convert/typecheck_expr.o diff --git a/src/ast/pattern.hpp b/src/ast/pattern.hpp index 5b03a8a2..7544d53c 100644 --- a/src/ast/pattern.hpp +++ b/src/ast/pattern.hpp @@ -5,6 +5,7 @@ #include <vector> #include <memory> #include <string> +#include <tagged_enum.hpp> namespace AST { @@ -30,6 +31,7 @@ private: ::std::string m_binding; Path m_path; unique_ptr<ExprNode> m_node; + unique_ptr<ExprNode> m_node2; // ONLY used for range values ::std::vector<Pattern> m_sub_patterns; public: Pattern(Pattern&& o) noexcept: @@ -80,11 +82,13 @@ public: {} struct TagValue {}; - Pattern(TagValue, unique_ptr<ExprNode> node): + Pattern(TagValue, unique_ptr<ExprNode> node, unique_ptr<ExprNode> node2 = 0): m_class(VALUE), - m_node( ::std::move(node) ) + m_node( ::std::move(node) ), + m_node2( ::std::move(node2) ) {} + struct TagReference {}; Pattern(TagReference, Pattern sub_pattern): m_class(REF), @@ -111,6 +115,8 @@ public: m_binding = name; } + ::std::unique_ptr<ExprNode> take_node() { assert(m_class == VALUE); m_class = ANY; return ::std::move(m_node); } + // Accessors const ::std::string& binding() const { return m_binding; } BindType type() const { return m_class; } diff --git a/src/include/debug.hpp b/src/include/debug.hpp index 11301d27..39af8ab0 100644 --- a/src/include/debug.hpp +++ b/src/include/debug.hpp @@ -34,8 +34,12 @@ class TraceLog { const char* m_tag; public: + TraceLog(const char* tag, ::std::string info): m_tag(tag) { + DEBUG(" >> " << m_tag << "(" << info << ")"); + INDENT(); + } TraceLog(const char* tag): m_tag(tag) { - DEBUG(">> " << m_tag); + DEBUG(" >> " << m_tag); 
INDENT(); } ~TraceLog() { @@ -44,5 +48,6 @@ public: } }; #define TRACE_FUNCTION TraceLog _tf_(__func__) +#define TRACE_FUNCTION_F(ss) TraceLog _tf_(__func__, FMT(ss)) diff --git a/src/parse/common.hpp b/src/parse/common.hpp index da7d767d..9659500c 100644 --- a/src/parse/common.hpp +++ b/src/parse/common.hpp @@ -25,10 +25,15 @@ enum eParsePathGenericMode PATH_GENERIC_TYPE
};
+extern AST::MetaItem Parse_MetaItem(TokenStream& lex);
extern AST::Path Parse_Path(TokenStream& lex, bool is_abs, eParsePathGenericMode generic_mode);
extern ::std::vector<TypeRef> Parse_Path_GenericList(TokenStream& lex);
extern TypeRef Parse_Type(TokenStream& lex);
+
extern void Parse_Use(TokenStream& lex, ::std::function<void(AST::Path, ::std::string)> fcn);
+extern void Parse_Struct(AST::Module& mod, TokenStream& lex, bool is_public, const AST::MetaItems meta_items);
+extern AST::Impl Parse_Impl(TokenStream& lex, bool is_unsafe=false);
+
extern AST::Function Parse_FunctionDef(TokenStream& lex, ::std::string abi, AST::MetaItems attrs, bool allow_self, bool can_be_prototype);
extern AST::Expr Parse_Expr(TokenStream& lex, bool const_only);
extern AST::Expr Parse_ExprBlock(TokenStream& lex);
diff --git a/src/parse/expr.cpp b/src/parse/expr.cpp index eadd8e56..601c7e69 100644 --- a/src/parse/expr.cpp +++ b/src/parse/expr.cpp @@ -1,6 +1,5 @@ /*
*/
-#include "preproc.hpp"
#include "parseerror.hpp"
#include "../ast/ast.hpp"
#include "common.hpp"
@@ -13,10 +12,12 @@ typedef ::std::unique_ptr<AST::ExprNode> ExprNodeP; using AST::ExprNode;
ExprNodeP Parse_ExprBlockNode(TokenStream& lex);
-ExprNodeP Parse_Stmt(TokenStream& lex, bool& opt_semicolon);
+ExprNodeP Parse_Stmt(TokenStream& lex);
ExprNodeP Parse_Expr0(TokenStream& lex);
ExprNodeP Parse_ExprBlocks(TokenStream& lex);
ExprNodeP Parse_IfStmt(TokenStream& lex);
+ExprNodeP Parse_WhileStmt(TokenStream& lex, ::std::string lifetime);
+ExprNodeP Parse_ForStmt(TokenStream& lex, ::std::string lifetime);
ExprNodeP Parse_Expr_Match(TokenStream& lex);
ExprNodeP Parse_Expr1(TokenStream& lex);
@@ -123,8 +124,34 @@ AST::Pattern Parse_Pattern(TokenStream& lex) pat.set_bind(bind_name, is_ref, is_mut);
return ::std::move(pat);
}
+
+AST::Pattern Parse_PatternReal(TokenStream& lex);
+AST::Pattern Parse_PatternReal1(TokenStream& lex);
+
/** Parse a pattern, including the `left ... right` value-range form.
 *
 * One pattern is read with Parse_PatternReal1(). If the next token is
 * TOK_TRIPLE_DOT, a second pattern is read the same way; the expression nodes
 * of both are taken out and passed as the first and second node of a new
 * TagValue pattern. Otherwise the look-ahead token is put back and the first
 * pattern is returned unchanged.
 *
 * Throws ParseError::Generic when either side of `...` is not a VALUE pattern.
 */
AST::Pattern Parse_PatternReal(TokenStream& lex)
{
+ Token tok;
+ AST::Pattern ret = Parse_PatternReal1(lex);
+ if( GET_TOK(tok, lex) == TOK_TRIPLE_DOT )
+ {
+ if( ret.type() != AST::Pattern::VALUE )
+ throw ParseError::Generic(lex, "Using '...' with a non-value on left");
+ auto leftval = ret.take_node(); // take_node() hands over the expression node and resets ret to an ANY pattern
+ auto right_pat = Parse_PatternReal1(lex); // the right-hand side is parsed without a further range check
+ if( right_pat.type() != AST::Pattern::VALUE )
+ throw ParseError::Generic(lex, "Using '...' with a non-value on right");
+ auto rightval = right_pat.take_node();
+
+ return AST::Pattern(AST::Pattern::TagValue(), ::std::move(leftval), ::std::move(rightval));
+ }
+ else
+ {
+ lex.putback(tok); // not a range: return the look-ahead token to the stream
+ return ret;
+ }
+}
+AST::Pattern Parse_PatternReal1(TokenStream& lex)
+{
TRACE_FUNCTION;
Token tok;
@@ -190,135 +217,287 @@ AST::Pattern Parse_PatternReal_Path(TokenStream& lex, AST::Path path) return child_pats;
}
+ExprNodeP Parse_ExprBlockNode(TokenStream& lex);
+ExprNodeP Parse_ExprBlockLine(TokenStream& lex, bool *expect_end);
+void Parse_ExternBlock(TokenStream& lex, AST::MetaItems attrs, ::std::vector< AST::Item<AST::Function> >& imports);
+
/** Parse a brace-delimited block and return it as an AST::ExprNode_Block.
 *
 * Behaviour of the added (`+`) lines: after the opening brace, each iteration
 * first collects attributes (TOK_ATTR_OPEN ... TOK_SQUARE_CLOSE) into
 * item_attrs and then dispatches on the next token:
 *  - use / extern / const / struct / impl are parsed as items and stored in a
 *    block-local AST::Module that is allocated the first time one is seen;
 *  - anything else is put back and handed to Parse_ExprBlockLine(), whose
 *    result is appended to the statement list.
 * The loop ends when the closing brace is read.
 *
 * NOTE(review): in the lines shown, local_mod is filled in but is not passed
 * to the returned node, and item_attrs is only forwarded for extern and
 * struct - confirm whether both are intentional.
 */
ExprNodeP Parse_ExprBlockNode(TokenStream& lex)
{
TRACE_FUNCTION;
Token tok;
::std::vector<ExprNodeP> nodes;
- GET_CHECK_TOK(tok, lex, TOK_BRACE_OPEN);
+ ::std::unique_ptr<AST::Module> local_mod; // stays null until the first item is seen in this block
+ GET_CHECK_TOK(tok, lex, TOK_BRACE_OPEN);
while( GET_TOK(tok, lex) != TOK_BRACE_CLOSE )
{
- lex.putback(tok);
- bool opt_semicolon = false;
- // NOTE: This semicolon handling is SHIT.
- nodes.push_back(Parse_Stmt(lex, opt_semicolon));
- if( GET_TOK(tok, lex) != TOK_BRACE_CLOSE )
+ AST::MetaItems item_attrs;
+ while( tok.type() == TOK_ATTR_OPEN )
{
- if( !opt_semicolon )
+ item_attrs.push_back( Parse_MetaItem(lex) );
+ GET_CHECK_TOK(tok, lex, TOK_SQUARE_CLOSE);
+ GET_TOK(tok, lex); // read the token after the attribute; loops again if it opens another attribute
+ }
+
+ switch(tok.type())
+ {
+ // Items:
+ // - 'use'
+ case TOK_RWORD_USE:
+ if( !local_mod.get() ) local_mod.reset( new AST::Module("") );
+ Parse_Use(
+ lex,
+ [&local_mod](AST::Path p, std::string s) {
+ local_mod->imports().push_back( AST::Item<AST::Path>( ::std::move(s), ::std::move(p), false ) );
+ }
+ );
+ GET_CHECK_TOK(tok, lex, TOK_SEMICOLON);
+ break;
+ // 'extern' blocks
+ case TOK_RWORD_EXTERN:
+ if( !local_mod.get() ) local_mod.reset( new AST::Module("") );
+ Parse_ExternBlock(lex, ::std::move(item_attrs), local_mod->functions());
+ break;
+ // - 'const'
+ case TOK_RWORD_CONST:
+ if( !local_mod.get() ) local_mod.reset( new AST::Module("") );
{
- CHECK_TOK(tok, TOK_SEMICOLON);
+ GET_CHECK_TOK(tok, lex, TOK_IDENT);
+ ::std::string name = tok.str();
+ GET_CHECK_TOK(tok, lex, TOK_COLON);
+ TypeRef type = Parse_Type(lex);
+ GET_CHECK_TOK(tok, lex, TOK_EQUAL);
+ auto val = Parse_Expr1(lex);
+ GET_CHECK_TOK(tok, lex, TOK_SEMICOLON);
+
+ local_mod->statics().push_back( AST::Item<AST::Static>(
+ ::std::move(name),
+ AST::Static(AST::Static::CONST, ::std::move(type), ::std::move(val)),
+ false ) );
+ break;
}
- else
+ // - 'struct'
+ case TOK_RWORD_STRUCT:
+ if( !local_mod.get() ) local_mod.reset( new AST::Module("") );
+ Parse_Struct(*local_mod, lex, false, item_attrs);
+ break;
+ // - 'impl'
+ case TOK_RWORD_IMPL:
+ if( !local_mod.get() ) local_mod.reset( new AST::Module("") );
+ local_mod->add_impl(Parse_Impl(lex, false));
+ break;
+ default: {
+ lex.putback(tok);
+ bool expect_end = false;
+ nodes.push_back(Parse_ExprBlockLine(lex, &expect_end));
+ // expect_end is set to true by Parse_ExprBlockLine when a statement was not followed by a semicolon; only the closing brace may come next
+ if( expect_end )
+ {
+ if( GET_TOK(tok, lex) != TOK_BRACE_CLOSE )
+ {
+ throw ParseError::Unexpected(lex, tok, Token(TOK_BRACE_CLOSE));
+ }
lex.putback(tok);
- }
- else
- {
- goto pass_value;
+ }
+ break;
+ }
}
}
- nodes.push_back(nullptr);
-pass_value:
return NEWNODE( AST::ExprNode_Block, ::std::move(nodes) );
}
-ExprNodeP Parse_Stmt(TokenStream& lex, bool& opt_semicolon)
+/// Parse a single statement-level line from inside a block.
+/// A leading lifetime label followed by `:` is accepted before loop, while, for, if, match or a nested block.
+/// Without a label: a lone `;` returns a null node, and `let`, nested blocks, loop/while/for/if/match are parsed directly.
+/// Anything else goes to Parse_Stmt(); *expect_end is then set to true when no `;` follows (it is never set to false here).
+/// Items such as use/extern/const are consumed by Parse_ExprBlockNode before this function is called from it.
+ExprNodeP Parse_ExprBlockLine(TokenStream& lex, bool *expect_end)
{
TRACE_FUNCTION;
- Token tok;
-
- ::std::string lifetime;
- if( GET_TOK(tok, lex) == TOK_LIFETIME ) {
- lifetime = tok.str();
- GET_CHECK_TOK(tok, lex, TOK_COLON);
- }
- else {
- lex.putback(tok);
- }
+ Token tok;
- // 1. Handle 'let'
- // 2. Handle new blocks
- // 3. Handle a sequence of expressions broken by ';'
- switch(GET_TOK(tok, lex))
+ if( GET_TOK(tok, lex) == TOK_LIFETIME )
{
- case TOK_BRACE_OPEN:
- lex.putback(tok);
- opt_semicolon = true;
- return Parse_ExprBlockNode(lex);
- case TOK_RWORD_USE: {
- opt_semicolon = false;
- ::std::vector< ::std::pair< ::std::string, AST::Path> > imports;
- Parse_Use(lex, [&imports](AST::Path p, std::string s) {
- imports.push_back( ::std::make_pair( ::std::move(s), ::std::move(p) ) );
- });
- return NEWNODE( AST::ExprNode_Import, ::std::move(imports) );
- }
- // 'extern' blocks
- case TOK_RWORD_EXTERN: {
- opt_semicolon = true;
- // - default ABI is "C"
- ::std::string abi = "C";
- if( GET_TOK(tok, lex) == TOK_STRING ) {
- abi = tok.str();
+ // Lifetimes can only precede loops... and blocks?
+ ::std::string lifetime = tok.str();
+ GET_CHECK_TOK(tok, lex, TOK_COLON);
+
+ switch( GET_TOK(tok, lex) )
+ {
+ case TOK_RWORD_LOOP:
+ return NEWNODE( AST::ExprNode_Loop, lifetime, Parse_ExprBlockNode(lex) );
+ case TOK_RWORD_WHILE:
+ return Parse_WhileStmt(lex, lifetime);
+ case TOK_RWORD_FOR:
+ return Parse_ForStmt(lex, lifetime);
+ case TOK_RWORD_IF:
+ return Parse_IfStmt(lex); // NOTE(review): the label is accepted but not forwarded for if, match and nested blocks - confirm intended
+ case TOK_RWORD_MATCH:
+ return Parse_Expr_Match(lex);
+ case TOK_BRACE_OPEN:
+ lex.putback(tok);
+ return Parse_ExprBlockNode(lex);
+
+ default:
+ throw ParseError::Unexpected(lex, tok);
}
- else
+ }
+ else
+ {
+ switch( tok.type() )
+ {
+ case TOK_SEMICOLON:
+ return 0; // empty statement: yields a null node
+ case TOK_BRACE_OPEN:
lex.putback(tok);
+ return Parse_ExprBlockNode(lex);
- ::std::vector< ::std::pair< ::std::string, AST::Function> > imports;
- bool is_block = false;
- if( GET_TOK(tok, lex) == TOK_BRACE_OPEN )
- is_block = true;
- else
- lex.putback(tok);
+ // let binding: `let PATTERN [: TYPE] = VALUE`
+ case TOK_RWORD_LET: {
+ AST::Pattern pat = Parse_Pattern(lex);
+ TypeRef type;
+ if( GET_TOK(tok, lex) == TOK_COLON ) {
+ type = Parse_Type(lex);
+ GET_CHECK_TOK(tok, lex, TOK_EQUAL);
+ }
+ else {
+ CHECK_TOK(tok, TOK_EQUAL);
+ }
+ ExprNodeP val = Parse_ExprBlocks(lex);
+ return NEWNODE( AST::ExprNode_LetBinding, ::std::move(pat), ::std::move(type), ::std::move(val) );
+ }
- do {
- ::std::string name;
- switch( GET_TOK(tok, lex) )
+ // blocks that do not need semicolons (unlabelled, so the lifetime argument is empty)
+ case TOK_RWORD_LOOP:
+ return NEWNODE( AST::ExprNode_Loop, "", Parse_ExprBlockNode(lex) );
+ case TOK_RWORD_WHILE:
+ return Parse_WhileStmt(lex, "");
+ case TOK_RWORD_FOR:
+ return Parse_ForStmt(lex, "");
+ case TOK_RWORD_IF:
+ return Parse_IfStmt(lex);
+ case TOK_RWORD_MATCH:
+ return Parse_Expr_Match(lex);
+
+ // Fall through to the statement code
+ default: {
+ lex.putback(tok);
+ auto ret = Parse_Stmt(lex);
+ if( GET_TOK(tok, lex) != TOK_SEMICOLON )
{
- case TOK_RWORD_FN:
- GET_CHECK_TOK(tok, lex, TOK_IDENT);
- name = tok.str();
- imports.push_back( ::std::make_pair( ::std::move(name), Parse_FunctionDef(lex, abi, AST::MetaItems(), false, true) ) );
- GET_CHECK_TOK(tok, lex, TOK_SEMICOLON);
- break;
- default:
- throw ParseError::Unexpected(lex, tok);
+ lex.putback(tok);
+ *expect_end = true; // no semicolon: the caller must see the end of the block next
+ }
+ return ::std::move(ret);
+ break; // unreachable after the return above
}
- } while( is_block && LOOK_AHEAD(lex) != TOK_BRACE_CLOSE );
- if( is_block )
- GET_CHECK_TOK(tok, lex, TOK_BRACE_CLOSE);
- return NEWNODE( AST::ExprNode_Extern, ::std::move(imports) );
- }
- case TOK_RWORD_CONST: {
- opt_semicolon = false;
- GET_CHECK_TOK(tok, lex, TOK_IDENT);
- ::std::string name = tok.str();
- GET_CHECK_TOK(tok, lex, TOK_COLON);
- TypeRef type = Parse_Type(lex);
- GET_CHECK_TOK(tok, lex, TOK_EQUAL);
- auto val = Parse_Expr1(lex);
- return NEWNODE( AST::ExprNode_Const, ::std::move(name), ::std::move(type), ::std::move(val) );
}
- case TOK_RWORD_LET: {
- opt_semicolon = false;
- AST::Pattern pat = Parse_Pattern(lex);
- TypeRef type;
- if( GET_TOK(tok, lex) == TOK_COLON ) {
- type = Parse_Type(lex);
- GET_CHECK_TOK(tok, lex, TOK_EQUAL);
+ }
+}
+/** Parse an `extern` declaration found inside a block expression.
+ *
+ * Parse_ExprBlockNode calls this right after it has read the `extern` keyword.
+ * Accepted input, as coded: an optional ABI string (default C), then either
+ * one `fn NAME <prototype> ;` or a braced group of such prototypes. Inside a
+ * braced group each prototype may be preceded by attributes.
+ *
+ * lex     - token stream to read from
+ * attrs   - attributes seen before `extern`; copied into inner_attrs for the
+ *           non-braced form
+ * imports - receives one AST::Item<AST::Function> per prototype; the function
+ *           is parsed with allow_self = false and can_be_prototype = true
+ *
+ * Throws ParseError::Unexpected when a token other than `fn` starts an entry.
+ *
+ * NOTE(review): three things in the code below look unintended - confirm:
+ *  - inner_attrs is collected but Parse_FunctionDef is given an empty
+ *    AST::MetaItems(), so the attributes are not forwarded;
+ *  - the non-braced form requires a `;` after the prototype and then a second
+ *    `;` after the loop;
+ *  - the do/while body runs once before the closing-brace check, so an empty
+ *    braced group reaches the default case and throws.
+ */
+void Parse_ExternBlock(TokenStream& lex, AST::MetaItems attrs, ::std::vector< AST::Item<AST::Function> >& imports)
+{
+ Token tok;
+
+ // default ABI is C, used when no ABI string follows
+ ::std::string abi = "C";
+ if( GET_TOK(tok, lex) == TOK_STRING ) {
+ abi = tok.str();
+ }
+ else
+ lex.putback(tok);
+
+ bool is_block = false; // true when the prototypes are wrapped in braces
+ if( GET_TOK(tok, lex) == TOK_BRACE_OPEN ) {
+ is_block = true;
+ }
+ else
+ lex.putback(tok);
+
+ do {
+ AST::MetaItems inner_attrs;
+ if( is_block )
+ {
+ while( GET_TOK(tok, lex) == TOK_ATTR_OPEN )
+ {
+ inner_attrs.push_back( Parse_MetaItem(lex) );
+ GET_CHECK_TOK(tok, lex, TOK_SQUARE_CLOSE);
+ }
+ lex.putback(tok); // first token that is not an attribute opener
}
- else {
- CHECK_TOK(tok, TOK_EQUAL);
+ else
+ {
+ inner_attrs = attrs;
}
- ExprNodeP val = Parse_ExprBlocks(lex);
- return NEWNODE( AST::ExprNode_LetBinding, ::std::move(pat), ::std::move(type), ::std::move(val) );
+ ::std::string name;
+ switch( GET_TOK(tok, lex) )
+ {
+ case TOK_RWORD_FN:
+ GET_CHECK_TOK(tok, lex, TOK_IDENT);
+ name = tok.str();
+ imports.push_back( AST::Item<AST::Function>(
+ ::std::move(name),
+ Parse_FunctionDef(lex, abi, AST::MetaItems(), false, true),
+ false
+ ) );
+ GET_CHECK_TOK(tok, lex, TOK_SEMICOLON);
+ break;
+ default:
+ throw ParseError::Unexpected(lex, tok);
}
+ } while( is_block && LOOK_AHEAD(lex) != TOK_BRACE_CLOSE );
+ if( is_block )
+ GET_CHECK_TOK(tok, lex, TOK_BRACE_CLOSE);
+ else
+ GET_CHECK_TOK(tok, lex, TOK_SEMICOLON);
+}
+/** Parse the rest of a `while` loop; every caller shown invokes this after reading the `while` keyword.
+ *
+ * Two forms are recognised:
+ *  - `while let PATTERN = VALUE { ... }` builds an ExprNode_Loop tagged
+ *    WHILELET from the pattern, the value (Parse_Expr0) and the body;
+ *  - `while COND { ... }` builds an ExprNode_Loop from the condition
+ *    (Parse_Expr1) and the body.
+ *
+ * lifetime is the loop label text; callers pass an empty string for an
+ * unlabelled loop. Usable both at statement level and inside an expression.
+ */
+ExprNodeP Parse_WhileStmt(TokenStream& lex, ::std::string lifetime)
+{
+ Token tok;
+
+ if( GET_TOK(tok, lex) == TOK_RWORD_LET ) {
+ auto pat = Parse_Pattern(lex);
+ GET_CHECK_TOK(tok, lex, TOK_EQUAL);
+ auto val = Parse_Expr0(lex);
+ return NEWNODE( AST::ExprNode_Loop, lifetime, AST::ExprNode_Loop::WHILELET,
+ ::std::move(pat), ::std::move(val), Parse_ExprBlockNode(lex) );
+ }
+ else {
+ lex.putback(tok); // not `let`: the token belongs to the condition expression
+ ExprNodeP cnd = Parse_Expr1(lex);
+ return NEWNODE( AST::ExprNode_Loop, lifetime, ::std::move(cnd), Parse_ExprBlockNode(lex) );
+ }
+}
+/** Parse the rest of a `for` loop; every caller shown invokes this after reading the `for` keyword.
+ *
+ * Reads `PATTERN in VALUE { ... }` and returns an ExprNode_Loop tagged FOR.
+ * The iterated value is parsed with the disallow_struct_literal parse flag set.
+ * The flag is presumably restored when the inner braces close, the way
+ * CLEAR_PARSE_FLAG uses SavedParseState - TODO confirm against SET_PARSE_FLAG.
+ *
+ * lifetime is the loop label text; callers pass an empty string for an
+ * unlabelled loop. Usable both at statement level and inside an expression.
+ */
+ExprNodeP Parse_ForStmt(TokenStream& lex, ::std::string lifetime)
+{
+ Token tok;
+
+ AST::Pattern pat = Parse_Pattern(lex);
+ GET_CHECK_TOK(tok, lex, TOK_RWORD_IN);
+ ExprNodeP val;
+ {
+ SET_PARSE_FLAG(lex, disallow_struct_literal); // scoped to these braces only; the loop body below is parsed outside them
+ val = Parse_Expr1(lex);
+ }
+ return NEWNODE( AST::ExprNode_Loop, lifetime, AST::ExprNode_Loop::FOR,
+ ::std::move(pat), ::std::move(val), Parse_ExprBlockNode(lex) );
+}
+
+/// Parses the 'stmt' fragment specifier
+/// - Flow control
+/// - Expressions
+ExprNodeP Parse_Stmt(TokenStream& lex)
+{
+ TRACE_FUNCTION;
+ Token tok;
+ switch(GET_TOK(tok, lex))
+ {
case TOK_RWORD_RETURN: {
- if( lifetime.size() )
- throw ParseError::Unexpected(lex, Token(TOK_LIFETIME, lifetime));
ExprNodeP val;
if( GET_TOK(tok, lex) != TOK_SEMICOLON ) {
lex.putback(tok);
@@ -331,8 +510,6 @@ ExprNodeP Parse_Stmt(TokenStream& lex, bool& opt_semicolon) case TOK_RWORD_CONTINUE:
case TOK_RWORD_BREAK:
{
- if( lifetime.size() )
- throw ParseError::Unexpected(lex, Token(TOK_LIFETIME, lifetime));
AST::ExprNode_Flow::Type type;
switch(tok.type())
{
@@ -340,13 +517,14 @@ ExprNodeP Parse_Stmt(TokenStream& lex, bool& opt_semicolon) case TOK_RWORD_BREAK: type = AST::ExprNode_Flow::BREAK; break;
default: throw ParseError::BugCheck(/*lex,*/ "continue/break");
}
+ ::std::string lifetime;
if( GET_TOK(tok, lex) == TOK_LIFETIME )
{
lifetime = tok.str();
GET_TOK(tok, lex);
}
ExprNodeP val;
- if( tok.type() != TOK_SEMICOLON ) {
+ if( tok.type() != TOK_SEMICOLON && tok.type() != TOK_COMMA && tok.type() != TOK_BRACE_CLOSE ) {
lex.putback(tok);
val = Parse_Expr1(lex);
}
@@ -354,29 +532,6 @@ ExprNodeP Parse_Stmt(TokenStream& lex, bool& opt_semicolon) lex.putback(tok);
return NEWNODE( AST::ExprNode_Flow, type, lifetime, ::std::move(val) );
}
-
- case TOK_RWORD_LOOP:
- opt_semicolon = true;
- return NEWNODE( AST::ExprNode_Loop, lifetime, Parse_ExprBlockNode(lex) );
- case TOK_RWORD_WHILE: {
- opt_semicolon = true;
- ExprNodeP cnd = Parse_Expr1(lex);
- return NEWNODE( AST::ExprNode_Loop, lifetime, ::std::move(cnd), Parse_ExprBlockNode(lex) );
- }
- case TOK_RWORD_FOR: {
- opt_semicolon = true;
- AST::Pattern pat = Parse_Pattern(lex);
- GET_CHECK_TOK(tok, lex, TOK_RWORD_IN);
- ExprNodeP val = Parse_Expr1(lex);
- return NEWNODE( AST::ExprNode_Loop, lifetime, AST::ExprNode_Loop::FOR,
- ::std::move(pat), ::std::move(val), Parse_ExprBlockNode(lex) );
- }
- case TOK_RWORD_IF:
- opt_semicolon = true;
- return Parse_IfStmt(lex);
- case TOK_RWORD_MATCH:
- opt_semicolon = true;
- return Parse_Expr_Match(lex);
default:
lex.putback(tok);
return Parse_Expr0(lex);
@@ -394,7 +549,7 @@ ExprNodeP Parse_Stmt(TokenStream& lex, bool& opt_semicolon) {
lex.putback(tok);
do {
- rv.push_back( Parse_Expr1(lex) );
+ rv.push_back( Parse_Expr0(lex) );
} while( GET_TOK(tok, lex) == TOK_COMMA );
CHECK_TOK(tok, TOK_PAREN_CLOSE);
}
@@ -517,8 +672,7 @@ ExprNodeP Parse_Expr_Match(TokenStream& lex) }
CHECK_TOK(tok, TOK_FATARROW);
- bool opt_semicolon = false;
- arm.m_code = Parse_Stmt(lex, opt_semicolon);
+ arm.m_code = Parse_Stmt(lex);
arms.push_back( ::std::move(arm) );
@@ -542,6 +696,12 @@ ExprNodeP Parse_ExprBlocks(TokenStream& lex) case TOK_BRACE_OPEN:
lex.putback(tok);
return Parse_ExprBlockNode(lex);
+ case TOK_RWORD_LOOP:
+ return NEWNODE( AST::ExprNode_Loop, "", Parse_ExprBlockNode(lex) );
+ case TOK_RWORD_WHILE:
+ return Parse_WhileStmt(lex, "");
+ case TOK_RWORD_FOR:
+ return Parse_ForStmt(lex, "");
case TOK_RWORD_MATCH:
return Parse_Expr_Match(lex);
case TOK_RWORD_IF:
diff --git a/src/parse/lex.cpp b/src/parse/lex.cpp index 4237b48f..a2459e14 100644 --- a/src/parse/lex.cpp +++ b/src/parse/lex.cpp @@ -12,8 +12,11 @@ #include <iostream> #include <cstdlib> // strtol #include <typeinfo> +#include <algorithm> // std::count Lexer::Lexer(::std::string filename): + m_path(filename), + m_line(1), m_istream(filename.c_str()), m_last_char_valid(false) { @@ -170,7 +173,7 @@ signed int Lexer::getSymbol() //::std::cout << "ofs=" << ofs << ", chars[ofs] = " << chars[ofs] << ", ch = " << ch << ", len = " << len << ::std::endl; if( ofs >= len || chars[ofs] > ch ) { - this->putback(); + this->ungetc(); return best; } @@ -185,7 +188,7 @@ signed int Lexer::getSymbol() } } - this->putback(); + this->ungetc(); return best; } @@ -198,7 +201,37 @@ bool issym(char ch) return false; } -Token Lexer::getToken() +Position Lexer::getPosition() const +{ + return Position(m_path, m_line); +} +Token Lexer::realGetToken() +{ + while(true) + { + Token tok = getTokenInt(); + //::std::cout << "getTokenInt: tok = " << tok << ::std::endl; + switch(tok.type()) + { + case TOK_NEWLINE: + m_line ++; + //DEBUG("m_line = " << m_line << " (NL)"); + continue; + case TOK_WHITESPACE: + continue; + case TOK_COMMENT: { + ::std::string comment = tok.str(); + unsigned int c = ::std::count(comment.begin(), comment.end(), '\n'); + m_line += c; + //DEBUG("m_line = " << m_line << " (comment w/ "<<c<<")"); + continue; } + default: + return tok; + } + } +} + +Token Lexer::getTokenInt() { if( this->m_next_token.type() != TOK_NULL ) { @@ -214,10 +247,10 @@ Token Lexer::getToken() { while( isspace(ch = this->getc()) && ch != '\n' ) ; - this->putback(); + this->ungetc(); return Token(TOK_WHITESPACE); } - this->putback(); + this->ungetc(); const signed int sym = this->getSymbol(); if( sym == 0 ) @@ -297,7 +330,7 @@ Token Lexer::getToken() this->m_next_token = Token(TOK_TRIPLE_DOT); } else { - this->putback(); + this->ungetc(); this->m_next_token = Token(TOK_DOUBLE_DOT); } return Token(val, 
CORETYPE_ANY); @@ -309,7 +342,7 @@ Token Lexer::getToken() return Token(val, CORETYPE_ANY); } - this->putback(); + this->ungetc(); double fval = this->parseFloat(val); if( (ch = this->getc()) == 'f' ) { @@ -319,7 +352,7 @@ Token Lexer::getToken() suffix.push_back(ch); ch = this->getc(); } - this->putback(); + this->ungetc(); if( suffix == "f32" ) { num_type = CORETYPE_F32; } @@ -332,13 +365,13 @@ Token Lexer::getToken() } else { - this->putback(); + this->ungetc(); } return Token( fval, num_type); } else { - this->putback(); + this->ungetc(); return Token(val, num_type); } } @@ -357,7 +390,7 @@ Token Lexer::getToken() } else { - this->putback(); + this->ungetc(); for( unsigned int i = 0; i < LEN(RWORDS); i ++ ) { if( str < RWORDS[i].chars ) break; @@ -388,7 +421,7 @@ Token Lexer::getToken() str.push_back(ch); ch = this->getc(); } - this->putback(); + this->ungetc(); return Token(TOK_COMMENT, str); } case BLOCKCOMMENT: { ::std::string str; @@ -397,7 +430,7 @@ Token Lexer::getToken() if( ch == '*' ) { ch = this->getc(); if( ch == '/' ) break; - this->putback(); + this->ungetc(); } str.push_back(ch); ch = this->getc(); @@ -420,7 +453,7 @@ Token Lexer::getToken() str.push_back(ch); ch = this->getc(); } - this->putback(); + this->ungetc(); return Token(TOK_LIFETIME, str); } } @@ -485,7 +518,7 @@ double Lexer::parseFloat(uint64_t whole) ch = this->getc_num(); } while( isdigit(ch) ); } - this->putback(); + this->ungetc(); buf[ofs] = 0; return ::std::strtod(buf, NULL); @@ -501,8 +534,13 @@ uint32_t Lexer::parseEscape(char enclosing) // Unicode (up to six hex digits) uint32_t val = 0; ch = this->getc(); + bool req_close_brace = false; + if( ch == '{' ) { + req_close_brace = true; + ch = this->getc(); + } if( !isxdigit(ch) ) - throw ParseError::Todo( FMT("Found invalid character '\\x" << ::std::hex << (int)ch << "' in \\u sequence" ) ); + throw ParseError::Generic(*this, FMT("Found invalid character '\\x" << ::std::hex << (int)ch << "' in \\u sequence" ) ); while( 
isxdigit(ch) ) { char tmp[2] = {ch, 0}; @@ -510,13 +548,25 @@ uint32_t Lexer::parseEscape(char enclosing) val += ::std::strtol(tmp, NULL, 16); ch = this->getc(); } - this->putback(); + if( !req_close_brace ) + this->ungetc(); + else if( ch != '}' ) + throw ParseError::Generic(*this, "Expected terminating } in \\u sequence"); + else + ; return val; } case '\\': return '\\'; + case '\'': + return '\''; + case 'r': + return '\r'; case 'n': return '\n'; + case 't': + return '\t'; case '\n': + m_line ++; while( isspace(ch) ) ch = this->getc(); return ch; @@ -550,9 +600,9 @@ char Lexer::getc_num() return ch; } -void Lexer::putback() +void Lexer::ungetc() { -// ::std::cout << "putback(): " << m_last_char_valid << " '" << m_last_char << "'" << ::std::endl; +// ::std::cout << "ungetc(): " << m_last_char_valid << " '" << m_last_char << "'" << ::std::endl; assert(!m_last_char_valid); m_last_char_valid = true; } diff --git a/src/parse/lex.hpp b/src/parse/lex.hpp index da56c6ba..47c2db60 100644 --- a/src/parse/lex.hpp +++ b/src/parse/lex.hpp @@ -133,8 +133,12 @@ public: #define CLEAR_PARSE_FLAG(lex, flag) SavedParseState _sps(lex, lex.parse_state()); lex.parse_state().flag = false #define CHECK_PARSE_FLAG(lex, flag) (lex.parse_state().flag == true) -class Lexer +class Lexer: + public TokenStream { + ::std::string m_path; + unsigned int m_line; + ::std::ifstream m_istream; bool m_last_char_valid; char m_last_char; @@ -142,16 +146,19 @@ class Lexer public: Lexer(::std::string filename); - Token getToken(); + virtual Position getPosition() const override; + virtual Token realGetToken() override; private: + Token getTokenInt(); + signed int getSymbol(); double parseFloat(uint64_t whole); uint32_t parseEscape(char enclosing); char getc(); char getc_num(); - void putback(); + void ungetc(); class EndOfFile {}; }; diff --git a/src/parse/preproc.cpp b/src/parse/preproc.cpp deleted file mode 100644 index 781ea8bc..00000000 --- a/src/parse/preproc.cpp +++ /dev/null @@ -1,51 +0,0 @@ 
-#include "preproc.hpp"
-#include <iostream>
-#include <algorithm>
-
-Preproc::Preproc(::std::string path):
- m_path(path),
- m_line(1),
- m_lex(path)
-{
- //ctor
-}
-
-Preproc::~Preproc()
-{
- //dtor
-}
-
-Token Preproc::getTokenInt()
-{
- while(true)
- {
- Token tok = m_lex.getToken();
- //::std::cout << "getTokenInt: tok = " << tok << ::std::endl;
- switch(tok.type())
- {
- case TOK_NEWLINE:
- m_line ++;
- //DEBUG("m_line = " << m_line << " (NL)");
- continue;
- case TOK_WHITESPACE:
- continue;
- case TOK_COMMENT: {
- ::std::string comment = tok.str();
- unsigned int c = ::std::count(comment.begin(), comment.end(), '\n');
- m_line += c;
- //DEBUG("m_line = " << m_line << " (comment w/ "<<c<<")");
- continue; }
- default:
- return tok;
- }
- }
-}
-
-Position Preproc::getPosition() const
-{
- return Position(m_path, m_line);
-}
-Token Preproc::realGetToken()
-{
- return getTokenInt();
-}
diff --git a/src/parse/preproc.hpp b/src/parse/preproc.hpp deleted file mode 100644 index 08fda12d..00000000 --- a/src/parse/preproc.hpp +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef PREPROC_H
-#define PREPROC_H
-
-#include "lex.hpp"
-
-class Preproc:
- public TokenStream
-{
- ::std::string m_path;
- unsigned int m_line;
- Lexer m_lex;
-
-public:
- Preproc(::std::string path);
- ~Preproc();
-
- virtual Position getPosition() const override;
- virtual Token realGetToken() override;
-private:
- Token getTokenInt();
-};
-
-#endif // PREPROC_H
diff --git a/src/parse/root.cpp b/src/parse/root.cpp index b67f620c..270b38e0 100644 --- a/src/parse/root.cpp +++ b/src/parse/root.cpp @@ -1,6 +1,5 @@ /*
*/
-#include "preproc.hpp"
#include "../ast/ast.hpp"
#include "parseerror.hpp"
#include "common.hpp"
@@ -13,8 +12,8 @@ void Parse_ModRoot(TokenStream& lex, AST::Crate& crate, AST::Module& mod, LList< AST::Path Parse_Path(TokenStream& lex, eParsePathGenericMode generic_mode);
AST::Path Parse_Path(TokenStream& lex, bool is_abs, eParsePathGenericMode generic_mode);
-::std::vector<TypeRef> Parse_Path_GenericList(TokenStream& lex);
AST::Path Parse_PathFrom(TokenStream& lex, AST::Path path, eParsePathGenericMode generic_mode);
+::std::vector<TypeRef> Parse_Path_GenericList(TokenStream& lex);
AST::Path Parse_Path(TokenStream& lex, eParsePathGenericMode generic_mode)
{
@@ -46,6 +45,96 @@ AST::Path Parse_Path(TokenStream& lex, bool is_abs, eParsePathGenericMode generi return Parse_PathFrom(lex, AST::Path(), generic_mode);
}
/** Append path components read from `lex` onto `path` and return the result.
 *
 * Each loop iteration, as coded:
 *  1. requires an identifier and remembers it as the component name;
 *  2. in PATH_GENERIC_TYPE mode, optionally reads `<...>` generics or the
 *     `Fn(A, B) -> R` sugar directly after the name;
 *  3. appends the component and stops unless a `::` follows;
 *  4. in PATH_GENERIC_EXPR mode, optionally reads generics written as
 *     `::<...>`, again stopping unless another `::` follows.
 * The first token that is not part of the path is put back before returning.
 *
 * A TOK_DOUBLE_LT is treated as two `<` tokens by putting one TOK_LT back
 * before Parse_Path_GenericList is called.
 *
 * For the Fn(...) sugar the argument types become a single tuple parameter;
 * the parsed return type is only written to the debug log for now.
 */
+AST::Path Parse_PathFrom(TokenStream& lex, AST::Path path, eParsePathGenericMode generic_mode)
+{
+ TRACE_FUNCTION_F("path = " << path);
+
+ Token tok;
+
+ tok = lex.getToken(); // prime the loop: each iteration starts with tok holding the next component
+ while(true)
+ {
+ ::std::vector<TypeRef> params;
+
+ CHECK_TOK(tok, TOK_IDENT);
+ ::std::string component = tok.str();
+
+ GET_TOK(tok, lex);
+ if( generic_mode == PATH_GENERIC_TYPE )
+ {
+ if( tok.type() == TOK_LT || tok.type() == TOK_DOUBLE_LT )
+ {
+ // HACK! Handle breaking << into < <
+ if( tok.type() == TOK_DOUBLE_LT )
+ lex.putback( Token(TOK_LT) );
+
+ // Type-mode generics "::path::to::Type<A,B>"
+ params = Parse_Path_GenericList(lex);
+ tok = lex.getToken();
+ }
+ // HACK - 'Fn*(...) -> ...' notation
+ else if( tok.type() == TOK_PAREN_OPEN )
+ {
+ DEBUG("Fn() hack");
+ ::std::vector<TypeRef> args;
+ if( GET_TOK(tok, lex) == TOK_PAREN_CLOSE )
+ {
+ // Empty list
+ }
+ else
+ {
+ lex.putback(tok);
+ do {
+ args.push_back( Parse_Type(lex) );
+ } while( GET_TOK(tok, lex) == TOK_COMMA );
+ }
+ CHECK_TOK(tok, TOK_PAREN_CLOSE);
+
+ TypeRef ret_type = TypeRef( TypeRef::TagUnit() ); // unit type unless a thin arrow and a type follow
+ if( GET_TOK(tok, lex) == TOK_THINARROW ) {
+ ret_type = Parse_Type(lex);
+ }
+ else {
+ lex.putback(tok);
+ }
+ DEBUG("- Fn("<<args<<")->"<<ret_type<<"");
+
+ // Encode into path: Fn(A,B)->C is stored as Fn<(A,B)>; the Ret=C part is not stored yet (see TODO below)
+ params = ::std::vector<TypeRef> { TypeRef(TypeRef::TagTuple(), ::std::move(args)) };
+ // TODO: Use 'ret_type' as an associated type bound
+
+ GET_TOK(tok, lex);
+ }
+ else
+ {
+ }
+ }
+ if( tok.type() != TOK_DOUBLE_COLON ) {
+ path.append( AST::PathNode(component, params) );
+ break;
+ }
+ tok = lex.getToken();
+ if( generic_mode == PATH_GENERIC_EXPR && (tok.type() == TOK_LT || tok.type() == TOK_DOUBLE_LT) )
+ {
+ // HACK! Handle breaking << into < <
+ if( tok.type() == TOK_DOUBLE_LT )
+ lex.putback( Token(TOK_LT) );
+
+ // Expr-mode generics "::path::to::function::<Type1,Type2>(arg1, arg2)"
+ params = Parse_Path_GenericList(lex);
+ tok = lex.getToken();
+ if( tok.type() != TOK_DOUBLE_COLON ) {
+ path.append( AST::PathNode(component, params) );
+ break;
+ }
+ GET_TOK(tok, lex);
+ }
+ path.append( AST::PathNode(component, params) );
+ }
+ lex.putback(tok); // tok is the first token after the path
+ DEBUG("path = " << path);
+ return path;
+}
/// Parse a list of parameters within a path
::std::vector<TypeRef> Parse_Path_GenericList(TokenStream& lex)
{
@@ -90,57 +179,6 @@ AST::Path Parse_Path(TokenStream& lex, bool is_abs, eParsePathGenericMode generi return types;
}
-AST::Path Parse_PathFrom(TokenStream& lex, AST::Path path, eParsePathGenericMode generic_mode)
-{
- TRACE_FUNCTION;
-
- Token tok;
-
- tok = lex.getToken();
- while(true)
- {
- ::std::vector<TypeRef> params;
-
- CHECK_TOK(tok, TOK_IDENT);
- ::std::string component = tok.str();
-
- tok = lex.getToken();
- if( generic_mode == PATH_GENERIC_TYPE && (tok.type() == TOK_LT || tok.type() == TOK_DOUBLE_LT) )
- {
- // HACK! Handle breaking << into < <
- if( tok.type() == TOK_DOUBLE_LT )
- lex.putback( Token(TOK_LT) );
-
- // Type-mode generics "::path::to::Type<A,B>"
- params = Parse_Path_GenericList(lex);
- tok = lex.getToken();
- }
- if( tok.type() != TOK_DOUBLE_COLON ) {
- path.append( AST::PathNode(component, params) );
- break;
- }
- tok = lex.getToken();
- if( generic_mode == PATH_GENERIC_EXPR && (tok.type() == TOK_LT || tok.type() == TOK_DOUBLE_LT) )
- {
- // HACK! Handle breaking << into < <
- if( tok.type() == TOK_DOUBLE_LT )
- lex.putback( Token(TOK_LT) );
-
- // Expr-mode generics "::path::to::function::<Type1,Type2>(arg1, arg2)"
- params = Parse_Path_GenericList(lex);
- tok = lex.getToken();
- if( tok.type() != TOK_DOUBLE_COLON ) {
- path.append( AST::PathNode(component, params) );
- break;
- }
- GET_TOK(tok, lex);
- }
- path.append( AST::PathNode(component, params) );
- }
- lex.putback(tok);
- return path;
-}
-
static const struct {
const char* name;
enum eCoreType type;
@@ -200,6 +238,7 @@ TypeRef Parse_Type(TokenStream& lex) // Either a path (with generics)
if( tok.str() == "_" )
return TypeRef();
+ // or a primitive
for(unsigned int i = 0; i < sizeof(CORETYPES)/sizeof(CORETYPES[0]); i ++)
{
if( tok.str() < CORETYPES[i].name )
@@ -207,7 +246,6 @@ TypeRef Parse_Type(TokenStream& lex) if( tok.str() == CORETYPES[i].name )
return TypeRef(TypeRef::TagPrimitive(), CORETYPES[i].type);
}
- // or a primitive
lex.putback(tok);
return TypeRef(TypeRef::TagPath(), Parse_Path(lex, false, PATH_GENERIC_TYPE)); // relative path
case TOK_DOUBLE_COLON:
@@ -253,19 +291,19 @@ TypeRef Parse_Type(TokenStream& lex) case TOK_SQUARE_OPEN: {
// Array
TypeRef inner = Parse_Type(lex);
- tok = lex.getToken();
- if( tok.type() == TOK_COMMA ) {
+ if( GET_TOK(tok, lex) == TOK_SEMICOLON ) {
// Sized array
- GET_CHECK_TOK(tok, lex, TOK_DOUBLE_DOT);
AST::Expr array_size = Parse_Expr(lex, true);
GET_CHECK_TOK(tok, lex, TOK_SQUARE_CLOSE);
return TypeRef(TypeRef::TagSizedArray(), inner, array_size.take_node());
}
- else {
- GET_CHECK_TOK(tok, lex, TOK_SQUARE_CLOSE);
+ else if( tok.type() == TOK_SQUARE_CLOSE )
+ {
return TypeRef(TypeRef::TagUnsizedArray(), inner);
}
- throw ParseError::BugCheck("Reached end of Parse_Type:SQUARE");
+ else {
+ throw ParseError::Unexpected(lex, tok/*, "; or ]"*/);
+ }
}
case TOK_PAREN_OPEN: {
DEBUG("Tuple");
@@ -392,6 +430,8 @@ void Parse_WhereClause(TokenStream& lex, AST::TypeParams& params) if( GET_TOK(tok, lex) == TOK_LIFETIME )
{
}
+ else if( tok.type() == TOK_BRACE_OPEN )
+ break;
else
{
lex.putback(tok);
@@ -903,7 +943,7 @@ AST::MetaItem Parse_MetaItem(TokenStream& lex) }
}
-AST::Impl Parse_Impl(TokenStream& lex, bool is_unsafe=false)
+AST::Impl Parse_Impl(TokenStream& lex, bool is_unsafe/*=false*/)
{
TRACE_FUNCTION;
Token tok;
@@ -1626,7 +1666,7 @@ void Parse_ModRoot_Items(TokenStream& lex, AST::Crate& crate, AST::Module& mod, else
{
::std::string newdir( newpath_dir.begin(), newpath_dir.begin() + newpath_dir.find_last_of('/') );
- Preproc sub_lex(newpath_dir);
+ Lexer sub_lex(newpath_dir);
Parse_ModRoot(sub_lex, crate, submod, &modstack, newdir);
}
}
@@ -1643,13 +1683,13 @@ void Parse_ModRoot_Items(TokenStream& lex, AST::Crate& crate, AST::Module& mod, else if( ifs_dir.is_open() )
{
// Load from dir
- Preproc sub_lex(newpath_dir + "mod.rs");
+ Lexer sub_lex(newpath_dir + "mod.rs");
Parse_ModRoot(sub_lex, crate, submod, &modstack, newpath_dir);
}
else if( ifs_file.is_open() )
{
// Load from file
- Preproc sub_lex(newpath_file);
+ Lexer sub_lex(newpath_file);
Parse_ModRoot(sub_lex, crate, submod, &modstack, newpath_file);
}
else
@@ -1727,7 +1767,7 @@ AST::Crate Parse_Crate(::std::string mainfile) {
Token tok;
- Preproc lex(mainfile);
+ Lexer lex(mainfile);
size_t p = mainfile.find_last_of('/');
::std::string mainpath = (p != ::std::string::npos ? ::std::string(mainfile.begin(), mainfile.begin()+p+1) : "./");
|