From 4f48058690ffc6af273a49eeb909d792163cade9 Mon Sep 17 00:00:00 2001 From: John Hodge Date: Sun, 27 Jan 2019 11:06:21 +0800 Subject: macro_rules - Rework pattern matching into a "compiled" format (easier to disambiguate) --- src/macro_rules/parse.cpp | 452 +++++++++++++++++++++++++++------------------- 1 file changed, 267 insertions(+), 185 deletions(-) (limited to 'src/macro_rules/parse.cpp') diff --git a/src/macro_rules/parse.cpp b/src/macro_rules/parse.cpp index 8af9bd07..689a81f1 100644 --- a/src/macro_rules/parse.cpp +++ b/src/macro_rules/parse.cpp @@ -12,13 +12,21 @@ #include "pattern_checks.hpp" MacroRulesPtr Parse_MacroRules(TokenStream& lex); +namespace { + ::std::vector macro_pattern_to_simple(const Span& sp, const ::std::vector& pattern); +} /// A rule within a macro_rules! blcok class MacroRule { public: ::std::vector m_pattern; + Span m_pat_span; ::std::vector m_contents; + + MacroRule() {} + MacroRule(MacroRule&&) = default; + MacroRule(const MacroRule&) = delete; }; /// Parse the pattern of a macro_rules! arm @@ -261,7 +269,11 @@ MacroRule Parse_MacroRules_Var(TokenStream& lex) } // - Pattern entries ::std::vector< ::std::string> names; - rule.m_pattern = Parse_MacroRules_Pat(lex, tok.type(), close, names); + { + auto ps = lex.start_span(); + rule.m_pattern = Parse_MacroRules_Pat(lex, tok.type(), close, names); + rule.m_pat_span = lex.end_span(ps); + } GET_CHECK_TOK(tok, lex, TOK_FATARROW); @@ -280,189 +292,9 @@ MacroRule Parse_MacroRules_Var(TokenStream& lex) return rule; } -bool patterns_are_same(const Span& sp, const MacroPatEnt& left, const MacroPatEnt& right) -{ - if( left.type > right.type ) - return patterns_are_same(sp, right, left); - - //if( left.name != right.name ) { - // TODO(sp, "Handle different binding names " << left << " != " << right); - //} - - // NOTE: left.type <= right.type - switch(right.type) - { - case MacroPatEnt::PAT_TOKEN: - assert( left.type == MacroPatEnt::PAT_TOKEN ); - return right.tok == left.tok; - case MacroPatEnt::PAT_LOOP: - switch(left.type) - { - case MacroPatEnt::PAT_TOKEN: - // - Check for compatibility, but these two don't match - if( patterns_are_same(sp, left, right.subpats.at(0)) == true ) - ERROR(sp, E0000, "Incompatible use of loop with matching non-loop"); - return false; - case MacroPatEnt::PAT_LOOP: - TODO(sp, "patterns_are_same - PAT_LOOP"); - default: - assert( !"" ); - } - - case MacroPatEnt::PAT_TT: - if( left.type == right.type ) - return true; - ERROR(sp, E0000, "Incompatible macro fragments - " << right << " used with " << left); - break; - - case MacroPatEnt::PAT_PAT: - switch(left.type) - { - case MacroPatEnt::PAT_TOKEN: - // - If this token is a valid pattern token, error - if( is_token_pat(left.tok.type()) ) - ERROR(sp, E0000, "Incompatible macro fragments - " << right << " used with " << left); - return false; - case MacroPatEnt::PAT_PAT: - return true; - default: - ERROR(sp, E0000, "Incompatible macro fragments - " << right << " used with " << left); - } - break; - // `:ident` - Compatible with just other tokens - case MacroPatEnt::PAT_IDENT: - switch(left.type) - { - case MacroPatEnt::PAT_TOKEN: - if( left.tok.type() == TOK_IDENT ) - ERROR(sp, E0000, "Incompatible macro fragments " << right << " used with " << left); - return false; - case MacroPatEnt::PAT_IDENT: - return true; - default: - ERROR(sp, E0000, "Incompatible macro fragments " << right << " used with " << left); - } - case MacroPatEnt::PAT_PATH: - switch(left.type) - { - case MacroPatEnt::PAT_TOKEN: - if( is_token_path(left.tok.type()) ) - ERROR(sp, E0000, "Incompatible macro fragments " << right << " used with " << left); - return false; - case MacroPatEnt::PAT_PATH: - return true; - default: - ERROR(sp, E0000, "Incompatible macro fragments " << right << " used with " << left); - } - case MacroPatEnt::PAT_TYPE: - switch(left.type) - { - case MacroPatEnt::PAT_TOKEN: - if( is_token_type(left.tok.type()) ) - ERROR(sp, E0000, "Incompatible macro fragments " << right << " used with " << left); - return false; - case MacroPatEnt::PAT_TYPE: - return true; - default: - ERROR(sp, E0000, "Incompatible macro fragments " << right << " used with " << left); - } - case MacroPatEnt::PAT_EXPR: - switch(left.type) - { - case MacroPatEnt::PAT_TOKEN: - if( is_token_expr(left.tok.type()) ) - ERROR(sp, E0000, "Incompatible macro fragments " << right << " used with " << left); - return false; - // TODO: Allow a loop starting with an expr? - // - Consume, add split based on loop condition or next arm - // - Possible problem with binding levels. - case MacroPatEnt::PAT_EXPR: - return true; - default: - ERROR(sp, E0000, "Incompatible macro fragments " << right << " used with " << left); - } - case MacroPatEnt::PAT_STMT: - switch(left.type) - { - case MacroPatEnt::PAT_TOKEN: - if( is_token_stmt(left.tok.type()) ) - ERROR(sp, E0000, "Incompatible macro fragments " << right << " used with " << left); - return false; - case MacroPatEnt::PAT_STMT: - return true; - default: - ERROR(sp, E0000, "Incompatible macro fragments " << right << " used with " << left); - } - // Block - Expects '{' - Compatible with everything but a literal '{' - case MacroPatEnt::PAT_BLOCK: - switch(left.type) - { - case MacroPatEnt::PAT_TOKEN: - if( left.tok.type() == TOK_BRACE_OPEN ) - ERROR(sp, E0000, "Incompatible macro fragments"); - return false; - case MacroPatEnt::PAT_BLOCK: - return true; - default: - return false; - } - // Matches meta/attribute fragments. - case MacroPatEnt::PAT_META: - switch(left.type) - { - case MacroPatEnt::PAT_TOKEN: - if( left.tok.type() == TOK_IDENT ) - ERROR(sp, E0000, "Incompatible macro fragments"); - return false; - case MacroPatEnt::PAT_META: - return true; - default: - ERROR(sp, E0000, "Incompatible macro fragments " << right << " used with " << left); - } - // Matches items - case MacroPatEnt::PAT_ITEM: - switch(left.type) - { - case MacroPatEnt::PAT_TOKEN: - if( is_token_item(left.tok.type()) ) - ERROR(sp, E0000, "Incompatible macro fragments"); - return false; - case MacroPatEnt::PAT_ITEM: - return true; - default: - ERROR(sp, E0000, "Incompatible macro fragments " << right << " used with " << left); - } - // Matches visibility specifiers - case MacroPatEnt::PAT_VIS: - switch(left.type) - { - case MacroPatEnt::PAT_TOKEN: - if( is_token_vis(left.tok.type()) ) - ERROR(sp, E0000, "Incompatible macro fragments"); - return false; - case MacroPatEnt::PAT_VIS: - return true; - default: - ERROR(sp, E0000, "Incompatible macro fragments " << right << " used with " << left); - } - case MacroPatEnt::PAT_LIFETIME: - switch(left.type) - { - case MacroPatEnt::PAT_TOKEN: - if( left.tok.type() == TOK_LIFETIME ) - ERROR(sp, E0000, "Incompatible macro fragments"); - return false; - case MacroPatEnt::PAT_LIFETIME: - return true; - default: - ERROR(sp, E0000, "Incompatible macro fragments " << right << " used with " << left); - } - } - throw ""; -} - // TODO: Also count the number of times each variable is used? -void enumerate_names(const ::std::vector& pats, ::std::vector< ::std::string>& names) { +void enumerate_names(const ::std::vector& pats, ::std::vector< ::std::string>& names) +{ for( const auto& pat : pats ) { if( pat.type == MacroPatEnt::PAT_LOOP ) { @@ -504,9 +336,11 @@ MacroRulesPtr Parse_MacroRules(TokenStream& lex) // Re-parse the patterns into a unified form for(auto& rule : rules) { - MacroRulesArm arm = MacroRulesArm( mv$(rule.m_pattern), mv$(rule.m_contents) ); + // TODO: Transform the pattern into a DFA or similar here (seekable instruction stream?) - enumerate_names(arm.m_pattern, arm.m_param_names); + auto rule_sequence = macro_pattern_to_simple(rule.m_pat_span, rule.m_pattern); + MacroRulesArm arm = MacroRulesArm( mv$(rule_sequence), mv$(rule.m_contents) ); + enumerate_names(rule.m_pattern, arm.m_param_names); rule_arms.push_back( mv$(arm) ); } @@ -517,3 +351,251 @@ MacroRulesPtr Parse_MacroRules(TokenStream& lex) return MacroRulesPtr(rv); } + +namespace { + + struct ExpTok { + MacroPatEnt::Type ty; + const Token* tok; + ExpTok(MacroPatEnt::Type ty, const Token* tok): ty(ty), tok(tok) {} + bool operator==(const ExpTok& t) const { return this->ty == t.ty && *this->tok == *t.tok; } + }; + ::std::ostream& operator<<(::std::ostream& os, const ExpTok& t) { + os << "ExpTok(" << t.ty << " " << *t.tok << ")"; + return os; + } + + // Yields all possible ExpectTok/ExpectPat entries from a pattern position + // Returns `true` if there's no fall-through + bool macro_pattern_get_head_set_inner(::std::vector& rv, const ::std::vector& pattern, size_t direct_pos, size_t indirect_ofs) + { + for(size_t idx = direct_pos; idx < pattern.size(); idx ++) + { + const auto& ent = pattern[idx]; + switch(ent.type) + { + case MacroPatEnt::PAT_LOOP: + if( macro_pattern_get_head_set_inner(rv, ent.subpats, 0, indirect_ofs) ) + { + // + loops have to iterate at least once, so if the set is closed by the sub-patterns, close us too + if(ent.name == "+") + { + return true; + } + // for * and ? loops, they can be skipped entirely. + // - No separator, this is for the skip case + } + else + { + // If the inner pattern didn't close the option set, then the next token can be the separator + if( ent.tok != TOK_NULL ) + { + // If indirect is non-zero, decrement without doing anything + if( indirect_ofs > 0 ) + { + indirect_ofs --; + } + else + { + rv.push_back(ExpTok(MacroPatEnt::PAT_TOKEN, &ent.tok)); + } + } + } + break; + default: + // If indirect is non-zero, decrement + if( indirect_ofs > 0 ) + { + indirect_ofs --; + } + else + { + rv.push_back( ExpTok(ent.type, &ent.tok) ); + return true; + } + break; + } + } + return false; + } + ::std::vector macro_pattern_get_head_set(const ::std::vector& pattern, size_t direct_pos, size_t indirect_ofs) + { + ::std::vector rv; + macro_pattern_get_head_set_inner(rv, pattern, direct_pos, indirect_ofs); + return rv; + } + + void macro_pattern_to_simple_inner(const Span& sp, ::std::vector& rv, const ::std::vector& pattern) + { + size_t level_start = rv.size(); + TRACE_FUNCTION_FR("[" << pattern << "]", "[" << FMT_CB(ss, for(auto it = rv.begin()+level_start; it != rv.end(); ++it) { ss << *it << ", "; }) << "]"); + auto push = [&rv](SimplePatEnt spe) { + DEBUG("[macro_pattern_to_simple_inner] rv[" << rv.size() << "] = " << spe); + rv.push_back( ::std::move(spe) ); + }; + auto push_ifv = [&push](bool is_equal, ::std::vector ents, size_t tgt) { + push(SimplePatEnt::make_If({ is_equal, tgt, mv$(ents) })); + }; + auto push_if = [&push_ifv](bool is_equal, MacroPatEnt::Type ty, const Token& tok, size_t tgt) { + push_ifv(is_equal, make_vec1(SimplePatIfCheck { ty, tok }), tgt); + }; + for(size_t idx = 0; idx < pattern.size(); idx ++) + { + const auto& ent = pattern[idx]; + DEBUG("[" << idx << "] ent = " << ent); + switch(ent.type) + { + case MacroPatEnt::PAT_LOOP: { + auto entry_pats = macro_pattern_get_head_set(ent.subpats, 0, 0); + DEBUG("Entry = [" << entry_pats << "]"); + ASSERT_BUG(sp, entry_pats.size() > 0, "No entry conditions extracted from sub-pattern [" << ent.subpats << "]"); + auto skip_pats = macro_pattern_get_head_set(pattern, idx+1, 0); + DEBUG("Skip = [" << skip_pats << "]"); + + // - Duplicates need special handling (build up a subseqent set) + for(const auto& ee : entry_pats) + { + if( ::std::find(skip_pats.begin(), skip_pats.end(), ee) != skip_pats.end() ) + { + TODO(sp, "Entry and skip patterns share an entry, ambigious - " << ee); + } + } + + // TODO: Combine the two cases below into one + + // If the loop is a $()+ loop, then just recurse into it + if( ent.name == "+" ) + { + push( SimplePatEnt::make_LoopStart({}) ); + size_t start = rv.size(); + macro_pattern_to_simple_inner(sp, rv, ent.subpats); + push( SimplePatEnt::make_LoopNext({}) ); + if( ent.tok != TOK_NULL ) + { + size_t end = rv.size() + 3; + if( ::std::find(skip_pats.begin(), skip_pats.end(), ExpTok(MacroPatEnt::PAT_TOKEN, &ent.tok)) != skip_pats.end() ) { + for(const auto& p : entry_pats) + { + auto v = ::make_vec2( + { MacroPatEnt::PAT_TOKEN, ent.tok}, + { p.ty, *p.tok } + ); + push_ifv(false, mv$(v), ~0u); + } + } + else { + push_if( false, MacroPatEnt::PAT_TOKEN, ent.tok, end ); + } + push( SimplePatEnt::make_ExpectTok(ent.tok) ); + push( SimplePatEnt::make_Jump({ start }) ); + } + else + { + // TODO: What if there's a collision at this level? + for(const auto& p : entry_pats) + { + push_if(true, p.ty, *p.tok, start); + } + } + push( SimplePatEnt::make_LoopEnd({}) ); + } + else + { + push( SimplePatEnt::make_LoopStart({}) ); + + // Options: + // - Enter the loop (if the next token is one of the head set of the loop) + // - Skip the loop (the next token is the head set of the subsequent entries) + size_t rewrite_start = rv.size(); + if( ent.name != "+" ) + { + if( entry_pats.size() == 1 ) + { + // If not the entry pattern, skip. + push_if(false, entry_pats.front().ty, *entry_pats.front().tok, ~0u); + } + else if( skip_pats.empty() ) + { + // No skip patterns, try all entry patterns + size_t start = rv.size() + entry_pats.size() + 1; + for(const auto& p : entry_pats) + { + push_if(true, p.ty, *p.tok, start); + } + push(SimplePatEnt::make_Jump({ ~0u })); + } + else + { + for(const auto& p : skip_pats) + { + push_if(true, p.ty, *p.tok, ~0u); + } + } + } + + macro_pattern_to_simple_inner(sp, rv, ent.subpats); + push( SimplePatEnt::make_LoopNext({}) ); + + if( ent.name != "?" ) + { + if( ent.tok != TOK_NULL ) + { + // If the joiner is also valid after the loop, handle by also checking the entry conditions + if( ::std::find(skip_pats.begin(), skip_pats.end(), ExpTok(MacroPatEnt::PAT_TOKEN, &ent.tok)) != skip_pats.end() ) { + // Try all re-loop (joiner + entry) patterns, if any fail then jump to the end. + for(const auto& p : entry_pats) { + auto v = ::make_vec2( + { MacroPatEnt::PAT_TOKEN, ent.tok}, + { p.ty, *p.tok } + ); + push_ifv(false, mv$(v), ~0u); + } + } + else { + // If not the joiner, jump to the end + push_if(false, MacroPatEnt::PAT_TOKEN, ent.tok, ~0u); + } + push( SimplePatEnt::make_ExpectTok(ent.tok) ); + } + // Jump back to the entry check. + push(SimplePatEnt::make_Jump({ rewrite_start })); + } + else + { + ASSERT_BUG(sp, ent.tok == TOK_NULL, "$()? with a separator isn't valid"); + } + size_t post_loop = rv.size(); + for(size_t i = rewrite_start; i < post_loop; i++) + { + if( auto* pe = rv[i].opt_If() ) { + if(pe->jump_target == ~0u) { + pe->jump_target = post_loop; + } + } + if( auto* pe = rv[i].opt_Jump() ) { + if(pe->jump_target == ~0u) { + pe->jump_target = post_loop; + } + } + } + push( SimplePatEnt::make_LoopEnd({}) ); + } + } break; + case MacroPatEnt::PAT_TOKEN: + push( SimplePatEnt::make_ExpectTok(ent.tok) ); + break; + default: + push( SimplePatEnt::make_ExpectPat({ ent.type, ent.name_index }) ); + break; + } + } + } + + ::std::vector macro_pattern_to_simple(const Span& sp, const ::std::vector& pattern) + { + ::std::vector rv; + TRACE_FUNCTION_FR(pattern, rv); + macro_pattern_to_simple_inner(sp, rv, pattern); + return rv; + } +} -- cgit v1.2.3