diff options
author | John Hodge <tpg@ucc.asn.au> | 2019-01-27 11:06:21 +0800 |
---|---|---|
committer | John Hodge <tpg@ucc.asn.au> | 2019-01-27 11:06:21 +0800 |
commit | 4f48058690ffc6af273a49eeb909d792163cade9 (patch) | |
tree | 95bd3f0a836ca2a3081e5c473e93be69b031f4c6 /src | |
parent | d1ea840742dac3d3af66667508c8a841a6c6ca70 (diff) | |
download | mrust-4f48058690ffc6af273a49eeb909d792163cade9.tar.gz |
macro_rules - Rework pattern matching into a "compiled" format (easier to disambiguate)
Diffstat (limited to 'src')
-rw-r--r-- | src/hir/deserialise.cpp | 41 | ||||
-rw-r--r-- | src/hir/serialise.cpp | 28 | ||||
-rw-r--r-- | src/include/tagged_union.hpp | 2 | ||||
-rw-r--r-- | src/macro_rules/eval.cpp | 757 | ||||
-rw-r--r-- | src/macro_rules/macro_rules.hpp | 40 | ||||
-rw-r--r-- | src/macro_rules/mod.cpp | 36 | ||||
-rw-r--r-- | src/macro_rules/parse.cpp | 452 |
7 files changed, 512 insertions, 844 deletions
diff --git a/src/hir/deserialise.cpp b/src/hir/deserialise.cpp index 2f9b752e..30d05b40 100644 --- a/src/hir/deserialise.cpp +++ b/src/hir/deserialise.cpp @@ -244,6 +244,45 @@ } return rv; } + ::SimplePatIfCheck deserialise_simplepatifcheck() { + return ::SimplePatIfCheck { + static_cast<::MacroPatEnt::Type>(m_in.read_tag()), + deserialise_token() + }; + } + ::SimplePatEnt deserialise_simplepatent() { + auto tag = static_cast< ::SimplePatEnt::Tag>( m_in.read_tag() ); + switch(tag) + { + case ::SimplePatEnt::TAG_End: + return ::SimplePatEnt::make_End({}); + case ::SimplePatEnt::TAG_LoopStart: + return ::SimplePatEnt::make_LoopStart({}); + case ::SimplePatEnt::TAG_LoopNext: + return ::SimplePatEnt::make_LoopNext({}); + case ::SimplePatEnt::TAG_LoopEnd: + return ::SimplePatEnt::make_LoopEnd({}); + case ::SimplePatEnt::TAG_Jump: + return ::SimplePatEnt::make_Jump({ m_in.read_count() }); + case ::SimplePatEnt::TAG_ExpectTok: + return SimplePatEnt::make_ExpectTok({ + deserialise_token() + }); + case ::SimplePatEnt::TAG_ExpectPat: + return SimplePatEnt::make_ExpectPat({ + static_cast<::MacroPatEnt::Type>(m_in.read_tag()), + static_cast<unsigned>(m_in.read_count()) + }); + case SimplePatEnt::TAG_If: + return SimplePatEnt::make_If({ + m_in.read_bool(), + m_in.read_count(), + deserialise_vec_c< SimplePatIfCheck>([&](){ return deserialise_simplepatifcheck(); }) + }); + default: + BUG(Span(), "Bad tag for MacroPatEnt - #" << static_cast<int>(tag)); + } + } ::MacroPatEnt deserialise_macropatent() { auto s = m_in.read_string(); auto n = static_cast<unsigned int>(m_in.read_count()); @@ -278,7 +317,7 @@ ::MacroRulesArm deserialise_macrorulesarm() { ::MacroRulesArm rv; rv.m_param_names = deserialise_vec< ::std::string>(); - rv.m_pattern = deserialise_vec_c< ::MacroPatEnt>( [&](){ return deserialise_macropatent(); } ); + rv.m_pattern = deserialise_vec_c< ::SimplePatEnt>( [&](){ return deserialise_simplepatent(); } ); rv.m_contents = deserialise_vec_c< ::MacroExpansionEnt>( [&](){ return deserialise_macroexpansionent(); } ); return rv; } diff --git a/src/hir/serialise.cpp b/src/hir/serialise.cpp index 8d92267a..c91cf93f 100644 --- a/src/hir/serialise.cpp +++ b/src/hir/serialise.cpp @@ -415,6 +415,34 @@ serialise_vec(pe.subpats); } } + void serialise(const ::SimplePatIfCheck& e) { + m_out.write_tag( static_cast<int>(e.ty) ); + serialise(e.tok); + } + void serialise(const ::SimplePatEnt& pe) { + m_out.write_tag( pe.tag() ); + TU_MATCH_HDRA( (pe), { ) + TU_ARMA(End, _e) {} + TU_ARMA(LoopStart, _e) {} + TU_ARMA(LoopNext, _e) {} + TU_ARMA(LoopEnd, _e) {} + TU_ARMA(Jump, e) { + m_out.write_count(e.jump_target); + } + TU_ARMA(ExpectTok, e) { + serialise(e); + } + TU_ARMA(ExpectPat, e) { + m_out.write_tag( static_cast<int>(e.type) ); + m_out.write_count(e.idx); + } + TU_ARMA(If, e) { + m_out.write_bool(e.is_equal); + m_out.write_count(e.jump_target); + serialise_vec(e.ents); + } + } + } void serialise(const ::MacroRulesArm& arm) { serialise_vec(arm.m_param_names); serialise_vec(arm.m_pattern); diff --git a/src/include/tagged_union.hpp b/src/include/tagged_union.hpp index d249de13..dab7f8ea 100644 --- a/src/include/tagged_union.hpp +++ b/src/include/tagged_union.hpp @@ -122,7 +122,7 @@ #define TU_ARM(VAR, TAG, NAME) break; case ::std::remove_reference<decltype(VAR)>::type::TAG_##TAG: for(bool tu_lc = true; tu_lc; tu_lc=false) for(auto& NAME = (VAR).as_##TAG(); (void)NAME, tu_lc; tu_lc=false) #define TU_MATCH_HDRA(VARS, brace) TU_MATCH_HDRA_(::std::remove_reference<decltype(TU_FIRST VARS)>::type, VARS, brace) -#define TU_MATCH_HDRA_(CLASS, VARS, brace) const auto& tu_match_hdr2_v = (TU_FIRST VARS); switch( tu_match_hdr2_v.tag() ) brace case CLASS::TAGDEAD: assert(!"ERROR: destructed tagged union used"); +#define TU_MATCH_HDRA_(CLASS, VARS, brace) auto& tu_match_hdr2_v = (TU_FIRST VARS); switch( tu_match_hdr2_v.tag() ) brace case CLASS::TAGDEAD: assert(!"ERROR: destructed tagged union used"); // Evil hack: two for loops, the inner stops the outer after it's done. #define TU_ARMA(TAG, NAME) break; case ::std::remove_reference<decltype(tu_match_hdr2_v)>::type::TAG_##TAG: for(bool tu_lc = true; tu_lc; tu_lc=false) for(auto& NAME = tu_match_hdr2_v.as_##TAG(); (void)NAME, tu_lc; tu_lc=false) diff --git a/src/macro_rules/eval.cpp b/src/macro_rules/eval.cpp index fd0f5648..03c5609c 100644 --- a/src/macro_rules/eval.cpp +++ b/src/macro_rules/eval.cpp @@ -93,60 +93,43 @@ private: CapturedVal& get_cap(const ::std::vector<unsigned int>& iterations, unsigned int name_idx); }; -/// Simple pattern entry for macro_rules! arm patterns -TAGGED_UNION( SimplePatEnt, End, - // End of the pattern stream - (End, struct{}), - // Expect a specific token - (ExpectTok, Token), - // Expect a pattern match - (ExpectPat, struct { - MacroPatEnt::Type type; - unsigned int idx; - }), - // Compare the head of the input stream and poke the pattern stream - (IfTok, struct { - bool is_equal; - Token tok; - }), - // Compare the head of the input stream and poke the pattern stream - (IfPat, struct { - bool is_equal; - MacroPatEnt::Type type; - }) - ); class MacroPatternStream { - const ::std::vector<MacroPatEnt>* m_pattern; - // Position in each nested pattern - ::std::vector<unsigned int> m_pos; + const ::std::vector<SimplePatEnt>& m_simple_ents; + size_t m_cur_pos; + + bool m_last_was_cond; + bool m_condition_met; + ::std::vector<bool> m_condition_history; + + const ::std::vector<bool>* m_condition_replay; + size_t m_condition_replay_pos; + // Iteration index of each active loop level ::std::vector<unsigned int> m_loop_iterations; - ::std::vector<SimplePatEnt> m_stack; - unsigned int m_skip_count; - - SimplePatEnt m_peek_cache; bool m_peek_cache_valid = false; + const SimplePatEnt* m_peek_cache; - bool m_break_if_not = false; - bool m_condition_fired = false; public: - MacroPatternStream(const ::std::vector<MacroPatEnt>& pattern): - m_pattern(&pattern), - m_pos({0}) + MacroPatternStream(const ::std::vector<SimplePatEnt>& ents, const ::std::vector<bool>* condition_replay=nullptr): + m_simple_ents(ents), + m_cur_pos(0), + m_last_was_cond(false), + m_condition_replay(condition_replay), + m_condition_replay_pos(0) { } /// Get the next pattern entry - SimplePatEnt next(); + const SimplePatEnt& next(); const SimplePatEnt& peek() { if( !m_peek_cache_valid ) { - m_peek_cache = next(); + m_peek_cache = &next(); m_peek_cache_valid = true; } - return m_peek_cache; + return *m_peek_cache; } /// Inform the stream that the `if` rule that was just returned succeeded @@ -156,16 +139,9 @@ public: return m_loop_iterations; } -private: - SimplePatEnt emit_loop_start(const MacroPatEnt& pat); - - SimplePatEnt emit_seq(SimplePatEnt v1, SimplePatEnt v2) { - assert( m_stack.empty() ); - m_stack.push_back( mv$(v2) ); - return v1; + ::std::vector<bool> take_history() { + return ::std::move(m_condition_history); } - - void break_loop(); }; // === Prototypes === @@ -312,223 +288,74 @@ bool ParameterMappings::dec_count(const ::std::vector<unsigned int>& iterations, // MacroPatternStream // ------------------------------------ -SimplePatEnt MacroPatternStream::next() +const SimplePatEnt& MacroPatternStream::next() { - TRACE_FUNCTION_F("m_pos=[" << m_pos << "], m_stack.size()=" << m_stack.size()); - assert(m_pos.size() >= 1); - if( m_peek_cache_valid ) { m_peek_cache_valid = false; - return mv$(m_peek_cache); - } - - // Pop off the generation stack - if( ! m_stack.empty() ) { - auto rv = mv$(m_stack.back()); - m_stack.pop_back(); - return rv; - } - - if( m_break_if_not && ! m_condition_fired ) { - // Break out of the current loop then continue downwards. - break_loop(); - } - - m_skip_count = 0; - m_break_if_not = false; - m_condition_fired = false; - - const MacroPatEnt* parent_pat = nullptr; - decltype(m_pattern) parent_ents = nullptr; - const auto* ents = m_pattern; - for(unsigned int i = 0; i < m_pos.size() - 1; i ++) - { - auto idx = m_pos[i]; - //DEBUG(i << " idx=" << idx << " ents->size()=" << ents->size()); - assert( idx < ents->size() ); - assert( (*ents)[idx].type == MacroPatEnt::PAT_LOOP ); - parent_pat = &(*ents)[idx]; - parent_ents = ents; - ents = &parent_pat->subpats; + return *m_peek_cache; } - DEBUG( (m_pos.size()-1) << " " << m_pos.back() << " / " << ents->size()); - if( m_pos.back() < ents->size() ) + for(;;) { - const auto& pat = ents->at( m_pos.back() ); - - if( pat.type == MacroPatEnt::PAT_LOOP ) { - DEBUG("Enter " << pat); - // Increase level, return entry control - m_pos.push_back( 0 ); - m_loop_iterations.push_back( 0 ); - - if( pat.name == "*" ) - { - return emit_loop_start(pat); - } - else - { - // If the name is "+" then this is should always be entered, so just recurse - assert( pat.name == "+" ); - return next(); - } - } - else if( pat.type == MacroPatEnt::PAT_TOKEN ) { - m_pos.back() += 1; - return SimplePatEnt::make_ExpectTok( pat.tok.clone() ); - } - else { - m_pos.back() += 1; - return SimplePatEnt::make_ExpectPat({ pat.type, pat.name_index }); - } - } - else - { - if( parent_pat ) + // If not replaying, and the previous entry was a conditional, record the result of that conditional + if( !m_condition_replay && m_last_was_cond ) { - // Last entry in a loop - return the breakout control - // - Reset the loop back to the start - m_pos.back() = 0; - m_loop_iterations.back() += 1; - - // - Emit break conditions - if( parent_pat->tok == TOK_NULL ) { - // Loop separator is TOK_NULL - get the first token of the loop and use it. - // - This shares the code that controls if a loop is entered - DEBUG("No separator"); - return emit_loop_start(*parent_pat); - } - else { - // If the next token is the same as the separator emit: Expect(separator), ShouldEnter - - auto i = m_pos[ m_pos.size() - 2 ] + 1; - if( i < parent_ents->size() ) - { - DEBUG("sep = " << parent_pat->tok << ", next = " << parent_ents->at(i) << ", start = " << ents->at(0)); - if( parent_ents->at(i).type == MacroPatEnt::PAT_TOKEN && parent_pat->tok == parent_ents->at(i).tok ) - { - DEBUG("MAGIC: Reverse conditions for case where sep==next"); - // > Mark to skip the next token after the end of the loop - m_skip_count = 1; - // - Yeild `EXPECT sep` then the entry condition of this loop - auto pat = emit_loop_start(*parent_pat); - m_stack.push_back( mv$(pat) ); - return SimplePatEnt::make_ExpectTok( parent_pat->tok.clone() ); - } - } - // - Yeild `IF NOT sep BREAK` and `EXPECT sep` - DEBUG("Separator = " << parent_pat->tok); - return emit_seq( - SimplePatEnt::make_IfTok({ false, parent_pat->tok.clone() }), - SimplePatEnt::make_ExpectTok( parent_pat->tok.clone() ) - ); - } + m_condition_history.push_back(m_condition_met); } - else - { - // End of the input sequence - return SimplePatEnt::make_End({}); + m_last_was_cond = false; + // End of list? return End entry + if( m_cur_pos == m_simple_ents.size() ) { + static SimplePatEnt END = SimplePatEnt::make_End({}); + return END; } - } -} - -namespace { - void get_loop_entry_pats(const MacroPatEnt& pat, ::std::vector<const MacroPatEnt*>& entry_pats) - { - assert( pat.type == MacroPatEnt::PAT_LOOP ); - - // If this pattern is a loop, get the entry concrete patterns for it - // - Otherwise, just - unsigned int i = 0; - while( i < pat.subpats.size() && pat.subpats[i].type == MacroPatEnt::PAT_LOOP ) + const auto& cur_ent = m_simple_ents[m_cur_pos]; + // If replaying, and this is a conditional + if( m_condition_replay && cur_ent.is_If() ) { - const auto& cur_pat = pat.subpats[i]; - bool is_optional = (cur_pat.name == "*"); - - get_loop_entry_pats(cur_pat, entry_pats); - - if( !is_optional ) - { - // Non-optional loop, MUST be entered, so return after recursing - return ; - } - // Optional, so continue the loop. - i ++; - } - - // First non-loop pattern - if( i < pat.subpats.size() ) - { - entry_pats.push_back( &pat.subpats[i] ); - } - } -} // namespace - -/// Returns (and primes m_stack) the rules to control the start of a loop -/// This code emits rules to break out of the loop if the entry conditions are not met -SimplePatEnt MacroPatternStream::emit_loop_start(const MacroPatEnt& pat) -{ - // Find the next non-loop pattern to control if this loop should be entered - ::std::vector<const MacroPatEnt*> m_entry_pats; - - get_loop_entry_pats(pat, m_entry_pats); - DEBUG("m_entry_pats = [" << FMT_CB(ss, for(const auto* p : m_entry_pats) { ss << *p << ","; }) << "]"); - - struct H { - static SimplePatEnt get_if(bool flag, const MacroPatEnt& mpe) { - if( mpe.type == MacroPatEnt::PAT_TOKEN ) - return SimplePatEnt::make_IfTok({ flag, mpe.tok.clone() }); + // Skip the conditional (following its target or just skipping over) + if( (*m_condition_replay)[m_condition_replay_pos++] ) + m_cur_pos = cur_ent.as_If().jump_target; else - return SimplePatEnt::make_IfPat({ flag, mpe.type }); + m_cur_pos += 1; + continue ; } - }; - - const auto* entry_pat = m_entry_pats.back(); - m_entry_pats.pop_back(); - if( m_entry_pats.size() > 0 ) - { - DEBUG("Multiple entry possibilities, reversing condition"); - m_break_if_not = true; - for(auto pat_ptr : m_entry_pats) - { - m_stack.push_back( H::get_if(true, *pat_ptr) ); + m_cur_pos += 1; + TU_MATCH_HDRA( (cur_ent), {) + default: + if( cur_ent.is_If() ) + { + m_last_was_cond = true; + m_condition_met = false; + } + return cur_ent; + TU_ARMA(End, _e) + BUG(Span(), "Unexpected End"); + TU_ARMA(Jump, e) + m_cur_pos = e.jump_target; + TU_ARMA(LoopStart, _e) { + m_loop_iterations.push_back(0); + } + TU_ARMA(LoopNext, _e) { + m_loop_iterations.back() += 1; + } + TU_ARMA(LoopEnd, _e) { + m_loop_iterations.pop_back(); + } } - return H::get_if(true, *entry_pat); - } - else - { - // Emit an if based on it - return H::get_if(false, *entry_pat); } } void MacroPatternStream::if_succeeded() { - if( m_break_if_not ) - { - m_condition_fired = true; - } - else - { - break_loop(); + assert(m_cur_pos > 0); + assert(m_last_was_cond); + TU_MATCH_HDRA( (m_simple_ents[m_cur_pos-1]), {) + default: + BUG(Span(), "Unexpected " << m_simple_ents[m_cur_pos-1]); + TU_ARMA(If, e) + m_cur_pos = e.jump_target; } -} -void MacroPatternStream::break_loop() -{ - DEBUG("- Break out of loop, m_skip_count = " << m_skip_count); - // Break out of an active loop (pop level and increment parent level) - assert( m_pos.size() >= 1 ); - // - This should never be called when on the top level - assert( m_pos.size() != 1 ); - - // HACK: Clear the stack if an if succeeded - m_stack.clear(); - - m_pos.pop_back(); - m_pos.back() += 1 + m_skip_count; - - m_loop_iterations.pop_back(); + m_condition_met = true; } // ---------------------------------------------------------------- @@ -616,73 +443,6 @@ namespace { } } -// TODO: This shouldn't exist, and can false-positives -// - Ideally, this would use consume_from_frag (which takes a clone-able input) -bool Macro_TryPatternCap(TokenStream& lex, MacroPatEnt::Type type) -{ - switch(type) - { - case MacroPatEnt::PAT_TOKEN: - BUG(lex.point_span(), ""); - case MacroPatEnt::PAT_LOOP: - BUG(lex.point_span(), ""); - case MacroPatEnt::PAT_BLOCK: - return LOOK_AHEAD(lex) == TOK_BRACE_OPEN || LOOK_AHEAD(lex) == TOK_INTERPOLATED_BLOCK; - case MacroPatEnt::PAT_IDENT: - return LOOK_AHEAD(lex) == TOK_IDENT || is_reserved_word(LOOK_AHEAD(lex)); - case MacroPatEnt::PAT_TT: - switch(LOOK_AHEAD(lex)) - { - case TOK_EOF: - case TOK_PAREN_CLOSE: - case TOK_BRACE_CLOSE: - case TOK_SQUARE_CLOSE: - return false; - default: - return true; - } - case MacroPatEnt::PAT_PATH: - return is_token_path( LOOK_AHEAD(lex) ); - case MacroPatEnt::PAT_TYPE: - return is_token_type( LOOK_AHEAD(lex) ); - case MacroPatEnt::PAT_EXPR: - return is_token_expr( LOOK_AHEAD(lex) ); - case MacroPatEnt::PAT_STMT: - return is_token_stmt( LOOK_AHEAD(lex) ); - case MacroPatEnt::PAT_PAT: - return is_token_pat( LOOK_AHEAD(lex) ); - case MacroPatEnt::PAT_META: - return LOOK_AHEAD(lex) == TOK_IDENT || LOOK_AHEAD(lex) == TOK_INTERPOLATED_META; - case MacroPatEnt::PAT_ITEM: - return is_token_item( LOOK_AHEAD(lex) ) || LOOK_AHEAD(lex) == TOK_IDENT; // Huh? Ident? - case MacroPatEnt::PAT_VIS: - return is_token_vis( LOOK_AHEAD(lex) ); - case MacroPatEnt::PAT_LIFETIME: - return LOOK_AHEAD(lex) == TOK_LIFETIME; - } - BUG(lex.point_span(), "Fell through"); -} -bool Macro_TryPattern(TokenStream& lex, const MacroPatEnt& pat) -{ - DEBUG("pat = " << pat); - Token tok; - switch(pat.type) - { - case MacroPatEnt::PAT_TOKEN: { - GET_TOK(tok, lex); - bool rv = (tok == pat.tok); - PUTBACK(tok, lex); - return rv; - } - case MacroPatEnt::PAT_LOOP: - if( pat.name == "*" ) - return true; - return Macro_TryPattern(lex, pat.subpats[0]); - default: - return Macro_TryPatternCap(lex, pat.type); - } -} - InterpolatedFragment Macro_HandlePatternCap(TokenStream& lex, MacroPatEnt::Type type) { Token tok; @@ -761,7 +521,6 @@ InterpolatedFragment Macro_HandlePatternCap(TokenStream& lex, MacroPatEnt::Type return ::std::unique_ptr<TokenStream>( ret_ptr ); } -#if 1 // Collection of functions that consume a specific fragment type from a token stream // - Does very loose consuming namespace @@ -1887,7 +1646,7 @@ namespace { case MacroPatEnt::PAT_TOKEN: case MacroPatEnt::PAT_LOOP: - throw ""; + BUG(Span(), "Encountered " << type << " in consume_from_frag");; case MacroPatEnt::PAT_BLOCK: if( lex.next() == TOK_BRACE_OPEN ) { return consume_tt(lex); @@ -1966,13 +1725,7 @@ unsigned int Macro_InvokeRules_MatchPattern(const Span& sp, const MacroRules& ru { TRACE_FUNCTION; - struct ActiveArm { - unsigned int index; - MacroPatternStream pat_stream; - TokenStreamRO in_stream; - }; - - ::std::vector<size_t> matches; + ::std::vector< ::std::pair<size_t, ::std::vector<bool>> > matches; for(size_t i = 0; i < rules.m_rules.size(); i ++) { auto lex = TokenStreamRO(input); @@ -1981,29 +1734,38 @@ unsigned int Macro_InvokeRules_MatchPattern(const Span& sp, const MacroRules& ru bool fail = false; for(;;) { - auto pat = arm_stream.next(); + const auto& pat = arm_stream.next(); + DEBUG(i << " " << pat); if(pat.is_End()) { - DEBUG(i << " End"); if( lex.next() != TOK_EOF ) fail = true; break; } - else if( const auto* e = pat.opt_IfPat() ) + else if( const auto* e = pat.opt_If() ) { - DEBUG(i << " IfPat(" << (e->is_equal ? "==" : "!=") << " ?" << e->type << ")"); auto lc = lex.clone(); - if( consume_from_frag(lc, e->type) == e->is_equal ) + bool rv = true; + for(const auto& check : e->ents) { - DEBUG("- Succeeded"); - arm_stream.if_succeeded(); + if( check.ty != MacroPatEnt::PAT_TOKEN ) { + if( !consume_from_frag(lc, check.ty) ) + { + rv = false; + break; + } + } + else + { + if( lc.next_tok() != check.tok ) + { + rv = false; + break; + } + lc.consume(); + } } - } - else if( const auto* e = pat.opt_IfTok() ) - { - DEBUG(i << " IfTok(" << (e->is_equal ? "==" : "!=") << " ?" << e->tok << ")"); - const auto& tok = lex.next_tok(); - if( (tok == e->tok) == e->is_equal ) + if( rv == e->is_equal ) { DEBUG("- Succeeded"); arm_stream.if_succeeded(); @@ -2038,7 +1800,7 @@ unsigned int Macro_InvokeRules_MatchPattern(const Span& sp, const MacroRules& ru if( ! fail ) { - matches.push_back(i); + matches.push_back( ::std::make_pair(i, arm_stream.take_history()) ); DEBUG(i << " MATCHED"); } else @@ -2057,12 +1819,13 @@ unsigned int Macro_InvokeRules_MatchPattern(const Span& sp, const MacroRules& ru // yay! // NOTE: There can be multiple arms active, take the first. - auto i = matches[0]; + auto i = matches[0].first; + const auto& history = matches[0].second; DEBUG("Evalulating arm " << i); auto lex = TTStreamO(sp, mv$(input)); SET_MODULE(lex, mod); - auto arm_stream = MacroPatternStream(rules.m_rules[i].m_pattern); + auto arm_stream = MacroPatternStream(rules.m_rules[i].m_pattern, &history); struct Capture { unsigned int binding_idx; @@ -2074,30 +1837,15 @@ unsigned int Macro_InvokeRules_MatchPattern(const Span& sp, const MacroRules& ru for(;;) { - auto pat = arm_stream.next(); + const auto& pat = arm_stream.next(); + DEBUG(i << " " << pat); if(pat.is_End()) { break; } - else if( const auto* e = pat.opt_IfPat() ) - { - DEBUG(i << " IfPat(" << (e->is_equal ? "==" : "!=") << " ?" << e->type << ")"); - if( Macro_TryPatternCap(lex, e->type) == e->is_equal ) - { - DEBUG("- Succeeded"); - arm_stream.if_succeeded(); - } - } - else if( const auto* e = pat.opt_IfTok() ) + else if( pat.is_If() ) { - DEBUG(i << " IfTok(" << (e->is_equal ? "==" : "!=") << " ?" << e->tok << ")"); - auto tok = lex.getToken(); - if( (tok == e->tok) == e->is_equal ) - { - DEBUG("- Succeeded"); - arm_stream.if_succeeded(); - } - lex.putback( mv$(tok) ); + BUG(sp, "Unexpected If pattern during final matching - " << pat); } else if( const auto* e = pat.opt_ExpectTok() ) { @@ -2132,305 +1880,6 @@ unsigned int Macro_InvokeRules_MatchPattern(const Span& sp, const MacroRules& ru return i; } } -#else -unsigned int Macro_InvokeRules_MatchPattern(const Span& sp, const MacroRules& rules, TokenTree input, AST::Module& mod, ParameterMappings& bound_tts) -{ - TRACE_FUNCTION; - Span sp;// = input.span(); - - struct Capture { - unsigned int binding_idx; - ::std::vector<unsigned int> iterations; - unsigned int cap_idx; - }; - struct ActiveArm { - unsigned int index; - ::std::vector<Capture> captures; - MacroPatternStream stream; - }; - // - List of active rules (rules that haven't yet failed) - ::std::vector< ActiveArm > active_arms; - active_arms.reserve( rules.m_rules.size() ); - for(unsigned int i = 0; i < rules.m_rules.size(); i ++) - { - active_arms.push_back( ActiveArm { i, {}, MacroPatternStream(rules.m_rules[i].m_pattern) } ); - } - - // - List of captured values - ::std::vector<InterpolatedFragment> captures; - - TTStreamO lex(sp, mv$(input) ); - SET_MODULE(lex, mod); - while(true) - { - DEBUG("--- ---"); - // 1. Get concrete patterns for all active rules (i.e. no If* patterns) - ::std::vector<SimplePatEnt> arm_pats; - for(auto& arm : active_arms) - { - auto idx = arm.index; - SimplePatEnt pat; - // Consume all If* rules - do - { - pat = arm.stream.next(); - TU_IFLET( SimplePatEnt, pat, IfPat, e, - DEBUG(idx << " IfPat(" << (e.is_equal ? "==" : "!=") << " ?" << e.type << ")"); - if( Macro_TryPatternCap(lex, e.type) == e.is_equal ) - { - DEBUG("- Succeeded"); - arm.stream.if_succeeded(); - } - ) - else TU_IFLET( SimplePatEnt, pat, IfTok, e, - DEBUG(idx << " IfTok(" << (e.is_equal ? "==" : "!=") << " ?" << e.tok << ")"); - auto tok = lex.getToken(); - if( (tok == e.tok) == e.is_equal ) - { - DEBUG("- Succeeded"); - arm.stream.if_succeeded(); - } - lex.putback( mv$(tok) ); - ) - else { - break; - } - } while( pat.is_IfPat() || pat.is_IfTok() ); - - TU_MATCH( SimplePatEnt, (pat), (e), - (IfPat, BUG(sp, "IfTok unexpected here");), - (IfTok, BUG(sp, "IfTok unexpected here");), - (ExpectTok, - DEBUG(idx << " ExpectTok(" << e << ")"); - ), - (ExpectPat, - DEBUG(idx << " ExpectPat(" << e.type << " => $" << e.idx << ")"); - ), - (End, - DEBUG(idx << " End"); - ) - ) - arm_pats.push_back( mv$(pat) ); - } - assert( arm_pats.size() == active_arms.size() ); - - // 2. Prune imposible arms - for(unsigned int i = 0, j = 0; i < arm_pats.size(); ) - { - auto idx = active_arms[i].index; - const auto& pat = arm_pats[i]; - bool fail = false; - - TU_MATCH( SimplePatEnt, (pat), (e), - (IfPat, BUG(sp, "IfTok unexpected here");), - (IfTok, BUG(sp, "IfTok unexpected here");), - (ExpectTok, - auto tok = lex.getToken(); - DEBUG(j<<"="<<idx << " ExpectTok(" << e << ") == " << tok); - fail = !(tok == e); - lex.putback( mv$(tok) ); - ), - (ExpectPat, - DEBUG(j<<"="<<idx << " ExpectPat(" << e.type << " => $" << e.idx << ")"); - fail = !Macro_TryPatternCap(lex, e.type); - ), - (End, - DEBUG(j<<"="<<idx << " End"); - fail = !(lex.lookahead(0) == TOK_EOF); - ) - ) - if( fail ) { - DEBUG("- Failed arm " << active_arms[i].index); - arm_pats.erase( arm_pats.begin() + i ); - active_arms.erase( active_arms.begin() + i ); - } - else { - i ++; - } - j ++; - } - - if( arm_pats.size() == 0 ) { - auto tok = lex.getToken(); - ERROR(tok.get_pos(), E0000, "No rules expected " << tok); - } - - // 3. If there is a token pattern in the list, take that arm (and any other token arms) - const SimplePatEnt* tok_pat = nullptr; - unsigned int ident_pat_idx = arm_pats.size(); - bool has_non_ident_pat = false; - for( unsigned int i = 0; i < arm_pats.size(); i ++ ) - { - const auto& pat = arm_pats[i]; - TU_IFLET(SimplePatEnt, pat, ExpectTok, e, - if( tok_pat ) { - if( e != tok_pat->as_ExpectTok() ) - ERROR(lex.getPosition(), E0000, "Incompatible macro arms - " << tok_pat->as_ExpectTok() << " vs " << e); - } - else { - tok_pat = &pat; - } - ) - else TU_IFLET(SimplePatEnt, pat, ExpectPat, e, - if( e.type == MacroPatEnt::PAT_IDENT ) { - ident_pat_idx = i; - } - else { - has_non_ident_pat = true; - } - ) - } - - if( tok_pat ) - { - auto tok = lex.getToken(); - const auto& e = tok_pat->as_ExpectTok(); - // NOTE: This should never fail. - if( tok != e ) { - ERROR(lex.getPosition(), E0000, "Unexpected " << tok << ", expected " << e); - } - } - else - { - if( has_non_ident_pat && ident_pat_idx < arm_pats.size() ) - { - // For all :ident patterns present, check the next rule. - // - If this rule would fail, remove the arm. - bool ident_rule_kept = false; - for( unsigned int i = 0; i < arm_pats.size(); ) - { - bool discard = false; - const auto& pat = arm_pats[i]; - const auto& e = pat.as_ExpectPat(); - if( e.type == MacroPatEnt::PAT_IDENT ) - { - const auto& next = active_arms[i].stream.peek(); - TU_MATCHA( (next), (ne), - (IfPat, TODO(sp, "Handle IfPat following a conflicting :ident");), - (IfTok, TODO(sp, "IfTok following a conflicting :ident");), - (ExpectTok, - if( ne.type() != lex.lookahead(1) ) { - DEBUG("Discard active arm " << i << " due to next token mismatch"); - discard = true; - } - else { - ident_rule_kept = true; - } - ), - (ExpectPat, - TODO(sp, "Handle ExpectPat following a conflicting :ident"); - ), - (End, TODO(sp, "Handle End following a conflicting :ident"); ) - ) - } - - if( discard ) { - arm_pats.erase( arm_pats.begin() + i ); - active_arms.erase( active_arms.begin() + i ); - } - else { - ++ i; - } - } - - // If there are any remaining ident rules, erase the non-ident rules. - if( ident_rule_kept ) { - // If no rules were discarded, remove the non-ident rules - for( unsigned int i = 0; i < arm_pats.size(); ) - { - if( arm_pats[i].as_ExpectPat().type != MacroPatEnt::PAT_IDENT ) { - arm_pats.erase( arm_pats.begin() + i ); - active_arms.erase( active_arms.begin() + i ); - } - else { - ++ i; - } - } - } - assert(arm_pats.size() > 0); - assert(arm_pats.size() == active_arms.size()); - } - - // 3. Check that all remaining arms are the same pattern. - const auto& active_pat = arm_pats[0]; - for(unsigned int i = 1; i < arm_pats.size(); i ++) - { - if( active_pat.tag() != arm_pats[i].tag() ) { - ERROR(lex.getPosition(), E0000, "Incompatible macro arms " - << "- " << active_arms[0].index << " SimplePatEnt::" << active_pat.tag_str() - << " vs " << active_arms[i].index<< " SimplePatEnt::" << arm_pats[i].tag_str() - ); - } - TU_MATCH( SimplePatEnt, (active_pat, arm_pats[i]), (e1, e2), - (IfPat, BUG(sp, "IfPat unexpected here");), - (IfTok, BUG(sp, "IfTok unexpected here");), - (ExpectTok, - BUG(sp, "ExpectTok unexpected here"); - ), - (ExpectPat, - // Can fail, as :expr and :stmt overlap in their trigger set - if( e1.type != e2.type ) { - ERROR(lex.getPosition(), E0000, "Incompatible macro arms - mismatched patterns " << e1.type << " and " << e2.type); - } - ), - (End, - ) - ) - } - - // 4. Apply patterns. - TU_MATCH( SimplePatEnt, (arm_pats[0]), (e), - (End, - auto tok = lex.getToken(); - if( tok.type() != TOK_EOF ) { - ERROR(lex.getPosition(), E0000, "Unexpected " << tok << ", expected TOK_EOF"); - } - // NOTE: There can be multiple arms active, take the first. - for(const auto& cap : active_arms[0].captures) - { - bound_tts.insert( cap.binding_idx, cap.iterations, mv$(captures[cap.cap_idx]) ); - } - return active_arms[0].index; - ), - (IfPat, BUG(sp, "IfPat unexpected here");), - (IfTok, BUG(sp, "IfTok unexpected here");), - (ExpectTok, - BUG(sp, "ExpectTok should have been handled already"); - ), - (ExpectPat, - struct H { - static bool is_prefix(const ::std::vector<unsigned>& needle, const ::std::vector<unsigned>& haystack) { - if( needle.size() > haystack.size() ) { - return false; - } - else { - for(unsigned int i = 0; i < needle.size(); i ++) { - if(needle[i] != haystack[i]) - return false; - } - return true; - } - } - }; - - auto cap = Macro_HandlePatternCap(lex, e.type); - - unsigned int cap_idx = captures.size(); - captures.push_back( mv$(cap) ); - for(unsigned int i = 0; i < active_arms.size(); i ++) - { - auto& arm = active_arms[i]; - const auto& pat_e = arm_pats[i].as_ExpectPat(); - arm.captures.push_back( Capture { pat_e.idx, arm.stream.get_loop_iters(), cap_idx } ); - } - ) - ) - } - - // Keep looping - breakout is handled in 'End' above - } -} -#endif void Macro_InvokeRules_CountSubstUses(ParameterMappings& bound_tts, const ::std::vector<MacroExpansionEnt>& contents) { diff --git a/src/macro_rules/macro_rules.hpp b/src/macro_rules/macro_rules.hpp index 29c4f93d..02b302d8 100644 --- a/src/macro_rules/macro_rules.hpp +++ b/src/macro_rules/macro_rules.hpp @@ -18,6 +18,7 @@ #include <set> class MacroExpander; +class SimplePatEnt; TAGGED_UNION(MacroExpansionEnt, Token, // TODO: have a "raw" stream instead of just tokens @@ -42,6 +43,7 @@ struct MacroPatEnt { ::std::string name; unsigned int name_index = 0; + // TODO: Include a point span for the token? Token tok; ::std::vector<MacroPatEnt> subpats; @@ -95,6 +97,39 @@ struct MacroPatEnt friend ::std::ostream& operator<<(::std::ostream& os, const MacroPatEnt::Type& x); }; +struct SimplePatIfCheck +{ + MacroPatEnt::Type ty; // If PAT_TOKEN, token is checked + Token tok; +}; + +/// Simple pattern entry for macro_rules! arm patterns +TAGGED_UNION( SimplePatEnt, End, + // End of the pattern stream (expects EOF, and terminates the match process) + (End, struct{}), + (LoopStart, struct{}), + (LoopNext, struct{}), + (LoopEnd, struct{}), + (Jump, struct { + size_t jump_target; + }), + // Expect a specific token, erroring/failing the arm if nt met + (ExpectTok, Token), + // Expect a pattern match + (ExpectPat, struct { + MacroPatEnt::Type type; + unsigned int idx; + }), + // Compare the head of the input stream and poke the pattern stream + (If, struct { + bool is_equal; + size_t jump_target; + ::std::vector<SimplePatIfCheck> ents; + }) + ); + +extern::std::ostream& operator<<(::std::ostream& os, const SimplePatEnt& x); + /// An expansion arm within a macro_rules! blcok struct MacroRulesArm { @@ -102,14 +137,15 @@ struct MacroRulesArm ::std::vector< ::std::string> m_param_names; /// Patterns - ::std::vector<MacroPatEnt> m_pattern; + ::std::vector<SimplePatEnt> m_pattern; /// Rule contents ::std::vector<MacroExpansionEnt> m_contents; + ~MacroRulesArm(); MacroRulesArm() {} - MacroRulesArm(::std::vector<MacroPatEnt> pattern, ::std::vector<MacroExpansionEnt> contents): + MacroRulesArm(::std::vector<SimplePatEnt> pattern, ::std::vector<MacroExpansionEnt> contents): m_pattern( mv$(pattern) ), m_contents( mv$(contents) ) {} diff --git a/src/macro_rules/mod.cpp b/src/macro_rules/mod.cpp index 04f1577a..e9ecb504 100644 --- a/src/macro_rules/mod.cpp +++ b/src/macro_rules/mod.cpp @@ -182,7 +182,7 @@ MacroRulesPtr::~MacroRulesPtr() switch(x.type) { case MacroPatEnt::PAT_TOKEN: os << "=" << x.tok; break; - case MacroPatEnt::PAT_LOOP: os << "loop w/ " << x.tok << " [" << x.subpats << "]"; break; + case MacroPatEnt::PAT_LOOP: os << "loop" << x.name << " w/ " << x.tok << " [" << x.subpats << "]"; break; default: os << "$" << x.name << ":"; switch(x.type) @@ -228,6 +228,37 @@ MacroRulesPtr::~MacroRulesPtr() return os; } +::std::ostream& operator<<(::std::ostream& os, const SimplePatEnt& x) +{ + TU_MATCH_HDRA( (x), { ) + TU_ARMA(End, _e) os << "End"; + TU_ARMA(LoopStart, _e) os << "LoopStart"; + TU_ARMA(LoopNext, _e) os << "LoopNext"; + TU_ARMA(LoopEnd, _e) os << "LoopEnd"; + TU_ARMA(Jump, e) { + os << "Jump(->" << e.jump_target << ")"; + } + TU_ARMA(ExpectTok, e) { + os << "Expect(" << e << ")"; + } + TU_ARMA(ExpectPat, e) { + os << "Expect($" << e.idx << " = " << e.type << ")"; + } + TU_ARMA(If, e) { + os << "If(" << (e.is_equal ? "=" : "!=") << "["; + for(const auto& p : e.ents) { + if(p.ty == MacroPatEnt::PAT_TOKEN) + os << p.tok; + else + os << p.ty; + os << ", "; + } + os << "] ->" << e.jump_target << ")"; + } + } + return os; +} + ::std::ostream& operator<<(::std::ostream& os, const MacroExpansionEnt& x) { TU_MATCH( MacroExpansionEnt, (x), (e), @@ -252,4 +283,7 @@ MacroRulesPtr::~MacroRulesPtr() MacroRules::~MacroRules() { } +MacroRulesArm::~MacroRulesArm() +{ +} diff --git a/src/macro_rules/parse.cpp b/src/macro_rules/parse.cpp index 8af9bd07..689a81f1 100644 --- a/src/macro_rules/parse.cpp +++ b/src/macro_rules/parse.cpp @@ -12,13 +12,21 @@ #include "pattern_checks.hpp" MacroRulesPtr Parse_MacroRules(TokenStream& lex); +namespace { + ::std::vector<SimplePatEnt> macro_pattern_to_simple(const Span& sp, const ::std::vector<MacroPatEnt>& pattern); +} /// A rule within a macro_rules! blcok class MacroRule { public: ::std::vector<MacroPatEnt> m_pattern; + Span m_pat_span; ::std::vector<MacroExpansionEnt> m_contents; + + MacroRule() {} + MacroRule(MacroRule&&) = default; + MacroRule(const MacroRule&) = delete; }; /// Parse the pattern of a macro_rules! arm @@ -261,7 +269,11 @@ MacroRule Parse_MacroRules_Var(TokenStream& lex) } // - Pattern entries ::std::vector< ::std::string> names; - rule.m_pattern = Parse_MacroRules_Pat(lex, tok.type(), close, names); + { + auto ps = lex.start_span(); + rule.m_pattern = Parse_MacroRules_Pat(lex, tok.type(), close, names); + rule.m_pat_span = lex.end_span(ps); + } GET_CHECK_TOK(tok, lex, TOK_FATARROW); @@ -280,189 +292,9 @@ MacroRule Parse_MacroRules_Var(TokenStream& lex) return rule; } -bool patterns_are_same(const Span& sp, const MacroPatEnt& left, const MacroPatEnt& right) -{ - if( left.type > right.type ) - return patterns_are_same(sp, right, left); - - //if( left.name != right.name ) { - // TODO(sp, "Handle different binding names " << left << " != " << right); - //} - - // NOTE: left.type <= right.type - switch(right.type) - { - case MacroPatEnt::PAT_TOKEN: - assert( left.type == MacroPatEnt::PAT_TOKEN ); - return right.tok == left.tok; - case MacroPatEnt::PAT_LOOP: - switch(left.type) - { - case MacroPatEnt::PAT_TOKEN: - // - Check for compatibility, but these two don't match - if( patterns_are_same(sp, left, right.subpats.at(0)) == true ) - ERROR(sp, E0000, "Incompatible use of loop with matching non-loop"); - return false; - case MacroPatEnt::PAT_LOOP: - TODO(sp, "patterns_are_same - PAT_LOOP"); - default: - assert( !"" ); - } - - case MacroPatEnt::PAT_TT: - if( left.type == right.type ) - return true; - ERROR(sp, E0000, "Incompatible macro fragments - " << right << " used with " << left); - break; - - case MacroPatEnt::PAT_PAT: - switch(left.type) - { - case MacroPatEnt::PAT_TOKEN: - // - If this token is a valid pattern token, error - if( is_token_pat(left.tok.type()) ) - ERROR(sp, E0000, "Incompatible macro fragments - " << right << " used with " << left); - return false; - case MacroPatEnt::PAT_PAT: - return true; - default: - ERROR(sp, E0000, "Incompatible macro fragments - " << right << " used with " << left); - } - break; - // `:ident` - Compatible with just other tokens - case MacroPatEnt::PAT_IDENT: - switch(left.type) - { - case MacroPatEnt::PAT_TOKEN: - if( left.tok.type() == TOK_IDENT ) - ERROR(sp, E0000, "Incompatible macro fragments " << right << " used with " << left); - return false; - case MacroPatEnt::PAT_IDENT: - return true; - default: - ERROR(sp, E0000, "Incompatible macro fragments " << right << " used with " << left); - } - case MacroPatEnt::PAT_PATH: - switch(left.type) - { - case MacroPatEnt::PAT_TOKEN: - if( is_token_path(left.tok.type()) ) - ERROR(sp, E0000, "Incompatible macro fragments " << right << " used with " << left); - return false; - case MacroPatEnt::PAT_PATH: - return true; - default: - ERROR(sp, E0000, "Incompatible macro fragments " << right << " used with " << left); - } - case MacroPatEnt::PAT_TYPE: - switch(left.type) - { - case MacroPatEnt::PAT_TOKEN: - if( is_token_type(left.tok.type()) ) - ERROR(sp, E0000, "Incompatible macro fragments " << right << " used with " << left); - return false; - case MacroPatEnt::PAT_TYPE: - return true; - default: - ERROR(sp, E0000, "Incompatible macro fragments " << right << " used with " << left); - } - case MacroPatEnt::PAT_EXPR: - switch(left.type) - { - case MacroPatEnt::PAT_TOKEN: - if( is_token_expr(left.tok.type()) ) - ERROR(sp, E0000, "Incompatible macro fragments " << right << " used with " << left); - return false; - // TODO: Allow a loop starting with an expr? - // - Consume, add split based on loop condition or next arm - // - Possible problem with binding levels. - case MacroPatEnt::PAT_EXPR: - return true; - default: - ERROR(sp, E0000, "Incompatible macro fragments " << right << " used with " << left); - } - case MacroPatEnt::PAT_STMT: - switch(left.type) - { - case MacroPatEnt::PAT_TOKEN: - if( is_token_stmt(left.tok.type()) ) - ERROR(sp, E0000, "Incompatible macro fragments " << right << " used with " << left); - return false; - case MacroPatEnt::PAT_STMT: - return true; - default: - ERROR(sp, E0000, "Incompatible macro fragments " << right << " used with " << left); - } - // Block - Expects '{' - Compatible with everything but a literal '{' - case MacroPatEnt::PAT_BLOCK: - switch(left.type) - { - case MacroPatEnt::PAT_TOKEN: - if( left.tok.type() == TOK_BRACE_OPEN ) - ERROR(sp, E0000, "Incompatible macro fragments"); - return false; - case MacroPatEnt::PAT_BLOCK: - return true; - default: - return false; - } - // Matches meta/attribute fragments. - case MacroPatEnt::PAT_META: - switch(left.type) - { - case MacroPatEnt::PAT_TOKEN: - if( left.tok.type() == TOK_IDENT ) - ERROR(sp, E0000, "Incompatible macro fragments"); - return false; - case MacroPatEnt::PAT_META: - return true; - default: - ERROR(sp, E0000, "Incompatible macro fragments " << right << " used with " << left); - } - // Matches items - case MacroPatEnt::PAT_ITEM: - switch(left.type) - { - case MacroPatEnt::PAT_TOKEN: - if( is_token_item(left.tok.type()) ) - ERROR(sp, E0000, "Incompatible macro fragments"); - return false; - case MacroPatEnt::PAT_ITEM: - return true; - default: - ERROR(sp, E0000, "Incompatible macro fragments " << right << " used with " << left); - } - // Matches visibility specifiers - case MacroPatEnt::PAT_VIS: - switch(left.type) - { - case MacroPatEnt::PAT_TOKEN: - if( is_token_vis(left.tok.type()) ) - ERROR(sp, E0000, "Incompatible macro fragments"); - return false; - case MacroPatEnt::PAT_VIS: - return true; - default: - ERROR(sp, E0000, "Incompatible macro fragments " << right << " used with " << left); - } - case MacroPatEnt::PAT_LIFETIME: - switch(left.type) - { - case MacroPatEnt::PAT_TOKEN: - if( left.tok.type() == TOK_LIFETIME ) - ERROR(sp, E0000, "Incompatible macro fragments"); - return false; - case MacroPatEnt::PAT_LIFETIME: - return true; - default: - ERROR(sp, E0000, "Incompatible macro fragments " << right << " used with " << left); - } - } - throw ""; -} - // TODO: Also count the number of times each variable is used? -void enumerate_names(const ::std::vector<MacroPatEnt>& pats, ::std::vector< ::std::string>& names) { +void enumerate_names(const ::std::vector<MacroPatEnt>& pats, ::std::vector< ::std::string>& names) +{ for( const auto& pat : pats ) { if( pat.type == MacroPatEnt::PAT_LOOP ) { @@ -504,9 +336,11 @@ MacroRulesPtr Parse_MacroRules(TokenStream& lex) // Re-parse the patterns into a unified form for(auto& rule : rules) { - MacroRulesArm arm = MacroRulesArm( mv$(rule.m_pattern), mv$(rule.m_contents) ); + // TODO: Transform the pattern into a DFA or similar here (seekable instruction stream?) - enumerate_names(arm.m_pattern, arm.m_param_names); + auto rule_sequence = macro_pattern_to_simple(rule.m_pat_span, rule.m_pattern); + MacroRulesArm arm = MacroRulesArm( mv$(rule_sequence), mv$(rule.m_contents) ); + enumerate_names(rule.m_pattern, arm.m_param_names); rule_arms.push_back( mv$(arm) ); } @@ -517,3 +351,251 @@ MacroRulesPtr Parse_MacroRules(TokenStream& lex) return MacroRulesPtr(rv); } + +namespace { + + struct ExpTok { + MacroPatEnt::Type ty; + const Token* tok; + ExpTok(MacroPatEnt::Type ty, const Token* tok): ty(ty), tok(tok) {} + bool operator==(const ExpTok& t) const { return this->ty == t.ty && *this->tok == *t.tok; } + }; + ::std::ostream& operator<<(::std::ostream& os, const ExpTok& t) { + os << "ExpTok(" << t.ty << " " << *t.tok << ")"; + return os; + } + + // Yields all possible ExpectTok/ExpectPat entries from a pattern position + // Returns `true` if there's no fall-through + bool macro_pattern_get_head_set_inner(::std::vector<ExpTok>& rv, const ::std::vector<MacroPatEnt>& pattern, size_t direct_pos, size_t indirect_ofs) + { + for(size_t idx = direct_pos; idx < pattern.size(); idx ++) + { + const auto& ent = pattern[idx]; + switch(ent.type) + { + case MacroPatEnt::PAT_LOOP: + if( macro_pattern_get_head_set_inner(rv, ent.subpats, 0, indirect_ofs) ) + { + // + loops have to iterate at least once, so if the set is closed by the sub-patterns, close us too + if(ent.name == "+") + { + return true; + } + // for * and ? loops, they can be skipped entirely. + // - No separator, this is for the skip case + } + else + { + // If the inner pattern didn't close the option set, then the next token can be the separator + if( ent.tok != TOK_NULL ) + { + // If indirect is non-zero, decrement without doing anything + if( indirect_ofs > 0 ) + { + indirect_ofs --; + } + else + { + rv.push_back(ExpTok(MacroPatEnt::PAT_TOKEN, &ent.tok)); + } + } + } + break; + default: + // If indirect is non-zero, decrement + if( indirect_ofs > 0 ) + { + indirect_ofs --; + } + else + { + rv.push_back( ExpTok(ent.type, &ent.tok) ); + return true; + } + break; + } + } + return false; + } + ::std::vector<ExpTok> macro_pattern_get_head_set(const ::std::vector<MacroPatEnt>& pattern, size_t direct_pos, size_t indirect_ofs) + { + ::std::vector<ExpTok> rv; + macro_pattern_get_head_set_inner(rv, pattern, direct_pos, indirect_ofs); + return rv; + } + + void macro_pattern_to_simple_inner(const Span& sp, ::std::vector<SimplePatEnt>& rv, const ::std::vector<MacroPatEnt>& pattern) + { + size_t level_start = rv.size(); + TRACE_FUNCTION_FR("[" << pattern << "]", "[" << FMT_CB(ss, for(auto it = rv.begin()+level_start; it != rv.end(); ++it) { ss << *it << ", "; }) << "]"); + auto push = [&rv](SimplePatEnt spe) { + DEBUG("[macro_pattern_to_simple_inner] rv[" << rv.size() << "] = " << spe); + rv.push_back( ::std::move(spe) ); + }; + auto push_ifv = [&push](bool is_equal, ::std::vector<SimplePatIfCheck> ents, size_t tgt) { + push(SimplePatEnt::make_If({ is_equal, tgt, mv$(ents) })); + }; + auto push_if = [&push_ifv](bool is_equal, MacroPatEnt::Type ty, const Token& tok, size_t tgt) { + push_ifv(is_equal, make_vec1(SimplePatIfCheck { ty, tok }), tgt); + }; + for(size_t idx = 0; idx < pattern.size(); idx ++) + { + const auto& ent = pattern[idx]; + DEBUG("[" << idx << "] ent = " << ent); + switch(ent.type) + { + case MacroPatEnt::PAT_LOOP: { + auto entry_pats = macro_pattern_get_head_set(ent.subpats, 0, 0); + DEBUG("Entry = [" << entry_pats << "]"); + ASSERT_BUG(sp, entry_pats.size() > 0, "No entry conditions extracted from sub-pattern [" << ent.subpats << "]"); + auto skip_pats = macro_pattern_get_head_set(pattern, idx+1, 0); + DEBUG("Skip = [" << skip_pats << "]"); + + // - Duplicates need special handling (build up a subseqent set) + for(const auto& ee : entry_pats) + { + if( ::std::find(skip_pats.begin(), skip_pats.end(), ee) != skip_pats.end() ) + { + TODO(sp, "Entry and skip patterns share an entry, ambigious - " << ee); + } + } + + // TODO: Combine the two cases below into one + + // If the loop is a $()+ loop, then just recurse into it + if( ent.name == "+" ) + { + push( SimplePatEnt::make_LoopStart({}) ); + size_t start = rv.size(); + macro_pattern_to_simple_inner(sp, rv, ent.subpats); + push( SimplePatEnt::make_LoopNext({}) ); + if( ent.tok != TOK_NULL ) + { + size_t end = rv.size() + 3; + if( ::std::find(skip_pats.begin(), skip_pats.end(), ExpTok(MacroPatEnt::PAT_TOKEN, &ent.tok)) != skip_pats.end() ) { + for(const auto& p : entry_pats) + { + auto v = ::make_vec2<SimplePatIfCheck>( + { MacroPatEnt::PAT_TOKEN, ent.tok}, + { p.ty, *p.tok } + ); + push_ifv(false, mv$(v), ~0u); + } + } + else { + push_if( false, MacroPatEnt::PAT_TOKEN, ent.tok, end ); + } + push( SimplePatEnt::make_ExpectTok(ent.tok) ); + push( SimplePatEnt::make_Jump({ start }) ); + } + else + { + // TODO: What if there's a collision at this level? + for(const auto& p : entry_pats) + { + push_if(true, p.ty, *p.tok, start); + } + } + push( SimplePatEnt::make_LoopEnd({}) ); + } + else + { + push( SimplePatEnt::make_LoopStart({}) ); + + // Options: + // - Enter the loop (if the next token is one of the head set of the loop) + // - Skip the loop (the next token is the head set of the subsequent entries) + size_t rewrite_start = rv.size(); + if( ent.name != "+" ) + { + if( entry_pats.size() == 1 ) + { + // If not the entry pattern, skip. + push_if(false, entry_pats.front().ty, *entry_pats.front().tok, ~0u); + } + else if( skip_pats.empty() ) + { + // No skip patterns, try all entry patterns + size_t start = rv.size() + entry_pats.size() + 1; + for(const auto& p : entry_pats) + { + push_if(true, p.ty, *p.tok, start); + } + push(SimplePatEnt::make_Jump({ ~0u })); + } + else + { + for(const auto& p : skip_pats) + { + push_if(true, p.ty, *p.tok, ~0u); + } + } + } + + macro_pattern_to_simple_inner(sp, rv, ent.subpats); + push( SimplePatEnt::make_LoopNext({}) ); + + if( ent.name != "?" ) + { + if( ent.tok != TOK_NULL ) + { + // If the joiner is also valid after the loop, handle by also checking the entry conditions + if( ::std::find(skip_pats.begin(), skip_pats.end(), ExpTok(MacroPatEnt::PAT_TOKEN, &ent.tok)) != skip_pats.end() ) { + // Try all re-loop (joiner + entry) patterns, if any fail then jump to the end. + for(const auto& p : entry_pats) { + auto v = ::make_vec2<SimplePatIfCheck>( + { MacroPatEnt::PAT_TOKEN, ent.tok}, + { p.ty, *p.tok } + ); + push_ifv(false, mv$(v), ~0u); + } + } + else { + // If not the joiner, jump to the end + push_if(false, MacroPatEnt::PAT_TOKEN, ent.tok, ~0u); + } + push( SimplePatEnt::make_ExpectTok(ent.tok) ); + } + // Jump back to the entry check. + push(SimplePatEnt::make_Jump({ rewrite_start })); + } + else + { + ASSERT_BUG(sp, ent.tok == TOK_NULL, "$()? with a separator isn't valid"); + } + size_t post_loop = rv.size(); + for(size_t i = rewrite_start; i < post_loop; i++) + { + if( auto* pe = rv[i].opt_If() ) { + if(pe->jump_target == ~0u) { + pe->jump_target = post_loop; + } + } + if( auto* pe = rv[i].opt_Jump() ) { + if(pe->jump_target == ~0u) { + pe->jump_target = post_loop; + } + } + } + push( SimplePatEnt::make_LoopEnd({}) ); + } + } break; + case MacroPatEnt::PAT_TOKEN: + push( SimplePatEnt::make_ExpectTok(ent.tok) ); + break; + default: + push( SimplePatEnt::make_ExpectPat({ ent.type, ent.name_index }) ); + break; + } + } + } + + ::std::vector<SimplePatEnt> macro_pattern_to_simple(const Span& sp, const ::std::vector<MacroPatEnt>& pattern) + { + ::std::vector<SimplePatEnt> rv; + TRACE_FUNCTION_FR(pattern, rv); + macro_pattern_to_simple_inner(sp, rv, pattern); + return rv; + } +} |