diff options
author | Daniel Burrows <dburrows@debian.org> | 2008-12-13 20:27:37 -0800 |
---|---|---|
committer | Daniel Burrows <dburrows@debian.org> | 2008-12-13 20:27:37 -0800 |
commit | f520ceae78cdfb17c7f2c703550bbc8485fa3030 (patch) | |
tree | 46c07f4b2a734d268b7626d2ca55273f84060181 | |
parent | 8258c49e45c552efc17df36648ec0cf62f62a2b3 (diff) | |
download | aptitude-f520ceae78cdfb17c7f2c703550bbc8485fa3030.tar.gz |
Add a ?term-prefix matcher that matches against textual extensions of a string, add parser support for using it on trailing bare strings, and make incremental search in the cwidget UI use it. (Closes: #506651)
-rw-r--r-- | src/generic/apt/matching/compare_patterns.cc | 3 | ||||
-rw-r--r-- | src/generic/apt/matching/match.cc | 144 | ||||
-rw-r--r-- | src/generic/apt/matching/parse.cc | 63 | ||||
-rw-r--r-- | src/generic/apt/matching/parse.h | 54 | ||||
-rw-r--r-- | src/generic/apt/matching/pattern.h | 29 | ||||
-rw-r--r-- | src/generic/apt/matching/serialize.cc | 6 | ||||
-rw-r--r-- | src/load_grouppolicy.cc | 4 | ||||
-rw-r--r-- | src/menu_tree.cc | 2 |
8 files changed, 270 insertions, 35 deletions
diff --git a/src/generic/apt/matching/compare_patterns.cc b/src/generic/apt/matching/compare_patterns.cc index 83398af5..8ae22b84 100644 --- a/src/generic/apt/matching/compare_patterns.cc +++ b/src/generic/apt/matching/compare_patterns.cc @@ -275,6 +275,9 @@ namespace aptitude case pattern::term: return p1->get_term_term().compare(p2->get_term_term()); + case pattern::term_prefix: + return p1->get_term_prefix_term().compare(p2->get_term_prefix_term()); + case pattern::true_tp: return 0; diff --git a/src/generic/apt/matching/match.cc b/src/generic/apt/matching/match.cc index bb1761a2..4310dfd4 100644 --- a/src/generic/apt/matching/match.cc +++ b/src/generic/apt/matching/match.cc @@ -201,6 +201,10 @@ namespace aptitude // the packages it matches. std::map<std::string, std::vector<Xapian::docid> > matched_terms; + // Maps each term that has been looked up as a prefix to a + // sorted list of the packages it matches. + std::map<std::string, std::vector<Xapian::docid> > matched_term_prefixes; + public: implementation() { @@ -238,6 +242,76 @@ namespace aptitude return cached_match->second; } + bool term_prefix_matches(const pkgCache::PkgIterator &pkg, + const std::string &prefix, + bool debug) + { + if(debug) + std::cout << "Searching for " << prefix << " as a term pefix." << std::endl; + + Xapian::docid pkg_docid(db.docidByName(pkg.Name())); + const Xapian::Database xapian_db(db.db()); + + + const std::map<std::string, std::vector<Xapian::docid> >::iterator + found = matched_term_prefixes.find(prefix); + + if(found != matched_term_prefixes.end()) + return std::binary_search(found->second.begin(), + found->second.end(), + pkg_docid); + else + { + if(debug) + std::cout << "Retrieving the prefix hits for " << prefix << std::endl; + + const std::map<std::string, std::vector<Xapian::docid> >::iterator + inserted = matched_term_prefixes.insert(std::make_pair(prefix, std::vector<Xapian::docid>())).first; + + std::vector<Xapian::docid> &matches(inserted->second); + + Xapian::TermIterator prefix_list_end = xapian_db.allterms_end(prefix); + for(Xapian::TermIterator extensionIt = xapian_db.allterms_begin(prefix); + extensionIt != prefix_list_end; ++extensionIt) + { + // Both this string and its stemmed version are + // possible continuations, so index both of them. + const std::string extension(*extensionIt); + const std::string extensionStemmed(Xapian::Stem("en")(extension)); + + const std::string *terms[2] = { &extension, &extensionStemmed }; + const int numTerms = sizeof(terms) / sizeof(terms[0]); + + for(const std::string **termIt = terms; + termIt < terms + numTerms; ++termIt) + { + const std::string &currTerm(**termIt); + + Xapian::PostingIterator + postingsBegin = xapian_db.postlist_begin(currTerm), + postingsEnd = xapian_db.postlist_end(currTerm); + + for(Xapian::PostingIterator it = postingsBegin; + it != postingsEnd; ++it) + matches.push_back(*it); + } + } + + std::sort(matches.begin(), matches.end()); + matches.erase(std::unique(matches.begin(), matches.end()), + matches.end()); + + if(debug) + std::cout << " (" << matches.size() << " hits)" << std::endl; + + // Now that the hit list is initialized, we can search it + // for a match. + return std::binary_search(matches.begin(), + matches.end(), + pkg_docid); + } + } + bool term_matches(const pkgCache::PkgIterator &pkg, const std::string &term, bool debug) @@ -1270,6 +1344,17 @@ namespace aptitude } break; + case pattern::term_prefix: + { + pkgCache::PkgIterator pkg(target.get_package_iterator(cache)); + + if(search_info->term_prefix_matches(pkg, p->get_term_prefix_term(), debug)) + return match::make_atomic(p); + else + return NULL; + } + break; + case pattern::true_tp: return match::make_atomic(p); break; @@ -1668,6 +1753,7 @@ namespace aptitude case pattern::tag: case pattern::task: case pattern::term: + case pattern::term_prefix: case pattern::true_tp: case pattern::upgradable: case pattern::user_tag: @@ -1834,6 +1920,9 @@ namespace aptitude case pattern::term: return true; + case pattern::term_prefix: + return true; + // Various non-Xapian terms. All of these return false. case pattern::archive: @@ -1948,6 +2037,9 @@ namespace aptitude case pattern::term: return true; + case pattern::term_prefix: + return true; + // Various non-dependent terms. All of these return // false. Some have internal matchers, but they are // separate searches. @@ -2179,6 +2271,7 @@ namespace aptitude case pattern::tag: case pattern::task: case pattern::term: + case pattern::term_prefix: case pattern::true_tp: case pattern::upgradable: case pattern::user_tag: @@ -2203,12 +2296,14 @@ namespace aptitude // // Returns the new query, or an empty query if there's no // Xapian-dependence. - Xapian::Query build_xapian_query(const cwidget::util::ref_ptr<pattern> &p) + Xapian::Query build_xapian_query(const cwidget::util::ref_ptr<pattern> &p, + const Xapian::Database &db) { switch(p->get_type()) { case pattern::all_versions: - return build_xapian_query(p->get_all_versions_pattern()); + return build_xapian_query(p->get_all_versions_pattern(), + db); case pattern::and_tp: { @@ -2239,7 +2334,8 @@ namespace aptitude if((*it)->get_type() == pattern::not_tp && is_pure_xapian((*it)->get_not_pattern())) { - Xapian::Query q(build_xapian_query((*it)->get_not_pattern())); + Xapian::Query q(build_xapian_query((*it)->get_not_pattern(), + db)); // Ignore empty queries; they signify that the // entry should be pruned from the generated @@ -2271,7 +2367,8 @@ namespace aptitude if((*it)->get_type() != pattern::not_tp && !is_xapian_dependent(*it)) { - Xapian::Query q(build_xapian_query(*it)); + Xapian::Query q(build_xapian_query(*it, + db)); if(and_maybe_tail.empty()) and_maybe_tail = q; @@ -2290,7 +2387,8 @@ namespace aptitude if((*it)->get_type() != pattern::not_tp && is_xapian_dependent(*it)) { - Xapian::Query q(build_xapian_query(*it)); + Xapian::Query q(build_xapian_query(*it, + db)); if(!q.empty()) { @@ -2343,13 +2441,16 @@ namespace aptitude } case pattern::any_version: - return build_xapian_query(p->get_any_version_pattern()); + return build_xapian_query(p->get_any_version_pattern(), + db); case pattern::for_tp: - return build_xapian_query(p->get_for_pattern()); + return build_xapian_query(p->get_for_pattern(), + db); case pattern::narrow: - return build_xapian_query(p->get_narrow_pattern()); + return build_xapian_query(p->get_narrow_pattern(), + db); case pattern::not_tp: // D'oh! Should I assert-fail here? @@ -2373,7 +2474,8 @@ namespace aptitude { if((*it)->get_type() != pattern::not_tp) { - Xapian::Query q(build_xapian_query(*it)); + Xapian::Query q(build_xapian_query(*it, + db)); if(!q.empty()) { @@ -2391,7 +2493,8 @@ namespace aptitude } case pattern::widen: - return build_xapian_query(p->get_widen_pattern()); + return build_xapian_query(p->get_widen_pattern(), + db); case pattern::term: // We try stemming everything as if it were English. @@ -2401,6 +2504,25 @@ namespace aptitude // language of the package descriptions). return stem_term(p->get_term_term()); + case pattern::term_prefix: + { + Xapian::Stem stemmer("en"); + std::vector<Xapian::Query> subqueries; + + Xapian::TermIterator prefix_list_end = db.allterms_end(p->get_term_prefix_term()); + for(Xapian::TermIterator extensionIt = db.allterms_begin(p->get_term_prefix_term()); + extensionIt != prefix_list_end; ++extensionIt) + { + const std::string extension(*extensionIt); + subqueries.push_back(Xapian::Query(extension)); + subqueries.push_back(Xapian::Query(stemmer(extension))); + } + + return Xapian::Query(Xapian::Query::OP_OR, + subqueries.begin(), + subqueries.end()); + } + case pattern::archive: case pattern::action: case pattern::automatic: @@ -2524,7 +2646,7 @@ namespace aptitude << std::endl; - Xapian::Query q(build_xapian_query(normalized)); + Xapian::Query q(build_xapian_query(normalized, db)); if(q.empty()) { diff --git a/src/generic/apt/matching/parse.cc b/src/generic/apt/matching/parse.cc index 81d90c6e..4c342410 100644 --- a/src/generic/apt/matching/parse.cc +++ b/src/generic/apt/matching/parse.cc @@ -134,6 +134,7 @@ namespace term_type_tag, term_type_task, term_type_term, + term_type_term_prefix, term_type_true, term_type_upgradable, term_type_user_tag, @@ -190,6 +191,7 @@ namespace { "tag", term_type_tag }, { "task", term_type_task }, { "term", term_type_term }, + { "term-prefix", term_type_term_prefix }, { "true", term_type_true }, { "upgradable", term_type_upgradable }, { "user-tag", term_type_user_tag }, @@ -205,7 +207,7 @@ typedef imm::map<std::string, int> parse_environment; ref_ptr<pattern> parse_condition_list(string::const_iterator &start, const string::const_iterator &end, const vector<const char *> &terminators, - bool wide_context, + bool wide_context, bool partial, const parse_environment &name_context); @@ -508,7 +510,7 @@ struct parse_method<ref_ptr<pattern> > bool wide_context, const parse_environment &name_context) const { - return parse_condition_list(start, end, terminators, wide_context, name_context); + return parse_condition_list(start, end, terminators, wide_context, false, name_context); } }; @@ -617,7 +619,7 @@ ref_ptr<pattern> parse_term_args(string::const_iterator &start, const parse_environment &name_context) { parse_open_paren(start, end); - ref_ptr<pattern> p(parse_condition_list(start, end, terminators, wide_context, name_context)); + ref_ptr<pattern> p(parse_condition_list(start, end, terminators, wide_context, false, name_context)); parse_close_paren(start, end); return p; @@ -665,7 +667,7 @@ ref_ptr<pattern> parse_explicit_term(const std::string &term_name, string::const_iterator &start, const string::const_iterator &end, const std::vector<const char *> &terminators, - bool wide_context, + bool wide_context, bool partial, const parse_environment &name_context) { parse_whitespace(start, end); @@ -707,6 +709,7 @@ ref_ptr<pattern> parse_explicit_term(const std::string &term_name, ref_ptr<pattern> p(parse_condition_list(start, end, terminators, wide_context, + partial, name_context2)); return pattern::make_for(bound_variable, p); @@ -732,11 +735,13 @@ ref_ptr<pattern> maybe_bind(const string &bound_variable, term); } +// NB: "partial" is passed in because ?for terms can have trailing strings. ref_ptr<pattern> parse_term_args(const string &term_name, string::const_iterator &start, const string::const_iterator &end, const vector<const char *> &terminators, bool wide_context, + bool partial, const parse_environment &name_context) { { @@ -901,7 +906,7 @@ ref_ptr<pattern> parse_term_args(const string &term_name, // it's no longer a terminator. new_terminators.pop_back(); - ref_ptr<pattern> p = parse_condition_list(start, end, new_terminators, wide_context, name_context); + ref_ptr<pattern> p = parse_condition_list(start, end, new_terminators, wide_context, false, name_context); parse_whitespace(start, end); parse_close_paren(start, end); @@ -918,7 +923,7 @@ ref_ptr<pattern> parse_term_args(const string &term_name, case term_type_false: return pattern::make_false(); case term_type_for: - return parse_explicit_term(term_name, start, end, terminators, wide_context, name_context); + return parse_explicit_term(term_name, start, end, terminators, wide_context, partial, name_context); case term_type_garbage: return pattern::make_garbage(); case term_type_installed: @@ -955,6 +960,8 @@ ref_ptr<pattern> parse_term_args(const string &term_name, return pattern::make_task(parse_string_match_args(start, end)); case term_type_term: return pattern::make_term(parse_string_match_args(start, end)); + case term_type_term_prefix: + return pattern::make_term_prefix(parse_string_match_args(start, end)); case term_type_true: return pattern::make_true(); case term_type_upgradable: @@ -1061,6 +1068,7 @@ ref_ptr<pattern> parse_function_style_term_tail(string::const_iterator &start, end, terminators, wide_context, + false, name_context), name_context); } @@ -1068,7 +1076,7 @@ ref_ptr<pattern> parse_function_style_term_tail(string::const_iterator &start, ref_ptr<pattern> parse_atom(string::const_iterator &start, const string::const_iterator &end, const vector<const char *> &terminators, - bool wide_context, + bool wide_context, bool partial, const parse_environment &name_context) { std::string substr; @@ -1083,7 +1091,7 @@ ref_ptr<pattern> parse_atom(string::const_iterator &start, { ++start; return pattern::make_not(parse_atom(start, end, terminators, - wide_context, + wide_context, partial, name_context)); } else if(*start == '(') @@ -1094,6 +1102,7 @@ ref_ptr<pattern> parse_atom(string::const_iterator &start, ref_ptr<pattern> lst(parse_condition_list(start, end, vector<const char *>(), wide_context, + false, name_context)); if(!(start != end && *start == ')')) @@ -1171,6 +1180,7 @@ ref_ptr<pattern> parse_atom(string::const_iterator &start, end, terminators, search_flag == 'W', + partial, name_context)); switch(search_flag) @@ -1189,12 +1199,14 @@ ref_ptr<pattern> parse_atom(string::const_iterator &start, end, terminators, false, + partial, name_context)); ref_ptr<pattern> p(parse_atom(start, end, terminators, false, + partial, name_context)); return pattern::make_narrow(filter, p); @@ -1245,7 +1257,7 @@ ref_ptr<pattern> parse_atom(string::const_iterator &start, throw MatchingException(_("Provides: cannot be broken")); ref_ptr<pattern> p(parse_atom(start, end, terminators, - false, name_context)); + false, partial, name_context)); switch(search_flag) { @@ -1305,9 +1317,12 @@ ref_ptr<pattern> parse_atom(string::const_iterator &start, } else { - return pattern::make_term(parse_substr(start, end, - terminators, - true)); + std::string s = parse_substr(start, end, terminators, true); + + if(partial && start == end) + return pattern::make_term_prefix(s); + else + return pattern::make_term(s); } } @@ -1318,7 +1333,7 @@ ref_ptr<pattern> parse_atom(string::const_iterator &start, ref_ptr<pattern> parse_and_group(string::const_iterator &start, const string::const_iterator &end, const vector<const char *> &terminators, - bool wide_context, + bool wide_context, bool partial, const parse_environment &name_context) { std::vector<ref_ptr<pattern> > rval; @@ -1330,6 +1345,7 @@ ref_ptr<pattern> parse_and_group(string::const_iterator &start, { ref_ptr<pattern> atom(parse_atom(start, end, terminators, wide_context, + partial, name_context)); rval.push_back(atom); @@ -1350,7 +1366,7 @@ ref_ptr<pattern> parse_and_group(string::const_iterator &start, ref_ptr<pattern> parse_condition_list(string::const_iterator &start, const string::const_iterator &end, const vector<const char *> &terminators, - bool wide_context, + bool wide_context, bool partial, const parse_environment &name_context) { std::vector<ref_ptr<pattern> > grp; @@ -1375,6 +1391,7 @@ ref_ptr<pattern> parse_condition_list(string::const_iterator &start, grp.push_back(parse_and_group(start, end, terminators, wide_context, + partial, name_context)); parse_whitespace(start, end); @@ -1390,7 +1407,8 @@ ref_ptr<pattern> parse(string::const_iterator &start, const string::const_iterator &end, const std::vector<const char *> &terminators, bool flag_errors, - bool require_full_parse) + bool require_full_parse, + bool partial) { // Just filter blank strings out immediately. while(start != end && isspace(*start) && !terminate(start, end, terminators)) @@ -1399,16 +1417,23 @@ ref_ptr<pattern> parse(string::const_iterator &start, if(start == end) return ref_ptr<pattern>(); + string::const_iterator real_end = end; + + // Move 'end' back as long as there's whitespace, so that we can + // easily check whether a word is at the end of the string. + while(start != real_end && isspace(*(real_end - 1))) + --real_end; + try { - ref_ptr<pattern> rval(parse_condition_list(start, end, terminators, - true, + ref_ptr<pattern> rval(parse_condition_list(start, real_end, terminators, + true, partial, parse_environment())); - while(start != end && isspace(*start)) + while(start != real_end && isspace(*start)) ++start; - if(require_full_parse && start != end) + if(require_full_parse && start != real_end) throw MatchingException(_("Unexpected ')'")); else return rval; diff --git a/src/generic/apt/matching/parse.h b/src/generic/apt/matching/parse.h index 0484b1ba..285dc922 100644 --- a/src/generic/apt/matching/parse.h +++ b/src/generic/apt/matching/parse.h @@ -51,6 +51,10 @@ namespace aptitude * signalled if part of the region * is left after parsing (i.e., * if start != end). + * \param partial If \b true, if the last word is an + * unadorned Xapian term, it will be + * treated as a wildcard representing + * all the terms in the database. * * \return The parsed expression, or NULL if an * error occurred or if the input range was empty. @@ -60,7 +64,8 @@ namespace aptitude const std::string::const_iterator &end, const std::vector<const char *> &terminators, bool flag_errors, - bool require_full_parse); + bool require_full_parse, + bool partial); /** \brief Parse a string as a search pattern. @@ -93,7 +98,23 @@ namespace aptitude return parse(start, s.end(), terminators, flag_errors, - require_full_parse); + require_full_parse, + false); + } + + inline cwidget::util::ref_ptr<pattern> + parse(const std::string &s, + const std::vector<const char *> &terminators, + bool flag_errors, + bool require_full_parse, + bool partial) + { + std::string::const_iterator start = s.begin(); + return parse(start, s.end(), + terminators, + flag_errors, + require_full_parse, + partial); } /** \brief Parse a string as a search pattern. */ @@ -107,6 +128,19 @@ namespace aptitude require_full_parse); } + /** \brief Parse a string as a search pattern. */ + inline cwidget::util::ref_ptr<pattern> + parse(const std::string &s, + bool flag_errors, + bool require_full_parse, + bool partial) + { + return parse(s, std::vector<const char *>(), + flag_errors, + require_full_parse, + partial); + } + /** \brief Parse a string as a search pattern. * * Errors will be flagged by generating apt errors, and it is an @@ -122,6 +156,22 @@ namespace aptitude { return parse(s, true, true); } + + /** \brief Parse a string as a possibly incomplete search pattern. + * + * Errors will be flagged by generating apt errors, and it is an + * error if part of the input string is left over after parsing. + * + * \param s The string to parse. + * + * \return The parsed expression, or NULL if an + * error occurred or if the input string was empty. + */ + inline cwidget::util::ref_ptr<pattern> + parse_with_extension(const std::string &s) + { + return parse(s, true, true, true); + } } } diff --git a/src/generic/apt/matching/pattern.h b/src/generic/apt/matching/pattern.h index f3f876a3..da33d4e9 100644 --- a/src/generic/apt/matching/pattern.h +++ b/src/generic/apt/matching/pattern.h @@ -470,6 +470,12 @@ namespace aptitude * Fields: term. */ term, + /** \brief ?term-prefix(TERM) + * + * Matches a package using a full-text keyword search against + * a prefix (so "apt" will match both "apt" and "aptitude"). + */ + term_prefix, /** \brief ?true * * Matches everything. @@ -1723,6 +1729,29 @@ namespace aptitude // @} + + // @{ + + /** \brief Create a ?term-prefix term. + * + * \param s The keyword to search for. + */ + static cwidget::util::ref_ptr<pattern> + make_term_prefix(const std::string &s) + { + return new pattern(term_prefix, s); + } + + /** \brief Retrieve the term field of a ?term term. */ + const std::string &get_term_prefix_term() const + { + eassert(tp == term_prefix); + + return string_info; + } + + // @} + /** \name true term constructor */ // @{ diff --git a/src/generic/apt/matching/serialize.cc b/src/generic/apt/matching/serialize.cc index 42d4ca36..f9df2ed5 100644 --- a/src/generic/apt/matching/serialize.cc +++ b/src/generic/apt/matching/serialize.cc @@ -497,6 +497,12 @@ namespace aptitude out.push_back(')'); break; + case pattern::term_prefix: + out += "?term-prefix("; + serialize_string(p->get_term_term(), out); + out.push_back(')'); + break; + case pattern::true_tp: out += "?true"; break; diff --git a/src/load_grouppolicy.cc b/src/load_grouppolicy.cc index 9b773bd3..83ed79c6 100644 --- a/src/load_grouppolicy.cc +++ b/src/load_grouppolicy.cc @@ -491,7 +491,7 @@ public: terminators.push_back(")"); ref_ptr<matching::pattern> p(matching::parse(begin, end, terminators, - true, false)); + true, false, false)); if(!p.valid()) throw GroupParseException(_("Unable to parse pattern at '%s'"), @@ -647,7 +647,7 @@ class pattern_policy_parser : public group_policy_parser cw::util::ref_ptr<matching::pattern> pattern(matching::parse(begin, end, terminators, - true, false)); + true, false, false)); bool passthrough = false; std::auto_ptr<pkg_grouppolicy_factory> chain; diff --git a/src/menu_tree.cc b/src/menu_tree.cc index 103ea3da..4df1e2e6 100644 --- a/src/menu_tree.cc +++ b/src/menu_tree.cc @@ -377,7 +377,7 @@ void menu_tree::do_incsearch(std::wstring s, bool backward) pre_incsearch_selected=get_selection(); } - ref_ptr<matching::pattern> p = matching::parse(cw::util::transcode(s), false, true); + ref_ptr<matching::pattern> p = matching::parse(cw::util::transcode(s), false, true, true); set_selection(pre_incsearch_selected); |