diff options
| author | Enrico Zini <enrico@enricozini.org> | 2010-05-10 17:50:28 +0100 |
|---|---|---|
| committer | Enrico Zini <enrico@enricozini.org> | 2010-05-10 17:50:28 +0100 |
| commit | 1b5ac4e66699cacde7123da3b111a0a16ee55db4 (patch) | |
| tree | 63d345bf5d93f5ed3ab639571d60ccd8c2b2b331 /tools | |
| parent | 80d13236ef7014f077a5793a9cb3e9a0573ffbe1 (diff) | |
| download | libept-1b5ac4e66699cacde7123da3b111a0a16ee55db4.tar.gz | |
Replaced textindex with axi, which contains just a handful of helper functions to handle apt-xapian-index
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | tools/Environment.cc | 10 | ||||
| -rw-r--r-- | tools/Environment.h | 11 | ||||
| -rw-r--r-- | tools/ept-cache.cc | 105 | ||||
| -rw-r--r-- | tools/ept-search.cpp | 43 |
5 files changed, 85 insertions, 85 deletions
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 7177401..bd75e75 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -2,7 +2,6 @@ include_directories( ${CMAKE_SOURCE_DIR} ${CMAKE_BINARY_DIR} ${TAGCOLL_INCLUDE_DIRS} ${WIBBLE_INCLUDE_DIRS} ) link_libraries( ept ) -add_executable( ept-search ept-search.cpp ) add_executable( ept-cat ept-cat.cpp ) add_executable( pkglist pkglist.cpp ) add_executable( manpage manpage.cc ) diff --git a/tools/Environment.cc b/tools/Environment.cc index a293ab7..2249591 100644 --- a/tools/Environment.cc +++ b/tools/Environment.cc @@ -25,13 +25,14 @@ #include <ept/debtags/vocabulary.h> #include <ept/popcon/popcon.h> #include <ept/popcon/local.h> -#include <ept/textsearch/textsearch.h> +#include <ept/axi/axi.h> #include <stdio.h> #include <stdarg.h> #include <unistd.h> // isatty using namespace std; +using namespace ept; static Environment* instance = 0; @@ -45,7 +46,7 @@ Environment& Environment::get() throw () // Initialize the environment with default values Environment::Environment() throw () - : m_apt(0), m_debtags(0), m_popcon(0), m_popconlocal(0), m_textsearch(0), _verbose(false), _debug(false) {} + : m_apt(0), m_debtags(0), m_popcon(0), m_popconlocal(0), m_axi(0), _verbose(false), _debug(false) {} void Environment::init(bool editable) { @@ -54,7 +55,10 @@ void Environment::init(bool editable) m_vocabulary = new ept::debtags::Vocabulary; m_popcon = new ept::popcon::Popcon; m_popconlocal = new ept::popcon::Local; - m_textsearch = new ept::textsearch::TextSearch; + if (axi::timestamp() > 0) + m_axi = new Xapian::Database(axi::path_db()); + else + m_axi = new Xapian::Database; } void fatal_error(const char* fmt, ...) throw() ATTR_PRINTF(1, 2) diff --git a/tools/Environment.h b/tools/Environment.h index 4df4fc4..d332da3 100644 --- a/tools/Environment.h +++ b/tools/Environment.h @@ -23,6 +23,10 @@ #include <string> +namespace Xapian { +class Database; +} + namespace ept { namespace apt { class Apt; @@ -35,9 +39,6 @@ namespace popcon { class Popcon; class Local; } -namespace textsearch { -class TextSearch; -} } class Environment @@ -59,7 +60,7 @@ protected: ept::popcon::Local* m_popconlocal; /// TextSearch data provider - ept::textsearch::TextSearch* m_textsearch; + Xapian::Database* m_axi; // True when operations should be verbose bool _verbose; @@ -95,7 +96,7 @@ public: ept::popcon::Local& popconLocal() { return *m_popconlocal; } /// Access the textsearch data - ept::textsearch::TextSearch& textsearch() { return *m_textsearch; } + Xapian::Database& axi() { return *m_axi; } // Accessor methods diff --git a/tools/ept-cache.cc b/tools/ept-cache.cc index 483edb4..bd3a8af 100644 --- a/tools/ept-cache.cc +++ b/tools/ept-cache.cc @@ -27,18 +27,21 @@ #include <ept/apt/apt.h> #include <ept/apt/packagerecord.h> #include <ept/debtags/debtags.h> +#include <ept/debtags/vocabulary.h> #include <ept/popcon/popcon.h> #include <ept/popcon/local.h> #include <tagcoll/expression.h> -#include <ept/textsearch/textsearch.h> -#include <ept/textsearch/extraindexers.h> +#include <ept/axi/axi.h> #include <wibble/regexp.h> +#include <wibble/string.h> #include <algorithm> #include <iostream> #include <sstream> +#include <sys/types.h> +#include <sys/stat.h> #include <unistd.h> /* @@ -86,10 +89,10 @@ using namespace std; using namespace tagcoll; +using namespace wibble; using namespace ept; using namespace ept::debtags; using namespace ept::apt; -using namespace ept::textsearch; static const int DEFAULT_QUALITY_CUTOFF = 50; @@ -461,6 +464,42 @@ struct Generator { filter::And filters; + Xapian::QueryParser qp; + Xapian::Stem stem; + + Generator() : stem("en") + { + qp.set_default_op(Xapian::Query::OP_AND); + qp.set_database(env().axi()); + qp.set_stemmer(stem); + qp.set_stemming_strategy(Xapian::QueryParser::STEM_SOME); + qp.add_prefix("pkg", "XP"); + qp.add_boolean_prefix("tag", "XT"); + qp.add_boolean_prefix("sec", "XS"); + } + + Xapian::Query makeQuery(const vector<string>& keywords) + { + // Add prefixes to tag names + const Vocabulary& voc = env().voc(); + vector<string> kw; + for (vector<string>::const_iterator i = keywords.begin(); + i != keywords.end(); ++i) + { + if (voc.hasTag(*i)) + kw.push_back("tag:" + *i); + else + kw.push_back(*i); + } + return qp.parse_query(str::join(kw.begin(), kw.end(), " "), + Xapian::QueryParser::FLAG_BOOLEAN | + Xapian::QueryParser::FLAG_LOVEHATE | + Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE | + Xapian::QueryParser::FLAG_WILDCARD | + Xapian::QueryParser::FLAG_PURE_NOT | + Xapian::QueryParser::FLAG_SPELLING_CORRECTION); + } + // Generate all the packages, without records void generateNames(Consumer& out) { @@ -496,13 +535,14 @@ struct Generator void keywordXapianSearch(const vector<string>& keywords, Consumer& out, int qualityCutoff = DEFAULT_QUALITY_CUTOFF) { debug("Generate with xapian\n"); - Xapian::Enquire enquire(env().textsearch().db()); + Xapian::Enquire enquire(env().axi()); // Set up the base query - Xapian::Query query = env().textsearch().makeORQuery(keywords.begin(), keywords.end()); + Xapian::Query query = makeQuery(keywords); enquire.set_query(query); debug("Xapian query: %s\n", query.get_description().c_str()); +#if 0 // Get a set of tags to expand the query vector<string> expand = env().textsearch().expand(enquire); @@ -510,6 +550,7 @@ struct Generator Xapian::Query expansion(Xapian::Query::OP_OR, expand.begin(), expand.end()); enquire.set_query(Xapian::Query(Xapian::Query::OP_OR, query, expansion)); debug("Expanded Xapian query: %s\n", enquire.get_query().get_description().c_str()); +#endif //cerr << "Q: " << enquire.get_query().get_description() << endl; fromXapianEnquire(enquire, out, qualityCutoff); @@ -605,7 +646,7 @@ Consumer* createPrinter(const wibble::commandline::EptCacheOptions& opts, Consum bool usesXapian(wibble::commandline::EptCacheOptions& opts) { - return opts.hasNext() && env().textsearch().hasData(); + return opts.hasNext() && axi::timestamp() > 0; } void generate(wibble::commandline::EptCacheOptions& opts, Consumer& output, @@ -684,7 +725,7 @@ void generate(wibble::commandline::EptCacheOptions& opts, Consumer& output, vector<string> keywords; while (opts.hasNext()) keywords.push_back(toLower(opts.next())); - if (env().textsearch().hasData()) + if (axi::timestamp() > 0) gen.keywordXapianSearch(keywords, *cons, defaultXapianQualityCutoff); else gen.keywordAptSearch(keywords, *cons); @@ -712,6 +753,21 @@ struct BlacklistDecider : public Xapian::MatchDecider } }; +Xapian::Query makeRelatedQuery(const std::string& pkgname) +{ + Xapian::Enquire enquire(env().axi()); + + // Retrieve the document for the given package + enquire.set_query(Xapian::Query("XP"+pkgname)); + Xapian::MSet matches = enquire.get_mset(0, 1); + Xapian::MSetIterator mi = matches.begin(); + if (mi == matches.end()) return Xapian::Query(); + Xapian::Document doc = mi.get_document(); + + // Return the query to get the list of similar documents + return Xapian::Query(Xapian::Query::OP_OR, doc.termlist_begin(), doc.termlist_end()); +} + void generateRelated(wibble::commandline::EptCacheOptions& opts, Consumer& output, int defaultXapianLimit = -1, int defaultXapianQualityCutoff = DEFAULT_QUALITY_CUTOFF) { @@ -767,10 +823,9 @@ void generateRelated(wibble::commandline::EptCacheOptions& opts, Consumer& outpu if (opts.out_cutoff->isSet()) defaultXapianQualityCutoff = opts.out_cutoff->intValue(); - TextSearch& textsearch = env().textsearch(); - Xapian::Enquire enq(textsearch.db()); + Xapian::Enquire enq(env().axi()); string name = opts.next(); - Xapian::Query query = textsearch.makeRelatedQuery(name); + Xapian::Query query = makeRelatedQuery(name); BlacklistDecider blacklister; debug("Excluding '%s' from the results\n", name.c_str()); blacklister.blacklist.insert(name); @@ -782,7 +837,7 @@ void generateRelated(wibble::commandline::EptCacheOptions& opts, Consumer& outpu blacklister.blacklist.insert(name); if (!env().apt().isValid(name)) throw wibble::exception::Consistency("reading package names", "package "+name+" does not exist"); - query = Xapian::Query(Xapian::Query::OP_AND, query, textsearch.makeRelatedQuery(name)); + query = Xapian::Query(Xapian::Query::OP_AND, query, makeRelatedQuery(name)); } //enq.register_match_decider("blacklist", &blacklister); //gen.filters.acquire(new filter::Blacklist(seen)); @@ -917,11 +972,7 @@ int main(int argc, const char* argv[]) { warn_non_root_on_error = true; - // Access the indexes to trigger a rebuild - env().init(); - - textsearch::AptTagsExtraIndexer atei; - textsearch::DebtagsExtraIndexer dei(env().debtags()); + mode_t prev_umask = umask(022); int exitcode; if (opts.out_quiet->boolValue()) @@ -935,22 +986,9 @@ int main(int argc, const char* argv[]) throw wibble::exception::Consistency("running update-apt-xapian-index", str.str()); } -#if 0 - vector<const TextSearch::ExtraIndexer*> extraIndexers; - - if (env().debtags().hasData()) - extraIndexers.push_back(&dei); - else - extraIndexers.push_back(&atei); - - // The TextSearch needs explicit reindexing - env().textsearch().rebuildIfNeeded(env().apt(), extraIndexers); -#endif - // TODO: if verbose, print the various data files used - //mode_t prev_umask = umask(022); - //umask(prev_umask); + umask(prev_umask); } // info // Show information about the data providers @@ -974,9 +1012,10 @@ int main(int argc, const char* argv[]) cout << "Popcon local scan: disabled. To enable it, install the popularity-contest package and" << endl << " enable it to run." << endl; - if (env().textsearch().hasData()) - if (env().textsearch().needsRebuild(env().apt())) - cout << "Xapian: enabled but not up to date. To update it, run 'ept-cache reindex' as root." << endl; + time_t xts = axi::timestamp(); + if (xts > 0) + if (xts < env().apt().timestamp()) + cout << "Xapian: enabled but not up to date. To update it, run 'update-apt-xapian-index' as root." << endl; else cout << "Xapian: enabled and up to date." << endl; else diff --git a/tools/ept-search.cpp b/tools/ept-search.cpp deleted file mode 100644 index 2fd7396..0000000 --- a/tools/ept-search.cpp +++ /dev/null @@ -1,43 +0,0 @@ -#include <ept/apt/apt.h> -#include <ept/textsearch/textsearch.h> - -using namespace ept; - -struct Main { - apt::Apt apt; - textsearch::TextSearch ts; - - std::vector< std::string > args; - - Main( int argc, char **argv ) - { - for ( int i = 1; i < argc; ++i ) - args.push_back( argv[i] ); - } - - int run() { - using namespace std; - Xapian::Enquire enq( ts.db() ); - enq.set_query( ts.makeORQuery( args.begin(), args.end() ) ); - - // Limit to 20 matches - Xapian::MSet matches = enq.get_mset(0, 20); - for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i) - { - // Filter out results that apt doesn't know - if (!apt.isValid(i.get_document().get_data())) - { - cerr << " Not in apt database: " << i.get_document().get_data().c_str() << endl; - continue; - } - - cout << apt.rawRecord(i.get_document().get_data()) << endl; - } - - return 0; - } -}; - -int main( int argc, char **argv ) { - return Main( argc, argv ).run(); -} |
