diff options
Diffstat (limited to 'ept/popcon')
-rw-r--r-- | ept/popcon/local.cc | 168 | ||||
-rw-r--r-- | ept/popcon/local.h | 87 | ||||
-rw-r--r-- | ept/popcon/local.test.h | 111 | ||||
-rw-r--r-- | ept/popcon/maint/path.cc | 113 | ||||
-rw-r--r-- | ept/popcon/maint/path.h | 125 | ||||
-rw-r--r-- | ept/popcon/maint/popconindexer.cc | 248 | ||||
-rw-r--r-- | ept/popcon/maint/popconindexer.h | 75 | ||||
-rw-r--r-- | ept/popcon/maint/sourcedir.cc | 155 | ||||
-rw-r--r-- | ept/popcon/maint/sourcedir.h | 64 | ||||
-rw-r--r-- | ept/popcon/popcon.cc | 95 | ||||
-rw-r--r-- | ept/popcon/popcon.h | 153 | ||||
-rw-r--r-- | ept/popcon/popcon.test.h | 108 |
12 files changed, 0 insertions, 1502 deletions
diff --git a/ept/popcon/local.cc b/ept/popcon/local.cc deleted file mode 100644 index 308a9e4..0000000 --- a/ept/popcon/local.cc +++ /dev/null @@ -1,168 +0,0 @@ -/** @file - * @author Enrico Zini <enrico@enricozini.org> - * Correlate popcon data with local popcon information - */ - -/* - * Copyright (C) 2007 Enrico Zini <enrico@debian.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <ept/popcon/local.h> -#include <ept/popcon/popcon.h> -#include <ept/popcon/maint/path.h> - -#include <wibble/exception.h> - -#include <algorithm> -#include <fstream> -#include <cmath> - -//#include <iostream> - -using namespace std; - -namespace ept { -namespace popcon { - -// Split a string where there are separators -static vector<string> split(const std::string& str, char sep = ' ') -{ - vector<string> res; - size_t start = 0; - while (start < str.size()) - { - size_t end = str.find(sep, start); - if (end == string::npos) - { - res.push_back(str.substr(start)); - break; - } - else - { - res.push_back(str.substr(start, end-start)); - start = end + 1; - } - } - return res; -} - -// Reverse sort pairs by comparing their second element -struct secondsort -{ - bool operator()(const pair<string, float>& a, const pair<string, float>& b) const - { - if (a.second == b.second) - return a.first > b.first; - else - return a.second > b.second; - } -}; - -Local::Local(const std::string& file) -{ - m_timestamp = Path::timestamp(file); - if (m_timestamp == 0) - return; - - ifstream in; - in.open(file.c_str()); - if (!in.good()) - throw wibble::exception::File(file, "opening file for reading"); - - while (!in.eof()) - { - std::string line; - getline(in, line); - if (line.substr(0, 10) == "POPULARITY") - continue; - if (line.substr(0, 14) == "END-POPULARITY") - continue; - vector<string> data = split(line); - if (data.size() < 4) - continue; - if (data[3] == "<NOFILES>") - // This is an empty / virtual package - m_scores.insert(make_pair(data[2], 0.1)); - else if (data.size() == 4) - // Package normally in use - m_scores.insert(make_pair(data[2], 1.0)); - else if (data[4] == "<OLD>") - // Unused packages - m_scores.insert(make_pair(data[2], 0.3)); - else if (data[4] == "<RECENT-CTIME>") - // Recently installed packages - m_scores.insert(make_pair(data[2], 0.5)); - } -} - -float Local::score(const std::string& pkg) const -{ - std::map<std::string, float>::const_iterator i = m_scores.find(pkg); - if (i == m_scores.end()) - return 0; - else - return i->second; -} - -/** - * Return the TFIDF score of the package computed against the popcon - * information. - */ -float Local::tfidf(const Popcon& popcon, const std::string& pkg) const -{ - float popconScore = popcon.score(pkg); - //cerr << pkg << ": " << score(pkg) << " * log(" << (float)popcon.submissions() << " / " << popconScore << ") = " << score(pkg) * log((float)popcon.submissions() / popconScore) << endl; - if (popconScore == 0) - return 0; - else - return score(pkg) * log((float)popcon.submissions() / popconScore); - -} - -std::vector< std::pair<std::string, float> > Local::scores() const -{ - vector< pair<string, float> > res; - // Copy the scores in res - copy(m_scores.begin(), m_scores.end(), back_inserter(res)); - // Sort res by score - sort(res.begin(), res.end(), secondsort()); - return res; -} - -std::vector< std::pair<std::string, float> > Local::tfidf(const Popcon& popcon) const -{ - vector< pair<string, float> > res; - // Compute the tfidf scores and store them into res - for (std::map<std::string, float>::const_iterator i = m_scores.begin(); - i != m_scores.end(); ++i) - { - float popconScore = popcon.score(i->first); - if (popconScore == 0) - res.push_back(make_pair(i->first, 0.0f)); - else - res.push_back(make_pair(i->first, - i->second * log((float)popcon.submissions() / popconScore))); - } - // Sort res by score - sort(res.begin(), res.end(), secondsort()); - return res; -} - -} -} - -// vim:set ts=4 sw=4: diff --git a/ept/popcon/local.h b/ept/popcon/local.h deleted file mode 100644 index 257cc72..0000000 --- a/ept/popcon/local.h +++ /dev/null @@ -1,87 +0,0 @@ -#ifndef EPT_POPCON_LOCAL_H -#define EPT_POPCON_LOCAL_H - -/** @file - * @author Enrico Zini <enrico@enricozini.org> - * Correlate popcon data with local popcon information - */ - -/* - * Copyright (C) 2007 Enrico Zini <enrico@debian.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <string> -#include <vector> -#include <map> - -namespace ept { -namespace popcon { - -class Popcon; - -/** - * Access the results of the local daily popcon scan. - */ -class Local -{ -protected: - std::map<std::string, float> m_scores; - time_t m_timestamp; - -public: - Local(const std::string& file = std::string("/var/log/popularity-contest")); - - /// Get the timestamp of the local popcon information - time_t timestamp() const { return m_timestamp; } - - /// Return true if this data source has data, false if it's empty - bool hasData() const { return m_timestamp != 0; } - - /** - * Return the local score of the package - */ - float score(const std::string& pkg) const; - - /** - * Return the TFIDF score of the package computed against the popcon - * information. - * - * The TFIDF score is high when a package is representative of this system, - * that is, it is used in this system and not much used in other systems. - */ - float tfidf(const Popcon& popcon, const std::string& pkg) const; - - /** - * Read the local popcon vote and return the list of packages and their - * local scores, sorted by ascending score. - */ - std::vector< std::pair<std::string, float> > scores() const; - - /** - * Read the local popcon vote and return the list of packages and their - * TFIDF scores computed against the popcon information. - * - * The packages will be sorted by ascending score. - */ - std::vector< std::pair<std::string, float> > tfidf(const Popcon& popcon) const; -}; - -} -} - -// vim:set ts=4 sw=4: -#endif diff --git a/ept/popcon/local.test.h b/ept/popcon/local.test.h deleted file mode 100644 index 66d9919..0000000 --- a/ept/popcon/local.test.h +++ /dev/null @@ -1,111 +0,0 @@ -// -*- mode: c++; tab-width: 4; indent-tabs-mode: t -*- -/* - * popcon/local test - * - * Copyright (C) 2007 Enrico Zini <enrico@debian.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <ept/popcon/local.h> -#include <ept/popcon/popcon.h> -#include <ept/popcon/maint/path.h> - -#include <ept/test.h> - -using namespace std; -using namespace ept; -using namespace ept::popcon; - -struct TestPopconLocal -{ - Path::OverridePopconSourceDir odsd; - Path::OverridePopconIndexDir odid; - Path::OverridePopconUserSourceDir odusd; - Path::OverridePopconUserIndexDir oduid; - - Popcon popcon; - Local local; - - TestPopconLocal() - : odsd( TEST_ENV_DIR "popcon" ), - odid( TEST_ENV_DIR "popcon" ), - odusd( TEST_ENV_DIR "popcon" ), - oduid( TEST_ENV_DIR "popcon" ), - local( TEST_ENV_DIR "popcon/popularity-contest" ) - {} - - // Very basic access - Test basicAccess() - { - assert(local.score("apt") > 0); - assert(local.tfidf(popcon, "apt") > 0); - } - -#if 0 // mornfall: apparently left out by enrico, leaving as it is -// Check that every valid index is accessible -template<> template<> -void to::test< 2 >() -{ - for (size_t i = 0; i < popcon.size(); ++i) - { - //cerr << popcon.name(i) << " " << popcon.score(i) << endl; - assert(popcon.score(i) > 0); - } -} - -// Check that we can get a score for every package -template<> template<> -void to::test< 3 >() -{ - int has = 0; - for (Apt::iterator i = apt.begin(); i != apt.end(); ++i) - { - float score = popcon.score(*i); - if (score > 0) - ++has; - } - // At least 1000 packages should have a score - assert(has > 1000); -} - -// Check that scores are meaningful -template<> template<> -void to::test< 4 >() -{ - assert(popcon["apt"] > popcon["libapt-pkg-dev"]); -} - -// If there is no data, Popcon should work as if all scores were 0 -template<> template<> -void to::test<5>() -{ - Path::OverridePopconSourceDir odsd("./empty"); - Path::OverridePopconIndexDir odid("./empty"); - Path::OverridePopconUserSourceDir odusd("./empty"); - Path::OverridePopconUserIndexDir oduid("./empty"); - Popcon empty; - - assert_eq(empty.timestamp(), 0); - assert(!empty.hasData()); - - assert(empty.size() == 0); - assert(empty.score("apt") == 0.0); -} -#endif - -}; - -// vim:set ts=4 sw=4: diff --git a/ept/popcon/maint/path.cc b/ept/popcon/maint/path.cc deleted file mode 100644 index aef6314..0000000 --- a/ept/popcon/maint/path.cc +++ /dev/null @@ -1,113 +0,0 @@ -// -*- mode: c++; indent-tabs-mode: t -*- - -/** \file - * popcon paths - */ - -/* - * Copyright (C) 2005,2006,2007 Enrico Zini <enrico@debian.org>, Peter Rockai <me@mornfall.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <ept/config.h> -#include <ept/popcon/maint/path.h> - -#include <wibble/sys/fs.h> -#include <wibble/string.h> - -#include <sys/types.h> // getpwuid, getuid -#include <pwd.h> // getpwuid -#include <unistd.h> // getuid - -using namespace wibble; - -namespace ept { -namespace popcon { - -static std::string userdir() -{ - std::string rcdir; - - struct passwd* udata = getpwuid(getuid()); - rcdir = str::joinpath(udata->pw_dir, ".popcon"); - - return rcdir; -} - - -Path &Path::instance() { - if (!s_instance) { - s_instance = new Path; - instance().m_popconSourceDir = POPCON_DB_DIR; - instance().m_popconIndexDir = POPCON_DB_DIR; - instance().m_popconUserSourceDir = userdir(); - instance().m_popconUserIndexDir = userdir(); - } - return *s_instance; -} - -int Path::access( const std::string &s, int m ) { - return ::access( s.c_str(), m ); -} - -time_t Path::timestamp( const std::string& file ) { - return sys::fs::timestamp(file, 0); -} - -void Path::setPopconSourceDir( const std::string &s ) -{ - instance().m_popconSourceDir = s; -} -void Path::setPopconIndexDir( const std::string &s ) -{ - instance().m_popconIndexDir = s; -} -void Path::setPopconUserSourceDir( const std::string &s ) -{ - instance().m_popconUserSourceDir = s; -} -void Path::setPopconUserIndexDir( const std::string &s ) -{ - instance().m_popconUserIndexDir = s; -} - -std::string Path::popconSourceDir() { return instance().m_popconSourceDir; } -std::string Path::popconIndexDir() { return instance().m_popconIndexDir; } -std::string Path::popconUserSourceDir() { return instance().m_popconUserSourceDir; } -std::string Path::popconUserIndexDir() { return instance().m_popconUserIndexDir; } - -std::string Path::scores() { - return str::joinpath(popconIndexDir(), "scores"); -} - -std::string Path::scoresIndex() { - return str::joinpath(popconIndexDir(), "scores.idx"); -} - -std::string Path::userScores() { - return str::joinpath(popconUserIndexDir(), "scores"); -} - -std::string Path::userScoresIndex() { - return str::joinpath(popconUserIndexDir(), "scores.idx"); -} - -Path *Path::s_instance = 0; - -} -} - -// vim:set ts=4 sw=4: diff --git a/ept/popcon/maint/path.h b/ept/popcon/maint/path.h deleted file mode 100644 index cb4c31d..0000000 --- a/ept/popcon/maint/path.h +++ /dev/null @@ -1,125 +0,0 @@ -// -*- mode: c++; indent-tabs-mode: t -*- -/** \file - * popcon paths - */ - -/* - * Copyright (C) 2005,2006,2007 Enrico Zini <enrico@debian.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef EPT_POPCON_PATH_H -#define EPT_POPCON_PATH_H - -#include <string> - -namespace ept { -namespace popcon { - -/** - * Singleton class to configure and access the various Popcon paths - */ -class Path -{ -public: - static std::string scores(); - static std::string scoresIndex(); - static std::string userScores(); - static std::string userScoresIndex(); - - static std::string popconSourceDir(); - static std::string popconIndexDir(); - static std::string popconUserSourceDir(); - static std::string popconUserIndexDir(); - - // Directory where Popcon source data is found - static void setPopconSourceDir( const std::string &s ); - - // Directory where Popcon indexes are kept - static void setPopconIndexDir( const std::string &s ); - - // User-specific directory for Popcon source data - static void setPopconUserSourceDir( const std::string &s ); - - // User-specific directory for Popcon index data - static void setPopconUserIndexDir( const std::string &s ); - - static int access( const std::string &, int ); - static time_t timestamp( const std::string& ); - - // RAII-style classes to temporarily override directories - class OverridePopconSourceDir - { - std::string old; - public: - OverridePopconSourceDir(const std::string& path) : old(Path::popconSourceDir()) - { - Path::setPopconSourceDir(path); - } - ~OverridePopconSourceDir() { Path::setPopconSourceDir(old); } - }; - class OverridePopconIndexDir - { - std::string old; - public: - OverridePopconIndexDir(const std::string& path) : old(Path::popconIndexDir()) - { - Path::setPopconIndexDir(path); - } - ~OverridePopconIndexDir() { Path::setPopconIndexDir(old); } - }; - class OverridePopconUserSourceDir - { - std::string old; - public: - OverridePopconUserSourceDir(const std::string& path) : old(Path::popconUserSourceDir()) - { - Path::setPopconUserSourceDir(path); - } - ~OverridePopconUserSourceDir() { Path::setPopconUserSourceDir(old); } - }; - class OverridePopconUserIndexDir - { - std::string old; - public: - OverridePopconUserIndexDir(const std::string& path) : old(Path::popconUserIndexDir()) - { - Path::setPopconUserIndexDir(path); - } - ~OverridePopconUserIndexDir() { Path::setPopconUserIndexDir(old); } - }; -protected: - static Path *s_instance; - static Path &instance(); - - // Directory where Popcon source data is found - std::string m_popconSourceDir; - - // Directory where Popcon indexes are kept - std::string m_popconIndexDir; - - // User-specific directory for Popcon source data - std::string m_popconUserSourceDir; - - // User-specific directory for Popcon index data - std::string m_popconUserIndexDir; -}; - -} -} - -// vim:set ts=4 sw=4: -#endif diff --git a/ept/popcon/maint/popconindexer.cc b/ept/popcon/maint/popconindexer.cc deleted file mode 100644 index a8dea69..0000000 --- a/ept/popcon/maint/popconindexer.cc +++ /dev/null @@ -1,248 +0,0 @@ -#include <ept/popcon/popcon.h> -#include <ept/popcon/maint/popconindexer.h> -#include <ept/popcon/maint/path.h> - -#include <wibble/exception.h> -#include <wibble/sys/fs.h> - -#include <tagcoll/diskindex/mmap.h> - -#include <unistd.h> -#include <set> -#include <string> -#include <cstdio> -#include <cstring> - -using namespace std; - -namespace ept { -namespace popcon { - -template<typename STRUCT> -struct StructIndexer : public tagcoll::diskindex::MMapIndexer -{ - const STRUCT& data; - StructIndexer(const STRUCT& data) : data(data) {} - - int encodedSize() const { return sizeof(STRUCT); } - void encode(char* buf) const { *(STRUCT*)buf = data; } -}; - -/// MMapIndexer that indexes the package names -struct PopconGenerator : public tagcoll::diskindex::MMapIndexer -{ - // Sorted set of all available package names and data - std::map<std::string, Score> data; - - int encodedSize() const - { - int size = data.size() * sizeof(Score); - for (std::map<std::string, Score>::const_iterator i = data.begin(); - i != data.end(); ++i) - size += i->first.size() + 1; - return tagcoll::diskindex::MMap::align(size); - } - - void encode(char* buf) const - { - int pos = data.size() * sizeof(Score); - int idx = 0; - for (std::map<std::string, Score>::const_iterator i = data.begin(); - i != data.end(); ++i) - { - ((Score*)buf)[idx] = i->second; - ((Score*)buf)[idx].offset = pos; - memcpy(buf + pos, i->first.c_str(), i->first.size() + 1); - pos += i->first.size() + 1; - ++idx; - } - } -}; - - -PopconIndexer::PopconIndexer() - : mainSource(Path::popconSourceDir()), - userSource(Path::popconUserSourceDir()) -{ - rescan(); -} - -void PopconIndexer::rescan() -{ - ts_main_src = mainSource.timestamp(); - ts_user_src = userSource.timestamp(); - ts_main_sco = Path::timestamp(Path::scores()); - ts_user_sco = Path::timestamp(Path::userScores()); - ts_main_idx = Path::timestamp(Path::scoresIndex()); - ts_user_idx = Path::timestamp(Path::userScoresIndex()); -} - -bool PopconIndexer::needsRebuild() const -{ - // If there are no indexes of any kind, then we need rebuilding - if (ts_user_sco == 0 || ts_main_sco == 0 || ts_user_idx == 0 && ts_main_idx == 0) - return true; - - // If the user index is ok, then we are fine - if (ts_user_sco >= sourceTimestamp() && ts_user_idx >= sourceTimestamp()) - return false; - - // If there are user sources, then we cannot use the system index - if (ts_user_src > 0) - return true; - - // If there are no user sources, then we can fallback on the system - // indexes in case the user indexes are not up to date - if (ts_main_sco >= sourceTimestamp() && ts_main_idx >= sourceTimestamp()) - return false; - - return true; -} - -bool PopconIndexer::userIndexIsRedundant() const -{ - // If there is no user index, then it is not redundant - if (ts_user_idx == 0) - return false; - - // If the system index is not up to date, then the user index is not - // redundant - if (ts_main_idx < sourceTimestamp()) - return false; - - return true; -} - -bool PopconIndexer::rebuild(const std::string& scofname, const std::string& idxfname) -{ - PopconGenerator gen; - InfoStruct is; - is.submissions = 0; - if (!mainSource.readScores(gen.data, is.submissions)) - userSource.readScores(gen.data, is.submissions); - if (gen.data.empty()) - return false; - - StructIndexer<InfoStruct> infoStruct(is); - - // Create the index - tagcoll::diskindex::MasterMMapIndexer master(idxfname); - master.append(gen); - master.append(infoStruct); - master.commit(); - -// for (map<string, Score>::const_iterator i = gen.data.begin(); i != gen.data.end(); ++i) -// { -// fprintf(stderr, "%s %d %f\n", i->first.c_str(), i->second.offset, i->second.score); -// } - - // Create the score file - FILE* out = fopen(scofname.c_str(), "wt"); - if (out == NULL) - throw wibble::exception::File(scofname, "opening and truncating file for writing"); - for (map<string, Score>::const_iterator i = gen.data.begin(); - i != gen.data.end(); ++i) - { - fprintf(out, "%s %f\n", i->first.c_str(), i->second.score); - } - fclose(out); - return true; -} - -bool PopconIndexer::rebuildIfNeeded() -{ - if (needsRebuild()) - { - // Decide if we rebuild the user index or the system index - if (Path::access(Path::popconIndexDir(), W_OK) == 0) - { - // Since we can write on the system index directory, we rebuild - // the system index - if (!rebuild(Path::scores(), Path::scoresIndex())) - return false; - ts_main_sco = Path::timestamp(Path::scores()); - ts_main_idx = Path::timestamp(Path::scoresIndex()); - if (Path::scores() == Path::userScores()) - ts_user_sco = ts_main_sco; - if (Path::scoresIndex() == Path::userScoresIndex()) - ts_user_idx = ts_main_idx; - } else { - wibble::sys::fs::mkFilePath(Path::userScores()); - wibble::sys::fs::mkFilePath(Path::userScoresIndex()); - if (!rebuild(Path::userScores(), Path::userScoresIndex())) - return false; - ts_user_sco = Path::timestamp(Path::userScores()); - ts_user_idx = Path::timestamp(Path::userScoresIndex()); - } - return true; - } - return false; -} - -bool PopconIndexer::deleteRedundantUserIndex() -{ - if (userIndexIsRedundant()) - { - // Delete the user indexes if they exist - if (Path::scores() != Path::userScores()) - { - unlink(Path::userScores().c_str()); - ts_user_sco = 0; - } - if (Path::scoresIndex() != Path::userScoresIndex()) - { - unlink(Path::userScoresIndex().c_str()); - ts_user_idx = 0; - } - return true; - } - return false; -} - -bool PopconIndexer::getUpToDatePopcon(std::string& scofname, std::string& idxfname) -{ - // If there are no indexes of any kind, then we have nothing to return - if (ts_user_sco == 0 && ts_main_sco == 0 && ts_user_idx == 0 && ts_main_idx == 0) - return false; - - // If the user index is up to date, use it - if (ts_user_sco >= sourceTimestamp() && - ts_user_idx >= sourceTimestamp()) - { - scofname = Path::userScores(); - idxfname = Path::userScoresIndex(); - return true; - } - - // If the user index is not up to date and we have user sources, we cannot - // fall back to the system index - if (ts_user_src != 0) - return false; - - // Fallback to the system index - if (ts_main_sco >= sourceTimestamp() && - ts_main_idx >= sourceTimestamp()) - { - scofname = Path::scores(); - idxfname = Path::scoresIndex(); - return true; - } - - return false; -} - - -bool PopconIndexer::obtainWorkingPopcon(std::string& scofname, std::string& idxfname) -{ - PopconIndexer indexer; - - indexer.rebuildIfNeeded(); - indexer.deleteRedundantUserIndex(); - return indexer.getUpToDatePopcon(scofname, idxfname); -} - - -} -} - -// vim:set ts=4 sw=4: diff --git a/ept/popcon/maint/popconindexer.h b/ept/popcon/maint/popconindexer.h deleted file mode 100644 index 86789cf..0000000 --- a/ept/popcon/maint/popconindexer.h +++ /dev/null @@ -1,75 +0,0 @@ -#ifndef EPT_DEBTAGS_PKGIDINDEXER_H -#define EPT_DEBTAGS_PKGIDINDEXER_H - -/** @file - * @author Enrico Zini <enrico@enricozini.org> - * Rebuild and maintain the map from package IDs to package names - */ - -/* - * Copyright (C) 2003-2007 Enrico Zini <enrico@debian.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <ept/popcon/maint/sourcedir.h> -#include <string> - -namespace ept { -namespace popcon { - -struct InfoStruct -{ - size_t submissions; -}; - -struct PopconIndexer -{ - SourceDir mainSource; - SourceDir userSource; - time_t ts_main_src; - time_t ts_user_src; - time_t ts_main_sco; - time_t ts_user_sco; - time_t ts_main_idx; - time_t ts_user_idx; - - time_t sourceTimestamp() const - { - time_t res = ts_main_src; - if (ts_user_src > res) res = ts_user_src; - return res; - - } - bool needsRebuild() const; - bool rebuild(const std::string& scofname, const std::string& idxfname); - bool rebuildIfNeeded(); - bool getUpToDatePopcon(std::string& scofname, std::string& idxfname); - - bool userIndexIsRedundant() const; - bool deleteRedundantUserIndex(); - - void rescan(); - - PopconIndexer(); - - static bool obtainWorkingPopcon(std::string& scofname, std::string& idxfname); -}; - -} -} - -// vim:set ts=4 sw=4: -#endif diff --git a/ept/popcon/maint/sourcedir.cc b/ept/popcon/maint/sourcedir.cc deleted file mode 100644 index c908edd..0000000 --- a/ept/popcon/maint/sourcedir.cc +++ /dev/null @@ -1,155 +0,0 @@ -#include <ept/popcon/maint/sourcedir.h> -#include <ept/popcon/maint/path.h> - -#include <wibble/string.h> -#include <wibble/sys/fs.h> - -#include <tagcoll/input/zlib.h> -#include <tagcoll/input/stdio.h> - -#include <cstdlib> - -using namespace std; -using namespace wibble; - -namespace ept { -namespace popcon { - -SourceDir::SourceDir(const std::string& path) - : sys::fs::Directory(path) -{ -} -SourceDir::~SourceDir() -{ -} - -SourceDir::FileType SourceDir::fileType(const std::string& name) -{ - if (name[0] == '.') return SKIP; - - if (name == "all-popcon-results.txt") return RAW; - if (name == "all-popcon-results.txt.gz") return RAWGZ; - - return SKIP; -} - -time_t SourceDir::timestamp() -{ - if (!exists()) return 0; - - time_t max = 0; - for (const_iterator d = begin(); d != end(); ++d) - { - string name = *d; - FileType type = fileType(name); - if (type == SKIP) continue; - - time_t ts = Path::timestamp(str::joinpath(m_path, name)); - if (ts > max) max = ts; - } - - return max; -} - -bool readLine(tagcoll::input::Input& in, string& str) -{ - str.clear(); - int c; - while ((c = in.nextChar()) != tagcoll::input::Input::Eof && c != '\n') - str += c; - return c != tagcoll::input::Input::Eof; -} - -static void parseScores(tagcoll::input::Input& in, map<std::string, Score>& out, size_t& submissions) -{ - string line; - while (readLine(in, line)) - { - if (line.size() < 10) - continue; - if (line.substr(0, 13) == "Submissions: ") - { - submissions = strtoul(line.substr(13).c_str(), 0, 10); - continue; - } - if (line.substr(0, 9) != "Package: ") - continue; - size_t start = 9; - size_t end = line.find(' ', start); - if (end == string::npos) - continue; - string name = line.substr(start, end-start); - // Skip packages not in the apt index - //if (!apt.isValid(name)) - //continue; - - start = line.find_first_not_of(' ', end); - if (start == string::npos) continue; - end = line.find(' ', start); - if (end == string::npos) continue; - string vote = line.substr(start, end-start); - - start = line.find_first_not_of(' ', end); - if (start == string::npos) continue; - end = line.find(' ', start); - if (end == string::npos) continue; - string old = line.substr(start, end-start); - - start = line.find_first_not_of(' ', end); - if (start == string::npos) continue; - end = line.find(' ', start); - if (end == string::npos) continue; - string recent = line.substr(start, end-start); - - start = line.find_first_not_of(' ', end); - if (start == string::npos) continue; - end = line.find(' ', start); - if (end == string::npos) end = line.size(); - string nofiles = line.substr(start, end-start); - - float score = (float)strtoul(vote.c_str(), NULL, 10) - + (float)strtoul(recent.c_str(), NULL, 10) * 0.5f - + (float)strtoul(old.c_str(), NULL, 10) * 0.3f - + (float)strtoul(nofiles.c_str(), NULL, 10) * 0.8f; - - if (score > 0) - out.insert(make_pair(name, Score(score))); - } -} - -bool SourceDir::readScores(map<std::string, Score>& out, size_t& submissions) -{ - if (!exists()) return false; - - bool done = false; - - for (const_iterator d = begin(); d != end(); ++d) - { - string name = *d; - FileType type = fileType(name); - if (type == RAW) - { - // Read uncompressed data - tagcoll::input::Stdio in(str::joinpath(m_path, name)); - - // Read the scores - parseScores(in, out, submissions); - done = true; - } - else if (type == RAWGZ) - { - // Read compressed data - tagcoll::input::Zlib in(str::joinpath(m_path, name)); - - // Read the scores - parseScores(in, out, submissions); - done = true; - } - } - return done; -} - -} -} - -// vim:set ts=4 sw=4: diff --git a/ept/popcon/maint/sourcedir.h b/ept/popcon/maint/sourcedir.h deleted file mode 100644 index eccb6f1..0000000 --- a/ept/popcon/maint/sourcedir.h +++ /dev/null @@ -1,64 +0,0 @@ -#ifndef EPT_POPCON_SOURCEDIR_H -#define EPT_POPCON_SOURCEDIR_H - -/** @file - * @author Enrico Zini <enrico@enricozini.org> - * Popcon data source directory access - */ - -/* - * Copyright (C) 2003,2004,2005,2006,2007 Enrico Zini <enrico@debian.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <ept/popcon/popcon.h> -#include <wibble/sys/fs.h> -#include <string> -#include <map> - -namespace ept { -namespace popcon { - -/** - * Access a directory containing Debtags data files - */ -class SourceDir : public wibble::sys::fs::Directory -{ -protected: - enum FileType { SKIP, RAW, RAWGZ }; - - // Check if a file name is a tag file, a vocabulary file or a file to skip. - // Please notice that it works on file names, not paths. - FileType fileType(const std::string& name); - -public: - SourceDir(const std::string& path); - ~SourceDir(); - - /// Return the time of the newest file in the source directory - time_t timestamp(); - - /** - * Read the tag files in the directory and output their content to the map - */ - bool readScores(std::map<std::string, Score>& out, size_t& submissions); -}; - -} -} - -// vim:set ts=4 sw=4: -#endif diff --git a/ept/popcon/popcon.cc b/ept/popcon/popcon.cc deleted file mode 100644 index 2ce9e53..0000000 --- a/ept/popcon/popcon.cc +++ /dev/null @@ -1,95 +0,0 @@ -// -*- mode: c++; tab-width: 4; indent-tabs-mode: t -*- - -/** @file - * @author Enrico Zini <enrico@enricozini.org> - * Quick map from package IDs to package names - */ - -/* - * Copyright (C) 2007 Enrico Zini <enrico@debian.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <ept/popcon/popcon.h> -#include <ept/popcon/maint/popconindexer.h> -#include <ept/popcon/maint/path.h> - -//#include <iostream> - -using namespace std; - -namespace ept { -namespace popcon { - -size_t Popcon::GeneralInfo::submissions() const -{ - if (!m_buf) return 0; - return ((InfoStruct*)m_buf)->submissions; -} - -Popcon::Popcon() -{ - std::string scofname, idxfname; - - if (!PopconIndexer::obtainWorkingPopcon(scofname, idxfname)) - { - m_timestamp = 0; - return; - } - - //cerr << "GOT " << scofname << " " << idxfname << endl; - - m_timestamp = Path::timestamp(idxfname); - - mastermmap.init(idxfname); - tagcoll::diskindex::MMap::init(mastermmap, 0); - - m_info.init(mastermmap, 1); - - //cerr << "SIZE " << size() << endl; - //for (size_t i = 0; i < size(); ++i) - //{ - // cerr << "EL " << i << ": " << ((Score*)m_buf)[i].offset << " " << ((Score*)m_buf)[i].score << endl; - //} -} - -float Popcon::scoreByName(const std::string& name) const -{ - // Binary search the index to find the package ID - int begin, end; - - /* Binary search */ - begin = -1, end = size(); - while (end - begin > 1) - { - int cur = (end + begin) / 2; - if (this->name(cur) > name) - end = cur; - else - begin = cur; - } - - if (begin == -1 || this->name(begin) != name) - //throw NotFoundException(string("looking for the ID of string ") + str); - return 0; - else - return score(begin); -} - -} -} - -// vim:set ts=4 sw=4: diff --git a/ept/popcon/popcon.h b/ept/popcon/popcon.h deleted file mode 100644 index 684f98a..0000000 --- a/ept/popcon/popcon.h +++ /dev/null @@ -1,153 +0,0 @@ -// -*- mode: c++; tab-width: 4; indent-tabs-mode: t -*- -#ifndef EPT_POPCON_POPCON_H -#define EPT_POPCON_POPCON_H - -/** @file - * @author Enrico Zini <enrico@enricozini.org> - * Access popcon data - */ - -/* - * Copyright (C) 2007 Enrico Zini <enrico@debian.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <tagcoll/diskindex/mmap.h> -#include <string> - -namespace ept { -namespace apt { -class Apt; -} - -namespace popcon { - -/** - * Store the score information in the popcon cache. - * - * Currently, this is only one float; more can be added in the future. - */ -class Score -{ -protected: - unsigned offset; - -public: - float score; - - Score(float score) : offset(offset), score(score) {} - - friend class Popcon; - friend class PopconIndexer; - friend class PopconGenerator; -}; - -/** - * Maps Packages to IDs and vice-versa. - * - * This is used in building the Debtags fast index, which works representing - * tags and packages as int IDs. - * - * Index building works like this: - * 1. The file all-popcon-results.txt.gz is downloaded from - * http://popcon.debian.org/all-popcon-results.txt.gz - * 2. The file is put in either ~/.popcon/all-popcon-results.txt.gz - * or in /var/lib/popcon/all-popcon-results.txt.gz - * 3. If the file is newer than the index, it will be automatically used to - * recompute the scores and rebuild the index. - */ -class Popcon : public tagcoll::diskindex::MMap -{ - struct GeneralInfo : public tagcoll::diskindex::MMap - { - size_t submissions() const; - }; - - tagcoll::diskindex::MasterMMap mastermmap; - time_t m_timestamp; - - GeneralInfo m_info; - - /// Get the score structure by index - const Score* structByIndex(size_t idx) const - { - if (idx >= 0 && idx < size()) - return (Score*)m_buf + idx; - return 0; - } - -public: - Popcon(); - - /// Get the timestamp of when the index was last updated - time_t timestamp() const { return m_timestamp; } - - /// Return true if this data source has data, false if it's empty - bool hasData() const { return m_timestamp != 0; } - - /// Return the total number of popcon submissions - size_t submissions() const { return m_info.submissions(); } - - /// Get the number of packages in the index - size_t size() const - { - if (m_buf) - return ((Score*)m_buf)->offset / sizeof(Score); - else - return 0; - } - - /** - * Get a package name by index - * - * If the index is not valid, returns the empty string. - */ - std::string name(size_t idx) const - { - const Score* s = structByIndex(idx); - if (s == 0) return std::string(); - return std::string(m_buf + s->offset); - } - - /// Get the score by index - float scoreByIndex(size_t idx) const - { - const Score* s = structByIndex(idx); - if (!s) return 0; - return s->score; - } - - /// Get the score structure by package name - float scoreByName(const std::string& name) const; - - /// Get the score by index - float score(size_t idx) const { return scoreByIndex(idx); } - - /// Get the score by index - float operator[](int idx) const { return scoreByIndex(idx); } - - /// Get the score by name - float score(const std::string& name) const { return scoreByName(name); } - - /// Get the score structure by package name - float operator[](const std::string& name) const { return scoreByName(name); } -}; - -} -} - -// vim:set ts=4 sw=4: -#endif diff --git a/ept/popcon/popcon.test.h b/ept/popcon/popcon.test.h deleted file mode 100644 index 4509b85..0000000 --- a/ept/popcon/popcon.test.h +++ /dev/null @@ -1,108 +0,0 @@ -// -*- mode: c++; tab-width: 4; indent-tabs-mode: t -*- -/* - * popcon test - * - * Copyright (C) 2007 Enrico Zini <enrico@debian.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <ept/popcon/popcon.h> -#include <ept/popcon/maint/path.h> -#include <ept/apt/apt.h> -#include <set> - -#include <ept/test.h> - -using namespace std; -using namespace ept; -using namespace ept::popcon; -using namespace ept::apt; - -struct TestPopcon -{ - popcon::Path::OverridePopconSourceDir odsd; - popcon::Path::OverridePopconIndexDir odid; - popcon::Path::OverridePopconUserSourceDir odusd; - popcon::Path::OverridePopconUserIndexDir oduid; - - Apt apt; - Popcon popcon; - - TestPopcon() - : odsd( TEST_ENV_DIR "popcon" ), - odid( TEST_ENV_DIR "popcon" ), - odusd( TEST_ENV_DIR "popcon" ), - oduid( TEST_ENV_DIR "popcon" ) - {} - - Test basicAccess() - { - assert_eq(popcon.submissions(), 52024); - assert(popcon.size() > 0); - assert(popcon.score(0) > 0); - assert(!popcon.name(0).empty()); - } - - // Check that every valid index is accessible - Test accessibility() - { - for (size_t i = 0; i < popcon.size(); ++i) - { - //cerr << popcon.name(i) << " " << popcon.score(i) << endl; - assert(popcon.score(i) > 0); - } - } - - // Check that we can get a score for every package - Test haveScores() - { - int has = 0; - for (Apt::iterator i = apt.begin(); i != apt.end(); ++i) - { - float score = popcon.score(*i); - if (score > 0) - ++has; - } - // At least 1000 packages should have a score - assert(has > 1000); - } - - // Check that scores are meaningful - Test validScores() - { - assert(popcon["apt"] > popcon["libapt-pkg-dev"]); - } - - // If there is no data, Popcon should work as if all scores were 0 - Test fallbackValues() - { - popcon::Path::OverridePopconSourceDir odsd("./empty"); - popcon::Path::OverridePopconIndexDir odid("./empty"); - popcon::Path::OverridePopconUserSourceDir odusd("./empty"); - popcon::Path::OverridePopconUserIndexDir oduid("./empty"); - Popcon empty; - - assert_eq(empty.timestamp(), 0); - assert(!empty.hasData()); - - assert_eq(empty.submissions(), 0); - assert(empty.size() == 0); - assert(empty.score("apt") == 0.0); - } - -}; - -// vim:set ts=4 sw=4: |