summaryrefslogtreecommitdiff
path: root/ept/popcon
diff options
context:
space:
mode:
Diffstat (limited to 'ept/popcon')
-rw-r--r--ept/popcon/local.cc168
-rw-r--r--ept/popcon/local.h87
-rw-r--r--ept/popcon/local.test.h111
-rw-r--r--ept/popcon/maint/path.cc113
-rw-r--r--ept/popcon/maint/path.h125
-rw-r--r--ept/popcon/maint/popconindexer.cc248
-rw-r--r--ept/popcon/maint/popconindexer.h75
-rw-r--r--ept/popcon/maint/sourcedir.cc155
-rw-r--r--ept/popcon/maint/sourcedir.h64
-rw-r--r--ept/popcon/popcon.cc95
-rw-r--r--ept/popcon/popcon.h153
-rw-r--r--ept/popcon/popcon.test.h108
12 files changed, 0 insertions, 1502 deletions
diff --git a/ept/popcon/local.cc b/ept/popcon/local.cc
deleted file mode 100644
index 308a9e4..0000000
--- a/ept/popcon/local.cc
+++ /dev/null
@@ -1,168 +0,0 @@
-/** @file
- * @author Enrico Zini <enrico@enricozini.org>
- * Correlate popcon data with local popcon information
- */
-
-/*
- * Copyright (C) 2007 Enrico Zini <enrico@debian.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <ept/popcon/local.h>
-#include <ept/popcon/popcon.h>
-#include <ept/popcon/maint/path.h>
-
-#include <wibble/exception.h>
-
-#include <algorithm>
-#include <fstream>
-#include <cmath>
-
-//#include <iostream>
-
-using namespace std;
-
-namespace ept {
-namespace popcon {
-
-// Split a string where there are separators
-static vector<string> split(const std::string& str, char sep = ' ')
-{
- vector<string> res;
- size_t start = 0;
- while (start < str.size())
- {
- size_t end = str.find(sep, start);
- if (end == string::npos)
- {
- res.push_back(str.substr(start));
- break;
- }
- else
- {
- res.push_back(str.substr(start, end-start));
- start = end + 1;
- }
- }
- return res;
-}
-
-// Reverse sort pairs by comparing their second element
-struct secondsort
-{
- bool operator()(const pair<string, float>& a, const pair<string, float>& b) const
- {
- if (a.second == b.second)
- return a.first > b.first;
- else
- return a.second > b.second;
- }
-};
-
-Local::Local(const std::string& file)
-{
- m_timestamp = Path::timestamp(file);
- if (m_timestamp == 0)
- return;
-
- ifstream in;
- in.open(file.c_str());
- if (!in.good())
- throw wibble::exception::File(file, "opening file for reading");
-
- while (!in.eof())
- {
- std::string line;
- getline(in, line);
- if (line.substr(0, 10) == "POPULARITY")
- continue;
- if (line.substr(0, 14) == "END-POPULARITY")
- continue;
- vector<string> data = split(line);
- if (data.size() < 4)
- continue;
- if (data[3] == "<NOFILES>")
- // This is an empty / virtual package
- m_scores.insert(make_pair(data[2], 0.1));
- else if (data.size() == 4)
- // Package normally in use
- m_scores.insert(make_pair(data[2], 1.0));
- else if (data[4] == "<OLD>")
- // Unused packages
- m_scores.insert(make_pair(data[2], 0.3));
- else if (data[4] == "<RECENT-CTIME>")
- // Recently installed packages
- m_scores.insert(make_pair(data[2], 0.5));
- }
-}
-
-float Local::score(const std::string& pkg) const
-{
- std::map<std::string, float>::const_iterator i = m_scores.find(pkg);
- if (i == m_scores.end())
- return 0;
- else
- return i->second;
-}
-
-/**
- * Return the TFIDF score of the package computed against the popcon
- * information.
- */
-float Local::tfidf(const Popcon& popcon, const std::string& pkg) const
-{
- float popconScore = popcon.score(pkg);
- //cerr << pkg << ": " << score(pkg) << " * log(" << (float)popcon.submissions() << " / " << popconScore << ") = " << score(pkg) * log((float)popcon.submissions() / popconScore) << endl;
- if (popconScore == 0)
- return 0;
- else
- return score(pkg) * log((float)popcon.submissions() / popconScore);
-
-}
-
-std::vector< std::pair<std::string, float> > Local::scores() const
-{
- vector< pair<string, float> > res;
- // Copy the scores in res
- copy(m_scores.begin(), m_scores.end(), back_inserter(res));
- // Sort res by score
- sort(res.begin(), res.end(), secondsort());
- return res;
-}
-
-std::vector< std::pair<std::string, float> > Local::tfidf(const Popcon& popcon) const
-{
- vector< pair<string, float> > res;
- // Compute the tfidf scores and store them into res
- for (std::map<std::string, float>::const_iterator i = m_scores.begin();
- i != m_scores.end(); ++i)
- {
- float popconScore = popcon.score(i->first);
- if (popconScore == 0)
- res.push_back(make_pair(i->first, 0.0f));
- else
- res.push_back(make_pair(i->first,
- i->second * log((float)popcon.submissions() / popconScore)));
- }
- // Sort res by score
- sort(res.begin(), res.end(), secondsort());
- return res;
-}
-
-}
-}
-
-// vim:set ts=4 sw=4:
diff --git a/ept/popcon/local.h b/ept/popcon/local.h
deleted file mode 100644
index 257cc72..0000000
--- a/ept/popcon/local.h
+++ /dev/null
@@ -1,87 +0,0 @@
-#ifndef EPT_POPCON_LOCAL_H
-#define EPT_POPCON_LOCAL_H
-
-/** @file
- * @author Enrico Zini <enrico@enricozini.org>
- * Correlate popcon data with local popcon information
- */
-
-/*
- * Copyright (C) 2007 Enrico Zini <enrico@debian.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <string>
-#include <vector>
-#include <map>
-
-namespace ept {
-namespace popcon {
-
-class Popcon;
-
-/**
- * Access the results of the local daily popcon scan.
- */
-class Local
-{
-protected:
- std::map<std::string, float> m_scores;
- time_t m_timestamp;
-
-public:
- Local(const std::string& file = std::string("/var/log/popularity-contest"));
-
- /// Get the timestamp of the local popcon information
- time_t timestamp() const { return m_timestamp; }
-
- /// Return true if this data source has data, false if it's empty
- bool hasData() const { return m_timestamp != 0; }
-
- /**
- * Return the local score of the package
- */
- float score(const std::string& pkg) const;
-
- /**
- * Return the TFIDF score of the package computed against the popcon
- * information.
- *
- * The TFIDF score is high when a package is representative of this system,
- * that is, it is used in this system and not much used in other systems.
- */
- float tfidf(const Popcon& popcon, const std::string& pkg) const;
-
- /**
- * Read the local popcon vote and return the list of packages and their
- * local scores, sorted by ascending score.
- */
- std::vector< std::pair<std::string, float> > scores() const;
-
- /**
- * Read the local popcon vote and return the list of packages and their
- * TFIDF scores computed against the popcon information.
- *
- * The packages will be sorted by ascending score.
- */
- std::vector< std::pair<std::string, float> > tfidf(const Popcon& popcon) const;
-};
-
-}
-}
-
-// vim:set ts=4 sw=4:
-#endif
diff --git a/ept/popcon/local.test.h b/ept/popcon/local.test.h
deleted file mode 100644
index 66d9919..0000000
--- a/ept/popcon/local.test.h
+++ /dev/null
@@ -1,111 +0,0 @@
-// -*- mode: c++; tab-width: 4; indent-tabs-mode: t -*-
-/*
- * popcon/local test
- *
- * Copyright (C) 2007 Enrico Zini <enrico@debian.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <ept/popcon/local.h>
-#include <ept/popcon/popcon.h>
-#include <ept/popcon/maint/path.h>
-
-#include <ept/test.h>
-
-using namespace std;
-using namespace ept;
-using namespace ept::popcon;
-
-struct TestPopconLocal
-{
- Path::OverridePopconSourceDir odsd;
- Path::OverridePopconIndexDir odid;
- Path::OverridePopconUserSourceDir odusd;
- Path::OverridePopconUserIndexDir oduid;
-
- Popcon popcon;
- Local local;
-
- TestPopconLocal()
- : odsd( TEST_ENV_DIR "popcon" ),
- odid( TEST_ENV_DIR "popcon" ),
- odusd( TEST_ENV_DIR "popcon" ),
- oduid( TEST_ENV_DIR "popcon" ),
- local( TEST_ENV_DIR "popcon/popularity-contest" )
- {}
-
- // Very basic access
- Test basicAccess()
- {
- assert(local.score("apt") > 0);
- assert(local.tfidf(popcon, "apt") > 0);
- }
-
-#if 0 // mornfall: apparently left out by enrico, leaving as it is
-// Check that every valid index is accessible
-template<> template<>
-void to::test< 2 >()
-{
- for (size_t i = 0; i < popcon.size(); ++i)
- {
- //cerr << popcon.name(i) << " " << popcon.score(i) << endl;
- assert(popcon.score(i) > 0);
- }
-}
-
-// Check that we can get a score for every package
-template<> template<>
-void to::test< 3 >()
-{
- int has = 0;
- for (Apt::iterator i = apt.begin(); i != apt.end(); ++i)
- {
- float score = popcon.score(*i);
- if (score > 0)
- ++has;
- }
- // At least 1000 packages should have a score
- assert(has > 1000);
-}
-
-// Check that scores are meaningful
-template<> template<>
-void to::test< 4 >()
-{
- assert(popcon["apt"] > popcon["libapt-pkg-dev"]);
-}
-
-// If there is no data, Popcon should work as if all scores were 0
-template<> template<>
-void to::test<5>()
-{
- Path::OverridePopconSourceDir odsd("./empty");
- Path::OverridePopconIndexDir odid("./empty");
- Path::OverridePopconUserSourceDir odusd("./empty");
- Path::OverridePopconUserIndexDir oduid("./empty");
- Popcon empty;
-
- assert_eq(empty.timestamp(), 0);
- assert(!empty.hasData());
-
- assert(empty.size() == 0);
- assert(empty.score("apt") == 0.0);
-}
-#endif
-
-};
-
-// vim:set ts=4 sw=4:
diff --git a/ept/popcon/maint/path.cc b/ept/popcon/maint/path.cc
deleted file mode 100644
index aef6314..0000000
--- a/ept/popcon/maint/path.cc
+++ /dev/null
@@ -1,113 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: t -*-
-
-/** \file
- * popcon paths
- */
-
-/*
- * Copyright (C) 2005,2006,2007 Enrico Zini <enrico@debian.org>, Peter Rockai <me@mornfall.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <ept/config.h>
-#include <ept/popcon/maint/path.h>
-
-#include <wibble/sys/fs.h>
-#include <wibble/string.h>
-
-#include <sys/types.h> // getpwuid, getuid
-#include <pwd.h> // getpwuid
-#include <unistd.h> // getuid
-
-using namespace wibble;
-
-namespace ept {
-namespace popcon {
-
-static std::string userdir()
-{
- std::string rcdir;
-
- struct passwd* udata = getpwuid(getuid());
- rcdir = str::joinpath(udata->pw_dir, ".popcon");
-
- return rcdir;
-}
-
-
-Path &Path::instance() {
- if (!s_instance) {
- s_instance = new Path;
- instance().m_popconSourceDir = POPCON_DB_DIR;
- instance().m_popconIndexDir = POPCON_DB_DIR;
- instance().m_popconUserSourceDir = userdir();
- instance().m_popconUserIndexDir = userdir();
- }
- return *s_instance;
-}
-
-int Path::access( const std::string &s, int m ) {
- return ::access( s.c_str(), m );
-}
-
-time_t Path::timestamp( const std::string& file ) {
- return sys::fs::timestamp(file, 0);
-}
-
-void Path::setPopconSourceDir( const std::string &s )
-{
- instance().m_popconSourceDir = s;
-}
-void Path::setPopconIndexDir( const std::string &s )
-{
- instance().m_popconIndexDir = s;
-}
-void Path::setPopconUserSourceDir( const std::string &s )
-{
- instance().m_popconUserSourceDir = s;
-}
-void Path::setPopconUserIndexDir( const std::string &s )
-{
- instance().m_popconUserIndexDir = s;
-}
-
-std::string Path::popconSourceDir() { return instance().m_popconSourceDir; }
-std::string Path::popconIndexDir() { return instance().m_popconIndexDir; }
-std::string Path::popconUserSourceDir() { return instance().m_popconUserSourceDir; }
-std::string Path::popconUserIndexDir() { return instance().m_popconUserIndexDir; }
-
-std::string Path::scores() {
- return str::joinpath(popconIndexDir(), "scores");
-}
-
-std::string Path::scoresIndex() {
- return str::joinpath(popconIndexDir(), "scores.idx");
-}
-
-std::string Path::userScores() {
- return str::joinpath(popconUserIndexDir(), "scores");
-}
-
-std::string Path::userScoresIndex() {
- return str::joinpath(popconUserIndexDir(), "scores.idx");
-}
-
-Path *Path::s_instance = 0;
-
-}
-}
-
-// vim:set ts=4 sw=4:
diff --git a/ept/popcon/maint/path.h b/ept/popcon/maint/path.h
deleted file mode 100644
index cb4c31d..0000000
--- a/ept/popcon/maint/path.h
+++ /dev/null
@@ -1,125 +0,0 @@
-// -*- mode: c++; indent-tabs-mode: t -*-
-/** \file
- * popcon paths
- */
-
-/*
- * Copyright (C) 2005,2006,2007 Enrico Zini <enrico@debian.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef EPT_POPCON_PATH_H
-#define EPT_POPCON_PATH_H
-
-#include <string>
-
-namespace ept {
-namespace popcon {
-
-/**
- * Singleton class to configure and access the various Popcon paths
- */
-class Path
-{
-public:
- static std::string scores();
- static std::string scoresIndex();
- static std::string userScores();
- static std::string userScoresIndex();
-
- static std::string popconSourceDir();
- static std::string popconIndexDir();
- static std::string popconUserSourceDir();
- static std::string popconUserIndexDir();
-
- // Directory where Popcon source data is found
- static void setPopconSourceDir( const std::string &s );
-
- // Directory where Popcon indexes are kept
- static void setPopconIndexDir( const std::string &s );
-
- // User-specific directory for Popcon source data
- static void setPopconUserSourceDir( const std::string &s );
-
- // User-specific directory for Popcon index data
- static void setPopconUserIndexDir( const std::string &s );
-
- static int access( const std::string &, int );
- static time_t timestamp( const std::string& );
-
- // RAII-style classes to temporarily override directories
- class OverridePopconSourceDir
- {
- std::string old;
- public:
- OverridePopconSourceDir(const std::string& path) : old(Path::popconSourceDir())
- {
- Path::setPopconSourceDir(path);
- }
- ~OverridePopconSourceDir() { Path::setPopconSourceDir(old); }
- };
- class OverridePopconIndexDir
- {
- std::string old;
- public:
- OverridePopconIndexDir(const std::string& path) : old(Path::popconIndexDir())
- {
- Path::setPopconIndexDir(path);
- }
- ~OverridePopconIndexDir() { Path::setPopconIndexDir(old); }
- };
- class OverridePopconUserSourceDir
- {
- std::string old;
- public:
- OverridePopconUserSourceDir(const std::string& path) : old(Path::popconUserSourceDir())
- {
- Path::setPopconUserSourceDir(path);
- }
- ~OverridePopconUserSourceDir() { Path::setPopconUserSourceDir(old); }
- };
- class OverridePopconUserIndexDir
- {
- std::string old;
- public:
- OverridePopconUserIndexDir(const std::string& path) : old(Path::popconUserIndexDir())
- {
- Path::setPopconUserIndexDir(path);
- }
- ~OverridePopconUserIndexDir() { Path::setPopconUserIndexDir(old); }
- };
-protected:
- static Path *s_instance;
- static Path &instance();
-
- // Directory where Popcon source data is found
- std::string m_popconSourceDir;
-
- // Directory where Popcon indexes are kept
- std::string m_popconIndexDir;
-
- // User-specific directory for Popcon source data
- std::string m_popconUserSourceDir;
-
- // User-specific directory for Popcon index data
- std::string m_popconUserIndexDir;
-};
-
-}
-}
-
-// vim:set ts=4 sw=4:
-#endif
diff --git a/ept/popcon/maint/popconindexer.cc b/ept/popcon/maint/popconindexer.cc
deleted file mode 100644
index a8dea69..0000000
--- a/ept/popcon/maint/popconindexer.cc
+++ /dev/null
@@ -1,248 +0,0 @@
-#include <ept/popcon/popcon.h>
-#include <ept/popcon/maint/popconindexer.h>
-#include <ept/popcon/maint/path.h>
-
-#include <wibble/exception.h>
-#include <wibble/sys/fs.h>
-
-#include <tagcoll/diskindex/mmap.h>
-
-#include <unistd.h>
-#include <set>
-#include <string>
-#include <cstdio>
-#include <cstring>
-
-using namespace std;
-
-namespace ept {
-namespace popcon {
-
-template<typename STRUCT>
-struct StructIndexer : public tagcoll::diskindex::MMapIndexer
-{
- const STRUCT& data;
- StructIndexer(const STRUCT& data) : data(data) {}
-
- int encodedSize() const { return sizeof(STRUCT); }
- void encode(char* buf) const { *(STRUCT*)buf = data; }
-};
-
-/// MMapIndexer that indexes the package names
-struct PopconGenerator : public tagcoll::diskindex::MMapIndexer
-{
- // Sorted set of all available package names and data
- std::map<std::string, Score> data;
-
- int encodedSize() const
- {
- int size = data.size() * sizeof(Score);
- for (std::map<std::string, Score>::const_iterator i = data.begin();
- i != data.end(); ++i)
- size += i->first.size() + 1;
- return tagcoll::diskindex::MMap::align(size);
- }
-
- void encode(char* buf) const
- {
- int pos = data.size() * sizeof(Score);
- int idx = 0;
- for (std::map<std::string, Score>::const_iterator i = data.begin();
- i != data.end(); ++i)
- {
- ((Score*)buf)[idx] = i->second;
- ((Score*)buf)[idx].offset = pos;
- memcpy(buf + pos, i->first.c_str(), i->first.size() + 1);
- pos += i->first.size() + 1;
- ++idx;
- }
- }
-};
-
-
-PopconIndexer::PopconIndexer()
- : mainSource(Path::popconSourceDir()),
- userSource(Path::popconUserSourceDir())
-{
- rescan();
-}
-
-void PopconIndexer::rescan()
-{
- ts_main_src = mainSource.timestamp();
- ts_user_src = userSource.timestamp();
- ts_main_sco = Path::timestamp(Path::scores());
- ts_user_sco = Path::timestamp(Path::userScores());
- ts_main_idx = Path::timestamp(Path::scoresIndex());
- ts_user_idx = Path::timestamp(Path::userScoresIndex());
-}
-
-bool PopconIndexer::needsRebuild() const
-{
- // If there are no indexes of any kind, then we need rebuilding
- if (ts_user_sco == 0 || ts_main_sco == 0 || ts_user_idx == 0 && ts_main_idx == 0)
- return true;
-
- // If the user index is ok, then we are fine
- if (ts_user_sco >= sourceTimestamp() && ts_user_idx >= sourceTimestamp())
- return false;
-
- // If there are user sources, then we cannot use the system index
- if (ts_user_src > 0)
- return true;
-
- // If there are no user sources, then we can fallback on the system
- // indexes in case the user indexes are not up to date
- if (ts_main_sco >= sourceTimestamp() && ts_main_idx >= sourceTimestamp())
- return false;
-
- return true;
-}
-
-bool PopconIndexer::userIndexIsRedundant() const
-{
- // If there is no user index, then it is not redundant
- if (ts_user_idx == 0)
- return false;
-
- // If the system index is not up to date, then the user index is not
- // redundant
- if (ts_main_idx < sourceTimestamp())
- return false;
-
- return true;
-}
-
-bool PopconIndexer::rebuild(const std::string& scofname, const std::string& idxfname)
-{
- PopconGenerator gen;
- InfoStruct is;
- is.submissions = 0;
- if (!mainSource.readScores(gen.data, is.submissions))
- userSource.readScores(gen.data, is.submissions);
- if (gen.data.empty())
- return false;
-
- StructIndexer<InfoStruct> infoStruct(is);
-
- // Create the index
- tagcoll::diskindex::MasterMMapIndexer master(idxfname);
- master.append(gen);
- master.append(infoStruct);
- master.commit();
-
-// for (map<string, Score>::const_iterator i = gen.data.begin(); i != gen.data.end(); ++i)
-// {
-// fprintf(stderr, "%s %d %f\n", i->first.c_str(), i->second.offset, i->second.score);
-// }
-
- // Create the score file
- FILE* out = fopen(scofname.c_str(), "wt");
- if (out == NULL)
- throw wibble::exception::File(scofname, "opening and truncating file for writing");
- for (map<string, Score>::const_iterator i = gen.data.begin();
- i != gen.data.end(); ++i)
- {
- fprintf(out, "%s %f\n", i->first.c_str(), i->second.score);
- }
- fclose(out);
- return true;
-}
-
-bool PopconIndexer::rebuildIfNeeded()
-{
- if (needsRebuild())
- {
- // Decide if we rebuild the user index or the system index
- if (Path::access(Path::popconIndexDir(), W_OK) == 0)
- {
- // Since we can write on the system index directory, we rebuild
- // the system index
- if (!rebuild(Path::scores(), Path::scoresIndex()))
- return false;
- ts_main_sco = Path::timestamp(Path::scores());
- ts_main_idx = Path::timestamp(Path::scoresIndex());
- if (Path::scores() == Path::userScores())
- ts_user_sco = ts_main_sco;
- if (Path::scoresIndex() == Path::userScoresIndex())
- ts_user_idx = ts_main_idx;
- } else {
- wibble::sys::fs::mkFilePath(Path::userScores());
- wibble::sys::fs::mkFilePath(Path::userScoresIndex());
- if (!rebuild(Path::userScores(), Path::userScoresIndex()))
- return false;
- ts_user_sco = Path::timestamp(Path::userScores());
- ts_user_idx = Path::timestamp(Path::userScoresIndex());
- }
- return true;
- }
- return false;
-}
-
-bool PopconIndexer::deleteRedundantUserIndex()
-{
- if (userIndexIsRedundant())
- {
- // Delete the user indexes if they exist
- if (Path::scores() != Path::userScores())
- {
- unlink(Path::userScores().c_str());
- ts_user_sco = 0;
- }
- if (Path::scoresIndex() != Path::userScoresIndex())
- {
- unlink(Path::userScoresIndex().c_str());
- ts_user_idx = 0;
- }
- return true;
- }
- return false;
-}
-
-bool PopconIndexer::getUpToDatePopcon(std::string& scofname, std::string& idxfname)
-{
- // If there are no indexes of any kind, then we have nothing to return
- if (ts_user_sco == 0 && ts_main_sco == 0 && ts_user_idx == 0 && ts_main_idx == 0)
- return false;
-
- // If the user index is up to date, use it
- if (ts_user_sco >= sourceTimestamp() &&
- ts_user_idx >= sourceTimestamp())
- {
- scofname = Path::userScores();
- idxfname = Path::userScoresIndex();
- return true;
- }
-
- // If the user index is not up to date and we have user sources, we cannot
- // fall back to the system index
- if (ts_user_src != 0)
- return false;
-
- // Fallback to the system index
- if (ts_main_sco >= sourceTimestamp() &&
- ts_main_idx >= sourceTimestamp())
- {
- scofname = Path::scores();
- idxfname = Path::scoresIndex();
- return true;
- }
-
- return false;
-}
-
-
-bool PopconIndexer::obtainWorkingPopcon(std::string& scofname, std::string& idxfname)
-{
- PopconIndexer indexer;
-
- indexer.rebuildIfNeeded();
- indexer.deleteRedundantUserIndex();
- return indexer.getUpToDatePopcon(scofname, idxfname);
-}
-
-
-}
-}
-
-// vim:set ts=4 sw=4:
diff --git a/ept/popcon/maint/popconindexer.h b/ept/popcon/maint/popconindexer.h
deleted file mode 100644
index 86789cf..0000000
--- a/ept/popcon/maint/popconindexer.h
+++ /dev/null
@@ -1,75 +0,0 @@
-#ifndef EPT_DEBTAGS_PKGIDINDEXER_H
-#define EPT_DEBTAGS_PKGIDINDEXER_H
-
-/** @file
- * @author Enrico Zini <enrico@enricozini.org>
- * Rebuild and maintain the map from package IDs to package names
- */
-
-/*
- * Copyright (C) 2003-2007 Enrico Zini <enrico@debian.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <ept/popcon/maint/sourcedir.h>
-#include <string>
-
-namespace ept {
-namespace popcon {
-
-struct InfoStruct
-{
- size_t submissions;
-};
-
-struct PopconIndexer
-{
- SourceDir mainSource;
- SourceDir userSource;
- time_t ts_main_src;
- time_t ts_user_src;
- time_t ts_main_sco;
- time_t ts_user_sco;
- time_t ts_main_idx;
- time_t ts_user_idx;
-
- time_t sourceTimestamp() const
- {
- time_t res = ts_main_src;
- if (ts_user_src > res) res = ts_user_src;
- return res;
-
- }
- bool needsRebuild() const;
- bool rebuild(const std::string& scofname, const std::string& idxfname);
- bool rebuildIfNeeded();
- bool getUpToDatePopcon(std::string& scofname, std::string& idxfname);
-
- bool userIndexIsRedundant() const;
- bool deleteRedundantUserIndex();
-
- void rescan();
-
- PopconIndexer();
-
- static bool obtainWorkingPopcon(std::string& scofname, std::string& idxfname);
-};
-
-}
-}
-
-// vim:set ts=4 sw=4:
-#endif
diff --git a/ept/popcon/maint/sourcedir.cc b/ept/popcon/maint/sourcedir.cc
deleted file mode 100644
index c908edd..0000000
--- a/ept/popcon/maint/sourcedir.cc
+++ /dev/null
@@ -1,155 +0,0 @@
-#include <ept/popcon/maint/sourcedir.h>
-#include <ept/popcon/maint/path.h>
-
-#include <wibble/string.h>
-#include <wibble/sys/fs.h>
-
-#include <tagcoll/input/zlib.h>
-#include <tagcoll/input/stdio.h>
-
-#include <cstdlib>
-
-using namespace std;
-using namespace wibble;
-
-namespace ept {
-namespace popcon {
-
-SourceDir::SourceDir(const std::string& path)
- : sys::fs::Directory(path)
-{
-}
-SourceDir::~SourceDir()
-{
-}
-
-SourceDir::FileType SourceDir::fileType(const std::string& name)
-{
- if (name[0] == '.') return SKIP;
-
- if (name == "all-popcon-results.txt") return RAW;
- if (name == "all-popcon-results.txt.gz") return RAWGZ;
-
- return SKIP;
-}
-
-time_t SourceDir::timestamp()
-{
- if (!exists()) return 0;
-
- time_t max = 0;
- for (const_iterator d = begin(); d != end(); ++d)
- {
- string name = *d;
- FileType type = fileType(name);
- if (type == SKIP) continue;
-
- time_t ts = Path::timestamp(str::joinpath(m_path, name));
- if (ts > max) max = ts;
- }
-
- return max;
-}
-
-bool readLine(tagcoll::input::Input& in, string& str)
-{
- str.clear();
- int c;
- while ((c = in.nextChar()) != tagcoll::input::Input::Eof && c != '\n')
- str += c;
- return c != tagcoll::input::Input::Eof;
-}
-
-static void parseScores(tagcoll::input::Input& in, map<std::string, Score>& out, size_t& submissions)
-{
- string line;
- while (readLine(in, line))
- {
- if (line.size() < 10)
- continue;
- if (line.substr(0, 13) == "Submissions: ")
- {
- submissions = strtoul(line.substr(13).c_str(), 0, 10);
- continue;
- }
- if (line.substr(0, 9) != "Package: ")
- continue;
- size_t start = 9;
- size_t end = line.find(' ', start);
- if (end == string::npos)
- continue;
- string name = line.substr(start, end-start);
- // Skip packages not in the apt index
- //if (!apt.isValid(name))
- //continue;
-
- start = line.find_first_not_of(' ', end);
- if (start == string::npos) continue;
- end = line.find(' ', start);
- if (end == string::npos) continue;
- string vote = line.substr(start, end-start);
-
- start = line.find_first_not_of(' ', end);
- if (start == string::npos) continue;
- end = line.find(' ', start);
- if (end == string::npos) continue;
- string old = line.substr(start, end-start);
-
- start = line.find_first_not_of(' ', end);
- if (start == string::npos) continue;
- end = line.find(' ', start);
- if (end == string::npos) continue;
- string recent = line.substr(start, end-start);
-
- start = line.find_first_not_of(' ', end);
- if (start == string::npos) continue;
- end = line.find(' ', start);
- if (end == string::npos) end = line.size();
- string nofiles = line.substr(start, end-start);
-
- float score = (float)strtoul(vote.c_str(), NULL, 10)
- + (float)strtoul(recent.c_str(), NULL, 10) * 0.5f
- + (float)strtoul(old.c_str(), NULL, 10) * 0.3f
- + (float)strtoul(nofiles.c_str(), NULL, 10) * 0.8f;
-
- if (score > 0)
- out.insert(make_pair(name, Score(score)));
- }
-}
-
-bool SourceDir::readScores(map<std::string, Score>& out, size_t& submissions)
-{
- if (!exists()) return false;
-
- bool done = false;
-
- for (const_iterator d = begin(); d != end(); ++d)
- {
- string name = *d;
- FileType type = fileType(name);
- if (type == RAW)
- {
- // Read uncompressed data
- tagcoll::input::Stdio in(str::joinpath(m_path, name));
-
- // Read the scores
- parseScores(in, out, submissions);
- done = true;
- }
- else if (type == RAWGZ)
- {
- // Read compressed data
- tagcoll::input::Zlib in(str::joinpath(m_path, name));
-
- // Read the scores
- parseScores(in, out, submissions);
- done = true;
- }
- }
- return done;
-}
-
-}
-}
-
-// vim:set ts=4 sw=4:
diff --git a/ept/popcon/maint/sourcedir.h b/ept/popcon/maint/sourcedir.h
deleted file mode 100644
index eccb6f1..0000000
--- a/ept/popcon/maint/sourcedir.h
+++ /dev/null
@@ -1,64 +0,0 @@
-#ifndef EPT_POPCON_SOURCEDIR_H
-#define EPT_POPCON_SOURCEDIR_H
-
-/** @file
- * @author Enrico Zini <enrico@enricozini.org>
- * Popcon data source directory access
- */
-
-/*
- * Copyright (C) 2003,2004,2005,2006,2007 Enrico Zini <enrico@debian.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <ept/popcon/popcon.h>
-#include <wibble/sys/fs.h>
-#include <string>
-#include <map>
-
-namespace ept {
-namespace popcon {
-
-/**
- * Access a directory containing Debtags data files
- */
-class SourceDir : public wibble::sys::fs::Directory
-{
-protected:
- enum FileType { SKIP, RAW, RAWGZ };
-
- // Check if a file name is a tag file, a vocabulary file or a file to skip.
- // Please notice that it works on file names, not paths.
- FileType fileType(const std::string& name);
-
-public:
- SourceDir(const std::string& path);
- ~SourceDir();
-
- /// Return the time of the newest file in the source directory
- time_t timestamp();
-
- /**
- * Read the tag files in the directory and output their content to the map
- */
- bool readScores(std::map<std::string, Score>& out, size_t& submissions);
-};
-
-}
-}
-
-// vim:set ts=4 sw=4:
-#endif
diff --git a/ept/popcon/popcon.cc b/ept/popcon/popcon.cc
deleted file mode 100644
index 2ce9e53..0000000
--- a/ept/popcon/popcon.cc
+++ /dev/null
@@ -1,95 +0,0 @@
-// -*- mode: c++; tab-width: 4; indent-tabs-mode: t -*-
-
-/** @file
- * @author Enrico Zini <enrico@enricozini.org>
- * Quick map from package IDs to package names
- */
-
-/*
- * Copyright (C) 2007 Enrico Zini <enrico@debian.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <ept/popcon/popcon.h>
-#include <ept/popcon/maint/popconindexer.h>
-#include <ept/popcon/maint/path.h>
-
-//#include <iostream>
-
-using namespace std;
-
-namespace ept {
-namespace popcon {
-
-size_t Popcon::GeneralInfo::submissions() const
-{
- if (!m_buf) return 0;
- return ((InfoStruct*)m_buf)->submissions;
-}
-
-Popcon::Popcon()
-{
- std::string scofname, idxfname;
-
- if (!PopconIndexer::obtainWorkingPopcon(scofname, idxfname))
- {
- m_timestamp = 0;
- return;
- }
-
- //cerr << "GOT " << scofname << " " << idxfname << endl;
-
- m_timestamp = Path::timestamp(idxfname);
-
- mastermmap.init(idxfname);
- tagcoll::diskindex::MMap::init(mastermmap, 0);
-
- m_info.init(mastermmap, 1);
-
- //cerr << "SIZE " << size() << endl;
- //for (size_t i = 0; i < size(); ++i)
- //{
- // cerr << "EL " << i << ": " << ((Score*)m_buf)[i].offset << " " << ((Score*)m_buf)[i].score << endl;
- //}
-}
-
-float Popcon::scoreByName(const std::string& name) const
-{
- // Binary search the index to find the package ID
- int begin, end;
-
- /* Binary search */
- begin = -1, end = size();
- while (end - begin > 1)
- {
- int cur = (end + begin) / 2;
- if (this->name(cur) > name)
- end = cur;
- else
- begin = cur;
- }
-
- if (begin == -1 || this->name(begin) != name)
- //throw NotFoundException(string("looking for the ID of string ") + str);
- return 0;
- else
- return score(begin);
-}
-
-}
-}
-
-// vim:set ts=4 sw=4:
diff --git a/ept/popcon/popcon.h b/ept/popcon/popcon.h
deleted file mode 100644
index 684f98a..0000000
--- a/ept/popcon/popcon.h
+++ /dev/null
@@ -1,153 +0,0 @@
-// -*- mode: c++; tab-width: 4; indent-tabs-mode: t -*-
-#ifndef EPT_POPCON_POPCON_H
-#define EPT_POPCON_POPCON_H
-
-/** @file
- * @author Enrico Zini <enrico@enricozini.org>
- * Access popcon data
- */
-
-/*
- * Copyright (C) 2007 Enrico Zini <enrico@debian.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <tagcoll/diskindex/mmap.h>
-#include <string>
-
-namespace ept {
-namespace apt {
-class Apt;
-}
-
-namespace popcon {
-
-/**
- * Store the score information in the popcon cache.
- *
- * Currently, this is only one float; more can be added in the future.
- */
-class Score
-{
-protected:
- unsigned offset;
-
-public:
- float score;
-
- Score(float score) : offset(offset), score(score) {}
-
- friend class Popcon;
- friend class PopconIndexer;
- friend class PopconGenerator;
-};
-
-/**
- * Maps Packages to IDs and vice-versa.
- *
- * This is used in building the Debtags fast index, which works representing
- * tags and packages as int IDs.
- *
- * Index building works like this:
- * 1. The file all-popcon-results.txt.gz is downloaded from
- * http://popcon.debian.org/all-popcon-results.txt.gz
- * 2. The file is put in either ~/.popcon/all-popcon-results.txt.gz
- * or in /var/lib/popcon/all-popcon-results.txt.gz
- * 3. If the file is newer than the index, it will be automatically used to
- * recompute the scores and rebuild the index.
- */
-class Popcon : public tagcoll::diskindex::MMap
-{
- struct GeneralInfo : public tagcoll::diskindex::MMap
- {
- size_t submissions() const;
- };
-
- tagcoll::diskindex::MasterMMap mastermmap;
- time_t m_timestamp;
-
- GeneralInfo m_info;
-
- /// Get the score structure by index
- const Score* structByIndex(size_t idx) const
- {
- if (idx >= 0 && idx < size())
- return (Score*)m_buf + idx;
- return 0;
- }
-
-public:
- Popcon();
-
- /// Get the timestamp of when the index was last updated
- time_t timestamp() const { return m_timestamp; }
-
- /// Return true if this data source has data, false if it's empty
- bool hasData() const { return m_timestamp != 0; }
-
- /// Return the total number of popcon submissions
- size_t submissions() const { return m_info.submissions(); }
-
- /// Get the number of packages in the index
- size_t size() const
- {
- if (m_buf)
- return ((Score*)m_buf)->offset / sizeof(Score);
- else
- return 0;
- }
-
- /**
- * Get a package name by index
- *
- * If the index is not valid, returns the empty string.
- */
- std::string name(size_t idx) const
- {
- const Score* s = structByIndex(idx);
- if (s == 0) return std::string();
- return std::string(m_buf + s->offset);
- }
-
- /// Get the score by index
- float scoreByIndex(size_t idx) const
- {
- const Score* s = structByIndex(idx);
- if (!s) return 0;
- return s->score;
- }
-
- /// Get the score structure by package name
- float scoreByName(const std::string& name) const;
-
- /// Get the score by index
- float score(size_t idx) const { return scoreByIndex(idx); }
-
- /// Get the score by index
- float operator[](int idx) const { return scoreByIndex(idx); }
-
- /// Get the score by name
- float score(const std::string& name) const { return scoreByName(name); }
-
- /// Get the score structure by package name
- float operator[](const std::string& name) const { return scoreByName(name); }
-};
-
-}
-}
-
-// vim:set ts=4 sw=4:
-#endif
diff --git a/ept/popcon/popcon.test.h b/ept/popcon/popcon.test.h
deleted file mode 100644
index 4509b85..0000000
--- a/ept/popcon/popcon.test.h
+++ /dev/null
@@ -1,108 +0,0 @@
-// -*- mode: c++; tab-width: 4; indent-tabs-mode: t -*-
-/*
- * popcon test
- *
- * Copyright (C) 2007 Enrico Zini <enrico@debian.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <ept/popcon/popcon.h>
-#include <ept/popcon/maint/path.h>
-#include <ept/apt/apt.h>
-#include <set>
-
-#include <ept/test.h>
-
-using namespace std;
-using namespace ept;
-using namespace ept::popcon;
-using namespace ept::apt;
-
-struct TestPopcon
-{
- popcon::Path::OverridePopconSourceDir odsd;
- popcon::Path::OverridePopconIndexDir odid;
- popcon::Path::OverridePopconUserSourceDir odusd;
- popcon::Path::OverridePopconUserIndexDir oduid;
-
- Apt apt;
- Popcon popcon;
-
- TestPopcon()
- : odsd( TEST_ENV_DIR "popcon" ),
- odid( TEST_ENV_DIR "popcon" ),
- odusd( TEST_ENV_DIR "popcon" ),
- oduid( TEST_ENV_DIR "popcon" )
- {}
-
- Test basicAccess()
- {
- assert_eq(popcon.submissions(), 52024);
- assert(popcon.size() > 0);
- assert(popcon.score(0) > 0);
- assert(!popcon.name(0).empty());
- }
-
- // Check that every valid index is accessible
- Test accessibility()
- {
- for (size_t i = 0; i < popcon.size(); ++i)
- {
- //cerr << popcon.name(i) << " " << popcon.score(i) << endl;
- assert(popcon.score(i) > 0);
- }
- }
-
- // Check that we can get a score for every package
- Test haveScores()
- {
- int has = 0;
- for (Apt::iterator i = apt.begin(); i != apt.end(); ++i)
- {
- float score = popcon.score(*i);
- if (score > 0)
- ++has;
- }
- // At least 1000 packages should have a score
- assert(has > 1000);
- }
-
- // Check that scores are meaningful
- Test validScores()
- {
- assert(popcon["apt"] > popcon["libapt-pkg-dev"]);
- }
-
- // If there is no data, Popcon should work as if all scores were 0
- Test fallbackValues()
- {
- popcon::Path::OverridePopconSourceDir odsd("./empty");
- popcon::Path::OverridePopconIndexDir odid("./empty");
- popcon::Path::OverridePopconUserSourceDir odusd("./empty");
- popcon::Path::OverridePopconUserIndexDir oduid("./empty");
- Popcon empty;
-
- assert_eq(empty.timestamp(), 0);
- assert(!empty.hasData());
-
- assert_eq(empty.submissions(), 0);
- assert(empty.size() == 0);
- assert(empty.score("apt") == 0.0);
- }
-
-};
-
-// vim:set ts=4 sw=4: