summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- apt/cache.py 12
-rw-r--r-- debian/changelog 12
-rw-r--r-- tests/test_apt_cache.py 9
3 files changed, 32 insertions, 1 deletions
diff --git a/apt/cache.py b/apt/cache.py
index 8e07e4d0..3679e4ba 100644
--- a/apt/cache.py
+++ b/apt/cache.py
@@ -64,6 +64,7 @@ class Cache(object):
self._callbacks = {}
self._weakref = weakref.WeakValueDictionary()
self._set = set()
+ self._sorted_set = None
if memonly:
# force apt to build its caches in memory
apt_pkg.config.set("Dir::Cache::pkgcache", "")
@@ -126,6 +127,7 @@ class Cache(object):
self._list = apt_pkg.SourceList()
self._list.read_main_list()
self._set.clear()
+ self._sorted_set = None
self._weakref.clear()
progress.op = _("Building data structures")
@@ -157,7 +159,15 @@ class Cache(object):
raise KeyError('The cache has no package named %r' % key)
def __iter__(self):
- for pkgname in self._set:
+ # We iterate sorted over package names here. With this we read the
+ # package lists linearly if we need to access the package records,
+ # instead of having to do thousands of random seeks; the latter
+ # is disastrous if we use compressed package indexes, and slower than
+ # necessary for uncompressed indexes.
+ if self._sorted_set is None:
+ self._sorted_set = sorted(self._set)
+
+ for pkgname in self._sorted_set:
yield self[pkgname]
raise StopIteration
diff --git a/debian/changelog b/debian/changelog
index 8f76bb3e..b7bb3ad0 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -12,6 +12,18 @@ python-apt (0.7.96) UNRELEASED; urgency=low
* utils/get_debian_mirrors.py:
- ignore mirrors without a country
+ [ Martin Pitt ]
+ * tests/test_apt_cache.py: Test accessing the record of all packages during
+ iteration. This both ensures that it's well-formatted and structured, and
+ also that accessing it does not take an inordinate amount of time. This
+ exposes a severe performance problem when using gzip compressed package
+ indexes.
+ * apt/cache.py: When iterating over the cache, do so sorted by package name.
+ With this we read the package lists linearly if we need to access the
+ package records, instead of having to do thousands of random seeks; the
+ latter is disastrous if we use compressed package indexes, and slower than
+ necessary for uncompressed indexes.
+
-- Michael Vogt <mvo@debian.org> Tue, 01 Jun 2010 16:20:00 +0200
python-apt (0.7.95) unstable; urgency=low
diff --git a/tests/test_apt_cache.py b/tests/test_apt_cache.py
index fdcf482d..a00fa08b 100644
--- a/tests/test_apt_cache.py
+++ b/tests/test_apt_cache.py
@@ -30,5 +30,14 @@ class TestAptCache(unittest.TestCase):
self.assertTrue(isinstance(dep.relation, str))
self.assertTrue(dep.pre_depend in (True, False))
+ # accessing record should take a reasonable time; in
+ # particular, when using compressed indexes, it should not use
+ # tons of seek operations
+ r = pkg.candidate.record
+ self.assertEqual(r['Package'], pkg.name)
+ self.assert_('Version' in r)
+ self.assert_(len(r['Description']) > 0)
+ self.assert_(str(r).startswith('Package: %s\n' % pkg.name))
+
if __name__ == "__main__":
unittest.main()