diff options
| -rw-r--r-- | apt/cache.py | 12 | ||||
| -rw-r--r-- | debian/changelog | 12 | ||||
| -rw-r--r-- | tests/test_apt_cache.py | 9 |
3 files changed, 32 insertions, 1 deletions
diff --git a/apt/cache.py b/apt/cache.py index 8e07e4d0..3679e4ba 100644 --- a/apt/cache.py +++ b/apt/cache.py @@ -64,6 +64,7 @@ class Cache(object): self._callbacks = {} self._weakref = weakref.WeakValueDictionary() self._set = set() + self._sorted_set = None if memonly: # force apt to build its caches in memory apt_pkg.config.set("Dir::Cache::pkgcache", "") @@ -126,6 +127,7 @@ class Cache(object): self._list = apt_pkg.SourceList() self._list.read_main_list() self._set.clear() + self._sorted_set = None self._weakref.clear() progress.op = _("Building data structures") @@ -157,7 +159,15 @@ class Cache(object): raise KeyError('The cache has no package named %r' % key) def __iter__(self): - for pkgname in self._set: + # We iterate sorted over package names here. With this we read the + # package lists linearly if we need to access the package records, + # instead of having to do thousands of random seeks; the latter + # is disastrous if we use compressed package indexes, and slower than + # necessary for uncompressed indexes. + if self._sorted_set is None: + self._sorted_set = sorted(self._set) + + for pkgname in self._sorted_set: yield self[pkgname] raise StopIteration diff --git a/debian/changelog b/debian/changelog index 8f76bb3e..b7bb3ad0 100644 --- a/debian/changelog +++ b/debian/changelog @@ -12,6 +12,18 @@ python-apt (0.7.96) UNRELEASED; urgency=low * utils/get_debian_mirrors.py: - ignore mirrors without a county + [ Martin Pitt ] + * tests/test_apt_cache.py: Test accessing the record of all packages during + iteration. This both ensures that it's well-formatted and structured, and + also that accessing it does not take an inordinate amount of time. This + exposes a severe performance problem when using gzip compressed package + indexes. + * apt/cache.py: When iterating over the cache, do so sorted by package name. 
+ With this we read the package lists linearly if we need to access the package records, instead of having to do thousands of random seeks; the latter is disastrous if we use compressed package indexes, and slower than necessary for uncompressed indexes. + -- Michael Vogt <mvo@debian.org> Tue, 01 Jun 2010 16:20:00 +0200 python-apt (0.7.95) unstable; urgency=low diff --git a/tests/test_apt_cache.py b/tests/test_apt_cache.py index fdcf482d..a00fa08b 100644 --- a/tests/test_apt_cache.py +++ b/tests/test_apt_cache.py @@ -30,5 +30,14 @@ class TestAptCache(unittest.TestCase): self.assertTrue(isinstance(dep.relation, str)) self.assertTrue(dep.pre_depend in (True, False)) + # accessing record should take a reasonable time; in + # particular, when using compressed indexes, it should not use + # tons of seek operations + r = pkg.candidate.record + self.assertEqual(r['Package'], pkg.name) + self.assert_('Version' in r) + self.assert_(len(r['Description']) > 0) + self.assert_(str(r).startswith('Package: %s\n' % pkg.name)) + if __name__ == "__main__": unittest.main() |
