summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Vogt <michael.vogt@ubuntu.com>2010-06-25 17:31:28 +0200
committerMichael Vogt <michael.vogt@ubuntu.com>2010-06-25 17:31:28 +0200
commit340f6a3d54f5705801267f365fb08b5d20228fe6 (patch)
tree52d3bd94c02ae9be75fb11bfd423043ef10838cc
parentd6cc940087ee8897bad4e6a25d04403d28c5dc0a (diff)
parent337fec14957b98c01bc61a5b516bcf8cd98ccb82 (diff)
downloadpython-apt-340f6a3d54f5705801267f365fb08b5d20228fe6.tar.gz
* tests/test_apt_cache.py: Test accessing the record of all packages during
iteration. This both ensures that it's well-formatted and structured, and also that accessing it does not take an inordinate amount of time. This exposes a severe performance problem when using gzip compressed package indexes. * apt/cache.py: When iterating over the cache, do so sorted by package name. With this we read the package lists linearly if we need to access the package records, instead of having to do thousands of random seeks; the latter is disastrous if we use compressed package indexes, and slower than necessary for uncompressed indexes.
-rw-r--r--apt/cache.py12
-rw-r--r--debian/changelog12
-rw-r--r--tests/test_apt_cache.py9
3 files changed, 32 insertions, 1 deletions
diff --git a/apt/cache.py b/apt/cache.py
index 8e07e4d0..3679e4ba 100644
--- a/apt/cache.py
+++ b/apt/cache.py
@@ -64,6 +64,7 @@ class Cache(object):
self._callbacks = {}
self._weakref = weakref.WeakValueDictionary()
self._set = set()
+ self._sorted_set = None
if memonly:
# force apt to build its caches in memory
apt_pkg.config.set("Dir::Cache::pkgcache", "")
@@ -126,6 +127,7 @@ class Cache(object):
self._list = apt_pkg.SourceList()
self._list.read_main_list()
self._set.clear()
+ self._sorted_set = None
self._weakref.clear()
progress.op = _("Building data structures")
@@ -157,7 +159,15 @@ class Cache(object):
raise KeyError('The cache has no package named %r' % key)
def __iter__(self):
- for pkgname in self._set:
+ # We iterate sorted over package names here. With this we read the
+ # package lists linearly if we need to access the package records,
+ # instead of having to do thousands of random seeks; the latter
+ # is disastrous if we use compressed package indexes, and slower than
+ # necessary for uncompressed indexes.
+ if self._sorted_set is None:
+ self._sorted_set = sorted(self._set)
+
+ for pkgname in self._sorted_set:
yield self[pkgname]
raise StopIteration
diff --git a/debian/changelog b/debian/changelog
index 8f76bb3e..b7bb3ad0 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -12,6 +12,18 @@ python-apt (0.7.96) UNRELEASED; urgency=low
* utils/get_debian_mirrors.py:
- ignore mirrors without a country
+ [ Martin Pitt ]
+ * tests/test_apt_cache.py: Test accessing the record of all packages during
+ iteration. This both ensures that it's well-formatted and structured, and
+ also that accessing it does not take an inordinate amount of time. This
+ exposes a severe performance problem when using gzip compressed package
+ indexes.
+ * apt/cache.py: When iterating over the cache, do so sorted by package name.
+ With this we read the package lists linearly if we need to access the
+ package records, instead of having to do thousands of random seeks; the
+ latter is disastrous if we use compressed package indexes, and slower than
+ necessary for uncompressed indexes.
+
-- Michael Vogt <mvo@debian.org> Tue, 01 Jun 2010 16:20:00 +0200
python-apt (0.7.95) unstable; urgency=low
diff --git a/tests/test_apt_cache.py b/tests/test_apt_cache.py
index fdcf482d..a00fa08b 100644
--- a/tests/test_apt_cache.py
+++ b/tests/test_apt_cache.py
@@ -30,5 +30,14 @@ class TestAptCache(unittest.TestCase):
self.assertTrue(isinstance(dep.relation, str))
self.assertTrue(dep.pre_depend in (True, False))
+ # accessing record should take a reasonable time; in
+ # particular, when using compressed indexes, it should not use
+ # tons of seek operations
+ r = pkg.candidate.record
+ self.assertEqual(r['Package'], pkg.name)
+ self.assert_('Version' in r)
+ self.assert_(len(r['Description']) > 0)
+ self.assert_(str(r).startswith('Package: %s\n' % pkg.name))
+
if __name__ == "__main__":
unittest.main()