From 67c4ac7e7a9c8a8c20d0796b5a58e8f4797c3d7b Mon Sep 17 00:00:00 2001 From: Guillem Jover Date: Fri, 26 Sep 2014 05:43:56 +0200 Subject: dpkg: Switch the filesdb module to use the FNV hash function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use it instead of what seems to be a custom hash function. This seems to reduce dispersion somewhat. As a side effect this fixes an integer overflow. Addresses: #760741 Warned-by: ASan Reported-by: Bálint Réczey --- debian/changelog | 3 +++ src/filesdb.c | 15 +++++---------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/debian/changelog b/debian/changelog index 291745a12..a8733f68e 100644 --- a/debian/changelog +++ b/debian/changelog @@ -111,6 +111,9 @@ dpkg (1.17.14) UNRELEASED; urgency=low Based on a patch by Bálint Réczey . Closes: #760690 * Switch the libdpkg string hashing function from FNV-1 to the recommended FNV-1a variant. + * Switch the dpkg files database string hashing function from what appears + to be a custom hash function to the libdpkg FNV-1a implementation. As a + side effect this fixes an integer overflow. Addresses: #760741 [ Raphaël Hertzog ] * Explain better in deb-triggers(5) why interest/activate-noawait should be diff --git a/src/filesdb.c b/src/filesdb.c index af1e466bb..9c8780e46 100644 --- a/src/filesdb.c +++ b/src/filesdb.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include @@ -523,9 +524,9 @@ struct fileiterator { int nbinn; }; -/* This must always be a power of two. If you change it consider changing - * the per-character hashing factor (currently 1785 = 137 * 13) too. */ -#define BINS (1 << 17) +/* This must always be a prime for optimal performance. + * This is the closest one to 2^17 (131072). */ +#define BINS 131071 static struct filenamenode *bins[BINS]; @@ -576,12 +577,6 @@ void filesdbinit(void) { } } -static int hash(const char *name) { - int v= 0; - while (*name) { v *= 1787; v += *name; name++; } - return v; -} - struct filenamenode *findnamenode(const char *name, enum fnnflags flags) { struct filenamenode **pointerp, *newnode; const char *orig_name = name; @@ -590,7 +585,7 @@ struct filenamenode *findnamenode(const char *name, enum fnnflags flags) { * leading slash. */ name = path_skip_slash_dotslash(name); - pointerp= bins + (hash(name) & (BINS-1)); + pointerp = bins + (str_fnv_hash(name) % (BINS)); while (*pointerp) { /* XXX: Why is the assert needed? It's checking already added entries. */ assert((*pointerp)->name[0] == '/'); -- cgit v1.2.3