summaryrefslogtreecommitdiff
path: root/pkgtools/pkg_install
diff options
context:
space:
mode:
authorjperkin <jperkin@pkgsrc.org>2020-07-01 10:03:19 +0000
committerjperkin <jperkin@pkgsrc.org>2020-07-01 10:03:19 +0000
commitee88dc9da634d450be2904aec58b6425bd0094de (patch)
tree5f785a024d975d846521cb549c8aa87ed1ece73c /pkgtools/pkg_install
parent4f0502e95d169cbaa145debd4845151727b3c0fc (diff)
downloadpkgsrc-ee88dc9da634d450be2904aec58b6425bd0094de.tar.gz
pkg_install: Fix and speed up "pkg_admin rebuild-tree".
In the pkg_admin front end, instead of adding +REQUIRED_BY entries as they are found, which previously led to duplicate entries, cache the results and write out the files at the end. Underneath, add a caching version of iterate_pkg_db() that avoids the same pkgdb directory lookup for every installed package, but is only suitable for reads. Also add a cache for best_match lookups to avoid expensive matches each time. For all caches, use a simple hashing function to improve lookup performance. In summary, as well as fixing +REQUIRED_BY files, these patches reduce the wall/user/system time of "pkg_admin rebuild-tree" on a system with 12,762 packages installed down from 13m52s/11m20s/2m32s to just 1m4s/1m3s/0m1s.
Diffstat (limited to 'pkgtools/pkg_install')
-rw-r--r--pkgtools/pkg_install/files/add/perform.c12
-rw-r--r--pkgtools/pkg_install/files/admin/main.c152
-rw-r--r--pkgtools/pkg_install/files/create/perform.c6
-rw-r--r--pkgtools/pkg_install/files/info/perform.c8
-rw-r--r--pkgtools/pkg_install/files/lib/iterate.c143
-rw-r--r--pkgtools/pkg_install/files/lib/lib.h23
6 files changed, 299 insertions, 45 deletions
diff --git a/pkgtools/pkg_install/files/add/perform.c b/pkgtools/pkg_install/files/add/perform.c
index c5bf251555d..048ec8dc128 100644
--- a/pkgtools/pkg_install/files/add/perform.c
+++ b/pkgtools/pkg_install/files/add/perform.c
@@ -1,4 +1,4 @@
-/* $NetBSD: perform.c,v 1.111 2020/07/01 09:46:04 jperkin Exp $ */
+/* $NetBSD: perform.c,v 1.112 2020/07/01 10:03:19 jperkin Exp $ */
#if HAVE_CONFIG_H
#include "config.h"
#endif
@@ -6,7 +6,7 @@
#if HAVE_SYS_CDEFS_H
#include <sys/cdefs.h>
#endif
-__RCSID("$NetBSD: perform.c,v 1.111 2020/07/01 09:46:04 jperkin Exp $");
+__RCSID("$NetBSD: perform.c,v 1.112 2020/07/01 10:03:19 jperkin Exp $");
/*-
* Copyright (c) 2003 Grant Beattie <grant@NetBSD.org>
@@ -450,7 +450,7 @@ check_other_installed(struct pkg_task *pkg)
return -1;
}
*iter = '\0';
- pkg->other_version = find_best_matching_installed_pkg(pkgbase);
+ pkg->other_version = find_best_matching_installed_pkg(pkgbase, 0);
free(pkgbase);
if (pkg->other_version == NULL)
return 0;
@@ -1127,7 +1127,7 @@ install_depend_pkg(const char *dep)
warnx("Can't install dependency %s, continuing", dep);
}
- if (find_best_matching_installed_pkg(dep) == NULL) {
+ if (find_best_matching_installed_pkg(dep, 0) == NULL) {
if (!ForceDepends) {
warnx("Just installed dependency %s disappeared", dep);
return 1;
@@ -1158,7 +1158,7 @@ check_dependencies(struct pkg_task *pkg)
} else if (p->type != PLIST_PKGDEP)
continue;
- if (find_best_matching_installed_pkg(p->name) == NULL) {
+ if (find_best_matching_installed_pkg(p->name, 0) == NULL) {
if (install_depend_pkg(p->name) != 0) {
status = -1;
break;
@@ -1177,7 +1177,7 @@ check_dependencies(struct pkg_task *pkg)
} else if (p->type != PLIST_PKGDEP)
continue;
- best_installed = find_best_matching_installed_pkg(p->name);
+ best_installed = find_best_matching_installed_pkg(p->name, 0);
for (i = 0; i < pkg->dep_length; ++i) {
if (strcmp(best_installed, pkg->dependencies[i]) == 0)
diff --git a/pkgtools/pkg_install/files/admin/main.c b/pkgtools/pkg_install/files/admin/main.c
index c61f9510421..7bd43f771d4 100644
--- a/pkgtools/pkg_install/files/admin/main.c
+++ b/pkgtools/pkg_install/files/admin/main.c
@@ -1,4 +1,4 @@
-/* $NetBSD: main.c,v 1.67 2019/10/11 11:57:41 joerg Exp $ */
+/* $NetBSD: main.c,v 1.68 2020/07/01 10:03:20 jperkin Exp $ */
#if HAVE_CONFIG_H
#include "config.h"
@@ -7,7 +7,7 @@
#if HAVE_SYS_CDEFS_H
#include <sys/cdefs.h>
#endif
-__RCSID("$NetBSD: main.c,v 1.67 2019/10/11 11:57:41 joerg Exp $");
+__RCSID("$NetBSD: main.c,v 1.68 2020/07/01 10:03:20 jperkin Exp $");
/*-
* Copyright (c) 1999-2019 The NetBSD Foundation, Inc.
@@ -90,6 +90,25 @@ struct pkgdb_count {
size_t packages;
};
+/*
+ * A hashed list of +REQUIRED_BY entries.
+ */
+struct reqd_by_entry {
+ char *pkgname;
+ SLIST_ENTRY(reqd_by_entry) entries;
+};
+SLIST_HEAD(reqd_by_entry_head, reqd_by_entry);
+
+/*
+ * A hashed list of packages that contain +REQUIRED_BY entries.
+ */
+struct pkg_reqd_by {
+ char *pkgname;
+ struct reqd_by_entry_head required_by[PKG_HASH_SIZE];
+ SLIST_ENTRY(pkg_reqd_by) entries;
+};
+SLIST_HEAD(pkg_reqd_by_head, pkg_reqd_by);
+
static const char Options[] = "C:K:SVbd:qs:v";
int quiet, verbose;
@@ -280,37 +299,79 @@ remove_required_by(const char *pkgname, void *cookie)
}
static void
-add_required_by(const char *pattern, const char *required_by)
+add_required_by(const char *pattern, const char *pkgname, struct pkg_reqd_by_head *hash)
{
- char *best_installed, *path;
- int fd;
- size_t len;
-
- best_installed = find_best_matching_installed_pkg(pattern);
+ struct pkg_reqd_by_head *phead;
+ struct pkg_reqd_by *pkg;
+ struct reqd_by_entry_head *ehead;
+ struct reqd_by_entry *entry;
+ char *best_installed;
+ int i;
+
+ best_installed = find_best_matching_installed_pkg(pattern, 1);
if (best_installed == NULL) {
- warnx("Dependency %s of %s unresolved", pattern, required_by);
+ warnx("Dependency %s of %s unresolved", pattern, pkgname);
return;
}
- path = pkgdb_pkg_file(best_installed, REQUIRED_BY_FNAME);
- free(best_installed);
+ /*
+ * Find correct reqd_by head based on hash of best_installed, which is
+ * the package in question that we are adding +REQUIRED_BY entries for.
+ */
+ phead = &hash[PKG_HASH_ENTRY(best_installed)];
- if ((fd = open(path, O_WRONLY | O_APPEND | O_CREAT, 0644)) == -1)
- errx(EXIT_FAILURE, "Cannot write to %s", path);
- free(path);
-
- len = strlen(required_by);
- if (write(fd, required_by, len) != (ssize_t)len ||
- write(fd, "\n", 1) != 1 ||
- close(fd) == -1)
- errx(EXIT_FAILURE, "Cannot write to %s", path);
-}
+ /*
+ * Look for an existing entry in this hash list.
+ */
+ SLIST_FOREACH(pkg, phead, entries) {
+ if (strcmp(pkg->pkgname, best_installed) == 0) {
+
+ /*
+ * Found an entry, now see if it already has a
+ * +REQUIRED_BY entry recorded for this pkgname,
+ * and if not then add it.
+ */
+ ehead = &pkg->required_by[PKG_HASH_ENTRY(pkgname)];
+ SLIST_FOREACH(entry, ehead, entries) {
+ if (strcmp(entry->pkgname, pkgname) == 0)
+ break;
+ }
+ if (entry == NULL) {
+ entry = xmalloc(sizeof(*entry));
+ entry->pkgname = xstrdup(pkgname);
+ SLIST_INSERT_HEAD(ehead, entry, entries);
+ }
+
+ break;
+ }
+ }
+
+ /*
+ * Create new package containing its first +REQUIRED_BY entry.
+ */
+ if (pkg == NULL) {
+ pkg = xmalloc(sizeof(*pkg));
+ pkg->pkgname = xstrdup(best_installed);
+ for (i = 0; i < PKG_HASH_SIZE; i++)
+ SLIST_INIT(&pkg->required_by[i]);
+
+ ehead = &pkg->required_by[PKG_HASH_ENTRY(pkgname)];
+ entry = xmalloc(sizeof(*entry));
+ entry->pkgname = xstrdup(pkgname);
+ SLIST_INSERT_HEAD(ehead, entry, entries);
+
+ SLIST_INSERT_HEAD(phead, pkg, entries);
+ }
+
+ free(best_installed);
+}
static int
add_depends_of(const char *pkgname, void *cookie)
{
FILE *fp;
+ struct pkg_reqd_by_head *h = cookie;
plist_t *p;
package_t plist;
char *path;
@@ -325,7 +386,7 @@ add_depends_of(const char *pkgname, void *cookie)
for (p = plist.head; p; p = p->next) {
if (p->type == PLIST_PKGDEP)
- add_required_by(p->name, pkgname);
+ add_required_by(p->name, pkgname, h);
}
free_plist(&plist);
@@ -336,10 +397,53 @@ add_depends_of(const char *pkgname, void *cookie)
static void
rebuild_tree(void)
{
- if (iterate_pkg_db(remove_required_by, NULL) == -1)
+ FILE *fp;
+ struct pkg_reqd_by_head pkgs[PKG_HASH_SIZE];
+ struct pkg_reqd_by *p;
+ struct reqd_by_entry *e;
+ int fd, i, j;
+ char *path;
+
+ for (i = 0; i < PKG_HASH_SIZE; i++)
+ SLIST_INIT(&pkgs[i]);
+
+ /*
+ * First, calculate all of the +REQUIRED_BY entries and store in our
+ * pkgs hashed list.
+ */
+ if (iterate_pkg_db(add_depends_of, &pkgs) == -1)
errx(EXIT_FAILURE, "cannot iterate pkgdb");
- if (iterate_pkg_db(add_depends_of, NULL) == -1)
+
+ /*
+ * Now we can remove all existing +REQUIRED_BY files.
+ */
+ if (iterate_pkg_db(remove_required_by, NULL) == -1)
errx(EXIT_FAILURE, "cannot iterate pkgdb");
+
+ /*
+ * Finally, write out all the new +REQUIRED_BY files.
+ */
+ for (i = 0; i < PKG_HASH_SIZE; i++) {
+ SLIST_FOREACH(p, &pkgs[i], entries) {
+ path = pkgdb_pkg_file(p->pkgname, REQUIRED_BY_FNAME);
+
+ if ((fd = open(path, O_WRONLY | O_APPEND | O_CREAT,
+ 0644)) == -1)
+ errx(EXIT_FAILURE, "cannot write to %s", path);
+
+ if ((fp = fdopen(fd, "a")) == NULL)
+ errx(EXIT_FAILURE, "cannot open %s", path);
+
+ for (j = 0; j < PKG_HASH_SIZE; j++) {
+ SLIST_FOREACH(e, &p->required_by[j], entries)
+ fprintf(fp, "%s\n", e->pkgname);
+ }
+ if (fclose(fp) == EOF) {
+ remove(path);
+ errx(EXIT_FAILURE, "cannot close %s", path);
+ }
+ }
+ }
}
int
diff --git a/pkgtools/pkg_install/files/create/perform.c b/pkgtools/pkg_install/files/create/perform.c
index 2827c0e4b08..f178f2807b3 100644
--- a/pkgtools/pkg_install/files/create/perform.c
+++ b/pkgtools/pkg_install/files/create/perform.c
@@ -1,4 +1,4 @@
-/* $NetBSD: perform.c,v 1.27 2015/12/27 12:36:42 joerg Exp $ */
+/* $NetBSD: perform.c,v 1.28 2020/07/01 10:03:20 jperkin Exp $ */
#if HAVE_CONFIG_H
#include "config.h"
@@ -7,7 +7,7 @@
#if HAVE_SYS_CDEFS_H
#include <sys/cdefs.h>
#endif
-__RCSID("$NetBSD: perform.c,v 1.27 2015/12/27 12:36:42 joerg Exp $");
+__RCSID("$NetBSD: perform.c,v 1.28 2020/07/01 10:03:20 jperkin Exp $");
/*
* FreeBSD install - a package for the installation and maintainance
@@ -68,7 +68,7 @@ register_depends(package_t *plist, char *deps, int build_only)
cp = strsep(&deps, " \t\n");
if (*cp) {
char *best_installed;
- best_installed = find_best_matching_installed_pkg(cp);
+ best_installed = find_best_matching_installed_pkg(cp, 1);
if (best_installed != NULL) {
add_plist(plist, PLIST_BLDDEP, best_installed);
if (Verbose && !PlistOnly && build_only)
diff --git a/pkgtools/pkg_install/files/info/perform.c b/pkgtools/pkg_install/files/info/perform.c
index e3bbbcb7fe0..89033813589 100644
--- a/pkgtools/pkg_install/files/info/perform.c
+++ b/pkgtools/pkg_install/files/info/perform.c
@@ -1,4 +1,4 @@
-/* $NetBSD: perform.c,v 1.63 2017/04/19 21:42:50 joerg Exp $ */
+/* $NetBSD: perform.c,v 1.64 2020/07/01 10:03:20 jperkin Exp $ */
#if HAVE_CONFIG_H
#include "config.h"
@@ -7,7 +7,7 @@
#if HAVE_SYS_CDEFS_H
#include <sys/cdefs.h>
#endif
-__RCSID("$NetBSD: perform.c,v 1.63 2017/04/19 21:42:50 joerg Exp $");
+__RCSID("$NetBSD: perform.c,v 1.64 2020/07/01 10:03:20 jperkin Exp $");
/*-
* Copyright (c) 2008 Joerg Sonnenberger <joerg@NetBSD.org>.
@@ -566,13 +566,13 @@ CheckForBestPkg(const char *pkgname)
{
char *pattern, *best_match;
- best_match = find_best_matching_installed_pkg(pkgname);
+ best_match = find_best_matching_installed_pkg(pkgname, 1);
if (best_match == NULL) {
if (ispkgpattern(pkgname))
return 1;
pattern = xasprintf("%s-[0-9]*", pkgname);
- best_match = find_best_matching_installed_pkg(pattern);
+ best_match = find_best_matching_installed_pkg(pattern, 1);
free(pattern);
}
diff --git a/pkgtools/pkg_install/files/lib/iterate.c b/pkgtools/pkg_install/files/lib/iterate.c
index 36778a07ab9..a438e988e69 100644
--- a/pkgtools/pkg_install/files/lib/iterate.c
+++ b/pkgtools/pkg_install/files/lib/iterate.c
@@ -1,4 +1,4 @@
-/* $NetBSD: iterate.c,v 1.8 2010/01/22 13:30:42 joerg Exp $ */
+/* $NetBSD: iterate.c,v 1.9 2020/07/01 10:03:20 jperkin Exp $ */
/*-
* Copyright (c) 2007 Joerg Sonnenberger <joerg@NetBSD.org>.
@@ -45,6 +45,34 @@
#include "lib.h"
/*
+ * We define a couple of different caches to hold frequently accessed data.
+ *
+ * Firstly, we cache the results of readdir() on the package database directory
+ * when using iterate_pkg_db_cached(). This helps a lot during recursive calls
+ * and avoids exponential system calls, but is not suitable for situations
+ * where the database directory may be updated, for example during installs.
+ * In those situations the regular iterate_pkg_db() must be used.
+ *
+ * Secondly, we have a cache for matches of pattern lookups, avoiding expensive
+ * pkg_match() calls each time.
+ */
+struct pkg_db_list {
+ char *pkgname;
+ SLIST_ENTRY(pkg_db_list) entries;
+};
+SLIST_HEAD(pkg_db_list_head, pkg_db_list);
+
+struct pkg_match_list {
+ char *pattern;
+ char *pkgname;
+ SLIST_ENTRY(pkg_match_list) entries;
+};
+SLIST_HEAD(pkg_match_list_head, pkg_match_list);
+
+static struct pkg_db_list_head pkg_list_cache;
+static struct pkg_match_list_head pkg_match_cache[PKG_HASH_SIZE];
+
+/*
* Generic iteration function:
* - get new entries from srciter, stop on NULL
* - call matchiter for those entries, stop on non-null return value.
@@ -167,6 +195,74 @@ iterate_pkg_db(int (*matchiter)(const char *, void *), void *cookie)
return retval;
}
+struct pkg_db_iter_arg {
+ struct pkg_db_list_head head;
+ struct pkg_db_list *list;
+};
+
+static const char *
+pkg_db_iter_cached(void *cookie)
+{
+ struct pkg_db_iter_arg *arg = cookie;
+
+ if (arg->list == NULL)
+ arg->list = SLIST_FIRST(&arg->head);
+ else
+ arg->list = SLIST_NEXT(arg->list, entries);
+
+ if (arg->list != NULL)
+ return arg->list->pkgname;
+
+ return NULL;
+}
+
+/*
+ * Call matchiter for every installed package, using cached data to
+ * significantly increase performance during recursive calls.
+ *
+ * This is not suitable for every situation, for example when finding new
+ * matches after package installation/removal. In those situations the
+ * regular iterate_pkg_db() must be used.
+ */
+static int
+iterate_pkg_db_cached(int (*matchiter)(const char *, void *), void *cookie)
+{
+ DIR *dirp;
+ struct dirent *dp;
+ struct stat st;
+ struct pkg_db_iter_arg arg;
+ struct pkg_db_list *pkg;
+ const char *pkgdir;
+ int retval;
+
+ if (SLIST_EMPTY(&pkg_list_cache)) {
+ SLIST_INIT(&pkg_list_cache);
+
+ if ((dirp = opendir(pkgdb_get_dir())) == NULL) {
+ if (errno == ENOENT)
+ return 0; /* Empty pkgdb */
+ return -1;
+ }
+
+ while ((pkgdir = pkg_db_iter(dirp)) != NULL) {
+ pkg = xmalloc(sizeof(struct pkg_db_list));
+ pkg->pkgname = xstrdup(pkgdir);
+ SLIST_INSERT_HEAD(&pkg_list_cache, pkg, entries);
+ }
+
+ if (closedir(dirp) == -1)
+ return -1;
+ }
+
+ arg.head = pkg_list_cache;
+ arg.list = NULL;
+
+ retval = iterate_pkg_generic_src(matchiter, cookie,
+ pkg_db_iter_cached, &arg);
+
+ return retval;
+}
+
static int
match_by_basename(const char *pkg, void *cookie)
{
@@ -189,7 +285,7 @@ match_by_pattern(const char *pkg, void *cookie)
{
const char *pattern = cookie;
- return pkg_match(pattern, pkg);
+ return pkg_match(pattern, pkg);
}
struct add_matching_arg {
@@ -287,20 +383,55 @@ match_best_installed(const char *pkg, void *cookie)
/*
* Returns a copy of the name of best matching package.
* If no package matched the pattern or an error occured, return NULL.
+ *
+ * If use_cached is set, return a cached match entry if it exists, and also use
+ * the iterate_pkg_db cache, otherwise clear any matching cache entry and use
+ * regular iterate_pkg_db().
*/
char *
-find_best_matching_installed_pkg(const char *pattern)
+find_best_matching_installed_pkg(const char *pattern, int use_cached)
{
struct best_installed_match_arg arg;
+ struct pkg_match_list *pkg;
+ int idx = PKG_HASH_ENTRY(pattern), rv;
+
+ if (pattern == NULL)
+ return NULL;
+
+ SLIST_FOREACH(pkg, &pkg_match_cache[idx], entries) {
+ if (strcmp(pattern, pkg->pattern) == 0) {
+ if (use_cached)
+ return xstrdup(pkg->pkgname);
+ SLIST_REMOVE(&pkg_match_cache[idx], pkg,
+ pkg_match_list, entries);
+ free(pkg->pattern);
+ free(pkg->pkgname);
+ free(pkg);
+ break;
+ }
+ }
arg.pattern = pattern;
arg.best_current_match = NULL;
- if (iterate_pkg_db(match_best_installed, &arg) == -1) {
+ if (use_cached)
+ rv = iterate_pkg_db_cached(match_best_installed, &arg);
+ else
+ rv = iterate_pkg_db(match_best_installed, &arg);
+
+ if (rv == -1) {
warnx("could not process pkgdb");
return NULL;
}
+ if (arg.best_current_match != NULL) {
+ pkg = xmalloc(sizeof(struct pkg_match_list));
+ pkg->pattern = xstrdup(pattern);
+ pkg->pkgname = xstrdup(arg.best_current_match);
+ SLIST_INSERT_HEAD(&pkg_match_cache[idx],
+ pkg, entries);
+ }
+
return arg.best_current_match;
}
@@ -317,7 +448,7 @@ match_and_call(const char *pkg, void *cookie)
if (pkg_match(arg->pattern, pkg) == 1) {
return (*arg->call_fn)(pkg, arg->cookie);
- } else
+ } else
return 0;
}
@@ -460,7 +591,7 @@ match_file_and_call(const char *filename, void *cookie)
if (ret == 1)
return (*arg->call_fn)(filename, arg->cookie);
- else
+ else
return 0;
}
diff --git a/pkgtools/pkg_install/files/lib/lib.h b/pkgtools/pkg_install/files/lib/lib.h
index 17fe9231c59..937b53980fb 100644
--- a/pkgtools/pkg_install/files/lib/lib.h
+++ b/pkgtools/pkg_install/files/lib/lib.h
@@ -1,4 +1,4 @@
-/* $NetBSD: lib.h,v 1.69 2018/02/26 23:45:02 ginsbach Exp $ */
+/* $NetBSD: lib.h,v 1.70 2020/07/01 10:03:20 jperkin Exp $ */
/* from FreeBSD Id: lib.h,v 1.25 1997/10/08 07:48:03 charnier Exp */
@@ -231,6 +231,25 @@ typedef struct _lpkg_t {
TAILQ_HEAD(_lpkg_head_t, _lpkg_t);
typedef struct _lpkg_head_t lpkg_head_t;
+/*
+ * To improve performance when handling lists containing a large number of
+ * packages, it can be beneficial to use hashed lookups to avoid excessive
+ * strcmp() calls when searching for existing entries.
+ *
+ * The simple hashing function below uses the first 3 characters of either a
+ * pattern match or package name (as they are guaranteed to exist).
+ *
+ * Based on pkgsrc package names across the tree, this can still result in
+ * somewhat uneven distribution due to high numbers of packages beginning with
+ * "p5-", "php", "py-" etc, and so there are diminishing returns when trying to
+ * use a hash size larger than around 16 or so.
+ */
+#define PKG_HASH_SIZE 16
+#define PKG_HASH_ENTRY(x) (((unsigned char)(x)[0] \
+ + (unsigned char)(x)[1] * 257 \
+ + (unsigned char)(x)[2] * 65537) \
+ & (PKG_HASH_SIZE - 1))
+
struct pkg_vulnerabilities {
size_t entries;
char **vulnerability;
@@ -287,7 +306,7 @@ int iterate_pkg_db(int (*)(const char *, void *), void *);
int add_installed_pkgs_by_basename(const char *, lpkg_head_t *);
int add_installed_pkgs_by_pattern(const char *, lpkg_head_t *);
-char *find_best_matching_installed_pkg(const char *);
+char *find_best_matching_installed_pkg(const char *, int);
char *find_best_matching_file(const char *, const char *, int, int);
int match_installed_pkgs(const char *, int (*)(const char *, void *), void *);
int match_local_files(const char *, int, int, const char *, int (*cb)(const char *, void *), void *);