diff options
author | jperkin <jperkin@pkgsrc.org> | 2020-07-01 10:03:19 +0000 |
---|---|---|
committer | jperkin <jperkin@pkgsrc.org> | 2020-07-01 10:03:19 +0000 |
commit | ee88dc9da634d450be2904aec58b6425bd0094de (patch) | |
tree | 5f785a024d975d846521cb549c8aa87ed1ece73c /pkgtools/pkg_install | |
parent | 4f0502e95d169cbaa145debd4845151727b3c0fc (diff) | |
download | pkgsrc-ee88dc9da634d450be2904aec58b6425bd0094de.tar.gz |
pkg_install: Fix and speed up "pkg_admin rebuild-tree".
In the pkg_admin front end, instead of adding +REQUIRED_BY entries as they
are found, which previously led to duplicate entries, cache the results and
write out the files at the end.
Underneath, add a caching version of iterate_pkg_db() that avoids the same
pkgdb directory lookup for every installed package, but is only suitable for
reads. Also add a cache for best_match lookups to avoid expensive matches
each time.
For all caches, use a simple hashing function to improve lookup performance.
In summary, as well as fixing +REQUIRED_BY files, these patches reduce the
wall/user/system time of "pkg_admin rebuild-tree" on a system with 12,762
packages installed down from 13m52s/11m20s/2m32s to just 1m4s/1m3s/0m1s.
Diffstat (limited to 'pkgtools/pkg_install')
-rw-r--r-- | pkgtools/pkg_install/files/add/perform.c | 12 | ||||
-rw-r--r-- | pkgtools/pkg_install/files/admin/main.c | 152 | ||||
-rw-r--r-- | pkgtools/pkg_install/files/create/perform.c | 6 | ||||
-rw-r--r-- | pkgtools/pkg_install/files/info/perform.c | 8 | ||||
-rw-r--r-- | pkgtools/pkg_install/files/lib/iterate.c | 143 | ||||
-rw-r--r-- | pkgtools/pkg_install/files/lib/lib.h | 23 |
6 files changed, 299 insertions, 45 deletions
diff --git a/pkgtools/pkg_install/files/add/perform.c b/pkgtools/pkg_install/files/add/perform.c index c5bf251555d..048ec8dc128 100644 --- a/pkgtools/pkg_install/files/add/perform.c +++ b/pkgtools/pkg_install/files/add/perform.c @@ -1,4 +1,4 @@ -/* $NetBSD: perform.c,v 1.111 2020/07/01 09:46:04 jperkin Exp $ */ +/* $NetBSD: perform.c,v 1.112 2020/07/01 10:03:19 jperkin Exp $ */ #if HAVE_CONFIG_H #include "config.h" #endif @@ -6,7 +6,7 @@ #if HAVE_SYS_CDEFS_H #include <sys/cdefs.h> #endif -__RCSID("$NetBSD: perform.c,v 1.111 2020/07/01 09:46:04 jperkin Exp $"); +__RCSID("$NetBSD: perform.c,v 1.112 2020/07/01 10:03:19 jperkin Exp $"); /*- * Copyright (c) 2003 Grant Beattie <grant@NetBSD.org> @@ -450,7 +450,7 @@ check_other_installed(struct pkg_task *pkg) return -1; } *iter = '\0'; - pkg->other_version = find_best_matching_installed_pkg(pkgbase); + pkg->other_version = find_best_matching_installed_pkg(pkgbase, 0); free(pkgbase); if (pkg->other_version == NULL) return 0; @@ -1127,7 +1127,7 @@ install_depend_pkg(const char *dep) warnx("Can't install dependency %s, continuing", dep); } - if (find_best_matching_installed_pkg(dep) == NULL) { + if (find_best_matching_installed_pkg(dep, 0) == NULL) { if (!ForceDepends) { warnx("Just installed dependency %s disappeared", dep); return 1; @@ -1158,7 +1158,7 @@ check_dependencies(struct pkg_task *pkg) } else if (p->type != PLIST_PKGDEP) continue; - if (find_best_matching_installed_pkg(p->name) == NULL) { + if (find_best_matching_installed_pkg(p->name, 0) == NULL) { if (install_depend_pkg(p->name) != 0) { status = -1; break; @@ -1177,7 +1177,7 @@ check_dependencies(struct pkg_task *pkg) } else if (p->type != PLIST_PKGDEP) continue; - best_installed = find_best_matching_installed_pkg(p->name); + best_installed = find_best_matching_installed_pkg(p->name, 0); for (i = 0; i < pkg->dep_length; ++i) { if (strcmp(best_installed, pkg->dependencies[i]) == 0) diff --git a/pkgtools/pkg_install/files/admin/main.c b/pkgtools/pkg_install/files/admin/main.c index c61f9510421..7bd43f771d4 100644 --- a/pkgtools/pkg_install/files/admin/main.c +++ b/pkgtools/pkg_install/files/admin/main.c @@ -1,4 +1,4 @@ -/* $NetBSD: main.c,v 1.67 2019/10/11 11:57:41 joerg Exp $ */ +/* $NetBSD: main.c,v 1.68 2020/07/01 10:03:20 jperkin Exp $ */ #if HAVE_CONFIG_H #include "config.h" @@ -7,7 +7,7 @@ #if HAVE_SYS_CDEFS_H #include <sys/cdefs.h> #endif -__RCSID("$NetBSD: main.c,v 1.67 2019/10/11 11:57:41 joerg Exp $"); +__RCSID("$NetBSD: main.c,v 1.68 2020/07/01 10:03:20 jperkin Exp $"); /*- * Copyright (c) 1999-2019 The NetBSD Foundation, Inc. @@ -90,6 +90,25 @@ struct pkgdb_count { size_t packages; }; +/* + * A hashed list of +REQUIRED_BY entries. + */ +struct reqd_by_entry { + char *pkgname; + SLIST_ENTRY(reqd_by_entry) entries; +}; +SLIST_HEAD(reqd_by_entry_head, reqd_by_entry); + +/* + * A hashed list of packages that contain +REQUIRED_BY entries. + */ +struct pkg_reqd_by { + char *pkgname; + struct reqd_by_entry_head required_by[PKG_HASH_SIZE]; + SLIST_ENTRY(pkg_reqd_by) entries; +}; +SLIST_HEAD(pkg_reqd_by_head, pkg_reqd_by); + static const char Options[] = "C:K:SVbd:qs:v"; int quiet, verbose; @@ -280,37 +299,79 @@ remove_required_by(const char *pkgname, void *cookie) } static void -add_required_by(const char *pattern, const char *required_by) +add_required_by(const char *pattern, const char *pkgname, struct pkg_reqd_by_head *hash) { - char *best_installed, *path; - int fd; - size_t len; - - best_installed = find_best_matching_installed_pkg(pattern); + struct pkg_reqd_by_head *phead; + struct pkg_reqd_by *pkg; + struct reqd_by_entry_head *ehead; + struct reqd_by_entry *entry; + char *best_installed; + int i; + + best_installed = find_best_matching_installed_pkg(pattern, 1); if (best_installed == NULL) { - warnx("Dependency %s of %s unresolved", pattern, required_by); + warnx("Dependency %s of %s unresolved", pattern, pkgname); return; } - path = pkgdb_pkg_file(best_installed, REQUIRED_BY_FNAME); - free(best_installed); + /* + * Find correct reqd_by head based on hash of best_installed, which is + * the package in question that we are adding +REQUIRED_BY entries for. + */ + phead = &hash[PKG_HASH_ENTRY(best_installed)]; - if ((fd = open(path, O_WRONLY | O_APPEND | O_CREAT, 0644)) == -1) - errx(EXIT_FAILURE, "Cannot write to %s", path); - free(path); - - len = strlen(required_by); - if (write(fd, required_by, len) != (ssize_t)len || - write(fd, "\n", 1) != 1 || - close(fd) == -1) - errx(EXIT_FAILURE, "Cannot write to %s", path); -} + /* + * Look for an existing entry in this hash list. + */ + SLIST_FOREACH(pkg, phead, entries) { + if (strcmp(pkg->pkgname, best_installed) == 0) { + + /* + * Found an entry, now see if it already has a + * +REQUIRED_BY entry recorded for this pkgname, + * and if not then add it. + */ + ehead = &pkg->required_by[PKG_HASH_ENTRY(pkgname)]; + SLIST_FOREACH(entry, ehead, entries) { + if (strcmp(entry->pkgname, pkgname) == 0) + break; + } + if (entry == NULL) { + entry = xmalloc(sizeof(*entry)); + entry->pkgname = xstrdup(pkgname); + SLIST_INSERT_HEAD(ehead, entry, entries); + } + + break; + } + } + + /* + * Create new package containing its first +REQUIRED_BY entry. + */ + if (pkg == NULL) { + pkg = xmalloc(sizeof(*pkg)); + pkg->pkgname = xstrdup(best_installed); + for (i = 0; i < PKG_HASH_SIZE; i++) + SLIST_INIT(&pkg->required_by[i]); + + ehead = &pkg->required_by[PKG_HASH_ENTRY(pkgname)]; + entry = xmalloc(sizeof(*entry)); + entry->pkgname = xstrdup(pkgname); + SLIST_INSERT_HEAD(ehead, entry, entries); + + SLIST_INSERT_HEAD(phead, pkg, entries); + } + + free(best_installed); +} static int add_depends_of(const char *pkgname, void *cookie) { FILE *fp; + struct pkg_reqd_by_head *h = cookie; plist_t *p; package_t plist; char *path; @@ -325,7 +386,7 @@ add_depends_of(const char *pkgname, void *cookie) for (p = plist.head; p; p = p->next) { if (p->type == PLIST_PKGDEP) - add_required_by(p->name, pkgname); + add_required_by(p->name, pkgname, h); } free_plist(&plist); @@ -336,10 +397,53 @@ add_depends_of(const char *pkgname, void *cookie) static void rebuild_tree(void) { - if (iterate_pkg_db(remove_required_by, NULL) == -1) + FILE *fp; + struct pkg_reqd_by_head pkgs[PKG_HASH_SIZE]; + struct pkg_reqd_by *p; + struct reqd_by_entry *e; + int fd, i, j; + char *path; + + for (i = 0; i < PKG_HASH_SIZE; i++) + SLIST_INIT(&pkgs[i]); + + /* + * First, calculate all of the +REQUIRED_BY entries and store in our + * pkgs hashed list. + */ + if (iterate_pkg_db(add_depends_of, &pkgs) == -1) errx(EXIT_FAILURE, "cannot iterate pkgdb"); - if (iterate_pkg_db(add_depends_of, NULL) == -1) + + /* + * Now we can remove all existing +REQUIRED_BY files. + */ + if (iterate_pkg_db(remove_required_by, NULL) == -1) errx(EXIT_FAILURE, "cannot iterate pkgdb"); + + /* + * Finally, write out all the new +REQUIRED_BY files. + */ + for (i = 0; i < PKG_HASH_SIZE; i++) { + SLIST_FOREACH(p, &pkgs[i], entries) { + path = pkgdb_pkg_file(p->pkgname, REQUIRED_BY_FNAME); + + if ((fd = open(path, O_WRONLY | O_APPEND | O_CREAT, + 0644)) == -1) + errx(EXIT_FAILURE, "cannot write to %s", path); + + if ((fp = fdopen(fd, "a")) == NULL) + errx(EXIT_FAILURE, "cannot open %s", path); + + for (j = 0; j < PKG_HASH_SIZE; j++) { + SLIST_FOREACH(e, &p->required_by[j], entries) + fprintf(fp, "%s\n", e->pkgname); + } + if (fclose(fp) == EOF) { + remove(path); + errx(EXIT_FAILURE, "cannot close %s", path); + } + } + } } int diff --git a/pkgtools/pkg_install/files/create/perform.c b/pkgtools/pkg_install/files/create/perform.c index 2827c0e4b08..f178f2807b3 100644 --- a/pkgtools/pkg_install/files/create/perform.c +++ b/pkgtools/pkg_install/files/create/perform.c @@ -1,4 +1,4 @@ -/* $NetBSD: perform.c,v 1.27 2015/12/27 12:36:42 joerg Exp $ */ +/* $NetBSD: perform.c,v 1.28 2020/07/01 10:03:20 jperkin Exp $ */ #if HAVE_CONFIG_H #include "config.h" @@ -7,7 +7,7 @@ #if HAVE_SYS_CDEFS_H #include <sys/cdefs.h> #endif -__RCSID("$NetBSD: perform.c,v 1.27 2015/12/27 12:36:42 joerg Exp $"); +__RCSID("$NetBSD: perform.c,v 1.28 2020/07/01 10:03:20 jperkin Exp $"); /* * FreeBSD install - a package for the installation and maintainance @@ -68,7 +68,7 @@ register_depends(package_t *plist, char *deps, int build_only) cp = strsep(&deps, " \t\n"); if (*cp) { char *best_installed; - best_installed = find_best_matching_installed_pkg(cp); + best_installed = find_best_matching_installed_pkg(cp, 1); if (best_installed != NULL) { add_plist(plist, PLIST_BLDDEP, best_installed); if (Verbose && !PlistOnly && build_only) diff --git a/pkgtools/pkg_install/files/info/perform.c b/pkgtools/pkg_install/files/info/perform.c index e3bbbcb7fe0..89033813589 100644 --- a/pkgtools/pkg_install/files/info/perform.c +++ b/pkgtools/pkg_install/files/info/perform.c @@ -1,4 +1,4 @@ -/* $NetBSD: perform.c,v 1.63 2017/04/19 21:42:50 joerg Exp $ */ +/* $NetBSD: perform.c,v 1.64 2020/07/01 10:03:20 jperkin Exp $ */ #if HAVE_CONFIG_H #include "config.h" @@ -7,7 +7,7 @@ #if HAVE_SYS_CDEFS_H #include <sys/cdefs.h> #endif -__RCSID("$NetBSD: perform.c,v 1.63 2017/04/19 21:42:50 joerg Exp $"); +__RCSID("$NetBSD: perform.c,v 1.64 2020/07/01 10:03:20 jperkin Exp $"); /*- * Copyright (c) 2008 Joerg Sonnenberger <joerg@NetBSD.org>. @@ -566,13 +566,13 @@ CheckForBestPkg(const char *pkgname) { char *pattern, *best_match; - best_match = find_best_matching_installed_pkg(pkgname); + best_match = find_best_matching_installed_pkg(pkgname, 1); if (best_match == NULL) { if (ispkgpattern(pkgname)) return 1; pattern = xasprintf("%s-[0-9]*", pkgname); - best_match = find_best_matching_installed_pkg(pattern); + best_match = find_best_matching_installed_pkg(pattern, 1); free(pattern); } diff --git a/pkgtools/pkg_install/files/lib/iterate.c b/pkgtools/pkg_install/files/lib/iterate.c index 36778a07ab9..a438e988e69 100644 --- a/pkgtools/pkg_install/files/lib/iterate.c +++ b/pkgtools/pkg_install/files/lib/iterate.c @@ -1,4 +1,4 @@ -/* $NetBSD: iterate.c,v 1.8 2010/01/22 13:30:42 joerg Exp $ */ +/* $NetBSD: iterate.c,v 1.9 2020/07/01 10:03:20 jperkin Exp $ */ /*- * Copyright (c) 2007 Joerg Sonnenberger <joerg@NetBSD.org>. @@ -45,6 +45,34 @@ #include "lib.h" /* + * We define a couple of different caches to hold frequently accessed data. + * + * Firstly, we cache the results of readdir() on the package database directory + * when using iterate_pkg_db_cached(). This helps a lot during recursive calls + * and avoids exponential system calls, but is not suitable for situations + * where the database directory may be updated, for example during installs. + * In those situations the regular iterate_pkg_db() must be used. + * + * Secondly, we have a cache for matches of pattern lookups, avoiding expensive + * pkg_match() calls each time. + */ +struct pkg_db_list { + char *pkgname; + SLIST_ENTRY(pkg_db_list) entries; +}; +SLIST_HEAD(pkg_db_list_head, pkg_db_list); + +struct pkg_match_list { + char *pattern; + char *pkgname; + SLIST_ENTRY(pkg_match_list) entries; +}; +SLIST_HEAD(pkg_match_list_head, pkg_match_list); + +static struct pkg_db_list_head pkg_list_cache; +static struct pkg_match_list_head pkg_match_cache[PKG_HASH_SIZE]; + +/* * Generic iteration function: * - get new entries from srciter, stop on NULL * - call matchiter for those entries, stop on non-null return value. @@ -167,6 +195,74 @@ iterate_pkg_db(int (*matchiter)(const char *, void *), void *cookie) return retval; } +struct pkg_db_iter_arg { + struct pkg_db_list_head head; + struct pkg_db_list *list; +}; + +static const char * +pkg_db_iter_cached(void *cookie) +{ + struct pkg_db_iter_arg *arg = cookie; + + if (arg->list == NULL) + arg->list = SLIST_FIRST(&arg->head); + else + arg->list = SLIST_NEXT(arg->list, entries); + + if (arg->list != NULL) + return arg->list->pkgname; + + return NULL; +} + +/* + * Call matchiter for every installed package, using cached data to + * significantly increase performance during recursive calls. + * + * This is not suitable for every situation, for example when finding new + * matches after package installation/removal. In those situations the + * regular iterate_pkg_db() must be used. + */ +static int +iterate_pkg_db_cached(int (*matchiter)(const char *, void *), void *cookie) +{ + DIR *dirp; + struct dirent *dp; + struct stat st; + struct pkg_db_iter_arg arg; + struct pkg_db_list *pkg; + const char *pkgdir; + int retval; + + if (SLIST_EMPTY(&pkg_list_cache)) { + SLIST_INIT(&pkg_list_cache); + + if ((dirp = opendir(pkgdb_get_dir())) == NULL) { + if (errno == ENOENT) + return 0; /* Empty pkgdb */ + return -1; + } + + while ((pkgdir = pkg_db_iter(dirp)) != NULL) { + pkg = xmalloc(sizeof(struct pkg_db_list)); + pkg->pkgname = xstrdup(pkgdir); + SLIST_INSERT_HEAD(&pkg_list_cache, pkg, entries); + } + + if (closedir(dirp) == -1) + return -1; + } + + arg.head = pkg_list_cache; + arg.list = NULL; + + retval = iterate_pkg_generic_src(matchiter, cookie, + pkg_db_iter_cached, &arg); + + return retval; +} + static int match_by_basename(const char *pkg, void *cookie) { @@ -189,7 +285,7 @@ match_by_pattern(const char *pkg, void *cookie) { const char *pattern = cookie; - return pkg_match(pattern, pkg); + return pkg_match(pattern, pkg); } struct add_matching_arg { @@ -287,20 +383,55 @@ match_best_installed(const char *pkg, void *cookie) /* * Returns a copy of the name of best matching package. * If no package matched the pattern or an error occured, return NULL. + * + * If use_cached is set, return a cached match entry if it exists, and also use + * the iterate_pkg_db cache, otherwise clear any matching cache entry and use + * regular iterate_pkg_db(). */ char * -find_best_matching_installed_pkg(const char *pattern) +find_best_matching_installed_pkg(const char *pattern, int use_cached) { struct best_installed_match_arg arg; + struct pkg_match_list *pkg; + int idx = PKG_HASH_ENTRY(pattern), rv; + + if (pattern == NULL) + return NULL; + + SLIST_FOREACH(pkg, &pkg_match_cache[idx], entries) { + if (strcmp(pattern, pkg->pattern) == 0) { + if (use_cached) + return xstrdup(pkg->pkgname); + SLIST_REMOVE(&pkg_match_cache[idx], pkg, + pkg_match_list, entries); + free(pkg->pattern); + free(pkg->pkgname); + free(pkg); + break; + } + } arg.pattern = pattern; arg.best_current_match = NULL; - if (iterate_pkg_db(match_best_installed, &arg) == -1) { + if (use_cached) + rv = iterate_pkg_db_cached(match_best_installed, &arg); + else + rv = iterate_pkg_db(match_best_installed, &arg); + + if (rv == -1) { warnx("could not process pkgdb"); return NULL; } + if (arg.best_current_match != NULL) { + pkg = xmalloc(sizeof(struct pkg_match_list)); + pkg->pattern = xstrdup(pattern); + pkg->pkgname = xstrdup(arg.best_current_match); + SLIST_INSERT_HEAD(&pkg_match_cache[idx], + pkg, entries); + } + return arg.best_current_match; } @@ -317,7 +448,7 @@ match_and_call(const char *pkg, void *cookie) if (pkg_match(arg->pattern, pkg) == 1) { return (*arg->call_fn)(pkg, arg->cookie); - } else + } else return 0; } @@ -460,7 +591,7 @@ match_file_and_call(const char *filename, void *cookie) if (ret == 1) return (*arg->call_fn)(filename, arg->cookie); - else + else return 0; } diff --git a/pkgtools/pkg_install/files/lib/lib.h b/pkgtools/pkg_install/files/lib/lib.h index 17fe9231c59..937b53980fb 100644 --- a/pkgtools/pkg_install/files/lib/lib.h +++ b/pkgtools/pkg_install/files/lib/lib.h @@ -1,4 +1,4 @@ -/* $NetBSD: lib.h,v 1.69 2018/02/26 23:45:02 ginsbach Exp $ */ +/* $NetBSD: lib.h,v 1.70 2020/07/01 10:03:20 jperkin Exp $ */ /* from FreeBSD Id: lib.h,v 1.25 1997/10/08 07:48:03 charnier Exp */ @@ -231,6 +231,25 @@ typedef struct _lpkg_t { TAILQ_HEAD(_lpkg_head_t, _lpkg_t); typedef struct _lpkg_head_t lpkg_head_t; +/* + * To improve performance when handling lists containing a large number of + * packages, it can be beneficial to use hashed lookups to avoid excessive + * strcmp() calls when searching for existing entries. + * + * The simple hashing function below uses the first 3 characters of either a + * pattern match or package name (as they are guaranteed to exist). + * + * Based on pkgsrc package names across the tree, this can still result in + * somewhat uneven distribution due to high numbers of packages beginning with + * "p5-", "php", "py-" etc, and so there are diminishing returns when trying to + * use a hash size larger than around 16 or so. + */ +#define PKG_HASH_SIZE 16 +#define PKG_HASH_ENTRY(x) (((unsigned char)(x)[0] \ + + (unsigned char)(x)[1] * 257 \ + + (unsigned char)(x)[2] * 65537) \ + & (PKG_HASH_SIZE - 1)) + struct pkg_vulnerabilities { size_t entries; char **vulnerability; @@ -287,7 +306,7 @@ int iterate_pkg_db(int (*)(const char *, void *), void *); int add_installed_pkgs_by_basename(const char *, lpkg_head_t *); int add_installed_pkgs_by_pattern(const char *, lpkg_head_t *); -char *find_best_matching_installed_pkg(const char *); +char *find_best_matching_installed_pkg(const char *, int); char *find_best_matching_file(const char *, const char *, int, int); int match_installed_pkgs(const char *, int (*)(const char *, void *), void *); int match_local_files(const char *, int, int, const char *, int (*cb)(const char *, void *), void *); |