summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordmcmahill <dmcmahill@pkgsrc.org>2003-07-23 09:41:23 +0000
committerdmcmahill <dmcmahill@pkgsrc.org>2003-07-23 09:41:23 +0000
commit28fd31fa83171df72b457bd86b86fc5dd1d39b08 (patch)
treeb87d783cb2dda0050f33813665088c4d11f50b76
parent3cd568e4b73e2ec89310d95cb183c830be148ca9 (diff)
downloadpkgsrc-28fd31fa83171df72b457bd86b86fc5dd1d39b08.tar.gz
rework the INDEX file generation. The new approach speeds up things by
several orders of magnitude and 'make index' now takes 30 minutes or so instead of several days on my test machine. The approach now is to take one pass through every package and extract some key information including the explicitly listed dependencies. After the data is extracted, the dependencies are flattened in one step which avoids the extremely inefficient recursive make that was previously used.
-rw-r--r--Makefile48
-rw-r--r--mk/bsd.pkg.mk22
-rwxr-xr-xmk/scripts/genindex.awk389
3 files changed, 449 insertions, 10 deletions
diff --git a/Makefile b/Makefile
index 2c088fd8689..02d18a53d5e 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-# $NetBSD: Makefile,v 1.55 2003/06/30 22:05:11 hubertf Exp $
+# $NetBSD: Makefile,v 1.56 2003/07/23 09:41:23 dmcmahill Exp $
#
.include "mk/bsd.prefs.mk"
@@ -101,14 +101,48 @@ README.html: .PRECIOUS
_PKGSRCDIR=${.CURDIR}
.endif
-index:
- @${RM} -f ${.CURDIR}/INDEX
- @${MAKE} ${.CURDIR}/INDEX
+.PHONY: index
+index: ${.CURDIR}/INDEX
+# Build INDEX in two steps: first run "print-summary-data" in every package
+# directory named by a SUBDIR line of a category Makefile and collect the
+# output in DEPENDSDB, then let genindex.awk flatten the dependencies and
+# write INDEX. DEPENDSDB is a scratch file and is removed afterwards.
+# Shell variables in the loop need "$$": a single "$" is expanded by make.
+# NOTE(review): INDEX has no prerequisites, so an existing INDEX is
+# presumably not regenerated by "make index" - confirm this is intended.
${.CURDIR}/INDEX:
- @${ECHO} -n "Generating INDEX - please wait.."
- @${MAKE} describe ECHO_MSG="${ECHO} > /dev/null" > ${.CURDIR}/INDEX
- @${ECHO} " Done."
+ @${RM} -f ${.CURDIR}/DEPENDSDB
+ @${ECHO_MSG} "Extracting complete dependency database. This may take a while..."
+ @DB=${.CURDIR}/DEPENDSDB ; \
+ PKGSRCDIR=${.CURDIR} ; \
+ npkg=1; \
+ ${RM} -fr $$DB ; \
+ list=`${GREP} '^[[:space:]]*'SUBDIR */Makefile | sed 's,/Makefile.*=[[:space:]]*,/,'` ; \
+ for pkgdir in $$list ; do \
+ if [ ! -d $$pkgdir ]; then \
+ echo " " ; \
+ echo "WARNING: the package directory $$pkgdir is listed in" > /dev/stderr ; \
+ echo $$pkgdir | sed 's;/.*;/Makefile;g' > /dev/stderr ; \
+ echo "but the directory does not exist. Please fix this!" > /dev/stderr ; \
+ else \
+ cd $$pkgdir ; \
+ l=`${MAKE} print-summary-data` ; \
+ if [ $$? != 0 ]; then \
+ echo "WARNING (printdepends): the package in $$pkgdir had problem with" \
+ > /dev/stderr ; \
+ echo " ${MAKE} print-summary-data" > /dev/stderr ; \
+ echo " database information for this package" > /dev/stderr ; \
+ echo " will be dropped." > /dev/stderr ; \
+ ${MAKE} print-summary-data > /dev/stderr 2>&1 ; \
+ else \
+ echo "$$l" >> $$DB ; \
+ fi ; \
+ fi ; \
+ echo -n "." ; \
+ if [ `${EXPR} $$npkg % 100 = 0` -eq 1 ]; then \
+ echo " " ; \
+ echo "$$npkg" ; \
+ fi ; \
+ npkg=`${EXPR} $$npkg + 1` ; \
+ cd $$PKGSRCDIR ; \
+ done
+ @${RM} -f ${.CURDIR}/INDEX
+ @${AWK} -f ./mk/scripts/genindex.awk PKGSRCDIR=${.CURDIR} SORT=${SORT} ${.CURDIR}/DEPENDSDB
+ @${RM} -f ${.CURDIR}/DEPENDSDB
print-index: ${.CURDIR}/INDEX
@${AWK} -F\| '{ printf("Port:\t%s\nPath:\t%s\nInfo:\t%s\nMaint:\t%s\nIndex:\t%s\nB-deps:\t%s\nR-deps:\t%s\nArch:\t%s\n\n", $$1, $$2, $$4, $$6, $$7, $$8, $$9, $$10); }' < ${.CURDIR}/INDEX
diff --git a/mk/bsd.pkg.mk b/mk/bsd.pkg.mk
index 945a17d4910..75e9fd036fa 100644
--- a/mk/bsd.pkg.mk
+++ b/mk/bsd.pkg.mk
@@ -1,4 +1,4 @@
-# $NetBSD: bsd.pkg.mk,v 1.1220 2003/07/22 13:48:48 agc Exp $
+# $NetBSD: bsd.pkg.mk,v 1.1221 2003/07/23 09:41:26 dmcmahill Exp $
#
# This file is in the public domain.
#
@@ -3996,8 +3996,24 @@ print-summary-data:
@${ECHO} wildcard ${PKGPATH} ${PKGWILDCARD:Q}
@${ECHO} comment ${PKGPATH} ${COMMENT:Q}
@${ECHO} license ${PKGPATH} ${LICENSE:Q}
- @${ECHO} onlyfor ${PKGPATH} ${ONLY_FOR_ARCHS}
- @${ECHO} notfor ${PKGPATH} ${NOT_FOR_OPSYS}
+# Both tests must stay silent ("@"): the output of this target is captured
+# verbatim into the dependency database by the top-level INDEX rule, so an
+# echoed command line would end up in that file.
+ @if [ "${ONLY_FOR_ARCHS}" = "" ]; then \
+ ${ECHO} "onlyfor ${PKGPATH} any"; \
+ else \
+ ${ECHO} "onlyfor ${PKGPATH} ${ONLY_FOR_ARCHS}"; \
+ fi
+ @if [ "${NOT_FOR_OPSYS}" = "" ]; then \
+ ${ECHO} "notfor ${PKGPATH} any"; \
+ else \
+ ${ECHO} "notfor ${PKGPATH} not ${NOT_FOR_OPSYS}"; \
+ fi
+ @${ECHO} "maintainer ${PKGPATH} ${MAINTAINER}"
+ @${ECHO} "categories ${PKGPATH} ${CATEGORIES}"
+ @if [ -f ${DESCR_SRC} ]; then \
+ ${ECHO} "descr ${PKGPATH} ${DESCR_SRC}"; \
+ else \
+ ${ECHO} "descr ${PKGPATH} /dev/null"; \
+ fi
+ @${ECHO} "prefix ${PKGPATH} ${PREFIX}"
.endif
.if !target(show-license)
diff --git a/mk/scripts/genindex.awk b/mk/scripts/genindex.awk
new file mode 100755
index 00000000000..b21fcc2cf41
--- /dev/null
+++ b/mk/scripts/genindex.awk
@@ -0,0 +1,389 @@
+#!/usr/bin/awk -f
+# $NetBSD: genindex.awk,v 1.1 2003/07/23 09:41:29 dmcmahill Exp $
+#
+# Copyright (c) 2002, 2003 The NetBSD Foundation, Inc.
+# All rights reserved.
+#
+# This code is derived from software contributed to The NetBSD Foundation
+# by Dan McMahill.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# 3. All advertising materials mentioning features or use of this software
+# must display the following acknowledgement:
+# This product includes software developed by the NetBSD
+# Foundation, Inc. and its contributors.
+# 4. Neither the name of The NetBSD Foundation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+
+# Global variables
+#-----------------
+# The following associative arrays are used for storing the dependency
+# information and other information for the packages
+#
+# topdepends[] : index=pkgdir (math/scilab)
+# List of explicitly listed dependencies by name.
+# I.e. "xless-[0-9]* pvm-3.4.3"
+#
+# alldepends[] : index=pkgdir (math/scilab)
+# Flattened dependency list by name.
+#
+#
+
+
+BEGIN {
+# announce the first phase, then leave debug tracing switched off
+    print "Reading database file";
+    debug = 0;
+}
+
+#conflicts /usr/pkgsrc/math/scilab
+#depends /usr/pkgsrc/math/scilab xless-[0-9]*:../../x11/xless pvm-3.4.3:../../parallel/pvm3
+#
+
+/^(build_)?depends / {
+#
+# Record the explicitly listed dependencies of one package.
+# Input lines look like:
+#
+#depends /usr/pkgsrc/math/scilab xless-[0-9]*:../../x11/xless pvm-3.4.3:../../parallel/pvm3
+#build_depends /usr/pkgsrc/math/scilab libtool-base>=1.4.20010614nb9:../../devel/libtool-base
+#
+# Field 1 is the record type, field 2 the package directory (used as is)
+# and every further field is a "pattern:directory" pair.
+#
+    kind = $1;
+    pkg = $2;
+
+# make sure the package has an entry in both lists, even if it stays empty
+    if (!(pkg in topdepends)) topdepends[pkg] = "";
+    if (!(pkg in topbuilddepends)) topbuilddepends[pkg] = "";
+
+    for (i = 3; i <= NF; i++) {
+        split($i, parts, ":");
+        pkgpat = parts[1];
+        pkgdir = parts[2];
+
+# drop the leading run of dots and slashes ("../../")
+        sub(/[\.\/]*/, "", pkgdir);
+
+# a directory without a slash gets the category of the depending package
+        if (pkgdir !~ /\//) {
+            pkgcat = pkg;
+            gsub(/\/.*/, "", pkgcat);
+            pkgdir = pkgcat "/" pkgdir;
+            if (debug)
+                printf("Corrected missing category directory to get \"%s\"\n",
+                       pkgdir);
+        }
+        if (debug) {
+            printf("package in directory %s %s on:\n",
+                   pkg, kind);
+            printf("\tpkgpat = %s\n", pkgpat);
+            printf("\tpkgdir = %s\n", pkgdir);
+        }
+
+#
+# remember which directory provides this pattern; the flattening step
+# looks the directory up by pattern later on
+#
+        pat2dir[pkgpat] = pkgdir;
+
+        if (kind != "depends") {
+# build dependency: the debug trace is printed before the append
+            if (debug) {
+                printf("Appending %s to topbuilddepends[%s] (%s)\n",
+                       pkgpat, pkg, topbuilddepends[pkg]);
+            }
+            topbuilddepends[pkg] = topbuilddepends[pkg] " " pkgpat " " ;
+            continue;
+        }
+
+# run-time dependency: the debug trace is printed after the append
+        topdepends[pkg] = topdepends[pkg] " " pkgpat " " ;
+        if (debug) {
+            printf("Appending %s to topdepends[%s] (%s)\n",
+                   pkgpat, pkg, topdepends[pkg]);
+        }
+    }
+
+    next;
+}
+
+#
+# One rule per record keyword. Field 2 is always the package directory.
+# categories, comment, notfor and onlyfor keep the whole rest of the line:
+# the keyword is cut off first, which makes the directory the new first
+# field, then everything after that field (minus leading blanks) is stored.
+#
+/^categories /{
+    dir = $2;
+    sub(/^categories[ \t]*/, "");
+    rest = substr($0, length($1) + 1);
+    sub(/^[ \t]*/, "", rest);
+    categories[dir] = rest;
+    next;
+}
+
+/^comment /{
+    dir = $2;
+    sub(/^comment[ \t]*/, "");
+    rest = substr($0, length($1) + 1);
+    sub(/^[ \t]*/, "", rest);
+    comment[dir] = rest;
+    next;
+}
+
+# description file: third field only
+/^descr /{
+    descr[$2] = $3;
+    next;
+}
+
+/^index / {
+#
+# lines look like:
+#index /usr/pkgsrc/math/scilab scilab-2.6nb3
+# Keep a two-way mapping between package name and package directory.
+# The directory is stored exactly as given in the second field;
+# fulldir2pkgdir() is not applied to it.
+#
+    pkgname2dir[$3] = $2;
+    pkgdir2name[$2] = $3;
+    next;
+}
+
+# license: third field only
+/^license /{
+    license[$2] = $3;
+    next;
+}
+
+# maintainer: third field only
+/^maintainer /{
+    maintainer[$2] = $3;
+    next;
+}
+
+/^notfor /{
+    dir = $2;
+    sub(/^notfor[ \t]*/, "");
+    rest = substr($0, length($1) + 1);
+    sub(/^[ \t]*/, "", rest);
+    notfor[dir] = rest;
+    next;
+}
+
+/^onlyfor /{
+    dir = $2;
+    sub(/^onlyfor[ \t]*/, "");
+    rest = substr($0, length($1) + 1);
+    sub(/^[ \t]*/, "", rest);
+    onlyfor[dir] = rest;
+    next;
+}
+
+# installation prefix: third field only
+/^prefix /{
+    prefix[$2] = $3;
+    next;
+}
+
+# wildcard pattern of the package: third field only
+/^wildcard /{
+    wildcard[$2] = $3;
+}
+
+# After the whole database is read:
+# flatten the run-time depends of every package, then the build depends, and finally write one INDEX line per package through the sort pipe
+#
+
+END {
+ if( SORT == "" ) { SORT = "sort"; } # SORT may be given on the command line; fall back to plain "sort"
+ indexf = SORT " > INDEX"; # output pipe: INDEX lines go through sort into ./INDEX
+ if ( dependsfile == "" ) dependsfile = "/dev/null"; # dump of the run-time lists, discarded unless dependsfile is set
+ if ( builddependsfile == "" ) builddependsfile = "/dev/null"; # same for the build lists
+
+ printf("Flattening dependencies\n");
+ printf("") > dependsfile; # create/truncate the dump file
+ for (toppkg in topdepends){
+ if (debug) printf("calling find_all_depends(%s, run)\n", toppkg);
+ find_all_depends(toppkg, "run"); # result is left in alldepends[toppkg]
+ if (debug) printf("%s depends on: %s, topdepends on %s\n",
+ toppkg, alldepends[toppkg],
+ topdepends[toppkg]);
+ printf("%s depends on: %s\n",
+ toppkg, alldepends[toppkg]) >> dependsfile;
+ flatdepends[toppkg] = alldepends[toppkg]; # keep the run-time list; alldepends[] is reused below
+ }
+ close(dependsfile);
+
+
+# empty alldepends[] (the cache used by find_all_depends) and repeat the flattening for the build depends
+ for( pkg in alldepends) {
+ delete alldepends[pkg];
+ }
+
+ printf("Flattening build dependencies\n");
+ printf("") > builddependsfile; # create/truncate the dump file
+ for (toppkg in topbuilddepends){
+ find_all_depends(toppkg, "build"); # "build" follows BUILD_DEPENDS and DEPENDS
+ printf("%s build_depends on: %s\n",
+ toppkg, alldepends[toppkg]) >> builddependsfile;
+ }
+ close(builddependsfile);
+
+ printf("Generating INDEX file\n");
+
+# Output format (one line per package, fields separated by "|"):
+# package-name|package-path|installation-prefix|comment| \
+# description-file|maintainer|categories|build deps|run deps|for arch| \
+# not for opsys
+
+ pkgcnt = 0;
+ for (toppkg in topdepends){
+ pkgcnt++;
+ pkgdir = PKGSRCDIR "/" toppkg; # package-path column: PKGSRCDIR joined with the package directory
+ printf("%s|", pkgdir2name[toppkg]) | indexf;
+ printf("%s|", pkgdir) | indexf;
+ printf("%s|", prefix[toppkg]) | indexf;
+ printf("%s|", comment[toppkg]) | indexf;
+ printf("%s|", descr[toppkg]) | indexf;
+ printf("%s|", maintainer[toppkg]) | indexf;
+ printf("%s|", categories[toppkg]) | indexf;
+ gsub(/^ /, "", alldepends[toppkg]); # strip one leading and one trailing blank before printing
+ gsub(/ $/, "", alldepends[toppkg]);
+ printf("%s|", alldepends[toppkg]) | indexf; # build deps column: alldepends[] now holds the "build" lists
+ gsub(/^ /, "", flatdepends[toppkg]);
+ gsub(/ $/, "", flatdepends[toppkg]);
+ printf("%s|", flatdepends[toppkg]) | indexf; # run deps column: the lists saved in the first pass
+ printf("%s|", onlyfor[toppkg]) | indexf;
+ printf("%s", notfor[toppkg]) | indexf;
+ printf("\n") | indexf;
+ }
+ close(indexf); # closing the pipe lets sort finish writing INDEX
+ printf("Indexed %d packages\n", pkgcnt);
+ exit 0;
+}
+
+function find_all_depends(pkg, type, pkgreg, i, deps, depdir, topdep){
+# Return the flattened dependency list of a package and cache it in
+# alldepends[].
+#   pkg  - package directory, like math/scilab
+#   type - "run" follows DEPENDS only; any other value follows
+#          BUILD_DEPENDS and DEPENDS
+# The remaining parameters are local variables (pkgreg is unused and only
+# kept so the parameter list stays as it was).
+# The result is a blank separated list of dependency patterns.
+
+# a package that has already been flattened is answered from the cache
+    if (pkg in alldepends){
+        if (debug) printf("\t%s is allready depended. Returning %s\n",
+                          pkg, alldepends[pkg]);
+        return(alldepends[pkg]);
+    }
+
+# Collect the explicitly listed dependencies. Test with "in" before
+# reading: a plain reference to topdepends[pkg] would create an empty
+# element when pkg is a dependency directory that is missing from the
+# database. That changes the array the END block is iterating over and
+# later produces an INDEX line with an empty package name.
+    topdep = "";
+    if (pkg in topdepends)
+        topdep = topdepends[pkg];
+    if (type != "run" && (pkg in topbuilddepends))
+        topdep = topdep " " topbuilddepends[pkg];
+
+# no explicit dependencies: cache and return an empty flat list
+    if (topdep ~ "^[ \t]*$") {
+        alldepends[pkg] = " ";
+        if (debug) printf("\t%s has no depends(%s). Returning %s\n",
+                          pkg, topdep, alldepends[pkg]);
+        return(alldepends[pkg]);
+    }
+
+# Seed the cache entry before recursing: if a dependency cycle leads back
+# to pkg, the lookup at the top returns this partial list instead of
+# recursing forever.
+    split(topdep, deps);
+    alldepends[pkg] = " ";
+    for (i = 1; i in deps; i++) {
+
+# directory that provides the package named by this
+# wild card/dewey pattern
+        depdir = pat2dir[deps[i]];
+        if (debug) printf("\tadding dependency #%d on \"%s\" (%s)\n",
+                          i, deps[i], depdir);
+
+# add the pattern and everything it depends on, unless the pattern was
+# already pulled in by one of the dependencies handled before it
+        if (alldepends[pkg] !~ reg2str(deps[i])){
+            alldepends[pkg] = alldepends[pkg] " " deps[i] " " find_all_depends(depdir, type);
+        }
+        else {
+            if (debug) printf("\t%s is already listed in %s\n",
+                              deps[i], alldepends[pkg]);
+        }
+    }
+
+    if (debug) printf("\tcalling uniq() on alldepends[%s] = %s\n",
+                      pkg, alldepends[pkg]);
+    alldepends[pkg] = uniq(alldepends[pkg]);
+    if (debug) printf("\tuniq() output alldepends[%s] = %s\n",
+                      pkg, alldepends[pkg]);
+    return(alldepends[pkg]);
+}
+
+#
+# take a string which has special characters like '+' in it and
+# escape them. Also put a space before and after since that's how
+# we'll distinguish things like gnome from gnome-libs
+#
+function reg2str(reg){
+# Put a backslash in front of ] [ ? * + and . so the string can be used
+# as a regular expression that matches itself. Each call handles a
+# different character and none of them touches the backslash, so the
+# order of the calls does not matter.
+    gsub(/\]/, "\\\]", reg);
+    gsub(/\[/, "\\\[", reg);
+    gsub(/\?/, "\\\?", reg);
+    gsub(/\*/, "\\\*", reg);
+    gsub(/\+/, "\\\+", reg);
+    gsub(/\./, "\\\.", reg);
+
+# the surrounding blanks make the expression match whole list entries only
+    return(" "reg" ");
+}
+
+#
+# accepts a full path to a package directory, like "/usr/pkgsrc/math/scilab"
+# and returns just the last 2 directories, like "math/scilab"
+#
+function fulldir2pkgdir(d, i){
+# match() leaves the position of the "/category/package" tail in RSTART,
+# or 0 when nothing matches; in that case d is returned unchanged
+    match(d, /\/[^\/]+\/[^\/]+$/);
+    i = RSTART;
+    return substr(d, i + 1);
+}
+
+#
+# take the depends lists and uniq them.
+#
+function uniq(list, deps, i, ulist){
+# Return list with repeated entries removed, keeping the first occurrence
+# of each. The result starts with a blank and every entry is followed by
+# one, which is what the reg2str() match relies on.
+    split(list, deps);
+    ulist = " ";
+    for (i = 1; i in deps; i++) {
+# skip an entry that is already part of the result
+        if (ulist ~ reg2str(deps[i]))
+            continue;
+        ulist = ulist deps[i]" ";
+    }
+    return(ulist);
+}
+
+
+