summaryrefslogtreecommitdiff
path: root/mk
diff options
context:
space:
mode:
author jperkin <jperkin@pkgsrc.org> 2020-08-27 11:45:45 +0000
committer jperkin <jperkin@pkgsrc.org> 2020-08-27 11:45:45 +0000
commit b8e01e0a50cef62d05902a44d4605c682185c0a2 (patch)
tree 7e2acb4b08116818da09a1b05eaf6a421e19465d /mk
parent 19976ca6a49f865379650d091bb1a763e8ee0ac7 (diff)
download pkgsrc-b8e01e0a50cef62d05902a44d4605c682185c0a2.tar.gz
mk: Rewrite the checksum script in awk.
The previous shell script version's runtime was quadratic against the number of distfiles to verify. Historically this has not been an issue, with usually only a handful of files per package. However, with the introduction of Go modules the number of distfiles used by a single package can be very high. For example, in an upcoming update of www/grafana to version 7.1.5, the number of GO_MODULE_FILES is 821. Running 'bmake checksum' takes: real 18m20.743s user 17m27.975s sys 0m49.239s With the awk code, this is reduced to a far more sensible: real 0m4.330s user 0m3.241s sys 0m0.875s The script has been written to emulate the previous version precisely, preserving the same output and error messages and supporting all of its behaviour, with the one exception that previous exit values of 128 have been changed to 3, in order to avoid any potential signed 8-bit issues. The one change in the pkgsrc infrastructure is that the mk/fetch/fetch script no longer sets a working default value for ${CHECKSUM}. This is not a problem in a pkgsrc environment as all of the required variables are set correctly, but if there happen to be any users who are using this script in a standalone environment, they will need to set it accordingly. This was probably required in many situations previously anyway, as none of the script's environment variables were set, and trying to support this would be fragile at best.
Diffstat (limited to 'mk')
-rwxr-xr-x mk/checksum/checksum 191
-rwxr-xr-x mk/checksum/checksum.awk 308
-rw-r--r-- mk/checksum/checksum.mk 10
-rwxr-xr-x mk/fetch/fetch 4
4 files changed, 315 insertions, 198 deletions
diff --git a/mk/checksum/checksum b/mk/checksum/checksum
deleted file mode 100755
index 99568659fd9..00000000000
--- a/mk/checksum/checksum
+++ /dev/null
@@ -1,191 +0,0 @@
-#!/bin/sh
-#
-# $NetBSD: checksum,v 1.16 2018/08/22 20:48:36 maya Exp $
-#
-# Copyright (c) 2006, 2007 The NetBSD Foundation, Inc.
-# All rights reserved.
-#
-# This code is derived from software contributed to The NetBSD Foundation
-# by Johnny C. Lam.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-# 1. Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# 2. Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-#
-# THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
-# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
-# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
-# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-# POSSIBILITY OF SUCH DAMAGE.
-#
-
-######################################################################
-#
-# NAME
-# checksum -- checksum files
-#
-# SYNOPSIS
-# checksum [options] distinfo [file ...]
-#
-# DESCRIPTION
-# checksum will verify the checksums in the distinfo file for each
-# of the files specified.
-#
-# The checksum utility exits with one of the following values:
-#
-# 0 All of the file checksums verify.
-#
-# 1 At least one of the file checksums did not match.
-#
-# 2 At least one of the files is missing any checksum.
-#
-# >2 An error occurred.
-#
-# OPTIONS
-# -a algorithm Only verify checksums for the specified algorithm.
-#
-# -p The specified files are patches, so strip out any
-# lines containing NetBSD RCS ID tags before
-# computing the checksums for verification.
-#
-# -s suffix Strip the specified suffix from the file names
-# when searching for the checksum.
-#
-######################################################################
-
-set -e # exit on errors
-
-: ${DIGEST:=digest}
-: ${CAT:=cat}
-: ${ECHO:=echo}
-: ${SED:=sed}
-: ${TEST:=test}
-
-self="${0##*/}"
-
-usage() {
- ${ECHO} 1>&2 "usage: $self [-a algorithm] [-p] [-s suffix] distinfo [file ...]"
-}
-
-# Process optional arguments
-algorithm=
-patch=
-suffix=
-while ${TEST} $# -gt 0; do
- case "$1" in
- -a) algorithm="$2"; shift 2 ;;
- -p) patch=yes; shift ;;
- -s) suffix="$2"; shift 2 ;;
- --) shift; break ;;
- -*) ${ECHO} 1>&2 "$self: unknown option -- ${1#-}"
- usage
- exit 128
- ;;
- *) break ;;
- esac
-done
-
-# Process required arguments
-${TEST} $# -gt 0 || { usage; exit 128; }
-distinfo="$1"; shift
-files="$@"
-files_left="$@"
-
-if ${TEST} ! -f "$distinfo"; then
- ${ECHO} 1>&2 "$self: distinfo file missing: $distinfo"
- exit 128
-fi
-
-digestcmd=
-case "${DIGEST}" in
-/*)
- if ${TEST} -x "${DIGEST}"; then
- digestcmd="${DIGEST}"
- fi
- ;;
-*)
- SAVEIFS="$IFS"; IFS=:
- for i in $PATH; do
- if ${TEST} -x "$i/${DIGEST}"; then
- digestcmd="$i/${DIGEST}"
- break
- fi
- done
- IFS="$SAVEIFS"
- ;;
-esac
-
-if ${TEST} -z "$digestcmd"; then
- ${ECHO} 1>&2 "$self: \`\`${DIGEST}'' is missing"
- exit 128
-fi
-
-{ exitcode=0
- while read d_alg d_file d_equals d_checksum; do
- case "$d_alg" in
- "#"*) continue ;; # skip comments
- "\$"*) continue ;; # skip RCS ID
- "") continue ;; # skip empty lines
- Size) continue ;; # skip lines holding filesizes, not checksums
- esac
-
- if ${TEST} -n "$algorithm"; then
- ${TEST} "$d_alg" = "$algorithm" || continue
- fi
-
- for file in $files; do
- sfile="${file%$suffix}"
- ${TEST} -z "$patch" || sfile="${sfile##*/}"
- ${TEST} "$d_file" = "($sfile)" || continue
-
- new_files_left=
- for file_left in $files_left; do
- ${TEST} "${file_left}" = "${file}" || \
- new_files_left="${new_files_left} ${file_left}"
- done
- files_left="${new_files_left}"
-
- if ${TEST} "$d_checksum" = "IGNORE"; then
- ${ECHO} 1>&2 "$self: Ignoring checksum for $sfile"
- continue
- fi
- if ${TEST} ! -f $file; then
- ${ECHO} 1>&2 "$self: $file does not exist"
- exit 128
- fi
- if ${TEST} -z "$patch"; then
- checksum=`${DIGEST} $d_alg < $file`
- else
- checksum=`${SED} -e '/[$]NetBSD.*/d' $file | ${DIGEST} $d_alg`
- fi
- if ${TEST} "$d_checksum" = "$checksum"; then
- ${ECHO} "=> Checksum $d_alg OK for $sfile"
- else
- ${ECHO} 1>&2 "$self: Checksum $d_alg mismatch for $sfile"
- exit 1
- fi
- break
- done
- done
- if ${TEST} -n "$files_left"; then
- for file in $files_left; do
- if ${TEST} -n "$algorithm"; then
- ${ECHO} 1>&2 "$self: No $algorithm checksum recorded for $file"
- else
- ${ECHO} 1>&2 "$self: No checksum recorded for $file"
- fi
- exitcode=2
- done
- fi
- exit $exitcode; } < $distinfo
diff --git a/mk/checksum/checksum.awk b/mk/checksum/checksum.awk
new file mode 100755
index 00000000000..eb03dacb2ae
--- /dev/null
+++ b/mk/checksum/checksum.awk
@@ -0,0 +1,308 @@
+#!/usr/bin/awk -f
+#
+# $NetBSD: checksum.awk,v 1.1 2020/08/27 11:45:45 jperkin Exp $
+#
+###########################################################################
+#
+# NAME
+# checksum.awk -- checksum files
+#
+# SYNOPSIS
+# checksum.awk [options] distinfo [file ...]
+#
+# DESCRIPTION
+# checksum will verify the checksums in the distinfo file for each
+# of the files specified.
+#
+# The checksum utility exits with one of the following values:
+#
+# 0 All of the file checksums verify.
+#
+# 1 At least one of the file checksums did not match.
+#
+# 2 At least one of the files is missing any checksum.
+#
+# >2 An error occurred.
+#
+# OPTIONS
+# -a algorithm Only verify checksums for the specified algorithm.
+#
+# -p The specified files are patches, so strip out any
+# lines containing NetBSD RCS ID tags before
+# computing the checksums for verification.
+#
+# -s suffix Strip the specified suffix from the file names
+# when searching for the checksum.
+#
+# BUGS
+# The flow of this program is not performed in the most optimal way
+# possible, as it was deemed important to retain output compatibility
+# with the previous shell script implementation.
+#
+
+BEGIN {
+    #
+    # The whole program runs from BEGIN: options and file names are taken
+    # from ARGV by hand and distinfo is read with explicit getline calls,
+    # so awk's implicit input loop is never entered.
+    #
+
+    # External tools, overridable through the environment.
+    DIGEST = ENVIRON["DIGEST"] ? ENVIRON["DIGEST"] : "digest"
+    SED = ENVIRON["SED"] ? ENVIRON["SED"] : "sed"
+
+    # Retain output compatible with previous "checksum" shell script
+    progname = "checksum"
+
+    only_alg = ""
+    distinfo = ""
+    exitcode = 0
+    patch = 0
+    suffix = ""
+
+    # Process optional arguments; stop at "--" or the first non-option.
+    for (arg = 1; arg < ARGC; arg++) {
+        opt = ARGV[arg]
+        if (opt == "-a") {
+            only_alg = ARGV[++arg]
+        } else if (opt == "-p") {
+            patch = 1
+        } else if (opt == "-s") {
+            suffix = ARGV[++arg]
+        } else if (opt == "--") {
+            arg++
+            break
+        } else if (opt ~ /^-/) {
+            # Report the option without its leading dash.
+            err(sprintf("%s: unknown option -- %s", progname,
+                substr(opt, 2)))
+            usage()
+            exit 3
+        } else {
+            break
+        }
+    }
+
+    # The distinfo file is the one required argument.
+    if (arg >= ARGC) {
+        usage()
+        exit 3
+    }
+
+    distinfo = ARGV[arg++]
+    # Quote the path so that whitespace or shell metacharacters in it
+    # cannot alter the command that is run.
+    cmd = sprintf("test -f %s", shquote(distinfo))
+    if (system(cmd) != 0) {
+        err(sprintf("%s: distinfo file missing: %s", progname,
+            distinfo))
+        exit 3
+    }
+
+    #
+    # Initialise list of files to check, passed on the command line.  In
+    # order to keep things simple, distfiles[] is also used when operating
+    # in patch mode (-p).
+    #
+    while (arg < ARGC) {
+        distfile = ARGV[arg++]
+        sfile = distfile
+        if (suffix) {
+            sfile = strip_suffix(sfile)
+        }
+        if (patch) {
+            # Patches are recorded in distinfo by base name only.
+            gsub(/.*\//, "", sfile)
+        }
+
+        #
+        # Have we seen this file in distinfo?  Used later to verify
+        # that all checksums have been recorded.
+        #
+        seen[sfile] = 0
+
+        #
+        # Store the filename to be checked in the distinfo file.  The
+        # -s flag allows temporary download files to be tested instead,
+        # where the suffix will be stripped to match distinfo.
+        #
+        distfiles[sfile] = distfile
+    }
+
+    #
+    # Parse the distinfo file for checksums that must be verified.  We're
+    # only interested in lines of the format:
+    #
+    #     algorithm (distfile) = checksum
+    #
+    # getline returns -1 on a read error, so the result is compared
+    # against 0 explicitly; a bare "while (getline < file)" would loop
+    # forever in that case.
+    #
+    while ((getline < distinfo) > 0) {
+        if (NF != 4) {
+            continue
+        }
+        if ($0 ~ /^(\#|\$|Size)/) {
+            continue
+        }
+
+        algorithm = $1
+        # strip "(filename)" -> "filename"
+        distfile = substr($2, 2, (length($2) - 2))
+        checksum = $4
+
+        # Skip IGNORE lines (likely legacy at this point).
+        if (checksum == "IGNORE") {
+            continue
+        }
+
+        # If -a is set then skip non-matching algorithms.
+        if (only_alg && tolower(algorithm) != tolower(only_alg)) {
+            continue
+        }
+
+        # Skip if file not in distfiles.
+        if (!(distfile in distfiles)) {
+            continue
+        }
+
+        #
+        # Handle patch files inline.  As they need to be modified (by
+        # removing the NetBSD RCS ID line) they are parsed individually
+        # by digest(1), and so we calculate the checksums now rather
+        # than saving for later processing to simplify things.
+        #
+        if (patch) {
+            patchfile = distfiles[distfile]
+            cmd = sprintf("%s -e '/[$]NetBSD.*/d' %s | %s %s",
+                SED, shquote(patchfile), DIGEST, algorithm)
+            while ((cmd | getline) > 0) {
+                checksums[algorithm, distfile] = $1
+            }
+            close(cmd)
+            continue
+        }
+
+        #
+        # If not a patch file, then we're handling a distfile, where we
+        # want to build a list of input files to digest(1) so they can
+        # all be calculated in one go.  Each name is shell-quoted as it
+        # is appended.
+        #
+        distsums[algorithm] = sprintf("%s %s", distsums[algorithm],
+            shquote(distfiles[distfile]))
+    }
+    close(distinfo)
+
+    #
+    # We now have a list of distfiles to be checked for each algorithm,
+    # pass them all to a single digest(1) command and parse the checksums
+    # to be compared against distinfo.
+    #
+    for (algorithm in distsums) {
+        cmd = sprintf("%s %s %s", DIGEST, algorithm,
+            distsums[algorithm])
+        while ((cmd | getline) > 0) {
+            # Should be unnecessary, but just in case.  If we want
+            # to be really paranoid then test that $1 == algorithm.
+            if (NF != 4) {
+                continue
+            }
+            # strip "(filename)" -> "filename"
+            distfile = substr($2, 2, length($2) - 2)
+            if (suffix) {
+                distfile = strip_suffix(distfile)
+            }
+            checksums[$1, distfile] = $4
+        }
+        close(cmd)
+    }
+
+    #
+    # Now that we have computed all the necessary checksums for all of the
+    # files listed on the command line, go back through distinfo and verify
+    # that they all match.
+    #
+    while ((getline < distinfo) > 0) {
+        if (NF != 4) {
+            continue
+        }
+        if ($0 ~ /^(\#|\$|Size)/) {
+            continue
+        }
+
+        algorithm = $1
+        # strip "(filename)" -> "filename"
+        distfile = substr($2, 2, (length($2) - 2))
+        checksum = $4
+
+        # If -a is set then skip non-matching algorithms.
+        if (only_alg && tolower(algorithm) != tolower(only_alg)) {
+            continue
+        }
+
+        # Skip if file not in distfiles.
+        if (!(distfile in distfiles)) {
+            continue
+        }
+
+        #
+        # This is likely very legacy at this point.  An ignored entry
+        # still counts as a recorded checksum, so mark the file as seen
+        # to avoid a spurious "No checksum recorded" error below.
+        #
+        if (checksum == "IGNORE") {
+            err(sprintf("%s: Ignoring checksum for %s", progname,
+                distfile))
+            seen[distfile] = 1
+            continue
+        }
+
+        if (checksums[algorithm, distfile] == checksum) {
+            printf("=> Checksum %s OK for %s\n", algorithm,
+                distfile)
+            seen[distfile] = 1
+        } else {
+            # The first mismatch is fatal.
+            err(sprintf("%s: Checksum %s mismatch for %s",
+                progname, algorithm, distfile))
+            exit 1
+        }
+    }
+    close(distinfo)
+
+    #
+    # Check that all distfiles supplied on the command line have at least
+    # one matching checksum.
+    #
+    for (distfile in distfiles) {
+        if (seen[distfile])
+            continue
+
+        if (only_alg) {
+            err(sprintf("%s: No %s checksum recorded for %s",
+                progname, only_alg, distfile))
+        } else {
+            err(sprintf("%s: No checksum recorded for %s",
+                progname, distfile))
+        }
+        exitcode = 2
+    }
+
+    exit(exitcode)
+}
+
+#
+# Return "str" wrapped in single quotes so that sh(1) treats it as one
+# literal word.  Embedded single quotes are rewritten as '\'' (close the
+# quote, emit an escaped quote, reopen the quote).
+#
+function shquote(str)
+{
+    gsub(/'/, "'\\''", str)
+    return "'" str "'"
+}
+
+#
+# Write a single diagnostic line to standard error.
+#
+function err(errmsg)
+{
+    print errmsg > "/dev/stderr"
+}
+
+#
+# Print the command synopsis on standard error.  "progname" is a global
+# set in BEGIN.
+#
+function usage()
+{
+    err("usage: " progname \
+        " [-a algorithm] [-p] [-s suffix] distinfo [file ...]")
+}
+
+#
+# In order to provide maximum compatibility, the following function attempts
+# to strip the exact string suffix, rather than a simple sub() which may
+# interpret e.g. dots incorrectly as it uses regular expressions.
+#
+# "suffix" is a global variable, and this function is only called when it is
+# set.
+#
+#
+# Return "filename" with the global "suffix" removed from its end when it
+# ends in exactly that string; otherwise return "filename" unchanged.  The
+# comparison is a plain string comparison, not a regular expression match.
+#
+# The names after "filename" in the parameter list are awk locals, not
+# arguments: callers pass only the file name.
+#
+function strip_suffix(filename,    len_file, len_sufx, len_s1)
+{
+    len_file = length(filename)
+    len_sufx = length(suffix)
+    len_s1 = len_file - len_sufx
+
+    # Names that are not longer than the suffix are left untouched.
+    if (len_s1 <= 0)
+        return filename
+
+    # Exactly len_sufx characters remain after position len_s1.
+    if (substr(filename, len_s1 + 1) == suffix)
+        return substr(filename, 1, len_s1)
+
+    return filename
+}
diff --git a/mk/checksum/checksum.mk b/mk/checksum/checksum.mk
index 4d82d777b14..83ad6d15d99 100644
--- a/mk/checksum/checksum.mk
+++ b/mk/checksum/checksum.mk
@@ -1,4 +1,4 @@
-# $NetBSD: checksum.mk,v 1.22 2016/01/06 07:38:25 dholland Exp $
+# $NetBSD: checksum.mk,v 1.23 2020/08/27 11:45:45 jperkin Exp $
#
# See bsd.checksum.mk for helpful comments.
#
@@ -27,10 +27,10 @@ _PATCH_DIGEST_ALGORITHMS?= SHA1
_COOKIE.checksum= ${_COOKIE.extract}
_CHECKSUM_CMD= \
- ${PKGSRC_SETENV} DIGEST=${TOOLS_DIGEST:Q} CAT=${TOOLS_CAT:Q} \
- ECHO=${TOOLS_ECHO:Q} SED=${TOOLS_CMDLINE_SED:Q} \
- TEST=${TOOLS_TEST:Q} \
- ${SH} ${PKGSRCDIR}/mk/checksum/checksum \
+ ${PKGSRC_SETENV} \
+ DIGEST=${TOOLS_DIGEST:Q} SED=${TOOLS_CMDLINE_SED:Q} \
+ ${AWK} -f ${PKGSRCDIR}/mk/checksum/checksum.awk --
+
.if defined(NO_CHECKSUM) || empty(_CKSUMFILES)
checksum checksum-phase:
diff --git a/mk/fetch/fetch b/mk/fetch/fetch
index f37adacc714..c6eadd51cc2 100755
--- a/mk/fetch/fetch
+++ b/mk/fetch/fetch
@@ -1,6 +1,6 @@
#!/bin/sh
#
-# $NetBSD: fetch,v 1.19 2018/08/22 20:48:37 maya Exp $
+# $NetBSD: fetch,v 1.20 2020/08/27 11:45:45 jperkin Exp $
#
# Copyright (c) 2006, 2015 The NetBSD Foundation, Inc.
# All rights reserved.
@@ -103,7 +103,7 @@
######################################################################
: ${PKGSRCDIR:=/usr/pkgsrc}
-: ${CHECKSUM:=${PKGSRCDIR}/mk/checksum/checksum}
+: ${CHECKSUM:=false}
: ${CP:=cp}
: ${ECHO:=echo}
: ${FETCH_CMD:=ftp}