#! /bin/sh
#
# Control script for running PCP QA tests
#
# Copyright (c) 1997-2002 Silicon Graphics, Inc.  All Rights Reserved.
#
# Overview
#   Runs each test named in $list with "sh <test>", compares the output
#   against <test>.out, records elapsed times in check.time and a summary
#   in check.log, and exits with the number of failed tests.
#
# The following names are used here but are not defined in this file;
# presumably they come from ./common (sourced below) or the environment:
#   variables: tmp list showme verbose color diff quick check_config sudo
#              qaown HANGCHECK OPTION_AGENTS OPTION_LOGGER and the PCP_*
#              settings
#   functions: _get_config _change_config _haveagents _havelogger
#

mypid=$$
status=0
needwrap=true
try=0
n_bad=0
bad=""
notrun=""
interrupt=true
myname=`basename $0`
iam=$myname     # a synonym

# status and log files
CHECKLOCK=/tmp/check-LOCK
CHECKSTS=/tmp/check.sts             # If you change these, hangcheck.pcpqa
CHECKPID=/tmp/check.pid             # will need to change, too.
CHECKSLOG=/var/tmp/check-start.log  # A check.log already exists for
                                    # another reason.

# Print the current time of day, in seconds since midnight, on stdout.
# Durations are computed by subtracting two of these values, so a test
# that runs across midnight produces a negative elapsed time.
#
_wallclock()
{
    date "+%H %M %S" | $PCP_AWK_PROG '{ print $1*3600 + $2*60 + $3 }'
}

# Print " [date-time]" on stdout without a trailing newline.
#
_timestamp()
{
    now=`date "+%D-%T"`
    $PCP_ECHO_PROG $PCP_ECHO_N " [$now]""$PCP_ECHO_C"
}

# Remove $CHECKLOCK, but only if it records our own pid.
# Always returns 0.
#
_release_lock()
{
    if [ -f "$CHECKLOCK" ]
    then
        LOCKOWNER=`cat "$CHECKLOCK" 2>/dev/null` || return 0
        [ "$LOCKOWNER" = "$mypid" ] && rm -f "$CHECKLOCK"
    fi
    return 0
}

# Exit/signal handler: clean up hangcheck files, the lock and temporary
# files, merge timing data into check.time and write the run summary to
# the terminal and to check.log.  The summary is written at most once
# (guarded by $needwrap) because the handler runs again for trap 0 after
# a signal.
#
_wrapup()
{
    # for hangcheck ...
    # remove files that were used by hangcheck
    #
    if [ "$HANGCHECK" = true -a "$USER" = pcpqa ]
    then
        # the pid file may already be gone, so do not complain about it
        checkpid=`cat "$CHECKPID" 2>/dev/null`
        [ "$checkpid" = "$mypid" -a -f "$CHECKSTS" ] && rm -f "$CHECKSTS"
        [ "$checkpid" = "$mypid" -a -f "$CHECKPID" ] && rm -f "$CHECKPID"
    fi

    if [ -z "$tmp" ]
    then
        # did not get very far into the initialization!
        :
    else
        # release the lock and remove backup files
        _release_lock
        [ -d "$tmp" ] && ( rm -rf "$tmp/checksums" ; rmdir "$tmp" )

        if $showme
        then
            :
        elif $needwrap
        then
            if [ -f check.time -a -f "$tmp.time" ]
            then
                # later entries ($tmp.time) override earlier ones for the
                # same test number
                cat check.time "$tmp.time" \
                | $PCP_AWK_PROG '
        { t[$1] = $2 }
END     { if (NR > 0) {
            for (i in t) print i " " t[i]
          }
        }' \
                | sort -n >"$tmp.out"
                mv "$tmp.out" check.time
            fi

            echo "" >>check.log
            date >>check.log
            echo $list | fmt | sed -e 's/^/ /' >>check.log
            $interrupt && echo "Interrupted! [running $seq]" >>check.log

            if [ ! -z "$notrun" ]
            then
                [ "$color" = true ] && tput bold && tput setaf 4    # blue
                echo "Not run:$notrun"
                [ "$color" = true ] && tput sgr0                    # reset
                echo "Not run:$notrun" | fmt >>check.log
            fi
            if [ ! -z "$n_bad" -a "$n_bad" != 0 ]
            then
                [ "$color" = true ] && tput bold && tput setaf 1    # red
                echo "Failures:$bad"
                echo "Failed $n_bad of $try tests"
                [ "$color" = true ] && tput sgr0                    # reset
                echo "Failures:$bad" | fmt >>check.log
                echo "Failed $n_bad of $try tests" >>check.log
            else
                if [ $try != 0 ]
                then
                    [ "$color" = true ] && tput bold && tput setaf 2    # green
                    echo "Passed all $try tests"
                    [ "$color" = true ] && tput sgr0                    # reset
                    echo "Passed all $try tests" >>check.log
                fi
            fi
            needwrap=false
        fi

        rm -f "$tmp".*
    fi
}

# Usage: _addfiles listfile name ...
#
# Append each name to listfile unless listfile already contains a line
# that is exactly that name.  listfile is created if it does not exist.
# Returns 1 if no listfile is given, else 0.
#
_addfiles()
{
    af=$1
    [ "$af" = "" ] && return 1
    [ ! -f "$af" ] && touch "$af"
    shift
    for fn in "$@"
    do
        # -F -x: fixed string, whole line, so that a name which is merely
        # a substring of an existing entry is still added
        grep -F -x -- "$fn" "$af" >/dev/null 2>&1
        # status 1 means "not found"; anything else (found, or grep
        # failed) leaves the list untouched
        [ $? = 1 ] && echo "$fn" >>"$af"
    done
    return 0
}

# Check that a check process with the process ID found in $CHECKLOCK
# exists, and if not, release the lock.  Always returns 0.
#
_check_lock()
{
    [ ! -f "$CHECKLOCK" ] && return 0
    PID=`cat "$CHECKLOCK" 2>/dev/null` || return 0

    # Match the pid column exactly; a substring match would let pid 123
    # be satisfied by 1234 or by digits in some argument list.
    CCNT=`ps -e -o "pid args" \
        | $PCP_AWK_PROG -v pid="$PID" '$1 == pid && /check/ { print $1 }'`

    stale=false
    [ -z "$PID" ] && stale=true             # empty lock file, no owner
    [ "$PID" != "$CCNT" ] && stale=true     # owner is not a live check
    if $stale
    then
        # We can remove the lock; no check process found with that ID
        $sudo rm -f "$CHECKLOCK"
    fi
    return 0
}

# Acquire $CHECKLOCK for this process, waiting up to 10 minutes for
# another owner to release it.  Returns 0 with the lock held, or 1 if the
# wait timed out.
#
# NOTE(review): the existence test and the creation of the lock file are
# separate steps, so two instances can still race for the lock.
#
_get_lock()
{
    # Does someone else have a lock on check at this time?  If so, we
    # can't run a test until the lock is removed.
    #
    # NOTE: the use of check-LOCK rather than check.pid was done so that
    # people running check manually (rather than run.pcpqa running check)
    # can have tests running between themselves.  This is better than
    # having people waiting on one long series of tests passed to check
    # and having spent 10 minutes waiting for nothing.

    # Check that an instance of check who claims to have the lock actually
    # exists!
    _check_lock

    # Get (make) a lock
    echomessage=true
    # 120 x 1s + 90 x 2s + 60 x 5s = 600s; the final 0 marks "give up"
    for sleeptime in \
        1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 \
        1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 \
        1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 \
        1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 \
        2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 \
        2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 \
        2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 \
        5 5 5 5 5 5 5 5 5 5 5 5 \
        5 5 5 5 5 5 5 5 5 5 5 5 \
        5 5 5 5 5 5 5 5 5 5 5 5 \
        5 5 5 5 5 5 5 5 5 5 5 5 \
        5 5 5 5 5 5 5 5 5 5 5 5 \
        0       # 10 minutes waiting time per test...
    do
        if [ -f "$CHECKLOCK" ]
        then
            LOCKOWNER=`cat "$CHECKLOCK" 2>/dev/null` || continue
            if [ "$LOCKOWNER" != "$mypid" ]
            then
                # wait until lock disappears...
                if [ "$sleeptime" = 0 ]
                then
                    # We should leave... something's terribly wrong.
                    echo ""
                    return 1
                else
                    $echomessage && \
                        $PCP_ECHO_PROG $PCP_ECHO_N " waiting for lock [owner pid=$LOCKOWNER]... ""$PCP_ECHO_C" && \
                        echomessage=false
                    sleep $sleeptime
                fi
            else
                # already have lock
                break
            fi
        else
            # make lock
            echo "$mypid" >"$CHECKLOCK"
            chmod a+r "$CHECKLOCK"
            break
        fi
    done

    # only report success if we previously announced that we were waiting
    $echomessage || echo "got it; proceeding: $seq"

    return 0
}

# Create $tmp.checkfiles (once), the list of configuration and control
# files that tests are expected to leave unchanged.
#
_make_checkfiles()
{
    if [ ! -f "$tmp.checkfiles" ]
    then
        [ -z "$PCP_PMCDOPTIONS_PATH" ] && \
            PCP_PMCDOPTIONS_PATH="$PCP_SYSCONF_DIR/pmcd/pmcd.options"
        [ -z "$PCP_PMLOGGERCONTROL_PATH" ] && \
            PCP_PMLOGGERCONTROL_PATH="$PCP_SYSCONF_DIR/pmlogger/control"
        [ -z "$PCP_PMIECONTROL_PATH" ] && \
            PCP_PMIECONTROL_PATH="$PCP_SYSCONF_DIR/pmie/control"

        # NOTE(review): this list used to name $PCP_PMLOGGERCONTROL_PATH
        # twice and never used $PCP_PMIECONTROL_PATH although it is given
        # a default just above; the duplicate is taken to be a typo for
        # the pmie control file -- confirm.
        _checkfiles="$PCP_PMCDCONF_PATH \
            $PCP_PMLOGGERCONTROL_PATH \
            $PCP_SYSCONF_DIR/pmlogger/config.default \
            $PCP_PMIECONTROL_PATH \
            $PCP_PMCDOPTIONS_PATH \
            $PCP_DIR/etc/init.d/pcp \
            $PCP_DIR/etc/pcp.conf $PCP_DIR/etc/pcp.env \
            $PCP_PMDAS_DIR/sample/dsohelp.dir \
            $PCP_PMDAS_DIR/sample/dsohelp.pag \
            $PCP_PMDAS_DIR/sample/help.dir \
            $PCP_PMDAS_DIR/sample/help.pag \
            $PCP_PMDAS_DIR/simple/simple.conf"

        # Write the list out; _checksums reads it back from
        # $tmp.checkfiles.  $_checkfiles is deliberately unquoted so that
        # it splits into one argument per file name.
        _addfiles "$tmp.checkfiles" $_checkfiles
    fi
}

# Usage: _checksums get
#        _checksums check sumsfile
#
# get:   print "sum" output for every existing file in $tmp.checkfiles on
#        stdout and keep a pristine copy of each in $tmp/checksums.
# check: compare the current state of each file against sumsfile (the
#        saved output of "get"); report missing or changed files on
#        stdout, save a changed file as <file>.$seq.O and restore the
#        pristine copy.
# Returns 0, or 1 for an unknown command.
#
_checksums()
{
    cmd="$1"
    _make_checkfiles
    case "$cmd"
    in
        get)
            mkdir -p "$tmp/checksums"
            chmod a+w "$tmp/checksums"
            for f in `cat $tmp.checkfiles`
            do
                # backup name is the full path with every / turned into +
                buf=`echo $f | sed -e 's;/;+;g'`
                buf=$tmp/checksums/$buf
                [ -f $f ] && sum $f
                [ -f $f -a ! -f $buf ] && $sudo cp $f $buf
            done
            ;;

        check)
            for f in `cat $tmp.checkfiles`
            do
                buf=`echo $f | sed -e 's;/;+;g'`
                buf=$tmp/checksums/$buf
                if [ ! -f $f ]
                then
                    # only a problem if the file existed at "get" time
                    if grep -F -- "$f" $2 >/dev/null 2>&1
                    then
                        echo " Missing: \"$f\""
                        [ -f $buf ] && $sudo cp -f $buf $f
                    fi
                else
                    _cs=`sum $f`
                    if grep -F -- "$_cs" $2 >/dev/null 2>&1
                    then
                        # unchanged: drop any copy saved by an earlier
                        # run of this test
                        $sudo rm -f $f.$seq.O
                    else
                        echo " Changed: \"$f\""
                        $sudo cp -f $f $f.$seq.O
                        [ -f $buf ] && $sudo cp -f $buf $f
                    fi
                fi
            done
            ;;

        *)
            # stdout is captured by callers, so complain on stderr
            echo "$myname: _checksums: unknown command \"$cmd\"" >&2
            return 1
            ;;
    esac
    return 0
}

# Run _wrapup on exit and on HUP, INT, QUIT and TERM, then exit with
# $status.  Note that "exit N" anywhere below is overridden by this
# "exit $status", so $status must be set before exiting with an error.
trap "_wrapup; exit \$status" 0 1 2 3 15

# by default don't output timestamps
timestamp=false

# extra stuff for tracing QA runs - off/on via $qatrace
qatrace=false
qadepot=mazur.melbourne
qasrc=`hostname`

# constants - meaningful as state transitions in qavis
qanotyet=1      # test not yet started
qarunning=2     # test still going
qafailed=3      # test failed
qapassed=4      # test passed

PCP_TRACE_TIMEOUT=15
export PCP_TRACE_TIMEOUT

# generic initialization... this may take a while to run, because (unless
# $quick is true) make is run.
. ./common

# we have to cheat a bit... but we need to create a check.[pid|sts] file
# to tell hangcheck that we are alive, but not ready to run yet.
if [ "$HANGCHECK" = true -a "$USER" = pcpqa ]
then
    # for hangcheck ...
    # Save pid of check in a well known place, so that hangcheck can be sure it
    # has the right pid (getting the pid from ps output is not reliable enough).
    #
    if [ -f "$CHECKPID" ]
    then
        # Third field of "ls -l" is the owner.  ls must be run directly:
        # handing the string "ls -l file" to /bin/sh as an operand makes
        # sh look for a script of that name, so no owner was ever found.
        checkpidowner=`ls -l "$CHECKPID" 2>/dev/null | $PCP_AWK_PROG '{ print $3 }'`
        if [ "$checkpidowner" != pcpqa ]
        then
            $sudo rm -f "$CHECKPID"
        else
            # There should be a BIG FAT WARNING here if QA is trying to
            # run tests twice!
            echo "$myname: a check.pid file already exists... are you already running tests?!" >&2
            status=1    # the exit trap exits with $status, not with 1
            exit 1
        fi
    fi
    [ ! -f "$CHECKPID" ] && echo "$mypid" >"$CHECKPID"

    # for hangcheck ...
    # Save the status of check in a well known place, so that hangcheck can be
    # sure to know where check is up to (getting test number from ps output is
    # not reliable enough since the trace stuff has been introduced).
    #
    if [ -f "$CHECKSTS" ]
    then
        checkpidowner=`ls -l "$CHECKSTS" 2>/dev/null | $PCP_AWK_PROG '{ print $3 }'`
        if [ "$checkpidowner" != pcpqa ]
        then
            $sudo rm -f "$CHECKSTS"
        else
            echo "$myname: a check.sts file already exists... are you already running tests?!" >&2
            status=1    # the exit trap exits with $status, not with 1
            exit 1
        fi
    fi
    [ ! -f "$CHECKSTS" ] && echo "preamble" >"$CHECKSTS"
fi

[ -f check.time ] || touch check.time

[ "`_get_config pmcd`" != on ] && _change_config pmcd on

if $showme
then
    qatrace=false
fi

if $qatrace
then
    # if tracing turned on, make sure trace agent running ok
    switchon=`pmprobe -h $qadepot trace.control.reset 2>&1 | $PCP_AWK_PROG '{ print $2 }'`
    [ "$switchon" != "1" ] && qatrace=false
fi

if $qatrace
then
    # tag every test as "not yet started"
    for seq in $list
    do
        $verbose && printf "Preparing pmtrace tags: %-.16s:%s\r" \
                "$qasrc" "$seq"
        pmtrace -qh $qadepot -v $qanotyet "$qasrc:$seq" 2>/dev/null
    done
    $verbose && printf "%68s\r" " "
fi

torun=`echo $list | wc -w | sed -e 's/ //g'`
haverun=0

for seq in $list
do
    err=false
    if $showme
    then
        echo $seq
        continue
    fi

    # progress indicator, only for longer runs
    if [ $torun -gt 9 ]
    then
        pct=`expr 100 \* $haverun / $torun`
        haverun=`expr $haverun + 1`
        $PCP_ECHO_PROG $PCP_ECHO_N "[$pct%] ""$PCP_ECHO_C"
    fi
    $PCP_ECHO_PROG $PCP_ECHO_N "$seq""$PCP_ECHO_C"

    if [ ! -f "$seq" ]
    then
        echo " [not run, missing]"
        notrun="$notrun $seq"
        continue
    else
        # really going to try and run this one
        #
        rm -f "$seq.out.bad"
        # elapsed time recorded in check.time for this test, if any
        lasttime=`sed -n -e "/^$seq /s/.* //p" <check.time`
        # NOTE(review): the text that followed the sed expression above
        # was lost when this file was extracted; only the fragment
        # "&1 fi" survived.  The redirection from check.time and the
        # closing backquote have been restored, but at least one
        # "if ... fi" statement is still missing here and must be
        # recovered from version control.  Presumably the missing text
        # reported $lasttime and called _get_lock (nothing else in this
        # file calls it, yet _release_lock runs after every test) --
        # confirm.

        if $check_config
        then
            # save checksums for critical conf and control files
            [ ! -f "$tmp.checksums" ] && _checksums get >"$tmp.checksums"
        fi

        start=`_wallclock`
        $timestamp && _timestamp

        # for hangcheck ...
        [ "$HANGCHECK" = true -a "$USER" = pcpqa ] && echo "$seq" >"$CHECKSTS"

        if $qatrace
        then
            pmtrace -qh $qadepot -v $qarunning "$qasrc:$seq" 2>/dev/null
            pmtrace -qh $qadepot -e "sh $seq" "$qasrc:$seq" >"$tmp.out.1" 2>&1
            sts=$?
            # check for trace errors on first line of test & blow them away
            $PCP_AWK_PROG '/pmtrace: / {if (NR != 1) print $0; next} {print $0}' "$tmp.out.1" > "$tmp.out"
        else
            sh $seq >"$tmp.out" 2>&1
            sts=$?
        fi

        $timestamp && _timestamp
        stop=`_wallclock`

        # for hangcheck ...
        [ "$HANGCHECK" = true -a "$USER" = pcpqa ] && echo "working" >"$CHECKSTS"

        if $check_config
        then
            # check the saved checksums
            _checksums check "$tmp.checksums" >"$tmp.check"
            if [ -s "$tmp.check" ]
            then
                # appended to the test output, so the comparison with
                # $seq.out below will fail
                echo "$myname: $seq: ERROR: test failed to restore the following config files:" >>"$tmp.out"
                cat "$tmp.check" >>"$tmp.out"
                $PCP_ECHO_PROG $PCP_ECHO_N " [config not restored]""$PCP_ECHO_C"
            fi
        fi

        # remove the lock
        _release_lock

        if [ -f core ]
        then
            $PCP_ECHO_PROG $PCP_ECHO_N " [dumped core]""$PCP_ECHO_C"
            mv core "$seq.core"
            err=true
        fi

        if [ -f "$seq.notrun" ]
        then
            # the test decided for itself that it cannot run here
            [ "$color" = true ] && tput bold && tput setaf 4    # blue
            echo " [not run] `cat $seq.notrun`"
            [ "$color" = true ] && tput sgr0                    # reset
            notrun="$notrun $seq"
        else
            if [ $sts -ne 0 ]
            then
                $PCP_ECHO_PROG $PCP_ECHO_N " [failed, exit status $sts]""$PCP_ECHO_C"
                err=true
            fi
            if [ ! -f "$seq.out" ]
            then
                echo " - no qualified output"
                mv "$tmp.out" "$seq.out.bad"
                err=true
            else
                if diff "$seq.out" "$tmp.out" >/dev/null 2>&1
                then
                    echo ""
                    if $err
                    then
                        :
                    else
                        # only clean passes contribute timing data
                        echo "$seq `expr $stop - $start`" >>"$tmp.time"
                    fi
                else
                    [ "$color" = true ] && tput bold && tput setaf 1    # red
                    echo " - output mismatch (see $seq.out.bad)"
                    [ "$color" = true ] && tput sgr0                    # reset
                    mv "$tmp.out" "$seq.out.bad"
                    # $diff is a command (possibly with arguments), so it
                    # is deliberately left unquoted
                    $diff $seq.out $seq.out.bad
                    err=true
                fi
            fi

            # really tried to run the test, update the state
            #
            # NOTE(review): these tags carry a ":$qaown" suffix that the
            # tags used before and during the run do not -- confirm that
            # this is intended.
            if $qatrace
            then
                if $err
                then
                    pmtrace -qh $qadepot -v $qafailed "$qasrc:$seq:$qaown" 2>/dev/null
                else
                    pmtrace -qh $qadepot -v $qapassed "$qasrc:$seq:$qaown" 2>/dev/null
                fi
            fi
        fi
    fi

    # come here for each test, except when $showme is true
    #
    if $err
    then
        bad="$bad $seq"
        n_bad=`expr $n_bad + 1`
        quick=false
        [ "$diff" = true ] || echo "Check local PMCD is still alive ..."
        $OPTION_AGENTS && _haveagents
        $OPTION_LOGGER && _havelogger
    fi

    [ -f "$seq.notrun" ] || try=`expr $try + 1`
    rm -f "$seq.notrun"

done

interrupt=false
status=$n_bad
exit