summary refs log tree commit diff
diff options
context:
space:
mode:
author Jerry Jelinek <jerry.jelinek@joyent.com> 2011-11-08 19:55:37 +0000
committer Jerry Jelinek <jerry.jelinek@joyent.com> 2011-11-08 19:55:37 +0000
commit 45221cbef70ef8318757e59657a66d24e476f534 (patch)
tree 8597093cb69765ea6e8968acb878493237817edf
parent a482f1f76a84c8f5a013981545322dda32ae8462 (diff)
download illumos-joyent-45221cbef70ef8318757e59657a66d24e476f534.tar.gz
OS-722 add logging and lock breaking when dlmgmtd lock is held too long
-rw-r--r-- usr/src/cmd/dlmgmtd/dlmgmt_door.c 5
-rw-r--r-- usr/src/lib/brand/joyent/zone/statechange.ksh 31
2 files changed, 35 insertions, 1 deletions
diff --git a/usr/src/cmd/dlmgmtd/dlmgmt_door.c b/usr/src/cmd/dlmgmtd/dlmgmt_door.c
index 29d265cfdb..ef5fa0e745 100644
--- a/usr/src/cmd/dlmgmtd/dlmgmt_door.c
+++ b/usr/src/cmd/dlmgmtd/dlmgmt_door.c
@@ -1329,6 +1329,10 @@ dlmgmt_zonehalt(void *argp, void *retp, size_t *sz, zoneid_t zoneid,
int err = 0;
dlmgmt_door_zonehalt_t *zonehalt = argp;
dlmgmt_zonehalt_retval_t *retvalp = retp;
+ static char my_pid[10];
+
+ if (my_pid[0] == NULL)
+ (void) snprintf(my_pid, sizeof (my_pid), "%d\n", getpid());
if ((err = dlmgmt_checkprivs(0, cred)) == 0) {
if (zoneid != GLOBAL_ZONEID) {
@@ -1353,6 +1357,7 @@ dlmgmt_zonehalt(void *argp, void *retp, size_t *sz, zoneid_t zoneid,
while ((fd = open(ZONE_LOCK, O_WRONLY |
O_CREAT | O_EXCL, S_IRUSR | S_IWUSR)) < 0)
(void) sleep(1);
+ (void) write(fd, my_pid, sizeof(my_pid));
(void) close(fd);
dlmgmt_table_lock(B_TRUE);
diff --git a/usr/src/lib/brand/joyent/zone/statechange.ksh b/usr/src/lib/brand/joyent/zone/statechange.ksh
index 30e3e4be13..6d5749fd1e 100644
--- a/usr/src/lib/brand/joyent/zone/statechange.ksh
+++ b/usr/src/lib/brand/joyent/zone/statechange.ksh
@@ -63,13 +63,42 @@ SNAPSHOT_DIR=root/checkpoints
#
lock_file()
{
+ local cnt=0
+ local prev_pid=0
while true; do
if (set -o noclobber; echo "$$" >$LOCKFILE) 2>/dev/null; then
trap 'rm -f $LOCKFILE; exit $?' INT TERM EXIT
break;
- else
+ fi
+
+ local hold_pid=`cat $LOCKFILE 2>/dev/null`
+
+ # the file might be gone or empty when we run the cat cmd
+ if [[ -z "$hold_pid" ]]; then
sleep 1
+ cnt=0
+ continue
+ fi
+
+ # if held by a different process, restart counter
+ if [[ $prev_pid != $hold_pid ]]; then
+ prev_pid=$hold_pid
+ cnt=0
fi
+
+ [[ $cnt == 20 || $cnt == 40 ]] && \
+ logger -p daemon.err "dlmgmtd lock file $LOCKFILE" \
+ "held by pid $hold_pid for $cnt seconds"
+
+ if [[ $cnt == 60 ]]; then
+ logger -p daemon.err "breaking dlmgmtd lock" \
+ "held by pid $hold_pid after $cnt seconds"
+ unlock_file
+ continue
+ fi
+
+ sleep 1
+ let cnt=$cnt+1
done
}