diff options
| author | Jerry Jelinek <jerry.jelinek@joyent.com> | 2011-11-08 19:55:37 +0000 |
|---|---|---|
| committer | Jerry Jelinek <jerry.jelinek@joyent.com> | 2011-11-08 19:55:37 +0000 |
| commit | 45221cbef70ef8318757e59657a66d24e476f534 (patch) | |
| tree | 8597093cb69765ea6e8968acb878493237817edf | |
| parent | a482f1f76a84c8f5a013981545322dda32ae8462 (diff) | |
| download | illumos-joyent-45221cbef70ef8318757e59657a66d24e476f534.tar.gz | |
OS-722 add logging and lock breaking when dlmgmtd lock is held too long
| -rw-r--r-- | usr/src/cmd/dlmgmtd/dlmgmt_door.c | 5 | ||||
| -rw-r--r-- | usr/src/lib/brand/joyent/zone/statechange.ksh | 31 |
2 files changed, 35 insertions, 1 deletion
diff --git a/usr/src/cmd/dlmgmtd/dlmgmt_door.c b/usr/src/cmd/dlmgmtd/dlmgmt_door.c index 29d265cfdb..ef5fa0e745 100644 --- a/usr/src/cmd/dlmgmtd/dlmgmt_door.c +++ b/usr/src/cmd/dlmgmtd/dlmgmt_door.c @@ -1329,6 +1329,10 @@ dlmgmt_zonehalt(void *argp, void *retp, size_t *sz, zoneid_t zoneid, int err = 0; dlmgmt_door_zonehalt_t *zonehalt = argp; dlmgmt_zonehalt_retval_t *retvalp = retp; + static char my_pid[10]; + + if (my_pid[0] == NULL) + (void) snprintf(my_pid, sizeof (my_pid), "%d\n", getpid()); if ((err = dlmgmt_checkprivs(0, cred)) == 0) { if (zoneid != GLOBAL_ZONEID) { @@ -1353,6 +1357,7 @@ dlmgmt_zonehalt(void *argp, void *retp, size_t *sz, zoneid_t zoneid, while ((fd = open(ZONE_LOCK, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR)) < 0) (void) sleep(1); + (void) write(fd, my_pid, sizeof(my_pid)); (void) close(fd); dlmgmt_table_lock(B_TRUE); diff --git a/usr/src/lib/brand/joyent/zone/statechange.ksh b/usr/src/lib/brand/joyent/zone/statechange.ksh index 30e3e4be13..6d5749fd1e 100644 --- a/usr/src/lib/brand/joyent/zone/statechange.ksh +++ b/usr/src/lib/brand/joyent/zone/statechange.ksh @@ -63,13 +63,42 @@ SNAPSHOT_DIR=root/checkpoints # lock_file() { + local cnt=0 + local prev_pid=0 while true; do if (set -o noclobber; echo "$$" >$LOCKFILE) 2>/dev/null; then trap 'rm -f $LOCKFILE; exit $?' INT TERM EXIT break; - else + fi + + local hold_pid=`cat $LOCKFILE 2>/dev/null` + + # the file might be gone or empty when we run the cat cmd + if [[ -z "$hold_pid" ]]; then sleep 1 + cnt=0 + continue + fi + + # if held by a different process, restart counter + if [[ $prev_pid != $hold_pid ]]; then + prev_pid=$hold_pid + cnt=0 fi + + [[ $cnt == 20 || $cnt == 40 ]] && \ + logger -p daemon.err "dlmgmtd lock file $LOCKFILE" \ + "held by pid $hold_pid for $cnt seconds" + + if [[ $cnt == 60 ]]; then + logger -p daemon.err "breaking dlmgmtd lock" \ + "held by pid $hold_pid after $cnt seconds" + unlock_file + continue + fi + + sleep 1 + let cnt=$cnt+1 done } |
