diff options
| author | Jerry Jelinek <jerry.jelinek@joyent.com> | 2012-03-16 13:28:40 +0000 |
|---|---|---|
| committer | Jerry Jelinek <jerry.jelinek@joyent.com> | 2012-03-16 13:28:40 +0000 |
| commit | 2e9c9a5042bf2e640d2716e2ffd54d2f2460e089 (patch) | |
| tree | 98d6ef398934a97a266877a2d83c00de043c048e /usr/src | |
| parent | 93c4aa5e044dab4275fd2ca94a5f16f8580ba9db (diff) | |
| download | illumos-joyent-2e9c9a5042bf2e640d2716e2ffd54d2f2460e089.tar.gz | |
OS-1019 zone stuck in down state: amon socket in /var/run preventing shutdown
Diffstat (limited to 'usr/src')
| -rw-r--r-- | usr/src/cmd/zoneadmd/vplat.c | 41 | ||||
| -rw-r--r-- | usr/src/cmd/zoneadmd/zoneadmd.c | 6 | ||||
| -rw-r--r-- | usr/src/lib/brand/joyent/zone/poststate.ksh | 7 | ||||
| -rw-r--r-- | usr/src/lib/brand/joyent/zone/prestate.ksh | 7 | ||||
| -rw-r--r-- | usr/src/lib/brand/joyent/zone/statechange.ksh | 76 | ||||
| -rwxr-xr-x | usr/src/lib/brand/kvm/zone/poststate.ksh | 7 | ||||
| -rwxr-xr-x | usr/src/lib/brand/kvm/zone/prestate.ksh | 7 | ||||
| -rwxr-xr-x | usr/src/lib/brand/kvm/zone/statechange.ksh | 76 |
8 files changed, 190 insertions, 37 deletions
diff --git a/usr/src/cmd/zoneadmd/vplat.c b/usr/src/cmd/zoneadmd/vplat.c index f8837ca057..895b1d7dd2 100644 --- a/usr/src/cmd/zoneadmd/vplat.c +++ b/usr/src/cmd/zoneadmd/vplat.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2012, Joyent Inc. All rights reserved. + * Copyright (c) 2012, Joyent Inc. All rights reserved. */ /* @@ -163,6 +163,7 @@ extern int getnetmaskbyaddr(struct in_addr, struct in_addr *); /* from zoneadmd */ extern char query_hook[]; +extern char post_statechg_hook[]; /* * For each "net" resource configured in zonecfg, we track a zone_addr_list_t @@ -590,6 +591,24 @@ root_to_lu(zlog_t *zlogp, char *zroot, size_t zrootlen, boolean_t isresolved) } /* + * Perform brand-specific cleanup if we are unable to unmount a FS. + */ +static void +brand_umount_cleanup(zlog_t *zlogp, char *path) +{ + char cmdbuf[2 * MAXPATHLEN]; + + if (post_statechg_hook[0] == '\0') + return; + + if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", post_statechg_hook, + ZONE_STATE_DOWN, Z_UNMOUNT, path) > sizeof (cmdbuf)) + return; + + (void) do_subproc(zlogp, cmdbuf, NULL, B_FALSE); +} + +/* * The general strategy for unmounting filesystems is as follows: * * - Remote filesystems may be dead, and attempting to contact them as @@ -731,27 +750,17 @@ unmount_filesystems(zlog_t *zlogp, zoneid_t zoneid, boolean_t unmount_cmd) "retrying in 1 second", path); (void) sleep(1); - } else if (fail == 16) { - char cmdbuf[MAXPATHLEN + 21]; - - zerror(zlogp, B_FALSE, - "unable to unmount '%s', " - "trying to kill GZ " - "processes", - path); - (void) snprintf(cmdbuf, - sizeof (cmdbuf), - "/usr/sbin/fuser -ck %s", - path); - (void) system(cmdbuf); - (void) sleep(2); - } else { + } else if (fail > 17) { error++; zerror(zlogp, B_FALSE, "unable to unmount '%s'", path); free_mnttable(mnts, nmnt); goto out; + } else { + /* Try the hook 2 times */ + brand_umount_cleanup(zlogp, + path); } } } diff --git 
a/usr/src/cmd/zoneadmd/zoneadmd.c b/usr/src/cmd/zoneadmd/zoneadmd.c index 70540f3bda..222ea78522 100644 --- a/usr/src/cmd/zoneadmd/zoneadmd.c +++ b/usr/src/cmd/zoneadmd/zoneadmd.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, Joyent Inc. All rights reserved. + * Copyright (c) 2012, Joyent Inc. All rights reserved. */ /* @@ -117,8 +117,8 @@ static zoneid_t zone_id; static zoneid_t zone_did = 0; dladm_handle_t dld_handle = NULL; -static char pre_statechg_hook[2 * MAXPATHLEN]; -static char post_statechg_hook[2 * MAXPATHLEN]; +char pre_statechg_hook[2 * MAXPATHLEN]; +char post_statechg_hook[2 * MAXPATHLEN]; char query_hook[2 * MAXPATHLEN]; zlog_t logsys; diff --git a/usr/src/lib/brand/joyent/zone/poststate.ksh b/usr/src/lib/brand/joyent/zone/poststate.ksh index f35f4b096a..10538bb207 100644 --- a/usr/src/lib/brand/joyent/zone/poststate.ksh +++ b/usr/src/lib/brand/joyent/zone/poststate.ksh @@ -19,7 +19,7 @@ # # CDDL HEADER END # -# Copyright 2010, 2011 Joyent, Inc. All rights reserved. +# Copyright 2012 Joyent, Inc. All rights reserved. # Use is subject to license terms. # @@ -31,12 +31,11 @@ if [[ -n $_ZONEADMD_brand_debug ]]; then logfile=/var/log/zone_bh.$1 date >>$logfile echo "zone $1 post-state-change $3 $4" >>$logfile - ksh -x /usr/lib/brand/joyent/statechange "post" $1 $2 $3 $4 \ - >>$logfile 2>&1 + ksh -x /usr/lib/brand/joyent/statechange "post" $@ >>$logfile 2>&1 res=$? echo "zone $1 post-state-change result $?" >>$logfile else - /usr/lib/brand/joyent/statechange "post" $1 $2 $3 $4 + /usr/lib/brand/joyent/statechange "post" $@ res=$? fi diff --git a/usr/src/lib/brand/joyent/zone/prestate.ksh b/usr/src/lib/brand/joyent/zone/prestate.ksh index d9acd13005..4cac6baab0 100644 --- a/usr/src/lib/brand/joyent/zone/prestate.ksh +++ b/usr/src/lib/brand/joyent/zone/prestate.ksh @@ -19,7 +19,7 @@ # # CDDL HEADER END # -# Copyright 2010, 2011 Joyent, Inc. All rights reserved. 
+# Copyright 2012 Joyent, Inc. All rights reserved. # Use is subject to license terms. # @@ -31,12 +31,11 @@ if [[ -n $_ZONEADMD_brand_debug ]]; then logfile=/var/log/zone_bh.$1 date >>$logfile echo "zone $1 pre-state-change $3 $4" >>$logfile - ksh -x /usr/lib/brand/joyent/statechange "pre" $1 $2 $3 $4 \ - >>$logfile 2>&1 + ksh -x /usr/lib/brand/joyent/statechange "pre" $@ >>$logfile 2>&1 res=$? echo "zone $1 pre-state-change result $?" >>$logfile else - /usr/lib/brand/joyent/statechange "pre" $1 $2 $3 $4 + /usr/lib/brand/joyent/statechange "pre" $@ res=$? fi diff --git a/usr/src/lib/brand/joyent/zone/statechange.ksh b/usr/src/lib/brand/joyent/zone/statechange.ksh index 97b041528a..f14610b19e 100644 --- a/usr/src/lib/brand/joyent/zone/statechange.ksh +++ b/usr/src/lib/brand/joyent/zone/statechange.ksh @@ -19,7 +19,7 @@ # # CDDL HEADER END # -# Copyright 2010, 2012 Joyent, Inc. All rights reserved. +# Copyright 2012 Joyent, Inc. All rights reserved. # Use is subject to license terms. # @@ -47,7 +47,13 @@ export PATH # # ready 0 # boot 1 +# forceboot 2 +# reboot 3 # halt 4 +# uninstalling 5 +# mount 6 +# forcemount 7 +# unmount 8 subcommand=$1 ZONENAME=$2 @@ -483,6 +489,70 @@ cleanup_net() done } +kill_gz_sockholder() +{ + echo "searching for GZ process holding socket $1" + logger -p daemon.err "zone $ZONENAME " \ + "searching for GZ process holding socket $1" + + pid=`(cd /proc; + for i in *; + do + pfiles $i 2>/dev/null | egrep -s "AF_UNIX $1"; + [ $? == 0 ] && echo "$i"; + done)` + + [ -z "$pid" ] && return + + echo "killing GZ process $pid holding socket $1" + logger -p daemon.err "zone $ZONENAME " \ + "killing GZ process $pid holding socket $1" + + kill -9 $pid +} + +# zoneadmd unable to unmount the given path, try to cleanup so unmount can +# succeed. 
+cleanup_mount() +{ + echo "attempting to cleanup mount $1" + logger -p daemon.err "zone $ZONENAME " \ + "attempting to cleanup mount $1" + + cnt=`fuser -c $1 2>/dev/null | wc -w` + if [ $cnt -gt 0 ]; then + echo "trying to kill GZ processes under $1" + logger -p daemon.err "zone $ZONENAME " \ + "trying to kill GZ processes under $1" + fuser -ck $1 + + # Exit out to give the zoneadmd umount a chance to succeed now. + # Zoneadmd will give us another shot if it still can't umount. + sleep 1 + exit 0 + fi + + # Processes which are injected into a zone and then open a file as a + # socket end-point will show in pfiles with the path relative to the + # zone's root. For example, a zone with its root at /zones/foo/root and + # an open socket as /zones/foo/root/var/run/x will show up in a pfiles + # search as /var/run/x. This is a problem since we have no way to + # narrow down which process to kill. + # + # Because the socket doesn't have enough information for us to tie to + # the specific GZ process, we hardcode to kill things we know will open + # sockets into the zone: + # /var/run/smartdc/metadata.sock + # /var/run/.smartdc-amon.sock + + ZVR=$ZONEPATH/root/var/run + [ -S $ZVR/smartdc/metadata.sock ] && + kill_gz_sockholder /var/run/smartdc/metadata.sock + + [ -S $ZVR/.smartdc-amon.sock ] && + kill_gz_sockholder /var/run/.smartdc-amon.sock +} + # # Main # @@ -502,6 +572,10 @@ fi # We can't set a rctl until we have a process in the zone to grab [[ "$subcommand" == "post" && $cmd == 1 ]] && setup_cpu_baseline +# Zone halt is hung unmounting, try to recover +[[ "$subcommand" == "post" && $state == 6 && $cmd == 8 ]] && \ + cleanup_mount "$6" + if [[ "$subcommand" == "pre" && $cmd == 4 ]]; then cleanup_snapshots cleanup_net diff --git a/usr/src/lib/brand/kvm/zone/poststate.ksh b/usr/src/lib/brand/kvm/zone/poststate.ksh index 3770d3d5bc..10538bb207 100755 --- a/usr/src/lib/brand/kvm/zone/poststate.ksh +++ b/usr/src/lib/brand/kvm/zone/poststate.ksh @@ -19,7 +19,7 @@ # # CDDL 
HEADER END # -# Copyright 2010, 2011 Joyent, Inc. All rights reserved. +# Copyright 2012 Joyent, Inc. All rights reserved. # Use is subject to license terms. # @@ -31,12 +31,11 @@ if [[ -n $_ZONEADMD_brand_debug ]]; then logfile=/var/log/zone_bh.$1 date >>$logfile echo "zone $1 post-state-change $3 $4" >>$logfile - ksh -x /usr/lib/brand/kvm/statechange "post" $1 $2 $3 $4 \ - >>$logfile 2>&1 + ksh -x /usr/lib/brand/joyent/statechange "post" $@ >>$logfile 2>&1 res=$? echo "zone $1 post-state-change result $?" >>$logfile else - /usr/lib/brand/kvm/statechange "post" $1 $2 $3 $4 + /usr/lib/brand/joyent/statechange "post" $@ res=$? fi diff --git a/usr/src/lib/brand/kvm/zone/prestate.ksh b/usr/src/lib/brand/kvm/zone/prestate.ksh index cc1ca9052f..4cac6baab0 100755 --- a/usr/src/lib/brand/kvm/zone/prestate.ksh +++ b/usr/src/lib/brand/kvm/zone/prestate.ksh @@ -19,7 +19,7 @@ # # CDDL HEADER END # -# Copyright 2010, 2011 Joyent, Inc. All rights reserved. +# Copyright 2012 Joyent, Inc. All rights reserved. # Use is subject to license terms. # @@ -31,12 +31,11 @@ if [[ -n $_ZONEADMD_brand_debug ]]; then logfile=/var/log/zone_bh.$1 date >>$logfile echo "zone $1 pre-state-change $3 $4" >>$logfile - ksh -x /usr/lib/brand/kvm/statechange "pre" $1 $2 $3 $4 \ - >>$logfile 2>&1 + ksh -x /usr/lib/brand/joyent/statechange "pre" $@ >>$logfile 2>&1 res=$? echo "zone $1 pre-state-change result $?" >>$logfile else - /usr/lib/brand/kvm/statechange "pre" $1 $2 $3 $4 + /usr/lib/brand/joyent/statechange "pre" $@ res=$? fi diff --git a/usr/src/lib/brand/kvm/zone/statechange.ksh b/usr/src/lib/brand/kvm/zone/statechange.ksh index efb10be50e..88d382328c 100755 --- a/usr/src/lib/brand/kvm/zone/statechange.ksh +++ b/usr/src/lib/brand/kvm/zone/statechange.ksh @@ -19,7 +19,7 @@ # # CDDL HEADER END # -# Copyright 2010, 2012 Joyent, Inc. All rights reserved. +# Copyright 2012 Joyent, Inc. All rights reserved. # Use is subject to license terms. 
# @@ -47,7 +47,13 @@ export PATH # # ready 0 # boot 1 +# forceboot 2 +# reboot 3 # halt 4 +# uninstalling 5 +# mount 6 +# forcemount 7 +# unmount 8 subcommand=$1 ZONENAME=$2 @@ -400,6 +406,70 @@ cleanup_net() done } +kill_gz_sockholder() +{ + echo "searching for GZ process holding socket $1" + logger -p daemon.err "zone $ZONENAME " \ + "searching for GZ process holding socket $1" + + pid=`(cd /proc; + for i in *; + do + pfiles $i 2>/dev/null | egrep -s "AF_UNIX $1"; + [ $? == 0 ] && echo "$i"; + done)` + + [ -z "$pid" ] && return + + echo "killing GZ process $pid holding socket $1" + logger -p daemon.err "zone $ZONENAME " \ + "killing GZ process $pid holding socket $1" + + kill -9 $pid +} + +# zoneadmd unable to unmount the given path, try to cleanup so unmount can +# succeed. +cleanup_mount() +{ + echo "attempting to cleanup mount $1" + logger -p daemon.err "zone $ZONENAME " \ + "attempting to cleanup mount $1" + + cnt=`fuser -c $1 2>/dev/null | wc -w` + if [ $cnt -gt 0 ]; then + echo "trying to kill GZ processes under $1" + logger -p daemon.err "zone $ZONENAME " \ + "trying to kill GZ processes under $1" + fuser -ck $1 + + # Exit out to give the zoneadmd umount a chance to succeed now. + # Zoneadmd will give us another shot if it still can't umount. + sleep 1 + exit 0 + fi + + # Processes which are injected into a zone and then open a file as a + # socket end-point will show in pfiles with the path relative to the + # zone's root. For example, a zone with its root at /zones/foo/root and + # an open socket as /zones/foo/root/var/run/x will show up in a pfiles + # search as /var/run/x. This is a problem since we have no way to + # narrow down which process to kill. 
+ # + # Because the socket doesn't have enough information for us to tie to + # the specific GZ process, we hardcode to kill things we know will open + # sockets into the zone: + # /var/run/smartdc/metadata.sock + # /var/run/.smartdc-amon.sock + + ZVR=$ZONEPATH/root/var/run + [ -S $ZVR/smartdc/metadata.sock ] && + kill_gz_sockholder /var/run/smartdc/metadata.sock + + [ -S $ZVR/.smartdc-amon.sock ] && + kill_gz_sockholder /var/run/.smartdc-amon.sock +} + # # Main # @@ -416,4 +486,8 @@ echo "statechange $subcommand $cmd" >>/tmp/kvm.log # We can't set a rctl until we have a process in the zone to grab [[ "$subcommand" == "post" && $cmd == 1 ]] && setup_cpu_baseline +# Zone halt is hung unmounting, try to recover +[[ "$subcommand" == "post" && $state == 6 && $cmd == 8 ]] && \ + cleanup_mount "$6" + exit 0 |
