summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
authorJerry Jelinek <jerry.jelinek@joyent.com>2012-03-16 13:28:40 +0000
committerJerry Jelinek <jerry.jelinek@joyent.com>2012-03-16 13:28:40 +0000
commit2e9c9a5042bf2e640d2716e2ffd54d2f2460e089 (patch)
tree98d6ef398934a97a266877a2d83c00de043c048e /usr/src
parent93c4aa5e044dab4275fd2ca94a5f16f8580ba9db (diff)
downloadillumos-joyent-2e9c9a5042bf2e640d2716e2ffd54d2f2460e089.tar.gz
OS-1019 zone stuck in down state: amon socket in /var/run preventing shutdown
Diffstat (limited to 'usr/src')
-rw-r--r--usr/src/cmd/zoneadmd/vplat.c41
-rw-r--r--usr/src/cmd/zoneadmd/zoneadmd.c6
-rw-r--r--usr/src/lib/brand/joyent/zone/poststate.ksh7
-rw-r--r--usr/src/lib/brand/joyent/zone/prestate.ksh7
-rw-r--r--usr/src/lib/brand/joyent/zone/statechange.ksh76
-rwxr-xr-xusr/src/lib/brand/kvm/zone/poststate.ksh7
-rwxr-xr-xusr/src/lib/brand/kvm/zone/prestate.ksh7
-rwxr-xr-xusr/src/lib/brand/kvm/zone/statechange.ksh76
8 files changed, 190 insertions, 37 deletions
diff --git a/usr/src/cmd/zoneadmd/vplat.c b/usr/src/cmd/zoneadmd/vplat.c
index f8837ca057..895b1d7dd2 100644
--- a/usr/src/cmd/zoneadmd/vplat.c
+++ b/usr/src/cmd/zoneadmd/vplat.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2012, Joyent Inc. All rights reserved.
+ * Copyright (c) 2012, Joyent Inc. All rights reserved.
*/
/*
@@ -163,6 +163,7 @@ extern int getnetmaskbyaddr(struct in_addr, struct in_addr *);
/* from zoneadmd */
extern char query_hook[];
+extern char post_statechg_hook[];
/*
* For each "net" resource configured in zonecfg, we track a zone_addr_list_t
@@ -590,6 +591,24 @@ root_to_lu(zlog_t *zlogp, char *zroot, size_t zrootlen, boolean_t isresolved)
}
/*
+ * Perform brand-specific cleanup if we are unable to unmount a FS.
+ *
+ * Runs the brand's post-state-change hook with ZONE_STATE_DOWN, Z_UNMOUNT and
+ * the path that could not be unmounted appended as arguments.  Does nothing
+ * if no hook is configured or if the command line does not fit in the buffer.
+ * The result of running the hook is deliberately ignored.
+ */
+static void
+brand_umount_cleanup(zlog_t *zlogp, char *path)
+{
+	char cmdbuf[2 * MAXPATHLEN];
+	int len;
+
+	if (post_statechg_hook[0] == '\0')
+		return;
+
+	/*
+	 * snprintf() returns the length the complete string would have had,
+	 * so a value equal to the buffer size also means the command was
+	 * truncated; never run a truncated command.
+	 */
+	len = snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s",
+	    post_statechg_hook, ZONE_STATE_DOWN, Z_UNMOUNT, path);
+	if (len < 0 || (size_t)len >= sizeof (cmdbuf))
+		return;
+
+	(void) do_subproc(zlogp, cmdbuf, NULL, B_FALSE);
+}
+
+/*
* The general strategy for unmounting filesystems is as follows:
*
* - Remote filesystems may be dead, and attempting to contact them as
@@ -731,27 +750,17 @@ unmount_filesystems(zlog_t *zlogp, zoneid_t zoneid, boolean_t unmount_cmd)
"retrying in 1 second",
path);
(void) sleep(1);
- } else if (fail == 16) {
- char cmdbuf[MAXPATHLEN + 21];
-
- zerror(zlogp, B_FALSE,
- "unable to unmount '%s', "
- "trying to kill GZ "
- "processes",
- path);
- (void) snprintf(cmdbuf,
- sizeof (cmdbuf),
- "/usr/sbin/fuser -ck %s",
- path);
- (void) system(cmdbuf);
- (void) sleep(2);
- } else {
+ } else if (fail > 17) {
error++;
zerror(zlogp, B_FALSE,
"unable to unmount '%s'",
path);
free_mnttable(mnts, nmnt);
goto out;
+ } else {
+ /* Try the hook 2 times */
+ brand_umount_cleanup(zlogp,
+ path);
}
}
}
diff --git a/usr/src/cmd/zoneadmd/zoneadmd.c b/usr/src/cmd/zoneadmd/zoneadmd.c
index 70540f3bda..222ea78522 100644
--- a/usr/src/cmd/zoneadmd/zoneadmd.c
+++ b/usr/src/cmd/zoneadmd/zoneadmd.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, Joyent Inc. All rights reserved.
+ * Copyright (c) 2012, Joyent Inc. All rights reserved.
*/
/*
@@ -117,8 +117,8 @@ static zoneid_t zone_id;
static zoneid_t zone_did = 0;
dladm_handle_t dld_handle = NULL;
-static char pre_statechg_hook[2 * MAXPATHLEN];
-static char post_statechg_hook[2 * MAXPATHLEN];
+char pre_statechg_hook[2 * MAXPATHLEN];
+char post_statechg_hook[2 * MAXPATHLEN];
char query_hook[2 * MAXPATHLEN];
zlog_t logsys;
diff --git a/usr/src/lib/brand/joyent/zone/poststate.ksh b/usr/src/lib/brand/joyent/zone/poststate.ksh
index f35f4b096a..10538bb207 100644
--- a/usr/src/lib/brand/joyent/zone/poststate.ksh
+++ b/usr/src/lib/brand/joyent/zone/poststate.ksh
@@ -19,7 +19,7 @@
#
# CDDL HEADER END
#
-# Copyright 2010, 2011 Joyent, Inc. All rights reserved.
+# Copyright 2012 Joyent, Inc. All rights reserved.
# Use is subject to license terms.
#
@@ -31,12 +31,11 @@ if [[ -n $_ZONEADMD_brand_debug ]]; then
logfile=/var/log/zone_bh.$1
date >>$logfile
echo "zone $1 post-state-change $3 $4" >>$logfile
- ksh -x /usr/lib/brand/joyent/statechange "post" $1 $2 $3 $4 \
- >>$logfile 2>&1
+ ksh -x /usr/lib/brand/joyent/statechange "post" "$@" >>$logfile 2>&1
res=$?
- echo "zone $1 post-state-change result $?" >>$logfile
+ # $? now holds the status of the assignment above; log the saved value.
+ echo "zone $1 post-state-change result $res" >>$logfile
else
- /usr/lib/brand/joyent/statechange "post" $1 $2 $3 $4
+ /usr/lib/brand/joyent/statechange "post" "$@"
res=$?
fi
diff --git a/usr/src/lib/brand/joyent/zone/prestate.ksh b/usr/src/lib/brand/joyent/zone/prestate.ksh
index d9acd13005..4cac6baab0 100644
--- a/usr/src/lib/brand/joyent/zone/prestate.ksh
+++ b/usr/src/lib/brand/joyent/zone/prestate.ksh
@@ -19,7 +19,7 @@
#
# CDDL HEADER END
#
-# Copyright 2010, 2011 Joyent, Inc. All rights reserved.
+# Copyright 2012 Joyent, Inc. All rights reserved.
# Use is subject to license terms.
#
@@ -31,12 +31,11 @@ if [[ -n $_ZONEADMD_brand_debug ]]; then
logfile=/var/log/zone_bh.$1
date >>$logfile
echo "zone $1 pre-state-change $3 $4" >>$logfile
- ksh -x /usr/lib/brand/joyent/statechange "pre" $1 $2 $3 $4 \
- >>$logfile 2>&1
+ ksh -x /usr/lib/brand/joyent/statechange "pre" "$@" >>$logfile 2>&1
res=$?
- echo "zone $1 pre-state-change result $?" >>$logfile
+ # $? now holds the status of the assignment above; log the saved value.
+ echo "zone $1 pre-state-change result $res" >>$logfile
else
- /usr/lib/brand/joyent/statechange "pre" $1 $2 $3 $4
+ /usr/lib/brand/joyent/statechange "pre" "$@"
res=$?
fi
diff --git a/usr/src/lib/brand/joyent/zone/statechange.ksh b/usr/src/lib/brand/joyent/zone/statechange.ksh
index 97b041528a..f14610b19e 100644
--- a/usr/src/lib/brand/joyent/zone/statechange.ksh
+++ b/usr/src/lib/brand/joyent/zone/statechange.ksh
@@ -19,7 +19,7 @@
#
# CDDL HEADER END
#
-# Copyright 2010, 2012 Joyent, Inc. All rights reserved.
+# Copyright 2012 Joyent, Inc. All rights reserved.
# Use is subject to license terms.
#
@@ -47,7 +47,13 @@ export PATH
#
# ready 0
# boot 1
+# forceboot 2
+# reboot 3
# halt 4
+# uninstalling 5
+# mount 6
+# forcemount 7
+# unmount 8
subcommand=$1
ZONENAME=$2
@@ -483,6 +489,70 @@ cleanup_net()
done
}
+kill_gz_sockholder()
+{
+ echo "searching for GZ process holding socket $1"
+ logger -p daemon.err "zone $ZONENAME " \
+ "searching for GZ process holding socket $1"
+
+ pid=`(cd /proc;
+ for i in *;
+ do
+ pfiles $i 2>/dev/null | egrep -s "AF_UNIX $1";
+ [ $? == 0 ] && echo "$i";
+ done)`
+
+ [ -z "$pid" ] && return
+
+ echo "killing GZ process $pid holding socket $1"
+ logger -p daemon.err "zone $ZONENAME " \
+ "killing GZ process $pid holding socket $1"
+
+ kill -9 $pid
+}
+
+# zonadmd unable to unmount the given path, try to cleanup so unmount can
+# succeed.
+cleanup_mount()
+{
+ echo "attempting to cleanup mount $1"
+ logger -p daemon.err "zone $ZONENAME " \
+ "attempting to cleanup mount $1"
+
+ cnt=`fuser -c $1 2>/dev/null | wc -w`
+ if [ $cnt -gt 0 ]; then
+ echo "trying to kill GZ processes under $1"
+ logger -p daemon.err "zone $ZONENAME " \
+ "trying to kill GZ processes under $1"
+ fuser -ck $1
+
+ # Exit out to give the zoneadmd umount a chance to suceed now.
+ # Zoneadmd will give us another shot if it still can't umount.
+ sleep 1
+ exit 0
+ fi
+
+ # Processes which are injected into a zone and then open a file as a
+ # socket end-point will show in pfiles with the path relative to the
+ # zone's root. For example, a zone with its root at /zones/foo/root and
+ # an open socket as /zones/foo/root/var/run/x will show up in a pfiles
+ # search as /var/run/x. This is a problem since we have no way to
+ # narrow down which process to kill.
+ #
+ # Because the socket doesn't have enough information for us to tie to
+ # the specific GZ process, we hardcode to kill things we know will open
+ # sockets into the zone:
+ # /var/run/smartdc/metadata.sock
+ # /var/run/.smartdc-amon.sock
+
+ ZVR=$ZONEPATH/root/var/run
+ [ -S $ZVR/smartdc/metadata.sock ] &&
+ kill_gz_sockholder /var/run/smartdc/metadata.sock
+
+ [ -S $ZVR/.smartdc-amon.sock ] &&
+ kill_gz_sockholder /var/run/.smartdc-amon.sock
+}
+
#
# Main
#
@@ -502,6 +572,10 @@ fi
# We can't set a rctl until we have a process in the zone to grab
[[ "$subcommand" == "post" && $cmd == 1 ]] && setup_cpu_baseline
+# Zone halt is hung unmounting, try to recover
+[[ "$subcommand" == "post" && $state == 6 && $cmd == 8 ]] && \
+ cleanup_mount "$6"
+
if [[ "$subcommand" == "pre" && $cmd == 4 ]]; then
cleanup_snapshots
cleanup_net
diff --git a/usr/src/lib/brand/kvm/zone/poststate.ksh b/usr/src/lib/brand/kvm/zone/poststate.ksh
index 3770d3d5bc..10538bb207 100755
--- a/usr/src/lib/brand/kvm/zone/poststate.ksh
+++ b/usr/src/lib/brand/kvm/zone/poststate.ksh
@@ -19,7 +19,7 @@
#
# CDDL HEADER END
#
-# Copyright 2010, 2011 Joyent, Inc. All rights reserved.
+# Copyright 2012 Joyent, Inc. All rights reserved.
# Use is subject to license terms.
#
@@ -31,12 +31,11 @@ if [[ -n $_ZONEADMD_brand_debug ]]; then
logfile=/var/log/zone_bh.$1
date >>$logfile
echo "zone $1 post-state-change $3 $4" >>$logfile
- ksh -x /usr/lib/brand/kvm/statechange "post" $1 $2 $3 $4 \
- >>$logfile 2>&1
+ # Keep calling the kvm brand's own statechange script, not the joyent one.
+ ksh -x /usr/lib/brand/kvm/statechange "post" "$@" >>$logfile 2>&1
res=$?
- echo "zone $1 post-state-change result $?" >>$logfile
+ # $? now holds the status of the assignment above; log the saved value.
+ echo "zone $1 post-state-change result $res" >>$logfile
else
- /usr/lib/brand/kvm/statechange "post" $1 $2 $3 $4
+ /usr/lib/brand/kvm/statechange "post" "$@"
res=$?
fi
diff --git a/usr/src/lib/brand/kvm/zone/prestate.ksh b/usr/src/lib/brand/kvm/zone/prestate.ksh
index cc1ca9052f..4cac6baab0 100755
--- a/usr/src/lib/brand/kvm/zone/prestate.ksh
+++ b/usr/src/lib/brand/kvm/zone/prestate.ksh
@@ -19,7 +19,7 @@
#
# CDDL HEADER END
#
-# Copyright 2010, 2011 Joyent, Inc. All rights reserved.
+# Copyright 2012 Joyent, Inc. All rights reserved.
# Use is subject to license terms.
#
@@ -31,12 +31,11 @@ if [[ -n $_ZONEADMD_brand_debug ]]; then
logfile=/var/log/zone_bh.$1
date >>$logfile
echo "zone $1 pre-state-change $3 $4" >>$logfile
- ksh -x /usr/lib/brand/kvm/statechange "pre" $1 $2 $3 $4 \
- >>$logfile 2>&1
+ # Keep calling the kvm brand's own statechange script, not the joyent one.
+ ksh -x /usr/lib/brand/kvm/statechange "pre" "$@" >>$logfile 2>&1
res=$?
- echo "zone $1 pre-state-change result $?" >>$logfile
+ # $? now holds the status of the assignment above; log the saved value.
+ echo "zone $1 pre-state-change result $res" >>$logfile
else
- /usr/lib/brand/kvm/statechange "pre" $1 $2 $3 $4
+ /usr/lib/brand/kvm/statechange "pre" "$@"
res=$?
fi
diff --git a/usr/src/lib/brand/kvm/zone/statechange.ksh b/usr/src/lib/brand/kvm/zone/statechange.ksh
index efb10be50e..88d382328c 100755
--- a/usr/src/lib/brand/kvm/zone/statechange.ksh
+++ b/usr/src/lib/brand/kvm/zone/statechange.ksh
@@ -19,7 +19,7 @@
#
# CDDL HEADER END
#
-# Copyright 2010, 2012 Joyent, Inc. All rights reserved.
+# Copyright 2012 Joyent, Inc. All rights reserved.
# Use is subject to license terms.
#
@@ -47,7 +47,13 @@ export PATH
#
# ready 0
# boot 1
+# forceboot 2
+# reboot 3
# halt 4
+# uninstalling 5
+# mount 6
+# forcemount 7
+# unmount 8
subcommand=$1
ZONENAME=$2
@@ -400,6 +406,70 @@ cleanup_net()
done
}
+kill_gz_sockholder()
+{
+ echo "searching for GZ process holding socket $1"
+ logger -p daemon.err "zone $ZONENAME " \
+ "searching for GZ process holding socket $1"
+
+ pid=`(cd /proc;
+ for i in *;
+ do
+ pfiles $i 2>/dev/null | egrep -s "AF_UNIX $1";
+ [ $? == 0 ] && echo "$i";
+ done)`
+
+ [ -z "$pid" ] && return
+
+ echo "killing GZ process $pid holding socket $1"
+ logger -p daemon.err "zone $ZONENAME " \
+ "killing GZ process $pid holding socket $1"
+
+ kill -9 $pid
+}
+
+# zonadmd unable to unmount the given path, try to cleanup so unmount can
+# succeed.
+cleanup_mount()
+{
+ echo "attempting to cleanup mount $1"
+ logger -p daemon.err "zone $ZONENAME " \
+ "attempting to cleanup mount $1"
+
+ cnt=`fuser -c $1 2>/dev/null | wc -w`
+ if [ $cnt -gt 0 ]; then
+ echo "trying to kill GZ processes under $1"
+ logger -p daemon.err "zone $ZONENAME " \
+ "trying to kill GZ processes under $1"
+ fuser -ck $1
+
+ # Exit out to give the zoneadmd umount a chance to suceed now.
+ # Zoneadmd will give us another shot if it still can't umount.
+ sleep 1
+ exit 0
+ fi
+
+ # Processes which are injected into a zone and then open a file as a
+ # socket end-point will show in pfiles with the path relative to the
+ # zone's root. For example, a zone with its root at /zones/foo/root and
+ # an open socket as /zones/foo/root/var/run/x will show up in a pfiles
+ # search as /var/run/x. This is a problem since we have no way to
+ # narrow down which process to kill.
+ #
+ # Because the socket doesn't have enough information for us to tie to
+ # the specific GZ process, we hardcode to kill things we know will open
+ # sockets into the zone:
+ # /var/run/smartdc/metadata.sock
+ # /var/run/.smartdc-amon.sock
+
+ ZVR=$ZONEPATH/root/var/run
+ [ -S $ZVR/smartdc/metadata.sock ] &&
+ kill_gz_sockholder /var/run/smartdc/metadata.sock
+
+ [ -S $ZVR/.smartdc-amon.sock ] &&
+ kill_gz_sockholder /var/run/.smartdc-amon.sock
+}
+
#
# Main
#
@@ -416,4 +486,8 @@ echo "statechange $subcommand $cmd" >>/tmp/kvm.log
# We can't set a rctl until we have a process in the zone to grab
[[ "$subcommand" == "post" && $cmd == 1 ]] && setup_cpu_baseline
+# Zone halt is hung unmounting, try to recover
+[[ "$subcommand" == "post" && $state == 6 && $cmd == 8 ]] && \
+ cleanup_mount "$6"
+
exit 0