summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJerry Jelinek <jerry.jelinek@joyent.com>2013-10-21 15:07:33 +0000
committerJerry Jelinek <jerry.jelinek@joyent.com>2013-10-21 15:07:33 +0000
commit42563ae1864088afc97ac8b51de017195c9231dc (patch)
tree45249c510579fe34111cdaa2e1f4b29700c3494a
parent2bcf8f7d85a23c46ca9afb42d2f6779285b3974b (diff)
downloadillumos-joyent-42563ae1864088afc97ac8b51de017195c9231dc.tar.gz
OS-2564 zone boot failed: could not start zoneadmd
-rw-r--r--usr/src/cmd/zoneadmd/zcons.c99
-rw-r--r--usr/src/cmd/zoneadmd/zoneadmd.c7
-rw-r--r--usr/src/cmd/zoneadmd/zoneadmd.h3
3 files changed, 87 insertions, 22 deletions
diff --git a/usr/src/cmd/zoneadmd/zcons.c b/usr/src/cmd/zoneadmd/zcons.c
index 84504d86e1..153ccf9b62 100644
--- a/usr/src/cmd/zoneadmd/zcons.c
+++ b/usr/src/cmd/zoneadmd/zcons.c
@@ -131,7 +131,10 @@ char bad_boot_arg[BOOTARGS_MAX];
*/
static int eventstream[2];
-
+/* flag used to cope with race creating master zcons devlink */
+static boolean_t master_zcons_failed = B_FALSE;
+/* flag to track if we've seen a state change when there is no master zcons */
+static boolean_t state_changed = B_FALSE;
int
eventstream_init()
@@ -412,7 +415,8 @@ devlinks:
*
* In very rare cases the open returns ENOENT if devfs doesn't have
* everything setup yet due to heavy zone startup load. Wait for
- * 1 sec. and retry a few times before we fail to boot the zone.
+ * 1 sec. and retry a few times. Even if we can't setup the zone's
+ * console, we still go ahead and boot the zone.
*/
(void) snprintf(conspath, sizeof (conspath), "/dev/zcons/%s/%s",
zone_name, ZCONS_MASTER_NAME);
@@ -425,7 +429,7 @@ devlinks:
if (masterfd == -1) {
zerror(zlogp, B_TRUE, "ERROR: could not open master side of "
"zone console for %s to acquire slave handle", zone_name);
- goto error;
+ master_zcons_failed = B_TRUE;
}
(void) snprintf(conspath, sizeof (conspath), "/dev/zcons/%s/%s",
@@ -436,33 +440,35 @@ devlinks:
break;
(void) sleep(1);
}
- if (slavefd == -1) {
+ if (slavefd == -1)
zerror(zlogp, B_TRUE, "ERROR: could not open slave side of zone"
" console for %s to acquire slave handle", zone_name);
- (void) close(masterfd);
- goto error;
- }
+
/*
* This ioctl can occasionally return ENXIO if devfs doesn't have
* everything plumbed up yet due to heavy zone startup load. Wait for
* 1 sec. and retry a few times before we fail to boot the zone.
*/
- for (i = 0; i < ZCONS_RETRY; i++) {
- if (ioctl(masterfd, ZC_HOLDSLAVE, (caddr_t)(intptr_t)slavefd)
- == 0) {
- rv = 0;
- break;
- } else if (errno != ENXIO) {
- break;
+ if (masterfd != -1 && slavefd != -1) {
+ for (i = 0; i < ZCONS_RETRY; i++) {
+ if (ioctl(masterfd, ZC_HOLDSLAVE,
+ (caddr_t)(intptr_t)slavefd) == 0) {
+ rv = 0;
+ break;
+ } else if (errno != ENXIO) {
+ break;
+ }
+ (void) sleep(1);
}
- (void) sleep(1);
+ if (rv != 0)
+ zerror(zlogp, B_TRUE, "ERROR: error while acquiring "
+ "slave handle of zone console for %s", zone_name);
}
- if (rv != 0)
- zerror(zlogp, B_TRUE, "ERROR: error while acquiring slave "
- "handle of zone console for %s", zone_name);
- (void) close(slavefd);
- (void) close(masterfd);
+ if (slavefd != -1)
+ (void) close(slavefd);
+ if (masterfd != -1)
+ (void) close(masterfd);
error:
if (ddef_hdl)
@@ -873,7 +879,6 @@ init_console(zlog_t *zlogp)
if (init_console_dev(zlogp) == -1) {
zerror(zlogp, B_FALSE,
"console setup: device initialization failed");
- return (-1);
}
if ((serverfd = init_console_sock(zlogp)) == -1) {
@@ -885,6 +890,17 @@ init_console(zlog_t *zlogp)
}
/*
+ * Maintain a simple flag that tracks if we have seen at least one state
+ * change. This is currently only used to handle the special case where we are
+ * running without a console device, which is what normally drives shutdown.
+ */
+void
+zcons_statechanged()
+{
+ state_changed = B_TRUE;
+}
+
+/*
* serve_console() is the master loop for driving console I/O. It is also the
* routine which is ultimately responsible for "pulling the plug" on zoneadmd
* when it realizes that the daemon should shut down.
@@ -902,6 +918,7 @@ serve_console(zlog_t *zlogp)
int masterfd;
zone_state_t zstate;
char conspath[MAXPATHLEN];
+ static boolean_t cons_warned = B_FALSE;
(void) snprintf(conspath, sizeof (conspath),
"/dev/zcons/%s/%s", zone_name, ZCONS_MASTER_NAME);
@@ -909,6 +926,46 @@ serve_console(zlog_t *zlogp)
for (;;) {
masterfd = open(conspath, O_RDWR|O_NONBLOCK|O_NOCTTY);
if (masterfd == -1) {
+ if (master_zcons_failed) {
+ /*
+ * If we don't have a console and the zone is
+ * not shutting down, there may have been a
+ * race/failure with devfs while creating the
+ * console. In this case we want to leave the
+ * zone up, even without a console, so
+ * periodically recheck.
+ */
+ int i;
+
+ /*
+ * In the normal flow of this loop, we use
+ * do_console_io to give things a chance to get
+ * going first. However, in this case we can't
+ * use that, so we have to wait for at least
+ * one state change before checking the state.
+ */
+ for (i = 0; i < 60; i++) {
+ if (state_changed)
+ break;
+ (void) sleep(1);
+ }
+
+ if (i < 60 && zone_get_state(zone_name,
+ &zstate) == Z_OK &&
+ (zstate == ZONE_STATE_READY ||
+ zstate == ZONE_STATE_RUNNING)) {
+ if (!cons_warned) {
+ zerror(zlogp, B_FALSE,
+ "WARNING: missing zone "
+ "console for %s",
+ zone_name);
+ cons_warned = B_TRUE;
+ }
+ (void) sleep(ZCONS_RETRY);
+ continue;
+ }
+ }
+
zerror(zlogp, B_TRUE, "failed to open console master");
(void) mutex_lock(&lock);
goto death;
diff --git a/usr/src/cmd/zoneadmd/zoneadmd.c b/usr/src/cmd/zoneadmd/zoneadmd.c
index bfa3b937ca..c79f70fa06 100644
--- a/usr/src/cmd/zoneadmd/zoneadmd.c
+++ b/usr/src/cmd/zoneadmd/zoneadmd.c
@@ -1445,6 +1445,7 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
rval = zone_ready(zlogp, Z_MNT_BOOT, zstate, debug);
if (rval == 0)
eventstream_write(Z_EVT_ZONE_READIED);
+ zcons_statechanged();
break;
case Z_BOOT:
case Z_FORCEBOOT:
@@ -1455,6 +1456,7 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
zstate, debug);
}
audit_put_record(zlogp, uc, rval, "boot");
+ zcons_statechanged();
if (rval != 0) {
bringup_failure_recovery = B_TRUE;
(void) zone_halt(zlogp, B_FALSE, B_FALSE,
@@ -1578,6 +1580,7 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
rval = zone_bootup(zlogp, zargp->bootbuf, zstate,
debug);
audit_put_record(zlogp, uc, rval, "boot");
+ zcons_statechanged();
if (rval != 0) {
bringup_failure_recovery = B_TRUE;
(void) zone_halt(zlogp, B_FALSE, B_TRUE,
@@ -1592,6 +1595,7 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate,
debug)) != 0)
break;
+ zcons_statechanged();
eventstream_write(Z_EVT_ZONE_HALTED);
break;
case Z_REBOOT:
@@ -1639,6 +1643,7 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate,
debug)) != 0)
break;
+ zcons_statechanged();
if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate,
debug)) == 0)
eventstream_write(Z_EVT_ZONE_READIED);
@@ -1661,6 +1666,7 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
debug)) != 0)
break;
eventstream_write(Z_EVT_ZONE_HALTED);
+ zcons_statechanged();
break;
case Z_REBOOT:
(void) strlcpy(boot_args, zargp->bootbuf,
@@ -1672,6 +1678,7 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
boot_args[0] = '\0';
break;
}
+ zcons_statechanged();
if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate,
debug)) != 0) {
eventstream_write(Z_EVT_ZONE_BOOTFAILED);
diff --git a/usr/src/cmd/zoneadmd/zoneadmd.h b/usr/src/cmd/zoneadmd/zoneadmd.h
index 58ec1516fa..230d6603eb 100644
--- a/usr/src/cmd/zoneadmd/zoneadmd.h
+++ b/usr/src/cmd/zoneadmd/zoneadmd.h
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, Joyent Inc. All rights reserved.
+ * Copyright (c) 2013, Joyent Inc. All rights reserved.
*/
#ifndef _ZONEADMD_H
@@ -152,6 +152,7 @@ extern void resolve_lofs(zlog_t *zlogp, char *path, size_t pathlen);
*/
extern int init_console(zlog_t *);
extern void serve_console(zlog_t *);
+extern void zcons_statechanged();
/*
* Memory capping thread creation.