diff options
| author | Jerry Jelinek <jerry.jelinek@joyent.com> | 2013-10-21 15:07:33 +0000 |
|---|---|---|
| committer | Jerry Jelinek <jerry.jelinek@joyent.com> | 2013-10-21 15:07:33 +0000 |
| commit | 42563ae1864088afc97ac8b51de017195c9231dc (patch) | |
| tree | 45249c510579fe34111cdaa2e1f4b29700c3494a | |
| parent | 2bcf8f7d85a23c46ca9afb42d2f6779285b3974b (diff) | |
| download | illumos-joyent-42563ae1864088afc97ac8b51de017195c9231dc.tar.gz | |
OS-2564 zone boot failed: could not start zoneadmd
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | usr/src/cmd/zoneadmd/zcons.c | 99 |
| -rw-r--r-- | usr/src/cmd/zoneadmd/zoneadmd.c | 7 |
| -rw-r--r-- | usr/src/cmd/zoneadmd/zoneadmd.h | 3 |
3 files changed, 87 insertions, 22 deletions
diff --git a/usr/src/cmd/zoneadmd/zcons.c b/usr/src/cmd/zoneadmd/zcons.c index 84504d86e1..153ccf9b62 100644 --- a/usr/src/cmd/zoneadmd/zcons.c +++ b/usr/src/cmd/zoneadmd/zcons.c @@ -131,7 +131,10 @@ char bad_boot_arg[BOOTARGS_MAX]; */ static int eventstream[2]; - +/* flag used to cope with race creating master zcons devlink */ +static boolean_t master_zcons_failed = B_FALSE; +/* flag to track if we've seen a state change when there is no master zcons */ +static boolean_t state_changed = B_FALSE; int eventstream_init() @@ -412,7 +415,8 @@ devlinks: * * In very rare cases the open returns ENOENT if devfs doesn't have * everything setup yet due to heavy zone startup load. Wait for - * 1 sec. and retry a few times before we fail to boot the zone. + * 1 sec. and retry a few times. Even if we can't setup the zone's + * console, we still go ahead and boot the zone. */ (void) snprintf(conspath, sizeof (conspath), "/dev/zcons/%s/%s", zone_name, ZCONS_MASTER_NAME); @@ -425,7 +429,7 @@ devlinks: if (masterfd == -1) { zerror(zlogp, B_TRUE, "ERROR: could not open master side of " "zone console for %s to acquire slave handle", zone_name); - goto error; + master_zcons_failed = B_TRUE; } (void) snprintf(conspath, sizeof (conspath), "/dev/zcons/%s/%s", @@ -436,33 +440,35 @@ devlinks: break; (void) sleep(1); } - if (slavefd == -1) { + if (slavefd == -1) zerror(zlogp, B_TRUE, "ERROR: could not open slave side of zone" " console for %s to acquire slave handle", zone_name); - (void) close(masterfd); - goto error; - } + /* * This ioctl can occasionally return ENXIO if devfs doesn't have * everything plumbed up yet due to heavy zone startup load. Wait for * 1 sec. and retry a few times before we fail to boot the zone. 
*/ - for (i = 0; i < ZCONS_RETRY; i++) { - if (ioctl(masterfd, ZC_HOLDSLAVE, (caddr_t)(intptr_t)slavefd) - == 0) { - rv = 0; - break; - } else if (errno != ENXIO) { - break; + if (masterfd != -1 && slavefd != -1) { + for (i = 0; i < ZCONS_RETRY; i++) { + if (ioctl(masterfd, ZC_HOLDSLAVE, + (caddr_t)(intptr_t)slavefd) == 0) { + rv = 0; + break; + } else if (errno != ENXIO) { + break; + } + (void) sleep(1); } - (void) sleep(1); + if (rv != 0) + zerror(zlogp, B_TRUE, "ERROR: error while acquiring " + "slave handle of zone console for %s", zone_name); } - if (rv != 0) - zerror(zlogp, B_TRUE, "ERROR: error while acquiring slave " - "handle of zone console for %s", zone_name); - (void) close(slavefd); - (void) close(masterfd); + if (slavefd != -1) + (void) close(slavefd); + if (masterfd != -1) + (void) close(masterfd); error: if (ddef_hdl) @@ -873,7 +879,6 @@ init_console(zlog_t *zlogp) if (init_console_dev(zlogp) == -1) { zerror(zlogp, B_FALSE, "console setup: device initialization failed"); - return (-1); } if ((serverfd = init_console_sock(zlogp)) == -1) { @@ -885,6 +890,17 @@ init_console(zlog_t *zlogp) } /* + * Maintain a simple flag that tracks if we have seen at least one state + * change. This is currently only used to handle the special case where we are + * running without a console device, which is what normally drives shutdown. + */ +void +zcons_statechanged() +{ + state_changed = B_TRUE; +} + +/* * serve_console() is the master loop for driving console I/O. It is also the * routine which is ultimately responsible for "pulling the plug" on zoneadmd * when it realizes that the daemon should shut down. 
@@ -902,6 +918,7 @@ serve_console(zlog_t *zlogp) int masterfd; zone_state_t zstate; char conspath[MAXPATHLEN]; + static boolean_t cons_warned = B_FALSE; (void) snprintf(conspath, sizeof (conspath), "/dev/zcons/%s/%s", zone_name, ZCONS_MASTER_NAME); @@ -909,6 +926,46 @@ serve_console(zlog_t *zlogp) for (;;) { masterfd = open(conspath, O_RDWR|O_NONBLOCK|O_NOCTTY); if (masterfd == -1) { + if (master_zcons_failed) { + /* + * If we don't have a console and the zone is + * not shutting down, there may have been a + * race/failure with devfs while creating the + * console. In this case we want to leave the + * zone up, even without a console, so + * periodically recheck. + */ + int i; + + /* + * In the normal flow of this loop, we use + * do_console_io to give things a chance to get + * going first. However, in this case we can't + * use that, so we have to wait for at least + * one state change before checking the state. + */ + for (i = 0; i < 60; i++) { + if (state_changed) + break; + (void) sleep(1); + } + + if (i < 60 && zone_get_state(zone_name, + &zstate) == Z_OK && + (zstate == ZONE_STATE_READY || + zstate == ZONE_STATE_RUNNING)) { + if (!cons_warned) { + zerror(zlogp, B_FALSE, + "WARNING: missing zone " + "console for %s", + zone_name); + cons_warned = B_TRUE; + } + (void) sleep(ZCONS_RETRY); + continue; + } + } + zerror(zlogp, B_TRUE, "failed to open console master"); (void) mutex_lock(&lock); goto death; diff --git a/usr/src/cmd/zoneadmd/zoneadmd.c b/usr/src/cmd/zoneadmd/zoneadmd.c index bfa3b937ca..c79f70fa06 100644 --- a/usr/src/cmd/zoneadmd/zoneadmd.c +++ b/usr/src/cmd/zoneadmd/zoneadmd.c @@ -1445,6 +1445,7 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp, rval = zone_ready(zlogp, Z_MNT_BOOT, zstate, debug); if (rval == 0) eventstream_write(Z_EVT_ZONE_READIED); + zcons_statechanged(); break; case Z_BOOT: case Z_FORCEBOOT: @@ -1455,6 +1456,7 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp, zstate, debug); } 
audit_put_record(zlogp, uc, rval, "boot"); + zcons_statechanged(); if (rval != 0) { bringup_failure_recovery = B_TRUE; (void) zone_halt(zlogp, B_FALSE, B_FALSE, @@ -1578,6 +1580,7 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp, rval = zone_bootup(zlogp, zargp->bootbuf, zstate, debug); audit_put_record(zlogp, uc, rval, "boot"); + zcons_statechanged(); if (rval != 0) { bringup_failure_recovery = B_TRUE; (void) zone_halt(zlogp, B_FALSE, B_TRUE, @@ -1592,6 +1595,7 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp, if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate, debug)) != 0) break; + zcons_statechanged(); eventstream_write(Z_EVT_ZONE_HALTED); break; case Z_REBOOT: @@ -1639,6 +1643,7 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp, if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate, debug)) != 0) break; + zcons_statechanged(); if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate, debug)) == 0) eventstream_write(Z_EVT_ZONE_READIED); @@ -1661,6 +1666,7 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp, debug)) != 0) break; eventstream_write(Z_EVT_ZONE_HALTED); + zcons_statechanged(); break; case Z_REBOOT: (void) strlcpy(boot_args, zargp->bootbuf, @@ -1672,6 +1678,7 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp, boot_args[0] = '\0'; break; } + zcons_statechanged(); if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate, debug)) != 0) { eventstream_write(Z_EVT_ZONE_BOOTFAILED); diff --git a/usr/src/cmd/zoneadmd/zoneadmd.h b/usr/src/cmd/zoneadmd/zoneadmd.h index 58ec1516fa..230d6603eb 100644 --- a/usr/src/cmd/zoneadmd/zoneadmd.h +++ b/usr/src/cmd/zoneadmd/zoneadmd.h @@ -21,7 +21,7 @@ /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, Joyent Inc. All rights reserved. + * Copyright (c) 2013, Joyent Inc. All rights reserved. 
*/ #ifndef _ZONEADMD_H @@ -152,6 +152,7 @@ extern void resolve_lofs(zlog_t *zlogp, char *path, size_t pathlen); */ extern int init_console(zlog_t *); extern void serve_console(zlog_t *); +extern void zcons_statechanged(); /* * Memory capping thread creation. |
