1 files changed, 835 insertions, 106 deletions
diff --git a/usr/src/cmd/zoneadmd/vplat.c b/usr/src/cmd/zoneadmd/vplat.c
index 3b84bb82a0..75eca58961 100644
--- a/usr/src/cmd/zoneadmd/vplat.c
+++ b/usr/src/cmd/zoneadmd/vplat.c
@@ -34,6 +34,33 @@
  * the way up, ready the zone; on the way down, they halt the zone.
  * See the much longer block comment at the beginning of zoneadmd.c
  * for a bigger picture of how the whole program functions.
+ *
+ * This module also has primary responsibility for the layout of "scratch
+ * zones."  These are mounted, but inactive, zones that are used during
+ * operating system upgrade and potentially other administrative action.  The
+ * scratch zone environment is similar to the miniroot environment.  The zone's
+ * actual root is mounted read-write on /a, and the standard paths (/usr,
+ * /sbin, /lib) all lead to read-only copies of the running system's binaries.
+ * This allows the administrative tools to manipulate the zone using "-R /a"
+ * without relying on any binaries in the zone itself.
+ *
+ * If the scratch zone is on an alternate root (Live Upgrade [LU] boot
+ * environment), then we must resolve the lofs mounts used there to uncover
+ * writable (unshared) resources.  Shared resources, though, are always
+ * read-only.  In addition, if the "same" zone with a different root path is
+ * currently running, then "/b" inside the zone points to the running zone's
+ * root.  This allows LU to synchronize configuration files during the upgrade
+ * process.
+ *
+ * To construct this environment, this module creates a tmpfs mount on
+ * $ZONEPATH/lu.  Inside this scratch area, the miniroot-like environment as
+ * described above is constructed on the fly.  The zone is then created using
+ * $ZONEPATH/lu as the root.
+ *
+ * Note that scratch zones are inactive.  The zone's bits are not running and
+ * likely cannot be run correctly until upgrade is done.  Init is not running
+ * there, nor is SMF.  Because of this, the "mounted" state of a scratch zone
+ * is not a part of the usual halt/ready/boot state machine.
  */
 
 #include <sys/param.h>
@@ -141,10 +168,22 @@ static struct symlink_info dev_symlinks[] = {
 /* for routing socket */
 static int rts_seqno = 0;
 
+/* mangled zone name when mounting in an alternate root environment */
+static char kernzone[ZONENAME_MAX];
+
+/* array of cached mount entries for resolve_lofs */
+static struct mnttab *resolve_lofs_mnts, *resolve_lofs_mnt_max;
+
 /* from libsocket, not in any header file */
 extern int getnetmaskbyaddr(struct in_addr, struct in_addr *);
 
 /*
+ * An optimization for build_mnttable: reallocate (and potentially copy the
+ * data) only once every N times through the loop.
+ */
+#define	MNTTAB_HUNK	32
+
+/*
  * Private autofs system call
  */
 extern int _autofssys(int, void *);
@@ -158,6 +197,244 @@ autofs_cleanup(zoneid_t zoneid)
 	return (_autofssys(AUTOFS_UNMOUNTALL, (void *)zoneid));
 }
 
+/* Free a table built by build_mnttable(); a NULL table is simply ignored. */
+static void
+free_mnttable(struct mnttab *mnt_array, uint_t nelem)
+{
+	struct mnttab *ent;
+
+	/* Testing for NULL here replaces a separate early return. */
+	for (ent = mnt_array; ent != NULL && ent < mnt_array + nelem; ent++) {
+		free(ent->mnt_mountp);
+		free(ent->mnt_fstype);
+		free(ent->mnt_special);
+		free(ent->mnt_mntopts);
+		assert(ent->mnt_time == NULL);
+	}
+	free(mnt_array);
+}
+
+/*
+ * Build the mount table for the zone rooted at "zroot": each mnttab entry
+ * whose mount point starts with the first "zrootlen" bytes of "zroot" is
+ * copied.  On success, return 0 with the array stored in "mnt_arrayp" and the
+ * number of elements in "nelemp"; free_mnttable() releases it.  If memory
+ * runs out, log the error, free what was built so far and return -1.
+ */
+static int
+build_mnttable(zlog_t *zlogp, const char *zroot, size_t zrootlen, FILE *mnttab,
+    struct mnttab **mnt_arrayp, uint_t *nelemp)
+{
+	struct mnttab mnt;
+	struct mnttab *mnts;
+	struct mnttab *mnp;
+	uint_t nmnt;
+
+	rewind(mnttab);
+	resetmnttab(mnttab);
+	nmnt = 0;
+	mnts = NULL;
+	while (getmntent(mnttab, &mnt) == 0) {
+		struct mnttab *tmp_array;
+
+		if (strncmp(mnt.mnt_mountp, zroot, zrootlen) != 0)
+			continue;
+		if (nmnt % MNTTAB_HUNK == 0) {
+			tmp_array = realloc(mnts,
+			    (nmnt + MNTTAB_HUNK) * sizeof (*mnts));
+			if (tmp_array == NULL)
+				goto nomem;
+			mnts = tmp_array;
+		}
+		mnp = &mnts[nmnt++];
+
+		/* Zero the entry so that even a partial one can be freed. */
+		(void) memset(mnp, 0, sizeof (*mnp));
+
+		if (mnt.mnt_special != NULL)
+			mnp->mnt_special = strdup(mnt.mnt_special);
+		if (mnt.mnt_mntopts != NULL)
+			mnp->mnt_mntopts = strdup(mnt.mnt_mntopts);
+		mnp->mnt_mountp = strdup(mnt.mnt_mountp);
+		mnp->mnt_fstype = strdup(mnt.mnt_fstype);
+		if ((mnt.mnt_special != NULL && mnp->mnt_special == NULL) ||
+		    (mnt.mnt_mntopts != NULL && mnp->mnt_mntopts == NULL) ||
+		    mnp->mnt_mountp == NULL || mnp->mnt_fstype == NULL)
+			goto nomem;
+	}
+	*mnt_arrayp = mnts;
+	*nelemp = nmnt;
+	return (0);
+
+nomem:
+	zerror(zlogp, B_TRUE, "memory allocation failed");
+	free_mnttable(mnts, nmnt);
+	return (-1);
+}
+
+/*
+ * The mount table is cached because resolve_lofs is called very often and a
+ * machine with many zones has a huge number of mounted file systems.  This
+ * throws the cache away, so that the next user re-reads the mount table.
+ */
+static void
+lofs_discard_mnttab(void)
+{
+	uint_t ncached = resolve_lofs_mnt_max - resolve_lofs_mnts;
+
+	free_mnttable(resolve_lofs_mnts, ncached);
+	resolve_lofs_mnts = resolve_lofs_mnt_max = NULL;
+}
+
+/* Read all of MNTTAB into the resolve_lofs cache; 0 on success, else -1. */
+static int
+lofs_read_mnttab(zlog_t *zlogp)
+{
+	FILE *fp;
+	uint_t count;
+	int rv;
+
+	if ((fp = fopen(MNTTAB, "r")) == NULL)
+		return (-1);
+	rv = build_mnttable(zlogp, "", 0, fp, &resolve_lofs_mnts, &count);
+	(void) fclose(fp);
+	if (rv == -1)
+		return (-1);
+	resolve_lofs_mnt_max = resolve_lofs_mnts + count;
+	return (0);
+}
+
+/*
+ * This function loops over potential loopback mounts and symlinks in a given
+ * path and resolves them all down to an absolute path.  The result replaces
+ * the contents of "path", which is a buffer of "pathlen" bytes.
+ */
+static void
+resolve_lofs(zlog_t *zlogp, char *path, size_t pathlen)
+{
+	int len, arlen;
+	const char *altroot;
+	char tmppath[MAXPATHLEN];
+	boolean_t outside_altroot = B_FALSE;
+
+	if ((len = resolvepath(path, tmppath, sizeof (tmppath) - 1)) == -1)
+		return;
+	tmppath[len] = '\0';	/* resolvepath() doesn't terminate */
+	(void) strlcpy(path, tmppath, pathlen);
+
+	/* This happens once per zoneadmd operation. */
+	if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1)
+		return;
+
+	altroot = zonecfg_get_root();
+	arlen = strlen(altroot);
+	for (;;) {
+		struct mnttab *mnp;
+
+		for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max;
+		    mnp++) {
+			if (mnp->mnt_fstype == NULL ||
+			    mnp->mnt_mountp == NULL ||
+			    mnp->mnt_special == NULL ||
+			    strcmp(mnp->mnt_fstype, MNTTYPE_LOFS) != 0)
+				continue;
+			len = strlen(mnp->mnt_mountp);
+			if (strncmp(mnp->mnt_mountp, path, len) == 0 &&
+			    (path[len] == '/' || path[len] == '\0'))
+				break;
+		}
+		if (mnp >= resolve_lofs_mnt_max)
+			break;
+		if (outside_altroot) {
+			char *cp;
+			int olen = sizeof (MNTOPT_RO) - 1;
+
+			/*
+			 * If we run into a read-only mount outside of the
+			 * alternate root environment, then the user doesn't
+			 * want this path to be made read-write.
+			 */
+			if (mnp->mnt_mntopts != NULL &&
+			    (cp = strstr(mnp->mnt_mntopts, MNTOPT_RO)) !=
+			    NULL &&
+			    (cp == mnp->mnt_mntopts || cp[-1] == ',') &&
+			    (cp[olen] == '\0' || cp[olen] == ',')) {
+				break;
+			}
+		} else if (arlen > 0 &&
+		    (strncmp(mnp->mnt_special, altroot, arlen) != 0 ||
+		    (mnp->mnt_special[arlen] != '\0' &&
+		    mnp->mnt_special[arlen] != '/'))) {
+			outside_altroot = B_TRUE;
+		}
+		/* use temporary buffer because new path might be longer */
+		(void) snprintf(tmppath, sizeof (tmppath), "%s%s",
+		    mnp->mnt_special, path + len);
+		if ((len = resolvepath(tmppath, path, pathlen - 1)) == -1)
+			break;
+		path[len] = '\0';	/* a byte was held back for this */
+	}
+}
+
+/*
+ * For a regular mount, check if a replacement lofs mount is needed because the
+ * referenced device is already mounted somewhere.  If so, "fsptr" is rewritten
+ * into that lofs mount.  Returns 0 on success (changed or not), else -1.
+ */
+static int
+check_lofs_needed(zlog_t *zlogp, struct zone_fstab *fsptr)
+{
+	struct mnttab *mnp;
+	zone_fsopt_t *optptr, *onext;
+
+	/* This happens once per zoneadmd operation. */
+	if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1)
+		return (-1);
+
+	/*
+	 * If this special node isn't already in use, then it's ours alone;
+	 * no need to worry about conflicting mounts.  Cached entries may lack
+	 * a special (build_mnttable leaves it NULL), so skip those.
+	 */
+	for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max; mnp++) {
+		if (mnp->mnt_special != NULL &&
+		    strcmp(mnp->mnt_special, fsptr->zone_fs_special) == 0)
+			break;
+	}
+	if (mnp >= resolve_lofs_mnt_max)
+		return (0);
+
+	/* Convert this duplicate mount into a lofs mount. */
+	(void) strlcpy(fsptr->zone_fs_special, mnp->mnt_mountp,
+	    sizeof (fsptr->zone_fs_special));
+	(void) strlcpy(fsptr->zone_fs_type, MNTTYPE_LOFS,
+	    sizeof (fsptr->zone_fs_type));
+	fsptr->zone_fs_raw[0] = '\0';
+
+	/*
+	 * Discard all but one of the original options and set that to be the
+	 * same set of options used for inherit package directory resources.
+	 */
+	optptr = fsptr->zone_fs_options;
+	if (optptr == NULL) {
+		optptr = malloc(sizeof (*optptr));
+		if (optptr == NULL) {
+			zerror(zlogp, B_TRUE, "cannot mount %s",
+			    fsptr->zone_fs_dir);
+			return (-1);
+		}
+	} else {
+		while ((onext = optptr->zone_fsopt_next) != NULL) {
+			optptr->zone_fsopt_next = onext->zone_fsopt_next;
+			free(onext);
+		}
+	}
+	(void) strcpy(optptr->zone_fsopt_opt, IPD_DEFAULT_OPTS);
+	optptr->zone_fsopt_next = NULL;
+	fsptr->zone_fs_options = optptr;
+	return (0);
+}
+
 static int
 make_one_dir(zlog_t *zlogp, const char *prefix, const char *subdir, mode_t mode)
 {
@@ -237,8 +514,9 @@ make_dev_links(zlog_t *zlogp, char *zonepath)
 			(void) unlink(dev);
 		}
 		if (symlink(dev_symlinks[i].sl_target, dev) != 0) {
-			zerror(zlogp, B_TRUE, "could not setup %s symlink",
-			    dev_symlinks[i].sl_source);
+			zerror(zlogp, B_TRUE, "could not setup %s->%s symlink",
+			    dev_symlinks[i].sl_source,
+			    dev_symlinks[i].sl_target);
 			return (-1);
 		}
 	}
@@ -257,6 +535,8 @@ create_dev_files(zlog_t *zlogp)
 		zerror(zlogp, B_TRUE, "unable to determine zone root");
 		return (-1);
 	}
+	if (zonecfg_in_alt_root())
+		resolve_lofs(zlogp, zonepath, sizeof (zonepath));
 
 	if (make_dev_dirs(zlogp, zonepath) != 0)
 		return (-1);
@@ -344,74 +624,16 @@ is_remote_fstype(const char *fstype, char *const *remote_fstypes)
 	return (B_FALSE);
 }
 
-static void
-free_mnttable(struct mnttab *mnt_array, uint_t nelem)
-{
-	uint_t i;
-
-	if (mnt_array == NULL)
-		return;
-	for (i = 0; i < nelem; i++) {
-		free(mnt_array[i].mnt_mountp);
-		free(mnt_array[i].mnt_fstype);
-		assert(mnt_array[i].mnt_special == NULL);
-		assert(mnt_array[i].mnt_mntopts == NULL);
-		assert(mnt_array[i].mnt_time == NULL);
-	}
-	free(mnt_array);
-}
-
 /*
- * Build the mount table for the zone rooted at "zroot", storing the resulting
- * array of struct mnttabs in "mnt_arrayp" and the number of elements in the
- * array in "nelemp".
+ * This converts, in place, a zone root path (normally of the form .../root)
+ * to a Live Upgrade scratch zone root (.../lu).  The path must contain a '/'.
  */
-static int
-build_mnttable(zlog_t *zlogp, const char *zroot, size_t zrootlen, FILE *mnttab,
-    struct mnttab **mnt_arrayp, uint_t *nelemp)
+static void
+root_to_lu(zlog_t *zlogp, char *zroot, size_t zrootlen, boolean_t isresolved)
 {
-	struct mnttab mnt;
-	struct mnttab *mnts;
-	struct mnttab *mnp;
-	uint_t nmnt;
-
-	rewind(mnttab);
-	resetmnttab(mnttab);
-	nmnt = 0;
-	mnts = NULL;
-	while (getmntent(mnttab, &mnt) == 0) {
-		struct mnttab *tmp_array;
-
-		if (strncmp(mnt.mnt_mountp, zroot, zrootlen) != 0)
-			continue;
-		nmnt++;
-		tmp_array = realloc(mnts, nmnt * sizeof (*mnts));
-		if (tmp_array == NULL) {
-			nmnt--;
-			free_mnttable(mnts, nmnt);
-			return (-1);
-		}
-		mnts = tmp_array;
-		mnp = &mnts[nmnt - 1];
-		/*
-		 * Zero out the fields we won't be using.
-		 */
-		mnp->mnt_special = NULL;
-		mnp->mnt_mntopts = NULL;
-		mnp->mnt_time = NULL;
-
-		mnp->mnt_mountp = strdup(mnt.mnt_mountp);
-		mnp->mnt_fstype = strdup(mnt.mnt_fstype);
-		if (mnp->mnt_mountp == NULL ||
-		    mnp->mnt_fstype == NULL) {
-			zerror(zlogp, B_TRUE, "memory allocation failed");
-			free_mnttable(mnts, nmnt);
-			return (-1);
-		}
-	}
-	*mnt_arrayp = mnts;
-	*nelemp = nmnt;
-	return (0);
+	if (!isresolved && zonecfg_in_alt_root())
+		resolve_lofs(zlogp, zroot, zrootlen);
+	(void) strcpy(strrchr(zroot, '/') + 1, "lu");
 }
 
 /*
@@ -444,9 +666,8 @@ build_mnttable(zlog_t *zlogp, const char *zroot, size_t zrootlen, FILE *mnttab,
  * Zone must be down (ie, no processes or threads active).
  */
 static int
-unmount_filesystems(zlog_t *zlogp)
+unmount_filesystems(zlog_t *zlogp, zoneid_t zoneid, boolean_t unmount_cmd)
 {
-	zoneid_t zoneid;
 	int error = 0;
 	FILE *mnttab;
 	struct mnttab *mnts;
@@ -457,15 +678,12 @@ unmount_filesystems(zlog_t *zlogp)
 	boolean_t stuck = B_FALSE;
 	char **remote_fstypes = NULL;
 
-	if ((zoneid = getzoneidbyname(zone_name)) == -1) {
-		zerror(zlogp, B_TRUE, "unable to find zoneid");
-		return (-1);
-	}
-
 	if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) {
 		zerror(zlogp, B_FALSE, "unable to determine zone root");
 		return (-1);
 	}
+	if (unmount_cmd)
+		root_to_lu(zlogp, zroot, sizeof (zroot), B_FALSE);
 
 	(void) strcat(zroot, "/");
 	zrootlen = strlen(zroot);
@@ -796,6 +1014,7 @@ static int
 mount_one(zlog_t *zlogp, struct zone_fstab *fsptr, const char *rootpath)
 {
 	char    path[MAXPATHLEN];
+	char	specpath[MAXPATHLEN];
 	char    optstr[MAX_MNTOPT_STR];
 	zone_fsopt_t *optptr;
 
@@ -815,12 +1034,22 @@ mount_one(zlog_t *zlogp, struct zone_fstab *fsptr, const char *rootpath)
 	if (strlen(fsptr->zone_fs_special) == 0) {
 		/*
 		 * A zero-length special is how we distinguish IPDs from
-		 * general-purpose FSs.
+		 * general-purpose FSs.  Make sure it mounts from a place that
+		 * can be seen via the alternate zone's root.
 		 */
+		if (snprintf(specpath, sizeof (specpath), "%s%s",
+		    zonecfg_get_root(), fsptr->zone_fs_dir) >=
+		    sizeof (specpath)) {
+			zerror(zlogp, B_FALSE, "cannot mount %s: path too "
+			    "long in alternate root", fsptr->zone_fs_dir);
+			return (-1);
+		}
+		if (zonecfg_in_alt_root())
+			resolve_lofs(zlogp, specpath, sizeof (specpath));
 		if (domount(zlogp, MNTTYPE_LOFS, IPD_DEFAULT_OPTS,
-		    fsptr->zone_fs_dir, path) != 0) {
+		    specpath, path) != 0) {
 			zerror(zlogp, B_TRUE, "failed to loopback mount %s",
-			    fsptr->zone_fs_dir);
+			    specpath);
 			return (-1);
 		}
 		return (0);
@@ -843,6 +1072,36 @@ mount_one(zlog_t *zlogp, struct zone_fstab *fsptr, const char *rootpath)
 	}
 
 	/*
+	 * If we're looking at an alternate root environment, then construct
+	 * read-only loopback mounts as necessary.  For all lofs mounts, make
+	 * sure that the 'special' entry points inside the alternate root.  (We
+	 * don't do this with other mounts, as devfs isn't in the alternate
+	 * root, and we need to assume the device environment is roughly the
+	 * same.)
+	 */
+	if (zonecfg_in_alt_root()) {
+		struct stat64 st;
+
+		if (stat64(fsptr->zone_fs_special, &st) != -1 &&
+		    S_ISBLK(st.st_mode) &&
+		    check_lofs_needed(zlogp, fsptr) == -1)
+			return (-1);
+		if (strcmp(fsptr->zone_fs_type, MNTTYPE_LOFS) == 0) {
+			if (snprintf(specpath, sizeof (specpath), "%s%s",
+			    zonecfg_get_root(), fsptr->zone_fs_special) >=
+			    sizeof (specpath)) {
+				zerror(zlogp, B_FALSE, "cannot mount %s: path "
+				    "too long in alternate root",
+				    fsptr->zone_fs_special);
+				return (-1);
+			}
+			resolve_lofs(zlogp, specpath, sizeof (specpath));
+			(void) strlcpy(fsptr->zone_fs_special, specpath,
+			    sizeof (fsptr->zone_fs_special));
+		}
+	}
+
+	/*
 	 * Run 'fsck -m' if there's a device to fsck.
 	 */
 	if (fsptr->zone_fs_raw[0] != '\0' &&
@@ -879,8 +1138,174 @@ free_fs_data(struct zone_fstab *fsarray, uint_t nelem)
 	free(fsarray);
 }
 
+/*
+ * This function constructs the miniroot-like "scratch zone" environment under
+ * "zonepath"/lu.  On success it returns B_TRUE and rewrites "rootpath" (a
+ * buffer of "rootlen" bytes) to the zone root's new mount point, .../lu/a.
+ * If it returns B_FALSE, then the error has already been logged.
+ */
+static boolean_t
+build_mounted(zlog_t *zlogp, char *rootpath, size_t rootlen,
+    const char *zonepath)
+{
+	char tmp[MAXPATHLEN], fromdir[MAXPATHLEN], luroot[MAXPATHLEN];
+	const char **cpp;
+	static const char *mkdirs[] = {
+		"/system", "/system/contract", "/proc", "/dev", "/tmp",
+		"/a", NULL
+	};
+	static const char *localdirs[] = {
+		"/etc", "/var", NULL
+	};
+	static const char *loopdirs[] = {
+		"/etc/lib", "/etc/fs", "/lib", "/sbin", "/platform",
+		"/usr", NULL
+	};
+	static const char *tmpdirs[] = {
+		"/tmp", "/var/run", NULL
+	};
+	FILE *fp;
+	struct stat st;
+	char *altstr;
+	uuid_t uuid;
+	boolean_t ok = B_TRUE;
+
+	/*
+	 * Construct a small Solaris environment, including the zone root
+	 * mounted on '/a' inside that environment.
+	 */
+	resolve_lofs(zlogp, rootpath, rootlen);
+	(void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath);
+	resolve_lofs(zlogp, luroot, sizeof (luroot));
+	(void) snprintf(tmp, sizeof (tmp), "%s/bin", luroot);
+	(void) symlink("./usr/bin", tmp);
+
+	/*
+	 * These are mostly special mount points; not handled here.  (See
+	 * zone_mount_early.)
+	 */
+	for (cpp = mkdirs; *cpp != NULL; cpp++) {
+		(void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp);
+		if (mkdir(tmp, 0755) != 0) {
+			zerror(zlogp, B_TRUE, "cannot create %s", tmp);
+			return (B_FALSE);
+		}
+	}
+
+	/*
+	 * These are mounted read-write from the zone undergoing upgrade.  We
+	 * must be careful not to 'leak' things from the main system into the
+	 * zone, and this accomplishes that goal.
+	 */
+	for (cpp = localdirs; *cpp != NULL; cpp++) {
+		(void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp);
+		(void) snprintf(fromdir, sizeof (fromdir), "%s%s", rootpath,
+		    *cpp);
+		if (mkdir(tmp, 0755) != 0) {
+			zerror(zlogp, B_TRUE, "cannot create %s", tmp);
+			return (B_FALSE);
+		}
+		if (domount(zlogp, MNTTYPE_LOFS, "", fromdir, tmp) != 0) {
+			zerror(zlogp, B_TRUE, "cannot mount %s on %s",
+			    fromdir, tmp);
+			return (B_FALSE);
+		}
+	}
+
+	/*
+	 * These are things mounted read-only from the running system because
+	 * they contain binaries that must match system.
+	 */
+	for (cpp = loopdirs; *cpp != NULL; cpp++) {
+		(void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp);
+		if (mkdir(tmp, 0755) != 0) {
+			if (errno != EEXIST) {
+				zerror(zlogp, B_TRUE, "cannot create %s", tmp);
+				return (B_FALSE);
+			}
+			if (lstat(tmp, &st) != 0) {
+				zerror(zlogp, B_TRUE, "cannot stat %s", tmp);
+				return (B_FALSE);
+			}
+			/*
+			 * Ignore any non-directories encountered.  These are
+			 * things that have been converted into symlinks
+			 * (/etc/fs and /etc/lib) and no longer need a lofs
+			 * fixup.
+			 */
+			if (!S_ISDIR(st.st_mode))
+				continue;
+		}
+		if (domount(zlogp, MNTTYPE_LOFS, IPD_DEFAULT_OPTS, *cpp,
+		    tmp) != 0) {
+			zerror(zlogp, B_TRUE, "cannot mount %s on %s", *cpp,
+			    tmp);
+			return (B_FALSE);
+		}
+	}
+
+	/* These are things with tmpfs mounted inside. */
+	for (cpp = tmpdirs; *cpp != NULL; cpp++) {
+		(void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp);
+		if (mkdir(tmp, 0755) != 0 && errno != EEXIST) {
+			zerror(zlogp, B_TRUE, "cannot create %s", tmp);
+			return (B_FALSE);
+		}
+		if (domount(zlogp, MNTTYPE_TMPFS, "", "swap", tmp) != 0) {
+			zerror(zlogp, B_TRUE, "cannot mount swap on %s", *cpp);
+			return (B_FALSE);
+		}
+	}
+
+	/*
+	 * This is here to support lucopy.  If there's an instance of this same
+	 * zone on the current running system, then we mount its root up as
+	 * read-only inside the scratch zone.
+	 */
+	(void) zonecfg_get_uuid(zone_name, uuid);
+	altstr = strdup(zonecfg_get_root());
+	if (altstr == NULL) {
+		zerror(zlogp, B_TRUE, "out of memory");
+		return (B_FALSE);
+	}
+	zonecfg_set_root("");
+	(void) strlcpy(tmp, zone_name, sizeof (tmp));
+	(void) zonecfg_get_name_by_uuid(uuid, tmp, sizeof (tmp));
+	if (zone_get_rootpath(tmp, fromdir, sizeof (fromdir)) == Z_OK &&
+	    strcmp(fromdir, rootpath) != 0) {
+		(void) snprintf(tmp, sizeof (tmp), "%s/b", luroot);
+		if (mkdir(tmp, 0755) != 0) {
+			zerror(zlogp, B_TRUE, "cannot create %s", tmp);
+			ok = B_FALSE;
+		} else if (domount(zlogp, MNTTYPE_LOFS, IPD_DEFAULT_OPTS,
+		    fromdir, tmp) != 0) {
+			zerror(zlogp, B_TRUE, "cannot mount %s on %s",
+			    fromdir, tmp);
+			ok = B_FALSE;
+		}
+	}
+	zonecfg_set_root(altstr);	/* must happen on failure, too */
+	free(altstr);
+	if (!ok)
+		return (B_FALSE);
+
+	if ((fp = zonecfg_open_scratch(luroot, B_TRUE)) == NULL) {
+		zerror(zlogp, B_TRUE, "cannot open zone mapfile");
+		return (B_FALSE);
+	}
+	(void) ftruncate(fileno(fp), 0);
+	if (zonecfg_add_scratch(fp, zone_name, kernzone, "/") == -1)
+		zerror(zlogp, B_TRUE, "cannot add zone mapfile entry");
+	zonecfg_close_scratch(fp);
+	(void) snprintf(tmp, sizeof (tmp), "%s/a", luroot);
+	if (domount(zlogp, MNTTYPE_LOFS, "", rootpath, tmp) != 0)
+		return (B_FALSE);
+	(void) strlcpy(rootpath, tmp, rootlen);
+	return (B_TRUE);
+}
+
 static int
-mount_filesystems(zlog_t *zlogp)
+mount_filesystems(zlog_t *zlogp, boolean_t mount_cmd)
 {
 	char	rootpath[MAXPATHLEN];
 	char	zonepath[MAXPATHLEN];
@@ -891,10 +1316,11 @@ mount_filesystems(zlog_t *zlogp)
 	zone_state_t zstate;
 
 	if (zone_get_state(zone_name, &zstate) != Z_OK ||
-	    zstate != ZONE_STATE_READY) {
+	    (zstate != ZONE_STATE_READY && zstate != ZONE_STATE_MOUNTED)) {
 		zerror(zlogp, B_FALSE,
-		    "zone must be in '%s' state to mount file-systems",
-		    zone_state_str(ZONE_STATE_READY));
+		    "zone must be in '%s' or '%s' state to mount file-systems",
+		    zone_state_str(ZONE_STATE_READY),
+		    zone_state_str(ZONE_STATE_MOUNTED));
 		goto bad;
 	}
 
@@ -936,9 +1362,14 @@ mount_filesystems(zlog_t *zlogp)
 	}
 	fs_ptr = tmp_ptr;
 	fsp = &fs_ptr[num_fs - 1];
+	/*
+	 * Note that mount_one will prepend the alternate root to
+	 * zone_fs_special and do the necessary resolution, so all that is
+	 * needed here is to strip the root added by zone_get_zonepath.
+	 */
 	(void) strlcpy(fsp->zone_fs_dir, "/dev", sizeof (fsp->zone_fs_dir));
 	(void) snprintf(fsp->zone_fs_special, sizeof (fsp->zone_fs_special),
-	    "%s/dev", zonepath);
+	    "%s/dev", zonepath + strlen(zonecfg_get_root()));
 	fsp->zone_fs_raw[0] = '\0';
 	(void) strlcpy(fsp->zone_fs_type, MNTTYPE_LOFS,
 	    sizeof (fsp->zone_fs_type));
@@ -1011,8 +1442,28 @@ mount_filesystems(zlog_t *zlogp)
 	zonecfg_fini_handle(handle);
 	handle = NULL;
 
+	/*
+	 * If we're mounting a zone for administration, then we need to set up
+	 * the "/a" environment inside the zone so that the commands that run
+	 * in there have access to both the running system's utilities and the
+	 * to-be-modified zone's files.
+	 */
+	if (mount_cmd &&
+	    !build_mounted(zlogp, rootpath, sizeof (rootpath), zonepath))
+		goto bad;
+
 	qsort(fs_ptr, num_fs, sizeof (*fs_ptr), fs_compare);
 	for (i = 0; i < num_fs; i++) {
+		if (mount_cmd && strcmp(fs_ptr[i].zone_fs_dir, "/dev") == 0) {
+			size_t slen = strlen(rootpath) - 2;
+
+			/* /dev is special and always goes at the top */
+			rootpath[slen] = '\0';
+			if (mount_one(zlogp, &fs_ptr[i], rootpath) != 0)
+				goto bad;
+			rootpath[slen] = '/';
+			continue;
+		}
 		if (mount_one(zlogp, &fs_ptr[i], rootpath) != 0)
 			goto bad;
 	}
@@ -1795,7 +2246,7 @@ devfsadm_call(zlog_t *zlogp, const char *arg)
 	if (status == 0 || status == -1)
 		return (status);
 	zerror(zlogp, B_FALSE, "%s call (%s %s %s) unexpectedly returned %d",
-	    DEVFSADM, DEVFSADM_PATH, arg, zone_name, status);
+		    DEVFSADM, DEVFSADM_PATH, arg, zone_name, status);
 	return (-1);
 }
 
@@ -2062,21 +2513,115 @@ prtmount(const char *fs, void *x) {
 	return (0);
 }
 
-int
-vplat_create(zlog_t *zlogp)
+/*
+ * Look for zones running on the main system that use this root or any parent
+ * or subdirectory of it.  Return B_TRUE and log an error if one is found or if
+ * we can't tell.  zone_list() is retried until the whole list fits in "zids".
+ */
+static boolean_t
+duplicate_zone_root(zlog_t *zlogp, const char *rootpath)
 {
-	int rval = -1;
+	zoneid_t *zids = NULL;
+	uint_t nzids = 0, nalloc;
+	boolean_t retv = B_FALSE;
+	int rlen, zlen;
+	char zroot[MAXPATHLEN];
+	char zonename[ZONENAME_MAX];
+
+	for (;;) {
+		nzids += 10;
+		zids = malloc(nzids * sizeof (*zids));
+		if (zids == NULL) {
+			zerror(zlogp, B_TRUE, "unable to allocate memory");
+			return (B_TRUE);
+		}
+		nalloc = nzids;
+		if (zone_list(zids, &nzids) == 0 && nzids <= nalloc)
+			break;
+		free(zids);
+	}
+	rlen = strlen(rootpath);
+	while (nzids > 0) {
+		/*
+		 * Ignore errors; they just mean that the zone has disappeared
+		 * while we were busy.
+		 */
+		if (zone_getattr(zids[--nzids], ZONE_ATTR_ROOT, zroot,
+		    sizeof (zroot)) == -1)
+			continue;
+		zlen = strlen(zroot);
+		if (zlen > rlen)
+			zlen = rlen;
+		if (strncmp(rootpath, zroot, zlen) == 0 &&
+		    (zroot[zlen] == '\0' || zroot[zlen] == '/') &&
+		    (rootpath[zlen] == '\0' || rootpath[zlen] == '/')) {
+			if (getzonenamebyid(zids[nzids], zonename,
+			    sizeof (zonename)) == -1)
+				(void) snprintf(zonename, sizeof (zonename),
+				    "id %d", (int)zids[nzids]);
+			zerror(zlogp, B_FALSE,
+			    "zone root %s already in use by zone %s",
+			    rootpath, zonename);
+			retv = B_TRUE;
+			break;
+		}
+	}
+	free(zids);
+	return (retv);
+}
+
+/*
+ * Report whether some loopback mount has the same source node (device and
+ * inode) as "rootpath".  B_TRUE means that one does, or that we can't tell.
+ */
+static boolean_t
+duplicate_reachable_path(zlog_t *zlogp, const char *rootpath)
+{
+	struct stat64 rootst, srcst;
+	struct mnttab *ent;
+
+	if (stat64(rootpath, &rootst) == -1) {
+		zerror(zlogp, B_TRUE, "can't stat %s", rootpath);
+		return (B_TRUE);
+	}
+	if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1)
+		return (B_TRUE);
+	for (ent = resolve_lofs_mnts; ent < resolve_lofs_mnt_max; ent++) {
+		/* Only loopback mounts that name a source matter here. */
+		if (ent->mnt_fstype == NULL || ent->mnt_special == NULL ||
+		    strcmp(MNTTYPE_LOFS, ent->mnt_fstype) != 0)
+			continue;
+		if (stat64(ent->mnt_special, &srcst) == -1)
+			continue;
+		if (rootst.st_dev != srcst.st_dev ||
+		    rootst.st_ino != srcst.st_ino)
+			continue;
+		zerror(zlogp, B_FALSE, "zone root %s is reachable through %s",
+		    rootpath, ent->mnt_mountp);
+		return (B_TRUE);
+	}
+	return (B_FALSE);
+}
+
+zoneid_t
+vplat_create(zlog_t *zlogp, boolean_t mount_cmd)
+{
+	zoneid_t rval = -1;
 	priv_set_t *privs;
 	char rootpath[MAXPATHLEN];
 	char *rctlbuf = NULL;
-	size_t rctlbufsz;
-	zoneid_t zoneid;
+	size_t rctlbufsz = 0;
+	zoneid_t zoneid = -1;
 	int xerr;
+	char *kzone;
+	FILE *fp = NULL;
 
 	if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) {
 		zerror(zlogp, B_TRUE, "unable to determine zone root");
 		return (-1);
 	}
+	if (zonecfg_in_alt_root())
+		resolve_lofs(zlogp, rootpath, sizeof (rootpath));
 
 	if ((privs = priv_allocset()) == NULL) {
 		zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
@@ -2087,13 +2632,80 @@ vplat_create(zlog_t *zlogp)
 		zerror(zlogp, B_TRUE, "Failed to initialize privileges");
 		goto error;
 	}
-	if (get_rctls(zlogp, &rctlbuf, &rctlbufsz) != 0) {
+	if (!mount_cmd && get_rctls(zlogp, &rctlbuf, &rctlbufsz) != 0) {
 		zerror(zlogp, B_FALSE, "Unable to get list of rctls");
 		goto error;
 	}
 
+	kzone = zone_name;
+
+	/*
+	 * We must do this scan twice.  First, we look for zones running on the
+	 * main system that are using this root (or any subdirectory of it).
+	 * Next, we reduce to the shortest path and search for loopback mounts
+	 * that use this same source node (same device and inode).
+	 */
+	if (duplicate_zone_root(zlogp, rootpath))
+		goto error;
+	if (duplicate_reachable_path(zlogp, rootpath))
+		goto error;
+
+	if (mount_cmd) {
+		root_to_lu(zlogp, rootpath, sizeof (rootpath), B_TRUE);
+
+		/*
+		 * Forge up a special root for this zone.  When a zone is
+		 * mounted, we can't let the zone have its own root because the
+		 * tools that will be used in this "scratch zone" need access
+		 * to both the zone's resources and the running machine's
+		 * executables.
+		 *
+		 * Note that the mkdir here also catches read-only filesystems.
+		 */
+		if (mkdir(rootpath, 0755) != 0 && errno != EEXIST) {
+			zerror(zlogp, B_TRUE, "cannot create %s", rootpath);
+			goto error;
+		}
+		if (domount(zlogp, "tmpfs", "", "swap", rootpath) != 0)
+			goto error;
+	}
+
+	if (zonecfg_in_alt_root()) {
+		/*
+		 * If we are mounting up a zone in an alternate root partition,
+		 * then we have some additional work to do before starting the
+		 * zone.  First, resolve the root path down so that we're not
+		 * fooled by duplicates.  Then forge up an internal name for
+		 * the zone.
+		 */
+		if ((fp = zonecfg_open_scratch("", B_TRUE)) == NULL) {
+			zerror(zlogp, B_TRUE, "cannot open mapfile");
+			goto error;
+		}
+		if (zonecfg_lock_scratch(fp) != 0) {
+			zerror(zlogp, B_TRUE, "cannot lock mapfile");
+			goto error;
+		}
+		if (zonecfg_find_scratch(fp, zone_name, zonecfg_get_root(),
+		    NULL, 0) == 0) {
+			zerror(zlogp, B_FALSE, "scratch zone already running");
+			goto error;
+		}
+		/* This is the preferred name */
+		(void) snprintf(kernzone, sizeof (kernzone), "SUNWlu-%s",
+		    zone_name);
+		srandom(getpid());
+		while (zonecfg_reverse_scratch(fp, kernzone, NULL, 0, NULL,
+		    0) == 0) {
+			/* This is just an arbitrary name; note "." usage */
+			(void) snprintf(kernzone, sizeof (kernzone),
+			    "SUNWlu.%08lX%08lX", random(), random());
+		}
+		kzone = kernzone;
+	}
+
 	xerr = 0;
-	if ((zoneid = zone_create(zone_name, rootpath, privs, rctlbuf,
+	if ((zoneid = zone_create(kzone, rootpath, privs, rctlbuf,
 	    rctlbufsz, &xerr)) == -1) {
 		if (xerr == ZE_AREMOUNTS) {
 			if (zonecfg_find_mounts(rootpath, NULL, NULL) < 1) {
@@ -2117,42 +2729,147 @@ vplat_create(zlog_t *zlogp)
 		}
 		goto error;
 	}
+
+	if (zonecfg_in_alt_root() &&
+	    zonecfg_add_scratch(fp, zone_name, kernzone,
+	    zonecfg_get_root()) == -1) {
+		zerror(zlogp, B_TRUE, "cannot add mapfile entry");
+		goto error;
+	}
+
 	/*
-	 * The following is a warning, not an error.
+	 * The following is a warning, not an error, and is not performed when
+	 * merely mounting a zone for administrative use.
 	 */
-	if (bind_to_pool(zlogp, zoneid) != 0)
+	if (!mount_cmd && bind_to_pool(zlogp, zoneid) != 0)
 		zerror(zlogp, B_FALSE, "WARNING: unable to bind zone to "
 		    "requested pool; using default pool.");
-	rval = 0;
+	rval = zoneid;
+	zoneid = -1;
+
 error:
+	if (zoneid != -1)
+		(void) zone_destroy(zoneid);
 	if (rctlbuf != NULL)
 		free(rctlbuf);
 	priv_freeset(privs);
+	if (fp != NULL)
+		zonecfg_close_scratch(fp);
+	lofs_discard_mnttab();
 	return (rval);
 }
 
 int
-vplat_bringup(zlog_t *zlogp)
+vplat_bringup(zlog_t *zlogp, boolean_t mount_cmd)
 {
-	if (create_dev_files(zlogp) != 0)
+	if (create_dev_files(zlogp) != 0 ||
+	    mount_filesystems(zlogp, mount_cmd) != 0) {
+		lofs_discard_mnttab();	/* drop the cached mnttab */
 		return (-1);
-	if (mount_filesystems(zlogp) != 0)
+	}	/* the next step is skipped entirely when mount_cmd is set */
+	if (!mount_cmd && (devfsadm_register(zlogp) != 0 ||
+	    configure_network_interfaces(zlogp) != 0)) {
+		lofs_discard_mnttab();	/* drop the cached mnttab */
 		return (-1);
-	if (devfsadm_register(zlogp) != 0)
+	}
+	lofs_discard_mnttab();		/* dropped on success as well */
+	return (0);
+}
+
+/* Unmount .../lu; in an alternate root also delete the zone's map entry. */
+static int
+lu_root_teardown(zlog_t *zlogp)
+{
+	char zroot[MAXPATHLEN];
+
+	if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) {
+		zerror(zlogp, B_FALSE, "unable to determine zone root");
 		return (-1);
-	if (configure_network_interfaces(zlogp) != 0)
+	}
+	root_to_lu(zlogp, zroot, sizeof (zroot), B_FALSE);
+
+	/*
+	 * At this point, the processes are gone, the filesystems (save the
+	 * root) are unmounted, and the zone is on death row.  But creds still
+	 * floating about may reference the zone_t and pin down zone_rootvp,
+	 * making this call fail with EBUSY, so we retry for a little while
+	 * before giving up.  (How I wish this were not true, and umount2 just
+	 * did the right thing, or tmpfs supported MS_FORCE.  A gross hack.)
+	 */
+	if (umount2(zroot, MS_FORCE) != 0) {
+		if (errno == ENOTSUP && umount2(zroot, 0) == 0)
+			goto unmounted;
+		if (errno == EBUSY) {
+			int tries = 10;
+
+			while (--tries >= 0) {
+				(void) sleep(1);
+				if (umount2(zroot, 0) == 0)
+					goto unmounted;
+				if (errno != EBUSY)
+					break;
+			}
+		}
+		zerror(zlogp, B_TRUE, "unable to unmount '%s'", zroot);
 		return (-1);
-	return (0);
+	}
+unmounted:
+
+	/*
+	 * Only zones in an alternate root environment have scratch zone
+	 * entries.
+	 */
+	if (zonecfg_in_alt_root()) {
+		FILE *fp;
+		int retv;
+
+		if ((fp = zonecfg_open_scratch("", B_FALSE)) == NULL) {
+			zerror(zlogp, B_TRUE, "cannot open mapfile");
+			return (-1);
+		}
+		retv = -1;
+		if (zonecfg_lock_scratch(fp) != 0)
+			zerror(zlogp, B_TRUE, "cannot lock mapfile");
+		else if (zonecfg_delete_scratch(fp, kernzone) != 0)
+			zerror(zlogp, B_TRUE, "cannot delete map entry");
+		else
+			retv = 0;
+		zonecfg_close_scratch(fp);
+		return (retv);
+	} else {
+		return (0);
+	}
 }
 
 int
-vplat_teardown(zlog_t *zlogp)
+vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd)
 {
+	char *kzone;
 	zoneid_t zoneid;
 
-	if ((zoneid = getzoneidbyname(zone_name)) == ZONE_ID_UNDEFINED) {
+	kzone = zone_name;
+	if (zonecfg_in_alt_root()) {
+		FILE *fp;
+
+		if ((fp = zonecfg_open_scratch("", B_FALSE)) == NULL) {
+			zerror(zlogp, B_TRUE, "unable to open map file");
+			goto error;
+		}
+		if (zonecfg_find_scratch(fp, zone_name, zonecfg_get_root(),
+		    kernzone, sizeof (kernzone)) != 0) {
+			zerror(zlogp, B_FALSE, "unable to find scratch zone");
+			zonecfg_close_scratch(fp);
+			goto error;
+		}
+		zonecfg_close_scratch(fp);
+		kzone = kernzone;
+	}
+
+	if ((zoneid = getzoneidbyname(kzone)) == ZONE_ID_UNDEFINED) {
 		if (!bringup_failure_recovery)
 			zerror(zlogp, B_TRUE, "unable to get zoneid");
+		if (unmount_cmd)
+			(void) lu_root_teardown(zlogp);
 		goto error;
 	}
 
@@ -2161,21 +2878,22 @@ vplat_teardown(zlog_t *zlogp)
 		goto error;
 	}
 
-	if (devfsadm_unregister(zlogp) != 0)
+	if (!unmount_cmd && devfsadm_unregister(zlogp) != 0)
 		goto error;
 
-	if (unconfigure_network_interfaces(zlogp, zoneid) != 0) {
+	if (!unmount_cmd &&
+	    unconfigure_network_interfaces(zlogp, zoneid) != 0) {
 		zerror(zlogp, B_FALSE,
 		    "unable to unconfigure network interfaces in zone");
 		goto error;
 	}
 
-	if (tcp_abort_connections(zlogp, zoneid) != 0) {
+	if (!unmount_cmd && tcp_abort_connections(zlogp, zoneid) != 0) {
 		zerror(zlogp, B_TRUE, "unable to abort TCP connections");
 		goto error;
 	}
 
-	if (unmount_filesystems(zlogp) != 0) {
+	if (unmount_filesystems(zlogp, zoneid, unmount_cmd) != 0) {
 		zerror(zlogp, B_FALSE,
 		    "unable to unmount file systems in zone");
 		goto error;
@@ -2185,10 +2903,21 @@ vplat_teardown(zlog_t *zlogp)
 		zerror(zlogp, B_TRUE, "unable to destroy zone");
 		goto error;
 	}
-	destroy_console_slave();
 
+	/*
+	 * Special teardown for alternate boot environments: remove the tmpfs
+	 * root for the zone and then remove it from the map file.
+	 */
+	if (unmount_cmd && lu_root_teardown(zlogp) != 0)
+		goto error;
+
+	if (!unmount_cmd)
+		destroy_console_slave();
+
+	lofs_discard_mnttab();
 	return (0);
 
 error:
+	lofs_discard_mnttab();
 	return (-1);
 }