summaryrefslogtreecommitdiff
path: root/usr/src/cmd/zoneadmd/vplat.c
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/cmd/zoneadmd/vplat.c')
-rw-r--r--usr/src/cmd/zoneadmd/vplat.c941
1 files changed, 835 insertions, 106 deletions
diff --git a/usr/src/cmd/zoneadmd/vplat.c b/usr/src/cmd/zoneadmd/vplat.c
index 3b84bb82a0..75eca58961 100644
--- a/usr/src/cmd/zoneadmd/vplat.c
+++ b/usr/src/cmd/zoneadmd/vplat.c
@@ -34,6 +34,33 @@
* the way up, ready the zone; on the way down, they halt the zone.
* See the much longer block comment at the beginning of zoneadmd.c
* for a bigger picture of how the whole program functions.
+ *
+ * This module also has primary responsibility for the layout of "scratch
+ * zones." These are mounted, but inactive, zones that are used during
+ * operating system upgrade and potentially other administrative action. The
+ * scratch zone environment is similar to the miniroot environment. The zone's
+ * actual root is mounted read-write on /a, and the standard paths (/usr,
+ * /sbin, /lib) all lead to read-only copies of the running system's binaries.
+ * This allows the administrative tools to manipulate the zone using "-R /a"
+ * without relying on any binaries in the zone itself.
+ *
+ * If the scratch zone is on an alternate root (Live Upgrade [LU] boot
+ * environment), then we must resolve the lofs mounts used there to uncover
+ * writable (unshared) resources. Shared resources, though, are always
+ * read-only. In addition, if the "same" zone with a different root path is
+ * currently running, then "/b" inside the zone points to the running zone's
+ * root. This allows LU to synchronize configuration files during the upgrade
+ * process.
+ *
+ * To construct this environment, this module creates a tmpfs mount on
+ * $ZONEPATH/lu. Inside this scratch area, the miniroot-like environment as
+ * described above is constructed on the fly. The zone is then created using
+ * $ZONEPATH/lu as the root.
+ *
+ * Note that scratch zones are inactive. The zone's bits are not running and
+ * likely cannot be run correctly until upgrade is done. Init is not running
+ * there, nor is SMF. Because of this, the "mounted" state of a scratch zone
+ * is not a part of the usual halt/ready/boot state machine.
*/
#include <sys/param.h>
@@ -141,10 +168,22 @@ static struct symlink_info dev_symlinks[] = {
/* for routing socket */
static int rts_seqno = 0;
+/* mangled zone name when mounting in an alternate root environment */
+static char kernzone[ZONENAME_MAX];
+
+/* array of cached mount entries for resolve_lofs */
+static struct mnttab *resolve_lofs_mnts, *resolve_lofs_mnt_max;
+
/* from libsocket, not in any header file */
extern int getnetmaskbyaddr(struct in_addr, struct in_addr *);
/*
+ * An optimization for build_mnttable: reallocate (and potentially copy the
+ * data) only once every N times through the loop.
+ */
+#define MNTTAB_HUNK 32
+
+/*
* Private autofs system call
*/
extern int _autofssys(int, void *);
@@ -158,6 +197,244 @@ autofs_cleanup(zoneid_t zoneid)
return (_autofssys(AUTOFS_UNMOUNTALL, (void *)zoneid));
}
+static void
+free_mnttable(struct mnttab *mnt_array, uint_t nelem)
+{
+ uint_t i;
+
+ if (mnt_array == NULL)
+ return;
+ for (i = 0; i < nelem; i++) {
+ free(mnt_array[i].mnt_mountp);
+ free(mnt_array[i].mnt_fstype);
+ free(mnt_array[i].mnt_special);
+ free(mnt_array[i].mnt_mntopts);
+ assert(mnt_array[i].mnt_time == NULL);
+ }
+ free(mnt_array);
+}
+
+/*
+ * Build the mount table for the zone rooted at "zroot", storing the resulting
+ * array of struct mnttabs in "mnt_arrayp" and the number of elements in the
+ * array in "nelemp".
+ */
+static int
+build_mnttable(zlog_t *zlogp, const char *zroot, size_t zrootlen, FILE *mnttab,
+ struct mnttab **mnt_arrayp, uint_t *nelemp)
+{
+ struct mnttab mnt;
+ struct mnttab *mnts;
+ struct mnttab *mnp;
+ uint_t nmnt;
+
+ rewind(mnttab);
+ resetmnttab(mnttab);
+ nmnt = 0;
+ mnts = NULL;
+ while (getmntent(mnttab, &mnt) == 0) {
+ struct mnttab *tmp_array;
+
+ if (strncmp(mnt.mnt_mountp, zroot, zrootlen) != 0)
+ continue;
+ if (nmnt % MNTTAB_HUNK == 0) {
+ tmp_array = realloc(mnts,
+ (nmnt + MNTTAB_HUNK) * sizeof (*mnts));
+ if (tmp_array == NULL) {
+ free_mnttable(mnts, nmnt);
+ return (-1);
+ }
+ mnts = tmp_array;
+ }
+ mnp = &mnts[nmnt++];
+
+ /*
+ * Zero out any fields we're not using.
+ */
+ (void) memset(mnp, 0, sizeof (*mnp));
+
+ if (mnt.mnt_special != NULL)
+ mnp->mnt_special = strdup(mnt.mnt_special);
+ if (mnt.mnt_mntopts != NULL)
+ mnp->mnt_mntopts = strdup(mnt.mnt_mntopts);
+ mnp->mnt_mountp = strdup(mnt.mnt_mountp);
+ mnp->mnt_fstype = strdup(mnt.mnt_fstype);
+ if ((mnt.mnt_special != NULL && mnp->mnt_special == NULL) ||
+ (mnt.mnt_mntopts != NULL && mnp->mnt_mntopts == NULL) ||
+ mnp->mnt_mountp == NULL || mnp->mnt_fstype == NULL) {
+ zerror(zlogp, B_TRUE, "memory allocation failed");
+ free_mnttable(mnts, nmnt);
+ return (-1);
+ }
+ }
+ *mnt_arrayp = mnts;
+ *nelemp = nmnt;
+ return (0);
+}
+
+/*
+ * This is an optimization. The resolve_lofs function is used quite frequently
+ * to manipulate file paths, and on a machine with a large number of zones,
+ * there will be a huge number of mounted file systems. Thus, we trigger a
+ * reread of the list of mount points
+ */
+static void
+lofs_discard_mnttab(void)
+{
+ free_mnttable(resolve_lofs_mnts,
+ resolve_lofs_mnt_max - resolve_lofs_mnts);
+ resolve_lofs_mnts = resolve_lofs_mnt_max = NULL;
+}
+
+static int
+lofs_read_mnttab(zlog_t *zlogp)
+{
+ FILE *mnttab;
+ uint_t nmnts;
+
+ if ((mnttab = fopen(MNTTAB, "r")) == NULL)
+ return (-1);
+ if (build_mnttable(zlogp, "", 0, mnttab, &resolve_lofs_mnts,
+ &nmnts) == -1) {
+ (void) fclose(mnttab);
+ return (-1);
+ }
+ (void) fclose(mnttab);
+ resolve_lofs_mnt_max = resolve_lofs_mnts + nmnts;
+ return (0);
+}
+
+/*
+ * This function loops over potential loopback mounts and symlinks in a given
+ * path and resolves them all down to an absolute path.
+ */
+static void
+resolve_lofs(zlog_t *zlogp, char *path, size_t pathlen)
+{
+ int len, arlen;
+ const char *altroot;
+ char tmppath[MAXPATHLEN];
+ boolean_t outside_altroot;
+
+ if ((len = resolvepath(path, tmppath, sizeof (tmppath))) == -1)
+ return;
+ tmppath[len] = '\0';
+ (void) strlcpy(path, tmppath, sizeof (tmppath));
+
+ /* This happens once per zoneadmd operation. */
+ if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1)
+ return;
+
+ altroot = zonecfg_get_root();
+ arlen = strlen(altroot);
+ outside_altroot = B_FALSE;
+ for (;;) {
+ struct mnttab *mnp;
+
+ for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max;
+ mnp++) {
+ if (mnp->mnt_fstype == NULL ||
+ mnp->mnt_mountp == NULL ||
+ mnp->mnt_special == NULL ||
+ strcmp(mnp->mnt_fstype, MNTTYPE_LOFS) != 0)
+ continue;
+ len = strlen(mnp->mnt_mountp);
+ if (strncmp(mnp->mnt_mountp, path, len) == 0 &&
+ (path[len] == '/' || path[len] == '\0'))
+ break;
+ }
+ if (mnp >= resolve_lofs_mnt_max)
+ break;
+ if (outside_altroot) {
+ char *cp;
+ int olen = sizeof (MNTOPT_RO) - 1;
+
+ /*
+ * If we run into a read-only mount outside of the
+ * alternate root environment, then the user doesn't
+ * want this path to be made read-write.
+ */
+ if (mnp->mnt_mntopts != NULL &&
+ (cp = strstr(mnp->mnt_mntopts, MNTOPT_RO)) !=
+ NULL &&
+ (cp == mnp->mnt_mntopts || cp[-1] == ',') &&
+ (cp[olen] == '\0' || cp[olen] == ',')) {
+ break;
+ }
+ } else if (arlen > 0 &&
+ (strncmp(mnp->mnt_special, altroot, arlen) != 0 ||
+ (mnp->mnt_special[arlen] != '\0' &&
+ mnp->mnt_special[arlen] != '/'))) {
+ outside_altroot = B_TRUE;
+ }
+ /* use temporary buffer because new path might be longer */
+ (void) snprintf(tmppath, sizeof (tmppath), "%s%s",
+ mnp->mnt_special, path + len);
+ if ((len = resolvepath(tmppath, path, pathlen)) == -1)
+ break;
+ path[len] = '\0';
+ }
+}
+
+/*
+ * For a regular mount, check if a replacement lofs mount is needed because the
+ * referenced device is already mounted somewhere.
+ */
+static int
+check_lofs_needed(zlog_t *zlogp, struct zone_fstab *fsptr)
+{
+ struct mnttab *mnp;
+ zone_fsopt_t *optptr, *onext;
+
+ /* This happens once per zoneadmd operation. */
+ if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1)
+ return (-1);
+
+ /*
+ * If this special node isn't already in use, then it's ours alone;
+ * no need to worry about conflicting mounts.
+ */
+ for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max;
+ mnp++) {
+ if (strcmp(mnp->mnt_special, fsptr->zone_fs_special) == 0)
+ break;
+ }
+ if (mnp >= resolve_lofs_mnt_max)
+ return (0);
+
+ /*
+ * Convert this duplicate mount into a lofs mount.
+ */
+ (void) strlcpy(fsptr->zone_fs_special, mnp->mnt_mountp,
+ sizeof (fsptr->zone_fs_special));
+ (void) strlcpy(fsptr->zone_fs_type, MNTTYPE_LOFS,
+ sizeof (fsptr->zone_fs_type));
+ fsptr->zone_fs_raw[0] = '\0';
+
+ /*
+ * Discard all but one of the original options and set that to be the
+ * same set of options used for inherit package directory resources.
+ */
+ optptr = fsptr->zone_fs_options;
+ if (optptr == NULL) {
+ optptr = malloc(sizeof (*optptr));
+ if (optptr == NULL) {
+ zerror(zlogp, B_TRUE, "cannot mount %s",
+ fsptr->zone_fs_dir);
+ return (-1);
+ }
+ } else {
+ while ((onext = optptr->zone_fsopt_next) != NULL) {
+ optptr->zone_fsopt_next = onext->zone_fsopt_next;
+ free(onext);
+ }
+ }
+ (void) strcpy(optptr->zone_fsopt_opt, IPD_DEFAULT_OPTS);
+ optptr->zone_fsopt_next = NULL;
+ fsptr->zone_fs_options = optptr;
+ return (0);
+}
+
static int
make_one_dir(zlog_t *zlogp, const char *prefix, const char *subdir, mode_t mode)
{
@@ -237,8 +514,9 @@ make_dev_links(zlog_t *zlogp, char *zonepath)
(void) unlink(dev);
}
if (symlink(dev_symlinks[i].sl_target, dev) != 0) {
- zerror(zlogp, B_TRUE, "could not setup %s symlink",
- dev_symlinks[i].sl_source);
+ zerror(zlogp, B_TRUE, "could not setup %s->%s symlink",
+ dev_symlinks[i].sl_source,
+ dev_symlinks[i].sl_target);
return (-1);
}
}
@@ -257,6 +535,8 @@ create_dev_files(zlog_t *zlogp)
zerror(zlogp, B_TRUE, "unable to determine zone root");
return (-1);
}
+ if (zonecfg_in_alt_root())
+ resolve_lofs(zlogp, zonepath, sizeof (zonepath));
if (make_dev_dirs(zlogp, zonepath) != 0)
return (-1);
@@ -344,74 +624,16 @@ is_remote_fstype(const char *fstype, char *const *remote_fstypes)
return (B_FALSE);
}
-static void
-free_mnttable(struct mnttab *mnt_array, uint_t nelem)
-{
- uint_t i;
-
- if (mnt_array == NULL)
- return;
- for (i = 0; i < nelem; i++) {
- free(mnt_array[i].mnt_mountp);
- free(mnt_array[i].mnt_fstype);
- assert(mnt_array[i].mnt_special == NULL);
- assert(mnt_array[i].mnt_mntopts == NULL);
- assert(mnt_array[i].mnt_time == NULL);
- }
- free(mnt_array);
-}
-
/*
- * Build the mount table for the zone rooted at "zroot", storing the resulting
- * array of struct mnttabs in "mnt_arrayp" and the number of elements in the
- * array in "nelemp".
+ * This converts a zone root path (normally of the form .../root) to a Live
+ * Upgrade scratch zone root (of the form .../lu).
*/
-static int
-build_mnttable(zlog_t *zlogp, const char *zroot, size_t zrootlen, FILE *mnttab,
- struct mnttab **mnt_arrayp, uint_t *nelemp)
+static void
+root_to_lu(zlog_t *zlogp, char *zroot, size_t zrootlen, boolean_t isresolved)
{
- struct mnttab mnt;
- struct mnttab *mnts;
- struct mnttab *mnp;
- uint_t nmnt;
-
- rewind(mnttab);
- resetmnttab(mnttab);
- nmnt = 0;
- mnts = NULL;
- while (getmntent(mnttab, &mnt) == 0) {
- struct mnttab *tmp_array;
-
- if (strncmp(mnt.mnt_mountp, zroot, zrootlen) != 0)
- continue;
- nmnt++;
- tmp_array = realloc(mnts, nmnt * sizeof (*mnts));
- if (tmp_array == NULL) {
- nmnt--;
- free_mnttable(mnts, nmnt);
- return (-1);
- }
- mnts = tmp_array;
- mnp = &mnts[nmnt - 1];
- /*
- * Zero out the fields we won't be using.
- */
- mnp->mnt_special = NULL;
- mnp->mnt_mntopts = NULL;
- mnp->mnt_time = NULL;
-
- mnp->mnt_mountp = strdup(mnt.mnt_mountp);
- mnp->mnt_fstype = strdup(mnt.mnt_fstype);
- if (mnp->mnt_mountp == NULL ||
- mnp->mnt_fstype == NULL) {
- zerror(zlogp, B_TRUE, "memory allocation failed");
- free_mnttable(mnts, nmnt);
- return (-1);
- }
- }
- *mnt_arrayp = mnts;
- *nelemp = nmnt;
- return (0);
+ if (!isresolved && zonecfg_in_alt_root())
+ resolve_lofs(zlogp, zroot, zrootlen);
+ (void) strcpy(strrchr(zroot, '/') + 1, "lu");
}
/*
@@ -444,9 +666,8 @@ build_mnttable(zlog_t *zlogp, const char *zroot, size_t zrootlen, FILE *mnttab,
* Zone must be down (ie, no processes or threads active).
*/
static int
-unmount_filesystems(zlog_t *zlogp)
+unmount_filesystems(zlog_t *zlogp, zoneid_t zoneid, boolean_t unmount_cmd)
{
- zoneid_t zoneid;
int error = 0;
FILE *mnttab;
struct mnttab *mnts;
@@ -457,15 +678,12 @@ unmount_filesystems(zlog_t *zlogp)
boolean_t stuck = B_FALSE;
char **remote_fstypes = NULL;
- if ((zoneid = getzoneidbyname(zone_name)) == -1) {
- zerror(zlogp, B_TRUE, "unable to find zoneid");
- return (-1);
- }
-
if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) {
zerror(zlogp, B_FALSE, "unable to determine zone root");
return (-1);
}
+ if (unmount_cmd)
+ root_to_lu(zlogp, zroot, sizeof (zroot), B_FALSE);
(void) strcat(zroot, "/");
zrootlen = strlen(zroot);
@@ -796,6 +1014,7 @@ static int
mount_one(zlog_t *zlogp, struct zone_fstab *fsptr, const char *rootpath)
{
char path[MAXPATHLEN];
+ char specpath[MAXPATHLEN];
char optstr[MAX_MNTOPT_STR];
zone_fsopt_t *optptr;
@@ -815,12 +1034,22 @@ mount_one(zlog_t *zlogp, struct zone_fstab *fsptr, const char *rootpath)
if (strlen(fsptr->zone_fs_special) == 0) {
/*
* A zero-length special is how we distinguish IPDs from
- * general-purpose FSs.
+ * general-purpose FSs. Make sure it mounts from a place that
+ * can be seen via the alternate zone's root.
*/
+ if (snprintf(specpath, sizeof (specpath), "%s%s",
+ zonecfg_get_root(), fsptr->zone_fs_dir) >=
+ sizeof (specpath)) {
+ zerror(zlogp, B_FALSE, "cannot mount %s: path too "
+ "long in alternate root", fsptr->zone_fs_dir);
+ return (-1);
+ }
+ if (zonecfg_in_alt_root())
+ resolve_lofs(zlogp, specpath, sizeof (specpath));
if (domount(zlogp, MNTTYPE_LOFS, IPD_DEFAULT_OPTS,
- fsptr->zone_fs_dir, path) != 0) {
+ specpath, path) != 0) {
zerror(zlogp, B_TRUE, "failed to loopback mount %s",
- fsptr->zone_fs_dir);
+ specpath);
return (-1);
}
return (0);
@@ -843,6 +1072,36 @@ mount_one(zlog_t *zlogp, struct zone_fstab *fsptr, const char *rootpath)
}
/*
+ * If we're looking at an alternate root environment, then construct
+ * read-only loopback mounts as necessary. For all lofs mounts, make
+ * sure that the 'special' entry points inside the alternate root. (We
+ * don't do this with other mounts, as devfs isn't in the alternate
+ * root, and we need to assume the device environment is roughly the
+ * same.)
+ */
+ if (zonecfg_in_alt_root()) {
+ struct stat64 st;
+
+ if (stat64(fsptr->zone_fs_special, &st) != -1 &&
+ S_ISBLK(st.st_mode) &&
+ check_lofs_needed(zlogp, fsptr) == -1)
+ return (-1);
+ if (strcmp(fsptr->zone_fs_type, MNTTYPE_LOFS) == 0) {
+ if (snprintf(specpath, sizeof (specpath), "%s%s",
+ zonecfg_get_root(), fsptr->zone_fs_special) >=
+ sizeof (specpath)) {
+ zerror(zlogp, B_FALSE, "cannot mount %s: path "
+ "too long in alternate root",
+ fsptr->zone_fs_special);
+ return (-1);
+ }
+ resolve_lofs(zlogp, specpath, sizeof (specpath));
+ (void) strlcpy(fsptr->zone_fs_special, specpath,
+ sizeof (fsptr->zone_fs_special));
+ }
+ }
+
+ /*
* Run 'fsck -m' if there's a device to fsck.
*/
if (fsptr->zone_fs_raw[0] != '\0' &&
@@ -879,8 +1138,174 @@ free_fs_data(struct zone_fstab *fsarray, uint_t nelem)
free(fsarray);
}
+/*
+ * This function constructs the miniroot-like "scratch zone" environment. If
+ * it returns B_FALSE, then the error has already been logged.
+ */
+static boolean_t
+build_mounted(zlog_t *zlogp, char *rootpath, size_t rootlen,
+ const char *zonepath)
+{
+ char tmp[MAXPATHLEN], fromdir[MAXPATHLEN];
+ char luroot[MAXPATHLEN];
+ const char **cpp;
+ static const char *mkdirs[] = {
+ "/system", "/system/contract", "/proc", "/dev", "/tmp",
+ "/a", NULL
+ };
+ static const char *localdirs[] = {
+ "/etc", "/var", NULL
+ };
+ static const char *loopdirs[] = {
+ "/etc/lib", "/etc/fs", "/lib", "/sbin", "/platform",
+ "/usr", NULL
+ };
+ static const char *tmpdirs[] = {
+ "/tmp", "/var/run", NULL
+ };
+ FILE *fp;
+ struct stat st;
+ char *altstr;
+ uuid_t uuid;
+
+ /*
+ * Construct a small Solaris environment, including the zone root
+ * mounted on '/a' inside that environment.
+ */
+ resolve_lofs(zlogp, rootpath, rootlen);
+ (void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath);
+ resolve_lofs(zlogp, luroot, sizeof (luroot));
+ (void) snprintf(tmp, sizeof (tmp), "%s/bin", luroot);
+ (void) symlink("./usr/bin", tmp);
+
+ /*
+ * These are mostly special mount points; not handled here. (See
+ * zone_mount_early.)
+ */
+ for (cpp = mkdirs; *cpp != NULL; cpp++) {
+ (void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp);
+ if (mkdir(tmp, 0755) != 0) {
+ zerror(zlogp, B_TRUE, "cannot create %s", tmp);
+ return (B_FALSE);
+ }
+ }
+
+ /*
+ * These are mounted read-write from the zone undergoing upgrade. We
+ * must be careful not to 'leak' things from the main system into the
+ * zone, and this accomplishes that goal.
+ */
+ for (cpp = localdirs; *cpp != NULL; cpp++) {
+ (void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp);
+ (void) snprintf(fromdir, sizeof (fromdir), "%s%s", rootpath,
+ *cpp);
+ if (mkdir(tmp, 0755) != 0) {
+ zerror(zlogp, B_TRUE, "cannot create %s", tmp);
+ return (B_FALSE);
+ }
+ if (domount(zlogp, MNTTYPE_LOFS, "", fromdir, tmp) != 0) {
+ zerror(zlogp, B_TRUE, "cannot mount %s on %s", tmp,
+ *cpp);
+ return (B_FALSE);
+ }
+ }
+
+ /*
+ * These are things mounted read-only from the running system because
+ * they contain binaries that must match system.
+ */
+ for (cpp = loopdirs; *cpp != NULL; cpp++) {
+ (void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp);
+ if (mkdir(tmp, 0755) != 0) {
+ if (errno != EEXIST) {
+ zerror(zlogp, B_TRUE, "cannot create %s", tmp);
+ return (B_FALSE);
+ }
+ if (lstat(tmp, &st) != 0) {
+ zerror(zlogp, B_TRUE, "cannot stat %s", tmp);
+ return (B_FALSE);
+ }
+ /*
+ * Ignore any non-directories encountered. These are
+ * things that have been converted into symlinks
+ * (/etc/fs and /etc/lib) and no longer need a lofs
+ * fixup.
+ */
+ if (!S_ISDIR(st.st_mode))
+ continue;
+ }
+ if (domount(zlogp, MNTTYPE_LOFS, IPD_DEFAULT_OPTS, *cpp,
+ tmp) != 0) {
+ zerror(zlogp, B_TRUE, "cannot mount %s on %s", tmp,
+ *cpp);
+ return (B_FALSE);
+ }
+ }
+
+ /*
+ * These are things with tmpfs mounted inside.
+ */
+ for (cpp = tmpdirs; *cpp != NULL; cpp++) {
+ (void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp);
+ if (mkdir(tmp, 0755) != 0 && errno != EEXIST) {
+ zerror(zlogp, B_TRUE, "cannot create %s", tmp);
+ return (B_FALSE);
+ }
+ if (domount(zlogp, MNTTYPE_TMPFS, "", "swap", tmp) != 0) {
+ zerror(zlogp, B_TRUE, "cannot mount swap on %s", *cpp);
+ return (B_FALSE);
+ }
+ }
+
+ /*
+ * This is here to support lucopy. If there's an instance of this same
+ * zone on the current running system, then we mount its root up as
+ * read-only inside the scratch zone.
+ */
+ (void) zonecfg_get_uuid(zone_name, uuid);
+ altstr = strdup(zonecfg_get_root());
+ if (altstr == NULL) {
+ zerror(zlogp, B_TRUE, "out of memory");
+ return (B_FALSE);
+ }
+ zonecfg_set_root("");
+ (void) strlcpy(tmp, zone_name, sizeof (tmp));
+ (void) zonecfg_get_name_by_uuid(uuid, tmp, sizeof (tmp));
+ if (zone_get_rootpath(tmp, fromdir, sizeof (fromdir)) == Z_OK &&
+ strcmp(fromdir, rootpath) != 0) {
+ (void) snprintf(tmp, sizeof (tmp), "%s/b", luroot);
+ if (mkdir(tmp, 0755) != 0) {
+ zerror(zlogp, B_TRUE, "cannot create %s", tmp);
+ return (B_FALSE);
+ }
+ if (domount(zlogp, MNTTYPE_LOFS, IPD_DEFAULT_OPTS, fromdir,
+ tmp) != 0) {
+ zerror(zlogp, B_TRUE, "cannot mount %s on %s", tmp,
+ fromdir);
+ return (B_FALSE);
+ }
+ }
+ zonecfg_set_root(altstr);
+ free(altstr);
+
+ if ((fp = zonecfg_open_scratch(luroot, B_TRUE)) == NULL) {
+ zerror(zlogp, B_TRUE, "cannot open zone mapfile");
+ return (B_FALSE);
+ }
+ (void) ftruncate(fileno(fp), 0);
+ if (zonecfg_add_scratch(fp, zone_name, kernzone, "/") == -1) {
+ zerror(zlogp, B_TRUE, "cannot add zone mapfile entry");
+ }
+ zonecfg_close_scratch(fp);
+ (void) snprintf(tmp, sizeof (tmp), "%s/a", luroot);
+ if (domount(zlogp, MNTTYPE_LOFS, "", rootpath, tmp) != 0)
+ return (B_FALSE);
+ (void) strlcpy(rootpath, tmp, rootlen);
+ return (B_TRUE);
+}
+
static int
-mount_filesystems(zlog_t *zlogp)
+mount_filesystems(zlog_t *zlogp, boolean_t mount_cmd)
{
char rootpath[MAXPATHLEN];
char zonepath[MAXPATHLEN];
@@ -891,10 +1316,11 @@ mount_filesystems(zlog_t *zlogp)
zone_state_t zstate;
if (zone_get_state(zone_name, &zstate) != Z_OK ||
- zstate != ZONE_STATE_READY) {
+ (zstate != ZONE_STATE_READY && zstate != ZONE_STATE_MOUNTED)) {
zerror(zlogp, B_FALSE,
- "zone must be in '%s' state to mount file-systems",
- zone_state_str(ZONE_STATE_READY));
+ "zone must be in '%s' or '%s' state to mount file-systems",
+ zone_state_str(ZONE_STATE_READY),
+ zone_state_str(ZONE_STATE_MOUNTED));
goto bad;
}
@@ -936,9 +1362,14 @@ mount_filesystems(zlog_t *zlogp)
}
fs_ptr = tmp_ptr;
fsp = &fs_ptr[num_fs - 1];
+ /*
+ * Note that mount_one will prepend the alternate root to
+ * zone_fs_special and do the necessary resolution, so all that is
+ * needed here is to strip the root added by zone_get_zonepath.
+ */
(void) strlcpy(fsp->zone_fs_dir, "/dev", sizeof (fsp->zone_fs_dir));
(void) snprintf(fsp->zone_fs_special, sizeof (fsp->zone_fs_special),
- "%s/dev", zonepath);
+ "%s/dev", zonepath + strlen(zonecfg_get_root()));
fsp->zone_fs_raw[0] = '\0';
(void) strlcpy(fsp->zone_fs_type, MNTTYPE_LOFS,
sizeof (fsp->zone_fs_type));
@@ -1011,8 +1442,28 @@ mount_filesystems(zlog_t *zlogp)
zonecfg_fini_handle(handle);
handle = NULL;
+ /*
+ * If we're mounting a zone for administration, then we need to set up
+ * the "/a" environment inside the zone so that the commands that run
+ * in there have access to both the running system's utilities and the
+ * to-be-modified zone's files.
+ */
+ if (mount_cmd &&
+ !build_mounted(zlogp, rootpath, sizeof (rootpath), zonepath))
+ goto bad;
+
qsort(fs_ptr, num_fs, sizeof (*fs_ptr), fs_compare);
for (i = 0; i < num_fs; i++) {
+ if (mount_cmd && strcmp(fs_ptr[i].zone_fs_dir, "/dev") == 0) {
+ size_t slen = strlen(rootpath) - 2;
+
+ /* /dev is special and always goes at the top */
+ rootpath[slen] = '\0';
+ if (mount_one(zlogp, &fs_ptr[i], rootpath) != 0)
+ goto bad;
+ rootpath[slen] = '/';
+ continue;
+ }
if (mount_one(zlogp, &fs_ptr[i], rootpath) != 0)
goto bad;
}
@@ -1795,7 +2246,7 @@ devfsadm_call(zlog_t *zlogp, const char *arg)
if (status == 0 || status == -1)
return (status);
zerror(zlogp, B_FALSE, "%s call (%s %s %s) unexpectedly returned %d",
- DEVFSADM, DEVFSADM_PATH, arg, zone_name, status);
+ DEVFSADM, DEVFSADM_PATH, arg, zone_name, status);
return (-1);
}
@@ -2062,21 +2513,115 @@ prtmount(const char *fs, void *x) {
return (0);
}
-int
-vplat_create(zlog_t *zlogp)
+/*
+ * Look for zones running on the main system that are using this root (or any
+ * subdirectory of it). Return B_TRUE and print an error if a conflicting zone
+ * is found or if we can't tell.
+ */
+static boolean_t
+duplicate_zone_root(zlog_t *zlogp, const char *rootpath)
{
- int rval = -1;
+ zoneid_t *zids = NULL;
+ uint_t nzids = 0;
+ boolean_t retv;
+ int rlen, zlen;
+ char zroot[MAXPATHLEN];
+ char zonename[ZONENAME_MAX];
+
+ for (;;) {
+ nzids += 10;
+ zids = malloc(nzids * sizeof (*zids));
+ if (zids == NULL) {
+ zerror(zlogp, B_TRUE, "unable to allocate memory");
+ return (B_TRUE);
+ }
+ if (zone_list(zids, &nzids) == 0)
+ break;
+ free(zids);
+ }
+ retv = B_FALSE;
+ rlen = strlen(rootpath);
+ while (nzids > 0) {
+ /*
+ * Ignore errors; they just mean that the zone has disappeared
+ * while we were busy.
+ */
+ if (zone_getattr(zids[--nzids], ZONE_ATTR_ROOT, zroot,
+ sizeof (zroot)) == -1)
+ continue;
+ zlen = strlen(zroot);
+ if (zlen > rlen)
+ zlen = rlen;
+ if (strncmp(rootpath, zroot, zlen) == 0 &&
+ (zroot[zlen] == '\0' || zroot[zlen] == '/') &&
+ (rootpath[zlen] == '\0' || rootpath[zlen] == '/')) {
+ if (getzonenamebyid(zids[nzids], zonename,
+ sizeof (zonename)) == -1)
+ (void) snprintf(zonename, sizeof (zonename),
+ "id %d", (int)zids[nzids]);
+ zerror(zlogp, B_FALSE,
+ "zone root %s already in use by zone %s",
+ rootpath, zonename);
+ retv = B_TRUE;
+ break;
+ }
+ }
+ free(zids);
+ return (retv);
+}
+
+/*
+ * Search for loopback mounts that use this same source node (same device and
+ * inode). Return B_TRUE if there is one or if we can't tell.
+ */
+static boolean_t
+duplicate_reachable_path(zlog_t *zlogp, const char *rootpath)
+{
+ struct stat64 rst, zst;
+ struct mnttab *mnp;
+
+ if (stat64(rootpath, &rst) == -1) {
+ zerror(zlogp, B_TRUE, "can't stat %s", rootpath);
+ return (B_TRUE);
+ }
+ if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1)
+ return (B_TRUE);
+ for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max; mnp++) {
+ if (mnp->mnt_fstype == NULL ||
+ strcmp(MNTTYPE_LOFS, mnp->mnt_fstype) != 0)
+ continue;
+ /* We're looking at a loopback mount. Stat it. */
+ if (mnp->mnt_special != NULL &&
+ stat64(mnp->mnt_special, &zst) != -1 &&
+ rst.st_dev == zst.st_dev && rst.st_ino == zst.st_ino) {
+ zerror(zlogp, B_FALSE,
+ "zone root %s is reachable through %s",
+ rootpath, mnp->mnt_mountp);
+ return (B_TRUE);
+ }
+ }
+ return (B_FALSE);
+}
+
+zoneid_t
+vplat_create(zlog_t *zlogp, boolean_t mount_cmd)
+{
+ zoneid_t rval = -1;
priv_set_t *privs;
char rootpath[MAXPATHLEN];
char *rctlbuf = NULL;
- size_t rctlbufsz;
- zoneid_t zoneid;
+ size_t rctlbufsz = 0;
+ zoneid_t zoneid = -1;
int xerr;
+ char *kzone;
+ FILE *fp = NULL;
if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) {
zerror(zlogp, B_TRUE, "unable to determine zone root");
return (-1);
}
+ if (zonecfg_in_alt_root())
+ resolve_lofs(zlogp, rootpath, sizeof (rootpath));
if ((privs = priv_allocset()) == NULL) {
zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
@@ -2087,13 +2632,80 @@ vplat_create(zlog_t *zlogp)
zerror(zlogp, B_TRUE, "Failed to initialize privileges");
goto error;
}
- if (get_rctls(zlogp, &rctlbuf, &rctlbufsz) != 0) {
+ if (!mount_cmd && get_rctls(zlogp, &rctlbuf, &rctlbufsz) != 0) {
zerror(zlogp, B_FALSE, "Unable to get list of rctls");
goto error;
}
+ kzone = zone_name;
+
+ /*
+ * We must do this scan twice. First, we look for zones running on the
+ * main system that are using this root (or any subdirectory of it).
+ * Next, we reduce to the shortest path and search for loopback mounts
+ * that use this same source node (same device and inode).
+ */
+ if (duplicate_zone_root(zlogp, rootpath))
+ goto error;
+ if (duplicate_reachable_path(zlogp, rootpath))
+ goto error;
+
+ if (mount_cmd) {
+ root_to_lu(zlogp, rootpath, sizeof (rootpath), B_TRUE);
+
+ /*
+ * Forge up a special root for this zone. When a zone is
+ * mounted, we can't let the zone have its own root because the
+ * tools that will be used in this "scratch zone" need access
+ * to both the zone's resources and the running machine's
+ * executables.
+ *
+ * Note that the mkdir here also catches read-only filesystems.
+ */
+ if (mkdir(rootpath, 0755) != 0 && errno != EEXIST) {
+ zerror(zlogp, B_TRUE, "cannot create %s", rootpath);
+ goto error;
+ }
+ if (domount(zlogp, "tmpfs", "", "swap", rootpath) != 0)
+ goto error;
+ }
+
+ if (zonecfg_in_alt_root()) {
+ /*
+ * If we are mounting up a zone in an alternate root partition,
+ * then we have some additional work to do before starting the
+ * zone. First, resolve the root path down so that we're not
+ * fooled by duplicates. Then forge up an internal name for
+ * the zone.
+ */
+ if ((fp = zonecfg_open_scratch("", B_TRUE)) == NULL) {
+ zerror(zlogp, B_TRUE, "cannot open mapfile");
+ goto error;
+ }
+ if (zonecfg_lock_scratch(fp) != 0) {
+ zerror(zlogp, B_TRUE, "cannot lock mapfile");
+ goto error;
+ }
+ if (zonecfg_find_scratch(fp, zone_name, zonecfg_get_root(),
+ NULL, 0) == 0) {
+ zerror(zlogp, B_FALSE, "scratch zone already running");
+ goto error;
+ }
+ /* This is the preferred name */
+ (void) snprintf(kernzone, sizeof (kernzone), "SUNWlu-%s",
+ zone_name);
+ srandom(getpid());
+ while (zonecfg_reverse_scratch(fp, kernzone, NULL, 0, NULL,
+ 0) == 0) {
+ /* This is just an arbitrary name; note "." usage */
+ (void) snprintf(kernzone, sizeof (kernzone),
+ "SUNWlu.%08lX%08lX", random(), random());
+ }
+ kzone = kernzone;
+ }
+
xerr = 0;
- if ((zoneid = zone_create(zone_name, rootpath, privs, rctlbuf,
+ if ((zoneid = zone_create(kzone, rootpath, privs, rctlbuf,
rctlbufsz, &xerr)) == -1) {
if (xerr == ZE_AREMOUNTS) {
if (zonecfg_find_mounts(rootpath, NULL, NULL) < 1) {
@@ -2117,42 +2729,147 @@ vplat_create(zlog_t *zlogp)
}
goto error;
}
+
+ if (zonecfg_in_alt_root() &&
+ zonecfg_add_scratch(fp, zone_name, kernzone,
+ zonecfg_get_root()) == -1) {
+ zerror(zlogp, B_TRUE, "cannot add mapfile entry");
+ goto error;
+ }
+
/*
- * The following is a warning, not an error.
+ * The following is a warning, not an error, and is not performed when
+ * merely mounting a zone for administrative use.
*/
- if (bind_to_pool(zlogp, zoneid) != 0)
+ if (!mount_cmd && bind_to_pool(zlogp, zoneid) != 0)
zerror(zlogp, B_FALSE, "WARNING: unable to bind zone to "
"requested pool; using default pool.");
- rval = 0;
+ rval = zoneid;
+ zoneid = -1;
+
error:
+ if (zoneid != -1)
+ (void) zone_destroy(zoneid);
if (rctlbuf != NULL)
free(rctlbuf);
priv_freeset(privs);
+ if (fp != NULL)
+ zonecfg_close_scratch(fp);
+ lofs_discard_mnttab();
return (rval);
}
int
-vplat_bringup(zlog_t *zlogp)
+vplat_bringup(zlog_t *zlogp, boolean_t mount_cmd)
{
- if (create_dev_files(zlogp) != 0)
+ if (create_dev_files(zlogp) != 0 ||
+ mount_filesystems(zlogp, mount_cmd) != 0) {
+ lofs_discard_mnttab();
return (-1);
- if (mount_filesystems(zlogp) != 0)
+ }
+ if (!mount_cmd && (devfsadm_register(zlogp) != 0 ||
+ configure_network_interfaces(zlogp) != 0)) {
+ lofs_discard_mnttab();
return (-1);
- if (devfsadm_register(zlogp) != 0)
+ }
+ lofs_discard_mnttab();
+ return (0);
+}
+
+static int
+lu_root_teardown(zlog_t *zlogp)
+{
+ char zroot[MAXPATHLEN];
+
+ if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) {
+ zerror(zlogp, B_FALSE, "unable to determine zone root");
return (-1);
- if (configure_network_interfaces(zlogp) != 0)
+ }
+ root_to_lu(zlogp, zroot, sizeof (zroot), B_FALSE);
+
+ /*
+ * At this point, the processes are gone, the filesystems (save the
+ * root) are unmounted, and the zone is on death row. But there may
+ * still be creds floating about in the system that reference the
+ * zone_t, and which pin down zone_rootvp causing this call to fail
+ * with EBUSY. Thus, we try for a little while before just giving up.
+ * (How I wish this were not true, and umount2 just did the right
+ * thing, or tmpfs supported MS_FORCE This is a gross hack.)
+ */
+ if (umount2(zroot, MS_FORCE) != 0) {
+ if (errno == ENOTSUP && umount2(zroot, 0) == 0)
+ goto unmounted;
+ if (errno == EBUSY) {
+ int tries = 10;
+
+ while (--tries >= 0) {
+ (void) sleep(1);
+ if (umount2(zroot, 0) == 0)
+ goto unmounted;
+ if (errno != EBUSY)
+ break;
+ }
+ }
+ zerror(zlogp, B_TRUE, "unable to unmount '%s'", zroot);
return (-1);
- return (0);
+ }
+unmounted:
+
+ /*
+ * Only zones in an alternate root environment have scratch zone
+ * entries.
+ */
+ if (zonecfg_in_alt_root()) {
+ FILE *fp;
+ int retv;
+
+ if ((fp = zonecfg_open_scratch("", B_FALSE)) == NULL) {
+ zerror(zlogp, B_TRUE, "cannot open mapfile");
+ return (-1);
+ }
+ retv = -1;
+ if (zonecfg_lock_scratch(fp) != 0)
+ zerror(zlogp, B_TRUE, "cannot lock mapfile");
+ else if (zonecfg_delete_scratch(fp, kernzone) != 0)
+ zerror(zlogp, B_TRUE, "cannot delete map entry");
+ else
+ retv = 0;
+ zonecfg_close_scratch(fp);
+ return (retv);
+ } else {
+ return (0);
+ }
}
int
-vplat_teardown(zlog_t *zlogp)
+vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd)
{
+ char *kzone;
zoneid_t zoneid;
- if ((zoneid = getzoneidbyname(zone_name)) == ZONE_ID_UNDEFINED) {
+ kzone = zone_name;
+ if (zonecfg_in_alt_root()) {
+ FILE *fp;
+
+ if ((fp = zonecfg_open_scratch("", B_FALSE)) == NULL) {
+ zerror(zlogp, B_TRUE, "unable to open map file");
+ goto error;
+ }
+ if (zonecfg_find_scratch(fp, zone_name, zonecfg_get_root(),
+ kernzone, sizeof (kernzone)) != 0) {
+ zerror(zlogp, B_FALSE, "unable to find scratch zone");
+ zonecfg_close_scratch(fp);
+ goto error;
+ }
+ zonecfg_close_scratch(fp);
+ kzone = kernzone;
+ }
+
+ if ((zoneid = getzoneidbyname(kzone)) == ZONE_ID_UNDEFINED) {
if (!bringup_failure_recovery)
zerror(zlogp, B_TRUE, "unable to get zoneid");
+ if (unmount_cmd)
+ (void) lu_root_teardown(zlogp);
goto error;
}
@@ -2161,21 +2878,22 @@ vplat_teardown(zlog_t *zlogp)
goto error;
}
- if (devfsadm_unregister(zlogp) != 0)
+ if (!unmount_cmd && devfsadm_unregister(zlogp) != 0)
goto error;
- if (unconfigure_network_interfaces(zlogp, zoneid) != 0) {
+ if (!unmount_cmd &&
+ unconfigure_network_interfaces(zlogp, zoneid) != 0) {
zerror(zlogp, B_FALSE,
"unable to unconfigure network interfaces in zone");
goto error;
}
- if (tcp_abort_connections(zlogp, zoneid) != 0) {
+ if (!unmount_cmd && tcp_abort_connections(zlogp, zoneid) != 0) {
zerror(zlogp, B_TRUE, "unable to abort TCP connections");
goto error;
}
- if (unmount_filesystems(zlogp) != 0) {
+ if (unmount_filesystems(zlogp, zoneid, unmount_cmd) != 0) {
zerror(zlogp, B_FALSE,
"unable to unmount file systems in zone");
goto error;
@@ -2185,10 +2903,21 @@ vplat_teardown(zlog_t *zlogp)
zerror(zlogp, B_TRUE, "unable to destroy zone");
goto error;
}
- destroy_console_slave();
+ /*
+ * Special teardown for alternate boot environments: remove the tmpfs
+ * root for the zone and then remove it from the map file.
+ */
+ if (unmount_cmd && lu_root_teardown(zlogp) != 0)
+ goto error;
+
+ if (!unmount_cmd)
+ destroy_console_slave();
+
+ lofs_discard_mnttab();
return (0);
error:
+ lofs_discard_mnttab();
return (-1);
}