summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src')
-rw-r--r--usr/src/uts/common/fs/vfs.c24
-rw-r--r--usr/src/uts/common/os/zone.c127
-rw-r--r--usr/src/uts/common/sys/zone.h12
3 files changed, 99 insertions, 64 deletions
diff --git a/usr/src/uts/common/fs/vfs.c b/usr/src/uts/common/fs/vfs.c
index 409e69a00d..6e93d056df 100644
--- a/usr/src/uts/common/fs/vfs.c
+++ b/usr/src/uts/common/fs/vfs.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
@@ -1129,6 +1130,7 @@ domount(char *fsname, struct mounta *uap, vnode_t *vp, struct cred *credp,
struct pathname pn, rpn;
vsk_anchor_t *vskap;
char fstname[FSTYPSZ];
+ zone_t *zone;
/*
* The v_flag value for the mount point vp is permanently set
@@ -1590,9 +1592,24 @@ domount(char *fsname, struct mounta *uap, vnode_t *vp, struct cred *credp,
}
/*
- * Serialize with zone creations.
+ * Serialize with zone state transitions.
+ * See vfs_list_add; zone mounted into is:
+ * zone_find_by_path(refstr_value(vfsp->vfs_mntpt))
+ * not the zone doing the mount (curproc->p_zone), but if we're already
+ * inside a NGZ, then we know what zone we are.
*/
- mount_in_progress();
+ if (INGLOBALZONE(curproc)) {
+ zone = zone_find_by_path(mountpt);
+ ASSERT(zone != NULL);
+ } else {
+ zone = curproc->p_zone;
+ /*
+ * zone_find_by_path does a hold, so do one here too so that
+ * we can do a zone_rele after mount_completed.
+ */
+ zone_hold(zone);
+ }
+ mount_in_progress(zone);
/*
* Instantiate (or reinstantiate) the file system. If appropriate,
* splice it into the file system name space.
@@ -1761,7 +1778,8 @@ domount(char *fsname, struct mounta *uap, vnode_t *vp, struct cred *credp,
vfs_unlock(vfsp);
}
- mount_completed();
+ mount_completed(zone);
+ zone_rele(zone);
if (splice)
vn_vfsunlock(vp);
diff --git a/usr/src/uts/common/os/zone.c b/usr/src/uts/common/os/zone.c
index 5d146c03b5..56c654331e 100644
--- a/usr/src/uts/common/os/zone.c
+++ b/usr/src/uts/common/os/zone.c
@@ -378,13 +378,6 @@ rctl_hndl_t rc_zone_shmmax;
rctl_hndl_t rc_zone_shmmni;
rctl_hndl_t rc_zone_semmni;
rctl_hndl_t rc_zone_msgmni;
-/*
- * Synchronization primitives used to synchronize between mounts and zone
- * creation/destruction.
- */
-static int mounts_in_progress;
-static kcondvar_t mount_cv;
-static kmutex_t mount_lock;
const char * const zone_default_initname = "/sbin/init";
static char * const zone_prefix = "/zone/";
@@ -430,17 +423,20 @@ static const int ZONE_SYSCALL_API_VERSION = 6;
/*
* Certain filesystems (such as NFS and autofs) need to know which zone
* the mount is being placed in. Because of this, we need to be able to
- * ensure that a zone isn't in the process of being created such that
- * nfs_mount() thinks it is in the global zone, while by the time it
- * gets added the list of mounted zones, it ends up on zoneA's mount
- * list.
+ * ensure that a zone isn't in the process of being created/destroyed such
+ * that nfs_mount() thinks it is in the global/NGZ zone, while by the time
+ * it gets added the list of mounted zones, it ends up on the wrong zone's
+ * mount list. Since a zone can't reside on an NFS file system, we don't
+ * have to worry about the zonepath itself.
*
* The following functions: block_mounts()/resume_mounts() and
* mount_in_progress()/mount_completed() are used by zones and the VFS
- * layer (respectively) to synchronize zone creation and new mounts.
+ * layer (respectively) to synchronize zone state transitions and new
+ * mounts within a zone. This syncronization is on a per-zone basis, so
+ * activity for one zone will not interfere with activity for another zone.
*
* The semantics are like a reader-reader lock such that there may
- * either be multiple mounts (or zone creations, if that weren't
+ * either be multiple mounts (or zone state transitions, if that weren't
* serialized by zonehash_lock) in progress at the same time, but not
* both.
*
@@ -448,10 +444,8 @@ static const int ZONE_SYSCALL_API_VERSION = 6;
* taking too long.
*
* The semantics are such that there is unfair bias towards the
- * "current" operation. This means that zone creations may starve if
- * there is a rapid succession of new mounts coming in to the system, or
- * there is a remote possibility that zones will be created at such a
- * rate that new mounts will not be able to proceed.
+ * "current" operation. This means that zone halt may starve if
+ * there is a rapid succession of new mounts coming in to the zone.
*/
/*
* Prevent new mounts from progressing to the point of calling
@@ -459,7 +453,7 @@ static const int ZONE_SYSCALL_API_VERSION = 6;
* them to complete.
*/
static int
-block_mounts(void)
+block_mounts(zone_t *zp)
{
int retval = 0;
@@ -468,19 +462,21 @@ block_mounts(void)
* called with zonehash_lock held.
*/
ASSERT(MUTEX_NOT_HELD(&zonehash_lock));
- mutex_enter(&mount_lock);
- while (mounts_in_progress > 0) {
- if (cv_wait_sig(&mount_cv, &mount_lock) == 0)
+ mutex_enter(&zp->zone_mount_lock);
+ while (zp->zone_mounts_in_progress > 0) {
+ if (cv_wait_sig(&zp->zone_mount_cv, &zp->zone_mount_lock) == 0)
goto signaled;
}
/*
* A negative value of mounts_in_progress indicates that mounts
- * have been blocked by (-mounts_in_progress) different callers.
+ * have been blocked by (-mounts_in_progress) different callers
+ * (remotely possible if two threads enter zone_shutdown at the same
+ * time).
*/
- mounts_in_progress--;
+ zp->zone_mounts_in_progress--;
retval = 1;
signaled:
- mutex_exit(&mount_lock);
+ mutex_exit(&zp->zone_mount_lock);
return (retval);
}
@@ -489,26 +485,26 @@ signaled:
* Allow them to progress if we were the last obstacle.
*/
static void
-resume_mounts(void)
+resume_mounts(zone_t *zp)
{
- mutex_enter(&mount_lock);
- if (++mounts_in_progress == 0)
- cv_broadcast(&mount_cv);
- mutex_exit(&mount_lock);
+ mutex_enter(&zp->zone_mount_lock);
+ if (++zp->zone_mounts_in_progress == 0)
+ cv_broadcast(&zp->zone_mount_cv);
+ mutex_exit(&zp->zone_mount_lock);
}
/*
- * The VFS layer is busy with a mount; zones should wait until all
- * mounts are completed to progress.
+ * The VFS layer is busy with a mount; this zone should wait until all
+ * of its mounts are completed to progress.
*/
void
-mount_in_progress(void)
+mount_in_progress(zone_t *zp)
{
- mutex_enter(&mount_lock);
- while (mounts_in_progress < 0)
- cv_wait(&mount_cv, &mount_lock);
- mounts_in_progress++;
- mutex_exit(&mount_lock);
+ mutex_enter(&zp->zone_mount_lock);
+ while (zp->zone_mounts_in_progress < 0)
+ cv_wait(&zp->zone_mount_cv, &zp->zone_mount_lock);
+ zp->zone_mounts_in_progress++;
+ mutex_exit(&zp->zone_mount_lock);
}
/*
@@ -516,12 +512,12 @@ mount_in_progress(void)
* callers if this is the last mount.
*/
void
-mount_completed(void)
+mount_completed(zone_t *zp)
{
- mutex_enter(&mount_lock);
- if (--mounts_in_progress == 0)
- cv_broadcast(&mount_cv);
- mutex_exit(&mount_lock);
+ mutex_enter(&zp->zone_mount_lock);
+ if (--zp->zone_mounts_in_progress == 0)
+ cv_broadcast(&zp->zone_mount_cv);
+ mutex_exit(&zp->zone_mount_lock);
}
/*
@@ -4410,7 +4406,7 @@ zone_create(const char *zone_name, const char *zone_root,
return (zone_create_error(error, 0, extended_error));
}
- if (block_mounts() == 0) {
+ if (block_mounts(zone) == 0) {
mutex_enter(&pp->p_lock);
if (curthread != pp->p_agenttp)
continuelwps(pp);
@@ -4561,7 +4557,7 @@ zone_create(const char *zone_name, const char *zone_root,
/*
* The zone is fully visible, so we can let mounts progress.
*/
- resume_mounts();
+ resume_mounts(zone);
if (rctls)
nvlist_free(rctls);
@@ -4577,7 +4573,7 @@ errout:
continuelwps(pp);
mutex_exit(&pp->p_lock);
- resume_mounts();
+ resume_mounts(zone);
if (rctls)
nvlist_free(rctls);
/*
@@ -4732,15 +4728,6 @@ zone_shutdown(zoneid_t zoneid)
if (zoneid < MIN_USERZONEID || zoneid > MAX_ZONEID)
return (set_errno(EINVAL));
- /*
- * Block mounts so that VFS_MOUNT() can get an accurate view of
- * the zone's status with regards to ZONE_IS_SHUTTING down.
- *
- * e.g. NFS can fail the mount if it determines that the zone
- * has already begun the shutdown sequence.
- */
- if (block_mounts() == 0)
- return (set_errno(EINTR));
mutex_enter(&zonehash_lock);
/*
* Look for zone under hash lock to prevent races with other
@@ -4748,9 +4735,30 @@ zone_shutdown(zoneid_t zoneid)
*/
if ((zone = zone_find_all_by_id(zoneid)) == NULL) {
mutex_exit(&zonehash_lock);
- resume_mounts();
return (set_errno(EINVAL));
}
+
+ /*
+ * We have to drop zonehash_lock before calling block_mounts.
+ * Hold the zone so we can continue to use the zone_t.
+ */
+ zone_hold(zone);
+ mutex_exit(&zonehash_lock);
+
+ /*
+ * Block mounts so that VFS_MOUNT() can get an accurate view of
+ * the zone's status with regards to ZONE_IS_SHUTTING down.
+ *
+ * e.g. NFS can fail the mount if it determines that the zone
+ * has already begun the shutdown sequence.
+ *
+ */
+ if (block_mounts(zone) == 0) {
+ zone_rele(zone);
+ return (set_errno(EINTR));
+ }
+
+ mutex_enter(&zonehash_lock);
mutex_enter(&zone_status_lock);
status = zone_status_get(zone);
/*
@@ -4759,7 +4767,8 @@ zone_shutdown(zoneid_t zoneid)
if (status < ZONE_IS_READY) {
mutex_exit(&zone_status_lock);
mutex_exit(&zonehash_lock);
- resume_mounts();
+ resume_mounts(zone);
+ zone_rele(zone);
return (set_errno(EINVAL));
}
/*
@@ -4769,7 +4778,8 @@ zone_shutdown(zoneid_t zoneid)
if (status >= ZONE_IS_DOWN) {
mutex_exit(&zone_status_lock);
mutex_exit(&zonehash_lock);
- resume_mounts();
+ resume_mounts(zone);
+ zone_rele(zone);
return (0);
}
/*
@@ -4804,10 +4814,9 @@ zone_shutdown(zoneid_t zoneid)
}
}
}
- zone_hold(zone); /* so we can use the zone_t later */
mutex_exit(&zone_status_lock);
mutex_exit(&zonehash_lock);
- resume_mounts();
+ resume_mounts(zone);
if (error = zone_empty(zone)) {
zone_rele(zone);
diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h
index 7e567c0cdb..7aab7c8d32 100644
--- a/usr/src/uts/common/sys/zone.h
+++ b/usr/src/uts/common/sys/zone.h
@@ -600,6 +600,14 @@ typedef struct zone {
* DTrace-private per-zone state
*/
int zone_dtrace_getf; /* # of unprivileged getf()s */
+
+ /*
+ * Synchronization primitives used to synchronize between mounts and
+ * zone creation/destruction.
+ */
+ int zone_mounts_in_progress;
+ kcondvar_t zone_mount_cv;
+ kmutex_t zone_mount_lock;
} zone_t;
/*
@@ -819,8 +827,8 @@ extern int zone_dataset_visible(const char *, int *);
extern int zone_kadmin(int, int, const char *, cred_t *);
extern void zone_shutdown_global(void);
-extern void mount_in_progress(void);
-extern void mount_completed(void);
+extern void mount_in_progress(zone_t *);
+extern void mount_completed(zone_t *);
extern int zone_walk(int (*)(zone_t *, void *), void *);