summaryrefslogtreecommitdiff
path: root/usr/src/uts/common/os/zone.c
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/uts/common/os/zone.c')
-rw-r--r--usr/src/uts/common/os/zone.c825
1 files changed, 629 insertions, 196 deletions
diff --git a/usr/src/uts/common/os/zone.c b/usr/src/uts/common/os/zone.c
index 033bc96ea3..75354330ef 100644
--- a/usr/src/uts/common/os/zone.c
+++ b/usr/src/uts/common/os/zone.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -61,6 +61,10 @@
* initialized zone is added to the list of active zones on the system but
* isn't accessible.
*
+ * ZONE_IS_INITIALIZED: Initialization complete except the ZSD callbacks are
+ * not yet completed. Not possible to enter the zone, but attributes can
+ * be retrieved.
+ *
* ZONE_IS_READY: zsched (the kernel dummy process for a zone) is
* ready. The zone is made visible after the ZSD constructor callbacks are
* executed. A zone remains in this state until it transitions into
@@ -228,6 +232,7 @@
#include <sys/door.h>
#include <sys/cpuvar.h>
+#include <sys/sdt.h>
#include <sys/uadmin.h>
#include <sys/session.h>
@@ -313,6 +318,7 @@ evchan_t *zone_event_chan;
*/
const char *zone_status_table[] = {
ZONE_EVENT_UNINITIALIZED, /* uninitialized */
+ ZONE_EVENT_INITIALIZED, /* initialized */
ZONE_EVENT_READY, /* ready */
ZONE_EVENT_READY, /* booting */
ZONE_EVENT_RUNNING, /* running */
@@ -351,6 +357,19 @@ static int zone_remove_datalink(zoneid_t, char *);
static int zone_check_datalink(zoneid_t *, char *);
static int zone_list_datalink(zoneid_t, int *, char *);
+typedef boolean_t zsd_applyfn_t(kmutex_t *, boolean_t, zone_t *, zone_key_t);
+
+static void zsd_apply_all_zones(zsd_applyfn_t *, zone_key_t);
+static void zsd_apply_all_keys(zsd_applyfn_t *, zone_t *);
+static boolean_t zsd_apply_create(kmutex_t *, boolean_t, zone_t *, zone_key_t);
+static boolean_t zsd_apply_shutdown(kmutex_t *, boolean_t, zone_t *,
+ zone_key_t);
+static boolean_t zsd_apply_destroy(kmutex_t *, boolean_t, zone_t *, zone_key_t);
+static boolean_t zsd_wait_for_creator(zone_t *, struct zsd_entry *,
+ kmutex_t *);
+static boolean_t zsd_wait_for_inprogress(zone_t *, struct zsd_entry *,
+ kmutex_t *);
+
/*
* Bump this number when you alter the zone syscall interfaces; this is
* because we need to have support for previous API versions in libc
@@ -485,78 +504,54 @@ mount_completed(void)
* The locking strategy and overall picture is as follows:
*
* When someone calls zone_key_create(), a template ZSD entry is added to the
- * global list "zsd_registered_keys", protected by zsd_key_lock. The
- * constructor callback is called immediately on all existing zones, and a
- * copy of the ZSD entry added to the per-zone zone_zsd list (protected by
- * zone_lock). As this operation requires the list of zones, the list of
- * registered keys, and the per-zone list of ZSD entries to remain constant
- * throughout the entire operation, it must grab zonehash_lock, zone_lock for
- * all existing zones, and zsd_key_lock, in that order. Similar locking is
- * needed when zone_key_delete() is called. It is thus sufficient to hold
- * zsd_key_lock *or* zone_lock to prevent additions to or removals from the
- * per-zone zone_zsd list.
+ * global list "zsd_registered_keys", protected by zsd_key_lock. While
+ * holding that lock all the existing zones are marked as
+ * ZSD_CREATE_NEEDED and a copy of the ZSD entry added to the per-zone
+ * zone_zsd list (protected by zone_lock). The global list is updated first
+ * (under zone_key_lock) to make sure that newly created zones use the
+ * most recent list of keys. Then under zonehash_lock we walk the zones
+ * and mark them. Similar locking is used in zone_key_delete().
*
- * Note that this implementation does not make a copy of the ZSD entry if a
- * constructor callback is not provided. A zone_getspecific() on such an
- * uninitialized ZSD entry will return NULL.
+ * The actual create, shutdown, and destroy callbacks are done without
+ * holding any lock. And zsd_flags are used to ensure that the operations
+ * completed so that when zone_key_create (and zone_create) is done, as well as
+ * zone_key_delete (and zone_destroy) is done, all the necessary callbacks
+ * are completed.
*
* When new zones are created constructor callbacks for all registered ZSD
- * entries will be called.
+ * entries will be called. That also uses the above two phases of marking
+ * what needs to be done, and then running the callbacks without holding
+ * any locks.
*
* The framework does not provide any locking around zone_getspecific() and
* zone_setspecific() apart from that needed for internal consistency, so
* callers interested in atomic "test-and-set" semantics will need to provide
* their own locking.
*/
-void
-zone_key_create(zone_key_t *keyp, void *(*create)(zoneid_t),
- void (*shutdown)(zoneid_t, void *), void (*destroy)(zoneid_t, void *))
-{
- struct zsd_entry *zsdp;
- struct zsd_entry *t;
- struct zone *zone;
- zsdp = kmem_alloc(sizeof (*zsdp), KM_SLEEP);
- zsdp->zsd_data = NULL;
- zsdp->zsd_create = create;
- zsdp->zsd_shutdown = shutdown;
- zsdp->zsd_destroy = destroy;
-
- mutex_enter(&zonehash_lock); /* stop the world */
- for (zone = list_head(&zone_active); zone != NULL;
- zone = list_next(&zone_active, zone))
- mutex_enter(&zone->zone_lock); /* lock all zones */
-
- mutex_enter(&zsd_key_lock);
- *keyp = zsdp->zsd_key = ++zsd_keyval;
- ASSERT(zsd_keyval != 0);
- list_insert_tail(&zsd_registered_keys, zsdp);
- mutex_exit(&zsd_key_lock);
+/*
+ * Helper function to find the zsd_entry associated with the key in the
+ * given list.
+ */
+static struct zsd_entry *
+zsd_find(list_t *l, zone_key_t key)
+{
+ struct zsd_entry *zsd;
- if (create != NULL) {
- for (zone = list_head(&zone_active); zone != NULL;
- zone = list_next(&zone_active, zone)) {
- t = kmem_alloc(sizeof (*t), KM_SLEEP);
- t->zsd_key = *keyp;
- t->zsd_data = (*create)(zone->zone_id);
- t->zsd_create = create;
- t->zsd_shutdown = shutdown;
- t->zsd_destroy = destroy;
- list_insert_tail(&zone->zone_zsd, t);
+ for (zsd = list_head(l); zsd != NULL; zsd = list_next(l, zsd)) {
+ if (zsd->zsd_key == key) {
+ return (zsd);
}
}
- for (zone = list_head(&zone_active); zone != NULL;
- zone = list_next(&zone_active, zone))
- mutex_exit(&zone->zone_lock);
- mutex_exit(&zonehash_lock);
+ return (NULL);
}
/*
* Helper function to find the zsd_entry associated with the key in the
- * given list.
+ * given list. Move it to the front of the list.
*/
static struct zsd_entry *
-zsd_find(list_t *l, zone_key_t key)
+zsd_find_mru(list_t *l, zone_key_t key)
{
struct zsd_entry *zsd;
@@ -575,9 +570,88 @@ zsd_find(list_t *l, zone_key_t key)
return (NULL);
}
+void
+zone_key_create(zone_key_t *keyp, void *(*create)(zoneid_t),
+ void (*shutdown)(zoneid_t, void *), void (*destroy)(zoneid_t, void *))
+{
+ struct zsd_entry *zsdp;
+ struct zsd_entry *t;
+ struct zone *zone;
+ zone_key_t key;
+
+ zsdp = kmem_zalloc(sizeof (*zsdp), KM_SLEEP);
+ zsdp->zsd_data = NULL;
+ zsdp->zsd_create = create;
+ zsdp->zsd_shutdown = shutdown;
+ zsdp->zsd_destroy = destroy;
+
+ /*
+ * Insert in global list of callbacks. Makes future zone creations
+ * see it.
+ */
+ mutex_enter(&zsd_key_lock);
+ *keyp = key = zsdp->zsd_key = ++zsd_keyval;
+ ASSERT(zsd_keyval != 0);
+ list_insert_tail(&zsd_registered_keys, zsdp);
+ mutex_exit(&zsd_key_lock);
+
+ /*
+ * Insert for all existing zones and mark them as needing
+ * a create callback.
+ */
+ mutex_enter(&zonehash_lock); /* stop the world */
+ for (zone = list_head(&zone_active); zone != NULL;
+ zone = list_next(&zone_active, zone)) {
+ zone_status_t status;
+
+ mutex_enter(&zone->zone_lock);
+
+ /* Skip zones that are on the way down or not yet up */
+ status = zone_status_get(zone);
+ if (status >= ZONE_IS_DOWN ||
+ status == ZONE_IS_UNINITIALIZED) {
+ mutex_exit(&zone->zone_lock);
+ continue;
+ }
+
+ t = zsd_find_mru(&zone->zone_zsd, key);
+ if (t != NULL) {
+ /*
+ * A zsd_configure already inserted it after
+ * we dropped zsd_key_lock above.
+ */
+ mutex_exit(&zone->zone_lock);
+ continue;
+ }
+ t = kmem_zalloc(sizeof (*t), KM_SLEEP);
+ t->zsd_key = key;
+ t->zsd_create = create;
+ t->zsd_shutdown = shutdown;
+ t->zsd_destroy = destroy;
+ if (create != NULL) {
+ t->zsd_flags = ZSD_CREATE_NEEDED;
+ DTRACE_PROBE2(zsd__create__needed,
+ zone_t *, zone, zone_key_t, key);
+ }
+ list_insert_tail(&zone->zone_zsd, t);
+ mutex_exit(&zone->zone_lock);
+ }
+ mutex_exit(&zonehash_lock);
+
+ if (create != NULL) {
+ /* Now call the create callback for this key */
+ zsd_apply_all_zones(zsd_apply_create, key);
+ }
+}
+
/*
* Function called when a module is being unloaded, or otherwise wishes
* to unregister its ZSD key and callbacks.
+ *
+ * Remove from the global list and determine the functions that need to
+ * be called under a global lock. Then call the functions without
+ * holding any locks. Finally free up the zone_zsd entries. (The apply
+ * functions need to access the zone_zsd entries to find zsd_data etc.)
*/
int
zone_key_delete(zone_key_t key)
@@ -585,65 +659,88 @@ zone_key_delete(zone_key_t key)
struct zsd_entry *zsdp = NULL;
zone_t *zone;
- mutex_enter(&zonehash_lock); /* Zone create/delete waits for us */
- for (zone = list_head(&zone_active); zone != NULL;
- zone = list_next(&zone_active, zone))
- mutex_enter(&zone->zone_lock); /* lock all zones */
-
mutex_enter(&zsd_key_lock);
- zsdp = zsd_find(&zsd_registered_keys, key);
- if (zsdp == NULL)
- goto notfound;
+ zsdp = zsd_find_mru(&zsd_registered_keys, key);
+ if (zsdp == NULL) {
+ mutex_exit(&zsd_key_lock);
+ return (-1);
+ }
list_remove(&zsd_registered_keys, zsdp);
mutex_exit(&zsd_key_lock);
+ mutex_enter(&zonehash_lock);
for (zone = list_head(&zone_active); zone != NULL;
zone = list_next(&zone_active, zone)) {
struct zsd_entry *del;
- void *data;
-
- if (!(zone->zone_flags & ZF_DESTROYED)) {
- del = zsd_find(&zone->zone_zsd, key);
- if (del != NULL) {
- data = del->zsd_data;
- ASSERT(del->zsd_shutdown == zsdp->zsd_shutdown);
- ASSERT(del->zsd_destroy == zsdp->zsd_destroy);
- list_remove(&zone->zone_zsd, del);
- kmem_free(del, sizeof (*del));
- } else {
- data = NULL;
- }
- if (zsdp->zsd_shutdown)
- zsdp->zsd_shutdown(zone->zone_id, data);
- if (zsdp->zsd_destroy)
- zsdp->zsd_destroy(zone->zone_id, data);
+
+ mutex_enter(&zone->zone_lock);
+ del = zsd_find_mru(&zone->zone_zsd, key);
+ if (del == NULL) {
+ /*
+ * Somebody else got here first e.g the zone going
+ * away.
+ */
+ mutex_exit(&zone->zone_lock);
+ continue;
+ }
+ ASSERT(del->zsd_shutdown == zsdp->zsd_shutdown);
+ ASSERT(del->zsd_destroy == zsdp->zsd_destroy);
+ if (del->zsd_shutdown != NULL &&
+ (del->zsd_flags & ZSD_SHUTDOWN_ALL) == 0) {
+ del->zsd_flags |= ZSD_SHUTDOWN_NEEDED;
+ DTRACE_PROBE2(zsd__shutdown__needed,
+ zone_t *, zone, zone_key_t, key);
+ }
+ if (del->zsd_destroy != NULL &&
+ (del->zsd_flags & ZSD_DESTROY_ALL) == 0) {
+ del->zsd_flags |= ZSD_DESTROY_NEEDED;
+ DTRACE_PROBE2(zsd__destroy__needed,
+ zone_t *, zone, zone_key_t, key);
}
mutex_exit(&zone->zone_lock);
}
mutex_exit(&zonehash_lock);
kmem_free(zsdp, sizeof (*zsdp));
- return (0);
-notfound:
- mutex_exit(&zsd_key_lock);
+ /* Now call the shutdown and destroy callback for this key */
+ zsd_apply_all_zones(zsd_apply_shutdown, key);
+ zsd_apply_all_zones(zsd_apply_destroy, key);
+
+ /* Now we can free up the zsdp structures in each zone */
+ mutex_enter(&zonehash_lock);
for (zone = list_head(&zone_active); zone != NULL;
- zone = list_next(&zone_active, zone))
+ zone = list_next(&zone_active, zone)) {
+ struct zsd_entry *del;
+
+ mutex_enter(&zone->zone_lock);
+ del = zsd_find(&zone->zone_zsd, key);
+ if (del != NULL) {
+ list_remove(&zone->zone_zsd, del);
+ ASSERT(!(del->zsd_flags & ZSD_ALL_INPROGRESS));
+ kmem_free(del, sizeof (*del));
+ }
mutex_exit(&zone->zone_lock);
+ }
mutex_exit(&zonehash_lock);
- return (-1);
+
+ return (0);
}
/*
* ZSD counterpart of pthread_setspecific().
+ *
+ * Since all zsd callbacks, including those with no create function,
+ * have an entry in zone_zsd, if the key is registered it is part of
+ * the zone_zsd list.
+ * Return an error if the key wasn't registerd.
*/
int
zone_setspecific(zone_key_t key, zone_t *zone, const void *data)
{
struct zsd_entry *t;
- struct zsd_entry *zsdp = NULL;
mutex_enter(&zone->zone_lock);
- t = zsd_find(&zone->zone_zsd, key);
+ t = zsd_find_mru(&zone->zone_zsd, key);
if (t != NULL) {
/*
* Replace old value with new
@@ -652,36 +749,8 @@ zone_setspecific(zone_key_t key, zone_t *zone, const void *data)
mutex_exit(&zone->zone_lock);
return (0);
}
- /*
- * If there was no previous value, go through the list of registered
- * keys.
- *
- * We avoid grabbing zsd_key_lock until we are sure we need it; this is
- * necessary for shutdown callbacks to be able to execute without fear
- * of deadlock.
- */
- mutex_enter(&zsd_key_lock);
- zsdp = zsd_find(&zsd_registered_keys, key);
- if (zsdp == NULL) { /* Key was not registered */
- mutex_exit(&zsd_key_lock);
- mutex_exit(&zone->zone_lock);
- return (-1);
- }
-
- /*
- * Add a zsd_entry to this zone, using the template we just retrieved
- * to initialize the constructor and destructor(s).
- */
- t = kmem_alloc(sizeof (*t), KM_SLEEP);
- t->zsd_key = key;
- t->zsd_data = (void *)data;
- t->zsd_create = zsdp->zsd_create;
- t->zsd_shutdown = zsdp->zsd_shutdown;
- t->zsd_destroy = zsdp->zsd_destroy;
- list_insert_tail(&zone->zone_zsd, t);
- mutex_exit(&zsd_key_lock);
mutex_exit(&zone->zone_lock);
- return (0);
+ return (-1);
}
/*
@@ -694,7 +763,7 @@ zone_getspecific(zone_key_t key, zone_t *zone)
void *data;
mutex_enter(&zone->zone_lock);
- t = zsd_find(&zone->zone_zsd, key);
+ t = zsd_find_mru(&zone->zone_zsd, key);
data = (t == NULL ? NULL : t->zsd_data);
mutex_exit(&zone->zone_lock);
return (data);
@@ -703,42 +772,41 @@ zone_getspecific(zone_key_t key, zone_t *zone)
/*
* Function used to initialize a zone's list of ZSD callbacks and data
* when the zone is being created. The callbacks are initialized from
- * the template list (zsd_registered_keys), and the constructor
- * callback executed (if one exists).
- *
- * This is called before the zone is made publicly available, hence no
- * need to grab zone_lock.
- *
- * Although we grab and release zsd_key_lock, new entries cannot be
- * added to or removed from the zsd_registered_keys list until we
- * release zonehash_lock, so there isn't a window for a
- * zone_key_create() to come in after we've dropped zsd_key_lock but
- * before the zone is added to the zone list, such that the constructor
- * callbacks aren't executed for the new zone.
+ * the template list (zsd_registered_keys). The constructor callback is
+ * executed later (once the zone exists and with locks dropped).
*/
static void
zone_zsd_configure(zone_t *zone)
{
struct zsd_entry *zsdp;
struct zsd_entry *t;
- zoneid_t zoneid = zone->zone_id;
ASSERT(MUTEX_HELD(&zonehash_lock));
ASSERT(list_head(&zone->zone_zsd) == NULL);
+ mutex_enter(&zone->zone_lock);
mutex_enter(&zsd_key_lock);
for (zsdp = list_head(&zsd_registered_keys); zsdp != NULL;
zsdp = list_next(&zsd_registered_keys, zsdp)) {
+ /*
+ * Since this zone is ZONE_IS_UNCONFIGURED, zone_key_create
+ * should not have added anything to it.
+ */
+ ASSERT(zsd_find(&zone->zone_zsd, zsdp->zsd_key) == NULL);
+
+ t = kmem_zalloc(sizeof (*t), KM_SLEEP);
+ t->zsd_key = zsdp->zsd_key;
+ t->zsd_create = zsdp->zsd_create;
+ t->zsd_shutdown = zsdp->zsd_shutdown;
+ t->zsd_destroy = zsdp->zsd_destroy;
if (zsdp->zsd_create != NULL) {
- t = kmem_alloc(sizeof (*t), KM_SLEEP);
- t->zsd_key = zsdp->zsd_key;
- t->zsd_create = zsdp->zsd_create;
- t->zsd_data = (*t->zsd_create)(zoneid);
- t->zsd_shutdown = zsdp->zsd_shutdown;
- t->zsd_destroy = zsdp->zsd_destroy;
- list_insert_tail(&zone->zone_zsd, t);
+ t->zsd_flags = ZSD_CREATE_NEEDED;
+ DTRACE_PROBE2(zsd__create__needed,
+ zone_t *, zone, zone_key_t, zsdp->zsd_key);
}
+ list_insert_tail(&zone->zone_zsd, t);
}
mutex_exit(&zsd_key_lock);
+ mutex_exit(&zone->zone_lock);
}
enum zsd_callback_type { ZSD_CREATE, ZSD_SHUTDOWN, ZSD_DESTROY };
@@ -749,70 +817,47 @@ enum zsd_callback_type { ZSD_CREATE, ZSD_SHUTDOWN, ZSD_DESTROY };
static void
zone_zsd_callbacks(zone_t *zone, enum zsd_callback_type ct)
{
- struct zsd_entry *zsdp;
struct zsd_entry *t;
- zoneid_t zoneid = zone->zone_id;
ASSERT(ct == ZSD_SHUTDOWN || ct == ZSD_DESTROY);
ASSERT(ct != ZSD_SHUTDOWN || zone_status_get(zone) >= ZONE_IS_EMPTY);
ASSERT(ct != ZSD_DESTROY || zone_status_get(zone) >= ZONE_IS_DOWN);
- mutex_enter(&zone->zone_lock);
- if (ct == ZSD_DESTROY) {
- if (zone->zone_flags & ZF_DESTROYED) {
- /*
- * Make sure destructors are only called once.
- */
- mutex_exit(&zone->zone_lock);
- return;
- }
- zone->zone_flags |= ZF_DESTROYED;
- }
- mutex_exit(&zone->zone_lock);
-
/*
- * Both zsd_key_lock and zone_lock need to be held in order to add or
- * remove a ZSD key, (either globally as part of
- * zone_key_create()/zone_key_delete(), or on a per-zone basis, as is
- * possible through zone_setspecific()), so it's sufficient to hold
- * zsd_key_lock here.
- *
- * This is a good thing, since we don't want to recursively try to grab
- * zone_lock if a callback attempts to do something like a crfree() or
- * zone_rele().
+ * Run the callback solely based on what is registered for the zone
+ * in zone_zsd. The global list can change independently of this
+ * as keys are registered and unregistered and we don't register new
+ * callbacks for a zone that is in the process of going away.
*/
- mutex_enter(&zsd_key_lock);
- for (zsdp = list_head(&zsd_registered_keys); zsdp != NULL;
- zsdp = list_next(&zsd_registered_keys, zsdp)) {
- zone_key_t key = zsdp->zsd_key;
+ mutex_enter(&zone->zone_lock);
+ for (t = list_head(&zone->zone_zsd); t != NULL;
+ t = list_next(&zone->zone_zsd, t)) {
+ zone_key_t key = t->zsd_key;
/* Skip if no callbacks registered */
- if (ct == ZSD_SHUTDOWN && zsdp->zsd_shutdown == NULL)
- continue;
- if (ct == ZSD_DESTROY && zsdp->zsd_destroy == NULL)
- continue;
- /*
- * Call the callback with the zone-specific data if we can find
- * any, otherwise with NULL.
- */
- t = zsd_find(&zone->zone_zsd, key);
- if (t != NULL) {
- if (ct == ZSD_SHUTDOWN) {
- t->zsd_shutdown(zoneid, t->zsd_data);
- } else {
- ASSERT(ct == ZSD_DESTROY);
- t->zsd_destroy(zoneid, t->zsd_data);
+
+ if (ct == ZSD_SHUTDOWN) {
+ if (t->zsd_shutdown != NULL &&
+ (t->zsd_flags & ZSD_SHUTDOWN_ALL) == 0) {
+ t->zsd_flags |= ZSD_SHUTDOWN_NEEDED;
+ DTRACE_PROBE2(zsd__shutdown__needed,
+ zone_t *, zone, zone_key_t, key);
}
} else {
- if (ct == ZSD_SHUTDOWN) {
- zsdp->zsd_shutdown(zoneid, NULL);
- } else {
- ASSERT(ct == ZSD_DESTROY);
- zsdp->zsd_destroy(zoneid, NULL);
+ if (t->zsd_destroy != NULL &&
+ (t->zsd_flags & ZSD_DESTROY_ALL) == 0) {
+ t->zsd_flags |= ZSD_DESTROY_NEEDED;
+ DTRACE_PROBE2(zsd__destroy__needed,
+ zone_t *, zone, zone_key_t, key);
}
}
}
- mutex_exit(&zsd_key_lock);
+ mutex_exit(&zone->zone_lock);
+
+ /* Now call the shutdown and destroy callback for this key */
+ zsd_apply_all_keys(zsd_apply_shutdown, zone);
+ zsd_apply_all_keys(zsd_apply_destroy, zone);
+
}
/*
@@ -827,12 +872,379 @@ zone_free_zsd(zone_t *zone)
/*
* Free all the zsd_entry's we had on this zone.
*/
+ mutex_enter(&zone->zone_lock);
for (t = list_head(&zone->zone_zsd); t != NULL; t = next) {
next = list_next(&zone->zone_zsd, t);
list_remove(&zone->zone_zsd, t);
+ ASSERT(!(t->zsd_flags & ZSD_ALL_INPROGRESS));
kmem_free(t, sizeof (*t));
}
list_destroy(&zone->zone_zsd);
+ mutex_exit(&zone->zone_lock);
+
+}
+
+/*
+ * Apply a function to all zones for particular key value.
+ *
+ * The applyfn has to drop zonehash_lock if it does some work, and
+ * then reacquire it before it returns.
+ * When the lock is dropped we don't follow list_next even
+ * if it is possible to do so without any hazards. This is
+ * because we want the design to allow for the list of zones
+ * to change in any arbitrary way during the time the
+ * lock was dropped.
+ *
+ * It is safe to restart the loop at list_head since the applyfn
+ * changes the zsd_flags as it does work, so a subsequent
+ * pass through will have no effect in applyfn, hence the loop will terminate
+ * in at worst O(N^2).
+ */
+static void
+zsd_apply_all_zones(zsd_applyfn_t *applyfn, zone_key_t key)
+{
+ zone_t *zone;
+
+ mutex_enter(&zonehash_lock);
+ zone = list_head(&zone_active);
+ while (zone != NULL) {
+ if ((applyfn)(&zonehash_lock, B_FALSE, zone, key)) {
+ /* Lock dropped - restart at head */
+ zone = list_head(&zone_active);
+ } else {
+ zone = list_next(&zone_active, zone);
+ }
+ }
+ mutex_exit(&zonehash_lock);
+}
+
+/*
+ * Apply a function to all keys for a particular zone.
+ *
+ * The applyfn has to drop zonehash_lock if it does some work, and
+ * then reacquire it before it returns.
+ * When the lock is dropped we don't follow list_next even
+ * if it is possible to do so without any hazards. This is
+ * because we want the design to allow for the list of zsd callbacks
+ * to change in any arbitrary way during the time the
+ * lock was dropped.
+ *
+ * It is safe to restart the loop at list_head since the applyfn
+ * changes the zsd_flags as it does work, so a subsequent
+ * pass through will have no effect in applyfn, hence the loop will terminate
+ * in at worst O(N^2).
+ */
+static void
+zsd_apply_all_keys(zsd_applyfn_t *applyfn, zone_t *zone)
+{
+ struct zsd_entry *t;
+
+ mutex_enter(&zone->zone_lock);
+ t = list_head(&zone->zone_zsd);
+ while (t != NULL) {
+ if ((applyfn)(NULL, B_TRUE, zone, t->zsd_key)) {
+ /* Lock dropped - restart at head */
+ t = list_head(&zone->zone_zsd);
+ } else {
+ t = list_next(&zone->zone_zsd, t);
+ }
+ }
+ mutex_exit(&zone->zone_lock);
+}
+
+/*
+ * Call the create function for the zone and key if CREATE_NEEDED
+ * is set.
+ * If some other thread gets here first and sets CREATE_INPROGRESS, then
+ * we wait for that thread to complete so that we can ensure that
+ * all the callbacks are done when we've looped over all zones/keys.
+ *
+ * When we call the create function, we drop the global held by the
+ * caller, and return true to tell the caller it needs to re-evalute the
+ * state.
+ * If the caller holds zone_lock then zone_lock_held is set, and zone_lock
+ * remains held on exit.
+ */
+static boolean_t
+zsd_apply_create(kmutex_t *lockp, boolean_t zone_lock_held,
+ zone_t *zone, zone_key_t key)
+{
+ void *result;
+ struct zsd_entry *t;
+ boolean_t dropped;
+
+ if (lockp != NULL) {
+ ASSERT(MUTEX_HELD(lockp));
+ }
+ if (zone_lock_held) {
+ ASSERT(MUTEX_HELD(&zone->zone_lock));
+ } else {
+ mutex_enter(&zone->zone_lock);
+ }
+
+ t = zsd_find(&zone->zone_zsd, key);
+ if (t == NULL) {
+ /*
+ * Somebody else got here first e.g the zone going
+ * away.
+ */
+ if (!zone_lock_held)
+ mutex_exit(&zone->zone_lock);
+ return (B_FALSE);
+ }
+ dropped = B_FALSE;
+ if (zsd_wait_for_inprogress(zone, t, lockp))
+ dropped = B_TRUE;
+
+ if (t->zsd_flags & ZSD_CREATE_NEEDED) {
+ t->zsd_flags &= ~ZSD_CREATE_NEEDED;
+ t->zsd_flags |= ZSD_CREATE_INPROGRESS;
+ DTRACE_PROBE2(zsd__create__inprogress,
+ zone_t *, zone, zone_key_t, key);
+ mutex_exit(&zone->zone_lock);
+ if (lockp != NULL)
+ mutex_exit(lockp);
+
+ dropped = B_TRUE;
+ ASSERT(t->zsd_create != NULL);
+ DTRACE_PROBE2(zsd__create__start,
+ zone_t *, zone, zone_key_t, key);
+
+ result = (*t->zsd_create)(zone->zone_id);
+
+ DTRACE_PROBE2(zsd__create__end,
+ zone_t *, zone, voidn *, result);
+
+ ASSERT(result != NULL);
+ if (lockp != NULL)
+ mutex_enter(lockp);
+ mutex_enter(&zone->zone_lock);
+ t->zsd_data = result;
+ t->zsd_flags &= ~ZSD_CREATE_INPROGRESS;
+ t->zsd_flags |= ZSD_CREATE_COMPLETED;
+ cv_broadcast(&t->zsd_cv);
+ DTRACE_PROBE2(zsd__create__completed,
+ zone_t *, zone, zone_key_t, key);
+ }
+ if (!zone_lock_held)
+ mutex_exit(&zone->zone_lock);
+ return (dropped);
+}
+
+/*
+ * Call the shutdown function for the zone and key if SHUTDOWN_NEEDED
+ * is set.
+ * If some other thread gets here first and sets *_INPROGRESS, then
+ * we wait for that thread to complete so that we can ensure that
+ * all the callbacks are done when we've looped over all zones/keys.
+ *
+ * When we call the shutdown function, we drop the global held by the
+ * caller, and return true to tell the caller it needs to re-evalute the
+ * state.
+ * If the caller holds zone_lock then zone_lock_held is set, and zone_lock
+ * remains held on exit.
+ */
+static boolean_t
+zsd_apply_shutdown(kmutex_t *lockp, boolean_t zone_lock_held,
+ zone_t *zone, zone_key_t key)
+{
+ struct zsd_entry *t;
+ void *data;
+ boolean_t dropped;
+
+ if (lockp != NULL) {
+ ASSERT(MUTEX_HELD(lockp));
+ }
+ if (zone_lock_held) {
+ ASSERT(MUTEX_HELD(&zone->zone_lock));
+ } else {
+ mutex_enter(&zone->zone_lock);
+ }
+
+ t = zsd_find(&zone->zone_zsd, key);
+ if (t == NULL) {
+ /*
+ * Somebody else got here first e.g the zone going
+ * away.
+ */
+ if (!zone_lock_held)
+ mutex_exit(&zone->zone_lock);
+ return (B_FALSE);
+ }
+ dropped = B_FALSE;
+ if (zsd_wait_for_creator(zone, t, lockp))
+ dropped = B_TRUE;
+
+ if (zsd_wait_for_inprogress(zone, t, lockp))
+ dropped = B_TRUE;
+
+ if (t->zsd_flags & ZSD_SHUTDOWN_NEEDED) {
+ t->zsd_flags &= ~ZSD_SHUTDOWN_NEEDED;
+ t->zsd_flags |= ZSD_SHUTDOWN_INPROGRESS;
+ DTRACE_PROBE2(zsd__shutdown__inprogress,
+ zone_t *, zone, zone_key_t, key);
+ mutex_exit(&zone->zone_lock);
+ if (lockp != NULL)
+ mutex_exit(lockp);
+ dropped = B_TRUE;
+
+ ASSERT(t->zsd_shutdown != NULL);
+ data = t->zsd_data;
+
+ DTRACE_PROBE2(zsd__shutdown__start,
+ zone_t *, zone, zone_key_t, key);
+
+ (t->zsd_shutdown)(zone->zone_id, data);
+ DTRACE_PROBE2(zsd__shutdown__end,
+ zone_t *, zone, zone_key_t, key);
+
+ if (lockp != NULL)
+ mutex_enter(lockp);
+ mutex_enter(&zone->zone_lock);
+ t->zsd_flags &= ~ZSD_SHUTDOWN_INPROGRESS;
+ t->zsd_flags |= ZSD_SHUTDOWN_COMPLETED;
+ cv_broadcast(&t->zsd_cv);
+ DTRACE_PROBE2(zsd__shutdown__completed,
+ zone_t *, zone, zone_key_t, key);
+ }
+ if (!zone_lock_held)
+ mutex_exit(&zone->zone_lock);
+ return (dropped);
+}
+
+/*
+ * Call the destroy function for the zone and key if DESTROY_NEEDED
+ * is set.
+ * If some other thread gets here first and sets *_INPROGRESS, then
+ * we wait for that thread to complete so that we can ensure that
+ * all the callbacks are done when we've looped over all zones/keys.
+ *
+ * When we call the destroy function, we drop the global held by the
+ * caller, and return true to tell the caller it needs to re-evalute the
+ * state.
+ * If the caller holds zone_lock then zone_lock_held is set, and zone_lock
+ * remains held on exit.
+ */
+static boolean_t
+zsd_apply_destroy(kmutex_t *lockp, boolean_t zone_lock_held,
+ zone_t *zone, zone_key_t key)
+{
+ struct zsd_entry *t;
+ void *data;
+ boolean_t dropped;
+
+ if (lockp != NULL) {
+ ASSERT(MUTEX_HELD(lockp));
+ }
+ if (zone_lock_held) {
+ ASSERT(MUTEX_HELD(&zone->zone_lock));
+ } else {
+ mutex_enter(&zone->zone_lock);
+ }
+
+ t = zsd_find(&zone->zone_zsd, key);
+ if (t == NULL) {
+ /*
+ * Somebody else got here first e.g the zone going
+ * away.
+ */
+ if (!zone_lock_held)
+ mutex_exit(&zone->zone_lock);
+ return (B_FALSE);
+ }
+ dropped = B_FALSE;
+ if (zsd_wait_for_creator(zone, t, lockp))
+ dropped = B_TRUE;
+
+ if (zsd_wait_for_inprogress(zone, t, lockp))
+ dropped = B_TRUE;
+
+ if (t->zsd_flags & ZSD_DESTROY_NEEDED) {
+ t->zsd_flags &= ~ZSD_DESTROY_NEEDED;
+ t->zsd_flags |= ZSD_DESTROY_INPROGRESS;
+ DTRACE_PROBE2(zsd__destroy__inprogress,
+ zone_t *, zone, zone_key_t, key);
+ mutex_exit(&zone->zone_lock);
+ if (lockp != NULL)
+ mutex_exit(lockp);
+ dropped = B_TRUE;
+
+ ASSERT(t->zsd_destroy != NULL);
+ data = t->zsd_data;
+ DTRACE_PROBE2(zsd__destroy__start,
+ zone_t *, zone, zone_key_t, key);
+
+ (t->zsd_destroy)(zone->zone_id, data);
+ DTRACE_PROBE2(zsd__destroy__end,
+ zone_t *, zone, zone_key_t, key);
+
+ if (lockp != NULL)
+ mutex_enter(lockp);
+ mutex_enter(&zone->zone_lock);
+ t->zsd_data = NULL;
+ t->zsd_flags &= ~ZSD_DESTROY_INPROGRESS;
+ t->zsd_flags |= ZSD_DESTROY_COMPLETED;
+ cv_broadcast(&t->zsd_cv);
+ DTRACE_PROBE2(zsd__destroy__completed,
+ zone_t *, zone, zone_key_t, key);
+ }
+ if (!zone_lock_held)
+ mutex_exit(&zone->zone_lock);
+ return (dropped);
+}
+
+/*
+ * Wait for any CREATE_NEEDED flag to be cleared.
+ * Returns true if lockp was temporarily dropped while waiting.
+ */
+static boolean_t
+zsd_wait_for_creator(zone_t *zone, struct zsd_entry *t, kmutex_t *lockp)
+{
+ boolean_t dropped = B_FALSE;
+
+ while (t->zsd_flags & ZSD_CREATE_NEEDED) {
+ DTRACE_PROBE2(zsd__wait__for__creator,
+ zone_t *, zone, struct zsd_entry *, t);
+ if (lockp != NULL) {
+ dropped = B_TRUE;
+ mutex_exit(lockp);
+ }
+ cv_wait(&t->zsd_cv, &zone->zone_lock);
+ if (lockp != NULL) {
+ /* First drop zone_lock to preserve order */
+ mutex_exit(&zone->zone_lock);
+ mutex_enter(lockp);
+ mutex_enter(&zone->zone_lock);
+ }
+ }
+ return (dropped);
+}
+
+/*
+ * Wait for any INPROGRESS flag to be cleared.
+ * Returns true if lockp was temporarily dropped while waiting.
+ */
+static boolean_t
+zsd_wait_for_inprogress(zone_t *zone, struct zsd_entry *t, kmutex_t *lockp)
+{
+ boolean_t dropped = B_FALSE;
+
+ while (t->zsd_flags & ZSD_ALL_INPROGRESS) {
+ DTRACE_PROBE2(zsd__wait__for__inprogress,
+ zone_t *, zone, struct zsd_entry *, t);
+ if (lockp != NULL) {
+ dropped = B_TRUE;
+ mutex_exit(lockp);
+ }
+ cv_wait(&t->zsd_cv, &zone->zone_lock);
+ if (lockp != NULL) {
+ /* First drop zone_lock to preserve order */
+ mutex_exit(&zone->zone_lock);
+ mutex_enter(lockp);
+ mutex_enter(&zone->zone_lock);
+ }
+ }
+ return (dropped);
}
/*
@@ -2960,10 +3372,15 @@ zsched(void *arg)
/*
* Tell the world that we're done setting up.
*
- * At this point we want to set the zone status to ZONE_IS_READY
+ * At this point we want to set the zone status to ZONE_IS_INITIALIZED
* and atomically set the zone's processor set visibility. Once
* we drop pool_lock() this zone will automatically get updated
* to reflect any future changes to the pools configuration.
+ *
+ * Note that after we drop the locks below (zonehash_lock in
+ * particular) other operations such as a zone_getattr call can
+ * now proceed and observe the zone. That is the reason for doing a
+ * state transition to the INITIALIZED state.
*/
pool_lock();
mutex_enter(&cpu_lock);
@@ -2974,12 +3391,21 @@ zsched(void *arg)
zone_pset_set(zone, pool_default->pool_pset->pset_id);
mutex_enter(&zone_status_lock);
ASSERT(zone_status_get(zone) == ZONE_IS_UNINITIALIZED);
- zone_status_set(zone, ZONE_IS_READY);
+ zone_status_set(zone, ZONE_IS_INITIALIZED);
mutex_exit(&zone_status_lock);
mutex_exit(&zonehash_lock);
mutex_exit(&cpu_lock);
pool_unlock();
+ /* Now call the create callback for this key */
+ zsd_apply_all_keys(zsd_apply_create, zone);
+
+ /* The callbacks are complete. Mark ZONE_IS_READY */
+ mutex_enter(&zone_status_lock);
+ ASSERT(zone_status_get(zone) == ZONE_IS_INITIALIZED);
+ zone_status_set(zone, ZONE_IS_READY);
+ mutex_exit(&zone_status_lock);
+
/*
* Once we see the zone transition to the ZONE_IS_BOOTING state,
* we launch init, and set the state to running.
@@ -4071,7 +4497,7 @@ zone_getattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize)
return (set_errno(EINVAL));
}
zone_status = zone_status_get(zone);
- if (zone_status < ZONE_IS_READY) {
+ if (zone_status < ZONE_IS_INITIALIZED) {
mutex_exit(&zonehash_lock);
return (set_errno(EINVAL));
}
@@ -5698,21 +6124,28 @@ zone_list_datalink(zoneid_t zoneid, int *nump, char *buf)
/*
* Public interface for looking up a zone by zoneid. It's a customized version
- * for netstack_zone_create(), it:
- * 1. Doesn't acquire the zonehash_lock, since it is called from
- * zone_key_create() or zone_zsd_configure(), lock already held.
- * 2. Doesn't check the status of the zone.
- * 3. It will be called even before zone_init is called, in that case the
+ * for netstack_zone_create(). It can only be called from the zsd create
+ * callbacks, since it doesn't have reference on the zone structure hence if
+ * it is called elsewhere the zone could disappear after the zonehash_lock
+ * is dropped.
+ *
+ * Furthermore it
+ * 1. Doesn't check the status of the zone.
+ * 2. It will be called even before zone_init is called, in that case the
* address of zone0 is returned directly, and netstack_zone_create()
* will only assign a value to zone0.zone_netstack, won't break anything.
+ * 3. Returns without the zone being held.
*/
zone_t *
zone_find_by_id_nolock(zoneid_t zoneid)
{
- ASSERT(MUTEX_HELD(&zonehash_lock));
+ zone_t *zone;
+ mutex_enter(&zonehash_lock);
if (zonehashbyid == NULL)
- return (&zone0);
+ zone = &zone0;
else
- return (zone_find_all_by_id(zoneid));
+ zone = zone_find_all_by_id(zoneid);
+ mutex_exit(&zonehash_lock);
+ return (zone);
}