path: root/usr/src/uts/common/os/netstack.c
author      nordmark <none@none>    2008-01-22 15:57:26 -0800
committer   nordmark <none@none>    2008-01-22 15:57:26 -0800
commit      bd41d0a82bd89bc81d63ae5dfc2ba4245f74ea6c (patch)
tree        34d74b100f909c973299a5ded0d0a231ac2d069e /usr/src/uts/common/os/netstack.c
parent      c63537d6ab9d03a6ce330b36e829aba258c25d87 (diff)
download    illumos-gate-bd41d0a82bd89bc81d63ae5dfc2ba4245f74ea6c.tar.gz
6558857 ZSD callback locking cause deadlocks
Diffstat (limited to 'usr/src/uts/common/os/netstack.c')
-rw-r--r--    usr/src/uts/common/os/netstack.c    594
1 file changed, 357 insertions, 237 deletions
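
The heart of this change is a per-module nm_state_t (flags plus a condition variable) replacing the old netstack_m_state[] flag words, so that netstack_register()/netstack_unregister() and the zone create/shutdown/destroy paths wait for each other instead of deadlocking on the ZSD callbacks. As a reading aid, the following is a minimal, hypothetical user-space sketch (pthreads instead of the kernel kmutex/kcondvar API; all names invented) of the NEEDED -> INPROGRESS -> COMPLETED progression that netstack_apply_create() and wait_for_nms_inprogress() implement below: claim the work while holding the lock, drop the lock to run the callback, then mark it completed and wake any waiters.

/*
 * Hypothetical user-space sketch of the per-module state machine this
 * patch introduces; not the kernel code itself.
 */
#include <pthread.h>
#include <stdio.h>

#define	NEEDED		0x1
#define	INPROGRESS	0x2
#define	COMPLETED	0x4

struct mod_state {
	pthread_mutex_t	lock;
	pthread_cond_t	cv;
	int		flags;
};

static void
run_create_callback(void)
{
	/* Stand-in for (ns_reg[moduleid].nr_create)(stackid, ns). */
	printf("create callback runs with no locks held\n");
}

static void
apply_create(struct mod_state *ms)
{
	pthread_mutex_lock(&ms->lock);

	/* Wait for any other thread that already claimed this callback. */
	while (ms->flags & INPROGRESS)
		pthread_cond_wait(&ms->cv, &ms->lock);

	if (ms->flags & NEEDED) {
		ms->flags &= ~NEEDED;
		ms->flags |= INPROGRESS;

		/* Drop the lock so the callback can block freely. */
		pthread_mutex_unlock(&ms->lock);
		run_create_callback();
		pthread_mutex_lock(&ms->lock);

		ms->flags &= ~INPROGRESS;
		ms->flags |= COMPLETED;
		pthread_cond_broadcast(&ms->cv);
	}
	pthread_mutex_unlock(&ms->lock);
}

int
main(void)
{
	struct mod_state ms = {
		PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, NEEDED
	};

	apply_create(&ms);
	return (0);
}

The same pattern is repeated for the shutdown and destroy callbacks; the kernel version additionally reacquires netstack_g_lock after the callback and reports to its caller whether the locks were dropped, so that walks over the netstack list can restart from netstack_head.
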
diff --git a/usr/src/uts/common/os/netstack.c b/usr/src/uts/common/os/netstack.c
index 44b147dc48..c1e59fe6c3 100644
--- a/usr/src/uts/common/os/netstack.c
+++ b/usr/src/uts/common/os/netstack.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -106,10 +106,6 @@ static void *netstack_zone_create(zoneid_t zoneid);
static void netstack_zone_shutdown(zoneid_t zoneid, void *arg);
static void netstack_zone_destroy(zoneid_t zoneid, void *arg);
-static void netstack_do_create(netstack_t *ns, int moduleid);
-static void netstack_do_shutdown(netstack_t *ns, int moduleid);
-static void netstack_do_destroy(netstack_t *ns, int moduleid);
-
static void netstack_shared_zone_add(zoneid_t zoneid);
static void netstack_shared_zone_remove(zoneid_t zoneid);
static void netstack_shared_kstat_add(kstat_t *ks);
@@ -117,6 +113,16 @@ static void netstack_shared_kstat_remove(kstat_t *ks);
typedef boolean_t applyfn_t(kmutex_t *, netstack_t *, int);
+static void apply_all_netstacks(int, applyfn_t *);
+static void apply_all_modules(netstack_t *, applyfn_t *);
+static void apply_all_modules_reverse(netstack_t *, applyfn_t *);
+static boolean_t netstack_apply_create(kmutex_t *, netstack_t *, int);
+static boolean_t netstack_apply_shutdown(kmutex_t *, netstack_t *, int);
+static boolean_t netstack_apply_destroy(kmutex_t *, netstack_t *, int);
+static boolean_t wait_for_zone_creator(netstack_t *, kmutex_t *);
+static boolean_t wait_for_nms_inprogress(netstack_t *, nm_state_t *,
+ kmutex_t *);
+
void
netstack_init(void)
{
@@ -156,6 +162,10 @@ netstack_register(int moduleid,
ASSERT(moduleid >= 0 && moduleid < NS_MAX);
ASSERT(module_create != NULL);
+ /*
+ * Make instances created after this point in time run the create
+ * callback.
+ */
mutex_enter(&netstack_g_lock);
ASSERT(ns_reg[moduleid].nr_create == NULL);
ASSERT(ns_reg[moduleid].nr_flags == 0);
@@ -166,15 +176,17 @@ netstack_register(int moduleid,
/*
* Determine the set of stacks that exist before we drop the lock.
- * Set CREATE_NEEDED for each of those.
+ * Set NSS_CREATE_NEEDED for each of those.
* netstacks which have been deleted will have NSS_CREATE_COMPLETED
* set, but check NSF_CLOSING to be sure.
*/
for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
+ nm_state_t *nms = &ns->netstack_m_state[moduleid];
+
mutex_enter(&ns->netstack_lock);
if (!(ns->netstack_flags & NSF_CLOSING) &&
- (ns->netstack_m_state[moduleid] & NSS_CREATE_ALL) == 0) {
- ns->netstack_m_state[moduleid] |= NSS_CREATE_NEEDED;
+ (nms->nms_flags & NSS_CREATE_ALL) == 0) {
+ nms->nms_flags |= NSS_CREATE_NEEDED;
DTRACE_PROBE2(netstack__create__needed,
netstack_t *, ns, int, moduleid);
}
@@ -183,12 +195,12 @@ netstack_register(int moduleid,
mutex_exit(&netstack_g_lock);
/*
- * Call the create function for each stack that has CREATE_NEEDED
- * for this moduleid.
- * Set CREATE_INPROGRESS, drop lock, and after done,
- * set CREATE_COMPLETE
+ * At this point in time a new instance can be created or an instance
+ * can be destroyed, or some other module can register or unregister.
+ * Make sure we either run all the create functions for this moduleid
+ * or we wait for any other creators for this moduleid.
*/
- netstack_do_create(NULL, moduleid);
+ apply_all_netstacks(moduleid, netstack_apply_create);
}
void
@@ -204,41 +216,57 @@ netstack_unregister(int moduleid)
mutex_enter(&netstack_g_lock);
/*
* Determine the set of stacks that exist before we drop the lock.
- * Set SHUTDOWN_NEEDED and DESTROY_NEEDED for each of those.
+ * Set NSS_SHUTDOWN_NEEDED and NSS_DESTROY_NEEDED for each of those.
+ * That ensures that when we return all the callbacks for existing
+ * instances have completed. And since we set NRF_DYING no new
+ * instances can use this module.
*/
for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
+ nm_state_t *nms = &ns->netstack_m_state[moduleid];
+
mutex_enter(&ns->netstack_lock);
if (ns_reg[moduleid].nr_shutdown != NULL &&
- (ns->netstack_m_state[moduleid] & NSS_CREATE_COMPLETED) &&
- (ns->netstack_m_state[moduleid] & NSS_SHUTDOWN_ALL) == 0) {
- ns->netstack_m_state[moduleid] |= NSS_SHUTDOWN_NEEDED;
+ (nms->nms_flags & NSS_CREATE_COMPLETED) &&
+ (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
+ nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
DTRACE_PROBE2(netstack__shutdown__needed,
netstack_t *, ns, int, moduleid);
}
if ((ns_reg[moduleid].nr_flags & NRF_REGISTERED) &&
ns_reg[moduleid].nr_destroy != NULL &&
- (ns->netstack_m_state[moduleid] & NSS_CREATE_COMPLETED) &&
- (ns->netstack_m_state[moduleid] & NSS_DESTROY_ALL) == 0) {
- ns->netstack_m_state[moduleid] |= NSS_DESTROY_NEEDED;
+ (nms->nms_flags & NSS_CREATE_COMPLETED) &&
+ (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
+ nms->nms_flags |= NSS_DESTROY_NEEDED;
DTRACE_PROBE2(netstack__destroy__needed,
netstack_t *, ns, int, moduleid);
}
mutex_exit(&ns->netstack_lock);
}
+ /*
+ * Prevent any new netstack from calling the registered create
+ * function, while keeping the function pointers in place until the
+ * shutdown and destroy callbacks are complete.
+ */
+ ns_reg[moduleid].nr_flags |= NRF_DYING;
mutex_exit(&netstack_g_lock);
- netstack_do_shutdown(NULL, moduleid);
- netstack_do_destroy(NULL, moduleid);
+ apply_all_netstacks(moduleid, netstack_apply_shutdown);
+ apply_all_netstacks(moduleid, netstack_apply_destroy);
/*
- * Clear the netstack_m_state so that we can handle this module
+ * Clear the nms_flags so that we can handle this module
* being loaded again.
+ * Also remove the registered functions.
*/
mutex_enter(&netstack_g_lock);
+ ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);
+ ASSERT(ns_reg[moduleid].nr_flags & NRF_DYING);
for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
+ nm_state_t *nms = &ns->netstack_m_state[moduleid];
+
mutex_enter(&ns->netstack_lock);
- if (ns->netstack_m_state[moduleid] & NSS_DESTROY_COMPLETED) {
- ns->netstack_m_state[moduleid] = 0;
+ if (nms->nms_flags & NSS_DESTROY_COMPLETED) {
+ nms->nms_flags = 0;
DTRACE_PROBE2(netstack__destroy__done,
netstack_t *, ns, int, moduleid);
}
@@ -304,6 +332,7 @@ netstack_zone_create(zoneid_t zoneid)
}
/* Not found */
mutex_init(&ns->netstack_lock, NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&ns->netstack_cv, NULL, CV_DEFAULT, NULL);
ns->netstack_stackid = zoneid;
ns->netstack_numzones = 1;
ns->netstack_refcnt = 1; /* Decremented by netstack_zone_destroy */
@@ -311,26 +340,44 @@ netstack_zone_create(zoneid_t zoneid)
*nsp = ns;
zone->zone_netstack = ns;
+ mutex_enter(&ns->netstack_lock);
+ /*
+ * Mark this netstack as having a CREATE running so
+ * any netstack_register/netstack_unregister waits for
+ * the existing create callbacks to complete in moduleid order
+ */
+ ns->netstack_flags |= NSF_ZONE_CREATE;
+
/*
* Determine the set of module create functions that need to be
* called before we drop the lock.
+ * Set NSS_CREATE_NEEDED for each of those.
+ * Skip any with NRF_DYING set, since those are in the process of
+ * going away, by checking for flags being exactly NRF_REGISTERED.
*/
for (i = 0; i < NS_MAX; i++) {
- mutex_enter(&ns->netstack_lock);
- if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
- (ns->netstack_m_state[i] & NSS_CREATE_ALL) == 0) {
- ns->netstack_m_state[i] |= NSS_CREATE_NEEDED;
+ nm_state_t *nms = &ns->netstack_m_state[i];
+
+ cv_init(&nms->nms_cv, NULL, CV_DEFAULT, NULL);
+
+ if ((ns_reg[i].nr_flags == NRF_REGISTERED) &&
+ (nms->nms_flags & NSS_CREATE_ALL) == 0) {
+ nms->nms_flags |= NSS_CREATE_NEEDED;
DTRACE_PROBE2(netstack__create__needed,
netstack_t *, ns, int, i);
}
- mutex_exit(&ns->netstack_lock);
}
+ mutex_exit(&ns->netstack_lock);
mutex_exit(&netstack_g_lock);
- netstack_do_create(ns, NS_ALL);
+ apply_all_modules(ns, netstack_apply_create);
+ /* Tell any waiting netstack_register/netstack_unregister to proceed */
mutex_enter(&ns->netstack_lock);
ns->netstack_flags &= ~NSF_UNINIT;
+ ASSERT(ns->netstack_flags & NSF_ZONE_CREATE);
+ ns->netstack_flags &= ~NSF_ZONE_CREATE;
+ cv_broadcast(&ns->netstack_cv);
mutex_exit(&ns->netstack_lock);
return (ns);
@@ -356,29 +403,46 @@ netstack_zone_shutdown(zoneid_t zoneid, void *arg)
mutex_exit(&ns->netstack_lock);
mutex_enter(&netstack_g_lock);
+ mutex_enter(&ns->netstack_lock);
+ /*
+ * Mark this netstack as having a SHUTDOWN running so
+ * any netstack_register/netstack_unregister waits for
+ * the existing create callbacks to complete in moduleid order
+ */
+ ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
+ ns->netstack_flags |= NSF_ZONE_SHUTDOWN;
+
/*
* Determine the set of stacks that exist before we drop the lock.
- * Set SHUTDOWN_NEEDED for each of those.
+ * Set NSS_SHUTDOWN_NEEDED for each of those.
*/
for (i = 0; i < NS_MAX; i++) {
- mutex_enter(&ns->netstack_lock);
+ nm_state_t *nms = &ns->netstack_m_state[i];
+
if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
ns_reg[i].nr_shutdown != NULL &&
- (ns->netstack_m_state[i] & NSS_CREATE_COMPLETED) &&
- (ns->netstack_m_state[i] & NSS_SHUTDOWN_ALL) == 0) {
- ns->netstack_m_state[i] |= NSS_SHUTDOWN_NEEDED;
+ (nms->nms_flags & NSS_CREATE_COMPLETED) &&
+ (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
+ nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
DTRACE_PROBE2(netstack__shutdown__needed,
netstack_t *, ns, int, i);
}
- mutex_exit(&ns->netstack_lock);
}
+ mutex_exit(&ns->netstack_lock);
mutex_exit(&netstack_g_lock);
/*
* Call the shutdown function for all registered modules for this
* netstack.
*/
- netstack_do_shutdown(ns, NS_ALL);
+ apply_all_modules(ns, netstack_apply_shutdown);
+
+ /* Tell any waiting netstack_register/netstack_unregister to proceed */
+ mutex_enter(&ns->netstack_lock);
+ ASSERT(ns->netstack_flags & NSF_ZONE_SHUTDOWN);
+ ns->netstack_flags &= ~NSF_ZONE_SHUTDOWN;
+ cv_broadcast(&ns->netstack_cv);
+ mutex_exit(&ns->netstack_lock);
}
/*
@@ -429,70 +493,183 @@ netstack_stack_inactive(netstack_t *ns)
int i;
mutex_enter(&netstack_g_lock);
+ mutex_enter(&ns->netstack_lock);
+ /*
+ * Mark this netstack as having a DESTROY running so
+ * any netstack_register/netstack_unregister waits for
+ * the existing destroy callbacks to complete in reverse moduleid order
+ */
+ ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
+ ns->netstack_flags |= NSF_ZONE_DESTROY;
/*
* If the shutdown callback wasn't called earlier (e.g., if this is
- * a netstack shared between multiple zones), then we call it now.
+ * a netstack shared between multiple zones), then we schedule it now.
+ *
+ * Determine the set of stacks that exist before we drop the lock.
+ * Set NSS_DESTROY_NEEDED for each of those. That
+ * ensures that when we return all the callbacks for existing
+ * instances have completed.
*/
for (i = 0; i < NS_MAX; i++) {
- mutex_enter(&ns->netstack_lock);
+ nm_state_t *nms = &ns->netstack_m_state[i];
+
if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
ns_reg[i].nr_shutdown != NULL &&
- (ns->netstack_m_state[i] & NSS_CREATE_COMPLETED) &&
- (ns->netstack_m_state[i] & NSS_SHUTDOWN_ALL) == 0) {
- ns->netstack_m_state[i] |= NSS_SHUTDOWN_NEEDED;
+ (nms->nms_flags & NSS_CREATE_COMPLETED) &&
+ (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
+ nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
DTRACE_PROBE2(netstack__shutdown__needed,
netstack_t *, ns, int, i);
}
- mutex_exit(&ns->netstack_lock);
- }
- /*
- * Determine the set of stacks that exist before we drop the lock.
- * Set DESTROY_NEEDED for each of those.
- */
- for (i = 0; i < NS_MAX; i++) {
- mutex_enter(&ns->netstack_lock);
+
if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
ns_reg[i].nr_destroy != NULL &&
- (ns->netstack_m_state[i] & NSS_CREATE_COMPLETED) &&
- (ns->netstack_m_state[i] & NSS_DESTROY_ALL) == 0) {
- ns->netstack_m_state[i] |= NSS_DESTROY_NEEDED;
+ (nms->nms_flags & NSS_CREATE_COMPLETED) &&
+ (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
+ nms->nms_flags |= NSS_DESTROY_NEEDED;
DTRACE_PROBE2(netstack__destroy__needed,
netstack_t *, ns, int, i);
}
- mutex_exit(&ns->netstack_lock);
}
+ mutex_exit(&ns->netstack_lock);
mutex_exit(&netstack_g_lock);
/*
* Call the shutdown and destroy functions for all registered modules
* for this netstack.
+ *
+ * Since there are some ordering dependencies between the modules we
+ * tear them down in the reverse order of what was used to create them.
+ *
+ * Since a netstack_t is never reused (when a zone is rebooted it gets
+ * a new zoneid == netstackid i.e. a new netstack_t is allocated) we
+ * leave nms_flags the way it is i.e. with NSS_DESTROY_COMPLETED set.
+ * That is different than in the netstack_unregister() case.
*/
- netstack_do_shutdown(ns, NS_ALL);
- netstack_do_destroy(ns, NS_ALL);
+ apply_all_modules(ns, netstack_apply_shutdown);
+ apply_all_modules_reverse(ns, netstack_apply_destroy);
+
+ /* Tell any waiting netstack_register/netstack_unregister to proceed */
+ mutex_enter(&ns->netstack_lock);
+ ASSERT(ns->netstack_flags & NSF_ZONE_DESTROY);
+ ns->netstack_flags &= ~NSF_ZONE_DESTROY;
+ cv_broadcast(&ns->netstack_cv);
+ mutex_exit(&ns->netstack_lock);
+}
+
+/*
+ * Apply a function to all netstacks for a particular moduleid.
+ *
+ * If there is any zone activity (due to a zone being created, shutdown,
+ * or destroyed) we wait for that to complete before we proceed. This ensures
+ * that the moduleids are processed in order when a zone is created or
+ * destroyed.
+ *
+ * The applyfn has to drop netstack_g_lock if it does some work.
+ * In that case we don't follow netstack_next,
+ * even if it is possible to do so without any hazards. This is
+ * because we want the design to allow for the list of netstacks threaded
+ * by netstack_next to change in any arbitrary way during the time the
+ * lock was dropped.
+ *
+ * It is safe to restart the loop at netstack_head since the applyfn
+ * changes netstack_m_state as it processes things, so a subsequent
+ * pass through will have no effect in applyfn, hence the loop will terminate
+ * in at worst O(N^2).
+ */
+static void
+apply_all_netstacks(int moduleid, applyfn_t *applyfn)
+{
+ netstack_t *ns;
+
+ mutex_enter(&netstack_g_lock);
+ ns = netstack_head;
+ while (ns != NULL) {
+ if (wait_for_zone_creator(ns, &netstack_g_lock)) {
+ /* Lock dropped - restart at head */
+ ns = netstack_head;
+ } else if ((applyfn)(&netstack_g_lock, ns, moduleid)) {
+ /* Lock dropped - restart at head */
+ ns = netstack_head;
+ } else {
+ ns = ns->netstack_next;
+ }
+ }
+ mutex_exit(&netstack_g_lock);
+}
+
+/*
+ * Apply a function to all moduleids for a particular netstack.
+ *
+ * Since the netstack linkage doesn't matter in this case we can
+ * ignore whether the function drops the lock.
+ */
+static void
+apply_all_modules(netstack_t *ns, applyfn_t *applyfn)
+{
+ int i;
+
+ mutex_enter(&netstack_g_lock);
+ for (i = 0; i < NS_MAX; i++) {
+ /*
+ * We don't care whether the lock was dropped
+ * since we are not iterating over netstack_head.
+ */
+ (void) (applyfn)(&netstack_g_lock, ns, i);
+ }
+ mutex_exit(&netstack_g_lock);
+}
+
+/* Like the above but in reverse moduleid order */
+static void
+apply_all_modules_reverse(netstack_t *ns, applyfn_t *applyfn)
+{
+ int i;
+
+ mutex_enter(&netstack_g_lock);
+ for (i = NS_MAX-1; i >= 0; i--) {
+ /*
+ * We don't care whether the lock was dropped
+ * since we are not iterating over netstack_head.
+ */
+ (void) (applyfn)(&netstack_g_lock, ns, i);
+ }
+ mutex_exit(&netstack_g_lock);
}
/*
* Call the create function for the ns and moduleid if CREATE_NEEDED
* is set.
- * When it calls it, it drops the netstack_lock held by the caller,
- * and returns true to tell the caller it needs to re-evalute the
- * state..
+ * If some other thread gets here first and sets *_INPROGRESS, then
+ * we wait for that thread to complete so that we can ensure that
+ * all the callbacks are done when we've looped over all netstacks/moduleids.
+ *
+ * When we call the create function, we temporarily drop the netstack_lock
+ * held by the caller, and return true to tell the caller it needs to
+ * re-evaluate the state.
*/
static boolean_t
netstack_apply_create(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
void *result;
netstackid_t stackid;
+ nm_state_t *nms = &ns->netstack_m_state[moduleid];
+ boolean_t dropped = B_FALSE;
ASSERT(MUTEX_HELD(lockp));
mutex_enter(&ns->netstack_lock);
- if (ns->netstack_m_state[moduleid] & NSS_CREATE_NEEDED) {
- ns->netstack_m_state[moduleid] &= ~NSS_CREATE_NEEDED;
- ns->netstack_m_state[moduleid] |= NSS_CREATE_INPROGRESS;
+
+ if (wait_for_nms_inprogress(ns, nms, lockp))
+ dropped = B_TRUE;
+
+ if (nms->nms_flags & NSS_CREATE_NEEDED) {
+ nms->nms_flags &= ~NSS_CREATE_NEEDED;
+ nms->nms_flags |= NSS_CREATE_INPROGRESS;
DTRACE_PROBE2(netstack__create__inprogress,
netstack_t *, ns, int, moduleid);
mutex_exit(&ns->netstack_lock);
mutex_exit(lockp);
+ dropped = B_TRUE;
ASSERT(ns_reg[moduleid].nr_create != NULL);
stackid = ns->netstack_stackid;
@@ -504,42 +681,55 @@ netstack_apply_create(kmutex_t *lockp, netstack_t *ns, int moduleid)
void *, result, netstack_t *, ns);
ASSERT(result != NULL);
+ mutex_enter(lockp);
mutex_enter(&ns->netstack_lock);
ns->netstack_modules[moduleid] = result;
- ns->netstack_m_state[moduleid] &= ~NSS_CREATE_INPROGRESS;
- ns->netstack_m_state[moduleid] |= NSS_CREATE_COMPLETED;
+ nms->nms_flags &= ~NSS_CREATE_INPROGRESS;
+ nms->nms_flags |= NSS_CREATE_COMPLETED;
+ cv_broadcast(&nms->nms_cv);
DTRACE_PROBE2(netstack__create__completed,
netstack_t *, ns, int, moduleid);
mutex_exit(&ns->netstack_lock);
- return (B_TRUE);
+ return (dropped);
} else {
mutex_exit(&ns->netstack_lock);
- return (B_FALSE);
+ return (dropped);
}
}
/*
* Call the shutdown function for the ns and moduleid if SHUTDOWN_NEEDED
* is set.
- * When it calls it, it drops the netstack_lock held by the caller,
- * and returns true to tell the caller it needs to re-evalute the
- * state..
+ * If some other thread gets here first and sets *_INPROGRESS, then
+ * we wait for that thread to complete so that we can ensure that
+ * all the callbacks are done when we've looped over all netstacks/moduleids.
+ *
+ * When we call the shutdown function, we temporarily drop the netstack_lock
+ * held by the caller, and return true to tell the caller it needs to
+ * re-evaluate the state.
*/
static boolean_t
netstack_apply_shutdown(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
netstackid_t stackid;
void * netstack_module;
+ nm_state_t *nms = &ns->netstack_m_state[moduleid];
+ boolean_t dropped = B_FALSE;
ASSERT(MUTEX_HELD(lockp));
mutex_enter(&ns->netstack_lock);
- if (ns->netstack_m_state[moduleid] & NSS_SHUTDOWN_NEEDED) {
- ns->netstack_m_state[moduleid] &= ~NSS_SHUTDOWN_NEEDED;
- ns->netstack_m_state[moduleid] |= NSS_SHUTDOWN_INPROGRESS;
+
+ if (wait_for_nms_inprogress(ns, nms, lockp))
+ dropped = B_TRUE;
+
+ if (nms->nms_flags & NSS_SHUTDOWN_NEEDED) {
+ nms->nms_flags &= ~NSS_SHUTDOWN_NEEDED;
+ nms->nms_flags |= NSS_SHUTDOWN_INPROGRESS;
DTRACE_PROBE2(netstack__shutdown__inprogress,
netstack_t *, ns, int, moduleid);
mutex_exit(&ns->netstack_lock);
mutex_exit(lockp);
+ dropped = B_TRUE;
ASSERT(ns_reg[moduleid].nr_shutdown != NULL);
stackid = ns->netstack_stackid;
@@ -551,43 +741,55 @@ netstack_apply_shutdown(kmutex_t *lockp, netstack_t *ns, int moduleid)
DTRACE_PROBE1(netstack__shutdown__end,
netstack_t *, ns);
+ mutex_enter(lockp);
mutex_enter(&ns->netstack_lock);
- ns->netstack_m_state[moduleid] &= ~NSS_SHUTDOWN_INPROGRESS;
- ns->netstack_m_state[moduleid] |= NSS_SHUTDOWN_COMPLETED;
+ nms->nms_flags &= ~NSS_SHUTDOWN_INPROGRESS;
+ nms->nms_flags |= NSS_SHUTDOWN_COMPLETED;
+ cv_broadcast(&nms->nms_cv);
DTRACE_PROBE2(netstack__shutdown__completed,
netstack_t *, ns, int, moduleid);
mutex_exit(&ns->netstack_lock);
- return (B_TRUE);
+ return (dropped);
} else {
mutex_exit(&ns->netstack_lock);
- return (B_FALSE);
+ return (dropped);
}
}
/*
* Call the destroy function for the ns and moduleid if DESTROY_NEEDED
* is set.
- * When it calls it, it drops the netstack_lock held by the caller,
- * and returns true to tell the caller it needs to re-evalute the
- * state..
+ * If some other thread gets here first and sets *_INPROGRESS, then
+ * we wait for that thread to complete so that we can ensure that
+ * all the callbacks are done when we've looped over all netstacks/moduleids.
+ *
+ * When we call the destroy function, we temporarily drop the netstack_lock
+ * held by the caller, and return true to tell the caller it needs to
+ * re-evaluate the state.
*/
static boolean_t
netstack_apply_destroy(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
netstackid_t stackid;
void * netstack_module;
+ nm_state_t *nms = &ns->netstack_m_state[moduleid];
+ boolean_t dropped = B_FALSE;
ASSERT(MUTEX_HELD(lockp));
mutex_enter(&ns->netstack_lock);
- if (ns->netstack_m_state[moduleid] & NSS_DESTROY_NEEDED) {
- ns->netstack_m_state[moduleid] &= ~NSS_DESTROY_NEEDED;
- ns->netstack_m_state[moduleid] |= NSS_DESTROY_INPROGRESS;
+
+ if (wait_for_nms_inprogress(ns, nms, lockp))
+ dropped = B_TRUE;
+
+ if (nms->nms_flags & NSS_DESTROY_NEEDED) {
+ nms->nms_flags &= ~NSS_DESTROY_NEEDED;
+ nms->nms_flags |= NSS_DESTROY_INPROGRESS;
DTRACE_PROBE2(netstack__destroy__inprogress,
netstack_t *, ns, int, moduleid);
mutex_exit(&ns->netstack_lock);
mutex_exit(lockp);
+ dropped = B_TRUE;
- /* XXX race against unregister? */
ASSERT(ns_reg[moduleid].nr_destroy != NULL);
stackid = ns->netstack_stackid;
netstack_module = ns->netstack_modules[moduleid];
@@ -598,177 +800,83 @@ netstack_apply_destroy(kmutex_t *lockp, netstack_t *ns, int moduleid)
DTRACE_PROBE1(netstack__destroy__end,
netstack_t *, ns);
+ mutex_enter(lockp);
mutex_enter(&ns->netstack_lock);
ns->netstack_modules[moduleid] = NULL;
- ns->netstack_m_state[moduleid] &= ~NSS_DESTROY_INPROGRESS;
- ns->netstack_m_state[moduleid] |= NSS_DESTROY_COMPLETED;
+ nms->nms_flags &= ~NSS_DESTROY_INPROGRESS;
+ nms->nms_flags |= NSS_DESTROY_COMPLETED;
+ cv_broadcast(&nms->nms_cv);
DTRACE_PROBE2(netstack__destroy__completed,
netstack_t *, ns, int, moduleid);
mutex_exit(&ns->netstack_lock);
- return (B_TRUE);
+ return (dropped);
} else {
mutex_exit(&ns->netstack_lock);
- return (B_FALSE);
+ return (dropped);
}
}
/*
- * Apply a function to all netstacks for a particular moduleid.
- *
- * The applyfn has to drop netstack_g_lock if it does some work.
- * In that case we don't follow netstack_next after reacquiring the
- * lock, even if it is possible to do so without any hazards. This is
- * because we want the design to allow for the list of netstacks threaded
- * by netstack_next to change in any arbitrary way during the time the
- * lock was dropped.
- *
- * It is safe to restart the loop at netstack_head since the applyfn
- * changes netstack_m_state as it processes things, so a subsequent
- * pass through will have no effect in applyfn, hence the loop will terminate
- * in at worst O(N^2).
+ * If somebody is creating the netstack (due to a new zone being created)
+ * then we wait for them to complete. This ensures that any additional
+ * netstack_register() doesn't cause the create functions to run out of
+ * order.
+ * Note that we do not need such a global wait in the case of the shutdown
+ * and destroy callbacks, since in that case it is sufficient for both
+ * threads to set NEEDED and wait for INPROGRESS to ensure ordering.
+ * Returns true if lockp was temporarily dropped while waiting.
*/
-static void
-apply_all_netstacks(int moduleid, applyfn_t *applyfn)
+static boolean_t
+wait_for_zone_creator(netstack_t *ns, kmutex_t *lockp)
{
- netstack_t *ns;
+ boolean_t dropped = B_FALSE;
- mutex_enter(&netstack_g_lock);
- ns = netstack_head;
- while (ns != NULL) {
- if ((applyfn)(&netstack_g_lock, ns, moduleid)) {
- /* Lock dropped - restart at head */
-#ifdef NS_DEBUG
- (void) printf("apply_all_netstacks: "
- "LD for %p/%d, %d\n",
- (void *)ns, ns->netstack_stackid, moduleid);
-#endif
- mutex_enter(&netstack_g_lock);
- ns = netstack_head;
- } else {
- ns = ns->netstack_next;
+ mutex_enter(&ns->netstack_lock);
+ while (ns->netstack_flags & NSF_ZONE_CREATE) {
+ DTRACE_PROBE1(netstack__wait__zone__inprogress,
+ netstack_t *, ns);
+ if (lockp != NULL) {
+ dropped = B_TRUE;
+ mutex_exit(lockp);
+ }
+ cv_wait(&ns->netstack_cv, &ns->netstack_lock);
+ if (lockp != NULL) {
+ /* First drop netstack_lock to preserve order */
+ mutex_exit(&ns->netstack_lock);
+ mutex_enter(lockp);
+ mutex_enter(&ns->netstack_lock);
}
}
- mutex_exit(&netstack_g_lock);
+ mutex_exit(&ns->netstack_lock);
+ return (dropped);
}
/*
- * Apply a function to all moduleids for a particular netstack.
- *
- * Since the netstack linkage doesn't matter in this case we can
- * ignore whether the function drops the lock.
+ * Wait for any INPROGRESS flag to be cleared for the netstack/moduleid
+ * combination.
+ * Returns true if lockp was temporarily dropped while waiting.
*/
-static void
-apply_all_modules(netstack_t *ns, applyfn_t *applyfn)
+static boolean_t
+wait_for_nms_inprogress(netstack_t *ns, nm_state_t *nms, kmutex_t *lockp)
{
- int i;
-
- mutex_enter(&netstack_g_lock);
- for (i = 0; i < NS_MAX; i++) {
- if ((applyfn)(&netstack_g_lock, ns, i)) {
- /*
- * Lock dropped but since we are not iterating over
- * netstack_head we can just reacquire the lock.
- */
- mutex_enter(&netstack_g_lock);
+ boolean_t dropped = B_FALSE;
+
+ while (nms->nms_flags & NSS_ALL_INPROGRESS) {
+ DTRACE_PROBE2(netstack__wait__nms__inprogress,
+ netstack_t *, ns, nm_state_t *, nms);
+ if (lockp != NULL) {
+ dropped = B_TRUE;
+ mutex_exit(lockp);
}
- }
- mutex_exit(&netstack_g_lock);
-}
-
-/* Like the above but in reverse moduleid order */
-static void
-apply_all_modules_reverse(netstack_t *ns, applyfn_t *applyfn)
-{
- int i;
-
- mutex_enter(&netstack_g_lock);
- for (i = NS_MAX-1; i >= 0; i--) {
- if ((applyfn)(&netstack_g_lock, ns, i)) {
- /*
- * Lock dropped but since we are not iterating over
- * netstack_head we can just reacquire the lock.
- */
- mutex_enter(&netstack_g_lock);
+ cv_wait(&nms->nms_cv, &ns->netstack_lock);
+ if (lockp != NULL) {
+ /* First drop netstack_lock to preserve order */
+ mutex_exit(&ns->netstack_lock);
+ mutex_enter(lockp);
+ mutex_enter(&ns->netstack_lock);
}
}
- mutex_exit(&netstack_g_lock);
-}
-
-/*
- * Apply a function to a subset of all module/netstack combinations.
- *
- * If ns is non-NULL we restrict it to that particular instance.
- * If moduleid is a particular one (not NS_ALL), then we restrict it
- * to that particular moduleid.
- * When walking the moduleid, the reverse argument specifies that they
- * should be walked in reverse order.
- * The applyfn returns true if it had dropped the locks.
- */
-static void
-netstack_do_apply(netstack_t *ns, int moduleid, boolean_t reverse,
- applyfn_t *applyfn)
-{
- if (ns != NULL) {
- ASSERT(moduleid == NS_ALL);
- if (reverse)
- apply_all_modules_reverse(ns, applyfn);
- else
- apply_all_modules(ns, applyfn);
- } else {
- ASSERT(moduleid != NS_ALL);
-
- apply_all_netstacks(moduleid, applyfn);
- }
-}
-
-/*
- * Run the create function for all modules x stack combinations
- * that have NSS_CREATE_NEEDED set.
- *
- * Call the create function for each stack that has CREATE_NEEDED.
- * Set CREATE_INPROGRESS, drop lock, and after done,
- * set CREATE_COMPLETE
- */
-static void
-netstack_do_create(netstack_t *ns, int moduleid)
-{
- netstack_do_apply(ns, moduleid, B_FALSE, netstack_apply_create);
-}
-
-/*
- * Run the shutdown function for all modules x stack combinations
- * that have NSS_SHUTDOWN_NEEDED set.
- *
- * Call the shutdown function for each stack that has SHUTDOWN_NEEDED.
- * Set SHUTDOWN_INPROGRESS, drop lock, and after done,
- * set SHUTDOWN_COMPLETE
- */
-static void
-netstack_do_shutdown(netstack_t *ns, int moduleid)
-{
- netstack_do_apply(ns, moduleid, B_FALSE, netstack_apply_shutdown);
-}
-
-/*
- * Run the destroy function for all modules x stack combinations
- * that have NSS_DESTROY_NEEDED set.
- *
- * Call the destroy function for each stack that has DESTROY_NEEDED.
- * Set DESTROY_INPROGRESS, drop lock, and after done,
- * set DESTROY_COMPLETE
- *
- * Since a netstack_t is never reused (when a zone is rebooted it gets
- * a new zoneid == netstackid i.e. a new netstack_t is allocated) we leave
- * netstack_m_state the way it is i.e. with NSS_DESTROY_COMPLETED set.
- */
-static void
-netstack_do_destroy(netstack_t *ns, int moduleid)
-{
- /*
- * Have to walk the moduleids in reverse order since some
- * modules make implicit assumptions about the order
- */
- netstack_do_apply(ns, moduleid, B_TRUE, netstack_apply_destroy);
+ return (dropped);
}
/*
@@ -845,7 +953,10 @@ netstack_find_by_zoneid(zoneid_t zoneid)
}
/*
- * Find a stack instance given the zoneid.
+ * Find a stack instance given the zoneid. Can only be called from
+ * the create callback. See the comments in zone_find_by_id_nolock why
+ * that limitation exists.
+ *
* Increases the reference count if found; caller must do a
* netstack_rele().
*
@@ -853,8 +964,6 @@ netstack_find_by_zoneid(zoneid_t zoneid)
* matches.
*
* Skip the uninitialized ones.
- *
- * NOTE: The caller must hold zonehash_lock.
*/
netstack_t *
netstack_find_by_zoneid_nolock(zoneid_t zoneid)
@@ -875,7 +984,7 @@ netstack_find_by_zoneid_nolock(zoneid_t zoneid)
else
netstack_hold(ns);
- zone_rele(zone);
+ /* zone_find_by_id_nolock does not have a hold on the zone */
return (ns);
}
@@ -913,6 +1022,7 @@ netstack_rele(netstack_t *ns)
netstack_t **nsp;
boolean_t found;
int refcnt, numzones;
+ int i;
mutex_enter(&ns->netstack_lock);
ASSERT(ns->netstack_refcnt > 0);
@@ -959,6 +1069,14 @@ netstack_rele(netstack_t *ns)
ASSERT(ns->netstack_numzones == 0);
ASSERT(ns->netstack_flags & NSF_CLOSING);
+
+ for (i = 0; i < NS_MAX; i++) {
+ nm_state_t *nms = &ns->netstack_m_state[i];
+
+ cv_destroy(&nms->nms_cv);
+ }
+ mutex_destroy(&ns->netstack_lock);
+ cv_destroy(&ns->netstack_cv);
kmem_free(ns, sizeof (*ns));
}
}
@@ -996,7 +1114,7 @@ kstat_create_netstack(char *ks_module, int ks_instance, char *ks_name,
zoneid_t zoneid = ks_netstackid;
return (kstat_create_zone(ks_module, ks_instance, ks_name,
- ks_class, ks_type, ks_ndata, ks_flags, zoneid));
+ ks_class, ks_type, ks_ndata, ks_flags, zoneid));
}
}
@@ -1144,7 +1262,9 @@ netstack_find_shared_zoneid(zoneid_t zoneid)
/*
* Hide the fact that zoneids and netstackids are allocated from
* the same space in the current implementation.
- * XXX could add checks that the stackid/zoneids are valid...
+ * We currently do not check that the stackid/zoneids are valid, since there
+ * is no need for that. But this should only be done for ids that are
+ * valid.
*/
zoneid_t
netstackid_to_zoneid(netstackid_t stackid)
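
For context on how the framework patched above is consumed, here is a hypothetical module registering its per-stack callbacks. The callback shapes are taken from how this file invokes them (nr_create is called with (stackid, ns) and must return a non-NULL per-stack pointer that the framework stores in netstack_modules[]; nr_shutdown and nr_destroy are called with (stackid, netstack_module)). The NS_FOO module id and all foo_* names are invented for illustration.

#include <sys/netstack.h>
#include <sys/kmem.h>

/* Hypothetical per-stack state for an imaginary "foo" module. */
typedef struct foo_stack {
	netstackid_t	fs_stackid;
} foo_stack_t;

/* ARGSUSED */
static void *
foo_stack_init(netstackid_t stackid, netstack_t *ns)
{
	foo_stack_t *fs = kmem_zalloc(sizeof (*fs), KM_SLEEP);

	fs->fs_stackid = stackid;
	return (fs);	/* saved by the framework in netstack_modules[] */
}

/* ARGSUSED */
static void
foo_stack_shutdown(netstackid_t stackid, void *arg)
{
	/* Quiesce anything that can still reference the stack; may block. */
}

/* ARGSUSED */
static void
foo_stack_fini(netstackid_t stackid, void *arg)
{
	kmem_free(arg, sizeof (foo_stack_t));
}

void
foo_module_init(void)
{
	/* Create callbacks run for all existing stacks before this returns. */
	netstack_register(NS_FOO, foo_stack_init, foo_stack_shutdown,
	    foo_stack_fini);
}

void
foo_module_fini(void)
{
	/* Shutdown and destroy callbacks complete before this returns. */
	netstack_unregister(NS_FOO);
}

netstack_register() sets NSS_CREATE_NEEDED and runs the create callback for every stack that already exists, and netstack_unregister() does not return until the shutdown and destroy callbacks have completed for all existing stacks; the condition-variable waits introduced by this patch are what keep those callbacks running in moduleid order without holding locks across them.
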