Diffstat (limited to 'usr/src/uts/common/os/netstack.c')
-rw-r--r--	usr/src/uts/common/os/netstack.c	1217
1 file changed, 1217 insertions, 0 deletions
diff --git a/usr/src/uts/common/os/netstack.c b/usr/src/uts/common/os/netstack.c
new file mode 100644
index 0000000000..60ee49f8ed
--- /dev/null
+++ b/usr/src/uts/common/os/netstack.c
@@ -0,0 +1,1217 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/param.h>
+#include <sys/sysmacros.h>
+#include <sys/vm.h>
+#include <sys/proc.h>
+#include <sys/tuneable.h>
+#include <sys/systm.h>
+#include <sys/cmn_err.h>
+#include <sys/debug.h>
+#include <sys/sdt.h>
+#include <sys/mutex.h>
+#include <sys/bitmap.h>
+#include <sys/atomic.h>
+#include <sys/kobj.h>
+#include <sys/disp.h>
+#include <vm/seg_kmem.h>
+#include <sys/zone.h>
+#include <sys/netstack.h>
+
+/*
+ * The zone key we use so that the zones framework can tell us about
+ * new zones, which we then use to create new stacks.
+ */
+static zone_key_t netstack_zone_key;
+
+static int netstack_initialized = 0;
+
+/*
+ * Track the registered netstacks.
+ * The global lock protects
+ * - ns_reg
+ * - the list starting at netstack_head and following the netstack_next
+ * pointers.
+ */
+static kmutex_t netstack_g_lock;
+
+/*
+ * Registry of netstacks with their create/shutdown/destroy functions.
+ */
+static struct netstack_registry ns_reg[NS_MAX];
+
+/*
+ * Global list of existing stacks. We use this when a new zone with
+ * an exclusive IP instance is created.
+ *
+ * Note that in some cases a netstack_t needs to stay around after the zone
+ * has gone away. This is because there might be outstanding references
+ * (from TCP TIME_WAIT connections, IPsec state, etc). The netstack_t data
+ * structure and all the foo_stack_t's hanging off of it will be cleaned up
+ * when the last reference to it is dropped.
+ * However, the same zone might be rebooted. That is handled using the
+ * assumption that the zones framework picks a new zoneid each time a zone
+ * is (re)booted. We assert for that condition in netstack_zone_create().
+ * Thus the old netstack_t can take its time for things to time out.
+ */
+static netstack_t *netstack_head;
+
+/*
+ * To support kstat_create_netstack() using kstat_zone_add we need
+ * to track both
+ * - all zoneids that use the global/shared stack
+ * - all kstats that have been added for the shared stack
+ */
+struct shared_zone_list {
+ struct shared_zone_list *sz_next;
+ zoneid_t sz_zoneid;
+};
+
+struct shared_kstat_list {
+ struct shared_kstat_list *sk_next;
+ kstat_t *sk_kstat;
+};
+
+static kmutex_t netstack_shared_lock; /* protects the following two */
+static struct shared_zone_list *netstack_shared_zones;
+static struct shared_kstat_list *netstack_shared_kstats;
+
+static void *netstack_zone_create(zoneid_t zoneid);
+static void netstack_zone_shutdown(zoneid_t zoneid, void *arg);
+static void netstack_zone_destroy(zoneid_t zoneid, void *arg);
+
+static void netstack_do_create(void);
+static void netstack_do_shutdown(void);
+static void netstack_do_destroy(void);
+
+static void netstack_shared_zone_add(zoneid_t zoneid);
+static void netstack_shared_zone_remove(zoneid_t zoneid);
+static void netstack_shared_kstat_add(kstat_t *ks);
+static void netstack_shared_kstat_remove(kstat_t *ks);
+
+
+void
+netstack_init(void)
+{
+ mutex_init(&netstack_g_lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&netstack_shared_lock, NULL, MUTEX_DEFAULT, NULL);
+
+ netstack_initialized = 1;
+
+ /*
+ * We want to be informed each time a zone is created or
+ * destroyed in the kernel, so we can maintain the
+ * stack instance information.
+ */
+ zone_key_create(&netstack_zone_key, netstack_zone_create,
+ netstack_zone_shutdown, netstack_zone_destroy);
+}
+
+/*
+ * Register a new module with the framework.
+ * This registers interest in changes to the set of netstacks.
+ * The createfn and destroyfn are required, but the shutdownfn can be
+ * NULL.
+ * Note that due to the current zsd implementation, when the create
+ * function is called the zone isn't fully present; thus functions
+ * like zone_find_by_* will fail, hence the create function cannot
+ * use many zones kernel functions, including zcmn_err().
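+ *
+ * Illustrative registration by a hypothetical module "foo" (the foo_*
+ * names, foo_stack_t, and the NS_FOO moduleid are made up for this
+ * sketch and are not part of the interface):
+ *
+ *	static void *
+ *	foo_stack_init(netstackid_t stackid, netstack_t *ns)
+ *	{
+ *		foo_stack_t *fs = kmem_zalloc(sizeof (*fs), KM_SLEEP);
+ *
+ *		return (fs);
+ *	}
+ *
+ *	static void
+ *	foo_stack_fini(netstackid_t stackid, void *arg)
+ *	{
+ *		kmem_free(arg, sizeof (foo_stack_t));
+ *	}
+ *
+ *	netstack_register(NS_FOO, foo_stack_init, NULL, foo_stack_fini);
+ * and on module unload:
+ *	netstack_unregister(NS_FOO);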
+ */
+void
+netstack_register(int moduleid,
+ void *(*module_create)(netstackid_t, netstack_t *),
+ void (*module_shutdown)(netstackid_t, void *),
+ void (*module_destroy)(netstackid_t, void *))
+{
+ netstack_t *ns;
+
+ ASSERT(netstack_initialized);
+ ASSERT(moduleid >= 0 && moduleid < NS_MAX);
+ ASSERT(module_create != NULL);
+
+ mutex_enter(&netstack_g_lock);
+ ASSERT(ns_reg[moduleid].nr_create == NULL);
+ ASSERT(ns_reg[moduleid].nr_flags == 0);
+ ns_reg[moduleid].nr_create = module_create;
+ ns_reg[moduleid].nr_shutdown = module_shutdown;
+ ns_reg[moduleid].nr_destroy = module_destroy;
+ ns_reg[moduleid].nr_flags = NRF_REGISTERED;
+
+ /*
+ * Determine the set of stacks that exist before we drop the lock.
+ * Set CREATE_NEEDED for each of those.
+ * netstacks which have been deleted will have NSS_CREATE_COMPLETED
+ * set, but check NSF_CLOSING to be sure.
+ */
+ for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
+ mutex_enter(&ns->netstack_lock);
+ if (!(ns->netstack_flags & NSF_CLOSING) &&
+ (ns->netstack_m_state[moduleid] & NSS_CREATE_ALL) == 0) {
+ ns->netstack_m_state[moduleid] |= NSS_CREATE_NEEDED;
+ DTRACE_PROBE2(netstack__create__needed,
+ netstack_t *, ns, int, moduleid);
+ }
+ mutex_exit(&ns->netstack_lock);
+ }
+ mutex_exit(&netstack_g_lock);
+
+ /*
+ * Call the create function for each stack that has CREATE_NEEDED.
+	 * Set CREATE_INPROGRESS, drop the lock, and when done
+	 * set CREATE_COMPLETED.
+ */
+ netstack_do_create();
+}
+
+void
+netstack_unregister(int moduleid)
+{
+ netstack_t *ns;
+
+ ASSERT(moduleid >= 0 && moduleid < NS_MAX);
+
+ ASSERT(ns_reg[moduleid].nr_create != NULL);
+ ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);
+
+ mutex_enter(&netstack_g_lock);
+ /*
+ * Determine the set of stacks that exist before we drop the lock.
+ * Set SHUTDOWN_NEEDED and DESTROY_NEEDED for each of those.
+ */
+ for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
+ mutex_enter(&ns->netstack_lock);
+ if (ns_reg[moduleid].nr_shutdown != NULL &&
+ (ns->netstack_m_state[moduleid] & NSS_CREATE_COMPLETED) &&
+ (ns->netstack_m_state[moduleid] & NSS_SHUTDOWN_ALL) == 0) {
+ ns->netstack_m_state[moduleid] |= NSS_SHUTDOWN_NEEDED;
+ DTRACE_PROBE2(netstack__shutdown__needed,
+ netstack_t *, ns, int, moduleid);
+ }
+ if ((ns_reg[moduleid].nr_flags & NRF_REGISTERED) &&
+ ns_reg[moduleid].nr_destroy != NULL &&
+ (ns->netstack_m_state[moduleid] & NSS_CREATE_COMPLETED) &&
+ (ns->netstack_m_state[moduleid] & NSS_DESTROY_ALL) == 0) {
+ ns->netstack_m_state[moduleid] |= NSS_DESTROY_NEEDED;
+ DTRACE_PROBE2(netstack__destroy__needed,
+ netstack_t *, ns, int, moduleid);
+ }
+ mutex_exit(&ns->netstack_lock);
+ }
+ mutex_exit(&netstack_g_lock);
+
+ netstack_do_shutdown();
+ netstack_do_destroy();
+
+ /*
+ * Clear the netstack_m_state so that we can handle this module
+ * being loaded again.
+ */
+ mutex_enter(&netstack_g_lock);
+ for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
+ mutex_enter(&ns->netstack_lock);
+ if (ns->netstack_m_state[moduleid] & NSS_DESTROY_COMPLETED) {
+ ns->netstack_m_state[moduleid] = 0;
+ DTRACE_PROBE2(netstack__destroy__done,
+ netstack_t *, ns, int, moduleid);
+ }
+ mutex_exit(&ns->netstack_lock);
+ }
+
+ ns_reg[moduleid].nr_create = NULL;
+ ns_reg[moduleid].nr_shutdown = NULL;
+ ns_reg[moduleid].nr_destroy = NULL;
+ ns_reg[moduleid].nr_flags = 0;
+ mutex_exit(&netstack_g_lock);
+}
+
+/*
+ * Lookup and/or allocate a netstack for this zone.
+ */
+static void *
+netstack_zone_create(zoneid_t zoneid)
+{
+ netstackid_t stackid;
+ netstack_t *ns;
+ netstack_t **nsp;
+ zone_t *zone;
+ int i;
+
+ ASSERT(netstack_initialized);
+
+ zone = zone_find_by_id_nolock(zoneid);
+ ASSERT(zone != NULL);
+
+ if (zone->zone_flags & ZF_NET_EXCL) {
+ stackid = zoneid;
+ } else {
+ /* Look for the stack instance for the global */
+ stackid = GLOBAL_NETSTACKID;
+ }
+
+ /* Allocate even if it isn't needed; simplifies locking */
+ ns = (netstack_t *)kmem_zalloc(sizeof (netstack_t), KM_SLEEP);
+
+	/* Check whether there is a matching stack instance */
+ mutex_enter(&netstack_g_lock);
+ for (nsp = &netstack_head; *nsp != NULL;
+ nsp = &((*nsp)->netstack_next)) {
+ if ((*nsp)->netstack_stackid == stackid) {
+ /*
+ * Should never find a pre-existing exclusive stack
+ */
+ ASSERT(stackid == GLOBAL_NETSTACKID);
+ kmem_free(ns, sizeof (netstack_t));
+ ns = *nsp;
+ mutex_enter(&ns->netstack_lock);
+ ns->netstack_numzones++;
+ mutex_exit(&ns->netstack_lock);
+ mutex_exit(&netstack_g_lock);
+ DTRACE_PROBE1(netstack__inc__numzones,
+ netstack_t *, ns);
+ /* Record that we have a new shared stack zone */
+ netstack_shared_zone_add(zoneid);
+ zone->zone_netstack = ns;
+ return (ns);
+ }
+ }
+ /* Not found */
+ mutex_init(&ns->netstack_lock, NULL, MUTEX_DEFAULT, NULL);
+ ns->netstack_stackid = zoneid;
+ ns->netstack_numzones = 1;
+ ns->netstack_refcnt = 1; /* Decremented by netstack_zone_destroy */
+ ns->netstack_flags = NSF_UNINIT;
+ *nsp = ns;
+ zone->zone_netstack = ns;
+
+ /*
+ * Determine the set of module create functions that need to be
+ * called before we drop the lock.
+ */
+ for (i = 0; i < NS_MAX; i++) {
+ mutex_enter(&ns->netstack_lock);
+ if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
+ (ns->netstack_m_state[i] & NSS_CREATE_ALL) == 0) {
+ ns->netstack_m_state[i] |= NSS_CREATE_NEEDED;
+ DTRACE_PROBE2(netstack__create__needed,
+ netstack_t *, ns, int, i);
+ }
+ mutex_exit(&ns->netstack_lock);
+ }
+ mutex_exit(&netstack_g_lock);
+
+ netstack_do_create();
+
+ mutex_enter(&ns->netstack_lock);
+ ns->netstack_flags &= ~NSF_UNINIT;
+ mutex_exit(&ns->netstack_lock);
+
+ return (ns);
+}
+
+/* ARGSUSED */
+static void
+netstack_zone_shutdown(zoneid_t zoneid, void *arg)
+{
+ netstack_t *ns = (netstack_t *)arg;
+ int i;
+
+ ASSERT(arg != NULL);
+
+ mutex_enter(&ns->netstack_lock);
+ ASSERT(ns->netstack_numzones > 0);
+ if (ns->netstack_numzones != 1) {
+ /* Stack instance being used by other zone */
+ mutex_exit(&ns->netstack_lock);
+ ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
+ return;
+ }
+ mutex_exit(&ns->netstack_lock);
+
+ mutex_enter(&netstack_g_lock);
+ /*
+	 * Determine the set of registered modules before we drop the lock.
+ * Set SHUTDOWN_NEEDED for each of those.
+ */
+ for (i = 0; i < NS_MAX; i++) {
+ mutex_enter(&ns->netstack_lock);
+ if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
+ ns_reg[i].nr_shutdown != NULL &&
+ (ns->netstack_m_state[i] & NSS_CREATE_COMPLETED) &&
+ (ns->netstack_m_state[i] & NSS_SHUTDOWN_ALL) == 0) {
+ ns->netstack_m_state[i] |= NSS_SHUTDOWN_NEEDED;
+ DTRACE_PROBE2(netstack__shutdown__needed,
+ netstack_t *, ns, int, i);
+ }
+ mutex_exit(&ns->netstack_lock);
+ }
+ mutex_exit(&netstack_g_lock);
+
+ /* Call the shutdown function for all registered modules */
+ netstack_do_shutdown();
+}
+
+/*
+ * Common routine to release a zone.
+ * If this was the last zone using the stack instance, then arrange for
+ * the netstack_t to be freed once the refcnt drops to zero.
+ */
+/* ARGSUSED */
+static void
+netstack_zone_destroy(zoneid_t zoneid, void *arg)
+{
+ netstack_t *ns = (netstack_t *)arg;
+
+ ASSERT(arg != NULL);
+
+ mutex_enter(&ns->netstack_lock);
+ ASSERT(ns->netstack_numzones > 0);
+ ns->netstack_numzones--;
+ if (ns->netstack_numzones != 0) {
+ /* Stack instance being used by other zone */
+ mutex_exit(&ns->netstack_lock);
+ ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
+	/* Record that a shared stack zone has gone away */
+ netstack_shared_zone_remove(zoneid);
+ return;
+ }
+ /*
+	 * Set CLOSING so that the netstack_find_by_* functions will
+	 * not find it, then drop the reference count below.
+ */
+ ns->netstack_flags |= NSF_CLOSING;
+ mutex_exit(&ns->netstack_lock);
+ DTRACE_PROBE1(netstack__dec__numzones, netstack_t *, ns);
+ /* No other thread can call zone_destroy for this stack */
+
+ /*
+	 * Decrease refcnt to account for the one set in netstack_zone_create()
+ */
+ netstack_rele(ns);
+}
+
+/*
+ * Called when the reference count drops to zero.
+ * Call the destroy functions for each registered module.
+ */
+static void
+netstack_stack_inactive(netstack_t *ns)
+{
+ int i;
+
+ mutex_enter(&netstack_g_lock);
+ /*
+ * If the shutdown callback wasn't called earlier (e.g., if this is
+ * a netstack shared between multiple zones), then we call it now.
+ */
+ for (i = 0; i < NS_MAX; i++) {
+ mutex_enter(&ns->netstack_lock);
+ if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
+ ns_reg[i].nr_shutdown != NULL &&
+ (ns->netstack_m_state[i] & NSS_CREATE_COMPLETED) &&
+ (ns->netstack_m_state[i] & NSS_SHUTDOWN_ALL) == 0) {
+ ns->netstack_m_state[i] |= NSS_SHUTDOWN_NEEDED;
+ DTRACE_PROBE2(netstack__shutdown__needed,
+ netstack_t *, ns, int, i);
+ }
+ mutex_exit(&ns->netstack_lock);
+ }
+ /*
+	 * Determine the set of registered modules before we drop the lock.
+ * Set DESTROY_NEEDED for each of those.
+ */
+ for (i = 0; i < NS_MAX; i++) {
+ mutex_enter(&ns->netstack_lock);
+ if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
+ ns_reg[i].nr_destroy != NULL &&
+ (ns->netstack_m_state[i] & NSS_CREATE_COMPLETED) &&
+ (ns->netstack_m_state[i] & NSS_DESTROY_ALL) == 0) {
+ ns->netstack_m_state[i] |= NSS_DESTROY_NEEDED;
+ DTRACE_PROBE2(netstack__destroy__needed,
+ netstack_t *, ns, int, i);
+ }
+ mutex_exit(&ns->netstack_lock);
+ }
+ mutex_exit(&netstack_g_lock);
+
+ netstack_do_shutdown();
+ netstack_do_destroy();
+}
+
+/*
+ * Call the create function for the ns and moduleid if CREATE_NEEDED
+ * is set.
+ * If it calls it, it drops the netstack_lock held by the caller
+ * and returns true to tell the caller it needs to re-evaluate the
+ * state.
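+ *
+ * For each (netstack, module) pair the per-module state moves through
+ * the following transitions (a sketch of the invariant encoded in
+ * netstack_m_state[], not new mechanism):
+ *
+ *	NSS_CREATE_NEEDED -> NSS_CREATE_INPROGRESS -> NSS_CREATE_COMPLETED
+ *	NSS_SHUTDOWN_NEEDED -> NSS_SHUTDOWN_INPROGRESS -> NSS_SHUTDOWN_COMPLETED
+ *	NSS_DESTROY_NEEDED -> NSS_DESTROY_INPROGRESS -> NSS_DESTROY_COMPLETED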
+ */
+static boolean_t
+netstack_apply_create(kmutex_t *lockp, netstack_t *ns, int moduleid)
+{
+ void *result;
+ netstackid_t stackid;
+
+ ASSERT(MUTEX_HELD(lockp));
+ mutex_enter(&ns->netstack_lock);
+ if (ns->netstack_m_state[moduleid] & NSS_CREATE_NEEDED) {
+ ns->netstack_m_state[moduleid] &= ~NSS_CREATE_NEEDED;
+ ns->netstack_m_state[moduleid] |= NSS_CREATE_INPROGRESS;
+ DTRACE_PROBE2(netstack__create__inprogress,
+ netstack_t *, ns, int, moduleid);
+ mutex_exit(&ns->netstack_lock);
+ mutex_exit(lockp);
+
+ ASSERT(ns_reg[moduleid].nr_create != NULL);
+ stackid = ns->netstack_stackid;
+ DTRACE_PROBE2(netstack__create__start,
+ netstackid_t, stackid,
+ netstack_t *, ns);
+ result = (ns_reg[moduleid].nr_create)(stackid, ns);
+ DTRACE_PROBE2(netstack__create__end,
+ void *, result, netstack_t *, ns);
+
+ ASSERT(result != NULL);
+ mutex_enter(&ns->netstack_lock);
+ ns->netstack_modules[moduleid] = result;
+ ns->netstack_m_state[moduleid] &= ~NSS_CREATE_INPROGRESS;
+ ns->netstack_m_state[moduleid] |= NSS_CREATE_COMPLETED;
+ DTRACE_PROBE2(netstack__create__completed,
+ netstack_t *, ns, int, moduleid);
+ mutex_exit(&ns->netstack_lock);
+ return (B_TRUE);
+ } else {
+ mutex_exit(&ns->netstack_lock);
+ return (B_FALSE);
+ }
+}
+
+/*
+ * Call the shutdown function for the ns and moduleid if SHUTDOWN_NEEDED
+ * is set.
+ * If it calls it, it drops the netstack_lock held by the caller
+ * and returns true to tell the caller it needs to re-evaluate the
+ * state.
+ */
+static boolean_t
+netstack_apply_shutdown(kmutex_t *lockp, netstack_t *ns, int moduleid)
+{
+ netstackid_t stackid;
+	void *netstack_module;
+
+ ASSERT(MUTEX_HELD(lockp));
+ mutex_enter(&ns->netstack_lock);
+ if (ns->netstack_m_state[moduleid] & NSS_SHUTDOWN_NEEDED) {
+ ns->netstack_m_state[moduleid] &= ~NSS_SHUTDOWN_NEEDED;
+ ns->netstack_m_state[moduleid] |= NSS_SHUTDOWN_INPROGRESS;
+ DTRACE_PROBE2(netstack__shutdown__inprogress,
+ netstack_t *, ns, int, moduleid);
+ mutex_exit(&ns->netstack_lock);
+ mutex_exit(lockp);
+
+ ASSERT(ns_reg[moduleid].nr_shutdown != NULL);
+ stackid = ns->netstack_stackid;
+ netstack_module = ns->netstack_modules[moduleid];
+ DTRACE_PROBE2(netstack__shutdown__start,
+ netstackid_t, stackid,
+ void *, netstack_module);
+ (ns_reg[moduleid].nr_shutdown)(stackid, netstack_module);
+ DTRACE_PROBE1(netstack__shutdown__end,
+ netstack_t *, ns);
+
+ mutex_enter(&ns->netstack_lock);
+ ns->netstack_m_state[moduleid] &= ~NSS_SHUTDOWN_INPROGRESS;
+ ns->netstack_m_state[moduleid] |= NSS_SHUTDOWN_COMPLETED;
+ DTRACE_PROBE2(netstack__shutdown__completed,
+ netstack_t *, ns, int, moduleid);
+ mutex_exit(&ns->netstack_lock);
+ return (B_TRUE);
+ } else {
+ mutex_exit(&ns->netstack_lock);
+ return (B_FALSE);
+ }
+}
+
+/*
+ * Call the destroy function for the ns and moduleid if DESTROY_NEEDED
+ * is set.
+ * If it calls it, it drops the netstack_lock held by the caller
+ * and returns true to tell the caller it needs to re-evaluate the
+ * state.
+ */
+static boolean_t
+netstack_apply_destroy(kmutex_t *lockp, netstack_t *ns, int moduleid)
+{
+ netstackid_t stackid;
+	void *netstack_module;
+
+ ASSERT(MUTEX_HELD(lockp));
+ mutex_enter(&ns->netstack_lock);
+ if (ns->netstack_m_state[moduleid] & NSS_DESTROY_NEEDED) {
+ ns->netstack_m_state[moduleid] &= ~NSS_DESTROY_NEEDED;
+ ns->netstack_m_state[moduleid] |= NSS_DESTROY_INPROGRESS;
+ DTRACE_PROBE2(netstack__destroy__inprogress,
+ netstack_t *, ns, int, moduleid);
+ mutex_exit(&ns->netstack_lock);
+ mutex_exit(lockp);
+
+ /* XXX race against unregister? */
+ ASSERT(ns_reg[moduleid].nr_destroy != NULL);
+ stackid = ns->netstack_stackid;
+ netstack_module = ns->netstack_modules[moduleid];
+ DTRACE_PROBE2(netstack__destroy__start,
+ netstackid_t, stackid,
+ void *, netstack_module);
+ (ns_reg[moduleid].nr_destroy)(stackid, netstack_module);
+ DTRACE_PROBE1(netstack__destroy__end,
+ netstack_t *, ns);
+
+ mutex_enter(&ns->netstack_lock);
+ ns->netstack_modules[moduleid] = NULL;
+ ns->netstack_m_state[moduleid] &= ~NSS_DESTROY_INPROGRESS;
+ ns->netstack_m_state[moduleid] |= NSS_DESTROY_COMPLETED;
+ DTRACE_PROBE2(netstack__destroy__completed,
+ netstack_t *, ns, int, moduleid);
+ mutex_exit(&ns->netstack_lock);
+ return (B_TRUE);
+ } else {
+ mutex_exit(&ns->netstack_lock);
+ return (B_FALSE);
+ }
+}
+
+static void
+apply_loop(netstack_t **headp, kmutex_t *lockp,
+ boolean_t (*applyfn)(kmutex_t *, netstack_t *, int moduleid))
+{
+ netstack_t *ns;
+ int i;
+ boolean_t lock_dropped, result;
+
+ lock_dropped = B_FALSE;
+ ns = *headp;
+ while (ns != NULL) {
+ for (i = 0; i < NS_MAX; i++) {
+ result = (applyfn)(lockp, ns, i);
+ if (result) {
+#ifdef NS_DEBUG
+ (void) printf("netstack_do_apply: "
+ "LD for %p/%d, %d\n",
+ (void *)ns, ns->netstack_stackid, i);
+#endif
+ lock_dropped = B_TRUE;
+ mutex_enter(lockp);
+ }
+ }
+ /*
+ * If at least one applyfn call caused lockp to be dropped,
+ * then we don't follow netstack_next after reacquiring the
+ * lock, even if it is possible to do so without any hazards.
+ * This is because we want the design to allow for the list of
+ * netstacks threaded by netstack_next to change in any
+ * arbitrary way during the time the 'lockp' was dropped.
+ *
+ * It is safe to restart the loop at *headp since
+ * the applyfn changes netstack_m_state as it processes
+ * things, so a subsequent pass through will have no
+ * effect in applyfn, hence the loop will terminate
+ * in at worst O(N^2).
+ */
+ if (lock_dropped) {
+#ifdef NS_DEBUG
+ (void) printf("netstack_do_apply: "
+ "Lock Dropped for %p/%d, %d\n",
+ (void *)ns, ns->netstack_stackid, i);
+#endif
+ lock_dropped = B_FALSE;
+ ns = *headp;
+ } else {
+ ns = ns->netstack_next;
+ }
+ }
+}
+
+/* Like above, but in the reverse order of moduleids */
+static void
+apply_loop_reverse(netstack_t **headp, kmutex_t *lockp,
+ boolean_t (*applyfn)(kmutex_t *, netstack_t *, int moduleid))
+{
+ netstack_t *ns;
+ int i;
+ boolean_t lock_dropped, result;
+
+ lock_dropped = B_FALSE;
+ ns = *headp;
+ while (ns != NULL) {
+ for (i = NS_MAX-1; i >= 0; i--) {
+ result = (applyfn)(lockp, ns, i);
+ if (result) {
+#ifdef NS_DEBUG
+ (void) printf("netstack_do_apply: "
+ "LD for %p/%d, %d\n",
+ (void *)ns, ns->netstack_stackid, i);
+#endif
+ lock_dropped = B_TRUE;
+ mutex_enter(lockp);
+ }
+ }
+ /*
+ * If at least one applyfn call caused lockp to be dropped,
+ * then we don't follow netstack_next after reacquiring the
+ * lock, even if it is possible to do so without any hazards.
+ * This is because we want the design to allow for the list of
+ * netstacks threaded by netstack_next to change in any
+ * arbitrary way during the time the 'lockp' was dropped.
+ *
+ * It is safe to restart the loop at *headp since
+ * the applyfn changes netstack_m_state as it processes
+ * things, so a subsequent pass through will have no
+ * effect in applyfn, hence the loop will terminate
+ * in at worst O(N^2).
+ */
+ if (lock_dropped) {
+#ifdef NS_DEBUG
+ (void) printf("netstack_do_apply: "
+ "Lock Dropped for %p/%d, %d\n",
+ (void *)ns, ns->netstack_stackid, i);
+#endif
+ lock_dropped = B_FALSE;
+ ns = *headp;
+ } else {
+ ns = ns->netstack_next;
+ }
+ }
+}
+
+/*
+ * Apply a function to all module/netstack combinations.
+ * The applyfn returns true if it had dropped the locks.
+ */
+static void
+netstack_do_apply(int reverse,
+ boolean_t (*applyfn)(kmutex_t *, netstack_t *, int moduleid))
+{
+ mutex_enter(&netstack_g_lock);
+ if (reverse)
+ apply_loop_reverse(&netstack_head, &netstack_g_lock, applyfn);
+ else
+ apply_loop(&netstack_head, &netstack_g_lock, applyfn);
+ mutex_exit(&netstack_g_lock);
+}
+
+/*
+ * Run the create function for all modules x stack combinations
+ * that have NSS_CREATE_NEEDED set.
+ *
+ * Call the create function for each stack that has CREATE_NEEDED.
+ * Set CREATE_INPROGRESS, drop the lock, and when done
+ * set CREATE_COMPLETED.
+ */
+static void
+netstack_do_create(void)
+{
+ netstack_do_apply(B_FALSE, netstack_apply_create);
+}
+
+/*
+ * Run the shutdown function for all modules x stack combinations
+ * that have NSS_SHUTDOWN_NEEDED set.
+ *
+ * Call the shutdown function for each stack that has SHUTDOWN_NEEDED.
+ * Set SHUTDOWN_INPROGRESS, drop the lock, and when done
+ * set SHUTDOWN_COMPLETED.
+ */
+static void
+netstack_do_shutdown(void)
+{
+ netstack_do_apply(B_FALSE, netstack_apply_shutdown);
+}
+
+/*
+ * Run the destroy function for all modules x stack combinations
+ * that have NSS_DESTROY_NEEDED set.
+ *
+ * Call the destroy function for each stack that has DESTROY_NEEDED.
+ * Set DESTROY_INPROGRESS, drop the lock, and when done
+ * set DESTROY_COMPLETED.
+ *
+ * Since a netstack_t is never reused (when a zone is rebooted it gets
+ * a new zoneid == netstackid i.e. a new netstack_t is allocated) we leave
+ * netstack_m_state the way it is i.e. with NSS_DESTROY_COMPLETED set.
+ */
+static void
+netstack_do_destroy(void)
+{
+ /*
+ * Have to walk the moduleids in reverse order since some
+ * modules make implicit assumptions about the order
+ */
+ netstack_do_apply(B_TRUE, netstack_apply_destroy);
+}
+
+/*
+ * Get the stack instance used in caller's zone.
+ * Increases the reference count, caller must do a netstack_rele.
+ * It can't be called after zone_destroy() has started.
+ */
+static netstack_t *
+netstack_get_current(void)
+{
+ netstack_t *ns;
+
+ ns = curproc->p_zone->zone_netstack;
+ ASSERT(ns != NULL);
+ if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
+ return (NULL);
+
+ netstack_hold(ns);
+
+ return (ns);
+}
+
+/*
+ * Find a stack instance given the cred.
+ * This is used by the modules to potentially allow for a future when
+ * something other than the zoneid is used to determine the stack.
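+ *
+ * Example usage (illustrative):
+ *	netstack_t *ns = netstack_find_by_cred(CRED());
+ *
+ *	if (ns != NULL) {
+ *		... use the per-module data in ns->netstack_modules ...
+ *		netstack_rele(ns);
+ *	}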
+ */
+netstack_t *
+netstack_find_by_cred(const cred_t *cr)
+{
+ zoneid_t zoneid = crgetzoneid(cr);
+
+ /* Handle the case when cr_zone is NULL */
+ if (zoneid == (zoneid_t)-1)
+ zoneid = GLOBAL_ZONEID;
+
+ /* For performance ... */
+ if (curproc->p_zone->zone_id == zoneid)
+ return (netstack_get_current());
+ else
+ return (netstack_find_by_zoneid(zoneid));
+}
+
+/*
+ * Find a stack instance given the zoneid.
+ * Increases the reference count if found; caller must do a
+ * netstack_rele().
+ *
+ * If there is no exact match then assume the shared stack instance
+ * matches.
+ *
+ * Skip the uninitialized ones.
+ */
+netstack_t *
+netstack_find_by_zoneid(zoneid_t zoneid)
+{
+ netstack_t *ns;
+ zone_t *zone;
+
+ zone = zone_find_by_id(zoneid);
+
+ if (zone == NULL)
+ return (NULL);
+
+ ns = zone->zone_netstack;
+ ASSERT(ns != NULL);
+ if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
+ ns = NULL;
+ else
+ netstack_hold(ns);
+
+ zone_rele(zone);
+ return (ns);
+}
+
+/*
+ * Find a stack instance given the zoneid.
+ * Increases the reference count if found; caller must do a
+ * netstack_rele().
+ *
+ * If there is no exact match then assume the shared stack instance
+ * matches.
+ *
+ * Skip the uninitialized ones.
+ *
+ * NOTE: The caller must hold zonehash_lock.
+ */
+netstack_t *
+netstack_find_by_zoneid_nolock(zoneid_t zoneid)
+{
+ netstack_t *ns;
+ zone_t *zone;
+
+ zone = zone_find_by_id_nolock(zoneid);
+
+ if (zone == NULL)
+ return (NULL);
+
+ ns = zone->zone_netstack;
+ ASSERT(ns != NULL);
+
+ if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
+ ns = NULL;
+ else
+ netstack_hold(ns);
+
+ zone_rele(zone);
+ return (ns);
+}
+
+/*
+ * Find a stack instance given the stackid; only an exact match is returned.
+ * Increases the reference count if found; caller must do a
+ * netstack_rele().
+ *
+ * Skip the uninitialized ones.
+ */
+netstack_t *
+netstack_find_by_stackid(netstackid_t stackid)
+{
+ netstack_t *ns;
+
+ mutex_enter(&netstack_g_lock);
+ for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
+ mutex_enter(&ns->netstack_lock);
+ if (ns->netstack_stackid == stackid &&
+ !(ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))) {
+ mutex_exit(&ns->netstack_lock);
+ netstack_hold(ns);
+ mutex_exit(&netstack_g_lock);
+ return (ns);
+ }
+ mutex_exit(&ns->netstack_lock);
+ }
+ mutex_exit(&netstack_g_lock);
+ return (NULL);
+}
+
+void
+netstack_rele(netstack_t *ns)
+{
+ netstack_t **nsp;
+ boolean_t found;
+ int refcnt, numzones;
+
+ mutex_enter(&ns->netstack_lock);
+ ASSERT(ns->netstack_refcnt > 0);
+ ns->netstack_refcnt--;
+ /*
+	 * As soon as we drop the lock, additional netstack_rele()s can
+	 * come in, decrement the refcnt to zero, and free the netstack_t.
+	 * Store the counts in local variables, and if we were not the
+	 * last reference, don't touch the netstack_t after that.
+ */
+ refcnt = ns->netstack_refcnt;
+ numzones = ns->netstack_numzones;
+ DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
+ mutex_exit(&ns->netstack_lock);
+
+ if (refcnt == 0 && numzones == 0) {
+ /*
+ * Time to call the destroy functions and free up
+ * the structure
+ */
+ netstack_stack_inactive(ns);
+
+ /* Finally remove from list of netstacks */
+ mutex_enter(&netstack_g_lock);
+ found = B_FALSE;
+ for (nsp = &netstack_head; *nsp != NULL;
+ nsp = &(*nsp)->netstack_next) {
+ if (*nsp == ns) {
+ *nsp = ns->netstack_next;
+ ns->netstack_next = NULL;
+ found = B_TRUE;
+ break;
+ }
+ }
+ ASSERT(found);
+ mutex_exit(&netstack_g_lock);
+
+ ASSERT(ns->netstack_flags & NSF_CLOSING);
+ kmem_free(ns, sizeof (*ns));
+ }
+}
+
+void
+netstack_hold(netstack_t *ns)
+{
+ mutex_enter(&ns->netstack_lock);
+ ns->netstack_refcnt++;
+ ASSERT(ns->netstack_refcnt > 0);
+ mutex_exit(&ns->netstack_lock);
+ DTRACE_PROBE1(netstack__inc__ref, netstack_t *, ns);
+}
+
+/*
+ * To support kstat_create_netstack() using kstat_zone_add we need
+ * to track both
+ * - all zoneids that use the global/shared stack
+ * - all kstats that have been added for the shared stack
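+ *
+ * Example usage (illustrative; the module and kstat names are made up):
+ *	kstat_t *ks = kstat_create_netstack("foomod", 0, "foostats", "net",
+ *	    KSTAT_TYPE_NAMED, nstats, 0, stackid);
+ *
+ *	if (ks != NULL)
+ *		kstat_install(ks);
+ *	...
+ *	kstat_delete_netstack(ks, stackid);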
+ */
+kstat_t *
+kstat_create_netstack(char *ks_module, int ks_instance, char *ks_name,
+ char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
+ netstackid_t ks_netstackid)
+{
+ kstat_t *ks;
+
+ if (ks_netstackid == GLOBAL_NETSTACKID) {
+ ks = kstat_create_zone(ks_module, ks_instance, ks_name,
+ ks_class, ks_type, ks_ndata, ks_flags, GLOBAL_ZONEID);
+ if (ks != NULL)
+ netstack_shared_kstat_add(ks);
+ return (ks);
+ } else {
+ zoneid_t zoneid = ks_netstackid;
+
+ return (kstat_create_zone(ks_module, ks_instance, ks_name,
+ ks_class, ks_type, ks_ndata, ks_flags, zoneid));
+ }
+}
+
+void
+kstat_delete_netstack(kstat_t *ks, netstackid_t ks_netstackid)
+{
+ if (ks_netstackid == GLOBAL_NETSTACKID) {
+ netstack_shared_kstat_remove(ks);
+ }
+ kstat_delete(ks);
+}
+
+static void
+netstack_shared_zone_add(zoneid_t zoneid)
+{
+ struct shared_zone_list *sz;
+ struct shared_kstat_list *sk;
+
+ sz = (struct shared_zone_list *)kmem_zalloc(sizeof (*sz), KM_SLEEP);
+ sz->sz_zoneid = zoneid;
+
+ /* Insert in list */
+ mutex_enter(&netstack_shared_lock);
+ sz->sz_next = netstack_shared_zones;
+ netstack_shared_zones = sz;
+
+ /*
+ * Perform kstat_zone_add for each existing shared stack kstat.
+	 * Note: Holds netstack_shared_lock across kstat_zone_add.
+ */
+ for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
+ kstat_zone_add(sk->sk_kstat, zoneid);
+ }
+ mutex_exit(&netstack_shared_lock);
+}
+
+static void
+netstack_shared_zone_remove(zoneid_t zoneid)
+{
+ struct shared_zone_list **szp, *sz;
+ struct shared_kstat_list *sk;
+
+ /* Find in list */
+ mutex_enter(&netstack_shared_lock);
+ sz = NULL;
+ for (szp = &netstack_shared_zones; *szp != NULL;
+ szp = &((*szp)->sz_next)) {
+ if ((*szp)->sz_zoneid == zoneid) {
+ sz = *szp;
+ break;
+ }
+ }
+ /* We must find it */
+ ASSERT(sz != NULL);
+ *szp = sz->sz_next;
+ sz->sz_next = NULL;
+
+ /*
+ * Perform kstat_zone_remove for each existing shared stack kstat.
+	 * Note: Holds netstack_shared_lock across kstat_zone_remove.
+ */
+ for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
+ kstat_zone_remove(sk->sk_kstat, zoneid);
+ }
+ mutex_exit(&netstack_shared_lock);
+
+ kmem_free(sz, sizeof (*sz));
+}
+
+static void
+netstack_shared_kstat_add(kstat_t *ks)
+{
+ struct shared_zone_list *sz;
+ struct shared_kstat_list *sk;
+
+ sk = (struct shared_kstat_list *)kmem_zalloc(sizeof (*sk), KM_SLEEP);
+ sk->sk_kstat = ks;
+
+ /* Insert in list */
+ mutex_enter(&netstack_shared_lock);
+ sk->sk_next = netstack_shared_kstats;
+ netstack_shared_kstats = sk;
+
+ /*
+ * Perform kstat_zone_add for each existing shared stack zone.
+	 * Note: Holds netstack_shared_lock across kstat_zone_add.
+ */
+ for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
+ kstat_zone_add(ks, sz->sz_zoneid);
+ }
+ mutex_exit(&netstack_shared_lock);
+}
+
+static void
+netstack_shared_kstat_remove(kstat_t *ks)
+{
+ struct shared_zone_list *sz;
+ struct shared_kstat_list **skp, *sk;
+
+ /* Find in list */
+ mutex_enter(&netstack_shared_lock);
+ sk = NULL;
+ for (skp = &netstack_shared_kstats; *skp != NULL;
+ skp = &((*skp)->sk_next)) {
+ if ((*skp)->sk_kstat == ks) {
+ sk = *skp;
+ break;
+ }
+ }
+ /* Must find it */
+ ASSERT(sk != NULL);
+ *skp = sk->sk_next;
+ sk->sk_next = NULL;
+
+ /*
+	 * Perform kstat_zone_remove for each existing shared stack zone.
+	 * Note: Holds netstack_shared_lock across kstat_zone_remove.
+ */
+ for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
+ kstat_zone_remove(ks, sz->sz_zoneid);
+ }
+ mutex_exit(&netstack_shared_lock);
+ kmem_free(sk, sizeof (*sk));
+}
+
+/*
+ * Return true if the zoneid is one of those using the shared stack.
+ */
+static boolean_t
+netstack_find_shared_zoneid(zoneid_t zoneid)
+{
+ struct shared_zone_list *sz;
+
+ mutex_enter(&netstack_shared_lock);
+ for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
+ if (sz->sz_zoneid == zoneid) {
+ mutex_exit(&netstack_shared_lock);
+ return (B_TRUE);
+ }
+ }
+ mutex_exit(&netstack_shared_lock);
+ return (B_FALSE);
+}
+
+/*
+ * Hide the fact that zoneids and netstackids are allocated from
+ * the same space in the current implementation.
+ * XXX could add checks that the stackid/zoneids are valid...
+ */
+zoneid_t
+netstackid_to_zoneid(netstackid_t stackid)
+{
+ return (stackid);
+}
+
+netstackid_t
+zoneid_to_netstackid(zoneid_t zoneid)
+{
+ if (netstack_find_shared_zoneid(zoneid))
+		return (GLOBAL_NETSTACKID);
+ else
+ return (zoneid);
+}
+
+/*
+ * Simplistic support for walking all the handles.
+ * Example usage:
+ * netstack_handle_t nh;
+ * netstack_t *ns;
+ *
+ * netstack_next_init(&nh);
+ * while ((ns = netstack_next(&nh)) != NULL) {
+ * do something;
+ * netstack_rele(ns);
+ * }
+ * netstack_next_fini(&nh);
+ */
+void
+netstack_next_init(netstack_handle_t *handle)
+{
+ *handle = 0;
+}
+
+/* ARGSUSED */
+void
+netstack_next_fini(netstack_handle_t *handle)
+{
+}
+
+netstack_t *
+netstack_next(netstack_handle_t *handle)
+{
+ netstack_t *ns;
+ int i, end;
+
+ end = *handle;
+	/* Walk the list, skipping the first *handle instances */
+ mutex_enter(&netstack_g_lock);
+ ns = netstack_head;
+ for (i = 0; i < end; i++) {
+ if (ns == NULL)
+ break;
+ ns = ns->netstack_next;
+ }
+	/* Skip those that aren't fully initialized or are closing */
+ while (ns != NULL) {
+ mutex_enter(&ns->netstack_lock);
+ if ((ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING)) == 0) {
+ mutex_exit(&ns->netstack_lock);
+ break;
+ }
+ mutex_exit(&ns->netstack_lock);
+ end++;
+ ns = ns->netstack_next;
+ }
+ if (ns != NULL) {
+ *handle = end + 1;
+ netstack_hold(ns);
+ }
+ mutex_exit(&netstack_g_lock);
+ return (ns);
+}