author     dh155122 <none@none>  2007-01-19 16:59:38 -0800
committer  dh155122 <none@none>  2007-01-19 16:59:38 -0800
commit     f4b3ec61df05330d25f55a36b975b4d7519fdeb1
tree       395c234b901886c84a82603a767e031fca136e09 /usr/src/uts/common/os/netstack.c
parent     2e59fc6dac28cd69376c21d6b90a5624160ba94c
download   illumos-joyent-f4b3ec61df05330d25f55a36b975b4d7519fdeb1.tar.gz
PSARC 2006/366 IP Instances
6289221 RFE: Need virtualized ip-stack for each local zone
6512601 panic in ipsec_in_tag - allocation failure
6514637 error message from dhcpagent: add_pkt_opt: option type 60 is missing required value
6364643 RFE: allow persistent setting of interface flags per zone
6307539 RFE: Invalid network address causes zone boot failure
5041214 Allow IPMP configuration with zones
5005887 RFE: zoneadmd should support plumbing an interface via DHCP
4991139 RFE: zones should provide a mechanism to configure a defaultrouter for a zone
6218378 zoneadmd doesn't set the netmask for non-loopback addresses hosted on lo0
4963280 zones: need to virtualize the IPv6 default address selection mechanism
4963285 zones: need support of stateless address autoconfiguration for IPv6
5048068 zones don't boot if one of its interfaces has failed
5057154 RFE: ability to change interface status from within a zone
4963287 zones should support the plumbing of the first (and only) logical interface
4978517 TCP privileged port space should be partitioned per zone
5023347 zones don't work well with network routes other than default
4963372 investigate whether global zone can act as a router for local zones
6378364 RFE: Allow each zone to have its own virtual IPFilter
Diffstat (limited to 'usr/src/uts/common/os/netstack.c')
-rw-r--r--   usr/src/uts/common/os/netstack.c | 1217 ++++++++++++++++++++++++
 1 file changed, 1217 insertions(+), 0 deletions(-)
diff --git a/usr/src/uts/common/os/netstack.c b/usr/src/uts/common/os/netstack.c
new file mode 100644
index 0000000000..60ee49f8ed
--- /dev/null
+++ b/usr/src/uts/common/os/netstack.c
@@ -0,0 +1,1217 @@
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/vm.h>
#include <sys/proc.h>
#include <sys/tuneable.h>
#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/sdt.h>
#include <sys/mutex.h>
#include <sys/bitmap.h>
#include <sys/atomic.h>
#include <sys/kobj.h>
#include <sys/disp.h>
#include <vm/seg_kmem.h>
#include <sys/zone.h>
#include <sys/netstack.h>

/*
 * What we use so that the zones framework can tell us about new zones,
 * which we use to create new stacks.
 */
static zone_key_t netstack_zone_key;

static int	netstack_initialized = 0;

/*
 * Track the registered netstacks.
 * The global lock protects
 * - ns_reg
 * - the list starting at netstack_head and following the netstack_next
 *   pointers.
 */
static kmutex_t netstack_g_lock;

/*
 * Registry of netstacks with their create/shutdown/destroy functions.
 */
static struct netstack_registry	ns_reg[NS_MAX];

/*
 * Global list of existing stacks.  We use this when a new zone with
 * an exclusive IP instance is created.
 *
 * Note that in some cases a netstack_t needs to stay around after the zone
 * has gone away.  This is because there might be outstanding references
 * (from TCP TIME_WAIT connections, IPsec state, etc).  The netstack_t data
 * structure and all the foo_stack_t's hanging off of it will be cleaned up
 * when the last reference to it is dropped.
 * However, the same zone might be rebooted.  That is handled using the
 * assumption that the zones framework picks a new zoneid each time a zone
 * is (re)booted.  We assert for that condition in netstack_zone_create().
 * Thus the old netstack_t can take its time for things to time out.
 */
static netstack_t *netstack_head;

/*
 * To support kstat_create_netstack() using kstat_zone_add we need
 * to track both
 * - all zoneids that use the global/shared stack
 * - all kstats that have been added for the shared stack
 */
struct shared_zone_list {
	struct shared_zone_list *sz_next;
	zoneid_t		sz_zoneid;
};

struct shared_kstat_list {
	struct shared_kstat_list *sk_next;
	kstat_t			*sk_kstat;
};

static kmutex_t netstack_shared_lock;	/* protects the following two */
static struct shared_zone_list	*netstack_shared_zones;
static struct shared_kstat_list	*netstack_shared_kstats;

static void	*netstack_zone_create(zoneid_t zoneid);
static void	netstack_zone_shutdown(zoneid_t zoneid, void *arg);
static void	netstack_zone_destroy(zoneid_t zoneid, void *arg);

static void	netstack_do_create(void);
static void	netstack_do_shutdown(void);
static void	netstack_do_destroy(void);

static void	netstack_shared_zone_add(zoneid_t zoneid);
static void	netstack_shared_zone_remove(zoneid_t zoneid);
static void	netstack_shared_kstat_add(kstat_t *ks);
static void	netstack_shared_kstat_remove(kstat_t *ks);


void
netstack_init(void)
{
	mutex_init(&netstack_g_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&netstack_shared_lock, NULL, MUTEX_DEFAULT, NULL);

	netstack_initialized = 1;

	/*
	 * We want to be informed each time a zone is created or
	 * destroyed in the kernel, so we can maintain the
	 * stack instance information.
	 */
	zone_key_create(&netstack_zone_key, netstack_zone_create,
	    netstack_zone_shutdown, netstack_zone_destroy);
}

/*
 * Register a new module with the framework.
 * This registers interest in changes to the set of netstacks.
 * The createfn and destroyfn are required, but the shutdownfn can be
 * NULL.
 * Note that due to the current zsd implementation, when the create
 * function is called the zone isn't fully present; thus functions
 * like zone_find_by_* will fail, hence the create function cannot
 * use many zone-related kernel functions, including zcmn_err().
 */
void
netstack_register(int moduleid,
    void *(*module_create)(netstackid_t, netstack_t *),
    void (*module_shutdown)(netstackid_t, void *),
    void (*module_destroy)(netstackid_t, void *))
{
	netstack_t *ns;

	ASSERT(netstack_initialized);
	ASSERT(moduleid >= 0 && moduleid < NS_MAX);
	ASSERT(module_create != NULL);

	mutex_enter(&netstack_g_lock);
	ASSERT(ns_reg[moduleid].nr_create == NULL);
	ASSERT(ns_reg[moduleid].nr_flags == 0);
	ns_reg[moduleid].nr_create = module_create;
	ns_reg[moduleid].nr_shutdown = module_shutdown;
	ns_reg[moduleid].nr_destroy = module_destroy;
	ns_reg[moduleid].nr_flags = NRF_REGISTERED;

	/*
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set CREATE_NEEDED for each of those.
	 * netstacks which have been deleted will have NSS_CREATE_COMPLETED
	 * set, but check NSF_CLOSING to be sure.
	 */
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		mutex_enter(&ns->netstack_lock);
		if (!(ns->netstack_flags & NSF_CLOSING) &&
		    (ns->netstack_m_state[moduleid] & NSS_CREATE_ALL) == 0) {
			ns->netstack_m_state[moduleid] |= NSS_CREATE_NEEDED;
			DTRACE_PROBE2(netstack__create__needed,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}
	mutex_exit(&netstack_g_lock);

	/*
	 * Call the create function for each stack that has CREATE_NEEDED.
	 * Set CREATE_INPROGRESS, drop the lock, and when done set
	 * CREATE_COMPLETED.
	 */
	netstack_do_create();
}

void
netstack_unregister(int moduleid)
{
	netstack_t *ns;

	ASSERT(moduleid >= 0 && moduleid < NS_MAX);

	ASSERT(ns_reg[moduleid].nr_create != NULL);
	ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);

	mutex_enter(&netstack_g_lock);
	/*
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set SHUTDOWN_NEEDED and DESTROY_NEEDED for each of those.
	 */
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		mutex_enter(&ns->netstack_lock);
		if (ns_reg[moduleid].nr_shutdown != NULL &&
		    (ns->netstack_m_state[moduleid] & NSS_CREATE_COMPLETED) &&
		    (ns->netstack_m_state[moduleid] & NSS_SHUTDOWN_ALL) == 0) {
			ns->netstack_m_state[moduleid] |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, moduleid);
		}
		if ((ns_reg[moduleid].nr_flags & NRF_REGISTERED) &&
		    ns_reg[moduleid].nr_destroy != NULL &&
		    (ns->netstack_m_state[moduleid] & NSS_CREATE_COMPLETED) &&
		    (ns->netstack_m_state[moduleid] & NSS_DESTROY_ALL) == 0) {
			ns->netstack_m_state[moduleid] |= NSS_DESTROY_NEEDED;
			DTRACE_PROBE2(netstack__destroy__needed,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}
	mutex_exit(&netstack_g_lock);

	netstack_do_shutdown();
	netstack_do_destroy();

	/*
	 * Clear the netstack_m_state so that we can handle this module
	 * being loaded again.
	 */
	mutex_enter(&netstack_g_lock);
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		mutex_enter(&ns->netstack_lock);
		if (ns->netstack_m_state[moduleid] & NSS_DESTROY_COMPLETED) {
			ns->netstack_m_state[moduleid] = 0;
			DTRACE_PROBE2(netstack__destroy__done,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}

	ns_reg[moduleid].nr_create = NULL;
	ns_reg[moduleid].nr_shutdown = NULL;
	ns_reg[moduleid].nr_destroy = NULL;
	ns_reg[moduleid].nr_flags = 0;
	mutex_exit(&netstack_g_lock);
}
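To make the registration contract concrete, here is a minimal consumer sketch. NS_FOO, foo_stack_t, and the foo_* functions are hypothetical names, not part of this commit (the real moduleids live in <sys/netstack.h>), and error handling is elided:

/*
 * Hypothetical consumer sketch of netstack_register()/netstack_unregister().
 * NS_FOO and foo_stack_t are illustrative, not from this file.
 */
typedef struct foo_stack {
	netstackid_t	fs_stackid;	/* which stack instance we belong to */
	uint64_t	fs_some_counter;
} foo_stack_t;

/* Called once per stack instance; must not use zone_find_by_*() yet. */
/* ARGSUSED */
static void *
foo_stack_create(netstackid_t stackid, netstack_t *ns)
{
	foo_stack_t *fs = kmem_zalloc(sizeof (*fs), KM_SLEEP);

	fs->fs_stackid = stackid;
	return (fs);	/* framework stores it in ns->netstack_modules */
}

static void
foo_stack_destroy(netstackid_t stackid, void *arg)
{
	kmem_free(arg, sizeof (foo_stack_t));
}

static void
foo_init(void)
{
	/* createfn and destroyfn are required; shutdownfn may be NULL */
	netstack_register(NS_FOO, foo_stack_create, NULL, foo_stack_destroy);
}

static void
foo_fini(void)
{
	netstack_unregister(NS_FOO);
}

Because netstack_register() marks CREATE_NEEDED on every pre-existing stack, a module loaded after zones have booted still gets a create callback for each live stack instance.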
/*
 * Lookup and/or allocate a netstack for this zone.
 */
static void *
netstack_zone_create(zoneid_t zoneid)
{
	netstackid_t stackid;
	netstack_t *ns;
	netstack_t **nsp;
	zone_t *zone;
	int i;

	ASSERT(netstack_initialized);

	zone = zone_find_by_id_nolock(zoneid);
	ASSERT(zone != NULL);

	if (zone->zone_flags & ZF_NET_EXCL) {
		stackid = zoneid;
	} else {
		/* Use the stack instance shared with the global zone */
		stackid = GLOBAL_NETSTACKID;
	}

	/* Allocate even if it isn't needed; simplifies locking */
	ns = (netstack_t *)kmem_zalloc(sizeof (netstack_t), KM_SLEEP);

	/* Look for a matching stack instance */
	mutex_enter(&netstack_g_lock);
	for (nsp = &netstack_head; *nsp != NULL;
	    nsp = &((*nsp)->netstack_next)) {
		if ((*nsp)->netstack_stackid == stackid) {
			/*
			 * Should never find a pre-existing exclusive stack
			 */
			ASSERT(stackid == GLOBAL_NETSTACKID);
			kmem_free(ns, sizeof (netstack_t));
			ns = *nsp;
			mutex_enter(&ns->netstack_lock);
			ns->netstack_numzones++;
			mutex_exit(&ns->netstack_lock);
			mutex_exit(&netstack_g_lock);
			DTRACE_PROBE1(netstack__inc__numzones,
			    netstack_t *, ns);
			/* Record that we have a new shared stack zone */
			netstack_shared_zone_add(zoneid);
			zone->zone_netstack = ns;
			return (ns);
		}
	}
	/* Not found */
	mutex_init(&ns->netstack_lock, NULL, MUTEX_DEFAULT, NULL);
	ns->netstack_stackid = zoneid;
	ns->netstack_numzones = 1;
	ns->netstack_refcnt = 1; /* Decremented by netstack_zone_destroy */
	ns->netstack_flags = NSF_UNINIT;
	*nsp = ns;
	zone->zone_netstack = ns;

	/*
	 * Determine the set of module create functions that need to be
	 * called before we drop the lock.
	 */
	for (i = 0; i < NS_MAX; i++) {
		mutex_enter(&ns->netstack_lock);
		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    (ns->netstack_m_state[i] & NSS_CREATE_ALL) == 0) {
			ns->netstack_m_state[i] |= NSS_CREATE_NEEDED;
			DTRACE_PROBE2(netstack__create__needed,
			    netstack_t *, ns, int, i);
		}
		mutex_exit(&ns->netstack_lock);
	}
	mutex_exit(&netstack_g_lock);

	netstack_do_create();

	mutex_enter(&ns->netstack_lock);
	ns->netstack_flags &= ~NSF_UNINIT;
	mutex_exit(&ns->netstack_lock);

	return (ns);
}

/* ARGSUSED */
static void
netstack_zone_shutdown(zoneid_t zoneid, void *arg)
{
	netstack_t *ns = (netstack_t *)arg;
	int i;

	ASSERT(arg != NULL);

	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_numzones > 0);
	if (ns->netstack_numzones != 1) {
		/* Stack instance being used by other zone */
		mutex_exit(&ns->netstack_lock);
		ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
		return;
	}
	mutex_exit(&ns->netstack_lock);

	mutex_enter(&netstack_g_lock);
	/*
	 * Determine the set of registered modules before we drop the lock.
	 * Set SHUTDOWN_NEEDED for each of those.
	 */
	for (i = 0; i < NS_MAX; i++) {
		mutex_enter(&ns->netstack_lock);
		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    ns_reg[i].nr_shutdown != NULL &&
		    (ns->netstack_m_state[i] & NSS_CREATE_COMPLETED) &&
		    (ns->netstack_m_state[i] & NSS_SHUTDOWN_ALL) == 0) {
			ns->netstack_m_state[i] |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, i);
		}
		mutex_exit(&ns->netstack_lock);
	}
	mutex_exit(&netstack_g_lock);

	/* Call the shutdown function for all registered modules */
	netstack_do_shutdown();
}

/*
 * Common routine to release a zone.
 * If this was the last zone using the stack instance then prepare to
 * have the refcnt dropping to zero free the zone.
 */
/* ARGSUSED */
static void
netstack_zone_destroy(zoneid_t zoneid, void *arg)
{
	netstack_t *ns = (netstack_t *)arg;

	ASSERT(arg != NULL);

	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_numzones > 0);
	ns->netstack_numzones--;
	if (ns->netstack_numzones != 0) {
		/* Stack instance being used by other zone */
		mutex_exit(&ns->netstack_lock);
		ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
		/* Record that a shared stack zone has gone away */
		netstack_shared_zone_remove(zoneid);
		return;
	}
	/*
	 * Set CLOSING so that netstack_find_by* will not find it
	 * and decrement the reference count.
	 */
	ns->netstack_flags |= NSF_CLOSING;
	mutex_exit(&ns->netstack_lock);
	DTRACE_PROBE1(netstack__dec__numzones, netstack_t *, ns);
	/* No other thread can call zone_destroy for this stack */

	/*
	 * Decrease refcnt to account for the reference taken in
	 * netstack_zone_create().
	 */
	netstack_rele(ns);
}

/*
 * Called when the reference count drops to zero.
 * Call the destroy functions for each registered module.
 */
static void
netstack_stack_inactive(netstack_t *ns)
{
	int i;

	mutex_enter(&netstack_g_lock);
	/*
	 * If the shutdown callback wasn't called earlier (e.g., if this is
	 * a netstack shared between multiple zones), then we call it now.
	 */
	for (i = 0; i < NS_MAX; i++) {
		mutex_enter(&ns->netstack_lock);
		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    ns_reg[i].nr_shutdown != NULL &&
		    (ns->netstack_m_state[i] & NSS_CREATE_COMPLETED) &&
		    (ns->netstack_m_state[i] & NSS_SHUTDOWN_ALL) == 0) {
			ns->netstack_m_state[i] |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, i);
		}
		mutex_exit(&ns->netstack_lock);
	}
	/*
	 * Determine the set of registered modules before we drop the lock.
	 * Set DESTROY_NEEDED for each of those.
	 */
	for (i = 0; i < NS_MAX; i++) {
		mutex_enter(&ns->netstack_lock);
		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    ns_reg[i].nr_destroy != NULL &&
		    (ns->netstack_m_state[i] & NSS_CREATE_COMPLETED) &&
		    (ns->netstack_m_state[i] & NSS_DESTROY_ALL) == 0) {
			ns->netstack_m_state[i] |= NSS_DESTROY_NEEDED;
			DTRACE_PROBE2(netstack__destroy__needed,
			    netstack_t *, ns, int, i);
		}
		mutex_exit(&ns->netstack_lock);
	}
	mutex_exit(&netstack_g_lock);

	netstack_do_shutdown();
	netstack_do_destroy();
}
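Before the apply functions below, note the shape of the per-module state machine: each of create/shutdown/destroy moves NEEDED to INPROGRESS to COMPLETED, with the NEEDED-to-INPROGRESS edge taken under netstack_lock so exactly one thread claims the work. A stand-alone user-space model follows; the flag values are assumptions for illustration (the real definitions are in <sys/netstack.h>):

/*
 * Stand-alone model of the NSS_* per-module state machine.
 * Flag values here are assumed; the real ones are in <sys/netstack.h>.
 */
#include <stdio.h>

#define	NSS_CREATE_NEEDED	0x0001
#define	NSS_CREATE_INPROGRESS	0x0002
#define	NSS_CREATE_COMPLETED	0x0004
#define	NSS_CREATE_ALL	\
	(NSS_CREATE_NEEDED | NSS_CREATE_INPROGRESS | NSS_CREATE_COMPLETED)

int
main(void)
{
	unsigned int state = 0;

	/* netstack_register()/netstack_zone_create() mark the work */
	if ((state & NSS_CREATE_ALL) == 0)
		state |= NSS_CREATE_NEEDED;

	/* netstack_apply_create() claims it, drops the locks, runs createfn */
	if (state & NSS_CREATE_NEEDED) {
		state &= ~NSS_CREATE_NEEDED;
		state |= NSS_CREATE_INPROGRESS;
		/* ... module_create(stackid, ns) runs locklessly here ... */
		state &= ~NSS_CREATE_INPROGRESS;
		state |= NSS_CREATE_COMPLETED;
	}
	(void) printf("final state: 0x%x\n", state);	/* prints 0x4 */
	return (0);
}

The same three-step progression repeats for SHUTDOWN and DESTROY, which is why the apply functions below are near-identical in structure.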
/*
 * Call the create function for the ns and moduleid if CREATE_NEEDED
 * is set.
 * When it calls it, it drops the netstack_lock held by the caller,
 * and returns true to tell the caller it needs to re-evaluate the
 * state.
 */
static boolean_t
netstack_apply_create(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
	void *result;
	netstackid_t stackid;

	ASSERT(MUTEX_HELD(lockp));
	mutex_enter(&ns->netstack_lock);
	if (ns->netstack_m_state[moduleid] & NSS_CREATE_NEEDED) {
		ns->netstack_m_state[moduleid] &= ~NSS_CREATE_NEEDED;
		ns->netstack_m_state[moduleid] |= NSS_CREATE_INPROGRESS;
		DTRACE_PROBE2(netstack__create__inprogress,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		mutex_exit(lockp);

		ASSERT(ns_reg[moduleid].nr_create != NULL);
		stackid = ns->netstack_stackid;
		DTRACE_PROBE2(netstack__create__start,
		    netstackid_t, stackid,
		    netstack_t *, ns);
		result = (ns_reg[moduleid].nr_create)(stackid, ns);
		DTRACE_PROBE2(netstack__create__end,
		    void *, result, netstack_t *, ns);

		ASSERT(result != NULL);
		mutex_enter(&ns->netstack_lock);
		ns->netstack_modules[moduleid] = result;
		ns->netstack_m_state[moduleid] &= ~NSS_CREATE_INPROGRESS;
		ns->netstack_m_state[moduleid] |= NSS_CREATE_COMPLETED;
		DTRACE_PROBE2(netstack__create__completed,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		return (B_TRUE);
	} else {
		mutex_exit(&ns->netstack_lock);
		return (B_FALSE);
	}
}

/*
 * Call the shutdown function for the ns and moduleid if SHUTDOWN_NEEDED
 * is set.
 * When it calls it, it drops the netstack_lock held by the caller,
 * and returns true to tell the caller it needs to re-evaluate the
 * state.
 */
static boolean_t
netstack_apply_shutdown(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
	netstackid_t stackid;
	void *netstack_module;

	ASSERT(MUTEX_HELD(lockp));
	mutex_enter(&ns->netstack_lock);
	if (ns->netstack_m_state[moduleid] & NSS_SHUTDOWN_NEEDED) {
		ns->netstack_m_state[moduleid] &= ~NSS_SHUTDOWN_NEEDED;
		ns->netstack_m_state[moduleid] |= NSS_SHUTDOWN_INPROGRESS;
		DTRACE_PROBE2(netstack__shutdown__inprogress,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		mutex_exit(lockp);

		ASSERT(ns_reg[moduleid].nr_shutdown != NULL);
		stackid = ns->netstack_stackid;
		netstack_module = ns->netstack_modules[moduleid];
		DTRACE_PROBE2(netstack__shutdown__start,
		    netstackid_t, stackid,
		    void *, netstack_module);
		(ns_reg[moduleid].nr_shutdown)(stackid, netstack_module);
		DTRACE_PROBE1(netstack__shutdown__end,
		    netstack_t *, ns);

		mutex_enter(&ns->netstack_lock);
		ns->netstack_m_state[moduleid] &= ~NSS_SHUTDOWN_INPROGRESS;
		ns->netstack_m_state[moduleid] |= NSS_SHUTDOWN_COMPLETED;
		DTRACE_PROBE2(netstack__shutdown__completed,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		return (B_TRUE);
	} else {
		mutex_exit(&ns->netstack_lock);
		return (B_FALSE);
	}
}

/*
 * Call the destroy function for the ns and moduleid if DESTROY_NEEDED
 * is set.
 * When it calls it, it drops the netstack_lock held by the caller,
 * and returns true to tell the caller it needs to re-evaluate the
 * state.
 */
static boolean_t
netstack_apply_destroy(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
	netstackid_t stackid;
	void *netstack_module;

	ASSERT(MUTEX_HELD(lockp));
	mutex_enter(&ns->netstack_lock);
	if (ns->netstack_m_state[moduleid] & NSS_DESTROY_NEEDED) {
		ns->netstack_m_state[moduleid] &= ~NSS_DESTROY_NEEDED;
		ns->netstack_m_state[moduleid] |= NSS_DESTROY_INPROGRESS;
		DTRACE_PROBE2(netstack__destroy__inprogress,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		mutex_exit(lockp);

		/* XXX race against unregister? */
		ASSERT(ns_reg[moduleid].nr_destroy != NULL);
		stackid = ns->netstack_stackid;
		netstack_module = ns->netstack_modules[moduleid];
		DTRACE_PROBE2(netstack__destroy__start,
		    netstackid_t, stackid,
		    void *, netstack_module);
		(ns_reg[moduleid].nr_destroy)(stackid, netstack_module);
		DTRACE_PROBE1(netstack__destroy__end,
		    netstack_t *, ns);

		mutex_enter(&ns->netstack_lock);
		ns->netstack_modules[moduleid] = NULL;
		ns->netstack_m_state[moduleid] &= ~NSS_DESTROY_INPROGRESS;
		ns->netstack_m_state[moduleid] |= NSS_DESTROY_COMPLETED;
		DTRACE_PROBE2(netstack__destroy__completed,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		return (B_TRUE);
	} else {
		mutex_exit(&ns->netstack_lock);
		return (B_FALSE);
	}
}

static void
apply_loop(netstack_t **headp, kmutex_t *lockp,
    boolean_t (*applyfn)(kmutex_t *, netstack_t *, int moduleid))
{
	netstack_t *ns;
	int i;
	boolean_t lock_dropped, result;

	lock_dropped = B_FALSE;
	ns = *headp;
	while (ns != NULL) {
		for (i = 0; i < NS_MAX; i++) {
			result = (applyfn)(lockp, ns, i);
			if (result) {
#ifdef NS_DEBUG
				(void) printf("netstack_do_apply: "
				    "LD for %p/%d, %d\n",
				    (void *)ns, ns->netstack_stackid, i);
#endif
				lock_dropped = B_TRUE;
				mutex_enter(lockp);
			}
		}
		/*
		 * If at least one applyfn call caused lockp to be dropped,
		 * then we don't follow netstack_next after reacquiring the
		 * lock, even if it is possible to do so without any hazards.
		 * This is because we want the design to allow for the list of
		 * netstacks threaded by netstack_next to change in any
		 * arbitrary way during the time the 'lockp' was dropped.
		 *
		 * It is safe to restart the loop at *headp since
		 * the applyfn changes netstack_m_state as it processes
		 * things, so a subsequent pass through will have no
		 * effect in applyfn, hence the loop will terminate
		 * in at worst O(N^2).
		 */
		if (lock_dropped) {
#ifdef NS_DEBUG
			(void) printf("netstack_do_apply: "
			    "Lock Dropped for %p/%d, %d\n",
			    (void *)ns, ns->netstack_stackid, i);
#endif
			lock_dropped = B_FALSE;
			ns = *headp;
		} else {
			ns = ns->netstack_next;
		}
	}
}
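The restart-from-head discipline is the heart of apply_loop(): whenever a callback forces the global lock to be dropped, the walk abandons its position and starts over, and the per-module COMPLETED state is what bounds the rescans. A stand-alone user-space sketch of the same pattern (hypothetical node type, single-threaded stand-in for the lock dance):

/*
 * User-space sketch of apply_loop()'s restart pattern: restart from the
 * head whenever work was done ("lock dropped"), relying on a per-node
 * done flag for termination.  The node type and list are invented.
 */
#include <stdio.h>

struct node {
	struct node *next;
	int done;		/* analogous to NSS_*_COMPLETED */
	int id;
};

/* Returns 1 if it did work, i.e., would have dropped the lock. */
static int
apply(struct node *n)
{
	if (n->done)
		return (0);
	n->done = 1;
	(void) printf("processed node %d\n", n->id);
	return (1);
}

static void
walk(struct node *head)
{
	struct node *n = head;

	while (n != NULL) {
		if (apply(n)) {
			/* List may have changed meanwhile; restart. */
			n = head;
		} else {
			n = n->next;
		}
	}
}

int
main(void)
{
	struct node c = { NULL, 0, 2 }, b = { &c, 0, 1 }, a = { &b, 0, 0 };

	walk(&a);	/* each node is processed exactly once */
	return (0);
}

Each restart re-scans only already-done nodes before reaching new work, so an N-node list is processed in at worst O(N^2) steps, matching the comment above.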
/* Like above, but in the reverse order of moduleids */
static void
apply_loop_reverse(netstack_t **headp, kmutex_t *lockp,
    boolean_t (*applyfn)(kmutex_t *, netstack_t *, int moduleid))
{
	netstack_t *ns;
	int i;
	boolean_t lock_dropped, result;

	lock_dropped = B_FALSE;
	ns = *headp;
	while (ns != NULL) {
		for (i = NS_MAX - 1; i >= 0; i--) {
			result = (applyfn)(lockp, ns, i);
			if (result) {
#ifdef NS_DEBUG
				(void) printf("netstack_do_apply: "
				    "LD for %p/%d, %d\n",
				    (void *)ns, ns->netstack_stackid, i);
#endif
				lock_dropped = B_TRUE;
				mutex_enter(lockp);
			}
		}
		/*
		 * See the comment in apply_loop() for why we restart at
		 * *headp whenever 'lockp' was dropped; the same O(N^2)
		 * termination argument applies here.
		 */
		if (lock_dropped) {
#ifdef NS_DEBUG
			(void) printf("netstack_do_apply: "
			    "Lock Dropped for %p/%d, %d\n",
			    (void *)ns, ns->netstack_stackid, i);
#endif
			lock_dropped = B_FALSE;
			ns = *headp;
		} else {
			ns = ns->netstack_next;
		}
	}
}

/*
 * Apply a function to all module/netstack combinations.
 * The applyfn returns true if it had dropped the locks.
 */
static void
netstack_do_apply(int reverse,
    boolean_t (*applyfn)(kmutex_t *, netstack_t *, int moduleid))
{
	mutex_enter(&netstack_g_lock);
	if (reverse)
		apply_loop_reverse(&netstack_head, &netstack_g_lock, applyfn);
	else
		apply_loop(&netstack_head, &netstack_g_lock, applyfn);
	mutex_exit(&netstack_g_lock);
}

/*
 * Run the create function for all modules x stack combinations
 * that have NSS_CREATE_NEEDED set.
 *
 * Call the create function for each stack that has CREATE_NEEDED.
 * Set CREATE_INPROGRESS, drop the lock, and when done set
 * CREATE_COMPLETED.
 */
static void
netstack_do_create(void)
{
	netstack_do_apply(B_FALSE, netstack_apply_create);
}

/*
 * Run the shutdown function for all modules x stack combinations
 * that have NSS_SHUTDOWN_NEEDED set.
 *
 * Call the shutdown function for each stack that has SHUTDOWN_NEEDED.
 * Set SHUTDOWN_INPROGRESS, drop the lock, and when done set
 * SHUTDOWN_COMPLETED.
 */
static void
netstack_do_shutdown(void)
{
	netstack_do_apply(B_FALSE, netstack_apply_shutdown);
}

/*
 * Run the destroy function for all modules x stack combinations
 * that have NSS_DESTROY_NEEDED set.
 *
 * Call the destroy function for each stack that has DESTROY_NEEDED.
 * Set DESTROY_INPROGRESS, drop the lock, and when done set
 * DESTROY_COMPLETED.
 *
 * Since a netstack_t is never reused (when a zone is rebooted it gets
 * a new zoneid == netstackid, i.e., a new netstack_t is allocated) we leave
 * netstack_m_state the way it is, i.e., with NSS_DESTROY_COMPLETED set.
 */
static void
netstack_do_destroy(void)
{
	/*
	 * Have to walk the moduleids in reverse order since some
	 * modules make implicit assumptions about the order.
	 */
	netstack_do_apply(B_TRUE, netstack_apply_destroy);
}

/*
 * Get the stack instance used in the caller's zone.
 * Increases the reference count; the caller must do a netstack_rele().
 * It can't be called after zone_destroy() has started.
 */
static netstack_t *
netstack_get_current(void)
{
	netstack_t *ns;

	ns = curproc->p_zone->zone_netstack;
	ASSERT(ns != NULL);
	if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
		return (NULL);

	netstack_hold(ns);

	return (ns);
}

/*
 * Find a stack instance given the cred.
 * This is used by the modules to potentially allow for a future when
 * something other than the zoneid is used to determine the stack.
 */
netstack_t *
netstack_find_by_cred(const cred_t *cr)
{
	zoneid_t zoneid = crgetzoneid(cr);

	/* Handle the case when cr_zone is NULL */
	if (zoneid == (zoneid_t)-1)
		zoneid = GLOBAL_ZONEID;

	/* For performance ... */
	if (curproc->p_zone->zone_id == zoneid)
		return (netstack_get_current());
	else
		return (netstack_find_by_zoneid(zoneid));
}
/*
 * Find a stack instance given the zoneid.
 * Increases the reference count if found; the caller must do a
 * netstack_rele().
 *
 * If there is no exact match then assume the shared stack instance
 * matches.
 *
 * Skip the uninitialized ones.
 */
netstack_t *
netstack_find_by_zoneid(zoneid_t zoneid)
{
	netstack_t *ns;
	zone_t *zone;

	zone = zone_find_by_id(zoneid);

	if (zone == NULL)
		return (NULL);

	ns = zone->zone_netstack;
	ASSERT(ns != NULL);
	if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
		ns = NULL;
	else
		netstack_hold(ns);

	zone_rele(zone);
	return (ns);
}

/*
 * Find a stack instance given the zoneid.
 * Increases the reference count if found; the caller must do a
 * netstack_rele().
 *
 * If there is no exact match then assume the shared stack instance
 * matches.
 *
 * Skip the uninitialized ones.
 *
 * NOTE: The caller must hold zonehash_lock.
 */
netstack_t *
netstack_find_by_zoneid_nolock(zoneid_t zoneid)
{
	netstack_t *ns;
	zone_t *zone;

	zone = zone_find_by_id_nolock(zoneid);

	if (zone == NULL)
		return (NULL);

	ns = zone->zone_netstack;
	ASSERT(ns != NULL);

	if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
		ns = NULL;
	else
		netstack_hold(ns);

	zone_rele(zone);
	return (ns);
}

/*
 * Find a stack instance given the stackid; requires an exact match.
 * Increases the reference count if found; the caller must do a
 * netstack_rele().
 *
 * Skip the uninitialized ones.
 */
netstack_t *
netstack_find_by_stackid(netstackid_t stackid)
{
	netstack_t *ns;

	mutex_enter(&netstack_g_lock);
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		mutex_enter(&ns->netstack_lock);
		if (ns->netstack_stackid == stackid &&
		    !(ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))) {
			mutex_exit(&ns->netstack_lock);
			netstack_hold(ns);
			mutex_exit(&netstack_g_lock);
			return (ns);
		}
		mutex_exit(&ns->netstack_lock);
	}
	mutex_exit(&netstack_g_lock);
	return (NULL);
}
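All the find functions hand back a held netstack_t. A hypothetical consumer sketch of the lookup/rele discipline (reusing the illustrative NS_FOO and foo_stack_t names from the registration sketch; direct use of netstack_modules[] is an assumption for brevity):

/*
 * Hypothetical sketch: look up a stack instance, use the module's
 * per-stack data, then drop the reference.  NS_FOO and foo_stack_t
 * are illustrative names, not part of this file.
 */
static void
foo_poke_stack(netstackid_t stackid)
{
	netstack_t *ns;
	foo_stack_t *fs;

	ns = netstack_find_by_stackid(stackid);
	if (ns == NULL)
		return;		/* uninitialized, closing, or gone */

	fs = ns->netstack_modules[NS_FOO];
	fs->fs_some_counter++;

	netstack_rele(ns);	/* may trigger the destroy callbacks */
}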
void
netstack_rele(netstack_t *ns)
{
	netstack_t **nsp;
	boolean_t found;
	int refcnt, numzones;

	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_refcnt > 0);
	ns->netstack_refcnt--;
	/*
	 * As we drop the lock additional netstack_rele()s can come in
	 * and decrement the refcnt to zero and free the netstack_t.
	 * Store pointers in local variables and if we were not the last
	 * then don't reference the netstack_t after that.
	 */
	refcnt = ns->netstack_refcnt;
	numzones = ns->netstack_numzones;
	DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
	mutex_exit(&ns->netstack_lock);

	if (refcnt == 0 && numzones == 0) {
		/*
		 * Time to call the destroy functions and free up
		 * the structure
		 */
		netstack_stack_inactive(ns);

		/* Finally remove from list of netstacks */
		mutex_enter(&netstack_g_lock);
		found = B_FALSE;
		for (nsp = &netstack_head; *nsp != NULL;
		    nsp = &(*nsp)->netstack_next) {
			if (*nsp == ns) {
				*nsp = ns->netstack_next;
				ns->netstack_next = NULL;
				found = B_TRUE;
				break;
			}
		}
		ASSERT(found);
		mutex_exit(&netstack_g_lock);

		ASSERT(ns->netstack_flags & NSF_CLOSING);
		kmem_free(ns, sizeof (*ns));
	}
}

void
netstack_hold(netstack_t *ns)
{
	mutex_enter(&ns->netstack_lock);
	ns->netstack_refcnt++;
	ASSERT(ns->netstack_refcnt > 0);
	mutex_exit(&ns->netstack_lock);
	DTRACE_PROBE1(netstack__inc__ref, netstack_t *, ns);
}

/*
 * To support kstat_create_netstack() using kstat_zone_add we need
 * to track both
 * - all zoneids that use the global/shared stack
 * - all kstats that have been added for the shared stack
 */
kstat_t *
kstat_create_netstack(char *ks_module, int ks_instance, char *ks_name,
    char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
    netstackid_t ks_netstackid)
{
	kstat_t *ks;

	if (ks_netstackid == GLOBAL_NETSTACKID) {
		ks = kstat_create_zone(ks_module, ks_instance, ks_name,
		    ks_class, ks_type, ks_ndata, ks_flags, GLOBAL_ZONEID);
		if (ks != NULL)
			netstack_shared_kstat_add(ks);
		return (ks);
	} else {
		zoneid_t zoneid = ks_netstackid;

		return (kstat_create_zone(ks_module, ks_instance, ks_name,
		    ks_class, ks_type, ks_ndata, ks_flags, zoneid));
	}
}

void
kstat_delete_netstack(kstat_t *ks, netstackid_t ks_netstackid)
{
	if (ks_netstackid == GLOBAL_NETSTACKID) {
		netstack_shared_kstat_remove(ks);
	}
	kstat_delete(ks);
}
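As a usage sketch of the wrapper above, here is how a module might publish a per-stack named kstat. The module name, statistic name, and layout are invented; kstat_named_init(), KSTAT_NAMED_PTR(), and kstat_install() are the standard kstat DDI calls:

/*
 * Hypothetical sketch: publish a named kstat for one stack instance
 * via kstat_create_netstack().  Names and layout are illustrative.
 */
static kstat_t *
foo_kstat_init(netstackid_t stackid)
{
	kstat_t *ks;
	kstat_named_t *kn;

	ks = kstat_create_netstack("foo", 0, "foostat", "net",
	    KSTAT_TYPE_NAMED, 1, 0, stackid);
	if (ks == NULL)
		return (NULL);

	kn = KSTAT_NAMED_PTR(ks);
	kstat_named_init(&kn[0], "pokes", KSTAT_DATA_UINT64);
	kstat_install(ks);
	return (ks);
}

/* The matching teardown, e.g., from the module's destroy callback: */
/*	kstat_delete_netstack(ks, stackid); */

For the shared stack the wrapper also records the kstat so that, as shared-stack zones come and go, kstat_zone_add()/kstat_zone_remove() keep the kstat visible in each of them; that bookkeeping is what the lists below implement.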
static void
netstack_shared_zone_add(zoneid_t zoneid)
{
	struct shared_zone_list *sz;
	struct shared_kstat_list *sk;

	sz = (struct shared_zone_list *)kmem_zalloc(sizeof (*sz), KM_SLEEP);
	sz->sz_zoneid = zoneid;

	/* Insert in list */
	mutex_enter(&netstack_shared_lock);
	sz->sz_next = netstack_shared_zones;
	netstack_shared_zones = sz;

	/*
	 * Perform kstat_zone_add for each existing shared stack kstat.
	 * Note: Holds netstack_shared_lock lock across kstat_zone_add.
	 */
	for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
		kstat_zone_add(sk->sk_kstat, zoneid);
	}
	mutex_exit(&netstack_shared_lock);
}

static void
netstack_shared_zone_remove(zoneid_t zoneid)
{
	struct shared_zone_list **szp, *sz;
	struct shared_kstat_list *sk;

	/* Find in list */
	mutex_enter(&netstack_shared_lock);
	sz = NULL;
	for (szp = &netstack_shared_zones; *szp != NULL;
	    szp = &((*szp)->sz_next)) {
		if ((*szp)->sz_zoneid == zoneid) {
			sz = *szp;
			break;
		}
	}
	/* We must find it */
	ASSERT(sz != NULL);
	*szp = sz->sz_next;
	sz->sz_next = NULL;

	/*
	 * Perform kstat_zone_remove for each existing shared stack kstat.
	 * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
	 */
	for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
		kstat_zone_remove(sk->sk_kstat, zoneid);
	}
	mutex_exit(&netstack_shared_lock);

	kmem_free(sz, sizeof (*sz));
}

static void
netstack_shared_kstat_add(kstat_t *ks)
{
	struct shared_zone_list *sz;
	struct shared_kstat_list *sk;

	sk = (struct shared_kstat_list *)kmem_zalloc(sizeof (*sk), KM_SLEEP);
	sk->sk_kstat = ks;

	/* Insert in list */
	mutex_enter(&netstack_shared_lock);
	sk->sk_next = netstack_shared_kstats;
	netstack_shared_kstats = sk;

	/*
	 * Perform kstat_zone_add for each existing shared stack zone.
	 * Note: Holds netstack_shared_lock lock across kstat_zone_add.
	 */
	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
		kstat_zone_add(ks, sz->sz_zoneid);
	}
	mutex_exit(&netstack_shared_lock);
}

static void
netstack_shared_kstat_remove(kstat_t *ks)
{
	struct shared_zone_list *sz;
	struct shared_kstat_list **skp, *sk;

	/* Find in list */
	mutex_enter(&netstack_shared_lock);
	sk = NULL;
	for (skp = &netstack_shared_kstats; *skp != NULL;
	    skp = &((*skp)->sk_next)) {
		if ((*skp)->sk_kstat == ks) {
			sk = *skp;
			break;
		}
	}
	/* We must find it */
	ASSERT(sk != NULL);
	*skp = sk->sk_next;
	sk->sk_next = NULL;

	/*
	 * Perform kstat_zone_remove for each existing shared stack zone.
	 * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
	 */
	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
		kstat_zone_remove(ks, sz->sz_zoneid);
	}
	mutex_exit(&netstack_shared_lock);
	kmem_free(sk, sizeof (*sk));
}

/*
 * Return true if the zoneid is one that uses the global/shared stack.
 */
static boolean_t
netstack_find_shared_zoneid(zoneid_t zoneid)
{
	struct shared_zone_list *sz;

	mutex_enter(&netstack_shared_lock);
	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
		if (sz->sz_zoneid == zoneid) {
			mutex_exit(&netstack_shared_lock);
			return (B_TRUE);
		}
	}
	mutex_exit(&netstack_shared_lock);
	return (B_FALSE);
}

/*
 * Hide the fact that zoneids and netstackids are allocated from
 * the same space in the current implementation.
 * XXX could add checks that the stackid/zoneids are valid...
 */
zoneid_t
netstackid_to_zoneid(netstackid_t stackid)
{
	return (stackid);
}

netstackid_t
zoneid_to_netstackid(zoneid_t zoneid)
{
	if (netstack_find_shared_zoneid(zoneid))
		return (GLOBAL_ZONEID);
	else
		return (zoneid);
}
/*
 * Simplistic support for walking all the handles.
 * Example usage:
 *	netstack_handle_t nh;
 *	netstack_t *ns;
 *
 *	netstack_next_init(&nh);
 *	while ((ns = netstack_next(&nh)) != NULL) {
 *		do something;
 *		netstack_rele(ns);
 *	}
 *	netstack_next_fini(&nh);
 */
void
netstack_next_init(netstack_handle_t *handle)
{
	*handle = 0;
}

/* ARGSUSED */
void
netstack_next_fini(netstack_handle_t *handle)
{
}

netstack_t *
netstack_next(netstack_handle_t *handle)
{
	netstack_t *ns;
	int i, end;

	end = *handle;
	/* Walk skipping *handle number of instances */
	mutex_enter(&netstack_g_lock);
	ns = netstack_head;
	for (i = 0; i < end; i++) {
		if (ns == NULL)
			break;
		ns = ns->netstack_next;
	}
	/* Skip those that aren't really here */
	while (ns != NULL) {
		mutex_enter(&ns->netstack_lock);
		if ((ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING)) == 0) {
			mutex_exit(&ns->netstack_lock);
			break;
		}
		mutex_exit(&ns->netstack_lock);
		end++;
		ns = ns->netstack_next;
	}
	if (ns != NULL) {
		*handle = end + 1;
		netstack_hold(ns);
	}
	mutex_exit(&netstack_g_lock);
	return (ns);
}
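Following the usage pattern in the comment above, a hypothetical helper built on the handle API, e.g., to count live stack instances:

/*
 * Hypothetical debug helper using the walker above: count the stack
 * instances that are fully initialized and not closing.
 */
static int
netstack_count(void)
{
	netstack_handle_t nh;
	netstack_t *ns;
	int count = 0;

	netstack_next_init(&nh);
	while ((ns = netstack_next(&nh)) != NULL) {
		count++;
		netstack_rele(ns);	/* drop the hold from netstack_next */
	}
	netstack_next_fini(&nh);
	return (count);
}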