diff options
Diffstat (limited to 'usr/src/uts/common/inet/ipf/solaris.c')
-rw-r--r-- | usr/src/uts/common/inet/ipf/solaris.c | 215 |
1 files changed, 185 insertions, 30 deletions
diff --git a/usr/src/uts/common/inet/ipf/solaris.c b/usr/src/uts/common/inet/ipf/solaris.c index 1a1d94def5..c541f4dddc 100644 --- a/usr/src/uts/common/inet/ipf/solaris.c +++ b/usr/src/uts/common/inet/ipf/solaris.c @@ -5,6 +5,50 @@ * * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * + * Copyright (c) 2014, Joyent, Inc. All rights reserved. + */ + +/* + * ipfilter kernel module mutexes and locking: + * + * Enabling ipfilter creates a per-netstack ipf_stack_t object that is + * stored in the ipf_stacks list, which is protected by ipf_stack_lock. + * ipf_stack_t objects are accessed in three contexts: + * + * 1) administering that filter (eg: ioctls handled with iplioctl()) + * 2) reading log data (eg: iplread() / iplwrite()) + * 3) filtering packets (eg: ipf_hook4_* and ipf_hook6_* pfhooks + * functions) + * + * Each ipf_stack_t has a RW lock, ifs_ipf_global, protecting access to the + * whole structure. The structure also has locks protecting the various + * data structures used for filtering. The following guidelines should be + * followed for ipf_stack_t locks: + * + * - ipf_stack_lock must be held when accessing the ipf_stacks list + * - ipf_stack_lock should be held before acquiring ifs_ipf_global for + * a stack (the exception to this is ipf_stack_destroy(), which removes + * the ipf_stack_t from the list, then drops ipf_stack_lock before + * acquiring ifs_ipf_global) + * - ifs_ipf_global must be held when accessing an ipf_stack_t in that list: + * - The write lock is held only during stack creation / destruction + * - The read lock should be held for all other accesses + * - To alter the filtering data in the administrative context, one must: + * - acquire the read lock for ifs_ipf_global + * - then acquire the write lock for the data in question + * - In the filtering path, the read lock needs to be held for each type of + * filtering data used + * - ifs_ipf_global does not need to be held in the filtering path: + * - The filtering hooks don't need to modify the stack itself + * - The ipf_stack_t will not be destroyed until the hooks are unregistered. + * This requires a write lock on the hook, ensuring that no active hooks + * (eg: the filtering path) are running, and that the hooks won't be run + * afterward. + * + * Note that there is a deadlock possible when calling net_hook_register() + * or net_hook_unregister() with ifs_ipf_global held: see the comments in + * iplattach() and ipldetach() for details. */ #include <sys/systm.h> @@ -73,7 +117,8 @@ static int ipf_property_g_update __P((dev_info_t *)); static char *ipf_devfiles[] = { IPL_NAME, IPNAT_NAME, IPSTATE_NAME, IPAUTH_NAME, IPSYNC_NAME, IPSCAN_NAME, IPLOOKUP_NAME, NULL }; - +extern void *ipf_state; /* DDI state */ +extern vmem_t *ipf_minor; /* minor number arena */ static struct cb_ops ipf_cb_ops = { iplopen, @@ -221,10 +266,11 @@ static const filter_kstats_t ipf_kstat_tmp = { static int ipf_kstat_update(kstat_t *ksp, int rwflag); static void -ipf_kstat_init(ipf_stack_t *ifs) +ipf_kstat_init(ipf_stack_t *ifs, boolean_t from_gz) { - ifs->ifs_kstatp[0] = net_kstat_create(ifs->ifs_netid, "ipf", 0, - "inbound", "net", KSTAT_TYPE_NAMED, + ifs->ifs_kstatp[0] = net_kstat_create(ifs->ifs_netid, + (from_gz ? "ipf_gz" : "ipf"), + 0, "inbound", "net", KSTAT_TYPE_NAMED, sizeof (filter_kstats_t) / sizeof (kstat_named_t), 0); if (ifs->ifs_kstatp[0] != NULL) { bcopy(&ipf_kstat_tmp, ifs->ifs_kstatp[0]->ks_data, @@ -234,8 +280,9 @@ ipf_kstat_init(ipf_stack_t *ifs) kstat_install(ifs->ifs_kstatp[0]); } - ifs->ifs_kstatp[1] = net_kstat_create(ifs->ifs_netid, "ipf", 0, - "outbound", "net", KSTAT_TYPE_NAMED, + ifs->ifs_kstatp[1] = net_kstat_create(ifs->ifs_netid, + (from_gz ? "ipf_gz" : "ipf"), + 0, "outbound", "net", KSTAT_TYPE_NAMED, sizeof (filter_kstats_t) / sizeof (kstat_named_t), 0); if (ifs->ifs_kstatp[1] != NULL) { bcopy(&ipf_kstat_tmp, ifs->ifs_kstatp[1]->ks_data, @@ -369,12 +416,14 @@ dev_info_t *dip; * Initialize things for IPF for each stack instance */ static void * -ipf_stack_create(const netid_t id) +ipf_stack_create_one(const netid_t id, const zoneid_t zid, boolean_t from_gz, + ipf_stack_t *ifs_gz) { ipf_stack_t *ifs; #ifdef IPFDEBUG - cmn_err(CE_NOTE, "IP Filter:stack_create id=%d", id); + cmn_err(CE_NOTE, "IP Filter:stack_create_one id=%d global=%d", id, + global); #endif ifs = (ipf_stack_t *)kmem_alloc(sizeof (*ifs), KM_SLEEP); @@ -398,8 +447,11 @@ ipf_stack_create(const netid_t id) RWLOCK_INIT(&ifs->ifs_ipf_mutex, "ipf filter rwlock"); RWLOCK_INIT(&ifs->ifs_ipf_frcache, "ipf cache rwlock"); ifs->ifs_netid = id; - ifs->ifs_zone = net_getzoneidbynetid(id); - ipf_kstat_init(ifs); + ifs->ifs_zone = zid; + ifs->ifs_gz_controlled = from_gz; + ifs->ifs_gz_cont_ifs = ifs_gz; + + ipf_kstat_init(ifs, from_gz); #ifdef IPFDEBUG cmn_err(CE_CONT, "IP Filter:stack_create zone=%d", ifs->ifs_zone); @@ -427,31 +479,84 @@ ipf_stack_create(const netid_t id) return (ifs); } +static void * +ipf_stack_create(const netid_t id) +{ + ipf_stack_t *ifs = NULL; + zoneid_t zid = net_getzoneidbynetid(id); + + /* + * Create two ipfilter stacks for a zone - the first can only be + * controlled from the global zone, and the second is owned by + * the zone itself. There is no need to create a GZ-controlled + * stack for the global zone, since we're already in the global + * zone. See the "GZ-controlled and per-zone stacks" comment block in + * ip_fil_solaris.c for details. + */ + if (zid != GLOBAL_ZONEID) + ifs = ipf_stack_create_one(id, zid, B_TRUE, NULL); + + return (ipf_stack_create_one(id, zid, B_FALSE, ifs)); +} /* - * This function should only ever be used to find the pointer to the - * ipfilter stack structure for the zone that is currently being - * executed... so if you're running in the context of zone 1, you - * should not attempt to find the ipf_stack_t for zone 0 or 2 or - * anything else but 1. In that way, the returned pointer is safe - * as it will only be nuked when the instance is destroyed as part - * of the final shutdown of a zone. + * Find an ipfilter stack for the given zone. Return the GZ-controlled or + * per-zone stack if set by an earlier SIOCIPFZONESET ioctl call. See the + * "GZ-controlled and per-zone stacks" comment block in ip_fil_solaris.c for + * details. + * + * This function returns with the ipf_stack_t's ifs_ipf_global + * read lock held (if the stack is found). See the "ipfilter kernel module + * mutexes and locking" comment block at the top of this file. */ ipf_stack_t * -ipf_find_stack(const zoneid_t zone) +ipf_find_stack(const zoneid_t orig_zone, ipf_devstate_t *isp) { ipf_stack_t *ifs; + boolean_t gz_stack; + zoneid_t zone; + + /* + * If we're in the GZ, determine if we're acting on a zone's stack, + * and whether or not that stack is the GZ-controlled or in-zone + * one. See the "GZ and per-zone stacks" note at the top of this + * file. + */ + if (orig_zone == GLOBAL_ZONEID && + (isp->ipfs_zoneid != IPFS_ZONE_UNSET)) { + /* Global zone, and we've set the zoneid for this fd already */ + + if (orig_zone == isp->ipfs_zoneid) { + /* There's only a per-zone stack for the GZ */ + gz_stack = B_FALSE; + } else { + gz_stack = isp->ipfs_gz; + } + + zone = isp->ipfs_zoneid; + } else { + /* + * Non-global zone or GZ without having set a zoneid: act on + * the per-zone stack of the zone that this ioctl originated + * from. + */ + gz_stack = B_FALSE; + zone = orig_zone; + } mutex_enter(&ipf_stack_lock); for (ifs = ipf_stacks; ifs != NULL; ifs = ifs->ifs_next) { - if (ifs->ifs_zone == zone) + if (ifs->ifs_zone == zone && ifs->ifs_gz_controlled == gz_stack) break; } + + if (ifs != NULL) { + READ_ENTER(&ifs->ifs_ipf_global); + } mutex_exit(&ipf_stack_lock); return (ifs); } - static int ipf_detach_check_zone(ipf_stack_t *ifs) { /* @@ -495,7 +600,8 @@ static int ipf_detach_check_all() /* - * Destroy things for ipf for one stack. + * Remove ipf kstats for both the per-zone ipf stack and the + * GZ-controlled stack for the same zone, if it exists. */ /* ARGSUSED */ static void @@ -503,6 +609,15 @@ ipf_stack_shutdown(const netid_t id, void *arg) { ipf_stack_t *ifs = (ipf_stack_t *)arg; + /* + * The GZ-controlled stack + */ + if (ifs->ifs_gz_cont_ifs != NULL) + ipf_kstat_fini(ifs->ifs_gz_cont_ifs); + + /* + * The per-zone stack + */ ipf_kstat_fini(ifs); } @@ -512,13 +627,12 @@ ipf_stack_shutdown(const netid_t id, void *arg) */ /* ARGSUSED */ static void -ipf_stack_destroy(const netid_t id, void *arg) +ipf_stack_destroy_one(const netid_t id, ipf_stack_t *ifs) { - ipf_stack_t *ifs = (ipf_stack_t *)arg; timeout_id_t tid; #ifdef IPFDEBUG - (void) printf("ipf_stack_destroy(%p)\n", (void *)ifs); + (void) printf("ipf_stack_destroy_one(%p)\n", (void *)ifs); #endif /* @@ -546,7 +660,7 @@ ipf_stack_destroy(const netid_t id, void *arg) WRITE_ENTER(&ifs->ifs_ipf_global); if (ipldetach(ifs) != 0) { - printf("ipf_stack_destroy: ipldetach failed\n"); + printf("ipf_stack_destroy_one: ipldetach failed\n"); } ipftuneable_free(ifs); @@ -560,6 +674,30 @@ ipf_stack_destroy(const netid_t id, void *arg) } +/* + * Destroy things for ipf for both the per-zone ipf stack and the + * GZ-controlled stack for the same zone, if it exists. See the "GZ-controlled + * and per-zone stacks" comment block in ip_fil_solaris.c for details. + */ +/* ARGSUSED */ +static void +ipf_stack_destroy(const netid_t id, void *arg) +{ + ipf_stack_t *ifs = (ipf_stack_t *)arg; + + /* + * The GZ-controlled stack + */ + if (ifs->ifs_gz_cont_ifs != NULL) + ipf_stack_destroy_one(id, ifs->ifs_gz_cont_ifs); + + /* + * The per-zone stack + */ + ipf_stack_destroy_one(id, ifs); +} + + static int ipf_attach(dip, cmd) dev_info_t *dip; ddi_attach_cmd_t cmd; @@ -586,27 +724,39 @@ ddi_attach_cmd_t cmd; (void) ipf_property_g_update(dip); + if (ddi_soft_state_init(&ipf_state, sizeof (ipf_devstate_t), 1) + != 0) { + ddi_prop_remove_all(dip); + return (DDI_FAILURE); + } + for (i = 0; ((s = ipf_devfiles[i]) != NULL); i++) { s = strrchr(s, '/'); if (s == NULL) continue; s++; if (ddi_create_minor_node(dip, s, S_IFCHR, i, - DDI_PSEUDO, 0) == - DDI_FAILURE) { - ddi_remove_minor_node(dip, NULL); + DDI_PSEUDO, 0) == DDI_FAILURE) goto attach_failed; - } } ipf_dev_info = dip; ipfncb = net_instance_alloc(NETINFO_VERSION); + if (ipfncb == NULL) + goto attach_failed; + ipfncb->nin_name = "ipf"; ipfncb->nin_create = ipf_stack_create; ipfncb->nin_destroy = ipf_stack_destroy; ipfncb->nin_shutdown = ipf_stack_shutdown; - i = net_instance_register(ipfncb); + if (net_instance_register(ipfncb) == DDI_FAILURE) { + net_instance_free(ipfncb); + goto attach_failed; + } + + ipf_minor = vmem_create("ipf_minor", (void *)1, UINT32_MAX - 1, + 1, NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER); #ifdef IPFDEBUG cmn_err(CE_CONT, "IP Filter:stack_create callback_reg=%d", i); @@ -619,7 +769,9 @@ ddi_attach_cmd_t cmd; } attach_failed: + ddi_remove_minor_node(dip, NULL); ddi_prop_remove_all(dip); + ddi_soft_state_fini(&ipf_state); return (DDI_FAILURE); } @@ -652,6 +804,9 @@ ddi_detach_cmd_t cmd; return (DDI_FAILURE); } + vmem_destroy(ipf_minor); + ddi_soft_state_fini(&ipf_state); + (void) net_instance_unregister(ipfncb); net_instance_free(ipfncb); |