diff options
Diffstat (limited to 'usr/src/uts/common/inet')
-rw-r--r-- | usr/src/uts/common/inet/ipf/fil.c | 53 | ||||
-rw-r--r-- | usr/src/uts/common/inet/ipf/ip_fil_solaris.c | 225 | ||||
-rw-r--r-- | usr/src/uts/common/inet/ipf/ip_log.c | 10 | ||||
-rw-r--r-- | usr/src/uts/common/inet/ipf/ip_state.c | 2 | ||||
-rw-r--r-- | usr/src/uts/common/inet/ipf/netinet/ip_fil.h | 30 | ||||
-rw-r--r-- | usr/src/uts/common/inet/ipf/netinet/ipf_stack.h | 4 | ||||
-rw-r--r-- | usr/src/uts/common/inet/ipf/solaris.c | 215 |
7 files changed, 461 insertions, 78 deletions
diff --git a/usr/src/uts/common/inet/ipf/fil.c b/usr/src/uts/common/inet/ipf/fil.c index 3955d881e8..67a82d2f68 100644 --- a/usr/src/uts/common/inet/ipf/fil.c +++ b/usr/src/uts/common/inet/ipf/fil.c @@ -4,6 +4,8 @@ * See the IPFILTER.LICENCE file for details on licencing. * * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * + * Copyright (c) 2014, Joyent, Inc. All rights reserved. */ #if defined(KERNEL) || defined(_KERNEL) @@ -134,6 +136,9 @@ struct file; # endif #endif #include "netinet/ipl.h" +#if defined(_KERNEL) +#include <sys/sunddi.h> +#endif /* END OF INCLUDES */ #if !defined(lint) @@ -5696,6 +5701,54 @@ static int fr_objbytes[NUM_OBJ_TYPES][2] = { /* ------------------------------------------------------------------------ */ +/* Function: fr_setzoneid */ +/* Returns: int - 0 = success, else failure */ +/* Parameters: idsp(I) - pointer to ipf_devstate_t */ +/* data(I) - pointer to ioctl data */ +/* */ +/* Set the zone ID in idsp based on the zone name in ipfzoneobj. Further */ +/* ioctls will act on the IPF stack for that zone ID. */ +/* ------------------------------------------------------------------------ */ +#if defined(_KERNEL) +int fr_setzoneid(idsp, data) +ipf_devstate_t *idsp; +void *data; +{ + int error = 0; + ipfzoneobj_t ipfzo; + zone_t *zone; + + error = BCOPYIN(data, &ipfzo, sizeof(ipfzo)); + if (error != 0) + return EFAULT; + + if (memchr(ipfzo.ipfz_zonename, '\0', ZONENAME_MAX) == NULL) + return EFAULT; + + /* + * The global zone doesn't have a GZ-controlled stack, so no + * sense in going any further + */ + if (strcmp(ipfzo.ipfz_zonename, "global") == 0) + return ENODEV; + + if ((zone = zone_find_by_name(ipfzo.ipfz_zonename)) == NULL) + return ENODEV; + + /* + * Store the zone ID to control, and whether it's the + * GZ-controlled stack that's wanted + */ + idsp->ipfs_zoneid = zone->zone_id; + idsp->ipfs_gz = (ipfzo.ipfz_gz == 1) ? 
B_TRUE : B_FALSE; + zone_rele(zone); + + return error; +} +#endif + + +/* ------------------------------------------------------------------------ */ /* Function: fr_inobj */ /* Returns: int - 0 = success, else failure */ /* Parameters: data(I) - pointer to ioctl data */ diff --git a/usr/src/uts/common/inet/ipf/ip_fil_solaris.c b/usr/src/uts/common/inet/ipf/ip_fil_solaris.c index c6738164ee..20dc18b588 100644 --- a/usr/src/uts/common/inet/ipf/ip_fil_solaris.c +++ b/usr/src/uts/common/inet/ipf/ip_fil_solaris.c @@ -4,6 +4,8 @@ * See the IPFILTER.LICENCE file for details on licencing. * * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * + * Copyright (c) 2013, Joyent, Inc. All rights reserved. */ #if !defined(lint) @@ -101,6 +103,54 @@ u_long *ip_forwarding = NULL; #endif #endif +vmem_t *ipf_minor; /* minor number arena */ +void *ipf_state; /* DDI state */ + +/* + * GZ-controlled and per-zone stacks: + * + * For each non-global zone, we create two ipf stacks: the per-zone stack and + * the GZ-controlled stack. The per-zone stack can be controlled and observed + * from inside the zone or from the global zone. The GZ-controlled stack can + * only be controlled and observed from the global zone (though the rules + * still only affect that non-global zone). + * + * The two hooks are always arranged so that the GZ-controlled stack is always + * "outermost" with respect to the zone. 
The traffic flow then looks like + * this: + * + * Inbound: + * + * nic ---> [ GZ-controlled rules ] ---> [ per-zone rules ] ---> zone + * + * Outbound: + * + * nic <--- [ GZ-controlled rules ] <--- [ per-zone rules ] <--- zone + */ + +/* IPv4 hook names */ +char *hook4_nicevents = "ipfilter_hook4_nicevents"; +char *hook4_nicevents_gz = "ipfilter_hook4_nicevents_gz"; +char *hook4_in = "ipfilter_hook4_in"; +char *hook4_in_gz = "ipfilter_hook4_in_gz"; +char *hook4_out = "ipfilter_hook4_out"; +char *hook4_out_gz = "ipfilter_hook4_out_gz"; +char *hook4_loop_in = "ipfilter_hook4_loop_in"; +char *hook4_loop_in_gz = "ipfilter_hook4_loop_in_gz"; +char *hook4_loop_out = "ipfilter_hook4_loop_out"; +char *hook4_loop_out_gz = "ipfilter_hook4_loop_out_gz"; + +/* IPv6 hook names */ +char *hook6_nicevents = "ipfilter_hook6_nicevents"; +char *hook6_nicevents_gz = "ipfilter_hook6_nicevents_gz"; +char *hook6_in = "ipfilter_hook6_in"; +char *hook6_in_gz = "ipfilter_hook6_in_gz"; +char *hook6_out = "ipfilter_hook6_out"; +char *hook6_out_gz = "ipfilter_hook6_out_gz"; +char *hook6_loop_in = "ipfilter_hook6_loop_in"; +char *hook6_loop_in_gz = "ipfilter_hook6_loop_in_gz"; +char *hook6_loop_out = "ipfilter_hook6_loop_out"; +char *hook6_loop_out_gz = "ipfilter_hook6_loop_out_gz"; /* ------------------------------------------------------------------------ */ /* Function: ipldetach */ @@ -117,7 +167,7 @@ int ipldetach(ifs) ipf_stack_t *ifs; { - ASSERT(rw_read_locked(&ifs->ifs_ipf_global.ipf_lk) == 0); + ASSERT(RW_WRITE_HELD(&ifs->ifs_ipf_global.ipf_lk)); #if SOLARIS2 < 10 @@ -244,7 +294,7 @@ ipf_stack_t *ifs; cmn_err(CE_CONT, "iplattach()\n"); #endif - ASSERT(rw_read_locked(&ifs->ifs_ipf_global.ipf_lk) == 0); + ASSERT(RW_WRITE_HELD(&ifs->ifs_ipf_global.ipf_lk)); ifs->ifs_fr_flags = IPF_LOGGING; #ifdef _KERNEL ifs->ifs_fr_update_ipid = 0; @@ -269,16 +319,39 @@ ipf_stack_t *ifs; if (fr_initialise(ifs) < 0) return -1; - HOOK_INIT(ifs->ifs_ipfhook4_nicevents, ipf_nic_event_v4, - 
"ipfilter_hook4_nicevents", ifs); - HOOK_INIT(ifs->ifs_ipfhook4_in, ipf_hook4_in, - "ipfilter_hook4_in", ifs); - HOOK_INIT(ifs->ifs_ipfhook4_out, ipf_hook4_out, - "ipfilter_hook4_out", ifs); - HOOK_INIT(ifs->ifs_ipfhook4_loop_in, ipf_hook4_loop_in, - "ipfilter_hook4_loop_in", ifs); - HOOK_INIT(ifs->ifs_ipfhook4_loop_out, ipf_hook4_loop_out, - "ipfilter_hook4_loop_out", ifs); + /* + * For incoming packets, we want the GZ-controlled hooks to run before + * the per-zone hooks, regardless of what order they are installed. + * See the "GZ-controlled and per-zone stacks" comment block at the top + * of this file. + */ +#define HOOK_INIT_GZ_BEFORE(x, fn, n, gzn, a) \ + HOOK_INIT(x, fn, ifs->ifs_gz_controlled ? gzn : n, ifs); \ + (x)->h_hint = ifs->ifs_gz_controlled ? HH_BEFORE : HH_AFTER; \ + (x)->h_hintvalue = (uintptr_t) (ifs->ifs_gz_controlled ? n : gzn); + + HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_nicevents, ipf_nic_event_v4, + hook4_nicevents, hook4_nicevents_gz, ifs); + HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_in, ipf_hook4_in, + hook4_in, hook4_in_gz, ifs); + HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_loop_in, ipf_hook4_loop_in, + hook4_loop_in, hook4_loop_in_gz, ifs); + + /* + * For outgoing packets, we want the GZ-controlled hooks to run after + * the per-zone hooks, regardless of what order they are installed. + * See the "GZ-controlled and per-zone stacks" comment block at the top + * of this file. + */ +#define HOOK_INIT_GZ_AFTER(x, fn, n, gzn, a) \ + HOOK_INIT(x, fn, ifs->ifs_gz_controlled ? gzn : n, ifs); \ + (x)->h_hint = ifs->ifs_gz_controlled ? HH_AFTER : HH_BEFORE; \ + (x)->h_hintvalue = (uintptr_t) (ifs->ifs_gz_controlled ? 
n : gzn); + + HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook4_out, ipf_hook4_out, + hook4_out, hook4_out_gz, ifs); + HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook4_loop_out, ipf_hook4_loop_out, + hook4_loop_out, hook4_loop_out_gz, ifs); /* * If we hold this lock over all of the net_hook_register calls, we @@ -323,6 +396,7 @@ ipf_stack_t *ifs; if (!ifs->ifs_hook4_loopback_out) goto hookup_failed; } + /* * Add IPv6 hooks */ @@ -330,16 +404,16 @@ ipf_stack_t *ifs; if (ifs->ifs_ipf_ipv6 == NULL) goto hookup_failed; - HOOK_INIT(ifs->ifs_ipfhook6_nicevents, ipf_nic_event_v6, - "ipfilter_hook6_nicevents", ifs); - HOOK_INIT(ifs->ifs_ipfhook6_in, ipf_hook6_in, - "ipfilter_hook6_in", ifs); - HOOK_INIT(ifs->ifs_ipfhook6_out, ipf_hook6_out, - "ipfilter_hook6_out", ifs); - HOOK_INIT(ifs->ifs_ipfhook6_loop_in, ipf_hook6_loop_in, - "ipfilter_hook6_loop_in", ifs); - HOOK_INIT(ifs->ifs_ipfhook6_loop_out, ipf_hook6_loop_out, - "ipfilter_hook6_loop_out", ifs); + HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_nicevents, ipf_nic_event_v6, + hook6_nicevents, hook6_nicevents_gz, ifs); + HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_in, ipf_hook6_in, + hook6_in, hook6_in_gz, ifs); + HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_loop_in, ipf_hook6_loop_in, + hook6_loop_in, hook6_loop_in_gz, ifs); + HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook6_out, ipf_hook6_out, + hook6_out, hook6_out_gz, ifs); + HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook6_loop_out, ipf_hook6_loop_out, + hook6_loop_out, hook6_loop_out_gz, ifs); ifs->ifs_hook6_nic_events = (net_hook_register(ifs->ifs_ipf_ipv6, NH_NIC_EVENTS, ifs->ifs_ipfhook6_nicevents) == 0); @@ -519,35 +593,47 @@ int *rp; minor_t unit; u_int enable; ipf_stack_t *ifs; + zoneid_t zid; + ipf_devstate_t *isp; #ifdef IPFDEBUG cmn_err(CE_CONT, "iplioctl(%x,%x,%x,%d,%x,%d)\n", dev, cmd, data, mode, cp, rp); #endif unit = getminor(dev); - if (IPL_LOGMAX < unit) + + isp = ddi_get_soft_state(ipf_state, unit); + if (isp == NULL) return ENXIO; + unit = isp->ipfs_minor; + + zid = crgetzoneid(cp); + if (cmd == SIOCIPFZONESET) { 
+ if (zid == GLOBAL_ZONEID) + return fr_setzoneid(isp, (caddr_t) data); + return EACCES; + } /* - * As we're calling ipf_find_stack in user space, from a given zone - * to find the stack pointer for this zone, there is no need to have - * a hold/refence count here. + * ipf_find_stack returns with a read lock on ifs_ipf_global */ - ifs = ipf_find_stack(crgetzoneid(cp)); - ASSERT(ifs != NULL); + ifs = ipf_find_stack(zid, isp); + if (ifs == NULL) + return ENXIO; if (ifs->ifs_fr_running <= 0) { if (unit != IPL_LOGIPF) { + RWLOCK_EXIT(&ifs->ifs_ipf_global); return EIO; } if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET && cmd != SIOCIPFSET && cmd != SIOCFRENB && cmd != SIOCGETFS && cmd != SIOCGETFF) { + RWLOCK_EXIT(&ifs->ifs_ipf_global); return EIO; } } - READ_ENTER(&ifs->ifs_ipf_global); if (ifs->ifs_fr_enable_active != 0) { RWLOCK_EXIT(&ifs->ifs_ipf_global); return EBUSY; @@ -848,7 +934,9 @@ dev_t *devp; int flags, otype; cred_t *cred; { + ipf_devstate_t *isp; minor_t min = getminor(*devp); + minor_t minor; #ifdef IPFDEBUG cmn_err(CE_CONT, "iplopen(%x,%x,%x,%x)\n", devp, flags, otype, cred); @@ -856,8 +944,25 @@ cred_t *cred; if (!(otype & OTYP_CHR)) return ENXIO; - min = (IPL_LOGMAX < min) ? ENXIO : 0; - return min; + if (IPL_LOGMAX < min) + return ENXIO; + + minor = (minor_t)(uintptr_t)vmem_alloc(ipf_minor, 1, + VM_BESTFIT | VM_SLEEP); + + if (ddi_soft_state_zalloc(ipf_state, minor) != 0) { + vmem_free(ipf_minor, (void *)(uintptr_t)minor, 1); + return ENXIO; + } + + *devp = makedevice(getmajor(*devp), minor); + isp = ddi_get_soft_state(ipf_state, minor); + VERIFY(isp != NULL); + + isp->ipfs_minor = min; + isp->ipfs_zoneid = IPFS_ZONE_UNSET; + + return 0; } @@ -873,8 +978,13 @@ cred_t *cred; cmn_err(CE_CONT, "iplclose(%x,%x,%x,%x)\n", dev, flags, otype, cred); #endif - min = (IPL_LOGMAX < min) ? 
ENXIO : 0; - return min; + if (IPL_LOGMAX < min) + return ENXIO; + + ddi_soft_state_free(ipf_state, min); + vmem_free(ipf_minor, (void *)(uintptr_t)min, 1); + + return 0; } #ifdef IPFILTER_LOG @@ -892,30 +1002,41 @@ cred_t *cp; { ipf_stack_t *ifs; int ret; + minor_t unit; + ipf_devstate_t *isp; + + unit = getminor(dev); + isp = ddi_get_soft_state(ipf_state, unit); + if (isp == NULL) + return ENXIO; + unit = isp->ipfs_minor; + /* - * As we're calling ipf_find_stack in user space, from a given zone - * to find the stack pointer for this zone, there is no need to have - * a hold/refence count here. + * ipf_find_stack returns with a read lock on ifs_ipf_global */ - ifs = ipf_find_stack(crgetzoneid(cp)); - ASSERT(ifs != NULL); + ifs = ipf_find_stack(crgetzoneid(cp), isp); + if (ifs == NULL) + return ENXIO; # ifdef IPFDEBUG cmn_err(CE_CONT, "iplread(%x,%x,%x)\n", dev, uio, cp); # endif if (ifs->ifs_fr_running < 1) { + RWLOCK_EXIT(&ifs->ifs_ipf_global); return EIO; } # ifdef IPFILTER_SYNC - if (getminor(dev) == IPL_LOGSYNC) { + if (unit == IPL_LOGSYNC) { + RWLOCK_EXIT(&ifs->ifs_ipf_global); return ipfsync_read(uio); } # endif - ret = ipflog_read(getminor(dev), uio, ifs); + ret = ipflog_read(unit, uio, ifs); + RWLOCK_EXIT(&ifs->ifs_ipf_global); return ret; } #endif /* IPFILTER_LOG */ @@ -933,30 +1054,41 @@ register struct uio *uio; cred_t *cp; { ipf_stack_t *ifs; + minor_t unit; + ipf_devstate_t *isp; + + unit = getminor(dev); + isp = ddi_get_soft_state(ipf_state, unit); + if (isp == NULL) + return ENXIO; + unit = isp->ipfs_minor; /* - * As we're calling ipf_find_stack in user space, from a given zone - * to find the stack pointer for this zone, there is no need to have - * a hold/refence count here. 
+ * ipf_find_stack returns with a read lock on ifs_ipf_global */ - ifs = ipf_find_stack(crgetzoneid(cp)); - ASSERT(ifs != NULL); + ifs = ipf_find_stack(crgetzoneid(cp), isp); + if (ifs == NULL) + return ENXIO; #ifdef IPFDEBUG cmn_err(CE_CONT, "iplwrite(%x,%x,%x)\n", dev, uio, cp); #endif if (ifs->ifs_fr_running < 1) { + RWLOCK_EXIT(&ifs->ifs_ipf_global); return EIO; } #ifdef IPFILTER_SYNC - if (getminor(dev) == IPL_LOGSYNC) + if (getminor(dev) == IPL_LOGSYNC) { + RWLOCK_EXIT(&ifs->ifs_ipf_global); return ipfsync_write(uio); + } #endif /* IPFILTER_SYNC */ dev = dev; /* LINT */ uio = uio; /* LINT */ cp = cp; /* LINT */ + RWLOCK_EXIT(&ifs->ifs_ipf_global); return ENXIO; } @@ -1998,7 +2130,6 @@ int ipf_hook6(hook_data_t info, int out, int loopback, void *arg) fw->hpe_mb = qpi.qpi_m; fw->hpe_hdr = qpi.qpi_data; return rval; - } diff --git a/usr/src/uts/common/inet/ipf/ip_log.c b/usr/src/uts/common/inet/ipf/ip_log.c index 1fe54f3a07..d60a666b2d 100644 --- a/usr/src/uts/common/inet/ipf/ip_log.c +++ b/usr/src/uts/common/inet/ipf/ip_log.c @@ -7,6 +7,8 @@ * * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * + * Copyright (c) 2014, Joyent, Inc. All rights reserved. */ #include <sys/param.h> @@ -572,10 +574,18 @@ ipf_stack_t *ifs; while (ifs->ifs_iplt[unit] == NULL) { # if SOLARIS && defined(_KERNEL) + /* + * Prevent a deadlock with ipldetach() - see the "ipfilter + * kernel module mutexes and locking" comment block in solaris.c + * for details. 
+ */ + RWLOCK_EXIT(&ifs->ifs_ipf_global); if (!cv_wait_sig(&ifs->ifs_iplwait, &ifs->ifs_ipl_mutex.ipf_lk)) { + READ_ENTER(&ifs->ifs_ipf_global); MUTEX_EXIT(&ifs->ifs_ipl_mutex); return EINTR; } + READ_ENTER(&ifs->ifs_ipf_global); # else # if defined(__hpux) && defined(_KERNEL) lock_t *l; diff --git a/usr/src/uts/common/inet/ipf/ip_state.c b/usr/src/uts/common/inet/ipf/ip_state.c index adf9ff9382..c1fe642d00 100644 --- a/usr/src/uts/common/inet/ipf/ip_state.c +++ b/usr/src/uts/common/inet/ipf/ip_state.c @@ -4,6 +4,8 @@ * See the IPFILTER.LICENCE file for details on licencing. * * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * + * Copyright (c) 2014, Joyent, Inc. All rights reserved. */ #if defined(KERNEL) || defined(_KERNEL) diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_fil.h b/usr/src/uts/common/inet/ipf/netinet/ip_fil.h index 0a2d6431d8..479a8044f4 100644 --- a/usr/src/uts/common/inet/ipf/netinet/ip_fil.h +++ b/usr/src/uts/common/inet/ipf/netinet/ip_fil.h @@ -7,12 +7,15 @@ * $Id: ip_fil.h,v 2.170.2.22 2005/07/16 05:55:35 darrenr Exp $ * * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * + * Copyright (c) 2014, Joyent, Inc. All rights reserved. */ #ifndef __IP_FIL_H__ #define __IP_FIL_H__ #include "netinet/ip_compat.h" +#include <sys/zone.h> #ifndef SOLARIS # define SOLARIS (defined(sun) && (defined(__svr4__) || defined(__SVR4))) @@ -106,6 +109,7 @@ #define SIOCADDFR SIOCADAFR #define SIOCDELFR SIOCRMAFR #define SIOCINSFR SIOCINAFR +# define SIOCIPFZONESET _IOWR('r', 97, struct ipfzoneobj) /* * What type of table is getting flushed? @@ -1165,6 +1169,26 @@ typedef struct ipfobj { u_char ipfo_xxxpad[32]; /* reserved for future use */ } ipfobj_t; +/* + * ioctl struct for setting what zone further ioctls will act on. ipfz_gz is a + * boolean: set it to 1 to operate on the GZ-controlled stack. 
+ */ +typedef struct ipfzoneobj { + u_32_t ipfz_gz; /* GZ stack boolean */ + char ipfz_zonename[ZONENAME_MAX]; /* zone to act on */ +} ipfzoneobj_t; + +#if defined(_KERNEL) +/* Set ipfs_zoneid to this if no zone has been set: */ +#define IPFS_ZONE_UNSET -2 + +typedef struct ipf_devstate { + zoneid_t ipfs_zoneid; + minor_t ipfs_minor; + boolean_t ipfs_gz; +} ipf_devstate_t; +#endif + #define IPFOBJ_FRENTRY 0 /* struct frentry */ #define IPFOBJ_IPFSTAT 1 /* struct friostat */ #define IPFOBJ_IPFINFO 2 /* struct fr_info */ @@ -1352,7 +1376,6 @@ extern void ipfilterattach __P((int)); extern int ipl_enable __P((void)); extern int ipl_disable __P((void)); # ifdef MENTAT -extern ipf_stack_t *ipf_find_stack(const zoneid_t zone); extern int fr_check __P((struct ip *, int, void *, int, void *, mblk_t **, ipf_stack_t *)); # if SOLARIS @@ -1365,6 +1388,7 @@ extern int iplioctl __P((dev_t, int, int *, int, cred_t *, int *)); extern int fr_make_rst __P((fr_info_t *)); extern int fr_make_icmp __P((fr_info_t *)); extern void fr_calc_chksum __P((fr_info_t *, mb_t *)); +extern ipf_stack_t *ipf_find_stack(const zoneid_t, ipf_devstate_t *); # endif extern int iplopen __P((dev_t *, int, int, cred_t *)); extern int iplclose __P((dev_t, int, int, cred_t *)); @@ -1576,6 +1600,10 @@ extern int ipf_earlydrop __P((int, ipftq_t *, int, ipf_stack_t *)); extern u_32_t ipf_random __P((void)); #endif +#if defined(_KERNEL) +extern int fr_setzoneid __P((ipf_devstate_t *, void *)); +#endif + extern char ipfilter_version[]; #ifdef USE_INET6 extern int icmptoicmp6types[ICMP_MAXTYPE+1]; diff --git a/usr/src/uts/common/inet/ipf/netinet/ipf_stack.h b/usr/src/uts/common/inet/ipf/netinet/ipf_stack.h index 75703994a5..a239f1c1ca 100644 --- a/usr/src/uts/common/inet/ipf/netinet/ipf_stack.h +++ b/usr/src/uts/common/inet/ipf/netinet/ipf_stack.h @@ -5,6 +5,8 @@ * * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * + * Copyright 2014 Joyent, Inc. 
All rights reserved. */ #ifndef __IPF_STACK_H__ @@ -41,8 +43,10 @@ struct ipf_stack { struct ipf_stack *ifs_next; struct ipf_stack **ifs_pnext; + struct ipf_stack *ifs_gz_cont_ifs; netid_t ifs_netid; zoneid_t ifs_zone; + boolean_t ifs_gz_controlled; /* ipf module */ fr_info_t ifs_frcache[2][8]; diff --git a/usr/src/uts/common/inet/ipf/solaris.c b/usr/src/uts/common/inet/ipf/solaris.c index 1a1d94def5..c541f4dddc 100644 --- a/usr/src/uts/common/inet/ipf/solaris.c +++ b/usr/src/uts/common/inet/ipf/solaris.c @@ -5,6 +5,50 @@ * * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * + * Copyright (c) 2014, Joyent, Inc. All rights reserved. + */ + +/* + * ipfilter kernel module mutexes and locking: + * + * Enabling ipfilter creates a per-netstack ipf_stack_t object that is + * stored in the ipf_stacks list, which is protected by ipf_stack_lock. + * ipf_stack_t objects are accessed in three contexts: + * + * 1) administering that filter (eg: ioctls handled with iplioctl()) + * 2) reading log data (eg: iplread() / iplwrite()) + * 3) filtering packets (eg: ipf_hook4_* and ipf_hook6_* pfhooks + * functions) + * + * Each ipf_stack_t has a RW lock, ifs_ipf_global, protecting access to the + * whole structure. The structure also has locks protecting the various + * data structures used for filtering. 
The following guidelines should be + * followed for ipf_stack_t locks: + * + * - ipf_stack_lock must be held when accessing the ipf_stacks list + * - ipf_stack_lock should be held before acquiring ifs_ipf_global for + * a stack (the exception to this is ipf_stack_destroy(), which removes + * the ipf_stack_t from the list, then drops ipf_stack_lock before + * acquiring ifs_ipf_global) + * - ifs_ipf_global must be held when accessing an ipf_stack_t in that list: + * - The write lock is held only during stack creation / destruction + * - The read lock should be held for all other accesses + * - To alter the filtering data in the administrative context, one must: + * - acquire the read lock for ifs_ipf_global + * - then acquire the write lock for the data in question + * - In the filtering path, the read lock needs to be held for each type of + * filtering data used + * - ifs_ipf_global does not need to be held in the filtering path: + * - The filtering hooks don't need to modify the stack itself + * - The ipf_stack_t will not be destroyed until the hooks are unregistered. + * This requires a write lock on the hook, ensuring that no active hooks + * (eg: the filtering path) are running, and that the hooks won't be run + * afterward. + * + * Note that there is a deadlock possible when calling net_hook_register() + * or net_hook_unregister() with ifs_ipf_global held: see the comments in + * iplattach() and ipldetach() for details. 
*/ #include <sys/systm.h> @@ -73,7 +117,8 @@ static int ipf_property_g_update __P((dev_info_t *)); static char *ipf_devfiles[] = { IPL_NAME, IPNAT_NAME, IPSTATE_NAME, IPAUTH_NAME, IPSYNC_NAME, IPSCAN_NAME, IPLOOKUP_NAME, NULL }; - +extern void *ipf_state; /* DDI state */ +extern vmem_t *ipf_minor; /* minor number arena */ static struct cb_ops ipf_cb_ops = { iplopen, @@ -221,10 +266,11 @@ static const filter_kstats_t ipf_kstat_tmp = { static int ipf_kstat_update(kstat_t *ksp, int rwflag); static void -ipf_kstat_init(ipf_stack_t *ifs) +ipf_kstat_init(ipf_stack_t *ifs, boolean_t from_gz) { - ifs->ifs_kstatp[0] = net_kstat_create(ifs->ifs_netid, "ipf", 0, - "inbound", "net", KSTAT_TYPE_NAMED, + ifs->ifs_kstatp[0] = net_kstat_create(ifs->ifs_netid, + (from_gz ? "ipf_gz" : "ipf"), + 0, "inbound", "net", KSTAT_TYPE_NAMED, sizeof (filter_kstats_t) / sizeof (kstat_named_t), 0); if (ifs->ifs_kstatp[0] != NULL) { bcopy(&ipf_kstat_tmp, ifs->ifs_kstatp[0]->ks_data, @@ -234,8 +280,9 @@ ipf_kstat_init(ipf_stack_t *ifs) kstat_install(ifs->ifs_kstatp[0]); } - ifs->ifs_kstatp[1] = net_kstat_create(ifs->ifs_netid, "ipf", 0, - "outbound", "net", KSTAT_TYPE_NAMED, + ifs->ifs_kstatp[1] = net_kstat_create(ifs->ifs_netid, + (from_gz ? 
"ipf_gz" : "ipf"), + 0, "outbound", "net", KSTAT_TYPE_NAMED, sizeof (filter_kstats_t) / sizeof (kstat_named_t), 0); if (ifs->ifs_kstatp[1] != NULL) { bcopy(&ipf_kstat_tmp, ifs->ifs_kstatp[1]->ks_data, @@ -369,12 +416,14 @@ dev_info_t *dip; * Initialize things for IPF for each stack instance */ static void * -ipf_stack_create(const netid_t id) +ipf_stack_create_one(const netid_t id, const zoneid_t zid, boolean_t from_gz, + ipf_stack_t *ifs_gz) { ipf_stack_t *ifs; #ifdef IPFDEBUG - cmn_err(CE_NOTE, "IP Filter:stack_create id=%d", id); + cmn_err(CE_NOTE, "IP Filter:stack_create_one id=%d global=%d", id, + global); #endif ifs = (ipf_stack_t *)kmem_alloc(sizeof (*ifs), KM_SLEEP); @@ -398,8 +447,11 @@ ipf_stack_create(const netid_t id) RWLOCK_INIT(&ifs->ifs_ipf_mutex, "ipf filter rwlock"); RWLOCK_INIT(&ifs->ifs_ipf_frcache, "ipf cache rwlock"); ifs->ifs_netid = id; - ifs->ifs_zone = net_getzoneidbynetid(id); - ipf_kstat_init(ifs); + ifs->ifs_zone = zid; + ifs->ifs_gz_controlled = from_gz; + ifs->ifs_gz_cont_ifs = ifs_gz; + + ipf_kstat_init(ifs, from_gz); #ifdef IPFDEBUG cmn_err(CE_CONT, "IP Filter:stack_create zone=%d", ifs->ifs_zone); @@ -427,31 +479,84 @@ ipf_stack_create(const netid_t id) return (ifs); } +static void * +ipf_stack_create(const netid_t id) +{ + ipf_stack_t *ifs = NULL; + zoneid_t zid = net_getzoneidbynetid(id); + + /* + * Create two ipfilter stacks for a zone - the first can only be + * controlled from the global zone, and the second is owned by + * the zone itself. There is no need to create a GZ-controlled + * stack for the global zone, since we're already in the global + * zone. See the "GZ-controlled and per-zone stacks" comment block in + * ip_fil_solaris.c for details. 
+ */ + if (zid != GLOBAL_ZONEID) + ifs = ipf_stack_create_one(id, zid, B_TRUE, NULL); + + return (ipf_stack_create_one(id, zid, B_FALSE, ifs)); +} /* - * This function should only ever be used to find the pointer to the - * ipfilter stack structure for the zone that is currently being - * executed... so if you're running in the context of zone 1, you - * should not attempt to find the ipf_stack_t for zone 0 or 2 or - * anything else but 1. In that way, the returned pointer is safe - * as it will only be nuked when the instance is destroyed as part - * of the final shutdown of a zone. + * Find an ipfilter stack for the given zone. Return the GZ-controlled or + * per-zone stack if set by an earlier SIOCIPFZONESET ioctl call. See the + * "GZ-controlled and per-zone stacks" comment block in ip_fil_solaris.c for + * details. + * + * This function returns with the ipf_stack_t's ifs_ipf_global + * read lock held (if the stack is found). See the "ipfilter kernel module + * mutexes and locking" comment block at the top of this file. */ ipf_stack_t * -ipf_find_stack(const zoneid_t zone) +ipf_find_stack(const zoneid_t orig_zone, ipf_devstate_t *isp) { ipf_stack_t *ifs; + boolean_t gz_stack; + zoneid_t zone; + + /* + * If we're in the GZ, determine if we're acting on a zone's stack, + * and whether or not that stack is the GZ-controlled or in-zone + * one. See the "GZ-controlled and per-zone stacks" comment block in + * ip_fil_solaris.c. + */ + if (orig_zone == GLOBAL_ZONEID && + (isp->ipfs_zoneid != IPFS_ZONE_UNSET)) { + /* Global zone, and we've set the zoneid for this fd already */ + + if (orig_zone == isp->ipfs_zoneid) { + /* There's only a per-zone stack for the GZ */ + gz_stack = B_FALSE; + } else { + gz_stack = isp->ipfs_gz; + } + + zone = isp->ipfs_zoneid; + } else { + /* + * Non-global zone or GZ without having set a zoneid: act on + * the per-zone stack of the zone that this ioctl originated + * from. 
+ */ + gz_stack = B_FALSE; + zone = orig_zone; + } mutex_enter(&ipf_stack_lock); for (ifs = ipf_stacks; ifs != NULL; ifs = ifs->ifs_next) { - if (ifs->ifs_zone == zone) + if (ifs->ifs_zone == zone && ifs->ifs_gz_controlled == gz_stack) break; } + + if (ifs != NULL) { + READ_ENTER(&ifs->ifs_ipf_global); + } mutex_exit(&ipf_stack_lock); return (ifs); } - static int ipf_detach_check_zone(ipf_stack_t *ifs) { /* @@ -495,7 +600,8 @@ static int ipf_detach_check_all() /* - * Destroy things for ipf for one stack. + * Remove ipf kstats for both the per-zone ipf stack and the + * GZ-controlled stack for the same zone, if it exists. */ /* ARGSUSED */ static void @@ -503,6 +609,15 @@ ipf_stack_shutdown(const netid_t id, void *arg) { ipf_stack_t *ifs = (ipf_stack_t *)arg; + /* + * The GZ-controlled stack + */ + if (ifs->ifs_gz_cont_ifs != NULL) + ipf_kstat_fini(ifs->ifs_gz_cont_ifs); + + /* + * The per-zone stack + */ ipf_kstat_fini(ifs); } @@ -512,13 +627,12 @@ ipf_stack_shutdown(const netid_t id, void *arg) */ /* ARGSUSED */ static void -ipf_stack_destroy(const netid_t id, void *arg) +ipf_stack_destroy_one(const netid_t id, ipf_stack_t *ifs) { - ipf_stack_t *ifs = (ipf_stack_t *)arg; timeout_id_t tid; #ifdef IPFDEBUG - (void) printf("ipf_stack_destroy(%p)\n", (void *)ifs); + (void) printf("ipf_stack_destroy_one(%p)\n", (void *)ifs); #endif /* @@ -546,7 +660,7 @@ ipf_stack_destroy(const netid_t id, void *arg) WRITE_ENTER(&ifs->ifs_ipf_global); if (ipldetach(ifs) != 0) { - printf("ipf_stack_destroy: ipldetach failed\n"); + printf("ipf_stack_destroy_one: ipldetach failed\n"); } ipftuneable_free(ifs); @@ -560,6 +674,30 @@ ipf_stack_destroy(const netid_t id, void *arg) } +/* + * Destroy things for ipf for both the per-zone ipf stack and the + * GZ-controlled stack for the same zone, if it exists. See the "GZ-controlled + * and per-zone stacks" comment block in ip_fil_solaris.c for details. 
+ */ +/* ARGSUSED */ +static void +ipf_stack_destroy(const netid_t id, void *arg) +{ + ipf_stack_t *ifs = (ipf_stack_t *)arg; + + /* + * The GZ-controlled stack + */ + if (ifs->ifs_gz_cont_ifs != NULL) + ipf_stack_destroy_one(id, ifs->ifs_gz_cont_ifs); + + /* + * The per-zone stack + */ + ipf_stack_destroy_one(id, ifs); +} + + static int ipf_attach(dip, cmd) dev_info_t *dip; ddi_attach_cmd_t cmd; @@ -586,27 +724,39 @@ ddi_attach_cmd_t cmd; (void) ipf_property_g_update(dip); + if (ddi_soft_state_init(&ipf_state, sizeof (ipf_devstate_t), 1) + != 0) { + ddi_prop_remove_all(dip); + return (DDI_FAILURE); + } + for (i = 0; ((s = ipf_devfiles[i]) != NULL); i++) { s = strrchr(s, '/'); if (s == NULL) continue; s++; if (ddi_create_minor_node(dip, s, S_IFCHR, i, - DDI_PSEUDO, 0) == - DDI_FAILURE) { - ddi_remove_minor_node(dip, NULL); + DDI_PSEUDO, 0) == DDI_FAILURE) goto attach_failed; - } } ipf_dev_info = dip; ipfncb = net_instance_alloc(NETINFO_VERSION); + if (ipfncb == NULL) + goto attach_failed; + ipfncb->nin_name = "ipf"; ipfncb->nin_create = ipf_stack_create; ipfncb->nin_destroy = ipf_stack_destroy; ipfncb->nin_shutdown = ipf_stack_shutdown; - i = net_instance_register(ipfncb); + if (net_instance_register(ipfncb) == DDI_FAILURE) { + net_instance_free(ipfncb); + goto attach_failed; + } + + ipf_minor = vmem_create("ipf_minor", (void *)1, UINT32_MAX - 1, + 1, NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER); #ifdef IPFDEBUG cmn_err(CE_CONT, "IP Filter:stack_create callback_reg=%d", i); @@ -619,7 +769,9 @@ ddi_attach_cmd_t cmd; } attach_failed: + ddi_remove_minor_node(dip, NULL); ddi_prop_remove_all(dip); + ddi_soft_state_fini(&ipf_state); return (DDI_FAILURE); } @@ -652,6 +804,9 @@ ddi_detach_cmd_t cmd; return (DDI_FAILURE); } + vmem_destroy(ipf_minor); + ddi_soft_state_fini(&ipf_state); + (void) net_instance_unregister(ipfncb); net_instance_free(ipfncb); |