summaryrefslogtreecommitdiff
path: root/usr/src/uts/common/inet
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/uts/common/inet')
-rw-r--r-- usr/src/uts/common/inet/ipf/fil.c 53
-rw-r--r-- usr/src/uts/common/inet/ipf/ip_fil_solaris.c 225
-rw-r--r-- usr/src/uts/common/inet/ipf/ip_log.c 10
-rw-r--r-- usr/src/uts/common/inet/ipf/ip_state.c 2
-rw-r--r-- usr/src/uts/common/inet/ipf/netinet/ip_fil.h 30
-rw-r--r-- usr/src/uts/common/inet/ipf/netinet/ipf_stack.h 4
-rw-r--r-- usr/src/uts/common/inet/ipf/solaris.c 215
7 files changed, 461 insertions, 78 deletions
diff --git a/usr/src/uts/common/inet/ipf/fil.c b/usr/src/uts/common/inet/ipf/fil.c
index 3955d881e8..67a82d2f68 100644
--- a/usr/src/uts/common/inet/ipf/fil.c
+++ b/usr/src/uts/common/inet/ipf/fil.c
@@ -4,6 +4,8 @@
* See the IPFILTER.LICENCE file for details on licencing.
*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
#if defined(KERNEL) || defined(_KERNEL)
@@ -134,6 +136,9 @@ struct file;
# endif
#endif
#include "netinet/ipl.h"
+#if defined(_KERNEL)
+#include <sys/sunddi.h>
+#endif
/* END OF INCLUDES */
#if !defined(lint)
@@ -5696,6 +5701,54 @@ static int fr_objbytes[NUM_OBJ_TYPES][2] = {
/* ------------------------------------------------------------------------ */
+/* Function: fr_setzoneid */
+/* Returns: int - 0 = success, EFAULT or ENODEV on failure */
+/* Parameters: idsp(I) - pointer to ipf_devstate_t to update */
+/* data(I) - pointer to ioctl data (an ipfzoneobj_t) */
+/* */
+/* Set the zone ID in idsp based on the zone name in the ipfzoneobj_t copied */
+/* in from data. Further ioctls will act on the IPF stack for that zone ID. */
+/* ------------------------------------------------------------------------ */
+#if defined(_KERNEL)
+int fr_setzoneid(idsp, data)
+ipf_devstate_t *idsp;
+void *data;
+{
+ int error = 0;
+ ipfzoneobj_t ipfzo;
+ zone_t *zone;
+
+ error = BCOPYIN(data, &ipfzo, sizeof(ipfzo));
+ if (error != 0)
+ return EFAULT;
+
+ if (memchr(ipfzo.ipfz_zonename, '\0', ZONENAME_MAX) == NULL)
+ return EFAULT; /* zone name is not NUL-terminated */
+
+ /*
+ * The global zone doesn't have a GZ-controlled stack, so no
+ * sense in going any further
+ */
+ if (strcmp(ipfzo.ipfz_zonename, "global") == 0)
+ return ENODEV;
+
+ if ((zone = zone_find_by_name(ipfzo.ipfz_zonename)) == NULL)
+ return ENODEV; /* no zone with that name */
+
+ /*
+ * Store the zone ID to control, and whether it's the
+ * GZ-controlled stack that's wanted
+ */
+ idsp->ipfs_zoneid = zone->zone_id;
+ idsp->ipfs_gz = (ipfzo.ipfz_gz == 1) ? B_TRUE : B_FALSE;
+ zone_rele(zone);
+
+ return error; /* always 0 here: BCOPYIN succeeded above */
+}
+#endif
+
+
+/* ------------------------------------------------------------------------ */
/* Function: fr_inobj */
/* Returns: int - 0 = success, else failure */
/* Parameters: data(I) - pointer to ioctl data */
diff --git a/usr/src/uts/common/inet/ipf/ip_fil_solaris.c b/usr/src/uts/common/inet/ipf/ip_fil_solaris.c
index c6738164ee..20dc18b588 100644
--- a/usr/src/uts/common/inet/ipf/ip_fil_solaris.c
+++ b/usr/src/uts/common/inet/ipf/ip_fil_solaris.c
@@ -4,6 +4,8 @@
* See the IPFILTER.LICENCE file for details on licencing.
*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
*/
#if !defined(lint)
@@ -101,6 +103,54 @@ u_long *ip_forwarding = NULL;
#endif
#endif
+vmem_t *ipf_minor; /* minor number arena */
+void *ipf_state; /* DDI state */
+
+/*
+ * GZ-controlled and per-zone stacks:
+ *
+ * For each non-global zone, we create two ipf stacks: the per-zone stack and
+ * the GZ-controlled stack. The per-zone stack can be controlled and observed
+ * from inside the zone or from the global zone. The GZ-controlled stack can
+ * only be controlled and observed from the global zone (though the rules
+ * still only affect that non-global zone).
+ *
+ * The two hooks are always arranged so that the GZ-controlled stack is always
+ * "outermost" with respect to the zone. The traffic flow then looks like
+ * this:
+ *
+ * Inbound:
+ *
+ * nic ---> [ GZ-controlled rules ] ---> [ per-zone rules ] ---> zone
+ *
+ * Outbound:
+ *
+ * nic <--- [ GZ-controlled rules ] <--- [ per-zone rules ] <--- zone
+ */
+
+/* IPv4 hook names */
+char *hook4_nicevents = "ipfilter_hook4_nicevents";
+char *hook4_nicevents_gz = "ipfilter_hook4_nicevents_gz";
+char *hook4_in = "ipfilter_hook4_in";
+char *hook4_in_gz = "ipfilter_hook4_in_gz";
+char *hook4_out = "ipfilter_hook4_out";
+char *hook4_out_gz = "ipfilter_hook4_out_gz";
+char *hook4_loop_in = "ipfilter_hook4_loop_in";
+char *hook4_loop_in_gz = "ipfilter_hook4_loop_in_gz";
+char *hook4_loop_out = "ipfilter_hook4_loop_out";
+char *hook4_loop_out_gz = "ipfilter_hook4_loop_out_gz";
+
+/* IPv6 hook names */
+char *hook6_nicevents = "ipfilter_hook6_nicevents";
+char *hook6_nicevents_gz = "ipfilter_hook6_nicevents_gz";
+char *hook6_in = "ipfilter_hook6_in";
+char *hook6_in_gz = "ipfilter_hook6_in_gz";
+char *hook6_out = "ipfilter_hook6_out";
+char *hook6_out_gz = "ipfilter_hook6_out_gz";
+char *hook6_loop_in = "ipfilter_hook6_loop_in";
+char *hook6_loop_in_gz = "ipfilter_hook6_loop_in_gz";
+char *hook6_loop_out = "ipfilter_hook6_loop_out";
+char *hook6_loop_out_gz = "ipfilter_hook6_loop_out_gz";
/* ------------------------------------------------------------------------ */
/* Function: ipldetach */
@@ -117,7 +167,7 @@ int ipldetach(ifs)
ipf_stack_t *ifs;
{
- ASSERT(rw_read_locked(&ifs->ifs_ipf_global.ipf_lk) == 0);
+ ASSERT(RW_WRITE_HELD(&ifs->ifs_ipf_global.ipf_lk));
#if SOLARIS2 < 10
@@ -244,7 +294,7 @@ ipf_stack_t *ifs;
cmn_err(CE_CONT, "iplattach()\n");
#endif
- ASSERT(rw_read_locked(&ifs->ifs_ipf_global.ipf_lk) == 0);
+ ASSERT(RW_WRITE_HELD(&ifs->ifs_ipf_global.ipf_lk));
ifs->ifs_fr_flags = IPF_LOGGING;
#ifdef _KERNEL
ifs->ifs_fr_update_ipid = 0;
@@ -269,16 +319,39 @@ ipf_stack_t *ifs;
if (fr_initialise(ifs) < 0)
return -1;
- HOOK_INIT(ifs->ifs_ipfhook4_nicevents, ipf_nic_event_v4,
- "ipfilter_hook4_nicevents", ifs);
- HOOK_INIT(ifs->ifs_ipfhook4_in, ipf_hook4_in,
- "ipfilter_hook4_in", ifs);
- HOOK_INIT(ifs->ifs_ipfhook4_out, ipf_hook4_out,
- "ipfilter_hook4_out", ifs);
- HOOK_INIT(ifs->ifs_ipfhook4_loop_in, ipf_hook4_loop_in,
- "ipfilter_hook4_loop_in", ifs);
- HOOK_INIT(ifs->ifs_ipfhook4_loop_out, ipf_hook4_loop_out,
- "ipfilter_hook4_loop_out", ifs);
+ /*
+ * For incoming packets, we want the GZ-controlled hooks to run before
+ * the per-zone hooks, regardless of what order they are installed in.
+ * See the "GZ-controlled and per-zone stacks" comment block at the top
+ * of this file.
+ */
+#define HOOK_INIT_GZ_BEFORE(x, fn, n, gzn, a) \
+ HOOK_INIT(x, fn, ifs->ifs_gz_controlled ? gzn : n, ifs); \
+ (x)->h_hint = ifs->ifs_gz_controlled ? HH_BEFORE : HH_AFTER; \
+ (x)->h_hintvalue = (uintptr_t) (ifs->ifs_gz_controlled ? n : gzn);
+
+ HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_nicevents, ipf_nic_event_v4,
+ hook4_nicevents, hook4_nicevents_gz, ifs);
+ HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_in, ipf_hook4_in,
+ hook4_in, hook4_in_gz, ifs);
+ HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_loop_in, ipf_hook4_loop_in,
+ hook4_loop_in, hook4_loop_in_gz, ifs);
+
+ /*
+ * For outgoing packets, we want the GZ-controlled hooks to run after
+ * the per-zone hooks, regardless of what order they are installed in.
+ * See the "GZ-controlled and per-zone stacks" comment block at the top
+ * of this file.
+ */
+#define HOOK_INIT_GZ_AFTER(x, fn, n, gzn, a) \
+ HOOK_INIT(x, fn, ifs->ifs_gz_controlled ? gzn : n, ifs); \
+ (x)->h_hint = ifs->ifs_gz_controlled ? HH_AFTER : HH_BEFORE; \
+ (x)->h_hintvalue = (uintptr_t) (ifs->ifs_gz_controlled ? n : gzn);
+
+ HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook4_out, ipf_hook4_out,
+ hook4_out, hook4_out_gz, ifs);
+ HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook4_loop_out, ipf_hook4_loop_out,
+ hook4_loop_out, hook4_loop_out_gz, ifs);
/*
* If we hold this lock over all of the net_hook_register calls, we
@@ -323,6 +396,7 @@ ipf_stack_t *ifs;
if (!ifs->ifs_hook4_loopback_out)
goto hookup_failed;
}
+
/*
* Add IPv6 hooks
*/
@@ -330,16 +404,16 @@ ipf_stack_t *ifs;
if (ifs->ifs_ipf_ipv6 == NULL)
goto hookup_failed;
- HOOK_INIT(ifs->ifs_ipfhook6_nicevents, ipf_nic_event_v6,
- "ipfilter_hook6_nicevents", ifs);
- HOOK_INIT(ifs->ifs_ipfhook6_in, ipf_hook6_in,
- "ipfilter_hook6_in", ifs);
- HOOK_INIT(ifs->ifs_ipfhook6_out, ipf_hook6_out,
- "ipfilter_hook6_out", ifs);
- HOOK_INIT(ifs->ifs_ipfhook6_loop_in, ipf_hook6_loop_in,
- "ipfilter_hook6_loop_in", ifs);
- HOOK_INIT(ifs->ifs_ipfhook6_loop_out, ipf_hook6_loop_out,
- "ipfilter_hook6_loop_out", ifs);
+ HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_nicevents, ipf_nic_event_v6,
+ hook6_nicevents, hook6_nicevents_gz, ifs);
+ HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_in, ipf_hook6_in,
+ hook6_in, hook6_in_gz, ifs);
+ HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_loop_in, ipf_hook6_loop_in,
+ hook6_loop_in, hook6_loop_in_gz, ifs);
+ HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook6_out, ipf_hook6_out,
+ hook6_out, hook6_out_gz, ifs);
+ HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook6_loop_out, ipf_hook6_loop_out,
+ hook6_loop_out, hook6_loop_out_gz, ifs);
ifs->ifs_hook6_nic_events = (net_hook_register(ifs->ifs_ipf_ipv6,
NH_NIC_EVENTS, ifs->ifs_ipfhook6_nicevents) == 0);
@@ -519,35 +593,47 @@ int *rp;
minor_t unit;
u_int enable;
ipf_stack_t *ifs;
+ zoneid_t zid;
+ ipf_devstate_t *isp;
#ifdef IPFDEBUG
cmn_err(CE_CONT, "iplioctl(%x,%x,%x,%d,%x,%d)\n",
dev, cmd, data, mode, cp, rp);
#endif
unit = getminor(dev);
- if (IPL_LOGMAX < unit)
+
+ isp = ddi_get_soft_state(ipf_state, unit);
+ if (isp == NULL)
return ENXIO;
+ unit = isp->ipfs_minor;
+
+ zid = crgetzoneid(cp);
+ if (cmd == SIOCIPFZONESET) {
+ if (zid == GLOBAL_ZONEID)
+ return fr_setzoneid(isp, (caddr_t) data);
+ return EACCES;
+ }
/*
- * As we're calling ipf_find_stack in user space, from a given zone
- * to find the stack pointer for this zone, there is no need to have
- * a hold/refence count here.
+ * ipf_find_stack returns with a read lock on ifs_ipf_global
*/
- ifs = ipf_find_stack(crgetzoneid(cp));
- ASSERT(ifs != NULL);
+ ifs = ipf_find_stack(zid, isp);
+ if (ifs == NULL)
+ return ENXIO;
if (ifs->ifs_fr_running <= 0) {
if (unit != IPL_LOGIPF) {
+ RWLOCK_EXIT(&ifs->ifs_ipf_global);
return EIO;
}
if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET &&
cmd != SIOCIPFSET && cmd != SIOCFRENB &&
cmd != SIOCGETFS && cmd != SIOCGETFF) {
+ RWLOCK_EXIT(&ifs->ifs_ipf_global);
return EIO;
}
}
- READ_ENTER(&ifs->ifs_ipf_global);
if (ifs->ifs_fr_enable_active != 0) {
RWLOCK_EXIT(&ifs->ifs_ipf_global);
return EBUSY;
@@ -848,7 +934,9 @@ dev_t *devp;
int flags, otype;
cred_t *cred;
{
+ ipf_devstate_t *isp;
minor_t min = getminor(*devp);
+ minor_t minor;
#ifdef IPFDEBUG
cmn_err(CE_CONT, "iplopen(%x,%x,%x,%x)\n", devp, flags, otype, cred);
@@ -856,8 +944,25 @@ cred_t *cred;
if (!(otype & OTYP_CHR))
return ENXIO;
- min = (IPL_LOGMAX < min) ? ENXIO : 0;
- return min;
+ if (IPL_LOGMAX < min)
+ return ENXIO;
+
+ minor = (minor_t)(uintptr_t)vmem_alloc(ipf_minor, 1,
+ VM_BESTFIT | VM_SLEEP);
+
+ if (ddi_soft_state_zalloc(ipf_state, minor) != 0) {
+ vmem_free(ipf_minor, (void *)(uintptr_t)minor, 1);
+ return ENXIO;
+ }
+
+ *devp = makedevice(getmajor(*devp), minor);
+ isp = ddi_get_soft_state(ipf_state, minor);
+ VERIFY(isp != NULL);
+
+ isp->ipfs_minor = min;
+ isp->ipfs_zoneid = IPFS_ZONE_UNSET;
+
+ return 0;
}
@@ -873,8 +978,13 @@ cred_t *cred;
cmn_err(CE_CONT, "iplclose(%x,%x,%x,%x)\n", dev, flags, otype, cred);
#endif
- min = (IPL_LOGMAX < min) ? ENXIO : 0;
- return min;
+ if (IPL_LOGMAX < min)
+ return ENXIO;
+
+ ddi_soft_state_free(ipf_state, min);
+ vmem_free(ipf_minor, (void *)(uintptr_t)min, 1);
+
+ return 0;
}
#ifdef IPFILTER_LOG
@@ -892,30 +1002,41 @@ cred_t *cp;
{
ipf_stack_t *ifs;
int ret;
+ minor_t unit;
+ ipf_devstate_t *isp;
+
+ unit = getminor(dev);
+ isp = ddi_get_soft_state(ipf_state, unit);
+ if (isp == NULL)
+ return ENXIO;
+ unit = isp->ipfs_minor;
+
/*
- * As we're calling ipf_find_stack in user space, from a given zone
- * to find the stack pointer for this zone, there is no need to have
- * a hold/refence count here.
+ * ipf_find_stack returns with a read lock on ifs_ipf_global
*/
- ifs = ipf_find_stack(crgetzoneid(cp));
- ASSERT(ifs != NULL);
+ ifs = ipf_find_stack(crgetzoneid(cp), isp);
+ if (ifs == NULL)
+ return ENXIO;
# ifdef IPFDEBUG
cmn_err(CE_CONT, "iplread(%x,%x,%x)\n", dev, uio, cp);
# endif
if (ifs->ifs_fr_running < 1) {
+ RWLOCK_EXIT(&ifs->ifs_ipf_global);
return EIO;
}
# ifdef IPFILTER_SYNC
- if (getminor(dev) == IPL_LOGSYNC) {
+ if (unit == IPL_LOGSYNC) {
+ RWLOCK_EXIT(&ifs->ifs_ipf_global);
return ipfsync_read(uio);
}
# endif
- ret = ipflog_read(getminor(dev), uio, ifs);
+ ret = ipflog_read(unit, uio, ifs);
+ RWLOCK_EXIT(&ifs->ifs_ipf_global);
return ret;
}
#endif /* IPFILTER_LOG */
@@ -933,30 +1054,41 @@ register struct uio *uio;
cred_t *cp;
{
ipf_stack_t *ifs;
+ minor_t unit;
+ ipf_devstate_t *isp;
+
+ unit = getminor(dev);
+ isp = ddi_get_soft_state(ipf_state, unit);
+ if (isp == NULL)
+ return ENXIO;
+ unit = isp->ipfs_minor;
/*
- * As we're calling ipf_find_stack in user space, from a given zone
- * to find the stack pointer for this zone, there is no need to have
- * a hold/refence count here.
+ * ipf_find_stack returns with a read lock on ifs_ipf_global
*/
- ifs = ipf_find_stack(crgetzoneid(cp));
- ASSERT(ifs != NULL);
+ ifs = ipf_find_stack(crgetzoneid(cp), isp);
+ if (ifs == NULL)
+ return ENXIO;
#ifdef IPFDEBUG
cmn_err(CE_CONT, "iplwrite(%x,%x,%x)\n", dev, uio, cp);
#endif
if (ifs->ifs_fr_running < 1) {
+ RWLOCK_EXIT(&ifs->ifs_ipf_global);
return EIO;
}
#ifdef IPFILTER_SYNC
- if (getminor(dev) == IPL_LOGSYNC)
+ if (getminor(dev) == IPL_LOGSYNC) {
+ RWLOCK_EXIT(&ifs->ifs_ipf_global);
return ipfsync_write(uio);
+ }
#endif /* IPFILTER_SYNC */
dev = dev; /* LINT */
uio = uio; /* LINT */
cp = cp; /* LINT */
+ RWLOCK_EXIT(&ifs->ifs_ipf_global);
return ENXIO;
}
@@ -1998,7 +2130,6 @@ int ipf_hook6(hook_data_t info, int out, int loopback, void *arg)
fw->hpe_mb = qpi.qpi_m;
fw->hpe_hdr = qpi.qpi_data;
return rval;
-
}
diff --git a/usr/src/uts/common/inet/ipf/ip_log.c b/usr/src/uts/common/inet/ipf/ip_log.c
index 1fe54f3a07..d60a666b2d 100644
--- a/usr/src/uts/common/inet/ipf/ip_log.c
+++ b/usr/src/uts/common/inet/ipf/ip_log.c
@@ -7,6 +7,8 @@
*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ *
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
#include <sys/param.h>
@@ -572,10 +574,18 @@ ipf_stack_t *ifs;
while (ifs->ifs_iplt[unit] == NULL) {
# if SOLARIS && defined(_KERNEL)
+ /*
+ * Prevent a deadlock with ipldetach() - see the "ipfilter
+ * kernel module mutexes and locking" comment block in solaris.c
+ * for details.
+ */
+ RWLOCK_EXIT(&ifs->ifs_ipf_global);
if (!cv_wait_sig(&ifs->ifs_iplwait, &ifs->ifs_ipl_mutex.ipf_lk)) {
+ READ_ENTER(&ifs->ifs_ipf_global);
MUTEX_EXIT(&ifs->ifs_ipl_mutex);
return EINTR;
}
+ READ_ENTER(&ifs->ifs_ipf_global);
# else
# if defined(__hpux) && defined(_KERNEL)
lock_t *l;
diff --git a/usr/src/uts/common/inet/ipf/ip_state.c b/usr/src/uts/common/inet/ipf/ip_state.c
index adf9ff9382..c1fe642d00 100644
--- a/usr/src/uts/common/inet/ipf/ip_state.c
+++ b/usr/src/uts/common/inet/ipf/ip_state.c
@@ -4,6 +4,8 @@
* See the IPFILTER.LICENCE file for details on licencing.
*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
#if defined(KERNEL) || defined(_KERNEL)
diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_fil.h b/usr/src/uts/common/inet/ipf/netinet/ip_fil.h
index 0a2d6431d8..479a8044f4 100644
--- a/usr/src/uts/common/inet/ipf/netinet/ip_fil.h
+++ b/usr/src/uts/common/inet/ipf/netinet/ip_fil.h
@@ -7,12 +7,15 @@
* $Id: ip_fil.h,v 2.170.2.22 2005/07/16 05:55:35 darrenr Exp $
*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
#ifndef __IP_FIL_H__
#define __IP_FIL_H__
#include "netinet/ip_compat.h"
+#include <sys/zone.h>
#ifndef SOLARIS
# define SOLARIS (defined(sun) && (defined(__svr4__) || defined(__SVR4)))
@@ -106,6 +109,7 @@
#define SIOCADDFR SIOCADAFR
#define SIOCDELFR SIOCRMAFR
#define SIOCINSFR SIOCINAFR
+# define SIOCIPFZONESET _IOWR('r', 97, struct ipfzoneobj)
/*
* What type of table is getting flushed?
@@ -1165,6 +1169,26 @@ typedef struct ipfobj {
u_char ipfo_xxxpad[32]; /* reserved for future use */
} ipfobj_t;
+/*
+ * ioctl struct for setting what zone further ioctls will act on. ipfz_gz is a
+ * boolean: set it to 1 to operate on the GZ-controlled stack.
+ */
+typedef struct ipfzoneobj {
+ u_32_t ipfz_gz; /* GZ stack boolean */
+ char ipfz_zonename[ZONENAME_MAX]; /* zone to act on */
+} ipfzoneobj_t;
+
+#if defined(_KERNEL)
+/* Set ipfs_zoneid to this if no zone has been set: */
+#define IPFS_ZONE_UNSET -2
+/* Per-open device state; allocated and initialised in iplopen(). */
+typedef struct ipf_devstate {
+ zoneid_t ipfs_zoneid; /* zone set by SIOCIPFZONESET, else IPFS_ZONE_UNSET */
+ minor_t ipfs_minor; /* minor number the device was originally opened with */
+ boolean_t ipfs_gz; /* B_TRUE: act on the zone's GZ-controlled stack */
+} ipf_devstate_t;
+#endif
+
#define IPFOBJ_FRENTRY 0 /* struct frentry */
#define IPFOBJ_IPFSTAT 1 /* struct friostat */
#define IPFOBJ_IPFINFO 2 /* struct fr_info */
@@ -1352,7 +1376,6 @@ extern void ipfilterattach __P((int));
extern int ipl_enable __P((void));
extern int ipl_disable __P((void));
# ifdef MENTAT
-extern ipf_stack_t *ipf_find_stack(const zoneid_t zone);
extern int fr_check __P((struct ip *, int, void *, int, void *,
mblk_t **, ipf_stack_t *));
# if SOLARIS
@@ -1365,6 +1388,7 @@ extern int iplioctl __P((dev_t, int, int *, int, cred_t *, int *));
extern int fr_make_rst __P((fr_info_t *));
extern int fr_make_icmp __P((fr_info_t *));
extern void fr_calc_chksum __P((fr_info_t *, mb_t *));
+extern ipf_stack_t *ipf_find_stack(const zoneid_t, ipf_devstate_t *);
# endif
extern int iplopen __P((dev_t *, int, int, cred_t *));
extern int iplclose __P((dev_t, int, int, cred_t *));
@@ -1576,6 +1600,10 @@ extern int ipf_earlydrop __P((int, ipftq_t *, int, ipf_stack_t *));
extern u_32_t ipf_random __P((void));
#endif
+#if defined(_KERNEL)
+extern int fr_setzoneid __P((ipf_devstate_t *, void *));
+#endif
+
extern char ipfilter_version[];
#ifdef USE_INET6
extern int icmptoicmp6types[ICMP_MAXTYPE+1];
diff --git a/usr/src/uts/common/inet/ipf/netinet/ipf_stack.h b/usr/src/uts/common/inet/ipf/netinet/ipf_stack.h
index 75703994a5..a239f1c1ca 100644
--- a/usr/src/uts/common/inet/ipf/netinet/ipf_stack.h
+++ b/usr/src/uts/common/inet/ipf/netinet/ipf_stack.h
@@ -5,6 +5,8 @@
*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ *
+ * Copyright 2014 Joyent, Inc. All rights reserved.
*/
#ifndef __IPF_STACK_H__
@@ -41,8 +43,10 @@
struct ipf_stack {
struct ipf_stack *ifs_next;
struct ipf_stack **ifs_pnext;
+ struct ipf_stack *ifs_gz_cont_ifs;
netid_t ifs_netid;
zoneid_t ifs_zone;
+ boolean_t ifs_gz_controlled;
/* ipf module */
fr_info_t ifs_frcache[2][8];
diff --git a/usr/src/uts/common/inet/ipf/solaris.c b/usr/src/uts/common/inet/ipf/solaris.c
index 1a1d94def5..c541f4dddc 100644
--- a/usr/src/uts/common/inet/ipf/solaris.c
+++ b/usr/src/uts/common/inet/ipf/solaris.c
@@ -5,6 +5,50 @@
*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ *
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * ipfilter kernel module mutexes and locking:
+ *
+ * Enabling ipfilter creates a per-netstack ipf_stack_t object that is
+ * stored in the ipf_stacks list, which is protected by ipf_stack_lock.
+ * ipf_stack_t objects are accessed in three contexts:
+ *
+ * 1) administering that filter (eg: ioctls handled with iplioctl())
+ * 2) reading log data (eg: iplread() / iplwrite())
+ * 3) filtering packets (eg: ipf_hook4_* and ipf_hook6_* pfhooks
+ * functions)
+ *
+ * Each ipf_stack_t has a RW lock, ifs_ipf_global, protecting access to the
+ * whole structure. The structure also has locks protecting the various
+ * data structures used for filtering. The following guidelines should be
+ * followed for ipf_stack_t locks:
+ *
+ * - ipf_stack_lock must be held when accessing the ipf_stacks list
+ * - ipf_stack_lock should be held before acquiring ifs_ipf_global for
+ * a stack (the exception to this is ipf_stack_destroy(), which removes
+ * the ipf_stack_t from the list, then drops ipf_stack_lock before
+ * acquiring ifs_ipf_global)
+ * - ifs_ipf_global must be held when accessing an ipf_stack_t in that list:
+ * - The write lock is held only during stack creation / destruction
+ * - The read lock should be held for all other accesses
+ * - To alter the filtering data in the administrative context, one must:
+ * - acquire the read lock for ifs_ipf_global
+ * - then acquire the write lock for the data in question
+ * - In the filtering path, the read lock needs to be held for each type of
+ * filtering data used
+ * - ifs_ipf_global does not need to be held in the filtering path:
+ * - The filtering hooks don't need to modify the stack itself
+ * - The ipf_stack_t will not be destroyed until the hooks are unregistered.
+ * This requires a write lock on the hook, ensuring that no active hooks
+ * (eg: the filtering path) are running, and that the hooks won't be run
+ * afterward.
+ *
+ * Note that there is a deadlock possible when calling net_hook_register()
+ * or net_hook_unregister() with ifs_ipf_global held: see the comments in
+ * iplattach() and ipldetach() for details.
*/
#include <sys/systm.h>
@@ -73,7 +117,8 @@ static int ipf_property_g_update __P((dev_info_t *));
static char *ipf_devfiles[] = { IPL_NAME, IPNAT_NAME, IPSTATE_NAME,
IPAUTH_NAME, IPSYNC_NAME, IPSCAN_NAME,
IPLOOKUP_NAME, NULL };
-
+extern void *ipf_state; /* DDI state */
+extern vmem_t *ipf_minor; /* minor number arena */
static struct cb_ops ipf_cb_ops = {
iplopen,
@@ -221,10 +266,11 @@ static const filter_kstats_t ipf_kstat_tmp = {
static int ipf_kstat_update(kstat_t *ksp, int rwflag);
static void
-ipf_kstat_init(ipf_stack_t *ifs)
+ipf_kstat_init(ipf_stack_t *ifs, boolean_t from_gz)
{
- ifs->ifs_kstatp[0] = net_kstat_create(ifs->ifs_netid, "ipf", 0,
- "inbound", "net", KSTAT_TYPE_NAMED,
+ ifs->ifs_kstatp[0] = net_kstat_create(ifs->ifs_netid,
+ (from_gz ? "ipf_gz" : "ipf"),
+ 0, "inbound", "net", KSTAT_TYPE_NAMED,
sizeof (filter_kstats_t) / sizeof (kstat_named_t), 0);
if (ifs->ifs_kstatp[0] != NULL) {
bcopy(&ipf_kstat_tmp, ifs->ifs_kstatp[0]->ks_data,
@@ -234,8 +280,9 @@ ipf_kstat_init(ipf_stack_t *ifs)
kstat_install(ifs->ifs_kstatp[0]);
}
- ifs->ifs_kstatp[1] = net_kstat_create(ifs->ifs_netid, "ipf", 0,
- "outbound", "net", KSTAT_TYPE_NAMED,
+ ifs->ifs_kstatp[1] = net_kstat_create(ifs->ifs_netid,
+ (from_gz ? "ipf_gz" : "ipf"),
+ 0, "outbound", "net", KSTAT_TYPE_NAMED,
sizeof (filter_kstats_t) / sizeof (kstat_named_t), 0);
if (ifs->ifs_kstatp[1] != NULL) {
bcopy(&ipf_kstat_tmp, ifs->ifs_kstatp[1]->ks_data,
@@ -369,12 +416,14 @@ dev_info_t *dip;
* Initialize things for IPF for each stack instance
*/
static void *
-ipf_stack_create(const netid_t id)
+ipf_stack_create_one(const netid_t id, const zoneid_t zid, boolean_t from_gz,
+ ipf_stack_t *ifs_gz)
{
ipf_stack_t *ifs;
#ifdef IPFDEBUG
- cmn_err(CE_NOTE, "IP Filter:stack_create id=%d", id);
+ cmn_err(CE_NOTE, "IP Filter:stack_create_one id=%d global=%d", id,
+ global);
#endif
ifs = (ipf_stack_t *)kmem_alloc(sizeof (*ifs), KM_SLEEP);
@@ -398,8 +447,11 @@ ipf_stack_create(const netid_t id)
RWLOCK_INIT(&ifs->ifs_ipf_mutex, "ipf filter rwlock");
RWLOCK_INIT(&ifs->ifs_ipf_frcache, "ipf cache rwlock");
ifs->ifs_netid = id;
- ifs->ifs_zone = net_getzoneidbynetid(id);
- ipf_kstat_init(ifs);
+ ifs->ifs_zone = zid;
+ ifs->ifs_gz_controlled = from_gz;
+ ifs->ifs_gz_cont_ifs = ifs_gz;
+
+ ipf_kstat_init(ifs, from_gz);
#ifdef IPFDEBUG
cmn_err(CE_CONT, "IP Filter:stack_create zone=%d", ifs->ifs_zone);
@@ -427,31 +479,84 @@ ipf_stack_create(const netid_t id)
return (ifs);
}
+static void *
+ipf_stack_create(const netid_t id)
+{
+ ipf_stack_t *ifs = NULL;
+ zoneid_t zid = net_getzoneidbynetid(id);
+
+ /*
+ * Create two ipfilter stacks for a non-global zone: first the
+ * GZ-controlled stack, then the per-zone stack owned by the zone
+ * itself. The per-zone stack is the one returned, and it keeps a
+ * pointer to the GZ-controlled stack in ifs_gz_cont_ifs. The global
+ * zone gets only a per-zone stack (ifs stays NULL). See the
+ * "GZ-controlled and per-zone stacks" block in ip_fil_solaris.c.
+ */
+ if (zid != GLOBAL_ZONEID)
+ ifs = ipf_stack_create_one(id, zid, B_TRUE, NULL);
+
+ return (ipf_stack_create_one(id, zid, B_FALSE, ifs));
+}
/*
- * This function should only ever be used to find the pointer to the
- * ipfilter stack structure for the zone that is currently being
- * executed... so if you're running in the context of zone 1, you
- * should not attempt to find the ipf_stack_t for zone 0 or 2 or
- * anything else but 1. In that way, the returned pointer is safe
- * as it will only be nuked when the instance is destroyed as part
- * of the final shutdown of a zone.
+ * Find an ipfilter stack for the given zone. Return the GZ-controlled or
+ * per-zone stack if set by an earlier SIOCIPFZONESET ioctl call. See the
+ * "GZ-controlled and per-zone stacks" comment block in ip_fil_solaris.c for
+ * details.
+ *
+ * This function returns with the ipf_stack_t's ifs_ipf_global
+ * read lock held (if the stack is found). See the "ipfilter kernel module
+ * mutexes and locking" comment block at the top of this file.
*/
ipf_stack_t *
-ipf_find_stack(const zoneid_t zone)
+ipf_find_stack(const zoneid_t orig_zone, ipf_devstate_t *isp)
{
ipf_stack_t *ifs;
+ boolean_t gz_stack;
+ zoneid_t zone;
+
+ /*
+ * If we're in the GZ, determine if we're acting on a zone's stack,
+ * and whether or not that stack is the GZ-controlled or in-zone
+ * one. See the "GZ-controlled and per-zone stacks" comment block in
+ * ip_fil_solaris.c.
+ */
+ if (orig_zone == GLOBAL_ZONEID &&
+ (isp->ipfs_zoneid != IPFS_ZONE_UNSET)) {
+ /* Global zone, and we've set the zoneid for this fd already */
+
+ if (orig_zone == isp->ipfs_zoneid) {
+ /* There's only a per-zone stack for the GZ */
+ gz_stack = B_FALSE;
+ } else {
+ gz_stack = isp->ipfs_gz;
+ }
+
+ zone = isp->ipfs_zoneid;
+ } else {
+ /*
+ * Non-global zone or GZ without having set a zoneid: act on
+ * the per-zone stack of the zone that this ioctl originated
+ * from.
+ */
+ gz_stack = B_FALSE;
+ zone = orig_zone;
+ }
mutex_enter(&ipf_stack_lock);
for (ifs = ipf_stacks; ifs != NULL; ifs = ifs->ifs_next) {
- if (ifs->ifs_zone == zone)
+ if (ifs->ifs_zone == zone && ifs->ifs_gz_controlled == gz_stack)
break;
}
+
+ if (ifs != NULL) {
+ READ_ENTER(&ifs->ifs_ipf_global); /* caller must RWLOCK_EXIT() */
+ }
mutex_exit(&ipf_stack_lock);
return (ifs); /* NULL if no matching stack was found */
}
-
static int ipf_detach_check_zone(ipf_stack_t *ifs)
{
/*
@@ -495,7 +600,8 @@ static int ipf_detach_check_all()
/*
- * Destroy things for ipf for one stack.
+ * Remove ipf kstats for both the per-zone ipf stack and the
+ * GZ-controlled stack for the same zone, if it exists.
*/
/* ARGSUSED */
static void
@@ -503,6 +609,15 @@ ipf_stack_shutdown(const netid_t id, void *arg)
{
ipf_stack_t *ifs = (ipf_stack_t *)arg;
+ /*
+ * The GZ-controlled stack
+ */
+ if (ifs->ifs_gz_cont_ifs != NULL)
+ ipf_kstat_fini(ifs->ifs_gz_cont_ifs);
+
+ /*
+ * The per-zone stack
+ */
ipf_kstat_fini(ifs);
}
@@ -512,13 +627,12 @@ ipf_stack_shutdown(const netid_t id, void *arg)
*/
/* ARGSUSED */
static void
-ipf_stack_destroy(const netid_t id, void *arg)
+ipf_stack_destroy_one(const netid_t id, ipf_stack_t *ifs)
{
- ipf_stack_t *ifs = (ipf_stack_t *)arg;
timeout_id_t tid;
#ifdef IPFDEBUG
- (void) printf("ipf_stack_destroy(%p)\n", (void *)ifs);
+ (void) printf("ipf_stack_destroy_one(%p)\n", (void *)ifs);
#endif
/*
@@ -546,7 +660,7 @@ ipf_stack_destroy(const netid_t id, void *arg)
WRITE_ENTER(&ifs->ifs_ipf_global);
if (ipldetach(ifs) != 0) {
- printf("ipf_stack_destroy: ipldetach failed\n");
+ printf("ipf_stack_destroy_one: ipldetach failed\n");
}
ipftuneable_free(ifs);
@@ -560,6 +674,30 @@ ipf_stack_destroy(const netid_t id, void *arg)
}
+/*
+ * Destroy things for ipf for both the per-zone ipf stack and the
+ * GZ-controlled stack for the same zone, if it exists. See the "GZ-controlled
+ * and per-zone stacks" comment block in ip_fil_solaris.c for details.
+ */
+/* ARGSUSED */
+static void
+ipf_stack_destroy(const netid_t id, void *arg)
+{
+ ipf_stack_t *ifs = (ipf_stack_t *)arg; /* the per-zone stack */
+
+ /*
+ * The GZ-controlled stack, destroyed first (NULL for the global zone)
+ */
+ if (ifs->ifs_gz_cont_ifs != NULL)
+ ipf_stack_destroy_one(id, ifs->ifs_gz_cont_ifs);
+
+ /*
+ * The per-zone stack (arg itself)
+ */
+ ipf_stack_destroy_one(id, ifs);
+}
+
+
static int ipf_attach(dip, cmd)
dev_info_t *dip;
ddi_attach_cmd_t cmd;
@@ -586,27 +724,39 @@ ddi_attach_cmd_t cmd;
(void) ipf_property_g_update(dip);
+ if (ddi_soft_state_init(&ipf_state, sizeof (ipf_devstate_t), 1)
+ != 0) {
+ ddi_prop_remove_all(dip);
+ return (DDI_FAILURE);
+ }
+
for (i = 0; ((s = ipf_devfiles[i]) != NULL); i++) {
s = strrchr(s, '/');
if (s == NULL)
continue;
s++;
if (ddi_create_minor_node(dip, s, S_IFCHR, i,
- DDI_PSEUDO, 0) ==
- DDI_FAILURE) {
- ddi_remove_minor_node(dip, NULL);
+ DDI_PSEUDO, 0) == DDI_FAILURE)
goto attach_failed;
- }
}
ipf_dev_info = dip;
ipfncb = net_instance_alloc(NETINFO_VERSION);
+ if (ipfncb == NULL)
+ goto attach_failed;
+
ipfncb->nin_name = "ipf";
ipfncb->nin_create = ipf_stack_create;
ipfncb->nin_destroy = ipf_stack_destroy;
ipfncb->nin_shutdown = ipf_stack_shutdown;
- i = net_instance_register(ipfncb);
+ if (net_instance_register(ipfncb) == DDI_FAILURE) {
+ net_instance_free(ipfncb);
+ goto attach_failed;
+ }
+
+ ipf_minor = vmem_create("ipf_minor", (void *)1, UINT32_MAX - 1,
+ 1, NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
#ifdef IPFDEBUG
cmn_err(CE_CONT, "IP Filter:stack_create callback_reg=%d", i);
@@ -619,7 +769,9 @@ ddi_attach_cmd_t cmd;
}
attach_failed:
+ ddi_remove_minor_node(dip, NULL);
ddi_prop_remove_all(dip);
+ ddi_soft_state_fini(&ipf_state);
return (DDI_FAILURE);
}
@@ -652,6 +804,9 @@ ddi_detach_cmd_t cmd;
return (DDI_FAILURE);
}
+ vmem_destroy(ipf_minor);
+ ddi_soft_state_fini(&ipf_state);
+
(void) net_instance_unregister(ipfncb);
net_instance_free(ipfncb);