summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--manifest1
-rw-r--r--usr/src/cmd/devfsadm/misc_link.c4
-rw-r--r--usr/src/cmd/ipf/lib/common/printfr.c18
-rw-r--r--usr/src/cmd/ipf/tools/Makefile.tools6
-rw-r--r--usr/src/cmd/ipf/tools/ipf_y.y17
-rw-r--r--usr/src/cmd/ipf/tools/ipmon_y.y5
-rw-r--r--usr/src/cmd/ipf/tools/ipnat_y.y4
-rw-r--r--usr/src/cmd/ipf/tools/ippool_y.y4
-rw-r--r--usr/src/cmd/ipf/tools/lexer.c37
-rw-r--r--usr/src/cmd/ipf/tools/lexer.h5
-rw-r--r--usr/src/uts/common/Makefile.files4
-rw-r--r--usr/src/uts/common/inet/ipf/cfw.c659
-rw-r--r--usr/src/uts/common/inet/ipf/fil.c5
-rw-r--r--usr/src/uts/common/inet/ipf/ip_fil_solaris.c45
-rw-r--r--usr/src/uts/common/inet/ipf/ip_log.c3
-rw-r--r--usr/src/uts/common/inet/ipf/ip_state.c19
-rw-r--r--usr/src/uts/common/inet/ipf/netinet/Makefile7
-rw-r--r--usr/src/uts/common/inet/ipf/netinet/ip_fil.h46
-rw-r--r--usr/src/uts/common/inet/ipf/netinet/ip_state.h4
-rw-r--r--usr/src/uts/common/inet/ipf/netinet/ipf_cfw.h69
-rw-r--r--usr/src/uts/common/inet/ipf/netinet/ipf_stack.h4
-rw-r--r--usr/src/uts/common/inet/ipf/solaris.c9
-rw-r--r--usr/src/uts/intel/ipf/ipf.global-objs.debug6415
-rw-r--r--usr/src/uts/sparc/ipf/ipf.global-objs.debug6415
24 files changed, 978 insertions, 27 deletions
diff --git a/manifest b/manifest
index 10864d500a..73246a034b 100644
--- a/manifest
+++ b/manifest
@@ -80,6 +80,7 @@ s dev/ip6=../devices/pseudo/ip6@0:ip6
s dev/ip=../devices/pseudo/ip@0:ip
s dev/ipauth=../devices/pseudo/ipf@0:ipauth
s dev/ipf=../devices/pseudo/ipf@0:ipf
+s dev/ipfev=../devices/pseudo/ipf@0:ipfev
s dev/ipl=../devices/pseudo/ipf@0:ipf
s dev/iplookup=../devices/pseudo/ipf@0:iplookup
s dev/ipmpstub=../devices/pseudo/dlpistub@0:ipmpstub
diff --git a/usr/src/cmd/devfsadm/misc_link.c b/usr/src/cmd/devfsadm/misc_link.c
index 55aff1e4f7..49be9e9b2d 100644
--- a/usr/src/cmd/devfsadm/misc_link.c
+++ b/usr/src/cmd/devfsadm/misc_link.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2015, Joyent, Inc. All rights reserved.
+ * Copyright 2019 Joyent, Inc.
*/
#include <regex.h>
@@ -124,7 +124,7 @@ static devfsadm_create_t misc_cbt[] = {
},
{ "pseudo", "ddi_pseudo",
"(^ipf$)|(^ipnat$)|(^ipstate$)|(^ipauth$)|"
- "(^ipsync$)|(^ipscan$)|(^iplookup$)",
+ "(^ipsync$)|(^ipscan$)|(^iplookup$)|(^ipfev$)",
TYPE_EXACT | DRV_RE, ILEVEL_0, minor_name,
},
{ "pseudo", "ddi_pseudo", "dld",
diff --git a/usr/src/cmd/ipf/lib/common/printfr.c b/usr/src/cmd/ipf/lib/common/printfr.c
index 063eb87c07..b096c46e0e 100644
--- a/usr/src/cmd/ipf/lib/common/printfr.c
+++ b/usr/src/cmd/ipf/lib/common/printfr.c
@@ -7,9 +7,10 @@
*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2019 Joyent, Inc.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
+#include <uuid/uuid.h>
#include "ipf.h"
@@ -386,7 +387,8 @@ ioctlfunc_t iocfunc;
printf(" head %s", fp->fr_grhead);
if (*fp->fr_group != '\0')
printf(" group %s", fp->fr_group);
- if (fp->fr_logtag != FR_NOLOGTAG || *fp->fr_nattag.ipt_tag) {
+ if (fp->fr_logtag != FR_NOLOGTAG || *fp->fr_nattag.ipt_tag ||
+ (fp->fr_flags & FR_CFWLOG) || !uuid_is_null(fp->fr_uuid)) {
char *s = "";
printf(" set-tag(");
@@ -397,6 +399,18 @@ ioctlfunc_t iocfunc;
if (*fp->fr_nattag.ipt_tag) {
printf("%snat=%-.*s", s, IPFTAG_LEN,
fp->fr_nattag.ipt_tag);
+ s = ", ";
+ }
+		/*
+		 * Print the cfwlog tag with the pending separator, exactly as
+		 * the nat= and uuid= members of set-tag() do. Without it a
+		 * preceding tag runs straight into "cfwlog" in the output.
+		 */
+		if (fp->fr_flags & FR_CFWLOG) {
+			printf("%scfwlog", s);
+			s = ", ";
+		}
+
+ if (!uuid_is_null(fp->fr_uuid)) {
+ char uuid[UUID_PRINTABLE_STRING_LENGTH];
+
+ uuid_unparse(fp->fr_uuid, uuid);
+ printf("%suuid=%s", s, uuid);
}
printf(")");
}
diff --git a/usr/src/cmd/ipf/tools/Makefile.tools b/usr/src/cmd/ipf/tools/Makefile.tools
index ce0db79970..c57f0ace87 100644
--- a/usr/src/cmd/ipf/tools/Makefile.tools
+++ b/usr/src/cmd/ipf/tools/Makefile.tools
@@ -23,7 +23,7 @@
# Use is subject to license terms.
#
# Copyright 2013 Nexenta Systems, Inc. All rights reserved.
-# Copyright (c) 2012, Joyent Inc. All rights reserved.
+# Copyright 2019 Joyent, Inc.
#
PROG= ipf ipfs ipmon ipnat ippool ipfstat
@@ -35,7 +35,7 @@ IPFSTAT_OBJS= ipfstat.o ipfzone.o
IPMON_OBJS= ipmon.o ipfzone.o ipmon_y.o ipmon_l.o
IPNAT_OBJS= ipnat.o ipfzone.o ipnat_y.o ipnat_l.o
IPPOOL_OBJS= ippool.o ipfzone.o ippool_y.o ippool_l.o
-IPFTEST_OBJS= ipftest.o ipfzone.o \
+IPFTEST_OBJS= cfw.o ipftest.o ipfzone.o \
ip_fil.o ip_state.o ip_compat.o \
ip_frag.o ip_nat.o ip_nat6.o fil.o \
ip_htable.o ip_lookup.o \
@@ -56,7 +56,7 @@ SRCS= $(OBJSL:%.o=../%.c)
include ../../../Makefile.cmd
include ../../Makefile.ipf
-LDLIBS += $(LIBBPF)
+LDLIBS += $(LIBBPF) -luuid
LDFLAGS += $(MAPFILE.NGB:%=-M%)
CPPFLAGS += -I. -DIPFILTER_LOOKUP -DIPFILTER_LOG
diff --git a/usr/src/cmd/ipf/tools/ipf_y.y b/usr/src/cmd/ipf/tools/ipf_y.y
index 7689d676c7..c8909b4e92 100644
--- a/usr/src/cmd/ipf/tools/ipf_y.y
+++ b/usr/src/cmd/ipf/tools/ipf_y.y
@@ -6,6 +6,7 @@
*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2019 Joyent, Inc.
*/
#include "ipf.h"
@@ -16,6 +17,7 @@
# define _NET_BPF_H_
# include <pcap.h>
#endif
+#include <uuid/uuid.h>
#include "netinet/ip_pool.h"
#include "netinet/ip_htable.h"
#include "netinet/ipl.h"
@@ -98,6 +100,7 @@ static int set_ipv6_addr = 0;
union i6addr m;
} ipp;
union i6addr ip6;
+ uuid_t uuid;
};
%type <port> portnum
@@ -117,6 +120,7 @@ static int set_ipv6_addr = 0;
%token YY_CMP_EQ YY_CMP_NE YY_CMP_LE YY_CMP_GE YY_CMP_LT YY_CMP_GT
%token YY_RANGE_OUT YY_RANGE_IN
%token <ip6> YY_IPV6
+%token <uuid> YY_UUID
%token IPFY_PASS IPFY_BLOCK IPFY_COUNT IPFY_CALL
%token IPFY_RETICMP IPFY_RETRST IPFY_RETICMPASDST
@@ -127,6 +131,7 @@ static int set_ipv6_addr = 0;
%token IPFY_HEAD IPFY_GROUP
%token IPFY_AUTH IPFY_PREAUTH
%token IPFY_LOG IPFY_BODY IPFY_FIRST IPFY_LEVEL IPFY_ORBLOCK
+%token IPFY_UUID IPFY_CFWLOG
%token IPFY_LOGTAG IPFY_MATCHTAG IPFY_SETTAG IPFY_SKIP
%token IPFY_FROM IPFY_ALL IPFY_ANY IPFY_BPFV4 IPFY_BPFV6 IPFY_POOL IPFY_HASH
%token IPFY_PPS
@@ -518,6 +523,8 @@ taginlist:
taginspec:
logtag
|nattag
+ |uuidtag
+ |cfwtag
;
nattag: IPFY_NAT '=' YY_STR { DOALL(strncpy(fr->fr_nattag.ipt_tag,\
@@ -530,6 +537,12 @@ nattag: IPFY_NAT '=' YY_STR { DOALL(strncpy(fr->fr_nattag.ipt_tag,\
logtag: IPFY_LOG '=' YY_NUMBER { DOALL(fr->fr_logtag = $3;) }
;
+cfwtag: IPFY_CFWLOG { DOALL(fr->fr_flags |= FR_CFWLOG;) }
+ ;
+
+uuidtag: IPFY_UUID '=' YY_UUID { DOALL(uuid_copy(fr->fr_uuid, $3);) }
+ ;
+
settagout:
| IPFY_SETTAG '(' tagoutlist ')'
;
@@ -542,6 +555,8 @@ tagoutlist:
tagoutspec:
logtag
| nattag
+ | uuidtag
+ | cfwtag
;
matchtagin:
@@ -1566,6 +1581,7 @@ static struct wordtab ipfwords[96] = {
{ "bpf-v6", IPFY_BPFV6 },
#endif
{ "call", IPFY_CALL },
+ { "cfwlog", IPFY_CFWLOG },
{ "code", IPFY_ICMPCODE },
{ "count", IPFY_COUNT },
{ "dup-to", IPFY_DUPTO },
@@ -1641,6 +1657,7 @@ static struct wordtab ipfwords[96] = {
{ "to", IPFY_TO },
{ "ttl", IPFY_TTL },
{ "udp", IPFY_UDP },
+ { "uuid", IPFY_UUID },
{ "v6hdrs", IPF6_V6HDRS },
{ "with", IPFY_WITH },
{ NULL, 0 }
diff --git a/usr/src/cmd/ipf/tools/ipmon_y.y b/usr/src/cmd/ipf/tools/ipmon_y.y
index e6dda81c35..b4fce4015d 100644
--- a/usr/src/cmd/ipf/tools/ipmon_y.y
+++ b/usr/src/cmd/ipf/tools/ipmon_y.y
@@ -1,11 +1,14 @@
/*
* Copyright (C) 1993-2005 by Darren Reed.
* See the IPFILTER.LICENCE file for details on licencing.
+ *
+ * Copyright 2019 Joyent, Inc.
*/
%{
#include "ipf.h"
#include <syslog.h>
+#include <uuid/uuid.h>
#undef OPT_NAT
#undef OPT_VERBOSE
#include "ipmon_l.h"
@@ -42,11 +45,13 @@ static ipmon_action_t *alist = NULL;
struct in_addr addr;
struct opt *opt;
union i6addr ip6;
+ uuid_t uuid;
}
%token <num> YY_NUMBER YY_HEX
%token <str> YY_STR
%token <ip6> YY_IPV6
+%token <uuid> YY_UUID
%token YY_COMMENT
%token YY_CMP_EQ YY_CMP_NE YY_CMP_LE YY_CMP_GE YY_CMP_LT YY_CMP_GT
%token YY_RANGE_OUT YY_RANGE_IN
diff --git a/usr/src/cmd/ipf/tools/ipnat_y.y b/usr/src/cmd/ipf/tools/ipnat_y.y
index d929bf413a..2c913afea2 100644
--- a/usr/src/cmd/ipf/tools/ipnat_y.y
+++ b/usr/src/cmd/ipf/tools/ipnat_y.y
@@ -6,6 +6,7 @@
*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2019 Joyent, Inc.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
@@ -39,6 +40,7 @@
#include <sys/time.h>
#include <syslog.h>
#include <net/if.h>
+#include <uuid/uuid.h>
#if __FreeBSD_version >= 300000
# include <net/if_var.h>
#endif
@@ -89,6 +91,7 @@ static void setnatproto __P((int));
int v;
} ipp;
union i6addr ip6;
+ uuid_t uuid;
};
%token <num> YY_NUMBER YY_HEX
@@ -97,6 +100,7 @@ static void setnatproto __P((int));
%token YY_CMP_EQ YY_CMP_NE YY_CMP_LE YY_CMP_GE YY_CMP_LT YY_CMP_GT
%token YY_RANGE_OUT YY_RANGE_IN
%token <ip6> YY_IPV6
+%token <uuid> YY_UUID
%token IPNY_MAPBLOCK IPNY_RDR IPNY_PORT IPNY_PORTS IPNY_AUTO IPNY_RANGE
%token IPNY_MAP IPNY_BIMAP IPNY_FROM IPNY_TO IPNY_MASK IPNY_PORTMAP IPNY_ANY
diff --git a/usr/src/cmd/ipf/tools/ippool_y.y b/usr/src/cmd/ipf/tools/ippool_y.y
index cca5052bd4..5aadd22206 100644
--- a/usr/src/cmd/ipf/tools/ippool_y.y
+++ b/usr/src/cmd/ipf/tools/ippool_y.y
@@ -6,6 +6,7 @@
*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2019 Joyent, Inc.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
@@ -34,6 +35,7 @@
#include <netdb.h>
#include <ctype.h>
#include <unistd.h>
+#include <uuid/uuid.h>
#include "ipf.h"
#include "netinet/ip_lookup.h"
@@ -66,6 +68,7 @@ static int set_ipv6_addr = 0;
iphtent_t *ipe;
ip_pool_node_t *ipp;
union i6addr ip6;
+ uuid_t uuid;
}
%token <num> YY_NUMBER YY_HEX
@@ -74,6 +77,7 @@ static int set_ipv6_addr = 0;
%token YY_CMP_EQ YY_CMP_NE YY_CMP_LE YY_CMP_GE YY_CMP_LT YY_CMP_GT
%token YY_RANGE_OUT YY_RANGE_IN
%token <ip6> YY_IPV6
+%token <uuid> YY_UUID
%token IPT_IPF IPT_NAT IPT_COUNT IPT_AUTH IPT_IN IPT_OUT
%token IPT_TABLE IPT_GROUPMAP IPT_HASH
diff --git a/usr/src/cmd/ipf/tools/lexer.c b/usr/src/cmd/ipf/tools/lexer.c
index 3db3a0888b..b4ee8b3f77 100644
--- a/usr/src/cmd/ipf/tools/lexer.c
+++ b/usr/src/cmd/ipf/tools/lexer.c
@@ -5,6 +5,7 @@
*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2019 Joyent, Inc.
*/
#include <ctype.h>
@@ -14,6 +15,7 @@
#endif
#include <sys/ioctl.h>
#include <syslog.h>
+#include <uuid/uuid.h>
#ifdef TEST_LEXER
# define NO_YACC
union {
@@ -21,6 +23,7 @@ union {
char *str;
struct in_addr ipa;
i6addr_t ip6;
+ uuid_t uuid;
} yylval;
#endif
#include "lexer.h"
@@ -455,6 +458,40 @@ nextchar:
}
#endif
+	/*
+	 * UUID: e.g., "2426e38c-9f63-c0b8-cfd5-9aaeaf992d42" or its uppercase
+	 * variant.
+	 *
+	 * Speculatively collect a run of hex digits and hyphens and hand it
+	 * to uuid_parse(). On success the token is YY_UUID with the binary
+	 * UUID left in yylval.uuid. On failure the saved position and
+	 * character are restored and lexing continues with the rules below.
+	 */
+	if (isbuilding == 0 && (ishex(c) || c == '-')) {
+		/*
+		 * One byte larger than a printable UUID so that a run which
+		 * is one character too long can still be stored, terminated
+		 * and rejected by uuid_parse() without writing past the
+		 * end of the buffer.
+		 */
+		char uuidbuf[UUID_PRINTABLE_STRING_LENGTH + 1], *s, oc;
+		int start;
+
+		start = yypos;
+		s = uuidbuf;
+		oc = c;
+
+		/*
+		 * Don't worry about exact position of hexdigits and hyphens
+		 * because uuid_parse() will provide the sanity check.
+		 *
+		 * Stop one byte short of the buffer size: the slot after the
+		 * last stored character is reserved for the terminating NUL.
+		 */
+		do {
+			*s++ = c;
+			c = yygetc(1);
+		} while ((ishex(c) || c == '-') &&
+		    (s - uuidbuf < sizeof (uuidbuf) - 1));
+		yyunputc(c);
+		*s = '\0';
+
+		if (uuid_parse(uuidbuf, yylval.uuid) == 0) {
+			rval = YY_UUID;
+			yyexpectaddr = 0;
+			goto done;
+		}
+		/* Not a UUID after all: rewind and try the other rules. */
+		yypos = start;
+		c = oc;
+	}
+
+
if (c == ':') {
if (isbuilding == 1) {
yyunputc(c);
diff --git a/usr/src/cmd/ipf/tools/lexer.h b/usr/src/cmd/ipf/tools/lexer.h
index a296cb0bc3..448b3e6ffd 100644
--- a/usr/src/cmd/ipf/tools/lexer.h
+++ b/usr/src/cmd/ipf/tools/lexer.h
@@ -1,4 +1,6 @@
-
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
typedef struct wordtab {
char *w_word;
int w_value;
@@ -16,6 +18,7 @@ typedef struct wordtab {
#define YY_IPV6 1008
#define YY_STR 1009
#define YY_IPADDR 1010
+#define YY_UUID 1011
#endif
#define YYBUFSIZ 8192
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files
index 32e82de896..43919b6e3a 100644
--- a/usr/src/uts/common/Makefile.files
+++ b/usr/src/uts/common/Makefile.files
@@ -1804,8 +1804,8 @@ USB_IA_OBJS += usb_ia.o
SCSA2USB_OBJS += scsa2usb.o usb_ms_bulkonly.o usb_ms_cbi.o
-IPF_OBJS += ip_fil_solaris.o fil.o solaris.o ip_state.o ip_frag.o ip_nat.o \
- ip_proxy.o ip_auth.o ip_pool.o ip_htable.o ip_lookup.o \
+IPF_OBJS += cfw.o ip_fil_solaris.o fil.o solaris.o ip_state.o ip_frag.o \
+ ip_nat.o ip_proxy.o ip_auth.o ip_pool.o ip_htable.o ip_lookup.o \
ip_log.o misc.o ip_compat.o ip_nat6.o drand48.o
IPD_OBJS += ipd.o
diff --git a/usr/src/uts/common/inet/ipf/cfw.c b/usr/src/uts/common/inet/ipf/cfw.c
new file mode 100644
index 0000000000..941aeac328
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/cfw.c
@@ -0,0 +1,659 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019, Joyent, Inc.
+ */
+
+/* IPF oddness for compilation in userland for IPF tests. */
+#if defined(KERNEL) || defined(_KERNEL)
+#undef KERNEL
+#undef _KERNEL
+#define KERNEL 1
+#define _KERNEL 1
+#endif
+
+#include <sys/errno.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/socket.h>
+#include <net/if.h>
+#include <net/route.h>
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include "netinet/ip_compat.h"
+#ifdef USE_INET6
+#include <netinet/icmp6.h>
+#endif
+#include <netinet/tcpip.h>
+#include "netinet/ip_fil.h"
+#include "netinet/ip_nat.h"
+#include "netinet/ip_frag.h"
+#include "netinet/ip_state.h"
+#include "netinet/ip_proxy.h"
+#include "netinet/ip_auth.h"
+#include "netinet/ipf_stack.h"
+#ifdef IPFILTER_SCAN
+#include "netinet/ip_scan.h"
+#endif
+#ifdef IPFILTER_SYNC
+#include "netinet/ip_sync.h"
+#endif
+#include "netinet/ip_pool.h"
+#include "netinet/ip_htable.h"
+#ifdef IPFILTER_COMPILED
+#include "netinet/ip_rules.h"
+#endif
+#if defined(_KERNEL)
+#include <sys/sunddi.h>
+#endif
+
+#include "netinet/ipf_cfw.h"
+#include <sys/file.h>
+#include <sys/uio.h>
+#include <sys/cred.h>
+#include <sys/ddi.h>
+
+/*
+ * cfw == Cloud Firewall ==> routines for a global-zone data collector about
+ * ipf events for SmartOS. The only ones that CFW cares about are ones
+ * enforced by global-zone-controlled rulesets.
+ *
+ * The variable below is tied into the GZ-only ipf device /dev/ipfev, that
+ * flips this on when there is an open instance. This feature will also
+ * consume an fr_flag to have per-rule granularity.
+ */
+boolean_t ipf_cfwlog_enabled;
+
+/*
+ * Because ipf's test tools in $SRC/cmd insert all of these files, we need to
+ * stub out what we can vs. drag in even more headers and who knows what else.
+ */
+#ifdef _KERNEL
+
+/*
+ * CFW event ring buffer. Remember, this is for ALL ZONES because only a
+ * global-zone event-reader will be consuming these. In other words, it's
+ * not something to instantiate per-netstack.
+ *
+ * We may want to get more sophisticated and performant (e.g. per-processor),
+ * but for now keep the ring buffer simple and stupid.
+ * The ring size must be a power of 2, to be bitmaskable, and must be countable by a uint_t.
+ *
+ * Resizeable, see ipf_cfw_ring_resize() below.
+ */
+#define IPF_CFW_DEFAULT_RING_BUFS 1024
+#define IPF_CFW_MIN_RING_BUFS 8
+#define IPF_CFW_MAX_RING_BUFS (1U << 31U)
+
+/* Assume C's init-to-zero is sufficient for these types... */
+static kmutex_t cfw_ringlock;
+static kcondvar_t cfw_ringcv;
+
+static cfwev_t *cfw_ring; /* NULL by default. */
+static uint32_t cfw_ringsize; /* 0 by default, number of array elements. */
+static uint32_t cfw_ringmask; /* 0 by default. */
+
+/* If these are equal, we're either empty or full. */
+static uint_t cfw_ringstart, cfw_ringend;
+static boolean_t cfw_ringfull; /* Tell the difference here! */
+/* Bean-counters. */
+static uint64_t cfw_evreports;
+static uint64_t cfw_evdrops;
+
+/*
+ * Append one event to the CFW event ring buffer and wake any waiting
+ * readers.
+ *
+ * The ring never refuses an event: when it is already full the oldest entry
+ * is overwritten and counted as a drop. Choosing which older events to drop
+ * based on their type may be worthwhile in the future.
+ */
+static void
+ipf_cfwev_report(cfwev_t *event)
+{
+	mutex_enter(&cfw_ringlock);
+
+	/* Store into the tail slot, then advance the tail with wraparound. */
+	cfw_ring[cfw_ringend] = *event;
+	cfw_ringend = (cfw_ringend + 1) & cfw_ringmask;
+
+	if (!cfw_ringfull) {
+		/* The tail catching up with the head means we just filled. */
+		cfw_ringfull = (cfw_ringend == cfw_ringstart);
+	} else {
+		/* The oldest event was overwritten; move the head past it. */
+		cfw_ringstart = (cfw_ringstart + 1) & cfw_ringmask;
+		ASSERT3U(cfw_ringstart, ==, cfw_ringend);
+		DTRACE_PROBE(ipf__cfw__evdrop);
+		cfw_evdrops++;
+	}
+
+	cfw_evreports++;
+	cv_broadcast(&cfw_ringcv);
+	mutex_exit(&cfw_ringlock);
+}
+
+/*
+ * Provide access to multiple CFW events that can allow copying straight from
+ * the ring buffer up to userland. Requires a callback (which could call
+ * uiomove() directly, OR to a local still-in-kernel buffer) that must do the
+ * data copying-out.
+ *
+ * Callback function is of the form:
+ *
+ * uint_t cfw_many_cb(cfwev_t *evptr, uint_t num_avail, void *cbarg);
+ *
+ * The function must return how many events got consumed, which MUST be <= the
+ * number available. The function must ALSO UNDERSTAND that cfw_ringlock is
+ * held and must not be released during this time. The function may be called
+ * more than once, if the available buffers wrap-around OR "block" is set and
+ * we don't have enough buffers. If any callback returns 0, exit the function
+ * with however many were consumed.
+ *
+ * This function, like the callback, returns the number of events *CONSUMED*.
+ *
+ * . . .
+ *
+ * Tunables for ipf_cfwev_consume_many().
+ *
+ * If you wish to attempt to coalesce reads (to reduce the likelihood of one
+ * event at a time during high load) change the number of tries below to
+ * something not 0. Early experiments set this to 10.
+ *
+ * The wait between tries is in usecs in cfw_timeout_wait. The pessimal
+ * case for this is a timeout_wait-spaced trickle of one event at a time.
+ */
+uint_t cfw_timeout_tries = 0;
+uint_t cfw_timeout_wait = 10000; /* 10ms wait. */
+
+typedef struct uio_error_s {
+ struct uio *ue_uio; /* uio that the read callback copies events out through */
+ int ue_error; /* 0, a uiomove() failure, or EINTR set by the consumer */
+} uio_error_t;
+
+static uint_t
+ipf_cfwev_consume_many(uint_t num_requested, boolean_t block,
+ cfwmanycb_t cfw_many_cb, void *cbarg)
+{ /* Feed up to num_requested ring events to cfw_many_cb; returns how many were consumed. */
+ uint_t consumed = 0, cb_consumed, contig_size;
+ uint_t timeout_tries = cfw_timeout_tries; /* Remaining coalescing waits. */
+ boolean_t eintr = B_FALSE; /* Set when a signal ends the wait with nothing consumed. */
+
+ mutex_enter(&cfw_ringlock);
+
+ while (num_requested > 0) {
+ clock_t delta;
+
+ /* Silly reality checks: both indices must lie inside the ring. */
+ ASSERT3U(cfw_ringstart, <, cfw_ringsize);
+ ASSERT3U(cfw_ringend, <, cfw_ringsize);
+
+ if (cfw_ringstart > cfw_ringend || cfw_ringfull) {
+ /* We have from ringstart to the buffer's end. */
+ contig_size = cfw_ringsize - cfw_ringstart;
+ } else if (cfw_ringstart < cfw_ringend) {
+ /* We have no potential wrapping at this time. */
+ contig_size = cfw_ringend - cfw_ringstart;
+ } else if (block && cv_wait_sig(&cfw_ringcv, &cfw_ringlock)) {
+ /* Maybe something to consume now, try again. */
+ continue;
+ } else {
+ /*
+ * Nothing (more) to consume, return! We get here when the
+ * ring is empty and either the caller is non-blocking or
+ * cv_wait_sig() returned 0.
+ */
+ eintr = (block && consumed == 0);
+ break;
+ }
+
+ /* Fewer events were requested than are contiguously available: clamp. */
+ if (num_requested < contig_size)
+ contig_size = num_requested;
+
+ cb_consumed =
+ cfw_many_cb(&(cfw_ring[cfw_ringstart]), contig_size, cbarg);
+ ASSERT3U(cb_consumed, <=, contig_size);
+
+ cfw_ringstart += cb_consumed;
+ ASSERT3U(cfw_ringstart, <=, cfw_ringmask + 1);
+ cfw_ringstart &= cfw_ringmask; /* In case of wraparound. */
+ consumed += cb_consumed;
+ cfw_ringfull = (cfw_ringfull && cb_consumed == 0); /* Any consumption un-fills the ring. */
+ if (cb_consumed < contig_size) {
+ /*
+ * Callback returned less than given.
+ * This is likely a uio error, but we have
+ * something. Get out of here.
+ */
+ break;
+ }
+ ASSERT3U(cb_consumed, ==, contig_size);
+ num_requested -= contig_size;
+
+ if (num_requested == 0) {
+ /* All done! */
+ break;
+ }
+
+ if (cfw_ringstart != cfw_ringend) {
+ /*
+ * We wrapped around the end of the buffer, and
+ * we have more available to fill our request.
+ */
+ ASSERT0(cfw_ringstart);
+ ASSERT(!cfw_ringfull);
+ continue;
+ }
+
+ /*
+ * We obtained some of the events we requested, but not all.
+ * Since we have nothing to consume, wait *a little* longer.
+ */
+ if (timeout_tries == 0)
+ break; /* Don't bother... */
+ delta = drv_usectohz(cfw_timeout_wait);
+ timeout_tries--;
+
+ switch (cv_reltimedwait_sig(&cfw_ringcv, &cfw_ringlock, delta,
+ TR_CLOCK_TICK)) {
+ case 0:
+ /*
+ * Received signal! Return what we have OR if we have
+ * nothing, EINTR. Zeroing num_requested is what ends
+ * the enclosing while loop; the break below only leaves
+ * the switch.
+ */
+ DTRACE_PROBE1(ipf__cfw__timedsignal, int, consumed);
+ eintr = (consumed == 0);
+ num_requested = 0;
+ break;
+ case -1:
+ /* Time reached! Bail with what we got. */
+ DTRACE_PROBE(ipf__cfw__timedexpired);
+ num_requested = 0;
+ break;
+ default:
+ /* Aha! We've got more! Go around the loop again. */
+ DTRACE_PROBE(ipf__cfw__moredata);
+ break;
+ }
+ }
+
+ mutex_exit(&cfw_ringlock);
+ if (eintr) /* NOTE(review): this assumes cbarg always points at a uio_error_t. */
+ ((uio_error_t *)cbarg)->ue_error = EINTR;
+ return (consumed);
+}
+
+/*
+ * Return the zone debug id ("did"), which SmartOS likes to use, for the zone
+ * behind an ipf netstack instance. The value is looked up on first use and
+ * cached in the instance.
+ */
+static inline zoneid_t
+ifs_to_did(ipf_stack_t *ifs)
+{
+	zone_t *zp;
+
+	/* Already cached by an earlier call. */
+	if (ifs->ifs_zone_did != 0)
+		return (ifs->ifs_zone_did);
+
+	/*
+	 * The did can't be captured at initialization time because most zone
+	 * data isn't readily available then, so cement it in place now.
+	 */
+	VERIFY3U(ifs->ifs_zone, !=, GLOBAL_ZONEID);
+	zp = zone_find_by_id(ifs->ifs_zone);
+	if (zp == NULL) {
+		/* We are either in shutdown or something weirder. */
+		return (ifs->ifs_zone_did);
+	}
+
+	ifs->ifs_zone_did = zp->zone_did;
+	zone_rele(zp);
+	return (ifs->ifs_zone_did);
+}
+
+/*
+ * ipf_block_cfwlog()
+ *
+ * Called by fr_check() to record a drop (CFWEV_BLOCK) event for the
+ * global-zone data collector. Parameter names follow the rest of ipf:
+ * fr is the matching rule, fin the packet information and ifs the ipf
+ * netstack instance.
+ */
+void
+ipf_block_cfwlog(frentry_t *fr, fr_info_t *fin, ipf_stack_t *ifs)
+{
+	cfwev_t ev = {0};
+
+	/*
+	 * Without a rule there is nothing to report.
+	 * Capture failure by using dtrace on this function's entry.
+	 * 'ipf_block_cfwlog:entry /arg0 == NULL/ { printf("GOTCHA!\n"); }'
+	 */
+	if (fr == NULL)
+		return;
+
+	ev.cfwev_type = CFWEV_BLOCK;
+	ev.cfwev_length = sizeof (ev);
+
+	/*
+	 * A rule carries two direction flags (one for in, one for out), but
+	 * IPF code elsewhere does the cheesy single-flag check; so do we.
+	 */
+	if (fr->fr_flags & FR_INQUE)
+		ev.cfwev_direction = CFWDIR_IN;
+	else
+		ev.cfwev_direction = CFWDIR_OUT;
+
+	ev.cfwev_protocol = fin->fin_p;
+
+	/*
+	 * NOTE: fin_*port is in host/native order, and ICMP info is here too.
+	 */
+	ev.cfwev_sport = htons(fin->fin_sport);
+	ev.cfwev_dport = htons(fin->fin_dport);
+
+	if (fin->fin_v == IPV4_VERSION) {
+		IN6_INADDR_TO_V4MAPPED(&fin->fin_src, &ev.cfwev_saddr);
+		IN6_INADDR_TO_V4MAPPED(&fin->fin_dst, &ev.cfwev_daddr);
+	} else if (fin->fin_v == IPV6_VERSION) {
+		ev.cfwev_saddr = fin->fin_src6.in6;
+		ev.cfwev_daddr = fin->fin_dst6.in6;
+	} else {
+		/* We should never reach here, but mark it if we do. */
+		DTRACE_PROBE1(ipf__cfw__frinfo__badipversion, frinfo_t *, fin);
+		return;
+	}
+
+	/*
+	 * uniqtime() is what ipf's GETKTIME() uses.
+	 * If cfwev_tstamp needs to be sourced from elsewhere, fix that here.
+	 */
+	uniqtime(&ev.cfwev_tstamp);
+	ev.cfwev_zonedid = ifs_to_did(ifs);
+	ev.cfwev_ruleid = fin->fin_rule;
+	memcpy(ev.cfwev_ruleuuid, fr->fr_uuid, sizeof (uuid_t));
+
+	ipf_cfwev_report(&ev);
+}
+
+/*
+ * ipf_log_cfwlog()
+ *
+ * Twin of ipstate_log(), but records state events for the global-zone data
+ * collector. State creation (ISL_NEW, ISL_CLONE) is reported as CFWEV_BEGIN,
+ * state disappearance is ignored, and any other type is reported as
+ * CFWEV_BLOCK.
+ */
+void
+ipf_log_cfwlog(struct ipstate *is, uint_t type, ipf_stack_t *ifs)
+{
+	cfwev_t ev = {0};
+
+	switch (type) {
+	case ISL_EXPIRE:
+	case ISL_FLUSH:
+	case ISL_REMOVE:
+	case ISL_KILLED:
+	case ISL_ORPHAN:
+		/*
+		 * We don't care about session disappearances in CFW logging
+		 * for now. (Possible future: CFWEV_END)
+		 */
+		return;
+	case ISL_NEW:
+	case ISL_CLONE:
+		ev.cfwev_type = CFWEV_BEGIN;
+		break;
+	default:
+		ev.cfwev_type = CFWEV_BLOCK;
+		break;
+	}
+
+	ev.cfwev_length = sizeof (ev);
+
+	/*
+	 * A rule carries two direction flags (one for in, one for out), but
+	 * IPF code elsewhere does the cheesy single-flag check. Follow suit
+	 * here.
+	 */
+	ASSERT(is->is_rule != NULL);
+	if (is->is_rule->fr_flags & FR_INQUE)
+		ev.cfwev_direction = CFWDIR_IN;
+	else
+		ev.cfwev_direction = CFWDIR_OUT;
+
+	ev.cfwev_protocol = is->is_p;
+	if (is->is_p == IPPROTO_TCP || is->is_p == IPPROTO_UDP) {
+		/* NOTE: is_*port is in network order. */
+		ev.cfwev_sport = is->is_sport;
+		ev.cfwev_dport = is->is_dport;
+	} else if (is->is_p == IPPROTO_ICMP || is->is_p == IPPROTO_ICMPV6) {
+		/* Scribble the ICMP type in sport... */
+		ev.cfwev_sport = is->is_icmp.ici_type;
+	}
+	/* Other protocols leave the event's port fields empty. */
+
+	switch (is->is_v) {
+	case IPV6_VERSION:
+		ev.cfwev_saddr = is->is_src.in6;
+		ev.cfwev_daddr = is->is_dst.in6;
+		break;
+	case IPV4_VERSION:
+		IN6_INADDR_TO_V4MAPPED(&is->is_src.in4, &ev.cfwev_saddr);
+		IN6_INADDR_TO_V4MAPPED(&is->is_dst.in4, &ev.cfwev_daddr);
+		break;
+	default:
+		/* Can't parse addresses if we don't know the version. Drop. */
+		DTRACE_PROBE1(ipf__cfw__ipstate__badipversion,
+		    struct ipstate *, is);
+		return;
+	}
+
+	/*
+	 * uniqtime() is what ipf's GETKTIME() uses.
+	 * If cfwev_tstamp needs to be sourced from elsewhere, fix that here.
+	 */
+	uniqtime(&ev.cfwev_tstamp);
+	ev.cfwev_zonedid = ifs_to_did(ifs);
+	ev.cfwev_ruleid = is->is_rulen;
+	memcpy(ev.cfwev_ruleuuid, is->is_uuid, sizeof (uuid_t));
+
+	ipf_cfwev_report(&ev);
+}
+
+/*
+ * Callback routine we use for ipf_cfwev_consume_many().
+ *
+ * Copies num_avail events starting at evptr out through the uio carried in
+ * cbarg (a uio_error_t). Returns num_avail on success. Returns 0, meaning
+ * error indication, when uiomove() fails or when an earlier invocation has
+ * already recorded an error in ue_error.
+ */
+static uint_t
+cfwlog_read_manycb(cfwev_t *evptr, uint_t num_avail, void *cbarg)
+{
+	uio_error_t *uerr = (uio_error_t *)cbarg;
+
+	ASSERT(MUTEX_HELD(&cfw_ringlock));
+
+	/* Only attempt the copy-out if nothing has gone wrong so far. */
+	if (uerr->ue_error == 0) {
+		uerr->ue_error = uiomove((caddr_t)evptr,
+		    num_avail * sizeof (*evptr), UIO_READ, uerr->ue_uio);
+	}
+
+	return (uerr->ue_error == 0 ? num_avail : 0);
+}
+
+/*
+ * Resize the CFW event ring buffer.
+ *
+ * The caller must ensure the new size is a power of 2 between
+ * IPF_CFW_{MIN,MAX}_RING_BUFS (inclusive) or the special values
+ * IPF_CFW_RING_ALLOCATE (first-time creation) or IPF_CFW_RING_DESTROY
+ * (netstack-unload destruction).
+ *
+ * Everything in the current ring will be destroyed (and reported as a drop)
+ * upon resize.
+ *
+ * For an ordinary resize the caller must hold cfw_ringlock; the
+ * IPF_CFW_RING_ALLOCATE and IPF_CFW_RING_DESTROY requests are also accepted
+ * without it.
+ *
+ * Returns 0 on success, including the no-op cases of destroying a ring that
+ * does not exist and resizing to the current size. Returns EBUSY if
+ * IPF_CFW_RING_ALLOCATE is requested while a ring already exists.
+ */
+int
+ipf_cfw_ring_resize(uint32_t newsize)
+{
+ ASSERT(MUTEX_HELD(&cfw_ringlock) || newsize == IPF_CFW_RING_ALLOCATE ||
+ newsize == IPF_CFW_RING_DESTROY);
+
+ if (newsize == IPF_CFW_RING_ALLOCATE) {
+ if (cfw_ring != NULL)
+ return (EBUSY);
+ newsize = IPF_CFW_DEFAULT_RING_BUFS;
+ /* Fall through to allocating a new ring buffer. */
+ } else {
+ /*
+ * We may be called during error cleanup, so be liberal here:
+ * destroying a nonexistent ring, or asking for the size we
+ * already have, is a successful no-op.
+ */
+ if ((cfw_ring == NULL && newsize == IPF_CFW_RING_DESTROY) ||
+ newsize == cfw_ringsize) {
+ return (0);
+ }
+ kmem_free(cfw_ring, cfw_ringsize * sizeof (cfwev_t)); /* Needs the old cfw_ringsize; it is reset below. */
+ cfw_ring = NULL;
+ if (cfw_ringfull) { /* Count every event still queued in the old ring as a drop. */
+ cfw_evdrops += cfw_ringsize;
+ } else if (cfw_ringstart > cfw_ringend) {
+ cfw_evdrops += cfw_ringend +
+ (cfw_ringsize - cfw_ringstart);
+ } else {
+ cfw_evdrops += cfw_ringend - cfw_ringstart;
+ }
+ cfw_ringsize = cfw_ringmask = cfw_ringstart = cfw_ringend = 0;
+ cfw_ringfull = B_FALSE;
+
+ if (newsize == IPF_CFW_RING_DESTROY)
+ return (0);
+ /*
+ * Keep the reports & drops around because if we're just
+ * resizing, we need to know what we lost.
+ */
+ }
+
+ ASSERT(ISP2(newsize));
+ cfw_ring = kmem_alloc(newsize * sizeof (cfwev_t), KM_SLEEP);
+ /* KM_SLEEP means we always succeed. */
+ cfw_ringsize = newsize;
+ cfw_ringmask = cfw_ringsize - 1; /* Valid as an index mask because newsize is a power of 2. */
+
+ return (0);
+}
+
+/*
+ * ioctl handler for /dev/ipfev. Only supports SIOCIPFCFWCFG (get data
+ * collector statistics and configuration), and SIOCIPFCFWNEWSZ (resize the
+ * event ring buffer).
+ *
+ * Returns EIO for any other command, EACCES for callers outside the global
+ * zone, EFAULT if the ipfcfwcfg_t cannot be copied in or out, EINVAL for an
+ * unacceptable ring size, or whatever ipf_cfw_ring_resize() returns.
+ */
+/* ARGSUSED */
+int
+ipf_cfwlog_ioctl(dev_t dev, int cmd, intptr_t data, int mode, cred_t *cp,
+    int *rp)
+{
+	ipfcfwcfg_t cfg;
+	int err = 0;
+
+	if (!(cmd == SIOCIPFCFWCFG || cmd == SIOCIPFCFWNEWSZ))
+		return (EIO);
+
+	if (crgetzoneid(cp) != GLOBAL_ZONEID)
+		return (EACCES);
+
+	if (COPYIN((caddr_t)data, (caddr_t)&cfg, sizeof (cfg)) != 0)
+		return (EFAULT);
+
+	cfg.ipfcfwc_maxevsize = sizeof (cfwev_t);
+
+	mutex_enter(&cfw_ringlock);
+	cfg.ipfcfwc_evreports = cfw_evreports;
+	if (cmd == SIOCIPFCFWNEWSZ) {
+		uint32_t newsize = cfg.ipfcfwc_evringsize;
+
+		/* Do ioctl parameter checking here, then call the resizer. */
+		if (newsize >= IPF_CFW_MIN_RING_BUFS &&
+		    newsize <= IPF_CFW_MAX_RING_BUFS && ISP2(newsize)) {
+			err = ipf_cfw_ring_resize(newsize);
+		} else {
+			err = EINVAL;
+		}
+	}
+	/* Both cfw_evdrops and cfw_ringsize are affected by resize. */
+	cfg.ipfcfwc_evdrops = cfw_evdrops;
+	cfg.ipfcfwc_evringsize = cfw_ringsize;
+	mutex_exit(&cfw_ringlock);
+
+	if (err != 0)
+		return (err);
+
+	if (COPYOUT((caddr_t)&cfg, (caddr_t)data, sizeof (cfg)) != 0)
+		return (EFAULT);
+
+	return (0);
+}
+
+/*
+ * Send events up via /dev/ipfev reads. Will return only complete events.
+ *
+ * A zero-length read succeeds immediately; a buffer too small for a single
+ * cfwev_t gets EINVAL. A non-blocking read that finds nothing returns
+ * EWOULDBLOCK. Otherwise the return value is the error recorded while
+ * consuming events (0 on success).
+ */
+/* ARGSUSED */
+int
+ipf_cfwlog_read(dev_t dev, struct uio *uio, cred_t *cp)
+{
+	uio_error_t uerr = {uio, 0};
+	boolean_t blocking;
+	uint_t wanted, got;
+
+	if (uio->uio_resid == 0)
+		return (0);
+	if (uio->uio_resid < sizeof (cfwev_t))
+		return (EINVAL);
+
+	blocking = ((uio->uio_fmode & (FNDELAY | FNONBLOCK)) == 0);
+	wanted = uio->uio_resid / sizeof (cfwev_t);
+
+	/*
+	 * ipf_cfwev_consume_many() may invoke the callback several times
+	 * before it returns; the callback is what executes uiomove().
+	 */
+	got = ipf_cfwev_consume_many(wanted, blocking, cfwlog_read_manycb,
+	    &uerr);
+	ASSERT3U(got, <=, wanted);
+
+	if (uerr.ue_error == 0) {
+		/* No data available. */
+		if (!blocking && got == 0)
+			uerr.ue_error = EWOULDBLOCK;
+	} else if (uerr.ue_error != EINTR) {
+		/*
+		 * We had a problem that wasn't simply a case of
+		 * cv_wait_sig() receiving a signal. The events already
+		 * taken off the ring are accounted for as drops.
+		 */
+		DTRACE_PROBE1(ipf__cfw__uiodiscard, int, got);
+		mutex_enter(&cfw_ringlock);
+		cfw_evdrops += got;
+		mutex_exit(&cfw_ringlock);
+	}
+
+	return (uerr.ue_error);
+}
+
+#else /* _KERNEL */
+
+/* Blank stubs to satisfy userland's test compilations. */
+
+int
+ipf_cfw_ring_resize(uint32_t a)
+{
+ return (0); /* Userland stub: no ring buffer is managed here, always returns 0. */
+}
+
+void
+ipf_log_cfwlog(struct ipstate *a, uint_t b, ipf_stack_t *c)
+{ /* Userland stub: state events are not recorded outside the kernel build. */
+}
+
+void
+ipf_block_cfwlog(frentry_t *a, fr_info_t *b, ipf_stack_t *c)
+{ /* Userland stub: block events are not recorded outside the kernel build. */
+}
+
+#endif /* _KERNEL */
diff --git a/usr/src/uts/common/inet/ipf/fil.c b/usr/src/uts/common/inet/ipf/fil.c
index 78980be106..48fa6e7325 100644
--- a/usr/src/uts/common/inet/ipf/fil.c
+++ b/usr/src/uts/common/inet/ipf/fil.c
@@ -5,7 +5,7 @@
*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
*
- * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright 2019 Joyent, Inc.
*/
#if defined(KERNEL) || defined(_KERNEL)
@@ -2588,6 +2588,9 @@ ipf_stack_t *ifs;
}
#endif
+ if (IFS_CFWLOG(ifs, fr) && FR_ISBLOCK(pass))
+ ipf_block_cfwlog(fr, fin, ifs);
+
/*
* The FI_STATE flag is cleared here so that calling fr_checkstate
* will work when called from inside of fr_fastroute. Although
diff --git a/usr/src/uts/common/inet/ipf/ip_fil_solaris.c b/usr/src/uts/common/inet/ipf/ip_fil_solaris.c
index 4cb67a2dab..6aba0a7562 100644
--- a/usr/src/uts/common/inet/ipf/ip_fil_solaris.c
+++ b/usr/src/uts/common/inet/ipf/ip_fil_solaris.c
@@ -5,7 +5,7 @@
*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
*
- * Copyright 2018 Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
*/
#if !defined(lint)
@@ -907,6 +907,9 @@ int *rp;
return ENXIO;
unit = isp->ipfs_minor;
+ if (unit == IPL_LOGEV)
+ return (ipf_cfwlog_ioctl(dev, cmd, data, mode, cp, rp));
+
zid = crgetzoneid(cp);
if (cmd == SIOCIPFZONESET) {
if (zid == GLOBAL_ZONEID)
@@ -1247,11 +1250,35 @@ cred_t *cred;
if (IPL_LOGMAX < min)
return ENXIO;
+ /* Special-case ipfev: global-zone-open only. */
+ if (min == IPL_LOGEV) {
+ if (crgetzoneid(cred) != GLOBAL_ZONEID)
+ return (ENXIO);
+ /*
+ * Else enable the CFW logging of events.
+ * NOTE: For now, we only allow one open at a time.
+ * Use atomic_cas to confirm/deny. And also for now,
+ * assume sizeof (boolean_t) == sizeof (uint_t).
+ *
+ * Per the *_{refrele,REFRELE}() in other parts of inet,
+ * ensure all loads/stores complete before calling cas.
+ * membar_exit() does this.
+ */
+ membar_exit();
+ if (atomic_cas_uint(&ipf_cfwlog_enabled, 0, 1) != 0)
+ return (EBUSY);
+ }
+
minor = (minor_t)(uintptr_t)vmem_alloc(ipf_minor, 1,
VM_BESTFIT | VM_SLEEP);
if (ddi_soft_state_zalloc(ipf_state, minor) != 0) {
vmem_free(ipf_minor, (void *)(uintptr_t)minor, 1);
+ if (min == IPL_LOGEV) {
+ /* See above... */
+ membar_exit();
+ VERIFY(atomic_cas_uint(&ipf_cfwlog_enabled, 1, 0) == 1);
+ }
return ENXIO;
}
@@ -1273,6 +1300,7 @@ int flags, otype;
cred_t *cred;
{
minor_t min = getminor(dev);
+ ipf_devstate_t *isp;
#ifdef IPFDEBUG
cmn_err(CE_CONT, "iplclose(%x,%x,%x,%x)\n", dev, flags, otype, cred);
@@ -1281,6 +1309,15 @@ cred_t *cred;
if (IPL_LOGMAX < min)
return ENXIO;
+ isp = ddi_get_soft_state(ipf_state, min);
+ if (isp != NULL && isp->ipfs_minor == IPL_LOGEV) {
+ /*
+ * Disable CFW logging. See iplopen() for details.
+ */
+ membar_exit();
+ VERIFY(atomic_cas_uint(&ipf_cfwlog_enabled, 1, 0) == 1);
+ }
+
ddi_soft_state_free(ipf_state, min);
vmem_free(ipf_minor, (void *)(uintptr_t)min, 1);
@@ -1311,6 +1348,9 @@ cred_t *cp;
return ENXIO;
unit = isp->ipfs_minor;
+ if (unit == IPL_LOGEV)
+ return (ipf_cfwlog_read(dev, uio, cp));
+
/*
* ipf_find_stack returns with a read lock on ifs_ipf_global
*/
@@ -1362,6 +1402,9 @@ cred_t *cp;
return ENXIO;
unit = isp->ipfs_minor;
+ if (unit == IPL_LOGEV)
+ return (EIO); /* ipfev doesn't support write yet. */
+
/*
* ipf_find_stack returns with a read lock on ifs_ipf_global
*/
diff --git a/usr/src/uts/common/inet/ipf/ip_log.c b/usr/src/uts/common/inet/ipf/ip_log.c
index 584ee42d9a..7f5a1c839d 100644
--- a/usr/src/uts/common/inet/ipf/ip_log.c
+++ b/usr/src/uts/common/inet/ipf/ip_log.c
@@ -8,7 +8,7 @@
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright 2019 Joyent, Inc.
*/
#include <sys/param.h>
@@ -380,6 +380,7 @@ u_int flags;
if (fin->fin_nattag != NULL)
bcopy(fin->fin_nattag, (void *)&ipfl.fl_nattag,
sizeof(ipfl.fl_nattag));
+ bcopy(fin->fin_fr->fr_uuid, ipfl.fl_uuid, sizeof (ipfl.fl_uuid));
ipfl.fl_flags = flags;
ipfl.fl_dir = fin->fin_out;
ipfl.fl_lflags = fin->fin_flx;
diff --git a/usr/src/uts/common/inet/ipf/ip_state.c b/usr/src/uts/common/inet/ipf/ip_state.c
index 184f8775b6..a45bcbfdaf 100644
--- a/usr/src/uts/common/inet/ipf/ip_state.c
+++ b/usr/src/uts/common/inet/ipf/ip_state.c
@@ -5,7 +5,7 @@
*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
*
- * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright 2019 Joyent, Inc.
*/
#if defined(KERNEL) || defined(_KERNEL)
@@ -108,6 +108,7 @@ struct file;
# include <sys/systm.h>
# endif
#endif
+#include <sys/uuid.h>
/* END OF INCLUDES */
@@ -1445,6 +1446,7 @@ u_int flags;
is->is_sti.tqe_flags |= TQE_RULEBASED;
}
is->is_tag = fr->fr_logtag;
+ memcpy(is->is_uuid, fr->fr_uuid, sizeof (uuid_t));
is->is_ifp[(out << 1) + 1] = fr->fr_ifas[1];
is->is_ifp[(1 - out) << 1] = fr->fr_ifas[2];
@@ -1524,6 +1526,9 @@ u_int flags;
if (ifs->ifs_ipstate_logging)
ipstate_log(is, ISL_NEW, ifs);
+ if (IFS_CFWLOG(ifs, is->is_rule))
+ ipf_log_cfwlog(is, ISL_NEW, ifs);
+
RWLOCK_EXIT(&ifs->ifs_ipf_state);
fin->fin_rev = IP6_NEQ(&is->is_dst, &fin->fin_daddr);
fin->fin_flx |= FI_STATE;
@@ -2314,6 +2319,8 @@ u_32_t cmask;
is->is_flags &= ~(SI_W_SPORT|SI_W_DPORT);
if ((flags & SI_CLONED) && ifs->ifs_ipstate_logging)
ipstate_log(is, ISL_CLONE, ifs);
+ if ((flags & SI_CLONED) && IFS_CFWLOG(ifs, is->is_rule))
+ ipf_log_cfwlog(is, ISL_CLONE, ifs);
}
ret = -1;
@@ -3397,6 +3404,15 @@ ipf_stack_t *ifs;
if (ifs->ifs_ipstate_logging != 0 && why != 0)
ipstate_log(is, why, ifs);
+ /*
+ * For now, ipf_log_cfwlog() copes with all "why" values. Strictly
+ * speaking, though, they all map to one event (CFWEV_END), which for
+ * now is not supported, hence no code calling ipf_log_cfwlog() like
+ * below:
+ *
+ * if (why != 0 && IFS_CFWLOG(ifs, is->is_rule))
+ * ipf_log_cfwlog(is, why, ifs);
+ */
if (is->is_rule != NULL) {
is->is_rule->fr_statecnt--;
@@ -3931,7 +3947,6 @@ int flags;
return rval;
}
-
/* ------------------------------------------------------------------------ */
/* Function: ipstate_log */
/* Returns: Nil */
diff --git a/usr/src/uts/common/inet/ipf/netinet/Makefile b/usr/src/uts/common/inet/ipf/netinet/Makefile
index cca3b48ac4..88f91e633f 100644
--- a/usr/src/uts/common/inet/ipf/netinet/Makefile
+++ b/usr/src/uts/common/inet/ipf/netinet/Makefile
@@ -1,16 +1,15 @@
#
-#ident "%Z%%M% %I% %E% SMI"
-#
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
+# Copyright 2019 Joyent, Inc.
#
# uts/common/inet/ipf/netinet/Makefile
#
# include global definitions
include ../../../../../Makefile.master
-HDRS= ipl.h ip_compat.h ip_fil.h ip_nat.h ip_proxy.h ip_state.h \
- ip_frag.h ip_auth.h ip_lookup.h ip_pool.h ip_htable.h ipf_stack.h
+HDRS= ipl.h ip_compat.h ip_fil.h ip_nat.h ip_proxy.h ip_state.h ip_frag.h \
+ ip_auth.h ip_lookup.h ip_pool.h ip_htable.h ipf_stack.h ipf_cfw.h
ROOTDIRS= $(ROOT)/usr/include/netinet
diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_fil.h b/usr/src/uts/common/inet/ipf/netinet/ip_fil.h
index 90fc701ae1..cb7c7be36b 100644
--- a/usr/src/uts/common/inet/ipf/netinet/ip_fil.h
+++ b/usr/src/uts/common/inet/ipf/netinet/ip_fil.h
@@ -8,7 +8,7 @@
*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
*
- * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright 2019, Joyent, Inc.
*/
#ifndef __IP_FIL_H__
@@ -16,6 +16,7 @@
#include "netinet/ip_compat.h"
#include <sys/zone.h>
+#include <sys/uuid.h>
#ifdef SOLARIS
#undef SOLARIS
@@ -115,6 +116,8 @@
#define SIOCDELFR SIOCRMAFR
#define SIOCINSFR SIOCINAFR
# define SIOCIPFZONESET _IOWR('r', 97, struct ipfzoneobj)
+# define SIOCIPFCFWCFG _IOR('r', 98, struct ipfcfwcfg)
+# define SIOCIPFCFWNEWSZ _IOWR('r', 99, struct ipfcfwcfg)
/*
* What type of table is getting flushed?
@@ -599,6 +602,7 @@ typedef struct frentry {
u_32_t fr_flags; /* per-rule flags && options (see below) */
u_32_t fr_logtag; /* user defined log tag # */
u_32_t fr_collect; /* collection number */
+ uuid_t fr_uuid; /* user defined uuid */
u_int fr_arg; /* misc. numeric arg for rule */
u_int fr_loglevel; /* syslog log facility + priority */
u_int fr_age[2]; /* non-TCP timeouts */
@@ -727,6 +731,7 @@ typedef struct frentry {
#define FR_NEWISN 0x400000 /* new ISN for outgoing TCP */
#define FR_NOICMPERR 0x800000 /* do not match ICMP errors in state */
#define FR_STATESYNC 0x1000000 /* synchronize state to slave */
+#define FR_CFWLOG 0x2000000 /* Global CFW logging enabled */
#define FR_NOMATCH 0x8000000 /* no match occured */
/* 0x10000000 FF_LOGPASS */
/* 0x20000000 FF_LOGBLOCK */
@@ -882,6 +887,7 @@ typedef struct ipflog {
u_32_t fl_lflags;
u_32_t fl_logtag;
ipftag_t fl_nattag;
+ uuid_t fl_uuid;
u_short fl_plen; /* extra data after hlen */
u_short fl_loglevel; /* syslog log level */
char fl_group[FR_GROUPLEN];
@@ -930,6 +936,7 @@ typedef struct ipflog {
#define IPSYNC_NAME "/dev/ipsync"
#define IPSCAN_NAME "/dev/ipscan"
#define IPLOOKUP_NAME "/dev/iplookup"
+#define IPFEV_NAME "/dev/ipfev"
#define IPL_LOGIPF 0 /* Minor device #'s for accessing logs */
#define IPL_LOGNAT 1
@@ -938,8 +945,9 @@ typedef struct ipflog {
#define IPL_LOGSYNC 4
#define IPL_LOGSCAN 5
#define IPL_LOGLOOKUP 6
-#define IPL_LOGCOUNT 7
-#define IPL_LOGMAX 7
+#define IPL_LOGEV 7
+#define IPL_LOGCOUNT 8
+#define IPL_LOGMAX 8
#define IPL_LOGSIZE (IPL_LOGMAX + 1)
#define IPL_LOGALL -1
#define IPL_LOGNONE -2
@@ -1180,6 +1188,21 @@ typedef struct ipfzoneobj {
char ipfz_zonename[ZONENAME_MAX]; /* zone to act on */
} ipfzoneobj_t;
+/* Argument for the SIOCIPFCFWCFG (CFG) and SIOCIPFCFWNEWSZ (NEWSZ) ioctls. */
+typedef struct ipfcfwcfg {
+ /* CFG: max event size (out). NEWSZ: ignored on input, set as for CFG. */
+ uint32_t ipfcfwc_maxevsize;
+ /*
+ * CFG: current ring size (out).
+ * NEWSZ: requested ring size (in); must be 2^N for 3 <= N <= 31.
+ */
+ uint32_t ipfcfwc_evringsize;
+ /* CFG: number of event reports. NEWSZ: ignored on input, set as for CFG. */
+ uint64_t ipfcfwc_evreports;
+ /* CFG: number of event drops. NEWSZ: ignored on input, set as for CFG. */
+ uint64_t ipfcfwc_evdrops;
+} ipfcfwcfg_t;
+
#if defined(_KERNEL)
/* Set ipfs_zoneid to this if no zone has been set: */
#define IPFS_ZONE_UNSET -2
@@ -1559,6 +1582,23 @@ extern int ipllog __P((int, fr_info_t *, void **, size_t *, int *, int,
ipf_stack_t *));
extern void fr_logunload __P((ipf_stack_t *));
+/* SmartOS single-FD global-zone state accumulator (see cfw.c) */
+extern boolean_t ipf_cfwlog_enabled;
+struct ipstate; /* Ugggh. */
+extern void ipf_log_cfwlog __P((struct ipstate *, uint_t, ipf_stack_t *));
+extern void ipf_block_cfwlog __P((frentry_t *, fr_info_t *, ipf_stack_t *));
+#define IFS_CFWLOG(ifs, fr) ((ifs)->ifs_gz_controlled && ipf_cfwlog_enabled &&\
+ (fr) != NULL && ((fr)->fr_flags & FR_CFWLOG)) /* fr: any pointer expr */
+struct cfwev_s; /* See ipf_cfw.h */
+extern boolean_t ipf_cfwev_consume __P((struct cfwev_s *, boolean_t));
+/* See cfw.c's ipf_cfwev_consume_many() for details. */
+typedef uint_t (*cfwmanycb_t) __P((struct cfwev_s *, uint_t, void *));
+extern int ipf_cfwlog_read __P((dev_t, struct uio *, struct cred *));
+extern int ipf_cfwlog_ioctl __P((dev_t, int, intptr_t, int, cred_t *, int *));
+#define IPF_CFW_RING_ALLOCATE 0
+#define IPF_CFW_RING_DESTROY 1
+extern int ipf_cfw_ring_resize(uint32_t);
+
extern frentry_t *fr_acctpkt __P((fr_info_t *, u_32_t *));
extern int fr_copytolog __P((int, char *, int));
extern u_short fr_cksum __P((mb_t *, ip_t *, int, void *));
diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_state.h b/usr/src/uts/common/inet/ipf/netinet/ip_state.h
index 4c605c1b89..ef315d5ef1 100644
--- a/usr/src/uts/common/inet/ipf/netinet/ip_state.h
+++ b/usr/src/uts/common/inet/ipf/netinet/ip_state.h
@@ -8,11 +8,14 @@
*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2019 Joyent, Inc.
*/
#ifndef __IP_STATE_H__
#define __IP_STATE_H__
+#include <sys/uuid.h>
+
#if defined(__STDC__) || defined(__GNUC__) || defined(_AIX51)
# define SIOCDELST _IOW('r', 61, struct ipfobj)
#else
@@ -66,6 +69,7 @@ typedef struct ipstate {
/* in both directions */
u_32_t is_optmsk[2]; /* " " mask */
/* in both directions */
+ uuid_t is_uuid;
u_short is_sec; /* security options set */
u_short is_secmsk; /* " " mask */
u_short is_auth; /* authentication options set */
diff --git a/usr/src/uts/common/inet/ipf/netinet/ipf_cfw.h b/usr/src/uts/common/inet/ipf/netinet/ipf_cfw.h
new file mode 100644
index 0000000000..1972d2b3f7
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/netinet/ipf_cfw.h
@@ -0,0 +1,69 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
+
+#ifndef __IPF_CFW_H__
+#define __IPF_CFW_H__
+
+#include <sys/types.h>
+#include <inet/ip6.h>
+#include <sys/uuid.h>
+
+/* Userland test builds lack ASSERT3U; fall back to a plain unsigned ASSERT. */
+#ifndef ASSERT3U
+#define ASSERT3U(a, b, c) ASSERT((uint64_t)(a) b (uint64_t)(c))
+#endif /* ASSERT3U */
+
+/*
+ * CFW Event, which is emitted to a global-zone listener. The global-zone
+ * listener solves the one-fd-per-zone problem of using each zone's ipmon.
+ *
+ * These must be 64-bit aligned because they form an array in-kernel. There
+ * might be reserved fields to ensure that alignment.
+ */
+#define CFWEV_BLOCK 1
+#define CFWEV_BEGIN 2
+#define CFWEV_END 3
+#define CFWDIR_IN 1
+#define CFWDIR_OUT 2
+
+typedef struct cfwev_s {
+ uint16_t cfwev_type; /* CFWEV_BEGIN, CFWEV_END or CFWEV_BLOCK */
+ uint16_t cfwev_length; /* In bytes; uint16_t caps it at 65535. */
+ zoneid_t cfwev_zonedid; /* Pullable from ipf_stack_t. */
+
+ uint32_t cfwev_ruleid; /* Pullable from fr_info_t. */
+ uint16_t cfwev_sport; /* Source port (network order) */
+ uint16_t cfwev_dport; /* Dest. port (network order) */
+
+ uint8_t cfwev_protocol; /* IPPROTO_* */
+ /* Presumably CFWDIR_*; says if src/dst are local/remote or the reverse. */
+ uint8_t cfwev_direction;
+ uint8_t cfwev_reserved[6]; /* Pads this 1+1+6 byte group to 64 bits. */
+
+ in6_addr_t cfwev_saddr; /* IPv4 addresses are V4MAPPED. */
+ in6_addr_t cfwev_daddr; /* Presumably same encoding as cfwev_saddr. */
+
+ /*
+ * 'struct timeval' differs between the 32-bit and 64-bit ABIs,
+ * so this interface is only usable by 64-bit binaries.
+ */
+ struct timeval cfwev_tstamp;
+
+ uuid_t cfwev_ruleuuid; /* Pullable from fr_info_t. */
+} cfwev_t;
+
+
+
+#endif /* __IPF_CFW_H__ */
diff --git a/usr/src/uts/common/inet/ipf/netinet/ipf_stack.h b/usr/src/uts/common/inet/ipf/netinet/ipf_stack.h
index 5c156e9c44..0b2a8d826f 100644
--- a/usr/src/uts/common/inet/ipf/netinet/ipf_stack.h
+++ b/usr/src/uts/common/inet/ipf/netinet/ipf_stack.h
@@ -6,7 +6,7 @@
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
- * Copyright 2018 Joyent, Inc. All rights reserved.
+ * Copyright 2019 Joyent, Inc.
*/
#ifndef __IPF_STACK_H__
@@ -46,6 +46,7 @@ struct ipf_stack {
struct ipf_stack *ifs_gz_cont_ifs;
netid_t ifs_netid;
zoneid_t ifs_zone;
+ zoneid_t ifs_zone_did;
boolean_t ifs_gz_controlled;
/* ipf module */
@@ -316,6 +317,7 @@ struct ipf_stack {
char *ifs_addmask_key;
char *ifs_rn_zeros;
char *ifs_rn_ones;
+
#ifdef KERNEL
/* kstats for inbound and outbound */
kstat_t *ifs_kstatp[2];
diff --git a/usr/src/uts/common/inet/ipf/solaris.c b/usr/src/uts/common/inet/ipf/solaris.c
index 5d56debc31..5ccbfa3188 100644
--- a/usr/src/uts/common/inet/ipf/solaris.c
+++ b/usr/src/uts/common/inet/ipf/solaris.c
@@ -6,7 +6,7 @@
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright 2019 Joyent, Inc.
*/
/*
@@ -116,7 +116,7 @@ static void ipf_stack_shutdown __P((const netid_t, void *));
static int ipf_property_g_update __P((dev_info_t *));
static char *ipf_devfiles[] = { IPL_NAME, IPNAT_NAME, IPSTATE_NAME,
IPAUTH_NAME, IPSYNC_NAME, IPSCAN_NAME,
- IPLOOKUP_NAME, NULL };
+ IPLOOKUP_NAME, IPFEV_NAME, NULL };
extern void *ipf_state; /* DDI state */
extern vmem_t *ipf_minor; /* minor number arena */
@@ -741,6 +741,9 @@ ddi_attach_cmd_t cmd;
ipf_dev_info = dip;
+ if (ipf_cfw_ring_resize(IPF_CFW_RING_ALLOCATE) != 0)
+ goto attach_failed;
+
ipfncb = net_instance_alloc(NETINFO_VERSION);
if (ipfncb == NULL)
goto attach_failed;
@@ -768,6 +771,7 @@ ddi_attach_cmd_t cmd;
}
attach_failed:
+ (void) ipf_cfw_ring_resize(IPF_CFW_RING_DESTROY);
ddi_remove_minor_node(dip, NULL);
ddi_prop_remove_all(dip);
ddi_soft_state_fini(&ipf_state);
@@ -795,6 +799,7 @@ ddi_detach_cmd_t cmd;
* framework guarantees we are not active with this devinfo
* node in any other entry points at this time.
*/
+ (void) ipf_cfw_ring_resize(IPF_CFW_RING_DESTROY);
ddi_prop_remove_all(dip);
i = ddi_get_instance(dip);
ddi_remove_minor_node(dip, NULL);
diff --git a/usr/src/uts/intel/ipf/ipf.global-objs.debug64 b/usr/src/uts/intel/ipf/ipf.global-objs.debug64
index 5ebc7eed2b..ea5510a78d 100644
--- a/usr/src/uts/intel/ipf/ipf.global-objs.debug64
+++ b/usr/src/uts/intel/ipf/ipf.global-objs.debug64
@@ -22,9 +22,21 @@
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# Copyright 2018 Joyent, Inc. All rights reserved
+# Copyright 2019 Joyent, Inc.
#
+cfw_evdrops
+cfw_evreports
+cfw_ring
+cfw_ringcv
+cfw_ringend
+cfw_ringfull
+cfw_ringlock
+cfw_ringmask
+cfw_ringsize
+cfw_ringstart
+cfw_timeout_tries
+cfw_timeout_wait
fr_availfuncs
fr_features
fr_objbytes
@@ -68,6 +80,7 @@ icmptoicmp6unreach
idletime_tab
ip6exthdr
ipf_cb_ops
+ipf_cfwlog_enabled
ipf_dev_info
ipf_devfiles
ipf_eth_bcast_addr
diff --git a/usr/src/uts/sparc/ipf/ipf.global-objs.debug64 b/usr/src/uts/sparc/ipf/ipf.global-objs.debug64
index cb3e5485b2..b42dca618a 100644
--- a/usr/src/uts/sparc/ipf/ipf.global-objs.debug64
+++ b/usr/src/uts/sparc/ipf/ipf.global-objs.debug64
@@ -22,9 +22,21 @@
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# Copyright 2013 Joyent, Inc. All rights reserved
+# Copyright 2019 Joyent, Inc.
#
+cfw_evdrops
+cfw_evreports
+cfw_ring
+cfw_ringcv
+cfw_ringend
+cfw_ringfull
+cfw_ringlock
+cfw_ringmask
+cfw_ringsize
+cfw_ringstart
+cfw_timeout_tries
+cfw_timeout_wait
hook4_vnd_in
hook4_vnd_out
hook6_vnd_in
@@ -60,6 +72,7 @@ icmptoicmp6unreach
idletime_tab
ip6exthdr
ipf_cb_ops
+ipf_cfwlog_enabled
ipf_dev_info
ipf_devfiles
ipf_kstat_tmp