diff options
24 files changed, 978 insertions, 27 deletions
@@ -80,6 +80,7 @@ s dev/ip6=../devices/pseudo/ip6@0:ip6 s dev/ip=../devices/pseudo/ip@0:ip s dev/ipauth=../devices/pseudo/ipf@0:ipauth s dev/ipf=../devices/pseudo/ipf@0:ipf +s dev/ipfev=../devices/pseudo/ipf@0:ipfev s dev/ipl=../devices/pseudo/ipf@0:ipf s dev/iplookup=../devices/pseudo/ipf@0:iplookup s dev/ipmpstub=../devices/pseudo/dlpistub@0:ipmpstub diff --git a/usr/src/cmd/devfsadm/misc_link.c b/usr/src/cmd/devfsadm/misc_link.c index 55aff1e4f7..49be9e9b2d 100644 --- a/usr/src/cmd/devfsadm/misc_link.c +++ b/usr/src/cmd/devfsadm/misc_link.c @@ -21,7 +21,7 @@ /* * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2015, Joyent, Inc. All rights reserved. + * Copyright 2019 Joyent, Inc. */ #include <regex.h> @@ -124,7 +124,7 @@ static devfsadm_create_t misc_cbt[] = { }, { "pseudo", "ddi_pseudo", "(^ipf$)|(^ipnat$)|(^ipstate$)|(^ipauth$)|" - "(^ipsync$)|(^ipscan$)|(^iplookup$)", + "(^ipsync$)|(^ipscan$)|(^iplookup$)|(^ipfev$)", TYPE_EXACT | DRV_RE, ILEVEL_0, minor_name, }, { "pseudo", "ddi_pseudo", "dld", diff --git a/usr/src/cmd/ipf/lib/common/printfr.c b/usr/src/cmd/ipf/lib/common/printfr.c index 063eb87c07..b096c46e0e 100644 --- a/usr/src/cmd/ipf/lib/common/printfr.c +++ b/usr/src/cmd/ipf/lib/common/printfr.c @@ -7,9 +7,10 @@ * * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2019 Joyent, Inc. 
*/

-#pragma ident "%Z%%M% %I% %E% SMI"
+#include <uuid/uuid.h>

 #include "ipf.h"

@@ -386,7 +387,8 @@ ioctlfunc_t iocfunc;
 		printf(" head %s", fp->fr_grhead);
 	if (*fp->fr_group != '\0')
 		printf(" group %s", fp->fr_group);
-	if (fp->fr_logtag != FR_NOLOGTAG || *fp->fr_nattag.ipt_tag) {
+	if (fp->fr_logtag != FR_NOLOGTAG || *fp->fr_nattag.ipt_tag ||
+	    (fp->fr_flags & FR_CFWLOG) || !uuid_is_null(fp->fr_uuid)) {
 		char *s = "";

 		printf(" set-tag(");
@@ -397,6 +399,18 @@ ioctlfunc_t iocfunc;
 		if (*fp->fr_nattag.ipt_tag) {
 			printf("%snat=%-.*s", s, IPFTAG_LEN,
 				fp->fr_nattag.ipt_tag);
+			s = ", ";
+		}
+		if (fp->fr_flags & FR_CFWLOG) {
+			printf("%scfwlog", s);	/* keep tags comma-separated */
+			s = ", ";
+		}
+
+		if (!uuid_is_null(fp->fr_uuid)) {
+			char uuid[UUID_PRINTABLE_STRING_LENGTH];
+
+			uuid_unparse(fp->fr_uuid, uuid);
+			printf("%suuid=%s", s, uuid);
 		}
 		printf(")");
 	}
diff --git a/usr/src/cmd/ipf/tools/Makefile.tools b/usr/src/cmd/ipf/tools/Makefile.tools
index ce0db79970..c57f0ace87 100644
--- a/usr/src/cmd/ipf/tools/Makefile.tools
+++ b/usr/src/cmd/ipf/tools/Makefile.tools
@@ -23,7 +23,7 @@
 # Use is subject to license terms.
 #
 # Copyright 2013 Nexenta Systems, Inc. All rights reserved.
-# Copyright (c) 2012, Joyent Inc. All rights reserved.
+# Copyright 2019 Joyent, Inc.
 #

 PROG= ipf ipfs ipmon ipnat ippool ipfstat
@@ -35,7 +35,7 @@ IPFSTAT_OBJS= ipfstat.o ipfzone.o
 IPMON_OBJS= ipmon.o ipfzone.o ipmon_y.o ipmon_l.o
 IPNAT_OBJS= ipnat.o ipfzone.o ipnat_y.o ipnat_l.o
 IPPOOL_OBJS= ippool.o ipfzone.o ippool_y.o ippool_l.o
-IPFTEST_OBJS= ipftest.o ipfzone.o \
+IPFTEST_OBJS= cfw.o ipftest.o ipfzone.o \
 	ip_fil.o ip_state.o ip_compat.o \
 	ip_frag.o ip_nat.o ip_nat6.o fil.o \
 	ip_htable.o ip_lookup.o \
@@ -56,7 +56,7 @@ SRCS= $(OBJSL:%.o=../%.c)
 include ../../../Makefile.cmd
 include ../../Makefile.ipf

-LDLIBS += $(LIBBPF)
+LDLIBS += $(LIBBPF) -luuid
 LDFLAGS += $(MAPFILE.NGB:%=-M%)

 CPPFLAGS += -I.
-DIPFILTER_LOOKUP -DIPFILTER_LOG diff --git a/usr/src/cmd/ipf/tools/ipf_y.y b/usr/src/cmd/ipf/tools/ipf_y.y index 7689d676c7..c8909b4e92 100644 --- a/usr/src/cmd/ipf/tools/ipf_y.y +++ b/usr/src/cmd/ipf/tools/ipf_y.y @@ -6,6 +6,7 @@ * * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2019 Joyent, Inc. */ #include "ipf.h" @@ -16,6 +17,7 @@ # define _NET_BPF_H_ # include <pcap.h> #endif +#include <uuid/uuid.h> #include "netinet/ip_pool.h" #include "netinet/ip_htable.h" #include "netinet/ipl.h" @@ -98,6 +100,7 @@ static int set_ipv6_addr = 0; union i6addr m; } ipp; union i6addr ip6; + uuid_t uuid; }; %type <port> portnum @@ -117,6 +120,7 @@ static int set_ipv6_addr = 0; %token YY_CMP_EQ YY_CMP_NE YY_CMP_LE YY_CMP_GE YY_CMP_LT YY_CMP_GT %token YY_RANGE_OUT YY_RANGE_IN %token <ip6> YY_IPV6 +%token <uuid> YY_UUID %token IPFY_PASS IPFY_BLOCK IPFY_COUNT IPFY_CALL %token IPFY_RETICMP IPFY_RETRST IPFY_RETICMPASDST @@ -127,6 +131,7 @@ static int set_ipv6_addr = 0; %token IPFY_HEAD IPFY_GROUP %token IPFY_AUTH IPFY_PREAUTH %token IPFY_LOG IPFY_BODY IPFY_FIRST IPFY_LEVEL IPFY_ORBLOCK +%token IPFY_UUID IPFY_CFWLOG %token IPFY_LOGTAG IPFY_MATCHTAG IPFY_SETTAG IPFY_SKIP %token IPFY_FROM IPFY_ALL IPFY_ANY IPFY_BPFV4 IPFY_BPFV6 IPFY_POOL IPFY_HASH %token IPFY_PPS @@ -518,6 +523,8 @@ taginlist: taginspec: logtag |nattag + |uuidtag + |cfwtag ; nattag: IPFY_NAT '=' YY_STR { DOALL(strncpy(fr->fr_nattag.ipt_tag,\ @@ -530,6 +537,12 @@ nattag: IPFY_NAT '=' YY_STR { DOALL(strncpy(fr->fr_nattag.ipt_tag,\ logtag: IPFY_LOG '=' YY_NUMBER { DOALL(fr->fr_logtag = $3;) } ; +cfwtag: IPFY_CFWLOG { DOALL(fr->fr_flags |= FR_CFWLOG;) } + ; + +uuidtag: IPFY_UUID '=' YY_UUID { DOALL(uuid_copy(fr->fr_uuid, $3);) } + ; + settagout: | IPFY_SETTAG '(' tagoutlist ')' ; @@ -542,6 +555,8 @@ tagoutlist: tagoutspec: logtag | nattag + | uuidtag + | cfwtag ; matchtagin: @@ -1566,6 +1581,7 @@ static struct wordtab ipfwords[96] = { { "bpf-v6", IPFY_BPFV6 }, 
#endif { "call", IPFY_CALL }, + { "cfwlog", IPFY_CFWLOG }, { "code", IPFY_ICMPCODE }, { "count", IPFY_COUNT }, { "dup-to", IPFY_DUPTO }, @@ -1641,6 +1657,7 @@ static struct wordtab ipfwords[96] = { { "to", IPFY_TO }, { "ttl", IPFY_TTL }, { "udp", IPFY_UDP }, + { "uuid", IPFY_UUID }, { "v6hdrs", IPF6_V6HDRS }, { "with", IPFY_WITH }, { NULL, 0 } diff --git a/usr/src/cmd/ipf/tools/ipmon_y.y b/usr/src/cmd/ipf/tools/ipmon_y.y index e6dda81c35..b4fce4015d 100644 --- a/usr/src/cmd/ipf/tools/ipmon_y.y +++ b/usr/src/cmd/ipf/tools/ipmon_y.y @@ -1,11 +1,14 @@ /* * Copyright (C) 1993-2005 by Darren Reed. * See the IPFILTER.LICENCE file for details on licencing. + * + * Copyright 2019 Joyent, Inc. */ %{ #include "ipf.h" #include <syslog.h> +#include <uuid/uuid.h> #undef OPT_NAT #undef OPT_VERBOSE #include "ipmon_l.h" @@ -42,11 +45,13 @@ static ipmon_action_t *alist = NULL; struct in_addr addr; struct opt *opt; union i6addr ip6; + uuid_t uuid; } %token <num> YY_NUMBER YY_HEX %token <str> YY_STR %token <ip6> YY_IPV6 +%token <uuid> YY_UUID %token YY_COMMENT %token YY_CMP_EQ YY_CMP_NE YY_CMP_LE YY_CMP_GE YY_CMP_LT YY_CMP_GT %token YY_RANGE_OUT YY_RANGE_IN diff --git a/usr/src/cmd/ipf/tools/ipnat_y.y b/usr/src/cmd/ipf/tools/ipnat_y.y index d929bf413a..2c913afea2 100644 --- a/usr/src/cmd/ipf/tools/ipnat_y.y +++ b/usr/src/cmd/ipf/tools/ipnat_y.y @@ -6,6 +6,7 @@ * * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2019 Joyent, Inc. 
*/ #pragma ident "%Z%%M% %I% %E% SMI" @@ -39,6 +40,7 @@ #include <sys/time.h> #include <syslog.h> #include <net/if.h> +#include <uuid/uuid.h> #if __FreeBSD_version >= 300000 # include <net/if_var.h> #endif @@ -89,6 +91,7 @@ static void setnatproto __P((int)); int v; } ipp; union i6addr ip6; + uuid_t uuid; }; %token <num> YY_NUMBER YY_HEX @@ -97,6 +100,7 @@ static void setnatproto __P((int)); %token YY_CMP_EQ YY_CMP_NE YY_CMP_LE YY_CMP_GE YY_CMP_LT YY_CMP_GT %token YY_RANGE_OUT YY_RANGE_IN %token <ip6> YY_IPV6 +%token <uuid> YY_UUID %token IPNY_MAPBLOCK IPNY_RDR IPNY_PORT IPNY_PORTS IPNY_AUTO IPNY_RANGE %token IPNY_MAP IPNY_BIMAP IPNY_FROM IPNY_TO IPNY_MASK IPNY_PORTMAP IPNY_ANY diff --git a/usr/src/cmd/ipf/tools/ippool_y.y b/usr/src/cmd/ipf/tools/ippool_y.y index cca5052bd4..5aadd22206 100644 --- a/usr/src/cmd/ipf/tools/ippool_y.y +++ b/usr/src/cmd/ipf/tools/ippool_y.y @@ -6,6 +6,7 @@ * * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2019 Joyent, Inc. */ #pragma ident "%Z%%M% %I% %E% SMI" @@ -34,6 +35,7 @@ #include <netdb.h> #include <ctype.h> #include <unistd.h> +#include <uuid/uuid.h> #include "ipf.h" #include "netinet/ip_lookup.h" @@ -66,6 +68,7 @@ static int set_ipv6_addr = 0; iphtent_t *ipe; ip_pool_node_t *ipp; union i6addr ip6; + uuid_t uuid; } %token <num> YY_NUMBER YY_HEX @@ -74,6 +77,7 @@ static int set_ipv6_addr = 0; %token YY_CMP_EQ YY_CMP_NE YY_CMP_LE YY_CMP_GE YY_CMP_LT YY_CMP_GT %token YY_RANGE_OUT YY_RANGE_IN %token <ip6> YY_IPV6 +%token <uuid> YY_UUID %token IPT_IPF IPT_NAT IPT_COUNT IPT_AUTH IPT_IN IPT_OUT %token IPT_TABLE IPT_GROUPMAP IPT_HASH diff --git a/usr/src/cmd/ipf/tools/lexer.c b/usr/src/cmd/ipf/tools/lexer.c index 3db3a0888b..b4ee8b3f77 100644 --- a/usr/src/cmd/ipf/tools/lexer.c +++ b/usr/src/cmd/ipf/tools/lexer.c @@ -5,6 +5,7 @@ * * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2019 Joyent, Inc. 
*/

 #include <ctype.h>
@@ -14,6 +15,7 @@
 #endif
 #include <sys/ioctl.h>
 #include <syslog.h>
+#include <uuid/uuid.h>
 #ifdef TEST_LEXER
 # define NO_YACC
 union {
@@ -21,6 +23,7 @@ union {
 	char *str;
 	struct in_addr ipa;
 	i6addr_t ip6;
+	uuid_t uuid;
 } yylval;
 #endif
 #include "lexer.h"
@@ -455,6 +458,42 @@ nextchar:
 	}
 #endif

+	/*
+	 * UUID: e.g., "2426e38c-9f63-c0b8-cfd5-9aaeaf992d42" or its uppercase
+	 * variant.
+	 */
+	if (isbuilding == 0 && (ishex(c) || c == '-')) {
+		char uuidbuf[UUID_PRINTABLE_STRING_LENGTH + 1], *s, oc;
+		int start;
+
+		start = yypos;
+		s = uuidbuf;
+		oc = c;
+
+		/*
+		 * Don't worry about exact position of hexdigits and hyphens
+		 * because uuid_parse() will provide the sanity check. The
+		 * buffer has one spare byte so an over-long run still fails
+		 * uuid_parse(), and the loop always leaves room for the NUL.
+		 */
+		do {
+			*s++ = c;
+			c = yygetc(1);
+		} while ((ishex(c) || c == '-') &&
+		    (s - uuidbuf < sizeof (uuidbuf) - 1));
+		yyunputc(c);
+		*s = '\0';
+
+		if (uuid_parse(uuidbuf, yylval.uuid) == 0) {
+			rval = YY_UUID;
+			yyexpectaddr = 0;
+			goto done;
+		}
+		yypos = start;
+		c = oc;
+	}
+
+
 	if (c == ':') {
 		if (isbuilding == 1) {
 			yyunputc(c);
diff --git a/usr/src/cmd/ipf/tools/lexer.h b/usr/src/cmd/ipf/tools/lexer.h
index a296cb0bc3..448b3e6ffd 100644
--- a/usr/src/cmd/ipf/tools/lexer.h
+++ b/usr/src/cmd/ipf/tools/lexer.h
@@ -1,4 +1,6 @@
-
+/*
+ * Copyright 2019 Joyent, Inc.
+ */ typedef struct wordtab { char *w_word; int w_value; @@ -16,6 +18,7 @@ typedef struct wordtab { #define YY_IPV6 1008 #define YY_STR 1009 #define YY_IPADDR 1010 +#define YY_UUID 1011 #endif #define YYBUFSIZ 8192 diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files index 32e82de896..43919b6e3a 100644 --- a/usr/src/uts/common/Makefile.files +++ b/usr/src/uts/common/Makefile.files @@ -1804,8 +1804,8 @@ USB_IA_OBJS += usb_ia.o SCSA2USB_OBJS += scsa2usb.o usb_ms_bulkonly.o usb_ms_cbi.o -IPF_OBJS += ip_fil_solaris.o fil.o solaris.o ip_state.o ip_frag.o ip_nat.o \ - ip_proxy.o ip_auth.o ip_pool.o ip_htable.o ip_lookup.o \ +IPF_OBJS += cfw.o ip_fil_solaris.o fil.o solaris.o ip_state.o ip_frag.o \ + ip_nat.o ip_proxy.o ip_auth.o ip_pool.o ip_htable.o ip_lookup.o \ ip_log.o misc.o ip_compat.o ip_nat6.o drand48.o IPD_OBJS += ipd.o diff --git a/usr/src/uts/common/inet/ipf/cfw.c b/usr/src/uts/common/inet/ipf/cfw.c new file mode 100644 index 0000000000..941aeac328 --- /dev/null +++ b/usr/src/uts/common/inet/ipf/cfw.c @@ -0,0 +1,659 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2019, Joyent, Inc. + */ + +/* IPF oddness for compilation in userland for IPF tests. 
*/ +#if defined(KERNEL) || defined(_KERNEL) +#undef KERNEL +#undef _KERNEL +#define KERNEL 1 +#define _KERNEL 1 +#endif + +#include <sys/errno.h> +#include <sys/types.h> +#include <sys/param.h> +#include <sys/time.h> +#include <sys/socket.h> +#include <net/if.h> +#include <net/route.h> +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#include <netinet/ip_var.h> +#include <netinet/tcp.h> +#include "netinet/ip_compat.h" +#ifdef USE_INET6 +#include <netinet/icmp6.h> +#endif +#include <netinet/tcpip.h> +#include "netinet/ip_fil.h" +#include "netinet/ip_nat.h" +#include "netinet/ip_frag.h" +#include "netinet/ip_state.h" +#include "netinet/ip_proxy.h" +#include "netinet/ip_auth.h" +#include "netinet/ipf_stack.h" +#ifdef IPFILTER_SCAN +#include "netinet/ip_scan.h" +#endif +#ifdef IPFILTER_SYNC +#include "netinet/ip_sync.h" +#endif +#include "netinet/ip_pool.h" +#include "netinet/ip_htable.h" +#ifdef IPFILTER_COMPILED +#include "netinet/ip_rules.h" +#endif +#if defined(_KERNEL) +#include <sys/sunddi.h> +#endif + +#include "netinet/ipf_cfw.h" +#include <sys/file.h> +#include <sys/uio.h> +#include <sys/cred.h> +#include <sys/ddi.h> + +/* + * cfw == Cloud Firewall ==> routines for a global-zone data collector about + * ipf events for SmartOS. The only ones that CFW cares about are ones + * enforced by global-zone-controlled rulesets. + * + * The variable below is tied into the GZ-only ipf device /dev/ipfev, that + * flips this on when there is an open instance. This feature will also + * consume an fr_flag to have per-rule granularity. + */ +boolean_t ipf_cfwlog_enabled; + +/* + * Because ipf's test tools in $SRC/cmd insert all of these files, we need to + * stub out what we can vs. drag in even more headers and who knows what else. + */ +#ifdef _KERNEL + +/* + * CFW event ring buffer. Remember, this is for ALL ZONES because only a + * global-zone event-reader will be consuming these. 
In other words, it's
+ * not something to instantiate per-netstack.
+ *
+ * We may want to get more sophisticated and performant (e.g. per-processor),
+ * but for now keep the ring buffer simple and stupid.
+ * Must be a power of 2, to be bitmaskable, and must be countable by a uint_t
+ *
+ * Resizeable, see ipf_cfw_ring_resize() below.
+ */
+#define IPF_CFW_DEFAULT_RING_BUFS 1024
+#define IPF_CFW_MIN_RING_BUFS 8
+#define IPF_CFW_MAX_RING_BUFS (1U << 31U)
+
+/* Assume C's init-to-zero is sufficient for these types... */
+static kmutex_t cfw_ringlock;
+static kcondvar_t cfw_ringcv;
+
+static cfwev_t *cfw_ring; /* NULL by default. */
+static uint32_t cfw_ringsize; /* 0 by default, number of array elements. */
+static uint32_t cfw_ringmask; /* 0 by default; cfw_ringsize - 1 once sized. */
+
+/*
+ * If these are equal, we're either empty or full. cfw_ringstart is the
+ * oldest unread slot and cfw_ringend is the next slot to be written.
+ */
+static uint_t cfw_ringstart, cfw_ringend;
+static boolean_t cfw_ringfull; /* Tell the difference here! */
+/*
+ * Bean-counters: total events reported, and events lost (overwritten while
+ * the ring was full, discarded by a resize, or consumed by a failed read).
+ */
+static uint64_t cfw_evreports;
+static uint64_t cfw_evdrops;
+
+/*
+ * Place an event in the CFW event ring buffer.
+ *
+ * For now, be simple and drop the oldest event if we overflow. We may wish to
+ * selectively drop older events based on type in the future.
+ *
+ * The event is copied by value into the slot at cfw_ringend, so the caller
+ * keeps ownership of *event. When the ring is already full that slot holds
+ * the oldest event, so cfw_ringstart is advanced too and the loss is counted
+ * in cfw_evdrops. Everything happens under cfw_ringlock, and all waiters on
+ * cfw_ringcv are woken before the lock is dropped.
+ */
+static void
+ipf_cfwev_report(cfwev_t *event)
+{
+	mutex_enter(&cfw_ringlock);
+	cfw_ring[cfw_ringend] = *event;
+	cfw_ringend++;
+	cfw_ringend &= cfw_ringmask;
+	if (cfw_ringfull) {
+		cfw_ringstart++;
+		cfw_ringstart &= cfw_ringmask;
+		ASSERT3U(cfw_ringstart, ==, cfw_ringend);
+		DTRACE_PROBE(ipf__cfw__evdrop);
+		cfw_evdrops++;
+	} else {
+		cfw_ringfull = (cfw_ringend == cfw_ringstart);
+	}
+	cfw_evreports++;
+	cv_broadcast(&cfw_ringcv);
+	mutex_exit(&cfw_ringlock);
+}
+
+/*
+ * Provide access to multiple CFW events that can allow copying straight from
+ * the ring buffer up to userland. Requires a callback (which could call
+ * uiomove() directly, OR to a local still-in-kernel buffer) that must do the
+ * data copying-out.
+ *
+ * Callback function is of the form:
+ *
+ *	uint_t cfw_many_cb(cfwev_t *evptr, int num_avail, void *cbarg);
+ *
+ * The function must return how many events got consumed, which MUST be <= the
+ * number available. The function must ALSO UNDERSTAND that cfw_ringlock is
+ * held and must not be released during this time. The function may be called
+ * more than once, if the available buffers wrap-around OR "block" is set and
+ * we don't have enough buffers. If any callback returns 0, exit the function
+ * with however many were consumed.
+ *
+ * This function, like the callback, returns the number of events *CONSUMED*.
+ *
+ * . . .
+ *
+ * Tunables for ipf_cfwev_consume_many().
+ *
+ * If you wish to attempt to coalesce reads (to reduce the likelihood of one
+ * event at a time during high load) change the number of tries below to
+ * something not 0. Early experiments set this to 10.
+ *
+ * The wait between tries is in usecs in cfw_timeout_wait. The pessimal
+ * case for this is a timeout_wait-spaced trickle of one event at a time.
+ */
+uint_t cfw_timeout_tries = 0;
+uint_t cfw_timeout_wait = 10000;	/* 10ms wait. */
+
+typedef struct uio_error_s {
+	struct uio *ue_uio;	/* Destination handed to uiomove(). */
+	int ue_error;		/* First error recorded, or 0. */
+} uio_error_t;
+
+static uint_t
+ipf_cfwev_consume_many(uint_t num_requested, boolean_t block,
+    cfwmanycb_t cfw_many_cb, void *cbarg)
+{
+	uint_t consumed = 0, cb_consumed, contig_size;
+	uint_t timeout_tries = cfw_timeout_tries;
+	boolean_t eintr = B_FALSE;
+
+	mutex_enter(&cfw_ringlock);
+
+	while (num_requested > 0) {
+		clock_t delta;
+
+		/* Silly reality checks */
+		ASSERT3U(cfw_ringstart, <, cfw_ringsize);
+		ASSERT3U(cfw_ringend, <, cfw_ringsize);
+
+		if (cfw_ringstart > cfw_ringend || cfw_ringfull) {
+			/* We have from ringstart to the buffer's end. */
+			contig_size = cfw_ringsize - cfw_ringstart;
+		} else if (cfw_ringstart < cfw_ringend) {
+			/* We have no potential wrapping at this time. */
+			contig_size = cfw_ringend - cfw_ringstart;
+		} else if (block && cv_wait_sig(&cfw_ringcv, &cfw_ringlock)) {
+			/* Maybe something to consume now, try again. */
+			continue;
+		} else {
+			/*
+			 * Nothing (more) to consume, return! A blocking
+			 * caller that leaves empty-handed (cv_wait_sig() was
+			 * interrupted) is reported as EINTR below.
+			 */
+			eintr = (block && consumed == 0);
+			break;
+		}
+
+		/* Fewer events requested than are contiguously available. */
+		if (num_requested < contig_size)
+			contig_size = num_requested;
+
+		cb_consumed =
+		    cfw_many_cb(&(cfw_ring[cfw_ringstart]), contig_size, cbarg);
+		ASSERT3U(cb_consumed, <=, contig_size);
+
+		cfw_ringstart += cb_consumed;
+		ASSERT3U(cfw_ringstart, <=, cfw_ringmask + 1);
+		cfw_ringstart &= cfw_ringmask;	/* In case of wraparound. */
+		consumed += cb_consumed;
+		cfw_ringfull = (cfw_ringfull && cb_consumed == 0);
+		if (cb_consumed < contig_size) {
+			/*
+			 * Callback returned less than given.
+			 * This is likely a uio error, but we have
+			 * something. Get out of here.
+			 */
+			break;
+		}
+		ASSERT3U(cb_consumed, ==, contig_size);
+		num_requested -= contig_size;
+
+		if (num_requested == 0) {
+			/* All done! */
+			break;
+		}
+
+		if (cfw_ringstart != cfw_ringend) {
+			/*
+			 * We wrapped around the end of the buffer, and
+			 * we have more available to fill our request.
+			 */
+			ASSERT0(cfw_ringstart);
+			ASSERT(!cfw_ringfull);
+			continue;
+		}
+
+		/*
+		 * We obtained some of the events we requested, but not all.
+		 * Since we have nothing to consume, wait *a little* longer.
+		 */
+		if (timeout_tries == 0)
+			break;	/* Don't bother... */
+		delta = drv_usectohz(cfw_timeout_wait);
+		timeout_tries--;
+
+		switch (cv_reltimedwait_sig(&cfw_ringcv, &cfw_ringlock, delta,
+		    TR_CLOCK_TICK)) {
+		case 0:
+			/*
+			 * Received signal! Return what we have OR if we have
+			 * nothing, EINTR.
+			 */
+			DTRACE_PROBE1(ipf__cfw__timedsignal, int, consumed);
+			eintr = (consumed == 0);
+			num_requested = 0;
+			break;
+		case -1:
+			/* Time reached! Bail with what we got. */
+			DTRACE_PROBE(ipf__cfw__timedexpired);
+			num_requested = 0;
+			break;
+		default:
+			/* Aha! We've got more! */
+			DTRACE_PROBE(ipf__cfw__moredata);
+			break;
+		}
+	}
+
+	mutex_exit(&cfw_ringlock);
+	if (eintr)	/* cbarg is treated as a uio_error_t here. */
+		((uio_error_t *)cbarg)->ue_error = EINTR;
+	return (consumed);
+}
+
+/*
+ * SmartOS likes using the zone's debug id. Make sure we squirrel that away in
+ * the ipf netstack instance if it's not there.
+ *
+ * Returns ifs_zone_did, looking it up from ifs_zone on first use. If the zone
+ * cannot be found the cached value is left alone, so 0 is returned and the
+ * lookup is retried on the next call.
+ */
+static inline zoneid_t
+ifs_to_did(ipf_stack_t *ifs)
+{
+	if (ifs->ifs_zone_did == 0) {
+		zone_t *zone;
+
+		/*
+		 * We can't get the zone_did at initialization time because
+		 * most zone data isn't readily available then, cement the did
+		 * in place now.
+		 */
+		VERIFY3U(ifs->ifs_zone, !=, GLOBAL_ZONEID);
+		zone = zone_find_by_id(ifs->ifs_zone);
+		if (zone != NULL) {
+			ifs->ifs_zone_did = zone->zone_did;
+			zone_rele(zone);
+		}
+		/* Else we are either in shutdown or something weirder. */
+	}
+	return (ifs->ifs_zone_did);
+}
+
+/*
+ * ipf_block_cfwlog()
+ *
+ * Called by fr_check(). Record drop events for the global-zone data
+ * collector. Use rest-of-ipf-style names for the parameters.
+ *
+ * fr is the rule being enforced (nothing is recorded when it is NULL), fin
+ * describes the packet (protocol, ports, addresses, rule number) and ifs is
+ * the ipf stack the event is attributed to. The event is only queued when
+ * fin_v is IPv4 or IPv6; IPv4 addresses are stored as v4-mapped IPv6.
+ */
+void
+ipf_block_cfwlog(frentry_t *fr, fr_info_t *fin, ipf_stack_t *ifs)
+{
+	cfwev_t event = {0};
+
+	/*
+	 * We need a rule.
+	 * Capture failure by using dtrace on this function's entry.
+	 * 'ipf_block_cfwlog:entry /arg0 == NULL/ { printf("GOTCHA!\n"); }'
+	 */
+	if (fr == NULL)
+		return;
+
+	event.cfwev_type = CFWEV_BLOCK;
+	event.cfwev_length = sizeof (event);
+	/*
+	 * IPF code elsewhere does the cheesy single-flag check, even though
+	 * there are two flags in a rule (one for in, one for out).
+	 */
+	event.cfwev_direction = (fr->fr_flags & FR_INQUE) ?
+	    CFWDIR_IN : CFWDIR_OUT;
+
+	event.cfwev_protocol = fin->fin_p;
+	/*
+	 * NOTE: fin_*port is in host/native order, and ICMP info is here too.
+	 */
+	event.cfwev_sport = htons(fin->fin_sport);
+	event.cfwev_dport = htons(fin->fin_dport);
+
+	switch (fin->fin_v) {
+	case IPV4_VERSION:
+		IN6_INADDR_TO_V4MAPPED(&fin->fin_src, &event.cfwev_saddr);
+		IN6_INADDR_TO_V4MAPPED(&fin->fin_dst, &event.cfwev_daddr);
+		break;
+	case IPV6_VERSION:
+		event.cfwev_saddr = fin->fin_src6.in6;
+		event.cfwev_daddr = fin->fin_dst6.in6;
+		break;
+	default:
+		/* We should never reach here, but mark it if we do. */
+		DTRACE_PROBE1(ipf__cfw__frinfo__badipversion, frinfo_t *, fin);
+		return;
+	}
+
+	/*
+	 * uniqtime() is what ipf's GETKTIME() uses.
+	 * If cfwev_tstamp needs to be sourced from elsewhere, fix that here.
+	 */
+	uniqtime(&event.cfwev_tstamp);
+	event.cfwev_zonedid = ifs_to_did(ifs);
+	event.cfwev_ruleid = fin->fin_rule;
+	memcpy(event.cfwev_ruleuuid, fr->fr_uuid, sizeof (uuid_t));
+
+	ipf_cfwev_report(&event);
+}
+
+/*
+ * ipf_log_cfwlog()
+ *
+ * Twin of ipstate_log(), but records state events for the global-zone data
+ * collector.
+ *
+ * ISL_NEW and ISL_CLONE are recorded as CFWEV_BEGIN, the session-removal
+ * types are ignored, and any other type is recorded as CFWEV_BLOCK. The
+ * state entry must still have its rule attached (is_rule is dereferenced).
+ */
+void
+ipf_log_cfwlog(struct ipstate *is, uint_t type, ipf_stack_t *ifs)
+{
+	cfwev_t event = {0};
+
+	switch (type) {
+	case ISL_NEW:
+	case ISL_CLONE:
+		event.cfwev_type = CFWEV_BEGIN;
+		break;
+	case ISL_EXPIRE:
+	case ISL_FLUSH:
+	case ISL_REMOVE:
+	case ISL_KILLED:
+	case ISL_ORPHAN:
+		/*
+		 * We don't care about session disappearances in CFW logging
+		 * for now. (Possible future: CFWEV_END)
+		 */
+		return;
+	default:
+		event.cfwev_type = CFWEV_BLOCK;
+		break;
+	}
+
+	/*
+	 * IPF code elsewhere does the cheesy single-flag check, even though
+	 * there are two flags in a rule (one for in, one for out). Follow
+	 * suit here.
+	 */
+	event.cfwev_length = sizeof (event);
+	ASSERT(is->is_rule != NULL);
+	event.cfwev_direction = (is->is_rule->fr_flags & FR_INQUE) ?
+	    CFWDIR_IN : CFWDIR_OUT;
+	event.cfwev_protocol = is->is_p;
+	switch (is->is_p) {
+	case IPPROTO_TCP:
+	case IPPROTO_UDP:
+		/* NOTE: is_*port is in network order. */
+		event.cfwev_sport = is->is_sport;
+		event.cfwev_dport = is->is_dport;
+		break;
+	case IPPROTO_ICMP:
+	case IPPROTO_ICMPV6:
+		/* Scribble the ICMP type in sport... */
+		event.cfwev_sport = is->is_icmp.ici_type;
+		break;
+	/* Other protocols leave the event's port fields empty. */
+	}
+
+	switch(is->is_v) {
+	case IPV4_VERSION:
+		IN6_INADDR_TO_V4MAPPED(&is->is_src.in4, &event.cfwev_saddr);
+		IN6_INADDR_TO_V4MAPPED(&is->is_dst.in4, &event.cfwev_daddr);
+		break;
+	case IPV6_VERSION:
+		event.cfwev_saddr = is->is_src.in6;
+		event.cfwev_daddr = is->is_dst.in6;
+		break;
+	default:
+		/* Can't parse addresses if we don't know the version. Drop. */
+		DTRACE_PROBE1(ipf__cfw__ipstate__badipversion,
+		    struct ipstate *, is);
+		return;
+	}
+
+	/*
+	 * uniqtime() is what ipf's GETKTIME() uses.
+	 * If cfwev_tstamp needs to be sourced from elsewhere, fix that here.
+	 */
+	uniqtime(&event.cfwev_tstamp);
+	event.cfwev_zonedid = ifs_to_did(ifs);
+	event.cfwev_ruleid = is->is_rulen;
+	memcpy(event.cfwev_ruleuuid, is->is_uuid, sizeof (uuid_t));
+
+	ipf_cfwev_report(&event);
+}
+
+/*
+ * Callback routine we use for ipf_cfwev_consume_many().
+ * Returning 0 means error indication.
+ *
+ * cbarg is the caller's uio_error_t. Copies num_avail whole events out with
+ * uiomove() while cfw_ringlock is held, remembers any error in ue_error and
+ * refuses to copy again once an error has been recorded. Note that num_avail
+ * is a uint_t here, whereas the prototype sketched in the comment above
+ * ipf_cfwev_consume_many() shows an int.
+ */
+static uint_t
+cfwlog_read_manycb(cfwev_t *evptr, uint_t num_avail, void *cbarg)
+{
+	uio_error_t *ue = (uio_error_t *)cbarg;
+
+	ASSERT(MUTEX_HELD(&cfw_ringlock));
+
+	if (ue->ue_error != 0)
+		return (0);
+
+	ue->ue_error = uiomove((caddr_t)evptr, num_avail * sizeof (*evptr),
+	    UIO_READ, ue->ue_uio);
+	if (ue->ue_error != 0)
+		return (0);
+
+	return (num_avail);
+}
+
+/*
+ * Resize the CFW event ring buffer.
+ *
+ * The caller must ensure the new size is a power of 2 between
+ * IPF_CFW_{MIN,MAX}_RING_BUFS (inclusive) or the special values
+ * IPF_CFW_RING_ALLOCATE (first-time creation) or IPF_CFW_RING_DESTROY
+ * (netstack-unload destruction).
+ *
+ * Everything in the current ring will be destroyed (and reported as a drop)
+ * upon resize.
+ */
+int
+ipf_cfw_ring_resize(uint32_t newsize)
+{
+	ASSERT(MUTEX_HELD(&cfw_ringlock) || newsize == IPF_CFW_RING_ALLOCATE ||
+	    newsize == IPF_CFW_RING_DESTROY);
+
+	if (newsize == IPF_CFW_RING_ALLOCATE) {
+		if (cfw_ring != NULL)
+			return (EBUSY);
+		newsize = IPF_CFW_DEFAULT_RING_BUFS;
+		/* Fall through to allocating a new ring buffer. */
+	} else {
+		/*
+		 * We may be called during error cleanup, so be liberal here:
+		 * destroying a ring that was never allocated, or resizing to
+		 * the size we already have, is a successful no-op.
+		 */
+		if ((cfw_ring == NULL && newsize == IPF_CFW_RING_DESTROY) ||
+		    newsize == cfw_ringsize) {
+			return (0);
+		}
+		kmem_free(cfw_ring, cfw_ringsize * sizeof (cfwev_t));
+		cfw_ring = NULL;
+		if (cfw_ringfull) {
+			cfw_evdrops += cfw_ringsize;
+		} else if (cfw_ringstart > cfw_ringend) {
+			cfw_evdrops += cfw_ringend +
+			    (cfw_ringsize - cfw_ringstart);
+		} else {
+			cfw_evdrops += cfw_ringend - cfw_ringstart;
+		}
+		cfw_ringsize = cfw_ringmask = cfw_ringstart = cfw_ringend = 0;
+		cfw_ringfull = B_FALSE;
+
+		if (newsize == IPF_CFW_RING_DESTROY)
+			return (0);
+		/*
+		 * Keep the reports & drops around because if we're just
+		 * resizing, we need to know what we lost. Every event that
+		 * was still queued has just been added to cfw_evdrops.
+		 */
+	}
+
+	ASSERT(ISP2(newsize));
+	cfw_ring = kmem_alloc(newsize * sizeof (cfwev_t), KM_SLEEP);
+	/*
+	 * KM_SLEEP means we always succeed, so the only non-zero return from
+	 * this function is EBUSY, for IPF_CFW_RING_ALLOCATE on an existing ring.
+	 */
+	cfw_ringsize = newsize;
+	cfw_ringmask = cfw_ringsize - 1;
+
+	return (0);
+}
+
+/*
+ * ioctl handler for /dev/ipfev. Only supports SIOCIPFCFWCFG (get data
+ * collector statistics and configuration), and SIOCIPFCFWNEWSZ (resize the
+ * event ring buffer).
+ */
+/* ARGSUSED */
+int
+ipf_cfwlog_ioctl(dev_t dev, int cmd, intptr_t data, int mode, cred_t *cp,
+    int *rp)
+{
+	ipfcfwcfg_t cfginfo;
+	int error;
+
+	if (cmd != SIOCIPFCFWCFG && cmd != SIOCIPFCFWNEWSZ)
+		return (EIO);	/* Unsupported command. */
+
+	if (crgetzoneid(cp) != GLOBAL_ZONEID)
+		return (EACCES);	/* Global-zone callers only. */
+
+	error = COPYIN((caddr_t)data, (caddr_t)&cfginfo, sizeof (cfginfo));
+	if (error != 0)
+		return (EFAULT);
+
+	cfginfo.ipfcfwc_maxevsize = sizeof (cfwev_t);
+	mutex_enter(&cfw_ringlock);
+	cfginfo.ipfcfwc_evreports = cfw_evreports;
+	if (cmd == SIOCIPFCFWNEWSZ) {
+		uint32_t newsize = cfginfo.ipfcfwc_evringsize;
+
+		/*
+		 * Do ioctl parameter checking here, then call the resizer.
+		 * The requested size must be a power of 2 no smaller than
+		 * IPF_CFW_MIN_RING_BUFS and no larger than
+		 * IPF_CFW_MAX_RING_BUFS; anything else is EINVAL.
+		 */
+		if (newsize < IPF_CFW_MIN_RING_BUFS ||
+		    newsize > IPF_CFW_MAX_RING_BUFS || !ISP2(newsize)) {
+			error = EINVAL;
+		} else {
+			error = ipf_cfw_ring_resize(cfginfo.ipfcfwc_evringsize);
+		}
+	} else {
+		error = 0;
+	}
+	/*
+	 * Both cfw_evdrops and cfw_ringsize are affected by resize, so they
+	 * are sampled after the resize attempt. The structure is only copied
+	 * back out when no error occurred.
+	 */
+	cfginfo.ipfcfwc_evdrops = cfw_evdrops;
+	cfginfo.ipfcfwc_evringsize = cfw_ringsize;
+	mutex_exit(&cfw_ringlock);
+
+	if (error != 0)
+		return (error);
+
+	error = COPYOUT((caddr_t)&cfginfo, (caddr_t)data, sizeof (cfginfo));
+	if (error != 0)
+		return (EFAULT);
+
+	return (0);
+}
+
+/*
+ * Send events up via /dev/ipfev reads. Will return only complete events.
+ *
+ * A zero-length read returns 0 immediately, and a buffer too small for one
+ * cfwev_t is EINVAL. Reads block unless FNDELAY or FNONBLOCK is set in
+ * uio_fmode; a non-blocking read that finds nothing returns EWOULDBLOCK.
+ * Otherwise the return value is whatever was recorded in the uio_error_t:
+ * 0, EINTR, or the error from uiomove().
+ */
+/* ARGSUSED */
+int
+ipf_cfwlog_read(dev_t dev, struct uio *uio, cred_t *cp)
+{
+	uint_t requested, consumed;
+	uio_error_t ue = {uio, 0};
+	boolean_t block;
+
+	if (uio->uio_resid == 0)
+		return (0);
+	if (uio->uio_resid < sizeof (cfwev_t))
+		return (EINVAL);
+
+	block = ((uio->uio_fmode & (FNDELAY | FNONBLOCK)) == 0);
+	requested = uio->uio_resid / sizeof (cfwev_t);
+
+	/*
+	 * As stated earlier, ipf_cfwev_consume_many() takes a callback.
+	 * The callback may be called multiple times before we return.
+	 * The callback will execute uiomove().
+	 */
+	consumed = ipf_cfwev_consume_many(requested, block, cfwlog_read_manycb,
+	    &ue);
+	ASSERT3U(consumed, <=, requested);
+	if (!block && consumed == 0 && ue.ue_error == 0) {
+		/* No data available. */
+		ue.ue_error = EWOULDBLOCK;
+	} else if (ue.ue_error != 0 && ue.ue_error != EINTR) {
+		/*
+		 * We had a problem that wasn't simply a
+		 * case of cv_wait_sig() receiving a signal.
+		 * Events already taken off the ring are counted as drops.
+		 */
+		DTRACE_PROBE1(ipf__cfw__uiodiscard, int, consumed);
+		mutex_enter(&cfw_ringlock);
+		cfw_evdrops += consumed;
+		mutex_exit(&cfw_ringlock);
+	}
+	return (ue.ue_error);
+}
+
+#else	/* _KERNEL */
+
+/* Blank stubs to satisfy userland's test compilations. */
+
+int
+ipf_cfw_ring_resize(uint32_t a)
+{
+	return (0);
+}
+
+void
+ipf_log_cfwlog(struct ipstate *a, uint_t b, ipf_stack_t *c)
+{
+}
+
+void
+ipf_block_cfwlog(frentry_t *a, fr_info_t *b, ipf_stack_t *c)
+{
+}
+
+#endif	/* _KERNEL */
diff --git a/usr/src/uts/common/inet/ipf/fil.c b/usr/src/uts/common/inet/ipf/fil.c
index 78980be106..48fa6e7325 100644
--- a/usr/src/uts/common/inet/ipf/fil.c
+++ b/usr/src/uts/common/inet/ipf/fil.c
@@ -5,7 +5,7 @@
  *
  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  *
- * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright 2019 Joyent, Inc.
  */

 #if defined(KERNEL) || defined(_KERNEL)
@@ -2588,6 +2588,9 @@ ipf_stack_t *ifs;
 	}
 #endif

+	if (IFS_CFWLOG(ifs, fr) && FR_ISBLOCK(pass))
+		ipf_block_cfwlog(fr, fin, ifs);
+
 	/*
 	 * The FI_STATE flag is cleared here so that calling fr_checkstate
 	 * will work when called from inside of fr_fastroute. Although
diff --git a/usr/src/uts/common/inet/ipf/ip_fil_solaris.c b/usr/src/uts/common/inet/ipf/ip_fil_solaris.c
index 4cb67a2dab..6aba0a7562 100644
--- a/usr/src/uts/common/inet/ipf/ip_fil_solaris.c
+++ b/usr/src/uts/common/inet/ipf/ip_fil_solaris.c
@@ -5,7 +5,7 @@
  *
  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  *
- * Copyright 2018 Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
*/ #if !defined(lint) @@ -907,6 +907,9 @@ int *rp; return ENXIO; unit = isp->ipfs_minor; + if (unit == IPL_LOGEV) + return (ipf_cfwlog_ioctl(dev, cmd, data, mode, cp, rp)); + zid = crgetzoneid(cp); if (cmd == SIOCIPFZONESET) { if (zid == GLOBAL_ZONEID) @@ -1247,11 +1250,35 @@ cred_t *cred; if (IPL_LOGMAX < min) return ENXIO; + /* Special-case ipfev: global-zone-open only. */ + if (min == IPL_LOGEV) { + if (crgetzoneid(cred) != GLOBAL_ZONEID) + return (ENXIO); + /* + * Else enable the CFW logging of events. + * NOTE: For now, we only allow one open at a time. + * Use atomic_cas to confirm/deny. And also for now, + * assume sizeof (boolean_t) == sizeof (uint_t). + * + * Per the *_{refrele,REFRELE}() in other parts of inet, + * ensure all loads/stores complete before calling cas. + * membar_exit() does this. + */ + membar_exit(); + if (atomic_cas_uint(&ipf_cfwlog_enabled, 0, 1) != 0) + return (EBUSY); + } + minor = (minor_t)(uintptr_t)vmem_alloc(ipf_minor, 1, VM_BESTFIT | VM_SLEEP); if (ddi_soft_state_zalloc(ipf_state, minor) != 0) { vmem_free(ipf_minor, (void *)(uintptr_t)minor, 1); + if (min == IPL_LOGEV) { + /* See above... */ + membar_exit(); + VERIFY(atomic_cas_uint(&ipf_cfwlog_enabled, 1, 0) == 1); + } return ENXIO; } @@ -1273,6 +1300,7 @@ int flags, otype; cred_t *cred; { minor_t min = getminor(dev); + ipf_devstate_t *isp; #ifdef IPFDEBUG cmn_err(CE_CONT, "iplclose(%x,%x,%x,%x)\n", dev, flags, otype, cred); @@ -1281,6 +1309,15 @@ cred_t *cred; if (IPL_LOGMAX < min) return ENXIO; + isp = ddi_get_soft_state(ipf_state, min); + if (isp != NULL && isp->ipfs_minor == IPL_LOGEV) { + /* + * Disable CFW logging. See iplopen() for details. 
+ */ + membar_exit(); + VERIFY(atomic_cas_uint(&ipf_cfwlog_enabled, 1, 0) == 1); + } + ddi_soft_state_free(ipf_state, min); vmem_free(ipf_minor, (void *)(uintptr_t)min, 1); @@ -1311,6 +1348,9 @@ cred_t *cp; return ENXIO; unit = isp->ipfs_minor; + if (unit == IPL_LOGEV) + return (ipf_cfwlog_read(dev, uio, cp)); + /* * ipf_find_stack returns with a read lock on ifs_ipf_global */ @@ -1362,6 +1402,9 @@ cred_t *cp; return ENXIO; unit = isp->ipfs_minor; + if (unit == IPL_LOGEV) + return (EIO); /* ipfev doesn't support write yet. */ + /* * ipf_find_stack returns with a read lock on ifs_ipf_global */ diff --git a/usr/src/uts/common/inet/ipf/ip_log.c b/usr/src/uts/common/inet/ipf/ip_log.c index 584ee42d9a..7f5a1c839d 100644 --- a/usr/src/uts/common/inet/ipf/ip_log.c +++ b/usr/src/uts/common/inet/ipf/ip_log.c @@ -8,7 +8,7 @@ * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2014, Joyent, Inc. All rights reserved. + * Copyright 2019 Joyent, Inc. */ #include <sys/param.h> @@ -380,6 +380,7 @@ u_int flags; if (fin->fin_nattag != NULL) bcopy(fin->fin_nattag, (void *)&ipfl.fl_nattag, sizeof(ipfl.fl_nattag)); + bcopy(fin->fin_fr->fr_uuid, ipfl.fl_uuid, sizeof (ipfl.fl_uuid)); ipfl.fl_flags = flags; ipfl.fl_dir = fin->fin_out; ipfl.fl_lflags = fin->fin_flx; diff --git a/usr/src/uts/common/inet/ipf/ip_state.c b/usr/src/uts/common/inet/ipf/ip_state.c index 184f8775b6..a45bcbfdaf 100644 --- a/usr/src/uts/common/inet/ipf/ip_state.c +++ b/usr/src/uts/common/inet/ipf/ip_state.c @@ -5,7 +5,7 @@ * * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. * - * Copyright (c) 2014, Joyent, Inc. All rights reserved. + * Copyright 2019 Joyent, Inc. 
*/ #if defined(KERNEL) || defined(_KERNEL) @@ -108,6 +108,7 @@ struct file; # include <sys/systm.h> # endif #endif +#include <sys/uuid.h> /* END OF INCLUDES */ @@ -1445,6 +1446,7 @@ u_int flags; is->is_sti.tqe_flags |= TQE_RULEBASED; } is->is_tag = fr->fr_logtag; + memcpy(is->is_uuid, fr->fr_uuid, sizeof (uuid_t)); is->is_ifp[(out << 1) + 1] = fr->fr_ifas[1]; is->is_ifp[(1 - out) << 1] = fr->fr_ifas[2]; @@ -1524,6 +1526,9 @@ u_int flags; if (ifs->ifs_ipstate_logging) ipstate_log(is, ISL_NEW, ifs); + if (IFS_CFWLOG(ifs, is->is_rule)) + ipf_log_cfwlog(is, ISL_NEW, ifs); + RWLOCK_EXIT(&ifs->ifs_ipf_state); fin->fin_rev = IP6_NEQ(&is->is_dst, &fin->fin_daddr); fin->fin_flx |= FI_STATE; @@ -2314,6 +2319,8 @@ u_32_t cmask; is->is_flags &= ~(SI_W_SPORT|SI_W_DPORT); if ((flags & SI_CLONED) && ifs->ifs_ipstate_logging) ipstate_log(is, ISL_CLONE, ifs); + if ((flags & SI_CLONED) && IFS_CFWLOG(ifs, is->is_rule)) + ipf_log_cfwlog(is, ISL_CLONE, ifs); } ret = -1; @@ -3397,6 +3404,15 @@ ipf_stack_t *ifs; if (ifs->ifs_ipstate_logging != 0 && why != 0) ipstate_log(is, why, ifs); + /* + * For now, ipf_log_cfwlog() copes with all "why" values. Strictly + * speaking, though, they all map to one event (CFWEV_END), which for + * now is not supported, hence no code calling ipf_log_cfwlog() like + * below: + * + * if (why != 0 && IFS_CFWLOG(ifs, is->is_rule)) + * ipf_log_cfwlog(is, why, ifs); + */ if (is->is_rule != NULL) { is->is_rule->fr_statecnt--; @@ -3931,7 +3947,6 @@ int flags; return rval; } - /* ------------------------------------------------------------------------ */ /* Function: ipstate_log */ /* Returns: Nil */ diff --git a/usr/src/uts/common/inet/ipf/netinet/Makefile b/usr/src/uts/common/inet/ipf/netinet/Makefile index cca3b48ac4..88f91e633f 100644 --- a/usr/src/uts/common/inet/ipf/netinet/Makefile +++ b/usr/src/uts/common/inet/ipf/netinet/Makefile @@ -1,16 +1,15 @@ # -#ident "%Z%%M% %I% %E% SMI" -# # Copyright 2008 Sun Microsystems, Inc. All rights reserved. 
# Use is subject to license terms. +# Copyright 2019 Joyent, Inc. # # uts/common/inet/ipf/netinet/Makefile # # include global definitions include ../../../../../Makefile.master -HDRS= ipl.h ip_compat.h ip_fil.h ip_nat.h ip_proxy.h ip_state.h \ - ip_frag.h ip_auth.h ip_lookup.h ip_pool.h ip_htable.h ipf_stack.h +HDRS= ipl.h ip_compat.h ip_fil.h ip_nat.h ip_proxy.h ip_state.h ip_frag.h \ + ip_auth.h ip_lookup.h ip_pool.h ip_htable.h ipf_stack.h ipf_cfw.h ROOTDIRS= $(ROOT)/usr/include/netinet diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_fil.h b/usr/src/uts/common/inet/ipf/netinet/ip_fil.h index 90fc701ae1..cb7c7be36b 100644 --- a/usr/src/uts/common/inet/ipf/netinet/ip_fil.h +++ b/usr/src/uts/common/inet/ipf/netinet/ip_fil.h @@ -8,7 +8,7 @@ * * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. * - * Copyright (c) 2014, Joyent, Inc. All rights reserved. + * Copyright 2019, Joyent, Inc. */ #ifndef __IP_FIL_H__ @@ -16,6 +16,7 @@ #include "netinet/ip_compat.h" #include <sys/zone.h> +#include <sys/uuid.h> #ifdef SOLARIS #undef SOLARIS @@ -115,6 +116,8 @@ #define SIOCDELFR SIOCRMAFR #define SIOCINSFR SIOCINAFR # define SIOCIPFZONESET _IOWR('r', 97, struct ipfzoneobj) +# define SIOCIPFCFWCFG _IOR('r', 98, struct ipfcfwcfg) +# define SIOCIPFCFWNEWSZ _IOWR('r', 99, struct ipfcfwcfg) /* * What type of table is getting flushed? @@ -599,6 +602,7 @@ typedef struct frentry { u_32_t fr_flags; /* per-rule flags && options (see below) */ u_32_t fr_logtag; /* user defined log tag # */ u_32_t fr_collect; /* collection number */ + uuid_t fr_uuid; /* user defined uuid */ u_int fr_arg; /* misc. 
numeric arg for rule */ u_int fr_loglevel; /* syslog log facility + priority */ u_int fr_age[2]; /* non-TCP timeouts */ @@ -727,6 +731,7 @@ typedef struct frentry { #define FR_NEWISN 0x400000 /* new ISN for outgoing TCP */ #define FR_NOICMPERR 0x800000 /* do not match ICMP errors in state */ #define FR_STATESYNC 0x1000000 /* synchronize state to slave */ +#define FR_CFWLOG 0x2000000 /* Global CFW logging enabled */ #define FR_NOMATCH 0x8000000 /* no match occured */ /* 0x10000000 FF_LOGPASS */ /* 0x20000000 FF_LOGBLOCK */ @@ -882,6 +887,7 @@ typedef struct ipflog { u_32_t fl_lflags; u_32_t fl_logtag; ipftag_t fl_nattag; + uuid_t fl_uuid; u_short fl_plen; /* extra data after hlen */ u_short fl_loglevel; /* syslog log level */ char fl_group[FR_GROUPLEN]; @@ -930,6 +936,7 @@ typedef struct ipflog { #define IPSYNC_NAME "/dev/ipsync" #define IPSCAN_NAME "/dev/ipscan" #define IPLOOKUP_NAME "/dev/iplookup" +#define IPFEV_NAME "/dev/ipfev" #define IPL_LOGIPF 0 /* Minor device #'s for accessing logs */ #define IPL_LOGNAT 1 @@ -938,8 +945,9 @@ typedef struct ipflog { #define IPL_LOGSYNC 4 #define IPL_LOGSCAN 5 #define IPL_LOGLOOKUP 6 -#define IPL_LOGCOUNT 7 -#define IPL_LOGMAX 7 +#define IPL_LOGEV 7 +#define IPL_LOGCOUNT 8 +#define IPL_LOGMAX 8 #define IPL_LOGSIZE (IPL_LOGMAX + 1) #define IPL_LOGALL -1 #define IPL_LOGNONE -2 @@ -1180,6 +1188,21 @@ typedef struct ipfzoneobj { char ipfz_zonename[ZONENAME_MAX]; /* zone to act on */ } ipfzoneobj_t; +/* ioctl to grab CFW logging parameters */ +typedef struct ipfcfwcfg { + /* CFG => Max event size, NEWSZ => ignored in, like CFG out. */ + uint32_t ipfcfwc_maxevsize; + /* + * CFG => Current ring size, + * NEWSZ => New ring size, must be 2^N for 3 <= N <= 31. + */ + uint32_t ipfcfwc_evringsize; + /* CFG => Number of event reports, NEWSZ => ignored in, like CFG out. */ + uint64_t ipfcfwc_evreports; + /* CFG => Number of event drops, NEWSZ => ignored in, like CFG out. 
*/ + uint64_t ipfcfwc_evdrops; +} ipfcfwcfg_t; + #if defined(_KERNEL) /* Set ipfs_zoneid to this if no zone has been set: */ #define IPFS_ZONE_UNSET -2 @@ -1559,6 +1582,23 @@ extern int ipllog __P((int, fr_info_t *, void **, size_t *, int *, int, ipf_stack_t *)); extern void fr_logunload __P((ipf_stack_t *)); +/* SmartOS single-FD global-zone state accumulator (see cfw.c) */ +extern boolean_t ipf_cfwlog_enabled; +struct ipstate; /* Ugggh. */ +extern void ipf_log_cfwlog __P((struct ipstate *, uint_t, ipf_stack_t *)); +extern void ipf_block_cfwlog __P((frentry_t *, fr_info_t *, ipf_stack_t *)); +#define IFS_CFWLOG(ifs, fr) ((ifs)->ifs_gz_controlled && ipf_cfwlog_enabled &&\ + (fr) != NULL && ((fr)->fr_flags & FR_CFWLOG)) +struct cfwev_s; /* See ipf_cfw.h */ +extern boolean_t ipf_cfwev_consume __P((struct cfwev_s *, boolean_t)); +/* See cfw.c's ipf_cfwev_consume_many() for details. */ +typedef uint_t (*cfwmanycb_t) __P((struct cfwev_s *, uint_t, void *)); +extern int ipf_cfwlog_read __P((dev_t, struct uio *, struct cred *)); +extern int ipf_cfwlog_ioctl __P((dev_t, int, intptr_t, int, cred_t *, int *)); +#define IPF_CFW_RING_ALLOCATE 0 +#define IPF_CFW_RING_DESTROY 1 +extern int ipf_cfw_ring_resize(uint32_t); + extern frentry_t *fr_acctpkt __P((fr_info_t *, u_32_t *)); extern int fr_copytolog __P((int, char *, int)); extern u_short fr_cksum __P((mb_t *, ip_t *, int, void *)); diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_state.h b/usr/src/uts/common/inet/ipf/netinet/ip_state.h index 4c605c1b89..ef315d5ef1 100644 --- a/usr/src/uts/common/inet/ipf/netinet/ip_state.h +++ b/usr/src/uts/common/inet/ipf/netinet/ip_state.h @@ -8,11 +8,14 @@ * * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2019 Joyent, Inc. 
*/ #ifndef __IP_STATE_H__ #define __IP_STATE_H__ +#include <sys/uuid.h> + #if defined(__STDC__) || defined(__GNUC__) || defined(_AIX51) # define SIOCDELST _IOW('r', 61, struct ipfobj) #else @@ -66,6 +69,7 @@ typedef struct ipstate { /* in both directions */ u_32_t is_optmsk[2]; /* " " mask */ /* in both directions */ + uuid_t is_uuid; u_short is_sec; /* security options set */ u_short is_secmsk; /* " " mask */ u_short is_auth; /* authentication options set */ diff --git a/usr/src/uts/common/inet/ipf/netinet/ipf_cfw.h b/usr/src/uts/common/inet/ipf/netinet/ipf_cfw.h new file mode 100644 index 0000000000..1972d2b3f7 --- /dev/null +++ b/usr/src/uts/common/inet/ipf/netinet/ipf_cfw.h @@ -0,0 +1,69 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2019 Joyent, Inc. + */ + +#ifndef __IPF_CFW_H__ +#define __IPF_CFW_H__ + +#include <sys/types.h> +#include <inet/ip6.h> +#include <sys/uuid.h> + +/* ipf compiles this kernel file in userland tests too; define if missing. */ +#ifndef ASSERT3U +#define ASSERT3U(a, b, c) ASSERT((a) b (c)) +#endif /* ASSERT3U */ + +/* + * CFW Event, which is emitted to a global-zone listener. The global-zone + * listener solves the one-fd-per-zone problem of using each zone's ipmon. + * + * These must be 64-bit aligned because they form an array in-kernel. There + * might be reserved fields to ensure that alignment. 
+ */ +#define CFWEV_BLOCK 1 +#define CFWEV_BEGIN 2 +#define CFWEV_END 3 +#define CFWDIR_IN 1 +#define CFWDIR_OUT 2 + +typedef struct cfwev_s { + uint16_t cfwev_type; /* BEGIN, END, BLOCK */ + uint16_t cfwev_length; /* in bytes, so capped to 65535 bytes */ + zoneid_t cfwev_zonedid; /* Pullable from ipf_stack_t. */ + + uint32_t cfwev_ruleid; /* Pullable from fr_info_t. */ + uint16_t cfwev_sport; /* Source port (network order) */ + uint16_t cfwev_dport; /* Dest. port (network order) */ + + uint8_t cfwev_protocol; /* IPPROTO_* */ + /* "direction" informs if src/dst are local/remote or remote/local. */ + uint8_t cfwev_direction; + uint8_t cfwev_reserved[6]; /* Ensures 64-bit alignment. */ + + in6_addr_t cfwev_saddr; /* IPv4 addresses are V4MAPPED. */ + in6_addr_t cfwev_daddr; + + /* + * Because of 'struct timeval' being different between 32-bit and + * 64-bit ABIs, this interface is only usable by 64-bit binaries. + */ + struct timeval cfwev_tstamp; + + uuid_t cfwev_ruleuuid; /* Pullable from fr_info_t. */ +} cfwev_t; + + + +#endif /* __IPF_CFW_H__ */ diff --git a/usr/src/uts/common/inet/ipf/netinet/ipf_stack.h b/usr/src/uts/common/inet/ipf/netinet/ipf_stack.h index 5c156e9c44..0b2a8d826f 100644 --- a/usr/src/uts/common/inet/ipf/netinet/ipf_stack.h +++ b/usr/src/uts/common/inet/ipf/netinet/ipf_stack.h @@ -6,7 +6,7 @@ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * - * Copyright 2018 Joyent, Inc. All rights reserved. + * Copyright 2019 Joyent, Inc. 
*/ #ifndef __IPF_STACK_H__ @@ -46,6 +46,7 @@ struct ipf_stack { struct ipf_stack *ifs_gz_cont_ifs; netid_t ifs_netid; zoneid_t ifs_zone; + zoneid_t ifs_zone_did; boolean_t ifs_gz_controlled; /* ipf module */ @@ -316,6 +317,7 @@ struct ipf_stack { char *ifs_addmask_key; char *ifs_rn_zeros; char *ifs_rn_ones; + #ifdef KERNEL /* kstats for inbound and outbound */ kstat_t *ifs_kstatp[2]; diff --git a/usr/src/uts/common/inet/ipf/solaris.c b/usr/src/uts/common/inet/ipf/solaris.c index 5d56debc31..5ccbfa3188 100644 --- a/usr/src/uts/common/inet/ipf/solaris.c +++ b/usr/src/uts/common/inet/ipf/solaris.c @@ -6,7 +6,7 @@ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2014, Joyent, Inc. All rights reserved. + * Copyright 2019 Joyent, Inc. */ /* @@ -116,7 +116,7 @@ static void ipf_stack_shutdown __P((const netid_t, void *)); static int ipf_property_g_update __P((dev_info_t *)); static char *ipf_devfiles[] = { IPL_NAME, IPNAT_NAME, IPSTATE_NAME, IPAUTH_NAME, IPSYNC_NAME, IPSCAN_NAME, - IPLOOKUP_NAME, NULL }; + IPLOOKUP_NAME, IPFEV_NAME, NULL }; extern void *ipf_state; /* DDI state */ extern vmem_t *ipf_minor; /* minor number arena */ @@ -741,6 +741,9 @@ ddi_attach_cmd_t cmd; ipf_dev_info = dip; + if (ipf_cfw_ring_resize(IPF_CFW_RING_ALLOCATE) != 0) + goto attach_failed; + ipfncb = net_instance_alloc(NETINFO_VERSION); if (ipfncb == NULL) goto attach_failed; @@ -768,6 +771,7 @@ ddi_attach_cmd_t cmd; } attach_failed: + (void) ipf_cfw_ring_resize(IPF_CFW_RING_DESTROY); ddi_remove_minor_node(dip, NULL); ddi_prop_remove_all(dip); ddi_soft_state_fini(&ipf_state); @@ -795,6 +799,7 @@ ddi_detach_cmd_t cmd; * framework guarantees we are not active with this devinfo * node in any other entry points at this time. 
*/ + (void) ipf_cfw_ring_resize(IPF_CFW_RING_DESTROY); ddi_prop_remove_all(dip); i = ddi_get_instance(dip); ddi_remove_minor_node(dip, NULL); diff --git a/usr/src/uts/intel/ipf/ipf.global-objs.debug64 b/usr/src/uts/intel/ipf/ipf.global-objs.debug64 index 5ebc7eed2b..ea5510a78d 100644 --- a/usr/src/uts/intel/ipf/ipf.global-objs.debug64 +++ b/usr/src/uts/intel/ipf/ipf.global-objs.debug64 @@ -22,9 +22,21 @@ # Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -# Copyright 2018 Joyent, Inc. All rights reserved +# Copyright 2019 Joyent, Inc. # +cfw_evdrops +cfw_evreports +cfw_ring +cfw_ringcv +cfw_ringend +cfw_ringfull +cfw_ringlock +cfw_ringmask +cfw_ringsize +cfw_ringstart +cfw_timeout_tries +cfw_timeout_wait fr_availfuncs fr_features fr_objbytes @@ -68,6 +80,7 @@ icmptoicmp6unreach idletime_tab ip6exthdr ipf_cb_ops +ipf_cfwlog_enabled ipf_dev_info ipf_devfiles ipf_eth_bcast_addr diff --git a/usr/src/uts/sparc/ipf/ipf.global-objs.debug64 b/usr/src/uts/sparc/ipf/ipf.global-objs.debug64 index cb3e5485b2..b42dca618a 100644 --- a/usr/src/uts/sparc/ipf/ipf.global-objs.debug64 +++ b/usr/src/uts/sparc/ipf/ipf.global-objs.debug64 @@ -22,9 +22,21 @@ # Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -# Copyright 2013 Joyent, Inc. All rights reserved +# Copyright 2019 Joyent, Inc. # +cfw_evdrops +cfw_evreports +cfw_ring +cfw_ringcv +cfw_ringend +cfw_ringfull +cfw_ringlock +cfw_ringmask +cfw_ringsize +cfw_ringstart +cfw_timeout_tries +cfw_timeout_wait hook4_vnd_in hook4_vnd_out hook6_vnd_in @@ -60,6 +72,7 @@ icmptoicmp6unreach idletime_tab ip6exthdr ipf_cb_ops +ipf_cfwlog_enabled ipf_dev_info ipf_devfiles ipf_kstat_tmp |