diff options
Diffstat (limited to 'usr/src/uts/common')
56 files changed, 36471 insertions, 307 deletions
diff --git a/usr/src/uts/common/Makefile.rules b/usr/src/uts/common/Makefile.rules index 82fdea4c43..7de4d5f59e 100644 --- a/usr/src/uts/common/Makefile.rules +++ b/usr/src/uts/common/Makefile.rules @@ -362,13 +362,13 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/inet/tcp/%.c $(CTFCONVERT_O) -IPFFLAGS=-I $(UTSBASE)/common/inet/ipf -$(OBJS_DIR)/%.o: $(SRC)/common/ipf/%.c +IPFFLAGS=-I $(UTSBASE)/common/inet/pfil +$(OBJS_DIR)/%.o: $(UTSBASE)/common/inet/ipf/%.c $(COMPILE.c) $(IPFFLAGS) -o $@ $< $(CTFCONVERT_O) -IPFFLAG2=-I $(SRC)/common/ipf -$(OBJS_DIR)/%.o: $(UTSBASE)/common/inet/ipf/%.c +IPFFLAG2=-I $(UTSBASE)/common/inet/ipf +$(OBJS_DIR)/%.o: $(UTSBASE)/common/inet/pfil/%.c $(COMPILE.c) $(IPFFLAG2) -o $@ $< $(CTFCONVERT_O) @@ -1103,10 +1103,10 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/common/inet/arp/%.c $(LINTS_DIR)/%.ln: $(UTSBASE)/common/inet/ip/%.c @($(LHEAD) $(LINT.c) $< $(LTAIL)) -$(LINTS_DIR)/%.ln: $(SRC)/common/ipf/%.c +$(LINTS_DIR)/%.ln: $(UTSBASE)/common/inet/ipf/%.c @($(LHEAD) $(LINT.c) $(IPFFLAGS) $< $(LTAIL)) -$(LINTS_DIR)/%.ln: $(UTSBASE)/common/inet/ipf/%.c +$(LINTS_DIR)/%.ln: $(UTSBASE)/common/inet/pfil/%.c @($(LHEAD) $(LINT.c) $(IPFFLAG2) $< $(LTAIL)) $(LINTS_DIR)/%.ln: $(UTSBASE)/common/inet/kssl/%.c diff --git a/usr/src/uts/common/inet/ipf/bpf-ipf.h b/usr/src/uts/common/inet/ipf/bpf-ipf.h new file mode 100644 index 0000000000..544455e5ff --- /dev/null +++ b/usr/src/uts/common/inet/ipf/bpf-ipf.h @@ -0,0 +1,450 @@ +/*- + * Copyright (c) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from the Stanford/CMU enet packet filter, + * (net/enet.c) distributed as part of 4.3BSD, and code contributed + * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence + * Berkeley Laboratory. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)bpf.h 7.1 (Berkeley) 5/7/91 + * + * @(#) $Header: /devel/CVS/IP-Filter/bpf-ipf.h,v 2.1 2002/10/26 12:14:26 darrenr Exp $ (LBL) + */ + +#ifndef BPF_MAJOR_VERSION + +#ifdef __cplusplus +extern "C" { +#endif + +/* BSD style release date */ +#define BPF_RELEASE 199606 + +typedef int bpf_int32; +typedef u_int bpf_u_int32; + +/* + * Alignment macros. BPF_WORDALIGN rounds up to the next + * even multiple of BPF_ALIGNMENT. 
+ */ +#ifndef __NetBSD__ +#define BPF_ALIGNMENT sizeof(bpf_int32) +#else +#define BPF_ALIGNMENT sizeof(long) +#endif +#define BPF_WORDALIGN(x) (((x)+(BPF_ALIGNMENT-1))&~(BPF_ALIGNMENT-1)) + +#define BPF_MAXINSNS 512 +#define BPF_MAXBUFSIZE 0x8000 +#define BPF_MINBUFSIZE 32 + +/* + * Structure for BIOCSETF. + */ +struct bpf_program { + u_int bf_len; + struct bpf_insn *bf_insns; +}; + +/* + * Struct returned by BIOCGSTATS. + */ +struct bpf_stat { + u_int bs_recv; /* number of packets received */ + u_int bs_drop; /* number of packets dropped */ +}; + +/* + * Struct returned by BIOCVERSION. This represents the version number of + * the filter language described by the instruction encodings below. + * bpf understands a program iff kernel_major == filter_major && + * kernel_minor >= filter_minor, that is, if the value returned by the + * running kernel has the same major number and a minor number equal + * to or greater than that of the filter being downloaded. Otherwise, the + * results are undefined, meaning an error may be returned or packets + * may be accepted haphazardly. + * It has nothing to do with the source code version. + */ +struct bpf_version { + u_short bv_major; + u_short bv_minor; +}; +/* Current version number of filter architecture. */ +#define BPF_MAJOR_VERSION 1 +#define BPF_MINOR_VERSION 1 + +/* + * BPF ioctls + * + * The first set is for compatibility with Sun's pcc style + * header files. If you're using gcc, we assume that you + * have run fixincludes so the latter set should work. 
+ */ +#if (defined(sun) || defined(ibm032)) && !defined(__GNUC__) +#define BIOCGBLEN _IOR(B,102, u_int) +#define BIOCSBLEN _IOWR(B,102, u_int) +#define BIOCSETF _IOW(B,103, struct bpf_program) +#define BIOCFLUSH _IO(B,104) +#define BIOCPROMISC _IO(B,105) +#define BIOCGDLT _IOR(B,106, u_int) +#define BIOCGETIF _IOR(B,107, struct ifreq) +#define BIOCSETIF _IOW(B,108, struct ifreq) +#define BIOCSRTIMEOUT _IOW(B,109, struct timeval) +#define BIOCGRTIMEOUT _IOR(B,110, struct timeval) +#define BIOCGSTATS _IOR(B,111, struct bpf_stat) +#define BIOCIMMEDIATE _IOW(B,112, u_int) +#define BIOCVERSION _IOR(B,113, struct bpf_version) +#define BIOCSTCPF _IOW(B,114, struct bpf_program) +#define BIOCSUDPF _IOW(B,115, struct bpf_program) +#else +#define BIOCGBLEN _IOR('B',102, u_int) +#define BIOCSBLEN _IOWR('B',102, u_int) +#define BIOCSETF _IOW('B',103, struct bpf_program) +#define BIOCFLUSH _IO('B',104) +#define BIOCPROMISC _IO('B',105) +#define BIOCGDLT _IOR('B',106, u_int) +#define BIOCGETIF _IOR('B',107, struct ifreq) +#define BIOCSETIF _IOW('B',108, struct ifreq) +#define BIOCSRTIMEOUT _IOW('B',109, struct timeval) +#define BIOCGRTIMEOUT _IOR('B',110, struct timeval) +#define BIOCGSTATS _IOR('B',111, struct bpf_stat) +#define BIOCIMMEDIATE _IOW('B',112, u_int) +#define BIOCVERSION _IOR('B',113, struct bpf_version) +#define BIOCSTCPF _IOW('B',114, struct bpf_program) +#define BIOCSUDPF _IOW('B',115, struct bpf_program) +#endif + +/* + * Structure prepended to each packet. + */ +struct bpf_hdr { + struct timeval bh_tstamp; /* time stamp */ + bpf_u_int32 bh_caplen; /* length of captured portion */ + bpf_u_int32 bh_datalen; /* original length of packet */ + u_short bh_hdrlen; /* length of bpf header (this struct + plus alignment padding) */ +}; +/* + * Because the structure above is not a multiple of 4 bytes, some compilers + * will insist on inserting padding; hence, sizeof(struct bpf_hdr) won't work. + * Only the kernel needs to know about it; applications use bh_hdrlen. 
+ */ +#if defined(KERNEL) || defined(_KERNEL) +#define SIZEOF_BPF_HDR 18 +#endif + +/* + * Data-link level type codes. + */ + +/* + * These are the types that are the same on all platforms; on other + * platforms, a <net/bpf.h> should be supplied that defines the additional + * DLT_* codes appropriately for that platform (the BSDs, for example, + * should not just pick up this version of "bpf.h"; they should also define + * the additional DLT_* codes used by their kernels, as well as the values + * defined here - and, if the values they use for particular DLT_ types + * differ from those here, they should use their values, not the ones + * here). + */ +#define DLT_NULL 0 /* no link-layer encapsulation */ +#define DLT_EN10MB 1 /* Ethernet (10Mb) */ +#define DLT_EN3MB 2 /* Experimental Ethernet (3Mb) */ +#define DLT_AX25 3 /* Amateur Radio AX.25 */ +#define DLT_PRONET 4 /* Proteon ProNET Token Ring */ +#define DLT_CHAOS 5 /* Chaos */ +#define DLT_IEEE802 6 /* IEEE 802 Networks */ +#define DLT_ARCNET 7 /* ARCNET */ +#define DLT_SLIP 8 /* Serial Line IP */ +#define DLT_PPP 9 /* Point-to-point Protocol */ +#define DLT_FDDI 10 /* FDDI */ + +/* + * These are values from the traditional libpcap "bpf.h". + * Ports of this to particular platforms should replace these definitions + * with the ones appropriate to that platform, if the values are + * different on that platform. + */ +#define DLT_ATM_RFC1483 11 /* LLC/SNAP encapsulated atm */ +#define DLT_RAW 12 /* raw IP */ + +/* + * These are values from BSD/OS's "bpf.h". + * These are not the same as the values from the traditional libpcap + * "bpf.h"; however, these values shouldn't be generated by any + * OS other than BSD/OS, so the correct values to use here are the + * BSD/OS values. 
+ * + * Platforms that have already assigned these values to other + * DLT_ codes, however, should give these codes the values + * from that platform, so that programs that use these codes will + * continue to compile - even though they won't correctly read + * files of these types. + */ +#ifdef __NetBSD__ +#ifndef DLT_SLIP_BSDOS +#define DLT_SLIP_BSDOS 13 /* BSD/OS Serial Line IP */ +#define DLT_PPP_BSDOS 14 /* BSD/OS Point-to-point Protocol */ +#endif +#else +#define DLT_SLIP_BSDOS 15 /* BSD/OS Serial Line IP */ +#define DLT_PPP_BSDOS 16 /* BSD/OS Point-to-point Protocol */ +#endif + +#define DLT_ATM_CLIP 19 /* Linux Classical-IP over ATM */ + +/* + * These values are defined by NetBSD; other platforms should refrain from + * using them for other purposes, so that NetBSD savefiles with link + * types of 50 or 51 can be read as this type on all platforms. + */ +#define DLT_PPP_SERIAL 50 /* PPP over serial with HDLC encapsulation */ +#define DLT_PPP_ETHER 51 /* PPP over Ethernet */ + +/* + * Values between 100 and 103 are used in capture file headers as + * link-layer types corresponding to DLT_ types that differ + * between platforms; don't use those values for new DLT_ types. + */ + +/* + * This value was defined by libpcap 0.5; platforms that have defined + * it with a different value should define it here with that value - + * a link type of 104 in a save file will be mapped to DLT_C_HDLC, + * whatever value that happens to be, so programs will correctly + * handle files with that link type regardless of the value of + * DLT_C_HDLC. + * + * The name DLT_C_HDLC was used by BSD/OS; we use that name for source + * compatibility with programs written for BSD/OS. + * + * libpcap 0.5 defined it as DLT_CHDLC; we define DLT_CHDLC as well, + * for source compatibility with programs written for libpcap 0.5. 
+ */ +#define DLT_C_HDLC 104 /* Cisco HDLC */ +#define DLT_CHDLC DLT_C_HDLC + +#define DLT_IEEE802_11 105 /* IEEE 802.11 wireless */ + +/* + * Values between 106 and 107 are used in capture file headers as + * link-layer types corresponding to DLT_ types that might differ + * between platforms; don't use those values for new DLT_ types. + */ + +/* + * OpenBSD DLT_LOOP, for loopback devices; it's like DLT_NULL, except + * that the AF_ type in the link-layer header is in network byte order. + * + * OpenBSD defines it as 12, but that collides with DLT_RAW, so we + * define it as 108 here. If OpenBSD picks up this file, it should + * define DLT_LOOP as 12 in its version, as per the comment above - + * and should not use 108 as a DLT_ value. + */ +#define DLT_LOOP 108 + +/* + * Values between 109 and 112 are used in capture file headers as + * link-layer types corresponding to DLT_ types that might differ + * between platforms; don't use those values for new DLT_ types + * other than the corresponding DLT_ types. + */ + +/* + * This is for Linux cooked sockets. + */ +#define DLT_LINUX_SLL 113 + +/* + * Apple LocalTalk hardware. + */ +#define DLT_LTALK 114 + +/* + * Acorn Econet. + */ +#define DLT_ECONET 115 + +/* + * Reserved for use with OpenBSD ipfilter. + */ +#define DLT_IPFILTER 116 + +/* + * Reserved for use in capture-file headers as a link-layer type + * corresponding to OpenBSD DLT_PFLOG; DLT_PFLOG is 17 in OpenBSD, + * but that's DLT_LANE8023 in SuSE 6.3, so we can't use 17 for it + * in capture-file headers. + */ +#define DLT_PFLOG 117 + +/* + * Registered for Cisco-internal use. + */ +#define DLT_CISCO_IOS 118 + +/* + * Reserved for 802.11 cards using the Prism II chips, with a link-layer + * header including Prism monitor mode information plus an 802.11 + * header. + */ +#define DLT_PRISM_HEADER 119 + +/* + * Reserved for Aironet 802.11 cards, with an Aironet link-layer header + * (see Doug Ambrisko's FreeBSD patches). 
+ */ +#define DLT_AIRONET_HEADER 120 + +/* + * Reserved for Siemens HiPath HDLC. + */ +#define DLT_HHDLC 121 + +/* + * Reserved for RFC 2625 IP-over-Fibre Channel, as per a request from + * Don Lee <donlee@cray.com>. + * + * This is not for use with raw Fibre Channel, where the link-layer + * header starts with a Fibre Channel frame header; it's for IP-over-FC, + * where the link-layer header starts with an RFC 2625 Network_Header + * field. + */ +#define DLT_IP_OVER_FC 122 + +/* + * The instruction encodings. + */ +/* instruction classes */ +#define BPF_CLASS(code) ((code) & 0x07) +#define BPF_LD 0x00 +#define BPF_LDX 0x01 +#define BPF_ST 0x02 +#define BPF_STX 0x03 +#define BPF_ALU 0x04 +#define BPF_JMP 0x05 +#define BPF_RET 0x06 +#define BPF_MISC 0x07 + +/* ld/ldx fields */ +#define BPF_SIZE(code) ((code) & 0x18) +#define BPF_W 0x00 +#define BPF_H 0x08 +#define BPF_B 0x10 +#define BPF_MODE(code) ((code) & 0xe0) +#define BPF_IMM 0x00 +#define BPF_ABS 0x20 +#define BPF_IND 0x40 +#define BPF_MEM 0x60 +#define BPF_LEN 0x80 +#define BPF_MSH 0xa0 + +/* alu/jmp fields */ +#define BPF_OP(code) ((code) & 0xf0) +#define BPF_ADD 0x00 +#define BPF_SUB 0x10 +#define BPF_MUL 0x20 +#define BPF_DIV 0x30 +#define BPF_OR 0x40 +#define BPF_AND 0x50 +#define BPF_LSH 0x60 +#define BPF_RSH 0x70 +#define BPF_NEG 0x80 +#define BPF_JA 0x00 +#define BPF_JEQ 0x10 +#define BPF_JGT 0x20 +#define BPF_JGE 0x30 +#define BPF_JSET 0x40 +#define BPF_SRC(code) ((code) & 0x08) +#define BPF_K 0x00 +#define BPF_X 0x08 + +/* ret - BPF_K and BPF_X also apply */ +#define BPF_RVAL(code) ((code) & 0x18) +#define BPF_A 0x10 + +/* misc */ +#define BPF_MISCOP(code) ((code) & 0xf8) +#define BPF_TAX 0x00 +#define BPF_TXA 0x80 + +/* + * The instruction data structure. + */ +struct bpf_insn { + u_short code; + u_char jt; + u_char jf; + bpf_int32 k; +}; + +/* + * Macros for insn array initializers. 
+ */ +#define BPF_STMT(code, k) { (u_short)(code), 0, 0, k } +#define BPF_JUMP(code, k, jt, jf) { (u_short)(code), jt, jf, k } + +#if defined(BSD) && (defined(KERNEL) || defined(_KERNEL)) +/* + * Systems based on non-BSD kernels don't have ifnet's (or they don't mean + * anything if it is in <net/if.h>) and won't work like this. + */ +# if __STDC__ +extern void bpf_tap(struct ifnet *, u_char *, u_int); +extern void bpf_mtap(struct ifnet *, struct mbuf *); +extern void bpfattach(struct ifnet *, u_int, u_int); +extern void bpfilterattach(int); +# else +extern void bpf_tap(); +extern void bpf_mtap(); +extern void bpfattach(); +extern void bpfilterattach(); +# endif /* __STDC__ */ +#endif /* BSD && (_KERNEL || KERNEL) */ +#if __STDC__ || defined(__cplusplus) +extern int bpf_validate(struct bpf_insn *, int); +extern u_int bpf_filter(struct bpf_insn *, u_char *, u_int, u_int); +#else +extern int bpf_validate(); +extern u_int bpf_filter(); +#endif + +/* + * Number of scratch memory words (for BPF_LD|BPF_MEM and BPF_ST). + */ +#define BPF_MEMWORDS 16 + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/usr/src/uts/common/inet/ipf/fil.c b/usr/src/uts/common/inet/ipf/fil.c new file mode 100644 index 0000000000..1dd95e9ba5 --- /dev/null +++ b/usr/src/uts/common/inet/ipf/fil.c @@ -0,0 +1,6397 @@ +/* + * Copyright (C) 1993-2003 by Darren Reed. + * + * See the IPFILTER.LICENCE file for details on licencing. + * + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#if defined(KERNEL) || defined(_KERNEL) +# undef KERNEL +# undef _KERNEL +# define KERNEL 1 +# define _KERNEL 1 +#endif +#include <sys/errno.h> +#include <sys/types.h> +#include <sys/param.h> +#include <sys/time.h> +#if defined(__NetBSD__) +# if (NetBSD >= 199905) && !defined(IPFILTER_LKM) && defined(_KERNEL) +# include "opt_ipfilter_log.h" +# endif +#endif +#if defined(_KERNEL) && defined(__FreeBSD_version) && \ + (__FreeBSD_version >= 220000) +# if (__FreeBSD_version >= 400000) +# if !defined(IPFILTER_LKM) +# include "opt_inet6.h" +# endif +# if (__FreeBSD_version == 400019) +# define CSUM_DELAY_DATA +# endif +# endif +# include <sys/filio.h> +#else +# include <sys/ioctl.h> +#endif +#if !defined(_AIX51) +# include <sys/fcntl.h> +#endif +#if defined(_KERNEL) +# include <sys/systm.h> +# include <sys/file.h> +#else +# include <stdio.h> +# include <string.h> +# include <stdlib.h> +# include <stddef.h> +# include <sys/file.h> +# define _KERNEL +# ifdef __OpenBSD__ +struct file; +# endif +# include <sys/uio.h> +# undef _KERNEL +#endif +#if !defined(__SVR4) && !defined(__svr4__) && !defined(__hpux) && \ + !defined(linux) +# include <sys/mbuf.h> +#else +# if !defined(linux) +# include <sys/byteorder.h> +# endif +# if (SOLARIS2 < 5) && defined(sun) +# include <sys/dditypes.h> +# endif +#endif +#ifdef __hpux +# define _NET_ROUTE_INCLUDED +#endif +#if !defined(linux) +# include <sys/protosw.h> +#endif +#include <sys/socket.h> +#include <net/if.h> +#ifdef sun +# include <net/af.h> +#endif +#if !defined(_KERNEL) && defined(__FreeBSD__) +# include "radix_ipf.h" +#endif +#include <net/route.h> +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#if !defined(linux) +# include <netinet/ip_var.h> +#endif +#if defined(__sgi) && defined(IFF_DRVRLOCK) /* IRIX 6 */ +# include <sys/hashing.h> +# include <netinet/in_var.h> +#endif +#include <netinet/tcp.h> +#if (!defined(__sgi) && !defined(AIX)) || 
defined(_KERNEL) +# include <netinet/udp.h> +# include <netinet/ip_icmp.h> +#endif +#ifdef __hpux +# undef _NET_ROUTE_INCLUDED +#endif +#include "netinet/ip_compat.h" +#ifdef USE_INET6 +# include <netinet/icmp6.h> +# if !SOLARIS && defined(_KERNEL) && !defined(__osf__) && !defined(__hpux) +# include <netinet6/in6_var.h> +# endif +#endif +#include <netinet/tcpip.h> +#include "netinet/ip_fil.h" +#include "netinet/ip_nat.h" +#include "netinet/ip_frag.h" +#include "netinet/ip_state.h" +#include "netinet/ip_proxy.h" +#include "netinet/ip_auth.h" +#ifdef IPFILTER_SCAN +# include "netinet/ip_scan.h" +#endif +#ifdef IPFILTER_SYNC +# include "netinet/ip_sync.h" +#endif +#include "netinet/ip_pool.h" +#include "netinet/ip_htable.h" +#ifdef IPFILTER_COMPILED +# include "netinet/ip_rules.h" +#endif +#if defined(IPFILTER_BPF) && defined(_KERNEL) +# include <net/bpf.h> +#endif +#if defined(__FreeBSD_version) && (__FreeBSD_version >= 300000) +# include <sys/malloc.h> +# if defined(_KERNEL) && !defined(IPFILTER_LKM) +# include "opt_ipfilter.h" +# endif +#endif +#include "netinet/ipl.h" +/* END OF INCLUDES */ + +#if !defined(lint) +static const char sccsid[] = "@(#)fil.c 1.36 6/5/96 (C) 1993-2000 Darren Reed"; +static const char rcsid[] = "@(#)$Id: fil.c,v 2.243.2.64 2005/08/13 05:19:59 darrenr Exp $"; +#endif + +#ifndef _KERNEL +# include "ipf.h" +# include "ipt.h" +# include "bpf-ipf.h" +extern int opts; + +# define FR_VERBOSE(verb_pr) verbose verb_pr +# define FR_DEBUG(verb_pr) debug verb_pr +#else /* #ifndef _KERNEL */ +# define FR_VERBOSE(verb_pr) +# define FR_DEBUG(verb_pr) +#endif /* _KERNEL */ + + +fr_info_t frcache[2][8]; +struct filterstats frstats[2] = { { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 } }; +struct frentry *ipfilter[2][2] = { { NULL, NULL }, { NULL, NULL } }, + *ipfilter6[2][2] = { { NULL, NULL }, { NULL, NULL } }, + *ipacct6[2][2] = { { NULL, NULL }, { NULL, NULL } }, + *ipacct[2][2] = { { NULL, NULL }, { NULL, NULL } }, + *ipnatrules[2][2] = { { NULL, NULL }, { NULL, 
NULL } }; +struct frgroup *ipfgroups[IPL_LOGSIZE][2]; +char ipfilter_version[] = IPL_VERSION; +int fr_refcnt = 0; +/* + * For fr_running: + * 0 == loading, 1 = running, -1 = disabled, -2 = unloading + */ +int fr_running = 0; +int fr_flags = IPF_LOGGING; +int fr_active = 0; +int fr_control_forwarding = 0; +int fr_update_ipid = 0; +u_short fr_ip_id = 0; +int fr_chksrc = 0; /* causes a system crash if enabled */ +int fr_minttl = 4; +int fr_icmpminfragmtu = 68; +u_long fr_frouteok[2] = {0, 0}; +u_long fr_userifqs = 0; +u_long fr_badcoalesces[2] = {0, 0}; +u_char ipf_iss_secret[32]; +#if defined(IPFILTER_DEFAULT_BLOCK) +int fr_pass = FR_BLOCK|FR_NOMATCH; +#else +int fr_pass = (IPF_DEFAULT_PASS)|FR_NOMATCH; +#endif +int fr_features = 0 +#ifdef IPFILTER_LKM + | IPF_FEAT_LKM +#endif +#ifdef IPFILTER_LOG + | IPF_FEAT_LOG +#endif +#ifdef IPFILTER_LOOKUP + | IPF_FEAT_LOOKUP +#endif +#ifdef IPFILTER_BPF + | IPF_FEAT_BPF +#endif +#ifdef IPFILTER_COMPILED + | IPF_FEAT_COMPILED +#endif +#ifdef IPFILTER_CKSUM + | IPF_FEAT_CKSUM +#endif +#ifdef IPFILTER_SYNC + | IPF_FEAT_SYNC +#endif +#ifdef IPFILTER_SCAN + | IPF_FEAT_SCAN +#endif +#ifdef USE_INET6 + | IPF_FEAT_IPV6 +#endif + ; + +static INLINE int fr_ipfcheck __P((fr_info_t *, frentry_t *, int)); +static int fr_portcheck __P((frpcmp_t *, u_short *)); +static int frflushlist __P((int, minor_t, int *, frentry_t **)); +static ipfunc_t fr_findfunc __P((ipfunc_t)); +static frentry_t *fr_firewall __P((fr_info_t *, u_32_t *)); +static int fr_funcinit __P((frentry_t *fr)); +static INLINE void frpr_ah __P((fr_info_t *)); +static INLINE void frpr_esp __P((fr_info_t *)); +static INLINE void frpr_gre __P((fr_info_t *)); +static INLINE void frpr_udp __P((fr_info_t *)); +static INLINE void frpr_tcp __P((fr_info_t *)); +static INLINE void frpr_icmp __P((fr_info_t *)); +static INLINE void frpr_ipv4hdr __P((fr_info_t *)); +static INLINE int frpr_pullup __P((fr_info_t *, int)); +static INLINE void frpr_short __P((fr_info_t *, int)); +static INLINE 
void frpr_tcpcommon __P((fr_info_t *)); +static INLINE void frpr_udpcommon __P((fr_info_t *)); +static INLINE int fr_updateipid __P((fr_info_t *)); +#ifdef IPFILTER_LOOKUP +static int fr_grpmapinit __P((frentry_t *fr)); +static INLINE void *fr_resolvelookup __P((u_int, u_int, lookupfunc_t *)); +#endif +static void frsynclist __P((frentry_t *, void *)); +static ipftuneable_t *fr_findtunebyname __P((const char *)); +static ipftuneable_t *fr_findtunebycookie __P((void *, void **)); + + +/* + * bit values for identifying presence of individual IP options + * All of these tables should be ordered by increasing key value on the left + * hand side to allow for binary searching of the array and include a trailer + * with a 0 for the bitmask for linear searches to easily find the end with. + */ +const struct optlist ipopts[20] = { + { IPOPT_NOP, 0x000001 }, + { IPOPT_RR, 0x000002 }, + { IPOPT_ZSU, 0x000004 }, + { IPOPT_MTUP, 0x000008 }, + { IPOPT_MTUR, 0x000010 }, + { IPOPT_ENCODE, 0x000020 }, + { IPOPT_TS, 0x000040 }, + { IPOPT_TR, 0x000080 }, + { IPOPT_SECURITY, 0x000100 }, + { IPOPT_LSRR, 0x000200 }, + { IPOPT_E_SEC, 0x000400 }, + { IPOPT_CIPSO, 0x000800 }, + { IPOPT_SATID, 0x001000 }, + { IPOPT_SSRR, 0x002000 }, + { IPOPT_ADDEXT, 0x004000 }, + { IPOPT_VISA, 0x008000 }, + { IPOPT_IMITD, 0x010000 }, + { IPOPT_EIP, 0x020000 }, + { IPOPT_FINN, 0x040000 }, + { 0, 0x000000 } +}; + +#ifdef USE_INET6 +struct optlist ip6exthdr[] = { + { IPPROTO_HOPOPTS, 0x000001 }, + { IPPROTO_IPV6, 0x000002 }, + { IPPROTO_ROUTING, 0x000004 }, + { IPPROTO_FRAGMENT, 0x000008 }, + { IPPROTO_ESP, 0x000010 }, + { IPPROTO_AH, 0x000020 }, + { IPPROTO_NONE, 0x000040 }, + { IPPROTO_DSTOPTS, 0x000080 }, + { 0, 0 } +}; +#endif + +struct optlist tcpopts[] = { + { TCPOPT_NOP, 0x000001 }, + { TCPOPT_MAXSEG, 0x000002 }, + { TCPOPT_WINDOW, 0x000004 }, + { TCPOPT_SACK_PERMITTED, 0x000008 }, + { TCPOPT_SACK, 0x000010 }, + { TCPOPT_TIMESTAMP, 0x000020 }, + { 0, 0x000000 } +}; + +/* + * bit values for identifying 
presence of individual IP security options + */ +const struct optlist secopt[8] = { + { IPSO_CLASS_RES4, 0x01 }, + { IPSO_CLASS_TOPS, 0x02 }, + { IPSO_CLASS_SECR, 0x04 }, + { IPSO_CLASS_RES3, 0x08 }, + { IPSO_CLASS_CONF, 0x10 }, + { IPSO_CLASS_UNCL, 0x20 }, + { IPSO_CLASS_RES2, 0x40 }, + { IPSO_CLASS_RES1, 0x80 } +}; + + +/* + * Table of functions available for use with call rules. + */ +static ipfunc_resolve_t fr_availfuncs[] = { +#ifdef IPFILTER_LOOKUP + { "fr_srcgrpmap", fr_srcgrpmap, fr_grpmapinit }, + { "fr_dstgrpmap", fr_dstgrpmap, fr_grpmapinit }, +#endif + { "", NULL } +}; + + +/* + * The next section of code is a collection of small routines that set + * fields in the fr_info_t structure passed based on properties of the + * current packet. There are different routines for the same protocol + * for each of IPv4 and IPv6. Adding a new protocol, for which there + * will be "special" inspection for setup, is now more easily done by adding + * a new routine and expanding the frpr_ipinit*() function rather than by + * adding more code to a growing switch statement. 
+ */ +#ifdef USE_INET6 +static INLINE int frpr_ah6 __P((fr_info_t *)); +static INLINE void frpr_esp6 __P((fr_info_t *)); +static INLINE void frpr_gre6 __P((fr_info_t *)); +static INLINE void frpr_udp6 __P((fr_info_t *)); +static INLINE void frpr_tcp6 __P((fr_info_t *)); +static INLINE void frpr_icmp6 __P((fr_info_t *)); +static INLINE int frpr_ipv6hdr __P((fr_info_t *)); +static INLINE void frpr_short6 __P((fr_info_t *, int)); +static INLINE int frpr_hopopts6 __P((fr_info_t *)); +static INLINE int frpr_routing6 __P((fr_info_t *)); +static INLINE int frpr_dstopts6 __P((fr_info_t *)); +static INLINE int frpr_fragment6 __P((fr_info_t *)); +static INLINE int frpr_ipv6exthdr __P((fr_info_t *, int, int)); + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_short6 */ +/* Returns: void */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* IPv6 Only */ +/* This is function enforces the 'is a packet too short to be legit' rule */ +/* for IPv6 and marks the packet with FI_SHORT if so. See function comment */ +/* for frpr_short() for more details. */ +/* ------------------------------------------------------------------------ */ +static INLINE void frpr_short6(fin, xmin) +fr_info_t *fin; +int xmin; +{ + + if (fin->fin_dlen < xmin) + fin->fin_flx |= FI_SHORT; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_ipv6hdr */ +/* Returns: int */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* IPv6 Only */ +/* Copy values from the IPv6 header into the fr_info_t struct and call the */ +/* per-protocol analyzer if it exists. 
*/ +/* ------------------------------------------------------------------------ */ +static INLINE int frpr_ipv6hdr(fin) +fr_info_t *fin; +{ + ip6_t *ip6 = (ip6_t *)fin->fin_ip; + int p, go = 1, i, hdrcount; + fr_ip_t *fi = &fin->fin_fi; + + fin->fin_off = 0; + + fi->fi_tos = 0; + fi->fi_optmsk = 0; + fi->fi_secmsk = 0; + fi->fi_auth = 0; + + p = ip6->ip6_nxt; + fi->fi_ttl = ip6->ip6_hlim; + fi->fi_src.in6 = ip6->ip6_src; + fi->fi_dst.in6 = ip6->ip6_dst; + fin->fin_id = 0; + + hdrcount = 0; + while (go && !(fin->fin_flx & (FI_BAD|FI_SHORT))) { + switch (p) + { + case IPPROTO_UDP : + frpr_udp6(fin); + go = 0; + break; + + case IPPROTO_TCP : + frpr_tcp6(fin); + go = 0; + break; + + case IPPROTO_ICMPV6 : + frpr_icmp6(fin); + go = 0; + break; + + case IPPROTO_GRE : + frpr_gre6(fin); + go = 0; + break; + + case IPPROTO_HOPOPTS : + /* + * hop by hop ext header is only allowed + * right after IPv6 header. + */ + if (hdrcount != 0) { + fin->fin_flx |= FI_BAD; + p = IPPROTO_NONE; + } else { + p = frpr_hopopts6(fin); + } + break; + + case IPPROTO_DSTOPTS : + p = frpr_dstopts6(fin); + break; + + case IPPROTO_ROUTING : + p = frpr_routing6(fin); + break; + + case IPPROTO_AH : + p = frpr_ah6(fin); + break; + + case IPPROTO_ESP : + frpr_esp6(fin); + go = 0; + break; + + case IPPROTO_IPV6 : + for (i = 0; ip6exthdr[i].ol_bit != 0; i++) + if (ip6exthdr[i].ol_val == p) { + fin->fin_flx |= ip6exthdr[i].ol_bit; + break; + } + go = 0; + break; + + case IPPROTO_NONE : + go = 0; + break; + + case IPPROTO_FRAGMENT : + p = frpr_fragment6(fin); + if (fin->fin_off != 0) /* Not the first frag */ + go = 0; + break; + + default : + go = 0; + break; + } + hdrcount++; + + /* + * It is important to note that at this point, for the + * extension headers (go != 0), the entire header may not have + * been pulled up when the code gets to this point. This is + * only done for "go != 0" because the other header handlers + * will all pullup their complete header. 
The other indicator + * of an incomplete packet is that this was just an extension + * header. + */ + if ((go != 0) && (p != IPPROTO_NONE) && + (frpr_pullup(fin, 0) == -1)) { + p = IPPROTO_NONE; + go = 0; + } + } + fi->fi_p = p; + + if (fin->fin_flx & FI_BAD) + return -1; + + return 0; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_ipv6exthdr */ +/* Returns: int - value of the next header or IPPROTO_NONE if error */ +/* Parameters: fin(I) - pointer to packet information */ +/* multiple(I) - flag indicating yes/no if multiple occurances */ +/* of this extension header are allowed. */ +/* proto(I) - protocol number for this extension header */ +/* */ +/* IPv6 Only */ +/* ------------------------------------------------------------------------ */ +static INLINE int frpr_ipv6exthdr(fin, multiple, proto) +fr_info_t *fin; +int multiple, proto; +{ + struct ip6_ext *hdr; + u_short shift; + int i; + + fin->fin_flx |= FI_V6EXTHDR; + + /* 8 is default length of extension hdr */ + if ((fin->fin_dlen - 8) < 0) { + fin->fin_flx |= FI_SHORT; + return IPPROTO_NONE; + } + + if (frpr_pullup(fin, 8) == -1) + return IPPROTO_NONE; + + hdr = fin->fin_dp; + shift = 8 + (hdr->ip6e_len << 3); + if (shift > fin->fin_dlen) { /* Nasty extension header length? */ + fin->fin_flx |= FI_BAD; + return IPPROTO_NONE; + } + + for (i = 0; ip6exthdr[i].ol_bit != 0; i++) + if (ip6exthdr[i].ol_val == proto) { + /* + * Most IPv6 extension headers are only allowed once. 
+ */ + if ((multiple == 0) && + ((fin->fin_optmsk & ip6exthdr[i].ol_bit) != 0)) + fin->fin_flx |= FI_BAD; + else + fin->fin_optmsk |= ip6exthdr[i].ol_bit; + break; + } + + fin->fin_dp = (char *)fin->fin_dp + shift; + fin->fin_dlen -= shift; + + return hdr->ip6e_nxt; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_hopopts6 */ +/* Returns: int - value of the next header or IPPROTO_NONE if error */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* IPv6 Only */ +/* This is function checks pending hop by hop options extension header */ +/* ------------------------------------------------------------------------ */ +static INLINE int frpr_hopopts6(fin) +fr_info_t *fin; +{ + return frpr_ipv6exthdr(fin, 0, IPPROTO_HOPOPTS); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_routing6 */ +/* Returns: int - value of the next header or IPPROTO_NONE if error */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* IPv6 Only */ +/* This is function checks pending routing extension header */ +/* ------------------------------------------------------------------------ */ +static INLINE int frpr_routing6(fin) +fr_info_t *fin; +{ + struct ip6_ext *hdr; + int shift; + + hdr = fin->fin_dp; + if (frpr_ipv6exthdr(fin, 0, IPPROTO_ROUTING) == IPPROTO_NONE) + return IPPROTO_NONE; + + shift = 8 + (hdr->ip6e_len << 3); + /* + * Nasty extension header length? 
+ */ + if ((hdr->ip6e_len << 3) & 15) { + fin->fin_flx |= FI_BAD; + /* + * Compensate for the changes made in frpr_ipv6exthdr() + */ + fin->fin_dlen += shift; + fin->fin_dp = (char *)fin->fin_dp - shift; + return IPPROTO_NONE; + } + + return hdr->ip6e_nxt; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_fragment6 */ +/* Returns: int - value of the next header or IPPROTO_NONE if error */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* IPv6 Only */ +/* Examine the IPv6 fragment header and extract fragment offset information.*/ +/* */ +/* We don't know where the transport layer header (or whatever is next is), */ +/* as it could be behind destination options (amongst others). Because */ +/* there is no fragment cache, there is no knowledge about whether or not an*/ +/* upper layer header has been seen (or where it ends) and thus we are not */ +/* able to continue processing beyond this header with any confidence. */ +/* ------------------------------------------------------------------------ */ +static INLINE int frpr_fragment6(fin) +fr_info_t *fin; +{ + struct ip6_frag *frag; + int dlen; + + fin->fin_flx |= FI_FRAG; + + dlen = fin->fin_dlen; + if (frpr_ipv6exthdr(fin, 0, IPPROTO_FRAGMENT) == IPPROTO_NONE) + return IPPROTO_NONE; + + if (frpr_pullup(fin, sizeof(*frag)) == -1) + return IPPROTO_NONE; + + frpr_short6(fin, sizeof(*frag)); + + if ((fin->fin_flx & FI_SHORT) != 0) + return IPPROTO_NONE; + + frag = (struct ip6_frag *)((char *)fin->fin_dp - sizeof(*frag)); + /* + * Fragment but no fragmentation info set? Bad packet... 
+ */ + if (frag->ip6f_offlg == 0) { + fin->fin_flx |= FI_BAD; + return IPPROTO_NONE; + } + + fin->fin_id = frag->ip6f_ident; + fin->fin_off = frag->ip6f_offlg & IP6F_OFF_MASK; + fin->fin_off = ntohs(fin->fin_off); + if (fin->fin_off != 0) + fin->fin_flx |= FI_FRAGBODY; + + fin->fin_dp = (char *)frag + sizeof(*frag); + fin->fin_dlen = dlen - sizeof(*frag); + + /* length of hdrs(after frag hdr) + data */ + fin->fin_flen = fin->fin_dlen; + + /* + * If the frag is not the last one and the payload length + * is not multiple of 8, it must be dropped. + */ + if ((frag->ip6f_offlg & IP6F_MORE_FRAG) && (dlen % 8)) { + fin->fin_flx |= FI_BAD; + return IPPROTO_NONE; + } + + return frag->ip6f_nxt; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_dstopts6 */ +/* Returns: int - value of the next header or IPPROTO_NONE if error */ +/* Parameters: fin(I) - pointer to packet information */ +/* nextheader(I) - stores next header value */ +/* */ +/* IPv6 Only */ +/* This is function checks pending destination options extension header */ +/* ------------------------------------------------------------------------ */ +static INLINE int frpr_dstopts6(fin) +fr_info_t *fin; +{ + return frpr_ipv6exthdr(fin, 1, IPPROTO_DSTOPTS); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_icmp6 */ +/* Returns: void */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* IPv6 Only */ +/* This routine is mainly concerned with determining the minimum valid size */ +/* for an ICMPv6 packet. 
*/ +/* ------------------------------------------------------------------------ */ +static INLINE void frpr_icmp6(fin) +fr_info_t *fin; +{ + int minicmpsz = sizeof(struct icmp6_hdr); + struct icmp6_hdr *icmp6; + + if (frpr_pullup(fin, ICMP6ERR_MINPKTLEN - sizeof(ip6_t)) == -1) + return; + + if (fin->fin_dlen > 1) { + icmp6 = fin->fin_dp; + + fin->fin_data[0] = *(u_short *)icmp6; + + switch (icmp6->icmp6_type) + { + case ICMP6_ECHO_REPLY : + case ICMP6_ECHO_REQUEST : + minicmpsz = ICMP6ERR_MINPKTLEN - sizeof(ip6_t); + break; + case ICMP6_DST_UNREACH : + case ICMP6_PACKET_TOO_BIG : + case ICMP6_TIME_EXCEEDED : + case ICMP6_PARAM_PROB : + if ((fin->fin_m != NULL) && + (M_LEN(fin->fin_m) < fin->fin_plen)) { + if (fr_coalesce(fin) != 1) + return; + } + fin->fin_flx |= FI_ICMPERR; + minicmpsz = ICMP6ERR_IPICMPHLEN - sizeof(ip6_t); + break; + default : + break; + } + } + + frpr_short6(fin, minicmpsz); + fin->fin_flen -= fin->fin_dlen - minicmpsz; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_udp6 */ +/* Returns: void */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* IPv6 Only */ +/* Analyse the packet for IPv6/UDP properties. */ +/* Is not expected to be called for fragmented packets. */ +/* ------------------------------------------------------------------------ */ +static INLINE void frpr_udp6(fin) +fr_info_t *fin; +{ + + fr_checkv6sum(fin); + + frpr_short6(fin, sizeof(struct udphdr)); + if (frpr_pullup(fin, sizeof(struct udphdr)) == -1) + return; + + fin->fin_flen -= fin->fin_dlen - sizeof(struct udphdr); + + frpr_udpcommon(fin); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_tcp6 */ +/* Returns: void */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* IPv6 Only */ +/* Analyse the packet for IPv6/TCP properties. */ +/* Is not expected to be called for fragmented packets. 
*/ +/* ------------------------------------------------------------------------ */ +static INLINE void frpr_tcp6(fin) +fr_info_t *fin; +{ + + fr_checkv6sum(fin); + + frpr_short6(fin, sizeof(struct tcphdr)); + if (frpr_pullup(fin, sizeof(struct tcphdr)) == -1) + return; + + fin->fin_flen -= fin->fin_dlen - sizeof(struct tcphdr); + + frpr_tcpcommon(fin); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_esp6 */ +/* Returns: void */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* IPv6 Only */ +/* Analyse the packet for ESP properties. */ +/* The minimum length is taken to be the SPI (32bits) plus a tail (32bits) */ +/* even though the newer ESP packets must also have a sequence number that */ +/* is 32bits as well, it is not possible(?) to determine the version from a */ +/* simple packet header. */ +/* ------------------------------------------------------------------------ */ +static INLINE void frpr_esp6(fin) +fr_info_t *fin; +{ + int i; + frpr_short6(fin, sizeof(grehdr_t)); + + (void) frpr_pullup(fin, 8); + + for (i = 0; ip6exthdr[i].ol_bit != 0; i++) + if (ip6exthdr[i].ol_val == IPPROTO_ESP) { + fin->fin_optmsk |= ip6exthdr[i].ol_bit; + break; + } +} + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_ah6 */ +/* Returns: void */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* IPv6 Only */ +/* Analyse the packet for AH properties. */ +/* The minimum length is taken to be the combination of all fields in the */ +/* header being present and no authentication data (null algorithm used.) 
*/ +/* ------------------------------------------------------------------------ */ +static INLINE int frpr_ah6(fin) +fr_info_t *fin; +{ + authhdr_t *ah; + int i, shift; + + frpr_short6(fin, 12); + + if (frpr_pullup(fin, sizeof(*ah)) == -1) + return IPPROTO_NONE; + + for (i = 0; ip6exthdr[i].ol_bit != 0; i++) + if (ip6exthdr[i].ol_val == IPPROTO_AH) { + fin->fin_optmsk |= ip6exthdr[i].ol_bit; + break; + } + + ah = (authhdr_t *)fin->fin_dp; + + shift = (ah->ah_plen + 2) * 4; + fin->fin_dlen -= shift; + fin->fin_dp = (char*)fin->fin_dp + shift; + + return ah->ah_next; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_gre6 */ +/* Returns: void */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* Analyse the packet for GRE properties. */ +/* ------------------------------------------------------------------------ */ +static INLINE void frpr_gre6(fin) +fr_info_t *fin; +{ + grehdr_t *gre; + + frpr_short6(fin, sizeof(grehdr_t)); + + if (frpr_pullup(fin, sizeof(grehdr_t)) == -1) + return; + + gre = fin->fin_dp; + if (GRE_REV(gre->gr_flags) == 1) + fin->fin_data[0] = gre->gr_call; +} +#endif /* USE_INET6 */ + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_pullup */ +/* Returns: int - 0 == pullup succeeded, -1 == failure */ +/* Parameters: fin(I) - pointer to packet information */ +/* plen(I) - length (excluding L3 header) to pullup */ +/* */ +/* Short inline function to cut down on code duplication to perform a call */ +/* to fr_pullup to ensure there is the required amount of data, */ +/* consecutively in the packet buffer. 
*/ +/* ------------------------------------------------------------------------ */ +static INLINE int frpr_pullup(fin, plen) +fr_info_t *fin; +int plen; +{ +#if defined(_KERNEL) + if (fin->fin_m != NULL) { + if (fin->fin_dp != NULL) + plen += (char *)fin->fin_dp - + ((char *)fin->fin_ip + fin->fin_hlen); + plen += ((char *)fin->fin_ip - MTOD(fin->fin_m, char *)) + + fin->fin_hlen; + if (M_LEN(fin->fin_m) < plen) { + if (fr_pullup(fin->fin_m, fin, plen) == NULL) + return -1; + } + } +#endif + return 0; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_short */ +/* Returns: void */ +/* Parameters: fin(I) - pointer to packet information */ +/* xmin(I) - minimum header size */ +/* */ +/* Check if a packet is "short" as defined by xmin. The rule we are */ +/* applying here is that the packet must not be fragmented within the layer */ +/* 4 header. That is, it must not be a fragment that has its offset set to */ +/* start within the layer 4 header (hdrmin) or if it is at offset 0, the */ +/* entire layer 4 header must be present (min). */ +/* ------------------------------------------------------------------------ */ +static INLINE void frpr_short(fin, xmin) +fr_info_t *fin; +int xmin; +{ + + if (fin->fin_off == 0) { + if (fin->fin_dlen < xmin) + fin->fin_flx |= FI_SHORT; + } else if (fin->fin_off < xmin) { + fin->fin_flx |= FI_SHORT; + } +} + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_icmp */ +/* Returns: void */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* IPv4 Only */ +/* Do a sanity check on the packet for ICMP (v4). In nearly all cases, */ +/* except extrememly bad packets, both type and code will be present. */ +/* The expected minimum size of an ICMP packet is very much dependent on */ +/* the type of it. */ +/* */ +/* XXX - other ICMP sanity checks? 
*/ +/* ------------------------------------------------------------------------ */ +static INLINE void frpr_icmp(fin) +fr_info_t *fin; +{ + int minicmpsz = sizeof(struct icmp); + icmphdr_t *icmp; + ip_t *oip; + + if (fin->fin_off != 0) { + frpr_short(fin, ICMPERR_ICMPHLEN); + return; + } + + if (frpr_pullup(fin, ICMPERR_ICMPHLEN) == -1) + return; + + fr_checkv4sum(fin); + + if (fin->fin_dlen > 1) { + icmp = fin->fin_dp; + + fin->fin_data[0] = *(u_short *)icmp; + + switch (icmp->icmp_type) + { + case ICMP_ECHOREPLY : + case ICMP_ECHO : + /* Router discovery messaes - RFC 1256 */ + case ICMP_ROUTERADVERT : + case ICMP_ROUTERSOLICIT : + minicmpsz = ICMP_MINLEN; + break; + /* + * type(1) + code(1) + cksum(2) + id(2) seq(2) + + * 3 * timestamp(3 * 4) + */ + case ICMP_TSTAMP : + case ICMP_TSTAMPREPLY : + minicmpsz = 20; + break; + /* + * type(1) + code(1) + cksum(2) + id(2) seq(2) + + * mask(4) + */ + case ICMP_MASKREQ : + case ICMP_MASKREPLY : + minicmpsz = 12; + break; + /* + * type(1) + code(1) + cksum(2) + id(2) seq(2) + ip(20+) + */ + case ICMP_UNREACH : + if (icmp->icmp_code == ICMP_UNREACH_NEEDFRAG) { + if (icmp->icmp_nextmtu < fr_icmpminfragmtu) + fin->fin_flx |= FI_BAD; + } + /* FALLTHRU */ + case ICMP_SOURCEQUENCH : + case ICMP_REDIRECT : + case ICMP_TIMXCEED : + case ICMP_PARAMPROB : + fin->fin_flx |= FI_ICMPERR; + if (fr_coalesce(fin) != 1) + return; + /* + * ICMP error packets should not be generated for IP + * packets that are a fragment that isn't the first + * fragment. 
+ */ + oip = (ip_t *)((char *)fin->fin_dp + ICMPERR_ICMPHLEN); + if ((ntohs(oip->ip_off) & IP_OFFMASK) != 0) + fin->fin_flx |= FI_BAD; + break; + default : + break; + } + + if (fin->fin_dlen >= 6) /* ID field */ + fin->fin_data[1] = icmp->icmp_id; + } + + frpr_short(fin, minicmpsz); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_tcpcommon */ +/* Returns: void */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* TCP header sanity checking. Look for bad combinations of TCP flags, */ +/* and make some checks with how they interact with other fields. */ +/* If compiled with IPFILTER_CKSUM, check to see if the TCP checksum is */ +/* valid and mark the packet as bad if not. */ +/* ------------------------------------------------------------------------ */ +static INLINE void frpr_tcpcommon(fin) +fr_info_t *fin; +{ + int flags, tlen; + tcphdr_t *tcp; + + fin->fin_flx |= FI_TCPUDP; + if (fin->fin_off != 0) + return; + + if (frpr_pullup(fin, sizeof(*tcp)) == -1) + return; + tcp = fin->fin_dp; + + if (fin->fin_dlen > 3) { + fin->fin_sport = ntohs(tcp->th_sport); + fin->fin_dport = ntohs(tcp->th_dport); + } + + if ((fin->fin_flx & FI_SHORT) != 0) + return; + + /* + * Use of the TCP data offset *must* result in a value that is at + * least the same size as the TCP header. + */ + tlen = TCP_OFF(tcp) << 2; + if (tlen < sizeof(tcphdr_t)) { + fin->fin_flx |= FI_BAD; + return; + } + + flags = tcp->th_flags; + fin->fin_tcpf = tcp->th_flags; + + /* + * If the urgent flag is set, then the urgent pointer must + * also be set and vice versa. Good TCP packets do not have + * just one of these set. + */ + if ((flags & TH_URG) != 0 && (tcp->th_urp == 0)) { + fin->fin_flx |= FI_BAD; + } else if ((flags & TH_URG) == 0 && (tcp->th_urp != 0)) { + /* Ignore this case, it shows up in "real" traffic with */ + /* bogus values in the urgent pointer field. 
*/ + flags = flags; /* LINT */ + } else if (((flags & (TH_SYN|TH_FIN)) != 0) && + ((flags & (TH_RST|TH_ACK)) == TH_RST)) { + /* TH_FIN|TH_RST|TH_ACK seems to appear "naturally" */ + fin->fin_flx |= FI_BAD; + } else if (!(flags & TH_ACK)) { + /* + * If the ack bit isn't set, then either the SYN or + * RST bit must be set. If the SYN bit is set, then + * we expect the ACK field to be 0. If the ACK is + * not set and if URG, PSH or FIN are set, consdier + * that to indicate a bad TCP packet. + */ + if ((flags == TH_SYN) && (tcp->th_ack != 0)) { + /* + * Cisco PIX sets the ACK field to a random value. + * In light of this, do not set FI_BAD until a patch + * is available from Cisco to ensure that + * interoperability between existing systems is + * achieved. + */ + /*fin->fin_flx |= FI_BAD*/; + flags = flags; /* LINT */ + } else if (!(flags & (TH_RST|TH_SYN))) { + fin->fin_flx |= FI_BAD; + } else if ((flags & (TH_URG|TH_PUSH|TH_FIN)) != 0) { + fin->fin_flx |= FI_BAD; + } + } + + /* + * At this point, it's not exactly clear what is to be gained by + * marking up which TCP options are and are not present. The one we + * are most interested in is the TCP window scale. This is only in + * a SYN packet [RFC1323] so we don't need this here...? + * Now if we were to analyse the header for passive fingerprinting, + * then that might add some weight to adding this... 
+ */ + if (tlen == sizeof(tcphdr_t)) + return; + + if (frpr_pullup(fin, tlen) == -1) + return; + +#if 0 + ip = fin->fin_ip; + s = (u_char *)(tcp + 1); + off = IP_HL(ip) << 2; +# ifdef _KERNEL + if (fin->fin_mp != NULL) { + mb_t *m = *fin->fin_mp; + + if (off + tlen > M_LEN(m)) + return; + } +# endif + for (tlen -= (int)sizeof(*tcp); tlen > 0; ) { + opt = *s; + if (opt == '\0') + break; + else if (opt == TCPOPT_NOP) + ol = 1; + else { + if (tlen < 2) + break; + ol = (int)*(s + 1); + if (ol < 2 || ol > tlen) + break; + } + + for (i = 9, mv = 4; mv >= 0; ) { + op = ipopts + i; + if (opt == (u_char)op->ol_val) { + optmsk |= op->ol_bit; + break; + } + } + tlen -= ol; + s += ol; + } +#endif /* 0 */ +} + + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_udpcommon */ +/* Returns: void */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* Extract the UDP source and destination ports, if present. If compiled */ +/* with IPFILTER_CKSUM, check to see if the UDP checksum is valid. */ +/* ------------------------------------------------------------------------ */ +static INLINE void frpr_udpcommon(fin) +fr_info_t *fin; +{ + udphdr_t *udp; + + fin->fin_flx |= FI_TCPUDP; + + if (!fin->fin_off && (fin->fin_dlen > 3)) { + if (frpr_pullup(fin, sizeof(*udp)) == -1) { + fin->fin_flx |= FI_SHORT; + return; + } + + udp = fin->fin_dp; + + fin->fin_sport = ntohs(udp->uh_sport); + fin->fin_dport = ntohs(udp->uh_dport); + } +} + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_tcp */ +/* Returns: void */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* IPv4 Only */ +/* Analyse the packet for IPv4/TCP properties. 
*/ +/* ------------------------------------------------------------------------ */ +static INLINE void frpr_tcp(fin) +fr_info_t *fin; +{ + + fr_checkv4sum(fin); + + frpr_short(fin, sizeof(tcphdr_t)); + + frpr_tcpcommon(fin); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_udp */ +/* Returns: void */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* IPv4 Only */ +/* Analyse the packet for IPv4/UDP properties. */ +/* ------------------------------------------------------------------------ */ +static INLINE void frpr_udp(fin) +fr_info_t *fin; +{ + + fr_checkv4sum(fin); + + frpr_short(fin, sizeof(udphdr_t)); + + frpr_udpcommon(fin); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_esp */ +/* Returns: void */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* Analyse the packet for ESP properties. */ +/* The minimum length is taken to be the SPI (32bits) plus a tail (32bits) */ +/* even though the newer ESP packets must also have a sequence number that */ +/* is 32bits as well, it is not possible(?) to determine the version from a */ +/* simple packet header. */ +/* ------------------------------------------------------------------------ */ +static INLINE void frpr_esp(fin) +fr_info_t *fin; +{ + if ((fin->fin_off == 0) && (frpr_pullup(fin, 8) == -1)) + return; + + frpr_short(fin, 8); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_ah */ +/* Returns: void */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* Analyse the packet for AH properties. */ +/* The minimum length is taken to be the combination of all fields in the */ +/* header being present and no authentication data (null algorithm used.) 
*/ +/* ------------------------------------------------------------------------ */ +static INLINE void frpr_ah(fin) +fr_info_t *fin; +{ + authhdr_t *ah; + int len; + + if ((fin->fin_off == 0) && (frpr_pullup(fin, sizeof(*ah)) == -1)) + return; + + ah = (authhdr_t *)fin->fin_dp; + + len = (ah->ah_plen + 2) << 2; + frpr_short(fin, len); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_gre */ +/* Returns: void */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* Analyse the packet for GRE properties. */ +/* ------------------------------------------------------------------------ */ +static INLINE void frpr_gre(fin) +fr_info_t *fin; +{ + grehdr_t *gre; + + if ((fin->fin_off == 0) && (frpr_pullup(fin, sizeof(grehdr_t)) == -1)) + return; + + frpr_short(fin, sizeof(grehdr_t)); + + if (fin->fin_off == 0) { + gre = fin->fin_dp; + if (GRE_REV(gre->gr_flags) == 1) + fin->fin_data[0] = gre->gr_call; + } +} + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_ipv4hdr */ +/* Returns: void */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* IPv4 Only */ +/* Analyze the IPv4 header and set fields in the fr_info_t structure. */ +/* Check all options present and flag their presence if any exist. 
*/
+/* ------------------------------------------------------------------------ */
+static INLINE void frpr_ipv4hdr(fin)
+fr_info_t *fin;
+{
+	u_short optmsk = 0, secmsk = 0, auth = 0;
+	int hlen, ol, mv, p, i;
+	const struct optlist *op;
+	u_char *s, opt;
+	u_short off;
+	fr_ip_t *fi;
+	ip_t *ip;
+
+	fi = &fin->fin_fi;
+	hlen = fin->fin_hlen;	/* kept for the option walk further down */
+
+	ip = fin->fin_ip;
+	p = ip->ip_p;
+	fi->fi_p = p;
+	fi->fi_tos = ip->ip_tos;
+	fin->fin_id = ip->ip_id;
+	off = ip->ip_off;
+
+	/* Get both TTL and protocol */
+	fi->fi_p = ip->ip_p;
+	fi->fi_ttl = ip->ip_ttl;
+#if 0
+	(*(((u_short *)fi) + 1)) = (*(((u_short *)ip) + 4));
+#endif
+
+	/* Zero out bits not used in IPv6 address */
+	fi->fi_src.i6[1] = 0;
+	fi->fi_src.i6[2] = 0;
+	fi->fi_src.i6[3] = 0;
+	fi->fi_dst.i6[1] = 0;
+	fi->fi_dst.i6[2] = 0;
+	fi->fi_dst.i6[3] = 0;
+
+	fi->fi_saddr = ip->ip_src.s_addr;
+	fi->fi_daddr = ip->ip_dst.s_addr;
+
+	/*
+	 * set packet attribute flags based on the offset and
+	 * calculate the byte offset that it represents.
+	 */
+	off &= IP_MF|IP_OFFMASK;
+	if (off != 0) {
+		fi->fi_flx |= FI_FRAG;
+		off &= IP_OFFMASK;
+		if (off != 0) {
+			fin->fin_flx |= FI_FRAGBODY;
+			off <<= 3;	/* 8 byte units to bytes */
+			if ((off + fin->fin_dlen > 65535) ||
+			    (fin->fin_dlen == 0) ||
+			    ((ip->ip_off & IP_MF) && (fin->fin_dlen & 7))) {
+				/*
+				 * The length of the packet, starting at its
+				 * offset cannot exceed 65535 (0xffff) as the
+				 * length of an IP packet is only 16 bits.
+				 *
+				 * Any fragment that isn't the last fragment
+				 * must have a length greater than 0 and it
+				 * must be an even multiple of 8.
+				 */
+				fi->fi_flx |= FI_BAD;
+			}
+		}
+	}
+	fin->fin_off = off;
+
+	/*
+	 * Call per-protocol setup and checking
+	 */
+	switch (p)
+	{
+	case IPPROTO_UDP :
+		frpr_udp(fin);
+		break;
+	case IPPROTO_TCP :
+		frpr_tcp(fin);
+		break;
+	case IPPROTO_ICMP :
+		frpr_icmp(fin);
+		break;
+	case IPPROTO_AH :
+		frpr_ah(fin);
+		break;
+	case IPPROTO_ESP :
+		frpr_esp(fin);
+		break;
+	case IPPROTO_GRE :
+		frpr_gre(fin);
+		break;
+	}
+
+	/*
+	 * The header pointer is read again from fin after the per-protocol
+	 * handlers have run (they call frpr_pullup()); NULL means there is
+	 * no header left to examine.
+	 */
+	ip = fin->fin_ip;
+	if (ip == NULL)
+		return;
+
+	/*
+	 * If it is a standard IP header (no options), set the flag fields
+	 * which relate to options to 0.
+	 */
+	if (hlen == sizeof(*ip)) {
+		fi->fi_optmsk = 0;
+		fi->fi_secmsk = 0;
+		fi->fi_auth = 0;
+		return;
+	}
+
+	/*
+	 * So the IP header has some IP options attached. Walk the entire
+	 * list of options present with this packet and set flags to indicate
+	 * which ones are here and which ones are not. For the somewhat out
+	 * of date and obscure security classification options, set a flag to
+	 * represent which classification is present.
+	 */
+	fi->fi_flx |= FI_OPTIONS;
+
+	for (s = (u_char *)(ip + 1), hlen -= (int)sizeof(*ip); hlen > 0; ) {
+		opt = *s;
+		if (opt == '\0')	/* option value 0 ends the walk */
+			break;
+		else if (opt == IPOPT_NOP)
+			ol = 1;
+		else {
+			if (hlen < 2)
+				break;
+			ol = (int)*(s + 1);	/* length byte of the option */
+			if (ol < 2 || ol > hlen)
+				break;
+		}
+		/*
+		 * Search ipopts[] for this option, starting at index 9 and
+		 * moving by a step (mv) that shrinks from 4 down to 0.
+		 * NOTE(review): this presumably relies on ipopts[] being
+		 * sorted by ol_val and on its size - confirm against the
+		 * table definition.
+		 */
+		for (i = 9, mv = 4; mv >= 0; ) {
+			op = ipopts + i;
+			if ((opt == (u_char)op->ol_val) && (ol > 4)) {
+				optmsk |= op->ol_bit;
+				if (opt == IPOPT_SECURITY) {
+					const struct optlist *sp;
+					u_char sec;
+					int j, m;
+
+					sec = *(s + 2);	/* classification */
+					/* same style of search, in secopt[] */
+					for (j = 3, m = 2; m >= 0; ) {
+						sp = secopt + j;
+						if (sec == sp->ol_val) {
+							secmsk |= sp->ol_bit;
+							/* bytes 3 and 4, high byte first */
+							auth = *(s + 3);
+							auth *= 256;
+							auth += *(s + 4);
+							break;
+						}
+						if (sec < sp->ol_val)
+							j -= m;
+						else
+							j += m;
+						m--;
+					}
+				}
+				break;
+			}
+			if (opt < op->ol_val)
+				i -= mv;
+			else
+				i += mv;
+			mv--;
+		}
+		hlen -= ol;
+		s += ol;
+	}
+
+	/*
+	 * If an authority value was collected and its 0x0100 bit is clear,
+	 * keep only the upper byte of it.
+	 */
+	if (auth && !(auth & 0x0100))
+		auth &= 0xff00;
+	fi->fi_optmsk = optmsk;
+	fi->fi_secmsk = secmsk;
+	fi->fi_auth = auth;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_makefrip */
+/* Returns: int - -1 == hdr checking error, 0 == OK */
+/* Parameters: hlen(I) - length of IP packet header */
+/* ip(I) - pointer to the IP header */
+/* fin(IO) - pointer to packet information */
+/* */
+/* Compact the IP header into a structure which contains just the info. */
+/* which is useful for comparing IP headers with and store this information */
+/* in the fr_info_t structure pointed to by fin. At present, it is assumed */
+/* this function will be called with either an IPv4 or IPv6 packet.
*/
+/* ------------------------------------------------------------------------ */
+int fr_makefrip(hlen, ip, fin)
+int hlen;
+ip_t *ip;
+fr_info_t *fin;
+{
+	int v;
+
+	/* Reset the per-packet state before the header is analysed. */
+	fin->fin_nat = NULL;
+	fin->fin_state = NULL;
+	fin->fin_depth = 0;
+	fin->fin_hlen = (u_short)hlen;
+	fin->fin_ip = ip;
+	fin->fin_rule = 0xffffffff;
+	fin->fin_group[0] = -1;
+	fin->fin_group[1] = '\0';
+	fin->fin_dlen = fin->fin_plen - hlen;	/* data after the IP header */
+	fin->fin_dp = (char *)ip + hlen;
+
+	v = fin->fin_v;
+	if (v == 4)
+		frpr_ipv4hdr(fin);
+#ifdef USE_INET6
+	else if (v == 6) {
+		if (frpr_ipv6hdr(fin) == -1)
+			return -1;
+	}
+#endif
+	/* fin_ip being NULL after the analysis is treated as failure. */
+	if (fin->fin_ip == NULL)
+		return -1;
+	return 0;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_portcheck */
+/* Returns: int - 1 == port matched, 0 == port match failed */
+/* Parameters: frp(I) - pointer to port check `expression' */
+/* pop(I) - pointer to port number to evaluate */
+/* */
+/* Perform a comparison of a port number against some other(s), using a */
+/* structure with compare information stored in it. */
+/* ------------------------------------------------------------------------ */
+static INLINE int fr_portcheck(frp, pop)
+frpcmp_t *frp;
+u_short *pop;
+{
+	u_short tup, po;
+	int err = 1;
+
+	tup = *pop;		/* port taken from the packet */
+	po = frp->frp_port;	/* port (or lower bound) from the rule */
+
+	/*
+	 * Each case tests for the opposite of the comparison required and
+	 * clears err (no match) when that opposite test succeeds. An
+	 * unknown comparison type leaves err at 1 (match).
+	 */
+	switch (frp->frp_cmp)
+	{
+	case FR_EQUAL :
+		if (tup != po) /* EQUAL */
+			err = 0;
+		break;
+	case FR_NEQUAL :
+		if (tup == po) /* NOTEQUAL */
+			err = 0;
+		break;
+	case FR_LESST :
+		if (tup >= po) /* LESSTHAN */
+			err = 0;
+		break;
+	case FR_GREATERT :
+		if (tup <= po) /* GREATERTHAN */
+			err = 0;
+		break;
+	case FR_LESSTE :
+		if (tup > po) /* LT or EQ */
+			err = 0;
+		break;
+	case FR_GREATERTE :
+		if (tup < po) /* GT or EQ */
+			err = 0;
+		break;
+	case FR_OUTRANGE :
+		if (tup >= po && tup <= frp->frp_top) /* Out of range: fails for po..frp_top, bounds included */
+			err = 0;
+		break;
+	case FR_INRANGE :
+		if (tup <= po || tup >= frp->frp_top) /* In range: both bounds themselves are excluded */
+			err = 0;
+		break;
+	case FR_INCRANGE :
+		if (tup < po || tup > frp->frp_top) /* Inclusive range */
+			err = 0;
+		break;
+	default :
+		break;
+	}
+	return err;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_tcpudpchk */
+/* Returns: int - 1 == protocol matched, 0 == check failed */
+/* Parameters: fin(I) - pointer to packet information */
+/* ft(I) - pointer to structure with comparison data */
+/* */
+/* Compares the current packet (assuming it is TCP/UDP) information with a */
+/* structure containing information that we want to match against. */
+/* ------------------------------------------------------------------------ */
+int fr_tcpudpchk(fin, ft)
+fr_info_t *fin;
+frtuc_t *ft;
+{
+	int err = 1;
+
+	/*
+	 * Both ports should *always* be in the first fragment.
+	 * So far, I cannot find any cases where they can not be.
+	 *
+	 * compare destination ports
+	 */
+	if (ft->ftu_dcmp)
+		err = fr_portcheck(&ft->ftu_dst, &fin->fin_dport);
+
+	/*
+	 * compare source ports
+	 */
+	if (err && ft->ftu_scmp)
+		err = fr_portcheck(&ft->ftu_src, &fin->fin_sport);
+
+	/*
+	 * If we don't have all the TCP/UDP header, then how can we
+	 * expect to do any sort of match on it ? If we were looking for
+	 * TCP flags, then NO match. If not, then match (which should
+	 * satisfy the "short" class too).
+	 */
+	if (err && (fin->fin_p == IPPROTO_TCP)) {
+		if (fin->fin_flx & FI_SHORT)
+			return !(ft->ftu_tcpf | ft->ftu_tcpfm);
+		/*
+		 * Match the flags ? If not, abort this match.
+		 */
+		if (ft->ftu_tcpfm &&
+		    ft->ftu_tcpf != (fin->fin_tcpf & ft->ftu_tcpfm)) {
+			FR_DEBUG(("f. %#x & %#x != %#x\n", fin->fin_tcpf,
+				 ft->ftu_tcpfm, ft->ftu_tcpf));
+			err = 0;
+		}
+	}
+	return err;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_ipfcheck */
+/* Returns: int - 0 == match, 1 == no match */
+/* Parameters: fin(I) - pointer to packet information */
+/* fr(I) - pointer to filter rule */
+/* portcmp(I) - flag indicating whether to attempt matching on */
+/* TCP/UDP port data. */
+/* */
+/* Check to see if a packet matches an IPFilter rule. Checks of addresses, */
+/* port numbers, etc, for "standard" IPFilter rules are all orchestrated in */
+/* this function. */
+/* The packet summary (fin_fi), the rule mask (fri_mip) and the rule value */
+/* (fri_ip) are walked in step as arrays of 32 bit words, so the order of */
+/* the pointer increments below must follow the layout of those structures. */
+/* ------------------------------------------------------------------------ */
+static INLINE int fr_ipfcheck(fin, fr, portcmp)
+fr_info_t *fin;
+frentry_t *fr;
+int portcmp;
+{
+	u_32_t *ld, *lm, *lip;
+	fripf_t *fri;
+	fr_ip_t *fi;
+	int i;
+
+	fi = &fin->fin_fi;
+	fri = fr->fr_ipf;
+	lip = (u_32_t *)fi;		/* words describing the packet */
+	lm = (u_32_t *)&fri->fri_mip;	/* mask words from the rule */
+	ld = (u_32_t *)&fri->fri_ip;	/* expected words from the rule */
+
+	/*
+	 * The first 32 bits to check cover:
+	 * IP version, TOS, TTL, protocol
+	 */
+	i = ((*lip & *lm) != *ld);
+	FR_DEBUG(("0. %#08x & %#08x != %#08x\n",
+		   *lip, *lm, *ld));
+	if (i)
+		return 1;
+
+	/*
+	 * Next 32 bits is a constructed bitmask indicating which IP options
+	 * are present (if any) in this packet.
+	 */
+	lip++, lm++, ld++;
+	i |= ((*lip & *lm) != *ld);
+	FR_DEBUG(("1. %#08x & %#08x != %#08x\n",
+		   *lip, *lm, *ld));
+	if (i)
+		return 1;
+
+	lip++, lm++, ld++;
+	/*
+	 * Unrolled loops (4 each, for 32 bits) for address checks.
+	 */
+	/*
+	 * Check the source address. With IPFILTER_LOOKUP, a rule of type
+	 * FRI_LOOKUP hands the address to fr_srcfunc instead of comparing
+	 * it here. In every path all three pointers end up on the last of
+	 * the four source address words.
+	 */
+#ifdef IPFILTER_LOOKUP
+	if (fr->fr_satype == FRI_LOOKUP) {
+		i = (*fr->fr_srcfunc)(fr->fr_srcptr, fi->fi_v, lip);
+		if (i == -1)
+			return 1;
+		lip += 3;
+		lm += 3;
+		ld += 3;
+	} else {
+#endif
+		i = ((*lip & *lm) != *ld);
+		FR_DEBUG(("2a. %#08x & %#08x != %#08x\n",
+			   *lip, *lm, *ld));
+		if (fi->fi_v == 6) {
+			lip++, lm++, ld++;
+			i |= ((*lip & *lm) != *ld);
+			FR_DEBUG(("2b. %#08x & %#08x != %#08x\n",
+				   *lip, *lm, *ld));
+			lip++, lm++, ld++;
+			i |= ((*lip & *lm) != *ld);
+			FR_DEBUG(("2c. %#08x & %#08x != %#08x\n",
+				   *lip, *lm, *ld));
+			lip++, lm++, ld++;
+			i |= ((*lip & *lm) != *ld);
+			FR_DEBUG(("2d. %#08x & %#08x != %#08x\n",
+				   *lip, *lm, *ld));
+		} else {
+			lip += 3;
+			lm += 3;
+			ld += 3;
+		}
+#ifdef IPFILTER_LOOKUP
+	}
+#endif
+	i ^= (fr->fr_flags & FR_NOTSRCIP) >> 6;	/* NOTE(review): presumably inverts the result for "not" rules by shifting the flag down to bit 0 - confirm against FR_NOTSRCIP */
+	if (i)
+		return 1;
+
+	/*
+	 * Check the destination address.
+	 */
+	lip++, lm++, ld++;
+#ifdef IPFILTER_LOOKUP
+	if (fr->fr_datype == FRI_LOOKUP) {
+		i = (*fr->fr_dstfunc)(fr->fr_dstptr, fi->fi_v, lip);
+		if (i == -1)
+			return 1;
+		lip += 3;
+		lm += 3;
+		ld += 3;
+	} else {
+#endif
+		i = ((*lip & *lm) != *ld);
+		FR_DEBUG(("3a. %#08x & %#08x != %#08x\n",
+			   *lip, *lm, *ld));
+		if (fi->fi_v == 6) {
+			lip++, lm++, ld++;
+			i |= ((*lip & *lm) != *ld);
+			FR_DEBUG(("3b. %#08x & %#08x != %#08x\n",
+				   *lip, *lm, *ld));
+			lip++, lm++, ld++;
+			i |= ((*lip & *lm) != *ld);
+			FR_DEBUG(("3c. %#08x & %#08x != %#08x\n",
+				   *lip, *lm, *ld));
+			lip++, lm++, ld++;
+			i |= ((*lip & *lm) != *ld);
+			FR_DEBUG(("3d. %#08x & %#08x != %#08x\n",
+				   *lip, *lm, *ld));
+		} else {
+			lip += 3;
+			lm += 3;
+			ld += 3;
+		}
+#ifdef IPFILTER_LOOKUP
+	}
+#endif
+	i ^= (fr->fr_flags & FR_NOTDSTIP) >> 7;	/* NOTE(review): same as for the source address, for FR_NOTDSTIP - confirm the bit position */
+	if (i)
+		return 1;
+	/*
+	 * IP addresses matched. The next 32bits contains:
+	 * mask of old IP header security & authentication bits.
+	 */
+	lip++, lm++, ld++;
+	i |= ((*lip & *lm) != *ld);
+	FR_DEBUG(("4. %#08x & %#08x != %#08x\n",
+		   *lip, *lm, *ld));
+
+	/*
+	 * Next we have 32 bits of packet flags.
+	 */
+	lip++, lm++, ld++;
+	i |= ((*lip & *lm) != *ld);
+	FR_DEBUG(("5. %#08x & %#08x != %#08x\n",
+		   *lip, *lm, *ld));
+
+	if (i == 0) {
+		/*
+		 * If a fragment, then only the first has what we're
+		 * looking for here...
+		 */
+		if (portcmp) {
+			if (!fr_tcpudpchk(fin, &fr->fr_tuc))
+				i = 1;
+		} else {
+			/*
+			 * No port data to compare against: a rule which
+			 * asks for ports or TCP flags cannot match.
+			 */
+			if (fr->fr_dcmp || fr->fr_scmp ||
+			    fr->fr_tcpf || fr->fr_tcpfm)
+				i = 1;
+			if (fr->fr_icmpm || fr->fr_icmp) {
+				if (((fi->fi_p != IPPROTO_ICMP) &&
+				    (fi->fi_p != IPPROTO_ICMPV6)) ||
+				    fin->fin_off || (fin->fin_dlen < 2))
+					i = 1;
+				else if ((fin->fin_data[0] & fr->fr_icmpm) !=
+					 fr->fr_icmp) {
+					FR_DEBUG(("i. %#x & %#x != %#x\n",
+						 fin->fin_data[0],
+						 fr->fr_icmpm, fr->fr_icmp));
+					i = 1;
+				}
+			}
+		}
+	}
+	return i;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_scanlist */
+/* Returns: int - result flags of scanning filter list */
+/* Parameters: fin(I) - pointer to packet information */
+/* pass(I) - default result to return for filtering */
+/* */
+/* Check the input/output list of rules for a match to the current packet. */
+/* If a match is found, the value of fr_flags from the rule becomes the */
+/* return value and fin->fin_fr points to the matched rule. */
+/* */
+/* This function may be called recursively up to 16 times (limit inbuilt.) */
+/* When unwinding, it should finish up with fin_depth as 0. */
+/* */
+/* Could be per interface, but this gets real nasty when you don't have, */
+/* or can't easily change, the kernel source code to . */
+/* ------------------------------------------------------------------------ */
+int fr_scanlist(fin, pass)
+fr_info_t *fin;
+u_32_t pass;
+{
+	int rulen, portcmp, off, logged, skip;
+	struct frentry *fr, *fnext;
+	u_32_t passt, passo;
+
+	/*
+	 * Do not allow nesting deeper than 16 levels.
+	 */
+	if (fin->fin_depth >= 16)
+		return pass;
+
+	fr = fin->fin_fr;
+
+	/*
+	 * If there are no rules in this list, return now.
+ */ + if (fr == NULL) + return pass; + + skip = 0; + logged = 0; + portcmp = 0; + fin->fin_depth++; + fin->fin_fr = NULL; + off = fin->fin_off; + + if ((fin->fin_flx & FI_TCPUDP) && (fin->fin_dlen > 3) && !off) + portcmp = 1; + + for (rulen = 0; fr; fr = fnext, rulen++) { + fnext = fr->fr_next; + if (skip != 0) { + FR_VERBOSE(("%d (%#x)\n", skip, fr->fr_flags)); + skip--; + continue; + } + + /* + * In all checks below, a null (zero) value in the + * filter struture is taken to mean a wildcard. + * + * check that we are working for the right interface + */ +#ifdef _KERNEL + if (fr->fr_ifa && fr->fr_ifa != fin->fin_ifp) + continue; +#else + if (opts & (OPT_VERBOSE|OPT_DEBUG)) + printf("\n"); + FR_VERBOSE(("%c", FR_ISSKIP(pass) ? 's' : + FR_ISPASS(pass) ? 'p' : + FR_ISACCOUNT(pass) ? 'A' : + FR_ISAUTH(pass) ? 'a' : + (pass & FR_NOMATCH) ? 'n' :'b')); + if (fr->fr_ifa && fr->fr_ifa != fin->fin_ifp) + continue; + FR_VERBOSE((":i")); +#endif + + switch (fr->fr_type) + { + case FR_T_IPF : + case FR_T_IPF|FR_T_BUILTIN : + if (fr_ipfcheck(fin, fr, portcmp)) + continue; + break; +#if defined(IPFILTER_BPF) + case FR_T_BPFOPC : + case FR_T_BPFOPC|FR_T_BUILTIN : + { + u_char *mc; + + if (*fin->fin_mp == NULL) + continue; + if (fin->fin_v != fr->fr_v) + continue; + mc = (u_char *)fin->fin_m; + if (!bpf_filter(fr->fr_data, mc, fin->fin_plen, 0)) + continue; + break; + } +#endif + case FR_T_CALLFUNC|FR_T_BUILTIN : + { + frentry_t *f; + + f = (*fr->fr_func)(fin, &pass); + if (f != NULL) + fr = f; + else + continue; + break; + } + default : + break; + } + + if ((fin->fin_out == 0) && (fr->fr_nattag.ipt_num[0] != 0)) { + if (fin->fin_nattag == NULL) + continue; + if (fr_matchtag(&fr->fr_nattag, fin->fin_nattag) == 0) + continue; + } + FR_VERBOSE(("=%s.%d *", fr->fr_group, rulen)); + + passt = fr->fr_flags; + + /* + * Allowing a rule with the "keep state" flag set to match + * packets that have been tagged "out of window" by the TCP + * state tracking is foolish as the attempt to add 
a new + * state entry to the table will fail. + */ + if ((passt & FR_KEEPSTATE) && (fin->fin_flx & FI_OOW)) + continue; + + /* + * If the rule is a "call now" rule, then call the function + * in the rule, if it exists and use the results from that. + * If the function pointer is bad, just make like we ignore + * it, except for increasing the hit counter. + */ + if ((passt & FR_CALLNOW) != 0) { + ATOMIC_INC64(fr->fr_hits); + if ((fr->fr_func != NULL) && + (fr->fr_func != (ipfunc_t)-1)) { + frentry_t *frs; + + frs = fin->fin_fr; + fin->fin_fr = fr; + fr = (*fr->fr_func)(fin, &passt); + if (fr == NULL) { + fin->fin_fr = frs; + continue; + } + passt = fr->fr_flags; + fin->fin_fr = fr; + } + } else { + fin->fin_fr = fr; + } + +#ifdef IPFILTER_LOG + /* + * Just log this packet... + */ + if ((passt & FR_LOGMASK) == FR_LOG) { + if (ipflog(fin, passt) == -1) { + if (passt & FR_LOGORBLOCK) { + passt &= ~FR_CMDMASK; + passt |= FR_BLOCK|FR_QUICK; + } + ATOMIC_INCL(frstats[fin->fin_out].fr_skip); + } + ATOMIC_INCL(frstats[fin->fin_out].fr_pkl); + logged = 1; + } +#endif /* IPFILTER_LOG */ + fr->fr_bytes += (U_QUAD_T)fin->fin_plen; + passo = pass; + if (FR_ISSKIP(passt)) + skip = fr->fr_arg; + else if ((passt & FR_LOGMASK) != FR_LOG) + pass = passt; + if (passt & (FR_RETICMP|FR_FAKEICMP)) + fin->fin_icode = fr->fr_icode; + FR_DEBUG(("pass %#x\n", pass)); + ATOMIC_INC64(fr->fr_hits); + fin->fin_rule = rulen; + (void) strncpy(fin->fin_group, fr->fr_group, FR_GROUPLEN); + if (fr->fr_grp != NULL) { + fin->fin_fr = *fr->fr_grp; + pass = fr_scanlist(fin, pass); + if (fin->fin_fr == NULL) { + fin->fin_rule = rulen; + (void) strncpy(fin->fin_group, fr->fr_group, + FR_GROUPLEN); + fin->fin_fr = fr; + } + if (fin->fin_flx & FI_DONTCACHE) + logged = 1; + } + + if (pass & FR_QUICK) { + /* + * Finally, if we've asked to track state for this + * packet, set it up. 
Add state for "quick" rules + * here so that if the action fails we can consider + * the rule to "not match" and keep on processing + * filter rules. + */ + if ((pass & FR_KEEPSTATE) && + !(fin->fin_flx & FI_STATE)) { + int out = fin->fin_out; + + if (fr_addstate(fin, NULL, 0) != NULL) { + ATOMIC_INCL(frstats[out].fr_ads); + } else { + ATOMIC_INCL(frstats[out].fr_bads); + pass = passo; + continue; + } + } + break; + } + } + if (logged) + fin->fin_flx |= FI_DONTCACHE; + fin->fin_depth--; + return pass; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_acctpkt */ +/* Returns: frentry_t* - always returns NULL */ +/* Parameters: fin(I) - pointer to packet information */ +/* passp(IO) - pointer to current/new filter decision (unused) */ +/* */ +/* Checks a packet against accounting rules, if there are any for the given */ +/* IP protocol version. */ +/* */ +/* N.B.: this function returns NULL to match the prototype used by other */ +/* functions called from the IPFilter "mainline" in fr_check(). 
*/ +/* ------------------------------------------------------------------------ */ +frentry_t *fr_acctpkt(fin, passp) +fr_info_t *fin; +u_32_t *passp; +{ + char group[FR_GROUPLEN]; + frentry_t *fr, *frsave; + u_32_t pass, rulen; + + passp = passp; +#ifdef USE_INET6 + if (fin->fin_v == 6) + fr = ipacct6[fin->fin_out][fr_active]; + else +#endif + fr = ipacct[fin->fin_out][fr_active]; + + if (fr != NULL) { + frsave = fin->fin_fr; + bcopy(fin->fin_group, group, FR_GROUPLEN); + rulen = fin->fin_rule; + fin->fin_fr = fr; + pass = fr_scanlist(fin, FR_NOMATCH); + if (FR_ISACCOUNT(pass)) { + ATOMIC_INCL(frstats[0].fr_acct); + } + fin->fin_fr = frsave; + bcopy(group, fin->fin_group, FR_GROUPLEN); + fin->fin_rule = rulen; + } + return NULL; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_firewall */ +/* Returns: frentry_t* - returns pointer to matched rule, if no matches */ +/* were found, returns NULL. */ +/* Parameters: fin(I) - pointer to packet information */ +/* passp(IO) - pointer to current/new filter decision (unused) */ +/* */ +/* Applies an appropriate set of firewall rules to the packet, to see if */ +/* there are any matches. The first check is to see if a match can be seen */ +/* in the cache. If not, then search an appropriate list of rules. Once a */ +/* matching rule is found, take any appropriate actions as defined by the */ +/* rule - except logging. */ +/* ------------------------------------------------------------------------ */ +static frentry_t *fr_firewall(fin, passp) +fr_info_t *fin; +u_32_t *passp; +{ + frentry_t *fr; + fr_info_t *fc; + u_32_t pass; + int out; + + out = fin->fin_out; + pass = *passp; + + /* + * If a packet is found in the auth table, then skip checking + * the access lists for permission but we do need to consider + * the result as if it were from the ACL's. 
+ */ + fc = &frcache[out][CACHE_HASH(fin)]; + READ_ENTER(&ipf_frcache); + if (!bcmp((char *)fin, (char *)fc, FI_CSIZE)) { + /* + * copy cached data so we can unlock the mutexes earlier. + */ + bcopy((char *)fc, (char *)fin, FI_COPYSIZE); + RWLOCK_EXIT(&ipf_frcache); + ATOMIC_INCL(frstats[out].fr_chit); + + if ((fr = fin->fin_fr) != NULL) { + ATOMIC_INC64(fr->fr_hits); + pass = fr->fr_flags; + } + } else { + RWLOCK_EXIT(&ipf_frcache); + +#ifdef USE_INET6 + if (fin->fin_v == 6) + fin->fin_fr = ipfilter6[out][fr_active]; + else +#endif + fin->fin_fr = ipfilter[out][fr_active]; + if (fin->fin_fr != NULL) + pass = fr_scanlist(fin, fr_pass); + + if (((pass & FR_KEEPSTATE) == 0) && + ((fin->fin_flx & FI_DONTCACHE) == 0)) { + WRITE_ENTER(&ipf_frcache); + bcopy((char *)fin, (char *)fc, FI_COPYSIZE); + RWLOCK_EXIT(&ipf_frcache); + } + if ((pass & FR_NOMATCH)) { + ATOMIC_INCL(frstats[out].fr_nom); + } + fr = fin->fin_fr; + } + + /* + * Apply packets per second rate-limiting to a rule as required. + */ + if ((fr != NULL) && (fr->fr_pps != 0) && + !ppsratecheck(&fr->fr_lastpkt, &fr->fr_curpps, fr->fr_pps)) { + pass &= ~(FR_CMDMASK|FR_DUP|FR_RETICMP|FR_RETRST); + pass |= FR_BLOCK; + ATOMIC_INCL(frstats[out].fr_ppshit); + } + + /* + * If we fail to add a packet to the authorization queue, then we + * drop the packet later. However, if it was added then pretend + * we've dropped it already. + */ + if (FR_ISAUTH(pass)) { + if (fr_newauth(fin->fin_m, fin) != 0) { +#ifdef _KERNEL + fin->fin_m = *fin->fin_mp = NULL; +#else + ; +#endif + fin->fin_error = 0; + } else + fin->fin_error = ENOSPC; + } + + if ((fr != NULL) && (fr->fr_func != NULL) && + (fr->fr_func != (ipfunc_t)-1) && !(pass & FR_CALLNOW)) + (void) (*fr->fr_func)(fin, &pass); + + /* + * If a rule is a pre-auth rule, check again in the list of rules + * loaded for authenticated use. 
It does not particulary matter + * if this search fails because a "preauth" result, from a rule, + * is treated as "not a pass", hence the packet is blocked. + */ + if (FR_ISPREAUTH(pass)) { + if ((fin->fin_fr = ipauth) != NULL) + pass = fr_scanlist(fin, fr_pass); + } + + /* + * If the rule has "keep frag" and the packet is actually a fragment, + * then create a fragment state entry. + */ + if ((pass & (FR_KEEPFRAG|FR_KEEPSTATE)) == FR_KEEPFRAG) { + if (fin->fin_flx & FI_FRAG) { + if (fr_newfrag(fin, pass) == -1) { + ATOMIC_INCL(frstats[out].fr_bnfr); + } else { + ATOMIC_INCL(frstats[out].fr_nfr); + } + } else { + ATOMIC_INCL(frstats[out].fr_cfr); + } + } + + /* + * Finally, if we've asked to track state for this packet, set it up. + */ + if ((pass & FR_KEEPSTATE) && !(fin->fin_flx & FI_STATE)) { + if (fr_addstate(fin, NULL, 0) != NULL) { + ATOMIC_INCL(frstats[out].fr_ads); + } else { + ATOMIC_INCL(frstats[out].fr_bads); + if (FR_ISPASS(pass)) { + pass &= ~FR_CMDMASK; + pass |= FR_BLOCK; + } + } + } + + fr = fin->fin_fr; + + if (passp != NULL) + *passp = pass; + + return fr; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_check */ +/* Returns: int - 0 == packet allowed through, */ +/* User space: */ +/* -1 == packet blocked */ +/* 1 == packet not matched */ +/* -2 == requires authentication */ +/* Kernel: */ +/* > 0 == filter error # for packet */ +/* Parameters: ip(I) - pointer to start of IPv4/6 packet */ +/* hlen(I) - length of header */ +/* ifp(I) - pointer to interface this packet is on */ +/* out(I) - 0 == packet going in, 1 == packet going out */ +/* mp(IO) - pointer to caller's buffer pointer that holds this */ +/* IP packet. */ +/* Solaris & HP-UX ONLY : */ +/* qpi(I) - pointer to STREAMS queue information for this */ +/* interface & direction. */ +/* */ +/* fr_check() is the master function for all IPFilter packet processing. 
*/ +/* It orchestrates: Network Address Translation (NAT), checking for packet */ +/* authorisation (or pre-authorisation), presence of related state info., */ +/* generating log entries, IP packet accounting, routing of packets as */ +/* directed by firewall rules and of course whether or not to allow the */ +/* packet to be further processed by the kernel. */ +/* */ +/* For packets blocked, the contents of "mp" will be NULL'd and the buffer */ +/* freed. Packets passed may be returned with the pointer pointed to by */ +/* by "mp" changed to a new buffer. */ +/* ------------------------------------------------------------------------ */ +int fr_check(ip, hlen, ifp, out +#if defined(_KERNEL) && defined(MENTAT) +, qif, mp) +void *qif; +#else +, mp) +#endif +mb_t **mp; +ip_t *ip; +int hlen; +void *ifp; +int out; +{ + /* + * The above really sucks, but short of writing a diff + */ + fr_info_t frinfo; + fr_info_t *fin = &frinfo; + u_32_t pass = fr_pass; + frentry_t *fr = NULL; + int v = IP_V(ip); + mb_t *mc = NULL; + mb_t *m; +#ifdef USE_INET6 + ip6_t *ip6; +#endif +#ifdef _KERNEL +# ifdef MENTAT + qpktinfo_t *qpi = qif; +#endif +#endif + SPL_INT(s); + + /* + * The first part of fr_check() deals with making sure that what goes + * into the filtering engine makes some sense. Information about the + * the packet is distilled, collected into a fr_info_t structure and + * the an attempt to ensure the buffer the packet is in is big enough + * to hold all the required packet headers. 
+ */ +#ifdef _KERNEL +# ifdef MENTAT + if (!OK_32PTR(ip)) + return 2; +# endif + + READ_ENTER(&ipf_global); + + if (fr_running <= 0) { + RWLOCK_EXIT(&ipf_global); + return 0; + } + + bzero((char *)fin, sizeof(*fin)); + +# ifdef MENTAT + if (qpi->qpi_flags & QF_GROUP) + fin->fin_flx |= FI_MBCAST; + m = qpi->qpi_m; + fin->fin_qfm = m; + fin->fin_qpi = qpi; +# else /* MENTAT */ + + m = *mp; + +# if defined(M_MCAST) + if ((m->m_flags & M_MCAST) != 0) + fin->fin_flx |= FI_MBCAST|FI_MULTICAST; +# endif +# if defined(M_MLOOP) + if ((m->m_flags & M_MLOOP) != 0) + fin->fin_flx |= FI_MBCAST|FI_MULTICAST; +# endif +# if defined(M_BCAST) + if ((m->m_flags & M_BCAST) != 0) + fin->fin_flx |= FI_MBCAST|FI_BROADCAST; +# endif +# ifdef M_CANFASTFWD + /* + * XXX For now, IP Filter and fast-forwarding of cached flows + * XXX are mutually exclusive. Eventually, IP Filter should + * XXX get a "can-fast-forward" filter rule. + */ + m->m_flags &= ~M_CANFASTFWD; +# endif /* M_CANFASTFWD */ +# ifdef CSUM_DELAY_DATA + /* + * disable delayed checksums. + */ + if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { + in_delayed_cksum(m); + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; + } +# endif /* CSUM_DELAY_DATA */ +# endif /* MENTAT */ +#else + READ_ENTER(&ipf_global); + + bzero((char *)fin, sizeof(*fin)); + m = *mp; +#endif /* _KERNEL */ + + fin->fin_v = v; + fin->fin_m = m; + fin->fin_ip = ip; + fin->fin_mp = mp; + fin->fin_out = out; + fin->fin_ifp = ifp; + fin->fin_error = ENETUNREACH; + fin->fin_hlen = (u_short)hlen; + fin->fin_dp = (char *)ip + hlen; + + fin->fin_ipoff = (char *)ip - MTOD(m, char *); + + SPL_NET(s); + +#ifdef USE_INET6 + if (v == 6) { + ATOMIC_INCL(frstats[out].fr_ipv6); + /* + * Jumbo grams are quite likely too big for internal buffer + * structures to handle comfortably, for now, so just drop + * them. 
+ */ + ip6 = (ip6_t *)ip; + fin->fin_plen = ntohs(ip6->ip6_plen); + if (fin->fin_plen == 0) { + pass = FR_BLOCK|FR_NOMATCH; + goto filtered; + } + fin->fin_plen += sizeof(ip6_t); + } else +#endif + { +#if (OpenBSD >= 200311) && defined(_KERNEL) + ip->ip_len = ntohs(ip->ip_len); + ip->ip_off = ntohs(ip->ip_off); +#endif + fin->fin_plen = ip->ip_len; + } + + if (fr_makefrip(hlen, ip, fin) == -1) { + READ_ENTER(&ipf_mutex); + pass = FR_BLOCK; + goto filtered; + } + + /* + * For at least IPv6 packets, if a m_pullup() fails then this pointer + * becomes NULL and so we have no packet to free. + */ + if (*fin->fin_mp == NULL) + goto finished; + + if (!out) { + if (v == 4) { +#ifdef _KERNEL + if (fr_chksrc && !fr_verifysrc(fin)) { + ATOMIC_INCL(frstats[0].fr_badsrc); + fin->fin_flx |= FI_BADSRC; + } +#endif + if (fin->fin_ip->ip_ttl < fr_minttl) { + ATOMIC_INCL(frstats[0].fr_badttl); + fin->fin_flx |= FI_LOWTTL; + } + } +#ifdef USE_INET6 + else if (v == 6) { + ip6 = (ip6_t *)ip; +#ifdef _KERNEL + if (fr_chksrc && !fr_verifysrc(fin)) { + ATOMIC_INCL(frstats[0].fr_badsrc); + fin->fin_flx |= FI_BADSRC; + } +#endif + if (ip6->ip6_hlim < fr_minttl) { + ATOMIC_INCL(frstats[0].fr_badttl); + fin->fin_flx |= FI_LOWTTL; + } + } +#endif + } + + if (fin->fin_flx & FI_SHORT) { + ATOMIC_INCL(frstats[out].fr_short); + } + + READ_ENTER(&ipf_mutex); + + /* + * Check auth now. This, combined with the check below to see if apass + * is 0 is to ensure that we don't count the packet twice, which can + * otherwise occur when we reprocess it. As it is, we only count it + * after it has no auth. table matchup. This also stops NAT from + * occuring until after the packet has been auth'd. 
+ */ + fr = fr_checkauth(fin, &pass); + if (!out) { + if (fr_checknatin(fin, &pass) == -1) { + RWLOCK_EXIT(&ipf_mutex); + goto finished; + } + } + if (!out) + (void) fr_acctpkt(fin, NULL); + + if (fr == NULL) + if ((fin->fin_flx & (FI_FRAG|FI_BAD)) == FI_FRAG) + fr = fr_knownfrag(fin, &pass); + if (fr == NULL) + fr = fr_checkstate(fin, &pass); + + if ((pass & FR_NOMATCH) || (fr == NULL)) + fr = fr_firewall(fin, &pass); + + fin->fin_fr = fr; + + /* + * Only count/translate packets which will be passed on, out the + * interface. + */ + if (out && FR_ISPASS(pass)) { + (void) fr_acctpkt(fin, NULL); + + if (fr_checknatout(fin, &pass) == -1) { + RWLOCK_EXIT(&ipf_mutex); + goto finished; + } else if ((fr_update_ipid != 0) && (v == 4)) { + if (fr_updateipid(fin) == -1) { + ATOMIC_INCL(frstats[1].fr_ipud); + pass &= ~FR_CMDMASK; + pass |= FR_BLOCK; + } else { + ATOMIC_INCL(frstats[0].fr_ipud); + } + } + } + +#ifdef IPFILTER_LOG + if ((fr_flags & FF_LOGGING) || (pass & FR_LOGMASK)) { + (void) fr_dolog(fin, &pass); + } +#endif + + if (fin->fin_state != NULL) + fr_statederef(fin, (ipstate_t **)&fin->fin_state); + + if (fin->fin_nat != NULL) + fr_natderef((nat_t **)&fin->fin_nat); + + /* + * Only allow FR_DUP to work if a rule matched - it makes no sense to + * set FR_DUP as a "default" as there are no instructions about where + * to send the packet. Use fin_m here because it may have changed + * (without an update of 'm') in prior processing. + */ + if ((fr != NULL) && (pass & FR_DUP)) { + mc = M_DUPLICATE(fin->fin_m); + } + + if (pass & (FR_RETRST|FR_RETICMP)) { + /* + * Should we return an ICMP packet to indicate error + * status passing through the packet filter ? + * WARNING: ICMP error packets AND TCP RST packets should + * ONLY be sent in repsonse to incoming packets. Sending them + * in response to outbound packets can result in a panic on + * some operating systems. 
+ */ + if (!out) { + if (pass & FR_RETICMP) { + int dst; + + if ((pass & FR_RETMASK) == FR_FAKEICMP) + dst = 1; + else + dst = 0; + (void) fr_send_icmp_err(ICMP_UNREACH, fin, dst); + ATOMIC_INCL(frstats[0].fr_ret); + } else if (((pass & FR_RETMASK) == FR_RETRST) && + !(fin->fin_flx & FI_SHORT)) { + if (fr_send_reset(fin) == 0) { + ATOMIC_INCL(frstats[1].fr_ret); + } + } + } else { + if (pass & FR_RETRST) + fin->fin_error = ECONNRESET; + } + } + + /* + * If we didn't drop off the bottom of the list of rules (and thus + * the 'current' rule fr is not NULL), then we may have some extra + * instructions about what to do with a packet. + * Once we're finished return to our caller, freeing the packet if + * we are dropping it (* BSD ONLY *). + * Reassign m from fin_m as we may have a new buffer, now. + */ +filtered: + m = fin->fin_m; + + if (fr != NULL) { + frdest_t *fdp; + + fdp = &fr->fr_tifs[fin->fin_rev]; + + if (!out && (pass & FR_FASTROUTE)) { + /* + * For fastroute rule, no destioation interface defined + * so pass NULL as the frdest_t parameter + */ + (void) fr_fastroute(m, mp, fin, NULL); + m = *mp = NULL; + } else if ((fdp->fd_ifp != NULL) && + (fdp->fd_ifp != (struct ifnet *)-1)) { + /* this is for to rules: */ + (void) fr_fastroute(m, mp, fin, fdp); + m = *mp = NULL; + } + + /* + * Generate a duplicated packet. + */ + if (mc != NULL) + (void) fr_fastroute(mc, &mc, fin, &fr->fr_dif); + } + + /* + * This late because the likes of fr_fastroute() use fin_fr. 
+ */ + RWLOCK_EXIT(&ipf_mutex); + +finished: + if (!FR_ISPASS(pass)) { + ATOMIC_INCL(frstats[out].fr_block); + if (*mp != NULL) { + FREE_MB_T(*mp); + m = *mp = NULL; + } + } else { + ATOMIC_INCL(frstats[out].fr_pass); +#if defined(_KERNEL) && defined(__sgi) + if ((fin->fin_hbuf != NULL) && + (mtod(fin->fin_m, struct ip *) != fin->fin_ip)) { + COPYBACK(m, 0, fin->fin_plen, fin->fin_hbuf); + } +#endif + } + + SPL_X(s); + RWLOCK_EXIT(&ipf_global); + +#ifdef _KERNEL +# if OpenBSD >= 200311 + if (FR_ISPASS(pass) && (v == 4)) { + ip = fin->fin_ip; + ip->ip_len = ntohs(ip->ip_len); + ip->ip_off = ntohs(ip->ip_off); + } +# endif + return (FR_ISPASS(pass)) ? 0 : fin->fin_error; +#else /* _KERNEL */ + FR_VERBOSE(("fin_flx %#x pass %#x ", fin->fin_flx, pass)); + if ((pass & FR_NOMATCH) != 0) + return 1; + + if ((pass & FR_RETMASK) != 0) + switch (pass & FR_RETMASK) + { + case FR_RETRST : + return 3; + case FR_RETICMP : + return 4; + case FR_FAKEICMP : + return 5; + } + + switch (pass & FR_CMDMASK) + { + case FR_PASS : + return 0; + case FR_BLOCK : + return -1; + case FR_AUTH : + return -2; + case FR_ACCOUNT : + return -3; + case FR_PREAUTH : + return -4; + } + return 2; +#endif /* _KERNEL */ +} + + +#ifdef IPFILTER_LOG +/* ------------------------------------------------------------------------ */ +/* Function: fr_dolog */ +/* Returns: frentry_t* - returns contents of fin_fr (no change made) */ +/* Parameters: fin(I) - pointer to packet information */ +/* passp(IO) - pointer to current/new filter decision (unused) */ +/* */ +/* Checks flags set to see how a packet should be logged, if it is to be */ +/* logged. Adjust statistics based on its success or not. 
*/ +/* ------------------------------------------------------------------------ */ +frentry_t *fr_dolog(fin, passp) +fr_info_t *fin; +u_32_t *passp; +{ + u_32_t pass; + int out; + + out = fin->fin_out; + pass = *passp; + + if ((fr_flags & FF_LOGNOMATCH) && (pass & FR_NOMATCH)) { + pass |= FF_LOGNOMATCH; + ATOMIC_INCL(frstats[out].fr_npkl); + goto logit; + } else if (((pass & FR_LOGMASK) == FR_LOGP) || + (FR_ISPASS(pass) && (fr_flags & FF_LOGPASS))) { + if ((pass & FR_LOGMASK) != FR_LOGP) + pass |= FF_LOGPASS; + ATOMIC_INCL(frstats[out].fr_ppkl); + goto logit; + } else if (((pass & FR_LOGMASK) == FR_LOGB) || + (FR_ISBLOCK(pass) && (fr_flags & FF_LOGBLOCK))) { + if ((pass & FR_LOGMASK) != FR_LOGB) + pass |= FF_LOGBLOCK; + ATOMIC_INCL(frstats[out].fr_bpkl); +logit: + if (ipflog(fin, pass) == -1) { + ATOMIC_INCL(frstats[out].fr_skip); + + /* + * If the "or-block" option has been used then + * block the packet if we failed to log it. + */ + if ((pass & FR_LOGORBLOCK) && + FR_ISPASS(pass)) { + pass &= ~FR_CMDMASK; + pass |= FR_BLOCK; + } + } + *passp = pass; + } + + return fin->fin_fr; +} +#endif /* IPFILTER_LOG */ + + +/* ------------------------------------------------------------------------ */ +/* Function: ipf_cksum */ +/* Returns: u_short - IP header checksum */ +/* Parameters: addr(I) - pointer to start of buffer to checksum */ +/* len(I) - length of buffer in bytes */ +/* */ +/* Calculate the two's complement 16 bit checksum of the buffer passed. */ +/* */ +/* N.B.: addr should be 16bit aligned. 
*/ +/* ------------------------------------------------------------------------ */ +u_short ipf_cksum(addr, len) +u_short *addr; +int len; +{ + u_32_t sum = 0; + + for (sum = 0; len > 1; len -= 2) + sum += *addr++; + + /* mop up an odd byte, if necessary */ + if (len == 1) + sum += *(u_char *)addr; + + /* + * add back carry outs from top 16 bits to low 16 bits + */ + sum = (sum >> 16) + (sum & 0xffff); /* add hi 16 to low 16 */ + sum += (sum >> 16); /* add carry */ + return (u_short)(~sum); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_cksum */ +/* Returns: u_short - layer 4 checksum */ +/* Parameters: m(I ) - pointer to buffer holding packet */ +/* ip(I) - pointer to IP header */ +/* l4proto(I) - protocol to caclulate checksum for */ +/* l4hdr(I) - pointer to layer 4 header */ +/* */ +/* Calculates the TCP checksum for the packet held in "m", using the data */ +/* in the IP header "ip" to seed it. */ +/* */ +/* NB: This function assumes we've pullup'd enough for all of the IP header */ +/* and the TCP header. We also assume that data blocks aren't allocated in */ +/* odd sizes. */ +/* */ +/* Expects ip_len to be in host byte order when called. 
*/ +/* ------------------------------------------------------------------------ */ +u_short fr_cksum(m, ip, l4proto, l4hdr) +mb_t *m; +ip_t *ip; +int l4proto; +void *l4hdr; +{ + u_short *sp, slen, sumsave, l4hlen, *csump; + u_int sum, sum2; + int hlen; +#ifdef USE_INET6 + ip6_t *ip6; +#endif + + csump = NULL; + sumsave = 0; + l4hlen = 0; + sp = NULL; + slen = 0; + hlen = 0; + sum = 0; + + /* + * Add up IP Header portion + */ +#ifdef USE_INET6 + if (IP_V(ip) == 4) { +#endif + hlen = IP_HL(ip) << 2; + slen = ip->ip_len - hlen; + sum = htons((u_short)l4proto); + sum += htons(slen); + sp = (u_short *)&ip->ip_src; + sum += *sp++; /* ip_src */ + sum += *sp++; + sum += *sp++; /* ip_dst */ + sum += *sp++; +#ifdef USE_INET6 + } else if (IP_V(ip) == 6) { + ip6 = (ip6_t *)ip; + hlen = sizeof(*ip6); + slen = ntohs(ip6->ip6_plen); + sum = htons((u_short)l4proto); + sum += htons(slen); + sp = (u_short *)&ip6->ip6_src; + sum += *sp++; /* ip6_src */ + sum += *sp++; + sum += *sp++; + sum += *sp++; + sum += *sp++; + sum += *sp++; + sum += *sp++; + sum += *sp++; + sum += *sp++; /* ip6_dst */ + sum += *sp++; + sum += *sp++; + sum += *sp++; + sum += *sp++; + sum += *sp++; + sum += *sp++; + sum += *sp++; + } +#endif + + switch (l4proto) + { + case IPPROTO_UDP : + csump = &((udphdr_t *)l4hdr)->uh_sum; + l4hlen = sizeof(udphdr_t); + break; + + case IPPROTO_TCP : + csump = &((tcphdr_t *)l4hdr)->th_sum; + l4hlen = sizeof(tcphdr_t); + break; + case IPPROTO_ICMP : + csump = &((icmphdr_t *)l4hdr)->icmp_cksum; + l4hlen = 4; + sum = 0; + break; + default : + break; + } + + if (csump != NULL) { + sumsave = *csump; + *csump = 0; + } + + l4hlen = l4hlen; /* LINT */ + +#ifdef _KERNEL +# ifdef MENTAT + { + void *rp = m->b_rptr; + + if ((unsigned char *)ip > m->b_rptr && (unsigned char *)ip < m->b_wptr) + m->b_rptr = (u_char *)ip; + sum2 = ip_cksum(m, hlen, sum); /* hlen == offset */ + m->b_rptr = rp; + sum2 = (sum2 & 0xffff) + (sum2 >> 16); + sum2 = ~sum2 & 0xffff; + } +# else /* MENTAT */ +# if 
defined(BSD) || defined(sun) +# if BSD >= 199103 + m->m_data += hlen; +# else + m->m_off += hlen; +# endif + m->m_len -= hlen; + sum2 = in_cksum(m, slen); + m->m_len += hlen; +# if BSD >= 199103 + m->m_data -= hlen; +# else + m->m_off -= hlen; +# endif + /* + * Both sum and sum2 are partial sums, so combine them together. + */ + sum += ~sum2 & 0xffff; + while (sum > 0xffff) + sum = (sum & 0xffff) + (sum >> 16); + sum2 = ~sum & 0xffff; +# else /* defined(BSD) || defined(sun) */ +{ + union { + u_char c[2]; + u_short s; + } bytes; + u_short len = ip->ip_len; +# if defined(__sgi) + int add; +# endif + + /* + * Add up IP Header portion + */ + if (sp != (u_short *)l4hdr) + sp = (u_short *)l4hdr; + + switch (l4proto) + { + case IPPROTO_UDP : + sum += *sp++; /* sport */ + sum += *sp++; /* dport */ + sum += *sp++; /* udp length */ + sum += *sp++; /* checksum */ + break; + + case IPPROTO_TCP : + sum += *sp++; /* sport */ + sum += *sp++; /* dport */ + sum += *sp++; /* seq */ + sum += *sp++; + sum += *sp++; /* ack */ + sum += *sp++; + sum += *sp++; /* off */ + sum += *sp++; /* win */ + sum += *sp++; /* checksum */ + sum += *sp++; /* urp */ + break; + case IPPROTO_ICMP : + sum = *sp++; /* type/code */ + sum += *sp++; /* checksum */ + break; + } + +# ifdef __sgi + /* + * In case we had to copy the IP & TCP header out of mbufs, + * skip over the mbuf bits which are the header + */ + if ((caddr_t)ip != mtod(m, caddr_t)) { + hlen = (caddr_t)sp - (caddr_t)ip; + while (hlen) { + add = MIN(hlen, m->m_len); + sp = (u_short *)(mtod(m, caddr_t) + add); + hlen -= add; + if (add == m->m_len) { + m = m->m_next; + if (!hlen) { + if (!m) + break; + sp = mtod(m, u_short *); + } + PANIC((!m),("fr_cksum(1): not enough data")); + } + } + } +# endif + + len -= (l4hlen + hlen); + if (len <= 0) + goto nodata; + + while (len > 1) { + if (((caddr_t)sp - mtod(m, caddr_t)) >= m->m_len) { + m = m->m_next; + PANIC((!m),("fr_cksum(2): not enough data")); + sp = mtod(m, u_short *); + } + if (((caddr_t)(sp + 
1) - mtod(m, caddr_t)) > m->m_len) { + bytes.c[0] = *(u_char *)sp; + m = m->m_next; + PANIC((!m),("fr_cksum(3): not enough data")); + sp = mtod(m, u_short *); + bytes.c[1] = *(u_char *)sp; + sum += bytes.s; + sp = (u_short *)((u_char *)sp + 1); + } + if ((u_long)sp & 1) { + bcopy((char *)sp++, (char *)&bytes.s, sizeof(bytes.s)); + sum += bytes.s; + } else + sum += *sp++; + len -= 2; + } + + if (len != 0) + sum += ntohs(*(u_char *)sp << 8); +nodata: + while (sum > 0xffff) + sum = (sum & 0xffff) + (sum >> 16); + sum2 = (u_short)(~sum & 0xffff); +} +# endif /* defined(BSD) || defined(sun) */ +# endif /* MENTAT */ +#else /* _KERNEL */ + for (; slen > 1; slen -= 2) + sum += *sp++; + if (slen) + sum += ntohs(*(u_char *)sp << 8); + while (sum > 0xffff) + sum = (sum & 0xffff) + (sum >> 16); + sum2 = (u_short)(~sum & 0xffff); +#endif /* _KERNEL */ + if (csump != NULL) + *csump = sumsave; + return sum2; +} + + +#if defined(_KERNEL) && ( ((BSD < 199103) && !defined(MENTAT)) || \ + defined(__sgi) ) && !defined(linux) && !defined(_AIX51) +/* + * Copyright (c) 1982, 1986, 1988, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)uipc_mbuf.c 8.2 (Berkeley) 1/4/94 + * $Id: fil.c,v 2.243.2.64 2005/08/13 05:19:59 darrenr Exp $ + */ +/* + * Copy data from an mbuf chain starting "off" bytes from the beginning, + * continuing for "len" bytes, into the indicated buffer. + */ +void +m_copydata(m, off, len, cp) + mb_t *m; + int off; + int len; + caddr_t cp; +{ + unsigned count; + + if (off < 0 || len < 0) + panic("m_copydata"); + while (off > 0) { + if (m == 0) + panic("m_copydata"); + if (off < m->m_len) + break; + off -= m->m_len; + m = m->m_next; + } + while (len > 0) { + if (m == 0) + panic("m_copydata"); + count = MIN(m->m_len - off, len); + bcopy(mtod(m, caddr_t) + off, cp, count); + len -= count; + cp += count; + off = 0; + m = m->m_next; + } +} + + +/* + * Copy data from a buffer back into the indicated mbuf chain, + * starting "off" bytes from the beginning, extending the mbuf + * chain if necessary. 
+ */ +void +m_copyback(m0, off, len, cp) + struct mbuf *m0; + int off; + int len; + caddr_t cp; +{ + int mlen; + struct mbuf *m = m0, *n; + int totlen = 0; + + if (m0 == 0) + return; + while (off > (mlen = m->m_len)) { + off -= mlen; + totlen += mlen; + if (m->m_next == 0) { + n = m_getclr(M_DONTWAIT, m->m_type); + if (n == 0) + goto out; + n->m_len = min(MLEN, len + off); + m->m_next = n; + } + m = m->m_next; + } + while (len > 0) { + mlen = min(m->m_len - off, len); + bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen); + cp += mlen; + len -= mlen; + mlen += off; + off = 0; + totlen += mlen; + if (len == 0) + break; + if (m->m_next == 0) { + n = m_get(M_DONTWAIT, m->m_type); + if (n == 0) + break; + n->m_len = min(MLEN, len); + m->m_next = n; + } + m = m->m_next; + } +out: +#if 0 + if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) + m->m_pkthdr.len = totlen; +#endif + return; +} +#endif /* (_KERNEL) && ( ((BSD < 199103) && !MENTAT) || __sgi) */ + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_findgroup */ +/* Returns: frgroup_t * - NULL = group not found, else pointer to group */ +/* Parameters: group(I) - group name to search for */ +/* unit(I) - device to which this group belongs */ +/* set(I) - which set of rules (inactive/inactive) this is */ +/* fgpp(O) - pointer to place to store pointer to the pointer */ +/* to where to add the next (last) group or where */ +/* to delete group from. */ +/* */ +/* Search amongst the defined groups for a particular group number. */ +/* ------------------------------------------------------------------------ */ +frgroup_t *fr_findgroup(group, unit, set, fgpp) +char *group; +minor_t unit; +int set; +frgroup_t ***fgpp; +{ + frgroup_t *fg, **fgp; + + /* + * Which list of groups to search in is dependent on which list of + * rules are being operated on. 
+ */ + fgp = &ipfgroups[unit][set]; + + while ((fg = *fgp) != NULL) { + if (strncmp(group, fg->fg_name, FR_GROUPLEN) == 0) + break; + else + fgp = &fg->fg_next; + } + if (fgpp != NULL) + *fgpp = fgp; + return fg; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_addgroup */ +/* Returns: frgroup_t * - NULL == did not create group, */ +/* != NULL == pointer to the group */ +/* Parameters: num(I) - group number to add */ +/* head(I) - rule pointer that is using this as the head */ +/* flags(I) - rule flags which describe the type of rule it is */ +/* unit(I) - device to which this group will belong to */ +/* set(I) - which set of rules (inactive/inactive) this is */ +/* Write Locks: ipf_mutex */ +/* */ +/* Add a new group head, or if it already exists, increase the reference */ +/* count to it. */ +/* ------------------------------------------------------------------------ */ +frgroup_t *fr_addgroup(group, head, flags, unit, set) +char *group; +void *head; +u_32_t flags; +minor_t unit; +int set; +{ + frgroup_t *fg, **fgp; + u_32_t gflags; + + if (group == NULL) + return NULL; + + if (unit == IPL_LOGIPF && *group == '\0') + return NULL; + + fgp = NULL; + gflags = flags & FR_INOUT; + + fg = fr_findgroup(group, unit, set, &fgp); + if (fg != NULL) { + if (fg->fg_flags == 0) + fg->fg_flags = gflags; + else if (gflags != fg->fg_flags) + return NULL; + fg->fg_ref++; + return fg; + } + KMALLOC(fg, frgroup_t *); + if (fg != NULL) { + fg->fg_head = head; + fg->fg_start = NULL; + fg->fg_next = *fgp; + bcopy(group, fg->fg_name, FR_GROUPLEN); + fg->fg_flags = gflags; + fg->fg_ref = 1; + *fgp = fg; + } + return fg; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_delgroup */ +/* Returns: Nil */ +/* Parameters: group(I) - group name to delete */ +/* unit(I) - device to which this group belongs */ +/* set(I) - which set of rules (inactive/inactive) this is */ +/* Write Locks: 
ipf_mutex */ +/* */ +/* Attempt to delete a group head. */ +/* Only do this when its reference count reaches 0. */ +/* ------------------------------------------------------------------------ */ +void fr_delgroup(group, unit, set) +char *group; +minor_t unit; +int set; +{ + frgroup_t *fg, **fgp; + + fg = fr_findgroup(group, unit, set, &fgp); + if (fg == NULL) + return; + + fg->fg_ref--; + if (fg->fg_ref == 0) { + *fgp = fg->fg_next; + KFREE(fg); + } +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_getrulen */ +/* Returns: frentry_t * - NULL == not found, else pointer to rule n */ +/* Parameters: unit(I) - device for which to count the rule's number */ +/* flags(I) - which set of rules to find the rule in */ +/* group(I) - group name */ +/* n(I) - rule number to find */ +/* */ +/* Find rule # n in group # g and return a pointer to it. Return NULl if */ +/* group # g doesn't exist or there are less than n rules in the group. */ +/* ------------------------------------------------------------------------ */ +frentry_t *fr_getrulen(unit, group, n) +int unit; +char *group; +u_32_t n; +{ + frentry_t *fr; + frgroup_t *fg; + + fg = fr_findgroup(group, unit, fr_active, NULL); + if (fg == NULL) + return NULL; + for (fr = fg->fg_head; fr && n; fr = fr->fr_next, n--) + ; + if (n != 0) + return NULL; + return fr; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_rulen */ +/* Returns: int - >= 0 - rule number, -1 == search failed */ +/* Parameters: unit(I) - device for which to count the rule's number */ +/* fr(I) - pointer to rule to match */ +/* */ +/* Return the number for a rule on a specific filtering device. 
*/ +/* ------------------------------------------------------------------------ */ +int fr_rulen(unit, fr) +int unit; +frentry_t *fr; +{ + frentry_t *fh; + frgroup_t *fg; + u_32_t n = 0; + + if (fr == NULL) + return -1; + fg = fr_findgroup(fr->fr_group, unit, fr_active, NULL); + if (fg == NULL) + return -1; + for (fh = fg->fg_head; fh; n++, fh = fh->fr_next) + if (fh == fr) + break; + if (fh == NULL) + return -1; + return n; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: frflushlist */ +/* Returns: int - >= 0 - number of flushed rules */ +/* Parameters: set(I) - which set of rules (inactive/inactive) this is */ +/* unit(I) - device for which to flush rules */ +/* flags(I) - which set of rules to flush */ +/* nfreedp(O) - pointer to int where flush count is stored */ +/* listp(I) - pointer to list to flush pointer */ +/* Write Locks: ipf_mutex */ +/* */ +/* Recursively flush rules from the list, descending groups as they are */ +/* encountered. if a rule is the head of a group and it has lost all its */ +/* group members, then also delete the group reference. nfreedp is needed */ +/* to store the accumulating count of rules removed, whereas the returned */ +/* value is just the number removed from the current list. The latter is */ +/* needed to correctly adjust reference counts on rules that define groups. */ +/* */ +/* NOTE: Rules not loaded from user space cannot be flushed. 
*/ +/* ------------------------------------------------------------------------ */ +static int frflushlist(set, unit, nfreedp, listp) +int set; +minor_t unit; +int *nfreedp; +frentry_t **listp; +{ + int freed = 0, i; + frentry_t *fp; + + while ((fp = *listp) != NULL) { + if ((fp->fr_type & FR_T_BUILTIN) || + !(fp->fr_flags & FR_COPIED)) { + listp = &fp->fr_next; + continue; + } + *listp = fp->fr_next; + if (fp->fr_grp != NULL) { + i = frflushlist(set, unit, nfreedp, fp->fr_grp); + fp->fr_ref -= i; + } + + if (fp->fr_grhead != NULL) { + fr_delgroup(fp->fr_grhead, unit, set); + *fp->fr_grhead = '\0'; + } + + ASSERT(fp->fr_ref > 0); + fp->fr_next = NULL; + if (fr_derefrule(&fp) == 0) + freed++; + } + *nfreedp += freed; + return freed; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: frflush */ +/* Returns: int - >= 0 - number of flushed rules */ +/* Parameters: unit(I) - device for which to flush rules */ +/* flags(I) - which set of rules to flush */ +/* */ +/* Calls flushlist() for all filter rules (accounting, firewall - both IPv4 */ +/* and IPv6) as defined by the value of flags. 
*/ +/* ------------------------------------------------------------------------ */ +int frflush(unit, proto, flags) +minor_t unit; +int proto, flags; +{ + int flushed = 0, set; + + WRITE_ENTER(&ipf_mutex); + bzero((char *)frcache, sizeof(frcache)); + + set = fr_active; + if ((flags & FR_INACTIVE) == FR_INACTIVE) + set = 1 - set; + + if (flags & FR_OUTQUE) { + if (proto == 0 || proto == 6) { + (void) frflushlist(set, unit, + &flushed, &ipfilter6[1][set]); + (void) frflushlist(set, unit, + &flushed, &ipacct6[1][set]); + } + if (proto == 0 || proto == 4) { + (void) frflushlist(set, unit, + &flushed, &ipfilter[1][set]); + (void) frflushlist(set, unit, + &flushed, &ipacct[1][set]); + } + } + if (flags & FR_INQUE) { + if (proto == 0 || proto == 6) { + (void) frflushlist(set, unit, + &flushed, &ipfilter6[0][set]); + (void) frflushlist(set, unit, + &flushed, &ipacct6[0][set]); + } + if (proto == 0 || proto == 4) { + (void) frflushlist(set, unit, + &flushed, &ipfilter[0][set]); + (void) frflushlist(set, unit, + &flushed, &ipacct[0][set]); + } + } + RWLOCK_EXIT(&ipf_mutex); + + if (unit == IPL_LOGIPF) { + int tmp; + + tmp = frflush(IPL_LOGCOUNT, proto, flags); + if (tmp >= 0) + flushed += tmp; + } + return flushed; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: memstr */ +/* Returns: char * - NULL if failed, != NULL pointer to matching bytes */ +/* Parameters: src(I) - pointer to byte sequence to match */ +/* dst(I) - pointer to byte sequence to search */ +/* slen(I) - match length */ +/* dlen(I) - length available to search in */ +/* */ +/* Search dst for a sequence of bytes matching those at src and extend for */ +/* slen bytes. 
*/ +/* ------------------------------------------------------------------------ */ +char *memstr(src, dst, slen, dlen) +char *src, *dst; +int slen, dlen; +{ + char *s = NULL; + + while (dlen >= slen) { + if (bcmp(src, dst, slen) == 0) { + s = dst; + break; + } + dst++; + dlen--; + } + return s; +} +/* ------------------------------------------------------------------------ */ +/* Function: fr_fixskip */ +/* Returns: Nil */ +/* Parameters: listp(IO) - pointer to start of list with skip rule */ +/* rp(I) - rule added/removed with skip in it. */ +/* addremove(I) - adjustment (-1/+1) to make to skip count, */ +/* depending on whether a rule was just added */ +/* or removed. */ +/* */ +/* Adjust all the rules in a list which would have skip'd past the position */ +/* where we are inserting to skip to the right place given the change. */ +/* ------------------------------------------------------------------------ */ +void fr_fixskip(listp, rp, addremove) +frentry_t **listp, *rp; +int addremove; +{ + int rules, rn; + frentry_t *fp; + + rules = 0; + for (fp = *listp; (fp != NULL) && (fp != rp); fp = fp->fr_next) + rules++; + + if (!fp) + return; + + for (rn = 0, fp = *listp; fp && (fp != rp); fp = fp->fr_next, rn++) + if (FR_ISSKIP(fp->fr_flags) && (rn + fp->fr_arg >= rules)) + fp->fr_arg += addremove; +} + + +#ifdef _KERNEL +/* ------------------------------------------------------------------------ */ +/* Function: count4bits */ +/* Returns: int - >= 0 - number of consecutive bits in input */ +/* Parameters: ip(I) - 32bit IP address */ +/* */ +/* IPv4 ONLY */ +/* count consecutive 1's in bit mask. If the mask generated by counting */ +/* consecutive 1's is different to that passed, return -1, else return # */ +/* of bits. 
*/ +/* ------------------------------------------------------------------------ */ +int count4bits(ip) +u_32_t ip; +{ + u_32_t ipn; + int cnt = 0, i, j; + + ip = ipn = ntohl(ip); + for (i = 32; i; i--, ipn *= 2) + if (ipn & 0x80000000) + cnt++; + else + break; + ipn = 0; + for (i = 32, j = cnt; i; i--, j--) { + ipn *= 2; + if (j > 0) + ipn++; + } + if (ipn == ip) + return cnt; + return -1; +} + + +#ifdef USE_INET6 +/* ------------------------------------------------------------------------ */ +/* Function: count6bits */ +/* Returns: int - >= 0 - number of consecutive bits in input */ +/* Parameters: msk(I) - pointer to start of IPv6 bitmask */ +/* */ +/* IPv6 ONLY */ +/* count consecutive 1's in bit mask. */ +/* ------------------------------------------------------------------------ */ +int count6bits(msk) +u_32_t *msk; +{ + int i = 0, k; + u_32_t j; + + for (k = 3; k >= 0; k--) + if (msk[k] == 0xffffffff) + i += 32; + else { + for (j = msk[k]; j; j <<= 1) + if (j & 0x80000000) + i++; + } + return i; +} +# endif +#endif /* _KERNEL */ + + +/* ------------------------------------------------------------------------ */ +/* Function: frsynclist */ +/* Returns: void */ +/* Parameters: fr(I) - start of filter list to sync interface names for */ +/* ifp(I) - interface pointer for limiting sync lookups */ +/* Write Locks: ipf_mutex */ +/* */ +/* Walk through a list of filter rules and resolve any interface names into */ +/* pointers. Where dynamic addresses are used, also update the IP address */ +/* used in the rule. The interface pointer is used to limit the lookups to */ +/* a specific set of matching names if it is non-NULL. */ +/* ------------------------------------------------------------------------ */ +static void frsynclist(fr, ifp) +frentry_t *fr; +void *ifp; +{ + frdest_t *fdp; + int v, i; + + for (; fr; fr = fr->fr_next) { + v = fr->fr_v; + + /* + * Lookup all the interface names that are part of the rule. 
+ */ + for (i = 0; i < 4; i++) { + if ((ifp != NULL) && (fr->fr_ifas[i] != ifp)) + continue; + fr->fr_ifas[i] = fr_resolvenic(fr->fr_ifnames[i], v); + } + + if (fr->fr_type == FR_T_IPF) { + if (fr->fr_satype != FRI_NORMAL && + fr->fr_satype != FRI_LOOKUP) { + (void)fr_ifpaddr(v, fr->fr_satype, + fr->fr_ifas[fr->fr_sifpidx], + &fr->fr_src, &fr->fr_smsk); + } + if (fr->fr_datype != FRI_NORMAL && + fr->fr_datype != FRI_LOOKUP) { + (void)fr_ifpaddr(v, fr->fr_datype, + fr->fr_ifas[fr->fr_difpidx], + &fr->fr_dst, &fr->fr_dmsk); + } + } + + fdp = &fr->fr_tifs[0]; + if ((ifp == NULL) || (fdp->fd_ifp == ifp)) + fr_resolvedest(fdp, v); + + fdp = &fr->fr_tifs[1]; + if ((ifp == NULL) || (fdp->fd_ifp == ifp)) + fr_resolvedest(fdp, v); + + fdp = &fr->fr_dif; + if ((ifp == NULL) || (fdp->fd_ifp == ifp)) { + fr_resolvedest(fdp, v); + + fr->fr_flags &= ~FR_DUP; + if ((fdp->fd_ifp != (void *)-1) && + (fdp->fd_ifp != NULL)) + fr->fr_flags |= FR_DUP; + } + +#ifdef IPFILTER_LOOKUP + if (fr->fr_type == FR_T_IPF && fr->fr_satype == FRI_LOOKUP && + fr->fr_srcptr == NULL) { + fr->fr_srcptr = fr_resolvelookup(fr->fr_srctype, + fr->fr_srcnum, + &fr->fr_srcfunc); + } + if (fr->fr_type == FR_T_IPF && fr->fr_datype == FRI_LOOKUP && + fr->fr_dstptr == NULL) { + fr->fr_dstptr = fr_resolvelookup(fr->fr_dsttype, + fr->fr_dstnum, + &fr->fr_dstfunc); + } +#endif + } +} + + +#ifdef _KERNEL +/* ------------------------------------------------------------------------ */ +/* Function: frsync */ +/* Returns: void */ +/* Parameters: Nil */ +/* */ +/* frsync() is called when we suspect that the interface list or */ +/* information about interfaces (like IP#) has changed. Go through all */ +/* filter rules, NAT entries and the state table and check if anything */ +/* needs to be changed/updated. 
*/ +/* ------------------------------------------------------------------------ */ +void frsync(ifp) +void *ifp; +{ + int i; + +# if !SOLARIS + fr_natsync(ifp); + fr_statesync(ifp); +# endif + + WRITE_ENTER(&ipf_mutex); + frsynclist(ipacct[0][fr_active], ifp); + frsynclist(ipacct[1][fr_active], ifp); + frsynclist(ipfilter[0][fr_active], ifp); + frsynclist(ipfilter[1][fr_active], ifp); + frsynclist(ipacct6[0][fr_active], ifp); + frsynclist(ipacct6[1][fr_active], ifp); + frsynclist(ipfilter6[0][fr_active], ifp); + frsynclist(ipfilter6[1][fr_active], ifp); + + for (i = 0; i < IPL_LOGSIZE; i++) { + frgroup_t *g; + + for (g = ipfgroups[i][0]; g != NULL; g = g->fg_next) + frsynclist(g->fg_start, ifp); + for (g = ipfgroups[i][1]; g != NULL; g = g->fg_next) + frsynclist(g->fg_start, ifp); + } + RWLOCK_EXIT(&ipf_mutex); +} + + +/* + * In the functions below, bcopy() is called because the pointer being + * copied _from_ in this instance is a pointer to a char buf (which could + * end up being unaligned) and on the kernel's local stack. + */ +/* ------------------------------------------------------------------------ */ +/* Function: copyinptr */ +/* Returns: int - 0 = success, else failure */ +/* Parameters: src(I) - pointer to the source address */ +/* dst(I) - destination address */ +/* size(I) - number of bytes to copy */ +/* */ +/* Copy a block of data in from user space, given a pointer to the pointer */ +/* to start copying from (src) and a pointer to where to store it (dst). 
*/ +/* NB: src - pointer to user space pointer, dst - kernel space pointer */ +/* ------------------------------------------------------------------------ */ +int copyinptr(src, dst, size) +void *src, *dst; +size_t size; +{ + caddr_t ca; + int err; + +# if SOLARIS + err = COPYIN(src, (caddr_t)&ca, sizeof(ca)); + if (err != 0) + return err; +# else + bcopy(src, (caddr_t)&ca, sizeof(ca)); +# endif + err = COPYIN(ca, dst, size); + return err; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: copyoutptr */ +/* Returns: int - 0 = success, else failure */ +/* Parameters: src(I) - pointer to the source address */ +/* dst(I) - destination address */ +/* size(I) - number of bytes to copy */ +/* */ +/* Copy a block of data out to user space, given a pointer to the pointer */ +/* to start copying from (src) and a pointer to where to store it (dst). */ +/* NB: src - kernel space pointer, dst - pointer to user space pointer. */ +/* ------------------------------------------------------------------------ */ +int copyoutptr(src, dst, size) +void *src, *dst; +size_t size; +{ + caddr_t ca; + int err; + +# if SOLARIS + err = COPYIN(dst, (caddr_t)&ca, sizeof(ca)); + if (err != 0) + return err; +# else + bcopy(dst, (caddr_t)&ca, sizeof(ca)); +# endif + err = COPYOUT(src, ca, size); + return err; +} +#endif + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_lock */ +/* Returns: (void) */ +/* Parameters: data(I) - pointer to lock value to set */ +/* lockp(O) - pointer to location to store old lock value */ +/* */ +/* Get the new value for the lock integer, set it and return the old value */ +/* in *lockp. 
*/ +/* ------------------------------------------------------------------------ */ +void fr_lock(data, lockp) +caddr_t data; +int *lockp; +{ + int arg; + + BCOPYIN(data, (caddr_t)&arg, sizeof(arg)); + BCOPYOUT((caddr_t)lockp, data, sizeof(*lockp)); + *lockp = arg; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_getstat */ +/* Returns: Nil */ +/* Parameters: fiop(I) - pointer to ipfilter stats structure */ +/* */ +/* Stores a copy of current pointers, counters, etc, in the friostat */ +/* structure. */ +/* ------------------------------------------------------------------------ */ +void fr_getstat(fiop) +friostat_t *fiop; +{ + int i, j; + + bcopy((char *)frstats, (char *)fiop->f_st, sizeof(filterstats_t) * 2); + fiop->f_locks[IPL_LOGSTATE] = fr_state_lock; + fiop->f_locks[IPL_LOGNAT] = fr_nat_lock; + fiop->f_locks[IPL_LOGIPF] = fr_frag_lock; + fiop->f_locks[IPL_LOGAUTH] = fr_auth_lock; + + for (i = 0; i < 2; i++) + for (j = 0; j < 2; j++) { + fiop->f_ipf[i][j] = ipfilter[i][j]; + fiop->f_acct[i][j] = ipacct[i][j]; + fiop->f_ipf6[i][j] = ipfilter6[i][j]; + fiop->f_acct6[i][j] = ipacct6[i][j]; + } + + fiop->f_ticks = fr_ticks; + fiop->f_active = fr_active; + fiop->f_froute[0] = fr_frouteok[0]; + fiop->f_froute[1] = fr_frouteok[1]; + + fiop->f_running = fr_running; + for (i = 0; i < IPL_LOGSIZE; i++) { + fiop->f_groups[i][0] = ipfgroups[i][0]; + fiop->f_groups[i][1] = ipfgroups[i][1]; + } +#ifdef IPFILTER_LOG + fiop->f_logging = 1; +#else + fiop->f_logging = 0; +#endif + fiop->f_defpass = fr_pass; + fiop->f_features = fr_features; + (void) strncpy(fiop->f_version, ipfilter_version, + sizeof(fiop->f_version)); +} + + +#ifdef USE_INET6 +int icmptoicmp6types[ICMP_MAXTYPE+1] = { + ICMP6_ECHO_REPLY, /* 0: ICMP_ECHOREPLY */ + -1, /* 1: UNUSED */ + -1, /* 2: UNUSED */ + ICMP6_DST_UNREACH, /* 3: ICMP_UNREACH */ + -1, /* 4: ICMP_SOURCEQUENCH */ + ND_REDIRECT, /* 5: ICMP_REDIRECT */ + -1, /* 6: UNUSED */ + -1, /* 7: UNUSED 
*/ + ICMP6_ECHO_REQUEST, /* 8: ICMP_ECHO */ + -1, /* 9: UNUSED */ + -1, /* 10: UNUSED */ + ICMP6_TIME_EXCEEDED, /* 11: ICMP_TIMXCEED */ + ICMP6_PARAM_PROB, /* 12: ICMP_PARAMPROB */ + -1, /* 13: ICMP_TSTAMP */ + -1, /* 14: ICMP_TSTAMPREPLY */ + -1, /* 15: ICMP_IREQ */ + -1, /* 16: ICMP_IREQREPLY */ + -1, /* 17: ICMP_MASKREQ */ + -1, /* 18: ICMP_MASKREPLY */ +}; + + +int icmptoicmp6unreach[ICMP_MAX_UNREACH] = { + ICMP6_DST_UNREACH_ADDR, /* 0: ICMP_UNREACH_NET */ + ICMP6_DST_UNREACH_ADDR, /* 1: ICMP_UNREACH_HOST */ + -1, /* 2: ICMP_UNREACH_PROTOCOL */ + ICMP6_DST_UNREACH_NOPORT, /* 3: ICMP_UNREACH_PORT */ + -1, /* 4: ICMP_UNREACH_NEEDFRAG */ + ICMP6_DST_UNREACH_NOTNEIGHBOR, /* 5: ICMP_UNREACH_SRCFAIL */ + ICMP6_DST_UNREACH_ADDR, /* 6: ICMP_UNREACH_NET_UNKNOWN */ + ICMP6_DST_UNREACH_ADDR, /* 7: ICMP_UNREACH_HOST_UNKNOWN */ + -1, /* 8: ICMP_UNREACH_ISOLATED */ + ICMP6_DST_UNREACH_ADMIN, /* 9: ICMP_UNREACH_NET_PROHIB */ + ICMP6_DST_UNREACH_ADMIN, /* 10: ICMP_UNREACH_HOST_PROHIB */ + -1, /* 11: ICMP_UNREACH_TOSNET */ + -1, /* 12: ICMP_UNREACH_TOSHOST */ + ICMP6_DST_UNREACH_ADMIN, /* 13: ICMP_UNREACH_ADMIN_PROHIBIT */ +}; +int icmpreplytype6[ICMP6_MAXTYPE + 1]; +#endif + +int icmpreplytype4[ICMP_MAXTYPE + 1]; + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_matchicmpqueryreply */ +/* Returns: int - 1 if "icmp" is a valid reply to "ic" else 0. */ +/* Parameters: v(I) - IP protocol version (4 or 6) */ +/* ic(I) - ICMP information */ +/* icmp(I) - ICMP packet header */ +/* rev(I) - direction (0 = forward/1 = reverse) of packet */ +/* */ +/* Check if the ICMP packet defined by the header pointed to by icmp is a */ +/* reply to one as described by what's in ic. If it is a match, return 1, */ +/* else return 0 for no match. 
*/ +/* ------------------------------------------------------------------------ */ +int fr_matchicmpqueryreply(v, ic, icmp, rev) +int v; +icmpinfo_t *ic; +icmphdr_t *icmp; +int rev; +{ + int ictype; + + ictype = ic->ici_type; + + if (v == 4) { + /* + * If we matched its type on the way in, then when going out + * it will still be the same type. + */ + if ((!rev && (icmp->icmp_type == ictype)) || + (rev && (icmpreplytype4[ictype] == icmp->icmp_type))) { + if (icmp->icmp_type != ICMP_ECHOREPLY) + return 1; + if (icmp->icmp_id == ic->ici_id) + return 1; + } + } +#ifdef USE_INET6 + else if (v == 6) { + if ((!rev && (icmp->icmp_type == ictype)) || + (rev && (icmpreplytype6[ictype] == icmp->icmp_type))) { + if (icmp->icmp_type != ICMP6_ECHO_REPLY) + return 1; + if (icmp->icmp_id == ic->ici_id) + return 1; + } + } +#endif + return 0; +} + + +#ifdef IPFILTER_LOOKUP +/* ------------------------------------------------------------------------ */ +/* Function: fr_resolvelookup */ +/* Returns: void * - NULL = failure, else success. */ +/* Parameters: type(I) - type of lookup these parameters are for. */ +/* number(I) - table number to use when searching */ +/* funcptr(IO) - pointer to pointer for storing IP address */ +/* searching function. */ +/* */ +/* Search for the "table" number passed in amongst those configured for */ +/* that particular type. If the type is recognised then the function to */ +/* call to do the IP address search will be change, regardless of whether */ +/* or not the "table" number exists. 
*/ +/* ------------------------------------------------------------------------ */ +static void *fr_resolvelookup(type, number, funcptr) +u_int type, number; +lookupfunc_t *funcptr; +{ + char name[FR_GROUPLEN]; + iphtable_t *iph; + ip_pool_t *ipo; + void *ptr; + +#if defined(SNPRINTF) && defined(_KERNEL) + (void) SNPRINTF(name, sizeof(name), "%u", number); +#else + (void) sprintf(name, "%u", number); +#endif + + READ_ENTER(&ip_poolrw); + + switch (type) + { + case IPLT_POOL : +# if (defined(__osf__) && defined(_KERNEL)) + ptr = NULL; + *funcptr = NULL; +# else + ipo = ip_pool_find(IPL_LOGIPF, name); + ptr = ipo; + if (ipo != NULL) { + ATOMIC_INC32(ipo->ipo_ref); + } + *funcptr = ip_pool_search; +# endif + break; + case IPLT_HASH : + iph = fr_findhtable(IPL_LOGIPF, name); + ptr = iph; + if (iph != NULL) { + ATOMIC_INC32(iph->iph_ref); + } + *funcptr = fr_iphmfindip; + break; + default: + ptr = NULL; + *funcptr = NULL; + break; + } + RWLOCK_EXIT(&ip_poolrw); + + return ptr; +} +#endif + + +/* ------------------------------------------------------------------------ */ +/* Function: frrequest */ +/* Returns: int - 0 == success, > 0 == errno value */ +/* Parameters: unit(I) - device for which this is for */ +/* req(I) - ioctl command (SIOC*) */ +/* data(I) - pointr to ioctl data */ +/* set(I) - 1 or 0 (filter set) */ +/* makecopy(I) - flag indicating whether data points to a rule */ +/* in kernel space & hence doesn't need copying. */ +/* */ +/* This function handles all the requests which operate on the list of */ +/* filter rules. This includes adding, deleting, insertion. It is also */ +/* responsible for creating groups when a "head" rule is loaded. Interface */ +/* names are resolved here and other sanity checks are made on the content */ +/* of the rule structure being loaded. If a rule has user defined timeouts */ +/* then make sure they are created and initialised before exiting. 
*/ +/* ------------------------------------------------------------------------ */ +int frrequest(unit, req, data, set, makecopy) +int unit; +ioctlcmd_t req; +int set, makecopy; +caddr_t data; +{ + frentry_t frd, *fp, *f, **fprev, **ftail; + int error = 0, in, v; + void *ptr, *uptr; + u_int *p, *pp; + frgroup_t *fg; + char *group; + + fg = NULL; + fp = &frd; + if (makecopy != 0) { + error = fr_inobj(data, fp, IPFOBJ_FRENTRY); + if (error) + return EFAULT; + if ((fp->fr_flags & FR_T_BUILTIN) != 0) + return EINVAL; + fp->fr_ref = 0; + fp->fr_flags |= FR_COPIED; + } else { + fp = (frentry_t *)data; + if ((fp->fr_type & FR_T_BUILTIN) == 0) + return EINVAL; + fp->fr_flags &= ~FR_COPIED; + } + + if (((fp->fr_dsize == 0) && (fp->fr_data != NULL)) || + ((fp->fr_dsize != 0) && (fp->fr_data == NULL))) + return EINVAL; + + v = fp->fr_v; + uptr = fp->fr_data; + + /* + * Only filter rules for IPv4 or IPv6 are accepted. + */ + if (v == 4) + /*EMPTY*/; +#ifdef USE_INET6 + else if (v == 6) + /*EMPTY*/; +#endif + else { + return EINVAL; + } + + /* + * If the rule is being loaded from user space, i.e. we had to copy it + * into kernel space, then do not trust the function pointer in the + * rule. + */ + if ((makecopy == 1) && (fp->fr_func != NULL)) { + if (fr_findfunc(fp->fr_func) == NULL) + return ESRCH; + error = fr_funcinit(fp); + if (error != 0) + return error; + } + + ptr = NULL; + /* + * Check that the group number does exist and that its use (in/out) + * matches what the rule is. 
+ */ + if (!strncmp(fp->fr_grhead, "0", FR_GROUPLEN)) + *fp->fr_grhead = '\0'; + group = fp->fr_group; + if (!strncmp(group, "0", FR_GROUPLEN)) + *group = '\0'; + + if (FR_ISACCOUNT(fp->fr_flags)) + unit = IPL_LOGCOUNT; + + if ((req != (int)SIOCZRLST) && (*group != '\0')) { + fg = fr_findgroup(group, unit, set, NULL); + if (fg == NULL) + return ESRCH; + if (fg->fg_flags == 0) + fg->fg_flags = fp->fr_flags & FR_INOUT; + else if (fg->fg_flags != (fp->fr_flags & FR_INOUT)) + return ESRCH; + } + + in = (fp->fr_flags & FR_INQUE) ? 0 : 1; + + /* + * Work out which rule list this change is being applied to. + */ + ftail = NULL; + fprev = NULL; + if (unit == IPL_LOGAUTH) + fprev = &ipauth; + else if (v == 4) { + if (FR_ISACCOUNT(fp->fr_flags)) + fprev = &ipacct[in][set]; + else if ((fp->fr_flags & (FR_OUTQUE|FR_INQUE)) != 0) + fprev = &ipfilter[in][set]; + } else if (v == 6) { + if (FR_ISACCOUNT(fp->fr_flags)) + fprev = &ipacct6[in][set]; + else if ((fp->fr_flags & (FR_OUTQUE|FR_INQUE)) != 0) + fprev = &ipfilter6[in][set]; + } + if (fprev == NULL) + return ESRCH; + + if (*group != '\0') { + if (!fg && !(fg = fr_findgroup(group, unit, set, NULL))) + return ESRCH; + fprev = &fg->fg_start; + } + + ftail = fprev; + for (f = *ftail; (f = *ftail) != NULL; ftail = &f->fr_next) { + if (fp->fr_collect <= f->fr_collect) { + ftail = fprev; + f = NULL; + break; + } + fprev = ftail; + } + + /* + * Copy in extra data for the rule. + */ + if (fp->fr_dsize != 0) { + if (makecopy != 0) { + KMALLOCS(ptr, void *, fp->fr_dsize); + if (!ptr) + return ENOMEM; + error = COPYIN(uptr, ptr, fp->fr_dsize); + } else { + ptr = uptr; + error = 0; + } + if (error != 0) { + KFREES(ptr, fp->fr_dsize); + return ENOMEM; + } + fp->fr_data = ptr; + } else + fp->fr_data = NULL; + + /* + * Perform per-rule type sanity checks of their members. 
+ */ + switch (fp->fr_type & ~FR_T_BUILTIN) + { +#if defined(IPFILTER_BPF) + case FR_T_BPFOPC : + if (fp->fr_dsize == 0) + return EINVAL; + if (!bpf_validate(ptr, fp->fr_dsize/sizeof(struct bpf_insn))) { + if (makecopy && fp->fr_data != NULL) { + KFREES(fp->fr_data, fp->fr_dsize); + } + return EINVAL; + } + break; +#endif + case FR_T_IPF : + if (fp->fr_dsize != sizeof(fripf_t)) + return EINVAL; + + /* + * Allowing a rule with both "keep state" and "with oow" is + * pointless because adding a state entry to the table will + * fail with the out of window (oow) flag set. + */ + if ((fp->fr_flags & FR_KEEPSTATE) && (fp->fr_flx & FI_OOW)) + return EINVAL; + + switch (fp->fr_satype) + { + case FRI_BROADCAST : + case FRI_DYNAMIC : + case FRI_NETWORK : + case FRI_NETMASKED : + case FRI_PEERADDR : + if (fp->fr_sifpidx < 0 || fp->fr_sifpidx > 3) { + if (makecopy && fp->fr_data != NULL) { + KFREES(fp->fr_data, fp->fr_dsize); + } + return EINVAL; + } + break; +#ifdef IPFILTER_LOOKUP + case FRI_LOOKUP : + fp->fr_srcptr = fr_resolvelookup(fp->fr_srctype, + fp->fr_srcnum, + &fp->fr_srcfunc); + break; +#endif + default : + break; + } + + switch (fp->fr_datype) + { + case FRI_BROADCAST : + case FRI_DYNAMIC : + case FRI_NETWORK : + case FRI_NETMASKED : + case FRI_PEERADDR : + if (fp->fr_difpidx < 0 || fp->fr_difpidx > 3) { + if (makecopy && fp->fr_data != NULL) { + KFREES(fp->fr_data, fp->fr_dsize); + } + return EINVAL; + } + break; +#ifdef IPFILTER_LOOKUP + case FRI_LOOKUP : + fp->fr_dstptr = fr_resolvelookup(fp->fr_dsttype, + fp->fr_dstnum, + &fp->fr_dstfunc); + break; +#endif + default : + break; + } + break; + case FR_T_NONE : + break; + case FR_T_CALLFUNC : + break; + case FR_T_COMPIPF : + break; + default : + if (makecopy && fp->fr_data != NULL) { + KFREES(fp->fr_data, fp->fr_dsize); + } + return EINVAL; + } + + /* + * Lookup all the interface names that are part of the rule. 
+ */ + frsynclist(fp, NULL); + fp->fr_statecnt = 0; + + /* + * Look for an existing matching filter rule, but don't include the + * next or interface pointer in the comparison (fr_next, fr_ifa). + * This elminates rules which are indentical being loaded. Checksum + * the constant part of the filter rule to make comparisons quicker + * (this meaning no pointers are included). + */ + for (fp->fr_cksum = 0, p = (u_int *)&fp->fr_func, pp = &fp->fr_cksum; + p < pp; p++) + fp->fr_cksum += *p; + pp = (u_int *)(fp->fr_caddr + fp->fr_dsize); + for (p = (u_int *)fp->fr_data; p < pp; p++) + fp->fr_cksum += *p; + + WRITE_ENTER(&ipf_mutex); + bzero((char *)frcache, sizeof(frcache)); + + for (; (f = *ftail) != NULL; ftail = &f->fr_next) { + if ((fp->fr_cksum != f->fr_cksum) || + (f->fr_dsize != fp->fr_dsize)) + continue; + if (bcmp((char *)&f->fr_func, (char *)&fp->fr_func, FR_CMPSIZ)) + continue; + if ((!ptr && !f->fr_data) || + (ptr && f->fr_data && + !bcmp((char *)ptr, (char *)f->fr_data, f->fr_dsize))) + break; + } + + /* + * If zero'ing statistics, copy current to caller and zero. + */ + if (req == (ioctlcmd_t)SIOCZRLST) { + if (f == NULL) + error = ESRCH; + else { + /* + * Copy and reduce lock because of impending copyout. + * Well we should, but if we do then the atomicity of + * this call and the correctness of fr_hits and + * fr_bytes cannot be guaranteed. As it is, this code + * only resets them to 0 if they are successfully + * copied out into user space. + */ + bcopy((char *)f, (char *)fp, sizeof(*f)); + /* MUTEX_DOWNGRADE(&ipf_mutex); */ + + /* + * When we copy this rule back out, set the data + * pointer to be what it was in user space. 
+ */ + fp->fr_data = uptr; + error = fr_outobj(data, fp, IPFOBJ_FRENTRY); + + if (error == 0) { + if ((f->fr_dsize != 0) && (uptr != NULL)) + error = COPYOUT(f->fr_data, uptr, + f->fr_dsize); + if (error == 0) { + f->fr_hits = 0; + f->fr_bytes = 0; + } + } + } + + if ((ptr != NULL) && (makecopy != 0)) { + KFREES(ptr, fp->fr_dsize); + } + RWLOCK_EXIT(&ipf_mutex); + return error; + } + + if (!f) { + /* + * At the end of this, ftail must point to the place where the + * new rule is to be saved/inserted/added. + * For SIOCAD*FR, this should be the last rule in the group of + * rules that have equal fr_collect fields. + * For SIOCIN*FR, ... + */ + if (req == (ioctlcmd_t)SIOCADAFR || + req == (ioctlcmd_t)SIOCADIFR) { + + for (ftail = fprev; (f = *ftail) != NULL; ) { + if (f->fr_collect > fp->fr_collect) + break; + ftail = &f->fr_next; + } + f = NULL; + ptr = NULL; + error = 0; + } else if (req == (ioctlcmd_t)SIOCINAFR || + req == (ioctlcmd_t)SIOCINIFR) { + while ((f = *fprev) != NULL) { + if (f->fr_collect >= fp->fr_collect) + break; + fprev = &f->fr_next; + } + ftail = fprev; + if (fp->fr_hits != 0) { + while (fp->fr_hits && (f = *ftail)) { + if (f->fr_collect != fp->fr_collect) + break; + fprev = ftail; + ftail = &f->fr_next; + fp->fr_hits--; + } + } + f = NULL; + ptr = NULL; + error = 0; + } + } + + /* + * Request to remove a rule. + */ + if (req == (ioctlcmd_t)SIOCRMAFR || req == (ioctlcmd_t)SIOCRMIFR) { + if (!f) + error = ESRCH; + else { + /* + * Do not allow activity from user space to interfere + * with rules not loaded that way. + */ + if ((makecopy == 1) && !(f->fr_flags & FR_COPIED)) { + error = EPERM; + goto done; + } + + /* + * Return EBUSY if the rule is being reference by + * something else (eg state information. 
+ */ + if (f->fr_ref > 1) { + error = EBUSY; + goto done; + } +#ifdef IPFILTER_SCAN + if (f->fr_isctag[0] != '\0' && + (f->fr_isc != (struct ipscan *)-1)) + ipsc_detachfr(f); +#endif + if ((fg != NULL) && (fg->fg_head != NULL)) + fg->fg_head->fr_ref--; + if (unit == IPL_LOGAUTH) { + error = fr_preauthcmd(req, f, ftail); + goto done; + } + if (*f->fr_grhead != '\0') + fr_delgroup(f->fr_grhead, unit, set); + fr_fixskip(ftail, f, -1); + *ftail = f->fr_next; + f->fr_next = NULL; + (void)fr_derefrule(&f); + } + } else { + /* + * Not removing, so we must be adding/inserting a rule. + */ + if (f) + error = EEXIST; + else { + if (unit == IPL_LOGAUTH) { + error = fr_preauthcmd(req, fp, ftail); + goto done; + } + if (makecopy) { + KMALLOC(f, frentry_t *); + } else + f = fp; + if (f != NULL) { + if (fg != NULL && fg->fg_head != NULL) + fg->fg_head->fr_ref++; + if (fp != f) + bcopy((char *)fp, (char *)f, + sizeof(*f)); + MUTEX_NUKE(&f->fr_lock); + MUTEX_INIT(&f->fr_lock, "filter rule lock"); +#ifdef IPFILTER_SCAN + if (f->fr_isctag[0] != '\0' && + ipsc_attachfr(f)) + f->fr_isc = (struct ipscan *)-1; +#endif + f->fr_hits = 0; + if (makecopy != 0) + f->fr_ref = 1; + f->fr_next = *ftail; + *ftail = f; + if (req == (ioctlcmd_t)SIOCINIFR || + req == (ioctlcmd_t)SIOCINAFR) + fr_fixskip(ftail, f, 1); + f->fr_grp = NULL; + group = f->fr_grhead; + if (*group != '\0') { + fg = fr_addgroup(group, f, f->fr_flags, + unit, set); + if (fg != NULL) + f->fr_grp = &fg->fg_start; + } + } else + error = ENOMEM; + } + } +done: + RWLOCK_EXIT(&ipf_mutex); + if ((ptr != NULL) && (error != 0) && (makecopy != 0)) { + KFREES(ptr, fp->fr_dsize); + } + return (error); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_funcinit */ +/* Returns: int - 0 == success, else ESRCH: cannot resolve rule details */ +/* Parameters: fr(I) - pointer to filter rule */ +/* */ +/* If a rule is a call rule, then check if the function it points to needs */ +/* an init 
function to be called now the rule has been loaded. */ +/* ------------------------------------------------------------------------ */ +static int fr_funcinit(fr) +frentry_t *fr; +{ + ipfunc_resolve_t *ft; + int err; + + err = ESRCH; + + for (ft = fr_availfuncs; ft->ipfu_addr != NULL; ft++) + if (ft->ipfu_addr == fr->fr_func) { + err = 0; + if (ft->ipfu_init != NULL) + err = (*ft->ipfu_init)(fr); + break; + } + return err; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_findfunc */ +/* Returns: ipfunc_t - pointer to function if found, else NULL */ +/* Parameters: funcptr(I) - function pointer to lookup */ +/* */ +/* Look for a function in the table of known functions. */ +/* ------------------------------------------------------------------------ */ +static ipfunc_t fr_findfunc(funcptr) +ipfunc_t funcptr; +{ + ipfunc_resolve_t *ft; + + for (ft = fr_availfuncs; ft->ipfu_addr != NULL; ft++) + if (ft->ipfu_addr == funcptr) + return funcptr; + return NULL; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_resolvefunc */ +/* Returns: int - 0 == success, else error */ +/* Parameters: data(IO) - ioctl data pointer to ipfunc_resolve_t struct */ +/* */ +/* Copy in a ipfunc_resolve_t structure and then fill in the missing field. */ +/* This will either be the function name (if the pointer is set) or the */ +/* function pointer if the name is set. 
When found, fill in the other one */ +/* so that the entire, complete, structure can be copied back to user space.*/ +/* ------------------------------------------------------------------------ */ +int fr_resolvefunc(data) +void *data; +{ + ipfunc_resolve_t res, *ft; + + BCOPYIN(data, &res, sizeof(res)); + + if (res.ipfu_addr == NULL && res.ipfu_name[0] != '\0') { + for (ft = fr_availfuncs; ft->ipfu_addr != NULL; ft++) + if (strncmp(res.ipfu_name, ft->ipfu_name, + sizeof(res.ipfu_name)) == 0) { + res.ipfu_addr = ft->ipfu_addr; + res.ipfu_init = ft->ipfu_init; + if (COPYOUT(&res, data, sizeof(res)) != 0) + return EFAULT; + return 0; + } + } + if (res.ipfu_addr != NULL && res.ipfu_name[0] == '\0') { + for (ft = fr_availfuncs; ft->ipfu_addr != NULL; ft++) + if (ft->ipfu_addr == res.ipfu_addr) { + (void) strncpy(res.ipfu_name, ft->ipfu_name, + sizeof(res.ipfu_name)); + res.ipfu_init = ft->ipfu_init; + if (COPYOUT(&res, data, sizeof(res)) != 0) + return EFAULT; + return 0; + } + } + return ESRCH; +} + + +#if !defined(_KERNEL) || (!defined(__NetBSD__) && !defined(__OpenBSD__) && !defined(__FreeBSD__)) || \ + (defined(__FreeBSD__) && (__FreeBSD_version < 490000)) || \ + (defined(__NetBSD__) && (__NetBSD_Version__ < 105000000)) || \ + (defined(__OpenBSD__) && (OpenBSD < 200006)) +/* + * From: NetBSD + * ppsratecheck(): packets (or events) per second limitation. + */ +int +ppsratecheck(lasttime, curpps, maxpps) + struct timeval *lasttime; + int *curpps; + int maxpps; /* maximum pps allowed */ +{ + struct timeval tv, delta; + int rv; + + GETKTIME(&tv); + + delta.tv_sec = tv.tv_sec - lasttime->tv_sec; + delta.tv_usec = tv.tv_usec - lasttime->tv_usec; + if (delta.tv_usec < 0) { + delta.tv_sec--; + delta.tv_usec += 1000000; + } + + /* + * check for 0,0 is so that the message will be seen at least once. + * if more than one second have passed since the last update of + * lasttime, reset the counter. 
+ * + * we do increment *curpps even in *curpps < maxpps case, as some may + * try to use *curpps for stat purposes as well. + */ + if ((lasttime->tv_sec == 0 && lasttime->tv_usec == 0) || + delta.tv_sec >= 1) { + *lasttime = tv; + *curpps = 0; + rv = 1; + } else if (maxpps < 0) + rv = 1; + else if (*curpps < maxpps) + rv = 1; + else + rv = 0; + *curpps = *curpps + 1; + + return (rv); +} +#endif + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_derefrule */ +/* Returns: int - 0 == rule freed up, else rule not freed */ +/* Parameters: fr(I) - pointer to filter rule */ +/* */ +/* Decrement the reference counter to a rule by one. If it reaches zero, */ +/* free it and any associated storage space being used by it. */ +/* ------------------------------------------------------------------------ */ +int fr_derefrule(frp) +frentry_t **frp; +{ + frentry_t *fr; + + fr = *frp; + + MUTEX_ENTER(&fr->fr_lock); + fr->fr_ref--; + if (fr->fr_ref == 0) { + MUTEX_EXIT(&fr->fr_lock); + MUTEX_DESTROY(&fr->fr_lock); + +#ifdef IPFILTER_LOOKUP + if (fr->fr_type == FR_T_IPF && fr->fr_satype == FRI_LOOKUP) + ip_lookup_deref(fr->fr_srctype, fr->fr_srcptr); + if (fr->fr_type == FR_T_IPF && fr->fr_datype == FRI_LOOKUP) + ip_lookup_deref(fr->fr_dsttype, fr->fr_dstptr); +#endif + + if (fr->fr_dsize) { + KFREES(fr->fr_data, fr->fr_dsize); + } + if ((fr->fr_flags & FR_COPIED) != 0) { + KFREE(fr); + return 0; + } + return 1; + } else { + MUTEX_EXIT(&fr->fr_lock); + } + *frp = NULL; + return -1; +} + + +#ifdef IPFILTER_LOOKUP +/* ------------------------------------------------------------------------ */ +/* Function: fr_grpmapinit */ +/* Returns: int - 0 == success, else ESRCH because table entry not found*/ +/* Parameters: fr(I) - pointer to rule to find hash table for */ +/* */ +/* Looks for group hash table fr_arg and stores a pointer to it in fr_ptr. */ +/* fr_ptr is later used by fr_srcgrpmap and fr_dstgrpmap. 
*/ +/* ------------------------------------------------------------------------ */ +static int fr_grpmapinit(fr) +frentry_t *fr; +{ + char name[FR_GROUPLEN]; + iphtable_t *iph; + +#if defined(SNPRINTF) && defined(_KERNEL) + (void) SNPRINTF(name, sizeof(name), "%d", fr->fr_arg); +#else + (void) sprintf(name, "%d", fr->fr_arg); +#endif + iph = fr_findhtable(IPL_LOGIPF, name); + if (iph == NULL) + return ESRCH; + if ((iph->iph_flags & FR_INOUT) != (fr->fr_flags & FR_INOUT)) + return ESRCH; + fr->fr_ptr = iph; + return 0; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_srcgrpmap */ +/* Returns: frentry_t * - pointer to "new last matching" rule or NULL */ +/* Parameters: fin(I) - pointer to packet information */ +/* passp(IO) - pointer to current/new filter decision (unused) */ +/* */ +/* Look for a rule group head in a hash table, using the source address as */ +/* the key, and descend into that group and continue matching rules against */ +/* the packet. */ +/* ------------------------------------------------------------------------ */ +frentry_t *fr_srcgrpmap(fin, passp) +fr_info_t *fin; +u_32_t *passp; +{ + frgroup_t *fg; + void *rval; + + rval = fr_iphmfindgroup(fin->fin_fr->fr_ptr, fin->fin_v, &fin->fin_src); + if (rval == NULL) + return NULL; + + fg = rval; + fin->fin_fr = fg->fg_start; + (void) fr_scanlist(fin, *passp); + return fin->fin_fr; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_dstgrpmap */ +/* Returns: frentry_t * - pointer to "new last matching" rule or NULL */ +/* Parameters: fin(I) - pointer to packet information */ +/* passp(IO) - pointer to current/new filter decision (unused) */ +/* */ +/* Look for a rule group head in a hash table, using the destination */ +/* address as the key, and descend into that group and continue matching */ +/* rules against the packet. 
*/ +/* ------------------------------------------------------------------------ */ +frentry_t *fr_dstgrpmap(fin, passp) +fr_info_t *fin; +u_32_t *passp; +{ + frgroup_t *fg; + void *rval; + + rval = fr_iphmfindgroup(fin->fin_fr->fr_ptr, fin->fin_v, &fin->fin_dst); + if (rval == NULL) + return NULL; + + fg = rval; + fin->fin_fr = fg->fg_start; + (void) fr_scanlist(fin, *passp); + return fin->fin_fr; +} +#endif /* IPFILTER_LOOKUP */ + +/* + * Queue functions + * =============== + * These functions manage objects on queues for efficient timeouts. There are + * a number of system defined queues as well as user defined timeouts. It is + * expected that a lock is held in the domain in which the queue belongs + * (i.e. either state or NAT) when calling any of these functions that prevents + * fr_freetimeoutqueue() from being called at the same time as any other. + */ + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_addtimeoutqueue */ +/* Returns: struct ifqtq * - NULL if malloc fails, else pointer to */ +/* timeout queue with given interval. */ +/* Parameters: parent(I) - pointer to pointer to parent node of this list */ +/* of interface queues. */ +/* seconds(I) - timeout value in seconds for this queue. */ +/* */ +/* This routine first looks for a timeout queue that matches the interval */ +/* being requested. If it finds one, increments the reference counter and */ +/* returns a pointer to it. If none are found, it allocates a new one and */ +/* inserts it at the top of the list. */ +/* */ +/* Locking. */ +/* It is assumed that the caller of this function has an appropriate lock */ +/* held (exclusively) in the domain that encompases 'parent'. 
*/ +/* ------------------------------------------------------------------------ */ +ipftq_t *fr_addtimeoutqueue(parent, seconds) +ipftq_t **parent; +u_int seconds; +{ + ipftq_t *ifq; + u_int period; + + period = seconds * IPF_HZ_DIVIDE; + + MUTEX_ENTER(&ipf_timeoutlock); + for (ifq = *parent; ifq != NULL; ifq = ifq->ifq_next) { + if (ifq->ifq_ttl == period) { + /* + * Reset the delete flag, if set, so the structure + * gets reused rather than freed and reallocated. + */ + MUTEX_ENTER(&ifq->ifq_lock); + ifq->ifq_flags &= ~IFQF_DELETE; + ifq->ifq_ref++; + MUTEX_EXIT(&ifq->ifq_lock); + MUTEX_EXIT(&ipf_timeoutlock); + + return ifq; + } + } + + KMALLOC(ifq, ipftq_t *); + if (ifq != NULL) { + ifq->ifq_ttl = period; + ifq->ifq_head = NULL; + ifq->ifq_tail = &ifq->ifq_head; + ifq->ifq_next = *parent; + ifq->ifq_pnext = parent; + ifq->ifq_ref = 1; + ifq->ifq_flags = IFQF_USER; + *parent = ifq; + fr_userifqs++; + MUTEX_NUKE(&ifq->ifq_lock); + MUTEX_INIT(&ifq->ifq_lock, "ipftq mutex"); + } + MUTEX_EXIT(&ipf_timeoutlock); + return ifq; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_deletetimeoutqueue */ +/* Returns: int - new reference count value of the timeout queue */ +/* Parameters: ifq(I) - timeout queue which is losing a reference. */ +/* Locks: ifq->ifq_lock */ +/* */ +/* This routine must be called when we're discarding a pointer to a timeout */ +/* queue object, taking care of the reference counter. */ +/* */ +/* Now that this just sets a DELETE flag, it requires the expire code to */ +/* check the list of user defined timeout queues and call the free function */ +/* below (currently commented out) to stop memory leaking. It is done this */ +/* way because the locking may not be sufficient to safely do a free when */ +/* this function is called. 
*/ +/* ------------------------------------------------------------------------ */ +int fr_deletetimeoutqueue(ifq) +ipftq_t *ifq; +{ + + ifq->ifq_ref--; + if ((ifq->ifq_ref == 0) && ((ifq->ifq_flags & IFQF_USER) != 0)) { + ifq->ifq_flags |= IFQF_DELETE; + } + + return ifq->ifq_ref; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_freetimeoutqueue */ +/* Parameters: ifq(I) - timeout queue which is losing a reference. */ +/* Returns: Nil */ +/* */ +/* Locking: */ +/* It is assumed that the caller of this function has an appropriate lock */ +/* held (exclusively) in the domain that encompases the callers "domain". */ +/* The ifq_lock for this structure should not be held. */ +/* */ +/* Remove a user definde timeout queue from the list of queues it is in and */ +/* tidy up after this is done. */ +/* ------------------------------------------------------------------------ */ +void fr_freetimeoutqueue(ifq) +ipftq_t *ifq; +{ + + + if (((ifq->ifq_flags & IFQF_DELETE) == 0) || (ifq->ifq_ref != 0) || + ((ifq->ifq_flags & IFQF_USER) == 0)) { + printf("fr_freetimeoutqueue(%lx) flags 0x%x ttl %d ref %d\n", + (u_long)ifq, ifq->ifq_flags, ifq->ifq_ttl, + ifq->ifq_ref); + return; + } + + /* + * Remove from its position in the list. + */ + *ifq->ifq_pnext = ifq->ifq_next; + if (ifq->ifq_next != NULL) + ifq->ifq_next->ifq_pnext = ifq->ifq_pnext; + + MUTEX_DESTROY(&ifq->ifq_lock); + fr_userifqs--; + KFREE(ifq); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_deletequeueentry */ +/* Returns: Nil */ +/* Parameters: tqe(I) - timeout queue entry to delete */ +/* ifq(I) - timeout queue to remove entry from */ +/* */ +/* Remove a tail queue entry from its queue and make it an orphan. */ +/* fr_deletetimeoutqueue is called to make sure the reference count on the */ +/* queue is correct. 
We can't, however, call fr_freetimeoutqueue because */ +/* the correct lock(s) may not be held that would make it safe to do so. */ +/* ------------------------------------------------------------------------ */ +void fr_deletequeueentry(tqe) +ipftqent_t *tqe; +{ + ipftq_t *ifq; + + ifq = tqe->tqe_ifq; + if (ifq == NULL) + return; + + MUTEX_ENTER(&ifq->ifq_lock); + + if (tqe->tqe_pnext != NULL) { + *tqe->tqe_pnext = tqe->tqe_next; + if (tqe->tqe_next != NULL) + tqe->tqe_next->tqe_pnext = tqe->tqe_pnext; + else /* we must be the tail anyway */ + ifq->ifq_tail = tqe->tqe_pnext; + + tqe->tqe_pnext = NULL; + tqe->tqe_ifq = NULL; + } + + (void) fr_deletetimeoutqueue(ifq); + + MUTEX_EXIT(&ifq->ifq_lock); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_queuefront */ +/* Returns: Nil */ +/* Parameters: tqe(I) - pointer to timeout queue entry */ +/* */ +/* Move a queue entry to the front of the queue, if it isn't already there. */ +/* ------------------------------------------------------------------------ */ +void fr_queuefront(tqe) +ipftqent_t *tqe; +{ + ipftq_t *ifq; + + ifq = tqe->tqe_ifq; + if (ifq == NULL) + return; + + MUTEX_ENTER(&ifq->ifq_lock); + if (ifq->ifq_head != tqe) { + *tqe->tqe_pnext = tqe->tqe_next; + if (tqe->tqe_next) + tqe->tqe_next->tqe_pnext = tqe->tqe_pnext; + else + ifq->ifq_tail = tqe->tqe_pnext; + + tqe->tqe_next = ifq->ifq_head; + ifq->ifq_head->tqe_pnext = &tqe->tqe_next; + ifq->ifq_head = tqe; + tqe->tqe_pnext = &ifq->ifq_head; + } + MUTEX_EXIT(&ifq->ifq_lock); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_queueback */ +/* Returns: Nil */ +/* Parameters: tqe(I) - pointer to timeout queue entry */ +/* */ +/* Move a queue entry to the back of the queue, if it isn't already there. 
*/ +/* ------------------------------------------------------------------------ */ +void fr_queueback(tqe) +ipftqent_t *tqe; +{ + ipftq_t *ifq; + + ifq = tqe->tqe_ifq; + if (ifq == NULL) + return; + tqe->tqe_die = fr_ticks + ifq->ifq_ttl; + + MUTEX_ENTER(&ifq->ifq_lock); + if (tqe->tqe_next == NULL) { /* at the end already ? */ + MUTEX_EXIT(&ifq->ifq_lock); + return; + } + + /* + * Remove from list + */ + *tqe->tqe_pnext = tqe->tqe_next; + tqe->tqe_next->tqe_pnext = tqe->tqe_pnext; + + /* + * Make it the last entry. + */ + tqe->tqe_next = NULL; + tqe->tqe_pnext = ifq->ifq_tail; + *ifq->ifq_tail = tqe; + ifq->ifq_tail = &tqe->tqe_next; + MUTEX_EXIT(&ifq->ifq_lock); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_queueappend */ +/* Returns: Nil */ +/* Parameters: tqe(I) - pointer to timeout queue entry */ +/* ifq(I) - pointer to timeout queue */ +/* parent(I) - owing object pointer */ +/* */ +/* Add a new item to this queue and put it on the very end. */ +/* ------------------------------------------------------------------------ */ +void fr_queueappend(tqe, ifq, parent) +ipftqent_t *tqe; +ipftq_t *ifq; +void *parent; +{ + + MUTEX_ENTER(&ifq->ifq_lock); + tqe->tqe_parent = parent; + tqe->tqe_pnext = ifq->ifq_tail; + *ifq->ifq_tail = tqe; + ifq->ifq_tail = &tqe->tqe_next; + tqe->tqe_next = NULL; + tqe->tqe_ifq = ifq; + tqe->tqe_die = fr_ticks + ifq->ifq_ttl; + ifq->ifq_ref++; + MUTEX_EXIT(&ifq->ifq_lock); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_movequeue */ +/* Returns: Nil */ +/* Parameters: tq(I) - pointer to timeout queue information */ +/* oifp(I) - old timeout queue entry was on */ +/* nifp(I) - new timeout queue to put entry on */ +/* */ +/* Move a queue entry from one timeout queue to another timeout queue. */ +/* If it notices that the current entry is already last and does not need */ +/* to move queue, the return. 
*/ +/* ------------------------------------------------------------------------ */ +void fr_movequeue(tqe, oifq, nifq) +ipftqent_t *tqe; +ipftq_t *oifq, *nifq; +{ + /* + * Is the operation here going to be a no-op ? + */ + MUTEX_ENTER(&oifq->ifq_lock); + if (oifq == nifq && *oifq->ifq_tail == tqe) { + MUTEX_EXIT(&oifq->ifq_lock); + return; + } + + /* + * Remove from the old queue + */ + *tqe->tqe_pnext = tqe->tqe_next; + if (tqe->tqe_next) + tqe->tqe_next->tqe_pnext = tqe->tqe_pnext; + else + oifq->ifq_tail = tqe->tqe_pnext; + tqe->tqe_next = NULL; + + /* + * If we're moving from one queue to another, release the lock on the + * old queue and get a lock on the new queue. For user defined queues, + * if we're moving off it, call delete in case it can now be freed. + */ + if (oifq != nifq) { + tqe->tqe_ifq = NULL; + + (void) fr_deletetimeoutqueue(oifq); + + MUTEX_EXIT(&oifq->ifq_lock); + + MUTEX_ENTER(&nifq->ifq_lock); + + tqe->tqe_ifq = nifq; + nifq->ifq_ref++; + } + + /* + * Add to the bottom of the new queue + */ + tqe->tqe_die = fr_ticks + nifq->ifq_ttl; + tqe->tqe_pnext = nifq->ifq_tail; + *nifq->ifq_tail = tqe; + nifq->ifq_tail = &tqe->tqe_next; + MUTEX_EXIT(&nifq->ifq_lock); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_updateipid */ +/* Returns: int - 0 == success, -1 == error (packet should be droppped) */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* When we are doing NAT, change the IP of every packet to represent a */ +/* single sequence of packets coming from the host, hiding any host */ +/* specific sequencing that might otherwise be revealed. If the packet is */ +/* a fragment, then store the 'new' IPid in the fragment cache and look up */ +/* the fragment cache for non-leading fragments. If a non-leading fragment */ +/* has no match in the cache, return an error. 
*/ +/* ------------------------------------------------------------------------ */ +static INLINE int fr_updateipid(fin) +fr_info_t *fin; +{ + u_short id, ido, sums; + u_32_t sumd, sum; + ip_t *ip; + + if (fin->fin_off != 0) { + sum = fr_ipid_knownfrag(fin); + if (sum == 0xffffffff) + return -1; + sum &= 0xffff; + id = (u_short)sum; + } else { + id = fr_nextipid(fin); + if (fin->fin_off == 0 && (fin->fin_flx & FI_FRAG) != 0) + (void) fr_ipid_newfrag(fin, (u_32_t)id); + } + + ip = fin->fin_ip; + ido = ntohs(ip->ip_id); + if (id == ido) + return 0; + ip->ip_id = htons(id); + CALC_SUMD(ido, id, sumd); /* DESTRUCTIVE MACRO! id,ido change */ + sum = (~ntohs(ip->ip_sum)) & 0xffff; + sum += sumd; + sum = (sum >> 16) + (sum & 0xffff); + sum = (sum >> 16) + (sum & 0xffff); + sums = ~(u_short)sum; + ip->ip_sum = htons(sums); + return 0; +} + + +#ifdef NEED_FRGETIFNAME +/* ------------------------------------------------------------------------ */ +/* Function: fr_getifname */ +/* Returns: char * - pointer to interface name */ +/* Parameters: ifp(I) - pointer to network interface */ +/* buffer(O) - pointer to where to store interface name */ +/* */ +/* Constructs an interface name in the buffer passed. The buffer passed is */ +/* expected to be at least LIFNAMSIZ in bytes big. If buffer is passed in */ +/* as a NULL pointer then return a pointer to a static array. 
*/ +/* ------------------------------------------------------------------------ */ +char *fr_getifname(ifp, buffer) +struct ifnet *ifp; +char *buffer; +{ + static char namebuf[LIFNAMSIZ]; +# if defined(MENTAT) || defined(__FreeBSD__) || defined(__osf__) || \ + defined(__sgi) || defined(linux) || defined(_AIX51) || \ + (defined(sun) && !defined(__SVR4) && !defined(__svr4__)) + int unit, space; + char temp[20]; + char *s; +# endif + + if (buffer == NULL) + buffer = namebuf; + (void) strncpy(buffer, ifp->if_name, LIFNAMSIZ); + buffer[LIFNAMSIZ - 1] = '\0'; +# if defined(MENTAT) || defined(__FreeBSD__) || defined(__osf__) || \ + defined(__sgi) || defined(_AIX51) || \ + (defined(sun) && !defined(__SVR4) && !defined(__svr4__)) + for (s = buffer; *s; s++) + ; + unit = ifp->if_unit; + space = LIFNAMSIZ - (s - buffer); + if (space > 0) { +# if defined(SNPRINTF) && defined(_KERNEL) + (void) SNPRINTF(temp, sizeof(temp), "%d", unit); +# else + (void) sprintf(temp, "%d", unit); +# endif + (void) strncpy(s, temp, space); + } +# endif + return buffer; +} +#endif + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_ioctlswitch */ +/* Returns: int - -1 continue processing, else ioctl return value */ +/* Parameters: unit(I) - device unit opened */ +/* data(I) - pointer to ioctl data */ +/* cmd(I) - ioctl command */ +/* mode(I) - mode value */ +/* */ +/* Based on the value of unit, call the appropriate ioctl handler or return */ +/* EIO if ipfilter is not running. Also checks if write perms are req'd */ +/* for the device in order to execute the ioctl. 
*/ +/* ------------------------------------------------------------------------ */ +int fr_ioctlswitch(unit, data, cmd, mode) +int unit, mode; +ioctlcmd_t cmd; +void *data; +{ + int error = 0; + + switch (unit) + { + case IPL_LOGIPF : + error = -1; + break; + case IPL_LOGNAT : + if (fr_running > 0) + error = fr_nat_ioctl(data, cmd, mode); + else + error = EIO; + break; + case IPL_LOGSTATE : + if (fr_running > 0) + error = fr_state_ioctl(data, cmd, mode); + else + error = EIO; + break; + case IPL_LOGAUTH : + if (fr_running > 0) { + if ((cmd == (ioctlcmd_t)SIOCADAFR) || + (cmd == (ioctlcmd_t)SIOCRMAFR)) { + if (!(mode & FWRITE)) { + error = EPERM; + } else { + error = frrequest(unit, cmd, data, + fr_active, 1); + } + } else { + error = fr_auth_ioctl(data, cmd, mode); + } + } else + error = EIO; + break; + case IPL_LOGSYNC : +#ifdef IPFILTER_SYNC + if (fr_running > 0) + error = fr_sync_ioctl(data, cmd, mode); + else +#endif + error = EIO; + break; + case IPL_LOGSCAN : +#ifdef IPFILTER_SCAN + if (fr_running > 0) + error = fr_scan_ioctl(data, cmd, mode); + else +#endif + error = EIO; + break; + case IPL_LOGLOOKUP : +#ifdef IPFILTER_LOOKUP + if (fr_running > 0) + error = ip_lookup_ioctl(data, cmd, mode); + else +#endif + error = EIO; + break; + default : + error = EIO; + break; + } + + return error; +} + + +/* + * This array defines the expected size of objects coming into the kernel + * for the various recognised object types. 
+ */ +#define NUM_OBJ_TYPES 14 + +static int fr_objbytes[NUM_OBJ_TYPES][2] = { + { 1, sizeof(struct frentry) }, /* frentry */ + { 0, sizeof(struct friostat) }, + { 0, sizeof(struct fr_info) }, + { 0, sizeof(struct fr_authstat) }, + { 0, sizeof(struct ipfrstat) }, + { 0, sizeof(struct ipnat) }, + { 0, sizeof(struct natstat) }, + { 0, sizeof(struct ipstate_save) }, + { 1, sizeof(struct nat_save) }, /* nat_save */ + { 0, sizeof(struct natlookup) }, + { 1, sizeof(struct ipstate) }, /* ipstate */ + { 0, sizeof(struct ips_stat) }, + { 0, sizeof(struct frauth) }, + { 0, sizeof(struct ipftune) } +}; + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_inobj */ +/* Returns: int - 0 = success, else failure */ +/* Parameters: data(I) - pointer to ioctl data */ +/* ptr(I) - pointer to store real data in */ +/* type(I) - type of structure being moved */ +/* */ +/* Copy in the contents of what the ipfobj_t points to. In future, we */ +/* add things to check for version numbers, sizes, etc, to make it backward */ +/* compatible at the ABI for user land. 
*/ +/* ------------------------------------------------------------------------ */ +int fr_inobj(data, ptr, type) +void *data; +void *ptr; +int type; +{ + ipfobj_t obj; + int error = 0; + + if ((type < 0) || (type > NUM_OBJ_TYPES-1)) + return EINVAL; + + BCOPYIN((caddr_t)data, (caddr_t)&obj, sizeof(obj)); + + if (obj.ipfo_type != type) + return EINVAL; + +#ifndef IPFILTER_COMPAT + if ((fr_objbytes[type][0] & 1) != 0) { + if (obj.ipfo_size < fr_objbytes[type][1]) + return EINVAL; + } else if (obj.ipfo_size != fr_objbytes[type][1]) + return EINVAL; +#else + if (obj.ipfo_rev != IPFILTER_VERSION) + /* XXX compatibility hook here */ + ; + if ((fr_objbytes[type][0] & 1) != 0) { + if (obj.ipfo_size < fr_objbytes[type][1]) + /* XXX compatibility hook here */ + return EINVAL; + } else if (obj.ipfo_size != fr_objbytes[type][1]) + /* XXX compatibility hook here */ + return EINVAL; +#endif + + if ((fr_objbytes[type][0] & 1) != 0) { + error = COPYIN((caddr_t)obj.ipfo_ptr, (caddr_t)ptr, + fr_objbytes[type][1]); + } else { + error = COPYIN((caddr_t)obj.ipfo_ptr, (caddr_t)ptr, + obj.ipfo_size); + } + return error; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_inobjsz */ +/* Returns: int - 0 = success, else failure */ +/* Parameters: data(I) - pointer to ioctl data */ +/* ptr(I) - pointer to store real data in */ +/* type(I) - type of structure being moved */ +/* sz(I) - size of data to copy */ +/* */ +/* As per fr_inobj, except the size of the object to copy in is passed in */ +/* but it must not be smaller than the size defined for the type and the */ +/* type must allow for varied sized objects. The extra requirement here is */ +/* that sz must match the size of the object being passed in - this is not */ +/* not possible nor required in fr_inobj(). 
*/ +/* ------------------------------------------------------------------------ */ +int fr_inobjsz(data, ptr, type, sz) +void *data; +void *ptr; +int type, sz; +{ + ipfobj_t obj; + int error; + + if ((type < 0) || (type > NUM_OBJ_TYPES-1)) + return EINVAL; + if (((fr_objbytes[type][0] & 1) == 0) || (sz < fr_objbytes[type][1])) + return EINVAL; + + BCOPYIN((caddr_t)data, (caddr_t)&obj, sizeof(obj)); + + if (obj.ipfo_type != type) + return EINVAL; + +#ifndef IPFILTER_COMPAT + if (obj.ipfo_size != sz) + return EINVAL; +#else + if (obj.ipfo_rev != IPFILTER_VERSION) + /* XXX compatibility hook here */ + ; + if (obj.ipfo_size != sz) + /* XXX compatibility hook here */ + return EINVAL; +#endif + + error = COPYIN((caddr_t)obj.ipfo_ptr, (caddr_t)ptr, sz); + return error; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_outobjsz */ +/* Returns: int - 0 = success, else failure */ +/* Parameters: data(I) - pointer to ioctl data */ +/* ptr(I) - pointer to store real data in */ +/* type(I) - type of structure being moved */ +/* sz(I) - size of data to copy */ +/* */ +/* As per fr_outobj, except the size of the object to copy out is passed in */ +/* but it must not be smaller than the size defined for the type and the */ +/* type must allow for varied sized objects. The extra requirement here is */ +/* that sz must match the size of the object being passed in - this is not */ +/* not possible nor required in fr_outobj(). 
*/ +/* ------------------------------------------------------------------------ */ +int fr_outobjsz(data, ptr, type, sz) +void *data; +void *ptr; +int type, sz; +{ + ipfobj_t obj; + int error; + + if ((type < 0) || (type > NUM_OBJ_TYPES-1) || + ((fr_objbytes[type][0] & 1) == 0) || + (sz < fr_objbytes[type][1])) + return EINVAL; + + BCOPYIN((caddr_t)data, (caddr_t)&obj, sizeof(obj)); + + if (obj.ipfo_type != type) + return EINVAL; + +#ifndef IPFILTER_COMPAT + if (obj.ipfo_size != sz) + return EINVAL; +#else + if (obj.ipfo_rev != IPFILTER_VERSION) + /* XXX compatibility hook here */ + ; + if (obj.ipfo_size != sz) + /* XXX compatibility hook here */ + return EINVAL; +#endif + + error = COPYOUT((caddr_t)ptr, (caddr_t)obj.ipfo_ptr, sz); + return error; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_outobj */ +/* Returns: int - 0 = success, else failure */ +/* Parameters: data(I) - pointer to ioctl data */ +/* ptr(I) - pointer to store real data in */ +/* type(I) - type of structure being moved */ +/* */ +/* Copy out the contents of what ptr is to where ipfobj points to. In */ +/* future, we add things to check for version numbers, sizes, etc, to make */ +/* it backward compatible at the ABI for user land. 
*/ +/* ------------------------------------------------------------------------ */ +int fr_outobj(data, ptr, type) +void *data; +void *ptr; +int type; +{ + ipfobj_t obj; + int error; + + if ((type < 0) || (type > NUM_OBJ_TYPES-1)) + return EINVAL; + + BCOPYIN((caddr_t)data, (caddr_t)&obj, sizeof(obj)); + + if (obj.ipfo_type != type) + return EINVAL; + +#ifndef IPFILTER_COMPAT + if ((fr_objbytes[type][0] & 1) != 0) { + if (obj.ipfo_size < fr_objbytes[type][1]) + return EINVAL; + } else if (obj.ipfo_size != fr_objbytes[type][1]) + return EINVAL; +#else + if (obj.ipfo_rev != IPFILTER_VERSION) + /* XXX compatibility hook here */ + ; + if ((fr_objbytes[type][0] & 1) != 0) { + if (obj.ipfo_size < fr_objbytes[type][1]) + /* XXX compatibility hook here */ + return EINVAL; + } else if (obj.ipfo_size != fr_objbytes[type][1]) + /* XXX compatibility hook here */ + return EINVAL; +#endif + + error = COPYOUT((caddr_t)ptr, (caddr_t)obj.ipfo_ptr, obj.ipfo_size); + return error; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_checkl4sum */ +/* Returns: int - 0 = good, -1 = bad, 1 = cannot check */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* If possible, calculate the layer 4 checksum for the packet. If this is */ +/* not possible, return without indicating a failure or success but in a */ +/* way that is ditinguishable. */ +/* ------------------------------------------------------------------------ */ +int fr_checkl4sum(fin) +fr_info_t *fin; +{ + u_short sum, hdrsum, *csump; + udphdr_t *udp; + int dosum; + + if ((fin->fin_flx & FI_NOCKSUM) != 0) + return 0; + + /* + * If the TCP packet isn't a fragment, isn't too short and otherwise + * isn't already considered "bad", then validate the checksum. If + * this check fails then considered the packet to be "bad". 
+ */ + if ((fin->fin_flx & (FI_FRAG|FI_SHORT|FI_BAD)) != 0) + return 1; + + csump = NULL; + hdrsum = 0; + dosum = 0; + sum = 0; + +#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_VALID) + if (dohwcksum && ((*fin->fin_mp)->b_ick_flag == ICK_VALID)) { + hdrsum = 0; + sum = 0; + } else { +#endif + switch (fin->fin_p) + { + case IPPROTO_TCP : + csump = &((tcphdr_t *)fin->fin_dp)->th_sum; + dosum = 1; + break; + + case IPPROTO_UDP : + udp = fin->fin_dp; + if (udp->uh_sum != 0) { + csump = &udp->uh_sum; + dosum = 1; + } + break; + + case IPPROTO_ICMP : + csump = &((struct icmp *)fin->fin_dp)->icmp_cksum; + dosum = 1; + break; + + default : + return 1; + /*NOTREACHED*/ + } + + if (csump != NULL) + hdrsum = *csump; + + if (dosum) + sum = fr_cksum(fin->fin_m, fin->fin_ip, + fin->fin_p, fin->fin_dp); +#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_VALID) + } +#endif +#if !defined(_KERNEL) + if (sum == hdrsum) { + FR_DEBUG(("checkl4sum: %hx == %hx\n", sum, hdrsum)); + } else { + FR_DEBUG(("checkl4sum: %hx != %hx\n", sum, hdrsum)); + } +#endif + if (hdrsum == sum) + return 0; + return -1; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_ifpfillv4addr */ +/* Returns: int - 0 = address update, -1 = address not updated */ +/* Parameters: atype(I) - type of network address update to perform */ +/* sin(I) - pointer to source of address information */ +/* mask(I) - pointer to source of netmask information */ +/* inp(I) - pointer to destination address store */ +/* inpmask(I) - pointer to destination netmask store */ +/* */ +/* Given a type of network address update (atype) to perform, copy */ +/* information from sin/mask into inp/inpmask. If ipnmask is NULL then no */ +/* netmask update is performed unless FRI_NETMASKED is passed as atype, in */ +/* which case the operation fails. 
For all values of atype other than */ +/* FRI_NETMASKED, if inpmask is non-NULL then the mask is set to an all 1s */ +/* value. */ +/* ------------------------------------------------------------------------ */ +int fr_ifpfillv4addr(atype, sin, mask, inp, inpmask) +int atype; +struct sockaddr_in *sin, *mask; +struct in_addr *inp, *inpmask; +{ + if (inpmask != NULL && atype != FRI_NETMASKED) + inpmask->s_addr = 0xffffffff; + + if (atype == FRI_NETWORK || atype == FRI_NETMASKED) { + if (atype == FRI_NETMASKED) { + if (inpmask == NULL) + return -1; + inpmask->s_addr = mask->sin_addr.s_addr; + } + inp->s_addr = sin->sin_addr.s_addr & mask->sin_addr.s_addr; + } else { + inp->s_addr = sin->sin_addr.s_addr; + } + return 0; +} + + +#ifdef USE_INET6 +/* ------------------------------------------------------------------------ */ +/* Function: fr_ifpfillv6addr */ +/* Returns: int - 0 = address update, -1 = address not updated */ +/* Parameters: atype(I) - type of network address update to perform */ +/* sin(I) - pointer to source of address information */ +/* mask(I) - pointer to source of netmask information */ +/* inp(I) - pointer to destination address store */ +/* inpmask(I) - pointer to destination netmask store */ +/* */ +/* Given a type of network address update (atype) to perform, copy */ +/* information from sin/mask into inp/inpmask. If ipnmask is NULL then no */ +/* netmask update is performed unless FRI_NETMASKED is passed as atype, in */ +/* which case the operation fails. For all values of atype other than */ +/* FRI_NETMASKED, if inpmask is non-NULL then the mask is set to an all 1s */ +/* value. 
*/ +/* ------------------------------------------------------------------------ */ +int fr_ifpfillv6addr(atype, sin, mask, inp, inpmask) +int atype; +struct sockaddr_in6 *sin, *mask; +struct in_addr *inp, *inpmask; +{ + i6addr_t *src, *dst, *and, *dmask; + + src = (i6addr_t *)&sin->sin6_addr; + and = (i6addr_t *)&mask->sin6_addr; + dst = (i6addr_t *)inp; + dmask = (i6addr_t *)inpmask; + + if (inpmask != NULL && atype != FRI_NETMASKED) { + dmask->i6[0] = 0xffffffff; + dmask->i6[1] = 0xffffffff; + dmask->i6[2] = 0xffffffff; + dmask->i6[3] = 0xffffffff; + } + + if (atype == FRI_NETWORK || atype == FRI_NETMASKED) { + if (atype == FRI_NETMASKED) { + if (inpmask == NULL) + return -1; + dmask->i6[0] = and->i6[0]; + dmask->i6[1] = and->i6[1]; + dmask->i6[2] = and->i6[2]; + dmask->i6[3] = and->i6[3]; + } + + dst->i6[0] = src->i6[0] & and->i6[0]; + dst->i6[1] = src->i6[1] & and->i6[1]; + dst->i6[2] = src->i6[2] & and->i6[2]; + dst->i6[3] = src->i6[3] & and->i6[3]; + } else { + dst->i6[0] = src->i6[0]; + dst->i6[1] = src->i6[1]; + dst->i6[2] = src->i6[2]; + dst->i6[3] = src->i6[3]; + } + return 0; +} +#endif + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_matchtag */ +/* Returns: 0 == mismatch, 1 == match. */ +/* Parameters: tag1(I) - pointer to first tag to compare */ +/* tag2(I) - pointer to second tag to compare */ +/* */ +/* Returns true (non-zero) or false(0) if the two tag structures can be */ +/* considered to be a match or not match, respectively. The tag is 16 */ +/* bytes long (16 characters) but that is overlayed with 4 32bit ints so */ +/* compare the ints instead, for speed. tag1 is the master of the */ +/* comparison. This function should only be called with both tag1 and tag2 */ +/* as non-NULL pointers. 
*/ +/* ------------------------------------------------------------------------ */ +int fr_matchtag(tag1, tag2) +ipftag_t *tag1, *tag2; +{ + if (tag1 == tag2) + return 1; + + if ((tag1->ipt_num[0] == 0) && (tag2->ipt_num[0] == 0)) + return 1; + + if ((tag1->ipt_num[0] == tag2->ipt_num[0]) && + (tag1->ipt_num[1] == tag2->ipt_num[1]) && + (tag1->ipt_num[2] == tag2->ipt_num[2]) && + (tag1->ipt_num[3] == tag2->ipt_num[3])) + return 1; + return 0; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_coalesce */ +/* Returns: 1 == success, -1 == failure, 0 == no change */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* Attempt to get all of the packet data into a single, contiguous buffer. */ +/* If this call returns a failure then the buffers have also been freed. */ +/* ------------------------------------------------------------------------ */ +int fr_coalesce(fin) +fr_info_t *fin; +{ + if ((fin->fin_flx & FI_COALESCE) != 0) + return 1; + + /* + * If the mbuf pointers indicate that there is no mbuf to work with, + * return but do not indicate success or failure. + */ + if (fin->fin_m == NULL || fin->fin_mp == NULL) + return 0; + +#if defined(_KERNEL) + if (fr_pullup(fin->fin_m, fin, fin->fin_plen) == NULL) { + ATOMIC_INCL(fr_badcoalesces[fin->fin_out]); +# ifdef MENTAT + FREE_MB_T(*fin->fin_mp); +# endif + *fin->fin_mp = NULL; + fin->fin_m = NULL; + return -1; + } +#else + fin = fin; /* LINT */ +#endif + return 1; +} + + +/* + * The following table lists all of the tunable variables that can be + * accessed via SIOCIPFGET/SIOCIPFSET/SIOCIPFGETNEXt. The format of each row + * in the table below is as follows: + * + * pointer to value, name of value, minimum, maximum, size of the value's + * container, value attribute flags + * + * For convienience, IPFT_RDONLY means the value is read-only, IPFT_WRDISABLED + * means the value can only be written to when IPFilter is loaded but disabled. 
+ * The obvious implication is if neither of these are set then the value can be + * changed at any time without harm. + */ +ipftuneable_t ipf_tuneables[] = { + /* filtering */ + { { &fr_flags }, "fr_flags", 0, 0xffffffff, + sizeof(fr_flags), 0 }, + { { &fr_active }, "fr_active", 0, 0, + sizeof(fr_active), IPFT_RDONLY }, + { { &fr_control_forwarding }, "fr_control_forwarding", 0, 1, + sizeof(fr_control_forwarding), 0 }, + { { &fr_update_ipid }, "fr_update_ipid", 0, 1, + sizeof(fr_update_ipid), 0 }, + { { &fr_chksrc }, "fr_chksrc", 0, 1, + sizeof(fr_chksrc), 0 }, + { { &fr_minttl }, "fr_minttl", 0, 1, + sizeof(fr_minttl), 0 }, + { { &fr_icmpminfragmtu }, "fr_icmpminfragmtu", 0, 1, + sizeof(fr_icmpminfragmtu), 0 }, + { { &fr_pass }, "fr_pass", 0, 0xffffffff, + sizeof(fr_pass), 0 }, + /* state */ + { { &fr_tcpidletimeout }, "fr_tcpidletimeout", 1, 0x7fffffff, + sizeof(fr_tcpidletimeout), IPFT_WRDISABLED }, + { { &fr_tcpclosewait }, "fr_tcpclosewait", 1, 0x7fffffff, + sizeof(fr_tcpclosewait), IPFT_WRDISABLED }, + { { &fr_tcplastack }, "fr_tcplastack", 1, 0x7fffffff, + sizeof(fr_tcplastack), IPFT_WRDISABLED }, + { { &fr_tcptimeout }, "fr_tcptimeout", 1, 0x7fffffff, + sizeof(fr_tcptimeout), IPFT_WRDISABLED }, + { { &fr_tcpclosed }, "fr_tcpclosed", 1, 0x7fffffff, + sizeof(fr_tcpclosed), IPFT_WRDISABLED }, + { { &fr_tcphalfclosed }, "fr_tcphalfclosed", 1, 0x7fffffff, + sizeof(fr_tcphalfclosed), IPFT_WRDISABLED }, + { { &fr_udptimeout }, "fr_udptimeout", 1, 0x7fffffff, + sizeof(fr_udptimeout), IPFT_WRDISABLED }, + { { &fr_udpacktimeout }, "fr_udpacktimeout", 1, 0x7fffffff, + sizeof(fr_udpacktimeout), IPFT_WRDISABLED }, + { { &fr_icmptimeout }, "fr_icmptimeout", 1, 0x7fffffff, + sizeof(fr_icmptimeout), IPFT_WRDISABLED }, + { { &fr_icmpacktimeout }, "fr_icmpacktimeout", 1, 0x7fffffff, + sizeof(fr_icmpacktimeout), IPFT_WRDISABLED }, + { { &fr_iptimeout }, "fr_iptimeout", 1, 0x7fffffff, + sizeof(fr_iptimeout), IPFT_WRDISABLED }, + { { &fr_statemax }, "fr_statemax", 1, 
0x7fffffff, + sizeof(fr_statemax), 0 }, + { { &fr_statesize }, "fr_statesize", 1, 0x7fffffff, + sizeof(fr_statesize), IPFT_WRDISABLED }, + { { &fr_state_lock }, "fr_state_lock", 0, 1, + sizeof(fr_state_lock), IPFT_RDONLY }, + { { &fr_state_maxbucket }, "fr_state_maxbucket", 1, 0x7fffffff, + sizeof(fr_state_maxbucket), IPFT_WRDISABLED }, + { { &fr_state_maxbucket_reset }, "fr_state_maxbucket_reset", 0, 1, + sizeof(fr_state_maxbucket_reset), IPFT_WRDISABLED }, + { { &ipstate_logging }, "ipstate_logging", 0, 1, + sizeof(ipstate_logging), 0 }, + /* nat */ + { { &fr_nat_lock }, "fr_nat_lock", 0, 1, + sizeof(fr_nat_lock), IPFT_RDONLY }, + { { &ipf_nattable_sz }, "ipf_nattable_sz", 1, 0x7fffffff, + sizeof(ipf_nattable_sz), IPFT_WRDISABLED }, + { { &ipf_nattable_max }, "ipf_nattable_max", 1, 0x7fffffff, + sizeof(ipf_nattable_max), 0 }, + { { &ipf_natrules_sz }, "ipf_natrules_sz", 1, 0x7fffffff, + sizeof(ipf_natrules_sz), IPFT_WRDISABLED }, + { { &ipf_rdrrules_sz }, "ipf_rdrrules_sz", 1, 0x7fffffff, + sizeof(ipf_rdrrules_sz), IPFT_WRDISABLED }, + { { &ipf_hostmap_sz }, "ipf_hostmap_sz", 1, 0x7fffffff, + sizeof(ipf_hostmap_sz), IPFT_WRDISABLED }, + { { &fr_nat_maxbucket }, "fr_nat_maxbucket", 1, 0x7fffffff, + sizeof(fr_nat_maxbucket), IPFT_WRDISABLED }, + { { &fr_nat_maxbucket_reset }, "fr_nat_maxbucket_reset", 0, 1, + sizeof(fr_nat_maxbucket_reset), IPFT_WRDISABLED }, + { { &nat_logging }, "nat_logging", 0, 1, + sizeof(nat_logging), 0 }, + { { &fr_defnatage }, "fr_defnatage", 1, 0x7fffffff, + sizeof(fr_defnatage), IPFT_WRDISABLED }, + { { &fr_defnatipage }, "fr_defnatipage", 1, 0x7fffffff, + sizeof(fr_defnatipage), IPFT_WRDISABLED }, + { { &fr_defnaticmpage }, "fr_defnaticmpage", 1, 0x7fffffff, + sizeof(fr_defnaticmpage), IPFT_WRDISABLED }, + /* frag */ + { { &ipfr_size }, "ipfr_size", 1, 0x7fffffff, + sizeof(ipfr_size), IPFT_WRDISABLED }, + { { &fr_ipfrttl }, "fr_ipfrttl", 1, 0x7fffffff, + sizeof(fr_ipfrttl), IPFT_WRDISABLED }, +#ifdef IPFILTER_LOG + /* log */ + { { 
&ipl_suppress }, "ipl_suppress", 0, 1, + sizeof(ipl_suppress), 0 }, + { { &ipl_buffer_sz }, "ipl_buffer_sz", 0, 0, + sizeof(ipl_buffer_sz), IPFT_RDONLY }, + { { &ipl_logmax }, "ipl_logmax", 0, 0x7fffffff, + sizeof(ipl_logmax), IPFT_WRDISABLED }, + { { &ipl_logall }, "ipl_logall", 0, 1, + sizeof(ipl_logall), 0 }, + { { &ipl_logsize }, "ipl_logsize", 0, 0x80000, + sizeof(ipl_logsize), 0 }, +#endif + { { NULL }, NULL, 0, 0 } +}; + +static ipftuneable_t *ipf_tunelist = NULL; + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_findtunebycookie */ +/* Returns: NULL = search failed, else pointer to tune struct */ +/* Parameters: cookie(I) - cookie value to search for amongst tuneables */ +/* next(O) - pointer to place to store the cookie for the */ +/* "next" tuneable, if it is desired. */ +/* */ +/* This function is used to walk through all of the existing tunables with */ +/* successive calls. It searches the known tunables for the one which has */ +/* a matching value for "cookie" - ie its address. When returning a match, */ +/* the next one to be found may be returned inside next. */ +/* ------------------------------------------------------------------------ */ +static ipftuneable_t *fr_findtunebycookie(cookie, next) +void *cookie, **next; +{ + ipftuneable_t *ta, **tap; + + for (ta = ipf_tuneables; ta->ipft_name != NULL; ta++) + if (ta == cookie) { + if (next != NULL) { + /* + * If the next entry in the array has a name + * present, then return a pointer to it for + * where to go next, else return a pointer to + * the dynaminc list as a key to search there + * next. This facilitates a weak linking of + * the two "lists" together. 
+ */ + if ((ta + 1)->ipft_name != NULL) + *next = ta + 1; + else + *next = &ipf_tunelist; + } + return ta; + } + + for (tap = &ipf_tunelist; (ta = *tap) != NULL; tap = &ta->ipft_next) + if (tap == cookie) { + if (next != NULL) + *next = &ta->ipft_next; + return ta; + } + + if (next != NULL) + *next = NULL; + return NULL; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_findtunebyname */ +/* Returns: NULL = search failed, else pointer to tune struct */ +/* Parameters: name(I) - name of the tuneable entry to find. */ +/* */ +/* Search the static array of tuneables and the list of dynamic tuneables */ +/* for an entry with a matching name. If we can find one, return a pointer */ +/* to the matching structure. */ +/* ------------------------------------------------------------------------ */ +static ipftuneable_t *fr_findtunebyname(name) +const char *name; +{ + ipftuneable_t *ta; + + for (ta = ipf_tuneables; ta->ipft_name != NULL; ta++) + if (!strcmp(ta->ipft_name, name)) { + return ta; + } + + for (ta = ipf_tunelist; ta != NULL; ta = ta->ipft_next) + if (!strcmp(ta->ipft_name, name)) { + return ta; + } + + return NULL; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_addipftune */ +/* Returns: int - 0 == success, else failure */ +/* Parameters: newtune - pointer to new tune struct to add to tuneables */ +/* */ +/* Appends the tune structure pointer to by "newtune" to the end of the */ +/* current list of "dynamic" tuneable parameters. Once added, the owner */ +/* of the object is not expected to ever change "ipft_next". 
*/ +/* ------------------------------------------------------------------------ */ +int fr_addipftune(newtune) +ipftuneable_t *newtune; +{ + ipftuneable_t *ta, **tap; + + ta = fr_findtunebyname(newtune->ipft_name); + if (ta != NULL) + return EEXIST; + + for (tap = &ipf_tunelist; *tap != NULL; tap = &(*tap)->ipft_next) + ; + + newtune->ipft_next = NULL; + *tap = newtune; + return 0; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_delipftune */ +/* Returns: int - 0 == success, else failure */ +/* Parameters: oldtune - pointer to tune struct to remove from the list of */ +/* current dynamic tuneables */ +/* */ +/* Search for the tune structure, by pointer, in the list of those that are */ +/* dynamically added at run time. If found, adjust the list so that this */ +/* structure is no longer part of it. */ +/* ------------------------------------------------------------------------ */ +int fr_delipftune(oldtune) +ipftuneable_t *oldtune; +{ + ipftuneable_t *ta, **tap; + + for (tap = &ipf_tunelist; (ta = *tap) != NULL; tap = &ta->ipft_next) + if (ta == oldtune) { + *tap = oldtune->ipft_next; + oldtune->ipft_next = NULL; + return 0; + } + + return ESRCH; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_ipftune */ +/* Returns: int - 0 == success, else failure */ +/* Parameters: cmd(I) - ioctl command number */ +/* data(I) - pointer to ioctl data structure */ +/* */ +/* Implement handling of SIOCIPFGETNEXT, SIOCIPFGET and SIOCIPFSET. These */ +/* three ioctls provide the means to access and control global variables */ +/* within IPFilter, allowing (for example) timeouts and table sizes to be */ +/* changed without rebooting, reloading or recompiling. The initialisation */ +/* and 'destruction' routines of the various components of ipfilter are all */ +/* each responsible for handling their own values being too big. 
*/ +/* ------------------------------------------------------------------------ */ +int fr_ipftune(cmd, data) +ioctlcmd_t cmd; +void *data; +{ + ipftuneable_t *ta; + ipftune_t tu; + void *cookie; + int error; + + error = fr_inobj(data, &tu, IPFOBJ_TUNEABLE); + if (error != 0) + return error; + + tu.ipft_name[sizeof(tu.ipft_name) - 1] = '\0'; + cookie = tu.ipft_cookie; + ta = NULL; + + switch (cmd) + { + case SIOCIPFGETNEXT : + /* + * If cookie is non-NULL, assume it to be a pointer to the last + * entry we looked at, so find it (if possible) and return a + * pointer to the next one after it. The last entry in the + * the table is a NULL entry, so when we get to it, set cookie + * to NULL and return that, indicating end of list, erstwhile + * if we come in with cookie set to NULL, we are starting anew + * at the front of the list. + */ + if (cookie != NULL) { + ta = fr_findtunebycookie(cookie, &tu.ipft_cookie); + } else { + ta = ipf_tuneables; + tu.ipft_cookie = ta + 1; + } + if (ta != NULL) { + /* + * Entry found, but does the data pointed to by that + * row fit in what we can return? + */ + if (ta->ipft_sz > sizeof(tu.ipft_un)) + return EINVAL; + + tu.ipft_vlong = 0; + if (ta->ipft_sz == sizeof(u_long)) + tu.ipft_vlong = *ta->ipft_plong; + else if (ta->ipft_sz == sizeof(u_int)) + tu.ipft_vint = *ta->ipft_pint; + else if (ta->ipft_sz == sizeof(u_short)) + tu.ipft_vshort = *ta->ipft_pshort; + else if (ta->ipft_sz == sizeof(u_char)) + tu.ipft_vchar = *ta->ipft_pchar; + + tu.ipft_sz = ta->ipft_sz; + tu.ipft_min = ta->ipft_min; + tu.ipft_max = ta->ipft_max; + tu.ipft_flags = ta->ipft_flags; + bcopy(ta->ipft_name, tu.ipft_name, + MIN(sizeof(tu.ipft_name), + strlen(ta->ipft_name) + 1)); + } + error = fr_outobj(data, &tu, IPFOBJ_TUNEABLE); + break; + + case SIOCIPFGET : + case SIOCIPFSET : + /* + * Search by name or by cookie value for a particular entry + * in the tuning paramter table. 
+ */ + error = ESRCH; + if (cookie != NULL) { + ta = fr_findtunebycookie(cookie, NULL); + if (ta != NULL) + error = 0; + } else if (tu.ipft_name[0] != '\0') { + ta = fr_findtunebyname(tu.ipft_name); + if (ta != NULL) + error = 0; + } + if (error != 0) + break; + + if (cmd == (ioctlcmd_t)SIOCIPFGET) { + /* + * Fetch the tuning parameters for a particular value + */ + tu.ipft_vlong = 0; + if (ta->ipft_sz == sizeof(u_long)) + tu.ipft_vlong = *ta->ipft_plong; + else if (ta->ipft_sz == sizeof(u_int)) + tu.ipft_vint = *ta->ipft_pint; + else if (ta->ipft_sz == sizeof(u_short)) + tu.ipft_vshort = *ta->ipft_pshort; + else if (ta->ipft_sz == sizeof(u_char)) + tu.ipft_vchar = *ta->ipft_pchar; + tu.ipft_cookie = ta; + tu.ipft_sz = ta->ipft_sz; + tu.ipft_min = ta->ipft_min; + tu.ipft_max = ta->ipft_max; + tu.ipft_flags = ta->ipft_flags; + error = fr_outobj(data, &tu, IPFOBJ_TUNEABLE); + + } else if (cmd == (ioctlcmd_t)SIOCIPFSET) { + /* + * Set an internal parameter. The hard part here is + * getting the new value safely and correctly out of + * the kernel (given we only know its size, not type.) 
+ */ + u_long in; + + if (((ta->ipft_flags & IPFT_WRDISABLED) != 0) && + (fr_running > 0)) { + error = EBUSY; + break; + } + + in = tu.ipft_vlong; + if (in < ta->ipft_min || in > ta->ipft_max) { + error = EINVAL; + break; + } + + if (ta->ipft_sz == sizeof(u_long)) { + tu.ipft_vlong = *ta->ipft_plong; + *ta->ipft_plong = in; + } else if (ta->ipft_sz == sizeof(u_int)) { + tu.ipft_vint = *ta->ipft_pint; + *ta->ipft_pint = (u_int)(in & 0xffffffff); + } else if (ta->ipft_sz == sizeof(u_short)) { + tu.ipft_vshort = *ta->ipft_pshort; + *ta->ipft_pshort = (u_short)(in & 0xffff); + } else if (ta->ipft_sz == sizeof(u_char)) { + tu.ipft_vchar = *ta->ipft_pchar; + *ta->ipft_pchar = (u_char)(in & 0xff); + } + error = fr_outobj(data, &tu, IPFOBJ_TUNEABLE); + } + break; + + default : + error = EINVAL; + break; + } + + return error; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_initialise */ +/* Returns: int - 0 == success, < 0 == failure */ +/* Parameters: None. */ +/* */ +/* Call of the initialise functions for all the various subsystems inside */ +/* of IPFilter. If any of them should fail, return immeadiately a failure */ +/* BUT do not try to recover from the error here. 
*/ +/* ------------------------------------------------------------------------ */ +int fr_initialise() +{ + int i; + +#ifdef IPFILTER_LOG + i = fr_loginit(); + if (i < 0) + return -10 + i; +#endif + i = fr_natinit(); + if (i < 0) + return -20 + i; + + i = fr_stateinit(); + if (i < 0) + return -30 + i; + + i = fr_authinit(); + if (i < 0) + return -40 + i; + + i = fr_fraginit(); + if (i < 0) + return -50 + i; + + i = appr_init(); + if (i < 0) + return -60 + i; + +#ifdef IPFILTER_SYNC + i = ipfsync_init(); + if (i < 0) + return -70 + i; +#endif +#ifdef IPFILTER_SCAN + i = ipsc_init(); + if (i < 0) + return -80 + i; +#endif +#ifdef IPFILTER_LOOKUP + i = ip_lookup_init(); + if (i < 0) + return -90 + i; +#endif +#ifdef IPFILTER_COMPILED + ipfrule_add(); +#endif + return 0; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_deinitialise */ +/* Returns: None. */ +/* Parameters: None. */ +/* */ +/* Call all the various subsystem cleanup routines to deallocate memory or */ +/* destroy locks or whatever they've done that they need to now undo. */ +/* The order here IS important as there are some cross references of */ +/* internal data structures. 
*/ +/* ------------------------------------------------------------------------ */ +void fr_deinitialise() +{ + fr_fragunload(); + fr_authunload(); + fr_natunload(); + fr_stateunload(); +#ifdef IPFILTER_SCAN + fr_scanunload(); +#endif + appr_unload(); + +#ifdef IPFILTER_COMPILED + ipfrule_remove(); +#endif + + (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE); + (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE); + (void) frflush(IPL_LOGCOUNT, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE); + (void) frflush(IPL_LOGCOUNT, 0, FR_INQUE|FR_OUTQUE); + +#ifdef IPFILTER_LOOKUP + ip_lookup_unload(); +#endif + +#ifdef IPFILTER_LOG + fr_logunload(); +#endif +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_zerostats */ +/* Returns: int - 0 = success, else failure */ +/* Parameters: data(O) - pointer to pointer for copying data back to */ +/* */ +/* Copies the current statistics out to userspace and then zero's the */ +/* current ones in the kernel. The lock is only held across the bzero() as */ +/* the copyout may result in paging (ie network activity.) */ +/* ------------------------------------------------------------------------ */ +int fr_zerostats(data) +caddr_t data; +{ + friostat_t fio; + int error; + + fr_getstat(&fio); + error = copyoutptr(&fio, data, sizeof(fio)); + if (error) + return EFAULT; + + WRITE_ENTER(&ipf_mutex); + bzero((char *)frstats, sizeof(*frstats) * 2); + RWLOCK_EXIT(&ipf_mutex); + + return 0; +} + + +#ifdef _KERNEL +/* ------------------------------------------------------------------------ */ +/* Function: fr_resolvedest */ +/* Returns: Nil */ +/* Parameters: fdp(IO) - pointer to destination information to resolve */ +/* v(I) - IP protocol version to match */ +/* */ +/* Looks up an interface name in the frdest structure pointed to by fdp and */ +/* if a matching name can be found for the particular IP protocol version */ +/* then store the interface pointer in the frdest struct. 
If no match is */ +/* found, then set the interface pointer to be -1 as NULL is considered to */ +/* indicate there is no information at all in the structure. */ +/* ------------------------------------------------------------------------ */ +void fr_resolvedest(fdp, v) +frdest_t *fdp; +int v; +{ + void *ifp; + + ifp = NULL; + v = v; /* LINT */ + + if (*fdp->fd_ifname != '\0') { + ifp = GETIFP(fdp->fd_ifname, v); + if (ifp == NULL) + ifp = (void *)-1; + } + fdp->fd_ifp = ifp; +} +#endif /* _KERNEL */ + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_resolvenic */ +/* Returns: void* - NULL = wildcard name, -1 = failed to find NIC, else */ +/* pointer to interface structure for NIC */ +/* Parameters: name(I) - complete interface name */ +/* v(I) - IP protocol version */ +/* */ +/* Look for a network interface structure that firstly has a matching name */ +/* to that passed in and that is also being used for that IP protocol */ +/* version (necessary on some platforms where there are separate listings */ +/* for both IPv4 and IPv6 on the same physical NIC. */ +/* */ +/* One might wonder why name gets terminated with a \0 byte in here. The */ +/* reason is an interface name could get into the kernel structures of ipf */ +/* in any number of ways and so long as they all use the same sized array */ +/* to put the name in, it makes sense to ensure it gets null terminated */ +/* before it is used for its intended purpose - finding its match in the */ +/* kernel's list of configured interfaces. */ +/* */ +/* NOTE: This SHOULD ONLY be used with IPFilter structures that have an */ +/* array for the name that is LIFNAMSIZ bytes (at least) in length. 
*/ +/* ------------------------------------------------------------------------ */ +void *fr_resolvenic(name, v) +char *name; +int v; +{ + void *nic; + + if (name[0] == '\0') + return NULL; + + if ((name[1] == '\0') && ((name[0] == '-') || (name[0] == '*'))) { + return NULL; + } + + name[LIFNAMSIZ - 1] = '\0'; + + nic = GETIFP(name, v); + if (nic == NULL) + nic = (void *)-1; + return nic; +} diff --git a/usr/src/uts/common/inet/ipf/ip_auth.c b/usr/src/uts/common/inet/ipf/ip_auth.c new file mode 100644 index 0000000000..b6f0844354 --- /dev/null +++ b/usr/src/uts/common/inet/ipf/ip_auth.c @@ -0,0 +1,796 @@ +/* + * Copyright (C) 1998-2003 by Darren Reed & Guido van Rooij. + * + * See the IPFILTER.LICENCE file for details on licencing. + */ +#if defined(KERNEL) || defined(_KERNEL) +# undef KERNEL +# undef _KERNEL +# define KERNEL 1 +# define _KERNEL 1 +#endif +#include <sys/errno.h> +#include <sys/types.h> +#include <sys/param.h> +#include <sys/time.h> +#include <sys/file.h> +#if !defined(_KERNEL) +# include <stdio.h> +# include <stdlib.h> +# include <string.h> +# define _KERNEL +# ifdef __OpenBSD__ +struct file; +# endif +# include <sys/uio.h> +# undef _KERNEL +#endif +#if defined(_KERNEL) && (__FreeBSD_version >= 220000) +# include <sys/filio.h> +# include <sys/fcntl.h> +#else +# include <sys/ioctl.h> +#endif +#if !defined(linux) +# include <sys/protosw.h> +#endif +#include <sys/socket.h> +#if defined(_KERNEL) +# include <sys/systm.h> +# if !defined(__SVR4) && !defined(__svr4__) && !defined(linux) +# include <sys/mbuf.h> +# endif +#endif +#if defined(__SVR4) || defined(__svr4__) +# include <sys/filio.h> +# include <sys/byteorder.h> +# ifdef _KERNEL +# include <sys/dditypes.h> +# endif +# include <sys/stream.h> +# include <sys/kmem.h> +#endif +#if (_BSDI_VERSION >= 199802) || (__FreeBSD_version >= 400000) +# include <sys/queue.h> +#endif +#if defined(__NetBSD__) || defined(__OpenBSD__) || defined(bsdi) +# include <machine/cpu.h> +#endif +#if defined(_KERNEL) && 
defined(__NetBSD__) && (__NetBSD_Version__ >= 104000000) +# include <sys/proc.h> +#endif +#include <net/if.h> +#ifdef sun +# include <net/af.h> +#endif +#include <net/route.h> +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#if !defined(_KERNEL) && !defined(__osf__) && !defined(__sgi) +# define KERNEL +# define _KERNEL +# define NOT_KERNEL +#endif +#if !defined(linux) +# include <netinet/ip_var.h> +#endif +#ifdef NOT_KERNEL +# undef _KERNEL +# undef KERNEL +#endif +#include <netinet/tcp.h> +#if defined(IRIX) && (IRIX < 60516) /* IRIX < 6 */ +extern struct ifqueue ipintrq; /* ip packet input queue */ +#else +# if !defined(__hpux) && !defined(linux) +# if __FreeBSD_version >= 300000 +# include <net/if_var.h> +# if __FreeBSD_version >= 500042 +# define IF_QFULL _IF_QFULL +# define IF_DROP _IF_DROP +# endif /* __FreeBSD_version >= 500042 */ +# endif +# include <netinet/in_var.h> +# include <netinet/tcp_fsm.h> +# endif +#endif +#include <netinet/udp.h> +#include <netinet/ip_icmp.h> +#include "netinet/ip_compat.h" +#include <netinet/tcpip.h> +#include "netinet/ip_fil.h" +#include "netinet/ip_auth.h" +#if !defined(MENTAT) && !defined(linux) +# include <net/netisr.h> +# ifdef __FreeBSD__ +# include <machine/cpufunc.h> +# endif +#endif +#if (__FreeBSD_version >= 300000) +# include <sys/malloc.h> +# if defined(_KERNEL) && !defined(IPFILTER_LKM) +# include <sys/libkern.h> +# include <sys/systm.h> +# endif +#endif +/* END OF INCLUDES */ + +#if !defined(lint) +static const char rcsid[] = "@(#)$Id: ip_auth.c,v 2.73.2.5 2005/06/12 07:18:14 darrenr Exp $"; +#endif + + +#if SOLARIS +extern kcondvar_t ipfauthwait; +#endif /* SOLARIS */ +#if defined(linux) && defined(_KERNEL) +wait_queue_head_t fr_authnext_linux; +#endif + +int fr_authsize = FR_NUMAUTH; +int fr_authused = 0; +int fr_defaultauthage = 600; +int fr_auth_lock = 0; +int fr_auth_init = 0; +fr_authstat_t fr_authstats; +static frauth_t *fr_auth = NULL; +mb_t **fr_authpkts = NULL; +int 
fr_authstart = 0, fr_authend = 0, fr_authnext = 0; +frauthent_t *fae_list = NULL; +frentry_t *ipauth = NULL, + *fr_authlist = NULL; + + +int fr_authinit() +{ + KMALLOCS(fr_auth, frauth_t *, fr_authsize * sizeof(*fr_auth)); + if (fr_auth != NULL) + bzero((char *)fr_auth, fr_authsize * sizeof(*fr_auth)); + else + return -1; + + KMALLOCS(fr_authpkts, mb_t **, fr_authsize * sizeof(*fr_authpkts)); + if (fr_authpkts != NULL) + bzero((char *)fr_authpkts, fr_authsize * sizeof(*fr_authpkts)); + else + return -2; + + MUTEX_INIT(&ipf_authmx, "ipf auth log mutex"); + RWLOCK_INIT(&ipf_auth, "ipf IP User-Auth rwlock"); +#if SOLARIS && defined(_KERNEL) + cv_init(&ipfauthwait, "ipf auth condvar", CV_DRIVER, NULL); +#endif +#if defined(linux) && defined(_KERNEL) + init_waitqueue_head(&fr_authnext_linux); +#endif + + fr_auth_init = 1; + + return 0; +} + + +/* + * Check if a packet has authorization. If the packet is found to match an + * authorization result and that would result in a feedback loop (i.e. it + * will end up returning FR_AUTH) then return FR_BLOCK instead. + */ +frentry_t *fr_checkauth(fin, passp) +fr_info_t *fin; +u_32_t *passp; +{ + frentry_t *fr; + frauth_t *fra; + u_32_t pass; + u_short id; + ip_t *ip; + int i; + + if (fr_auth_lock || !fr_authused) + return NULL; + + ip = fin->fin_ip; + id = ip->ip_id; + + READ_ENTER(&ipf_auth); + for (i = fr_authstart; i != fr_authend; ) { + /* + * index becomes -2 only after an SIOCAUTHW. Check this in + * case the same packet gets sent again and it hasn't yet been + * auth'd. + */ + fra = fr_auth + i; + if ((fra->fra_index == -2) && (id == fra->fra_info.fin_id) && + !bcmp((char *)fin, (char *)&fra->fra_info, FI_CSIZE)) { + /* + * Avoid feedback loop. + */ + if (!(pass = fra->fra_pass) || (FR_ISAUTH(pass))) + pass = FR_BLOCK; + /* + * Create a dummy rule for the stateful checking to + * use and return. Zero out any values we don't + * trust from userland! 
+ */ + if ((pass & FR_KEEPSTATE) || ((pass & FR_KEEPFRAG) && + (fin->fin_flx & FI_FRAG))) { + KMALLOC(fr, frentry_t *); + if (fr) { + bcopy((char *)fra->fra_info.fin_fr, + (char *)fr, sizeof(*fr)); + fr->fr_grp = NULL; + fr->fr_ifa = fin->fin_ifp; + fr->fr_func = NULL; + fr->fr_ref = 1; + fr->fr_flags = pass; + fr->fr_ifas[1] = NULL; + fr->fr_ifas[2] = NULL; + fr->fr_ifas[3] = NULL; + } + } else + fr = fra->fra_info.fin_fr; + fin->fin_fr = fr; + RWLOCK_EXIT(&ipf_auth); + WRITE_ENTER(&ipf_auth); + if ((fr != NULL) && (fr != fra->fra_info.fin_fr)) { + fr->fr_next = fr_authlist; + fr_authlist = fr; + } + fr_authstats.fas_hits++; + fra->fra_index = -1; + fr_authused--; + if (i == fr_authstart) { + while (fra->fra_index == -1) { + i++; + fra++; + if (i == fr_authsize) { + i = 0; + fra = fr_auth; + } + fr_authstart = i; + if (i == fr_authend) + break; + } + if (fr_authstart == fr_authend) { + fr_authnext = 0; + fr_authstart = fr_authend = 0; + } + } + RWLOCK_EXIT(&ipf_auth); + if (passp != NULL) + *passp = pass; + ATOMIC_INC64(fr_authstats.fas_hits); + return fr; + } + i++; + if (i == fr_authsize) + i = 0; + } + fr_authstats.fas_miss++; + RWLOCK_EXIT(&ipf_auth); + ATOMIC_INC64(fr_authstats.fas_miss); + return NULL; +} + + +/* + * Check if we have room in the auth array to hold details for another packet. + * If we do, store it and wake up any user programs which are waiting to + * hear about these events. 
+ */ +int fr_newauth(m, fin) +mb_t *m; +fr_info_t *fin; +{ +#if defined(_KERNEL) && defined(MENTAT) + qpktinfo_t *qpi = fin->fin_qpi; +#endif + frauth_t *fra; +#if !defined(sparc) && !defined(m68k) + ip_t *ip; +#endif + int i; + + if (fr_auth_lock) + return 0; + + WRITE_ENTER(&ipf_auth); + if (fr_authstart > fr_authend) { + fr_authstats.fas_nospace++; + RWLOCK_EXIT(&ipf_auth); + return 0; + } else { + if (fr_authused == fr_authsize) { + fr_authstats.fas_nospace++; + RWLOCK_EXIT(&ipf_auth); + return 0; + } + } + + fr_authstats.fas_added++; + fr_authused++; + i = fr_authend++; + if (fr_authend == fr_authsize) + fr_authend = 0; + RWLOCK_EXIT(&ipf_auth); + + fra = fr_auth + i; + fra->fra_index = i; + fra->fra_pass = 0; + fra->fra_age = fr_defaultauthage; + bcopy((char *)fin, (char *)&fra->fra_info, sizeof(*fin)); +#if !defined(sparc) && !defined(m68k) + /* + * No need to copyback here as we want to undo the changes, not keep + * them. + */ + ip = fin->fin_ip; +# if defined(MENTAT) && defined(_KERNEL) + if ((ip == (ip_t *)m->b_rptr) && (fin->fin_v == 4)) +# endif + { + register u_short bo; + + bo = ip->ip_len; + ip->ip_len = htons(bo); + bo = ip->ip_off; + ip->ip_off = htons(bo); + } +#endif +#if SOLARIS && defined(_KERNEL) + m->b_rptr -= qpi->qpi_off; + fr_authpkts[i] = *(mblk_t **)fin->fin_mp; + fra->fra_q = qpi->qpi_q; /* The queue can disappear! 
*/ + cv_signal(&ipfauthwait); +#else +# if defined(BSD) && !defined(sparc) && (BSD >= 199306) + if (!fin->fin_out) { + ip->ip_len = htons(ip->ip_len); + ip->ip_off = htons(ip->ip_off); + } +# endif + fr_authpkts[i] = m; + WAKEUP(&fr_authnext,0); +#endif + return 1; +} + + +int fr_auth_ioctl(data, cmd, mode) +caddr_t data; +ioctlcmd_t cmd; +int mode; +{ + mb_t *m; +#if defined(_KERNEL) && !defined(MENTAT) && !defined(linux) && \ + (!defined(__FreeBSD_version) || (__FreeBSD_version < 501000)) + struct ifqueue *ifq; + SPL_INT(s); +#endif + frauth_t auth, *au = &auth, *fra; + int i, error = 0, len; + char *t; + + switch (cmd) + { + case SIOCSTLCK : + if (!(mode & FWRITE)) { + error = EPERM; + break; + } + fr_lock(data, &fr_auth_lock); + break; + + case SIOCATHST: + fr_authstats.fas_faelist = fae_list; + error = fr_outobj(data, &fr_authstats, IPFOBJ_AUTHSTAT); + break; + + case SIOCIPFFL: + SPL_NET(s); + WRITE_ENTER(&ipf_auth); + i = fr_authflush(); + RWLOCK_EXIT(&ipf_auth); + SPL_X(s); + error = copyoutptr((char *)&i, data, sizeof(i)); + break; + + case SIOCAUTHW: +fr_authioctlloop: + error = fr_inobj(data, au, IPFOBJ_FRAUTH); + READ_ENTER(&ipf_auth); + if ((fr_authnext != fr_authend) && fr_authpkts[fr_authnext]) { + error = fr_outobj(data, &fr_auth[fr_authnext], + IPFOBJ_FRAUTH); + if (auth.fra_len != 0 && auth.fra_buf != NULL) { + /* + * Copy packet contents out to user space if + * requested. Bail on an error. 
+ */ + m = fr_authpkts[fr_authnext]; + len = MSGDSIZE(m); + if (len > auth.fra_len) + len = auth.fra_len; + auth.fra_len = len; + for (t = auth.fra_buf; m && (len > 0); ) { + i = MIN(M_LEN(m), len); + error = copyoutptr(MTOD(m, char *), + t, i); + len -= i; + t += i; + if (error != 0) + break; + } + } + RWLOCK_EXIT(&ipf_auth); + if (error != 0) + break; + SPL_NET(s); + WRITE_ENTER(&ipf_auth); + fr_authnext++; + if (fr_authnext == fr_authsize) + fr_authnext = 0; + RWLOCK_EXIT(&ipf_auth); + SPL_X(s); + return 0; + } + RWLOCK_EXIT(&ipf_auth); + /* + * We exit ipf_global here because a program that enters in + * here will have a lock on it and goto sleep having this lock. + * If someone were to do an 'ipf -D' the system would then + * deadlock. The catch with releasing it here is that the + * caller of this function expects it to be held when we + * return so we have to reacquire it in here. + */ + RWLOCK_EXIT(&ipf_global); + + MUTEX_ENTER(&ipf_authmx); +#ifdef _KERNEL +# if SOLARIS + error = 0; + if (!cv_wait_sig(&ipfauthwait, &ipf_authmx.ipf_lk)) + error = EINTR; +# else /* SOLARIS */ +# ifdef __hpux + { + lock_t *l; + + l = get_sleep_lock(&fr_authnext); + error = sleep(&fr_authnext, PZERO+1); + spinunlock(l); + } +# else +# ifdef __osf__ + error = mpsleep(&fr_authnext, PSUSP|PCATCH, "fr_authnext", 0, + &ipf_authmx, MS_LOCK_SIMPLE); +# else + error = SLEEP(&fr_authnext, "fr_authnext"); +# endif /* __osf__ */ +# endif /* __hpux */ +# endif /* SOLARIS */ +#endif + MUTEX_EXIT(&ipf_authmx); + READ_ENTER(&ipf_global); + if (error == 0) { + READ_ENTER(&ipf_auth); + goto fr_authioctlloop; + } + break; + + case SIOCAUTHR: + error = fr_inobj(data, &auth, IPFOBJ_FRAUTH); + if (error != 0) + return error; + SPL_NET(s); + WRITE_ENTER(&ipf_auth); + i = au->fra_index; + fra = fr_auth + i; + if ((i < 0) || (i >= fr_authsize) || + (fra->fra_info.fin_id != au->fra_info.fin_id)) { + RWLOCK_EXIT(&ipf_auth); + SPL_X(s); + return ESRCH; + } + m = fr_authpkts[i]; + fra->fra_index = -2; + 
fra->fra_pass = au->fra_pass; + fr_authpkts[i] = NULL; + RWLOCK_EXIT(&ipf_auth); +#ifdef _KERNEL + if ((m != NULL) && (au->fra_info.fin_out != 0)) { +# ifdef MENTAT + error = !putq(fra->fra_q, m); +# else /* MENTAT */ +# if defined(linux) || defined(AIX) +# else +# if (_BSDI_VERSION >= 199802) || defined(__OpenBSD__) || \ + (defined(__sgi) && (IRIX >= 60500) || defined(AIX) || \ + (defined(__FreeBSD__) && (__FreeBSD_version >= 470102))) + error = ip_output(m, NULL, NULL, IP_FORWARDING, NULL, + NULL); +# else + error = ip_output(m, NULL, NULL, IP_FORWARDING, NULL); +# endif +# endif /* Linux */ +# endif /* MENTAT */ + if (error != 0) + fr_authstats.fas_sendfail++; + else + fr_authstats.fas_sendok++; + } else if (m) { +# ifdef MENTAT + error = !putq(fra->fra_q, m); +# else /* MENTAT */ +# if defined(linux) || defined(AIX) +# else +# if (__FreeBSD_version >= 501000) + netisr_dispatch(NETISR_IP, m); +# else +# if (IRIX >= 60516) + ifq = &((struct ifnet *)fra->fra_info.fin_ifp)->if_snd; +# else + ifq = &ipintrq; +# endif + if (IF_QFULL(ifq)) { + IF_DROP(ifq); + FREE_MB_T(m); + error = ENOBUFS; + } else { + IF_ENQUEUE(ifq, m); +# if IRIX < 60500 + schednetisr(NETISR_IP); +# endif + } +# endif +# endif /* Linux */ +# endif /* MENTAT */ + if (error != 0) + fr_authstats.fas_quefail++; + else + fr_authstats.fas_queok++; + } else + error = EINVAL; +# ifdef MENTAT + if (error != 0) + error = EINVAL; +# else /* MENTAT */ + /* + * If we experience an error which will result in the packet + * not being processed, make sure we advance to the next one. 
+ */ + if (error == ENOBUFS) { + fr_authused--; + fra->fra_index = -1; + fra->fra_pass = 0; + if (i == fr_authstart) { + while (fra->fra_index == -1) { + i++; + if (i == fr_authsize) + i = 0; + fr_authstart = i; + if (i == fr_authend) + break; + } + if (fr_authstart == fr_authend) { + fr_authnext = 0; + fr_authstart = fr_authend = 0; + } + } + } +# endif /* MENTAT */ +#endif /* _KERNEL */ + SPL_X(s); + break; + + default : + error = EINVAL; + break; + } + return error; +} + + +/* + * Free all network buffer memory used to keep saved packets. + */ +void fr_authunload() +{ + register int i; + register frauthent_t *fae, **faep; + frentry_t *fr, **frp; + mb_t *m; + + if (fr_auth != NULL) { + KFREES(fr_auth, fr_authsize * sizeof(*fr_auth)); + fr_auth = NULL; + } + + if (fr_authpkts != NULL) { + for (i = 0; i < fr_authsize; i++) { + m = fr_authpkts[i]; + if (m != NULL) { + FREE_MB_T(m); + fr_authpkts[i] = NULL; + } + } + KFREES(fr_authpkts, fr_authsize * sizeof(*fr_authpkts)); + fr_authpkts = NULL; + } + + faep = &fae_list; + while ((fae = *faep) != NULL) { + *faep = fae->fae_next; + KFREE(fae); + } + ipauth = NULL; + + if (fr_authlist != NULL) { + for (frp = &fr_authlist; ((fr = *frp) != NULL); ) { + if (fr->fr_ref == 1) { + *frp = fr->fr_next; + KFREE(fr); + } else + frp = &fr->fr_next; + } + } + + if (fr_auth_init == 1) { +# if SOLARIS && defined(_KERNEL) + cv_destroy(&ipfauthwait); +# endif + MUTEX_DESTROY(&ipf_authmx); + RW_DESTROY(&ipf_auth); + + fr_auth_init = 0; + } +} + + +/* + * Slowly expire held auth records. Timeouts are set + * in expectation of this being called twice per second. 
+ */ +void fr_authexpire() +{ + register int i; + register frauth_t *fra; + register frauthent_t *fae, **faep; + register frentry_t *fr, **frp; + mb_t *m; + SPL_INT(s); + + if (fr_auth_lock) + return; + + SPL_NET(s); + WRITE_ENTER(&ipf_auth); + for (i = 0, fra = fr_auth; i < fr_authsize; i++, fra++) { + fra->fra_age--; + if ((fra->fra_age == 0) && (m = fr_authpkts[i])) { + FREE_MB_T(m); + fr_authpkts[i] = NULL; + fr_auth[i].fra_index = -1; + fr_authstats.fas_expire++; + fr_authused--; + } + } + + for (faep = &fae_list; ((fae = *faep) != NULL); ) { + fae->fae_age--; + if (fae->fae_age == 0) { + *faep = fae->fae_next; + KFREE(fae); + fr_authstats.fas_expire++; + } else + faep = &fae->fae_next; + } + if (fae_list != NULL) + ipauth = &fae_list->fae_fr; + else + ipauth = NULL; + + for (frp = &fr_authlist; ((fr = *frp) != NULL); ) { + if (fr->fr_ref == 1) { + *frp = fr->fr_next; + KFREE(fr); + } else + frp = &fr->fr_next; + } + RWLOCK_EXIT(&ipf_auth); + SPL_X(s); +} + +int fr_preauthcmd(cmd, fr, frptr) +ioctlcmd_t cmd; +frentry_t *fr, **frptr; +{ + frauthent_t *fae, **faep; + int error = 0; + SPL_INT(s); + + if ((cmd != SIOCADAFR) && (cmd != SIOCRMAFR)) + return EIO; + + for (faep = &fae_list; ((fae = *faep) != NULL); ) { + if (&fae->fae_fr == fr) + break; + else + faep = &fae->fae_next; + } + + if (cmd == (ioctlcmd_t)SIOCRMAFR) { + if (fr == NULL || frptr == NULL) + error = EINVAL; + else if (fae == NULL) + error = ESRCH; + else { + SPL_NET(s); + WRITE_ENTER(&ipf_auth); + *faep = fae->fae_next; + if (ipauth == &fae->fae_fr) + ipauth = fae_list ? 
&fae_list->fae_fr : NULL; + RWLOCK_EXIT(&ipf_auth); + SPL_X(s); + + KFREE(fae); + } + } else if (fr != NULL && frptr != NULL) { + KMALLOC(fae, frauthent_t *); + if (fae != NULL) { + bcopy((char *)fr, (char *)&fae->fae_fr, + sizeof(*fr)); + SPL_NET(s); + WRITE_ENTER(&ipf_auth); + fae->fae_age = fr_defaultauthage; + fae->fae_fr.fr_hits = 0; + fae->fae_fr.fr_next = *frptr; + *frptr = &fae->fae_fr; + fae->fae_next = *faep; + *faep = fae; + ipauth = &fae_list->fae_fr; + RWLOCK_EXIT(&ipf_auth); + SPL_X(s); + } else + error = ENOMEM; + } else + error = EINVAL; + return error; +} + + +/* + * Flush held packets. + * Must already be properly SPL'ed and Locked on &ipf_auth. + * + */ +int fr_authflush() +{ + register int i, num_flushed; + mb_t *m; + + if (fr_auth_lock) + return -1; + + num_flushed = 0; + + for (i = 0 ; i < fr_authsize; i++) { + m = fr_authpkts[i]; + if (m != NULL) { + FREE_MB_T(m); + fr_authpkts[i] = NULL; + fr_auth[i].fra_index = -1; + /* perhaps add & use a flush counter inst.*/ + fr_authstats.fas_expire++; + fr_authused--; + num_flushed++; + } + } + + fr_authstart = 0; + fr_authend = 0; + fr_authnext = 0; + + return num_flushed; +} diff --git a/usr/src/uts/common/inet/ipf/ip_fil_solaris.c b/usr/src/uts/common/inet/ipf/ip_fil_solaris.c new file mode 100644 index 0000000000..287cca861e --- /dev/null +++ b/usr/src/uts/common/inet/ipf/ip_fil_solaris.c @@ -0,0 +1,1612 @@ +/* + * Copyright (C) 1993-2001, 2003 by Darren Reed. + * + * See the IPFILTER.LICENCE file for details on licencing. + * + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#if !defined(lint)
+static const char sccsid[] = "%W% %G% (C) 1993-2000 Darren Reed";
+static const char rcsid[] = "@(#)$Id: ip_fil_solaris.c,v 2.62.2.19 2005/07/13 21:40:46 darrenr Exp $";
+#endif
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/param.h>
+#include <sys/cpuvar.h>
+#include <sys/open.h>
+#include <sys/ioctl.h>
+#include <sys/filio.h>
+#include <sys/systm.h>
+#include <sys/strsubr.h>
+#include <sys/cred.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/ksynch.h>
+#include <sys/kmem.h>
+#include <sys/mkdev.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/dditypes.h>
+#include <sys/cmn_err.h>
+#include <net/if.h>
+#include <net/af.h>
+#include <net/route.h>
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <netinet/tcpip.h>
+#include <netinet/ip_icmp.h>
+#include "netinet/ip_compat.h"
+#ifdef USE_INET6
+# include <netinet/icmp6.h>
+#endif
+#include "netinet/ip_fil.h"
+#include "netinet/ip_nat.h"
+#include "netinet/ip_frag.h"
+#include "netinet/ip_state.h"
+#include "netinet/ip_auth.h"
+#include "netinet/ip_proxy.h"
+#ifdef IPFILTER_LOOKUP
+# include "netinet/ip_lookup.h"
+#endif
+#include <inet/ip_ire.h>
+
+#include <sys/md5.h>
+
+extern int fr_flags, fr_active;
+#if SOLARIS2 >= 7
+timeout_id_t fr_timer_id;	/* id returned by timeout() in fr_slowtimer */
+#else
+int fr_timer_id;	/* id returned by timeout() in fr_slowtimer */
+#endif
+
+
+static int fr_send_ip __P((fr_info_t *fin, mblk_t *m, mblk_t **mp));
+
+ipfmutex_t ipl_mutex, ipf_authmx, ipf_rw, ipf_stinsert;
+ipfmutex_t ipf_nat_new, ipf_natio, ipf_timeoutlock;
+ipfrwlock_t ipf_mutex, ipf_global, ipf_ipidfrag, ipf_frcache;
+ipfrwlock_t ipf_frag, ipf_state, ipf_nat, ipf_natfrag, ipf_auth;
+kcondvar_t iplwait, ipfauthwait;
+#if SOLARIS2 < 10	/* pointers into the IP stack, filled in by iplattach */
+#if SOLARIS2 >= 7
+timeout_id_t fr_timer_id;
+u_int *ip_ttl_ptr = NULL;
+u_int *ip_mtudisc = NULL;
+# if SOLARIS2 >= 8
+int *ip_forwarding = NULL;
+u_int *ip6_forwarding = NULL;
+# else
+u_int *ip_forwarding = NULL;
+# endif
+#else
+int fr_timer_id;
+u_long *ip_ttl_ptr = NULL;
+u_long *ip_mtudisc = NULL;
+u_long *ip_forwarding = NULL;
+#endif
+#endif
+int ipf_locks_done = 0;	/* 1 once iplattach has initialised its locks */
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    ipldetach                                                   */
+/* Returns:     int - 0 == success, else error.                             */
+/* Parameters:  Nil                                                         */
+/*                                                                          */
+/* This function is responsible for undoing anything that might have been   */
+/* done in a call to iplattach().  It must be able to clean up from a call  */
+/* to iplattach() that did not succeed.  Why might that happen?  Someone    */
+/* configures a table to be so large that we cannot allocate enough memory  */
+/* for it.                                                                  */
+/*                                                                          */
+/* The locks are only destroyed when ipf_locks_done shows they were         */
+/* initialised, which is what makes the partial-attach case safe.  As       */
+/* written the function always returns 0.                                   */
+/* ------------------------------------------------------------------------ */
+int ipldetach()
+{
+
+	ASSERT(rw_read_locked(&ipf_global.ipf_lk) == 0);
+
+#if SOLARIS2 < 10
+
+	if (fr_control_forwarding & 2) {
+		if (ip_forwarding != NULL)
+			*ip_forwarding = 0;
+#if SOLARIS2 >= 8
+		if (ip6_forwarding != NULL)
+			*ip6_forwarding = 0;
+#endif
+	}
+#endif
+
+#ifdef IPFDEBUG
+	cmn_err(CE_CONT, "ipldetach()\n");
+#endif
+
+	fr_deinitialise();
+
+	(void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE);
+	(void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE);
+
+	if (ipf_locks_done == 1) {
+		MUTEX_DESTROY(&ipf_timeoutlock);
+		MUTEX_DESTROY(&ipf_rw);
+		RW_DESTROY(&ipf_ipidfrag);
+		ipf_locks_done = 0;
+	}
+	return 0;
+}
+
+/*
+ * iplattach - initialise the locks owned by this file and call
+ * fr_initialise().  Returns 0 on success and -1 if fr_initialise() fails;
+ * in the failure case the locks stay initialised (ipf_locks_done == 1)
+ * until ipldetach() is called.
+ *
+ * Before Solaris 10 it also locates the IP stack's tunables and, when bit
+ * 0 of fr_control_forwarding is set, switches IP forwarding on.
+ */
+int iplattach __P((void))
+{
+#if SOLARIS2 < 10
+	int i;
+#endif
+
+#ifdef IPFDEBUG
+	cmn_err(CE_CONT, "iplattach()\n");
+#endif
+
+	ASSERT(rw_read_locked(&ipf_global.ipf_lk) == 0);
+
+	bzero((char *)frcache, sizeof(frcache));
+	MUTEX_INIT(&ipf_rw, "ipf rw mutex");
+	MUTEX_INIT(&ipf_timeoutlock, "ipf timeout lock mutex");
+	RWLOCK_INIT(&ipf_ipidfrag, "ipf IP NAT-Frag rwlock");
+	ipf_locks_done = 1;
+
+	if (fr_initialise() < 0)
+		return -1;
+
+/* Do not use private interface ip_params_arr[] in Solaris 10 */
+#if SOLARIS2 < 10
+
+#if SOLARIS2 >= 8
+	ip_forwarding = &ip_g_forward;
+#endif
+	/*
+	 * XXX - There is no terminator for this array, so it is not possible
+	 * to tell if what we are looking for is missing and go off the end
+	 * of the array.
+	 */
+
+#if SOLARIS2 <= 8
+	for (i = 0; ; i++) {
+		if (!strcmp(ip_param_arr[i].ip_param_name, "ip_def_ttl")) {
+			ip_ttl_ptr = &ip_param_arr[i].ip_param_value;
+		} else if (!strcmp(ip_param_arr[i].ip_param_name,
+			    "ip_path_mtu_discovery")) {
+			ip_mtudisc = &ip_param_arr[i].ip_param_value;
+		}
+#if SOLARIS2 < 8
+		else if (!strcmp(ip_param_arr[i].ip_param_name,
+			    "ip_forwarding")) {
+			ip_forwarding = &ip_param_arr[i].ip_param_value;
+		}
+#else
+		else if (!strcmp(ip_param_arr[i].ip_param_name,
+			    "ip6_forwarding")) {
+			ip6_forwarding = &ip_param_arr[i].ip_param_value;
+		}
+#endif
+
+		/* stop as soon as every pointer we need has been found */
+		if (ip_mtudisc != NULL && ip_ttl_ptr != NULL &&
+#if SOLARIS2 >= 8
+		    ip6_forwarding != NULL &&
+#endif
+		    ip_forwarding != NULL)
+			break;
+	}
+#endif
+
+	if (fr_control_forwarding & 1) {
+		if (ip_forwarding != NULL)
+			*ip_forwarding = 1;
+#if SOLARIS2 >= 8
+		if (ip6_forwarding != NULL)
+			*ip6_forwarding = 1;
+#endif
+	}
+
+#endif
+
+	return 0;
+}
+
+
+/*
+ * Filter ioctl interface.
+ *
+ * The minor number of dev selects the IPFilter device ("unit").  While the
+ * filter is not running (fr_running <= 0) only a small set of commands on
+ * the IPL_LOGIPF unit is accepted; everything else returns EIO.
+ *
+ * ipf_global is held as reader for the duration of the call.  Commands
+ * that fr_ioctlswitch() handles (any return other than -1) finish there;
+ * the rest are handled by the switch below.  SIOCFRENB and SIOCFRSYN drop
+ * the read hold and take ipf_global as writer before doing their work.
+ *
+ * Returns 0 or an errno value.
+ */
+/*ARGSUSED*/
+int iplioctl(dev, cmd, data, mode, cp, rp)
+dev_t dev;
+int cmd;
+#if SOLARIS2 >= 7
+intptr_t data;
+#else
+int *data;
+#endif
+int mode;
+cred_t *cp;
+int *rp;
+{
+	int error = 0, tmp;
+	friostat_t fio;
+	minor_t unit;
+	u_int enable;
+
+#ifdef IPFDEBUG
+	cmn_err(CE_CONT, "iplioctl(%x,%x,%x,%d,%x,%d)\n",
+		dev, cmd, data, mode, cp, rp);
+#endif
+	unit = getminor(dev);
+	if (IPL_LOGMAX < unit)
+		return ENXIO;
+
+	if (fr_running <= 0) {
+		if (unit != IPL_LOGIPF)
+			return EIO;
+		if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET &&
+		    cmd != SIOCIPFSET && cmd != SIOCFRENB &&
+		    cmd != SIOCGETFS && cmd != SIOCGETFF)
+			return EIO;
+	}
+
+	READ_ENTER(&ipf_global);
+
+	error = fr_ioctlswitch(unit, (caddr_t)data, cmd, mode);
+	if (error != -1) {
+		RWLOCK_EXIT(&ipf_global);
+		return error;
+	}
+	error = 0;
+
+	switch (cmd)
+	{
+	case SIOCFRENB :	/* enable (non-zero) or disable (zero) */
+		if (!(mode & FWRITE))
+			error = EPERM;
+		else {
+			error = COPYIN((caddr_t)data, (caddr_t)&enable,
+				       sizeof(enable));
+			if (error != 0) {
+				error = EFAULT;
+				break;
+			}
+
+			RWLOCK_EXIT(&ipf_global);
+			WRITE_ENTER(&ipf_global);
+			if (enable) {
+				if (fr_running > 0)
+					error = 0;
+				else
+					error = iplattach();
+				if (error == 0)
+					fr_running = 1;
+				else
+					(void) ipldetach();
+			} else {
+				error = ipldetach();
+				if (error == 0)
+					fr_running = -1;
+			}
+		}
+		break;
+	case SIOCIPFSET :
+		if (!(mode & FWRITE)) {
+			error = EPERM;
+			break;
+		}
+		/* FALLTHRU */
+	case SIOCIPFGETNEXT :
+	case SIOCIPFGET :
+		error = fr_ipftune(cmd, (void *)data);
+		break;
+	case SIOCSETFF :
+		if (!(mode & FWRITE))
+			error = EPERM;
+		else {
+			error = COPYIN((caddr_t)data, (caddr_t)&fr_flags,
+				       sizeof(fr_flags));
+			if (error != 0)
+				error = EFAULT;
+		}
+		break;
+	case SIOCGETFF :
+		error = COPYOUT((caddr_t)&fr_flags, (caddr_t)data,
+				sizeof(fr_flags));
+		if (error != 0)
+			error = EFAULT;
+		break;
+	case SIOCFUNCL :
+		error = fr_resolvefunc((void *)data);
+		break;
+	case SIOCINAFR :
+	case SIOCRMAFR :
+	case SIOCADAFR :
+	case SIOCZRLST :
+		/* these operate on the rule set selected by fr_active */
+		if (!(mode & FWRITE))
+			error = EPERM;
+		else
+			error = frrequest(unit, cmd, (caddr_t)data,
+					  fr_active, 1);
+		break;
+	case SIOCINIFR :
+	case SIOCRMIFR :
+	case SIOCADIFR :
+		/* these operate on the other rule set (1 - fr_active) */
+		if (!(mode & FWRITE))
+			error = EPERM;
+		else
+			error = frrequest(unit, cmd, (caddr_t)data,
+					  1 - fr_active, 1);
+		break;
+	case SIOCSWAPA :
+		if (!(mode & FWRITE))
+			error = EPERM;
+		else {
+			WRITE_ENTER(&ipf_mutex);
+			bzero((char *)frcache, sizeof(frcache[0]) * 2);
+			/* report the old value; only swap if that worked */
+			error = COPYOUT((caddr_t)&fr_active, (caddr_t)data,
+					sizeof(fr_active));
+			if (error != 0)
+				error = EFAULT;
+			else
+				fr_active = 1 - fr_active;
+			RWLOCK_EXIT(&ipf_mutex);
+		}
+		break;
+	case SIOCGETFS :
+		fr_getstat(&fio);
+		error = fr_outobj((void *)data, &fio, IPFOBJ_IPFSTAT);
+		break;
+	case SIOCFRZST :
+		if (!(mode & FWRITE))
+			error = EPERM;
+		else
+			error = fr_zerostats((caddr_t)data);
+		break;
+	case SIOCIPFFL :
+		if (!(mode & FWRITE))
+			error = EPERM;
+		else {
+			error = COPYIN((caddr_t)data, (caddr_t)&tmp,
+				       sizeof(tmp));
+			if (!error) {
+				tmp = frflush(unit, 4, tmp);
+				error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
+						sizeof(tmp));
+				if (error != 0)
+					error = EFAULT;
+			} else
+				error = EFAULT;
+		}
+		break;
+#ifdef USE_INET6
+	case SIOCIPFL6 :
+		if (!(mode & FWRITE))
+			error = EPERM;
+		else {
+			error = COPYIN((caddr_t)data, (caddr_t)&tmp,
+				       sizeof(tmp));
+			if (!error) {
+				tmp = frflush(unit, 6, tmp);
+				error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
+						sizeof(tmp));
+				if (error != 0)
+					error = EFAULT;
+			} else
+				error = EFAULT;
+		}
+		break;
+#endif
+	case SIOCSTLCK :
+		/* NOTE(review): no FWRITE check on this case - confirm intent */
+		error = COPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
+		if (error == 0) {
+			fr_state_lock = tmp;
+			fr_nat_lock = tmp;
+			fr_frag_lock = tmp;
+			fr_auth_lock = tmp;
+		} else
+			error = EFAULT;
+		break;
+#ifdef IPFILTER_LOG
+	case SIOCIPFFB :
+		if (!(mode & FWRITE))
+			error = EPERM;
+		else {
+			tmp = ipflog_clear(unit);
+			error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
+				       sizeof(tmp));
+			if (error)
+				error = EFAULT;
+		}
+		break;
+#endif /* IPFILTER_LOG */
+	case SIOCFRSYN :
+		if (!(mode & FWRITE))
+			error = EPERM;
+		else {
+			RWLOCK_EXIT(&ipf_global);
+			WRITE_ENTER(&ipf_global);
+			error = ipfsync();
+		}
+		break;
+	case SIOCGFRST :
+		error = fr_outobj((void *)data, fr_fragstats(),
+				  IPFOBJ_FRAGSTAT);
+		break;
+	case FIONREAD :
+#ifdef IPFILTER_LOG
+		tmp = (int)iplused[IPL_LOGIPF];
+
+		error = COPYOUT((caddr_t)&tmp, (caddr_t)data, sizeof(tmp));
+		if (error != 0)
+			error = EFAULT;
+#endif
+		break;
+	default :
+		cmn_err(CE_NOTE, "Unknown: cmd 0x%x data %p", cmd, (void *)data);
+		error = EINVAL;
+		break;
+	}
+	RWLOCK_EXIT(&ipf_global);
+	return error;
+}
+
+/*
+ * get_unit - look up the qif_t for interface "name" via qif_iflookup(),
+ * using SAP 0x0800 when v is 4 and 0x86dd when v is 6.  Any other v
+ * returns NULL.  The lookup is done with pfil_rw held as reader.
+ */
+void *get_unit(name, v)
+char *name;
+int v;
+{
+	qif_t *qf;
+	int sap;
+
+	if (v == 4)
+		sap = 0x0800;
+	else if (v == 6)
+		sap = 0x86dd;
+	else
+		return NULL;
+	rw_enter(&pfil_rw, RW_READER);
+	qf = qif_iflookup(name, sap);
+	rw_exit(&pfil_rw);
+	return qf;
+}
+
+
+/*
+ * routines below for saving IP headers to buffer
+ *
+ * iplopen: returns ENXIO unless otype has OTYP_CHR set and the minor
+ * number is no greater than IPL_LOGMAX; otherwise returns 0.
+ */
+/*ARGSUSED*/
+int iplopen(devp, flags, otype, cred)
+dev_t *devp;
+int flags, otype;
+cred_t *cred;
+{
+	minor_t min = getminor(*devp);
+
+#ifdef IPFDEBUG
+	cmn_err(CE_CONT, "iplopen(%x,%x,%x,%x)\n", devp, flags, otype, cred);
+#endif
+	if (!(otype & OTYP_CHR))
+		return ENXIO;
+
+	/* min is reused to carry the return value */
+	min = (IPL_LOGMAX < min) ? ENXIO : 0;
+	return min;
+}
+
+/*
+ * iplclose: returns ENXIO when the minor number exceeds IPL_LOGMAX and 0
+ * otherwise; no other state is touched.
+ */
+/*ARGSUSED*/
+int iplclose(dev, flags, otype, cred)
+dev_t dev;
+int flags, otype;
+cred_t *cred;
+{
+	minor_t min = getminor(dev);
+
+#ifdef IPFDEBUG
+	cmn_err(CE_CONT, "iplclose(%x,%x,%x,%x)\n", dev, flags, otype, cred);
+#endif
+
+	/* min is reused to carry the return value */
+	min = (IPL_LOGMAX < min) ? ENXIO : 0;
+	return min;
+}
+
+#ifdef IPFILTER_LOG
+/*
+ * iplread/ipllog
+ * both of these must operate with at least splnet() lest they be
+ * called during packet processing and cause an inconsistancy to appear in
+ * the filter lists.
+ *
+ * iplread hands the request to ipfsync_read() for the IPL_LOGSYNC minor
+ * (when IPFILTER_SYNC is defined) and to ipflog_read() for every other
+ * minor.
+ */
+/*ARGSUSED*/
+int iplread(dev, uio, cp)
+dev_t dev;
+register struct uio *uio;
+cred_t *cp;
+{
+# ifdef IPFDEBUG
+	cmn_err(CE_CONT, "iplread(%x,%x,%x)\n", dev, uio, cp);
+# endif
+# ifdef IPFILTER_SYNC
+	if (getminor(dev) == IPL_LOGSYNC)
+		return ipfsync_read(uio);
+# endif
+
+	return ipflog_read(getminor(dev), uio);
+}
+#endif /* IPFILTER_LOG */
+
+
+/*
+ * iplwrite
+ * Writes are only accepted on the IPL_LOGSYNC minor, and only when
+ * IPFILTER_SYNC is defined, where they are passed to ipfsync_write().
+ * Every other write returns ENXIO.  The self-assignments below exist
+ * only to keep lint quiet about unused arguments.
+ */
+int iplwrite(dev, uio, cp)
+dev_t dev;
+register struct uio *uio;
+cred_t *cp;
+{
+#ifdef IPFDEBUG
+	cmn_err(CE_CONT, "iplwrite(%x,%x,%x)\n", dev, uio, cp);
+#endif
+#ifdef IPFILTER_SYNC
+	if (getminor(dev) == IPL_LOGSYNC)
+		return ipfsync_write(uio);
+#endif /* IPFILTER_SYNC */
+	dev = dev;	/* LINT */
+	uio = uio;	/* LINT */
+	cp = cp;	/* LINT */
+	return ENXIO;
+}
+
+
+/*
+ * fr_send_reset - this could conceivably be a call to tcp_respond(), but that
+ * requires a large amount of setting up and isn't any more efficient.
+ *
+ * Builds a TCP RST answering the packet described by fin and passes it to
+ * fr_send_ip().  Returns -1 without sending when the offending packet is
+ * itself a RST, when its layer 4 checksum check fails (only compiled in
+ * when IPFILTER_CKSUM is not defined) or when no message block can be
+ * allocated; otherwise returns whatever fr_send_ip() returns.
+ */
+int fr_send_reset(fin)
+fr_info_t *fin;
+{
+	tcphdr_t *tcp, *tcp2;
+	int tlen, hlen;
+	mblk_t *m;
+#ifdef USE_INET6
+	ip6_t *ip6;
+#endif
+	ip_t *ip;
+
+	tcp = fin->fin_dp;
+	if (tcp->th_flags & TH_RST)
+		return -1;
+
+#ifndef IPFILTER_CKSUM
+	if (fr_checkl4sum(fin) == -1)
+		return -1;
+#endif
+
+	/* SYN and FIN each occupy one sequence number */
+	tlen = (tcp->th_flags & (TH_SYN|TH_FIN)) ? 1 : 0;
+#ifdef USE_INET6
+	if (fin->fin_v == 6)
+		hlen = sizeof(ip6_t);
+	else
+#endif
+		hlen = sizeof(ip_t);
+	hlen += sizeof(*tcp2);
+	if ((m = (mblk_t *)allocb(hlen + 64, BPRI_HI)) == NULL)
+		return -1;
+
+	/* leave 64 bytes of headroom in front of the IP header */
+	m->b_rptr += 64;
+	MTYPE(m) = M_DATA;
+	m->b_wptr = m->b_rptr + hlen;
+	ip = (ip_t *)m->b_rptr;
+	bzero((char *)ip, hlen);
+	tcp2 = (struct tcphdr *)(m->b_rptr + hlen - sizeof(*tcp2));
+	tcp2->th_dport = tcp->th_sport;
+	tcp2->th_sport = tcp->th_dport;
+	if (tcp->th_flags & TH_ACK) {
+		tcp2->th_seq = tcp->th_ack;
+		tcp2->th_flags = TH_RST;
+	} else {
+		tcp2->th_ack = ntohl(tcp->th_seq);
+		tcp2->th_ack += tlen;
+		tcp2->th_ack = htonl(tcp2->th_ack);
+		tcp2->th_flags = TH_RST|TH_ACK;
+	}
+	tcp2->th_off = sizeof(struct tcphdr) >> 2;
+
+	ip->ip_v = fin->fin_v;
+#ifdef USE_INET6
+	if (fin->fin_v == 6) {
+		ip6 = (ip6_t *)m->b_rptr;
+		ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
+		ip6->ip6_src = fin->fin_dst6;
+		ip6->ip6_dst = fin->fin_src6;
+		ip6->ip6_plen = htons(sizeof(*tcp));
+		ip6->ip6_nxt = IPPROTO_TCP;
+		tcp2->th_sum = fr_cksum(m, (ip_t *)ip6, IPPROTO_TCP, tcp2);
+	} else
+#endif
+	{
+		ip->ip_src.s_addr = fin->fin_daddr;
+		ip->ip_dst.s_addr = fin->fin_saddr;
+		ip->ip_id = fr_nextipid(fin);
+		ip->ip_hl = sizeof(*ip) >> 2;
+		ip->ip_p = IPPROTO_TCP;
+		ip->ip_len = sizeof(*ip) + sizeof(*tcp);	/* host order */
+		ip->ip_tos = fin->fin_ip->ip_tos;
+		tcp2->th_sum = fr_cksum(m, ip, IPPROTO_TCP, tcp2);
+	}
+	return fr_send_ip(fin, m, &m);
+}
+
+/*
+ * Function:	fr_send_ip
+ * Returns:	 0: success
+ *		-1: failed
+ * Parameters:
+ *	fin: packet information
+ *	m: the message block where ip head starts
+ *
+ * Send a new packet through the IP stack.
+ *
+ * For IPv4 packets, ip_len must be in host byte order, and ip_v,
+ * ip_ttl, ip_off, and ip_sum are ignored (filled in by this
+ * function).
+ *
+ * For IPv6 packets, ip6_flow, ip6_vfc, and ip6_hlim are filled
+ * in by this function.
+ *
+ * All other portions of the packet must be in on-the-wire format.
+ */ +/*ARGSUSED*/ +static int fr_send_ip(fin, m, mpp) +fr_info_t *fin; +mblk_t *m, **mpp; +{ + qpktinfo_t qpi, *qpip; + fr_info_t fnew; + qif_t *qif; + ip_t *ip; + int i, hlen; + + ip = (ip_t *)m->b_rptr; + bzero((char *)&fnew, sizeof(fnew)); + +#ifdef USE_INET6 + if (fin->fin_v == 6) { + ip6_t *ip6; + + ip6 = (ip6_t *)ip; + ip6->ip6_vfc = 0x60; + ip6->ip6_hlim = 127; + fnew.fin_v = 6; + hlen = sizeof(*ip6); + } else +#endif + { + fnew.fin_v = 4; +#if SOLARIS2 >= 10 + ip->ip_ttl = 255; + + ip->ip_off = htons(IP_DF); +#else + if (ip_ttl_ptr != NULL) + ip->ip_ttl = (u_char)(*ip_ttl_ptr); + else + ip->ip_ttl = 63; + if (ip_mtudisc != NULL) + ip->ip_off = htons(*ip_mtudisc ? IP_DF : 0); + else + ip->ip_off = htons(IP_DF); +#endif + /* + * The dance with byte order and ip_len/ip_off is because in + * fr_fastroute, it expects them to be in host byte order but + * ipf_cksum expects them to be in network byte order. + */ + ip->ip_len = htons(ip->ip_len); + ip->ip_sum = ipf_cksum((u_short *)ip, sizeof(*ip)); + ip->ip_len = ntohs(ip->ip_len); + ip->ip_off = ntohs(ip->ip_off); + hlen = sizeof(*ip); + } + + qpip = fin->fin_qpi; + qpi.qpi_q = qpip->qpi_q; + qpi.qpi_off = 0; + qpi.qpi_name = qpip->qpi_name; + qif = qpip->qpi_real; + qpi.qpi_real = qif; + qpi.qpi_ill = qif->qf_ill; + qpi.qpi_hl = qif->qf_hl; + qpi.qpi_ppa = qif->qf_ppa; + qpi.qpi_num = qif->qf_num; + qpi.qpi_flags = qif->qf_flags; + qpi.qpi_max_frag = qif->qf_max_frag; + qpi.qpi_m = m; + qpi.qpi_data = ip; + fnew.fin_qpi = &qpi; + fnew.fin_ifp = fin->fin_ifp; + fnew.fin_flx = FI_NOCKSUM; + fnew.fin_m = m; + fnew.fin_ip = ip; + fnew.fin_mp = mpp; + fnew.fin_hlen = hlen; + fnew.fin_dp = (char *)ip + hlen; + (void) fr_makefrip(hlen, ip, &fnew); + + i = fr_fastroute(m, mpp, &fnew, NULL); + return i; +} + + +int fr_send_icmp_err(type, fin, dst) +int type; +fr_info_t *fin; +int dst; +{ + struct in_addr dst4; + struct icmp *icmp; + qpktinfo_t *qpi; + int hlen, code; + u_short sz; +#ifdef USE_INET6 + mblk_t *mb; +#endif 
+ mblk_t *m; +#ifdef USE_INET6 + ip6_t *ip6; +#endif + ip_t *ip; + + if ((type < 0) || (type > ICMP_MAXTYPE)) + return -1; + + code = fin->fin_icode; +#ifdef USE_INET6 + if ((code < 0) || (code > sizeof(icmptoicmp6unreach)/sizeof(int))) + return -1; +#endif + +#ifndef IPFILTER_CKSUM + if (fr_checkl4sum(fin) == -1) + return -1; +#endif + + qpi = fin->fin_qpi; + +#ifdef USE_INET6 + mb = fin->fin_qfm; + + if (fin->fin_v == 6) { + sz = sizeof(ip6_t); + sz += MIN(mb->b_wptr - mb->b_rptr, 512); + hlen = sizeof(ip6_t); + type = icmptoicmp6types[type]; + if (type == ICMP6_DST_UNREACH) + code = icmptoicmp6unreach[code]; + } else +#endif + { + if ((fin->fin_p == IPPROTO_ICMP) && + !(fin->fin_flx & FI_SHORT)) + switch (ntohs(fin->fin_data[0]) >> 8) + { + case ICMP_ECHO : + case ICMP_TSTAMP : + case ICMP_IREQ : + case ICMP_MASKREQ : + break; + default : + return 0; + } + + sz = sizeof(ip_t) * 2; + sz += 8; /* 64 bits of data */ + hlen = sizeof(ip_t); + } + + sz += offsetof(struct icmp, icmp_ip); + if ((m = (mblk_t *)allocb((size_t)sz + 64, BPRI_HI)) == NULL) + return -1; + MTYPE(m) = M_DATA; + m->b_rptr += 64; + m->b_wptr = m->b_rptr + sz; + bzero((char *)m->b_rptr, (size_t)sz); + ip = (ip_t *)m->b_rptr; + ip->ip_v = fin->fin_v; + icmp = (struct icmp *)(m->b_rptr + hlen); + icmp->icmp_type = type & 0xff; + icmp->icmp_code = code & 0xff; +#ifdef icmp_nextmtu + if (type == ICMP_UNREACH && (qpi->qpi_max_frag != 0) && + fin->fin_icode == ICMP_UNREACH_NEEDFRAG) + icmp->icmp_nextmtu = htons(qpi->qpi_max_frag); +#endif + +#ifdef USE_INET6 + if (fin->fin_v == 6) { + struct in6_addr dst6; + int csz; + + if (dst == 0) { + if (fr_ifpaddr(6, FRI_NORMAL, qpi->qpi_real, + (struct in_addr *)&dst6, NULL) == -1) { + FREE_MB_T(m); + return -1; + } + } else + dst6 = fin->fin_dst6; + + csz = sz; + sz -= sizeof(ip6_t); + ip6 = (ip6_t *)m->b_rptr; + ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow; + ip6->ip6_plen = htons((u_short)sz); + ip6->ip6_nxt = IPPROTO_ICMPV6; + ip6->ip6_src = dst6; + 
ip6->ip6_dst = fin->fin_src6; + sz -= offsetof(struct icmp, icmp_ip); + bcopy((char *)mb->b_rptr, (char *)&icmp->icmp_ip, sz); + icmp->icmp_cksum = csz - sizeof(ip6_t); + } else +#endif + { + ip->ip_hl = sizeof(*ip) >> 2; + ip->ip_p = IPPROTO_ICMP; + ip->ip_id = fin->fin_ip->ip_id; + ip->ip_tos = fin->fin_ip->ip_tos; + ip->ip_len = (u_short)sz; + if (dst == 0) { + if (fr_ifpaddr(4, FRI_NORMAL, qpi->qpi_real, + &dst4, NULL) == -1) { + FREE_MB_T(m); + return -1; + } + } else + dst4 = fin->fin_dst; + ip->ip_src = dst4; + ip->ip_dst = fin->fin_src; + bcopy((char *)fin->fin_ip, (char *)&icmp->icmp_ip, + sizeof(*fin->fin_ip)); + bcopy((char *)fin->fin_ip + fin->fin_hlen, + (char *)&icmp->icmp_ip + sizeof(*fin->fin_ip), 8); + icmp->icmp_ip.ip_len = htons(icmp->icmp_ip.ip_len); + icmp->icmp_ip.ip_off = htons(icmp->icmp_ip.ip_off); + icmp->icmp_cksum = ipf_cksum((u_short *)icmp, + sz - sizeof(ip_t)); + } + + /* + * Need to exit out of these so we don't recursively call rw_enter + * from fr_qout. + */ + return fr_send_ip(fin, m, &m); +} + +#ifdef IRE_ILL_CN +#include <sys/time.h> +#include <sys/varargs.h> + +#ifndef _KERNEL +#include <stdio.h> +#endif + +#define NULLADDR_RATE_LIMIT 10 /* 10 seconds */ + + +/* + * Print out warning message at rate-limited speed. + */ +static void rate_limit_message(int rate, const char *message, ...) 
+{ + static time_t last_time = 0; + time_t now; + va_list args; + char msg_buf[256]; + int need_printed = 0; + + now = ddi_get_time(); + + /* make sure, no multiple entries */ + ASSERT(MUTEX_NOT_HELD(&(ipf_rw.ipf_lk))); + MUTEX_ENTER(&ipf_rw); + if (now - last_time >= rate) { + need_printed = 1; + last_time = now; + } + MUTEX_EXIT(&ipf_rw); + + if (need_printed) { + va_start(args, message); + (void)vsnprintf(msg_buf, 255, message, args); + va_end(args); +#ifdef _KERNEL + cmn_err(CE_WARN, msg_buf); +#else + fprintf(std_err, msg_buf); +#endif + } +} +#endif + +/* + * return the first IP Address associated with an interface + */ +/*ARGSUSED*/ +int fr_ifpaddr(v, atype, qifptr, inp, inpmask) +int v, atype; +void *qifptr; +struct in_addr *inp, *inpmask; +{ +#ifdef USE_INET6 + struct sockaddr_in6 sin6, mask6; +#endif + struct sockaddr_in sin, mask; + qif_t *qif; + +#ifdef USE_INET6 +#ifdef IRE_ILL_CN + s_ill_t *ill; +#endif +#endif + if ((qifptr == NULL) || (qifptr == (void *)-1)) + return -1; + + qif = qifptr; + +#ifdef USE_INET6 +#ifdef IRE_ILL_CN + ill = qif->qf_ill; +#endif +#endif + +#ifdef USE_INET6 + if (v == 6) { +#ifndef IRE_ILL_CN + in6_addr_t *inp6; + ipif_t *ipif; + ill_t *ill; + + ill = qif->qf_ill; + + /* + * First is always link local. 
+ */ + for (ipif = ill->ill_ipif; ipif; ipif = ipif->ipif_next) { + inp6 = &ipif->ipif_v6lcl_addr; + if (!IN6_IS_ADDR_LINKLOCAL(inp6) && + !IN6_IS_ADDR_LOOPBACK(inp6)) + break; + } + if (ipif == NULL) + return -1; + + mask6.sin6_addr = ipif->ipif_v6net_mask; + if (atype == FRI_BROADCAST) + sin6.sin6_addr = ipif->ipif_v6brd_addr; + else if (atype == FRI_PEERADDR) + sin6.sin6_addr = ipif->ipif_v6pp_dst_addr; + else + sin6.sin6_addr = *inp6; +#else /* IRE_ILL_CN */ + if (IN6_IS_ADDR_UNSPECIFIED(&ill->netmask.in6.sin6_addr) || + IN6_IS_ADDR_UNSPECIFIED(&ill->localaddr.in6.sin6_addr)) { + rate_limit_message(NULLADDR_RATE_LIMIT, + "Check pfild is running: IP#/netmask is 0 on %s.\n", + ill->ill_name); + return -1; + } + mask6 = ill->netmask.in6; + if (atype == FRI_BROADCAST) + sin6 = ill->broadaddr.in6; + else if (atype == FRI_PEERADDR) + sin6 = ill->dstaddr.in6; + else + sin6 = ill->localaddr.in6; +#endif /* IRE_ILL_CN */ + return fr_ifpfillv6addr(atype, &sin6, &mask6, inp, inpmask); + } +#endif + +#ifndef IRE_ILL_CN + + switch (atype) + { + case FRI_BROADCAST : + sin.sin_addr.s_addr = QF_V4_BROADCAST(qif); + break; + case FRI_PEERADDR : + sin.sin_addr.s_addr = QF_V4_PEERADDR(qif); + break; + default : + sin.sin_addr.s_addr = QF_V4_ADDR(qif); + break; + } + mask.sin_addr.s_addr = QF_V4_NETMASK(qif); + +#else + if (ill->netmask.in.sin_addr.s_addr == 0 || + ill->localaddr.in.sin_addr.s_addr == 0) { + rate_limit_message(NULLADDR_RATE_LIMIT, + "Check pfild is running: IP#/netmask is 0 on %s.\n", + ill->ill_name); + return -1; + } + mask = ill->netmask.in; + if (atype == FRI_BROADCAST) + sin = ill->broadaddr.in; + else if (atype == FRI_PEERADDR) + sin = ill->dstaddr.in; + else + sin = ill->localaddr.in; +#endif /* IRE_ILL_CN */ + return fr_ifpfillv4addr(atype, &sin, &mask, inp, inpmask); +} + + +u_32_t fr_newisn(fin) +fr_info_t *fin; +{ + static int iss_seq_off = 0; + u_char hash[16]; + u_32_t newiss; + MD5_CTX ctx; + + /* + * Compute the base value of the ISS. 
It is a hash + * of (saddr, sport, daddr, dport, secret). + */ + MD5Init(&ctx); + + MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_src, + sizeof(fin->fin_fi.fi_src)); + MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_dst, + sizeof(fin->fin_fi.fi_dst)); + MD5Update(&ctx, (u_char *) &fin->fin_dat, sizeof(fin->fin_dat)); + + MD5Update(&ctx, ipf_iss_secret, sizeof(ipf_iss_secret)); + + MD5Final(hash, &ctx); + + bcopy(hash, &newiss, sizeof(newiss)); + + /* + * Now increment our "timer", and add it in to + * the computed value. + * + * XXX Use `addin'? + * XXX TCP_ISSINCR too large to use? + */ + iss_seq_off += 0x00010000; + newiss += iss_seq_off; + return newiss; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_nextipid */ +/* Returns: int - 0 == success, -1 == error (packet should be droppped) */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* Returns the next IPv4 ID to use for this packet. */ +/* ------------------------------------------------------------------------ */ +u_short fr_nextipid(fin) +fr_info_t *fin; +{ + static u_short ipid = 0; + ipstate_t *is; + nat_t *nat; + u_short id; + + MUTEX_ENTER(&ipf_rw); + if (fin->fin_state != NULL) { + is = fin->fin_state; + id = (u_short)(is->is_pkts[(fin->fin_rev << 1) + 1] & 0xffff); + } else if (fin->fin_nat != NULL) { + nat = fin->fin_nat; + id = (u_short)(nat->nat_pkts[fin->fin_out] & 0xffff); + } else + id = ipid++; + MUTEX_EXIT(&ipf_rw); + + return id; +} + + +#ifndef IPFILTER_CKSUM +/* ARGSUSED */ +#endif +INLINE void fr_checkv4sum(fin) +fr_info_t *fin; +{ +#ifdef IPFILTER_CKSUM + if (fr_checkl4sum(fin) == -1) + fin->fin_flx |= FI_BAD; +#endif +} + + +#ifdef USE_INET6 +# ifndef IPFILTER_CKSUM +/* ARGSUSED */ +# endif +INLINE void fr_checkv6sum(fin) +fr_info_t *fin; +{ +# ifdef IPFILTER_CKSUM + if (fr_checkl4sum(fin) == -1) + fin->fin_flx |= FI_BAD; +# endif +} +#endif /* USE_INET6 */ + + +/* + * Function: fr_verifysrc + * Returns: int (really 
boolean) + * Parameters: fin - packet information + * + * Check whether the packet has a valid source address for the interface on + * which the packet arrived, implementing the "fr_chksrc" feature. + * Returns true iff the packet's source address is valid. + * Pre-Solaris 10, we call into the routing code to make the determination. + * On Solaris 10 and later, we have a valid address set from pfild to check + * against. + */ +int fr_verifysrc(fin) +fr_info_t *fin; +{ + ire_t *dir; + int result; + +#if SOLARIS2 >= 6 + dir = ire_route_lookup(fin->fin_saddr, 0xffffffff, 0, 0, NULL, + NULL, NULL, NULL, MATCH_IRE_DSTONLY| + MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE); +#else + dir = ire_lookup(fin->fin_saddr); +#endif + + if (!dir) + return 0; + result = (ire_to_ill(dir) == fin->fin_ifp); +#if SOLARIS2 >= 8 + ire_refrele(dir); +#endif + return result; +} + + +#if (SOLARIS2 < 7) +void fr_slowtimer() +#else +/*ARGSUSED*/ +void fr_slowtimer __P((void *ptr)) +#endif +{ + + WRITE_ENTER(&ipf_global); + if (fr_running <= 0) { + if (fr_running == -1) + fr_timer_id = timeout(fr_slowtimer, NULL, + drv_usectohz(500000)); + else + fr_timer_id = NULL; + RWLOCK_EXIT(&ipf_global); + return; + } + MUTEX_DOWNGRADE(&ipf_global); + + fr_fragexpire(); + fr_timeoutstate(); + fr_natexpire(); + fr_authexpire(); + fr_ticks++; + if (fr_running == -1 || fr_running == 1) + fr_timer_id = timeout(fr_slowtimer, NULL, drv_usectohz(500000)); + else + fr_timer_id = NULL; + RWLOCK_EXIT(&ipf_global); +} + + +/* + * Function: fr_fastroute + * Returns: 0: success; + * -1: failed + * Parameters: + * mb: the message block where ip head starts + * mpp: the pointer to the pointer of the orignal + * packet message + * fin: packet information + * fdp: destination interface information + * if it is NULL, no interface information provided. + * + * This function is for fastroute/to/dup-to rules. It calls + * pfil_make_lay2_packet to search route, make lay-2 header + * ,and identify output queue for the IP packet. 
+ * The destination address depends on the following conditions: + * 1: for fastroute rule, fdp is passed in as NULL, so the + * destination address is the IP Packet's destination address + * 2: for to/dup-to rule, if an ip address is specified after + * the interface name, this address is the as destination + * address. Otherwise IP Packet's destination address is used + */ +int fr_fastroute(mb, mpp, fin, fdp) +mblk_t *mb, **mpp; +fr_info_t *fin; +frdest_t *fdp; +{ + struct in_addr dst; +#ifndef IRE_ILL_CN + size_t hlen = 0; + ill_t *ifp; + ire_t *dir; + u_char *s; + frdest_t fd; +#ifdef USE_INET6 + ip6_t *ip6 = (ip6_t *)fin->fin_ip; +#endif +#else + void *target = NULL; + char *ifname = NULL; +#endif + queue_t *q = NULL; + mblk_t *mp = NULL; + qpktinfo_t *qpi; + frentry_t *fr; + qif_t *qif; + ip_t *ip; +#ifndef sparc + u_short __iplen, __ipoff; +#endif +#ifdef USE_INET6 + struct in6_addr dst6; +#endif +#ifndef IRE_ILL_CN + dir = NULL; +#endif + fr = fin->fin_fr; + ip = fin->fin_ip; + qpi = fin->fin_qpi; + + /* + * If this is a duplicate mblk then we want ip to point at that + * data, not the original, if and only if it is already pointing at + * the current mblk data. + */ + if (ip == (ip_t *)qpi->qpi_m->b_rptr && qpi->qpi_m != mb) + ip = (ip_t *)mb->b_rptr; + + /* + * If there is another M_PROTO, we don't want it + */ + if (*mpp != mb) { + mp = unlinkb(*mpp); + freeb(*mpp); + *mpp = mp; + } + +#ifdef IRE_ILL_CN + if (fdp != NULL) { +#else + /* + * If the fdp is NULL then there is no set route for this packet. + */ + if (fdp == NULL) { + qif = fin->fin_ifp; + + switch (fin->fin_v) + { + case 4 : + fd.fd_ip = ip->ip_dst; + break; +#ifdef USE_INET6 + case 6 : + fd.fd_ip6.in6 = ip6->ip6_dst; + break; +#endif + } + fdp = &fd; + } else { +#endif + qif = fdp->fd_ifp; + + if (qif == NULL || qif == (void *)-1) + goto bad_fastroute; + } + + /* + * In case we're here due to "to <if>" being used with + * "keep state", check that we're going in the correct + * direction. 
+ */ + if ((fr != NULL) && (fin->fin_rev != 0)) { + if ((qif != NULL) && (fdp == &fr->fr_tif)) + return -1; + dst.s_addr = fin->fin_fi.fi_daddr; + } else { + if (fin->fin_v == 4) { + if (fdp && fdp->fd_ip.s_addr != 0) { + dst = fdp->fd_ip; +#ifdef IRE_ILL_CN + target = &dst; +#endif + } else + dst.s_addr = fin->fin_fi.fi_daddr; + } +#ifdef USE_INET6 + else if (fin->fin_v == 6) { + if (fdp && IP6_NOTZERO(&fdp->fd_ip)) { + dst6 = fdp->fd_ip6.in6; +#ifdef IRE_ILL_CN + target = &dst6; +#endif + } else + dst6 = fin->fin_dst6; + } +#endif + else + goto bad_fastroute; + } + +#ifndef IRE_ILL_CN +#if SOLARIS2 >= 6 + if (fin->fin_v == 4) { + dir = ire_route_lookup(dst.s_addr, 0xffffffff, 0, 0, NULL, + NULL, NULL, MATCH_IRE_DSTONLY| + MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE); + } +# ifdef USE_INET6 + else if (fin->fin_v == 6) { + dir = ire_route_lookup_v6(&ip6->ip6_dst, NULL, 0, 0, + NULL, NULL, NULL, MATCH_IRE_DSTONLY| + MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE); + } +# endif +#else + dir = ire_lookup(dst.s_addr); +#endif +#if SOLARIS2 < 8 + if (dir != NULL) + if (dir->ire_ll_hdr_mp == NULL || dir->ire_ll_hdr_length == 0) + dir = NULL; +#else + if (dir != NULL) + if (dir->ire_fp_mp == NULL || dir->ire_dlureq_mp == NULL) { + ire_refrele(dir); + dir = NULL; + } +#endif +#else /* IRE_ILL_CN */ + if (fdp && fdp->fd_ifname[0] != 0) + ifname = fdp->fd_ifname; + + DB_CKSUMFLAGS(mb) = 0; /* disable hardware checksum */ + mp = pfil_make_dl_packet(mb, ip, target, ifname, &q); + if (mp == NULL) + { + goto bad_fastroute; + } + mb = mp; +#endif /* IRE_ILL_CN */ + +#ifdef IRE_ILL_CN + if (mp != NULL) { +#else + if (dir != NULL) { +#if SOLARIS2 < 8 + mp = dir->ire_ll_hdr_mp; + hlen = dir->ire_ll_hdr_length; +#else + mp = dir->ire_fp_mp; + hlen = mp ? 
mp->b_wptr - mp->b_rptr : 0; + if (mp == NULL) + mp = dir->ire_dlureq_mp; +#endif +#endif + if (fin->fin_out == 0) { + void *saveqif; + u_32_t pass; + + saveqif = fin->fin_ifp; + fin->fin_ifp = qif; + fin->fin_out = 1; + (void)fr_acctpkt(fin, &pass); + fin->fin_fr = NULL; + if (!fr || !(fr->fr_flags & FR_RETMASK)) + (void) fr_checkstate(fin, &pass); + + switch (fr_checknatout(fin, NULL)) + { + /* FALLTHROUGH */ + case 0 : + case 1 : + break; + case -1 : + goto bad_fastroute; + } + + fin->fin_out = 0; + fin->fin_ifp = saveqif; + } +#ifndef sparc + if (fin->fin_v == 4) { + __iplen = (u_short)ip->ip_len, + __ipoff = (u_short)ip->ip_off; + + ip->ip_len = htons(__iplen); + ip->ip_off = htons(__ipoff); + } +#endif +#ifndef IRE_ILL_CN + ifp = qif->qf_ill; + + if (mp != NULL) { + s = mb->b_rptr; + if ( +#if (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC) + (dohwcksum && + ifp->ill_ick.ick_magic == ICK_M_CTL_MAGIC) || +#endif + (hlen && (s - mb->b_datap->db_base) >= hlen)) { + s -= hlen; + mb->b_rptr = (u_char *)s; + bcopy((char *)mp->b_rptr, (char *)s, hlen); + } else { + mblk_t *mp2; + + mp2 = copyb(mp); + if (mp2 == NULL) + goto bad_fastroute; + linkb(mp2, mb); + mb = mp2; + } + } + *mpp = mb; + + if (dir->ire_stq != NULL) + q = dir->ire_stq; + else if (dir->ire_rfq != NULL) + q = WR(dir->ire_rfq); + if (q != NULL) + q = q->q_next; + if (q != NULL) { + RWLOCK_EXIT(&ipf_global); +#if (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC) + if ((fin->fin_p == IPPROTO_TCP) && dohwcksum && + (ifp->ill_ick.ick_magic == ICK_M_CTL_MAGIC)) { + tcphdr_t *tcp; + u_32_t t; + + tcp = (tcphdr_t *)((char *)ip + fin->fin_hlen); + t = ip->ip_src.s_addr; + t += ip->ip_dst.s_addr; + t += 30; + t = (t & 0xffff) + (t >> 16); + tcp->th_sum = t & 0xffff; + } +#endif + putnext(q, mb); + ATOMIC_INCL(fr_frouteok[0]); +#if SOLARIS2 >= 8 + ire_refrele(dir); +#endif + READ_ENTER(&ipf_global); + return 0; + } +#else /* IRE_ILL_CN */ + mb->b_queue = q; + *mpp = mb; + pfil_send_dl_packet(q, mb); + 
ATOMIC_INCL(fr_frouteok[0]); + return 0; +#endif /* IRE_ILL_CN */ + } +bad_fastroute: +#ifndef IRE_ILL_CN +#if SOLARIS2 >= 8 + if (dir != NULL) + ire_refrele(dir); +#endif +#endif + freemsg(mb); + ATOMIC_INCL(fr_frouteok[1]); + return -1; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_pullup */ +/* Returns: NULL == pullup failed, else pointer to protocol header */ +/* Parameters: m(I) - pointer to buffer where data packet starts */ +/* fin(I) - pointer to packet information */ +/* len(I) - number of bytes to pullup */ +/* */ +/* Attempt to move at least len bytes (from the start of the buffer) into a */ +/* single buffer for ease of access. Operating system native functions are */ +/* used to manage buffers - if necessary. If the entire packet ends up in */ +/* a single buffer, set the FI_COALESCE flag even though fr_coalesce() has */ +/* not been called. Both fin_ip and fin_dp are updated before exiting _IF_ */ +/* and ONLY if the pullup succeeds. */ +/* */ +/* We assume that 'min' is a pointer to a buffer that is part of the chain */ +/* of buffers that starts at *fin->fin_mp. */ +/* ------------------------------------------------------------------------ */ +void *fr_pullup(min, fin, len) +mb_t *min; +fr_info_t *fin; +int len; +{ + qpktinfo_t *qpi = fin->fin_qpi; + int out = fin->fin_out, dpoff, ipoff; + mb_t *m = min; + char *ip; + + if (m == NULL) + return NULL; + + ip = (char *)fin->fin_ip; + if ((fin->fin_flx & FI_COALESCE) != 0) + return ip; + + ipoff = fin->fin_ipoff; + if (fin->fin_dp != NULL) + dpoff = (char *)fin->fin_dp - (char *)ip; + else + dpoff = 0; + + if (M_LEN(m) < len) { + + /* + * pfil_precheck ensures the IP header is on a 32bit + * aligned address so simply fail if that isn't currently + * the case (should never happen). 
+ */ + int inc = 0; + + if (ipoff > 0) { + if ((ipoff & 3) != 0) { + inc = 4 - (ipoff & 3); + if (m->b_rptr - inc >= m->b_datap->db_base) + m->b_rptr -= inc; + else + inc = 0; + } + } + if (pullupmsg(m, len + ipoff + inc) == 0) { + ATOMIC_INCL(frstats[out].fr_pull[1]); + FREE_MB_T(*fin->fin_mp); + *fin->fin_mp = NULL; + fin->fin_m = NULL; + fin->fin_ip = NULL; + fin->fin_dp = NULL; + qpi->qpi_data = NULL; + return NULL; + } + m->b_rptr += inc; + fin->fin_m = m; + ip = MTOD(m, char *) + ipoff; + qpi->qpi_data = ip; + } + + ATOMIC_INCL(frstats[out].fr_pull[0]); + fin->fin_ip = (ip_t *)ip; + if (fin->fin_dp != NULL) + fin->fin_dp = (char *)fin->fin_ip + dpoff; + + if (len == fin->fin_plen) + fin->fin_flx |= FI_COALESCE; + return ip; +} diff --git a/usr/src/uts/common/inet/ipf/ip_frag.c b/usr/src/uts/common/inet/ipf/ip_frag.c new file mode 100644 index 0000000000..29362c8a83 --- /dev/null +++ b/usr/src/uts/common/inet/ipf/ip_frag.c @@ -0,0 +1,885 @@ +/* + * Copyright (C) 1993-2003 by Darren Reed. + * + * See the IPFILTER.LICENCE file for details on licencing. + * + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#if defined(KERNEL) || defined(_KERNEL) +# undef KERNEL +# undef _KERNEL +# define KERNEL 1 +# define _KERNEL 1 +#endif +#include <sys/errno.h> +#include <sys/types.h> +#include <sys/param.h> +#include <sys/time.h> +#include <sys/file.h> +#ifdef __hpux +# include <sys/timeout.h> +#endif +#if !defined(_KERNEL) +# include <stdio.h> +# include <string.h> +# include <stdlib.h> +# define _KERNEL +# ifdef __OpenBSD__ +struct file; +# endif +# include <sys/uio.h> +# undef _KERNEL +#endif +#if defined(_KERNEL) && (__FreeBSD_version >= 220000) +# include <sys/filio.h> +# include <sys/fcntl.h> +#else +# include <sys/ioctl.h> +#endif +#if !defined(linux) +# include <sys/protosw.h> +#endif +#include <sys/socket.h> +#if defined(_KERNEL) +# include <sys/systm.h> +# if !defined(__SVR4) && !defined(__svr4__) +# include <sys/mbuf.h> +# endif +#endif +#if !defined(__SVR4) && !defined(__svr4__) +# if defined(_KERNEL) && !defined(__sgi) && !defined(AIX) +# include <sys/kernel.h> +# endif +#else +# include <sys/byteorder.h> +# ifdef _KERNEL +# include <sys/dditypes.h> +# endif +# include <sys/stream.h> +# include <sys/kmem.h> +#endif +#include <net/if.h> +#ifdef sun +# include <net/af.h> +#endif +#include <net/route.h> +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#if !defined(linux) +# include <netinet/ip_var.h> +#endif +#include <netinet/tcp.h> +#include <netinet/udp.h> +#include <netinet/ip_icmp.h> +#include "netinet/ip_compat.h" +#include <netinet/tcpip.h> +#include "netinet/ip_fil.h" +#include "netinet/ip_nat.h" +#include "netinet/ip_frag.h" +#include "netinet/ip_state.h" +#include "netinet/ip_auth.h" +#include "netinet/ip_proxy.h" +#if (__FreeBSD_version >= 300000) +# include <sys/malloc.h> +# if defined(_KERNEL) +# ifndef IPFILTER_LKM +# include <sys/libkern.h> +# include <sys/systm.h> +# endif +extern struct callout_handle fr_slowtimer_ch; +# endif +#endif +#if defined(__NetBSD__) && 
(__NetBSD_Version__ >= 104230000) +# include <sys/callout.h> +extern struct callout fr_slowtimer_ch; +#endif +#if defined(__OpenBSD__) +# include <sys/timeout.h> +extern struct timeout fr_slowtimer_ch; +#endif +/* END OF INCLUDES */ + +#if !defined(lint) +static const char sccsid[] = "@(#)ip_frag.c 1.11 3/24/96 (C) 1993-2000 Darren Reed"; +static const char rcsid[] = "@(#)$Id: ip_frag.c,v 2.77.2.5 2005/08/11 14:33:10 darrenr Exp $"; +#endif + + +static ipfr_t *ipfr_list = NULL; +static ipfr_t **ipfr_tail = &ipfr_list; +static ipfr_t **ipfr_heads; + +static ipfr_t *ipfr_natlist = NULL; +static ipfr_t **ipfr_nattail = &ipfr_natlist; +static ipfr_t **ipfr_nattab; + +static ipfr_t *ipfr_ipidlist = NULL; +static ipfr_t **ipfr_ipidtail = &ipfr_ipidlist; +static ipfr_t **ipfr_ipidtab; + +static ipfrstat_t ipfr_stats; +static int ipfr_inuse = 0; +int ipfr_size = IPFT_SIZE; + +int fr_ipfrttl = 120; /* 60 seconds */ +int fr_frag_lock = 0; +int fr_frag_init = 0; +u_long fr_ticks = 0; + + +static ipfr_t *ipfr_newfrag __P((fr_info_t *, u_32_t, ipfr_t **)); +static ipfr_t *fr_fraglookup __P((fr_info_t *, ipfr_t **)); +static void fr_fragdelete __P((ipfr_t *, ipfr_t ***)); + +static frentry_t frblock; + +/* ------------------------------------------------------------------------ */ +/* Function: fr_fraginit */ +/* Returns: int - 0 == success, -1 == error */ +/* Parameters: Nil */ +/* */ +/* Initialise the hash tables for the fragment cache lookups. 
*/
/* ------------------------------------------------------------------------ */
int fr_fraginit()
{
	/* All three hash tables have the same number of bucket pointers. */
	size_t tabsz = ipfr_size * sizeof(ipfr_t *);

	/* Filter-rule fragment table. */
	KMALLOCS(ipfr_heads, ipfr_t **, tabsz);
	if (ipfr_heads == NULL)
		return -1;
	bzero((char *)ipfr_heads, tabsz);

	/* NAT fragment table. */
	KMALLOCS(ipfr_nattab, ipfr_t **, tabsz);
	if (ipfr_nattab == NULL)
		return -1;
	bzero((char *)ipfr_nattab, tabsz);

	/* IP ID fragment table. */
	KMALLOCS(ipfr_ipidtab, ipfr_t **, tabsz);
	if (ipfr_ipidtab == NULL)
		return -1;
	bzero((char *)ipfr_ipidtab, tabsz);

	RWLOCK_INIT(&ipf_frag, "ipf fragment rwlock");

	/* Initialise frblock with "block in all" */
	bzero((char *)&frblock, sizeof(frblock));
	frblock.fr_ref = 1;
	frblock.fr_flags = FR_BLOCK|FR_INQUE;	/* block in */

	fr_frag_init = 1;

	return 0;
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_fragunload                                               */
/* Returns:     Nil                                                         */
/* Parameters:  Nil                                                         */
/*                                                                          */
/* Free all memory allocated whilst running and from initialisation.
*/ +/* ------------------------------------------------------------------------ */ +void fr_fragunload() +{ + if (fr_frag_init == 1) { + fr_fragclear(); + + RW_DESTROY(&ipf_frag); + fr_frag_init = 0; + } + + if (ipfr_heads != NULL) + KFREES(ipfr_heads, ipfr_size * sizeof(ipfr_t *)); + ipfr_heads = NULL; + + if (ipfr_nattab != NULL) + KFREES(ipfr_nattab, ipfr_size * sizeof(ipfr_t *)); + ipfr_nattab = NULL; + + if (ipfr_ipidtab != NULL) + KFREES(ipfr_ipidtab, ipfr_size * sizeof(ipfr_t *)); + ipfr_ipidtab = NULL; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_fragstats */ +/* Returns: ipfrstat_t* - pointer to struct with current frag stats */ +/* Parameters: Nil */ +/* */ +/* Updates ipfr_stats with current information and returns a pointer to it */ +/* ------------------------------------------------------------------------ */ +ipfrstat_t *fr_fragstats() +{ + ipfr_stats.ifs_table = ipfr_heads; + ipfr_stats.ifs_nattab = ipfr_nattab; + ipfr_stats.ifs_inuse = ipfr_inuse; + return &ipfr_stats; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: ipfr_newfrag */ +/* Returns: ipfr_t * - pointer to fragment cache state info or NULL */ +/* Parameters: fin(I) - pointer to packet information */ +/* table(I) - pointer to frag table to add to */ +/* */ +/* Add a new entry to the fragment cache, registering it as having come */ +/* through this box, with the result of the filter operation. 
*/ +/* ------------------------------------------------------------------------ */ +static ipfr_t *ipfr_newfrag(fin, pass, table) +fr_info_t *fin; +u_32_t pass; +ipfr_t *table[]; +{ + ipfr_t *fra, frag; + u_int idx, off; + + if (ipfr_inuse >= IPFT_SIZE) + return NULL; + + if ((fin->fin_flx & (FI_FRAG|FI_BAD)) != FI_FRAG) + return NULL; + + if (pass & FR_FRSTRICT) + if (fin->fin_off != 0) + return NULL; + + frag.ipfr_p = fin->fin_p; + idx = fin->fin_p; + frag.ipfr_id = fin->fin_id; + idx += fin->fin_id; + frag.ipfr_source = fin->fin_fi.fi_src; + idx += frag.ipfr_src.s_addr; + frag.ipfr_dest = fin->fin_fi.fi_dst; + idx += frag.ipfr_dst.s_addr; + frag.ipfr_ifp = fin->fin_ifp; + idx *= 127; + idx %= IPFT_SIZE; + + frag.ipfr_optmsk = fin->fin_fi.fi_optmsk & IPF_OPTCOPY; + frag.ipfr_secmsk = fin->fin_fi.fi_secmsk; + frag.ipfr_auth = fin->fin_fi.fi_auth; + + /* + * first, make sure it isn't already there... + */ + for (fra = table[idx]; (fra != NULL); fra = fra->ipfr_hnext) + if (!bcmp((char *)&frag.ipfr_ifp, (char *)&fra->ipfr_ifp, + IPFR_CMPSZ)) { + ipfr_stats.ifs_exists++; + return NULL; + } + + /* + * allocate some memory, if possible, if not, just record that we + * failed to do so. + */ + KMALLOC(fra, ipfr_t *); + if (fra == NULL) { + ipfr_stats.ifs_nomem++; + return NULL; + } + + fra->ipfr_rule = fin->fin_fr; + if (fra->ipfr_rule != NULL) { + + frentry_t *fr; + + fr = fin->fin_fr; + MUTEX_ENTER(&fr->fr_lock); + fr->fr_ref++; + MUTEX_EXIT(&fr->fr_lock); + } + + /* + * Insert the fragment into the fragment table, copy the struct used + * in the search using bcopy rather than reassign each field. + * Set the ttl to the default. 
+ */ + if ((fra->ipfr_hnext = table[idx]) != NULL) + table[idx]->ipfr_hprev = &fra->ipfr_hnext; + fra->ipfr_hprev = table + idx; + fra->ipfr_data = NULL; + table[idx] = fra; + bcopy((char *)&frag.ipfr_ifp, (char *)&fra->ipfr_ifp, IPFR_CMPSZ); + fra->ipfr_ttl = fr_ticks + fr_ipfrttl; + + /* + * Compute the offset of the expected start of the next packet. + */ + off = fin->fin_off; + if (off == 0) { + fra->ipfr_seen0 = 1; + fra->ipfr_firstend = fin->fin_flen; + } else { + fra->ipfr_seen0 = 0; + fra->ipfr_firstend = 0; + } + fra->ipfr_off = off + fin->fin_dlen; + fra->ipfr_pass = pass; + ipfr_stats.ifs_new++; + ipfr_inuse++; + return fra; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_newfrag */ +/* Returns: int - 0 == success, -1 == error */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* Add a new entry to the fragment cache table based on the current packet */ +/* ------------------------------------------------------------------------ */ +int fr_newfrag(fin, pass) +u_32_t pass; +fr_info_t *fin; +{ + ipfr_t *fra; + + if (fr_frag_lock != 0) + return -1; + + WRITE_ENTER(&ipf_frag); + fra = ipfr_newfrag(fin, pass, ipfr_heads); + if (fra != NULL) { + *ipfr_tail = fra; + fra->ipfr_prev = ipfr_tail; + ipfr_tail = &fra->ipfr_next; + if (ipfr_list == NULL) + ipfr_list = fra; + fra->ipfr_next = NULL; + } + RWLOCK_EXIT(&ipf_frag); + return fra ? 0 : -1; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_nat_newfrag */ +/* Returns: int - 0 == success, -1 == error */ +/* Parameters: fin(I) - pointer to packet information */ +/* nat(I) - pointer to NAT structure */ +/* */ +/* Create a new NAT fragment cache entry based on the current packet and */ +/* the NAT structure for this "session". 
*/ +/* ------------------------------------------------------------------------ */ +int fr_nat_newfrag(fin, pass, nat) +fr_info_t *fin; +u_32_t pass; +nat_t *nat; +{ + ipfr_t *fra; + + if ((fin->fin_v != 4) || (fr_frag_lock != 0)) + return 0; + + WRITE_ENTER(&ipf_natfrag); + fra = ipfr_newfrag(fin, pass, ipfr_nattab); + if (fra != NULL) { + fra->ipfr_data = nat; + nat->nat_data = fra; + *ipfr_nattail = fra; + fra->ipfr_prev = ipfr_nattail; + ipfr_nattail = &fra->ipfr_next; + fra->ipfr_next = NULL; + } + RWLOCK_EXIT(&ipf_natfrag); + return fra ? 0 : -1; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_ipid_newfrag */ +/* Returns: int - 0 == success, -1 == error */ +/* Parameters: fin(I) - pointer to packet information */ +/* ipid(I) - new IP ID for this fragmented packet */ +/* */ +/* Create a new fragment cache entry for this packet and store, as a data */ +/* pointer, the new IP ID value. */ +/* ------------------------------------------------------------------------ */ +int fr_ipid_newfrag(fin, ipid) +fr_info_t *fin; +u_32_t ipid; +{ + ipfr_t *fra; + + if (fr_frag_lock) + return 0; + + WRITE_ENTER(&ipf_ipidfrag); + fra = ipfr_newfrag(fin, 0, ipfr_ipidtab); + if (fra != NULL) { + fra->ipfr_data = (void *)(uintptr_t)ipid; + *ipfr_ipidtail = fra; + fra->ipfr_prev = ipfr_ipidtail; + ipfr_ipidtail = &fra->ipfr_next; + fra->ipfr_next = NULL; + } + RWLOCK_EXIT(&ipf_ipidfrag); + return fra ? 0 : -1; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_fraglookup */ +/* Returns: ipfr_t * - pointer to ipfr_t structure if there's a */ +/* matching entry in the frag table, else NULL */ +/* Parameters: fin(I) - pointer to packet information */ +/* table(I) - pointer to fragment cache table to search */ +/* */ +/* Check the fragment cache to see if there is already a record of this */ +/* packet with its filter result known. 
*/ +/* ------------------------------------------------------------------------ */ +static ipfr_t *fr_fraglookup(fin, table) +fr_info_t *fin; +ipfr_t *table[]; +{ + ipfr_t *f, frag; + u_int idx; + + if ((fin->fin_flx & (FI_FRAG|FI_BAD)) != FI_FRAG) + return NULL; + + /* + * For fragments, we record protocol, packet id, TOS and both IP#'s + * (these should all be the same for all fragments of a packet). + * + * build up a hash value to index the table with. + */ + frag.ipfr_p = fin->fin_p; + idx = fin->fin_p; + frag.ipfr_id = fin->fin_id; + idx += fin->fin_id; + frag.ipfr_source = fin->fin_fi.fi_src; + idx += frag.ipfr_src.s_addr; + frag.ipfr_dest = fin->fin_fi.fi_dst; + idx += frag.ipfr_dst.s_addr; + frag.ipfr_ifp = fin->fin_ifp; + idx *= 127; + idx %= IPFT_SIZE; + + frag.ipfr_optmsk = fin->fin_fi.fi_optmsk & IPF_OPTCOPY; + frag.ipfr_secmsk = fin->fin_fi.fi_secmsk; + frag.ipfr_auth = fin->fin_fi.fi_auth; + + /* + * check the table, careful to only compare the right amount of data + */ + for (f = table[idx]; f; f = f->ipfr_hnext) + if (!bcmp((char *)&frag.ipfr_ifp, (char *)&f->ipfr_ifp, + IPFR_CMPSZ)) { + u_short off; + + /* + * We don't want to let short packets match because + * they could be compromising the security of other + * rules that want to match on layer 4 fields (and + * can't because they have been fragmented off.) + * Why do this check here? The counter acts as an + * indicator of this kind of attack, whereas if it was + * elsewhere, it wouldn't know if other matching + * packets had been seen. + */ + if (fin->fin_flx & FI_SHORT) { + ATOMIC_INCL(ipfr_stats.ifs_short); + continue; + } + + /* + * XXX - We really need to be guarding against the + * retransmission of (src,dst,id,offset-range) here + * because a fragmented packet is never resent with + * the same IP ID# (or shouldn't). 
+ */ + off = fin->fin_off; /* same as in ipfr_newfrag() */ + if (f->ipfr_seen0) { + if (off == 0) { + ATOMIC_INCL(ipfr_stats.ifs_retrans0); + continue; + } + } else if (off == 0) { + f->ipfr_seen0 = 1; + f->ipfr_firstend = fin->fin_flen; + } + + if (f != table[idx]) { + ipfr_t **fp; + + /* + * Move fragment info. to the top of the list + * to speed up searches. First, delink... + */ + fp = f->ipfr_hprev; + (*fp) = f->ipfr_hnext; + if (f->ipfr_hnext != NULL) + f->ipfr_hnext->ipfr_hprev = fp; + /* + * Then put back at the top of the chain. + */ + f->ipfr_hnext = table[idx]; + table[idx]->ipfr_hprev = &f->ipfr_hnext; + f->ipfr_hprev = table + idx; + table[idx] = f; + } + + if (fin->fin_v == 6) { + if (f->ipfr_seen0 && (off < f->ipfr_firstend)) + fin->fin_flx |= FI_BAD; + } + /* + * If we've follwed the fragments, and this is the + * last (in order), shrink expiration time. + */ + if (off == f->ipfr_off) { + if (!(fin->fin_ip->ip_off & IP_MF)) + f->ipfr_ttl = fr_ticks + 1; + f->ipfr_off = fin->fin_dlen + off; + } else if (f->ipfr_pass & FR_FRSTRICT) + continue; + ATOMIC_INCL(ipfr_stats.ifs_hits); + return f; + } + return NULL; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_nat_knownfrag */ +/* Returns: nat_t* - pointer to 'parent' NAT structure if frag table */ +/* match found, else NULL */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* Functional interface for NAT lookups of the NAT fragment cache */ +/* ------------------------------------------------------------------------ */ +nat_t *fr_nat_knownfrag(fin) +fr_info_t *fin; +{ + nat_t *nat; + ipfr_t *ipf; + + if ((fin->fin_v != 4) || (fr_frag_lock) || !ipfr_natlist) + return NULL; + READ_ENTER(&ipf_natfrag); + ipf = fr_fraglookup(fin, ipfr_nattab); + if (ipf != NULL) { + nat = ipf->ipfr_data; + /* + * This is the last fragment for this packet. 
+ */ + if ((ipf->ipfr_ttl == fr_ticks + 1) && (nat != NULL)) { + nat->nat_data = NULL; + ipf->ipfr_data = NULL; + } + } else + nat = NULL; + RWLOCK_EXIT(&ipf_natfrag); + return nat; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_ipid_knownfrag */ +/* Returns: u_32_t - IPv4 ID for this packet if match found, else */ +/* return 0xfffffff to indicate no match. */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* Functional interface for IP ID lookups of the IP ID fragment cache */ +/* ------------------------------------------------------------------------ */ +u_32_t fr_ipid_knownfrag(fin) +fr_info_t *fin; +{ + ipfr_t *ipf; + u_32_t id; + + if ((fin->fin_v != 4) || (fr_frag_lock) || !ipfr_ipidlist) + return 0xffffffff; + + READ_ENTER(&ipf_ipidfrag); + ipf = fr_fraglookup(fin, ipfr_ipidtab); + if (ipf != NULL) + id = (u_32_t)(uintptr_t)ipf->ipfr_data; + else + id = 0xffffffff; + RWLOCK_EXIT(&ipf_ipidfrag); + return id; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_knownfrag */ +/* Returns: frentry_t* - pointer to filter rule if a match is found in */ +/* the frag cache table, else NULL. */ +/* Parameters: fin(I) - pointer to packet information */ +/* passp(O) - pointer to where to store rule flags resturned */ +/* */ +/* Functional interface for normal lookups of the fragment cache. If a */ +/* match is found, return the rule pointer and flags from the rule, except */ +/* that if FR_LOGFIRST is set, reset FR_LOG. 
*/ +/* ------------------------------------------------------------------------ */ +frentry_t *fr_knownfrag(fin, passp) +fr_info_t *fin; +u_32_t *passp; +{ + frentry_t *fr = NULL; + ipfr_t *fra; + u_32_t pass, oflx; + + if ((fr_frag_lock) || (ipfr_list == NULL)) + return NULL; + + READ_ENTER(&ipf_frag); + oflx = fin->fin_flx; + fra = fr_fraglookup(fin, ipfr_heads); + if (fra != NULL) { + fr = fra->ipfr_rule; + fin->fin_fr = fr; + if (fr != NULL) { + pass = fr->fr_flags; + if ((pass & FR_LOGFIRST) != 0) + pass &= ~(FR_LOGFIRST|FR_LOG); + *passp = pass; + } + } + if (!(oflx & FI_BAD) && (fin->fin_flx & FI_BAD)) { + *passp &= ~FR_CMDMASK; + *passp |= FR_BLOCK; + fr = &frblock; + } + RWLOCK_EXIT(&ipf_frag); + return fr; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_forget */ +/* Returns: Nil */ +/* Parameters: ptr(I) - pointer to data structure */ +/* */ +/* Search through all of the fragment cache entries and wherever a pointer */ +/* is found to match ptr, reset it to NULL. */ +/* ------------------------------------------------------------------------ */ +void fr_forget(ptr) +void *ptr; +{ + ipfr_t *fr; + + WRITE_ENTER(&ipf_frag); + for (fr = ipfr_list; fr; fr = fr->ipfr_next) + if (fr->ipfr_data == ptr) + fr->ipfr_data = NULL; + RWLOCK_EXIT(&ipf_frag); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_forgetnat */ +/* Returns: Nil */ +/* Parameters: ptr(I) - pointer to data structure */ +/* */ +/* Search through all of the fragment cache entries for NAT and wherever a */ +/* pointer is found to match ptr, reset it to NULL. 
*/ +/* ------------------------------------------------------------------------ */ +void fr_forgetnat(ptr) +void *ptr; +{ + ipfr_t *fr; + + WRITE_ENTER(&ipf_natfrag); + for (fr = ipfr_natlist; fr; fr = fr->ipfr_next) + if (fr->ipfr_data == ptr) + fr->ipfr_data = NULL; + RWLOCK_EXIT(&ipf_natfrag); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_fragdelete */ +/* Returns: Nil */ +/* Parameters: fra(I) - pointer to fragment structure to delete */ +/* tail(IO) - pointer to the pointer to the tail of the frag */ +/* list */ +/* */ +/* Remove a fragment cache table entry from the table & list. Also free */ +/* the filter rule it is associated with it if it is no longer used as a */ +/* result of decreasing the reference count. */ +/* ------------------------------------------------------------------------ */ +static void fr_fragdelete(fra, tail) +ipfr_t *fra, ***tail; +{ + frentry_t *fr; + + fr = fra->ipfr_rule; + if (fr != NULL) + (void)fr_derefrule(&fr); + + if (fra->ipfr_next) + fra->ipfr_next->ipfr_prev = fra->ipfr_prev; + *fra->ipfr_prev = fra->ipfr_next; + if (*tail == &fra->ipfr_next) + *tail = fra->ipfr_prev; + + if (fra->ipfr_hnext) + fra->ipfr_hnext->ipfr_hprev = fra->ipfr_hprev; + *fra->ipfr_hprev = fra->ipfr_hnext; + KFREE(fra); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_fragclear */ +/* Returns: Nil */ +/* Parameters: Nil */ +/* */ +/* Free memory in use by fragment state information kept. Do the normal */ +/* fragment state stuff first and then the NAT-fragment table. 
*/ +/* ------------------------------------------------------------------------ */ +void fr_fragclear() +{ + ipfr_t *fra; + nat_t *nat; + + WRITE_ENTER(&ipf_frag); + while ((fra = ipfr_list) != NULL) + fr_fragdelete(fra, &ipfr_tail); + ipfr_tail = &ipfr_list; + RWLOCK_EXIT(&ipf_frag); + + WRITE_ENTER(&ipf_nat); + WRITE_ENTER(&ipf_natfrag); + while ((fra = ipfr_natlist) != NULL) { + nat = fra->ipfr_data; + if (nat != NULL) { + if (nat->nat_data == fra) + nat->nat_data = NULL; + } + fr_fragdelete(fra, &ipfr_nattail); + } + ipfr_nattail = &ipfr_natlist; + RWLOCK_EXIT(&ipf_natfrag); + RWLOCK_EXIT(&ipf_nat); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_fragexpire */ +/* Returns: Nil */ +/* Parameters: Nil */ +/* */ +/* Expire entries in the fragment cache table that have been there too long */ +/* ------------------------------------------------------------------------ */ +void fr_fragexpire() +{ + ipfr_t **fp, *fra; + nat_t *nat; + SPL_INT(s); + + if (fr_frag_lock) + return; + + SPL_NET(s); + WRITE_ENTER(&ipf_frag); + /* + * Go through the entire table, looking for entries to expire, + * which is indicated by the ttl being less than or equal to fr_ticks. + */ + for (fp = &ipfr_list; ((fra = *fp) != NULL); ) { + if (fra->ipfr_ttl > fr_ticks) + break; + fr_fragdelete(fra, &ipfr_tail); + ipfr_stats.ifs_expire++; + ipfr_inuse--; + } + RWLOCK_EXIT(&ipf_frag); + + WRITE_ENTER(&ipf_ipidfrag); + for (fp = &ipfr_ipidlist; ((fra = *fp) != NULL); ) { + if (fra->ipfr_ttl > fr_ticks) + break; + fr_fragdelete(fra, &ipfr_ipidtail); + ipfr_stats.ifs_expire++; + ipfr_inuse--; + } + RWLOCK_EXIT(&ipf_ipidfrag); + + /* + * Same again for the NAT table, except that if the structure also + * still points to a NAT structure, and the NAT structure points back + * at the one to be free'd, NULL the reference from the NAT struct. 
+ * NOTE: We need to grab both mutex's early, and in this order so as + * to prevent a deadlock if both try to expire at the same time. + */ + WRITE_ENTER(&ipf_nat); + WRITE_ENTER(&ipf_natfrag); + for (fp = &ipfr_natlist; ((fra = *fp) != NULL); ) { + if (fra->ipfr_ttl > fr_ticks) + break; + nat = fra->ipfr_data; + if (nat != NULL) { + if (nat->nat_data == fra) + nat->nat_data = NULL; + } + fr_fragdelete(fra, &ipfr_nattail); + ipfr_stats.ifs_expire++; + ipfr_inuse--; + } + RWLOCK_EXIT(&ipf_natfrag); + RWLOCK_EXIT(&ipf_nat); + SPL_X(s); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_slowtimer */ +/* Returns: Nil */ +/* Parameters: Nil */ +/* */ +/* Slowly expire held state for fragments. Timeouts are set * in */ +/* expectation of this being called twice per second. */ +/* ------------------------------------------------------------------------ */ +#if !defined(_KERNEL) || (!SOLARIS && !defined(__hpux) && !defined(__sgi) && \ + !defined(__osf__) && !defined(linux)) +# if defined(_KERNEL) && ((BSD >= 199103) || defined(__sgi)) +void fr_slowtimer __P((void *ptr)) +# else +int fr_slowtimer() +# endif +{ + READ_ENTER(&ipf_global); + + fr_fragexpire(); + fr_timeoutstate(); + fr_natexpire(); + fr_authexpire(); + fr_ticks++; + if (fr_running <= 0) + goto done; +# ifdef _KERNEL +# if defined(__NetBSD__) && (__NetBSD_Version__ >= 104240000) + callout_reset(&fr_slowtimer_ch, hz / 2, fr_slowtimer, NULL); +# else +# if defined(__OpenBSD__) + timeout_add(&fr_slowtimer_ch, hz/2); +# else +# if (__FreeBSD_version >= 300000) + fr_slowtimer_ch = timeout(fr_slowtimer, NULL, hz/2); +# else +# ifdef linux + ; +# else + timeout(fr_slowtimer, NULL, hz/2); +# endif +# endif /* FreeBSD */ +# endif /* OpenBSD */ +# endif /* NetBSD */ +# endif +done: + RWLOCK_EXIT(&ipf_global); +# if (BSD < 199103) || !defined(_KERNEL) + return 0; +# endif +} +#endif /* !SOLARIS && !defined(__hpux) && !defined(__sgi) */ diff --git 
a/usr/src/uts/common/inet/ipf/ip_htable.c b/usr/src/uts/common/inet/ipf/ip_htable.c new file mode 100644 index 0000000000..4ce3cc411e --- /dev/null +++ b/usr/src/uts/common/inet/ipf/ip_htable.c @@ -0,0 +1,618 @@ +/* + * Copyright (C) 1993-2001, 2003 by Darren Reed. + * + * See the IPFILTER.LICENCE file for details on licencing. + * + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#if defined(KERNEL) || defined(_KERNEL) +# undef KERNEL +# undef _KERNEL +# define KERNEL 1 +# define _KERNEL 1 +#endif +#include <sys/param.h> +#include <sys/types.h> +#include <sys/errno.h> +#include <sys/time.h> +#include <sys/file.h> +#if !defined(_KERNEL) +# include <stdlib.h> +# include <string.h> +# define _KERNEL +# ifdef __OpenBSD__ +struct file; +# endif +# include <sys/uio.h> +# undef _KERNEL +#endif +#include <sys/socket.h> +#if defined(__FreeBSD_version) && (__FreeBSD_version >= 300000) +# include <sys/malloc.h> +#endif +#if defined(__FreeBSD__) +# include <sys/cdefs.h> +# include <sys/proc.h> +#endif +#if !defined(__svr4__) && !defined(__SVR4) && !defined(__hpux) && \ + !defined(linux) +# include <sys/mbuf.h> +#endif +#if defined(_KERNEL) +# include <sys/systm.h> +#else +# include <stdio.h> +#endif +#include <netinet/in.h> +#include <net/if.h> + +#include "netinet/ip_compat.h" +#include "netinet/ip_fil.h" +#include "netinet/ip_lookup.h" +#include "netinet/ip_htable.h" +/* END OF INCLUDES */ + +#if !defined(lint) +static const char rcsid[] = "@(#)$Id: ip_htable.c,v 2.34.2.3 2005/05/14 05:11:38 darrenr Exp $"; +#endif + +#ifdef IPFILTER_LOOKUP +static iphtent_t *fr_iphmfind __P((iphtable_t *, struct in_addr *)); +#ifdef USE_INET6 +static iphtent_t *fr_iphmfind6 __P((iphtable_t *, struct in6_addr *)); +static uint32_t sum4(uint32_t *); +static void left_shift_ipv6 __P((char *)); +#endif + +static u_long ipht_nomem[IPL_LOGSIZE] = { 0, 0, 0, 0, 0, 0, 0, 0 }; +static u_long 
ipf_nhtables[IPL_LOGSIZE] = { 0, 0, 0, 0, 0, 0, 0, 0 }; +static u_long ipf_nhtnodes[IPL_LOGSIZE] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + +iphtable_t *ipf_htables[IPL_LOGSIZE] = { NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL }; + + +void fr_htable_unload() +{ + iplookupflush_t fop; + + fop.iplf_unit = IPL_LOGALL; + (void)fr_flushhtable(&fop); +} + + +int fr_gethtablestat(op) +iplookupop_t *op; +{ + iphtstat_t stats; + + if (op->iplo_size != sizeof(stats)) + return EINVAL; + + stats.iphs_tables = ipf_htables[op->iplo_unit]; + stats.iphs_numtables = ipf_nhtables[op->iplo_unit]; + stats.iphs_numnodes = ipf_nhtnodes[op->iplo_unit]; + stats.iphs_nomem = ipht_nomem[op->iplo_unit]; + + return COPYOUT(&stats, op->iplo_struct, sizeof(stats)); + +} + + +/* + * Create a new hash table using the template passed. + */ +int fr_newhtable(op) +iplookupop_t *op; +{ + iphtable_t *iph, *oiph; + char name[FR_GROUPLEN]; + int err, i, unit; + + KMALLOC(iph, iphtable_t *); + if (iph == NULL) { + ipht_nomem[op->iplo_unit]++; + return ENOMEM; + } + + err = COPYIN(op->iplo_struct, iph, sizeof(*iph)); + if (err != 0) { + KFREE(iph); + return EFAULT; + } + + unit = op->iplo_unit; + if (iph->iph_unit != unit) { + KFREE(iph); + return EINVAL; + } + + if ((op->iplo_arg & IPHASH_ANON) == 0) { + if (fr_findhtable(op->iplo_unit, op->iplo_name) != NULL) { + KFREE(iph); + return EEXIST; + } + } else { + i = IPHASH_ANON; + do { + i++; +#if defined(SNPRINTF) && defined(_KERNEL) + (void)SNPRINTF(name, sizeof(name), "%u", i); +#else + (void)sprintf(name, "%u", i); +#endif + for (oiph = ipf_htables[unit]; oiph != NULL; + oiph = oiph->iph_next) + if (strncmp(oiph->iph_name, name, + sizeof(oiph->iph_name)) == 0) + break; + } while (oiph != NULL); + (void)strncpy(iph->iph_name, name, sizeof(iph->iph_name)); + err = COPYOUT(iph, op->iplo_struct, sizeof(*iph)); + if (err != 0) { + KFREE(iph); + return EFAULT; + } + iph->iph_type |= IPHASH_ANON; + } + + KMALLOCS(iph->iph_table, iphtent_t **, + iph->iph_size * 
sizeof(*iph->iph_table)); + if (iph->iph_table == NULL) { + KFREE(iph); + ipht_nomem[unit]++; + return ENOMEM; + } + + bzero((char *)iph->iph_table, iph->iph_size * sizeof(*iph->iph_table)); + iph->iph_masks[0] = 0; + iph->iph_masks[1] = 0; + iph->iph_masks[2] = 0; + iph->iph_masks[3] = 0; + + iph->iph_next = ipf_htables[unit]; + iph->iph_pnext = &ipf_htables[unit]; + if (ipf_htables[unit] != NULL) + ipf_htables[unit]->iph_pnext = &iph->iph_next; + ipf_htables[unit] = iph; + + ipf_nhtables[unit]++; + + return 0; +} + + +/* + */ +int fr_removehtable(op) +iplookupop_t *op; +{ + iphtable_t *iph; + + + iph = fr_findhtable(op->iplo_unit, op->iplo_name); + if (iph == NULL) + return ESRCH; + + if (iph->iph_unit != op->iplo_unit) { + return EINVAL; + } + + if (iph->iph_ref != 0) { + return EBUSY; + } + + fr_delhtable(iph); + + return 0; +} + + +void fr_delhtable(iph) +iphtable_t *iph; +{ + iphtent_t *ipe; + int i; + + for (i = 0; i < iph->iph_size; i++) + while ((ipe = iph->iph_table[i]) != NULL) + if (fr_delhtent(iph, ipe) != 0) + return; + + *iph->iph_pnext = iph->iph_next; + if (iph->iph_next != NULL) + iph->iph_next->iph_pnext = iph->iph_pnext; + + ipf_nhtables[iph->iph_unit]--; + + if (iph->iph_ref == 0) { + KFREES(iph->iph_table, iph->iph_size * sizeof(*iph->iph_table)); + KFREE(iph); + } +} + + +void fr_derefhtable(iph) +iphtable_t *iph; +{ + iph->iph_ref--; + if (iph->iph_ref == 0) + fr_delhtable(iph); +} + + +iphtable_t *fr_findhtable(unit, name) +int unit; +char *name; +{ + iphtable_t *iph; + + for (iph = ipf_htables[unit]; iph != NULL; iph = iph->iph_next) + if (strncmp(iph->iph_name, name, sizeof(iph->iph_name)) == 0) + break; + return iph; +} + + +size_t fr_flushhtable(op) +iplookupflush_t *op; +{ + iphtable_t *iph; + size_t freed; + int i; + + freed = 0; + + for (i = 0; i <= IPL_LOGMAX; i++) { + if (op->iplf_unit == i || op->iplf_unit == IPL_LOGALL) { + while ((iph = ipf_htables[i]) != NULL) { + fr_delhtable(iph); + freed++; + } + } + } + + return freed; +} + 
+ +/* + * Add an entry to a hash table. + */ +int fr_addhtent(iph, ipeo) +iphtable_t *iph; +iphtent_t *ipeo; +{ + iphtent_t *ipe; + u_int hv; + int bits; + + KMALLOC(ipe, iphtent_t *); + if (ipe == NULL) + return -1; + + bcopy((char *)ipeo, (char *)ipe, sizeof(*ipe)); +#ifdef USE_INET6 + if (ipe->ipe_family == AF_INET6) { + bits = count6bits((u_32_t *)ipe->ipe_mask.in6_addr8); + hv = IPE_HASH_FN(sum4((uint32_t *)ipe->ipe_addr.in6_addr8), + sum4((uint32_t *)ipe->ipe_mask.in6_addr8), + iph->iph_size); + } else +#endif + if (ipe->ipe_family == AF_INET) + { + ipe->ipe_addr.in4_addr &= ipe->ipe_mask.in4_addr; + ipe->ipe_addr.in4_addr = ntohl(ipe->ipe_addr.in4_addr); + bits = count4bits(ipe->ipe_mask.in4_addr); + ipe->ipe_mask.in4_addr = ntohl(ipe->ipe_mask.in4_addr); + + hv = IPE_HASH_FN(ipe->ipe_addr.in4_addr, ipe->ipe_mask.in4_addr, + iph->iph_size); + } else + return -1; + + ipe->ipe_ref = 0; + ipe->ipe_next = iph->iph_table[hv]; + ipe->ipe_pnext = iph->iph_table + hv; + + if (iph->iph_table[hv] != NULL) + iph->iph_table[hv]->ipe_pnext = &ipe->ipe_next; + iph->iph_table[hv] = ipe; +#ifdef USE_INET6 + if (ipe->ipe_family == AF_INET6) { + if ((bits >= 0) && (bits != 128)) + if (bits >= 96) + iph->iph_masks[0] |= 1 << (bits - 96); + else if (bits >= 64) + iph->iph_masks[1] |= 1 << (bits - 64); + else if (bits >= 32) + iph->iph_masks[2] |= 1 << (bits - 32); + else + iph->iph_masks[3] |= 1 << bits; + + } else +#endif + { + if ((bits >= 0) && (bits != 32)) + iph->iph_masks[3] |= 1 << bits; + } + + switch (iph->iph_type & ~IPHASH_ANON) + { + case IPHASH_GROUPMAP : + ipe->ipe_ptr = fr_addgroup(ipe->ipe_group, NULL, + iph->iph_flags, IPL_LOGIPF, + fr_active); + break; + + default : + ipe->ipe_ptr = NULL; + ipe->ipe_value = 0; + break; + } + + ipf_nhtnodes[iph->iph_unit]++; + + return 0; +} + + +/* + * Delete an entry from a hash table. 
+ */ +int fr_delhtent(iph, ipe) +iphtable_t *iph; +iphtent_t *ipe; +{ + + if (ipe->ipe_ref != 0) + return EBUSY; + + + *ipe->ipe_pnext = ipe->ipe_next; + if (ipe->ipe_next != NULL) + ipe->ipe_next->ipe_pnext = ipe->ipe_pnext; + + switch (iph->iph_type & ~IPHASH_ANON) + { + case IPHASH_GROUPMAP : + if (ipe->ipe_group != NULL) + fr_delgroup(ipe->ipe_group, IPL_LOGIPF, fr_active); + break; + + default : + ipe->ipe_ptr = NULL; + ipe->ipe_value = 0; + break; + } + + KFREE(ipe); + + ipf_nhtnodes[iph->iph_unit]--; + + return 0; +} + + +void *fr_iphmfindgroup(tptr, version, aptr) +void *tptr; +int version; +void *aptr; +{ + i6addr_t *addr; + iphtable_t *iph; + iphtent_t *ipe; + void *rval; + + if ((version != 4) +#ifdef USE_INET6 + && (version != 6) +#endif + ) + return NULL; + + READ_ENTER(&ip_poolrw); + iph = tptr; + addr = aptr; + +#ifdef USE_INET6 + if (version == 6) + ipe = fr_iphmfind6(iph, &addr->in6); + else +#endif + if (version == 4) + ipe = fr_iphmfind(iph, &addr->in4); + else + ipe = NULL; + if (ipe != NULL) + rval = ipe->ipe_ptr; + else + rval = NULL; + RWLOCK_EXIT(&ip_poolrw); + return rval; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_iphmfindip */ +/* Returns: int - 0 == +ve match, -1 == error, 1 == -ve/no match */ +/* Parameters: tptr(I) - pointer to the pool to search */ +/* version(I) - IP protocol version (4 or 6) */ +/* aptr(I) - pointer to address information */ +/* */ +/* Search the hash table for a given address and return a search result. 
*/ +/* ------------------------------------------------------------------------ */ +int fr_iphmfindip(tptr, version, aptr) +void *tptr, *aptr; +int version; +{ + i6addr_t *addr; + iphtable_t *iph; + iphtent_t *ipe; + int rval; + + if ((version != 4) +#ifdef USE_INET6 + && (version != 6) +#endif + ) + return -1; + + if (tptr == NULL || aptr == NULL) + return -1; + + iph = tptr; + addr = aptr; + + READ_ENTER(&ip_poolrw); +#ifdef USE_INET6 + if (version == 6) + ipe = fr_iphmfind6(iph, &addr->in6); + else +#endif + if (version == 4) + ipe = fr_iphmfind(iph, &addr->in4); + else + ipe = NULL; + if (ipe != NULL) + rval = 0; + else + rval = 1; + RWLOCK_EXIT(&ip_poolrw); + return rval; +} + + +/* Locks: ip_poolrw */ +static iphtent_t *fr_iphmfind(iph, addr) +iphtable_t *iph; +struct in_addr *addr; +{ + u_32_t hmsk, msk, ips; + iphtent_t *ipe; + u_int hv; + + hmsk = iph->iph_masks[3]; + msk = 0xffffffff; +maskloop: + ips = ntohl(addr->s_addr) & msk; + hv = IPE_HASH_FN(ips, msk, iph->iph_size); + for (ipe = iph->iph_table[hv]; (ipe != NULL); ipe = ipe->ipe_next) { + if (ipe->ipe_mask.in4_addr != msk || + ipe->ipe_addr.in4_addr != ips) { + continue; + } + break; + } + + if ((ipe == NULL) && (hmsk != 0)) { + while (hmsk != 0) { + msk <<= 1; + if (hmsk & 0x80000000) + break; + hmsk <<= 1; + } + if (hmsk != 0) { + hmsk <<= 1; + goto maskloop; + } + } + return ipe; +} + + +#ifdef USE_INET6 +/* Locks: ip_poolrw */ +static iphtent_t *fr_iphmfind6(iph, addr) +iphtable_t *iph; +struct in6_addr *addr; +{ + u_32_t hmsk[4], msk[4], ips[4], *and; + iphtent_t *ipe; + u_int hv; + + hmsk[0] = iph->iph_masks[0]; + hmsk[1] = iph->iph_masks[1]; + hmsk[2] = iph->iph_masks[2]; + hmsk[3] = iph->iph_masks[3]; + + msk[0] = 0xffffffff; + msk[1] = 0xffffffff; + msk[2] = 0xffffffff; + msk[3] = 0xffffffff; +maskloop: + and = (u_32_t *)addr->s6_addr; + ips[0] = *and & msk[0]; + ips[1] = *(and + 1) & msk[1]; + ips[2] = *(and + 2) & msk[2]; + ips[3] = *(and + 3) & msk[3]; + + hv = 
IPE_HASH_FN(sum4((uint32_t *)addr), sum4((uint32_t *)msk), + iph->iph_size); + for (ipe = iph->iph_table[hv]; (ipe != NULL); ipe = ipe->ipe_next) { + if (bcmp((void *)&ipe->ipe_mask.in6, (void *)msk, 16) || + bcmp((void *)&ipe->ipe_addr.in6, (void *)ips, 16)) + continue; + break; + } + + if ((ipe == NULL) && ((hmsk[0] != 0) || + (hmsk[1] != 0) || + (hmsk[2] != 0) || + (hmsk[3] != 0) )) { + while ((hmsk[0] != 0) && (hmsk[1] != 0) && + (hmsk[2] != 0) && (hmsk[3] != 0)) { + left_shift_ipv6((char *)msk); + if (hmsk[0] & 0x80000000) + break; + left_shift_ipv6((char *)hmsk); + } + if ((hmsk[0] != 0) && (hmsk[1] != 0) && + (hmsk[2] != 0) && (hmsk[3] != 0)) { + left_shift_ipv6((char *)hmsk); + goto maskloop; + } + } + return ipe; +} + + +/* + * sum4: ipv6 add -> 4 bytes values + */ +static uint32_t sum4(add) +uint32_t *add; +{ + return (*add + *(add + 1) + *(add + 2) + *(add + 3)); +} + +/* + * left shift on 128 bits + */ +static void left_shift_ipv6(data) +char *data; +{ + u_32_t *sd; + + sd = (u_32_t *)data; + sd[0] <<= 1; + if (sd[1] >= 0x80000000) + sd[0] += 1; + + sd[1] <<= 1; + if (sd[2] >= 0x80000000) + sd[1] += 1; + + sd[2] <<= 1; + if (sd[3] >= 0x80000000) + sd[2] += 1; + + sd[3] <<= 1; +} +#endif +#endif /* IPFILTER_LOOKUP */ diff --git a/usr/src/uts/common/inet/ipf/ip_log.c b/usr/src/uts/common/inet/ipf/ip_log.c new file mode 100644 index 0000000000..364b2e08e5 --- /dev/null +++ b/usr/src/uts/common/inet/ipf/ip_log.c @@ -0,0 +1,676 @@ +/* + * Copyright (C) 1997-2003 by Darren Reed. + * + * See the IPFILTER.LICENCE file for details on licencing. + * + * $Id: ip_log.c,v 2.75.2.7 2005/06/11 07:47:44 darrenr Exp $ + * + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/param.h> +#if defined(KERNEL) || defined(_KERNEL) +# undef KERNEL +# undef _KERNEL +# define KERNEL 1 +# define _KERNEL 1 +#endif +#if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \ + defined(_KERNEL) +# include "opt_ipfilter_log.h" +#endif +#if defined(__FreeBSD__) && !defined(IPFILTER_LKM) +# if defined(_KERNEL) +# if defined(__FreeBSD_version) && (__FreeBSD_version >= 300000) +# include "opt_ipfilter.h" +# endif +# else +# include <osreldate.h> +# endif +#endif +#ifndef SOLARIS +# define SOLARIS (defined(sun) && (defined(__svr4__) || defined(__SVR4))) +#endif +#include <sys/errno.h> +#include <sys/types.h> +#include <sys/file.h> +#ifndef _KERNEL +# include <stdio.h> +# include <string.h> +# include <stdlib.h> +# include <ctype.h> +# define _KERNEL +# define KERNEL +# ifdef __OpenBSD__ +struct file; +# endif +# include <sys/uio.h> +# undef _KERNEL +# undef KERNEL +#endif +#if __FreeBSD_version >= 220000 && defined(_KERNEL) +# include <sys/fcntl.h> +# include <sys/filio.h> +#else +# include <sys/ioctl.h> +#endif +#include <sys/time.h> +#if defined(_KERNEL) +# include <sys/systm.h> +# if defined(NetBSD) && (__NetBSD_Version__ >= 104000000) +# include <sys/proc.h> +# endif +#endif /* _KERNEL */ +#if !SOLARIS && !defined(__hpux) && !defined(linux) +# if (NetBSD > 199609) || (OpenBSD > 199603) || (__FreeBSD_version >= 300000) +# include <sys/dirent.h> +# else +# include <sys/dir.h> +# endif +# include <sys/mbuf.h> +#else +# if !defined(__hpux) && defined(_KERNEL) +# include <sys/filio.h> +# include <sys/cred.h> +# include <sys/ddi.h> +# include <sys/sunddi.h> +# include <sys/ksynch.h> +# include <sys/kmem.h> +# include <sys/mkdev.h> +# include <sys/dditypes.h> +# include <sys/cmn_err.h> +# endif /* !__hpux */ +#endif /* !SOLARIS && !__hpux */ +#if !defined(linux) +# include <sys/protosw.h> +#endif +#include <sys/socket.h> + +#include <net/if.h> +#ifdef sun +# include <net/af.h> 
+#endif +#if __FreeBSD_version >= 300000 +# include <net/if_var.h> +#endif +#include <net/route.h> +#include <netinet/in.h> +#ifdef __sgi +# include <sys/ddi.h> +# ifdef IFF_DRVRLOCK /* IRIX6 */ +# include <sys/hashing.h> +# endif +#endif +#if !defined(__hpux) && !defined(linux) && \ + !(defined(__sgi) && !defined(IFF_DRVRLOCK)) /*IRIX<6*/ +# include <netinet/in_var.h> +#endif +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#include <netinet/tcp.h> +#include <netinet/udp.h> +#include <netinet/ip_icmp.h> +#ifdef USE_INET6 +# include <netinet/icmp6.h> +#endif +#if !defined(linux) +# include <netinet/ip_var.h> +#endif +#ifndef _KERNEL +# include <syslog.h> +#endif +#include "netinet/ip_compat.h" +#include <netinet/tcpip.h> +#include "netinet/ip_fil.h" +#include "netinet/ip_nat.h" +#include "netinet/ip_frag.h" +#include "netinet/ip_state.h" +#include "netinet/ip_auth.h" +#if (__FreeBSD_version >= 300000) || defined(__NetBSD__) +# include <sys/malloc.h> +#endif +/* END OF INCLUDES */ + +#ifdef IPFILTER_LOG + +# if defined(IPL_SELECT) +# include <machine/sys/user.h> +# include <sys/kthread_iface.h> +# define READ_COLLISION 0x001 + +iplog_select_t iplog_ss[IPL_LOGMAX+1]; + +extern int selwait; +# endif /* IPL_SELECT */ + +# if defined(linux) && defined(_KERNEL) +wait_queue_head_t iplh_linux[IPL_LOGSIZE]; +# endif +# if SOLARIS +extern kcondvar_t iplwait; +# endif + +iplog_t **iplh[IPL_LOGSIZE], *iplt[IPL_LOGSIZE], *ipll[IPL_LOGSIZE]; +int iplused[IPL_LOGSIZE]; +static fr_info_t iplcrc[IPL_LOGSIZE]; +int ipl_suppress = 1; +int ipl_buffer_sz; +int ipl_logmax = IPL_LOGMAX; +int ipl_logall = 0; +int ipl_log_init = 0; +int ipl_logsize = IPFILTER_LOGSIZE; +int ipl_magic[IPL_LOGSIZE] = { IPL_MAGIC, IPL_MAGIC_NAT, IPL_MAGIC_STATE, + IPL_MAGIC, IPL_MAGIC, IPL_MAGIC, + IPL_MAGIC, IPL_MAGIC }; + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_loginit */ +/* Returns: int - 0 == success (always returned) */ +/* Parameters: 
Nil */ +/* */ +/* Initialise log buffers & pointers. Also iniialised the CRC to a local */ +/* secret for use in calculating the "last log checksum". */ +/* ------------------------------------------------------------------------ */ +int fr_loginit() +{ + int i; + + for (i = IPL_LOGMAX; i >= 0; i--) { + iplt[i] = NULL; + ipll[i] = NULL; + iplh[i] = &iplt[i]; + iplused[i] = 0; + bzero((char *)&iplcrc[i], sizeof(iplcrc[i])); +# ifdef IPL_SELECT + iplog_ss[i].read_waiter = 0; + iplog_ss[i].state = 0; +# endif +# if defined(linux) && defined(_KERNEL) + init_waitqueue_head(iplh_linux + i); +# endif + } + +# if SOLARIS && defined(_KERNEL) + cv_init(&iplwait, "ipl condvar", CV_DRIVER, NULL); +# endif + MUTEX_INIT(&ipl_mutex, "ipf log mutex"); + + ipl_log_init = 1; + + return 0; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_logunload */ +/* Returns: Nil */ +/* Parameters: Nil */ +/* */ +/* Clean up any log data that has accumulated without being read. */ +/* ------------------------------------------------------------------------ */ +void fr_logunload() +{ + int i; + + if (ipl_log_init == 0) + return; + + for (i = IPL_LOGMAX; i >= 0; i--) + (void) ipflog_clear(i); + +# if SOLARIS && defined(_KERNEL) + cv_destroy(&iplwait); +# endif + MUTEX_DESTROY(&ipl_mutex); + + ipl_log_init = 0; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: ipflog */ +/* Returns: int - 0 == success, -1 == failure */ +/* Parameters: fin(I) - pointer to packet information */ +/* flags(I) - flags from filter rules */ +/* */ +/* Create a log record for a packet given that it has been triggered by a */ +/* rule (or the default setting). Calculate the transport protocol header */ +/* size using predetermined size of a couple of popular protocols and thus */ +/* how much data to copy into the log, including part of the data body if */ +/* requested. 
*/ +/* ------------------------------------------------------------------------ */ +int ipflog(fin, flags) +fr_info_t *fin; +u_int flags; +{ + register size_t hlen; + int types[2], mlen; + size_t sizes[2]; + void *ptrs[2]; + ipflog_t ipfl; + u_char p; + mb_t *m; +# if (SOLARIS || defined(__hpux)) && defined(_KERNEL) +# ifndef IRE_ILL_CN + qif_t *ifp; +# else + s_ill_t *ifp; +# endif /* IRE_ILL_CN */ +# else + struct ifnet *ifp; +# endif /* SOLARIS || __hpux */ + + ipfl.fl_nattag.ipt_num[0] = 0; + m = fin->fin_m; + ifp = fin->fin_ifp; + hlen = fin->fin_hlen; + /* + * calculate header size. + */ + if (fin->fin_off == 0) { + p = fin->fin_fi.fi_p; + if (p == IPPROTO_TCP) + hlen += MIN(sizeof(tcphdr_t), fin->fin_dlen); + else if (p == IPPROTO_UDP) + hlen += MIN(sizeof(udphdr_t), fin->fin_dlen); + else if (p == IPPROTO_ICMP) { + struct icmp *icmp; + + icmp = (struct icmp *)fin->fin_dp; + + /* + * For ICMP, if the packet is an error packet, also + * include the information about the packet which + * caused the error. + */ + switch (icmp->icmp_type) + { + case ICMP_UNREACH : + case ICMP_SOURCEQUENCH : + case ICMP_REDIRECT : + case ICMP_TIMXCEED : + case ICMP_PARAMPROB : + hlen += MIN(sizeof(struct icmp) + 8, + fin->fin_dlen); + break; + default : + hlen += MIN(sizeof(struct icmp), + fin->fin_dlen); + break; + } + } +# ifdef USE_INET6 + else if (p == IPPROTO_ICMPV6) { + struct icmp6_hdr *icmp; + + icmp = (struct icmp6_hdr *)fin->fin_dp; + + /* + * For ICMPV6, if the packet is an error packet, also + * include the information about the packet which + * caused the error. + */ + if (icmp->icmp6_type < 128) { + hlen += MIN(sizeof(struct icmp6_hdr) + 8, + fin->fin_dlen); + } else { + hlen += MIN(sizeof(struct icmp6_hdr), + fin->fin_dlen); + } + } +# endif + } + /* + * Get the interface number and name to which this packet is + * currently associated. 
+ */ +# if (SOLARIS || defined(__hpux)) && defined(_KERNEL) + ipfl.fl_unit = (u_int)0; + (void) strncpy(ipfl.fl_ifname, IFNAME(ifp), sizeof(ipfl.fl_ifname)); +# else +# if (defined(NetBSD) && (NetBSD <= 1991011) && (NetBSD >= 199603)) || \ + (defined(OpenBSD) && (OpenBSD >= 199603)) || defined(linux) || \ + (defined(__FreeBSD__) && (__FreeBSD_version >= 501113)) + COPYIFNAME(ifp, ipfl.fl_ifname); +# else + ipfl.fl_unit = (u_int)ifp->if_unit; +# if defined(_KERNEL) + if ((ipfl.fl_ifname[0] = ifp->if_name[0])) + if ((ipfl.fl_ifname[1] = ifp->if_name[1])) + if ((ipfl.fl_ifname[2] = ifp->if_name[2])) + ipfl.fl_ifname[3] = ifp->if_name[3]; +# else + (void) strncpy(ipfl.fl_ifname, IFNAME(ifp), sizeof(ipfl.fl_ifname)); + ipfl.fl_ifname[sizeof(ipfl.fl_ifname) - 1] = '\0'; +# endif +# endif +# endif /* __hpux || SOLARIS */ + mlen = fin->fin_plen - hlen; + if (!ipl_logall) { + mlen = (flags & FR_LOGBODY) ? MIN(mlen, 128) : 0; + } else if ((flags & FR_LOGBODY) == 0) { + mlen = 0; + } + if (mlen < 0) + mlen = 0; + ipfl.fl_plen = (u_char)mlen; + ipfl.fl_hlen = (u_char)hlen; + ipfl.fl_rule = fin->fin_rule; + (void) strncpy(ipfl.fl_group, fin->fin_group, FR_GROUPLEN); + if (fin->fin_fr != NULL) { + ipfl.fl_loglevel = fin->fin_fr->fr_loglevel; + ipfl.fl_logtag = fin->fin_fr->fr_logtag; + } else { + ipfl.fl_loglevel = 0xffff; + ipfl.fl_logtag = FR_NOLOGTAG; + } + if (fin->fin_nattag != NULL) + bcopy(fin->fin_nattag, (void *)&ipfl.fl_nattag, + sizeof(ipfl.fl_nattag)); + ipfl.fl_flags = flags; + ipfl.fl_dir = fin->fin_out; + ipfl.fl_lflags = fin->fin_flx; + ptrs[0] = (void *)&ipfl; + sizes[0] = sizeof(ipfl); + types[0] = 0; +# if defined(MENTAT) && defined(_KERNEL) + /* + * Are we copied from the mblk or an aligned array ? 
+ */ + if (fin->fin_ip == (ip_t *)m->b_rptr) { + ptrs[1] = m; + sizes[1] = hlen + mlen; + types[1] = 1; + } else { + ptrs[1] = fin->fin_ip; + sizes[1] = hlen + mlen; + types[1] = 0; + } +# else + ptrs[1] = m; + sizes[1] = hlen + mlen; + types[1] = 1; +# endif /* MENTAT */ + return ipllog(IPL_LOGIPF, fin, ptrs, sizes, types, 2); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: ipllog */ +/* Returns: int - 0 == success, -1 == failure */ +/* Parameters: dev(I) - device that owns this log record */ +/* fin(I) - pointer to packet information */ +/* items(I) - array of pointers to log data */ +/* itemsz(I) - array of size of valid memory pointed to */ +/* types(I) - type of data pointed to by items pointers */ +/* cnt(I) - number of elements in arrays items/itemsz/types */ +/* */ +/* Takes an array of parameters and constructs one record to include the */ +/* miscellaneous packet information, as well as packet data, for reading */ +/* from the log device. */ +/* ------------------------------------------------------------------------ */ +int ipllog(dev, fin, items, itemsz, types, cnt) +int dev; +fr_info_t *fin; +void **items; +size_t *itemsz; +int *types, cnt; +{ + caddr_t buf, ptr; + iplog_t *ipl; + size_t len; + int i; + SPL_INT(s); + + /* + * Check to see if this log record has a CRC which matches the last + * record logged. If it does, just up the count on the previous one + * rather than create a new one. + */ + if (ipl_suppress) { + MUTEX_ENTER(&ipl_mutex); + if ((fin != NULL) && (fin->fin_off == 0)) { + if ((ipll[dev] != NULL) && + bcmp((char *)fin, (char *)&iplcrc[dev], + FI_LCSIZE) == 0) { + ipll[dev]->ipl_count++; + MUTEX_EXIT(&ipl_mutex); + return 0; + } + bcopy((char *)fin, (char *)&iplcrc[dev], FI_LCSIZE); + } else + bzero((char *)&iplcrc[dev], FI_CSIZE); + MUTEX_EXIT(&ipl_mutex); + } + + /* + * Get the total amount of data to be logged. 
+ */ + for (i = 0, len = sizeof(iplog_t); i < cnt; i++) + len += itemsz[i]; + + /* + * check that we have space to record this information and can + * allocate that much. + */ + KMALLOCS(buf, caddr_t, len); + if (buf == NULL) + return -1; + SPL_NET(s); + MUTEX_ENTER(&ipl_mutex); + if ((iplused[dev] + len) > ipl_logsize) { + MUTEX_EXIT(&ipl_mutex); + SPL_X(s); + KFREES(buf, len); + return -1; + } + iplused[dev] += len; + MUTEX_EXIT(&ipl_mutex); + SPL_X(s); + + /* + * advance the log pointer to the next empty record and deduct the + * amount of space we're going to use. + */ + ipl = (iplog_t *)buf; + ipl->ipl_magic = ipl_magic[dev]; + ipl->ipl_count = 1; + ipl->ipl_next = NULL; + ipl->ipl_dsize = len; +#ifdef _KERNEL + GETKTIME(&ipl->ipl_sec); +#else + ipl->ipl_sec = 0; + ipl->ipl_usec = 0; +#endif + + /* + * Loop through all the items to be logged, copying each one to the + * buffer. Use bcopy for normal data or the mb_t copyout routine. + */ + for (i = 0, ptr = buf + sizeof(*ipl); i < cnt; i++) { + if (types[i] == 0) { + bcopy(items[i], ptr, itemsz[i]); + } else if (types[i] == 1) { + COPYDATA(items[i], 0, itemsz[i], ptr); + } + ptr += itemsz[i]; + } + SPL_NET(s); + MUTEX_ENTER(&ipl_mutex); + ipll[dev] = ipl; + *iplh[dev] = ipl; + iplh[dev] = &ipl->ipl_next; + + /* + * Now that the log record has been completed and added to the queue, + * wake up any listeners who may want to read it. + */ +# if SOLARIS && defined(_KERNEL) + cv_signal(&iplwait); + MUTEX_EXIT(&ipl_mutex); +# else + MUTEX_EXIT(&ipl_mutex); + WAKEUP(iplh,dev); +# endif + SPL_X(s); +# ifdef IPL_SELECT + iplog_input_ready(dev); +# endif + return 0; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: ipflog_read */ +/* Returns: int - 0 == success, else error value. */ +/* Parameters: unit(I) - device we are reading from */ +/* uio(O) - pointer to information about where to store data */ +/* */ +/* Called to handle a read on an IPFilter device. 
Returns only complete */ +/* log messages - will not partially copy a log record out to userland. */ +/* */ +/* NOTE: This function will block and wait for a signal to return data if */ +/* there is none present. Asynchronous I/O is not implemented. */ +/* ------------------------------------------------------------------------ */ +int ipflog_read(unit, uio) +minor_t unit; +struct uio *uio; +{ + size_t dlen, copied; + int error = 0; + iplog_t *ipl; + SPL_INT(s); + + /* + * Sanity checks. Make sure the minor # is valid and we're copying + * a valid chunk of data. + */ + if (IPL_LOGMAX < unit) + return ENXIO; + if (uio->uio_resid == 0) + return 0; + if ((uio->uio_resid < sizeof(iplog_t)) || + (uio->uio_resid > ipl_logsize)) + return EINVAL; + + /* + * Lock the log so we can snapshot the variables. Wait for a signal + * if the log is empty. + */ + SPL_NET(s); + MUTEX_ENTER(&ipl_mutex); + + while (iplt[unit] == NULL) { +# if SOLARIS && defined(_KERNEL) + if (!cv_wait_sig(&iplwait, &ipl_mutex.ipf_lk)) { + MUTEX_EXIT(&ipl_mutex); + return EINTR; + } +# else +# if defined(__hpux) && defined(_KERNEL) + lock_t *l; + +# ifdef IPL_SELECT + if (uio->uio_fpflags & (FNBLOCK|FNDELAY)) { + /* this is no blocking system call */ + MUTEX_EXIT(&ipl_mutex); + return 0; + } +# endif + + MUTEX_EXIT(&ipl_mutex); + l = get_sleep_lock(&iplh[unit]); + error = sleep(&iplh[unit], PZERO+1); + spinunlock(l); +# else +# if defined(__osf__) && defined(_KERNEL) + error = mpsleep(&iplh[unit], PSUSP|PCATCH, "iplread", 0, + &ipl_mutex, MS_LOCK_SIMPLE); +# else + MUTEX_EXIT(&ipl_mutex); + SPL_X(s); + error = SLEEP(unit + iplh, "ipl sleep"); +# endif /* __osf__ */ +# endif /* __hpux */ + if (error) + return error; + SPL_NET(s); + MUTEX_ENTER(&ipl_mutex); +# endif /* SOLARIS */ + } + +# if (BSD >= 199101) || defined(__FreeBSD__) || defined(__osf__) + uio->uio_rw = UIO_READ; +# endif + + for (copied = 0; (ipl = iplt[unit]) != NULL; copied += dlen) { + dlen = ipl->ipl_dsize; + if (dlen > uio->uio_resid) + 
break; + /* + * Don't hold the mutex over the uiomove call. + */ + iplt[unit] = ipl->ipl_next; + iplused[unit] -= dlen; + MUTEX_EXIT(&ipl_mutex); + SPL_X(s); + error = UIOMOVE((caddr_t)ipl, dlen, UIO_READ, uio); + if (error) { + SPL_NET(s); + MUTEX_ENTER(&ipl_mutex); + ipl->ipl_next = iplt[unit]; + iplt[unit] = ipl; + iplused[unit] += dlen; + break; + } + MUTEX_ENTER(&ipl_mutex); + KFREES((caddr_t)ipl, dlen); + SPL_NET(s); + } + if (!iplt[unit]) { + iplused[unit] = 0; + iplh[unit] = &iplt[unit]; + ipll[unit] = NULL; + } + + MUTEX_EXIT(&ipl_mutex); + SPL_X(s); + return error; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: ipflog_clear */ +/* Returns: int - number of log bytes cleared. */ +/* Parameters: unit(I) - device we are reading from */ +/* */ +/* Deletes all queued up log records for a given output device. */ +/* ------------------------------------------------------------------------ */ +int ipflog_clear(unit) +minor_t unit; +{ + iplog_t *ipl; + int used; + SPL_INT(s); + + SPL_NET(s); + MUTEX_ENTER(&ipl_mutex); + while ((ipl = iplt[unit]) != NULL) { + iplt[unit] = ipl->ipl_next; + KFREES((caddr_t)ipl, ipl->ipl_dsize); + } + iplh[unit] = &iplt[unit]; + ipll[unit] = NULL; + used = iplused[unit]; + iplused[unit] = 0; + bzero((char *)&iplcrc[unit], FI_CSIZE); + MUTEX_EXIT(&ipl_mutex); + SPL_X(s); + return used; +} +#endif /* IPFILTER_LOG */ diff --git a/usr/src/uts/common/inet/ipf/ip_lookup.c b/usr/src/uts/common/inet/ipf/ip_lookup.c new file mode 100644 index 0000000000..299dadf0bb --- /dev/null +++ b/usr/src/uts/common/inet/ipf/ip_lookup.c @@ -0,0 +1,532 @@ +/* + * Copyright (C) 2002-2003 by Darren Reed. + * + * See the IPFILTER.LICENCE file for details on licencing. + * + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#if defined(KERNEL) || defined(_KERNEL) +# undef KERNEL +# undef _KERNEL +# define KERNEL 1 +# define _KERNEL 1 +#endif +#if defined(__osf__) +# define _PROTO_NET_H_ +#endif +#include <sys/param.h> +#include <sys/errno.h> +#include <sys/types.h> +#include <sys/time.h> +#include <sys/file.h> +#if __FreeBSD_version >= 220000 && defined(_KERNEL) +# include <sys/fcntl.h> +# include <sys/filio.h> +#else +# include <sys/ioctl.h> +#endif +#if !defined(_KERNEL) +# include <string.h> +# define _KERNEL +# ifdef __OpenBSD__ +struct file; +# endif +# include <sys/uio.h> +# undef _KERNEL +#endif +#include <sys/socket.h> +#if (defined(__osf__) || defined(AIX) || defined(__hpux) || defined(__sgi)) && defined(_KERNEL) +# ifdef __osf__ +# include <net/radix.h> +# endif +# include "radix_ipf_local.h" +# define _RADIX_H_ +#endif +#include <net/if.h> +#if defined(__FreeBSD__) +# include <sys/cdefs.h> +# include <sys/proc.h> +#endif +#if defined(_KERNEL) +# include <sys/systm.h> +# if !defined(__SVR4) && !defined(__svr4__) +# include <sys/mbuf.h> +# endif +#endif +#include <netinet/in.h> + +#include "netinet/ip_compat.h" +#include "netinet/ip_fil.h" +#include "netinet/ip_pool.h" +#include "netinet/ip_htable.h" +#include "netinet/ip_lookup.h" +/* END OF INCLUDES */ + +#if !defined(lint) +static const char rcsid[] = "@(#)$Id: ip_lookup.c,v 2.35.2.7 2005/06/12 07:18:20 darrenr Exp $"; +#endif + +#ifdef IPFILTER_LOOKUP +int ip_lookup_inited = 0; + +static int iplookup_addnode __P((caddr_t)); +static int iplookup_delnode __P((caddr_t data)); +static int iplookup_addtable __P((caddr_t)); +static int iplookup_deltable __P((caddr_t)); +static int iplookup_stats __P((caddr_t)); +static int iplookup_flush __P((caddr_t)); + + +/* ------------------------------------------------------------------------ */ +/* Function: iplookup_init */ +/* Returns: int - 0 = success, else error */ +/* Parameters: Nil */ +/* */ +/* Initialise all of the subcomponents of 
the lookup infrstructure. */ +/* ------------------------------------------------------------------------ */ +int ip_lookup_init() +{ + + if (ip_pool_init() == -1) + return -1; + + RWLOCK_INIT(&ip_poolrw, "ip pool rwlock"); + + ip_lookup_inited = 1; + + return 0; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: iplookup_unload */ +/* Returns: int - 0 = success, else error */ +/* Parameters: Nil */ +/* */ +/* Free up all pool related memory that has been allocated whilst IPFilter */ +/* has been running. Also, do any other deinitialisation required such */ +/* ip_lookup_init() can be called again, safely. */ +/* ------------------------------------------------------------------------ */ +void ip_lookup_unload() +{ + ip_pool_fini(); + fr_htable_unload(); + + if (ip_lookup_inited == 1) { + RW_DESTROY(&ip_poolrw); + ip_lookup_inited = 0; + } +} + + +/* ------------------------------------------------------------------------ */ +/* Function: iplookup_ioctl */ +/* Returns: int - 0 = success, else error */ +/* Parameters: data(IO) - pointer to ioctl data to be copied to/from user */ +/* space. */ +/* cmd(I) - ioctl command number */ +/* mode(I) - file mode bits used with open */ +/* */ +/* Handle ioctl commands sent to the ioctl device. For the most part, this */ +/* involves just calling another function to handle the specifics of each */ +/* command. 
*/ +/* ------------------------------------------------------------------------ */ +int ip_lookup_ioctl(data, cmd, mode) +caddr_t data; +ioctlcmd_t cmd; +int mode; +{ + int err; + SPL_INT(s); + + mode = mode; /* LINT */ + + SPL_NET(s); + + switch (cmd) + { + case SIOCLOOKUPADDNODE : + case SIOCLOOKUPADDNODEW : + WRITE_ENTER(&ip_poolrw); + err = iplookup_addnode(data); + RWLOCK_EXIT(&ip_poolrw); + break; + + case SIOCLOOKUPDELNODE : + case SIOCLOOKUPDELNODEW : + WRITE_ENTER(&ip_poolrw); + err = iplookup_delnode(data); + RWLOCK_EXIT(&ip_poolrw); + break; + + case SIOCLOOKUPADDTABLE : + WRITE_ENTER(&ip_poolrw); + err = iplookup_addtable(data); + RWLOCK_EXIT(&ip_poolrw); + break; + + case SIOCLOOKUPDELTABLE : + WRITE_ENTER(&ip_poolrw); + err = iplookup_deltable(data); + RWLOCK_EXIT(&ip_poolrw); + break; + + case SIOCLOOKUPSTAT : + case SIOCLOOKUPSTATW : + WRITE_ENTER(&ip_poolrw); + err = iplookup_stats(data); + RWLOCK_EXIT(&ip_poolrw); + break; + + case SIOCLOOKUPFLUSH : + WRITE_ENTER(&ip_poolrw); + err = iplookup_flush(data); + RWLOCK_EXIT(&ip_poolrw); + break; + + default : + err = EINVAL; + break; + } + SPL_X(s); + return err; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: iplookup_addnode */ +/* Returns: int - 0 = success, else error */ +/* Parameters: data(I) - pointer to data from ioctl call */ +/* */ +/* Add a new data node to a lookup structure. First, check to see if the */ +/* parent structure refered to by name exists and if it does, then go on to */ +/* add a node to it. 
*/ +/* ------------------------------------------------------------------------ */ +static int iplookup_addnode(data) +caddr_t data; +{ + ip_pool_node_t node, *m; + iplookupop_t op; + iphtable_t *iph; + iphtent_t hte; + ip_pool_t *p; + int err; + + err = 0; + BCOPYIN(data, &op, sizeof(op)); + op.iplo_name[sizeof(op.iplo_name) - 1] = '\0'; + + switch (op.iplo_type) + { + case IPLT_POOL : + if (op.iplo_size != sizeof(node)) + return EINVAL; + + err = COPYIN(op.iplo_struct, &node, sizeof(node)); + if (err != 0) + return EFAULT; + + p = ip_pool_find(op.iplo_unit, op.iplo_name); + if (p == NULL) + return ESRCH; + + /* + * add an entry to a pool - return an error if it already + * exists remove an entry from a pool - if it exists + * - in both cases, the pool *must* exist! + */ + m = ip_pool_findeq(p, &node.ipn_addr, &node.ipn_mask); + if (m) + return EEXIST; + err = ip_pool_insert(p, &node.ipn_addr, + &node.ipn_mask, node.ipn_info); + break; + + case IPLT_HASH : + if (op.iplo_size != sizeof(hte)) + return EINVAL; + + err = COPYIN(op.iplo_struct, &hte, sizeof(hte)); + if (err != 0) + return EFAULT; + + iph = fr_findhtable(op.iplo_unit, op.iplo_name); + if (iph == NULL) + return ESRCH; + err = fr_addhtent(iph, &hte); + break; + + default : + err = EINVAL; + break; + } + return err; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: iplookup_delnode */ +/* Returns: int - 0 = success, else error */ +/* Parameters: data(I) - pointer to data from ioctl call */ +/* */ +/* Delete a node from a lookup table by first looking for the table it is */ +/* in and then deleting the entry that gets found. 
*/ +/* ------------------------------------------------------------------------ */ +static int iplookup_delnode(data) +caddr_t data; +{ + ip_pool_node_t node, *m; + iplookupop_t op; + iphtable_t *iph; + iphtent_t hte; + ip_pool_t *p; + int err; + + err = 0; + BCOPYIN(data, &op, sizeof(op)); + + op.iplo_name[sizeof(op.iplo_name) - 1] = '\0'; + + switch (op.iplo_type) + { + case IPLT_POOL : + if (op.iplo_size != sizeof(node)) + return EINVAL; + + err = COPYIN(op.iplo_struct, &node, sizeof(node)); + if (err != 0) + return EFAULT; + + p = ip_pool_find(op.iplo_unit, op.iplo_name); + if (!p) + return ESRCH; + + m = ip_pool_findeq(p, &node.ipn_addr, &node.ipn_mask); + if (m == NULL) + return ENOENT; + err = ip_pool_remove(p, m); + break; + + case IPLT_HASH : + if (op.iplo_size != sizeof(hte)) + return EINVAL; + + err = COPYIN(op.iplo_struct, &hte, sizeof(hte)); + if (err != 0) + return EFAULT; + + iph = fr_findhtable(op.iplo_unit, op.iplo_name); + if (iph == NULL) + return ESRCH; + err = fr_delhtent(iph, &hte); + break; + + default : + err = EINVAL; + break; + } + return err; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: iplookup_addtable */ +/* Returns: int - 0 = success, else error */ +/* Parameters: data(I) - pointer to data from ioctl call */ +/* */ +/* Create a new lookup table, if one doesn't already exist using the name */ +/* for this one. 
*/ +/* ------------------------------------------------------------------------ */ +static int iplookup_addtable(data) +caddr_t data; +{ + iplookupop_t op; + int err; + + err = 0; + BCOPYIN(data, &op, sizeof(op)); + + op.iplo_name[sizeof(op.iplo_name) - 1] = '\0'; + + switch (op.iplo_type) + { + case IPLT_POOL : + if (ip_pool_find(op.iplo_unit, op.iplo_name) != NULL) + err = EEXIST; + else + err = ip_pool_create(&op); + break; + + case IPLT_HASH : + if (fr_findhtable(op.iplo_unit, op.iplo_name) != NULL) + err = EEXIST; + else + err = fr_newhtable(&op); + break; + + default : + err = EINVAL; + break; + } + return err; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: iplookup_deltable */ +/* Returns: int - 0 = success, else error */ +/* Parameters: data(I) - pointer to data from ioctl call */ +/* */ +/* Decodes ioctl request to remove a particular hash table or pool and */ +/* calls the relevant function to do the cleanup. */ +/* ------------------------------------------------------------------------ */ +static int iplookup_deltable(data) +caddr_t data; +{ + iplookupop_t op; + int err; + + BCOPYIN(data, &op, sizeof(op)); + op.iplo_name[sizeof(op.iplo_name) - 1] = '\0'; + + if (op.iplo_arg & IPLT_ANON) + op.iplo_arg &= IPLT_ANON; + + /* + * create a new pool - fail if one already exists with + * the same # + */ + switch (op.iplo_type) + { + case IPLT_POOL : + err = ip_pool_destroy(&op); + break; + + case IPLT_HASH : + err = fr_removehtable(&op); + break; + + default : + err = EINVAL; + break; + } + return err; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: iplookup_stats */ +/* Returns: int - 0 = success, else error */ +/* Parameters: data(I) - pointer to data from ioctl call */ +/* */ +/* Copy statistical information from inside the kernel back to user space. 
*/ +/* ------------------------------------------------------------------------ */ +static int iplookup_stats(data) +caddr_t data; +{ + iplookupop_t op; + int err; + + err = 0; + BCOPYIN(data, &op, sizeof(op)); + + switch (op.iplo_type) + { + case IPLT_POOL : + err = ip_pool_statistics(&op); + break; + + case IPLT_HASH : + err = fr_gethtablestat(&op); + break; + + default : + err = EINVAL; + break; + } + return err; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: iplookup_flush */ +/* Returns: int - 0 = success, else error */ +/* Parameters: data(I) - pointer to data from ioctl call */ +/* */ +/* A flush is called when we want to flush all the nodes from a particular */ +/* entry in the hash table/pool or want to remove all groups from those. */ +/* ------------------------------------------------------------------------ */ +static int iplookup_flush(data) +caddr_t data; +{ + int err, unit, num, type; + iplookupflush_t flush; + + err = 0; + BCOPYIN(data, &flush, sizeof(flush)); + + flush.iplf_name[sizeof(flush.iplf_name) - 1] = '\0'; + + unit = flush.iplf_unit; + if ((unit < 0 || unit > IPL_LOGMAX) && (unit != IPLT_ALL)) + return EINVAL; + + type = flush.iplf_type; + err = EINVAL; + num = 0; + + if (type == IPLT_POOL || type == IPLT_ALL) { + err = 0; + num = ip_pool_flush(&flush); + } + + if (type == IPLT_HASH || type == IPLT_ALL) { + err = 0; + num += fr_flushhtable(&flush); + } + + if (err == 0) { + flush.iplf_count = num; + err = COPYOUT(&flush, data, sizeof(flush)); + } + return err; +} + + +void ip_lookup_deref(type, ptr) +int type; +void *ptr; +{ + if (ptr == NULL) + return; + + WRITE_ENTER(&ip_poolrw); + switch (type) + { + case IPLT_POOL : + ip_pool_deref(ptr); + break; + + case IPLT_HASH : + fr_derefhtable(ptr); + break; + } + RWLOCK_EXIT(&ip_poolrw); +} + + +#else /* IPFILTER_LOOKUP */ + +/*ARGSUSED*/ +int ip_lookup_ioctl(data, cmd, mode) +caddr_t data; +ioctlcmd_t cmd; +int mode; +{ + return EIO; +} 
+#endif /* IPFILTER_LOOKUP */ diff --git a/usr/src/uts/common/inet/ipf/ip_nat.c b/usr/src/uts/common/inet/ipf/ip_nat.c new file mode 100644 index 0000000000..8814553e20 --- /dev/null +++ b/usr/src/uts/common/inet/ipf/ip_nat.c @@ -0,0 +1,4849 @@ +/* + * Copyright (C) 1995-2003 by Darren Reed. + * + * See the IPFILTER.LICENCE file for details on licencing. + * + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#if defined(KERNEL) || defined(_KERNEL) +# undef KERNEL +# undef _KERNEL +# define KERNEL 1 +# define _KERNEL 1 +#endif +#include <sys/errno.h> +#include <sys/types.h> +#include <sys/param.h> +#include <sys/time.h> +#include <sys/file.h> +#if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \ + defined(_KERNEL) +# include "opt_ipfilter_log.h" +#endif +#if !defined(_KERNEL) +# include <stdio.h> +# include <string.h> +# include <stdlib.h> +# define _KERNEL +# ifdef __OpenBSD__ +struct file; +# endif +# include <sys/uio.h> +# undef _KERNEL +#endif +#if defined(_KERNEL) && (__FreeBSD_version >= 220000) +# include <sys/filio.h> +# include <sys/fcntl.h> +#else +# include <sys/ioctl.h> +#endif +#if !defined(AIX) +# include <sys/fcntl.h> +#endif +#if !defined(linux) +# include <sys/protosw.h> +#endif +#include <sys/socket.h> +#if defined(_KERNEL) +# include <sys/systm.h> +# if !defined(__SVR4) && !defined(__svr4__) +# include <sys/mbuf.h> +# endif +#endif +#if defined(__SVR4) || defined(__svr4__) +# include <sys/filio.h> +# include <sys/byteorder.h> +# ifdef _KERNEL +# include <sys/dditypes.h> +# endif +# include <sys/stream.h> +# include <sys/kmem.h> +#endif +#if __FreeBSD_version >= 300000 +# include <sys/queue.h> +#endif +#include <net/if.h> +#if __FreeBSD_version >= 300000 +# include <net/if_var.h> +# if defined(_KERNEL) && !defined(IPFILTER_LKM) +# include "opt_ipfilter.h" +# endif +#endif +#ifdef sun +# include <net/af.h> +#endif +#include 
<net/route.h> +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> + +#ifdef RFC1825 +# include <vpn/md5.h> +# include <vpn/ipsec.h> +extern struct ifnet vpnif; +#endif + +#if !defined(linux) +# include <netinet/ip_var.h> +#endif +#include <netinet/tcp.h> +#include <netinet/udp.h> +#include <netinet/ip_icmp.h> +#include "netinet/ip_compat.h" +#include <netinet/tcpip.h> +#include "netinet/ip_fil.h" +#include "netinet/ip_nat.h" +#include "netinet/ip_frag.h" +#include "netinet/ip_state.h" +#include "netinet/ip_proxy.h" +#ifdef IPFILTER_SYNC +#include "netinet/ip_sync.h" +#endif +#if (__FreeBSD_version >= 300000) +# include <sys/malloc.h> +#endif +/* END OF INCLUDES */ + +#undef SOCKADDR_IN +#define SOCKADDR_IN struct sockaddr_in + +#if !defined(lint) +static const char sccsid[] = "@(#)ip_nat.c 1.11 6/5/96 (C) 1995 Darren Reed"; +static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.42 2005/08/11 19:51:36 darrenr Exp $"; +#endif + + +/* ======================================================================== */ +/* How the NAT is organised and works. */ +/* */ +/* Inside (interface y) NAT Outside (interface x) */ +/* -------------------- -+- ------------------------------------- */ +/* Packet going | out, processsed by fr_checknatout() for x */ +/* ------------> | ------------> */ +/* src=10.1.1.1 | src=192.1.1.1 */ +/* | */ +/* | in, processed by fr_checknatin() for x */ +/* <------------ | <------------ */ +/* dst=10.1.1.1 | dst=192.1.1.1 */ +/* -------------------- -+- ------------------------------------- */ +/* fr_checknatout() - changes ip_src and if required, sport */ +/* - creates a new mapping, if required. */ +/* fr_checknatin() - changes ip_dst and if required, dport */ +/* */ +/* In the NAT table, internal source is recorded as "in" and externally */ +/* seen as "out". 
*/ +/* ======================================================================== */ + + +nat_t **nat_table[2] = { NULL, NULL }, + *nat_instances = NULL; +ipnat_t *nat_list = NULL; +u_int ipf_nattable_max = NAT_TABLE_MAX; +u_int ipf_nattable_sz = NAT_TABLE_SZ; +u_int ipf_natrules_sz = NAT_SIZE; +u_int ipf_rdrrules_sz = RDR_SIZE; +u_int ipf_hostmap_sz = HOSTMAP_SIZE; +u_int fr_nat_maxbucket = 0, + fr_nat_maxbucket_reset = 1; +u_32_t nat_masks = 0; +u_32_t rdr_masks = 0; +ipnat_t **nat_rules = NULL; +ipnat_t **rdr_rules = NULL; +hostmap_t **maptable = NULL; +ipftq_t nat_tqb[IPF_TCP_NSTATES]; +ipftq_t nat_udptq; +ipftq_t nat_icmptq; +ipftq_t nat_iptq; +ipftq_t *nat_utqe = NULL; +#ifdef IPFILTER_LOG +int nat_logging = 1; +#else +int nat_logging = 0; +#endif + +u_long fr_defnatage = DEF_NAT_AGE, + fr_defnatipage = 120, /* 60 seconds */ + fr_defnaticmpage = 6; /* 3 seconds */ +natstat_t nat_stats; +int fr_nat_lock = 0; +int fr_nat_init = 0; +#if SOLARIS +extern int pfil_delayed_copy; +#endif + +static int nat_flushtable __P((void)); +static int nat_clearlist __P((void)); +static void nat_addnat __P((struct ipnat *)); +static void nat_addrdr __P((struct ipnat *)); +static void nat_delete __P((struct nat *, int)); +static void nat_delrdr __P((struct ipnat *)); +static void nat_delnat __P((struct ipnat *)); +static int fr_natgetent __P((caddr_t)); +static int fr_natgetsz __P((caddr_t)); +static int fr_natputent __P((caddr_t, int)); +static void nat_tabmove __P((nat_t *)); +static int nat_match __P((fr_info_t *, ipnat_t *)); +static INLINE int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *)); +static INLINE int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *)); +static hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr, + struct in_addr, struct in_addr, u_32_t)); +static void nat_hostmapdel __P((struct hostmap *)); +static INLINE int nat_icmpquerytype4 __P((int)); +static int nat_siocaddnat __P((ipnat_t *, ipnat_t **, int)); +static void nat_siocdelnat __P((ipnat_t *, 
ipnat_t **, int)); +static INLINE int nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *, + tcphdr_t *, nat_t **, int)); +static void nat_resolverule __P((ipnat_t *)); +static nat_t *fr_natclone __P((fr_info_t *, nat_t *)); +static void nat_mssclamp __P((tcphdr_t *, u_32_t, fr_info_t *, u_short *)); +static INLINE int nat_wildok __P((nat_t *, int, int, int, int)); + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_natinit */ +/* Returns: int - 0 == success, -1 == failure */ +/* Parameters: Nil */ +/* */ +/* Initialise all of the NAT locks, tables and other structures. */ +/* ------------------------------------------------------------------------ */ +int fr_natinit() +{ + int i; + + KMALLOCS(nat_table[0], nat_t **, sizeof(nat_t *) * ipf_nattable_sz); + if (nat_table[0] != NULL) + bzero((char *)nat_table[0], ipf_nattable_sz * sizeof(nat_t *)); + else + return -1; + + KMALLOCS(nat_table[1], nat_t **, sizeof(nat_t *) * ipf_nattable_sz); + if (nat_table[1] != NULL) + bzero((char *)nat_table[1], ipf_nattable_sz * sizeof(nat_t *)); + else + return -2; + + KMALLOCS(nat_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_natrules_sz); + if (nat_rules != NULL) + bzero((char *)nat_rules, ipf_natrules_sz * sizeof(ipnat_t *)); + else + return -3; + + KMALLOCS(rdr_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_rdrrules_sz); + if (rdr_rules != NULL) + bzero((char *)rdr_rules, ipf_rdrrules_sz * sizeof(ipnat_t *)); + else + return -4; + + KMALLOCS(maptable, hostmap_t **, sizeof(hostmap_t *) * ipf_hostmap_sz); + if (maptable != NULL) + bzero((char *)maptable, sizeof(hostmap_t *) * ipf_hostmap_sz); + else + return -5; + + KMALLOCS(nat_stats.ns_bucketlen[0], u_long *, + ipf_nattable_sz * sizeof(u_long)); + if (nat_stats.ns_bucketlen[0] == NULL) + return -6; + bzero((char *)nat_stats.ns_bucketlen[0], + ipf_nattable_sz * sizeof(u_long)); + + KMALLOCS(nat_stats.ns_bucketlen[1], u_long *, + ipf_nattable_sz * sizeof(u_long)); + if 
(nat_stats.ns_bucketlen[1] == NULL) + return -7; + + bzero((char *)nat_stats.ns_bucketlen[1], + ipf_nattable_sz * sizeof(u_long)); + + if (fr_nat_maxbucket == 0) { + for (i = ipf_nattable_sz; i > 0; i >>= 1) + fr_nat_maxbucket++; + fr_nat_maxbucket *= 2; + } + + fr_sttab_init(nat_tqb); + /* + * Increase this because we may have "keep state" following this too + * and packet storms can occur if this is removed too quickly. + */ + nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = fr_tcplastack; + nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &nat_udptq; + nat_udptq.ifq_ttl = fr_defnatage; + nat_udptq.ifq_ref = 1; + nat_udptq.ifq_head = NULL; + nat_udptq.ifq_tail = &nat_udptq.ifq_head; + MUTEX_INIT(&nat_udptq.ifq_lock, "nat ipftq udp tab"); + nat_udptq.ifq_next = &nat_icmptq; + nat_icmptq.ifq_ttl = fr_defnaticmpage; + nat_icmptq.ifq_ref = 1; + nat_icmptq.ifq_head = NULL; + nat_icmptq.ifq_tail = &nat_icmptq.ifq_head; + MUTEX_INIT(&nat_icmptq.ifq_lock, "nat icmp ipftq tab"); + nat_icmptq.ifq_next = &nat_iptq; + nat_iptq.ifq_ttl = fr_defnatipage; + nat_iptq.ifq_ref = 1; + nat_iptq.ifq_head = NULL; + nat_iptq.ifq_tail = &nat_iptq.ifq_head; + MUTEX_INIT(&nat_iptq.ifq_lock, "nat ip ipftq tab"); + nat_iptq.ifq_next = NULL; + + for (i = 0; i < IPF_TCP_NSTATES; i++) { + if (nat_tqb[i].ifq_ttl < fr_defnaticmpage) + nat_tqb[i].ifq_ttl = fr_defnaticmpage; +#ifdef LARGE_NAT + else if (nat_tqb[i].ifq_ttl > fr_defnatage) + nat_tqb[i].ifq_ttl = fr_defnatage; +#endif + } + + /* + * Increase this because we may have "keep state" following + * this too and packet storms can occur if this is removed + * too quickly. 
+ */ + nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl; + + RWLOCK_INIT(&ipf_nat, "ipf IP NAT rwlock"); + RWLOCK_INIT(&ipf_natfrag, "ipf IP NAT-Frag rwlock"); + MUTEX_INIT(&ipf_nat_new, "ipf nat new mutex"); + MUTEX_INIT(&ipf_natio, "ipf nat io mutex"); + + fr_nat_init = 1; + + return 0; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: nat_addrdr */ +/* Returns: Nil */ +/* Parameters: n(I) - pointer to NAT rule to add */ +/* */ +/* Adds a redirect rule to the hash table of redirect rules and the list of */ +/* loaded NAT rules. Updates the bitmask indicating which netmasks are in */ +/* use by redirect rules. */ +/* ------------------------------------------------------------------------ */ +static void nat_addrdr(n) +ipnat_t *n; +{ + ipnat_t **np; + u_32_t j; + u_int hv; + int k; + + k = count4bits(n->in_outmsk); + if ((k >= 0) && (k != 32)) + rdr_masks |= 1 << k; + j = (n->in_outip & n->in_outmsk); + hv = NAT_HASH_FN(j, 0, ipf_rdrrules_sz); + np = rdr_rules + hv; + while (*np != NULL) + np = &(*np)->in_rnext; + n->in_rnext = NULL; + n->in_prnext = np; + n->in_hv = hv; + *np = n; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: nat_addnat */ +/* Returns: Nil */ +/* Parameters: n(I) - pointer to NAT rule to add */ +/* */ +/* Adds a NAT map rule to the hash table of rules and the list of loaded */ +/* NAT rules. Updates the bitmask indicating which netmasks are in use by */ +/* redirect rules. 
*/ +/* ------------------------------------------------------------------------ */ +static void nat_addnat(n) +ipnat_t *n; +{ + ipnat_t **np; + u_32_t j; + u_int hv; + int k; + + k = count4bits(n->in_inmsk); + if ((k >= 0) && (k != 32)) + nat_masks |= 1 << k; + j = (n->in_inip & n->in_inmsk); + hv = NAT_HASH_FN(j, 0, ipf_natrules_sz); + np = nat_rules + hv; + while (*np != NULL) + np = &(*np)->in_mnext; + n->in_mnext = NULL; + n->in_pmnext = np; + n->in_hv = hv; + *np = n; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: nat_delrdr */ +/* Returns: Nil */ +/* Parameters: n(I) - pointer to NAT rule to delete */ +/* */ +/* Removes a redirect rule from the hash table of redirect rules. */ +/* ------------------------------------------------------------------------ */ +static void nat_delrdr(n) +ipnat_t *n; +{ + if (n->in_rnext) + n->in_rnext->in_prnext = n->in_prnext; + *n->in_prnext = n->in_rnext; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: nat_delnat */ +/* Returns: Nil */ +/* Parameters: n(I) - pointer to NAT rule to delete */ +/* */ +/* Removes a NAT map rule from the hash table of NAT map rules. */ +/* ------------------------------------------------------------------------ */ +static void nat_delnat(n) +ipnat_t *n; +{ + if (n->in_mnext != NULL) + n->in_mnext->in_pmnext = n->in_pmnext; + *n->in_pmnext = n->in_mnext; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: nat_hostmap */ +/* Returns: struct hostmap* - NULL if no hostmap could be created, */ +/* else a pointer to the hostmapping to use */ +/* Parameters: np(I) - pointer to NAT rule */ +/* real(I) - real IP address */ +/* map(I) - mapped IP address */ +/* port(I) - destination port number */ +/* Write Locks: ipf_nat */ +/* */ +/* Check if an ip address has already been allocated for a given mapping */ +/* that is not doing port based translation. 
If is not yet allocated, then */ +/* create a new entry if a non-NULL NAT rule pointer has been supplied. */ +/* ------------------------------------------------------------------------ */ +static struct hostmap *nat_hostmap(np, src, dst, map, port) +ipnat_t *np; +struct in_addr src; +struct in_addr dst; +struct in_addr map; +u_32_t port; +{ + hostmap_t *hm; + u_int hv; + + hv = (src.s_addr ^ dst.s_addr); + hv += src.s_addr; + hv += dst.s_addr; + hv %= HOSTMAP_SIZE; + for (hm = maptable[hv]; hm; hm = hm->hm_next) + if ((hm->hm_srcip.s_addr == src.s_addr) && + (hm->hm_dstip.s_addr == dst.s_addr) && + ((np == NULL) || (np == hm->hm_ipnat)) && + ((port == 0) || (port == hm->hm_port))) { + hm->hm_ref++; + return hm; + } + + if (np == NULL) + return NULL; + + KMALLOC(hm, hostmap_t *); + if (hm) { + hm->hm_next = maptable[hv]; + hm->hm_pnext = maptable + hv; + if (maptable[hv] != NULL) + maptable[hv]->hm_pnext = &hm->hm_next; + maptable[hv] = hm; + hm->hm_ipnat = np; + hm->hm_srcip = src; + hm->hm_dstip = dst; + hm->hm_mapip = map; + hm->hm_ref = 1; + hm->hm_port = port; + } + return hm; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: nat_hostmapdel */ +/* Returns: Nil */ +/* Parameters: hm(I) - pointer to hostmap structure */ +/* Write Locks: ipf_nat */ +/* */ +/* Decrement the references to this hostmap structure by one. If this */ +/* reaches zero then remove it and free it. 
*/ +/* ------------------------------------------------------------------------ */ +static void nat_hostmapdel(hm) +struct hostmap *hm; +{ + hm->hm_ref--; + if (hm->hm_ref == 0) { + if (hm->hm_next) + hm->hm_next->hm_pnext = hm->hm_pnext; + *hm->hm_pnext = hm->hm_next; + KFREE(hm); + } +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fix_outcksum */ +/* Returns: Nil */ +/* Parameters: fin(I) - pointer to packet information */ +/* sp(I) - location of 16bit checksum to update */ +/* n((I) - amount to adjust checksum by */ +/* */ +/* Adjusts the 16bit checksum by "n" for packets going out. */ +/* ------------------------------------------------------------------------ */ +void fix_outcksum(fin, sp, n) +fr_info_t *fin; +u_short *sp; +u_32_t n; +{ + u_short sumshort; + u_32_t sum1; + + if (n == 0) + return; + + if (n & NAT_HW_CKSUM) { + n &= 0xffff; + n += fin->fin_dlen; + n = (n & 0xffff) + (n >> 16); + *sp = n & 0xffff; + return; + } + sum1 = (~ntohs(*sp)) & 0xffff; + sum1 += (n); + sum1 = (sum1 >> 16) + (sum1 & 0xffff); + /* Again */ + sum1 = (sum1 >> 16) + (sum1 & 0xffff); + sumshort = ~(u_short)sum1; + *(sp) = htons(sumshort); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fix_incksum */ +/* Returns: Nil */ +/* Parameters: fin(I) - pointer to packet information */ +/* sp(I) - location of 16bit checksum to update */ +/* n((I) - amount to adjust checksum by */ +/* */ +/* Adjusts the 16bit checksum by "n" for packets going in. 
*/ +/* ------------------------------------------------------------------------ */ +void fix_incksum(fin, sp, n) +fr_info_t *fin; +u_short *sp; +u_32_t n; +{ + u_short sumshort; + u_32_t sum1; + + if (n == 0) + return; + + if (n & NAT_HW_CKSUM) { + n &= 0xffff; + n += fin->fin_dlen; + n = (n & 0xffff) + (n >> 16); + *sp = n & 0xffff; + return; + } + sum1 = (~ntohs(*sp)) & 0xffff; + sum1 += ~(n) & 0xffff; + sum1 = (sum1 >> 16) + (sum1 & 0xffff); + /* Again */ + sum1 = (sum1 >> 16) + (sum1 & 0xffff); + sumshort = ~(u_short)sum1; + *(sp) = htons(sumshort); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fix_datacksum */ +/* Returns: Nil */ +/* Parameters: sp(I) - location of 16bit checksum to update */ +/* n((I) - amount to adjust checksum by */ +/* */ +/* Fix_datacksum is used *only* for the adjustments of checksums in the */ +/* data section of an IP packet. */ +/* */ +/* The only situation in which you need to do this is when NAT'ing an */ +/* ICMP error message. Such a message, contains in its body the IP header */ +/* of the original IP packet, that causes the error. */ +/* */ +/* You can't use fix_incksum or fix_outcksum in that case, because for the */ +/* kernel the data section of the ICMP error is just data, and no special */ +/* processing like hardware cksum or ntohs processing have been done by the */ +/* kernel on the data section. 
*/ +/* ------------------------------------------------------------------------ */ +void fix_datacksum(sp, n) +u_short *sp; +u_32_t n; +{ + u_short sumshort; + u_32_t sum1; + + if (n == 0) + return; + + sum1 = (~ntohs(*sp)) & 0xffff; + sum1 += (n); + sum1 = (sum1 >> 16) + (sum1 & 0xffff); + /* Again */ + sum1 = (sum1 >> 16) + (sum1 & 0xffff); + sumshort = ~(u_short)sum1; + *(sp) = htons(sumshort); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_nat_ioctl */ +/* Returns: int - 0 == success, != 0 == failure */ +/* Parameters: data(I) - pointer to ioctl data */ +/* cmd(I) - ioctl command integer */ +/* mode(I) - file mode bits used with open */ +/* */ +/* Processes an ioctl call made to operate on the IP Filter NAT device. */ +/* ------------------------------------------------------------------------ */ +int fr_nat_ioctl(data, cmd, mode) +ioctlcmd_t cmd; +caddr_t data; +int mode; +{ + ipnat_t *nat, *nt, *n = NULL, **np = NULL; + int error = 0, ret, arg, getlock; + ipnat_t natd; + +#if (BSD >= 199306) && defined(_KERNEL) + if ((securelevel >= 2) && (mode & FWRITE)) + return EPERM; +#endif + +#if defined(__osf__) && defined(_KERNEL) + getlock = 0; +#else + getlock = (mode & NAT_LOCKHELD) ? 
0 : 1; +#endif + + nat = NULL; /* XXX gcc -Wuninitialized */ + if (cmd == (ioctlcmd_t)SIOCADNAT) { + KMALLOC(nt, ipnat_t *); + } else { + nt = NULL; + } + + if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) { + if (mode & NAT_SYSSPACE) { + bcopy(data, (char *)&natd, sizeof(natd)); + error = 0; + } else { + error = fr_inobj(data, &natd, IPFOBJ_IPNAT); + } + + } else if (cmd == (ioctlcmd_t)SIOCIPFFL) { /* SIOCFLNAT & SIOCCNATL */ + BCOPYIN(data, &arg, sizeof(arg)); + } + + if (error != 0) + goto done; + + /* + * For add/delete, look to see if the NAT entry is already present + */ + if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) { + nat = &natd; + if (nat->in_v == 0) /* For backward compat. */ + nat->in_v = 4; + nat->in_flags &= IPN_USERFLAGS; + if ((nat->in_redir & NAT_MAPBLK) == 0) { + if ((nat->in_flags & IPN_SPLIT) == 0) + nat->in_inip &= nat->in_inmsk; + if ((nat->in_flags & IPN_IPRANGE) == 0) + nat->in_outip &= nat->in_outmsk; + } + MUTEX_ENTER(&ipf_natio); + for (np = &nat_list; ((n = *np) != NULL); np = &n->in_next) + if (!bcmp((char *)&nat->in_flags, (char *)&n->in_flags, + IPN_CMPSIZ)) + break; + } + + switch (cmd) + { +#ifdef IPFILTER_LOG + case SIOCIPFFB : + { + int tmp; + + if (!(mode & FWRITE)) + error = EPERM; + else { + tmp = ipflog_clear(IPL_LOGNAT); + BCOPYOUT((char *)&tmp, (char *)data, sizeof(tmp)); + } + break; + } + case SIOCSETLG : + if (!(mode & FWRITE)) + error = EPERM; + else { + BCOPYIN((char *)data, (char *)&nat_logging, + sizeof(nat_logging)); + } + break; + case SIOCGETLG : + BCOPYOUT((char *)&nat_logging, (char *)data, + sizeof(nat_logging)); + break; + case FIONREAD : + arg = iplused[IPL_LOGNAT]; + BCOPYOUT(&arg, data, sizeof(arg)); + break; +#endif + case SIOCADNAT : + if (!(mode & FWRITE)) { + error = EPERM; + } else if (n != NULL) { + error = EEXIST; + } else if (nt == NULL) { + error = ENOMEM; + } + if (error != 0) { + MUTEX_EXIT(&ipf_natio); + break; + } + bcopy((char *)nat, (char *)nt, 
sizeof(*n)); + error = nat_siocaddnat(nt, np, getlock); + MUTEX_EXIT(&ipf_natio); + if (error == 0) + nt = NULL; + break; + case SIOCRMNAT : + if (!(mode & FWRITE)) { + error = EPERM; + n = NULL; + } else if (n == NULL) { + error = ESRCH; + } + + if (error != 0) { + MUTEX_EXIT(&ipf_natio); + break; + } + nat_siocdelnat(n, np, getlock); + + MUTEX_EXIT(&ipf_natio); + n = NULL; + break; + case SIOCGNATS : + nat_stats.ns_table[0] = nat_table[0]; + nat_stats.ns_table[1] = nat_table[1]; + nat_stats.ns_list = nat_list; + nat_stats.ns_maptable = maptable; + nat_stats.ns_nattab_sz = ipf_nattable_sz; + nat_stats.ns_nattab_max = ipf_nattable_max; + nat_stats.ns_rultab_sz = ipf_natrules_sz; + nat_stats.ns_rdrtab_sz = ipf_rdrrules_sz; + nat_stats.ns_hostmap_sz = ipf_hostmap_sz; + nat_stats.ns_instances = nat_instances; + nat_stats.ns_apslist = ap_sess_list; + error = fr_outobj(data, &nat_stats, IPFOBJ_NATSTAT); + break; + case SIOCGNATL : + { + natlookup_t nl; + + if (getlock) { + READ_ENTER(&ipf_nat); + } + error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP); + if (error == 0) { + if (nat_lookupredir(&nl) != NULL) { + error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP); + } else { + error = ESRCH; + } + } + if (getlock) { + RWLOCK_EXIT(&ipf_nat); + } + break; + } + case SIOCIPFFL : /* old SIOCFLNAT & SIOCCNATL */ + if (!(mode & FWRITE)) { + error = EPERM; + break; + } + if (getlock) { + WRITE_ENTER(&ipf_nat); + } + error = 0; + if (arg == 0) + ret = nat_flushtable(); + else if (arg == 1) + ret = nat_clearlist(); + else + error = EINVAL; + if (getlock) { + RWLOCK_EXIT(&ipf_nat); + } + if (error == 0) { + BCOPYOUT(&ret, data, sizeof(ret)); + } + break; + case SIOCPROXY : + error = appr_ioctl(data, cmd, mode); + break; + case SIOCSTLCK : + if (!(mode & FWRITE)) { + error = EPERM; + } else { + fr_lock(data, &fr_nat_lock); + } + break; + case SIOCSTPUT : + if (fr_nat_lock && (mode & FWRITE)) { + error = fr_natputent(data, getlock); + } else { + error = EACCES; + } + break; + case SIOCSTGSZ : 
+ if (fr_nat_lock) { + if (getlock) { + READ_ENTER(&ipf_nat); + } + error = fr_natgetsz(data); + if (getlock) { + RWLOCK_EXIT(&ipf_nat); + } + } else + error = EACCES; + break; + case SIOCSTGET : + if (fr_nat_lock) { + if (getlock) { + READ_ENTER(&ipf_nat); + } + error = fr_natgetent(data); + if (getlock) { + RWLOCK_EXIT(&ipf_nat); + } + } else + error = EACCES; + break; + default : + error = EINVAL; + break; + } +done: + if (nt) + KFREE(nt); + return error; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: nat_siocaddnat */ +/* Returns: int - 0 == success, != 0 == failure */ +/* Parameters: n(I) - pointer to new NAT rule */ +/* np(I) - pointer to where to insert new NAT rule */ +/* getlock(I) - flag indicating if lock on ipf_nat is held */ +/* Mutex Locks: ipf_natio */ +/* */ +/* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */ +/* from information passed to the kernel, then add it to the appropriate */ +/* NAT rule table(s). */ +/* ------------------------------------------------------------------------ */ +static int nat_siocaddnat(n, np, getlock) +ipnat_t *n, **np; +int getlock; +{ + int error = 0, i, j; + + nat_resolverule(n); + if (n->in_plabel[0] != '\0') { + if (n->in_apr == NULL) + return ENOENT; + } + + if ((n->in_age[0] == 0) && (n->in_age[1] != 0)) + return EINVAL; + + n->in_use = 0; + if (n->in_redir & NAT_MAPBLK) + n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk); + else if (n->in_flags & IPN_AUTOPORTMAP) + n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk); + else if (n->in_flags & IPN_IPRANGE) + n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip); + else if (n->in_flags & IPN_SPLIT) + n->in_space = 2; + else if (n->in_outmsk != 0) + n->in_space = ~ntohl(n->in_outmsk); + else + n->in_space = 1; + + /* + * Calculate the number of valid IP addresses in the output + * mapping range. In all cases, the range is inclusive of + * the start and ending IP addresses. 
+ * If to a CIDR address, lose 2: broadcast + network address + * (so subtract 1) + * If to a range, add one. + * If to a single IP address, set to 1. + */ + if (n->in_space) { + if ((n->in_flags & IPN_IPRANGE) != 0) + n->in_space += 1; + else + n->in_space -= 1; + } else + n->in_space = 1; + + if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) && + ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0)) + n->in_nip = ntohl(n->in_outip) + 1; + else if ((n->in_flags & IPN_SPLIT) && + (n->in_redir & NAT_REDIRECT)) + n->in_nip = ntohl(n->in_inip); + else + n->in_nip = ntohl(n->in_outip); + if (n->in_redir & NAT_MAP) { + n->in_pnext = ntohs(n->in_pmin); + /* + * Multiply by the number of ports made available. + */ + if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) { + n->in_space *= (ntohs(n->in_pmax) - + ntohs(n->in_pmin) + 1); + /* + * Because two different sources can map to + * different destinations but use the same + * local IP#/port #. + * If the result is smaller than in_space, then + * we may have wrapped around 32bits. + */ + i = n->in_inmsk; + if ((i != 0) && (i != 0xffffffff)) { + j = n->in_space * (~ntohl(i) + 1); + if (j >= n->in_space) + n->in_space = j; + else + n->in_space = 0xffffffff; + } + } + /* + * If no protocol is specified, multiple by 256 to allow for + * at least one IP:IP mapping per protocol. 
+ */ + if ((n->in_flags & IPN_TCPUDPICMP) == 0) { + j = n->in_space * 256; + if (j >= n->in_space) + n->in_space = j; + else + n->in_space = 0xffffffff; + } + } + + /* Otherwise, these fields are preset */ + + if (getlock) { + WRITE_ENTER(&ipf_nat); + } + n->in_next = NULL; + *np = n; + + if (n->in_age[0] != 0) + n->in_tqehead[0] = fr_addtimeoutqueue(&nat_utqe, n->in_age[0]); + + if (n->in_age[1] != 0) + n->in_tqehead[1] = fr_addtimeoutqueue(&nat_utqe, n->in_age[1]); + + if (n->in_redir & NAT_REDIRECT) { + n->in_flags &= ~IPN_NOTDST; + nat_addrdr(n); + } + if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) { + n->in_flags &= ~IPN_NOTSRC; + nat_addnat(n); + } + n = NULL; + nat_stats.ns_rules++; +#if SOLARIS + pfil_delayed_copy = 0; +#endif + if (getlock) { + RWLOCK_EXIT(&ipf_nat); /* WRITE */ + } + + return error; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: nat_resolvrule */ +/* Returns: Nil */ +/* Parameters: n(I) - pointer to NAT rule */ +/* */ +/* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */ +/* from information passed to the kernel, then add it to the appropriate */ +/* NAT rule table(s). 
*/ +/* ------------------------------------------------------------------------ */ +static void nat_resolverule(n) +ipnat_t *n; +{ + n->in_ifnames[0][LIFNAMSIZ - 1] = '\0'; + n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4); + + n->in_ifnames[1][LIFNAMSIZ - 1] = '\0'; + if (n->in_ifnames[1][0] == '\0') { + (void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ); + n->in_ifps[1] = n->in_ifps[0]; + } else { + n->in_ifps[1] = fr_resolvenic(n->in_ifnames[0], 4); + } + + if (n->in_plabel[0] != '\0') { + n->in_apr = appr_lookup(n->in_p, n->in_plabel); + } +} + + +/* ------------------------------------------------------------------------ */ +/* Function: nat_siocdelnat */ +/* Returns: int - 0 == success, != 0 == failure */ +/* Parameters: n(I) - pointer to new NAT rule */ +/* np(I) - pointer to where to insert new NAT rule */ +/* getlock(I) - flag indicating if lock on ipf_nat is held */ +/* Mutex Locks: ipf_natio */ +/* */ +/* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */ +/* from information passed to the kernel, then add it to the appropriate */ +/* NAT rule table(s). 
*/ +/* ------------------------------------------------------------------------ */ +static void nat_siocdelnat(n, np, getlock) +ipnat_t *n, **np; +int getlock; +{ + if (getlock) { + WRITE_ENTER(&ipf_nat); + } + if (n->in_redir & NAT_REDIRECT) + nat_delrdr(n); + if (n->in_redir & (NAT_MAPBLK|NAT_MAP)) + nat_delnat(n); + if (nat_list == NULL) { + nat_masks = 0; + rdr_masks = 0; + } + + if (n->in_tqehead[0] != NULL) { + if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) { + fr_freetimeoutqueue(n->in_tqehead[1]); + } + } + + if (n->in_tqehead[1] != NULL) { + if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) { + fr_freetimeoutqueue(n->in_tqehead[1]); + } + } + + *np = n->in_next; + + if (n->in_use == 0) { + if (n->in_apr) + appr_free(n->in_apr); + KFREE(n); + nat_stats.ns_rules--; +#if SOLARIS + if (nat_stats.ns_rules == 0) + pfil_delayed_copy = 1; +#endif + } else { + n->in_flags |= IPN_DELETE; + n->in_next = NULL; + } + if (getlock) { + RWLOCK_EXIT(&ipf_nat); /* READ/WRITE */ + } +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_natgetsz */ +/* Returns: int - 0 == success, != 0 is the error value. */ +/* Parameters: data(I) - pointer to natget structure with kernel pointer */ +/* get the size of. */ +/* */ +/* Handle SIOCSTGSZ. */ +/* Return the size of the nat list entry to be copied back to user space. */ +/* The size of the entry is stored in the ng_sz field and the enture natget */ +/* structure is copied back to the user. */ +/* ------------------------------------------------------------------------ */ +static int fr_natgetsz(data) +caddr_t data; +{ + ap_session_t *aps; + nat_t *nat, *n; + natget_t ng; + + BCOPYIN(data, &ng, sizeof(ng)); + + nat = ng.ng_ptr; + if (!nat) { + nat = nat_instances; + ng.ng_sz = 0; + /* + * Empty list so the size returned is 0. Simple. 
+ */ + if (nat == NULL) { + BCOPYOUT(&ng, data, sizeof(ng)); + return 0; + } + } else { + /* + * Make sure the pointer we're copying from exists in the + * current list of entries. Security precaution to prevent + * copying of random kernel data. + */ + for (n = nat_instances; n; n = n->nat_next) + if (n == nat) + break; + if (!n) + return ESRCH; + } + + /* + * Incluse any space required for proxy data structures. + */ + ng.ng_sz = sizeof(nat_save_t); + aps = nat->nat_aps; + if (aps != NULL) { + ng.ng_sz += sizeof(ap_session_t) - 4; + if (aps->aps_data != 0) + ng.ng_sz += aps->aps_psiz; + } + + BCOPYOUT(&ng, data, sizeof(ng)); + return 0; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_natgetent */ +/* Returns: int - 0 == success, != 0 is the error value. */ +/* Parameters: data(I) - pointer to natget structure with kernel pointer */ +/* to NAT structure to copy out. */ +/* */ +/* Handle SIOCSTGET. */ +/* Copies out NAT entry to user space. Any additional data held for a */ +/* proxy is also copied, as to is the NAT rule which was responsible for it */ +/* ------------------------------------------------------------------------ */ +static int fr_natgetent(data) +caddr_t data; +{ + int error, outsize; + ap_session_t *aps; + nat_save_t *ipn, ipns; + nat_t *n, *nat; + + error = fr_inobj(data, &ipns, IPFOBJ_NATSAVE); + if (error != 0) + return error; + + if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920)) + return EINVAL; + + KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize); + if (ipn == NULL) + return ENOMEM; + + ipn->ipn_dsize = ipns.ipn_dsize; + nat = ipns.ipn_next; + if (nat == NULL) { + nat = nat_instances; + if (nat == NULL) { + if (nat_instances == NULL) + error = ENOENT; + goto finished; + } + } else { + /* + * Make sure the pointer we're copying from exists in the + * current list of entries. Security precaution to prevent + * copying of random kernel data. 
+ */ + for (n = nat_instances; n; n = n->nat_next) + if (n == nat) + break; + if (n == NULL) { + error = ESRCH; + goto finished; + } + } + ipn->ipn_next = nat->nat_next; + + /* + * Copy the NAT structure. + */ + bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat)); + + /* + * If we have a pointer to the NAT rule it belongs to, save that too. + */ + if (nat->nat_ptr != NULL) + bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat, + sizeof(ipn->ipn_ipnat)); + + /* + * If we also know the NAT entry has an associated filter rule, + * save that too. + */ + if (nat->nat_fr != NULL) + bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr, + sizeof(ipn->ipn_fr)); + + /* + * Last but not least, if there is an application proxy session set + * up for this NAT entry, then copy that out too, including any + * private data saved along side it by the proxy. + */ + aps = nat->nat_aps; + outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data); + if (aps != NULL) { + char *s; + + if (outsize < sizeof(*aps)) { + error = ENOBUFS; + goto finished; + } + + s = ipn->ipn_data; + bcopy((char *)aps, s, sizeof(*aps)); + s += sizeof(*aps); + outsize -= sizeof(*aps); + if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz)) + bcopy(aps->aps_data, s, aps->aps_psiz); + else + error = ENOBUFS; + } + if (error == 0) { + error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize); + } + +finished: + if (ipn != NULL) { + KFREES(ipn, ipns.ipn_dsize); + } + return error; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_natputent */ +/* Returns: int - 0 == success, != 0 is the error value. */ +/* Parameters: data(I) - pointer to natget structure with NAT */ +/* structure information to load into the kernel */ +/* getlock(I) - flag indicating whether or not a write lock */ +/* on ipf_nat is already held. */ +/* */ +/* Handle SIOCSTPUT. 
*/ +/* Loads a NAT table entry from user space, including a NAT rule, proxy and */ +/* firewall rule data structures, if pointers to them indicate so. */ +/* ------------------------------------------------------------------------ */ +static int fr_natputent(data, getlock) +caddr_t data; +int getlock; +{ + nat_save_t ipn, *ipnn; + ap_session_t *aps; + nat_t *n, *nat; + frentry_t *fr; + fr_info_t fin; + ipnat_t *in; + int error; + + error = fr_inobj(data, &ipn, IPFOBJ_NATSAVE); + if (error != 0) + return error; + + /* + * Initialise early because of code at junkput label. + */ + in = NULL; + aps = NULL; + nat = NULL; + ipnn = NULL; + + /* + * New entry, copy in the rest of the NAT entry if it's size is more + * than just the nat_t structure. + */ + fr = NULL; + if (ipn.ipn_dsize > sizeof(ipn)) { + if (ipn.ipn_dsize > 81920) { + error = ENOMEM; + goto junkput; + } + + KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize); + if (ipnn == NULL) + return ENOMEM; + + error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize); + if (error != 0) { + error = EFAULT; + goto junkput; + } + } else + ipnn = &ipn; + + KMALLOC(nat, nat_t *); + if (nat == NULL) { + error = ENOMEM; + goto junkput; + } + + bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat)); + /* + * Initialize all these so that nat_delete() doesn't cause a crash. 
+ */ + bzero((char *)nat, offsetof(struct nat, nat_tqe)); + nat->nat_tqe.tqe_pnext = NULL; + nat->nat_tqe.tqe_next = NULL; + nat->nat_tqe.tqe_ifq = NULL; + nat->nat_tqe.tqe_parent = nat; + + /* + * Restore the rule associated with this nat session + */ + in = ipnn->ipn_nat.nat_ptr; + if (in != NULL) { + KMALLOC(in, ipnat_t *); + nat->nat_ptr = in; + if (in == NULL) { + error = ENOMEM; + goto junkput; + } + bzero((char *)in, offsetof(struct ipnat, in_next6)); + bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in)); + in->in_use = 1; + in->in_flags |= IPN_DELETE; + + ATOMIC_INC(nat_stats.ns_rules); + + nat_resolverule(in); + } + + /* + * Check that the NAT entry doesn't already exist in the kernel. + */ + bzero((char *)&fin, sizeof(fin)); + fin.fin_p = nat->nat_p; + if (nat->nat_dir == NAT_OUTBOUND) { + fin.fin_data[0] = ntohs(nat->nat_oport); + fin.fin_data[1] = ntohs(nat->nat_outport); + fin.fin_ifp = nat->nat_ifps[1]; + if (getlock) { + READ_ENTER(&ipf_nat); + } + n = nat_inlookup(&fin, nat->nat_flags, fin.fin_p, + nat->nat_oip, nat->nat_outip); + if (getlock) { + RWLOCK_EXIT(&ipf_nat); + } + if (n != NULL) { + error = EEXIST; + goto junkput; + } + } else if (nat->nat_dir == NAT_INBOUND) { + fin.fin_data[0] = ntohs(nat->nat_inport); + fin.fin_data[1] = ntohs(nat->nat_oport); + fin.fin_ifp = nat->nat_ifps[0]; + if (getlock) { + READ_ENTER(&ipf_nat); + } + n = nat_outlookup(&fin, nat->nat_flags, fin.fin_p, + nat->nat_outip, nat->nat_oip); + if (getlock) { + RWLOCK_EXIT(&ipf_nat); + } + if (n != NULL) { + error = EEXIST; + goto junkput; + } + } else { + error = EINVAL; + goto junkput; + } + + /* + * Restore ap_session_t structure. Include the private data allocated + * if it was there. 
+ */ + aps = nat->nat_aps; + if (aps != NULL) { + KMALLOC(aps, ap_session_t *); + nat->nat_aps = aps; + if (aps == NULL) { + error = ENOMEM; + goto junkput; + } + bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps)); + if (in != NULL) + aps->aps_apr = in->in_apr; + else + aps->aps_apr = NULL; + if (aps->aps_psiz != 0) { + if (aps->aps_psiz > 81920) { + error = ENOMEM; + goto junkput; + } + KMALLOCS(aps->aps_data, void *, aps->aps_psiz); + if (aps->aps_data == NULL) { + error = ENOMEM; + goto junkput; + } + bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data, + aps->aps_psiz); + } else { + aps->aps_psiz = 0; + aps->aps_data = NULL; + } + } + + /* + * If there was a filtering rule associated with this entry then + * build up a new one. + */ + fr = nat->nat_fr; + if (fr != NULL) { + if ((nat->nat_flags & SI_NEWFR) != 0) { + KMALLOC(fr, frentry_t *); + nat->nat_fr = fr; + if (fr == NULL) { + error = ENOMEM; + goto junkput; + } + ipnn->ipn_nat.nat_fr = fr; + fr->fr_ref = 1; + (void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE); + bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr)); + MUTEX_NUKE(&fr->fr_lock); + MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock"); + } else { + READ_ENTER(&ipf_nat); + for (n = nat_instances; n; n = n->nat_next) + if (n->nat_fr == fr) + break; + + if (n != NULL) { + MUTEX_ENTER(&fr->fr_lock); + fr->fr_ref++; + MUTEX_EXIT(&fr->fr_lock); + } + RWLOCK_EXIT(&ipf_nat); + + if (!n) { + error = ESRCH; + goto junkput; + } + } + } + + if (ipnn != &ipn) { + KFREES(ipnn, ipn.ipn_dsize); + ipnn = NULL; + } + + if (getlock) { + WRITE_ENTER(&ipf_nat); + } + error = nat_insert(nat, nat->nat_rev); + if ((error == 0) && (aps != NULL)) { + aps->aps_next = ap_sess_list; + ap_sess_list = aps; + } + if (getlock) { + RWLOCK_EXIT(&ipf_nat); + } + + if (error == 0) + return 0; + + error = ENOMEM; + +junkput: + if (fr != NULL) + (void) fr_derefrule(&fr); + + if ((ipnn != NULL) && (ipnn != &ipn)) { + KFREES(ipnn, ipn.ipn_dsize); + } + if (nat != NULL) { + if (aps != NULL) { + 
if (aps->aps_data != NULL) { + KFREES(aps->aps_data, aps->aps_psiz); + } + KFREE(aps); + } + if (in != NULL) { + if (in->in_apr) + appr_free(in->in_apr); + KFREE(in); + } + KFREE(nat); + } + return error; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: nat_delete */ +/* Returns: Nil */ +/* Parameters: natd(I) - pointer to NAT structure to delete */ +/* logtype(I) - type of LOG record to create before deleting */ +/* Write Lock: ipf_nat */ +/* */ +/* Delete a nat entry from the various lists and table. If NAT logging is */ +/* enabled then generate a NAT log record for this event. */ +/* ------------------------------------------------------------------------ */ +static void nat_delete(nat, logtype) +struct nat *nat; +int logtype; +{ + struct ipnat *ipn; + + if (logtype != 0 && nat_logging != 0) + nat_log(nat, logtype); + + MUTEX_ENTER(&ipf_nat_new); + + /* + * Take it as a general indication that all the pointers are set if + * nat_pnext is set. 
+ */ + if (nat->nat_pnext != NULL) { + nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--; + nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--; + + *nat->nat_pnext = nat->nat_next; + if (nat->nat_next != NULL) { + nat->nat_next->nat_pnext = nat->nat_pnext; + nat->nat_next = NULL; + } + nat->nat_pnext = NULL; + + *nat->nat_phnext[0] = nat->nat_hnext[0]; + if (nat->nat_hnext[0] != NULL) { + nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0]; + nat->nat_hnext[0] = NULL; + } + nat->nat_phnext[0] = NULL; + + *nat->nat_phnext[1] = nat->nat_hnext[1]; + if (nat->nat_hnext[1] != NULL) { + nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1]; + nat->nat_hnext[1] = NULL; + } + nat->nat_phnext[1] = NULL; + + if ((nat->nat_flags & SI_WILDP) != 0) + nat_stats.ns_wilds--; + } + + if (nat->nat_me != NULL) { + *nat->nat_me = NULL; + nat->nat_me = NULL; + } + + fr_deletequeueentry(&nat->nat_tqe); + + nat->nat_ref--; + if (nat->nat_ref > 0) { + MUTEX_EXIT(&ipf_nat_new); + return; + } + +#ifdef IPFILTER_SYNC + if (nat->nat_sync) + ipfsync_del(nat->nat_sync); +#endif + + if (nat->nat_fr != NULL) + (void)fr_derefrule(&nat->nat_fr); + + if (nat->nat_hm != NULL) + nat_hostmapdel(nat->nat_hm); + + /* + * If there is an active reference from the nat entry to its parent + * rule, decrement the rule's reference count and free it too if no + * longer being used. + */ + ipn = nat->nat_ptr; + if (ipn != NULL) { + ipn->in_space++; + ipn->in_use--; + if (ipn->in_use == 0 && (ipn->in_flags & IPN_DELETE)) { + if (ipn->in_apr) + appr_free(ipn->in_apr); + KFREE(ipn); + nat_stats.ns_rules--; +#if SOLARIS + if (nat_stats.ns_rules == 0) + pfil_delayed_copy = 1; +#endif + } + } + + MUTEX_DESTROY(&nat->nat_lock); + + aps_free(nat->nat_aps); + nat_stats.ns_inuse--; + MUTEX_EXIT(&ipf_nat_new); + + /* + * If there's a fragment table entry too for this nat entry, then + * dereference that as well. This is after nat_lock is released + * because of Tru64. 
+ */ + fr_forgetnat((void *)nat); + + KFREE(nat); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: nat_flushtable */ +/* Returns: int - number of NAT rules deleted */ +/* Parameters: Nil */ +/* */ +/* Deletes all currently active NAT sessions. In deleting each NAT entry a */ +/* log record should be emitted in nat_delete() if NAT logging is enabled. */ +/* ------------------------------------------------------------------------ */ +/* + * nat_flushtable - clear the NAT table of all mapping entries. + */ +static int nat_flushtable() +{ + nat_t *nat; + int j = 0; + + /* + * ALL NAT mappings deleted, so lets just make the deletions + * quicker. + */ + if (nat_table[0] != NULL) + bzero((char *)nat_table[0], + sizeof(nat_table[0]) * ipf_nattable_sz); + if (nat_table[1] != NULL) + bzero((char *)nat_table[1], + sizeof(nat_table[1]) * ipf_nattable_sz); + + while ((nat = nat_instances) != NULL) { + nat_delete(nat, NL_FLUSH); + j++; + } + + nat_stats.ns_inuse = 0; + return j; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: nat_clearlist */ +/* Returns: int - number of NAT/RDR rules deleted */ +/* Parameters: Nil */ +/* */ +/* Delete all rules in the current list of rules. There is nothing elegant */ +/* about this cleanup: simply free all entries on the list of rules and */ +/* clear out the tables used for hashed NAT rule lookups. 
*/
/* ------------------------------------------------------------------------ */
static int nat_clearlist()
{
	ipnat_t *rule;
	int removed;

	/* Wipe the hashed lookup tables for map and rdr rules. */
	if (nat_rules != NULL)
		bzero((char *)nat_rules, sizeof(*nat_rules) * ipf_natrules_sz);
	if (rdr_rules != NULL)
		bzero((char *)rdr_rules, sizeof(*rdr_rules) * ipf_rdrrules_sz);

	/*
	 * Pop rules off the head of nat_list one at a time.  A rule that is
	 * still in use is only detached and flagged with IPN_DELETE; an
	 * unused rule is freed straight away.  Both kinds are counted.
	 */
	for (removed = 0; (rule = nat_list) != NULL; removed++) {
		nat_list = rule->in_next;

		if (rule->in_use != 0) {
			rule->in_flags |= IPN_DELETE;
			rule->in_next = NULL;
			continue;
		}

		if (rule->in_apr != NULL)
			appr_free(rule->in_apr);
		KFREE(rule);
		nat_stats.ns_rules--;
	}
#if SOLARIS
	pfil_delayed_copy = 1;
#endif
	nat_masks = 0;
	rdr_masks = 0;
	return removed;
}


/* ------------------------------------------------------------------------ */
/* Function:    nat_newmap                                                  */
/* Returns:     int - -1 == error, 0 == success                             */
/* Parameters:  fin(I) - pointer to packet information                      */
/*              nat(I) - pointer to NAT entry                               */
/*              ni(I)  - pointer to structure with misc. information needed */
/*                       to create new NAT entry.                           */
/*                                                                          */
/* Given an empty NAT structure, populate it with new information about a   */
/* new NAT session, as defined by the matching NAT rule.                    */
/* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
/* to the new IP address for the translation.                               
*/ +/* ------------------------------------------------------------------------ */ +static INLINE int nat_newmap(fin, nat, ni) +fr_info_t *fin; +nat_t *nat; +natinfo_t *ni; +{ + u_short st_port, dport, sport, port, sp, dp; + struct in_addr in, inb; + hostmap_t *hm; + u_32_t flags; + u_32_t st_ip; + ipnat_t *np; + nat_t *natl; + int l; + + /* + * If it's an outbound packet which doesn't match any existing + * record, then create a new port + */ + l = 0; + hm = NULL; + np = ni->nai_np; + st_ip = np->in_nip; + st_port = np->in_pnext; + flags = ni->nai_flags; + sport = ni->nai_sport; + dport = ni->nai_dport; + + /* + * Do a loop until we either run out of entries to try or we find + * a NAT mapping that isn't currently being used. This is done + * because the change to the source is not (usually) being fixed. + */ + do { + port = 0; + in.s_addr = htonl(np->in_nip); + if (l == 0) { + /* + * Check to see if there is an existing NAT + * setup for this IP address pair. + */ + hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, + in, 0); + if (hm != NULL) + in.s_addr = hm->hm_mapip.s_addr; + } else if ((l == 1) && (hm != NULL)) { + nat_hostmapdel(hm); + hm = NULL; + } + in.s_addr = ntohl(in.s_addr); + + nat->nat_hm = hm; + + if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) { + if (l > 0) + return -1; + } + + if (np->in_redir == NAT_BIMAP && + np->in_inmsk == np->in_outmsk) { + /* + * map the address block in a 1:1 fashion + */ + in.s_addr = np->in_outip; + in.s_addr |= fin->fin_saddr & ~np->in_inmsk; + in.s_addr = ntohl(in.s_addr); + + } else if (np->in_redir & NAT_MAPBLK) { + if ((l >= np->in_ppip) || ((l > 0) && + !(flags & IPN_TCPUDP))) + return -1; + /* + * map-block - Calculate destination address. + */ + in.s_addr = ntohl(fin->fin_saddr); + in.s_addr &= ntohl(~np->in_inmsk); + inb.s_addr = in.s_addr; + in.s_addr /= np->in_ippip; + in.s_addr &= ntohl(~np->in_outmsk); + in.s_addr += ntohl(np->in_outip); + /* + * Calculate destination port. 
+ */ + if ((flags & IPN_TCPUDP) && + (np->in_ppip != 0)) { + port = ntohs(sport) + l; + port %= np->in_ppip; + port += np->in_ppip * + (inb.s_addr % np->in_ippip); + port += MAPBLK_MINPORT; + port = htons(port); + } + + } else if ((np->in_outip == 0) && + (np->in_outmsk == 0xffffffff)) { + /* + * 0/32 - use the interface's IP address. + */ + if ((l > 0) || + fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, + &in, NULL) == -1) + return -1; + in.s_addr = ntohl(in.s_addr); + + } else if ((np->in_outip == 0) && (np->in_outmsk == 0)) { + /* + * 0/0 - use the original source address/port. + */ + if (l > 0) + return -1; + in.s_addr = ntohl(fin->fin_saddr); + + } else if ((np->in_outmsk != 0xffffffff) && + (np->in_pnext == 0) && ((l > 0) || (hm == NULL))) + np->in_nip++; + + natl = NULL; + + if ((flags & IPN_TCPUDP) && + ((np->in_redir & NAT_MAPBLK) == 0) && + (np->in_flags & IPN_AUTOPORTMAP)) { + /* + * "ports auto" (without map-block) + */ + if ((l > 0) && (l % np->in_ppip == 0)) { + if (l > np->in_space) { + return -1; + } else if ((l > np->in_ppip) && + np->in_outmsk != 0xffffffff) + np->in_nip++; + } + if (np->in_ppip != 0) { + port = ntohs(sport); + port += (l % np->in_ppip); + port %= np->in_ppip; + port += np->in_ppip * + (ntohl(fin->fin_saddr) % + np->in_ippip); + port += MAPBLK_MINPORT; + port = htons(port); + } + + } else if (((np->in_redir & NAT_MAPBLK) == 0) && + (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) { + /* + * Standard port translation. Select next port. 
+ */ + port = htons(np->in_pnext++); + + if (np->in_pnext > ntohs(np->in_pmax)) { + np->in_pnext = ntohs(np->in_pmin); + if (np->in_outmsk != 0xffffffff) + np->in_nip++; + } + } + + if (np->in_flags & IPN_IPRANGE) { + if (np->in_nip > ntohl(np->in_outmsk)) + np->in_nip = ntohl(np->in_outip); + } else { + if ((np->in_outmsk != 0xffffffff) && + ((np->in_nip + 1) & ntohl(np->in_outmsk)) > + ntohl(np->in_outip)) + np->in_nip = ntohl(np->in_outip) + 1; + } + + if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY))) + port = sport; + + /* + * Here we do a lookup of the connection as seen from + * the outside. If an IP# pair already exists, try + * again. So if you have A->B becomes C->B, you can + * also have D->E become C->E but not D->B causing + * another C->B. Also take protocol and ports into + * account when determining whether a pre-existing + * NAT setup will cause an external conflict where + * this is appropriate. + */ + inb.s_addr = htonl(in.s_addr); + sp = fin->fin_data[0]; + dp = fin->fin_data[1]; + fin->fin_data[0] = fin->fin_data[1]; + fin->fin_data[1] = htons(port); + natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH), + (u_int)fin->fin_p, fin->fin_dst, inb); + fin->fin_data[0] = sp; + fin->fin_data[1] = dp; + + /* + * Has the search wrapped around and come back to the + * start ? 
+ */ + if ((natl != NULL) && + (np->in_pnext != 0) && (st_port == np->in_pnext) && + (np->in_nip != 0) && (st_ip == np->in_nip)) + return -1; + l++; + } while (natl != NULL); + + if (np->in_space > 0) + np->in_space--; + + /* Setup the NAT table */ + nat->nat_inip = fin->fin_src; + nat->nat_outip.s_addr = htonl(in.s_addr); + nat->nat_oip = fin->fin_dst; + if (nat->nat_hm == NULL) + nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, + nat->nat_outip, 0); + + /* + * The ICMP checksum does not have a pseudo header containing + * the IP addresses + */ + ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr)); + ni->nai_sum2 = LONG_SUM(in.s_addr); + if ((flags & IPN_TCPUDP)) { + ni->nai_sum1 += ntohs(sport); + ni->nai_sum2 += ntohs(port); + } + + if (flags & IPN_TCPUDP) { + nat->nat_inport = sport; + nat->nat_outport = port; /* sport */ + nat->nat_oport = dport; + ((tcphdr_t *)fin->fin_dp)->th_sport = port; + } else if (flags & IPN_ICMPQUERY) { + ((icmphdr_t *)fin->fin_dp)->icmp_id = port; + nat->nat_inport = port; + nat->nat_outport = port; + } + + ni->nai_ip.s_addr = in.s_addr; + ni->nai_port = port; + ni->nai_nport = dport; + return 0; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: nat_newrdr */ +/* Returns: int - -1 == error, 0 == success (no move), 1 == success and */ +/* allow rule to be moved if IPN_ROUNDR is set. */ +/* Parameters: fin(I) - pointer to packet information */ +/* nat(I) - pointer to NAT entry */ +/* ni(I) - pointer to structure with misc. information needed */ +/* to create new NAT entry. */ +/* */ +/* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/ +/* to the new IP address for the translation. 
*/ +/* ------------------------------------------------------------------------ */ +static INLINE int nat_newrdr(fin, nat, ni) +fr_info_t *fin; +nat_t *nat; +natinfo_t *ni; +{ + u_short nport, dport, sport; + struct in_addr in; + hostmap_t *hm; + u_32_t flags; + ipnat_t *np; + int move; + + move = 1; + hm = NULL; + in.s_addr = 0; + np = ni->nai_np; + flags = ni->nai_flags; + sport = ni->nai_sport; + dport = ni->nai_dport; + + /* + * If the matching rule has IPN_STICKY set, then we want to have the + * same rule kick in as before. Why would this happen? If you have + * a collection of rdr rules with "round-robin sticky", the current + * packet might match a different one to the previous connection but + * we want the same destination to be used. + */ + if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == + (IPN_ROUNDR|IPN_STICKY)) { + hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in, + (u_32_t)dport); + if (hm != NULL) { + in.s_addr = ntohl(hm->hm_mapip.s_addr); + np = hm->hm_ipnat; + ni->nai_np = np; + move = 0; + } + } + + /* + * Otherwise, it's an inbound packet. Most likely, we don't + * want to rewrite source ports and source addresses. Instead, + * we want to rewrite to a fixed internal address and fixed + * internal port. + */ + if (np->in_flags & IPN_SPLIT) { + in.s_addr = np->in_nip; + + if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) { + hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, + in, (u_32_t)dport); + if (hm != NULL) { + in.s_addr = hm->hm_mapip.s_addr; + move = 0; + } + } + + if (hm == NULL || hm->hm_ref == 1) { + if (np->in_inip == htonl(in.s_addr)) { + np->in_nip = ntohl(np->in_inmsk); + move = 0; + } else { + np->in_nip = ntohl(np->in_inip); + } + } + + } else if ((np->in_inip == 0) && (np->in_inmsk == 0xffffffff)) { + /* + * 0/32 - use the interface's IP address. 
+ */ + if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL) == -1) + return -1; + in.s_addr = ntohl(in.s_addr); + + } else if ((np->in_inip == 0) && (np->in_inmsk== 0)) { + /* + * 0/0 - use the original destination address/port. + */ + in.s_addr = ntohl(fin->fin_daddr); + + } else if (np->in_redir == NAT_BIMAP && + np->in_inmsk == np->in_outmsk) { + /* + * map the address block in a 1:1 fashion + */ + in.s_addr = np->in_inip; + in.s_addr |= fin->fin_daddr & ~np->in_inmsk; + in.s_addr = ntohl(in.s_addr); + } else { + in.s_addr = ntohl(np->in_inip); + } + + if ((np->in_pnext == 0) || ((flags & NAT_NOTRULEPORT) != 0)) + nport = dport; + else { + /* + * Whilst not optimized for the case where + * pmin == pmax, the gain is not significant. + */ + if (((np->in_flags & IPN_FIXEDDPORT) == 0) && + (np->in_pmin != np->in_pmax)) { + nport = ntohs(dport) - ntohs(np->in_pmin) + + ntohs(np->in_pnext); + nport = htons(nport); + } else + nport = np->in_pnext; + } + + /* + * When the redirect-to address is set to 0.0.0.0, just + * assume a blank `forwarding' of the packet. We don't + * setup any translation for this either. 
+ */ + if (in.s_addr == 0) { + if (nport == dport) + return -1; + in.s_addr = ntohl(fin->fin_daddr); + } + + nat->nat_inip.s_addr = htonl(in.s_addr); + nat->nat_outip = fin->fin_dst; + nat->nat_oip = fin->fin_src; + + ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr)) + ntohs(dport); + ni->nai_sum2 = LONG_SUM(in.s_addr) + ntohs(nport); + + ni->nai_ip.s_addr = in.s_addr; + ni->nai_nport = nport; + ni->nai_port = sport; + + if (flags & IPN_TCPUDP) { + nat->nat_inport = nport; + nat->nat_outport = dport; + nat->nat_oport = sport; + ((tcphdr_t *)fin->fin_dp)->th_dport = nport; + } else if (flags & IPN_ICMPQUERY) { + ((icmphdr_t *)fin->fin_dp)->icmp_id = nport; + nat->nat_inport = nport; + nat->nat_outport = nport; + } + + return move; +} + +/* ------------------------------------------------------------------------ */ +/* Function: nat_new */ +/* Returns: nat_t* - NULL == failure to create new NAT structure, */ +/* else pointer to new NAT structure */ +/* Parameters: fin(I) - pointer to packet information */ +/* np(I) - pointer to NAT rule */ +/* natsave(I) - pointer to where to store NAT struct pointer */ +/* flags(I) - flags describing the current packet */ +/* direction(I) - direction of packet (in/out) */ +/* Write Lock: ipf_nat */ +/* */ +/* Attempts to create a new NAT entry. Does not actually change the packet */ +/* in any way. */ +/* */ +/* This fucntion is in three main parts: (1) deal with creating a new NAT */ +/* structure for a "MAP" rule (outgoing NAT translation); (2) deal with */ +/* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */ +/* and (3) building that structure and putting it into the NAT table(s). 
*/ +/* ------------------------------------------------------------------------ */ +nat_t *nat_new(fin, np, natsave, flags, direction) +fr_info_t *fin; +ipnat_t *np; +nat_t **natsave; +u_int flags; +int direction; +{ + u_short port = 0, sport = 0, dport = 0, nport = 0; + tcphdr_t *tcp = NULL; + hostmap_t *hm = NULL; + struct in_addr in; + nat_t *nat, *natl; + u_int nflags; + natinfo_t ni; + u_32_t sumd; + int move; +#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC) + qpktinfo_t *qpi = fin->fin_qpi; +#endif + + if (nat_stats.ns_inuse >= ipf_nattable_max) { + nat_stats.ns_memfail++; + return NULL; + } + + move = 1; + nflags = np->in_flags & flags; + nflags &= NAT_FROMRULE; + + ni.nai_np = np; + ni.nai_nflags = nflags; + ni.nai_flags = flags; + + /* Give me a new nat */ + KMALLOC(nat, nat_t *); + if (nat == NULL) { + nat_stats.ns_memfail++; + /* + * Try to automatically tune the max # of entries in the + * table allowed to be less than what will cause kmem_alloc() + * to fail and try to eliminate panics due to out of memory + * conditions arising. + */ + if (ipf_nattable_max > ipf_nattable_sz) { + ipf_nattable_max = nat_stats.ns_inuse - 100; + printf("ipf_nattable_max reduced to %d\n", + ipf_nattable_max); + } + return NULL; + } + + if (flags & IPN_TCPUDP) { + tcp = fin->fin_dp; + ni.nai_sport = htons(fin->fin_sport); + ni.nai_dport = htons(fin->fin_dport); + } else if (flags & IPN_ICMPQUERY) { + /* + * In the ICMP query NAT code, we translate the ICMP id fields + * to make them unique. This is indepedent of the ICMP type + * (e.g. in the unlikely event that a host sends an echo and + * an tstamp request with the same id, both packets will have + * their ip address/id field changed in the same way). + */ + /* The icmp_id field is used by the sender to identify the + * process making the icmp request. (the receiver justs + * copies it back in its response). So, it closely matches + * the concept of source port. 
We overlay sport, so we can + * maximally reuse the existing code. + */ + ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id; + ni.nai_dport = ni.nai_sport; + } + + bzero((char *)nat, sizeof(*nat)); + nat->nat_flags = flags; + + if ((flags & NAT_SLAVE) == 0) { + MUTEX_ENTER(&ipf_nat_new); + } + + /* + * Search the current table for a match. + */ + if (direction == NAT_OUTBOUND) { + /* + * We can now arrange to call this for the same connection + * because ipf_nat_new doesn't protect the code path into + * this function. + */ + natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p, + fin->fin_src, fin->fin_dst); + if (natl != NULL) { + nat = natl; + goto done; + } + + move = nat_newmap(fin, nat, &ni); + if (move == -1) + goto badnat; + + np = ni.nai_np; + in = ni.nai_ip; + } else { + /* + * NAT_INBOUND is used only for redirects rules + */ + natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p, + fin->fin_src, fin->fin_dst); + if (natl != NULL) { + nat = natl; + goto done; + } + + move = nat_newrdr(fin, nat, &ni); + if (move == -1) + goto badnat; + + np = ni.nai_np; + in = ni.nai_ip; + } + port = ni.nai_port; + nport = ni.nai_nport; + + if ((move == 1) && (np->in_flags & IPN_ROUNDR)) { + if (np->in_redir == NAT_REDIRECT) { + nat_delrdr(np); + nat_addrdr(np); + } else if (np->in_redir == NAT_MAP) { + nat_delnat(np); + nat_addnat(np); + } + } + + if (flags & IPN_TCPUDP) { + sport = ni.nai_sport; + dport = ni.nai_dport; + } else if (flags & IPN_ICMPQUERY) { + sport = ni.nai_sport; + dport = 0; + } + + CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd); + nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16); +#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC) + if ((flags & IPN_TCP) && dohwcksum && +#ifndef IRE_ILL_CN + (((ill_t *)qpi->qpi_ill)->ill_ick.ick_magic == ICK_M_CTL_MAGIC)) { +#else + (((s_ill_t *)qpi->qpi_ill)->ill_ick.ick_magic == ICK_M_CTL_MAGIC)) { +#endif /* IRE_ILL_CN */ + if (direction == NAT_OUTBOUND) + ni.nai_sum1 = LONG_SUM(in.s_addr); + 
else + ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr)); + ni.nai_sum1 += LONG_SUM(ntohl(fin->fin_daddr)); + ni.nai_sum1 += 30; + ni.nai_sum1 = (ni.nai_sum1 & 0xffff) + (ni.nai_sum1 >> 16); + nat->nat_sumd[1] = NAT_HW_CKSUM|(ni.nai_sum1 & 0xffff); + } else +#endif + nat->nat_sumd[1] = nat->nat_sumd[0]; + + if ((flags & IPN_TCPUDPICMP) && ((sport != port) || (dport != nport))) { + if (direction == NAT_OUTBOUND) + ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr)); + else + ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr)); + + ni.nai_sum2 = LONG_SUM(in.s_addr); + + CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd); + nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16); + } else { + nat->nat_ipsumd = nat->nat_sumd[0]; + if (!(flags & IPN_TCPUDPICMP)) { + nat->nat_sumd[0] = 0; + nat->nat_sumd[1] = 0; + } + } + + if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) { + goto badnat; + } + if (flags & SI_WILDP) + nat_stats.ns_wilds++; + goto done; +badnat: + nat_stats.ns_badnat++; + if ((hm = nat->nat_hm) != NULL) + nat_hostmapdel(hm); + KFREE(nat); + nat = NULL; +done: + if ((flags & NAT_SLAVE) == 0) { + MUTEX_EXIT(&ipf_nat_new); + } + return nat; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: nat_finalise */ +/* Returns: int - 0 == sucess, -1 == failure */ +/* Parameters: fin(I) - pointer to packet information */ +/* nat(I) - pointer to NAT entry */ +/* ni(I) - pointer to structure with misc. information needed */ +/* to create new NAT entry. */ +/* Write Lock: ipf_nat */ +/* */ +/* This is the tail end of constructing a new NAT entry and is the same */ +/* for both IPv4 and IPv6. 
*/ +/* ------------------------------------------------------------------------ */ +/*ARGSUSED*/ +static INLINE int nat_finalise(fin, nat, ni, tcp, natsave, direction) +fr_info_t *fin; +nat_t *nat; +natinfo_t *ni; +tcphdr_t *tcp; +nat_t **natsave; +int direction; +{ + frentry_t *fr; + ipnat_t *np; + + np = ni->nai_np; + + COPYIFNAME(fin->fin_ifp, nat->nat_ifnames[0]); +#ifdef IPFILTER_SYNC + if ((nat->nat_flags & SI_CLONE) == 0) + nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat); +#endif + + nat->nat_me = natsave; + nat->nat_dir = direction; + nat->nat_ifps[0] = fin->fin_ifp; + nat->nat_ptr = np; + nat->nat_p = fin->fin_p; + nat->nat_mssclamp = np->in_mssclamp; + fr = fin->fin_fr; + nat->nat_fr = fr; + + if ((np->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0)) + if (appr_new(fin, nat) == -1) + return -1; + + if (nat_insert(nat, fin->fin_rev) == 0) { + if (nat_logging) + nat_log(nat, (u_int)np->in_redir); + np->in_use++; + if (fr != NULL) { + MUTEX_ENTER(&fr->fr_lock); + fr->fr_ref++; + MUTEX_EXIT(&fr->fr_lock); + } + return 0; + } + + /* + * nat_insert failed, so cleanup time... + */ + return -1; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: nat_insert */ +/* Returns: int - 0 == sucess, -1 == failure */ +/* Parameters: nat(I) - pointer to NAT structure */ +/* rev(I) - flag indicating forward/reverse direction of packet */ +/* Write Lock: ipf_nat */ +/* */ +/* Insert a NAT entry into the hash tables for searching and add it to the */ +/* list of active NAT entries. Adjust global counters when complete. */ +/* ------------------------------------------------------------------------ */ +int nat_insert(nat, rev) +nat_t *nat; +int rev; +{ + u_int hv1, hv2; + nat_t **natp; + + /* + * Try and return an error as early as possible, so calculate the hash + * entry numbers first and then proceed. 
+ */ + if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) { + hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, + 0xffffffff); + hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport, + ipf_nattable_sz); + hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, + 0xffffffff); + hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport, + ipf_nattable_sz); + } else { + hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff); + hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1, ipf_nattable_sz); + hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff); + hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2, ipf_nattable_sz); + } + + if (nat_stats.ns_bucketlen[0][hv1] >= fr_nat_maxbucket || + nat_stats.ns_bucketlen[1][hv2] >= fr_nat_maxbucket) { + return -1; + } + + nat->nat_hv[0] = hv1; + nat->nat_hv[1] = hv2; + + MUTEX_INIT(&nat->nat_lock, "nat entry lock"); + + nat->nat_rev = rev; + nat->nat_ref = 1; + nat->nat_bytes[0] = 0; + nat->nat_pkts[0] = 0; + nat->nat_bytes[1] = 0; + nat->nat_pkts[1] = 0; + + nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0'; + nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4); + + if (nat->nat_ifnames[1][0] !='\0') { + nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0'; + nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4); + } else { + (void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0], + LIFNAMSIZ); + nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0'; + nat->nat_ifps[1] = nat->nat_ifps[0]; + } + + nat->nat_next = nat_instances; + nat->nat_pnext = &nat_instances; + if (nat_instances) + nat_instances->nat_pnext = &nat->nat_next; + nat_instances = nat; + + natp = &nat_table[0][hv1]; + if (*natp) + (*natp)->nat_phnext[0] = &nat->nat_hnext[0]; + nat->nat_phnext[0] = natp; + nat->nat_hnext[0] = *natp; + *natp = nat; + nat_stats.ns_bucketlen[0][hv1]++; + + natp = &nat_table[1][hv2]; + if (*natp) + (*natp)->nat_phnext[1] = &nat->nat_hnext[1]; + nat->nat_phnext[1] = natp; + nat->nat_hnext[1] = *natp; + *natp = nat; + nat_stats.ns_bucketlen[1][hv2]++; 
+ + fr_setnatqueue(nat, rev); + + nat_stats.ns_added++; + nat_stats.ns_inuse++; + return 0; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: nat_icmperrorlookup */ +/* Returns: nat_t* - point to matching NAT structure */ +/* Parameters: fin(I) - pointer to packet information */ +/* dir(I) - direction of packet (in/out) */ +/* */ +/* Check if the ICMP error message is related to an existing TCP, UDP or */ +/* ICMP query nat entry. It is assumed that the packet is already of the */ +/* the required length. */ +/* ------------------------------------------------------------------------ */ +nat_t *nat_icmperrorlookup(fin, dir) +fr_info_t *fin; +int dir; +{ + int flags = 0, minlen; + icmphdr_t *orgicmp; + tcphdr_t *tcp = NULL; + u_short data[2]; + nat_t *nat; + ip_t *oip; + u_int p; + + /* + * Does it at least have the return (basic) IP header ? + * Only a basic IP header (no options) should be with an ICMP error + * header. Also, if it's not an error type, then return. + */ + if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR)) + return NULL; + + /* + * Check packet size + */ + oip = (ip_t *)((char *)fin->fin_dp + 8); + minlen = IP_HL(oip) << 2; + if ((minlen < sizeof(ip_t)) || + (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)) + return NULL; + /* + * Is the buffer big enough for all of it ? It's the size of the IP + * header claimed in the encapsulated part which is of concern. It + * may be too big to be in this buffer but not so big that it's + * outside the ICMP packet, leading to TCP deref's causing problems. + * This is possible because we don't know how big oip_hl is when we + * do the pullup early in fr_check() and thus can't gaurantee it is + * all here now. 
+ */ +#ifdef _KERNEL + { + mb_t *m; + + m = fin->fin_m; +# if defined(MENTAT) + if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr) + return NULL; +# else + if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > + (char *)fin->fin_ip + M_LEN(m)) + return NULL; +# endif + } +#endif + + if (fin->fin_daddr != oip->ip_src.s_addr) + return NULL; + + p = oip->ip_p; + if (p == IPPROTO_TCP) + flags = IPN_TCP; + else if (p == IPPROTO_UDP) + flags = IPN_UDP; + else if (p == IPPROTO_ICMP) { + orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2)); + + /* see if this is related to an ICMP query */ + if (nat_icmpquerytype4(orgicmp->icmp_type)) { + data[0] = fin->fin_data[0]; + data[1] = fin->fin_data[1]; + fin->fin_data[0] = 0; + fin->fin_data[1] = orgicmp->icmp_id; + + flags = IPN_ICMPERR|IPN_ICMPQUERY; + /* + * NOTE : dir refers to the direction of the original + * ip packet. By definition the icmp error + * message flows in the opposite direction. + */ + if (dir == NAT_INBOUND) + nat = nat_inlookup(fin, flags, p, oip->ip_dst, + oip->ip_src); + else + nat = nat_outlookup(fin, flags, p, oip->ip_dst, + oip->ip_src); + fin->fin_data[0] = data[0]; + fin->fin_data[1] = data[1]; + return nat; + } + } + + if (flags & IPN_TCPUDP) { + minlen += 8; /* + 64bits of data to get ports */ + if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen) + return NULL; + + data[0] = fin->fin_data[0]; + data[1] = fin->fin_data[1]; + tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2)); + fin->fin_data[0] = ntohs(tcp->th_dport); + fin->fin_data[1] = ntohs(tcp->th_sport); + + if (dir == NAT_INBOUND) { + nat = nat_inlookup(fin, flags, p, oip->ip_dst, + oip->ip_src); + } else { + nat = nat_outlookup(fin, flags, p, oip->ip_dst, + oip->ip_src); + } + fin->fin_data[0] = data[0]; + fin->fin_data[1] = data[1]; + return nat; + } + if (dir == NAT_INBOUND) + return nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src); + else + return nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src); +} + + +/* 
------------------------------------------------------------------------ */ +/* Function: nat_icmperror */ +/* Returns: nat_t* - point to matching NAT structure */ +/* Parameters: fin(I) - pointer to packet information */ +/* nflags(I) - NAT flags for this packet */ +/* dir(I) - direction of packet (in/out) */ +/* */ +/* Fix up an ICMP packet which is an error message for an existing NAT */ +/* session. This will correct both packet header data and checksums. */ +/* */ +/* This should *ONLY* be used for incoming ICMP error packets to make sure */ +/* a NAT'd ICMP packet gets correctly recognised. */ +/* ------------------------------------------------------------------------ */ +nat_t *nat_icmperror(fin, nflags, dir) +fr_info_t *fin; +u_int *nflags; +int dir; +{ + u_32_t sum1, sum2, sumd, sumd2; + struct in_addr in; + icmphdr_t *icmp; + int flags, dlen; + u_short *csump; + tcphdr_t *tcp; + nat_t *nat; + ip_t *oip; + void *dp; + + if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) + return NULL; + /* + * nat_icmperrorlookup() will return NULL for `defective' packets. + */ + if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir))) + return NULL; + + tcp = NULL; + csump = NULL; + flags = 0; + sumd2 = 0; + *nflags = IPN_ICMPERR; + icmp = fin->fin_dp; + oip = (ip_t *)&icmp->icmp_ip; + dp = (((char *)oip) + (IP_HL(oip) << 2)); + if (oip->ip_p == IPPROTO_TCP) { + tcp = (tcphdr_t *)dp; + csump = (u_short *)&tcp->th_sum; + flags = IPN_TCP; + } else if (oip->ip_p == IPPROTO_UDP) { + udphdr_t *udp; + + udp = (udphdr_t *)dp; + tcp = (tcphdr_t *)dp; + csump = (u_short *)&udp->uh_sum; + flags = IPN_UDP; + } else if (oip->ip_p == IPPROTO_ICMP) + flags = IPN_ICMPQUERY; + dlen = fin->fin_plen - ((char *)dp - (char *)fin->fin_ip); + + /* + * Need to adjust ICMP header to include the real IP#'s and + * port #'s. Only apply a checksum change relative to the + * IP address change as it will be modified again in fr_checknatout + * for both address and port. 
Two checksum changes are + * necessary for the two header address changes. Be careful + * to only modify the checksum once for the port # and twice + * for the IP#. + */ + + /* + * Step 1 + * Fix the IP addresses in the offending IP packet. You also need + * to adjust the IP header checksum of that offending IP packet + * and the ICMP checksum of the ICMP error message itself. + * + * Unfortunately, for UDP and TCP, the IP addresses are also contained + * in the pseudo header that is used to compute the UDP resp. TCP + * checksum. So, we must compensate that as well. Even worse, the + * change in the UDP and TCP checksums require yet another + * adjustment of the ICMP checksum of the ICMP error message. + */ + + if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) { + sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr)); + in = nat->nat_inip; + oip->ip_src = in; + } else { + sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr)); + in = nat->nat_outip; + oip->ip_dst = in; + } + + sum2 = LONG_SUM(ntohl(in.s_addr)); + + CALC_SUMD(sum1, sum2, sumd); + + /* + * Fix IP checksum of the offending IP packet to adjust for + * the change in the IP address. + * + * Normally, you would expect that the ICMP checksum of the + * ICMP error message needs to be adjusted as well for the + * IP address change in oip. + * However, this is a NOP, because the ICMP checksum is + * calculated over the complete ICMP packet, which includes the + * changed oip IP addresses and oip->ip_sum. However, these + * two changes cancel each other out (if the delta for + * the IP address is x, then the delta for ip_sum is minus x), + * so no change in the icmp_cksum is necessary. + * + * Be careful that nat_dir refers to the direction of the + * offending IP packet (oip), not to its ICMP response (icmp) + */ + fix_datacksum(&oip->ip_sum, sumd); + /* Fix icmp cksum : IP Addr + Cksum */ + sumd2 = (sumd >> 16); + + /* + * Fix UDP pseudo header checksum to compensate for the + * IP address change. 
+ */ + if ((oip->ip_p == IPPROTO_UDP) && (dlen >= 8) && (*csump != 0)) { + /* + * The UDP checksum is optional, only adjust it + * if it has been set. + */ + sum1 = ntohs(*csump); + fix_datacksum(csump, sumd); + sum2 = ntohs(*csump); + + /* + * Fix ICMP checksum to compensate the UDP + * checksum adjustment. + */ + sumd2 = sumd << 1; + CALC_SUMD(sum1, sum2, sumd); + sumd2 += sumd; + } + + /* + * Fix TCP pseudo header checksum to compensate for the + * IP address change. Before we can do the change, we + * must make sure that oip is sufficient large to hold + * the TCP checksum (normally it does not!). + * 18 = offsetof(tcphdr_t, th_sum) + 2 + */ + else if (oip->ip_p == IPPROTO_TCP && dlen >= 18) { + sum1 = ntohs(*csump); + fix_datacksum(csump, sumd); + sum2 = ntohs(*csump); + + /* + * Fix ICMP checksum to compensate the TCP + * checksum adjustment. + */ + sumd2 = sumd << 1; + CALC_SUMD(sum1, sum2, sumd); + sumd2 += sumd; + } else { + if (nat->nat_dir == NAT_OUTBOUND) + sumd2 = ~sumd2; + else + sumd2 = ~sumd2 + 1; + } + + if (((flags & IPN_TCPUDP) != 0) && (dlen >= 4)) { + int mode = 0; + + /* + * Step 2 : + * For offending TCP/UDP IP packets, translate the ports as + * well, based on the NAT specification. Of course such + * a change must be reflected in the ICMP checksum as well. + * + * Advance notice : Now it becomes complicated :-) + * + * Since the port fields are part of the TCP/UDP checksum + * of the offending IP packet, you need to adjust that checksum + * as well... but, if you change, you must change the icmp + * checksum *again*, to reflect that change. + * + * To further complicate: the TCP checksum is not in the first + * 8 bytes of the offending ip packet, so it most likely is not + * available. Some OSses like Solaris return enough bytes to + * include the TCP checksum. So we have to check if the + * ip->ip_len actually holds the TCP checksum of the oip! 
+ */ + + if (nat->nat_oport == tcp->th_dport) { + if (tcp->th_sport != nat->nat_inport) { + mode = 1; + sum1 = ntohs(nat->nat_inport); + sum2 = ntohs(tcp->th_sport); + } + } else if (tcp->th_sport == nat->nat_oport) { + mode = 2; + sum1 = ntohs(nat->nat_outport); + sum2 = ntohs(tcp->th_dport); + } + + if (mode == 1) { + /* + * Fix ICMP checksum to compensate port adjustment. + */ + tcp->th_sport = htons(sum1); + + /* + * Fix udp checksum to compensate port adjustment. + * NOTE : the offending IP packet flows the other + * direction compared to the ICMP message. + * + * The UDP checksum is optional, only adjust it if + * it has been set. + */ + if ((oip->ip_p == IPPROTO_UDP) && + (dlen >= 8) && (*csump != 0)) { + sumd = sum1 - sum2; + sumd2 += sumd; + + sum1 = ntohs(*csump); + fix_datacksum(csump, sumd); + sum2 = ntohs(*csump); + + /* + * Fix ICMP checksum to compenstate + * UDP checksum adjustment. + */ + CALC_SUMD(sum1, sum2, sumd); + sumd2 += sumd; + } + + /* + * Fix TCP checksum (if present) to compensate port + * adjustment. NOTE : the offending IP packet flows + * the other direction compared to the ICMP message. + */ + if (oip->ip_p == IPPROTO_TCP) { + if (dlen >= 18) { + sumd = sum1 - sum2; + sumd2 += sumd; + + sum1 = ntohs(*csump); + fix_datacksum(csump, sumd); + sum2 = ntohs(*csump); + + /* + * Fix ICMP checksum to compensate + * TCP checksum adjustment. + */ + CALC_SUMD(sum1, sum2, sumd); + sumd2 += sumd; + } else { + sumd = sum2 - sum1 + 1; + sumd2 += sumd; + } + } + } else if (mode == 2) { + /* + * Fix ICMP checksum to compensate port adjustment. + */ + tcp->th_dport = htons(sum1); + + /* + * Fix UDP checksum to compensate port adjustment. + * NOTE : the offending IP packet flows the other + * direction compared to the ICMP message. + * + * The UDP checksum is optional, only adjust + * it if it has been set. 
+ */ + if ((oip->ip_p == IPPROTO_UDP) && + (dlen >= 8) && (*csump != 0)) { + sumd = sum1 - sum2; + sumd2 += sumd; + + sum1 = ntohs(*csump); + fix_datacksum(csump, sumd); + sum2 = ntohs(*csump); + + /* + * Fix ICMP checksum to compensate + * UDP checksum adjustment. + */ + CALC_SUMD(sum1, sum2, sumd); + sumd2 += sumd; + } + + /* + * Fix TCP checksum (if present) to compensate port + * adjustment. NOTE : the offending IP packet flows + * the other direction compared to the ICMP message. + */ + if (oip->ip_p == IPPROTO_TCP) { + if (dlen >= 18) { + sumd = sum1 - sum2; + sumd2 += sumd; + + sum1 = ntohs(*csump); + fix_datacksum(csump, sumd); + sum2 = ntohs(*csump); + + /* + * Fix ICMP checksum to compensate + * TCP checksum adjustment. + */ + CALC_SUMD(sum1, sum2, sumd); + sumd2 += sumd; + } else { + if (nat->nat_dir == NAT_INBOUND) + sumd = sum2 - sum1; + else + sumd = sum2 - sum1 + 1; + sumd2 += sumd; + } + } + } + if (sumd2 != 0) { + sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16); + sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16); + fix_incksum(fin, &icmp->icmp_cksum, sumd2); + } + } else if (((flags & IPN_ICMPQUERY) != 0) && (dlen >= 8)) { + icmphdr_t *orgicmp; + + /* + * XXX - what if this is bogus hl and we go off the end ? + * In this case, nat_icmperrorlookup() will have returned NULL. + */ + orgicmp = (icmphdr_t *)dp; + + if (nat->nat_dir == NAT_OUTBOUND) { + if (orgicmp->icmp_id != nat->nat_inport) { + + /* + * Fix ICMP checksum (of the offening ICMP + * query packet) to compensate the change + * in the ICMP id of the offending ICMP + * packet. 
+ * + * Since you modify orgicmp->icmp_id with + * a delta (say x) and you compensate that + * in origicmp->icmp_cksum with a delta + * minus x, you don't have to adjust the + * overall icmp->icmp_cksum + */ + sum1 = ntohs(orgicmp->icmp_id); + sum2 = ntohs(nat->nat_inport); + CALC_SUMD(sum1, sum2, sumd); + orgicmp->icmp_id = nat->nat_inport; + fix_datacksum(&orgicmp->icmp_cksum, sumd); + } + } /* nat_dir == NAT_INBOUND is impossible for icmp queries */ + } + return nat; +} + + +/* + * NB: these lookups don't lock access to the list, it assumed that it has + * already been done! + */ + +/* ------------------------------------------------------------------------ */ +/* Function: nat_inlookup */ +/* Returns: nat_t* - NULL == no match, */ +/* else pointer to matching NAT entry */ +/* Parameters: fin(I) - pointer to packet information */ +/* flags(I) - NAT flags for this packet */ +/* p(I) - protocol for this packet */ +/* src(I) - source IP address */ +/* mapdst(I) - destination IP address */ +/* */ +/* Lookup a nat entry based on the mapped destination ip address/port and */ +/* real source address/port. We use this lookup when receiving a packet, */ +/* we're looking for a table entry, based on the destination address. */ +/* */ +/* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */ +/* */ +/* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN */ +/* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags. 
*/ +/* */ +/* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */ +/* the packet is of said protocol */ +/* ------------------------------------------------------------------------ */ +nat_t *nat_inlookup(fin, flags, p, src, mapdst) +fr_info_t *fin; +u_int flags, p; +struct in_addr src , mapdst; +{ + u_short sport, dport; + ipnat_t *ipn; + u_int sflags; + nat_t *nat; + int nflags; + u_32_t dst; + void *ifp; + u_int hv; + + if (fin != NULL) + ifp = fin->fin_ifp; + else + ifp = NULL; + sport = 0; + dport = 0; + dst = mapdst.s_addr; + sflags = flags & NAT_TCPUDPICMP; + + switch (p) + { + case IPPROTO_TCP : + case IPPROTO_UDP : + sport = htons(fin->fin_data[0]); + dport = htons(fin->fin_data[1]); + break; + case IPPROTO_ICMP : + if (flags & IPN_ICMPERR) + sport = fin->fin_data[1]; + else + dport = fin->fin_data[1]; + break; + default : + break; + } + + + if ((flags & SI_WILDP) != 0) + goto find_in_wild_ports; + + hv = NAT_HASH_FN(dst, dport, 0xffffffff); + hv = NAT_HASH_FN(src.s_addr, hv + sport, ipf_nattable_sz); + nat = nat_table[1][hv]; + for (; nat; nat = nat->nat_hnext[1]) { + nflags = nat->nat_flags; + + if (ifp != NULL) { + if (nat->nat_dir == NAT_REDIRECT) { + if (ifp != nat->nat_ifps[0]) + continue; + } else { + if (ifp != nat->nat_ifps[1]) + continue; + } + } + + if (nat->nat_oip.s_addr == src.s_addr && + nat->nat_outip.s_addr == dst && + (((p == 0) && + (sflags == (nat->nat_flags & IPN_TCPUDPICMP))) + || (p == nat->nat_p))) { + switch (p) + { +#if 0 + case IPPROTO_GRE : + if (nat->nat_call[1] != fin->fin_data[0]) + continue; + break; +#endif + case IPPROTO_ICMP : + if ((flags & IPN_ICMPERR) != 0) { + if (nat->nat_outport != sport) + continue; + } else { + if (nat->nat_outport != dport) + continue; + } + break; + case IPPROTO_TCP : + case IPPROTO_UDP : + if (nat->nat_oport != sport) + continue; + if (nat->nat_outport != dport) + continue; + break; + default : + break; + } + + ipn = nat->nat_ptr; + if ((ipn != NULL) && (nat->nat_aps != 
NULL)) + if (appr_match(fin, nat) != 0) + continue; + return nat; + } + } + + /* + * So if we didn't find it but there are wildcard members in the hash + * table, go back and look for them. We do this search and update here + * because it is modifying the NAT table and we want to do this only + * for the first packet that matches. The exception, of course, is + * for "dummy" (FI_IGNORE) lookups. + */ +find_in_wild_ports: + if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH)) + return NULL; + if (nat_stats.ns_wilds == 0) + return NULL; + + RWLOCK_EXIT(&ipf_nat); + + hv = NAT_HASH_FN(dst, 0, 0xffffffff); + hv = NAT_HASH_FN(src.s_addr, hv, ipf_nattable_sz); + + WRITE_ENTER(&ipf_nat); + + nat = nat_table[1][hv]; + for (; nat; nat = nat->nat_hnext[1]) { + if (ifp != NULL) { + if (nat->nat_dir == NAT_REDIRECT) { + if (ifp != nat->nat_ifps[0]) + continue; + } else { + if (ifp != nat->nat_ifps[1]) + continue; + } + } + + if (nat->nat_p != fin->fin_p) + continue; + if (nat->nat_oip.s_addr != src.s_addr || + nat->nat_outip.s_addr != dst) + continue; + + nflags = nat->nat_flags; + if (!(nflags & (NAT_TCPUDP|SI_WILDP))) + continue; + + if (nat_wildok(nat, (int)sport, (int)dport, nflags, + NAT_INBOUND) == 1) { + if ((fin->fin_flx & FI_IGNORE) != 0) + break; + if ((nflags & SI_CLONE) != 0) { + nat = fr_natclone(fin, nat); + if (nat == NULL) + break; + } else { + MUTEX_ENTER(&ipf_nat_new); + nat_stats.ns_wilds--; + MUTEX_EXIT(&ipf_nat_new); + } + nat->nat_oport = sport; + nat->nat_outport = dport; + nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT); + nat_tabmove(nat); + break; + } + } + + MUTEX_DOWNGRADE(&ipf_nat); + + return nat; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: nat_tabmove */ +/* Returns: Nil */ +/* Parameters: nat(I) - pointer to NAT structure */ +/* Write Lock: ipf_nat */ +/* */ +/* This function is only called for TCP/UDP NAT table entries where the */ +/* original was placed in the table without hashing on the 
ports and we now */ +/* want to include hashing on port numbers. */ +/* ------------------------------------------------------------------------ */ +static void nat_tabmove(nat) +nat_t *nat; +{ + nat_t **natp; + u_int hv; + + if (nat->nat_flags & SI_CLONE) + return; + + /* + * Remove the NAT entry from the old location + */ + if (nat->nat_hnext[0]) + nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0]; + *nat->nat_phnext[0] = nat->nat_hnext[0]; + nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--; + + if (nat->nat_hnext[1]) + nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1]; + *nat->nat_phnext[1] = nat->nat_hnext[1]; + nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--; + + /* + * Add into the NAT table in the new position + */ + hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff); + hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport, + ipf_nattable_sz); + nat->nat_hv[0] = hv; + natp = &nat_table[0][hv]; + if (*natp) + (*natp)->nat_phnext[0] = &nat->nat_hnext[0]; + nat->nat_phnext[0] = natp; + nat->nat_hnext[0] = *natp; + *natp = nat; + nat_stats.ns_bucketlen[0][hv]++; + + hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff); + hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport, + ipf_nattable_sz); + nat->nat_hv[1] = hv; + natp = &nat_table[1][hv]; + if (*natp) + (*natp)->nat_phnext[1] = &nat->nat_hnext[1]; + nat->nat_phnext[1] = natp; + nat->nat_hnext[1] = *natp; + *natp = nat; + nat_stats.ns_bucketlen[1][hv]++; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: nat_outlookup */ +/* Returns: nat_t* - NULL == no match, */ +/* else pointer to matching NAT entry */ +/* Parameters: fin(I) - pointer to packet information */ +/* flags(I) - NAT flags for this packet */ +/* p(I) - protocol for this packet */ +/* src(I) - source IP address */ +/* dst(I) - destination IP address */ +/* rw(I) - 1 == write lock on ipf_nat held, 0 == read lock. 
*/ +/* */ +/* Lookup a nat entry based on the source 'real' ip address/port and */ +/* destination address/port. We use this lookup when sending a packet out, */ +/* we're looking for a table entry, based on the source address. */ +/* */ +/* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */ +/* */ +/* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN */ +/* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags. */ +/* */ +/* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */ +/* the packet is of said protocol */ +/* ------------------------------------------------------------------------ */ +nat_t *nat_outlookup(fin, flags, p, src, dst) +fr_info_t *fin; +u_int flags, p; +struct in_addr src , dst; +{ + u_short sport, dport; + u_int sflags; + ipnat_t *ipn; + u_32_t srcip; + nat_t *nat; + int nflags; + void *ifp; + u_int hv; + + ifp = fin->fin_ifp; + srcip = src.s_addr; + sflags = flags & IPN_TCPUDPICMP; + sport = 0; + dport = 0; + + switch (p) + { + case IPPROTO_TCP : + case IPPROTO_UDP : + sport = htons(fin->fin_data[0]); + dport = htons(fin->fin_data[1]); + break; + case IPPROTO_ICMP : + if (flags & IPN_ICMPERR) + sport = fin->fin_data[1]; + else + dport = fin->fin_data[1]; + break; + default : + break; + } + + if ((flags & SI_WILDP) != 0) + goto find_out_wild_ports; + + hv = NAT_HASH_FN(srcip, sport, 0xffffffff); + hv = NAT_HASH_FN(dst.s_addr, hv + dport, ipf_nattable_sz); + nat = nat_table[0][hv]; + for (; nat; nat = nat->nat_hnext[0]) { + nflags = nat->nat_flags; + + if (ifp != NULL) { + if (nat->nat_dir == NAT_REDIRECT) { + if (ifp != nat->nat_ifps[1]) + continue; + } else { + if (ifp != nat->nat_ifps[0]) + continue; + } + } + + if (nat->nat_inip.s_addr == srcip && + nat->nat_oip.s_addr == dst.s_addr && + (((p == 0) && (sflags == (nflags & NAT_TCPUDPICMP))) + || (p == nat->nat_p))) { + switch (p) + { +#if 0 + case IPPROTO_GRE : + if (nat->nat_call[1] != fin->fin_data[0]) + continue; + break; +#endif + 
case IPPROTO_TCP : + case IPPROTO_UDP : + if (nat->nat_oport != dport) + continue; + if (nat->nat_inport != sport) + continue; + break; + default : + break; + } + + ipn = nat->nat_ptr; + if ((ipn != NULL) && (nat->nat_aps != NULL)) + if (appr_match(fin, nat) != 0) + continue; + return nat; + } + } + + /* + * So if we didn't find it but there are wildcard members in the hash + * table, go back and look for them. We do this search and update here + * because it is modifying the NAT table and we want to do this only + * for the first packet that matches. The exception, of course, is + * for "dummy" (FI_IGNORE) lookups. + */ +find_out_wild_ports: + if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH)) + return NULL; + if (nat_stats.ns_wilds == 0) + return NULL; + + RWLOCK_EXIT(&ipf_nat); + + hv = NAT_HASH_FN(srcip, 0, 0xffffffff); + hv = NAT_HASH_FN(dst.s_addr, hv, ipf_nattable_sz); + + WRITE_ENTER(&ipf_nat); + + nat = nat_table[0][hv]; + for (; nat; nat = nat->nat_hnext[0]) { + if (ifp != NULL) { + if (nat->nat_dir == NAT_REDIRECT) { + if (ifp != nat->nat_ifps[1]) + continue; + } else { + if (ifp != nat->nat_ifps[0]) + continue; + } + } + + if (nat->nat_p != fin->fin_p) + continue; + if ((nat->nat_inip.s_addr != srcip) || + (nat->nat_oip.s_addr != dst.s_addr)) + continue; + + nflags = nat->nat_flags; + if (!(nflags & (NAT_TCPUDP|SI_WILDP))) + continue; + + if (nat_wildok(nat, (int)sport, (int)dport, nflags, + NAT_OUTBOUND) == 1) { + if ((fin->fin_flx & FI_IGNORE) != 0) + break; + if ((nflags & SI_CLONE) != 0) { + nat = fr_natclone(fin, nat); + if (nat == NULL) + break; + } else { + MUTEX_ENTER(&ipf_nat_new); + nat_stats.ns_wilds--; + MUTEX_EXIT(&ipf_nat_new); + } + nat->nat_inport = sport; + nat->nat_oport = dport; + if (nat->nat_outport == 0) + nat->nat_outport = sport; + nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT); + nat_tabmove(nat); + break; + } + } + + MUTEX_DOWNGRADE(&ipf_nat); + + return nat; +} + + +/* 
------------------------------------------------------------------------ */ +/* Function: nat_lookupredir */ +/* Returns: nat_t* - NULL == no match, */ +/* else pointer to matching NAT entry */ +/* Parameters: np(I) - pointer to description of packet to find NAT table */ +/* entry for. */ +/* */ +/* Lookup the NAT tables to search for a matching redirect */ +/* ------------------------------------------------------------------------ */ +nat_t *nat_lookupredir(np) +natlookup_t *np; +{ + fr_info_t fi; + nat_t *nat; + + bzero((char *)&fi, sizeof(fi)); + if (np->nl_flags & IPN_IN) { + fi.fin_data[0] = ntohs(np->nl_realport); + fi.fin_data[1] = ntohs(np->nl_outport); + } else { + fi.fin_data[0] = ntohs(np->nl_inport); + fi.fin_data[1] = ntohs(np->nl_outport); + } + if (np->nl_flags & IPN_TCP) + fi.fin_p = IPPROTO_TCP; + else if (np->nl_flags & IPN_UDP) + fi.fin_p = IPPROTO_UDP; + else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY)) + fi.fin_p = IPPROTO_ICMP; + + /* + * We can do two sorts of lookups: + * - IPN_IN: we have the `real' and `out' address, look for `in'. + * - default: we have the `in' and `out' address, look for `real'. + */ + if (np->nl_flags & IPN_IN) { + if ((nat = nat_inlookup(&fi, np->nl_flags, fi.fin_p, + np->nl_realip, np->nl_outip))) { + np->nl_inip = nat->nat_inip; + np->nl_inport = nat->nat_inport; + } + } else { + /* + * If nl_inip is non null, this is a lookup based on the real + * ip address. Else, we use the fake. 
+ */ + if ((nat = nat_outlookup(&fi, np->nl_flags, fi.fin_p, + np->nl_inip, np->nl_outip))) { + + if ((np->nl_flags & IPN_FINDFORWARD) != 0) { + fr_info_t fin; + bzero((char *)&fin, sizeof(fin)); + fin.fin_p = nat->nat_p; + fin.fin_data[0] = ntohs(nat->nat_outport); + fin.fin_data[1] = ntohs(nat->nat_oport); + if (nat_inlookup(&fin, np->nl_flags, fin.fin_p, + nat->nat_outip, + nat->nat_oip) != NULL) { + np->nl_flags &= ~IPN_FINDFORWARD; + } + } + + np->nl_realip = nat->nat_outip; + np->nl_realport = nat->nat_outport; + } + } + + return nat; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: nat_match */ +/* Returns: int - 0 == no match, 1 == match */ +/* Parameters: fin(I) - pointer to packet information */ +/* np(I) - pointer to NAT rule */ +/* */ +/* Pull the matching of a packet against a NAT rule out of that complex */ +/* loop inside fr_checknatin() and lay it out properly in its own function. */ +/* ------------------------------------------------------------------------ */ +static int nat_match(fin, np) +fr_info_t *fin; +ipnat_t *np; +{ + frtuc_t *ft; + + if (fin->fin_v != 4) + return 0; + + if (np->in_p && fin->fin_p != np->in_p) + return 0; + + if (fin->fin_out) { + if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK))) + return 0; + if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip) + ^ ((np->in_flags & IPN_NOTSRC) != 0)) + return 0; + if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip) + ^ ((np->in_flags & IPN_NOTDST) != 0)) + return 0; + } else { + if (!(np->in_redir & NAT_REDIRECT)) + return 0; + if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip) + ^ ((np->in_flags & IPN_NOTSRC) != 0)) + return 0; + if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip) + ^ ((np->in_flags & IPN_NOTDST) != 0)) + return 0; + } + + ft = &np->in_tuc; + if (!(fin->fin_flx & FI_TCPUDP) || + (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) { + if (ft->ftu_scmp || ft->ftu_dcmp) + return 0; + return 1; + } + + 
return fr_tcpudpchk(fin, ft); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: nat_update */ +/* Returns: Nil */ +/* Parameters: nat(I) - pointer to NAT structure */ +/* np(I) - pointer to NAT rule */ +/* */ +/* Updates the lifetime of a NAT table entry for non-TCP packets. Must be */ +/* called with fin_rev updated - i.e. after calling nat_proto(). */ +/* ------------------------------------------------------------------------ */ +void nat_update(fin, nat, np) +fr_info_t *fin; +nat_t *nat; +ipnat_t *np; +{ + ipftq_t *ifq, *ifq2; + ipftqent_t *tqe; + + MUTEX_ENTER(&nat->nat_lock); + tqe = &nat->nat_tqe; + ifq = tqe->tqe_ifq; + + /* + * We allow over-riding of NAT timeouts from NAT rules, even for + * TCP, however, if it is TCP and there is no rule timeout set, + * then do not update the timeout here. + */ + if (np != NULL) + ifq2 = np->in_tqehead[fin->fin_rev]; + else + ifq2 = NULL; + + if (nat->nat_p == IPPROTO_TCP && ifq2 == NULL) { + (void) fr_tcp_age(&nat->nat_tqe, fin, nat_tqb, 0); + } else { + if (ifq2 == NULL) { + if (nat->nat_p == IPPROTO_UDP) + ifq2 = &nat_udptq; + else if (nat->nat_p == IPPROTO_ICMP) + ifq2 = &nat_icmptq; + else + ifq2 = &nat_iptq; + } + + fr_movequeue(tqe, ifq, ifq2); + } + MUTEX_EXIT(&nat->nat_lock); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_checknatout */ +/* Returns: int - -1 == packet failed NAT checks so block it, */ +/* 0 == no packet translation occurred, */ +/* 1 == packet was successfully translated. */ +/* Parameters: fin(I) - pointer to packet information */ +/* passp(I) - pointer to filtering result flags */ +/* */ +/* Check to see if an outcoming packet should be changed. ICMP packets are */ +/* first checked to see if they match an existing entry (if an error), */ +/* otherwise a search of the current NAT table is made. If neither results */ +/* in a match then a search for a matching NAT rule is made. 
Create a new */ +/* NAT entry if a we matched a NAT rule. Lastly, actually change the */ +/* packet header(s) as required. */ +/* ------------------------------------------------------------------------ */ +int fr_checknatout(fin, passp) +fr_info_t *fin; +u_32_t *passp; +{ + struct ifnet *ifp, *sifp; + icmphdr_t *icmp = NULL; + tcphdr_t *tcp = NULL; + int rval, natfailed; + ipnat_t *np = NULL; + u_int nflags = 0; + u_32_t ipa, iph; + int natadd = 1; + frentry_t *fr; + nat_t *nat; + + if (nat_stats.ns_rules == 0 || fr_nat_lock != 0) + return 0; + + natfailed = 0; + fr = fin->fin_fr; + sifp = fin->fin_ifp; + if ((fr != NULL) && !(fr->fr_flags & FR_DUP) && + fr->fr_tif.fd_ifp && fr->fr_tif.fd_ifp != (void *)-1) + fin->fin_ifp = fr->fr_tif.fd_ifp; + ifp = fin->fin_ifp; + + if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { + switch (fin->fin_p) + { + case IPPROTO_TCP : + nflags = IPN_TCP; + break; + case IPPROTO_UDP : + nflags = IPN_UDP; + break; + case IPPROTO_ICMP : + icmp = fin->fin_dp; + + /* + * This is an incoming packet, so the destination is + * the icmp_id and the source port equals 0 + */ + if (nat_icmpquerytype4(icmp->icmp_type)) + nflags = IPN_ICMPQUERY; + break; + default : + break; + } + + if ((nflags & IPN_TCPUDP)) + tcp = fin->fin_dp; + } + + ipa = fin->fin_saddr; + + READ_ENTER(&ipf_nat); + + if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) && + (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND))) + /*EMPTY*/; + else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin))) + natadd = 0; + else if ((nat = nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p, + fin->fin_src, fin->fin_dst))) { + nflags = nat->nat_flags; + } else { + u_32_t hv, msk, nmsk; + + /* + * If there is no current entry in the nat table for this IP#, + * create one for it (if there is a matching rule). 
+ */ + RWLOCK_EXIT(&ipf_nat); + msk = 0xffffffff; + nmsk = nat_masks; + WRITE_ENTER(&ipf_nat); +maskloop: + iph = ipa & htonl(msk); + hv = NAT_HASH_FN(iph, 0, ipf_natrules_sz); + for (np = nat_rules[hv]; np; np = np->in_mnext) + { + if ((np->in_ifps[0] && (np->in_ifps[0] != ifp))) + continue; + if (np->in_v != fin->fin_v) + continue; + if (np->in_p && (np->in_p != fin->fin_p)) + continue; + if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags)) + continue; + if (np->in_flags & IPN_FILTER) { + if (!nat_match(fin, np)) + continue; + } else if ((ipa & np->in_inmsk) != np->in_inip) + continue; + + if ((fr != NULL) && + !fr_matchtag(&np->in_tag, &fr->fr_nattag)) + continue; + + if (*np->in_plabel != '\0') { + if (((np->in_flags & IPN_FILTER) == 0) && + (np->in_dport != tcp->th_dport)) + continue; + if (appr_ok(fin, tcp, np) == 0) + continue; + } + + if ((nat = nat_new(fin, np, NULL, nflags, + NAT_OUTBOUND))) { + np->in_hits++; + break; + } else + natfailed = -1; + } + if ((np == NULL) && (nmsk != 0)) { + while (nmsk) { + msk <<= 1; + if (nmsk & 0x80000000) + break; + nmsk <<= 1; + } + if (nmsk != 0) { + nmsk <<= 1; + goto maskloop; + } + } + MUTEX_DOWNGRADE(&ipf_nat); + } + + if (nat != NULL) { + rval = fr_natout(fin, nat, natadd, nflags); + if (rval == 1) { + MUTEX_ENTER(&nat->nat_lock); + nat->nat_ref++; + MUTEX_EXIT(&nat->nat_lock); + fin->fin_nat = nat; + } + } else + rval = natfailed; + RWLOCK_EXIT(&ipf_nat); + + if (rval == -1) { + if (passp != NULL) + *passp = FR_BLOCK; + fin->fin_flx |= FI_BADNAT; + } + fin->fin_ifp = sifp; + return rval; +} + +/* ------------------------------------------------------------------------ */ +/* Function: fr_natout */ +/* Returns: int - -1 == packet failed NAT checks so block it, */ +/* 1 == packet was successfully translated. 
*/ +/* Parameters: fin(I) - pointer to packet information */ +/* nat(I) - pointer to NAT structure */ +/* natadd(I) - flag indicating if it is safe to add frag cache */ +/* nflags(I) - NAT flags set for this packet */ +/* */ +/* Translate a packet coming "out" on an interface. */ +/* ------------------------------------------------------------------------ */ +int fr_natout(fin, nat, natadd, nflags) +fr_info_t *fin; +nat_t *nat; +int natadd; +u_32_t nflags; +{ + icmphdr_t *icmp; + u_short *csump; + tcphdr_t *tcp; + ipnat_t *np; + int i; + + tcp = NULL; + icmp = NULL; + csump = NULL; + np = nat->nat_ptr; + + if ((natadd != 0) && (fin->fin_flx & FI_FRAG) && (np != NULL)) + (void) fr_nat_newfrag(fin, 0, nat); + + MUTEX_ENTER(&nat->nat_lock); + nat->nat_bytes[1] += fin->fin_plen; + nat->nat_pkts[1]++; + MUTEX_EXIT(&nat->nat_lock); + + /* + * Fix up checksums, not by recalculating them, but + * simply computing adjustments. + * This is only done for STREAMS based IP implementations where the + * checksum has already been calculated by IP. In all other cases, + * IPFilter is called before the checksum needs calculating so there + * is no call to modify whatever is in the header now. + */ + if (fin->fin_v == 4) { + if (nflags == IPN_ICMPERR) { + u_32_t s1, s2, sumd; + + s1 = LONG_SUM(ntohl(fin->fin_saddr)); + s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr)); + CALC_SUMD(s1, s2, sumd); + fix_outcksum(fin, &fin->fin_ip->ip_sum, sumd); + } +#if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ + defined(linux) || defined(BRIDGE_IPF) + else { + /* + * Strictly speaking, this isn't necessary on BSD + * kernels because they do checksum calculation after + * this code has run BUT if ipfilter is being used + * to do NAT as a bridge, that code doesn't exist. 
+ */ + if (nat->nat_dir == NAT_OUTBOUND) + fix_outcksum(fin, &fin->fin_ip->ip_sum, + nat->nat_ipsumd); + else + fix_incksum(fin, &fin->fin_ip->ip_sum, + nat->nat_ipsumd); + } +#endif + } + + if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { + if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) { + tcp = fin->fin_dp; + + tcp->th_sport = nat->nat_outport; + fin->fin_data[0] = ntohs(nat->nat_outport); + } + + if ((nat->nat_outport != 0) && (nflags & IPN_ICMPQUERY)) { + icmp = fin->fin_dp; + icmp->icmp_id = nat->nat_outport; + } + + csump = nat_proto(fin, nat, nflags); + } + + fin->fin_ip->ip_src = nat->nat_outip; + + nat_update(fin, nat, np); + + /* + * The above comments do not hold for layer 4 (or higher) checksums... + */ + if (csump != NULL) { + if (nat->nat_dir == NAT_OUTBOUND) + fix_outcksum(fin, csump, nat->nat_sumd[1]); + else + fix_incksum(fin, csump, nat->nat_sumd[1]); + } +#ifdef IPFILTER_SYNC + ipfsync_update(SMC_NAT, fin, nat->nat_sync); +#endif + /* ------------------------------------------------------------- */ + /* A few quick notes: */ + /* Following are test conditions prior to calling the */ + /* appr_check routine. */ + /* */ + /* A NULL tcp indicates a non TCP/UDP packet. When dealing */ + /* with a redirect rule, we attempt to match the packet's */ + /* source port against in_dport, otherwise we'd compare the */ + /* packet's destination. */ + /* ------------------------------------------------------------- */ + if ((np != NULL) && (np->in_apr != NULL)) { + i = appr_check(fin, nat); + if (i == 0) + i = 1; + } else + i = 1; + ATOMIC_INCL(nat_stats.ns_mapped[1]); + fin->fin_flx |= FI_NATED; + return i; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_checknatin */ +/* Returns: int - -1 == packet failed NAT checks so block it, */ +/* 0 == no packet translation occurred, */ +/* 1 == packet was successfully translated. 
*/ +/* Parameters: fin(I) - pointer to packet information */ +/* passp(I) - pointer to filtering result flags */ +/* */ +/* Check to see if an incoming packet should be changed. ICMP packets are */ +/* first checked to see if they match an existing entry (if an error), */ +/* otherwise a search of the current NAT table is made. If neither results */ +/* in a match then a search for a matching NAT rule is made. Create a new */ +/* NAT entry if a we matched a NAT rule. Lastly, actually change the */ +/* packet header(s) as required. */ +/* ------------------------------------------------------------------------ */ +int fr_checknatin(fin, passp) +fr_info_t *fin; +u_32_t *passp; +{ + u_int nflags, natadd; + int rval, natfailed; + struct ifnet *ifp; + struct in_addr in; + icmphdr_t *icmp; + tcphdr_t *tcp; + u_short dport; + ipnat_t *np; + nat_t *nat; + u_32_t iph; + + if (nat_stats.ns_rules == 0 || fr_nat_lock != 0) + return 0; + + tcp = NULL; + icmp = NULL; + dport = 0; + natadd = 1; + nflags = 0; + natfailed = 0; + ifp = fin->fin_ifp; + + if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { + switch (fin->fin_p) + { + case IPPROTO_TCP : + nflags = IPN_TCP; + break; + case IPPROTO_UDP : + nflags = IPN_UDP; + break; + case IPPROTO_ICMP : + icmp = fin->fin_dp; + + /* + * This is an incoming packet, so the destination is + * the icmp_id and the source port equals 0 + */ + if (nat_icmpquerytype4(icmp->icmp_type)) { + nflags = IPN_ICMPQUERY; + dport = icmp->icmp_id; + } break; + default : + break; + } + + if ((nflags & IPN_TCPUDP)) { + tcp = fin->fin_dp; + dport = tcp->th_dport; + } + } + + in = fin->fin_dst; + + READ_ENTER(&ipf_nat); + + if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) && + (nat = nat_icmperror(fin, &nflags, NAT_INBOUND))) + /*EMPTY*/; + else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin))) + natadd = 0; + else if ((nat = nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p, + fin->fin_src, in))) { + nflags = 
nat->nat_flags; + } else { + u_32_t hv, msk, rmsk; + + RWLOCK_EXIT(&ipf_nat); + rmsk = rdr_masks; + msk = 0xffffffff; + WRITE_ENTER(&ipf_nat); + /* + * If there is no current entry in the nat table for this IP#, + * create one for it (if there is a matching rule). + */ +maskloop: + iph = in.s_addr & htonl(msk); + hv = NAT_HASH_FN(iph, 0, ipf_rdrrules_sz); + for (np = rdr_rules[hv]; np; np = np->in_rnext) { + if (np->in_ifps[0] && (np->in_ifps[0] != ifp)) + continue; + if (np->in_v != fin->fin_v) + continue; + if (np->in_p && (np->in_p != fin->fin_p)) + continue; + if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags)) + continue; + if (np->in_flags & IPN_FILTER) { + if (!nat_match(fin, np)) + continue; + } else { + if ((in.s_addr & np->in_outmsk) != np->in_outip) + continue; + if (np->in_pmin && + ((ntohs(np->in_pmax) < ntohs(dport)) || + (ntohs(dport) < ntohs(np->in_pmin)))) + continue; + } + + if (*np->in_plabel != '\0') { + if (!appr_ok(fin, tcp, np)) { + continue; + } + } + + nat = nat_new(fin, np, NULL, nflags, NAT_INBOUND); + if (nat != NULL) { + np->in_hits++; + break; + } else + natfailed = -1; + } + + if ((np == NULL) && (rmsk != 0)) { + while (rmsk) { + msk <<= 1; + if (rmsk & 0x80000000) + break; + rmsk <<= 1; + } + if (rmsk != 0) { + rmsk <<= 1; + goto maskloop; + } + } + MUTEX_DOWNGRADE(&ipf_nat); + } + if (nat != NULL) { + rval = fr_natin(fin, nat, natadd, nflags); + if (rval == 1) { + MUTEX_ENTER(&nat->nat_lock); + nat->nat_ref++; + MUTEX_EXIT(&nat->nat_lock); + fin->fin_nat = nat; + fin->fin_state = nat->nat_state; + } + } else + rval = natfailed; + RWLOCK_EXIT(&ipf_nat); + + if (rval == -1) { + if (passp != NULL) + *passp = FR_BLOCK; + fin->fin_flx |= FI_BADNAT; + } + return rval; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_natin */ +/* Returns: int - -1 == packet failed NAT checks so block it, */ +/* 1 == packet was successfully translated. 
*/ +/* Parameters: fin(I) - pointer to packet information */ +/* nat(I) - pointer to NAT structure */ +/* natadd(I) - flag indicating if it is safe to add frag cache */ +/* nflags(I) - NAT flags set for this packet */ +/* Locks Held: ipf_nat (READ) */ +/* */ +/* Translate a packet coming "in" on an interface. */ +/* ------------------------------------------------------------------------ */ +int fr_natin(fin, nat, natadd, nflags) +fr_info_t *fin; +nat_t *nat; +int natadd; +u_32_t nflags; +{ + icmphdr_t *icmp; + u_short *csump; + tcphdr_t *tcp; + ipnat_t *np; + int i; + + tcp = NULL; + csump = NULL; + np = nat->nat_ptr; + fin->fin_fr = nat->nat_fr; + + if (np != NULL) { + if ((natadd != 0) && (fin->fin_flx & FI_FRAG)) + (void) fr_nat_newfrag(fin, 0, nat); + + /* ------------------------------------------------------------- */ + /* A few quick notes: */ + /* Following are test conditions prior to calling the */ + /* appr_check routine. */ + /* */ + /* A NULL tcp indicates a non TCP/UDP packet. When dealing */ + /* with a map rule, we attempt to match the packet's */ + /* source port against in_dport, otherwise we'd compare the */ + /* packet's destination. */ + /* ------------------------------------------------------------- */ + if (np->in_apr != NULL) { + i = appr_check(fin, nat); + if (i == -1) { + return -1; + } + } + } + +#ifdef IPFILTER_SYNC + ipfsync_update(SMC_NAT, fin, nat->nat_sync); +#endif + + MUTEX_ENTER(&nat->nat_lock); + nat->nat_bytes[0] += fin->fin_plen; + nat->nat_pkts[0]++; + MUTEX_EXIT(&nat->nat_lock); + + fin->fin_ip->ip_dst = nat->nat_inip; + fin->fin_fi.fi_daddr = nat->nat_inip.s_addr; + if (nflags & IPN_TCPUDP) + tcp = fin->fin_dp; + + /* + * Fix up checksums, not by recalculating them, but + * simply computing adjustments. + * Why only do this for some platforms on inbound packets ? + * Because for those that it is done, IP processing is yet to happen + * and so the IPv4 header checksum has not yet been evaluated. 
+ * Perhaps it should always be done for the benefit of things like + * fast forwarding (so that it doesn't need to be recomputed) but with + * header checksum offloading, perhaps it is a moot point. + */ +#if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ + defined(__osf__) || defined(linux) + if (nat->nat_dir == NAT_OUTBOUND) + fix_incksum(fin, &fin->fin_ip->ip_sum, nat->nat_ipsumd); + else + fix_outcksum(fin, &fin->fin_ip->ip_sum, nat->nat_ipsumd); +#endif + + if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { + if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) { + tcp->th_dport = nat->nat_inport; + fin->fin_data[1] = ntohs(nat->nat_inport); + } + + + if ((nat->nat_inport != 0) && (nflags & IPN_ICMPQUERY)) { + icmp = fin->fin_dp; + + icmp->icmp_id = nat->nat_inport; + } + + csump = nat_proto(fin, nat, nflags); + } + + nat_update(fin, nat, np); + + /* + * The above comments do not hold for layer 4 (or higher) checksums... + */ + if (csump != NULL) { + if (nat->nat_dir == NAT_OUTBOUND) + fix_incksum(fin, csump, nat->nat_sumd[0]); + else + fix_outcksum(fin, csump, nat->nat_sumd[0]); + } + ATOMIC_INCL(nat_stats.ns_mapped[0]); + fin->fin_flx |= FI_NATED; + if (np != NULL && np->in_tag.ipt_num[0] != 0) + fin->fin_nattag = &np->in_tag; + return 1; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: nat_proto */ +/* Returns: u_short* - pointer to transport header checksum to update, */ +/* NULL if the transport protocol is not recognised */ +/* as needing a checksum update. 
*/ +/* Parameters: fin(I) - pointer to packet information */ +/* nat(I) - pointer to NAT structure */ +/* nflags(I) - NAT flags set for this packet */ +/* */ +/* Return the pointer to the checksum field for each protocol so understood.*/ +/* If support for making other changes to a protocol header is required, */ +/* that is not strictly 'address' translation, such as clamping the MSS in */ +/* TCP down to a specific value, then do it from here. */ +/* ------------------------------------------------------------------------ */ +u_short *nat_proto(fin, nat, nflags) +fr_info_t *fin; +nat_t *nat; +u_int nflags; +{ + icmphdr_t *icmp; + u_short *csump; + tcphdr_t *tcp; + udphdr_t *udp; + + csump = NULL; + if (fin->fin_out == 0) { + fin->fin_rev = (nat->nat_dir == NAT_OUTBOUND); + } else { + fin->fin_rev = (nat->nat_dir == NAT_INBOUND); + } + + switch (fin->fin_p) + { + case IPPROTO_TCP : + tcp = fin->fin_dp; + + csump = &tcp->th_sum; + + /* + * Do a MSS CLAMPING on a SYN packet, + * only deal IPv4 for now. + */ + if ((nat->nat_mssclamp != 0) && (tcp->th_flags & TH_SYN) != 0) + nat_mssclamp(tcp, nat->nat_mssclamp, fin, csump); + + break; + + case IPPROTO_UDP : + udp = fin->fin_dp; + + if (udp->uh_sum) + csump = &udp->uh_sum; + break; + + case IPPROTO_ICMP : + icmp = fin->fin_dp; + + if ((nflags & IPN_ICMPQUERY) != 0) { + if (icmp->icmp_cksum != 0) + csump = &icmp->icmp_cksum; + } + break; + } + return csump; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_natunload */ +/* Returns: Nil */ +/* Parameters: Nil */ +/* */ +/* Free all memory used by NAT structures allocated at runtime. 
*/
/* ------------------------------------------------------------------------ */
/*
 * Teardown order: rules and table entries first, then the user-defined
 * timeout queues, then the hash tables and statistics arrays, and finally
 * (only if fr_nat_init is set) the locks and the TCP state queues.
 */
void fr_natunload()
{
	ipftq_t *ifq, *ifqnext;
	int i;

	(void) nat_clearlist();
	(void) nat_flushtable();

	/*
	 * Proxy timeout queues are not cleaned here because although they
	 * exist on the NAT list, appr_unload is called after fr_natunload
	 * and the proxies actually are responsible for them being created.
	 * Should the proxy timeouts have their own list? There's no real
	 * justification as this is the only complication.
	 */
	ifq = nat_utqe;
	while (ifq != NULL) {
		ifqnext = ifq->ifq_next;
		if ((ifq->ifq_flags & IFQF_PROXY) == 0) {
			if (fr_deletetimeoutqueue(ifq) == 0)
				fr_freetimeoutqueue(ifq);
		}
		ifq = ifqnext;
	}

	for (i = 0; i < 2; i++) {
		if (nat_table[i] != NULL) {
			KFREES(nat_table[i],
			       sizeof(nat_t *) * ipf_nattable_sz);
			nat_table[i] = NULL;
		}
	}
	if (nat_rules != NULL) {
		KFREES(nat_rules, sizeof(ipnat_t *) * ipf_natrules_sz);
		nat_rules = NULL;
	}
	if (rdr_rules != NULL) {
		KFREES(rdr_rules, sizeof(ipnat_t *) * ipf_rdrrules_sz);
		rdr_rules = NULL;
	}
	if (maptable != NULL) {
		KFREES(maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
		maptable = NULL;
	}

	/*
	 * NOTE(review): the size below uses sizeof(u_long *); confirm it
	 * matches the size the bucket length arrays were allocated with.
	 */
	for (i = 0; i < 2; i++) {
		if (nat_stats.ns_bucketlen[i] != NULL) {
			KFREES(nat_stats.ns_bucketlen[i],
			       sizeof(u_long *) * ipf_nattable_sz);
			nat_stats.ns_bucketlen[i] = NULL;
		}
	}

	if (fr_nat_maxbucket_reset == 1)
		fr_nat_maxbucket = 0;

	if (fr_nat_init != 1)
		return;

	fr_nat_init = 0;
	fr_sttab_destroy(nat_tqb);

	RW_DESTROY(&ipf_natfrag);
	RW_DESTROY(&ipf_nat);

	MUTEX_DESTROY(&ipf_nat_new);
	MUTEX_DESTROY(&ipf_natio);

	MUTEX_DESTROY(&nat_udptq.ifq_lock);
	MUTEX_DESTROY(&nat_icmptq.ifq_lock);
	MUTEX_DESTROY(&nat_iptq.ifq_lock);
}


/*
------------------------------------------------------------------------ */ +/* Function: fr_natexpire */ +/* Returns: Nil */ +/* Parameters: Nil */ +/* */ +/* Check all of the timeout queues for entries at the top which need to be */ +/* expired. */ +/* ------------------------------------------------------------------------ */ +void fr_natexpire() +{ + ipftq_t *ifq, *ifqnext; + ipftqent_t *tqe, *tqn; + int i; + SPL_INT(s); + + SPL_NET(s); + WRITE_ENTER(&ipf_nat); + for (ifq = nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) { + for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) { + if (tqe->tqe_die > fr_ticks) + break; + tqn = tqe->tqe_next; + nat_delete(tqe->tqe_parent, NL_EXPIRE); + } + } + + for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) { + ifqnext = ifq->ifq_next; + + for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) { + if (tqe->tqe_die > fr_ticks) + break; + tqn = tqe->tqe_next; + nat_delete(tqe->tqe_parent, NL_EXPIRE); + } + } + + for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) { + ifqnext = ifq->ifq_next; + + if (((ifq->ifq_flags & IFQF_DELETE) != 0) && + (ifq->ifq_ref == 0)) { + fr_freetimeoutqueue(ifq); + } + } + + RWLOCK_EXIT(&ipf_nat); + SPL_X(s); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_natsync */ +/* Returns: Nil */ +/* Parameters: ifp(I) - pointer to network interface */ +/* */ +/* Walk through all of the currently active NAT sessions, looking for those */ +/* which need to have their translated address updated. */ +/* ------------------------------------------------------------------------ */ +void fr_natsync(ifp) +void *ifp; +{ + u_32_t sum1, sum2, sumd; + struct in_addr in; + ipnat_t *n; + nat_t *nat; + void *ifp2; + SPL_INT(s); + + if (fr_running <= 0) + return; + + /* + * Change IP addresses for NAT sessions for any protocol except TCP + * since it will break the TCP connection anyway. The only rules + * which will get changed are those which are "map ... 
-> 0/32", + * where the rule specifies the address is taken from the interface. + */ + SPL_NET(s); + WRITE_ENTER(&ipf_nat); + + if (fr_running <= 0) { + RWLOCK_EXIT(&ipf_nat); + return; + } + + for (nat = nat_instances; nat; nat = nat->nat_next) { + if ((nat->nat_flags & IPN_TCP) != 0) + continue; + n = nat->nat_ptr; + if ((n == NULL) || + (n->in_outip != 0) || (n->in_outmsk != 0xffffffff)) + continue; + if (((ifp == NULL) || (ifp == nat->nat_ifps[0]) || + (ifp == nat->nat_ifps[1]))) { + nat->nat_ifps[0] = GETIFP(nat->nat_ifnames[0], 4); + if (nat->nat_ifnames[1][0] != '\0') { + nat->nat_ifps[1] = GETIFP(nat->nat_ifnames[1], + 4); + } else + nat->nat_ifps[1] = nat->nat_ifps[0]; + ifp2 = nat->nat_ifps[0]; + if (ifp2 == NULL) + continue; + + /* + * Change the map-to address to be the same as the + * new one. + */ + sum1 = nat->nat_outip.s_addr; + if (fr_ifpaddr(4, FRI_NORMAL, ifp2, &in, NULL) != -1) + nat->nat_outip = in; + sum2 = nat->nat_outip.s_addr; + + if (sum1 == sum2) + continue; + /* + * Readjust the checksum adjustment to take into + * account the new IP#. + */ + CALC_SUMD(sum1, sum2, sumd); + /* XXX - dont change for TCP when solaris does + * hardware checksumming. + */ + sumd += nat->nat_sumd[0]; + nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16); + nat->nat_sumd[1] = nat->nat_sumd[0]; + } + } + + for (n = nat_list; (n != NULL); n = n->in_next) { + if ((ifp == NULL) || (n->in_ifps[0] == ifp)) + n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4); + if ((ifp == NULL) || (n->in_ifps[1] == ifp)) + n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4); + } + RWLOCK_EXIT(&ipf_nat); + SPL_X(s); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: nat_icmpquerytype4 */ +/* Returns: int - 1 == success, 0 == failure */ +/* Parameters: icmptype(I) - ICMP type number */ +/* */ +/* Tests to see if the ICMP type number passed is a query/response type or */ +/* not. 
*/
+/* ------------------------------------------------------------------------ */
+static INLINE int nat_icmpquerytype4(icmptype)
+int icmptype;
+{
+	int isquery;
+
+	/*
+	 * The ICMP query NAT code needs a query and its reply to match the
+	 * same NAT rule. A NAT structure does not record the icmptype, and
+	 * one structure serves every ICMP type sharing the same src, dest
+	 * and id, so the reply types are classed as queries here as well.
+	 * Odd as it sounds to call a reply a query, that is how the IPv4
+	 * specification defines it.
+	 *
+	 * Route advertisement/solicitation is currently unsupported: it
+	 * would require rewriting the ICMP data section.
+	 */
+	isquery = 0;
+
+	if ((icmptype == ICMP_ECHOREPLY) || (icmptype == ICMP_ECHO) ||
+	    (icmptype == ICMP_TSTAMP) || (icmptype == ICMP_TSTAMPREPLY) ||
+	    (icmptype == ICMP_IREQ) || (icmptype == ICMP_IREQREPLY) ||
+	    (icmptype == ICMP_MASKREQ) || (icmptype == ICMP_MASKREPLY))
+		isquery = 1;
+
+	return isquery;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: nat_log */
+/* Returns: Nil */
+/* Parameters: nat(I) - pointer to NAT structure */
+/* type(I) - type of log entry to create */
+/* */
+/* Creates a NAT log entry. 
*/
+/* ------------------------------------------------------------------------ */
+void nat_log(nat, type)
+struct nat *nat;
+u_int type;
+{
+#ifdef	IPFILTER_LOG
+# ifndef LARGE_NAT
+	struct ipnat *np;
+	int rulen;
+# endif
+	struct natlog natl;
+	void *items[1];
+	size_t sizes[1];
+	int types[1];
+
+	natl.nl_inip = nat->nat_inip;	/* snapshot of the session's fields */
+	natl.nl_outip = nat->nat_outip;
+	natl.nl_origip = nat->nat_oip;
+	natl.nl_bytes[0] = nat->nat_bytes[0];
+	natl.nl_bytes[1] = nat->nat_bytes[1];
+	natl.nl_pkts[0] = nat->nat_pkts[0];
+	natl.nl_pkts[1] = nat->nat_pkts[1];
+	natl.nl_origport = nat->nat_oport;
+	natl.nl_inport = nat->nat_inport;
+	natl.nl_outport = nat->nat_outport;
+	natl.nl_p = nat->nat_p;
+	natl.nl_type = type;
+	natl.nl_rule = -1;	/* stays -1 unless the rule is found below */
+# ifndef LARGE_NAT
+	if (nat->nat_ptr != NULL) {
+		for (rulen = 0, np = nat_list; np; np = np->in_next, rulen++)
+			if (np == nat->nat_ptr) {
+				natl.nl_rule = rulen;	/* position in nat_list */
+				break;
+			}
+	}
+# endif
+	items[0] = &natl;	/* one-element vectors handed to ipllog() */
+	sizes[0] = sizeof(natl);
+	types[0] = 0;
+
+	(void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1);
+#endif
+}
+
+
+#if defined(__OpenBSD__)
+/* ------------------------------------------------------------------------ */
+/* Function: nat_ifdetach */
+/* Returns: Nil */
+/* Parameters: ifp(I) - pointer to network interface */
+/* */
+/* Compatibility interface for OpenBSD to trigger the correct updating of */
+/* interface references within IPFilter. */
+/* ------------------------------------------------------------------------ */
+void nat_ifdetach(ifp)
+void *ifp;
+{
+	frsync(ifp);	/* all of the work is done by frsync() */
+	return;
+}
+#endif
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_natderef */
+/* Returns: Nil */
+/* Parameters: natp(I) - pointer to pointer to NAT table entry */
+/* */
+/* Decrement the reference counter for this NAT table entry and free it if */
+/* there are no more things using it. 
*/ +/* ------------------------------------------------------------------------ */ +void fr_natderef(natp) +nat_t **natp; +{ + nat_t *nat; + + nat = *natp; + *natp = NULL; + WRITE_ENTER(&ipf_nat); + nat->nat_ref--; + if (nat->nat_ref == 0) + nat_delete(nat, NL_EXPIRE); + RWLOCK_EXIT(&ipf_nat); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_natclone */ +/* Returns: ipstate_t* - NULL == cloning failed, */ +/* else pointer to new state structure */ +/* Parameters: fin(I) - pointer to packet information */ +/* is(I) - pointer to master state structure */ +/* Write Lock: ipf_nat */ +/* */ +/* Create a "duplcate" state table entry from the master. */ +/* ------------------------------------------------------------------------ */ +static nat_t *fr_natclone(fin, nat) +fr_info_t *fin; +nat_t *nat; +{ + frentry_t *fr; + nat_t *clone; + ipnat_t *np; + + KMALLOC(clone, nat_t *); + if (clone == NULL) + return NULL; + bcopy((char *)nat, (char *)clone, sizeof(*clone)); + + MUTEX_NUKE(&clone->nat_lock); + + clone->nat_aps = NULL; + /* + * Initialize all these so that nat_delete() doesn't cause a crash. + */ + clone->nat_tqe.tqe_pnext = NULL; + clone->nat_tqe.tqe_next = NULL; + clone->nat_tqe.tqe_ifq = NULL; + clone->nat_tqe.tqe_parent = clone; + + clone->nat_flags &= ~SI_CLONE; + clone->nat_flags |= SI_CLONED; + + if (clone->nat_hm) + clone->nat_hm->hm_ref++; + + if (nat_insert(clone, fin->fin_rev) == -1) { + KFREE(clone); + return NULL; + } + np = clone->nat_ptr; + if (np != NULL) { + if (nat_logging) + nat_log(clone, (u_int)np->in_redir); + np->in_use++; + } + fr = clone->nat_fr; + if (fr != NULL) { + MUTEX_ENTER(&fr->fr_lock); + fr->fr_ref++; + MUTEX_EXIT(&fr->fr_lock); + } + + /* + * Because the clone is created outside the normal loop of things and + * TCP has special needs in terms of state, initialise the timeout + * state of the new NAT from here. 
+ */ + if (clone->nat_p == IPPROTO_TCP) { + (void) fr_tcp_age(&clone->nat_tqe, fin, nat_tqb, + clone->nat_flags); + } +#ifdef IPFILTER_SYNC + clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone); +#endif + if (nat_logging) + nat_log(clone, NL_CLONE); + return clone; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: nat_wildok */ +/* Returns: int - 1 == packet's ports match wildcards */ +/* 0 == packet's ports don't match wildcards */ +/* Parameters: nat(I) - NAT entry */ +/* sport(I) - source port */ +/* dport(I) - destination port */ +/* flags(I) - wildcard flags */ +/* dir(I) - packet direction */ +/* */ +/* Use NAT entry and packet direction to determine which combination of */ +/* wildcard flags should be used. */ +/* ------------------------------------------------------------------------ */ +static INLINE int nat_wildok(nat, sport, dport, flags, dir) +nat_t *nat; +int sport; +int dport; +int flags; +int dir; +{ + /* + * When called by dir is set to + * nat_inlookup NAT_INBOUND (0) + * nat_outlookup NAT_OUTBOUND (1) + * + * We simply combine the packet's direction in dir with the original + * "intended" direction of that NAT entry in nat->nat_dir to decide + * which combination of wildcard flags to allow. 
+ */ + + switch ((dir << 1) | nat->nat_dir) + { + case 3: /* outbound packet / outbound entry */ + if (((nat->nat_inport == sport) || + (flags & SI_W_SPORT)) && + ((nat->nat_oport == dport) || + (flags & SI_W_DPORT))) + return 1; + break; + case 2: /* outbound packet / inbound entry */ + if (((nat->nat_outport == sport) || + (flags & SI_W_DPORT)) && + ((nat->nat_oport == dport) || + (flags & SI_W_SPORT))) + return 1; + break; + case 1: /* inbound packet / outbound entry */ + if (((nat->nat_oport == sport) || + (flags & SI_W_DPORT)) && + ((nat->nat_outport == dport) || + (flags & SI_W_SPORT))) + return 1; + break; + case 0: /* inbound packet / inbound entry */ + if (((nat->nat_oport == sport) || + (flags & SI_W_SPORT)) && + ((nat->nat_outport == dport) || + (flags & SI_W_DPORT))) + return 1; + break; + default: + break; + } + + return(0); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: nat_mssclamp */ +/* Returns: Nil */ +/* Parameters: tcp(I) - pointer to TCP header */ +/* maxmss(I) - value to clamp the TCP MSS to */ +/* fin(I) - pointer to packet information */ +/* csump(I) - pointer to TCP checksum */ +/* */ +/* Check for MSS option and clamp it if necessary. If found and changed, */ +/* then the TCP header checksum will be updated to reflect the change in */ +/* the MSS. 
*/
+/* ------------------------------------------------------------------------ */
+static void nat_mssclamp(tcp, maxmss, fin, csump)
+tcphdr_t *tcp;
+u_32_t maxmss;
+fr_info_t *fin;
+u_short *csump;
+{
+	u_char *cp, *ep, opt;
+	int hlen, advance;
+	u_32_t mss, sumd;
+
+	hlen = TCP_OFF(tcp) << 2;	/* used below as a byte offset from tcp */
+	if (hlen > sizeof(*tcp)) {	/* something follows the fixed header */
+		cp = (u_char *)tcp + sizeof(*tcp);	/* first option byte */
+		ep = (u_char *)tcp + hlen;	/* one past the last option byte */
+
+		while (cp < ep) {
+			opt = cp[0];
+			if (opt == TCPOPT_EOL)
+				break;
+			else if (opt == TCPOPT_NOP) {
+				cp++;	/* single byte, no length field */
+				continue;
+			}
+
+			if (cp + 1 >= ep)	/* no room left for a length byte */
+				break;
+			advance = cp[1];	/* option's own length byte */
+			if ((cp + advance > ep) || (advance <= 0))
+				break;	/* overruns the header, or would not move on */
+			switch (opt)
+			{
+			case TCPOPT_MAXSEG:
+				if (advance != 4)
+					break;	/* leaves the switch only; walk goes on */
+				mss = cp[2] * 256 + cp[3];	/* high byte first */
+				if (mss > maxmss) {
+					cp[2] = maxmss / 256;
+					cp[3] = maxmss & 0xff;
+					CALC_SUMD(mss, maxmss, sumd);
+					fix_outcksum(fin, csump, sumd);
+				}
+				break;
+			default:
+				/* ignore unknown options */
+				break;
+			}
+
+			cp += advance;	/* step over this option to the next */
+		}
+	}
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_setnatqueue */
+/* Returns: Nil */
+/* Parameters: nat(I)- pointer to NAT structure */
+/* rev(I) - forward(0) or reverse(1) direction */
+/* Locks: ipf_nat (read or write) */
+/* */
+/* Put the NAT entry on its default queue entry, using rev as a helper in */
+/* determining which queue it should be placed on. 
*/
+/* ------------------------------------------------------------------------ */
+void fr_setnatqueue(nat, rev)
+nat_t *nat;
+int rev;
+{
+	ipftq_t *oifq, *nifq;
+
+	if (nat->nat_ptr != NULL)
+		nifq = nat->nat_ptr->in_tqehead[rev];	/* rule's queue, may be NULL */
+	else
+		nifq = NULL;
+
+	if (nifq == NULL) {	/* no rule queue: use the per-protocol one */
+		switch (nat->nat_p)
+		{
+		case IPPROTO_UDP :
+			nifq = &nat_udptq;
+			break;
+		case IPPROTO_ICMP :
+			nifq = &nat_icmptq;
+			break;
+		case IPPROTO_TCP :
+			nifq = nat_tqb + nat->nat_tqe.tqe_state[rev];	/* indexed by TCP state */
+			break;
+		default :
+			nifq = &nat_iptq;
+			break;
+		}
+	}
+
+	oifq = nat->nat_tqe.tqe_ifq;	/* queue it sits on now, if any */
+	/*
+	 * If it's currently on a timeout queue, move it from one queue to
+	 * another, else put it on the end of the newly determined queue.
+	 */
+	if (oifq != NULL)
+		fr_movequeue(&nat->nat_tqe, oifq, nifq);
+	else
+		fr_queueappend(&nat->nat_tqe, nifq, nat);
+	return;
+}
diff --git a/usr/src/uts/common/inet/ipf/ip_pool.c b/usr/src/uts/common/inet/ipf/ip_pool.c
new file mode 100644
index 0000000000..e3aace93df
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/ip_pool.c
@@ -0,0 +1,789 @@
+/*
+ * Copyright (C) 1993-2001, 2003 by Darren Reed.
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ *
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#if defined(KERNEL) || defined(_KERNEL) +# undef KERNEL +# undef _KERNEL +# define KERNEL 1 +# define _KERNEL 1 +#endif +#if defined(__osf__) +# define _PROTO_NET_H_ +#endif +#include <sys/errno.h> +#include <sys/types.h> +#include <sys/param.h> +#include <sys/file.h> +#if !defined(_KERNEL) && !defined(__KERNEL__) +# include <stdio.h> +# include <stdlib.h> +# include <string.h> +# define _KERNEL +# ifdef __OpenBSD__ +struct file; +# endif +# include <sys/uio.h> +# undef _KERNEL +#else +# include <sys/systm.h> +# if defined(NetBSD) && (__NetBSD_Version__ >= 104000000) +# include <sys/proc.h> +# endif +#endif +#include <sys/time.h> +#if !defined(linux) +# include <sys/protosw.h> +#endif +#include <sys/socket.h> +#if defined(_KERNEL) && (!defined(__SVR4) && !defined(__svr4__)) +# include <sys/mbuf.h> +#endif +#if defined(__SVR4) || defined(__svr4__) +# include <sys/filio.h> +# include <sys/byteorder.h> +# ifdef _KERNEL +# include <sys/dditypes.h> +# endif +# include <sys/stream.h> +# include <sys/kmem.h> +#endif +#if defined(__FreeBSD_version) && (__FreeBSD_version >= 300000) +# include <sys/malloc.h> +#endif + +#if defined(_KERNEL) && (defined(__osf__) || defined(AIX) || \ + defined(__hpux) || defined(__sgi)) +# ifdef __osf__ +# include <net/radix.h> +# endif +# include "radix_ipf_local.h" +# define _RADIX_H_ +#endif +#include <net/if.h> +#include <netinet/in.h> + +#include "netinet/ip_compat.h" +#include "netinet/ip_fil.h" +#include "netinet/ip_pool.h" + +#if defined(IPFILTER_LOOKUP) && defined(_KERNEL) && \ + ((BSD >= 198911) && !defined(__osf__) && \ + !defined(__hpux) && !defined(__sgi)) +static int rn_freenode __P((struct radix_node *, void *)); +#endif + +/* END OF INCLUDES */ + +#if !defined(lint) +static const char sccsid[] = "@(#)ip_fil.c 2.41 6/5/96 (C) 1993-2000 Darren Reed"; +static const char rcsid[] = "@(#)$Id: ip_pool.c,v 2.55.2.14 2005/06/12 07:18:26 darrenr Exp $"; +#endif + +#ifdef IPFILTER_LOOKUP + +# 
ifndef RADIX_NODE_HEAD_LOCK +# define RADIX_NODE_HEAD_LOCK(x) ; +# endif +# ifndef RADIX_NODE_HEAD_UNLOCK +# define RADIX_NODE_HEAD_UNLOCK(x) ; +# endif + +ip_pool_stat_t ipoolstat; +ipfrwlock_t ip_poolrw; + +/* + * Binary tree routines from Sedgewick and enhanced to do ranges of addresses. + * NOTE: Insertion *MUST* be from greatest range to least for it to work! + * These should be replaced, eventually, by something else - most notably a + * interval searching method. The important feature is to be able to find + * the best match. + * + * So why not use a radix tree for this? As the first line implies, it + * has been written to work with a _range_ of addresses. A range is not + * necessarily a match with any given netmask so what we end up dealing + * with is an interval tree. Implementations of these are hard to find + * and the one herein is far from bug free. + * + * Sigh, in the end I became convinced that the bugs the code contained did + * not make it worthwhile not using radix trees. For now the radix tree from + * 4.4 BSD is used, but this is not viewed as a long term solution. 
+ */ +ip_pool_t *ip_pool_list[IPL_LOGSIZE] = { NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL }; + + +#ifdef TEST_POOL +void treeprint __P((ip_pool_t *)); + +int +main(argc, argv) + int argc; + char *argv[]; +{ + addrfamily_t a, b; + iplookupop_t op; + ip_pool_t *ipo; + i6addr_t ip; + + RWLOCK_INIT(&ip_poolrw, "poolrw"); + ip_pool_init(); + + bzero((char *)&a, sizeof(a)); + bzero((char *)&b, sizeof(b)); + bzero((char *)&ip, sizeof(ip)); + bzero((char *)&op, sizeof(op)); + strcpy(op.iplo_name, "0"); + + if (ip_pool_create(&op) == 0) + ipo = ip_pool_find(0, "0"); + + a.adf_addr.in4.s_addr = 0x0a010203; + b.adf_addr.in4.s_addr = 0xffffffff; + ip_pool_insert(ipo, &a, &b, 1); + ip_pool_insert(ipo, &a, &b, 1); + + a.adf_addr.in4.s_addr = 0x0a000000; + b.adf_addr.in4.s_addr = 0xff000000; + ip_pool_insert(ipo, &a, &b, 0); + ip_pool_insert(ipo, &a, &b, 0); + + a.adf_addr.in4.s_addr = 0x0a010100; + b.adf_addr.in4.s_addr = 0xffffff00; + ip_pool_insert(ipo, &a, &b, 1); + ip_pool_insert(ipo, &a, &b, 1); + + a.adf_addr.in4.s_addr = 0x0a010200; + b.adf_addr.in4.s_addr = 0xffffff00; + ip_pool_insert(ipo, &a, &b, 0); + ip_pool_insert(ipo, &a, &b, 0); + + a.adf_addr.in4.s_addr = 0x0a010000; + b.adf_addr.in4.s_addr = 0xffff0000; + ip_pool_insert(ipo, &a, &b, 1); + ip_pool_insert(ipo, &a, &b, 1); + + a.adf_addr.in4.s_addr = 0x0a01020f; + b.adf_addr.in4.s_addr = 0xffffffff; + ip_pool_insert(ipo, &a, &b, 1); + ip_pool_insert(ipo, &a, &b, 1); +#ifdef DEBUG_POOL +treeprint(ipo); +#endif + ip.in4.s_addr = 0x0a00aabb; + printf("search(%#x) = %d (0)\n", ip.in4.s_addr, + ip_pool_search(ipo, 4, &ip)); + + ip.in4.s_addr = 0x0a000001; + printf("search(%#x) = %d (0)\n", ip.in4.s_addr, + ip_pool_search(ipo, 4, &ip)); + + ip.in4.s_addr = 0x0a000101; + printf("search(%#x) = %d (0)\n", ip.in4.s_addr, + ip_pool_search(ipo, 4, &ip)); + + ip.in4.s_addr = 0x0a010001; + printf("search(%#x) = %d (1)\n", ip.in4.s_addr, + ip_pool_search(ipo, 4, &ip)); + + ip.in4.s_addr = 0x0a010101; + printf("search(%#x) = 
%d (1)\n", ip.in4.s_addr, + ip_pool_search(ipo, 4, &ip)); + + ip.in4.s_addr = 0x0a010201; + printf("search(%#x) = %d (0)\n", ip.in4.s_addr, + ip_pool_search(ipo, 4, &ip)); + + ip.in4.s_addr = 0x0a010203; + printf("search(%#x) = %d (1)\n", ip.in4.s_addr, + ip_pool_search(ipo, 4, &ip)); + + ip.in4.s_addr = 0x0a01020f; + printf("search(%#x) = %d (1)\n", ip.in4.s_addr, + ip_pool_search(ipo, 4, &ip)); + + ip.in4.s_addr = 0x0b00aabb; + printf("search(%#x) = %d (-1)\n", ip.in4.s_addr, + ip_pool_search(ipo, 4, &ip)); + +#ifdef DEBUG_POOL +treeprint(ipo); +#endif + + ip_pool_fini(); + + return 0; +} + + +void +treeprint(ipo) +ip_pool_t *ipo; +{ + ip_pool_node_t *c; + + for (c = ipo->ipo_list; c != NULL; c = c->ipn_next) + printf("Node %p(%s) (%#x/%#x) = %d hits %lu\n", + c, c->ipn_name, c->ipn_addr.adf_addr.in4.s_addr, + c->ipn_mask.adf_addr.in4.s_addr, + c->ipn_info, c->ipn_hits); +} +#endif /* TEST_POOL */ + + +/* ------------------------------------------------------------------------ */ +/* Function: ip_pool_init */ +/* Returns: int - 0 = success, else error */ +/* */ +/* Initialise the routing table data structures where required. */ +/* ------------------------------------------------------------------------ */ +int ip_pool_init() +{ + + bzero((char *)&ipoolstat, sizeof(ipoolstat)); + +#if (!defined(_KERNEL) || (BSD < 199306)) + rn_init(); +#endif + return 0; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: ip_pool_fini */ +/* Returns: int - 0 = success, else error */ +/* Locks: WRITE(ipf_global) */ +/* */ +/* Clean up all the pool data structures allocated and call the cleanup */ +/* function for the radix tree that supports the pools. ip_pool_destroy() is*/ +/* used to delete the pools one by one to ensure they're properly freed up. 
*/
+/* ------------------------------------------------------------------------ */
+void ip_pool_fini()
+{
+	ip_pool_t *p, *q;
+	iplookupop_t op;
+	int i;
+
+	ASSERT(rw_read_locked(&ipf_global.ipf_lk) == 0);
+
+	for (i = 0; i <= IPL_LOGMAX; i++) {
+		for (q = ip_pool_list[i]; (p = q) != NULL; ) {
+			op.iplo_unit = i;	/* name the pool via unit + name */
+			(void)strncpy(op.iplo_name, p->ipo_name,
+				sizeof(op.iplo_name));
+			q = p->ipo_next;	/* read before p may be freed */
+			(void) ip_pool_destroy(&op);	/* failures are ignored here */
+		}
+	}
+
+#if (!defined(_KERNEL) || (BSD < 199306))
+	rn_fini();
+#endif
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: ip_pool_statistics */
+/* Returns: int - 0 = success, else error */
+/* Parameters: op(I) - pointer to lookup operation arguments */
+/* */
+/* Copy the current statistics out into user space, collecting pool list */
+/* pointers as appropriate for later use. */
+/* ------------------------------------------------------------------------ */
+int ip_pool_statistics(op)
+iplookupop_t *op;
+{
+	ip_pool_stat_t stats;
+	int unit, i, err = 0;
+
+	if (op->iplo_size != sizeof(ipoolstat))	/* size must match exactly */
+		return EINVAL;
+
+	bcopy((char *)&ipoolstat, (char *)&stats, sizeof(stats));	/* work on a copy */
+	unit = op->iplo_unit;
+	if (unit == IPL_LOGALL) {
+		for (i = 0; i < IPL_LOGSIZE; i++)
+			stats.ipls_list[i] = ip_pool_list[i];	/* every unit's list head */
+	} else if (unit >= 0 && unit < IPL_LOGSIZE) {
+		if (op->iplo_name[0] != '\0')
+			stats.ipls_list[unit] = ip_pool_find(unit,
+								op->iplo_name);	/* named pool only */
+		else
+			stats.ipls_list[unit] = ip_pool_list[unit];
+	} else
+		err = EINVAL;	/* unit outside the table */
+	if (err == 0)
+		err = COPYOUT(&stats, op->iplo_struct, sizeof(stats));
+	return err;
+}
+
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: ip_pool_find */
+/* Returns: void * - pointer to the matching pool, NULL if none matches */
+/* Parameters: unit(I), name(I) - device unit and pool name to search for */
+/* */
+/* Find a matching pool inside the collection of pools for a particular */
+/* device, indicated by the unit number. 
*/ +/* ------------------------------------------------------------------------ */ +void *ip_pool_find(unit, name) +int unit; +char *name; +{ + ip_pool_t *p; + + for (p = ip_pool_list[unit]; p != NULL; p = p->ipo_next) + if (strncmp(p->ipo_name, name, sizeof(p->ipo_name)) == 0) + break; + return p; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: ip_pool_findeq */ +/* Returns: int - 0 = success, else error */ +/* Parameters: ipo(I) - pointer to the pool getting the new node. */ +/* addr(I) - pointer to address information to delete */ +/* mask(I) - */ +/* */ +/* Searches for an exact match of an entry in the pool. */ +/* ------------------------------------------------------------------------ */ +ip_pool_node_t *ip_pool_findeq(ipo, addr, mask) +ip_pool_t *ipo; +addrfamily_t *addr, *mask; +{ + struct radix_node *n; + SPL_INT(s); + + SPL_NET(s); + RADIX_NODE_HEAD_LOCK(ipo->ipo_head); + n = ipo->ipo_head->rnh_lookup(addr, mask, ipo->ipo_head); + RADIX_NODE_HEAD_UNLOCK(ipo->ipo_head); + SPL_X(s); + return (ip_pool_node_t *)n; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: ip_pool_search */ +/* Returns: int - 0 == +ve match, -1 == error, 1 == -ve/no match */ +/* Parameters: tptr(I) - pointer to the pool to search */ +/* version(I) - IP protocol version (4 or 6) */ +/* dptr(I) - pointer to address information */ +/* */ +/* Search the pool for a given address and return a search result. 
*/ +/* ------------------------------------------------------------------------ */ +int ip_pool_search(tptr, version, dptr) +void *tptr; +int version; +void *dptr; +{ + struct radix_node *rn; + ip_pool_node_t *m; + i6addr_t *addr; + addrfamily_t v; + ip_pool_t *ipo; + int rv; + + ipo = tptr; + if (ipo == NULL) + return -1; + + rv = 1; + m = NULL; + addr = (i6addr_t *)dptr; + bzero(&v, sizeof(v)); + v.adf_len = offsetof(addrfamily_t, adf_addr); + + if (version == 4) { + v.adf_len += sizeof(addr->in4); + v.adf_addr.in4 = addr->in4; +#ifdef USE_INET6 + } else if (version == 6) { + v.adf_len += sizeof(addr->in6); + v.adf_addr.in6 = addr->in6; +#endif + } else + return -1; + + READ_ENTER(&ip_poolrw); + + RADIX_NODE_HEAD_LOCK(ipo->ipo_head); + rn = ipo->ipo_head->rnh_matchaddr(&v, ipo->ipo_head); + RADIX_NODE_HEAD_UNLOCK(ipo->ipo_head); + + if ((rn != NULL) && ((rn->rn_flags & RNF_ROOT) == 0)) { + m = (ip_pool_node_t *)rn; + ipo->ipo_hits++; + m->ipn_hits++; + rv = m->ipn_info; + } + RWLOCK_EXIT(&ip_poolrw); + return rv; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: ip_pool_insert */ +/* Returns: int - 0 = success, else error */ +/* Parameters: ipo(I) - pointer to the pool getting the new node. */ +/* addr(I) - IPv4/6 address being added as a node */ +/* mask(I) - IPv4/6 netmask to with the node being added */ +/* info(I) - extra information to store in this node. */ +/* Locks: WRITE(ip_poolrw) */ +/* */ +/* Add another node to the pool given by ipo. The three parameters passed */ +/* in (addr, mask, info) shold all be stored in the node. 
*/
+/* ------------------------------------------------------------------------ */
+int ip_pool_insert(ipo, addr, mask, info)
+ip_pool_t *ipo;
+addrfamily_t *addr, *mask;
+int info;
+{
+	struct radix_node *rn;
+	ip_pool_node_t *x;
+
+	ASSERT(rw_read_locked(&ip_poolrw.ipf_lk) == 0);
+
+	KMALLOC(x, ip_pool_node_t *);
+	if (x == NULL) {
+		return ENOMEM;
+	}
+
+	bzero(x, sizeof(*x));
+
+	x->ipn_info = info;
+	(void)strncpy(x->ipn_name, ipo->ipo_name, sizeof(x->ipn_name));	/* node carries its pool's name */
+
+	bcopy(addr, &x->ipn_addr, sizeof(*addr));
+	x->ipn_addr.adf_len = sizeof(x->ipn_addr);	/* length always set to the full structure size */
+	bcopy(mask, &x->ipn_mask, sizeof(*mask));
+	x->ipn_mask.adf_len = sizeof(x->ipn_mask);
+
+	RADIX_NODE_HEAD_LOCK(ipo->ipo_head);
+	rn = ipo->ipo_head->rnh_addaddr(&x->ipn_addr, &x->ipn_mask,
+					ipo->ipo_head, x->ipn_nodes);
+	RADIX_NODE_HEAD_UNLOCK(ipo->ipo_head);
+#ifdef	DEBUG_POOL
+	printf("Added %p at %p\n", x, rn);
+#endif
+
+	if (rn == NULL) {	/* tree refused the entry; reported as ENOMEM */
+		KFREE(x);
+		return ENOMEM;
+	}
+
+	x->ipn_next = ipo->ipo_list;	/* push onto the head of the pool's node list */
+	x->ipn_pnext = &ipo->ipo_list;
+	if (ipo->ipo_list != NULL)
+		ipo->ipo_list->ipn_pnext = &x->ipn_next;	/* old head now hangs off x */
+	ipo->ipo_list = x;
+
+	ipoolstat.ipls_nodes++;
+
+	return 0;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: ip_pool_create */
+/* Returns: int - 0 = success, else error */
+/* Parameters: op(I) - pointer to iplookup struct with call details */
+/* Locks: WRITE(ip_poolrw) */
+/* */
+/* Creates a new group according to the parameters passed in via the */
+/* iplookupop structure. Does not check to see if the group already exists */
+/* when being inserted - assume this has already been done. If the pool is */
+/* marked as being anonymous, give it a new, unique, identifier. Call any */
+/* other functions required to initialise the structure. 
*/
+/* ------------------------------------------------------------------------ */
+int ip_pool_create(op)
+iplookupop_t *op;
+{
+	char name[FR_GROUPLEN];
+	int poolnum, unit;
+	ip_pool_t *h;
+
+	ASSERT(rw_read_locked(&ip_poolrw.ipf_lk) == 0);
+
+	KMALLOC(h, ip_pool_t *);
+	if (h == NULL)
+		return ENOMEM;
+	bzero(h, sizeof(*h));
+
+	if (rn_inithead((void **)&h->ipo_head,
+			offsetof(addrfamily_t, adf_addr) << 3) == 0) {
+		KFREE(h);	/* no tree head: hand the pool back */
+		return ENOMEM;
+	}
+
+	unit = op->iplo_unit;	/* used unchecked as an index into ip_pool_list */
+
+	if ((op->iplo_arg & IPOOL_ANON) != 0) {
+		ip_pool_t *p;
+
+		poolnum = IPOOL_ANON;	/* first candidate number */
+
+#if defined(SNPRINTF) && defined(_KERNEL)
+		(void)SNPRINTF(name, sizeof(name), "%x", poolnum);
+#else
+		(void)sprintf(name, "%x", poolnum);
+#endif
+
+		for (p = ip_pool_list[unit]; p != NULL; ) {
+			if (strncmp(name, p->ipo_name,
+				    sizeof(p->ipo_name)) == 0) {
+				poolnum++;	/* name taken: try the next number */
+#if defined(SNPRINTF) && defined(_KERNEL)
+				(void)SNPRINTF(name, sizeof(name), "%x", poolnum);
+#else
+				(void)sprintf(name, "%x", poolnum);
+#endif
+				p = ip_pool_list[unit];	/* and rescan from the start */
+			} else
+				p = p->ipo_next;
+		}
+
+		(void)strncpy(h->ipo_name, name, sizeof(h->ipo_name));
+	} else {
+		(void) strncpy(h->ipo_name, op->iplo_name, sizeof(h->ipo_name));
+	}
+
+	h->ipo_ref = 1;	/* ip_pool_destroy() only frees a pool whose count is 1 */
+	h->ipo_list = NULL;
+	h->ipo_unit = unit;
+	h->ipo_next = ip_pool_list[unit];	/* push onto the head of the unit's list */
+	if (ip_pool_list[unit] != NULL)
+		ip_pool_list[unit]->ipo_pnext = &h->ipo_next;
+	h->ipo_pnext = &ip_pool_list[unit];
+	ip_pool_list[unit] = h;
+
+	ipoolstat.ipls_pools++;
+
+	return 0;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: ip_pool_remove */
+/* Returns: int - 0 = success, else error */
+/* Parameters: ipo(I) - pointer to the pool to remove the node from. */
+/* ipe(I) - address being deleted as a node */
+/* Locks: WRITE(ip_poolrw) */
+/* */
+/* Remove the node ipe from the pool given by ipo: it is unlinked from the */
+/* pool's node list, deleted from the radix tree and then freed. 
*/ +/* ------------------------------------------------------------------------ */ +int ip_pool_remove(ipo, ipe) +ip_pool_t *ipo; +ip_pool_node_t *ipe; +{ + ip_pool_node_t **ipp, *n; + + ASSERT(rw_read_locked(&ip_poolrw.ipf_lk) == 0); + + for (ipp = &ipo->ipo_list; (n = *ipp) != NULL; ipp = &n->ipn_next) { + if (ipe == n) { + *n->ipn_pnext = n->ipn_next; + if (n->ipn_next) + n->ipn_next->ipn_pnext = n->ipn_pnext; + break; + } + } + + if (n == NULL) + return ENOENT; + + RADIX_NODE_HEAD_LOCK(ipo->ipo_head); + ipo->ipo_head->rnh_deladdr(&n->ipn_addr, &n->ipn_mask, + ipo->ipo_head); + RADIX_NODE_HEAD_UNLOCK(ipo->ipo_head); + KFREE(n); + + ipoolstat.ipls_nodes--; + + return 0; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: ip_pool_destroy */ +/* Returns: int - 0 = success, else error */ +/* Parameters: op(I) - information about the pool to remove */ +/* Locks: WRITE(ip_poolrw) or WRITE(ipf_global) */ +/* */ +/* Search for a pool using paramters passed in and if it's not otherwise */ +/* busy, free it. */ +/* */ +/* NOTE: Because this function is called out of ipldetach() where ip_poolrw */ +/* may not be initialised, we can't use an ASSERT to enforce the locking */ +/* assertion that one of the two (ip_poolrw,ipf_global) is held. */ +/* ------------------------------------------------------------------------ */ +int ip_pool_destroy(op) +iplookupop_t *op; +{ + ip_pool_t *ipo; + + ipo = ip_pool_find(op->iplo_unit, op->iplo_name); + if (ipo == NULL) + return ESRCH; + + if (ipo->ipo_ref != 1) + return EBUSY; + + ip_pool_free(ipo); + return 0; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: ip_pool_flush */ +/* Returns: int - number of pools deleted */ +/* Parameters: fp(I) - which pool(s) to flush */ +/* Locks: WRITE(ip_poolrw) or WRITE(ipf_global) */ +/* */ +/* Free all pools associated with the device that matches the unit number */ +/* passed in with operation. 
*/ +/* */ +/* NOTE: Because this function is called out of ipldetach() where ip_poolrw */ +/* may not be initialised, we can't use an ASSERT to enforce the locking */ +/* assertion that one of the two (ip_poolrw,ipf_global) is held. */ +/* ------------------------------------------------------------------------ */ +int ip_pool_flush(fp) +iplookupflush_t *fp; +{ + int i, num = 0, unit, err; + ip_pool_t *p, *q; + iplookupop_t op; + + unit = fp->iplf_unit; + + for (i = 0; i <= IPL_LOGMAX; i++) { + if (unit != IPLT_ALL && i != unit) + continue; + for (q = ip_pool_list[i]; (p = q) != NULL; ) { + op.iplo_unit = i; + (void)strncpy(op.iplo_name, p->ipo_name, + sizeof(op.iplo_name)); + q = p->ipo_next; + err = ip_pool_destroy(&op); + if (err == 0) + num++; + else + break; + } + } + return num; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: ip_pool_free */ +/* Returns: void */ +/* Parameters: ipo(I) - pointer to pool structure */ +/* Locks: WRITE(ip_poolrw) or WRITE(ipf_global) */ +/* */ +/* Deletes the pool strucutre passed in from the list of pools and deletes */ +/* all of the address information stored in it, including any tree data */ +/* structures also allocated. */ +/* */ +/* NOTE: Because this function is called out of ipldetach() where ip_poolrw */ +/* may not be initialised, we can't use an ASSERT to enforce the locking */ +/* assertion that one of the two (ip_poolrw,ipf_global) is held. 
*/
+/* ------------------------------------------------------------------------ */
+void ip_pool_free(ipo)
+ip_pool_t *ipo;
+{
+	ip_pool_node_t *n;
+
+	RADIX_NODE_HEAD_LOCK(ipo->ipo_head);
+	while ((n = ipo->ipo_list) != NULL) {	/* always take the current head */
+		ipo->ipo_head->rnh_deladdr(&n->ipn_addr, &n->ipn_mask,
+					   ipo->ipo_head);
+
+		*n->ipn_pnext = n->ipn_next;	/* unlink: this is what advances ipo_list */
+		if (n->ipn_next)
+			n->ipn_next->ipn_pnext = n->ipn_pnext;
+
+		KFREE(n);
+
+		ipoolstat.ipls_nodes--;
+	}
+	RADIX_NODE_HEAD_UNLOCK(ipo->ipo_head);
+
+	ipo->ipo_list = NULL;
+	if (ipo->ipo_next != NULL)
+		ipo->ipo_next->ipo_pnext = ipo->ipo_pnext;	/* take the pool off the pool list */
+	*ipo->ipo_pnext = ipo->ipo_next;
+	rn_freehead(ipo->ipo_head);	/* tree head goes before the pool itself */
+	KFREE(ipo);
+
+	ipoolstat.ipls_pools--;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: ip_pool_deref */
+/* Returns: void */
+/* Parameters: ipo(I) - pointer to pool structure */
+/* Locks: WRITE(ip_poolrw) */
+/* */
+/* Drop the number of known references to this pool structure by one and if */
+/* we arrive at zero known references, free it. 
*/
+/* ------------------------------------------------------------------------ */
+void ip_pool_deref(ipo)
+ip_pool_t *ipo;
+{
+
+	ASSERT(rw_read_locked(&ip_poolrw.ipf_lk) == 0);
+
+	ipo->ipo_ref--;
+	if (ipo->ipo_ref == 0)	/* last reference gone */
+		ip_pool_free(ipo);
+}
+
+
+# if defined(_KERNEL) && ((BSD >= 198911) && !defined(__osf__) && \
+      !defined(__hpux) && !defined(__sgi))
+static int
+rn_freenode(struct radix_node *n, void *p)
+{
+	struct radix_node_head *rnh = p;	/* walker argument is the tree head */
+	struct radix_node *d;
+
+	d = rnh->rnh_deladdr(n->rn_key, NULL, rnh);	/* delete by key, no mask given */
+	if (d != NULL) {
+		FreeS(d, max_keylen + 2 * sizeof (*d));
+	}
+	return 0;	/* always 0, whether or not a node was freed */
+}
+
+
+void
+rn_freehead(rnh)
+      struct radix_node_head *rnh;
+{
+
+	RADIX_NODE_HEAD_LOCK(rnh);
+	(*rnh->rnh_walktree)(rnh, rn_freenode, rnh);	/* rn_freenode is run over the tree */
+
+	rnh->rnh_addaddr = NULL;	/* clear the method pointers before the head is freed */
+	rnh->rnh_deladdr = NULL;
+	rnh->rnh_matchaddr = NULL;
+	rnh->rnh_lookup = NULL;
+	rnh->rnh_walktree = NULL;
+	RADIX_NODE_HEAD_UNLOCK(rnh);
+
+	Free(rnh);
+}
+# endif
+
+#endif /* IPFILTER_LOOKUP */
diff --git a/usr/src/uts/common/inet/ipf/ip_proxy.c b/usr/src/uts/common/inet/ipf/ip_proxy.c
new file mode 100644
index 0000000000..99f8a14b88
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/ip_proxy.c
@@ -0,0 +1,858 @@
+/*
+ * Copyright (C) 1997-2003 by Darren Reed.
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ *
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#if defined(KERNEL) || defined(_KERNEL) +# undef KERNEL +# undef _KERNEL +# define KERNEL 1 +# define _KERNEL 1 +#endif +#include <sys/errno.h> +#include <sys/types.h> +#include <sys/param.h> +#include <sys/time.h> +#include <sys/file.h> +#if !defined(AIX) +# include <sys/fcntl.h> +#endif +#if !defined(_KERNEL) && !defined(__KERNEL__) +# include <stdio.h> +# include <string.h> +# include <stdlib.h> +# include <ctype.h> +# define _KERNEL +# ifdef __OpenBSD__ +struct file; +# endif +# include <sys/uio.h> +# undef _KERNEL +#endif +#if !defined(linux) +# include <sys/protosw.h> +#endif +#include <sys/socket.h> +#if defined(_KERNEL) +# if !defined(__NetBSD__) && !defined(sun) && !defined(__osf__) && \ + !defined(__OpenBSD__) && !defined(__hpux) && !defined(__sgi) && \ + !defined(AIX) +# include <sys/ctype.h> +# endif +# include <sys/systm.h> +# if !defined(__SVR4) && !defined(__svr4__) +# include <sys/mbuf.h> +# endif +#endif +#if defined(_KERNEL) && (__FreeBSD_version >= 220000) +# include <sys/filio.h> +# include <sys/fcntl.h> +# if (__FreeBSD_version >= 300000) && !defined(IPFILTER_LKM) +# include "opt_ipfilter.h" +# endif +#else +# include <sys/ioctl.h> +#endif +#if defined(__SVR4) || defined(__svr4__) +# include <sys/byteorder.h> +# ifdef _KERNEL +# include <sys/dditypes.h> +# endif +# include <sys/stream.h> +# include <sys/kmem.h> +#endif +#if __FreeBSD__ > 2 +# include <sys/queue.h> +#endif +#include <net/if.h> +#ifdef sun +# include <net/af.h> +#endif +#include <net/route.h> +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#ifndef linux +# include <netinet/ip_var.h> +#endif +#include <netinet/tcp.h> +#include <netinet/udp.h> +#include <netinet/ip_icmp.h> +#include "netinet/ip_compat.h" +#include <netinet/tcpip.h> +#include "netinet/ip_fil.h" +#include "netinet/ip_nat.h" +#include "netinet/ip_state.h" +#include "netinet/ip_proxy.h" +#if (__FreeBSD_version >= 300000) +# include 
<sys/malloc.h> +#endif + +#include "netinet/ip_ftp_pxy.c" +#include "netinet/ip_rcmd_pxy.c" +# include "netinet/ip_pptp_pxy.c" +#if defined(_KERNEL) +# include "netinet/ip_irc_pxy.c" +# include "netinet/ip_raudio_pxy.c" +# include "netinet/ip_h323_pxy.c" +# include "netinet/ip_netbios_pxy.c" +#endif +#include "netinet/ip_ipsec_pxy.c" +#include "netinet/ip_rpcb_pxy.c" + +/* END OF INCLUDES */ + +#if !defined(lint) +static const char rcsid[] = "@(#)$Id: ip_proxy.c,v 2.62.2.14 2005/06/18 02:41:33 darrenr Exp $"; +#endif + +static int appr_fixseqack __P((fr_info_t *, ip_t *, ap_session_t *, int )); + +#define AP_SESS_SIZE 53 + +#if defined(_KERNEL) +int ipf_proxy_debug = 0; +#else +int ipf_proxy_debug = 2; +#endif +ap_session_t *ap_sess_tab[AP_SESS_SIZE]; +ap_session_t *ap_sess_list = NULL; +aproxy_t *ap_proxylist = NULL; +aproxy_t ap_proxies[] = { +#ifdef IPF_FTP_PROXY + { NULL, "ftp", (char)IPPROTO_TCP, 0, 0, ippr_ftp_init, ippr_ftp_fini, + ippr_ftp_new, NULL, ippr_ftp_in, ippr_ftp_out, NULL }, +#endif +#ifdef IPF_IRC_PROXY + { NULL, "irc", (char)IPPROTO_TCP, 0, 0, ippr_irc_init, ippr_irc_fini, + ippr_irc_new, NULL, NULL, ippr_irc_out, NULL, NULL }, +#endif +#ifdef IPF_RCMD_PROXY + { NULL, "rcmd", (char)IPPROTO_TCP, 0, 0, ippr_rcmd_init, ippr_rcmd_fini, + ippr_rcmd_new, NULL, ippr_rcmd_in, ippr_rcmd_out, NULL, NULL }, +#endif +#ifdef IPF_RAUDIO_PROXY + { NULL, "raudio", (char)IPPROTO_TCP, 0, 0, ippr_raudio_init, ippr_raudio_fini, + ippr_raudio_new, NULL, ippr_raudio_in, ippr_raudio_out, NULL, NULL }, +#endif +#ifdef IPF_MSNRPC_PROXY + { NULL, "msnrpc", (char)IPPROTO_TCP, 0, 0, ippr_msnrpc_init, ippr_msnrpc_fini, + ippr_msnrpc_new, NULL, ippr_msnrpc_in, ippr_msnrpc_out, NULL, NULL }, +#endif +#ifdef IPF_NETBIOS_PROXY + { NULL, "netbios", (char)IPPROTO_UDP, 0, 0, ippr_netbios_init, ippr_netbios_fini, + NULL, NULL, NULL, ippr_netbios_out, NULL, NULL }, +#endif +#ifdef IPF_IPSEC_PROXY + { NULL, "ipsec", (char)IPPROTO_UDP, 0, 0, + ippr_ipsec_init, ippr_ipsec_fini, 
ippr_ipsec_new, ippr_ipsec_del, + ippr_ipsec_inout, ippr_ipsec_inout, ippr_ipsec_match, NULL }, +#endif +#ifdef IPF_PPTP_PROXY + { NULL, "pptp", (char)IPPROTO_TCP, 0, 0, + ippr_pptp_init, ippr_pptp_fini, ippr_pptp_new, ippr_pptp_del, + ippr_pptp_inout, ippr_pptp_inout, NULL, NULL }, +#endif +#ifdef IPF_H323_PROXY + { NULL, "h323", (char)IPPROTO_TCP, 0, 0, ippr_h323_init, ippr_h323_fini, + ippr_h323_new, ippr_h323_del, ippr_h323_in, NULL, NULL }, + { NULL, "h245", (char)IPPROTO_TCP, 0, 0, NULL, NULL, + ippr_h245_new, NULL, NULL, ippr_h245_out, NULL }, +#endif +#ifdef IPF_RPCB_PROXY +# if 0 + { NULL, "rpcbt", (char)IPPROTO_TCP, 0, 0, + ippr_rpcb_init, ippr_rpcb_fini, ippr_rpcb_new, ippr_rpcb_del, + ippr_rpcb_in, ippr_rpcb_out, NULL, NULL }, +# endif + { NULL, "rpcbu", (char)IPPROTO_UDP, 0, 0, + ippr_rpcb_init, ippr_rpcb_fini, ippr_rpcb_new, ippr_rpcb_del, + ippr_rpcb_in, ippr_rpcb_out, NULL, NULL }, +#endif + { NULL, "", '\0', 0, 0, NULL, NULL, NULL, NULL } +}; + +/* + * Dynamically add a new kernel proxy. Ensure that it is unique in the + * collection compiled in and dynamically added. + */ +int appr_add(ap) +aproxy_t *ap; +{ + aproxy_t *a; + + for (a = ap_proxies; a->apr_p; a++) + if ((a->apr_p == ap->apr_p) && + !strncmp(a->apr_label, ap->apr_label, + sizeof(ap->apr_label))) { + if (ipf_proxy_debug > 1) + printf("appr_add: %s/%d already present (B)\n", + a->apr_label, a->apr_p); + return -1; + } + + for (a = ap_proxylist; a->apr_p; a = a->apr_next) + if ((a->apr_p == ap->apr_p) && + !strncmp(a->apr_label, ap->apr_label, + sizeof(ap->apr_label))) { + if (ipf_proxy_debug > 1) + printf("appr_add: %s/%d already present (D)\n", + a->apr_label, a->apr_p); + return -1; + } + ap->apr_next = ap_proxylist; + ap_proxylist = ap; + if (ap->apr_init != NULL) + return (*ap->apr_init)(); + return 0; +} + + +/* + * Check to see if the proxy this control request has come through for + * exists, and if it does and it has a control function then invoke that + * control function. 
+ */ +int appr_ctl(ctl) +ap_ctl_t *ctl; +{ + aproxy_t *a; + int error; + + a = appr_lookup(ctl->apc_p, ctl->apc_label); + if (a == NULL) { + if (ipf_proxy_debug > 1) + printf("appr_ctl: can't find %s/%d\n", + ctl->apc_label, ctl->apc_p); + error = ESRCH; + } else if (a->apr_ctl == NULL) { + if (ipf_proxy_debug > 1) + printf("appr_ctl: no ctl function for %s/%d\n", + ctl->apc_label, ctl->apc_p); + error = ENXIO; + } else { + error = (*a->apr_ctl)(a, ctl); + if ((error != 0) && (ipf_proxy_debug > 1)) + printf("appr_ctl: %s/%d ctl error %d\n", + a->apr_label, a->apr_p, error); + } + return error; +} + + +/* + * Delete a proxy that has been added dynamically from those available. + * If it is in use, return 1 (do not destroy NOW), not in use 0 or -1 + * if it cannot be matched. + */ +int appr_del(ap) +aproxy_t *ap; +{ + aproxy_t *a, **app; + + for (app = &ap_proxylist; ((a = *app) != NULL); app = &a->apr_next) + if (a == ap) { + a->apr_flags |= APR_DELETE; + *app = a->apr_next; + if (ap->apr_ref != 0) { + if (ipf_proxy_debug > 2) + printf("appr_del: orphaning %s/%d\n", + ap->apr_label, ap->apr_p); + return 1; + } + return 0; + } + if (ipf_proxy_debug > 1) + printf("appr_del: proxy %lx not found\n", (u_long)ap); + return -1; +} + + +/* + * Return 1 if the packet is a good match against a proxy, else 0. 
+ */ +int appr_ok(fin, tcp, nat) +fr_info_t *fin; +tcphdr_t *tcp; +ipnat_t *nat; +{ + aproxy_t *apr = nat->in_apr; + u_short dport = nat->in_dport; + + if ((apr == NULL) || (apr->apr_flags & APR_DELETE) || + (fin->fin_p != apr->apr_p)) + return 0; + if ((tcp == NULL) && dport) + return 0; + return 1; +} + + +int appr_ioctl(data, cmd, mode) +caddr_t data; +ioctlcmd_t cmd; +int mode; +{ + ap_ctl_t ctl; + caddr_t ptr; + int error; + + mode = mode; /* LINT */ + + switch (cmd) + { + case SIOCPROXY : + (void) BCOPYIN(data, &ctl, sizeof(ctl)); + ptr = NULL; + + if (ctl.apc_dsize > 0) { + KMALLOCS(ptr, caddr_t, ctl.apc_dsize); + if (ptr == NULL) + error = ENOMEM; + else { + error = copyinptr(ctl.apc_data, ptr, + ctl.apc_dsize); + if (error == 0) + ctl.apc_data = ptr; + } + } else { + ctl.apc_data = NULL; + error = 0; + } + + if (error == 0) + error = appr_ctl(&ctl); + + if ((ctl.apc_dsize > 0) && (ptr != NULL) && + (ctl.apc_data == ptr)) { + KFREES(ptr, ctl.apc_dsize); + } + break; + + default : + error = EINVAL; + } + return error; +} + + +/* + * If a proxy has a match function, call that to do extended packet + * matching. + */ +int appr_match(fin, nat) +fr_info_t *fin; +nat_t *nat; +{ + aproxy_t *apr; + ipnat_t *ipn; + int result; + + ipn = nat->nat_ptr; + if (ipf_proxy_debug > 8) + printf("appr_match(%lx,%lx) aps %lx ptr %lx\n", + (u_long)fin, (u_long)nat, (u_long)nat->nat_aps, + (u_long)ipn); + + if ((fin->fin_flx & (FI_SHORT|FI_BAD)) != 0) { + if (ipf_proxy_debug > 0) + printf("appr_match: flx 0x%x (BAD|SHORT)\n", + fin->fin_flx); + return -1; + } + + apr = ipn->in_apr; + if ((apr == NULL) || (apr->apr_flags & APR_DELETE)) { + if (ipf_proxy_debug > 0) + printf("appr_match:apr %lx apr_flags 0x%x\n", + (u_long)apr, apr ? 
apr->apr_flags : 0); + return -1; + } + + if (apr->apr_match != NULL) { + result = (*apr->apr_match)(fin, nat->nat_aps, nat); + if (result != 0) { + if (ipf_proxy_debug > 4) + printf("appr_match: result %d\n", result); + return -1; + } + } + return 0; +} + + +/* + * Allocate a new application proxy structure and fill it in with the + * relevant details. call the init function once complete, prior to + * returning. + */ +int appr_new(fin, nat) +fr_info_t *fin; +nat_t *nat; +{ + register ap_session_t *aps; + aproxy_t *apr; + + if (ipf_proxy_debug > 8) + printf("appr_new(%lx,%lx) \n", (u_long)fin, (u_long)nat); + + if ((nat->nat_ptr == NULL) || (nat->nat_aps != NULL)) { + if (ipf_proxy_debug > 0) + printf("appr_new: nat_ptr %lx nat_aps %lx\n", + (u_long)nat->nat_ptr, (u_long)nat->nat_aps); + return -1; + } + + apr = nat->nat_ptr->in_apr; + + if ((apr->apr_flags & APR_DELETE) || + (fin->fin_p != apr->apr_p)) { + if (ipf_proxy_debug > 2) + printf("appr_new: apr_flags 0x%x p %d/%d\n", + apr->apr_flags, fin->fin_p, apr->apr_p); + return -1; + } + + KMALLOC(aps, ap_session_t *); + if (!aps) { + if (ipf_proxy_debug > 0) + printf("appr_new: malloc failed (%lu)\n", + (u_long)sizeof(ap_session_t)); + return -1; + } + + bzero((char *)aps, sizeof(*aps)); + aps->aps_p = fin->fin_p; + aps->aps_data = NULL; + aps->aps_apr = apr; + aps->aps_psiz = 0; + if (apr->apr_new != NULL) + if ((*apr->apr_new)(fin, aps, nat) == -1) { + if ((aps->aps_data != NULL) && (aps->aps_psiz != 0)) { + KFREES(aps->aps_data, aps->aps_psiz); + } + KFREE(aps); + if (ipf_proxy_debug > 2) + printf("appr_new: new(%lx) failed\n", + (u_long)apr->apr_new); + return -1; + } + aps->aps_nat = nat; + aps->aps_next = ap_sess_list; + ap_sess_list = aps; + nat->nat_aps = aps; + + return 0; +} + + +/* + * Check to see if a packet should be passed through an active proxy routine + * if one has been setup for it. 
We don't need to check the checksum here if + * IPFILTER_CKSUM is defined because if it is, a failed check causes FI_BAD + * to be set. + */ +int appr_check(fin, nat) +fr_info_t *fin; +nat_t *nat; +{ +#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) +# if defined(ICK_VALID) + mb_t *m; +# endif + int dosum = 1; +#endif + tcphdr_t *tcp = NULL; + udphdr_t *udp = NULL; + ap_session_t *aps; + aproxy_t *apr; + ip_t *ip; + short rv; + int err; +#if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) + u_32_t s1, s2, sd; +#endif + + if (fin->fin_flx & FI_BAD) { + if (ipf_proxy_debug > 0) + printf("appr_check: flx 0x%x (BAD)\n", fin->fin_flx); + return -1; + } + +#ifndef IPFILTER_CKSUM + if ((fin->fin_out == 0) && (fr_checkl4sum(fin) == -1)) { + if (ipf_proxy_debug > 0) + printf("appr_check: l4 checksum failure %d\n", + fin->fin_p); + if (fin->fin_p == IPPROTO_TCP) + frstats[fin->fin_out].fr_tcpbad++; + return -1; + } +#endif + + aps = nat->nat_aps; + if ((aps != NULL) && (aps->aps_p == fin->fin_p)) { + /* + * If there is data in this packet to be proxied then try and + * get it all into the one buffer, else drop it. + */ +#if defined(MENTAT) || defined(HAVE_M_PULLDOWN) + if ((fin->fin_dlen > 0) && !(fin->fin_flx & FI_COALESCE)) + if (fr_coalesce(fin) == -1) { + if (ipf_proxy_debug > 0) + printf("appr_check: fr_coalesce failed %x\n", fin->fin_flx); + return -1; + } +#endif + ip = fin->fin_ip; + + switch (fin->fin_p) + { + case IPPROTO_TCP : + tcp = (tcphdr_t *)fin->fin_dp; + +#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_VALID) + m = fin->fin_qfm; + if (dohwcksum && (m->b_ick_flag == ICK_VALID)) + dosum = 0; +#endif + /* + * Don't bother the proxy with these...or in fact, + * should we free up proxy stuff when seen? 
+ */ + if ((fin->fin_tcpf & TH_RST) != 0) + break; + /*FALLTHROUGH*/ + case IPPROTO_UDP : + udp = (udphdr_t *)fin->fin_dp; + break; + default : + break; + } + + apr = aps->aps_apr; + err = 0; + if (fin->fin_out != 0) { + if (apr->apr_outpkt != NULL) + err = (*apr->apr_outpkt)(fin, aps, nat); + } else { + if (apr->apr_inpkt != NULL) + err = (*apr->apr_inpkt)(fin, aps, nat); + } + + rv = APR_EXIT(err); + if (((ipf_proxy_debug > 0) && (rv != 0)) || + (ipf_proxy_debug > 8)) + printf("appr_check: out %d err %x rv %d\n", + fin->fin_out, err, rv); + if (rv == 1) + return -1; + + if (rv == 2) { + appr_free(apr); + nat->nat_aps = NULL; + return -1; + } + + /* + * If err != 0 then the data size of the packet has changed + * so we need to recalculate the header checksums for the + * packet. + */ +#if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) + if (err != 0) { + short adjlen = err & 0xffff; + + s1 = LONG_SUM(ip->ip_len - adjlen); + s2 = LONG_SUM(ip->ip_len); + CALC_SUMD(s1, s2, sd); + fix_outcksum(fin, &ip->ip_sum, sd); + } +#endif + + /* + * For TCP packets, we may need to adjust the sequence and + * acknowledgement numbers to reflect changes in size of the + * data stream. + * + * For both TCP and UDP, recalculate the layer 4 checksum, + * regardless, as we can't tell (here) if data has been + * changed or not. 
+ */ + if (tcp != NULL) { + err = appr_fixseqack(fin, ip, aps, APR_INC(err)); +#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) + if (dosum) + tcp->th_sum = fr_cksum(fin->fin_qfm, ip, + IPPROTO_TCP, tcp); +#else + tcp->th_sum = fr_cksum(fin->fin_m, ip, + IPPROTO_TCP, tcp); +#endif + } else if ((udp != NULL) && (udp->uh_sum != 0)) { +#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) + if (dosum) + udp->uh_sum = fr_cksum(fin->fin_qfm, ip, + IPPROTO_UDP, udp); +#else + udp->uh_sum = fr_cksum(fin->fin_m, ip, + IPPROTO_UDP, udp); +#endif + } + aps->aps_bytes += fin->fin_plen; + aps->aps_pkts++; + return 1; + } + return 0; +} + + +/* + * Search for an proxy by the protocol it is being used with and its name. + */ +aproxy_t *appr_lookup(pr, name) +u_int pr; +char *name; +{ + aproxy_t *ap; + + if (ipf_proxy_debug > 8) + printf("appr_lookup(%d,%s)\n", pr, name); + + for (ap = ap_proxies; ap->apr_p; ap++) + if ((ap->apr_p == pr) && + !strncmp(name, ap->apr_label, sizeof(ap->apr_label))) { + ap->apr_ref++; + return ap; + } + + for (ap = ap_proxylist; ap; ap = ap->apr_next) + if ((ap->apr_p == pr) && + !strncmp(name, ap->apr_label, sizeof(ap->apr_label))) { + ap->apr_ref++; + return ap; + } + if (ipf_proxy_debug > 2) + printf("appr_lookup: failed for %d/%s\n", pr, name); + return NULL; +} + + +void appr_free(ap) +aproxy_t *ap; +{ + ap->apr_ref--; +} + + +void aps_free(aps) +ap_session_t *aps; +{ + ap_session_t *a, **ap; + aproxy_t *apr; + + if (!aps) + return; + + for (ap = &ap_sess_list; ((a = *ap) != NULL); ap = &a->aps_next) + if (a == aps) { + *ap = a->aps_next; + break; + } + + apr = aps->aps_apr; + if ((apr != NULL) && (apr->apr_del != NULL)) + (*apr->apr_del)(aps); + + if ((aps->aps_data != NULL) && (aps->aps_psiz != 0)) + KFREES(aps->aps_data, aps->aps_psiz); + KFREE(aps); +} + + +/* + * returns 2 if ack or seq number in TCP header is changed, returns 0 otherwise + */ +static int appr_fixseqack(fin, ip, aps, inc) +fr_info_t *fin; +ip_t *ip; +ap_session_t *aps; +int 
inc; +{ + int sel, ch = 0, out, nlen; + u_32_t seq1, seq2; + tcphdr_t *tcp; + short inc2; + + tcp = (tcphdr_t *)fin->fin_dp; + out = fin->fin_out; + /* + * ip_len has already been adjusted by 'inc'. + */ + nlen = ip->ip_len; + nlen -= (IP_HL(ip) << 2) + (TCP_OFF(tcp) << 2); + + inc2 = inc; + inc = (int)inc2; + + if (out != 0) { + seq1 = (u_32_t)ntohl(tcp->th_seq); + sel = aps->aps_sel[out]; + + /* switch to other set ? */ + if ((aps->aps_seqmin[!sel] > aps->aps_seqmin[sel]) && + (seq1 > aps->aps_seqmin[!sel])) { + if (ipf_proxy_debug > 7) + printf("proxy out switch set seq %d -> %d %x > %x\n", + sel, !sel, seq1, + aps->aps_seqmin[!sel]); + sel = aps->aps_sel[out] = !sel; + } + + if (aps->aps_seqoff[sel]) { + seq2 = aps->aps_seqmin[sel] - aps->aps_seqoff[sel]; + if (seq1 > seq2) { + seq2 = aps->aps_seqoff[sel]; + seq1 += seq2; + tcp->th_seq = htonl(seq1); + ch = 1; + } + } + + if (inc && (seq1 > aps->aps_seqmin[!sel])) { + aps->aps_seqmin[sel] = seq1 + nlen - 1; + aps->aps_seqoff[sel] = aps->aps_seqoff[sel] + inc; + if (ipf_proxy_debug > 7) + printf("proxy seq set %d at %x to %d + %d\n", + sel, aps->aps_seqmin[sel], + aps->aps_seqoff[sel], inc); + } + + /***/ + + seq1 = ntohl(tcp->th_ack); + sel = aps->aps_sel[1 - out]; + + /* switch to other set ? */ + if ((aps->aps_ackmin[!sel] > aps->aps_ackmin[sel]) && + (seq1 > aps->aps_ackmin[!sel])) { + if (ipf_proxy_debug > 7) + printf("proxy out switch set ack %d -> %d %x > %x\n", + sel, !sel, seq1, + aps->aps_ackmin[!sel]); + sel = aps->aps_sel[1 - out] = !sel; + } + + if (aps->aps_ackoff[sel] && (seq1 > aps->aps_ackmin[sel])) { + seq2 = aps->aps_ackoff[sel]; + tcp->th_ack = htonl(seq1 - seq2); + ch = 1; + } + } else { + seq1 = ntohl(tcp->th_seq); + sel = aps->aps_sel[out]; + + /* switch to other set ? 
*/ + if ((aps->aps_ackmin[!sel] > aps->aps_ackmin[sel]) && + (seq1 > aps->aps_ackmin[!sel])) { + if (ipf_proxy_debug > 7) + printf("proxy in switch set ack %d -> %d %x > %x\n", + sel, !sel, seq1, aps->aps_ackmin[!sel]); + sel = aps->aps_sel[out] = !sel; + } + + if (aps->aps_ackoff[sel]) { + seq2 = aps->aps_ackmin[sel] - aps->aps_ackoff[sel]; + if (seq1 > seq2) { + seq2 = aps->aps_ackoff[sel]; + seq1 += seq2; + tcp->th_seq = htonl(seq1); + ch = 1; + } + } + + if (inc && (seq1 > aps->aps_ackmin[!sel])) { + aps->aps_ackmin[!sel] = seq1 + nlen - 1; + aps->aps_ackoff[!sel] = aps->aps_ackoff[sel] + inc; + + if (ipf_proxy_debug > 7) + printf("proxy ack set %d at %x to %d + %d\n", + !sel, aps->aps_seqmin[!sel], + aps->aps_seqoff[sel], inc); + } + + /***/ + + seq1 = ntohl(tcp->th_ack); + sel = aps->aps_sel[1 - out]; + + /* switch to other set ? */ + if ((aps->aps_seqmin[!sel] > aps->aps_seqmin[sel]) && + (seq1 > aps->aps_seqmin[!sel])) { + if (ipf_proxy_debug > 7) + printf("proxy in switch set seq %d -> %d %x > %x\n", + sel, !sel, seq1, aps->aps_seqmin[!sel]); + sel = aps->aps_sel[1 - out] = !sel; + } + + if (aps->aps_seqoff[sel] != 0) { + if (ipf_proxy_debug > 7) + printf("sel %d seqoff %d seq1 %x seqmin %x\n", + sel, aps->aps_seqoff[sel], seq1, + aps->aps_seqmin[sel]); + if (seq1 > aps->aps_seqmin[sel]) { + seq2 = aps->aps_seqoff[sel]; + tcp->th_ack = htonl(seq1 - seq2); + ch = 1; + } + } + } + + if (ipf_proxy_debug > 8) + printf("appr_fixseqack: seq %x ack %x\n", + ntohl(tcp->th_seq), ntohl(tcp->th_ack)); + return ch ? 2 : 0; +} + + +/* + * Initialise hook for kernel application proxies. + * Call the initialise routine for all the compiled in kernel proxies. + */ +int appr_init() +{ + aproxy_t *ap; + int err = 0; + + for (ap = ap_proxies; ap->apr_p; ap++) { + if (ap->apr_init != NULL) { + err = (*ap->apr_init)(); + if (err != 0) + break; + } + } + return err; +} + + +/* + * Unload hook for kernel application proxies. 
+ * Call the finialise routine for all the compiled in kernel proxies. + */ +void appr_unload() +{ + aproxy_t *ap; + + for (ap = ap_proxies; ap->apr_p; ap++) + if (ap->apr_fini != NULL) + (*ap->apr_fini)(); + for (ap = ap_proxylist; ap; ap = ap->apr_next) + if (ap->apr_fini != NULL) + (*ap->apr_fini)(); +} diff --git a/usr/src/uts/common/inet/ipf/ip_state.c b/usr/src/uts/common/inet/ipf/ip_state.c new file mode 100644 index 0000000000..9a5586a208 --- /dev/null +++ b/usr/src/uts/common/inet/ipf/ip_state.c @@ -0,0 +1,3806 @@ +/* + * Copyright (C) 1995-2003 by Darren Reed. + * + * See the IPFILTER.LICENCE file for details on licencing. + * + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#if defined(KERNEL) || defined(_KERNEL) +# undef KERNEL +# undef _KERNEL +# define KERNEL 1 +# define _KERNEL 1 +#endif +#include <sys/errno.h> +#include <sys/types.h> +#include <sys/param.h> +#include <sys/file.h> +#if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \ + defined(_KERNEL) +# include "opt_ipfilter_log.h" +#endif +#if defined(_KERNEL) && defined(__FreeBSD_version) && \ + (__FreeBSD_version >= 400000) && !defined(KLD_MODULE) +#include "opt_inet6.h" +#endif +#if !defined(_KERNEL) && !defined(__KERNEL__) +# include <stdio.h> +# include <stdlib.h> +# include <string.h> +# define _KERNEL +# ifdef __OpenBSD__ +struct file; +# endif +# include <sys/uio.h> +# undef _KERNEL +#endif +#if defined(_KERNEL) && (__FreeBSD_version >= 220000) +# include <sys/filio.h> +# include <sys/fcntl.h> +# if (__FreeBSD_version >= 300000) && !defined(IPFILTER_LKM) +# include "opt_ipfilter.h" +# endif +#else +# include <sys/ioctl.h> +#endif +#include <sys/time.h> +#if !defined(linux) +# include <sys/protosw.h> +#endif +#include <sys/socket.h> +#if defined(_KERNEL) +# include <sys/systm.h> +# if !defined(__SVR4) && !defined(__svr4__) +# include <sys/mbuf.h> +# endif +#endif +#if 
defined(__SVR4) || defined(__svr4__) +# include <sys/filio.h> +# include <sys/byteorder.h> +# ifdef _KERNEL +# include <sys/dditypes.h> +# endif +# include <sys/stream.h> +# include <sys/kmem.h> +#endif + +#include <net/if.h> +#ifdef sun +# include <net/af.h> +#endif +#include <net/route.h> +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#include <netinet/tcp.h> +#if !defined(linux) +# include <netinet/ip_var.h> +#endif +#if !defined(__hpux) && !defined(linux) +# include <netinet/tcp_fsm.h> +#endif +#include <netinet/udp.h> +#include <netinet/ip_icmp.h> +#include "netinet/ip_compat.h" +#include <netinet/tcpip.h> +#include "netinet/ip_fil.h" +#include "netinet/ip_nat.h" +#include "netinet/ip_frag.h" +#include "netinet/ip_state.h" +#include "netinet/ip_proxy.h" +#ifdef IPFILTER_SYNC +#include "netinet/ip_sync.h" +#endif +#ifdef IPFILTER_SCAN +#include "netinet/ip_scan.h" +#endif +#ifdef USE_INET6 +#include <netinet/icmp6.h> +#endif +#if (__FreeBSD_version >= 300000) +# include <sys/malloc.h> +# if defined(_KERNEL) && !defined(IPFILTER_LKM) +# include <sys/libkern.h> +# include <sys/systm.h> +# endif +#endif +/* END OF INCLUDES */ + + +#if !defined(lint) +static const char sccsid[] = "@(#)ip_state.c 1.8 6/5/96 (C) 1993-2000 Darren Reed"; +static const char rcsid[] = "@(#)$Id: ip_state.c,v 2.186.2.36 2005/08/11 19:58:03 darrenr Exp $"; +#endif + +static ipstate_t **ips_table = NULL; +static u_long *ips_seed = NULL; +static int ips_num = 0; +static u_long ips_last_force_flush = 0; +ips_stat_t ips_stats; + +#ifdef USE_INET6 +static ipstate_t *fr_checkicmp6matchingstate __P((fr_info_t *)); +#endif +static ipstate_t *fr_matchsrcdst __P((fr_info_t *, ipstate_t *, i6addr_t *, + i6addr_t *, tcphdr_t *, u_32_t)); +static ipstate_t *fr_checkicmpmatchingstate __P((fr_info_t *)); +static int fr_state_flush __P((int, int)); +static ips_stat_t *fr_statetstats __P((void)); +static void fr_delstate __P((ipstate_t *, int)); +static int fr_state_remove 
__P((caddr_t)); +static void fr_ipsmove __P((ipstate_t *, u_int)); +static int fr_tcpstate __P((fr_info_t *, tcphdr_t *, ipstate_t *)); +static int fr_tcpoptions __P((fr_info_t *, tcphdr_t *, tcpdata_t *)); +static ipstate_t *fr_stclone __P((fr_info_t *, tcphdr_t *, ipstate_t *)); +static void fr_fixinisn __P((fr_info_t *, ipstate_t *)); +static void fr_fixoutisn __P((fr_info_t *, ipstate_t *)); +static void fr_checknewisn __P((fr_info_t *, ipstate_t *)); + +int fr_stputent __P((caddr_t)); +int fr_stgetent __P((caddr_t)); + +#define ONE_DAY IPF_TTLVAL(1 * 86400) /* 1 day */ +#define FIVE_DAYS (5 * ONE_DAY) +#define DOUBLE_HASH(x) (((x) + ips_seed[(x) % fr_statesize]) % fr_statesize) + +u_long fr_tcpidletimeout = FIVE_DAYS, + fr_tcpclosewait = IPF_TTLVAL(2 * TCP_MSL), + fr_tcplastack = IPF_TTLVAL(2 * TCP_MSL), + fr_tcptimeout = IPF_TTLVAL(2 * TCP_MSL), + fr_tcpclosed = IPF_TTLVAL(60), + fr_tcphalfclosed = IPF_TTLVAL(2 * 3600), /* 2 hours */ + fr_udptimeout = IPF_TTLVAL(120), + fr_udpacktimeout = IPF_TTLVAL(12), + fr_icmptimeout = IPF_TTLVAL(60), + fr_icmpacktimeout = IPF_TTLVAL(6), + fr_iptimeout = IPF_TTLVAL(60); +int fr_statemax = IPSTATE_MAX, + fr_statesize = IPSTATE_SIZE; +int fr_state_doflush = 0, + fr_state_lock = 0, + fr_state_maxbucket = 0, + fr_state_maxbucket_reset = 1, + fr_state_init = 0; +ipftq_t ips_tqtqb[IPF_TCP_NSTATES], + ips_udptq, + ips_udpacktq, + ips_iptq, + ips_icmptq, + ips_icmpacktq, + *ips_utqe = NULL; +#ifdef IPFILTER_LOG +int ipstate_logging = 1; +#else +int ipstate_logging = 0; +#endif +ipstate_t *ips_list = NULL; + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_stateinit */ +/* Returns: int - 0 == success, -1 == failure */ +/* Parameters: Nil */ +/* */ +/* Initialise all the global variables used within the state code. */ +/* This action also includes initiailising locks. 
*/ +/* ------------------------------------------------------------------------ */ +int fr_stateinit() +{ + int i; + + KMALLOCS(ips_table, ipstate_t **, fr_statesize * sizeof(ipstate_t *)); + if (ips_table == NULL) + return -1; + bzero((char *)ips_table, fr_statesize * sizeof(ipstate_t *)); + + KMALLOCS(ips_seed, u_long *, fr_statesize * sizeof(*ips_seed)); + if (ips_seed == NULL) + return -2; + for (i = 0; i < fr_statesize; i++) { + /* + * XXX - ips_seed[X] should be a random number of sorts. + */ +#if (__FreeBSD_version >= 400000) + ips_seed[i] = arc4random(); +#else + ips_seed[i] = ((u_long)ips_seed + i) * fr_statesize; + ips_seed[i] ^= 0xa5a55a5a; + ips_seed[i] *= (u_long)ips_seed; + ips_seed[i] ^= 0x5a5aa5a5; + ips_seed[i] *= fr_statemax; +#endif + } + + /* fill icmp reply type table */ + for (i = 0; i <= ICMP_MAXTYPE; i++) + icmpreplytype4[i] = -1; + icmpreplytype4[ICMP_ECHO] = ICMP_ECHOREPLY; + icmpreplytype4[ICMP_TSTAMP] = ICMP_TSTAMPREPLY; + icmpreplytype4[ICMP_IREQ] = ICMP_IREQREPLY; + icmpreplytype4[ICMP_MASKREQ] = ICMP_MASKREPLY; +#ifdef USE_INET6 + /* fill icmp reply type table */ + for (i = 0; i <= ICMP6_MAXTYPE; i++) + icmpreplytype6[i] = -1; + icmpreplytype6[ICMP6_ECHO_REQUEST] = ICMP6_ECHO_REPLY; + icmpreplytype6[ICMP6_MEMBERSHIP_QUERY] = ICMP6_MEMBERSHIP_REPORT; + icmpreplytype6[ICMP6_NI_QUERY] = ICMP6_NI_REPLY; + icmpreplytype6[ND_ROUTER_SOLICIT] = ND_ROUTER_ADVERT; + icmpreplytype6[ND_NEIGHBOR_SOLICIT] = ND_NEIGHBOR_ADVERT; +#endif + + KMALLOCS(ips_stats.iss_bucketlen, u_long *, + fr_statesize * sizeof(u_long)); + if (ips_stats.iss_bucketlen == NULL) + return -1; + bzero((char *)ips_stats.iss_bucketlen, fr_statesize * sizeof(u_long)); + + if (fr_state_maxbucket == 0) { + for (i = fr_statesize; i > 0; i >>= 1) + fr_state_maxbucket++; + fr_state_maxbucket *= 2; + } + + fr_sttab_init(ips_tqtqb); + ips_tqtqb[IPF_TCP_NSTATES - 1].ifq_next = &ips_udptq; + ips_udptq.ifq_ttl = (u_long)fr_udptimeout; + ips_udptq.ifq_ref = 1; + ips_udptq.ifq_head = NULL; 
+ ips_udptq.ifq_tail = &ips_udptq.ifq_head; + MUTEX_INIT(&ips_udptq.ifq_lock, "ipftq udp tab"); + ips_udptq.ifq_next = &ips_udpacktq; + ips_udpacktq.ifq_ttl = (u_long)fr_udpacktimeout; + ips_udpacktq.ifq_ref = 1; + ips_udpacktq.ifq_head = NULL; + ips_udpacktq.ifq_tail = &ips_udpacktq.ifq_head; + MUTEX_INIT(&ips_udpacktq.ifq_lock, "ipftq udpack tab"); + ips_udpacktq.ifq_next = &ips_icmptq; + ips_icmptq.ifq_ttl = (u_long)fr_icmptimeout; + ips_icmptq.ifq_ref = 1; + ips_icmptq.ifq_head = NULL; + ips_icmptq.ifq_tail = &ips_icmptq.ifq_head; + MUTEX_INIT(&ips_icmptq.ifq_lock, "ipftq icmp tab"); + ips_icmptq.ifq_next = &ips_icmpacktq; + ips_icmpacktq.ifq_ttl = (u_long)fr_icmpacktimeout; + ips_icmpacktq.ifq_ref = 1; + ips_icmpacktq.ifq_head = NULL; + ips_icmpacktq.ifq_tail = &ips_icmpacktq.ifq_head; + MUTEX_INIT(&ips_icmpacktq.ifq_lock, "ipftq icmpack tab"); + ips_icmpacktq.ifq_next = &ips_iptq; + ips_iptq.ifq_ttl = (u_long)fr_iptimeout; + ips_iptq.ifq_ref = 1; + ips_iptq.ifq_head = NULL; + ips_iptq.ifq_tail = &ips_iptq.ifq_head; + MUTEX_INIT(&ips_iptq.ifq_lock, "ipftq ip tab"); + ips_iptq.ifq_next = NULL; + + RWLOCK_INIT(&ipf_state, "ipf IP state rwlock"); + MUTEX_INIT(&ipf_stinsert, "ipf state insert mutex"); + fr_state_init = 1; + + ips_last_force_flush = fr_ticks; + return 0; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_stateunload */ +/* Returns: Nil */ +/* Parameters: Nil */ +/* */ +/* Release and destroy any resources acquired or initialised so that */ +/* IPFilter can be unloaded or re-initialised. 
*/ +/* ------------------------------------------------------------------------ */ +void fr_stateunload() +{ + ipftq_t *ifq, *ifqnext; + ipstate_t *is; + + WRITE_ENTER(&ipf_state); + while ((is = ips_list) != NULL) + fr_delstate(is, 0); + + /* + * Proxy timeout queues are not cleaned here because although they + * exist on the state list, appr_unload is called after fr_stateunload + * and the proxies actually are responsible for them being created. + * Should the proxy timeouts have their own list? There's no real + * justification as this is the only complicationA + */ + for (ifq = ips_utqe; ifq != NULL; ifq = ifqnext) { + ifqnext = ifq->ifq_next; + if (((ifq->ifq_flags & IFQF_PROXY) == 0) && + (fr_deletetimeoutqueue(ifq) == 0)) + fr_freetimeoutqueue(ifq); + } + + ips_stats.iss_inuse = 0; + ips_num = 0; + + if (fr_state_init == 1) { + fr_sttab_destroy(ips_tqtqb); + MUTEX_DESTROY(&ips_udptq.ifq_lock); + MUTEX_DESTROY(&ips_icmptq.ifq_lock); + MUTEX_DESTROY(&ips_udpacktq.ifq_lock); + MUTEX_DESTROY(&ips_icmpacktq.ifq_lock); + MUTEX_DESTROY(&ips_iptq.ifq_lock); + } + + if (ips_table != NULL) { + KFREES(ips_table, fr_statesize * sizeof(*ips_table)); + ips_table = NULL; + } + + if (ips_seed != NULL) { + KFREES(ips_seed, fr_statesize * sizeof(*ips_seed)); + ips_seed = NULL; + } + + if (ips_stats.iss_bucketlen != NULL) { + KFREES(ips_stats.iss_bucketlen, fr_statesize * sizeof(u_long)); + ips_stats.iss_bucketlen = NULL; + } + + RWLOCK_EXIT(&ipf_state); + + if (fr_state_maxbucket_reset == 1) + fr_state_maxbucket = 0; + + if (fr_state_init == 1) { + fr_state_init = 0; + RW_DESTROY(&ipf_state); + MUTEX_DESTROY(&ipf_stinsert); + } +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_statetstats */ +/* Returns: ips_state_t* - pointer to state stats structure */ +/* Parameters: Nil */ +/* */ +/* Put all the current numbers and pointers into a single struct and return */ +/* a pointer to it. 
*/ +/* ------------------------------------------------------------------------ */ +static ips_stat_t *fr_statetstats() +{ + ips_stats.iss_active = ips_num; + ips_stats.iss_statesize = fr_statesize; + ips_stats.iss_statemax = fr_statemax; + ips_stats.iss_table = ips_table; + ips_stats.iss_list = ips_list; + ips_stats.iss_ticks = fr_ticks; + return &ips_stats; +} + +/* ------------------------------------------------------------------------ */ +/* Function: fr_state_remove */ +/* Returns: int - 0 == success, != 0 == failure */ +/* Parameters: data(I) - pointer to state structure to delete from table */ +/* */ +/* Search for a state structure that matches the one passed, according to */ +/* the IP addresses and other protocol specific information. */ +/* ------------------------------------------------------------------------ */ +static int fr_state_remove(data) +caddr_t data; +{ + ipstate_t *sp, st; + int error; + + sp = &st; + error = fr_inobj(data, &st, IPFOBJ_IPSTATE); + if (error) + return EFAULT; + + WRITE_ENTER(&ipf_state); + for (sp = ips_list; sp; sp = sp->is_next) + if ((sp->is_p == st.is_p) && (sp->is_v == st.is_v) && + !bcmp((caddr_t)&sp->is_src, (caddr_t)&st.is_src, + sizeof(st.is_src)) && + !bcmp((caddr_t)&sp->is_dst, (caddr_t)&st.is_src, + sizeof(st.is_dst)) && + !bcmp((caddr_t)&sp->is_ps, (caddr_t)&st.is_ps, + sizeof(st.is_ps))) { + fr_delstate(sp, ISL_REMOVE); + RWLOCK_EXIT(&ipf_state); + return 0; + } + RWLOCK_EXIT(&ipf_state); + return ESRCH; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_state_ioctl */ +/* Returns: int - 0 == success, != 0 == failure */ +/* Parameters: data(I) - pointer to ioctl data */ +/* cmd(I) - ioctl command integer */ +/* mode(I) - file mode bits used with open */ +/* */ +/* Processes an ioctl call made to operate on the IP Filter state device. 
*/ +/* ------------------------------------------------------------------------ */ +int fr_state_ioctl(data, cmd, mode) +caddr_t data; +ioctlcmd_t cmd; +int mode; +{ + int arg, ret, error = 0; + + switch (cmd) + { + /* + * Delete an entry from the state table. + */ + case SIOCDELST : + error = fr_state_remove(data); + break; + /* + * Flush the state table + */ + case SIOCIPFFL : + BCOPYIN(data, (char *)&arg, sizeof(arg)); + if (arg == 0 || arg == 1) { + WRITE_ENTER(&ipf_state); + ret = fr_state_flush(arg, 4); + RWLOCK_EXIT(&ipf_state); + BCOPYOUT((char *)&ret, data, sizeof(ret)); + } else + error = EINVAL; + break; +#ifdef USE_INET6 + case SIOCIPFL6 : + BCOPYIN(data, (char *)&arg, sizeof(arg)); + if (arg == 0 || arg == 1) { + WRITE_ENTER(&ipf_state); + ret = fr_state_flush(arg, 6); + RWLOCK_EXIT(&ipf_state); + BCOPYOUT((char *)&ret, data, sizeof(ret)); + } else + error = EINVAL; + break; +#endif +#ifdef IPFILTER_LOG + /* + * Flush the state log. + */ + case SIOCIPFFB : + if (!(mode & FWRITE)) + error = EPERM; + else { + int tmp; + + tmp = ipflog_clear(IPL_LOGSTATE); + BCOPYOUT((char *)&tmp, data, sizeof(tmp)); + } + break; + /* + * Turn logging of state information on/off. + */ + case SIOCSETLG : + if (!(mode & FWRITE)) + error = EPERM; + else { + BCOPYIN((char *)data, (char *)&ipstate_logging, + sizeof(ipstate_logging)); + } + break; + /* + * Return the current state of logging. + */ + case SIOCGETLG : + BCOPYOUT((char *)&ipstate_logging, (char *)data, + sizeof(ipstate_logging)); + break; + /* + * Return the number of bytes currently waiting to be read. + */ + case FIONREAD : + arg = iplused[IPL_LOGSTATE]; /* returned in an int */ + BCOPYOUT((char *)&arg, data, sizeof(arg)); + break; +#endif + /* + * Get the current state statistics. + */ + case SIOCGETFS : + error = fr_outobj(data, fr_statetstats(), IPFOBJ_STATESTAT); + break; + /* + * Lock/Unlock the state table. (Locking prevents any changes, which + * means no packets match). 
+ */ + case SIOCSTLCK : + if (!(mode & FWRITE)) { + error = EPERM; + } else { + fr_lock(data, &fr_state_lock); + } + break; + /* + * Add an entry to the current state table. + */ + case SIOCSTPUT : + if (!fr_state_lock || !(mode &FWRITE)) { + error = EACCES; + break; + } + error = fr_stputent(data); + break; + /* + * Get a state table entry. + */ + case SIOCSTGET : + if (!fr_state_lock) { + error = EACCES; + break; + } + error = fr_stgetent(data); + break; + default : + error = EINVAL; + break; + } + return error; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_stgetent */ +/* Returns: int - 0 == success, != 0 == failure */ +/* Parameters: data(I) - pointer to state structure to retrieve from table */ +/* */ +/* Copy out state information from the kernel to a user space process. If */ +/* there is a filter rule associated with the state entry, copy that out */ +/* as well. The entry to copy out is taken from the value of "ips_next" in */ +/* the struct passed in and if not null and not found in the list of current*/ +/* state entries, the retrieval fails. */ +/* ------------------------------------------------------------------------ */ +int fr_stgetent(data) +caddr_t data; +{ + ipstate_t *is, *isn; + ipstate_save_t ips; + int error; + + error = fr_inobj(data, &ips, IPFOBJ_STATESAVE); + if (error) + return EFAULT; + + isn = ips.ips_next; + if (isn == NULL) { + isn = ips_list; + if (isn == NULL) { + if (ips.ips_next == NULL) + return ENOENT; + return 0; + } + } else { + /* + * Make sure the pointer we're copying from exists in the + * current list of entries. Security precaution to prevent + * copying of random kernel data. 
+ */ + for (is = ips_list; is; is = is->is_next) + if (is == isn) + break; + if (!is) + return ESRCH; + } + ips.ips_next = isn->is_next; + bcopy((char *)isn, (char *)&ips.ips_is, sizeof(ips.ips_is)); + ips.ips_rule = isn->is_rule; + if (isn->is_rule != NULL) + bcopy((char *)isn->is_rule, (char *)&ips.ips_fr, + sizeof(ips.ips_fr)); + error = fr_outobj(data, &ips, IPFOBJ_STATESAVE); + if (error) + return EFAULT; + return 0; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_stputent */ +/* Returns: int - 0 == success, != 0 == failure */ +/* Parameters: data(I) - pointer to state information struct */ +/* */ +/* This function implements the SIOCSTPUT ioctl: insert a state entry into */ +/* the state table. If the state info. includes a pointer to a filter rule */ +/* then also add in an orphaned rule (will not show up in any "ipfstat -io" */ +/* output. */ +/* ------------------------------------------------------------------------ */ +int fr_stputent(data) +caddr_t data; +{ + ipstate_t *is, *isn; + ipstate_save_t ips; + int error, i; + frentry_t *fr; + char *name; + + error = fr_inobj(data, &ips, IPFOBJ_STATESAVE); + if (error) + return EFAULT; + + KMALLOC(isn, ipstate_t *); + if (isn == NULL) + return ENOMEM; + + bcopy((char *)&ips.ips_is, (char *)isn, sizeof(*isn)); + bzero((char *)isn, offsetof(struct ipstate, is_pkts)); + isn->is_sti.tqe_pnext = NULL; + isn->is_sti.tqe_next = NULL; + isn->is_sti.tqe_ifq = NULL; + isn->is_sti.tqe_parent = isn; + isn->is_ifp[0] = NULL; + isn->is_ifp[1] = NULL; + isn->is_ifp[2] = NULL; + isn->is_ifp[3] = NULL; + isn->is_sync = NULL; + fr = ips.ips_rule; + + if (fr == NULL) { + READ_ENTER(&ipf_state); + fr_stinsert(isn, 0); + MUTEX_EXIT(&isn->is_lock); + RWLOCK_EXIT(&ipf_state); + return 0; + } + + if (isn->is_flags & SI_NEWFR) { + KMALLOC(fr, frentry_t *); + if (fr == NULL) { + KFREE(isn); + return ENOMEM; + } + bcopy((char *)&ips.ips_fr, (char *)fr, sizeof(*fr)); + isn->is_rule = 
fr; + ips.ips_is.is_rule = fr; + MUTEX_NUKE(&fr->fr_lock); + MUTEX_INIT(&fr->fr_lock, "state filter rule lock"); + + /* + * Look up all the interface names in the rule. + */ + for (i = 0; i < 4; i++) { + name = fr->fr_ifnames[i]; + fr->fr_ifas[i] = fr_resolvenic(name, fr->fr_v); + name = isn->is_ifname[i]; + isn->is_ifp[i] = fr_resolvenic(name, isn->is_v); + } + + fr->fr_ref = 0; + fr->fr_dsize = 0; + fr->fr_data = NULL; + + fr_resolvedest(&fr->fr_tif, fr->fr_v); + fr_resolvedest(&fr->fr_dif, fr->fr_v); + + /* + * send a copy back to userland of what we ended up + * to allow for verification. + */ + error = fr_outobj(data, &ips, IPFOBJ_STATESAVE); + if (error) { + KFREE(isn); + MUTEX_DESTROY(&fr->fr_lock); + KFREE(fr); + return EFAULT; + } + READ_ENTER(&ipf_state); + fr_stinsert(isn, 0); + MUTEX_EXIT(&isn->is_lock); + RWLOCK_EXIT(&ipf_state); + + } else { + READ_ENTER(&ipf_state); + for (is = ips_list; is; is = is->is_next) + if (is->is_rule == fr) { + fr_stinsert(isn, 0); + MUTEX_EXIT(&isn->is_lock); + break; + } + + if (is == NULL) { + KFREE(isn); + isn = NULL; + } + RWLOCK_EXIT(&ipf_state); + + return (isn == NULL) ? ESRCH : 0; + } + + return 0; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_stinsert */ +/* Returns: Nil */ +/* Parameters: is(I) - pointer to state structure */ +/* rev(I) - flag indicating forward/reverse direction of packet */ +/* */ +/* Inserts a state structure into the hash table (for lookups) and the list */ +/* of state entries (for enumeration). Resolves all of the interface names */ +/* to pointers and adjusts running stats for the hash table as appropriate. */ +/* */ +/* Locking: it is assumed that some kind of lock on ipf_state is held. */ +/* Exits with is_lock initialised and held. 
*/ +/* ------------------------------------------------------------------------ */ +void fr_stinsert(is, rev) +ipstate_t *is; +int rev; +{ + frentry_t *fr; + u_int hv; + int i; + + MUTEX_INIT(&is->is_lock, "ipf state entry"); + + fr = is->is_rule; + if (fr != NULL) { + MUTEX_ENTER(&fr->fr_lock); + fr->fr_ref++; + fr->fr_statecnt++; + MUTEX_EXIT(&fr->fr_lock); + } + + /* + * Look up all the interface names in the state entry. + */ + for (i = 0; i < 4; i++) { + if (is->is_ifp[i] != NULL) + continue; + is->is_ifp[i] = fr_resolvenic(is->is_ifname[i], is->is_v); + } + + /* + * If we could trust is_hv, then the modulous would not be needed, but + * when running with IPFILTER_SYNC, this stops bad values. + */ + hv = is->is_hv % fr_statesize; + is->is_hv = hv; + + /* + * We need to get both of these locks...the first because it is + * possible that once the insert is complete another packet might + * come along, match the entry and want to update it. + */ + MUTEX_ENTER(&is->is_lock); + MUTEX_ENTER(&ipf_stinsert); + + /* + * add into list table. + */ + if (ips_list != NULL) + ips_list->is_pnext = &is->is_next; + is->is_pnext = &ips_list; + is->is_next = ips_list; + ips_list = is; + + if (ips_table[hv] != NULL) + ips_table[hv]->is_phnext = &is->is_hnext; + else + ips_stats.iss_inuse++; + is->is_phnext = ips_table + hv; + is->is_hnext = ips_table[hv]; + ips_table[hv] = is; + ips_stats.iss_bucketlen[hv]++; + ips_num++; + MUTEX_EXIT(&ipf_stinsert); + + fr_setstatequeue(is, rev); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_addstate */ +/* Returns: ipstate_t* - NULL == failure, else pointer to new state */ +/* Parameters: fin(I) - pointer to packet information */ +/* stsave(O) - pointer to place to save pointer to created */ +/* state structure. */ +/* flags(I) - flags to use when creating the structure */ +/* */ +/* Creates a new IP state structure from the packet information collected. 
*/ +/* Inserts it into the state table and appends to the bottom of the active */ +/* list. If the capacity of the table has reached the maximum allowed then */ +/* the call will fail and a flush is scheduled for the next timeout call. */ +/* ------------------------------------------------------------------------ */ +ipstate_t *fr_addstate(fin, stsave, flags) +fr_info_t *fin; +ipstate_t **stsave; +u_int flags; +{ + ipstate_t *is, ips; + struct icmp *ic; + u_int pass, hv; + frentry_t *fr; + tcphdr_t *tcp; + grehdr_t *gre; + void *ifp; + int out; + + if (fr_state_lock || + (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGBODY|FI_BAD))) + return NULL; + + if ((fin->fin_flx & FI_OOW) && !(fin->fin_tcpf & TH_SYN)) + return NULL; + + fr = fin->fin_fr; + if ((fr->fr_statemax == 0) && (ips_num == fr_statemax)) { + ATOMIC_INCL(ips_stats.iss_max); + fr_state_doflush = 1; + return NULL; + } + + /* + * If a "keep state" rule has reached the maximum number of references + * to it, then schedule an automatic flush in case we can clear out + * some "dead old wood". + */ + if ((fr != NULL) && (fr->fr_statemax != 0) && + (fr->fr_statecnt >= fr->fr_statemax)) { + MUTEX_EXIT(&fr->fr_lock); + ATOMIC_INCL(ips_stats.iss_maxref); + fr_state_doflush = 1; + return NULL; + } + + pass = (fr == NULL) ? 0 : fr->fr_flags; + + ic = NULL; + tcp = NULL; + out = fin->fin_out; + is = &ips; + bzero((char *)is, sizeof(*is)); + is->is_die = 1 + fr_ticks; + + /* + * Copy and calculate... + */ + hv = (is->is_p = fin->fin_fi.fi_p); + is->is_src = fin->fin_fi.fi_src; + hv += is->is_saddr; + is->is_dst = fin->fin_fi.fi_dst; + hv += is->is_daddr; +#ifdef USE_INET6 + if (fin->fin_v == 6) { + /* + * For ICMPv6, we check to see if the destination address is + * a multicast address. If it is, do not include it in the + * calculation of the hash because the correct reply will come + * back from a real address, not a multicast address. 
+ */ + if ((is->is_p == IPPROTO_ICMPV6) && + IN6_IS_ADDR_MULTICAST(&is->is_dst.in6)) { + /* + * So you can do keep state with neighbour discovery. + * + * Here we could use the address from the neighbour + * solicit message to put in the state structure and + * we could use that without a wildcard flag too... + */ + is->is_flags |= SI_W_DADDR; + hv -= is->is_daddr; + } else { + hv += is->is_dst.i6[1]; + hv += is->is_dst.i6[2]; + hv += is->is_dst.i6[3]; + } + hv += is->is_src.i6[1]; + hv += is->is_src.i6[2]; + hv += is->is_src.i6[3]; + } +#endif + + switch (is->is_p) + { +#ifdef USE_INET6 + case IPPROTO_ICMPV6 : + ic = fin->fin_dp; + + switch (ic->icmp_type) + { + case ICMP6_ECHO_REQUEST : + is->is_icmp.ici_type = ic->icmp_type; + hv += (is->is_icmp.ici_id = ic->icmp_id); + break; + case ICMP6_MEMBERSHIP_QUERY : + case ND_ROUTER_SOLICIT : + case ND_NEIGHBOR_SOLICIT : + case ICMP6_NI_QUERY : + is->is_icmp.ici_type = ic->icmp_type; + break; + default : + return NULL; + } + ATOMIC_INCL(ips_stats.iss_icmp); + break; +#endif + case IPPROTO_ICMP : + ic = fin->fin_dp; + + switch (ic->icmp_type) + { + case ICMP_ECHO : + case ICMP_TSTAMP : + case ICMP_IREQ : + case ICMP_MASKREQ : + is->is_icmp.ici_type = ic->icmp_type; + hv += (is->is_icmp.ici_id = ic->icmp_id); + break; + default : + return NULL; + } + ATOMIC_INCL(ips_stats.iss_icmp); + break; + + case IPPROTO_GRE : + gre = fin->fin_dp; + + is->is_gre.gs_flags = gre->gr_flags; + is->is_gre.gs_ptype = gre->gr_ptype; + if (GRE_REV(is->is_gre.gs_flags) == 1) { + is->is_call[0] = fin->fin_data[0]; + is->is_call[1] = fin->fin_data[1]; + } + break; + + case IPPROTO_TCP : + tcp = fin->fin_dp; + + if (tcp->th_flags & TH_RST) + return NULL; + /* + * The endian of the ports doesn't matter, but the ack and + * sequence numbers do as we do mathematics on them later. 
+ */ + is->is_sport = htons(fin->fin_data[0]); + is->is_dport = htons(fin->fin_data[1]); + if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) { + hv += is->is_sport; + hv += is->is_dport; + } + + /* + * If this is a real packet then initialise fields in the + * state information structure from the TCP header information. + */ + + is->is_maxdwin = 1; + is->is_maxswin = ntohs(tcp->th_win); + if (is->is_maxswin == 0) + is->is_maxswin = 1; + + if ((fin->fin_flx & FI_IGNORE) == 0) { + is->is_send = ntohl(tcp->th_seq) + fin->fin_dlen - + (TCP_OFF(tcp) << 2) + + ((tcp->th_flags & TH_SYN) ? 1 : 0) + + ((tcp->th_flags & TH_FIN) ? 1 : 0); + is->is_maxsend = is->is_send; + + /* + * Window scale option is only present in + * SYN/SYN-ACK packet. + */ + if ((tcp->th_flags & ~(TH_FIN|TH_ACK|TH_ECNALL)) == + TH_SYN && + (TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) { + if (fr_tcpoptions(fin, tcp, + &is->is_tcp.ts_data[0])) + is->is_swinflags = TCP_WSCALE_SEEN| + TCP_WSCALE_FIRST; + } + + if ((fin->fin_out != 0) && (pass & FR_NEWISN) != 0) { + fr_checknewisn(fin, is); + fr_fixoutisn(fin, is); + } + + if ((tcp->th_flags & TH_OPENING) == TH_SYN) + flags |= IS_TCPFSM; + else { + is->is_maxdwin = is->is_maxswin * 2; + is->is_dend = ntohl(tcp->th_ack); + is->is_maxdend = ntohl(tcp->th_ack); + is->is_maxdwin *= 2; + } + } + + /* + * If we're creating state for a starting connection, start the + * timer on it as we'll never see an error if it fails to + * connect. + */ + ATOMIC_INCL(ips_stats.iss_tcp); + break; + + case IPPROTO_UDP : + tcp = fin->fin_dp; + + is->is_sport = htons(fin->fin_data[0]); + is->is_dport = htons(fin->fin_data[1]); + if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) { + hv += tcp->th_dport; + hv += tcp->th_sport; + } + ATOMIC_INCL(ips_stats.iss_udp); + break; + + default : + break; + } + hv = DOUBLE_HASH(hv); + is->is_hv = hv; + is->is_rule = fr; + is->is_flags = flags & IS_INHERITED; + + /* + * Look for identical state. 
+ */ + for (is = ips_table[is->is_hv % fr_statesize]; is != NULL; + is = is->is_hnext) { + if (bcmp(&ips.is_src, &is->is_src, + offsetof(struct ipstate, is_ps) - + offsetof(struct ipstate, is_src)) == 0) + break; + } + if (is != NULL) + return NULL; + + if (ips_stats.iss_bucketlen[hv] >= fr_state_maxbucket) { + ATOMIC_INCL(ips_stats.iss_bucketfull); + return NULL; + } + KMALLOC(is, ipstate_t *); + if (is == NULL) { + ATOMIC_INCL(ips_stats.iss_nomem); + return NULL; + } + bcopy((char *)&ips, (char *)is, sizeof(*is)); + /* + * Do not do the modulous here, it is done in fr_stinsert(). + */ + if (fr != NULL) { + (void) strncpy(is->is_group, fr->fr_group, FR_GROUPLEN); + if (fr->fr_age[0] != 0) { + is->is_tqehead[0] = fr_addtimeoutqueue(&ips_utqe, + fr->fr_age[0]); + is->is_sti.tqe_flags |= TQE_RULEBASED; + } + if (fr->fr_age[1] != 0) { + is->is_tqehead[1] = fr_addtimeoutqueue(&ips_utqe, + fr->fr_age[1]); + is->is_sti.tqe_flags |= TQE_RULEBASED; + } + + is->is_tag = fr->fr_logtag; + + is->is_ifp[(out << 1) + 1] = fr->fr_ifas[1]; + is->is_ifp[(1 - out) << 1] = fr->fr_ifas[2]; + is->is_ifp[((1 - out) << 1) + 1] = fr->fr_ifas[3]; + + if (((ifp = fr->fr_ifas[1]) != NULL) && + (ifp != (void *)-1)) { + COPYIFNAME(ifp, is->is_ifname[(out << 1) + 1]); + } + if (((ifp = fr->fr_ifas[2]) != NULL) && + (ifp != (void *)-1)) { + COPYIFNAME(ifp, is->is_ifname[(1 - out) << 1]); + } + if (((ifp = fr->fr_ifas[3]) != NULL) && + (ifp != (void *)-1)) { + COPYIFNAME(ifp, is->is_ifname[((1 - out) << 1) + 1]); + } + } else { + pass = fr_flags; + is->is_tag = FR_NOLOGTAG; + } + + is->is_ifp[out << 1] = fin->fin_ifp; + if (fin->fin_ifp != NULL) { + COPYIFNAME(fin->fin_ifp, is->is_ifname[out << 1]); + } + + /* + * It may seem strange to set is_ref to 2, but fr_check() will call + * fr_statederef() after calling fr_addstate() and the idea is to + * have it exist at the end of fr_check() with is_ref == 1. 
+ */ + is->is_ref = 2; + is->is_pass = pass; + is->is_pkts[0] = 0, is->is_bytes[0] = 0; + is->is_pkts[1] = 0, is->is_bytes[1] = 0; + is->is_pkts[2] = 0, is->is_bytes[2] = 0; + is->is_pkts[3] = 0, is->is_bytes[3] = 0; + if ((fin->fin_flx & FI_IGNORE) == 0) { + is->is_pkts[out] = 1; + is->is_bytes[out] = fin->fin_plen; + is->is_flx[out][0] = fin->fin_flx & FI_CMP; + is->is_flx[out][0] &= ~FI_OOW; + } + + if (pass & FR_STSTRICT) + is->is_flags |= IS_STRICT; + + if (pass & FR_STATESYNC) + is->is_flags |= IS_STATESYNC; + + /* + * We want to check everything that is a property of this packet, + * but we don't (automatically) care about it's fragment status as + * this may change. + */ + is->is_v = fin->fin_v; + is->is_opt[0] = fin->fin_optmsk; + is->is_optmsk[0] = 0xffffffff; + is->is_optmsk[1] = 0xffffffff; + if (is->is_v == 6) { + is->is_opt[0] &= ~0x8; + is->is_optmsk[0] &= ~0x8; + is->is_optmsk[1] &= ~0x8; + } + is->is_sec = fin->fin_secmsk; + is->is_secmsk = 0xffff; + is->is_auth = fin->fin_auth; + is->is_authmsk = 0xffff; + if (flags & (SI_WILDP|SI_WILDA)) { + ATOMIC_INCL(ips_stats.iss_wild); + } + is->is_rulen = fin->fin_rule; + + + if (pass & FR_LOGFIRST) + is->is_pass &= ~(FR_LOGFIRST|FR_LOG); + + READ_ENTER(&ipf_state); + is->is_me = stsave; + + fr_stinsert(is, fin->fin_rev); + + if (fin->fin_p == IPPROTO_TCP) { + /* + * If we're creating state for a starting connection, start the + * timer on it as we'll never see an error if it fails to + * connect. 
+ */ + (void) fr_tcp_age(&is->is_sti, fin, ips_tqtqb, is->is_flags); + MUTEX_EXIT(&is->is_lock); +#ifdef IPFILTER_SCAN + if ((is->is_flags & SI_CLONE) == 0) + (void) ipsc_attachis(is); +#endif + } else { + MUTEX_EXIT(&is->is_lock); + } +#ifdef IPFILTER_SYNC + if ((is->is_flags & IS_STATESYNC) && ((is->is_flags & SI_CLONE) == 0)) + is->is_sync = ipfsync_new(SMC_STATE, fin, is); +#endif + if (ipstate_logging) + ipstate_log(is, ISL_NEW); + + RWLOCK_EXIT(&ipf_state); + fin->fin_state = is; + fin->fin_rev = IP6_NEQ(&is->is_dst, &fin->fin_daddr); + fin->fin_flx |= FI_STATE; + if (fin->fin_flx & FI_FRAG) + (void) fr_newfrag(fin, pass ^ FR_KEEPSTATE); + + return is; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_tcpoptions */ +/* Returns: int - 1 == packet matches state entry, 0 == it does not */ +/* Parameters: fin(I) - pointer to packet information */ +/* tcp(I) - pointer to TCP packet header */ +/* td(I) - pointer to TCP data held as part of the state */ +/* */ +/* Look after the TCP header for any options and deal with those that are */ +/* present. Record details about those that we recogise. 
*/ +/* ------------------------------------------------------------------------ */ +static int fr_tcpoptions(fin, tcp, td) +fr_info_t *fin; +tcphdr_t *tcp; +tcpdata_t *td; +{ + int off, mlen, ol, i, len, retval; + char buf[64], *s, opt; + mb_t *m = NULL; + + len = (TCP_OFF(tcp) << 2); + if (fin->fin_dlen < len) + return 0; + len -= sizeof(*tcp); + + off = fin->fin_plen - fin->fin_dlen + sizeof(*tcp) + fin->fin_ipoff; + + m = fin->fin_m; + mlen = MSGDSIZE(m) - off; + if (len > mlen) { + len = mlen; + retval = 0; + } else { + retval = 1; + } + + COPYDATA(m, off, len, buf); + + for (s = buf; len > 0; ) { + opt = *s; + if (opt == TCPOPT_EOL) + break; + else if (opt == TCPOPT_NOP) + ol = 1; + else { + if (len < 2) + break; + ol = (int)*(s + 1); + if (ol < 2 || ol > len) + break; + + /* + * Extract the TCP options we are interested in out of + * the header and store them in the the tcpdata struct. + */ + switch (opt) + { + case TCPOPT_WINDOW : + if (ol == TCPOLEN_WINDOW) { + i = (int)*(s + 2); + if (i > TCP_WSCALE_MAX) + i = TCP_WSCALE_MAX; + else if (i < 0) + i = 0; + td->td_winscale = i; + } + break; + case TCPOPT_MAXSEG : + /* + * So, if we wanted to set the TCP MAXSEG, + * it should be done here... + */ + if (ol == TCPOLEN_MAXSEG) { + i = (int)*(s + 2); + i <<= 8; + i += (int)*(s + 3); + td->td_maxseg = i; + } + break; + } + } + len -= ol; + s += ol; + } + return retval; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_tcpstate */ +/* Returns: int - 1 == packet matches state entry, 0 == it does not */ +/* Parameters: fin(I) - pointer to packet information */ +/* tcp(I) - pointer to TCP packet header */ +/* is(I) - pointer to master state structure */ +/* */ +/* Check to see if a packet with TCP headers fits within the TCP window. */ +/* Change timeout depending on whether new packet is a SYN-ACK returning */ +/* for a SYN or a RST or FIN which indicate time to close up shop. 
*/
/* ------------------------------------------------------------------------ */
static int fr_tcpstate(fin, tcp, is)
fr_info_t *fin;
tcphdr_t *tcp;
ipstate_t *is;
{
	int source, ret = 0, flags;
	tcpdata_t  *fdata, *tdata;

	/*
	 * Work out which of the two per-direction TCP data slots describes
	 * the sender of this packet (fdata) and which the receiver (tdata).
	 */
	source = !fin->fin_rev;
	if (((is->is_flags & IS_TCPFSM) != 0) && (source == 1) &&
	    (ntohs(is->is_sport) != fin->fin_data[0]))
		source = 0;
	fdata = &is->is_tcp.ts_data[!source];
	tdata = &is->is_tcp.ts_data[source];

	MUTEX_ENTER(&is->is_lock);
	if (fr_tcpinwindow(fin, fdata, tdata, tcp, is->is_flags)) {
#ifdef	IPFILTER_SCAN
		if (is->is_flags & (IS_SC_CLIENT|IS_SC_SERVER)) {
			ipsc_packet(fin, is);
			if (FR_ISBLOCK(is->is_pass)) {
				MUTEX_EXIT(&is->is_lock);
				return 1;
			}
		}
#endif

		/*
		 * Nearing end of connection, start timeout.
		 */
		ret = fr_tcp_age(&is->is_sti, fin, ips_tqtqb, is->is_flags);
		if (ret == 0) {
			MUTEX_EXIT(&is->is_lock);
			return 0;
		}

		/*
		 * set s0's as appropriate.  Use syn-ack packet as it
		 * contains both pieces of required information.
		 */
		/*
		 * Window scale option is only present in SYN/SYN-ACK packet.
		 * Compare with ~TH_FIN to mask out T/TCP setups.
		 */
		flags = tcp->th_flags & ~(TH_FIN|TH_ECNALL);
		if (flags == (TH_SYN|TH_ACK)) {
			is->is_s0[source] = ntohl(tcp->th_ack);
			is->is_s0[!source] = ntohl(tcp->th_seq) + 1;
			/*
			 * Options are only parsed for the SYN-ACK if the
			 * other side already has a window scale recorded.
			 */
			if ((TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2)) &&
			    tdata->td_winscale) {
				if (fr_tcpoptions(fin, tcp, fdata)) {
					fdata->td_winflags = TCP_WSCALE_SEEN|
							     TCP_WSCALE_FIRST;
				} else {
					if (!fdata->td_winscale)
						tdata->td_winscale = 0;
				}
			}
			if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN))
				fr_checknewisn(fin, is);
		} else if (flags == TH_SYN) {
			is->is_s0[source] = ntohl(tcp->th_seq) + 1;
			if ((TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2)))
				if (fr_tcpoptions(fin, tcp, tdata)) {
					tdata->td_winflags = TCP_WSCALE_SEEN|
							     TCP_WSCALE_FIRST;
				}

			if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN))
				fr_checknewisn(fin, is);

		}
		ret = 1;
	} else
		/* Not inside the window: flag the packet as out-of-window. */
		fin->fin_flx |= FI_OOW;
	MUTEX_EXIT(&is->is_lock);
	return ret;
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_checknewisn                                              */
/* Returns:     Nil                                                         */
/* Parameters:  fin(I)   - pointer to packet information                    */
/*              is(I)  - pointer to master state structure                  */
/*                                                                          */
/* Check to see if this TCP connection is expecting and needs a new         */
/* sequence number for a particular direction of the connection.            */
/*                                                                          */
/* NOTE: This does not actually change the sequence numbers, only gets new  */
/* one ready.                                                               */
/* ------------------------------------------------------------------------ */
static void fr_checknewisn(fin, is)
fr_info_t *fin;
ipstate_t *is;
{
	u_32_t sumd, old, new;
	tcphdr_t *tcp;
	int i;

	/* i selects the direction: 0 == forward, 1 == reverse. */
	i = fin->fin_rev;
	tcp = fin->fin_dp;

	/*
	 * Only generate a new ISN once per direction; IS_ISNSYN/IS_ISNACK
	 * record that it has been done for the forward/reverse direction.
	 */
	if (((i == 0) && !(is->is_flags & IS_ISNSYN)) ||
	    ((i == 1) && !(is->is_flags & IS_ISNACK))) {
		old = ntohl(tcp->th_seq);
		new = fr_newisn(fin);
		/* Offset between the new and the original sequence number. */
		is->is_isninc[i] = new - old;
		CALC_SUMD(old, new, sumd);
		/* Fold the 32 bit sum difference (high half onto low half). */
		is->is_sumd[i] = (sumd & 0xffff) + (sumd >> 16);

		is->is_flags |= ((i == 0) ? IS_ISNSYN : IS_ISNACK);
	}
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_tcpinwindow                                              */
/* Returns:     int - 1 == packet inside TCP "window", 0 == not inside.     */
/* Parameters:  fin(I)   - pointer to packet information                    */
/*              fdata(I) - pointer to tcp state information (forward)       */
/*              tdata(I) - pointer to tcp state information (reverse)       */
/*              tcp(I)   - pointer to TCP packet header                     */
/*              flags(I) - state flags (IS_TCPFSM, IS_STRICT are tested)    */
/*                                                                          */
/* Given a packet has matched addresses and ports, check to see if it is    */
/* within the TCP data window.  In a show of generosity, allow packets that */
/* are within the window space behind the current sequence # as well.       */
/* ------------------------------------------------------------------------ */
int fr_tcpinwindow(fin, fdata, tdata, tcp, flags)
fr_info_t *fin;
tcpdata_t  *fdata, *tdata;
tcphdr_t *tcp;
int flags;
{
	tcp_seq seq, ack, end;
	int ackskew, tcpflags;
	u_32_t win, maxwin;

	/*
	 * Find difference between last checked packet and this packet.
	 * The advertised window is only scaled for non-SYN packets.
	 */
	tcpflags = tcp->th_flags;
	seq = ntohl(tcp->th_seq);
	ack = ntohl(tcp->th_ack);
	if (tcpflags & TH_SYN)
		win = ntohs(tcp->th_win);
	else
		win = ntohs(tcp->th_win) << fdata->td_winscale;
	if (win == 0)
		win = 1;

	/*
	 * if window scaling is present, the scaling is only allowed
	 * for windows not in the first SYN packet. In that packet the
	 * window is 65535 to specify the largest window possible
	 * for receivers not implementing the window scale option.
	 * Currently, we do not assume TTCP here. That means that
	 * if we see a second packet from a host (after the initial
	 * SYN), we can assume that the receiver of the SYN did
	 * already send back the SYN/ACK (and thus that we know if
	 * the receiver also does window scaling)
	 */
	if (!(tcpflags & TH_SYN) && (fdata->td_winflags & TCP_WSCALE_FIRST)) {
		if (tdata->td_winflags & TCP_WSCALE_SEEN) {
			fdata->td_winflags &= ~TCP_WSCALE_FIRST;
			fdata->td_maxwin = win;
		} else {
			/* The peer never offered scaling: disable it. */
			fdata->td_winscale = 0;
			fdata->td_winflags = 0;
			tdata->td_winscale = 0;
			tdata->td_winflags = 0;
		}
	}

	/* Sequence number following the last byte of this segment. */
	end = seq + fin->fin_dlen - (TCP_OFF(tcp) << 2) +
	      ((tcpflags & TH_SYN) ? 1 : 0) + ((tcpflags & TH_FIN) ? 1 : 0);

	if ((fdata->td_end == 0) &&
	    (!(flags & IS_TCPFSM) ||
	     ((tcpflags & TH_OPENING) == TH_OPENING))) {
		/*
		 * Must be a (outgoing) SYN-ACK in reply to a SYN.
		 */
		fdata->td_end = end;
		fdata->td_maxwin = 1;
		fdata->td_maxend = end + win;
	}

	if (!(tcpflags & TH_ACK)) {  /* Pretend an ack was sent */
		ack = tdata->td_end;
	} else if (((tcpflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) &&
		   (ack == 0)) {
		/* gross hack to get around certain broken tcp stacks */
		ack = tdata->td_end;
	}

	/* A segment occupying no sequence space is placed at td_end. */
	if (seq == end)
		seq = end = fdata->td_end;

	maxwin = tdata->td_maxwin;
	/* Distance between what the peer has sent and what is acked. */
	ackskew = tdata->td_end - ack;

	/*
	 * Strict sequencing only allows in-order delivery.
	 */
	if ((flags & IS_STRICT) != 0) {
		if (seq != fdata->td_end) {
			return 0;
		}
	}

	/* Comparisons done on the signed difference of the two values. */
#define	SEQ_GE(a,b)	((int)((a) - (b)) >= 0)
#define	SEQ_GT(a,b)	((int)((a) - (b)) > 0)
	if (
#if defined(_KERNEL)
	    (SEQ_GE(fdata->td_maxend, end)) &&
	    (SEQ_GE(seq, fdata->td_end - maxwin)) &&
#endif
/* XXX what about big packets */
#define MAXACKWINDOW 66000
	    (-ackskew <= (MAXACKWINDOW << fdata->td_winscale)) &&
	    ( ackskew <= (MAXACKWINDOW << fdata->td_winscale))) {

		/* if ackskew < 0 then this should be due to fragmented
		 * packets. There is no way to know the length of the
		 * total packet in advance.
		 * We do know the total length from the fragment cache though.
		 * Note however that there might be more sessions with
		 * exactly the same source and destination parameters in the
		 * state cache (and source and destination is the only stuff
		 * that is saved in the fragment cache). Note further that
		 * some TCP connections in the state cache are hashed with
		 * sport and dport as well which makes it not worthwhile to
		 * look for them.
		 * Thus, when ackskew is negative but still seems to belong
		 * to this session, we bump up the destinations end value.
		 */
		if (ackskew < 0)
			tdata->td_end = ack;

		/* update max window seen */
		if (fdata->td_maxwin < win)
			fdata->td_maxwin = win;
		if (SEQ_GT(end, fdata->td_end))
			fdata->td_end = end;
		if (SEQ_GE(ack + win, tdata->td_maxend))
			tdata->td_maxend = ack + win;
		return 1;
	}
	return 0;
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_stclone                                                  */
/* Returns:     ipstate_t* - NULL == cloning failed,                        */
/*                           else pointer to new state structure            */
/* Parameters:  fin(I) - pointer to packet information                      */
/*              tcp(I) - pointer to TCP/UDP header                          */
/*              is(I)  - pointer to master state structure                  */
/*                                                                          */
/* Create a "duplicate" state table entry from the master.                  */
/* ------------------------------------------------------------------------ */
static ipstate_t *fr_stclone(fin, tcp, is)
fr_info_t *fin;
tcphdr_t *tcp;
ipstate_t *is;
{
	ipstate_t *clone;
	u_32_t send;

	/* Refuse to clone when the table is full; request a flush. */
	if (ips_num == fr_statemax) {
		ATOMIC_INCL(ips_stats.iss_max);
		fr_state_doflush = 1;
		return NULL;
	}
	KMALLOC(clone, ipstate_t *);
	if (clone == NULL)
		return NULL;
	bcopy((char *)is, (char *)clone, sizeof(*clone));

	/* The copied lock belongs to the master; fr_stinsert() re-inits it. */
	MUTEX_NUKE(&clone->is_lock);

	clone->is_die = ONE_DAY + fr_ticks;
	clone->is_state[0] = 0;
	clone->is_state[1] = 0;
	/* Sequence number following the last byte of this segment. */
	send = ntohl(tcp->th_seq) + fin->fin_dlen - (TCP_OFF(tcp) << 2) +
		((tcp->th_flags & TH_SYN) ? 1 : 0) +
		((tcp->th_flags & TH_FIN) ? 1 : 0);

	/*
	 * Seed the window tracking for the side that sent this packet and
	 * reset the other side.
	 */
	if (fin->fin_rev == 1) {
		clone->is_dend = send;
		clone->is_maxdend = send;
		clone->is_send = 0;
		clone->is_maxswin = 1;
		clone->is_maxdwin = ntohs(tcp->th_win);
		if (clone->is_maxdwin == 0)
			clone->is_maxdwin = 1;
	} else {
		clone->is_send = send;
		clone->is_maxsend = send;
		clone->is_dend = 0;
		clone->is_maxdwin = 1;
		clone->is_maxswin = ntohs(tcp->th_win);
		if (clone->is_maxswin == 0)
			clone->is_maxswin = 1;
	}

	/* The copy is itself not a template for further clones. */
	clone->is_flags &= ~SI_CLONE;
	clone->is_flags |= SI_CLONED;
	/* Returns with clone->is_lock held; released below. */
	fr_stinsert(clone, fin->fin_rev);
	clone->is_ref = 2;
	if (clone->is_p == IPPROTO_TCP) {
		(void) fr_tcp_age(&clone->is_sti, fin, ips_tqtqb,
				  clone->is_flags);
	}
	MUTEX_EXIT(&clone->is_lock);
#ifdef	IPFILTER_SCAN
	/* NOTE(review): this passes the master, not the clone - confirm. */
	(void) ipsc_attachis(is);
#endif
#ifdef	IPFILTER_SYNC
	if (is->is_flags & IS_STATESYNC)
		clone->is_sync = ipfsync_new(SMC_STATE, fin, clone);
#endif
	return clone;
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_matchsrcdst                                              */
/* Returns:     ipstate_t* - NULL == no match, else pointer to the state    */
/*                           entry that matched                             */
/* Parameters:  fin(I)   - pointer to packet information                    */
/*              is(I)    - pointer to state structure                       */
/*              src(I)   - pointer to source address                        */
/*              dst(I)   - pointer to destination address                   */
/*              tcp(I)   - pointer to TCP/UDP header                        */
/*              cmask(I) - mask applied to the packet flags being compared  */
/*                                                                          */
/* Match a state table entry against an IP packet.  The logic below is that */
/* ret gets set to one if the match succeeds, else remains 0.  If it is     */
/* still 0 after the test. no match.
*/
/* ------------------------------------------------------------------------ */
static ipstate_t *fr_matchsrcdst(fin, is, src, dst, tcp, cmask)
fr_info_t *fin;
ipstate_t *is;
i6addr_t *src, *dst;
tcphdr_t *tcp;
u_32_t cmask;
{
	int ret = 0, rev, out, flags, flx = 0, idx;
	u_short sp, dp;
	u_32_t cflx;
	void *ifp;

	/*
	 * First guess at the direction: a packet whose destination differs
	 * from that of the state entry is treated as a reverse packet.
	 */
	rev = IP6_NEQ(&is->is_dst, dst);
	ifp = fin->fin_ifp;
	out = fin->fin_out;
	flags = is->is_flags;
	sp = 0;
	dp = 0;

	if (tcp != NULL) {
		sp = htons(fin->fin_sport);
		dp = ntohs(fin->fin_dport);
	}
	/*
	 * Same destination address, but a non-wildcard port mismatch also
	 * marks the packet as travelling in the reverse direction.
	 */
	if (!rev) {
		if (tcp != NULL) {
			if (!(flags & SI_W_SPORT) && (sp != is->is_sport))
				rev = 1;
			else if (!(flags & SI_W_DPORT) && (dp != is->is_dport))
				rev = 1;
		}
	}

	/* Index into the per-direction interface arrays. */
	idx = (out << 1) + rev;

	/*
	 * If the interface for this 'direction' is set, make sure it matches.
	 * An interface name that is not set matches any, as does a name of *.
	 */
	if ((is->is_ifp[idx] == NULL &&
	     (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) ||
	    is->is_ifp[idx] == ifp)
		ret = 1;

	if (ret == 0)
		return NULL;
	ret = 0;

	/*
	 * Match addresses and ports.
	 */
	if (rev == 0) {
		if ((IP6_EQ(&is->is_dst, dst) || (flags & SI_W_DADDR)) &&
		    (IP6_EQ(&is->is_src, src) || (flags & SI_W_SADDR))) {
			if (tcp) {
				if ((sp == is->is_sport || flags & SI_W_SPORT)&&
				    (dp == is->is_dport || flags & SI_W_DPORT))
					ret = 1;
			} else {
				ret = 1;
			}
		}
	} else {
		/* Reverse direction: addresses and ports are swapped. */
		if ((IP6_EQ(&is->is_dst, src) || (flags & SI_W_DADDR)) &&
		    (IP6_EQ(&is->is_src, dst) || (flags & SI_W_SADDR))) {
			if (tcp) {
				if ((dp == is->is_sport || flags & SI_W_SPORT)&&
				    (sp == is->is_dport || flags & SI_W_DPORT))
					ret = 1;
			} else {
				ret = 1;
			}
		}
	}

	if (ret == 0)
		return NULL;

	/*
	 * Whether or not this should be here, is questionable, but the aim
	 * is to get this out of the main line.
	 */
	if (tcp == NULL)
		flags = is->is_flags & ~(SI_WILDP|SI_NEWFR|SI_CLONE|SI_CLONED);

	/*
	 * Only one of the source or destination address can be flaged as a
	 * wildcard.  Fill in the missing address, if set.
	 * For IPv6, if the address being copied in is multicast, then
	 * don't reset the wild flag - multicast causes it to be set in the
	 * first place!
	 */
	if ((flags & (SI_W_SADDR|SI_W_DADDR))) {
		fr_ip_t *fi = &fin->fin_fi;

		if ((flags & SI_W_SADDR) != 0) {
			if (rev == 0) {
#ifdef USE_INET6
				if (is->is_v == 6 &&
				    IN6_IS_ADDR_MULTICAST(&fi->fi_src.in6))
					/*EMPTY*/;
				else
#endif
				{
					is->is_src = fi->fi_src;
					is->is_flags &= ~SI_W_SADDR;
				}
			} else {
#ifdef USE_INET6
				if (is->is_v == 6 &&
				    IN6_IS_ADDR_MULTICAST(&fi->fi_dst.in6))
					/*EMPTY*/;
				else
#endif
				{
					is->is_src = fi->fi_dst;
					is->is_flags &= ~SI_W_SADDR;
				}
			}
		} else if ((flags & SI_W_DADDR) != 0) {
			if (rev == 0) {
#ifdef USE_INET6
				if (is->is_v == 6 &&
				    IN6_IS_ADDR_MULTICAST(&fi->fi_dst.in6))
					/*EMPTY*/;
				else
#endif
				{
					is->is_dst = fi->fi_dst;
					is->is_flags &= ~SI_W_DADDR;
				}
			} else {
#ifdef USE_INET6
				if (is->is_v == 6 &&
				    IN6_IS_ADDR_MULTICAST(&fi->fi_src.in6))
					/*EMPTY*/;
				else
#endif
				{
					is->is_dst = fi->fi_src;
					is->is_flags &= ~SI_W_DADDR;
				}
			}
		}
		/* No wildcards left in the entry: drop the wildcard count. */
		if ((is->is_flags & (SI_WILDA|SI_WILDP)) == 0) {
			ATOMIC_DECL(ips_stats.iss_wild);
		}
	}

	flx = fin->fin_flx & cmask;
	cflx = is->is_flx[out][rev];

	/*
	 * Match up any flags set from IP options.
	 */
	if ((cflx && (flx != (cflx & cmask))) ||
	    ((fin->fin_optmsk & is->is_optmsk[rev]) != is->is_opt[rev]) ||
	    ((fin->fin_secmsk & is->is_secmsk) != is->is_sec) ||
	    ((fin->fin_auth & is->is_authmsk) != is->is_auth))
		return NULL;

	/*
	 * Only one of the source or destination port can be flagged as a
	 * wildcard.  When filling it in, fill in a copy of the matched entry
	 * if it has the cloning flag set.
	 */
	/* Packets flagged FI_IGNORE match without updating the entry. */
	if ((fin->fin_flx & FI_IGNORE) != 0) {
		fin->fin_rev = rev;
		return is;
	}

	if ((flags & (SI_W_SPORT|SI_W_DPORT))) {
		if ((flags & SI_CLONE) != 0) {
			ipstate_t *clone;

			/* From here on "is" refers to the clone. */
			clone = fr_stclone(fin, tcp, is);
			if (clone == NULL)
				return NULL;
			is = clone;
		} else {
			ATOMIC_DECL(ips_stats.iss_wild);
		}

		if ((flags & SI_W_SPORT) != 0) {
			if (rev == 0) {
				is->is_sport = sp;
				is->is_send = ntohl(tcp->th_seq);
			} else {
				is->is_sport = dp;
				is->is_send = ntohl(tcp->th_ack);
			}
			is->is_maxsend = is->is_send + 1;
		} else if ((flags & SI_W_DPORT) != 0) {
			if (rev == 0) {
				is->is_dport = dp;
				is->is_dend = ntohl(tcp->th_ack);
			} else {
				is->is_dport = sp;
				is->is_dend = ntohl(tcp->th_seq);
			}
			is->is_maxdend = is->is_dend + 1;
		}
		is->is_flags &= ~(SI_W_SPORT|SI_W_DPORT);
		if ((flags & SI_CLONED) && ipstate_logging)
			ipstate_log(is, ISL_CLONE);
	}

	/* NOTE: ret is not read again after this assignment. */
	ret = -1;

	/*
	 * First packet seen for this direction: record its flags and option
	 * mask as the values later packets must match.
	 */
	if (is->is_flx[out][rev] == 0) {
		is->is_flx[out][rev] = flx;
		is->is_opt[rev] = fin->fin_optmsk;
		if (is->is_v == 6) {
			is->is_opt[rev] &= ~0x8;
			is->is_optmsk[rev] &= ~0x8;
		}
	}

	/*
	 * Check if the interface name for this "direction" is set and if not,
	 * fill it in.
	 */
	if (is->is_ifp[idx] == NULL &&
	    (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) {
		is->is_ifp[idx] = ifp;
		COPYIFNAME(ifp, is->is_ifname[idx]);
	}
	fin->fin_rev = rev;
	return is;
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_checkicmpmatchingstate                                   */
/* Returns:     Nil                                                         */
/* Parameters:  fin(I) - pointer to packet information                      */
/*                                                                          */
/* If we've got an ICMP error message, using the information stored in the  */
/* ICMP packet, look for a matching state table entry.                      */
/*                                                                          */
/* If we return NULL then no lock on ipf_state is held.                     */
/* If we return non-null then a read-lock on ipf_state is held.
*/ +/* ------------------------------------------------------------------------ */ +static ipstate_t *fr_checkicmpmatchingstate(fin) +fr_info_t *fin; +{ + ipstate_t *is, **isp; + u_short sport, dport; + u_char pr; + int backward, i, oi; + i6addr_t dst, src; + struct icmp *ic; + u_short savelen; + icmphdr_t *icmp; + fr_info_t ofin; + tcphdr_t *tcp; + int len; + ip_t *oip; + u_int hv; + + /* + * Does it at least have the return (basic) IP header ? + * Is it an actual recognised ICMP error type? + * Only a basic IP header (no options) should be with + * an ICMP error header. + */ + if ((fin->fin_v != 4) || (fin->fin_hlen != sizeof(ip_t)) || + (fin->fin_plen < ICMPERR_MINPKTLEN) || + !(fin->fin_flx & FI_ICMPERR)) + return NULL; + ic = fin->fin_dp; + + oip = (ip_t *)((char *)ic + ICMPERR_ICMPHLEN); + /* + * Check if the at least the old IP header (with options) and + * 8 bytes of payload is present. + */ + if (fin->fin_plen < ICMPERR_MAXPKTLEN + ((IP_HL(oip) - 5) << 2)) + return NULL; + + /* + * Sanity Checks. + */ + len = fin->fin_dlen - ICMPERR_ICMPHLEN; + if ((len <= 0) || ((IP_HL(oip) << 2) > len)) + return NULL; + + /* + * Is the buffer big enough for all of it ? It's the size of the IP + * header claimed in the encapsulated part which is of concern. It + * may be too big to be in this buffer but not so big that it's + * outside the ICMP packet, leading to TCP deref's causing problems. + * This is possible because we don't know how big oip_hl is when we + * do the pullup early in fr_check() and thus can't guarantee it is + * all here now. 
+ */ +#ifdef _KERNEL + { + mb_t *m; + + m = fin->fin_m; +# if defined(MENTAT) + if ((char *)oip + len > (char *)m->b_wptr) + return NULL; +# else + if ((char *)oip + len > (char *)fin->fin_ip + m->m_len) + return NULL; +# endif + } +#endif + bcopy((char *)fin, (char *)&ofin, sizeof(fin)); + + /* + * in the IPv4 case we must zero the i6addr union otherwise + * the IP6_EQ and IP6_NEQ macros produce the wrong results because + * of the 'junk' in the unused part of the union + */ + bzero((char *)&src, sizeof(src)); + bzero((char *)&dst, sizeof(dst)); + + /* + * we make an fin entry to be able to feed it to + * matchsrcdst note that not all fields are encessary + * but this is the cleanest way. Note further we fill + * in fin_mp such that if someone uses it we'll get + * a kernel panic. fr_matchsrcdst does not use this. + * + * watch out here, as ip is in host order and oip in network + * order. Any change we make must be undone afterwards, like + * oip->ip_off - it is still in network byte order so fix it. + */ + savelen = oip->ip_len; + oip->ip_len = len; + oip->ip_off = ntohs(oip->ip_off); + + ofin.fin_flx = FI_NOCKSUM; + ofin.fin_v = 4; + ofin.fin_ip = oip; + ofin.fin_m = NULL; /* if dereferenced, panic XXX */ + ofin.fin_mp = NULL; /* if dereferenced, panic XXX */ + ofin.fin_plen = fin->fin_dlen - ICMPERR_ICMPHLEN; + (void) fr_makefrip(IP_HL(oip) << 2, oip, &ofin); + ofin.fin_ifp = fin->fin_ifp; + ofin.fin_out = !fin->fin_out; + /* + * Reset the short and bad flag here because in fr_matchsrcdst() + * the flags for the current packet (fin_flx) are compared against + * those for the existing session. + */ + ofin.fin_flx &= ~(FI_BAD|FI_SHORT); + + /* + * Put old values of ip_len and ip_off back as we don't know + * if we have to forward the packet (or process it again. 
+ */ + oip->ip_len = savelen; + oip->ip_off = htons(oip->ip_off); + + switch (oip->ip_p) + { + case IPPROTO_ICMP : + /* + * an ICMP error can only be generated as a result of an + * ICMP query, not as the response on an ICMP error + * + * XXX theoretically ICMP_ECHOREP and the other reply's are + * ICMP query's as well, but adding them here seems strange XXX + */ + if ((ofin.fin_flx & FI_ICMPERR) != 0) + return NULL; + + /* + * perform a lookup of the ICMP packet in the state table + */ + icmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2)); + hv = (pr = oip->ip_p); + src.in4 = oip->ip_src; + hv += src.in4.s_addr; + dst.in4 = oip->ip_dst; + hv += dst.in4.s_addr; + hv += icmp->icmp_id; + hv = DOUBLE_HASH(hv); + + READ_ENTER(&ipf_state); + for (isp = &ips_table[hv]; ((is = *isp) != NULL); ) { + isp = &is->is_hnext; + if ((is->is_p != pr) || (is->is_v != 4)) + continue; + if (is->is_pass & FR_NOICMPERR) + continue; + is = fr_matchsrcdst(&ofin, is, &src, &dst, + NULL, FI_ICMPCMP); + if (is != NULL) { + if ((is->is_pass & FR_NOICMPERR) != 0) { + RWLOCK_EXIT(&ipf_state); + return NULL; + } + /* + * i : the index of this packet (the icmp + * unreachable) + * oi : the index of the original packet found + * in the icmp header (i.e. 
the packet + * causing this icmp) + * backward : original packet was backward + * compared to the state + */ + backward = IP6_NEQ(&is->is_src, &src); + fin->fin_rev = !backward; + i = (!backward << 1) + fin->fin_out; + oi = (backward << 1) + ofin.fin_out; + if (is->is_icmppkts[i] > is->is_pkts[oi]) + continue; + ips_stats.iss_hits++; + is->is_icmppkts[i]++; + return is; + } + } + RWLOCK_EXIT(&ipf_state); + return NULL; + case IPPROTO_TCP : + case IPPROTO_UDP : + break; + default : + return NULL; + } + + tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2)); + dport = tcp->th_dport; + sport = tcp->th_sport; + + hv = (pr = oip->ip_p); + src.in4 = oip->ip_src; + hv += src.in4.s_addr; + dst.in4 = oip->ip_dst; + hv += dst.in4.s_addr; + hv += dport; + hv += sport; + hv = DOUBLE_HASH(hv); + + READ_ENTER(&ipf_state); + for (isp = &ips_table[hv]; ((is = *isp) != NULL); ) { + isp = &is->is_hnext; + /* + * Only allow this icmp though if the + * encapsulated packet was allowed through the + * other way around. Note that the minimal amount + * of info present does not allow for checking against + * tcp internals such as seq and ack numbers. Only the + * ports are known to be present and can be even if the + * short flag is set. + */ + if ((is->is_p == pr) && (is->is_v == 4) && + (is = fr_matchsrcdst(&ofin, is, &src, &dst, + tcp, FI_ICMPCMP))) { + /* + * i : the index of this packet (the icmp unreachable) + * oi : the index of the original packet found in the + * icmp header (i.e. the packet causing this icmp) + * backward : original packet was backward compared to + * the state + */ + backward = IP6_NEQ(&is->is_src, &src); + fin->fin_rev = !backward; + i = (!backward << 1) + fin->fin_out; + oi = (backward << 1) + ofin.fin_out; + + if (((is->is_pass & FR_NOICMPERR) != 0) || + (is->is_icmppkts[i] > is->is_pkts[oi])) + break; + ips_stats.iss_hits++; + is->is_icmppkts[i]++; + /* + * we deliberately do not touch the timeouts + * for the accompanying state table entry. 
+ * It remains to be seen if that is correct. XXX + */ + return is; + } + } + RWLOCK_EXIT(&ipf_state); + return NULL; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_ipsmove */ +/* Returns: Nil */ +/* Parameters: is(I) - pointer to state table entry */ +/* hv(I) - new hash value for state table entry */ +/* Write Locks: ipf_state */ +/* */ +/* Move a state entry from one position in the hash table to another. */ +/* ------------------------------------------------------------------------ */ +static void fr_ipsmove(is, hv) +ipstate_t *is; +u_int hv; +{ + ipstate_t **isp; + u_int hvm; + + ASSERT(rw_read_locked(&ipf_state.ipf_lk) == 0); + + hvm = is->is_hv; + /* + * Remove the hash from the old location... + */ + isp = is->is_phnext; + if (is->is_hnext) + is->is_hnext->is_phnext = isp; + *isp = is->is_hnext; + if (ips_table[hvm] == NULL) + ips_stats.iss_inuse--; + ips_stats.iss_bucketlen[hvm]--; + + /* + * ...and put the hash in the new one. + */ + hvm = DOUBLE_HASH(hv); + is->is_hv = hvm; + isp = &ips_table[hvm]; + if (*isp) + (*isp)->is_phnext = &is->is_hnext; + else + ips_stats.iss_inuse++; + ips_stats.iss_bucketlen[hvm]++; + is->is_phnext = isp; + is->is_hnext = *isp; + *isp = is; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_stlookup */ +/* Returns: ipstate_t* - NULL == no matching state found, */ +/* else pointer to state information is returned */ +/* Parameters: fin(I) - pointer to packet information */ +/* tcp(I) - pointer to TCP/UDP header. */ +/* */ +/* Search the state table for a matching entry to the packet described by */ +/* the contents of *fin. */ +/* */ +/* If we return NULL then no lock on ipf_state is held. */ +/* If we return non-null then a read-lock on ipf_state is held. 
*/ +/* ------------------------------------------------------------------------ */ +ipstate_t *fr_stlookup(fin, tcp, ifqp) +fr_info_t *fin; +tcphdr_t *tcp; +ipftq_t **ifqp; +{ + u_int hv, hvm, pr, v, tryagain; + ipstate_t *is, **isp; + u_short dport, sport; + i6addr_t src, dst; + struct icmp *ic; + ipftq_t *ifq; + int oow; + + is = NULL; + ifq = NULL; + tcp = fin->fin_dp; + ic = (struct icmp *)tcp; + hv = (pr = fin->fin_fi.fi_p); + src = fin->fin_fi.fi_src; + dst = fin->fin_fi.fi_dst; + hv += src.in4.s_addr; + hv += dst.in4.s_addr; + + v = fin->fin_fi.fi_v; +#ifdef USE_INET6 + if (v == 6) { + hv += fin->fin_fi.fi_src.i6[1]; + hv += fin->fin_fi.fi_src.i6[2]; + hv += fin->fin_fi.fi_src.i6[3]; + + if ((fin->fin_p == IPPROTO_ICMPV6) && + IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_dst.in6)) { + hv -= dst.in4.s_addr; + } else { + hv += fin->fin_fi.fi_dst.i6[1]; + hv += fin->fin_fi.fi_dst.i6[2]; + hv += fin->fin_fi.fi_dst.i6[3]; + } + } +#endif + + /* + * Search the hash table for matching packet header info. 
+ */ + switch (pr) + { +#ifdef USE_INET6 + case IPPROTO_ICMPV6 : + tryagain = 0; + if (v == 6) { + if ((ic->icmp_type == ICMP6_ECHO_REQUEST) || + (ic->icmp_type == ICMP6_ECHO_REPLY)) { + hv += ic->icmp_id; + } + } + READ_ENTER(&ipf_state); +icmp6again: + hvm = DOUBLE_HASH(hv); + for (isp = &ips_table[hvm]; ((is = *isp) != NULL); ) { + isp = &is->is_hnext; + if ((is->is_p != pr) || (is->is_v != v)) + continue; + is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP); + if (is != NULL && + fr_matchicmpqueryreply(v, &is->is_icmp, + ic, fin->fin_rev)) { + if (fin->fin_rev) + ifq = &ips_icmpacktq; + else + ifq = &ips_icmptq; + break; + } + } + + if (is != NULL) { + if ((tryagain != 0) && !(is->is_flags & SI_W_DADDR)) { + hv += fin->fin_fi.fi_src.i6[0]; + hv += fin->fin_fi.fi_src.i6[1]; + hv += fin->fin_fi.fi_src.i6[2]; + hv += fin->fin_fi.fi_src.i6[3]; + fr_ipsmove(is, hv); + MUTEX_DOWNGRADE(&ipf_state); + } + break; + } + RWLOCK_EXIT(&ipf_state); + + /* + * No matching icmp state entry. Perhaps this is a + * response to another state entry. + * + * XXX With some ICMP6 packets, the "other" address is already + * in the packet, after the ICMP6 header, and this could be + * used in place of the multicast address. However, taking + * advantage of this requires some significant code changes + * to handle the specific types where that is the case. 
+ */ + if ((ips_stats.iss_wild != 0) && (v == 6) && (tryagain == 0) && + !IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_src.in6)) { + hv -= fin->fin_fi.fi_src.i6[0]; + hv -= fin->fin_fi.fi_src.i6[1]; + hv -= fin->fin_fi.fi_src.i6[2]; + hv -= fin->fin_fi.fi_src.i6[3]; + tryagain = 1; + WRITE_ENTER(&ipf_state); + goto icmp6again; + } + + is = fr_checkicmp6matchingstate(fin); + if (is != NULL) + return is; + break; +#endif + + case IPPROTO_ICMP : + if (v == 4) { + hv += ic->icmp_id; + } + hv = DOUBLE_HASH(hv); + READ_ENTER(&ipf_state); + for (isp = &ips_table[hv]; ((is = *isp) != NULL); ) { + isp = &is->is_hnext; + if ((is->is_p != pr) || (is->is_v != v)) + continue; + is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP); + if (is != NULL && + fr_matchicmpqueryreply(v, &is->is_icmp, + ic, fin->fin_rev)) { + if (fin->fin_rev) + ifq = &ips_icmpacktq; + else + ifq = &ips_icmptq; + break; + } + } + if (is == NULL) { + RWLOCK_EXIT(&ipf_state); + } + break; + + case IPPROTO_TCP : + case IPPROTO_UDP : + ifqp = NULL; + sport = htons(fin->fin_data[0]); + hv += sport; + dport = htons(fin->fin_data[1]); + hv += dport; + oow = 0; + tryagain = 0; + READ_ENTER(&ipf_state); +retry_tcpudp: + hvm = DOUBLE_HASH(hv); + for (isp = &ips_table[hvm]; ((is = *isp) != NULL); ) { + isp = &is->is_hnext; + if ((is->is_p != pr) || (is->is_v != v)) + continue; + fin->fin_flx &= ~FI_OOW; + is = fr_matchsrcdst(fin, is, &src, &dst, tcp, FI_CMP); + if (is != NULL) { + if (pr == IPPROTO_TCP) { + if (!fr_tcpstate(fin, tcp, is)) { + oow |= fin->fin_flx & FI_OOW; + continue; + } + } + break; + } + } + if (is != NULL) { + if (tryagain && + !(is->is_flags & (SI_CLONE|SI_WILDP|SI_WILDA))) { + hv += dport; + hv += sport; + fr_ipsmove(is, hv); + MUTEX_DOWNGRADE(&ipf_state); + } + break; + } + RWLOCK_EXIT(&ipf_state); + + if (!tryagain && ips_stats.iss_wild) { + hv -= dport; + hv -= sport; + tryagain = 1; + WRITE_ENTER(&ipf_state); + goto retry_tcpudp; + } + fin->fin_flx |= oow; + break; + +#if 0 + case IPPROTO_GRE : 
+ gre = fin->fin_dp; + if (GRE_REV(gre->gr_flags) == 1) { + hv += gre->gr_call; + } + /* FALLTHROUGH */ +#endif + default : + ifqp = NULL; + hvm = DOUBLE_HASH(hv); + READ_ENTER(&ipf_state); + for (isp = &ips_table[hvm]; ((is = *isp) != NULL); ) { + isp = &is->is_hnext; + if ((is->is_p != pr) || (is->is_v != v)) + continue; + is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP); + if (is != NULL) { + ifq = &ips_iptq; + break; + } + } + if (is == NULL) { + RWLOCK_EXIT(&ipf_state); + } + break; + } + + if ((is != NULL) && ((is->is_sti.tqe_flags & TQE_RULEBASED) != 0) && + (is->is_tqehead[fin->fin_rev] != NULL)) + ifq = is->is_tqehead[fin->fin_rev]; + if (ifq != NULL && ifqp != NULL) + *ifqp = ifq; + return is; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_updatestate */ +/* Returns: Nil */ +/* Parameters: fin(I) - pointer to packet information */ +/* is(I) - pointer to state table entry */ +/* Read Locks: ipf_state */ +/* */ +/* Updates packet and byte counters for a newly received packet. Seeds the */ +/* fragment cache with a new entry as required. */ +/* ------------------------------------------------------------------------ */ +void fr_updatestate(fin, is, ifq) +fr_info_t *fin; +ipstate_t *is; +ipftq_t *ifq; +{ + ipftqent_t *tqe; + int i, pass; + + i = (fin->fin_rev << 1) + fin->fin_out; + + /* + * For TCP packets, ifq == NULL. For all others, check if this new + * queue is different to the last one it was on and move it if so. 
+ */ + tqe = &is->is_sti; + MUTEX_ENTER(&is->is_lock); + if ((tqe->tqe_flags & TQE_RULEBASED) != 0) + ifq = is->is_tqehead[fin->fin_rev]; + + if (ifq != NULL) + fr_movequeue(tqe, tqe->tqe_ifq, ifq); + + is->is_pkts[i]++; + is->is_bytes[i] += fin->fin_plen; + MUTEX_EXIT(&is->is_lock); + +#ifdef IPFILTER_SYNC + if (is->is_flags & IS_STATESYNC) + ipfsync_update(SMC_STATE, fin, is->is_sync); +#endif + + ATOMIC_INCL(ips_stats.iss_hits); + + fin->fin_fr = is->is_rule; + + /* + * If this packet is a fragment and the rule says to track fragments, + * then create a new fragment cache entry. + */ + pass = is->is_pass; + if ((fin->fin_flx & FI_FRAG) && FR_ISPASS(pass)) + (void) fr_newfrag(fin, pass ^ FR_KEEPSTATE); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_checkstate */ +/* Returns: frentry_t* - NULL == search failed, */ +/* else pointer to rule for matching state */ +/* Parameters: ifp(I) - pointer to interface */ +/* passp(I) - pointer to filtering result flags */ +/* */ +/* Check if a packet is associated with an entry in the state table. */ +/* ------------------------------------------------------------------------ */ +frentry_t *fr_checkstate(fin, passp) +fr_info_t *fin; +u_32_t *passp; +{ + ipstate_t *is; + frentry_t *fr; + tcphdr_t *tcp; + ipftq_t *ifq; + u_int pass; + + if (fr_state_lock || (ips_list == NULL) || + (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGBODY|FI_BAD))) + return NULL; + + is = NULL; + if ((fin->fin_flx & FI_TCPUDP) || + (fin->fin_fi.fi_p == IPPROTO_ICMP) +#ifdef USE_INET6 + || (fin->fin_fi.fi_p == IPPROTO_ICMPV6) +#endif + ) + tcp = fin->fin_dp; + else + tcp = NULL; + + /* + * Search the hash table for matching packet header info. 
+ */ + ifq = NULL; + is = fin->fin_state; + if (is == NULL) + is = fr_stlookup(fin, tcp, &ifq); + switch (fin->fin_p) + { +#ifdef USE_INET6 + case IPPROTO_ICMPV6 : + if (is != NULL) + break; + if (fin->fin_v == 6) { + is = fr_checkicmp6matchingstate(fin); + if (is != NULL) + goto matched; + } + break; +#endif + case IPPROTO_ICMP : + if (is != NULL) + break; + /* + * No matching icmp state entry. Perhaps this is a + * response to another state entry. + */ + is = fr_checkicmpmatchingstate(fin); + if (is != NULL) + goto matched; + break; + case IPPROTO_TCP : + if (is == NULL) + break; + + if (is->is_pass & FR_NEWISN) { + if (fin->fin_out == 0) + fr_fixinisn(fin, is); + else if (fin->fin_out == 1) + fr_fixoutisn(fin, is); + } + break; + default : + if (fin->fin_rev) + ifq = &ips_udpacktq; + else + ifq = &ips_udptq; + break; + } + if (is == NULL) { + ATOMIC_INCL(ips_stats.iss_miss); + return NULL; + } + +matched: + fr = is->is_rule; + if (fr != NULL) { + if ((fin->fin_out == 0) && (fr->fr_nattag.ipt_num[0] != 0)) { + if (fin->fin_nattag == NULL) + return NULL; + if (fr_matchtag(&fr->fr_nattag, fin->fin_nattag) != 0) + return NULL; + } + (void) strncpy(fin->fin_group, fr->fr_group, FR_GROUPLEN); + fin->fin_icode = fr->fr_icode; + } + + fin->fin_rule = is->is_rulen; + pass = is->is_pass; + fr_updatestate(fin, is, ifq); + if (fin->fin_out == 1) + fin->fin_nat = is->is_nat[fin->fin_rev]; + + fin->fin_state = is; + is->is_touched = fr_ticks; + MUTEX_ENTER(&is->is_lock); + is->is_ref++; + MUTEX_EXIT(&is->is_lock); + RWLOCK_EXIT(&ipf_state); + fin->fin_flx |= FI_STATE; + if ((pass & FR_LOGFIRST) != 0) + pass &= ~(FR_LOGFIRST|FR_LOG); + *passp = pass; + return fr; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_fixoutisn */ +/* Returns: Nil */ +/* Parameters: fin(I) - pointer to packet information */ +/* is(I) - pointer to master state structure */ +/* */ +/* Called only for outbound packets, adjusts the sequence number 
and the */ +/* TCP checksum to match that change. */ +/* ------------------------------------------------------------------------ */ +static void fr_fixoutisn(fin, is) +fr_info_t *fin; +ipstate_t *is; +{ + tcphdr_t *tcp; + int rev; + u_32_t seq; + + tcp = fin->fin_dp; + rev = fin->fin_rev; + if ((is->is_flags & IS_ISNSYN) != 0) { + if (rev == 0) { + seq = ntohl(tcp->th_seq); + seq += is->is_isninc[0]; + tcp->th_seq = htonl(seq); + fix_outcksum(fin, &tcp->th_sum, is->is_sumd[0]); + } + } + if ((is->is_flags & IS_ISNACK) != 0) { + if (rev == 1) { + seq = ntohl(tcp->th_seq); + seq += is->is_isninc[1]; + tcp->th_seq = htonl(seq); + fix_outcksum(fin, &tcp->th_sum, is->is_sumd[1]); + } + } +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_fixinisn */ +/* Returns: Nil */ +/* Parameters: fin(I) - pointer to packet information */ +/* is(I) - pointer to master state structure */ +/* */ +/* Called only for inbound packets, adjusts the acknowledge number and the */ +/* TCP checksum to match that change. 
*/ +/* ------------------------------------------------------------------------ */ +static void fr_fixinisn(fin, is) +fr_info_t *fin; +ipstate_t *is; +{ + tcphdr_t *tcp; + int rev; + u_32_t ack; + + tcp = fin->fin_dp; + rev = fin->fin_rev; + if ((is->is_flags & IS_ISNSYN) != 0) { + if (rev == 1) { + ack = ntohl(tcp->th_ack); + ack -= is->is_isninc[0]; + tcp->th_ack = htonl(ack); + fix_incksum(fin, &tcp->th_sum, is->is_sumd[0]); + } + } + if ((is->is_flags & IS_ISNACK) != 0) { + if (rev == 0) { + ack = ntohl(tcp->th_ack); + ack -= is->is_isninc[1]; + tcp->th_ack = htonl(ack); + fix_incksum(fin, &tcp->th_sum, is->is_sumd[1]); + } + } +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_statesync */ +/* Returns: Nil */ +/* Parameters: ifp(I) - pointer to interface */ +/* */ +/* Walk through all state entries and if an interface pointer match is */ +/* found then look it up again, based on its name in case the pointer has */ +/* changed since last time. */ +/* */ +/* If ifp is passed in as being non-null then we are only doing updates for */ +/* existing, matching, uses of it. */ +/* ------------------------------------------------------------------------ */ +void fr_statesync(ifp) +void *ifp; +{ + ipstate_t *is; + int i; + + if (fr_running <= 0) + return; + + WRITE_ENTER(&ipf_state); + + if (fr_running <= 0) { + RWLOCK_EXIT(&ipf_state); + return; + } + + for (is = ips_list; is; is = is->is_next) { + /* + * Look up all the interface names in the state entry. 
+ */ + for (i = 0; i < 4; i++) { + if (ifp == NULL || ifp == is->is_ifp[i]) + is->is_ifp[i] = fr_resolvenic(is->is_ifname[i], + is->is_v); + } + } + RWLOCK_EXIT(&ipf_state); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_delstate */ +/* Returns: Nil */ +/* Parameters: is(I) - pointer to state structure to delete */ +/* why(I) - if not 0, log reason why it was deleted */ +/* Write Locks: ipf_state */ +/* */ +/* Deletes a state entry from the enumerated list as well as the hash table */ +/* and timeout queue lists. Make adjustments to hash table statistics and */ +/* global counters as required. */ +/* ------------------------------------------------------------------------ */ +static void fr_delstate(is, why) +ipstate_t *is; +int why; +{ + + ASSERT(rw_read_locked(&ipf_state.ipf_lk) == 0); + + /* + * Since we want to delete this, remove it from the state table, + * where it can be found & used, first. + */ + if (is->is_pnext != NULL) { + *is->is_pnext = is->is_next; + + if (is->is_next != NULL) + is->is_next->is_pnext = is->is_pnext; + + is->is_pnext = NULL; + is->is_next = NULL; + } + + if (is->is_phnext != NULL) { + *is->is_phnext = is->is_hnext; + if (is->is_hnext != NULL) + is->is_hnext->is_phnext = is->is_phnext; + if (ips_table[is->is_hv] == NULL) + ips_stats.iss_inuse--; + ips_stats.iss_bucketlen[is->is_hv]--; + + is->is_phnext = NULL; + is->is_hnext = NULL; + } + + /* + * Because ips_stats.iss_wild is a count of entries in the state + * table that have wildcard flags set, only decerement it once + * and do it here. + */ + if (is->is_flags & (SI_WILDP|SI_WILDA)) { + if (!(is->is_flags & SI_CLONED)) { + ATOMIC_DECL(ips_stats.iss_wild); + } + is->is_flags &= ~(SI_WILDP|SI_WILDA); + } + + /* + * Next, remove it from the timeout queue it is in. 
+ */ + fr_deletequeueentry(&is->is_sti); + + is->is_me = NULL; + + /* + * If it is still in use by something else, do not go any further, + * but note that at this point it is now an orphan. + */ + is->is_ref--; + if (is->is_ref > 0) + return; + + if (is->is_tqehead[0] != NULL) { + if (fr_deletetimeoutqueue(is->is_tqehead[0]) == 0) + fr_freetimeoutqueue(is->is_tqehead[0]); + } + if (is->is_tqehead[1] != NULL) { + if (fr_deletetimeoutqueue(is->is_tqehead[1]) == 0) + fr_freetimeoutqueue(is->is_tqehead[1]); + } + +#ifdef IPFILTER_SYNC + if (is->is_sync) + ipfsync_del(is->is_sync); +#endif +#ifdef IPFILTER_SCAN + (void) ipsc_detachis(is); +#endif + + if (ipstate_logging != 0 && why != 0) + ipstate_log(is, why); + + if (is->is_rule != NULL) { + is->is_rule->fr_statecnt--; + (void)fr_derefrule(&is->is_rule); + } + + MUTEX_DESTROY(&is->is_lock); + KFREE(is); + ips_num--; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_timeoutstate */ +/* Returns: Nil */ +/* Parameters: Nil */ +/* */ +/* Slowly expire held state for thingslike UDP and ICMP. The algorithm */ +/* used here is to keep the queue sorted with the oldest things at the top */ +/* and the youngest at the bottom. So if the top one doesn't need to be */ +/* expired then neither will any under it. 
*/
/* ------------------------------------------------------------------------ */
void fr_timeoutstate()
{
	ipftq_t *ifq, *ifqnext;
	ipftqent_t *tqe, *tqn;
	ipstate_t *is;
	SPL_INT(s);

	SPL_NET(s);
	WRITE_ENTER(&ipf_state);

	/*
	 * Pass 1: the queues chained from ips_tqtqb.  Each queue is walked
	 * from its head and the walk stops at the first entry whose
	 * tqe_die is still in the future.
	 */
	ifq = ips_tqtqb;
	while (ifq != NULL) {
		tqn = ifq->ifq_head;
		while (((tqe = tqn) != NULL) && !(tqe->tqe_die > fr_ticks)) {
			/* Pick up the successor before the entry goes away. */
			tqn = tqe->tqe_next;
			is = tqe->tqe_parent;
			fr_delstate(is, ISL_EXPIRE);
		}
		ifq = ifq->ifq_next;
	}

	/*
	 * Pass 2: the same for the queues chained from ips_utqe.  The next
	 * queue pointer is read before any entry is deleted.
	 */
	ifq = ips_utqe;
	while (ifq != NULL) {
		ifqnext = ifq->ifq_next;

		tqn = ifq->ifq_head;
		while (((tqe = tqn) != NULL) && !(tqe->tqe_die > fr_ticks)) {
			tqn = tqe->tqe_next;
			is = tqe->tqe_parent;
			fr_delstate(is, ISL_EXPIRE);
		}
		ifq = ifqnext;
	}

	/*
	 * Pass 3: free any ips_utqe queue that is flagged for deletion and
	 * no longer referenced.
	 */
	ifq = ips_utqe;
	while (ifq != NULL) {
		ifqnext = ifq->ifq_next;

		if (((ifq->ifq_flags & IFQF_DELETE) != 0) &&
		    (ifq->ifq_ref == 0)) {
			fr_freetimeoutqueue(ifq);
		}
		ifq = ifqnext;
	}

	/* A flush was requested elsewhere: do it while the lock is held. */
	if (fr_state_doflush) {
		(void) fr_state_flush(2, 0);
		fr_state_doflush = 0;
	}

	RWLOCK_EXIT(&ipf_state);
	SPL_X(s);
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_state_flush                                              */
/* Returns:     int - number of state entries removed                       */
/* Parameters:  which(I) - which flush action to perform (see below)        */
/*              proto(I) - if not 0, only flush entries with is_v == proto  */
/* Write Locks: ipf_state                                                   */
/*                                                                          */
/* Flush state tables.  Three actions currently defined:                    */
/* which == 0 : flush all state table entries                               */
/* which == 1 : flush TCP connections which have started to close but are   */
/*              stuck for some reason.                                      */
/* which == 2 : flush TCP connections which have been idle for a long time, */
/*              starting at > 4 days idle and working back in successive    */
/*              half-days to at most 12 hours old.  If this fails to free   */
/*              enough slots then work backwards in half hour slots to 30   */
/*              minutes.  If that too fails, then work backwards in 30      */
/*              second intervals for the last 30 minutes to at worst 30     */
/*              seconds idle.
*/ +/* ------------------------------------------------------------------------ */ +static int fr_state_flush(which, proto) +int which, proto; +{ + ipftq_t *ifq, *ifqnext; + ipftqent_t *tqe, *tqn; + ipstate_t *is, **isp; + int delete, removed; + long try, maxtick; + u_long interval; + SPL_INT(s); + + removed = 0; + + SPL_NET(s); + for (isp = &ips_list; ((is = *isp) != NULL); ) { + delete = 0; + + if ((proto != 0) && (is->is_v != proto)) { + isp = &is->is_next; + continue; + } + + switch (which) + { + case 0 : + delete = 1; + break; + case 1 : + case 2 : + if (is->is_p != IPPROTO_TCP) + break; + if ((is->is_state[0] != IPF_TCPS_ESTABLISHED) || + (is->is_state[1] != IPF_TCPS_ESTABLISHED)) + delete = 1; + break; + } + + if (delete) { + if (is->is_p == IPPROTO_TCP) + ips_stats.iss_fin++; + else + ips_stats.iss_expire++; + fr_delstate(is, ISL_FLUSH); + removed++; + } else + isp = &is->is_next; + } + + if (which != 2) { + SPL_X(s); + return removed; + } + + /* + * Asked to remove inactive entries because the table is full, try + * again, 3 times, if first attempt failed with a different criteria + * each time. The order tried in must be in decreasing age. + * Another alternative is to implement random drop and drop N entries + * at random until N have been freed up. 
+ */ + if (fr_ticks - ips_last_force_flush < IPF_TTLVAL(5)) + goto force_flush_skipped; + ips_last_force_flush = fr_ticks; + + if (fr_ticks > IPF_TTLVAL(43200)) + interval = IPF_TTLVAL(43200); + else if (fr_ticks > IPF_TTLVAL(1800)) + interval = IPF_TTLVAL(1800); + else if (fr_ticks > IPF_TTLVAL(30)) + interval = IPF_TTLVAL(30); + else + interval = IPF_TTLVAL(10); + try = fr_ticks - (fr_ticks - interval); + if (try < 0) + goto force_flush_skipped; + + while (removed == 0) { + maxtick = fr_ticks - interval; + if (maxtick < 0) + break; + + while (try < maxtick) { + for (ifq = ips_tqtqb; ifq != NULL; + ifq = ifq->ifq_next) { + for (tqn = ifq->ifq_head; + ((tqe = tqn) != NULL); ) { + if (tqe->tqe_die > try) + break; + tqn = tqe->tqe_next; + is = tqe->tqe_parent; + fr_delstate(is, ISL_EXPIRE); + removed++; + } + } + + for (ifq = ips_utqe; ifq != NULL; ifq = ifqnext) { + ifqnext = ifq->ifq_next; + + for (tqn = ifq->ifq_head; + ((tqe = tqn) != NULL); ) { + if (tqe->tqe_die > try) + break; + tqn = tqe->tqe_next; + is = tqe->tqe_parent; + fr_delstate(is, ISL_EXPIRE); + removed++; + } + } + if (try + interval > maxtick) + break; + try += interval; + } + + if (removed == 0) { + if (interval == IPF_TTLVAL(43200)) { + interval = IPF_TTLVAL(1800); + } else if (interval == IPF_TTLVAL(1800)) { + interval = IPF_TTLVAL(30); + } else if (interval == IPF_TTLVAL(30)) { + interval = IPF_TTLVAL(10); + } else { + break; + } + } + } +force_flush_skipped: + SPL_X(s); + return removed; +} + + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_tcp_age */ +/* Returns: int - 1 == state transition made, 0 == no change (rejected) */ +/* Parameters: tq(I) - pointer to timeout queue information */ +/* fin(I) - pointer to packet information */ +/* tqtab(I) - TCP timeout queue table this is in */ +/* flags(I) - flags from state/NAT entry */ +/* */ +/* Rewritten by Arjan de Vet <Arjan.deVet@adv.iae.nl>, 2000-07-29: */ +/* */ +/* - (try to) base state 
transitions on real evidence only, */ +/* i.e. packets that are sent and have been received by ipfilter; */ +/* diagram 18.12 of TCP/IP volume 1 by W. Richard Stevens was used. */ +/* */ +/* - deal with half-closed connections correctly; */ +/* */ +/* - store the state of the source in state[0] such that ipfstat */ +/* displays the state as source/dest instead of dest/source; the calls */ +/* to fr_tcp_age have been changed accordingly. */ +/* */ +/* Internal Parameters: */ +/* */ +/* state[0] = state of source (host that initiated connection) */ +/* state[1] = state of dest (host that accepted the connection) */ +/* */ +/* dir == 0 : a packet from source to dest */ +/* dir == 1 : a packet from dest to source */ +/* */ +/* Locking: it is assumed that the parent of the tqe structure is locked. */ +/* ------------------------------------------------------------------------ */ +int fr_tcp_age(tqe, fin, tqtab, flags) +ipftqent_t *tqe; +fr_info_t *fin; +ipftq_t *tqtab; +int flags; +{ + int dlen, ostate, nstate, rval, dir; + u_char tcpflags; + tcphdr_t *tcp; + + tcp = fin->fin_dp; + + rval = 0; + dir = fin->fin_rev; + tcpflags = tcp->th_flags; + dlen = fin->fin_dlen - (TCP_OFF(tcp) << 2); + + if (tcpflags & TH_RST) { + if (!(tcpflags & TH_PUSH) && !dlen) + nstate = IPF_TCPS_CLOSED; + else + nstate = IPF_TCPS_CLOSE_WAIT; + rval = 1; + } else { + ostate = tqe->tqe_state[1 - dir]; + nstate = tqe->tqe_state[dir]; + + switch (nstate) + { + case IPF_TCPS_CLOSED: /* 0 */ + if ((tcpflags & TH_OPENING) == TH_OPENING) { + /* + * 'dir' received an S and sends SA in + * response, CLOSED -> SYN_RECEIVED + */ + nstate = IPF_TCPS_SYN_RECEIVED; + rval = 1; + } else if ((tcpflags & TH_OPENING) == TH_SYN) { + /* 'dir' sent S, CLOSED -> SYN_SENT */ + nstate = IPF_TCPS_SYN_SENT; + rval = 1; + } + /* + * the next piece of code makes it possible to get + * already established connections into the state table + * after a restart or reload of the filter rules; this + * does not work when a 
strict 'flags S keep state' is + * used for tcp connections of course + */ + if (((flags & IS_TCPFSM) == 0) && + ((tcpflags & TH_ACKMASK) == TH_ACK)) { + /* + * we saw an A, guess 'dir' is in ESTABLISHED + * mode + */ + switch (ostate) + { + case IPF_TCPS_CLOSED : + case IPF_TCPS_SYN_RECEIVED : + nstate = IPF_TCPS_HALF_ESTAB; + rval = 1; + break; + case IPF_TCPS_HALF_ESTAB : + case IPF_TCPS_ESTABLISHED : + nstate = IPF_TCPS_ESTABLISHED; + rval = 1; + break; + default : + break; + } + } + /* + * TODO: besides regular ACK packets we can have other + * packets as well; it is yet to be determined how we + * should initialize the states in those cases + */ + break; + + case IPF_TCPS_LISTEN: /* 1 */ + /* NOT USED */ + break; + + case IPF_TCPS_SYN_SENT: /* 2 */ + if ((tcpflags & ~(TH_ECN|TH_CWR)) == TH_SYN) { + /* + * A retransmitted SYN packet. We do not reset + * the timeout here to fr_tcptimeout because a + * connection connect timeout does not renew + * after every packet that is sent. We need to + * set rval so as to indicate the packet has + * passed the check for its flags being valid + * in the TCP FSM. Setting rval to 2 has the + * result of not resetting the timeout. 
+ */ + rval = 2; + } else if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) == + TH_ACK) { + /* + * we see an A from 'dir' which is in SYN_SENT + * state: 'dir' sent an A in response to an SA + * which it received, SYN_SENT -> ESTABLISHED + */ + nstate = IPF_TCPS_ESTABLISHED; + rval = 1; + } else if (tcpflags & TH_FIN) { + /* + * we see an F from 'dir' which is in SYN_SENT + * state and wants to close its side of the + * connection; SYN_SENT -> FIN_WAIT_1 + */ + nstate = IPF_TCPS_FIN_WAIT_1; + rval = 1; + } else if ((tcpflags & TH_OPENING) == TH_OPENING) { + /* + * we see an SA from 'dir' which is already in + * SYN_SENT state, this means we have a + * simultaneous open; SYN_SENT -> SYN_RECEIVED + */ + nstate = IPF_TCPS_SYN_RECEIVED; + rval = 1; + } + break; + + case IPF_TCPS_SYN_RECEIVED: /* 3 */ + if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) == TH_ACK) { + /* + * we see an A from 'dir' which was in + * SYN_RECEIVED state so it must now be in + * established state, SYN_RECEIVED -> + * ESTABLISHED + */ + nstate = IPF_TCPS_ESTABLISHED; + rval = 1; + } else if ((tcpflags & ~(TH_ECN|TH_CWR)) == + TH_OPENING) { + /* + * We see an SA from 'dir' which is already in + * SYN_RECEIVED state. + */ + rval = 2; + } else if (tcpflags & TH_FIN) { + /* + * we see an F from 'dir' which is in + * SYN_RECEIVED state and wants to close its + * side of the connection; SYN_RECEIVED -> + * FIN_WAIT_1 + */ + nstate = IPF_TCPS_FIN_WAIT_1; + rval = 1; + } + break; + + case IPF_TCPS_HALF_ESTAB: /* 4 */ + if (ostate >= IPF_TCPS_HALF_ESTAB) { + if ((tcpflags & TH_ACKMASK) == TH_ACK) { + nstate = IPF_TCPS_ESTABLISHED; + rval = 1; + } + } + + break; + + case IPF_TCPS_ESTABLISHED: /* 5 */ + rval = 1; + if (tcpflags & TH_FIN) { + /* + * 'dir' closed its side of the connection; + * this gives us a half-closed connection; + * ESTABLISHED -> FIN_WAIT_1 + */ + nstate = IPF_TCPS_FIN_WAIT_1; + } else if (tcpflags & TH_ACK) { + /* + * an ACK, should we exclude other flags here? 
+ */ + if (ostate == IPF_TCPS_FIN_WAIT_1) { + /* + * We know the other side did an active + * close, so we are ACKing the recvd + * FIN packet (does the window matching + * code guarantee this?) and go into + * CLOSE_WAIT state; this gives us a + * half-closed connection + */ + nstate = IPF_TCPS_CLOSE_WAIT; + } else if (ostate < IPF_TCPS_CLOSE_WAIT) { + /* + * still a fully established + * connection reset timeout + */ + nstate = IPF_TCPS_ESTABLISHED; + } + } + break; + + case IPF_TCPS_CLOSE_WAIT: /* 6 */ + rval = 1; + if (tcpflags & TH_FIN) { + /* + * application closed and 'dir' sent a FIN, + * we're now going into LAST_ACK state + */ + nstate = IPF_TCPS_LAST_ACK; + } else { + /* + * we remain in CLOSE_WAIT because the other + * side has closed already and we did not + * close our side yet; reset timeout + */ + nstate = IPF_TCPS_CLOSE_WAIT; + } + break; + + case IPF_TCPS_FIN_WAIT_1: /* 7 */ + rval = 1; + if ((tcpflags & TH_ACK) && + ostate > IPF_TCPS_CLOSE_WAIT) { + /* + * if the other side is not active anymore + * it has sent us a FIN packet that we are + * ack'ing now with an ACK; this means both + * sides have now closed the connection and + * we go into TIME_WAIT + */ + /* + * XXX: how do we know we really are ACKing + * the FIN packet here? does the window code + * guarantee that? 
+ */ + nstate = IPF_TCPS_TIME_WAIT; + } else { + /* + * we closed our side of the connection + * already but the other side is still active + * (ESTABLISHED/CLOSE_WAIT); continue with + * this half-closed connection + */ + nstate = IPF_TCPS_FIN_WAIT_1; + } + break; + + case IPF_TCPS_CLOSING: /* 8 */ + /* NOT USED */ + break; + + case IPF_TCPS_LAST_ACK: /* 9 */ + if (tcpflags & TH_ACK) { + if ((tcpflags & TH_PUSH) || dlen) + /* + * there is still data to be delivered, + * reset timeout + */ + rval = 1; + else + rval = 2; + } + /* + * we cannot detect when we go out of LAST_ACK state to + * CLOSED because that is based on the reception of ACK + * packets; ipfilter can only detect that a packet + * has been sent by a host + */ + break; + + case IPF_TCPS_FIN_WAIT_2: /* 10 */ + rval = 1; + if ((tcpflags & TH_OPENING) == TH_OPENING) + nstate = IPF_TCPS_SYN_RECEIVED; + else if (tcpflags & TH_SYN) + nstate = IPF_TCPS_SYN_SENT; + break; + + case IPF_TCPS_TIME_WAIT: /* 11 */ + /* we're in 2MSL timeout now */ + rval = 1; + break; + + default : +#if defined(_KERNEL) +# if SOLARIS + cmn_err(CE_NOTE, + "tcp %lx flags %x si %lx nstate %d ostate %d\n", + (u_long)tcp, tcpflags, (u_long)tqe, + nstate, ostate); +# else + printf("tcp %lx flags %x si %lx nstate %d ostate %d\n", + (u_long)tcp, tcpflags, (u_long)tqe, + nstate, ostate); +# endif +#else + abort(); +#endif + break; + } + } + + /* + * If rval == 2 then do not update the queue position, but treat the + * packet as being ok. 
+ */ + if (rval == 2) + rval = 1; + else if (rval == 1) { + tqe->tqe_state[dir] = nstate; + if ((tqe->tqe_flags & TQE_RULEBASED) == 0) + fr_movequeue(tqe, tqe->tqe_ifq, tqtab + nstate); + } + + return rval; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: ipstate_log */ +/* Returns: Nil */ +/* Parameters: is(I) - pointer to state structure */ +/* type(I) - type of log entry to create */ +/* */ +/* Creates a state table log entry using the state structure and type info. */ +/* passed in. Log packet/byte counts, source/destination address and other */ +/* protocol specific information. */ +/* ------------------------------------------------------------------------ */ +void ipstate_log(is, type) +struct ipstate *is; +u_int type; +{ +#ifdef IPFILTER_LOG + struct ipslog ipsl; + size_t sizes[1]; + void *items[1]; + int types[1]; + + /* + * Copy information out of the ipstate_t structure and into the + * structure used for logging. + */ + ipsl.isl_type = type; + ipsl.isl_pkts[0] = is->is_pkts[0] + is->is_icmppkts[0]; + ipsl.isl_bytes[0] = is->is_bytes[0]; + ipsl.isl_pkts[1] = is->is_pkts[1] + is->is_icmppkts[1]; + ipsl.isl_bytes[1] = is->is_bytes[1]; + ipsl.isl_pkts[2] = is->is_pkts[2] + is->is_icmppkts[2]; + ipsl.isl_bytes[2] = is->is_bytes[2]; + ipsl.isl_pkts[3] = is->is_pkts[3] + is->is_icmppkts[3]; + ipsl.isl_bytes[3] = is->is_bytes[3]; + ipsl.isl_src = is->is_src; + ipsl.isl_dst = is->is_dst; + ipsl.isl_p = is->is_p; + ipsl.isl_v = is->is_v; + ipsl.isl_flags = is->is_flags; + ipsl.isl_tag = is->is_tag; + ipsl.isl_rulen = is->is_rulen; + (void) strncpy(ipsl.isl_group, is->is_group, FR_GROUPLEN); + + if (ipsl.isl_p == IPPROTO_TCP || ipsl.isl_p == IPPROTO_UDP) { + ipsl.isl_sport = is->is_sport; + ipsl.isl_dport = is->is_dport; + if (ipsl.isl_p == IPPROTO_TCP) { + ipsl.isl_state[0] = is->is_state[0]; + ipsl.isl_state[1] = is->is_state[1]; + } + } else if (ipsl.isl_p == IPPROTO_ICMP) { + ipsl.isl_itype = 
is->is_icmp.ici_type; + } else if (ipsl.isl_p == IPPROTO_ICMPV6) { + ipsl.isl_itype = is->is_icmp.ici_type; + } else { + ipsl.isl_ps.isl_filler[0] = 0; + ipsl.isl_ps.isl_filler[1] = 0; + } + + items[0] = &ipsl; + sizes[0] = sizeof(ipsl); + types[0] = 0; + + if (ipllog(IPL_LOGSTATE, NULL, items, sizes, types, 1)) { + ATOMIC_INCL(ips_stats.iss_logged); + } else { + ATOMIC_INCL(ips_stats.iss_logfail); + } +#endif +} + + +#ifdef USE_INET6 +/* ------------------------------------------------------------------------ */ +/* Function: fr_checkicmp6matchingstate */ +/* Returns: ipstate_t* - NULL == no match found, */ +/* else pointer to matching state entry */ +/* Parameters: fin(I) - pointer to packet information */ +/* Locks: NULL == no locks, else Read Lock on ipf_state */ +/* */ +/* If we've got an ICMPv6 error message, using the information stored in */ +/* the ICMPv6 packet, look for a matching state table entry. */ +/* ------------------------------------------------------------------------ */ +static ipstate_t *fr_checkicmp6matchingstate(fin) +fr_info_t *fin; +{ + struct icmp6_hdr *ic6, *oic; + int backward, i; + ipstate_t *is, **isp; + u_short sport, dport; + i6addr_t dst, src; + u_short savelen; + icmpinfo_t *ic; + fr_info_t ofin; + tcphdr_t *tcp; + ip6_t *oip6; + u_char pr; + u_int hv; + + /* + * Does it at least have the return (basic) IP header ? + * Is it an actual recognised ICMP error type? + * Only a basic IP header (no options) should be with + * an ICMP error header. 
+ */ + if ((fin->fin_v != 6) || (fin->fin_plen < ICMP6ERR_MINPKTLEN) || + !(fin->fin_flx & FI_ICMPERR)) + return NULL; + + ic6 = fin->fin_dp; + + oip6 = (ip6_t *)((char *)ic6 + ICMPERR_ICMPHLEN); + if (fin->fin_plen < sizeof(*oip6)) + return NULL; + + bcopy((char *)fin, (char *)&ofin, sizeof(fin)); + ofin.fin_v = 6; + ofin.fin_ifp = fin->fin_ifp; + ofin.fin_out = !fin->fin_out; + ofin.fin_m = NULL; /* if dereferenced, panic XXX */ + ofin.fin_mp = NULL; /* if dereferenced, panic XXX */ + + /* + * We make a fin entry to be able to feed it to + * matchsrcdst. Note that not all fields are necessary + * but this is the cleanest way. Note further we fill + * in fin_mp such that if someone uses it we'll get + * a kernel panic. fr_matchsrcdst does not use this. + * + * watch out here, as ip is in host order and oip6 in network + * order. Any change we make must be undone afterwards. + */ + savelen = oip6->ip6_plen; + oip6->ip6_plen = fin->fin_dlen - ICMPERR_ICMPHLEN; + ofin.fin_flx = FI_NOCKSUM; + ofin.fin_ip = (ip_t *)oip6; + ofin.fin_plen = oip6->ip6_plen; + (void) fr_makefrip(sizeof(*oip6), (ip_t *)oip6, &ofin); + ofin.fin_flx &= ~(FI_BAD|FI_SHORT); + oip6->ip6_plen = savelen; + + if (oip6->ip6_nxt == IPPROTO_ICMPV6) { + oic = (struct icmp6_hdr *)(oip6 + 1); + /* + * an ICMP error can only be generated as a result of an + * ICMP query, not as the response on an ICMP error + * + * XXX theoretically ICMP_ECHOREP and the other reply's are + * ICMP query's as well, but adding them here seems strange XXX + */ + if (!(oic->icmp6_type & ICMP6_INFOMSG_MASK)) + return NULL; + + /* + * perform a lookup of the ICMP packet in the state table + */ + hv = (pr = oip6->ip6_nxt); + src.in6 = oip6->ip6_src; + hv += src.in4.s_addr; + dst.in6 = oip6->ip6_dst; + hv += dst.in4.s_addr; + hv += oic->icmp6_id; + hv += oic->icmp6_seq; + hv = DOUBLE_HASH(hv); + + READ_ENTER(&ipf_state); + for (isp = &ips_table[hv]; ((is = *isp) != NULL); ) { + ic = &is->is_icmp; + isp = &is->is_hnext; + if 
((is->is_p == pr) && + !(is->is_pass & FR_NOICMPERR) && + (oic->icmp6_id == ic->ici_id) && + (oic->icmp6_seq == ic->ici_seq) && + (is = fr_matchsrcdst(&ofin, is, &src, + &dst, NULL, FI_ICMPCMP))) { + /* + * in the state table ICMP query's are stored + * with the type of the corresponding ICMP + * response. Correct here + */ + if (((ic->ici_type == ICMP6_ECHO_REPLY) && + (oic->icmp6_type == ICMP6_ECHO_REQUEST)) || + (ic->ici_type - 1 == oic->icmp6_type )) { + ips_stats.iss_hits++; + backward = IP6_NEQ(&is->is_dst, &src); + fin->fin_rev = !backward; + i = (backward << 1) + fin->fin_out; + is->is_icmppkts[i]++; + return is; + } + } + } + RWLOCK_EXIT(&ipf_state); + return NULL; + } + + hv = (pr = oip6->ip6_nxt); + src.in6 = oip6->ip6_src; + hv += src.i6[0]; + hv += src.i6[1]; + hv += src.i6[2]; + hv += src.i6[3]; + dst.in6 = oip6->ip6_dst; + hv += dst.i6[0]; + hv += dst.i6[1]; + hv += dst.i6[2]; + hv += dst.i6[3]; + + if ((oip6->ip6_nxt == IPPROTO_TCP) || (oip6->ip6_nxt == IPPROTO_UDP)) { + tcp = (tcphdr_t *)(oip6 + 1); + dport = tcp->th_dport; + sport = tcp->th_sport; + hv += dport; + hv += sport; + } else + tcp = NULL; + hv = DOUBLE_HASH(hv); + + READ_ENTER(&ipf_state); + for (isp = &ips_table[hv]; ((is = *isp) != NULL); ) { + isp = &is->is_hnext; + /* + * Only allow this icmp though if the + * encapsulated packet was allowed through the + * other way around. Note that the minimal amount + * of info present does not allow for checking against + * tcp internals such as seq and ack numbers. + */ + if ((is->is_p != pr) || (is->is_v != 6) || + (is->is_pass & FR_NOICMPERR)) + continue; + is = fr_matchsrcdst(&ofin, is, &src, &dst, tcp, FI_ICMPCMP); + if (is != NULL) { + ips_stats.iss_hits++; + backward = IP6_NEQ(&is->is_dst, &src); + fin->fin_rev = !backward; + i = (backward << 1) + fin->fin_out; + is->is_icmppkts[i]++; + /* + * we deliberately do not touch the timeouts + * for the accompanying state table entry. + * It remains to be seen if that is correct. 
XXX + */ + return is; + } + } + RWLOCK_EXIT(&ipf_state); + return NULL; +} +#endif + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_sttab_init */ +/* Returns: Nil */ +/* Parameters: tqp(I) - pointer to an array of timeout queues for TCP */ +/* */ +/* Initialise the array of timeout queues for TCP. */ +/* ------------------------------------------------------------------------ */ +void fr_sttab_init(tqp) +ipftq_t *tqp; +{ + int i; + + for (i = IPF_TCP_NSTATES - 1; i >= 0; i--) { + tqp[i].ifq_ttl = 0; + tqp[i].ifq_ref = 1; + tqp[i].ifq_head = NULL; + tqp[i].ifq_tail = &tqp[i].ifq_head; + tqp[i].ifq_next = tqp + i + 1; + MUTEX_INIT(&tqp[i].ifq_lock, "ipftq tcp tab"); + } + tqp[IPF_TCP_NSTATES - 1].ifq_next = NULL; + tqp[IPF_TCPS_CLOSED].ifq_ttl = fr_tcpclosed; + tqp[IPF_TCPS_LISTEN].ifq_ttl = fr_tcptimeout; + tqp[IPF_TCPS_SYN_SENT].ifq_ttl = fr_tcptimeout; + tqp[IPF_TCPS_SYN_RECEIVED].ifq_ttl = fr_tcptimeout; + tqp[IPF_TCPS_ESTABLISHED].ifq_ttl = fr_tcpidletimeout; + tqp[IPF_TCPS_CLOSE_WAIT].ifq_ttl = fr_tcphalfclosed; + tqp[IPF_TCPS_FIN_WAIT_1].ifq_ttl = fr_tcphalfclosed; + tqp[IPF_TCPS_CLOSING].ifq_ttl = fr_tcptimeout; + tqp[IPF_TCPS_LAST_ACK].ifq_ttl = fr_tcplastack; + tqp[IPF_TCPS_FIN_WAIT_2].ifq_ttl = fr_tcpclosewait; + tqp[IPF_TCPS_TIME_WAIT].ifq_ttl = fr_tcptimeout; + tqp[IPF_TCPS_HALF_ESTAB].ifq_ttl = fr_tcptimeout; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_sttab_destroy */ +/* Returns: Nil */ +/* Parameters: tqp(I) - pointer to an array of timeout queues for TCP */ +/* */ +/* Do whatever is necessary to "destroy" each of the entries in the array */ +/* of timeout queues for TCP. 
*/ +/* ------------------------------------------------------------------------ */ +void fr_sttab_destroy(tqp) +ipftq_t *tqp; +{ + int i; + + for (i = IPF_TCP_NSTATES - 1; i >= 0; i--) + MUTEX_DESTROY(&tqp[i].ifq_lock); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_statederef */ +/* Returns: Nil */ +/* Parameters: isp(I) - pointer to pointer to state table entry */ +/* */ +/* Decrement the reference counter for this state table entry and free it */ +/* if there are no more things using it. */ +/* */ +/* When operating in userland (ipftest), we have no timers to clear a state */ +/* entry. Therefore, we make a few simple tests before deleting an entry */ +/* outright. We compare states on each side looking for a combination of */ +/* TIME_WAIT (should really be FIN_WAIT_2?) and LAST_ACK. Then we factor */ +/* in packet direction with the interface list to make sure we don't */ +/* prematurely delete an entry on a final inbound packet that's we're also */ +/* supposed to route elsewhere. */ +/* */ +/* Internal parameters: */ +/* state[0] = state of source (host that initiated connection) */ +/* state[1] = state of dest (host that accepted the connection) */ +/* */ +/* dir == 0 : a packet from source to dest */ +/* dir == 1 : a packet from dest to source */ +/* ------------------------------------------------------------------------ */ +void fr_statederef(fin, isp) +fr_info_t *fin; +ipstate_t **isp; +{ + ipstate_t *is = *isp; +#if 0 + int nstate, ostate, dir, eol; + + eol = 0; /* End-of-the-line flag. */ + dir = fin->fin_rev; + ostate = is->is_state[1 - dir]; + nstate = is->is_state[dir]; + /* + * Determine whether this packet is local or routed. State entries + * with us as the destination will have an interface list of + * int1,-,-,int1. Entries with us as the origin run as -,int1,int1,-. 
+ */ + if ((fin->fin_p == IPPROTO_TCP) && (fin->fin_out == 0)) { + if ((strcmp(is->is_ifname[0], is->is_ifname[3]) == 0) && + (strcmp(is->is_ifname[1], is->is_ifname[2]) == 0)) { + if ((dir == 0) && + (strcmp(is->is_ifname[1], "-") == 0) && + (strcmp(is->is_ifname[0], "-") != 0)) { + eol = 1; + } else if ((dir == 1) && + (strcmp(is->is_ifname[0], "-") == 0) && + (strcmp(is->is_ifname[1], "-") != 0)) { + eol = 1; + } + } + } +#endif + + fin = fin; /* LINT */ + is = *isp; + *isp = NULL; + WRITE_ENTER(&ipf_state); + is->is_ref--; + if (is->is_ref == 0) { + is->is_ref++; /* To counter ref-- in fr_delstate() */ + fr_delstate(is, ISL_EXPIRE); +#ifndef _KERNEL +#if 0 + } else if (((fin->fin_out == 1) || (eol == 1)) && + ((ostate == IPF_TCPS_LAST_ACK) && + (nstate == IPF_TCPS_TIME_WAIT))) { + ; +#else + } else if ((is->is_sti.tqe_state[0] > IPF_TCPS_ESTABLISHED) || + (is->is_sti.tqe_state[1] > IPF_TCPS_ESTABLISHED)) { +#endif + fr_delstate(is, ISL_ORPHAN); +#endif + } + RWLOCK_EXIT(&ipf_state); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_setstatequeue */ +/* Returns: Nil */ +/* Parameters: is(I) - pointer to state structure */ +/* rev(I) - forward(0) or reverse(1) direction */ +/* Locks: ipf_state (read or write) */ +/* */ +/* Put the state entry on its default queue entry, using rev as a helped in */ +/* determining which queue it should be placed on. 
*/ +/* ------------------------------------------------------------------------ */ +void fr_setstatequeue(is, rev) +ipstate_t *is; +int rev; +{ + ipftq_t *oifq, *nifq; + + + if ((is->is_sti.tqe_flags & TQE_RULEBASED) != 0) + nifq = is->is_tqehead[rev]; + else + nifq = NULL; + + if (nifq == NULL) { + switch (is->is_p) + { +#ifdef USE_INET6 + case IPPROTO_ICMPV6 : + if (rev == 1) + nifq = &ips_icmpacktq; + else + nifq = &ips_icmptq; + break; +#endif + case IPPROTO_ICMP : + if (rev == 1) + nifq = &ips_icmpacktq; + else + nifq = &ips_icmptq; + break; + case IPPROTO_TCP : + nifq = ips_tqtqb + is->is_state[rev]; + break; + + case IPPROTO_UDP : + if (rev == 1) + nifq = &ips_udpacktq; + else + nifq = &ips_udptq; + break; + + default : + nifq = &ips_iptq; + break; + } + } + + oifq = is->is_sti.tqe_ifq; + /* + * If it's currently on a timeout queue, move it from one queue to + * another, else put it on the end of the newly determined queue. + */ + if (oifq != NULL) + fr_movequeue(&is->is_sti, oifq, nifq); + else + fr_queueappend(&is->is_sti, nifq, is); + return; +} diff --git a/usr/src/uts/common/inet/ipf/ipf.h b/usr/src/uts/common/inet/ipf/ipf.h new file mode 100644 index 0000000000..8ae3352883 --- /dev/null +++ b/usr/src/uts/common/inet/ipf/ipf.h @@ -0,0 +1,315 @@ +/* + * Copyright (C) 1993-2001, 2003 by Darren Reed. + * + * See the IPFILTER.LICENCE file for details on licencing. + * + * @(#)ipf.h 1.12 6/5/96 + * $Id: ipf.h,v 2.71.2.7 2005/06/12 07:18:31 darrenr Exp $ + * + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef __IPF_H__ +#define __IPF_H__ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#if defined(__osf__) +# define radix_mask ipf_radix_mask +# define radix_node ipf_radix_node +# define radix_node_head ipf_radix_node_head +#endif + +#include <sys/param.h> +#include <sys/types.h> +#include <sys/file.h> +/* + * This is a workaround for <sys/uio.h> troubles on FreeBSD, HPUX, OpenBSD. 
+ * Needed here because on some systems <sys/uio.h> gets included by things + * like <sys/socket.h> + */ +#ifndef _KERNEL +# define ADD_KERNEL +# define _KERNEL +# define KERNEL +#endif +#ifdef __OpenBSD__ +struct file; +#endif +#include <sys/uio.h> +#ifdef ADD_KERNEL +# undef _KERNEL +# undef KERNEL +#endif +#include <sys/time.h> +#include <sys/socket.h> +#include <net/if.h> +#if __FreeBSD_version >= 300000 +# include <net/if_var.h> +#endif +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#include <netinet/ip_icmp.h> +#ifndef TCP_PAWS_IDLE /* IRIX */ +# include <netinet/tcp.h> +#endif +#include <netinet/udp.h> + +#include <arpa/inet.h> + +#include <errno.h> +#include <limits.h> +#include <netdb.h> +#include <stdlib.h> +#include <stddef.h> +#include <stdio.h> +#if !defined(__SVR4) && !defined(__svr4__) && defined(sun) +# include <strings.h> +#endif +#include <string.h> +#include <unistd.h> + +#include "netinet/ip_compat.h" +#include "netinet/ip_fil.h" +#include "netinet/ip_nat.h" +#include "netinet/ip_frag.h" +#include "netinet/ip_state.h" +#include "netinet/ip_proxy.h" +#include "netinet/ip_auth.h" +#include "netinet/ip_lookup.h" +#include "netinet/ip_pool.h" +#ifdef IPFILTER_SCAN +#include "netinet/ip_scan.h" +#endif +#include "netinet/ip_htable.h" +#ifdef IPFILTER_SYNC +#include "netinet/ip_sync.h" +#endif + +#include "opts.h" + +#ifndef __P +# ifdef __STDC__ +# define __P(x) x +# else +# define __P(x) () +# endif +#endif +#ifndef __STDC__ +# undef const +# define const +#endif + +#ifndef U_32_T +# define U_32_T 1 +# if defined(__NetBSD__) || defined(__OpenBSD__) || defined(__FreeBSD__) || \ + defined(__sgi) +typedef u_int32_t u_32_t; +# else +# if defined(__alpha__) || defined(__alpha) || defined(_LP64) +typedef unsigned int u_32_t; +# else +# if SOLARIS2 >= 6 +typedef uint32_t u_32_t; +# else +typedef unsigned int u_32_t; +# endif +# endif +# endif /* __NetBSD__ || __OpenBSD__ || __FreeBSD__ || __sgi */ +#endif /* U_32_T */ + 
+#ifndef MAXHOSTNAMELEN +# define MAXHOSTNAMELEN 256 +#endif + +#define MAX_ICMPCODE 16 +#define MAX_ICMPTYPE 19 + + +struct ipopt_names { + int on_value; + int on_bit; + int on_siz; + char *on_name; +}; + + +typedef struct alist_s { + struct alist_s *al_next; + int al_not; + sa_family_t al_family; + i6addr_t al_i6addr; + i6addr_t al_i6mask; +} alist_t; + +#define al_addr al_i6addr.in4_addr +#define al_mask al_i6mask.in4_addr +#define al_1 al_addr +#define al_2 al_mask + + +typedef struct { + u_short fb_c; + u_char fb_t; + u_char fb_f; + u_32_t fb_k; +} fakebpf_t; + + +#if defined(__NetBSD__) || defined(__OpenBSD__) || \ + (_BSDI_VERSION >= 199701) || (__FreeBSD_version >= 300000) || \ + SOLARIS || defined(__sgi) || defined(__osf__) || defined(linux) +# include <stdarg.h> +typedef int (* ioctlfunc_t) __P((int, ioctlcmd_t, ...)); +#else +typedef int (* ioctlfunc_t) __P((dev_t, ioctlcmd_t, void *)); +#endif +typedef void (* addfunc_t) __P((int, ioctlfunc_t, void *)); +typedef int (* copyfunc_t) __P((void *, void *, size_t)); + + +/* + * SunOS4 + */ +#if defined(sun) && !defined(__SVR4) && !defined(__svr4__) +extern int ioctl __P((int, int, void *)); +#endif + +extern char thishost[]; +extern char flagset[]; +extern u_char flags[]; +extern struct ipopt_names ionames[]; +extern struct ipopt_names secclass[]; +extern char *icmpcodes[MAX_ICMPCODE + 1]; +extern char *icmptypes[MAX_ICMPTYPE + 1]; +extern int use_inet6; +extern int lineNum; +extern struct ipopt_names v6ionames[]; + + +extern int addicmp __P((char ***, struct frentry *, int)); +extern int addipopt __P((char *, struct ipopt_names *, int, char *)); +extern int addkeep __P((char ***, struct frentry *, int)); +extern int bcopywrap __P((void *, void *, size_t)); +extern void binprint __P((void *, size_t)); +extern void initparse __P((void)); +extern u_32_t buildopts __P((char *, char *, int)); +extern int checkrev __P((char *)); +extern int count6bits __P((u_32_t *)); +extern int count4bits __P((u_32_t)); +extern 
int extras __P((char ***, struct frentry *, int)); +extern char *fac_toname __P((int)); +extern int fac_findname __P((char *)); +extern void fill6bits __P((int, u_int *)); +extern int gethost __P((char *, u_32_t *)); +extern int getport __P((struct frentry *, char *, u_short *)); +extern int getportproto __P((char *, int)); +extern int getproto __P((char *)); +extern char *getline __P((char *, size_t, FILE *, int *)); +extern int genmask __P((char *, u_32_t *)); +extern char *getnattype __P((struct ipnat *)); +extern char *getsumd __P((u_32_t)); +extern u_32_t getoptbyname __P((char *)); +extern u_32_t getoptbyvalue __P((int)); +extern u_32_t getv6optbyname __P((char *)); +extern u_32_t getv6optbyvalue __P((int)); +extern void hexdump __P((FILE *, void *, int, int)); +extern int hostmask __P((char ***, char *, char *, u_32_t *, u_32_t *, int)); +extern int hostnum __P((u_32_t *, char *, int, char *)); +extern int icmpcode __P((char *)); +extern int icmpidnum __P((char *, u_short *, int)); +extern void initparse __P((void)); +extern void ipf_dotuning __P((int, char *, ioctlfunc_t)); +extern void ipf_addrule __P((int, ioctlfunc_t, void *)); +extern int ipf_parsefile __P((int, addfunc_t, ioctlfunc_t *, char *)); +extern int ipf_parsesome __P((int, addfunc_t, ioctlfunc_t *, FILE *)); +extern int ipmon_parsefile __P((char *)); +extern int ipmon_parsesome __P((FILE *)); +extern void ipnat_addrule __P((int, ioctlfunc_t, void *)); +extern int ipnat_parsefile __P((int, addfunc_t, ioctlfunc_t, char *)); +extern int ipnat_parsesome __P((int, addfunc_t, ioctlfunc_t, FILE *)); +extern int ippool_parsefile __P((int, char *, ioctlfunc_t)); +extern int ippool_parsesome __P((int, FILE *, ioctlfunc_t)); +extern int kmemcpywrap __P((void *, void *, size_t)); +extern char *kvatoname __P((ipfunc_t, ioctlfunc_t)); +extern int load_hash __P((struct iphtable_s *, struct iphtent_s *, + ioctlfunc_t)); +extern int load_hashnode __P((int, char *, struct iphtent_s *, ioctlfunc_t)); +extern int 
load_pool __P((struct ip_pool_s *list, ioctlfunc_t)); +extern int load_poolnode __P((int, char *, ip_pool_node_t *, ioctlfunc_t)); +extern int loglevel __P((char **, u_int *, int)); +extern alist_t *make_range __P((int, struct in_addr, struct in_addr)); +extern ipfunc_t nametokva __P((char *, ioctlfunc_t)); +extern ipnat_t *natparse __P((char *, int)); +extern void natparsefile __P((int, char *, int)); +extern void nat_setgroupmap __P((struct ipnat *)); +extern int ntomask __P((int, int, u_32_t *)); +extern u_32_t optname __P((char ***, u_short *, int)); +extern struct frentry *parse __P((char *, int)); +extern char *portname __P((int, int)); +extern int portnum __P((char *, char *, u_short *, int)); +extern int ports __P((char ***, char *, u_short *, int *, u_short *, int)); +extern int pri_findname __P((char *)); +extern char *pri_toname __P((int)); +extern void print_toif __P((char *, struct frdest *)); +extern void printaps __P((ap_session_t *, int)); +extern void printbuf __P((char *, int, int)); +extern void printfr __P((struct frentry *, ioctlfunc_t)); +extern void printtunable __P((ipftune_t *)); +extern struct iphtable_s *printhash __P((struct iphtable_s *, copyfunc_t, + char *, int)); +extern struct iphtent_s *printhashnode __P((struct iphtable_s *, + struct iphtent_s *, + copyfunc_t, int)); +extern void printhostmask __P((int, u_32_t *, u_32_t *)); +extern void printip __P((u_32_t *)); +extern void printlog __P((struct frentry *)); +extern void printlookup __P((i6addr_t *addr, i6addr_t *mask)); +extern void printmask __P((int, u_32_t *)); +extern void printpacket __P((struct ip *)); +extern void printpacket6 __P((struct ip *)); +extern struct ip_pool_s *printpool __P((struct ip_pool_s *, copyfunc_t, + char *, int)); +extern struct ip_pool_node *printpoolnode __P((struct ip_pool_node *, int)); +extern void printproto __P((struct protoent *, int, struct ipnat *)); +extern void printportcmp __P((int, struct frpcmp *)); +extern void optprint __P((u_short *, 
u_long, u_long)); +#ifdef USE_INET6 +extern void optprintv6 __P((u_short *, u_long, u_long)); +#endif +extern int ratoi __P((char *, int *, int, int)); +extern int ratoui __P((char *, u_int *, u_int, u_int)); +extern int remove_hash __P((struct iphtable_s *, ioctlfunc_t)); +extern int remove_hashnode __P((int, char *, struct iphtent_s *, ioctlfunc_t)); +extern int remove_pool __P((ip_pool_t *, ioctlfunc_t)); +extern int remove_poolnode __P((int, char *, ip_pool_node_t *, ioctlfunc_t)); +extern u_char tcp_flags __P((char *, u_char *, int)); +extern u_char tcpflags __P((char *)); +extern int to_interface __P((struct frdest *, char *, int)); +extern void printc __P((struct frentry *)); +extern void printC __P((int)); +extern void emit __P((int, int, void *, struct frentry *)); +extern u_char secbit __P((int)); +extern u_char seclevel __P((char *)); +extern void printfraginfo __P((char *, struct ipfr *)); +extern void printifname __P((char *, char *, void *)); +extern char *hostname __P((int, void *)); +extern struct ipstate *printstate __P((struct ipstate *, int, u_long)); +extern void printsbuf __P((char *)); +extern void printnat __P((struct ipnat *, int)); +extern void printactivenat __P((struct nat *, int)); +extern void printhostmap __P((struct hostmap *, u_int)); +extern void printpacket __P((struct ip *)); + +extern void set_variable __P((char *, char *)); +extern char *get_variable __P((char *, char **, int)); +extern void resetlexer __P((void)); + +#if SOLARIS +extern int gethostname __P((char *, int )); +extern void sync __P((void)); +#endif + +#endif /* __IPF_H__ */ diff --git a/usr/src/uts/common/inet/ipf/ipmon.h b/usr/src/uts/common/inet/ipf/ipmon.h new file mode 100644 index 0000000000..765a646954 --- /dev/null +++ b/usr/src/uts/common/inet/ipf/ipmon.h @@ -0,0 +1,94 @@ +/* + * Copyright (C) 1993-2001 by Darren Reed. + * + * See the IPFILTER.LICENCE file for details on licencing. 
 + * + * @(#)ip_fil.h 1.35 6/5/96 + * $Id: ipmon.h,v 2.8 2003/07/25 22:16:20 darrenr Exp $ + */ + + +typedef struct ipmon_action { + struct ipmon_action *ac_next; + int ac_mflag; /* collection of things to compare */ + int ac_dflag; /* flags to complement the doing fields */ + int ac_syslog; /* = 1 to syslog rules. */ + char *ac_savefile; /* filename to save log records to */ + FILE *ac_savefp; + int ac_direction; + char ac_group[FR_GROUPLEN]; + char ac_nattag[16]; + u_32_t ac_logtag; + int ac_type; /* nat/state/ipf */ + int ac_proto; + int ac_rule; + int ac_packet; + int ac_second; + int ac_result; + u_32_t ac_sip; + u_32_t ac_smsk; + u_32_t ac_dip; + u_32_t ac_dmsk; + u_short ac_sport; + u_short ac_dport; + char *ac_exec; /* execute argument */ + char *ac_run; /* actual command that gets run */ + char *ac_iface; + /* + * used with ac_packet/ac_second + */ + struct timeval ac_last; + int ac_pktcnt; +} ipmon_action_t; + +#define ac_lastsec ac_last.tv_sec +#define ac_lastusec ac_last.tv_usec + +/* + * Flags indicating what fields to do matching upon (ac_mflag). 
+ */ +#define IPMAC_DIRECTION 0x0001 +#define IPMAC_DSTIP 0x0002 +#define IPMAC_DSTPORT 0x0004 +#define IPMAC_EVERY 0x0008 +#define IPMAC_GROUP 0x0010 +#define IPMAC_INTERFACE 0x0020 +#define IPMAC_LOGTAG 0x0040 +#define IPMAC_NATTAG 0x0080 +#define IPMAC_PROTOCOL 0x0100 +#define IPMAC_RESULT 0x0200 +#define IPMAC_RULE 0x0400 +#define IPMAC_SRCIP 0x0800 +#define IPMAC_SRCPORT 0x1000 +#define IPMAC_TYPE 0x2000 +#define IPMAC_WITH 0x4000 + +#define IPMR_BLOCK 1 +#define IPMR_PASS 2 +#define IPMR_NOMATCH 3 +#define IPMR_LOG 4 + +#define IPMDO_SAVERAW 0x0001 + +#define OPT_SYSLOG 0x001 +#define OPT_RESOLVE 0x002 +#define OPT_HEXBODY 0x004 +#define OPT_VERBOSE 0x008 +#define OPT_HEXHDR 0x010 +#define OPT_TAIL 0x020 +#define OPT_NAT 0x080 +#define OPT_STATE 0x100 +#define OPT_FILTER 0x200 +#define OPT_PORTNUM 0x400 +#define OPT_LOGALL (OPT_NAT|OPT_STATE|OPT_FILTER) + +#define HOSTNAME_V4(a,b) hostname((a), 4, (u_32_t *)&(b)) + +#ifndef LOGFAC +#define LOGFAC LOG_LOCAL0 +#endif + +extern int load_config __P((char *)); +extern void dumphex __P((FILE *, int, char *, int)); +extern int check_action __P((char *, char *, int, int)); +extern char *getword __P((int)); diff --git a/usr/src/uts/common/inet/ipf/ipt.h b/usr/src/uts/common/inet/ipf/ipt.h new file mode 100644 index 0000000000..938e40041e --- /dev/null +++ b/usr/src/uts/common/inet/ipf/ipt.h @@ -0,0 +1,41 @@ +/* + * Copyright (C) 1993-2001 by Darren Reed. + * + * See the IPFILTER.LICENCE file for details on licencing. 
+ * + * $Id: ipt.h,v 2.6 2003/02/16 02:33:09 darrenr Exp $ + */ + +#ifndef __IPT_H__ +#define __IPT_H__ + +#ifndef __P +# define P_DEF +# ifdef __STDC__ +# define __P(x) x +# else +# define __P(x) () +# endif +#endif + +#include <fcntl.h> + + +struct ipread { + int (*r_open) __P((char *)); + int (*r_close) __P((void)); + int (*r_readip) __P((char *, int, char **, int *)); + int r_flags; +}; + +#define R_DO_CKSUM 0x01 + +extern void debug __P((char *, ...)); +extern void verbose __P((char *, ...)); + +#ifdef P_DEF +# undef __P +# undef P_DEF +#endif + +#endif /* __IPT_H__ */ diff --git a/usr/src/uts/common/inet/ipf/netinet/Makefile b/usr/src/uts/common/inet/ipf/netinet/Makefile new file mode 100644 index 0000000000..e64d612f4c --- /dev/null +++ b/usr/src/uts/common/inet/ipf/netinet/Makefile @@ -0,0 +1,32 @@ +# +#ident "%Z%%M% %I% %E% SMI" +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# uts/common/inet/ipf/netinet/Makefile +# +# include global definitions +include ../../../../../Makefile.master + +HDRS= ipl.h ip_compat.h ip_fil.h ip_icmp.h ip_nat.h ip_proxy.h ip_state.h + +ROOTDIRS= $(ROOT)/usr/include/netinet + +ROOTHDRS= $(HDRS:%=$(ROOT)/usr/include/netinet/%) + +CHECKHDRS= $(HDRS:%.h=%.check) + +$(ROOTDIRS)/%: % + $(INS.file) + +.KEEP_STATE: + +.PARALLEL: $(CHECKHDRS) + +install_h: $(ROOTDIRS) $(ROOTHDRS) + +$(ROOTDIRS): + $(INS.dir) + +check: $(CHECKHDRS) diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_auth.h b/usr/src/uts/common/inet/ipf/netinet/ip_auth.h new file mode 100644 index 0000000000..3892778270 --- /dev/null +++ b/usr/src/uts/common/inet/ipf/netinet/ip_auth.h @@ -0,0 +1,64 @@ +/* + * Copyright (C) 1997-2001 by Darren Reed & Guido Van Rooij. + * + * See the IPFILTER.LICENCE file for details on licencing. 
+ * + * $Id: ip_auth.h,v 2.16 2003/07/25 12:29:56 darrenr Exp $ + * + */ +#ifndef __IP_AUTH_H__ +#define __IP_AUTH_H__ + +#define FR_NUMAUTH 32 + +typedef struct frauth { + int fra_age; + int fra_len; + int fra_index; + u_32_t fra_pass; + fr_info_t fra_info; + char *fra_buf; +#ifdef MENTAT + queue_t *fra_q; +#endif +} frauth_t; + +typedef struct frauthent { + struct frentry fae_fr; + struct frauthent *fae_next; + u_long fae_age; +} frauthent_t; + +typedef struct fr_authstat { + U_QUAD_T fas_hits; + U_QUAD_T fas_miss; + u_long fas_nospace; + u_long fas_added; + u_long fas_sendfail; + u_long fas_sendok; + u_long fas_queok; + u_long fas_quefail; + u_long fas_expire; + frauthent_t *fas_faelist; +} fr_authstat_t; + + +extern frentry_t *ipauth; +extern struct fr_authstat fr_authstats; +extern int fr_defaultauthage; +extern int fr_authstart; +extern int fr_authend; +extern int fr_authsize; +extern int fr_authused; +extern int fr_auth_lock; +extern frentry_t *fr_checkauth __P((fr_info_t *, u_32_t *)); +extern void fr_authexpire __P((void)); +extern int fr_authinit __P((void)); +extern void fr_authunload __P((void)); +extern int fr_authflush __P((void)); +extern mb_t **fr_authpkts; +extern int fr_newauth __P((mb_t *, fr_info_t *)); +extern int fr_preauthcmd __P((ioctlcmd_t, frentry_t *, frentry_t **)); +extern int fr_auth_ioctl __P((caddr_t, ioctlcmd_t, int)); + +#endif /* __IP_AUTH_H__ */ diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_compat.h b/usr/src/uts/common/inet/ipf/netinet/ip_compat.h new file mode 100644 index 0000000000..43368d9937 --- /dev/null +++ b/usr/src/uts/common/inet/ipf/netinet/ip_compat.h @@ -0,0 +1,2423 @@ +/* + * Copyright (C) 1993-2001, 2003 by Darren Reed. + * + * See the IPFILTER.LICENCE file for details on licencing. + * + * @(#)ip_compat.h 1.8 1/14/96 + * $Id: ip_compat.h,v 2.142.2.30 2005/08/11 15:13:49 darrenr Exp $ + * + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + + +#ifndef __IP_COMPAT_H__ +#define __IP_COMPAT_H__ + +#ifndef __P +# ifdef __STDC__ +# define __P(x) x +# else +# define __P(x) () +# endif +#endif +#ifndef __STDC__ +# undef const +# define const +#endif + +#if defined(_KERNEL) || defined(KERNEL) || defined(__KERNEL__) +# undef KERNEL +# undef _KERNEL +# undef __KERNEL__ +# define KERNEL +# define _KERNEL +# define __KERNEL__ +#endif + +#ifndef SOLARIS +#define SOLARIS (defined(sun) && (defined(__svr4__) || defined(__SVR4))) +#endif +#if SOLARIS2 >= 8 +# ifndef USE_INET6 +# define USE_INET6 +# endif +#endif +#if defined(__FreeBSD_version) && (__FreeBSD_version >= 400000) && \ + !defined(_KERNEL) && !defined(USE_INET6) && !defined(NOINET6) +# define USE_INET6 +#endif +#if defined(__NetBSD_Version__) && (__NetBSD_Version__ >= 105000000) && \ + !defined(_KERNEL) && !defined(USE_INET6) +# define USE_INET6 +# define IPFILTER_M_IPFILTER +#endif +#if defined(OpenBSD) && (OpenBSD >= 200206) && \ + !defined(_KERNEL) && !defined(USE_INET6) +# define USE_INET6 +#endif +#if defined(__osf__) +# define USE_INET6 +#endif +#if defined(linux) && (!defined(_KERNEL) || defined(CONFIG_IPV6)) +# define USE_INET6 +#endif +#if defined(HPUXREV) && (HPUXREV >= 1111) +# define USE_INET6 +#endif + +#if defined(BSD) && (BSD < 199103) && defined(__osf__) +# undef BSD +# define BSD 199103 +#endif + +#if defined(__SVR4) || defined(__svr4__) || defined(__sgi) +# define index strchr +# if !defined(_KERNEL) +# define bzero(a,b) memset(a,0,b) +# define bcmp memcmp +# define bcopy(a,b,c) memmove(b,a,c) +# endif +#endif + +#ifndef LIFNAMSIZ +# ifdef IF_NAMESIZE +# define LIFNAMSIZ IF_NAMESIZE +# else +# ifdef IFNAMSIZ +# define LIFNAMSIZ IFNAMSIZ +# else +# define LIFNAMSIZ 16 +# endif +# endif +#endif + +#if defined(__sgi) || defined(bsdi) || defined(__hpux) || defined(hpux) +struct ether_addr { + u_char ether_addr_octet[6]; +}; +#endif + +#if defined(__sgi) && !defined(IPFILTER_LKM) +# ifdef __STDC__ +# 
define IPL_EXTERN(ep) ipfilter##ep +# else +# define IPL_EXTERN(ep) ipfilter/**/ep +# endif +#else +# ifdef __STDC__ +# define IPL_EXTERN(ep) ipl##ep +# else +# define IPL_EXTERN(ep) ipl/**/ep +# endif +#endif + +/* + * This is a workaround for <sys/uio.h> troubles on FreeBSD and OpenBSD. + */ +#ifndef linux +# ifndef _KERNEL +# define ADD_KERNEL +# define _KERNEL +# define KERNEL +# endif +# ifdef __OpenBSD__ +struct file; +# endif +# include <sys/uio.h> +# ifdef ADD_KERNEL +# undef _KERNEL +# undef KERNEL +# endif +#endif + + +/* ----------------------------------------------------------------------- */ +/* S O L A R I S */ +/* ----------------------------------------------------------------------- */ +#if SOLARIS +# define MENTAT 1 +# include <sys/cmn_err.h> +# include <sys/isa_defs.h> +# include <sys/stream.h> +# include <sys/ioccom.h> +# include <sys/sysmacros.h> +# include <sys/kmem.h> +# if SOLARIS2 >= 10 +# include <sys/procset.h> +# include <sys/proc.h> +# include <sys/devops.h> +# include <sys/ddi_impldefs.h> +# endif +/* + * because Solaris 2 defines these in two places :-/ + */ +# ifndef KERNEL +# define _KERNEL +# undef RES_INIT +# endif /* _KERNEL */ + +# if SOLARIS2 >= 8 +# include <netinet/ip6.h> +# include <netinet/icmp6.h> +# endif + +# include <inet/common.h> +/* These 5 are defined in <inet/ip.h> and <netinet/ip.h> */ +# undef IPOPT_EOL +# undef IPOPT_NOP +# undef IPOPT_LSRR +# undef IPOPT_RR +# undef IPOPT_SSRR +# ifdef i386 +# define _SYS_PROMIF_H +# endif +# include <inet/ip.h> +# undef COPYOUT +# include <inet/ip_ire.h> +# ifndef KERNEL +# undef _KERNEL +# endif +# if SOLARIS2 >= 8 +# define SNPRINTF snprintf + +# include <inet/ip_if.h> +# define ipif_local_addr ipif_lcl_addr +/* Only defined in private include file */ +# ifndef V4_PART_OF_V6 +# define V4_PART_OF_V6(v6) v6.s6_addr32[3] +# endif +struct ip6_ext { + u_char ip6e_nxt; + u_char ip6e_len; +}; +# endif /* SOLARIS2 >= 8 */ + +# if SOLARIS2 >= 6 +# include <sys/atomic.h> +typedef 
uint32_t u_32_t; +# else +typedef unsigned int u_32_t; +# endif +# define U_32_T 1 + +# ifdef _KERNEL +# define KRWLOCK_T krwlock_t +# define KMUTEX_T kmutex_t +# include "qif.h" +# include "pfil.h" +# if SOLARIS2 >= 6 +# if SOLARIS2 == 6 +# define ATOMIC_INCL(x) atomic_add_long((uint32_t*)&(x), 1) +# define ATOMIC_DECL(x) atomic_add_long((uint32_t*)&(x), -1) +# else +# define ATOMIC_INCL(x) atomic_add_long(&(x), 1) +# define ATOMIC_DECL(x) atomic_add_long(&(x), -1) +# endif /* SOLARIS2 == 6 */ +# define ATOMIC_INC64(x) atomic_add_64((uint64_t*)&(x), 1) +# define ATOMIC_INC32(x) atomic_add_32((uint32_t*)&(x), 1) +# define ATOMIC_INC16(x) atomic_add_16((uint16_t*)&(x), 1) +# define ATOMIC_DEC64(x) atomic_add_64((uint64_t*)&(x), -1) +# define ATOMIC_DEC32(x) atomic_add_32((uint32_t*)&(x), -1) +# define ATOMIC_DEC16(x) atomic_add_16((uint16_t*)&(x), -1) +# else +# define ATOMIC_INC(x) { mutex_enter(&ipf_rw); (x)++; \ + mutex_exit(&ipf_rw); } +# define ATOMIC_DEC(x) { mutex_enter(&ipf_rw); (x)--; \ + mutex_exit(&ipf_rw); } +# endif /* SOLARIS2 >= 6 */ +# define USE_MUTEXES +# define MUTEX_ENTER(x) mutex_enter(&(x)->ipf_lk) +# define READ_ENTER(x) rw_enter(&(x)->ipf_lk, RW_READER) +# define WRITE_ENTER(x) rw_enter(&(x)->ipf_lk, RW_WRITER) +# define MUTEX_DOWNGRADE(x) rw_downgrade(&(x)->ipf_lk) +# define RWLOCK_INIT(x, y) rw_init(&(x)->ipf_lk, (y), \ + RW_DRIVER, NULL) +# define RWLOCK_EXIT(x) rw_exit(&(x)->ipf_lk) +# define RW_DESTROY(x) rw_destroy(&(x)->ipf_lk) +# define MUTEX_INIT(x, y) mutex_init(&(x)->ipf_lk, (y), \ + MUTEX_DRIVER, NULL) +# define MUTEX_DESTROY(x) mutex_destroy(&(x)->ipf_lk) +# define MUTEX_NUKE(x) bzero((x), sizeof(*(x))) +# define MUTEX_EXIT(x) mutex_exit(&(x)->ipf_lk) +# define COPYIN(a,b,c) copyin((caddr_t)(a), (caddr_t)(b), (c)) +# define COPYOUT(a,b,c) copyout((caddr_t)(a), (caddr_t)(b), (c)) +# define BCOPYIN(a,b,c) (void) copyin((caddr_t)(a), (caddr_t)(b), (c)) +# define BCOPYOUT(a,b,c) (void) copyout((caddr_t)(a), (caddr_t)(b), (c)) +# 
define UIOMOVE(a,b,c,d) uiomove((caddr_t)a,b,c,d) +# define KFREE(x) kmem_free((char *)(x), sizeof(*(x))) +# define KFREES(x,s) kmem_free((char *)(x), (s)) +# define SPL_NET(x) ; +# define SPL_IMP(x) ; +# undef SPL_X +# define SPL_X(x) ; +# ifdef sparc +# define ntohs(x) (x) +# define ntohl(x) (x) +# define htons(x) (x) +# define htonl(x) (x) +# endif /* sparc */ +# define KMALLOC(a,b) (a) = (b)kmem_alloc(sizeof(*(a)), KM_NOSLEEP) +# define KMALLOCS(a,b,c) (a) = (b)kmem_alloc((c), KM_NOSLEEP) +# define GET_MINOR(x) getminor(x) +extern void *get_unit __P((char *, int)); +# define GETIFP(n, v) get_unit(n, v) +# define IFNAME(x) ((qif_t *)x)->qf_name +# define COPYIFNAME(x, b) \ + (void) strncpy(b, ((qif_t *)x)->qf_name, \ + LIFNAMSIZ) +#ifdef IRE_ILL_CN +extern kmutex_t s_ill_g_head_lock; +extern struct s_ill_s *s_ill_g_head; /* ILL List Head */ +#endif /* IRE_ILL_CN */ +# define GETKTIME(x) uniqtime((struct timeval *)x) +# define MSGDSIZE(x) msgdsize(x) +# define M_LEN(x) ((x)->b_wptr - (x)->b_rptr) +# define M_DUPLICATE(x) dupmsg((x)) +# define MTOD(m,t) ((t)((m)->b_rptr)) +# define MTYPE(m) ((m)->b_datap->db_type) +# define FREE_MB_T(m) freemsg(m) +# define m_next b_cont +# define CACHE_HASH(x) (((qpktinfo_t *)(x)->fin_qpi)->qpi_num & 7) +# define IPF_PANIC(x,y) if (x) { printf y; cmn_err(CE_PANIC, "ipf_panic"); } +typedef mblk_t mb_t; +# endif /* _KERNEL */ + +# if (SOLARIS2 >= 7) +# ifdef lint +# define ALIGN32(ptr) (ptr ? 0L : 0L) +# define ALIGN16(ptr) (ptr ? 
0L : 0L) +# else +# define ALIGN32(ptr) (ptr) +# define ALIGN16(ptr) (ptr) +# endif +# endif + +# if SOLARIS2 < 6 +typedef struct uio uio_t; +# endif +typedef int ioctlcmd_t; + +# define OS_RECOGNISED 1 + +#endif /* SOLARIS */ + +/* ----------------------------------------------------------------------- */ +/* H P U X */ +/* ----------------------------------------------------------------------- */ +#ifdef __hpux +# define MENTAT 1 +# include <sys/sysmacros.h> +# include <sys/spinlock.h> +# include <sys/lock.h> +# include <sys/stream.h> +# ifdef USE_INET6 +# include <netinet/if_ether.h> +# include <netinet/ip6.h> +# include <netinet/icmp6.h> +typedef struct ip6_hdr ip6_t; +# endif + +# ifdef _KERNEL +# define SNPRINTF sprintf +# if (HPUXREV >= 1111) +# define IPL_SELECT +# ifdef IPL_SELECT +# include <machine/sys/user.h> +# include <sys/kthread_iface.h> +# define READ_COLLISION 0x01 + +typedef struct iplog_select_s { + kthread_t *read_waiter; + int state; +} iplog_select_t; +# endif +# endif + +# define GETKTIME(x) uniqtime((struct timeval *)x) + +# if HPUXREV == 1111 +# include "kern_svcs.h" +# else +# include <sys/kern_svcs.h> +# endif +# undef ti_flags +# undef TCP_NODELAY +# undef TCP_MAXSEG +# include <sys/reg.h> +# include "../netinet/ip_info.h" +/* + * According to /usr/include/sys/spinlock.h on HP-UX 11.00, these functions + * are available. Attempting to use them actually results in unresolved + * symbols when it comes time to load the module. + * This has been fixed! Yipee! 
+ */ +# if 1 +# ifdef __LP64__ +# define ATOMIC_INCL(x) lock_and_incr_int64(&ipf_rw.ipf_lk, &(x), 1) +# define ATOMIC_DECL(x) lock_and_incr_int64(&ipf_rw.ipf_lk, &(x), -1) +# else +# define ATOMIC_INCL(x) lock_and_incr_int32(&ipf_rw.ipf_lk, &(x), 1) +# define ATOMIC_DECL(x) lock_and_incr_int32(&ipf_rw.ipf_lk, &(x), -1) +# endif +# define ATOMIC_INC64(x) lock_and_incr_int64(&ipf_rw.ipf_lk, &(x), 1) +# define ATOMIC_INC32(x) lock_and_incr_int32(&ipf_rw.ipf_lk, &(x), 1) +# define ATOMIC_INC16(x) lock_and_incr_int16(&ipf_rw.ipf_lk, &(x), 1) +# define ATOMIC_DEC64(x) lock_and_incr_int64(&ipf_rw.ipf_lk, &(x), -1) +# define ATOMIC_DEC32(x) lock_and_incr_int32(&ipf_rw.ipf_lk, &(x), -1) +# define ATOMIC_DEC16(x) lock_and_incr_int16(&ipf_rw.ipf_lk, &(x), -1) +# else /* 0 */ +# define ATOMIC_INC64(x) { MUTEX_ENTER(&ipf_rw); (x)++; \ + MUTEX_EXIT(&ipf_rw); } +# define ATOMIC_DEC64(x) { MUTEX_ENTER(&ipf_rw); (x)--; \ + MUTEX_EXIT(&ipf_rw); } +# define ATOMIC_INC32(x) { MUTEX_ENTER(&ipf_rw); (x)++; \ + MUTEX_EXIT(&ipf_rw); } +# define ATOMIC_DEC32(x) { MUTEX_ENTER(&ipf_rw); (x)--; \ + MUTEX_EXIT(&ipf_rw); } +# define ATOMIC_INCL(x) { MUTEX_ENTER(&ipf_rw); (x)++; \ + MUTEX_EXIT(&ipf_rw); } +# define ATOMIC_DECL(x) { MUTEX_ENTER(&ipf_rw); (x)--; \ + MUTEX_EXIT(&ipf_rw); } +# define ATOMIC_INC(x) { MUTEX_ENTER(&ipf_rw); (x)++; \ + MUTEX_EXIT(&ipf_rw); } +# define ATOMIC_DEC(x) { MUTEX_ENTER(&ipf_rw); (x)--; \ + MUTEX_EXIT(&ipf_rw); } +# endif +# define ip_cksum ip_csuma +# define memcpy(a,b,c) bcopy((caddr_t)b, (caddr_t)a, c) +# define USE_MUTEXES +# define MUTEX_INIT(x, y) initlock(&(x)->ipf_lk, 0, 0, (y)) +# define MUTEX_ENTER(x) spinlock(&(x)->ipf_lk) +# define MUTEX_EXIT(x) spinunlock(&(x)->ipf_lk); +# define MUTEX_DESTROY(x) +# define MUTEX_NUKE(x) bzero((char *)(x), sizeof(*(x))) +# define KMUTEX_T lock_t +# define kmutex_t lock_t /* for pfil.h */ +# define krwlock_t lock_t /* for pfil.h */ +/* + * The read-write lock implementation in HP-UX 11.0 is crippled - it can + * only 
be used by threads working in a user context! + * This has been fixed! Yipee! (Or at least it does in 11.00, not 11.11..) + */ +# if HPUXREV < 1111 +# define MUTEX_DOWNGRADE(x) lock_write_to_read(x) +# define KRWLOCK_T struct rw_lock +# define READ_ENTER(x) lock_read(&(x)->ipf_lk) +# define WRITE_ENTER(x) lock_write(&(x)->ipf_lk) +# if HPUXREV >= 1111 +# define RWLOCK_INIT(x, y) rwlock_init4(&(x)->ipf_lk, 0, RWLCK_CANSLEEP, 0, y) +# else +# define RWLOCK_INIT(x, y) lock_init3(&(x)->ipf_lk, 0, 1, 0, 0, y) +# endif +# define RWLOCK_EXIT(x) lock_done(&(x)->ipf_lk) +# else +# define KRWLOCK_T lock_t +# define KMUTEX_T lock_t +# define READ_ENTER(x) MUTEX_ENTER(x) +# define WRITE_ENTER(x) MUTEX_ENTER(x) +# define MUTEX_DOWNGRADE(x) +# define RWLOCK_INIT(x, y) initlock(&(x)->ipf_lk, 0, 0, y) +# define RWLOCK_EXIT(x) MUTEX_EXIT(x) +# endif +# define RW_DESTROY(x) +# define COPYIN(a,b,c) copyin((caddr_t)(a), (caddr_t)(b), (c)) +# define COPYOUT(a,b,c) copyout((caddr_t)(a), (caddr_t)(b), (c)) +# if HPUXREV >= 1111 +# define BCOPYIN(a,b,c) 0; bcopy((caddr_t)(a), (caddr_t)(b), (c)) +# define BCOPYOUT(a,b,c) 0; bcopy((caddr_t)(a), (caddr_t)(b), (c)) +# else +# define BCOPYIN(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c)) +# define BCOPYOUT(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c)) +# endif +# define SPL_NET(x) ; +# define SPL_IMP(x) ; +# undef SPL_X +# define SPL_X(x) ; +extern void *get_unit __P((char *, int)); +# define GETIFP(n, v) get_unit(n, v) +# define IFNAME(x, b) ((ill_t *)x)->ill_name +# define COPYIFNAME(x, b) \ + (void) strncpy(b, ((qif_t *)x)->qf_name, \ + LIFNAMSIZ) +# define UIOMOVE(a,b,c,d) uiomove((caddr_t)a,b,c,d) +# define SLEEP(id, n) { lock_t *_l = get_sleep_lock((caddr_t)id); \ + sleep(id, PZERO+1); \ + spinunlock(_l); \ + } +# define WAKEUP(id,x) { lock_t *_l = get_sleep_lock((caddr_t)id); \ + wakeup(id + x); \ + spinunlock(_l); \ + } +# define KMALLOC(a, b) MALLOC((a), b, sizeof(*(a)), M_IOSYS, M_NOWAIT) +# define KMALLOCS(a, b, c) MALLOC((a), b, 
(c), M_IOSYS, M_NOWAIT) +# define KFREE(x) kmem_free((char *)(x), sizeof(*(x))) +# define KFREES(x,s) kmem_free((char *)(x), (s)) +# define MSGDSIZE(x) msgdsize(x) +# define M_LEN(x) ((x)->b_wptr - (x)->b_rptr) +# define M_DUPLICATE(x) dupmsg((x)) +# define MTOD(m,t) ((t)((m)->b_rptr)) +# define MTYPE(m) ((m)->b_datap->db_type) +# define FREE_MB_T(m) freemsg(m) +# define m_next b_cont +# define IPF_PANIC(x,y) if (x) { printf y; panic("ipf_panic"); } +typedef mblk_t mb_t; + +# define CACHE_HASH(x) (((qpktinfo_t *)(x)->fin_qpi)->qpi_num & 7) + +# include "qif.h" +# include "pfil.h" + +# else /* _KERNEL */ + +typedef unsigned char uchar_t; + +# ifndef _SYS_STREAM_INCLUDED +typedef char * mblk_t; +typedef void * queue_t; +typedef u_long ulong; +# endif +# include <netinet/ip_info.h> + +# endif /* _KERNEL */ + +# ifdef lint +# define ALIGN32(ptr) (ptr ? 0L : 0L) +# define ALIGN16(ptr) (ptr ? 0L : 0L) +# else +# define ALIGN32(ptr) (ptr) +# define ALIGN16(ptr) (ptr) +# endif + +typedef struct uio uio_t; +typedef int ioctlcmd_t; +typedef int minor_t; +typedef unsigned int u_32_t; +# define U_32_T 1 + +# define OS_RECOGNISED 1 + +#endif /* __hpux */ + +/* ----------------------------------------------------------------------- */ +/* I R I X */ +/* ----------------------------------------------------------------------- */ +#ifdef __sgi +# undef MENTAT +# if IRIX < 60500 +typedef struct uio uio_t; +# endif +typedef int ioctlcmd_t; +typedef u_int32_t u_32_t; +# define U_32_T 1 + +# ifdef INET6 +# define USE_INET6 +# endif + +# define hz HZ +# include <sys/ksynch.h> +# define IPF_LOCK_PL plhi +# include <sys/sema.h> +# undef kmutex_t +typedef struct { + lock_t *l; + int pl; +} kmutex_t; + +# ifdef MUTEX_INIT +# define KMUTEX_T mutex_t +# else +# define KMUTEX_T kmutex_t +# define KRWLOCK_T kmutex_t +# endif + +# ifdef _KERNEL +# define ATOMIC_INC(x) { MUTEX_ENTER(&ipf_rw); \ + (x)++; MUTEX_EXIT(&ipf_rw); } +# define ATOMIC_DEC(x) { MUTEX_ENTER(&ipf_rw); \ + (x)--; 
MUTEX_EXIT(&ipf_rw); } +# define USE_MUTEXES +# ifdef MUTEX_INIT +# include <sys/atomic_ops.h> +# define ATOMIC_INCL(x) atomicAddUlong(&(x), 1) +# define ATOMIC_INC64(x) atomicAddUint64(&(x), 1) +# define ATOMIC_INC32(x) atomicAddUint(&(x), 1) +# define ATOMIC_INC16 ATOMIC_INC +# define ATOMIC_DECL(x) atomicAddUlong(&(x), -1) +# define ATOMIC_DEC64(x) atomicAddUint64(&(x), -1) +# define ATOMIC_DEC32(x) atomicAddUint(&(x), -1) +# define ATOMIC_DEC16 ATOMIC_DEC +# undef MUTEX_INIT +# define MUTEX_INIT(x, y) mutex_init(&(x)->ipf_lk, \ + MUTEX_DEFAULT, y) +# undef MUTEX_ENTER +# define MUTEX_ENTER(x) mutex_lock(&(x)->ipf_lk, 0) +# undef MUTEX_EXIT +# define MUTEX_EXIT(x) mutex_unlock(&(x)->ipf_lk) +# undef MUTEX_DESTROY +# define MUTEX_DESTROY(x) mutex_destroy(&(x)->ipf_lk) +# define MUTEX_DOWNGRADE(x) mrdemote(&(x)->ipf_lk) +# define KRWLOCK_T mrlock_t +# define RWLOCK_INIT(x, y) mrinit(&(x)->ipf_lk, y) +# undef RW_DESTROY +# define RW_DESTROY(x) mrfree(&(x)->ipf_lk) +# define READ_ENTER(x) RW_RDLOCK(&(x)->ipf_lk) +# define WRITE_ENTER(x) RW_WRLOCK(&(x)->ipf_lk) +# define RWLOCK_EXIT(x) RW_UNLOCK(&(x)->ipf_lk) +# else +# define READ_ENTER(x) MUTEX_ENTER(&(x)->ipf_lk) +# define WRITE_ENTER(x) MUTEX_ENTER(&(x)->ipf_lk) +# define MUTEX_DOWNGRADE(x) ; +# define RWLOCK_EXIT(x) MUTEX_EXIT(&(x)->ipf_lk) +# define MUTEX_EXIT(x) UNLOCK((x)->ipf_lk.l, (x)->ipf_lk.pl); +# define MUTEX_INIT(x,y) (x)->ipf_lk.l = LOCK_ALLOC((uchar_t)-1, IPF_LOCK_PL, (lkinfo_t *)-1, KM_NOSLEEP) +# define MUTEX_DESTROY(x) LOCK_DEALLOC((x)->ipf_lk.l) +# define MUTEX_ENTER(x) (x)->ipf_lk.pl = LOCK((x)->ipf_lk.l, \ + IPF_LOCK_PL); +# endif +# define MUTEX_NUKE(x) bzero((x), sizeof(*(x))) +# define FREE_MB_T(m) m_freem(m) +# define MTOD(m,t) mtod(m,t) +# define COPYIN(a,b,c) (bcopy((caddr_t)(a), (caddr_t)(b), (c)), 0) +# define COPYOUT(a,b,c) (bcopy((caddr_t)(a), (caddr_t)(b), (c)), 0) +# define BCOPYIN(a,b,c) (bcopy((caddr_t)(a), (caddr_t)(b), (c)), 0) +# define BCOPYOUT(a,b,c) (bcopy((caddr_t)(a), 
(caddr_t)(b), (c)), 0) +# define UIOMOVE(a,b,c,d) uiomove((caddr_t)a,b,c,d) +# define SLEEP(id, n) sleep((id), PZERO+1) +# define WAKEUP(id,x) wakeup(id+x) +# define KFREE(x) kmem_free((char *)(x), sizeof(*(x))) +# define KFREES(x,s) kmem_free((char *)(x), (s)) +# define GETIFP(n,v) ifunit(n) +# include <sys/kmem.h> +# include <sys/ddi.h> +# define KMALLOC(a,b) (a) = (b)kmem_alloc(sizeof(*(a)), KM_NOSLEEP) +# define KMALLOCS(a,b,c) (a) = (b)kmem_alloc((c), KM_NOSLEEP) +# define GET_MINOR(x) getminor(x) +# define USE_SPL 1 +# define SPL_IMP(x) (x) = splimp() +# define SPL_NET(x) (x) = splnet() +# define SPL_X(x) (void) splx(x) +extern void m_copydata __P((struct mbuf *, int, int, caddr_t)); +extern void m_copyback __P((struct mbuf *, int, int, caddr_t)); +# define MSGDSIZE(x) mbufchainlen(x) +# define M_LEN(x) (x)->m_len +# define M_DUPLICATE(x) m_copy((x), 0, M_COPYALL) +# define GETKTIME(x) microtime((struct timeval *)x) +# define CACHE_HASH(x) ((IFNAME(fin->fin_ifp)[0] + \ + ((struct ifnet *)fin->fin_ifp)->if_unit) & 7) +# define IPF_PANIC(x,y) if (x) { printf y; panic("ipf_panic"); } +typedef struct mbuf mb_t; +# else +# undef RW_DESTROY +# undef MUTEX_INIT +# undef MUTEX_DESTROY +# endif /* _KERNEL */ + +# define OS_RECOGNISED 1 + +#endif /* __sgi */ + +/* ----------------------------------------------------------------------- */ +/* T R U 6 4 */ +/* ----------------------------------------------------------------------- */ +#ifdef __osf__ +# undef MENTAT + +# include <kern/lock.h> +# include <sys/sysmacros.h> + +# ifdef _KERNEL +# define KMUTEX_T simple_lock_data_t +# define KRWLOCK_T lock_data_t +# include <net/net_globals.h> +# define USE_MUTEXES +# define READ_ENTER(x) lock_read(&(x)->ipf_lk) +# define WRITE_ENTER(x) lock_write(&(x)->ipf_lk) +# define MUTEX_DOWNGRADE(x) lock_write_to_read(&(x)->ipf_lk) +# define RWLOCK_INIT(x, y) lock_init(&(x)->ipf_lk, TRUE) +# define RWLOCK_EXIT(x) lock_done(&(x)->ipf_lk) +# define RW_DESTROY(x) 
lock_terminate(&(x)->ipf_lk) +# define MUTEX_ENTER(x) simple_lock(&(x)->ipf_lk) +# define MUTEX_INIT(x, y) simple_lock_init(&(x)->ipf_lk) +# define MUTEX_DESTROY(x) simple_lock_terminate(&(x)->ipf_lk) +# define MUTEX_EXIT(x) simple_unlock(&(x)->ipf_lk) +# define MUTEX_NUKE(x) bzero(x, sizeof(*(x))) +# define ATOMIC_INC64(x) atomic_incq((uint64_t*)&(x)) +# define ATOMIC_DEC64(x) atomic_decq((uint64_t*)&(x)) +# define ATOMIC_INC32(x) atomic_incl((uint32_t*)&(x)) +# define ATOMIC_DEC32(x) atomic_decl((uint32_t*)&(x)) +# define ATOMIC_INC16(x) { simple_lock(&ipf_rw); (x)++; \ + simple_unlock(&ipf_rw); } +# define ATOMIC_DEC16(x) { simple_lock(&ipf_rw); (x)--; \ + simple_unlock(&ipf_rw); } +# define ATOMIC_INCL(x) atomic_incl((uint32_t*)&(x)) +# define ATOMIC_DECL(x) atomic_decl((uint32_t*)&(x)) +# define ATOMIC_INC(x) { simple_lock(&ipf_rw); (x)++; \ + simple_unlock(&ipf_rw); } +# define ATOMIC_DEC(x) { simple_lock(&ipf_rw); (x)--; \ + simple_unlock(&ipf_rw); } +# define SPL_NET(x) ; +# define SPL_IMP(x) ; +# undef SPL_X +# define SPL_X(x) ; +# define UIOMOVE(a,b,c,d) uiomove((caddr_t)a, b, d) +# define FREE_MB_T(m) m_freem(m) +# define MTOD(m,t) mtod(m,t) +# define GETIFP(n, v) ifunit(n) +# define GET_MINOR getminor +# define WAKEUP(id,x) wakeup(id + x) +# define COPYIN(a,b,c) copyin((caddr_t)(a), (caddr_t)(b), (c)) +# define COPYOUT(a,b,c) copyout((caddr_t)(a), (caddr_t)(b), (c)) +# define BCOPYIN(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c)) +# define BCOPYOUT(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c)) +# define KMALLOC(a, b) MALLOC((a), b, sizeof(*(a)), M_PFILT, M_NOWAIT) +# define KMALLOCS(a, b, c) MALLOC((a), b, (c), M_PFILT, \ + ((c) > 4096) ? 
M_WAITOK : M_NOWAIT) +# define KFREE(x) FREE((x), M_PFILT) +# define KFREES(x,s) FREE((x), M_PFILT) +# define MSGDSIZE(x) mbufchainlen(x) +# define M_LEN(x) (x)->m_len +# define M_DUPLICATE(x) m_copy((x), 0, M_COPYALL) +# define GETKTIME(x) microtime((struct timeval *)x) +# define CACHE_HASH(x) ((IFNAME(fin->fin_ifp)[0] + \ + ((struct ifnet *)fin->fin_ifp)->if_unit) & 7) +# define IPF_PANIC(x,y) if (x) { printf y; panic("ipf_panic"); } +typedef struct mbuf mb_t; +# endif /* _KERNEL */ + +# if (defined(_KERNEL) || defined(_NO_BITFIELDS) || (__STDC__ == 1)) +# define IP_V(x) ((x)->ip_vhl >> 4) +# define IP_HL(x) ((x)->ip_vhl & 0xf) +# define IP_V_A(x,y) (x)->ip_vhl |= (((y) << 4) & 0xf0) +# define IP_HL_A(x,y) (x)->ip_vhl |= ((y) & 0xf) +# define TCP_X2(x) ((x)->th_xoff & 0xf) +# define TCP_X2_A(x,y) (x)->th_xoff |= ((y) & 0xf) +# define TCP_OFF(x) ((x)->th_xoff >> 4) +# define TCP_OFF_A(x,y) (x)->th_xoff |= (((y) << 4) & 0xf0) +# endif + +/* + * These values are taken from the Solaris #defines for little endian. + */ +#define IP6F_MORE_FRAG 0x0100 +#define IP6F_RESERVED_MASK 0x0600 +#define IP6F_OFF_MASK 0xf8ff + +struct ip6_ext { + u_char ip6e_nxt; + u_char ip6e_len; +}; + +typedef int ioctlcmd_t; +/* + * Really, any arch where sizeof(long) != sizeof(int). 
+ */ +typedef unsigned int u_32_t; +# define U_32_T 1 + +# define OS_RECOGNISED 1 +#endif /* __osf__ */ + +/* ----------------------------------------------------------------------- */ +/* N E T B S D */ +/* ----------------------------------------------------------------------- */ +#ifdef __NetBSD__ +# if defined(_KERNEL) && !defined(IPFILTER_LKM) +# include "bpfilter.h" +# if defined(__NetBSD_Version__) && (__NetBSD_Version__ >= 104110000) +# include "opt_inet.h" +# endif +# ifdef INET6 +# define USE_INET6 +# endif +# if (__NetBSD_Version__ >= 105000000) +# define HAVE_M_PULLDOWN 1 +# endif +# endif + +# ifdef _KERNEL +# define MSGDSIZE(x) mbufchainlen(x) +# define M_LEN(x) (x)->m_len +# define M_DUPLICATE(x) m_copy((x), 0, M_COPYALL) +# define GETKTIME(x) microtime((struct timeval *)x) +# define IPF_PANIC(x,y) if (x) { printf y; panic("ipf_panic"); } +# define COPYIN(a,b,c) copyin((caddr_t)(a), (caddr_t)(b), (c)) +# define COPYOUT(a,b,c) copyout((caddr_t)(a), (caddr_t)(b), (c)) +# define BCOPYIN(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c)) +# define BCOPYOUT(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c)) +typedef struct mbuf mb_t; +# endif /* _KERNEL */ +# if (NetBSD <= 1991011) && (NetBSD >= 199606) +# define IFNAME(x) ((struct ifnet *)x)->if_xname +# define COPYIFNAME(x, b) \ + (void) strncpy(b, \ + ((struct ifnet *)x)->if_xname, \ + LIFNAMSIZ) +# define CACHE_HASH(x) ((((struct ifnet *)fin->fin_ifp)->if_index)&7) +# else +# define CACHE_HASH(x) ((IFNAME(fin->fin_ifp)[0] + \ + ((struct ifnet *)fin->fin_ifp)->if_unit) & 7) +# endif + +typedef struct uio uio_t; +typedef u_long ioctlcmd_t; +typedef int minor_t; +typedef u_int32_t u_32_t; +# define U_32_T 1 + +# define OS_RECOGNISED 1 +#endif /* __NetBSD__ */ + + +/* ----------------------------------------------------------------------- */ +/* F R E E B S D */ +/* ----------------------------------------------------------------------- */ +#ifdef __FreeBSD__ +# if defined(_KERNEL) +# if (__FreeBSD_version >= 
500000) +# include "opt_bpf.h" +# else +# include "bpf.h" +# endif +# if defined(__FreeBSD_version) && (__FreeBSD_version >= 400000) +# include "opt_inet6.h" +# endif +# if defined(INET6) && !defined(USE_INET6) +# define USE_INET6 +# endif +# endif + +# if defined(_KERNEL) +# if (__FreeBSD_version >= 400000) +/* + * When #define'd, the 5.2.1 kernel panics when used with the ftp proxy. + * There may be other, safe, kernels but this is not extensively tested yet. + */ +# define HAVE_M_PULLDOWN +# endif +# if !defined(IPFILTER_LKM) && (__FreeBSD_version >= 300000) +# include "opt_ipfilter.h" +# endif +# define COPYIN(a,b,c) copyin((caddr_t)(a), (caddr_t)(b), (c)) +# define COPYOUT(a,b,c) copyout((caddr_t)(a), (caddr_t)(b), (c)) +# define BCOPYIN(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c)) +# define BCOPYOUT(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c)) + +# if (__FreeBSD_version >= 500043) +# define NETBSD_PF +# endif +# endif /* _KERNEL */ + +# if (__FreeBSD_version >= 500043) +# include <sys/mutex.h> +# include <sys/sx.h> +/* + * Whilst the sx(9) locks on FreeBSD have the right semantics and interface + * for what we want to use them for, despite testing showing they work - + * with a WITNESS kernel, it generates LOR messages. 
+ */ +# define KMUTEX_T struct mtx +# if 1 +# define KRWLOCK_T struct mtx +# else +# define KRWLOCK_T struct sx +# endif +# endif + +# if (__FreeBSD_version >= 501113) +# include <net/if_var.h> +# define IFNAME(x) ((struct ifnet *)x)->if_xname +# define COPYIFNAME(x, b) \ + (void) strncpy(b, \ + ((struct ifnet *)x)->if_xname, \ + LIFNAMSIZ) +# endif +# if (__FreeBSD_version >= 500043) +# define CACHE_HASH(x) ((((struct ifnet *)fin->fin_ifp)->if_index) & 7) +# else +# define CACHE_HASH(x) ((IFNAME(fin->fin_ifp)[0] + \ + ((struct ifnet *)fin->fin_ifp)->if_unit) & 7) +# endif + +# ifdef _KERNEL +# define GETKTIME(x) microtime((struct timeval *)x) + +# if (__FreeBSD_version >= 500002) +# include <netinet/in_systm.h> +# include <netinet/ip.h> +# include <machine/in_cksum.h> +# endif + +# if (__FreeBSD_version >= 500043) +# define USE_MUTEXES +# define MUTEX_ENTER(x) mtx_lock(&(x)->ipf_lk) +# define MUTEX_EXIT(x) mtx_unlock(&(x)->ipf_lk) +# define MUTEX_INIT(x,y) mtx_init(&(x)->ipf_lk, (y), NULL,\ + MTX_DEF) +# define MUTEX_DESTROY(x) mtx_destroy(&(x)->ipf_lk) +# define MUTEX_NUKE(x) bzero((x), sizeof(*(x))) +/* + * Whilst the sx(9) locks on FreeBSD have the right semantics and interface + * for what we want to use them for, despite testing showing they work - + * with a WITNESS kernel, it generates LOR messages. 
+ */ +# if 1 +# define READ_ENTER(x) mtx_lock(&(x)->ipf_lk) +# define WRITE_ENTER(x) mtx_lock(&(x)->ipf_lk) +# define RWLOCK_EXIT(x) mtx_unlock(&(x)->ipf_lk) +# define MUTEX_DOWNGRADE(x) ; +# define RWLOCK_INIT(x,y) mtx_init(&(x)->ipf_lk, (y), NULL,\ + MTX_DEF) +# define RW_DESTROY(x) mtx_destroy(&(x)->ipf_lk) +# else +# define READ_ENTER(x) sx_slock(&(x)->ipf_lk) +# define WRITE_ENTER(x) sx_xlock(&(x)->ipf_lk) +# define MUTEX_DOWNGRADE(x) sx_downgrade(&(x)->ipf_lk) +# define RWLOCK_INIT(x, y) sx_init(&(x)->ipf_lk, (y)) +# define RW_DESTROY(x) sx_destroy(&(x)->ipf_lk) +# ifdef sx_unlock +# define RWLOCK_EXIT(x) sx_unlock(x) +# else +# define RWLOCK_EXIT(x) do { \ + if ((x)->ipf_lk.sx_cnt < 0) \ + sx_xunlock(&(x)->ipf_lk); \ + else \ + sx_sunlock(&(x)->ipf_lk); \ + } while (0) +# endif +# endif +# include <machine/atomic.h> +# define ATOMIC_INC(x) { mtx_lock(&ipf_rw.ipf_lk); (x)++; \ + mtx_unlock(&ipf_rw.ipf_lk); } +# define ATOMIC_DEC(x) { mtx_lock(&ipf_rw.ipf_lk); (x)--; \ + mtx_unlock(&ipf_rw.ipf_lk); } +# define ATOMIC_INCL(x) atomic_add_long(&(x), 1) +# define ATOMIC_INC64(x) ATOMIC_INC(x) +# define ATOMIC_INC32(x) atomic_add_32(&(x), 1) +# define ATOMIC_INC16(x) atomic_add_16(&(x), 1) +# define ATOMIC_DECL(x) atomic_add_long(&(x), -1) +# define ATOMIC_DEC64(x) ATOMIC_DEC(x) +# define ATOMIC_DEC32(x) atomic_add_32(&(x), -1) +# define ATOMIC_DEC16(x) atomic_add_16(&(x), -1) +# define SPL_X(x) ; +# define SPL_NET(x) ; +# define SPL_IMP(x) ; +extern int in_cksum __P((struct mbuf *, int)); +# endif /* __FreeBSD_version >= 500043 */ +# define MSGDSIZE(x) mbufchainlen(x) +# define M_LEN(x) (x)->m_len +# define M_DUPLICATE(x) m_copy((x), 0, M_COPYALL) +# define IPF_PANIC(x,y) if (x) { printf y; panic("ipf_panic"); } +typedef struct mbuf mb_t; +# endif /* _KERNEL */ + +# if __FreeBSD__ < 3 +# include <machine/spl.h> +# else +# if __FreeBSD__ == 3 +# if defined(IPFILTER_LKM) && !defined(ACTUALLY_LKM_NOT_KERNEL) +# define ACTUALLY_LKM_NOT_KERNEL +# endif +# endif +# 
endif + +# if (__FreeBSD_version >= 300000) +typedef u_long ioctlcmd_t; +# else +typedef int ioctlcmd_t; +# endif +typedef struct uio uio_t; +typedef int minor_t; +typedef u_int32_t u_32_t; +# define U_32_T 1 + +# define OS_RECOGNISED 1 +#endif /* __FreeBSD__ */ + + +/* ----------------------------------------------------------------------- */ +/* O P E N B S D */ +/* ----------------------------------------------------------------------- */ +#ifdef __OpenBSD__ +# ifdef INET6 +# define USE_INET6 +# endif + +# ifdef _KERNEL +# if !defined(IPFILTER_LKM) +# include "bpfilter.h" +# endif +# if (OpenBSD >= 200311) +# define SNPRINTF snprintf +# if defined(USE_INET6) +# include "netinet6/in6_var.h" +# include "netinet6/nd6.h" +# endif +# endif +# if (OpenBSD >= 200012) +# define HAVE_M_PULLDOWN 1 +# endif +# define COPYIN(a,b,c) copyin((caddr_t)(a), (caddr_t)(b), (c)) +# define COPYOUT(a,b,c) copyout((caddr_t)(a), (caddr_t)(b), (c)) +# define BCOPYIN(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c)) +# define BCOPYOUT(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c)) +# define GETKTIME(x) microtime((struct timeval *)x) +# define MSGDSIZE(x) mbufchainlen(x) +# define M_LEN(x) (x)->m_len +# define M_DUPLICATE(x) m_copy((x), 0, M_COPYALL) +# define IPF_PANIC(x,y) if (x) { printf y; panic("ipf_panic"); } +typedef struct mbuf mb_t; +# endif /* _KERNEL */ +# if (OpenBSD >= 199603) +# define IFNAME(x) ((struct ifnet *)x)->if_xname /* one argument, matching the IFNAME(fin->fin_ifp) use in CACHE_HASH */ +# define COPYIFNAME(x, b) \ + (void) strncpy(b, \ + ((struct ifnet *)x)->if_xname, \ + LIFNAMSIZ) +# define CACHE_HASH(x) ((((struct ifnet *)fin->fin_ifp)->if_index)&7) +# else +# define CACHE_HASH(x) ((IFNAME(fin->fin_ifp)[0] + \ + ((struct ifnet *)fin->fin_ifp)->if_unit) & 7) +# endif + +typedef struct uio uio_t; +typedef u_long ioctlcmd_t; +typedef int minor_t; +typedef u_int32_t u_32_t; +# define U_32_T 1 + +# define OS_RECOGNISED 1 +#endif /* __OpenBSD__ */ + + +/* ----------------------------------------------------------------------- */ +/* B S D O 
S */ +/* ----------------------------------------------------------------------- */ +#ifdef _BSDI_VERSION +# ifdef INET6 +# define USE_INET6 +# endif + +# ifdef _KERNEL +# define GETKTIME(x) microtime((struct timeval *)x) +# define MSGDSIZE(x) mbufchainlen(x) +# define M_LEN(x) (x)->m_len +# define M_DUPLICATE(x) m_copy((x), 0, M_COPYALL) +# define CACHE_HASH(x) ((IFNAME(fin->fin_ifp)[0] + \ + ((struct ifnet *)fin->fin_ifp)->if_unit) & 7) +typedef struct mbuf mb_t; +# endif /* _KERNEL */ + +# if (_BSDI_VERSION >= 199701) +typedef u_long ioctlcmd_t; +# else +typedef int ioctlcmd_t; +# endif +typedef u_int32_t u_32_t; +# define U_32_T 1 + +#endif /* _BSDI_VERSION */ + + +/* ----------------------------------------------------------------------- */ +/* S U N O S 4 */ +/* ----------------------------------------------------------------------- */ +#if defined(sun) && !defined(OS_RECOGNISED) /* SunOS4 */ +# ifdef _KERNEL +# include <sys/kmem_alloc.h> +# define GETKTIME(x) uniqtime((struct timeval *)x) +# define MSGDSIZE(x) mbufchainlen(x) +# define M_LEN(x) (x)->m_len +# define M_DUPLICATE(x) m_copy((x), 0, M_COPYALL) +# define CACHE_HASH(x) ((IFNAME(fin->fin_ifp)[0] + \ + ((struct ifnet *)fin->fin_ifp)->if_unit) & 7) +# define GETIFP(n, v) ifunit(n, IFNAMSIZ) +# define KFREE(x) kmem_free((char *)(x), sizeof(*(x))) +# define KFREES(x,s) kmem_free((char *)(x), (s)) +# define SLEEP(id, n) sleep((id), PZERO+1) +# define WAKEUP(id,x) wakeup(id + x) +# define UIOMOVE(a,b,c,d) uiomove((caddr_t)a,b,c,d) +# define IPF_PANIC(x,y) if (x) { printf y; panic("ipf_panic"); } + +extern void m_copydata __P((struct mbuf *, int, int, caddr_t)); +extern void m_copyback __P((struct mbuf *, int, int, caddr_t)); + +typedef struct mbuf mb_t; +# endif + +typedef struct uio uio_t; +typedef int ioctlcmd_t; +typedef int minor_t; +typedef unsigned int u_32_t; +# define U_32_T 1 + +# define OS_RECOGNISED 1 + +#endif /* SunOS 4 */ + +/* 
----------------------------------------------------------------------- */ +/* L I N U X */ +/* ----------------------------------------------------------------------- */ +#if defined(linux) && !defined(OS_RECOGNISED) +#include <linux/config.h> +#include <linux/version.h> +# if LINUX >= 20600 +# define HDR_T_PRIVATE 1 +# endif +# undef USE_INET6 +# ifdef USE_INET6 +struct ip6_ext { + u_char ip6e_nxt; + u_char ip6e_len; +}; +# endif + +# ifdef _KERNEL +# define IPF_PANIC(x,y) if (x) { printf y; panic("ipf_panic"); } +# define BCOPYIN(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c)) +# define BCOPYOUT(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c)) +# define COPYIN(a,b,c) copy_from_user((caddr_t)(b), (caddr_t)(a), (c)) +# define COPYOUT(a,b,c) copy_to_user((caddr_t)(b), (caddr_t)(a), (c)) +# define FREE_MB_T(m) kfree_skb(m) +# define GETKTIME(x) do_gettimeofday((struct timeval *)x) +# define SLEEP(x,s) 0, interruptible_sleep_on(x##_linux) +# define WAKEUP(x,y) wake_up(x##_linux + y) +# define UIOMOVE(a,b,c,d) uiomove(a,b,c,d) +# define USE_MUTEXES +# define KRWLOCK_T rwlock_t +# define KMUTEX_T spinlock_t +# define MUTEX_INIT(x,y) spin_lock_init(&(x)->ipf_lk) +# define MUTEX_ENTER(x) spin_lock(&(x)->ipf_lk) +# define MUTEX_EXIT(x) spin_unlock(&(x)->ipf_lk) +# define MUTEX_DESTROY(x) do { } while (0) +# define MUTEX_NUKE(x) bzero(&(x)->ipf_lk, sizeof((x)->ipf_lk)) +# define READ_ENTER(x) ipf_read_enter(x) +# define WRITE_ENTER(x) ipf_write_enter(x) +# define RWLOCK_INIT(x,y) rwlock_init(&(x)->ipf_lk) +# define RW_DESTROY(x) do { } while (0) +# define RWLOCK_EXIT(x) ipf_rw_exit(x) +# define MUTEX_DOWNGRADE(x) ipf_rw_downgrade(x) +# define ATOMIC_INCL(x) do { MUTEX_ENTER(&ipf_rw); (x)++; \ + MUTEX_EXIT(&ipf_rw); } while (0) /* ATOMIC_*: update (x) under ipf_rw; do/while(0) keeps each a single statement under an unbraced if/else */ +# define ATOMIC_DECL(x) do { MUTEX_ENTER(&ipf_rw); (x)--; \ + MUTEX_EXIT(&ipf_rw); } while (0) +# define ATOMIC_INC64(x) do { MUTEX_ENTER(&ipf_rw); (x)++; \ + MUTEX_EXIT(&ipf_rw); } while (0) +# define ATOMIC_INC32(x) do { MUTEX_ENTER(&ipf_rw); (x)++; \ + MUTEX_EXIT(&ipf_rw); } while (0) +# define ATOMIC_INC16(x) 
do { MUTEX_ENTER(&ipf_rw); (x)++; \ + MUTEX_EXIT(&ipf_rw); } while (0) +# define ATOMIC_DEC64(x) do { MUTEX_ENTER(&ipf_rw); (x)--; \ + MUTEX_EXIT(&ipf_rw); } while (0) +# define ATOMIC_DEC32(x) do { MUTEX_ENTER(&ipf_rw); (x)--; \ + MUTEX_EXIT(&ipf_rw); } while (0) +# define ATOMIC_DEC16(x) do { MUTEX_ENTER(&ipf_rw); (x)--; \ + MUTEX_EXIT(&ipf_rw); } while (0) +# define SPL_IMP(x) do { } while (0) +# define SPL_NET(x) do { } while (0) +# define SPL_X(x) do { } while (0) +# define IFNAME(x) ((struct net_device*)x)->name +# define CACHE_HASH(x) ((IFNAME(fin->fin_ifp)[0] + \ + ((struct net_device *)fin->fin_ifp)->ifindex) & 7) +typedef struct sk_buff mb_t; +extern void m_copydata __P((mb_t *, int, int, caddr_t)); +extern void m_copyback __P((mb_t *, int, int, caddr_t)); +extern void m_adj __P((mb_t *, int)); +extern mb_t *m_pullup __P((mb_t *, int)); +# define mbuf sk_buff + +# define mtod(m, t) ((t)(m)->data) +# define m_len len +# define m_next next +# define M_DUPLICATE(m) skb_clone((m), in_interrupt() ? GFP_ATOMIC : \ + GFP_KERNEL) +# define MSGDSIZE(m) (m)->len +# define M_LEN(m) (m)->len + +# define splnet(x) ; +# define printf printk +# define bcopy(s,d,z) memmove(d, s, z) +# define bzero(s,z) memset(s, 0, z) +# define bcmp(a,b,z) memcmp(a, b, z) + +# define ifnet net_device +# define if_xname name +# define if_unit ifindex + +# define KMALLOC(x,t) (x) = (t)kmalloc(sizeof(*(x)), \ + in_interrupt() ? GFP_ATOMIC : GFP_KERNEL) +# define KFREE(x) kfree(x) +# define KMALLOCS(x,t,s) (x) = (t)kmalloc((s), \ + in_interrupt() ? 
GFP_ATOMIC : GFP_KERNEL) +# define KFREES(x,s) kfree(x) + +# define GETIFP(n,v) dev_get_by_name(n) + +# else +# include <net/ethernet.h> + +struct mbuf { +}; + +# ifndef _NET_ROUTE_H +struct rtentry { +}; +# endif + +struct ifnet { + char if_xname[IFNAMSIZ]; + int if_unit; + int (* if_output) __P((struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *)); + struct ifaddr *if_addrlist; +}; +# define IFNAME(x) ((struct ifnet *)x)->if_xname + +# endif /* _KERNEL */ + +# define COPYIFNAME(x, b) \ + (void) strncpy(b, \ + ((struct ifnet *)x)->if_xname, \ + LIFNAMSIZ) + +# include <linux/fs.h> +# define FWRITE FMODE_WRITE +# define FREAD FMODE_READ + +# define __USE_MISC 1 +# define __FAVOR_BSD 1 + +typedef struct uio { + struct iovec *uio_iov; + void *uio_file; + char *uio_buf; + int uio_iovcnt; + int uio_offset; + size_t uio_resid; + int uio_rw; +} uio_t; + +extern int uiomove __P((caddr_t, size_t, int, struct uio *)); + +# define UIO_READ 1 +# define UIO_WRITE 2 + +typedef u_long ioctlcmd_t; +typedef int minor_t; +typedef u_int32_t u_32_t; +# define U_32_T 1 + +# define OS_RECOGNISED 1 + +#endif + + +/* ----------------------------------------------------------------------- */ +/* A I X */ +/* ----------------------------------------------------------------------- */ +#if defined(_AIX51) +# undef MENTAT + +# include <sys/lock.h> +# include <sys/sysmacros.h> + +# ifdef _KERNEL +# define rw_read_locked(x) 0 +# include <net/net_globals.h> +# include <net/net_malloc.h> +# define KMUTEX_T simple_lock_t +# define KRWLOCK_T complex_lock_t +# define USE_MUTEXES 1 +# define USE_SPL 1 +# define READ_ENTER(x) lock_read((x)->ipf_lk) +# define WRITE_ENTER(x) lock_write((x)->ipf_lk) +# define MUTEX_DOWNGRADE(x) lock_write_to_read((x)->ipf_lk) +# define RWLOCK_INIT(x, y) lock_alloc(&(x)->ipf_lk, \ + LOCK_ALLOC_PIN, \ + (u_short)y, 0); \ + lock_init((x)->ipf_lk, TRUE) +# define RWLOCK_EXIT(x) lock_done((x)->ipf_lk) +# define RW_DESTROY(x) lock_free(&(x)->ipf_lk) +# define 
MUTEX_ENTER(x) simple_lock((x)->ipf_lk) +# define MUTEX_INIT(x, y) lock_alloc(&(x)->ipf_lk, \ + LOCK_ALLOC_PIN, \ + (u_short)y, 0); \ + simple_lock_init((x)->ipf_lk) +# define MUTEX_DESTROY(x) lock_free(&(x)->ipf_lk) +# define MUTEX_EXIT(x) simple_unlock((x)->ipf_lk) +# define MUTEX_NUKE(x) bzero(&(x)->ipf_lk, sizeof((x)->ipf_lk)) +# define ATOMIC_INC64(x) { MUTEX_ENTER(&ipf_rw); (x)++; \ + MUTEX_EXIT(&ipf_rw); } +# define ATOMIC_DEC64(x) { MUTEX_ENTER(&ipf_rw); (x)--; \ + MUTEX_EXIT(&ipf_rw); } +# define ATOMIC_INC32(x) { MUTEX_ENTER(&ipf_rw); (x)++; \ + MUTEX_EXIT(&ipf_rw); } +# define ATOMIC_DEC32(x) { MUTEX_ENTER(&ipf_rw); (x)--; \ + MUTEX_EXIT(&ipf_rw); } +# define ATOMIC_INCL(x) { MUTEX_ENTER(&ipf_rw); (x)++; \ + MUTEX_EXIT(&ipf_rw); } +# define ATOMIC_DECL(x) { MUTEX_ENTER(&ipf_rw); (x)--; \ + MUTEX_EXIT(&ipf_rw); } +# define ATOMIC_INC(x) { MUTEX_ENTER(&ipf_rw); (x)++; \ + MUTEX_EXIT(&ipf_rw); } +# define ATOMIC_DEC(x) { MUTEX_ENTER(&ipf_rw); (x)--; \ + MUTEX_EXIT(&ipf_rw); } +# define SPL_NET(x) x = splnet() +# define SPL_IMP(x) x = splimp() +# undef SPL_X +# define SPL_X(x) splx(x) +# define UIOMOVE(a,b,c,d) uiomove((caddr_t)a,b,c,d) +extern void* getifp __P((char *, int)); +# define GETIFP(n, v) getifp(n, v) +# define GET_MINOR minor +# define SLEEP(id, n) sleepx((id), PZERO+1, 0) +# define WAKEUP(id,x) wakeup(id) +# define COPYIN(a,b,c) copyin((caddr_t)(a), (caddr_t)(b), (c)) +# define COPYOUT(a,b,c) copyout((caddr_t)(a), (caddr_t)(b), (c)) +# define BCOPYIN(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c)) +# define BCOPYOUT(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c)) +# define KMALLOC(a, b) MALLOC((a), b, sizeof(*(a)), M_TEMP, M_NOWAIT) +# define KMALLOCS(a, b, c) MALLOC((a), b, (c), M_TEMP, \ + ((c) > 4096) ? 
M_WAITOK : M_NOWAIT) +# define KFREE(x) FREE((x), M_TEMP) +# define KFREES(x,s) FREE((x), M_TEMP) +# define MSGDSIZE(x) mbufchainlen(x) +# define M_LEN(x) (x)->m_len +# define M_DUPLICATE(x) m_copy((x), 0, M_COPYALL) +# define GETKTIME(x) +# define CACHE_HASH(x) ((IFNAME(fin->fin_ifp)[0] + \ + ((struct ifnet *)fin->fin_ifp)->if_unit) & 7) +# define IPF_PANIC(x,y) +typedef struct mbuf mb_t; +# endif /* _KERNEL */ + +/* + * These values are taken from the Solaris #defines for little endian. + */ +#if !defined(IP6F_MORE_FRAG) +# define IP6F_MORE_FRAG 0x0100 +#endif +#if !defined(IP6F_RESERVED_MASK) +# define IP6F_RESERVED_MASK 0x0600 +#endif +#if !defined(IP6F_OFF_MASK) +# define IP6F_OFF_MASK 0xf8ff +#endif + +struct ip6_ext { + u_char ip6e_nxt; + u_char ip6e_len; +}; + +typedef int ioctlcmd_t; +typedef int minor_t; +/* + * Really, any arch where sizeof(long) != sizeof(int). + */ +typedef unsigned int u_32_t; +# define U_32_T 1 + +# define OS_RECOGNISED 1 +#endif /* _AIX51 */ + + +#ifndef OS_RECOGNISED +#error ip_compat.h does not recognise this platform/OS. +#endif + + +/* ----------------------------------------------------------------------- */ +/* G E N E R I C */ +/* ----------------------------------------------------------------------- */ +#ifndef OS_RECOGNISED +#endif + +/* + * For BSD kernels, if bpf is in the kernel, enable ipfilter to use bpf in + * filter rules. 
+ */ +#if !defined(IPFILTER_BPF) && ((NBPF > 0) || (NBPFILTER > 0) || (DEV_BPF > 0)) +# define IPFILTER_BPF +#endif + +/* + * Userland locking primitives + */ +typedef struct { + char *eMm_owner; + char *eMm_heldin; + u_int eMm_magic; + int eMm_held; + int eMm_heldat; +#ifdef __hpux + char eMm_fill[8]; +#endif +} eMmutex_t; + +typedef struct { + char *eMrw_owner; + char *eMrw_heldin; + u_int eMrw_magic; + short eMrw_read; + short eMrw_write; + int eMrw_heldat; +#ifdef __hpux + char eMm_fill[24]; +#endif +} eMrwlock_t; + +typedef union { +#ifdef KMUTEX_T + struct { + KMUTEX_T ipf_slk; + char *ipf_lname; + } ipf_lkun_s; +#endif + eMmutex_t ipf_emu; +} ipfmutex_t; + +typedef union { +#ifdef KRWLOCK_T + struct { + KRWLOCK_T ipf_slk; + char *ipf_lname; + int ipf_sr; + int ipf_sw; + u_int ipf_magic; + } ipf_lkun_s; +#endif + eMrwlock_t ipf_emu; +} ipfrwlock_t; + +#define ipf_lk ipf_lkun_s.ipf_slk +#define ipf_lname ipf_lkun_s.ipf_lname +#define ipf_isr ipf_lkun_s.ipf_sr +#define ipf_isw ipf_lkun_s.ipf_sw +#define ipf_magic ipf_lkun_s.ipf_magic + +#if !defined(__GNUC__) || \ + (defined(__FreeBSD_version) && (__FreeBSD_version >= 503000)) +# ifndef INLINE +# define INLINE +# endif +#else +# define INLINE __inline__ +#endif + +#if defined(linux) && defined(_KERNEL) +extern INLINE void ipf_read_enter __P((ipfrwlock_t *)); +extern INLINE void ipf_write_enter __P((ipfrwlock_t *)); +extern INLINE void ipf_rw_exit __P((ipfrwlock_t *)); +extern INLINE void ipf_rw_downgrade __P((ipfrwlock_t *)); +#endif + +/* + * In a non-kernel environment, there are a lot of macros that need to be + * filled in to be null-ops or to point to some compatibility function, + * somewhere in userland. 
+ */ +#ifndef _KERNEL +typedef struct mb_s { + struct mb_s *mb_next; + int mb_len; + u_long mb_buf[2048]; +} mb_t; +# undef m_next +# define m_next mb_next +# define MSGDSIZE(x) (x)->mb_len /* XXX - from ipt.c */ +# define M_LEN(x) (x)->mb_len +# define M_DUPLICATE(x) (x) +# define GETKTIME(x) gettimeofday((struct timeval *)(x), NULL) +# undef MTOD +# define MTOD(m, t) ((t)(m)->mb_buf) +# define FREE_MB_T(x) +# define SLEEP(x,y) 1; +# define WAKEUP(x,y) ; +# define IPF_PANIC(x,y) ; +# define PANIC(x,y) ; +# define SPL_NET(x) ; +# define SPL_IMP(x) ; +# define SPL_X(x) ; +# define KMALLOC(a,b) (a) = (b)malloc(sizeof(*a)) +# define KMALLOCS(a,b,c) (a) = (b)malloc(c) +# define KFREE(x) free(x) +# define KFREES(x,s) free(x) +# define GETIFP(x, v) get_unit(x,v) +# define COPYIN(a,b,c) (bcopy((a), (b), (c)), 0) +# define COPYOUT(a,b,c) (bcopy((a), (b), (c)), 0) +# define BCOPYIN(a,b,c) (bcopy((a), (b), (c)), 0) +# define BCOPYOUT(a,b,c) (bcopy((a), (b), (c)), 0) +# define COPYDATA(m, o, l, b) bcopy(MTOD((mb_t *)m, char *) + (o), \ + (b), (l)) +# define COPYBACK(m, o, l, b) bcopy((b), \ + MTOD((mb_t *)m, char *) + (o), \ + (l)) +# define UIOMOVE(a,b,c,d) ipfuiomove(a,b,c,d) +extern void m_copydata __P((mb_t *, int, int, caddr_t)); +extern int ipfuiomove __P((caddr_t, int, int, struct uio *)); +# ifndef CACHE_HASH +# define CACHE_HASH(x) ((IFNAME(fin->fin_ifp)[0] + \ + ((struct ifnet *)fin->fin_ifp)->if_unit) & 7) +# endif + +# define MUTEX_DESTROY(x) eMmutex_destroy(&(x)->ipf_emu) +# define MUTEX_ENTER(x) eMmutex_enter(&(x)->ipf_emu, \ + __FILE__, __LINE__) +# define MUTEX_EXIT(x) eMmutex_exit(&(x)->ipf_emu) +# define MUTEX_INIT(x,y) eMmutex_init(&(x)->ipf_emu, y) +# define MUTEX_NUKE(x) bzero((x), sizeof(*(x))) + +# define MUTEX_DOWNGRADE(x) eMrwlock_downgrade(&(x)->ipf_emu, \ + __FILE__, __LINE__) +# define READ_ENTER(x) eMrwlock_read_enter(&(x)->ipf_emu, \ + __FILE__, __LINE__) +# define RWLOCK_INIT(x, y) eMrwlock_init(&(x)->ipf_emu, y) +# define RWLOCK_EXIT(x) 
eMrwlock_exit(&(x)->ipf_emu) +# define RW_DESTROY(x) eMrwlock_destroy(&(x)->ipf_emu) +# define WRITE_ENTER(x) eMrwlock_write_enter(&(x)->ipf_emu, \ + __FILE__, \ + __LINE__) + +# define USE_MUTEXES 1 + +extern void eMmutex_destroy __P((eMmutex_t *)); +extern void eMmutex_enter __P((eMmutex_t *, char *, int)); +extern void eMmutex_exit __P((eMmutex_t *)); +extern void eMmutex_init __P((eMmutex_t *, char *)); +extern void eMrwlock_destroy __P((eMrwlock_t *)); +extern void eMrwlock_exit __P((eMrwlock_t *)); +extern void eMrwlock_init __P((eMrwlock_t *, char *)); +extern void eMrwlock_read_enter __P((eMrwlock_t *, char *, int)); +extern void eMrwlock_write_enter __P((eMrwlock_t *, char *, int)); +extern void eMrwlock_downgrade __P((eMrwlock_t *, char *, int)); + +#endif + +#define MAX_IPV4HDR ((0xf << 2) + sizeof(struct icmp) + sizeof(ip_t) + 8) + +#ifndef IP_OFFMASK +# define IP_OFFMASK 0x1fff +#endif + + +/* + * On BSD's use quad_t as a guarantee for getting at least a 64bit sized + * object. + */ +#if BSD > 199306 +# define USE_QUAD_T +# define U_QUAD_T u_quad_t +# define QUAD_T quad_t +#else /* BSD > 199306 */ +# define U_QUAD_T u_long +# define QUAD_T long +#endif /* BSD > 199306 */ + + +#ifdef USE_INET6 +# if defined(__NetBSD__) || defined(__OpenBSD__) || defined(__FreeBSD__) || \ + defined(__osf__) || defined(linux) +# include <netinet/ip6.h> +# include <netinet/icmp6.h> +# if !defined(linux) +# if defined(_KERNEL) && !defined(__osf__) +# include <netinet6/ip6_var.h> +# endif +# endif +typedef struct ip6_hdr ip6_t; +# endif +#endif + +#ifndef MAX +# define MAX(a,b) (((a) > (b)) ? 
(a) : (b)) +#endif + +#if defined(_KERNEL) +# ifdef MENTAT +# define COPYDATA mb_copydata +# define COPYBACK mb_copyback +# else +# define COPYDATA m_copydata +# define COPYBACK m_copyback +# endif +# if (BSD >= 199306) || defined(__FreeBSD__) +# if (defined(__NetBSD_Version__) && (__NetBSD_Version__ < 105180000)) || \ + defined(__FreeBSD__) || (defined(OpenBSD) && (OpenBSD < 200206)) || \ + defined(_BSDI_VERSION) +# include <vm/vm.h> +# endif +# if !defined(__FreeBSD__) || (defined (__FreeBSD_version) && \ + (__FreeBSD_version >= 300000)) +# if (defined(__NetBSD_Version__) && (__NetBSD_Version__ >= 105180000)) || \ + (defined(OpenBSD) && (OpenBSD >= 200111)) +# include <uvm/uvm_extern.h> +# else +# include <vm/vm_extern.h> +extern vm_map_t kmem_map; +# endif +# include <sys/proc.h> +# else /* !__FreeBSD__ || (__FreeBSD__ && __FreeBSD_version >= 300000) */ +# include <vm/vm_kern.h> +# endif /* !__FreeBSD__ || (__FreeBSD__ && __FreeBSD_version >= 300000) */ + +# ifdef IPFILTER_M_IPFILTER +# include <sys/malloc.h> +MALLOC_DECLARE(M_IPFILTER); +# define _M_IPF M_IPFILTER +# else /* IPFILTER_M_IPFILTER */ +# ifdef M_PFIL +# define _M_IPF M_PFIL +# else +# ifdef M_IPFILTER +# define _M_IPF M_IPFILTER +# else +# define _M_IPF M_TEMP +# endif /* M_IPFILTER */ +# endif /* M_PFIL */ +# endif /* IPFILTER_M_IPFILTER */ +# define KMALLOC(a, b) MALLOC((a), b, sizeof(*(a)), _M_IPF, M_NOWAIT) +# define KMALLOCS(a, b, c) MALLOC((a), b, (c), _M_IPF, M_NOWAIT) +# define KFREE(x) FREE((x), _M_IPF) +# define KFREES(x,s) FREE((x), _M_IPF) +# define UIOMOVE(a,b,c,d) uiomove(a,b,d) +# define SLEEP(id, n) tsleep((id), PPAUSE|PCATCH, n, 0) +# define WAKEUP(id,x) wakeup(id+x) +# define GETIFP(n, v) ifunit(n) +# endif /* (Free)BSD */ + +# if !defined(USE_MUTEXES) && !defined(SPL_NET) +# if (defined(NetBSD) && (NetBSD <= 1991011) && (NetBSD >= 199407)) || \ + (defined(OpenBSD) && (OpenBSD >= 200006)) +# define SPL_NET(x) x = splsoftnet() +# else +# define SPL_IMP(x) x = splimp() +# define 
SPL_NET(x) x = splnet() +# endif /* NetBSD && (NetBSD <= 1991011) && (NetBSD >= 199407) */ +# define SPL_X(x) (void) splx(x) +# endif /* !USE_MUTEXES */ + +# ifndef FREE_MB_T +# define FREE_MB_T(m) m_freem(m) +# endif + +# ifndef MTOD +# define MTOD(m,t) mtod(m,t) +# endif + +# ifndef COPYIN +# define COPYIN(a,b,c) (bcopy((caddr_t)(a), (caddr_t)(b), (c)), 0) +# define COPYOUT(a,b,c) (bcopy((caddr_t)(a), (caddr_t)(b), (c)), 0) +# define BCOPYIN(a,b,c) (bcopy((caddr_t)(a), (caddr_t)(b), (c)), 0) +# define BCOPYOUT(a,b,c) (bcopy((caddr_t)(a), (caddr_t)(b), (c)), 0) +# endif + +# ifndef KMALLOC +# define KMALLOC(a,b) (a) = (b)new_kmem_alloc(sizeof(*(a)), \ + KMEM_NOSLEEP) +# define KMALLOCS(a,b,c) (a) = (b)new_kmem_alloc((c), KMEM_NOSLEEP) +# endif + +# ifndef GET_MINOR +# define GET_MINOR(x) minor(x) +# endif +# define PANIC(x,y) if (x) panic y +#endif /* _KERNEL */ + +#ifndef IFNAME +# define IFNAME(x) ((struct ifnet *)x)->if_name +#endif +#ifndef COPYIFNAME +# define NEED_FRGETIFNAME +extern char *fr_getifname __P((struct ifnet *, char *)); +# define COPYIFNAME(x, b) \ + fr_getifname((struct ifnet *)x, b) +#endif + +#ifndef ASSERT +# define ASSERT(x) +#endif + +/* + * Because the ctype(3) posix definition, if used "safely" in code everywhere, + * would mean all normal code that walks through strings needed casts. Yuck. + */ +#define ISALNUM(x) isalnum((u_char)(x)) +#define ISALPHA(x) isalpha((u_char)(x)) +#define ISASCII(x) isascii((u_char)(x)) +#define ISDIGIT(x) isdigit((u_char)(x)) +#define ISPRINT(x) isprint((u_char)(x)) +#define ISSPACE(x) isspace((u_char)(x)) +#define ISUPPER(x) isupper((u_char)(x)) +#define ISXDIGIT(x) isxdigit((u_char)(x)) +#define ISLOWER(x) islower((u_char)(x)) +#define TOUPPER(x) toupper((u_char)(x)) +#define TOLOWER(x) tolower((u_char)(x)) + +/* + * If mutexes aren't being used, turn all the mutex functions into null-ops. 
+ */ +#if !defined(USE_MUTEXES) +# define USE_SPL 1 +# undef RW_DESTROY +# undef MUTEX_INIT +# undef MUTEX_NUKE +# undef MUTEX_DESTROY +# define MUTEX_ENTER(x) ; +# define READ_ENTER(x) ; +# define WRITE_ENTER(x) ; +# define MUTEX_DOWNGRADE(x) ; +# define RWLOCK_INIT(x, y) ; +# define RWLOCK_EXIT(x) ; +# define RW_DESTROY(x) ; +# define MUTEX_EXIT(x) ; +# define MUTEX_INIT(x,y) ; +# define MUTEX_DESTROY(x) ; +# define MUTEX_NUKE(x) ; +#endif /* !USE_MUTEXES */ +#ifndef ATOMIC_INC +# define ATOMIC_INC(x) (x)++ +# define ATOMIC_DEC(x) (x)-- +#endif + +#if defined(USE_SPL) && defined(_KERNEL) +# define SPL_INT(x) int x +#else +# define SPL_INT(x) +#endif + +/* + * If there are no atomic operations for bit sizes defined, define them to all + * use a generic one that works for all sizes. + */ +#ifndef ATOMIC_INCL +# define ATOMIC_INCL ATOMIC_INC +# define ATOMIC_INC64 ATOMIC_INC +# define ATOMIC_INC32 ATOMIC_INC +# define ATOMIC_INC16 ATOMIC_INC +# define ATOMIC_DECL ATOMIC_DEC +# define ATOMIC_DEC64 ATOMIC_DEC +# define ATOMIC_DEC32 ATOMIC_DEC +# define ATOMIC_DEC16 ATOMIC_DEC +#endif + +#ifndef HDR_T_PRIVATE +typedef struct tcphdr tcphdr_t; +typedef struct udphdr udphdr_t; +#endif +typedef struct icmp icmphdr_t; +typedef struct ip ip_t; +typedef struct ether_header ether_header_t; +typedef struct tcpiphdr tcpiphdr_t; + +#ifndef FR_GROUPLEN +# define FR_GROUPLEN 16 +#endif + +#ifdef offsetof +# undef offsetof +#endif +#ifndef offsetof +# define offsetof(t,m) (int)((&((t *)0L)->m)) +#endif + +/* + * This set of macros has been brought about because on Tru64 it is not + * possible to easily assign or examine values in a structure that are + * bit fields. 
+ */ +#ifndef IP_V +# define IP_V(x) (x)->ip_v +#endif +#ifndef IP_V_A +# define IP_V_A(x,y) (x)->ip_v = (y) +#endif +#ifndef IP_HL +# define IP_HL(x) (x)->ip_hl +#endif +#ifndef IP_HL_A +# define IP_HL_A(x,y) (x)->ip_hl = (y) +#endif +#ifndef TCP_X2 +# define TCP_X2(x) (x)->th_x2 +#endif +#ifndef TCP_X2_A +# define TCP_X2_A(x,y) (x)->th_x2 = (y) +#endif +#ifndef TCP_OFF +# define TCP_OFF(x) (x)->th_off +#endif +#ifndef TCP_OFF_A +# define TCP_OFF_A(x,y) (x)->th_off = (y) +#endif +#define IPMINLEN(i, h) ((i)->ip_len >= (IP_HL(i) * 4 + sizeof(struct h))) + + +/* + * XXX - This is one of those *awful* hacks which nobody likes + */ +#ifdef ultrix +#define A_A +#else +#define A_A & +#endif + +#define TCPF_ALL (TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG|\ + TH_ECN|TH_CWR) + +#if (BSD >= 199306) && !defined(m_act) +# define m_act m_nextpkt +#endif + +/* + * Security Options for Internet Protocol (IPSO) as defined in RFC 1108. + * + * Basic Option + * + * 00000001 - (Reserved 4) + * 00111101 - Top Secret + * 01011010 - Secret + * 10010110 - Confidential + * 01100110 - (Reserved 3) + * 11001100 - (Reserved 2) + * 10101011 - Unclassified + * 11110001 - (Reserved 1) + */ +#define IPSO_CLASS_RES4 0x01 +#define IPSO_CLASS_TOPS 0x3d +#define IPSO_CLASS_SECR 0x5a +#define IPSO_CLASS_CONF 0x96 +#define IPSO_CLASS_RES3 0x66 +#define IPSO_CLASS_RES2 0xcc +#define IPSO_CLASS_UNCL 0xab +#define IPSO_CLASS_RES1 0xf1 + +#define IPSO_AUTH_GENSER 0x80 +#define IPSO_AUTH_ESI 0x40 +#define IPSO_AUTH_SCI 0x20 +#define IPSO_AUTH_NSA 0x10 +#define IPSO_AUTH_DOE 0x08 +#define IPSO_AUTH_UN 0x06 +#define IPSO_AUTH_FTE 0x01 + +/* + * IP option #defines + */ +#undef IPOPT_RR +#define IPOPT_RR 7 +#undef IPOPT_ZSU +#define IPOPT_ZSU 10 /* ZSU */ +#undef IPOPT_MTUP +#define IPOPT_MTUP 11 /* MTUP */ +#undef IPOPT_MTUR +#define IPOPT_MTUR 12 /* MTUR */ +#undef IPOPT_ENCODE +#define IPOPT_ENCODE 15 /* ENCODE */ +#undef IPOPT_TS +#define IPOPT_TS 68 +#undef IPOPT_TR +#define IPOPT_TR 82 /* TR */ +#undef 
IPOPT_SECURITY +#define IPOPT_SECURITY 130 +#undef IPOPT_LSRR +#define IPOPT_LSRR 131 +#undef IPOPT_E_SEC +#define IPOPT_E_SEC 133 /* E-SEC */ +#undef IPOPT_CIPSO +#define IPOPT_CIPSO 134 /* CIPSO */ +#undef IPOPT_SATID +#define IPOPT_SATID 136 +#ifndef IPOPT_SID +# define IPOPT_SID IPOPT_SATID +#endif +#undef IPOPT_SSRR +#define IPOPT_SSRR 137 +#undef IPOPT_ADDEXT +#define IPOPT_ADDEXT 147 /* ADDEXT */ +#undef IPOPT_VISA +#define IPOPT_VISA 142 /* VISA */ +#undef IPOPT_IMITD +#define IPOPT_IMITD 144 /* IMITD */ +#undef IPOPT_EIP +#define IPOPT_EIP 145 /* EIP */ +#undef IPOPT_RTRALRT +#define IPOPT_RTRALRT 148 /* RTRALRT */ +#undef IPOPT_SDB +#define IPOPT_SDB 149 +#undef IPOPT_NSAPA +#define IPOPT_NSAPA 150 +#undef IPOPT_DPS +#define IPOPT_DPS 151 +#undef IPOPT_UMP +#define IPOPT_UMP 152 +#undef IPOPT_FINN +#define IPOPT_FINN 205 /* FINN */ + +#ifndef TCPOPT_EOL +# define TCPOPT_EOL 0 +#endif +#ifndef TCPOPT_NOP +# define TCPOPT_NOP 1 +#endif +#ifndef TCPOPT_MAXSEG +# define TCPOPT_MAXSEG 2 +#endif +#ifndef TCPOLEN_MAXSEG +# define TCPOLEN_MAXSEG 4 +#endif +#ifndef TCPOPT_WINDOW +# define TCPOPT_WINDOW 3 +#endif +#ifndef TCPOLEN_WINDOW +# define TCPOLEN_WINDOW 3 +#endif +#ifndef TCPOPT_SACK_PERMITTED +# define TCPOPT_SACK_PERMITTED 4 +#endif +#ifndef TCPOLEN_SACK_PERMITTED +# define TCPOLEN_SACK_PERMITTED 2 +#endif +#ifndef TCPOPT_SACK +# define TCPOPT_SACK 5 +#endif +#ifndef TCPOPT_TIMESTAMP +# define TCPOPT_TIMESTAMP 8 +#endif + +#ifndef ICMP_MINLEN +# define ICMP_MINLEN 8 +#endif +#ifndef ICMP_ECHOREPLY +# define ICMP_ECHOREPLY 0 +#endif +#ifndef ICMP_UNREACH +# define ICMP_UNREACH 3 +#endif +#ifndef ICMP_UNREACH_NET +# define ICMP_UNREACH_NET 0 +#endif +#ifndef ICMP_UNREACH_HOST +# define ICMP_UNREACH_HOST 1 +#endif +#ifndef ICMP_UNREACH_PROTOCOL +# define ICMP_UNREACH_PROTOCOL 2 +#endif +#ifndef ICMP_UNREACH_PORT +# define ICMP_UNREACH_PORT 3 +#endif +#ifndef ICMP_UNREACH_NEEDFRAG +# define ICMP_UNREACH_NEEDFRAG 4 +#endif +#ifndef ICMP_UNREACH_SRCFAIL +# 
define ICMP_UNREACH_SRCFAIL 5 +#endif +#ifndef ICMP_UNREACH_NET_UNKNOWN +# define ICMP_UNREACH_NET_UNKNOWN 6 +#endif +#ifndef ICMP_UNREACH_HOST_UNKNOWN +# define ICMP_UNREACH_HOST_UNKNOWN 7 +#endif +#ifndef ICMP_UNREACH_ISOLATED +# define ICMP_UNREACH_ISOLATED 8 +#endif +#ifndef ICMP_UNREACH_NET_PROHIB +# define ICMP_UNREACH_NET_PROHIB 9 +#endif +#ifndef ICMP_UNREACH_HOST_PROHIB +# define ICMP_UNREACH_HOST_PROHIB 10 +#endif +#ifndef ICMP_UNREACH_TOSNET +# define ICMP_UNREACH_TOSNET 11 +#endif +#ifndef ICMP_UNREACH_TOSHOST +# define ICMP_UNREACH_TOSHOST 12 +#endif +#ifndef ICMP_UNREACH_ADMIN_PROHIBIT +# define ICMP_UNREACH_ADMIN_PROHIBIT 13 +#endif +#ifndef ICMP_UNREACH_FILTER +# define ICMP_UNREACH_FILTER 13 +#endif +#ifndef ICMP_UNREACH_HOST_PRECEDENCE +# define ICMP_UNREACH_HOST_PRECEDENCE 14 +#endif +#ifndef ICMP_UNREACH_PRECEDENCE_CUTOFF +# define ICMP_UNREACH_PRECEDENCE_CUTOFF 15 +#endif +#ifndef ICMP_SOURCEQUENCH +# define ICMP_SOURCEQUENCH 4 +#endif +#ifndef ICMP_REDIRECT_NET +# define ICMP_REDIRECT_NET 0 +#endif +#ifndef ICMP_REDIRECT_HOST +# define ICMP_REDIRECT_HOST 1 +#endif +#ifndef ICMP_REDIRECT_TOSNET +# define ICMP_REDIRECT_TOSNET 2 +#endif +#ifndef ICMP_REDIRECT_TOSHOST +# define ICMP_REDIRECT_TOSHOST 3 +#endif +#ifndef ICMP_ALTHOSTADDR +# define ICMP_ALTHOSTADDR 6 +#endif +#ifndef ICMP_TIMXCEED +# define ICMP_TIMXCEED 11 +#endif +#ifndef ICMP_TIMXCEED_INTRANS +# define ICMP_TIMXCEED_INTRANS 0 +#endif +#ifndef ICMP_TIMXCEED_REASS +# define ICMP_TIMXCEED_REASS 1 +#endif +#ifndef ICMP_PARAMPROB +# define ICMP_PARAMPROB 12 +#endif +#ifndef ICMP_PARAMPROB_ERRATPTR +# define ICMP_PARAMPROB_ERRATPTR 0 +#endif +#ifndef ICMP_PARAMPROB_OPTABSENT +# define ICMP_PARAMPROB_OPTABSENT 1 +#endif +#ifndef ICMP_PARAMPROB_LENGTH +# define ICMP_PARAMPROB_LENGTH 2 +#endif +#ifndef ICMP_TSTAMP +# define ICMP_TSTAMP 13 +#endif +#ifndef ICMP_TSTAMPREPLY +# define ICMP_TSTAMPREPLY 14 +#endif +#ifndef ICMP_IREQ +# define ICMP_IREQ 15 +#endif +#ifndef ICMP_IREQREPLY +# 
define ICMP_IREQREPLY 16 +#endif +#ifndef ICMP_MASKREQ +# define ICMP_MASKREQ 17 +#endif +#ifndef ICMP_MASKREPLY +# define ICMP_MASKREPLY 18 +#endif +#ifndef ICMP_TRACEROUTE +# define ICMP_TRACEROUTE 30 +#endif +#ifndef ICMP_DATACONVERR +# define ICMP_DATACONVERR 31 +#endif +#ifndef ICMP_MOBILE_REDIRECT +# define ICMP_MOBILE_REDIRECT 32 +#endif +#ifndef ICMP_IPV6_WHEREAREYOU +# define ICMP_IPV6_WHEREAREYOU 33 +#endif +#ifndef ICMP_IPV6_IAMHERE +# define ICMP_IPV6_IAMHERE 34 +#endif +#ifndef ICMP_MOBILE_REGREQUEST +# define ICMP_MOBILE_REGREQUEST 35 +#endif +#ifndef ICMP_MOBILE_REGREPLY +# define ICMP_MOBILE_REGREPLY 36 +#endif +#ifndef ICMP_SKIP +# define ICMP_SKIP 39 +#endif +#ifndef ICMP_PHOTURIS +# define ICMP_PHOTURIS 40 +#endif +#ifndef ICMP_PHOTURIS_UNKNOWN_INDEX +# define ICMP_PHOTURIS_UNKNOWN_INDEX 1 +#endif +#ifndef ICMP_PHOTURIS_AUTH_FAILED +# define ICMP_PHOTURIS_AUTH_FAILED 2 +#endif +#ifndef ICMP_PHOTURIS_DECRYPT_FAILED +# define ICMP_PHOTURIS_DECRYPT_FAILED 3 +#endif +#ifndef IPVERSION +# define IPVERSION 4 +#endif +#ifndef IPOPT_MINOFF +# define IPOPT_MINOFF 4 +#endif +#ifndef IPOPT_COPIED +# define IPOPT_COPIED(x) ((x)&0x80) +#endif +#ifndef IPOPT_EOL +# define IPOPT_EOL 0 +#endif +#ifndef IPOPT_NOP +# define IPOPT_NOP 1 +#endif +#ifndef IP_MF +# define IP_MF ((u_short)0x2000) +#endif +#ifndef ETHERTYPE_IP +# define ETHERTYPE_IP ((u_short)0x0800) +#endif +#ifndef TH_FIN +# define TH_FIN 0x01 +#endif +#ifndef TH_SYN +# define TH_SYN 0x02 +#endif +#ifndef TH_RST +# define TH_RST 0x04 +#endif +#ifndef TH_PUSH +# define TH_PUSH 0x08 +#endif +#ifndef TH_ACK +# define TH_ACK 0x10 +#endif +#ifndef TH_URG +# define TH_URG 0x20 +#endif +#undef TH_ACKMASK +#define TH_ACKMASK (TH_FIN|TH_SYN|TH_RST|TH_ACK) + +#ifndef IPOPT_EOL +# define IPOPT_EOL 0 +#endif +#ifndef IPOPT_NOP +# define IPOPT_NOP 1 +#endif +#ifndef IPOPT_RR +# define IPOPT_RR 7 +#endif +#ifndef IPOPT_TS +# define IPOPT_TS 68 +#endif +#ifndef IPOPT_SECURITY +# define IPOPT_SECURITY 130 +#endif 
+#ifndef IPOPT_LSRR +# define IPOPT_LSRR 131 +#endif +#ifndef IPOPT_SATID +# define IPOPT_SATID 136 +#endif +#ifndef IPOPT_SSRR +# define IPOPT_SSRR 137 +#endif +#ifndef IPOPT_SECUR_UNCLASS +# define IPOPT_SECUR_UNCLASS ((u_short)0x0000) +#endif +#ifndef IPOPT_SECUR_CONFID +# define IPOPT_SECUR_CONFID ((u_short)0xf135) +#endif +#ifndef IPOPT_SECUR_EFTO +# define IPOPT_SECUR_EFTO ((u_short)0x789a) +#endif +#ifndef IPOPT_SECUR_MMMM +# define IPOPT_SECUR_MMMM ((u_short)0xbc4d) +#endif +#ifndef IPOPT_SECUR_RESTR +# define IPOPT_SECUR_RESTR ((u_short)0xaf13) +#endif +#ifndef IPOPT_SECUR_SECRET +# define IPOPT_SECUR_SECRET ((u_short)0xd788) +#endif +#ifndef IPOPT_SECUR_TOPSECRET +# define IPOPT_SECUR_TOPSECRET ((u_short)0x6bc5) +#endif +#ifndef IPOPT_OLEN +# define IPOPT_OLEN 1 +#endif +#ifndef IPPROTO_HOPOPTS +# define IPPROTO_HOPOPTS 0 +#endif +#ifndef IPPROTO_ENCAP +# define IPPROTO_ENCAP 4 +#endif +#ifndef IPPROTO_IPV6 +# define IPPROTO_IPV6 41 +#endif +#ifndef IPPROTO_ROUTING +# define IPPROTO_ROUTING 43 +#endif +#ifndef IPPROTO_FRAGMENT +# define IPPROTO_FRAGMENT 44 +#endif +#ifndef IPPROTO_GRE +# define IPPROTO_GRE 47 /* GRE encaps RFC 1701 */ +#endif +#ifndef IPPROTO_ESP +# define IPPROTO_ESP 50 +#endif +#ifndef IPPROTO_AH +# define IPPROTO_AH 51 +#endif +#ifndef IPPROTO_ICMPV6 +# define IPPROTO_ICMPV6 58 +#endif +#ifndef IPPROTO_NONE +# define IPPROTO_NONE 59 +#endif +#ifndef IPPROTO_DSTOPTS +# define IPPROTO_DSTOPTS 60 +#endif +#ifndef IPPROTO_FRAGMENT +# define IPPROTO_FRAGMENT 44 +#endif +#ifndef ICMP_ROUTERADVERT +# define ICMP_ROUTERADVERT 9 +#endif +#ifndef ICMP_ROUTERSOLICIT +# define ICMP_ROUTERSOLICIT 10 +#endif +#ifndef ICMP6_DST_UNREACH +# define ICMP6_DST_UNREACH 1 +#endif +#ifndef ICMP6_PACKET_TOO_BIG +# define ICMP6_PACKET_TOO_BIG 2 +#endif +#ifndef ICMP6_TIME_EXCEEDED +# define ICMP6_TIME_EXCEEDED 3 +#endif +#ifndef ICMP6_PARAM_PROB +# define ICMP6_PARAM_PROB 4 +#endif + +#ifndef ICMP6_ECHO_REQUEST +# define ICMP6_ECHO_REQUEST 128 +#endif +#ifndef 
ICMP6_ECHO_REPLY +# define ICMP6_ECHO_REPLY 129 +#endif +#ifndef ICMP6_MEMBERSHIP_QUERY +# define ICMP6_MEMBERSHIP_QUERY 130 +#endif +#ifndef MLD6_LISTENER_QUERY +# define MLD6_LISTENER_QUERY 130 +#endif +#ifndef ICMP6_MEMBERSHIP_REPORT +# define ICMP6_MEMBERSHIP_REPORT 131 +#endif +#ifndef MLD6_LISTENER_REPORT +# define MLD6_LISTENER_REPORT 131 +#endif +#ifndef ICMP6_MEMBERSHIP_REDUCTION +# define ICMP6_MEMBERSHIP_REDUCTION 132 +#endif +#ifndef MLD6_LISTENER_DONE +# define MLD6_LISTENER_DONE 132 +#endif +#ifndef ND_ROUTER_SOLICIT +# define ND_ROUTER_SOLICIT 133 +#endif +#ifndef ND_ROUTER_ADVERT +# define ND_ROUTER_ADVERT 134 +#endif +#ifndef ND_NEIGHBOR_SOLICIT +# define ND_NEIGHBOR_SOLICIT 135 +#endif +#ifndef ND_NEIGHBOR_ADVERT +# define ND_NEIGHBOR_ADVERT 136 +#endif +#ifndef ND_REDIRECT +# define ND_REDIRECT 137 +#endif +#ifndef ICMP6_ROUTER_RENUMBERING +# define ICMP6_ROUTER_RENUMBERING 138 +#endif +#ifndef ICMP6_WRUREQUEST +# define ICMP6_WRUREQUEST 139 +#endif +#ifndef ICMP6_WRUREPLY +# define ICMP6_WRUREPLY 140 +#endif +#ifndef ICMP6_FQDN_QUERY +# define ICMP6_FQDN_QUERY 139 +#endif +#ifndef ICMP6_FQDN_REPLY +# define ICMP6_FQDN_REPLY 140 +#endif +#ifndef ICMP6_NI_QUERY +# define ICMP6_NI_QUERY 139 +#endif +#ifndef ICMP6_NI_REPLY +# define ICMP6_NI_REPLY 140 +#endif +#ifndef MLD6_MTRACE_RESP +# define MLD6_MTRACE_RESP 200 +#endif +#ifndef MLD6_MTRACE +# define MLD6_MTRACE 201 +#endif +#ifndef ICMP6_HADISCOV_REQUEST +# define ICMP6_HADISCOV_REQUEST 202 +#endif +#ifndef ICMP6_HADISCOV_REPLY +# define ICMP6_HADISCOV_REPLY 203 +#endif +#ifndef ICMP6_MOBILEPREFIX_SOLICIT +# define ICMP6_MOBILEPREFIX_SOLICIT 204 +#endif +#ifndef ICMP6_MOBILEPREFIX_ADVERT +# define ICMP6_MOBILEPREFIX_ADVERT 205 +#endif +#ifndef ICMP6_MAXTYPE +# define ICMP6_MAXTYPE 205 +#endif + +#ifndef ICMP6_DST_UNREACH_NOROUTE +# define ICMP6_DST_UNREACH_NOROUTE 0 +#endif +#ifndef ICMP6_DST_UNREACH_ADMIN +# define ICMP6_DST_UNREACH_ADMIN 1 +#endif +#ifndef ICMP6_DST_UNREACH_NOTNEIGHBOR +# 
define ICMP6_DST_UNREACH_NOTNEIGHBOR 2 +#endif +#ifndef ICMP6_DST_UNREACH_BEYONDSCOPE +# define ICMP6_DST_UNREACH_BEYONDSCOPE 2 +#endif +#ifndef ICMP6_DST_UNREACH_ADDR +# define ICMP6_DST_UNREACH_ADDR 3 +#endif +#ifndef ICMP6_DST_UNREACH_NOPORT +# define ICMP6_DST_UNREACH_NOPORT 4 +#endif +#ifndef ICMP6_TIME_EXCEED_TRANSIT +# define ICMP6_TIME_EXCEED_TRANSIT 0 +#endif +#ifndef ICMP6_TIME_EXCEED_REASSEMBLY +# define ICMP6_TIME_EXCEED_REASSEMBLY 1 +#endif + +#ifndef ICMP6_NI_SUCCESS +# define ICMP6_NI_SUCCESS 0 +#endif +#ifndef ICMP6_NI_REFUSED +# define ICMP6_NI_REFUSED 1 +#endif +#ifndef ICMP6_NI_UNKNOWN +# define ICMP6_NI_UNKNOWN 2 +#endif + +#ifndef ICMP6_ROUTER_RENUMBERING_COMMAND +# define ICMP6_ROUTER_RENUMBERING_COMMAND 0 +#endif +#ifndef ICMP6_ROUTER_RENUMBERING_RESULT +# define ICMP6_ROUTER_RENUMBERING_RESULT 1 +#endif +#ifndef ICMP6_ROUTER_RENUMBERING_SEQNUM_RESET +# define ICMP6_ROUTER_RENUMBERING_SEQNUM_RESET 255 +#endif + +#ifndef ICMP6_PARAMPROB_HEADER +# define ICMP6_PARAMPROB_HEADER 0 +#endif +#ifndef ICMP6_PARAMPROB_NEXTHEADER +# define ICMP6_PARAMPROB_NEXTHEADER 1 +#endif +#ifndef ICMP6_PARAMPROB_OPTION +# define ICMP6_PARAMPROB_OPTION 2 +#endif + +#ifndef ICMP6_NI_SUBJ_IPV6 +# define ICMP6_NI_SUBJ_IPV6 0 +#endif +#ifndef ICMP6_NI_SUBJ_FQDN +# define ICMP6_NI_SUBJ_FQDN 1 +#endif +#ifndef ICMP6_NI_SUBJ_IPV4 +# define ICMP6_NI_SUBJ_IPV4 2 +#endif + +/* + * ECN is a new addition to TCP - RFC 2481 + */ +#ifndef TH_ECN +# define TH_ECN 0x40 +#endif +#ifndef TH_CWR +# define TH_CWR 0x80 +#endif +#define TH_ECNALL (TH_ECN|TH_CWR) + +/* + * TCP States + */ +#define IPF_TCPS_CLOSED 0 /* closed */ +#define IPF_TCPS_LISTEN 1 /* listening for connection */ +#define IPF_TCPS_SYN_SENT 2 /* active, have sent syn */ +#define IPF_TCPS_SYN_RECEIVED 3 /* have send and received syn */ +#define IPF_TCPS_HALF_ESTAB 4 /* for connections not fully "up" */ +/* states < IPF_TCPS_ESTABLISHED are those where connections not established */ +#define IPF_TCPS_ESTABLISHED 5 /* 
established */ +#define IPF_TCPS_CLOSE_WAIT 6 /* rcvd fin, waiting for close */ +/* states > IPF_TCPS_CLOSE_WAIT are those where user has closed */ +#define IPF_TCPS_FIN_WAIT_1 7 /* have closed, sent fin */ +#define IPF_TCPS_CLOSING 8 /* closed xchd FIN; await FIN ACK */ +#define IPF_TCPS_LAST_ACK 9 /* had fin and close; await FIN ACK */ +/* states > IPF_TCPS_CLOSE_WAIT && < IPF_TCPS_FIN_WAIT_2 await ACK of FIN */ +#define IPF_TCPS_FIN_WAIT_2 10 /* have closed, fin is acked */ +#define IPF_TCPS_TIME_WAIT 11 /* in 2*msl quiet wait after close */ +#define IPF_TCP_NSTATES 12 + +#define TCP_MSL 120 + +#undef ICMP_MAX_UNREACH +#define ICMP_MAX_UNREACH 14 +#undef ICMP_MAXTYPE +#define ICMP_MAXTYPE 18 + +#ifndef IFNAMSIZ +#define IFNAMSIZ 16 +#endif + +#ifndef LOG_FTP +# define LOG_FTP (11<<3) +#endif +#ifndef LOG_AUTHPRIV +# define LOG_AUTHPRIV (10<<3) +#endif +#ifndef LOG_AUDIT +# define LOG_AUDIT (13<<3) +#endif +#ifndef LOG_NTP +# define LOG_NTP (12<<3) +#endif +#ifndef LOG_SECURITY +# define LOG_SECURITY (13<<3) +#endif +#ifndef LOG_LFMT +# define LOG_LFMT (14<<3) +#endif +#ifndef LOG_CONSOLE +# define LOG_CONSOLE (14<<3) +#endif + +/* + * ICMP error replies have an IP header (20 bytes), 8 bytes of ICMP data, + * another IP header and then 64 bits of data, totalling 56. Of course, + * the last 64 bits is dependent on that being available. 
+ */ +#define ICMPERR_ICMPHLEN 8 +#define ICMPERR_IPICMPHLEN (20 + 8) +#define ICMPERR_MINPKTLEN (20 + 8 + 20) +#define ICMPERR_MAXPKTLEN (20 + 8 + 20 + 8) +#define ICMP6ERR_MINPKTLEN (40 + 8) +#define ICMP6ERR_IPICMPHLEN (40 + 8 + 40) + +#ifndef MIN +# define MIN(a,b) (((a)<(b))?(a):(b)) +#endif + +#ifdef IPF_DEBUG +# define DPRINT(x) printf x +#else +# define DPRINT(x) +#endif + +#ifdef RESCUE +# undef IPFILTER_BPF +#endif + +#endif /* __IP_COMPAT_H__ */ diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_fil.h b/usr/src/uts/common/inet/ipf/netinet/ip_fil.h new file mode 100644 index 0000000000..e7ec154f4f --- /dev/null +++ b/usr/src/uts/common/inet/ipf/netinet/ip_fil.h @@ -0,0 +1,1431 @@ +/* + * Copyright (C) 1993-2001, 2003 by Darren Reed. + * + * See the IPFILTER.LICENCE file for details on licencing. + * + * @(#)ip_fil.h 1.35 6/5/96 + * $Id: ip_fil.h,v 2.170.2.22 2005/07/16 05:55:35 darrenr Exp $ + * + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifndef __IP_FIL_H__ +#define __IP_FIL_H__ + +#include "netinet/ip_compat.h" + +#ifndef SOLARIS +# define SOLARIS (defined(sun) && (defined(__svr4__) || defined(__SVR4))) +#endif + +#ifndef __P +# ifdef __STDC__ +# define __P(x) x +# else +# define __P(x) () +# endif +#endif + +#if defined(__STDC__) || defined(__GNUC__) || defined(_AIX51) +# define SIOCADAFR _IOW('r', 60, struct ipfobj) +# define SIOCRMAFR _IOW('r', 61, struct ipfobj) +# define SIOCSETFF _IOW('r', 62, u_int) +# define SIOCGETFF _IOR('r', 63, u_int) +# define SIOCGETFS _IOWR('r', 64, struct ipfobj) +# define SIOCIPFFL _IOWR('r', 65, int) +# define SIOCIPFFB _IOR('r', 66, int) +# define SIOCADIFR _IOW('r', 67, struct ipfobj) +# define SIOCRMIFR _IOW('r', 68, struct ipfobj) +# define SIOCSWAPA _IOR('r', 69, u_int) +# define SIOCINAFR _IOW('r', 70, struct ipfobj) +# define SIOCINIFR _IOW('r', 71, struct ipfobj) +# define SIOCFRENB _IOW('r', 72, u_int) +# define SIOCFRSYN _IOW('r', 73, u_int) +# define SIOCFRZST _IOWR('r', 74, struct ipfobj) +# define SIOCZRLST _IOWR('r', 75, struct ipfobj) +# define SIOCAUTHW _IOWR('r', 76, struct ipfobj) +# define SIOCAUTHR _IOWR('r', 77, struct ipfobj) +# define SIOCATHST _IOWR('r', 78, struct ipfobj) +# define SIOCSTLCK _IOWR('r', 79, u_int) +# define SIOCSTPUT _IOWR('r', 80, struct ipfobj) +# define SIOCSTGET _IOWR('r', 81, struct ipfobj) +# define SIOCSTGSZ _IOWR('r', 82, struct ipfobj) +# define SIOCGFRST _IOWR('r', 83, struct ipfobj) +# define SIOCSETLG _IOWR('r', 84, int) +# define SIOCGETLG _IOWR('r', 85, int) +# define SIOCFUNCL _IOWR('r', 86, struct ipfunc_resolve) +# define SIOCIPFGETNEXT _IOWR('r', 87, struct ipfobj) +# define SIOCIPFGET _IOWR('r', 88, struct ipfobj) +# define SIOCIPFSET _IOWR('r', 89, struct ipfobj) +# define SIOCIPFL6 _IOWR('r', 90, int) +#else +# define SIOCADAFR _IOW(r, 60, struct ipfobj) +# define SIOCRMAFR _IOW(r, 61, struct ipfobj) +# define SIOCSETFF _IOW(r, 62, u_int) +# define 
SIOCGETFF _IOR(r, 63, u_int) +# define SIOCGETFS _IOWR(r, 64, struct ipfobj) +# define SIOCIPFFL _IOWR(r, 65, int) +# define SIOCIPFFB _IOR(r, 66, int) +# define SIOCADIFR _IOW(r, 67, struct ipfobj) +# define SIOCRMIFR _IOW(r, 68, struct ipfobj) +# define SIOCSWAPA _IOR(r, 69, u_int) +# define SIOCINAFR _IOW(r, 70, struct ipfobj) +# define SIOCINIFR _IOW(r, 71, struct ipfobj) +# define SIOCFRENB _IOW(r, 72, u_int) +# define SIOCFRSYN _IOW(r, 73, u_int) +# define SIOCFRZST _IOWR(r, 74, struct ipfobj) +# define SIOCZRLST _IOWR(r, 75, struct ipfobj) +# define SIOCAUTHW _IOWR(r, 76, struct ipfobj) +# define SIOCAUTHR _IOWR(r, 77, struct ipfobj) +# define SIOCATHST _IOWR(r, 78, struct ipfobj) +# define SIOCSTLCK _IOWR(r, 79, u_int) +# define SIOCSTPUT _IOWR(r, 80, struct ipfobj) +# define SIOCSTGET _IOWR(r, 81, struct ipfobj) +# define SIOCSTGSZ _IOWR(r, 82, struct ipfobj) +# define SIOCGFRST _IOWR(r, 83, struct ipfobj) +# define SIOCSETLG _IOWR(r, 84, int) +# define SIOCGETLG _IOWR(r, 85, int) +# define SIOCFUNCL _IOWR(r, 86, struct ipfunc_resolve) +# define SIOCIPFGETNEXT _IOWR(r, 87, struct ipfobj) +# define SIOCIPFGET _IOWR(r, 88, struct ipfobj) +# define SIOCIPFSET _IOWR(r, 89, struct ipfobj) +# define SIOCIPFL6 _IOWR(r, 90, int) +#endif +#define SIOCADDFR SIOCADAFR +#define SIOCDELFR SIOCRMAFR +#define SIOCINSFR SIOCINAFR + + +struct ipscan; +struct ifnet; + + +typedef int (* lookupfunc_t) __P((void *, int, void *)); + +/* + * i6addr is used as a container for both IPv4 and IPv6 addresses, as well + * as other types of objects, depending on its qualifier. 
+ */ +#ifdef USE_INET6 +typedef union i6addr { + u_32_t i6[4]; + struct in_addr in4; + struct in6_addr in6; + void *vptr[2]; + lookupfunc_t lptr[2]; +} i6addr_t; +#define in6_addr8 in6.s6_addr +#else +typedef union i6addr { + u_32_t i6[4]; + struct in_addr in4; + void *vptr[2]; + lookupfunc_t lptr[2]; +} i6addr_t; +#endif + +#define in4_addr in4.s_addr +#define iplookupnum i6[0] +#define iplookuptype i6[1] +/* + * NOTE: These DO overlap the above on 64bit systems and this IS recognised. + */ +#define iplookupptr vptr[0] +#define iplookupfunc lptr[1] + +/* + * I6n(x) reads 32-bit word n of the i6addr_t at x exactly as stored; + * HI6n(x) passes the same word through ntohl(). + */ +#define I60(x) (((i6addr_t *)(x))->i6[0]) +#define I61(x) (((i6addr_t *)(x))->i6[1]) +#define I62(x) (((i6addr_t *)(x))->i6[2]) +#define I63(x) (((i6addr_t *)(x))->i6[3]) +#define HI60(x) ntohl(((i6addr_t *)(x))->i6[0]) +#define HI61(x) ntohl(((i6addr_t *)(x))->i6[1]) +#define HI62(x) ntohl(((i6addr_t *)(x))->i6[2]) +#define HI63(x) ntohl(((i6addr_t *)(x))->i6[3]) + +/* + * Whole-address tests over all four words. IP6_GT and IP6_LT compare the + * ntohl() values starting at i6[0], so i6[0] is the most significant word + * and i6[3] the least significant one. + */ +#define IP6_EQ(a,b) ((I63(a) == I63(b)) && (I62(a) == I62(b)) && \ + (I61(a) == I61(b)) && (I60(a) == I60(b))) +#define IP6_NEQ(a,b) ((I63(a) != I63(b)) || (I62(a) != I62(b)) || \ + (I61(a) != I61(b)) || (I60(a) != I60(b))) +#define IP6_ISZERO(a) ((I60(a) | I61(a) | I62(a) | I63(a)) == 0) +#define IP6_NOTZERO(a) ((I60(a) | I61(a) | I62(a) | I63(a)) != 0) +#define IP6_GT(a,b) (HI60(a) > HI60(b) || (HI60(a) == HI60(b) && \ + (HI61(a) > HI61(b) || (HI61(a) == HI61(b) && \ + (HI62(a) > HI62(b) || (HI62(a) == HI62(b) && \ + HI63(a) > HI63(b))))))) +#define IP6_LT(a,b) (HI60(a) < HI60(b) || (HI60(a) == HI60(b) && \ + (HI61(a) < HI61(b) || (HI61(a) == HI61(b) && \ + (HI62(a) < HI62(b) || (HI62(a) == HI62(b) && \ + HI63(a) < HI63(b))))))) +/* NLADD(n,x): ntohl(n) plus x, converted back with htonl(). */ +#define NLADD(n,x) htonl(ntohl(n) + (x)) +/* + * IP6_INC(a): add one to the address at a, in place. The increment is + * applied to i6[3], the least significant word under the IP6_GT/IP6_LT + * ordering, and carries into the next word up each time a word wraps to 0. + * Each step updates the word it has just tested; earlier code stored every + * carry result into i6[0]. + */ +#define IP6_INC(a) \ + { i6addr_t *_i6 = (i6addr_t *)(a); \ + _i6->i6[3] = NLADD(_i6->i6[3], 1); \ + if (_i6->i6[3] == 0) { \ + _i6->i6[2] = NLADD(_i6->i6[2], 1); \ + if (_i6->i6[2] == 0) { \ + _i6->i6[1] = NLADD(_i6->i6[1], 1); \ + if (_i6->i6[1] == 0) { \ + _i6->i6[0] = 
NLADD(_i6->i6[0], 1); \ + } \ + } \ + } \ + } +/* + * IP6_ADD(a,x,d): sets word 0 of d to word 0 of a plus x; when the result + * compares below the source word it adds one to d's own word 1, and so on + * through word 3. + * NOTE(review): the addition starts at i6[0], which IP6_GT/IP6_LT treat as + * the most significant word, and words 1..3 of a are never copied into d. + * Left as found; confirm the intended semantics before relying on it. + */ +#define IP6_ADD(a,x,d) \ + { i6addr_t *_s = (i6addr_t *)(a); \ + i6addr_t *_d = (i6addr_t *)(d); \ + _d->i6[0] = NLADD(_s->i6[0], x); \ + if (ntohl(_d->i6[0]) < ntohl(_s->i6[0])) { \ + _d->i6[1] = NLADD(_d->i6[1], 1); \ + if (ntohl(_d->i6[1]) < ntohl(_s->i6[1])) { \ + _d->i6[2] = NLADD(_d->i6[2], 1); \ + if (ntohl(_d->i6[2]) < ntohl(_s->i6[2])) { \ + _d->i6[3] = NLADD(_d->i6[3], 1); \ + } \ + } \ + } \ + } +/* IP6_AND(a,b,d): d = a & b, one 32-bit word at a time. */ +#define IP6_AND(a,b,d) { i6addr_t *_s1 = (i6addr_t *)(a); \ + i6addr_t *_s2 = (i6addr_t *)(b); \ + i6addr_t *_d = (i6addr_t *)(d); \ + _d->i6[0] = _s1->i6[0] & _s2->i6[0]; \ + _d->i6[1] = _s1->i6[1] & _s2->i6[1]; \ + _d->i6[2] = _s1->i6[2] & _s2->i6[2]; \ + _d->i6[3] = _s1->i6[3] & _s2->i6[3]; \ + } +/* IP6_MERGE(a,b,c): a |= b & ~c, one 32-bit word at a time. */ +#define IP6_MERGE(a,b,c) \ + { i6addr_t *_d, *_s1, *_s2; \ + _d = (i6addr_t *)(a); \ + _s1 = (i6addr_t *)(b); \ + _s2 = (i6addr_t *)(c); \ + _d->i6[0] |= _s1->i6[0] & ~_s2->i6[0]; \ + _d->i6[1] |= _s1->i6[1] & ~_s2->i6[1]; \ + _d->i6[2] |= _s1->i6[2] & ~_s2->i6[2]; \ + _d->i6[3] |= _s1->i6[3] & ~_s2->i6[3]; \ + } + + +/* + * IP-level summary fields: version, TOS, TTL, protocol, option and security + * masks, and the source/destination addresses. + */ +typedef struct fr_ip { + u_32_t fi_v:4; /* IP version */ + u_32_t fi_xx:4; /* spare */ + u_32_t fi_tos:8; /* IP packet TOS */ + u_32_t fi_ttl:8; /* IP packet TTL */ + u_32_t fi_p:8; /* IP packet protocol */ + u_32_t fi_optmsk; /* bitmask composed from IP options */ + i6addr_t fi_src; /* source address from packet */ + i6addr_t fi_dst; /* destination address from packet */ + u_short fi_secmsk; /* bitmask composed from IP security options */ + u_short fi_auth; /* authentication code from IP sec. 
options */ + u_32_t fi_flx; /* packet flags */ + u_32_t fi_tcpmsk; /* TCP options set/reset */ + u_32_t fi_res1; /* RESERVED */ +} fr_ip_t; + +/* + * For use in fi_flx + */ +#define FI_TCPUDP 0x0001 /* TCP/UDP implied comparison */ +#define FI_OPTIONS 0x0002 +#define FI_FRAG 0x0004 +#define FI_SHORT 0x0008 +#define FI_NATED 0x0010 +#define FI_MULTICAST 0x0020 +#define FI_BROADCAST 0x0040 +#define FI_MBCAST 0x0080 +#define FI_STATE 0x0100 +#define FI_BADNAT 0x0200 +#define FI_BAD 0x0400 +#define FI_OOW 0x0800 /* Out of state window, else match */ +#define FI_ICMPERR 0x1000 +#define FI_FRAGBODY 0x2000 +#define FI_BADSRC 0x4000 +#define FI_LOWTTL 0x8000 +#define FI_CMP 0xcfe3 /* low 16 bits minus FI_FRAG, FI_SHORT, FI_NATED, FI_ICMPERR, FI_FRAGBODY */ +#define FI_ICMPCMP 0x0003 /* Flags we can check for ICMP error packets (FI_TCPUDP|FI_OPTIONS) */ +#define FI_WITH 0xeffe /* low 16 bits minus FI_TCPUDP, FI_ICMPERR */ +#define FI_V6EXTHDR 0x10000 +#define FI_COALESCE 0x20000 +#define FI_NOCKSUM 0x20000000 /* don't do a L4 checksum validation */ +#define FI_DONTCACHE 0x40000000 /* don't cache the result */ +#define FI_IGNORE 0x80000000 + +/* + * Shorthand for the IPv4 address and lookup members of fi_src and fi_dst. + */ +#define fi_saddr fi_src.in4.s_addr +#define fi_daddr fi_dst.in4.s_addr +#define fi_srcnum fi_src.iplookupnum +#define fi_dstnum fi_dst.iplookupnum +#define fi_srctype fi_src.iplookuptype +#define fi_dsttype fi_dst.iplookuptype +#define fi_srcptr fi_src.iplookupptr +#define fi_dstptr fi_dst.iplookupptr +#define fi_srcfunc fi_src.iplookupfunc +#define fi_dstfunc fi_dst.iplookupfunc + + +/* + * These are both used by the state and NAT code to indicate that one port or + * the other should be treated as a wildcard. + * NOTE: When updating, check bit masks in ip_state.h and update there too. 
+ */ +#define SI_W_SPORT 0x00000100 +#define SI_W_DPORT 0x00000200 +#define SI_WILDP (SI_W_SPORT|SI_W_DPORT) +#define SI_W_SADDR 0x00000400 +#define SI_W_DADDR 0x00000800 +#define SI_WILDA (SI_W_SADDR|SI_W_DADDR) +#define SI_NEWFR 0x00001000 +#define SI_CLONE 0x00002000 +#define SI_CLONED 0x00004000 + + +typedef struct fr_info { + void *fin_ifp; /* interface packet is `on' */ + fr_ip_t fin_fi; /* IP Packet summary */ + union { + u_short fid_16[2]; /* TCP/UDP ports, ICMP code/type */ + u_32_t fid_32; + } fin_dat; + int fin_out; /* in or out ? 1 == out, 0 == in */ + int fin_rev; /* state only: 1 = reverse */ + u_short fin_hlen; /* length of IP header in bytes */ + u_char fin_tcpf; /* TCP header flags (SYN, ACK, etc) */ + u_char fin_icode; /* ICMP error to return */ + u_32_t fin_rule; /* rule # last matched */ + char fin_group[FR_GROUPLEN]; /* group number, -1 for none */ + struct frentry *fin_fr; /* last matching rule */ + void *fin_dp; /* start of data past IP header */ + int fin_dlen; /* length of data portion of packet */ + int fin_plen; + int fin_flen; /* length of layer 4 hdr and + ipv6 ext hdr after fragment hdr */ + int fin_ipoff; /* # bytes from buffer start to hdr */ + u_32_t fin_id; /* IP packet id field */ + u_short fin_off; + int fin_depth; /* Group nesting depth */ + int fin_error; /* Error code to return */ + void *fin_nat; + void *fin_state; + void *fin_nattag; + ip_t *fin_ip; + mb_t **fin_mp; /* pointer to pointer to mbuf */ + mb_t *fin_m; /* pointer to mbuf */ +#ifdef MENTAT + mb_t *fin_qfm; /* pointer to mblk where pkt starts */ + void *fin_qpi; +#endif +#ifdef __sgi + void *fin_hbuf; +#endif +} fr_info_t; + +#define fin_v fin_fi.fi_v +#define fin_p fin_fi.fi_p +#define fin_flx fin_fi.fi_flx +#define fin_optmsk fin_fi.fi_optmsk +#define fin_secmsk fin_fi.fi_secmsk +#define fin_auth fin_fi.fi_auth +#define fin_src fin_fi.fi_src.in4 +#define fin_src6 fin_fi.fi_src.in6 +#define fin_saddr fin_fi.fi_saddr +#define fin_dst fin_fi.fi_dst.in4 +#define 
fin_dst6 fin_fi.fi_dst.in6 +#define fin_daddr fin_fi.fi_daddr +#define fin_data fin_dat.fid_16 +#define fin_sport fin_dat.fid_16[0] +#define fin_dport fin_dat.fid_16[1] +#define fin_ports fin_dat.fid_32 + +#define IPF_IN 0 +#define IPF_OUT 1 + +typedef struct frentry *(*ipfunc_t) __P((fr_info_t *, u_32_t *)); +typedef int (*ipfuncinit_t) __P((struct frentry *)); + +/* + * Pairs a 32-byte name with an ipfunc_t address and an ipfuncinit_t hook. + */ +typedef struct ipfunc_resolve { + char ipfu_name[32]; + ipfunc_t ipfu_addr; + ipfuncinit_t ipfu_init; +} ipfunc_resolve_t; + +/* + * Size for compares on fr_info structures + */ +#define FI_CSIZE offsetof(fr_info_t, fin_icode) +#define FI_LCSIZE offsetof(fr_info_t, fin_dp) + +/* + * Size for copying cache fr_info structure + */ +#define FI_COPYSIZE offsetof(fr_info_t, fin_dp) + +/* + * Structure for holding IPFilter's tag information + */ +#define IPFTAG_LEN 16 +typedef struct { + union { + u_32_t iptu_num[4]; + char iptu_tag[IPFTAG_LEN]; + } ipt_un; + int ipt_not; +} ipftag_t; + +#define ipt_tag ipt_un.iptu_tag +#define ipt_num ipt_un.iptu_num + + +/* + * This structure is used to hold information about the next hop for where + * to forward a packet. + */ +typedef struct frdest { + void *fd_ifp; + i6addr_t fd_ip6; + char fd_ifname[LIFNAMSIZ]; +} frdest_t; + +#define fd_ip fd_ip6.in4 + + +/* + * This structure holds information about a port comparison. + */ +typedef struct frpcmp { + int frp_cmp; /* data for port comparisons */ + u_short frp_port; /* port to compare; presumably the low port for <> and >< -- confirm */ + u_short frp_top; /* top port for <> and >< */ +} frpcmp_t; + +/* Comparison kinds; presumably the values held in frp_cmp -- confirm. */ +#define FR_NONE 0 +#define FR_EQUAL 1 +#define FR_NEQUAL 2 +#define FR_LESST 3 +#define FR_GREATERT 4 +#define FR_LESSTE 5 +#define FR_GREATERTE 6 +#define FR_OUTRANGE 7 +#define FR_INRANGE 8 +#define FR_INCRANGE 9 + +/* + * Structure containing all the relevant TCP things that can be checked in + * a filter rule. 
+ */ +typedef struct frtuc { + u_char ftu_tcpfm; /* tcp flags mask */ + u_char ftu_tcpf; /* tcp flags */ + frpcmp_t ftu_src; + frpcmp_t ftu_dst; +} frtuc_t; + +#define ftu_scmp ftu_src.frp_cmp +#define ftu_dcmp ftu_dst.frp_cmp +#define ftu_sport ftu_src.frp_port +#define ftu_dport ftu_dst.frp_port +#define ftu_stop ftu_src.frp_top +#define ftu_dtop ftu_dst.frp_top + +#define FR_TCPFMAX 0x3f + +/* + * This structure makes up what is considered to be the IPFilter specific + * matching components of a filter rule, as opposed to the data structures + * used to define the result which are in frentry_t and not here. + */ +typedef struct fripf { + fr_ip_t fri_ip; + fr_ip_t fri_mip; /* mask structure */ + + u_short fri_icmpm; /* data for ICMP packets (mask) */ + u_short fri_icmp; + + frtuc_t fri_tuc; + int fri_satype; /* address type */ + int fri_datype; /* address type */ + /* + * The next two are each an index into fr_ifps[] to use when doing + * dynamic addressing. + * NOTE(review): struct frentry declares fr_ifas[], not fr_ifps[] -- + * confirm which array is meant. + */ + int fri_sifpidx; + int fri_difpidx; +} fripf_t; + +/* + * NOTE(review): the dst aliases go through fri_ip while the src aliases go + * through fri_mip -- confirm the asymmetry is intended. + */ +#define fri_dstnum fri_ip.fi_dstnum +#define fri_srcnum fri_mip.fi_srcnum +#define fri_dstptr fri_ip.fi_dstptr +#define fri_srcptr fri_mip.fi_srcptr + +#define FRI_NORMAL 0 /* Normal address */ +#define FRI_DYNAMIC 1 /* dynamic address */ +#define FRI_LOOKUP 2 /* address is a pool # */ +#define FRI_RANGE 3 /* address/mask is a range */ +#define FRI_NETWORK 4 /* network address from if */ +#define FRI_BROADCAST 5 /* broadcast address from if */ +#define FRI_PEERADDR 6 /* Peer address for P-to-P */ +#define FRI_NETMASKED 7 /* network address with netmask from if */ + + +typedef struct frentry * (* frentfunc_t) __P((fr_info_t *)); + +typedef struct frentry { + ipfmutex_t fr_lock; + struct frentry *fr_next; + struct frentry **fr_grp; + struct ipscan *fr_isc; + void *fr_ifas[4]; + void *fr_ptr; /* for use with fr_arg */ + char *fr_comment; /* text comment for rule */ + int fr_ref; /* reference count - for grouping */ + int fr_statecnt; /* state count - for limit rules */ + /* + * These are 
only incremented when a packet matches this rule and + * it is the last match + */ + U_QUAD_T fr_hits; + U_QUAD_T fr_bytes; + + /* + * For PPS rate limiting + */ + struct timeval fr_lastpkt; + int fr_curpps; + + union { + void *fru_data; + caddr_t fru_caddr; + fripf_t *fru_ipf; + frentfunc_t fru_func; + } fr_dun; + + /* + * Fields after this may not change whilst in the kernel. + */ + ipfunc_t fr_func; /* call this function */ + int fr_dsize; + int fr_pps; + int fr_statemax; /* max reference count */ + int fr_flineno; /* line number from conf file */ + u_32_t fr_type; + u_32_t fr_flags; /* per-rule flags && options (see below) */ + u_32_t fr_logtag; /* user defined log tag # */ + u_32_t fr_collect; /* collection number */ + u_int fr_arg; /* misc. numeric arg for rule */ + u_int fr_loglevel; /* syslog log facility + priority */ + u_int fr_age[2]; /* non-TCP timeouts */ + u_char fr_v; + u_char fr_icode; /* return ICMP code */ + char fr_group[FR_GROUPLEN]; /* group to which this rule belongs */ + char fr_grhead[FR_GROUPLEN]; /* group # which this rule starts */ + ipftag_t fr_nattag; + char fr_ifnames[4][LIFNAMSIZ]; + char fr_isctag[16]; + frdest_t fr_tifs[2]; /* "to"/"reply-to" interface */ + frdest_t fr_dif; /* duplicate packet interface */ + /* + * This must be last and will change after loaded into the kernel. 
+ */ + u_int fr_cksum; /* checksum on filter rules for performance */ +} frentry_t; + +#define fr_caddr fr_dun.fru_caddr +#define fr_data fr_dun.fru_data +#define fr_dfunc fr_dun.fru_func +#define fr_ipf fr_dun.fru_ipf +#define fr_ip fr_ipf->fri_ip +#define fr_mip fr_ipf->fri_mip +#define fr_icmpm fr_ipf->fri_icmpm +#define fr_icmp fr_ipf->fri_icmp +#define fr_tuc fr_ipf->fri_tuc +#define fr_satype fr_ipf->fri_satype +#define fr_datype fr_ipf->fri_datype +#define fr_sifpidx fr_ipf->fri_sifpidx +#define fr_difpidx fr_ipf->fri_difpidx +#define fr_proto fr_ip.fi_p +#define fr_mproto fr_mip.fi_p +#define fr_ttl fr_ip.fi_ttl +#define fr_mttl fr_mip.fi_ttl +#define fr_tos fr_ip.fi_tos +#define fr_mtos fr_mip.fi_tos +#define fr_tcpfm fr_tuc.ftu_tcpfm +#define fr_tcpf fr_tuc.ftu_tcpf +#define fr_scmp fr_tuc.ftu_scmp +#define fr_dcmp fr_tuc.ftu_dcmp +#define fr_dport fr_tuc.ftu_dport +#define fr_sport fr_tuc.ftu_sport +#define fr_stop fr_tuc.ftu_stop +#define fr_dtop fr_tuc.ftu_dtop +#define fr_dst fr_ip.fi_dst.in4 +#define fr_daddr fr_ip.fi_dst.in4.s_addr +#define fr_src fr_ip.fi_src.in4 +#define fr_saddr fr_ip.fi_src.in4.s_addr +#define fr_dmsk fr_mip.fi_dst.in4 +#define fr_dmask fr_mip.fi_dst.in4.s_addr +#define fr_smsk fr_mip.fi_src.in4 +#define fr_smask fr_mip.fi_src.in4.s_addr +#define fr_dstnum fr_ip.fi_dstnum +#define fr_srcnum fr_ip.fi_srcnum +#define fr_dsttype fr_ip.fi_dsttype +#define fr_srctype fr_ip.fi_srctype +#define fr_dstptr fr_mip.fi_dstptr +#define fr_srcptr fr_mip.fi_srcptr +#define fr_dstfunc fr_mip.fi_dstfunc +#define fr_srcfunc fr_mip.fi_srcfunc +#define fr_optbits fr_ip.fi_optmsk +#define fr_optmask fr_mip.fi_optmsk +#define fr_secbits fr_ip.fi_secmsk +#define fr_secmask fr_mip.fi_secmsk +#define fr_authbits fr_ip.fi_auth +#define fr_authmask fr_mip.fi_auth +#define fr_flx fr_ip.fi_flx +#define fr_mflx fr_mip.fi_flx +#define fr_ifname fr_ifnames[0] +#define fr_oifname fr_ifnames[2] +#define fr_ifa fr_ifas[0] +#define fr_oifa fr_ifas[2] +#define 
fr_tif fr_tifs[0] +#define fr_rif fr_tifs[1] + +#define FR_NOLOGTAG 0 + +#ifndef offsetof +#define offsetof(t,m) (int)((&((t *)0L)->m)) +#endif +#define FR_CMPSIZ (sizeof(struct frentry) - \ + offsetof(struct frentry, fr_func)) + +/* + * fr_type + */ +#define FR_T_NONE 0 +#define FR_T_IPF 1 /* IPF structures */ +#define FR_T_BPFOPC 2 /* BPF opcode */ +#define FR_T_CALLFUNC 3 /* callout to function in fr_func only */ +#define FR_T_COMPIPF 4 /* compiled C code */ +#define FR_T_BUILTIN 0x80000000 /* rule is in kernel space */ + +/* + * fr_flags + */ +#define FR_CALL 0x00000 /* call rule */ +#define FR_BLOCK 0x00001 /* do not allow packet to pass */ +#define FR_PASS 0x00002 /* allow packet to pass */ +#define FR_AUTH 0x00003 /* use authentication */ +#define FR_PREAUTH 0x00004 /* require preauthentication */ +#define FR_ACCOUNT 0x00005 /* Accounting rule */ +#define FR_SKIP 0x00006 /* skip rule */ +#define FR_DIVERT 0x00007 /* divert rule */ +#define FR_CMDMASK 0x0000f +#define FR_LOG 0x00010 /* Log */ +#define FR_LOGB 0x00011 /* Log-fail */ +#define FR_LOGP 0x00012 /* Log-pass */ +#define FR_LOGMASK (FR_LOG|FR_CMDMASK) +#define FR_CALLNOW 0x00020 /* call another function (fr_func) if matches */ +#define FR_NOTSRCIP 0x00040 +#define FR_NOTDSTIP 0x00080 +#define FR_QUICK 0x00100 /* match & stop processing list */ +#define FR_KEEPFRAG 0x00200 /* keep fragment information */ +#define FR_KEEPSTATE 0x00400 /* keep `connection' state information */ +#define FR_FASTROUTE 0x00800 /* bypass normal routing */ +#define FR_RETRST 0x01000 /* Return TCP RST packet - reset connection */ +#define FR_RETICMP 0x02000 /* Return ICMP unreachable packet */ +#define FR_FAKEICMP 0x03000 /* Return ICMP unreachable with fake source */ +#define FR_OUTQUE 0x04000 /* outgoing packets */ +#define FR_INQUE 0x08000 /* ingoing packets */ +#define FR_LOGBODY 0x10000 /* Log the body */ +#define FR_LOGFIRST 0x20000 /* Log the first byte if state held */ +#define FR_LOGORBLOCK 0x40000 /* block the packet 
if it can't be logged */ +#define FR_DUP 0x80000 /* duplicate packet */ +#define FR_FRSTRICT 0x100000 /* strict frag. cache */ +#define FR_STSTRICT 0x200000 /* strict keep state */ +#define FR_NEWISN 0x400000 /* new ISN for outgoing TCP */ +#define FR_NOICMPERR 0x800000 /* do not match ICMP errors in state */ +#define FR_STATESYNC 0x1000000 /* synchronize state to slave */ +#define FR_NOMATCH 0x8000000 /* no match occurred */ + /* 0x10000000 FF_LOGPASS */ + /* 0x20000000 FF_LOGBLOCK */ + /* 0x40000000 FF_LOGNOMATCH */ + /* 0x80000000 FF_BLOCKNONIP */ +#define FR_COPIED 0x40000000 /* copied from user space */ +#define FR_INACTIVE 0x80000000 /* only used when flush'ing rules */ + +#define FR_RETMASK (FR_RETICMP|FR_RETRST|FR_FAKEICMP) +#define FR_ISBLOCK(x) (((x) & FR_CMDMASK) == FR_BLOCK) +#define FR_ISPASS(x) (((x) & FR_CMDMASK) == FR_PASS) +#define FR_ISAUTH(x) (((x) & FR_CMDMASK) == FR_AUTH) +#define FR_ISPREAUTH(x) (((x) & FR_CMDMASK) == FR_PREAUTH) +#define FR_ISACCOUNT(x) (((x) & FR_CMDMASK) == FR_ACCOUNT) +#define FR_ISSKIP(x) (((x) & FR_CMDMASK) == FR_SKIP) +#define FR_ISNOMATCH(x) ((x) & FR_NOMATCH) +#define FR_INOUT (FR_INQUE|FR_OUTQUE) + +/* + * recognized flags for SIOCGETFF and SIOCSETFF, and get put in fr_flags + */ +#define FF_LOGPASS 0x10000000 +#define FF_LOGBLOCK 0x20000000 +#define FF_LOGNOMATCH 0x40000000 +#define FF_LOGGING (FF_LOGPASS|FF_LOGBLOCK|FF_LOGNOMATCH) +#define FF_BLOCKNONIP 0x80000000 /* Solaris2 Only */ + + +/* + * Structure that passes information on what/how to flush to the kernel. 
+ */ +typedef struct ipfflush { + int ipflu_how; + int ipflu_arg; +} ipfflush_t; + + +/* + * + */ +typedef struct ipfgetctl { + u_int ipfg_min; /* min value */ + u_int ipfg_current; /* current value */ + u_int ipfg_max; /* max value */ + u_int ipfg_default; /* default value */ + u_int ipfg_steps; /* value increments */ + char ipfg_name[40]; /* tag name for this control */ +} ipfgetctl_t; + +typedef struct ipfsetctl { + int ipfs_which; /* 0 = min 1 = current 2 = max 3 = default */ + u_int ipfs_value; /* min value */ + char ipfs_name[40]; /* tag name for this control */ +} ipfsetctl_t; + + +/* + * Some of the statistics below are in their own counters, but most are kept + * in this single structure so that they can all easily be collected and + * copied back as required. + * + * NOTE: when changing, keep in sync with kstats (below). + */ +typedef struct filterstats { + u_long fr_pass; /* packets allowed */ + u_long fr_block; /* packets denied */ + u_long fr_nom; /* packets which don't match any rule */ + u_long fr_short; /* packets which are short */ + u_long fr_ppkl; /* packets allowed and logged */ + u_long fr_bpkl; /* packets denied and logged */ + u_long fr_npkl; /* packets unmatched and logged */ + u_long fr_pkl; /* packets logged */ + u_long fr_skip; /* packets to be logged but buffer full */ + u_long fr_ret; /* packets for which a return is sent */ + u_long fr_acct; /* packets for which counting was performed */ + u_long fr_bnfr; /* bad attempts to allocate fragment state */ + u_long fr_nfr; /* new fragment state kept */ + u_long fr_cfr; /* add new fragment state but complete pkt */ + u_long fr_bads; /* bad attempts to allocate packet state */ + u_long fr_ads; /* new packet state kept */ + u_long fr_chit; /* cached hit */ + u_long fr_tcpbad; /* TCP checksum check failures */ + u_long fr_pull[2]; /* good and bad pullup attempts */ + u_long fr_badsrc; /* source received doesn't match route */ + u_long fr_badttl; /* TTL in packet doesn't reach minimum */ + u_long 
fr_bad; /* bad IP packets to the filter */ + u_long fr_ipv6; /* IPv6 packets in/out */ + u_long fr_ppshit; /* dropped because of pps ceiling */ + u_long fr_ipud; /* IP id update failures */ +} filterstats_t; + +/* + * kstat "copy" of the above - keep in sync! + * also keep in sync with initialisation code in solaris.c, ipf_kstat_init(). + */ +typedef struct filter_kstats { + kstat_named_t fks_pass; /* see above for comments */ + kstat_named_t fks_block; + kstat_named_t fks_nom; + kstat_named_t fks_short; + kstat_named_t fks_ppkl; + kstat_named_t fks_bpkl; + kstat_named_t fks_npkl; + kstat_named_t fks_pkl; + kstat_named_t fks_skip; + kstat_named_t fks_ret; + kstat_named_t fks_acct; + kstat_named_t fks_bnfr; + kstat_named_t fks_nfr; + kstat_named_t fks_cfr; + kstat_named_t fks_bads; + kstat_named_t fks_ads; + kstat_named_t fks_chit; + kstat_named_t fks_tcpbad; + kstat_named_t fks_pull[2]; + kstat_named_t fks_badsrc; + kstat_named_t fks_badttl; + kstat_named_t fks_bad; + kstat_named_t fks_ipv6; + kstat_named_t fks_ppshit; + kstat_named_t fks_ipud; +} filter_kstats_t; + +/* + * Log structure. Each packet header logged is prepended by one of these. + * Following this in the log records read from the device will be an ipflog + * structure which is then followed by any packet data. 
+ */ +typedef struct iplog { + u_32_t ipl_magic; + u_int ipl_count; + struct timeval ipl_time; + size_t ipl_dsize; + struct iplog *ipl_next; +} iplog_t; + +#define ipl_sec ipl_time.tv_sec +#define ipl_usec ipl_time.tv_usec + +#define IPL_MAGIC 0x49504c4d /* 'IPLM' */ +#define IPL_MAGIC_NAT 0x49504c4e /* 'IPLN' */ +#define IPL_MAGIC_STATE 0x49504c53 /* 'IPLS' */ +#define IPLOG_SIZE sizeof(iplog_t) + +typedef struct ipflog { +#if (defined(NetBSD) && (NetBSD <= 1991011) && (NetBSD >= 199603)) || \ + (defined(OpenBSD) && (OpenBSD >= 199603)) +#else + u_int fl_unit; +#endif + u_32_t fl_rule; + u_32_t fl_flags; + u_32_t fl_lflags; + u_32_t fl_logtag; + ipftag_t fl_nattag; + u_short fl_plen; /* extra data after hlen */ + u_short fl_loglevel; /* syslog log level */ + char fl_group[FR_GROUPLEN]; + u_char fl_hlen; /* length of IP headers saved */ + u_char fl_dir; + u_char fl_xxx[2]; /* pad */ + char fl_ifname[LIFNAMSIZ]; +} ipflog_t; + +#ifndef IPF_LOGGING +# define IPF_LOGGING 0 +#endif +#ifndef IPF_DEFAULT_PASS +# define IPF_DEFAULT_PASS FR_PASS +#endif + +#define DEFAULT_IPFLOGSIZE 8192 +#ifndef IPFILTER_LOGSIZE +# define IPFILTER_LOGSIZE DEFAULT_IPFLOGSIZE +#else +# if IPFILTER_LOGSIZE < DEFAULT_IPFLOGSIZE +# error IPFILTER_LOGSIZE too small. Must be >= DEFAULT_IPFLOGSIZE +# endif +#endif + +#define IPF_OPTCOPY 0x07ff00 /* bit mask of copied options */ + +/* + * Device filenames for reading log information. Use ipf on Solaris2 because + * ipl is already a name used by something else. + */ +#ifndef IPL_NAME +# if SOLARIS +# define IPL_NAME "/dev/ipf" +# else +# define IPL_NAME "/dev/ipl" +# endif +#endif +/* + * Pathnames for various IP Filter control devices. Used by LKM + * and userland, so defined here. 
+ */ +#define IPNAT_NAME "/dev/ipnat" +#define IPSTATE_NAME "/dev/ipstate" +#define IPAUTH_NAME "/dev/ipauth" +#define IPSYNC_NAME "/dev/ipsync" +#define IPSCAN_NAME "/dev/ipscan" +#define IPLOOKUP_NAME "/dev/iplookup" + +#define IPL_LOGIPF 0 /* Minor device #'s for accessing logs */ +#define IPL_LOGNAT 1 +#define IPL_LOGSTATE 2 +#define IPL_LOGAUTH 3 +#define IPL_LOGSYNC 4 +#define IPL_LOGSCAN 5 +#define IPL_LOGLOOKUP 6 +#define IPL_LOGCOUNT 7 +#define IPL_LOGMAX 7 +#define IPL_LOGSIZE (IPL_LOGMAX + 1) /* parenthesised: safe inside larger expressions */ +#define IPL_LOGALL (-1) +#define IPL_LOGNONE (-2) + +/* + * For SIOCGETFS + */ +typedef struct friostat { + struct filterstats f_st[2]; + struct frentry *f_ipf[2][2]; + struct frentry *f_acct[2][2]; + struct frentry *f_ipf6[2][2]; + struct frentry *f_acct6[2][2]; + struct frentry *f_auth; + struct frgroup *f_groups[IPL_LOGSIZE][2]; + u_long f_froute[2]; + u_long f_ticks; + int f_locks[IPL_LOGMAX]; + size_t f_kmutex_sz; + size_t f_krwlock_sz; + int f_defpass; /* default pass - from fr_pass */ + int f_active; /* 1 or 0 - active rule set */ + int f_running; /* 1 if running, else 0 */ + int f_logging; /* 1 if enabled, else 0 */ + int f_features; + char f_version[32]; /* version string */ +} friostat_t; + +#define f_fin f_ipf[0] +#define f_fin6 f_ipf6[0] +#define f_fout f_ipf[1] +#define f_fout6 f_ipf6[1] +#define f_acctin f_acct[0] +#define f_acctin6 f_acct6[0] +#define f_acctout f_acct[1] +#define f_acctout6 f_acct6[1] + +#define IPF_FEAT_LKM 0x001 +#define IPF_FEAT_LOG 0x002 +#define IPF_FEAT_LOOKUP 0x004 +#define IPF_FEAT_BPF 0x008 +#define IPF_FEAT_COMPILED 0x010 +#define IPF_FEAT_CKSUM 0x020 +#define IPF_FEAT_SYNC 0x040 +#define IPF_FEAT_SCAN 0x080 +#define IPF_FEAT_IPV6 0x100 + +typedef struct optlist { + u_short ol_val; + int ol_bit; +} optlist_t; + + +/* + * Group list structure. 
+ */ +typedef struct frgroup { + struct frgroup *fg_next; + struct frentry *fg_head; + struct frentry *fg_start; + u_32_t fg_flags; + int fg_ref; + char fg_name[FR_GROUPLEN]; +} frgroup_t; + +#define FG_NAME(g) (*(g)->fg_name == '\0' ? "" : (g)->fg_name) + + +/* + * Used by state and NAT tables + */ +typedef struct icmpinfo { + u_short ici_id; + u_short ici_seq; + u_char ici_type; +} icmpinfo_t; + +typedef struct udpinfo { + u_short us_sport; + u_short us_dport; +} udpinfo_t; + + +typedef struct tcpdata { + u_32_t td_end; + u_32_t td_maxend; + u_32_t td_maxwin; + u_32_t td_winscale; + u_32_t td_maxseg; + int td_winflags; +} tcpdata_t; + +#define TCP_WSCALE_MAX 14 + +#define TCP_WSCALE_SEEN 0x00000001 +#define TCP_WSCALE_FIRST 0x00000002 + + +typedef struct tcpinfo { + u_short ts_sport; + u_short ts_dport; + tcpdata_t ts_data[2]; +} tcpinfo_t; + + +/* + * Structures to define a GRE header as seen in a packet. + */ +struct grebits { + u_32_t grb_C:1; + u_32_t grb_R:1; + u_32_t grb_K:1; + u_32_t grb_S:1; + u_32_t grb_s:1; + u_32_t grb_recur:1; + u_32_t grb_A:1; + u_32_t grb_flags:3; + u_32_t grb_ver:3; + u_short grb_ptype; +}; + +typedef struct grehdr { + union { + struct grebits gru_bits; + u_short gru_flags; + } gr_un; + u_short gr_len; + u_short gr_call; +} grehdr_t; + +#define gr_flags gr_un.gru_flags +#define gr_bits gr_un.gru_bits +#define gr_ptype gr_bits.grb_ptype +#define gr_C gr_bits.grb_C +#define gr_R gr_bits.grb_R +#define gr_K gr_bits.grb_K +#define gr_S gr_bits.grb_S +#define gr_s gr_bits.grb_s +#define gr_recur gr_bits.grb_recur +#define gr_A gr_bits.grb_A +#define gr_ver gr_bits.grb_ver + +/* + * GRE information tracked by "keep state" + */ +typedef struct greinfo { + u_short gs_call[2]; + u_short gs_flags; + u_short gs_ptype; +} greinfo_t; + +#define GRE_REV(x) ((ntohs(x) >> 13) & 7) + + +/* + * Format of an Authentication header + */ +typedef struct authhdr { + u_char ah_next; + u_char ah_plen; + u_short ah_reserved; + u_32_t ah_spi; + u_32_t 
ah_seq; + /* Following the sequence number field is 0 or more bytes of */ + /* authentication data, as specified by ah_plen - RFC 2402. */ +} authhdr_t; + + +/* + * Timeout tail queue list member + */ +typedef struct ipftqent { + struct ipftqent **tqe_pnext; + struct ipftqent *tqe_next; + struct ipftq *tqe_ifq; + void *tqe_parent; /* pointer back to NAT/state struct */ + u_long tqe_die; /* when this entry is to die */ + u_long tqe_touched; + int tqe_flags; + int tqe_state[2]; /* current state of this entry */ +} ipftqent_t; + +#define TQE_RULEBASED 0x00000001 + + +/* + * Timeout tail queue head for IPFilter + */ +typedef struct ipftq { + ipfmutex_t ifq_lock; + u_int ifq_ttl; + ipftqent_t *ifq_head; + ipftqent_t **ifq_tail; + struct ipftq *ifq_next; + struct ipftq **ifq_pnext; + int ifq_ref; + u_int ifq_flags; +} ipftq_t; + +#define IFQF_USER 0x01 /* User defined aging */ +#define IFQF_DELETE 0x02 /* Marked for deletion */ +#define IFQF_PROXY 0x04 /* Timeout queue in use by a proxy */ + +#define IPF_HZ_MULT 1 +#define IPF_HZ_DIVIDE 2 /* How many times a second ipfilter */ + /* checks its timeout queues. */ +#define IPF_TTLVAL(x) (((x) / IPF_HZ_MULT) * IPF_HZ_DIVIDE) + +/* + * Structure to define address for pool lookups. + */ +typedef struct { + u_char adf_len; + sa_family_t adf_family; + i6addr_t adf_addr; +} addrfamily_t; + + +/* + * Object structure description. For passing through in ioctls. 
+ */ +typedef struct ipfobj { + u_32_t ipfo_rev; /* IPFilter version number */ + u_32_t ipfo_size; /* size of object at ipfo_ptr */ + void *ipfo_ptr; /* pointer to object */ + int ipfo_type; /* type of object being pointed to */ + int ipfo_offset; /* bytes from ipfo_ptr where to start */ + u_char ipfo_xxxpad[32]; /* reserved for future use */ +} ipfobj_t; + +#define IPFOBJ_FRENTRY 0 /* struct frentry */ +#define IPFOBJ_IPFSTAT 1 /* struct friostat */ +#define IPFOBJ_IPFINFO 2 /* struct fr_info */ +#define IPFOBJ_AUTHSTAT 3 /* struct fr_authstat */ +#define IPFOBJ_FRAGSTAT 4 /* struct ipfrstat */ +#define IPFOBJ_IPNAT 5 /* struct ipnat */ +#define IPFOBJ_NATSTAT 6 /* struct natstat */ +#define IPFOBJ_STATESAVE 7 /* struct ipstate_save */ +#define IPFOBJ_NATSAVE 8 /* struct nat_save */ +#define IPFOBJ_NATLOOKUP 9 /* struct natlookup */ +#define IPFOBJ_IPSTATE 10 /* struct ipstate */ +#define IPFOBJ_STATESTAT 11 /* struct ips_stat */ +#define IPFOBJ_FRAUTH 12 /* struct frauth */ +#define IPFOBJ_TUNEABLE 13 /* struct ipftune */ + + +typedef union ipftunevalptr { + void *ipftp_void; + u_long *ipftp_long; + u_int *ipftp_int; + u_short *ipftp_short; + u_char *ipftp_char; +} ipftunevalptr_t; + +typedef struct ipftuneable { + ipftunevalptr_t ipft_una; + char *ipft_name; + u_long ipft_min; + u_long ipft_max; + int ipft_sz; + int ipft_flags; + struct ipftuneable *ipft_next; +} ipftuneable_t; + +#define ipft_addr ipft_una.ipftp_void +#define ipft_plong ipft_una.ipftp_long +#define ipft_pint ipft_una.ipftp_int +#define ipft_pshort ipft_una.ipftp_short +#define ipft_pchar ipft_una.ipftp_char + +#define IPFT_RDONLY 1 /* read-only */ +#define IPFT_WRDISABLED 2 /* write when disabled only */ + +typedef union ipftuneval { + u_long ipftu_long; + u_int ipftu_int; + u_short ipftu_short; + u_char ipftu_char; +} ipftuneval_t; + +typedef struct ipftune { + void *ipft_cookie; + ipftuneval_t ipft_un; + u_long ipft_min; + u_long ipft_max; + int ipft_sz; + int ipft_flags; + char 
ipft_name[80]; +} ipftune_t; + +#define ipft_vlong ipft_un.ipftu_long +#define ipft_vint ipft_un.ipftu_int +#define ipft_vshort ipft_un.ipftu_short +#define ipft_vchar ipft_un.ipftu_char + + +/* +** HPUX Port +*/ +#ifdef __hpux +/* HP-UX locking sequence deadlock detection module lock MAJOR ID */ +# define IPF_SMAJ 0 /* temp assignment XXX, not critical */ +#endif + +#if !defined(CDEV_MAJOR) && defined (__FreeBSD_version) && \ + (__FreeBSD_version >= 220000) +# define CDEV_MAJOR 79 +#endif + +/* + * Post NetBSD 1.2 has the PFIL interface for packet filters. This turns + * on those hooks. We don't need any special mods in non-IP Filter code + * with this! + */ +#if (defined(NetBSD) && (NetBSD > 199609) && (NetBSD <= 1991011)) || \ + (defined(NetBSD1_2) && NetBSD1_2 > 1) || \ + (defined(__FreeBSD__) && (__FreeBSD_version >= 500043)) +# if (NetBSD >= 199905) +# define PFIL_HOOKS +# endif +# ifdef PFIL_HOOKS +# define NETBSD_PF +# endif +#endif + +#ifndef _KERNEL +extern int fr_check __P((struct ip *, int, void *, int, mb_t **)); +extern int (*fr_checkp) __P((ip_t *, int, void *, int, mb_t **)); +extern int ipf_log __P((void)); +extern struct ifnet *get_unit __P((char *, int)); +extern char *get_ifname __P((struct ifnet *)); +# if defined(__NetBSD__) || defined(__OpenBSD__) || \ + (_BSDI_VERSION >= 199701) || (__FreeBSD_version >= 300000) +extern int iplioctl __P((int, ioctlcmd_t, caddr_t, int)); +# else +extern int iplioctl __P((int, ioctlcmd_t, caddr_t, int)); +# endif +extern int iplopen __P((dev_t, int)); +extern int iplclose __P((dev_t, int)); +extern void m_freem __P((mb_t *)); +#else /* #ifndef _KERNEL */ +# if defined(__NetBSD__) && defined(PFIL_HOOKS) +extern void ipfilterattach __P((int)); +# endif +extern int ipl_enable __P((void)); +extern int ipl_disable __P((void)); +# ifdef MENTAT +extern int fr_check __P((struct ip *, int, void *, int, void *, + mblk_t **)); +# if SOLARIS +# if SOLARIS2 >= 7 +extern int iplioctl __P((dev_t, int, intptr_t, int, cred_t *, 
int *)); +# else +extern int iplioctl __P((dev_t, int, int *, int, cred_t *, int *)); +# endif +extern int iplopen __P((dev_t *, int, int, cred_t *)); +extern int iplclose __P((dev_t, int, int, cred_t *)); +extern int iplread __P((dev_t, uio_t *, cred_t *)); +extern int iplwrite __P((dev_t, uio_t *, cred_t *)); +# endif +# ifdef __hpux +extern int iplopen __P((dev_t, int, intptr_t, int)); +extern int iplclose __P((dev_t, int, int)); +extern int iplioctl __P((dev_t, int, caddr_t, int)); +extern int iplread __P((dev_t, uio_t *)); +extern int iplwrite __P((dev_t, uio_t *)); +extern int iplselect __P((dev_t, int)); +# endif +extern int ipfsync __P((void)); +extern int fr_qout __P((queue_t *, mblk_t *)); +# else /* MENTAT */ +extern int fr_check __P((struct ip *, int, void *, int, mb_t **)); +extern int (*fr_checkp) __P((ip_t *, int, void *, int, mb_t **)); +extern size_t mbufchainlen __P((mb_t *)); +# ifdef __sgi +# include <sys/cred.h> +extern int iplioctl __P((dev_t, int, caddr_t, int, cred_t *, int *)); +extern int iplopen __P((dev_t *, int, int, cred_t *)); +extern int iplclose __P((dev_t, int, int, cred_t *)); +extern int iplread __P((dev_t, uio_t *, cred_t *)); +extern int iplwrite __P((dev_t, uio_t *, cred_t *)); +extern int ipfsync __P((void)); +extern int ipfilter_sgi_attach __P((void)); +extern void ipfilter_sgi_detach __P((void)); +extern void ipfilter_sgi_intfsync __P((void)); +# else +# ifdef IPFILTER_LKM +extern int iplidentify __P((char *)); +# endif +# if (_BSDI_VERSION >= 199510) || (__FreeBSD_version >= 220000) || \ + (NetBSD >= 199511) || defined(__OpenBSD__) +# if defined(__NetBSD__) || (_BSDI_VERSION >= 199701) || \ + defined(__OpenBSD__) || (__FreeBSD_version >= 300000) +# if (__FreeBSD_version >= 500024) +# if (__FreeBSD_version >= 502116) +extern int iplioctl __P((struct cdev*, u_long, caddr_t, int, struct thread *)); +# else +extern int iplioctl __P((dev_t, u_long, caddr_t, int, struct thread *)); +# endif /* __FreeBSD_version >= 502116 */ +# 
else +extern int iplioctl __P((dev_t, u_long, caddr_t, int, struct proc *)); +# endif /* __FreeBSD_version >= 500024 */ +# else +extern int iplioctl __P((dev_t, int, caddr_t, int, struct proc *)); +# endif +# if (__FreeBSD_version >= 500024) +# if (__FreeBSD_version >= 502116) +extern int iplopen __P((struct cdev*, int, int, struct thread *)); +extern int iplclose __P((struct cdev*, int, int, struct thread *)); +# else +extern int iplopen __P((dev_t, int, int, struct thread *)); +extern int iplclose __P((dev_t, int, int, struct thread *)); +# endif /* __FreeBSD_version >= 502116 */ +# else +extern int iplopen __P((dev_t, int, int, struct proc *)); +extern int iplclose __P((dev_t, int, int, struct proc *)); +# endif /* __FreeBSD_version >= 500024 */ +# else +# ifdef linux +extern int iplioctl __P((struct inode *, struct file *, u_int, u_long)); +# else +extern int iplopen __P((dev_t, int)); +extern int iplclose __P((dev_t, int)); +extern int iplioctl __P((dev_t, int, caddr_t, int)); +# endif +# endif /* (_BSDI_VERSION >= 199510) */ +# if BSD >= 199306 +# if (__FreeBSD_version >= 502116) +extern int iplread __P((struct cdev*, struct uio *, int)); +extern int iplwrite __P((struct cdev*, struct uio *, int)); +# else +extern int iplread __P((dev_t, struct uio *, int)); +extern int iplwrite __P((dev_t, struct uio *, int)); +# endif /* __FreeBSD_version >= 502116 */ +# else +# ifndef linux +extern int iplread __P((dev_t, struct uio *)); +extern int iplwrite __P((dev_t, struct uio *)); +# endif +# endif /* BSD >= 199306 */ +# endif /* __ sgi */ +# endif /* MENTAT */ + +#endif /* #ifndef _KERNEL */ + +extern ipfmutex_t ipl_mutex, ipf_authmx, ipf_rw, ipf_hostmap; +extern ipfmutex_t ipf_timeoutlock, ipf_stinsert, ipf_natio, ipf_nat_new; +extern ipfrwlock_t ipf_mutex, ipf_global, ip_poolrw, ipf_ipidfrag; +extern ipfrwlock_t ipf_frag, ipf_state, ipf_nat, ipf_natfrag, ipf_auth; +extern ipfrwlock_t ipf_frcache; + +extern char *memstr __P((char *, char *, int, int)); +extern int 
count4bits __P((u_32_t)); +extern int count6bits __P((u_32_t *)); +extern int frrequest __P((int, ioctlcmd_t, caddr_t, int, int)); +extern char *getifname __P((struct ifnet *)); +extern int iplattach __P((void)); +extern int ipldetach __P((void)); +extern u_short ipf_cksum __P((u_short *, int)); +extern int copyinptr __P((void *, void *, size_t)); +extern int copyoutptr __P((void *, void *, size_t)); +extern int fr_fastroute __P((mb_t *, mb_t **, fr_info_t *, frdest_t *)); +extern int fr_inobj __P((void *, void *, int)); +extern int fr_inobjsz __P((void *, void *, int, int)); +extern int fr_ioctlswitch __P((int, void *, ioctlcmd_t, int)); +extern int fr_ipftune __P((ioctlcmd_t, void *)); +extern int fr_outobj __P((void *, void *, int)); +extern int fr_outobjsz __P((void *, void *, int, int)); +extern void *fr_pullup __P((mb_t *, fr_info_t *, int)); +extern void fr_resolvedest __P((struct frdest *, int)); +extern int fr_resolvefunc __P((void *)); +extern void *fr_resolvenic __P((char *, int)); +extern int fr_send_icmp_err __P((int, fr_info_t *, int)); +extern int fr_send_reset __P((fr_info_t *)); +#if (__FreeBSD_version < 490000) || !defined(_KERNEL) +extern int ppsratecheck __P((struct timeval *, int *, int)); +#endif +extern ipftq_t *fr_addtimeoutqueue __P((ipftq_t **, u_int)); +extern void fr_deletequeueentry __P((ipftqent_t *)); +extern int fr_deletetimeoutqueue __P((ipftq_t *)); +extern void fr_freetimeoutqueue __P((ipftq_t *)); +extern void fr_movequeue __P((ipftqent_t *, ipftq_t *, ipftq_t *)); +extern void fr_queueappend __P((ipftqent_t *, ipftq_t *, void *)); +extern void fr_queueback __P((ipftqent_t *)); +extern void fr_queuefront __P((ipftqent_t *)); +extern void fr_checkv4sum __P((fr_info_t *)); +extern int fr_checkl4sum __P((fr_info_t *)); +extern int fr_ifpfillv4addr __P((int, struct sockaddr_in *, + struct sockaddr_in *, struct in_addr *, + struct in_addr *)); +extern int fr_coalesce __P((fr_info_t *)); +#ifdef USE_INET6 +extern void fr_checkv6sum 
__P((fr_info_t *)); +extern int fr_ifpfillv6addr __P((int, struct sockaddr_in6 *, + struct sockaddr_in6 *, struct in_addr *, + struct in_addr *)); +#endif + +extern int fr_addipftune __P((ipftuneable_t *)); +extern int fr_delipftune __P((ipftuneable_t *)); + +extern int frflush __P((minor_t, int, int)); +extern void frsync __P((void *)); +extern frgroup_t *fr_addgroup __P((char *, void *, u_32_t, minor_t, int)); +extern int fr_derefrule __P((frentry_t **)); +extern void fr_delgroup __P((char *, minor_t, int)); +extern frgroup_t *fr_findgroup __P((char *, minor_t, int, frgroup_t ***)); + +extern int fr_loginit __P((void)); +extern int ipflog_clear __P((minor_t)); +extern int ipflog_read __P((minor_t, uio_t *)); +extern int ipflog __P((fr_info_t *, u_int)); +extern int ipllog __P((int, fr_info_t *, void **, size_t *, int *, int)); +extern void fr_logunload __P((void)); + +extern frentry_t *fr_acctpkt __P((fr_info_t *, u_32_t *)); +extern int fr_copytolog __P((int, char *, int)); +extern u_short fr_cksum __P((mb_t *, ip_t *, int, void *)); +extern void fr_deinitialise __P((void)); +extern frentry_t *fr_dolog __P((fr_info_t *, u_32_t *)); +extern frentry_t *fr_dstgrpmap __P((fr_info_t *, u_32_t *)); +extern void fr_fixskip __P((frentry_t **, frentry_t *, int)); +extern void fr_forgetifp __P((void *)); +extern frentry_t *fr_getrulen __P((int, char *, u_32_t)); +extern void fr_getstat __P((struct friostat *)); +extern int fr_ifpaddr __P((int, int, void *, + struct in_addr *, struct in_addr *)); +extern int fr_initialise __P((void)); +extern void fr_lock __P((caddr_t, int *)); +extern int fr_makefrip __P((int, ip_t *, fr_info_t *)); +extern int fr_matchtag __P((ipftag_t *, ipftag_t *)); +extern int fr_matchicmpqueryreply __P((int, icmpinfo_t *, + struct icmp *, int)); +extern u_32_t fr_newisn __P((fr_info_t *)); +extern u_short fr_nextipid __P((fr_info_t *)); +extern int fr_rulen __P((int, frentry_t *)); +extern int fr_scanlist __P((fr_info_t *, u_32_t)); +extern 
frentry_t *fr_srcgrpmap __P((fr_info_t *, u_32_t *)); +extern int fr_tcpudpchk __P((fr_info_t *, frtuc_t *)); +extern int fr_verifysrc __P((fr_info_t *fin)); +extern int fr_zerostats __P((char *)); + +extern int fr_running; +extern u_long fr_frouteok[2]; +extern int fr_pass; +extern int fr_flags; +extern int fr_active; +extern int fr_chksrc; +extern int fr_minttl; +extern int fr_refcnt; +extern int fr_control_forwarding; +extern int fr_update_ipid; +extern int nat_logging; +extern int ipstate_logging; +extern int ipl_suppress; +extern int ipl_buffer_sz; +extern int ipl_logmax; +extern int ipl_logall; +extern int ipl_logsize; +extern u_long fr_ticks; +extern fr_info_t frcache[2][8]; +extern char ipfilter_version[]; +extern iplog_t **iplh[IPL_LOGMAX+1], *iplt[IPL_LOGMAX+1]; +extern int iplused[IPL_LOGMAX + 1]; +extern struct frentry *ipfilter[2][2], *ipacct[2][2]; +#ifdef USE_INET6 +extern struct frentry *ipfilter6[2][2], *ipacct6[2][2]; +extern int icmptoicmp6types[ICMP_MAXTYPE+1]; +extern int icmptoicmp6unreach[ICMP_MAX_UNREACH]; +extern int icmpreplytype6[ICMP6_MAXTYPE + 1]; +#endif +extern int icmpreplytype4[ICMP_MAXTYPE + 1]; +extern struct frgroup *ipfgroups[IPL_LOGSIZE][2]; +extern struct filterstats frstats[]; +extern frentry_t *ipfrule_match __P((fr_info_t *)); +extern u_char ipf_iss_secret[32]; +extern ipftuneable_t ipf_tuneables[]; + +#endif /* __IP_FIL_H__ */ diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_frag.h b/usr/src/uts/common/inet/ipf/netinet/ip_frag.h new file mode 100644 index 0000000000..7eb7399010 --- /dev/null +++ b/usr/src/uts/common/inet/ipf/netinet/ip_frag.h @@ -0,0 +1,96 @@ +/* + * Copyright (C) 1993-2001 by Darren Reed. + * + * See the IPFILTER.LICENCE file for details on licencing. + * + * @(#)ip_frag.h 1.5 3/24/96 + * $Id: ip_frag.h,v 2.23.2.2 2005/06/10 18:02:37 darrenr Exp $ + * + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifndef __IP_FRAG_H__ +#define __IP_FRAG_H__ + +#define IPFT_SIZE 257 + +typedef struct ipfr { + struct ipfr *ipfr_hnext, **ipfr_hprev; + struct ipfr *ipfr_next, **ipfr_prev; + void *ipfr_data; + void *ipfr_ifp; + i6addr_t ipfr_source; + i6addr_t ipfr_dest; + u_32_t ipfr_optmsk; + u_short ipfr_secmsk; + u_short ipfr_auth; + u_32_t ipfr_id; + u_char ipfr_p; + u_char ipfr_tos; + u_32_t ipfr_pass; + u_short ipfr_off; + u_char ipfr_ttl; + u_char ipfr_seen0; + u_short ipfr_firstend; + frentry_t *ipfr_rule; +} ipfr_t; + +#define ipfr_src ipfr_source.in4 +#define ipfr_dst ipfr_dest.in4 + +typedef struct ipfrstat { + u_long ifs_exists; /* add & already exists */ + u_long ifs_nomem; + u_long ifs_new; + u_long ifs_hits; + u_long ifs_expire; + u_long ifs_inuse; + u_long ifs_retrans0; + u_long ifs_short; + struct ipfr **ifs_table; + struct ipfr **ifs_nattab; +} ipfrstat_t; + +#define IPFR_CMPSZ (offsetof(ipfr_t, ipfr_tos) - \ + offsetof(ipfr_t, ipfr_ifp)) + +extern int ipfr_size; +extern int fr_ipfrttl; +extern int fr_frag_lock; +extern int fr_fraginit __P((void)); +extern void fr_fragunload __P((void)); +extern ipfrstat_t *fr_fragstats __P((void)); + +extern int fr_newfrag __P((fr_info_t *, u_32_t)); +extern frentry_t *fr_knownfrag __P((fr_info_t *, u_32_t *)); + +extern int fr_nat_newfrag __P((fr_info_t *, u_32_t, struct nat *)); +extern nat_t *fr_nat_knownfrag __P((fr_info_t *)); + +extern int fr_ipid_newfrag __P((fr_info_t *, u_32_t)); +extern u_32_t fr_ipid_knownfrag __P((fr_info_t *)); + +extern void fr_forget __P((void *)); +extern void fr_forgetnat __P((void *)); +extern void fr_fragclear __P((void)); +extern void fr_fragexpire __P((void)); + +#if defined(_KERNEL) && ((BSD >= 199306) || SOLARIS || defined(__sgi) \ + || defined(__osf__) || (defined(__sgi) && (IRIX >= 60500))) +# if defined(SOLARIS2) && (SOLARIS2 < 7) +extern void fr_slowtimer __P((void)); +# else +extern void fr_slowtimer __P((void *)); +# endif +#else +# if 
defined(linux) && defined(_KERNEL) +extern void fr_slowtimer __P((long)); +# else +extern int fr_slowtimer __P((void)); +# endif +#endif + +#endif /* __IP_FRAG_H__ */ diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_ftp_pxy.c b/usr/src/uts/common/inet/ipf/netinet/ip_ftp_pxy.c new file mode 100644 index 0000000000..7d1ed33c96 --- /dev/null +++ b/usr/src/uts/common/inet/ipf/netinet/ip_ftp_pxy.c @@ -0,0 +1,1458 @@ +/* + * Copyright (C) 1997-2003 by Darren Reed + * + * See the IPFILTER.LICENCE file for details on licencing. + * + * $Id: ip_ftp_pxy.c,v 2.88.2.15 2005/03/19 19:38:10 darrenr Exp $ + * + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + * + * Simple FTP transparent proxy for in-kernel use. For use with the NAT + * code. +*/ + +#pragma ident "%Z%%M% %I% %E% SMI" + + +#define IPF_FTP_PROXY + +#define IPF_MINPORTLEN 18 +#define IPF_MAXPORTLEN 30 +#define IPF_MIN227LEN 39 +#define IPF_MAX227LEN 51 +#define IPF_MIN229LEN 47 +#define IPF_MAX229LEN 51 + +#define FTPXY_GO 0 +#define FTPXY_INIT 1 +#define FTPXY_USER_1 2 +#define FTPXY_USOK_1 3 +#define FTPXY_PASS_1 4 +#define FTPXY_PAOK_1 5 +#define FTPXY_AUTH_1 6 +#define FTPXY_AUOK_1 7 +#define FTPXY_ADAT_1 8 +#define FTPXY_ADOK_1 9 +#define FTPXY_ACCT_1 10 +#define FTPXY_ACOK_1 11 +#define FTPXY_USER_2 12 +#define FTPXY_USOK_2 13 +#define FTPXY_PASS_2 14 +#define FTPXY_PAOK_2 15 + +/* + * Values for FTP commands. 
Numerics cover 0-999 + */ +#define FTPXY_C_PASV 1000 + +int ippr_ftp_client __P((fr_info_t *, ip_t *, nat_t *, ftpinfo_t *, int)); +int ippr_ftp_complete __P((char *, size_t)); +int ippr_ftp_in __P((fr_info_t *, ap_session_t *, nat_t *)); +int ippr_ftp_init __P((void)); +void ippr_ftp_fini __P((void)); +int ippr_ftp_new __P((fr_info_t *, ap_session_t *, nat_t *)); +int ippr_ftp_out __P((fr_info_t *, ap_session_t *, nat_t *)); +int ippr_ftp_pasv __P((fr_info_t *, ip_t *, nat_t *, ftpinfo_t *, int)); +int ippr_ftp_epsv __P((fr_info_t *, ip_t *, nat_t *, ftpside_t *, int)); +int ippr_ftp_port __P((fr_info_t *, ip_t *, nat_t *, ftpside_t *, int)); +int ippr_ftp_process __P((fr_info_t *, nat_t *, ftpinfo_t *, int)); +int ippr_ftp_server __P((fr_info_t *, ip_t *, nat_t *, ftpinfo_t *, int)); +int ippr_ftp_valid __P((ftpinfo_t *, int, char *, size_t)); +int ippr_ftp_server_valid __P((ftpside_t *, char *, size_t)); +int ippr_ftp_client_valid __P((ftpside_t *, char *, size_t)); +u_short ippr_ftp_atoi __P((char **)); +int ippr_ftp_pasvreply __P((fr_info_t *, ip_t *, nat_t *, ftpside_t *, + u_int, char *, char *, u_int)); + + +int ftp_proxy_init = 0; +int ippr_ftp_pasvonly = 0; +int ippr_ftp_insecure = 0; /* Do not require logins before transfers */ +int ippr_ftp_pasvrdr = 0; +int ippr_ftp_forcepasv = 0; /* PASV must be last command prior to 227 */ +#if defined(_KERNEL) +int ippr_ftp_debug = 0; +#else +int ippr_ftp_debug = 2; +#endif +/* + * 1 - security + * 2 - errors + * 3 - error debugging + * 4 - parsing errors + * 5 - parsing info + * 6 - parsing debug + */ + +static frentry_t ftppxyfr; +static ipftuneable_t ftptune = { + { &ippr_ftp_debug }, + "ippr_ftp_debug", + 0, + 10, + sizeof(ippr_ftp_debug), + 0, + NULL +}; + + +/* + * Initialize local structures. 
+ */
+int ippr_ftp_init()
+{
+	bzero((char *)&ftppxyfr, sizeof(ftppxyfr));
+	ftppxyfr.fr_ref = 1;
+	ftppxyfr.fr_flags = FR_INQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE;
+	MUTEX_INIT(&ftppxyfr.fr_lock, "FTP Proxy Mutex");
+	ftp_proxy_init = 1;
+	(void) fr_addipftune(&ftptune);	/* registers the ippr_ftp_debug tunable */
+
+	return 0;
+}
+
+/*
+ * Undo ippr_ftp_init(): remove the ippr_ftp_debug tunable and, if the
+ * proxy had been initialised, destroy the rule mutex and clear
+ * ftp_proxy_init.
+ */
+void ippr_ftp_fini()
+{
+	(void) fr_delipftune(&ftptune);
+
+	if (ftp_proxy_init == 1) {
+		MUTEX_DESTROY(&ftppxyfr.fr_lock);
+		ftp_proxy_init = 0;
+	}
+}
+
+/*
+ * Allocate and zero the per-session ftpinfo_t, attach it to the proxy
+ * session (aps_data/aps_psiz), point the read and write pointers of both
+ * sides at the start of their buffers and start the login state machine at
+ * FTPXY_INIT.  Returns 0 on success, -1 if the allocation fails.
+ * fin and nat are not used.
+ */
+int ippr_ftp_new(fin, aps, nat)
+fr_info_t *fin;
+ap_session_t *aps;
+nat_t *nat;
+{
+	ftpinfo_t *ftp;
+	ftpside_t *f;
+
+	KMALLOC(ftp, ftpinfo_t *);
+	if (ftp == NULL)
+		return -1;
+
+	fin = fin;	/* LINT */
+	nat = nat;	/* LINT */
+
+	aps->aps_data = ftp;
+	aps->aps_psiz = sizeof(ftpinfo_t);
+
+	bzero((char *)ftp, sizeof(*ftp));
+	f = &ftp->ftp_side[0];
+	f->ftps_rptr = f->ftps_buf;
+	f->ftps_wptr = f->ftps_buf;
+	f = &ftp->ftp_side[1];
+	f->ftps_rptr = f->ftps_buf;
+	f->ftps_wptr = f->ftps_buf;
+	ftp->ftp_passok = FTPXY_INIT;
+	ftp->ftp_incok = 0;
+	return 0;
+}
+
+/*
+ * Handle a "PORT a1,a2,a3,a4,p1,p2" command found at f->ftps_rptr.
+ *
+ * The address in the command must match nat_inip (outbound) or nat_oip
+ * (inbound) and the port must be >= 1024.  The command is then rewritten in
+ * the packet using nat_oip (inbound) or the packet's ip_src (otherwise), the
+ * IP/fin lengths are adjusted by the size difference, and a NAT entry plus
+ * state for the expected data connection is created, or an existing one is
+ * refreshed.
+ *
+ * Returns APR_INC(inc) once the command has been rewritten, 0 when the
+ * command is left untouched (too short, unparsable, port < 1024, packet
+ * would exceed 65535 bytes) and APR_ERR(1) when the address does not match.
+ */
+int ippr_ftp_port(fin, ip, nat, f, dlen)
+fr_info_t *fin;
+ip_t *ip;
+nat_t *nat;
+ftpside_t *f;
+int dlen;
+{
+	tcphdr_t *tcp, tcph, *tcp2 = &tcph;
+	char newbuf[IPF_FTPBUFSZ], *s;
+	struct in_addr swip, swip2;
+	u_int a1, a2, a3, a4;
+	int inc, off, flags;
+	u_short a5, a6, sp;
+	size_t nlen, olen;
+	fr_info_t fi;
+	nat_t *nat2;
+	mb_t *m;
+
+	m = fin->fin_m;
+	tcp = (tcphdr_t *)fin->fin_dp;
+	off = (char *)tcp - (char *)ip + (TCP_OFF(tcp) << 2) + fin->fin_ipoff;
+
+	/*
+	 * Check for client sending out PORT message.
+	 */
+	if (dlen < IPF_MINPORTLEN) {
+		if (ippr_ftp_debug > 1)
+			printf("ippr_ftp_port:dlen(%d) < IPF_MINPORTLEN\n",
+			       dlen);
+		return 0;
+	}
+	/*
+	 * Skip the PORT command + space
+	 */
+	s = f->ftps_rptr + 5;
+	/*
+	 * Pick out the address components, two at a time.
+	 */
+	a1 = ippr_ftp_atoi(&s);
+	if (s == NULL) {
+		if (ippr_ftp_debug > 1)
+			printf("ippr_ftp_port:ippr_ftp_atoi(%d) failed\n", 1);
+		return 0;
+	}
+	a2 = ippr_ftp_atoi(&s);
+	if (s == NULL) {
+		if (ippr_ftp_debug > 1)
+			printf("ippr_ftp_port:ippr_ftp_atoi(%d) failed\n", 2);
+		return 0;
+	}
+
+	/*
+	 * Check that IP address in the PORT/PASV reply is the same as the
+	 * sender of the command - prevents using PORT for port scanning.
+	 */
+	a1 <<= 16;
+	a1 |= a2;
+	if (((nat->nat_dir == NAT_OUTBOUND) &&
+	     (a1 != ntohl(nat->nat_inip.s_addr))) ||
+	    ((nat->nat_dir == NAT_INBOUND) &&
+	     (a1 != ntohl(nat->nat_oip.s_addr)))) {
+		if (ippr_ftp_debug > 0)
+			printf("ippr_ftp_port:%s != nat->nat_inip\n", "a1");
+		return APR_ERR(1);
+	}
+
+	a5 = ippr_ftp_atoi(&s);
+	if (s == NULL) {
+		if (ippr_ftp_debug > 1)
+			printf("ippr_ftp_port:ippr_ftp_atoi(%d) failed\n", 3);
+		return 0;
+	}
+	if (*s == ')')
+		s++;
+
+	/*
+	 * check for CR-LF at the end.
+	 */
+	if (*s == '\n')
+		s--;	/* ippr_ftp_atoi() consumed the character after the number */
+	if ((*s == '\r') && (*(s + 1) == '\n')) {
+		s += 2;
+		a6 = a5 & 0xff;
+	} else {
+		if (ippr_ftp_debug > 1)
+			printf("ippr_ftp_port:missing %s\n", "cr-lf");
+		return 0;
+	}
+
+	a5 >>= 8;
+	a5 &= 0xff;
+	sp = a5 << 8 | a6;	/* 16 bit data port from the last number pair */
+	/*
+	 * Don't allow the PORT command to specify a port < 1024 due to
+	 * security crap.
+	 */
+	if (sp < 1024) {
+		if (ippr_ftp_debug > 0)
+			printf("ippr_ftp_port:sp(%d) < 1024\n", sp);
+		return 0;
+	}
+	/*
+	 * Calculate new address parts for PORT command
+	 */
+	if (nat->nat_dir == NAT_INBOUND)
+		a1 = ntohl(nat->nat_oip.s_addr);
+	else
+		a1 = ntohl(ip->ip_src.s_addr);
+	a2 = (a1 >> 16) & 0xff;
+	a3 = (a1 >> 8) & 0xff;
+	a4 = a1 & 0xff;
+	a1 >>= 24;
+	olen = s - f->ftps_rptr;	/* length of the original command, CR-LF included */
+	/* DO NOT change this to snprintf! */
+#if defined(SNPRINTF) && defined(_KERNEL)
+	(void) SNPRINTF(newbuf, sizeof(newbuf), "%s %u,%u,%u,%u,%u,%u\r\n",
+			"PORT", a1, a2, a3, a4, a5, a6);
+#else
+	(void) sprintf(newbuf, "%s %u,%u,%u,%u,%u,%u\r\n",
+		       "PORT", a1, a2, a3, a4, a5, a6);
+#endif
+
+	nlen = strlen(newbuf);
+	inc = nlen - olen;	/* change in payload length caused by the rewrite */
+	if ((inc + ip->ip_len) > 65535) {
+		if (ippr_ftp_debug > 0)
+			printf("ippr_ftp_port:inc(%d) + ip->ip_len > 65535\n",
+			       inc);
+		return 0;
+	}
+
+#if !defined(_KERNEL)
+	bcopy(newbuf, MTOD(m, char *) + off, nlen);
+#else
+# if defined(MENTAT)
+	if (inc < 0)
+		(void)adjmsg(m, inc);
+# else /* defined(MENTAT) */
+	/*
+	 * m_adj takes care of pkthdr.len, if required and treats inc<0 to
+	 * mean remove -len bytes from the end of the packet.
+	 * The mbuf chain will be extended if necessary by m_copyback().
+	 */
+	if (inc < 0)
+		m_adj(m, inc);
+# endif /* defined(MENTAT) */
+#endif /* !defined(_KERNEL) */
+	COPYBACK(m, off, nlen, newbuf);
+
+	if (inc != 0) {
+		ip->ip_len += inc;
+		fin->fin_dlen += inc;
+		fin->fin_plen += inc;
+	}
+
+	/*
+	 * The server may not make the connection back from port 20, but
+	 * it is the most likely so use it here to check for a conflicting
+	 * mapping.
+	 */
+	bcopy((char *)fin, (char *)&fi, sizeof(fi));	/* work on a private copy of fin */
+	fi.fin_state = NULL;
+	fi.fin_nat = NULL;
+	fi.fin_flx |= FI_IGNORE;
+	fi.fin_data[0] = sp;
+	fi.fin_data[1] = fin->fin_data[1] - 1;
+	/*
+	 * Add skeleton NAT entry for connection which will come back the
+	 * other way.
+	 */
+	if (nat->nat_dir == NAT_OUTBOUND)
+		nat2 = nat_outlookup(&fi, NAT_SEARCH|IPN_TCP, nat->nat_p,
+				     nat->nat_inip, nat->nat_oip);
+	else
+		nat2 = nat_inlookup(&fi, NAT_SEARCH|IPN_TCP, nat->nat_p,
+				    nat->nat_inip, nat->nat_oip);
+	if (nat2 == NULL) {
+		int slen;
+
+		slen = ip->ip_len;	/* ip_len, ip_src and ip_dst are restored below */
+		ip->ip_len = fin->fin_hlen + sizeof(*tcp2);
+		bzero((char *)tcp2, sizeof(*tcp2));
+		tcp2->th_win = htons(8192);
+		tcp2->th_sport = htons(sp);
+		TCP_OFF_A(tcp2, 5);
+		tcp2->th_flags = TH_SYN;
+		tcp2->th_dport = 0; /* XXX - don't specify remote port */
+		fi.fin_data[1] = 0;
+		fi.fin_dlen = sizeof(*tcp2);
+		fi.fin_plen = fi.fin_hlen + sizeof(*tcp2);
+		fi.fin_dp = (char *)tcp2;
+		fi.fin_fr = &ftppxyfr;
+		fi.fin_out = nat->nat_dir;
+		fi.fin_flx &= FI_LOWTTL|FI_FRAG|FI_TCPUDP|FI_OPTIONS|FI_IGNORE;
+		swip = ip->ip_src;
+		swip2 = ip->ip_dst;
+		if (nat->nat_dir == NAT_OUTBOUND) {
+			fi.fin_fi.fi_saddr = nat->nat_inip.s_addr;
+			ip->ip_src = nat->nat_inip;
+		} else if (nat->nat_dir == NAT_INBOUND) {
+			fi.fin_fi.fi_saddr = nat->nat_oip.s_addr;
+			ip->ip_src = nat->nat_oip;
+		}
+
+		flags = NAT_SLAVE|IPN_TCP|SI_W_DPORT;
+		if (nat->nat_dir == NAT_INBOUND)
+			flags |= NAT_NOTRULEPORT;
+		nat2 = nat_new(&fi, nat->nat_ptr, NULL, flags, nat->nat_dir);
+
+		if (nat2 != NULL) {
+			(void) nat_proto(&fi, nat2, IPN_TCP);
+			nat_update(&fi, nat2, nat->nat_ptr);
+			fi.fin_ifp = NULL;
+			if (nat->nat_dir == NAT_INBOUND) {
+				fi.fin_fi.fi_daddr = nat->nat_inip.s_addr;
+				ip->ip_dst = nat->nat_inip;
+			}
+			(void) fr_addstate(&fi, &nat2->nat_state, SI_W_DPORT);
+			if (fi.fin_state != NULL)
+				fr_statederef(&fi, (ipstate_t **)&fi.fin_state);
+		}
+		ip->ip_len = slen;
+		ip->ip_src = swip;
+		ip->ip_dst = swip2;
+	} else {
+		ipstate_t *is;
+
+		nat_update(&fi, nat2, nat->nat_ptr);
+		READ_ENTER(&ipf_state);
+		is = nat2->nat_state;
+		if (is != NULL) {
+			MUTEX_ENTER(&is->is_lock);
+			(void)fr_tcp_age(&is->is_sti, &fi, ips_tqtqb,
+					 is->is_flags);
+			MUTEX_EXIT(&is->is_lock);
+		}
+		RWLOCK_EXIT(&ipf_state);
+	}
+	return APR_INC(inc);
+}
+ + +int ippr_ftp_client(fin, ip, nat, ftp, dlen) +fr_info_t *fin; +nat_t *nat; +ftpinfo_t *ftp; +ip_t *ip; +int dlen; +{ + char *rptr, *wptr, cmd[6], c; + ftpside_t *f; + int inc, i; + + inc = 0; + f = &ftp->ftp_side[0]; + rptr = f->ftps_rptr; + wptr = f->ftps_wptr; + + for (i = 0; (i < 5) && (i < dlen); i++) { + c = rptr[i]; + if (ISALPHA(c)) { + cmd[i] = TOUPPER(c); + } else { + cmd[i] = c; + } + } + cmd[i] = '\0'; + + ftp->ftp_incok = 0; + if (!strncmp(cmd, "USER ", 5) || !strncmp(cmd, "XAUT ", 5)) { + if (ftp->ftp_passok == FTPXY_ADOK_1 || + ftp->ftp_passok == FTPXY_AUOK_1) { + ftp->ftp_passok = FTPXY_USER_2; + ftp->ftp_incok = 1; + } else { + ftp->ftp_passok = FTPXY_USER_1; + ftp->ftp_incok = 1; + } + } else if (!strncmp(cmd, "AUTH ", 5)) { + ftp->ftp_passok = FTPXY_AUTH_1; + ftp->ftp_incok = 1; + } else if (!strncmp(cmd, "PASS ", 5)) { + if (ftp->ftp_passok == FTPXY_USOK_1) { + ftp->ftp_passok = FTPXY_PASS_1; + ftp->ftp_incok = 1; + } else if (ftp->ftp_passok == FTPXY_USOK_2) { + ftp->ftp_passok = FTPXY_PASS_2; + ftp->ftp_incok = 1; + } + } else if ((ftp->ftp_passok == FTPXY_AUOK_1) && + !strncmp(cmd, "ADAT ", 5)) { + ftp->ftp_passok = FTPXY_ADAT_1; + ftp->ftp_incok = 1; + } else if ((ftp->ftp_passok == FTPXY_PAOK_1 || + ftp->ftp_passok == FTPXY_PAOK_2) && + !strncmp(cmd, "ACCT ", 5)) { + ftp->ftp_passok = FTPXY_ACCT_1; + ftp->ftp_incok = 1; + } else if ((ftp->ftp_passok == FTPXY_GO) && !ippr_ftp_pasvonly && + !strncmp(cmd, "PORT ", 5)) { + inc = ippr_ftp_port(fin, ip, nat, f, dlen); + } else if (ippr_ftp_insecure && !ippr_ftp_pasvonly && + !strncmp(cmd, "PORT ", 5)) { + inc = ippr_ftp_port(fin, ip, nat, f, dlen); + } + + while ((*rptr++ != '\n') && (rptr < wptr)) + ; + f->ftps_rptr = rptr; + return inc; +} + + +int ippr_ftp_pasv(fin, ip, nat, ftp, dlen) +fr_info_t *fin; +ip_t *ip; +nat_t *nat; +ftpinfo_t *ftp; +int dlen; +{ + u_int a1, a2, a3, a4, data_ip; + char newbuf[IPF_FTPBUFSZ]; + char *s, *brackets[2]; + u_short a5, a6; + ftpside_t *f; + + if 
(ippr_ftp_forcepasv != 0 && + ftp->ftp_side[0].ftps_cmds != FTPXY_C_PASV) { + if (ippr_ftp_debug > 0) + printf("ippr_ftp_pasv:ftps_cmds(%d) != FTPXY_C_PASV\n", + ftp->ftp_side[0].ftps_cmds); + return 0; + } + + f = &ftp->ftp_side[1]; + +#define PASV_REPLEN 24 + /* + * Check for PASV reply message. + */ + if (dlen < IPF_MIN227LEN) { + if (ippr_ftp_debug > 1) + printf("ippr_ftp_pasv:dlen(%d) < IPF_MIN227LEN\n", + dlen); + return 0; + } else if (strncmp(f->ftps_rptr, + "227 Entering Passive Mod", PASV_REPLEN)) { + if (ippr_ftp_debug > 0) + printf("ippr_ftp_pasv:%d reply wrong\n", 227); + return 0; + } + + brackets[0] = ""; + brackets[1] = ""; + /* + * Skip the PASV reply + space + */ + s = f->ftps_rptr + PASV_REPLEN; + while (*s && !ISDIGIT(*s)) { + if (*s == '(') { + brackets[0] = "("; + brackets[1] = ")"; + } + s++; + } + + /* + * Pick out the address components, two at a time. + */ + a1 = ippr_ftp_atoi(&s); + if (s == NULL) { + if (ippr_ftp_debug > 1) + printf("ippr_ftp_pasv:ippr_ftp_atoi(%d) failed\n", 1); + return 0; + } + a2 = ippr_ftp_atoi(&s); + if (s == NULL) { + if (ippr_ftp_debug > 1) + printf("ippr_ftp_pasv:ippr_ftp_atoi(%d) failed\n", 2); + return 0; + } + + /* + * check that IP address in the PASV reply is the same as the + * sender of the command - prevents using PASV for port scanning. + */ + a1 <<= 16; + a1 |= a2; + + if (((nat->nat_dir == NAT_INBOUND) && + (a1 != ntohl(nat->nat_inip.s_addr))) || + ((nat->nat_dir == NAT_OUTBOUND) && + (a1 != ntohl(nat->nat_oip.s_addr)))) { + if (ippr_ftp_debug > 0) + printf("ippr_ftp_pasv:%s != nat->nat_oip\n", "a1"); + return 0; + } + + a5 = ippr_ftp_atoi(&s); + if (s == NULL) { + if (ippr_ftp_debug > 1) + printf("ippr_ftp_pasv:ippr_ftp_atoi(%d) failed\n", 3); + return 0; + } + + if (*s == ')') + s++; + if (*s == '.') + s++; + if (*s == '\n') + s--; + /* + * check for CR-LF at the end. 
+ */ + if ((*s == '\r') && (*(s + 1) == '\n')) { + s += 2; + } else { + if (ippr_ftp_debug > 1) + printf("ippr_ftp_pasv:missing %s", "cr-lf\n"); + return 0; + } + + a6 = a5 & 0xff; + a5 >>= 8; + /* + * Calculate new address parts for 227 reply + */ + if (nat->nat_dir == NAT_INBOUND) { + data_ip = nat->nat_outip.s_addr; + a1 = ntohl(data_ip); + } else + data_ip = htonl(a1); + + a2 = (a1 >> 16) & 0xff; + a3 = (a1 >> 8) & 0xff; + a4 = a1 & 0xff; + a1 >>= 24; + +#if defined(SNPRINTF) && defined(_KERNEL) + (void) SNPRINTF(newbuf, sizeof(newbuf), "%s %s%u,%u,%u,%u,%u,%u%s\r\n", + "227 Entering Passive Mode", brackets[0], a1, a2, a3, a4, + a5, a6, brackets[1]); +#else + (void) sprintf(newbuf, "%s %s%u,%u,%u,%u,%u,%u%s\r\n", + "227 Entering Passive Mode", brackets[0], a1, a2, a3, a4, + a5, a6, brackets[1]); +#endif + return ippr_ftp_pasvreply(fin, ip, nat, f, (a5 << 8 | a6), + newbuf, s, data_ip); +} + +int ippr_ftp_pasvreply(fin, ip, nat, f, port, newmsg, s, data_ip) +fr_info_t *fin; +ip_t *ip; +nat_t *nat; +ftpside_t *f; +u_int port; +char *newmsg; +char *s; +u_int data_ip; +{ + int inc, off, nflags, sflags; + tcphdr_t *tcp, tcph, *tcp2; + struct in_addr swip, swip2; + struct in_addr data_addr; + size_t nlen, olen; + fr_info_t fi; + nat_t *nat2; + mb_t *m; + + m = fin->fin_m; + tcp = (tcphdr_t *)fin->fin_dp; + off = (char *)tcp - (char *)ip + (TCP_OFF(tcp) << 2) + fin->fin_ipoff; + + data_addr.s_addr = data_ip; + tcp2 = &tcph; + inc = 0; + + + olen = s - f->ftps_rptr; + nlen = strlen(newmsg); + inc = nlen - olen; + if ((inc + ip->ip_len) > 65535) { + if (ippr_ftp_debug > 0) + printf("ippr_ftp_pasv:inc(%d) + ip->ip_len > 65535\n", + inc); + return 0; + } + +#if !defined(_KERNEL) + bcopy(newmsg, MTOD(m, char *) + off, nlen); +#else +# if defined(MENTAT) + if (inc < 0) + (void)adjmsg(m, inc); +# else /* defined(MENTAT) */ + /* + * m_adj takes care of pkthdr.len, if required and treats inc<0 to + * mean remove -len bytes from the end of the packet. 
+ * The mbuf chain will be extended if necessary by m_copyback(). + */ + if (inc < 0) + m_adj(m, inc); +# endif /* defined(MENTAT) */ +#endif /* !defined(_KERNEL) */ + COPYBACK(m, off, nlen, newmsg); + + if (inc != 0) { + ip->ip_len += inc; + fin->fin_dlen += inc; + fin->fin_plen += inc; + } + + /* + * Add skeleton NAT entry for connection which will come back the + * other way. + */ + bcopy((char *)fin, (char *)&fi, sizeof(fi)); + fi.fin_state = NULL; + fi.fin_nat = NULL; + fi.fin_flx |= FI_IGNORE; + fi.fin_data[0] = 0; + fi.fin_data[1] = port; + nflags = IPN_TCP|SI_W_SPORT; + if (ippr_ftp_pasvrdr && f->ftps_ifp) + nflags |= SI_W_DPORT; + if (nat->nat_dir == NAT_OUTBOUND) + nat2 = nat_outlookup(&fi, nflags|NAT_SEARCH, + nat->nat_p, nat->nat_inip, nat->nat_oip); + else + nat2 = nat_inlookup(&fi, nflags|NAT_SEARCH, + nat->nat_p, nat->nat_inip, nat->nat_oip); + if (nat2 == NULL) { + int slen; + + slen = ip->ip_len; + ip->ip_len = fin->fin_hlen + sizeof(*tcp2); + bzero((char *)tcp2, sizeof(*tcp2)); + tcp2->th_win = htons(8192); + tcp2->th_sport = 0; /* XXX - fake it for nat_new */ + TCP_OFF_A(tcp2, 5); + tcp2->th_flags = TH_SYN; + fi.fin_data[1] = port; + fi.fin_dlen = sizeof(*tcp2); + tcp2->th_dport = htons(port); + fi.fin_data[0] = 0; + fi.fin_dp = (char *)tcp2; + fi.fin_plen = fi.fin_hlen + sizeof(*tcp); + fi.fin_fr = &ftppxyfr; + fi.fin_out = nat->nat_dir; + fi.fin_flx &= FI_LOWTTL|FI_FRAG|FI_TCPUDP|FI_OPTIONS|FI_IGNORE; + swip = ip->ip_src; + swip2 = ip->ip_dst; + if (nat->nat_dir == NAT_OUTBOUND) { + fi.fin_fi.fi_daddr = data_addr.s_addr; + fi.fin_fi.fi_saddr = nat->nat_inip.s_addr; + ip->ip_dst = data_addr; + ip->ip_src = nat->nat_inip; + } else if (nat->nat_dir == NAT_INBOUND) { + fi.fin_fi.fi_saddr = nat->nat_oip.s_addr; + fi.fin_fi.fi_daddr = nat->nat_outip.s_addr; + ip->ip_src = nat->nat_oip; + ip->ip_dst = nat->nat_outip; + } + + sflags = nflags; + nflags |= NAT_SLAVE; + if (nat->nat_dir == NAT_INBOUND) + nflags |= NAT_NOTRULEPORT; + nat2 = nat_new(&fi, 
nat->nat_ptr, NULL, nflags, nat->nat_dir); + if (nat2 != NULL) { + (void) nat_proto(&fi, nat2, IPN_TCP); + nat_update(&fi, nat2, nat->nat_ptr); + fi.fin_ifp = NULL; + if (nat->nat_dir == NAT_INBOUND) { + fi.fin_fi.fi_daddr = nat->nat_inip.s_addr; + ip->ip_dst = nat->nat_inip; + } + (void) fr_addstate(&fi, &nat2->nat_state, sflags); + if (fi.fin_state != NULL) + fr_statederef(&fi, (ipstate_t **)&fi.fin_state); + } + + ip->ip_len = slen; + ip->ip_src = swip; + ip->ip_dst = swip2; + } else { + ipstate_t *is; + + nat_update(&fi, nat2, nat->nat_ptr); + READ_ENTER(&ipf_state); + is = nat2->nat_state; + if (is != NULL) { + MUTEX_ENTER(&is->is_lock); + (void)fr_tcp_age(&is->is_sti, &fi, ips_tqtqb, + is->is_flags); + MUTEX_EXIT(&is->is_lock); + } + RWLOCK_EXIT(&ipf_state); + } + return inc; +} + + +int ippr_ftp_server(fin, ip, nat, ftp, dlen) +fr_info_t *fin; +ip_t *ip; +nat_t *nat; +ftpinfo_t *ftp; +int dlen; +{ + char *rptr, *wptr; + ftpside_t *f; + int inc; + + inc = 0; + f = &ftp->ftp_side[1]; + rptr = f->ftps_rptr; + wptr = f->ftps_wptr; + + if (*rptr == ' ') + goto server_cmd_ok; + if (!ISDIGIT(*rptr) || !ISDIGIT(*(rptr + 1)) || !ISDIGIT(*(rptr + 2))) + return 0; + if (ftp->ftp_passok == FTPXY_GO) { + if (!strncmp(rptr, "227 ", 4)) + inc = ippr_ftp_pasv(fin, ip, nat, ftp, dlen); + else if (!strncmp(rptr, "229 ", 4)) + inc = ippr_ftp_epsv(fin, ip, nat, f, dlen); + } else if (ippr_ftp_insecure && !strncmp(rptr, "227 ", 4)) { + inc = ippr_ftp_pasv(fin, ip, nat, ftp, dlen); + } else if (ippr_ftp_insecure && !strncmp(rptr, "229 ", 4)) { + inc = ippr_ftp_epsv(fin, ip, nat, f, dlen); + } else if (*rptr == '5' || *rptr == '4') + ftp->ftp_passok = FTPXY_INIT; + else if (ftp->ftp_incok) { + if (*rptr == '3') { + if (ftp->ftp_passok == FTPXY_ACCT_1) + ftp->ftp_passok = FTPXY_GO; + else + ftp->ftp_passok++; + } else if (*rptr == '2') { + switch (ftp->ftp_passok) + { + case FTPXY_USER_1 : + case FTPXY_USER_2 : + case FTPXY_PASS_1 : + case FTPXY_PASS_2 : + case FTPXY_ACCT_1 : + 
ftp->ftp_passok = FTPXY_GO; + break; + default : + ftp->ftp_passok += 3; + break; + } + } + } +server_cmd_ok: + ftp->ftp_incok = 0; + + while ((*rptr++ != '\n') && (rptr < wptr)) + ; + f->ftps_rptr = rptr; + return inc; +} + + +/* + * Look to see if the buffer starts with something which we recognise as + * being the correct syntax for the FTP protocol. + */ +int ippr_ftp_client_valid(ftps, buf, len) +ftpside_t *ftps; +char *buf; +size_t len; +{ + register char *s, c, pc; + register size_t i = len; + char cmd[5]; + + s = buf; + + if (ftps->ftps_junk == 1) + return 1; + + if (i < 5) { + if (ippr_ftp_debug > 3) + printf("ippr_ftp_client_valid:i(%d) < 5\n", (int)i); + return 2; + } + + i--; + c = *s++; + + if (ISALPHA(c)) { + cmd[0] = TOUPPER(c); + c = *s++; + i--; + if (ISALPHA(c)) { + cmd[1] = TOUPPER(c); + c = *s++; + i--; + if (ISALPHA(c)) { + cmd[2] = TOUPPER(c); + c = *s++; + i--; + if (ISALPHA(c)) { + cmd[3] = TOUPPER(c); + c = *s++; + i--; + if ((c != ' ') && (c != '\r')) + goto bad_client_command; + } else if ((c != ' ') && (c != '\r')) + goto bad_client_command; + } else + goto bad_client_command; + } else + goto bad_client_command; + } else { +bad_client_command: + if (ippr_ftp_debug > 3) + printf("%s:bad:junk %d len %d/%d c 0x%x buf [%*s]\n", + "ippr_ftp_client_valid", + ftps->ftps_junk, (int)len, (int)i, c, + (int)len, buf); + return 1; + } + + for (; i; i--) { + pc = c; + c = *s++; + if ((pc == '\r') && (c == '\n')) { + cmd[4] = '\0'; + if (!strcmp(cmd, "PASV")) + ftps->ftps_cmds = FTPXY_C_PASV; + else + ftps->ftps_cmds = 0; + return 0; + } + } +#if !defined(_KERNEL) + printf("ippr_ftp_client_valid:junk after cmd[%*.*s]\n", + (int)len, (int)len, buf); +#endif + return 2; +} + + +int ippr_ftp_server_valid(ftps, buf, len) +ftpside_t *ftps; +char *buf; +size_t len; +{ + register char *s, c, pc; + register size_t i = len; + int cmd; + + s = buf; + cmd = 0; + + if (ftps->ftps_junk == 1) + return 1; + + if (i < 5) { + if (ippr_ftp_debug > 3) + 
printf("ippr_ftp_servert_valid:i(%d) < 5\n", (int)i); + return 2; + } + + c = *s++; + i--; + if (c == ' ') + goto search_eol; + + if (ISDIGIT(c)) { + cmd = (c - '0') * 100; + c = *s++; + i--; + if (ISDIGIT(c)) { + cmd += (c - '0') * 10; + c = *s++; + i--; + if (ISDIGIT(c)) { + cmd += (c - '0'); + c = *s++; + i--; + if ((c != '-') && (c != ' ')) + goto bad_server_command; + } else + goto bad_server_command; + } else + goto bad_server_command; + } else { +bad_server_command: + if (ippr_ftp_debug > 3) + printf("%s:bad:junk %d len %d/%d c 0x%x buf [%*s]\n", + "ippr_ftp_server_valid", + ftps->ftps_junk, (int)len, (int)i, + c, (int)len, buf); + return 1; + } +search_eol: + for (; i; i--) { + pc = c; + c = *s++; + if ((pc == '\r') && (c == '\n')) { + ftps->ftps_cmds = cmd; + return 0; + } + } + if (ippr_ftp_debug > 3) + printf("ippr_ftp_server_valid:junk after cmd[%*s]\n", + (int)len, buf); + return 2; +} + + +int ippr_ftp_valid(ftp, side, buf, len) +ftpinfo_t *ftp; +int side; +char *buf; +size_t len; +{ + ftpside_t *ftps; + int ret; + + ftps = &ftp->ftp_side[side]; + + if (side == 0) + ret = ippr_ftp_client_valid(ftps, buf, len); + else + ret = ippr_ftp_server_valid(ftps, buf, len); + return ret; +} + + +/* + * For map rules, the following applies: + * rv == 0 for outbound processing, + * rv == 1 for inbound processing. + * For rdr rules, the following applies: + * rv == 0 for inbound processing, + * rv == 1 for outbound processing. 
+ */ +int ippr_ftp_process(fin, nat, ftp, rv) +fr_info_t *fin; +nat_t *nat; +ftpinfo_t *ftp; +int rv; +{ + int mlen, len, off, inc, i, sel, sel2, ok, ackoff, seqoff; + char *rptr, *wptr, *s; + u_32_t thseq, thack; + ap_session_t *aps; + ftpside_t *f, *t; + tcphdr_t *tcp; + ip_t *ip; + mb_t *m; + + m = fin->fin_m; + ip = fin->fin_ip; + tcp = (tcphdr_t *)fin->fin_dp; + off = (char *)tcp - (char *)ip + (TCP_OFF(tcp) << 2) + fin->fin_ipoff; + + f = &ftp->ftp_side[rv]; + t = &ftp->ftp_side[1 - rv]; + thseq = ntohl(tcp->th_seq); + thack = ntohl(tcp->th_ack); + +#ifdef __sgi + mlen = fin->fin_plen - off; +#else + mlen = MSGDSIZE(m) - off; +#endif + if (ippr_ftp_debug > 4) + printf("ippr_ftp_process: mlen %d\n", mlen); + + if (mlen <= 0) { + if ((tcp->th_flags & TH_OPENING) == TH_OPENING) { + f->ftps_seq[0] = thseq + 1; + t->ftps_seq[0] = thack; + } + return 0; + } + aps = nat->nat_aps; + + sel = aps->aps_sel[1 - rv]; + sel2 = aps->aps_sel[rv]; + if (rv == 0) { + seqoff = aps->aps_seqoff[sel]; + if (aps->aps_seqmin[sel] > seqoff + thseq) + seqoff = aps->aps_seqoff[!sel]; + ackoff = aps->aps_ackoff[sel2]; + if (aps->aps_ackmin[sel2] > ackoff + thack) + ackoff = aps->aps_ackoff[!sel2]; + } else { + seqoff = aps->aps_ackoff[sel]; + if (ippr_ftp_debug > 2) + printf("seqoff %d thseq %x ackmin %x\n", seqoff, thseq, + aps->aps_ackmin[sel]); + if (aps->aps_ackmin[sel] > seqoff + thseq) + seqoff = aps->aps_ackoff[!sel]; + + ackoff = aps->aps_seqoff[sel2]; + if (ippr_ftp_debug > 2) + printf("ackoff %d thack %x seqmin %x\n", ackoff, thack, + aps->aps_seqmin[sel2]); + if (ackoff > 0) { + if (aps->aps_seqmin[sel2] > ackoff + thack) + ackoff = aps->aps_seqoff[!sel2]; + } else { + if (aps->aps_seqmin[sel2] > thack) + ackoff = aps->aps_seqoff[!sel2]; + } + } + if (ippr_ftp_debug > 2) { + printf("%s: %x seq %x/%d ack %x/%d len %d/%d off %d\n", + rv ? 
"IN" : "OUT", tcp->th_flags, thseq, seqoff, + thack, ackoff, mlen, fin->fin_plen, off); + printf("sel %d seqmin %x/%x offset %d/%d\n", sel, + aps->aps_seqmin[sel], aps->aps_seqmin[sel2], + aps->aps_seqoff[sel], aps->aps_seqoff[sel2]); + printf("sel %d ackmin %x/%x offset %d/%d\n", sel2, + aps->aps_ackmin[sel], aps->aps_ackmin[sel2], + aps->aps_ackoff[sel], aps->aps_ackoff[sel2]); + } + + /* + * XXX - Ideally, this packet should get dropped because we now know + * that it is out of order (and there is no real danger in doing so + * apart from causing packets to go through here ordered). + */ + if (ippr_ftp_debug > 2) { + printf("rv %d t:seq[0] %x seq[1] %x %d/%d\n", + rv, t->ftps_seq[0], t->ftps_seq[1], seqoff, ackoff); + } + + ok = 0; + if (t->ftps_seq[0] == 0) { + t->ftps_seq[0] = thack; + ok = 1; + } else { + if (ackoff == 0) { + if (t->ftps_seq[0] == thack) + ok = 1; + else if (t->ftps_seq[1] == thack) { + t->ftps_seq[0] = thack; + ok = 1; + } + } else { + if (t->ftps_seq[0] + ackoff == thack) + ok = 1; + else if (t->ftps_seq[0] == thack + ackoff) + ok = 1; + else if (t->ftps_seq[1] + ackoff == thack) { + t->ftps_seq[0] = thack - ackoff; + ok = 1; + } else if (t->ftps_seq[1] == thack + ackoff) { + t->ftps_seq[0] = thack - ackoff; + ok = 1; + } + } + } + + if (ippr_ftp_debug > 2) { + if (!ok) + printf("%s ok\n", "not"); + } + + if (!mlen) { + if (t->ftps_seq[0] + ackoff != thack) { + if (ippr_ftp_debug > 1) { + printf("%s:seq[0](%x) + (%x) != (%x)\n", + "ippr_ftp_process", t->ftps_seq[0], + ackoff, thack); + } + return APR_ERR(1); + } + + if (ippr_ftp_debug > 2) { + printf("ippr_ftp_process:f:seq[0] %x seq[1] %x\n", + f->ftps_seq[0], f->ftps_seq[1]); + } + + if (tcp->th_flags & TH_FIN) { + if (thseq == f->ftps_seq[1]) { + f->ftps_seq[0] = f->ftps_seq[1] - seqoff; + f->ftps_seq[1] = thseq + 1 - seqoff; + } else { + if (ippr_ftp_debug > 1) { + printf("FIN: thseq %x seqoff %d ftps_seq %x\n", + thseq, seqoff, f->ftps_seq[0]); + } + return APR_ERR(1); + } + } + 
f->ftps_len = 0; + return 0; + } + + ok = 0; + if ((thseq == f->ftps_seq[0]) || (thseq == f->ftps_seq[1])) { + ok = 1; + /* + * Retransmitted data packet. + */ + } else if ((thseq + mlen == f->ftps_seq[0]) || + (thseq + mlen == f->ftps_seq[1])) { + ok = 1; + } + + if (ok == 0) { + inc = thseq - f->ftps_seq[0]; + if (ippr_ftp_debug > 1) { + printf("inc %d sel %d rv %d\n", inc, sel, rv); + printf("th_seq %x ftps_seq %x/%x\n", + thseq, f->ftps_seq[0], f->ftps_seq[1]); + printf("ackmin %x ackoff %d\n", aps->aps_ackmin[sel], + aps->aps_ackoff[sel]); + printf("seqmin %x seqoff %d\n", aps->aps_seqmin[sel], + aps->aps_seqoff[sel]); + } + + return APR_ERR(1); + } + + inc = 0; + rptr = f->ftps_rptr; + wptr = f->ftps_wptr; + f->ftps_seq[0] = thseq; + f->ftps_seq[1] = f->ftps_seq[0] + mlen; + f->ftps_len = mlen; + + while (mlen > 0) { + len = MIN(mlen, sizeof(f->ftps_buf) - (wptr - rptr)); + COPYDATA(m, off, len, wptr); + mlen -= len; + off += len; + wptr += len; + + if (ippr_ftp_debug > 3) + printf("%s:len %d/%d off %d wptr %lx junk %d [%*s]\n", + "ippr_ftp_process", + len, mlen, off, (u_long)wptr, f->ftps_junk, + len, rptr); + + f->ftps_wptr = wptr; + if (f->ftps_junk != 0) { + i = f->ftps_junk; + f->ftps_junk = ippr_ftp_valid(ftp, rv, rptr, + wptr - rptr); + + if (ippr_ftp_debug > 5) + printf("%s:junk %d -> %d\n", + "ippr_ftp_process", i, f->ftps_junk); + + if (f->ftps_junk != 0) { + if (wptr - rptr == sizeof(f->ftps_buf)) { + if (ippr_ftp_debug > 4) + printf("%s:full buffer\n", + "ippr_ftp_process"); + f->ftps_rptr = f->ftps_buf; + f->ftps_wptr = f->ftps_buf; + rptr = f->ftps_rptr; + wptr = f->ftps_wptr; + /* + * Because we throw away data here that + * we would otherwise parse, set the + * junk flag to indicate just ignore + * any data upto the next CRLF. 
+ */ + f->ftps_junk = 1; + continue; + } + } + } + + while ((f->ftps_junk == 0) && (wptr > rptr)) { + len = wptr - rptr; + f->ftps_junk = ippr_ftp_valid(ftp, rv, rptr, len); + + if (ippr_ftp_debug > 3) { + printf("%s=%d len %d rv %d ptr %lx/%lx ", + "ippr_ftp_valid", + f->ftps_junk, len, rv, (u_long)rptr, + (u_long)wptr); + printf("buf [%*s]\n", len, rptr); + } + + if (f->ftps_junk == 0) { + f->ftps_rptr = rptr; + if (rv) + inc += ippr_ftp_server(fin, ip, nat, + ftp, len); + else + inc += ippr_ftp_client(fin, ip, nat, + ftp, len); + rptr = f->ftps_rptr; + wptr = f->ftps_wptr; + } + } + + /* + * Off to a bad start so lets just forget about using the + * ftp proxy for this connection. + */ + if ((f->ftps_cmds == 0) && (f->ftps_junk == 1)) { + /* f->ftps_seq[1] += inc; */ + + if (ippr_ftp_debug > 1) + printf("%s:cmds == 0 junk == 1\n", + "ippr_ftp_process"); + return APR_ERR(2); + } + + if ((f->ftps_junk != 0) && (rptr < wptr)) { + for (s = rptr; s < wptr; s++) { + if ((*s == '\r') && (s + 1 < wptr) && + (*(s + 1) == '\n')) { + rptr = s + 2; + f->ftps_junk = 0; + break; + } + } + } + + if (rptr == wptr) { + rptr = wptr = f->ftps_buf; + } else { + /* + * Compact the buffer back to the start. The junk + * flag should already be set and because we're not + * throwing away any data, it is preserved from its + * current state. 
+ */ + if (rptr > f->ftps_buf) { + bcopy(rptr, f->ftps_buf, len); + wptr -= rptr - f->ftps_buf; + rptr = f->ftps_buf; + } + } + f->ftps_rptr = rptr; + f->ftps_wptr = wptr; + } + + /* f->ftps_seq[1] += inc; */ + if (tcp->th_flags & TH_FIN) + f->ftps_seq[1]++; + if (ippr_ftp_debug > 3) { +#ifdef __sgi + mlen = fin->fin_plen; +#else + mlen = MSGDSIZE(m); +#endif + mlen -= off; + printf("ftps_seq[1] = %x inc %d len %d\n", + f->ftps_seq[1], inc, mlen); + } + + f->ftps_rptr = rptr; + f->ftps_wptr = wptr; + return APR_INC(inc); +} + + +int ippr_ftp_out(fin, aps, nat) +fr_info_t *fin; +ap_session_t *aps; +nat_t *nat; +{ + ftpinfo_t *ftp; + int rev; + + ftp = aps->aps_data; + if (ftp == NULL) + return 0; + + rev = (nat->nat_dir == NAT_OUTBOUND) ? 0 : 1; + if (ftp->ftp_side[1 - rev].ftps_ifp == NULL) + ftp->ftp_side[1 - rev].ftps_ifp = fin->fin_ifp; + + return ippr_ftp_process(fin, nat, ftp, rev); +} + + +int ippr_ftp_in(fin, aps, nat) +fr_info_t *fin; +ap_session_t *aps; +nat_t *nat; +{ + ftpinfo_t *ftp; + int rev; + + ftp = aps->aps_data; + if (ftp == NULL) + return 0; + + rev = (nat->nat_dir == NAT_OUTBOUND) ? 0 : 1; + if (ftp->ftp_side[rev].ftps_ifp == NULL) + ftp->ftp_side[rev].ftps_ifp = fin->fin_ifp; + + return ippr_ftp_process(fin, nat, ftp, 1 - rev); +} + + +/* + * ippr_ftp_atoi - implement a version of atoi which processes numbers in + * pairs separated by commas (which are expected to be in the range 0 - 255), + * returning a 16 bit number combining either side of the , as the MSB and + * LSB. 
+ */ +u_short ippr_ftp_atoi(ptr) +char **ptr; +{ + register char *s = *ptr, c; + register u_char i = 0, j = 0; + + while (((c = *s++) != '\0') && ISDIGIT(c)) { + i *= 10; + i += c - '0'; + } + if (c != ',') { + *ptr = NULL; + return 0; + } + while (((c = *s++) != '\0') && ISDIGIT(c)) { + j *= 10; + j += c - '0'; + } + *ptr = s; + i &= 0xff; + j &= 0xff; + return (i << 8) | j; +} + + +int ippr_ftp_epsv(fin, ip, nat, f, dlen) +fr_info_t *fin; +ip_t *ip; +nat_t *nat; +ftpside_t *f; +int dlen; +{ + char newbuf[IPF_FTPBUFSZ]; + char *s; + u_short ap = 0; + +#define EPSV_REPLEN 33 + /* + * Check for EPSV reply message. + */ + if (dlen < IPF_MIN229LEN) + return (0); + else if (strncmp(f->ftps_rptr, + "229 Entering Extended Passive Mode", EPSV_REPLEN)) + return (0); + + /* + * Skip the EPSV command + space + */ + s = f->ftps_rptr + 33; + while (*s && !ISDIGIT(*s)) + s++; + + /* + * As per RFC 2428, there are no addres components in the EPSV + * response. So we'll go straight to getting the port. + */ + while (*s && ISDIGIT(*s)) { + ap *= 10; + ap += *s++ - '0'; + } + + if (!s) + return 0; + + if (*s == '|') + s++; + if (*s == ')') + s++; + if (*s == '\n') + s--; + /* + * check for CR-LF at the end. + */ + if ((*s == '\r') && (*(s + 1) == '\n')) { + s += 2; + } else + return 0; + +#if defined(SNPRINTF) && defined(_KERNEL) + (void) SNPRINTF(newbuf, sizeof(newbuf), "%s (|||%u|)\r\n", + "229 Entering Extended Passive Mode", ap); +#else + (void) sprintf(newbuf, "%s (|||%u|)\r\n", + "229 Entering Extended Passive Mode", ap); +#endif + + return ippr_ftp_pasvreply(fin, ip, nat, f, (u_int)ap, newbuf, s, + ip->ip_src.s_addr); +} diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_h323_pxy.c b/usr/src/uts/common/inet/ipf/netinet/ip_h323_pxy.c new file mode 100644 index 0000000000..7fc62276c5 --- /dev/null +++ b/usr/src/uts/common/inet/ipf/netinet/ip_h323_pxy.c @@ -0,0 +1,294 @@ +/* + * Copyright 2001, QNX Software Systems Ltd. 
All Rights Reserved
+ *
+ * This source code has been published by QNX Software Systems Ltd. (QSSL).
+ * However, any use, reproduction, modification, distribution or transfer of
+ * this software, or any software which includes or is based upon any of this
+ * code, is only permitted under the terms of the QNX Open Community License
+ * version 1.0 (see licensing.qnx.com for details) or as otherwise expressly
+ * authorized by a written license agreement from QSSL. For more information,
+ * please email licensing@qnx.com.
+ *
+ * For more details, see QNX_OCL.txt provided with this distribution.
+ */
+
+/*
+ * Simple H.323 proxy
+ *
+ * by xtang@canada.com
+ * ported to ipfilter 3.4.20 by Michael Grant mg-ipf@grant.org
+ */
+
+#if __FreeBSD_version >= 220000 && defined(_KERNEL)
+# include <sys/fcntl.h>
+# include <sys/filio.h>
+#else
+# ifndef linux
+# include <sys/ioctl.h>
+# endif
+#endif
+
+#define IPF_H323_PROXY
+
+int ippr_h323_init __P((void));
+void ippr_h323_fini __P((void));
+int ippr_h323_new __P((fr_info_t *, ap_session_t *, nat_t *));
+void ippr_h323_del __P((ap_session_t *));
+int ippr_h323_out __P((fr_info_t *, ap_session_t *, nat_t *));
+int ippr_h323_in __P((fr_info_t *, ap_session_t *, nat_t *));
+
+int ippr_h245_new __P((fr_info_t *, ap_session_t *, nat_t *));
+int ippr_h245_out __P((fr_info_t *, ap_session_t *, nat_t *));
+int ippr_h245_in __P((fr_info_t *, ap_session_t *, nat_t *));
+
+/* Filter rule owned by this proxy; only its lock is used in this file. */
+static frentry_t h323_fr;
+
+/* Set to 1 once h323_fr.fr_lock has been initialised, 0 after fini. */
+int h323_proxy_init = 0;
+
+static int find_port __P((int, caddr_t, int datlen, int *, u_short *));
+
+
+/*
+ * Search the datlen bytes at data for the four bytes of ipaddr (supplied in
+ * network byte order) followed directly by a two byte, big-endian port.
+ *
+ * On a match the port is stored in *port (host order), the byte offset of
+ * the address within data is stored in *off and 0 is returned.
+ * If datlen is less than 6, -1 is returned and neither *off nor *port is
+ * written.  If nothing matches, -1 is returned with *port set to 0 and *off
+ * set to the first offset past the last candidate position.
+ *
+ * *off is an output only: callers pass an uninitialised variable, so it must
+ * not be read here.
+ */
+static int find_port(ipaddr, data, datlen, off, port)
+int ipaddr;
+caddr_t data;
+int datlen, *off;
+unsigned short *port;
+{
+	u_32_t addr, netaddr;
+	u_char *dp;
+	int offset;
+
+	if (datlen < 6)
+		return -1;
+
+	*port = 0;
+	dp = (u_char *)data;
+	netaddr = ntohl(ipaddr);
+
+	for (offset = 0; offset <= datlen - 6; offset++, dp++) {
+		/*
+		 * Widen each byte before shifting: a u_char promotes to a
+		 * signed int and shifting a value >= 0x80 left by 24 would
+		 * overflow it.
+		 */
+		addr = ((u_32_t)dp[0] << 24) | ((u_32_t)dp[1] << 16) |
+		       ((u_32_t)dp[2] << 8) | (u_32_t)dp[3];
+		if (netaddr == addr) {
+			*port = (u_short)((dp[4] << 8) | dp[5]);
+			break;
+		}
+	}
+	*off = offset;
+	return (offset > datlen - 6) ? -1 : 0;
+}
+
+/*
+ * Initialize local structures.
+ * Zeroes h323_fr, gives it one reference and pass/quick/keep-state flags on
+ * the input queue, initialises its lock and records that in
+ * h323_proxy_init.  Always returns 0.
+ */
+int ippr_h323_init()
+{
+	bzero((char *)&h323_fr, sizeof(h323_fr));
+	h323_fr.fr_ref = 1;
+	h323_fr.fr_flags = FR_INQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE;
+	MUTEX_INIT(&h323_fr.fr_lock, "H323 proxy rule lock");
+	h323_proxy_init = 1;
+
+	return 0;
+}
+
+
+/*
+ * Undo ippr_h323_init(): destroy the rule lock if it was initialised.
+ */
+void ippr_h323_fini()
+{
+	if (h323_proxy_init == 1) {
+		MUTEX_DESTROY(&h323_fr.fr_lock);
+		h323_proxy_init = 0;
+	}
+}
+
+
+/*
+ * Start a new H.323 proxy session with an empty array of installed h245
+ * rules (aps_data NULL, aps_psiz 0).  Always returns 0.
+ */
+int ippr_h323_new(fin, aps, nat)
+fr_info_t *fin;
+ap_session_t *aps;
+nat_t *nat;
+{
+	fin = fin;	/* LINT */
+	nat = nat;	/* LINT */
+
+	aps->aps_data = NULL;
+	aps->aps_psiz = 0;
+
+	return 0;
+}
+
+
+/*
+ * Tear down a session: ask fr_nat_ioctl() to remove (SIOCRMNAT) every
+ * ipnat_t stored in aps_data, then free the array and reset aps_data and
+ * aps_psiz so a second call is harmless.
+ */
+void ippr_h323_del(aps)
+ap_session_t *aps;
+{
+	int i;
+	ipnat_t *ipn;
+
+	if (aps->aps_data) {
+		for (i = 0, ipn = aps->aps_data;
+		     i < (aps->aps_psiz / sizeof(ipnat_t));
+		     i++, ipn = (ipnat_t *)((char *)ipn + sizeof(*ipn)))
+		{
+			/*
+			 * Check the comment in ippr_h323_in() function,
+			 * just above fr_nat_ioctl() call.
+			 * We are lucky here because this function is not
+			 * called with ipf_nat locked.
+			 */
+			if (fr_nat_ioctl((caddr_t)ipn, SIOCRMNAT, NAT_SYSSPACE|
+				         NAT_LOCKHELD|FWRITE) == -1) {
+				/*EMPTY*/;
+				/* log the error */
+			}
+		}
+		KFREES(aps->aps_data, aps->aps_psiz);
+		/* avoid double free */
+		aps->aps_data = NULL;
+		aps->aps_psiz = 0;
+	}
+	return;
+}
+
+
+/*
+ * Inbound hook.  Looks for the packet's source address in the TCP payload;
+ * when it is found together with a port, a copy of the current NAT rule
+ * labelled "h245" is built for that port and installed with
+ * fr_nat_ioctl(SIOCADNAT).  The new rule is appended to the per-session
+ * array in aps_data so that ippr_h323_del() can remove it later.
+ *
+ * Returns 0 when nothing was found or the rule was installed, -1 when the
+ * array could not be grown or the rule could not be installed.
+ */
+int ippr_h323_in(fin, aps, nat)
+fr_info_t *fin;
+ap_session_t *aps;
+nat_t *nat;
+{
+	int ipaddr, off, datlen;
+	unsigned short port;
+	caddr_t data;
+	tcphdr_t *tcp;
+	ip_t *ip;
+
+	ip = fin->fin_ip;
+	tcp = (tcphdr_t *)fin->fin_dp;
+	ipaddr = ip->ip_src.s_addr;
+
+	data = (caddr_t)tcp + (TCP_OFF(tcp) << 2);
+	datlen = fin->fin_dlen - (TCP_OFF(tcp) << 2);
+	if (find_port(ipaddr, data, datlen, &off, &port) == 0) {
+		ipnat_t *ipn;
+		char *newarray;
+		size_t newsize;
+
+		/* setup a nat rule to set a h245 proxy on tcp-port "port"
+		 * it's like:
+		 *   map <if> <inter_ip>/<mask> -> <gate_ip>/<mask> proxy port <port> <port>/tcp
+		 */
+		newsize = aps->aps_psiz + sizeof(*ipn);
+		KMALLOCS(newarray, char *, newsize);
+		if (newarray == NULL) {
+			return -1;
+		}
+		ipn = (ipnat_t *)&newarray[aps->aps_psiz];
+		bcopy((caddr_t)nat->nat_ptr, (caddr_t)ipn, sizeof(ipnat_t));
+		(void) strncpy(ipn->in_plabel, "h245", APR_LABELLEN);
+
+		ipn->in_inip = nat->nat_inip.s_addr;
+		ipn->in_inmsk = 0xffffffff;
+		ipn->in_dport = htons(port);
+		/*
+		 * we got a problem here. we need to call fr_nat_ioctl() to add
+		 * the h245 proxy rule, but since we already hold (READ locked)
+		 * the nat table rwlock (ipf_nat), if we go into fr_nat_ioctl(),
+		 * it will try to WRITE lock it. This would cause a deadlock
+		 * on RTP.
+		 *
+		 * The quick & dirty solution here is to release the read lock,
+		 * call fr_nat_ioctl() and re-lock it.
+		 * A (maybe better) solution is do a UPGRADE(), and instead
+		 * of calling fr_nat_ioctl(), we add the nat rule ourself.
+		 */
+		RWLOCK_EXIT(&ipf_nat);
+		if (fr_nat_ioctl((caddr_t)ipn, SIOCADNAT,
+				 NAT_SYSSPACE|FWRITE) == -1) {
+			READ_ENTER(&ipf_nat);
+			/*
+			 * The rule was not installed and newarray was never
+			 * attached to the session, so release it here.
+			 */
+			KFREES(newarray, newsize);
+			return -1;
+		}
+		READ_ENTER(&ipf_nat);
+		if (aps->aps_data != NULL && aps->aps_psiz > 0) {
+			bcopy(aps->aps_data, newarray, aps->aps_psiz);
+			KFREES(aps->aps_data, aps->aps_psiz);
+		}
+		aps->aps_data = newarray;
+		aps->aps_psiz += sizeof(*ipn);
+	}
+	return 0;
+}
+
+
+/*
+ * Start a new H.245 proxy session; no per-session data is kept.
+ * Always returns 0.
+ */
+int ippr_h245_new(fin, aps, nat)
+fr_info_t *fin;
+ap_session_t *aps;
+nat_t *nat;
+{
+	fin = fin;	/* LINT */
+	nat = nat;	/* LINT */
+
+	aps->aps_data = NULL;
+	aps->aps_psiz = 0;
+	return 0;
+}
+
+
+/*
+ * Outbound hook.  Looks for the internal address (nat_inip) plus a port in
+ * the TCP payload.  If no UDP mapping exists yet for this source and
+ * destination, a slave UDP NAT entry is created for that port and the
+ * address/port in the payload are overwritten with the packet's source
+ * address and the new entry's nat_outport.  Always returns 0.
+ */
+int ippr_h245_out(fin, aps, nat)
+fr_info_t *fin;
+ap_session_t *aps;
+nat_t *nat;
+{
+	int ipaddr, off, datlen;
+	tcphdr_t *tcp;
+	caddr_t data;
+	u_short port;
+	ip_t *ip;
+
+	aps = aps;	/* LINT */
+
+	ip = fin->fin_ip;
+	tcp = (tcphdr_t *)fin->fin_dp;
+	ipaddr = nat->nat_inip.s_addr;
+	data = (caddr_t)tcp + (TCP_OFF(tcp) << 2);
+	datlen = fin->fin_dlen - (TCP_OFF(tcp) << 2);
+	if (find_port(ipaddr, data, datlen, &off, &port) == 0) {
+		fr_info_t fi;
+		nat_t *nat2;
+
+/*		port = htons(port); */
+		/*
+		 * Pass the packet information structure, as the IPsec and
+		 * IRC proxies do, rather than the bare interface pointer.
+		 */
+		nat2 = nat_outlookup(fin, IPN_UDP, IPPROTO_UDP,
+				    ip->ip_src, ip->ip_dst);
+		if (nat2 == NULL) {
+			struct ip newip;
+			struct udphdr udp;
+
+			/*
+			 * NOTE(review): newip is filled in but never
+			 * referenced again in this function - confirm
+			 * whether fi was meant to point at it.
+			 */
+			bcopy((caddr_t)ip, (caddr_t)&newip, sizeof(newip));
+			newip.ip_len = fin->fin_hlen + sizeof(udp);
+			newip.ip_p = IPPROTO_UDP;
+			newip.ip_src = nat->nat_inip;
+
+			bzero((char *)&udp, sizeof(udp));
+			udp.uh_sport = port;
+
+			bcopy((caddr_t)fin, (caddr_t)&fi, sizeof(fi));
+			fi.fin_fi.fi_p = IPPROTO_UDP;
+			fi.fin_data[0] = port;
+			fi.fin_data[1] = 0;
+			fi.fin_dp = (char *)&udp;
+
+			nat2 = nat_new(&fi, nat->nat_ptr, NULL,
+				       NAT_SLAVE|IPN_UDP|SI_W_DPORT,
+				       NAT_OUTBOUND);
+			if (nat2 != NULL) {
+				(void) nat_proto(&fi, nat2, IPN_UDP);
+				nat_update(&fi, nat2, nat2->nat_ptr);
+
+				nat2->nat_ptr->in_hits++;
+#ifdef	IPFILTER_LOG
+				nat_log(nat2, (u_int)(nat->nat_ptr->in_redir));
+#endif
+				bcopy((caddr_t)&ip->ip_src.s_addr,
+				      data + off, 4);
+				bcopy((caddr_t)&nat2->nat_outport,
+				      data + off + 4, 2);
+			}
+		}
+	}
+	return 0;
+}
diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_htable.h b/usr/src/uts/common/inet/ipf/netinet/ip_htable.h
new file mode 100644
index 0000000000..082959a9c5
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/netinet/ip_htable.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2003 by Darren Reed.
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ *
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifndef __IP_HTABLE_H__
+#define __IP_HTABLE_H__
+
+#include "netinet/ip_lookup.h"
+
+typedef struct iphtent_s {
+	struct iphtent_s *ipe_next, **ipe_pnext;
+	void *ipe_ptr;
+	sa_family_t ipe_family;
+	i6addr_t ipe_addr;
+	i6addr_t ipe_mask;
+	int ipe_ref;
+	union {
+		char ipeu_char[16];
+		u_long ipeu_long;
+		u_int ipeu_int;
+	}ipe_un;
+} iphtent_t;
+
+#define ipe_value ipe_un.ipeu_int
+#define ipe_group ipe_un.ipeu_char
+
+#define IPE_HASH_FN(a, m, s) (((a) * (m)) % (s))
+
+
+typedef struct iphtable_s {
+	ipfrwlock_t iph_rwlock;
+	struct iphtable_s *iph_next, **iph_pnext;
+	struct iphtent_s **iph_table;
+	size_t iph_size; /* size of hash table */
+	u_long iph_seed; /* hashing seed */
+	u_32_t iph_flags;
+	u_int iph_unit; /* IPL_LOG* */
+	u_int iph_ref;
+	u_int iph_type; /* lookup or group map - IPHASH_* */
+	u_int iph_masks[4]; /* IPv4 or IPv6 netmasks in use */
+	char iph_name[FR_GROUPLEN]; /* hash table number */
+} iphtable_t;
+
+
+/* iph_type */
+#define IPHASH_LOOKUP 0
+#define IPHASH_GROUPMAP 1
+#define IPHASH_ANON 0x80000000
+
+
+typedef struct iphtstat_s {
+	iphtable_t *iphs_tables;
+	u_long iphs_numtables;
+	u_long iphs_numnodes;
+	u_long iphs_nomem;
+	u_long iphs_pad[16];
+} iphtstat_t;
+
+
+extern iphtable_t *ipf_htables[IPL_LOGSIZE];
+
+extern void fr_htable_unload __P((void));
+extern int fr_newhtable __P((iplookupop_t *));
+extern iphtable_t *fr_findhtable __P((int, char *));
+extern int fr_removehtable __P((iplookupop_t *));
+extern size_t fr_flushhtable __P((iplookupflush_t *));
+extern int fr_addhtent __P((iphtable_t *, iphtent_t *));
+extern int fr_delhtent __P((iphtable_t *, iphtent_t *));
+extern void fr_derefhtable __P((iphtable_t *));
+extern void fr_delhtable __P((iphtable_t *));
+extern void *fr_iphmfindgroup __P((void *, int, void *));
+extern int fr_iphmfindip __P((void *, int, void *));
+extern int fr_gethtablestat __P((iplookupop_t *));
+
+#endif /* __IP_HTABLE_H__ */
diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_ipsec_pxy.c b/usr/src/uts/common/inet/ipf/netinet/ip_ipsec_pxy.c
new file mode 100644
index 0000000000..aa7e1afd4d
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/netinet/ip_ipsec_pxy.c
@@ -0,0 +1,346 @@
+/*
+ * Copyright (C) 2001-2003 by Darren Reed
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ *
+ * Simple ISAKMP transparent proxy for in-kernel use. For use with the NAT
+ * code.
+ *
+ * $Id: ip_ipsec_pxy.c,v 2.20.2.7 2005/07/15 21:56:50 darrenr Exp $
+ *
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#define IPF_IPSEC_PROXY
+
+
+int ippr_ipsec_init __P((void));
+void ippr_ipsec_fini __P((void));
+int ippr_ipsec_new __P((fr_info_t *, ap_session_t *, nat_t *));
+void ippr_ipsec_del __P((ap_session_t *));
+int ippr_ipsec_inout __P((fr_info_t *, ap_session_t *, nat_t *));
+int ippr_ipsec_match __P((fr_info_t *, ap_session_t *, nat_t *));
+
+/* Filter rule attached (as fin_fr) to the ESP packet info this proxy builds. */
+static frentry_t ipsecfr;
+/* Timeout queues obtained from nat_utqe / ips_utqe in ippr_ipsec_init(). */
+static ipftq_t *ipsecnattqe;
+static ipftq_t *ipsecstatetqe;
+/* Scratch buffer ippr_ipsec_new() copies the start of the payload into. */
+static char ipsec_buffer[1500];
+
+/* Set to 1 once ipsecfr.fr_lock has been initialised, 0 after fini. */
+int ipsec_proxy_init = 0;
+/* TTL handed to fr_addtimeoutqueue() and stored in ipsecfr.fr_age[]. */
+int ipsec_proxy_ttl = 60;
+
+/*
+ * IPSec application proxy initialization.
+ *
+ * Prepares the proxy's filter rule and its lock, then obtains one timeout
+ * queue from the NAT user queues and one from the state user queues, both
+ * with a TTL of ipsec_proxy_ttl.  Returns 0 on success.  Returns -1 if
+ * either queue cannot be obtained; in that case no queue is left allocated
+ * but the rule lock stays initialised for ippr_ipsec_fini() to destroy.
+ */
+int ippr_ipsec_init()
+{
+	bzero((char *)&ipsecfr, sizeof(ipsecfr));
+	ipsecfr.fr_ref = 1;
+	ipsecfr.fr_flags = FR_OUTQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE;
+	MUTEX_INIT(&ipsecfr.fr_lock, "IPsec proxy rule lock");
+	ipsec_proxy_init = 1;
+
+	ipsecnattqe = fr_addtimeoutqueue(&nat_utqe, ipsec_proxy_ttl);
+	if (ipsecnattqe == NULL)
+		return -1;
+
+	ipsecstatetqe = fr_addtimeoutqueue(&ips_utqe, ipsec_proxy_ttl);
+	if (ipsecstatetqe != NULL) {
+		/* Both queues exist: tag them and finish the rule. */
+		ipsecnattqe->ifq_flags |= IFQF_PROXY;
+		ipsecstatetqe->ifq_flags |= IFQF_PROXY;
+
+		ipsecfr.fr_age[0] = ipsec_proxy_ttl;
+		ipsecfr.fr_age[1] = ipsec_proxy_ttl;
+		return 0;
+	}
+
+	/* No state queue: give the NAT queue back before failing. */
+	if (fr_deletetimeoutqueue(ipsecnattqe) == 0)
+		fr_freetimeoutqueue(ipsecnattqe);
+	ipsecnattqe = NULL;
+	return -1;
+}
+
+
+/*
+ * Release whatever ippr_ipsec_init() set up: both timeout queues (freed
+ * only when fr_deletetimeoutqueue() returns 0) and the rule lock.
+ */
+void ippr_ipsec_fini()
+{
+	if ((ipsecnattqe != NULL) &&
+	    (fr_deletetimeoutqueue(ipsecnattqe) == 0))
+		fr_freetimeoutqueue(ipsecnattqe);
+	ipsecnattqe = NULL;
+
+	if ((ipsecstatetqe != NULL) &&
+	    (fr_deletetimeoutqueue(ipsecstatetqe) == 0))
+		fr_freetimeoutqueue(ipsecstatetqe);
+	ipsecstatetqe = NULL;
+
+	if (ipsec_proxy_init != 1)
+		return;
+	MUTEX_DESTROY(&ipsecfr.fr_lock);
+	ipsec_proxy_init = 0;
+}
+
+
+/*
+ * Setup for a new IPSEC proxy.
+ *
+ * Copies the start of the packet payload into ipsec_buffer, allocates the
+ * per-session ipsec_pxy_t (stored in aps_data), records the two cookies
+ * found at the front of the payload, builds a private ESP "map" rule in
+ * ipsc_rule and tries to create a NAT entry and a state entry for ESP.
+ *
+ * Returns -1 if fewer than 16 payload bytes are available, if an ESP NAT
+ * mapping already exists for this inside address and destination, or if the
+ * allocation fails.  Otherwise returns 0, even when nat_new() fails.
+ */
+int ippr_ipsec_new(fin, aps, nat)
+fr_info_t *fin;
+ap_session_t *aps;
+nat_t *nat;
+{
+	ipsec_pxy_t *ipsec;
+	fr_info_t fi;
+	ipnat_t *ipn;
+	char *ptr;
+	int p, off, dlen, ttl;
+	mb_t *m;
+	ip_t *ip;
+
+	/* Offset of the payload from the start of the message block. */
+	off = fin->fin_plen - fin->fin_dlen + fin->fin_ipoff;
+	bzero(ipsec_buffer, sizeof(ipsec_buffer));
+	ip = fin->fin_ip;
+	m = fin->fin_m;
+
+	dlen = M_LEN(m) - off;
+	if (dlen < 16)
+		return -1;
+	COPYDATA(m, off, MIN(sizeof(ipsec_buffer), dlen), ipsec_buffer);
+
+	if (nat_outlookup(fin, 0, IPPROTO_ESP, nat->nat_inip,
+			  ip->ip_dst) != NULL)
+		return -1;
+
+	/*
+	 * NOTE(review): aps_psiz is set before the allocation is known to
+	 * have succeeded, so it stays non-zero with aps_data NULL on failure.
+	 */
+	aps->aps_psiz = sizeof(*ipsec);
+	KMALLOCS(aps->aps_data, ipsec_pxy_t *, sizeof(*ipsec));
+	if (aps->aps_data == NULL)
+		return -1;
+
+	ipsec = aps->aps_data;
+	bzero((char *)ipsec, sizeof(*ipsec));
+
+	/*
+	 * Create NAT rule against which the tunnel/transport mapping is
+	 * created. This is required because the current NAT rule does not
+	 * describe ESP but UDP instead.
+	 *
+	 * NOTE(review): ipsecnattqe is dereferenced without a NULL check and
+	 * ippr_ipsec_init() leaves it NULL when it fails - confirm this hook
+	 * cannot run in that case.  The fr_addtimeoutqueue() results below
+	 * are stored unchecked as well.
+	 */
+	ipn = &ipsec->ipsc_rule;
+	ttl = IPF_TTLVAL(ipsecnattqe->ifq_ttl);
+	ipn->in_tqehead[0] = fr_addtimeoutqueue(&nat_utqe, ttl);
+	ipn->in_tqehead[1] = fr_addtimeoutqueue(&nat_utqe, ttl);
+	ipn->in_ifps[0] = fin->fin_ifp;
+	ipn->in_apr = NULL;
+	ipn->in_use = 1;
+	ipn->in_hits = 1;
+	ipn->in_nip = ntohl(nat->nat_outip.s_addr);
+	ipn->in_ippip = 1;
+	ipn->in_inip = nat->nat_inip.s_addr;
+	ipn->in_inmsk = 0xffffffff;
+	ipn->in_outip = fin->fin_saddr;
+	ipn->in_outmsk = nat->nat_outip.s_addr;
+	ipn->in_srcip = fin->fin_saddr;
+	ipn->in_srcmsk = 0xffffffff;
+	ipn->in_redir = NAT_MAP;
+	bcopy(nat->nat_ptr->in_ifnames[0], ipn->in_ifnames[0],
+	      sizeof(ipn->in_ifnames[0]));
+	ipn->in_p = IPPROTO_ESP;
+
+	/*
+	 * Work on a private copy of the packet information, relabelled as
+	 * ESP.  The protocol field of the real IP header is switched to ESP
+	 * as well and put back just before returning.
+	 */
+	bcopy((char *)fin, (char *)&fi, sizeof(fi));
+	fi.fin_state = NULL;
+	fi.fin_nat = NULL;
+	fi.fin_fi.fi_p = IPPROTO_ESP;
+	fi.fin_fr = &ipsecfr;
+	fi.fin_data[0] = 0;
+	fi.fin_data[1] = 0;
+	p = ip->ip_p;
+	ip->ip_p = IPPROTO_ESP;
+	fi.fin_flx &= ~(FI_TCPUDP|FI_STATE|FI_FRAG);
+	fi.fin_flx |= FI_IGNORE;
+
+	/* The two cookies sit back to back at the front of the payload. */
+	ptr = ipsec_buffer;
+	bcopy(ptr, (char *)ipsec->ipsc_icookie, sizeof(ipsec_cookie_t));
+	ptr += sizeof(ipsec_cookie_t);
+	bcopy(ptr, (char *)ipsec->ipsc_rcookie, sizeof(ipsec_cookie_t));
+	/*
+	 * The responder cookie should only be non-zero if the initiator
+	 * cookie is non-zero. Therefore, it is safe to assume(!) that the
+	 * cookies are both set after copying if the responder is non-zero.
+	 */
+	if ((ipsec->ipsc_rcookie[0]|ipsec->ipsc_rcookie[1]) != 0)
+		ipsec->ipsc_rckset = 1;
+
+	ipsec->ipsc_nat = nat_new(&fi, ipn, &ipsec->ipsc_nat,
+				  NAT_SLAVE|SI_WILDP, NAT_OUTBOUND);
+	if (ipsec->ipsc_nat != NULL) {
+		(void) nat_proto(&fi, ipsec->ipsc_nat, 0);
+		nat_update(&fi, ipsec->ipsc_nat, ipn);
+
+		fi.fin_data[0] = 0;
+		fi.fin_data[1] = 0;
+		ipsec->ipsc_state = fr_addstate(&fi, &ipsec->ipsc_state,
+						SI_WILDP);
+		if (fi.fin_state != NULL)
+			fr_statederef(&fi, (ipstate_t **)&fi.fin_state);
+	}
+	/* Restore the protocol number saved above. */
+	ip->ip_p = p & 0xff;
+	return 0;
+}
+
+
+/*
+ * For outgoing IKE packets. refresh timeouts for NAT & state entries, if
+ * we can. If they have disappeared, recreate them.
+ *
+ * Packets travelling against the direction of the NAT entry are ignored.
+ * Always returns 0.
+ */
+int ippr_ipsec_inout(fin, aps, nat)
+fr_info_t *fin;
+ap_session_t *aps;
+nat_t *nat;
+{
+	ipsec_pxy_t *ipsec;
+	fr_info_t fi;
+	ip_t *ip;
+	int p;
+
+	if ((fin->fin_out == 1) && (nat->nat_dir == NAT_INBOUND))
+		return 0;
+
+	if ((fin->fin_out == 0) && (nat->nat_dir == NAT_OUTBOUND))
+		return 0;
+
+	ipsec = aps->aps_data;
+
+	if (ipsec != NULL) {
+		ip = fin->fin_ip;
+		p = ip->ip_p;
+
+		/*
+		 * fi is only filled in when something has to be recreated;
+		 * the branches below that use it are reached only in that
+		 * case.
+		 */
+		if ((ipsec->ipsc_nat == NULL) || (ipsec->ipsc_state == NULL)) {
+			bcopy((char *)fin, (char *)&fi, sizeof(fi));
+			fi.fin_state = NULL;
+			fi.fin_nat = NULL;
+			fi.fin_fi.fi_p = IPPROTO_ESP;
+			fi.fin_fr = &ipsecfr;
+			fi.fin_data[0] = 0;
+			fi.fin_data[1] = 0;
+			ip->ip_p = IPPROTO_ESP;
+			fi.fin_flx &= ~(FI_TCPUDP|FI_STATE|FI_FRAG);
+			fi.fin_flx |= FI_IGNORE;
+		}
+
+		/*
+		 * Update NAT timeout/create NAT if missing.
+		 */
+		if (ipsec->ipsc_nat != NULL)
+			fr_queueback(&ipsec->ipsc_nat->nat_tqe);
+		else {
+			ipsec->ipsc_nat = nat_new(&fi, &ipsec->ipsc_rule,
+						  &ipsec->ipsc_nat,
+						  NAT_SLAVE|SI_WILDP,
+						  nat->nat_dir);
+			if (ipsec->ipsc_nat != NULL) {
+				(void) nat_proto(&fi, ipsec->ipsc_nat, 0);
+				nat_update(&fi, ipsec->ipsc_nat,
+					   &ipsec->ipsc_rule);
+			}
+		}
+
+		/*
+		 * Update state timeout/create state if missing.
+		 */
+		READ_ENTER(&ipf_state);
+		if (ipsec->ipsc_state != NULL) {
+			fr_queueback(&ipsec->ipsc_state->is_sti);
+			ipsec->ipsc_state->is_die = nat->nat_age;
+			RWLOCK_EXIT(&ipf_state);
+		} else {
+			/* The lock is dropped before calling fr_addstate(). */
+			RWLOCK_EXIT(&ipf_state);
+			fi.fin_data[0] = 0;
+			fi.fin_data[1] = 0;
+			ipsec->ipsc_state = fr_addstate(&fi,
+							&ipsec->ipsc_state,
+							SI_WILDP);
+			if (fi.fin_state != NULL)
+				fr_statederef(&fi, (ipstate_t **)&fi.fin_state);
+		}
+		/* Put back the protocol number (it may have been set to ESP). */
+		ip->ip_p = p;
+	}
+	return 0;
+}
+
+
+/*
+ * This extends the NAT matching to be based on the cookies associated with
+ * a session and found at the front of IKE packets. The cookies are always
+ * in the same order (not reversed depending on packet flow direction as with
+ * UDP/TCP port numbers).
+ *
+ * Returns 0 when the packet belongs to this session and -1 otherwise:
+ * fragments, payloads shorter than the four cookie words, sessions without
+ * proxy data and cookie mismatches are all rejected.  The first non-zero
+ * responder cookie seen is recorded in the session.
+ */
+int ippr_ipsec_match(fin, aps, nat)
+fr_info_t *fin;
+ap_session_t *aps;
+nat_t *nat;
+{
+	ipsec_pxy_t *ipsec;
+	u_32_t cookies[4];
+	mb_t *m;
+	int off;
+
+	nat = nat;	/* LINT */
+
+	if ((fin->fin_dlen < sizeof(cookies)) || (fin->fin_flx & FI_FRAG))
+		return -1;
+
+	/*
+	 * Like ippr_ipsec_inout() and ippr_ipsec_del(), cope with a session
+	 * that has no proxy data instead of dereferencing NULL.
+	 */
+	ipsec = aps->aps_data;
+	if (ipsec == NULL)
+		return -1;
+
+	off = fin->fin_plen - fin->fin_dlen + fin->fin_ipoff;
+	m = fin->fin_m;
+	COPYDATA(m, off, sizeof(cookies), (char *)cookies);
+
+	/* Words 0-1 are the initiator cookie, words 2-3 the responder's. */
+	if ((cookies[0] != ipsec->ipsc_icookie[0]) ||
+	    (cookies[1] != ipsec->ipsc_icookie[1]))
+		return -1;
+
+	if (ipsec->ipsc_rckset == 0) {
+		if ((cookies[2]|cookies[3]) == 0) {
+			return 0;
+		}
+		ipsec->ipsc_rckset = 1;
+		ipsec->ipsc_rcookie[0] = cookies[2];
+		ipsec->ipsc_rcookie[1] = cookies[3];
+		return 0;
+	}
+
+	if ((cookies[2] != ipsec->ipsc_rcookie[0]) ||
+	    (cookies[3] != ipsec->ipsc_rcookie[1]))
+		return -1;
+	return 0;
+}
+
+
+/*
+ * clean up after ourselves.
+ *
+ * If a state entry is attached, it is set to expire at fr_ticks + 1, its
+ * is_me back-pointer is cleared and it is moved with fr_queuefront().  The
+ * session's NAT and state pointers are then cleared.  The ipsec_pxy_t
+ * itself is not freed here.
+ */
+void ippr_ipsec_del(aps)
+ap_session_t *aps;
+{
+	ipsec_pxy_t *ipsec;
+
+	ipsec = aps->aps_data;
+
+	if (ipsec != NULL) {
+		/*
+		 * Don't bother changing any of the NAT structure details,
+		 * *_del() is on a callback from aps_free(), from nat_delete()
+		 */
+
+		READ_ENTER(&ipf_state);
+		if (ipsec->ipsc_state != NULL) {
+			ipsec->ipsc_state->is_die = fr_ticks + 1;
+			ipsec->ipsc_state->is_me = NULL;
+			fr_queuefront(&ipsec->ipsc_state->is_sti);
+		}
+		RWLOCK_EXIT(&ipf_state);
+
+		ipsec->ipsc_state = NULL;
+		ipsec->ipsc_nat = NULL;
+	}
+}
diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_irc_pxy.c b/usr/src/uts/common/inet/ipf/netinet/ip_irc_pxy.c
new file mode 100644
index 0000000000..4b7a139048
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/netinet/ip_irc_pxy.c
@@ -0,0 +1,438 @@
+/*
+ * Copyright (C) 2000-2003 Darren Reed
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ *
+ * $Id: ip_irc_pxy.c,v 2.39.2.4 2005/02/04 10:22:55 darrenr Exp $
+ *
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#define IPF_IRC_PROXY
+
+#define IPF_IRCBUFSZ 96 /* This *MUST* be >= 64! */
+
+
+int ippr_irc_init __P((void));
+void ippr_irc_fini __P((void));
+int ippr_irc_new __P((fr_info_t *, ap_session_t *, nat_t *));
+int ippr_irc_out __P((fr_info_t *, ap_session_t *, nat_t *));
+int ippr_irc_send __P((fr_info_t *, nat_t *));
+int ippr_irc_complete __P((ircinfo_t *, char *, size_t));
+u_short ipf_irc_atoi __P((char **));
+
+/* Filter rule attached (as fin_fr) to the packet info built in ippr_irc_send(). */
+static frentry_t ircnatfr;
+
+/* Set to 1 once ircnatfr.fr_lock has been initialised, 0 after fini. */
+int irc_proxy_init = 0;
+
+
+/*
+ * Initialize local structures.
+ * Zeroes ircnatfr, gives it one reference and pass/quick/keep-state flags
+ * on the input queue and initialises its lock.  Always returns 0.
+ */
+int ippr_irc_init()
+{
+	bzero((char *)&ircnatfr, sizeof(ircnatfr));
+	ircnatfr.fr_ref = 1;
+	ircnatfr.fr_flags = FR_INQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE;
+	MUTEX_INIT(&ircnatfr.fr_lock, "IRC proxy rule lock");
+	irc_proxy_init = 1;
+
+	return 0;
+}
+
+
+/*
+ * Undo ippr_irc_init(): destroy the rule lock if it was initialised.
+ */
+void ippr_irc_fini()
+{
+	if (irc_proxy_init == 1) {
+		MUTEX_DESTROY(&ircnatfr.fr_lock);
+		irc_proxy_init = 0;
+	}
+}
+
+
+/* DCC sub-commands recognised by ippr_irc_complete(); NULL terminated. */
+char *ippr_irc_dcctypes[] = {
+	"CHAT ",	/* CHAT chat ipnumber portnumber */
+	"SEND ",	/* SEND filename ipnumber portnumber */
+	"MOVE ",
+	"TSEND ",
+	"SCHAT ",
+	NULL,
+};
+
+
+/*
+ * :A PRIVMSG B :^ADCC CHAT chat 0 0^A\r\n
+ * PRIVMSG B ^ADCC CHAT chat 0 0^A\r\n
+ */
+
+
+/*
+ * Try to parse the len bytes at buf as a PRIVMSG carrying a DCC request.
+ * irc_ipnum and irc_port are cleared first.  On success the pointers
+ * irc_snick, irc_dnick, irc_type, irc_arg and irc_addr are set to positions
+ * inside buf, irc_ipnum and irc_port receive the decimal address and port,
+ * irc_len the number of bytes consumed, and 1 is returned.  Any parse
+ * failure returns 0.
+ *
+ * NOTE(review): as written this appears unable to accept either format
+ * shown in the comment above - confirm before relying on it:
+ *  - "c = *s++" leaves s at buf + 1, so without a ':' prefix the
+ *    strncmp() against "PRIVMSG " starts at the second byte;
+ *  - with a ':' prefix, the nickname loop exits with c holding the
+ *    whitespace it stopped on (or with i == 0, which fails "i < 31"), so
+ *    the following "c != 'P'" test rejects the line;
+ *  - only 4 of the 5 bytes of "\001DCC " are compared and skipped, which
+ *    leaves s on the blank when the DCC type is compared.
+ */
+int ippr_irc_complete(ircp, buf, len)
+ircinfo_t *ircp;
+char *buf;
+size_t len;
+{
+	register char *s, c;
+	register size_t i;
+	u_32_t l;
+	int j, k;
+
+	ircp->irc_ipnum = 0;
+	ircp->irc_port = 0;
+
+	if (len < 31)
+		return 0;
+	s = buf;
+	c = *s++;
+	i = len - 1;	/* i counts the bytes still available */
+
+	if ((c != ':') && (c != 'P'))
+		return 0;
+
+	if (c == ':') {
+		/*
+		 * Loosely check that the source is a nickname of some sort
+		 */
+		s++;
+		c = *s;
+		ircp->irc_snick = s;
+		if (!ISALPHA(c))
+			return 0;
+		i--;
+		for (c = *s; !ISSPACE(c) && (i > 0); i--)
+			c = *s++;
+		if (i < 31)
+			return 0;
+		if (c != 'P')
+			return 0;
+	} else
+		ircp->irc_snick = NULL;
+
+	/*
+	 * Check command string
+	 */
+	if (strncmp(s, "PRIVMSG ", 8))
+		return 0;
+	i -= 8;
+	s += 8;
+	c = *s;
+	ircp->irc_dnick = s;
+
+	/*
+	 * Loosely check that the destination is a nickname of some sort
+	 */
+	if (!ISALPHA(c))
+		return 0;
+	for (; !ISSPACE(c) && (i > 0); i--)
+		c = *s++;
+	if (i < 20)
+		return 0;
+	s++,
+	i--;
+
+	/*
+	 * Look for a ^A to start the DCC
+	 */
+	c = *s;
+	if (c == ':') {
+		s++;
+		c = *s;
+	}
+
+	if (strncmp(s, "\001DCC ", 4))
+		return 0;
+
+	i -= 4;
+	s += 4;
+
+	/*
+	 * Check for a recognised DCC command
+	 */
+	for (j = 0, k = 0; ippr_irc_dcctypes[j]; j++) {
+		k = MIN(strlen(ippr_irc_dcctypes[j]), i);
+		if (!strncmp(ippr_irc_dcctypes[j], s, k))
+			break;
+	}
+	if (!ippr_irc_dcctypes[j])
+		return 0;
+
+	ircp->irc_type = s;
+	i -= k;
+	s += k;
+
+	if (i < 11)
+		return 0;
+
+	/*
+	 * Check for the arg
+	 */
+	c = *s;
+	if (ISSPACE(c))
+		return 0;
+	ircp->irc_arg = s;
+	for (; (c != ' ') && (c != '\001') && (i > 0); i--)
+		c = *s++;
+
+	if (c == '\001')	/* In reality a ^A can quote another ^A...*/
+		return 0;
+
+	if (i < 5)
+		return 0;
+
+	s++;
+	i--;
+	c = *s;
+	if (!ISDIGIT(c))
+		return 0;
+	ircp->irc_addr = s;
+	/*
+	 * Get the IP#
+	 */
+	for (l = 0; ISDIGIT(c) && (i > 0); i--) {
+		l *= 10;
+		l += c - '0';
+		c = *s++;
+	}
+
+	if (i < 4)
+		return 0;
+
+	if (c != ' ')
+		return 0;
+
+	ircp->irc_ipnum = l;
+	s++;
+	i--;
+	c = *s;
+	if (!ISDIGIT(c))
+		return 0;
+	/*
+	 * Get the port#
+	 */
+	for (l = 0; ISDIGIT(c) && (i > 0); i--) {
+		l *= 10;
+		l += c - '0';
+		c = *s++;
+	}
+	if (i < 3)
+		return 0;
+	if (strncmp(s, "\001\r\n", 3))
+		return 0;
+	s += 3;
+	ircp->irc_len = s - buf;
+	ircp->irc_port = l;
+	return 1;
+}
+
+
+/*
+ * Start a new IRC proxy session: allocate a zeroed ircinfo_t and hang it
+ * off the session.  Returns 0, or -1 if the allocation fails.
+ */
+int ippr_irc_new(fin, aps, nat)
+fr_info_t *fin;
+ap_session_t *aps;
+nat_t *nat;
+{
+	ircinfo_t *irc;
+
+	KMALLOC(irc, ircinfo_t *);
+	if (irc == NULL)
+		return -1;
+
+	fin = fin;	/* LINT */
+	nat = nat;	/* LINT */
+
+	aps->aps_data = irc;
+	aps->aps_psiz = sizeof(ircinfo_t);
+
+	bzero((char *)irc, sizeof(*irc));
+	return 0;
+}
+
+
+/*
+ * Inspect an outgoing TCP segment for a DCC request that advertises the
+ * internal address.  If ippr_irc_complete() accepts it, the request text is
+ * rewritten with the packet's source address, the packet length is adjusted
+ * and a slave NAT entry plus state are created for the connection expected
+ * to come back.
+ *
+ * Returns the change in payload length, or 0 when the packet was left alone
+ * (and also when the advertised port is below 1024, in which case the
+ * payload has already been rewritten).
+ */
+int ippr_irc_send(fin, nat)
+fr_info_t *fin;
+nat_t *nat;
+{
+	char ctcpbuf[IPF_IRCBUFSZ], newbuf[IPF_IRCBUFSZ];
+	tcphdr_t *tcp, tcph, *tcp2 = &tcph;
+	int off, inc = 0, i, dlen;
+	size_t nlen = 0, olen;
+	struct in_addr swip;
+	u_short a5, sp;
+	ircinfo_t *irc;
+	fr_info_t fi;
+	nat_t *nat2;
+	u_int a1;
+	ip_t *ip;
+	mb_t *m;
+#ifdef	MENTAT
+	mb_t *m1;
+#endif
+
+	m = fin->fin_m;
+	ip = fin->fin_ip;
+	tcp = (tcphdr_t *)fin->fin_dp;
+	bzero(ctcpbuf, sizeof(ctcpbuf));
+	/* Offset of the TCP payload from the start of the message block. */
+	off = (char *)tcp - (char *)ip + (TCP_OFF(tcp) << 2) + fin->fin_ipoff;
+
+#ifdef __sgi
+	dlen = fin->fin_plen - off;
+#else
+	dlen = MSGDSIZE(m) - off;
+#endif
+	if (dlen <= 0)
+		return 0;
+	COPYDATA(m, off, MIN(sizeof(ctcpbuf), dlen), ctcpbuf);
+
+	if (dlen <= 0)
+		return 0;
+	ctcpbuf[sizeof(ctcpbuf) - 1] = '\0';
+	*newbuf = '\0';
+
+	irc = nat->nat_aps->aps_data;
+	/*
+	 * NOTE(review): dlen, not the number of bytes actually copied into
+	 * ctcpbuf, is passed as the length - confirm the parser cannot walk
+	 * past the buffer when dlen exceeds IPF_IRCBUFSZ.
+	 */
+	if (ippr_irc_complete(irc, ctcpbuf, dlen) == 0)
+		return 0;
+
+	/*
+	 * check that IP address in the PORT/PASV reply is the same as the
+	 * sender of the command - prevents using PORT for port scanning.
+	 */
+	if (irc->irc_ipnum != ntohl(nat->nat_inip.s_addr))
+		return 0;
+
+	a5 = irc->irc_port;
+
+	/*
+	 * Calculate new address parts for the DCC command
+	 */
+	a1 = ntohl(ip->ip_src.s_addr);
+	olen = irc->irc_len;
+	i = irc->irc_addr - ctcpbuf;
+	i++;
+	(void) strncpy(newbuf, ctcpbuf, i);
+	/*
+	 * NOTE(review): both variants below format at newbuf[0], which
+	 * overwrites the prefix strncpy() just copied - confirm this is
+	 * intended before touching it.
+	 */
+	/* DO NOT change these! */
+#if defined(SNPRINTF) && defined(KERNEL)
+	(void) SNPRINTF(newbuf, sizeof(newbuf) - i, "%u %u\001\r\n", a1, a5);
+#else
+	(void) sprintf(newbuf, "%u %u\001\r\n", a1, a5);
+#endif
+
+	nlen = strlen(newbuf);
+	inc = nlen - olen;
+
+	if ((inc + ip->ip_len) > 65535)
+		return 0;
+
+#ifdef	MENTAT
+	/* Find the last block of the message. */
+	for (m1 = m; m1->b_cont; m1 = m1->b_cont)
+		;
+	if ((inc > 0) && (m1->b_datap->db_lim - m1->b_wptr < inc)) {
+		mblk_t *nm;
+
+		/* alloc enough to keep same trailer space for lower driver */
+		nm = allocb(nlen, BPRI_MED);
+		PANIC((!nm),("ippr_irc_out: allocb failed"));
+
+		nm->b_band = m1->b_band;
+		nm->b_wptr += nlen;
+
+		m1->b_wptr -= olen;
+		PANIC((m1->b_wptr < m1->b_rptr),
+		      ("ippr_irc_out: cannot handle fragmented data block"));
+
+		linkb(m1, nm);
+	} else {
+# if SOLARIS && defined(ICK_VALID)
+		if (m1->b_datap->db_struiolim == m1->b_wptr)
+			m1->b_datap->db_struiolim += inc;
+		m1->b_datap->db_struioflag &= ~STRUIO_IP;
+# endif
+		m1->b_wptr += inc;
+	}
+#else
+	if (inc < 0)
+		m_adj(m, inc);
+	/* the mbuf chain will be extended if necessary by m_copyback() */
+#endif
+	COPYBACK(m, off, nlen, newbuf);
+
+	if (inc != 0) {
+#if defined(MENTAT) || defined(__sgi)
+		register u_32_t sum1, sum2;
+
+		sum1 = ip->ip_len;
+		sum2 = ip->ip_len + inc;
+
+		/* Because ~1 == -2, We really need ~1 == -1 */
+		if (sum1 > sum2)
+			sum2--;
+		sum2 -= sum1;
+		sum2 = (sum2 & 0xffff) + (sum2 >> 16);
+
+		fix_outcksum(fin, &ip->ip_sum, sum2);
+#endif
+		ip->ip_len += inc;
+	}
+
+	/*
+	 * Add skeleton NAT entry for connection which will come back the
+	 * other way.
+	 */
+	sp = htons(a5);
+	/*
+	 * Don't allow the PORT command to specify a port < 1024 due to
+	 * security crap.
+	 */
+	if (ntohs(sp) < 1024)
+		return 0;
+
+	/*
+	 * The server may not make the connection back from port 20, but
+	 * it is the most likely so use it here to check for a conflicting
+	 * mapping.
+	 */
+	bcopy((caddr_t)fin, (caddr_t)&fi, sizeof(fi));
+	fi.fin_data[0] = sp;
+	fi.fin_data[1] = fin->fin_data[1];
+	/*
+	 * NOTE(review): the lookup is given fin, not the fi prepared just
+	 * above - confirm which one was intended.
+	 */
+	nat2 = nat_outlookup(fin, IPN_TCP, nat->nat_p, nat->nat_inip,
+			     ip->ip_dst);
+	if (nat2 == NULL) {
+		bcopy((caddr_t)fin, (caddr_t)&fi, sizeof(fi));
+		bzero((char *)tcp2, sizeof(*tcp2));
+		tcp2->th_win = htons(8192);
+		tcp2->th_sport = sp;
+		tcp2->th_dport = 0; /* XXX - don't specify remote port */
+		fi.fin_state = NULL;
+		fi.fin_nat = NULL;
+		fi.fin_data[0] = ntohs(sp);
+		fi.fin_data[1] = 0;
+		fi.fin_dp = (char *)tcp2;
+		fi.fin_fr = &ircnatfr;
+		fi.fin_dlen = sizeof(*tcp2);
+		fi.fin_plen = fi.fin_hlen + sizeof(*tcp2);
+		/*
+		 * The source address in the real IP header is replaced by the
+		 * internal address while the entry is created and put back
+		 * afterwards.
+		 */
+		swip = ip->ip_src;
+		ip->ip_src = nat->nat_inip;
+		nat2 = nat_new(&fi, nat->nat_ptr, NULL,
+			       NAT_SLAVE|IPN_TCP|SI_W_DPORT, NAT_OUTBOUND);
+		if (nat2 != NULL) {
+			(void) nat_proto(&fi, nat2, 0);
+			nat_update(&fi, nat2, nat2->nat_ptr);
+
+			(void) fr_addstate(&fi, NULL, SI_W_DPORT);
+			if (fi.fin_state != NULL)
+				fr_statederef(&fi, (ipstate_t **)&fi.fin_state);
+		}
+		ip->ip_src = swip;
+	}
+	return inc;
+}
+
+
+/*
+ * Outbound hook: hand the packet to ippr_irc_send() and return its result.
+ */
+int ippr_irc_out(fin, aps, nat)
+fr_info_t *fin;
+ap_session_t *aps;
+nat_t *nat;
+{
+	aps = aps;	/* LINT */
+	return ippr_irc_send(fin, nat);
+}
diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_lookup.h b/usr/src/uts/common/inet/ipf/netinet/ip_lookup.h
new file mode 100644
index 0000000000..76cdd8fd0a
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/netinet/ip_lookup.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 1993-2005 by Darren Reed.
+ * See the IPFILTER.LICENCE file for details on licencing.
+ */ + +#ifndef __IP_LOOKUP_H__ +#define __IP_LOOKUP_H__ + +#if defined(__STDC__) || defined(__GNUC__) || defined(_AIX51) +# define SIOCLOOKUPADDTABLE _IOWR('r', 60, struct iplookupop) +# define SIOCLOOKUPDELTABLE _IOWR('r', 61, struct iplookupop) +# define SIOCLOOKUPSTAT _IOWR('r', 64, struct iplookupop) +# define SIOCLOOKUPSTATW _IOW('r', 64, struct iplookupop) +# define SIOCLOOKUPFLUSH _IOWR('r', 65, struct iplookupflush) +# define SIOCLOOKUPADDNODE _IOWR('r', 67, struct iplookupop) +# define SIOCLOOKUPADDNODEW _IOW('r', 67, struct iplookupop) +# define SIOCLOOKUPDELNODE _IOWR('r', 68, struct iplookupop) +# define SIOCLOOKUPDELNODEW _IOW('r', 68, struct iplookupop) +#else +# define SIOCLOOKUPADDTABLE _IOWR(r, 60, struct iplookupop) +# define SIOCLOOKUPDELTABLE _IOWR(r, 61, struct iplookupop) +# define SIOCLOOKUPSTAT _IOWR(r, 64, struct iplookupop) +# define SIOCLOOKUPSTATW _IOW(r, 64, struct iplookupop) +# define SIOCLOOKUPFLUSH _IOWR(r, 65, struct iplookupflush) +# define SIOCLOOKUPADDNODE _IOWR(r, 67, struct iplookupop) +# define SIOCLOOKUPADDNODEW _IOW(r, 67, struct iplookupop) +# define SIOCLOOKUPDELNODE _IOWR(r, 68, struct iplookupop) +# define SIOCLOOKUPDELNODEW _IOW(r, 68, struct iplookupop) +#endif + +typedef struct iplookupop { + int iplo_type; /* IPLT_* */ + int iplo_unit; /* IPL_LOG* */ + u_int iplo_arg; + char iplo_name[FR_GROUPLEN]; + size_t iplo_size; /* sizeof struct at iplo_struct */ + void *iplo_struct; +} iplookupop_t; + +typedef struct iplookupflush { + int iplf_type; /* IPLT_* */ + int iplf_unit; /* IPL_LOG* */ + u_int iplf_arg; + size_t iplf_count; + char iplf_name[FR_GROUPLEN]; +} iplookupflush_t; + +typedef struct iplookuplink { + int ipll_type; /* IPLT_* */ + int ipll_unit; /* IPL_LOG* */ + u_int ipll_num; + char ipll_group[FR_GROUPLEN]; +} iplookuplink_t; + +#define IPLT_ALL -1 +#define IPLT_NONE 0 +#define IPLT_POOL 1 +#define IPLT_HASH 2 + +#define IPLT_ANON 0x80000000 + +extern int ip_lookup_init __P((void)); +extern int 
ip_lookup_ioctl __P((caddr_t, ioctlcmd_t, int)); +extern void ip_lookup_unload __P((void)); +extern void ip_lookup_deref __P((int, void *)); + +#endif /* __IP_LOOKUP_H__ */ diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_nat.h b/usr/src/uts/common/inet/ipf/netinet/ip_nat.h new file mode 100644 index 0000000000..930e8aa103 --- /dev/null +++ b/usr/src/uts/common/inet/ipf/netinet/ip_nat.h @@ -0,0 +1,478 @@ +/* + * Copyright (C) 1995-2001, 2003 by Darren Reed. + * + * See the IPFILTER.LICENCE file for details on licencing. + * + * @(#)ip_nat.h 1.5 2/4/96 + * $Id: ip_nat.h,v 2.90.2.11 2005/06/18 02:41:32 darrenr Exp $ +* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifndef __IP_NAT_H__ +#define __IP_NAT_H__ + +#ifndef SOLARIS +#define SOLARIS (defined(sun) && (defined(__svr4__) || defined(__SVR4))) +#endif + +#if defined(__STDC__) || defined(__GNUC__) || defined(_AIX51) +#define SIOCADNAT _IOW('r', 60, struct ipfobj) +#define SIOCRMNAT _IOW('r', 61, struct ipfobj) +#define SIOCGNATS _IOWR('r', 62, struct ipfobj) +#define SIOCGNATL _IOWR('r', 63, struct ipfobj) +#else +#define SIOCADNAT _IOW(r, 60, struct ipfobj) +#define SIOCRMNAT _IOW(r, 61, struct ipfobj) +#define SIOCGNATS _IOWR(r, 62, struct ipfobj) +#define SIOCGNATL _IOWR(r, 63, struct ipfobj) +#endif + +#undef LARGE_NAT /* define this if you're setting up a system to NAT + * LARGE numbers of networks/hosts - i.e. in the + * hundreds or thousands. In such a case, you should + * also change the RDR_SIZE and NAT_SIZE below to more + * appropriate sizes. The figures below were used for + * a setup with 1000-2000 networks to NAT. 
+ */ +#ifndef NAT_SIZE +# ifdef LARGE_NAT +# define NAT_SIZE 2047 +# else +# define NAT_SIZE 127 +# endif +#endif +#ifndef RDR_SIZE +# ifdef LARGE_NAT +# define RDR_SIZE 2047 +# else +# define RDR_SIZE 127 +# endif +#endif +#ifndef HOSTMAP_SIZE +# ifdef LARGE_NAT +# define HOSTMAP_SIZE 8191 +# else +# define HOSTMAP_SIZE 2047 +# endif +#endif +#ifndef NAT_TABLE_MAX +/* + * This is newly introduced and for the sake of "least surprise", the numbers + * present aren't what we'd normally use for creating a proper hash table. + */ +# ifdef LARGE_NAT +# define NAT_TABLE_MAX 180000 +# else +# define NAT_TABLE_MAX 30000 +# endif +#endif +#ifndef NAT_TABLE_SZ +# ifdef LARGE_NAT +# define NAT_TABLE_SZ 16383 +# else +# define NAT_TABLE_SZ 2047 +# endif +#endif +#ifndef APR_LABELLEN +#define APR_LABELLEN 16 +#endif +#define NAT_HW_CKSUM 0x80000000 + +#define DEF_NAT_AGE 1200 /* 10 minutes (600 seconds) */ + +struct ipstate; +struct ap_session; + +typedef struct nat { + ipfmutex_t nat_lock; + struct nat *nat_next; + struct nat **nat_pnext; + struct nat *nat_hnext[2]; + struct nat **nat_phnext[2]; + struct hostmap *nat_hm; + void *nat_data; + struct nat **nat_me; + struct ipstate *nat_state; + struct ap_session *nat_aps; /* proxy session */ + frentry_t *nat_fr; /* filter rule ptr if appropriate */ + struct ipnat *nat_ptr; /* pointer back to the rule */ + void *nat_ifps[2]; + void *nat_sync; + ipftqent_t nat_tqe; + u_32_t nat_flags; + u_32_t nat_sumd[2]; /* ip checksum delta for data segment*/ + u_32_t nat_ipsumd; /* ip checksum delta for ip header */ + u_32_t nat_mssclamp; /* if != zero clamp MSS to this */ + i6addr_t nat_inip6; + i6addr_t nat_outip6; + i6addr_t nat_oip6; /* other ip */ + U_QUAD_T nat_pkts[2]; + U_QUAD_T nat_bytes[2]; + union { + udpinfo_t nat_unu; + tcpinfo_t nat_unt; + icmpinfo_t nat_uni; + greinfo_t nat_ugre; + } nat_un; + u_short nat_oport; /* other port */ + u_short nat_use; + u_char nat_p; /* protocol for NAT */ + int nat_dir; + int nat_ref; /* reference 
count */ + int nat_hv[2]; + char nat_ifnames[2][LIFNAMSIZ]; + int nat_rev; /* 0 = forward, 1 = reverse */ +} nat_t; + +#define nat_inip nat_inip6.in4 +#define nat_outip nat_outip6.in4 +#define nat_oip nat_oip6.in4 +#define nat_age nat_tqe.tqe_die +#define nat_inport nat_un.nat_unt.ts_sport +#define nat_outport nat_un.nat_unt.ts_dport +#define nat_type nat_un.nat_uni.ici_type +#define nat_seq nat_un.nat_uni.ici_seq +#define nat_id nat_un.nat_uni.ici_id +#define nat_tcpstate nat_tqe.tqe_state + +/* + * Values for nat_dir + */ +#define NAT_INBOUND 0 +#define NAT_OUTBOUND 1 + +/* + * Definitions for nat_flags + */ +#define NAT_TCP 0x0001 /* IPN_TCP */ +#define NAT_UDP 0x0002 /* IPN_UDP */ +#define NAT_ICMPERR 0x0004 /* IPN_ICMPERR */ +#define NAT_ICMPQUERY 0x0008 /* IPN_ICMPQUERY */ +#define NAT_SEARCH 0x0010 +#define NAT_SLAVE 0x0020 /* Slave connection for a proxy */ +#define NAT_NOTRULEPORT 0x0040 + +#define NAT_TCPUDP (NAT_TCP|NAT_UDP) +#define NAT_TCPUDPICMP (NAT_TCP|NAT_UDP|NAT_ICMPERR) +#define NAT_TCPUDPICMPQ (NAT_TCP|NAT_UDP|NAT_ICMPQUERY) +#define NAT_FROMRULE (NAT_TCP|NAT_UDP) + +/* 0x0100 reserved for FI_W_SPORT */ +/* 0x0200 reserved for FI_W_DPORT */ +/* 0x0400 reserved for FI_W_SADDR */ +/* 0x0800 reserved for FI_W_DADDR */ +/* 0x1000 reserved for FI_W_NEWFR */ +/* 0x2000 reserved for SI_CLONE */ +/* 0x4000 reserved for SI_CLONED */ +/* 0x8000 reserved for SI_IGNOREPKT */ + +#define NAT_DEBUG 0x800000 + +typedef struct ipnat { + struct ipnat *in_next; /* NAT rule list next */ + struct ipnat *in_rnext; /* rdr rule hash next */ + struct ipnat **in_prnext; /* prior rdr next ptr */ + struct ipnat *in_mnext; /* map rule hash next */ + struct ipnat **in_pmnext; /* prior map next ptr */ + struct ipftq *in_tqehead[2]; + void *in_ifps[2]; + void *in_apr; + char *in_comment; + i6addr_t in_next6; + u_long in_space; + u_long in_hits; + u_int in_use; + u_int in_hv; + int in_flineno; /* conf. 
file line number */ + u_short in_pnext; + u_char in_v; + u_char in_xxx; + /* From here to the end is covered by IPN_CMPSIZ */ + u_32_t in_flags; + u_32_t in_mssclamp; /* if != 0 clamp MSS to this */ + u_int in_age[2]; + int in_redir; /* see below for values */ + int in_p; /* protocol. */ + i6addr_t in_in[2]; + i6addr_t in_out[2]; + i6addr_t in_src[2]; + frtuc_t in_tuc; + u_short in_port[2]; + u_short in_ppip; /* ports per IP. */ + u_short in_ippip; /* IP #'s per IP# */ + char in_ifnames[2][LIFNAMSIZ]; + char in_plabel[APR_LABELLEN]; /* proxy label. */ + ipftag_t in_tag; +} ipnat_t; + +#define in_pmin in_port[0] /* Also holds static redir port */ +#define in_pmax in_port[1] +#define in_nextip in_next6.in4 +#define in_nip in_next6.in4.s_addr +#define in_inip in_in[0].in4.s_addr +#define in_inmsk in_in[1].in4.s_addr +#define in_outip in_out[0].in4.s_addr +#define in_outmsk in_out[1].in4.s_addr +#define in_srcip in_src[0].in4.s_addr +#define in_srcmsk in_src[1].in4.s_addr +#define in_scmp in_tuc.ftu_scmp +#define in_dcmp in_tuc.ftu_dcmp +#define in_stop in_tuc.ftu_stop +#define in_dtop in_tuc.ftu_dtop +#define in_sport in_tuc.ftu_sport +#define in_dport in_tuc.ftu_dport + +/* + * Bit definitions for in_flags + */ +#define IPN_ANY 0x00000 +#define IPN_TCP 0x00001 +#define IPN_UDP 0x00002 +#define IPN_TCPUDP (IPN_TCP|IPN_UDP) +#define IPN_ICMPERR 0x00004 +#define IPN_TCPUDPICMP (IPN_TCP|IPN_UDP|IPN_ICMPERR) +#define IPN_ICMPQUERY 0x00008 +#define IPN_TCPUDPICMPQ (IPN_TCP|IPN_UDP|IPN_ICMPQUERY) +#define IPN_RF (IPN_TCPUDP|IPN_DELETE|IPN_ICMPERR) +#define IPN_AUTOPORTMAP 0x00010 +#define IPN_IPRANGE 0x00020 +#define IPN_FILTER 0x00040 +#define IPN_SPLIT 0x00080 +#define IPN_ROUNDR 0x00100 +#define IPN_NOTSRC 0x04000 +#define IPN_NOTDST 0x08000 +#define IPN_DYNSRCIP 0x10000 /* dynamic src IP# */ +#define IPN_DYNDSTIP 0x20000 /* dynamic dst IP# */ +#define IPN_DELETE 0x40000 +#define IPN_STICKY 0x80000 +#define IPN_FRAG 0x100000 +#define IPN_FIXEDDPORT 0x200000 +#define 
IPN_FINDFORWARD 0x400000 +#define IPN_IN 0x800000 +#define IPN_USERFLAGS (IPN_TCPUDP|IPN_AUTOPORTMAP|IPN_IPRANGE|IPN_SPLIT|\ + IPN_ROUNDR|IPN_FILTER|IPN_NOTSRC|IPN_NOTDST|\ + IPN_FRAG|IPN_STICKY|IPN_FIXEDDPORT|IPN_ICMPQUERY) + +/* + * Values for in_redir + */ +#define NAT_MAP 0x01 +#define NAT_REDIRECT 0x02 +#define NAT_BIMAP (NAT_MAP|NAT_REDIRECT) +#define NAT_MAPBLK 0x04 + +#define MAPBLK_MINPORT 1024 /* don't use reserved ports for src port */ +#define USABLE_PORTS (65536 - MAPBLK_MINPORT) + +#define IPN_CMPSIZ (sizeof(ipnat_t) - offsetof(ipnat_t, in_flags)) + +typedef struct natlookup { + struct in_addr nl_inip; + struct in_addr nl_outip; + struct in_addr nl_realip; + int nl_flags; + u_short nl_inport; + u_short nl_outport; + u_short nl_realport; +} natlookup_t; + + +typedef struct nat_save { + void *ipn_next; + struct nat ipn_nat; + struct ipnat ipn_ipnat; + struct frentry ipn_fr; + int ipn_dsize; + char ipn_data[4]; +} nat_save_t; + +#define ipn_rule ipn_nat.nat_fr + +typedef struct natget { + void *ng_ptr; + int ng_sz; +} natget_t; + + +#undef tr_flags +typedef struct nattrpnt { + struct in_addr tr_dstip; /* real destination IP# */ + struct in_addr tr_srcip; /* real source IP# */ + struct in_addr tr_locip; /* local source IP# */ + u_int tr_flags; + int tr_expire; + u_short tr_dstport; /* real destination port# */ + u_short tr_srcport; /* real source port# */ + u_short tr_locport; /* local source port# */ + struct nattrpnt *tr_hnext; + struct nattrpnt **tr_phnext; + struct nattrpnt *tr_next; + struct nattrpnt **tr_pnext; /* previous next */ +} nattrpnt_t; + +#define TN_CMPSIZ offsetof(nattrpnt_t, tr_hnext) + + +/* + * This structure gets used to help NAT sessions keep the same NAT rule (and + * thus translation for IP address) when: + * (a) round-robin redirects are in use + * (b) different IP add + */ +typedef struct hostmap { + struct hostmap *hm_next; + struct hostmap **hm_pnext; + struct ipnat *hm_ipnat; + struct in_addr hm_srcip; + struct in_addr 
hm_dstip; + struct in_addr hm_mapip; + u_32_t hm_port; + int hm_ref; +} hostmap_t; + + +/* + * Structure used to pass information in to nat_newmap and nat_newrdr. + */ +typedef struct natinfo { + ipnat_t *nai_np; + u_32_t nai_sum1; + u_32_t nai_sum2; + u_32_t nai_nflags; + u_32_t nai_flags; + struct in_addr nai_ip; + u_short nai_port; + u_short nai_nport; + u_short nai_sport; + u_short nai_dport; +} natinfo_t; + + +typedef struct natstat { + u_long ns_mapped[2]; + u_long ns_rules; + u_long ns_added; + u_long ns_expire; + u_long ns_inuse; + u_long ns_logged; + u_long ns_logfail; + u_long ns_memfail; + u_long ns_badnat; + u_long ns_addtrpnt; + nat_t **ns_table[2]; + hostmap_t **ns_maptable; + ipnat_t *ns_list; + void *ns_apslist; + u_int ns_wilds; + u_int ns_nattab_sz; + u_int ns_nattab_max; + u_int ns_rultab_sz; + u_int ns_rdrtab_sz; + u_int ns_trpntab_sz; + u_int ns_hostmap_sz; + nat_t *ns_instances; + nattrpnt_t *ns_trpntlist; + u_long *ns_bucketlen[2]; +} natstat_t; + +typedef struct natlog { + struct in_addr nl_origip; + struct in_addr nl_outip; + struct in_addr nl_inip; + u_short nl_origport; + u_short nl_outport; + u_short nl_inport; + u_short nl_type; + int nl_rule; + U_QUAD_T nl_pkts[2]; + U_QUAD_T nl_bytes[2]; + u_char nl_p; +} natlog_t; + + +#define NL_NEWMAP NAT_MAP +#define NL_NEWRDR NAT_REDIRECT +#define NL_NEWBIMAP NAT_BIMAP +#define NL_NEWBLOCK NAT_MAPBLK +#define NL_CLONE 0xfffd +#define NL_FLUSH 0xfffe +#define NL_EXPIRE 0xffff + +#define NAT_HASH_FN(k,l,m) (((k) + ((k) >> 12) + l) % (m)) + +#define LONG_SUM(in) (((in) & 0xffff) + ((in) >> 16)) + +#define CALC_SUMD(s1, s2, sd) { \ + (s1) = ((s1) & 0xffff) + ((s1) >> 16); \ + (s2) = ((s2) & 0xffff) + ((s2) >> 16); \ + /* Do it twice */ \ + (s1) = ((s1) & 0xffff) + ((s1) >> 16); \ + (s2) = ((s2) & 0xffff) + ((s2) >> 16); \ + /* Because ~1 == -2, We really need ~1 == -1 */ \ + if ((s1) > (s2)) (s2)--; \ + (sd) = (s2) - (s1); \ + (sd) = ((sd) & 0xffff) + ((sd) >> 16); } + +#define NAT_SYSSPACE 
0x80000000 +#define NAT_LOCKHELD 0x40000000 + + +extern u_int ipf_nattable_sz; +extern u_int ipf_nattable_max; +extern u_int ipf_natrules_sz; +extern u_int ipf_rdrrules_sz; +extern u_int ipf_hostmap_sz; +extern u_int fr_nat_maxbucket; +extern u_int fr_nat_maxbucket_reset; +extern int fr_nat_lock; +extern void fr_natsync __P((void *)); +extern u_long fr_defnatage; +extern u_long fr_defnaticmpage; +extern u_long fr_defnatipage; + /* nat_table[0] -> hashed list sorted by inside (ip, port) */ + /* nat_table[1] -> hashed list sorted by outside (ip, port) */ +extern nat_t **nat_table[2]; +extern nat_t *nat_instances; +extern ipnat_t *nat_list; +extern ipnat_t **nat_rules; +extern ipnat_t **rdr_rules; +extern ipftq_t *nat_utqe; +extern natstat_t nat_stats; + +#if defined(__OpenBSD__) +extern void nat_ifdetach __P((void *)); +#endif +extern int fr_nat_ioctl __P((caddr_t, ioctlcmd_t, int)); +extern int fr_natinit __P((void)); +extern nat_t *nat_new __P((fr_info_t *, ipnat_t *, nat_t **, u_int, int)); +extern nat_t *nat_outlookup __P((fr_info_t *, u_int, u_int, struct in_addr, + struct in_addr)); +extern void fix_datacksum __P((u_short *, u_32_t)); +extern nat_t *nat_inlookup __P((fr_info_t *, u_int, u_int, struct in_addr, + struct in_addr)); +extern nat_t *nat_tnlookup __P((fr_info_t *, int)); +extern nat_t *nat_maplookup __P((void *, u_int, struct in_addr, + struct in_addr)); +extern nat_t *nat_lookupredir __P((natlookup_t *)); +extern nat_t *nat_icmperrorlookup __P((fr_info_t *, int)); +extern nat_t *nat_icmperror __P((fr_info_t *, u_int *, int)); +extern int nat_insert __P((nat_t *, int)); + +extern int fr_checknatout __P((fr_info_t *, u_32_t *)); +extern int fr_natout __P((fr_info_t *, nat_t *, int, u_32_t)); +extern int fr_checknatin __P((fr_info_t *, u_32_t *)); +extern int fr_natin __P((fr_info_t *, nat_t *, int, u_32_t)); +extern void fr_natunload __P((void)); +extern void fr_natexpire __P((void)); +extern void nat_log __P((struct nat *, u_int)); +extern void 
fix_incksum __P((fr_info_t *, u_short *, u_32_t)); +extern void fix_outcksum __P((fr_info_t *, u_short *, u_32_t)); +extern void fr_natderef __P((nat_t **)); +extern u_short *nat_proto __P((fr_info_t *, nat_t *, u_int)); +extern void nat_update __P((fr_info_t *, nat_t *, ipnat_t *)); +extern void fr_setnatqueue __P((nat_t *, int)); + +#endif /* __IP_NAT_H__ */ diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_netbios_pxy.c b/usr/src/uts/common/inet/ipf/netinet/ip_netbios_pxy.c new file mode 100644 index 0000000000..8bfa8fac47 --- /dev/null +++ b/usr/src/uts/common/inet/ipf/netinet/ip_netbios_pxy.c @@ -0,0 +1,118 @@ +/* + * Simple netbios-dgm transparent proxy for in-kernel use. + * For use with the NAT code. + * $Id: ip_netbios_pxy.c,v 2.8.2.1 2005/07/15 21:56:51 darrenr Exp $ + */ + +/*- + * Copyright (c) 2002-2003 Paul J. Ledbetter III + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $Id: ip_netbios_pxy.c,v 2.8.2.1 2005/07/15 21:56:51 darrenr Exp $
+ */
+
+#define	IPF_NETBIOS_PROXY
+
+int ippr_netbios_init __P((void));
+void ippr_netbios_fini __P((void));
+int ippr_netbios_out __P((fr_info_t *, ap_session_t *, nat_t *));
+
+static	frentry_t	netbiosfr;
+
+int	netbios_proxy_init = 0;
+
+/*
+ * Initialize local structures.
+ *
+ * Zeroes the proxy's private filter rule (netbiosfr), gives it a single
+ * reference and its flags, initialises the rule's lock and records that
+ * initialisation has happened so ippr_netbios_fini() knows there is a lock
+ * to destroy.  Always returns 0.
+ */
+int ippr_netbios_init()
+{
+	bzero((char *)&netbiosfr, sizeof(netbiosfr));
+	netbiosfr.fr_ref = 1;
+	netbiosfr.fr_flags = FR_INQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE;
+	MUTEX_INIT(&netbiosfr.fr_lock, "NETBIOS proxy rule lock");
+	netbios_proxy_init = 1;
+
+	return 0;
+}
+
+
+/*
+ * Undo ippr_netbios_init(): destroy the rule lock, but only if init ran.
+ */
+void ippr_netbios_fini()
+{
+	if (netbios_proxy_init == 1) {
+		MUTEX_DESTROY(&netbiosfr.fr_lock);
+		netbios_proxy_init = 0;
+	}
+}
+
+
+/*
+ * Outbound packet hook: overwrite the 6 byte source address/port field
+ * inside the NetBIOS datagram header with the (NATed) source address and
+ * source port taken from the packet's own IP and UDP headers.
+ *
+ * fin - packet information; fin_ip, fin_dp, fin_m, fin_dlen and fin_ipoff
+ *       are read.
+ * aps - proxy session (unused).
+ * nat - NAT entry (unused).
+ *
+ * Packets with fewer than 11 bytes of UDP payload are left untouched.
+ * Always returns 0.
+ */
+int ippr_netbios_out(fin, aps, nat)
+fr_info_t *fin;
+ap_session_t *aps;
+nat_t *nat;
+{
+	char dgmbuf[6];
+	char *addr, *port;
+	int off, dlen;
+	udphdr_t *udp;
+	ip_t *ip;
+	mb_t *m;
+
+	aps = aps;	/* LINT */
+	nat = nat;	/* LINT */
+
+	m = fin->fin_m;
+	dlen = fin->fin_dlen - sizeof(*udp);
+	/*
+	 * no net bios datagram could possibly be shorter than this
+	 */
+	if (dlen < 11)
+		return 0;
+
+	ip = fin->fin_ip;
+	udp = (udphdr_t *)fin->fin_dp;
+	off = (char *)udp - (char *)ip + sizeof(*udp) + fin->fin_ipoff;
+
+	/*
+	 * move past the
+	 *	ip header;
+	 *	udp header;
+	 *	4 bytes into the net bios dgm header.
+	 *  According to rfc1002, this should be the exact location of
+	 *  the source address/port
+	 */
+	off += 4;
+
+	/*
+	 * Copy NATed source Address/port.
+	 *
+	 * ip_src and uh_sport are stored in network byte order, which is
+	 * also the order the datagram header wants, so the bytes are copied
+	 * in memory order.  Extracting them with shifts on the integer value
+	 * only produces that order on little-endian hosts and reverses the
+	 * address and port on big-endian ones.
+	 */
+	addr = (char *)&ip->ip_src.s_addr;
+	port = (char *)&udp->uh_sport;
+
+	dgmbuf[0] = addr[0];
+	dgmbuf[1] = addr[1];
+	dgmbuf[2] = addr[2];
+	dgmbuf[3] = addr[3];
+
+	dgmbuf[4] = port[0];
+	dgmbuf[5] = port[1];
+
+	/* replace data in packet */
+	COPYBACK(m, off, sizeof(dgmbuf), dgmbuf);
+
+	return 0;
+}
diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_pool.h b/usr/src/uts/common/inet/ipf/netinet/ip_pool.h
new file mode 100644
index 0000000000..b40ba2b0fb
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/netinet/ip_pool.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 1993-2001, 2003 by Darren Reed.
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ *
+ * $Id: ip_pool.h,v 2.26.2.3 2005/06/12 07:18:27 darrenr Exp $
+ *
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifndef __IP_POOL_H__ +#define __IP_POOL_H__ + +#if defined(_KERNEL) && !defined(__osf__) && !defined(__hpux) && \ + !defined(linux) && !defined(sun) && !defined(AIX) +# include <net/radix.h> +extern void rn_freehead __P((struct radix_node_head *)); +# define FreeS(p, z) KFREES(p, z) +extern int max_keylen; +#else +# if defined(__osf__) || defined(__hpux) +# include "radix_ipf_local.h" +# define radix_mask ipf_radix_mask +# define radix_node ipf_radix_node +# define radix_node_head ipf_radix_node_head +# else +# include "radix_ipf.h" +# endif +#endif +#include "netinet/ip_lookup.h" + +#define IP_POOL_NOMATCH 0 +#define IP_POOL_POSITIVE 1 + +typedef struct ip_pool_node { + struct radix_node ipn_nodes[2]; + addrfamily_t ipn_addr; + addrfamily_t ipn_mask; + int ipn_info; + char ipn_name[FR_GROUPLEN]; + u_long ipn_hits; + struct ip_pool_node *ipn_next, **ipn_pnext; +} ip_pool_node_t; + + +typedef struct ip_pool_s { + struct ip_pool_s *ipo_next; + struct ip_pool_s **ipo_pnext; + struct radix_node_head *ipo_head; + ip_pool_node_t *ipo_list; + u_long ipo_hits; + int ipo_unit; + int ipo_flags; + int ipo_ref; + char ipo_name[FR_GROUPLEN]; +} ip_pool_t; + +#define IPOOL_ANON 0x80000000 + + +typedef struct ip_pool_stat { + u_long ipls_pools; + u_long ipls_tables; + u_long ipls_nodes; + ip_pool_t *ipls_list[IPL_LOGSIZE]; +} ip_pool_stat_t; + + +extern ip_pool_stat_t ipoolstat; +extern ip_pool_t *ip_pool_list[IPL_LOGSIZE]; + +extern int ip_pool_search __P((void *, int, void *)); +extern int ip_pool_init __P((void)); +extern void ip_pool_fini __P((void)); +extern int ip_pool_create __P((iplookupop_t *)); +extern int ip_pool_insert __P((ip_pool_t *, addrfamily_t *, + addrfamily_t *, int)); +extern int ip_pool_remove __P((ip_pool_t *, ip_pool_node_t *)); +extern int ip_pool_destroy __P((iplookupop_t *)); +extern void ip_pool_free __P((ip_pool_t *)); +extern void ip_pool_deref __P((ip_pool_t *)); +extern void *ip_pool_find __P((int, 
char *)); +extern ip_pool_node_t *ip_pool_findeq __P((ip_pool_t *, + addrfamily_t *, addrfamily_t *)); +extern int ip_pool_flush __P((iplookupflush_t *)); +extern int ip_pool_statistics __P((iplookupop_t *)); + +#endif /* __IP_POOL_H__ */ diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_pptp_pxy.c b/usr/src/uts/common/inet/ipf/netinet/ip_pptp_pxy.c new file mode 100644 index 0000000000..480edf1a00 --- /dev/null +++ b/usr/src/uts/common/inet/ipf/netinet/ip_pptp_pxy.c @@ -0,0 +1,528 @@ +/* + * Copyright (C) 2002-2003 by Darren Reed + * + * Simple PPTP transparent proxy for in-kernel use. For use with the NAT + * code. + * + * $Id: ip_pptp_pxy.c,v 2.10.2.10 2005/07/15 21:56:52 darrenr Exp $ + * + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#define IPF_PPTP_PROXY + +typedef struct pptp_hdr { + u_short pptph_len; + u_short pptph_type; + u_32_t pptph_cookie; +} pptp_hdr_t; + +#define PPTP_MSGTYPE_CTL 1 +#define PPTP_MTCTL_STARTREQ 1 +#define PPTP_MTCTL_STARTREP 2 +#define PPTP_MTCTL_STOPREQ 3 +#define PPTP_MTCTL_STOPREP 4 +#define PPTP_MTCTL_ECHOREQ 5 +#define PPTP_MTCTL_ECHOREP 6 +#define PPTP_MTCTL_OUTREQ 7 +#define PPTP_MTCTL_OUTREP 8 +#define PPTP_MTCTL_INREQ 9 +#define PPTP_MTCTL_INREP 10 +#define PPTP_MTCTL_INCONNECT 11 +#define PPTP_MTCTL_CLEAR 12 +#define PPTP_MTCTL_DISCONNECT 13 +#define PPTP_MTCTL_WANERROR 14 +#define PPTP_MTCTL_LINKINFO 15 + + +int ippr_pptp_init __P((void)); +void ippr_pptp_fini __P((void)); +int ippr_pptp_new __P((fr_info_t *, ap_session_t *, nat_t *)); +void ippr_pptp_del __P((ap_session_t *)); +int ippr_pptp_inout __P((fr_info_t *, ap_session_t *, nat_t *)); +void ippr_pptp_donatstate __P((fr_info_t *, nat_t *, pptp_pxy_t *)); +int ippr_pptp_message __P((fr_info_t *, nat_t *, pptp_pxy_t *, pptp_side_t *)); +int ippr_pptp_nextmessage __P((fr_info_t *, nat_t *, pptp_pxy_t *, int)); +int ippr_pptp_mctl __P((fr_info_t *, nat_t *, pptp_pxy_t 
*, pptp_side_t *));
+
+static	frentry_t	pptpfr;
+
+int	pptp_proxy_init = 0;
+int	ippr_pptp_debug = 0;
+int	ippr_pptp_gretimeout = IPF_TTLVAL(120);	/* 2 minutes */
+
+
+/*
+ * PPTP application proxy initialization.
+ *
+ * Resets the proxy's private filter rule (pptpfr), which
+ * ippr_pptp_donatstate() later hands to fr_addstate() via fin_fr.  Both
+ * age slots are set to ippr_pptp_gretimeout and the rule lock is created.
+ * Always returns 0.
+ */
+int ippr_pptp_init()
+{
+	bzero((char *)&pptpfr, sizeof(pptpfr));
+	pptpfr.fr_ref = 1;
+	pptpfr.fr_age[0] = ippr_pptp_gretimeout;
+	pptpfr.fr_age[1] = ippr_pptp_gretimeout;
+	pptpfr.fr_flags = FR_OUTQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE;
+	MUTEX_INIT(&pptpfr.fr_lock, "PPTP proxy rule lock");
+	pptp_proxy_init = 1;
+
+	return 0;
+}
+
+
+/*
+ * Undo ippr_pptp_init(): destroy the rule lock, but only if init ran.
+ */
+void ippr_pptp_fini()
+{
+	if (pptp_proxy_init == 1) {
+		MUTEX_DESTROY(&pptpfr.fr_lock);
+		pptp_proxy_init = 0;
+	}
+}
+
+
+/*
+ * Setup for a new PPTP proxy.
+ *
+ * Allocates the per-session pptp_pxy_t (stored in aps->aps_data) and fills
+ * in the embedded NAT rule used later for the GRE mapping.
+ *
+ * Returns 0 on success, -1 if nat_outlookup() already finds a GRE entry
+ * for this inside address/destination or if the allocation fails.
+ */
+int ippr_pptp_new(fin, aps, nat)
+fr_info_t *fin;
+ap_session_t *aps;
+nat_t *nat;
+{
+	pptp_pxy_t *pptp;
+	ipnat_t *ipn;
+	ip_t *ip;
+
+	ip = fin->fin_ip;
+
+	if (nat_outlookup(fin, 0, IPPROTO_GRE, nat->nat_inip,
+			  ip->ip_dst) != NULL) {
+		if (ippr_pptp_debug > 0)
+			printf("ippr_pptp_new: GRE session already exists\n");
+		return -1;
+	}
+
+	aps->aps_psiz = sizeof(*pptp);
+	KMALLOCS(aps->aps_data, pptp_pxy_t *, sizeof(*pptp));
+	if (aps->aps_data == NULL) {
+		if (ippr_pptp_debug > 0)
+			printf("ippr_pptp_new: malloc for aps_data failed\n");
+		return -1;
+	}
+
+	/*
+	 * Create NAT rule against which the tunnel/transport mapping is
+	 * created. This is required because the current NAT rule does not
+	 * describe GRE but TCP instead.
+	 */
+	pptp = aps->aps_data;
+	bzero((char *)pptp, sizeof(*pptp));
+	ipn = &pptp->pptp_rule;
+	ipn->in_ifps[0] = fin->fin_ifp;
+	ipn->in_apr = NULL;
+	ipn->in_use = 1;
+	ipn->in_hits = 1;
+	ipn->in_ippip = 1;
+	if (nat->nat_dir == NAT_OUTBOUND) {
+		ipn->in_nip = ntohl(nat->nat_outip.s_addr);
+		ipn->in_outip = fin->fin_saddr;
+		ipn->in_redir = NAT_MAP;
+	} else if (nat->nat_dir == NAT_INBOUND) {
+		ipn->in_nip = 0;
+		ipn->in_outip = nat->nat_outip.s_addr;
+		ipn->in_redir = NAT_REDIRECT;
+	}
+	ipn->in_inip = nat->nat_inip.s_addr;
+	ipn->in_inmsk = 0xffffffff;
+	ipn->in_outmsk = 0xffffffff;
+	ipn->in_srcip = fin->fin_saddr;
+	ipn->in_srcmsk = 0xffffffff;
+	bcopy(nat->nat_ptr->in_ifnames[0], ipn->in_ifnames[0],
+	      sizeof(ipn->in_ifnames[0]));
+	ipn->in_p = IPPROTO_GRE;
+
+	/* Both directions start with an empty reassembly buffer. */
+	pptp->pptp_side[0].pptps_wptr = pptp->pptp_side[0].pptps_buffer;
+	pptp->pptp_side[1].pptps_wptr = pptp->pptp_side[1].pptps_buffer;
+	return 0;
+}
+
+
+/*
+ * Create, or refresh the timeout of, the NAT entry and the state entry for
+ * the GRE session that belongs to this PPTP control connection.
+ *
+ * fin  - the control-connection packet being processed.
+ * nat  - NAT entry of the control connection.
+ * pptp - per-session proxy data; pptp_nat and pptp_state are updated.
+ *
+ * While the fake GRE packet description is in use, the protocol field of
+ * fin's IP header is overwritten with IPPROTO_GRE; the original value is
+ * put back before returning.
+ */
+void ippr_pptp_donatstate(fin, nat, pptp)
+fr_info_t *fin;
+nat_t *nat;
+pptp_pxy_t *pptp;
+{
+	fr_info_t fi;
+	grehdr_t gre;
+	nat_t *nat2;
+	u_char p;
+	ip_t *ip;
+
+	ip = fin->fin_ip;
+	p = ip->ip_p;
+
+	nat2 = pptp->pptp_nat;
+	if ((nat2 == NULL) || (pptp->pptp_state == NULL)) {
+		/*
+		 * Something has to be created, so build a packet
+		 * description (fi) that looks like a GRE packet for this
+		 * session, with the two call ids as fin_data[].
+		 */
+		bcopy((char *)fin, (char *)&fi, sizeof(fi));
+		bzero((char *)&gre, sizeof(gre));
+		fi.fin_state = NULL;
+		fi.fin_nat = NULL;
+		fi.fin_fi.fi_p = IPPROTO_GRE;
+		fi.fin_fr = &pptpfr;
+		if ((nat->nat_dir == NAT_OUTBOUND && fin->fin_out) ||
+		    (nat->nat_dir == NAT_INBOUND && !fin->fin_out)) {
+			fi.fin_data[0] = pptp->pptp_call[0];
+			fi.fin_data[1] = pptp->pptp_call[1];
+		} else {
+			fi.fin_data[0] = pptp->pptp_call[1];
+			fi.fin_data[1] = pptp->pptp_call[0];
+		}
+		ip = fin->fin_ip;
+		ip->ip_p = IPPROTO_GRE;
+		fi.fin_flx &= ~(FI_TCPUDP|FI_STATE|FI_FRAG);
+		fi.fin_flx |= FI_IGNORE;
+		fi.fin_dp = &gre;
+		gre.gr_flags = htons(1 << 13);
+		if (fin->fin_out && nat->nat_dir == NAT_INBOUND) {
+			fi.fin_fi.fi_saddr = fin->fin_fi.fi_daddr;
+			fi.fin_fi.fi_daddr = nat->nat_outip.s_addr;
+		} else if (!fin->fin_out && nat->nat_dir == NAT_OUTBOUND) {
+			fi.fin_fi.fi_saddr = nat->nat_inip.s_addr;
+			fi.fin_fi.fi_daddr = fin->fin_fi.fi_saddr;
+		}
+	}
+
+	/*
+	 * Update NAT timeout/create NAT if missing.
+	 */
+	if (nat2 != NULL)
+		fr_queueback(&nat2->nat_tqe);
+	else {
+		nat2 = nat_new(&fi, &pptp->pptp_rule, &pptp->pptp_nat,
+			       NAT_SLAVE, nat->nat_dir);
+		pptp->pptp_nat = nat2;
+		if (nat2 != NULL) {
+			(void) nat_proto(&fi, nat2, 0);
+			nat_update(&fi, nat2, nat2->nat_ptr);
+		}
+	}
+
+	READ_ENTER(&ipf_state);
+	if (pptp->pptp_state != NULL) {
+		fr_queueback(&pptp->pptp_state->is_sti);
+		RWLOCK_EXIT(&ipf_state);
+	} else {
+		RWLOCK_EXIT(&ipf_state);
+		/*
+		 * nat_new() above may have failed, leaving nat2 NULL; only
+		 * take the inside address from the NAT entry when there is
+		 * one, otherwise keep the addresses already set up in fi.
+		 */
+		if (nat2 != NULL) {
+			if (nat->nat_dir == NAT_INBOUND)
+				fi.fin_fi.fi_daddr = nat2->nat_inip.s_addr;
+			else
+				fi.fin_fi.fi_saddr = nat2->nat_inip.s_addr;
+		}
+		fi.fin_ifp = NULL;
+		pptp->pptp_state = fr_addstate(&fi, &pptp->pptp_state,
+					       0);
+		if (fi.fin_state != NULL)
+			fr_statederef(&fi, (ipstate_t **)&fi.fin_state);
+	}
+	ip->ip_p = p;
+	return;
+}
+
+
+/*
+ * Try and build up the next PPTP message in the TCP stream and if we can
+ * build it up completely (fits in our buffer) then pass it off to the message
+ * parsing function.
+ */
+int ippr_pptp_nextmessage(fin, nat, pptp, rev)
+fr_info_t *fin;
+nat_t *nat;
+pptp_pxy_t *pptp;
+int rev;
+{
+	static char *funcname = "ippr_pptp_nextmessage";
+	pptp_side_t *pptps;
+	u_32_t start, end;
+	pptp_hdr_t *hdr;
+	tcphdr_t *tcp;
+	int dlen, off;
+	u_short len;
+	char *msg;
+
+	tcp = fin->fin_dp;
+	dlen = fin->fin_dlen - (TCP_OFF(tcp) << 2);
+	start = ntohl(tcp->th_seq);
+	pptps = &pptp->pptp_side[rev];
+	off = (char *)tcp - (char *)fin->fin_ip + (TCP_OFF(tcp) << 2) +
+	      fin->fin_ipoff;
+
+	if (dlen <= 0)
+		return 0;
+	/*
+	 * If the complete data packet is before what we expect to see
+	 * "next", just ignore it as the chances are we've already seen it.
+	 * The next if statement following this one really just causes packets
+	 * ahead of what we've seen to be dropped, implying that something in
+	 * the middle went missing and we want to see that first.
+	 */
+	end = start + dlen;
+	if (pptps->pptps_next > end && pptps->pptps_next > start)
+		return 0;
+
+	if (pptps->pptps_next != start) {
+		if (ippr_pptp_debug > 5)
+			printf("%s: next (%x) != start (%x)\n", funcname,
+				pptps->pptps_next, start);
+		return -1;
+	}
+
+	msg = (char *)fin->fin_dp + (TCP_OFF(tcp) << 2);
+
+	/*
+	 * Consume the segment's payload one PPTP message at a time.  The
+	 * per-side fields pptps_bytes/pptps_wptr/pptps_gothdr/pptps_len carry
+	 * a partly collected message over to the next call.
+	 */
+	while (dlen > 0) {
+		off += pptps->pptps_bytes;
+		if (pptps->pptps_gothdr == 0) {
+			/*
+			 * PPTP has an 8 byte header that includes the cookie.
+			 * The start of every message should include one and
+			 * it should match 1a2b3c4d. Byte order is ignored,
+			 * deliberately, when printing out the error.
+			 */
+			len = MIN(8 - pptps->pptps_bytes, dlen);
+			COPYDATA(fin->fin_m, off, len, pptps->pptps_wptr);
+			pptps->pptps_bytes += len;
+			pptps->pptps_wptr += len;
+			hdr = (pptp_hdr_t *)pptps->pptps_buffer;
+			if (pptps->pptps_bytes == 8) {
+				pptps->pptps_next += 8;
+				if (ntohl(hdr->pptph_cookie) != 0x1a2b3c4d) {
+					if (ippr_pptp_debug > 1)
+						printf("%s: bad cookie (%x)\n",
+						       funcname,
+						       hdr->pptph_cookie);
+					return -1;
+				}
+			}
+			dlen -= len;
+			msg += len;
+			off += len;
+
+			/*
+			 * NOTE(review): the header is marked as received and
+			 * pptph_len is read here even when pptps_bytes is
+			 * still below 8 (header split across segments) -
+			 * confirm whether that case can occur in practice.
+			 */
+			pptps->pptps_gothdr = 1;
+			len = ntohs(hdr->pptph_len);
+			pptps->pptps_len = len;
+			pptps->pptps_nexthdr += len;
+
+			/*
+			 * If a message is too big for the buffer, just set
+			 * the fields for the next message to come along.
+			 * The messages defined in RFC 2637 will not exceed
+			 * 512 bytes (in total length) so this is likely a
+			 * bad data packet, anyway.
+			 */
+			if (len > sizeof(pptps->pptps_buffer)) {
+				if (ippr_pptp_debug > 3)
+					printf("%s: message too big (%d)\n",
+					       funcname, len);
+				pptps->pptps_next = pptps->pptps_nexthdr;
+				pptps->pptps_wptr = pptps->pptps_buffer;
+				pptps->pptps_gothdr = 0;
+				pptps->pptps_bytes = 0;
+				pptps->pptps_len = 0;
+				break;
+			}
+		}
+
+		/* Collect as much of the message body as this segment has. */
+		len = MIN(pptps->pptps_len - pptps->pptps_bytes, dlen);
+		COPYDATA(fin->fin_m, off, len, pptps->pptps_wptr);
+		pptps->pptps_bytes += len;
+		pptps->pptps_wptr += len;
+		pptps->pptps_next += len;
+
+		/* Message still incomplete: wait for the next segment. */
+		if (pptps->pptps_len > pptps->pptps_bytes)
+			break;
+
+		/* Whole message buffered: parse it, then reset for the next. */
+		(void) ippr_pptp_message(fin, nat, pptp, pptps);
+		pptps->pptps_wptr = pptps->pptps_buffer;
+		pptps->pptps_gothdr = 0;
+		pptps->pptps_bytes = 0;
+		pptps->pptps_len = 0;
+
+		start += len;
+		msg += len;
+		dlen -= len;
+	}
+
+	return 0;
+}
+
+
+/*
+ * handle a complete PPTP message
+ *
+ * The message is read from pptps->pptps_buffer.  Only control messages
+ * (PPTP_MSGTYPE_CTL) are passed on; every other type is ignored.
+ * Always returns 0.
+ */
+int ippr_pptp_message(fin, nat, pptp, pptps)
+fr_info_t *fin;
+nat_t *nat;
+pptp_pxy_t *pptp;
+pptp_side_t *pptps;
+{
+	pptp_hdr_t *hdr = (pptp_hdr_t *)pptps->pptps_buffer;
+
+	switch (ntohs(hdr->pptph_type))
+	{
+	case PPTP_MSGTYPE_CTL :
+		(void) ippr_pptp_mctl(fin, nat, pptp, pptps);
+		break;
+
+	default :
+		break;
+	}
+	return 0;
+}
+
+
+/*
+ * handle a complete PPTP control message
+ *
+ * pptps is the side the message was collected on; the opposite side is
+ * looked up as pptpo.  Request messages set pptps_state unconditionally,
+ * reply messages only when the opposite side's state is the matching
+ * request.  Always returns 0.
+ */
+int ippr_pptp_mctl(fin, nat, pptp, pptps)
+fr_info_t *fin;
+nat_t *nat;
+pptp_pxy_t *pptp;
+pptp_side_t *pptps;
+{
+	u_short *buffer = (u_short *)(pptps->pptps_buffer);
+	pptp_side_t *pptpo;
+
+	if (pptps == &pptp->pptp_side[0])
+		pptpo = &pptp->pptp_side[1];
+	else
+		pptpo = &pptp->pptp_side[0];
+
+	/*
+	 * Breakout to handle all the various messages. Most are just state
+	 * transition.
+	 *
+	 * buffer[] views the message as 16 bit words, so buffer[4] is the
+	 * word that immediately follows the pptp_hdr_t fields.
+	 */
+	switch (ntohs(buffer[4]))
+	{
+	case PPTP_MTCTL_STARTREQ :
+		pptps->pptps_state = PPTP_MTCTL_STARTREQ;
+		break;
+	case PPTP_MTCTL_STARTREP :
+		if (pptpo->pptps_state == PPTP_MTCTL_STARTREQ)
+			pptps->pptps_state = PPTP_MTCTL_STARTREP;
+		break;
+	case PPTP_MTCTL_STOPREQ :
+		pptps->pptps_state = PPTP_MTCTL_STOPREQ;
+		break;
+	case PPTP_MTCTL_STOPREP :
+		if (pptpo->pptps_state == PPTP_MTCTL_STOPREQ)
+			pptps->pptps_state = PPTP_MTCTL_STOPREP;
+		break;
+	case PPTP_MTCTL_ECHOREQ :
+		pptps->pptps_state = PPTP_MTCTL_ECHOREQ;
+		break;
+	case PPTP_MTCTL_ECHOREP :
+		if (pptpo->pptps_state == PPTP_MTCTL_ECHOREQ)
+			pptps->pptps_state = PPTP_MTCTL_ECHOREP;
+		break;
+	case PPTP_MTCTL_OUTREQ :
+		pptps->pptps_state = PPTP_MTCTL_OUTREQ;
+		break;
+	case PPTP_MTCTL_OUTREP :
+		if (pptpo->pptps_state == PPTP_MTCTL_OUTREQ) {
+			pptps->pptps_state = PPTP_MTCTL_OUTREP;
+			/*
+			 * Words 6 and 7 are stored as found in the message
+			 * (no ntohs) - presumably the RFC 2637 Call ID and
+			 * Peer's Call ID; confirm against the RFC.
+			 */
+			pptp->pptp_call[0] = buffer[7];
+			pptp->pptp_call[1] = buffer[6];
+			ippr_pptp_donatstate(fin, nat, pptp);
+		}
+		break;
+	case PPTP_MTCTL_INREQ :
+		pptps->pptps_state = PPTP_MTCTL_INREQ;
+		break;
+	case PPTP_MTCTL_INREP :
+		if (pptpo->pptps_state == PPTP_MTCTL_INREQ) {
+			pptps->pptps_state = PPTP_MTCTL_INREP;
+			/* Same two words are recorded as for PPTP_MTCTL_OUTREP. */
+			pptp->pptp_call[0] = buffer[7];
+			pptp->pptp_call[1] = buffer[6];
+			ippr_pptp_donatstate(fin, nat, pptp);
+		}
+		break;
+	case PPTP_MTCTL_INCONNECT :
+		pptps->pptps_state = PPTP_MTCTL_INCONNECT;
+		break;
+	case PPTP_MTCTL_CLEAR :
+		pptps->pptps_state = PPTP_MTCTL_CLEAR;
+		break;
+	case PPTP_MTCTL_DISCONNECT :
+		pptps->pptps_state = PPTP_MTCTL_DISCONNECT;
+		break;
+	case PPTP_MTCTL_WANERROR :
+		pptps->pptps_state = PPTP_MTCTL_WANERROR;
+		break;
+	case PPTP_MTCTL_LINKINFO :
+		pptps->pptps_state = PPTP_MTCTL_LINKINFO;
+		break;
+	}
+
+	return 0;
+}
+
+
+/*
+ * For outgoing PPTP packets. refresh timeouts for NAT & state entries, if
+ * we can. If they have disappeared, recreate them.
+ */
+/*
+ * Packet hook for the PPTP control connection.  Works out which of the two
+ * pptp_side[] slots this packet's direction maps to, seeds both sides'
+ * expected sequence numbers from a packet carrying the TH_OPENING flags,
+ * and then feeds the payload to ippr_pptp_nextmessage(), whose result is
+ * returned.
+ */
+int ippr_pptp_inout(fin, aps, nat)
+fr_info_t *fin;
+ap_session_t *aps;
+nat_t *nat;
+{
+	pptp_pxy_t *pptp = (pptp_pxy_t *)aps->aps_data;
+	tcphdr_t *tcp = (tcphdr_t *)fin->fin_dp;
+	pptp_side_t *here, *peer;
+	u_32_t seq, ack;
+	int rev;
+
+	/*
+	 * The packet is "reversed" when it travels against the direction
+	 * the NAT entry was created for.
+	 */
+	rev = (((fin->fin_out == 1) && (nat->nat_dir == NAT_INBOUND)) ||
+	       ((fin->fin_out == 0) && (nat->nat_dir == NAT_OUTBOUND))) ? 1 : 0;
+
+	if ((tcp->th_flags & TH_OPENING) == TH_OPENING) {
+		here = &pptp->pptp_side[rev];
+		peer = &pptp->pptp_side[1 - rev];
+		ack = ntohl(tcp->th_ack);
+		seq = ntohl(tcp->th_seq) + 1;
+
+		peer->pptps_next = ack;
+		peer->pptps_nexthdr = ack;
+		here->pptps_next = seq;
+		here->pptps_nexthdr = seq;
+	}
+
+	return ippr_pptp_nextmessage(fin, nat, pptp, rev);
+}
+
+
+/*
+ * Session teardown: expire the GRE state entry (if any) on the next tick
+ * and drop the proxy's pointers to its state and NAT entries.
+ */
+void ippr_pptp_del(aps)
+ap_session_t *aps;
+{
+	pptp_pxy_t *pptp = aps->aps_data;
+	struct ipstate *is;
+
+	if (pptp == NULL)
+		return;
+
+	/*
+	 * Don't bother changing any of the NAT structure details,
+	 * *_del() is on a callback from aps_free(), from nat_delete()
+	 */
+
+	READ_ENTER(&ipf_state);
+	is = pptp->pptp_state;
+	if (is != NULL) {
+		is->is_die = fr_ticks + 1;
+		is->is_me = NULL;
+		fr_queuefront(&is->is_sti);
+	}
+	RWLOCK_EXIT(&ipf_state);
+
+	pptp->pptp_state = NULL;
+	pptp->pptp_nat = NULL;
+}
diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_proxy.h b/usr/src/uts/common/inet/ipf/netinet/ip_proxy.h
new file mode 100644
index 0000000000..1e0bedef64
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/netinet/ip_proxy.h
@@ -0,0 +1,458 @@
+/*
+ * Copyright (C) 1997-2001 by Darren Reed.
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ * + * $Id: ip_proxy.h,v 2.31.2.3 2005/06/18 02:41:33 darrenr Exp $ + */ + +#ifndef __IP_PROXY_H__ +#define __IP_PROXY_H__ + +#ifndef SOLARIS +#define SOLARIS (defined(sun) && (defined(__svr4__) || defined(__SVR4))) +#endif + +#if defined(__STDC__) || defined(__GNUC__) || defined(_AIX51) +#define SIOCPROXY _IOWR('r', 64, struct ap_control) +#else +#define SIOCPROXY _IOWR(r, 64, struct ap_control) +#endif + +#ifndef APR_LABELLEN +#define APR_LABELLEN 16 +#endif +#define AP_SESS_SIZE 53 + +struct nat; +struct ipnat; +struct ipstate; + +typedef struct ap_tcp { + u_short apt_sport; /* source port */ + u_short apt_dport; /* destination port */ + short apt_sel[2]; /* {seq,ack}{off,min} set selector */ + short apt_seqoff[2]; /* sequence # difference */ + u_32_t apt_seqmin[2]; /* don't change seq-off until after this */ + short apt_ackoff[2]; /* sequence # difference */ + u_32_t apt_ackmin[2]; /* don't change seq-off until after this */ + u_char apt_state[2]; /* connection state */ +} ap_tcp_t; + +typedef struct ap_udp { + u_short apu_sport; /* source port */ + u_short apu_dport; /* destination port */ +} ap_udp_t; + +typedef struct ap_session { + struct aproxy *aps_apr; + union { + struct ap_tcp apu_tcp; + struct ap_udp apu_udp; + } aps_un; + u_int aps_flags; + U_QUAD_T aps_bytes; /* bytes sent */ + U_QUAD_T aps_pkts; /* packets sent */ + void *aps_nat; /* pointer back to nat struct */ + void *aps_data; /* private data */ + int aps_p; /* protocol */ + int aps_psiz; /* size of private data */ + struct ap_session *aps_hnext; + struct ap_session *aps_next; +} ap_session_t; + +#define aps_sport aps_un.apu_tcp.apt_sport +#define aps_dport aps_un.apu_tcp.apt_dport +#define aps_sel aps_un.apu_tcp.apt_sel +#define aps_seqoff aps_un.apu_tcp.apt_seqoff +#define aps_seqmin aps_un.apu_tcp.apt_seqmin +#define aps_state aps_un.apu_tcp.apt_state +#define aps_ackoff aps_un.apu_tcp.apt_ackoff +#define aps_ackmin aps_un.apu_tcp.apt_ackmin + + +typedef struct ap_control { + char 
apc_label[APR_LABELLEN];
+	u_char	apc_p;
+	/*
+	 * The following fields are up to the proxy's apr_ctl routine to deal
+	 * with. When the proxy gets this in kernel space, apc_data will
+	 * point to a malloc'd region of memory of apc_dsize bytes. If the
+	 * proxy wants to keep that memory, it must set apc_data to NULL
+	 * before it returns. It is expected if this happens that it will
+	 * take care to free it in apr_fini or otherwise as appropriate.
+	 * apc_cmd is provided as a standard place to put simple commands,
+	 * with apc_arg being available to put a simple arg.
+	 */
+	u_long	apc_cmd;
+	u_long	apc_arg;
+	void	*apc_data;
+	size_t	apc_dsize;
+} ap_ctl_t;
+
+
+/*
+ * Description of one application proxy: its label and protocol, a
+ * reference count, and the callbacks a proxy implementation supplies.
+ */
+typedef	struct	aproxy	{
+	struct	aproxy	*apr_next;
+	char	apr_label[APR_LABELLEN];	/* Proxy label # */
+	u_char	apr_p;		/* protocol */
+	int	apr_ref;	/* +1 per rule referencing it */
+	int	apr_flags;
+	int	(* apr_init) __P((void));
+	void	(* apr_fini) __P((void));
+	int	(* apr_new) __P((fr_info_t *, ap_session_t *, struct nat *));
+	void	(* apr_del) __P((ap_session_t *));
+	int	(* apr_inpkt) __P((fr_info_t *, ap_session_t *, struct nat *));
+	int	(* apr_outpkt) __P((fr_info_t *, ap_session_t *, struct nat *));
+	int	(* apr_match) __P((fr_info_t *, ap_session_t *, struct nat *));
+	int	(* apr_ctl) __P((struct aproxy *, struct ap_control *));
+} aproxy_t;
+
+#define	APR_DELETE	1
+
+#define	APR_ERR(x)	((x) << 16)
+#define	APR_EXIT(x)	(((x) >> 16) & 0xffff)
+#define	APR_INC(x)	((x) & 0xffff)
+
+/*
+ * Generic #define's to cover missing things in the kernel
+ *
+ * Note that these are macros which evaluate their argument more than once.
+ */
+#ifndef isdigit
+#define isdigit(x)	((x) >= '0' && (x) <= '9')
+#endif
+#ifndef isupper
+#define isupper(x)	(((unsigned)(x) >= 'A') && ((unsigned)(x) <= 'Z'))
+#endif
+#ifndef islower
+#define islower(x)	(((unsigned)(x) >= 'a') && ((unsigned)(x) <= 'z'))
+#endif
+#ifndef isalpha
+#define isalpha(x)	(isupper(x) || islower(x))
+#endif
+#ifndef toupper
+/*
+ * Only lower case letters are converted; everything else (upper case,
+ * digits, punctuation, ...) is returned unchanged.
+ */
+#define toupper(x)	(islower(x) ? (x) - 'a' + 'A' : (x))
+#endif
+#ifndef isspace
+#define isspace(x)	(((x) == ' ') || ((x) == '\r') || ((x) == '\n') || \
+			 ((x) == '\t') || ((x) == '\b'))
+#endif
+
+/*
+ * This is the scratch buffer size used to hold strings from the TCP stream
+ * that we may want to parse. It's an arbitrary size, really, but it must
+ * be at least as large as IPF_FTPBUFSZ.
+ */
+#define	FTP_BUFSZ	120
+
+/*
+ * This buffer, however, doesn't need to be nearly so big. It just needs to
+ * be able to squeeze in the largest command it needs to rewrite, Which ones
+ * does it rewrite? EPRT, PORT, 227 replies.
+ */
+#define	IPF_FTPBUFSZ	80	/* This *MUST* be >= 53! */
+
+/*
+ * Per-direction parsing state for the FTP proxy.
+ */
+typedef struct	ftpside	{
+	char	*ftps_rptr;
+	char	*ftps_wptr;
+	void	*ftps_ifp;
+	u_32_t	ftps_seq[2];
+	u_32_t	ftps_len;
+	int	ftps_junk;	/* 2 = no cr/lf yet, 1 = cannot parse */
+	int	ftps_cmds;
+	char	ftps_buf[FTP_BUFSZ];
+} ftpside_t;
+
+typedef struct	ftpinfo	{
+	int	ftp_passok;
+	int	ftp_incok;
+	ftpside_t	ftp_side[2];
+} ftpinfo_t;
+
+
+/*
+ * For the irc proxy.
/*
 * Real audio proxy structure and #defines
 *
 * One raudio_t is allocated per proxied session by ippr_raudio_new() and
 * stored in aps_data.  ippr_raudio_out() fills in the client-side fields
 * while it parses the startup messages; ippr_raudio_in() collects the
 * first 19 bytes of the server's answer.
 */
typedef	struct	raudio_s {
	int		rap_seenpna;	/* 1 once "PNA" was found in client data */
	int		rap_seenver;	/* 1 once the 2 byte version was read */
	int		rap_version;	/* version that follows "PNA" (16 bit, */
					/* most significant byte first) */
	int		rap_eos;	/* End Of Startup */
	int		rap_gotid;	/* 1 while a message ID has been read */
	int		rap_gotlen;	/* 1 while a message length has been read */
	int		rap_mode;	/* RAP_M_* bits; starts as RAP_M_TCP */
	int		rap_sdone;	/* 1 once ippr_raudio_in() has finished */
	u_short		rap_plport;	/* port from client RA_ID_UDP message */
	u_short		rap_prport;	/* port from client RA_ID_ROBUST message */
	u_short		rap_srport;	/* port from rap_svr[13..14], taken when */
					/* rap_svr[11..12] == RA_ID_ROBUST */
	char		rap_svr[19];	/* server bytes, starting at its "PNA" */
	u_32_t		rap_sbf;	/* flag to indicate which of the 19 bytes have
					 * been filled
					 */
					/* (bit n <=> rap_svr[n]; 0x7ffff == all) */
	u_32_t		rap_sseq;	/* host order TCP sequence number of the */
					/* segment where the server "PNA" was seen */
} raudio_t;

/* Startup message IDs compared against by ippr_raudio_out(). */
#define	RA_ID_END	0
#define	RA_ID_UDP	1
#define	RA_ID_ROBUST	7

/* Bits stored in rap_mode. */
#define	RAP_M_UDP	1
#define	RAP_M_ROBUST	2
#define	RAP_M_TCP	4
#define	RAP_M_UDP_ROBUST	(RAP_M_UDP|RAP_M_ROBUST)
/*
 * Bounds helpers for the RPC message buffer.  `r' points to a structure
 * with rm_msgbuf / rm_buflen members (rpc_msg_t), `p' points into
 * rm_msgbuf and `l' is a byte count.
 *
 *   RPCB_BUF_END(r)	  - one past the last valid byte of r->rm_msgbuf.
 *   RPCB_BUF_GEQ(r,p,l)  - true when p lies before the end of the buffer
 *			    and at least l bytes remain from p to the end.
 *   RPCB_BUF_EQ(r,p,l)	  - true when exactly l bytes remain from p to the
 *			    end of the buffer.
 *
 * RPCB_BUF_END is fully parenthesised so that it behaves as a single
 * operand wherever it is expanded.  Arguments are evaluated more than
 * once; do not pass expressions with side effects.
 */
#define	RPCB_BUF_END(r)	((char *)((r)->rm_msgbuf + (r)->rm_buflen))
#define	RPCB_BUF_GEQ(r, p, l)	\
	((RPCB_BUF_END((r)) > (char *)(p)) &&		\
	 ((RPCB_BUF_END((r)) - (char *)(p)) >= (l)))
#define	RPCB_BUF_EQ(r, p, l)		\
	(RPCB_BUF_END((r)) == ((char *)(p) + (l)))
/* BEGIN (RPC message structure & macros) */
/*
 * Working copy of one RPCB message plus the results of decoding it.
 * ippr_rpcb_in() / ippr_rpcb_out() copy the UDP payload into rm_msgbuf and
 * record its size in rm_buflen; the decoders then store pointers that
 * point back into rm_msgbuf (rm_xid and the members of rm_body).
 */
typedef struct rpc_msg {
	char	rm_msgbuf[RPCB_MAXMSG];	/* RPCB data buffer */
	u_int	rm_buflen;		/* number of valid bytes in rm_msgbuf */
	u_32_t	*rm_xid;		/* -> XID word inside rm_msgbuf */
	/* u_32_t Call vs Reply */
	union {				/* decoded as a call or as a reply */
		rpc_call_t	rb_call;
		rpc_resp_t	rb_resp;
	} rm_body;
} rpc_msg_t;

/* Shorthand for the two views of rm_body. */
#define	rm_call		rm_body.rb_call
#define	rm_resp		rm_body.rb_resp
/* END (RPC message structure & macros) */
/*
 * Round a length up to the next multiple of four, the XDR basic block
 * size; a value that is already a multiple of four is returned unchanged.
 * The argument is evaluated more than once.
 *
 * For an explanation, please see the following:
 *   RFC1832 - Sections 3.11, 4.4, and 4.5.
 */
#define	XDRALIGN(x)	((((x) % 4) == 0) ? (x) : ((((x) + 3) / 4) * 4))
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#define IPF_RAUDIO_PROXY + + +int ippr_raudio_init __P((void)); +void ippr_raudio_fini __P((void)); +int ippr_raudio_new __P((fr_info_t *, ap_session_t *, nat_t *)); +int ippr_raudio_in __P((fr_info_t *, ap_session_t *, nat_t *)); +int ippr_raudio_out __P((fr_info_t *, ap_session_t *, nat_t *)); + +static frentry_t raudiofr; + +int raudio_proxy_init = 0; + + +/* + * Real Audio application proxy initialization. + */ +int ippr_raudio_init() +{ + bzero((char *)&raudiofr, sizeof(raudiofr)); + raudiofr.fr_ref = 1; + raudiofr.fr_flags = FR_INQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE; + MUTEX_INIT(&raudiofr.fr_lock, "Real Audio proxy rule lock"); + raudio_proxy_init = 1; + + return 0; +} + + +void ippr_raudio_fini() +{ + if (raudio_proxy_init == 1) { + MUTEX_DESTROY(&raudiofr.fr_lock); + raudio_proxy_init = 0; + } +} + + +/* + * Setup for a new proxy to handle Real Audio. + */ +int ippr_raudio_new(fin, aps, nat) +fr_info_t *fin; +ap_session_t *aps; +nat_t *nat; +{ + raudio_t *rap; + + KMALLOCS(aps->aps_data, void *, sizeof(raudio_t)); + if (aps->aps_data == NULL) + return -1; + + fin = fin; /* LINT */ + nat = nat; /* LINT */ + + bzero(aps->aps_data, sizeof(raudio_t)); + rap = aps->aps_data; + aps->aps_psiz = sizeof(raudio_t); + rap->rap_mode = RAP_M_TCP; /* default is for TCP */ + return 0; +} + + + +int ippr_raudio_out(fin, aps, nat) +fr_info_t *fin; +ap_session_t *aps; +nat_t *nat; +{ + raudio_t *rap = aps->aps_data; + unsigned char membuf[512 + 1], *s; + u_short id = 0; + tcphdr_t *tcp; + int off, dlen; + int len = 0; + mb_t *m; + + nat = nat; /* LINT */ + + /* + * If we've already processed the start messages, then nothing left + * for the proxy to do. 
+ */ + if (rap->rap_eos == 1) + return 0; + + m = fin->fin_m; + tcp = (tcphdr_t *)fin->fin_dp; + off = (char *)tcp - (char *)fin->fin_ip; + off += (TCP_OFF(tcp) << 2) + fin->fin_ipoff; + +#ifdef __sgi + dlen = fin->fin_plen - off; +#else + dlen = MSGDSIZE(m) - off; +#endif + if (dlen <= 0) + return 0; + + if (dlen > sizeof(membuf)) + dlen = sizeof(membuf); + + bzero((char *)membuf, sizeof(membuf)); + COPYDATA(m, off, dlen, (char *)membuf); + /* + * In all the startup parsing, ensure that we don't go outside + * the packet buffer boundary. + */ + /* + * Look for the start of connection "PNA" string if not seen yet. + */ + if (rap->rap_seenpna == 0) { + s = (u_char *)memstr("PNA", (char *)membuf, 3, dlen); + if (s == NULL) + return 0; + s += 3; + rap->rap_seenpna = 1; + } else + s = membuf; + + /* + * Directly after the PNA will be the version number of this + * connection. + */ + if (rap->rap_seenpna == 1 && rap->rap_seenver == 0) { + if ((s + 1) - membuf < dlen) { + rap->rap_version = (*s << 8) | *(s + 1); + s += 2; + rap->rap_seenver = 1; + } else + return 0; + } + + /* + * Now that we've been past the PNA and version number, we're into the + * startup messages block. This ends when a message with an ID of 0. 
+ */ + while ((rap->rap_eos == 0) && ((s + 1) - membuf < dlen)) { + if (rap->rap_gotid == 0) { + id = (*s << 8) | *(s + 1); + s += 2; + rap->rap_gotid = 1; + if (id == RA_ID_END) { + rap->rap_eos = 1; + break; + } + } else if (rap->rap_gotlen == 0) { + len = (*s << 8) | *(s + 1); + s += 2; + rap->rap_gotlen = 1; + } + + if (rap->rap_gotid == 1 && rap->rap_gotlen == 1) { + if (id == RA_ID_UDP) { + rap->rap_mode &= ~RAP_M_TCP; + rap->rap_mode |= RAP_M_UDP; + rap->rap_plport = (*s << 8) | *(s + 1); + } else if (id == RA_ID_ROBUST) { + rap->rap_mode |= RAP_M_ROBUST; + rap->rap_prport = (*s << 8) | *(s + 1); + } + s += len; + rap->rap_gotlen = 0; + rap->rap_gotid = 0; + } + } + return 0; +} + + +int ippr_raudio_in(fin, aps, nat) +fr_info_t *fin; +ap_session_t *aps; +nat_t *nat; +{ + unsigned char membuf[IPF_MAXPORTLEN + 1], *s; + tcphdr_t *tcp, tcph, *tcp2 = &tcph; + raudio_t *rap = aps->aps_data; + struct in_addr swa, swb; + int off, dlen, slen; + int a1, a2, a3, a4; + u_short sp, dp; + fr_info_t fi; + tcp_seq seq; + nat_t *nat2; + u_char swp; + ip_t *ip; + mb_t *m; + + /* + * Wait until we've seen the end of the start messages and even then + * only proceed further if we're using UDP. If they want to use TCP + * then data is sent back on the same channel that is already open. + */ + if (rap->rap_sdone != 0) + return 0; + + m = fin->fin_m; + tcp = (tcphdr_t *)fin->fin_dp; + off = (char *)tcp - (char *)fin->fin_ip; + off += (TCP_OFF(tcp) << 2) + fin->fin_ipoff; + +#ifdef __sgi + dlen = fin->fin_plen - off; +#else + dlen = MSGDSIZE(m) - off; +#endif + if (dlen <= 0) + return 0; + + if (dlen > sizeof(membuf)) + dlen = sizeof(membuf); + + bzero((char *)membuf, sizeof(membuf)); + COPYDATA(m, off, dlen, (char *)membuf); + + seq = ntohl(tcp->th_seq); + /* + * Check to see if the data in this packet is of interest to us. + * We only care for the first 19 bytes coming back from the server. 
+ */ + if (rap->rap_sseq == 0) { + s = (u_char *)memstr("PNA", (char *)membuf, 3, dlen); + if (s == NULL) + return 0; + a1 = s - membuf; + dlen -= a1; + a1 = 0; + rap->rap_sseq = seq; + a2 = MIN(dlen, sizeof(rap->rap_svr)); + } else if (seq <= rap->rap_sseq + sizeof(rap->rap_svr)) { + /* + * seq # which is the start of data and from that the offset + * into the buffer array. + */ + a1 = seq - rap->rap_sseq; + a2 = MIN(dlen, sizeof(rap->rap_svr)); + a2 -= a1; + s = membuf; + } else + return 0; + + for (a3 = a1, a4 = a2; (a4 > 0) && (a3 < 19) && (a3 >= 0); a4--,a3++) { + rap->rap_sbf |= (1 << a3); + rap->rap_svr[a3] = *s++; + } + + if ((rap->rap_sbf != 0x7ffff) || (!rap->rap_eos)) /* 19 bits */ + return 0; + rap->rap_sdone = 1; + + s = (u_char *)rap->rap_svr + 11; + if (((*s << 8) | *(s + 1)) == RA_ID_ROBUST) { + s += 2; + rap->rap_srport = (*s << 8) | *(s + 1); + } + + ip = fin->fin_ip; + swp = ip->ip_p; + swa = ip->ip_src; + swb = ip->ip_dst; + + ip->ip_p = IPPROTO_UDP; + ip->ip_src = nat->nat_inip; + ip->ip_dst = nat->nat_oip; + + bcopy((char *)fin, (char *)&fi, sizeof(fi)); + bzero((char *)tcp2, sizeof(*tcp2)); + TCP_OFF_A(tcp2, 5); + fi.fin_state = NULL; + fi.fin_nat = NULL; + fi.fin_flx |= FI_IGNORE; + fi.fin_dp = (char *)tcp2; + fi.fin_fr = &raudiofr; + fi.fin_dlen = sizeof(*tcp2); + fi.fin_plen = fi.fin_hlen + sizeof(*tcp2); + tcp2->th_win = htons(8192); + slen = ip->ip_len; + ip->ip_len = fin->fin_hlen + sizeof(*tcp); + + if (((rap->rap_mode & RAP_M_UDP_ROBUST) == RAP_M_UDP_ROBUST) && + (rap->rap_srport != 0)) { + dp = rap->rap_srport; + sp = rap->rap_prport; + tcp2->th_sport = htons(sp); + tcp2->th_dport = htons(dp); + fi.fin_data[0] = dp; + fi.fin_data[1] = sp; + fi.fin_out = 0; + nat2 = nat_new(&fi, nat->nat_ptr, NULL, + NAT_SLAVE|IPN_UDP | (sp ? 0 : SI_W_SPORT), + NAT_OUTBOUND); + if (nat2 != NULL) { + (void) nat_proto(&fi, nat2, IPN_UDP); + nat_update(&fi, nat2, nat2->nat_ptr); + + (void) fr_addstate(&fi, NULL, (sp ? 
0 : SI_W_SPORT)); + if (fi.fin_state != NULL) + fr_statederef(&fi, (ipstate_t **)&fi.fin_state); + } + } + + if ((rap->rap_mode & RAP_M_UDP) == RAP_M_UDP) { + sp = rap->rap_plport; + tcp2->th_sport = htons(sp); + tcp2->th_dport = 0; /* XXX - don't specify remote port */ + fi.fin_data[0] = sp; + fi.fin_data[1] = 0; + fi.fin_out = 1; + nat2 = nat_new(&fi, nat->nat_ptr, NULL, + NAT_SLAVE|IPN_UDP|SI_W_DPORT, + NAT_OUTBOUND); + if (nat2 != NULL) { + (void) nat_proto(&fi, nat2, IPN_UDP); + nat_update(&fi, nat2, nat2->nat_ptr); + + (void) fr_addstate(&fi, NULL, SI_W_DPORT); + if (fi.fin_state != NULL) + fr_statederef(&fi, (ipstate_t **)&fi.fin_state); + } + } + + ip->ip_p = swp; + ip->ip_len = slen; + ip->ip_src = swa; + ip->ip_dst = swb; + return 0; +} diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_rcmd_pxy.c b/usr/src/uts/common/inet/ipf/netinet/ip_rcmd_pxy.c new file mode 100644 index 0000000000..919c47cb90 --- /dev/null +++ b/usr/src/uts/common/inet/ipf/netinet/ip_rcmd_pxy.c @@ -0,0 +1,239 @@ +/* + * Copyright (C) 1998-2003 by Darren Reed + * + * See the IPFILTER.LICENCE file for details on licencing. + * + * $Id: ip_rcmd_pxy.c,v 1.41.2.4 2005/02/04 10:22:55 darrenr Exp $ + * + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + * + * Simple RCMD transparent proxy for in-kernel use. For use with the NAT + * code. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#define IPF_RCMD_PROXY + + +int ippr_rcmd_init __P((void)); +void ippr_rcmd_fini __P((void)); +int ippr_rcmd_new __P((fr_info_t *, ap_session_t *, nat_t *)); +int ippr_rcmd_out __P((fr_info_t *, ap_session_t *, nat_t *)); +int ippr_rcmd_in __P((fr_info_t *, ap_session_t *, nat_t *)); +u_short ipf_rcmd_atoi __P((char *)); +int ippr_rcmd_portmsg __P((fr_info_t *, ap_session_t *, nat_t *)); + +static frentry_t rcmdfr; + +int rcmd_proxy_init = 0; + + +/* + * RCMD application proxy initialization. 
+ */ +int ippr_rcmd_init() +{ + bzero((char *)&rcmdfr, sizeof(rcmdfr)); + rcmdfr.fr_ref = 1; + rcmdfr.fr_flags = FR_INQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE; + MUTEX_INIT(&rcmdfr.fr_lock, "RCMD proxy rule lock"); + rcmd_proxy_init = 1; + + return 0; +} + + +void ippr_rcmd_fini() +{ + if (rcmd_proxy_init == 1) { + MUTEX_DESTROY(&rcmdfr.fr_lock); + rcmd_proxy_init = 0; + } +} + + +/* + * Setup for a new RCMD proxy. + */ +int ippr_rcmd_new(fin, aps, nat) +fr_info_t *fin; +ap_session_t *aps; +nat_t *nat; +{ + tcphdr_t *tcp = (tcphdr_t *)fin->fin_dp; + + fin = fin; /* LINT */ + nat = nat; /* LINT */ + + aps->aps_psiz = sizeof(u_32_t); + KMALLOCS(aps->aps_data, u_32_t *, sizeof(u_32_t)); + if (aps->aps_data == NULL) { +#ifdef IP_RCMD_PROXY_DEBUG + printf("ippr_rcmd_new:KMALLOCS(%d) failed\n", sizeof(u_32_t)); +#endif + return -1; + } + *(u_32_t *)aps->aps_data = 0; + aps->aps_sport = tcp->th_sport; + aps->aps_dport = tcp->th_dport; + return 0; +} + + +/* + * ipf_rcmd_atoi - implement a simple version of atoi + */ +u_short ipf_rcmd_atoi(ptr) +char *ptr; +{ + register char *s = ptr, c; + register u_short i = 0; + + while (((c = *s++) != '\0') && ISDIGIT(c)) { + i *= 10; + i += c - '0'; + } + return i; +} + + +int ippr_rcmd_portmsg(fin, aps, nat) +fr_info_t *fin; +ap_session_t *aps; +nat_t *nat; +{ + tcphdr_t *tcp, tcph, *tcp2 = &tcph; + struct in_addr swip, swip2; + int off, dlen, nflags; + char portbuf[8], *s; + fr_info_t fi; + u_short sp; + nat_t *nat2; + ip_t *ip; + mb_t *m; + + tcp = (tcphdr_t *)fin->fin_dp; + + if (tcp->th_flags & TH_SYN) { + *(u_32_t *)aps->aps_data = htonl(ntohl(tcp->th_seq) + 1); + return 0; + } + + if ((*(u_32_t *)aps->aps_data != 0) && + (tcp->th_seq != *(u_32_t *)aps->aps_data)) + return 0; + + m = fin->fin_m; + ip = fin->fin_ip; + off = (char *)tcp - (char *)ip + (TCP_OFF(tcp) << 2) + fin->fin_ipoff; + +#ifdef __sgi + dlen = fin->fin_plen - off; +#else + dlen = MSGDSIZE(m) - off; +#endif + if (dlen <= 0) + return 0; + + bzero(portbuf, 
sizeof(portbuf)); + COPYDATA(m, off, MIN(sizeof(portbuf), dlen), portbuf); + + portbuf[sizeof(portbuf) - 1] = '\0'; + s = portbuf; + sp = ipf_rcmd_atoi(s); + if (sp == 0) { +#ifdef IP_RCMD_PROXY_DEBUG + printf("ippr_rcmd_portmsg:sp == 0 dlen %d [%s]\n", + dlen, portbuf); +#endif + return 0; + } + + /* + * Add skeleton NAT entry for connection which will come back the + * other way. + */ + bcopy((char *)fin, (char *)&fi, sizeof(fi)); + fi.fin_flx |= FI_IGNORE; + fi.fin_data[0] = sp; + fi.fin_data[1] = 0; + if (nat->nat_dir == NAT_OUTBOUND) + nat2 = nat_outlookup(&fi, NAT_SEARCH|IPN_TCP, nat->nat_p, + nat->nat_inip, nat->nat_oip); + else + nat2 = nat_inlookup(&fi, NAT_SEARCH|IPN_TCP, nat->nat_p, + nat->nat_inip, nat->nat_oip); + if (nat2 == NULL) { + int slen; + + slen = ip->ip_len; + ip->ip_len = fin->fin_hlen + sizeof(*tcp); + bzero((char *)tcp2, sizeof(*tcp2)); + tcp2->th_win = htons(8192); + tcp2->th_sport = htons(sp); + tcp2->th_dport = 0; /* XXX - don't specify remote port */ + TCP_OFF_A(tcp2, 5); + tcp2->th_flags = TH_SYN; + fi.fin_dp = (char *)tcp2; + fi.fin_fr = &rcmdfr; + fi.fin_dlen = sizeof(*tcp2); + fi.fin_plen = fi.fin_hlen + sizeof(*tcp2); + fi.fin_flx &= FI_LOWTTL|FI_FRAG|FI_TCPUDP|FI_OPTIONS|FI_IGNORE; + nflags = NAT_SLAVE|IPN_TCP|SI_W_DPORT; + + swip = ip->ip_src; + swip2 = ip->ip_dst; + + if (nat->nat_dir == NAT_OUTBOUND) { + fi.fin_fi.fi_saddr = nat->nat_inip.s_addr; + ip->ip_src = nat->nat_inip; + } else { + fi.fin_fi.fi_saddr = nat->nat_oip.s_addr; + ip->ip_src = nat->nat_oip; + nflags |= NAT_NOTRULEPORT; + } + + nat2 = nat_new(&fi, nat->nat_ptr, NULL, nflags, nat->nat_dir); + + if (nat2 != NULL) { + (void) nat_proto(&fi, nat2, IPN_TCP); + nat_update(&fi, nat2, nat2->nat_ptr); + fi.fin_ifp = NULL; + if (nat->nat_dir == NAT_INBOUND) { + fi.fin_fi.fi_daddr = nat->nat_inip.s_addr; + ip->ip_dst = nat->nat_inip; + } + (void) fr_addstate(&fi, &nat2->nat_state, SI_W_DPORT); + if (fi.fin_state != NULL) + fr_statederef(&fi, (ipstate_t **)&fi.fin_state); 
+ } + ip->ip_len = slen; + ip->ip_src = swip; + ip->ip_dst = swip2; + } + return 0; +} + + +int ippr_rcmd_out(fin, aps, nat) +fr_info_t *fin; +ap_session_t *aps; +nat_t *nat; +{ + if (nat->nat_dir == NAT_OUTBOUND) + return ippr_rcmd_portmsg(fin, aps, nat); + return 0; +} + + +int ippr_rcmd_in(fin, aps, nat) +fr_info_t *fin; +ap_session_t *aps; +nat_t *nat; +{ + if (nat->nat_dir == NAT_INBOUND) + return ippr_rcmd_portmsg(fin, aps, nat); + return 0; +} diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_rpcb_pxy.c b/usr/src/uts/common/inet/ipf/netinet/ip_rpcb_pxy.c new file mode 100644 index 0000000000..f67c01a232 --- /dev/null +++ b/usr/src/uts/common/inet/ipf/netinet/ip_rpcb_pxy.c @@ -0,0 +1,1452 @@ +/* + * Copyright (C) 2002-2003 by Ryan Beasley <ryanb@goddamnbastard.org> + * + * See the IPFILTER.LICENCE file for details on licencing. + */ +/* + * Overview: + * This is an in-kernel application proxy for Sun's RPCBIND (nee portmap) + * protocol as defined in RFC1833. It is far from complete, mostly + * lacking in less-likely corner cases, but it's definitely functional. + * + * Invocation: + * rdr <int> <e_ip>/32 port <e_p> -> <i_ip> port <i_p> udp proxy rpcbu + * + * If the host running IP Filter is the same as the RPC server, it's + * perfectly legal for both the internal and external addresses and ports + * to match. + * + * When triggered by appropriate IP NAT rules, this proxy works by + * examining data contained in received packets. Requests and replies are + * modified, NAT and state table entries created, etc., as necessary. + */ +/* + * TODO / NOTES + * + * o Must implement locking to protect proxy session data. + * o Fragmentation isn't supported. + * o Only supports UDP. + * o Doesn't support multiple RPC records in a single request. + * o Errors should be more fine-grained. (e.g., malloc failure vs. 
+ * illegal RPCB request / reply) + * o Even with the limit on the total amount of recorded transactions, + * should there be a timeout on transaction removal? + * o There is a potential collision between cloning, wildcard NAT and + * state entries. There should be an appr_getport routine for + * to avoid this. + * o The enclosed hack of STREAMS support is pretty sick and most likely + * broken. + * + * $Id: ip_rpcb_pxy.c,v 2.25.2.3 2005/02/04 10:22:56 darrenr Exp $ + */ + +#define IPF_RPCB_PROXY + +/* + * Function prototypes + */ +int ippr_rpcb_init __P((void)); +void ippr_rpcb_fini __P((void)); +int ippr_rpcb_new __P((fr_info_t *, ap_session_t *, nat_t *)); +void ippr_rpcb_del __P((ap_session_t *)); +int ippr_rpcb_in __P((fr_info_t *, ap_session_t *, nat_t *)); +int ippr_rpcb_out __P((fr_info_t *, ap_session_t *, nat_t *)); + +static void ippr_rpcb_flush __P((rpcb_session_t *)); +static int ippr_rpcb_decodereq __P((fr_info_t *, nat_t *, + rpcb_session_t *, rpc_msg_t *)); +static int ippr_rpcb_skipauth __P((rpc_msg_t *, xdr_auth_t *, u_32_t **)); +static int ippr_rpcb_insert __P((rpcb_session_t *, rpcb_xact_t *)); +static int ippr_rpcb_xdrrpcb __P((rpc_msg_t *, u_32_t *, rpcb_args_t *)); +static int ippr_rpcb_getuaddr __P((rpc_msg_t *, xdr_uaddr_t *, + u_32_t **)); +static u_int ippr_rpcb_atoi __P((char *)); +static int ippr_rpcb_modreq __P((fr_info_t *, nat_t *, rpc_msg_t *, + mb_t *, u_int)); +static int ippr_rpcb_decoderep __P((fr_info_t *, nat_t *, + rpcb_session_t *, rpc_msg_t *, rpcb_xact_t **)); +static rpcb_xact_t * ippr_rpcb_lookup __P((rpcb_session_t *, u_32_t)); +static void ippr_rpcb_deref __P((rpcb_session_t *, rpcb_xact_t *)); +static int ippr_rpcb_getproto __P((rpc_msg_t *, xdr_proto_t *, + u_32_t **)); +static int ippr_rpcb_getnat __P((fr_info_t *, nat_t *, u_int, u_int)); +static int ippr_rpcb_modv3 __P((fr_info_t *, nat_t *, rpc_msg_t *, + mb_t *, u_int)); +static int ippr_rpcb_modv4 __P((fr_info_t *, nat_t *, rpc_msg_t *, + mb_t *, u_int)); 
/*
 * The decoded rpc_msg structures hold pointers to 32 bit words of the
 * message rather than the values themselves; B() fetches the word `r'
 * points to and converts it from network to host byte order.  (In case
 * you're wondering about the name, this started as BYTEREF -> BREF -> B.)
 *
 * The expansion is fully parenthesised so that it is a single operand in
 * any surrounding expression.
 */
#define	B(r)	((u_32_t)ntohl(*(r)))
*/ +/* -------------------------------------------------------------------- */ +int +ippr_rpcb_new(fin, aps, nat) + fr_info_t *fin; + ap_session_t *aps; + nat_t *nat; +{ + rpcb_session_t *rs; + + fin = fin; /* LINT */ + nat = nat; /* LINT */ + + KMALLOC(rs, rpcb_session_t *); + if (rs == NULL) + return(-1); + + bzero((char *)rs, sizeof(*rs)); + MUTEX_INIT(&rs->rs_rxlock, "ipf Sun RPCB proxy session lock"); + + aps->aps_data = rs; + + return(0); +} + +/* -------------------------------------------------------------------- */ +/* Function: ippr_rpcb_del */ +/* Returns: void */ +/* Parameters: aps(I) - pointer to proxy session structure */ +/* */ +/* Free up a session's list of RPCB requests. */ +/* -------------------------------------------------------------------- */ +void +ippr_rpcb_del(aps) + ap_session_t *aps; +{ + rpcb_session_t *rs; + rs = (rpcb_session_t *)aps->aps_data; + + MUTEX_ENTER(&rs->rs_rxlock); + ippr_rpcb_flush(rs); + MUTEX_EXIT(&rs->rs_rxlock); + MUTEX_DESTROY(&rs->rs_rxlock); +} + +/* -------------------------------------------------------------------- */ +/* Function: ippr_rpcb_in */ +/* Returns: int - APR_ERR(1) == drop the packet, */ +/* APR_ERR(2) == kill the proxy session, */ +/* else change in packet length (in bytes) */ +/* Parameters: fin(I) - pointer to packet information */ +/* ip(I) - pointer to packet header */ +/* aps(I) - pointer to proxy session structure */ +/* nat(I) - pointer to NAT session structure */ +/* */ +/* Given a presumed RPCB request, perform some minor tests and pass off */ +/* for decoding. Also pass packet off for a rewrite if necessary. */ +/* -------------------------------------------------------------------- */ +int +ippr_rpcb_in(fin, aps, nat) + fr_info_t *fin; + ap_session_t *aps; + nat_t *nat; +{ + rpc_msg_t rpcmsg, *rm; + rpcb_session_t *rs; + u_int off, dlen; + mb_t *m; + int rv; + + /* Disallow fragmented or illegally short packets. 
*/ + if ((fin->fin_flx & (FI_FRAG|FI_SHORT)) != 0) + return(APR_ERR(1)); + + /* Perform basic variable initialization. */ + rs = (rpcb_session_t *)aps->aps_data; + + m = fin->fin_m; + off = (char *)fin->fin_dp - (char *)fin->fin_ip; + off += sizeof(udphdr_t) + fin->fin_ipoff; + dlen = fin->fin_dlen - sizeof(udphdr_t); + + /* Disallow packets outside legal range for supported requests. */ + if ((dlen < RPCB_REQMIN) || (dlen > RPCB_REQMAX)) + return(APR_ERR(1)); + + /* Copy packet over to convenience buffer. */ + rm = &rpcmsg; + bzero((char *)rm, sizeof(*rm)); + COPYDATA(m, off, dlen, (caddr_t)&rm->rm_msgbuf); + rm->rm_buflen = dlen; + + /* Send off to decode request. */ + rv = ippr_rpcb_decodereq(fin, nat, rs, rm); + + switch(rv) + { + case -1: + return(APR_ERR(1)); + case 0: + break; + case 1: + rv = ippr_rpcb_modreq(fin, nat, rm, m, off); + break; + default: + /*CONSTANTCONDITION*/ + IPF_PANIC(1, ("illegal rv %d (ippr_rpcb_req)", rv)); + } + + return(rv); +} + +/* -------------------------------------------------------------------- */ +/* Function: ippr_rpcb_out */ +/* Returns: int - APR_ERR(1) == drop the packet, */ +/* APR_ERR(2) == kill the proxy session, */ +/* else change in packet length (in bytes) */ +/* Parameters: fin(I) - pointer to packet information */ +/* ip(I) - pointer to packet header */ +/* aps(I) - pointer to proxy session structure */ +/* nat(I) - pointer to NAT session structure */ +/* */ +/* Given a presumed RPCB reply, perform some minor tests and pass off */ +/* for decoding. If the message indicates a successful request with */ +/* valid addressing information, create NAT and state structures to */ +/* allow direct communication between RPC client and server. 
*/ +/* -------------------------------------------------------------------- */ +int +ippr_rpcb_out(fin, aps, nat) + fr_info_t *fin; + ap_session_t *aps; + nat_t *nat; +{ + rpc_msg_t rpcmsg, *rm; + rpcb_session_t *rs; + rpcb_xact_t *rx; + u_int off, dlen; + int rv, diff; + mb_t *m; + + /* Disallow fragmented or illegally short packets. */ + if ((fin->fin_flx & (FI_FRAG|FI_SHORT)) != 0) + return(APR_ERR(1)); + + /* Perform basic variable initialization. */ + rs = (rpcb_session_t *)aps->aps_data; + + m = fin->fin_m; + off = (char *)fin->fin_dp - (char *)fin->fin_ip; + off += sizeof(udphdr_t) + fin->fin_ipoff; + dlen = fin->fin_dlen - sizeof(udphdr_t); + diff = 0; + + /* Disallow packets outside legal range for supported requests. */ + if ((dlen < RPCB_REPMIN) || (dlen > RPCB_REPMAX)) + return(APR_ERR(1)); + + /* Copy packet over to convenience buffer. */ + rm = &rpcmsg; + bzero((char *)rm, sizeof(*rm)); + COPYDATA(m, off, dlen, (caddr_t)&rm->rm_msgbuf); + rm->rm_buflen = dlen; + + /* Send off to decode reply. */ + rv = ippr_rpcb_decoderep(fin, nat, rs, rm, &rx); + + switch(rv) + { + case -1: /* Bad packet */ + if (rx != NULL) { + MUTEX_ENTER(&rs->rs_rxlock); + ippr_rpcb_deref(rs, rx); + MUTEX_EXIT(&rs->rs_rxlock); + } + return(APR_ERR(1)); + case 0: /* Negative reply / request rejected */ + break; + case 1: /* Positive reply */ + /* + * With the IP address embedded in a GETADDR(LIST) reply, + * we'll need to rewrite the packet in the very possible + * event that the internal & external addresses aren't the + * same. (i.e., this box is either a router or rpcbind + * only listens on loopback.) 
+ */ + if (nat->nat_inip.s_addr != nat->nat_outip.s_addr) { + if (rx->rx_type == RPCB_RES_STRING) + diff = ippr_rpcb_modv3(fin, nat, rm, m, off); + else if (rx->rx_type == RPCB_RES_LIST) + diff = ippr_rpcb_modv4(fin, nat, rm, m, off); + } + break; + default: + /*CONSTANTCONDITION*/ + IPF_PANIC(1, ("illegal rv %d (ippr_rpcb_decoderep)", rv)); + } + + if (rx != NULL) { + MUTEX_ENTER(&rs->rs_rxlock); + /* XXX Gross hack - I'm overloading the reference + * counter to deal with both threads and retransmitted + * requests. One deref signals that this thread is + * finished with rx, and the other signals that we've + * processed its reply. + */ + ippr_rpcb_deref(rs, rx); + ippr_rpcb_deref(rs, rx); + MUTEX_EXIT(&rs->rs_rxlock); + } + + return(diff); +} + +/* + * Private support subroutines + */ + +/* -------------------------------------------------------------------- */ +/* Function: ippr_rpcb_flush */ +/* Returns: void */ +/* Parameters: rs(I) - pointer to RPCB session structure */ +/* */ +/* Simply flushes the list of outstanding transactions, if any. */ +/* -------------------------------------------------------------------- */ +static void +ippr_rpcb_flush(rs) + rpcb_session_t *rs; +{ + rpcb_xact_t *r1, *r2; + + r1 = rs->rs_rxlist; + if (r1 == NULL) + return; + + while (r1 != NULL) { + r2 = r1; + r1 = r1->rx_next; + KFREE(r2); + } +} + +/* -------------------------------------------------------------------- */ +/* Function: ippr_rpcb_decodereq */ +/* Returns: int - -1 == bad request or critical failure, */ +/* 0 == request successfully decoded, */ +/* 1 == request successfully decoded; requires */ +/* address rewrite/modification */ +/* Parameters: fin(I) - pointer to packet information */ +/* nat(I) - pointer to NAT session structure */ +/* rs(I) - pointer to RPCB session structure */ +/* rm(I) - pointer to RPC message structure */ +/* */ +/* Take a presumed RPCB request, decode it, and store the results in */ +/* the transaction list. 
If the internal target address needs to be */ +/* modified, store its location in ptr. */ +/* WARNING: It's the responsibility of the caller to make sure there */ +/* is enough room in rs_buf for the basic RPC message "preamble". */ +/* -------------------------------------------------------------------- */ +static int +ippr_rpcb_decodereq(fin, nat, rs, rm) + fr_info_t *fin; + nat_t *nat; + rpcb_session_t *rs; + rpc_msg_t *rm; +{ + rpcb_args_t *ra; + u_32_t xdr, *p; + rpc_call_t *rc; + rpcb_xact_t rx; + int mod; + + p = (u_32_t *)rm->rm_msgbuf; + mod = 0; + + bzero((char *)&rx, sizeof(rx)); + rc = &rm->rm_call; + + rm->rm_xid = p; + rx.rx_xid = B(p++); /* Record this message's XID. */ + + /* Parse out and test the RPC header. */ + if ((B(p++) != RPCB_CALL) || + (B(p++) != RPCB_MSG_VERSION) || + (B(p++) != RPCB_PROG)) + return(-1); + + /* Record the RPCB version and procedure. */ + rc->rc_vers = p++; + rc->rc_proc = p++; + + /* Bypass RPC authentication stuff. */ + if (ippr_rpcb_skipauth(rm, &rc->rc_authcred, &p) != 0) + return(-1); + if (ippr_rpcb_skipauth(rm, &rc->rc_authverf, &p) != 0) + return(-1); + + /* Compare RPCB version and procedure numbers. */ + switch(B(rc->rc_vers)) + { + case 2: + /* This proxy only supports PMAP_GETPORT. */ + if (B(rc->rc_proc) != RPCB_GETPORT) + return(-1); + + /* Portmap requests contain four 4 byte parameters. */ + if (RPCB_BUF_EQ(rm, p, 16) == 0) + return(-1); + + p += 2; /* Skip requested program and version numbers. */ + + /* Sanity check the requested protocol. 
*/ + xdr = B(p); + if (!(xdr == IPPROTO_UDP || xdr == IPPROTO_TCP)) + return(-1); + + rx.rx_type = RPCB_RES_PMAP; + rx.rx_proto = xdr; + break; + case 3: + case 4: + /* GETADDRLIST is exclusive to v4; GETADDR for v3 & v4 */ + switch(B(rc->rc_proc)) + { + case RPCB_GETADDR: + rx.rx_type = RPCB_RES_STRING; + rx.rx_proto = (u_int)fin->fin_p; + break; + case RPCB_GETADDRLIST: + if (B(rc->rc_vers) != 4) + return(-1); + rx.rx_type = RPCB_RES_LIST; + break; + default: + return(-1); + } + + ra = &rc->rc_rpcbargs; + + /* Decode the 'struct rpcb' request. */ + if (ippr_rpcb_xdrrpcb(rm, p, ra) != 0) + return(-1); + + /* Are the target address & port valid? */ + if ((ra->ra_maddr.xu_ip != nat->nat_outip.s_addr) || + (ra->ra_maddr.xu_port != nat->nat_outport)) + return(-1); + + /* Do we need to rewrite this packet? */ + if ((nat->nat_outip.s_addr != nat->nat_inip.s_addr) || + (nat->nat_outport != nat->nat_inport)) + mod = 1; + break; + default: + return(-1); + } + + MUTEX_ENTER(&rs->rs_rxlock); + if (ippr_rpcb_insert(rs, &rx) != 0) { + MUTEX_EXIT(&rs->rs_rxlock); + return(-1); + } + MUTEX_EXIT(&rs->rs_rxlock); + + return(mod); +} + +/* -------------------------------------------------------------------- */ +/* Function: ippr_rpcb_skipauth */ +/* Returns: int -- -1 == illegal auth parameters (lengths) */ +/* 0 == valid parameters, pointer advanced */ +/* Parameters: rm(I) - pointer to RPC message structure */ +/* auth(I) - pointer to RPC auth structure */ +/* buf(IO) - pointer to location within convenience buffer */ +/* */ +/* Record auth data length & location of auth data, then advance past */ +/* it. */ +/* -------------------------------------------------------------------- */ +static int +ippr_rpcb_skipauth(rm, auth, buf) + rpc_msg_t *rm; + xdr_auth_t *auth; + u_32_t **buf; +{ + u_32_t *p, xdr; + + p = *buf; + + /* Make sure we have enough space for expected fixed auth parms. */ + if (RPCB_BUF_GEQ(rm, p, 8) == 0) + return(-1); + + p++; /* We don't care about auth_flavor. 
*/ + + auth->xa_string.xs_len = p; + xdr = B(p++); /* Length of auth_data */ + + /* Test for absurdity / illegality of auth_data length. */ + if ((XDRALIGN(xdr) < xdr) || (RPCB_BUF_GEQ(rm, p, XDRALIGN(xdr)) == 0)) + return(-1); + + auth->xa_string.xs_str = (char *)p; + + p += XDRALIGN(xdr); /* Advance our location. */ + + *buf = (u_32_t *)p; + + return(0); +} + +/* -------------------------------------------------------------------- */ +/* Function: ippr_rpcb_insert */ +/* Returns: int -- -1 == list insertion failed, */ +/* 0 == item successfully added */ +/* Parameters: rs(I) - pointer to RPCB session structure */ +/* rx(I) - pointer to RPCB transaction structure */ +/* -------------------------------------------------------------------- */ +static int +ippr_rpcb_insert(rs, rx) + rpcb_session_t *rs; + rpcb_xact_t *rx; +{ + rpcb_xact_t *rxp; + + rxp = ippr_rpcb_lookup(rs, rx->rx_xid); + if (rxp != NULL) { + ++rxp->rx_ref; + return(0); + } + + if (rpcbcnt == RPCB_MAXREQS) + return(-1); + + KMALLOC(rxp, rpcb_xact_t *); + if (rxp == NULL) + return(-1); + + bcopy((char *)rx, (char *)rxp, sizeof(*rx)); + + if (rs->rs_rxlist != NULL) + rs->rs_rxlist->rx_pnext = &rxp->rx_next; + + rxp->rx_pnext = &rs->rs_rxlist; + rxp->rx_next = rs->rs_rxlist; + rs->rs_rxlist = rxp; + + rxp->rx_ref = 1; + + ++rpcbcnt; + + return(0); +} + +/* -------------------------------------------------------------------- */ +/* Function: ippr_rpcb_xdrrpcb */ +/* Returns: int -- -1 == failure to properly decode the request */ +/* 0 == rpcb successfully decoded */ +/* Parameters: rs(I) - pointer to RPCB session structure */ +/* p(I) - pointer to location within session buffer */ +/* rpcb(O) - pointer to rpcb (xdr type) structure */ +/* */ +/* Decode a XDR encoded rpcb structure and record its contents in rpcb */ +/* within only the context of TCP/UDP over IP networks. 
*/ +/* -------------------------------------------------------------------- */ +static int +ippr_rpcb_xdrrpcb(rm, p, ra) + rpc_msg_t *rm; + u_32_t *p; + rpcb_args_t *ra; +{ + if (!RPCB_BUF_GEQ(rm, p, 20)) + return(-1); + + /* Bypass target program & version. */ + p += 2; + + /* Decode r_netid. Must be "tcp" or "udp". */ + if (ippr_rpcb_getproto(rm, &ra->ra_netid, &p) != 0) + return(-1); + + /* Decode r_maddr. */ + if (ippr_rpcb_getuaddr(rm, &ra->ra_maddr, &p) != 0) + return(-1); + + /* Advance to r_owner and make sure it's empty. */ + if (!RPCB_BUF_EQ(rm, p, 4) || (B(p) != 0)) + return(-1); + + return(0); +} + +/* -------------------------------------------------------------------- */ +/* Function: ippr_rpcb_getuaddr */ +/* Returns: int -- -1 == illegal string, */ +/* 0 == string parsed; contents recorded */ +/* Parameters: rm(I) - pointer to RPC message structure */ +/* xu(I) - pointer to universal address structure */ +/* p(IO) - pointer to location within message buffer */ +/* */ +/* Decode the IP address / port at p and record them in xu. */ +/* -------------------------------------------------------------------- */ +static int +ippr_rpcb_getuaddr(rm, xu, p) + rpc_msg_t *rm; + xdr_uaddr_t *xu; + u_32_t **p; +{ + char *c, *i, *b, *pp; + u_int d, dd, l, t; + char uastr[24]; + + /* Test for string length. */ + if (!RPCB_BUF_GEQ(rm, *p, 4)) + return(-1); + + xu->xu_xslen = (*p)++; + xu->xu_xsstr = (char *)*p; + + /* Length check */ + l = B(xu->xu_xslen); + if (l < 11 || l > 23 || !RPCB_BUF_GEQ(rm, *p, XDRALIGN(l))) + return(-1); + + /* Advance p */ + *(char **)p += XDRALIGN(l); + + /* Copy string to local buffer & terminate C style */ + bcopy(xu->xu_xsstr, uastr, l); + uastr[l] = '\0'; + + i = (char *)&xu->xu_ip; + pp = (char *)&xu->xu_port; + + /* + * Expected format: a.b.c.d.e.f where [a-d] correspond to bytes of + * an IP address and [ef] are the bytes of a L4 port. 
+ */ + if (!(ISDIGIT(uastr[0]) && ISDIGIT(uastr[l-1]))) + return(-1); + b = uastr; + for (c = &uastr[1], d = 0, dd = 0; c < &uastr[l-1]; c++) { + if (ISDIGIT(*c)) { + dd = 0; + continue; + } + if (*c == '.') { + if (dd != 0) + return(-1); + + /* Check for ASCII byte. */ + *c = '\0'; + t = ippr_rpcb_atoi(b); + if (t > 255) + return(-1); + + /* Aim b at beginning of the next byte. */ + b = c + 1; + + /* Switch off IP addr vs port parsing. */ + if (d < 4) + i[d++] = t & 0xff; + else + pp[d++ - 4] = t & 0xff; + + dd = 1; + continue; + } + return(-1); + } + if (d != 5) /* String must contain exactly 5 periods. */ + return(-1); + + /* Handle the last byte (port low byte) */ + t = ippr_rpcb_atoi(b); + if (t > 255) + return(-1); + pp[d - 4] = t & 0xff; + + return(0); +} + +/* -------------------------------------------------------------------- */ +/* Function: ippr_rpcb_atoi (XXX should be generic for all proxies) */ +/* Returns: int -- integer representation of supplied string */ +/* Parameters: ptr(I) - input string */ +/* */ +/* Simple version of atoi(3) ripped from ip_rcmd_pxy.c. */ +/* -------------------------------------------------------------------- */ +static u_int +ippr_rpcb_atoi(ptr) + char *ptr; +{ + register char *s = ptr, c; + register u_int i = 0; + + while (((c = *s++) != '\0') && ISDIGIT(c)) { + i *= 10; + i += c - '0'; + } + return i; +} + +/* -------------------------------------------------------------------- */ +/* Function: ippr_rpcb_modreq */ +/* Returns: int -- change in datagram length */ +/* APR_ERR(2) - critical failure */ +/* Parameters: fin(I) - pointer to packet information */ +/* nat(I) - pointer to NAT session */ +/* rm(I) - pointer to RPC message structure */ +/* m(I) - pointer to mbuf chain */ +/* off(I) - current offset within mbuf chain */ +/* */ +/* When external and internal addresses differ, we rewrite the former */ +/* with the latter. (This is exclusive to protocol versions 3 & 4). 
*/ +/* -------------------------------------------------------------------- */ +static int +ippr_rpcb_modreq(fin, nat, rm, m, off) + fr_info_t *fin; + nat_t *nat; + rpc_msg_t *rm; + mb_t *m; + u_int off; +{ + u_int len, xlen, pos, bogo; + rpcb_args_t *ra; + char uaddr[24]; + udphdr_t *udp; + char *i, *p; + int diff; + + ra = &rm->rm_call.rc_rpcbargs; + i = (char *)&nat->nat_inip.s_addr; + p = (char *)&nat->nat_inport; + + /* Form new string. */ + bzero(uaddr, sizeof(uaddr)); /* Just in case we need padding. */ +#if defined(SNPRINTF) && defined(_KERNEL) + (void) SNPRINTF(uaddr, sizeof(uaddr), +#else + (void) sprintf(uaddr, +#endif + "%u.%u.%u.%u.%u.%u", i[0] & 0xff, i[1] & 0xff, + i[2] & 0xff, i[3] & 0xff, p[0] & 0xff, p[1] & 0xff); + len = strlen(uaddr); + xlen = XDRALIGN(len); + + /* Determine mbuf offset to start writing to. */ + pos = (char *)ra->ra_maddr.xu_xslen - rm->rm_msgbuf; + off += pos; + + /* Write new string length. */ + bogo = htonl(len); + COPYBACK(m, off, 4, (caddr_t)&bogo); + off += 4; + + /* Write new string. */ + COPYBACK(m, off, xlen, uaddr); + off += xlen; + + /* Write in zero r_owner. */ + bogo = 0; + COPYBACK(m, off, 4, (caddr_t)&bogo); + + /* Determine difference in data lengths. */ + diff = xlen - XDRALIGN(B(ra->ra_maddr.xu_xslen)); + + /* + * If our new string has a different length, make necessary + * adjustments. + */ + if (diff != 0) { + udp = fin->fin_dp; + udp->uh_ulen = htons(ntohs(udp->uh_ulen) + diff); + fin->fin_ip->ip_len += diff; + fin->fin_dlen += diff; + fin->fin_plen += diff; + /* XXX Storage lengths. 
*/ + } + + return(diff); +} + +/* -------------------------------------------------------------------- */ +/* Function: ippr_rpcb_decoderep */ +/* Returns: int - -1 == bad request or critical failure, */ +/* 0 == valid, negative reply */ +/* 1 == vaddlid, positive reply; needs no changes */ +/* Parameters: fin(I) - pointer to packet information */ +/* nat(I) - pointer to NAT session structure */ +/* rs(I) - pointer to RPCB session structure */ +/* rm(I) - pointer to RPC message structure */ +/* rxp(O) - pointer to RPCB transaction structure */ +/* */ +/* Take a presumed RPCB reply, extract the XID, search for the original */ +/* request information, and determine whether the request was accepted */ +/* or rejected. With a valid accepted reply, go ahead and create NAT */ +/* and state entries, and finish up by rewriting the packet as */ +/* required. */ +/* */ +/* WARNING: It's the responsibility of the caller to make sure there */ +/* is enough room in rs_buf for the basic RPC message "preamble". */ +/* -------------------------------------------------------------------- */ +static int +ippr_rpcb_decoderep(fin, nat, rs, rm, rxp) + fr_info_t *fin; + nat_t *nat; + rpcb_session_t *rs; + rpc_msg_t *rm; + rpcb_xact_t **rxp; +{ + rpcb_listp_t *rl; + rpcb_entry_t *re; + rpcb_xact_t *rx; + u_32_t xdr, *p; + rpc_resp_t *rr; + int rv, cnt; + + p = (u_32_t *)rm->rm_msgbuf; + + bzero((char *)&rx, sizeof(rx)); + rr = &rm->rm_resp; + + rm->rm_xid = p; + xdr = B(p++); /* Record this message's XID. 
*/ + + /* Lookup XID */ + MUTEX_ENTER(&rs->rs_rxlock); + if ((rx = ippr_rpcb_lookup(rs, xdr)) == NULL) { + MUTEX_EXIT(&rs->rs_rxlock); + return(-1); + } + ++rx->rx_ref; /* per thread reference */ + MUTEX_EXIT(&rs->rs_rxlock); + + *rxp = rx; + + /* Test call vs reply */ + if (B(p++) != RPCB_REPLY) + return(-1); + + /* Test reply_stat */ + switch(B(p++)) + { + case RPCB_MSG_DENIED: + return(0); + case RPCB_MSG_ACCEPTED: + break; + default: + return(-1); + } + + /* Bypass RPC authentication stuff. */ + if (ippr_rpcb_skipauth(rm, &rr->rr_authverf, &p) != 0) + return(-1); + + /* Test accept status */ + if (!RPCB_BUF_GEQ(rm, p, 4)) + return(-1); + if (B(p++) != 0) + return(0); + + /* Parse out the expected reply */ + switch(rx->rx_type) + { + case RPCB_RES_PMAP: + /* There must be only one 4 byte argument. */ + if (!RPCB_BUF_EQ(rm, p, 4)) + return(-1); + + rr->rr_v2 = p; + xdr = B(rr->rr_v2); + + /* Reply w/ a 0 port indicates service isn't registered */ + if (xdr == 0) + return(0); + + /* Is the value sane? */ + if (xdr > 65535) + return(-1); + + /* Create NAT & state table entries. */ + if (ippr_rpcb_getnat(fin, nat, rx->rx_proto, (u_int)xdr) != 0) + return(-1); + break; + case RPCB_RES_STRING: + /* Expecting a XDR string; need 4 bytes for length */ + if (!RPCB_BUF_GEQ(rm, p, 4)) + return(-1); + + rr->rr_v3.xu_str.xs_len = p++; + rr->rr_v3.xu_str.xs_str = (char *)p; + + xdr = B(rr->rr_v3.xu_xslen); + + /* A null string indicates an unregistered service */ + if ((xdr == 0) && RPCB_BUF_EQ(rm, p, 0)) + return(0); + + /* Decode the target IP address / port. */ + if (ippr_rpcb_getuaddr(rm, &rr->rr_v3, &p) != 0) + return(-1); + + /* Validate the IP address and port contained. */ + if (nat->nat_inip.s_addr != rr->rr_v3.xu_ip) + return(-1); + + /* Create NAT & state table entries. 
*/ + if (ippr_rpcb_getnat(fin, nat, rx->rx_proto, + (u_int)rr->rr_v3.xu_port) != 0) + return(-1); + break; + case RPCB_RES_LIST: + if (!RPCB_BUF_GEQ(rm, p, 4)) + return(-1); + /* rpcb_entry_list_ptr */ + switch(B(p)) + { + case 0: + return(0); + case 1: + break; + default: + return(-1); + } + rl = &rr->rr_v4; + rl->rl_list = p++; + cnt = 0; + + for(;;) { + re = &rl->rl_entries[rl->rl_cnt]; + if (ippr_rpcb_getuaddr(rm, &re->re_maddr, &p) != 0) + return(-1); + if (ippr_rpcb_getproto(rm, &re->re_netid, &p) != 0) + return(-1); + /* re_semantics & re_pfamily length */ + if (!RPCB_BUF_GEQ(rm, p, 12)) + return(-1); + p++; /* Skipping re_semantics. */ + xdr = B(p++); + if ((xdr != 4) || strncmp((char *)p, "inet", 4)) + return(-1); + p++; + if (ippr_rpcb_getproto(rm, &re->re_proto, &p) != 0) + return(-1); + if (!RPCB_BUF_GEQ(rm, p, 4)) + return(-1); + re->re_more = p; + if (B(re->re_more) > 1) /* 0,1 only legal values */ + return(-1); + ++rl->rl_cnt; + ++cnt; + if (B(re->re_more) == 0) + break; + /* Replies in max out at 2; TCP and/or UDP */ + if (cnt > 2) + return(-1); + p++; + } + + for(rl->rl_cnt = 0; rl->rl_cnt < cnt; rl->rl_cnt++) { + re = &rl->rl_entries[rl->rl_cnt]; + rv = ippr_rpcb_getnat(fin, nat, + re->re_proto.xp_proto, + (u_int)re->re_maddr.xu_port); + if (rv != 0) + return(-1); + } + break; + default: + /*CONSTANTCONDITION*/ + IPF_PANIC(1, ("illegal rx_type %d", rx->rx_type)); + } + + return(1); +} + +/* -------------------------------------------------------------------- */ +/* Function: ippr_rpcb_lookup */ +/* Returns: rpcb_xact_t * - NULL == no matching record, */ +/* else pointer to relevant entry */ +/* Parameters: rs(I) - pointer to RPCB session */ +/* xid(I) - XID to look for */ +/* -------------------------------------------------------------------- */ +static rpcb_xact_t * +ippr_rpcb_lookup(rs, xid) + rpcb_session_t *rs; + u_32_t xid; +{ + rpcb_xact_t *rx; + + if (rs->rs_rxlist == NULL) + return(NULL); + + for (rx = rs->rs_rxlist; rx != NULL; rx = 
rx->rx_next) + if (rx->rx_xid == xid) + break; + + return(rx); +} + +/* -------------------------------------------------------------------- */ +/* Function: ippr_rpcb_deref */ +/* Returns: (void) */ +/* Parameters: rs(I) - pointer to RPCB session */ +/* rx(I) - pointer to RPC transaction struct to remove */ +/* force(I) - indicates to delete entry regardless of */ +/* reference count */ +/* Locking: rs->rs_rxlock must be held write only */ +/* */ +/* Free the RPCB transaction record rx from the chain of entries. */ +/* -------------------------------------------------------------------- */ +static void +ippr_rpcb_deref(rs, rx) + rpcb_session_t *rs; + rpcb_xact_t *rx; +{ + rs = rs; /* LINT */ + + if (rx == NULL) + return; + + if (--rx->rx_ref != 0) + return; + + if (rx->rx_next != NULL) + rx->rx_next->rx_pnext = rx->rx_pnext; + + *rx->rx_pnext = rx->rx_next; + + KFREE(rx); + + --rpcbcnt; +} + +/* -------------------------------------------------------------------- */ +/* Function: ippr_rpcb_getproto */ +/* Returns: int - -1 == illegal protocol/netid, */ +/* 0 == legal protocol/netid */ +/* Parameters: rm(I) - pointer to RPC message structure */ +/* xp(I) - pointer to netid structure */ +/* p(IO) - pointer to location within packet buffer */ +/* */ +/* Decode netid/proto stored at p and record its numeric value. */ +/* -------------------------------------------------------------------- */ +static int +ippr_rpcb_getproto(rm, xp, p) + rpc_msg_t *rm; + xdr_proto_t *xp; + u_32_t **p; +{ + u_int len; + + /* Must have 4 bytes for length & 4 bytes for "tcp" or "udp". */ + if (!RPCB_BUF_GEQ(rm, p, 8)) + return(-1); + + xp->xp_xslen = (*p)++; + xp->xp_xsstr = (char *)*p; + + /* Test the string length. */ + len = B(xp->xp_xslen); + if (len != 3) + return(-1); + + /* Test the actual string & record the protocol accordingly. 
*/ + if (!strncmp((char *)xp->xp_xsstr, "tcp\0", 4)) + xp->xp_proto = IPPROTO_TCP; + else if (!strncmp((char *)xp->xp_xsstr, "udp\0", 4)) + xp->xp_proto = IPPROTO_UDP; + else { + return(-1); + } + + /* Advance past the string. */ + (*p)++; + + return(0); +} + +/* -------------------------------------------------------------------- */ +/* Function: ippr_rpcb_getnat */ +/* Returns: int -- -1 == failed to create table entries, */ +/* 0 == success */ +/* Parameters: fin(I) - pointer to packet information */ +/* nat(I) - pointer to NAT table entry */ +/* proto(I) - transport protocol for new entries */ +/* port(I) - new port to use w/ wildcard table entries */ +/* */ +/* Create state and NAT entries to handle an anticipated connection */ +/* attempt between RPC client and server. */ +/* -------------------------------------------------------------------- */ +static int +ippr_rpcb_getnat(fin, nat, proto, port) + fr_info_t *fin; + nat_t *nat; + u_int proto; + u_int port; +{ + ipnat_t *ipn, ipnat; + tcphdr_t tcp; + ipstate_t *is; + fr_info_t fi; + nat_t *natl; + int nflags; + + ipn = nat->nat_ptr; + + /* Generate dummy fr_info */ + bcopy((char *)fin, (char *)&fi, sizeof(fi)); + fi.fin_out = 0; + fi.fin_src = fin->fin_dst; + fi.fin_dst = nat->nat_outip; + fi.fin_p = proto; + fi.fin_sport = 0; + fi.fin_dport = port & 0xffff; + fi.fin_flx |= FI_IGNORE; + + bzero((char *)&tcp, sizeof(tcp)); + tcp.th_dport = htons(port); + + if (proto == IPPROTO_TCP) { + tcp.th_win = htons(8192); + TCP_OFF_A(&tcp, sizeof(tcphdr_t) >> 2); + fi.fin_dlen = sizeof(tcphdr_t); + tcp.th_flags = TH_SYN; + nflags = NAT_TCP; + } else { + fi.fin_dlen = sizeof(udphdr_t); + nflags = NAT_UDP; + } + + nflags |= SI_W_SPORT|NAT_SEARCH; + fi.fin_dp = &tcp; + fi.fin_plen = fi.fin_hlen + fi.fin_dlen; + + /* + * Search for existing NAT & state entries. Pay close attention to + * mutexes / locks grabbed from lookup routines, as not doing so could + * lead to bad things. 
+ * + * If successful, fr_stlookup returns with ipf_state locked. We have + * no use for this lock, so simply unlock it if necessary. + */ + is = fr_stlookup(&fi, &tcp, NULL); + if (is != NULL) + RWLOCK_EXIT(&ipf_state); + + RWLOCK_EXIT(&ipf_nat); + + WRITE_ENTER(&ipf_nat); + natl = nat_inlookup(&fi, nflags, proto, fi.fin_src, fi.fin_dst); + + if ((natl != NULL) && (is != NULL)) { + MUTEX_DOWNGRADE(&ipf_nat); + return(0); + } + + /* Slightly modify the following structures for actual use in creating + * NAT and/or state entries. We're primarily concerned with stripping + * flags that may be detrimental to the creation process or simply + * shouldn't be associated with a table entry. + */ + fi.fin_fr = &rpcbfr; + fi.fin_flx &= ~FI_IGNORE; + nflags &= ~NAT_SEARCH; + + if (natl == NULL) { + /* XXX Since we're just copying the original ipn contents + * back, would we be better off just sending a pointer to + * the 'temp' copy off to nat_new instead? + */ + /* Generate template/bogus NAT rule. */ + bcopy((char *)ipn, (char *)&ipnat, sizeof(ipnat)); + ipn->in_flags = nflags & IPN_TCPUDP; + ipn->in_apr = NULL; + ipn->in_p = proto; + ipn->in_pmin = htons(fi.fin_dport); + ipn->in_pmax = htons(fi.fin_dport); + ipn->in_pnext = htons(fi.fin_dport); + ipn->in_space = 1; + ipn->in_ippip = 1; + if (ipn->in_flags & IPN_FILTER) { + ipn->in_scmp = 0; + ipn->in_dcmp = 0; + } + *ipn->in_plabel = '\0'; + + /* Create NAT entry. return NULL if this fails. */ + natl = nat_new(&fi, ipn, NULL, nflags|SI_CLONE|NAT_SLAVE, + NAT_INBOUND); + + bcopy((char *)&ipnat, (char *)ipn, sizeof(ipnat)); + + if (natl == NULL) { + MUTEX_DOWNGRADE(&ipf_nat); + return(-1); + } + + ipn->in_use++; + (void) nat_proto(&fi, natl, nflags); + nat_update(&fi, natl, natl->nat_ptr); + } + MUTEX_DOWNGRADE(&ipf_nat); + + if (is == NULL) { + /* Create state entry. Return NULL if this fails. 
*/ + fi.fin_dst = nat->nat_inip; + fi.fin_nat = (void *)natl; + fi.fin_flx |= FI_NATED; + fi.fin_flx &= ~FI_STATE; + nflags &= NAT_TCPUDP; + nflags |= SI_W_SPORT|SI_CLONE; + + is = fr_addstate(&fi, NULL, nflags); + if (is == NULL) { + /* + * XXX nat_delete is private to ip_nat.c. Should + * check w/ Darren about this one. + * + * nat_delete(natl, NL_EXPIRE); + */ + return(-1); + } + if (fi.fin_state != NULL) + fr_statederef(&fi, (ipstate_t **)&fi.fin_state); + } + + return(0); +} + +/* -------------------------------------------------------------------- */ +/* Function: ippr_rpcb_modv3 */ +/* Returns: int -- change in packet length */ +/* Parameters: fin(I) - pointer to packet information */ +/* nat(I) - pointer to NAT session */ +/* rm(I) - pointer to RPC message structure */ +/* m(I) - pointer to mbuf chain */ +/* off(I) - offset within mbuf chain */ +/* */ +/* Write a new universal address string to this packet, adjusting */ +/* lengths as necessary. */ +/* -------------------------------------------------------------------- */ +static int +ippr_rpcb_modv3(fin, nat, rm, m, off) + fr_info_t *fin; + nat_t *nat; + rpc_msg_t *rm; + mb_t *m; + u_int off; +{ + u_int len, xlen, pos, bogo; + rpc_resp_t *rr; + char uaddr[24]; + char *i, *p; + int diff; + + rr = &rm->rm_resp; + i = (char *)&nat->nat_outip.s_addr; + p = (char *)&rr->rr_v3.xu_port; + + /* Form new string. */ + bzero(uaddr, sizeof(uaddr)); /* Just in case we need padding. */ +#if defined(SNPRINTF) && defined(_KERNEL) + (void) SNPRINTF(uaddr, sizeof(uaddr), +#else + (void) sprintf(uaddr, +#endif + "%u.%u.%u.%u.%u.%u", i[0] & 0xff, i[1] & 0xff, + i[2] & 0xff, i[3] & 0xff, p[0] & 0xff, p[1] & 0xff); + len = strlen(uaddr); + xlen = XDRALIGN(len); + + /* Determine mbuf offset to write to. */ + pos = (char *)rr->rr_v3.xu_xslen - rm->rm_msgbuf; + off += pos; + + /* Write new string length. */ + bogo = htonl(len); + COPYBACK(m, off, 4, (caddr_t)&bogo); + off += 4; + + /* Write new string. 
*/ + COPYBACK(m, off, xlen, uaddr); + + /* Determine difference in data lengths. */ + diff = xlen - XDRALIGN(B(rr->rr_v3.xu_xslen)); + + /* + * If our new string has a different length, make necessary + * adjustments. + */ + if (diff != 0) + ippr_rpcb_fixlen(fin, diff); + + return(diff); +} + +/* -------------------------------------------------------------------- */ +/* Function: ippr_rpcb_modv4 */ +/* Returns: int -- change in packet length */ +/* Parameters: fin(I) - pointer to packet information */ +/* nat(I) - pointer to NAT session */ +/* rm(I) - pointer to RPC message structure */ +/* m(I) - pointer to mbuf chain */ +/* off(I) - offset within mbuf chain */ +/* */ +/* Write new rpcb_entry list, adjusting lengths as necessary. */ +/* -------------------------------------------------------------------- */ +static int +ippr_rpcb_modv4(fin, nat, rm, m, off) + fr_info_t *fin; + nat_t *nat; + rpc_msg_t *rm; + mb_t *m; + u_int off; +{ + u_int len, xlen, pos, bogo; + rpcb_listp_t *rl; + rpcb_entry_t *re; + rpc_resp_t *rr; + char uaddr[24]; + int diff, cnt; + char *i, *p; + + diff = 0; + rr = &rm->rm_resp; + rl = &rr->rr_v4; + + i = (char *)&nat->nat_outip.s_addr; + + /* Determine mbuf offset to write to. */ + re = &rl->rl_entries[0]; + pos = (char *)re->re_maddr.xu_xslen - rm->rm_msgbuf; + off += pos; + + for (cnt = 0; cnt < rl->rl_cnt; cnt++) { + re = &rl->rl_entries[cnt]; + p = (char *)&re->re_maddr.xu_port; + + /* Form new string. */ + bzero(uaddr, sizeof(uaddr)); /* Just in case we need + padding. */ +#if defined(SNPRINTF) && defined(_KERNEL) + (void) SNPRINTF(uaddr, sizeof(uaddr), +#else + (void) sprintf(uaddr, +#endif + "%u.%u.%u.%u.%u.%u", i[0] & 0xff, + i[1] & 0xff, i[2] & 0xff, i[3] & 0xff, + p[0] & 0xff, p[1] & 0xff); + len = strlen(uaddr); + xlen = XDRALIGN(len); + + /* Write new string length. */ + bogo = htonl(len); + COPYBACK(m, off, 4, (caddr_t)&bogo); + off += 4; + + /* Write new string. 
*/ + COPYBACK(m, off, xlen, uaddr); + off += xlen; + + /* Record any change in length. */ + diff += xlen - XDRALIGN(B(re->re_maddr.xu_xslen)); + + /* If the length changed, copy back the rest of this entry. */ + len = ((char *)re->re_more + 4) - + (char *)re->re_netid.xp_xslen; + if (diff != 0) { + COPYBACK(m, off, len, (caddr_t)re->re_netid.xp_xslen); + } + off += len; + } + + /* + * If our new string has a different length, make necessary + * adjustments. + */ + if (diff != 0) + ippr_rpcb_fixlen(fin, diff); + + return(diff); +} + + +/* -------------------------------------------------------------------- */ +/* Function: ippr_rpcb_fixlen */ +/* Returns: (void) */ +/* Parameters: fin(I) - pointer to packet information */ +/* len(I) - change in packet length */ +/* */ +/* Adjust various packet related lengths held in structure and packet */ +/* header fields. */ +/* -------------------------------------------------------------------- */ +static void +ippr_rpcb_fixlen(fin, len) + fr_info_t *fin; + int len; +{ + udphdr_t *udp; + + udp = fin->fin_dp; + udp->uh_ulen = htons(ntohs(udp->uh_ulen) + len); + fin->fin_ip->ip_len += len; + fin->fin_dlen += len; + fin->fin_plen += len; +} + +#undef B diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_state.h b/usr/src/uts/common/inet/ipf/netinet/ip_state.h new file mode 100644 index 0000000000..7b5891d86d --- /dev/null +++ b/usr/src/uts/common/inet/ipf/netinet/ip_state.h @@ -0,0 +1,267 @@ +/* + * Copyright (C) 1995-2001 by Darren Reed. + * + * See the IPFILTER.LICENCE file for details on licencing. + * + * @(#)ip_state.h 1.3 1/12/96 (C) 1995 Darren Reed + * $Id: ip_state.h,v 2.68.2.5 2005/08/11 19:58:04 darrenr Exp $ + * + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifndef __IP_STATE_H__ +#define __IP_STATE_H__ + +#if defined(__STDC__) || defined(__GNUC__) || defined(_AIX51) +# define SIOCDELST _IOW('r', 61, struct ipfobj) +#else +# define SIOCDELST _IOW(r, 61, struct ipfobj) +#endif + +struct ipscan; + +#ifndef IPSTATE_SIZE +# define IPSTATE_SIZE 5737 +#endif +#ifndef IPSTATE_MAX +# define IPSTATE_MAX 4013 /* Maximum number of states held */ +#endif + +#define PAIRS(s1,d1,s2,d2) ((((s1) == (s2)) && ((d1) == (d2))) ||\ + (((s1) == (d2)) && ((d1) == (s2)))) +#define IPPAIR(s1,d1,s2,d2) PAIRS((s1).s_addr, (d1).s_addr, \ + (s2).s_addr, (d2).s_addr) + + +typedef struct ipstate { + ipfmutex_t is_lock; + struct ipstate *is_next; + struct ipstate **is_pnext; + struct ipstate *is_hnext; + struct ipstate **is_phnext; + struct ipstate **is_me; + void *is_ifp[4]; + void *is_sync; + struct nat *is_nat[2]; + frentry_t *is_rule; + struct ipftq *is_tqehead[2]; + struct ipscan *is_isc; + U_QUAD_T is_pkts[4]; + U_QUAD_T is_bytes[4]; + U_QUAD_T is_icmppkts[4]; + struct ipftqent is_sti; + u_int is_frage[2]; + int is_ref; /* reference count */ + int is_isninc[2]; + u_short is_sumd[2]; + i6addr_t is_src; + i6addr_t is_dst; + u_int is_pass; + u_char is_p; /* Protocol */ + u_char is_v; + u_32_t is_hv; + u_32_t is_tag; + u_32_t is_opt[2]; /* packet options set */ + /* in both directions */ + u_32_t is_optmsk[2]; /* " " mask */ + /* in both directions */ + u_short is_sec; /* security options set */ + u_short is_secmsk; /* " " mask */ + u_short is_auth; /* authentication options set */ + u_short is_authmsk; /* " " mask */ + union { + icmpinfo_t is_ics; + tcpinfo_t is_ts; + udpinfo_t is_us; + greinfo_t is_ug; + } is_ps; + u_32_t is_flags; + int is_flx[2][2]; + u_32_t is_rulen; /* rule number when created */ + u_32_t is_s0[2]; + u_short is_smsk[2]; + char is_group[FR_GROUPLEN]; + char is_sbuf[2][16]; + char is_ifname[4][LIFNAMSIZ]; +} ipstate_t; + +#define is_die is_sti.tqe_die +#define is_state 
is_sti.tqe_state +#define is_touched is_sti.tqe_touched +#define is_saddr is_src.in4.s_addr +#define is_daddr is_dst.in4.s_addr +#define is_icmp is_ps.is_ics +#define is_type is_icmp.ici_type +#define is_code is_icmp.ici_code +#define is_tcp is_ps.is_ts +#define is_udp is_ps.is_us +#define is_send is_tcp.ts_data[0].td_end +#define is_dend is_tcp.ts_data[1].td_end +#define is_maxswin is_tcp.ts_data[0].td_maxwin +#define is_maxdwin is_tcp.ts_data[1].td_maxwin +#define is_maxsend is_tcp.ts_data[0].td_maxend +#define is_maxdend is_tcp.ts_data[1].td_maxend +#define is_swinscale is_tcp.ts_data[0].td_winscale +#define is_dwinscale is_tcp.ts_data[1].td_winscale +#define is_swinflags is_tcp.ts_data[0].td_winflags +#define is_dwinflags is_tcp.ts_data[1].td_winflags +#define is_sport is_tcp.ts_sport +#define is_dport is_tcp.ts_dport +#define is_ifpin is_ifp[0] +#define is_ifpout is_ifp[2] +#define is_gre is_ps.is_ug +#define is_call is_gre.gs_call + +#define IS_WSPORT SI_W_SPORT /* 0x00100 */ +#define IS_WDPORT SI_W_DPORT /* 0x00200 */ +#define IS_WSADDR SI_W_SADDR /* 0x00400 */ +#define IS_WDADDR SI_W_DADDR /* 0x00800 */ +#define IS_NEWFR SI_NEWFR /* 0x01000 */ +#define IS_CLONE SI_CLONE /* 0x02000 */ +#define IS_CLONED SI_CLONED /* 0x04000 */ +#define IS_TCPFSM 0x10000 +#define IS_STRICT 0x20000 +#define IS_ISNSYN 0x40000 +#define IS_ISNACK 0x80000 +#define IS_STATESYNC 0x100000 +/* + * IS_SC flags are for scan-operations that need to be recognised in state. 
+ */ +#define IS_SC_CLIENT 0x10000000 +#define IS_SC_SERVER 0x20000000 +#define IS_SC_MATCHC 0x40000000 +#define IS_SC_MATCHS 0x80000000 +#define IS_SC_MATCHALL (IS_SC_MATCHC|IS_SC_MATCHS) /* both match bits */ +#define IS_SC_ALL (IS_SC_MATCHC|IS_SC_MATCHS|IS_SC_CLIENT|IS_SC_SERVER) /* every IS_SC_* bit: 0xf0000000 */ + +/* + * Flags that can be passed into fr_addstate + */ +#define IS_INHERITED 0x0fffff00 + +#define TH_OPENING (TH_SYN|TH_ACK) +/* + * is_flags: + * Bits 0 - 3 are used as a mask with the current packet's bits to check for + * whether it is short, tcp/udp, a fragment or the presence of IP options. + * Bits 4 - 7 are set from the initial packet and contain what the packet + * anded with bits 0-3 must match. + * Bits 8,9 are used to indicate wildcard source/destination port matching. + * Bits 10,11 are reserved for other wildcard flag compatibility. + * Bits 12,13 are for scanning. + */ + +typedef struct ipstate_save { + void *ips_next; + struct ipstate ips_is; + struct frentry ips_fr; +} ipstate_save_t; + +#define ips_rule ips_is.is_rule + + +typedef struct ipslog { + U_QUAD_T isl_pkts[4]; + U_QUAD_T isl_bytes[4]; + i6addr_t isl_src; + i6addr_t isl_dst; + u_32_t isl_tag; + u_short isl_type; + union { + u_short isl_filler[2]; + u_short isl_ports[2]; + u_short isl_icmp; + } isl_ps; + u_char isl_v; + u_char isl_p; + u_char isl_flags; + u_char isl_state[2]; + u_32_t isl_rulen; + char isl_group[FR_GROUPLEN]; +} ipslog_t; + +#define isl_sport isl_ps.isl_ports[0] +#define isl_dport isl_ps.isl_ports[1] +#define isl_itype isl_ps.isl_icmp + +#define ISL_NEW 0 +#define ISL_CLONE 1 +#define ISL_EXPIRE 0xffff +#define ISL_FLUSH 0xfffe +#define ISL_REMOVE 0xfffd +#define ISL_INTERMEDIATE 0xfffc +#define ISL_KILLED 0xfffb +#define ISL_ORPHAN 0xfffa + + +typedef struct ips_stat { + u_long iss_hits; + u_long iss_miss; + u_long iss_max; + u_long iss_maxref; + u_long iss_tcp; + u_long iss_udp; + u_long iss_icmp; + u_long iss_nomem; + u_long iss_expire; + u_long iss_fin; + u_long iss_active; + u_long iss_logged; + u_long
iss_logfail; + u_long iss_inuse; + u_long iss_wild; + u_long iss_killed; + u_long iss_ticks; + u_long iss_bucketfull; + int iss_statesize; + int iss_statemax; + ipstate_t **iss_table; + ipstate_t *iss_list; + u_long *iss_bucketlen; +} ips_stat_t; + + +extern u_long fr_tcpidletimeout; +extern u_long fr_tcpclosewait; +extern u_long fr_tcplastack; +extern u_long fr_tcptimeout; +extern u_long fr_tcpclosed; +extern u_long fr_tcphalfclosed; +extern u_long fr_udptimeout; +extern u_long fr_udpacktimeout; +extern u_long fr_icmptimeout; +extern u_long fr_icmpacktimeout; +extern u_long fr_iptimeout; +extern int fr_statemax; +extern int fr_statesize; +extern int fr_state_lock; +extern int fr_state_maxbucket; +extern int fr_state_maxbucket_reset; +extern ipstate_t *ips_list; +extern ipftq_t *ips_utqe; +extern ipftq_t ips_tqtqb[IPF_TCP_NSTATES]; + +extern int fr_stateinit __P((void)); +extern ipstate_t *fr_addstate __P((fr_info_t *, ipstate_t **, u_int)); +extern frentry_t *fr_checkstate __P((struct fr_info *, u_32_t *)); +extern ipstate_t *fr_stlookup __P((fr_info_t *, tcphdr_t *, ipftq_t **)); +extern void fr_statesync __P((void *)); +extern void fr_timeoutstate __P((void)); +extern int fr_tcp_age __P((struct ipftqent *, struct fr_info *, + struct ipftq *, int)); +extern int fr_tcpinwindow __P((struct fr_info *, struct tcpdata *, + struct tcpdata *, tcphdr_t *, int)); +extern void fr_stateunload __P((void)); +extern void ipstate_log __P((struct ipstate *, u_int)); +extern int fr_state_ioctl __P((caddr_t, ioctlcmd_t, int)); +extern void fr_stinsert __P((struct ipstate *, int)); +extern void fr_sttab_init __P((struct ipftq *)); +extern void fr_sttab_destroy __P((struct ipftq *)); +extern void fr_updatestate __P((fr_info_t *, ipstate_t *, ipftq_t *)); +extern void fr_statederef __P((fr_info_t *, ipstate_t **)); +extern void fr_setstatequeue __P((ipstate_t *, int)); + +#endif /* __IP_STATE_H__ */ diff --git a/usr/src/uts/common/inet/ipf/netinet/ipl.h 
b/usr/src/uts/common/inet/ipf/netinet/ipl.h new file mode 100644 index 0000000000..df2c0b4f3b --- /dev/null +++ b/usr/src/uts/common/inet/ipf/netinet/ipl.h @@ -0,0 +1,23 @@ +/* + * Copyright (C) 1993-2001, 2003 by Darren Reed. + * + * See the IPFILTER.LICENCE file for details on licencing. + * + * @(#)ipl.h 1.21 6/5/96 + * $Id: ipl.h,v 2.52.2.10 2005/08/13 05:42:49 darrenr Exp $ + * + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + + +#ifndef __IPL_H__ +#define __IPL_H__ + +#define IPL_VERSION "IP Filter: v4.1.9" + +#define IPFILTER_VERSION 4010900 + +#endif diff --git a/usr/src/uts/common/inet/ipf/opts.h b/usr/src/uts/common/inet/ipf/opts.h new file mode 100644 index 0000000000..d944df6242 --- /dev/null +++ b/usr/src/uts/common/inet/ipf/opts.h @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2000 by Darren Reed. + * + * See the IPFILTER.LICENCE file for details on licencing. + * + * $Id: opts.h,v 2.12 2003/08/14 14:24:27 darrenr Exp $ + * + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifndef __OPTS_H__ +#define __OPTS_H__ + +#ifndef SOLARIS +#define SOLARIS (defined(sun) && (defined(__svr4__) || defined(__SVR4))) +#endif +#define OPT_REMOVE 0x000001 +#define OPT_DEBUG 0x000002 +#define OPT_AUTHSTATS 0x000004 +#define OPT_RAW 0x000008 +#define OPT_LOG 0x000010 +#define OPT_SHOWLIST 0x000020 +#define OPT_VERBOSE 0x000040 +#define OPT_DONOTHING 0x000080 +#define OPT_HITS 0x000100 +#define OPT_BRIEF 0x000200 +#define OPT_ACCNT 0x000400 +#define OPT_FRSTATES 0x000800 +#define OPT_SHOWLINENO 0x001000 +#define OPT_PRINTFR 0x002000 +#define OPT_OUTQUE FR_OUTQUE /* 0x4000 */ +#define OPT_INQUE FR_INQUE /* 0x8000 */ +#define OPT_ZERORULEST 0x010000 +#define OPT_SAVEOUT 0x020000 +#define OPT_IPSTATES 0x040000 +#define OPT_INACTIVE 0x080000 +#define OPT_NAT 0x100000 +#define OPT_GROUPS 0x200000 +#define OPT_STATETOP 0x400000 +#define OPT_FLUSH 0x800000 +#define OPT_CLEAR 0x1000000 +#define OPT_HEX 0x2000000 +#define OPT_ASCII 0x4000000 +#define OPT_NORESOLVE 0x8000000 +#define OPT_UNDEF 0x10000000 + +#define OPT_STAT OPT_FRSTATES +#define OPT_LIST OPT_SHOWLIST + + +#ifndef __P +# ifdef __STDC__ +# define __P(x) x +# else +# define __P(x) () +# endif +#endif + +#if defined(sun) && !SOLARIS +# define STRERROR(x) sys_errlist[x] +extern char *sys_errlist[]; +#else +# define STRERROR(x) strerror(x) +#endif + +extern int opts; + +#endif /* __OPTS_H__ */ diff --git a/usr/src/uts/common/inet/ipf/pfil.conf b/usr/src/uts/common/inet/ipf/pfil.conf deleted file mode 100644 index 018946143b..0000000000 --- a/usr/src/uts/common/inet/ipf/pfil.conf +++ /dev/null @@ -1,2 +0,0 @@ -name="pfil" parent="pseudo" instance=0; - diff --git a/usr/src/uts/common/inet/ipf/radix.c b/usr/src/uts/common/inet/ipf/radix.c new file mode 100644 index 0000000000..69b50c062a --- /dev/null +++ b/usr/src/uts/common/inet/ipf/radix.c @@ -0,0 +1,1206 @@ +/* + * Copyright (c) 1988, 1989, 1993 + * The Regents of the University of California. 
All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)radix.c 8.6 (Berkeley) 10/17/95 + */ + +/* + * Routines to build and maintain radix trees for routing lookups. 
+ */ +#if defined(KERNEL) || defined(_KERNEL) +# undef KERNEL +# undef _KERNEL +# define KERNEL 1 +# define _KERNEL 1 +#endif +#define __SYS_ATOMIC_OPS_H__ +#if !defined(__svr4__) && !defined(__SVR4) && !defined(__osf__) && \ + !defined(__hpux) && !defined(__sgi) +#include <sys/cdefs.h> +#endif +#ifndef __P +# ifdef __STDC__ +# define __P(x) x +# else +# define __P(x) () +# endif +#endif +#ifdef __osf__ +# define CONST +# define _IPV6_SWTAB_H +# define _PROTO_NET_H_ +# define _PROTO_IPV6_H +# include <sys/malloc.h> +#endif + +#include <sys/param.h> +#ifdef _KERNEL +#include <sys/systm.h> +#else +void panic __P((char *str)); +#include <stdlib.h> +#include <stdio.h> +#include <stdarg.h> +#include <string.h> +#endif +#ifdef __hpux +#include <syslog.h> +#else +#include <sys/syslog.h> +#endif +#include <sys/time.h> +#include <netinet/in.h> +#include <sys/socket.h> +#include <net/if.h> +#include "netinet/ip_compat.h" +#include "netinet/ip_fil.h" +/* END OF INCLUDES */ +#include "radix_ipf.h" +#ifndef min +# define min MIN +#endif +#ifndef max +# define max MAX +#endif + +int max_keylen = 16; +static struct radix_mask *rn_mkfreelist; +static struct radix_node_head *mask_rnhead; +static char *addmask_key; +static u_char normal_chars[] = {0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff}; +static char *rn_zeros = NULL, *rn_ones = NULL; + +#define rn_masktop (mask_rnhead->rnh_treetop) +#undef Bcmp +#define Bcmp(a, b, l) (l == 0 ? 
0 : bcmp((caddr_t)(a), (caddr_t)(b), (u_long)l)) + +static int rn_satisfies_leaf __P((char *, struct radix_node *, int)); +static int rn_lexobetter __P((void *, void *)); +static struct radix_mask *rn_new_radix_mask __P((struct radix_node *, + struct radix_mask *)); +static int rn_freenode __P((struct radix_node *, void *)); +#if defined(AIX) && !defined(_KERNEL) +struct radix_node *rn_match __P((void *, struct radix_node_head *)); +struct radix_node *rn_addmask __P((void *, int, int)); /* (n_arg, search, skip), matching the definition */ +#define FreeS(x, y) KFREES(x, y) +#define Bcopy(x, y, z) bcopy(x, y, z) +#endif + +/* + * The data structure for the keys is a radix tree with one way + * branching removed. The index rn_b at an internal node n represents a bit + * position to be tested. The tree is arranged so that all descendants + * of a node n have keys whose bits all agree up to position rn_b - 1. + * (We say the index of n is rn_b.) + * + * There is at least one descendant which has a one bit at position rn_b, + * and at least one with a zero there. + * + * A route is determined by a pair of key and mask. We require the + * bit-wise logical AND of the key and mask to be the key. + * We define the index of a route associated with the mask to be + * the first bit number in the mask where 0 occurs (with bit number 0 + * representing the highest order bit). + * + * We say a mask is normal if every bit is 0, past the index of the mask. + * If a node n has a descendant (k, m) with index(m) == index(n) == rn_b, + * and m is a normal mask, then the route applies to every descendant of n. + * If the index(m) < rn_b, this implies the trailing last few bits of k + * before bit b are all 0, (and hence consequently true of every descendant + * of n), so the route applies to all descendants of the node as well. + * + * Similar logic shows that a non-normal mask m such that + * index(m) <= index(n) could potentially apply to many children of n.
+ * Thus, for each non-host route, we attach its mask to a list at an internal + * node as high in the tree as we can go. + * + * The present version of the code makes use of normal routes in short- + * circuiting an explicit mask and compare operation when testing whether + * a key satisfies a normal route, and also in remembering the unique leaf + * that governs a subtree. + */ + +struct radix_node * +rn_search(v_arg, head) /* descend from head, branching on the key bit each node tests, until a leaf (rn_b < 0) */ + void *v_arg; + struct radix_node *head; +{ + struct radix_node *x; + caddr_t v; + + for (x = head, v = v_arg; x->rn_b >= 0;) { + if (x->rn_bmask & v[x->rn_off]) + x = x->rn_r; + else + x = x->rn_l; + } + return (x); +} + +struct radix_node * +rn_search_m(v_arg, head, m_arg) /* as rn_search(), but go right only when the tested bit is set in both mask and key */ + struct radix_node *head; + void *v_arg, *m_arg; +{ + struct radix_node *x; + caddr_t v = v_arg, m = m_arg; + + for (x = head; x->rn_b >= 0;) { + if ((x->rn_bmask & m[x->rn_off]) && + (x->rn_bmask & v[x->rn_off])) + x = x->rn_r; + else + x = x->rn_l; + } + return x; +} + +int +rn_refines(m_arg, n_arg) /* nonzero iff every bit set in mask n is also set in mask m and the two masks differ */ + void *m_arg, *n_arg; +{ + caddr_t m = m_arg, n = n_arg; + caddr_t lim, lim2 = lim = n + *(u_char *)n; + int longer = (*(u_char *)n++) - (int)(*(u_char *)m++); /* length-byte difference; also steps both past the length byte */ + int masks_are_equal = 1; + + if (longer > 0) + lim -= longer; + while (n < lim) { + if (*n & ~(*m)) + return 0; + if (*n++ != *m++) + masks_are_equal = 0; + } + while (n < lim2) + if (*n++) + return 0; + if (masks_are_equal && (longer < 0)) + for (lim2 = m - longer; m < lim2; ) + if (*m++) + return 1; + return (!masks_are_equal); +} + +struct radix_node * +rn_lookup(v_arg, m_arg, head) /* rn_match(), then walk the duped-key chain for the entry carrying mask m_arg */ + void *v_arg, *m_arg; + struct radix_node_head *head; +{ + struct radix_node *x; + caddr_t netmask = 0; + + if (m_arg) { + if ((x = rn_addmask(m_arg, 1, head->rnh_treetop->rn_off)) == 0) /* search-only: 0 means the mask is not in the mask tree */ + return (0); + netmask = x->rn_key; + } + x = rn_match(v_arg, head); + if (x && netmask) { + while (x && x->rn_mask != netmask) + x = x->rn_dupedkey; + } + return x; +} + +static int +rn_satisfies_leaf(trial, leaf, skip) + char *trial; + struct
radix_node *leaf; + int skip; +{ + char *cp = trial, *cp2 = leaf->rn_key, *cp3 = leaf->rn_mask; + char *cplim; + int length = min(*(u_char *)cp, *(u_char *)cp2); + + if (cp3 == 0) + cp3 = rn_ones; + else + length = min(length, *(u_char *)cp3); + cplim = cp + length; + cp3 += skip; + cp2 += skip; + for (cp += skip; cp < cplim; cp++, cp2++, cp3++) + if ((*cp ^ *cp2) & *cp3) + return 0; + return 1; +} + +struct radix_node * +rn_match(v_arg, head) + void *v_arg; + struct radix_node_head *head; +{ + caddr_t v = v_arg; + struct radix_node *t = head->rnh_treetop, *x; + caddr_t cp = v, cp2; + caddr_t cplim; + struct radix_node *saved_t, *top = t; + int off = t->rn_off, vlen = *(u_char *)cp, matched_off; + int test, b, rn_b; + + /* + * Open code rn_search(v, top) to avoid overhead of extra + * subroutine call. + */ + for (; t->rn_b >= 0; ) { + if (t->rn_bmask & cp[t->rn_off]) + t = t->rn_r; + else + t = t->rn_l; + } + /* + * See if we match exactly as a host destination + * or at least learn how many bits match, for normal mask finesse. + * + * It doesn't hurt us to limit how many bytes to check + * to the length of the mask, since if it matches we had a genuine + * match and the leaf we have is the most specific one anyway; + * if it didn't match with a shorter length it would fail + * with a long one. This wins big for class B&C netmasks which + * are probably the most common case... + */ + if (t->rn_mask) + vlen = *(u_char *)t->rn_mask; + cp += off; + cp2 = t->rn_key + off; + cplim = v + vlen; + for (; cp < cplim; cp++, cp2++) + if (*cp != *cp2) + goto on1; + /* + * This extra grot is in case we are explicitly asked + * to look up the default. Ugh! 
+ */ + if ((t->rn_flags & RNF_ROOT) && t->rn_dupedkey) + t = t->rn_dupedkey; + return t; +on1: + test = (*cp ^ *cp2) & 0xff; /* find first bit that differs */ + for (b = 7; (test >>= 1) > 0;) + b--; + matched_off = cp - v; + b += matched_off << 3; + rn_b = -1 - b; + /* + * If there is a host route in a duped-key chain, it will be first. + */ + if ((saved_t = t)->rn_mask == 0) + t = t->rn_dupedkey; + for (; t; t = t->rn_dupedkey) + /* + * Even if we don't match exactly as a host, + * we may match if the leaf we wound up at is + * a route to a net. + */ + if (t->rn_flags & RNF_NORMAL) { + if (rn_b <= t->rn_b) + return t; + } else if (rn_satisfies_leaf(v, t, matched_off)) + return t; + t = saved_t; + /* start searching up the tree */ + do { + struct radix_mask *m; + t = t->rn_p; + m = t->rn_mklist; + if (m) { + /* + * If non-contiguous masks ever become important + * we can restore the masking and open coding of + * the search and satisfaction test and put the + * calculation of "off" back before the "do". 
+ */ + do { + if (m->rm_flags & RNF_NORMAL) { + if (rn_b <= m->rm_b) + return (m->rm_leaf); + } else { + off = min(t->rn_off, matched_off); + x = rn_search_m(v, t, m->rm_mask); + while (x && x->rn_mask != m->rm_mask) + x = x->rn_dupedkey; + if (x && rn_satisfies_leaf(v, x, off)) + return x; + } + m = m->rm_mklist; + } while (m); + } + } while (t != top); + return 0; +} + +#ifdef RN_DEBUG +int rn_nodenum; +struct radix_node *rn_clist; +int rn_saveinfo; +int rn_debug = 1; +#endif + +struct radix_node * +rn_newpair(v, b, nodes) + void *v; + int b; + struct radix_node nodes[2]; +{ + struct radix_node *tt = nodes, *t = tt + 1; + t->rn_b = b; + t->rn_bmask = 0x80 >> (b & 7); + t->rn_l = tt; + t->rn_off = b >> 3; + tt->rn_b = -1; + tt->rn_key = (caddr_t)v; + tt->rn_p = t; + tt->rn_flags = t->rn_flags = RNF_ACTIVE; +#ifdef RN_DEBUG + tt->rn_info = rn_nodenum++; + t->rn_info = rn_nodenum++; + tt->rn_twin = t; + tt->rn_ybro = rn_clist; + rn_clist = tt; +#endif + return t; +} + +struct radix_node * +rn_insert(v_arg, head, dupentry, nodes) + void *v_arg; + struct radix_node_head *head; + int *dupentry; + struct radix_node nodes[2]; +{ + caddr_t v = v_arg; + struct radix_node *top = head->rnh_treetop; + int head_off = top->rn_off, vlen = (int)*((u_char *)v); + struct radix_node *t = rn_search(v_arg, top); + caddr_t cp = v + head_off; + int b; + struct radix_node *tt; + +#ifdef RN_DEBUG + if (rn_debug) + log(LOG_DEBUG, "rn_insert(%p,%p,%p,%p)\n", v_arg, head, dupentry, nodes); +#endif + /* + * Find first bit at which v and t->rn_key differ + */ + { + caddr_t cp2 = t->rn_key + head_off; + int cmp_res; + caddr_t cplim = v + vlen; + + while (cp < cplim) + if (*cp2++ != *cp++) + goto on1; + *dupentry = 1; + return t; +on1: + *dupentry = 0; + cmp_res = (cp[-1] ^ cp2[-1]) & 0xff; + for (b = (cp - v) << 3; cmp_res; b--) + cmp_res >>= 1; + } + { + struct radix_node *p, *x = top; + cp = v; + do { + p = x; + if (cp[x->rn_off] & x->rn_bmask) + x = x->rn_r; + else + x = x->rn_l; + } while 
(b > (unsigned) x->rn_b); /* x->rn_b < b && x->rn_b >= 0 */ +#ifdef RN_DEBUG + if (rn_debug) + log(LOG_DEBUG, "rn_insert: Going In:\n"); // traverse(p); +#endif + t = rn_newpair(v_arg, b, nodes); + tt = t->rn_l; + if ((cp[p->rn_off] & p->rn_bmask) == 0) + p->rn_l = t; + else + p->rn_r = t; + x->rn_p = t; + t->rn_p = p; /* frees x, p as temp vars below */ + if ((cp[t->rn_off] & t->rn_bmask) == 0) { + t->rn_r = x; + } else { + t->rn_r = tt; + t->rn_l = x; + } +#ifdef RN_DEBUG + if (rn_debug) + log(LOG_DEBUG, "rn_insert: Coming Out:\n"); // traverse(p); +#endif + } + return (tt); +} + +struct radix_node * +rn_addmask(n_arg, search, skip) + int search, skip; + void *n_arg; +{ + caddr_t netmask = (caddr_t)n_arg; + struct radix_node *x; + caddr_t cp, cplim; + int b = 0, mlen, j; + int maskduplicated, m0, isnormal; + struct radix_node *saved_x; + static int last_zeroed = 0; + +#ifdef RN_DEBUG + if (rn_debug) + log(LOG_DEBUG, "rn_addmask(%p,%d,%d)\n", n_arg, search, skip); +#endif + mlen = *(u_char *)netmask; + if ((mlen = *(u_char *)netmask) > max_keylen) + mlen = max_keylen; + if (skip == 0) + skip = 1; + if (mlen <= skip) + return (mask_rnhead->rnh_nodes); + if (skip > 1) + Bcopy(rn_ones + 1, addmask_key + 1, skip - 1); + if ((m0 = mlen) > skip) + Bcopy(netmask + skip, addmask_key + skip, mlen - skip); + /* + * Trim trailing zeroes. 
+ */ + for (cp = addmask_key + mlen; (cp > addmask_key) && cp[-1] == 0;) + cp--; + mlen = cp - addmask_key; + if (mlen <= skip) { + if (m0 >= last_zeroed) + last_zeroed = mlen; + return (mask_rnhead->rnh_nodes); + } + if (m0 < last_zeroed) + Bzero(addmask_key + m0, last_zeroed - m0); + *addmask_key = last_zeroed = mlen; + x = rn_search(addmask_key, rn_masktop); + if (Bcmp(addmask_key, x->rn_key, mlen) != 0) + x = 0; + if (x || search) + return (x); + R_Malloc(x, struct radix_node *, max_keylen + 2 * sizeof (*x)); + if ((saved_x = x) == 0) + return (0); + Bzero(x, max_keylen + 2 * sizeof (*x)); + netmask = cp = (caddr_t)(x + 2); + Bcopy(addmask_key, cp, mlen); + x = rn_insert(cp, mask_rnhead, &maskduplicated, x); + if (maskduplicated) { +#if 0 + log(LOG_ERR, "rn_addmask: mask impossibly already in tree\n"); +#endif + Free(saved_x); + return (x); + } + /* + * Calculate index of mask, and check for normalcy. + */ + cplim = netmask + mlen; + isnormal = 1; + for (cp = netmask + skip; (cp < cplim) && *(u_char *)cp == 0xff;) + cp++; + if (cp != cplim) { + for (j = 0x80; (j & *cp) != 0; j >>= 1) + b++; + if (*cp != normal_chars[b] || cp != (cplim - 1)) + isnormal = 0; + } + b += (cp - netmask) << 3; + x->rn_b = -1 - b; + if (isnormal) + x->rn_flags |= RNF_NORMAL; + return (x); +} + +static int /* XXX: arbitrary ordering for non-contiguous masks */ +rn_lexobetter(m_arg, n_arg) + void *m_arg, *n_arg; +{ + u_char *mp = m_arg, *np = n_arg, *lim; + + if (*mp > *np) + return 1; /* not really, but need to check longer one first */ + if (*mp == *np) + for (lim = mp + *mp; mp < lim;) + if (*mp++ > *np++) + return 1; + return 0; +} + +static struct radix_mask * +rn_new_radix_mask(tt, next) + struct radix_node *tt; + struct radix_mask *next; +{ + struct radix_mask *m; + + MKGet(m); + if (m == 0) { +#if 0 + log(LOG_ERR, "Mask for route not entered\n"); +#endif + return (0); + } + Bzero(m, sizeof *m); + m->rm_b = tt->rn_b; + m->rm_flags = tt->rn_flags; + if (tt->rn_flags & RNF_NORMAL) 
+ m->rm_leaf = tt; + else + m->rm_mask = tt->rn_mask; + m->rm_mklist = next; + tt->rn_mklist = m; + return m; +} + +struct radix_node * +rn_addroute(v_arg, n_arg, head, treenodes) + void *v_arg, *n_arg; + struct radix_node_head *head; + struct radix_node treenodes[2]; +{ + caddr_t v = (caddr_t)v_arg, netmask = (caddr_t)n_arg; + struct radix_node *t, *x = NULL, *tt; + struct radix_node *saved_tt, *top = head->rnh_treetop; + short b = 0, b_leaf = 0; + int keyduplicated; + caddr_t mmask; + struct radix_mask *m, **mp; + +#ifdef RN_DEBUG + if (rn_debug) + log(LOG_DEBUG, "rn_addroute(%p,%p,%p,%p)\n", v_arg, n_arg, head, treenodes); +#endif + /* + * In dealing with non-contiguous masks, there may be + * many different routes which have the same mask. + * We will find it useful to have a unique pointer to + * the mask to speed avoiding duplicate references at + * nodes and possibly save time in calculating indices. + */ + if (netmask) { + if ((x = rn_addmask(netmask, 0, top->rn_off)) == 0) + return (0); + b_leaf = x->rn_b; + b = -1 - x->rn_b; + netmask = x->rn_key; + } + /* + * Deal with duplicated keys: attach node to previous instance + */ + saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes); + if (keyduplicated) { + for (t = tt; tt; t = tt, tt = tt->rn_dupedkey) { + if (tt->rn_mask == netmask) + return (0); + if (netmask == 0 || + (tt->rn_mask && + ((b_leaf < tt->rn_b) || /* index(netmask) > node */ + rn_refines(netmask, tt->rn_mask) || + rn_lexobetter(netmask, tt->rn_mask)))) + break; + } + /* + * If the mask is not duplicated, we wouldn't + * find it among possible duplicate key entries + * anyway, so the above test doesn't hurt. + * + * We sort the masks for a duplicated key the same way as + * in a masklist -- most specific to least specific. + * This may require the unfortunate nuisance of relocating + * the head of the list. + * + * We also reverse, or doubly link the list through the + * parent pointer. 
+ */ + if (tt == saved_tt) { + struct radix_node *xx = x; + /* link in at head of list */ + (tt = treenodes)->rn_dupedkey = t; + tt->rn_flags = t->rn_flags; + tt->rn_p = x = t->rn_p; + t->rn_p = tt; + if (x->rn_l == t) + x->rn_l = tt; + else + x->rn_r = tt; + saved_tt = tt; + x = xx; + } else { + (tt = treenodes)->rn_dupedkey = t->rn_dupedkey; + t->rn_dupedkey = tt; + tt->rn_p = t; + if (tt->rn_dupedkey) + tt->rn_dupedkey->rn_p = tt; + } +#ifdef RN_DEBUG + t=tt+1; + tt->rn_info = rn_nodenum++; + t->rn_info = rn_nodenum++; + tt->rn_twin = t; + tt->rn_ybro = rn_clist; + rn_clist = tt; +#endif + tt->rn_key = (caddr_t) v; + tt->rn_b = -1; + tt->rn_flags = RNF_ACTIVE; + } + /* + * Put mask in tree. + */ + if (netmask) { + tt->rn_mask = netmask; + tt->rn_b = x->rn_b; + tt->rn_flags |= x->rn_flags & RNF_NORMAL; + } + t = saved_tt->rn_p; + if (keyduplicated) + goto on2; + b_leaf = -1 - t->rn_b; + if (t->rn_r == saved_tt) + x = t->rn_l; + else + x = t->rn_r; + /* Promote general routes from below */ + if (x->rn_b < 0) { + for (mp = &t->rn_mklist; x; x = x->rn_dupedkey) + if (x->rn_mask && (x->rn_b >= b_leaf) && x->rn_mklist == 0) { + *mp = m = rn_new_radix_mask(x, 0); + if (m) + mp = &m->rm_mklist; + } + } else if (x->rn_mklist) { + /* + * Skip over masks whose index is > that of new node + */ + for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist) + if (m->rm_b >= b_leaf) + break; + t->rn_mklist = m; + *mp = 0; + } +on2: + /* Add new route to highest possible ancestor's list */ + if ((netmask == 0) || (b > t->rn_b )) + return tt; /* can't lift at all */ + b_leaf = tt->rn_b; + do { + x = t; + t = t->rn_p; + } while (b <= t->rn_b && x != top); + /* + * Search through routes associated with node to + * insert new route according to index. + * Need same criteria as when sorting dupedkeys to avoid + * double loop on deletion. 
+ */ + for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist) { + if (m->rm_b < b_leaf) + continue; + if (m->rm_b > b_leaf) + break; + if (m->rm_flags & RNF_NORMAL) { + mmask = m->rm_leaf->rn_mask; + if (tt->rn_flags & RNF_NORMAL) { +#if 0 + log(LOG_ERR, "Non-unique normal route," + " mask not entered\n"); +#endif + return tt; + } + } else + mmask = m->rm_mask; + if (mmask == netmask) { + m->rm_refs++; + tt->rn_mklist = m; + return tt; + } + if (rn_refines(netmask, mmask) + || rn_lexobetter(netmask, mmask)) + break; + } + *mp = rn_new_radix_mask(tt, *mp); + return tt; +} + +struct radix_node * +rn_delete(v_arg, netmask_arg, head) + void *v_arg, *netmask_arg; + struct radix_node_head *head; +{ + struct radix_node *t, *p, *x, *tt; + struct radix_mask *m, *saved_m, **mp; + struct radix_node *dupedkey, *saved_tt, *top; + caddr_t v, netmask; + int b, head_off, vlen; + + v = v_arg; + netmask = netmask_arg; + x = head->rnh_treetop; + tt = rn_search(v, x); + head_off = x->rn_off; + vlen = *(u_char *)v; + saved_tt = tt; + top = x; + if (tt == 0 || + Bcmp(v + head_off, tt->rn_key + head_off, vlen - head_off)) + return (0); + /* + * Delete our route from mask lists. 
+ */ + if (netmask) { + if ((x = rn_addmask(netmask, 1, head_off)) == 0) + return (0); + netmask = x->rn_key; + while (tt->rn_mask != netmask) + if ((tt = tt->rn_dupedkey) == 0) + return (0); + } + if (tt->rn_mask == 0 || (saved_m = m = tt->rn_mklist) == 0) + goto on1; + if (tt->rn_flags & RNF_NORMAL) { + if (m->rm_leaf != tt || m->rm_refs > 0) { +#if 0 + log(LOG_ERR, "rn_delete: inconsistent annotation\n"); +#endif + return 0; /* dangling ref could cause disaster */ + } + } else { + if (m->rm_mask != tt->rn_mask) { +#if 0 + log(LOG_ERR, "rn_delete: inconsistent annotation\n"); +#endif + goto on1; + } + if (--m->rm_refs >= 0) + goto on1; + } + b = -1 - tt->rn_b; + t = saved_tt->rn_p; + if (b > t->rn_b) + goto on1; /* Wasn't lifted at all */ + do { + x = t; + t = t->rn_p; + } while (b <= t->rn_b && x != top); + for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist) + if (m == saved_m) { + *mp = m->rm_mklist; + MKFree(m); + break; + } + if (m == 0) { +#if 0 + log(LOG_ERR, "rn_delete: couldn't find our annotation\n"); +#endif + if (tt->rn_flags & RNF_NORMAL) + return (0); /* Dangling ref to us */ + } +on1: + /* + * Eliminate us from tree + */ + if (tt->rn_flags & RNF_ROOT) + return (0); +#ifdef RN_DEBUG + /* Get us out of the creation list */ + for (t = rn_clist; t && t->rn_ybro != tt; t = t->rn_ybro) + ; + if (t) t->rn_ybro = tt->rn_ybro; +#endif + t = tt->rn_p; + dupedkey = saved_tt->rn_dupedkey; + if (dupedkey) { + /* + * Here, tt is the deletion target and + * saved_tt is the head of the dupedkey chain. 
+ */ + if (tt == saved_tt) { + x = dupedkey; + x->rn_p = t; + if (t->rn_l == tt) + t->rn_l = x; + else + t->rn_r = x; + } else { + /* find node in front of tt on the chain */ + for (x = p = saved_tt; p && p->rn_dupedkey != tt;) + p = p->rn_dupedkey; + if (p) { + p->rn_dupedkey = tt->rn_dupedkey; + if (tt->rn_dupedkey) + tt->rn_dupedkey->rn_p = p; + } +#if 0 + else + log(LOG_ERR, "rn_delete: couldn't find us\n"); +#endif + } + t = tt + 1; + if (t->rn_flags & RNF_ACTIVE) { +#ifndef RN_DEBUG + *++x = *t; + p = t->rn_p; +#else + b = t->rn_info; + *++x = *t; + t->rn_info = b; + p = t->rn_p; +#endif + if (p->rn_l == t) + p->rn_l = x; + else + p->rn_r = x; + x->rn_l->rn_p = x; + x->rn_r->rn_p = x; + } + goto out; + } + if (t->rn_l == tt) + x = t->rn_r; + else + x = t->rn_l; + p = t->rn_p; + if (p->rn_r == t) + p->rn_r = x; + else + p->rn_l = x; + x->rn_p = p; + /* + * Demote routes attached to us. + */ + if (t->rn_mklist) { + if (x->rn_b >= 0) { + for (mp = &x->rn_mklist; (m = *mp) != NULL;) + mp = &m->rm_mklist; + *mp = t->rn_mklist; + } else { + /* If there are any key,mask pairs in a sibling + duped-key chain, some subset will appear sorted + in the same order attached to our mklist */ + for (m = t->rn_mklist; m && x; x = x->rn_dupedkey) + if (m == x->rn_mklist) { + struct radix_mask *mm = m->rm_mklist; + x->rn_mklist = 0; + if (--(m->rm_refs) < 0) + MKFree(m); + m = mm; + } +#if 0 + if (m) + log(LOG_ERR, "%s %p at %p\n", + "rn_delete: Orphaned Mask", m, x); +#endif + } + } + /* + * We may be holding an active internal node in the tree. 
+ */ + x = tt + 1; + if (t != x) { +#ifndef RN_DEBUG + *t = *x; +#else + b = t->rn_info; + *t = *x; + t->rn_info = b; +#endif + t->rn_l->rn_p = t; + t->rn_r->rn_p = t; + p = x->rn_p; + if (p->rn_l == x) + p->rn_l = t; + else + p->rn_r = t; + } +out: + tt->rn_flags &= ~RNF_ACTIVE; + tt[1].rn_flags &= ~RNF_ACTIVE; + return (tt); +} + +int +rn_walktree(h, f, w) + struct radix_node_head *h; + int (*f) __P((struct radix_node *, void *)); + void *w; +{ + int error; + struct radix_node *base, *next; + struct radix_node *rn = h->rnh_treetop; + /* + * This gets complicated because we may delete the node + * while applying the function f to it, so we need to calculate + * the successor node in advance. + */ + /* First time through node, go left */ + while (rn->rn_b >= 0) + rn = rn->rn_l; + for (;;) { + base = rn; + /* If at right child go back up, otherwise, go right */ + while (rn->rn_p->rn_r == rn && (rn->rn_flags & RNF_ROOT) == 0) + rn = rn->rn_p; + /* Find the next *leaf* since next node might vanish, too */ + for (rn = rn->rn_p->rn_r; rn->rn_b >= 0;) + rn = rn->rn_l; + next = rn; + /* Process leaves */ + while ((rn = base) != NULL) { + base = rn->rn_dupedkey; + if (!(rn->rn_flags & RNF_ROOT) + && (error = (*f)(rn, w))) + return (error); + } + rn = next; + if (rn->rn_flags & RNF_ROOT) + return (0); + } + /* NOTREACHED */ +} + +int +rn_inithead(head, off) + void **head; + int off; +{ + struct radix_node_head *rnh; + + if (*head) + return (1); + R_Malloc(rnh, struct radix_node_head *, sizeof (*rnh)); + if (rnh == 0) + return (0); + *head = rnh; + return rn_inithead0(rnh, off); +} + +int +rn_inithead0(rnh, off) + struct radix_node_head *rnh; + int off; +{ + struct radix_node *t, *tt, *ttt; + + Bzero(rnh, sizeof (*rnh)); + t = rn_newpair(rn_zeros, off, rnh->rnh_nodes); + ttt = rnh->rnh_nodes + 2; + t->rn_r = ttt; + t->rn_p = t; + tt = t->rn_l; + tt->rn_flags = t->rn_flags = RNF_ROOT | RNF_ACTIVE; + tt->rn_b = -1 - off; + *ttt = *tt; + ttt->rn_key = rn_ones; + 
rnh->rnh_addaddr = rn_addroute; + rnh->rnh_deladdr = rn_delete; + rnh->rnh_matchaddr = rn_match; + rnh->rnh_lookup = rn_lookup; + rnh->rnh_walktree = rn_walktree; + rnh->rnh_treetop = t; + return (1); +} + /* rn_init: set up the shared radix state. Does nothing when max_keylen is 0. Otherwise allocates (unless already allocated) a 3 * max_keylen byte buffer at rn_zeros and zeroes it, points rn_ones at the second third and fills it with all-ones bytes, points addmask_key at the last third, and creates mask_rnhead. Panics if either allocation fails. */ +void +rn_init() +{ + char *cp, *cplim; + + if (max_keylen == 0) { +#if 0 + log(LOG_ERR, + "rn_init: radix functions require max_keylen be set\n"); +#endif + return; + } + if (rn_zeros == NULL) { + R_Malloc(rn_zeros, char *, 3 * max_keylen); + } + if (rn_zeros == NULL) + panic("rn_init"); + Bzero(rn_zeros, 3 * max_keylen); + rn_ones = cp = rn_zeros + max_keylen; + addmask_key = cplim = rn_ones + max_keylen; + while (cp < cplim) + *cp++ = -1; + if (rn_inithead((void *)&mask_rnhead, 0) == 0) + panic("rn_init 2"); +} + + /* rn_freenode: rn_walktree() callback used by rn_freehead(). Deletes leaf n from the head passed in p through rnh_deladdr and, when a node is returned, frees it using the size max_keylen + 2 * sizeof (struct radix_node). Always returns 0 so the walk continues. */ +static int +rn_freenode(struct radix_node *n, void *p) +{ + struct radix_node_head *rnh = p; + struct radix_node *d; + + d = rnh->rnh_deladdr(n->rn_key, NULL, rnh); + if (d != NULL) { + FreeS(d, max_keylen + 2 * sizeof (*d)); + } + return 0; +} + + /* rn_freehead: delete and free every non-root leaf of rnh via rn_freenode(), clear the method pointers of the head and free the head itself. */ +void +rn_freehead(rnh) + struct radix_node_head *rnh; +{ + + (void)rn_walktree(rnh, rn_freenode, rnh); + + rnh->rnh_addaddr = NULL; + rnh->rnh_deladdr = NULL; + rnh->rnh_matchaddr = NULL; + rnh->rnh_lookup = NULL; + rnh->rnh_walktree = NULL; + + Free(rnh); +} + + /* rn_fini: release what rn_init() set up: the rn_zeros buffer, mask_rnhead, and every radix_mask left on rn_mkfreelist. */ +void +rn_fini() +{ + struct radix_mask *m; + + if (rn_zeros != NULL) { + FreeS(rn_zeros, 3 * max_keylen); + rn_zeros = NULL; + } + + if (mask_rnhead != NULL) { + rn_freehead(mask_rnhead); + mask_rnhead = NULL; + } + + while ((m = rn_mkfreelist) != NULL) { + rn_mkfreelist = m->rm_mklist; + KFREE(m); + } +} + + +#ifdef USE_MAIN + +typedef struct myst { + addrfamily_t dst; + addrfamily_t mask; + struct radix_node nodes[2]; +} myst_t; + /* main: stand-alone exerciser, built only with USE_MAIN. Adds 127.0.0.0/255.0.0.0 and 127.0.1.0/255.255.255.0 to a fresh tree, then prints the result of two rnh_matchaddr() calls and one rnh_lookup() call. */ +int +main(int argc, char *argv[]) +{ + struct radix_node_head *rnh; + struct radix_node *rn; + addrfamily_t af, mf; + myst_t st1, st2, *stp; + + memset(&st1, 0, sizeof(st1)); + memset(&st2, 0, sizeof(st2)); + memset(&af, 0, sizeof(af)); + /* mf is handed to rnh_lookup() as the mask below but only its length and address are assigned, so clear the rest like af */ + memset(&mf, 0, sizeof(mf)); + + rn_init(); + + rnh = NULL; + rn_inithead(&rnh, offsetof(addrfamily_t, 
adf_addr) << 3); + + st1.dst.adf_len = sizeof(st1); + st1.mask.adf_len = sizeof(st1); + st1.dst.adf_addr.in4.s_addr = inet_addr("127.0.0.0"); + st1.mask.adf_addr.in4.s_addr = inet_addr("255.0.0.0"); + rn = rnh->rnh_addaddr(&st1.dst, &st1.mask, rnh, st1.nodes); + printf("add.1 %p\n", rn); + + st2.dst.adf_len = sizeof(st2); + st2.mask.adf_len = sizeof(st2); + st2.dst.adf_addr.in4.s_addr = inet_addr("127.0.1.0"); + st2.mask.adf_addr.in4.s_addr = inet_addr("255.255.255.0"); + rn = rnh->rnh_addaddr(&st2.dst, &st2.mask, rnh, st2.nodes); + printf("add.2 %p\n", rn); + + af.adf_len = sizeof(af); + af.adf_addr.in4.s_addr = inet_addr("127.0.1.0"); + rn = rnh->rnh_matchaddr(&af, rnh); + if (rn != NULL) { + printf("1.lookup = %p key %p mask %p\n", rn, rn->rn_key, rn->rn_mask); + stp = rn->rn_key; + printf("%s/", inet_ntoa(stp->dst.adf_addr.in4)); + stp = rn->rn_mask; + printf("%s\n", inet_ntoa(stp->dst.adf_addr.in4)); + } + + mf.adf_len = sizeof(mf); + mf.adf_addr.in4.s_addr = inet_addr("255.255.255.0"); + rn = rnh->rnh_lookup(&af, &mf, rnh); + if (rn != NULL) { + printf("2.lookup = %p key %p mask %p\n", rn, rn->rn_key, rn->rn_mask); + stp = rn->rn_key; + printf("%s/", inet_ntoa(stp->dst.adf_addr.in4)); + stp = rn->rn_mask; + printf("%s\n", inet_ntoa(stp->dst.adf_addr.in4)); + } + + af.adf_len = sizeof(af); + af.adf_addr.in4.s_addr = inet_addr("126.0.0.1"); + rn = rnh->rnh_matchaddr(&af, rnh); + if (rn != NULL) { + printf("3.lookup = %p key %p mask %p\n", rn, rn->rn_key, rn->rn_mask); + stp = rn->rn_key; + printf("%s/", inet_ntoa(stp->dst.adf_addr.in4)); + stp = rn->rn_mask; + printf("%s\n", inet_ntoa(stp->dst.adf_addr.in4)); + } + + return 0; +} + + +void +log(int level, char *format, ...) 
+{ + va_list ap; + + va_start(ap, format); + vfprintf(stderr, format, ap); + va_end(ap); +} +#endif + + +#ifndef _KERNEL +void +panic(char *str) +{ + fputs(str, stderr); + abort(); +} +#endif diff --git a/usr/src/uts/common/inet/ipf/radix.h b/usr/src/uts/common/inet/ipf/radix.h new file mode 100644 index 0000000000..0b55412605 --- /dev/null +++ b/usr/src/uts/common/inet/ipf/radix.h @@ -0,0 +1,166 @@ +/* + * Copyright (c) 1988, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)radix.h 8.2 (Berkeley) 10/31/94 + */ + +#ifndef _NET_RADIX_H_ +#define _NET_RADIX_H_ + +#ifndef __P +# ifdef __STDC__ +# define __P(x) x +# else +# define __P(x) () +# endif +#endif + +/* + * Radix search tree node layout. + */ + +struct radix_node { + struct radix_mask *rn_mklist; /* list of masks contained in subtree */ + struct radix_node *rn_p; /* parent */ + short rn_b; /* bit offset; -1-index(netmask) */ + char rn_bmask; /* node: mask for bit test*/ + u_char rn_flags; /* enumerated next */ +#define RNF_NORMAL 1 /* leaf contains normal route */ +#define RNF_ROOT 2 /* leaf is root leaf for tree */ +#define RNF_ACTIVE 4 /* This node is alive (for rtfree) */ + union { + struct { /* leaf only data: */ + caddr_t rn_Key; /* object of search */ + caddr_t rn_Mask; /* netmask, if present */ + struct radix_node *rn_Dupedkey; + } rn_leaf; + struct { /* node only data: */ + int rn_Off; /* where to start compare */ + struct radix_node *rn_L;/* progeny */ + struct radix_node *rn_R;/* progeny */ + } rn_node; + } rn_u; +#ifdef RN_DEBUG + int rn_info; + struct radix_node *rn_twin; + struct radix_node *rn_ybro; +#endif +}; + +#define rn_dupedkey rn_u.rn_leaf.rn_Dupedkey +#define rn_key rn_u.rn_leaf.rn_Key +#define rn_mask rn_u.rn_leaf.rn_Mask +#define rn_off rn_u.rn_node.rn_Off +#define rn_l rn_u.rn_node.rn_L +#define rn_r rn_u.rn_node.rn_R + +/* + * Annotations to tree concerning potential routes applying to subtrees. + */ + +extern struct radix_mask { + short rm_b; /* bit offset; -1-index(netmask) */ + char rm_unused; /* cf. rn_bmask */ + u_char rm_flags; /* cf. 
rn_flags */ + struct radix_mask *rm_mklist; /* more masks to try */ + union { + caddr_t rmu_mask; /* the mask */ + struct radix_node *rmu_leaf; /* for normal routes */ + } rm_rmu; + int rm_refs; /* # of references to this struct */ +} *rn_mkfreelist; + +#define rm_mask rm_rmu.rmu_mask +#define rm_leaf rm_rmu.rmu_leaf /* extra field would make 32 bytes */ + +#define MKGet(m) {\ + if (rn_mkfreelist) {\ + m = rn_mkfreelist; \ + rn_mkfreelist = (m)->rm_mklist; \ + } else \ + R_Malloc(m, struct radix_mask *, sizeof (*(m))); }\ + +#define MKFree(m) { (m)->rm_mklist = rn_mkfreelist; rn_mkfreelist = (m);} + +struct radix_node_head { + struct radix_node *rnh_treetop; + struct radix_node *rnh_leaflist; + u_long rnh_hits; + u_int rnh_number; + u_int rnh_ref; + int rnh_addrsize; /* permit, but not require fixed keys */ + int rnh_pktsize; /* permit, but not require fixed keys */ + struct radix_node *(*rnh_addaddr) /* add based on sockaddr */ + __P((void *v, void *mask, + struct radix_node_head *head, struct radix_node nodes[])); + struct radix_node *(*rnh_addpkt) /* add based on packet hdr */ + __P((void *v, void *mask, + struct radix_node_head *head, struct radix_node nodes[])); + struct radix_node *(*rnh_deladdr) /* remove based on sockaddr */ + __P((void *v, void *mask, struct radix_node_head *head)); + struct radix_node *(*rnh_delpkt) /* remove based on packet hdr */ + __P((void *v, void *mask, struct radix_node_head *head)); + struct radix_node *(*rnh_matchaddr) /* locate based on sockaddr */ + __P((void *v, struct radix_node_head *head)); + struct radix_node *(*rnh_lookup) /* locate based on sockaddr */ + __P((void *v, void *mask, struct radix_node_head *head)); + struct radix_node *(*rnh_matchpkt) /* locate based on packet hdr */ + __P((void *v, struct radix_node_head *head)); + int (*rnh_walktree) /* traverse tree */ + __P((struct radix_node_head *, + int (*)(struct radix_node *, void *), void *)); + struct radix_node rnh_nodes[3]; /* empty tree for common case */ +}; + 
+ +#define Bcmp(a, b, n) bcmp(((caddr_t)(a)), ((caddr_t)(b)), (unsigned)(n)) +#define Bcopy(a, b, n) bcopy(((caddr_t)(a)), ((caddr_t)(b)), (unsigned)(n)) +#define Bzero(p, n) bzero((caddr_t)(p), (unsigned)(n)); +#define R_Malloc(p, t, n) KMALLOCS(p, t, n) +#define FreeS(p, z) KFREES(p, z) +#define Free(p) KFREE(p) + +void rn_init __P((void)); +void rn_fini __P((void)); +int rn_inithead __P((void **, int)); +void rn_freehead __P((struct radix_node_head *)); +int rn_inithead0 __P((struct radix_node_head *, int)); +int rn_refines __P((void *, void *)); +int rn_walktree __P((struct radix_node_head *, + int (*)(struct radix_node *, void *), void *)); +struct radix_node + *rn_addmask __P((void *, int, int)), + *rn_addroute __P((void *, void *, struct radix_node_head *, + struct radix_node [2])), + *rn_delete __P((void *, void *, struct radix_node_head *)), + *rn_insert __P((void *, struct radix_node_head *, int *, + struct radix_node [2])), + *rn_lookup __P((void *, void *, struct radix_node_head *)), + *rn_match __P((void *, struct radix_node_head *)), + *rn_newpair __P((void *, int, struct radix_node[2])), + *rn_search __P((void *, struct radix_node *)), + *rn_search_m __P((void *, struct radix_node *, void *)); + +#endif /* _NET_RADIX_H_ */ diff --git a/usr/src/uts/common/inet/ipf/radix_ipf.h b/usr/src/uts/common/inet/ipf/radix_ipf.h new file mode 100644 index 0000000000..357b9c40dc --- /dev/null +++ b/usr/src/uts/common/inet/ipf/radix_ipf.h @@ -0,0 +1,212 @@ +/* + * Copyright (c) 1988, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)radix.h 8.2 (Berkeley) 10/31/94 + */ + +#if !defined(_NET_RADIX_H_) && !defined(_RADIX_H_) +#define _NET_RADIX_H_ +#ifndef _RADIX_H_ +#define _RADIX_H_ +#endif /* _RADIX_H_ */ + +#ifndef __P +# ifdef __STDC__ +# define __P(x) x +# else +# define __P(x) () +# endif +#endif + +#if defined(__sgi) +# define radix_mask ipf_radix_mask +# define radix_node ipf_radix_node +# define radix_node_head ipf_radix_node_head +#endif + +/* + * Radix search tree node layout. 
+ */ + +struct radix_node { + struct radix_mask *rn_mklist; /* list of masks contained in subtree */ + struct radix_node *rn_p; /* parent */ + short rn_b; /* bit offset; -1-index(netmask) */ + char rn_bmask; /* node: mask for bit test*/ + u_char rn_flags; /* enumerated next */ +#define RNF_NORMAL 1 /* leaf contains normal route */ +#define RNF_ROOT 2 /* leaf is root leaf for tree */ +#define RNF_ACTIVE 4 /* This node is alive (for rtfree) */ + union { + struct { /* leaf only data: */ + caddr_t rn_Key; /* object of search */ + caddr_t rn_Mask; /* netmask, if present */ + struct radix_node *rn_Dupedkey; + } rn_leaf; + struct { /* node only data: */ + int rn_Off; /* where to start compare */ + struct radix_node *rn_L;/* progeny */ + struct radix_node *rn_R;/* progeny */ + } rn_node; + } rn_u; +#ifdef RN_DEBUG + int rn_info; + struct radix_node *rn_twin; + struct radix_node *rn_ybro; +#endif +}; + +#define rn_dupedkey rn_u.rn_leaf.rn_Dupedkey +#define rn_key rn_u.rn_leaf.rn_Key +#define rn_mask rn_u.rn_leaf.rn_Mask +#define rn_off rn_u.rn_node.rn_Off +#define rn_l rn_u.rn_node.rn_L +#define rn_r rn_u.rn_node.rn_R + +/* + * Annotations to tree concerning potential routes applying to subtrees. + */ + +struct radix_mask { + short rm_b; /* bit offset; -1-index(netmask) */ + char rm_unused; /* cf. rn_bmask */ + u_char rm_flags; /* cf. 
rn_flags */ + struct radix_mask *rm_mklist; /* more masks to try */ + union { + caddr_t rmu_mask; /* the mask */ + struct radix_node *rmu_leaf; /* for normal routes */ + } rm_rmu; + int rm_refs; /* # of references to this struct */ +}; + +#define rm_mask rm_rmu.rmu_mask +#define rm_leaf rm_rmu.rmu_leaf /* extra field would make 32 bytes */ + +#define MKGet(m) {\ + if (rn_mkfreelist) {\ + m = rn_mkfreelist; \ + rn_mkfreelist = (m)->rm_mklist; \ + } else \ + R_Malloc(m, struct radix_mask *, sizeof (*(m))); }\ + +#define MKFree(m) { (m)->rm_mklist = rn_mkfreelist; rn_mkfreelist = (m);} + +struct radix_node_head { + struct radix_node *rnh_treetop; + struct radix_node *rnh_leaflist; + u_long rnh_hits; + u_int rnh_number; + u_int rnh_ref; + int rnh_addrsize; /* permit, but not require fixed keys */ + int rnh_pktsize; /* permit, but not require fixed keys */ + struct radix_node *(*rnh_addaddr) /* add based on sockaddr */ + __P((void *v, void *mask, + struct radix_node_head *head, struct radix_node nodes[])); + struct radix_node *(*rnh_addpkt) /* add based on packet hdr */ + __P((void *v, void *mask, + struct radix_node_head *head, struct radix_node nodes[])); + struct radix_node *(*rnh_deladdr) /* remove based on sockaddr */ + __P((void *v, void *mask, struct radix_node_head *head)); + struct radix_node *(*rnh_delpkt) /* remove based on packet hdr */ + __P((void *v, void *mask, struct radix_node_head *head)); + struct radix_node *(*rnh_matchaddr) /* locate based on sockaddr */ + __P((void *v, struct radix_node_head *head)); + struct radix_node *(*rnh_lookup) /* locate based on sockaddr */ + __P((void *v, void *mask, struct radix_node_head *head)); + struct radix_node *(*rnh_matchpkt) /* locate based on packet hdr */ + __P((void *v, struct radix_node_head *head)); + int (*rnh_walktree) /* traverse tree */ + __P((struct radix_node_head *, + int (*)(struct radix_node *, void *), void *)); + struct radix_node rnh_nodes[3]; /* empty tree for common case */ +}; + + +#if 
defined(AIX) +# undef Bcmp +# undef Bzero +# undef R_Malloc +# undef Free +#endif +#define Bcmp(a, b, n) bcmp(((caddr_t)(a)), ((caddr_t)(b)), (unsigned)(n)) +#if defined(linux) && defined(_KERNEL) +# define Bcopy(a, b, n) memmove(((caddr_t)(b)), ((caddr_t)(a)), (unsigned)(n)) +#else +# define Bcopy(a, b, n) bcopy(((caddr_t)(a)), ((caddr_t)(b)), (unsigned)(n)) +#endif +#define Bzero(p, n) bzero((caddr_t)(p), (unsigned)(n)); +#define R_Malloc(p, t, n) KMALLOCS(p, t, n) +#define FreeS(p, z) KFREES(p, z) +#define Free(p) KFREE(p) + +#if (defined(__osf__) || defined(AIX) || (IRIX >= 60516)) && defined(_KERNEL) +# define rn_init ipf_rn_init +# define rn_fini ipf_rn_fini +# define rn_inithead ipf_rn_inithead +# define rn_freehead ipf_rn_freehead +# define rn_inithead0 ipf_rn_inithead0 +# define rn_refines ipf_rn_refines +# define rn_walktree ipf_rn_walktree +# define rn_addmask ipf_rn_addmask +# define rn_addroute ipf_rn_addroute +# define rn_delete ipf_rn_delete +# define rn_insert ipf_rn_insert +# define rn_lookup ipf_rn_lookup +# define rn_match ipf_rn_match +# define rn_newpair ipf_rn_newpair +# define rn_search ipf_rn_search +# define rn_search_m ipf_rn_search_m +# define max_keylen ipf_maxkeylen +# define rn_mkfreelist ipf_rn_mkfreelist +# define rn_zeros ipf_rn_zeros +# define rn_ones ipf_rn_ones +# define rn_satisfies_leaf ipf_rn_satisfies_leaf +# define rn_lexobetter ipf_rn_lexobetter +# define rn_new_radix_mask ipf_rn_new_radix_mask +# define rn_freenode ipf_rn_freenode +#endif + +void rn_init __P((void)); +void rn_fini __P((void)); +int rn_inithead __P((void **, int)); +void rn_freehead __P((struct radix_node_head *)); +int rn_inithead0 __P((struct radix_node_head *, int)); +int rn_refines __P((void *, void *)); +int rn_walktree __P((struct radix_node_head *, + int (*)(struct radix_node *, void *), void *)); +struct radix_node + *rn_addmask __P((void *, int, int)), + *rn_addroute __P((void *, void *, struct radix_node_head *, + struct radix_node [2])), + 
*rn_delete __P((void *, void *, struct radix_node_head *)), + *rn_insert __P((void *, struct radix_node_head *, int *, + struct radix_node [2])), + *rn_lookup __P((void *, void *, struct radix_node_head *)), + *rn_match __P((void *, struct radix_node_head *)), + *rn_newpair __P((void *, int, struct radix_node[2])), + *rn_search __P((void *, struct radix_node *)), + *rn_search_m __P((void *, struct radix_node *, void *)); + +#endif /* _NET_RADIX_H_ */ diff --git a/usr/src/uts/common/inet/ipf/solaris.c b/usr/src/uts/common/inet/ipf/solaris.c new file mode 100644 index 0000000000..0c21d1f15f --- /dev/null +++ b/usr/src/uts/common/inet/ipf/solaris.c @@ -0,0 +1,696 @@ +/* + * Copyright (C) 1993-2001, 2003 by Darren Reed. + * + * See the IPFILTER.LICENCE file for details on licencing. + * + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +/* #pragma ident "@(#)solaris.c 1.12 6/5/96 (C) 1995 Darren Reed"*/ +#pragma ident "@(#)$Id: solaris.c,v 2.73.2.6 2005/07/13 21:40:47 darrenr Exp $" + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/systm.h> +#include <sys/types.h> +#include <sys/param.h> +#include <sys/errno.h> +#include <sys/uio.h> +#include <sys/buf.h> +#include <sys/modctl.h> +#include <sys/open.h> +#include <sys/kmem.h> +#include <sys/conf.h> +#include <sys/cmn_err.h> +#include <sys/stat.h> +#include <sys/cred.h> +#include <sys/dditypes.h> +#include <sys/stream.h> +#include <sys/poll.h> +#include <sys/autoconf.h> +#include <sys/byteorder.h> +#include <sys/socket.h> +#include <sys/dlpi.h> +#include <sys/stropts.h> +#include <sys/kstat.h> +#include <sys/sockio.h> +#include <net/if.h> +#if SOLARIS2 >= 6 +# include <net/if_types.h> +#endif +#include <net/af.h> +#include <net/route.h> +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/if_ether.h> +#include <netinet/ip.h> +#include <netinet/ip_var.h> +#include <netinet/tcp.h> +#include <netinet/udp.h> +#include <netinet/tcpip.h> +#include 
<netinet/ip_icmp.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include "netinet/ip_compat.h" +#include "netinet/ipl.h" +#include "netinet/ip_fil.h" +#include "netinet/ip_nat.h" +#include "netinet/ip_frag.h" +#include "netinet/ip_auth.h" +#include "netinet/ip_state.h" + + +extern struct filterstats frstats[]; +extern int fr_running; +extern int fr_flags; +extern int iplwrite __P((dev_t, struct uio *, cred_t *)); + +extern ipnat_t *nat_list; + +static int ipf_getinfo __P((dev_info_t *, ddi_info_cmd_t, + void *, void **)); +#if SOLARIS2 < 10 +static int ipf_identify __P((dev_info_t *)); +#endif +static int ipf_attach __P((dev_info_t *, ddi_attach_cmd_t)); +static int ipf_detach __P((dev_info_t *, ddi_detach_cmd_t)); +static int fr_qifsync __P((ip_t *, int, void *, int, void *, mblk_t **)); +static int ipf_property_update __P((dev_info_t *)); +static char *ipf_devfiles[] = { IPL_NAME, IPNAT_NAME, IPSTATE_NAME, + IPAUTH_NAME, IPSYNC_NAME, IPSCAN_NAME, + IPLOOKUP_NAME, NULL }; + + +#if SOLARIS2 >= 7 +extern timeout_id_t fr_timer_id; +#else +extern int fr_timer_id; +#endif + +static struct cb_ops ipf_cb_ops = { + iplopen, + iplclose, + nodev, /* strategy */ + nodev, /* print */ + nodev, /* dump */ + iplread, + iplwrite, /* write */ + iplioctl, /* ioctl */ + nodev, /* devmap */ + nodev, /* mmap */ + nodev, /* segmap */ + nochpoll, /* poll */ + ddi_prop_op, + NULL, + D_MTSAFE, +#if SOLARIS2 > 4 + CB_REV, + nodev, /* aread */ + nodev, /* awrite */ +#endif +}; + +static struct dev_ops ipf_ops = { + DEVO_REV, + 0, + ipf_getinfo, +#if SOLARIS2 >= 10 + nulldev, +#else + ipf_identify, +#endif + nulldev, + ipf_attach, + ipf_detach, + nodev, /* reset */ + &ipf_cb_ops, + (struct bus_ops *)0 +}; + +extern struct mod_ops mod_driverops; +static struct modldrv iplmod = { + &mod_driverops, IPL_VERSION, &ipf_ops }; +static struct modlinkage modlink1 = { MODREV_1, &iplmod, NULL }; + +#if SOLARIS2 >= 6 +static size_t hdrsizes[57][2] = { + { 0, 0 }, + { IFT_OTHER, 0 }, + { IFT_1822, 0 
}, + { IFT_HDH1822, 0 }, + { IFT_X25DDN, 0 }, + { IFT_X25, 0 }, + { IFT_ETHER, 14 }, + { IFT_ISO88023, 0 }, + { IFT_ISO88024, 0 }, + { IFT_ISO88025, 0 }, + { IFT_ISO88026, 0 }, + { IFT_STARLAN, 0 }, + { IFT_P10, 0 }, + { IFT_P80, 0 }, + { IFT_HY, 0 }, + { IFT_FDDI, 24 }, + { IFT_LAPB, 0 }, + { IFT_SDLC, 0 }, + { IFT_T1, 0 }, + { IFT_CEPT, 0 }, + { IFT_ISDNBASIC, 0 }, + { IFT_ISDNPRIMARY, 0 }, + { IFT_PTPSERIAL, 0 }, + { IFT_PPP, 0 }, + { IFT_LOOP, 0 }, + { IFT_EON, 0 }, + { IFT_XETHER, 0 }, + { IFT_NSIP, 0 }, + { IFT_SLIP, 0 }, + { IFT_ULTRA, 0 }, + { IFT_DS3, 0 }, + { IFT_SIP, 0 }, + { IFT_FRELAY, 0 }, + { IFT_RS232, 0 }, + { IFT_PARA, 0 }, + { IFT_ARCNET, 0 }, + { IFT_ARCNETPLUS, 0 }, + { IFT_ATM, 0 }, + { IFT_MIOX25, 0 }, + { IFT_SONET, 0 }, + { IFT_X25PLE, 0 }, + { IFT_ISO88022LLC, 0 }, + { IFT_LOCALTALK, 0 }, + { IFT_SMDSDXI, 0 }, + { IFT_FRELAYDCE, 0 }, + { IFT_V35, 0 }, + { IFT_HSSI, 0 }, + { IFT_HIPPI, 0 }, + { IFT_MODEM, 0 }, + { IFT_AAL5, 0 }, + { IFT_SONETPATH, 0 }, + { IFT_SONETVT, 0 }, + { IFT_SMDSICIP, 0 }, + { IFT_PROPVIRTUAL, 0 }, + { IFT_PROPMUX, 0 }, +}; +#endif /* SOLARIS2 >= 6 */ + +static dev_info_t *ipf_dev_info = NULL; + /* Template of named kstat entries; ipf_kstat_init() copies it into the ks_data of each kstat it creates. */ +static const filter_kstats_t ipf_kstat_tmp = { + { "pass", KSTAT_DATA_ULONG }, + { "block", KSTAT_DATA_ULONG }, + { "nomatch", KSTAT_DATA_ULONG }, + { "short", KSTAT_DATA_ULONG }, + { "pass, logged", KSTAT_DATA_ULONG }, + { "block, logged", KSTAT_DATA_ULONG }, + { "nomatch, logged", KSTAT_DATA_ULONG }, + { "logged", KSTAT_DATA_ULONG }, + { "skip", KSTAT_DATA_ULONG }, + { "return sent", KSTAT_DATA_ULONG }, + { "acct", KSTAT_DATA_ULONG }, + { "bad frag state alloc", KSTAT_DATA_ULONG }, + { "new frag state kept", KSTAT_DATA_ULONG }, + { "new frag state compl. 
pkt", KSTAT_DATA_ULONG }, + { "bad pkt state alloc", KSTAT_DATA_ULONG }, + { "new pkt kept state", KSTAT_DATA_ULONG }, + { "cachehit", KSTAT_DATA_ULONG }, + { "tcp cksum bad", KSTAT_DATA_ULONG }, + {{ "pullup ok", KSTAT_DATA_ULONG }, + { "pullup nok", KSTAT_DATA_ULONG }}, + { "src != route", KSTAT_DATA_ULONG }, + { "ttl invalid", KSTAT_DATA_ULONG }, + { "bad ip pkt", KSTAT_DATA_ULONG }, + { "ipv6 pkt", KSTAT_DATA_ULONG }, + { "dropped:pps ceiling", KSTAT_DATA_ULONG }, + { "ip upd. fail", KSTAT_DATA_ULONG } +}; + /* Handles of the two kstats made by ipf_kstat_init(): slot 0 is the inbound one, slot 1 the outbound one. A slot stays NULL if creation failed. */ +kstat_t *ipf_kstatp[2] = {NULL, NULL}; +static int ipf_kstat_update(kstat_t *ksp, int rwflag); + /* ipf_kstat_init: create and install the two named kstats (module ipf, instance 0, class net). Each gets a copy of ipf_kstat_tmp as its data, ipf_kstat_update() as its update routine and the matching frstats[] entry as private data. A kstat_create() failure just leaves that slot NULL. */ +static void +ipf_kstat_init(void) +{ + int i; + + for (i = 0; i < 2; i++) { + ipf_kstatp[i] = kstat_create("ipf", 0, + (i==0)?"inbound":"outbound", + "net", + KSTAT_TYPE_NAMED, + sizeof (filter_kstats_t) / sizeof (kstat_named_t), + 0); + if (ipf_kstatp[i] != NULL) { + bcopy(&ipf_kstat_tmp, ipf_kstatp[i]->ks_data, + sizeof (filter_kstats_t)); + ipf_kstatp[i]->ks_update = ipf_kstat_update; + ipf_kstatp[i]->ks_private = &frstats[i]; + kstat_install(ipf_kstatp[i]); + } + } + +#ifdef IPFDEBUG + cmn_err(CE_NOTE, "IP Filter: ipf_kstat_init() installed 0x%x, 0x%x", + ipf_kstatp[0], ipf_kstatp[1]); +#endif +} + /* ipf_kstat_fini: delete whichever kstats were created and reset their slots to NULL. */ +static void +ipf_kstat_fini(void) +{ + int i; + for (i = 0; i < 2; i++) { + if (ipf_kstatp[i] != NULL) { + kstat_delete(ipf_kstatp[i]); + ipf_kstatp[i] = NULL; + } + } +} + /* ipf_kstat_update: kstat update routine. Rejects writes with EACCES; otherwise copies each counter of the filterstats_t in ks_private into the matching named value in ks_data and returns 0. */ +static int +ipf_kstat_update(kstat_t *ksp, int rwflag) +{ + filter_kstats_t *fkp; + filterstats_t *fsp; + + if (rwflag == KSTAT_WRITE) + return (EACCES); + + fkp = ksp->ks_data; + fsp = ksp->ks_private; + + fkp->fks_pass.value.ul = fsp->fr_pass; + fkp->fks_block.value.ul = fsp->fr_block; + fkp->fks_nom.value.ul = fsp->fr_nom; + fkp->fks_short.value.ul = fsp->fr_short; + fkp->fks_ppkl.value.ul = fsp->fr_ppkl; + fkp->fks_bpkl.value.ul = fsp->fr_bpkl; + fkp->fks_npkl.value.ul = fsp->fr_npkl; + fkp->fks_pkl.value.ul = fsp->fr_pkl; + fkp->fks_skip.value.ul = fsp->fr_skip; + fkp->fks_ret.value.ul 
= fsp->fr_ret; + fkp->fks_acct.value.ul = fsp->fr_acct; + fkp->fks_bnfr.value.ul = fsp->fr_bnfr; + fkp->fks_nfr.value.ul = fsp->fr_nfr; + fkp->fks_cfr.value.ul = fsp->fr_cfr; + fkp->fks_bads.value.ul = fsp->fr_bads; + fkp->fks_ads.value.ul = fsp->fr_ads; + fkp->fks_chit.value.ul = fsp->fr_chit; + fkp->fks_tcpbad.value.ul = fsp->fr_tcpbad; + fkp->fks_pull[0].value.ul = fsp->fr_pull[0]; + fkp->fks_pull[1].value.ul = fsp->fr_pull[1]; + fkp->fks_badsrc.value.ul = fsp->fr_badsrc; + fkp->fks_badttl.value.ul = fsp->fr_badttl; + fkp->fks_bad.value.ul = fsp->fr_bad; + fkp->fks_ipv6.value.ul = fsp->fr_ipv6; + fkp->fks_ppshit.value.ul = fsp->fr_ppshit; + fkp->fks_ipud.value.ul = fsp->fr_ipud; + + return (0); +} + /* _init: module load entry point. Creates the kstats, then installs the module; the kstats are removed again if mod_install() fails. Returns the mod_install() result. */ +int _init() +{ + int ipfinst; + + ipf_kstat_init(); + ipfinst = mod_install(&modlink1); + if (ipfinst != 0) + ipf_kstat_fini(); +#ifdef IPFDEBUG + cmn_err(CE_NOTE, "IP Filter: _init() = %d", ipfinst); +#endif + return ipfinst; +} + + /* _fini: module unload entry point. The kstats are deleted only when mod_remove() succeeds. Returns the mod_remove() result. */ +int _fini(void) +{ + int ipfinst; + + ipfinst = mod_remove(&modlink1); +#ifdef IPFDEBUG + cmn_err(CE_NOTE, "IP Filter: _fini() = %d", ipfinst); +#endif + if (ipfinst == 0) + ipf_kstat_fini(); + return ipfinst; +} + + /* _info: module information entry point; returns the mod_info() result for this module linkage. */ +int _info(modinfop) +struct modinfo *modinfop; +{ + int ipfinst; + + ipfinst = mod_info(&modlink1, modinfop); +#ifdef IPFDEBUG + cmn_err(CE_NOTE, "IP Filter: _info(%x) = %x", modinfop, ipfinst); +#endif + return ipfinst; +} + + +#if SOLARIS2 < 10 + /* ipf_identify: identify entry point used before Solaris 10; claims the node only when its name is ipf. */ +static int ipf_identify(dip) +dev_info_t *dip; +{ +# ifdef IPFDEBUG + cmn_err(CE_NOTE, "IP Filter: ipf_identify(%x)", dip); +# endif + if (strcmp(ddi_get_name(dip), "ipf") == 0) + return (DDI_IDENTIFIED); + return (DDI_NOT_IDENTIFIED); +} +#endif + + /* ipf_attach: attach entry point. Fails unless the loaded pfil interface version equals the PFIL_INTERFACE this driver was built against. For DDI_ATTACH of instance 0, with the filter not yet running, it applies the driver properties, creates one minor node per ipf_devfiles entry (named after the last path component, minor number = table index), initialises the three rwlocks, calls iplattach(), registers the pfil hooks, arms fr_slowtimer for half a second ahead and marks the filter running. Returns DDI_SUCCESS on that path and DDI_FAILURE on every other. */ +static int ipf_attach(dip, cmd) +dev_info_t *dip; +ddi_attach_cmd_t cmd; +{ + char *s; + int i; + int instance; + +#ifdef IPFDEBUG + cmn_err(CE_NOTE, "IP Filter: ipf_attach(%x,%x)", dip, cmd); +#endif + + if ((pfilinterface != PFIL_INTERFACE) || (PFIL_INTERFACE < 2000000)) { + cmn_err(CE_NOTE, "pfilinterface(%d) != %d\n", 
pfilinterface, + PFIL_INTERFACE); + /* report failure with a DDI status like every other exit of this entry point, not an errno value */ + return DDI_FAILURE; + } + + switch (cmd) + { + case DDI_ATTACH: + instance = ddi_get_instance(dip); + /* Only one instance of ipf (instance 0) can be attached. */ + if (instance > 0) + return DDI_FAILURE; + if (fr_running != 0) + return DDI_FAILURE; + +#ifdef IPFDEBUG + cmn_err(CE_NOTE, "IP Filter: attach ipf instance %d", instance); +#endif + + (void) ipf_property_update(dip); + + for (i = 0; ((s = ipf_devfiles[i]) != NULL); i++) { + s = strrchr(s, '/'); + if (s == NULL) + continue; + s++; + if (ddi_create_minor_node(dip, s, S_IFCHR, i, + DDI_PSEUDO, 0) == + DDI_FAILURE) { + ddi_remove_minor_node(dip, NULL); + goto attach_failed; + } + } + + ipf_dev_info = dip; + /* + * Initialize the rwlocks + */ + RWLOCK_INIT(&ipf_global, "ipf filter load/unload mutex"); + RWLOCK_INIT(&ipf_mutex, "ipf filter rwlock"); + RWLOCK_INIT(&ipf_frcache, "ipf cache rwlock"); + + /* + * Lock people out while we set things up. + */ + WRITE_ENTER(&ipf_global); + if ((fr_running != 0) || (iplattach() == -1)) { + RWLOCK_EXIT(&ipf_global); + goto attach_failed; + } + + if (pfil_add_hook(fr_check, PFIL_IN|PFIL_OUT, &pfh_inet4)) + cmn_err(CE_WARN, "IP Filter: %s(pfh_inet4) failed", + "pfil_add_hook"); +#ifdef USE_INET6 + if (pfil_add_hook(fr_check, PFIL_IN|PFIL_OUT, &pfh_inet6)) + cmn_err(CE_WARN, "IP Filter: %s(pfh_inet6) failed", + "pfil_add_hook"); +#endif + if (pfil_add_hook(fr_qifsync, PFIL_IN|PFIL_OUT, &pfh_sync)) + cmn_err(CE_WARN, "IP Filter: %s(pfh_sync) failed", + "pfil_add_hook"); + + fr_timer_id = timeout(fr_slowtimer, NULL, + drv_usectohz(500000)); + + fr_running = 1; + + RWLOCK_EXIT(&ipf_global); + + cmn_err(CE_CONT, "!%s, running.\n", ipfilter_version); + + return DDI_SUCCESS; + /* NOTREACHED */ + default: + break; + } + +attach_failed: +#ifdef IPFDEBUG + cmn_err(CE_NOTE, "IP Filter: failed to attach\n"); +#endif + /* + * Use our own detach routine to toss + * away any stuff we allocated above. 
+ */ + (void) ipf_detach(dip, DDI_DETACH); + return DDI_FAILURE; +} + + /* ipf_detach: detach entry point. For DDI_DETACH, with no outstanding references and the filter running, it marks the filter as shutting down (fr_running = -2), removes the pfil hooks, cancels the timer, removes the properties and minor nodes, calls ipldetach() and destroys the three rwlocks. Returns DDI_SUCCESS only when ipldetach() returns 0; every other path returns DDI_FAILURE. */ +static int ipf_detach(dip, cmd) +dev_info_t *dip; +ddi_detach_cmd_t cmd; +{ + int i; + +#ifdef IPFDEBUG + cmn_err(CE_NOTE, "IP Filter: ipf_detach(%x,%x)", dip, cmd); +#endif + switch (cmd) { + case DDI_DETACH: + if (fr_refcnt != 0) + return DDI_FAILURE; + + if (fr_running == -2 || fr_running == 0) + break; + /* + * Make sure we are the only ones modifying things. With + * this lock others should just fall out of the loop. + */ + WRITE_ENTER(&ipf_global); + if (fr_running <= 0) { + RWLOCK_EXIT(&ipf_global); + return DDI_FAILURE; + } + fr_running = -2; + + if (pfil_remove_hook(fr_check, PFIL_IN|PFIL_OUT, &pfh_inet4)) + cmn_err(CE_WARN, "IP Filter: %s(pfh_inet4) failed", + "pfil_remove_hook"); +#ifdef USE_INET6 + if (pfil_remove_hook(fr_check, PFIL_IN|PFIL_OUT, &pfh_inet6)) + cmn_err(CE_WARN, "IP Filter: %s(pfh_inet6) failed", + "pfil_remove_hook"); +#endif + if (pfil_remove_hook(fr_qifsync, PFIL_IN|PFIL_OUT, &pfh_sync)) + cmn_err(CE_WARN, "IP Filter: %s(pfh_sync) failed", + "pfil_remove_hook"); + + RWLOCK_EXIT(&ipf_global); + + if (fr_timer_id != 0) { + (void) untimeout(fr_timer_id); + fr_timer_id = 0; + } + + /* + * Undo what we did in ipf_attach, freeing resources + * and removing things we installed. The system + * framework guarantees we are not active with this devinfo + * node in any other entry points at this time. 
+ */ + ddi_prop_remove_all(dip); + i = ddi_get_instance(dip); + ddi_remove_minor_node(dip, NULL); + if (i > 0) { + cmn_err(CE_CONT, "IP Filter: still attached (%d)\n", i); + return DDI_FAILURE; + } + + WRITE_ENTER(&ipf_global); + if (!ipldetach()) { + RWLOCK_EXIT(&ipf_global); + RW_DESTROY(&ipf_mutex); + RW_DESTROY(&ipf_frcache); + RW_DESTROY(&ipf_global); + cmn_err(CE_CONT, "!%s detached.\n", ipfilter_version); + return (DDI_SUCCESS); + } + RWLOCK_EXIT(&ipf_global); + break; + default: + break; + } + cmn_err(CE_NOTE, "IP Filter: failed to detach\n"); + return DDI_FAILURE; +} + + +/* + * getinfo entry point: while the filter is running, answers + * DDI_INFO_DEVT2DEVINFO with the saved dev_info pointer and + * DDI_INFO_DEVT2INSTANCE with instance 0; anything else fails. + */ +/*ARGSUSED*/ +static int ipf_getinfo(dip, infocmd, arg, result) +dev_info_t *dip; +ddi_info_cmd_t infocmd; +void *arg, **result; +{ + int error; + + if (fr_running <= 0) + return DDI_FAILURE; + error = DDI_FAILURE; +#ifdef IPFDEBUG + cmn_err(CE_NOTE, "IP Filter: ipf_getinfo(%x,%x,%x)", dip, infocmd, arg); +#endif + switch (infocmd) { + case DDI_INFO_DEVT2DEVINFO: + *result = ipf_dev_info; + error = DDI_SUCCESS; + break; + case DDI_INFO_DEVT2INSTANCE: + *result = (void *)0; + error = DDI_SUCCESS; + break; + default: + break; + } + return (error); +} + + +/* + * look for inconsistencies between the list of interfaces the filter knows + * about and those which are currently configured. Registered as the + * pfh_sync hook; only the qif argument is used. Always returns 0. + */ +/*ARGSUSED*/ +static int fr_qifsync(ip, hlen, il, out, qif, mp) +ip_t *ip; +int hlen; +void *il; +int out; +void *qif; +mblk_t **mp; +{ + + frsync(qif); + /* + * Resync. any NAT `connections' using this interface and its IP #. + */ + fr_natsync(qif); + fr_statesync(qif); + return 0; +} + + +/* + * look for inconsistencies between the list of interfaces the filter knows + * about and those which are currently configured. Calls frsync() with no + * specific interface. Always returns 0. + */ +int ipfsync() +{ + frsync(NULL); + return 0; +} + + +/* + * Fetch configuration file values that have been entered into the ipf.conf + * driver file. 
+ */ +static int ipf_property_update(dip) +dev_info_t *dip; +{ + ipftuneable_t *ipft; + int64_t *i64p; + char *name; + u_int one; + int *i32p; + int err; + +#ifdef DDI_NO_AUTODETACH + if (ddi_prop_update_int(DDI_DEV_T_NONE, dip, + DDI_NO_AUTODETACH, 1) != DDI_PROP_SUCCESS) { + cmn_err(CE_WARN, "!updating DDI_NO_AUTODETACH failed"); + return DDI_FAILURE; + } +#else + if (ddi_prop_update_int(DDI_DEV_T_NONE, dip, + "ddi-no-autodetach", 1) != DDI_PROP_SUCCESS) { + cmn_err(CE_WARN, "!updating ddi-no-autodetach failed"); + return DDI_FAILURE; + } +#endif + + err = DDI_SUCCESS; + ipft = ipf_tuneables; + for (ipft = ipf_tuneables; (name = ipft->ipft_name) != NULL; ipft++) { + one = 1; + switch (ipft->ipft_sz) + { + case 4 : + i32p = NULL; + err = ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, + 0, name, &i32p, &one); + if (err == DDI_PROP_NOT_FOUND) + continue; +#ifdef IPFDEBUG + cmn_err(CE_CONT, "IP Filter: lookup_int(%s) = %d\n", + name, err); +#endif + if (err != DDI_PROP_SUCCESS) + return err; + if (*i32p >= ipft->ipft_min && *i32p <= ipft->ipft_max) + *ipft->ipft_pint = *i32p; + else + err = DDI_PROP_CANNOT_DECODE; + ddi_prop_free(i32p); + break; + +#if SOLARIS2 > 8 + case 8 : + i64p = NULL; + err = ddi_prop_lookup_int64_array(DDI_DEV_T_ANY, dip, + 0, name, &i64p, &one); + if (err == DDI_PROP_NOT_FOUND) + continue; +# ifdef IPFDEBUG + cmn_err(CE_CONT, "IP Filter: lookup_int64(%s) = %d\n", + name, err); +# endif + if (err != DDI_PROP_SUCCESS) + return err; + if (*i64p >= ipft->ipft_min && *i64p <= ipft->ipft_max) + *ipft->ipft_pint = *i64p; + else + err = DDI_PROP_CANNOT_DECODE; + ddi_prop_free(i64p); + break; +#endif + + default : + break; + } + if (err != DDI_SUCCESS) + break; + } + + return err; +} diff --git a/usr/src/uts/common/inet/ipf/compat.h b/usr/src/uts/common/inet/pfil/compat.h index 776dd8d0c6..ddaa3354e2 100644 --- a/usr/src/uts/common/inet/ipf/compat.h +++ b/usr/src/uts/common/inet/pfil/compat.h @@ -3,7 +3,6 @@ * * See the IPFILTER.LICENCE file for 
details on licencing. */ - #ifdef DEBUG # define PFILDEBUG #endif @@ -42,3 +41,38 @@ # define ASSERT(x) #endif +/* + * The list of SAPs below all come from Sun's <atm/iftypes.h> file. It's not + * yet clear whether pfil should deal with any of these or not. + */ +#ifndef IFMP_SAP +# define IFMP_SAP 0x0065 +#endif + +#ifndef LANER_SAP +# define LANER_SAP 0x9999 +#endif + +#ifndef SNMP_SAP +# define SNMP_SAP 0x999a +#endif + +#ifndef ILMI_SAP +# define ILMI_SAP 0x999b +#endif + +#ifndef SIG_SAP +# define SIG_SAP 0x999c +#endif + +#ifndef Q93B_MGMT_SAP +# define Q93B_MGMT_SAP 0x999d +#endif + +#ifndef UTIL_SAP +# define UTIL_SAP 0x999e +#endif + +#ifndef ERROR_SAP +# define ERROR_SAP 0x999f +#endif diff --git a/usr/src/uts/common/inet/ipf/misc.c b/usr/src/uts/common/inet/pfil/misc.c index b65ca63837..2a8c84dd4c 100644 --- a/usr/src/uts/common/inet/ipf/misc.c +++ b/usr/src/uts/common/inet/pfil/misc.c @@ -1,8 +1,10 @@ /* - * Copyright (C) 2000 by Darren Reed. + * Copyright (C) 2003 by Darren Reed. + * + * See the IPFILTER.LICENCE file for details on licencing. */ #ifndef __hpux -#pragma ident "@(#)$Id: misc.c,v 1.9 2003/07/20 15:36:27 darrenr Exp $" +#pragma ident "@(#)$Id: misc.c,v 1.12 2003/11/29 07:11:03 darrenr Exp $" #else struct uio; #endif diff --git a/usr/src/uts/common/inet/ipf/ndd.c b/usr/src/uts/common/inet/pfil/ndd.c index c733a1d162..ce85cc0091 100644 --- a/usr/src/uts/common/inet/ipf/ndd.c +++ b/usr/src/uts/common/inet/pfil/ndd.c @@ -3,7 +3,7 @@ * * See the IPFILTER.LICENCE file for details on licencing. * - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -30,13 +30,15 @@ static int qif_report(queue_t *, mblk_t *, caddr_t); static int sill_report(queue_t *, mblk_t *, caddr_t); static int qif_ipmp_report(queue_t *, mblk_t *, caddr_t); static int qif_ipmp_set(queue_t *, mblk_t *, char *, caddr_t); +static int pfil_hl_set(queue_t *, mblk_t *, char *, caddr_t); extern int pfil_report(queue_t *, mblk_t *, caddr_t); #else static int qif_report(queue_t *, mblk_t *, caddr_t, cred_t *); static int sill_report(queue_t *, mblk_t *, caddr_t, cred_t *); static int qif_ipmp_report(queue_t *, mblk_t *, caddr_t, cred_t *); -static int qif_ipmp_set(queue_t *, mblk_t *, char *, caddr_t, cred_t *); +static int qif_ipmp_set(queue_t *, mblk_t *, char *, caddr_t , cred_t *); +static int pfil_hl_set(queue_t *, mblk_t *, char *, caddr_t , cred_t *); extern int pfil_report(queue_t *, mblk_t *, caddr_t, cred_t *); #endif @@ -101,16 +103,24 @@ int pfil_nd_set(queue_t *q, mblk_t *mp, char *str, caddr_t ptr, cred_t *cred) (void *)q, (void *)mp, str, (void *)str, (void *)ptr)); +#if (SOLARIS2 >= 10) if (ddi_strtol(str, &end, 10, &i) != 0) return (EINVAL); - +#else + i = mi_strtol(str, &end, 10); +#endif + if (ptr == (caddr_t)&pfildebug) { #ifdef PFILDEBUG +#if (SOLARIS2 >= 10) if ((end == str) || (i < 0) || (i > 100)) +#else + if (i < 0 || i > 1) +#endif #endif return EINVAL; } else if (ptr == (caddr_t)&qif_verbose) { - if (i < 0 || i > 1) + if ((end == str) || (i < 0) || (i > 1)) return EINVAL; } *((int *)ptr) = i; @@ -122,7 +132,7 @@ int pfil_nd_set(queue_t *q, mblk_t *mp, char *str, caddr_t ptr, cred_t *cred) /* ------------------------------------------------------------------------ */ /* Function: pfil_ioctl_nd */ -/* Returns: int - B_TRUE == success, B_FALSE == getset error */ +/* Returns: int - B_TRUE == success, B_FALSE == getset error */ /* Parameters: q(I) - pointer to queue */ /* mp(I) - pointer to mblk */ /* */ @@ -146,11 +156,13 @@ int pfil_ioctl_nd(queue_t *q, mblk_t *mp) int pfil_nd_init() { +#ifdef PFILDEBUG if 
(!nd_load(&pfil_nd, "pfildebug", pfil_nd_get, pfil_nd_set, (caddr_t)&pfildebug)) { nd_free(&pfil_nd); return -1; } +#endif if (!nd_load(&pfil_nd, "pfil_delayed_copy", pfil_nd_get, pfil_nd_set, (caddr_t)&pfil_delayed_copy)) { @@ -209,6 +221,11 @@ int pfil_nd_init() return -1; } + if (!nd_load(&pfil_nd, "pfil_hl", NULL, pfil_hl_set, NULL)) { + nd_free(&pfil_nd); + return -1; + } + return 0; } @@ -267,6 +284,7 @@ static int qif_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *cred) } + /* ------------------------------------------------------------------------ */ /* Function: sill_report */ /* Returns: int */ @@ -303,6 +321,7 @@ static int sill_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *cred) return 0; } + /* ------------------------------------------------------------------------ */ /* Function: qif_ipmp_report */ /* Returns: int */ @@ -378,3 +397,46 @@ static int qif_ipmp_set(queue_t *q, mblk_t *mp, char *str, caddr_t ptr, return 0; } + +/* ------------------------------------------------------------------------ */ +/* Function: pfil_hl_set */ +/* Returns: int - always 0 */ +/* Parameters: q(I) - pointer to queue */ +/* mp(I) - pointer to mblk */ +/* str(I) - pointer to new value as a string (modified in place) */ +/* ptr(I) - not stored through; only printed in the debug trace */ +/* cred(I) - pointer to credential information */ +/* */ +/* Explicitly set the header length (hl) field of the qif structure. This */ +/* is used in situations where pfil cannot, for some reason, automatically */ +/* determine it via either ioctl snooping or looking at passing messages. 
*/ +/* ndd -set /dev/pfil pfil_hl ipmp0=14 or v4:ipmp0=14 (several settings may be given in one string, separated by ';') */ +/* ------------------------------------------------------------------------ */ +#if !defined(sun) || SOLARIS2 <= 8 +/*ARGSUSED*/ +static int pfil_hl_set(queue_t *q, mblk_t *mp, char *str, caddr_t ptr) +#else +/*ARGSUSED*/ +static int pfil_hl_set(queue_t *q, mblk_t *mp, char *str, caddr_t ptr, + cred_t *cred) +#endif +{ + char *s, *t; + + /* LINTED: E_CONSTANT_CONDITION */ + PRINT(2, (CE_CONT, "pfil_hl_set(0x%lx,0x%lx,0x%lx[%s],0x%lx)\n", + (u_long)q, (u_long)mp, (u_long)str, str, (u_long)ptr)); + + t = NULL; + s = str; + do { + if (t != NULL) + s = t + 1; /* step past the previous separator */ + t = strchr(s, ';'); /* find the end of the current entry */ + if (t != NULL) + *t = '\0'; /* cut the entry out of str in place */ + qif_hl_set(s); /* hand one entry to qif_hl_set() */ + } while (t != NULL); + + return 0; +} diff --git a/usr/src/uts/common/inet/ipf/os.h b/usr/src/uts/common/inet/pfil/os.h index f0c8e7502a..b7a77e130e 100644 --- a/usr/src/uts/common/inet/ipf/os.h +++ b/usr/src/uts/common/inet/pfil/os.h @@ -1,3 +1,8 @@ +/* + * Copyright (C) 2003 by Darren Reed. + * + * See the IPFILTER.LICENCE file for details on licencing. + */ #include <sys/sunddi.h> #include <sys/ddi.h> #if SOLARIS2 >= 6 @@ -28,3 +33,8 @@ #define KMFREE(v, z) kmem_free(v, z) extern caddr_t pfil_nd; + +#if defined(atomic_add_long) && (SOLARIS2 < 7) +# undef atomic_add_long +# define atomic_add_long(x,y) atomic_add_32((uint32_t *)x, y) +#endif diff --git a/usr/src/uts/common/inet/ipf/pfil.c b/usr/src/uts/common/inet/pfil/pfil.c index a42055d693..d44f1ac5bc 100644 --- a/usr/src/uts/common/inet/ipf/pfil.c +++ b/usr/src/uts/common/inet/pfil/pfil.c @@ -3,12 +3,11 @@ * * See the IPFILTER.LICENCE file for details on licencing. 
* - * ident "@(#)$Id: pfil.c,v 1.22 2003/08/18 22:13:59 darrenr Exp $" + * ident "@(#)$Id: pfil.c,v 1.27 2003/11/30 09:45:57 darrenr Exp $" * - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ - #ifndef __hpux #pragma ident "%Z%%M% %I% %E% SMI" #else @@ -80,11 +79,11 @@ struct pfil_head pfh_sync; static int pfil_list_add(pfil_list_t *, int (*) __P((struct ip *, int, void *, int, - struct qif *, mblk_t **)), + void *, mblk_t **)), int); static int pfil_list_remove(pfil_list_t *, int (*) __P((struct ip *, int, void *, int, - struct qif *, mblk_t **))); + void *, mblk_t **))); /* ------------------------------------------------------------------------ */ @@ -116,14 +115,14 @@ int pfil_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *cred) (void) mi_mpprintf(mp, "in"); (void) mi_mpprintf(mp, "function\tflags"); for (p = ph->ph_in.pfl_top; p; p = p->pfil_next) - (void)mi_mpprintf(mp,"%p\t%x", - (void *)p->pfil_func, p->pfil_flags); + (void) mi_mpprintf(mp,"%p\t%x", + (void *)p->pfil_func, p->pfil_flags); (void) mi_mpprintf(mp, "out"); (void) mi_mpprintf(mp, "function\tflags"); for (p = ph->ph_out.pfl_top; p; p = p->pfil_next) - (void)mi_mpprintf(mp,"%p\t%x", - (void *)p->pfil_func, p->pfil_flags); + (void) mi_mpprintf(mp,"%p\t%x", + (void *)p->pfil_func, p->pfil_flags); RW_EXIT(&ph->ph_lock); @@ -176,7 +175,7 @@ pfil_init(ph) int pfil_add_hook(func, flags, ph) int (*func) __P((struct ip *, int, void *, int, - struct qif *, mblk_t **)); + void *, mblk_t **)); int flags; struct pfil_head *ph; { @@ -216,7 +215,7 @@ static int pfil_list_add(list, func, flags) pfil_list_t *list; int (*func) __P((struct ip *, int, void *, int, - struct qif *, mblk_t **)); + void *, mblk_t **)); int flags; { struct packet_filter_hook *pfh; @@ -270,7 +269,7 @@ pfil_list_add(list, func, flags) int pfil_remove_hook(func, flags, ph) int (*func) __P((struct ip *, int, void *, int, - struct qif *, mblk_t **)); + void *, mblk_t **)); int flags; struct pfil_head *ph; { @@ -309,7 +308,7 @@ static int pfil_list_remove(list, func) pfil_list_t *list; int (*func) __P((struct ip *, int, void *, int, - struct qif *, mblk_t **)); + void *, mblk_t **)); { struct packet_filter_hook *pfh; diff 
--git a/usr/src/uts/common/inet/pfil/pfil.conf b/usr/src/uts/common/inet/pfil/pfil.conf new file mode 100644 index 0000000000..1cf479a0d6 --- /dev/null +++ b/usr/src/uts/common/inet/pfil/pfil.conf @@ -0,0 +1,28 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" + +name="pfil" parent="pseudo" instance=0; + diff --git a/usr/src/uts/common/inet/ipf/pfil.h b/usr/src/uts/common/inet/pfil/pfil.h index 9c3da19714..97f926da8b 100644 --- a/usr/src/uts/common/inet/ipf/pfil.h +++ b/usr/src/uts/common/inet/pfil/pfil.h @@ -3,7 +3,7 @@ * * See the IPFILTER.LICENCE file for details on licencing. * - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -12,8 +12,8 @@ #ifndef _NET_PFIL_H_ #define _NET_PFIL_H_ -#define PFIL_RELEASE "2.1.4" -#define PFIL_VERSION 2010400 +#define PFIL_RELEASE "2.1.6" +#define PFIL_VERSION 2010600 #define PFIL_INTERFACE 2000000 #ifndef __P @@ -24,6 +24,7 @@ # endif #endif +#ifdef sun # include <inet/ip.h> # if SOLARIS2 < 9 # include <netinet/in_systm.h> @@ -34,15 +35,19 @@ # undef IPOPT_SSRR # include <netinet/ip.h> # endif +#endif +#ifdef __hpux +# include <netinet/in_systm.h> +# include <netinet/in.h> +# include <netinet/ip.h> +#endif -struct qif; -struct ip; typedef struct packet_filter_hook { struct packet_filter_hook *pfil_next; struct packet_filter_hook **pfil_pnext; - int (*pfil_func) __P((struct ip *, int, void *, int, struct qif *, - mblk_t **)); + int (*pfil_func) __P((struct ip *, int, void *, int, + void *, mblk_t **)); int pfil_flags; } packet_filter_hook_t; @@ -82,12 +87,12 @@ typedef struct pfil_head { void pfil_init __P((struct pfil_head *)); struct packet_filter_hook *pfil_hook_get __P((int, struct pfil_head *)); int pfil_add_hook __P((int (*func) __P((struct ip *, int, void *, int, - struct qif *, mblk_t **)), int, + void *, mblk_t **)), int, struct pfil_head *)); int pfil_remove_hook __P((int (*func) __P((struct ip *, int, void *, int, - struct qif *, mblk_t **)), int, + void *, mblk_t **)), int, struct pfil_head *)); -int pfil_sendbuf __P((mblk_t *)); +int pfil_sendbuf(mblk_t *); mblk_t *pfil_make_dl_packet __P((mblk_t *, struct ip *, void *, char *, queue_t **)); void pfil_send_dl_packet __P((queue_t *, mblk_t *)); @@ -103,8 +108,6 @@ extern struct pfil_head pfh_sync; /* Notification of interface */ extern krwlock_t qif_rwlock; extern krwlock_t pfil_rw; -extern u_int pfil_ip_csum_hdr __P((u_char *)); - /* * NOTE: On Solaris, even though pfilwput(), etc, are prototyped as returning * an int, the return value is never checked and much code ignores it, anyway, diff --git a/usr/src/uts/common/inet/pfil/pfild.h b/usr/src/uts/common/inet/pfil/pfild.h new file mode 
100644 index 0000000000..e866e514ae --- /dev/null +++ b/usr/src/uts/common/inet/pfil/pfild.h @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2003 by Darren Reed. + * + * See the IPFILTER.LICENCE file for details on licencing. + * + * Copyright 2003 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/socket.h> +#include <net/if.h> +#include <netinet/in.h> + +/* + * STREAMS control messages used to communicate between pfild and pfil. + * Messages are sent down to /dev/pfil as M_PROTO->M_DATA. + * M_PROTO block contains uint32_t command code. + * M_DATA block contains [an array of] the corresponding data structure. + */ + +/* + * Data structure used to pass interface configuration information from + * pfild to the pfil kernel module. + */ +#define PFILCMD_IFADDRS 1 +struct pfil_ifaddrs { + char name[LIFNAMSIZ]; + union { + struct sockaddr_in in; + struct sockaddr_in6 in6; + } localaddr; + union { + struct sockaddr_in in; + struct sockaddr_in6 in6; + } netmask; + union { + struct sockaddr_in in; + struct sockaddr_in6 in6; + } broadaddr; + union { + struct sockaddr_in in; + struct sockaddr_in6 in6; + } dstaddr; + uint_t mtu; +}; + +/* + * Data structure used to pass interface valid source address set information + * from pfild to the pfil kernel module. + */ +#define PFILCMD_IFADDRSET 2 +struct pfil_ifaddrset { + char name[LIFNAMSIZ]; + uint8_t af; + uint32_t nspans; +}; +struct pfil_v4span { + uint32_t first, last; /* in host byte order! */ +}; +struct pfil_v6span { + struct in6_addr first, last; +}; diff --git a/usr/src/uts/common/inet/ipf/pfildrv.c b/usr/src/uts/common/inet/pfil/pfildrv.c index e787b5338e..45d694d304 100644 --- a/usr/src/uts/common/inet/ipf/pfildrv.c +++ b/usr/src/uts/common/inet/pfil/pfildrv.c @@ -3,7 +3,7 @@ * * See the IPFILTER.LICENCE file for details on licencing. * - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -11,6 +11,7 @@ #include <sys/conf.h> #include <sys/debug.h> +#include <sys/atomic.h> #include <sys/ethernet.h> #include <sys/stream.h> #include <sys/errno.h> @@ -97,6 +98,9 @@ struct streamtab pfil_dev_strtab = { &pfil_rinit, &pfil_winit }; +extern int nulldev(); +extern int nodev(); + void pfil_donotip(int, qif_t *, queue_t *, mblk_t *, mblk_t *, struct ip *, size_t); static int pfil_info(dev_info_t *, ddi_info_cmd_t , void *, void **); static int pfil_attach(dev_info_t *, ddi_attach_cmd_t); @@ -105,9 +109,48 @@ static int pfil_identify(dev_info_t *); #endif static int pfil_detach(dev_info_t *, ddi_detach_cmd_t); +#ifdef DDI_DEFINE_STREAM_OPS DDI_DEFINE_STREAM_OPS(pfil_devops, nulldev, nulldev, pfil_attach, pfil_detach, nulldev, pfil_info, D_MP, &pfil_dev_strtab); +#else +static struct cb_ops pfil_ops = { + nodev, /* cb_open */ + nodev, /* cb_close */ + nodev, /* cb_strategy */ + nodev, /* cb_print */ + nodev, /* cb_dump */ + nodev, /* cb_read */ + nodev, /* cb_write */ + nodev, /* cb_ioctl */ + nodev, /* cb_devmap */ + nodev, /* cb_mmap */ + nodev, /* cb_segmap */ + nochpoll, /* cb_chpoll */ + ddi_prop_op, /* cb_prop_op */ + &pfilinfo, /* cb_stream */ + D_MP /* cb_flag */ +}; + +static struct dev_ops pfil_devops = +{ + DEVO_REV, /* devo_rev */ + 0, /* devo_refcnt */ + pfil_info, /* devo_getinfo */ +#if SOLARIS2 >= 10 + nulldev, +#else + pfil_identify, /* devo_identify */ +#endif + nulldev, /* devo_probe */ + pfil_attach, /* devo_attach */ + pfil_detach, /* devo_detach */ + nodev, /* devo_reset */ + &pfil_ops, /* devo_cb_ops */ + NULL /* devo_bus_ops */ +}; +#endif + static struct modldrv modldrv = { &mod_driverops, "pfil Streams driver "/**/PFIL_RELEASE, &pfil_devops }; @@ -115,9 +158,8 @@ static struct modldrv modldrv = { /************************************************************************ * STREAMS module information */ -static int pfilmodopen(queue_t 
*q, dev_t *devp, int flag, int sflag, - cred_t *crp); -static int pfilmodclose(queue_t *q, int flag, cred_t *crp); +static int pfilmodopen(queue_t *, dev_t *, int, int, cred_t *); +static int pfilmodclose(queue_t *, int, cred_t *); static struct qinit pfilmod_rinit = { (pfi_t)pfilmodrput, NULL, pfilmodopen, pfilmodclose, @@ -174,7 +216,11 @@ static int pfil_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) pfil_dev_info = devi; +#if SOLARIS2 >= 8 return (ddi_create_minor_node(devi, "pfil", S_IFCHR, 0, DDI_PSEUDO, 0)); +#else + return (ddi_create_minor_node(devi, "pfil", S_IFCHR, 0, NULL, 0)); +#endif } @@ -324,7 +370,6 @@ static int pfildevclose(queue_t *q, int flag, cred_t *crp) return 0; } - /************************************************************************ * STREAMS module functions */ @@ -394,8 +439,11 @@ static int pfilmodclose(queue_t *q, int flag, cred_t *crp) /* ------------------------------------------------------------------------ */ /* Function: pfil_precheck */ -/* Returns: int - < 0 is error in this function, 0 == pass packet, else */ -/* (> 0) indicates passing prohibited */ +/* Returns: int - < 0 pass packet because it's not a type subject to */ +/* firewall rules (i.e. internal STREAMS messages), */ +/* 0 == pass packet, else > 0 indicates passing */ +/* prohibited (possibly due to an error occuring in */ +/* this function.) 
*/ /* Parameters: q(I) - pointer to STREAMS queue */ /* mp(I) - pointer to STREAMS message */ /* qif(I) - pointer to per-queue interface information */ @@ -413,23 +461,32 @@ int pfil_precheck(queue_t *q, mblk_t **mp, int flags, qif_t *qif) { register struct ip *ip; size_t hlen, len, off, mlen, iphlen, plen; - int err, out, sap, realigned = 0; packet_filter_hook_t *pfh; + qpktinfo_t qpkt, *qpi; struct pfil_head *ph; mblk_t *m, *mt = *mp; - u_char *bp, *s; - qif_t qf, *qp; + int err, out, sap; + u_char *bp; #if SOLARIS2 >= 8 ip6_t *ip6; #endif #ifndef sparc u_short __ipoff, __iplen; #endif - qf = *qif; - qp = qif; - qif = &qf; - qif->qf_next = NULL; - qif->qf_flags = 0; + + qpi = &qpkt; + qpi->qpi_q = q; + qpi->qpi_off = 0; + qpi->qpi_name = qif->qf_name; + qpi->qpi_real = qif; + qpi->qpi_ill = qif->qf_ill; + qpi->qpi_hl = qif->qf_hl; + qpi->qpi_ppa = qif->qf_ppa; + qpi->qpi_num = qif->qf_num; + qpi->qpi_flags = qif->qf_flags; + qpi->qpi_max_frag = qif->qf_max_frag; + if ((flags & PFIL_GROUP) != 0) + qpi->qpi_flags |= QF_GROUP; /* * If there is only M_DATA for a packet going out, then any header @@ -437,10 +494,9 @@ int pfil_precheck(queue_t *q, mblk_t **mp, int flags, qif_t *qif) * the M_DATA) is prepended before the IP header. We need to set the * offset to account for this. */ - qif->qf_off = 0; out = (flags & PFIL_OUT) ? 1 : 0; - off = (out) ? qif->qf_hl : 0; -tryagain: + off = (out) ? 
qpi->qpi_hl : 0; + ip = NULL; m = NULL; #if SOLARIS2 >= 8 @@ -468,13 +524,13 @@ tryagain: off = 0; m = mt; } else { - atomic_add_long(&qp->qf_notdata, 1); + atomic_add_long(&qif->qf_notdata, 1); return -1; } } else { m = mt->b_cont; if (m == NULL) { - atomic_add_long(&qp->qf_nodata, 1); + atomic_add_long(&qif->qf_nodata, 1); return -3; /* No data blocks */ } } @@ -484,7 +540,7 @@ tryagain: m = mt; break; default : - atomic_add_long(&qp->qf_notdata, 1); + atomic_add_long(&qif->qf_notdata, 1); return -2; } @@ -497,7 +553,7 @@ tryagain: off = 0; /* Any non-M_DATA cancels the offset */ if (m == NULL) { - atomic_add_long(&qp->qf_nodata, 1); + atomic_add_long(&qif->qf_nodata, 1); return -3; /* No data blocks */ } @@ -510,7 +566,7 @@ tryagain: if ((dl->dl_primitive == DL_UNITDATA_IND) && (dl->dl_group_address == 1)) { - qif->qf_flags |= QF_GROUP; + qpi->qpi_flags |= QF_GROUP; if (((*((u_char *)m->b_rptr) == 0x0) && ((*((u_char *)m->b_rptr + 2) == 0x45)))) off += 2; @@ -519,52 +575,21 @@ tryagain: } /* - * If there is more than one copy of this message traversing the - * STREAMS stack (ie packet is being used for snoop data) then make a - * copy of it for our use so we become the sole owner of the new - * message and do a freemsg() on the one passed in as we're no longer - * using it or passing it up. - */ - if ((pfil_delayed_copy == 0) && (m->b_datap->db_ref > 1)) { - mblk_t *new; - -forced_copy: - new = copymsg(m); - if (new == NULL) { - atomic_add_long(&qp->qf_copyfail, 1); - return -3; - } - atomic_add_long(&qp->qf_copy, 1); - - if (mt != m) - mt->b_cont = new; - else { - *mp = new; - mt = new; - } - freemsg(m); - m = new; - } - - ip = (struct ip *)(m->b_rptr + off); - - /* * We might have a 1st data block which is really M_PROTO, i.e. 
it is * only big enough for the link layer header */ - while ((u_char *)ip >= m->b_wptr) { - len = (u_char *)ip - m->b_wptr; + while ((len = m->b_wptr - m->b_rptr) <= off) { + off -= len; m = m->b_cont; if (m == NULL) { - atomic_add_long(&qp->qf_nodata, 1); + atomic_add_long(&qif->qf_nodata, 1); return -4; /* not enough data for IP */ } - ip = (struct ip *)(m->b_rptr + len); } - off = (u_char *)ip - m->b_rptr; - mlen = msgdsize(m) - off; - if (mlen == 0) - mlen = mt->b_wptr - mt->b_rptr; + + ip = (struct ip *)(m->b_rptr + off); + len = m->b_wptr - m->b_rptr - off; + mlen = msgdsize(m); #ifdef IRE_ILL_CN sap = ((s_ill_t *)qif->qf_ill)->ill_sap; @@ -572,7 +597,105 @@ forced_copy: sap = ((ill_t *)qif->qf_ill)->ill_sap; #endif - if (sap == ETHERTYPE_IP) { + if (mlen == 0) + mlen = m->b_wptr - m->b_rptr; + mlen -= off; + +#ifdef PFILDEBUG + /*LINTED: E_CONSTANT_CONDITION*/ + PRINT(10,(CE_CONT, + "!IP Filter[%s]: out %d len %ld/%ld sap %d ip %p b_rptr %p off %ld m %p/%d/%d/%p mt %p/%d/%d/%p\n", + qif->qf_name, out, len, mlen, sap, + (void *)ip, (void *)m->b_rptr, off, + (void *)m, MTYPE(m), (int)MLEN(m), (void *)m->b_cont, + (void *)mt, MTYPE(mt), (int)MLEN(mt), (void *)mt->b_cont)); +#endif + + /* + * If there is more than one copy of this message traversing the + * STREAMS stack (ie the packet is being used for snoop data), the + * IP header isn't on a 32bit aligned address, or the IP header + * isn't contain within a single block, then make a copy which + * meets our requirements and do a freemsg on the one passed in + * since we're no longer using it or passing it up. 
+ */ + + if ((pfil_delayed_copy == 0 && m->b_datap->db_ref > 1) + || ((uintptr_t)ip & 0x3) || len < sizeof(*ip) + || (sap != IP_DL_SAP +#if SOLARIS2 >= 8 + && sap != IP6_DL_SAP +#endif + )) { + mblk_t *b; + mblk_t *nm; + mblk_t *nmt; + mblk_t *previous_nm; + +forced_copy: + nmt = NULL; + previous_nm = NULL; + + /* + * Duplicate the message block descriptors up to (and + * including if the offset is non-zero) the block where + * IP begins. + */ + for (b = mt; b != m || off; b = b->b_cont) { + nm = dupb(b); + if (nm == NULL) { + atomic_add_long(&qif->qf_copyfail, 1); + if (nmt) + freemsg(nmt); + return ENOBUFS; + } + + nm->b_cont = NULL; + if (nmt) + linkb(previous_nm, nm); + else + nmt = nm; + previous_nm = nm; + + /* + * Set the length so the block only contains what + * appears before IP. + */ + if (b == m) { + nm->b_wptr = nm->b_rptr + off; + break; + } + } + + m->b_rptr += off; + nm = msgpullup(m, -1); + m->b_rptr -= off; + + if (nm == NULL) { + atomic_add_long(&qif->qf_copyfail, 1); + if (nmt) + freemsg(nmt); + return ENOBUFS; + } + + if (nmt) + linkb(previous_nm, nm); + else + nmt = nm; + + freemsg(mt); + + *mp = nmt; + mt = nmt; + m = nm; + + ip = (struct ip *)m->b_rptr; + len = m->b_wptr - m->b_rptr; + mlen = len; + off = 0; + } + + if (sap == IP_DL_SAP) { u_short tlen; hlen = sizeof(*ip); @@ -582,7 +705,6 @@ forced_copy: ((char *)&tlen)[1] = ((char *)&ip->ip_len)[1]; plen = ntohs(tlen); - sap = 0; ph = &pfh_inet4; } #if SOLARIS2 >= 8 @@ -599,165 +721,43 @@ forced_copy: if (plen == 0) return EMSGSIZE; /* Jumbo gram */ - sap = IP6_DL_SAP; ph = &pfh_inet6; } #endif else { - hlen = 0; sap = -1; } - - len = m->b_wptr - m->b_rptr - off; -#ifdef PFILDEBUG - /*LINTED: E_CONSTANT_CONDITION*/ - PRINT(10,(CE_CONT, - "!IP Filter[%s]: out %d len %ld/%ld sap %d ip %p b_rptr %p off %ld m %p/%d/%d/%p mt %p/%d/%d/%p\n", - qif->qf_name, out, len, mlen, sap, - (void *)ip, (void *)m->b_rptr, off, - (void *)m, MTYPE(m), (int)MLEN(m), (void *)m->b_cont, - (void *)mt, MTYPE(mt), 
(int)MLEN(mt), (void *)mt->b_cont)); -#endif - - /* - * Ok, the IP header isn't on a 32bit aligned address so fix this. - */ - if (((uintptr_t)ip & 0x3) || (len < sizeof(*ip)) || (sap == -1)) { - mblk_t *m2, *m1; - int off2; - - if (m->b_datap->db_ref > 1) - goto forced_copy; - /* - * If we have already tried to realign the IP header and we - * are back here, then the attempt has failed, so stop now - * rather than try again (could keep on retrying with no - * benefit.) - */ - if (realigned) { - atomic_add_long(&qp->qf_drop, 1); - return EINVAL; - } - realigned = 1; - - len = msgdsize(m); - if (len < sizeof(*ip)) { - atomic_add_long(&qp->qf_bad, 1); - return EINVAL; - } - - /* - * XXX - Now I understand how pullupmsg() & STREAMS messages - * work better, this can possibly be junked in favour of using - * pullupmsg() which will preserve all the dblk bits correctly, - * as is done in fr_pullup in the ipf code. - */ - - /* - * Junk using pullupmsg() - */ - off2 = (uintptr_t)ip & 0x3; - if (off2) - off2 = 4 - off2; - m2 = allocb(len + off2, BPRI_HI); - if (m2 == NULL) { - atomic_add_long(&qp->qf_drop, 1); - return ENOBUFS; - } - - MTYPE(m2) = M_DATA; - if (m->b_rptr != (u_char *)ip) - m2->b_rptr += off2; - m2->b_wptr = m2->b_rptr + len; - m1 = m; - s = (u_char *)m->b_rptr; - for (bp = m2->b_rptr; m1 && (bp < m2->b_wptr); bp += len) { - len = MIN(m1->b_wptr - s, m2->b_wptr - bp); - bcopy(s, bp, len); - m1 = m1->b_cont; - if (m1 != NULL) - s = m1->b_rptr; - } - - if ((mt != m) && (mt->b_cont == m) && (off == 0)) { - /* - * check if the buffer we're changing is chained in- - * between other buffers and unlink/relink as required. 
- */ - (void) unlinkb(mt); /* should return 'm' */ - m1 = unlinkb(m); - if (m1 != NULL) - linkb(m2, m1); - freemsg(m); - linkb(mt, m2); - } else { - if (m == mt) { - m1 = unlinkb(mt); - if (m1) - linkb(m2, m1); - } - freemsg(mt); - *mp = m2; - mt = m2; - } - - off = 0; - goto tryagain; - } - - if (((sap == 0) && (ip->ip_v != IPVERSION)) + if (((sap == IP_DL_SAP) && (ip->ip_v != IPVERSION)) #if SOLARIS2 >= 8 || ((sap == IP6_DL_SAP) && (((ip6->ip6_vfc) & 0xf0) != 0x60)) #endif + || sap == -1 ) { - atomic_add_long(&qp->qf_notip, 1); + atomic_add_long(&qif->qf_notip, 1); #ifdef PFILDEBUG pfil_donotip(out, qif, q, m, mt, ip, off); #endif return EINVAL; } - /* - * The code in IPFilter assumes that both the ip_off and ip_len - * fields are in host byte order, so convert them here to fulfill - * that expectation. - * - * If the target compile host is non-SPARC, assume it is a little - * endian machine, requiring the conversion of offset/length fields - * to both be host byte ordered. - */ -#ifndef sparc - if (sap == 0) { - __ipoff = (u_short)ip->ip_off; - ip->ip_len = plen; - ip->ip_off = ntohs(__ipoff); - } -#endif - if (sap == 0) + if (sap == IP_DL_SAP) iphlen = ip->ip_hl << 2; #if SOLARIS2 >= 8 else if (sap == IP6_DL_SAP) iphlen = sizeof(ip6_t); #endif + if (( #if SOLARIS2 >= 8 - (sap == IP6_DL_SAP) && (mlen < iphlen + plen)) || - ((sap == 0) && + (sap == IP6_DL_SAP) && (mlen < plen)) || + ((sap == IP_DL_SAP) && #endif ((iphlen < hlen) || (iphlen > plen) || (mlen < plen)))) { /* * Bad IP packet or not enough data/data length mismatches */ -#ifndef sparc - if (sap == 0) { - __ipoff = (u_short)ip->ip_off; - - ip->ip_len = htons(plen); - ip->ip_off = htons(__ipoff); - } -#endif - atomic_add_long(&qp->qf_bad, 1); + atomic_add_long(&qif->qf_bad, 1); return EINVAL; } @@ -769,28 +769,43 @@ forced_copy: if (m->b_datap->db_ref > 1) goto forced_copy; if (!pullupmsg(m, (int)iphlen + off)) { - atomic_add_long(&qp->qf_nodata, 1); - return -5; + atomic_add_long(&qif->qf_nodata, 1); + 
return ENOBUFS; } ip = (struct ip *)ALIGN32(m->b_rptr + off); } - if (sap == IP6_DL_SAP) { - if ((len > iphlen + plen) && (off == 0)) - m->b_wptr -= len - (iphlen + plen); - } else { - if ((len > plen) && (off == 0)) - m->b_wptr -= len - plen; + /* + * Discard any excess data. + */ + if (sap == IP6_DL_SAP && len > iphlen + plen) + m->b_wptr = m->b_rptr + off + plen + iphlen; + else if (sap == IP_DL_SAP && len > plen) + m->b_wptr = m->b_rptr + off + plen; + + /* + * The code in IPFilter assumes that both the ip_off and ip_len + * fields are in host byte order, so convert them here to fulfill + * that expectation. + * + * If the target compile host is non-SPARC, assume it is a little + * endian machine, requiring the conversion of offset/length fields + * to both be host byte ordered. + */ +#ifndef sparc + if (sap == IP_DL_SAP) { + __ipoff = (u_short)ip->ip_off; + ip->ip_len = plen; + ip->ip_off = ntohs(__ipoff); } +#endif - qif->qf_m = m; - qif->qf_q = q; - qif->qf_data = ip; - qif->qf_oq = OTHERQ(q); - qif->qf_off = off; + qpi->qpi_m = m; + qpi->qpi_off = off; + qpi->qpi_data = ip; - if (qp->qf_ipmp != NULL) - qp = qp->qf_ipmp; + if (qif->qf_ipmp != NULL) + qif = qif->qf_ipmp; READ_ENTER(&ph->ph_lock); @@ -802,15 +817,24 @@ forced_copy: flags, (void *)ph, (void *)pfh)); for (; pfh; pfh = pfh->pfil_next) if (pfh->pfil_func) { - err = (*pfh->pfil_func)(ip, iphlen, qif->qf_ill, out, - qif, mp); + err = (*pfh->pfil_func)(ip, iphlen, qif, out, qpi, mp); if (err || !*mp) break; - ip = qif->qf_data; + /* + * fr_pullup may have allocated a new buffer. + */ + ip = qpi->qpi_data; } RW_EXIT(&ph->ph_lock); /* + * Functions called via pfil_func should only return values >= 0, so + * convert any that are < 0 to be > 0 and preserve the absolute value. + */ + if (err < 0) + err = -err; + + /* * If we still have a STREAMS message after calling the filtering * hooks, return the byte order of the fields changed above on * platforms where this is required. 
They are refetched from the @@ -818,8 +842,8 @@ forced_copy: * them in some way. */ #ifndef sparc - if (*mp != NULL) { - if (sap == 0) { + if ((err == 0) && (*mp != NULL)) { + if (sap == IP_DL_SAP) { __iplen = (u_short)ip->ip_len; __ipoff = (u_short)ip->ip_off; ip->ip_len = htons(__iplen); @@ -827,7 +851,6 @@ forced_copy: } } #endif - return err; } @@ -1015,6 +1038,9 @@ static void pfil_remif(queue_t *rq) #endif /* IRE_ILL_CN */ +/************************************************************************ + * + */ #ifdef PFILDEBUG /* ------------------------------------------------------------------------ */ /* Function: pfil_donotip */ @@ -1090,6 +1116,51 @@ void pfil_donotip(int out, qif_t *qif, queue_t *q, mblk_t *m, mblk_t *mt, struct #endif +/* ------------------------------------------------------------------------ */ +/* Function: pfil_property_update */ +/* Returns: int - DDI_SUCCESS == success, else failure */ +/* Parameters: modinfop(I) - pointer to module informatio buffer */ +/* */ +/* Fetch configuration file values that have been entered into the */ +/* pfil.conf driver file. 
*/ +/* ------------------------------------------------------------------------ */ +static int pfil_property_update(dev_info_t *dip) +{ + char *list, *s, *t; + int err; + + if (ddi_prop_update_int(DDI_DEV_T_ANY, dip, + "ddi-no-autodetach", 1) == -1) { + cmn_err(CE_WARN, "!updating ddi-no-authdetach failed"); + return DDI_FAILURE; + } + + list = NULL; + err = ddi_prop_lookup_string(DDI_DEV_T_ANY, dip, + 0, "qif_ipmp_set", &list); +#ifdef IPFDEBUG + cmn_err(CE_CONT, "IP Filter: lookup_string(pfil_ipmp_list) = %d\n", + err); +#endif + if (err == DDI_SUCCESS) { + t = NULL; + s = list; + do { + if (t != NULL) + s = t + 1; + t = strchr(s, ';'); + if (t != NULL) + *t = '\0'; + qif_ipmp_update(s); + } while (t != NULL); + + ddi_prop_free(list); + } + + return DDI_SUCCESS; +} + + #if SOLARIS2 == 8 int miocpullup(mblk_t *m, size_t len) { @@ -1098,4 +1169,3 @@ int miocpullup(mblk_t *m, size_t len) return pullupmsg(m->b_cont, len); } #endif - diff --git a/usr/src/uts/common/inet/ipf/pfilstream.c b/usr/src/uts/common/inet/pfil/pfilstream.c index 580f60d95a..51d1b30d5a 100644 --- a/usr/src/uts/common/inet/ipf/pfilstream.c +++ b/usr/src/uts/common/inet/pfil/pfilstream.c @@ -3,7 +3,7 @@ * * See the IPFILTER.LICENCE file for details on licencing. * - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -20,6 +20,7 @@ struct uio; #include <sys/dlpi.h> #include <sys/cmn_err.h> #ifdef sun +# include <sys/atomic.h> # include <sys/sockio.h> # include <sys/ksynch.h> # include <sys/strsubr.h> @@ -27,6 +28,7 @@ struct uio; #endif #ifdef __hpux # include <sys/dlpi_ext.h> +# include <net/mtcp.h> #endif #include <netinet/in.h> #include <netinet/in_systm.h> @@ -37,6 +39,8 @@ struct uio; # include <inet/common.h> # if SOLARIS2 >= 8 # include <netinet/ip6.h> +# else +# include <net/if_dl.h> # endif # if SOLARIS2 >= 10 # include <sys/policy.h> @@ -50,17 +54,10 @@ struct uio; # include <inet/ip_if.h> #endif -#ifdef sun -# include <inet/ipf/compat.h> -# include <inet/ipf/pfil.h> -# include <inet/ipf/qif.h> -# include <pfild.h> -#else -# include "compat.h" -# include "pfil.h" -# include "qif.h" -# include "pfild.h" -#endif +#include "compat.h" +#include "qif.h" +#include "pfil.h" +#include "pfild.h" #if SOLARIS2 >= 10 extern queue_t *pfildq; @@ -85,6 +82,7 @@ extern queue_t *pfildq; static int pfil_drv_priv __P((cred_t *)); + #ifdef PFILDEBUG /* ------------------------------------------------------------------------ */ /* Function: pfil_printmchain */ @@ -209,6 +207,7 @@ static void pfil_printioctl(mblk_t *mp) } #endif /* PFILDEBUG */ + /* ------------------------------------------------------------------------ */ /* Function: pfilbind */ /* Returns: int - 0 == success, else error */ @@ -234,7 +233,7 @@ int pfilbind(queue_t *q) /* ------------------------------------------------------------------------ */ /* Function: pfilwput_ioctl */ -/* Returns: void */ +/* Returns: void */ /* Parameters: q(I) - pointer to queue */ /* mp(I) - pointer to STREAMS message */ /* */ @@ -318,7 +317,21 @@ void pfilwput_ioctl(queue_t *q, mblk_t *mp) break; #endif #endif /* pre-S10 */ - default: +#ifdef __hpux + case ND_SET : + case ND_GET : + if (pfil_ioctl_nd(q, mp)) { + if (iocp->ioc_error) + iocp->ioc_count = 0; + mp->b_datap->db_type = M_IOCACK; + qreply(q, mp); + } else { + miocnak(q, mp, 
0, EINVAL); + } + return; + break; +#endif + default : break; } @@ -408,7 +421,7 @@ static void pfil_update_ifaddrset(mblk_t *mp) */ /* ------------------------------------------------------------------------ */ /* Function: pfilwput */ -/* Returns: void */ +/* Returns: void */ /* Parameters: q(I) - pointer to queue */ /* mp(I) - pointer to STREAMS message */ /* */ @@ -416,19 +429,21 @@ static void pfil_update_ifaddrset(mblk_t *mp) /* /dev/pfil, not the STREAMS module pushed on another queue. As it does */ /* not do any IO, this should never be called except to handle ioctl's and */ /* so all other messages are free'd and no reply sent back. */ -/* The only ioctls handled by the driver are ND_GET/ND_SET. */ -/* pfilwput also handles PFILCMD_IFADDRS and PFILCMD_IFADDRSET messages */ +/* The only ioctls handled by the driver are ND_GET/ND_SET. */ +/* pfilwput also handles PFILCMD_IFADDRS and PFILCMD_IFADDRSET messages. */ +/* NOTE: HP-UX does not need or have pfil implemented as a STREAMS device. 
*/ /* ------------------------------------------------------------------------ */ +#ifdef sun void pfilwput(queue_t *q, mblk_t *mp) { struct iocblk *iocp; uint32_t cmd; -#ifdef PFILDEBUG +# ifdef PFILDEBUG /* LINTED: E_CONSTANT_CONDITION */ PRINT(9,(CE_CONT, "!pfilwput(%p,%p) [%s] qif %p\n", (void *)q, (void *)mp, QTONM(q), (void *)q->q_ptr)); -#endif +# endif switch (MTYPE(mp)) { @@ -463,7 +478,8 @@ void pfilwput(queue_t *q, mblk_t *mp) #endif case M_IOCTL: iocp = (struct iocblk *)mp->b_rptr; - switch (iocp->ioc_cmd) { + switch (iocp->ioc_cmd) + { case ND_SET : case ND_GET : if (pfil_ioctl_nd(q, mp)) { @@ -475,17 +491,20 @@ void pfilwput(queue_t *q, mblk_t *mp) miocnak(q, mp, 0, EINVAL); } break; - default: + + default : miocnak(q, mp, 0, EINVAL); break; } return; - default: + default : break; } + freemsg(mp); } +#endif /************************************************************************ @@ -493,7 +512,7 @@ void pfilwput(queue_t *q, mblk_t *mp) */ /* ------------------------------------------------------------------------ */ /* Function: pfilmodwput */ -/* Returns: Void. 
*/ +/* Returns: void */ /* Parameters: q(I) - pointer to queue */ /* mp(I) - pointer to STREAMS message */ /* */ @@ -527,7 +546,7 @@ void pfilmodwput(queue_t *q, mblk_t *mp) break; } - /*FALLTHROUGH*/ + /*FALLTHROUGH*/ case M_DATA : atomic_add_long(&qif->qf_nw, 1); @@ -535,6 +554,7 @@ void pfilmodwput(queue_t *q, mblk_t *mp) int i; i = pfil_precheck(q, &mp, PFIL_OUT, qif); + /* LINTED: E_CONSTANT_CONDITION */ PRINT(9, (CE_CONT, "!%s: pfil_precheck=%d mp %p\n", "pfilmodwput", i, (void *)mp)); @@ -547,9 +567,10 @@ void pfilmodwput(queue_t *q, mblk_t *mp) } break; - case M_IOCTL: + case M_IOCTL : pfilwput_ioctl(q, mp); return; + default : break; } @@ -572,9 +593,10 @@ void pfilmodrput(queue_t *q, mblk_t *mp) { union DL_primitives *dl; dl_bind_ack_t *b; - int i; + int i, flags; qif_t *qif; + flags = 0; qif = q->q_ptr; /* LINTED: E_CONSTANT_CONDITION */ @@ -582,7 +604,9 @@ void pfilmodrput(queue_t *q, mblk_t *mp) (void *)q, (void *)mp, mp->b_datap->db_type, QTONM(q), QTONM(OTHERQ(q)), (void *)qif, (void *)qif->qf_ill)); - switch (MTYPE(mp)) { + + switch (MTYPE(mp)) + { #ifdef DL_IOC_HDR_INFO case M_IOCACK : { @@ -602,6 +626,7 @@ void pfilmodrput(queue_t *q, mblk_t *mp) pfil_printioctl(mp); #endif break; + case M_PROTO : case M_PCPROTO : @@ -616,7 +641,7 @@ void pfilmodrput(queue_t *q, mblk_t *mp) case DL_UNITDATA_IND : if ((MLEN(mp) >= sizeof(dl_unitdata_ind_t)) && (dl->unitdata_ind.dl_group_address)) - qif->qf_flags |= QF_GROUP; + flags |= PFIL_GROUP; break; case DL_SUBS_BIND_ACK : @@ -625,7 +650,6 @@ void pfilmodrput(queue_t *q, mblk_t *mp) c = (dl_subs_bind_ack_t *)dl; if (qif->qf_sap == 0) { -/* XXX: What is this message? 
*/ #if 0 qif->qf_sap = c->dl_sap; if (qif->qf_sap < 0) @@ -655,7 +679,9 @@ void pfilmodrput(queue_t *q, mblk_t *mp) qif->qf_waitack++; break; } - if (!b->dl_sap || b->dl_sap == IP_DL_SAP || b->dl_sap == IP6_DL_SAP) + + if (!b->dl_sap || b->dl_sap == IP_DL_SAP || + b->dl_sap == IP6_DL_SAP) (void) pfilbind(q); break; @@ -672,7 +698,8 @@ void pfilmodrput(queue_t *q, mblk_t *mp) atomic_add_long(&qif->qf_nr, 1); if (qif->qf_ill != NULL) { - i = pfil_precheck(q, &mp, PFIL_IN, qif); + flags |= PFIL_IN; + i = pfil_precheck(q, &mp, flags, qif); /* LINTED: E_CONSTANT_CONDITION */ PRINT(9, (CE_CONT, @@ -686,9 +713,11 @@ void pfilmodrput(queue_t *q, mblk_t *mp) } } break; + default : break; } + putnext(q, mp); } @@ -730,4 +759,3 @@ void pfil_startup() pfil_init(&pfh_inet6); pfil_init(&pfh_sync); } - diff --git a/usr/src/uts/common/inet/ipf/pkt.c b/usr/src/uts/common/inet/pfil/pkt.c index 0b4653b5d4..6a9c74b139 100644 --- a/usr/src/uts/common/inet/ipf/pkt.c +++ b/usr/src/uts/common/inet/pfil/pkt.c @@ -3,8 +3,6 @@ * * See the IPFILTER.LICENCE file for details on licencing. * - * ident "@(#)$Id: pkt.c,v 1.8 2003/07/28 05:13:58 darrenr Exp $" - * * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -25,7 +23,9 @@ #include <netinet/in.h> #include <netinet/ip.h> #if SOLARIS2 >= 8 -#include <netinet/ip6.h> +# include <netinet/ip6.h> +#else +# include <net/if_dl.h> #endif #undef IPOPT_EOL @@ -253,7 +253,6 @@ mblk_t *mb; /* Function: pfil_sendbuf */ /* Returns: int - 0 == success, 1 == failure */ /* Parameters: m(I) - pointer to streams message */ -/* v - indicated v4 or v6 */ /* */ /* Output an IPv4 packet to whichever interface has the correct route. 
*/ /* ------------------------------------------------------------------------ */ diff --git a/usr/src/uts/common/inet/ipf/qif.c b/usr/src/uts/common/inet/pfil/qif.c index 7715098c0f..fc74ca00fc 100644 --- a/usr/src/uts/common/inet/ipf/qif.c +++ b/usr/src/uts/common/inet/pfil/qif.c @@ -3,7 +3,7 @@ * * See the IPFILTER.LICENCE file for details on licencing. * - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -42,10 +42,10 @@ # endif # include <net/if_dl.h> #endif -#include <inet/common.h> #include <netinet/in.h> #include <netinet/in_systm.h> #include <netinet/ip.h> +#include <inet/common.h> #undef IPOPT_EOL #undef IPOPT_NOP #undef IPOPT_LSRR @@ -326,7 +326,7 @@ qif_attach(rq) (hdrsizes[ill->ill_type][0] == ill->ill_type)) qif->qf_hl = hdrsizes[ill->ill_type][1]; - if (qif->qf_hl == 0) { + if (qif->qf_hl == 0 && ill->ill_type != IFT_OTHER) { cmn_err(CE_WARN, "!Unknown layer 2 header size for %s type %d sap %x\n", qif->qf_name, ill->ill_type, ill->ill_sap); @@ -364,7 +364,7 @@ qif_attach(rq) pfh = pfil_hook_get(PFIL_IN, &pfh_sync); for (; pfh; pfh = pfh->pfil_next) if (pfh->pfil_func) - (void) (*pfh->pfil_func)(NULL, 0, ill, 0, qif, NULL); + (void) (*pfh->pfil_func)(NULL, 0, qif, 0, qif, NULL); RW_EXIT(&pfh_sync.ph_lock); @@ -420,6 +420,7 @@ qif_new(q, mflags) return qif; } + /* ------------------------------------------------------------------------ */ /* Function: qif_delete */ /* Returns: void */ @@ -470,7 +471,7 @@ queue_t *q; pfh = pfil_hook_get(PFIL_OUT, &pfh_sync); for (; pfh; pfh = pfh->pfil_next) if (pfh->pfil_func) - (void) (*pfh->pfil_func)(NULL, 0, qif->qf_ill, + (void) (*pfh->pfil_func)(NULL, 0, qif, 1, qif, NULL); RW_EXIT(&pfh_sync.ph_lock); } @@ -480,9 +481,10 @@ queue_t *q; freeb(qif->qf_addrset); mutex_destroy(&qif->qf_ptl.pt_lock); cv_destroy(&qif->qf_ptl.pt_cv); - if (qif->qf_qifsz == sizeof(*qif)) { + + if (qif->qf_qifsz == 
sizeof(*qif)) kmem_cache_free(qif_cache, qif); - } else { + else { KMFREE(qif, qif->qf_qifsz); } } @@ -604,6 +606,7 @@ qif_t *qif_walk(qif_t **qfp) return *qfp; } + /* ------------------------------------------------------------------------ */ /* Function: qif_ipmp_update */ /* Returns: void */ @@ -710,7 +713,7 @@ void qif_ipmp_delete(char *qifname) pfh = pfil_hook_get(PFIL_OUT, &pfh_sync); for (; pfh; pfh = pfh->pfil_next) if (pfh->pfil_func) - (void) (*pfh->pfil_func)(NULL, 0, qif->qf_ill, 1, + (void) (*pfh->pfil_func)(NULL, 0, qif, 1, qif, NULL); KMFREE(qif, qif->qf_qifsz); @@ -796,3 +799,59 @@ void qif_ipmp_syncslave(qif_t *target, const int sap) } } + +/* ------------------------------------------------------------------------ */ +/* Function: qif_hl_set */ +/* Returns: void */ +/* Parameters: ipmpconf(I) - string with header length setting for NIC */ +/* */ +/* For NICs that we cannot automatically determine the MAC header length of */ +/* we provide a manual crook to achieve that with. 
The input syntax for */ +/* the string is "[v4:|v6:]<ifname>=<length>" */ +/* ------------------------------------------------------------------------ */ +void qif_hl_set(char *ipmpconf) +{ + qif_t *qf; + char *s; + + if (!strncmp(ipmpconf, "v4:", 3)) { + ipmpconf += 3; + } else if (!strncmp(ipmpconf, "v6:", 3)) { +#if SOLARIS2 >= 8 + ipmpconf += 3; +#else + return; +#endif + } + + s = strchr(ipmpconf, '='); + if (s != NULL) { + if (*(s + 1) == '\0') + *s = '\0'; + else + *s++ = '\0'; + } + if (s == NULL || *s == NULL) + return; + + READ_ENTER(&pfil_rw); + for (qf = qif_head; qf; qf = qf->qf_next) + if (strcmp(qf->qf_name, ipmpconf) == 0) + break; + + if (qf != NULL) { + int hl = 0; + + for (; *s != '\0'; s++) { + char c = *s; + + if (c < '0' || c > '9') + return; + hl *= 10; + hl += c - '0'; + } + qf->qf_hl = hl; + } + + RW_EXIT(&pfil_rw); +} diff --git a/usr/src/uts/common/inet/ipf/qif.h b/usr/src/uts/common/inet/pfil/qif.h index 2e3c7cc886..7371ab420a 100644 --- a/usr/src/uts/common/inet/ipf/qif.h +++ b/usr/src/uts/common/inet/pfil/qif.h @@ -3,14 +3,17 @@ * * See the IPFILTER.LICENCE file for details on licencing. * - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #pragma ident "%Z%%M% %I% %E% SMI" -#include "pfil.h" -#include <sys/ptms.h> +#ifdef sun +# include <sys/dditypes.h> +# include <sys/ptms.h> +#endif + #ifdef IRE_ILL_CN typedef union { @@ -63,7 +66,6 @@ typedef struct s_ill_s { uint_t mtu; } s_ill_t; - typedef struct qif { /* for alignment reasons, the lock is first. 
*/ kmutex_t qf_lock; @@ -103,7 +105,7 @@ typedef struct qif { char qf_name[LIFNAMSIZ]; char *qf_members; - /* ON10 specific */ + /* ON(10, NV) specific */ mblk_t *qf_addrset; size_t qf_off; mblk_t *qf_m; @@ -155,7 +157,15 @@ typedef struct qpktinfo { #endif #ifdef __hpux -# define QF_V4_ADDR(x) ((ifinfot_t *)(x)->qf_ill)->ifi_addr[0] +# define QF_V4_ADDR(x) ((ifinfo_t *)(x)->qf_ill)->ifi_addr[0] +# define QF_V4_BROADCAST(x) 0 +# define QF_V4_NETMASK(x) 0xffffffff +# define QF_V4_PEERADDR(x) 0 +# ifdef USE_INET6 +# define QF_V6_BROADCAST(x) 0 +# define QF_V6_NETMASK(x) 0 +# define QF_V6_PEERADDR(x) 0 +# endif #endif @@ -170,14 +180,16 @@ extern int qif_startup(void); extern void qif_stop(void); extern void *qif_iflookup(char *, int); +#ifdef __hpux struct irinfo_s; extern void *ir_to_ill(struct irinfo_s *ir); - +#endif extern struct qif *qif_walk(struct qif **); extern struct qif *qif_head; extern int qif_verbose; extern void qif_update(struct qif *, mblk_t *); extern void qif_nd_init(void); +extern void qif_hl_set(char *); extern void qif_ipmp_delete(char *); extern void qif_ipmp_update(char *); extern void qif_ipmp_syncmaster(struct qif *, const int); |
