summaryrefslogtreecommitdiff
path: root/usr/src/uts/common/inet
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/uts/common/inet')
-rw-r--r--usr/src/uts/common/inet/ipf/bpf-ipf.h450
-rw-r--r--usr/src/uts/common/inet/ipf/fil.c6397
-rw-r--r--usr/src/uts/common/inet/ipf/ip_auth.c796
-rw-r--r--usr/src/uts/common/inet/ipf/ip_fil_solaris.c1612
-rw-r--r--usr/src/uts/common/inet/ipf/ip_frag.c885
-rw-r--r--usr/src/uts/common/inet/ipf/ip_htable.c618
-rw-r--r--usr/src/uts/common/inet/ipf/ip_log.c676
-rw-r--r--usr/src/uts/common/inet/ipf/ip_lookup.c532
-rw-r--r--usr/src/uts/common/inet/ipf/ip_nat.c4849
-rw-r--r--usr/src/uts/common/inet/ipf/ip_pool.c789
-rw-r--r--usr/src/uts/common/inet/ipf/ip_proxy.c858
-rw-r--r--usr/src/uts/common/inet/ipf/ip_state.c3806
-rw-r--r--usr/src/uts/common/inet/ipf/ipf.h315
-rw-r--r--usr/src/uts/common/inet/ipf/ipmon.h94
-rw-r--r--usr/src/uts/common/inet/ipf/ipt.h41
-rw-r--r--usr/src/uts/common/inet/ipf/netinet/Makefile32
-rw-r--r--usr/src/uts/common/inet/ipf/netinet/ip_auth.h64
-rw-r--r--usr/src/uts/common/inet/ipf/netinet/ip_compat.h2423
-rw-r--r--usr/src/uts/common/inet/ipf/netinet/ip_fil.h1431
-rw-r--r--usr/src/uts/common/inet/ipf/netinet/ip_frag.h96
-rw-r--r--usr/src/uts/common/inet/ipf/netinet/ip_ftp_pxy.c1458
-rw-r--r--usr/src/uts/common/inet/ipf/netinet/ip_h323_pxy.c294
-rw-r--r--usr/src/uts/common/inet/ipf/netinet/ip_htable.h82
-rw-r--r--usr/src/uts/common/inet/ipf/netinet/ip_ipsec_pxy.c346
-rw-r--r--usr/src/uts/common/inet/ipf/netinet/ip_irc_pxy.c438
-rw-r--r--usr/src/uts/common/inet/ipf/netinet/ip_lookup.h67
-rw-r--r--usr/src/uts/common/inet/ipf/netinet/ip_nat.h478
-rw-r--r--usr/src/uts/common/inet/ipf/netinet/ip_netbios_pxy.c118
-rw-r--r--usr/src/uts/common/inet/ipf/netinet/ip_pool.h91
-rw-r--r--usr/src/uts/common/inet/ipf/netinet/ip_pptp_pxy.c528
-rw-r--r--usr/src/uts/common/inet/ipf/netinet/ip_proxy.h458
-rw-r--r--usr/src/uts/common/inet/ipf/netinet/ip_raudio_pxy.c341
-rw-r--r--usr/src/uts/common/inet/ipf/netinet/ip_rcmd_pxy.c239
-rw-r--r--usr/src/uts/common/inet/ipf/netinet/ip_rpcb_pxy.c1452
-rw-r--r--usr/src/uts/common/inet/ipf/netinet/ip_state.h267
-rw-r--r--usr/src/uts/common/inet/ipf/netinet/ipl.h23
-rw-r--r--usr/src/uts/common/inet/ipf/opts.h71
-rw-r--r--usr/src/uts/common/inet/ipf/pfil.conf2
-rw-r--r--usr/src/uts/common/inet/ipf/radix.c1206
-rw-r--r--usr/src/uts/common/inet/ipf/radix.h166
-rw-r--r--usr/src/uts/common/inet/ipf/radix_ipf.h212
-rw-r--r--usr/src/uts/common/inet/ipf/solaris.c696
-rw-r--r--usr/src/uts/common/inet/pfil/compat.h (renamed from usr/src/uts/common/inet/ipf/compat.h)36
-rw-r--r--usr/src/uts/common/inet/pfil/misc.c (renamed from usr/src/uts/common/inet/ipf/misc.c)6
-rw-r--r--usr/src/uts/common/inet/pfil/ndd.c (renamed from usr/src/uts/common/inet/ipf/ndd.c)72
-rw-r--r--usr/src/uts/common/inet/pfil/os.h (renamed from usr/src/uts/common/inet/ipf/os.h)10
-rw-r--r--usr/src/uts/common/inet/pfil/pfil.c (renamed from usr/src/uts/common/inet/ipf/pfil.c)25
-rw-r--r--usr/src/uts/common/inet/pfil/pfil.conf28
-rw-r--r--usr/src/uts/common/inet/pfil/pfil.h (renamed from usr/src/uts/common/inet/ipf/pfil.h)27
-rw-r--r--usr/src/uts/common/inet/pfil/pfild.h65
-rw-r--r--usr/src/uts/common/inet/pfil/pfildrv.c (renamed from usr/src/uts/common/inet/ipf/pfildrv.c)500
-rw-r--r--usr/src/uts/common/inet/pfil/pfilstream.c (renamed from usr/src/uts/common/inet/ipf/pfilstream.c)92
-rw-r--r--usr/src/uts/common/inet/pfil/pkt.c (renamed from usr/src/uts/common/inet/ipf/pkt.c)7
-rw-r--r--usr/src/uts/common/inet/pfil/qif.c (renamed from usr/src/uts/common/inet/ipf/qif.c)75
-rw-r--r--usr/src/uts/common/inet/pfil/qif.h (renamed from usr/src/uts/common/inet/ipf/qif.h)26
55 files changed, 36465 insertions, 301 deletions
diff --git a/usr/src/uts/common/inet/ipf/bpf-ipf.h b/usr/src/uts/common/inet/ipf/bpf-ipf.h
new file mode 100644
index 0000000000..544455e5ff
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/bpf-ipf.h
@@ -0,0 +1,450 @@
+/*-
+ * Copyright (c) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from the Stanford/CMU enet packet filter,
+ * (net/enet.c) distributed as part of 4.3BSD, and code contributed
+ * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
+ * Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)bpf.h 7.1 (Berkeley) 5/7/91
+ *
+ * @(#) $Header: /devel/CVS/IP-Filter/bpf-ipf.h,v 2.1 2002/10/26 12:14:26 darrenr Exp $ (LBL)
+ */
+
+#ifndef BPF_MAJOR_VERSION
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* BSD style release date */
+#define BPF_RELEASE 199606
+
+typedef int bpf_int32;
+typedef u_int bpf_u_int32;
+
+/*
+ * Alignment macros. BPF_WORDALIGN rounds up to the next
+ * even multiple of BPF_ALIGNMENT.
+ */
+#ifndef __NetBSD__
+#define BPF_ALIGNMENT sizeof(bpf_int32)
+#else
+#define BPF_ALIGNMENT sizeof(long)
+#endif
+#define BPF_WORDALIGN(x) (((x)+(BPF_ALIGNMENT-1))&~(BPF_ALIGNMENT-1))
+
+#define BPF_MAXINSNS 512
+#define BPF_MAXBUFSIZE 0x8000
+#define BPF_MINBUFSIZE 32
+
+/*
+ * Structure for BIOCSETF.
+ */
+struct bpf_program {
+ u_int bf_len;
+ struct bpf_insn *bf_insns;
+};
+
+/*
+ * Struct returned by BIOCGSTATS.
+ */
+struct bpf_stat {
+ u_int bs_recv; /* number of packets received */
+ u_int bs_drop; /* number of packets dropped */
+};
+
+/*
+ * Struct returned by BIOCVERSION. This represents the version number of
+ * the filter language described by the instruction encodings below.
+ * bpf understands a program iff kernel_major == filter_major &&
+ * kernel_minor >= filter_minor, that is, if the value returned by the
+ * running kernel has the same major number and a minor number
+ * equal to or greater than that of the filter being downloaded. Otherwise, the
+ * results are undefined, meaning an error may be returned or packets
+ * may be accepted haphazardly.
+ * It has nothing to do with the source code version.
+ */
+struct bpf_version {
+ u_short bv_major;
+ u_short bv_minor;
+};
+/* Current version number of filter architecture. */
+#define BPF_MAJOR_VERSION 1
+#define BPF_MINOR_VERSION 1
+
+/*
+ * BPF ioctls
+ *
+ * The first set is for compatibility with Sun's pcc style
+ * header files. If your using gcc, we assume that you
+ * have run fixincludes so the latter set should work.
+ */
+#if (defined(sun) || defined(ibm032)) && !defined(__GNUC__)
+#define BIOCGBLEN _IOR(B,102, u_int)
+#define BIOCSBLEN _IOWR(B,102, u_int)
+#define BIOCSETF _IOW(B,103, struct bpf_program)
+#define BIOCFLUSH _IO(B,104)
+#define BIOCPROMISC _IO(B,105)
+#define BIOCGDLT _IOR(B,106, u_int)
+#define BIOCGETIF _IOR(B,107, struct ifreq)
+#define BIOCSETIF _IOW(B,108, struct ifreq)
+#define BIOCSRTIMEOUT _IOW(B,109, struct timeval)
+#define BIOCGRTIMEOUT _IOR(B,110, struct timeval)
+#define BIOCGSTATS _IOR(B,111, struct bpf_stat)
+#define BIOCIMMEDIATE _IOW(B,112, u_int)
+#define BIOCVERSION _IOR(B,113, struct bpf_version)
+#define BIOCSTCPF _IOW(B,114, struct bpf_program)
+#define BIOCSUDPF _IOW(B,115, struct bpf_program)
+#else
+#define BIOCGBLEN _IOR('B',102, u_int)
+#define BIOCSBLEN _IOWR('B',102, u_int)
+#define BIOCSETF _IOW('B',103, struct bpf_program)
+#define BIOCFLUSH _IO('B',104)
+#define BIOCPROMISC _IO('B',105)
+#define BIOCGDLT _IOR('B',106, u_int)
+#define BIOCGETIF _IOR('B',107, struct ifreq)
+#define BIOCSETIF _IOW('B',108, struct ifreq)
+#define BIOCSRTIMEOUT _IOW('B',109, struct timeval)
+#define BIOCGRTIMEOUT _IOR('B',110, struct timeval)
+#define BIOCGSTATS _IOR('B',111, struct bpf_stat)
+#define BIOCIMMEDIATE _IOW('B',112, u_int)
+#define BIOCVERSION _IOR('B',113, struct bpf_version)
+#define BIOCSTCPF _IOW('B',114, struct bpf_program)
+#define BIOCSUDPF _IOW('B',115, struct bpf_program)
+#endif
+
+/*
+ * Structure prepended to each packet.
+ */
+struct bpf_hdr {
+ struct timeval bh_tstamp; /* time stamp */
+ bpf_u_int32 bh_caplen; /* length of captured portion */
+ bpf_u_int32 bh_datalen; /* original length of packet */
+ u_short bh_hdrlen; /* length of bpf header (this struct
+ plus alignment padding) */
+};
+/*
+ * Because the structure above is not a multiple of 4 bytes, some compilers
+ * will insist on inserting padding; hence, sizeof(struct bpf_hdr) won't work.
+ * Only the kernel needs to know about it; applications use bh_hdrlen.
+ */
+#if defined(KERNEL) || defined(_KERNEL)
+#define SIZEOF_BPF_HDR 18
+#endif
+
+/*
+ * Data-link level type codes.
+ */
+
+/*
+ * These are the types that are the same on all platforms; on other
+ * platforms, a <net/bpf.h> should be supplied that defines the additional
+ * DLT_* codes appropriately for that platform (the BSDs, for example,
+ * should not just pick up this version of "bpf.h"; they should also define
+ * the additional DLT_* codes used by their kernels, as well as the values
+ * defined here - and, if the values they use for particular DLT_ types
+ * differ from those here, they should use their values, not the ones
+ * here).
+ */
+#define DLT_NULL 0 /* no link-layer encapsulation */
+#define DLT_EN10MB 1 /* Ethernet (10Mb) */
+#define DLT_EN3MB 2 /* Experimental Ethernet (3Mb) */
+#define DLT_AX25 3 /* Amateur Radio AX.25 */
+#define DLT_PRONET 4 /* Proteon ProNET Token Ring */
+#define DLT_CHAOS 5 /* Chaos */
+#define DLT_IEEE802 6 /* IEEE 802 Networks */
+#define DLT_ARCNET 7 /* ARCNET */
+#define DLT_SLIP 8 /* Serial Line IP */
+#define DLT_PPP 9 /* Point-to-point Protocol */
+#define DLT_FDDI 10 /* FDDI */
+
+/*
+ * These are values from the traditional libpcap "bpf.h".
+ * Ports of this to particular platforms should replace these definitions
+ * with the ones appropriate to that platform, if the values are
+ * different on that platform.
+ */
+#define DLT_ATM_RFC1483 11 /* LLC/SNAP encapsulated atm */
+#define DLT_RAW 12 /* raw IP */
+
+/*
+ * These are values from BSD/OS's "bpf.h".
+ * These are not the same as the values from the traditional libpcap
+ * "bpf.h"; however, these values shouldn't be generated by any
+ * OS other than BSD/OS, so the correct values to use here are the
+ * BSD/OS values.
+ *
+ * Platforms that have already assigned these values to other
+ * DLT_ codes, however, should give these codes the values
+ * from that platform, so that programs that use these codes will
+ * continue to compile - even though they won't correctly read
+ * files of these types.
+ */
+#ifdef __NetBSD__
+#ifndef DLT_SLIP_BSDOS
+#define DLT_SLIP_BSDOS 13 /* BSD/OS Serial Line IP */
+#define DLT_PPP_BSDOS 14 /* BSD/OS Point-to-point Protocol */
+#endif
+#else
+#define DLT_SLIP_BSDOS 15 /* BSD/OS Serial Line IP */
+#define DLT_PPP_BSDOS 16 /* BSD/OS Point-to-point Protocol */
+#endif
+
+#define DLT_ATM_CLIP 19 /* Linux Classical-IP over ATM */
+
+/*
+ * These values are defined by NetBSD; other platforms should refrain from
+ * using them for other purposes, so that NetBSD savefiles with link
+ * types of 50 or 51 can be read as this type on all platforms.
+ */
+#define DLT_PPP_SERIAL 50 /* PPP over serial with HDLC encapsulation */
+#define DLT_PPP_ETHER 51 /* PPP over Ethernet */
+
+/*
+ * Values between 100 and 103 are used in capture file headers as
+ * link-layer types corresponding to DLT_ types that differ
+ * between platforms; don't use those values for new DLT_ types.
+ */
+
+/*
+ * This value was defined by libpcap 0.5; platforms that have defined
+ * it with a different value should define it here with that value -
+ * a link type of 104 in a save file will be mapped to DLT_C_HDLC,
+ * whatever value that happens to be, so programs will correctly
+ * handle files with that link type regardless of the value of
+ * DLT_C_HDLC.
+ *
+ * The name DLT_C_HDLC was used by BSD/OS; we use that name for source
+ * compatibility with programs written for BSD/OS.
+ *
+ * libpcap 0.5 defined it as DLT_CHDLC; we define DLT_CHDLC as well,
+ * for source compatibility with programs written for libpcap 0.5.
+ */
+#define DLT_C_HDLC 104 /* Cisco HDLC */
+#define DLT_CHDLC DLT_C_HDLC
+
+#define DLT_IEEE802_11 105 /* IEEE 802.11 wireless */
+
+/*
+ * Values between 106 and 107 are used in capture file headers as
+ * link-layer types corresponding to DLT_ types that might differ
+ * between platforms; don't use those values for new DLT_ types.
+ */
+
+/*
+ * OpenBSD DLT_LOOP, for loopback devices; it's like DLT_NULL, except
+ * that the AF_ type in the link-layer header is in network byte order.
+ *
+ * OpenBSD defines it as 12, but that collides with DLT_RAW, so we
+ * define it as 108 here. If OpenBSD picks up this file, it should
+ * define DLT_LOOP as 12 in its version, as per the comment above -
+ * and should not use 108 as a DLT_ value.
+ */
+#define DLT_LOOP 108
+
+/*
+ * Values between 109 and 112 are used in capture file headers as
+ * link-layer types corresponding to DLT_ types that might differ
+ * between platforms; don't use those values for new DLT_ types
+ * other than the corresponding DLT_ types.
+ */
+
+/*
+ * This is for Linux cooked sockets.
+ */
+#define DLT_LINUX_SLL 113
+
+/*
+ * Apple LocalTalk hardware.
+ */
+#define DLT_LTALK 114
+
+/*
+ * Acorn Econet.
+ */
+#define DLT_ECONET 115
+
+/*
+ * Reserved for use with OpenBSD ipfilter.
+ */
+#define DLT_IPFILTER 116
+
+/*
+ * Reserved for use in capture-file headers as a link-layer type
+ * corresponding to OpenBSD DLT_PFLOG; DLT_PFLOG is 17 in OpenBSD,
+ * but that's DLT_LANE8023 in SuSE 6.3, so we can't use 17 for it
+ * in capture-file headers.
+ */
+#define DLT_PFLOG 117
+
+/*
+ * Registered for Cisco-internal use.
+ */
+#define DLT_CISCO_IOS 118
+
+/*
+ * Reserved for 802.11 cards using the Prism II chips, with a link-layer
+ * header including Prism monitor mode information plus an 802.11
+ * header.
+ */
+#define DLT_PRISM_HEADER 119
+
+/*
+ * Reserved for Aironet 802.11 cards, with an Aironet link-layer header
+ * (see Doug Ambrisko's FreeBSD patches).
+ */
+#define DLT_AIRONET_HEADER 120
+
+/*
+ * Reserved for Siemens HiPath HDLC.
+ */
+#define DLT_HHDLC 121
+
+/*
+ * Reserved for RFC 2625 IP-over-Fibre Channel, as per a request from
+ * Don Lee <donlee@cray.com>.
+ *
+ * This is not for use with raw Fibre Channel, where the link-layer
+ * header starts with a Fibre Channel frame header; it's for IP-over-FC,
+ * where the link-layer header starts with an RFC 2625 Network_Header
+ * field.
+ */
+#define DLT_IP_OVER_FC 122
+
+/*
+ * The instruction encodings.
+ */
+/* instruction classes */
+#define BPF_CLASS(code) ((code) & 0x07)
+#define BPF_LD 0x00
+#define BPF_LDX 0x01
+#define BPF_ST 0x02
+#define BPF_STX 0x03
+#define BPF_ALU 0x04
+#define BPF_JMP 0x05
+#define BPF_RET 0x06
+#define BPF_MISC 0x07
+
+/* ld/ldx fields */
+#define BPF_SIZE(code) ((code) & 0x18)
+#define BPF_W 0x00
+#define BPF_H 0x08
+#define BPF_B 0x10
+#define BPF_MODE(code) ((code) & 0xe0)
+#define BPF_IMM 0x00
+#define BPF_ABS 0x20
+#define BPF_IND 0x40
+#define BPF_MEM 0x60
+#define BPF_LEN 0x80
+#define BPF_MSH 0xa0
+
+/* alu/jmp fields */
+#define BPF_OP(code) ((code) & 0xf0)
+#define BPF_ADD 0x00
+#define BPF_SUB 0x10
+#define BPF_MUL 0x20
+#define BPF_DIV 0x30
+#define BPF_OR 0x40
+#define BPF_AND 0x50
+#define BPF_LSH 0x60
+#define BPF_RSH 0x70
+#define BPF_NEG 0x80
+#define BPF_JA 0x00
+#define BPF_JEQ 0x10
+#define BPF_JGT 0x20
+#define BPF_JGE 0x30
+#define BPF_JSET 0x40
+#define BPF_SRC(code) ((code) & 0x08)
+#define BPF_K 0x00
+#define BPF_X 0x08
+
+/* ret - BPF_K and BPF_X also apply */
+#define BPF_RVAL(code) ((code) & 0x18)
+#define BPF_A 0x10
+
+/* misc */
+#define BPF_MISCOP(code) ((code) & 0xf8)
+#define BPF_TAX 0x00
+#define BPF_TXA 0x80
+
+/*
+ * The instruction data structure.
+ */
+struct bpf_insn {
+ u_short code;
+ u_char jt;
+ u_char jf;
+ bpf_int32 k;
+};
+
+/*
+ * Macros for insn array initializers.
+ */
+#define BPF_STMT(code, k) { (u_short)(code), 0, 0, k }
+#define BPF_JUMP(code, k, jt, jf) { (u_short)(code), jt, jf, k }
+
+#if defined(BSD) && (defined(KERNEL) || defined(_KERNEL))
+/*
+ * Systems based on non-BSD kernels don't have ifnet's (or they don't mean
+ * anything if it is in <net/if.h>) and won't work like this.
+ */
+# if __STDC__
+extern void bpf_tap(struct ifnet *, u_char *, u_int);
+extern void bpf_mtap(struct ifnet *, struct mbuf *);
+extern void bpfattach(struct ifnet *, u_int, u_int);
+extern void bpfilterattach(int);
+# else
+extern void bpf_tap();
+extern void bpf_mtap();
+extern void bpfattach();
+extern void bpfilterattach();
+# endif /* __STDC__ */
+#endif /* BSD && (_KERNEL || KERNEL) */
+#if __STDC__ || defined(__cplusplus)
+extern int bpf_validate(struct bpf_insn *, int);
+extern u_int bpf_filter(struct bpf_insn *, u_char *, u_int, u_int);
+#else
+extern int bpf_validate();
+extern u_int bpf_filter();
+#endif
+
+/*
+ * Number of scratch memory words (for BPF_LD|BPF_MEM and BPF_ST).
+ */
+#define BPF_MEMWORDS 16
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/usr/src/uts/common/inet/ipf/fil.c b/usr/src/uts/common/inet/ipf/fil.c
new file mode 100644
index 0000000000..1dd95e9ba5
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/fil.c
@@ -0,0 +1,6397 @@
+/*
+ * Copyright (C) 1993-2003 by Darren Reed.
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ *
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#if defined(KERNEL) || defined(_KERNEL)
+# undef KERNEL
+# undef _KERNEL
+# define KERNEL 1
+# define _KERNEL 1
+#endif
+#include <sys/errno.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#if defined(__NetBSD__)
+# if (NetBSD >= 199905) && !defined(IPFILTER_LKM) && defined(_KERNEL)
+# include "opt_ipfilter_log.h"
+# endif
+#endif
+#if defined(_KERNEL) && defined(__FreeBSD_version) && \
+ (__FreeBSD_version >= 220000)
+# if (__FreeBSD_version >= 400000)
+# if !defined(IPFILTER_LKM)
+# include "opt_inet6.h"
+# endif
+# if (__FreeBSD_version == 400019)
+# define CSUM_DELAY_DATA
+# endif
+# endif
+# include <sys/filio.h>
+#else
+# include <sys/ioctl.h>
+#endif
+#if !defined(_AIX51)
+# include <sys/fcntl.h>
+#endif
+#if defined(_KERNEL)
+# include <sys/systm.h>
+# include <sys/file.h>
+#else
+# include <stdio.h>
+# include <string.h>
+# include <stdlib.h>
+# include <stddef.h>
+# include <sys/file.h>
+# define _KERNEL
+# ifdef __OpenBSD__
+struct file;
+# endif
+# include <sys/uio.h>
+# undef _KERNEL
+#endif
+#if !defined(__SVR4) && !defined(__svr4__) && !defined(__hpux) && \
+ !defined(linux)
+# include <sys/mbuf.h>
+#else
+# if !defined(linux)
+# include <sys/byteorder.h>
+# endif
+# if (SOLARIS2 < 5) && defined(sun)
+# include <sys/dditypes.h>
+# endif
+#endif
+#ifdef __hpux
+# define _NET_ROUTE_INCLUDED
+#endif
+#if !defined(linux)
+# include <sys/protosw.h>
+#endif
+#include <sys/socket.h>
+#include <net/if.h>
+#ifdef sun
+# include <net/af.h>
+#endif
+#if !defined(_KERNEL) && defined(__FreeBSD__)
+# include "radix_ipf.h"
+#endif
+#include <net/route.h>
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#if !defined(linux)
+# include <netinet/ip_var.h>
+#endif
+#if defined(__sgi) && defined(IFF_DRVRLOCK) /* IRIX 6 */
+# include <sys/hashing.h>
+# include <netinet/in_var.h>
+#endif
+#include <netinet/tcp.h>
+#if (!defined(__sgi) && !defined(AIX)) || defined(_KERNEL)
+# include <netinet/udp.h>
+# include <netinet/ip_icmp.h>
+#endif
+#ifdef __hpux
+# undef _NET_ROUTE_INCLUDED
+#endif
+#include "netinet/ip_compat.h"
+#ifdef USE_INET6
+# include <netinet/icmp6.h>
+# if !SOLARIS && defined(_KERNEL) && !defined(__osf__) && !defined(__hpux)
+# include <netinet6/in6_var.h>
+# endif
+#endif
+#include <netinet/tcpip.h>
+#include "netinet/ip_fil.h"
+#include "netinet/ip_nat.h"
+#include "netinet/ip_frag.h"
+#include "netinet/ip_state.h"
+#include "netinet/ip_proxy.h"
+#include "netinet/ip_auth.h"
+#ifdef IPFILTER_SCAN
+# include "netinet/ip_scan.h"
+#endif
+#ifdef IPFILTER_SYNC
+# include "netinet/ip_sync.h"
+#endif
+#include "netinet/ip_pool.h"
+#include "netinet/ip_htable.h"
+#ifdef IPFILTER_COMPILED
+# include "netinet/ip_rules.h"
+#endif
+#if defined(IPFILTER_BPF) && defined(_KERNEL)
+# include <net/bpf.h>
+#endif
+#if defined(__FreeBSD_version) && (__FreeBSD_version >= 300000)
+# include <sys/malloc.h>
+# if defined(_KERNEL) && !defined(IPFILTER_LKM)
+# include "opt_ipfilter.h"
+# endif
+#endif
+#include "netinet/ipl.h"
+/* END OF INCLUDES */
+
+#if !defined(lint)
+static const char sccsid[] = "@(#)fil.c 1.36 6/5/96 (C) 1993-2000 Darren Reed";
+static const char rcsid[] = "@(#)$Id: fil.c,v 2.243.2.64 2005/08/13 05:19:59 darrenr Exp $";
+#endif
+
+#ifndef _KERNEL
+# include "ipf.h"
+# include "ipt.h"
+# include "bpf-ipf.h"
+extern int opts;
+
+# define FR_VERBOSE(verb_pr) verbose verb_pr
+# define FR_DEBUG(verb_pr) debug verb_pr
+#else /* #ifndef _KERNEL */
+# define FR_VERBOSE(verb_pr)
+# define FR_DEBUG(verb_pr)
+#endif /* _KERNEL */
+
+
+fr_info_t frcache[2][8];
+struct filterstats frstats[2] = { { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 } };
+struct frentry *ipfilter[2][2] = { { NULL, NULL }, { NULL, NULL } },
+ *ipfilter6[2][2] = { { NULL, NULL }, { NULL, NULL } },
+ *ipacct6[2][2] = { { NULL, NULL }, { NULL, NULL } },
+ *ipacct[2][2] = { { NULL, NULL }, { NULL, NULL } },
+ *ipnatrules[2][2] = { { NULL, NULL }, { NULL, NULL } };
+struct frgroup *ipfgroups[IPL_LOGSIZE][2];
+char ipfilter_version[] = IPL_VERSION;
+int fr_refcnt = 0;
+/*
+ * For fr_running:
+ * 0 == loading, 1 = running, -1 = disabled, -2 = unloading
+ */
+int fr_running = 0;
+int fr_flags = IPF_LOGGING;
+int fr_active = 0;
+int fr_control_forwarding = 0;
+int fr_update_ipid = 0;
+u_short fr_ip_id = 0;
+int fr_chksrc = 0; /* causes a system crash if enabled */
+int fr_minttl = 4;
+int fr_icmpminfragmtu = 68;
+u_long fr_frouteok[2] = {0, 0};
+u_long fr_userifqs = 0;
+u_long fr_badcoalesces[2] = {0, 0};
+u_char ipf_iss_secret[32];
+#if defined(IPFILTER_DEFAULT_BLOCK)
+int fr_pass = FR_BLOCK|FR_NOMATCH;
+#else
+int fr_pass = (IPF_DEFAULT_PASS)|FR_NOMATCH;
+#endif
+int fr_features = 0
+#ifdef IPFILTER_LKM
+ | IPF_FEAT_LKM
+#endif
+#ifdef IPFILTER_LOG
+ | IPF_FEAT_LOG
+#endif
+#ifdef IPFILTER_LOOKUP
+ | IPF_FEAT_LOOKUP
+#endif
+#ifdef IPFILTER_BPF
+ | IPF_FEAT_BPF
+#endif
+#ifdef IPFILTER_COMPILED
+ | IPF_FEAT_COMPILED
+#endif
+#ifdef IPFILTER_CKSUM
+ | IPF_FEAT_CKSUM
+#endif
+#ifdef IPFILTER_SYNC
+ | IPF_FEAT_SYNC
+#endif
+#ifdef IPFILTER_SCAN
+ | IPF_FEAT_SCAN
+#endif
+#ifdef USE_INET6
+ | IPF_FEAT_IPV6
+#endif
+ ;
+
+static INLINE int fr_ipfcheck __P((fr_info_t *, frentry_t *, int));
+static int fr_portcheck __P((frpcmp_t *, u_short *));
+static int frflushlist __P((int, minor_t, int *, frentry_t **));
+static ipfunc_t fr_findfunc __P((ipfunc_t));
+static frentry_t *fr_firewall __P((fr_info_t *, u_32_t *));
+static int fr_funcinit __P((frentry_t *fr));
+static INLINE void frpr_ah __P((fr_info_t *));
+static INLINE void frpr_esp __P((fr_info_t *));
+static INLINE void frpr_gre __P((fr_info_t *));
+static INLINE void frpr_udp __P((fr_info_t *));
+static INLINE void frpr_tcp __P((fr_info_t *));
+static INLINE void frpr_icmp __P((fr_info_t *));
+static INLINE void frpr_ipv4hdr __P((fr_info_t *));
+static INLINE int frpr_pullup __P((fr_info_t *, int));
+static INLINE void frpr_short __P((fr_info_t *, int));
+static INLINE void frpr_tcpcommon __P((fr_info_t *));
+static INLINE void frpr_udpcommon __P((fr_info_t *));
+static INLINE int fr_updateipid __P((fr_info_t *));
+#ifdef IPFILTER_LOOKUP
+static int fr_grpmapinit __P((frentry_t *fr));
+static INLINE void *fr_resolvelookup __P((u_int, u_int, lookupfunc_t *));
+#endif
+static void frsynclist __P((frentry_t *, void *));
+static ipftuneable_t *fr_findtunebyname __P((const char *));
+static ipftuneable_t *fr_findtunebycookie __P((void *, void **));
+
+
+/*
+ * bit values for identifying presence of individual IP options
+ * All of these tables should be ordered by increasing key value on the left
+ * hand side to allow for binary searching of the array and include a trailer
+ * with a 0 for the bitmask for linear searches to easily find the end with.
+ */
+const struct optlist ipopts[20] = {
+ { IPOPT_NOP, 0x000001 },
+ { IPOPT_RR, 0x000002 },
+ { IPOPT_ZSU, 0x000004 },
+ { IPOPT_MTUP, 0x000008 },
+ { IPOPT_MTUR, 0x000010 },
+ { IPOPT_ENCODE, 0x000020 },
+ { IPOPT_TS, 0x000040 },
+ { IPOPT_TR, 0x000080 },
+ { IPOPT_SECURITY, 0x000100 },
+ { IPOPT_LSRR, 0x000200 },
+ { IPOPT_E_SEC, 0x000400 },
+ { IPOPT_CIPSO, 0x000800 },
+ { IPOPT_SATID, 0x001000 },
+ { IPOPT_SSRR, 0x002000 },
+ { IPOPT_ADDEXT, 0x004000 },
+ { IPOPT_VISA, 0x008000 },
+ { IPOPT_IMITD, 0x010000 },
+ { IPOPT_EIP, 0x020000 },
+ { IPOPT_FINN, 0x040000 },
+ { 0, 0x000000 }
+};
+
+#ifdef USE_INET6
+struct optlist ip6exthdr[] = {
+ { IPPROTO_HOPOPTS, 0x000001 },
+ { IPPROTO_IPV6, 0x000002 },
+ { IPPROTO_ROUTING, 0x000004 },
+ { IPPROTO_FRAGMENT, 0x000008 },
+ { IPPROTO_ESP, 0x000010 },
+ { IPPROTO_AH, 0x000020 },
+ { IPPROTO_NONE, 0x000040 },
+ { IPPROTO_DSTOPTS, 0x000080 },
+ { 0, 0 }
+};
+#endif
+
+struct optlist tcpopts[] = {
+ { TCPOPT_NOP, 0x000001 },
+ { TCPOPT_MAXSEG, 0x000002 },
+ { TCPOPT_WINDOW, 0x000004 },
+ { TCPOPT_SACK_PERMITTED, 0x000008 },
+ { TCPOPT_SACK, 0x000010 },
+ { TCPOPT_TIMESTAMP, 0x000020 },
+ { 0, 0x000000 }
+};
+
+/*
+ * bit values for identifying presence of individual IP security options
+ */
+const struct optlist secopt[8] = {
+ { IPSO_CLASS_RES4, 0x01 },
+ { IPSO_CLASS_TOPS, 0x02 },
+ { IPSO_CLASS_SECR, 0x04 },
+ { IPSO_CLASS_RES3, 0x08 },
+ { IPSO_CLASS_CONF, 0x10 },
+ { IPSO_CLASS_UNCL, 0x20 },
+ { IPSO_CLASS_RES2, 0x40 },
+ { IPSO_CLASS_RES1, 0x80 }
+};
+
+
+/*
+ * Table of functions available for use with call rules.
+ */
+static ipfunc_resolve_t fr_availfuncs[] = {
+#ifdef IPFILTER_LOOKUP
+ { "fr_srcgrpmap", fr_srcgrpmap, fr_grpmapinit },
+ { "fr_dstgrpmap", fr_dstgrpmap, fr_grpmapinit },
+#endif
+ { "", NULL }
+};
+
+
+/*
+ * The next section of code is a collection of small routines that set
+ * fields in the fr_info_t structure passed based on properties of the
+ * current packet. There are different routines for the same protocol
+ * for each of IPv4 and IPv6. Adding a new protocol, for which there
+ * will be "special" inspection for setup, is now more easily done by adding
+ * a new routine and expanding the frpr_ipinit*() function rather than by
+ * adding more code to a growing switch statement.
+ */
+#ifdef USE_INET6
+static INLINE int frpr_ah6 __P((fr_info_t *));
+static INLINE void frpr_esp6 __P((fr_info_t *));
+static INLINE void frpr_gre6 __P((fr_info_t *));
+static INLINE void frpr_udp6 __P((fr_info_t *));
+static INLINE void frpr_tcp6 __P((fr_info_t *));
+static INLINE void frpr_icmp6 __P((fr_info_t *));
+static INLINE int frpr_ipv6hdr __P((fr_info_t *));
+static INLINE void frpr_short6 __P((fr_info_t *, int));
+static INLINE int frpr_hopopts6 __P((fr_info_t *));
+static INLINE int frpr_routing6 __P((fr_info_t *));
+static INLINE int frpr_dstopts6 __P((fr_info_t *));
+static INLINE int frpr_fragment6 __P((fr_info_t *));
+static INLINE int frpr_ipv6exthdr __P((fr_info_t *, int, int));
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: frpr_short6 */
+/* Returns: void */
+/* Parameters: fin(I) - pointer to packet information */
+/* xmin(I) - minimum value of fin_dlen this packet must have */
+/* IPv6 Only */
+/* This function enforces the 'is a packet too short to be legit' rule */
+/* for IPv6 and marks the packet with FI_SHORT if so. See function comment */
+/* for frpr_short() for more details. */
+/* ------------------------------------------------------------------------ */
+static INLINE void frpr_short6(fin, xmin)
+fr_info_t *fin;
+int xmin;
+{
+
+ if (fin->fin_dlen < xmin) /* less data left than the caller requires */
+ fin->fin_flx |= FI_SHORT;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: frpr_ipv6hdr */
+/* Returns: int - 0 == ok, -1 == packet has been flagged with FI_BAD */
+/* Parameters: fin(I) - pointer to packet information */
+/* */
+/* IPv6 Only */
+/* Copy values from the IPv6 header into the fr_info_t struct, then walk the */
+/* next-header chain, calling the per-protocol analyzer for each if it exists. */
+/* ------------------------------------------------------------------------ */
+static INLINE int frpr_ipv6hdr(fin)
+fr_info_t *fin;
+{
+ ip6_t *ip6 = (ip6_t *)fin->fin_ip;
+ int p, go = 1, i, hdrcount; /* p: current next-header; go: keep walking */
+ fr_ip_t *fi = &fin->fin_fi;
+
+ fin->fin_off = 0;
+
+ fi->fi_tos = 0;
+ fi->fi_optmsk = 0;
+ fi->fi_secmsk = 0;
+ fi->fi_auth = 0;
+
+ p = ip6->ip6_nxt;
+ fi->fi_ttl = ip6->ip6_hlim;
+ fi->fi_src.in6 = ip6->ip6_src;
+ fi->fi_dst.in6 = ip6->ip6_dst;
+ fin->fin_id = 0;
+
+ hdrcount = 0; /* number of headers examined so far */
+ while (go && !(fin->fin_flx & (FI_BAD|FI_SHORT))) {
+ switch (p)
+ {
+ case IPPROTO_UDP :
+ frpr_udp6(fin);
+ go = 0;
+ break;
+
+ case IPPROTO_TCP :
+ frpr_tcp6(fin);
+ go = 0;
+ break;
+
+ case IPPROTO_ICMPV6 :
+ frpr_icmp6(fin);
+ go = 0;
+ break;
+
+ case IPPROTO_GRE :
+ frpr_gre6(fin);
+ go = 0;
+ break;
+
+ case IPPROTO_HOPOPTS :
+ /*
+ * hop by hop ext header is only allowed
+ * right after IPv6 header.
+ */
+ if (hdrcount != 0) {
+ fin->fin_flx |= FI_BAD;
+ p = IPPROTO_NONE;
+ } else {
+ p = frpr_hopopts6(fin);
+ }
+ break;
+
+ case IPPROTO_DSTOPTS :
+ p = frpr_dstopts6(fin);
+ break;
+
+ case IPPROTO_ROUTING :
+ p = frpr_routing6(fin);
+ break;
+
+ case IPPROTO_AH :
+ p = frpr_ah6(fin);
+ break;
+
+ case IPPROTO_ESP :
+ frpr_esp6(fin);
+ go = 0;
+ break;
+
+ case IPPROTO_IPV6 : /* NOTE(review): ol_bit is OR'd into fin_flx here, */
+ for (i = 0; ip6exthdr[i].ol_bit != 0; i++) /* whereas frpr_ipv6exthdr() */
+ if (ip6exthdr[i].ol_val == p) { /* uses fin_optmsk - confirm intent */
+ fin->fin_flx |= ip6exthdr[i].ol_bit;
+ break;
+ }
+ go = 0;
+ break;
+
+ case IPPROTO_NONE :
+ go = 0;
+ break;
+
+ case IPPROTO_FRAGMENT :
+ p = frpr_fragment6(fin);
+ if (fin->fin_off != 0) /* Not the first frag */
+ go = 0;
+ break;
+
+ default :
+ go = 0;
+ break;
+ }
+ hdrcount++;
+
+ /*
+ * It is important to note that, for the extension headers
+ * (go != 0), the entire header may not have been pulled up
+ * when the code gets to this point. This is
+ * only done for "go != 0" because the other header handlers
+ * will all pullup their complete header. The other indicator
+ * of an incomplete packet is that this was just an extension
+ * header.
+ */
+ if ((go != 0) && (p != IPPROTO_NONE) &&
+ (frpr_pullup(fin, 0) == -1)) {
+ p = IPPROTO_NONE;
+ go = 0;
+ }
+ }
+ fi->fi_p = p; /* last next-header value reached by the walk above */
+
+ if (fin->fin_flx & FI_BAD)
+ return -1;
+
+ return 0;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: frpr_ipv6exthdr */
+/* Returns: int - value of the next header or IPPROTO_NONE if error */
+/* Parameters: fin(I) - pointer to packet information */
+/* multiple(I) - flag indicating yes/no if multiple occurrences */
+/* of this extension header are allowed. */
+/* proto(I) - protocol number for this extension header */
+/* */
+/* IPv6 Only - note the header in fin_optmsk and step fin_dp/fin_dlen past it */
+/* ------------------------------------------------------------------------ */
+static INLINE int frpr_ipv6exthdr(fin, multiple, proto)
+fr_info_t *fin;
+int multiple, proto;
+{
+ struct ip6_ext *hdr;
+ u_short shift;
+ int i;
+
+ fin->fin_flx |= FI_V6EXTHDR;
+
+ /* 8 is default length of extension hdr */
+ if ((fin->fin_dlen - 8) < 0) {
+ fin->fin_flx |= FI_SHORT;
+ return IPPROTO_NONE;
+ }
+
+ if (frpr_pullup(fin, 8) == -1)
+ return IPPROTO_NONE;
+
+ hdr = fin->fin_dp;
+ shift = 8 + (hdr->ip6e_len << 3); /* bytes to skip: 8 + ip6e_len * 8 */
+ if (shift > fin->fin_dlen) { /* Nasty extension header length? */
+ fin->fin_flx |= FI_BAD;
+ return IPPROTO_NONE;
+ }
+
+ for (i = 0; ip6exthdr[i].ol_bit != 0; i++) /* ol_bit == 0 ends the table */
+ if (ip6exthdr[i].ol_val == proto) {
+ /*
+ * Most IPv6 extension headers are only allowed once.
+ */
+ if ((multiple == 0) &&
+ ((fin->fin_optmsk & ip6exthdr[i].ol_bit) != 0))
+ fin->fin_flx |= FI_BAD;
+ else
+ fin->fin_optmsk |= ip6exthdr[i].ol_bit;
+ break;
+ }
+
+ fin->fin_dp = (char *)fin->fin_dp + shift; /* step past this header */
+ fin->fin_dlen -= shift;
+
+ return hdr->ip6e_nxt; /* hdr still points at the header just skipped */
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    frpr_hopopts6                                               */
+/* Returns:     int    - value of the next header or IPPROTO_NONE if error  */
+/* Parameters:  fin(I) - pointer to packet information                      */
+/*                                                                          */
+/* IPv6 Only                                                                */
+/* Process a pending hop-by-hop options extension header. The header is    */
+/* handed to frpr_ipv6exthdr() with multiple occurrences disallowed.        */
+/* ------------------------------------------------------------------------ */
+static INLINE int frpr_hopopts6(fin)
+fr_info_t *fin;
+{
+	int nexthdr;
+
+	nexthdr = frpr_ipv6exthdr(fin, 0, IPPROTO_HOPOPTS);
+	return nexthdr;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    frpr_routing6                                               */
+/* Returns:     int    - value of the next header or IPPROTO_NONE if error  */
+/* Parameters:  fin(I) - pointer to packet information                      */
+/*                                                                          */
+/* IPv6 Only                                                                */
+/* Process a pending routing extension header. After the generic checks in */
+/* frpr_ipv6exthdr(), the header length field must be even; otherwise the   */
+/* packet is marked FI_BAD and fin_dp/fin_dlen are restored to point at the */
+/* routing header again.                                                    */
+/* ------------------------------------------------------------------------ */
+static INLINE int frpr_routing6(fin)
+fr_info_t *fin;
+{
+	struct ip6_ext *hdr;
+	int dlen, nxt, shift;
+
+	dlen = fin->fin_dlen;
+	nxt = frpr_ipv6exthdr(fin, 0, IPPROTO_ROUTING);
+	if (nxt == IPPROTO_NONE)
+		return IPPROTO_NONE;
+
+	/*
+	 * frpr_ipv6exthdr() calls frpr_pullup(), so a header pointer taken
+	 * before that call cannot be trusted afterwards. Locate the header
+	 * from the updated fin_dp instead: it has been advanced by exactly
+	 * the number of bytes removed from fin_dlen.
+	 */
+	shift = dlen - fin->fin_dlen;
+	hdr = (struct ip6_ext *)((char *)fin->fin_dp - shift);
+
+	/*
+	 * Nasty extension header length? The length (in bytes beyond the
+	 * first 8) must be a multiple of 16.
+	 */
+	if ((hdr->ip6e_len << 3) & 15) {
+		fin->fin_flx |= FI_BAD;
+		/*
+		 * Compensate for the changes made in frpr_ipv6exthdr()
+		 */
+		fin->fin_dlen += shift;
+		fin->fin_dp = (char *)fin->fin_dp - shift;
+		return IPPROTO_NONE;
+	}
+
+	/* Same value as hdr->ip6e_nxt, as returned by frpr_ipv6exthdr(). */
+	return nxt;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    frpr_fragment6                                              */
+/* Returns:     int    - value of the next header or IPPROTO_NONE if error  */
+/* Parameters:  fin(I) - pointer to packet information                      */
+/*                                                                          */
+/* IPv6 Only                                                                */
+/* Examine the IPv6 fragment header and extract fragment offset information.*/
+/*                                                                          */
+/* We don't know where the transport layer header (or whatever is next is), */
+/* as it could be behind destination options (amongst others).  Because     */
+/* there is no fragment cache, there is no knowledge about whether or not an*/
+/* upper layer header has been seen (or where it ends) and thus we are not  */
+/* able to continue processing beyond this header with any confidence.      */
+/*                                                                          */
+/* On success fin_id, fin_off, fin_flen are filled in and fin_dp/fin_dlen   */
+/* are left pointing just past the 8 byte fragment header.                  */
+/* ------------------------------------------------------------------------ */
+static INLINE int frpr_fragment6(fin)
+fr_info_t *fin;
+{
+	struct ip6_frag *frag;
+	int dlen, consumed;
+
+	fin->fin_flx |= FI_FRAG;
+
+	dlen = fin->fin_dlen;
+	if (frpr_ipv6exthdr(fin, 0, IPPROTO_FRAGMENT) == IPPROTO_NONE)
+		return IPPROTO_NONE;
+
+	/*
+	 * frpr_ipv6exthdr() interprets the second byte of the header as a
+	 * length.  In a fragment header that byte is the reserved field, so
+	 * the number of bytes skipped is not guaranteed to be
+	 * sizeof(*frag).  Remember how far fin_dp really moved so the start
+	 * of the fragment header can be found again below.
+	 */
+	consumed = dlen - fin->fin_dlen;
+
+	if (frpr_pullup(fin, sizeof(*frag)) == -1)
+		return IPPROTO_NONE;
+
+	frpr_short6(fin, sizeof(*frag));
+
+	if ((fin->fin_flx & FI_SHORT) != 0)
+		return IPPROTO_NONE;
+
+	/* Computed after the pullup so the pointer is current. */
+	frag = (struct ip6_frag *)((char *)fin->fin_dp - consumed);
+	/*
+	 * Fragment but no fragmentation info set?  Bad packet...
+	 */
+	if (frag->ip6f_offlg == 0) {
+		fin->fin_flx |= FI_BAD;
+		return IPPROTO_NONE;
+	}
+
+	fin->fin_id = frag->ip6f_ident;
+	fin->fin_off = frag->ip6f_offlg & IP6F_OFF_MASK;
+	fin->fin_off = ntohs(fin->fin_off);
+	if (fin->fin_off != 0)
+		fin->fin_flx |= FI_FRAGBODY;
+
+	/* The fragment header itself is always sizeof(*frag) bytes. */
+	fin->fin_dp = (char *)frag + sizeof(*frag);
+	fin->fin_dlen = dlen - sizeof(*frag);
+
+	/* length of hdrs(after frag hdr) + data */
+	fin->fin_flen = fin->fin_dlen;
+
+	/*
+	 * If the frag is not the last one and the payload length
+	 * is not multiple of 8, it must be dropped.
+	 */
+	if ((frag->ip6f_offlg & IP6F_MORE_FRAG) && (dlen % 8)) {
+		fin->fin_flx |= FI_BAD;
+		return IPPROTO_NONE;
+	}
+
+	return frag->ip6f_nxt;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    frpr_dstopts6                                               */
+/* Returns:     int    - value of the next header or IPPROTO_NONE if error  */
+/* Parameters:  fin(I) - pointer to packet information                      */
+/*                                                                          */
+/* IPv6 Only                                                                */
+/* Process a pending destination options extension header. The header is   */
+/* handed to frpr_ipv6exthdr() with multiple occurrences allowed.           */
+/* ------------------------------------------------------------------------ */
+static INLINE int frpr_dstopts6(fin)
+fr_info_t *fin;
+{
+	int nexthdr;
+
+	nexthdr = frpr_ipv6exthdr(fin, 1, IPPROTO_DSTOPTS);
+	return nexthdr;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    frpr_icmp6                                                  */
+/* Returns:     void                                                        */
+/* Parameters:  fin(I) - pointer to packet information                      */
+/*                                                                          */
+/* IPv6 Only                                                                */
+/* Work out the minimum acceptable size for this ICMPv6 packet from its     */
+/* type, flag ICMPv6 error messages with FI_ICMPERR, store the first 16     */
+/* bits of the header (type and code) in fin_data[0] and run the short      */
+/* packet check against that minimum.                                       */
+/* ------------------------------------------------------------------------ */
+static INLINE void frpr_icmp6(fin)
+fr_info_t *fin;
+{
+	struct icmp6_hdr *hdr;
+	int minlen;
+
+	minlen = sizeof(struct icmp6_hdr);
+
+	if (frpr_pullup(fin, ICMP6ERR_MINPKTLEN - sizeof(ip6_t)) == -1)
+		return;
+
+	if (fin->fin_dlen > 1) {
+		hdr = fin->fin_dp;
+
+		/* type + code, taken as one 16 bit quantity */
+		fin->fin_data[0] = *(u_short *)hdr;
+
+		switch (hdr->icmp6_type)
+		{
+		case ICMP6_DST_UNREACH :
+		case ICMP6_PACKET_TOO_BIG :
+		case ICMP6_TIME_EXCEEDED :
+		case ICMP6_PARAM_PROB :
+			/*
+			 * Error messages: make the whole packet contiguous
+			 * if the first buffer does not already hold it all.
+			 */
+			if ((fin->fin_m != NULL) &&
+			    (M_LEN(fin->fin_m) < fin->fin_plen) &&
+			    (fr_coalesce(fin) != 1))
+				return;
+			fin->fin_flx |= FI_ICMPERR;
+			minlen = ICMP6ERR_IPICMPHLEN - sizeof(ip6_t);
+			break;
+		case ICMP6_ECHO_REQUEST :
+		case ICMP6_ECHO_REPLY :
+			minlen = ICMP6ERR_MINPKTLEN - sizeof(ip6_t);
+			break;
+		default :
+			break;
+		}
+	}
+
+	frpr_short6(fin, minlen);
+	fin->fin_flen -= fin->fin_dlen - minlen;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    frpr_udp6                                                   */
+/* Returns:     void                                                        */
+/* Parameters:  fin(I) - pointer to packet information                      */
+/*                                                                          */
+/* IPv6 Only                                                                */
+/* Checksum, short-check and pull up the UDP header, then hand over to the  */
+/* protocol independent UDP processing.                                     */
+/* Is not expected to be called for fragmented packets.                     */
+/* ------------------------------------------------------------------------ */
+static INLINE void frpr_udp6(fin)
+fr_info_t *fin;
+{
+
+	fr_checkv6sum(fin);
+
+	frpr_short6(fin, sizeof(struct udphdr));
+
+	if (frpr_pullup(fin, sizeof(struct udphdr)) != -1) {
+		/* Trim fin_flen by whatever follows the UDP header. */
+		fin->fin_flen -= fin->fin_dlen - sizeof(struct udphdr);
+
+		frpr_udpcommon(fin);
+	}
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    frpr_tcp6                                                   */
+/* Returns:     void                                                        */
+/* Parameters:  fin(I) - pointer to packet information                      */
+/*                                                                          */
+/* IPv6 Only                                                                */
+/* Checksum, short-check and pull up the basic TCP header, then hand over   */
+/* to the protocol independent TCP processing.                              */
+/* Is not expected to be called for fragmented packets.                     */
+/* ------------------------------------------------------------------------ */
+static INLINE void frpr_tcp6(fin)
+fr_info_t *fin;
+{
+
+	fr_checkv6sum(fin);
+
+	frpr_short6(fin, sizeof(struct tcphdr));
+
+	if (frpr_pullup(fin, sizeof(struct tcphdr)) != -1) {
+		/* Trim fin_flen by whatever follows the basic TCP header. */
+		fin->fin_flen -= fin->fin_dlen - sizeof(struct tcphdr);
+
+		frpr_tcpcommon(fin);
+	}
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    frpr_esp6                                                   */
+/* Returns:     void                                                        */
+/* Parameters:  fin(I) - pointer to packet information                      */
+/*                                                                          */
+/* IPv6 Only                                                                */
+/* Analyse the packet for ESP properties.                                   */
+/* The minimum length is taken to be the SPI (32bits) plus a tail (32bits)  */
+/* even though the newer ESP packets must also have a sequence number that  */
+/* is 32bits as well, it is not possible(?) to determine the version from a */
+/* simple packet header.                                                    */
+/* The presence of ESP is recorded in fin_optmsk.                           */
+/* ------------------------------------------------------------------------ */
+static INLINE void frpr_esp6(fin)
+fr_info_t *fin;
+{
+	int i;
+
+	/*
+	 * 8 == SPI + tail, the documented minimum above and the same value
+	 * frpr_esp() uses for IPv4 (previously the size of the GRE header
+	 * was used here by mistake).
+	 */
+	frpr_short6(fin, 8);
+
+	/* Best effort: a failed pullup is deliberately not acted upon. */
+	(void) frpr_pullup(fin, 8);
+
+	for (i = 0; ip6exthdr[i].ol_bit != 0; i++)
+		if (ip6exthdr[i].ol_val == IPPROTO_ESP) {
+			fin->fin_optmsk |= ip6exthdr[i].ol_bit;
+			break;
+		}
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    frpr_ah6                                                    */
+/* Returns:     int    - value of the next header or IPPROTO_NONE if error  */
+/* Parameters:  fin(I) - pointer to packet information                      */
+/*                                                                          */
+/* IPv6 Only                                                                */
+/* Analyse the packet for AH properties.                                    */
+/* The minimum length is taken to be the combination of all fields in the   */
+/* header being present and no authentication data (null algorithm used.)   */
+/* The presence of AH is recorded in fin_optmsk and fin_dp/fin_dlen are     */
+/* stepped over the header.  A header whose length field reaches beyond the */
+/* remaining data marks the packet FI_BAD.                                  */
+/* ------------------------------------------------------------------------ */
+static INLINE int frpr_ah6(fin)
+fr_info_t *fin;
+{
+	authhdr_t *ah;
+	int i, shift;
+
+	frpr_short6(fin, 12);
+
+	if (frpr_pullup(fin, sizeof(*ah)) == -1)
+		return IPPROTO_NONE;
+
+	for (i = 0; ip6exthdr[i].ol_bit != 0; i++)
+		if (ip6exthdr[i].ol_val == IPPROTO_AH) {
+			fin->fin_optmsk |= ip6exthdr[i].ol_bit;
+			break;
+		}
+
+	/*
+	 * Do not interpret header fields of a packet already flagged as
+	 * too short (same treatment as in frpr_fragment6()).
+	 */
+	if ((fin->fin_flx & FI_SHORT) != 0)
+		return IPPROTO_NONE;
+
+	ah = (authhdr_t *)fin->fin_dp;
+
+	/* ah_plen is in 32 bit words, not counting the first two. */
+	shift = (ah->ah_plen + 2) * 4;
+	if (shift > fin->fin_dlen) {
+		/*
+		 * Nasty header length?  Stepping over it would move fin_dp
+		 * past the end of the data and wrap fin_dlen.
+		 */
+		fin->fin_flx |= FI_BAD;
+		return IPPROTO_NONE;
+	}
+
+	fin->fin_dlen -= shift;
+	fin->fin_dp = (char*)fin->fin_dp + shift;
+
+	return ah->ah_next;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    frpr_gre6                                                   */
+/* Returns:     void                                                        */
+/* Parameters:  fin(I) - pointer to packet information                      */
+/*                                                                          */
+/* Short-check and pull up the GRE header.  For GRE revision 1 the call     */
+/* field is stored in fin_data[0].                                          */
+/* ------------------------------------------------------------------------ */
+static INLINE void frpr_gre6(fin)
+fr_info_t *fin;
+{
+	grehdr_t *hdr;
+
+	frpr_short6(fin, sizeof(grehdr_t));
+
+	if (frpr_pullup(fin, sizeof(grehdr_t)) != -1) {
+		hdr = fin->fin_dp;
+		if (GRE_REV(hdr->gr_flags) == 1)
+			fin->fin_data[0] = hdr->gr_call;
+	}
+}
+#endif /* USE_INET6 */
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    frpr_pullup                                                 */
+/* Returns:     int     - 0 == pullup succeeded, -1 == failure              */
+/* Parameters:  fin(I)  - pointer to packet information                     */
+/*              plen(I) - length (excluding L3 header) to pullup            */
+/*                                                                          */
+/* Small wrapper around fr_pullup().  The requested length is converted     */
+/* into a length measured from the start of the buffer (adding the offset   */
+/* of fin_dp past the L3 header, the L3 header itself and whatever precedes */
+/* it) and fr_pullup() is only called when the buffer holds less than that. */
+/* Outside of the kernel this always returns 0.                             */
+/* ------------------------------------------------------------------------ */
+static INLINE int frpr_pullup(fin, plen)
+fr_info_t *fin;
+int plen;
+{
+#if defined(_KERNEL)
+	if (fin->fin_m == NULL)
+		return 0;
+
+	/* Bytes between the end of the L3 header and fin_dp. */
+	if (fin->fin_dp != NULL)
+		plen += (char *)fin->fin_dp -
+			((char *)fin->fin_ip + fin->fin_hlen);
+
+	/* Bytes from the start of the buffer to the end of the L3 header. */
+	plen += ((char *)fin->fin_ip - MTOD(fin->fin_m, char *)) +
+		fin->fin_hlen;
+
+	if ((M_LEN(fin->fin_m) < plen) &&
+	    (fr_pullup(fin->fin_m, fin, plen) == NULL))
+		return -1;
+#endif
+	return 0;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    frpr_short                                                  */
+/* Returns:     void                                                        */
+/* Parameters:  fin(I)  - pointer to packet information                     */
+/*              xmin(I) - minimum header size                               */
+/*                                                                          */
+/* Flag a packet as "short" (FI_SHORT) relative to xmin.  The rule applied  */
+/* is that the packet must not be fragmented within the layer 4 header:     */
+/* a packet at offset 0 must carry at least xmin bytes of data, and a       */
+/* fragment at a non-zero offset must not start before xmin.                */
+/* ------------------------------------------------------------------------ */
+static INLINE void frpr_short(fin, xmin)
+fr_info_t *fin;
+int xmin;
+{
+	int tooshort;
+
+	if (fin->fin_off != 0)
+		tooshort = (fin->fin_off < xmin);
+	else
+		tooshort = (fin->fin_dlen < xmin);
+
+	if (tooshort)
+		fin->fin_flx |= FI_SHORT;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    frpr_icmp                                                   */
+/* Returns:     void                                                        */
+/* Parameters:  fin(I) - pointer to packet information                      */
+/*                                                                          */
+/* IPv4 Only                                                                */
+/* Do a sanity check on the packet for ICMP (v4).  In nearly all cases,     */
+/* except extremely bad packets, both type and code will be present.        */
+/* The expected minimum size of an ICMP packet is very much dependent on    */
+/* the type of it.                                                          */
+/* Type/code are stored in fin_data[0] and, when present, the ID field in   */
+/* fin_data[1].  ICMP error messages are flagged with FI_ICMPERR.           */
+/*                                                                          */
+/* XXX - other ICMP sanity checks?                                          */
+/* ------------------------------------------------------------------------ */
+static INLINE void frpr_icmp(fin)
+fr_info_t *fin;
+{
+	int minicmpsz = sizeof(struct icmp);
+	icmphdr_t *icmp;
+	ip_t *oip;
+
+	/* Non-first fragment: only the short check applies. */
+	if (fin->fin_off != 0) {
+		frpr_short(fin, ICMPERR_ICMPHLEN);
+		return;
+	}
+
+	if (frpr_pullup(fin, ICMPERR_ICMPHLEN) == -1)
+		return;
+
+	fr_checkv4sum(fin);
+
+	if (fin->fin_dlen > 1) {
+		icmp = fin->fin_dp;
+
+		fin->fin_data[0] = *(u_short *)icmp;
+
+		switch (icmp->icmp_type)
+		{
+		case ICMP_ECHOREPLY :
+		case ICMP_ECHO :
+		/* Router discovery messages - RFC 1256 */
+		case ICMP_ROUTERADVERT :
+		case ICMP_ROUTERSOLICIT :
+			minicmpsz = ICMP_MINLEN;
+			break;
+		/*
+		 * type(1) + code(1) + cksum(2) + id(2) seq(2) +
+		 * 3 * timestamp(3 * 4)
+		 */
+		case ICMP_TSTAMP :
+		case ICMP_TSTAMPREPLY :
+			minicmpsz = 20;
+			break;
+		/*
+		 * type(1) + code(1) + cksum(2) + id(2) seq(2) +
+		 * mask(4)
+		 */
+		case ICMP_MASKREQ :
+		case ICMP_MASKREPLY :
+			minicmpsz = 12;
+			break;
+		/*
+		 * type(1) + code(1) + cksum(2) + id(2) seq(2) + ip(20+)
+		 */
+		case ICMP_UNREACH :
+			if (icmp->icmp_code == ICMP_UNREACH_NEEDFRAG) {
+				if (icmp->icmp_nextmtu < fr_icmpminfragmtu)
+					fin->fin_flx |= FI_BAD;
+			}
+			/* FALLTHRU */
+		case ICMP_SOURCEQUENCH :
+		case ICMP_REDIRECT :
+		case ICMP_TIMXCEED :
+		case ICMP_PARAMPROB :
+			fin->fin_flx |= FI_ICMPERR;
+			if (fr_coalesce(fin) != 1)
+				return;
+			/*
+			 * The packet has just been coalesced, so the header
+			 * pointer taken earlier may no longer be valid;
+			 * fetch it again before it is used below.
+			 */
+			icmp = fin->fin_dp;
+			/*
+			 * ICMP error packets should not be generated for IP
+			 * packets that are a fragment that isn't the first
+			 * fragment.  Only look at the embedded IP header if
+			 * the packet is long enough to contain one; shorter
+			 * packets are caught by frpr_short() below.
+			 */
+			if (fin->fin_dlen >= ICMPERR_ICMPHLEN + sizeof(*oip)) {
+				oip = (ip_t *)((char *)fin->fin_dp +
+					       ICMPERR_ICMPHLEN);
+				if ((ntohs(oip->ip_off) & IP_OFFMASK) != 0)
+					fin->fin_flx |= FI_BAD;
+			}
+			break;
+		default :
+			break;
+		}
+
+		if (fin->fin_dlen >= 6)				/* ID field */
+			fin->fin_data[1] = icmp->icmp_id;
+	}
+
+	frpr_short(fin, minicmpsz);
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: frpr_tcpcommon */
+/* Returns: void */
+/* Parameters: fin(I) - pointer to packet information */
+/* */
+/* TCP header sanity checking shared by IPv4 and IPv6. Records the ports */
+/* and TCP flags in fin and marks bad flag combinations with FI_BAD. */
+/* No checksum is verified here: frpr_tcp() and frpr_tcp6() call */
+/* fr_checkv4sum()/fr_checkv6sum() themselves before calling this function. */
+/* ------------------------------------------------------------------------ */
+static INLINE void frpr_tcpcommon(fin)
+fr_info_t *fin;
+{
+ int flags, tlen;
+ tcphdr_t *tcp;
+
+ fin->fin_flx |= FI_TCPUDP;
+ if (fin->fin_off != 0)
+ return; /* non-zero fragment offset: nothing further is examined */
+
+ if (frpr_pullup(fin, sizeof(*tcp)) == -1)
+ return;
+ tcp = fin->fin_dp;
+
+ if (fin->fin_dlen > 3) { /* both 16 bit port fields are present */
+ fin->fin_sport = ntohs(tcp->th_sport);
+ fin->fin_dport = ntohs(tcp->th_dport);
+ }
+
+ if ((fin->fin_flx & FI_SHORT) != 0)
+ return; /* rest of the header cannot be trusted to be there */
+
+ /*
+ * Use of the TCP data offset *must* result in a value that is at
+ * least the same size as the TCP header.
+ * The offset is shifted left by two to turn it into a byte count
+ * before it is compared.
+ */
+ tlen = TCP_OFF(tcp) << 2;
+ if (tlen < sizeof(tcphdr_t)) {
+ fin->fin_flx |= FI_BAD;
+ return;
+ }
+
+ flags = tcp->th_flags;
+ fin->fin_tcpf = tcp->th_flags;
+
+ /*
+ * If the urgent flag is set, then the urgent pointer must
+ * also be set and vice versa. Good TCP packets do not have
+ * just one of these set.
+ * Only the first branch of the chain below that applies is taken,
+ * so the order of the tests matters.
+ */
+ if ((flags & TH_URG) != 0 && (tcp->th_urp == 0)) {
+ fin->fin_flx |= FI_BAD;
+ } else if ((flags & TH_URG) == 0 && (tcp->th_urp != 0)) {
+ /* Ignore this case, it shows up in "real" traffic with */
+ /* bogus values in the urgent pointer field. */
+ flags = flags; /* LINT */
+ } else if (((flags & (TH_SYN|TH_FIN)) != 0) &&
+ ((flags & (TH_RST|TH_ACK)) == TH_RST)) {
+ /* TH_FIN|TH_RST|TH_ACK seems to appear "naturally" */
+ fin->fin_flx |= FI_BAD;
+ } else if (!(flags & TH_ACK)) {
+ /*
+ * If the ack bit isn't set, then either the SYN or
+ * RST bit must be set. If the SYN bit is set, then
+ * we expect the ACK field to be 0. If the ACK is
+ * not set and if URG, PSH or FIN are set, consider
+ * that to indicate a bad TCP packet.
+ */
+ if ((flags == TH_SYN) && (tcp->th_ack != 0)) {
+ /*
+ * Cisco PIX sets the ACK field to a random value.
+ * In light of this, do not set FI_BAD until a patch
+ * is available from Cisco to ensure that
+ * interoperability between existing systems is
+ * achieved.
+ */
+ /*fin->fin_flx |= FI_BAD*/;
+ flags = flags; /* LINT */
+ } else if (!(flags & (TH_RST|TH_SYN))) {
+ fin->fin_flx |= FI_BAD;
+ } else if ((flags & (TH_URG|TH_PUSH|TH_FIN)) != 0) {
+ fin->fin_flx |= FI_BAD;
+ }
+ }
+
+ /*
+ * At this point, it's not exactly clear what is to be gained by
+ * marking up which TCP options are and are not present. The one we
+ * are most interested in is the TCP window scale. This is only in
+ * a SYN packet [RFC1323] so we don't need this here...?
+ * Now if we were to analyse the header for passive fingerprinting,
+ * then that might add some weight to adding this...
+ *
+ * As it stands, the pullup below only asks for the full header
+ * (options included); the enabled code does not read the options.
+ */
+ if (tlen == sizeof(tcphdr_t))
+ return;
+
+ if (frpr_pullup(fin, tlen) == -1)
+ return;
+
+#if 0 /* disabled option scan; uses names (ip, s, off, opt, ol, i, mv, op, optmsk) not declared in this function */
+ ip = fin->fin_ip;
+ s = (u_char *)(tcp + 1);
+ off = IP_HL(ip) << 2;
+# ifdef _KERNEL
+ if (fin->fin_mp != NULL) {
+ mb_t *m = *fin->fin_mp;
+
+ if (off + tlen > M_LEN(m))
+ return;
+ }
+# endif
+ for (tlen -= (int)sizeof(*tcp); tlen > 0; ) {
+ opt = *s;
+ if (opt == '\0')
+ break;
+ else if (opt == TCPOPT_NOP)
+ ol = 1;
+ else {
+ if (tlen < 2)
+ break;
+ ol = (int)*(s + 1);
+ if (ol < 2 || ol > tlen)
+ break;
+ }
+
+ for (i = 9, mv = 4; mv >= 0; ) {
+ op = ipopts + i;
+ if (opt == (u_char)op->ol_val) {
+ optmsk |= op->ol_bit;
+ break;
+ }
+ }
+ tlen -= ol;
+ s += ol;
+ }
+#endif /* 0 */
+}
+
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    frpr_udpcommon                                              */
+/* Returns:     void                                                        */
+/* Parameters:  fin(I) - pointer to packet information                      */
+/*                                                                          */
+/* Flag the packet as TCP/UDP and, for an unfragmented/first-fragment       */
+/* packet carrying more than 3 bytes of data, extract the UDP source and    */
+/* destination ports.  A failed pullup of the UDP header marks the packet   */
+/* FI_SHORT.                                                                */
+/* ------------------------------------------------------------------------ */
+static INLINE void frpr_udpcommon(fin)
+fr_info_t *fin;
+{
+	udphdr_t *hdr;
+
+	fin->fin_flx |= FI_TCPUDP;
+
+	/* Ports are only available at offset 0 with enough data present. */
+	if (fin->fin_off || !(fin->fin_dlen > 3))
+		return;
+
+	if (frpr_pullup(fin, sizeof(*hdr)) == -1) {
+		fin->fin_flx |= FI_SHORT;
+		return;
+	}
+
+	hdr = fin->fin_dp;
+
+	fin->fin_sport = ntohs(hdr->uh_sport);
+	fin->fin_dport = ntohs(hdr->uh_dport);
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    frpr_tcp                                                    */
+/* Returns:     void                                                        */
+/* Parameters:  fin(I) - pointer to packet information                      */
+/*                                                                          */
+/* IPv4 Only                                                                */
+/* IPv4 entry point for TCP: verify the checksum, apply the short packet    */
+/* check for a basic TCP header and then run the common TCP checks.         */
+/* ------------------------------------------------------------------------ */
+static INLINE void frpr_tcp(fin)
+fr_info_t *fin;
+{
+	/* checksum first, as in the other IPv4 protocol handlers */
+	fr_checkv4sum(fin);
+
+	/* the fixed part of the TCP header must be present */
+	frpr_short(fin, sizeof(tcphdr_t));
+
+	/* protocol independent TCP processing */
+	frpr_tcpcommon(fin);
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    frpr_udp                                                    */
+/* Returns:     void                                                        */
+/* Parameters:  fin(I) - pointer to packet information                      */
+/*                                                                          */
+/* IPv4 Only                                                                */
+/* IPv4 entry point for UDP: verify the checksum, apply the short packet    */
+/* check for a UDP header and then run the common UDP processing.           */
+/* ------------------------------------------------------------------------ */
+static INLINE void frpr_udp(fin)
+fr_info_t *fin;
+{
+	/* checksum first, as in the other IPv4 protocol handlers */
+	fr_checkv4sum(fin);
+
+	/* the UDP header must be present */
+	frpr_short(fin, sizeof(udphdr_t));
+
+	/* protocol independent UDP processing */
+	frpr_udpcommon(fin);
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    frpr_esp                                                    */
+/* Returns:     void                                                        */
+/* Parameters:  fin(I) - pointer to packet information                      */
+/*                                                                          */
+/* Analyse the packet for ESP properties.                                   */
+/* The minimum length used is 8 bytes: the SPI (32bits) plus a tail         */
+/* (32bits).  Newer ESP packets must also carry a 32bit sequence number,    */
+/* but it is not possible(?) to determine the version from a simple packet  */
+/* header, so that is not required here.                                    */
+/* ------------------------------------------------------------------------ */
+static INLINE void frpr_esp(fin)
+fr_info_t *fin;
+{
+	/* Only the first fragment (offset 0) needs the header pulled up. */
+	if (fin->fin_off == 0) {
+		if (frpr_pullup(fin, 8) == -1)
+			return;
+	}
+
+	frpr_short(fin, 8);
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    frpr_ah                                                     */
+/* Returns:     void                                                        */
+/* Parameters:  fin(I) - pointer to packet information                      */
+/*                                                                          */
+/* Analyse the packet for AH properties.                                    */
+/* The AH length is computed from the header's payload length field and the */
+/* packet is checked against it with frpr_short().                          */
+/* ------------------------------------------------------------------------ */
+static INLINE void frpr_ah(fin)
+fr_info_t *fin;
+{
+	authhdr_t *hdr;
+	int ahlen;
+
+	/* Only the first fragment (offset 0) needs the header pulled up. */
+	if (fin->fin_off == 0) {
+		if (frpr_pullup(fin, sizeof(*hdr)) == -1)
+			return;
+	}
+
+	hdr = (authhdr_t *)fin->fin_dp;
+
+	/* ah_plen counts 32 bit words, less two. */
+	ahlen = (hdr->ah_plen + 2) * 4;
+	frpr_short(fin, ahlen);
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    frpr_gre                                                    */
+/* Returns:     void                                                        */
+/* Parameters:  fin(I) - pointer to packet information                      */
+/*                                                                          */
+/* Pull up (first fragment only) and short-check the GRE header.  For GRE   */
+/* revision 1 at offset 0 the call field is stored in fin_data[0].          */
+/* ------------------------------------------------------------------------ */
+static INLINE void frpr_gre(fin)
+fr_info_t *fin;
+{
+	grehdr_t *hdr;
+
+	if (fin->fin_off == 0) {
+		if (frpr_pullup(fin, sizeof(grehdr_t)) == -1)
+			return;
+	}
+
+	frpr_short(fin, sizeof(grehdr_t));
+
+	/* The GRE header is only present at offset 0. */
+	if (fin->fin_off != 0)
+		return;
+
+	hdr = fin->fin_dp;
+	if (GRE_REV(hdr->gr_flags) == 1)
+		fin->fin_data[0] = hdr->gr_call;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: frpr_ipv4hdr */
+/* Returns: void */
+/* Parameters: fin(I) - pointer to packet information */
+/* */
+/* IPv4 Only */
+/* Analyze the IPv4 header and set fields in the fr_info_t structure: */
+/* protocol, TOS, TTL, addresses, fragment flags/offset, then run the */
+/* per-protocol handler and finally flag any IP options that are present. */
+/* ------------------------------------------------------------------------ */
+static INLINE void frpr_ipv4hdr(fin)
+fr_info_t *fin;
+{
+ u_short optmsk = 0, secmsk = 0, auth = 0;
+ int hlen, ol, mv, p, i;
+ const struct optlist *op;
+ u_char *s, opt;
+ u_short off;
+ fr_ip_t *fi;
+ ip_t *ip;
+
+ fi = &fin->fin_fi;
+ hlen = fin->fin_hlen;
+
+ ip = fin->fin_ip;
+ p = ip->ip_p;
+ fi->fi_p = p;
+ fi->fi_tos = ip->ip_tos;
+ fin->fin_id = ip->ip_id;
+ off = ip->ip_off;
+
+ /* Record protocol and TTL (fi_p was already set from p above) */
+ fi->fi_p = ip->ip_p;
+ fi->fi_ttl = ip->ip_ttl;
+#if 0
+ (*(((u_short *)fi) + 1)) = (*(((u_short *)ip) + 4));
+#endif
+
+ /* Zero words i6[1]..i6[3] of both addresses; only fi_saddr/fi_daddr are set below */
+ fi->fi_src.i6[1] = 0;
+ fi->fi_src.i6[2] = 0;
+ fi->fi_src.i6[3] = 0;
+ fi->fi_dst.i6[1] = 0;
+ fi->fi_dst.i6[2] = 0;
+ fi->fi_dst.i6[3] = 0;
+
+ fi->fi_saddr = ip->ip_src.s_addr;
+ fi->fi_daddr = ip->ip_dst.s_addr;
+
+ /*
+ * set packet attribute flags based on the offset and
+ * calculate the byte offset that it represents.
+ * FI_FRAG is set when either the MF bit or a non-zero offset is
+ * present; FI_FRAGBODY only when the offset itself is non-zero.
+ * NOTE(review): the masks are applied to ip_off as read from the
+ * header, so it is presumably already in host byte order here -
+ * confirm against the caller.
+ */
+ off &= IP_MF|IP_OFFMASK;
+ if (off != 0) {
+ fi->fi_flx |= FI_FRAG;
+ off &= IP_OFFMASK;
+ if (off != 0) {
+ fin->fin_flx |= FI_FRAGBODY;
+ off <<= 3; /* offset field is in units of 8 bytes */
+ if ((off + fin->fin_dlen > 65535) ||
+ (fin->fin_dlen == 0) ||
+ ((ip->ip_off & IP_MF) && (fin->fin_dlen & 7))) {
+ /*
+ * The length of the packet, starting at its
+ * offset cannot exceed 65535 (0xffff) as the
+ * length of an IP packet is only 16 bits.
+ *
+ * Any fragment that isn't the last fragment
+ * must have a length greater than 0 and it
+ * must be an even multiple of 8.
+ */
+ fi->fi_flx |= FI_BAD;
+ }
+ }
+ }
+ fin->fin_off = off;
+
+ /*
+ * Call per-protocol setup and checking
+ * (protocols without a handler here get no further checks).
+ */
+ switch (p)
+ {
+ case IPPROTO_UDP :
+ frpr_udp(fin);
+ break;
+ case IPPROTO_TCP :
+ frpr_tcp(fin);
+ break;
+ case IPPROTO_ICMP :
+ frpr_icmp(fin);
+ break;
+ case IPPROTO_AH :
+ frpr_ah(fin);
+ break;
+ case IPPROTO_ESP :
+ frpr_esp(fin);
+ break;
+ case IPPROTO_GRE :
+ frpr_gre(fin);
+ break;
+ }
+
+ ip = fin->fin_ip; /* re-read: presumably a handler's pullup can change or clear fin_ip */
+ if (ip == NULL)
+ return;
+
+ /*
+ * If it is a standard IP header (no options), set the flag fields
+ * which relate to options to 0.
+ */
+ if (hlen == sizeof(*ip)) {
+ fi->fi_optmsk = 0;
+ fi->fi_secmsk = 0;
+ fi->fi_auth = 0;
+ return;
+ }
+
+ /*
+ * So the IP header has some IP options attached. Walk the entire
+ * list of options present with this packet and set flags to indicate
+ * which ones are here and which ones are not. For the somewhat out
+ * of date and obscure security classification options, set a flag to
+ * represent which classification is present.
+ *
+ * The walk stops at an end-of-list option (0) or at an option whose
+ * length byte is less than 2 or larger than what is left of the
+ * header.
+ */
+ fi->fi_flx |= FI_OPTIONS;
+
+ for (s = (u_char *)(ip + 1), hlen -= (int)sizeof(*ip); hlen > 0; ) {
+ opt = *s;
+ if (opt == '\0')
+ break;
+ else if (opt == IPOPT_NOP)
+ ol = 1;
+ else {
+ if (hlen < 2)
+ break;
+ ol = (int)*(s + 1);
+ if (ol < 2 || ol > hlen)
+ break;
+ }
+ for (i = 9, mv = 4; mv >= 0; ) { /* probe ipopts[] from index 9, stepping by mv = 4,3,2,1,0; assumes the table is ordered by ol_val - TODO confirm */
+ op = ipopts + i;
+ if ((opt == (u_char)op->ol_val) && (ol > 4)) { /* only options longer than 4 bytes are recorded */
+ optmsk |= op->ol_bit;
+ if (opt == IPOPT_SECURITY) {
+ const struct optlist *sp;
+ u_char sec;
+ int j, m;
+
+ sec = *(s + 2); /* classification */
+ for (j = 3, m = 2; m >= 0; ) { /* same style of probe over secopt[], from index 3 */
+ sp = secopt + j;
+ if (sec == sp->ol_val) {
+ secmsk |= sp->ol_bit;
+ auth = *(s + 3); /* 16 bit value built from option bytes 3 and 4 */
+ auth *= 256;
+ auth += *(s + 4);
+ break;
+ }
+ if (sec < sp->ol_val)
+ j -= m;
+ else
+ j += m;
+ m--;
+ }
+ }
+ break;
+ }
+ if (opt < op->ol_val)
+ i -= mv;
+ else
+ i += mv;
+ mv--;
+ }
+ hlen -= ol;
+ s += ol;
+ }
+
+ /*
+ * If an authority value was collected and its 0x0100 bit is clear,
+ * keep only the upper byte of it.
+ */
+ if (auth && !(auth & 0x0100))
+ auth &= 0xff00;
+ fi->fi_optmsk = optmsk;
+ fi->fi_secmsk = secmsk;
+ fi->fi_auth = auth;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    fr_makefrip                                                 */
+/* Returns:     int - -1 == hdr checking error, 0 == OK                     */
+/* Parameters:  hlen(I) - length of IP packet header                        */
+/*              ip(I)   - pointer to the IP header                          */
+/*              fin(IO) - pointer to packet information                     */
+/*                                                                          */
+/* Reset the per-packet fields of fin, point it at the given IP header and  */
+/* then let the IPv4 or IPv6 header parser (selected by fin_v) compact the  */
+/* header into the information used for comparing packets against rules.    */
+/* An error is reported if the IPv6 parser fails or if fin_ip is NULL once  */
+/* parsing is done.                                                         */
+/* ------------------------------------------------------------------------ */
+int fr_makefrip(hlen, ip, fin)
+int hlen;
+ip_t *ip;
+fr_info_t *fin;
+{
+	/* Forget anything left over from a previous packet. */
+	fin->fin_state = NULL;
+	fin->fin_nat = NULL;
+	fin->fin_rule = 0xffffffff;
+	fin->fin_depth = 0;
+	fin->fin_group[0] = -1;
+	fin->fin_group[1] = '\0';
+
+	/* Describe where the header and the data behind it are. */
+	fin->fin_ip = ip;
+	fin->fin_hlen = (u_short)hlen;
+	fin->fin_dp = (char *)ip + hlen;
+	fin->fin_dlen = fin->fin_plen - hlen;
+
+	switch (fin->fin_v)
+	{
+	case 4 :
+		frpr_ipv4hdr(fin);
+		break;
+#ifdef USE_INET6
+	case 6 :
+		if (frpr_ipv6hdr(fin) == -1)
+			return -1;
+		break;
+#endif
+	default :
+		break;
+	}
+
+	return (fin->fin_ip == NULL) ? -1 : 0;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    fr_portcheck                                                */
+/* Returns:     int - 1 == port matched, 0 == port match failed             */
+/* Parameters:  frp(I) - pointer to port check `expression'                 */
+/*              pop(I) - pointer to port number to evaluate                 */
+/*                                                                          */
+/* Evaluate one port comparison.  frp_cmp selects the operator, frp_port is */
+/* the value compared against and frp_top is the upper bound for the range  */
+/* operators.  An unrecognised operator counts as a match.                  */
+/* ------------------------------------------------------------------------ */
+static INLINE int fr_portcheck(frp, pop)
+frpcmp_t *frp;
+u_short *pop;
+{
+	u_short port, ref;
+	int matched;
+
+	port = *pop;
+	ref = frp->frp_port;
+
+	switch (frp->frp_cmp)
+	{
+	case FR_EQUAL :
+		matched = (port == ref);
+		break;
+	case FR_NEQUAL :
+		matched = (port != ref);
+		break;
+	case FR_LESST :
+		matched = (port < ref);
+		break;
+	case FR_GREATERT :
+		matched = (port > ref);
+		break;
+	case FR_LESSTE :
+		matched = (port <= ref);
+		break;
+	case FR_GREATERTE :
+		matched = (port >= ref);
+		break;
+	case FR_OUTRANGE :	/* strictly outside [ref, frp_top] */
+		matched = (port < ref || port > frp->frp_top);
+		break;
+	case FR_INRANGE :	/* strictly between ref and frp_top */
+		matched = (port > ref && port < frp->frp_top);
+		break;
+	case FR_INCRANGE :	/* within [ref, frp_top], bounds included */
+		matched = (port >= ref && port <= frp->frp_top);
+		break;
+	default :
+		matched = 1;
+		break;
+	}
+	return matched;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    fr_tcpudpchk                                                */
+/* Returns:     int - 1 == protocol matched, 0 == check failed              */
+/* Parameters:  fin(I) - pointer to packet information                      */
+/*              ft(I)  - pointer to structure with comparison data          */
+/*                                                                          */
+/* Compares the current packet (assuming it is TCP/UDP) with the port and,  */
+/* for TCP, flag requirements held in ft.                                   */
+/* ------------------------------------------------------------------------ */
+int fr_tcpudpchk(fin, ft)
+fr_info_t *fin;
+frtuc_t *ft;
+{
+	/*
+	 * Both ports should *always* be in the first fragment.
+	 * So far, I cannot find any cases where they can not be.
+	 *
+	 * Destination port first, then source port; each is only
+	 * compared if the rule asks for it.
+	 */
+	if (ft->ftu_dcmp && !fr_portcheck(&ft->ftu_dst, &fin->fin_dport))
+		return 0;
+
+	if (ft->ftu_scmp && !fr_portcheck(&ft->ftu_src, &fin->fin_sport))
+		return 0;
+
+	/* Only TCP has anything further to compare. */
+	if (fin->fin_p != IPPROTO_TCP)
+		return 1;
+
+	/*
+	 * Without the whole TCP header there are no flags to look at:
+	 * a rule that wants flags matched cannot match, any other rule
+	 * does (which should satisfy the "short" class too).
+	 */
+	if (fin->fin_flx & FI_SHORT)
+		return !(ft->ftu_tcpf | ft->ftu_tcpfm);
+
+	/*
+	 * Match the flags ?  If not, abort this match.
+	 */
+	if (ft->ftu_tcpfm &&
+	    ft->ftu_tcpf != (fin->fin_tcpf & ft->ftu_tcpfm)) {
+		FR_DEBUG(("f. %#x & %#x != %#x\n", fin->fin_tcpf,
+			 ft->ftu_tcpfm, ft->ftu_tcpf));
+		return 0;
+	}
+
+	return 1;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_ipfcheck */
+/* Returns: int - 0 == match, 1 == no match */
+/* Parameters: fin(I) - pointer to packet information */
+/* fr(I) - pointer to filter rule */
+/* portcmp(I) - flag indicating whether to attempt matching on */
+/* TCP/UDP port data. */
+/* */
+/* Check to see if a packet matches an IPFilter rule. Checks of addresses, */
+/* port numbers, etc, for "standard" IPFilter rules are all orchestrated in */
+/* this function. */
+/* */
+/* The packet's fr_ip_t (fin->fin_fi) is walked one 32 bit word at a time, */
+/* in step with the rule's mask (fri_mip) and expected value (fri_ip). A */
+/* word matches when (packet & mask) == value. */
+/* ------------------------------------------------------------------------ */
+static INLINE int fr_ipfcheck(fin, fr, portcmp)
+fr_info_t *fin;
+frentry_t *fr;
+int portcmp;
+{
+ u_32_t *ld, *lm, *lip;
+ fripf_t *fri;
+ fr_ip_t *fi;
+ int i;
+
+ /*
+ * lip walks the packet data, lm the rule's mask and ld the rule's
+ * expected value; all three are advanced together.
+ */
+ fi = &fin->fin_fi;
+ fri = fr->fr_ipf;
+ lip = (u_32_t *)fi;
+ lm = (u_32_t *)&fri->fri_mip;
+ ld = (u_32_t *)&fri->fri_ip;
+
+ /*
+ * The first 32 bits checked cover:
+ * IP version, TOS, TTL, protocol
+ */
+ i = ((*lip & *lm) != *ld);
+ FR_DEBUG(("0. %#08x & %#08x != %#08x\n",
+ *lip, *lm, *ld));
+ if (i)
+ return 1;
+
+ /*
+ * Next 32 bits is a constructed bitmask indicating which IP options
+ * are present (if any) in this packet.
+ */
+ lip++, lm++, ld++;
+ i |= ((*lip & *lm) != *ld);
+ FR_DEBUG(("1. %#08x & %#08x != %#08x\n",
+ *lip, *lm, *ld));
+ if (i)
+ return 1;
+
+ lip++, lm++, ld++;
+ /*
+ * Unrolled loops (4 each, for 32 bits) for address checks.
+ */
+ /*
+ * Check the source address.
+ */
+#ifdef IPFILTER_LOOKUP
+ if (fr->fr_satype == FRI_LOOKUP) {
+ /*
+ * The address is matched by the rule's lookup function rather
+ * than by mask/value; -1 from the lookup is an immediate
+ * "no match". The remaining three address words are skipped.
+ */
+ i = (*fr->fr_srcfunc)(fr->fr_srcptr, fi->fi_v, lip);
+ if (i == -1)
+ return 1;
+ lip += 3;
+ lm += 3;
+ ld += 3;
+ } else {
+#endif
+ i = ((*lip & *lm) != *ld);
+ FR_DEBUG(("2a. %#08x & %#08x != %#08x\n",
+ *lip, *lm, *ld));
+ if (fi->fi_v == 6) {
+ /* IPv6: compare the other three words of the address. */
+ lip++, lm++, ld++;
+ i |= ((*lip & *lm) != *ld);
+ FR_DEBUG(("2b. %#08x & %#08x != %#08x\n",
+ *lip, *lm, *ld));
+ lip++, lm++, ld++;
+ i |= ((*lip & *lm) != *ld);
+ FR_DEBUG(("2c. %#08x & %#08x != %#08x\n",
+ *lip, *lm, *ld));
+ lip++, lm++, ld++;
+ i |= ((*lip & *lm) != *ld);
+ FR_DEBUG(("2d. %#08x & %#08x != %#08x\n",
+ *lip, *lm, *ld));
+ } else {
+ /* Not IPv6: only the first word is compared, skip the rest. */
+ lip += 3;
+ lm += 3;
+ ld += 3;
+ }
+#ifdef IPFILTER_LOOKUP
+ }
+#endif
+ /*
+ * Invert the comparison result when the rule negates the source
+ * address. NOTE(review): the shift by 6 presumably moves the
+ * FR_NOTSRCIP bit down to bit 0 - confirm against its definition.
+ */
+ i ^= (fr->fr_flags & FR_NOTSRCIP) >> 6;
+ if (i)
+ return 1;
+
+ /*
+ * Check the destination address.
+ */
+ lip++, lm++, ld++;
+#ifdef IPFILTER_LOOKUP
+ if (fr->fr_datype == FRI_LOOKUP) {
+ /* As for the source: lookup function instead of mask/value. */
+ i = (*fr->fr_dstfunc)(fr->fr_dstptr, fi->fi_v, lip);
+ if (i == -1)
+ return 1;
+ lip += 3;
+ lm += 3;
+ ld += 3;
+ } else {
+#endif
+ i = ((*lip & *lm) != *ld);
+ FR_DEBUG(("3a. %#08x & %#08x != %#08x\n",
+ *lip, *lm, *ld));
+ if (fi->fi_v == 6) {
+ lip++, lm++, ld++;
+ i |= ((*lip & *lm) != *ld);
+ FR_DEBUG(("3b. %#08x & %#08x != %#08x\n",
+ *lip, *lm, *ld));
+ lip++, lm++, ld++;
+ i |= ((*lip & *lm) != *ld);
+ FR_DEBUG(("3c. %#08x & %#08x != %#08x\n",
+ *lip, *lm, *ld));
+ lip++, lm++, ld++;
+ i |= ((*lip & *lm) != *ld);
+ FR_DEBUG(("3d. %#08x & %#08x != %#08x\n",
+ *lip, *lm, *ld));
+ } else {
+ lip += 3;
+ lm += 3;
+ ld += 3;
+ }
+#ifdef IPFILTER_LOOKUP
+ }
+#endif
+ /*
+ * Invert the result when the rule negates the destination address.
+ * NOTE(review): as above, the shift by 7 presumably matches the bit
+ * position of FR_NOTDSTIP - confirm against its definition.
+ */
+ i ^= (fr->fr_flags & FR_NOTDSTIP) >> 7;
+ if (i)
+ return 1;
+ /*
+ * IP addresses matched. The next 32bits contains:
+ * mask of old IP header security & authentication bits.
+ */
+ lip++, lm++, ld++;
+ i |= ((*lip & *lm) != *ld);
+ FR_DEBUG(("4. %#08x & %#08x != %#08x\n",
+ *lip, *lm, *ld));
+
+ /*
+ * Next we have 32 bits of packet flags.
+ */
+ lip++, lm++, ld++;
+ i |= ((*lip & *lm) != *ld);
+ FR_DEBUG(("5. %#08x & %#08x != %#08x\n",
+ *lip, *lm, *ld));
+
+ if (i == 0) {
+ /*
+ * If a fragment, then only the first has what we're
+ * looking for here...
+ */
+ if (portcmp) {
+ if (!fr_tcpudpchk(fin, &fr->fr_tuc))
+ i = 1;
+ } else {
+ /*
+ * No port comparison is being attempted, so a rule
+ * that asks for port or TCP flag matching cannot
+ * match this packet.
+ */
+ if (fr->fr_dcmp || fr->fr_scmp ||
+ fr->fr_tcpf || fr->fr_tcpfm)
+ i = 1;
+ /*
+ * A rule with ICMP matching only matches ICMP/ICMPv6
+ * packets with a zero fin_off and at least 2 bytes
+ * of data, and then only if fin_data[0] masked with
+ * fr_icmpm equals fr_icmp.
+ */
+ if (fr->fr_icmpm || fr->fr_icmp) {
+ if (((fi->fi_p != IPPROTO_ICMP) &&
+ (fi->fi_p != IPPROTO_ICMPV6)) ||
+ fin->fin_off || (fin->fin_dlen < 2))
+ i = 1;
+ else if ((fin->fin_data[0] & fr->fr_icmpm) !=
+ fr->fr_icmp) {
+ FR_DEBUG(("i. %#x & %#x != %#x\n",
+ fin->fin_data[0],
+ fr->fr_icmpm, fr->fr_icmp));
+ i = 1;
+ }
+ }
+ }
+ }
+ return i;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_scanlist */
+/* Returns: int - result flags of scanning filter list */
+/* Parameters: fin(I) - pointer to packet information */
+/* pass(I) - default result to return for filtering */
+/* */
+/* Check the input/output list of rules for a match to the current packet. */
+/* If a match is found, the value of fr_flags from the rule becomes the */
+/* return value and fin->fin_fr points to the matched rule. */
+/* */
+/* On entry fin->fin_fr is the head of the rule list to be scanned. */
+/* */
+/* This function may be called recursively up to 16 times (limit inbuilt.) */
+/* When unwinding, it should finish up with fin_depth as 0. */
+/* */
+/* Could be per interface, but this gets real nasty when you don't have, */
+/* or can't easily change, the kernel source code to do so. */
+/* ------------------------------------------------------------------------ */
+int fr_scanlist(fin, pass)
+fr_info_t *fin;
+u_32_t pass;
+{
+ int rulen, portcmp, off, logged, skip;
+ struct frentry *fr, *fnext;
+ u_32_t passt, passo;
+
+ /*
+ * Do not allow nesting deeper than 16 levels.
+ */
+ if (fin->fin_depth >= 16)
+ return pass;
+
+ fr = fin->fin_fr;
+
+ /*
+ * If there are no rules in this list, return now.
+ */
+ if (fr == NULL)
+ return pass;
+
+ /*
+ * Both early returns above happen before fin_depth is incremented,
+ * so the decrement at the bottom stays balanced.
+ */
+ skip = 0;
+ logged = 0;
+ portcmp = 0;
+ fin->fin_depth++;
+ fin->fin_fr = NULL;
+ off = fin->fin_off;
+
+ /*
+ * Ports are only compared for TCP/UDP packets that carry more than 3
+ * bytes of data and have a zero fragment offset.
+ */
+ if ((fin->fin_flx & FI_TCPUDP) && (fin->fin_dlen > 3) && !off)
+ portcmp = 1;
+
+ for (rulen = 0; fr; fr = fnext, rulen++) {
+ /*
+ * Remember the next rule now: fr may be replaced below by a rule
+ * returned from a rule's function.
+ */
+ fnext = fr->fr_next;
+ /* A matched "skip" rule asked for this many rules to be passed over. */
+ if (skip != 0) {
+ FR_VERBOSE(("%d (%#x)\n", skip, fr->fr_flags));
+ skip--;
+ continue;
+ }
+
+ /*
+ * In all checks below, a null (zero) value in the
+ * filter structure is taken to mean a wildcard.
+ *
+ * check that we are working for the right interface
+ */
+#ifdef _KERNEL
+ if (fr->fr_ifa && fr->fr_ifa != fin->fin_ifp)
+ continue;
+#else
+ if (opts & (OPT_VERBOSE|OPT_DEBUG))
+ printf("\n");
+ FR_VERBOSE(("%c", FR_ISSKIP(pass) ? 's' :
+ FR_ISPASS(pass) ? 'p' :
+ FR_ISACCOUNT(pass) ? 'A' :
+ FR_ISAUTH(pass) ? 'a' :
+ (pass & FR_NOMATCH) ? 'n' :'b'));
+ if (fr->fr_ifa && fr->fr_ifa != fin->fin_ifp)
+ continue;
+ FR_VERBOSE((":i"));
+#endif
+
+ /*
+ * Dispatch on the rule type to decide whether the packet matches;
+ * "continue" moves on to the next rule without a match.
+ */
+ switch (fr->fr_type)
+ {
+ case FR_T_IPF :
+ case FR_T_IPF|FR_T_BUILTIN :
+ if (fr_ipfcheck(fin, fr, portcmp))
+ continue;
+ break;
+#if defined(IPFILTER_BPF)
+ case FR_T_BPFOPC :
+ case FR_T_BPFOPC|FR_T_BUILTIN :
+ {
+ u_char *mc;
+
+ if (*fin->fin_mp == NULL)
+ continue;
+ if (fin->fin_v != fr->fr_v)
+ continue;
+ mc = (u_char *)fin->fin_m;
+ if (!bpf_filter(fr->fr_data, mc, fin->fin_plen, 0))
+ continue;
+ break;
+ }
+#endif
+ case FR_T_CALLFUNC|FR_T_BUILTIN :
+ {
+ frentry_t *f;
+
+ /*
+ * The rule's function decides the match and may hand back a
+ * different rule to continue with; NULL means no match.
+ */
+ f = (*fr->fr_func)(fin, &pass);
+ if (f != NULL)
+ fr = f;
+ else
+ continue;
+ break;
+ }
+ default :
+ break;
+ }
+
+ /*
+ * For inbound packets, a rule carrying a NAT tag only matches
+ * packets that have a tag for which fr_matchtag() is non-zero.
+ */
+ if ((fin->fin_out == 0) && (fr->fr_nattag.ipt_num[0] != 0)) {
+ if (fin->fin_nattag == NULL)
+ continue;
+ if (fr_matchtag(&fr->fr_nattag, fin->fin_nattag) == 0)
+ continue;
+ }
+ FR_VERBOSE(("=%s.%d *", fr->fr_group, rulen));
+
+ passt = fr->fr_flags;
+
+ /*
+ * Allowing a rule with the "keep state" flag set to match
+ * packets that have been tagged "out of window" by the TCP
+ * state tracking is foolish as the attempt to add a new
+ * state entry to the table will fail.
+ */
+ if ((passt & FR_KEEPSTATE) && (fin->fin_flx & FI_OOW))
+ continue;
+
+ /*
+ * If the rule is a "call now" rule, then call the function
+ * in the rule, if it exists and use the results from that.
+ * If the function pointer is bad, just make like we ignore
+ * it, except for increasing the hit counter.
+ */
+ if ((passt & FR_CALLNOW) != 0) {
+ ATOMIC_INC64(fr->fr_hits);
+ if ((fr->fr_func != NULL) &&
+ (fr->fr_func != (ipfunc_t)-1)) {
+ frentry_t *frs;
+
+ /*
+ * fin_fr points at this rule while its function runs
+ * and is restored if the function returns no rule.
+ */
+ frs = fin->fin_fr;
+ fin->fin_fr = fr;
+ fr = (*fr->fr_func)(fin, &passt);
+ if (fr == NULL) {
+ fin->fin_fr = frs;
+ continue;
+ }
+ passt = fr->fr_flags;
+ fin->fin_fr = fr;
+ }
+ } else {
+ fin->fin_fr = fr;
+ }
+
+#ifdef IPFILTER_LOG
+ /*
+ * Just log this packet...
+ */
+ if ((passt & FR_LOGMASK) == FR_LOG) {
+ if (ipflog(fin, passt) == -1) {
+ /* Logging failed: an "or-block" rule becomes block quick. */
+ if (passt & FR_LOGORBLOCK) {
+ passt &= ~FR_CMDMASK;
+ passt |= FR_BLOCK|FR_QUICK;
+ }
+ ATOMIC_INCL(frstats[fin->fin_out].fr_skip);
+ }
+ ATOMIC_INCL(frstats[fin->fin_out].fr_pkl);
+ logged = 1;
+ }
+#endif /* IPFILTER_LOG */
+ fr->fr_bytes += (U_QUAD_T)fin->fin_plen;
+ /*
+ * Keep the previous result so that it can be put back if adding
+ * state for a "quick" rule fails further down.
+ */
+ passo = pass;
+ if (FR_ISSKIP(passt))
+ skip = fr->fr_arg;
+ else if ((passt & FR_LOGMASK) != FR_LOG)
+ pass = passt;
+ if (passt & (FR_RETICMP|FR_FAKEICMP))
+ fin->fin_icode = fr->fr_icode;
+ FR_DEBUG(("pass %#x\n", pass));
+ ATOMIC_INC64(fr->fr_hits);
+ fin->fin_rule = rulen;
+ (void) strncpy(fin->fin_group, fr->fr_group, FR_GROUPLEN);
+ if (fr->fr_grp != NULL) {
+ /*
+ * The rule heads a group: scan the group's rules recursively.
+ * If nothing in the group matched (fin_fr is NULL on return),
+ * this rule remains the recorded match.
+ */
+ fin->fin_fr = *fr->fr_grp;
+ pass = fr_scanlist(fin, pass);
+ if (fin->fin_fr == NULL) {
+ fin->fin_rule = rulen;
+ (void) strncpy(fin->fin_group, fr->fr_group,
+ FR_GROUPLEN);
+ fin->fin_fr = fr;
+ }
+ if (fin->fin_flx & FI_DONTCACHE)
+ logged = 1;
+ }
+
+ if (pass & FR_QUICK) {
+ /*
+ * Finally, if we've asked to track state for this
+ * packet, set it up. Add state for "quick" rules
+ * here so that if the action fails we can consider
+ * the rule to "not match" and keep on processing
+ * filter rules.
+ */
+ if ((pass & FR_KEEPSTATE) &&
+ !(fin->fin_flx & FI_STATE)) {
+ int out = fin->fin_out;
+
+ if (fr_addstate(fin, NULL, 0) != NULL) {
+ ATOMIC_INCL(frstats[out].fr_ads);
+ } else {
+ ATOMIC_INCL(frstats[out].fr_bads);
+ pass = passo;
+ continue;
+ }
+ }
+ break;
+ }
+ }
+ /* A packet that was logged along the way is flagged FI_DONTCACHE. */
+ if (logged)
+ fin->fin_flx |= FI_DONTCACHE;
+ fin->fin_depth--;
+ return pass;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    fr_acctpkt                                                  */
+/* Returns:     frentry_t* - always returns NULL                            */
+/* Parameters:  fin(I)    - pointer to packet information                   */
+/*              passp(IO) - pointer to current/new filter decision (unused) */
+/*                                                                          */
+/* Runs the packet through the active accounting rule list for its IP       */
+/* version and direction, if that list is not empty.  The rule pointer,     */
+/* group name and rule number held in fin are saved beforehand and put back */
+/* afterwards, so the scan leaves those three fields as it found them.      */
+/*                                                                          */
+/* N.B.: this function returns NULL to match the prototype used by other    */
+/* functions called from the IPFilter "mainline" in fr_check().             */
+/* ------------------------------------------------------------------------ */
+frentry_t *fr_acctpkt(fin, passp)
+fr_info_t *fin;
+u_32_t *passp;
+{
+	char savedgroup[FR_GROUPLEN];
+	frentry_t *acctlist, *savedfr;
+	u_32_t result, savedrule;
+
+	passp = passp;		/* LINT: parameter is deliberately unused */
+
+	/*
+	 * Pick the accounting list for this direction; IPv6 packets use
+	 * their own list when IPv6 support is compiled in.
+	 */
+	acctlist = ipacct[fin->fin_out][fr_active];
+#ifdef USE_INET6
+	if (fin->fin_v == 6)
+		acctlist = ipacct6[fin->fin_out][fr_active];
+#endif
+
+	/* Nothing to account against. */
+	if (acctlist == NULL)
+		return NULL;
+
+	/* Save the fields of fin that fr_scanlist() overwrites. */
+	savedfr = fin->fin_fr;
+	savedrule = fin->fin_rule;
+	bcopy(fin->fin_group, savedgroup, FR_GROUPLEN);
+
+	fin->fin_fr = acctlist;
+	result = fr_scanlist(fin, FR_NOMATCH);
+	if (FR_ISACCOUNT(result)) {
+		ATOMIC_INCL(frstats[0].fr_acct);
+	}
+
+	/* Put the saved fields back. */
+	fin->fin_fr = savedfr;
+	bcopy(savedgroup, fin->fin_group, FR_GROUPLEN);
+	fin->fin_rule = savedrule;
+
+	return NULL;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    fr_firewall                                                 */
+/* Returns:     frentry_t* - returns pointer to matched rule, if no matches */
+/*                           were found, returns NULL.                      */
+/* Parameters:  fin(I)    - pointer to packet information                   */
+/*              passp(IO) - pointer to current/new filter decision; read on */
+/*                          entry and updated on exit.  If NULL, the        */
+/*                          default decision (fr_pass) is used as the       */
+/*                          starting value and nothing is written back.     */
+/*                                                                          */
+/* Applies an appropriate set of firewall rules to the packet, to see if    */
+/* there are any matches.  The first check is to see if a match can be seen */
+/* in the cache.  If not, then search an appropriate list of rules.  Once a */
+/* matching rule is found, take any appropriate actions as defined by the   */
+/* rule - except logging.                                                   */
+/* ------------------------------------------------------------------------ */
+static frentry_t *fr_firewall(fin, passp)
+fr_info_t *fin;
+u_32_t *passp;
+{
+	frentry_t *fr;
+	fr_info_t *fc;
+	u_32_t pass;
+	int out;
+
+	out = fin->fin_out;
+	/*
+	 * passp is checked for NULL before being written at the end of this
+	 * function, so apply the same check before reading it here.
+	 */
+	pass = (passp != NULL) ? *passp : fr_pass;
+
+	/*
+	 * Look in the rule cache first: if the leading FI_CSIZE bytes of
+	 * this packet's information are the same as the entry cached in its
+	 * hash bucket, reuse the cached result instead of walking the access
+	 * lists, treating it as if it had come from the ACL's.
+	 */
+	fc = &frcache[out][CACHE_HASH(fin)];
+	READ_ENTER(&ipf_frcache);
+	if (!bcmp((char *)fin, (char *)fc, FI_CSIZE)) {
+		/*
+		 * copy cached data so we can unlock the mutexes earlier.
+		 */
+		bcopy((char *)fc, (char *)fin, FI_COPYSIZE);
+		RWLOCK_EXIT(&ipf_frcache);
+		ATOMIC_INCL(frstats[out].fr_chit);
+
+		if ((fr = fin->fin_fr) != NULL) {
+			ATOMIC_INC64(fr->fr_hits);
+			pass = fr->fr_flags;
+		}
+	} else {
+		RWLOCK_EXIT(&ipf_frcache);
+
+		/*
+		 * Cache miss: scan the active rule list for this direction
+		 * (and IP version).
+		 */
+#ifdef USE_INET6
+		if (fin->fin_v == 6)
+			fin->fin_fr = ipfilter6[out][fr_active];
+		else
+#endif
+			fin->fin_fr = ipfilter[out][fr_active];
+		if (fin->fin_fr != NULL)
+			pass = fr_scanlist(fin, fr_pass);
+
+		/*
+		 * Results that keep state, or that the scan flagged as
+		 * FI_DONTCACHE, are not stored in the cache.
+		 */
+		if (((pass & FR_KEEPSTATE) == 0) &&
+		    ((fin->fin_flx & FI_DONTCACHE) == 0)) {
+			WRITE_ENTER(&ipf_frcache);
+			bcopy((char *)fin, (char *)fc, FI_COPYSIZE);
+			RWLOCK_EXIT(&ipf_frcache);
+		}
+		if ((pass & FR_NOMATCH)) {
+			ATOMIC_INCL(frstats[out].fr_nom);
+		}
+		fr = fin->fin_fr;
+	}
+
+	/*
+	 * Apply packets per second rate-limiting to a rule as required.
+	 */
+	if ((fr != NULL) && (fr->fr_pps != 0) &&
+	    !ppsratecheck(&fr->fr_lastpkt, &fr->fr_curpps, fr->fr_pps)) {
+		pass &= ~(FR_CMDMASK|FR_DUP|FR_RETICMP|FR_RETRST);
+		pass |= FR_BLOCK;
+		ATOMIC_INCL(frstats[out].fr_ppshit);
+	}
+
+	/*
+	 * If we fail to add a packet to the authorization queue, then we
+	 * drop the packet later.  However, if it was added then pretend
+	 * we've dropped it already.
+	 */
+	if (FR_ISAUTH(pass)) {
+		if (fr_newauth(fin->fin_m, fin) != 0) {
+#ifdef _KERNEL
+			fin->fin_m = *fin->fin_mp = NULL;
+#else
+			;
+#endif
+			fin->fin_error = 0;
+		} else
+			fin->fin_error = ENOSPC;
+	}
+
+	/*
+	 * Rules flagged FR_CALLNOW are excluded here; fr_scanlist() calls
+	 * the function of such a rule itself when the rule matches.
+	 */
+	if ((fr != NULL) && (fr->fr_func != NULL) &&
+	    (fr->fr_func != (ipfunc_t)-1) && !(pass & FR_CALLNOW))
+		(void) (*fr->fr_func)(fin, &pass);
+
+	/*
+	 * If a rule is a pre-auth rule, check again in the list of rules
+	 * loaded for authenticated use.  It does not particularly matter
+	 * if this search fails because a "preauth" result, from a rule,
+	 * is treated as "not a pass", hence the packet is blocked.
+	 */
+	if (FR_ISPREAUTH(pass)) {
+		if ((fin->fin_fr = ipauth) != NULL)
+			pass = fr_scanlist(fin, fr_pass);
+	}
+
+	/*
+	 * If the rule has "keep frag" and the packet is actually a fragment,
+	 * then create a fragment state entry.
+	 */
+	if ((pass & (FR_KEEPFRAG|FR_KEEPSTATE)) == FR_KEEPFRAG) {
+		if (fin->fin_flx & FI_FRAG) {
+			if (fr_newfrag(fin, pass) == -1) {
+				ATOMIC_INCL(frstats[out].fr_bnfr);
+			} else {
+				ATOMIC_INCL(frstats[out].fr_nfr);
+			}
+		} else {
+			ATOMIC_INCL(frstats[out].fr_cfr);
+		}
+	}
+
+	/*
+	 * Finally, if we've asked to track state for this packet, set it up.
+	 * A packet that would have been passed is blocked if the state
+	 * entry cannot be added.
+	 */
+	if ((pass & FR_KEEPSTATE) && !(fin->fin_flx & FI_STATE)) {
+		if (fr_addstate(fin, NULL, 0) != NULL) {
+			ATOMIC_INCL(frstats[out].fr_ads);
+		} else {
+			ATOMIC_INCL(frstats[out].fr_bads);
+			if (FR_ISPASS(pass)) {
+				pass &= ~FR_CMDMASK;
+				pass |= FR_BLOCK;
+			}
+		}
+	}
+
+	/* fin_fr may have been changed by the pre-auth rescan above. */
+	fr = fin->fin_fr;
+
+	if (passp != NULL)
+		*passp = pass;
+
+	return fr;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    fr_check                                                    */
+/* Returns:     int - 0 == packet allowed through,                          */
+/*              User space:                                                 */
+/*                    -1 == packet blocked                                  */
+/*                     1 == packet not matched                              */
+/*                    -2 == requires authentication                         */
+/*              Kernel:                                                     */
+/*                   > 0 == filter error # for packet                       */
+/* Parameters: ip(I)   - pointer to start of IPv4/6 packet                  */
+/*             hlen(I) - length of header                                   */
+/*             ifp(I)  - pointer to interface this packet is on             */
+/*             out(I)  - 0 == packet going in, 1 == packet going out        */
+/*             mp(IO)  - pointer to caller's buffer pointer that holds this */
+/*                       IP packet.                                         */
+/* Solaris & HP-UX ONLY :                                                   */
+/*             qpi(I)  - pointer to STREAMS queue information for this      */
+/*                       interface & direction.                             */
+/*                                                                          */
+/* fr_check() is the master function for all IPFilter packet processing.    */
+/* It orchestrates: Network Address Translation (NAT), checking for packet  */
+/* authorisation (or pre-authorisation), presence of related state info.,   */
+/* generating log entries, IP packet accounting, routing of packets as      */
+/* directed by firewall rules and of course whether or not to allow the     */
+/* packet to be further processed by the kernel.                            */
+/*                                                                          */
+/* For packets blocked, the contents of "mp" will be NULL'd and the buffer  */
+/* freed.  Packets passed may be returned with the pointer pointed to by    */
+/* "mp" changed to a new buffer.                                            */
+/*                                                                          */
+/* Locking: ipf_global is held (read) for the whole call.  Every path that  */
+/* reaches the "filtered" label must hold ipf_mutex (read), because it is   */
+/* released just before the "finished" label; paths that jump straight to   */
+/* "finished" must not hold ipf_mutex.                                      */
+/* ------------------------------------------------------------------------ */
+int fr_check(ip, hlen, ifp, out
+#if defined(_KERNEL) && defined(MENTAT)
+, qif, mp)
+void *qif;
+#else
+, mp)
+#endif
+mb_t **mp;
+ip_t *ip;
+int hlen;
+void *ifp;
+int out;
+{
+	/*
+	 * The above really sucks, but the parameter list differs between
+	 * platforms and the #if's are how that is expressed here.
+	 */
+	fr_info_t frinfo;
+	fr_info_t *fin = &frinfo;
+	u_32_t pass = fr_pass;
+	frentry_t *fr = NULL;
+	int v = IP_V(ip);
+	mb_t *mc = NULL;
+	mb_t *m;
+#ifdef USE_INET6
+	ip6_t *ip6;
+#endif
+#ifdef _KERNEL
+# ifdef MENTAT
+	qpktinfo_t *qpi = qif;
+#endif
+#endif
+	SPL_INT(s);
+
+	/*
+	 * The first part of fr_check() deals with making sure that what goes
+	 * into the filtering engine makes some sense.  Information about
+	 * the packet is distilled, collected into a fr_info_t structure and
+	 * then an attempt is made to ensure the buffer the packet is in is
+	 * big enough to hold all the required packet headers.
+	 */
+#ifdef _KERNEL
+# ifdef MENTAT
+	if (!OK_32PTR(ip))
+		return 2;
+# endif
+
+	READ_ENTER(&ipf_global);
+
+	if (fr_running <= 0) {
+		RWLOCK_EXIT(&ipf_global);
+		return 0;
+	}
+
+	bzero((char *)fin, sizeof(*fin));
+
+# ifdef MENTAT
+	if (qpi->qpi_flags & QF_GROUP)
+		fin->fin_flx |= FI_MBCAST;
+	m = qpi->qpi_m;
+	fin->fin_qfm = m;
+	fin->fin_qpi = qpi;
+# else /* MENTAT */
+
+	m = *mp;
+
+# if defined(M_MCAST)
+	if ((m->m_flags & M_MCAST) != 0)
+		fin->fin_flx |= FI_MBCAST|FI_MULTICAST;
+# endif
+# if defined(M_MLOOP)
+	if ((m->m_flags & M_MLOOP) != 0)
+		fin->fin_flx |= FI_MBCAST|FI_MULTICAST;
+# endif
+# if defined(M_BCAST)
+	if ((m->m_flags & M_BCAST) != 0)
+		fin->fin_flx |= FI_MBCAST|FI_BROADCAST;
+# endif
+# ifdef M_CANFASTFWD
+	/*
+	 * XXX For now, IP Filter and fast-forwarding of cached flows
+	 * XXX are mutually exclusive.  Eventually, IP Filter should
+	 * XXX get a "can-fast-forward" filter rule.
+	 */
+	m->m_flags &= ~M_CANFASTFWD;
+# endif /* M_CANFASTFWD */
+# ifdef CSUM_DELAY_DATA
+	/*
+	 * disable delayed checksums.
+	 */
+	if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
+		in_delayed_cksum(m);
+		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
+	}
+# endif /* CSUM_DELAY_DATA */
+# endif /* MENTAT */
+#else
+	READ_ENTER(&ipf_global);
+
+	bzero((char *)fin, sizeof(*fin));
+	m = *mp;
+#endif /* _KERNEL */
+
+	fin->fin_v = v;
+	fin->fin_m = m;
+	fin->fin_ip = ip;
+	fin->fin_mp = mp;
+	fin->fin_out = out;
+	fin->fin_ifp = ifp;
+	fin->fin_error = ENETUNREACH;
+	fin->fin_hlen = (u_short)hlen;
+	fin->fin_dp = (char *)ip + hlen;
+
+	fin->fin_ipoff = (char *)ip - MTOD(m, char *);
+
+	SPL_NET(s);
+
+#ifdef USE_INET6
+	if (v == 6) {
+		ATOMIC_INCL(frstats[out].fr_ipv6);
+		/*
+		 * Jumbo grams are quite likely too big for internal buffer
+		 * structures to handle comfortably, for now, so just drop
+		 * them.
+		 */
+		ip6 = (ip6_t *)ip;
+		fin->fin_plen = ntohs(ip6->ip6_plen);
+		if (fin->fin_plen == 0) {
+			/*
+			 * The code at "filtered" releases ipf_mutex, so it
+			 * must be held before jumping there, exactly as the
+			 * fr_makefrip() failure path below does.
+			 */
+			READ_ENTER(&ipf_mutex);
+			pass = FR_BLOCK|FR_NOMATCH;
+			goto filtered;
+		}
+		fin->fin_plen += sizeof(ip6_t);
+	} else
+#endif
+	{
+#if (OpenBSD >= 200311) && defined(_KERNEL)
+		ip->ip_len = ntohs(ip->ip_len);
+		ip->ip_off = ntohs(ip->ip_off);
+#endif
+		fin->fin_plen = ip->ip_len;
+	}
+
+	if (fr_makefrip(hlen, ip, fin) == -1) {
+		/* Taken only so that the release at "filtered" is balanced. */
+		READ_ENTER(&ipf_mutex);
+		pass = FR_BLOCK;
+		goto filtered;
+	}
+
+	/*
+	 * For at least IPv6 packets, if a m_pullup() fails then this pointer
+	 * becomes NULL and so we have no packet to free.
+	 */
+	if (*fin->fin_mp == NULL)
+		goto finished;
+
+	/*
+	 * Inbound sanity checks: flag (but do not yet drop) packets with an
+	 * unverifiable source address or a TTL/hop limit below fr_minttl.
+	 */
+	if (!out) {
+		if (v == 4) {
+#ifdef _KERNEL
+			if (fr_chksrc && !fr_verifysrc(fin)) {
+				ATOMIC_INCL(frstats[0].fr_badsrc);
+				fin->fin_flx |= FI_BADSRC;
+			}
+#endif
+			if (fin->fin_ip->ip_ttl < fr_minttl) {
+				ATOMIC_INCL(frstats[0].fr_badttl);
+				fin->fin_flx |= FI_LOWTTL;
+			}
+		}
+#ifdef USE_INET6
+		else if (v == 6) {
+			ip6 = (ip6_t *)ip;
+#ifdef _KERNEL
+			if (fr_chksrc && !fr_verifysrc(fin)) {
+				ATOMIC_INCL(frstats[0].fr_badsrc);
+				fin->fin_flx |= FI_BADSRC;
+			}
+#endif
+			if (ip6->ip6_hlim < fr_minttl) {
+				ATOMIC_INCL(frstats[0].fr_badttl);
+				fin->fin_flx |= FI_LOWTTL;
+			}
+		}
+#endif
+	}
+
+	if (fin->fin_flx & FI_SHORT) {
+		ATOMIC_INCL(frstats[out].fr_short);
+	}
+
+	READ_ENTER(&ipf_mutex);
+
+	/*
+	 * Check auth now.  This, combined with the check below to see if apass
+	 * is 0 is to ensure that we don't count the packet twice, which can
+	 * otherwise occur when we reprocess it.  As it is, we only count it
+	 * after it has no auth. table matchup.  This also stops NAT from
+	 * occurring until after the packet has been auth'd.
+	 */
+	fr = fr_checkauth(fin, &pass);
+	if (!out) {
+		if (fr_checknatin(fin, &pass) == -1) {
+			RWLOCK_EXIT(&ipf_mutex);
+			goto finished;
+		}
+	}
+	if (!out)
+		(void) fr_acctpkt(fin, NULL);
+
+	/*
+	 * Try, in order: known fragment, existing state, then the firewall
+	 * rules themselves.
+	 */
+	if (fr == NULL)
+		if ((fin->fin_flx & (FI_FRAG|FI_BAD)) == FI_FRAG)
+			fr = fr_knownfrag(fin, &pass);
+	if (fr == NULL)
+		fr = fr_checkstate(fin, &pass);
+
+	if ((pass & FR_NOMATCH) || (fr == NULL))
+		fr = fr_firewall(fin, &pass);
+
+	fin->fin_fr = fr;
+
+	/*
+	 * Only count/translate packets which will be passed on, out the
+	 * interface.
+	 */
+	if (out && FR_ISPASS(pass)) {
+		(void) fr_acctpkt(fin, NULL);
+
+		if (fr_checknatout(fin, &pass) == -1) {
+			RWLOCK_EXIT(&ipf_mutex);
+			goto finished;
+		} else if ((fr_update_ipid != 0) && (v == 4)) {
+			if (fr_updateipid(fin) == -1) {
+				ATOMIC_INCL(frstats[1].fr_ipud);
+				pass &= ~FR_CMDMASK;
+				pass |= FR_BLOCK;
+			} else {
+				ATOMIC_INCL(frstats[0].fr_ipud);
+			}
+		}
+	}
+
+#ifdef IPFILTER_LOG
+	if ((fr_flags & FF_LOGGING) || (pass & FR_LOGMASK)) {
+		(void) fr_dolog(fin, &pass);
+	}
+#endif
+
+	if (fin->fin_state != NULL)
+		fr_statederef(fin, (ipstate_t **)&fin->fin_state);
+
+	if (fin->fin_nat != NULL)
+		fr_natderef((nat_t **)&fin->fin_nat);
+
+	/*
+	 * Only allow FR_DUP to work if a rule matched - it makes no sense to
+	 * set FR_DUP as a "default" as there are no instructions about where
+	 * to send the packet.  Use fin_m here because it may have changed
+	 * (without an update of 'm') in prior processing.
+	 */
+	if ((fr != NULL) && (pass & FR_DUP)) {
+		mc = M_DUPLICATE(fin->fin_m);
+	}
+
+	if (pass & (FR_RETRST|FR_RETICMP)) {
+		/*
+		 * Should we return an ICMP packet to indicate error
+		 * status passing through the packet filter ?
+		 * WARNING: ICMP error packets AND TCP RST packets should
+		 * ONLY be sent in response to incoming packets.  Sending them
+		 * in response to outbound packets can result in a panic on
+		 * some operating systems.
+		 */
+		if (!out) {
+			if (pass & FR_RETICMP) {
+				int dst;
+
+				if ((pass & FR_RETMASK) == FR_FAKEICMP)
+					dst = 1;
+				else
+					dst = 0;
+				(void) fr_send_icmp_err(ICMP_UNREACH, fin, dst);
+				ATOMIC_INCL(frstats[0].fr_ret);
+			} else if (((pass & FR_RETMASK) == FR_RETRST) &&
+				   !(fin->fin_flx & FI_SHORT)) {
+				if (fr_send_reset(fin) == 0) {
+					ATOMIC_INCL(frstats[1].fr_ret);
+				}
+			}
+		} else {
+			if (pass & FR_RETRST)
+				fin->fin_error = ECONNRESET;
+		}
+	}
+
+	/*
+	 * If we didn't drop off the bottom of the list of rules (and thus
+	 * the 'current' rule fr is not NULL), then we may have some extra
+	 * instructions about what to do with a packet.
+	 * Once we're finished return to our caller, freeing the packet if
+	 * we are dropping it (* BSD ONLY *).
+	 * Reassign m from fin_m as we may have a new buffer, now.
+	 */
+filtered:
+	m = fin->fin_m;
+
+	if (fr != NULL) {
+		frdest_t *fdp;
+
+		fdp = &fr->fr_tifs[fin->fin_rev];
+
+		if (!out && (pass & FR_FASTROUTE)) {
+			/*
+			 * For fastroute rule, no destination interface defined
+			 * so pass NULL as the frdest_t parameter
+			 */
+			(void) fr_fastroute(m, mp, fin, NULL);
+			m = *mp = NULL;
+		} else if ((fdp->fd_ifp != NULL) &&
+			   (fdp->fd_ifp != (struct ifnet *)-1)) {
+			/* this is for to rules: */
+			(void) fr_fastroute(m, mp, fin, fdp);
+			m = *mp = NULL;
+		}
+
+		/*
+		 * Generate a duplicated packet.
+		 */
+		if (mc != NULL)
+			(void) fr_fastroute(mc, &mc, fin, &fr->fr_dif);
+	}
+
+	/*
+	 * This late because the likes of fr_fastroute() use fin_fr.
+	 */
+	RWLOCK_EXIT(&ipf_mutex);
+
+finished:
+	if (!FR_ISPASS(pass)) {
+		ATOMIC_INCL(frstats[out].fr_block);
+		if (*mp != NULL) {
+			FREE_MB_T(*mp);
+			m = *mp = NULL;
+		}
+	} else {
+		ATOMIC_INCL(frstats[out].fr_pass);
+#if defined(_KERNEL) && defined(__sgi)
+		if ((fin->fin_hbuf != NULL) &&
+		    (mtod(fin->fin_m, struct ip *) != fin->fin_ip)) {
+			COPYBACK(m, 0, fin->fin_plen, fin->fin_hbuf);
+		}
+#endif
+	}
+
+	SPL_X(s);
+	RWLOCK_EXIT(&ipf_global);
+
+#ifdef _KERNEL
+# if OpenBSD >= 200311
+	/* Undo the byte swap of ip_len/ip_off done on entry. */
+	if (FR_ISPASS(pass) && (v == 4)) {
+		ip = fin->fin_ip;
+		ip->ip_len = ntohs(ip->ip_len);
+		ip->ip_off = ntohs(ip->ip_off);
+	}
+# endif
+	return (FR_ISPASS(pass)) ? 0 : fin->fin_error;
+#else /* _KERNEL */
+	/*
+	 * User space: translate the decision into the return codes used by
+	 * the test tools.
+	 */
+	FR_VERBOSE(("fin_flx %#x pass %#x ", fin->fin_flx, pass));
+	if ((pass & FR_NOMATCH) != 0)
+		return 1;
+
+	if ((pass & FR_RETMASK) != 0)
+		switch (pass & FR_RETMASK)
+		{
+		case FR_RETRST :
+			return 3;
+		case FR_RETICMP :
+			return 4;
+		case FR_FAKEICMP :
+			return 5;
+		}
+
+	switch (pass & FR_CMDMASK)
+	{
+	case FR_PASS :
+		return 0;
+	case FR_BLOCK :
+		return -1;
+	case FR_AUTH :
+		return -2;
+	case FR_ACCOUNT :
+		return -3;
+	case FR_PREAUTH :
+		return -4;
+	}
+	return 2;
+#endif /* _KERNEL */
+}
+
+
+#ifdef IPFILTER_LOG
+/* ------------------------------------------------------------------------ */
+/* Function:    fr_dolog                                                    */
+/* Returns:     frentry_t* - returns contents of fin_fr (no change made)    */
+/* Parameters:  fin(I)    - pointer to packet information                   */
+/*              passp(IO) - pointer to current/new filter decision; written */
+/*                          back only when the packet was selected for      */
+/*                          logging.                                        */
+/*                                                                          */
+/* Decides from the global logging flags and the decision flags whether the */
+/* packet is to be logged as "no match", "passed" or "blocked" (checked in  */
+/* that order), bumps the matching statistic and hands the packet to        */
+/* ipflog().  When ipflog() fails, the skip counter is bumped and a packet  */
+/* that was to be passed under a "log or block" rule is blocked instead.    */
+/* ------------------------------------------------------------------------ */
+frentry_t *fr_dolog(fin, passp)
+fr_info_t *fin;
+u_32_t *passp;
+{
+	u_32_t logmode, pass;
+	int dir, dolog;
+
+	dir = fin->fin_out;
+	pass = *passp;
+	logmode = pass & FR_LOGMASK;
+	dolog = 0;
+
+	if ((fr_flags & FF_LOGNOMATCH) && (pass & FR_NOMATCH)) {
+		/* Unmatched packet and logging of those is switched on. */
+		pass |= FF_LOGNOMATCH;
+		ATOMIC_INCL(frstats[dir].fr_npkl);
+		dolog = 1;
+	} else if ((logmode == FR_LOGP) ||
+		   (FR_ISPASS(pass) && (fr_flags & FF_LOGPASS))) {
+		/* Note in the flags when only the global setting asked. */
+		if (logmode != FR_LOGP)
+			pass |= FF_LOGPASS;
+		ATOMIC_INCL(frstats[dir].fr_ppkl);
+		dolog = 1;
+	} else if ((logmode == FR_LOGB) ||
+		   (FR_ISBLOCK(pass) && (fr_flags & FF_LOGBLOCK))) {
+		if (logmode != FR_LOGB)
+			pass |= FF_LOGBLOCK;
+		ATOMIC_INCL(frstats[dir].fr_bpkl);
+		dolog = 1;
+	}
+
+	/* Not selected for logging: leave *passp untouched. */
+	if (dolog == 0)
+		return fin->fin_fr;
+
+	if (ipflog(fin, pass) == -1) {
+		ATOMIC_INCL(frstats[dir].fr_skip);
+
+		/*
+		 * If the "or-block" option has been used then
+		 * block the packet if we failed to log it.
+		 */
+		if ((pass & FR_LOGORBLOCK) && FR_ISPASS(pass)) {
+			pass &= ~FR_CMDMASK;
+			pass |= FR_BLOCK;
+		}
+	}
+	*passp = pass;
+
+	return fin->fin_fr;
+}
+#endif /* IPFILTER_LOG */
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    ipf_cksum                                                   */
+/* Returns:     u_short - IP header checksum                                */
+/* Parameters:  addr(I) - pointer to start of buffer to checksum            */
+/*              len(I)  - length of buffer in bytes                         */
+/*                                                                          */
+/* Calculate the one's complement 16 bit checksum of the buffer passed.     */
+/* The buffer is summed as 16 bit words in memory order; a trailing odd     */
+/* byte is treated as the first byte of a word whose second byte is zero,   */
+/* so the result does not depend on host byte order.                        */
+/*                                                                          */
+/* N.B.: addr should be 16bit aligned.                                      */
+/* ------------------------------------------------------------------------ */
+u_short ipf_cksum(addr, len)
+u_short *addr;
+int len;
+{
+	u_32_t sum = 0;
+
+	for (; len > 1; len -= 2)
+		sum += *addr++;
+
+	/*
+	 * mop up an odd byte, if necessary: pad it with a zero byte and add
+	 * it as a word in memory order.  Adding the byte as a plain integer
+	 * would only be right on little endian hosts.
+	 */
+	if (len == 1) {
+		u_short last = 0;
+
+		*(u_char *)&last = *(u_char *)addr;
+		sum += last;
+	}
+
+	/*
+	 * add back carry outs from top 16 bits to low 16 bits
+	 */
+	sum = (sum >> 16) + (sum & 0xffff);	/* add hi 16 to low 16 */
+	sum += (sum >> 16);			/* add carry */
+	return (u_short)(~sum);
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_cksum */
+/* Returns: u_short - layer 4 checksum */
+/* Parameters: m(I ) - pointer to buffer holding packet */
+/* ip(I) - pointer to IP header */
+/* l4proto(I) - protocol to calculate checksum for */
+/* l4hdr(I) - pointer to layer 4 header */
+/* */
+/* Calculates the layer 4 (TCP, UDP or ICMP) checksum for the packet held */
+/* in "m", using the data in the IP header "ip" to seed it. The checksum */
+/* field in the layer 4 header is zeroed while the sum is computed and is */
+/* put back before returning. */
+/* */
+/* NB: This function assumes we've pullup'd enough for all of the IP header */
+/* and the TCP header. We also assume that data blocks aren't allocated in */
+/* odd sizes. */
+/* */
+/* Expects ip_len to be in host byte order when called. */
+/* ------------------------------------------------------------------------ */
+u_short fr_cksum(m, ip, l4proto, l4hdr)
+mb_t *m;
+ip_t *ip;
+int l4proto;
+void *l4hdr;
+{
+ u_short *sp, slen, sumsave, l4hlen, *csump;
+ u_int sum, sum2;
+ int hlen;
+#ifdef USE_INET6
+ ip6_t *ip6;
+#endif
+
+ csump = NULL;
+ sumsave = 0;
+ l4hlen = 0;
+ sp = NULL;
+ slen = 0;
+ hlen = 0;
+ sum = 0;
+
+ /*
+ * Add up IP Header portion: the sum is seeded with the protocol
+ * number, the layer 4 length and the source and destination addresses.
+ */
+#ifdef USE_INET6
+ if (IP_V(ip) == 4) {
+#endif
+ hlen = IP_HL(ip) << 2;
+ slen = ip->ip_len - hlen;
+ sum = htons((u_short)l4proto);
+ sum += htons(slen);
+ sp = (u_short *)&ip->ip_src;
+ sum += *sp++; /* ip_src */
+ sum += *sp++;
+ sum += *sp++; /* ip_dst */
+ sum += *sp++;
+#ifdef USE_INET6
+ } else if (IP_V(ip) == 6) {
+ ip6 = (ip6_t *)ip;
+ hlen = sizeof(*ip6);
+ slen = ntohs(ip6->ip6_plen);
+ sum = htons((u_short)l4proto);
+ sum += htons(slen);
+ sp = (u_short *)&ip6->ip6_src;
+ sum += *sp++; /* ip6_src */
+ sum += *sp++;
+ sum += *sp++;
+ sum += *sp++;
+ sum += *sp++;
+ sum += *sp++;
+ sum += *sp++;
+ sum += *sp++;
+ sum += *sp++; /* ip6_dst */
+ sum += *sp++;
+ sum += *sp++;
+ sum += *sp++;
+ sum += *sp++;
+ sum += *sp++;
+ sum += *sp++;
+ sum += *sp++;
+ }
+#endif
+
+ /*
+ * Locate the checksum field for the protocol. For ICMP the seed
+ * built above is discarded (sum is reset to 0).
+ */
+ switch (l4proto)
+ {
+ case IPPROTO_UDP :
+ csump = &((udphdr_t *)l4hdr)->uh_sum;
+ l4hlen = sizeof(udphdr_t);
+ break;
+
+ case IPPROTO_TCP :
+ csump = &((tcphdr_t *)l4hdr)->th_sum;
+ l4hlen = sizeof(tcphdr_t);
+ break;
+ case IPPROTO_ICMP :
+ csump = &((icmphdr_t *)l4hdr)->icmp_cksum;
+ l4hlen = 4;
+ sum = 0;
+ break;
+ default :
+ break;
+ }
+
+ /*
+ * Save the checksum currently in the packet and zero the field so
+ * that it does not contribute to the sum; it is restored on exit.
+ */
+ if (csump != NULL) {
+ sumsave = *csump;
+ *csump = 0;
+ }
+
+ l4hlen = l4hlen; /* LINT */
+
+#ifdef _KERNEL
+# ifdef MENTAT
+ {
+ void *rp = m->b_rptr;
+
+ /*
+ * Temporarily point b_rptr at the IP header (when it lies strictly
+ * inside this block) for the ip_cksum() call, then put it back.
+ */
+ if ((unsigned char *)ip > m->b_rptr && (unsigned char *)ip < m->b_wptr)
+ m->b_rptr = (u_char *)ip;
+ sum2 = ip_cksum(m, hlen, sum); /* hlen == offset */
+ m->b_rptr = rp;
+ sum2 = (sum2 & 0xffff) + (sum2 >> 16);
+ sum2 = ~sum2 & 0xffff;
+ }
+# else /* MENTAT */
+# if defined(BSD) || defined(sun)
+ /*
+ * Temporarily step the mbuf past the IP header so that in_cksum()
+ * sees only the layer 4 data, then undo the adjustment.
+ */
+# if BSD >= 199103
+ m->m_data += hlen;
+# else
+ m->m_off += hlen;
+# endif
+ m->m_len -= hlen;
+ sum2 = in_cksum(m, slen);
+ m->m_len += hlen;
+# if BSD >= 199103
+ m->m_data -= hlen;
+# else
+ m->m_off -= hlen;
+# endif
+ /*
+ * Both sum and sum2 are partial sums, so combine them together.
+ */
+ sum += ~sum2 & 0xffff;
+ while (sum > 0xffff)
+ sum = (sum & 0xffff) + (sum >> 16);
+ sum2 = ~sum & 0xffff;
+# else /* defined(BSD) || defined(sun) */
+{
+ union {
+ u_char c[2];
+ u_short s;
+ } bytes;
+ u_short len = ip->ip_len;
+# if defined(__sgi)
+ int add;
+# endif
+
+ /*
+ * Add up the layer 4 header portion, starting from l4hdr.
+ */
+ if (sp != (u_short *)l4hdr)
+ sp = (u_short *)l4hdr;
+
+ switch (l4proto)
+ {
+ case IPPROTO_UDP :
+ sum += *sp++; /* sport */
+ sum += *sp++; /* dport */
+ sum += *sp++; /* udp length */
+ sum += *sp++; /* checksum */
+ break;
+
+ case IPPROTO_TCP :
+ sum += *sp++; /* sport */
+ sum += *sp++; /* dport */
+ sum += *sp++; /* seq */
+ sum += *sp++;
+ sum += *sp++; /* ack */
+ sum += *sp++;
+ sum += *sp++; /* off */
+ sum += *sp++; /* win */
+ sum += *sp++; /* checksum */
+ sum += *sp++; /* urp */
+ break;
+ case IPPROTO_ICMP :
+ sum = *sp++; /* type/code */
+ sum += *sp++; /* checksum */
+ break;
+ }
+
+# ifdef __sgi
+ /*
+ * In case we had to copy the IP & TCP header out of mbufs,
+ * skip over the mbuf bits which are the header
+ */
+ if ((caddr_t)ip != mtod(m, caddr_t)) {
+ hlen = (caddr_t)sp - (caddr_t)ip;
+ while (hlen) {
+ add = MIN(hlen, m->m_len);
+ sp = (u_short *)(mtod(m, caddr_t) + add);
+ hlen -= add;
+ if (add == m->m_len) {
+ m = m->m_next;
+ if (!hlen) {
+ if (!m)
+ break;
+ sp = mtod(m, u_short *);
+ }
+ PANIC((!m),("fr_cksum(1): not enough data"));
+ }
+ }
+ }
+# endif
+
+ /*
+ * What remains is the layer 4 payload. NOTE(review): len is an
+ * unsigned u_short, so "len <= 0" is only true when len is exactly 0;
+ * a packet shorter than its headers would wrap rather than be caught
+ * here - confirm callers guarantee ip_len >= l4hlen + hlen.
+ */
+ len -= (l4hlen + hlen);
+ if (len <= 0)
+ goto nodata;
+
+ /*
+ * Sum the payload 16 bits at a time, moving on to the next mbuf when
+ * the current one is used up and coping with a word that is split
+ * across two mbufs or is not 16 bit aligned.
+ */
+ while (len > 1) {
+ if (((caddr_t)sp - mtod(m, caddr_t)) >= m->m_len) {
+ m = m->m_next;
+ PANIC((!m),("fr_cksum(2): not enough data"));
+ sp = mtod(m, u_short *);
+ }
+ if (((caddr_t)(sp + 1) - mtod(m, caddr_t)) > m->m_len) {
+ bytes.c[0] = *(u_char *)sp;
+ m = m->m_next;
+ PANIC((!m),("fr_cksum(3): not enough data"));
+ sp = mtod(m, u_short *);
+ bytes.c[1] = *(u_char *)sp;
+ sum += bytes.s;
+ sp = (u_short *)((u_char *)sp + 1);
+ }
+ if ((u_long)sp & 1) {
+ bcopy((char *)sp++, (char *)&bytes.s, sizeof(bytes.s));
+ sum += bytes.s;
+ } else
+ sum += *sp++;
+ len -= 2;
+ }
+
+ /* A trailing odd byte is added as the first byte of a 16 bit word. */
+ if (len != 0)
+ sum += ntohs(*(u_char *)sp << 8);
+nodata:
+ while (sum > 0xffff)
+ sum = (sum & 0xffff) + (sum >> 16);
+ sum2 = (u_short)(~sum & 0xffff);
+}
+# endif /* defined(BSD) || defined(sun) */
+# endif /* MENTAT */
+#else /* _KERNEL */
+ /*
+ * User space: sum slen bytes starting at sp, which at this point is
+ * just past the addresses summed above. NOTE(review): that only
+ * coincides with the layer 4 header when the IPv4 header carries no
+ * options - confirm whether callers can pass such packets.
+ */
+ for (; slen > 1; slen -= 2)
+ sum += *sp++;
+ if (slen)
+ sum += ntohs(*(u_char *)sp << 8);
+ while (sum > 0xffff)
+ sum = (sum & 0xffff) + (sum >> 16);
+ sum2 = (u_short)(~sum & 0xffff);
+#endif /* _KERNEL */
+ /* Put the checksum that was in the packet back. */
+ if (csump != NULL)
+ *csump = sumsave;
+ return sum2;
+}
+
+
+#if defined(_KERNEL) && ( ((BSD < 199103) && !defined(MENTAT)) || \
+ defined(__sgi) ) && !defined(linux) && !defined(_AIX51)
+/*
+ * Copyright (c) 1982, 1986, 1988, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)uipc_mbuf.c 8.2 (Berkeley) 1/4/94
+ * $Id: fil.c,v 2.243.2.64 2005/08/13 05:19:59 darrenr Exp $
+ */
+/*
+ * Copy data from an mbuf chain starting "off" bytes from the beginning,
+ * continuing for "len" bytes, into the indicated buffer.
+ */
+void
+m_copydata(m, off, len, cp)
+ mb_t *m;
+ int off;
+ int len;
+ caddr_t cp;
+{
+ unsigned count;
+
+ if (off < 0 || len < 0)
+ panic("m_copydata");
+ while (off > 0) { /* step over mbufs that lie entirely before "off" */
+ if (m == 0)
+ panic("m_copydata"); /* chain ended before "off" was reached */
+ if (off < m->m_len)
+ break; /* "off" now indexes into this mbuf */
+ off -= m->m_len;
+ m = m->m_next;
+ }
+ while (len > 0) {
+ if (m == 0)
+ panic("m_copydata"); /* chain ended with "len" bytes still to copy */
+ count = MIN(m->m_len - off, len); /* what this mbuf can supply */
+ bcopy(mtod(m, caddr_t) + off, cp, count);
+ len -= count;
+ cp += count;
+ off = 0; /* every following mbuf is read from its start */
+ m = m->m_next;
+ }
+}
+
+
+/*
+ * Copy data from a buffer back into the indicated mbuf chain,
+ * starting "off" bytes from the beginning, extending the mbuf
+ * chain if necessary.
+ */
+void
+m_copyback(m0, off, len, cp)
+ struct mbuf *m0;
+ int off;
+ int len;
+ caddr_t cp;
+{
+ int mlen;
+ struct mbuf *m = m0, *n;
+ int totlen = 0;
+
+ if (m0 == 0)
+ return;
+ while (off > (mlen = m->m_len)) { /* walk forward to the mbuf that contains "off" */
+ off -= mlen;
+ totlen += mlen;
+ if (m->m_next == 0) { /* chain is too short: append another mbuf */
+ n = m_getclr(M_DONTWAIT, m->m_type);
+ if (n == 0)
+ goto out; /* no mbuf obtained: return without copying anything */
+ n->m_len = min(MLEN, len + off);
+ m->m_next = n;
+ }
+ m = m->m_next;
+ }
+ while (len > 0) {
+ mlen = min(m->m_len - off, len); /* amount written into this mbuf */
+ bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen);
+ cp += mlen;
+ len -= mlen;
+ mlen += off;
+ off = 0; /* following mbufs are written from their start */
+ totlen += mlen;
+ if (len == 0)
+ break;
+ if (m->m_next == 0) { /* data left over: extend the chain */
+ n = m_get(M_DONTWAIT, m->m_type);
+ if (n == 0)
+ break; /* no mbuf obtained: the remaining data is not copied */
+ n->m_len = min(MLEN, len);
+ m->m_next = n;
+ }
+ m = m->m_next;
+ }
+out:
+#if 0
+ if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
+ m->m_pkthdr.len = totlen;
+#endif
+ return;
+}
+#endif /* (_KERNEL) && ( ((BSD < 199103) && !MENTAT) || __sgi) */
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_findgroup */
+/* Returns: frgroup_t * - NULL = group not found, else pointer to group */
+/* Parameters: group(I) - group name to search for */
+/* unit(I) - device to which this group belongs */
+/* set(I) - which set of rules (active/inactive) this is */
+/* fgpp(O) - pointer to place to store pointer to the pointer */
+/* to where to add the next (last) group or where */
+/* to delete group from. */
+/* */
+/* Search amongst the defined groups for a particular group number. */
+/* ------------------------------------------------------------------------ */
+frgroup_t *fr_findgroup(group, unit, set, fgpp)
+char *group;
+minor_t unit;
+int set;
+frgroup_t ***fgpp;
+{
+ frgroup_t *fg, **fgp;
+
+ /*
+ * Which list of groups to search in is dependent on which list of
+ * rules are being operated on.
+ */
+ fgp = &ipfgroups[unit][set];
+
+ while ((fg = *fgp) != NULL) {
+ if (strncmp(group, fg->fg_name, FR_GROUPLEN) == 0)
+ break;
+ else
+ fgp = &fg->fg_next; /* fgp always addresses the link that leads to the next entry */
+ }
+ if (fgpp != NULL)
+ *fgpp = fgp; /* on a match: the link pointing at fg; otherwise: the list's final NULL link */
+ return fg;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_addgroup */
+/* Returns: frgroup_t * - NULL == did not create group, */
+/* != NULL == pointer to the group */
+/* Parameters: group(I) - group name to add */
+/* head(I) - rule pointer that is using this as the head */
+/* flags(I) - rule flags which describe the type of rule it is */
+/* unit(I) - device to which this group will belong to */
+/* set(I) - which set of rules (active/inactive) this is */
+/* Write Locks: ipf_mutex */
+/* */
+/* Add a new group head, or if it already exists, increase the reference */
+/* count to it. */
+/* ------------------------------------------------------------------------ */
+frgroup_t *fr_addgroup(group, head, flags, unit, set)
+char *group;
+void *head;
+u_32_t flags;
+minor_t unit;
+int set;
+{
+ frgroup_t *fg, **fgp;
+ u_32_t gflags;
+
+ if (group == NULL)
+ return NULL;
+
+ if (unit == IPL_LOGIPF && *group == '\0') /* an empty group name is refused for IPL_LOGIPF */
+ return NULL;
+
+ fgp = NULL;
+ gflags = flags & FR_INOUT; /* only the FR_INOUT bits of the rule flags are recorded on the group */
+
+ fg = fr_findgroup(group, unit, set, &fgp);
+ if (fg != NULL) {
+ if (fg->fg_flags == 0)
+ fg->fg_flags = gflags; /* group had no flags yet: adopt the caller's */
+ else if (gflags != fg->fg_flags)
+ return NULL; /* existing group was created with different FR_INOUT bits */
+ fg->fg_ref++;
+ return fg;
+ }
+ KMALLOC(fg, frgroup_t *);
+ if (fg != NULL) {
+ fg->fg_head = head;
+ fg->fg_start = NULL;
+ fg->fg_next = *fgp; /* fgp was set by fr_findgroup() to the link where the search stopped */
+ bcopy(group, fg->fg_name, FR_GROUPLEN); /* always copies FR_GROUPLEN bytes, whatever the string length */
+ fg->fg_flags = gflags;
+ fg->fg_ref = 1;
+ *fgp = fg;
+ }
+ return fg;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_delgroup */
+/* Returns: Nil */
+/* Parameters: group(I) - group name to delete */
+/* unit(I) - device to which this group belongs */
+/* set(I) - which set of rules (active/inactive) this is */
+/* Write Locks: ipf_mutex */
+/* */
+/* Attempt to delete a group head. */
+/* Only do this when its reference count reaches 0. */
+/* ------------------------------------------------------------------------ */
+void fr_delgroup(group, unit, set)
+char *group;
+minor_t unit;
+int set;
+{
+ frgroup_t *fg, **fgp;
+
+ fg = fr_findgroup(group, unit, set, &fgp);
+ if (fg == NULL)
+ return; /* unknown group: nothing to release */
+
+ fg->fg_ref--;
+ if (fg->fg_ref == 0) {
+ *fgp = fg->fg_next; /* unlink: fgp is the link that pointed at fg */
+ KFREE(fg);
+ }
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_getrulen */
+/* Returns: frentry_t * - NULL == not found, else pointer to rule n */
+/* Parameters: unit(I) - device for which to count the rule's number */
+/* flags(I) - which set of rules to find the rule in */
+/* group(I) - group name */
+/* n(I) - rule number to find */
+/* */
+/* Find rule # n in group # g and return a pointer to it. Return NULL if */
+/* group # g doesn't exist or there are less than n rules in the group. */
+/* ------------------------------------------------------------------------ */
+frentry_t *fr_getrulen(unit, group, n)
+int unit;
+char *group;
+u_32_t n;
+{
+ frentry_t *fr;
+ frgroup_t *fg;
+
+ fg = fr_findgroup(group, unit, fr_active, NULL); /* only the active set is searched */
+ if (fg == NULL)
+ return NULL;
+ for (fr = fg->fg_head; fr && n; fr = fr->fr_next, n--) /* NOTE(review): walk starts at fg_head, not fg_start - confirm intended */
+ ;
+ if (n != 0)
+ return NULL; /* list ended before n rules had been stepped over */
+ return fr;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_rulen */
+/* Returns: int - >= 0 - rule number, -1 == search failed */
+/* Parameters: unit(I) - device for which to count the rule's number */
+/* fr(I) - pointer to rule to match */
+/* */
+/* Return the number for a rule on a specific filtering device. */
+/* ------------------------------------------------------------------------ */
+int fr_rulen(unit, fr)
+int unit;
+frentry_t *fr;
+{
+ frentry_t *fh;
+ frgroup_t *fg;
+ u_32_t n = 0;
+
+ if (fr == NULL)
+ return -1;
+ fg = fr_findgroup(fr->fr_group, unit, fr_active, NULL); /* only the active set is searched */
+ if (fg == NULL)
+ return -1;
+ for (fh = fg->fg_head; fh; n++, fh = fh->fr_next) /* NOTE(review): walk starts at fg_head, not fg_start - confirm intended */
+ if (fh == fr)
+ break;
+ if (fh == NULL)
+ return -1; /* reached the end of the list without meeting fr */
+ return n;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: frflushlist */
+/* Returns: int - >= 0 - number of flushed rules */
+/* Parameters: set(I) - which set of rules (active/inactive) this is */
+/* unit(I) - device for which to flush rules */
+/* flags(I) - which set of rules to flush */
+/* nfreedp(O) - pointer to int where flush count is stored */
+/* listp(I) - pointer to list to flush pointer */
+/* Write Locks: ipf_mutex */
+/* */
+/* Recursively flush rules from the list, descending groups as they are */
+/* encountered. if a rule is the head of a group and it has lost all its */
+/* group members, then also delete the group reference. nfreedp is needed */
+/* to store the accumulating count of rules removed, whereas the returned */
+/* value is just the number removed from the current list. The latter is */
+/* needed to correctly adjust reference counts on rules that define groups. */
+/* */
+/* NOTE: Rules not loaded from user space cannot be flushed. */
+/* ------------------------------------------------------------------------ */
+static int frflushlist(set, unit, nfreedp, listp)
+int set;
+minor_t unit;
+int *nfreedp;
+frentry_t **listp;
+{
+ int freed = 0, i;
+ frentry_t *fp;
+
+ while ((fp = *listp) != NULL) {
+ if ((fp->fr_type & FR_T_BUILTIN) ||
+ !(fp->fr_flags & FR_COPIED)) { /* builtin rules and rules without FR_COPIED stay on the list */
+ listp = &fp->fr_next;
+ continue;
+ }
+ *listp = fp->fr_next; /* unlink fp from the list being flushed */
+ if (fp->fr_grp != NULL) {
+ i = frflushlist(set, unit, nfreedp, fp->fr_grp); /* recurse into the list referenced by fr_grp */
+ fp->fr_ref -= i; /* drop one reference per rule freed from that list */
+ }
+
+ if (fp->fr_grhead != NULL) {
+ fr_delgroup(fp->fr_grhead, unit, set);
+ *fp->fr_grhead = '\0'; /* clear the name so the group is not released again */
+ }
+
+ ASSERT(fp->fr_ref > 0);
+ fp->fr_next = NULL;
+ if (fr_derefrule(&fp) == 0)
+ freed++; /* counted only when fr_derefrule() returns 0 */
+ }
+ *nfreedp += freed; /* running total across all recursion levels */
+ return freed;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: frflush */
+/* Returns: int - >= 0 - number of flushed rules */
+/* Parameters: unit(I) - device for which to flush rules */
+/* proto(I) - IP version to flush (4 or 6, 0 = both) */
+/* flags(I) - which set of rules to flush */
+/* */
+/* Calls frflushlist() for all filter rules (accounting, firewall - both */
+/* IPv4 */
+/* and IPv6) as defined by the value of flags. */
+/* ------------------------------------------------------------------------ */
+int frflush(unit, proto, flags)
+minor_t unit;
+int proto, flags;
+{
+ int flushed = 0, set;
+
+ WRITE_ENTER(&ipf_mutex);
+ bzero((char *)frcache, sizeof(frcache)); /* frcache is wiped before any rule is removed */
+
+ set = fr_active;
+ if ((flags & FR_INACTIVE) == FR_INACTIVE)
+ set = 1 - set; /* operate on the other (inactive) set instead */
+
+ if (flags & FR_OUTQUE) { /* outbound lists live at index 1 */
+ if (proto == 0 || proto == 6) {
+ (void) frflushlist(set, unit,
+ &flushed, &ipfilter6[1][set]);
+ (void) frflushlist(set, unit,
+ &flushed, &ipacct6[1][set]);
+ }
+ if (proto == 0 || proto == 4) {
+ (void) frflushlist(set, unit,
+ &flushed, &ipfilter[1][set]);
+ (void) frflushlist(set, unit,
+ &flushed, &ipacct[1][set]);
+ }
+ }
+ if (flags & FR_INQUE) { /* inbound lists live at index 0 */
+ if (proto == 0 || proto == 6) {
+ (void) frflushlist(set, unit,
+ &flushed, &ipfilter6[0][set]);
+ (void) frflushlist(set, unit,
+ &flushed, &ipacct6[0][set]);
+ }
+ if (proto == 0 || proto == 4) {
+ (void) frflushlist(set, unit,
+ &flushed, &ipfilter[0][set]);
+ (void) frflushlist(set, unit,
+ &flushed, &ipacct[0][set]);
+ }
+ }
+ RWLOCK_EXIT(&ipf_mutex);
+
+ if (unit == IPL_LOGIPF) { /* lock is released above, so the nested call can take it again */
+ int tmp;
+
+ tmp = frflush(IPL_LOGCOUNT, proto, flags); /* second pass with the accounting unit */
+ if (tmp >= 0)
+ flushed += tmp;
+ }
+ return flushed;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: memstr */
+/* Returns: char * - NULL if failed, != NULL pointer to matching bytes */
+/* Parameters: src(I) - pointer to byte sequence to match */
+/* dst(I) - pointer to byte sequence to search */
+/* slen(I) - match length */
+/* dlen(I) - length available to search in */
+/* */
+/* Search dst for a sequence of bytes matching those at src and extend for */
+/* slen bytes. */
+/* ------------------------------------------------------------------------ */
+char *memstr(src, dst, slen, dlen)
+char *src, *dst;
+int slen, dlen;
+{
+ char *s = NULL;
+
+ while (dlen >= slen) { /* stop once fewer than slen bytes remain to search */
+ if (bcmp(src, dst, slen) == 0) {
+ s = dst; /* first position at which all slen bytes match */
+ break;
+ }
+ dst++;
+ dlen--;
+ }
+ return s;
+}
+/* ------------------------------------------------------------------------ */
+/* Function: fr_fixskip */
+/* Returns: Nil */
+/* Parameters: listp(IO) - pointer to start of list with skip rule */
+/* rp(I) - rule added/removed with skip in it. */
+/* addremove(I) - adjustment (-1/+1) to make to skip count, */
+/* depending on whether a rule was just added */
+/* or removed. */
+/* */
+/* Adjust all the rules in a list which would have skip'd past the position */
+/* where we are inserting to skip to the right place given the change. */
+/* ------------------------------------------------------------------------ */
+void fr_fixskip(listp, rp, addremove)
+frentry_t **listp, *rp;
+int addremove;
+{
+ int rules, rn;
+ frentry_t *fp;
+
+ rules = 0;
+ for (fp = *listp; (fp != NULL) && (fp != rp); fp = fp->fr_next)
+ rules++; /* rules ends up as the number of rules ahead of rp */
+
+ if (!fp)
+ return; /* rp is not on this list: leave every skip count alone */
+
+ for (rn = 0, fp = *listp; fp && (fp != rp); fp = fp->fr_next, rn++)
+ if (FR_ISSKIP(fp->fr_flags) && (rn + fp->fr_arg >= rules)) /* skip rule whose jump reaches rp's position or beyond */
+ fp->fr_arg += addremove;
+}
+
+
+#ifdef _KERNEL
+/* ------------------------------------------------------------------------ */
+/* Function: count4bits */
+/* Returns: int - >= 0 - number of consecutive bits in input */
+/* Parameters: ip(I) - 32bit IP address */
+/* */
+/* IPv4 ONLY */
+/* count consecutive 1's in bit mask. If the mask generated by counting */
+/* consecutive 1's is different to that passed, return -1, else return # */
+/* of bits. */
+/* ------------------------------------------------------------------------ */
+int count4bits(ip)
+u_32_t ip;
+{
+ u_32_t ipn;
+ int cnt = 0, i, j;
+
+ ip = ipn = ntohl(ip); /* work in host byte order */
+ for (i = 32; i; i--, ipn *= 2) /* count 1 bits from the top until the first 0 */
+ if (ipn & 0x80000000)
+ cnt++;
+ else
+ break;
+ ipn = 0;
+ for (i = 32, j = cnt; i; i--, j--) { /* rebuild a mask of cnt leading 1 bits followed by 0 bits */
+ ipn *= 2;
+ if (j > 0)
+ ipn++;
+ }
+ if (ipn == ip)
+ return cnt; /* input was exactly a contiguous prefix mask */
+ return -1;
+}
+
+
+#ifdef USE_INET6
+/* ------------------------------------------------------------------------ */
+/* Function: count6bits */
+/* Returns: int - >= 0 - number of consecutive bits in input */
+/* Parameters: msk(I) - pointer to start of IPv6 bitmask */
+/* */
+/* IPv6 ONLY */
+/* count consecutive 1's in bit mask. */
+/* ------------------------------------------------------------------------ */
+int count6bits(msk)
+u_32_t *msk;
+{
+ int i = 0, k;
+ u_32_t j;
+
+ for (k = 3; k >= 0; k--) /* visits all four 32-bit words, last word first */
+ if (msk[k] == 0xffffffff)
+ i += 32;
+ else {
+ for (j = msk[k]; j; j <<= 1) /* adds every set bit of the word; contiguity is not checked */
+ if (j & 0x80000000)
+ i++;
+ }
+ return i;
+}
+# endif
+#endif /* _KERNEL */
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: frsynclist */
+/* Returns: void */
+/* Parameters: fr(I) - start of filter list to sync interface names for */
+/* ifp(I) - interface pointer for limiting sync lookups */
+/* Write Locks: ipf_mutex */
+/* */
+/* Walk through a list of filter rules and resolve any interface names into */
+/* pointers. Where dynamic addresses are used, also update the IP address */
+/* used in the rule. The interface pointer is used to limit the lookups to */
+/* a specific set of matching names if it is non-NULL. */
+/* ------------------------------------------------------------------------ */
+static void frsynclist(fr, ifp)
+frentry_t *fr;
+void *ifp;
+{
+ frdest_t *fdp;
+ int v, i;
+
+ for (; fr; fr = fr->fr_next) {
+ v = fr->fr_v;
+
+ /*
+ * Lookup all the interface names that are part of the rule.
+ */
+ for (i = 0; i < 4; i++) {
+ if ((ifp != NULL) && (fr->fr_ifas[i] != ifp))
+ continue; /* limited sync: only slots currently bound to ifp are refreshed */
+ fr->fr_ifas[i] = fr_resolvenic(fr->fr_ifnames[i], v);
+ }
+
+ if (fr->fr_type == FR_T_IPF) {
+ if (fr->fr_satype != FRI_NORMAL &&
+ fr->fr_satype != FRI_LOOKUP) { /* source address is taken from an interface */
+ (void)fr_ifpaddr(v, fr->fr_satype,
+ fr->fr_ifas[fr->fr_sifpidx],
+ &fr->fr_src, &fr->fr_smsk);
+ }
+ if (fr->fr_datype != FRI_NORMAL &&
+ fr->fr_datype != FRI_LOOKUP) { /* destination address is taken from an interface */
+ (void)fr_ifpaddr(v, fr->fr_datype,
+ fr->fr_ifas[fr->fr_difpidx],
+ &fr->fr_dst, &fr->fr_dmsk);
+ }
+ }
+
+ fdp = &fr->fr_tifs[0];
+ if ((ifp == NULL) || (fdp->fd_ifp == ifp))
+ fr_resolvedest(fdp, v);
+
+ fdp = &fr->fr_tifs[1];
+ if ((ifp == NULL) || (fdp->fd_ifp == ifp))
+ fr_resolvedest(fdp, v);
+
+ fdp = &fr->fr_dif;
+ if ((ifp == NULL) || (fdp->fd_ifp == ifp)) {
+ fr_resolvedest(fdp, v);
+
+ fr->fr_flags &= ~FR_DUP; /* FR_DUP is recomputed from the freshly resolved fd_ifp */
+ if ((fdp->fd_ifp != (void *)-1) &&
+ (fdp->fd_ifp != NULL))
+ fr->fr_flags |= FR_DUP;
+ }
+
+#ifdef IPFILTER_LOOKUP
+ if (fr->fr_type == FR_T_IPF && fr->fr_satype == FRI_LOOKUP &&
+ fr->fr_srcptr == NULL) { /* only resolved while the pointer is still unset */
+ fr->fr_srcptr = fr_resolvelookup(fr->fr_srctype,
+ fr->fr_srcnum,
+ &fr->fr_srcfunc);
+ }
+ if (fr->fr_type == FR_T_IPF && fr->fr_datype == FRI_LOOKUP &&
+ fr->fr_dstptr == NULL) { /* only resolved while the pointer is still unset */
+ fr->fr_dstptr = fr_resolvelookup(fr->fr_dsttype,
+ fr->fr_dstnum,
+ &fr->fr_dstfunc);
+ }
+#endif
+ }
+}
+
+
+#ifdef _KERNEL
+/* ------------------------------------------------------------------------ */
+/* Function: frsync */
+/* Returns: void */
+/* Parameters: ifp(I) - interface pointer for limiting sync lookups */
+/* */
+/* frsync() is called when we suspect that the interface list or */
+/* information about interfaces (like IP#) has changed. Go through all */
+/* filter rules, NAT entries and the state table and check if anything */
+/* needs to be changed/updated. */
+/* ------------------------------------------------------------------------ */
+void frsync(ifp)
+void *ifp;
+{
+ int i;
+
+# if !SOLARIS
+ fr_natsync(ifp);
+ fr_statesync(ifp);
+# endif
+
+ WRITE_ENTER(&ipf_mutex);
+ frsynclist(ipacct[0][fr_active], ifp); /* top-level lists: only the active set is synced */
+ frsynclist(ipacct[1][fr_active], ifp);
+ frsynclist(ipfilter[0][fr_active], ifp);
+ frsynclist(ipfilter[1][fr_active], ifp);
+ frsynclist(ipacct6[0][fr_active], ifp);
+ frsynclist(ipacct6[1][fr_active], ifp);
+ frsynclist(ipfilter6[0][fr_active], ifp);
+ frsynclist(ipfilter6[1][fr_active], ifp);
+
+ for (i = 0; i < IPL_LOGSIZE; i++) { /* group rule lists: both sets of every unit are synced */
+ frgroup_t *g;
+
+ for (g = ipfgroups[i][0]; g != NULL; g = g->fg_next)
+ frsynclist(g->fg_start, ifp);
+ for (g = ipfgroups[i][1]; g != NULL; g = g->fg_next)
+ frsynclist(g->fg_start, ifp);
+ }
+ RWLOCK_EXIT(&ipf_mutex);
+}
+
+
+/*
+ * In the functions below, bcopy() is called because the pointer being
+ * copied _from_ in this instance is a pointer to a char buf (which could
+ * end up being unaligned) and on the kernel's local stack.
+ */
+/* ------------------------------------------------------------------------ */
+/* Function: copyinptr */
+/* Returns: int - 0 = success, else failure */
+/* Parameters: src(I) - pointer to the source address */
+/* dst(I) - destination address */
+/* size(I) - number of bytes to copy */
+/* */
+/* Copy a block of data in from user space, given a pointer to the pointer */
+/* to start copying from (src) and a pointer to where to store it (dst). */
+/* NB: src - pointer to user space pointer, dst - kernel space pointer */
+/* ------------------------------------------------------------------------ */
+int copyinptr(src, dst, size)
+void *src, *dst;
+size_t size;
+{
+ caddr_t ca; /* the pointer value fetched from *src */
+ int err;
+
+# if SOLARIS
+ err = COPYIN(src, (caddr_t)&ca, sizeof(ca));
+ if (err != 0)
+ return err;
+# else
+ bcopy(src, (caddr_t)&ca, sizeof(ca));
+# endif
+ err = COPYIN(ca, dst, size); /* second step: fetch the data that pointer refers to */
+ return err;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: copyoutptr */
+/* Returns: int - 0 = success, else failure */
+/* Parameters: src(I) - pointer to the source address */
+/* dst(I) - destination address */
+/* size(I) - number of bytes to copy */
+/* */
+/* Copy a block of data out to user space, given a pointer to the data to */
+/* copy (src) and a pointer to the pointer to where to store it (dst). */
+/* NB: src - kernel space pointer, dst - pointer to user space pointer. */
+/* ------------------------------------------------------------------------ */
+int copyoutptr(src, dst, size)
+void *src, *dst;
+size_t size;
+{
+ caddr_t ca; /* the destination pointer value fetched from *dst */
+ int err;
+
+# if SOLARIS
+ err = COPYIN(dst, (caddr_t)&ca, sizeof(ca));
+ if (err != 0)
+ return err;
+# else
+ bcopy(dst, (caddr_t)&ca, sizeof(ca));
+# endif
+ err = COPYOUT(src, ca, size); /* second step: write the data to where that pointer refers */
+ return err;
+}
+#endif
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_lock */
+/* Returns: (void) */
+/* Parameters: data(IO) - pointer to new lock value; receives the old value */
+/* lockp(IO) - pointer to the lock integer being updated */
+/* */
+/* Read the new value for the lock integer from data, copy the old value of */
+/* *lockp back out to data and then store the new value in *lockp. */
+/* ------------------------------------------------------------------------ */
+void fr_lock(data, lockp)
+caddr_t data;
+int *lockp;
+{
+ int arg;
+
+ BCOPYIN(data, (caddr_t)&arg, sizeof(arg)); /* fetch the requested new value first */
+ BCOPYOUT((caddr_t)lockp, data, sizeof(*lockp)); /* hand the current value back through data */
+ *lockp = arg;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_getstat */
+/* Returns: Nil */
+/* Parameters: fiop(I) - pointer to ipfilter stats structure */
+/* */
+/* Stores a copy of current pointers, counters, etc, in the friostat */
+/* structure. */
+/* ------------------------------------------------------------------------ */
+void fr_getstat(fiop)
+friostat_t *fiop;
+{
+ int i, j;
+
+ bcopy((char *)frstats, (char *)fiop->f_st, sizeof(filterstats_t) * 2);
+ fiop->f_locks[IPL_LOGSTATE] = fr_state_lock;
+ fiop->f_locks[IPL_LOGNAT] = fr_nat_lock;
+ fiop->f_locks[IPL_LOGIPF] = fr_frag_lock;
+ fiop->f_locks[IPL_LOGAUTH] = fr_auth_lock;
+
+ for (i = 0; i < 2; i++) /* i and j index the two dimensions of each rule list array */
+ for (j = 0; j < 2; j++) {
+ fiop->f_ipf[i][j] = ipfilter[i][j];
+ fiop->f_acct[i][j] = ipacct[i][j];
+ fiop->f_ipf6[i][j] = ipfilter6[i][j];
+ fiop->f_acct6[i][j] = ipacct6[i][j];
+ }
+
+ fiop->f_ticks = fr_ticks;
+ fiop->f_active = fr_active;
+ fiop->f_froute[0] = fr_frouteok[0];
+ fiop->f_froute[1] = fr_frouteok[1];
+
+ fiop->f_running = fr_running;
+ for (i = 0; i < IPL_LOGSIZE; i++) {
+ fiop->f_groups[i][0] = ipfgroups[i][0];
+ fiop->f_groups[i][1] = ipfgroups[i][1];
+ }
+#ifdef IPFILTER_LOG
+ fiop->f_logging = 1;
+#else
+ fiop->f_logging = 0;
+#endif
+ fiop->f_defpass = fr_pass;
+ fiop->f_features = fr_features;
+ /*
+ * strncpy() leaves the destination unterminated when the source is
+ * at least as long as the buffer, so force a terminating NUL into
+ * the last byte after the copy.
+ */
+ (void) strncpy(fiop->f_version, ipfilter_version,
+ sizeof(fiop->f_version));
+ fiop->f_version[sizeof(fiop->f_version) - 1] = '\0';
+}
+
+
+#ifdef USE_INET6
+int icmptoicmp6types[ICMP_MAXTYPE+1] = { /* indexed by ICMPv4 type; -1 presumably means no ICMPv6 counterpart - confirm */
+ ICMP6_ECHO_REPLY, /* 0: ICMP_ECHOREPLY */
+ -1, /* 1: UNUSED */
+ -1, /* 2: UNUSED */
+ ICMP6_DST_UNREACH, /* 3: ICMP_UNREACH */
+ -1, /* 4: ICMP_SOURCEQUENCH */
+ ND_REDIRECT, /* 5: ICMP_REDIRECT */
+ -1, /* 6: UNUSED */
+ -1, /* 7: UNUSED */
+ ICMP6_ECHO_REQUEST, /* 8: ICMP_ECHO */
+ -1, /* 9: UNUSED */
+ -1, /* 10: UNUSED */
+ ICMP6_TIME_EXCEEDED, /* 11: ICMP_TIMXCEED */
+ ICMP6_PARAM_PROB, /* 12: ICMP_PARAMPROB */
+ -1, /* 13: ICMP_TSTAMP */
+ -1, /* 14: ICMP_TSTAMPREPLY */
+ -1, /* 15: ICMP_IREQ */
+ -1, /* 16: ICMP_IREQREPLY */
+ -1, /* 17: ICMP_MASKREQ */
+ -1, /* 18: ICMP_MASKREPLY */
+};
+
+
+int icmptoicmp6unreach[ICMP_MAX_UNREACH] = { /* indexed by ICMPv4 unreachable code; -1 presumably means no ICMPv6 counterpart - confirm */
+ ICMP6_DST_UNREACH_ADDR, /* 0: ICMP_UNREACH_NET */
+ ICMP6_DST_UNREACH_ADDR, /* 1: ICMP_UNREACH_HOST */
+ -1, /* 2: ICMP_UNREACH_PROTOCOL */
+ ICMP6_DST_UNREACH_NOPORT, /* 3: ICMP_UNREACH_PORT */
+ -1, /* 4: ICMP_UNREACH_NEEDFRAG */
+ ICMP6_DST_UNREACH_NOTNEIGHBOR, /* 5: ICMP_UNREACH_SRCFAIL */
+ ICMP6_DST_UNREACH_ADDR, /* 6: ICMP_UNREACH_NET_UNKNOWN */
+ ICMP6_DST_UNREACH_ADDR, /* 7: ICMP_UNREACH_HOST_UNKNOWN */
+ -1, /* 8: ICMP_UNREACH_ISOLATED */
+ ICMP6_DST_UNREACH_ADMIN, /* 9: ICMP_UNREACH_NET_PROHIB */
+ ICMP6_DST_UNREACH_ADMIN, /* 10: ICMP_UNREACH_HOST_PROHIB */
+ -1, /* 11: ICMP_UNREACH_TOSNET */
+ -1, /* 12: ICMP_UNREACH_TOSHOST */
+ ICMP6_DST_UNREACH_ADMIN, /* 13: ICMP_UNREACH_ADMIN_PROHIBIT */
+};
+int icmpreplytype6[ICMP6_MAXTYPE + 1];
+#endif
+
+int icmpreplytype4[ICMP_MAXTYPE + 1];
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_matchicmpqueryreply */
+/* Returns: int - 1 if "icmp" is a valid reply to "ic" else 0. */
+/* Parameters: v(I) - IP protocol version (4 or 6) */
+/* ic(I) - ICMP information */
+/* icmp(I) - ICMP packet header */
+/* rev(I) - direction (0 = forward/1 = reverse) of packet */
+/* */
+/* Check if the ICMP packet defined by the header pointed to by icmp is a */
+/* reply to one as described by what's in ic. If it is a match, return 1, */
+/* else return 0 for no match. */
+/* ------------------------------------------------------------------------ */
+int fr_matchicmpqueryreply(v, ic, icmp, rev)
+int v;
+icmpinfo_t *ic;
+icmphdr_t *icmp;
+int rev;
+{
+ int ictype;
+
+ ictype = ic->ici_type;
+
+ if (v == 4) {
+ /*
+ * If we matched its type on the way in, then when going out
+ * it will still be the same type.
+ *
+ * icmpreplytype4[] holds ICMP_MAXTYPE + 1 entries, so the
+ * reverse lookup is only attempted when ictype is a valid
+ * index; an out of range type can never match a reply.
+ */
+ if ((!rev && (icmp->icmp_type == ictype)) ||
+ (rev && (ictype >= 0) && (ictype <= ICMP_MAXTYPE) &&
+ (icmpreplytype4[ictype] == icmp->icmp_type))) {
+ if (icmp->icmp_type != ICMP_ECHOREPLY)
+ return 1;
+ if (icmp->icmp_id == ic->ici_id) /* echo replies must also carry the recorded id */
+ return 1;
+ }
+ }
+#ifdef USE_INET6
+ else if (v == 6) {
+ /*
+ * Same bounds rule as above: icmpreplytype6[] holds
+ * ICMP6_MAXTYPE + 1 entries.
+ */
+ if ((!rev && (icmp->icmp_type == ictype)) ||
+ (rev && (ictype >= 0) && (ictype <= ICMP6_MAXTYPE) &&
+ (icmpreplytype6[ictype] == icmp->icmp_type))) {
+ if (icmp->icmp_type != ICMP6_ECHO_REPLY)
+ return 1;
+ if (icmp->icmp_id == ic->ici_id) /* echo replies must also carry the recorded id */
+ return 1;
+ }
+ }
+#endif
+ return 0;
+}
+
+
+#ifdef IPFILTER_LOOKUP
+/* ------------------------------------------------------------------------ */
+/* Function: fr_resolvelookup */
+/* Returns: void * - NULL = failure, else success. */
+/* Parameters: type(I) - type of lookup these parameters are for. */
+/* number(I) - table number to use when searching */
+/* funcptr(IO) - pointer to pointer for storing IP address */
+/* searching function. */
+/* */
+/* Search for the "table" number passed in amongst those configured for */
+/* that particular type. If the type is recognised then the function to */
+/* call to do the IP address search will be changed, regardless of whether */
+/* or not the "table" number exists. */
+/* ------------------------------------------------------------------------ */
+static void *fr_resolvelookup(type, number, funcptr)
+u_int type, number;
+lookupfunc_t *funcptr;
+{
+ char name[FR_GROUPLEN]; /* holds "number" rendered as a decimal string */
+ iphtable_t *iph;
+ ip_pool_t *ipo;
+ void *ptr;
+
+#if defined(SNPRINTF) && defined(_KERNEL)
+ (void) SNPRINTF(name, sizeof(name), "%u", number);
+#else
+ (void) sprintf(name, "%u", number);
+#endif
+
+ READ_ENTER(&ip_poolrw);
+
+ switch (type)
+ {
+ case IPLT_POOL :
+# if (defined(__osf__) && defined(_KERNEL))
+ ptr = NULL;
+ *funcptr = NULL;
+# else
+ ipo = ip_pool_find(IPL_LOGIPF, name);
+ ptr = ipo;
+ if (ipo != NULL) {
+ ATOMIC_INC32(ipo->ipo_ref); /* the returned pointer counts as a reference */
+ }
+ *funcptr = ip_pool_search; /* set even when no pool was found */
+# endif
+ break;
+ case IPLT_HASH :
+ iph = fr_findhtable(IPL_LOGIPF, name);
+ ptr = iph;
+ if (iph != NULL) {
+ ATOMIC_INC32(iph->iph_ref); /* the returned pointer counts as a reference */
+ }
+ *funcptr = fr_iphmfindip; /* set even when no table was found */
+ break;
+ default:
+ ptr = NULL;
+ *funcptr = NULL; /* unrecognised type: no search function */
+ break;
+ }
+ RWLOCK_EXIT(&ip_poolrw);
+
+ return ptr;
+}
+#endif
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: frrequest */
+/* Returns: int - 0 == success, > 0 == errno value */
+/* Parameters: unit(I) - device for which this is for */
+/* req(I) - ioctl command (SIOC*) */
+/* data(I) - pointer to ioctl data */
+/* set(I) - 1 or 0 (filter set) */
+/* makecopy(I) - flag indicating whether data points to a rule */
+/* in kernel space & hence doesn't need copying. */
+/* */
+/* This function handles all the requests which operate on the list of */
+/* filter rules. This includes adding, deleting, insertion. It is also */
+/* responsible for creating groups when a "head" rule is loaded. Interface */
+/* names are resolved here and other sanity checks are made on the content */
+/* of the rule structure being loaded. If a rule has user defined timeouts */
+/* then make sure they are created and initialised before exiting. */
+/* ------------------------------------------------------------------------ */
+int frrequest(unit, req, data, set, makecopy)
+int unit;
+ioctlcmd_t req;
+int set, makecopy;
+caddr_t data;
+{
+ frentry_t frd, *fp, *f, **fprev, **ftail;
+ int error = 0, in, v;
+ void *ptr, *uptr;
+ u_int *p, *pp;
+ frgroup_t *fg;
+ char *group;
+
+ fg = NULL;
+ fp = &frd;
+ if (makecopy != 0) {
+ error = fr_inobj(data, fp, IPFOBJ_FRENTRY);
+ if (error)
+ return EFAULT;
+ if ((fp->fr_flags & FR_T_BUILTIN) != 0)
+ return EINVAL;
+ fp->fr_ref = 0;
+ fp->fr_flags |= FR_COPIED;
+ } else {
+ fp = (frentry_t *)data;
+ if ((fp->fr_type & FR_T_BUILTIN) == 0)
+ return EINVAL;
+ fp->fr_flags &= ~FR_COPIED;
+ }
+
+ if (((fp->fr_dsize == 0) && (fp->fr_data != NULL)) ||
+ ((fp->fr_dsize != 0) && (fp->fr_data == NULL)))
+ return EINVAL;
+
+ v = fp->fr_v;
+ uptr = fp->fr_data;
+
+ /*
+ * Only filter rules for IPv4 or IPv6 are accepted.
+ */
+ if (v == 4)
+ /*EMPTY*/;
+#ifdef USE_INET6
+ else if (v == 6)
+ /*EMPTY*/;
+#endif
+ else {
+ return EINVAL;
+ }
+
+ /*
+ * If the rule is being loaded from user space, i.e. we had to copy it
+ * into kernel space, then do not trust the function pointer in the
+ * rule.
+ */
+ if ((makecopy == 1) && (fp->fr_func != NULL)) {
+ if (fr_findfunc(fp->fr_func) == NULL)
+ return ESRCH;
+ error = fr_funcinit(fp);
+ if (error != 0)
+ return error;
+ }
+
+ ptr = NULL;
+ /*
+ * Check that the group number does exist and that its use (in/out)
+ * matches what the rule is.
+ */
+ if (!strncmp(fp->fr_grhead, "0", FR_GROUPLEN))
+ *fp->fr_grhead = '\0';
+ group = fp->fr_group;
+ if (!strncmp(group, "0", FR_GROUPLEN))
+ *group = '\0';
+
+ if (FR_ISACCOUNT(fp->fr_flags))
+ unit = IPL_LOGCOUNT;
+
+ if ((req != (int)SIOCZRLST) && (*group != '\0')) {
+ fg = fr_findgroup(group, unit, set, NULL);
+ if (fg == NULL)
+ return ESRCH;
+ if (fg->fg_flags == 0)
+ fg->fg_flags = fp->fr_flags & FR_INOUT;
+ else if (fg->fg_flags != (fp->fr_flags & FR_INOUT))
+ return ESRCH;
+ }
+
+ in = (fp->fr_flags & FR_INQUE) ? 0 : 1;
+
+ /*
+ * Work out which rule list this change is being applied to.
+ */
+ ftail = NULL;
+ fprev = NULL;
+ if (unit == IPL_LOGAUTH)
+ fprev = &ipauth;
+ else if (v == 4) {
+ if (FR_ISACCOUNT(fp->fr_flags))
+ fprev = &ipacct[in][set];
+ else if ((fp->fr_flags & (FR_OUTQUE|FR_INQUE)) != 0)
+ fprev = &ipfilter[in][set];
+ } else if (v == 6) {
+ if (FR_ISACCOUNT(fp->fr_flags))
+ fprev = &ipacct6[in][set];
+ else if ((fp->fr_flags & (FR_OUTQUE|FR_INQUE)) != 0)
+ fprev = &ipfilter6[in][set];
+ }
+ if (fprev == NULL)
+ return ESRCH;
+
+ if (*group != '\0') {
+ if (!fg && !(fg = fr_findgroup(group, unit, set, NULL)))
+ return ESRCH;
+ fprev = &fg->fg_start;
+ }
+
+ ftail = fprev;
+ for (f = *ftail; (f = *ftail) != NULL; ftail = &f->fr_next) {
+ if (fp->fr_collect <= f->fr_collect) {
+ ftail = fprev;
+ f = NULL;
+ break;
+ }
+ fprev = ftail;
+ }
+
+ /*
+ * Copy in extra data for the rule.
+ */
+ if (fp->fr_dsize != 0) {
+ if (makecopy != 0) {
+ KMALLOCS(ptr, void *, fp->fr_dsize);
+ if (!ptr)
+ return ENOMEM;
+ error = COPYIN(uptr, ptr, fp->fr_dsize);
+ } else {
+ ptr = uptr;
+ error = 0;
+ }
+ if (error != 0) {
+ KFREES(ptr, fp->fr_dsize);
+ return ENOMEM;
+ }
+ fp->fr_data = ptr;
+ } else
+ fp->fr_data = NULL;
+
+ /*
+ * Perform per-rule type sanity checks of their members.
+ */
+ switch (fp->fr_type & ~FR_T_BUILTIN)
+ {
+#if defined(IPFILTER_BPF)
+ case FR_T_BPFOPC :
+ if (fp->fr_dsize == 0)
+ return EINVAL;
+ if (!bpf_validate(ptr, fp->fr_dsize/sizeof(struct bpf_insn))) {
+ if (makecopy && fp->fr_data != NULL) {
+ KFREES(fp->fr_data, fp->fr_dsize);
+ }
+ return EINVAL;
+ }
+ break;
+#endif
+ case FR_T_IPF :
+ if (fp->fr_dsize != sizeof(fripf_t))
+ return EINVAL;
+
+ /*
+ * Allowing a rule with both "keep state" and "with oow" is
+ * pointless because adding a state entry to the table will
+ * fail with the out of window (oow) flag set.
+ */
+ if ((fp->fr_flags & FR_KEEPSTATE) && (fp->fr_flx & FI_OOW))
+ return EINVAL;
+
+ switch (fp->fr_satype)
+ {
+ case FRI_BROADCAST :
+ case FRI_DYNAMIC :
+ case FRI_NETWORK :
+ case FRI_NETMASKED :
+ case FRI_PEERADDR :
+ if (fp->fr_sifpidx < 0 || fp->fr_sifpidx > 3) {
+ if (makecopy && fp->fr_data != NULL) {
+ KFREES(fp->fr_data, fp->fr_dsize);
+ }
+ return EINVAL;
+ }
+ break;
+#ifdef IPFILTER_LOOKUP
+ case FRI_LOOKUP :
+ fp->fr_srcptr = fr_resolvelookup(fp->fr_srctype,
+ fp->fr_srcnum,
+ &fp->fr_srcfunc);
+ break;
+#endif
+ default :
+ break;
+ }
+
+ switch (fp->fr_datype)
+ {
+ case FRI_BROADCAST :
+ case FRI_DYNAMIC :
+ case FRI_NETWORK :
+ case FRI_NETMASKED :
+ case FRI_PEERADDR :
+ if (fp->fr_difpidx < 0 || fp->fr_difpidx > 3) {
+ if (makecopy && fp->fr_data != NULL) {
+ KFREES(fp->fr_data, fp->fr_dsize);
+ }
+ return EINVAL;
+ }
+ break;
+#ifdef IPFILTER_LOOKUP
+ case FRI_LOOKUP :
+ fp->fr_dstptr = fr_resolvelookup(fp->fr_dsttype,
+ fp->fr_dstnum,
+ &fp->fr_dstfunc);
+ break;
+#endif
+ default :
+ break;
+ }
+ break;
+ case FR_T_NONE :
+ break;
+ case FR_T_CALLFUNC :
+ break;
+ case FR_T_COMPIPF :
+ break;
+ default :
+ if (makecopy && fp->fr_data != NULL) {
+ KFREES(fp->fr_data, fp->fr_dsize);
+ }
+ return EINVAL;
+ }
+
+ /*
+ * Lookup all the interface names that are part of the rule.
+ */
+ frsynclist(fp, NULL);
+ fp->fr_statecnt = 0;
+
+ /*
+ * Look for an existing matching filter rule, but don't include the
+ * next or interface pointer in the comparison (fr_next, fr_ifa).
+ * This elminates rules which are indentical being loaded. Checksum
+ * the constant part of the filter rule to make comparisons quicker
+ * (this meaning no pointers are included).
+ */
+ for (fp->fr_cksum = 0, p = (u_int *)&fp->fr_func, pp = &fp->fr_cksum;
+ p < pp; p++)
+ fp->fr_cksum += *p;
+ pp = (u_int *)(fp->fr_caddr + fp->fr_dsize);
+ for (p = (u_int *)fp->fr_data; p < pp; p++)
+ fp->fr_cksum += *p;
+
+ WRITE_ENTER(&ipf_mutex);
+ bzero((char *)frcache, sizeof(frcache));
+
+ for (; (f = *ftail) != NULL; ftail = &f->fr_next) {
+ if ((fp->fr_cksum != f->fr_cksum) ||
+ (f->fr_dsize != fp->fr_dsize))
+ continue;
+ if (bcmp((char *)&f->fr_func, (char *)&fp->fr_func, FR_CMPSIZ))
+ continue;
+ if ((!ptr && !f->fr_data) ||
+ (ptr && f->fr_data &&
+ !bcmp((char *)ptr, (char *)f->fr_data, f->fr_dsize)))
+ break;
+ }
+
+ /*
+ * If zero'ing statistics, copy current to caller and zero.
+ */
+ if (req == (ioctlcmd_t)SIOCZRLST) {
+ if (f == NULL)
+ error = ESRCH;
+ else {
+ /*
+ * Copy and reduce lock because of impending copyout.
+ * Well we should, but if we do then the atomicity of
+ * this call and the correctness of fr_hits and
+ * fr_bytes cannot be guaranteed. As it is, this code
+ * only resets them to 0 if they are successfully
+ * copied out into user space.
+ */
+ bcopy((char *)f, (char *)fp, sizeof(*f));
+ /* MUTEX_DOWNGRADE(&ipf_mutex); */
+
+ /*
+ * When we copy this rule back out, set the data
+ * pointer to be what it was in user space.
+ */
+ fp->fr_data = uptr;
+ error = fr_outobj(data, fp, IPFOBJ_FRENTRY);
+
+ if (error == 0) {
+ if ((f->fr_dsize != 0) && (uptr != NULL))
+ error = COPYOUT(f->fr_data, uptr,
+ f->fr_dsize);
+ if (error == 0) {
+ f->fr_hits = 0;
+ f->fr_bytes = 0;
+ }
+ }
+ }
+
+ if ((ptr != NULL) && (makecopy != 0)) {
+ KFREES(ptr, fp->fr_dsize);
+ }
+ RWLOCK_EXIT(&ipf_mutex);
+ return error;
+ }
+
+ if (!f) {
+ /*
+ * At the end of this, ftail must point to the place where the
+ * new rule is to be saved/inserted/added.
+ * For SIOCAD*FR, this should be the last rule in the group of
+ * rules that have equal fr_collect fields.
+ * For SIOCIN*FR, ...
+ */
+ if (req == (ioctlcmd_t)SIOCADAFR ||
+ req == (ioctlcmd_t)SIOCADIFR) {
+
+ for (ftail = fprev; (f = *ftail) != NULL; ) {
+ if (f->fr_collect > fp->fr_collect)
+ break;
+ ftail = &f->fr_next;
+ }
+ f = NULL;
+ ptr = NULL;
+ error = 0;
+ } else if (req == (ioctlcmd_t)SIOCINAFR ||
+ req == (ioctlcmd_t)SIOCINIFR) {
+ while ((f = *fprev) != NULL) {
+ if (f->fr_collect >= fp->fr_collect)
+ break;
+ fprev = &f->fr_next;
+ }
+ ftail = fprev;
+ if (fp->fr_hits != 0) {
+ while (fp->fr_hits && (f = *ftail)) {
+ if (f->fr_collect != fp->fr_collect)
+ break;
+ fprev = ftail;
+ ftail = &f->fr_next;
+ fp->fr_hits--;
+ }
+ }
+ f = NULL;
+ ptr = NULL;
+ error = 0;
+ }
+ }
+
+ /*
+ * Request to remove a rule.
+ */
+ if (req == (ioctlcmd_t)SIOCRMAFR || req == (ioctlcmd_t)SIOCRMIFR) {
+ if (!f)
+ error = ESRCH;
+ else {
+ /*
+ * Do not allow activity from user space to interfere
+ * with rules not loaded that way.
+ */
+ if ((makecopy == 1) && !(f->fr_flags & FR_COPIED)) {
+ error = EPERM;
+ goto done;
+ }
+
+ /*
+ * Return EBUSY if the rule is being reference by
+ * something else (eg state information.
+ */
+ if (f->fr_ref > 1) {
+ error = EBUSY;
+ goto done;
+ }
+#ifdef IPFILTER_SCAN
+ if (f->fr_isctag[0] != '\0' &&
+ (f->fr_isc != (struct ipscan *)-1))
+ ipsc_detachfr(f);
+#endif
+ if ((fg != NULL) && (fg->fg_head != NULL))
+ fg->fg_head->fr_ref--;
+ if (unit == IPL_LOGAUTH) {
+ error = fr_preauthcmd(req, f, ftail);
+ goto done;
+ }
+ if (*f->fr_grhead != '\0')
+ fr_delgroup(f->fr_grhead, unit, set);
+ fr_fixskip(ftail, f, -1);
+ *ftail = f->fr_next;
+ f->fr_next = NULL;
+ (void)fr_derefrule(&f);
+ }
+ } else {
+ /*
+ * Not removing, so we must be adding/inserting a rule.
+ */
+ if (f)
+ error = EEXIST;
+ else {
+ if (unit == IPL_LOGAUTH) {
+ error = fr_preauthcmd(req, fp, ftail);
+ goto done;
+ }
+ if (makecopy) {
+ KMALLOC(f, frentry_t *);
+ } else
+ f = fp;
+ if (f != NULL) {
+ if (fg != NULL && fg->fg_head != NULL)
+ fg->fg_head->fr_ref++;
+ if (fp != f)
+ bcopy((char *)fp, (char *)f,
+ sizeof(*f));
+ MUTEX_NUKE(&f->fr_lock);
+ MUTEX_INIT(&f->fr_lock, "filter rule lock");
+#ifdef IPFILTER_SCAN
+ if (f->fr_isctag[0] != '\0' &&
+ ipsc_attachfr(f))
+ f->fr_isc = (struct ipscan *)-1;
+#endif
+ f->fr_hits = 0;
+ if (makecopy != 0)
+ f->fr_ref = 1;
+ f->fr_next = *ftail;
+ *ftail = f;
+ if (req == (ioctlcmd_t)SIOCINIFR ||
+ req == (ioctlcmd_t)SIOCINAFR)
+ fr_fixskip(ftail, f, 1);
+ f->fr_grp = NULL;
+ group = f->fr_grhead;
+ if (*group != '\0') {
+ fg = fr_addgroup(group, f, f->fr_flags,
+ unit, set);
+ if (fg != NULL)
+ f->fr_grp = &fg->fg_start;
+ }
+ } else
+ error = ENOMEM;
+ }
+ }
+done:
+ RWLOCK_EXIT(&ipf_mutex);
+ if ((ptr != NULL) && (error != 0) && (makecopy != 0)) {
+ KFREES(ptr, fp->fr_dsize);
+ }
+ return (error);
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_funcinit */
+/* Returns: int - 0 == success, ESRCH if fr_func is not listed in */
+/* fr_availfuncs, else the value returned by the init routine */
+/* Parameters: fr(I) - pointer to filter rule */
+/* */
+/* Walk fr_availfuncs looking for the entry whose address equals the rule's */
+/* fr_func. When that entry carries an init routine, call it with the rule */
+/* and pass its result back; an entry without one counts as success. */
+/* ------------------------------------------------------------------------ */
+static int fr_funcinit(fr)
+frentry_t *fr;
+{
+	ipfunc_resolve_t *entry;
+
+	for (entry = fr_availfuncs; entry->ipfu_addr != NULL; entry++) {
+		if (entry->ipfu_addr != fr->fr_func)
+			continue;
+
+		/* Found the function; an init hook is optional. */
+		if (entry->ipfu_init == NULL)
+			return 0;
+		return (*entry->ipfu_init)(fr);
+	}
+
+	/* Ran off the end of the table without a match. */
+	return ESRCH;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_findfunc */
+/* Returns: ipfunc_t - funcptr itself if it is listed, else NULL */
+/* Parameters: funcptr(I) - function pointer to lookup */
+/* */
+/* Scan fr_availfuncs (terminated by an entry with a NULL address) for an */
+/* entry whose address is the same as funcptr. */
+/* ------------------------------------------------------------------------ */
+static ipfunc_t fr_findfunc(funcptr)
+ipfunc_t funcptr;
+{
+	ipfunc_resolve_t *entry = fr_availfuncs;
+
+	while (entry->ipfu_addr != NULL) {
+		if (entry->ipfu_addr == funcptr)
+			return funcptr;
+		entry++;
+	}
+	return NULL;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_resolvefunc */
+/* Returns: int - 0 == success, ESRCH == no match, EFAULT == copyout failed */
+/* Parameters: data(IO) - ioctl data pointer to ipfunc_resolve_t struct */
+/* */
+/* Copy in an ipfunc_resolve_t and complete it from fr_availfuncs. Exactly */
+/* one of the name and the address must be supplied: a name (with a NULL */
+/* address) is resolved to its address, an address (with an empty name) is */
+/* resolved to its name. In both cases the init pointer is filled in too */
+/* and the completed structure is copied back out to data. */
+/* ------------------------------------------------------------------------ */
+int fr_resolvefunc(data)
+void *data;
+{
+	ipfunc_resolve_t res, *ft;
+
+	/* NOTE(review): the outcome of BCOPYIN is not checked here - confirm */
+	/* whether the macro can report a failure on this platform. */
+	BCOPYIN(data, &res, sizeof(res));
+
+	if (res.ipfu_addr == NULL) {
+		/* Lookup by name; an empty name gives nothing to search for. */
+		if (res.ipfu_name[0] == '\0')
+			return ESRCH;
+		for (ft = fr_availfuncs; ft->ipfu_addr != NULL; ft++)
+			if (strncmp(res.ipfu_name, ft->ipfu_name,
+				    sizeof(res.ipfu_name)) == 0)
+				break;
+		if (ft->ipfu_addr == NULL)
+			return ESRCH;
+		res.ipfu_addr = ft->ipfu_addr;
+	} else {
+		/* Lookup by address; only valid when no name was given. */
+		if (res.ipfu_name[0] != '\0')
+			return ESRCH;
+		for (ft = fr_availfuncs; ft->ipfu_addr != NULL; ft++)
+			if (ft->ipfu_addr == res.ipfu_addr)
+				break;
+		if (ft->ipfu_addr == NULL)
+			return ESRCH;
+		(void) strncpy(res.ipfu_name, ft->ipfu_name,
+			       sizeof(res.ipfu_name));
+	}
+
+	res.ipfu_init = ft->ipfu_init;
+	if (COPYOUT(&res, data, sizeof(res)) != 0)
+		return EFAULT;
+	return 0;
+}
+
+
+#if !defined(_KERNEL) || (!defined(__NetBSD__) && !defined(__OpenBSD__) && !defined(__FreeBSD__)) || \
+ (defined(__FreeBSD__) && (__FreeBSD_version < 490000)) || \
+ (defined(__NetBSD__) && (__NetBSD_Version__ < 105000000)) || \
+ (defined(__OpenBSD__) && (OpenBSD < 200006))
+/*
+ * From: NetBSD
+ * ppsratecheck(): packets (or events) per second limitation.
+ */
+int
+ppsratecheck(lasttime, curpps, maxpps)
+	struct timeval *lasttime;
+	int *curpps;
+	int maxpps;	/* maximum pps allowed */
+{
+	struct timeval now, elapsed;
+	int allow;
+
+	GETKTIME(&now);
+
+	/* Time since the start of the current window, with usec borrow. */
+	elapsed.tv_sec = now.tv_sec - lasttime->tv_sec;
+	elapsed.tv_usec = now.tv_usec - lasttime->tv_usec;
+	if (elapsed.tv_usec < 0) {
+		elapsed.tv_sec--;
+		elapsed.tv_usec += 1000000;
+	}
+
+	/*
+	 * A lasttime of 0,0 means "never used", so the first event is always
+	 * let through. Once a full second has gone by a new window starts
+	 * and the counter is reset. Otherwise the event passes when there is
+	 * no limit (maxpps < 0) or the limit has not yet been reached.
+	 */
+	if ((lasttime->tv_sec == 0 && lasttime->tv_usec == 0) ||
+	    elapsed.tv_sec >= 1) {
+		*lasttime = now;
+		*curpps = 0;
+		allow = 1;
+	} else {
+		allow = (maxpps < 0 || *curpps < maxpps) ? 1 : 0;
+	}
+
+	/*
+	 * The counter is bumped for every call, allowed or not, so that it
+	 * can double as a statistic.
+	 */
+	*curpps = *curpps + 1;
+
+	return (allow);
+}
+#endif
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_derefrule */
+/* Returns: int - 0 == last reference dropped and the rule itself freed, */
+/* 1 == last reference dropped but the rule structure was */
+/* not freed (FR_COPIED not set), */
+/* -1 == rule still referenced, nothing freed */
+/* Parameters: frp(IO) - pointer to the caller's pointer to the filter rule */
+/* */
+/* Decrement the reference counter to a rule by one. If it reaches zero, */
+/* destroy its lock, release any lookup references and extra data it holds */
+/* and, for rules flagged FR_COPIED, free the rule structure as well. */
+/* *frp is set to NULL only on the "still referenced" (-1) path; on the */
+/* other two paths the caller's pointer is left untouched. */
+/* ------------------------------------------------------------------------ */
+int fr_derefrule(frp)
+frentry_t **frp;
+{
+ frentry_t *fr;
+
+ fr = *frp;
+
+ MUTEX_ENTER(&fr->fr_lock);
+ fr->fr_ref--;
+ if (fr->fr_ref == 0) {
+ /* The lock is released before being destroyed. */
+ MUTEX_EXIT(&fr->fr_lock);
+ MUTEX_DESTROY(&fr->fr_lock);
+
+#ifdef IPFILTER_LOOKUP
+ /* Drop lookup references held for the source/destination address. */
+ if (fr->fr_type == FR_T_IPF && fr->fr_satype == FRI_LOOKUP)
+ ip_lookup_deref(fr->fr_srctype, fr->fr_srcptr);
+ if (fr->fr_type == FR_T_IPF && fr->fr_datype == FRI_LOOKUP)
+ ip_lookup_deref(fr->fr_dsttype, fr->fr_dstptr);
+#endif
+
+ /* Extra per-rule data is released whenever fr_dsize is non-zero. */
+ if (fr->fr_dsize) {
+ KFREES(fr->fr_data, fr->fr_dsize);
+ }
+ /* Only rules flagged FR_COPIED have their structure freed here. */
+ if ((fr->fr_flags & FR_COPIED) != 0) {
+ KFREE(fr);
+ return 0;
+ }
+ return 1;
+ } else {
+ MUTEX_EXIT(&fr->fr_lock);
+ }
+ /* Still referenced elsewhere: the caller just loses its pointer. */
+ *frp = NULL;
+ return -1;
+}
+
+
+#ifdef IPFILTER_LOOKUP
+/* ------------------------------------------------------------------------ */
+/* Function: fr_grpmapinit */
+/* Returns: int - 0 == success, else ESRCH because no usable table found */
+/* Parameters: fr(I) - pointer to rule to find hash table for */
+/* */
+/* Formats fr_arg as a decimal name and looks that name up with */
+/* fr_findhtable(IPL_LOGIPF, ...). The table is accepted only if its */
+/* FR_INOUT flag bits equal those of the rule, in which case a pointer to */
+/* it is saved in fr_ptr. fr_srcgrpmap and fr_dstgrpmap read fr_ptr later. */
+/* ------------------------------------------------------------------------ */
+static int fr_grpmapinit(fr)
+frentry_t *fr;
+{
+	char tabname[FR_GROUPLEN];
+	iphtable_t *table;
+
+#if defined(SNPRINTF) && defined(_KERNEL)
+	(void) SNPRINTF(tabname, sizeof(tabname), "%d", fr->fr_arg);
+#else
+	(void) sprintf(tabname, "%d", fr->fr_arg);
+#endif
+
+	table = fr_findhtable(IPL_LOGIPF, tabname);
+	if ((table == NULL) ||
+	    ((table->iph_flags & FR_INOUT) != (fr->fr_flags & FR_INOUT)))
+		return ESRCH;
+
+	fr->fr_ptr = table;
+	return 0;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_srcgrpmap */
+/* Returns: frentry_t * - fin_fr as left by fr_scanlist, or NULL if the */
+/* source address maps to no group */
+/* Parameters: fin(I) - pointer to packet information */
+/* passp(I) - pointer to current filter decision; its value is */
+/* handed to fr_scanlist */
+/* */
+/* Use the packet's source address as the key into the hash table stored */
+/* in the current rule's fr_ptr. If a group is found, point fin_fr at the */
+/* start of that group's rules and continue matching there. */
+/* ------------------------------------------------------------------------ */
+frentry_t *fr_srcgrpmap(fin, passp)
+fr_info_t *fin;
+u_32_t *passp;
+{
+	frgroup_t *grp;
+	void *found;
+
+	found = fr_iphmfindgroup(fin->fin_fr->fr_ptr, fin->fin_v, &fin->fin_src);
+	if (found != NULL) {
+		grp = found;
+		fin->fin_fr = grp->fg_start;
+		(void) fr_scanlist(fin, *passp);
+		return fin->fin_fr;
+	}
+	return NULL;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_dstgrpmap */
+/* Returns: frentry_t * - fin_fr as left by fr_scanlist, or NULL if the */
+/* destination address maps to no group */
+/* Parameters: fin(I) - pointer to packet information */
+/* passp(I) - pointer to current filter decision; its value is */
+/* handed to fr_scanlist */
+/* */
+/* Use the packet's destination address as the key into the hash table */
+/* stored in the current rule's fr_ptr. If a group is found, point fin_fr */
+/* at the start of that group's rules and continue matching there. */
+/* ------------------------------------------------------------------------ */
+frentry_t *fr_dstgrpmap(fin, passp)
+fr_info_t *fin;
+u_32_t *passp;
+{
+	frgroup_t *grp;
+	void *found;
+
+	found = fr_iphmfindgroup(fin->fin_fr->fr_ptr, fin->fin_v, &fin->fin_dst);
+	if (found != NULL) {
+		grp = found;
+		fin->fin_fr = grp->fg_start;
+		(void) fr_scanlist(fin, *passp);
+		return fin->fin_fr;
+	}
+	return NULL;
+}
+#endif /* IPFILTER_LOOKUP */
+
+/*
+ * Queue functions
+ * ===============
+ * These functions manage objects on queues for efficient timeouts. There are
+ * a number of system defined queues as well as user defined timeouts. It is
+ * expected that a lock is held in the domain in which the queue belongs
+ * (i.e. either state or NAT) when calling any of these functions that prevents
+ * fr_freetimeoutqueue() from being called at the same time as any other.
+ */
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_addtimeoutqueue */
+/* Returns: struct ifqtq * - NULL if malloc fails, else pointer to */
+/* timeout queue with given interval. */
+/* Parameters: parent(I) - pointer to pointer to parent node of this list */
+/* of interface queues. */
+/* seconds(I) - timeout value in seconds for this queue. */
+/* */
+/* The list headed by *parent is searched for a queue whose ttl equals */
+/* seconds * IPF_HZ_DIVIDE. A match has its DELETE flag cleared and its */
+/* reference count raised, and is returned. Otherwise a new queue is */
+/* allocated, initialised as a user queue with one reference and linked in */
+/* at the head of the list. */
+/* */
+/* Locking. */
+/* ipf_timeoutlock is taken here for the whole operation. The caller is */
+/* additionally assumed to hold an appropriate lock (exclusively) in the */
+/* domain that encompasses 'parent'. */
+/* ------------------------------------------------------------------------ */
+ipftq_t *fr_addtimeoutqueue(parent, seconds)
+ipftq_t **parent;
+u_int seconds;
+{
+	ipftq_t *tq;
+	u_int ttl;
+
+	ttl = seconds * IPF_HZ_DIVIDE;
+
+	MUTEX_ENTER(&ipf_timeoutlock);
+
+	/* Look for an existing queue with the same interval. */
+	tq = *parent;
+	while ((tq != NULL) && (tq->ifq_ttl != ttl))
+		tq = tq->ifq_next;
+
+	if (tq != NULL) {
+		/*
+		 * Reuse it: clearing the delete flag keeps the structure
+		 * from being freed and reallocated.
+		 */
+		MUTEX_ENTER(&tq->ifq_lock);
+		tq->ifq_flags &= ~IFQF_DELETE;
+		tq->ifq_ref++;
+		MUTEX_EXIT(&tq->ifq_lock);
+	} else {
+		KMALLOC(tq, ipftq_t *);
+		if (tq != NULL) {
+			tq->ifq_ttl = ttl;
+			tq->ifq_head = NULL;
+			tq->ifq_tail = &tq->ifq_head;
+			tq->ifq_next = *parent;
+			tq->ifq_pnext = parent;
+			tq->ifq_ref = 1;
+			tq->ifq_flags = IFQF_USER;
+			*parent = tq;
+			fr_userifqs++;
+			MUTEX_NUKE(&tq->ifq_lock);
+			MUTEX_INIT(&tq->ifq_lock, "ipftq mutex");
+		}
+	}
+
+	MUTEX_EXIT(&ipf_timeoutlock);
+	return tq;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_deletetimeoutqueue */
+/* Returns: int - new reference count value of the timeout queue */
+/* Parameters: ifq(I) - timeout queue which is losing a reference. */
+/* Locks: ifq->ifq_lock */
+/* */
+/* Call this whenever a pointer to a timeout queue is being discarded so */
+/* that the reference counter stays correct. */
+/* */
+/* Nothing is freed here. A user defined queue (IFQF_USER) whose count */
+/* drops to zero is only marked IFQF_DELETE; the expire code is expected to */
+/* find such queues and release them with fr_freetimeoutqueue(). It is done */
+/* this way because the locking may not be sufficient to safely do a free */
+/* when this function is called. */
+/* ------------------------------------------------------------------------ */
+int fr_deletetimeoutqueue(ifq)
+ipftq_t *ifq;
+{
+	int userq;
+
+	userq = ((ifq->ifq_flags & IFQF_USER) != 0);
+
+	ifq->ifq_ref--;
+	if (userq && (ifq->ifq_ref == 0))
+		ifq->ifq_flags |= IFQF_DELETE;
+
+	return ifq->ifq_ref;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_freetimeoutqueue */
+/* Parameters: ifq(I) - timeout queue to be released. */
+/* Returns: Nil */
+/* */
+/* Locking: */
+/* It is assumed that the caller of this function has an appropriate lock */
+/* held (exclusively) in the domain that encompasses the caller's "domain". */
+/* The ifq_lock for this structure should not be held. */
+/* */
+/* Remove a user defined timeout queue from the list of queues it is in and */
+/* free it. The queue must be a user queue, be flagged for deletion and */
+/* have no references left; otherwise a diagnostic is printed and the queue */
+/* is left alone. */
+/* ------------------------------------------------------------------------ */
+void fr_freetimeoutqueue(ifq)
+ipftq_t *ifq;
+{
+	ipftq_t *next;
+	int freeable;
+
+	freeable = ((ifq->ifq_flags & IFQF_DELETE) != 0) &&
+		   (ifq->ifq_ref == 0) &&
+		   ((ifq->ifq_flags & IFQF_USER) != 0);
+	if (!freeable) {
+		printf("fr_freetimeoutqueue(%lx) flags 0x%x ttl %d ref %d\n",
+		       (u_long)ifq, ifq->ifq_flags, ifq->ifq_ttl,
+		       ifq->ifq_ref);
+		return;
+	}
+
+	/*
+	 * Unlink: make whatever pointed at us point at our successor, and
+	 * give the successor (if any) our back pointer.
+	 */
+	next = ifq->ifq_next;
+	*ifq->ifq_pnext = next;
+	if (next != NULL)
+		next->ifq_pnext = ifq->ifq_pnext;
+
+	MUTEX_DESTROY(&ifq->ifq_lock);
+	fr_userifqs--;
+	KFREE(ifq);
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_deletequeueentry */
+/* Returns: Nil */
+/* Parameters: tqe(I) - timeout queue entry to delete */
+/* */
+/* Take an entry off the timeout queue recorded in tqe_ifq and make it an */
+/* orphan (tqe_pnext and tqe_ifq cleared). Entries with no queue are left */
+/* untouched. fr_deletetimeoutqueue is called to drop the reference the */
+/* entry held on the queue. We can't, however, call fr_freetimeoutqueue */
+/* because the correct lock(s) may not be held that would make it safe to */
+/* do so. */
+/* ------------------------------------------------------------------------ */
+void fr_deletequeueentry(tqe)
+ipftqent_t *tqe;
+{
+	ipftq_t *queue = tqe->tqe_ifq;
+
+	if (queue == NULL)
+		return;
+
+	MUTEX_ENTER(&queue->ifq_lock);
+
+	if (tqe->tqe_pnext != NULL) {
+		*tqe->tqe_pnext = tqe->tqe_next;
+		if (tqe->tqe_next == NULL) {
+			/* No successor, so the tail moves back to our slot. */
+			queue->ifq_tail = tqe->tqe_pnext;
+		} else {
+			tqe->tqe_next->tqe_pnext = tqe->tqe_pnext;
+		}
+
+		tqe->tqe_pnext = NULL;
+		tqe->tqe_ifq = NULL;
+	}
+
+	(void) fr_deletetimeoutqueue(queue);
+
+	MUTEX_EXIT(&queue->ifq_lock);
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_queuefront */
+/* Returns: Nil */
+/* Parameters: tqe(I) - pointer to timeout queue entry */
+/* */
+/* Make the entry the first one on its queue (tqe_ifq). Nothing happens if */
+/* the entry has no queue or is already at the head. */
+/* ------------------------------------------------------------------------ */
+void fr_queuefront(tqe)
+ipftqent_t *tqe;
+{
+	ipftq_t *queue;
+
+	queue = tqe->tqe_ifq;
+	if (queue == NULL)
+		return;
+
+	MUTEX_ENTER(&queue->ifq_lock);
+
+	if (queue->ifq_head == tqe) {
+		MUTEX_EXIT(&queue->ifq_lock);
+		return;
+	}
+
+	/* Unlink from the current position. */
+	*tqe->tqe_pnext = tqe->tqe_next;
+	if (tqe->tqe_next == NULL)
+		queue->ifq_tail = tqe->tqe_pnext;
+	else
+		tqe->tqe_next->tqe_pnext = tqe->tqe_pnext;
+
+	/* Push in front of the old head. */
+	tqe->tqe_next = queue->ifq_head;
+	queue->ifq_head->tqe_pnext = &tqe->tqe_next;
+	queue->ifq_head = tqe;
+	tqe->tqe_pnext = &queue->ifq_head;
+
+	MUTEX_EXIT(&queue->ifq_lock);
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_queueback */
+/* Returns: Nil */
+/* Parameters: tqe(I) - pointer to timeout queue entry */
+/* */
+/* Refresh the entry's expiry time (fr_ticks + queue ttl) and make it the */
+/* last entry on its queue, if it isn't already there. Entries with no */
+/* queue are ignored. */
+/* ------------------------------------------------------------------------ */
+void fr_queueback(tqe)
+ipftqent_t *tqe;
+{
+	ipftq_t *queue;
+
+	queue = tqe->tqe_ifq;
+	if (queue == NULL)
+		return;
+
+	/* The expiry time is refreshed whether or not the entry moves. */
+	tqe->tqe_die = fr_ticks + queue->ifq_ttl;
+
+	MUTEX_ENTER(&queue->ifq_lock);
+	if (tqe->tqe_next != NULL) {
+		/* Not the last entry: unlink it ... */
+		*tqe->tqe_pnext = tqe->tqe_next;
+		tqe->tqe_next->tqe_pnext = tqe->tqe_pnext;
+
+		/* ... and hang it off the tail. */
+		tqe->tqe_next = NULL;
+		tqe->tqe_pnext = queue->ifq_tail;
+		*queue->ifq_tail = tqe;
+		queue->ifq_tail = &tqe->tqe_next;
+	}
+	MUTEX_EXIT(&queue->ifq_lock);
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_queueappend */
+/* Returns: Nil */
+/* Parameters: tqe(I) - pointer to timeout queue entry */
+/* ifq(I) - pointer to timeout queue */
+/* parent(I) - owning object pointer */
+/* */
+/* Put a new item on the very end of this queue, record its owner and */
+/* expiry time, and take a reference on the queue for it. */
+/* ------------------------------------------------------------------------ */
+void fr_queueappend(tqe, ifq, parent)
+ipftqent_t *tqe;
+ipftq_t *ifq;
+void *parent;
+{
+
+	MUTEX_ENTER(&ifq->ifq_lock);
+
+	/* Describe the entry. */
+	tqe->tqe_ifq = ifq;
+	tqe->tqe_parent = parent;
+	tqe->tqe_die = fr_ticks + ifq->ifq_ttl;
+
+	/* Link it in after the current last entry. */
+	tqe->tqe_pnext = ifq->ifq_tail;
+	*ifq->ifq_tail = tqe;
+	ifq->ifq_tail = &tqe->tqe_next;
+	tqe->tqe_next = NULL;
+
+	ifq->ifq_ref++;
+
+	MUTEX_EXIT(&ifq->ifq_lock);
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_movequeue */
+/* Returns: Nil */
+/* Parameters: tqe(I) - pointer to timeout queue entry */
+/* oifq(I) - old timeout queue entry was on */
+/* nifq(I) - new timeout queue to put entry on */
+/* */
+/* Move a queue entry from one timeout queue to the end of another timeout */
+/* queue and refresh its expiry time from the new queue's ttl. */
+/* If the entry is already the last one on the queue it is staying on, */
+/* only the expiry time is refreshed and no relinking is done. */
+/* ------------------------------------------------------------------------ */
+void fr_movequeue(tqe, oifq, nifq)
+ipftqent_t *tqe;
+ipftq_t *oifq, *nifq;
+{
+	MUTEX_ENTER(&oifq->ifq_lock);
+
+	/*
+	 * Is the list manipulation going to be a no-op ?
+	 *
+	 * ifq_tail holds the address of the last entry's tqe_next field (or
+	 * of ifq_head when the queue is empty), so "*ifq_tail" is always the
+	 * terminating NULL and can never equal tqe. The entry is the last
+	 * one exactly when ifq_tail addresses its own tqe_next. The expiry
+	 * time still has to be refreshed, as the full path below does.
+	 */
+	if (oifq == nifq && oifq->ifq_tail == &tqe->tqe_next) {
+		tqe->tqe_die = fr_ticks + nifq->ifq_ttl;
+		MUTEX_EXIT(&oifq->ifq_lock);
+		return;
+	}
+
+	/*
+	 * Remove from the old queue
+	 */
+	*tqe->tqe_pnext = tqe->tqe_next;
+	if (tqe->tqe_next)
+		tqe->tqe_next->tqe_pnext = tqe->tqe_pnext;
+	else
+		oifq->ifq_tail = tqe->tqe_pnext;
+	tqe->tqe_next = NULL;
+
+	/*
+	 * If we're moving from one queue to another, release the lock on the
+	 * old queue and get a lock on the new queue. For user defined queues,
+	 * if we're moving off it, call delete in case it can now be freed.
+	 */
+	if (oifq != nifq) {
+		tqe->tqe_ifq = NULL;
+
+		(void) fr_deletetimeoutqueue(oifq);
+
+		MUTEX_EXIT(&oifq->ifq_lock);
+
+		MUTEX_ENTER(&nifq->ifq_lock);
+
+		tqe->tqe_ifq = nifq;
+		nifq->ifq_ref++;
+	}
+
+	/*
+	 * Add to the bottom of the new queue
+	 */
+	tqe->tqe_die = fr_ticks + nifq->ifq_ttl;
+	tqe->tqe_pnext = nifq->ifq_tail;
+	*nifq->ifq_tail = tqe;
+	nifq->ifq_tail = &tqe->tqe_next;
+	MUTEX_EXIT(&nifq->ifq_lock);
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_updateipid */
+/* Returns: int - 0 == success, -1 == error (packet should be dropped) */
+/* Parameters: fin(I) - pointer to packet information */
+/* */
+/* When we are doing NAT, change the IP id of every packet to represent a */
+/* single sequence of packets coming from the host, hiding any host */
+/* specific sequencing that might otherwise be revealed. If the packet is */
+/* a fragment, then store the 'new' IPid in the fragment cache and look up */
+/* the fragment cache for non-leading fragments. If a non-leading fragment */
+/* has no match in the cache, return an error. */
+/* When the id actually changes, the IP header checksum is adjusted to */
+/* match rather than being recomputed from scratch. */
+/* ------------------------------------------------------------------------ */
+static INLINE int fr_updateipid(fin)
+fr_info_t *fin;
+{
+ u_short id, ido, sums;
+ u_32_t sumd, sum;
+ ip_t *ip;
+
+ if (fin->fin_off != 0) {
+ /* Non-leading fragment: reuse the id cached for its packet. */
+ sum = fr_ipid_knownfrag(fin);
+ /* 0xffffffff from the lookup is treated as "no match". */
+ if (sum == 0xffffffff)
+ return -1;
+ sum &= 0xffff;
+ id = (u_short)sum;
+ } else {
+ /* Offset 0: take a new id and cache it if the packet is fragmented. */
+ id = fr_nextipid(fin);
+ if (fin->fin_off == 0 && (fin->fin_flx & FI_FRAG) != 0)
+ (void) fr_ipid_newfrag(fin, (u_32_t)id);
+ }
+
+ ip = fin->fin_ip;
+ ido = ntohs(ip->ip_id);
+ /* Nothing to rewrite when the id is unchanged. */
+ if (id == ido)
+ return 0;
+ ip->ip_id = htons(id);
+ /* The id must be stored before CALC_SUMD, which modifies id and ido. */
+ CALC_SUMD(ido, id, sumd); /* DESTRUCTIVE MACRO! id,ido change */
+ /* Add the difference to the complemented checksum and fold the carries. */
+ sum = (~ntohs(ip->ip_sum)) & 0xffff;
+ sum += sumd;
+ sum = (sum >> 16) + (sum & 0xffff);
+ sum = (sum >> 16) + (sum & 0xffff);
+ sums = ~(u_short)sum;
+ ip->ip_sum = htons(sums);
+ return 0;
+}
+
+
+#ifdef NEED_FRGETIFNAME
+/* ------------------------------------------------------------------------ */
+/* Function: fr_getifname */
+/* Returns: char * - pointer to interface name */
+/* Parameters: ifp(I) - pointer to network interface */
+/* buffer(O) - pointer to where to store interface name */
+/* */
+/* Constructs an interface name in the buffer passed. The buffer passed is */
+/* expected to be at least LIFNAMSIZ in bytes big. If buffer is passed in */
+/* as a NULL pointer then return a pointer to a static array. */
+/* On platforms that keep the unit number separately from the name, the */
+/* unit is appended in decimal. The result is always NUL terminated within */
+/* LIFNAMSIZ bytes, being truncated if name plus unit do not fit. */
+/* ------------------------------------------------------------------------ */
+char *fr_getifname(ifp, buffer)
+struct ifnet *ifp;
+char *buffer;
+{
+	static char namebuf[LIFNAMSIZ];
+# if defined(MENTAT) || defined(__FreeBSD__) || defined(__osf__) || \
+     defined(__sgi) || defined(linux) || defined(_AIX51) || \
+     (defined(sun) && !defined(__SVR4) && !defined(__svr4__))
+	int unit, space;
+	char temp[20];
+	char *s;
+# endif
+
+	if (buffer == NULL)
+		buffer = namebuf;
+	(void) strncpy(buffer, ifp->if_name, LIFNAMSIZ);
+	buffer[LIFNAMSIZ - 1] = '\0';
+# if defined(MENTAT) || defined(__FreeBSD__) || defined(__osf__) || \
+     defined(__sgi) || defined(_AIX51) || \
+     (defined(sun) && !defined(__SVR4) && !defined(__svr4__))
+	/* Find the end of the name so the unit number can follow it. */
+	for (s = buffer; *s; s++)
+		;
+	unit = ifp->if_unit;
+	space = LIFNAMSIZ - (s - buffer);
+	if (space > 0) {
+#  if defined(SNPRINTF) && defined(_KERNEL)
+		(void) SNPRINTF(temp, sizeof(temp), "%d", unit);
+#  else
+		(void) sprintf(temp, "%d", unit);
+#  endif
+		(void) strncpy(s, temp, space);
+		/*
+		 * strncpy() writes no terminator when the digits take up
+		 * all of the remaining space, so put one back.
+		 */
+		buffer[LIFNAMSIZ - 1] = '\0';
+	}
+# endif
+	return buffer;
+}
+#endif
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_ioctlswitch */
+/* Returns: int - -1 continue processing, else ioctl return value */
+/* Parameters: unit(I) - device unit opened */
+/* data(I) - pointer to ioctl data */
+/* cmd(I) - ioctl command */
+/* mode(I) - mode value */
+/* */
+/* Dispatch an ioctl to the handler for the given device unit. IPL_LOGIPF */
+/* always yields -1 so the caller carries on itself. Every other unit gets */
+/* EIO when ipfilter is not running (fr_running <= 0), when the unit is not */
+/* recognised, or when support for it has not been compiled in. Adding or */
+/* removing rules through the auth device needs FWRITE in mode, else EPERM. */
+/* ------------------------------------------------------------------------ */
+int fr_ioctlswitch(unit, data, cmd, mode)
+int unit, mode;
+ioctlcmd_t cmd;
+void *data;
+{
+	int rv;
+
+	if (unit == IPL_LOGIPF)
+		return -1;
+
+	/* None of the remaining devices can be used while not running. */
+	if (fr_running <= 0)
+		return EIO;
+
+	rv = EIO;
+
+	switch (unit)
+	{
+	case IPL_LOGNAT :
+		rv = fr_nat_ioctl(data, cmd, mode);
+		break;
+	case IPL_LOGSTATE :
+		rv = fr_state_ioctl(data, cmd, mode);
+		break;
+	case IPL_LOGAUTH :
+		if ((cmd != (ioctlcmd_t)SIOCADAFR) &&
+		    (cmd != (ioctlcmd_t)SIOCRMAFR))
+			rv = fr_auth_ioctl(data, cmd, mode);
+		else if (!(mode & FWRITE))
+			rv = EPERM;
+		else
+			rv = frrequest(unit, cmd, data, fr_active, 1);
+		break;
+#ifdef IPFILTER_SYNC
+	case IPL_LOGSYNC :
+		rv = fr_sync_ioctl(data, cmd, mode);
+		break;
+#endif
+#ifdef IPFILTER_SCAN
+	case IPL_LOGSCAN :
+		rv = fr_scan_ioctl(data, cmd, mode);
+		break;
+#endif
+#ifdef IPFILTER_LOOKUP
+	case IPL_LOGLOOKUP :
+		rv = ip_lookup_ioctl(data, cmd, mode);
+		break;
+#endif
+	default :
+		/* Unknown unit, or one whose support is compiled out. */
+		break;
+	}
+
+	return rv;
+}
+
+
+/*
+ * This array defines the expected size of objects coming into the kernel
+ * for the various recognised object types. It is indexed by object type.
+ *
+ * Column 0 is a flag word: when bit 0 is set, fr_inobj() accepts any
+ * user supplied size that is at least the size in column 1; when it is
+ * clear, the supplied size must equal column 1 exactly.
+ * Column 1 is the size, in bytes, of the kernel structure for that type.
+ *
+ * NUM_OBJ_TYPES must match the number of rows below; fr_inobj() rejects any
+ * type outside 0 .. NUM_OBJ_TYPES-1.
+ */
+#define NUM_OBJ_TYPES 14
+
+static int fr_objbytes[NUM_OBJ_TYPES][2] = {
+ { 1, sizeof(struct frentry) }, /* frentry */
+ { 0, sizeof(struct friostat) },
+ { 0, sizeof(struct fr_info) },
+ { 0, sizeof(struct fr_authstat) },
+ { 0, sizeof(struct ipfrstat) },
+ { 0, sizeof(struct ipnat) },
+ { 0, sizeof(struct natstat) },
+ { 0, sizeof(struct ipstate_save) },
+ { 1, sizeof(struct nat_save) }, /* nat_save */
+ { 0, sizeof(struct natlookup) },
+ { 1, sizeof(struct ipstate) }, /* ipstate */
+ { 0, sizeof(struct ips_stat) },
+ { 0, sizeof(struct frauth) },
+ { 0, sizeof(struct ipftune) }
+};
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_inobj */
+/* Returns: int - 0 = success, else failure */
+/* Parameters: data(I) - pointer to ioctl data */
+/* ptr(I) - pointer to store real data in */
+/* type(I) - type of structure being moved */
+/* */
+/* Copy in the ipfobj_t that data points to, check that its type is the */
+/* one expected and that its size is acceptable for that type according to */
+/* fr_objbytes, then copy in the object it refers to. For types flagged as */
+/* variable sized only the size listed in fr_objbytes is copied. In future, */
+/* we add things to check for version numbers, sizes, etc, to make it */
+/* backward compatible at the ABI for user land. */
+/* ------------------------------------------------------------------------ */
+int fr_inobj(data, ptr, type)
+void *data;
+void *ptr;
+int type;
+{
+	ipfobj_t obj;
+	int error = 0;
+	int varsized;
+
+	if ((type < 0) || (type > NUM_OBJ_TYPES-1))
+		return EINVAL;
+
+	/* NOTE(review): the outcome of BCOPYIN is not checked here - confirm */
+	/* whether the macro can report a failure on this platform. */
+	BCOPYIN((caddr_t)data, (caddr_t)&obj, sizeof(obj));
+
+	if (obj.ipfo_type != type)
+		return EINVAL;
+
+#ifdef IPFILTER_COMPAT
+	if (obj.ipfo_rev != IPFILTER_VERSION)
+		/* XXX compatibility hook here */
+		;
+#endif
+
+	varsized = ((fr_objbytes[type][0] & 1) != 0);
+
+	if (varsized) {
+		/* The caller's object may be bigger, never smaller. */
+		if (obj.ipfo_size < fr_objbytes[type][1])
+			/* XXX compatibility hook here */
+			return EINVAL;
+		error = COPYIN((caddr_t)obj.ipfo_ptr, (caddr_t)ptr,
+			       fr_objbytes[type][1]);
+	} else {
+		/* Fixed size objects have to match exactly. */
+		if (obj.ipfo_size != fr_objbytes[type][1])
+			/* XXX compatibility hook here */
+			return EINVAL;
+		error = COPYIN((caddr_t)obj.ipfo_ptr, (caddr_t)ptr,
+			       obj.ipfo_size);
+	}
+	return error;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_inobjsz */
+/* Returns: int - 0 = success, else failure */
+/* Parameters: data(I) - pointer to ioctl data (an ipfobj_t descriptor) */
+/* ptr(I) - kernel buffer that receives the object */
+/* type(I) - type of structure being moved */
+/* sz(I) - size of data to copy */
+/* */
+/* As per fr_inobj, except the number of bytes to copy in is supplied by */
+/* the caller. The type must allow variable sized objects, sz must not be */
+/* smaller than the size defined for the type, and the size recorded in */
+/* the descriptor must equal sz. */
+/* ------------------------------------------------------------------------ */
+int fr_inobjsz(data, ptr, type, sz)
+void *data;
+void *ptr;
+int type, sz;
+{
+    ipfobj_t obj;
+
+    if ((type < 0) || (type >= NUM_OBJ_TYPES))
+        return EINVAL;
+
+    /* Only variable sized types qualify, and never below their minimum. */
+    if ((fr_objbytes[type][0] & 1) == 0)
+        return EINVAL;
+    if (sz < fr_objbytes[type][1])
+        return EINVAL;
+
+    BCOPYIN((caddr_t)data, (caddr_t)&obj, sizeof(obj));
+
+    if (obj.ipfo_type != type)
+        return EINVAL;
+
+#ifdef IPFILTER_COMPAT
+    if (obj.ipfo_rev != IPFILTER_VERSION) {
+        /* XXX compatibility hook here */
+    }
+#endif
+
+    if (obj.ipfo_size != sz)
+        /* XXX compatibility hook here */
+        return EINVAL;
+
+    return COPYIN((caddr_t)obj.ipfo_ptr, (caddr_t)ptr, sz);
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_outobjsz */
+/* Returns: int - 0 = success, else failure */
+/* Parameters: data(I) - pointer to ioctl data (an ipfobj_t descriptor) */
+/* ptr(I) - kernel buffer holding the object to copy out */
+/* type(I) - type of structure being moved */
+/* sz(I) - size of data to copy */
+/* */
+/* As per fr_outobj, except the number of bytes to copy out is supplied by */
+/* the caller. The type must allow variable sized objects, sz must not be */
+/* smaller than the size defined for the type, and the size recorded in */
+/* the descriptor must equal sz. */
+/* ------------------------------------------------------------------------ */
+int fr_outobjsz(data, ptr, type, sz)
+void *data;
+void *ptr;
+int type, sz;
+{
+    ipfobj_t obj;
+
+    if ((type < 0) || (type >= NUM_OBJ_TYPES))
+        return EINVAL;
+
+    /* Only variable sized types qualify, and never below their minimum. */
+    if ((fr_objbytes[type][0] & 1) == 0)
+        return EINVAL;
+    if (sz < fr_objbytes[type][1])
+        return EINVAL;
+
+    BCOPYIN((caddr_t)data, (caddr_t)&obj, sizeof(obj));
+
+    if (obj.ipfo_type != type)
+        return EINVAL;
+
+#ifdef IPFILTER_COMPAT
+    if (obj.ipfo_rev != IPFILTER_VERSION) {
+        /* XXX compatibility hook here */
+    }
+#endif
+
+    if (obj.ipfo_size != sz)
+        /* XXX compatibility hook here */
+        return EINVAL;
+
+    return COPYOUT((caddr_t)ptr, (caddr_t)obj.ipfo_ptr, sz);
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_outobj */
+/* Returns: int - 0 = success, else failure */
+/* Parameters: data(I) - pointer to ioctl data (an ipfobj_t descriptor) */
+/* ptr(I) - kernel object to copy out to user space */
+/* type(I) - type of structure being moved */
+/* */
+/* Copy out the contents of what ptr is to where ipfobj points to. The */
+/* descriptor is validated against the fr_objbytes table first. The number */
+/* of bytes copied is always the size recorded in that table for the type, */
+/* never the (possibly larger) size claimed by the descriptor of a variable */
+/* sized type, so that nothing beyond the kernel object is read. This is */
+/* the same amount that fr_inobj() copies in. */
+/* ------------------------------------------------------------------------ */
+int fr_outobj(data, ptr, type)
+void *data;
+void *ptr;
+int type;
+{
+    ipfobj_t obj;
+    int error;
+
+    if ((type < 0) || (type > NUM_OBJ_TYPES-1))
+        return EINVAL;
+
+    BCOPYIN((caddr_t)data, (caddr_t)&obj, sizeof(obj));
+
+    if (obj.ipfo_type != type)
+        return EINVAL;
+
+#ifdef IPFILTER_COMPAT
+    if (obj.ipfo_rev != IPFILTER_VERSION)
+        /* XXX compatibility hook here */
+        ;
+#endif
+
+    if ((fr_objbytes[type][0] & 1) != 0) {
+        if (obj.ipfo_size < fr_objbytes[type][1])
+            /* XXX compatibility hook here */
+            return EINVAL;
+    } else if (obj.ipfo_size != fr_objbytes[type][1])
+        /* XXX compatibility hook here */
+        return EINVAL;
+
+    /*
+     * For fixed size types the table size equals obj.ipfo_size at this
+     * point. For variable sized types obj.ipfo_size is only known to be
+     * at least the table size, so using it as the length could read past
+     * the end of the object that ptr refers to.
+     */
+    error = COPYOUT((caddr_t)ptr, (caddr_t)obj.ipfo_ptr,
+        fr_objbytes[type][1]);
+    return error;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_checkl4sum */
+/* Returns: int - 0 = good, -1 = bad, 1 = cannot check */
+/* Parameters: fin(I) - pointer to packet information */
+/* */
+/* If possible, calculate the layer 4 checksum for the packet. If this is */
+/* not possible, return without indicating a failure or success but in a */
+/* way that is distinguishable. */
+/* ------------------------------------------------------------------------ */
+int fr_checkl4sum(fin)
+fr_info_t *fin;
+{
+ u_short sum, hdrsum, *csump;
+ udphdr_t *udp;
+ int dosum;
+
+ if ((fin->fin_flx & FI_NOCKSUM) != 0) /* FI_NOCKSUM set: skip verification, report good */
+ return 0;
+
+ /*
+ * If the packet isn't a fragment, isn't too short and otherwise
+ * isn't already considered "bad", then validate the checksum. A
+ * packet carrying any of those flags is reported as "cannot
+ * check" (1) rather than as good or bad.
+ */
+ if ((fin->fin_flx & (FI_FRAG|FI_SHORT|FI_BAD)) != 0)
+ return 1;
+
+ csump = NULL;
+ hdrsum = 0;
+ dosum = 0;
+ sum = 0;
+
+#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_VALID)
+ if (dohwcksum && ((*fin->fin_mp)->b_ick_flag == ICK_VALID)) { /* presumably the checksum was already validated by hardware - TODO confirm */
+ hdrsum = 0;
+ sum = 0;
+ } else {
+#endif
+ switch (fin->fin_p)
+ {
+ case IPPROTO_TCP :
+ csump = &((tcphdr_t *)fin->fin_dp)->th_sum;
+ dosum = 1;
+ break;
+
+ case IPPROTO_UDP :
+ udp = fin->fin_dp;
+ if (udp->uh_sum != 0) { /* uh_sum of 0: nothing is computed, sum and hdrsum stay 0 and compare equal */
+ csump = &udp->uh_sum;
+ dosum = 1;
+ }
+ break;
+
+ case IPPROTO_ICMP :
+ csump = &((struct icmp *)fin->fin_dp)->icmp_cksum;
+ dosum = 1;
+ break;
+
+ default :
+ return 1;
+ /*NOTREACHED*/
+ }
+
+ if (csump != NULL)
+ hdrsum = *csump;
+
+ if (dosum)
+ sum = fr_cksum(fin->fin_m, fin->fin_ip,
+ fin->fin_p, fin->fin_dp);
+#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_VALID)
+ }
+#endif
+#if !defined(_KERNEL)
+ if (sum == hdrsum) {
+ FR_DEBUG(("checkl4sum: %hx == %hx\n", sum, hdrsum));
+ } else {
+ FR_DEBUG(("checkl4sum: %hx != %hx\n", sum, hdrsum));
+ }
+#endif
+ if (hdrsum == sum) /* the value from fr_cksum() must equal the checksum stored in the header */
+ return 0;
+ return -1;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_ifpfillv4addr */
+/* Returns: int - 0 = address update, -1 = address not updated */
+/* Parameters: atype(I) - type of network address update to perform */
+/* sin(I) - pointer to source of address information */
+/* mask(I) - pointer to source of netmask information */
+/* inp(I) - pointer to destination address store */
+/* inpmask(I) - pointer to destination netmask store */
+/* */
+/* Fill inp (and optionally inpmask) from sin/mask according to atype: */
+/* FRI_NETMASKED - inpmask receives the netmask and inp the masked */
+/* address; fails with -1 if inpmask is NULL. */
+/* FRI_NETWORK - inp receives the masked address. */
+/* anything else - inp receives the address unchanged. */
+/* For every atype other than FRI_NETMASKED a non-NULL inpmask is set to */
+/* an all 1s value. */
+/* ------------------------------------------------------------------------ */
+int fr_ifpfillv4addr(atype, sin, mask, inp, inpmask)
+int atype;
+struct sockaddr_in *sin, *mask;
+struct in_addr *inp, *inpmask;
+{
+    if (atype == FRI_NETMASKED) {
+        if (inpmask == NULL)
+            return -1;
+        inpmask->s_addr = mask->sin_addr.s_addr;
+        inp->s_addr = sin->sin_addr.s_addr & mask->sin_addr.s_addr;
+        return 0;
+    }
+
+    if (inpmask != NULL)
+        inpmask->s_addr = 0xffffffff;
+
+    if (atype == FRI_NETWORK)
+        inp->s_addr = sin->sin_addr.s_addr & mask->sin_addr.s_addr;
+    else
+        inp->s_addr = sin->sin_addr.s_addr;
+
+    return 0;
+}
+
+
+#ifdef USE_INET6
+/* ------------------------------------------------------------------------ */
+/* Function: fr_ifpfillv6addr */
+/* Returns: int - 0 = address update, -1 = address not updated */
+/* Parameters: atype(I) - type of network address update to perform */
+/* sin(I) - pointer to source of address information */
+/* mask(I) - pointer to source of netmask information */
+/* inp(I) - pointer to destination address store */
+/* inpmask(I) - pointer to destination netmask store */
+/* */
+/* IPv6 counterpart of fr_ifpfillv4addr, working on four 32bit words: */
+/* FRI_NETMASKED - inpmask receives the netmask and inp the masked */
+/* address; fails with -1 if inpmask is NULL. */
+/* FRI_NETWORK - inp receives the masked address. */
+/* anything else - inp receives the address unchanged. */
+/* For every atype other than FRI_NETMASKED a non-NULL inpmask is set to */
+/* an all 1s value. */
+/* ------------------------------------------------------------------------ */
+int fr_ifpfillv6addr(atype, sin, mask, inp, inpmask)
+int atype;
+struct sockaddr_in6 *sin, *mask;
+struct in_addr *inp, *inpmask;
+{
+    i6addr_t *from, *net, *to, *tomask;
+    int w;
+
+    from = (i6addr_t *)&sin->sin6_addr;
+    net = (i6addr_t *)&mask->sin6_addr;
+    to = (i6addr_t *)inp;
+    tomask = (i6addr_t *)inpmask;
+
+    if (atype == FRI_NETMASKED) {
+        if (inpmask == NULL)
+            return -1;
+        /* The whole mask is stored before any address word is. */
+        for (w = 0; w < 4; w++)
+            tomask->i6[w] = net->i6[w];
+        for (w = 0; w < 4; w++)
+            to->i6[w] = from->i6[w] & net->i6[w];
+        return 0;
+    }
+
+    if (inpmask != NULL) {
+        for (w = 0; w < 4; w++)
+            tomask->i6[w] = 0xffffffff;
+    }
+
+    if (atype == FRI_NETWORK) {
+        for (w = 0; w < 4; w++)
+            to->i6[w] = from->i6[w] & net->i6[w];
+    } else {
+        for (w = 0; w < 4; w++)
+            to->i6[w] = from->i6[w];
+    }
+    return 0;
+}
+#endif
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_matchtag */
+/* Returns: 0 == mismatch, 1 == match. */
+/* Parameters: tag1(I) - pointer to first tag to compare */
+/* tag2(I) - pointer to second tag to compare */
+/* */
+/* Decide whether two tags can be considered to match. The tag is 16 */
+/* bytes long (16 characters) but that is overlayed with 4 32bit ints so */
+/* the ints are compared instead, for speed. Two tags match when they are */
+/* the same object, when the first int of both is 0, or when all four ints */
+/* are equal. This function should only be called with both tag1 and tag2 */
+/* as non-NULL pointers. */
+/* ------------------------------------------------------------------------ */
+int fr_matchtag(tag1, tag2)
+ipftag_t *tag1, *tag2;
+{
+    int i;
+
+    if (tag1 == tag2)
+        return 1;
+
+    if ((tag1->ipt_num[0] == 0) && (tag2->ipt_num[0] == 0))
+        return 1;
+
+    for (i = 0; i < 4; i++) {
+        if (tag1->ipt_num[i] != tag2->ipt_num[i])
+            return 0;
+    }
+    return 1;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_coalesce */
+/* Returns: 1 == success, -1 == failure, 0 == no change */
+/* Parameters: fin(I) - pointer to packet information */
+/* */
+/* Attempt to get all of the packet data into a single, contiguous buffer. */
+/* If this call returns a failure then the buffers have also been freed. */
+/* ------------------------------------------------------------------------ */
+int fr_coalesce(fin)
+fr_info_t *fin;
+{
+ if ((fin->fin_flx & FI_COALESCE) != 0) /* flag already set: report success without pulling up again */
+ return 1;
+
+ /*
+ * If the mbuf pointers indicate that there is no mbuf to work with,
+ * return but do not indicate success or failure.
+ */
+ if (fin->fin_m == NULL || fin->fin_mp == NULL)
+ return 0;
+
+#if defined(_KERNEL)
+ if (fr_pullup(fin->fin_m, fin, fin->fin_plen) == NULL) {
+ ATOMIC_INCL(fr_badcoalesces[fin->fin_out]); /* failure counter, indexed by fin_out */
+# ifdef MENTAT
+ FREE_MB_T(*fin->fin_mp); /* only MENTAT builds free the message here */
+# endif
+ *fin->fin_mp = NULL; /* drop both references to the packet */
+ fin->fin_m = NULL;
+ return -1;
+ }
+#else
+ fin = fin; /* LINT */
+#endif
+ return 1;
+}
+
+
+/*
+ * The following table lists all of the tunable variables that can be
+ * accessed via SIOCIPFGET/SIOCIPFSET/SIOCIPFGETNEXT. The format of each row
+ * in the table below is as follows:
+ *
+ * pointer to value, name of value, minimum, maximum, size of the value's
+ * container, value attribute flags
+ *
+ * For convenience, IPFT_RDONLY means the value is read-only, IPFT_WRDISABLED
+ * means the value can only be written to when IPFilter is loaded but disabled.
+ * The obvious implication is if neither of these is set then the value can be
+ * changed at any time without harm.
+ *
+ * The table is terminated by a row whose name is NULL.
+ */
+ipftuneable_t ipf_tuneables[] = {
+ /* filtering */
+ { { &fr_flags }, "fr_flags", 0, 0xffffffff,
+ sizeof(fr_flags), 0 },
+ { { &fr_active }, "fr_active", 0, 0,
+ sizeof(fr_active), IPFT_RDONLY },
+ { { &fr_control_forwarding }, "fr_control_forwarding", 0, 1,
+ sizeof(fr_control_forwarding), 0 },
+ { { &fr_update_ipid }, "fr_update_ipid", 0, 1,
+ sizeof(fr_update_ipid), 0 },
+ { { &fr_chksrc }, "fr_chksrc", 0, 1,
+ sizeof(fr_chksrc), 0 },
+ { { &fr_minttl }, "fr_minttl", 0, 1,
+ sizeof(fr_minttl), 0 }, /* NOTE(review): a 0..1 range looks too narrow if this is a TTL threshold - confirm */
+ { { &fr_icmpminfragmtu }, "fr_icmpminfragmtu", 0, 1,
+ sizeof(fr_icmpminfragmtu), 0 }, /* NOTE(review): a 0..1 range looks too narrow if this is an MTU value - confirm */
+ { { &fr_pass }, "fr_pass", 0, 0xffffffff,
+ sizeof(fr_pass), 0 },
+ /* state */
+ { { &fr_tcpidletimeout }, "fr_tcpidletimeout", 1, 0x7fffffff,
+ sizeof(fr_tcpidletimeout), IPFT_WRDISABLED },
+ { { &fr_tcpclosewait }, "fr_tcpclosewait", 1, 0x7fffffff,
+ sizeof(fr_tcpclosewait), IPFT_WRDISABLED },
+ { { &fr_tcplastack }, "fr_tcplastack", 1, 0x7fffffff,
+ sizeof(fr_tcplastack), IPFT_WRDISABLED },
+ { { &fr_tcptimeout }, "fr_tcptimeout", 1, 0x7fffffff,
+ sizeof(fr_tcptimeout), IPFT_WRDISABLED },
+ { { &fr_tcpclosed }, "fr_tcpclosed", 1, 0x7fffffff,
+ sizeof(fr_tcpclosed), IPFT_WRDISABLED },
+ { { &fr_tcphalfclosed }, "fr_tcphalfclosed", 1, 0x7fffffff,
+ sizeof(fr_tcphalfclosed), IPFT_WRDISABLED },
+ { { &fr_udptimeout }, "fr_udptimeout", 1, 0x7fffffff,
+ sizeof(fr_udptimeout), IPFT_WRDISABLED },
+ { { &fr_udpacktimeout }, "fr_udpacktimeout", 1, 0x7fffffff,
+ sizeof(fr_udpacktimeout), IPFT_WRDISABLED },
+ { { &fr_icmptimeout }, "fr_icmptimeout", 1, 0x7fffffff,
+ sizeof(fr_icmptimeout), IPFT_WRDISABLED },
+ { { &fr_icmpacktimeout }, "fr_icmpacktimeout", 1, 0x7fffffff,
+ sizeof(fr_icmpacktimeout), IPFT_WRDISABLED },
+ { { &fr_iptimeout }, "fr_iptimeout", 1, 0x7fffffff,
+ sizeof(fr_iptimeout), IPFT_WRDISABLED },
+ { { &fr_statemax }, "fr_statemax", 1, 0x7fffffff,
+ sizeof(fr_statemax), 0 },
+ { { &fr_statesize }, "fr_statesize", 1, 0x7fffffff,
+ sizeof(fr_statesize), IPFT_WRDISABLED },
+ { { &fr_state_lock }, "fr_state_lock", 0, 1,
+ sizeof(fr_state_lock), IPFT_RDONLY },
+ { { &fr_state_maxbucket }, "fr_state_maxbucket", 1, 0x7fffffff,
+ sizeof(fr_state_maxbucket), IPFT_WRDISABLED },
+ { { &fr_state_maxbucket_reset }, "fr_state_maxbucket_reset", 0, 1,
+ sizeof(fr_state_maxbucket_reset), IPFT_WRDISABLED },
+ { { &ipstate_logging }, "ipstate_logging", 0, 1,
+ sizeof(ipstate_logging), 0 },
+ /* nat */
+ { { &fr_nat_lock }, "fr_nat_lock", 0, 1,
+ sizeof(fr_nat_lock), IPFT_RDONLY },
+ { { &ipf_nattable_sz }, "ipf_nattable_sz", 1, 0x7fffffff,
+ sizeof(ipf_nattable_sz), IPFT_WRDISABLED },
+ { { &ipf_nattable_max }, "ipf_nattable_max", 1, 0x7fffffff,
+ sizeof(ipf_nattable_max), 0 },
+ { { &ipf_natrules_sz }, "ipf_natrules_sz", 1, 0x7fffffff,
+ sizeof(ipf_natrules_sz), IPFT_WRDISABLED },
+ { { &ipf_rdrrules_sz }, "ipf_rdrrules_sz", 1, 0x7fffffff,
+ sizeof(ipf_rdrrules_sz), IPFT_WRDISABLED },
+ { { &ipf_hostmap_sz }, "ipf_hostmap_sz", 1, 0x7fffffff,
+ sizeof(ipf_hostmap_sz), IPFT_WRDISABLED },
+ { { &fr_nat_maxbucket }, "fr_nat_maxbucket", 1, 0x7fffffff,
+ sizeof(fr_nat_maxbucket), IPFT_WRDISABLED },
+ { { &fr_nat_maxbucket_reset }, "fr_nat_maxbucket_reset", 0, 1,
+ sizeof(fr_nat_maxbucket_reset), IPFT_WRDISABLED },
+ { { &nat_logging }, "nat_logging", 0, 1,
+ sizeof(nat_logging), 0 },
+ { { &fr_defnatage }, "fr_defnatage", 1, 0x7fffffff,
+ sizeof(fr_defnatage), IPFT_WRDISABLED },
+ { { &fr_defnatipage }, "fr_defnatipage", 1, 0x7fffffff,
+ sizeof(fr_defnatipage), IPFT_WRDISABLED },
+ { { &fr_defnaticmpage }, "fr_defnaticmpage", 1, 0x7fffffff,
+ sizeof(fr_defnaticmpage), IPFT_WRDISABLED },
+ /* frag */
+ { { &ipfr_size }, "ipfr_size", 1, 0x7fffffff,
+ sizeof(ipfr_size), IPFT_WRDISABLED },
+ { { &fr_ipfrttl }, "fr_ipfrttl", 1, 0x7fffffff,
+ sizeof(fr_ipfrttl), IPFT_WRDISABLED },
+#ifdef IPFILTER_LOG
+ /* log */
+ { { &ipl_suppress }, "ipl_suppress", 0, 1,
+ sizeof(ipl_suppress), 0 },
+ { { &ipl_buffer_sz }, "ipl_buffer_sz", 0, 0,
+ sizeof(ipl_buffer_sz), IPFT_RDONLY },
+ { { &ipl_logmax }, "ipl_logmax", 0, 0x7fffffff,
+ sizeof(ipl_logmax), IPFT_WRDISABLED },
+ { { &ipl_logall }, "ipl_logall", 0, 1,
+ sizeof(ipl_logall), 0 },
+ { { &ipl_logsize }, "ipl_logsize", 0, 0x80000,
+ sizeof(ipl_logsize), 0 },
+#endif
+ { { NULL }, NULL, 0, 0 }
+};
+
+static ipftuneable_t *ipf_tunelist = NULL; /* head of the list of tuneables added at run time by fr_addipftune() */
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_findtunebycookie */
+/* Returns: NULL = search failed, else pointer to tune struct */
+/* Parameters: cookie(I) - cookie value to search for amongst tuneables */
+/* next(O) - pointer to place to store the cookie for the */
+/* "next" tuneable, if it is desired. */
+/* */
+/* Used to walk through all of the existing tuneables with successive */
+/* calls. For a row of the static table the cookie is the address of the */
+/* row; for a dynamically added tuneable the cookie is the address of the */
+/* link pointer that refers to it. When a match is returned and next is */
+/* not NULL, the cookie to use for the following call is stored there. If */
+/* nothing matches, NULL is stored in next. */
+/* ------------------------------------------------------------------------ */
+static ipftuneable_t *fr_findtunebycookie(cookie, next)
+void *cookie, **next;
+{
+    ipftuneable_t *entry, **link;
+
+    /* First the static table, which ends at the row without a name. */
+    entry = ipf_tuneables;
+    while (entry->ipft_name != NULL) {
+        if (entry == cookie) {
+            if (next != NULL) {
+                /*
+                 * Continue with the following row when it
+                 * is a real one, otherwise hand back the
+                 * head of the dynamic list so that the walk
+                 * carries on there. This is what weakly
+                 * links the two "lists" together.
+                 */
+                if (entry[1].ipft_name != NULL)
+                    *next = entry + 1;
+                else
+                    *next = &ipf_tunelist;
+            }
+            return entry;
+        }
+        entry++;
+    }
+
+    /* Then the dynamic list, matching on the address of each link. */
+    link = &ipf_tunelist;
+    while ((entry = *link) != NULL) {
+        if (link == cookie) {
+            if (next != NULL)
+                *next = &entry->ipft_next;
+            return entry;
+        }
+        link = &entry->ipft_next;
+    }
+
+    if (next != NULL)
+        *next = NULL;
+    return NULL;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_findtunebyname */
+/* Returns: NULL = search failed, else pointer to tune struct */
+/* Parameters: name(I) - name of the tuneable entry to find. */
+/* */
+/* Look for a tuneable called name, first in the static array and then in */
+/* the list of dynamically added tuneables. The first entry whose name is */
+/* an exact match is returned. */
+/* ------------------------------------------------------------------------ */
+static ipftuneable_t *fr_findtunebyname(name)
+const char *name;
+{
+    ipftuneable_t *entry;
+
+    entry = ipf_tuneables;
+    while (entry->ipft_name != NULL) {
+        if (strcmp(entry->ipft_name, name) == 0)
+            return entry;
+        entry++;
+    }
+
+    entry = ipf_tunelist;
+    while (entry != NULL) {
+        if (strcmp(entry->ipft_name, name) == 0)
+            return entry;
+        entry = entry->ipft_next;
+    }
+
+    return NULL;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_addipftune */
+/* Returns: int - 0 == success, else failure */
+/* Parameters: newtune - pointer to new tune struct to add to tuneables */
+/* */
+/* Append the tune structure pointed to by "newtune" to the end of the */
+/* list of "dynamic" tuneable parameters. EEXIST is returned if a tuneable */
+/* of the same name is already known. Once added, the owner of the object */
+/* is not expected to ever change "ipft_next". */
+/* ------------------------------------------------------------------------ */
+int fr_addipftune(newtune)
+ipftuneable_t *newtune;
+{
+    ipftuneable_t **tail;
+
+    if (fr_findtunebyname(newtune->ipft_name) != NULL)
+        return EEXIST;
+
+    /* Advance to the link that currently terminates the list. */
+    tail = &ipf_tunelist;
+    while (*tail != NULL)
+        tail = &(*tail)->ipft_next;
+
+    newtune->ipft_next = NULL;
+    *tail = newtune;
+    return 0;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_delipftune */
+/* Returns: int - 0 == success, else failure */
+/* Parameters: oldtune - pointer to tune struct to remove from the list of */
+/* current dynamic tuneables */
+/* */
+/* Look for the tune structure, by pointer, in the list of those that were */
+/* added at run time. If it is there, unlink it and clear its "ipft_next" */
+/* pointer; otherwise return ESRCH. */
+/* ------------------------------------------------------------------------ */
+int fr_delipftune(oldtune)
+ipftuneable_t *oldtune;
+{
+    ipftuneable_t **link;
+
+    link = &ipf_tunelist;
+    while (*link != NULL) {
+        if (*link == oldtune) {
+            *link = oldtune->ipft_next;
+            oldtune->ipft_next = NULL;
+            return 0;
+        }
+        link = &(*link)->ipft_next;
+    }
+
+    return ESRCH;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_ipftune */
+/* Returns: int - 0 == success, else failure */
+/* Parameters: cmd(I) - ioctl command number */
+/* data(I) - pointer to ioctl data structure */
+/* */
+/* Implement handling of SIOCIPFGETNEXT, SIOCIPFGET and SIOCIPFSET. These */
+/* three ioctls provide the means to access and control global variables */
+/* within IPFilter, allowing (for example) timeouts and table sizes to be */
+/* changed without rebooting, reloading or recompiling. The initialisation */
+/* and 'destruction' routines of the various components of ipfilter are all */
+/* each responsible for handling their own values being too big. */
+/* */
+/* SIOCIPFSET fails with EPERM for a tuneable flagged IPFT_RDONLY, with */
+/* EBUSY for one flagged IPFT_WRDISABLED while IPFilter is running and */
+/* with EINVAL when the new value is outside the tuneable's min/max range. */
+/* On success the previous value is handed back to the caller. */
+/* ------------------------------------------------------------------------ */
+int fr_ipftune(cmd, data)
+ioctlcmd_t cmd;
+void *data;
+{
+    ipftuneable_t *ta;
+    ipftune_t tu;
+    void *cookie;
+    int error;
+
+    error = fr_inobj(data, &tu, IPFOBJ_TUNEABLE);
+    if (error != 0)
+        return error;
+
+    /* The name was supplied by the caller: force termination before use. */
+    tu.ipft_name[sizeof(tu.ipft_name) - 1] = '\0';
+    cookie = tu.ipft_cookie;
+    ta = NULL;
+
+    switch (cmd)
+    {
+    case SIOCIPFGETNEXT :
+        /*
+         * If cookie is non-NULL, assume it to be a pointer to the last
+         * entry we looked at, so find it (if possible) and return a
+         * pointer to the next one after it. The last entry in the
+         * table is a NULL entry, so when we get to it, set cookie
+         * to NULL and return that, indicating end of list, whereas
+         * if we come in with cookie set to NULL, we are starting anew
+         * at the front of the list.
+         */
+        if (cookie != NULL) {
+            ta = fr_findtunebycookie(cookie, &tu.ipft_cookie);
+        } else {
+            ta = ipf_tuneables;
+            tu.ipft_cookie = ta + 1;
+        }
+        if (ta != NULL) {
+            /*
+             * Entry found, but does the data pointed to by that
+             * row fit in what we can return?
+             */
+            if (ta->ipft_sz > sizeof(tu.ipft_un))
+                return EINVAL;
+
+            tu.ipft_vlong = 0;
+            if (ta->ipft_sz == sizeof(u_long))
+                tu.ipft_vlong = *ta->ipft_plong;
+            else if (ta->ipft_sz == sizeof(u_int))
+                tu.ipft_vint = *ta->ipft_pint;
+            else if (ta->ipft_sz == sizeof(u_short))
+                tu.ipft_vshort = *ta->ipft_pshort;
+            else if (ta->ipft_sz == sizeof(u_char))
+                tu.ipft_vchar = *ta->ipft_pchar;
+
+            tu.ipft_sz = ta->ipft_sz;
+            tu.ipft_min = ta->ipft_min;
+            tu.ipft_max = ta->ipft_max;
+            tu.ipft_flags = ta->ipft_flags;
+            bcopy(ta->ipft_name, tu.ipft_name,
+                MIN(sizeof(tu.ipft_name),
+                strlen(ta->ipft_name) + 1));
+            /*
+             * A name that is too long for the buffer is copied
+             * without its terminator, so put one back.
+             */
+            tu.ipft_name[sizeof(tu.ipft_name) - 1] = '\0';
+        }
+        error = fr_outobj(data, &tu, IPFOBJ_TUNEABLE);
+        break;
+
+    case SIOCIPFGET :
+    case SIOCIPFSET :
+        /*
+         * Search by name or by cookie value for a particular entry
+         * in the tuning parameter table.
+         */
+        error = ESRCH;
+        if (cookie != NULL) {
+            ta = fr_findtunebycookie(cookie, NULL);
+            if (ta != NULL)
+                error = 0;
+        } else if (tu.ipft_name[0] != '\0') {
+            ta = fr_findtunebyname(tu.ipft_name);
+            if (ta != NULL)
+                error = 0;
+        }
+        if (error != 0)
+            break;
+
+        if (cmd == (ioctlcmd_t)SIOCIPFGET) {
+            /*
+             * Fetch the tuning parameters for a particular value
+             */
+            tu.ipft_vlong = 0;
+            if (ta->ipft_sz == sizeof(u_long))
+                tu.ipft_vlong = *ta->ipft_plong;
+            else if (ta->ipft_sz == sizeof(u_int))
+                tu.ipft_vint = *ta->ipft_pint;
+            else if (ta->ipft_sz == sizeof(u_short))
+                tu.ipft_vshort = *ta->ipft_pshort;
+            else if (ta->ipft_sz == sizeof(u_char))
+                tu.ipft_vchar = *ta->ipft_pchar;
+            tu.ipft_cookie = ta;
+            tu.ipft_sz = ta->ipft_sz;
+            tu.ipft_min = ta->ipft_min;
+            tu.ipft_max = ta->ipft_max;
+            tu.ipft_flags = ta->ipft_flags;
+            error = fr_outobj(data, &tu, IPFOBJ_TUNEABLE);
+
+        } else if (cmd == (ioctlcmd_t)SIOCIPFSET) {
+            /*
+             * Set an internal parameter. The hard part here is
+             * getting the new value safely and correctly into
+             * the kernel (given we only know its size, not type.)
+             */
+            u_long in;
+
+            /*
+             * Read-only tuneables are never writable, whatever
+             * range their table row happens to advertise.
+             */
+            if ((ta->ipft_flags & IPFT_RDONLY) != 0) {
+                error = EPERM;
+                break;
+            }
+
+            if (((ta->ipft_flags & IPFT_WRDISABLED) != 0) &&
+                (fr_running > 0)) {
+                error = EBUSY;
+                break;
+            }
+
+            in = tu.ipft_vlong;
+            if (in < ta->ipft_min || in > ta->ipft_max) {
+                error = EINVAL;
+                break;
+            }
+
+            /*
+             * Swap: the old value goes back to the caller in tu
+             * and the new value is stored in the variable.
+             */
+            if (ta->ipft_sz == sizeof(u_long)) {
+                tu.ipft_vlong = *ta->ipft_plong;
+                *ta->ipft_plong = in;
+            } else if (ta->ipft_sz == sizeof(u_int)) {
+                tu.ipft_vint = *ta->ipft_pint;
+                *ta->ipft_pint = (u_int)(in & 0xffffffff);
+            } else if (ta->ipft_sz == sizeof(u_short)) {
+                tu.ipft_vshort = *ta->ipft_pshort;
+                *ta->ipft_pshort = (u_short)(in & 0xffff);
+            } else if (ta->ipft_sz == sizeof(u_char)) {
+                tu.ipft_vchar = *ta->ipft_pchar;
+                *ta->ipft_pchar = (u_char)(in & 0xff);
+            }
+            error = fr_outobj(data, &tu, IPFOBJ_TUNEABLE);
+        }
+        break;
+
+    default :
+        error = EINVAL;
+        break;
+    }
+
+    return error;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_initialise */
+/* Returns: int - 0 == success, < 0 == failure */
+/* Parameters: None. */
+/* */
+/* Call the initialise functions of all the various subsystems inside of */
+/* IPFilter, in a fixed order. The first one to fail stops the sequence */
+/* and its (negative) result, offset by a per-subsystem multiple of ten, is */
+/* returned immediately. No attempt is made to recover from the error here. */
+/* ------------------------------------------------------------------------ */
+int fr_initialise()
+{
+    int rc;
+
+#ifdef IPFILTER_LOG
+    if ((rc = fr_loginit()) < 0)
+        return rc - 10;
+#endif
+    if ((rc = fr_natinit()) < 0)
+        return rc - 20;
+
+    if ((rc = fr_stateinit()) < 0)
+        return rc - 30;
+
+    if ((rc = fr_authinit()) < 0)
+        return rc - 40;
+
+    if ((rc = fr_fraginit()) < 0)
+        return rc - 50;
+
+    if ((rc = appr_init()) < 0)
+        return rc - 60;
+
+#ifdef IPFILTER_SYNC
+    if ((rc = ipfsync_init()) < 0)
+        return rc - 70;
+#endif
+#ifdef IPFILTER_SCAN
+    if ((rc = ipsc_init()) < 0)
+        return rc - 80;
+#endif
+#ifdef IPFILTER_LOOKUP
+    if ((rc = ip_lookup_init()) < 0)
+        return rc - 90;
+#endif
+#ifdef IPFILTER_COMPILED
+    ipfrule_add();
+#endif
+    return 0;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_deinitialise */
+/* Returns: None. */
+/* Parameters: None. */
+/* */
+/* Call all the various subsystem cleanup routines to deallocate memory or */
+/* destroy locks or whatever they've done that they need to now undo. */
+/* The order here IS important as there are some cross references of */
+/* internal data structures. */
+/* ------------------------------------------------------------------------ */
+void fr_deinitialise()
+{
+ fr_fragunload();
+ fr_authunload();
+ fr_natunload();
+ fr_stateunload();
+#ifdef IPFILTER_SCAN
+ fr_scanunload();
+#endif
+ appr_unload();
+
+#ifdef IPFILTER_COMPILED
+ ipfrule_remove();
+#endif
+
+ (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE); /* each unit is flushed twice: first with FR_INACTIVE ... */
+ (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE); /* ... then without it */
+ (void) frflush(IPL_LOGCOUNT, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE);
+ (void) frflush(IPL_LOGCOUNT, 0, FR_INQUE|FR_OUTQUE);
+
+#ifdef IPFILTER_LOOKUP
+ ip_lookup_unload(); /* runs only after the rule flushes above */
+#endif
+
+#ifdef IPFILTER_LOG
+ fr_logunload(); /* logging is the last subsystem to be unloaded */
+#endif
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_zerostats */
+/* Returns: int - 0 = success, else failure */
+/* Parameters: data(O) - pointer to pointer for copying data back to */
+/* */
+/* Take a snapshot of the current statistics, copy it out to userspace and */
+/* then zero the counters held in the kernel. If the copy out fails, */
+/* EFAULT is returned and the counters are left untouched. The lock is */
+/* only held across the bzero() as the copyout may result in paging (ie */
+/* network activity.) */
+/* ------------------------------------------------------------------------ */
+int fr_zerostats(data)
+caddr_t data;
+{
+    friostat_t snapshot;
+
+    fr_getstat(&snapshot);
+    if (copyoutptr(&snapshot, data, sizeof(snapshot)) != 0)
+        return EFAULT;
+
+    WRITE_ENTER(&ipf_mutex);
+    bzero((char *)frstats, sizeof(*frstats) * 2);
+    RWLOCK_EXIT(&ipf_mutex);
+
+    return 0;
+}
+
+
+#ifdef _KERNEL
+/* ------------------------------------------------------------------------ */
+/* Function: fr_resolvedest */
+/* Returns: Nil */
+/* Parameters: fdp(IO) - pointer to destination information to resolve */
+/* v(I) - IP protocol version to match */
+/* */
+/* Resolve the interface name held in the frdest structure pointed to by */
+/* fdp for the given IP protocol version and store the result in the same */
+/* structure. An empty name stores NULL, which is considered to indicate */
+/* there is no information at all in the structure; a name for which no */
+/* interface can be found stores -1 instead. */
+/* ------------------------------------------------------------------------ */
+void fr_resolvedest(fdp, v)
+frdest_t *fdp;
+int v;
+{
+    void *ifp;
+
+    v = v; /* LINT */
+
+    if (fdp->fd_ifname[0] == '\0') {
+        fdp->fd_ifp = NULL;
+        return;
+    }
+
+    ifp = GETIFP(fdp->fd_ifname, v);
+    fdp->fd_ifp = (ifp != NULL) ? ifp : (void *)-1;
+}
+#endif /* _KERNEL */
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_resolvenic */
+/* Returns: void* - NULL = wildcard name, -1 = failed to find NIC, else */
+/* pointer to interface structure for NIC */
+/* Parameters: name(I) - complete interface name */
+/* v(I) - IP protocol version */
+/* */
+/* Look for a network interface structure that has a matching name to that */
+/* passed in and that is also being used for that IP protocol version */
+/* (necessary on some platforms where there are separate listings for both */
+/* IPv4 and IPv6 on the same physical NIC.) An empty name, "-" and "*" are */
+/* all treated as wildcards and are not looked up. */
+/* */
+/* The name is forcibly \0 terminated in here because an interface name */
+/* could get into the kernel structures of ipf in any number of ways; so */
+/* long as they all use the same sized array to put the name in, it makes */
+/* sense to ensure it is terminated before it is used to find its match in */
+/* the kernel's list of configured interfaces. */
+/* */
+/* NOTE: This SHOULD ONLY be used with IPFilter structures that have an */
+/* array for the name that is LIFNAMSIZ bytes (at least) in length. */
+/* ------------------------------------------------------------------------ */
+void *fr_resolvenic(name, v)
+char *name;
+int v;
+{
+    void *nic;
+    int wildcard;
+
+    if (name[0] == '\0')
+        return NULL;
+
+    wildcard = ((name[0] == '-') || (name[0] == '*'));
+    if (wildcard && (name[1] == '\0'))
+        return NULL;
+
+    name[LIFNAMSIZ - 1] = '\0';
+
+    nic = GETIFP(name, v);
+    return (nic != NULL) ? nic : (void *)-1;
+}
diff --git a/usr/src/uts/common/inet/ipf/ip_auth.c b/usr/src/uts/common/inet/ipf/ip_auth.c
new file mode 100644
index 0000000000..b6f0844354
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/ip_auth.c
@@ -0,0 +1,796 @@
+/*
+ * Copyright (C) 1998-2003 by Darren Reed & Guido van Rooij.
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ */
+#if defined(KERNEL) || defined(_KERNEL)
+# undef KERNEL
+# undef _KERNEL
+# define KERNEL 1
+# define _KERNEL 1
+#endif
+#include <sys/errno.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/file.h>
+#if !defined(_KERNEL)
+# include <stdio.h>
+# include <stdlib.h>
+# include <string.h>
+# define _KERNEL
+# ifdef __OpenBSD__
+struct file;
+# endif
+# include <sys/uio.h>
+# undef _KERNEL
+#endif
+#if defined(_KERNEL) && (__FreeBSD_version >= 220000)
+# include <sys/filio.h>
+# include <sys/fcntl.h>
+#else
+# include <sys/ioctl.h>
+#endif
+#if !defined(linux)
+# include <sys/protosw.h>
+#endif
+#include <sys/socket.h>
+#if defined(_KERNEL)
+# include <sys/systm.h>
+# if !defined(__SVR4) && !defined(__svr4__) && !defined(linux)
+# include <sys/mbuf.h>
+# endif
+#endif
+#if defined(__SVR4) || defined(__svr4__)
+# include <sys/filio.h>
+# include <sys/byteorder.h>
+# ifdef _KERNEL
+# include <sys/dditypes.h>
+# endif
+# include <sys/stream.h>
+# include <sys/kmem.h>
+#endif
+#if (_BSDI_VERSION >= 199802) || (__FreeBSD_version >= 400000)
+# include <sys/queue.h>
+#endif
+#if defined(__NetBSD__) || defined(__OpenBSD__) || defined(bsdi)
+# include <machine/cpu.h>
+#endif
+#if defined(_KERNEL) && defined(__NetBSD__) && (__NetBSD_Version__ >= 104000000)
+# include <sys/proc.h>
+#endif
+#include <net/if.h>
+#ifdef sun
+# include <net/af.h>
+#endif
+#include <net/route.h>
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#if !defined(_KERNEL) && !defined(__osf__) && !defined(__sgi)
+# define KERNEL
+# define _KERNEL
+# define NOT_KERNEL
+#endif
+#if !defined(linux)
+# include <netinet/ip_var.h>
+#endif
+#ifdef NOT_KERNEL
+# undef _KERNEL
+# undef KERNEL
+#endif
+#include <netinet/tcp.h>
+#if defined(IRIX) && (IRIX < 60516) /* IRIX < 6 */
+extern struct ifqueue ipintrq; /* ip packet input queue */
+#else
+# if !defined(__hpux) && !defined(linux)
+# if __FreeBSD_version >= 300000
+# include <net/if_var.h>
+# if __FreeBSD_version >= 500042
+# define IF_QFULL _IF_QFULL
+# define IF_DROP _IF_DROP
+# endif /* __FreeBSD_version >= 500042 */
+# endif
+# include <netinet/in_var.h>
+# include <netinet/tcp_fsm.h>
+# endif
+#endif
+#include <netinet/udp.h>
+#include <netinet/ip_icmp.h>
+#include "netinet/ip_compat.h"
+#include <netinet/tcpip.h>
+#include "netinet/ip_fil.h"
+#include "netinet/ip_auth.h"
+#if !defined(MENTAT) && !defined(linux)
+# include <net/netisr.h>
+# ifdef __FreeBSD__
+# include <machine/cpufunc.h>
+# endif
+#endif
+#if (__FreeBSD_version >= 300000)
+# include <sys/malloc.h>
+# if defined(_KERNEL) && !defined(IPFILTER_LKM)
+# include <sys/libkern.h>
+# include <sys/systm.h>
+# endif
+#endif
+/* END OF INCLUDES */
+
+#if !defined(lint)
+static const char rcsid[] = "@(#)$Id: ip_auth.c,v 2.73.2.5 2005/06/12 07:18:14 darrenr Exp $";
+#endif
+
+
+#if SOLARIS
+/* Initialised in fr_authinit(), signalled in fr_newauth(), waited on in SIOCAUTHW. */
+extern kcondvar_t ipfauthwait;
+#endif /* SOLARIS */
+#if defined(linux) && defined(_KERNEL)
+/* Linux wait queue, initialised in fr_authinit(). */
+wait_queue_head_t fr_authnext_linux;
+#endif
+
+/* Number of slots allocated for both fr_auth[] and fr_authpkts[]. */
+int fr_authsize = FR_NUMAUTH;
+/* Slots currently occupied: bumped in fr_newauth(), dropped on match/expire/flush. */
+int fr_authused = 0;
+/* Initial age given to new auth records and pre-auth entries; fr_authexpire() counts it down. */
+int fr_defaultauthage = 600;
+/* When non-zero, check/new/expire return early and fr_authflush() returns -1. */
+int fr_auth_lock = 0;
+/* Set to 1 once fr_authinit() has created the locks; tells fr_authunload() to destroy them. */
+int fr_auth_init = 0;
+/* Counters handed to user space by SIOCATHST. */
+fr_authstat_t fr_authstats;
+/* Array of fr_authsize auth records. */
+static frauth_t *fr_auth = NULL;
+/* Held packets; fr_authpkts[i] belongs to fr_auth[i]. */
+mb_t **fr_authpkts = NULL;
+/*
+ * Indices into the arrays above: fr_checkauth() scans from fr_authstart up to
+ * fr_authend, fr_newauth() stores at fr_authend, and SIOCAUTHW hands out the
+ * record at fr_authnext.
+ */
+int fr_authstart = 0, fr_authend = 0, fr_authnext = 0;
+/* List of pre-auth entries maintained by fr_preauthcmd() and aged by fr_authexpire(). */
+frauthent_t *fae_list = NULL;
+/*
+ * ipauth points at the rule embedded in the head of fae_list (NULL when the
+ * list is empty); fr_authlist chains the dummy rules allocated by
+ * fr_checkauth().
+ */
+frentry_t *ipauth = NULL,
+ *fr_authlist = NULL;
+
+
+/*
+ * Allocate and zero the auth record and held-packet arrays (fr_authsize
+ * entries each) and create the locks used by the auth code.
+ * Returns 0 on success, -1 if fr_auth could not be allocated and -2 if
+ * fr_authpkts could not be allocated.
+ */
+int fr_authinit()
+{
+	KMALLOCS(fr_auth, frauth_t *, fr_authsize * sizeof(*fr_auth));
+	if (fr_auth == NULL)
+		return -1;
+	bzero((char *)fr_auth, fr_authsize * sizeof(*fr_auth));
+
+	KMALLOCS(fr_authpkts, mb_t **, fr_authsize * sizeof(*fr_authpkts));
+	if (fr_authpkts == NULL)
+		return -2;
+	bzero((char *)fr_authpkts, fr_authsize * sizeof(*fr_authpkts));
+
+	MUTEX_INIT(&ipf_authmx, "ipf auth log mutex");
+	RWLOCK_INIT(&ipf_auth, "ipf IP User-Auth rwlock");
+#if SOLARIS && defined(_KERNEL)
+	cv_init(&ipfauthwait, "ipf auth condvar", CV_DRIVER, NULL);
+#endif
+#if defined(linux) && defined(_KERNEL)
+	init_waitqueue_head(&fr_authnext_linux);
+#endif
+
+	/* Record that the locks exist so fr_authunload() destroys them. */
+	fr_auth_init = 1;
+
+	return 0;
+}
+
+
+/*
+ * Check if a packet has authorization. If the packet is found to match an
+ * authorization result and that would result in a feedback loop (i.e. it
+ * will end up returning FR_AUTH) then return FR_BLOCK instead.
+ *
+ * fin   - packet information for the packet being checked.
+ * passp - if not NULL, receives the verdict flags when a record matches.
+ *
+ * Returns the rule to apply, or NULL when nothing matched.  NULL is also
+ * returned on a match if the dummy rule needed for keep-state/keep-frag
+ * could not be allocated; *passp is still set in that case.
+ *
+ * Each hit and each miss is counted exactly once, with ATOMIC_INC64.
+ */
+frentry_t *fr_checkauth(fin, passp)
+fr_info_t *fin;
+u_32_t *passp;
+{
+	frentry_t *fr;
+	frauth_t *fra;
+	u_32_t pass;
+	u_short id;
+	ip_t *ip;
+	int i;
+
+	if (fr_auth_lock || !fr_authused)
+		return NULL;
+
+	ip = fin->fin_ip;
+	id = ip->ip_id;
+
+	READ_ENTER(&ipf_auth);
+	for (i = fr_authstart; i != fr_authend; ) {
+		/*
+		 * fra_index is set to -2 by SIOCAUTHR once user space has
+		 * supplied a verdict.  Check this in case the same packet
+		 * gets sent again and it hasn't yet been auth'd.
+		 */
+		fra = fr_auth + i;
+		if ((fra->fra_index == -2) && (id == fra->fra_info.fin_id) &&
+		    !bcmp((char *)fin, (char *)&fra->fra_info, FI_CSIZE)) {
+			/*
+			 * Avoid feedback loop.
+			 */
+			if (!(pass = fra->fra_pass) || (FR_ISAUTH(pass)))
+				pass = FR_BLOCK;
+			/*
+			 * Create a dummy rule for the stateful checking to
+			 * use and return.  Zero out any values we don't
+			 * trust from userland!
+			 */
+			if ((pass & FR_KEEPSTATE) || ((pass & FR_KEEPFRAG) &&
+			    (fin->fin_flx & FI_FRAG))) {
+				KMALLOC(fr, frentry_t *);
+				if (fr) {
+					bcopy((char *)fra->fra_info.fin_fr,
+					      (char *)fr, sizeof(*fr));
+					fr->fr_grp = NULL;
+					fr->fr_ifa = fin->fin_ifp;
+					fr->fr_func = NULL;
+					fr->fr_ref = 1;
+					fr->fr_flags = pass;
+					fr->fr_ifas[1] = NULL;
+					fr->fr_ifas[2] = NULL;
+					fr->fr_ifas[3] = NULL;
+				}
+			} else
+				fr = fra->fra_info.fin_fr;
+			fin->fin_fr = fr;
+			/*
+			 * NOTE(review): the read lock is dropped before the
+			 * write lock is taken, so another thread may act on
+			 * this record in between - confirm whether callers
+			 * serialise this path.
+			 */
+			RWLOCK_EXIT(&ipf_auth);
+			WRITE_ENTER(&ipf_auth);
+			/* Remember freshly allocated dummy rules for later freeing. */
+			if ((fr != NULL) && (fr != fra->fra_info.fin_fr)) {
+				fr->fr_next = fr_authlist;
+				fr_authlist = fr;
+			}
+			fra->fra_index = -1;
+			fr_authused--;
+			/*
+			 * If the oldest record was consumed, move the start
+			 * index past every record already marked free (-1).
+			 */
+			if (i == fr_authstart) {
+				while (fra->fra_index == -1) {
+					i++;
+					fra++;
+					if (i == fr_authsize) {
+						i = 0;
+						fra = fr_auth;
+					}
+					fr_authstart = i;
+					if (i == fr_authend)
+						break;
+				}
+				if (fr_authstart == fr_authend) {
+					fr_authnext = 0;
+					fr_authstart = fr_authend = 0;
+				}
+			}
+			RWLOCK_EXIT(&ipf_auth);
+			if (passp != NULL)
+				*passp = pass;
+			ATOMIC_INC64(fr_authstats.fas_hits);
+			return fr;
+		}
+		i++;
+		if (i == fr_authsize)
+			i = 0;
+	}
+	RWLOCK_EXIT(&ipf_auth);
+	ATOMIC_INC64(fr_authstats.fas_miss);
+	return NULL;
+}
+
+
+/*
+ * Check if we have room in the auth array to hold details for another packet.
+ * If we do, store it and wake up any user programs which are waiting to
+ * hear about these events.
+ *
+ * m   - the packet to hold.
+ * fin - packet information; copied into the auth record.
+ *
+ * Returns 1 if the packet was stored, 0 if auth is locked or there is no room.
+ */
+int fr_newauth(m, fin)
+mb_t *m;
+fr_info_t *fin;
+{
+#if defined(_KERNEL) && defined(MENTAT)
+ qpktinfo_t *qpi = fin->fin_qpi;
+#endif
+ frauth_t *fra;
+#if !defined(sparc) && !defined(m68k)
+ ip_t *ip;
+#endif
+ int i;
+
+ if (fr_auth_lock)
+ return 0;
+
+ WRITE_ENTER(&ipf_auth);
+ /*
+ * Refuse the packet when the start index is ahead of the end index or
+ * when every slot is in use; both cases are counted as "no space".
+ */
+ if (fr_authstart > fr_authend) {
+ fr_authstats.fas_nospace++;
+ RWLOCK_EXIT(&ipf_auth);
+ return 0;
+ } else {
+ if (fr_authused == fr_authsize) {
+ fr_authstats.fas_nospace++;
+ RWLOCK_EXIT(&ipf_auth);
+ return 0;
+ }
+ }
+
+ /* Claim slot i and advance the end index, wrapping at fr_authsize. */
+ fr_authstats.fas_added++;
+ fr_authused++;
+ i = fr_authend++;
+ if (fr_authend == fr_authsize)
+ fr_authend = 0;
+ RWLOCK_EXIT(&ipf_auth);
+
+ /*
+ * NOTE(review): the record is filled in after ipf_auth has been
+ * released - confirm nothing can read slot i before this completes.
+ */
+ fra = fr_auth + i;
+ fra->fra_index = i;
+ fra->fra_pass = 0;
+ fra->fra_age = fr_defaultauthage;
+ bcopy((char *)fin, (char *)&fra->fra_info, sizeof(*fin));
+#if !defined(sparc) && !defined(m68k)
+ /*
+ * No need to copyback here as we want to undo the changes, not keep
+ * them.
+ */
+ ip = fin->fin_ip;
+# if defined(MENTAT) && defined(_KERNEL)
+ if ((ip == (ip_t *)m->b_rptr) && (fin->fin_v == 4))
+# endif
+ {
+ register u_short bo;
+
+ /* Pass ip_len and ip_off through htons() in place. */
+ bo = ip->ip_len;
+ ip->ip_len = htons(bo);
+ bo = ip->ip_off;
+ ip->ip_off = htons(bo);
+ }
+#endif
+#if SOLARIS && defined(_KERNEL)
+ /*
+ * Move the read pointer back by qpi_off, hold the message that
+ * fin_mp points at, remember its queue and signal the condvar that
+ * SIOCAUTHW waits on.
+ */
+ m->b_rptr -= qpi->qpi_off;
+ fr_authpkts[i] = *(mblk_t **)fin->fin_mp;
+ fra->fra_q = qpi->qpi_q; /* The queue can disappear! */
+ cv_signal(&ipfauthwait);
+#else
+# if defined(BSD) && !defined(sparc) && (BSD >= 199306)
+ /*
+ * NOTE(review): for inbound packets this applies htons() to ip_len
+ * and ip_off a second time on builds where the block above also ran -
+ * confirm this is intended.
+ */
+ if (!fin->fin_out) {
+ ip->ip_len = htons(ip->ip_len);
+ ip->ip_off = htons(ip->ip_off);
+ }
+# endif
+ /* Hold the packet and wake anything sleeping on fr_authnext. */
+ fr_authpkts[i] = m;
+ WAKEUP(&fr_authnext,0);
+#endif
+ return 1;
+}
+
+
+/*
+ * ioctl handler for the auth subsystem.
+ *
+ * data - ioctl argument, passed on to fr_inobj()/fr_outobj()/copyoutptr().
+ * cmd  - ioctl command.
+ * mode - open mode flags; SIOCSTLCK requires FWRITE.
+ *
+ * Commands handled:
+ *   SIOCSTLCK - set fr_auth_lock via fr_lock().
+ *   SIOCATHST - copy out fr_authstats.
+ *   SIOCIPFFL - flush held packets and copy out the number flushed.
+ *   SIOCAUTHW - hand the next held packet to user space, sleeping until
+ *               one is available.
+ *   SIOCAUTHR - accept a verdict from user space for a held packet and
+ *               re-inject that packet.
+ *
+ * Returns 0 on success or an errno value.  As the comment in SIOCAUTHW
+ * explains, the caller holds ipf_global and expects it held on return.
+ */
+int fr_auth_ioctl(data, cmd, mode)
+caddr_t data;
+ioctlcmd_t cmd;
+int mode;
+{
+	mb_t *m;
+#if defined(_KERNEL) && !defined(MENTAT) && !defined(linux) && \
+    (!defined(__FreeBSD_version) || (__FreeBSD_version < 501000))
+	struct ifqueue *ifq;
+	SPL_INT(s);
+#endif
+	frauth_t auth, *au = &auth, *fra;
+	int i, error = 0, len;
+	char *t;
+
+	switch (cmd)
+	{
+	case SIOCSTLCK :
+		if (!(mode & FWRITE)) {
+			error = EPERM;
+			break;
+		}
+		fr_lock(data, &fr_auth_lock);
+		break;
+
+	case SIOCATHST:
+		fr_authstats.fas_faelist = fae_list;
+		error = fr_outobj(data, &fr_authstats, IPFOBJ_AUTHSTAT);
+		break;
+
+	case SIOCIPFFL:
+		SPL_NET(s);
+		WRITE_ENTER(&ipf_auth);
+		i = fr_authflush();
+		RWLOCK_EXIT(&ipf_auth);
+		SPL_X(s);
+		error = copyoutptr((char *)&i, data, sizeof(i));
+		break;
+
+	case SIOCAUTHW:
+fr_authioctlloop:
+		/*
+		 * Entered with only ipf_global held, both on first entry and
+		 * on every retry.  A failed copy-in leaves "auth" undefined,
+		 * so give up rather than act on it.
+		 */
+		error = fr_inobj(data, au, IPFOBJ_FRAUTH);
+		if (error != 0)
+			break;
+		READ_ENTER(&ipf_auth);
+		if ((fr_authnext != fr_authend) && fr_authpkts[fr_authnext]) {
+			error = fr_outobj(data, &fr_auth[fr_authnext],
+					  IPFOBJ_FRAUTH);
+			if ((error == 0) && (auth.fra_len != 0) &&
+			    (auth.fra_buf != NULL)) {
+				/*
+				 * Copy packet contents out to user space if
+				 * requested.  Bail on an error.
+				 */
+				m = fr_authpkts[fr_authnext];
+				len = MSGDSIZE(m);
+				if (len > auth.fra_len)
+					len = auth.fra_len;
+				auth.fra_len = len;
+				for (t = auth.fra_buf; m && (len > 0); ) {
+					i = MIN(M_LEN(m), len);
+					error = copyoutptr(MTOD(m, char *),
+							   t, i);
+					len -= i;
+					t += i;
+					if (error != 0)
+						break;
+					/*
+					 * Step to the next buffer in the
+					 * chain so each one is copied once.
+					 * NOTE(review): m_next is assumed to
+					 * be usable on every non-STREAMS
+					 * build - confirm for linux.
+					 */
+#if defined(_KERNEL) && defined(MENTAT)
+					m = m->b_cont;
+#else
+					m = m->m_next;
+#endif
+				}
+			}
+			RWLOCK_EXIT(&ipf_auth);
+			if (error != 0)
+				break;
+			SPL_NET(s);
+			WRITE_ENTER(&ipf_auth);
+			fr_authnext++;
+			if (fr_authnext == fr_authsize)
+				fr_authnext = 0;
+			RWLOCK_EXIT(&ipf_auth);
+			SPL_X(s);
+			return 0;
+		}
+		RWLOCK_EXIT(&ipf_auth);
+		/*
+		 * We exit ipf_global here because a program that enters in
+		 * here will have a lock on it and goto sleep having this lock.
+		 * If someone were to do an 'ipf -D' the system would then
+		 * deadlock.  The catch with releasing it here is that the
+		 * caller of this function expects it to be held when we
+		 * return so we have to reacquire it in here.
+		 */
+		RWLOCK_EXIT(&ipf_global);
+
+		MUTEX_ENTER(&ipf_authmx);
+#ifdef _KERNEL
+# if SOLARIS
+		error = 0;
+		if (!cv_wait_sig(&ipfauthwait, &ipf_authmx.ipf_lk))
+			error = EINTR;
+# else /* SOLARIS */
+#  ifdef __hpux
+		{
+		lock_t *l;
+
+		l = get_sleep_lock(&fr_authnext);
+		error = sleep(&fr_authnext, PZERO+1);
+		spinunlock(l);
+		}
+#  else
+#   ifdef __osf__
+		error = mpsleep(&fr_authnext, PSUSP|PCATCH, "fr_authnext", 0,
+				&ipf_authmx, MS_LOCK_SIMPLE);
+#   else
+		error = SLEEP(&fr_authnext, "fr_authnext");
+#   endif /* __osf__ */
+#  endif /* __hpux */
+# endif /* SOLARIS */
+#endif
+		MUTEX_EXIT(&ipf_authmx);
+		READ_ENTER(&ipf_global);
+		/*
+		 * ipf_auth is taken again at the top of the loop, so it must
+		 * not be acquired here as well.
+		 */
+		if (error == 0)
+			goto fr_authioctlloop;
+		break;
+
+	case SIOCAUTHR:
+		error = fr_inobj(data, &auth, IPFOBJ_FRAUTH);
+		if (error != 0)
+			return error;
+		SPL_NET(s);
+		WRITE_ENTER(&ipf_auth);
+		i = au->fra_index;
+		/* Validate the index before it is used to address fr_auth[]. */
+		if ((i < 0) || (i >= fr_authsize) ||
+		    (fr_auth[i].fra_info.fin_id != au->fra_info.fin_id)) {
+			RWLOCK_EXIT(&ipf_auth);
+			SPL_X(s);
+			return ESRCH;
+		}
+		fra = fr_auth + i;
+		m = fr_authpkts[i];
+		/* -2 marks the record as carrying a verdict for fr_checkauth(). */
+		fra->fra_index = -2;
+		fra->fra_pass = au->fra_pass;
+		fr_authpkts[i] = NULL;
+		RWLOCK_EXIT(&ipf_auth);
+#ifdef _KERNEL
+		if ((m != NULL) && (au->fra_info.fin_out != 0)) {
+# ifdef MENTAT
+			error = !putq(fra->fra_q, m);
+# else /* MENTAT */
+#  if defined(linux) || defined(AIX)
+#  else
+#   if (_BSDI_VERSION >= 199802) || defined(__OpenBSD__) || \
+       (defined(__sgi) && (IRIX >= 60500) || defined(AIX) || \
+       (defined(__FreeBSD__) && (__FreeBSD_version >= 470102)))
+			error = ip_output(m, NULL, NULL, IP_FORWARDING, NULL,
+					  NULL);
+#   else
+			error = ip_output(m, NULL, NULL, IP_FORWARDING, NULL);
+#   endif
+#  endif /* Linux */
+# endif /* MENTAT */
+			if (error != 0)
+				fr_authstats.fas_sendfail++;
+			else
+				fr_authstats.fas_sendok++;
+		} else if (m) {
+# ifdef MENTAT
+			error = !putq(fra->fra_q, m);
+# else /* MENTAT */
+#  if defined(linux) || defined(AIX)
+#  else
+#   if (__FreeBSD_version >= 501000)
+			netisr_dispatch(NETISR_IP, m);
+#   else
+#    if (IRIX >= 60516)
+			ifq = &((struct ifnet *)fra->fra_info.fin_ifp)->if_snd;
+#    else
+			ifq = &ipintrq;
+#    endif
+			if (IF_QFULL(ifq)) {
+				IF_DROP(ifq);
+				FREE_MB_T(m);
+				error = ENOBUFS;
+			} else {
+				IF_ENQUEUE(ifq, m);
+#    if IRIX < 60500
+				schednetisr(NETISR_IP);
+#    endif
+			}
+#   endif
+#  endif /* Linux */
+# endif /* MENTAT */
+			if (error != 0)
+				fr_authstats.fas_quefail++;
+			else
+				fr_authstats.fas_queok++;
+		} else
+			error = EINVAL;
+# ifdef MENTAT
+		if (error != 0)
+			error = EINVAL;
+# else /* MENTAT */
+		/*
+		 * If we experience an error which will result in the packet
+		 * not being processed, make sure we advance to the next one.
+		 * NOTE(review): ipf_auth is no longer held at this point.
+		 */
+		if (error == ENOBUFS) {
+			fr_authused--;
+			fra->fra_index = -1;
+			fra->fra_pass = 0;
+			if (i == fr_authstart) {
+				/*
+				 * Walk the record pointer together with the
+				 * index so each slot's own state is tested.
+				 */
+				while (fra->fra_index == -1) {
+					i++;
+					fra++;
+					if (i == fr_authsize) {
+						i = 0;
+						fra = fr_auth;
+					}
+					fr_authstart = i;
+					if (i == fr_authend)
+						break;
+				}
+				if (fr_authstart == fr_authend) {
+					fr_authnext = 0;
+					fr_authstart = fr_authend = 0;
+				}
+			}
+		}
+# endif /* MENTAT */
+#endif /* _KERNEL */
+		SPL_X(s);
+		break;
+
+	default :
+		error = EINVAL;
+		break;
+	}
+	return error;
+}
+
+
+/*
+ * Release everything the auth code owns: the record array, the held
+ * packets and their array, every pre-auth entry, any dummy rule whose
+ * reference count is 1 and, if fr_authinit() created them, the locks.
+ */
+void fr_authunload()
+{
+	register frauthent_t *fae;
+	frentry_t *fr, **frp;
+	register int i;
+	mb_t *m;
+
+	if (fr_auth != NULL) {
+		KFREES(fr_auth, fr_authsize * sizeof(*fr_auth));
+		fr_auth = NULL;
+	}
+
+	if (fr_authpkts != NULL) {
+		for (i = 0; i < fr_authsize; i++) {
+			m = fr_authpkts[i];
+			if (m == NULL)
+				continue;
+			FREE_MB_T(m);
+			fr_authpkts[i] = NULL;
+		}
+		KFREES(fr_authpkts, fr_authsize * sizeof(*fr_authpkts));
+		fr_authpkts = NULL;
+	}
+
+	/* Pop and free pre-auth entries until the list is empty. */
+	while ((fae = fae_list) != NULL) {
+		fae_list = fae->fae_next;
+		KFREE(fae);
+	}
+	ipauth = NULL;
+
+	/* Free dummy rules with a single reference; step over the rest. */
+	frp = &fr_authlist;
+	while ((fr = *frp) != NULL) {
+		if (fr->fr_ref != 1) {
+			frp = &fr->fr_next;
+			continue;
+		}
+		*frp = fr->fr_next;
+		KFREE(fr);
+	}
+
+	if (fr_auth_init == 1) {
+# if SOLARIS && defined(_KERNEL)
+		cv_destroy(&ipfauthwait);
+# endif
+		MUTEX_DESTROY(&ipf_authmx);
+		RW_DESTROY(&ipf_auth);
+
+		fr_auth_init = 0;
+	}
+}
+
+
+/*
+ * Age the auth records and pre-auth entries by one tick and release the
+ * ones whose age reaches zero.  Timeouts are set in expectation of this
+ * being called twice per second.  Does nothing while fr_auth_lock is set.
+ */
+void fr_authexpire()
+{
+	register frauthent_t *fae, **faep;
+	register frentry_t *fr, **frp;
+	register frauth_t *fra;
+	register int i;
+	mb_t *m;
+	SPL_INT(s);
+
+	if (fr_auth_lock)
+		return;
+
+	SPL_NET(s);
+	WRITE_ENTER(&ipf_auth);
+
+	/* Held packets: drop those whose record has just run out of time. */
+	for (i = 0; i < fr_authsize; i++) {
+		fra = fr_auth + i;
+		fra->fra_age--;
+		if (fra->fra_age != 0)
+			continue;
+		m = fr_authpkts[i];
+		if (m == NULL)
+			continue;
+		FREE_MB_T(m);
+		fr_authpkts[i] = NULL;
+		fra->fra_index = -1;
+		fr_authstats.fas_expire++;
+		fr_authused--;
+	}
+
+	/* Pre-auth entries: unlink and free the expired ones. */
+	faep = &fae_list;
+	while ((fae = *faep) != NULL) {
+		fae->fae_age--;
+		if (fae->fae_age != 0) {
+			faep = &fae->fae_next;
+			continue;
+		}
+		*faep = fae->fae_next;
+		KFREE(fae);
+		fr_authstats.fas_expire++;
+	}
+	ipauth = (fae_list != NULL) ? &fae_list->fae_fr : NULL;
+
+	/* Dummy rules: free those with a single remaining reference. */
+	frp = &fr_authlist;
+	while ((fr = *frp) != NULL) {
+		if (fr->fr_ref != 1) {
+			frp = &fr->fr_next;
+			continue;
+		}
+		*frp = fr->fr_next;
+		KFREE(fr);
+	}
+
+	RWLOCK_EXIT(&ipf_auth);
+	SPL_X(s);
+}
+
+/*
+ * Add (SIOCADAFR) or remove (SIOCRMAFR) a pre-auth entry.
+ *
+ * cmd   - SIOCADAFR or SIOCRMAFR; anything else returns EIO.
+ * fr    - rule to copy into a new entry, or the rule embedded in the
+ *         entry to remove.
+ * frptr - on add, the list link the new entry's rule is inserted at.
+ *
+ * Returns 0 on success, EINVAL if fr or frptr is NULL, ESRCH if the entry
+ * to remove is not on the list, ENOMEM if allocation fails.
+ */
+int fr_preauthcmd(cmd, fr, frptr)
+ioctlcmd_t cmd;
+frentry_t *fr, **frptr;
+{
+	frauthent_t *fae, **faep;
+	SPL_INT(s);
+
+	if ((cmd != SIOCADAFR) && (cmd != SIOCRMAFR))
+		return EIO;
+
+	/*
+	 * Find the entry whose embedded rule is fr.  If there is none, fae
+	 * ends up NULL and faep refers to the final link of the list.
+	 */
+	faep = &fae_list;
+	while (((fae = *faep) != NULL) && (&fae->fae_fr != fr))
+		faep = &fae->fae_next;
+
+	if (fr == NULL || frptr == NULL)
+		return EINVAL;
+
+	if (cmd == (ioctlcmd_t)SIOCRMAFR) {
+		if (fae == NULL)
+			return ESRCH;
+
+		SPL_NET(s);
+		WRITE_ENTER(&ipf_auth);
+		*faep = fae->fae_next;
+		if (ipauth == &fae->fae_fr)
+			ipauth = fae_list ? &fae_list->fae_fr : NULL;
+		RWLOCK_EXIT(&ipf_auth);
+		SPL_X(s);
+
+		KFREE(fae);
+		return 0;
+	}
+
+	KMALLOC(fae, frauthent_t *);
+	if (fae == NULL)
+		return ENOMEM;
+
+	bcopy((char *)fr, (char *)&fae->fae_fr, sizeof(*fr));
+	SPL_NET(s);
+	WRITE_ENTER(&ipf_auth);
+	fae->fae_age = fr_defaultauthage;
+	fae->fae_fr.fr_hits = 0;
+	/* Link the copied rule in at *frptr and the entry in at *faep. */
+	fae->fae_fr.fr_next = *frptr;
+	*frptr = &fae->fae_fr;
+	fae->fae_next = *faep;
+	*faep = fae;
+	ipauth = &fae_list->fae_fr;
+	RWLOCK_EXIT(&ipf_auth);
+	SPL_X(s);
+	return 0;
+}
+
+
+/*
+ * Flush held packets.
+ * Must already be properly SPL'ed and Locked on &ipf_auth.
+ *
+ * Returns the number of packets freed, or -1 if fr_auth_lock is set.
+ */
+int fr_authflush()
+{
+	register int slot, freed = 0;
+	mb_t *pkt;
+
+	if (fr_auth_lock)
+		return -1;
+
+	for (slot = 0; slot < fr_authsize; slot++) {
+		pkt = fr_authpkts[slot];
+		if (pkt == NULL)
+			continue;
+		FREE_MB_T(pkt);
+		fr_authpkts[slot] = NULL;
+		fr_auth[slot].fra_index = -1;
+		/* perhaps add & use a flush counter inst.*/
+		fr_authstats.fas_expire++;
+		fr_authused--;
+		freed++;
+	}
+
+	/* Reset all three array indices to the beginning. */
+	fr_authnext = 0;
+	fr_authend = 0;
+	fr_authstart = 0;
+
+	return freed;
+}
diff --git a/usr/src/uts/common/inet/ipf/ip_fil_solaris.c b/usr/src/uts/common/inet/ipf/ip_fil_solaris.c
new file mode 100644
index 0000000000..287cca861e
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/ip_fil_solaris.c
@@ -0,0 +1,1612 @@
+/*
+ * Copyright (C) 1993-2001, 2003 by Darren Reed.
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ *
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#if !defined(lint)
+static const char sccsid[] = "%W% %G% (C) 1993-2000 Darren Reed";
+static const char rcsid[] = "@(#)$Id: ip_fil_solaris.c,v 2.62.2.19 2005/07/13 21:40:46 darrenr Exp $";
+#endif
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/param.h>
+#include <sys/cpuvar.h>
+#include <sys/open.h>
+#include <sys/ioctl.h>
+#include <sys/filio.h>
+#include <sys/systm.h>
+#include <sys/strsubr.h>
+#include <sys/cred.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/ksynch.h>
+#include <sys/kmem.h>
+#include <sys/mkdev.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/dditypes.h>
+#include <sys/cmn_err.h>
+#include <net/if.h>
+#include <net/af.h>
+#include <net/route.h>
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <netinet/tcpip.h>
+#include <netinet/ip_icmp.h>
+#include "netinet/ip_compat.h"
+#ifdef USE_INET6
+# include <netinet/icmp6.h>
+#endif
+#include "netinet/ip_fil.h"
+#include "netinet/ip_nat.h"
+#include "netinet/ip_frag.h"
+#include "netinet/ip_state.h"
+#include "netinet/ip_auth.h"
+#include "netinet/ip_proxy.h"
+#ifdef IPFILTER_LOOKUP
+# include "netinet/ip_lookup.h"
+#endif
+#include <inet/ip_ire.h>
+
+#include <sys/md5.h>
+
+/* Defined elsewhere; read and written by the SIOCSETFF/SIOCGETFF/SIOCSWAPA ioctls. */
+extern int fr_flags, fr_active;
+/*
+ * NOTE(review): fr_timer_id is declared here and again inside the
+ * "SOLARIS2 < 10" block below - confirm the repetition is intended.
+ */
+#if SOLARIS2 >= 7
+timeout_id_t fr_timer_id;
+#else
+int fr_timer_id;
+#endif
+
+
+static int fr_send_ip __P((fr_info_t *fin, mblk_t *m, mblk_t **mp));
+
+/* Mutexes, read/write locks and condition variables used by the filter. */
+ipfmutex_t ipl_mutex, ipf_authmx, ipf_rw, ipf_stinsert;
+ipfmutex_t ipf_nat_new, ipf_natio, ipf_timeoutlock;
+ipfrwlock_t ipf_mutex, ipf_global, ipf_ipidfrag, ipf_frcache;
+ipfrwlock_t ipf_frag, ipf_state, ipf_nat, ipf_natfrag, ipf_auth;
+kcondvar_t iplwait, ipfauthwait;
+/*
+ * Before Solaris 10: pointers to the IP stack's own tunables.  iplattach()
+ * fills them in and they stay NULL when not found; the element type varies
+ * with the Solaris release.
+ */
+#if SOLARIS2 < 10
+#if SOLARIS2 >= 7
+timeout_id_t fr_timer_id;
+u_int *ip_ttl_ptr = NULL;
+u_int *ip_mtudisc = NULL;
+# if SOLARIS2 >= 8
+int *ip_forwarding = NULL;
+u_int *ip6_forwarding = NULL;
+# else
+u_int *ip_forwarding = NULL;
+# endif
+#else
+int fr_timer_id;
+u_long *ip_ttl_ptr = NULL;
+u_long *ip_mtudisc = NULL;
+u_long *ip_forwarding = NULL;
+#endif
+#endif
+/* 1 while the locks created by iplattach() exist; cleared by ipldetach(). */
+int ipf_locks_done = 0;
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: ipldetach */
+/* Returns: int - 0 == success, else error. */
+/* Parameters: Nil */
+/* */
+/* This function is responsible for undoing anything that might have been */
+/* done in a call to iplattach(). It must be able to clean up from a call */
+/* to iplattach() that did not succeed. Why might that happen? Someone */
+/* configures a table to be so large that we cannot allocate enough memory */
+/* for it. */
+/* ------------------------------------------------------------------------ */
+int ipldetach()
+{
+
+	ASSERT(rw_read_locked(&ipf_global.ipf_lk) == 0);
+
+#if SOLARIS2 < 10
+	/* Bit 1 of fr_control_forwarding: turn forwarding off on detach. */
+	if ((fr_control_forwarding & 2) != 0) {
+		if (ip_forwarding != NULL)
+			*ip_forwarding = 0;
+# if SOLARIS2 >= 8
+		if (ip6_forwarding != NULL)
+			*ip6_forwarding = 0;
+# endif
+	}
+#endif
+
+#ifdef IPFDEBUG
+	cmn_err(CE_CONT, "ipldetach()\n");
+#endif
+
+	fr_deinitialise();
+
+	/* Flush the inactive rule set first, then the active one. */
+	(void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE);
+	(void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE);
+
+	/* Nothing more to do unless iplattach() created the locks. */
+	if (ipf_locks_done != 1)
+		return 0;
+
+	MUTEX_DESTROY(&ipf_timeoutlock);
+	MUTEX_DESTROY(&ipf_rw);
+	RW_DESTROY(&ipf_ipidfrag);
+	ipf_locks_done = 0;
+	return 0;
+}
+
+
+/*
+ * Set up the filter: clear the rule cache, create the locks owned by this
+ * file, call fr_initialise() and, before Solaris 10, locate the IP stack
+ * tunables.  Returns 0 on success, -1 if fr_initialise() fails.
+ */
+int iplattach __P((void))
+{
+#if SOLARIS2 < 10
+ int i;
+#endif
+
+#ifdef IPFDEBUG
+ cmn_err(CE_CONT, "iplattach()\n");
+#endif
+
+ ASSERT(rw_read_locked(&ipf_global.ipf_lk) == 0);
+
+ bzero((char *)frcache, sizeof(frcache));
+ MUTEX_INIT(&ipf_rw, "ipf rw mutex");
+ MUTEX_INIT(&ipf_timeoutlock, "ipf timeout lock mutex");
+ RWLOCK_INIT(&ipf_ipidfrag, "ipf IP NAT-Frag rwlock");
+ /* Tell ipldetach() that the three locks above need destroying. */
+ ipf_locks_done = 1;
+
+ /* On failure the locks stay initialised; SIOCFRENB in iplioctl() calls ipldetach(). */
+ if (fr_initialise() < 0)
+ return -1;
+
+/* Do not use private interface ip_params_arr[] in Solaris 10 */
+#if SOLARIS2 < 10
+
+#if SOLARIS2 >= 8
+ ip_forwarding = &ip_g_forward;
+#endif
+ /*
+ * XXX - There is no terminator for this array, so it is not possible
+ * to tell if what we are looking for is missing and go off the end
+ * of the array.
+ */
+
+#if SOLARIS2 <= 8
+ /* Scan ip_param_arr[] by name until every wanted tunable is found. */
+ for (i = 0; ; i++) {
+ if (!strcmp(ip_param_arr[i].ip_param_name, "ip_def_ttl")) {
+ ip_ttl_ptr = &ip_param_arr[i].ip_param_value;
+ } else if (!strcmp(ip_param_arr[i].ip_param_name,
+ "ip_path_mtu_discovery")) {
+ ip_mtudisc = &ip_param_arr[i].ip_param_value;
+ }
+#if SOLARIS2 < 8
+ else if (!strcmp(ip_param_arr[i].ip_param_name,
+ "ip_forwarding")) {
+ ip_forwarding = &ip_param_arr[i].ip_param_value;
+ }
+#else
+ else if (!strcmp(ip_param_arr[i].ip_param_name,
+ "ip6_forwarding")) {
+ ip6_forwarding = &ip_param_arr[i].ip_param_value;
+ }
+#endif
+
+ /* The loop only ends once all of the pointers are set. */
+ if (ip_mtudisc != NULL && ip_ttl_ptr != NULL &&
+#if SOLARIS2 >= 8
+ ip6_forwarding != NULL &&
+#endif
+ ip_forwarding != NULL)
+ break;
+ }
+#endif
+
+ /* Bit 0 of fr_control_forwarding: turn forwarding on at attach. */
+ if (fr_control_forwarding & 1) {
+ if (ip_forwarding != NULL)
+ *ip_forwarding = 1;
+#if SOLARIS2 >= 8
+ if (ip6_forwarding != NULL)
+ *ip6_forwarding = 1;
+#endif
+ }
+
+#endif
+
+ return 0;
+}
+
+
+/*
+ * Filter ioctl interface.
+ *
+ * dev  - device; its minor number selects the unit.
+ * cmd  - ioctl command.
+ * data - ioctl argument.
+ * mode - open mode flags; every command that changes filter state,
+ *        including SIOCSTLCK, requires FWRITE and fails with EPERM
+ *        without it.
+ * cp, rp - not used by the handler itself.
+ *
+ * Returns 0 on success or an errno value.  ENXIO is returned for a minor
+ * number above IPL_LOGMAX.  While the filter is not running only a small
+ * set of commands on IPL_LOGIPF is accepted; everything else gets EIO.
+ *
+ * fr_ioctlswitch() gets first refusal; a result other than -1 from it is
+ * returned as is.
+ */
+/*ARGSUSED*/
+int iplioctl(dev, cmd, data, mode, cp, rp)
+dev_t dev;
+int cmd;
+#if SOLARIS2 >= 7
+intptr_t data;
+#else
+int *data;
+#endif
+int mode;
+cred_t *cp;
+int *rp;
+{
+	int error = 0, tmp;
+	friostat_t fio;
+	minor_t unit;
+	u_int enable;
+
+#ifdef IPFDEBUG
+	cmn_err(CE_CONT, "iplioctl(%x,%x,%x,%d,%x,%d)\n",
+		dev, cmd, data, mode, cp, rp);
+#endif
+	unit = getminor(dev);
+	if (IPL_LOGMAX < unit)
+		return ENXIO;
+
+	if (fr_running <= 0) {
+		if (unit != IPL_LOGIPF)
+			return EIO;
+		if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET &&
+		    cmd != SIOCIPFSET && cmd != SIOCFRENB &&
+		    cmd != SIOCGETFS && cmd != SIOCGETFF)
+			return EIO;
+	}
+
+	READ_ENTER(&ipf_global);
+
+	error = fr_ioctlswitch(unit, (caddr_t)data, cmd, mode);
+	if (error != -1) {
+		RWLOCK_EXIT(&ipf_global);
+		return error;
+	}
+	error = 0;
+
+	switch (cmd)
+	{
+	case SIOCFRENB :
+		if (!(mode & FWRITE))
+			error = EPERM;
+		else {
+			error = COPYIN((caddr_t)data, (caddr_t)&enable,
+				       sizeof(enable));
+			if (error != 0) {
+				error = EFAULT;
+				break;
+			}
+
+			/* Attach and detach need ipf_global held as a writer. */
+			RWLOCK_EXIT(&ipf_global);
+			WRITE_ENTER(&ipf_global);
+			if (enable) {
+				if (fr_running > 0)
+					error = 0;
+				else
+					error = iplattach();
+				if (error == 0)
+					fr_running = 1;
+				else
+					(void) ipldetach();
+			} else {
+				error = ipldetach();
+				if (error == 0)
+					fr_running = -1;
+			}
+		}
+		break;
+	case SIOCIPFSET :
+		if (!(mode & FWRITE)) {
+			error = EPERM;
+			break;
+		}
+		/* FALLTHRU */
+	case SIOCIPFGETNEXT :
+	case SIOCIPFGET :
+		error = fr_ipftune(cmd, (void *)data);
+		break;
+	case SIOCSETFF :
+		if (!(mode & FWRITE))
+			error = EPERM;
+		else {
+			error = COPYIN((caddr_t)data, (caddr_t)&fr_flags,
+				       sizeof(fr_flags));
+			if (error != 0)
+				error = EFAULT;
+		}
+		break;
+	case SIOCGETFF :
+		error = COPYOUT((caddr_t)&fr_flags, (caddr_t)data,
+				sizeof(fr_flags));
+		if (error != 0)
+			error = EFAULT;
+		break;
+	case SIOCFUNCL :
+		error = fr_resolvefunc((void *)data);
+		break;
+	case SIOCINAFR :
+	case SIOCRMAFR :
+	case SIOCADAFR :
+	case SIOCZRLST :
+		if (!(mode & FWRITE))
+			error = EPERM;
+		else
+			error = frrequest(unit, cmd, (caddr_t)data,
+					  fr_active, 1);
+		break;
+	case SIOCINIFR :
+	case SIOCRMIFR :
+	case SIOCADIFR :
+		if (!(mode & FWRITE))
+			error = EPERM;
+		else
+			error = frrequest(unit, cmd, (caddr_t)data,
+					  1 - fr_active, 1);
+		break;
+	case SIOCSWAPA :
+		if (!(mode & FWRITE))
+			error = EPERM;
+		else {
+			WRITE_ENTER(&ipf_mutex);
+			bzero((char *)frcache, sizeof(frcache[0]) * 2);
+			error = COPYOUT((caddr_t)&fr_active, (caddr_t)data,
+					sizeof(fr_active));
+			if (error != 0)
+				error = EFAULT;
+			else
+				fr_active = 1 - fr_active;
+			RWLOCK_EXIT(&ipf_mutex);
+		}
+		break;
+	case SIOCGETFS :
+		fr_getstat(&fio);
+		error = fr_outobj((void *)data, &fio, IPFOBJ_IPFSTAT);
+		break;
+	case SIOCFRZST :
+		if (!(mode & FWRITE))
+			error = EPERM;
+		else
+			error = fr_zerostats((caddr_t)data);
+		break;
+	case SIOCIPFFL :
+		if (!(mode & FWRITE))
+			error = EPERM;
+		else {
+			error = COPYIN((caddr_t)data, (caddr_t)&tmp,
+				       sizeof(tmp));
+			if (!error) {
+				tmp = frflush(unit, 4, tmp);
+				error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
+						sizeof(tmp));
+				if (error != 0)
+					error = EFAULT;
+			} else
+				error = EFAULT;
+		}
+		break;
+#ifdef USE_INET6
+	case SIOCIPFL6 :
+		if (!(mode & FWRITE))
+			error = EPERM;
+		else {
+			error = COPYIN((caddr_t)data, (caddr_t)&tmp,
+				       sizeof(tmp));
+			if (!error) {
+				tmp = frflush(unit, 6, tmp);
+				error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
+						sizeof(tmp));
+				if (error != 0)
+					error = EFAULT;
+			} else
+				error = EFAULT;
+		}
+		break;
+#endif
+	case SIOCSTLCK :
+		/*
+		 * This changes the lock flag of four subsystems at once, so
+		 * it needs write access like every other state-changing
+		 * command handled here.
+		 */
+		if (!(mode & FWRITE)) {
+			error = EPERM;
+			break;
+		}
+		error = COPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
+		if (error == 0) {
+			fr_state_lock = tmp;
+			fr_nat_lock = tmp;
+			fr_frag_lock = tmp;
+			fr_auth_lock = tmp;
+		} else
+			error = EFAULT;
+		break;
+#ifdef IPFILTER_LOG
+	case SIOCIPFFB :
+		if (!(mode & FWRITE))
+			error = EPERM;
+		else {
+			tmp = ipflog_clear(unit);
+			error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
+					sizeof(tmp));
+			if (error)
+				error = EFAULT;
+		}
+		break;
+#endif /* IPFILTER_LOG */
+	case SIOCFRSYN :
+		if (!(mode & FWRITE))
+			error = EPERM;
+		else {
+			RWLOCK_EXIT(&ipf_global);
+			WRITE_ENTER(&ipf_global);
+			error = ipfsync();
+		}
+		break;
+	case SIOCGFRST :
+		error = fr_outobj((void *)data, fr_fragstats(),
+				  IPFOBJ_FRAGSTAT);
+		break;
+	case FIONREAD :
+#ifdef IPFILTER_LOG
+		tmp = (int)iplused[IPL_LOGIPF];
+
+		error = COPYOUT((caddr_t)&tmp, (caddr_t)data, sizeof(tmp));
+		if (error != 0)
+			error = EFAULT;
+#endif
+		break;
+	default :
+		cmn_err(CE_NOTE, "Unknown: cmd 0x%x data %p", cmd, (void *)data);
+		error = EINVAL;
+		break;
+	}
+	RWLOCK_EXIT(&ipf_global);
+	return error;
+}
+
+
+/*
+ * Look up the interface called "name" for IP version v (4 or 6), holding
+ * pfil_rw as a reader around the lookup.  Returns whatever
+ * qif_iflookup() returns, or NULL for any other version.
+ */
+void *get_unit(name, v)
+char *name;
+int v;
+{
+	qif_t *found;
+	int sap;
+
+	switch (v)
+	{
+	case 4 :
+		sap = 0x0800;
+		break;
+	case 6 :
+		sap = 0x86dd;
+		break;
+	default :
+		return NULL;
+	}
+
+	rw_enter(&pfil_rw, RW_READER);
+	found = qif_iflookup(name, sap);
+	rw_exit(&pfil_rw);
+	return found;
+}
+
+
+/*
+ * routines below for saving IP headers to buffer
+ */
+/*ARGSUSED*/
+int iplopen(devp, flags, otype, cred)
+dev_t *devp;
+int flags, otype;
+cred_t *cred;
+{
+	minor_t unit;
+
+	unit = getminor(*devp);
+
+#ifdef IPFDEBUG
+	cmn_err(CE_CONT, "iplopen(%x,%x,%x,%x)\n", devp, flags, otype, cred);
+#endif
+	/* Only character opens of a minor up to IPL_LOGMAX are accepted. */
+	if ((otype & OTYP_CHR) == 0)
+		return ENXIO;
+	if (IPL_LOGMAX < unit)
+		return ENXIO;
+	return 0;
+}
+
+
+/*ARGSUSED*/
+int iplclose(dev, flags, otype, cred)
+dev_t dev;
+int flags, otype;
+cred_t *cred;
+{
+	minor_t unit;
+
+	unit = getminor(dev);
+
+#ifdef IPFDEBUG
+	cmn_err(CE_CONT, "iplclose(%x,%x,%x,%x)\n", dev, flags, otype, cred);
+#endif
+
+	/* ENXIO for a minor number above IPL_LOGMAX, otherwise success. */
+	if (IPL_LOGMAX < unit)
+		return ENXIO;
+	return 0;
+}
+
+#ifdef IPFILTER_LOG
+/*
+ * iplread/ipllog
+ * both of these must operate with at least splnet() lest they be
+ * called during packet processing and cause an inconsistency to appear in
+ * the filter lists.
+ */
+/*ARGSUSED*/
+int iplread(dev, uio, cp)
+dev_t dev;
+register struct uio *uio;
+cred_t *cp;
+{
+	minor_t unit;
+
+# ifdef IPFDEBUG
+	cmn_err(CE_CONT, "iplread(%x,%x,%x)\n", dev, uio, cp);
+# endif
+	unit = getminor(dev);
+# ifdef IPFILTER_SYNC
+	/* The sync device has its own reader. */
+	if (unit == IPL_LOGSYNC)
+		return ipfsync_read(uio);
+# endif
+
+	return ipflog_read(unit, uio);
+}
+#endif /* IPFILTER_LOG */
+
+
+/*
+ * iplwrite
+ * Only the sync device (minor IPL_LOGSYNC, and only when IPFILTER_SYNC is
+ * defined) accepts writes; they are handed to ipfsync_write(). A write to
+ * any other minor device fails with ENXIO.
+ */
+int iplwrite(dev, uio, cp)
+dev_t dev;
+register struct uio *uio;
+cred_t *cp;
+{
+#ifdef IPFDEBUG
+	cmn_err(CE_CONT, "iplwrite(%x,%x,%x)\n", dev, uio, cp);
+#endif
+#ifdef IPFILTER_SYNC
+	if (IPL_LOGSYNC == getminor(dev))
+		return ipfsync_write(uio);
+#endif /* IPFILTER_SYNC */
+	/* Self-assignments keep lint quiet about the unused arguments. */
+	cp = cp;	/* LINT */
+	uio = uio;	/* LINT */
+	dev = dev;	/* LINT */
+	return ENXIO;
+}
+
+
+/*
+ * fr_send_reset - this could conceivably be a call to tcp_respond(), but that
+ * requires a large amount of setting up and isn't any more efficient.
+ *
+ * Build a TCP RST answering the packet described by fin and pass it to
+ * fr_send_ip().
+ *
+ * Returns -1 if the packet is itself a RST, if its layer 4 checksum is
+ * rejected (when IPFILTER_CKSUM is not defined) or if no message block
+ * can be allocated; otherwise the result of fr_send_ip().
+ *
+ * When the offending segment carries no ACK, the reset acknowledges the
+ * whole segment: its payload bytes plus one for each of SYN and FIN.
+ */
+int fr_send_reset(fin)
+fr_info_t *fin;
+{
+	tcphdr_t *tcp, *tcp2;
+	int tlen, hlen;
+	mblk_t *m;
+#ifdef USE_INET6
+	ip6_t *ip6;
+#endif
+	ip_t *ip;
+
+	tcp = fin->fin_dp;
+	if (tcp->th_flags & TH_RST)
+		return -1;
+
+#ifndef IPFILTER_CKSUM
+	if (fr_checkl4sum(fin) == -1)
+		return -1;
+#endif
+
+	/*
+	 * Sequence space used by the segment being answered.
+	 * NOTE(review): fin_dlen is taken to be the number of bytes that
+	 * follow the IP header - confirm against fr_info_t.
+	 */
+	tlen = fin->fin_dlen - (tcp->th_off << 2);
+	if (tlen < 0)
+		tlen = 0;
+	if (tcp->th_flags & TH_SYN)
+		tlen++;
+	if (tcp->th_flags & TH_FIN)
+		tlen++;
+
+#ifdef USE_INET6
+	if (fin->fin_v == 6)
+		hlen = sizeof(ip6_t);
+	else
+#endif
+		hlen = sizeof(ip_t);
+	hlen += sizeof(*tcp2);
+	/* The extra 64 bytes are left as headroom in front of the packet. */
+	if ((m = (mblk_t *)allocb(hlen + 64, BPRI_HI)) == NULL)
+		return -1;
+
+	m->b_rptr += 64;
+	MTYPE(m) = M_DATA;
+	m->b_wptr = m->b_rptr + hlen;
+	ip = (ip_t *)m->b_rptr;
+	bzero((char *)ip, hlen);
+	tcp2 = (struct tcphdr *)(m->b_rptr + hlen - sizeof(*tcp2));
+	tcp2->th_dport = tcp->th_sport;
+	tcp2->th_sport = tcp->th_dport;
+	if (tcp->th_flags & TH_ACK) {
+		tcp2->th_seq = tcp->th_ack;
+		tcp2->th_flags = TH_RST;
+	} else {
+		tcp2->th_ack = ntohl(tcp->th_seq);
+		tcp2->th_ack += tlen;
+		tcp2->th_ack = htonl(tcp2->th_ack);
+		tcp2->th_flags = TH_RST|TH_ACK;
+	}
+	tcp2->th_off = sizeof(struct tcphdr) >> 2;
+
+	ip->ip_v = fin->fin_v;
+#ifdef USE_INET6
+	if (fin->fin_v == 6) {
+		ip6 = (ip6_t *)m->b_rptr;
+		ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
+		ip6->ip6_src = fin->fin_dst6;
+		ip6->ip6_dst = fin->fin_src6;
+		ip6->ip6_plen = htons(sizeof(*tcp));
+		ip6->ip6_nxt = IPPROTO_TCP;
+		tcp2->th_sum = fr_cksum(m, (ip_t *)ip6, IPPROTO_TCP, tcp2);
+	} else
+#endif
+	{
+		ip->ip_src.s_addr = fin->fin_daddr;
+		ip->ip_dst.s_addr = fin->fin_saddr;
+		ip->ip_id = fr_nextipid(fin);
+		ip->ip_hl = sizeof(*ip) >> 2;
+		ip->ip_p = IPPROTO_TCP;
+		/* Left in host byte order for fr_send_ip(). */
+		ip->ip_len = sizeof(*ip) + sizeof(*tcp);
+		ip->ip_tos = fin->fin_ip->ip_tos;
+		tcp2->th_sum = fr_cksum(m, ip, IPPROTO_TCP, tcp2);
+	}
+	return fr_send_ip(fin, m, &m);
+}
+
+/*
+ * Function: fr_send_ip
+ * Returns: the value handed back by fr_fastroute()
+ *          (0: success, -1: failed)
+ * Parameters:
+ *	fin: packet information of the packet being answered
+ *	m:   the message block where the new IP header starts
+ *	mpp: pointer to the message pointer, passed on to fr_fastroute()
+ *
+ * Send a freshly built packet out via fr_fastroute().
+ *
+ * For IPv4 packets the caller supplies ip_len in host byte order; ip_ttl,
+ * ip_off and ip_sum are written by this function.
+ *
+ * For IPv6 packets ip6_vfc and ip6_hlim are written by this function.
+ *
+ * All other portions of the packet must be in on-the-wire format.
+ */
+/*ARGSUSED*/
+static int fr_send_ip(fin, m, mpp)
+fr_info_t *fin;
+mblk_t *m, **mpp;
+{
+	qpktinfo_t *oqpi, nqpi;
+	fr_info_t nfin;
+	qif_t *oqif;
+	ip_t *iph;
+	int iphlen;
+
+	iph = (ip_t *)m->b_rptr;
+	bzero((char *)&nfin, sizeof(nfin));
+
+#ifdef USE_INET6
+	if (fin->fin_v == 6) {
+		ip6_t *ip6h = (ip6_t *)iph;
+
+		ip6h->ip6_vfc = 0x60;
+		ip6h->ip6_hlim = 127;
+		iphlen = sizeof(*ip6h);
+		nfin.fin_v = 6;
+	} else
+#endif
+	{
+		nfin.fin_v = 4;
+		iphlen = sizeof(*iph);
+#if SOLARIS2 >= 10
+		iph->ip_ttl = 255;
+		iph->ip_off = htons(IP_DF);
+#else
+		iph->ip_ttl = (ip_ttl_ptr != NULL) ?
+		    (u_char)(*ip_ttl_ptr) : 63;
+		if ((ip_mtudisc == NULL) || (*ip_mtudisc != 0))
+			iph->ip_off = htons(IP_DF);
+		else
+			iph->ip_off = htons(0);
+#endif
+		/*
+		 * ipf_cksum wants ip_len/ip_off in network byte order while
+		 * fr_fastroute wants them in host byte order, so flip ip_len
+		 * for the checksum and then put both fields into host order.
+		 */
+		iph->ip_len = htons(iph->ip_len);
+		iph->ip_sum = ipf_cksum((u_short *)iph, sizeof(*iph));
+		iph->ip_len = ntohs(iph->ip_len);
+		iph->ip_off = ntohs(iph->ip_off);
+	}
+
+	/*
+	 * Build a private qpktinfo_t for the new packet from the one that
+	 * belongs to the packet we are replying to.
+	 */
+	oqpi = fin->fin_qpi;
+	oqif = oqpi->qpi_real;
+
+	nqpi.qpi_q = oqpi->qpi_q;
+	nqpi.qpi_name = oqpi->qpi_name;
+	nqpi.qpi_off = 0;
+	nqpi.qpi_real = oqif;
+	nqpi.qpi_ill = oqif->qf_ill;
+	nqpi.qpi_hl = oqif->qf_hl;
+	nqpi.qpi_ppa = oqif->qf_ppa;
+	nqpi.qpi_num = oqif->qf_num;
+	nqpi.qpi_flags = oqif->qf_flags;
+	nqpi.qpi_max_frag = oqif->qf_max_frag;
+	nqpi.qpi_m = m;
+	nqpi.qpi_data = iph;
+
+	/*
+	 * Describe the new packet in its own fr_info_t.
+	 */
+	nfin.fin_qpi = &nqpi;
+	nfin.fin_ifp = fin->fin_ifp;
+	nfin.fin_flx = FI_NOCKSUM;
+	nfin.fin_m = m;
+	nfin.fin_mp = mpp;
+	nfin.fin_ip = iph;
+	nfin.fin_hlen = iphlen;
+	nfin.fin_dp = (char *)iph + iphlen;
+	(void) fr_makefrip(iphlen, iph, &nfin);
+
+	return fr_fastroute(m, mpp, &nfin, NULL);
+}
+
+
+/*
+ * Function: fr_send_icmp_err
+ * Returns: -1 if type/code are out of range, the layer 4 checksum check
+ *          fails, no message block can be allocated or no source address
+ *          can be found for the interface;
+ *          0 if the offending IPv4 packet is itself an ICMP message that
+ *          is not one of the query types listed below (no error is sent);
+ *          otherwise the return value of fr_send_ip().
+ * Parameters:
+ *	type: ICMP (v4) type of the error to send
+ *	fin:  packet information of the offending packet; the ICMP code is
+ *	      taken from fin->fin_icode
+ *	dst:  0 = use an address of the receiving interface as the source
+ *	      of the error, non-zero = use the offending packet's destination
+ *
+ * Build an ICMP (or, for IPv6, ICMPv6) error quoting the start of the
+ * offending packet and hand it to fr_send_ip().
+ */
+int fr_send_icmp_err(type, fin, dst)
+int type;
+fr_info_t *fin;
+int dst;
+{
+	struct in_addr dst4;
+	struct icmp *icmp;
+	qpktinfo_t *qpi;
+	int hlen, code;
+	u_short sz;
+#ifdef USE_INET6
+	mblk_t *mb;
+#endif
+	mblk_t *m;
+#ifdef USE_INET6
+	ip6_t *ip6;
+#endif
+	ip_t *ip;
+
+	if ((type < 0) || (type > ICMP_MAXTYPE))
+		return -1;
+
+	code = fin->fin_icode;
+#ifdef USE_INET6
+	/*
+	 * code is used below as an index into icmptoicmp6unreach[], so it
+	 * has to be strictly less than the number of entries in that table.
+	 */
+	if ((code < 0) ||
+	    ((size_t)code >= sizeof(icmptoicmp6unreach) /
+	    sizeof(icmptoicmp6unreach[0])))
+		return -1;
+#endif
+
+#ifndef IPFILTER_CKSUM
+	if (fr_checkl4sum(fin) == -1)
+		return -1;
+#endif
+
+	qpi = fin->fin_qpi;
+
+#ifdef USE_INET6
+	mb = fin->fin_qfm;
+
+	if (fin->fin_v == 6) {
+		/* quote at most 512 bytes of the offending message block */
+		sz = sizeof(ip6_t);
+		sz += MIN(mb->b_wptr - mb->b_rptr, 512);
+		hlen = sizeof(ip6_t);
+		type = icmptoicmp6types[type];
+		if (type == ICMP6_DST_UNREACH)
+			code = icmptoicmp6unreach[code];
+	} else
+#endif
+	{
+		/*
+		 * Only answer ICMP packets that are one of these query
+		 * types; anything else is silently left alone.
+		 */
+		if ((fin->fin_p == IPPROTO_ICMP) &&
+		    !(fin->fin_flx & FI_SHORT))
+			switch (ntohs(fin->fin_data[0]) >> 8)
+			{
+			case ICMP_ECHO :
+			case ICMP_TSTAMP :
+			case ICMP_IREQ :
+			case ICMP_MASKREQ :
+				break;
+			default :
+				return 0;
+			}
+
+		sz = sizeof(ip_t) * 2;
+		sz += 8;		/* 64 bits of data */
+		hlen = sizeof(ip_t);
+	}
+
+	sz += offsetof(struct icmp, icmp_ip);
+	/* 64 spare bytes are left in front of the IP header */
+	if ((m = (mblk_t *)allocb((size_t)sz + 64, BPRI_HI)) == NULL)
+		return -1;
+	MTYPE(m) = M_DATA;
+	m->b_rptr += 64;
+	m->b_wptr = m->b_rptr + sz;
+	bzero((char *)m->b_rptr, (size_t)sz);
+	ip = (ip_t *)m->b_rptr;
+	ip->ip_v = fin->fin_v;
+	icmp = (struct icmp *)(m->b_rptr + hlen);
+	icmp->icmp_type = type & 0xff;
+	icmp->icmp_code = code & 0xff;
+#ifdef icmp_nextmtu
+	if (type == ICMP_UNREACH && (qpi->qpi_max_frag != 0) &&
+	    fin->fin_icode == ICMP_UNREACH_NEEDFRAG)
+		icmp->icmp_nextmtu = htons(qpi->qpi_max_frag);
+#endif
+
+#ifdef USE_INET6
+	if (fin->fin_v == 6) {
+		struct in6_addr dst6;
+		int csz;
+
+		if (dst == 0) {
+			if (fr_ifpaddr(6, FRI_NORMAL, qpi->qpi_real,
+				       (struct in_addr *)&dst6, NULL) == -1) {
+				FREE_MB_T(m);
+				return -1;
+			}
+		} else
+			dst6 = fin->fin_dst6;
+
+		csz = sz;
+		sz -= sizeof(ip6_t);
+		ip6 = (ip6_t *)m->b_rptr;
+		ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
+		ip6->ip6_plen = htons((u_short)sz);
+		ip6->ip6_nxt = IPPROTO_ICMPV6;
+		ip6->ip6_src = dst6;
+		ip6->ip6_dst = fin->fin_src6;
+		sz -= offsetof(struct icmp, icmp_ip);
+		bcopy((char *)mb->b_rptr, (char *)&icmp->icmp_ip, sz);
+		/* the checksum field is loaded with the ICMPv6 length here */
+		icmp->icmp_cksum = csz - sizeof(ip6_t);
+	} else
+#endif
+	{
+		ip->ip_hl = sizeof(*ip) >> 2;
+		ip->ip_p = IPPROTO_ICMP;
+		ip->ip_id = fin->fin_ip->ip_id;
+		ip->ip_tos = fin->fin_ip->ip_tos;
+		ip->ip_len = (u_short)sz;
+		if (dst == 0) {
+			if (fr_ifpaddr(4, FRI_NORMAL, qpi->qpi_real,
+				       &dst4, NULL) == -1) {
+				FREE_MB_T(m);
+				return -1;
+			}
+		} else
+			dst4 = fin->fin_dst;
+		ip->ip_src = dst4;
+		ip->ip_dst = fin->fin_src;
+		/* quote the offending IP header plus 8 bytes of its payload */
+		bcopy((char *)fin->fin_ip, (char *)&icmp->icmp_ip,
+		      sizeof(*fin->fin_ip));
+		bcopy((char *)fin->fin_ip + fin->fin_hlen,
+		      (char *)&icmp->icmp_ip + sizeof(*fin->fin_ip), 8);
+		icmp->icmp_ip.ip_len = htons(icmp->icmp_ip.ip_len);
+		icmp->icmp_ip.ip_off = htons(icmp->icmp_ip.ip_off);
+		icmp->icmp_cksum = ipf_cksum((u_short *)icmp,
+					     sz - sizeof(ip_t));
+	}
+
+	/*
+	 * Need to exit out of these so we don't recursively call rw_enter
+	 * from fr_qout.
+	 */
+	return fr_send_ip(fin, m, &m);
+}
+
+#ifdef IRE_ILL_CN
+#include <sys/time.h>
+#include <sys/varargs.h>
+
+#ifndef _KERNEL
+#include <stdio.h>
+#endif
+
+#define NULLADDR_RATE_LIMIT 10 /* 10 seconds */
+
+
+/*
+ * Print out warning message at rate-limited speed.
+ *
+ * Parameters:
+ *	rate:    minimum number of seconds (in ddi_get_time() units) that
+ *	         must separate two printed messages
+ *	message: printf-style format string, followed by its arguments
+ *
+ * A single timestamp is shared by every caller, so the limit applies to
+ * all messages printed through this function taken together.
+ */
+static void rate_limit_message(int rate, const char *message, ...)
+{
+	static time_t last_time = 0;
+	time_t now;
+	va_list args;
+	char msg_buf[256];
+	int need_printed = 0;
+
+	now = ddi_get_time();
+
+	/* make sure, no multiple entries */
+	ASSERT(MUTEX_NOT_HELD(&(ipf_rw.ipf_lk)));
+	MUTEX_ENTER(&ipf_rw);
+	if (now - last_time >= rate) {
+		need_printed = 1;
+		last_time = now;
+	}
+	MUTEX_EXIT(&ipf_rw);
+
+	if (need_printed) {
+		va_start(args, message);
+		(void)vsnprintf(msg_buf, sizeof(msg_buf), message, args);
+		va_end(args);
+		/*
+		 * msg_buf is already fully formatted and may contain '%'
+		 * characters that came from the arguments, so it must be
+		 * printed as data and never used as a format string.
+		 */
+#ifdef _KERNEL
+		cmn_err(CE_WARN, "%s", msg_buf);
+#else
+		fprintf(stderr, "%s", msg_buf);
+#endif
+	}
+}
+#endif
+
+/*
+ * Function: fr_ifpaddr
+ * Returns: -1 if qifptr is NULL or -1, or if no usable address/netmask is
+ *          found for the interface; otherwise the return value of
+ *          fr_ifpfillv4addr() or fr_ifpfillv6addr().
+ * Parameters:
+ *	v:       IP version (6 selects the IPv6 path when USE_INET6 is set)
+ *	atype:   FRI_BROADCAST, FRI_PEERADDR or anything else for the
+ *	         local address
+ *	qifptr:  the qif_t describing the interface
+ *	inp:     passed on to the fill function
+ *	inpmask: passed on to the fill function
+ *
+ * return the first IP Address associated with an interface
+ */
+/*ARGSUSED*/
+int fr_ifpaddr(v, atype, qifptr, inp, inpmask)
+int v, atype;
+void *qifptr;
+struct in_addr *inp, *inpmask;
+{
+#ifdef USE_INET6
+	struct sockaddr_in6 sin6, mask6;
+#endif
+	struct sockaddr_in sin, mask;
+	qif_t *qif;
+
+	/*
+	 * The IRE_ILL_CN code below reads "ill" in the IPv4 path as well as
+	 * in the IPv6 one, so it must exist whether or not USE_INET6 is
+	 * defined.
+	 */
+#ifdef IRE_ILL_CN
+	s_ill_t *ill;
+#endif
+	if ((qifptr == NULL) || (qifptr == (void *)-1))
+		return -1;
+
+	qif = qifptr;
+
+#ifdef IRE_ILL_CN
+	ill = qif->qf_ill;
+#endif
+
+#ifdef USE_INET6
+	if (v == 6) {
+#ifndef IRE_ILL_CN
+		in6_addr_t *inp6;
+		ipif_t *ipif;
+		ill_t *ill;
+
+		ill = qif->qf_ill;
+
+		/*
+		 * First is always link local.
+		 * Walk on to the first address that is neither link local
+		 * nor loopback.
+		 */
+		for (ipif = ill->ill_ipif; ipif; ipif = ipif->ipif_next) {
+			inp6 = &ipif->ipif_v6lcl_addr;
+			if (!IN6_IS_ADDR_LINKLOCAL(inp6) &&
+			    !IN6_IS_ADDR_LOOPBACK(inp6))
+				break;
+		}
+		if (ipif == NULL)
+			return -1;
+
+		mask6.sin6_addr = ipif->ipif_v6net_mask;
+		if (atype == FRI_BROADCAST)
+			sin6.sin6_addr = ipif->ipif_v6brd_addr;
+		else if (atype == FRI_PEERADDR)
+			sin6.sin6_addr = ipif->ipif_v6pp_dst_addr;
+		else
+			sin6.sin6_addr = *inp6;
+#else /* IRE_ILL_CN */
+		if (IN6_IS_ADDR_UNSPECIFIED(&ill->netmask.in6.sin6_addr) ||
+		    IN6_IS_ADDR_UNSPECIFIED(&ill->localaddr.in6.sin6_addr)) {
+			rate_limit_message(NULLADDR_RATE_LIMIT,
+			   "Check pfild is running: IP#/netmask is 0 on %s.\n",
+			   ill->ill_name);
+			return -1;
+		}
+		mask6 = ill->netmask.in6;
+		if (atype == FRI_BROADCAST)
+			sin6 = ill->broadaddr.in6;
+		else if (atype == FRI_PEERADDR)
+			sin6 = ill->dstaddr.in6;
+		else
+			sin6 = ill->localaddr.in6;
+#endif /* IRE_ILL_CN */
+		return fr_ifpfillv6addr(atype, &sin6, &mask6, inp, inpmask);
+	}
+#endif
+
+#ifndef IRE_ILL_CN
+
+	switch (atype)
+	{
+	case FRI_BROADCAST :
+		sin.sin_addr.s_addr = QF_V4_BROADCAST(qif);
+		break;
+	case FRI_PEERADDR :
+		sin.sin_addr.s_addr = QF_V4_PEERADDR(qif);
+		break;
+	default :
+		sin.sin_addr.s_addr = QF_V4_ADDR(qif);
+		break;
+	}
+	mask.sin_addr.s_addr = QF_V4_NETMASK(qif);
+
+#else
+	if (ill->netmask.in.sin_addr.s_addr == 0 ||
+	    ill->localaddr.in.sin_addr.s_addr == 0) {
+		rate_limit_message(NULLADDR_RATE_LIMIT,
+		   "Check pfild is running: IP#/netmask is 0 on %s.\n",
+		   ill->ill_name);
+		return -1;
+	}
+	mask = ill->netmask.in;
+	if (atype == FRI_BROADCAST)
+		sin = ill->broadaddr.in;
+	else if (atype == FRI_PEERADDR)
+		sin = ill->dstaddr.in;
+	else
+		sin = ill->localaddr.in;
+#endif /* IRE_ILL_CN */
+	return fr_ifpfillv4addr(atype, &sin, &mask, inp, inpmask);
+}
+
+
+/*
+ * Function: fr_newisn
+ * Returns: u_32_t - a new TCP initial sequence number
+ * Parameters: fin - packet information of the connection
+ *
+ * The value is the first four bytes of an MD5 hash over the packet's
+ * addresses, its fin_dat and ipf_iss_secret, plus an offset that grows by
+ * 0x00010000 on every call.
+ */
+u_32_t fr_newisn(fin)
+fr_info_t *fin;
+{
+	/*
+	 * Unsigned on purpose: the offset is meant to wrap around, and
+	 * wrapping a signed int is undefined behaviour in C.
+	 */
+	static u_32_t iss_seq_off = 0;
+	u_char hash[16];
+	u_32_t newiss;
+	MD5_CTX ctx;
+
+	/*
+	 * Compute the base value of the ISS.  It is a hash
+	 * of (saddr, sport, daddr, dport, secret).
+	 */
+	MD5Init(&ctx);
+
+	MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_src,
+		  sizeof(fin->fin_fi.fi_src));
+	MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_dst,
+		  sizeof(fin->fin_fi.fi_dst));
+	MD5Update(&ctx, (u_char *) &fin->fin_dat, sizeof(fin->fin_dat));
+
+	MD5Update(&ctx, ipf_iss_secret, sizeof(ipf_iss_secret));
+
+	MD5Final(hash, &ctx);
+
+	bcopy(hash, &newiss, sizeof(newiss));
+
+	/*
+	 * Now increment our "timer", and add it in to
+	 * the computed value.
+	 *
+	 * XXX Use `addin'?
+	 * XXX TCP_ISSINCR too large to use?
+	 */
+	iss_seq_off += 0x00010000;
+	newiss += iss_seq_off;
+	return newiss;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    fr_nextipid                                                 */
+/* Returns:     u_short - the IPv4 ID to put in the packet                  */
+/* Parameters:  fin(I) - pointer to packet information                      */
+/*                                                                          */
+/* Picks the next IPv4 ID for this packet: the low 16 bits of a packet      */
+/* counter from the state entry if there is one, else from the NAT entry if */
+/* there is one, else a private counter that is bumped on every use.        */
+/* ------------------------------------------------------------------------ */
+u_short fr_nextipid(fin)
+fr_info_t *fin;
+{
+	static u_short next_id = 0;
+	ipstate_t *state;
+	nat_t *natp;
+	u_short newid;
+
+	MUTEX_ENTER(&ipf_rw);
+	state = fin->fin_state;
+	if (state == NULL) {
+		natp = fin->fin_nat;
+		if (natp == NULL) {
+			/* no state, no NAT: fall back to our own counter */
+			newid = next_id;
+			next_id++;
+		} else {
+			newid = (u_short)(natp->nat_pkts[fin->fin_out] &
+			    0xffff);
+		}
+	} else {
+		newid = (u_short)(state->is_pkts[(fin->fin_rev << 1) + 1] &
+		    0xffff);
+	}
+	MUTEX_EXIT(&ipf_rw);
+
+	return newid;
+}
+
+
+/*
+ * Function: fr_checkv4sum
+ * Returns: Nil
+ * Parameters: fin - packet information
+ *
+ * When IPFILTER_CKSUM is defined, mark the packet FI_BAD if
+ * fr_checkl4sum() reports -1; otherwise this does nothing.
+ */
+#ifndef IPFILTER_CKSUM
+/* ARGSUSED */
+#endif
+INLINE void fr_checkv4sum(fin)
+fr_info_t *fin;
+{
+#ifdef IPFILTER_CKSUM
+	int sumfailed;
+
+	sumfailed = (fr_checkl4sum(fin) == -1);
+	if (sumfailed) {
+		fin->fin_flx |= FI_BAD;
+	}
+#endif
+}
+
+
+#ifdef USE_INET6
+/*
+ * Function: fr_checkv6sum
+ * Returns: Nil
+ * Parameters: fin - packet information
+ *
+ * IPv6 counterpart of fr_checkv4sum: when IPFILTER_CKSUM is defined, mark
+ * the packet FI_BAD if fr_checkl4sum() reports -1; otherwise do nothing.
+ */
+# ifndef IPFILTER_CKSUM
+/* ARGSUSED */
+# endif
+INLINE void fr_checkv6sum(fin)
+fr_info_t *fin;
+{
+# ifdef IPFILTER_CKSUM
+	int sumfailed;
+
+	sumfailed = (fr_checkl4sum(fin) == -1);
+	if (sumfailed) {
+		fin->fin_flx |= FI_BAD;
+	}
+# endif
+}
+#endif /* USE_INET6 */
+
+
+/*
+ * Function: fr_verifysrc
+ * Returns: int (really boolean) - 1 if the source address is valid for
+ *          the receiving interface, else 0
+ * Parameters: fin - packet information
+ *
+ * Implements the "fr_chksrc" feature: look up the route back to the
+ * packet's source address and accept the packet only if that route uses
+ * the interface the packet arrived on (fin->fin_ifp).  A source with no
+ * route at all is rejected.
+ *
+ * NOTE(review): the previous comment said that on Solaris 10 and later the
+ * check is made against an address set supplied by pfild; the code here
+ * uses the routing lookup in every configuration - confirm.
+ */
+int fr_verifysrc(fin)
+fr_info_t *fin;
+{
+	ire_t *route;
+	int valid;
+
+#if SOLARIS2 >= 6
+	route = ire_route_lookup(fin->fin_saddr, 0xffffffff, 0, 0, NULL,
+	    NULL, NULL, NULL,
+	    MATCH_IRE_DSTONLY|MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE);
+#else
+	route = ire_lookup(fin->fin_saddr);
+#endif
+
+	if (route == NULL)
+		return 0;
+
+	if (ire_to_ill(route) == fin->fin_ifp)
+		valid = 1;
+	else
+		valid = 0;
+#if SOLARIS2 >= 8
+	/* give back the reference taken by the lookup */
+	ire_refrele(route);
+#endif
+	return valid;
+}
+
+
+#if (SOLARIS2 < 7)
+void fr_slowtimer()
+#else
+/*ARGSUSED*/
+void fr_slowtimer __P((void *ptr))
+#endif
+{ /*
+ * Periodic housekeeping callback.  Each run expires fragment, state, NAT
+ * and auth entries, advances fr_ticks by one and re-arms itself through
+ * timeout() for drv_usectohz(500000), i.e. 500000 microseconds later.
+ * The ptr argument (SOLARIS2 >= 7 form) is not used.
+ */
+
+ WRITE_ENTER(&ipf_global);
+ if (fr_running <= 0) { /* not running: do no expiry work */
+ if (fr_running == -1) /* only the value -1 keeps the timer armed */
+ fr_timer_id = timeout(fr_slowtimer, NULL,
+ drv_usectohz(500000));
+ else
+ fr_timer_id = NULL;
+ RWLOCK_EXIT(&ipf_global);
+ return;
+ }
+ MUTEX_DOWNGRADE(&ipf_global); /* the expiry calls below run with the downgraded lock */
+
+ fr_fragexpire();
+ fr_timeoutstate();
+ fr_natexpire();
+ fr_authexpire();
+ fr_ticks++; /* one tick per run; fragment ttl values are expressed in these ticks */
+ if (fr_running == -1 || fr_running == 1) /* fr_running is re-read here before re-arming */
+ fr_timer_id = timeout(fr_slowtimer, NULL, drv_usectohz(500000));
+ else
+ fr_timer_id = NULL;
+ RWLOCK_EXIT(&ipf_global);
+}
+
+
+/*
+ * Function: fr_fastroute
+ * Returns: 0: success;
+ * -1: failed
+ * Parameters:
+ * mb: the message block where ip head starts
+ * mpp: the pointer to the pointer of the original
+ * packet message
+ * fin: packet information
+ * fdp: destination interface information
+ * if it is NULL, no interface information provided.
+ *
+ * This function is for fastroute/to/dup-to rules. When IRE_ILL_CN is
+ * defined it calls pfil_make_dl_packet to search route, make lay-2
+ * header, and identify output queue for the IP packet, and then
+ * pfil_send_dl_packet to transmit it. Otherwise it looks the route up
+ * itself (ire_route_lookup/ire_lookup), prepends the link layer header
+ * and passes the message on with putnext.
+ * The destination address depends on the following conditions:
+ * 1: for fastroute rule, fdp is passed in as NULL, so the
+ * destination address is the IP Packet's destination address
+ * 2: for to/dup-to rule, if an ip address is specified after
+ * the interface name, this address is used as the destination
+ * address. Otherwise IP Packet's destination address is used
+ *
+ * On the bad_fastroute failure path the message mb is freed with freemsg.
+ * fr_frouteok[0] counts successes and fr_frouteok[1] counts failures.
+ */
+int fr_fastroute(mb, mpp, fin, fdp)
+mblk_t *mb, **mpp;
+fr_info_t *fin;
+frdest_t *fdp;
+{
+ struct in_addr dst;
+#ifndef IRE_ILL_CN
+ size_t hlen = 0;
+ ill_t *ifp;
+ ire_t *dir;
+ u_char *s;
+ frdest_t fd;
+#ifdef USE_INET6
+ ip6_t *ip6 = (ip6_t *)fin->fin_ip;
+#endif
+#else
+ void *target = NULL;
+ char *ifname = NULL;
+#endif
+ queue_t *q = NULL;
+ mblk_t *mp = NULL;
+ qpktinfo_t *qpi;
+ frentry_t *fr;
+ qif_t *qif;
+ ip_t *ip;
+#ifndef sparc
+ u_short __iplen, __ipoff;
+#endif
+#ifdef USE_INET6
+ struct in6_addr dst6;
+#endif
+#ifndef IRE_ILL_CN
+ dir = NULL;
+#endif
+ fr = fin->fin_fr;
+ ip = fin->fin_ip;
+ qpi = fin->fin_qpi;
+
+ /*
+ * If this is a duplicate mblk then we want ip to point at that
+ * data, not the original, if and only if it is already pointing at
+ * the current mblk data.
+ */
+ if (ip == (ip_t *)qpi->qpi_m->b_rptr && qpi->qpi_m != mb)
+ ip = (ip_t *)mb->b_rptr;
+
+ /*
+ * If there is another M_PROTO, we don't want it
+ */
+ if (*mpp != mb) {
+ mp = unlinkb(*mpp);
+ freeb(*mpp);
+ *mpp = mp;
+ }
+
+#ifdef IRE_ILL_CN
+ if (fdp != NULL) { /* NOTE(review): in this build qif is assigned only
+ * when fdp != NULL, yet it is read further down (the fin_rev check
+ * and "fin->fin_ifp = qif") - confirm it cannot be used unset */
+#else
+ /*
+ * If the fdp is NULL then there is no set route for this packet.
+ */
+ if (fdp == NULL) {
+ qif = fin->fin_ifp;
+
+ switch (fin->fin_v)
+ {
+ case 4 :
+ fd.fd_ip = ip->ip_dst;
+ break;
+#ifdef USE_INET6
+ case 6 :
+ fd.fd_ip6.in6 = ip6->ip6_dst;
+ break;
+#endif
+ }
+ fdp = &fd;
+ } else {
+#endif
+ qif = fdp->fd_ifp;
+
+ if (qif == NULL || qif == (void *)-1)
+ goto bad_fastroute;
+ }
+
+ /*
+ * In case we're here due to "to <if>" being used with
+ * "keep state", check that we're going in the correct
+ * direction.
+ */
+ if ((fr != NULL) && (fin->fin_rev != 0)) {
+ if ((qif != NULL) && (fdp == &fr->fr_tif))
+ return -1; /* NOTE(review): this exit does not free mb, unlike bad_fastroute - confirm intended */
+ dst.s_addr = fin->fin_fi.fi_daddr;
+ } else {
+ if (fin->fin_v == 4) {
+ if (fdp && fdp->fd_ip.s_addr != 0) {
+ dst = fdp->fd_ip;
+#ifdef IRE_ILL_CN
+ target = &dst;
+#endif
+ } else
+ dst.s_addr = fin->fin_fi.fi_daddr;
+ }
+#ifdef USE_INET6
+ else if (fin->fin_v == 6) {
+ if (fdp && IP6_NOTZERO(&fdp->fd_ip)) {
+ dst6 = fdp->fd_ip6.in6;
+#ifdef IRE_ILL_CN
+ target = &dst6;
+#endif
+ } else
+ dst6 = fin->fin_dst6;
+ }
+#endif
+ else
+ goto bad_fastroute;
+ }
+
+#ifndef IRE_ILL_CN
+#if SOLARIS2 >= 6
+ if (fin->fin_v == 4) {
+ dir = ire_route_lookup(dst.s_addr, 0xffffffff, 0, 0, NULL,
+ NULL, NULL, MATCH_IRE_DSTONLY|
+ MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE);
+ }
+# ifdef USE_INET6
+ else if (fin->fin_v == 6) {
+ dir = ire_route_lookup_v6(&ip6->ip6_dst, NULL, 0, 0, /* NOTE(review): uses ip6->ip6_dst, not the dst6 chosen above - confirm */
+ NULL, NULL, NULL, MATCH_IRE_DSTONLY|
+ MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE);
+ }
+# endif
+#else
+ dir = ire_lookup(dst.s_addr);
+#endif
+#if SOLARIS2 < 8
+ if (dir != NULL)
+ if (dir->ire_ll_hdr_mp == NULL || dir->ire_ll_hdr_length == 0)
+ dir = NULL;
+#else
+ if (dir != NULL)
+ if (dir->ire_fp_mp == NULL || dir->ire_dlureq_mp == NULL) {
+ ire_refrele(dir);
+ dir = NULL;
+ }
+#endif
+#else /* IRE_ILL_CN */
+ if (fdp && fdp->fd_ifname[0] != 0)
+ ifname = fdp->fd_ifname;
+
+ DB_CKSUMFLAGS(mb) = 0; /* disable hardware checksum */
+ mp = pfil_make_dl_packet(mb, ip, target, ifname, &q);
+ if (mp == NULL)
+ {
+ goto bad_fastroute;
+ }
+ mb = mp;
+#endif /* IRE_ILL_CN */
+
+#ifdef IRE_ILL_CN
+ if (mp != NULL) {
+#else
+ if (dir != NULL) {
+#if SOLARIS2 < 8
+ mp = dir->ire_ll_hdr_mp;
+ hlen = dir->ire_ll_hdr_length;
+#else
+ mp = dir->ire_fp_mp;
+ hlen = mp ? mp->b_wptr - mp->b_rptr : 0;
+ if (mp == NULL)
+ mp = dir->ire_dlureq_mp;
+#endif
+#endif
+ if (fin->fin_out == 0) { /* inbound packet: present it as outbound
+ * on qif while the accounting, state and NAT checks run, then
+ * restore fin_out and fin_ifp */
+ void *saveqif;
+ u_32_t pass;
+
+ saveqif = fin->fin_ifp;
+ fin->fin_ifp = qif;
+ fin->fin_out = 1;
+ (void)fr_acctpkt(fin, &pass);
+ fin->fin_fr = NULL;
+ if (!fr || !(fr->fr_flags & FR_RETMASK))
+ (void) fr_checkstate(fin, &pass);
+
+ switch (fr_checknatout(fin, NULL))
+ {
+ /* FALLTHROUGH */
+ case 0 :
+ case 1 :
+ break;
+ case -1 :
+ goto bad_fastroute; /* fin_out/fin_ifp are not restored on this path */
+ }
+
+ fin->fin_out = 0;
+ fin->fin_ifp = saveqif;
+ }
+#ifndef sparc
+ if (fin->fin_v == 4) { /* put ip_len/ip_off through htons() before the packet is sent */
+ __iplen = (u_short)ip->ip_len,
+ __ipoff = (u_short)ip->ip_off;
+
+ ip->ip_len = htons(__iplen);
+ ip->ip_off = htons(__ipoff);
+ }
+#endif
+#ifndef IRE_ILL_CN
+ ifp = qif->qf_ill;
+
+ if (mp != NULL) {
+ s = mb->b_rptr;
+ if (
+#if (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC)
+ (dohwcksum &&
+ ifp->ill_ick.ick_magic == ICK_M_CTL_MAGIC) ||
+#endif
+ (hlen && (s - mb->b_datap->db_base) >= hlen)) { /* room in front of the data: copy the header in place */
+ s -= hlen;
+ mb->b_rptr = (u_char *)s;
+ bcopy((char *)mp->b_rptr, (char *)s, hlen);
+ } else { /* otherwise link a copy of the header block in front of mb */
+ mblk_t *mp2;
+
+ mp2 = copyb(mp);
+ if (mp2 == NULL)
+ goto bad_fastroute;
+ linkb(mp2, mb);
+ mb = mp2;
+ }
+ }
+ *mpp = mb;
+
+ if (dir->ire_stq != NULL)
+ q = dir->ire_stq;
+ else if (dir->ire_rfq != NULL)
+ q = WR(dir->ire_rfq);
+ if (q != NULL)
+ q = q->q_next;
+ if (q != NULL) {
+ RWLOCK_EXIT(&ipf_global); /* released around putnext(); re-entered as reader below */
+#if (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC)
+ if ((fin->fin_p == IPPROTO_TCP) && dohwcksum &&
+ (ifp->ill_ick.ick_magic == ICK_M_CTL_MAGIC)) {
+ tcphdr_t *tcp;
+ u_32_t t;
+
+ tcp = (tcphdr_t *)((char *)ip + fin->fin_hlen);
+ t = ip->ip_src.s_addr;
+ t += ip->ip_dst.s_addr;
+ t += 30;
+ t = (t & 0xffff) + (t >> 16);
+ tcp->th_sum = t & 0xffff;
+ }
+#endif
+ putnext(q, mb);
+ ATOMIC_INCL(fr_frouteok[0]);
+#if SOLARIS2 >= 8
+ ire_refrele(dir);
+#endif
+ READ_ENTER(&ipf_global);
+ return 0;
+ }
+#else /* IRE_ILL_CN */
+ mb->b_queue = q;
+ *mpp = mb;
+ pfil_send_dl_packet(q, mb);
+ ATOMIC_INCL(fr_frouteok[0]);
+ return 0;
+#endif /* IRE_ILL_CN */
+ }
+bad_fastroute:
+#ifndef IRE_ILL_CN
+#if SOLARIS2 >= 8
+ if (dir != NULL)
+ ire_refrele(dir);
+#endif
+#endif
+ freemsg(mb); /* failure: the message is freed here */
+ ATOMIC_INCL(fr_frouteok[1]);
+ return -1;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    fr_pullup                                                   */
+/* Returns:     NULL == pullup failed, else pointer to protocol header      */
+/* Parameters:  min(I) - pointer to buffer where data packet starts         */
+/*              fin(I) - pointer to packet information                      */
+/*              len(I) - number of bytes to pullup, counted from the start  */
+/*                       of the IP header                                   */
+/*                                                                          */
+/* Attempt to make at least len bytes of the packet (plus the fin_ipoff     */
+/* bytes that sit in front of the IP header) available in a single buffer   */
+/* for ease of access.  Operating system native functions are used to       */
+/* manage buffers - if necessary.  If the entire packet ends up in a single */
+/* buffer, set the FI_COALESCE flag even though fr_coalesce() has not been  */
+/* called.  Both fin_ip and fin_dp are updated before exiting _IF_ and ONLY */
+/* if the pullup succeeds.  If the pullup fails the whole packet is freed   */
+/* and the pointers to it in fin and its qpktinfo_t are set to NULL.        */
+/*                                                                          */
+/* We assume that 'min' is a pointer to a buffer that is part of the chain  */
+/* of buffers that starts at *fin->fin_mp.                                  */
+/* ------------------------------------------------------------------------ */
+void *fr_pullup(min, fin, len)
+mb_t *min;
+fr_info_t *fin;
+int len;
+{
+	qpktinfo_t *qpi = fin->fin_qpi;
+	int out = fin->fin_out, dpoff, ipoff;
+	mb_t *m = min;
+	char *ip;
+
+	if (m == NULL)
+		return NULL;
+
+	ip = (char *)fin->fin_ip;
+	if ((fin->fin_flx & FI_COALESCE) != 0)
+		return ip;
+
+	ipoff = fin->fin_ipoff;
+	/* remember where fin_dp sits relative to the IP header */
+	if (fin->fin_dp != NULL)
+		dpoff = (char *)fin->fin_dp - (char *)ip;
+	else
+		dpoff = 0;
+
+	/*
+	 * The IP header starts ipoff bytes into this buffer, so the buffer
+	 * has to hold len + ipoff bytes before len bytes of the packet are
+	 * contiguous; this is also what pullupmsg() is asked for below.
+	 */
+	if (M_LEN(m) < len + ipoff) {
+
+		/*
+		 * pfil_precheck ensures the IP header is on a 32bit
+		 * aligned address.  If ipoff is not a multiple of 4, back
+		 * b_rptr up (when there is room) so that the IP header is
+		 * still 32bit aligned after pullupmsg() has copied the data.
+		 */
+		int inc = 0;
+
+		if (ipoff > 0) {
+			if ((ipoff & 3) != 0) {
+				inc = 4 - (ipoff & 3);
+				if (m->b_rptr - inc >= m->b_datap->db_base)
+					m->b_rptr -= inc;
+				else
+					inc = 0;
+			}
+		}
+		if (pullupmsg(m, len + ipoff + inc) == 0) {
+			ATOMIC_INCL(frstats[out].fr_pull[1]);
+			FREE_MB_T(*fin->fin_mp);
+			*fin->fin_mp = NULL;
+			fin->fin_m = NULL;
+			fin->fin_ip = NULL;
+			fin->fin_dp = NULL;
+			qpi->qpi_data = NULL;
+			return NULL;
+		}
+		m->b_rptr += inc;
+		fin->fin_m = m;
+		ip = MTOD(m, char *) + ipoff;
+		qpi->qpi_data = ip;
+	}
+
+	ATOMIC_INCL(frstats[out].fr_pull[0]);
+	fin->fin_ip = (ip_t *)ip;
+	if (fin->fin_dp != NULL)
+		fin->fin_dp = (char *)fin->fin_ip + dpoff;
+
+	if (len == fin->fin_plen)
+		fin->fin_flx |= FI_COALESCE;
+	return ip;
+}
diff --git a/usr/src/uts/common/inet/ipf/ip_frag.c b/usr/src/uts/common/inet/ipf/ip_frag.c
new file mode 100644
index 0000000000..29362c8a83
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/ip_frag.c
@@ -0,0 +1,885 @@
+/*
+ * Copyright (C) 1993-2003 by Darren Reed.
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ *
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#if defined(KERNEL) || defined(_KERNEL)
+# undef KERNEL
+# undef _KERNEL
+# define KERNEL 1
+# define _KERNEL 1
+#endif
+#include <sys/errno.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/file.h>
+#ifdef __hpux
+# include <sys/timeout.h>
+#endif
+#if !defined(_KERNEL)
+# include <stdio.h>
+# include <string.h>
+# include <stdlib.h>
+# define _KERNEL
+# ifdef __OpenBSD__
+struct file;
+# endif
+# include <sys/uio.h>
+# undef _KERNEL
+#endif
+#if defined(_KERNEL) && (__FreeBSD_version >= 220000)
+# include <sys/filio.h>
+# include <sys/fcntl.h>
+#else
+# include <sys/ioctl.h>
+#endif
+#if !defined(linux)
+# include <sys/protosw.h>
+#endif
+#include <sys/socket.h>
+#if defined(_KERNEL)
+# include <sys/systm.h>
+# if !defined(__SVR4) && !defined(__svr4__)
+# include <sys/mbuf.h>
+# endif
+#endif
+#if !defined(__SVR4) && !defined(__svr4__)
+# if defined(_KERNEL) && !defined(__sgi) && !defined(AIX)
+# include <sys/kernel.h>
+# endif
+#else
+# include <sys/byteorder.h>
+# ifdef _KERNEL
+# include <sys/dditypes.h>
+# endif
+# include <sys/stream.h>
+# include <sys/kmem.h>
+#endif
+#include <net/if.h>
+#ifdef sun
+# include <net/af.h>
+#endif
+#include <net/route.h>
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#if !defined(linux)
+# include <netinet/ip_var.h>
+#endif
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <netinet/ip_icmp.h>
+#include "netinet/ip_compat.h"
+#include <netinet/tcpip.h>
+#include "netinet/ip_fil.h"
+#include "netinet/ip_nat.h"
+#include "netinet/ip_frag.h"
+#include "netinet/ip_state.h"
+#include "netinet/ip_auth.h"
+#include "netinet/ip_proxy.h"
+#if (__FreeBSD_version >= 300000)
+# include <sys/malloc.h>
+# if defined(_KERNEL)
+# ifndef IPFILTER_LKM
+# include <sys/libkern.h>
+# include <sys/systm.h>
+# endif
+extern struct callout_handle fr_slowtimer_ch;
+# endif
+#endif
+#if defined(__NetBSD__) && (__NetBSD_Version__ >= 104230000)
+# include <sys/callout.h>
+extern struct callout fr_slowtimer_ch;
+#endif
+#if defined(__OpenBSD__)
+# include <sys/timeout.h>
+extern struct timeout fr_slowtimer_ch;
+#endif
+/* END OF INCLUDES */
+
+#if !defined(lint)
+static const char sccsid[] = "@(#)ip_frag.c 1.11 3/24/96 (C) 1993-2000 Darren Reed";
+static const char rcsid[] = "@(#)$Id: ip_frag.c,v 2.77.2.5 2005/08/11 14:33:10 darrenr Exp $";
+#endif
+
+
+static ipfr_t *ipfr_list = NULL; /* filter fragment entries, linked in creation order */
+static ipfr_t **ipfr_tail = &ipfr_list; /* where fr_newfrag appends the next entry */
+static ipfr_t **ipfr_heads; /* hash table for filter fragments, allocated in fr_fraginit */
+
+static ipfr_t *ipfr_natlist = NULL; /* as above, for NAT fragment entries */
+static ipfr_t **ipfr_nattail = &ipfr_natlist;
+static ipfr_t **ipfr_nattab;
+
+static ipfr_t *ipfr_ipidlist = NULL; /* as above, for IP ID fragment entries */
+static ipfr_t **ipfr_ipidtail = &ipfr_ipidlist;
+static ipfr_t **ipfr_ipidtab;
+
+static ipfrstat_t ipfr_stats; /* counters, handed out by fr_fragstats */
+static int ipfr_inuse = 0; /* entries currently allocated by ipfr_newfrag */
+int ipfr_size = IPFT_SIZE; /* number of buckets allocated for each of the three tables */
+
+int fr_ipfrttl = 120; /* in fr_ticks units; 60 seconds if fr_ticks advances every 500ms */
+int fr_frag_lock = 0; /* non-zero: the fr_*newfrag functions create no new entries */
+int fr_frag_init = 0; /* set to 1 by fr_fraginit, back to 0 by fr_fragunload */
+u_long fr_ticks = 0; /* tick counter that ipfr_ttl values are computed from */
+
+
+static ipfr_t *ipfr_newfrag __P((fr_info_t *, u_32_t, ipfr_t **));
+static ipfr_t *fr_fraglookup __P((fr_info_t *, ipfr_t **));
+static void fr_fragdelete __P((ipfr_t *, ipfr_t ***));
+
+static frentry_t frblock; /* "block in all" rule, filled in by fr_fraginit */
+
+/* ------------------------------------------------------------------------ */
+/* Function:    fr_fraginit                                                 */
+/* Returns:     int - 0 == success, -1 == error                             */
+/* Parameters:  Nil                                                         */
+/*                                                                          */
+/* Allocate and clear the three fragment cache hash tables (filter, NAT and */
+/* IP ID), set up the fragment lock and the built-in "block in all" rule.   */
+/* If an allocation fails, -1 is returned straight away; tables that were   */
+/* already allocated are left in place.                                     */
+/* ------------------------------------------------------------------------ */
+int fr_fraginit()
+{
+	size_t tabsz = ipfr_size * sizeof(ipfr_t *);
+
+	KMALLOCS(ipfr_heads, ipfr_t **, tabsz);
+	if (ipfr_heads == NULL) {
+		return -1;
+	}
+	bzero((char *)ipfr_heads, tabsz);
+
+	KMALLOCS(ipfr_nattab, ipfr_t **, tabsz);
+	if (ipfr_nattab == NULL) {
+		return -1;
+	}
+	bzero((char *)ipfr_nattab, tabsz);
+
+	KMALLOCS(ipfr_ipidtab, ipfr_t **, tabsz);
+	if (ipfr_ipidtab == NULL) {
+		return -1;
+	}
+	bzero((char *)ipfr_ipidtab, tabsz);
+
+	RWLOCK_INIT(&ipf_frag, "ipf fragment rwlock");
+
+	/* frblock is the rule equivalent of "block in all" */
+	bzero((char *)&frblock, sizeof(frblock));
+	frblock.fr_ref = 1;
+	frblock.fr_flags = FR_BLOCK|FR_INQUE;	/* block in */
+
+	fr_frag_init = 1;
+	return 0;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    fr_fragunload                                               */
+/* Returns:     Nil                                                         */
+/* Parameters:  Nil                                                         */
+/*                                                                          */
+/* Undo fr_fraginit: if initialisation completed, clear the caches via      */
+/* fr_fragclear() and destroy the fragment lock; then release whichever of  */
+/* the three hash tables exist and reset their pointers to NULL.            */
+/* ------------------------------------------------------------------------ */
+void fr_fragunload()
+{
+	size_t tabsz = ipfr_size * sizeof(ipfr_t *);
+
+	if (fr_frag_init == 1) {
+		fr_fragclear();
+
+		RW_DESTROY(&ipf_frag);
+		fr_frag_init = 0;
+	}
+
+	if (ipfr_heads != NULL) {
+		KFREES(ipfr_heads, tabsz);
+		ipfr_heads = NULL;
+	}
+
+	if (ipfr_nattab != NULL) {
+		KFREES(ipfr_nattab, tabsz);
+		ipfr_nattab = NULL;
+	}
+
+	if (ipfr_ipidtab != NULL) {
+		KFREES(ipfr_ipidtab, tabsz);
+		ipfr_ipidtab = NULL;
+	}
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    fr_fragstats                                                */
+/* Returns:     ipfrstat_t* - pointer to struct with current frag stats     */
+/* Parameters:  Nil                                                         */
+/*                                                                          */
+/* Refreshes the table pointers and the in-use count held in ipfr_stats and */
+/* returns the address of that (static) structure.                          */
+/* ------------------------------------------------------------------------ */
+ipfrstat_t *fr_fragstats()
+{
+	ipfrstat_t *stats = &ipfr_stats;
+
+	stats->ifs_inuse = ipfr_inuse;
+	stats->ifs_table = ipfr_heads;
+	stats->ifs_nattab = ipfr_nattab;
+
+	return stats;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    ipfr_newfrag                                                */
+/* Returns:     ipfr_t * - pointer to fragment cache state info or NULL     */
+/* Parameters:  fin(I)   - pointer to packet information                    */
+/*              pass(I)  - filter result to remember for this packet        */
+/*              table(I) - pointer to frag table to add to                  */
+/*                                                                          */
+/* Create a fragment cache entry for this packet in the given hash table,   */
+/* recording the filter result with it.  NULL is returned when the cache is */
+/* full, the packet is not a (good) fragment, FR_FRSTRICT is set and this   */
+/* is not the first fragment, an identical entry already exists, or memory  */
+/* cannot be allocated.  Linking the entry into the per-cache list is left  */
+/* to the caller.                                                           */
+/* ------------------------------------------------------------------------ */
+static ipfr_t *ipfr_newfrag(fin, pass, table)
+fr_info_t *fin;
+u_32_t pass;
+ipfr_t *table[];
+{
+	ipfr_t key, *ent;
+	u_int hv, foff;
+
+	if (ipfr_inuse >= IPFT_SIZE)
+		return NULL;
+
+	if ((fin->fin_flx & (FI_FRAG|FI_BAD)) != FI_FRAG)
+		return NULL;
+
+	if ((pass & FR_FRSTRICT) && (fin->fin_off != 0))
+		return NULL;
+
+	/*
+	 * Fill in the fields that identify the packet ...
+	 */
+	key.ipfr_p = fin->fin_p;
+	key.ipfr_id = fin->fin_id;
+	key.ipfr_source = fin->fin_fi.fi_src;
+	key.ipfr_dest = fin->fin_fi.fi_dst;
+	key.ipfr_ifp = fin->fin_ifp;
+	key.ipfr_optmsk = fin->fin_fi.fi_optmsk & IPF_OPTCOPY;
+	key.ipfr_secmsk = fin->fin_fi.fi_secmsk;
+	key.ipfr_auth = fin->fin_fi.fi_auth;
+
+	/*
+	 * ... and hash protocol, id and both addresses into a bucket number.
+	 */
+	hv = fin->fin_p;
+	hv += fin->fin_id;
+	hv += key.ipfr_src.s_addr;
+	hv += key.ipfr_dst.s_addr;
+	hv *= 127;
+	hv %= IPFT_SIZE;
+
+	/*
+	 * Refuse to add a second entry for the same packet.
+	 */
+	ent = table[hv];
+	while (ent != NULL) {
+		if (bcmp((char *)&key.ipfr_ifp, (char *)&ent->ipfr_ifp,
+			 IPFR_CMPSZ) == 0) {
+			ipfr_stats.ifs_exists++;
+			return NULL;
+		}
+		ent = ent->ipfr_hnext;
+	}
+
+	/*
+	 * Get memory for the new entry; count the failure if there is none.
+	 */
+	KMALLOC(ent, ipfr_t *);
+	if (ent == NULL) {
+		ipfr_stats.ifs_nomem++;
+		return NULL;
+	}
+
+	/*
+	 * The entry holds a reference on the rule that matched, if any.
+	 */
+	ent->ipfr_rule = fin->fin_fr;
+	if (fin->fin_fr != NULL) {
+		frentry_t *rule = fin->fin_fr;
+
+		MUTEX_ENTER(&rule->fr_lock);
+		rule->fr_ref++;
+		MUTEX_EXIT(&rule->fr_lock);
+	}
+
+	/*
+	 * Put the entry at the head of its hash bucket, then copy the
+	 * identifying fields across from the search key in one go and give
+	 * the entry the default time to live.
+	 */
+	ent->ipfr_hnext = table[hv];
+	if (ent->ipfr_hnext != NULL)
+		table[hv]->ipfr_hprev = &ent->ipfr_hnext;
+	ent->ipfr_hprev = &table[hv];
+	ent->ipfr_data = NULL;
+	table[hv] = ent;
+	bcopy((char *)&key.ipfr_ifp, (char *)&ent->ipfr_ifp, IPFR_CMPSZ);
+	ent->ipfr_ttl = fr_ticks + fr_ipfrttl;
+
+	/*
+	 * Note whether the first fragment has been seen and where the next
+	 * fragment is expected to start.
+	 */
+	foff = fin->fin_off;
+	if (foff != 0) {
+		ent->ipfr_seen0 = 0;
+		ent->ipfr_firstend = 0;
+	} else {
+		ent->ipfr_seen0 = 1;
+		ent->ipfr_firstend = fin->fin_flen;
+	}
+	ent->ipfr_off = foff + fin->fin_dlen;
+	ent->ipfr_pass = pass;
+
+	ipfr_stats.ifs_new++;
+	ipfr_inuse++;
+	return ent;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    fr_newfrag                                                  */
+/* Returns:     int - 0 == success, -1 == error                             */
+/* Parameters:  fin(I)  - pointer to packet information                     */
+/*              pass(I) - filter result to store with the entry             */
+/*                                                                          */
+/* Add an entry for the current packet to the filter fragment cache and     */
+/* append it to the list of filter fragments.  Fails without doing          */
+/* anything while fr_frag_lock is set.                                      */
+/* ------------------------------------------------------------------------ */
+int fr_newfrag(fin, pass)
+u_32_t pass;
+fr_info_t *fin;
+{
+	ipfr_t *ent;
+	int rv;
+
+	if (fr_frag_lock != 0)
+		return -1;
+
+	WRITE_ENTER(&ipf_frag);
+	ent = ipfr_newfrag(fin, pass, ipfr_heads);
+	if (ent == NULL) {
+		rv = -1;
+	} else {
+		/* append to the end of the list */
+		*ipfr_tail = ent;
+		ent->ipfr_prev = ipfr_tail;
+		ipfr_tail = &ent->ipfr_next;
+		if (ipfr_list == NULL)
+			ipfr_list = ent;
+		ent->ipfr_next = NULL;
+		rv = 0;
+	}
+	RWLOCK_EXIT(&ipf_frag);
+
+	return rv;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    fr_nat_newfrag                                              */
+/* Returns:     int - 0 == success (or nothing to do), -1 == error          */
+/* Parameters:  fin(I)  - pointer to packet information                     */
+/*              pass(I) - filter result to store with the entry             */
+/*              nat(I)  - pointer to NAT structure                          */
+/*                                                                          */
+/* Add an entry for the current packet to the NAT fragment cache and tie it */
+/* to the NAT structure for this "session" (each points at the other).      */
+/* Packets that are not IPv4, and all packets while fr_frag_lock is set,    */
+/* are ignored and 0 is returned.                                           */
+/* ------------------------------------------------------------------------ */
+int fr_nat_newfrag(fin, pass, nat)
+fr_info_t *fin;
+u_32_t pass;
+nat_t *nat;
+{
+	ipfr_t *ent;
+	int rv;
+
+	if (fin->fin_v != 4)
+		return 0;
+	if (fr_frag_lock != 0)
+		return 0;
+
+	WRITE_ENTER(&ipf_natfrag);
+	ent = ipfr_newfrag(fin, pass, ipfr_nattab);
+	if (ent == NULL) {
+		rv = -1;
+	} else {
+		/* cross link the cache entry and the NAT session */
+		ent->ipfr_data = nat;
+		nat->nat_data = ent;
+
+		/* append to the end of the NAT fragment list */
+		*ipfr_nattail = ent;
+		ent->ipfr_prev = ipfr_nattail;
+		ipfr_nattail = &ent->ipfr_next;
+		ent->ipfr_next = NULL;
+		rv = 0;
+	}
+	RWLOCK_EXIT(&ipf_natfrag);
+
+	return rv;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    fr_ipid_newfrag                                             */
+/* Returns:     int - 0 == success (or nothing to do), -1 == error          */
+/* Parameters:  fin(I)  - pointer to packet information                     */
+/*              ipid(I) - new IP ID for this fragmented packet              */
+/*                                                                          */
+/* Add an entry for the current packet to the IP ID fragment cache; the new */
+/* IP ID value itself is kept in the entry's data pointer.  While           */
+/* fr_frag_lock is set nothing is added and 0 is returned.                  */
+/* ------------------------------------------------------------------------ */
+int fr_ipid_newfrag(fin, ipid)
+fr_info_t *fin;
+u_32_t ipid;
+{
+	ipfr_t *ent;
+	int rv;
+
+	if (fr_frag_lock != 0)
+		return 0;
+
+	WRITE_ENTER(&ipf_ipidfrag);
+	ent = ipfr_newfrag(fin, 0, ipfr_ipidtab);
+	if (ent == NULL) {
+		rv = -1;
+	} else {
+		/* the ID is stored in the pointer, not pointed to by it */
+		ent->ipfr_data = (void *)(uintptr_t)ipid;
+
+		/* append to the end of the IP ID fragment list */
+		*ipfr_ipidtail = ent;
+		ent->ipfr_prev = ipfr_ipidtail;
+		ipfr_ipidtail = &ent->ipfr_next;
+		ent->ipfr_next = NULL;
+		rv = 0;
+	}
+	RWLOCK_EXIT(&ipf_ipidfrag);
+
+	return rv;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_fraglookup */
+/* Returns: ipfr_t * - pointer to ipfr_t structure if there's a */
+/* matching entry in the frag table, else NULL */
+/* Parameters: fin(I) - pointer to packet information */
+/* table(I) - pointer to fragment cache table to search */
+/* */
+/* Check the fragment cache to see if there is already a record of this */
+/* packet with its filter result known. */
+/* ------------------------------------------------------------------------ */
+static ipfr_t *fr_fraglookup(fin, table)
+fr_info_t *fin;
+ipfr_t *table[];
+{
+ ipfr_t *f, frag; /* frag is a scratch key built from the packet */
+ u_int idx;
+
+ if ((fin->fin_flx & (FI_FRAG|FI_BAD)) != FI_FRAG) /* must be a fragment and not already marked bad */
+ return NULL;
+
+ /*
+ * For fragments, we record protocol, packet id, TOS and both IP#'s
+ * (these should all be the same for all fragments of a packet).
+ *
+ * build up a hash value to index the table with.
+ */
+ frag.ipfr_p = fin->fin_p;
+ idx = fin->fin_p;
+ frag.ipfr_id = fin->fin_id;
+ idx += fin->fin_id;
+ frag.ipfr_source = fin->fin_fi.fi_src;
+ idx += frag.ipfr_src.s_addr;
+ frag.ipfr_dest = fin->fin_fi.fi_dst;
+ idx += frag.ipfr_dst.s_addr;
+ frag.ipfr_ifp = fin->fin_ifp; /* part of the compared key but not of the hash */
+ idx *= 127;
+ idx %= IPFT_SIZE; /* reduce to a bucket index */
+
+ frag.ipfr_optmsk = fin->fin_fi.fi_optmsk & IPF_OPTCOPY;
+ frag.ipfr_secmsk = fin->fin_fi.fi_secmsk;
+ frag.ipfr_auth = fin->fin_fi.fi_auth;
+
+ /*
+ * check the table, careful to only compare the right amount of data
+ * (IPFR_CMPSZ bytes, starting at the ipfr_ifp member of each entry)
+ */
+ for (f = table[idx]; f; f = f->ipfr_hnext)
+ if (!bcmp((char *)&frag.ipfr_ifp, (char *)&f->ipfr_ifp,
+ IPFR_CMPSZ)) {
+ u_short off;
+
+ /*
+ * We don't want to let short packets match because
+ * they could be compromising the security of other
+ * rules that want to match on layer 4 fields (and
+ * can't because they have been fragmented off.)
+ * Why do this check here? The counter acts as an
+ * indicator of this kind of attack, whereas if it was
+ * elsewhere, it wouldn't know if other matching
+ * packets had been seen.
+ */
+ if (fin->fin_flx & FI_SHORT) {
+ ATOMIC_INCL(ipfr_stats.ifs_short);
+ continue;
+ }
+
+ /*
+ * XXX - We really need to be guarding against the
+ * retransmission of (src,dst,id,offset-range) here
+ * because a fragmented packet is never resent with
+ * the same IP ID# (or shouldn't).
+ */
+ off = fin->fin_off; /* same as in ipfr_newfrag() */
+ if (f->ipfr_seen0) {
+ if (off == 0) { /* a second offset-0 fragment: count it and do not match */
+ ATOMIC_INCL(ipfr_stats.ifs_retrans0);
+ continue;
+ }
+ } else if (off == 0) {
+ f->ipfr_seen0 = 1; /* NOTE(review): writes to the entry, yet the callers in
+ * this file take only READ_ENTER on the table lock - confirm
+ * this is safe */
+ f->ipfr_firstend = fin->fin_flen;
+ }
+
+ if (f != table[idx]) {
+ ipfr_t **fp;
+
+ /*
+ * Move fragment info. to the top of the list
+ * to speed up searches. First, delink...
+ */
+ fp = f->ipfr_hprev;
+ (*fp) = f->ipfr_hnext;
+ if (f->ipfr_hnext != NULL)
+ f->ipfr_hnext->ipfr_hprev = fp;
+ /*
+ * Then put back at the top of the chain.
+ */
+ f->ipfr_hnext = table[idx];
+ table[idx]->ipfr_hprev = &f->ipfr_hnext;
+ f->ipfr_hprev = table + idx;
+ table[idx] = f;
+ }
+
+ if (fin->fin_v == 6) {
+ if (f->ipfr_seen0 && (off < f->ipfr_firstend)) /* starts inside the span recorded for the first fragment */
+ fin->fin_flx |= FI_BAD;
+ }
+ /*
+ * If we've followed the fragments, and this is the
+ * last (in order), shrink expiration time.
+ */
+ if (off == f->ipfr_off) {
+ if (!(fin->fin_ip->ip_off & IP_MF))
+ f->ipfr_ttl = fr_ticks + 1;
+ f->ipfr_off = fin->fin_dlen + off; /* offset expected of the next in-order fragment */
+ } else if (f->ipfr_pass & FR_FRSTRICT) /* out-of-order fragment under a strict rule: no match */
+ continue;
+ ATOMIC_INCL(ipfr_stats.ifs_hits);
+ return f;
+ }
+ return NULL;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_nat_knownfrag */
+/* Returns: nat_t* - pointer to 'parent' NAT structure if frag table */
+/* match found, else NULL */
+/* Parameters: fin(I) - pointer to packet information */
+/* */
+/* Functional interface for NAT lookups of the NAT fragment cache */
+/* ------------------------------------------------------------------------ */
+nat_t *fr_nat_knownfrag(fin)
+fr_info_t *fin;
+{
+	ipfr_t *entry;
+	nat_t *nat;
+
+	if ((fin->fin_v != 4) || (fr_frag_lock) || !ipfr_natlist)
+		return NULL;
+
+	nat = NULL;
+	READ_ENTER(&ipf_natfrag);
+	entry = fr_fraglookup(fin, ipfr_nattab);
+	if (entry != NULL) {
+		nat = entry->ipfr_data;
+		/*
+		 * A ttl of fr_ticks + 1 is what fr_fraglookup() sets once the
+		 * final in-order fragment has been seen, so break the link
+		 * between the cache entry and the NAT session.
+		 */
+		if ((nat != NULL) && (entry->ipfr_ttl == fr_ticks + 1)) {
+			nat->nat_data = NULL;
+			entry->ipfr_data = NULL;
+		}
+	}
+	RWLOCK_EXIT(&ipf_natfrag);
+	return nat;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    fr_ipid_knownfrag                                           */
+/* Returns:     u_32_t - IPv4 ID for this packet if match found, else       */
+/*                       0xffffffff to indicate no match.                   */
+/* Parameters:  fin(I) - pointer to packet information                      */
+/*                                                                          */
+/* Functional interface for IP ID lookups of the IP ID fragment cache.      */
+/* Nothing is looked up for non-IPv4 packets, while the fragment cache is   */
+/* locked, or while the IP ID list is empty.                                */
+/* ------------------------------------------------------------------------ */
+u_32_t fr_ipid_knownfrag(fin)
+fr_info_t *fin;
+{
+	ipfr_t *entry;
+	u_32_t id;
+
+	if ((fin->fin_v != 4) || (fr_frag_lock) || !ipfr_ipidlist)
+		return 0xffffffff;
+
+	id = 0xffffffff;
+	READ_ENTER(&ipf_ipidfrag);
+	entry = fr_fraglookup(fin, ipfr_ipidtab);
+	if (entry != NULL) {
+		/* The data pointer carries the ID value itself. */
+		id = (u_32_t)(uintptr_t)entry->ipfr_data;
+	}
+	RWLOCK_EXIT(&ipf_ipidfrag);
+	return id;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    fr_knownfrag                                                */
+/* Returns:     frentry_t* - pointer to filter rule if a match is found in  */
+/*                           the frag cache table, else NULL.               */
+/* Parameters:  fin(I)   - pointer to packet information                    */
+/*              passp(O) - pointer to where to store rule flags returned    */
+/*                                                                          */
+/* Functional interface for normal lookups of the fragment cache.  If a     */
+/* match is found, return the rule pointer and flags from the rule, except  */
+/* that if FR_LOGFIRST is set, both FR_LOGFIRST and FR_LOG are cleared.     */
+/* If the lookup newly marked the packet FI_BAD, the command bits in        */
+/* *passp are replaced with FR_BLOCK and &frblock is returned instead.      */
+/* ------------------------------------------------------------------------ */
+frentry_t *fr_knownfrag(fin, passp)
+fr_info_t *fin;
+u_32_t *passp;
+{
+	u_32_t flags, flx_before;
+	frentry_t *rule;
+	ipfr_t *entry;
+
+	if ((fr_frag_lock) || (ipfr_list == NULL))
+		return NULL;
+
+	rule = NULL;
+	READ_ENTER(&ipf_frag);
+	/* Remember the flags so a freshly set FI_BAD can be detected. */
+	flx_before = fin->fin_flx;
+	entry = fr_fraglookup(fin, ipfr_heads);
+	if (entry != NULL) {
+		rule = entry->ipfr_rule;
+		fin->fin_fr = rule;
+		if (rule != NULL) {
+			flags = rule->fr_flags;
+			if ((flags & FR_LOGFIRST) != 0)
+				flags &= ~(FR_LOGFIRST|FR_LOG);
+			*passp = flags;
+		}
+	}
+	if (((flx_before & FI_BAD) == 0) && ((fin->fin_flx & FI_BAD) != 0)) {
+		*passp = (*passp & ~FR_CMDMASK) | FR_BLOCK;
+		rule = &frblock;
+	}
+	RWLOCK_EXIT(&ipf_frag);
+	return rule;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_forget */
+/* Returns: Nil */
+/* Parameters: ptr(I) - pointer to data structure */
+/* */
+/* Search through all of the fragment cache entries and wherever a pointer */
+/* is found to match ptr, reset it to NULL. */
+/* ------------------------------------------------------------------------ */
+void fr_forget(ptr)
+void *ptr;
+{
+	ipfr_t *entry;
+
+	WRITE_ENTER(&ipf_frag);
+	entry = ipfr_list;
+	while (entry != NULL) {
+		/* Drop any reference this entry still holds to ptr. */
+		if (entry->ipfr_data == ptr)
+			entry->ipfr_data = NULL;
+		entry = entry->ipfr_next;
+	}
+	RWLOCK_EXIT(&ipf_frag);
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_forgetnat */
+/* Returns: Nil */
+/* Parameters: ptr(I) - pointer to data structure */
+/* */
+/* Search through all of the fragment cache entries for NAT and wherever a */
+/* pointer is found to match ptr, reset it to NULL. */
+/* ------------------------------------------------------------------------ */
+void fr_forgetnat(ptr)
+void *ptr;
+{
+	ipfr_t *entry;
+
+	WRITE_ENTER(&ipf_natfrag);
+	entry = ipfr_natlist;
+	while (entry != NULL) {
+		/* Drop any reference this NAT fragment entry holds to ptr. */
+		if (entry->ipfr_data == ptr)
+			entry->ipfr_data = NULL;
+		entry = entry->ipfr_next;
+	}
+	RWLOCK_EXIT(&ipf_natfrag);
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    fr_fragdelete                                               */
+/* Returns:     Nil                                                         */
+/* Parameters:  fra(I)   - pointer to fragment structure to delete          */
+/*              tail(IO) - pointer to the pointer to the tail of the frag   */
+/*                         list                                             */
+/*                                                                          */
+/* Unlink a fragment cache entry from both its hash chain and its list,     */
+/* then free it.  The reference held on the associated filter rule (if any) */
+/* is given up through fr_derefrule().  The caller is expected to hold the  */
+/* write lock for the table concerned, as every caller in this file does.   */
+/* ------------------------------------------------------------------------ */
+static void fr_fragdelete(fra, tail)
+ipfr_t *fra, ***tail;
+{
+	frentry_t *rule;
+	ipfr_t *after;
+
+	rule = fra->ipfr_rule;
+	if (rule != NULL)
+		(void)fr_derefrule(&rule);
+
+	/* Take the entry out of its hash chain. */
+	after = fra->ipfr_hnext;
+	*fra->ipfr_hprev = after;
+	if (after != NULL)
+		after->ipfr_hprev = fra->ipfr_hprev;
+
+	/*
+	 * Take the entry out of the list, pulling the tail pointer back if
+	 * this entry was the last one.
+	 */
+	after = fra->ipfr_next;
+	if (*tail == &fra->ipfr_next)
+		*tail = fra->ipfr_prev;
+	*fra->ipfr_prev = after;
+	if (after != NULL)
+		after->ipfr_prev = fra->ipfr_prev;
+
+	KFREE(fra);
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    fr_fragclear                                                */
+/* Returns:     Nil                                                         */
+/* Parameters:  Nil                                                         */
+/*                                                                          */
+/* Free memory in use by fragment state information kept.  Do the normal    */
+/* fragment state stuff first, then the IP ID fragment table and finally    */
+/* the NAT-fragment table.                                                  */
+/* ------------------------------------------------------------------------ */
+void fr_fragclear()
+{
+	ipfr_t *fra;
+	nat_t *nat;
+
+	WRITE_ENTER(&ipf_frag);
+	while ((fra = ipfr_list) != NULL)
+		fr_fragdelete(fra, &ipfr_tail);
+	ipfr_tail = &ipfr_list;
+	RWLOCK_EXIT(&ipf_frag);
+
+	/*
+	 * The IP ID cache is a third table of ipfr_t entries, created by
+	 * fr_ipid_newfrag() and aged by fr_fragexpire(); without this loop
+	 * whatever it still holds is never freed by a clear.
+	 */
+	WRITE_ENTER(&ipf_ipidfrag);
+	while ((fra = ipfr_ipidlist) != NULL)
+		fr_fragdelete(fra, &ipfr_ipidtail);
+	ipfr_ipidtail = &ipfr_ipidlist;
+	RWLOCK_EXIT(&ipf_ipidfrag);
+
+	/*
+	 * Same lock order as fr_fragexpire(): ipf_nat before ipf_natfrag.
+	 * Any NAT session still pointing back at an entry is detached
+	 * before that entry is freed.
+	 */
+	WRITE_ENTER(&ipf_nat);
+	WRITE_ENTER(&ipf_natfrag);
+	while ((fra = ipfr_natlist) != NULL) {
+		nat = fra->ipfr_data;
+		if ((nat != NULL) && (nat->nat_data == fra))
+			nat->nat_data = NULL;
+		fr_fragdelete(fra, &ipfr_nattail);
+	}
+	ipfr_nattail = &ipfr_natlist;
+	RWLOCK_EXIT(&ipf_natfrag);
+	RWLOCK_EXIT(&ipf_nat);
+
+	/*
+	 * NOTE(review): fr_fragexpire() decrements ipfr_inuse for every
+	 * entry it deletes but nothing here adjusts it - confirm whether the
+	 * counter is reset elsewhere after a clear.
+	 */
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    fr_fragexpire                                               */
+/* Returns:     Nil                                                         */
+/* Parameters:  Nil                                                         */
+/*                                                                          */
+/* Expire entries in the three fragment cache lists (filter, IP ID, NAT)    */
+/* that have been there too long.  Each list is consumed from its head and  */
+/* the walk stops at the first entry whose ttl is still above fr_ticks.     */
+/* Nothing is done while the fragment cache is locked.                      */
+/* ------------------------------------------------------------------------ */
+void fr_fragexpire()
+{
+	ipfr_t *entry;
+	nat_t *owner;
+	SPL_INT(s);
+
+	if (fr_frag_lock)
+		return;
+
+	SPL_NET(s);
+
+	/* Filter fragment cache. */
+	WRITE_ENTER(&ipf_frag);
+	while (((entry = ipfr_list) != NULL) &&
+	    (entry->ipfr_ttl <= fr_ticks)) {
+		fr_fragdelete(entry, &ipfr_tail);
+		ipfr_stats.ifs_expire++;
+		ipfr_inuse--;
+	}
+	RWLOCK_EXIT(&ipf_frag);
+
+	/* IP ID fragment cache. */
+	WRITE_ENTER(&ipf_ipidfrag);
+	while (((entry = ipfr_ipidlist) != NULL) &&
+	    (entry->ipfr_ttl <= fr_ticks)) {
+		fr_fragdelete(entry, &ipfr_ipidtail);
+		ipfr_stats.ifs_expire++;
+		ipfr_inuse--;
+	}
+	RWLOCK_EXIT(&ipf_ipidfrag);
+
+	/*
+	 * NAT fragment cache.  When the entry being freed is still the one
+	 * its NAT structure points back at, that back reference is cleared
+	 * first.  Both locks are taken up front, ipf_nat before ipf_natfrag,
+	 * so that two expiry paths cannot deadlock against each other.
+	 */
+	WRITE_ENTER(&ipf_nat);
+	WRITE_ENTER(&ipf_natfrag);
+	while (((entry = ipfr_natlist) != NULL) &&
+	    (entry->ipfr_ttl <= fr_ticks)) {
+		owner = entry->ipfr_data;
+		if ((owner != NULL) && (owner->nat_data == entry))
+			owner->nat_data = NULL;
+		fr_fragdelete(entry, &ipfr_nattail);
+		ipfr_stats.ifs_expire++;
+		ipfr_inuse--;
+	}
+	RWLOCK_EXIT(&ipf_natfrag);
+	RWLOCK_EXIT(&ipf_nat);
+
+	SPL_X(s);
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_slowtimer */
+/* Returns: Nil (the variant declared as int always returns 0) */
+/* Parameters: Nil (the BSD kernel variant takes an unused void *ptr) */
+/* */
+/* Slowly expire held state: fragments, state, NAT and auth entries are each given a chance to expire, then fr_ticks advances. Timeouts are set in */
+/* expectation of this being called twice per second. */
+/* ------------------------------------------------------------------------ */
+#if !defined(_KERNEL) || (!SOLARIS && !defined(__hpux) && !defined(__sgi) && \
+ !defined(__osf__) && !defined(linux))
+# if defined(_KERNEL) && ((BSD >= 199103) || defined(__sgi))
+void fr_slowtimer __P((void *ptr))
+# else
+int fr_slowtimer()
+# endif
+{
+ READ_ENTER(&ipf_global);
+
+ fr_fragexpire();
+ fr_timeoutstate();
+ fr_natexpire();
+ fr_authexpire();
+ fr_ticks++; /* the clock every ttl comparison in the expiry code is made against */
+ if (fr_running <= 0) /* not running: do not re-arm the timer */
+ goto done;
+# ifdef _KERNEL
+# if defined(__NetBSD__) && (__NetBSD_Version__ >= 104240000)
+ callout_reset(&fr_slowtimer_ch, hz / 2, fr_slowtimer, NULL); /* re-arm for hz/2 ticks from now */
+# else
+# if defined(__OpenBSD__)
+ timeout_add(&fr_slowtimer_ch, hz/2);
+# else
+# if (__FreeBSD_version >= 300000)
+ fr_slowtimer_ch = timeout(fr_slowtimer, NULL, hz/2);
+# else
+# ifdef linux
+ ;
+# else
+ timeout(fr_slowtimer, NULL, hz/2);
+# endif
+# endif /* FreeBSD */
+# endif /* OpenBSD */
+# endif /* NetBSD */
+# endif
+done:
+ RWLOCK_EXIT(&ipf_global);
+# if (BSD < 199103) || !defined(_KERNEL)
+ return 0;
+# endif
+}
+#endif /* !_KERNEL || (!SOLARIS && !__hpux && !__sgi && !__osf__ && !linux) */
diff --git a/usr/src/uts/common/inet/ipf/ip_htable.c b/usr/src/uts/common/inet/ipf/ip_htable.c
new file mode 100644
index 0000000000..4ce3cc411e
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/ip_htable.c
@@ -0,0 +1,618 @@
+/*
+ * Copyright (C) 1993-2001, 2003 by Darren Reed.
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ *
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#if defined(KERNEL) || defined(_KERNEL)
+# undef KERNEL
+# undef _KERNEL
+# define KERNEL 1
+# define _KERNEL 1
+#endif
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/file.h>
+#if !defined(_KERNEL)
+# include <stdlib.h>
+# include <string.h>
+# define _KERNEL
+# ifdef __OpenBSD__
+struct file;
+# endif
+# include <sys/uio.h>
+# undef _KERNEL
+#endif
+#include <sys/socket.h>
+#if defined(__FreeBSD_version) && (__FreeBSD_version >= 300000)
+# include <sys/malloc.h>
+#endif
+#if defined(__FreeBSD__)
+# include <sys/cdefs.h>
+# include <sys/proc.h>
+#endif
+#if !defined(__svr4__) && !defined(__SVR4) && !defined(__hpux) && \
+ !defined(linux)
+# include <sys/mbuf.h>
+#endif
+#if defined(_KERNEL)
+# include <sys/systm.h>
+#else
+# include <stdio.h>
+#endif
+#include <netinet/in.h>
+#include <net/if.h>
+
+#include "netinet/ip_compat.h"
+#include "netinet/ip_fil.h"
+#include "netinet/ip_lookup.h"
+#include "netinet/ip_htable.h"
+/* END OF INCLUDES */
+
+#if !defined(lint)
+static const char rcsid[] = "@(#)$Id: ip_htable.c,v 2.34.2.3 2005/05/14 05:11:38 darrenr Exp $";
+#endif
+
+#ifdef IPFILTER_LOOKUP
+static iphtent_t *fr_iphmfind __P((iphtable_t *, struct in_addr *)); /* IPv4 lookup; takes no lock itself */
+#ifdef USE_INET6
+static iphtent_t *fr_iphmfind6 __P((iphtable_t *, struct in6_addr *)); /* IPv6 lookup; takes no lock itself */
+static uint32_t sum4(uint32_t *); /* adds four 32-bit words, used to feed IPE_HASH_FN */
+static void left_shift_ipv6 __P((char *)); /* shifts a 128-bit value left by one bit */
+#endif
+
+static u_long ipht_nomem[IPL_LOGSIZE] = { 0, 0, 0, 0, 0, 0, 0, 0 }; /* per unit: allocation failures counted by fr_newhtable() */
+static u_long ipf_nhtables[IPL_LOGSIZE] = { 0, 0, 0, 0, 0, 0, 0, 0 }; /* per unit: tables linked, ++ in fr_newhtable(), -- in fr_delhtable() */
+static u_long ipf_nhtnodes[IPL_LOGSIZE] = { 0, 0, 0, 0, 0, 0, 0, 0 }; /* per unit: entries, ++ in fr_addhtent(), -- in fr_delhtent() */
+
+iphtable_t *ipf_htables[IPL_LOGSIZE] = { NULL, NULL, NULL, NULL, /* per unit: head of the list of hash tables */
+ NULL, NULL, NULL, NULL };
+
+/*
+ * Flush the hash tables of every unit by handing fr_flushhtable() a
+ * request for IPL_LOGALL; the count it returns is discarded.
+ */
+void fr_htable_unload()
+{
+ iplookupflush_t fop; /* only iplf_unit is filled in; fr_flushhtable() reads nothing else */
+
+ fop.iplf_unit = IPL_LOGALL;
+ (void)fr_flushhtable(&fop);
+}
+
+
+/*
+ * Copy the hash table statistics for the unit named in op out to
+ * op->iplo_struct.
+ *
+ * Returns EINVAL if op->iplo_size does not match sizeof(iphtstat_t) or if
+ * op->iplo_unit is not a valid index into the per-unit arrays; otherwise
+ * the result of COPYOUT().
+ */
+int fr_gethtablestat(op)
+iplookupop_t *op;
+{
+	iphtstat_t stats;
+	int unit;
+
+	if (op->iplo_size != sizeof(stats))
+		return EINVAL;
+
+	/*
+	 * The unit indexes four arrays of IPL_LOGSIZE elements below, so
+	 * reject anything outside that range (which includes IPL_LOGALL if
+	 * that is negative - TODO confirm) rather than reading past them.
+	 */
+	unit = op->iplo_unit;
+	if ((unit < 0) || (unit >= IPL_LOGSIZE))
+		return EINVAL;
+
+	/* Start from zero so that no stray stack bytes are copied out. */
+	bzero((char *)&stats, sizeof(stats));
+	stats.iphs_tables = ipf_htables[unit];
+	stats.iphs_numtables = ipf_nhtables[unit];
+	stats.iphs_numnodes = ipf_nhtnodes[unit];
+	stats.iphs_nomem = ipht_nomem[unit];
+
+	return COPYOUT(&stats, op->iplo_struct, sizeof(stats));
+}
+
+
+/*
+ * Create a new hash table using the template passed.
+ *
+ * The template is copied in from op->iplo_struct.  With IPHASH_ANON set in
+ * op->iplo_arg a numeric name not yet used in the unit is generated and the
+ * template, carrying that name, is copied back out to the caller.
+ *
+ * Returns 0 on success, EINVAL for a bad unit, a unit mismatch between op
+ * and the template, or an unusable table size, EFAULT if a copy fails,
+ * EEXIST if a table with the requested name already exists and ENOMEM if
+ * memory cannot be allocated.
+ */
+int fr_newhtable(op)
+iplookupop_t *op;
+{
+	iphtable_t *iph, *oiph;
+	char name[FR_GROUPLEN];
+	int err, i, unit;
+
+	/*
+	 * The unit indexes the per-unit arrays (including the no-memory
+	 * counter on the very first failure path), so check it before use.
+	 */
+	unit = op->iplo_unit;
+	if ((unit < 0) || (unit >= IPL_LOGSIZE))
+		return EINVAL;
+
+	KMALLOC(iph, iphtable_t *);
+	if (iph == NULL) {
+		ipht_nomem[unit]++;
+		return ENOMEM;
+	}
+
+	err = COPYIN(op->iplo_struct, iph, sizeof(*iph));
+	if (err != 0) {
+		KFREE(iph);
+		return EFAULT;
+	}
+
+	if (iph->iph_unit != unit) {
+		KFREE(iph);
+		return EINVAL;
+	}
+
+	/*
+	 * iph_size comes straight from the caller.  A table with no buckets
+	 * cannot be indexed, and a size whose byte count wraps around would
+	 * give an allocation smaller than the table that is then zeroed and
+	 * indexed.
+	 */
+	if ((iph->iph_size == 0) ||
+	    (iph->iph_size > ((size_t)-1) / sizeof(*iph->iph_table))) {
+		KFREE(iph);
+		return EINVAL;
+	}
+
+	if ((op->iplo_arg & IPHASH_ANON) == 0) {
+		if (fr_findhtable(unit, op->iplo_name) != NULL) {
+			KFREE(iph);
+			return EEXIST;
+		}
+	} else {
+		/*
+		 * Anonymous table: count upwards from IPHASH_ANON until the
+		 * decimal form of the number is a name no table in this unit
+		 * is using.
+		 */
+		i = IPHASH_ANON;
+		do {
+			i++;
+#if defined(SNPRINTF) && defined(_KERNEL)
+			(void)SNPRINTF(name, sizeof(name), "%u", i);
+#else
+			(void)sprintf(name, "%u", i);
+#endif
+			for (oiph = ipf_htables[unit]; oiph != NULL;
+			     oiph = oiph->iph_next)
+				if (strncmp(oiph->iph_name, name,
+					    sizeof(oiph->iph_name)) == 0)
+					break;
+		} while (oiph != NULL);
+		(void)strncpy(iph->iph_name, name, sizeof(iph->iph_name));
+		/* Tell the caller which name was picked. */
+		err = COPYOUT(iph, op->iplo_struct, sizeof(*iph));
+		if (err != 0) {
+			KFREE(iph);
+			return EFAULT;
+		}
+		iph->iph_type |= IPHASH_ANON;
+	}
+
+	KMALLOCS(iph->iph_table, iphtent_t **,
+		 iph->iph_size * sizeof(*iph->iph_table));
+	if (iph->iph_table == NULL) {
+		KFREE(iph);
+		ipht_nomem[unit]++;
+		return ENOMEM;
+	}
+
+	bzero((char *)iph->iph_table, iph->iph_size * sizeof(*iph->iph_table));
+	iph->iph_masks[0] = 0;
+	iph->iph_masks[1] = 0;
+	iph->iph_masks[2] = 0;
+	iph->iph_masks[3] = 0;
+
+	/* Link the new table in at the head of the unit's list. */
+	iph->iph_next = ipf_htables[unit];
+	iph->iph_pnext = &ipf_htables[unit];
+	if (ipf_htables[unit] != NULL)
+		ipf_htables[unit]->iph_pnext = &iph->iph_next;
+	ipf_htables[unit] = iph;
+
+	ipf_nhtables[unit]++;
+
+	return 0;
+}
+
+
+/*
+ * Remove the hash table named in the lookup operation.
+ *
+ * Returns 0 once fr_delhtable() has been called on the table, ESRCH if
+ * fr_findhtable() finds no table of that name in the unit, EINVAL if the
+ * table found reports a different unit and EBUSY if its reference count
+ * is not zero.
+ */
+int fr_removehtable(op)
+iplookupop_t *op;
+{
+	iphtable_t *table;
+	int rv;
+
+	table = fr_findhtable(op->iplo_unit, op->iplo_name);
+	if (table == NULL) {
+		rv = ESRCH;
+	} else if (table->iph_unit != op->iplo_unit) {
+		rv = EINVAL;
+	} else if (table->iph_ref != 0) {
+		rv = EBUSY;
+	} else {
+		fr_delhtable(table);
+		rv = 0;
+	}
+	return rv;
+}
+
+/*
+ * Delete every entry of a hash table, unlink the table from its unit's
+ * list and, when nothing references it any more, free it.
+ */
+void fr_delhtable(iph)
+iphtable_t *iph;
+{
+ iphtent_t *ipe;
+ int i;
+
+ for (i = 0; i < iph->iph_size; i++)
+ while ((ipe = iph->iph_table[i]) != NULL)
+ if (fr_delhtent(iph, ipe) != 0)
+ return; /* NOTE(review): a busy entry leaves the table linked and partly
+ * emptied, with no indication to the caller - confirm callers
+ * cope with that */
+
+ *iph->iph_pnext = iph->iph_next;
+ if (iph->iph_next != NULL)
+ iph->iph_next->iph_pnext = iph->iph_pnext;
+
+ ipf_nhtables[iph->iph_unit]--;
+
+ if (iph->iph_ref == 0) { /* NOTE(review): when iph_ref != 0 the table is unlinked but
+ * kept; fr_derefhtable() later calls this function again, which
+ * repeats the unlink and the counter decrement - confirm */
+ KFREES(iph->iph_table, iph->iph_size * sizeof(*iph->iph_table));
+ KFREE(iph);
+ }
+}
+
+/*
+ * Give up one reference on a hash table and delete the table through
+ * fr_delhtable() when that was the last one.
+ */
+void fr_derefhtable(iph)
+iphtable_t *iph;
+{
+ iph->iph_ref--; /* not atomic; presumably the caller serialises access - TODO confirm */
+ if (iph->iph_ref == 0)
+ fr_delhtable(iph);
+}
+
+
+/*
+ * Find the hash table called name in the given unit.
+ *
+ * Returns a pointer to the table, or NULL if the unit is not a valid index
+ * into ipf_htables[] or no table in it has that name.  Names are compared
+ * over the full width of iph_name.
+ */
+iphtable_t *fr_findhtable(unit, name)
+int unit;
+char *name;
+{
+	iphtable_t *iph;
+
+	/*
+	 * Callers such as fr_removehtable() pass the unit through from the
+	 * lookup operation unchecked; never index outside the array.
+	 */
+	if ((unit < 0) || (unit >= IPL_LOGSIZE))
+		return NULL;
+
+	for (iph = ipf_htables[unit]; iph != NULL; iph = iph->iph_next)
+		if (strncmp(iph->iph_name, name, sizeof(iph->iph_name)) == 0)
+			break;
+	return iph;
+}
+
+
+/*
+ * Delete the hash tables of one unit, or of every unit when op->iplf_unit
+ * is IPL_LOGALL.
+ *
+ * Returns the number of tables that fr_delhtable() took off the list.
+ * A table that fr_delhtable() leaves linked (it gives up when it meets an
+ * entry that is still referenced) is skipped rather than retried.
+ */
+size_t fr_flushhtable(op)
+iplookupflush_t *op;
+{
+	iphtable_t *iph, **iphp;
+	size_t freed;
+	int i;
+
+	freed = 0;
+
+	for (i = 0; i <= IPL_LOGMAX; i++) {
+		if ((op->iplf_unit != i) && (op->iplf_unit != IPL_LOGALL))
+			continue;
+
+		/*
+		 * iphp addresses the pointer that links to the table under
+		 * consideration.  After a successful delete that pointer
+		 * already refers to the next table.  If it still refers to
+		 * the same table, the table was busy and is still allocated,
+		 * so step over it; retrying the head as the loop used to do
+		 * would never terminate.
+		 */
+		iphp = &ipf_htables[i];
+		while ((iph = *iphp) != NULL) {
+			fr_delhtable(iph);
+			if (*iphp == iph)
+				iphp = &iph->iph_next;
+			else
+				freed++;
+		}
+	}
+
+	return freed;
+}
+
+
+/*
+ * Add an entry to a hash table.
+ *
+ * A private copy of *ipeo is made and linked in at the head of its bucket.
+ * IPv4 addresses are masked and both address and mask are stored in host
+ * byte order; IPv6 entries are stored exactly as supplied.  The prefix
+ * length of the mask is recorded in iph_masks[] so that the lookup code
+ * knows which shorter masks are worth trying.
+ *
+ * Returns 0 on success, -1 if memory cannot be allocated or the address
+ * family of the entry is not supported.
+ */
+int fr_addhtent(iph, ipeo)
+iphtable_t *iph;
+iphtent_t *ipeo;
+{
+	iphtent_t *ipe;
+	u_int hv;
+	int bits;
+
+	KMALLOC(ipe, iphtent_t *);
+	if (ipe == NULL)
+		return -1;
+
+	bcopy((char *)ipeo, (char *)ipe, sizeof(*ipe));
+#ifdef USE_INET6
+	if (ipe->ipe_family == AF_INET6) {
+		bits = count6bits((u_32_t *)ipe->ipe_mask.in6_addr8);
+		hv = IPE_HASH_FN(sum4((uint32_t *)ipe->ipe_addr.in6_addr8),
+				 sum4((uint32_t *)ipe->ipe_mask.in6_addr8),
+				 iph->iph_size);
+	} else
+#endif
+	if (ipe->ipe_family == AF_INET) {
+		ipe->ipe_addr.in4_addr &= ipe->ipe_mask.in4_addr;
+		ipe->ipe_addr.in4_addr = ntohl(ipe->ipe_addr.in4_addr);
+		bits = count4bits(ipe->ipe_mask.in4_addr);
+		ipe->ipe_mask.in4_addr = ntohl(ipe->ipe_mask.in4_addr);
+
+		hv = IPE_HASH_FN(ipe->ipe_addr.in4_addr, ipe->ipe_mask.in4_addr,
+				 iph->iph_size);
+	} else {
+		/* Unsupported family: do not leak the copy made above. */
+		KFREE(ipe);
+		return -1;
+	}
+
+	ipe->ipe_ref = 0;
+	ipe->ipe_next = iph->iph_table[hv];
+	ipe->ipe_pnext = iph->iph_table + hv;
+
+	if (iph->iph_table[hv] != NULL)
+		iph->iph_table[hv]->ipe_pnext = &ipe->ipe_next;
+	iph->iph_table[hv] = ipe;
+
+	/*
+	 * Record the prefix length.  A full-length mask needs no bit because
+	 * the lookup always tries the all-ones mask first.  The shifts are
+	 * done on an unsigned value: bit 31 is a legitimate result.
+	 */
+#ifdef USE_INET6
+	if (ipe->ipe_family == AF_INET6) {
+		if ((bits >= 0) && (bits != 128)) {
+			if (bits >= 96)
+				iph->iph_masks[0] |= (u_32_t)1 << (bits - 96);
+			else if (bits >= 64)
+				iph->iph_masks[1] |= (u_32_t)1 << (bits - 64);
+			else if (bits >= 32)
+				iph->iph_masks[2] |= (u_32_t)1 << (bits - 32);
+			else
+				iph->iph_masks[3] |= (u_32_t)1 << bits;
+		}
+	} else
+#endif
+	{
+		if ((bits >= 0) && (bits != 32))
+			iph->iph_masks[3] |= (u_32_t)1 << bits;
+	}
+
+	switch (iph->iph_type & ~IPHASH_ANON)
+	{
+	case IPHASH_GROUPMAP :
+		ipe->ipe_ptr = fr_addgroup(ipe->ipe_group, NULL,
+					   iph->iph_flags, IPL_LOGIPF,
+					   fr_active);
+		break;
+
+	default :
+		ipe->ipe_ptr = NULL;
+		ipe->ipe_value = 0;
+		break;
+	}
+
+	ipf_nhtnodes[iph->iph_unit]++;
+
+	return 0;
+}
+
+
+/*
+ * Delete an entry from a hash table.
+ *
+ * Returns EBUSY, leaving everything untouched, if the entry's reference
+ * count is not zero; otherwise the entry is unlinked from its bucket and
+ * freed, the unit's node count is decremented and 0 is returned.  For
+ * group-map tables the group named by the entry is released through
+ * fr_delgroup().
+ */
+int fr_delhtent(iph, ipe)
+iphtable_t *iph;
+iphtent_t *ipe;
+{
+	iphtent_t *after;
+
+	if (ipe->ipe_ref != 0)
+		return EBUSY;
+
+	/* Unlink from the bucket chain. */
+	after = ipe->ipe_next;
+	*ipe->ipe_pnext = after;
+	if (after != NULL)
+		after->ipe_pnext = ipe->ipe_pnext;
+
+	if ((iph->iph_type & ~IPHASH_ANON) == IPHASH_GROUPMAP) {
+		if (ipe->ipe_group != NULL)
+			fr_delgroup(ipe->ipe_group, IPL_LOGIPF, fr_active);
+	} else {
+		ipe->ipe_ptr = NULL;
+		ipe->ipe_value = 0;
+	}
+
+	KFREE(ipe);
+
+	ipf_nhtnodes[iph->iph_unit]--;
+
+	return 0;
+}
+
+
+/*
+ * Look an address up in a hash table and return the pointer stored in the
+ * matching entry (ipe_ptr).
+ *
+ * tptr is the table to search, version the IP protocol version (4, or 6
+ * when built with USE_INET6) and aptr the address.  Returns NULL for an
+ * unsupported version, a NULL table or address, or when nothing matches.
+ */
+void *fr_iphmfindgroup(tptr, version, aptr)
+void *tptr;
+int version;
+void *aptr;
+{
+	i6addr_t *addr;
+	iphtable_t *iph;
+	iphtent_t *ipe;
+	void *rval;
+
+	if ((version != 4)
+#ifdef USE_INET6
+	    && (version != 6)
+#endif
+	   )
+		return NULL;
+
+	/* Same guard as fr_iphmfindip(): both pointers are dereferenced. */
+	if (tptr == NULL || aptr == NULL)
+		return NULL;
+
+	iph = tptr;
+	addr = aptr;
+
+	READ_ENTER(&ip_poolrw);
+#ifdef USE_INET6
+	if (version == 6)
+		ipe = fr_iphmfind6(iph, &addr->in6);
+	else
+#endif
+		ipe = fr_iphmfind(iph, &addr->in4);
+
+	/* Read the result while the lock still protects the entry. */
+	if (ipe != NULL)
+		rval = ipe->ipe_ptr;
+	else
+		rval = NULL;
+	RWLOCK_EXIT(&ip_poolrw);
+	return rval;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    fr_iphmfindip                                               */
+/* Returns:     int - 0 == +ve match, -1 == error, 1 == -ve/no match        */
+/* Parameters:  tptr(I)    - pointer to the hash table to search            */
+/*              version(I) - IP protocol version (4 or 6)                   */
+/*              aptr(I)    - pointer to address information                 */
+/*                                                                          */
+/* Search the hash table for a given address and return a search result.    */
+/* An unsupported version or a NULL table/address pointer is an error.      */
+/* ------------------------------------------------------------------------ */
+int fr_iphmfindip(tptr, version, aptr)
+void *tptr, *aptr;
+int version;
+{
+	i6addr_t *addr;
+	iphtable_t *iph;
+	iphtent_t *ipe;
+
+	if ((version != 4)
+#ifdef USE_INET6
+	    && (version != 6)
+#endif
+	   )
+		return -1;
+
+	if (tptr == NULL || aptr == NULL)
+		return -1;
+
+	iph = tptr;
+	addr = aptr;
+	ipe = NULL;
+
+	READ_ENTER(&ip_poolrw);
+	switch (version)
+	{
+#ifdef USE_INET6
+	case 6 :
+		ipe = fr_iphmfind6(iph, &addr->in6);
+		break;
+#endif
+	case 4 :
+		ipe = fr_iphmfind(iph, &addr->in4);
+		break;
+	default :
+		break;
+	}
+	RWLOCK_EXIT(&ip_poolrw);
+
+	/* Only whether an entry was found matters, not its contents. */
+	return (ipe != NULL) ? 0 : 1;
+}
+
+
+/* Locks: ip_poolrw
+ *
+ * Find the IPv4 entry matching addr, trying the all-ones mask first and
+ * then successively shorter masks whose prefix lengths are flagged in
+ * iph_masks[3].  Returns the matching entry or NULL.
+ */
+static iphtent_t *fr_iphmfind(iph, addr)
+iphtable_t *iph;
+struct in_addr *addr;
+{
+ u_32_t hmsk, msk, ips;
+ iphtent_t *ipe;
+ u_int hv;
+
+ hmsk = iph->iph_masks[3]; /* bit n set: fr_addhtent() saw an entry with an n-bit mask */
+ msk = 0xffffffff;
+maskloop:
+ ips = ntohl(addr->s_addr) & msk; /* host byte order, as stored by fr_addhtent() */
+ hv = IPE_HASH_FN(ips, msk, iph->iph_size);
+ for (ipe = iph->iph_table[hv]; (ipe != NULL); ipe = ipe->ipe_next) {
+ if (ipe->ipe_mask.in4_addr != msk ||
+ ipe->ipe_addr.in4_addr != ips) {
+ continue;
+ }
+ break;
+ }
+
+ if ((ipe == NULL) && (hmsk != 0)) {
+ while (hmsk != 0) { /* shorten msk one bit at a time until a flagged prefix length is reached */
+ msk <<= 1;
+ if (hmsk & 0x80000000)
+ break;
+ hmsk <<= 1;
+ }
+ if (hmsk != 0) {
+ hmsk <<= 1; /* consume the flag just used, then retry with the shorter mask */
+ goto maskloop;
+ }
+ }
+ return ipe;
+}
+
+
+#ifdef USE_INET6
+/*
+ * Locks: ip_poolrw
+ *
+ * Find the IPv6 entry matching addr.  The all-ones mask is tried first and
+ * then successively shorter masks, one for each prefix length flagged in
+ * iph_masks[] (treated as one 128-bit value, word 0 most significant).
+ * Returns the matching entry or NULL.
+ *
+ * NOTE(review): msk is shifted as host-order words but is applied to, and
+ * compared with, data that fr_addhtent() stores as supplied - presumably
+ * network byte order.  That is only the same thing on a big-endian host;
+ * confirm before relying on short masks elsewhere.
+ */
+static iphtent_t *fr_iphmfind6(iph, addr)
+iphtable_t *iph;
+struct in6_addr *addr;
+{
+	u_32_t hmsk[4], msk[4], ips[4], *and;
+	iphtent_t *ipe;
+	u_int hv;
+
+	hmsk[0] = iph->iph_masks[0];
+	hmsk[1] = iph->iph_masks[1];
+	hmsk[2] = iph->iph_masks[2];
+	hmsk[3] = iph->iph_masks[3];
+
+	msk[0] = 0xffffffff;
+	msk[1] = 0xffffffff;
+	msk[2] = 0xffffffff;
+	msk[3] = 0xffffffff;
+maskloop:
+	and = (u_32_t *)addr->s6_addr;
+	ips[0] = *and & msk[0];
+	ips[1] = *(and + 1) & msk[1];
+	ips[2] = *(and + 2) & msk[2];
+	ips[3] = *(and + 3) & msk[3];
+
+	/*
+	 * Hash the masked address: an entry can only match when its stored
+	 * address equals ips, and fr_addhtent() chose its bucket from that
+	 * stored address.  Hashing the unmasked address would look in the
+	 * wrong bucket for everything but host entries.
+	 */
+	hv = IPE_HASH_FN(sum4((uint32_t *)ips), sum4((uint32_t *)msk),
+			 iph->iph_size);
+	for (ipe = iph->iph_table[hv]; (ipe != NULL); ipe = ipe->ipe_next) {
+		if (bcmp((void *)&ipe->ipe_mask.in6, (void *)msk, 16) ||
+		    bcmp((void *)&ipe->ipe_addr.in6, (void *)ips, 16))
+			continue;
+		break;
+	}
+
+	/*
+	 * hmsk is non-zero as a 128-bit value when ANY of its words is
+	 * non-zero; requiring all four to be non-zero stopped the search
+	 * for shorter masks almost immediately.
+	 */
+	if ((ipe == NULL) && ((hmsk[0] | hmsk[1] | hmsk[2] | hmsk[3]) != 0)) {
+		while ((hmsk[0] | hmsk[1] | hmsk[2] | hmsk[3]) != 0) {
+			left_shift_ipv6((char *)msk);
+			if (hmsk[0] & 0x80000000)
+				break;
+			left_shift_ipv6((char *)hmsk);
+		}
+		if ((hmsk[0] | hmsk[1] | hmsk[2] | hmsk[3]) != 0) {
+			/* Consume the flag just used and retry. */
+			left_shift_ipv6((char *)hmsk);
+			goto maskloop;
+		}
+	}
+	return ipe;
+}
+
+
+/*
+ * sum4: fold an IPv6 address (four 32-bit words) into a single 32-bit
+ * value by adding the words together; the sum wraps modulo 2^32.
+ */
+static uint32_t sum4(add)
+uint32_t *add;
+{
+	uint32_t total;
+	int i;
+
+	total = 0;
+	for (i = 0; i < 4; i++)
+		total += add[i];
+	return (total);
+}
+
+/*
+ * left shift on 128 bits
+ *
+ * data is treated as four 32-bit words with word 0 the most significant;
+ * the top bit of each lower word is carried into the word above it and
+ * the top bit of word 0 is discarded.
+ */
+static void left_shift_ipv6(data)
+char *data;
+{
+	u_32_t *words;
+	int i;
+
+	words = (u_32_t *)data;
+	for (i = 0; i < 3; i++) {
+		words[i] <<= 1;
+		/* carry in the bit about to be shifted out of the next word */
+		if (words[i + 1] >= 0x80000000)
+			words[i] += 1;
+	}
+	words[3] <<= 1;
+}
+#endif
+#endif /* IPFILTER_LOOKUP */
diff --git a/usr/src/uts/common/inet/ipf/ip_log.c b/usr/src/uts/common/inet/ipf/ip_log.c
new file mode 100644
index 0000000000..364b2e08e5
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/ip_log.c
@@ -0,0 +1,676 @@
+/*
+ * Copyright (C) 1997-2003 by Darren Reed.
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ *
+ * $Id: ip_log.c,v 2.75.2.7 2005/06/11 07:47:44 darrenr Exp $
+ *
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/param.h>
+#if defined(KERNEL) || defined(_KERNEL)
+# undef KERNEL
+# undef _KERNEL
+# define KERNEL 1
+# define _KERNEL 1
+#endif
+#if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
+ defined(_KERNEL)
+# include "opt_ipfilter_log.h"
+#endif
+#if defined(__FreeBSD__) && !defined(IPFILTER_LKM)
+# if defined(_KERNEL)
+# if defined(__FreeBSD_version) && (__FreeBSD_version >= 300000)
+# include "opt_ipfilter.h"
+# endif
+# else
+# include <osreldate.h>
+# endif
+#endif
+#ifndef SOLARIS
+# define SOLARIS (defined(sun) && (defined(__svr4__) || defined(__SVR4)))
+#endif
+#include <sys/errno.h>
+#include <sys/types.h>
+#include <sys/file.h>
+#ifndef _KERNEL
+# include <stdio.h>
+# include <string.h>
+# include <stdlib.h>
+# include <ctype.h>
+# define _KERNEL
+# define KERNEL
+# ifdef __OpenBSD__
+struct file;
+# endif
+# include <sys/uio.h>
+# undef _KERNEL
+# undef KERNEL
+#endif
+#if __FreeBSD_version >= 220000 && defined(_KERNEL)
+# include <sys/fcntl.h>
+# include <sys/filio.h>
+#else
+# include <sys/ioctl.h>
+#endif
+#include <sys/time.h>
+#if defined(_KERNEL)
+# include <sys/systm.h>
+# if defined(NetBSD) && (__NetBSD_Version__ >= 104000000)
+# include <sys/proc.h>
+# endif
+#endif /* _KERNEL */
+#if !SOLARIS && !defined(__hpux) && !defined(linux)
+# if (NetBSD > 199609) || (OpenBSD > 199603) || (__FreeBSD_version >= 300000)
+# include <sys/dirent.h>
+# else
+# include <sys/dir.h>
+# endif
+# include <sys/mbuf.h>
+#else
+# if !defined(__hpux) && defined(_KERNEL)
+# include <sys/filio.h>
+# include <sys/cred.h>
+# include <sys/ddi.h>
+# include <sys/sunddi.h>
+# include <sys/ksynch.h>
+# include <sys/kmem.h>
+# include <sys/mkdev.h>
+# include <sys/dditypes.h>
+# include <sys/cmn_err.h>
+# endif /* !__hpux */
+#endif /* !SOLARIS && !__hpux */
+#if !defined(linux)
+# include <sys/protosw.h>
+#endif
+#include <sys/socket.h>
+
+#include <net/if.h>
+#ifdef sun
+# include <net/af.h>
+#endif
+#if __FreeBSD_version >= 300000
+# include <net/if_var.h>
+#endif
+#include <net/route.h>
+#include <netinet/in.h>
+#ifdef __sgi
+# include <sys/ddi.h>
+# ifdef IFF_DRVRLOCK /* IRIX6 */
+# include <sys/hashing.h>
+# endif
+#endif
+#if !defined(__hpux) && !defined(linux) && \
+ !(defined(__sgi) && !defined(IFF_DRVRLOCK)) /*IRIX<6*/
+# include <netinet/in_var.h>
+#endif
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <netinet/ip_icmp.h>
+#ifdef USE_INET6
+# include <netinet/icmp6.h>
+#endif
+#if !defined(linux)
+# include <netinet/ip_var.h>
+#endif
+#ifndef _KERNEL
+# include <syslog.h>
+#endif
+#include "netinet/ip_compat.h"
+#include <netinet/tcpip.h>
+#include "netinet/ip_fil.h"
+#include "netinet/ip_nat.h"
+#include "netinet/ip_frag.h"
+#include "netinet/ip_state.h"
+#include "netinet/ip_auth.h"
+#if (__FreeBSD_version >= 300000) || defined(__NetBSD__)
+# include <sys/malloc.h>
+#endif
+/* END OF INCLUDES */
+
+#ifdef IPFILTER_LOG
+
+# if defined(IPL_SELECT)
+# include <machine/sys/user.h>
+# include <sys/kthread_iface.h>
+# define READ_COLLISION 0x001
+
+iplog_select_t iplog_ss[IPL_LOGMAX+1];
+
+extern int selwait;
+# endif /* IPL_SELECT */
+
+# if defined(linux) && defined(_KERNEL)
+wait_queue_head_t iplh_linux[IPL_LOGSIZE];
+# endif
+# if SOLARIS
+extern kcondvar_t iplwait;
+# endif
+
+iplog_t **iplh[IPL_LOGSIZE], *iplt[IPL_LOGSIZE], *ipll[IPL_LOGSIZE]; /* per device: tail link, list head, most recently queued record */
+int iplused[IPL_LOGSIZE]; /* bytes of log data currently accounted to each device */
+static fr_info_t iplcrc[IPL_LOGSIZE]; /* saved leading bytes of the last logged fr_info_t, compared by ipllog() */
+int ipl_suppress = 1; /* non-zero: ipllog() counts a repeat on the previous record instead of queueing a new one */
+int ipl_buffer_sz;
+int ipl_logmax = IPL_LOGMAX;
+int ipl_logall = 0; /* non-zero: ipflog() does not cap a logged body at 128 bytes */
+int ipl_log_init = 0; /* set by fr_loginit(), cleared by fr_logunload() */
+int ipl_logsize = IPFILTER_LOGSIZE; /* limit for iplused[] and for the size of one read */
+int ipl_magic[IPL_LOGSIZE] = { IPL_MAGIC, IPL_MAGIC_NAT, IPL_MAGIC_STATE, /* value stored in ipl_magic of each device's records */
+ IPL_MAGIC, IPL_MAGIC, IPL_MAGIC,
+ IPL_MAGIC, IPL_MAGIC };
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_loginit */
+/* Returns: int - 0 == success (always returned) */
+/* Parameters: Nil */
+/* */
+/* Reset the record list, usage count and saved comparison data of every */
+/* log device, create the log mutex and mark logging as initialised. */
+/* ------------------------------------------------------------------------ */
+int fr_loginit()
+{
+	int unit;
+
+	for (unit = 0; unit <= IPL_LOGMAX; unit++) {
+		iplused[unit] = 0;
+		ipll[unit] = NULL;
+		iplt[unit] = NULL;
+		iplh[unit] = &iplt[unit];
+		bzero((char *)&iplcrc[unit], sizeof(iplcrc[unit]));
+# ifdef IPL_SELECT
+		iplog_ss[unit].state = 0;
+		iplog_ss[unit].read_waiter = 0;
+# endif
+# if defined(linux) && defined(_KERNEL)
+		init_waitqueue_head(&iplh_linux[unit]);
+# endif
+	}
+
+# if SOLARIS && defined(_KERNEL)
+	cv_init(&iplwait, "ipl condvar", CV_DRIVER, NULL);
+# endif
+	MUTEX_INIT(&ipl_mutex, "ipf log mutex");
+
+	ipl_log_init = 1;
+
+	return 0;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_logunload */
+/* Returns: Nil */
+/* Parameters: Nil */
+/* */
+/* Free any unread log records and destroy the log mutex, if initialised. */
+/* ------------------------------------------------------------------------ */
+void fr_logunload()
+{
+	int unit;
+
+	/* Nothing to tear down unless fr_loginit() has run. */
+	if (ipl_log_init != 0) {
+		for (unit = 0; unit <= IPL_LOGMAX; unit++)
+			(void) ipflog_clear(unit);
+
+# if SOLARIS && defined(_KERNEL)
+		cv_destroy(&iplwait);
+# endif
+		MUTEX_DESTROY(&ipl_mutex);
+
+		ipl_log_init = 0;
+	}
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: ipflog */
+/* Returns: int - 0 == success, -1 == failure */
+/* Parameters: fin(I) - pointer to packet information */
+/* flags(I) - flags from filter rules */
+/* */
+/* Create a log record for a packet given that it has been triggered by a */
+/* rule (or the default setting). The amount of transport header to copy */
+/* is estimated from fixed sizes for TCP, UDP, ICMP and ICMPv6; part of the */
+/* data body is included if requested. The ipflog_t header is zeroed */
+/* first because the whole structure is copied into the log record. */
+/* ------------------------------------------------------------------------ */
+int ipflog(fin, flags)
+fr_info_t *fin;
+u_int flags;
+{
+	register size_t hlen;
+	int types[2], mlen;
+	size_t sizes[2];
+	void *ptrs[2];
+	ipflog_t ipfl;
+	u_char p;
+	mb_t *m;
+# if (SOLARIS || defined(__hpux)) && defined(_KERNEL)
+# ifndef IRE_ILL_CN
+	qif_t *ifp;
+# else
+	s_ill_t *ifp;
+# endif /* IRE_ILL_CN */
+# else
+	struct ifnet *ifp;
+# endif /* SOLARIS || __hpux */
+
+	bzero((char *)&ipfl, sizeof(ipfl));	/* unset fields and padding must not carry stack bytes */
+	m = fin->fin_m;
+	ifp = fin->fin_ifp;
+	hlen = fin->fin_hlen;
+	/*
+	 * calculate header size.
+	 */
+	if (fin->fin_off == 0) {
+		p = fin->fin_fi.fi_p;
+		if (p == IPPROTO_TCP)
+			hlen += MIN(sizeof(tcphdr_t), fin->fin_dlen);
+		else if (p == IPPROTO_UDP)
+			hlen += MIN(sizeof(udphdr_t), fin->fin_dlen);
+		else if (p == IPPROTO_ICMP) {
+			struct icmp *icmp;
+
+			icmp = (struct icmp *)fin->fin_dp;
+
+			/*
+			 * For ICMP, if the packet is an error packet, also
+			 * include the information about the packet which
+			 * caused the error.
+			 */
+			switch (icmp->icmp_type)
+			{
+			case ICMP_UNREACH :
+			case ICMP_SOURCEQUENCH :
+			case ICMP_REDIRECT :
+			case ICMP_TIMXCEED :
+			case ICMP_PARAMPROB :
+				hlen += MIN(sizeof(struct icmp) + 8,
+					    fin->fin_dlen);
+				break;
+			default :
+				hlen += MIN(sizeof(struct icmp),
+					    fin->fin_dlen);
+				break;
+			}
+		}
+# ifdef USE_INET6
+		else if (p == IPPROTO_ICMPV6) {
+			struct icmp6_hdr *icmp;
+
+			icmp = (struct icmp6_hdr *)fin->fin_dp;
+
+			/*
+			 * For ICMPV6, if the packet is an error packet, also
+			 * include the information about the packet which
+			 * caused the error.
+			 */
+			if (icmp->icmp6_type < 128) {
+				hlen += MIN(sizeof(struct icmp6_hdr) + 8,
+					    fin->fin_dlen);
+			} else {
+				hlen += MIN(sizeof(struct icmp6_hdr),
+					    fin->fin_dlen);
+			}
+		}
+# endif
+	}
+	/*
+	 * Get the interface number and name to which this packet is
+	 * currently associated.
+	 */
+# if (SOLARIS || defined(__hpux)) && defined(_KERNEL)
+	ipfl.fl_unit = (u_int)0;
+	(void) strncpy(ipfl.fl_ifname, IFNAME(ifp), sizeof(ipfl.fl_ifname));
+# else
+# if (defined(NetBSD) && (NetBSD <= 1991011) && (NetBSD >= 199603)) || \
+ (defined(OpenBSD) && (OpenBSD >= 199603)) || defined(linux) || \
+ (defined(__FreeBSD__) && (__FreeBSD_version >= 501113))
+	COPYIFNAME(ifp, ipfl.fl_ifname);
+# else
+	ipfl.fl_unit = (u_int)ifp->if_unit;
+# if defined(_KERNEL)
+	if ((ipfl.fl_ifname[0] = ifp->if_name[0]))
+		if ((ipfl.fl_ifname[1] = ifp->if_name[1]))
+			if ((ipfl.fl_ifname[2] = ifp->if_name[2]))
+				ipfl.fl_ifname[3] = ifp->if_name[3];
+# else
+	(void) strncpy(ipfl.fl_ifname, IFNAME(ifp), sizeof(ipfl.fl_ifname));
+	ipfl.fl_ifname[sizeof(ipfl.fl_ifname) - 1] = '\0';
+# endif
+# endif
+# endif /* __hpux || SOLARIS */
+	mlen = fin->fin_plen - hlen;
+	if (!ipl_logall) {
+		mlen = (flags & FR_LOGBODY) ? MIN(mlen, 128) : 0;
+	} else if ((flags & FR_LOGBODY) == 0) {
+		mlen = 0;
+	}
+	if (mlen < 0)
+		mlen = 0;
+	ipfl.fl_plen = (u_char)mlen;
+	ipfl.fl_hlen = (u_char)hlen;
+	ipfl.fl_rule = fin->fin_rule;
+	(void) strncpy(ipfl.fl_group, fin->fin_group, FR_GROUPLEN);
+	if (fin->fin_fr != NULL) {
+		ipfl.fl_loglevel = fin->fin_fr->fr_loglevel;
+		ipfl.fl_logtag = fin->fin_fr->fr_logtag;
+	} else {
+		ipfl.fl_loglevel = 0xffff;
+		ipfl.fl_logtag = FR_NOLOGTAG;
+	}
+	if (fin->fin_nattag != NULL)
+		bcopy(fin->fin_nattag, (void *)&ipfl.fl_nattag,
+		      sizeof(ipfl.fl_nattag));
+	ipfl.fl_flags = flags;
+	ipfl.fl_dir = fin->fin_out;
+	ipfl.fl_lflags = fin->fin_flx;
+	ptrs[0] = (void *)&ipfl;
+	sizes[0] = sizeof(ipfl);
+	types[0] = 0;
+# if defined(MENTAT) && defined(_KERNEL)
+	/*
+	 * Are we copied from the mblk or an aligned array ?
+	 */
+	if (fin->fin_ip == (ip_t *)m->b_rptr) {
+		ptrs[1] = m;
+		sizes[1] = hlen + mlen;
+		types[1] = 1;
+	} else {
+		ptrs[1] = fin->fin_ip;
+		sizes[1] = hlen + mlen;
+		types[1] = 0;
+	}
+# else
+	ptrs[1] = m;
+	sizes[1] = hlen + mlen;
+	types[1] = 1;
+# endif /* MENTAT */
+	return ipllog(IPL_LOGIPF, fin, ptrs, sizes, types, 2);
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: ipllog */
+/* Returns: int - 0 == success, -1 == no memory or log for dev is full */
+/* Parameters: dev(I) - device that owns this log record */
+/* fin(I) - pointer to packet information */
+/* items(I) - array of pointers to log data */
+/* itemsz(I) - array of size of valid memory pointed to */
+/* types(I) - per item: 0 == plain memory, 1 == copied via COPYDATA */
+/* cnt(I) - number of elements in arrays items/itemsz/types */
+/* */
+/* Builds one record (an iplog_t header followed by every item) and queues */
+/* it for readers of the log device. With ipl_suppress set, a repeat of */
+/* the previously logged packet only increments that record's ipl_count. */
+/* ------------------------------------------------------------------------ */
+int ipllog(dev, fin, items, itemsz, types, cnt)
+int dev;
+fr_info_t *fin;
+void **items;
+size_t *itemsz;
+int *types, cnt;
+{
+ caddr_t buf, ptr;
+ iplog_t *ipl;
+ size_t len;
+ int i;
+ SPL_INT(s);
+
+ /*
+ * Compare the first FI_LCSIZE bytes of fin with the copy saved from the
+ * last record logged. If they match, just up the count on the previous
+ * one rather than create a new one.
+ */
+ if (ipl_suppress) {
+ MUTEX_ENTER(&ipl_mutex);
+ if ((fin != NULL) && (fin->fin_off == 0)) {
+ if ((ipll[dev] != NULL) &&
+ bcmp((char *)fin, (char *)&iplcrc[dev],
+ FI_LCSIZE) == 0) {
+ ipll[dev]->ipl_count++;
+ MUTEX_EXIT(&ipl_mutex);
+ return 0;
+ }
+ bcopy((char *)fin, (char *)&iplcrc[dev], FI_LCSIZE); /* remember this packet for the next comparison */
+ } else
+ bzero((char *)&iplcrc[dev], FI_CSIZE); /* fin is NULL or fin_off != 0: forget the saved copy */
+ MUTEX_EXIT(&ipl_mutex);
+ }
+
+ /*
+ * Get the total amount of data to be logged.
+ */
+ for (i = 0, len = sizeof(iplog_t); i < cnt; i++) /* record header plus every item */
+ len += itemsz[i];
+
+ /*
+ * check that we have space to record this information and can
+ * allocate that much.
+ */
+ KMALLOCS(buf, caddr_t, len);
+ if (buf == NULL)
+ return -1;
+ SPL_NET(s);
+ MUTEX_ENTER(&ipl_mutex);
+ if ((iplused[dev] + len) > ipl_logsize) { /* this record would take the device past ipl_logsize */
+ MUTEX_EXIT(&ipl_mutex);
+ SPL_X(s);
+ KFREES(buf, len);
+ return -1;
+ }
+ iplused[dev] += len; /* reserve the space before the record is filled in */
+ MUTEX_EXIT(&ipl_mutex);
+ SPL_X(s);
+
+ /*
+ * The space is now reserved in iplused[dev]; fill in the record
+ * header at the front of the new buffer.
+ */
+ ipl = (iplog_t *)buf;
+ ipl->ipl_magic = ipl_magic[dev];
+ ipl->ipl_count = 1;
+ ipl->ipl_next = NULL;
+ ipl->ipl_dsize = len;
+#ifdef _KERNEL
+ GETKTIME(&ipl->ipl_sec);
+#else
+ ipl->ipl_sec = 0;
+ ipl->ipl_usec = 0;
+#endif
+
+ /*
+ * Loop through all the items to be logged, copying each one to the
+ * buffer. Use bcopy for normal data or the mb_t copyout routine.
+ */
+ for (i = 0, ptr = buf + sizeof(*ipl); i < cnt; i++) {
+ if (types[i] == 0) {
+ bcopy(items[i], ptr, itemsz[i]);
+ } else if (types[i] == 1) {
+ COPYDATA(items[i], 0, itemsz[i], ptr);
+ }
+ ptr += itemsz[i]; /* advance even when the type matched neither case */
+ }
+ SPL_NET(s);
+ MUTEX_ENTER(&ipl_mutex);
+ ipll[dev] = ipl; /* most recent record, used by the repeat check above */
+ *iplh[dev] = ipl; /* link the record at the tail of the list */
+ iplh[dev] = &ipl->ipl_next; /* the next record will be linked after this one */
+
+ /*
+ * Now that the log record has been completed and added to the queue,
+ * wake up any listeners who may want to read it.
+ */
+# if SOLARIS && defined(_KERNEL)
+ cv_signal(&iplwait);
+ MUTEX_EXIT(&ipl_mutex);
+# else
+ MUTEX_EXIT(&ipl_mutex);
+ WAKEUP(iplh,dev);
+# endif
+ SPL_X(s);
+# ifdef IPL_SELECT
+ iplog_input_ready(dev);
+# endif
+ return 0;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: ipflog_read */
+/* Returns: int - 0 == success, else error value. */
+/* Parameters: unit(I) - device we are reading from */
+/* uio(O) - pointer to information about where to store data */
+/* */
+/* Called to handle a read on an IPFilter device. Returns only complete */
+/* log messages - will not partially copy a log record out to userland. */
+/* A record whose copy out fails is put back at the head of the queue. */
+/* NOTE: This function will block and wait for a signal to return data if */
+/* there is none present. Asynchronous I/O is not implemented. */
+/* ------------------------------------------------------------------------ */
+int ipflog_read(unit, uio)
+minor_t unit;
+struct uio *uio;
+{
+	size_t dlen, copied;
+	int error = 0;
+	iplog_t *ipl;
+	SPL_INT(s);
+
+	/* Sanity checks: valid minor # and a sensible amount of data. */
+	if (IPL_LOGMAX < unit)
+		return ENXIO;
+	if (uio->uio_resid == 0)
+		return 0;
+	if ((uio->uio_resid < sizeof(iplog_t)) ||
+	    (uio->uio_resid > ipl_logsize))
+		return EINVAL;
+
+	/* Lock the log; wait for a signal if it is empty. */
+	SPL_NET(s);
+	MUTEX_ENTER(&ipl_mutex);
+
+	while (iplt[unit] == NULL) {
+# if SOLARIS && defined(_KERNEL)
+		if (!cv_wait_sig(&iplwait, &ipl_mutex.ipf_lk)) {
+			MUTEX_EXIT(&ipl_mutex);
+			return EINTR;
+		}
+# else
+# if defined(__hpux) && defined(_KERNEL)
+		lock_t *l;
+
+# ifdef IPL_SELECT
+		if (uio->uio_fpflags & (FNBLOCK|FNDELAY)) {
+			/* this is no blocking system call */
+			MUTEX_EXIT(&ipl_mutex);
+			return 0;
+		}
+# endif
+
+		MUTEX_EXIT(&ipl_mutex);
+		l = get_sleep_lock(&iplh[unit]);
+		error = sleep(&iplh[unit], PZERO+1);
+		spinunlock(l);
+# else
+# if defined(__osf__) && defined(_KERNEL)
+		error = mpsleep(&iplh[unit], PSUSP|PCATCH, "iplread", 0,
+				&ipl_mutex, MS_LOCK_SIMPLE);
+# else
+		MUTEX_EXIT(&ipl_mutex);
+		SPL_X(s);
+		error = SLEEP(unit + iplh, "ipl sleep");
+# endif /* __osf__ */
+# endif /* __hpux */
+		if (error)
+			return error;
+		SPL_NET(s);
+		MUTEX_ENTER(&ipl_mutex);
+# endif /* SOLARIS */
+	}
+
+# if (BSD >= 199101) || defined(__FreeBSD__) || defined(__osf__)
+	uio->uio_rw = UIO_READ;
+# endif
+
+	for (copied = 0; (ipl = iplt[unit]) != NULL; copied += dlen) {
+		dlen = ipl->ipl_dsize;
+		if (dlen > uio->uio_resid)
+			break;
+		/*
+		 * The mutex is not held over the uiomove call.  If unlinking
+		 * empties the list, reset the tail pointers first so ipllog()
+		 * cannot append to the record being copied out and freed.
+		 */
+		iplt[unit] = ipl->ipl_next;
+		iplused[unit] -= dlen;
+		if (iplt[unit] == NULL) {
+			iplh[unit] = &iplt[unit];
+			ipll[unit] = NULL;
+		}
+		MUTEX_EXIT(&ipl_mutex);
+		SPL_X(s);
+		error = UIOMOVE((caddr_t)ipl, dlen, UIO_READ, uio);
+		if (error) {
+			SPL_NET(s);
+			MUTEX_ENTER(&ipl_mutex);
+			/* Put the record back; it is the tail if nothing follows. */
+			ipl->ipl_next = iplt[unit];
+			iplt[unit] = ipl;
+			if (ipl->ipl_next == NULL)
+				iplh[unit] = &ipl->ipl_next;
+			iplused[unit] += dlen;
+			break;
+		}
+		MUTEX_ENTER(&ipl_mutex);
+		KFREES((caddr_t)ipl, dlen);
+		SPL_NET(s);
+	}
+	if (!iplt[unit])
+		iplused[unit] = 0;
+
+	MUTEX_EXIT(&ipl_mutex);
+	SPL_X(s);
+	return error;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: ipflog_clear */
+/* Returns: int - value of iplused[unit] before it was reset to zero */
+/* Parameters: unit(I) - device whose queued records are discarded */
+/* */
+/* Frees every queued log record of a device and resets its log state. */
+/* ------------------------------------------------------------------------ */
+int ipflog_clear(unit)
+minor_t unit;
+{
+	iplog_t *rec;
+	int freed;
+	SPL_INT(s);
+
+	SPL_NET(s);
+	MUTEX_ENTER(&ipl_mutex);
+	for (rec = iplt[unit]; rec != NULL; rec = iplt[unit]) {
+		iplt[unit] = rec->ipl_next;
+		KFREES((caddr_t)rec, rec->ipl_dsize);
+	}
+	freed = iplused[unit];
+	iplused[unit] = 0;
+	ipll[unit] = NULL;
+	iplh[unit] = &iplt[unit];
+	bzero((char *)&iplcrc[unit], FI_CSIZE);
+	MUTEX_EXIT(&ipl_mutex);
+	SPL_X(s);
+	return freed;
+}
+#endif /* IPFILTER_LOG */
diff --git a/usr/src/uts/common/inet/ipf/ip_lookup.c b/usr/src/uts/common/inet/ipf/ip_lookup.c
new file mode 100644
index 0000000000..299dadf0bb
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/ip_lookup.c
@@ -0,0 +1,532 @@
+/*
+ * Copyright (C) 2002-2003 by Darren Reed.
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ *
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#if defined(KERNEL) || defined(_KERNEL)
+# undef KERNEL
+# undef _KERNEL
+# define KERNEL 1
+# define _KERNEL 1
+#endif
+#if defined(__osf__)
+# define _PROTO_NET_H_
+#endif
+#include <sys/param.h>
+#include <sys/errno.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/file.h>
+#if __FreeBSD_version >= 220000 && defined(_KERNEL)
+# include <sys/fcntl.h>
+# include <sys/filio.h>
+#else
+# include <sys/ioctl.h>
+#endif
+#if !defined(_KERNEL)
+# include <string.h>
+# define _KERNEL
+# ifdef __OpenBSD__
+struct file;
+# endif
+# include <sys/uio.h>
+# undef _KERNEL
+#endif
+#include <sys/socket.h>
+#if (defined(__osf__) || defined(AIX) || defined(__hpux) || defined(__sgi)) && defined(_KERNEL)
+# ifdef __osf__
+# include <net/radix.h>
+# endif
+# include "radix_ipf_local.h"
+# define _RADIX_H_
+#endif
+#include <net/if.h>
+#if defined(__FreeBSD__)
+# include <sys/cdefs.h>
+# include <sys/proc.h>
+#endif
+#if defined(_KERNEL)
+# include <sys/systm.h>
+# if !defined(__SVR4) && !defined(__svr4__)
+# include <sys/mbuf.h>
+# endif
+#endif
+#include <netinet/in.h>
+
+#include "netinet/ip_compat.h"
+#include "netinet/ip_fil.h"
+#include "netinet/ip_pool.h"
+#include "netinet/ip_htable.h"
+#include "netinet/ip_lookup.h"
+/* END OF INCLUDES */
+
+#if !defined(lint)
+static const char rcsid[] = "@(#)$Id: ip_lookup.c,v 2.35.2.7 2005/06/12 07:18:20 darrenr Exp $";
+#endif
+
+#ifdef IPFILTER_LOOKUP
+int ip_lookup_inited = 0;
+
+static int iplookup_addnode __P((caddr_t));
+static int iplookup_delnode __P((caddr_t data));
+static int iplookup_addtable __P((caddr_t));
+static int iplookup_deltable __P((caddr_t));
+static int iplookup_stats __P((caddr_t));
+static int iplookup_flush __P((caddr_t));
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: ip_lookup_init */
+/* Returns: int - 0 = success, -1 if ip_pool_init() failed */
+/* Parameters: Nil */
+/* */
+/* Initialise the pool code and the ip_poolrw lock of the lookup layer. */
+/* ------------------------------------------------------------------------ */
+int ip_lookup_init()
+{
+	int rv = ip_pool_init();
+
+	if (rv == -1)
+		return -1;
+
+	RWLOCK_INIT(&ip_poolrw, "ip pool rwlock");
+	ip_lookup_inited = 1;
+
+	return 0;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: ip_lookup_unload */
+/* Returns: Nil */
+/* Parameters: Nil */
+/* */
+/* Call the pool and hash table unload routines, then destroy ip_poolrw if */
+/* ip_lookup_init() had created it, so that ip_lookup_init() can be called */
+/* again, safely. */
+/* ------------------------------------------------------------------------ */
+void ip_lookup_unload()
+{
+	ip_pool_fini();
+	fr_htable_unload();
+
+	if (ip_lookup_inited != 1)
+		return;
+	RW_DESTROY(&ip_poolrw);
+	ip_lookup_inited = 0;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: ip_lookup_ioctl */
+/* Returns: int - 0 = success, else error */
+/* Parameters: data(IO) - pointer to ioctl data to be copied to/from user */
+/* space. */
+/* cmd(I) - ioctl command number */
+/* mode(I) - file mode bits used with open (not used here) */
+/* */
+/* Dispatch an ioctl sent to the lookup device to the helper for that */
+/* command, calling it with ip_poolrw held for writing. Commands that are */
+/* not recognised fail with EINVAL. */
+/* ------------------------------------------------------------------------ */
+int ip_lookup_ioctl(data, cmd, mode)
+caddr_t data;
+ioctlcmd_t cmd;
+int mode;
+{
+	int (*handler) __P((caddr_t));
+	int err;
+	SPL_INT(s);
+
+	mode = mode;	/* LINT */
+
+	/*
+	 * Pick the helper for this command; unknown commands get none.
+	 */
+	switch (cmd)
+	{
+	case SIOCLOOKUPADDNODE :
+	case SIOCLOOKUPADDNODEW :
+		handler = iplookup_addnode;
+		break;
+
+	case SIOCLOOKUPDELNODE :
+	case SIOCLOOKUPDELNODEW :
+		handler = iplookup_delnode;
+		break;
+
+	case SIOCLOOKUPADDTABLE :
+		handler = iplookup_addtable;
+		break;
+
+	case SIOCLOOKUPDELTABLE :
+		handler = iplookup_deltable;
+		break;
+
+	case SIOCLOOKUPSTAT :
+	case SIOCLOOKUPSTATW :
+		handler = iplookup_stats;
+		break;
+
+	case SIOCLOOKUPFLUSH :
+		handler = iplookup_flush;
+		break;
+
+	default :
+		handler = NULL;
+		break;
+	}
+
+	SPL_NET(s);
+	if (handler == NULL) {
+		err = EINVAL;
+	} else {
+		/* Every helper runs with ip_poolrw held for writing. */
+		WRITE_ENTER(&ip_poolrw);
+		err = (*handler)(data);
+		RWLOCK_EXIT(&ip_poolrw);
+	}
+	SPL_X(s);
+	return err;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: iplookup_addnode */
+/* Returns: int - 0 = success, else error */
+/* Parameters: data(I) - pointer to data from ioctl call */
+/* */
+/* Add a new data node to a lookup structure. The parent pool or hash */
+/* table named in the request must already exist (ESRCH otherwise); a pool */
+/* node that is already present is refused with EEXIST. */
+/* ------------------------------------------------------------------------ */
+static int iplookup_addnode(data)
+caddr_t data;
+{
+	ip_pool_node_t node;
+	iplookupop_t op;
+	iphtable_t *iph;
+	iphtent_t hte;
+	ip_pool_t *p;
+
+	BCOPYIN(data, &op, sizeof(op));
+	op.iplo_name[sizeof(op.iplo_name) - 1] = '\0';
+
+	if (op.iplo_type == IPLT_POOL) {
+		/*
+		 * The caller must hand over exactly one ip_pool_node_t.
+		 */
+		if (op.iplo_size != sizeof(node))
+			return EINVAL;
+		if (COPYIN(op.iplo_struct, &node, sizeof(node)) != 0)
+			return EFAULT;
+
+		/*
+		 * The pool itself has to exist already, and it must not
+		 * yet hold an entry for this address/mask pair.
+		 */
+		p = ip_pool_find(op.iplo_unit, op.iplo_name);
+		if (p == NULL)
+			return ESRCH;
+		if (ip_pool_findeq(p, &node.ipn_addr, &node.ipn_mask) != NULL)
+			return EEXIST;
+
+		return ip_pool_insert(p, &node.ipn_addr, &node.ipn_mask,
+				      node.ipn_info);
+	}
+
+	if (op.iplo_type == IPLT_HASH) {
+		/*
+		 * The caller must hand over exactly one iphtent_t.
+		 */
+		if (op.iplo_size != sizeof(hte))
+			return EINVAL;
+		if (COPYIN(op.iplo_struct, &hte, sizeof(hte)) != 0)
+			return EFAULT;
+
+		/*
+		 * The hash table has to exist already; the result of
+		 * fr_addhtent() is returned as it is.
+		 */
+		iph = fr_findhtable(op.iplo_unit, op.iplo_name);
+		if (iph == NULL)
+			return ESRCH;
+
+		return fr_addhtent(iph, &hte);
+	}
+
+	/*
+	 * Neither a pool nor a hash table was asked for.
+	 */
+	return EINVAL;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: iplookup_delnode */
+/* Returns: int - 0 = success, else error */
+/* Parameters: data(I) - pointer to data from ioctl call */
+/* */
+/* Delete a node from a lookup table: find the pool or hash table named in */
+/* the request first, then remove the matching entry from it. */
+/* ------------------------------------------------------------------------ */
+static int iplookup_delnode(data)
+caddr_t data;
+{
+	ip_pool_node_t node, *m;
+	iplookupop_t op;
+	iphtable_t *iph;
+	iphtent_t hte;
+	ip_pool_t *p;
+
+	BCOPYIN(data, &op, sizeof(op));
+	op.iplo_name[sizeof(op.iplo_name) - 1] = '\0';
+
+	if (op.iplo_type == IPLT_POOL) {
+		if (op.iplo_size != sizeof(node))
+			return EINVAL;
+		if (COPYIN(op.iplo_struct, &node, sizeof(node)) != 0)
+			return EFAULT;
+
+		/*
+		 * Both the pool and the exact address/mask entry have to
+		 * be found before anything can be removed.
+		 */
+		p = ip_pool_find(op.iplo_unit, op.iplo_name);
+		if (p == NULL)
+			return ESRCH;
+		m = ip_pool_findeq(p, &node.ipn_addr, &node.ipn_mask);
+		if (m == NULL)
+			return ENOENT;
+
+		return ip_pool_remove(p, m);
+	}
+
+	if (op.iplo_type == IPLT_HASH) {
+		if (op.iplo_size != sizeof(hte))
+			return EINVAL;
+		if (COPYIN(op.iplo_struct, &hte, sizeof(hte)) != 0)
+			return EFAULT;
+
+		/*
+		 * Only the table is looked up here; the entry to delete
+		 * is passed on to fr_delhtent() as supplied.
+		 */
+		iph = fr_findhtable(op.iplo_unit, op.iplo_name);
+		if (iph == NULL)
+			return ESRCH;
+
+		return fr_delhtent(iph, &hte);
+	}
+
+	/*
+	 * Unknown lookup type: the request names neither a pool nor a
+	 * hash table.
+	 */
+	return EINVAL;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: iplookup_addtable */
+/* Returns: int - 0 = success, else error */
+/* Parameters: data(I) - pointer to data from ioctl call */
+/* */
+/* Create a new pool or hash table, unless one with the requested unit and */
+/* name is already present, in which case EEXIST is returned. */
+/* ------------------------------------------------------------------------ */
+static int iplookup_addtable(data)
+caddr_t data;
+{
+	iplookupop_t op;
+
+	BCOPYIN(data, &op, sizeof(op));
+	op.iplo_name[sizeof(op.iplo_name) - 1] = '\0';
+
+	if (op.iplo_type == IPLT_POOL) {
+		/*
+		 * Refuse to create a second pool with the same unit/name.
+		 */
+		if (ip_pool_find(op.iplo_unit, op.iplo_name) != NULL)
+			return EEXIST;
+
+		return ip_pool_create(&op);
+	}
+
+	if (op.iplo_type == IPLT_HASH) {
+		/*
+		 * Likewise for hash tables.
+		 */
+		if (fr_findhtable(op.iplo_unit, op.iplo_name) != NULL)
+			return EEXIST;
+
+		return fr_newhtable(&op);
+	}
+
+	/*
+	 * Unknown lookup type.
+	 */
+	return EINVAL;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: iplookup_deltable */
+/* Returns: int - 0 = success, else error */
+/* Parameters: data(I) - pointer to data from ioctl call */
+/* */
+/* Decodes ioctl request to remove a particular hash table or pool and */
+/* calls the relevant function to do the cleanup. */
+/* ------------------------------------------------------------------------ */
+static int iplookup_deltable(data)
+caddr_t data;
+{
+	iplookupop_t op;
+
+	BCOPYIN(data, &op, sizeof(op));
+	op.iplo_name[sizeof(op.iplo_name) - 1] = '\0';
+
+	/*
+	 * When the IPLT_ANON bit is set in the argument, strip every other
+	 * bit from it before the request is passed on.
+	 */
+	if ((op.iplo_arg & IPLT_ANON) != 0)
+		op.iplo_arg &= IPLT_ANON;
+
+	/*
+	 * Hand the request to the destroy routine for the selected type.
+	 */
+	switch (op.iplo_type)
+	{
+	case IPLT_POOL :
+		return ip_pool_destroy(&op);
+
+	case IPLT_HASH :
+		return fr_removehtable(&op);
+
+	default :
+		break;
+	}
+
+	return EINVAL;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: iplookup_stats */
+/* Returns: int - 0 = success, EINVAL for an unknown type, else error */
+/* Parameters: data(I) - pointer to data from ioctl call */
+/* */
+/* Copy statistical information from inside the kernel back to user space. */
+/* ------------------------------------------------------------------------ */
+static int iplookup_stats(data)
+caddr_t data;
+{
+	iplookupop_t op;
+	int err;
+
+	BCOPYIN(data, &op, sizeof(op));
+	op.iplo_name[sizeof(op.iplo_name) - 1] = '\0';	/* caller-supplied name: terminate it, as the other handlers do */
+
+	switch (op.iplo_type)
+	{
+	case IPLT_POOL :
+		err = ip_pool_statistics(&op);
+		break;
+
+	case IPLT_HASH :
+		err = fr_gethtablestat(&op);
+		break;
+
+	default :
+		err = EINVAL;
+		break;
+	}
+	return err;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: iplookup_flush */
+/* Returns: int - 0 = success, EINVAL = bad unit/type, EFAULT = copy out */
+/* Parameters: data(I) - pointer to data from ioctl call */
+/* */
+/* Flush pools (IPLT_POOL), hash tables (IPLT_HASH) or both (IPLT_ALL) and */
+/* copy the request back to the caller with iplf_count set to the total. */
+/* ------------------------------------------------------------------------ */
+static int iplookup_flush(data)
+caddr_t data;
+{
+	int err, unit, num, type;
+	iplookupflush_t flush;
+
+	BCOPYIN(data, &flush, sizeof(flush));
+
+	flush.iplf_name[sizeof(flush.iplf_name) - 1] = '\0';
+
+	unit = flush.iplf_unit;
+	if ((unit < 0 || unit > IPL_LOGMAX) && (unit != IPLT_ALL))
+		return EINVAL;
+
+	type = flush.iplf_type;
+	err = EINVAL;	/* stays EINVAL unless a known type is flushed below */
+	num = 0;
+
+	if (type == IPLT_POOL || type == IPLT_ALL) {
+		err = 0;
+		num = ip_pool_flush(&flush);
+	}
+
+	if (type == IPLT_HASH || type == IPLT_ALL) {
+		err = 0;
+		num += fr_flushhtable(&flush);
+	}
+
+	if (err == 0) {
+		flush.iplf_count = num;
+		if (COPYOUT(&flush, data, sizeof(flush)) != 0)
+			err = EFAULT;	/* report an errno, not the raw copy status */
+	}
+	return err;
+}
+
+
+void ip_lookup_deref(type, ptr)
+int type;
+void *ptr;
+{
+	/*
+	 * Pass ptr to ip_pool_deref() (IPLT_POOL) or fr_derefhtable()
+	 * (IPLT_HASH) with ip_poolrw held for writing.  A NULL ptr returns
+	 * at once; any other type only takes and releases the lock.
+	 */
+
+	if (ptr == NULL)
+		return;
+
+	WRITE_ENTER(&ip_poolrw);
+	if (type == IPLT_POOL)
+		ip_pool_deref(ptr);
+	else if (type == IPLT_HASH)
+		fr_derefhtable(ptr);
+	RWLOCK_EXIT(&ip_poolrw);
+}
+
+
+#else /* IPFILTER_LOOKUP */
+
+/*ARGSUSED*/
+int ip_lookup_ioctl(data, cmd, mode)
+caddr_t data;
+ioctlcmd_t cmd;
+int mode;
+{
+ return EIO; /* stub used when IPFILTER_LOOKUP is not defined: every lookup ioctl fails */
+}
+#endif /* IPFILTER_LOOKUP */
diff --git a/usr/src/uts/common/inet/ipf/ip_nat.c b/usr/src/uts/common/inet/ipf/ip_nat.c
new file mode 100644
index 0000000000..8814553e20
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/ip_nat.c
@@ -0,0 +1,4849 @@
+/*
+ * Copyright (C) 1995-2003 by Darren Reed.
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ *
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#if defined(KERNEL) || defined(_KERNEL)
+# undef KERNEL
+# undef _KERNEL
+# define KERNEL 1
+# define _KERNEL 1
+#endif
+#include <sys/errno.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/file.h>
+#if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
+ defined(_KERNEL)
+# include "opt_ipfilter_log.h"
+#endif
+#if !defined(_KERNEL)
+# include <stdio.h>
+# include <string.h>
+# include <stdlib.h>
+# define _KERNEL
+# ifdef __OpenBSD__
+struct file;
+# endif
+# include <sys/uio.h>
+# undef _KERNEL
+#endif
+#if defined(_KERNEL) && (__FreeBSD_version >= 220000)
+# include <sys/filio.h>
+# include <sys/fcntl.h>
+#else
+# include <sys/ioctl.h>
+#endif
+#if !defined(AIX)
+# include <sys/fcntl.h>
+#endif
+#if !defined(linux)
+# include <sys/protosw.h>
+#endif
+#include <sys/socket.h>
+#if defined(_KERNEL)
+# include <sys/systm.h>
+# if !defined(__SVR4) && !defined(__svr4__)
+# include <sys/mbuf.h>
+# endif
+#endif
+#if defined(__SVR4) || defined(__svr4__)
+# include <sys/filio.h>
+# include <sys/byteorder.h>
+# ifdef _KERNEL
+# include <sys/dditypes.h>
+# endif
+# include <sys/stream.h>
+# include <sys/kmem.h>
+#endif
+#if __FreeBSD_version >= 300000
+# include <sys/queue.h>
+#endif
+#include <net/if.h>
+#if __FreeBSD_version >= 300000
+# include <net/if_var.h>
+# if defined(_KERNEL) && !defined(IPFILTER_LKM)
+# include "opt_ipfilter.h"
+# endif
+#endif
+#ifdef sun
+# include <net/af.h>
+#endif
+#include <net/route.h>
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+
+#ifdef RFC1825
+# include <vpn/md5.h>
+# include <vpn/ipsec.h>
+extern struct ifnet vpnif;
+#endif
+
+#if !defined(linux)
+# include <netinet/ip_var.h>
+#endif
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <netinet/ip_icmp.h>
+#include "netinet/ip_compat.h"
+#include <netinet/tcpip.h>
+#include "netinet/ip_fil.h"
+#include "netinet/ip_nat.h"
+#include "netinet/ip_frag.h"
+#include "netinet/ip_state.h"
+#include "netinet/ip_proxy.h"
+#ifdef IPFILTER_SYNC
+#include "netinet/ip_sync.h"
+#endif
+#if (__FreeBSD_version >= 300000)
+# include <sys/malloc.h>
+#endif
+/* END OF INCLUDES */
+
+#undef SOCKADDR_IN
+#define SOCKADDR_IN struct sockaddr_in
+
+#if !defined(lint)
+static const char sccsid[] = "@(#)ip_nat.c 1.11 6/5/96 (C) 1995 Darren Reed";
+static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.42 2005/08/11 19:51:36 darrenr Exp $";
+#endif
+
+
+/* ======================================================================== */
+/* How the NAT is organised and works. */
+/* */
+/* Inside (interface y) NAT Outside (interface x) */
+/* -------------------- -+- ------------------------------------- */
+/* Packet going | out, processed by fr_checknatout() for x */
+/* ------------> | ------------> */
+/* src=10.1.1.1 | src=192.1.1.1 */
+/* | */
+/* | in, processed by fr_checknatin() for x */
+/* <------------ | <------------ */
+/* dst=10.1.1.1 | dst=192.1.1.1 */
+/* -------------------- -+- ------------------------------------- */
+/* fr_checknatout() - changes ip_src and if required, sport */
+/* - creates a new mapping, if required. */
+/* fr_checknatin() - changes ip_dst and if required, dport */
+/* */
+/* In the NAT table, internal source is recorded as "in" and externally */
+/* seen as "out". */
+/* ======================================================================== */
+
+
+/*
+ * nat_table[0] and nat_table[1] are two hash tables of ipf_nattable_sz
+ * bucket pointers each, allocated and zeroed by fr_natinit().
+ */
+nat_t **nat_table[2] = { NULL, NULL },
+ *nat_instances = NULL;
+/* Head of the list of loaded NAT rules (walked by fr_nat_ioctl()). */
+ipnat_t *nat_list = NULL;
+/* Table sizes; the arrays below are allocated from these in fr_natinit(). */
+u_int ipf_nattable_max = NAT_TABLE_MAX;
+u_int ipf_nattable_sz = NAT_TABLE_SZ;
+u_int ipf_natrules_sz = NAT_SIZE;
+u_int ipf_rdrrules_sz = RDR_SIZE;
+u_int ipf_hostmap_sz = HOSTMAP_SIZE;
+/* If fr_nat_maxbucket is left at 0, fr_natinit() derives it from ipf_nattable_sz. */
+u_int fr_nat_maxbucket = 0,
+ fr_nat_maxbucket_reset = 1;
+/* Bit k is set when a map (nat_masks) or rdr (rdr_masks) rule has a k-bit netmask. */
+u_32_t nat_masks = 0;
+u_32_t rdr_masks = 0;
+/* Hash tables of map rules, rdr rules and host mappings. */
+ipnat_t **nat_rules = NULL;
+ipnat_t **rdr_rules = NULL;
+hostmap_t **maptable = NULL;
+/* Timeout queues; fr_natinit() chains them tcp -> udp -> icmp -> ip. */
+ipftq_t nat_tqb[IPF_TCP_NSTATES];
+ipftq_t nat_udptq;
+ipftq_t nat_icmptq;
+ipftq_t nat_iptq;
+/* User defined timeout queues created for rules with in_age set. */
+ipftq_t *nat_utqe = NULL;
+#ifdef IPFILTER_LOG
+int nat_logging = 1;
+#else
+int nat_logging = 0;
+#endif
+
+u_long fr_defnatage = DEF_NAT_AGE,
+ fr_defnatipage = 120, /* 60 seconds */
+ fr_defnaticmpage = 6; /* 3 seconds */
+natstat_t nat_stats;
+/* Set through SIOCSTLCK; SIOCSTPUT/SIOCSTGSZ/SIOCSTGET require it non-zero. */
+int fr_nat_lock = 0;
+/* Set to 1 once fr_natinit() has completed successfully. */
+int fr_nat_init = 0;
+#if SOLARIS
+extern int pfil_delayed_copy;
+#endif
+
+/* Prototypes for the functions private to this file. */
+static int nat_flushtable __P((void));
+static int nat_clearlist __P((void));
+static void nat_addnat __P((struct ipnat *));
+static void nat_addrdr __P((struct ipnat *));
+static void nat_delete __P((struct nat *, int));
+static void nat_delrdr __P((struct ipnat *));
+static void nat_delnat __P((struct ipnat *));
+static int fr_natgetent __P((caddr_t));
+static int fr_natgetsz __P((caddr_t));
+static int fr_natputent __P((caddr_t, int));
+static void nat_tabmove __P((nat_t *));
+static int nat_match __P((fr_info_t *, ipnat_t *));
+static INLINE int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *));
+static INLINE int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *));
+static hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr,
+ struct in_addr, struct in_addr, u_32_t));
+static void nat_hostmapdel __P((struct hostmap *));
+static INLINE int nat_icmpquerytype4 __P((int));
+static int nat_siocaddnat __P((ipnat_t *, ipnat_t **, int));
+static void nat_siocdelnat __P((ipnat_t *, ipnat_t **, int));
+static INLINE int nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *,
+ tcphdr_t *, nat_t **, int));
+static void nat_resolverule __P((ipnat_t *));
+static nat_t *fr_natclone __P((fr_info_t *, nat_t *));
+static void nat_mssclamp __P((tcphdr_t *, u_32_t, fr_info_t *, u_short *));
+static INLINE int nat_wildok __P((nat_t *, int, int, int, int));
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_natinit */
+/* Returns: int - 0 == success, negative == failure; the values -1 to -7 */
+/* identify which of the seven allocations failed */
+/* Parameters: Nil */
+/* */
+/* Initialise all of the NAT locks, tables and other structures. */
+/* */
+/* NOTE(review): on an allocation failure the tables allocated earlier in */
+/* this function are not released here; presumably the caller unwinds */
+/* through the unload path - confirm. */
+/* ------------------------------------------------------------------------ */
+int fr_natinit()
+{
+ int i;
+
+ /* The two NAT session hash tables. */
+ KMALLOCS(nat_table[0], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
+ if (nat_table[0] != NULL)
+ bzero((char *)nat_table[0], ipf_nattable_sz * sizeof(nat_t *));
+ else
+ return -1;
+
+ KMALLOCS(nat_table[1], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
+ if (nat_table[1] != NULL)
+ bzero((char *)nat_table[1], ipf_nattable_sz * sizeof(nat_t *));
+ else
+ return -2;
+
+ /* Hash tables for map rules, rdr rules and host mappings. */
+ KMALLOCS(nat_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_natrules_sz);
+ if (nat_rules != NULL)
+ bzero((char *)nat_rules, ipf_natrules_sz * sizeof(ipnat_t *));
+ else
+ return -3;
+
+ KMALLOCS(rdr_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_rdrrules_sz);
+ if (rdr_rules != NULL)
+ bzero((char *)rdr_rules, ipf_rdrrules_sz * sizeof(ipnat_t *));
+ else
+ return -4;
+
+ KMALLOCS(maptable, hostmap_t **, sizeof(hostmap_t *) * ipf_hostmap_sz);
+ if (maptable != NULL)
+ bzero((char *)maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
+ else
+ return -5;
+
+ /* One counter per bucket for each of the two session tables. */
+ KMALLOCS(nat_stats.ns_bucketlen[0], u_long *,
+ ipf_nattable_sz * sizeof(u_long));
+ if (nat_stats.ns_bucketlen[0] == NULL)
+ return -6;
+ bzero((char *)nat_stats.ns_bucketlen[0],
+ ipf_nattable_sz * sizeof(u_long));
+
+ KMALLOCS(nat_stats.ns_bucketlen[1], u_long *,
+ ipf_nattable_sz * sizeof(u_long));
+ if (nat_stats.ns_bucketlen[1] == NULL)
+ return -7;
+
+ bzero((char *)nat_stats.ns_bucketlen[1],
+ ipf_nattable_sz * sizeof(u_long));
+
+ /*
+ * No limit configured: use twice the number of significant bits in
+ * ipf_nattable_sz.
+ */
+ if (fr_nat_maxbucket == 0) {
+ for (i = ipf_nattable_sz; i > 0; i >>= 1)
+ fr_nat_maxbucket++;
+ fr_nat_maxbucket *= 2;
+ }
+
+ fr_sttab_init(nat_tqb);
+ /*
+ * Increase this because we may have "keep state" following this too
+ * and packet storms can occur if this is removed too quickly.
+ */
+ nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = fr_tcplastack;
+ /* Chain the queues: TCP states -> UDP -> ICMP -> other IP. */
+ nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &nat_udptq;
+ nat_udptq.ifq_ttl = fr_defnatage;
+ nat_udptq.ifq_ref = 1;
+ nat_udptq.ifq_head = NULL;
+ nat_udptq.ifq_tail = &nat_udptq.ifq_head;
+ MUTEX_INIT(&nat_udptq.ifq_lock, "nat ipftq udp tab");
+ nat_udptq.ifq_next = &nat_icmptq;
+ nat_icmptq.ifq_ttl = fr_defnaticmpage;
+ nat_icmptq.ifq_ref = 1;
+ nat_icmptq.ifq_head = NULL;
+ nat_icmptq.ifq_tail = &nat_icmptq.ifq_head;
+ MUTEX_INIT(&nat_icmptq.ifq_lock, "nat icmp ipftq tab");
+ nat_icmptq.ifq_next = &nat_iptq;
+ nat_iptq.ifq_ttl = fr_defnatipage;
+ nat_iptq.ifq_ref = 1;
+ nat_iptq.ifq_head = NULL;
+ nat_iptq.ifq_tail = &nat_iptq.ifq_head;
+ MUTEX_INIT(&nat_iptq.ifq_lock, "nat ip ipftq tab");
+ nat_iptq.ifq_next = NULL;
+
+ /*
+ * Clamp every TCP state timeout to at least the ICMP age and, with
+ * LARGE_NAT, to at most the default NAT age.
+ */
+ for (i = 0; i < IPF_TCP_NSTATES; i++) {
+ if (nat_tqb[i].ifq_ttl < fr_defnaticmpage)
+ nat_tqb[i].ifq_ttl = fr_defnaticmpage;
+#ifdef LARGE_NAT
+ else if (nat_tqb[i].ifq_ttl > fr_defnatage)
+ nat_tqb[i].ifq_ttl = fr_defnatage;
+#endif
+ }
+
+ /*
+ * Increase this because we may have "keep state" following
+ * this too and packet storms can occur if this is removed
+ * too quickly.
+ */
+ nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl;
+
+ RWLOCK_INIT(&ipf_nat, "ipf IP NAT rwlock");
+ RWLOCK_INIT(&ipf_natfrag, "ipf IP NAT-Frag rwlock");
+ MUTEX_INIT(&ipf_nat_new, "ipf nat new mutex");
+ MUTEX_INIT(&ipf_natio, "ipf nat io mutex");
+
+ fr_nat_init = 1;
+
+ return 0;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    nat_addrdr                                                  */
+/* Returns:     Nil                                                         */
+/* Parameters:  n(I) - pointer to NAT rule to add                           */
+/*                                                                          */
+/* Appends a redirect rule to the end of its bucket in the hash table of    */
+/* redirect rules, recording the bucket number in in_hv.  Also updates the  */
+/* bitmask (rdr_masks) indicating which netmask lengths are in use by       */
+/* redirect rules.                                                          */
+/* ------------------------------------------------------------------------ */
+static void nat_addrdr(n)
+ipnat_t *n;
+{
+	ipnat_t **np;
+	u_32_t j;
+	u_int hv;
+	int k;
+
+	k = count4bits(n->in_outmsk);
+	/*
+	 * k may be 31, and shifting a signed 1 into the sign bit is
+	 * undefined, so shift an unsigned value of rdr_masks' own type.
+	 */
+	if ((k >= 0) && (k != 32))
+		rdr_masks |= (u_32_t)1 << k;
+	j = (n->in_outip & n->in_outmsk);
+	hv = NAT_HASH_FN(j, 0, ipf_rdrrules_sz);
+
+	/* Walk to the tail of the bucket so rules keep insertion order. */
+	np = rdr_rules + hv;
+	while (*np != NULL)
+		np = &(*np)->in_rnext;
+	n->in_rnext = NULL;
+	n->in_prnext = np;
+	n->in_hv = hv;
+	*np = n;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    nat_addnat                                                  */
+/* Returns:     Nil                                                         */
+/* Parameters:  n(I) - pointer to NAT rule to add                           */
+/*                                                                          */
+/* Appends a NAT map rule to the end of its bucket in the hash table of map */
+/* rules, recording the bucket number in in_hv.  Also updates the bitmask   */
+/* (nat_masks) indicating which netmask lengths are in use by map rules.    */
+/* ------------------------------------------------------------------------ */
+static void nat_addnat(n)
+ipnat_t *n;
+{
+	ipnat_t **np;
+	u_32_t j;
+	u_int hv;
+	int k;
+
+	k = count4bits(n->in_inmsk);
+	/*
+	 * k may be 31, and shifting a signed 1 into the sign bit is
+	 * undefined, so shift an unsigned value of nat_masks' own type.
+	 */
+	if ((k >= 0) && (k != 32))
+		nat_masks |= (u_32_t)1 << k;
+	j = (n->in_inip & n->in_inmsk);
+	hv = NAT_HASH_FN(j, 0, ipf_natrules_sz);
+
+	/* Walk to the tail of the bucket so rules keep insertion order. */
+	np = nat_rules + hv;
+	while (*np != NULL)
+		np = &(*np)->in_mnext;
+	n->in_mnext = NULL;
+	n->in_pmnext = np;
+	n->in_hv = hv;
+	*np = n;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    nat_delrdr                                                  */
+/* Returns:     Nil                                                         */
+/* Parameters:  n(I) - pointer to NAT rule to delete                        */
+/*                                                                          */
+/* Unlinks a redirect rule from its hash chain: the successor (if any)      */
+/* inherits the rule's back pointer and the slot that pointed at the rule   */
+/* is made to point at the successor.                                       */
+/* ------------------------------------------------------------------------ */
+static void nat_delrdr(n)
+ipnat_t *n;
+{
+	ipnat_t *succ = n->in_rnext;
+
+	if (succ != NULL) {
+		succ->in_prnext = n->in_prnext;
+	}
+	*n->in_prnext = succ;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    nat_delnat                                                  */
+/* Returns:     Nil                                                         */
+/* Parameters:  n(I) - pointer to NAT rule to delete                        */
+/*                                                                          */
+/* Unlinks a NAT map rule from its hash chain: the successor (if any)       */
+/* inherits the rule's back pointer and the slot that pointed at the rule   */
+/* is made to point at the successor.                                       */
+/* ------------------------------------------------------------------------ */
+static void nat_delnat(n)
+ipnat_t *n;
+{
+	ipnat_t *succ = n->in_mnext;
+
+	if (succ != NULL) {
+		succ->in_pmnext = n->in_pmnext;
+	}
+	*n->in_pmnext = succ;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    nat_hostmap                                                 */
+/* Returns:     struct hostmap* - NULL if no hostmap could be created,      */
+/*                                else a pointer to the hostmapping to use  */
+/* Parameters:  np(I)   - pointer to NAT rule, or NULL to only look up      */
+/*              src(I)  - source IP address                                 */
+/*              dst(I)  - destination IP address                            */
+/*              map(I)  - mapped IP address to record in a new entry        */
+/*              port(I) - destination port number (0 matches any)           */
+/* Write Locks: ipf_nat                                                     */
+/*                                                                          */
+/* Check if an ip address has already been allocated for a given mapping    */
+/* that is not doing port based translation.  A matching entry has its      */
+/* reference count bumped and is returned.  If there is no match, a new     */
+/* entry is created only if a non-NULL NAT rule pointer has been supplied.  */
+/* ------------------------------------------------------------------------ */
+static struct hostmap *nat_hostmap(np, src, dst, map, port)
+ipnat_t *np;
+struct in_addr src;
+struct in_addr dst;
+struct in_addr map;
+u_32_t port;
+{
+	hostmap_t *hm;
+	u_int hv;
+
+	hv = (src.s_addr ^ dst.s_addr);
+	hv += src.s_addr;
+	hv += dst.s_addr;
+	/*
+	 * maptable is allocated with ipf_hostmap_sz buckets in fr_natinit(),
+	 * so the hash must be reduced by that same (tunable) size rather
+	 * than by the compile time HOSTMAP_SIZE.
+	 */
+	hv %= ipf_hostmap_sz;
+	for (hm = maptable[hv]; hm; hm = hm->hm_next)
+		if ((hm->hm_srcip.s_addr == src.s_addr) &&
+		    (hm->hm_dstip.s_addr == dst.s_addr) &&
+		    ((np == NULL) || (np == hm->hm_ipnat)) &&
+		    ((port == 0) || (port == hm->hm_port))) {
+			hm->hm_ref++;
+			return hm;
+		}
+
+	/* Lookup only: the caller did not supply a rule to create for. */
+	if (np == NULL)
+		return NULL;
+
+	KMALLOC(hm, hostmap_t *);
+	if (hm) {
+		/* Insert at the head of the bucket. */
+		hm->hm_next = maptable[hv];
+		hm->hm_pnext = maptable + hv;
+		if (maptable[hv] != NULL)
+			maptable[hv]->hm_pnext = &hm->hm_next;
+		maptable[hv] = hm;
+		hm->hm_ipnat = np;
+		hm->hm_srcip = src;
+		hm->hm_dstip = dst;
+		hm->hm_mapip = map;
+		hm->hm_ref = 1;
+		hm->hm_port = port;
+	}
+	return hm;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    nat_hostmapdel                                              */
+/* Returns:     Nil                                                         */
+/* Parameters:  hm(I) - pointer to hostmap structure                        */
+/* Write Locks: ipf_nat                                                     */
+/*                                                                          */
+/* Drops one reference on the hostmap.  When the count hits zero the entry  */
+/* is unlinked from its chain and freed.                                    */
+/* ------------------------------------------------------------------------ */
+static void nat_hostmapdel(hm)
+struct hostmap *hm;
+{
+	hm->hm_ref--;
+	if (hm->hm_ref != 0)
+		return;
+
+	/* Last reference gone: unlink, then release the memory. */
+	if (hm->hm_next != NULL) {
+		hm->hm_next->hm_pnext = hm->hm_pnext;
+	}
+	*hm->hm_pnext = hm->hm_next;
+	KFREE(hm);
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    fix_outcksum                                                */
+/* Returns:     Nil                                                         */
+/* Parameters:  fin(I) - pointer to packet information                      */
+/*              sp(I)  - location of 16bit checksum to update               */
+/*              n(I)   - amount to adjust checksum by                       */
+/*                                                                          */
+/* Checksum adjustment for outbound packets.  With NAT_HW_CKSUM set in n,   */
+/* the low 16 bits of n plus fin_dlen are folded once and stored as they    */
+/* are.  Otherwise n is added to the complement of the stored (network      */
+/* order) sum, the carries are folded and the complemented result is        */
+/* written back in network order.  Nothing is done when n is 0.             */
+/* ------------------------------------------------------------------------ */
+void fix_outcksum(fin, sp, n)
+fr_info_t *fin;
+u_short *sp;
+u_32_t n;
+{
+	u_32_t acc;
+	u_short folded;
+
+	if (n == 0)
+		return;
+
+	if ((n & NAT_HW_CKSUM) != 0) {
+		n &= 0xffff;
+		n += fin->fin_dlen;
+		n = (n >> 16) + (n & 0xffff);
+		*sp = n & 0xffff;
+	} else {
+		acc = (~ntohs(*sp)) & 0xffff;
+		acc += n;
+		/* Fold twice so that no carry remains above bit 15. */
+		acc = (acc & 0xffff) + (acc >> 16);
+		acc = (acc & 0xffff) + (acc >> 16);
+		folded = ~(u_short)acc;
+		*sp = htons(folded);
+	}
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    fix_incksum                                                 */
+/* Returns:     Nil                                                         */
+/* Parameters:  fin(I) - pointer to packet information                      */
+/*              sp(I)  - location of 16bit checksum to update               */
+/*              n(I)   - amount to adjust checksum by                       */
+/*                                                                          */
+/* Checksum adjustment for inbound packets.  With NAT_HW_CKSUM set in n,    */
+/* the low 16 bits of n plus fin_dlen are folded once and stored as they    */
+/* are.  Otherwise the 16bit complement of n is added to the complement of  */
+/* the stored (network order) sum, the carries are folded and the           */
+/* complemented result is written back in network order.  Nothing is done   */
+/* when n is 0.                                                             */
+/* ------------------------------------------------------------------------ */
+void fix_incksum(fin, sp, n)
+fr_info_t *fin;
+u_short *sp;
+u_32_t n;
+{
+	u_32_t acc;
+	u_short folded;
+
+	if (n == 0)
+		return;
+
+	if ((n & NAT_HW_CKSUM) != 0) {
+		n &= 0xffff;
+		n += fin->fin_dlen;
+		n = (n >> 16) + (n & 0xffff);
+		*sp = n & 0xffff;
+	} else {
+		acc = (~ntohs(*sp)) & 0xffff;
+		acc += ~(n) & 0xffff;
+		/* Fold twice so that no carry remains above bit 15. */
+		acc = (acc & 0xffff) + (acc >> 16);
+		acc = (acc & 0xffff) + (acc >> 16);
+		folded = ~(u_short)acc;
+		*sp = htons(folded);
+	}
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    fix_datacksum                                               */
+/* Returns:     Nil                                                         */
+/* Parameters:  sp(I) - location of 16bit checksum to update                */
+/*              n(I)  - amount to adjust checksum by                        */
+/*                                                                          */
+/* Fix_datacksum is used *only* to adjust checksums that live in the data   */
+/* section of an IP packet.                                                 */
+/*                                                                          */
+/* That is needed when NAT'ing an ICMP error message, whose body carries    */
+/* the IP header of the original packet that caused the error.              */
+/*                                                                          */
+/* fix_incksum and fix_outcksum cannot be used there: to the kernel the     */
+/* body of the ICMP error is plain data, so none of the special handling    */
+/* (hardware checksum, ntohs processing) has been applied to it.            */
+/* ------------------------------------------------------------------------ */
+void fix_datacksum(sp, n)
+u_short *sp;
+u_32_t n;
+{
+	u_32_t acc;
+	u_short folded;
+
+	if (n != 0) {
+		acc = (~ntohs(*sp)) & 0xffff;
+		acc += n;
+		/* Fold twice so that no carry remains above bit 15. */
+		acc = (acc & 0xffff) + (acc >> 16);
+		acc = (acc & 0xffff) + (acc >> 16);
+		folded = ~(u_short)acc;
+		*sp = htons(folded);
+	}
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_nat_ioctl */
+/* Returns: int - 0 == success, != 0 == failure */
+/* Parameters: data(I) - pointer to ioctl data */
+/* cmd(I) - ioctl command integer */
+/* mode(I) - file mode bits used with open; also carries the */
+/* NAT_LOCKHELD and NAT_SYSSPACE flags tested below */
+/* */
+/* Processes an ioctl call made to operate on the IP Filter NAT device. */
+/* For SIOCADNAT/SIOCRMNAT the ipf_natio mutex is taken before the switch */
+/* and released inside the matching case. */
+/* ------------------------------------------------------------------------ */
+int fr_nat_ioctl(data, cmd, mode)
+ioctlcmd_t cmd;
+caddr_t data;
+int mode;
+{
+ ipnat_t *nat, *nt, *n = NULL, **np = NULL;
+ int error = 0, ret, arg, getlock;
+ ipnat_t natd;
+
+#if (BSD >= 199306) && defined(_KERNEL)
+ if ((securelevel >= 2) && (mode & FWRITE))
+ return EPERM;
+#endif
+
+ /*
+ * getlock is non-zero when this code has to take the ipf_nat lock
+ * itself, i.e. the caller did not pass NAT_LOCKHELD.
+ */
+#if defined(__osf__) && defined(_KERNEL)
+ getlock = 0;
+#else
+ getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
+#endif
+
+ nat = NULL; /* XXX gcc -Wuninitialized */
+ /*
+ * Pre-allocate the rule for an add.  nt is freed at "done" unless
+ * the add succeeds, in which case it is set to NULL below.
+ */
+ if (cmd == (ioctlcmd_t)SIOCADNAT) {
+ KMALLOC(nt, ipnat_t *);
+ } else {
+ nt = NULL;
+ }
+
+ /* Fetch the command argument: a rule for add/remove, an int for flush. */
+ if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
+ if (mode & NAT_SYSSPACE) {
+ bcopy(data, (char *)&natd, sizeof(natd));
+ error = 0;
+ } else {
+ error = fr_inobj(data, &natd, IPFOBJ_IPNAT);
+ }
+
+ } else if (cmd == (ioctlcmd_t)SIOCIPFFL) { /* SIOCFLNAT & SIOCCNATL */
+ BCOPYIN(data, &arg, sizeof(arg));
+ }
+
+ if (error != 0)
+ goto done;
+
+ /*
+ * For add/delete, look to see if the NAT entry is already present
+ */
+ if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
+ nat = &natd;
+ if (nat->in_v == 0) /* For backward compat. */
+ nat->in_v = 4;
+ nat->in_flags &= IPN_USERFLAGS;
+ if ((nat->in_redir & NAT_MAPBLK) == 0) {
+ if ((nat->in_flags & IPN_SPLIT) == 0)
+ nat->in_inip &= nat->in_inmsk;
+ if ((nat->in_flags & IPN_IPRANGE) == 0)
+ nat->in_outip &= nat->in_outmsk;
+ }
+ MUTEX_ENTER(&ipf_natio);
+ /*
+ * On exit n is the matching rule (or NULL) and np is the link
+ * pointing at it (or the tail link when there is no match).
+ */
+ for (np = &nat_list; ((n = *np) != NULL); np = &n->in_next)
+ if (!bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
+ IPN_CMPSIZ))
+ break;
+ }
+
+ switch (cmd)
+ {
+#ifdef IPFILTER_LOG
+ case SIOCIPFFB :
+ {
+ int tmp;
+
+ if (!(mode & FWRITE))
+ error = EPERM;
+ else {
+ tmp = ipflog_clear(IPL_LOGNAT);
+ BCOPYOUT((char *)&tmp, (char *)data, sizeof(tmp));
+ }
+ break;
+ }
+ case SIOCSETLG :
+ if (!(mode & FWRITE))
+ error = EPERM;
+ else {
+ BCOPYIN((char *)data, (char *)&nat_logging,
+ sizeof(nat_logging));
+ }
+ break;
+ case SIOCGETLG :
+ BCOPYOUT((char *)&nat_logging, (char *)data,
+ sizeof(nat_logging));
+ break;
+ case FIONREAD :
+ arg = iplused[IPL_LOGNAT];
+ BCOPYOUT(&arg, data, sizeof(arg));
+ break;
+#endif
+ case SIOCADNAT :
+ if (!(mode & FWRITE)) {
+ error = EPERM;
+ } else if (n != NULL) {
+ error = EEXIST;
+ } else if (nt == NULL) {
+ error = ENOMEM;
+ }
+ if (error != 0) {
+ MUTEX_EXIT(&ipf_natio);
+ break;
+ }
+ bcopy((char *)nat, (char *)nt, sizeof(*n));
+ error = nat_siocaddnat(nt, np, getlock);
+ MUTEX_EXIT(&ipf_natio);
+ /* The rule list now owns nt; stop "done" from freeing it. */
+ if (error == 0)
+ nt = NULL;
+ break;
+ case SIOCRMNAT :
+ if (!(mode & FWRITE)) {
+ error = EPERM;
+ n = NULL;
+ } else if (n == NULL) {
+ error = ESRCH;
+ }
+
+ if (error != 0) {
+ MUTEX_EXIT(&ipf_natio);
+ break;
+ }
+ nat_siocdelnat(n, np, getlock);
+
+ MUTEX_EXIT(&ipf_natio);
+ n = NULL;
+ break;
+ case SIOCGNATS :
+ /* Refresh the pointers and sizes in nat_stats before copying out. */
+ nat_stats.ns_table[0] = nat_table[0];
+ nat_stats.ns_table[1] = nat_table[1];
+ nat_stats.ns_list = nat_list;
+ nat_stats.ns_maptable = maptable;
+ nat_stats.ns_nattab_sz = ipf_nattable_sz;
+ nat_stats.ns_nattab_max = ipf_nattable_max;
+ nat_stats.ns_rultab_sz = ipf_natrules_sz;
+ nat_stats.ns_rdrtab_sz = ipf_rdrrules_sz;
+ nat_stats.ns_hostmap_sz = ipf_hostmap_sz;
+ nat_stats.ns_instances = nat_instances;
+ nat_stats.ns_apslist = ap_sess_list;
+ error = fr_outobj(data, &nat_stats, IPFOBJ_NATSTAT);
+ break;
+ case SIOCGNATL :
+ {
+ natlookup_t nl;
+
+ if (getlock) {
+ READ_ENTER(&ipf_nat);
+ }
+ error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP);
+ if (error == 0) {
+ if (nat_lookupredir(&nl) != NULL) {
+ error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP);
+ } else {
+ error = ESRCH;
+ }
+ }
+ if (getlock) {
+ RWLOCK_EXIT(&ipf_nat);
+ }
+ break;
+ }
+ case SIOCIPFFL : /* old SIOCFLNAT & SIOCCNATL */
+ if (!(mode & FWRITE)) {
+ error = EPERM;
+ break;
+ }
+ if (getlock) {
+ WRITE_ENTER(&ipf_nat);
+ }
+ error = 0;
+ /* arg 0 calls nat_flushtable(), arg 1 calls nat_clearlist(). */
+ if (arg == 0)
+ ret = nat_flushtable();
+ else if (arg == 1)
+ ret = nat_clearlist();
+ else
+ error = EINVAL;
+ if (getlock) {
+ RWLOCK_EXIT(&ipf_nat);
+ }
+ /* ret is only assigned, and only copied out, when error is 0. */
+ if (error == 0) {
+ BCOPYOUT(&ret, data, sizeof(ret));
+ }
+ break;
+ case SIOCPROXY :
+ error = appr_ioctl(data, cmd, mode);
+ break;
+ case SIOCSTLCK :
+ if (!(mode & FWRITE)) {
+ error = EPERM;
+ } else {
+ fr_lock(data, &fr_nat_lock);
+ }
+ break;
+ /* The SIOCST{PUT,GSZ,GET} commands are refused unless fr_nat_lock is set. */
+ case SIOCSTPUT :
+ if (fr_nat_lock && (mode & FWRITE)) {
+ error = fr_natputent(data, getlock);
+ } else {
+ error = EACCES;
+ }
+ break;
+ case SIOCSTGSZ :
+ if (fr_nat_lock) {
+ if (getlock) {
+ READ_ENTER(&ipf_nat);
+ }
+ error = fr_natgetsz(data);
+ if (getlock) {
+ RWLOCK_EXIT(&ipf_nat);
+ }
+ } else
+ error = EACCES;
+ break;
+ case SIOCSTGET :
+ if (fr_nat_lock) {
+ if (getlock) {
+ READ_ENTER(&ipf_nat);
+ }
+ error = fr_natgetent(data);
+ if (getlock) {
+ RWLOCK_EXIT(&ipf_nat);
+ }
+ } else
+ error = EACCES;
+ break;
+ default :
+ error = EINVAL;
+ break;
+ }
+done:
+ /* Release the pre-allocated rule if it was not handed to the list. */
+ if (nt)
+ KFREE(nt);
+ return error;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: nat_siocaddnat */
+/* Returns: int - 0 == success, != 0 == failure */
+/* Parameters: n(I) - pointer to new NAT rule */
+/* np(I) - pointer to where to insert new NAT rule */
+/* getlock(I) - non-zero if this function must take the write */
+/* lock on ipf_nat itself before linking the rule */
+/* Mutex Locks: ipf_natio */
+/* */
+/* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */
+/* from information passed to the kernel, then add it to the appropriate */
+/* NAT rule table(s). Returns ENOENT if a proxy label is given but no */
+/* proxy was resolved, and EINVAL if in_age[1] is set without in_age[0]. */
+/* ------------------------------------------------------------------------ */
+static int nat_siocaddnat(n, np, getlock)
+ipnat_t *n, **np;
+int getlock;
+{
+ int error = 0, i, j;
+
+ nat_resolverule(n);
+ if (n->in_plabel[0] != '\0') {
+ if (n->in_apr == NULL)
+ return ENOENT;
+ }
+
+ if ((n->in_age[0] == 0) && (n->in_age[1] != 0))
+ return EINVAL;
+
+ /* First estimate of in_space, chosen by the kind of rule. */
+ n->in_use = 0;
+ if (n->in_redir & NAT_MAPBLK)
+ n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
+ else if (n->in_flags & IPN_AUTOPORTMAP)
+ n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
+ else if (n->in_flags & IPN_IPRANGE)
+ n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
+ else if (n->in_flags & IPN_SPLIT)
+ n->in_space = 2;
+ else if (n->in_outmsk != 0)
+ n->in_space = ~ntohl(n->in_outmsk);
+ else
+ n->in_space = 1;
+
+ /*
+ * Calculate the number of valid IP addresses in the output
+ * mapping range. In all cases, the range is inclusive of
+ * the start and ending IP addresses.
+ * If to a CIDR address, lose 2: broadcast + network address
+ * (so subtract 1)
+ * If to a range, add one.
+ * If to a single IP address, set to 1.
+ */
+ if (n->in_space) {
+ if ((n->in_flags & IPN_IPRANGE) != 0)
+ n->in_space += 1;
+ else
+ n->in_space -= 1;
+ } else
+ n->in_space = 1;
+
+ /* Pick the first address to hand out (in_nip, host byte order). */
+ if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
+ ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
+ n->in_nip = ntohl(n->in_outip) + 1;
+ else if ((n->in_flags & IPN_SPLIT) &&
+ (n->in_redir & NAT_REDIRECT))
+ n->in_nip = ntohl(n->in_inip);
+ else
+ n->in_nip = ntohl(n->in_outip);
+ if (n->in_redir & NAT_MAP) {
+ n->in_pnext = ntohs(n->in_pmin);
+ /*
+ * Multiply by the number of ports made available.
+ */
+ if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
+ n->in_space *= (ntohs(n->in_pmax) -
+ ntohs(n->in_pmin) + 1);
+ /*
+ * Because two different sources can map to
+ * different destinations but use the same
+ * local IP#/port #.
+ * If the result is smaller than in_space, then
+ * we may have wrapped around 32bits.
+ */
+ i = n->in_inmsk;
+ if ((i != 0) && (i != 0xffffffff)) {
+ j = n->in_space * (~ntohl(i) + 1);
+ if (j >= n->in_space)
+ n->in_space = j;
+ else
+ n->in_space = 0xffffffff;
+ }
+ }
+ /*
+ * If no protocol is specified, multiple by 256 to allow for
+ * at least one IP:IP mapping per protocol.
+ */
+ if ((n->in_flags & IPN_TCPUDPICMP) == 0) {
+ j = n->in_space * 256;
+ if (j >= n->in_space)
+ n->in_space = j;
+ else
+ n->in_space = 0xffffffff;
+ }
+ }
+
+ /* Otherwise, these fields are preset */
+
+ if (getlock) {
+ WRITE_ENTER(&ipf_nat);
+ }
+ /* Link the rule in at the position the caller found (*np). */
+ n->in_next = NULL;
+ *np = n;
+
+ /* A non-zero age gets its own timeout queue on the nat_utqe list. */
+ if (n->in_age[0] != 0)
+ n->in_tqehead[0] = fr_addtimeoutqueue(&nat_utqe, n->in_age[0]);
+
+ if (n->in_age[1] != 0)
+ n->in_tqehead[1] = fr_addtimeoutqueue(&nat_utqe, n->in_age[1]);
+
+ /* Hash the rule into the rdr and/or map tables according to in_redir. */
+ if (n->in_redir & NAT_REDIRECT) {
+ n->in_flags &= ~IPN_NOTDST;
+ nat_addrdr(n);
+ }
+ if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
+ n->in_flags &= ~IPN_NOTSRC;
+ nat_addnat(n);
+ }
+ n = NULL;
+ nat_stats.ns_rules++;
+#if SOLARIS
+ pfil_delayed_copy = 0;
+#endif
+ if (getlock) {
+ RWLOCK_EXIT(&ipf_nat); /* WRITE */
+ }
+
+ return error;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    nat_resolverule                                             */
+/* Returns:     Nil                                                         */
+/* Parameters:  n(I) - pointer to NAT rule                                  */
+/*                                                                          */
+/* Resolves the names held in a NAT rule into kernel pointers: each         */
+/* interface name is terminated and looked up with fr_resolvenic(), and a   */
+/* non-empty proxy label is looked up with appr_lookup().  If the second    */
+/* interface name is empty it is copied from the first one.                 */
+/* ------------------------------------------------------------------------ */
+static void nat_resolverule(n)
+ipnat_t *n;
+{
+	n->in_ifnames[0][LIFNAMSIZ - 1] = '\0';
+	n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4);
+
+	n->in_ifnames[1][LIFNAMSIZ - 1] = '\0';
+	if (n->in_ifnames[1][0] == '\0') {
+		/* No second interface given: reuse the first one. */
+		(void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ);
+		n->in_ifps[1] = n->in_ifps[0];
+	} else {
+		/* Resolve the second name itself, not the first one again. */
+		n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4);
+	}
+
+	if (n->in_plabel[0] != '\0') {
+		n->in_apr = appr_lookup(n->in_p, n->in_plabel);
+	}
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    nat_siocdelnat                                              */
+/* Returns:     Nil                                                         */
+/* Parameters:  n(I)       - pointer to NAT rule to delete                  */
+/*              np(I)      - pointer to the link that points at n           */
+/*              getlock(I) - non-zero if this function must take the write  */
+/*                           lock on ipf_nat itself                         */
+/* Mutex Locks: ipf_natio                                                   */
+/*                                                                          */
+/* Handle SIOCRMNAT.  Removes the rule from the rdr/map hash tables,        */
+/* releases its timeout queues and unlinks it from the rule list.  If the   */
+/* rule is no longer in use (in_use == 0) it is freed here, otherwise it    */
+/* is flagged with IPN_DELETE and left allocated.                           */
+/* ------------------------------------------------------------------------ */
+static void nat_siocdelnat(n, np, getlock)
+ipnat_t *n, **np;
+int getlock;
+{
+	if (getlock) {
+		WRITE_ENTER(&ipf_nat);
+	}
+	if (n->in_redir & NAT_REDIRECT)
+		nat_delrdr(n);
+	if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
+		nat_delnat(n);
+	/*
+	 * NOTE(review): n is still linked on nat_list at this point, so
+	 * this test looks like it cannot be true when the last rule is
+	 * removed - confirm whether the reset was meant to follow the
+	 * unlink below.
+	 */
+	if (nat_list == NULL) {
+		nat_masks = 0;
+		rdr_masks = 0;
+	}
+
+	/*
+	 * Free each queue only after its own delete call returned 0.
+	 * Queue 0 must free in_tqehead[0]; freeing in_tqehead[1] here
+	 * would leak queue 0 and could free queue 1 twice.
+	 */
+	if (n->in_tqehead[0] != NULL) {
+		if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) {
+			fr_freetimeoutqueue(n->in_tqehead[0]);
+		}
+	}
+
+	if (n->in_tqehead[1] != NULL) {
+		if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) {
+			fr_freetimeoutqueue(n->in_tqehead[1]);
+		}
+	}
+
+	/* Unlink from the list of loaded rules. */
+	*np = n->in_next;
+
+	if (n->in_use == 0) {
+		if (n->in_apr)
+			appr_free(n->in_apr);
+		KFREE(n);
+		nat_stats.ns_rules--;
+#if SOLARIS
+		if (nat_stats.ns_rules == 0)
+			pfil_delayed_copy = 1;
+#endif
+	} else {
+		/* Still in use: mark it deleted and leave it allocated. */
+		n->in_flags |= IPN_DELETE;
+		n->in_next = NULL;
+	}
+	if (getlock) {
+		RWLOCK_EXIT(&ipf_nat);			/* READ/WRITE */
+	}
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    fr_natgetsz                                                 */
+/* Returns:     int - 0 == success, != 0 is the error value.                */
+/* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
+/*                        get the size of.                                  */
+/*                                                                          */
+/* Handle SIOCSTGSZ.                                                        */
+/* Return the size of the nat list entry to be copied back to user space.   */
+/* The size of the entry is stored in the ng_sz field and the entire natget */
+/* structure is copied back to the user.  A NULL ng_ptr selects the first   */
+/* entry of the list; ESRCH is returned for a pointer not on the list.      */
+/* ------------------------------------------------------------------------ */
+static int fr_natgetsz(data)
+caddr_t data;
+{
+	ap_session_t *sess;
+	nat_t *target, *walk;
+	natget_t req;
+
+	BCOPYIN(data, &req, sizeof(req));
+
+	target = req.ng_ptr;
+	if (target != NULL) {
+		/*
+		 * Only accept a pointer that is on the current list of
+		 * entries, so that random kernel data is never sized up.
+		 */
+		walk = nat_instances;
+		while ((walk != NULL) && (walk != target))
+			walk = walk->nat_next;
+		if (walk == NULL)
+			return ESRCH;
+	} else {
+		target = nat_instances;
+		req.ng_sz = 0;
+		/*
+		 * Nothing on the list: report a size of 0.
+		 */
+		if (target == NULL) {
+			BCOPYOUT(&req, data, sizeof(req));
+			return 0;
+		}
+	}
+
+	/*
+	 * Include any space required for proxy data structures.
+	 */
+	req.ng_sz = sizeof(nat_save_t);
+	sess = target->nat_aps;
+	if (sess != NULL) {
+		req.ng_sz += sizeof(ap_session_t) - 4;
+		if (sess->aps_data != NULL)
+			req.ng_sz += sess->aps_psiz;
+	}
+
+	BCOPYOUT(&req, data, sizeof(req));
+	return 0;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    fr_natgetent                                                */
+/* Returns:     int - 0 == success, != 0 is the error value.                */
+/* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
+/*                        to NAT structure to copy out.                     */
+/*                                                                          */
+/* Handle SIOCSTGET.                                                        */
+/* Copies out NAT entry to user space.  Any additional data held for a      */
+/* proxy is also copied, as too is the NAT rule which was responsible for   */
+/* it.  The requested size (ipn_dsize) must be at least sizeof(nat_save_t)  */
+/* and no more than 81920 bytes, else EINVAL is returned.  ENOENT is        */
+/* returned for an empty list, ESRCH for a pointer not on the list and      */
+/* ENOBUFS when the proxy data does not fit.                                */
+/* ------------------------------------------------------------------------ */
+static int fr_natgetent(data)
+caddr_t data;
+{
+	int error, outsize;
+	ap_session_t *aps;
+	nat_save_t *ipn, ipns;
+	nat_t *n, *nat;
+
+	error = fr_inobj(data, &ipns, IPFOBJ_NATSAVE);
+	if (error != 0)
+		return error;
+
+	if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920))
+		return EINVAL;
+
+	KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize);
+	if (ipn == NULL)
+		return ENOMEM;
+
+	/*
+	 * The whole buffer is copied out to user space below, but not
+	 * every part of it is always filled in (rule, filter rule, proxy
+	 * data).  Zero it first so that stale kernel heap contents are
+	 * never handed to the caller.
+	 */
+	bzero((char *)ipn, ipns.ipn_dsize);
+
+	ipn->ipn_dsize = ipns.ipn_dsize;
+	nat = ipns.ipn_next;
+	if (nat == NULL) {
+		nat = nat_instances;
+		if (nat == NULL) {
+			if (nat_instances == NULL)
+				error = ENOENT;
+			goto finished;
+		}
+	} else {
+		/*
+		 * Make sure the pointer we're copying from exists in the
+		 * current list of entries.  Security precaution to prevent
+		 * copying of random kernel data.
+		 */
+		for (n = nat_instances; n; n = n->nat_next)
+			if (n == nat)
+				break;
+		if (n == NULL) {
+			error = ESRCH;
+			goto finished;
+		}
+	}
+	ipn->ipn_next = nat->nat_next;
+
+	/*
+	 * Copy the NAT structure.
+	 */
+	bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat));
+
+	/*
+	 * If we have a pointer to the NAT rule it belongs to, save that too.
+	 */
+	if (nat->nat_ptr != NULL)
+		bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat,
+		      sizeof(ipn->ipn_ipnat));
+
+	/*
+	 * If we also know the NAT entry has an associated filter rule,
+	 * save that too.
+	 */
+	if (nat->nat_fr != NULL)
+		bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr,
+		      sizeof(ipn->ipn_fr));
+
+	/*
+	 * Last but not least, if there is an application proxy session set
+	 * up for this NAT entry, then copy that out too, including any
+	 * private data saved along side it by the proxy.
+	 */
+	aps = nat->nat_aps;
+	/* Space available in the trailing ipn_data area of the buffer. */
+	outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data);
+	if (aps != NULL) {
+		char *s;
+
+		if (outsize < sizeof(*aps)) {
+			error = ENOBUFS;
+			goto finished;
+		}
+
+		s = ipn->ipn_data;
+		bcopy((char *)aps, s, sizeof(*aps));
+		s += sizeof(*aps);
+		outsize -= sizeof(*aps);
+		if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz))
+			bcopy(aps->aps_data, s, aps->aps_psiz);
+		else
+			error = ENOBUFS;
+	}
+	if (error == 0) {
+		error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize);
+	}
+
+finished:
+	if (ipn != NULL) {
+		KFREES(ipn, ipns.ipn_dsize);
+	}
+	return error;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    fr_natputent                                                */
+/* Returns:     int - 0 == success, != 0 is the error value.                */
+/* Parameters:  data(I)    - pointer to natget structure with NAT           */
+/*                           structure information to load into the kernel  */
+/*              getlock(I) - non-zero if this function has to acquire and   */
+/*                           release ipf_nat itself, zero if it must not    */
+/*                           touch the lock.                                */
+/*                                                                          */
+/* Handle SIOCSTPUT.                                                        */
+/* Loads a NAT table entry from user space, including a NAT rule, proxy and */
+/* firewall rule data structures, if pointers to them indicate so.          */
+/* Everything copied in is untrusted: pointers found in the image are only  */
+/* used as "was present" flags (or, for a shared filter rule, looked up in  */
+/* the live NAT list) and sizes are checked against the copied-in buffer.   */
+/* ------------------------------------------------------------------------ */
+static int fr_natputent(data, getlock)
+caddr_t data;
+int getlock;
+{
+	nat_save_t ipn, *ipnn;
+	ap_session_t *aps;
+	nat_t *n, *nat;
+	frentry_t *fr;
+	fr_info_t fin;
+	ipnat_t *in;
+	size_t avail;
+	int error;
+
+	error = fr_inobj(data, &ipn, IPFOBJ_NATSAVE);
+	if (error != 0)
+		return error;
+
+	/*
+	 * Initialise early because of code at junkput label.
+	 */
+	in = NULL;
+	aps = NULL;
+	nat = NULL;
+	ipnn = NULL;
+	avail = 0;
+
+	/*
+	 * New entry, copy in the rest of the NAT entry if its size is more
+	 * than just the nat_save_t structure.  "avail" records how many
+	 * bytes starting at ipn_data really belong to the image.
+	 */
+	fr = NULL;
+	if (ipn.ipn_dsize > sizeof(ipn)) {
+		if (ipn.ipn_dsize > 81920) {
+			error = ENOMEM;
+			goto junkput;
+		}
+
+		KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize);
+		if (ipnn == NULL)
+			return ENOMEM;
+
+		error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize);
+		if (error != 0) {
+			error = EFAULT;
+			goto junkput;
+		}
+		avail = ipn.ipn_dsize - sizeof(ipn) + sizeof(ipn.ipn_data);
+	} else {
+		ipnn = &ipn;
+		avail = sizeof(ipn.ipn_data);
+	}
+
+	KMALLOC(nat, nat_t *);
+	if (nat == NULL) {
+		error = ENOMEM;
+		goto junkput;
+	}
+
+	bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat));
+	/*
+	 * Initialize all these so that nat_delete() doesn't cause a crash.
+	 */
+	bzero((char *)nat, offsetof(struct nat, nat_tqe));
+	nat->nat_tqe.tqe_pnext = NULL;
+	nat->nat_tqe.tqe_next = NULL;
+	nat->nat_tqe.tqe_ifq = NULL;
+	nat->nat_tqe.tqe_parent = nat;
+
+	/*
+	 * Restore the rule associated with this nat session
+	 */
+	in = ipnn->ipn_nat.nat_ptr;
+	if (in != NULL) {
+		KMALLOC(in, ipnat_t *);
+		nat->nat_ptr = in;
+		if (in == NULL) {
+			error = ENOMEM;
+			goto junkput;
+		}
+		/*
+		 * Copy first and clear the leading part afterwards (the same
+		 * order as used for the nat_t above); clearing before the
+		 * copy would simply be overwritten by the saved image.
+		 */
+		bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in));
+		bzero((char *)in, offsetof(struct ipnat, in_next6));
+		in->in_use = 1;
+		in->in_flags |= IPN_DELETE;
+
+		/*
+		 * NOTE(review): ns_rules is not decremented again if we
+		 * leave through junkput after this point - confirm whether
+		 * the counter should be rolled back there.
+		 */
+		ATOMIC_INC(nat_stats.ns_rules);
+
+		nat_resolverule(in);
+	}
+
+	/*
+	 * Check that the NAT entry doesn't already exist in the kernel.
+	 */
+	bzero((char *)&fin, sizeof(fin));
+	fin.fin_p = nat->nat_p;
+	if (nat->nat_dir == NAT_OUTBOUND) {
+		fin.fin_data[0] = ntohs(nat->nat_oport);
+		fin.fin_data[1] = ntohs(nat->nat_outport);
+		fin.fin_ifp = nat->nat_ifps[1];
+		if (getlock) {
+			READ_ENTER(&ipf_nat);
+		}
+		n = nat_inlookup(&fin, nat->nat_flags, fin.fin_p,
+				 nat->nat_oip, nat->nat_outip);
+		if (getlock) {
+			RWLOCK_EXIT(&ipf_nat);
+		}
+		if (n != NULL) {
+			error = EEXIST;
+			goto junkput;
+		}
+	} else if (nat->nat_dir == NAT_INBOUND) {
+		fin.fin_data[0] = ntohs(nat->nat_inport);
+		fin.fin_data[1] = ntohs(nat->nat_oport);
+		fin.fin_ifp = nat->nat_ifps[0];
+		if (getlock) {
+			READ_ENTER(&ipf_nat);
+		}
+		n = nat_outlookup(&fin, nat->nat_flags, fin.fin_p,
+				  nat->nat_outip, nat->nat_oip);
+		if (getlock) {
+			RWLOCK_EXIT(&ipf_nat);
+		}
+		if (n != NULL) {
+			error = EEXIST;
+			goto junkput;
+		}
+	} else {
+		error = EINVAL;
+		goto junkput;
+	}
+
+	/*
+	 * Restore ap_session_t structure.  Include the private data allocated
+	 * if it was there.  "aps" stays NULL until we own a fresh allocation
+	 * so that junkput never frees a pointer taken from the image.
+	 */
+	if (nat->nat_aps != NULL) {
+		/* The session header itself must lie inside the image. */
+		if (avail < sizeof(*aps)) {
+			error = EINVAL;
+			goto junkput;
+		}
+		KMALLOC(aps, ap_session_t *);
+		nat->nat_aps = aps;
+		if (aps == NULL) {
+			error = ENOMEM;
+			goto junkput;
+		}
+		bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
+		/*
+		 * aps_data just copied in is a pointer from the saved
+		 * image; drop it so the error path cannot KFREES() it.
+		 */
+		aps->aps_data = NULL;
+		if (in != NULL)
+			aps->aps_apr = in->in_apr;
+		else
+			aps->aps_apr = NULL;
+		if (aps->aps_psiz != 0) {
+			if (aps->aps_psiz > 81920) {
+				error = ENOMEM;
+				goto junkput;
+			}
+			/*
+			 * The private data follows the session header and
+			 * must also lie inside the image (the cast makes a
+			 * negative size fail this test as well).
+			 */
+			if ((size_t)aps->aps_psiz > avail - sizeof(*aps)) {
+				error = EINVAL;
+				goto junkput;
+			}
+			KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
+			if (aps->aps_data == NULL) {
+				error = ENOMEM;
+				goto junkput;
+			}
+			bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
+			      aps->aps_psiz);
+		} else {
+			aps->aps_psiz = 0;
+			aps->aps_data = NULL;
+		}
+	}
+
+	/*
+	 * If there was a filtering rule associated with this entry then
+	 * build up a new one.
+	 */
+	fr = nat->nat_fr;
+	if (fr != NULL) {
+		if ((nat->nat_flags & SI_NEWFR) != 0) {
+			KMALLOC(fr, frentry_t *);
+			nat->nat_fr = fr;
+			if (fr == NULL) {
+				error = ENOMEM;
+				goto junkput;
+			}
+			ipnn->ipn_nat.nat_fr = fr;
+			(void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE);
+			bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr));
+			/*
+			 * Set the reference count after the copy; setting it
+			 * before would be overwritten by the saved image.
+			 */
+			fr->fr_ref = 1;
+			MUTEX_NUKE(&fr->fr_lock);
+			MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock");
+		} else {
+			/*
+			 * The rule is meant to be shared: only accept the
+			 * pointer if some live NAT entry already uses it.
+			 * Honour getlock here as everywhere else in this
+			 * function.
+			 */
+			if (getlock) {
+				READ_ENTER(&ipf_nat);
+			}
+			for (n = nat_instances; n; n = n->nat_next)
+				if (n->nat_fr == fr)
+					break;
+
+			if (n != NULL) {
+				MUTEX_ENTER(&fr->fr_lock);
+				fr->fr_ref++;
+				MUTEX_EXIT(&fr->fr_lock);
+			}
+			if (getlock) {
+				RWLOCK_EXIT(&ipf_nat);
+			}
+
+			if (!n) {
+				/*
+				 * Unknown pointer: forget it so that junkput
+				 * does not dereference it via fr_derefrule().
+				 */
+				fr = NULL;
+				error = ESRCH;
+				goto junkput;
+			}
+		}
+	}
+
+	if (ipnn != &ipn) {
+		KFREES(ipnn, ipn.ipn_dsize);
+		ipnn = NULL;
+	}
+
+	if (getlock) {
+		WRITE_ENTER(&ipf_nat);
+	}
+	error = nat_insert(nat, nat->nat_rev);
+	if ((error == 0) && (aps != NULL)) {
+		aps->aps_next = ap_sess_list;
+		ap_sess_list = aps;
+	}
+	if (getlock) {
+		RWLOCK_EXIT(&ipf_nat);
+	}
+
+	if (error == 0)
+		return 0;
+
+	error = ENOMEM;
+
+junkput:
+	/* Undo, in turn, everything that had been set up so far. */
+	if (fr != NULL)
+		(void) fr_derefrule(&fr);
+
+	if ((ipnn != NULL) && (ipnn != &ipn)) {
+		KFREES(ipnn, ipn.ipn_dsize);
+	}
+	if (nat != NULL) {
+		if (aps != NULL) {
+			if (aps->aps_data != NULL) {
+				KFREES(aps->aps_data, aps->aps_psiz);
+			}
+			KFREE(aps);
+		}
+		if (in != NULL) {
+			if (in->in_apr)
+				appr_free(in->in_apr);
+			KFREE(in);
+		}
+		KFREE(nat);
+	}
+	return error;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    nat_delete                                                  */
+/* Returns:     Nil                                                         */
+/* Parameters:  nat(I)     - pointer to NAT structure to delete             */
+/*              logtype(I) - type of LOG record to create before deleting,  */
+/*                           0 for none                                     */
+/* Write Lock:  ipf_nat                                                     */
+/*                                                                          */
+/* Unlink a NAT entry from the list of active entries, from both hash       */
+/* tables and from its timeout queue, then drop one reference.  Only when   */
+/* that was the last reference are the things it points at released and    */
+/* the entry itself freed.  A log record is generated first if requested    */
+/* and NAT logging is enabled.                                              */
+/* ------------------------------------------------------------------------ */
+static void nat_delete(nat, logtype)
+struct nat *nat;
+int logtype;
+{
+	struct ipnat *rule;
+	int i;
+
+	if ((logtype != 0) && (nat_logging != 0))
+		nat_log(nat, logtype);
+
+	MUTEX_ENTER(&ipf_nat_new);
+
+	/*
+	 * nat_pnext being set is taken to mean that the entry is linked
+	 * into the list and into both hash chains.
+	 */
+	if (nat->nat_pnext != NULL) {
+		nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
+		nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
+
+		/* Off the list of active entries... */
+		*nat->nat_pnext = nat->nat_next;
+		if (nat->nat_next != NULL) {
+			nat->nat_next->nat_pnext = nat->nat_pnext;
+			nat->nat_next = NULL;
+		}
+		nat->nat_pnext = NULL;
+
+		/* ...and off each of the two hash chains in turn. */
+		for (i = 0; i < 2; i++) {
+			*nat->nat_phnext[i] = nat->nat_hnext[i];
+			if (nat->nat_hnext[i] != NULL) {
+				nat->nat_hnext[i]->nat_phnext[i] =
+				    nat->nat_phnext[i];
+				nat->nat_hnext[i] = NULL;
+			}
+			nat->nat_phnext[i] = NULL;
+		}
+
+		if ((nat->nat_flags & SI_WILDP) != 0)
+			nat_stats.ns_wilds--;
+	}
+
+	/* Clear whatever pointer was registered as pointing back at us. */
+	if (nat->nat_me != NULL) {
+		*nat->nat_me = NULL;
+		nat->nat_me = NULL;
+	}
+
+	fr_deletequeueentry(&nat->nat_tqe);
+
+	/* Somebody else still holds a reference: unlinked, but not freed. */
+	nat->nat_ref--;
+	if (nat->nat_ref > 0) {
+		MUTEX_EXIT(&ipf_nat_new);
+		return;
+	}
+
+#ifdef IPFILTER_SYNC
+	if (nat->nat_sync)
+		ipfsync_del(nat->nat_sync);
+#endif
+
+	if (nat->nat_fr != NULL)
+		(void)fr_derefrule(&nat->nat_fr);
+
+	if (nat->nat_hm != NULL)
+		nat_hostmapdel(nat->nat_hm);
+
+	/*
+	 * Give the slot back to the parent rule, if there is one, and free
+	 * the rule as well when this was its last user and it has already
+	 * been flagged for deletion.
+	 */
+	rule = nat->nat_ptr;
+	if (rule != NULL) {
+		rule->in_space++;
+		rule->in_use--;
+		if ((rule->in_use == 0) && (rule->in_flags & IPN_DELETE)) {
+			if (rule->in_apr)
+				appr_free(rule->in_apr);
+			KFREE(rule);
+			nat_stats.ns_rules--;
+#if SOLARIS
+			if (nat_stats.ns_rules == 0)
+				pfil_delayed_copy = 1;
+#endif
+		}
+	}
+
+	MUTEX_DESTROY(&nat->nat_lock);
+
+	aps_free(nat->nat_aps);
+	nat_stats.ns_inuse--;
+	MUTEX_EXIT(&ipf_nat_new);
+
+	/*
+	 * Any fragment table entry referring to this NAT entry is dealt
+	 * with here, after the mutex has been dropped (because of Tru64).
+	 */
+	fr_forgetnat((void *)nat);
+
+	KFREE(nat);
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    nat_flushtable                                              */
+/* Returns:     int - number of NAT sessions deleted                        */
+/* Parameters:  Nil                                                         */
+/*                                                                          */
+/* Deletes all currently active NAT sessions.  In deleting each NAT entry a */
+/* log record should be emitted in nat_delete() if NAT logging is enabled.  */
+/* ------------------------------------------------------------------------ */
+static int nat_flushtable()
+{
+	nat_t *nat;
+	int j = 0;
+
+	/*
+	 * ALL NAT mappings deleted, so lets just make the deletions
+	 * quicker.  Each table holds ipf_nattable_sz bucket head pointers,
+	 * so size the clear by one bucket slot, not by the table pointer.
+	 */
+	if (nat_table[0] != NULL)
+		bzero((char *)nat_table[0],
+		      sizeof(*nat_table[0]) * ipf_nattable_sz);
+	if (nat_table[1] != NULL)
+		bzero((char *)nat_table[1],
+		      sizeof(*nat_table[1]) * ipf_nattable_sz);
+
+	/* nat_delete() unlinks the head entry, so this walks the list. */
+	while ((nat = nat_instances) != NULL) {
+		nat_delete(nat, NL_FLUSH);
+		j++;
+	}
+
+	nat_stats.ns_inuse = 0;
+	return j;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    nat_clearlist                                               */
+/* Returns:     int - number of NAT/RDR rules taken off the rule list       */
+/* Parameters:  Nil                                                         */
+/*                                                                          */
+/* Empty the list of NAT rules and wipe the hash tables used to look rules  */
+/* up.  A rule with no users is freed on the spot; one that is still in use */
+/* is only unlinked and flagged IPN_DELETE.                                 */
+/* ------------------------------------------------------------------------ */
+static int nat_clearlist()
+{
+	ipnat_t *rule;
+	int count;
+
+	if (nat_rules != NULL)
+		bzero((char *)nat_rules, sizeof(*nat_rules) * ipf_natrules_sz);
+	if (rdr_rules != NULL)
+		bzero((char *)rdr_rules, sizeof(*rdr_rules) * ipf_rdrrules_sz);
+
+	/* Keep popping the head of the list until nothing is left. */
+	for (count = 0; (rule = nat_list) != NULL; count++) {
+		nat_list = rule->in_next;
+
+		if (rule->in_use != 0) {
+			/* Still referenced: leave it flagged for deletion. */
+			rule->in_flags |= IPN_DELETE;
+			rule->in_next = NULL;
+			continue;
+		}
+
+		if (rule->in_apr != NULL)
+			appr_free(rule->in_apr);
+		KFREE(rule);
+		nat_stats.ns_rules--;
+	}
+#if SOLARIS
+	pfil_delayed_copy = 1;
+#endif
+	nat_masks = 0;
+	rdr_masks = 0;
+	return count;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    nat_newmap                                                  */
+/* Returns:     int - -1 == error, 0 == success                             */
+/* Parameters:  fin(I) - pointer to packet information                      */
+/*              nat(I) - pointer to NAT entry                               */
+/*              ni(I)  - pointer to structure with misc. information needed */
+/*                       to create new NAT entry.                           */
+/*                                                                          */
+/* Given an empty NAT structure, populate it with new information about a   */
+/* new NAT session, as defined by the matching NAT rule.                    */
+/* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
+/* to the new IP address for the translation.                               */
+/* Candidate address/port pairs are tried one after another (counted by     */
+/* "l") until one is found that nat_inlookup() does not already know.       */
+/* ------------------------------------------------------------------------ */
+static INLINE int nat_newmap(fin, nat, ni)
+fr_info_t *fin;
+nat_t *nat;
+natinfo_t *ni;
+{
+	u_short st_port, dport, sport, port, sp, dp;
+	struct in_addr in, inb;
+	hostmap_t *hm;
+	u_32_t flags;
+	u_32_t st_ip;
+	ipnat_t *np;
+	nat_t *natl;
+	int l;
+
+	/*
+	 * If it's an outbound packet which doesn't match any existing
+	 * record, then create a new port
+	 */
+	l = 0;
+	hm = NULL;
+	np = ni->nai_np;
+	/* Remember where the search started to detect a full wrap-around. */
+	st_ip = np->in_nip;
+	st_port = np->in_pnext;
+	flags = ni->nai_flags;
+	sport = ni->nai_sport;
+	dport = ni->nai_dport;
+
+	/*
+	 * Do a loop until we either run out of entries to try or we find
+	 * a NAT mapping that isn't currently being used.  This is done
+	 * because the change to the source is not (usually) being fixed.
+	 */
+	do {
+		port = 0;
+		in.s_addr = htonl(np->in_nip);
+		if (l == 0) {
+			/*
+			 * Check to see if there is an existing NAT
+			 * setup for this IP address pair.
+			 */
+			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
+					 in, 0);
+			if (hm != NULL)
+				in.s_addr = hm->hm_mapip.s_addr;
+		} else if ((l == 1) && (hm != NULL)) {
+			/* The host mapping's address clashed: let it go. */
+			nat_hostmapdel(hm);
+			hm = NULL;
+		}
+		/* From here on "in" is kept in host byte order. */
+		in.s_addr = ntohl(in.s_addr);
+
+		nat->nat_hm = hm;
+
+		if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) {
+			if (l > 0)
+				return -1;
+		}
+
+		if (np->in_redir == NAT_BIMAP &&
+		    np->in_inmsk == np->in_outmsk) {
+			/*
+			 * map the address block in a 1:1 fashion
+			 */
+			in.s_addr = np->in_outip;
+			in.s_addr |= fin->fin_saddr & ~np->in_inmsk;
+			in.s_addr = ntohl(in.s_addr);
+
+		} else if (np->in_redir & NAT_MAPBLK) {
+			if ((l >= np->in_ppip) || ((l > 0) &&
+			     !(flags & IPN_TCPUDP)))
+				return -1;
+			/*
+			 * map-block - Calculate destination address.
+			 */
+			in.s_addr = ntohl(fin->fin_saddr);
+			in.s_addr &= ntohl(~np->in_inmsk);
+			inb.s_addr = in.s_addr;
+			in.s_addr /= np->in_ippip;
+			in.s_addr &= ntohl(~np->in_outmsk);
+			in.s_addr += ntohl(np->in_outip);
+			/*
+			 * Calculate destination port.
+			 */
+			if ((flags & IPN_TCPUDP) &&
+			    (np->in_ppip != 0)) {
+				port = ntohs(sport) + l;
+				port %= np->in_ppip;
+				port += np->in_ppip *
+					(inb.s_addr % np->in_ippip);
+				port += MAPBLK_MINPORT;
+				port = htons(port);
+			}
+
+		} else if ((np->in_outip == 0) &&
+			   (np->in_outmsk == 0xffffffff)) {
+			/*
+			 * 0/32 - use the interface's IP address.
+			 */
+			if ((l > 0) ||
+			    fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp,
+				       &in, NULL) == -1)
+				return -1;
+			in.s_addr = ntohl(in.s_addr);
+
+		} else if ((np->in_outip == 0) && (np->in_outmsk == 0)) {
+			/*
+			 * 0/0 - use the original source address/port.
+			 */
+			if (l > 0)
+				return -1;
+			in.s_addr = ntohl(fin->fin_saddr);
+
+		} else if ((np->in_outmsk != 0xffffffff) &&
+			   (np->in_pnext == 0) && ((l > 0) || (hm == NULL)))
+			np->in_nip++;
+
+		natl = NULL;
+
+		if ((flags & IPN_TCPUDP) &&
+		    ((np->in_redir & NAT_MAPBLK) == 0) &&
+		    (np->in_flags & IPN_AUTOPORTMAP)) {
+			/*
+			 * "ports auto" (without map-block)
+			 *
+			 * in_ppip may be zero (the port calculation below
+			 * allows for that), so it has to be tested before
+			 * being used as a divisor here too.
+			 */
+			if ((l > 0) && (np->in_ppip != 0) &&
+			    (l % np->in_ppip == 0)) {
+				if (l > np->in_space) {
+					return -1;
+				} else if ((l > np->in_ppip) &&
+					   np->in_outmsk != 0xffffffff)
+					np->in_nip++;
+			}
+			if (np->in_ppip != 0) {
+				port = ntohs(sport);
+				port += (l % np->in_ppip);
+				port %= np->in_ppip;
+				port += np->in_ppip *
+					(ntohl(fin->fin_saddr) %
+					 np->in_ippip);
+				port += MAPBLK_MINPORT;
+				port = htons(port);
+			}
+
+		} else if (((np->in_redir & NAT_MAPBLK) == 0) &&
+			   (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) {
+			/*
+			 * Standard port translation.  Select next port.
+			 */
+			port = htons(np->in_pnext++);
+
+			if (np->in_pnext > ntohs(np->in_pmax)) {
+				np->in_pnext = ntohs(np->in_pmin);
+				if (np->in_outmsk != 0xffffffff)
+					np->in_nip++;
+			}
+		}
+
+		/* Wrap the rule's "next address" back to its start. */
+		if (np->in_flags & IPN_IPRANGE) {
+			if (np->in_nip > ntohl(np->in_outmsk))
+				np->in_nip = ntohl(np->in_outip);
+		} else {
+			if ((np->in_outmsk != 0xffffffff) &&
+			    ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
+			    ntohl(np->in_outip))
+				np->in_nip = ntohl(np->in_outip) + 1;
+		}
+
+		/* No port was chosen above: keep the original one. */
+		if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY)))
+			port = sport;
+
+		/*
+		 * Here we do a lookup of the connection as seen from
+		 * the outside.  If an IP# pair already exists, try
+		 * again.  So if you have A->B becomes C->B, you can
+		 * also have D->E become C->E but not D->B causing
+		 * another C->B.  Also take protocol and ports into
+		 * account when determining whether a pre-existing
+		 * NAT setup will cause an external conflict where
+		 * this is appropriate.
+		 */
+		inb.s_addr = htonl(in.s_addr);
+		/* fin_data[] is borrowed for the lookup and then restored. */
+		sp = fin->fin_data[0];
+		dp = fin->fin_data[1];
+		fin->fin_data[0] = fin->fin_data[1];
+		fin->fin_data[1] = htons(port);
+		natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
+				    (u_int)fin->fin_p, fin->fin_dst, inb);
+		fin->fin_data[0] = sp;
+		fin->fin_data[1] = dp;
+
+		/*
+		 * Has the search wrapped around and come back to the
+		 * start ?
+		 */
+		if ((natl != NULL) &&
+		    (np->in_pnext != 0) && (st_port == np->in_pnext) &&
+		    (np->in_nip != 0) && (st_ip == np->in_nip))
+			return -1;
+		l++;
+	} while (natl != NULL);
+
+	if (np->in_space > 0)
+		np->in_space--;
+
+	/* Setup the NAT table */
+	nat->nat_inip = fin->fin_src;
+	nat->nat_outip.s_addr = htonl(in.s_addr);
+	nat->nat_oip = fin->fin_dst;
+	if (nat->nat_hm == NULL)
+		nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
+					  nat->nat_outip, 0);
+
+	/*
+	 * The ICMP checksum does not have a pseudo header containing
+	 * the IP addresses
+	 */
+	ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
+	ni->nai_sum2 = LONG_SUM(in.s_addr);
+	if ((flags & IPN_TCPUDP)) {
+		ni->nai_sum1 += ntohs(sport);
+		ni->nai_sum2 += ntohs(port);
+	}
+
+	if (flags & IPN_TCPUDP) {
+		nat->nat_inport = sport;
+		nat->nat_outport = port;	/* sport */
+		nat->nat_oport = dport;
+		((tcphdr_t *)fin->fin_dp)->th_sport = port;
+	} else if (flags & IPN_ICMPQUERY) {
+		((icmphdr_t *)fin->fin_dp)->icmp_id = port;
+		nat->nat_inport = port;
+		nat->nat_outport = port;
+	}
+
+	ni->nai_ip.s_addr = in.s_addr;
+	ni->nai_port = port;
+	ni->nai_nport = dport;
+	return 0;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    nat_newrdr                                                  */
+/* Returns:     int - -1 == error, 0 == success (no move), 1 == success and */
+/*                    allow rule to be moved if IPN_ROUNDR is set.          */
+/* Parameters:  fin(I) - pointer to packet information                      */
+/*              nat(I) - pointer to NAT entry                               */
+/*              ni(I)  - pointer to structure with misc. information needed */
+/*                       to create new NAT entry.                           */
+/*                                                                          */
+/* Work out the address and port a redirected packet is to be sent to and   */
+/* record them in the NAT entry.                                            */
+/* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
+/* to the new IP address for the translation.                               */
+/* ------------------------------------------------------------------------ */
+static INLINE int nat_newrdr(fin, nat, ni)
+fr_info_t *fin;
+nat_t *nat;
+natinfo_t *ni;
+{
+	u_short newport, dstport, srcport;
+	struct in_addr target;
+	hostmap_t *sticky;
+	u_32_t pflags;
+	ipnat_t *rule;
+	int rotate;
+
+	rule = ni->nai_np;
+	pflags = ni->nai_flags;
+	srcport = ni->nai_sport;
+	dstport = ni->nai_dport;
+	target.s_addr = 0;
+	sticky = NULL;
+	rotate = 1;
+
+	/*
+	 * With "round-robin sticky" the packet may have matched a different
+	 * rule of the group than the previous connection between the same
+	 * two hosts did.  If a host mapping remembers that earlier choice,
+	 * switch to its rule and address so the same destination is used.
+	 */
+	if ((rule->in_flags & (IPN_ROUNDR|IPN_STICKY)) ==
+	    (IPN_ROUNDR|IPN_STICKY)) {
+		sticky = nat_hostmap(NULL, fin->fin_src, fin->fin_dst,
+				     target, (u_32_t)dstport);
+		if (sticky != NULL) {
+			target.s_addr = ntohl(sticky->hm_mapip.s_addr);
+			rule = sticky->hm_ipnat;
+			ni->nai_np = rule;
+			rotate = 0;
+		}
+	}
+
+	/*
+	 * Otherwise, it's an inbound packet.  Most likely, we don't
+	 * want to rewrite source ports and source addresses.  Instead,
+	 * we want to rewrite to a fixed internal address and fixed
+	 * internal port.
+	 */
+	if (rule->in_flags & IPN_SPLIT) {
+		target.s_addr = rule->in_nip;
+
+		if ((rule->in_flags & (IPN_ROUNDR|IPN_STICKY)) ==
+		    IPN_STICKY) {
+			sticky = nat_hostmap(rule, fin->fin_src,
+					     fin->fin_dst, target,
+					     (u_32_t)dstport);
+			if (sticky != NULL) {
+				target.s_addr = sticky->hm_mapip.s_addr;
+				rotate = 0;
+			}
+		}
+
+		/* Flip the rule's next address between its two targets. */
+		if ((sticky == NULL) || (sticky->hm_ref == 1)) {
+			if (rule->in_inip != htonl(target.s_addr)) {
+				rule->in_nip = ntohl(rule->in_inip);
+			} else {
+				rule->in_nip = ntohl(rule->in_inmsk);
+				rotate = 0;
+			}
+		}
+
+	} else if ((rule->in_inip == 0) && (rule->in_inmsk == 0xffffffff)) {
+		/*
+		 * 0/32 - use the interface's IP address.
+		 */
+		if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &target,
+			       NULL) == -1)
+			return -1;
+		target.s_addr = ntohl(target.s_addr);
+
+	} else if ((rule->in_inip == 0) && (rule->in_inmsk == 0)) {
+		/*
+		 * 0/0 - use the original destination address/port.
+		 */
+		target.s_addr = ntohl(fin->fin_daddr);
+
+	} else if ((rule->in_redir == NAT_BIMAP) &&
+		   (rule->in_inmsk == rule->in_outmsk)) {
+		/*
+		 * map the address block in a 1:1 fashion
+		 */
+		target.s_addr = rule->in_inip;
+		target.s_addr |= fin->fin_daddr & ~rule->in_inmsk;
+		target.s_addr = ntohl(target.s_addr);
+	} else {
+		target.s_addr = ntohl(rule->in_inip);
+	}
+
+	/*
+	 * Pick the new destination port: either the packet's own, the
+	 * rule's fixed port, or the packet's offset within the rule's
+	 * port range applied to the rule's target port.
+	 */
+	if ((rule->in_pnext != 0) && ((pflags & NAT_NOTRULEPORT) == 0)) {
+		/*
+		 * Whilst not optimized for the case where
+		 * pmin == pmax, the gain is not significant.
+		 */
+		if (((rule->in_flags & IPN_FIXEDDPORT) == 0) &&
+		    (rule->in_pmin != rule->in_pmax)) {
+			newport = ntohs(dstport) - ntohs(rule->in_pmin) +
+				  ntohs(rule->in_pnext);
+			newport = htons(newport);
+		} else {
+			newport = rule->in_pnext;
+		}
+	} else {
+		newport = dstport;
+	}
+
+	/*
+	 * When the redirect-to address is set to 0.0.0.0, just
+	 * assume a blank `forwarding' of the packet.  We don't
+	 * setup any translation for this either.
+	 */
+	if (target.s_addr == 0) {
+		if (newport == dstport)
+			return -1;
+		target.s_addr = ntohl(fin->fin_daddr);
+	}
+
+	/* Addresses of the new entry. */
+	nat->nat_inip.s_addr = htonl(target.s_addr);
+	nat->nat_outip = fin->fin_dst;
+	nat->nat_oip = fin->fin_src;
+
+	/* Hand the results back to the caller. */
+	ni->nai_ip.s_addr = target.s_addr;
+	ni->nai_port = srcport;
+	ni->nai_nport = newport;
+	ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr)) + ntohs(dstport);
+	ni->nai_sum2 = LONG_SUM(target.s_addr) + ntohs(newport);
+
+	if (pflags & IPN_TCPUDP) {
+		nat->nat_inport = newport;
+		nat->nat_outport = dstport;
+		nat->nat_oport = srcport;
+		((tcphdr_t *)fin->fin_dp)->th_dport = newport;
+	} else if (pflags & IPN_ICMPQUERY) {
+		((icmphdr_t *)fin->fin_dp)->icmp_id = newport;
+		nat->nat_inport = newport;
+		nat->nat_outport = newport;
+	}
+
+	return rotate;
+}
+
+/* ------------------------------------------------------------------------ */
+/* Function:    nat_new                                                     */
+/* Returns:     nat_t* - NULL == failure to create new NAT structure,       */
+/*                       else pointer to new NAT structure                  */
+/* Parameters:  fin(I)       - pointer to packet information                */
+/*              np(I)        - pointer to NAT rule                          */
+/*              natsave(I)   - pointer to where to store NAT struct pointer */
+/*              flags(I)     - flags describing the current packet          */
+/*              direction(I) - direction of packet (in/out)                 */
+/* Write Lock:  ipf_nat                                                     */
+/*                                                                          */
+/* Attempts to create a new NAT entry.  Does not actually change the packet */
+/* in any way.                                                              */
+/*                                                                          */
+/* This function is in three main parts: (1) deal with creating a new NAT   */
+/* structure for a "MAP" rule (outgoing NAT translation); (2) deal with     */
+/* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */
+/* and (3) building that structure and putting it into the NAT table(s).    */
+/* If an entry for the connection turns out to exist already, that entry is */
+/* returned and the structure allocated here is released again.             */
+/* ------------------------------------------------------------------------ */
+nat_t *nat_new(fin, np, natsave, flags, direction)
+fr_info_t *fin;
+ipnat_t *np;
+nat_t **natsave;
+u_int flags;
+int direction;
+{
+	u_short port = 0, sport = 0, dport = 0, nport = 0;
+	tcphdr_t *tcp = NULL;
+	hostmap_t *hm = NULL;
+	struct in_addr in;
+	nat_t *nat, *natl;
+	u_int nflags;
+	natinfo_t ni;
+	u_32_t sumd;
+	int move;
+#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC)
+	qpktinfo_t *qpi = fin->fin_qpi;
+#endif
+
+	if (nat_stats.ns_inuse >= ipf_nattable_max) {
+		nat_stats.ns_memfail++;
+		return NULL;
+	}
+
+	move = 1;
+	nflags = np->in_flags & flags;
+	nflags &= NAT_FROMRULE;
+
+	ni.nai_np = np;
+	ni.nai_nflags = nflags;
+	ni.nai_flags = flags;
+	/*
+	 * nat_newmap()/nat_newrdr() read both ports unconditionally, so
+	 * give them a defined value for packets that are neither TCP/UDP
+	 * nor ICMP queries.
+	 */
+	ni.nai_sport = 0;
+	ni.nai_dport = 0;
+
+	/* Give me a new nat */
+	KMALLOC(nat, nat_t *);
+	if (nat == NULL) {
+		nat_stats.ns_memfail++;
+		/*
+		 * Try to automatically tune the max # of entries in the
+		 * table allowed to be less than what will cause kmem_alloc()
+		 * to fail and try to eliminate panics due to out of memory
+		 * conditions arising.
+		 */
+		if (ipf_nattable_max > ipf_nattable_sz) {
+			ipf_nattable_max = nat_stats.ns_inuse - 100;
+			printf("ipf_nattable_max reduced to %d\n",
+				ipf_nattable_max);
+		}
+		return NULL;
+	}
+
+	if (flags & IPN_TCPUDP) {
+		tcp = fin->fin_dp;
+		ni.nai_sport = htons(fin->fin_sport);
+		ni.nai_dport = htons(fin->fin_dport);
+	} else if (flags & IPN_ICMPQUERY) {
+		/*
+		 * In the ICMP query NAT code, we translate the ICMP id fields
+		 * to make them unique.  This is independent of the ICMP type
+		 * (e.g. in the unlikely event that a host sends an echo and
+		 * an tstamp request with the same id, both packets will have
+		 * their ip address/id field changed in the same way).
+		 */
+		/* The icmp_id field is used by the sender to identify the
+		 * process making the icmp request. (the receiver justs
+		 * copies it back in its response). So, it closely matches
+		 * the concept of source port. We overlay sport, so we can
+		 * maximally reuse the existing code.
+		 */
+		ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id;
+		ni.nai_dport = ni.nai_sport;
+	}
+
+	bzero((char *)nat, sizeof(*nat));
+	nat->nat_flags = flags;
+
+	if ((flags & NAT_SLAVE) == 0) {
+		MUTEX_ENTER(&ipf_nat_new);
+	}
+
+	/*
+	 * Search the current table for a match.
+	 */
+	if (direction == NAT_OUTBOUND) {
+		/*
+		 * We can now arrange to call this for the same connection
+		 * because ipf_nat_new doesn't protect the code path into
+		 * this function.
+		 */
+		natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p,
+				     fin->fin_src, fin->fin_dst);
+		if (natl != NULL) {
+			/*
+			 * Somebody beat us to it: hand back the existing
+			 * entry and release the one allocated above, which
+			 * nothing else refers to yet.
+			 */
+			KFREE(nat);
+			nat = natl;
+			goto done;
+		}
+
+		move = nat_newmap(fin, nat, &ni);
+		if (move == -1)
+			goto badnat;
+
+		np = ni.nai_np;
+		in = ni.nai_ip;
+	} else {
+		/*
+		 * NAT_INBOUND is used only for redirects rules
+		 */
+		natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p,
+				    fin->fin_src, fin->fin_dst);
+		if (natl != NULL) {
+			/* As above: do not leak the unused allocation. */
+			KFREE(nat);
+			nat = natl;
+			goto done;
+		}
+
+		move = nat_newrdr(fin, nat, &ni);
+		if (move == -1)
+			goto badnat;
+
+		np = ni.nai_np;
+		in = ni.nai_ip;
+	}
+	port = ni.nai_port;
+	nport = ni.nai_nport;
+
+	/*
+	 * Round-robin: take the rule out of its lookup table and add it
+	 * again.
+	 */
+	if ((move == 1) && (np->in_flags & IPN_ROUNDR)) {
+		if (np->in_redir == NAT_REDIRECT) {
+			nat_delrdr(np);
+			nat_addrdr(np);
+		} else if (np->in_redir == NAT_MAP) {
+			nat_delnat(np);
+			nat_addnat(np);
+		}
+	}
+
+	if (flags & IPN_TCPUDP) {
+		sport = ni.nai_sport;
+		dport = ni.nai_dport;
+	} else if (flags & IPN_ICMPQUERY) {
+		sport = ni.nai_sport;
+		dport = 0;
+	}
+
+	/* Checksum adjustment for the transport header. */
+	CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
+	nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
+#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC)
+	if ((flags & IPN_TCP) && dohwcksum &&
+#ifndef IRE_ILL_CN
+	    (((ill_t *)qpi->qpi_ill)->ill_ick.ick_magic == ICK_M_CTL_MAGIC)) {
+#else
+	    (((s_ill_t *)qpi->qpi_ill)->ill_ick.ick_magic == ICK_M_CTL_MAGIC)) {
+#endif /* IRE_ILL_CN */
+		if (direction == NAT_OUTBOUND)
+			ni.nai_sum1 = LONG_SUM(in.s_addr);
+		else
+			ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
+		ni.nai_sum1 += LONG_SUM(ntohl(fin->fin_daddr));
+		ni.nai_sum1 += 30;
+		ni.nai_sum1 = (ni.nai_sum1 & 0xffff) + (ni.nai_sum1 >> 16);
+		nat->nat_sumd[1] = NAT_HW_CKSUM|(ni.nai_sum1 & 0xffff);
+	} else
+#endif
+		nat->nat_sumd[1] = nat->nat_sumd[0];
+
+	/*
+	 * The IP header checksum only covers addresses, so when a port was
+	 * changed as well it needs an adjustment of its own.
+	 */
+	if ((flags & IPN_TCPUDPICMP) && ((sport != port) || (dport != nport))) {
+		if (direction == NAT_OUTBOUND)
+			ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
+		else
+			ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr));
+
+		ni.nai_sum2 = LONG_SUM(in.s_addr);
+
+		CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
+		nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
+	} else {
+		nat->nat_ipsumd = nat->nat_sumd[0];
+		if (!(flags & IPN_TCPUDPICMP)) {
+			nat->nat_sumd[0] = 0;
+			nat->nat_sumd[1] = 0;
+		}
+	}
+
+	if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) {
+		goto badnat;
+	}
+	if (flags & SI_WILDP)
+		nat_stats.ns_wilds++;
+	goto done;
+badnat:
+	/* Creation failed: give back the host mapping and the entry. */
+	nat_stats.ns_badnat++;
+	if ((hm = nat->nat_hm) != NULL)
+		nat_hostmapdel(hm);
+	KFREE(nat);
+	nat = NULL;
+done:
+	if ((flags & NAT_SLAVE) == 0) {
+		MUTEX_EXIT(&ipf_nat_new);
+	}
+	return nat;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    nat_finalise                                                */
+/* Returns:     int - 0 == success, -1 == failure                           */
+/* Parameters:  fin(I)       - pointer to packet information                */
+/*              nat(I)       - pointer to NAT entry                         */
+/*              ni(I)        - pointer to structure with misc. information  */
+/*                             needed to create new NAT entry.              */
+/*              tcp(I)       - not used here                                */
+/*              natsave(I)   - stored in the entry as nat_me                */
+/*              direction(I) - direction of packet, stored as nat_dir       */
+/* Write Lock:  ipf_nat                                                     */
+/*                                                                          */
+/* This is the tail end of constructing a new NAT entry and is the same     */
+/* for both IPv4 and IPv6.  It fills in the remaining fields, starts a      */
+/* proxy session when the rule has a proxy and then inserts the entry into  */
+/* the NAT tables.                                                          */
+/* ------------------------------------------------------------------------ */
+/*ARGSUSED*/
+static INLINE int nat_finalise(fin, nat, ni, tcp, natsave, direction)
+fr_info_t *fin;
+nat_t *nat;
+natinfo_t *ni;
+tcphdr_t *tcp;
+nat_t **natsave;
+int direction;
+{
+	ipnat_t *rule = ni->nai_np;
+	frentry_t *frule;
+
+	COPYIFNAME(fin->fin_ifp, nat->nat_ifnames[0]);
+#ifdef IPFILTER_SYNC
+	if ((nat->nat_flags & SI_CLONE) == 0)
+		nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat);
+#endif
+
+	/* Fill in what the caller and the packet tell us. */
+	frule = fin->fin_fr;
+	nat->nat_fr = frule;
+	nat->nat_ptr = rule;
+	nat->nat_mssclamp = rule->in_mssclamp;
+	nat->nat_p = fin->fin_p;
+	nat->nat_ifps[0] = fin->fin_ifp;
+	nat->nat_dir = direction;
+	nat->nat_me = natsave;
+
+	/* A proxy session is only started for non-slave entries. */
+	if ((rule->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0) &&
+	    (appr_new(fin, nat) == -1))
+		return -1;
+
+	/*
+	 * If the entry cannot be inserted, cleaning up is left to the
+	 * caller.
+	 */
+	if (nat_insert(nat, fin->fin_rev) != 0)
+		return -1;
+
+	if (nat_logging)
+		nat_log(nat, (u_int)rule->in_redir);
+
+	/* The entry now holds a reference on its rule and filter rule. */
+	rule->in_use++;
+	if (frule != NULL) {
+		MUTEX_ENTER(&frule->fr_lock);
+		frule->fr_ref++;
+		MUTEX_EXIT(&frule->fr_lock);
+	}
+	return 0;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    nat_insert                                                  */
+/* Returns:     int - 0 == success, -1 == failure                           */
+/* Parameters:  nat(I) - pointer to NAT structure                           */
+/*              rev(I) - flag indicating forward/reverse direction of packet*/
+/* Write Lock:  ipf_nat                                                     */
+/*                                                                          */
+/* Put a NAT entry at the head of the list of active entries and at the     */
+/* head of its bucket in each of the two hash tables, then update the       */
+/* global counters.  Fails, leaving the entry untouched, if either bucket   */
+/* has already reached fr_nat_maxbucket entries.                            */
+/* ------------------------------------------------------------------------ */
+int nat_insert(nat, rev)
+nat_t *nat;
+int rev;
+{
+	nat_t **bucket;
+	u_int hv[2];
+	int i;
+
+	/*
+	 * Work out both hash values before touching anything so that a
+	 * failure can be reported as early as possible.  Entries with a
+	 * wildcard port are hashed on addresses alone.
+	 */
+	if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) != 0) {
+		hv[0] = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff);
+		hv[0] = NAT_HASH_FN(nat->nat_oip.s_addr, hv[0],
+				    ipf_nattable_sz);
+		hv[1] = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff);
+		hv[1] = NAT_HASH_FN(nat->nat_oip.s_addr, hv[1],
+				    ipf_nattable_sz);
+	} else {
+		hv[0] = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
+				    0xffffffff);
+		hv[0] = NAT_HASH_FN(nat->nat_oip.s_addr,
+				    hv[0] + nat->nat_oport, ipf_nattable_sz);
+		hv[1] = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
+				    0xffffffff);
+		hv[1] = NAT_HASH_FN(nat->nat_oip.s_addr,
+				    hv[1] + nat->nat_oport, ipf_nattable_sz);
+	}
+
+	if ((nat_stats.ns_bucketlen[0][hv[0]] >= fr_nat_maxbucket) ||
+	    (nat_stats.ns_bucketlen[1][hv[1]] >= fr_nat_maxbucket))
+		return -1;
+
+	MUTEX_INIT(&nat->nat_lock, "nat entry lock");
+
+	nat->nat_rev = rev;
+	nat->nat_ref = 1;
+	for (i = 0; i < 2; i++) {
+		nat->nat_hv[i] = hv[i];
+		nat->nat_bytes[i] = 0;
+		nat->nat_pkts[i] = 0;
+	}
+
+	/*
+	 * Resolve the interface names to interface pointers.  If no second
+	 * name has been given, the first one is used for both.
+	 */
+	nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0';
+	nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4);
+
+	if (nat->nat_ifnames[1][0] == '\0') {
+		(void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0],
+			       LIFNAMSIZ);
+		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
+		nat->nat_ifps[1] = nat->nat_ifps[0];
+	} else {
+		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
+		nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4);
+	}
+
+	/* Head of the list of all active entries. */
+	nat->nat_next = nat_instances;
+	nat->nat_pnext = &nat_instances;
+	if (nat_instances != NULL)
+		nat_instances->nat_pnext = &nat->nat_next;
+	nat_instances = nat;
+
+	/* Head of the matching bucket in each hash table. */
+	for (i = 0; i < 2; i++) {
+		bucket = &nat_table[i][hv[i]];
+		if (*bucket != NULL)
+			(*bucket)->nat_phnext[i] = &nat->nat_hnext[i];
+		nat->nat_phnext[i] = bucket;
+		nat->nat_hnext[i] = *bucket;
+		*bucket = nat;
+		nat_stats.ns_bucketlen[i][hv[i]]++;
+	}
+
+	fr_setnatqueue(nat, rev);
+
+	nat_stats.ns_added++;
+	nat_stats.ns_inuse++;
+	return 0;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    nat_icmperrorlookup                                         */
+/* Returns:     nat_t* - pointer to matching NAT structure, else NULL       */
+/* Parameters:  fin(I) - pointer to packet information                      */
+/*              dir(I) - direction of packet (in/out)                       */
+/*                                                                          */
+/* Find the NAT entry, if any, that the IP packet quoted inside an ICMP     */
+/* error message belongs to.  The quoted packet may be TCP, UDP, an ICMP    */
+/* query or some other protocol.  fin_data[] is borrowed to carry the       */
+/* quoted ports/id into the lookup and is put back before returning.        */
+/* ------------------------------------------------------------------------ */
+nat_t *nat_icmperrorlookup(fin, dir)
+fr_info_t *fin;
+int dir;
+{
+	icmphdr_t *inner_icmp;
+	u_short saved[2];
+	tcphdr_t *ports;
+	int nflags, iphlen;
+	nat_t *found;
+	ip_t *inner;
+	u_int proto;
+
+	/*
+	 * Only ICMP error messages carried behind a basic (option-free) IP
+	 * header are of interest.
+	 */
+	if (!(fin->fin_flx & FI_ICMPERR))
+		return NULL;
+	if (fin->fin_hlen != sizeof(ip_t))
+		return NULL;
+
+	/*
+	 * The quoted IP header starts 8 bytes into the ICMP data.  Its
+	 * claimed header length must be sane and must fit in the packet.
+	 */
+	inner = (ip_t *)((char *)fin->fin_dp + 8);
+	iphlen = IP_HL(inner) << 2;
+	if (iphlen < sizeof(ip_t))
+		return NULL;
+	if (fin->fin_plen < ICMPERR_IPICMPHLEN + iphlen)
+		return NULL;
+
+	/*
+	 * The length claimed by the quoted header was not known when the
+	 * packet was pulled up, so confirm that what is about to be read
+	 * really lies inside the current buffer.
+	 */
+#ifdef _KERNEL
+	{
+		mb_t *m = fin->fin_m;
+		char *inner_end;
+
+		inner_end = (char *)inner + fin->fin_dlen - ICMPERR_ICMPHLEN;
+# if defined(MENTAT)
+		if (inner_end > (char *)m->b_wptr)
+			return NULL;
+# else
+		if (inner_end > (char *)fin->fin_ip + M_LEN(m))
+			return NULL;
+# endif
+	}
+#endif
+
+	/*
+	 * The error must be addressed to the sender of the quoted packet.
+	 */
+	if (fin->fin_daddr != inner->ip_src.s_addr)
+		return NULL;
+
+	proto = inner->ip_p;
+	nflags = 0;
+
+	switch (proto)
+	{
+	case IPPROTO_TCP :
+		nflags = IPN_TCP;
+		break;
+	case IPPROTO_UDP :
+		nflags = IPN_UDP;
+		break;
+	case IPPROTO_ICMP :
+		inner_icmp = (icmphdr_t *)((char *)inner +
+					   (IP_HL(inner) << 2));
+
+		/*
+		 * An error about an ICMP query is looked up using the
+		 * query id of the quoted packet.
+		 */
+		if (nat_icmpquerytype4(inner_icmp->icmp_type)) {
+			saved[0] = fin->fin_data[0];
+			saved[1] = fin->fin_data[1];
+			fin->fin_data[0] = 0;
+			fin->fin_data[1] = inner_icmp->icmp_id;
+
+			nflags = IPN_ICMPERR|IPN_ICMPQUERY;
+			/*
+			 * dir describes the quoted packet; the error
+			 * message itself travels the other way.
+			 */
+			found = (dir == NAT_INBOUND) ?
+			    nat_inlookup(fin, nflags, proto, inner->ip_dst,
+					 inner->ip_src) :
+			    nat_outlookup(fin, nflags, proto, inner->ip_dst,
+					  inner->ip_src);
+
+			fin->fin_data[0] = saved[0];
+			fin->fin_data[1] = saved[1];
+			return found;
+		}
+		break;
+	default :
+		break;
+	}
+
+	if (nflags & IPN_TCPUDP) {
+		/*
+		 * A further 64 bits of the quoted packet are needed to
+		 * reach the port numbers.
+		 */
+		iphlen += 8;
+		if (fin->fin_plen < ICMPERR_IPICMPHLEN + iphlen)
+			return NULL;
+
+		saved[0] = fin->fin_data[0];
+		saved[1] = fin->fin_data[1];
+		ports = (tcphdr_t *)((char *)inner + (IP_HL(inner) << 2));
+		fin->fin_data[0] = ntohs(ports->th_dport);
+		fin->fin_data[1] = ntohs(ports->th_sport);
+
+		found = (dir == NAT_INBOUND) ?
+		    nat_inlookup(fin, nflags, proto, inner->ip_dst,
+				 inner->ip_src) :
+		    nat_outlookup(fin, nflags, proto, inner->ip_dst,
+				  inner->ip_src);
+
+		fin->fin_data[0] = saved[0];
+		fin->fin_data[1] = saved[1];
+		return found;
+	}
+
+	/*
+	 * Anything else is matched on addresses and protocol alone.
+	 */
+	if (dir == NAT_INBOUND)
+		return nat_inlookup(fin, 0, proto, inner->ip_dst,
+				    inner->ip_src);
+	return nat_outlookup(fin, 0, proto, inner->ip_dst, inner->ip_src);
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: nat_icmperror */
+/* Returns: nat_t* - pointer to matching NAT structure, NULL if none */
+/* Parameters: fin(I) - pointer to packet information */
+/* nflags(O) - set to IPN_ICMPERR once a NAT entry has been matched */
+/* dir(I) - direction of packet (in/out), handed to the lookup */
+/* */
+/* Fix up an ICMP packet which is an error message for an existing NAT */
+/* session. This will correct both packet header data and checksums. */
+/*
+ * NULL is returned before anything is modified here for short packets and
+ * fragment bodies (FI_SHORT/FI_FRAGBODY), for anything that is not IPv4,
+ * and when nat_icmperrorlookup() finds no entry.
+ */
+/* This should *ONLY* be used for incoming ICMP error packets to make sure */
+/* a NAT'd ICMP packet gets correctly recognised. */
+/* ------------------------------------------------------------------------ */
+nat_t *nat_icmperror(fin, nflags, dir)
+fr_info_t *fin;
+u_int *nflags;
+int dir;
+{
+ u_32_t sum1, sum2, sumd, sumd2;
+ struct in_addr in;
+ icmphdr_t *icmp;
+ int flags, dlen;
+ u_short *csump;
+ tcphdr_t *tcp;
+ nat_t *nat;
+ ip_t *oip;
+ void *dp;
+
+ if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY)))
+ return NULL;
+ /*
+ * nat_icmperrorlookup() will return NULL for `defective' packets.
+ */
+ if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir)))
+ return NULL;
+
+ tcp = NULL;
+ csump = NULL;
+ flags = 0;
+ sumd2 = 0;
+ *nflags = IPN_ICMPERR;
+ icmp = fin->fin_dp;
+ oip = (ip_t *)&icmp->icmp_ip; /* the offending packet quoted in the error */
+ dp = (((char *)oip) + (IP_HL(oip) << 2)); /* first byte after its IP header */
+ if (oip->ip_p == IPPROTO_TCP) {
+ tcp = (tcphdr_t *)dp;
+ csump = (u_short *)&tcp->th_sum;
+ flags = IPN_TCP;
+ } else if (oip->ip_p == IPPROTO_UDP) {
+ udphdr_t *udp;
+
+ udp = (udphdr_t *)dp;
+ tcp = (tcphdr_t *)dp; /* ports are read through tcp below for UDP too */
+ csump = (u_short *)&udp->uh_sum;
+ flags = IPN_UDP;
+ } else if (oip->ip_p == IPPROTO_ICMP)
+ flags = IPN_ICMPQUERY;
+ dlen = fin->fin_plen - ((char *)dp - (char *)fin->fin_ip); /* bytes from dp to end of packet */
+
+ /*
+ * Need to adjust ICMP header to include the real IP#'s and
+ * port #'s. Only apply a checksum change relative to the
+ * IP address change as it will be modified again in fr_checknatout
+ * for both address and port. Two checksum changes are
+ * necessary for the two header address changes. Be careful
+ * to only modify the checksum once for the port # and twice
+ * for the IP#.
+ */
+
+ /*
+ * Step 1
+ * Fix the IP addresses in the offending IP packet. You also need
+ * to adjust the IP header checksum of that offending IP packet
+ * and the ICMP checksum of the ICMP error message itself.
+ *
+ * Unfortunately, for UDP and TCP, the IP addresses are also contained
+ * in the pseudo header that is used to compute the UDP resp. TCP
+ * checksum. So, we must compensate that as well. Even worse, the
+ * change in the UDP and TCP checksums require yet another
+ * adjustment of the ICMP checksum of the ICMP error message.
+ *
+ * If the quoted destination is the NAT entry's nat_oip, the quoted
+ * source is replaced with nat_inip; otherwise the quoted destination
+ * is replaced with nat_outip. sum1 holds the old address and sum2
+ * the new one.
+ */
+
+ if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) {
+ sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr));
+ in = nat->nat_inip;
+ oip->ip_src = in;
+ } else {
+ sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr));
+ in = nat->nat_outip;
+ oip->ip_dst = in;
+ }
+
+ sum2 = LONG_SUM(ntohl(in.s_addr));
+
+ CALC_SUMD(sum1, sum2, sumd);
+
+ /*
+ * Fix IP checksum of the offending IP packet to adjust for
+ * the change in the IP address.
+ *
+ * Normally, you would expect that the ICMP checksum of the
+ * ICMP error message needs to be adjusted as well for the
+ * IP address change in oip.
+ * However, this is a NOP, because the ICMP checksum is
+ * calculated over the complete ICMP packet, which includes the
+ * changed oip IP addresses and oip->ip_sum. However, these
+ * two changes cancel each other out (if the delta for
+ * the IP address is x, then the delta for ip_sum is minus x),
+ * so no change in the icmp_cksum is necessary.
+ *
+ * Be careful that nat_dir refers to the direction of the
+ * offending IP packet (oip), not to its ICMP response (icmp)
+ */
+ fix_datacksum(&oip->ip_sum, sumd);
+ /* Fix icmp cksum : IP Addr + Cksum */
+ sumd2 = (sumd >> 16);
+
+ /*
+ * Fix UDP pseudo header checksum to compensate for the
+ * IP address change.
+ */
+ if ((oip->ip_p == IPPROTO_UDP) && (dlen >= 8) && (*csump != 0)) {
+ /*
+ * The UDP checksum is optional, only adjust it
+ * if it has been set.
+ */
+ sum1 = ntohs(*csump);
+ fix_datacksum(csump, sumd);
+ sum2 = ntohs(*csump);
+
+ /*
+ * Fix ICMP checksum to compensate the UDP
+ * checksum adjustment.
+ */
+ sumd2 = sumd << 1;
+ CALC_SUMD(sum1, sum2, sumd);
+ sumd2 += sumd;
+ }
+
+ /*
+ * Fix TCP pseudo header checksum to compensate for the
+ * IP address change. Before we can do the change, we
+ * must make sure that oip is sufficiently large to hold
+ * the TCP checksum (normally it does not!).
+ * 18 = offsetof(tcphdr_t, th_sum) + 2
+ */
+ else if (oip->ip_p == IPPROTO_TCP && dlen >= 18) {
+ sum1 = ntohs(*csump);
+ fix_datacksum(csump, sumd);
+ sum2 = ntohs(*csump);
+
+ /*
+ * Fix ICMP checksum to compensate the TCP
+ * checksum adjustment.
+ */
+ sumd2 = sumd << 1;
+ CALC_SUMD(sum1, sum2, sumd);
+ sumd2 += sumd;
+ } else {
+ if (nat->nat_dir == NAT_OUTBOUND)
+ sumd2 = ~sumd2;
+ else
+ sumd2 = ~sumd2 + 1;
+ }
+
+ if (((flags & IPN_TCPUDP) != 0) && (dlen >= 4)) {
+ int mode = 0;
+
+ /*
+ * Step 2 :
+ * For offending TCP/UDP IP packets, translate the ports as
+ * well, based on the NAT specification. Of course such
+ * a change must be reflected in the ICMP checksum as well.
+ *
+ * Advance notice : Now it becomes complicated :-)
+ *
+ * Since the port fields are part of the TCP/UDP checksum
+ * of the offending IP packet, you need to adjust that checksum
+ * as well... but, if you change, you must change the icmp
+ * checksum *again*, to reflect that change.
+ *
+ * To further complicate: the TCP checksum is not in the first
+ * 8 bytes of the offending ip packet, so it most likely is not
+ * available. Some OSses like Solaris return enough bytes to
+ * include the TCP checksum. So we have to check if the
+ * ip->ip_len actually holds the TCP checksum of the oip!
+ *
+ * mode selects which quoted port is rewritten:
+ * 0 - neither port is changed,
+ * 1 - the quoted source port is set to nat_inport,
+ * 2 - the quoted destination port is set to nat_outport.
+ * In modes 1 and 2, sum1 is the new port and sum2 the old
+ * one, both in host byte order.
+ */
+
+ if (nat->nat_oport == tcp->th_dport) {
+ if (tcp->th_sport != nat->nat_inport) {
+ mode = 1;
+ sum1 = ntohs(nat->nat_inport);
+ sum2 = ntohs(tcp->th_sport);
+ }
+ } else if (tcp->th_sport == nat->nat_oport) {
+ mode = 2;
+ sum1 = ntohs(nat->nat_outport);
+ sum2 = ntohs(tcp->th_dport);
+ }
+
+ if (mode == 1) {
+ /*
+ * Put nat_inport back as the source port of the quoted
+ * header; the checksum compensation follows below.
+ */
+ tcp->th_sport = htons(sum1);
+
+ /*
+ * Fix udp checksum to compensate port adjustment.
+ * NOTE : the offending IP packet flows the other
+ * direction compared to the ICMP message.
+ *
+ * The UDP checksum is optional, only adjust it if
+ * it has been set.
+ */
+ if ((oip->ip_p == IPPROTO_UDP) &&
+ (dlen >= 8) && (*csump != 0)) {
+ sumd = sum1 - sum2;
+ sumd2 += sumd;
+
+ sum1 = ntohs(*csump);
+ fix_datacksum(csump, sumd);
+ sum2 = ntohs(*csump);
+
+ /*
+ * Fix ICMP checksum to compensate
+ * UDP checksum adjustment.
+ */
+ CALC_SUMD(sum1, sum2, sumd);
+ sumd2 += sumd;
+ }
+
+ /*
+ * Fix TCP checksum (if present) to compensate port
+ * adjustment. NOTE : the offending IP packet flows
+ * the other direction compared to the ICMP message.
+ */
+ if (oip->ip_p == IPPROTO_TCP) {
+ if (dlen >= 18) {
+ sumd = sum1 - sum2;
+ sumd2 += sumd;
+
+ sum1 = ntohs(*csump);
+ fix_datacksum(csump, sumd);
+ sum2 = ntohs(*csump);
+
+ /*
+ * Fix ICMP checksum to compensate
+ * TCP checksum adjustment.
+ */
+ CALC_SUMD(sum1, sum2, sumd);
+ sumd2 += sumd;
+ } else {
+ sumd = sum2 - sum1 + 1;
+ sumd2 += sumd;
+ }
+ }
+ } else if (mode == 2) {
+ /*
+ * Put nat_outport back as the destination port of the
+ * quoted header; the checksum compensation follows below.
+ */
+ tcp->th_dport = htons(sum1);
+
+ /*
+ * Fix UDP checksum to compensate port adjustment.
+ * NOTE : the offending IP packet flows the other
+ * direction compared to the ICMP message.
+ *
+ * The UDP checksum is optional, only adjust
+ * it if it has been set.
+ */
+ if ((oip->ip_p == IPPROTO_UDP) &&
+ (dlen >= 8) && (*csump != 0)) {
+ sumd = sum1 - sum2;
+ sumd2 += sumd;
+
+ sum1 = ntohs(*csump);
+ fix_datacksum(csump, sumd);
+ sum2 = ntohs(*csump);
+
+ /*
+ * Fix ICMP checksum to compensate
+ * UDP checksum adjustment.
+ */
+ CALC_SUMD(sum1, sum2, sumd);
+ sumd2 += sumd;
+ }
+
+ /*
+ * Fix TCP checksum (if present) to compensate port
+ * adjustment. NOTE : the offending IP packet flows
+ * the other direction compared to the ICMP message.
+ */
+ if (oip->ip_p == IPPROTO_TCP) {
+ if (dlen >= 18) {
+ sumd = sum1 - sum2;
+ sumd2 += sumd;
+
+ sum1 = ntohs(*csump);
+ fix_datacksum(csump, sumd);
+ sum2 = ntohs(*csump);
+
+ /*
+ * Fix ICMP checksum to compensate
+ * TCP checksum adjustment.
+ */
+ CALC_SUMD(sum1, sum2, sumd);
+ sumd2 += sumd;
+ } else {
+ if (nat->nat_dir == NAT_INBOUND)
+ sumd = sum2 - sum1;
+ else
+ sumd = sum2 - sum1 + 1;
+ sumd2 += sumd;
+ }
+ }
+ }
+ if (sumd2 != 0) {
+ sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16); /* fold the carries back in, twice */
+ sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
+ fix_incksum(fin, &icmp->icmp_cksum, sumd2);
+ }
+ } else if (((flags & IPN_ICMPQUERY) != 0) && (dlen >= 8)) {
+ icmphdr_t *orgicmp;
+
+ /*
+ * XXX - what if this is bogus hl and we go off the end ?
+ * In this case, nat_icmperrorlookup() will have returned NULL.
+ */
+ orgicmp = (icmphdr_t *)dp;
+
+ if (nat->nat_dir == NAT_OUTBOUND) {
+ if (orgicmp->icmp_id != nat->nat_inport) {
+
+ /*
+ * Fix ICMP checksum (of the offending ICMP
+ * query packet) to compensate the change
+ * in the ICMP id of the offending ICMP
+ * packet.
+ *
+ * Since you modify orgicmp->icmp_id with
+ * a delta (say x) and you compensate that
+ * in origicmp->icmp_cksum with a delta
+ * minus x, you don't have to adjust the
+ * overall icmp->icmp_cksum
+ */
+ sum1 = ntohs(orgicmp->icmp_id);
+ sum2 = ntohs(nat->nat_inport);
+ CALC_SUMD(sum1, sum2, sumd);
+ orgicmp->icmp_id = nat->nat_inport;
+ fix_datacksum(&orgicmp->icmp_cksum, sumd);
+ }
+ } /* nat_dir == NAT_INBOUND is impossible for icmp queries */
+ }
+ return nat;
+}
+
+
+/*
+ * NB: these lookups don't lock access to the list; it is assumed that this
+ * has already been done by the caller!
+ */
+
+/* ------------------------------------------------------------------------ */
+/* Function: nat_inlookup */
+/* Returns: nat_t* - NULL == no match, */
+/* else pointer to matching NAT entry */
+/* Parameters: fin(I) - pointer to packet information */
+/* flags(I) - NAT flags for this packet */
+/* p(I) - protocol for this packet */
+/* src(I) - source IP address */
+/* mapdst(I) - destination IP address */
+/* */
+/* Lookup a nat entry based on the mapped destination ip address/port and */
+/* real source address/port. We use this lookup when receiving a packet, */
+/* we're looking for a table entry, based on the destination address. */
+/* */
+/* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */
+/* */
+/* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN */
+/* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN flags. */
+/*
+ * NOTE(review): fin is tested against NULL when fin_ifp is picked up, yet
+ * fin->fin_data[], fin->fin_p and fin->fin_flx are read further down
+ * without any such test, so callers evidently must pass a valid fin.
+ */
+/* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */
+/* the packet is of said protocol */
+/* ------------------------------------------------------------------------ */
+nat_t *nat_inlookup(fin, flags, p, src, mapdst)
+fr_info_t *fin;
+u_int flags, p;
+struct in_addr src , mapdst;
+{
+ u_short sport, dport;
+ ipnat_t *ipn;
+ u_int sflags;
+ nat_t *nat;
+ int nflags;
+ u_32_t dst;
+ void *ifp;
+ u_int hv;
+
+ if (fin != NULL)
+ ifp = fin->fin_ifp;
+ else
+ ifp = NULL;
+ sport = 0;
+ dport = 0;
+ dst = mapdst.s_addr;
+ sflags = flags & NAT_TCPUDPICMP;
+
+ switch (p)
+ {
+ case IPPROTO_TCP :
+ case IPPROTO_UDP :
+ sport = htons(fin->fin_data[0]);
+ dport = htons(fin->fin_data[1]);
+ break;
+ case IPPROTO_ICMP :
+ if (flags & IPN_ICMPERR) /* for errors the id is matched as sport */
+ sport = fin->fin_data[1];
+ else
+ dport = fin->fin_data[1];
+ break;
+ default :
+ break;
+ }
+
+
+ if ((flags & SI_WILDP) != 0)
+ goto find_in_wild_ports;
+
+ hv = NAT_HASH_FN(dst, dport, 0xffffffff);
+ hv = NAT_HASH_FN(src.s_addr, hv + sport, ipf_nattable_sz);
+ nat = nat_table[1][hv];
+ for (; nat; nat = nat->nat_hnext[1]) {
+ nflags = nat->nat_flags;
+
+ if (ifp != NULL) { /* with no ifp, any interface is accepted */
+ if (nat->nat_dir == NAT_REDIRECT) {
+ if (ifp != nat->nat_ifps[0])
+ continue;
+ } else {
+ if (ifp != nat->nat_ifps[1])
+ continue;
+ }
+ }
+
+ if (nat->nat_oip.s_addr == src.s_addr &&
+ nat->nat_outip.s_addr == dst &&
+ (((p == 0) &&
+ (sflags == (nat->nat_flags & IPN_TCPUDPICMP)))
+ || (p == nat->nat_p))) {
+ switch (p) /* "continue" below moves on to the next chain entry */
+ {
+#if 0
+ case IPPROTO_GRE :
+ if (nat->nat_call[1] != fin->fin_data[0])
+ continue;
+ break;
+#endif
+ case IPPROTO_ICMP :
+ if ((flags & IPN_ICMPERR) != 0) {
+ if (nat->nat_outport != sport)
+ continue;
+ } else {
+ if (nat->nat_outport != dport)
+ continue;
+ }
+ break;
+ case IPPROTO_TCP :
+ case IPPROTO_UDP :
+ if (nat->nat_oport != sport)
+ continue;
+ if (nat->nat_outport != dport)
+ continue;
+ break;
+ default :
+ break;
+ }
+
+ ipn = nat->nat_ptr;
+ if ((ipn != NULL) && (nat->nat_aps != NULL))
+ if (appr_match(fin, nat) != 0)
+ continue;
+ return nat;
+ }
+ }
+
+ /*
+ * So if we didn't find it but there are wildcard members in the hash
+ * table, go back and look for them. We do this search and update here
+ * because it is modifying the NAT table and we want to do this only
+ * for the first packet that matches. The exception, of course, is
+ * for "dummy" (FI_IGNORE) lookups.
+ *
+ * This is only attempted for TCP/UDP lookups made with NAT_SEARCH
+ * while ns_wilds is non-zero. The lock on ipf_nat is released and
+ * taken again as a write lock for the search, then downgraded before
+ * returning. The wildcard chain is chosen by hashing on the
+ * addresses alone.
+ */
+find_in_wild_ports:
+ if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
+ return NULL;
+ if (nat_stats.ns_wilds == 0)
+ return NULL;
+
+ RWLOCK_EXIT(&ipf_nat);
+
+ hv = NAT_HASH_FN(dst, 0, 0xffffffff);
+ hv = NAT_HASH_FN(src.s_addr, hv, ipf_nattable_sz);
+
+ WRITE_ENTER(&ipf_nat);
+
+ nat = nat_table[1][hv];
+ for (; nat; nat = nat->nat_hnext[1]) {
+ if (ifp != NULL) {
+ if (nat->nat_dir == NAT_REDIRECT) {
+ if (ifp != nat->nat_ifps[0])
+ continue;
+ } else {
+ if (ifp != nat->nat_ifps[1])
+ continue;
+ }
+ }
+
+ if (nat->nat_p != fin->fin_p)
+ continue;
+ if (nat->nat_oip.s_addr != src.s_addr ||
+ nat->nat_outip.s_addr != dst)
+ continue;
+
+ nflags = nat->nat_flags;
+ if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
+ continue;
+
+ if (nat_wildok(nat, (int)sport, (int)dport, nflags,
+ NAT_INBOUND) == 1) {
+ if ((fin->fin_flx & FI_IGNORE) != 0) /* dummy lookup: leave the entry as it is */
+ break;
+ if ((nflags & SI_CLONE) != 0) {
+ nat = fr_natclone(fin, nat);
+ if (nat == NULL)
+ break;
+ } else {
+ MUTEX_ENTER(&ipf_nat_new);
+ nat_stats.ns_wilds--;
+ MUTEX_EXIT(&ipf_nat_new);
+ }
+ nat->nat_oport = sport; /* fill in the ports and drop the wildcard flags */
+ nat->nat_outport = dport;
+ nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
+ nat_tabmove(nat);
+ break;
+ }
+ }
+
+ MUTEX_DOWNGRADE(&ipf_nat);
+
+ return nat; /* NULL when the chain was exhausted or the clone failed */
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    nat_tabmove                                                 */
+/* Returns:     Nil                                                         */
+/* Parameters:  nat(I) - pointer to NAT structure                           */
+/* Write Lock:  ipf_nat                                                     */
+/*                                                                          */
+/* Re-hash a TCP/UDP entry that was first stored without its ports in the   */
+/* hash.  The entry is taken off both of its current hash chains and put    */
+/* at the head of the chains selected by the port-inclusive hash values.    */
+/* Entries flagged SI_CLONE are left where they are.                        */
+/* ------------------------------------------------------------------------ */
+static void nat_tabmove(nat)
+nat_t *nat;
+{
+	nat_t **bucket;
+	u_int hv;
+	int i;
+
+	if (nat->nat_flags & SI_CLONE)
+		return;
+
+	/*
+	 * Unhook the entry from its present chain in each table.
+	 */
+	for (i = 0; i < 2; i++) {
+		if (nat->nat_hnext[i] != NULL)
+			nat->nat_hnext[i]->nat_phnext[i] = nat->nat_phnext[i];
+		*nat->nat_phnext[i] = nat->nat_hnext[i];
+		nat_stats.ns_bucketlen[i][nat->nat_hv[i]]--;
+	}
+
+	/*
+	 * Hook it back in at the head of the bucket picked by the new hash.
+	 * Table 0 is keyed on the inside address/port, table 1 on the
+	 * outside address/port; both also fold in nat_oip and nat_oport.
+	 */
+	for (i = 0; i < 2; i++) {
+		if (i == 0)
+			hv = NAT_HASH_FN(nat->nat_inip.s_addr,
+					 nat->nat_inport, 0xffffffff);
+		else
+			hv = NAT_HASH_FN(nat->nat_outip.s_addr,
+					 nat->nat_outport, 0xffffffff);
+		hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
+				 ipf_nattable_sz);
+
+		nat->nat_hv[i] = hv;
+		bucket = &nat_table[i][hv];
+		if (*bucket != NULL)
+			(*bucket)->nat_phnext[i] = &nat->nat_hnext[i];
+		nat->nat_phnext[i] = bucket;
+		nat->nat_hnext[i] = *bucket;
+		*bucket = nat;
+		nat_stats.ns_bucketlen[i][hv]++;
+	}
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: nat_outlookup */
+/* Returns: nat_t* - NULL == no match, */
+/* else pointer to matching NAT entry */
+/* Parameters: fin(I) - pointer to packet information */
+/* flags(I) - NAT flags for this packet */
+/* p(I) - protocol for this packet */
+/* src(I) - source IP address */
+/* dst(I) - destination IP address */
+/* (the "rw" argument once listed here is not taken by this code) */
+/* */
+/* Lookup a nat entry based on the source 'real' ip address/port and */
+/* destination address/port. We use this lookup when sending a packet out, */
+/* we're looking for a table entry, based on the source address. */
+/* */
+/* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */
+/* */
+/* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN */
+/* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN flags. */
+/* */
+/* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */
+/* the packet is of said protocol */
+/* ------------------------------------------------------------------------ */
+nat_t *nat_outlookup(fin, flags, p, src, dst)
+fr_info_t *fin;
+u_int flags, p;
+struct in_addr src , dst;
+{
+ u_short sport, dport;
+ u_int sflags;
+ ipnat_t *ipn;
+ u_32_t srcip;
+ nat_t *nat;
+ int nflags;
+ void *ifp;
+ u_int hv;
+
+ ifp = fin->fin_ifp;
+ srcip = src.s_addr;
+ sflags = flags & IPN_TCPUDPICMP;
+ sport = 0;
+ dport = 0;
+
+ switch (p)
+ {
+ case IPPROTO_TCP :
+ case IPPROTO_UDP :
+ sport = htons(fin->fin_data[0]);
+ dport = htons(fin->fin_data[1]);
+ break;
+ case IPPROTO_ICMP :
+ if (flags & IPN_ICMPERR) /* for errors the id is carried as sport */
+ sport = fin->fin_data[1];
+ else
+ dport = fin->fin_data[1];
+ break;
+ default :
+ break;
+ }
+
+ if ((flags & SI_WILDP) != 0)
+ goto find_out_wild_ports;
+
+ hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
+ hv = NAT_HASH_FN(dst.s_addr, hv + dport, ipf_nattable_sz);
+ nat = nat_table[0][hv];
+ for (; nat; nat = nat->nat_hnext[0]) {
+ nflags = nat->nat_flags;
+
+ if (ifp != NULL) { /* with no ifp, any interface is accepted */
+ if (nat->nat_dir == NAT_REDIRECT) {
+ if (ifp != nat->nat_ifps[1])
+ continue;
+ } else {
+ if (ifp != nat->nat_ifps[0])
+ continue;
+ }
+ }
+
+ if (nat->nat_inip.s_addr == srcip &&
+ nat->nat_oip.s_addr == dst.s_addr &&
+ (((p == 0) && (sflags == (nflags & NAT_TCPUDPICMP)))
+ || (p == nat->nat_p))) {
+ switch (p) /* "continue" below moves on to the next chain entry */
+ {
+#if 0
+ case IPPROTO_GRE :
+ if (nat->nat_call[1] != fin->fin_data[0])
+ continue;
+ break;
+#endif
+ case IPPROTO_TCP :
+ case IPPROTO_UDP :
+ if (nat->nat_oport != dport)
+ continue;
+ if (nat->nat_inport != sport)
+ continue;
+ break;
+ default :
+ break;
+ }
+
+ ipn = nat->nat_ptr;
+ if ((ipn != NULL) && (nat->nat_aps != NULL))
+ if (appr_match(fin, nat) != 0)
+ continue;
+ return nat;
+ }
+ }
+
+ /*
+ * So if we didn't find it but there are wildcard members in the hash
+ * table, go back and look for them. We do this search and update here
+ * because it is modifying the NAT table and we want to do this only
+ * for the first packet that matches. The exception, of course, is
+ * for "dummy" (FI_IGNORE) lookups.
+ *
+ * This is only attempted for TCP/UDP lookups made with NAT_SEARCH
+ * while ns_wilds is non-zero. The lock on ipf_nat is released and
+ * taken again as a write lock for the search, then downgraded before
+ * returning. The wildcard chain is chosen by hashing on the
+ * addresses alone.
+ */
+find_out_wild_ports:
+ if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
+ return NULL;
+ if (nat_stats.ns_wilds == 0)
+ return NULL;
+
+ RWLOCK_EXIT(&ipf_nat);
+
+ hv = NAT_HASH_FN(srcip, 0, 0xffffffff);
+ hv = NAT_HASH_FN(dst.s_addr, hv, ipf_nattable_sz);
+
+ WRITE_ENTER(&ipf_nat);
+
+ nat = nat_table[0][hv];
+ for (; nat; nat = nat->nat_hnext[0]) {
+ if (ifp != NULL) {
+ if (nat->nat_dir == NAT_REDIRECT) {
+ if (ifp != nat->nat_ifps[1])
+ continue;
+ } else {
+ if (ifp != nat->nat_ifps[0])
+ continue;
+ }
+ }
+
+ if (nat->nat_p != fin->fin_p)
+ continue;
+ if ((nat->nat_inip.s_addr != srcip) ||
+ (nat->nat_oip.s_addr != dst.s_addr))
+ continue;
+
+ nflags = nat->nat_flags;
+ if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
+ continue;
+
+ if (nat_wildok(nat, (int)sport, (int)dport, nflags,
+ NAT_OUTBOUND) == 1) {
+ if ((fin->fin_flx & FI_IGNORE) != 0) /* dummy lookup: leave the entry as it is */
+ break;
+ if ((nflags & SI_CLONE) != 0) {
+ nat = fr_natclone(fin, nat);
+ if (nat == NULL)
+ break;
+ } else {
+ MUTEX_ENTER(&ipf_nat_new);
+ nat_stats.ns_wilds--;
+ MUTEX_EXIT(&ipf_nat_new);
+ }
+ nat->nat_inport = sport; /* fill in the ports and drop the wildcard flags */
+ nat->nat_oport = dport;
+ if (nat->nat_outport == 0) /* no mapped port chosen yet: reuse sport */
+ nat->nat_outport = sport;
+ nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
+ nat_tabmove(nat);
+ break;
+ }
+ }
+
+ MUTEX_DOWNGRADE(&ipf_nat);
+
+ return nat; /* NULL when the chain was exhausted or the clone failed */
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    nat_lookupredir                                             */
+/* Returns:     nat_t* - NULL == no match,                                  */
+/*                       else pointer to matching NAT entry                 */
+/* Parameters:  np(I/O) - description of the connection to look for; the    */
+/*                        missing address/port pair is filled in on a match */
+/*                                                                          */
+/* Search the NAT tables on behalf of a lookup request.  Two kinds of       */
+/* request are served:                                                      */
+/*  - IPN_IN set: `real' and `out' are given, `in' is returned;             */
+/*  - otherwise:  `in' and `out' are given, `real' is returned.             */
+/* With IPN_FINDFORWARD the flag is cleared in np->nl_flags when a matching */
+/* entry for the forward direction is also present.                         */
+/* ------------------------------------------------------------------------ */
+nat_t *nat_lookupredir(np)
+natlookup_t *np;
+{
+	fr_info_t query;
+	nat_t *match;
+	int inbound;
+
+	inbound = ((np->nl_flags & IPN_IN) != 0);
+
+	/*
+	 * Build a dummy packet description carrying just the ports and
+	 * protocol that the lookup functions consult.
+	 */
+	bzero((char *)&query, sizeof(query));
+	if (inbound)
+		query.fin_data[0] = ntohs(np->nl_realport);
+	else
+		query.fin_data[0] = ntohs(np->nl_inport);
+	query.fin_data[1] = ntohs(np->nl_outport);
+
+	if (np->nl_flags & IPN_TCP)
+		query.fin_p = IPPROTO_TCP;
+	else if (np->nl_flags & IPN_UDP)
+		query.fin_p = IPPROTO_UDP;
+	else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY))
+		query.fin_p = IPPROTO_ICMP;
+
+	if (inbound) {
+		match = nat_inlookup(&query, np->nl_flags, query.fin_p,
+				     np->nl_realip, np->nl_outip);
+		if (match != NULL) {
+			np->nl_inip = match->nat_inip;
+			np->nl_inport = match->nat_inport;
+		}
+		return match;
+	}
+
+	/*
+	 * If nl_inip is non null, this is a lookup based on the real
+	 * ip address. Else, we use the fake.
+	 */
+	match = nat_outlookup(&query, np->nl_flags, query.fin_p,
+			      np->nl_inip, np->nl_outip);
+	if (match == NULL)
+		return NULL;
+
+	if ((np->nl_flags & IPN_FINDFORWARD) != 0) {
+		fr_info_t fwd;
+
+		bzero((char *)&fwd, sizeof(fwd));
+		fwd.fin_p = match->nat_p;
+		fwd.fin_data[0] = ntohs(match->nat_outport);
+		fwd.fin_data[1] = ntohs(match->nat_oport);
+		if (nat_inlookup(&fwd, np->nl_flags, fwd.fin_p,
+				 match->nat_outip, match->nat_oip) != NULL) {
+			np->nl_flags &= ~IPN_FINDFORWARD;
+		}
+	}
+
+	np->nl_realip = match->nat_outip;
+	np->nl_realport = match->nat_outport;
+
+	return match;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    nat_match                                                   */
+/* Returns:     int - 0 == no match, 1 == match                             */
+/* Parameters:  fin(I) - pointer to packet information                      */
+/*              np(I)  - pointer to NAT rule                                */
+/*                                                                          */
+/* Decide whether an IPv4 packet satisfies a NAT rule: protocol, rule kind  */
+/* versus packet direction, source and destination address tests (each may  */
+/* be negated via IPN_NOTSRC/IPN_NOTDST) and finally the port comparison    */
+/* for complete TCP/UDP headers.                                            */
+/* ------------------------------------------------------------------------ */
+static int nat_match(fin, np)
+fr_info_t *fin;
+ipnat_t *np;
+{
+	int srcdiffers, dstdiffers;
+	int notsrc, notdst;
+	frtuc_t *ports;
+
+	if (fin->fin_v != 4)
+		return 0;
+
+	if (np->in_p && fin->fin_p != np->in_p)
+		return 0;
+
+	notsrc = ((np->in_flags & IPN_NOTSRC) != 0);
+	notdst = ((np->in_flags & IPN_NOTDST) != 0);
+
+	if (fin->fin_out) {
+		/* outbound packets only match map/map-block rules */
+		if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
+			return 0;
+		srcdiffers = ((fin->fin_fi.fi_saddr & np->in_inmsk) !=
+			      np->in_inip);
+		dstdiffers = ((fin->fin_fi.fi_daddr & np->in_srcmsk) !=
+			      np->in_srcip);
+	} else {
+		/* inbound packets only match redirect rules */
+		if (!(np->in_redir & NAT_REDIRECT))
+			return 0;
+		srcdiffers = ((fin->fin_fi.fi_saddr & np->in_srcmsk) !=
+			      np->in_srcip);
+		dstdiffers = ((fin->fin_fi.fi_daddr & np->in_outmsk) !=
+			      np->in_outip);
+	}
+
+	/*
+	 * Each address test fails when "address differs" and "test is
+	 * negated" disagree.
+	 */
+	if (srcdiffers != notsrc)
+		return 0;
+	if (dstdiffers != notdst)
+		return 0;
+
+	ports = &np->in_tuc;
+	if ((fin->fin_flx & FI_TCPUDP) &&
+	    !(fin->fin_flx & (FI_SHORT|FI_FRAGBODY)))
+		return fr_tcpudpchk(fin, ports);
+
+	/*
+	 * No usable port information: the rule only matches if it does
+	 * not ask for any port comparison.
+	 */
+	if (ports->ftu_scmp || ports->ftu_dcmp)
+		return 0;
+	return 1;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    nat_update                                                  */
+/* Returns:     Nil                                                         */
+/* Parameters:  fin(I) - pointer to packet information                      */
+/*              nat(I) - pointer to NAT structure                           */
+/*              np(I)  - pointer to NAT rule (may be NULL)                  */
+/*                                                                          */
+/* Refresh the timeout of a NAT table entry while holding its nat_lock.     */
+/* A per-rule timeout queue, selected by fin_rev, takes precedence.  With   */
+/* no rule queue, TCP entries are aged through fr_tcp_age() and all others  */
+/* are moved to the default UDP, ICMP or IP queue.  Must be called with     */
+/* fin_rev updated - i.e. after calling nat_proto().                        */
+/* ------------------------------------------------------------------------ */
+void nat_update(fin, nat, np)
+fr_info_t *fin;
+nat_t *nat;
+ipnat_t *np;
+{
+	ipftq_t *curq, *newq;
+	ipftqent_t *tqe;
+
+	MUTEX_ENTER(&nat->nat_lock);
+	tqe = &nat->nat_tqe;
+	curq = tqe->tqe_ifq;
+
+	/*
+	 * A timeout configured on the rule overrides the defaults, even
+	 * for TCP.
+	 */
+	newq = (np != NULL) ? np->in_tqehead[fin->fin_rev] : NULL;
+
+	if (newq == NULL && nat->nat_p == IPPROTO_TCP) {
+		(void) fr_tcp_age(tqe, fin, nat_tqb, 0);
+	} else {
+		if (newq == NULL) {
+			switch (nat->nat_p)
+			{
+			case IPPROTO_UDP :
+				newq = &nat_udptq;
+				break;
+			case IPPROTO_ICMP :
+				newq = &nat_icmptq;
+				break;
+			default :
+				newq = &nat_iptq;
+				break;
+			}
+		}
+
+		fr_movequeue(tqe, curq, newq);
+	}
+	MUTEX_EXIT(&nat->nat_lock);
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_checknatout */
+/* Returns: int - -1 == packet failed NAT checks so block it, */
+/* 0 == no packet translation occurred, */
+/* 1 == packet was successfully translated. */
+/* Parameters: fin(I) - pointer to packet information */
+/* passp(I) - pointer to filtering result flags */
+/* */
+/* Check to see if an outgoing packet should be changed. ICMP packets are */
+/* first checked to see if they match an existing entry (if an error), */
+/* otherwise a search of the current NAT table is made. If neither results */
+/* in a match then a search for a matching NAT rule is made. Create a new */
+/* NAT entry if we matched a NAT rule. Lastly, actually change the */
+/* packet header(s) as required. */
+/* ------------------------------------------------------------------------ */
+int fr_checknatout(fin, passp)
+fr_info_t *fin;
+u_32_t *passp;
+{
+ struct ifnet *ifp, *sifp;
+ icmphdr_t *icmp = NULL;
+ tcphdr_t *tcp = NULL;
+ int rval, natfailed;
+ ipnat_t *np = NULL;
+ u_int nflags = 0;
+ u_32_t ipa, iph;
+ int natadd = 1;
+ frentry_t *fr;
+ nat_t *nat;
+
+ if (nat_stats.ns_rules == 0 || fr_nat_lock != 0)
+ return 0;
+
+ natfailed = 0;
+ fr = fin->fin_fr;
+ sifp = fin->fin_ifp;
+ if ((fr != NULL) && !(fr->fr_flags & FR_DUP) &&
+ fr->fr_tif.fd_ifp && fr->fr_tif.fd_ifp != (void *)-1)
+ fin->fin_ifp = fr->fr_tif.fd_ifp;
+ ifp = fin->fin_ifp;
+
+ if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
+ switch (fin->fin_p)
+ {
+ case IPPROTO_TCP :
+ nflags = IPN_TCP;
+ break;
+ case IPPROTO_UDP :
+ nflags = IPN_UDP;
+ break;
+ case IPPROTO_ICMP :
+ icmp = fin->fin_dp;
+
+ /*
+ * This is an incoming packet, so the destination is
+ * the icmp_id and the source port equals 0
+ */
+ if (nat_icmpquerytype4(icmp->icmp_type))
+ nflags = IPN_ICMPQUERY;
+ break;
+ default :
+ break;
+ }
+
+ if ((nflags & IPN_TCPUDP))
+ tcp = fin->fin_dp;
+ }
+
+ ipa = fin->fin_saddr;
+
+ READ_ENTER(&ipf_nat);
+
+ if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
+ (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND)))
+ /*EMPTY*/;
+ else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
+ natadd = 0;
+ else if ((nat = nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
+ fin->fin_src, fin->fin_dst))) {
+ nflags = nat->nat_flags;
+ } else {
+ u_32_t hv, msk, nmsk;
+
+ /*
+ * If there is no current entry in the nat table for this IP#,
+ * create one for it (if there is a matching rule).
+ */
+ RWLOCK_EXIT(&ipf_nat);
+ msk = 0xffffffff;
+ nmsk = nat_masks;
+ WRITE_ENTER(&ipf_nat);
+maskloop:
+ iph = ipa & htonl(msk);
+ hv = NAT_HASH_FN(iph, 0, ipf_natrules_sz);
+ for (np = nat_rules[hv]; np; np = np->in_mnext)
+ {
+ if ((np->in_ifps[0] && (np->in_ifps[0] != ifp)))
+ continue;
+ if (np->in_v != fin->fin_v)
+ continue;
+ if (np->in_p && (np->in_p != fin->fin_p))
+ continue;
+ if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
+ continue;
+ if (np->in_flags & IPN_FILTER) {
+ if (!nat_match(fin, np))
+ continue;
+ } else if ((ipa & np->in_inmsk) != np->in_inip)
+ continue;
+
+ if ((fr != NULL) &&
+ !fr_matchtag(&np->in_tag, &fr->fr_nattag))
+ continue;
+
+ if (*np->in_plabel != '\0') {
+ if (((np->in_flags & IPN_FILTER) == 0) &&
+ (np->in_dport != tcp->th_dport))
+ continue;
+ if (appr_ok(fin, tcp, np) == 0)
+ continue;
+ }
+
+ if ((nat = nat_new(fin, np, NULL, nflags,
+ NAT_OUTBOUND))) {
+ np->in_hits++;
+ break;
+ } else
+ natfailed = -1;
+ }
+ if ((np == NULL) && (nmsk != 0)) {
+ while (nmsk) {
+ msk <<= 1;
+ if (nmsk & 0x80000000)
+ break;
+ nmsk <<= 1;
+ }
+ if (nmsk != 0) {
+ nmsk <<= 1;
+ goto maskloop;
+ }
+ }
+ MUTEX_DOWNGRADE(&ipf_nat);
+ }
+
+ if (nat != NULL) {
+ rval = fr_natout(fin, nat, natadd, nflags);
+ if (rval == 1) {
+ MUTEX_ENTER(&nat->nat_lock);
+ nat->nat_ref++;
+ MUTEX_EXIT(&nat->nat_lock);
+ fin->fin_nat = nat;
+ }
+ } else
+ rval = natfailed;
+ RWLOCK_EXIT(&ipf_nat);
+
+ if (rval == -1) {
+ if (passp != NULL)
+ *passp = FR_BLOCK;
+ fin->fin_flx |= FI_BADNAT;
+ }
+ fin->fin_ifp = sifp;
+ return rval;
+}
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_natout */
+/* Returns: int - -1 == packet failed NAT checks so block it, */
+/* 1 == packet was successfully translated. */
+/* Parameters: fin(I) - pointer to packet information */
+/* nat(I) - pointer to NAT structure */
+/* natadd(I) - flag indicating if it is safe to add frag cache */
+/* nflags(I) - NAT flags set for this packet */
+/* */
+/* Translate a packet coming "out" on an interface: rewrite the source */
+/* address (and source port / ICMP id where applicable), apply checksum */
+/* adjustments and run the rule's proxy, if any. */
+/* Called from fr_checknatout() while ipf_nat is held. */
+/* ------------------------------------------------------------------------ */
+int fr_natout(fin, nat, natadd, nflags)
+fr_info_t *fin;
+nat_t *nat;
+int natadd;
+u_32_t nflags;
+{
+ icmphdr_t *icmp;
+ u_short *csump;
+ tcphdr_t *tcp;
+ ipnat_t *np;
+ int i;
+
+ tcp = NULL;
+ icmp = NULL;
+ csump = NULL;
+ np = nat->nat_ptr;
+
+ /* Record fragments so later fragments can find this entry. */
+ if ((natadd != 0) && (fin->fin_flx & FI_FRAG) && (np != NULL))
+ (void) fr_nat_newfrag(fin, 0, nat);
+
+ /* Index 1 holds the outbound counters (fr_natin uses index 0). */
+ MUTEX_ENTER(&nat->nat_lock);
+ nat->nat_bytes[1] += fin->fin_plen;
+ nat->nat_pkts[1]++;
+ MUTEX_EXIT(&nat->nat_lock);
+
+ /*
+ * Fix up checksums, not by recalculating them, but
+ * simply computing adjustments.
+ * This is only done for STREAMS based IP implementations where the
+ * checksum has already been calculated by IP. In all other cases,
+ * IPFilter is called before the checksum needs calculating so there
+ * is no call to modify whatever is in the header now.
+ */
+ if (fin->fin_v == 4) {
+ /*
+ * ICMP error packets: the adjustment is computed here from the
+ * old and new source addresses rather than taken from the entry.
+ */
+ if (nflags == IPN_ICMPERR) {
+ u_32_t s1, s2, sumd;
+
+ s1 = LONG_SUM(ntohl(fin->fin_saddr));
+ s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
+ CALC_SUMD(s1, s2, sumd);
+ fix_outcksum(fin, &fin->fin_ip->ip_sum, sumd);
+ }
+#if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
+ defined(linux) || defined(BRIDGE_IPF)
+ else {
+ /*
+ * Strictly speaking, this isn't necessary on BSD
+ * kernels because they do checksum calculation after
+ * this code has run BUT if ipfilter is being used
+ * to do NAT as a bridge, that code doesn't exist.
+ */
+ if (nat->nat_dir == NAT_OUTBOUND)
+ fix_outcksum(fin, &fin->fin_ip->ip_sum,
+ nat->nat_ipsumd);
+ else
+ fix_incksum(fin, &fin->fin_ip->ip_sum,
+ nat->nat_ipsumd);
+ }
+#endif
+ }
+
+ /* Transport header is only touched on unfragmented/first fragments. */
+ if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
+ if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) {
+ tcp = fin->fin_dp;
+
+ tcp->th_sport = nat->nat_outport;
+ /* keep the host-order copy in fin in step with the header */
+ fin->fin_data[0] = ntohs(nat->nat_outport);
+ }
+
+ if ((nat->nat_outport != 0) && (nflags & IPN_ICMPQUERY)) {
+ icmp = fin->fin_dp;
+ icmp->icmp_id = nat->nat_outport;
+ }
+
+ /* NULL means no layer 4 checksum needs adjusting. */
+ csump = nat_proto(fin, nat, nflags);
+ }
+
+ fin->fin_ip->ip_src = nat->nat_outip;
+
+ nat_update(fin, nat, np);
+
+ /*
+ * The above comments do not hold for layer 4 (or higher) checksums...
+ */
+ if (csump != NULL) {
+ if (nat->nat_dir == NAT_OUTBOUND)
+ fix_outcksum(fin, csump, nat->nat_sumd[1]);
+ else
+ fix_incksum(fin, csump, nat->nat_sumd[1]);
+ }
+#ifdef IPFILTER_SYNC
+ ipfsync_update(SMC_NAT, fin, nat->nat_sync);
+#endif
+ /* ------------------------------------------------------------- */
+ /* A few quick notes: */
+ /* Following are test conditions prior to calling the */
+ /* appr_check routine. */
+ /* */
+ /* A NULL tcp indicates a non TCP/UDP packet. When dealing */
+ /* with a redirect rule, we attempt to match the packet's */
+ /* source port against in_dport, otherwise we'd compare the */
+ /* packet's destination. */
+ /* ------------------------------------------------------------- */
+ /*
+ * NOTE(review): the port tests described above are not present in the
+ * code below; the only condition checked is that the rule has a proxy
+ * attached (in_apr != NULL). A proxy result of 0 is reported as 1;
+ * any other value from appr_check() is returned unchanged.
+ */
+ if ((np != NULL) && (np->in_apr != NULL)) {
+ i = appr_check(fin, nat);
+ if (i == 0)
+ i = 1;
+ } else
+ i = 1;
+ /* Counted and flagged as NATed even if the proxy check failed. */
+ ATOMIC_INCL(nat_stats.ns_mapped[1]);
+ fin->fin_flx |= FI_NATED;
+ return i;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_checknatin */
+/* Returns: int - -1 == packet failed NAT checks so block it, */
+/* 0 == no packet translation occurred, */
+/* 1 == packet was successfully translated. */
+/* Parameters: fin(I) - pointer to packet information */
+/* passp(O) - pointer to filtering result flags; may be NULL, */
+/* set to FR_BLOCK when -1 is returned */
+/* */
+/* Check to see if an incoming packet should be changed. ICMP packets are */
+/* first checked to see if they match an existing entry (if an error), */
+/* otherwise a search of the current NAT table is made. If neither results */
+/* in a match then a search for a matching NAT rule is made. Create a new */
+/* NAT entry if we matched a NAT rule. Lastly, actually change the */
+/* packet header(s) as required (done by fr_natin). */
+/* ------------------------------------------------------------------------ */
+int fr_checknatin(fin, passp)
+fr_info_t *fin;
+u_32_t *passp;
+{
+ u_int nflags, natadd;
+ int rval, natfailed;
+ struct ifnet *ifp;
+ struct in_addr in;
+ icmphdr_t *icmp;
+ tcphdr_t *tcp;
+ u_short dport;
+ ipnat_t *np;
+ nat_t *nat;
+ u_32_t iph;
+
+ /* Nothing to do if no NAT rules are counted or fr_nat_lock is set. */
+ if (nat_stats.ns_rules == 0 || fr_nat_lock != 0)
+ return 0;
+
+ tcp = NULL;
+ icmp = NULL;
+ dport = 0;
+ natadd = 1;
+ nflags = 0;
+ natfailed = 0;
+ ifp = fin->fin_ifp;
+
+ /*
+ * Only look at the transport header when the packet is not short and
+ * is the first fragment (offset 0).
+ */
+ if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
+ switch (fin->fin_p)
+ {
+ case IPPROTO_TCP :
+ nflags = IPN_TCP;
+ break;
+ case IPPROTO_UDP :
+ nflags = IPN_UDP;
+ break;
+ case IPPROTO_ICMP :
+ icmp = fin->fin_dp;
+
+ /*
+ * This is an incoming packet, so the destination is
+ * the icmp_id and the source port equals 0
+ */
+ if (nat_icmpquerytype4(icmp->icmp_type)) {
+ nflags = IPN_ICMPQUERY;
+ dport = icmp->icmp_id;
+ } break;
+ default :
+ break;
+ }
+
+ if ((nflags & IPN_TCPUDP)) {
+ tcp = fin->fin_dp;
+ dport = tcp->th_dport;
+ }
+ }
+
+ /* Inbound rules are matched on the packet's destination address. */
+ in = fin->fin_dst;
+
+ READ_ENTER(&ipf_nat);
+
+ /*
+ * Lookup order: ICMP error match, known fragment, existing NAT table
+ * entry, and finally a search of the rdr rules to create a new entry.
+ */
+ if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
+ (nat = nat_icmperror(fin, &nflags, NAT_INBOUND)))
+ /*EMPTY*/;
+ else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
+ natadd = 0; /* already in the frag cache, do not add again */
+ else if ((nat = nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
+ fin->fin_src, in))) {
+ nflags = nat->nat_flags;
+ } else {
+ u_32_t hv, msk, rmsk;
+
+ /*
+ * Swap the read lock for the write lock before the rule search.
+ * NOTE(review): the NAT table is not searched again after the
+ * write lock is acquired, so an entry created by another thread
+ * in the unlocked window would not be seen - confirm.
+ */
+ RWLOCK_EXIT(&ipf_nat);
+ rmsk = rdr_masks;
+ msk = 0xffffffff;
+ WRITE_ENTER(&ipf_nat);
+ /*
+ * If there is no current entry in the nat table for this IP#,
+ * create one for it (if there is a matching rule).
+ *
+ * Each pass hashes the destination address masked with msk.
+ * When no rule produced an entry, msk is shifted left once for
+ * every bit consumed from rmsk (a copy of rdr_masks) and the
+ * search is retried each time a set bit is found.
+ */
+maskloop:
+ iph = in.s_addr & htonl(msk);
+ hv = NAT_HASH_FN(iph, 0, ipf_rdrrules_sz);
+ for (np = rdr_rules[hv]; np; np = np->in_rnext) {
+ if (np->in_ifps[0] && (np->in_ifps[0] != ifp))
+ continue;
+ if (np->in_v != fin->fin_v)
+ continue;
+ if (np->in_p && (np->in_p != fin->fin_p))
+ continue;
+ if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
+ continue;
+ if (np->in_flags & IPN_FILTER) {
+ if (!nat_match(fin, np))
+ continue;
+ } else {
+ if ((in.s_addr & np->in_outmsk) != np->in_outip)
+ continue;
+ /* dport must lie within [in_pmin, in_pmax] when a range is set */
+ if (np->in_pmin &&
+ ((ntohs(np->in_pmax) < ntohs(dport)) ||
+ (ntohs(dport) < ntohs(np->in_pmin))))
+ continue;
+ }
+
+ /* Rule names a proxy: it must accept this packet too. */
+ if (*np->in_plabel != '\0') {
+ if (!appr_ok(fin, tcp, np)) {
+ continue;
+ }
+ }
+
+ nat = nat_new(fin, np, NULL, nflags, NAT_INBOUND);
+ if (nat != NULL) {
+ np->in_hits++;
+ break;
+ } else
+ natfailed = -1; /* remembered; later rules are still tried */
+ }
+
+ if ((np == NULL) && (rmsk != 0)) {
+ while (rmsk) {
+ msk <<= 1;
+ if (rmsk & 0x80000000)
+ break;
+ rmsk <<= 1;
+ }
+ if (rmsk != 0) {
+ rmsk <<= 1;
+ goto maskloop;
+ }
+ }
+ /* Presumably converts the write hold back to a read hold. */
+ MUTEX_DOWNGRADE(&ipf_nat);
+ }
+ if (nat != NULL) {
+ rval = fr_natin(fin, nat, natadd, nflags);
+ if (rval == 1) {
+ /* The packet info now holds a reference to the entry. */
+ MUTEX_ENTER(&nat->nat_lock);
+ nat->nat_ref++;
+ MUTEX_EXIT(&nat->nat_lock);
+ fin->fin_nat = nat;
+ fin->fin_state = nat->nat_state;
+ }
+ } else
+ rval = natfailed;
+ RWLOCK_EXIT(&ipf_nat);
+
+ if (rval == -1) {
+ if (passp != NULL)
+ *passp = FR_BLOCK;
+ fin->fin_flx |= FI_BADNAT;
+ }
+ return rval;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_natin */
+/* Returns: int - -1 == packet failed NAT checks so block it, */
+/* 1 == packet was successfully translated. */
+/* Parameters: fin(I) - pointer to packet information */
+/* nat(I) - pointer to NAT structure */
+/* natadd(I) - flag indicating if it is safe to add frag cache */
+/* nflags(I) - NAT flags set for this packet */
+/* Locks Held: ipf_nat (READ) */
+/* */
+/* Translate a packet coming "in" on an interface: rewrite the destination */
+/* address (and destination port / ICMP id where applicable) and apply the */
+/* checksum adjustments. fin->fin_fr is replaced with nat->nat_fr. */
+/* ------------------------------------------------------------------------ */
+int fr_natin(fin, nat, natadd, nflags)
+fr_info_t *fin;
+nat_t *nat;
+int natadd;
+u_32_t nflags;
+{
+ icmphdr_t *icmp;
+ u_short *csump;
+ tcphdr_t *tcp;
+ ipnat_t *np;
+ int i;
+
+ tcp = NULL;
+ csump = NULL;
+ np = nat->nat_ptr;
+ fin->fin_fr = nat->nat_fr;
+
+ if (np != NULL) {
+ /* Record fragments so later fragments can find this entry. */
+ if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
+ (void) fr_nat_newfrag(fin, 0, nat);
+
+ /* ------------------------------------------------------------- */
+ /* A few quick notes: */
+ /* Following are test conditions prior to calling the */
+ /* appr_check routine. */
+ /* */
+ /* A NULL tcp indicates a non TCP/UDP packet. When dealing */
+ /* with a map rule, we attempt to match the packet's */
+ /* source port against in_dport, otherwise we'd compare the */
+ /* packet's destination. */
+ /* ------------------------------------------------------------- */
+ /*
+ * NOTE(review): the port tests described above are not present in
+ * the code below; the only condition checked is that the rule has
+ * a proxy attached. A proxy result of -1 aborts the translation
+ * before any counter or header has been modified.
+ */
+ if (np->in_apr != NULL) {
+ i = appr_check(fin, nat);
+ if (i == -1) {
+ return -1;
+ }
+ }
+ }
+
+#ifdef IPFILTER_SYNC
+ ipfsync_update(SMC_NAT, fin, nat->nat_sync);
+#endif
+
+ /* Index 0 holds the inbound counters (fr_natout uses index 1). */
+ MUTEX_ENTER(&nat->nat_lock);
+ nat->nat_bytes[0] += fin->fin_plen;
+ nat->nat_pkts[0]++;
+ MUTEX_EXIT(&nat->nat_lock);
+
+ /* Rewrite the destination in both the header and the cached copy. */
+ fin->fin_ip->ip_dst = nat->nat_inip;
+ fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
+ if (nflags & IPN_TCPUDP)
+ tcp = fin->fin_dp;
+
+ /*
+ * Fix up checksums, not by recalculating them, but
+ * simply computing adjustments.
+ * Why only do this for some platforms on inbound packets ?
+ * Because for those that it is done, IP processing is yet to happen
+ * and so the IPv4 header checksum has not yet been evaluated.
+ * Perhaps it should always be done for the benefit of things like
+ * fast forwarding (so that it doesn't need to be recomputed) but with
+ * header checksum offloading, perhaps it is a moot point.
+ */
+#if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
+ defined(__osf__) || defined(linux)
+ /* Note the in/out fix-up pairing is the reverse of fr_natout(). */
+ if (nat->nat_dir == NAT_OUTBOUND)
+ fix_incksum(fin, &fin->fin_ip->ip_sum, nat->nat_ipsumd);
+ else
+ fix_outcksum(fin, &fin->fin_ip->ip_sum, nat->nat_ipsumd);
+#endif
+
+ /* Transport header is only touched on unfragmented/first fragments. */
+ if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
+ if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) {
+ tcp->th_dport = nat->nat_inport;
+ /* keep the host-order copy in fin in step with the header */
+ fin->fin_data[1] = ntohs(nat->nat_inport);
+ }
+
+
+ if ((nat->nat_inport != 0) && (nflags & IPN_ICMPQUERY)) {
+ icmp = fin->fin_dp;
+
+ icmp->icmp_id = nat->nat_inport;
+ }
+
+ /* NULL means no layer 4 checksum needs adjusting. */
+ csump = nat_proto(fin, nat, nflags);
+ }
+
+ nat_update(fin, nat, np);
+
+ /*
+ * The above comments do not hold for layer 4 (or higher) checksums...
+ */
+ if (csump != NULL) {
+ if (nat->nat_dir == NAT_OUTBOUND)
+ fix_incksum(fin, csump, nat->nat_sumd[0]);
+ else
+ fix_outcksum(fin, csump, nat->nat_sumd[0]);
+ }
+ ATOMIC_INCL(nat_stats.ns_mapped[0]);
+ fin->fin_flx |= FI_NATED;
+ /* Expose the rule's tag to the caller when the rule has one set. */
+ if (np != NULL && np->in_tag.ipt_num[0] != 0)
+ fin->fin_nattag = &np->in_tag;
+ return 1;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: nat_proto */
+/* Returns: u_short* - address of the transport checksum field that the */
+/* caller must adjust, or NULL when no adjustment */
+/* is required for this packet. */
+/* Parameters: fin(I) - pointer to packet information */
+/* nat(I) - pointer to NAT structure */
+/* nflags(I) - NAT flags set for this packet */
+/* */
+/* Sets fin->fin_rev according to whether the packet travels against the */
+/* direction in which the NAT entry was created, then locates the layer 4 */
+/* checksum for TCP, UDP (only if the checksum is in use) and ICMP queries */
+/* (only if the checksum is non-zero). Protocol specific adjustments that */
+/* are not strictly address translation, such as TCP MSS clamping on SYN */
+/* packets, are also performed from here. */
+/* ------------------------------------------------------------------------ */
+u_short *nat_proto(fin, nat, nflags)
+fr_info_t *fin;
+nat_t *nat;
+u_int nflags;
+{
+	u_short *sump = NULL;
+
+	/*
+	 * An inbound packet is "reverse" for an entry created outbound and
+	 * an outbound packet is "reverse" for an entry created inbound.
+	 */
+	if (fin->fin_out != 0)
+		fin->fin_rev = (nat->nat_dir == NAT_INBOUND);
+	else
+		fin->fin_rev = (nat->nat_dir == NAT_OUTBOUND);
+
+	if (fin->fin_p == IPPROTO_TCP) {
+		tcphdr_t *th = fin->fin_dp;
+
+		sump = &th->th_sum;
+
+		/* MSS clamping applies to SYN packets only (IPv4 for now). */
+		if ((nat->nat_mssclamp != 0) && (th->th_flags & TH_SYN) != 0)
+			nat_mssclamp(th, nat->nat_mssclamp, fin, sump);
+
+	} else if (fin->fin_p == IPPROTO_UDP) {
+		udphdr_t *uh = fin->fin_dp;
+
+		/* a zero UDP checksum is left untouched */
+		if (uh->uh_sum)
+			sump = &uh->uh_sum;
+
+	} else if (fin->fin_p == IPPROTO_ICMP) {
+		icmphdr_t *ic = fin->fin_dp;
+
+		/* only ICMP queries with a non-zero checksum are adjusted */
+		if ((nflags & IPN_ICMPQUERY) != 0) {
+			if (ic->icmp_cksum != 0)
+				sump = &ic->icmp_cksum;
+		}
+	}
+
+	return sump;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_natunload */
+/* Returns: Nil */
+/* Parameters: Nil */
+/* */
+/* Free all memory used by NAT structures allocated at runtime: the rule */
+/* list, the NAT table entries, user timeout queues, the hash tables and */
+/* bucket length arrays, and finally (if initialised) the locks. */
+/* ------------------------------------------------------------------------ */
+void fr_natunload()
+{
+ ipftq_t *ifq, *ifqnext;
+
+ /* Remove rules and table entries first; return values are ignored. */
+ (void) nat_clearlist();
+ (void) nat_flushtable();
+
+ /*
+ * Proxy timeout queues are not cleaned here because although they
+ * exist on the NAT list, appr_unload is called after fr_natunload
+ * and the proxies actually are responsible for them being created.
+ * Should the proxy timeouts have their own list? There's no real
+ * justification as this is the only complication.
+ */
+ /* ifq_next is saved up front because ifq may be freed in the body. */
+ for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
+ ifqnext = ifq->ifq_next;
+ if (((ifq->ifq_flags & IFQF_PROXY) == 0) &&
+ (fr_deletetimeoutqueue(ifq) == 0))
+ fr_freetimeoutqueue(ifq);
+ }
+
+ /* Each table is freed only if allocated and then marked as gone. */
+ if (nat_table[0] != NULL) {
+ KFREES(nat_table[0], sizeof(nat_t *) * ipf_nattable_sz);
+ nat_table[0] = NULL;
+ }
+ if (nat_table[1] != NULL) {
+ KFREES(nat_table[1], sizeof(nat_t *) * ipf_nattable_sz);
+ nat_table[1] = NULL;
+ }
+ if (nat_rules != NULL) {
+ KFREES(nat_rules, sizeof(ipnat_t *) * ipf_natrules_sz);
+ nat_rules = NULL;
+ }
+ if (rdr_rules != NULL) {
+ KFREES(rdr_rules, sizeof(ipnat_t *) * ipf_rdrrules_sz);
+ rdr_rules = NULL;
+ }
+ if (maptable != NULL) {
+ KFREES(maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
+ maptable = NULL;
+ }
+ /*
+ * NOTE(review): the size below uses sizeof(u_long *); confirm that it
+ * matches the size used when ns_bucketlen[] was allocated.
+ */
+ if (nat_stats.ns_bucketlen[0] != NULL) {
+ KFREES(nat_stats.ns_bucketlen[0],
+ sizeof(u_long *) * ipf_nattable_sz);
+ nat_stats.ns_bucketlen[0] = NULL;
+ }
+ if (nat_stats.ns_bucketlen[1] != NULL) {
+ KFREES(nat_stats.ns_bucketlen[1],
+ sizeof(u_long *) * ipf_nattable_sz);
+ nat_stats.ns_bucketlen[1] = NULL;
+ }
+
+ /* Presumably undoes an automatically chosen bucket limit - confirm. */
+ if (fr_nat_maxbucket_reset == 1)
+ fr_nat_maxbucket = 0;
+
+ /* Locks are destroyed only if initialisation had completed. */
+ if (fr_nat_init == 1) {
+ fr_nat_init = 0;
+ fr_sttab_destroy(nat_tqb);
+
+ RW_DESTROY(&ipf_natfrag);
+ RW_DESTROY(&ipf_nat);
+
+ MUTEX_DESTROY(&ipf_nat_new);
+ MUTEX_DESTROY(&ipf_natio);
+
+ MUTEX_DESTROY(&nat_udptq.ifq_lock);
+ MUTEX_DESTROY(&nat_icmptq.ifq_lock);
+ MUTEX_DESTROY(&nat_iptq.ifq_lock);
+ }
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_natexpire */
+/* Returns: Nil */
+/* Parameters: Nil */
+/* */
+/* Walk every NAT timeout queue (the built-in queues starting at nat_tqb */
+/* and the user defined queues on nat_utqe) and delete entries from the */
+/* head of each queue until one is found whose expiry time is still in the */
+/* future. Afterwards, free any user defined queue that has been marked */
+/* for deletion and is no longer referenced. */
+/* ------------------------------------------------------------------------ */
+void fr_natexpire()
+{
+	ipftq_t *queue, *nextq;
+	ipftqent_t *ent, *nextent;
+	SPL_INT(s);
+
+	SPL_NET(s);
+	WRITE_ENTER(&ipf_nat);
+
+	/* Built-in queues. */
+	for (queue = nat_tqb; queue != NULL; queue = queue->ifq_next) {
+		ent = queue->ifq_head;
+		while ((ent != NULL) && (ent->tqe_die <= fr_ticks)) {
+			/* fetch the successor before the entry is deleted */
+			nextent = ent->tqe_next;
+			nat_delete(ent->tqe_parent, NL_EXPIRE);
+			ent = nextent;
+		}
+	}
+
+	/* User defined queues. */
+	queue = nat_utqe;
+	while (queue != NULL) {
+		nextq = queue->ifq_next;
+
+		ent = queue->ifq_head;
+		while ((ent != NULL) && (ent->tqe_die <= fr_ticks)) {
+			nextent = ent->tqe_next;
+			nat_delete(ent->tqe_parent, NL_EXPIRE);
+			ent = nextent;
+		}
+
+		queue = nextq;
+	}
+
+	/* Reap user defined queues that are flagged for deletion and idle. */
+	queue = nat_utqe;
+	while (queue != NULL) {
+		nextq = queue->ifq_next;
+
+		if ((queue->ifq_ref == 0) &&
+		    ((queue->ifq_flags & IFQF_DELETE) != 0))
+			fr_freetimeoutqueue(queue);
+
+		queue = nextq;
+	}
+
+	RWLOCK_EXIT(&ipf_nat);
+	SPL_X(s);
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_natsync */
+/* Returns: Nil */
+/* Parameters: ifp(I) - pointer to network interface, or NULL to */
+/* resynchronise against every interface */
+/* */
+/* Walk through all of the currently active NAT sessions, looking for those */
+/* which need to have their translated address updated, then re-resolve */
+/* the interface pointers held by the NAT rules. */
+/* Does nothing unless fr_running is positive. Takes ipf_nat as a writer. */
+/* ------------------------------------------------------------------------ */
+void fr_natsync(ifp)
+void *ifp;
+{
+	u_32_t sum1, sum2, sumd;
+	struct in_addr in;
+	ipnat_t *n;
+	nat_t *nat;
+	void *ifp2;
+	SPL_INT(s);
+
+	if (fr_running <= 0)
+		return;
+
+	/*
+	 * Change IP addresses for NAT sessions for any protocol except TCP
+	 * since it will break the TCP connection anyway. The only rules
+	 * which will get changed are those which are "map ... -> 0/32",
+	 * where the rule specifies the address is taken from the interface.
+	 */
+	SPL_NET(s);
+	WRITE_ENTER(&ipf_nat);
+
+	/*
+	 * Re-check now that the lock is held. This exit path must undo
+	 * both the lock and the SPL_NET() above, exactly as the normal
+	 * exit at the bottom of the function does.
+	 */
+	if (fr_running <= 0) {
+		RWLOCK_EXIT(&ipf_nat);
+		SPL_X(s);
+		return;
+	}
+
+	for (nat = nat_instances; nat; nat = nat->nat_next) {
+		if ((nat->nat_flags & IPN_TCP) != 0)
+			continue;
+		n = nat->nat_ptr;
+		/* only "-> 0/32" rules take their address from the interface */
+		if ((n == NULL) ||
+		    (n->in_outip != 0) || (n->in_outmsk != 0xffffffff))
+			continue;
+		if (((ifp == NULL) || (ifp == nat->nat_ifps[0]) ||
+		     (ifp == nat->nat_ifps[1]))) {
+			/* look the interfaces up again by name */
+			nat->nat_ifps[0] = GETIFP(nat->nat_ifnames[0], 4);
+			if (nat->nat_ifnames[1][0] != '\0') {
+				nat->nat_ifps[1] = GETIFP(nat->nat_ifnames[1],
+							  4);
+			} else
+				nat->nat_ifps[1] = nat->nat_ifps[0];
+			ifp2 = nat->nat_ifps[0];
+			if (ifp2 == NULL)
+				continue;
+
+			/*
+			 * Change the map-to address to be the same as the
+			 * new one.
+			 */
+			sum1 = nat->nat_outip.s_addr;
+			if (fr_ifpaddr(4, FRI_NORMAL, ifp2, &in, NULL) != -1)
+				nat->nat_outip = in;
+			sum2 = nat->nat_outip.s_addr;
+
+			/* address unchanged: checksum deltas remain valid */
+			if (sum1 == sum2)
+				continue;
+			/*
+			 * Readjust the checksum adjustment to take into
+			 * account the new IP#.
+			 */
+			CALC_SUMD(sum1, sum2, sumd);
+			/* XXX - dont change for TCP when solaris does
+			 * hardware checksumming.
+			 */
+			sumd += nat->nat_sumd[0];
+			nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
+			nat->nat_sumd[1] = nat->nat_sumd[0];
+		}
+	}
+
+	/* Refresh the interface pointers cached in the NAT rules. */
+	for (n = nat_list; (n != NULL); n = n->in_next) {
+		if ((ifp == NULL) || (n->in_ifps[0] == ifp))
+			n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4);
+		if ((ifp == NULL) || (n->in_ifps[1] == ifp))
+			n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4);
+	}
+	RWLOCK_EXIT(&ipf_nat);
+	SPL_X(s);
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: nat_icmpquerytype4 */
+/* Returns: int - 1 == type is a query/response, 0 == it is not */
+/* Parameters: icmptype(I) - ICMP type number */
+/* */
+/* Tests to see if the ICMP type number passed is a query/response type or */
+/* not. */
+/* ------------------------------------------------------------------------ */
+static INLINE int nat_icmpquerytype4(icmptype)
+int icmptype;
+{
+	/*
+	 * For the ICMP query NAT code, it is essential that both the query
+	 * and the reply match on the NAT rule. Because the NAT structure
+	 * does not keep track of the icmptype, and a single NAT structure
+	 * is used for all icmp types with the same src, dest and id, the
+	 * replies are simply treated as queries as well. Although it may
+	 * seem odd to call a reply a query, this is exactly how it is
+	 * defined in the IPv4 specification.
+	 *
+	 * Router advertisement/solicitation is currently unsupported: it
+	 * would require rewriting the ICMP data section.
+	 */
+	if ((icmptype == ICMP_ECHOREPLY) || (icmptype == ICMP_ECHO) ||
+	    (icmptype == ICMP_TSTAMP) || (icmptype == ICMP_TSTAMPREPLY) ||
+	    (icmptype == ICMP_IREQ) || (icmptype == ICMP_IREQREPLY) ||
+	    (icmptype == ICMP_MASKREQ) || (icmptype == ICMP_MASKREPLY))
+		return 1;
+
+	return 0;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: nat_log */
+/* Returns: Nil */
+/* Parameters: nat(I) - pointer to NAT structure */
+/* type(I) - type of log entry to create */
+/* */
+/* Creates a NAT log entry describing the given session and hands it to */
+/* ipllog(). Unless LARGE_NAT is defined, the position of the session's */
+/* rule within nat_list is recorded too (-1 if there is none). The function */
+/* is a no-op when IPFILTER_LOG is not defined. */
+/* ------------------------------------------------------------------------ */
+void nat_log(nat, type)
+struct nat *nat;
+u_int type;
+{
+#ifdef IPFILTER_LOG
+# ifndef LARGE_NAT
+	struct ipnat *rule;
+	int pos;
+# endif
+	struct natlog rec;
+	void *itemv[1];
+	size_t sizev[1];
+	int typev[1];
+
+	/* what kind of record this is */
+	rec.nl_type = type;
+	rec.nl_rule = -1;
+	rec.nl_p = nat->nat_p;
+
+	/* addresses and ports */
+	rec.nl_inip = nat->nat_inip;
+	rec.nl_outip = nat->nat_outip;
+	rec.nl_origip = nat->nat_oip;
+	rec.nl_inport = nat->nat_inport;
+	rec.nl_outport = nat->nat_outport;
+	rec.nl_origport = nat->nat_oport;
+
+	/* traffic counters */
+	rec.nl_bytes[0] = nat->nat_bytes[0];
+	rec.nl_bytes[1] = nat->nat_bytes[1];
+	rec.nl_pkts[0] = nat->nat_pkts[0];
+	rec.nl_pkts[1] = nat->nat_pkts[1];
+
+# ifndef LARGE_NAT
+	/* find the index of the owning rule by walking the rule list */
+	if (nat->nat_ptr != NULL) {
+		pos = 0;
+		rule = nat_list;
+		while (rule != NULL) {
+			if (rule == nat->nat_ptr) {
+				rec.nl_rule = pos;
+				break;
+			}
+			rule = rule->in_next;
+			pos++;
+		}
+	}
+# endif
+	itemv[0] = &rec;
+	sizev[0] = sizeof(rec);
+	typev[0] = 0;
+
+	(void) ipllog(IPL_LOGNAT, NULL, itemv, sizev, typev, 1);
+#endif
+}
+
+
+#if defined(__OpenBSD__)
+/* ------------------------------------------------------------------------ */
+/* Function: nat_ifdetach */
+/* Returns: Nil */
+/* Parameters: ifp(I) - pointer to network interface */
+/* */
+/* OpenBSD-only compatibility entry point: simply forwards the interface */
+/* to frsync() so that interface references held by IPFilter are updated. */
+/* ------------------------------------------------------------------------ */
+void nat_ifdetach(ifp)
+void *ifp;
+{
+	frsync(ifp);
+}
+#endif
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_natderef */
+/* Returns: Nil */
+/* Parameters: natp(I/O) - pointer to pointer to NAT table entry; the */
+/* caller's pointer is set to NULL */
+/* */
+/* Drop one reference on the NAT table entry and delete the entry when the */
+/* count reaches zero. ipf_nat is taken as a writer for the duration. */
+/* ------------------------------------------------------------------------ */
+void fr_natderef(natp)
+nat_t **natp;
+{
+	nat_t *entry = *natp;
+
+	/* the caller must not use its pointer once the reference is gone */
+	*natp = NULL;
+
+	WRITE_ENTER(&ipf_nat);
+	if (--entry->nat_ref == 0)
+		nat_delete(entry, NL_EXPIRE);
+	RWLOCK_EXIT(&ipf_nat);
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_natclone */
+/* Returns: nat_t* - NULL == cloning failed, */
+/* else pointer to new NAT structure */
+/* Parameters: fin(I) - pointer to packet information */
+/* nat(I) - pointer to master NAT structure */
+/* Write Lock: ipf_nat */
+/* */
+/* Create a "duplicate" NAT table entry from the master. The clone shares */
+/* the master's rule, filter rule and hostmap (taking a reference on each), */
+/* has SI_CLONE cleared and SI_CLONED set, and is inserted into the NAT */
+/* table. Returns NULL if memory cannot be allocated or insertion fails. */
+/* ------------------------------------------------------------------------ */
+static nat_t *fr_natclone(fin, nat)
+fr_info_t *fin;
+nat_t *nat;
+{
+	frentry_t *fr;
+	nat_t *clone;
+	ipnat_t *np;
+
+	KMALLOC(clone, nat_t *);
+	if (clone == NULL)
+		return NULL;
+	bcopy((char *)nat, (char *)clone, sizeof(*clone));
+
+	/* the copied lock belongs to the master; do not reuse its state */
+	MUTEX_NUKE(&clone->nat_lock);
+
+	clone->nat_aps = NULL;
+	/*
+	 * Initialize all these so that nat_delete() doesn't cause a crash.
+	 */
+	clone->nat_tqe.tqe_pnext = NULL;
+	clone->nat_tqe.tqe_next = NULL;
+	clone->nat_tqe.tqe_ifq = NULL;
+	clone->nat_tqe.tqe_parent = clone;
+
+	clone->nat_flags &= ~SI_CLONE;
+	clone->nat_flags |= SI_CLONED;
+
+	/* the clone shares the master's hostmap entry */
+	if (clone->nat_hm)
+		clone->nat_hm->hm_ref++;
+
+	if (nat_insert(clone, fin->fin_rev) == -1) {
+		/*
+		 * Give back the hostmap reference taken above, otherwise
+		 * it is leaked when the clone is discarded. The master
+		 * still holds its own reference, so the count cannot
+		 * reach zero here.
+		 */
+		if (clone->nat_hm)
+			clone->nat_hm->hm_ref--;
+		KFREE(clone);
+		return NULL;
+	}
+	np = clone->nat_ptr;
+	if (np != NULL) {
+		if (nat_logging)
+			nat_log(clone, (u_int)np->in_redir);
+		np->in_use++;
+	}
+	fr = clone->nat_fr;
+	if (fr != NULL) {
+		MUTEX_ENTER(&fr->fr_lock);
+		fr->fr_ref++;
+		MUTEX_EXIT(&fr->fr_lock);
+	}
+
+	/*
+	 * Because the clone is created outside the normal loop of things and
+	 * TCP has special needs in terms of state, initialise the timeout
+	 * state of the new NAT from here.
+	 */
+	if (clone->nat_p == IPPROTO_TCP) {
+		(void) fr_tcp_age(&clone->nat_tqe, fin, nat_tqb,
+				  clone->nat_flags);
+	}
+#ifdef IPFILTER_SYNC
+	clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone);
+#endif
+	if (nat_logging)
+		nat_log(clone, NL_CLONE);
+	return clone;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: nat_wildok */
+/* Returns: int - 1 == packet's ports match wildcards */
+/* 0 == packet's ports don't match wildcards */
+/* Parameters: nat(I) - NAT entry */
+/* sport(I) - source port */
+/* dport(I) - destination port */
+/* flags(I) - wildcard flags */
+/* dir(I) - packet direction */
+/* */
+/* The packet direction and the direction in which the NAT entry was */
+/* created together select which of the entry's ports each packet port is */
+/* compared with, and which wildcard flag may excuse a mismatch. */
+/* ------------------------------------------------------------------------ */
+static INLINE int nat_wildok(nat, sport, dport, flags, dir)
+nat_t *nat;
+int sport;
+int dport;
+int flags;
+int dir;
+{
+	int srcok, dstok;
+
+	/*
+	 * dir is NAT_INBOUND (0) when called by nat_inlookup and
+	 * NAT_OUTBOUND (1) when called by nat_outlookup. It is combined
+	 * with nat->nat_dir, the "intended" direction of the entry, to
+	 * form a two bit selector: (packet direction << 1) | entry direction.
+	 */
+	switch ((dir << 1) | nat->nat_dir)
+	{
+	case 0: /* inbound packet / inbound entry */
+		srcok = (nat->nat_oport == sport) || (flags & SI_W_SPORT);
+		dstok = (nat->nat_outport == dport) || (flags & SI_W_DPORT);
+		break;
+	case 1: /* inbound packet / outbound entry */
+		srcok = (nat->nat_oport == sport) || (flags & SI_W_DPORT);
+		dstok = (nat->nat_outport == dport) || (flags & SI_W_SPORT);
+		break;
+	case 2: /* outbound packet / inbound entry */
+		srcok = (nat->nat_outport == sport) || (flags & SI_W_DPORT);
+		dstok = (nat->nat_oport == dport) || (flags & SI_W_SPORT);
+		break;
+	case 3: /* outbound packet / outbound entry */
+		srcok = (nat->nat_inport == sport) || (flags & SI_W_SPORT);
+		dstok = (nat->nat_oport == dport) || (flags & SI_W_DPORT);
+		break;
+	default: /* any other combination never matches */
+		srcok = 0;
+		dstok = 0;
+		break;
+	}
+
+	if (srcok && dstok)
+		return 1;
+	return(0);
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: nat_mssclamp */
+/* Returns: Nil */
+/* Parameters: tcp(I) - pointer to TCP header */
+/* maxmss(I) - value to clamp the TCP MSS to */
+/* fin(I) - pointer to packet information */
+/* csump(I) - pointer to TCP checksum */
+/* */
+/* Scan the TCP options for a well-formed MSS option. If one is found */
+/* whose value exceeds maxmss, it is overwritten with maxmss and the TCP */
+/* checksum is adjusted to account for the change. Scanning stops at the */
+/* end-of-options marker or at the first malformed option. */
+/* ------------------------------------------------------------------------ */
+static void nat_mssclamp(tcp, maxmss, fin, csump)
+tcphdr_t *tcp;
+u_32_t maxmss;
+fr_info_t *fin;
+u_short *csump;
+{
+	u_char *optp, *endp;
+	u_32_t oldmss, delta;
+	int thlen, optlen;
+
+	/* header length in bytes; no options means nothing to do */
+	thlen = TCP_OFF(tcp) << 2;
+	if (thlen <= sizeof(*tcp))
+		return;
+
+	optp = (u_char *)tcp + sizeof(*tcp);
+	endp = (u_char *)tcp + thlen;
+
+	while (optp < endp) {
+		if (optp[0] == TCPOPT_EOL)
+			break;
+
+		/* NOP is a single byte with no length field */
+		if (optp[0] == TCPOPT_NOP) {
+			optp++;
+			continue;
+		}
+
+		/* every other option carries a length byte that must fit */
+		if (optp + 1 >= endp)
+			break;
+		optlen = optp[1];
+		if ((optlen <= 0) || (optp + optlen > endp))
+			break;
+
+		/* the MSS option is 4 bytes: kind, length, 16 bit value */
+		if ((optp[0] == TCPOPT_MAXSEG) && (optlen == 4)) {
+			oldmss = optp[2] * 256 + optp[3];
+			if (oldmss > maxmss) {
+				optp[2] = maxmss / 256;
+				optp[3] = maxmss & 0xff;
+				CALC_SUMD(oldmss, maxmss, delta);
+				fix_outcksum(fin, csump, delta);
+			}
+		}
+
+		/* all other options are skipped over */
+		optp += optlen;
+	}
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_setnatqueue */
+/* Returns: Nil */
+/* Parameters: nat(I)- pointer to NAT structure */
+/* rev(I) - forward(0) or reverse(1) direction */
+/* Locks: ipf_nat (read or write) */
+/* */
+/* Put the NAT entry on its default timeout queue, using rev as a helper in */
+/* determining which queue that is. A queue supplied by the entry's rule */
+/* takes precedence; otherwise the queue is chosen by protocol. */
+/* ------------------------------------------------------------------------ */
+void fr_setnatqueue(nat, rev)
+nat_t *nat;
+int rev;
+{
+	ipftq_t *curq, *destq = NULL;
+
+	/* a rule specific queue, if the entry has a rule and it names one */
+	if (nat->nat_ptr != NULL)
+		destq = nat->nat_ptr->in_tqehead[rev];
+
+	/* otherwise fall back to the per-protocol default */
+	if (destq == NULL) {
+		if (nat->nat_p == IPPROTO_UDP)
+			destq = &nat_udptq;
+		else if (nat->nat_p == IPPROTO_ICMP)
+			destq = &nat_icmptq;
+		else if (nat->nat_p == IPPROTO_TCP)
+			destq = nat_tqb + nat->nat_tqe.tqe_state[rev];
+		else
+			destq = &nat_iptq;
+	}
+
+	/*
+	 * An entry that is not yet on any queue is appended to the chosen
+	 * one; an entry already queued is moved across.
+	 */
+	curq = nat->nat_tqe.tqe_ifq;
+	if (curq == NULL)
+		fr_queueappend(&nat->nat_tqe, destq, nat);
+	else
+		fr_movequeue(&nat->nat_tqe, curq, destq);
+}
diff --git a/usr/src/uts/common/inet/ipf/ip_pool.c b/usr/src/uts/common/inet/ipf/ip_pool.c
new file mode 100644
index 0000000000..e3aace93df
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/ip_pool.c
@@ -0,0 +1,789 @@
+/*
+ * Copyright (C) 1993-2001, 2003 by Darren Reed.
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ *
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#if defined(KERNEL) || defined(_KERNEL)
+# undef KERNEL
+# undef _KERNEL
+# define KERNEL 1
+# define _KERNEL 1
+#endif
+#if defined(__osf__)
+# define _PROTO_NET_H_
+#endif
+#include <sys/errno.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/file.h>
+#if !defined(_KERNEL) && !defined(__KERNEL__)
+# include <stdio.h>
+# include <stdlib.h>
+# include <string.h>
+# define _KERNEL
+# ifdef __OpenBSD__
+struct file;
+# endif
+# include <sys/uio.h>
+# undef _KERNEL
+#else
+# include <sys/systm.h>
+# if defined(NetBSD) && (__NetBSD_Version__ >= 104000000)
+# include <sys/proc.h>
+# endif
+#endif
+#include <sys/time.h>
+#if !defined(linux)
+# include <sys/protosw.h>
+#endif
+#include <sys/socket.h>
+#if defined(_KERNEL) && (!defined(__SVR4) && !defined(__svr4__))
+# include <sys/mbuf.h>
+#endif
+#if defined(__SVR4) || defined(__svr4__)
+# include <sys/filio.h>
+# include <sys/byteorder.h>
+# ifdef _KERNEL
+# include <sys/dditypes.h>
+# endif
+# include <sys/stream.h>
+# include <sys/kmem.h>
+#endif
+#if defined(__FreeBSD_version) && (__FreeBSD_version >= 300000)
+# include <sys/malloc.h>
+#endif
+
+#if defined(_KERNEL) && (defined(__osf__) || defined(AIX) || \
+ defined(__hpux) || defined(__sgi))
+# ifdef __osf__
+# include <net/radix.h>
+# endif
+# include "radix_ipf_local.h"
+# define _RADIX_H_
+#endif
+#include <net/if.h>
+#include <netinet/in.h>
+
+#include "netinet/ip_compat.h"
+#include "netinet/ip_fil.h"
+#include "netinet/ip_pool.h"
+
+#if defined(IPFILTER_LOOKUP) && defined(_KERNEL) && \
+ ((BSD >= 198911) && !defined(__osf__) && \
+ !defined(__hpux) && !defined(__sgi))
+static int rn_freenode __P((struct radix_node *, void *));
+#endif
+
+/* END OF INCLUDES */
+
+#if !defined(lint)
+static const char sccsid[] = "@(#)ip_fil.c 2.41 6/5/96 (C) 1993-2000 Darren Reed";
+static const char rcsid[] = "@(#)$Id: ip_pool.c,v 2.55.2.14 2005/06/12 07:18:26 darrenr Exp $";
+#endif
+
+#ifdef IPFILTER_LOOKUP
+
+# ifndef RADIX_NODE_HEAD_LOCK
+# define RADIX_NODE_HEAD_LOCK(x) ;
+# endif
+# ifndef RADIX_NODE_HEAD_UNLOCK
+# define RADIX_NODE_HEAD_UNLOCK(x) ;
+# endif
+
+ip_pool_stat_t ipoolstat; /* pool/node counters; zeroed by ip_pool_init() */
+ipfrwlock_t ip_poolrw; /* rwlock; taken for reading by ip_pool_search() */
+
+/*
+ * Binary tree routines from Sedgewick and enhanced to do ranges of addresses.
+ * NOTE: Insertion *MUST* be from greatest range to least for it to work!
+ * These should be replaced, eventually, by something else - most notably an
+ * interval searching method. The important feature is to be able to find
+ * the best match.
+ *
+ * So why not use a radix tree for this? As the first line implies, it
+ * has been written to work with a _range_ of addresses. A range is not
+ * necessarily a match with any given netmask so what we end up dealing
+ * with is an interval tree. Implementations of these are hard to find
+ * and the one herein is far from bug free.
+ *
+ * Sigh, in the end I became convinced that the bugs the code contained did
+ * not make it worthwhile not using radix trees. For now the radix tree from
+ * 4.4 BSD is used, but this is not viewed as a long term solution.
+ */
+ip_pool_t *ip_pool_list[IPL_LOGSIZE] = { NULL, NULL, NULL, NULL, /* list heads, indexed by unit */
+ NULL, NULL, NULL, NULL };
+
+
+#ifdef TEST_POOL
+void treeprint __P((ip_pool_t *));
+
+/*
+ * Self-test driver, built only when TEST_POOL is defined. It creates pool
+ * "0" on unit 0, inserts a fixed set of address/mask nodes (each one twice,
+ * so the duplicate insertion path is exercised as well) and then prints the
+ * result of a series of lookups next to the expected value in parentheses.
+ *
+ * Returns 0 once the lookups have been printed, or 1 if the test pool could
+ * not be created or found (previously the pool pointer was then used
+ * uninitialised).
+ */
+int
+main(argc, argv)
+	int argc;
+	char *argv[];
+{
+	/* Nodes to insert: address, netmask and the info value to store. */
+	static const struct {
+		u_32_t	addr;
+		u_32_t	mask;
+		int	info;
+	} nodes[] = {
+		{ 0x0a010203, 0xffffffff, 1 },
+		{ 0x0a000000, 0xff000000, 0 },
+		{ 0x0a010100, 0xffffff00, 1 },
+		{ 0x0a010200, 0xffffff00, 0 },
+		{ 0x0a010000, 0xffff0000, 1 },
+		{ 0x0a01020f, 0xffffffff, 1 }
+	};
+	/* Addresses to look up and the search result each should give. */
+	static const struct {
+		u_32_t	addr;
+		int	expect;
+	} probes[] = {
+		{ 0x0a00aabb, 0 },
+		{ 0x0a000001, 0 },
+		{ 0x0a000101, 0 },
+		{ 0x0a010001, 1 },
+		{ 0x0a010101, 1 },
+		{ 0x0a010201, 0 },
+		{ 0x0a010203, 1 },
+		{ 0x0a01020f, 1 },
+		{ 0x0b00aabb, -1 }
+	};
+	addrfamily_t a, b;
+	iplookupop_t op;
+	ip_pool_t *ipo;
+	i6addr_t ip;
+	int i;
+
+	RWLOCK_INIT(&ip_poolrw, "poolrw");
+	ip_pool_init();
+
+	bzero((char *)&a, sizeof(a));
+	bzero((char *)&b, sizeof(b));
+	bzero((char *)&ip, sizeof(ip));
+	bzero((char *)&op, sizeof(op));
+	strcpy(op.iplo_name, "0");
+
+	/* Without a pool there is nothing to test; do not carry on blindly. */
+	if (ip_pool_create(&op) != 0) {
+		printf("ip_pool_create failed\n");
+		return 1;
+	}
+	ipo = ip_pool_find(0, "0");
+	if (ipo == NULL) {
+		printf("ip_pool_find failed\n");
+		return 1;
+	}
+
+	for (i = 0; i < (int)(sizeof(nodes) / sizeof(nodes[0])); i++) {
+		a.adf_addr.in4.s_addr = nodes[i].addr;
+		b.adf_addr.in4.s_addr = nodes[i].mask;
+		/* Second call repeats the insert on purpose (duplicate). */
+		ip_pool_insert(ipo, &a, &b, nodes[i].info);
+		ip_pool_insert(ipo, &a, &b, nodes[i].info);
+	}
+#ifdef DEBUG_POOL
+	treeprint(ipo);
+#endif
+
+	for (i = 0; i < (int)(sizeof(probes) / sizeof(probes[0])); i++) {
+		ip.in4.s_addr = probes[i].addr;
+		printf("search(%#x) = %d (%d)\n", ip.in4.s_addr,
+		    ip_pool_search(ipo, 4, &ip), probes[i].expect);
+	}
+
+#ifdef DEBUG_POOL
+	treeprint(ipo);
+#endif
+
+	ip_pool_fini();
+
+	return 0;
+}
+
+
+/*
+ * Debug helper for the TEST_POOL build: print one line for every node on
+ * the pool's linked list (ipo_list), showing the node pointer, name, IPv4
+ * address and mask, stored info value and hit counter.
+ */
+void
+treeprint(ipo)
+ip_pool_t *ipo;
+{
+	ip_pool_node_t *node = ipo->ipo_list;
+
+	while (node != NULL) {
+		printf("Node %p(%s) (%#x/%#x) = %d hits %lu\n",
+		    node, node->ipn_name,
+		    node->ipn_addr.adf_addr.in4.s_addr,
+		    node->ipn_mask.adf_addr.in4.s_addr,
+		    node->ipn_info, node->ipn_hits);
+		node = node->ipn_next;
+	}
+}
+#endif /* TEST_POOL */
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: ip_pool_init */
+/* Returns: int - always 0 */
+/* */
+/* Reset the pool statistics and, for userland or older BSD kernel builds, */
+/* initialise the radix tree code that backs the pools. */
+/* ------------------------------------------------------------------------ */
+int
+ip_pool_init()
+{
+	/* Start with all counters at zero. */
+	bzero((char *)&ipoolstat, sizeof(ip_pool_stat_t));
+
+#if (!defined(_KERNEL) || (BSD < 199306))
+	rn_init();
+#endif
+
+	return 0;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: ip_pool_fini */
+/* Returns: Nil */
+/* Locks: WRITE(ipf_global) */
+/* */
+/* Walk every unit's pool list and ask ip_pool_destroy() to remove each */
+/* pool by unit and name; its result is ignored. Afterwards, for userland */
+/* or older BSD kernel builds, shut down the radix tree code. */
+/* ------------------------------------------------------------------------ */
+void ip_pool_fini()
+{
+	ip_pool_t *pool, *next;
+	iplookupop_t op;
+	int unit;
+
+	ASSERT(rw_read_locked(&ipf_global.ipf_lk) == 0);
+
+	for (unit = 0; unit <= IPL_LOGMAX; unit++) {
+		next = ip_pool_list[unit];
+		while (next != NULL) {
+			pool = next;
+			/* Fetch the successor first: destroy may free pool. */
+			next = pool->ipo_next;
+
+			op.iplo_unit = unit;
+			(void)strncpy(op.iplo_name, pool->ipo_name,
+			    sizeof(op.iplo_name));
+			(void) ip_pool_destroy(&op);
+		}
+	}
+
+#if (!defined(_KERNEL) || (BSD < 199306))
+	rn_fini();
+#endif
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: ip_pool_statistics */
+/* Returns: int - 0 = success, else error */
+/* Parameters: op(I) - pointer to lookup operation arguments */
+/* */
+/* Take a snapshot of ipoolstat, fill in its pool list pointers for the */
+/* requested unit (every unit for IPL_LOGALL, or a single named pool when */
+/* iplo_name is non-empty) and copy the result out to op->iplo_struct. */
+/* EINVAL is returned when iplo_size does not match the statistics */
+/* structure or when the unit number is out of range. */
+/* ------------------------------------------------------------------------ */
+int ip_pool_statistics(op)
+iplookupop_t *op;
+{
+	ip_pool_stat_t stats;
+	int unit, i;
+
+	if (op->iplo_size != sizeof(ipoolstat))
+		return EINVAL;
+
+	bcopy((char *)&ipoolstat, (char *)&stats, sizeof(stats));
+
+	unit = op->iplo_unit;
+	if (unit == IPL_LOGALL) {
+		/* All units: hand back every list head. */
+		for (i = 0; i < IPL_LOGSIZE; i++)
+			stats.ipls_list[i] = ip_pool_list[i];
+	} else if ((unit < 0) || (unit >= IPL_LOGSIZE)) {
+		/* Nothing is copied out for a bad unit number. */
+		return EINVAL;
+	} else if (op->iplo_name[0] == '\0') {
+		stats.ipls_list[unit] = ip_pool_list[unit];
+	} else {
+		stats.ipls_list[unit] = ip_pool_find(unit, op->iplo_name);
+	}
+
+	return COPYOUT(&stats, op->iplo_struct, sizeof(stats));
+}
+
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: ip_pool_find */
+/* Returns: void * - pointer to the matching pool, or NULL if none */
+/* Parameters: unit(I) - device unit number whose pool list is searched */
+/* name(I) - name of the pool to look for */
+/* */
+/* Find a matching pool inside the collection of pools for a particular */
+/* device, indicated by the unit number. Names are compared over at most */
+/* sizeof(ipo_name) bytes. A unit number outside the bounds of */
+/* ip_pool_list[] matches nothing and yields NULL rather than indexing */
+/* beyond the array. */
+/* ------------------------------------------------------------------------ */
+void *ip_pool_find(unit, name)
+int unit;
+char *name;
+{
+	ip_pool_t *p;
+
+	/* ip_pool_list has IPL_LOGSIZE slots; reject anything else. */
+	if ((unit < 0) || (unit >= IPL_LOGSIZE))
+		return NULL;
+
+	for (p = ip_pool_list[unit]; p != NULL; p = p->ipo_next)
+		if (strncmp(p->ipo_name, name, sizeof(p->ipo_name)) == 0)
+			break;
+	return p;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: ip_pool_findeq */
+/* Returns: ip_pool_node_t * - result of the radix head's rnh_lookup */
+/* Parameters: ipo(I) - pointer to the pool to search. */
+/* addr(I) - pointer to address information to look up */
+/* mask(I) - pointer to the netmask to go with addr */
+/* */
+/* Searches for an exact match of an entry in the pool. */
+/* ------------------------------------------------------------------------ */
+ip_pool_node_t *ip_pool_findeq(ipo, addr, mask)
+ip_pool_t *ipo;
+addrfamily_t *addr, *mask;
+{
+ struct radix_node *n;
+ SPL_INT(s);
+
+ SPL_NET(s);
+ RADIX_NODE_HEAD_LOCK(ipo->ipo_head);
+ n = ipo->ipo_head->rnh_lookup(addr, mask, ipo->ipo_head);
+ RADIX_NODE_HEAD_UNLOCK(ipo->ipo_head);
+ SPL_X(s);
+ return (ip_pool_node_t *)n; /* NOTE(review): presumably NULL when nothing matches - confirm */
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: ip_pool_search */
+/* Returns: int - -1 == error (no pool or unsupported IP version), */
+/* 1 == no matching node, otherwise the ipn_info value */
+/* stored in the node that matched */
+/* Parameters: tptr(I) - pointer to the pool to search */
+/* version(I) - IP protocol version (4 or 6) */
+/* dptr(I) - pointer to address information */
+/* */
+/* Build a lookup key from the address and ask the pool's radix tree for */
+/* the best match. A match on a non-root node bumps the hit counters of */
+/* both the pool and the node. The tree is consulted with ip_poolrw held */
+/* for reading. */
+/* ------------------------------------------------------------------------ */
+int ip_pool_search(tptr, version, dptr)
+void *tptr;
+int version;
+void *dptr;
+{
+	ip_pool_t *ipo = tptr;
+	i6addr_t *addr = (i6addr_t *)dptr;
+	struct radix_node *rn;
+	ip_pool_node_t *node;
+	addrfamily_t key;
+	int result = 1;
+
+	if (ipo == NULL)
+		return -1;
+
+	/* Key length covers the header plus the address for this family. */
+	bzero(&key, sizeof(key));
+	key.adf_len = offsetof(addrfamily_t, adf_addr);
+
+	switch (version)
+	{
+	case 4 :
+		key.adf_len += sizeof(addr->in4);
+		key.adf_addr.in4 = addr->in4;
+		break;
+#ifdef USE_INET6
+	case 6 :
+		key.adf_len += sizeof(addr->in6);
+		key.adf_addr.in6 = addr->in6;
+		break;
+#endif
+	default :
+		return -1;
+	}
+
+	READ_ENTER(&ip_poolrw);
+
+	RADIX_NODE_HEAD_LOCK(ipo->ipo_head);
+	rn = ipo->ipo_head->rnh_matchaddr(&key, ipo->ipo_head);
+	RADIX_NODE_HEAD_UNLOCK(ipo->ipo_head);
+
+	/* Root nodes of the tree are not real pool entries. */
+	if ((rn != NULL) && ((rn->rn_flags & RNF_ROOT) == 0)) {
+		node = (ip_pool_node_t *)rn;
+		ipo->ipo_hits++;
+		node->ipn_hits++;
+		result = node->ipn_info;
+	}
+
+	RWLOCK_EXIT(&ip_poolrw);
+	return result;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: ip_pool_insert */
+/* Returns: int - 0 = success, ENOMEM if the node cannot be allocated */
+/* or the radix tree does not accept it */
+/* Parameters: ipo(I) - pointer to the pool getting the new node. */
+/* addr(I) - IPv4/6 address being added as a node */
+/* mask(I) - IPv4/6 netmask to go with the node being added */
+/* info(I) - extra information to store in this node. */
+/* Locks: WRITE(ip_poolrw) */
+/* */
+/* Allocate a node, record addr, mask and info in it together with the */
+/* pool's name, add it to the pool's radix tree and, if that succeeds, */
+/* link it at the head of the pool's node list. */
+/* ------------------------------------------------------------------------ */
+int ip_pool_insert(ipo, addr, mask, info)
+ip_pool_t *ipo;
+addrfamily_t *addr, *mask;
+int info;
+{
+	ip_pool_node_t *node;
+	struct radix_node *rn;
+
+	ASSERT(rw_read_locked(&ip_poolrw.ipf_lk) == 0);
+
+	KMALLOC(node, ip_pool_node_t *);
+	if (node == NULL) {
+		return ENOMEM;
+	}
+	bzero(node, sizeof(*node));
+
+	(void)strncpy(node->ipn_name, ipo->ipo_name, sizeof(node->ipn_name));
+	node->ipn_info = info;
+
+	/* The stored key and mask always claim the full structure length. */
+	bcopy(addr, &node->ipn_addr, sizeof(*addr));
+	node->ipn_addr.adf_len = sizeof(node->ipn_addr);
+	bcopy(mask, &node->ipn_mask, sizeof(*mask));
+	node->ipn_mask.adf_len = sizeof(node->ipn_mask);
+
+	RADIX_NODE_HEAD_LOCK(ipo->ipo_head);
+	rn = ipo->ipo_head->rnh_addaddr(&node->ipn_addr, &node->ipn_mask,
+	    ipo->ipo_head, node->ipn_nodes);
+	RADIX_NODE_HEAD_UNLOCK(ipo->ipo_head);
+#ifdef DEBUG_POOL
+	printf("Added %p at %p\n", node, rn);
+#endif
+
+	/* The tree refused the entry: give the node back. */
+	if (rn == NULL) {
+		KFREE(node);
+		return ENOMEM;
+	}
+
+	/* Push the node onto the front of the pool's list. */
+	node->ipn_pnext = &ipo->ipo_list;
+	node->ipn_next = ipo->ipo_list;
+	if (node->ipn_next != NULL)
+		node->ipn_next->ipn_pnext = &node->ipn_next;
+	ipo->ipo_list = node;
+
+	ipoolstat.ipls_nodes++;
+
+	return 0;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: ip_pool_create */
+/* Returns: int - 0 = success, ENOMEM if allocation or radix head */
+/* initialisation fails */
+/* Parameters: op(I) - pointer to iplookup struct with call details */
+/* Locks: WRITE(ip_poolrw) */
+/* */
+/* Create a new pool as described by the iplookupop structure and put it */
+/* at the head of the unit's pool list. No check is made for an existing */
+/* pool of the same name - the caller is assumed to have done that. When */
+/* IPOOL_ANON is set in iplo_arg, the name is a hex number, starting from */
+/* IPOOL_ANON and incremented until no pool on the unit already uses it. */
+/* ------------------------------------------------------------------------ */
+int ip_pool_create(op)
+iplookupop_t *op;
+{
+	char name[FR_GROUPLEN];
+	int poolnum, unit;
+	ip_pool_t *h;
+
+	ASSERT(rw_read_locked(&ip_poolrw.ipf_lk) == 0);
+
+	KMALLOC(h, ip_pool_t *);
+	if (h == NULL)
+		return ENOMEM;
+	bzero(h, sizeof(*h));
+
+	if (rn_inithead((void **)&h->ipo_head,
+	    offsetof(addrfamily_t, adf_addr) << 3) == 0) {
+		KFREE(h);
+		return ENOMEM;
+	}
+
+	unit = op->iplo_unit;
+
+	if ((op->iplo_arg & IPOOL_ANON) == 0) {
+		(void) strncpy(h->ipo_name, op->iplo_name, sizeof(h->ipo_name));
+	} else {
+		ip_pool_t *p;
+
+		/*
+		 * Try successive numbers until one is found that no pool
+		 * on this unit is using as its name.
+		 */
+		poolnum = IPOOL_ANON;
+		for (;;) {
+#if defined(SNPRINTF) && defined(_KERNEL)
+			(void)SNPRINTF(name, sizeof(name), "%x", poolnum);
+#else
+			(void)sprintf(name, "%x", poolnum);
+#endif
+			for (p = ip_pool_list[unit]; p != NULL;
+			    p = p->ipo_next)
+				if (strncmp(name, p->ipo_name,
+				    sizeof(p->ipo_name)) == 0)
+					break;
+			if (p == NULL)
+				break;
+			poolnum++;
+		}
+
+		(void)strncpy(h->ipo_name, name, sizeof(h->ipo_name));
+	}
+
+	h->ipo_ref = 1;
+	h->ipo_list = NULL;
+	h->ipo_unit = unit;
+
+	/* Link the new pool in at the head of the unit's list. */
+	h->ipo_pnext = &ip_pool_list[unit];
+	h->ipo_next = ip_pool_list[unit];
+	if (h->ipo_next != NULL)
+		h->ipo_next->ipo_pnext = &h->ipo_next;
+	ip_pool_list[unit] = h;
+
+	ipoolstat.ipls_pools++;
+
+	return 0;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: ip_pool_remove */
+/* Returns: int - 0 = success, ENOENT if the node is not in the pool */
+/* Parameters: ipo(I) - pointer to the pool to remove the node from. */
+/* ipe(I) - pointer to the node being deleted */
+/* Locks: WRITE(ip_poolrw) */
+/* */
+/* Look for the given node on the pool's list. If it is there, unlink it, */
+/* delete its address/mask from the pool's radix tree and free the node. */
+/* ------------------------------------------------------------------------ */
+int ip_pool_remove(ipo, ipe)
+ip_pool_t *ipo;
+ip_pool_node_t *ipe;
+{
+	ip_pool_node_t *n;
+
+	ASSERT(rw_read_locked(&ip_poolrw.ipf_lk) == 0);
+
+	/* Only a node that is really on this pool's list may be removed. */
+	n = ipo->ipo_list;
+	while ((n != NULL) && (n != ipe))
+		n = n->ipn_next;
+
+	if (n == NULL)
+		return ENOENT;
+
+	/* Unlink from the list before touching the tree. */
+	*n->ipn_pnext = n->ipn_next;
+	if (n->ipn_next != NULL)
+		n->ipn_next->ipn_pnext = n->ipn_pnext;
+
+	RADIX_NODE_HEAD_LOCK(ipo->ipo_head);
+	ipo->ipo_head->rnh_deladdr(&n->ipn_addr, &n->ipn_mask,
+	    ipo->ipo_head);
+	RADIX_NODE_HEAD_UNLOCK(ipo->ipo_head);
+	KFREE(n);
+
+	ipoolstat.ipls_nodes--;
+
+	return 0;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: ip_pool_destroy */
+/* Returns: int - 0 = success, ESRCH if no such pool, EBUSY if the */
+/* pool's reference count is not exactly one */
+/* Parameters: op(I) - information about the pool to remove */
+/* Locks: WRITE(ip_poolrw) or WRITE(ipf_global) */
+/* */
+/* Look the pool up by unit and name and, provided it is not otherwise */
+/* referenced, free it. */
+/* */
+/* NOTE: Because this function is called out of ipldetach() where ip_poolrw */
+/* may not be initialised, we can't use an ASSERT to enforce the locking */
+/* assertion that one of the two (ip_poolrw,ipf_global) is held. */
+/* ------------------------------------------------------------------------ */
+int ip_pool_destroy(op)
+iplookupop_t *op;
+{
+	ip_pool_t *ipo = ip_pool_find(op->iplo_unit, op->iplo_name);
+
+	if (ipo == NULL)
+		return ESRCH;
+
+	if (ipo->ipo_ref == 1) {
+		ip_pool_free(ipo);
+		return 0;
+	}
+
+	return EBUSY;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: ip_pool_flush */
+/* Returns: int - number of pools deleted */
+/* Parameters: fp(I) - which pool(s) to flush */
+/* Locks: WRITE(ip_poolrw) or WRITE(ipf_global) */
+/* */
+/* Destroy the pools of the unit named in the flush request, or of every */
+/* unit when the request says IPLT_ALL. Work on a unit stops at the first */
+/* pool that ip_pool_destroy() refuses to remove; other units are still */
+/* processed. */
+/* */
+/* NOTE: Because this function is called out of ipldetach() where ip_poolrw */
+/* may not be initialised, we can't use an ASSERT to enforce the locking */
+/* assertion that one of the two (ip_poolrw,ipf_global) is held. */
+/* ------------------------------------------------------------------------ */
+int ip_pool_flush(fp)
+iplookupflush_t *fp;
+{
+	ip_pool_t *pool, *next;
+	iplookupop_t op;
+	int i, unit, num = 0;
+
+	unit = fp->iplf_unit;
+
+	for (i = 0; i <= IPL_LOGMAX; i++) {
+		if ((unit == IPLT_ALL) || (i == unit)) {
+			next = ip_pool_list[i];
+			while (next != NULL) {
+				pool = next;
+				/* Remember the successor before destroying. */
+				next = pool->ipo_next;
+
+				op.iplo_unit = i;
+				(void)strncpy(op.iplo_name, pool->ipo_name,
+				    sizeof(op.iplo_name));
+				if (ip_pool_destroy(&op) != 0)
+					break;
+				num++;
+			}
+		}
+	}
+	return num;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: ip_pool_free */
+/* Returns: void */
+/* Parameters: ipo(I) - pointer to pool structure */
+/* Locks: WRITE(ip_poolrw) or WRITE(ipf_global) */
+/* */
+/* Empty the pool - each node is deleted from the radix tree, unlinked and */
+/* freed - then take the pool off its unit's list, release the radix head */
+/* and free the pool structure itself. */
+/* */
+/* NOTE: Because this function is called out of ipldetach() where ip_poolrw */
+/* may not be initialised, we can't use an ASSERT to enforce the locking */
+/* assertion that one of the two (ip_poolrw,ipf_global) is held. */
+/* ------------------------------------------------------------------------ */
+void ip_pool_free(ipo)
+ip_pool_t *ipo;
+{
+	ip_pool_node_t *node;
+
+	RADIX_NODE_HEAD_LOCK(ipo->ipo_head);
+	/* Unlinking the head node advances ipo_list to its successor. */
+	for (node = ipo->ipo_list; node != NULL; node = ipo->ipo_list) {
+		ipo->ipo_head->rnh_deladdr(&node->ipn_addr, &node->ipn_mask,
+		    ipo->ipo_head);
+
+		*node->ipn_pnext = node->ipn_next;
+		if (node->ipn_next != NULL)
+			node->ipn_next->ipn_pnext = node->ipn_pnext;
+
+		KFREE(node);
+
+		ipoolstat.ipls_nodes--;
+	}
+	RADIX_NODE_HEAD_UNLOCK(ipo->ipo_head);
+
+	ipo->ipo_list = NULL;
+
+	/* Remove the pool from the unit's list of pools. */
+	if (ipo->ipo_next != NULL)
+		ipo->ipo_next->ipo_pnext = ipo->ipo_pnext;
+	*ipo->ipo_pnext = ipo->ipo_next;
+
+	rn_freehead(ipo->ipo_head);
+	KFREE(ipo);
+
+	ipoolstat.ipls_pools--;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: ip_pool_deref */
+/* Returns: void */
+/* Parameters: ipo(I) - pointer to pool structure */
+/* Locks: WRITE(ip_poolrw) */
+/* */
+/* Give up one reference to the pool; when the count reaches zero the pool */
+/* is freed. */
+/* ------------------------------------------------------------------------ */
+void ip_pool_deref(ipo)
+ip_pool_t *ipo;
+{
+
+	ASSERT(rw_read_locked(&ip_poolrw.ipf_lk) == 0);
+
+	if (--ipo->ipo_ref == 0)
+		ip_pool_free(ipo);
+}
+
+
+# if defined(_KERNEL) && ((BSD >= 198911) && !defined(__osf__) && \
+ !defined(__hpux) && !defined(__sgi))
+/*
+ * Tree-walk callback used by rn_freehead(): p is the radix head being torn
+ * down. The node's key is deleted from that head and, if the delete hands
+ * a node back, its storage is released. Always returns 0.
+ */
+static int
+rn_freenode(struct radix_node *n, void *p)
+{
+	struct radix_node_head *head = p;
+	struct radix_node *gone = head->rnh_deladdr(n->rn_key, NULL, head);
+
+	if (gone == NULL)
+		return 0;
+
+	FreeS(gone, max_keylen + 2 * sizeof (*gone));
+	return 0;
+}
+
+
+/*
+ * Release a radix tree head: walk the tree with rn_freenode() to delete
+ * the entries, clear the head's method pointers and free the head itself.
+ */
+void
+rn_freehead(rnh)
+	struct radix_node_head *rnh;
+{
+
+	RADIX_NODE_HEAD_LOCK(rnh);
+	(*rnh->rnh_walktree)(rnh, rn_freenode, rnh);
+
+	/* Leave no callable entry points behind in the dying head. */
+	rnh->rnh_walktree = NULL;
+	rnh->rnh_lookup = NULL;
+	rnh->rnh_matchaddr = NULL;
+	rnh->rnh_deladdr = NULL;
+	rnh->rnh_addaddr = NULL;
+	RADIX_NODE_HEAD_UNLOCK(rnh);
+
+	Free(rnh);
+}
+# endif
+
+#endif /* IPFILTER_LOOKUP */
diff --git a/usr/src/uts/common/inet/ipf/ip_proxy.c b/usr/src/uts/common/inet/ipf/ip_proxy.c
new file mode 100644
index 0000000000..99f8a14b88
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/ip_proxy.c
@@ -0,0 +1,858 @@
+/*
+ * Copyright (C) 1997-2003 by Darren Reed.
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ *
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#if defined(KERNEL) || defined(_KERNEL)
+# undef KERNEL
+# undef _KERNEL
+# define KERNEL 1
+# define _KERNEL 1
+#endif
+#include <sys/errno.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/file.h>
+#if !defined(AIX)
+# include <sys/fcntl.h>
+#endif
+#if !defined(_KERNEL) && !defined(__KERNEL__)
+# include <stdio.h>
+# include <string.h>
+# include <stdlib.h>
+# include <ctype.h>
+# define _KERNEL
+# ifdef __OpenBSD__
+struct file;
+# endif
+# include <sys/uio.h>
+# undef _KERNEL
+#endif
+#if !defined(linux)
+# include <sys/protosw.h>
+#endif
+#include <sys/socket.h>
+#if defined(_KERNEL)
+# if !defined(__NetBSD__) && !defined(sun) && !defined(__osf__) && \
+ !defined(__OpenBSD__) && !defined(__hpux) && !defined(__sgi) && \
+ !defined(AIX)
+# include <sys/ctype.h>
+# endif
+# include <sys/systm.h>
+# if !defined(__SVR4) && !defined(__svr4__)
+# include <sys/mbuf.h>
+# endif
+#endif
+#if defined(_KERNEL) && (__FreeBSD_version >= 220000)
+# include <sys/filio.h>
+# include <sys/fcntl.h>
+# if (__FreeBSD_version >= 300000) && !defined(IPFILTER_LKM)
+# include "opt_ipfilter.h"
+# endif
+#else
+# include <sys/ioctl.h>
+#endif
+#if defined(__SVR4) || defined(__svr4__)
+# include <sys/byteorder.h>
+# ifdef _KERNEL
+# include <sys/dditypes.h>
+# endif
+# include <sys/stream.h>
+# include <sys/kmem.h>
+#endif
+#if __FreeBSD__ > 2
+# include <sys/queue.h>
+#endif
+#include <net/if.h>
+#ifdef sun
+# include <net/af.h>
+#endif
+#include <net/route.h>
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#ifndef linux
+# include <netinet/ip_var.h>
+#endif
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <netinet/ip_icmp.h>
+#include "netinet/ip_compat.h"
+#include <netinet/tcpip.h>
+#include "netinet/ip_fil.h"
+#include "netinet/ip_nat.h"
+#include "netinet/ip_state.h"
+#include "netinet/ip_proxy.h"
+#if (__FreeBSD_version >= 300000)
+# include <sys/malloc.h>
+#endif
+
+#include "netinet/ip_ftp_pxy.c"
+#include "netinet/ip_rcmd_pxy.c"
+# include "netinet/ip_pptp_pxy.c"
+#if defined(_KERNEL)
+# include "netinet/ip_irc_pxy.c"
+# include "netinet/ip_raudio_pxy.c"
+# include "netinet/ip_h323_pxy.c"
+# include "netinet/ip_netbios_pxy.c"
+#endif
+#include "netinet/ip_ipsec_pxy.c"
+#include "netinet/ip_rpcb_pxy.c"
+
+/* END OF INCLUDES */
+
+#if !defined(lint)
+static const char rcsid[] = "@(#)$Id: ip_proxy.c,v 2.62.2.14 2005/06/18 02:41:33 darrenr Exp $";
+#endif
+
+static int appr_fixseqack __P((fr_info_t *, ip_t *, ap_session_t *, int ));
+
+#define AP_SESS_SIZE 53 /* number of slots in ap_sess_tab[] */
+
+#if defined(_KERNEL)
+int ipf_proxy_debug = 0; /* kernel build: diagnostic printfs off */
+#else
+int ipf_proxy_debug = 2; /* userland build: printfs gated at > 0 and > 1 are on */
+#endif
+ap_session_t *ap_sess_tab[AP_SESS_SIZE]; /* NOTE(review): presumably a session hash table - not used in the code visible here */
+ap_session_t *ap_sess_list = NULL; /* list of sessions; appr_new() links new ones at the head */
+aproxy_t *ap_proxylist = NULL; /* dynamically added proxies; see appr_add()/appr_del() */
+aproxy_t ap_proxies[] = { /* compiled-in proxies; each entry is present only if its IPF_*_PROXY macro is defined */
+#ifdef IPF_FTP_PROXY
+ { NULL, "ftp", (char)IPPROTO_TCP, 0, 0, ippr_ftp_init, ippr_ftp_fini,
+ ippr_ftp_new, NULL, ippr_ftp_in, ippr_ftp_out, NULL },
+#endif
+#ifdef IPF_IRC_PROXY
+ { NULL, "irc", (char)IPPROTO_TCP, 0, 0, ippr_irc_init, ippr_irc_fini,
+ ippr_irc_new, NULL, NULL, ippr_irc_out, NULL, NULL },
+#endif
+#ifdef IPF_RCMD_PROXY
+ { NULL, "rcmd", (char)IPPROTO_TCP, 0, 0, ippr_rcmd_init, ippr_rcmd_fini,
+ ippr_rcmd_new, NULL, ippr_rcmd_in, ippr_rcmd_out, NULL, NULL },
+#endif
+#ifdef IPF_RAUDIO_PROXY
+ { NULL, "raudio", (char)IPPROTO_TCP, 0, 0, ippr_raudio_init, ippr_raudio_fini,
+ ippr_raudio_new, NULL, ippr_raudio_in, ippr_raudio_out, NULL, NULL },
+#endif
+#ifdef IPF_MSNRPC_PROXY
+ { NULL, "msnrpc", (char)IPPROTO_TCP, 0, 0, ippr_msnrpc_init, ippr_msnrpc_fini,
+ ippr_msnrpc_new, NULL, ippr_msnrpc_in, ippr_msnrpc_out, NULL, NULL },
+#endif
+#ifdef IPF_NETBIOS_PROXY
+ { NULL, "netbios", (char)IPPROTO_UDP, 0, 0, ippr_netbios_init, ippr_netbios_fini,
+ NULL, NULL, NULL, ippr_netbios_out, NULL, NULL },
+#endif
+#ifdef IPF_IPSEC_PROXY
+ { NULL, "ipsec", (char)IPPROTO_UDP, 0, 0,
+ ippr_ipsec_init, ippr_ipsec_fini, ippr_ipsec_new, ippr_ipsec_del,
+ ippr_ipsec_inout, ippr_ipsec_inout, ippr_ipsec_match, NULL },
+#endif
+#ifdef IPF_PPTP_PROXY
+ { NULL, "pptp", (char)IPPROTO_TCP, 0, 0,
+ ippr_pptp_init, ippr_pptp_fini, ippr_pptp_new, ippr_pptp_del,
+ ippr_pptp_inout, ippr_pptp_inout, NULL, NULL },
+#endif
+#ifdef IPF_H323_PROXY
+ { NULL, "h323", (char)IPPROTO_TCP, 0, 0, ippr_h323_init, ippr_h323_fini,
+ ippr_h323_new, ippr_h323_del, ippr_h323_in, NULL, NULL },
+ { NULL, "h245", (char)IPPROTO_TCP, 0, 0, NULL, NULL,
+ ippr_h245_new, NULL, NULL, ippr_h245_out, NULL },
+#endif
+#ifdef IPF_RPCB_PROXY
+# if 0 /* the TCP flavour of the rpcb proxy is compiled out */
+ { NULL, "rpcbt", (char)IPPROTO_TCP, 0, 0,
+ ippr_rpcb_init, ippr_rpcb_fini, ippr_rpcb_new, ippr_rpcb_del,
+ ippr_rpcb_in, ippr_rpcb_out, NULL, NULL },
+# endif
+ { NULL, "rpcbu", (char)IPPROTO_UDP, 0, 0,
+ ippr_rpcb_init, ippr_rpcb_fini, ippr_rpcb_new, ippr_rpcb_del,
+ ippr_rpcb_in, ippr_rpcb_out, NULL, NULL },
+#endif
+ { NULL, "", '\0', 0, 0, NULL, NULL, NULL, NULL } /* terminator: zero protocol ends the scan in appr_add() */
+};
+
+/*
+ * Dynamically add a new kernel proxy. Ensure that it is unique in the
+ * collection compiled in and dynamically added.
+ *
+ * A proxy counts as already present when both its protocol and its label
+ * match an existing one. Returns -1 in that case; otherwise the proxy is
+ * put at the head of ap_proxylist and the result of its init function is
+ * returned, or 0 if it has none.
+ */
+int appr_add(ap)
+aproxy_t *ap;
+{
+	aproxy_t *a;
+
+	/* The compiled-in table ends with an entry whose protocol is zero. */
+	for (a = ap_proxies; a->apr_p; a++)
+		if ((a->apr_p == ap->apr_p) &&
+		    !strncmp(a->apr_label, ap->apr_label,
+			     sizeof(ap->apr_label))) {
+			if (ipf_proxy_debug > 1)
+				printf("appr_add: %s/%d already present (B)\n",
+				       a->apr_label, a->apr_p);
+			return -1;
+		}
+
+	/*
+	 * The dynamic list is NULL-terminated and starts out empty, so it
+	 * must be walked until the pointer is NULL; testing a->apr_p here
+	 * would dereference a NULL pointer on the first add and at the end
+	 * of the list.
+	 */
+	for (a = ap_proxylist; a != NULL; a = a->apr_next)
+		if ((a->apr_p == ap->apr_p) &&
+		    !strncmp(a->apr_label, ap->apr_label,
+			     sizeof(ap->apr_label))) {
+			if (ipf_proxy_debug > 1)
+				printf("appr_add: %s/%d already present (D)\n",
+				       a->apr_label, a->apr_p);
+			return -1;
+		}
+
+	ap->apr_next = ap_proxylist;
+	ap_proxylist = ap;
+	if (ap->apr_init != NULL)
+		return (*ap->apr_init)();
+	return 0;
+}
+
+
+/*
+ * Dispatch a control request to the proxy it names. The proxy is looked
+ * up by protocol and label: ESRCH is returned if there is no such proxy,
+ * ENXIO if the proxy has no control function, and otherwise whatever the
+ * proxy's control function returns.
+ */
+int appr_ctl(ctl)
+ap_ctl_t *ctl;
+{
+	aproxy_t *a = appr_lookup(ctl->apc_p, ctl->apc_label);
+	int error;
+
+	if (a == NULL) {
+		if (ipf_proxy_debug > 1)
+			printf("appr_ctl: can't find %s/%d\n",
+			       ctl->apc_label, ctl->apc_p);
+		return ESRCH;
+	}
+
+	if (a->apr_ctl == NULL) {
+		if (ipf_proxy_debug > 1)
+			printf("appr_ctl: no ctl function for %s/%d\n",
+			       ctl->apc_label, ctl->apc_p);
+		return ENXIO;
+	}
+
+	error = (*a->apr_ctl)(a, ctl);
+	if ((error != 0) && (ipf_proxy_debug > 1))
+		printf("appr_ctl: %s/%d ctl error %d\n",
+		       a->apr_label, a->apr_p, error);
+	return error;
+}
+
+
+/*
+ * Remove a dynamically added proxy from ap_proxylist. The proxy is marked
+ * APR_DELETE and unlinked. Returns 1 if it is still referenced (so it
+ * must not be destroyed yet), 0 if it is unreferenced, and -1 if it is
+ * not on the list at all.
+ */
+int appr_del(ap)
+aproxy_t *ap;
+{
+	aproxy_t **app = &ap_proxylist;
+	aproxy_t *a;
+
+	while ((a = *app) != NULL) {
+		if (a != ap) {
+			app = &a->apr_next;
+			continue;
+		}
+
+		a->apr_flags |= APR_DELETE;
+		*app = a->apr_next;
+		if (ap->apr_ref == 0)
+			return 0;
+
+		if (ipf_proxy_debug > 2)
+			printf("appr_del: orphaning %s/%d\n",
+			       ap->apr_label, ap->apr_p);
+		return 1;
+	}
+
+	if (ipf_proxy_debug > 1)
+		printf("appr_del: proxy %lx not found\n", (u_long)ap);
+	return -1;
+}
+
+
+/*
+ * Return 1 if the packet is a good match against the proxy attached to
+ * the NAT rule, else 0. There is no match when the rule has no proxy, the
+ * proxy is marked for deletion, the packet's protocol differs from the
+ * proxy's, or the rule has a destination port but no TCP header was given.
+ */
+int appr_ok(fin, tcp, nat)
+fr_info_t *fin;
+tcphdr_t *tcp;
+ipnat_t *nat;
+{
+	aproxy_t *apr = nat->in_apr;
+
+	if (apr == NULL)
+		return 0;
+	if ((apr->apr_flags & APR_DELETE) != 0)
+		return 0;
+	if (fin->fin_p != apr->apr_p)
+		return 0;
+	if ((tcp == NULL) && (nat->in_dport != 0))
+		return 0;
+	return 1;
+}
+
+
+/*
+ * Handle an ioctl aimed at the proxy code. Only SIOCPROXY is recognised;
+ * any other command returns EINVAL.
+ *
+ * For SIOCPROXY the ap_ctl_t is copied in from data. If it carries a
+ * payload (apc_dsize > 0) the payload is copied into a temporary kernel
+ * buffer, apc_data is pointed at that buffer, and the request is handed to
+ * appr_ctl(). The temporary buffer is released before returning, unless
+ * apc_data no longer points at it when appr_ctl() returns.
+ *
+ * Returns 0 on success, ENOMEM if the buffer cannot be allocated, the
+ * error from copyinptr() if the payload cannot be copied, or the error
+ * from appr_ctl().
+ *
+ * NOTE(review): the result of BCOPYIN() is discarded and apc_dsize is not
+ * bounded before it is used as an allocation size; both deserve attention
+ * but depend on macro definitions not visible here.
+ */
+int appr_ioctl(data, cmd, mode)
+caddr_t data;
+ioctlcmd_t cmd;
+int mode;
+{
+	ap_ctl_t ctl;
+	caddr_t ptr;
+	int error;
+
+	mode = mode;	/* LINT */
+
+	switch (cmd)
+	{
+	case SIOCPROXY :
+		(void) BCOPYIN(data, &ctl, sizeof(ctl));
+		ptr = NULL;
+
+		if (ctl.apc_dsize > 0) {
+			KMALLOCS(ptr, caddr_t, ctl.apc_dsize);
+			if (ptr == NULL)
+				error = ENOMEM;
+			else {
+				error = copyinptr(ctl.apc_data, ptr,
+						  ctl.apc_dsize);
+				if (error == 0)
+					ctl.apc_data = ptr;
+				else {
+					/*
+					 * apc_data still holds the user
+					 * pointer, so the test below would
+					 * not free the buffer: do it here.
+					 */
+					KFREES(ptr, ctl.apc_dsize);
+					ptr = NULL;
+				}
+			}
+		} else {
+			ctl.apc_data = NULL;
+			error = 0;
+		}
+
+		if (error == 0)
+			error = appr_ctl(&ctl);
+
+		if ((ctl.apc_dsize > 0) && (ptr != NULL) &&
+		    (ctl.apc_data == ptr)) {
+			KFREES(ptr, ctl.apc_dsize);
+		}
+		break;
+
+	default :
+		error = EINVAL;
+	}
+	return error;
+}
+
+
+/*
+ * Extended packet matching for a NAT session that has a proxy attached.
+ * Returns -1 when the packet is flagged short or bad, when the rule has
+ * no usable proxy (none, or marked for deletion), or when the proxy's
+ * match function returns non-zero. Returns 0 otherwise, including when
+ * the proxy has no match function.
+ */
+int appr_match(fin, nat)
+fr_info_t *fin;
+nat_t *nat;
+{
+	ipnat_t *ipn = nat->nat_ptr;
+	aproxy_t *apr;
+	int result;
+
+	if (ipf_proxy_debug > 8)
+		printf("appr_match(%lx,%lx) aps %lx ptr %lx\n",
+			(u_long)fin, (u_long)nat, (u_long)nat->nat_aps,
+			(u_long)ipn);
+
+	if ((fin->fin_flx & (FI_SHORT|FI_BAD)) != 0) {
+		if (ipf_proxy_debug > 0)
+			printf("appr_match: flx 0x%x (BAD|SHORT)\n",
+				fin->fin_flx);
+		return -1;
+	}
+
+	apr = ipn->in_apr;
+	if ((apr == NULL) || (apr->apr_flags & APR_DELETE)) {
+		if (ipf_proxy_debug > 0)
+			printf("appr_match:apr %lx apr_flags 0x%x\n",
+				(u_long)apr, apr ? apr->apr_flags : 0);
+		return -1;
+	}
+
+	/* A proxy with no match function accepts every packet. */
+	if (apr->apr_match == NULL)
+		return 0;
+
+	result = (*apr->apr_match)(fin, nat->nat_aps, nat);
+	if (result == 0)
+		return 0;
+
+	if (ipf_proxy_debug > 4)
+		printf("appr_match: result %d\n", result);
+	return -1;
+}
+
+
+/*
+ * Allocate a new application proxy session for a NAT entry and fill it in
+ * with the relevant details, calling the proxy's "new" function (if it
+ * has one) before the session is linked in.
+ *
+ * Returns 0 on success, with the session at the head of ap_sess_list and
+ * attached to nat->nat_aps. Returns -1 if the NAT entry has no rule or
+ * already has a session, if the rule has no proxy, if the proxy is marked
+ * for deletion or is for a different protocol than the packet, if memory
+ * cannot be allocated, or if the proxy's "new" function fails.
+ */
+int appr_new(fin, nat)
+fr_info_t *fin;
+nat_t *nat;
+{
+	register ap_session_t *aps;
+	aproxy_t *apr;
+
+	if (ipf_proxy_debug > 8)
+		printf("appr_new(%lx,%lx) \n", (u_long)fin, (u_long)nat);
+
+	if ((nat->nat_ptr == NULL) || (nat->nat_aps != NULL)) {
+		if (ipf_proxy_debug > 0)
+			printf("appr_new: nat_ptr %lx nat_aps %lx\n",
+				(u_long)nat->nat_ptr, (u_long)nat->nat_aps);
+		return -1;
+	}
+
+	apr = nat->nat_ptr->in_apr;
+
+	/*
+	 * A rule need not have a proxy; appr_ok() and appr_match() both
+	 * allow for that, so do the same here instead of dereferencing a
+	 * NULL pointer below.
+	 */
+	if (apr == NULL) {
+		if (ipf_proxy_debug > 0)
+			printf("appr_new: no proxy for nat_ptr %lx\n",
+				(u_long)nat->nat_ptr);
+		return -1;
+	}
+
+	if ((apr->apr_flags & APR_DELETE) ||
+	    (fin->fin_p != apr->apr_p)) {
+		if (ipf_proxy_debug > 2)
+			printf("appr_new: apr_flags 0x%x p %d/%d\n",
+				apr->apr_flags, fin->fin_p, apr->apr_p);
+		return -1;
+	}
+
+	KMALLOC(aps, ap_session_t *);
+	if (!aps) {
+		if (ipf_proxy_debug > 0)
+			printf("appr_new: malloc failed (%lu)\n",
+				(u_long)sizeof(ap_session_t));
+		return -1;
+	}
+
+	bzero((char *)aps, sizeof(*aps));
+	aps->aps_p = fin->fin_p;
+	aps->aps_data = NULL;
+	aps->aps_apr = apr;
+	aps->aps_psiz = 0;
+	if (apr->apr_new != NULL)
+		if ((*apr->apr_new)(fin, aps, nat) == -1) {
+			/* Release any private data the proxy attached. */
+			if ((aps->aps_data != NULL) && (aps->aps_psiz != 0)) {
+				KFREES(aps->aps_data, aps->aps_psiz);
+			}
+			KFREE(aps);
+			if (ipf_proxy_debug > 2)
+				printf("appr_new: new(%lx) failed\n",
+					(u_long)apr->apr_new);
+			return -1;
+		}
+
+	/* Link the session in only once the proxy has accepted it. */
+	aps->aps_nat = nat;
+	aps->aps_next = ap_sess_list;
+	ap_sess_list = aps;
+	nat->nat_aps = aps;
+
+	return 0;
+}
+
+
+/*
+ * Check to see if a packet should be passed through an active proxy routine
+ * if one has been setup for it. We don't need to check the checksum here if
+ * IPFILTER_CKSUM is defined because if it is, a failed check causes FI_BAD
+ * to be set.
+ *
+ * fin - packet information
+ * nat - NAT session the packet matched; nat->nat_aps is its proxy session
+ *
+ * Returns 1 if the packet was run through the proxy session, 0 if there
+ * is no proxy session for this NAT entry (or its protocol differs from
+ * the packet's), and -1 if the packet is bad, fails the layer 4 checksum,
+ * cannot be coalesced, or the proxy asks for it to be rejected.
+ */
+int appr_check(fin, nat)
+fr_info_t *fin;
+nat_t *nat;
+{
+#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6)
+# if defined(ICK_VALID)
+ mb_t *m;
+# endif
+ int dosum = 1;
+#endif
+ tcphdr_t *tcp = NULL;
+ udphdr_t *udp = NULL;
+ ap_session_t *aps;
+ aproxy_t *apr;
+ ip_t *ip;
+ short rv;
+ int err;
+#if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi)
+ u_32_t s1, s2, sd;
+#endif
+
+ if (fin->fin_flx & FI_BAD) {
+ if (ipf_proxy_debug > 0)
+ printf("appr_check: flx 0x%x (BAD)\n", fin->fin_flx);
+ return -1;
+ }
+
+#ifndef IPFILTER_CKSUM
+ /* Only inbound packets (fin_out == 0) have their checksum verified. */
+ if ((fin->fin_out == 0) && (fr_checkl4sum(fin) == -1)) {
+ if (ipf_proxy_debug > 0)
+ printf("appr_check: l4 checksum failure %d\n",
+ fin->fin_p);
+ if (fin->fin_p == IPPROTO_TCP)
+ frstats[fin->fin_out].fr_tcpbad++;
+ return -1;
+ }
+#endif
+
+ aps = nat->nat_aps;
+ if ((aps != NULL) && (aps->aps_p == fin->fin_p)) {
+ /*
+ * If there is data in this packet to be proxied then try and
+ * get it all into the one buffer, else drop it.
+ */
+#if defined(MENTAT) || defined(HAVE_M_PULLDOWN)
+ if ((fin->fin_dlen > 0) && !(fin->fin_flx & FI_COALESCE))
+ if (fr_coalesce(fin) == -1) {
+ if (ipf_proxy_debug > 0)
+ printf("appr_check: fr_coalesce failed %x\n", fin->fin_flx);
+ return -1;
+ }
+#endif
+ ip = fin->fin_ip;
+
+ switch (fin->fin_p)
+ {
+ case IPPROTO_TCP :
+ tcp = (tcphdr_t *)fin->fin_dp;
+
+#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_VALID)
+ /* Skip the software checksum below when hardware has validated it. */
+ m = fin->fin_qfm;
+ if (dohwcksum && (m->b_ick_flag == ICK_VALID))
+ dosum = 0;
+#endif
+ /*
+ * Don't bother the proxy with these...or in fact,
+ * should we free up proxy stuff when seen?
+ *
+ * NOTE(review): this break only leaves the switch; the
+ * proxy in/out hooks below are still invoked for RST
+ * packets.  Confirm that is the intent.
+ */
+ if ((fin->fin_tcpf & TH_RST) != 0)
+ break;
+ /*FALLTHROUGH*/
+ case IPPROTO_UDP :
+ /* For TCP this also sets udp, but tcp is tested first below. */
+ udp = (udphdr_t *)fin->fin_dp;
+ break;
+ default :
+ break;
+ }
+
+ /* Call the direction-specific packet hook, if the proxy has one. */
+ apr = aps->aps_apr;
+ err = 0;
+ if (fin->fin_out != 0) {
+ if (apr->apr_outpkt != NULL)
+ err = (*apr->apr_outpkt)(fin, aps, nat);
+ } else {
+ if (apr->apr_inpkt != NULL)
+ err = (*apr->apr_inpkt)(fin, aps, nat);
+ }
+
+ /*
+ * APR_EXIT() is defined elsewhere; presumably it extracts an exit
+ * code from the hook's return value - TODO confirm.  Codes 1 and 2
+ * both reject the packet; 2 additionally detaches the session.
+ */
+ rv = APR_EXIT(err);
+ if (((ipf_proxy_debug > 0) && (rv != 0)) ||
+ (ipf_proxy_debug > 8))
+ printf("appr_check: out %d err %x rv %d\n",
+ fin->fin_out, err, rv);
+ if (rv == 1)
+ return -1;
+
+ if (rv == 2) {
+ /*
+ * NOTE(review): the proxy reference is dropped and the NAT
+ * entry is detached from the session, but aps itself is not
+ * freed here - verify who releases it.
+ */
+ appr_free(apr);
+ nat->nat_aps = NULL;
+ return -1;
+ }
+
+ /*
+ * If err != 0 then the data size of the packet has changed
+ * so we need to recalculate the header checksums for the
+ * packet.
+ */
+#if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi)
+ if (err != 0) {
+ /* The low 16 bits of err are used as the signed length change. */
+ short adjlen = err & 0xffff;
+
+ s1 = LONG_SUM(ip->ip_len - adjlen);
+ s2 = LONG_SUM(ip->ip_len);
+ CALC_SUMD(s1, s2, sd);
+ fix_outcksum(fin, &ip->ip_sum, sd);
+ }
+#endif
+
+ /*
+ * For TCP packets, we may need to adjust the sequence and
+ * acknowledgement numbers to reflect changes in size of the
+ * data stream.
+ *
+ * For both TCP and UDP, recalculate the layer 4 checksum,
+ * regardless, as we can't tell (here) if data has been
+ * changed or not.
+ */
+ if (tcp != NULL) {
+ /* The value stored in err here is not read again below. */
+ err = appr_fixseqack(fin, ip, aps, APR_INC(err));
+#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6)
+ if (dosum)
+ tcp->th_sum = fr_cksum(fin->fin_qfm, ip,
+ IPPROTO_TCP, tcp);
+#else
+ tcp->th_sum = fr_cksum(fin->fin_m, ip,
+ IPPROTO_TCP, tcp);
+#endif
+ } else if ((udp != NULL) && (udp->uh_sum != 0)) {
+ /* A zero UDP checksum is left untouched. */
+#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6)
+ if (dosum)
+ udp->uh_sum = fr_cksum(fin->fin_qfm, ip,
+ IPPROTO_UDP, udp);
+#else
+ udp->uh_sum = fr_cksum(fin->fin_m, ip,
+ IPPROTO_UDP, udp);
+#endif
+ }
+ /* Per-session accounting. */
+ aps->aps_bytes += fin->fin_plen;
+ aps->aps_pkts++;
+ return 1;
+ }
+ return 0;
+}
+
+
+/*
+ * Search for a proxy by the protocol it is being used with and its name.
+ *
+ * pr   - IP protocol number the proxy must handle
+ * name - proxy label; compared over at most sizeof(apr_label) bytes
+ *
+ * The compiled-in table (ap_proxies) is searched first, then the
+ * ap_proxylist linked list.  On a match the proxy's reference count is
+ * incremented and the proxy is returned; NULL is returned if nothing
+ * matches.
+ */
+aproxy_t *appr_lookup(pr, name)
+u_int pr;
+char *name;
+{
+    aproxy_t *cand;
+
+    if (ipf_proxy_debug > 8)
+        printf("appr_lookup(%d,%s)\n", pr, name);
+
+    /* The table ends at the first entry whose apr_p is zero. */
+    cand = ap_proxies;
+    while (cand->apr_p != 0) {
+        if ((cand->apr_p == pr) &&
+            (strncmp(name, cand->apr_label,
+                 sizeof(cand->apr_label)) == 0)) {
+            cand->apr_ref++;
+            return cand;
+        }
+        cand++;
+    }
+
+    cand = ap_proxylist;
+    while (cand != NULL) {
+        if ((cand->apr_p == pr) &&
+            (strncmp(name, cand->apr_label,
+                 sizeof(cand->apr_label)) == 0)) {
+            cand->apr_ref++;
+            return cand;
+        }
+        cand = cand->apr_next;
+    }
+
+    if (ipf_proxy_debug > 2)
+        printf("appr_lookup: failed for %d/%s\n", pr, name);
+    return NULL;
+}
+
+
+/*
+ * Drop one reference on a proxy.  Only the reference count is adjusted;
+ * nothing is freed here.
+ */
+void appr_free(ap)
+aproxy_t *ap;
+{
+    ap->apr_ref -= 1;
+}
+
+
+/*
+ * Release a proxy session: unlink it from ap_sess_list (if it is on the
+ * list), call the owning proxy's delete hook (if any), free the session's
+ * private data and then the session itself.  A NULL aps is ignored.
+ */
+void aps_free(aps)
+ap_session_t *aps;
+{
+    ap_session_t **link;
+    aproxy_t *proxy;
+
+    if (aps == NULL)
+        return;
+
+    /* Walk the link pointers until we reach aps or the end of the list. */
+    link = &ap_sess_list;
+    while ((*link != NULL) && (*link != aps))
+        link = &(*link)->aps_next;
+    if (*link != NULL)
+        *link = aps->aps_next;
+
+    proxy = aps->aps_apr;
+    if (proxy != NULL) {
+        if (proxy->apr_del != NULL)
+            (*proxy->apr_del)(aps);
+    }
+
+    if ((aps->aps_data != NULL) && (aps->aps_psiz != 0))
+        KFREES(aps->aps_data, aps->aps_psiz);
+    KFREE(aps);
+}
+
+
+/*
+ * Adjust the TCP sequence and acknowledgement numbers of a proxied packet
+ * using the per-session offsets kept in aps, and record a new offset when
+ * the proxy changed the packet length.
+ *
+ * fin - packet information (fin_dp points at the TCP header)
+ * ip  - IP header of the packet
+ * aps - proxy session holding two sets of seq/ack min+offset values and
+ *       the per-direction selector (aps_sel) that picks the active set
+ * inc - change in data length made by the proxy
+ *
+ * returns 2 if ack or seq number in TCP header is changed, returns 0 otherwise
+ */
+static int appr_fixseqack(fin, ip, aps, inc)
+fr_info_t *fin;
+ip_t *ip;
+ap_session_t *aps;
+int inc;
+{
+ int sel, ch = 0, out, nlen;
+ u_32_t seq1, seq2;
+ tcphdr_t *tcp;
+ short inc2;
+
+ tcp = (tcphdr_t *)fin->fin_dp;
+ out = fin->fin_out;
+ /*
+ * ip_len has already been adjusted by 'inc'.
+ */
+ /* nlen is what remains of ip_len after the IP and TCP header lengths. */
+ nlen = ip->ip_len;
+ nlen -= (IP_HL(ip) << 2) + (TCP_OFF(tcp) << 2);
+
+ /* Round-trip through a short: inc is truncated to 16 bits, sign kept. */
+ inc2 = inc;
+ inc = (int)inc2;
+
+ if (out != 0) {
+ /* Outbound: th_seq has the offset added, th_ack has it subtracted. */
+ seq1 = (u_32_t)ntohl(tcp->th_seq);
+ sel = aps->aps_sel[out];
+
+ /* switch to other set ? */
+ if ((aps->aps_seqmin[!sel] > aps->aps_seqmin[sel]) &&
+ (seq1 > aps->aps_seqmin[!sel])) {
+ if (ipf_proxy_debug > 7)
+ printf("proxy out switch set seq %d -> %d %x > %x\n",
+ sel, !sel, seq1,
+ aps->aps_seqmin[!sel]);
+ sel = aps->aps_sel[out] = !sel;
+ }
+
+ if (aps->aps_seqoff[sel]) {
+ seq2 = aps->aps_seqmin[sel] - aps->aps_seqoff[sel];
+ if (seq1 > seq2) {
+ seq2 = aps->aps_seqoff[sel];
+ seq1 += seq2;
+ tcp->th_seq = htonl(seq1);
+ ch = 1;
+ }
+ }
+
+ /* The packet changed size: record the new threshold and offset. */
+ if (inc && (seq1 > aps->aps_seqmin[!sel])) {
+ aps->aps_seqmin[sel] = seq1 + nlen - 1;
+ aps->aps_seqoff[sel] = aps->aps_seqoff[sel] + inc;
+ if (ipf_proxy_debug > 7)
+ printf("proxy seq set %d at %x to %d + %d\n",
+ sel, aps->aps_seqmin[sel],
+ aps->aps_seqoff[sel], inc);
+ }
+
+ /***/
+
+ seq1 = ntohl(tcp->th_ack);
+ sel = aps->aps_sel[1 - out];
+
+ /* switch to other set ? */
+ if ((aps->aps_ackmin[!sel] > aps->aps_ackmin[sel]) &&
+ (seq1 > aps->aps_ackmin[!sel])) {
+ if (ipf_proxy_debug > 7)
+ printf("proxy out switch set ack %d -> %d %x > %x\n",
+ sel, !sel, seq1,
+ aps->aps_ackmin[!sel]);
+ sel = aps->aps_sel[1 - out] = !sel;
+ }
+
+ if (aps->aps_ackoff[sel] && (seq1 > aps->aps_ackmin[sel])) {
+ seq2 = aps->aps_ackoff[sel];
+ tcp->th_ack = htonl(seq1 - seq2);
+ ch = 1;
+ }
+ } else {
+ /* Inbound: th_seq is checked against the ack set, th_ack the seq set. */
+ seq1 = ntohl(tcp->th_seq);
+ sel = aps->aps_sel[out];
+
+ /* switch to other set ? */
+ if ((aps->aps_ackmin[!sel] > aps->aps_ackmin[sel]) &&
+ (seq1 > aps->aps_ackmin[!sel])) {
+ if (ipf_proxy_debug > 7)
+ printf("proxy in switch set ack %d -> %d %x > %x\n",
+ sel, !sel, seq1, aps->aps_ackmin[!sel]);
+ sel = aps->aps_sel[out] = !sel;
+ }
+
+ if (aps->aps_ackoff[sel]) {
+ seq2 = aps->aps_ackmin[sel] - aps->aps_ackoff[sel];
+ if (seq1 > seq2) {
+ seq2 = aps->aps_ackoff[sel];
+ seq1 += seq2;
+ tcp->th_seq = htonl(seq1);
+ ch = 1;
+ }
+ }
+
+ /*
+ * Unlike the outbound case, the new threshold/offset are written
+ * to the inactive set (!sel), based on the active set's offset.
+ */
+ if (inc && (seq1 > aps->aps_ackmin[!sel])) {
+ aps->aps_ackmin[!sel] = seq1 + nlen - 1;
+ aps->aps_ackoff[!sel] = aps->aps_ackoff[sel] + inc;
+
+ /*
+ * NOTE(review): this message prints aps_seqmin/aps_seqoff
+ * although aps_ackmin/aps_ackoff were just updated - looks
+ * like a copy/paste slip in the debug output; confirm.
+ */
+ if (ipf_proxy_debug > 7)
+ printf("proxy ack set %d at %x to %d + %d\n",
+ !sel, aps->aps_seqmin[!sel],
+ aps->aps_seqoff[sel], inc);
+ }
+
+ /***/
+
+ seq1 = ntohl(tcp->th_ack);
+ sel = aps->aps_sel[1 - out];
+
+ /* switch to other set ? */
+ if ((aps->aps_seqmin[!sel] > aps->aps_seqmin[sel]) &&
+ (seq1 > aps->aps_seqmin[!sel])) {
+ if (ipf_proxy_debug > 7)
+ printf("proxy in switch set seq %d -> %d %x > %x\n",
+ sel, !sel, seq1, aps->aps_seqmin[!sel]);
+ sel = aps->aps_sel[1 - out] = !sel;
+ }
+
+ if (aps->aps_seqoff[sel] != 0) {
+ if (ipf_proxy_debug > 7)
+ printf("sel %d seqoff %d seq1 %x seqmin %x\n",
+ sel, aps->aps_seqoff[sel], seq1,
+ aps->aps_seqmin[sel]);
+ if (seq1 > aps->aps_seqmin[sel]) {
+ seq2 = aps->aps_seqoff[sel];
+ tcp->th_ack = htonl(seq1 - seq2);
+ ch = 1;
+ }
+ }
+ }
+
+ if (ipf_proxy_debug > 8)
+ printf("appr_fixseqack: seq %x ack %x\n",
+ ntohl(tcp->th_seq), ntohl(tcp->th_ack));
+ return ch ? 2 : 0;
+}
+
+
+/*
+ * Initialise hook for kernel application proxies.
+ * Walks the compiled-in proxy table (terminated by an entry whose apr_p
+ * is zero) and calls each proxy's init routine, if it has one.
+ *
+ * Returns 0 if every init routine succeeded, otherwise the first
+ * non-zero value returned; no further proxies are initialised after a
+ * failure.
+ */
+int appr_init()
+{
+    aproxy_t *proxy;
+    int rc;
+
+    for (proxy = ap_proxies; proxy->apr_p != 0; proxy++) {
+        if (proxy->apr_init == NULL)
+            continue;
+        rc = (*proxy->apr_init)();
+        if (rc != 0)
+            return rc;
+    }
+    return 0;
+}
+
+
+/*
+ * Unload hook for kernel application proxies.
+ * Call the finalise routine of every proxy that has one: first those in
+ * the compiled-in table (ap_proxies), then those on ap_proxylist.
+ */
+void appr_unload()
+{
+    aproxy_t *proxy;
+
+    proxy = ap_proxies;
+    while (proxy->apr_p != 0) {
+        if (proxy->apr_fini != NULL)
+            (*proxy->apr_fini)();
+        proxy++;
+    }
+
+    proxy = ap_proxylist;
+    while (proxy != NULL) {
+        if (proxy->apr_fini != NULL)
+            (*proxy->apr_fini)();
+        proxy = proxy->apr_next;
+    }
+}
diff --git a/usr/src/uts/common/inet/ipf/ip_state.c b/usr/src/uts/common/inet/ipf/ip_state.c
new file mode 100644
index 0000000000..9a5586a208
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/ip_state.c
@@ -0,0 +1,3806 @@
+/*
+ * Copyright (C) 1995-2003 by Darren Reed.
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ *
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#if defined(KERNEL) || defined(_KERNEL)
+# undef KERNEL
+# undef _KERNEL
+# define KERNEL 1
+# define _KERNEL 1
+#endif
+#include <sys/errno.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/file.h>
+#if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
+ defined(_KERNEL)
+# include "opt_ipfilter_log.h"
+#endif
+#if defined(_KERNEL) && defined(__FreeBSD_version) && \
+ (__FreeBSD_version >= 400000) && !defined(KLD_MODULE)
+#include "opt_inet6.h"
+#endif
+#if !defined(_KERNEL) && !defined(__KERNEL__)
+# include <stdio.h>
+# include <stdlib.h>
+# include <string.h>
+# define _KERNEL
+# ifdef __OpenBSD__
+struct file;
+# endif
+# include <sys/uio.h>
+# undef _KERNEL
+#endif
+#if defined(_KERNEL) && (__FreeBSD_version >= 220000)
+# include <sys/filio.h>
+# include <sys/fcntl.h>
+# if (__FreeBSD_version >= 300000) && !defined(IPFILTER_LKM)
+# include "opt_ipfilter.h"
+# endif
+#else
+# include <sys/ioctl.h>
+#endif
+#include <sys/time.h>
+#if !defined(linux)
+# include <sys/protosw.h>
+#endif
+#include <sys/socket.h>
+#if defined(_KERNEL)
+# include <sys/systm.h>
+# if !defined(__SVR4) && !defined(__svr4__)
+# include <sys/mbuf.h>
+# endif
+#endif
+#if defined(__SVR4) || defined(__svr4__)
+# include <sys/filio.h>
+# include <sys/byteorder.h>
+# ifdef _KERNEL
+# include <sys/dditypes.h>
+# endif
+# include <sys/stream.h>
+# include <sys/kmem.h>
+#endif
+
+#include <net/if.h>
+#ifdef sun
+# include <net/af.h>
+#endif
+#include <net/route.h>
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/tcp.h>
+#if !defined(linux)
+# include <netinet/ip_var.h>
+#endif
+#if !defined(__hpux) && !defined(linux)
+# include <netinet/tcp_fsm.h>
+#endif
+#include <netinet/udp.h>
+#include <netinet/ip_icmp.h>
+#include "netinet/ip_compat.h"
+#include <netinet/tcpip.h>
+#include "netinet/ip_fil.h"
+#include "netinet/ip_nat.h"
+#include "netinet/ip_frag.h"
+#include "netinet/ip_state.h"
+#include "netinet/ip_proxy.h"
+#ifdef IPFILTER_SYNC
+#include "netinet/ip_sync.h"
+#endif
+#ifdef IPFILTER_SCAN
+#include "netinet/ip_scan.h"
+#endif
+#ifdef USE_INET6
+#include <netinet/icmp6.h>
+#endif
+#if (__FreeBSD_version >= 300000)
+# include <sys/malloc.h>
+# if defined(_KERNEL) && !defined(IPFILTER_LKM)
+# include <sys/libkern.h>
+# include <sys/systm.h>
+# endif
+#endif
+/* END OF INCLUDES */
+
+
+#if !defined(lint)
+static const char sccsid[] = "@(#)ip_state.c 1.8 6/5/96 (C) 1993-2000 Darren Reed";
+static const char rcsid[] = "@(#)$Id: ip_state.c,v 2.186.2.36 2005/08/11 19:58:03 darrenr Exp $";
+#endif
+
+static ipstate_t **ips_table = NULL;
+static u_long *ips_seed = NULL;
+static int ips_num = 0;
+static u_long ips_last_force_flush = 0;
+ips_stat_t ips_stats;
+
+#ifdef USE_INET6
+static ipstate_t *fr_checkicmp6matchingstate __P((fr_info_t *));
+#endif
+static ipstate_t *fr_matchsrcdst __P((fr_info_t *, ipstate_t *, i6addr_t *,
+ i6addr_t *, tcphdr_t *, u_32_t));
+static ipstate_t *fr_checkicmpmatchingstate __P((fr_info_t *));
+static int fr_state_flush __P((int, int));
+static ips_stat_t *fr_statetstats __P((void));
+static void fr_delstate __P((ipstate_t *, int));
+static int fr_state_remove __P((caddr_t));
+static void fr_ipsmove __P((ipstate_t *, u_int));
+static int fr_tcpstate __P((fr_info_t *, tcphdr_t *, ipstate_t *));
+static int fr_tcpoptions __P((fr_info_t *, tcphdr_t *, tcpdata_t *));
+static ipstate_t *fr_stclone __P((fr_info_t *, tcphdr_t *, ipstate_t *));
+static void fr_fixinisn __P((fr_info_t *, ipstate_t *));
+static void fr_fixoutisn __P((fr_info_t *, ipstate_t *));
+static void fr_checknewisn __P((fr_info_t *, ipstate_t *));
+
+int fr_stputent __P((caddr_t));
+int fr_stgetent __P((caddr_t));
+
+#define ONE_DAY IPF_TTLVAL(1 * 86400) /* 1 day */
+#define FIVE_DAYS (5 * ONE_DAY)
+#define DOUBLE_HASH(x) (((x) + ips_seed[(x) % fr_statesize]) % fr_statesize)
+
+u_long fr_tcpidletimeout = FIVE_DAYS,
+ fr_tcpclosewait = IPF_TTLVAL(2 * TCP_MSL),
+ fr_tcplastack = IPF_TTLVAL(2 * TCP_MSL),
+ fr_tcptimeout = IPF_TTLVAL(2 * TCP_MSL),
+ fr_tcpclosed = IPF_TTLVAL(60),
+ fr_tcphalfclosed = IPF_TTLVAL(2 * 3600), /* 2 hours */
+ fr_udptimeout = IPF_TTLVAL(120),
+ fr_udpacktimeout = IPF_TTLVAL(12),
+ fr_icmptimeout = IPF_TTLVAL(60),
+ fr_icmpacktimeout = IPF_TTLVAL(6),
+ fr_iptimeout = IPF_TTLVAL(60);
+int fr_statemax = IPSTATE_MAX,
+ fr_statesize = IPSTATE_SIZE;
+int fr_state_doflush = 0,
+ fr_state_lock = 0,
+ fr_state_maxbucket = 0,
+ fr_state_maxbucket_reset = 1,
+ fr_state_init = 0;
+ipftq_t ips_tqtqb[IPF_TCP_NSTATES],
+ ips_udptq,
+ ips_udpacktq,
+ ips_iptq,
+ ips_icmptq,
+ ips_icmpacktq,
+ *ips_utqe = NULL;
+#ifdef IPFILTER_LOG
+int ipstate_logging = 1;
+#else
+int ipstate_logging = 0;
+#endif
+ipstate_t *ips_list = NULL;
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_stateinit */
+/* Returns: int - 0 == success, -1 or -2 == failure */
+/* Parameters: Nil */
+/* */
+/* Initialise all the global variables used within the state code. */
+/* This action also includes initialising locks. */
+/* Returns -1 if the hash table or the bucket length array cannot be */
+/* allocated and -2 if the seed array cannot be allocated. Memory already */
+/* allocated is not released on those failure paths here. */
+/* ------------------------------------------------------------------------ */
+int fr_stateinit()
+{
+ int i;
+
+ /* Hash table of state entries: fr_statesize buckets, all empty. */
+ KMALLOCS(ips_table, ipstate_t **, fr_statesize * sizeof(ipstate_t *));
+ if (ips_table == NULL)
+ return -1;
+ bzero((char *)ips_table, fr_statesize * sizeof(ipstate_t *));
+
+ /* Per-bucket seed values, used by the DOUBLE_HASH() macro. */
+ KMALLOCS(ips_seed, u_long *, fr_statesize * sizeof(*ips_seed));
+ if (ips_seed == NULL)
+ return -2;
+ for (i = 0; i < fr_statesize; i++) {
+ /*
+ * XXX - ips_seed[X] should be a random number of sorts.
+ */
+#if (__FreeBSD_version >= 400000)
+ ips_seed[i] = arc4random();
+#else
+ /* Derived from the array address, index and table sizes only. */
+ ips_seed[i] = ((u_long)ips_seed + i) * fr_statesize;
+ ips_seed[i] ^= 0xa5a55a5a;
+ ips_seed[i] *= (u_long)ips_seed;
+ ips_seed[i] ^= 0x5a5aa5a5;
+ ips_seed[i] *= fr_statemax;
+#endif
+ }
+
+ /* fill icmp reply type table */
+ /* -1 marks a type with no reply type; request types map to replies. */
+ for (i = 0; i <= ICMP_MAXTYPE; i++)
+ icmpreplytype4[i] = -1;
+ icmpreplytype4[ICMP_ECHO] = ICMP_ECHOREPLY;
+ icmpreplytype4[ICMP_TSTAMP] = ICMP_TSTAMPREPLY;
+ icmpreplytype4[ICMP_IREQ] = ICMP_IREQREPLY;
+ icmpreplytype4[ICMP_MASKREQ] = ICMP_MASKREPLY;
+#ifdef USE_INET6
+ /* fill icmp reply type table */
+ for (i = 0; i <= ICMP6_MAXTYPE; i++)
+ icmpreplytype6[i] = -1;
+ icmpreplytype6[ICMP6_ECHO_REQUEST] = ICMP6_ECHO_REPLY;
+ icmpreplytype6[ICMP6_MEMBERSHIP_QUERY] = ICMP6_MEMBERSHIP_REPORT;
+ icmpreplytype6[ICMP6_NI_QUERY] = ICMP6_NI_REPLY;
+ icmpreplytype6[ND_ROUTER_SOLICIT] = ND_ROUTER_ADVERT;
+ icmpreplytype6[ND_NEIGHBOR_SOLICIT] = ND_NEIGHBOR_ADVERT;
+#endif
+
+ /* One counter per hash bucket. */
+ KMALLOCS(ips_stats.iss_bucketlen, u_long *,
+ fr_statesize * sizeof(u_long));
+ if (ips_stats.iss_bucketlen == NULL)
+ return -1;
+ bzero((char *)ips_stats.iss_bucketlen, fr_statesize * sizeof(u_long));
+
+ /*
+ * If no bucket limit has been set, default it to twice the number of
+ * right shifts needed to reduce fr_statesize to zero.
+ */
+ if (fr_state_maxbucket == 0) {
+ for (i = fr_statesize; i > 0; i >>= 1)
+ fr_state_maxbucket++;
+ fr_state_maxbucket *= 2;
+ }
+
+ /*
+ * Chain the timeout queues via ifq_next: the TCP queues, then udp,
+ * udpack, icmp, icmpack and ip, which ends the chain.
+ */
+ fr_sttab_init(ips_tqtqb);
+ ips_tqtqb[IPF_TCP_NSTATES - 1].ifq_next = &ips_udptq;
+ ips_udptq.ifq_ttl = (u_long)fr_udptimeout;
+ ips_udptq.ifq_ref = 1;
+ ips_udptq.ifq_head = NULL;
+ ips_udptq.ifq_tail = &ips_udptq.ifq_head;
+ MUTEX_INIT(&ips_udptq.ifq_lock, "ipftq udp tab");
+ ips_udptq.ifq_next = &ips_udpacktq;
+ ips_udpacktq.ifq_ttl = (u_long)fr_udpacktimeout;
+ ips_udpacktq.ifq_ref = 1;
+ ips_udpacktq.ifq_head = NULL;
+ ips_udpacktq.ifq_tail = &ips_udpacktq.ifq_head;
+ MUTEX_INIT(&ips_udpacktq.ifq_lock, "ipftq udpack tab");
+ ips_udpacktq.ifq_next = &ips_icmptq;
+ ips_icmptq.ifq_ttl = (u_long)fr_icmptimeout;
+ ips_icmptq.ifq_ref = 1;
+ ips_icmptq.ifq_head = NULL;
+ ips_icmptq.ifq_tail = &ips_icmptq.ifq_head;
+ MUTEX_INIT(&ips_icmptq.ifq_lock, "ipftq icmp tab");
+ ips_icmptq.ifq_next = &ips_icmpacktq;
+ ips_icmpacktq.ifq_ttl = (u_long)fr_icmpacktimeout;
+ ips_icmpacktq.ifq_ref = 1;
+ ips_icmpacktq.ifq_head = NULL;
+ ips_icmpacktq.ifq_tail = &ips_icmpacktq.ifq_head;
+ MUTEX_INIT(&ips_icmpacktq.ifq_lock, "ipftq icmpack tab");
+ ips_icmpacktq.ifq_next = &ips_iptq;
+ ips_iptq.ifq_ttl = (u_long)fr_iptimeout;
+ ips_iptq.ifq_ref = 1;
+ ips_iptq.ifq_head = NULL;
+ ips_iptq.ifq_tail = &ips_iptq.ifq_head;
+ MUTEX_INIT(&ips_iptq.ifq_lock, "ipftq ip tab");
+ ips_iptq.ifq_next = NULL;
+
+ RWLOCK_INIT(&ipf_state, "ipf IP state rwlock");
+ MUTEX_INIT(&ipf_stinsert, "ipf state insert mutex");
+ /* fr_stateunload only destroys the locks when this flag is 1. */
+ fr_state_init = 1;
+
+ ips_last_force_flush = fr_ticks;
+ return 0;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_stateunload */
+/* Returns: Nil */
+/* Parameters: Nil */
+/* */
+/* Release and destroy any resources acquired or initialised so that */
+/* IPFilter can be unloaded or re-initialised. */
+/* ------------------------------------------------------------------------ */
+void fr_stateunload()
+{
+ ipftq_t *ifq, *ifqnext;
+ ipstate_t *is;
+
+ /*
+ * NOTE(review): ipf_state is entered here regardless of fr_state_init,
+ * i.e. also when fr_stateinit did not get as far as RWLOCK_INIT -
+ * confirm that is safe on every platform.
+ */
+ WRITE_ENTER(&ipf_state);
+ /* Delete entries from the head of the list until it is empty. */
+ while ((is = ips_list) != NULL)
+ fr_delstate(is, 0);
+
+ /*
+ * Proxy timeout queues are not cleaned here because although they
+ * exist on the state list, appr_unload is called after fr_stateunload
+ * and the proxies actually are responsible for them being created.
+ * Should the proxy timeouts have their own list? There's no real
+ * justification as this is the only complication.
+ */
+ for (ifq = ips_utqe; ifq != NULL; ifq = ifqnext) {
+ /* Fetch the successor first: ifq may be freed below. */
+ ifqnext = ifq->ifq_next;
+ if (((ifq->ifq_flags & IFQF_PROXY) == 0) &&
+ (fr_deletetimeoutqueue(ifq) == 0))
+ fr_freetimeoutqueue(ifq);
+ }
+
+ ips_stats.iss_inuse = 0;
+ ips_num = 0;
+
+ /* Only tear down the queue locks if fr_stateinit completed. */
+ if (fr_state_init == 1) {
+ fr_sttab_destroy(ips_tqtqb);
+ MUTEX_DESTROY(&ips_udptq.ifq_lock);
+ MUTEX_DESTROY(&ips_icmptq.ifq_lock);
+ MUTEX_DESTROY(&ips_udpacktq.ifq_lock);
+ MUTEX_DESTROY(&ips_icmpacktq.ifq_lock);
+ MUTEX_DESTROY(&ips_iptq.ifq_lock);
+ }
+
+ /* Each array is freed only if allocated, then reset to NULL. */
+ if (ips_table != NULL) {
+ KFREES(ips_table, fr_statesize * sizeof(*ips_table));
+ ips_table = NULL;
+ }
+
+ if (ips_seed != NULL) {
+ KFREES(ips_seed, fr_statesize * sizeof(*ips_seed));
+ ips_seed = NULL;
+ }
+
+ if (ips_stats.iss_bucketlen != NULL) {
+ KFREES(ips_stats.iss_bucketlen, fr_statesize * sizeof(u_long));
+ ips_stats.iss_bucketlen = NULL;
+ }
+
+ RWLOCK_EXIT(&ipf_state);
+
+ /* Let the next fr_stateinit recompute the default bucket limit. */
+ if (fr_state_maxbucket_reset == 1)
+ fr_state_maxbucket = 0;
+
+ if (fr_state_init == 1) {
+ fr_state_init = 0;
+ RW_DESTROY(&ipf_state);
+ MUTEX_DESTROY(&ipf_stinsert);
+ }
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_statetstats */
+/* Returns: ips_stat_t* - pointer to state stats structure */
+/* Parameters: Nil */
+/* */
+/* Refresh the snapshot fields of the global ips_stats structure (entry */
+/* count, table size and limit, table and list pointers, current tick) and */
+/* return a pointer to it. */
+/* ------------------------------------------------------------------------ */
+static ips_stat_t *fr_statetstats()
+{
+    ips_stat_t *snap = &ips_stats;
+
+    snap->iss_ticks = fr_ticks;
+    snap->iss_list = ips_list;
+    snap->iss_table = ips_table;
+    snap->iss_statemax = fr_statemax;
+    snap->iss_statesize = fr_statesize;
+    snap->iss_active = ips_num;
+
+    return snap;
+}
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_state_remove */
+/* Returns: int - 0 == success, != 0 == failure */
+/* EFAULT if the structure cannot be copied in, */
+/* ESRCH if no matching state entry exists */
+/* Parameters: data(I) - pointer to state structure to delete from table */
+/* */
+/* Search for a state structure that matches the one passed, according to */
+/* the IP addresses and other protocol specific information, and delete the */
+/* first match. An entry matches when its protocol, IP version, source */
+/* address, destination address and protocol specific data (is_ps) are all */
+/* equal to those of the structure passed in. */
+/* ------------------------------------------------------------------------ */
+static int fr_state_remove(data)
+caddr_t data;
+{
+    ipstate_t *sp, st;
+    int error;
+
+    error = fr_inobj(data, &st, IPFOBJ_IPSTATE);
+    if (error)
+        return EFAULT;
+
+    WRITE_ENTER(&ipf_state);
+    for (sp = ips_list; sp != NULL; sp = sp->is_next) {
+        if ((sp->is_p != st.is_p) || (sp->is_v != st.is_v))
+            continue;
+        if (bcmp((caddr_t)&sp->is_src, (caddr_t)&st.is_src,
+             sizeof(st.is_src)) != 0)
+            continue;
+        /*
+         * The destination must be compared with the requested
+         * destination (not the source), otherwise only entries
+         * whose source and destination are equal could match.
+         */
+        if (bcmp((caddr_t)&sp->is_dst, (caddr_t)&st.is_dst,
+             sizeof(st.is_dst)) != 0)
+            continue;
+        if (bcmp((caddr_t)&sp->is_ps, (caddr_t)&st.is_ps,
+             sizeof(st.is_ps)) != 0)
+            continue;
+
+        fr_delstate(sp, ISL_REMOVE);
+        RWLOCK_EXIT(&ipf_state);
+        return 0;
+    }
+    RWLOCK_EXIT(&ipf_state);
+    return ESRCH;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_state_ioctl */
+/* Returns: int - 0 == success, != 0 == failure */
+/* Parameters: data(I) - pointer to ioctl data */
+/* cmd(I) - ioctl command integer */
+/* mode(I) - file mode bits used with open */
+/* */
+/* Processes an ioctl call made to operate on the IP Filter state device. */
+/* Unknown commands, and flush requests whose argument is neither 0 nor 1, */
+/* fail with EINVAL. */
+/* ------------------------------------------------------------------------ */
+int fr_state_ioctl(data, cmd, mode)
+caddr_t data;
+ioctlcmd_t cmd;
+int mode;
+{
+ int arg, ret, error = 0;
+
+ switch (cmd)
+ {
+ /*
+ * Delete an entry from the state table.
+ * NOTE(review): no (mode & FWRITE) test is made in this case, unlike
+ * SIOCIPFFB/SIOCSETLG/SIOCSTLCK below - confirm whether the caller
+ * enforces it.
+ */
+ case SIOCDELST :
+ error = fr_state_remove(data);
+ break;
+ /*
+ * Flush the state table
+ * The int argument is copied in, must be 0 or 1, and is passed on to
+ * fr_state_flush(); that function's result is copied back out.
+ * NOTE(review): the results of BCOPYIN/BCOPYOUT are not examined here
+ * and there is no (mode & FWRITE) test in this case.
+ */
+ case SIOCIPFFL :
+ BCOPYIN(data, (char *)&arg, sizeof(arg));
+ if (arg == 0 || arg == 1) {
+ WRITE_ENTER(&ipf_state);
+ ret = fr_state_flush(arg, 4);
+ RWLOCK_EXIT(&ipf_state);
+ BCOPYOUT((char *)&ret, data, sizeof(ret));
+ } else
+ error = EINVAL;
+ break;
+#ifdef USE_INET6
+ /* Same as SIOCIPFFL, but passes 6 instead of 4 to fr_state_flush(). */
+ case SIOCIPFL6 :
+ BCOPYIN(data, (char *)&arg, sizeof(arg));
+ if (arg == 0 || arg == 1) {
+ WRITE_ENTER(&ipf_state);
+ ret = fr_state_flush(arg, 6);
+ RWLOCK_EXIT(&ipf_state);
+ BCOPYOUT((char *)&ret, data, sizeof(ret));
+ } else
+ error = EINVAL;
+ break;
+#endif
+#ifdef IPFILTER_LOG
+ /*
+ * Flush the state log.
+ * The value returned by ipflog_clear() is copied back to the caller.
+ */
+ case SIOCIPFFB :
+ if (!(mode & FWRITE))
+ error = EPERM;
+ else {
+ int tmp;
+
+ tmp = ipflog_clear(IPL_LOGSTATE);
+ BCOPYOUT((char *)&tmp, data, sizeof(tmp));
+ }
+ break;
+ /*
+ * Turn logging of state information on/off.
+ * The caller's int is copied straight into ipstate_logging.
+ */
+ case SIOCSETLG :
+ if (!(mode & FWRITE))
+ error = EPERM;
+ else {
+ BCOPYIN((char *)data, (char *)&ipstate_logging,
+ sizeof(ipstate_logging));
+ }
+ break;
+ /*
+ * Return the current state of logging.
+ */
+ case SIOCGETLG :
+ BCOPYOUT((char *)&ipstate_logging, (char *)data,
+ sizeof(ipstate_logging));
+ break;
+ /*
+ * Return the number of bytes currently waiting to be read.
+ */
+ case FIONREAD :
+ arg = iplused[IPL_LOGSTATE]; /* returned in an int */
+ BCOPYOUT((char *)&arg, data, sizeof(arg));
+ break;
+#endif
+ /*
+ * Get the current state statistics.
+ */
+ case SIOCGETFS :
+ error = fr_outobj(data, fr_statetstats(), IPFOBJ_STATESTAT);
+ break;
+ /*
+ * Lock/Unlock the state table. (Locking prevents any changes, which
+ * means no packets match).
+ */
+ case SIOCSTLCK :
+ if (!(mode & FWRITE)) {
+ error = EPERM;
+ } else {
+ fr_lock(data, &fr_state_lock);
+ }
+ break;
+ /*
+ * Add an entry to the current state table.
+ * Refused with EACCES unless the state table is locked and the
+ * device was opened for writing.
+ */
+ case SIOCSTPUT :
+ if (!fr_state_lock || !(mode &FWRITE)) {
+ error = EACCES;
+ break;
+ }
+ error = fr_stputent(data);
+ break;
+ /*
+ * Get a state table entry.
+ * Refused with EACCES unless the state table is locked.
+ */
+ case SIOCSTGET :
+ if (!fr_state_lock) {
+ error = EACCES;
+ break;
+ }
+ error = fr_stgetent(data);
+ break;
+ default :
+ error = EINVAL;
+ break;
+ }
+ return error;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_stgetent */
+/* Returns: int - 0 == success, != 0 == failure */
+/* EFAULT if copying in or out fails, ENOENT if the first */
+/* entry is requested but the list is empty, ESRCH if the */
+/* requested entry is not in the list */
+/* Parameters: data(I) - pointer to state structure to retrieve from table */
+/* */
+/* Copy out state information from the kernel to a user space process. If */
+/* there is a filter rule associated with the state entry, copy that out */
+/* as well. The entry to copy out is taken from the value of "ips_next" in */
+/* the struct passed in and if not null and not found in the list of current*/
+/* state entries, the retrieval fails. */
+/* On success "ips_next" in the copy returned is set to the entry following */
+/* the one copied out. */
+/* ------------------------------------------------------------------------ */
+int fr_stgetent(data)
+caddr_t data;
+{
+ ipstate_t *is, *isn;
+ ipstate_save_t ips;
+ int error;
+
+ error = fr_inobj(data, &ips, IPFOBJ_STATESAVE);
+ if (error)
+ return EFAULT;
+
+ /*
+ * NOTE(review): ips_list is read and walked below without ipf_state
+ * being taken in this function - verify the caller's locking
+ * (SIOCSTGET is only allowed while fr_state_lock is set).
+ */
+ isn = ips.ips_next;
+ if (isn == NULL) {
+ /* A NULL ips_next asks for the first entry in the list. */
+ isn = ips_list;
+ if (isn == NULL) {
+ /*
+ * ips.ips_next is always NULL on this path, so ENOENT
+ * is always returned and the "return 0" is unreachable.
+ */
+ if (ips.ips_next == NULL)
+ return ENOENT;
+ return 0;
+ }
+ } else {
+ /*
+ * Make sure the pointer we're copying from exists in the
+ * current list of entries. Security precaution to prevent
+ * copying of random kernel data.
+ */
+ for (is = ips_list; is; is = is->is_next)
+ if (is == isn)
+ break;
+ if (!is)
+ return ESRCH;
+ }
+ ips.ips_next = isn->is_next;
+ bcopy((char *)isn, (char *)&ips.ips_is, sizeof(ips.ips_is));
+ ips.ips_rule = isn->is_rule;
+ /* ips_fr is only filled in when the entry has a rule. */
+ if (isn->is_rule != NULL)
+ bcopy((char *)isn->is_rule, (char *)&ips.ips_fr,
+ sizeof(ips.ips_fr));
+ error = fr_outobj(data, &ips, IPFOBJ_STATESAVE);
+ if (error)
+ return EFAULT;
+ return 0;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_stputent */
+/* Returns: int - 0 == success, != 0 == failure */
+/* EFAULT if copying in or out fails, ENOMEM if an */
+/* allocation fails, ESRCH if the rule referred to is not */
+/* used by any current state entry */
+/* Parameters: data(I) - pointer to state information struct */
+/* */
+/* This function implements the SIOCSTPUT ioctl: insert a state entry into */
+/* the state table. If the state info. includes a pointer to a filter rule */
+/* then also add in an orphaned rule (will not show up in any "ipfstat -io" */
+/* output. */
+/* ------------------------------------------------------------------------ */
+int fr_stputent(data)
+caddr_t data;
+{
+ ipstate_t *is, *isn;
+ ipstate_save_t ips;
+ int error, i;
+ frentry_t *fr;
+ char *name;
+
+ error = fr_inobj(data, &ips, IPFOBJ_STATESAVE);
+ if (error)
+ return EFAULT;
+
+ KMALLOC(isn, ipstate_t *);
+ if (isn == NULL)
+ return ENOMEM;
+
+ /*
+ * Take the caller's copy of the entry, then clear everything that
+ * precedes is_pkts in the structure and reset the timeout queue
+ * entry, interface pointers and sync pointer so that no
+ * caller-supplied values are kept in those fields.
+ */
+ bcopy((char *)&ips.ips_is, (char *)isn, sizeof(*isn));
+ bzero((char *)isn, offsetof(struct ipstate, is_pkts));
+ isn->is_sti.tqe_pnext = NULL;
+ isn->is_sti.tqe_next = NULL;
+ isn->is_sti.tqe_ifq = NULL;
+ isn->is_sti.tqe_parent = isn;
+ isn->is_ifp[0] = NULL;
+ isn->is_ifp[1] = NULL;
+ isn->is_ifp[2] = NULL;
+ isn->is_ifp[3] = NULL;
+ isn->is_sync = NULL;
+ /* fr is the rule pointer value supplied by the caller. */
+ fr = ips.ips_rule;
+
+ /* No rule: insert the entry as it is. */
+ if (fr == NULL) {
+ READ_ENTER(&ipf_state);
+ fr_stinsert(isn, 0);
+ /* fr_stinsert exits with is_lock held; release it. */
+ MUTEX_EXIT(&isn->is_lock);
+ RWLOCK_EXIT(&ipf_state);
+ return 0;
+ }
+
+ if (isn->is_flags & SI_NEWFR) {
+ /* A new rule is wanted: build it from the copy in ips_fr. */
+ KMALLOC(fr, frentry_t *);
+ if (fr == NULL) {
+ KFREE(isn);
+ return ENOMEM;
+ }
+ bcopy((char *)&ips.ips_fr, (char *)fr, sizeof(*fr));
+ isn->is_rule = fr;
+ ips.ips_is.is_rule = fr;
+ /* The copied-in lock contents are discarded before initialising. */
+ MUTEX_NUKE(&fr->fr_lock);
+ MUTEX_INIT(&fr->fr_lock, "state filter rule lock");
+
+ /*
+ * Look up all the interface names in the rule.
+ */
+ for (i = 0; i < 4; i++) {
+ name = fr->fr_ifnames[i];
+ fr->fr_ifas[i] = fr_resolvenic(name, fr->fr_v);
+ name = isn->is_ifname[i];
+ isn->is_ifp[i] = fr_resolvenic(name, isn->is_v);
+ }
+
+ /* Start with no references and no attached data. */
+ fr->fr_ref = 0;
+ fr->fr_dsize = 0;
+ fr->fr_data = NULL;
+
+ fr_resolvedest(&fr->fr_tif, fr->fr_v);
+ fr_resolvedest(&fr->fr_dif, fr->fr_v);
+
+ /*
+ * send a copy back to userland of what we ended up
+ * to allow for verification.
+ */
+ error = fr_outobj(data, &ips, IPFOBJ_STATESAVE);
+ if (error) {
+ /* Nothing has been inserted yet, so free both objects. */
+ KFREE(isn);
+ MUTEX_DESTROY(&fr->fr_lock);
+ KFREE(fr);
+ return EFAULT;
+ }
+ READ_ENTER(&ipf_state);
+ fr_stinsert(isn, 0);
+ MUTEX_EXIT(&isn->is_lock);
+ RWLOCK_EXIT(&ipf_state);
+
+ } else {
+ /*
+ * The caller refers to an existing rule: only insert the entry
+ * if some current state entry already uses that rule pointer.
+ * NOTE(review): isn->is_rule is not assigned on this path -
+ * confirm whether the new entry is meant to reference fr.
+ */
+ READ_ENTER(&ipf_state);
+ for (is = ips_list; is; is = is->is_next)
+ if (is->is_rule == fr) {
+ fr_stinsert(isn, 0);
+ MUTEX_EXIT(&isn->is_lock);
+ break;
+ }
+
+ /* The loop ran off the end: the rule is unknown, discard the entry. */
+ if (is == NULL) {
+ KFREE(isn);
+ isn = NULL;
+ }
+ RWLOCK_EXIT(&ipf_state);
+
+ return (isn == NULL) ? ESRCH : 0;
+ }
+
+ return 0;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_stinsert */
+/* Returns: Nil */
+/* Parameters: is(I) - pointer to state structure */
+/* rev(I) - flag indicating forward/reverse direction of packet */
+/* */
+/* Inserts a state structure into the hash table (for lookups) and the list */
+/* of state entries (for enumeration). Resolves all of the interface names */
+/* to pointers and adjusts running stats for the hash table as appropriate. */
+/* If the entry has a rule, the rule's reference and state counts are */
+/* incremented under the rule's lock. */
+/* */
+/* Locking: it is assumed that some kind of lock on ipf_state is held. */
+/* Exits with is_lock initialised and held. */
+/* ------------------------------------------------------------------------ */
+void fr_stinsert(is, rev)
+ipstate_t *is;
+int rev;
+{
+ frentry_t *fr;
+ u_int hv;
+ int i;
+
+ MUTEX_INIT(&is->is_lock, "ipf state entry");
+
+ fr = is->is_rule;
+ if (fr != NULL) {
+ MUTEX_ENTER(&fr->fr_lock);
+ fr->fr_ref++;
+ fr->fr_statecnt++;
+ MUTEX_EXIT(&fr->fr_lock);
+ }
+
+ /*
+ * Look up all the interface names in the state entry.
+ * Pointers that are already set are left alone.
+ */
+ for (i = 0; i < 4; i++) {
+ if (is->is_ifp[i] != NULL)
+ continue;
+ is->is_ifp[i] = fr_resolvenic(is->is_ifname[i], is->is_v);
+ }
+
+ /*
+ * If we could trust is_hv, then the modulous would not be needed, but
+ * when running with IPFILTER_SYNC, this stops bad values.
+ */
+ hv = is->is_hv % fr_statesize;
+ is->is_hv = hv;
+
+ /*
+ * We need to get both of these locks...the first because it is
+ * possible that once the insert is complete another packet might
+ * come along, match the entry and want to update it.
+ */
+ MUTEX_ENTER(&is->is_lock);
+ MUTEX_ENTER(&ipf_stinsert);
+
+ /*
+ * add into list table.
+ * The entry becomes the new head of ips_list; is_pnext holds the
+ * address of the pointer that refers to the entry.
+ */
+ if (ips_list != NULL)
+ ips_list->is_pnext = &is->is_next;
+ is->is_pnext = &ips_list;
+ is->is_next = ips_list;
+ ips_list = is;
+
+ /*
+ * Likewise push onto the front of hash bucket hv. iss_inuse is only
+ * incremented when the bucket was empty beforehand.
+ */
+ if (ips_table[hv] != NULL)
+ ips_table[hv]->is_phnext = &is->is_hnext;
+ else
+ ips_stats.iss_inuse++;
+ is->is_phnext = ips_table + hv;
+ is->is_hnext = ips_table[hv];
+ ips_table[hv] = is;
+ ips_stats.iss_bucketlen[hv]++;
+ ips_num++;
+ MUTEX_EXIT(&ipf_stinsert);
+
+ /* is_lock is still held here and on return (see Locking above). */
+ fr_setstatequeue(is, rev);
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_addstate */
+/* Returns: ipstate_t* - NULL == failure, else pointer to new state */
+/* Parameters: fin(I) - pointer to packet information */
+/* stsave(O) - pointer to place to save pointer to created */
+/* state structure. */
+/* flags(I) - flags to use when creating the structure */
+/* */
+/* Creates a new IP state structure from the packet information collected. */
+/* Inserts it into the state table and appends to the bottom of the active */
+/* list. If the capacity of the table has reached the maximum allowed then */
+/* the call will fail and a flush is scheduled for the next timeout call. */
+/* */
+/* fin->fin_fr may be NULL, in which case only the global state limit is */
+/* applied and the default flags (fr_flags) are used for the new entry. */
+/* */
+/* Locking: no lock on the rule (fr_lock) is held on entry or taken here; */
+/* a read lock on ipf_state is taken around the insertion and */
+/* released before returning. The new entry's is_lock, which */
+/* fr_stinsert() returns held, is released before returning. */
+/* ------------------------------------------------------------------------ */
+ipstate_t *fr_addstate(fin, stsave, flags)
+fr_info_t *fin;
+ipstate_t **stsave;
+u_int flags;
+{
+ ipstate_t *is, ips;
+ struct icmp *ic;
+ u_int pass, hv;
+ frentry_t *fr;
+ tcphdr_t *tcp;
+ grehdr_t *gre;
+ void *ifp;
+ int out;
+
+ if (fr_state_lock ||
+ (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGBODY|FI_BAD)))
+ return NULL;
+
+ if ((fin->fin_flx & FI_OOW) && !(fin->fin_tcpf & TH_SYN))
+ return NULL;
+
+ fr = fin->fin_fr;
+
+ /*
+ * Rules without their own state limit (and packets with no rule at
+ * all, fr == NULL) are subject to the global limit on state entries.
+ */
+ if (((fr == NULL) || (fr->fr_statemax == 0)) &&
+ (ips_num == fr_statemax)) {
+ ATOMIC_INCL(ips_stats.iss_max);
+ fr_state_doflush = 1;
+ return NULL;
+ }
+
+ /*
+ * If a "keep state" rule has reached the maximum number of references
+ * to it, then schedule an automatic flush in case we can clear out
+ * some "dead old wood". fr_lock is not held at this point (it is
+ * only taken later, inside fr_stinsert()), so fr_statecnt is read
+ * without the lock and there is nothing to release before returning.
+ */
+ if ((fr != NULL) && (fr->fr_statemax != 0) &&
+ (fr->fr_statecnt >= fr->fr_statemax)) {
+ ATOMIC_INCL(ips_stats.iss_maxref);
+ fr_state_doflush = 1;
+ return NULL;
+ }
+
+ pass = (fr == NULL) ? 0 : fr->fr_flags;
+
+ /*
+ * Build the new entry in a local (ips) first; it is only copied into
+ * allocated memory once we know it is wanted.
+ */
+ ic = NULL;
+ tcp = NULL;
+ out = fin->fin_out;
+ is = &ips;
+ bzero((char *)is, sizeof(*is));
+ is->is_die = 1 + fr_ticks;
+
+ /*
+ * Copy and calculate...
+ */
+ hv = (is->is_p = fin->fin_fi.fi_p);
+ is->is_src = fin->fin_fi.fi_src;
+ hv += is->is_saddr;
+ is->is_dst = fin->fin_fi.fi_dst;
+ hv += is->is_daddr;
+#ifdef USE_INET6
+ if (fin->fin_v == 6) {
+ /*
+ * For ICMPv6, we check to see if the destination address is
+ * a multicast address. If it is, do not include it in the
+ * calculation of the hash because the correct reply will come
+ * back from a real address, not a multicast address.
+ */
+ if ((is->is_p == IPPROTO_ICMPV6) &&
+ IN6_IS_ADDR_MULTICAST(&is->is_dst.in6)) {
+ /*
+ * So you can do keep state with neighbour discovery.
+ *
+ * Here we could use the address from the neighbour
+ * solicit message to put in the state structure and
+ * we could use that without a wildcard flag too...
+ *
+ * The wildcard is recorded in flags as well as in
+ * is_flags so that it is counted in iss_wild below,
+ * matching the decrement done in fr_matchsrcdst()
+ * when the wildcard is later filled in.
+ */
+ is->is_flags |= SI_W_DADDR;
+ flags |= SI_W_DADDR;
+ hv -= is->is_daddr;
+ } else {
+ hv += is->is_dst.i6[1];
+ hv += is->is_dst.i6[2];
+ hv += is->is_dst.i6[3];
+ }
+ hv += is->is_src.i6[1];
+ hv += is->is_src.i6[2];
+ hv += is->is_src.i6[3];
+ }
+#endif
+
+ switch (is->is_p)
+ {
+#ifdef USE_INET6
+ case IPPROTO_ICMPV6 :
+ ic = fin->fin_dp;
+
+ /* state is only kept for ICMPv6 queries that expect a reply */
+ switch (ic->icmp_type)
+ {
+ case ICMP6_ECHO_REQUEST :
+ is->is_icmp.ici_type = ic->icmp_type;
+ hv += (is->is_icmp.ici_id = ic->icmp_id);
+ break;
+ case ICMP6_MEMBERSHIP_QUERY :
+ case ND_ROUTER_SOLICIT :
+ case ND_NEIGHBOR_SOLICIT :
+ case ICMP6_NI_QUERY :
+ is->is_icmp.ici_type = ic->icmp_type;
+ break;
+ default :
+ return NULL;
+ }
+ ATOMIC_INCL(ips_stats.iss_icmp);
+ break;
+#endif
+ case IPPROTO_ICMP :
+ ic = fin->fin_dp;
+
+ /* state is only kept for ICMP queries that expect a reply */
+ switch (ic->icmp_type)
+ {
+ case ICMP_ECHO :
+ case ICMP_TSTAMP :
+ case ICMP_IREQ :
+ case ICMP_MASKREQ :
+ is->is_icmp.ici_type = ic->icmp_type;
+ hv += (is->is_icmp.ici_id = ic->icmp_id);
+ break;
+ default :
+ return NULL;
+ }
+ ATOMIC_INCL(ips_stats.iss_icmp);
+ break;
+
+ case IPPROTO_GRE :
+ gre = fin->fin_dp;
+
+ is->is_gre.gs_flags = gre->gr_flags;
+ is->is_gre.gs_ptype = gre->gr_ptype;
+ if (GRE_REV(is->is_gre.gs_flags) == 1) {
+ is->is_call[0] = fin->fin_data[0];
+ is->is_call[1] = fin->fin_data[1];
+ }
+ break;
+
+ case IPPROTO_TCP :
+ tcp = fin->fin_dp;
+
+ if (tcp->th_flags & TH_RST)
+ return NULL;
+ /*
+ * The endian of the ports doesn't matter, but the ack and
+ * sequence numbers do as we do mathematics on them later.
+ */
+ is->is_sport = htons(fin->fin_data[0]);
+ is->is_dport = htons(fin->fin_data[1]);
+ if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) {
+ hv += is->is_sport;
+ hv += is->is_dport;
+ }
+
+ /*
+ * If this is a real packet then initialise fields in the
+ * state information structure from the TCP header information.
+ */
+
+ is->is_maxdwin = 1;
+ is->is_maxswin = ntohs(tcp->th_win);
+ if (is->is_maxswin == 0)
+ is->is_maxswin = 1;
+
+ if ((fin->fin_flx & FI_IGNORE) == 0) {
+ is->is_send = ntohl(tcp->th_seq) + fin->fin_dlen -
+ (TCP_OFF(tcp) << 2) +
+ ((tcp->th_flags & TH_SYN) ? 1 : 0) +
+ ((tcp->th_flags & TH_FIN) ? 1 : 0);
+ is->is_maxsend = is->is_send;
+
+ /*
+ * Window scale option is only present in
+ * SYN/SYN-ACK packet.
+ */
+ if ((tcp->th_flags & ~(TH_FIN|TH_ACK|TH_ECNALL)) ==
+ TH_SYN &&
+ (TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) {
+ if (fr_tcpoptions(fin, tcp,
+ &is->is_tcp.ts_data[0]))
+ is->is_swinflags = TCP_WSCALE_SEEN|
+ TCP_WSCALE_FIRST;
+ }
+
+ if ((fin->fin_out != 0) && (pass & FR_NEWISN) != 0) {
+ fr_checknewisn(fin, is);
+ fr_fixoutisn(fin, is);
+ }
+
+ if ((tcp->th_flags & TH_OPENING) == TH_SYN)
+ flags |= IS_TCPFSM;
+ else {
+ is->is_maxdwin = is->is_maxswin * 2;
+ is->is_dend = ntohl(tcp->th_ack);
+ is->is_maxdend = ntohl(tcp->th_ack);
+ is->is_maxdwin *= 2;
+ }
+ }
+
+ /*
+ * The timeout for a new TCP entry is started further down,
+ * once the entry has been inserted into the table.
+ */
+ ATOMIC_INCL(ips_stats.iss_tcp);
+ break;
+
+ case IPPROTO_UDP :
+ tcp = fin->fin_dp;
+
+ is->is_sport = htons(fin->fin_data[0]);
+ is->is_dport = htons(fin->fin_data[1]);
+ if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) {
+ hv += tcp->th_dport;
+ hv += tcp->th_sport;
+ }
+ ATOMIC_INCL(ips_stats.iss_udp);
+ break;
+
+ default :
+ break;
+ }
+ hv = DOUBLE_HASH(hv);
+ is->is_hv = hv;
+ is->is_rule = fr;
+ /*
+ * OR the inherited flags in rather than assigning them: is_flags was
+ * zeroed above and may since have had SI_W_DADDR set for an ICMPv6
+ * multicast destination, which must not be thrown away here.
+ */
+ is->is_flags |= flags & IS_INHERITED;
+
+ /*
+ * Look for identical state.
+ */
+ for (is = ips_table[is->is_hv % fr_statesize]; is != NULL;
+ is = is->is_hnext) {
+ if (bcmp(&ips.is_src, &is->is_src,
+ offsetof(struct ipstate, is_ps) -
+ offsetof(struct ipstate, is_src)) == 0)
+ break;
+ }
+ if (is != NULL)
+ return NULL;
+
+ if (ips_stats.iss_bucketlen[hv] >= fr_state_maxbucket) {
+ ATOMIC_INCL(ips_stats.iss_bucketfull);
+ return NULL;
+ }
+ KMALLOC(is, ipstate_t *);
+ if (is == NULL) {
+ ATOMIC_INCL(ips_stats.iss_nomem);
+ return NULL;
+ }
+ bcopy((char *)&ips, (char *)is, sizeof(*is));
+ /*
+ * Do not do the modulous here, it is done in fr_stinsert().
+ */
+ if (fr != NULL) {
+ (void) strncpy(is->is_group, fr->fr_group, FR_GROUPLEN);
+ if (fr->fr_age[0] != 0) {
+ is->is_tqehead[0] = fr_addtimeoutqueue(&ips_utqe,
+ fr->fr_age[0]);
+ is->is_sti.tqe_flags |= TQE_RULEBASED;
+ }
+ if (fr->fr_age[1] != 0) {
+ is->is_tqehead[1] = fr_addtimeoutqueue(&ips_utqe,
+ fr->fr_age[1]);
+ is->is_sti.tqe_flags |= TQE_RULEBASED;
+ }
+
+ is->is_tag = fr->fr_logtag;
+
+ /*
+ * Slot (out << 1) is filled in from the packet's own
+ * interface below; the other three come from the rule.
+ */
+ is->is_ifp[(out << 1) + 1] = fr->fr_ifas[1];
+ is->is_ifp[(1 - out) << 1] = fr->fr_ifas[2];
+ is->is_ifp[((1 - out) << 1) + 1] = fr->fr_ifas[3];
+
+ if (((ifp = fr->fr_ifas[1]) != NULL) &&
+ (ifp != (void *)-1)) {
+ COPYIFNAME(ifp, is->is_ifname[(out << 1) + 1]);
+ }
+ if (((ifp = fr->fr_ifas[2]) != NULL) &&
+ (ifp != (void *)-1)) {
+ COPYIFNAME(ifp, is->is_ifname[(1 - out) << 1]);
+ }
+ if (((ifp = fr->fr_ifas[3]) != NULL) &&
+ (ifp != (void *)-1)) {
+ COPYIFNAME(ifp, is->is_ifname[((1 - out) << 1) + 1]);
+ }
+ } else {
+ pass = fr_flags;
+ is->is_tag = FR_NOLOGTAG;
+ }
+
+ is->is_ifp[out << 1] = fin->fin_ifp;
+ if (fin->fin_ifp != NULL) {
+ COPYIFNAME(fin->fin_ifp, is->is_ifname[out << 1]);
+ }
+
+ /*
+ * It may seem strange to set is_ref to 2, but fr_check() will call
+ * fr_statederef() after calling fr_addstate() and the idea is to
+ * have it exist at the end of fr_check() with is_ref == 1.
+ */
+ is->is_ref = 2;
+ is->is_pass = pass;
+ is->is_pkts[0] = 0, is->is_bytes[0] = 0;
+ is->is_pkts[1] = 0, is->is_bytes[1] = 0;
+ is->is_pkts[2] = 0, is->is_bytes[2] = 0;
+ is->is_pkts[3] = 0, is->is_bytes[3] = 0;
+ if ((fin->fin_flx & FI_IGNORE) == 0) {
+ is->is_pkts[out] = 1;
+ is->is_bytes[out] = fin->fin_plen;
+ is->is_flx[out][0] = fin->fin_flx & FI_CMP;
+ is->is_flx[out][0] &= ~FI_OOW;
+ }
+
+ if (pass & FR_STSTRICT)
+ is->is_flags |= IS_STRICT;
+
+ if (pass & FR_STATESYNC)
+ is->is_flags |= IS_STATESYNC;
+
+ /*
+ * We want to check everything that is a property of this packet,
+ * but we don't (automatically) care about it's fragment status as
+ * this may change.
+ */
+ is->is_v = fin->fin_v;
+ is->is_opt[0] = fin->fin_optmsk;
+ is->is_optmsk[0] = 0xffffffff;
+ is->is_optmsk[1] = 0xffffffff;
+ if (is->is_v == 6) {
+ is->is_opt[0] &= ~0x8;
+ is->is_optmsk[0] &= ~0x8;
+ is->is_optmsk[1] &= ~0x8;
+ }
+ is->is_sec = fin->fin_secmsk;
+ is->is_secmsk = 0xffff;
+ is->is_auth = fin->fin_auth;
+ is->is_authmsk = 0xffff;
+ if (flags & (SI_WILDP|SI_WILDA)) {
+ ATOMIC_INCL(ips_stats.iss_wild);
+ }
+ is->is_rulen = fin->fin_rule;
+
+
+ if (pass & FR_LOGFIRST)
+ is->is_pass &= ~(FR_LOGFIRST|FR_LOG);
+
+ READ_ENTER(&ipf_state);
+ is->is_me = stsave;
+
+ /* returns with is->is_lock held; released in both branches below */
+ fr_stinsert(is, fin->fin_rev);
+
+ if (fin->fin_p == IPPROTO_TCP) {
+ /*
+ * If we're creating state for a starting connection, start the
+ * timer on it as we'll never see an error if it fails to
+ * connect.
+ */
+ (void) fr_tcp_age(&is->is_sti, fin, ips_tqtqb, is->is_flags);
+ MUTEX_EXIT(&is->is_lock);
+#ifdef IPFILTER_SCAN
+ if ((is->is_flags & SI_CLONE) == 0)
+ (void) ipsc_attachis(is);
+#endif
+ } else {
+ MUTEX_EXIT(&is->is_lock);
+ }
+#ifdef IPFILTER_SYNC
+ if ((is->is_flags & IS_STATESYNC) && ((is->is_flags & SI_CLONE) == 0))
+ is->is_sync = ipfsync_new(SMC_STATE, fin, is);
+#endif
+ if (ipstate_logging)
+ ipstate_log(is, ISL_NEW);
+
+ RWLOCK_EXIT(&ipf_state);
+ fin->fin_state = is;
+ fin->fin_rev = IP6_NEQ(&is->is_dst, &fin->fin_daddr);
+ fin->fin_flx |= FI_STATE;
+ if (fin->fin_flx & FI_FRAG)
+ (void) fr_newfrag(fin, pass ^ FR_KEEPSTATE);
+
+ return is;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_tcpoptions */
+/* Returns: int - 1 == all of the TCP options were present in the */
+/* message and could be examined, */
+/* 0 == TCP header longer than the data present or */
+/* the options were truncated */
+/* Parameters: fin(I) - pointer to packet information */
+/* tcp(I) - pointer to TCP packet header */
+/* td(O) - pointer to TCP data held as part of the state */
+/* */
+/* Look after the TCP header for any options and deal with those that are */
+/* present. Record details about those that we recognise: the window */
+/* scale factor (td_winscale) and the maximum segment size (td_maxseg). */
+/* ------------------------------------------------------------------------ */
+static int fr_tcpoptions(fin, tcp, td)
+fr_info_t *fin;
+tcphdr_t *tcp;
+tcpdata_t *td;
+{
+ int off, mlen, ol, i, len, retval;
+ char buf[64], *s, opt;
+ mb_t *m = NULL;
+
+ len = (TCP_OFF(tcp) << 2);
+ if (fin->fin_dlen < len)
+ return 0;
+ len -= sizeof(*tcp);
+
+ off = fin->fin_plen - fin->fin_dlen + sizeof(*tcp) + fin->fin_ipoff;
+
+ /*
+ * Only look at as many option bytes as the message really holds.
+ */
+ m = fin->fin_m;
+ mlen = MSGDSIZE(m) - off;
+ if (len > mlen) {
+ len = mlen;
+ retval = 0;
+ } else {
+ retval = 1;
+ }
+
+ /*
+ * Nothing to copy or parse; in particular never hand a zero or
+ * negative length to COPYDATA().
+ */
+ if (len <= 0)
+ return retval;
+
+ COPYDATA(m, off, len, buf);
+
+ for (s = buf; len > 0; ) {
+ opt = *s;
+ if (opt == TCPOPT_EOL)
+ break;
+ else if (opt == TCPOPT_NOP)
+ ol = 1;
+ else {
+ if (len < 2)
+ break;
+ /*
+ * Option bytes are unsigned quantities; go via u_char
+ * so that values >= 0x80 are not sign extended.
+ */
+ ol = (int)(u_char)*(s + 1);
+ if (ol < 2 || ol > len)
+ break;
+
+ /*
+ * Extract the TCP options we are interested in out of
+ * the header and store them in the the tcpdata struct.
+ */
+ switch (opt)
+ {
+ case TCPOPT_WINDOW :
+ if (ol == TCPOLEN_WINDOW) {
+ /* shift counts above the maximum are clamped */
+ i = (int)(u_char)*(s + 2);
+ if (i > TCP_WSCALE_MAX)
+ i = TCP_WSCALE_MAX;
+ td->td_winscale = i;
+ }
+ break;
+ case TCPOPT_MAXSEG :
+ /*
+ * So, if we wanted to set the TCP MAXSEG,
+ * it should be done here...
+ */
+ if (ol == TCPOLEN_MAXSEG) {
+ /* 16 bit value, most significant byte first */
+ i = (int)(u_char)*(s + 2);
+ i <<= 8;
+ i += (int)(u_char)*(s + 3);
+ td->td_maxseg = i;
+ }
+ break;
+ }
+ }
+ len -= ol;
+ s += ol;
+ }
+ return retval;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_tcpstate */
+/* Returns: int - 1 == packet matches state entry, 0 == it does not */
+/* Parameters: fin(I) - pointer to packet information */
+/* tcp(I) - pointer to TCP packet header */
+/* is(I) - pointer to master state structure */
+/* */
+/* Check to see if a packet with TCP headers fits within the TCP window. */
+/* Change timeout depending on whether new packet is a SYN-ACK returning */
+/* for a SYN or a RST or FIN which indicate time to close up shop. */
+/* ------------------------------------------------------------------------ */
+static int fr_tcpstate(fin, tcp, is)
+fr_info_t *fin;
+tcphdr_t *tcp;
+ipstate_t *is;
+{
+ int source, ret = 0, flags;
+ tcpdata_t *fdata, *tdata;
+
+ /*
+ * source starts out as the inverse of fin_rev. For entries flagged
+ * IS_TCPFSM it is forced to 0 when the packet's source port is not
+ * the source port recorded in the state entry.
+ */
+ source = !fin->fin_rev;
+ if (((is->is_flags & IS_TCPFSM) != 0) && (source == 1) &&
+ (ntohs(is->is_sport) != fin->fin_data[0]))
+ source = 0;
+ /*
+ * fdata/tdata are handed to fr_tcpinwindow() as its "forward" and
+ * "reverse" TCP data for this packet.
+ */
+ fdata = &is->is_tcp.ts_data[!source];
+ tdata = &is->is_tcp.ts_data[source];
+
+ MUTEX_ENTER(&is->is_lock);
+ if (fr_tcpinwindow(fin, fdata, tdata, tcp, is->is_flags)) {
+#ifdef IPFILTER_SCAN
+ /*
+ * Give the packet to the scanner; if that leaves the state
+ * entry set to block, report a match straight away.
+ */
+ if (is->is_flags & (IS_SC_CLIENT|IS_SC_SERVER)) {
+ ipsc_packet(fin, is);
+ if (FR_ISBLOCK(is->is_pass)) {
+ MUTEX_EXIT(&is->is_lock);
+ return 1;
+ }
+ }
+#endif
+
+ /*
+ * Nearing end of connection, start timeout.
+ * A return of 0 from fr_tcp_age() is treated as "no match".
+ */
+ ret = fr_tcp_age(&is->is_sti, fin, ips_tqtqb, is->is_flags);
+ if (ret == 0) {
+ MUTEX_EXIT(&is->is_lock);
+ return 0;
+ }
+
+ /*
+ * set s0's as appropriate. Use syn-ack packet as it
+ * contains both pieces of required information.
+ */
+ /*
+ * Window scale option is only present in SYN/SYN-ACK packet.
+ * Compare with ~TH_FIN to mask out T/TCP setups.
+ */
+ flags = tcp->th_flags & ~(TH_FIN|TH_ECNALL);
+ if (flags == (TH_SYN|TH_ACK)) {
+ is->is_s0[source] = ntohl(tcp->th_ack);
+ is->is_s0[!source] = ntohl(tcp->th_seq) + 1;
+ /*
+ * Only parse options here if the other side recorded
+ * a window scale; if parsing fails and this side has
+ * no scale either, the other side's scale is dropped.
+ */
+ if ((TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2)) &&
+ tdata->td_winscale) {
+ if (fr_tcpoptions(fin, tcp, fdata)) {
+ fdata->td_winflags = TCP_WSCALE_SEEN|
+ TCP_WSCALE_FIRST;
+ } else {
+ if (!fdata->td_winscale)
+ tdata->td_winscale = 0;
+ }
+ }
+ if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN))
+ fr_checknewisn(fin, is);
+ } else if (flags == TH_SYN) {
+ is->is_s0[source] = ntohl(tcp->th_seq) + 1;
+ if ((TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2)))
+ if (fr_tcpoptions(fin, tcp, tdata)) {
+ tdata->td_winflags = TCP_WSCALE_SEEN|
+ TCP_WSCALE_FIRST;
+ }
+
+ if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN))
+ fr_checknewisn(fin, is);
+
+ }
+ ret = 1;
+ } else
+ /* not inside the window: flag the packet as out of window */
+ fin->fin_flx |= FI_OOW;
+ MUTEX_EXIT(&is->is_lock);
+ return ret;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_checknewisn */
+/* Returns: Nil */
+/* Parameters: fin(I) - pointer to packet information */
+/* is(I) - pointer to master state structure */
+/* */
+/* For the direction this packet is travelling in (fin_rev), work out */
+/* whether a replacement sequence number has already been set up. If it */
+/* has not, obtain one from fr_newisn(), record the sequence number offset */
+/* and the folded sum adjustment in the state entry and flag the direction */
+/* as done (IS_ISNSYN for fin_rev == 0, IS_ISNACK for fin_rev == 1). */
+/* */
+/* NOTE: This does not actually change the sequence numbers, only gets new */
+/* one ready. */
+/* ------------------------------------------------------------------------ */
+static void fr_checknewisn(fin, is)
+fr_info_t *fin;
+ipstate_t *is;
+{
+ u_32_t adj, seq_old, seq_new;
+ tcphdr_t *tcp;
+ int dir;
+
+ dir = fin->fin_rev;
+
+ /*
+ * Nothing to do if this direction already has its new ISN, or if
+ * the direction is neither 0 nor 1.
+ */
+ if (dir == 0) {
+ if (is->is_flags & IS_ISNSYN)
+ return;
+ } else if (dir == 1) {
+ if (is->is_flags & IS_ISNACK)
+ return;
+ } else {
+ return;
+ }
+
+ tcp = fin->fin_dp;
+ seq_old = ntohl(tcp->th_seq);
+ seq_new = fr_newisn(fin);
+ is->is_isninc[dir] = seq_new - seq_old;
+
+ /* fold the upper 16 bits of the sum delta into the lower 16 */
+ CALC_SUMD(seq_old, seq_new, adj);
+ is->is_sumd[dir] = (adj & 0xffff) + (adj >> 16);
+
+ if (dir == 0)
+ is->is_flags |= IS_ISNSYN;
+ else
+ is->is_flags |= IS_ISNACK;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_tcpinwindow */
+/* Returns: int - 1 == packet inside TCP "window", 0 == not inside. */
+/* Parameters: fin(I) - pointer to packet information */
+/* fdata(I) - pointer to tcp state information (forward) */
+/* tdata(I) - pointer to tcp state information (reverse) */
+/* tcp(I) - pointer to TCP packet header */
+/* flags(I) - state entry flags (IS_TCPFSM and IS_STRICT are */
+/* examined here) */
+/* */
+/* Given a packet has matched addresses and ports, check to see if it is */
+/* within the TCP data window. In a show of generosity, allow packets that */
+/* are within the window space behind the current sequence # as well. */
+/* When the packet is accepted, fdata and tdata are updated from it. */
+/* ------------------------------------------------------------------------ */
+int fr_tcpinwindow(fin, fdata, tdata, tcp, flags)
+fr_info_t *fin;
+tcpdata_t *fdata, *tdata;
+tcphdr_t *tcp;
+int flags;
+{
+ tcp_seq seq, ack, end;
+ int ackskew, tcpflags;
+ u_32_t win, maxwin;
+
+ /*
+ * Find difference between last checked packet and this packet.
+ * The advertised window is only scaled on non-SYN packets and a
+ * window of 0 is treated as 1.
+ */
+ tcpflags = tcp->th_flags;
+ seq = ntohl(tcp->th_seq);
+ ack = ntohl(tcp->th_ack);
+ if (tcpflags & TH_SYN)
+ win = ntohs(tcp->th_win);
+ else
+ win = ntohs(tcp->th_win) << fdata->td_winscale;
+ if (win == 0)
+ win = 1;
+
+ /*
+ * if window scaling is present, the scaling is only allowed
+ * for windows not in the first SYN packet. In that packet the
+ * window is 65535 to specify the largest window possible
+ * for receivers not implementing the window scale option.
+ * Currently, we do not assume TTCP here. That means that
+ * if we see a second packet from a host (after the initial
+ * SYN), we can assume that the receiver of the SYN did
+ * already send back the SYN/ACK (and thus that we know if
+ * the receiver also does window scaling)
+ */
+ if (!(tcpflags & TH_SYN) && (fdata->td_winflags & TCP_WSCALE_FIRST)) {
+ if (tdata->td_winflags & TCP_WSCALE_SEEN) {
+ fdata->td_winflags &= ~TCP_WSCALE_FIRST;
+ fdata->td_maxwin = win;
+ } else {
+ /* other side never offered scaling: turn it off */
+ fdata->td_winscale = 0;
+ fdata->td_winflags = 0;
+ tdata->td_winscale = 0;
+ tdata->td_winflags = 0;
+ }
+ }
+
+ /*
+ * end is the sequence number following this segment: the payload
+ * length plus one each for SYN and FIN.
+ */
+ end = seq + fin->fin_dlen - (TCP_OFF(tcp) << 2) +
+ ((tcpflags & TH_SYN) ? 1 : 0) + ((tcpflags & TH_FIN) ? 1 : 0);
+
+ if ((fdata->td_end == 0) &&
+ (!(flags & IS_TCPFSM) ||
+ ((tcpflags & TH_OPENING) == TH_OPENING))) {
+ /*
+ * Must be a (outgoing) SYN-ACK in reply to a SYN.
+ */
+ fdata->td_end = end;
+ fdata->td_maxwin = 1;
+ fdata->td_maxend = end + win;
+ }
+
+ if (!(tcpflags & TH_ACK)) { /* Pretend an ack was sent */
+ ack = tdata->td_end;
+ } else if (((tcpflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) &&
+ (ack == 0)) {
+ /* gross hack to get around certain broken tcp stacks */
+ ack = tdata->td_end;
+ }
+
+ /* segment occupies no sequence space: use the last known end */
+ if (seq == end)
+ seq = end = fdata->td_end;
+
+ maxwin = tdata->td_maxwin;
+ ackskew = tdata->td_end - ack;
+
+ /*
+ * Strict sequencing only allows in-order delivery.
+ */
+ if ((flags & IS_STRICT) != 0) {
+ if (seq != fdata->td_end) {
+ return 0;
+ }
+ }
+
+ /*
+ * Sequence number comparisons are done on the signed difference so
+ * that they keep working across 32 bit wrap-around. Note that the
+ * sequence range checks are only compiled in for _KERNEL builds;
+ * otherwise only the ack skew is checked.
+ */
+#define SEQ_GE(a,b) ((int)((a) - (b)) >= 0)
+#define SEQ_GT(a,b) ((int)((a) - (b)) > 0)
+ if (
+#if defined(_KERNEL)
+ (SEQ_GE(fdata->td_maxend, end)) &&
+ (SEQ_GE(seq, fdata->td_end - maxwin)) &&
+#endif
+/* XXX what about big packets */
+#define MAXACKWINDOW 66000
+ (-ackskew <= (MAXACKWINDOW << fdata->td_winscale)) &&
+ ( ackskew <= (MAXACKWINDOW << fdata->td_winscale))) {
+
+ /* if ackskew < 0 then this should be due to fragmented
+ * packets. There is no way to know the length of the
+ * total packet in advance.
+ * We do know the total length from the fragment cache though.
+ * Note however that there might be more sessions with
+ * exactly the same source and destination parameters in the
+ * state cache (and source and destination is the only stuff
+ * that is saved in the fragment cache). Note further that
+ * some TCP connections in the state cache are hashed with
+ * sport and dport as well which makes it not worthwhile to
+ * look for them.
+ * Thus, when ackskew is negative but still seems to belong
+ * to this session, we bump up the destinations end value.
+ */
+ if (ackskew < 0)
+ tdata->td_end = ack;
+
+ /* update max window seen */
+ if (fdata->td_maxwin < win)
+ fdata->td_maxwin = win;
+ if (SEQ_GT(end, fdata->td_end))
+ fdata->td_end = end;
+ if (SEQ_GE(ack + win, tdata->td_maxend))
+ tdata->td_maxend = ack + win;
+ return 1;
+ }
+ return 0;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_stclone */
+/* Returns: ipstate_t* - NULL == cloning failed, */
+/* else pointer to new state structure */
+/* Parameters: fin(I) - pointer to packet information */
+/* tcp(I) - pointer to TCP/UDP header */
+/* is(I) - pointer to master state structure */
+/* */
+/* Create a "duplicate" state table entry from the master. The clone is */
+/* inserted into the state table with fr_stinsert(), has SI_CLONE cleared */
+/* and SI_CLONED set, and is returned with is_ref == 2 and its is_lock */
+/* released. */
+/* ------------------------------------------------------------------------ */
+static ipstate_t *fr_stclone(fin, tcp, is)
+fr_info_t *fin;
+tcphdr_t *tcp;
+ipstate_t *is;
+{
+ ipstate_t *clone;
+ u_32_t send;
+
+ /* the global limit on state entries applies to clones as well */
+ if (ips_num == fr_statemax) {
+ ATOMIC_INCL(ips_stats.iss_max);
+ fr_state_doflush = 1;
+ return NULL;
+ }
+ KMALLOC(clone, ipstate_t *);
+ if (clone == NULL)
+ return NULL;
+ bcopy((char *)is, (char *)clone, sizeof(*clone));
+
+ /*
+ * The bcopy() above copied the master's lock along with everything
+ * else; presumably MUTEX_NUKE wipes that copy. fr_stinsert() below
+ * initialises is_lock afresh.
+ */
+ MUTEX_NUKE(&clone->is_lock);
+
+ clone->is_die = ONE_DAY + fr_ticks;
+ clone->is_state[0] = 0;
+ clone->is_state[1] = 0;
+ /* sequence number following this segment (SYN and FIN count as 1) */
+ send = ntohl(tcp->th_seq) + fin->fin_dlen - (TCP_OFF(tcp) << 2) +
+ ((tcp->th_flags & TH_SYN) ? 1 : 0) +
+ ((tcp->th_flags & TH_FIN) ? 1 : 0);
+
+ /*
+ * Seed the window tracking for the side that sent this packet and
+ * reset the other side.
+ */
+ if (fin->fin_rev == 1) {
+ clone->is_dend = send;
+ clone->is_maxdend = send;
+ clone->is_send = 0;
+ clone->is_maxswin = 1;
+ clone->is_maxdwin = ntohs(tcp->th_win);
+ if (clone->is_maxdwin == 0)
+ clone->is_maxdwin = 1;
+ } else {
+ clone->is_send = send;
+ clone->is_maxsend = send;
+ clone->is_dend = 0;
+ clone->is_maxdwin = 1;
+ clone->is_maxswin = ntohs(tcp->th_win);
+ if (clone->is_maxswin == 0)
+ clone->is_maxswin = 1;
+ }
+
+ clone->is_flags &= ~SI_CLONE;
+ clone->is_flags |= SI_CLONED;
+ /* returns with clone->is_lock held; released below */
+ fr_stinsert(clone, fin->fin_rev);
+ clone->is_ref = 2;
+ if (clone->is_p == IPPROTO_TCP) {
+ (void) fr_tcp_age(&clone->is_sti, fin, ips_tqtqb,
+ clone->is_flags);
+ }
+ MUTEX_EXIT(&clone->is_lock);
+#ifdef IPFILTER_SCAN
+ /*
+ * NOTE(review): this attaches to the master (is), not the clone,
+ * whereas fr_addstate() skips entries flagged SI_CLONE - confirm
+ * that "is" rather than "clone" is intended here.
+ */
+ (void) ipsc_attachis(is);
+#endif
+#ifdef IPFILTER_SYNC
+ if (is->is_flags & IS_STATESYNC)
+ clone->is_sync = ipfsync_new(SMC_STATE, fin, clone);
+#endif
+ return clone;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_matchsrcdst */
+/* Returns: ipstate_t* - NULL == no match, else pointer to the matching */
+/* state structure (a new clone of "is" when a */
+/* wildcard port entry has SI_CLONE set) */
+/* Parameters: fin(I) - pointer to packet information */
+/* is(I) - pointer to state structure */
+/* src(I) - pointer to source address */
+/* dst(I) - pointer to destination address */
+/* tcp(I) - pointer to TCP/UDP header (NULL if no ports) */
+/* cmask(I) - mask applied to the packet flags (fin_flx) */
+/* before comparing them with those in the state */
+/* */
+/* Match a state table entry against an IP packet. The logic below is that */
+/* ret gets set to one if the match succeeds, else remains 0. If it is */
+/* still 0 after the test. no match. */
+/* On a match, wildcard addresses/ports and unset interface details in the */
+/* state entry are filled in from the packet and fin_rev is set. */
+/* ------------------------------------------------------------------------ */
+static ipstate_t *fr_matchsrcdst(fin, is, src, dst, tcp, cmask)
+fr_info_t *fin;
+ipstate_t *is;
+i6addr_t *src, *dst;
+tcphdr_t *tcp;
+u_32_t cmask;
+{
+ int ret = 0, rev, out, flags, flx = 0, idx;
+ u_short sp, dp;
+ u_32_t cflx;
+ void *ifp;
+
+ /*
+ * rev is non-zero when the packet's destination is not the
+ * destination recorded in the state entry.
+ */
+ rev = IP6_NEQ(&is->is_dst, dst);
+ ifp = fin->fin_ifp;
+ out = fin->fin_out;
+ flags = is->is_flags;
+ sp = 0;
+ dp = 0;
+
+ /*
+ * NOTE(review): htons() is used for one port and ntohs() for the
+ * other; both perform the same 16 bit conversion so the result is
+ * the same, but one spelling throughout would be clearer.
+ */
+ if (tcp != NULL) {
+ sp = htons(fin->fin_sport);
+ dp = ntohs(fin->fin_dport);
+ }
+ /*
+ * Addresses say "forward", but if a non-wildcard port disagrees then
+ * treat the packet as travelling in the reverse direction.
+ */
+ if (!rev) {
+ if (tcp != NULL) {
+ if (!(flags & SI_W_SPORT) && (sp != is->is_sport))
+ rev = 1;
+ else if (!(flags & SI_W_DPORT) && (dp != is->is_dport))
+ rev = 1;
+ }
+ }
+
+ /* index into is_ifp[]/is_ifname[] for this direction */
+ idx = (out << 1) + rev;
+
+ /*
+ * If the interface for this 'direction' is set, make sure it matches.
+ * An interface name that is not set matches any, as does a name of *.
+ */
+ if ((is->is_ifp[idx] == NULL &&
+ (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) ||
+ is->is_ifp[idx] == ifp)
+ ret = 1;
+
+ if (ret == 0)
+ return NULL;
+ ret = 0;
+
+ /*
+ * Match addresses and ports.
+ */
+ if (rev == 0) {
+ if ((IP6_EQ(&is->is_dst, dst) || (flags & SI_W_DADDR)) &&
+ (IP6_EQ(&is->is_src, src) || (flags & SI_W_SADDR))) {
+ if (tcp) {
+ if ((sp == is->is_sport || flags & SI_W_SPORT)&&
+ (dp == is->is_dport || flags & SI_W_DPORT))
+ ret = 1;
+ } else {
+ ret = 1;
+ }
+ }
+ } else {
+ if ((IP6_EQ(&is->is_dst, src) || (flags & SI_W_DADDR)) &&
+ (IP6_EQ(&is->is_src, dst) || (flags & SI_W_SADDR))) {
+ if (tcp) {
+ if ((dp == is->is_sport || flags & SI_W_SPORT)&&
+ (sp == is->is_dport || flags & SI_W_DPORT))
+ ret = 1;
+ } else {
+ ret = 1;
+ }
+ }
+ }
+
+ if (ret == 0)
+ return NULL;
+
+ /*
+ * Whether or not this should be here, is questionable, but the aim
+ * is to get this out of the main line.
+ */
+ if (tcp == NULL)
+ flags = is->is_flags & ~(SI_WILDP|SI_NEWFR|SI_CLONE|SI_CLONED);
+
+ /*
+ * Only one of the source or destination address can be flaged as a
+ * wildcard. Fill in the missing address, if set.
+ * For IPv6, if the address being copied in is multicast, then
+ * don't reset the wild flag - multicast causes it to be set in the
+ * first place!
+ */
+ if ((flags & (SI_W_SADDR|SI_W_DADDR))) {
+ fr_ip_t *fi = &fin->fin_fi;
+
+ if ((flags & SI_W_SADDR) != 0) {
+ if (rev == 0) {
+#ifdef USE_INET6
+ if (is->is_v == 6 &&
+ IN6_IS_ADDR_MULTICAST(&fi->fi_src.in6))
+ /*EMPTY*/;
+ else
+#endif
+ {
+ is->is_src = fi->fi_src;
+ is->is_flags &= ~SI_W_SADDR;
+ }
+ } else {
+#ifdef USE_INET6
+ if (is->is_v == 6 &&
+ IN6_IS_ADDR_MULTICAST(&fi->fi_dst.in6))
+ /*EMPTY*/;
+ else
+#endif
+ {
+ is->is_src = fi->fi_dst;
+ is->is_flags &= ~SI_W_SADDR;
+ }
+ }
+ } else if ((flags & SI_W_DADDR) != 0) {
+ if (rev == 0) {
+#ifdef USE_INET6
+ if (is->is_v == 6 &&
+ IN6_IS_ADDR_MULTICAST(&fi->fi_dst.in6))
+ /*EMPTY*/;
+ else
+#endif
+ {
+ is->is_dst = fi->fi_dst;
+ is->is_flags &= ~SI_W_DADDR;
+ }
+ } else {
+#ifdef USE_INET6
+ if (is->is_v == 6 &&
+ IN6_IS_ADDR_MULTICAST(&fi->fi_src.in6))
+ /*EMPTY*/;
+ else
+#endif
+ {
+ is->is_dst = fi->fi_src;
+ is->is_flags &= ~SI_W_DADDR;
+ }
+ }
+ }
+ /* no wildcards left in the entry: one less wild state */
+ if ((is->is_flags & (SI_WILDA|SI_WILDP)) == 0) {
+ ATOMIC_DECL(ips_stats.iss_wild);
+ }
+ }
+
+ flx = fin->fin_flx & cmask;
+ cflx = is->is_flx[out][rev];
+
+ /*
+ * Match up any flags set from IP options.
+ * The packet flags are only compared if flags have already been
+ * recorded for this direction (cflx != 0).
+ */
+ if ((cflx && (flx != (cflx & cmask))) ||
+ ((fin->fin_optmsk & is->is_optmsk[rev]) != is->is_opt[rev]) ||
+ ((fin->fin_secmsk & is->is_secmsk) != is->is_sec) ||
+ ((fin->fin_auth & is->is_authmsk) != is->is_auth))
+ return NULL;
+
+ /*
+ * Only one of the source or destination port can be flagged as a
+ * wildcard. When filling it in, fill in a copy of the matched entry
+ * if it has the cloning flag set.
+ * Packets flagged FI_IGNORE match without updating the state entry
+ * any further.
+ */
+ if ((fin->fin_flx & FI_IGNORE) != 0) {
+ fin->fin_rev = rev;
+ return is;
+ }
+
+ if ((flags & (SI_W_SPORT|SI_W_DPORT))) {
+ if ((flags & SI_CLONE) != 0) {
+ ipstate_t *clone;
+
+ clone = fr_stclone(fin, tcp, is);
+ if (clone == NULL)
+ return NULL;
+ /* from here on it is the clone that gets updated */
+ is = clone;
+ } else {
+ ATOMIC_DECL(ips_stats.iss_wild);
+ }
+
+ if ((flags & SI_W_SPORT) != 0) {
+ if (rev == 0) {
+ is->is_sport = sp;
+ is->is_send = ntohl(tcp->th_seq);
+ } else {
+ is->is_sport = dp;
+ is->is_send = ntohl(tcp->th_ack);
+ }
+ is->is_maxsend = is->is_send + 1;
+ } else if ((flags & SI_W_DPORT) != 0) {
+ if (rev == 0) {
+ is->is_dport = dp;
+ is->is_dend = ntohl(tcp->th_ack);
+ } else {
+ is->is_dport = sp;
+ is->is_dend = ntohl(tcp->th_seq);
+ }
+ is->is_maxdend = is->is_dend + 1;
+ }
+ is->is_flags &= ~(SI_W_SPORT|SI_W_DPORT);
+ if ((flags & SI_CLONED) && ipstate_logging)
+ ipstate_log(is, ISL_CLONE);
+ }
+
+ /* NOTE(review): ret is not read again after this assignment */
+ ret = -1;
+
+ /*
+ * First packet seen for this direction: remember its flags and IP
+ * options so that later packets can be compared against them.
+ */
+ if (is->is_flx[out][rev] == 0) {
+ is->is_flx[out][rev] = flx;
+ is->is_opt[rev] = fin->fin_optmsk;
+ if (is->is_v == 6) {
+ is->is_opt[rev] &= ~0x8;
+ is->is_optmsk[rev] &= ~0x8;
+ }
+ }
+
+ /*
+ * Check if the interface name for this "direction" is set and if not,
+ * fill it in.
+ */
+ if (is->is_ifp[idx] == NULL &&
+ (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) {
+ is->is_ifp[idx] = ifp;
+ COPYIFNAME(ifp, is->is_ifname[idx]);
+ }
+ fin->fin_rev = rev;
+ return is;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_checkicmpmatchingstate */
+/* Returns: ipstate_t* - NULL == no matching state found, else pointer */
+/* to the matching state structure */
+/* Parameters: fin(I) - pointer to packet information */
+/* */
+/* If we've got an ICMP error message, using the information stored in the */
+/* ICMP packet, look for a matching state table entry. */
+/* */
+/* If we return NULL then no lock on ipf_state is held. */
+/* If we return non-null then a read-lock on ipf_state is held. */
+/* ------------------------------------------------------------------------ */
+static ipstate_t *fr_checkicmpmatchingstate(fin)
+fr_info_t *fin;
+{
+ ipstate_t *is, **isp;
+ u_short sport, dport;
+ u_char pr;
+ int backward, i, oi;
+ i6addr_t dst, src;
+ struct icmp *ic;
+ u_short savelen;
+ icmphdr_t *icmp;
+ fr_info_t ofin;
+ tcphdr_t *tcp;
+ int len;
+ ip_t *oip;
+ u_int hv;
+
+ /*
+ * Does it at least have the return (basic) IP header ?
+ * Is it an actual recognised ICMP error type?
+ * Only a basic IP header (no options) should be with
+ * an ICMP error header.
+ */
+ if ((fin->fin_v != 4) || (fin->fin_hlen != sizeof(ip_t)) ||
+ (fin->fin_plen < ICMPERR_MINPKTLEN) ||
+ !(fin->fin_flx & FI_ICMPERR))
+ return NULL;
+ ic = fin->fin_dp;
+
+ oip = (ip_t *)((char *)ic + ICMPERR_ICMPHLEN);
+ /*
+ * Check if the at least the old IP header (with options) and
+ * 8 bytes of payload is present.
+ */
+ if (fin->fin_plen < ICMPERR_MAXPKTLEN + ((IP_HL(oip) - 5) << 2))
+ return NULL;
+
+ /*
+ * Sanity Checks.
+ */
+ len = fin->fin_dlen - ICMPERR_ICMPHLEN;
+ if ((len <= 0) || ((IP_HL(oip) << 2) > len))
+ return NULL;
+
+ /*
+ * Is the buffer big enough for all of it ? It's the size of the IP
+ * header claimed in the encapsulated part which is of concern. It
+ * may be too big to be in this buffer but not so big that it's
+ * outside the ICMP packet, leading to TCP deref's causing problems.
+ * This is possible because we don't know how big oip_hl is when we
+ * do the pullup early in fr_check() and thus can't guarantee it is
+ * all here now.
+ */
+#ifdef _KERNEL
+ {
+ mb_t *m;
+
+ m = fin->fin_m;
+# if defined(MENTAT)
+ if ((char *)oip + len > (char *)m->b_wptr)
+ return NULL;
+# else
+ if ((char *)oip + len > (char *)fin->fin_ip + m->m_len)
+ return NULL;
+# endif
+ }
+#endif
+ bcopy((char *)fin, (char *)&ofin, sizeof(fin));
+
+ /*
+ * in the IPv4 case we must zero the i6addr union otherwise
+ * the IP6_EQ and IP6_NEQ macros produce the wrong results because
+ * of the 'junk' in the unused part of the union
+ */
+ bzero((char *)&src, sizeof(src));
+ bzero((char *)&dst, sizeof(dst));
+
+ /*
+ * we make an fin entry to be able to feed it to
+ * matchsrcdst note that not all fields are encessary
+ * but this is the cleanest way. Note further we fill
+ * in fin_mp such that if someone uses it we'll get
+ * a kernel panic. fr_matchsrcdst does not use this.
+ *
+ * watch out here, as ip is in host order and oip in network
+ * order. Any change we make must be undone afterwards, like
+ * oip->ip_off - it is still in network byte order so fix it.
+ */
+ savelen = oip->ip_len;
+ oip->ip_len = len;
+ oip->ip_off = ntohs(oip->ip_off);
+
+ ofin.fin_flx = FI_NOCKSUM;
+ ofin.fin_v = 4;
+ ofin.fin_ip = oip;
+ ofin.fin_m = NULL; /* if dereferenced, panic XXX */
+ ofin.fin_mp = NULL; /* if dereferenced, panic XXX */
+ ofin.fin_plen = fin->fin_dlen - ICMPERR_ICMPHLEN;
+ (void) fr_makefrip(IP_HL(oip) << 2, oip, &ofin);
+ ofin.fin_ifp = fin->fin_ifp;
+ ofin.fin_out = !fin->fin_out;
+ /*
+ * Reset the short and bad flag here because in fr_matchsrcdst()
+ * the flags for the current packet (fin_flx) are compared against
+ * those for the existing session.
+ */
+ ofin.fin_flx &= ~(FI_BAD|FI_SHORT);
+
+ /*
+ * Put old values of ip_len and ip_off back as we don't know
+ * if we have to forward the packet (or process it again.
+ */
+ oip->ip_len = savelen;
+ oip->ip_off = htons(oip->ip_off);
+
+ switch (oip->ip_p)
+ {
+ case IPPROTO_ICMP :
+ /*
+ * an ICMP error can only be generated as a result of an
+ * ICMP query, not as the response on an ICMP error
+ *
+ * XXX theoretically ICMP_ECHOREP and the other reply's are
+ * ICMP query's as well, but adding them here seems strange XXX
+ */
+ if ((ofin.fin_flx & FI_ICMPERR) != 0)
+ return NULL;
+
+ /*
+ * perform a lookup of the ICMP packet in the state table
+ */
+ icmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
+ hv = (pr = oip->ip_p);
+ src.in4 = oip->ip_src;
+ hv += src.in4.s_addr;
+ dst.in4 = oip->ip_dst;
+ hv += dst.in4.s_addr;
+ hv += icmp->icmp_id;
+ hv = DOUBLE_HASH(hv);
+
+ READ_ENTER(&ipf_state);
+ for (isp = &ips_table[hv]; ((is = *isp) != NULL); ) {
+ isp = &is->is_hnext;
+ if ((is->is_p != pr) || (is->is_v != 4))
+ continue;
+ if (is->is_pass & FR_NOICMPERR)
+ continue;
+ is = fr_matchsrcdst(&ofin, is, &src, &dst,
+ NULL, FI_ICMPCMP);
+ if (is != NULL) {
+ if ((is->is_pass & FR_NOICMPERR) != 0) {
+ RWLOCK_EXIT(&ipf_state);
+ return NULL;
+ }
+ /*
+ * i : the index of this packet (the icmp
+ * unreachable)
+ * oi : the index of the original packet found
+ * in the icmp header (i.e. the packet
+ * causing this icmp)
+ * backward : original packet was backward
+ * compared to the state
+ */
+ backward = IP6_NEQ(&is->is_src, &src);
+ fin->fin_rev = !backward;
+ i = (!backward << 1) + fin->fin_out;
+ oi = (backward << 1) + ofin.fin_out;
+ if (is->is_icmppkts[i] > is->is_pkts[oi])
+ continue;
+ ips_stats.iss_hits++;
+ is->is_icmppkts[i]++;
+ return is;
+ }
+ }
+ RWLOCK_EXIT(&ipf_state);
+ return NULL;
+ case IPPROTO_TCP :
+ case IPPROTO_UDP :
+ break;
+ default :
+ return NULL;
+ }
+
+ tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
+ dport = tcp->th_dport;
+ sport = tcp->th_sport;
+
+ hv = (pr = oip->ip_p);
+ src.in4 = oip->ip_src;
+ hv += src.in4.s_addr;
+ dst.in4 = oip->ip_dst;
+ hv += dst.in4.s_addr;
+ hv += dport;
+ hv += sport;
+ hv = DOUBLE_HASH(hv);
+
+ READ_ENTER(&ipf_state);
+ for (isp = &ips_table[hv]; ((is = *isp) != NULL); ) {
+ isp = &is->is_hnext;
+ /*
+ * Only allow this icmp though if the
+ * encapsulated packet was allowed through the
+ * other way around. Note that the minimal amount
+ * of info present does not allow for checking against
+ * tcp internals such as seq and ack numbers. Only the
+ * ports are known to be present and can be even if the
+ * short flag is set.
+ */
+ if ((is->is_p == pr) && (is->is_v == 4) &&
+ (is = fr_matchsrcdst(&ofin, is, &src, &dst,
+ tcp, FI_ICMPCMP))) {
+ /*
+ * i : the index of this packet (the icmp unreachable)
+ * oi : the index of the original packet found in the
+ * icmp header (i.e. the packet causing this icmp)
+ * backward : original packet was backward compared to
+ * the state
+ */
+ backward = IP6_NEQ(&is->is_src, &src);
+ fin->fin_rev = !backward;
+ i = (!backward << 1) + fin->fin_out;
+ oi = (backward << 1) + ofin.fin_out;
+
+ if (((is->is_pass & FR_NOICMPERR) != 0) ||
+ (is->is_icmppkts[i] > is->is_pkts[oi]))
+ break;
+ ips_stats.iss_hits++;
+ is->is_icmppkts[i]++;
+ /*
+ * we deliberately do not touch the timeouts
+ * for the accompanying state table entry.
+ * It remains to be seen if that is correct. XXX
+ */
+ return is;
+ }
+ }
+ RWLOCK_EXIT(&ipf_state);
+ return NULL;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    fr_ipsmove                                                  */
+/* Returns:     Nil                                                         */
+/* Parameters:  is(I) - state table entry to be rehashed                    */
+/*              hv(I) - raw hash value; DOUBLE_HASH(hv) selects the bucket  */
+/* Write Locks: ipf_state                                                   */
+/*                                                                          */
+/* Unhook a state entry from the hash bucket it currently sits in and push  */
+/* it onto the front of the bucket chosen by the new hash value, keeping    */
+/* the bucket length and buckets-in-use statistics in step.                 */
+/* ------------------------------------------------------------------------ */
+static void fr_ipsmove(is, hv)
+ipstate_t *is;
+u_int hv;
+{
+	ipstate_t **linkp, *succ;
+	u_int oldslot, newslot;
+
+	ASSERT(rw_read_locked(&ipf_state.ipf_lk) == 0);
+
+	/*
+	 * Step 1: splice the entry out of its present chain.  is_phnext
+	 * points at whichever pointer currently refers to this entry.
+	 */
+	oldslot = is->is_hv;
+	linkp = is->is_phnext;
+	succ = is->is_hnext;
+	if (succ != NULL)
+		succ->is_phnext = linkp;
+	*linkp = succ;
+	ips_stats.iss_bucketlen[oldslot]--;
+	if (ips_table[oldslot] == NULL)
+		ips_stats.iss_inuse--;
+
+	/*
+	 * Step 2: insert at the head of the destination chain.
+	 */
+	newslot = DOUBLE_HASH(hv);
+	linkp = &ips_table[newslot];
+	succ = *linkp;
+	if (succ == NULL)
+		ips_stats.iss_inuse++;
+	else
+		succ->is_phnext = &is->is_hnext;
+	ips_stats.iss_bucketlen[newslot]++;
+	is->is_hv = newslot;
+	is->is_hnext = succ;
+	is->is_phnext = linkp;
+	*linkp = is;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_stlookup */
+/* Returns: ipstate_t* - NULL == no matching state found, */
+/* else pointer to state information is returned */
+/* Parameters: fin(I) - pointer to packet information */
+/* tcp(I) - pointer to TCP/UDP header; the value passed in is not used,
+ * it is overwritten with fin->fin_dp on entry.
+ * ifqp(O) - if non-NULL, receives the timeout queue chosen for the
+ * matching entry. Only the ICMP and ICMPv6 paths report a
+ * queue; the TCP/UDP and default paths clear the local
+ * ifqp so nothing is written back. */
+/* */
+/* Search the state table for a matching entry to the packet described by */
+/* the contents of *fin. */
+/* */
+/* If we return NULL then no lock on ipf_state is held. */
+/* If we return non-null then a read-lock on ipf_state is held. */
+/* ------------------------------------------------------------------------ */
+ipstate_t *fr_stlookup(fin, tcp, ifqp)
+fr_info_t *fin;
+tcphdr_t *tcp;
+ipftq_t **ifqp;
+{
+ u_int hv, hvm, pr, v, tryagain;
+ ipstate_t *is, **isp;
+ u_short dport, sport;
+ i6addr_t src, dst;
+ struct icmp *ic;
+ ipftq_t *ifq;
+ int oow;
+
+ is = NULL;
+ ifq = NULL;
+ tcp = fin->fin_dp; /* the caller-supplied tcp pointer is discarded here */
+ ic = (struct icmp *)tcp; /* same header pointer, viewed as ICMP */
+ hv = (pr = fin->fin_fi.fi_p); /* hash starts from the protocol number */
+ src = fin->fin_fi.fi_src;
+ dst = fin->fin_fi.fi_dst;
+ hv += src.in4.s_addr;
+ hv += dst.in4.s_addr;
+
+ v = fin->fin_fi.fi_v;
+#ifdef USE_INET6
+ if (v == 6) {
+ hv += fin->fin_fi.fi_src.i6[1];
+ hv += fin->fin_fi.fi_src.i6[2];
+ hv += fin->fin_fi.fi_src.i6[3];
+
+ if ((fin->fin_p == IPPROTO_ICMPV6) &&
+ IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_dst.in6)) {
+ hv -= dst.in4.s_addr; /* multicast destination is left out of the hash */
+ } else {
+ hv += fin->fin_fi.fi_dst.i6[1];
+ hv += fin->fin_fi.fi_dst.i6[2];
+ hv += fin->fin_fi.fi_dst.i6[3];
+ }
+ }
+#endif
+
+ /*
+ * Search the hash table for matching packet header info.
+ * Each protocol case finishes the hash with its own extra fields
+ * (ICMP id, TCP/UDP ports) before walking the selected bucket.
+ */
+ switch (pr)
+ {
+#ifdef USE_INET6
+ case IPPROTO_ICMPV6 :
+ tryagain = 0;
+ if (v == 6) {
+ if ((ic->icmp_type == ICMP6_ECHO_REQUEST) ||
+ (ic->icmp_type == ICMP6_ECHO_REPLY)) {
+ hv += ic->icmp_id;
+ }
+ }
+ READ_ENTER(&ipf_state);
+icmp6again:
+ hvm = DOUBLE_HASH(hv);
+ for (isp = &ips_table[hvm]; ((is = *isp) != NULL); ) {
+ isp = &is->is_hnext; /* advance first: 'is' is reassigned below */
+ if ((is->is_p != pr) || (is->is_v != v))
+ continue;
+ is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP);
+ if (is != NULL &&
+ fr_matchicmpqueryreply(v, &is->is_icmp,
+ ic, fin->fin_rev)) {
+ if (fin->fin_rev)
+ ifq = &ips_icmpacktq;
+ else
+ ifq = &ips_icmptq;
+ break;
+ }
+ }
+
+ if (is != NULL) {
+ if ((tryagain != 0) && !(is->is_flags & SI_W_DADDR)) {
+ hv += fin->fin_fi.fi_src.i6[0];
+ hv += fin->fin_fi.fi_src.i6[1];
+ hv += fin->fin_fi.fi_src.i6[2];
+ hv += fin->fin_fi.fi_src.i6[3];
+ fr_ipsmove(is, hv); /* rehash the entry under the restored, full hash */
+ MUTEX_DOWNGRADE(&ipf_state); /* presumably write lock -> read lock; confirm against the macro */
+ }
+ break; /* NOTE(review): if tryagain is set but the move above is skipped, the lock taken with WRITE_ENTER is still held here without a downgrade - confirm */
+ }
+ RWLOCK_EXIT(&ipf_state);
+
+ /*
+ * No matching icmp state entry. Perhaps this is a
+ * response to another state entry.
+ *
+ * XXX With some ICMP6 packets, the "other" address is already
+ * in the packet, after the ICMP6 header, and this could be
+ * used in place of the multicast address. However, taking
+ * advantage of this requires some significant code changes
+ * to handle the specific types where that is the case.
+ */
+ if ((ips_stats.iss_wild != 0) && (v == 6) && (tryagain == 0) &&
+ !IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_src.in6)) {
+ hv -= fin->fin_fi.fi_src.i6[0];
+ hv -= fin->fin_fi.fi_src.i6[1];
+ hv -= fin->fin_fi.fi_src.i6[2];
+ hv -= fin->fin_fi.fi_src.i6[3];
+ tryagain = 1;
+ WRITE_ENTER(&ipf_state); /* second pass runs under the write lock because a hit may be moved by fr_ipsmove() */
+ goto icmp6again;
+ }
+
+ is = fr_checkicmp6matchingstate(fin);
+ if (is != NULL)
+ return is; /* returns directly: ifqp is not filled in on this path */
+ break;
+#endif
+
+ case IPPROTO_ICMP :
+ if (v == 4) {
+ hv += ic->icmp_id;
+ }
+ hv = DOUBLE_HASH(hv);
+ READ_ENTER(&ipf_state);
+ for (isp = &ips_table[hv]; ((is = *isp) != NULL); ) {
+ isp = &is->is_hnext;
+ if ((is->is_p != pr) || (is->is_v != v))
+ continue;
+ is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP);
+ if (is != NULL &&
+ fr_matchicmpqueryreply(v, &is->is_icmp,
+ ic, fin->fin_rev)) {
+ if (fin->fin_rev)
+ ifq = &ips_icmpacktq;
+ else
+ ifq = &ips_icmptq;
+ break;
+ }
+ }
+ if (is == NULL) {
+ RWLOCK_EXIT(&ipf_state); /* miss: honour the "no lock held on NULL" contract */
+ }
+ break;
+
+ case IPPROTO_TCP :
+ case IPPROTO_UDP :
+ ifqp = NULL; /* no queue is handed back to the caller for TCP/UDP */
+ sport = htons(fin->fin_data[0]);
+ hv += sport;
+ dport = htons(fin->fin_data[1]);
+ hv += dport;
+ oow = 0;
+ tryagain = 0;
+ READ_ENTER(&ipf_state);
+retry_tcpudp:
+ hvm = DOUBLE_HASH(hv);
+ for (isp = &ips_table[hvm]; ((is = *isp) != NULL); ) {
+ isp = &is->is_hnext;
+ if ((is->is_p != pr) || (is->is_v != v))
+ continue;
+ fin->fin_flx &= ~FI_OOW; /* cleared per candidate; accumulated in oow below */
+ is = fr_matchsrcdst(fin, is, &src, &dst, tcp, FI_CMP);
+ if (is != NULL) {
+ if (pr == IPPROTO_TCP) {
+ if (!fr_tcpstate(fin, tcp, is)) {
+ oow |= fin->fin_flx & FI_OOW;
+ continue;
+ }
+ }
+ break;
+ }
+ }
+ if (is != NULL) {
+ if (tryagain &&
+ !(is->is_flags & (SI_CLONE|SI_WILDP|SI_WILDA))) {
+ hv += dport;
+ hv += sport;
+ fr_ipsmove(is, hv); /* rehash the entry with the ports included again */
+ MUTEX_DOWNGRADE(&ipf_state); /* presumably write lock -> read lock; confirm against the macro */
+ }
+ break;
+ }
+ RWLOCK_EXIT(&ipf_state);
+
+ if (!tryagain && ips_stats.iss_wild) {
+ hv -= dport;
+ hv -= sport;
+ tryagain = 1;
+ WRITE_ENTER(&ipf_state); /* retry without ports, under the write lock so a hit can be moved */
+ goto retry_tcpudp;
+ }
+ fin->fin_flx |= oow; /* only on a miss: report any out-of-window flag seen */
+ break;
+
+#if 0
+ case IPPROTO_GRE :
+ gre = fin->fin_dp;
+ if (GRE_REV(gre->gr_flags) == 1) {
+ hv += gre->gr_call;
+ }
+ /* FALLTHROUGH */
+#endif
+ default :
+ ifqp = NULL; /* ifq is chosen below but never reported to the caller on this path */
+ hvm = DOUBLE_HASH(hv);
+ READ_ENTER(&ipf_state);
+ for (isp = &ips_table[hvm]; ((is = *isp) != NULL); ) {
+ isp = &is->is_hnext;
+ if ((is->is_p != pr) || (is->is_v != v))
+ continue;
+ is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP);
+ if (is != NULL) {
+ ifq = &ips_iptq;
+ break;
+ }
+ }
+ if (is == NULL) {
+ RWLOCK_EXIT(&ipf_state);
+ }
+ break;
+ }
+
+ if ((is != NULL) && ((is->is_sti.tqe_flags & TQE_RULEBASED) != 0) &&
+ (is->is_tqehead[fin->fin_rev] != NULL))
+ ifq = is->is_tqehead[fin->fin_rev]; /* a rule-based queue overrides the protocol default */
+ if (ifq != NULL && ifqp != NULL)
+ *ifqp = ifq;
+ return is;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    fr_updatestate                                              */
+/* Returns:     Nil                                                         */
+/* Parameters:  fin(I) - pointer to packet information                      */
+/*              is(I)  - pointer to state table entry                       */
+/*              ifq(I) - timeout queue to move the entry to, or NULL to     */
+/*                       leave it where it is; ignored (replaced by the     */
+/*                       entry's own queue) when the entry is rule based    */
+/* Read Locks:  ipf_state                                                   */
+/*                                                                          */
+/* Accounts a newly received packet against a state entry: repositions the  */
+/* entry on its timeout queue, bumps the per-direction packet and byte      */
+/* counters and the global hit counter, records the owning rule in fin and, */
+/* for passed fragments, seeds the fragment cache.                          */
+/* ------------------------------------------------------------------------ */
+void fr_updatestate(fin, is, ifq)
+fr_info_t *fin;
+ipstate_t *is;
+ipftq_t *ifq;
+{
+	ipftqent_t *sti = &is->is_sti;
+	int slot, verdict;
+
+	MUTEX_ENTER(&is->is_lock);
+
+	/*
+	 * A rule-based entry always uses the queue stored for this
+	 * direction; otherwise the caller's choice applies (NULL for TCP,
+	 * meaning "do not move").
+	 */
+	if ((sti->tqe_flags & TQE_RULEBASED) != 0)
+		ifq = is->is_tqehead[fin->fin_rev];
+	if (ifq != NULL)
+		fr_movequeue(sti, sti->tqe_ifq, ifq);
+
+	/* Counter index: bit 1 = reverse direction, bit 0 = outbound. */
+	slot = (fin->fin_rev << 1) + fin->fin_out;
+	is->is_pkts[slot]++;
+	is->is_bytes[slot] += fin->fin_plen;
+
+	MUTEX_EXIT(&is->is_lock);
+
+#ifdef IPFILTER_SYNC
+	if (is->is_flags & IS_STATESYNC)
+		ipfsync_update(SMC_STATE, fin, is->is_sync);
+#endif
+
+	ATOMIC_INCL(ips_stats.iss_hits);
+
+	fin->fin_fr = is->is_rule;
+
+	/*
+	 * Only fragments that the state entry passes get a fragment cache
+	 * entry.
+	 */
+	if ((fin->fin_flx & FI_FRAG) == 0)
+		return;
+
+	verdict = is->is_pass;
+	if (FR_ISPASS(verdict))
+		(void) fr_newfrag(fin, verdict ^ FR_KEEPSTATE);
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    fr_checkstate                                               */
+/* Returns:     frentry_t* - NULL == search failed (or the matching state   */
+/*                           entry has no rule attached),                   */
+/*                           else pointer to rule for matching state        */
+/* Parameters:  fin(I)   - pointer to packet information                    */
+/*              passp(O) - receives the state entry's pass flags on a match */
+/*                                                                          */
+/* Check if a packet is associated with an entry in the state table.        */
+/*                                                                          */
+/* A successful lookup leaves ipf_state read-locked.  Every exit taken once */
+/* an entry has been matched releases that lock, including the exits for a  */
+/* NAT tag mismatch.                                                        */
+/* ------------------------------------------------------------------------ */
+frentry_t *fr_checkstate(fin, passp)
+fr_info_t *fin;
+u_32_t *passp;
+{
+	ipstate_t *is;
+	frentry_t *fr;
+	tcphdr_t *tcp;
+	ipftq_t *ifq;
+	u_int pass;
+
+	/*
+	 * Nothing to do if state processing is locked out, the table is
+	 * empty, or the packet is short/bad/a fragment body/already matched.
+	 */
+	if (fr_state_lock || (ips_list == NULL) ||
+	    (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGBODY|FI_BAD)))
+		return NULL;
+
+	is = NULL;
+	if ((fin->fin_flx & FI_TCPUDP) ||
+	    (fin->fin_fi.fi_p == IPPROTO_ICMP)
+#ifdef USE_INET6
+	    || (fin->fin_fi.fi_p == IPPROTO_ICMPV6)
+#endif
+	    )
+		tcp = fin->fin_dp;
+	else
+		tcp = NULL;
+
+	/*
+	 * Search the hash table for matching packet header info.
+	 */
+	ifq = NULL;
+	is = fin->fin_state;
+	if (is == NULL)
+		is = fr_stlookup(fin, tcp, &ifq);
+	switch (fin->fin_p)
+	{
+#ifdef USE_INET6
+	case IPPROTO_ICMPV6 :
+		if (is != NULL)
+			break;
+		if (fin->fin_v == 6) {
+			is = fr_checkicmp6matchingstate(fin);
+			if (is != NULL)
+				goto matched;
+		}
+		break;
+#endif
+	case IPPROTO_ICMP :
+		if (is != NULL)
+			break;
+		/*
+		 * No matching icmp state entry. Perhaps this is a
+		 * response to another state entry.
+		 */
+		is = fr_checkicmpmatchingstate(fin);
+		if (is != NULL)
+			goto matched;
+		break;
+	case IPPROTO_TCP :
+		if (is == NULL)
+			break;
+
+		if (is->is_pass & FR_NEWISN) {
+			if (fin->fin_out == 0)
+				fr_fixinisn(fin, is);
+			else if (fin->fin_out == 1)
+				fr_fixoutisn(fin, is);
+		}
+		break;
+	default :
+		if (fin->fin_rev)
+			ifq = &ips_udpacktq;
+		else
+			ifq = &ips_udptq;
+		break;
+	}
+	if (is == NULL) {
+		ATOMIC_INCL(ips_stats.iss_miss);
+		return NULL;
+	}
+
+matched:
+	fr = is->is_rule;
+	if (fr != NULL) {
+		if ((fin->fin_out == 0) && (fr->fr_nattag.ipt_num[0] != 0)) {
+			/*
+			 * The rule demands a NAT tag on inbound packets.  A
+			 * missing or different tag means this state does not
+			 * apply.  The lookup left ipf_state locked, so it
+			 * must be released before bailing out; the caller
+			 * cannot tell this NULL from an ordinary miss.
+			 */
+			if ((fin->fin_nattag == NULL) ||
+			    (fr_matchtag(&fr->fr_nattag,
+					 fin->fin_nattag) != 0)) {
+				RWLOCK_EXIT(&ipf_state);
+				return NULL;
+			}
+		}
+		(void) strncpy(fin->fin_group, fr->fr_group, FR_GROUPLEN);
+		fin->fin_icode = fr->fr_icode;
+	}
+
+	fin->fin_rule = is->is_rulen;
+	pass = is->is_pass;
+	fr_updatestate(fin, is, ifq);
+	if (fin->fin_out == 1)
+		fin->fin_nat = is->is_nat[fin->fin_rev];
+
+	/*
+	 * Remember the entry in fin and take a reference on it before the
+	 * table lock is dropped.
+	 */
+	fin->fin_state = is;
+	is->is_touched = fr_ticks;
+	MUTEX_ENTER(&is->is_lock);
+	is->is_ref++;
+	MUTEX_EXIT(&is->is_lock);
+	RWLOCK_EXIT(&ipf_state);
+	fin->fin_flx |= FI_STATE;
+	if ((pass & FR_LOGFIRST) != 0)
+		pass &= ~(FR_LOGFIRST|FR_LOG);
+	*passp = pass;
+	return fr;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    fr_fixoutisn                                                */
+/* Returns:     Nil                                                         */
+/* Parameters:  fin(I) - pointer to packet information                      */
+/*              is(I)  - pointer to master state structure                  */
+/*                                                                          */
+/* Outbound counterpart of fr_fixinisn: adds the stored ISN increment to    */
+/* the TCP sequence number and patches the TCP checksum accordingly.        */
+/* Forward packets (fin_rev == 0) are handled when IS_ISNSYN is set and use */
+/* slot 0; reverse packets (fin_rev == 1) are handled when IS_ISNACK is set */
+/* and use slot 1.  Anything else is left untouched.                        */
+/* ------------------------------------------------------------------------ */
+static void fr_fixoutisn(fin, is)
+fr_info_t *fin;
+ipstate_t *is;
+{
+	tcphdr_t *th;
+	u_32_t newseq;
+	int dir, side;
+
+	th = fin->fin_dp;
+	dir = fin->fin_rev;
+
+	switch (dir)
+	{
+	case 0 :
+		if ((is->is_flags & IS_ISNSYN) == 0)
+			return;
+		side = 0;
+		break;
+	case 1 :
+		if ((is->is_flags & IS_ISNACK) == 0)
+			return;
+		side = 1;
+		break;
+	default :
+		return;
+	}
+
+	newseq = ntohl(th->th_seq);
+	newseq += is->is_isninc[side];
+	th->th_seq = htonl(newseq);
+	fix_outcksum(fin, &th->th_sum, is->is_sumd[side]);
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    fr_fixinisn                                                 */
+/* Returns:     Nil                                                         */
+/* Parameters:  fin(I) - pointer to packet information                      */
+/*              is(I)  - pointer to master state structure                  */
+/*                                                                          */
+/* Inbound counterpart of fr_fixoutisn: subtracts the stored ISN increment  */
+/* from the TCP acknowledgement number and patches the TCP checksum.        */
+/* Reverse packets (fin_rev == 1) are handled when IS_ISNSYN is set and use */
+/* slot 0; forward packets (fin_rev == 0) are handled when IS_ISNACK is set */
+/* and use slot 1.  Anything else is left untouched.                        */
+/* ------------------------------------------------------------------------ */
+static void fr_fixinisn(fin, is)
+fr_info_t *fin;
+ipstate_t *is;
+{
+	tcphdr_t *th;
+	u_32_t newack;
+	int dir, side;
+
+	th = fin->fin_dp;
+	dir = fin->fin_rev;
+
+	switch (dir)
+	{
+	case 1 :
+		if ((is->is_flags & IS_ISNSYN) == 0)
+			return;
+		side = 0;
+		break;
+	case 0 :
+		if ((is->is_flags & IS_ISNACK) == 0)
+			return;
+		side = 1;
+		break;
+	default :
+		return;
+	}
+
+	newack = ntohl(th->th_ack);
+	newack -= is->is_isninc[side];
+	th->th_ack = htonl(newack);
+	fix_incksum(fin, &th->th_sum, is->is_sumd[side]);
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    fr_statesync                                                */
+/* Returns:     Nil                                                         */
+/* Parameters:  ifp(I) - pointer to interface, or NULL for "all"            */
+/*                                                                          */
+/* Refreshes the interface pointers cached in every state entry by looking  */
+/* each one up again from its stored interface name.  With a NULL ifp all   */
+/* four slots of every entry are refreshed; with a non-NULL ifp only the    */
+/* slots currently pointing at that interface are.                          */
+/* ------------------------------------------------------------------------ */
+void fr_statesync(ifp)
+void *ifp;
+{
+	ipstate_t *ent;
+	int slot;
+
+	if (fr_running <= 0)
+		return;
+
+	WRITE_ENTER(&ipf_state);
+
+	/*
+	 * fr_running is tested a second time now that the lock is held.
+	 */
+	if (fr_running > 0) {
+		ent = ips_list;
+		while (ent != NULL) {
+			for (slot = 0; slot < 4; slot++) {
+				if (ifp != NULL && ifp != ent->is_ifp[slot])
+					continue;
+				ent->is_ifp[slot] =
+				    fr_resolvenic(ent->is_ifname[slot],
+						  ent->is_v);
+			}
+			ent = ent->is_next;
+		}
+	}
+
+	RWLOCK_EXIT(&ipf_state);
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    fr_delstate                                                 */
+/* Returns:     Nil                                                         */
+/* Parameters:  is(I)  - pointer to state structure to delete               */
+/*              why(I) - if not 0, log reason why it was deleted            */
+/* Write Locks: ipf_state                                                   */
+/*                                                                          */
+/* Detaches a state entry from the global list, its hash bucket and its     */
+/* timeout queue, adjusting the table statistics on the way, then drops one */
+/* reference.  The entry is only torn down and freed once that was the last */
+/* reference; otherwise it is left behind as an orphan for its remaining    */
+/* users.                                                                   */
+/* ------------------------------------------------------------------------ */
+static void fr_delstate(is, why)
+ipstate_t *is;
+int why;
+{
+	int q;
+
+	ASSERT(rw_read_locked(&ipf_state.ipf_lk) == 0);
+
+	/*
+	 * First make the entry unreachable: take it off the enumerated
+	 * list...
+	 */
+	if (is->is_pnext != NULL) {
+		ipstate_t *after = is->is_next;
+
+		*is->is_pnext = after;
+		if (after != NULL)
+			after->is_pnext = is->is_pnext;
+
+		is->is_next = NULL;
+		is->is_pnext = NULL;
+	}
+
+	/*
+	 * ...and out of its hash bucket, fixing up the bucket statistics.
+	 */
+	if (is->is_phnext != NULL) {
+		ipstate_t *after = is->is_hnext;
+
+		*is->is_phnext = after;
+		if (after != NULL)
+			after->is_phnext = is->is_phnext;
+		ips_stats.iss_bucketlen[is->is_hv]--;
+		if (ips_table[is->is_hv] == NULL)
+			ips_stats.iss_inuse--;
+
+		is->is_hnext = NULL;
+		is->is_phnext = NULL;
+	}
+
+	/*
+	 * iss_wild counts table entries carrying wildcard flags.  Clearing
+	 * the flags here ensures the count is decremented at most once per
+	 * entry, and never for a cloned one.
+	 */
+	if ((is->is_flags & (SI_WILDP|SI_WILDA)) != 0) {
+		if ((is->is_flags & SI_CLONED) == 0) {
+			ATOMIC_DECL(ips_stats.iss_wild);
+		}
+		is->is_flags &= ~(SI_WILDP|SI_WILDA);
+	}
+
+	/*
+	 * Pull it off whichever timeout queue it is on.
+	 */
+	fr_deletequeueentry(&is->is_sti);
+
+	is->is_me = NULL;
+
+	/*
+	 * Drop our reference.  If someone else still holds one the entry
+	 * lives on as an orphan and nothing more is done here.
+	 */
+	if (--is->is_ref > 0)
+		return;
+
+	/*
+	 * Last reference gone: release both per-direction timeout queues.
+	 */
+	for (q = 0; q < 2; q++) {
+		if (is->is_tqehead[q] == NULL)
+			continue;
+		if (fr_deletetimeoutqueue(is->is_tqehead[q]) == 0)
+			fr_freetimeoutqueue(is->is_tqehead[q]);
+	}
+
+#ifdef IPFILTER_SYNC
+	if (is->is_sync)
+		ipfsync_del(is->is_sync);
+#endif
+#ifdef IPFILTER_SCAN
+	(void) ipsc_detachis(is);
+#endif
+
+	if (ipstate_logging != 0 && why != 0)
+		ipstate_log(is, why);
+
+	if (is->is_rule != NULL) {
+		is->is_rule->fr_statecnt--;
+		(void)fr_derefrule(&is->is_rule);
+	}
+
+	MUTEX_DESTROY(&is->is_lock);
+	KFREE(is);
+	ips_num--;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    fr_timeoutstate                                             */
+/* Returns:     Nil                                                         */
+/* Parameters:  Nil                                                         */
+/*                                                                          */
+/* Expires state entries whose time is up.  Each timeout queue is walked    */
+/* from its head and the walk stops at the first entry that is not yet due, */
+/* so the queues are relied upon to be ordered oldest first.  Afterwards,   */
+/* user-defined queues marked for deletion with no references are freed,    */
+/* and a pending flush request (fr_state_doflush) is serviced.              */
+/* ------------------------------------------------------------------------ */
+void fr_timeoutstate()
+{
+	ipftq_t *queue, *following;
+	ipftqent_t *ent, *upnext;
+	SPL_INT(s);
+
+	SPL_NET(s);
+	WRITE_ENTER(&ipf_state);
+
+	/*
+	 * Built-in queue list.
+	 */
+	queue = ips_tqtqb;
+	while (queue != NULL) {
+		ent = queue->ifq_head;
+		while (ent != NULL && !(ent->tqe_die > fr_ticks)) {
+			/* fetch the successor before the entry goes away */
+			upnext = ent->tqe_next;
+			fr_delstate(ent->tqe_parent, ISL_EXPIRE);
+			ent = upnext;
+		}
+		queue = queue->ifq_next;
+	}
+
+	/*
+	 * User-defined queue list; the next queue is captured up front.
+	 */
+	queue = ips_utqe;
+	while (queue != NULL) {
+		following = queue->ifq_next;
+
+		ent = queue->ifq_head;
+		while (ent != NULL && !(ent->tqe_die > fr_ticks)) {
+			upnext = ent->tqe_next;
+			fr_delstate(ent->tqe_parent, ISL_EXPIRE);
+			ent = upnext;
+		}
+		queue = following;
+	}
+
+	/*
+	 * Reap user-defined queues flagged for deletion that nobody
+	 * references any more.
+	 */
+	queue = ips_utqe;
+	while (queue != NULL) {
+		following = queue->ifq_next;
+
+		if (((queue->ifq_flags & IFQF_DELETE) != 0) &&
+		    (queue->ifq_ref == 0)) {
+			fr_freetimeoutqueue(queue);
+		}
+		queue = following;
+	}
+
+	if (fr_state_doflush) {
+		(void) fr_state_flush(2, 0);
+		fr_state_doflush = 0;
+	}
+
+	RWLOCK_EXIT(&ipf_state);
+	SPL_X(s);
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_state_flush */
+/* Returns: int - number of state entries removed */
+/* Parameters: which(I) - selects the flush action, see the list below
+ * proto(I) - if non-zero, the first pass only considers entries
+ * whose is_v field equals this value */
+/* Write Locks: ipf_state */
+/* */
+/* Flush state tables. Three actions currently defined: */
+/* which == 0 : flush all state table entries */
+/* which == 1 : flush TCP connections which have started to close but are */
+/* stuck for some reason. */
+/* which == 2 : flush TCP connections which have been idle for a long time, */
+/* starting at > 4 days idle and working back in successive half-*/
+/* days to at most 12 hours old. If this fails to free enough */
+/* slots then work backwards in half hour slots to 30 minutes. */
+/* If that too fails, then work backwards in 30 second intervals */
+/* for the last 30 minutes to at worst 30 seconds idle.
+ * NOTE(review): in the code below which == 2 first runs the same pass as
+ * which == 1 (TCP entries with either side not ESTABLISHED), and the
+ * timed pass uses intervals of IPF_TTLVAL(43200), (1800), (30) and (10);
+ * confirm the description above against that. */
+/* ------------------------------------------------------------------------ */
+static int fr_state_flush(which, proto)
+int which, proto;
+{
+ ipftq_t *ifq, *ifqnext;
+ ipftqent_t *tqe, *tqn;
+ ipstate_t *is, **isp;
+ int delete, removed;
+ long try, maxtick;
+ u_long interval;
+ SPL_INT(s);
+
+ removed = 0;
+
+ SPL_NET(s);
+ for (isp = &ips_list; ((is = *isp) != NULL); ) {
+ delete = 0;
+
+ if ((proto != 0) && (is->is_v != proto)) {
+ isp = &is->is_next;
+ continue;
+ }
+
+ switch (which)
+ {
+ case 0 :
+ delete = 1;
+ break;
+ case 1 :
+ case 2 :
+ if (is->is_p != IPPROTO_TCP)
+ break;
+ if ((is->is_state[0] != IPF_TCPS_ESTABLISHED) ||
+ (is->is_state[1] != IPF_TCPS_ESTABLISHED))
+ delete = 1;
+ break;
+ }
+
+ if (delete) {
+ if (is->is_p == IPPROTO_TCP)
+ ips_stats.iss_fin++;
+ else
+ ips_stats.iss_expire++;
+ fr_delstate(is, ISL_FLUSH); /* isp is not advanced here: *isp is re-read by the loop condition */
+ removed++;
+ } else
+ isp = &is->is_next;
+ }
+
+ if (which != 2) {
+ SPL_X(s);
+ return removed;
+ }
+
+ /*
+ * Asked to remove inactive entries because the table is full, try
+ * again, 3 times, if first attempt failed with a different criteria
+ * each time. The order tried in must be in decreasing age.
+ * Another alternative is to implement random drop and drop N entries
+ * at random until N have been freed up.
+ *
+ * The timed pass is rate limited: it is skipped if the previous one
+ * ran less than IPF_TTLVAL(5) ticks ago.
+ */
+ if (fr_ticks - ips_last_force_flush < IPF_TTLVAL(5))
+ goto force_flush_skipped;
+ ips_last_force_flush = fr_ticks;
+
+ if (fr_ticks > IPF_TTLVAL(43200)) /* pick the largest step that fits the current tick count */
+ interval = IPF_TTLVAL(43200);
+ else if (fr_ticks > IPF_TTLVAL(1800))
+ interval = IPF_TTLVAL(1800);
+ else if (fr_ticks > IPF_TTLVAL(30))
+ interval = IPF_TTLVAL(30);
+ else
+ interval = IPF_TTLVAL(10);
+ try = fr_ticks - (fr_ticks - interval); /* NOTE(review): algebraically this is just 'interval' - confirm the intended starting point */
+ if (try < 0)
+ goto force_flush_skipped;
+
+ while (removed == 0) { /* stops as soon as any pass has removed something */
+ maxtick = fr_ticks - interval;
+ if (maxtick < 0)
+ break;
+
+ while (try < maxtick) {
+ for (ifq = ips_tqtqb; ifq != NULL;
+ ifq = ifq->ifq_next) {
+ for (tqn = ifq->ifq_head;
+ ((tqe = tqn) != NULL); ) {
+ if (tqe->tqe_die > try)
+ break;
+ tqn = tqe->tqe_next;
+ is = tqe->tqe_parent;
+ fr_delstate(is, ISL_EXPIRE);
+ removed++;
+ }
+ }
+
+ for (ifq = ips_utqe; ifq != NULL; ifq = ifqnext) {
+ ifqnext = ifq->ifq_next;
+
+ for (tqn = ifq->ifq_head;
+ ((tqe = tqn) != NULL); ) {
+ if (tqe->tqe_die > try)
+ break;
+ tqn = tqe->tqe_next;
+ is = tqe->tqe_parent;
+ fr_delstate(is, ISL_EXPIRE);
+ removed++;
+ }
+ }
+ if (try + interval > maxtick)
+ break;
+ try += interval;
+ }
+
+ if (removed == 0) { /* nothing freed: fall back to the next smaller step, or give up */
+ if (interval == IPF_TTLVAL(43200)) {
+ interval = IPF_TTLVAL(1800);
+ } else if (interval == IPF_TTLVAL(1800)) {
+ interval = IPF_TTLVAL(30);
+ } else if (interval == IPF_TTLVAL(30)) {
+ interval = IPF_TTLVAL(10);
+ } else {
+ break;
+ }
+ }
+ }
+force_flush_skipped:
+ SPL_X(s);
+ return removed;
+}
+
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_tcp_age */
+/* Returns: int - 1 == packet accepted by the state machine (with or
+ * without a state change), 0 == no change (rejected) */
+/* Parameters: tqe(I) - pointer to timeout queue entry holding the two
+ * per-direction TCP states (tqe_state[]) */
+/* fin(I) - pointer to packet information */
+/* tqtab(I) - TCP timeout queue table this is in */
+/* flags(I) - flags from state/NAT entry */
+/* */
+/* Rewritten by Arjan de Vet <Arjan.deVet@adv.iae.nl>, 2000-07-29: */
+/* */
+/* - (try to) base state transitions on real evidence only, */
+/* i.e. packets that are sent and have been received by ipfilter; */
+/* diagram 18.12 of TCP/IP volume 1 by W. Richard Stevens was used. */
+/* */
+/* - deal with half-closed connections correctly; */
+/* */
+/* - store the state of the source in state[0] such that ipfstat */
+/* displays the state as source/dest instead of dest/source; the calls */
+/* to fr_tcp_age have been changed accordingly. */
+/* */
+/* Internal Parameters: */
+/* */
+/* state[0] = state of source (host that initiated connection) */
+/* state[1] = state of dest (host that accepted the connection) */
+/* */
+/* dir == 0 : a packet from source to dest */
+/* dir == 1 : a packet from dest to source */
+/* */
+/* Locking: it is assumed that the parent of the tqe structure is locked. */
+/* ------------------------------------------------------------------------ */
+int fr_tcp_age(tqe, fin, tqtab, flags)
+ipftqent_t *tqe;
+fr_info_t *fin;
+ipftq_t *tqtab;
+int flags;
+{
+ int dlen, ostate, nstate, rval, dir;
+ u_char tcpflags;
+ tcphdr_t *tcp;
+
+ tcp = fin->fin_dp;
+
+ rval = 0; /* stays 0 unless a case below accepts the packet */
+ dir = fin->fin_rev;
+ tcpflags = tcp->th_flags;
+ dlen = fin->fin_dlen - (TCP_OFF(tcp) << 2); /* presumably the TCP payload length (fin_dlen minus header length) - confirm TCP_OFF units */
+
+ if (tcpflags & TH_RST) {
+ if (!(tcpflags & TH_PUSH) && !dlen)
+ nstate = IPF_TCPS_CLOSED;
+ else
+ nstate = IPF_TCPS_CLOSE_WAIT;
+ rval = 1;
+ } else {
+ ostate = tqe->tqe_state[1 - dir]; /* state recorded for the opposite direction */
+ nstate = tqe->tqe_state[dir]; /* state recorded for this packet's direction */
+
+ switch (nstate)
+ {
+ case IPF_TCPS_CLOSED: /* 0 */
+ if ((tcpflags & TH_OPENING) == TH_OPENING) {
+ /*
+ * 'dir' received an S and sends SA in
+ * response, CLOSED -> SYN_RECEIVED
+ */
+ nstate = IPF_TCPS_SYN_RECEIVED;
+ rval = 1;
+ } else if ((tcpflags & TH_OPENING) == TH_SYN) {
+ /* 'dir' sent S, CLOSED -> SYN_SENT */
+ nstate = IPF_TCPS_SYN_SENT;
+ rval = 1;
+ }
+ /*
+ * the next piece of code makes it possible to get
+ * already established connections into the state table
+ * after a restart or reload of the filter rules; this
+ * does not work when a strict 'flags S keep state' is
+ * used for tcp connections of course
+ */
+ if (((flags & IS_TCPFSM) == 0) &&
+ ((tcpflags & TH_ACKMASK) == TH_ACK)) {
+ /*
+ * we saw an A, guess 'dir' is in ESTABLISHED
+ * mode
+ */
+ switch (ostate)
+ {
+ case IPF_TCPS_CLOSED :
+ case IPF_TCPS_SYN_RECEIVED :
+ nstate = IPF_TCPS_HALF_ESTAB;
+ rval = 1;
+ break;
+ case IPF_TCPS_HALF_ESTAB :
+ case IPF_TCPS_ESTABLISHED :
+ nstate = IPF_TCPS_ESTABLISHED;
+ rval = 1;
+ break;
+ default :
+ break;
+ }
+ }
+ /*
+ * TODO: besides regular ACK packets we can have other
+ * packets as well; it is yet to be determined how we
+ * should initialize the states in those cases
+ */
+ break;
+
+ case IPF_TCPS_LISTEN: /* 1 */
+ /* NOT USED */
+ break;
+
+ case IPF_TCPS_SYN_SENT: /* 2 */
+ if ((tcpflags & ~(TH_ECN|TH_CWR)) == TH_SYN) {
+ /*
+ * A retransmitted SYN packet. We do not reset
+ * the timeout here to fr_tcptimeout because a
+ * connection connect timeout does not renew
+ * after every packet that is sent. We need to
+ * set rval so as to indicate the packet has
+ * passed the check for its flags being valid
+ * in the TCP FSM. Setting rval to 2 has the
+ * result of not resetting the timeout.
+ */
+ rval = 2;
+ } else if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) ==
+ TH_ACK) {
+ /*
+ * we see an A from 'dir' which is in SYN_SENT
+ * state: 'dir' sent an A in response to an SA
+ * which it received, SYN_SENT -> ESTABLISHED
+ */
+ nstate = IPF_TCPS_ESTABLISHED;
+ rval = 1;
+ } else if (tcpflags & TH_FIN) {
+ /*
+ * we see an F from 'dir' which is in SYN_SENT
+ * state and wants to close its side of the
+ * connection; SYN_SENT -> FIN_WAIT_1
+ */
+ nstate = IPF_TCPS_FIN_WAIT_1;
+ rval = 1;
+ } else if ((tcpflags & TH_OPENING) == TH_OPENING) {
+ /*
+ * we see an SA from 'dir' which is already in
+ * SYN_SENT state, this means we have a
+ * simultaneous open; SYN_SENT -> SYN_RECEIVED
+ */
+ nstate = IPF_TCPS_SYN_RECEIVED;
+ rval = 1;
+ }
+ break;
+
+ case IPF_TCPS_SYN_RECEIVED: /* 3 */
+ if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) == TH_ACK) {
+ /*
+ * we see an A from 'dir' which was in
+ * SYN_RECEIVED state so it must now be in
+ * established state, SYN_RECEIVED ->
+ * ESTABLISHED
+ */
+ nstate = IPF_TCPS_ESTABLISHED;
+ rval = 1;
+ } else if ((tcpflags & ~(TH_ECN|TH_CWR)) ==
+ TH_OPENING) {
+ /*
+ * We see an SA from 'dir' which is already in
+ * SYN_RECEIVED state.
+ */
+ rval = 2;
+ } else if (tcpflags & TH_FIN) {
+ /*
+ * we see an F from 'dir' which is in
+ * SYN_RECEIVED state and wants to close its
+ * side of the connection; SYN_RECEIVED ->
+ * FIN_WAIT_1
+ */
+ nstate = IPF_TCPS_FIN_WAIT_1;
+ rval = 1;
+ }
+ break;
+
+ case IPF_TCPS_HALF_ESTAB: /* 4 */
+ if (ostate >= IPF_TCPS_HALF_ESTAB) {
+ if ((tcpflags & TH_ACKMASK) == TH_ACK) {
+ nstate = IPF_TCPS_ESTABLISHED;
+ rval = 1;
+ }
+ }
+
+ break;
+
+ case IPF_TCPS_ESTABLISHED: /* 5 */
+ rval = 1;
+ if (tcpflags & TH_FIN) {
+ /*
+ * 'dir' closed its side of the connection;
+ * this gives us a half-closed connection;
+ * ESTABLISHED -> FIN_WAIT_1
+ */
+ nstate = IPF_TCPS_FIN_WAIT_1;
+ } else if (tcpflags & TH_ACK) {
+ /*
+ * an ACK, should we exclude other flags here?
+ */
+ if (ostate == IPF_TCPS_FIN_WAIT_1) {
+ /*
+ * We know the other side did an active
+ * close, so we are ACKing the recvd
+ * FIN packet (does the window matching
+ * code guarantee this?) and go into
+ * CLOSE_WAIT state; this gives us a
+ * half-closed connection
+ */
+ nstate = IPF_TCPS_CLOSE_WAIT;
+ } else if (ostate < IPF_TCPS_CLOSE_WAIT) {
+ /*
+ * still a fully established
+ * connection reset timeout
+ */
+ nstate = IPF_TCPS_ESTABLISHED;
+ }
+ }
+ break;
+
+ case IPF_TCPS_CLOSE_WAIT: /* 6 */
+ rval = 1;
+ if (tcpflags & TH_FIN) {
+ /*
+ * application closed and 'dir' sent a FIN,
+ * we're now going into LAST_ACK state
+ */
+ nstate = IPF_TCPS_LAST_ACK;
+ } else {
+ /*
+ * we remain in CLOSE_WAIT because the other
+ * side has closed already and we did not
+ * close our side yet; reset timeout
+ */
+ nstate = IPF_TCPS_CLOSE_WAIT;
+ }
+ break;
+
+ case IPF_TCPS_FIN_WAIT_1: /* 7 */
+ rval = 1;
+ if ((tcpflags & TH_ACK) &&
+ ostate > IPF_TCPS_CLOSE_WAIT) {
+ /*
+ * if the other side is not active anymore
+ * it has sent us a FIN packet that we are
+ * ack'ing now with an ACK; this means both
+ * sides have now closed the connection and
+ * we go into TIME_WAIT
+ */
+ /*
+ * XXX: how do we know we really are ACKing
+ * the FIN packet here? does the window code
+ * guarantee that?
+ */
+ nstate = IPF_TCPS_TIME_WAIT;
+ } else {
+ /*
+ * we closed our side of the connection
+ * already but the other side is still active
+ * (ESTABLISHED/CLOSE_WAIT); continue with
+ * this half-closed connection
+ */
+ nstate = IPF_TCPS_FIN_WAIT_1;
+ }
+ break;
+
+ case IPF_TCPS_CLOSING: /* 8 */
+ /* NOT USED */
+ break;
+
+ case IPF_TCPS_LAST_ACK: /* 9 */
+ if (tcpflags & TH_ACK) {
+ if ((tcpflags & TH_PUSH) || dlen)
+ /*
+ * there is still data to be delivered,
+ * reset timeout
+ */
+ rval = 1;
+ else
+ rval = 2;
+ }
+ /*
+ * we cannot detect when we go out of LAST_ACK state to
+ * CLOSED because that is based on the reception of ACK
+ * packets; ipfilter can only detect that a packet
+ * has been sent by a host
+ */
+ break;
+
+ case IPF_TCPS_FIN_WAIT_2: /* 10 */
+ rval = 1;
+ if ((tcpflags & TH_OPENING) == TH_OPENING)
+ nstate = IPF_TCPS_SYN_RECEIVED;
+ else if (tcpflags & TH_SYN)
+ nstate = IPF_TCPS_SYN_SENT;
+ break;
+
+ case IPF_TCPS_TIME_WAIT: /* 11 */
+ /* we're in 2MSL timeout now */
+ rval = 1;
+ break;
+
+ default : /* unknown state value: report it (kernel) or abort (user space) */
+#if defined(_KERNEL)
+# if SOLARIS
+ cmn_err(CE_NOTE,
+ "tcp %lx flags %x si %lx nstate %d ostate %d\n",
+ (u_long)tcp, tcpflags, (u_long)tqe,
+ nstate, ostate);
+# else
+ printf("tcp %lx flags %x si %lx nstate %d ostate %d\n",
+ (u_long)tcp, tcpflags, (u_long)tqe,
+ nstate, ostate);
+# endif
+#else
+ abort();
+#endif
+ break;
+ }
+ }
+
+ /*
+ * If rval == 2 then do not update the queue position, but treat the
+ * packet as being ok.
+ * If rval == 1 the new state is stored for this direction and, for
+ * entries that are not rule based, the entry is moved to the queue
+ * in tqtab indexed by that state.
+ */
+ if (rval == 2)
+ rval = 1;
+ else if (rval == 1) {
+ tqe->tqe_state[dir] = nstate;
+ if ((tqe->tqe_flags & TQE_RULEBASED) == 0)
+ fr_movequeue(tqe, tqe->tqe_ifq, tqtab + nstate);
+ }
+
+ return rval;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: ipstate_log */
+/* Returns: Nil */
+/* Parameters: is(I) - pointer to state structure */
+/* type(I) - type of log entry to create */
+/* */
+/* Creates a state table log entry from the state structure and the type */
+/* passed in. Logs packet/byte counts, source/destination address and other */
+/* protocol specific information. Does nothing unless IPFILTER_LOG is set. */
+/* ------------------------------------------------------------------------ */
+void ipstate_log(is, type)
+struct ipstate *is;
+u_int type;
+{
+#ifdef IPFILTER_LOG
+	struct ipslog ipsl;
+	size_t sizes[1];
+	void *items[1];
+	int types[1];
+
+	/*
+	 * Start from an all-zero record.  Only some of the protocol specific
+	 * fields are filled in below, yet the whole structure is handed to
+	 * ipllog(); anything left unset would otherwise carry stale stack
+	 * contents into the log.
+	 */
+	bzero((char *)&ipsl, sizeof(ipsl));
+
+	/*
+	 * Copy information out of the ipstate_t structure and into the
+	 * structure used for logging.  The packet counters include the ICMP
+	 * packets that were matched against this state entry.
+	 */
+	ipsl.isl_type = type;
+	ipsl.isl_pkts[0] = is->is_pkts[0] + is->is_icmppkts[0];
+	ipsl.isl_bytes[0] = is->is_bytes[0];
+	ipsl.isl_pkts[1] = is->is_pkts[1] + is->is_icmppkts[1];
+	ipsl.isl_bytes[1] = is->is_bytes[1];
+	ipsl.isl_pkts[2] = is->is_pkts[2] + is->is_icmppkts[2];
+	ipsl.isl_bytes[2] = is->is_bytes[2];
+	ipsl.isl_pkts[3] = is->is_pkts[3] + is->is_icmppkts[3];
+	ipsl.isl_bytes[3] = is->is_bytes[3];
+	ipsl.isl_src = is->is_src;
+	ipsl.isl_dst = is->is_dst;
+	ipsl.isl_p = is->is_p;
+	ipsl.isl_v = is->is_v;
+	ipsl.isl_flags = is->is_flags;
+	ipsl.isl_tag = is->is_tag;
+	ipsl.isl_rulen = is->is_rulen;
+	(void) strncpy(ipsl.isl_group, is->is_group, FR_GROUPLEN);
+
+	/*
+	 * Protocol specific part: ports (plus both TCP states) for TCP/UDP,
+	 * the ICMP type for ICMP and ICMPv6, zero filler for anything else.
+	 */
+	if (ipsl.isl_p == IPPROTO_TCP || ipsl.isl_p == IPPROTO_UDP) {
+		ipsl.isl_sport = is->is_sport;
+		ipsl.isl_dport = is->is_dport;
+		if (ipsl.isl_p == IPPROTO_TCP) {
+			ipsl.isl_state[0] = is->is_state[0];
+			ipsl.isl_state[1] = is->is_state[1];
+		}
+	} else if (ipsl.isl_p == IPPROTO_ICMP ||
+	    ipsl.isl_p == IPPROTO_ICMPV6) {
+		ipsl.isl_itype = is->is_icmp.ici_type;
+	} else {
+		ipsl.isl_ps.isl_filler[0] = 0;
+		ipsl.isl_ps.isl_filler[1] = 0;
+	}
+
+	/* A single item: the record itself. */
+	items[0] = &ipsl;
+	sizes[0] = sizeof(ipsl);
+	types[0] = 0;
+
+	/* Count the record as logged or failed depending on ipllog(). */
+	if (ipllog(IPL_LOGSTATE, NULL, items, sizes, types, 1)) {
+		ATOMIC_INCL(ips_stats.iss_logged);
+	} else {
+		ATOMIC_INCL(ips_stats.iss_logfail);
+	}
+#endif
+}
+
+
+#ifdef USE_INET6
+/* ------------------------------------------------------------------------ */
+/* Function: fr_checkicmp6matchingstate */
+/* Returns: ipstate_t* - NULL == no match found, */
+/* else pointer to matching state entry */
+/* Parameters: fin(I) - pointer to packet information */
+/* Locks: NULL == no locks, else Read Lock on ipf_state */
+/* */
+/* If we've got an ICMPv6 error message, using the information stored in */
+/* the ICMPv6 packet, look for a matching state table entry. On a match, */
+/* fin_rev is set and the entry's ICMP packet counter is incremented; the */
+/* read lock on ipf_state is still held when a non-NULL value is returned. */
+/* ------------------------------------------------------------------------ */
+static ipstate_t *fr_checkicmp6matchingstate(fin)
+fr_info_t *fin;
+{
+	struct icmp6_hdr *ic6, *oic;
+	int backward, i;
+	ipstate_t *is, **isp;
+	u_short sport, dport;
+	i6addr_t dst, src;
+	u_short savelen;
+	icmpinfo_t *ic;
+	fr_info_t ofin;
+	tcphdr_t *tcp;
+	ip6_t *oip6;
+	u_char pr;
+	u_int hv;
+
+	/*
+	 * Does it at least have the return (basic) IP header ?
+	 * Is it an actual recognised ICMP error type?
+	 * Only a basic IP header (no options) should be with
+	 * an ICMP error header.
+	 */
+	if ((fin->fin_v != 6) || (fin->fin_plen < ICMP6ERR_MINPKTLEN) ||
+	    !(fin->fin_flx & FI_ICMPERR))
+		return NULL;
+
+	ic6 = fin->fin_dp;
+
+	/* The offending (original) IPv6 header follows the ICMPv6 header. */
+	oip6 = (ip6_t *)((char *)ic6 + ICMPERR_ICMPHLEN);
+	if (fin->fin_plen < sizeof(*oip6))
+		return NULL;
+
+	/*
+	 * Start ofin as a complete copy of fin - sizeof(*fin), not the size
+	 * of the pointer - so that no field of ofin is left uninitialised,
+	 * then override the fields that describe the embedded packet.
+	 */
+	bcopy((char *)fin, (char *)&ofin, sizeof(*fin));
+	ofin.fin_v = 6;
+	ofin.fin_ifp = fin->fin_ifp;
+	ofin.fin_out = !fin->fin_out;
+	ofin.fin_m = NULL;	/* if dereferenced, panic XXX */
+	ofin.fin_mp = NULL;	/* if dereferenced, panic XXX */
+
+	/*
+	 * We make a fin entry to be able to feed it to
+	 * matchsrcdst. Note that not all fields are necessary
+	 * but this is the cleanest way. Note further we fill
+	 * in fin_mp such that if someone uses it we'll get
+	 * a kernel panic. fr_matchsrcdst does not use this.
+	 *
+	 * watch out here, as ip is in host order and oip6 in network
+	 * order. Any change we make must be undone afterwards.
+	 */
+	savelen = oip6->ip6_plen;
+	oip6->ip6_plen = fin->fin_dlen - ICMPERR_ICMPHLEN;
+	ofin.fin_flx = FI_NOCKSUM;
+	ofin.fin_ip = (ip_t *)oip6;
+	ofin.fin_plen = oip6->ip6_plen;
+	(void) fr_makefrip(sizeof(*oip6), (ip_t *)oip6, &ofin);
+	ofin.fin_flx &= ~(FI_BAD|FI_SHORT);
+	oip6->ip6_plen = savelen;
+
+	if (oip6->ip6_nxt == IPPROTO_ICMPV6) {
+		oic = (struct icmp6_hdr *)(oip6 + 1);
+		/*
+		 * an ICMP error can only be generated as a result of an
+		 * ICMP query, not as the response on an ICMP error
+		 *
+		 * XXX theoretically ICMP_ECHOREP and the other replies are
+		 * ICMP queries as well, but adding them here seems strange XXX
+		 */
+		if (!(oic->icmp6_type & ICMP6_INFOMSG_MASK))
+			return NULL;
+
+		/*
+		 * perform a lookup of the ICMP packet in the state table
+		 *
+		 * NOTE(review): this hash folds in only src.in4/dst.in4,
+		 * whereas the non-ICMP path below folds in all four i6[]
+		 * words of each address - confirm this matches the hash used
+		 * when ICMPv6 state entries are added.
+		 */
+		hv = (pr = oip6->ip6_nxt);
+		src.in6 = oip6->ip6_src;
+		hv += src.in4.s_addr;
+		dst.in6 = oip6->ip6_dst;
+		hv += dst.in4.s_addr;
+		hv += oic->icmp6_id;
+		hv += oic->icmp6_seq;
+		hv = DOUBLE_HASH(hv);
+
+		READ_ENTER(&ipf_state);
+		for (isp = &ips_table[hv]; ((is = *isp) != NULL); ) {
+			ic = &is->is_icmp;
+			/* advance first: 'is' is overwritten by the match */
+			isp = &is->is_hnext;
+			if ((is->is_p == pr) &&
+			    !(is->is_pass & FR_NOICMPERR) &&
+			    (oic->icmp6_id == ic->ici_id) &&
+			    (oic->icmp6_seq == ic->ici_seq) &&
+			    (is = fr_matchsrcdst(&ofin, is, &src,
+						 &dst, NULL, FI_ICMPCMP))) {
+				/*
+				 * in the state table ICMP queries are stored
+				 * with the type of the corresponding ICMP
+				 * response. Correct here
+				 */
+				if (((ic->ici_type == ICMP6_ECHO_REPLY) &&
+				     (oic->icmp6_type == ICMP6_ECHO_REQUEST)) ||
+				    (ic->ici_type - 1 == oic->icmp6_type )) {
+					ips_stats.iss_hits++;
+					backward = IP6_NEQ(&is->is_dst, &src);
+					fin->fin_rev = !backward;
+					i = (backward << 1) + fin->fin_out;
+					is->is_icmppkts[i]++;
+					/* returns with the read lock held */
+					return is;
+				}
+			}
+		}
+		RWLOCK_EXIT(&ipf_state);
+		return NULL;
+	}
+
+	/*
+	 * Not ICMPv6: hash on protocol, both full addresses and, for TCP/UDP,
+	 * both ports of the embedded packet.
+	 */
+	hv = (pr = oip6->ip6_nxt);
+	src.in6 = oip6->ip6_src;
+	hv += src.i6[0];
+	hv += src.i6[1];
+	hv += src.i6[2];
+	hv += src.i6[3];
+	dst.in6 = oip6->ip6_dst;
+	hv += dst.i6[0];
+	hv += dst.i6[1];
+	hv += dst.i6[2];
+	hv += dst.i6[3];
+
+	if ((oip6->ip6_nxt == IPPROTO_TCP) || (oip6->ip6_nxt == IPPROTO_UDP)) {
+		tcp = (tcphdr_t *)(oip6 + 1);
+		dport = tcp->th_dport;
+		sport = tcp->th_sport;
+		hv += dport;
+		hv += sport;
+	} else
+		tcp = NULL;
+	hv = DOUBLE_HASH(hv);
+
+	READ_ENTER(&ipf_state);
+	for (isp = &ips_table[hv]; ((is = *isp) != NULL); ) {
+		isp = &is->is_hnext;
+		/*
+		 * Only allow this icmp through if the
+		 * encapsulated packet was allowed through the
+		 * other way around. Note that the minimal amount
+		 * of info present does not allow for checking against
+		 * tcp internals such as seq and ack numbers.
+		 */
+		if ((is->is_p != pr) || (is->is_v != 6) ||
+		    (is->is_pass & FR_NOICMPERR))
+			continue;
+		is = fr_matchsrcdst(&ofin, is, &src, &dst, tcp, FI_ICMPCMP);
+		if (is != NULL) {
+			ips_stats.iss_hits++;
+			backward = IP6_NEQ(&is->is_dst, &src);
+			fin->fin_rev = !backward;
+			i = (backward << 1) + fin->fin_out;
+			is->is_icmppkts[i]++;
+			/*
+			 * we deliberately do not touch the timeouts
+			 * for the accompanying state table entry.
+			 * It remains to be seen if that is correct. XXX
+			 */
+			return is;
+		}
+	}
+	RWLOCK_EXIT(&ipf_state);
+	return NULL;
+}
+#endif
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_sttab_init */
+/* Returns: Nil */
+/* Parameters: tqp(I) - pointer to an array of timeout queues for TCP */
+/* */
+/* Initialise the array of timeout queues for TCP. */
+/* ------------------------------------------------------------------------ */
+void fr_sttab_init(tqp)
+ipftq_t *tqp;
+{
+	ipftq_t *ifq;
+	int state;
+
+	/*
+	 * Common setup, walking from the last queue back to the first: each
+	 * queue starts out empty with a zero ttl, holds one reference and
+	 * links to the next slot of the array.
+	 */
+	state = IPF_TCP_NSTATES;
+	while (--state >= 0) {
+		ifq = tqp + state;
+		ifq->ifq_ttl = 0;
+		ifq->ifq_ref = 1;
+		ifq->ifq_head = NULL;
+		ifq->ifq_tail = &ifq->ifq_head;
+		ifq->ifq_next = ifq + 1;
+		MUTEX_INIT(&ifq->ifq_lock, "ipftq tcp tab");
+	}
+	/* The final queue has no successor. */
+	tqp[IPF_TCP_NSTATES - 1].ifq_next = NULL;
+
+	/* Per-state timeouts, taken from the fr_tcp* variables. */
+	tqp[IPF_TCPS_CLOSED].ifq_ttl = fr_tcpclosed;
+	tqp[IPF_TCPS_LISTEN].ifq_ttl = fr_tcptimeout;
+	tqp[IPF_TCPS_SYN_SENT].ifq_ttl = fr_tcptimeout;
+	tqp[IPF_TCPS_SYN_RECEIVED].ifq_ttl = fr_tcptimeout;
+	tqp[IPF_TCPS_ESTABLISHED].ifq_ttl = fr_tcpidletimeout;
+	tqp[IPF_TCPS_CLOSE_WAIT].ifq_ttl = fr_tcphalfclosed;
+	tqp[IPF_TCPS_FIN_WAIT_1].ifq_ttl = fr_tcphalfclosed;
+	tqp[IPF_TCPS_CLOSING].ifq_ttl = fr_tcptimeout;
+	tqp[IPF_TCPS_LAST_ACK].ifq_ttl = fr_tcplastack;
+	tqp[IPF_TCPS_FIN_WAIT_2].ifq_ttl = fr_tcpclosewait;
+	tqp[IPF_TCPS_TIME_WAIT].ifq_ttl = fr_tcptimeout;
+	tqp[IPF_TCPS_HALF_ESTAB].ifq_ttl = fr_tcptimeout;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_sttab_destroy */
+/* Returns: Nil */
+/* Parameters: tqp(I) - pointer to an array of timeout queues for TCP */
+/* */
+/* Do whatever is necessary to "destroy" each of the entries in the array */
+/* of timeout queues for TCP. */
+/* ------------------------------------------------------------------------ */
+void fr_sttab_destroy(tqp)
+ipftq_t *tqp;
+{
+	int state;
+
+	/* Release each queue's lock, last queue first. */
+	state = IPF_TCP_NSTATES;
+	while (--state >= 0) {
+		MUTEX_DESTROY(&tqp[state].ifq_lock);
+	}
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_statederef */
+/* Returns: Nil */
+/* Parameters: fin(I) - pointer to packet information */
+/* isp(I) - pointer to pointer to state table entry */
+/* */
+/* Drop one reference on this state table entry and delete it when there */
+/* are no more things using it. *isp is set to NULL before returning. */
+/* */
+/* When operating in userland (ipftest), we have no timers to clear a state */
+/* entry, so an entry that is still referenced is also deleted as soon as */
+/* either side's state is numerically above IPF_TCPS_ESTABLISHED. The */
+/* disabled (#if 0) code is an older test that looked for LAST_ACK and */
+/* TIME_WAIT and used packet direction plus the interface list to avoid */
+/* deleting an entry on a final inbound packet that we're also to route. */
+/* */
+/* Internal parameters: */
+/* state[0] = state of source (host that initiated connection) */
+/* state[1] = state of dest (host that accepted the connection) */
+/* */
+/* dir == 0 : a packet from source to dest */
+/* dir == 1 : a packet from dest to source */
+/* ------------------------------------------------------------------------ */
+void fr_statederef(fin, isp)
+fr_info_t *fin;
+ipstate_t **isp;
+{
+ ipstate_t *is = *isp;
+#if 0
+ int nstate, ostate, dir, eol;
+
+ eol = 0; /* End-of-the-line flag. */
+ dir = fin->fin_rev;
+ ostate = is->is_state[1 - dir];
+ nstate = is->is_state[dir];
+ /*
+ * Determine whether this packet is local or routed. State entries
+ * with us as the destination will have an interface list of
+ * int1,-,-,int1. Entries with us as the origin run as -,int1,int1,-.
+ */
+ if ((fin->fin_p == IPPROTO_TCP) && (fin->fin_out == 0)) {
+ if ((strcmp(is->is_ifname[0], is->is_ifname[3]) == 0) &&
+ (strcmp(is->is_ifname[1], is->is_ifname[2]) == 0)) {
+ if ((dir == 0) &&
+ (strcmp(is->is_ifname[1], "-") == 0) &&
+ (strcmp(is->is_ifname[0], "-") != 0)) {
+ eol = 1;
+ } else if ((dir == 1) &&
+ (strcmp(is->is_ifname[0], "-") == 0) &&
+ (strcmp(is->is_ifname[1], "-") != 0)) {
+ eol = 1;
+ }
+ }
+ }
+#endif /* 0 - disabled local/routed detection */
+
+ fin = fin; /* LINT: fin is only used by the disabled (#if 0) code */
+ is = *isp; /* same value 'is' was initialised with above */
+ *isp = NULL; /* the caller's pointer is cleared unconditionally */
+ WRITE_ENTER(&ipf_state);
+ is->is_ref--;
+ if (is->is_ref == 0) {
+ is->is_ref++; /* To counter ref-- in fr_delstate() */
+ fr_delstate(is, ISL_EXPIRE);
+#ifndef _KERNEL /* userland only: extra delete of still-referenced entries */
+#if 0
+ } else if (((fin->fin_out == 1) || (eol == 1)) &&
+ ((ostate == IPF_TCPS_LAST_ACK) &&
+ (nstate == IPF_TCPS_TIME_WAIT))) {
+ ;
+#else /* active test: either side's state is above ESTABLISHED */
+ } else if ((is->is_sti.tqe_state[0] > IPF_TCPS_ESTABLISHED) ||
+ (is->is_sti.tqe_state[1] > IPF_TCPS_ESTABLISHED)) {
+#endif /* 0 */
+ fr_delstate(is, ISL_ORPHAN);
+#endif /* !_KERNEL */
+ }
+ RWLOCK_EXIT(&ipf_state);
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_setstatequeue */
+/* Returns: Nil */
+/* Parameters: is(I) - pointer to state structure */
+/* rev(I) - forward(0) or reverse(1) direction */
+/* Locks: ipf_state (read or write) */
+/* */
+/* Put the state entry on its default queue entry, using rev as a helper in */
+/* determining which queue it should be placed on. */
+/* ------------------------------------------------------------------------ */
+void fr_setstatequeue(is, rev)
+ipstate_t *is;
+int rev;
+{
+	ipftq_t *target = NULL;
+	ipftq_t *current;
+
+	/* A queue supplied by the rule, when there is one, takes priority. */
+	if ((is->is_sti.tqe_flags & TQE_RULEBASED) != 0)
+		target = is->is_tqehead[rev];
+
+	/* Otherwise fall back to the default queue for the protocol. */
+	if (target == NULL) {
+		switch (is->is_p)
+		{
+		case IPPROTO_TCP :
+			/* one queue per TCP state, indexed by this side's state */
+			target = ips_tqtqb + is->is_state[rev];
+			break;
+
+		case IPPROTO_UDP :
+			target = (rev == 1) ? &ips_udpacktq : &ips_udptq;
+			break;
+
+		case IPPROTO_ICMP :
+#ifdef USE_INET6
+		case IPPROTO_ICMPV6 :
+#endif
+			target = (rev == 1) ? &ips_icmpacktq : &ips_icmptq;
+			break;
+
+		default :
+			target = &ips_iptq;
+			break;
+		}
+	}
+
+	/*
+	 * An entry that is not yet on any timeout queue is appended to the
+	 * chosen one; an entry that already sits on a queue is moved.
+	 */
+	current = is->is_sti.tqe_ifq;
+	if (current == NULL)
+		fr_queueappend(&is->is_sti, target, is);
+	else
+		fr_movequeue(&is->is_sti, current, target);
+}
diff --git a/usr/src/uts/common/inet/ipf/ipf.h b/usr/src/uts/common/inet/ipf/ipf.h
new file mode 100644
index 0000000000..8ae3352883
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/ipf.h
@@ -0,0 +1,315 @@
+/*
+ * Copyright (C) 1993-2001, 2003 by Darren Reed.
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ *
+ * @(#)ipf.h 1.12 6/5/96
+ * $Id: ipf.h,v 2.71.2.7 2005/06/12 07:18:31 darrenr Exp $
+ *
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef __IPF_H__
+#define __IPF_H__
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#if defined(__osf__)
+# define radix_mask ipf_radix_mask
+# define radix_node ipf_radix_node
+# define radix_node_head ipf_radix_node_head
+#endif
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/file.h>
+/*
+ * This is a workaround for <sys/uio.h> troubles on FreeBSD, HPUX, OpenBSD.
+ * Needed here because on some systems <sys/uio.h> gets included by things
+ * like <sys/socket.h>
+ */
+#ifndef _KERNEL
+# define ADD_KERNEL
+# define _KERNEL
+# define KERNEL
+#endif
+#ifdef __OpenBSD__
+struct file;
+#endif
+#include <sys/uio.h>
+#ifdef ADD_KERNEL
+# undef _KERNEL
+# undef KERNEL
+#endif
+#include <sys/time.h>
+#include <sys/socket.h>
+#include <net/if.h>
+#if __FreeBSD_version >= 300000
+# include <net/if_var.h>
+#endif
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_icmp.h>
+#ifndef TCP_PAWS_IDLE /* IRIX */
+# include <netinet/tcp.h>
+#endif
+#include <netinet/udp.h>
+
+#include <arpa/inet.h>
+
+#include <errno.h>
+#include <limits.h>
+#include <netdb.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <stdio.h>
+#if !defined(__SVR4) && !defined(__svr4__) && defined(sun)
+# include <strings.h>
+#endif
+#include <string.h>
+#include <unistd.h>
+
+#include "netinet/ip_compat.h"
+#include "netinet/ip_fil.h"
+#include "netinet/ip_nat.h"
+#include "netinet/ip_frag.h"
+#include "netinet/ip_state.h"
+#include "netinet/ip_proxy.h"
+#include "netinet/ip_auth.h"
+#include "netinet/ip_lookup.h"
+#include "netinet/ip_pool.h"
+#ifdef IPFILTER_SCAN
+#include "netinet/ip_scan.h"
+#endif
+#include "netinet/ip_htable.h"
+#ifdef IPFILTER_SYNC
+#include "netinet/ip_sync.h"
+#endif
+
+#include "opts.h"
+
+#ifndef __P
+# ifdef __STDC__
+# define __P(x) x
+# else
+# define __P(x) ()
+# endif
+#endif
+#ifndef __STDC__
+# undef const
+# define const
+#endif
+
+#ifndef U_32_T
+# define U_32_T 1
+# if defined(__NetBSD__) || defined(__OpenBSD__) || defined(__FreeBSD__) || \
+ defined(__sgi)
+typedef u_int32_t u_32_t;
+# else
+# if defined(__alpha__) || defined(__alpha) || defined(_LP64)
+typedef unsigned int u_32_t;
+# else
+# if SOLARIS2 >= 6
+typedef uint32_t u_32_t;
+# else
+typedef unsigned int u_32_t;
+# endif
+# endif
+# endif /* __NetBSD__ || __OpenBSD__ || __FreeBSD__ || __sgi */
+#endif /* U_32_T */
+
+#ifndef MAXHOSTNAMELEN
+# define MAXHOSTNAMELEN 256
+#endif
+
+#define MAX_ICMPCODE 16
+#define MAX_ICMPTYPE 19
+
+
+struct ipopt_names { /* element type of the ionames[], secclass[] and v6ionames[] tables */
+ int on_value; /* presumably the option's numeric value - TODO confirm */
+ int on_bit; /* presumably the flag bit standing for the option - TODO confirm */
+ int on_siz; /* presumably the option's size - TODO confirm */
+ char *on_name; /* presumably the option's textual name - TODO confirm */
+};
+
+
+typedef struct alist_s { /* node of a linked list of address/mask pairs */
+ struct alist_s *al_next; /* next node in the list */
+ int al_not; /* presumably a "negate this entry" flag - TODO confirm */
+ sa_family_t al_family; /* presumably the family of the address below - TODO confirm */
+ i6addr_t al_i6addr; /* address; al_addr (and al_1) alias its in4_addr member */
+ i6addr_t al_i6mask; /* mask; al_mask (and al_2) alias its in4_addr member */
+} alist_t;
+
+#define al_addr al_i6addr.in4_addr
+#define al_mask al_i6mask.in4_addr
+#define al_1 al_addr
+#define al_2 al_mask
+
+
+typedef struct { /* NOTE(review): field names resemble BPF's struct bpf_insn (code, jt, jf, k) - confirm */
+ u_short fb_c;
+ u_char fb_t;
+ u_char fb_f;
+ u_32_t fb_k;
+} fakebpf_t;
+
+
+#if defined(__NetBSD__) || defined(__OpenBSD__) || \
+ (_BSDI_VERSION >= 199701) || (__FreeBSD_version >= 300000) || \
+ SOLARIS || defined(__sgi) || defined(__osf__) || defined(linux)
+# include <stdarg.h>
+typedef int (* ioctlfunc_t) __P((int, ioctlcmd_t, ...));
+#else
+typedef int (* ioctlfunc_t) __P((dev_t, ioctlcmd_t, void *));
+#endif
+typedef void (* addfunc_t) __P((int, ioctlfunc_t, void *));
+typedef int (* copyfunc_t) __P((void *, void *, size_t));
+
+
+/*
+ * SunOS4
+ */
+#if defined(sun) && !defined(__SVR4) && !defined(__svr4__)
+extern int ioctl __P((int, int, void *));
+#endif
+
+extern char thishost[];
+extern char flagset[];
+extern u_char flags[];
+extern struct ipopt_names ionames[];
+extern struct ipopt_names secclass[];
+extern char *icmpcodes[MAX_ICMPCODE + 1];
+extern char *icmptypes[MAX_ICMPTYPE + 1];
+extern int use_inet6;
+extern int lineNum;
+extern struct ipopt_names v6ionames[];
+
+
+extern int addicmp __P((char ***, struct frentry *, int));
+extern int addipopt __P((char *, struct ipopt_names *, int, char *));
+extern int addkeep __P((char ***, struct frentry *, int));
+extern int bcopywrap __P((void *, void *, size_t));
+extern void binprint __P((void *, size_t));
+extern void initparse __P((void));
+extern u_32_t buildopts __P((char *, char *, int));
+extern int checkrev __P((char *));
+extern int count6bits __P((u_32_t *));
+extern int count4bits __P((u_32_t));
+extern int extras __P((char ***, struct frentry *, int));
+extern char *fac_toname __P((int));
+extern int fac_findname __P((char *));
+extern void fill6bits __P((int, u_int *));
+extern int gethost __P((char *, u_32_t *));
+extern int getport __P((struct frentry *, char *, u_short *));
+extern int getportproto __P((char *, int));
+extern int getproto __P((char *));
+extern char *getline __P((char *, size_t, FILE *, int *));
+extern int genmask __P((char *, u_32_t *));
+extern char *getnattype __P((struct ipnat *));
+extern char *getsumd __P((u_32_t));
+extern u_32_t getoptbyname __P((char *));
+extern u_32_t getoptbyvalue __P((int));
+extern u_32_t getv6optbyname __P((char *));
+extern u_32_t getv6optbyvalue __P((int));
+extern void hexdump __P((FILE *, void *, int, int));
+extern int hostmask __P((char ***, char *, char *, u_32_t *, u_32_t *, int));
+extern int hostnum __P((u_32_t *, char *, int, char *));
+extern int icmpcode __P((char *));
+extern int icmpidnum __P((char *, u_short *, int));
+extern void initparse __P((void));
+extern void ipf_dotuning __P((int, char *, ioctlfunc_t));
+extern void ipf_addrule __P((int, ioctlfunc_t, void *));
+extern int ipf_parsefile __P((int, addfunc_t, ioctlfunc_t *, char *));
+extern int ipf_parsesome __P((int, addfunc_t, ioctlfunc_t *, FILE *));
+extern int ipmon_parsefile __P((char *));
+extern int ipmon_parsesome __P((FILE *));
+extern void ipnat_addrule __P((int, ioctlfunc_t, void *));
+extern int ipnat_parsefile __P((int, addfunc_t, ioctlfunc_t, char *));
+extern int ipnat_parsesome __P((int, addfunc_t, ioctlfunc_t, FILE *));
+extern int ippool_parsefile __P((int, char *, ioctlfunc_t));
+extern int ippool_parsesome __P((int, FILE *, ioctlfunc_t));
+extern int kmemcpywrap __P((void *, void *, size_t));
+extern char *kvatoname __P((ipfunc_t, ioctlfunc_t));
+extern int load_hash __P((struct iphtable_s *, struct iphtent_s *,
+ ioctlfunc_t));
+extern int load_hashnode __P((int, char *, struct iphtent_s *, ioctlfunc_t));
+extern int load_pool __P((struct ip_pool_s *list, ioctlfunc_t));
+extern int load_poolnode __P((int, char *, ip_pool_node_t *, ioctlfunc_t));
+extern int loglevel __P((char **, u_int *, int));
+extern alist_t *make_range __P((int, struct in_addr, struct in_addr));
+extern ipfunc_t nametokva __P((char *, ioctlfunc_t));
+extern ipnat_t *natparse __P((char *, int));
+extern void natparsefile __P((int, char *, int));
+extern void nat_setgroupmap __P((struct ipnat *));
+extern int ntomask __P((int, int, u_32_t *));
+extern u_32_t optname __P((char ***, u_short *, int));
+extern struct frentry *parse __P((char *, int));
+extern char *portname __P((int, int));
+extern int portnum __P((char *, char *, u_short *, int));
+extern int ports __P((char ***, char *, u_short *, int *, u_short *, int));
+extern int pri_findname __P((char *));
+extern char *pri_toname __P((int));
+extern void print_toif __P((char *, struct frdest *));
+extern void printaps __P((ap_session_t *, int));
+extern void printbuf __P((char *, int, int));
+extern void printfr __P((struct frentry *, ioctlfunc_t));
+extern void printtunable __P((ipftune_t *));
+extern struct iphtable_s *printhash __P((struct iphtable_s *, copyfunc_t,
+ char *, int));
+extern struct iphtent_s *printhashnode __P((struct iphtable_s *,
+ struct iphtent_s *,
+ copyfunc_t, int));
+extern void printhostmask __P((int, u_32_t *, u_32_t *));
+extern void printip __P((u_32_t *));
+extern void printlog __P((struct frentry *));
+extern void printlookup __P((i6addr_t *addr, i6addr_t *mask));
+extern void printmask __P((int, u_32_t *));
+extern void printpacket __P((struct ip *));
+extern void printpacket6 __P((struct ip *));
+extern struct ip_pool_s *printpool __P((struct ip_pool_s *, copyfunc_t,
+ char *, int));
+extern struct ip_pool_node *printpoolnode __P((struct ip_pool_node *, int));
+extern void printproto __P((struct protoent *, int, struct ipnat *));
+extern void printportcmp __P((int, struct frpcmp *));
+extern void optprint __P((u_short *, u_long, u_long));
+#ifdef USE_INET6
+extern void optprintv6 __P((u_short *, u_long, u_long));
+#endif
+extern int ratoi __P((char *, int *, int, int));
+extern int ratoui __P((char *, u_int *, u_int, u_int));
+extern int remove_hash __P((struct iphtable_s *, ioctlfunc_t));
+extern int remove_hashnode __P((int, char *, struct iphtent_s *, ioctlfunc_t));
+extern int remove_pool __P((ip_pool_t *, ioctlfunc_t));
+extern int remove_poolnode __P((int, char *, ip_pool_node_t *, ioctlfunc_t));
+extern u_char tcp_flags __P((char *, u_char *, int));
+extern u_char tcpflags __P((char *));
+extern int to_interface __P((struct frdest *, char *, int));
+extern void printc __P((struct frentry *));
+extern void printC __P((int));
+extern void emit __P((int, int, void *, struct frentry *));
+extern u_char secbit __P((int));
+extern u_char seclevel __P((char *));
+extern void printfraginfo __P((char *, struct ipfr *));
+extern void printifname __P((char *, char *, void *));
+extern char *hostname __P((int, void *));
+extern struct ipstate *printstate __P((struct ipstate *, int, u_long));
+extern void printsbuf __P((char *));
+extern void printnat __P((struct ipnat *, int));
+extern void printactivenat __P((struct nat *, int));
+extern void printhostmap __P((struct hostmap *, u_int));
+extern void printpacket __P((struct ip *));
+
+extern void set_variable __P((char *, char *));
+extern char *get_variable __P((char *, char **, int));
+extern void resetlexer __P((void));
+
+#if SOLARIS
+extern int gethostname __P((char *, int ));
+extern void sync __P((void));
+#endif
+
+#endif /* __IPF_H__ */
diff --git a/usr/src/uts/common/inet/ipf/ipmon.h b/usr/src/uts/common/inet/ipf/ipmon.h
new file mode 100644
index 0000000000..765a646954
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/ipmon.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 1993-2001 by Darren Reed.
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ *
+ * @(#)ip_fil.h 1.35 6/5/96
+ * $Id: ipmon.h,v 2.8 2003/07/25 22:16:20 darrenr Exp $
+ */
+
+
+typedef struct ipmon_action { /* one ipmon action: match fields plus what to do */
+ struct ipmon_action *ac_next; /* next action in the list */
+ int ac_mflag; /* IPMAC_* bits: which fields to compare */
+ int ac_dflag; /* flags to complement the doing fields */
+ int ac_syslog; /* = 1 to syslog rules. */
+ char *ac_savefile; /* filename to save log records to */
+ FILE *ac_savefp; /* presumably the open stream for ac_savefile - TODO confirm */
+ int ac_direction;
+ char ac_group[FR_GROUPLEN];
+ char ac_nattag[16];
+ u_32_t ac_logtag;
+ int ac_type; /* nat/state/ipf */
+ int ac_proto;
+ int ac_rule;
+ int ac_packet;
+ int ac_second;
+ int ac_result;
+ u_32_t ac_sip;
+ u_32_t ac_smsk;
+ u_32_t ac_dip;
+ u_32_t ac_dmsk;
+ u_short ac_sport;
+ u_short ac_dport;
+ char *ac_exec; /* execute argument */
+ char *ac_run; /* actual command that gets run */
+ char *ac_iface;
+ /*
+ * used with ac_packet/ac_second; ac_lastsec and ac_lastusec are
+ * shorthands for the two members of ac_last
+ */
+ struct timeval ac_last;
+ int ac_pktcnt;
+} ipmon_action_t;
+
+#define ac_lastsec ac_last.tv_sec
+#define ac_lastusec ac_last.tv_usec
+
+/*
+ * Flags indicating what fields to do matching upon (ac_mflag).
+ */
+#define IPMAC_DIRECTION 0x0001
+#define IPMAC_DSTIP 0x0002
+#define IPMAC_DSTPORT 0x0004
+#define IPMAC_EVERY 0x0008
+#define IPMAC_GROUP 0x0010
+#define IPMAC_INTERFACE 0x0020
+#define IPMAC_LOGTAG 0x0040
+#define IPMAC_NATTAG 0x0080
+#define IPMAC_PROTOCOL 0x0100
+#define IPMAC_RESULT 0x0200
+#define IPMAC_RULE 0x0400
+#define IPMAC_SRCIP 0x0800
+#define IPMAC_SRCPORT 0x1000
+#define IPMAC_TYPE 0x2000
+#define IPMAC_WITH 0x4000
+
+#define IPMR_BLOCK 1
+#define IPMR_PASS 2
+#define IPMR_NOMATCH 3
+#define IPMR_LOG 4
+
+#define IPMDO_SAVERAW 0x0001
+
+#define OPT_SYSLOG 0x001
+#define OPT_RESOLVE 0x002
+#define OPT_HEXBODY 0x004
+#define OPT_VERBOSE 0x008
+#define OPT_HEXHDR 0x010
+#define OPT_TAIL 0x020
+#define OPT_NAT 0x080
+#define OPT_STATE 0x100
+#define OPT_FILTER 0x200
+#define OPT_PORTNUM 0x400
+#define OPT_LOGALL (OPT_NAT|OPT_STATE|OPT_FILTER)
+
+#define HOSTNAME_V4(a,b) hostname((a), 4, (u_32_t *)&(b))
+
+#ifndef LOGFAC
+#define LOGFAC LOG_LOCAL0
+#endif
+
+extern int load_config __P((char *));
+extern void dumphex __P((FILE *, int, char *, int));
+extern int check_action __P((char *, char *, int, int));
+extern char *getword __P((int));
diff --git a/usr/src/uts/common/inet/ipf/ipt.h b/usr/src/uts/common/inet/ipf/ipt.h
new file mode 100644
index 0000000000..938e40041e
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/ipt.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 1993-2001 by Darren Reed.
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ *
+ * $Id: ipt.h,v 2.6 2003/02/16 02:33:09 darrenr Exp $
+ */
+
+#ifndef __IPT_H__
+#define __IPT_H__
+
+#ifndef __P
+# define P_DEF
+# ifdef __STDC__
+# define __P(x) x
+# else
+# define __P(x) ()
+# endif
+#endif
+
+#include <fcntl.h>
+
+
+struct ipread { /* set of reader callbacks plus flags */
+ int (*r_open) __P((char *)); /* presumably opens the input named by its argument - TODO confirm */
+ int (*r_close) __P((void)); /* presumably closes that input - TODO confirm */
+ int (*r_readip) __P((char *, int, char **, int *)); /* presumably reads one IP packet into a buffer - TODO confirm */
+ int r_flags; /* flag bits; R_DO_CKSUM is the one defined after this struct */
+};
+
+#define R_DO_CKSUM 0x01
+
+extern void debug __P((char *, ...));
+extern void verbose __P((char *, ...));
+
+#ifdef P_DEF
+# undef __P
+# undef P_DEF
+#endif
+
+#endif /* __IPT_H__ */
diff --git a/usr/src/uts/common/inet/ipf/netinet/Makefile b/usr/src/uts/common/inet/ipf/netinet/Makefile
new file mode 100644
index 0000000000..e64d612f4c
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/netinet/Makefile
@@ -0,0 +1,32 @@
+#
+#ident "%Z%%M% %I% %E% SMI"
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# uts/common/inet/ipf/netinet/Makefile
+#
+# include global definitions
+include ../../../../../Makefile.master
+
+HDRS= ipl.h ip_compat.h ip_fil.h ip_icmp.h ip_nat.h ip_proxy.h ip_state.h
+
+ROOTDIRS= $(ROOT)/usr/include/netinet
+
+ROOTHDRS= $(HDRS:%=$(ROOT)/usr/include/netinet/%)
+
+CHECKHDRS= $(HDRS:%.h=%.check)
+
+$(ROOTDIRS)/%: %
+ $(INS.file)
+
+.KEEP_STATE:
+
+.PARALLEL: $(CHECKHDRS)
+
+install_h: $(ROOTDIRS) $(ROOTHDRS)
+
+$(ROOTDIRS):
+ $(INS.dir)
+
+check: $(CHECKHDRS)
diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_auth.h b/usr/src/uts/common/inet/ipf/netinet/ip_auth.h
new file mode 100644
index 0000000000..3892778270
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/netinet/ip_auth.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 1997-2001 by Darren Reed & Guido Van Rooij.
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ *
+ * $Id: ip_auth.h,v 2.16 2003/07/25 12:29:56 darrenr Exp $
+ *
+ */
+#ifndef __IP_AUTH_H__
+#define __IP_AUTH_H__
+
+#define FR_NUMAUTH 32
+
+typedef struct frauth { /* one authentication record; holds a copy of the packet's fr_info_t */
+ int fra_age;
+ int fra_len; /* presumably the length of fra_buf - TODO confirm */
+ int fra_index;
+ u_32_t fra_pass;
+ fr_info_t fra_info; /* packet information, stored by value */
+ char *fra_buf;
+#ifdef MENTAT
+ queue_t *fra_q; /* only present when MENTAT is defined */
+#endif
+} frauth_t;
+
+typedef struct frauthent { /* list node wrapping one struct frentry */
+ struct frentry fae_fr; /* the entry itself, stored by value */
+ struct frauthent *fae_next; /* next node in the list */
+ u_long fae_age; /* presumably remaining lifetime of the node - TODO confirm */
+} frauthent_t;
+
+typedef struct fr_authstat { /* presumably counters for the auth code - TODO confirm meanings */
+ U_QUAD_T fas_hits;
+ U_QUAD_T fas_miss;
+ u_long fas_nospace;
+ u_long fas_added;
+ u_long fas_sendfail;
+ u_long fas_sendok;
+ u_long fas_queok;
+ u_long fas_quefail;
+ u_long fas_expire;
+ frauthent_t *fas_faelist; /* pointer to a frauthent_t, presumably the list head - TODO confirm */
+} fr_authstat_t;
+
+
+extern frentry_t *ipauth;
+extern struct fr_authstat fr_authstats;
+extern int fr_defaultauthage;
+extern int fr_authstart;
+extern int fr_authend;
+extern int fr_authsize;
+extern int fr_authused;
+extern int fr_auth_lock;
+extern frentry_t *fr_checkauth __P((fr_info_t *, u_32_t *));
+extern void fr_authexpire __P((void));
+extern int fr_authinit __P((void));
+extern void fr_authunload __P((void));
+extern int fr_authflush __P((void));
+extern mb_t **fr_authpkts;
+extern int fr_newauth __P((mb_t *, fr_info_t *));
+extern int fr_preauthcmd __P((ioctlcmd_t, frentry_t *, frentry_t **));
+extern int fr_auth_ioctl __P((caddr_t, ioctlcmd_t, int));
+
+#endif /* __IP_AUTH_H__ */
diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_compat.h b/usr/src/uts/common/inet/ipf/netinet/ip_compat.h
new file mode 100644
index 0000000000..43368d9937
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/netinet/ip_compat.h
@@ -0,0 +1,2423 @@
+/*
+ * Copyright (C) 1993-2001, 2003 by Darren Reed.
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ *
+ * @(#)ip_compat.h 1.8 1/14/96
+ * $Id: ip_compat.h,v 2.142.2.30 2005/08/11 15:13:49 darrenr Exp $
+ *
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+
+#ifndef __IP_COMPAT_H__
+#define __IP_COMPAT_H__
+
+#ifndef __P
+# ifdef __STDC__
+# define __P(x) x
+# else
+# define __P(x) ()
+# endif
+#endif
+#ifndef __STDC__
+# undef const
+# define const
+#endif
+
+#if defined(_KERNEL) || defined(KERNEL) || defined(__KERNEL__)
+# undef KERNEL
+# undef _KERNEL
+# undef __KERNEL__
+# define KERNEL
+# define _KERNEL
+# define __KERNEL__
+#endif
+
+/*
+ * SOLARIS is 1 when "sun" and one of __svr4__/__SVR4 are defined, else 0.
+ * The value is computed here instead of letting the macro expand to a
+ * defined() expression: producing "defined" through macro expansion while
+ * evaluating an #if is undefined behaviour and is rejected or warned about
+ * by some preprocessors.
+ */
+#ifndef SOLARIS
+# if defined(sun) && (defined(__svr4__) || defined(__SVR4))
+# define SOLARIS 1
+# else
+# define SOLARIS 0
+# endif
+#endif
+#if SOLARIS2 >= 8
+# ifndef USE_INET6
+# define USE_INET6
+# endif
+#endif
+#if defined(__FreeBSD_version) && (__FreeBSD_version >= 400000) && \
+ !defined(_KERNEL) && !defined(USE_INET6) && !defined(NOINET6)
+# define USE_INET6
+#endif
+#if defined(__NetBSD_Version__) && (__NetBSD_Version__ >= 105000000) && \
+ !defined(_KERNEL) && !defined(USE_INET6)
+# define USE_INET6
+# define IPFILTER_M_IPFILTER
+#endif
+#if defined(OpenBSD) && (OpenBSD >= 200206) && \
+ !defined(_KERNEL) && !defined(USE_INET6)
+# define USE_INET6
+#endif
+#if defined(__osf__)
+# define USE_INET6
+#endif
+#if defined(linux) && (!defined(_KERNEL) || defined(CONFIG_IPV6))
+# define USE_INET6
+#endif
+#if defined(HPUXREV) && (HPUXREV >= 1111)
+# define USE_INET6
+#endif
+
+#if defined(BSD) && (BSD < 199103) && defined(__osf__)
+# undef BSD
+# define BSD 199103
+#endif
+
+#if defined(__SVR4) || defined(__svr4__) || defined(__sgi)
+# define index strchr
+# if !defined(_KERNEL)
+# define bzero(a,b) memset(a,0,b)
+# define bcmp memcmp
+# define bcopy(a,b,c) memmove(b,a,c)
+# endif
+#endif
+
+#ifndef LIFNAMSIZ
+# ifdef IF_NAMESIZE
+# define LIFNAMSIZ IF_NAMESIZE
+# else
+# ifdef IFNAMSIZ
+# define LIFNAMSIZ IFNAMSIZ
+# else
+# define LIFNAMSIZ 16 /* fallback when neither system constant exists */
+# endif /* IFNAMSIZ */
+# endif /* IF_NAMESIZE */
+#endif /* LIFNAMSIZ */
+
+#if defined(__sgi) || defined(bsdi) || defined(__hpux) || defined(hpux)
+struct ether_addr {
+ u_char ether_addr_octet[6];
+};
+#endif
+
+#if defined(__sgi) && !defined(IPFILTER_LKM)
+# ifdef __STDC__
+# define IPL_EXTERN(ep) ipfilter##ep
+# else
+# define IPL_EXTERN(ep) ipfilter/**/ep
+# endif
+#else
+# ifdef __STDC__
+# define IPL_EXTERN(ep) ipl##ep
+# else
+# define IPL_EXTERN(ep) ipl/**/ep
+# endif
+#endif
+
+/*
+ * This is a workaround for <sys/uio.h> troubles on FreeBSD and OpenBSD.
+ */
+#ifndef linux
+# ifndef _KERNEL
+# define ADD_KERNEL /* marks that _KERNEL/KERNEL are defined only for the include below */
+# define _KERNEL
+# define KERNEL
+# endif /* !_KERNEL */
+# ifdef __OpenBSD__
+struct file;
+# endif
+# include <sys/uio.h>
+# ifdef ADD_KERNEL
+# undef _KERNEL
+# undef KERNEL
+# endif /* ADD_KERNEL */
+#endif /* !linux */
+
+
+/* ----------------------------------------------------------------------- */
+/* S O L A R I S */
+/* ----------------------------------------------------------------------- */
+#if SOLARIS
+# define MENTAT 1
+# include <sys/cmn_err.h>
+# include <sys/isa_defs.h>
+# include <sys/stream.h>
+# include <sys/ioccom.h>
+# include <sys/sysmacros.h>
+# include <sys/kmem.h>
+# if SOLARIS2 >= 10
+# include <sys/procset.h>
+# include <sys/proc.h>
+# include <sys/devops.h>
+# include <sys/ddi_impldefs.h>
+# endif
+/*
+ * because Solaris 2 defines these in two places :-/
+ */
+# ifndef KERNEL
+# define _KERNEL
+# undef RES_INIT
+# endif /* !KERNEL */
+
+# if SOLARIS2 >= 8
+# include <netinet/ip6.h>
+# include <netinet/icmp6.h>
+# endif
+
+# include <inet/common.h>
+/* These 5 are defined in <inet/ip.h> and <netinet/ip.h> */
+# undef IPOPT_EOL
+# undef IPOPT_NOP
+# undef IPOPT_LSRR
+# undef IPOPT_RR
+# undef IPOPT_SSRR
+# ifdef i386
+# define _SYS_PROMIF_H
+# endif
+# include <inet/ip.h>
+# undef COPYOUT
+# include <inet/ip_ire.h>
+# ifndef KERNEL
+# undef _KERNEL
+# endif
+# if SOLARIS2 >= 8
+# define SNPRINTF snprintf
+
+# include <inet/ip_if.h>
+# define ipif_local_addr ipif_lcl_addr
+/* Only defined in private include file */
+# ifndef V4_PART_OF_V6
+# define V4_PART_OF_V6(v6) v6.s6_addr32[3]
+# endif
+struct ip6_ext {
+ u_char ip6e_nxt;
+ u_char ip6e_len;
+};
+# endif /* SOLARIS2 >= 8 */
+
+# if SOLARIS2 >= 6
+# include <sys/atomic.h>
+typedef uint32_t u_32_t;
+# else
+typedef unsigned int u_32_t;
+# endif
+# define U_32_T 1
+
+# ifdef _KERNEL
+# define KRWLOCK_T krwlock_t
+# define KMUTEX_T kmutex_t
+# include "qif.h"
+# include "pfil.h"
+# if SOLARIS2 >= 6
+# if SOLARIS2 == 6
+# define ATOMIC_INCL(x) atomic_add_long((uint32_t*)&(x), 1)
+# define ATOMIC_DECL(x) atomic_add_long((uint32_t*)&(x), -1)
+# else
+# define ATOMIC_INCL(x) atomic_add_long(&(x), 1)
+# define ATOMIC_DECL(x) atomic_add_long(&(x), -1)
+# endif /* SOLARIS2 == 6 */
+# define ATOMIC_INC64(x) atomic_add_64((uint64_t*)&(x), 1)
+# define ATOMIC_INC32(x) atomic_add_32((uint32_t*)&(x), 1)
+# define ATOMIC_INC16(x) atomic_add_16((uint16_t*)&(x), 1)
+# define ATOMIC_DEC64(x) atomic_add_64((uint64_t*)&(x), -1)
+# define ATOMIC_DEC32(x) atomic_add_32((uint32_t*)&(x), -1)
+# define ATOMIC_DEC16(x) atomic_add_16((uint16_t*)&(x), -1)
+# else
+# define ATOMIC_INC(x) { mutex_enter(&ipf_rw); (x)++; \
+ mutex_exit(&ipf_rw); }
+# define ATOMIC_DEC(x) { mutex_enter(&ipf_rw); (x)--; \
+ mutex_exit(&ipf_rw); }
+# endif /* SOLARIS2 >= 6 */
+# define USE_MUTEXES
+# define MUTEX_ENTER(x) mutex_enter(&(x)->ipf_lk)
+# define READ_ENTER(x) rw_enter(&(x)->ipf_lk, RW_READER)
+# define WRITE_ENTER(x) rw_enter(&(x)->ipf_lk, RW_WRITER)
+# define MUTEX_DOWNGRADE(x) rw_downgrade(&(x)->ipf_lk)
+# define RWLOCK_INIT(x, y) rw_init(&(x)->ipf_lk, (y), \
+ RW_DRIVER, NULL)
+# define RWLOCK_EXIT(x) rw_exit(&(x)->ipf_lk)
+# define RW_DESTROY(x) rw_destroy(&(x)->ipf_lk)
+# define MUTEX_INIT(x, y) mutex_init(&(x)->ipf_lk, (y), \
+ MUTEX_DRIVER, NULL)
+# define MUTEX_DESTROY(x) mutex_destroy(&(x)->ipf_lk)
+# define MUTEX_NUKE(x) bzero((x), sizeof(*(x)))
+# define MUTEX_EXIT(x) mutex_exit(&(x)->ipf_lk)
+# define COPYIN(a,b,c) copyin((caddr_t)(a), (caddr_t)(b), (c))
+# define COPYOUT(a,b,c) copyout((caddr_t)(a), (caddr_t)(b), (c))
+# define BCOPYIN(a,b,c) (void) copyin((caddr_t)(a), (caddr_t)(b), (c))
+# define BCOPYOUT(a,b,c) (void) copyout((caddr_t)(a), (caddr_t)(b), (c))
+# define UIOMOVE(a,b,c,d) uiomove((caddr_t)a,b,c,d)
+# define KFREE(x) kmem_free((char *)(x), sizeof(*(x)))
+# define KFREES(x,s) kmem_free((char *)(x), (s))
+# define SPL_NET(x) ;
+# define SPL_IMP(x) ;
+# undef SPL_X
+# define SPL_X(x) ;
+# ifdef sparc
+# define ntohs(x) (x)
+# define ntohl(x) (x)
+# define htons(x) (x)
+# define htonl(x) (x)
+# endif /* sparc */
+# define KMALLOC(a,b) (a) = (b)kmem_alloc(sizeof(*(a)), KM_NOSLEEP)
+# define KMALLOCS(a,b,c) (a) = (b)kmem_alloc((c), KM_NOSLEEP)
+# define GET_MINOR(x) getminor(x)
+extern void *get_unit __P((char *, int));
+# define GETIFP(n, v) get_unit(n, v)
+# define IFNAME(x) ((qif_t *)x)->qf_name
+# define COPYIFNAME(x, b) \
+ (void) strncpy(b, ((qif_t *)x)->qf_name, \
+ LIFNAMSIZ)
+#ifdef IRE_ILL_CN
+extern kmutex_t s_ill_g_head_lock;
+extern struct s_ill_s *s_ill_g_head; /* ILL List Head */
+#endif /* IRE_ILL_CN */
+# define GETKTIME(x) uniqtime((struct timeval *)x)
+# define MSGDSIZE(x) msgdsize(x)
+# define M_LEN(x) ((x)->b_wptr - (x)->b_rptr)
+# define M_DUPLICATE(x) dupmsg((x))
+# define MTOD(m,t) ((t)((m)->b_rptr))
+# define MTYPE(m) ((m)->b_datap->db_type)
+# define FREE_MB_T(m) freemsg(m)
+# define m_next b_cont
+# define CACHE_HASH(x) (((qpktinfo_t *)(x)->fin_qpi)->qpi_num & 7)
+# define IPF_PANIC(x,y) if (x) { printf y; cmn_err(CE_PANIC, "ipf_panic"); }
+typedef mblk_t mb_t;
+# endif /* _KERNEL */
+
+# if (SOLARIS2 >= 7)
+# ifdef lint
+# define ALIGN32(ptr) (ptr ? 0L : 0L)
+# define ALIGN16(ptr) (ptr ? 0L : 0L)
+# else
+# define ALIGN32(ptr) (ptr)
+# define ALIGN16(ptr) (ptr)
+# endif
+# endif
+
+# if SOLARIS2 < 6
+typedef struct uio uio_t;
+# endif
+typedef int ioctlcmd_t;
+
+# define OS_RECOGNISED 1
+
+#endif /* SOLARIS */
+
+/* ----------------------------------------------------------------------- */
+/* H P U X */
+/* ----------------------------------------------------------------------- */
+#ifdef __hpux
+# define MENTAT 1
+# include <sys/sysmacros.h>
+# include <sys/spinlock.h>
+# include <sys/lock.h>
+# include <sys/stream.h>
+# ifdef USE_INET6
+# include <netinet/if_ether.h>
+# include <netinet/ip6.h>
+# include <netinet/icmp6.h>
+typedef struct ip6_hdr ip6_t;
+# endif
+
+# ifdef _KERNEL
+# define SNPRINTF sprintf
+# if (HPUXREV >= 1111)
+# define IPL_SELECT
+# ifdef IPL_SELECT
+# include <machine/sys/user.h>
+# include <sys/kthread_iface.h>
+# define READ_COLLISION 0x01
+
+typedef struct iplog_select_s {
+ kthread_t *read_waiter;
+ int state;
+} iplog_select_t;
+# endif
+# endif
+
+# define GETKTIME(x) uniqtime((struct timeval *)x)
+
+# if HPUXREV == 1111
+# include "kern_svcs.h"
+# else
+# include <sys/kern_svcs.h>
+# endif
+# undef ti_flags
+# undef TCP_NODELAY
+# undef TCP_MAXSEG
+# include <sys/reg.h>
+# include "../netinet/ip_info.h"
+/*
+ * According to /usr/include/sys/spinlock.h on HP-UX 11.00, these functions
+ * are available. Attempting to use them actually results in unresolved
+ * symbols when it comes time to load the module.
+ * This has been fixed! Yipee!
+ */
+# if 1
+# ifdef __LP64__
+# define ATOMIC_INCL(x) lock_and_incr_int64(&ipf_rw.ipf_lk, &(x), 1)
+# define ATOMIC_DECL(x) lock_and_incr_int64(&ipf_rw.ipf_lk, &(x), -1)
+# else
+# define ATOMIC_INCL(x) lock_and_incr_int32(&ipf_rw.ipf_lk, &(x), 1)
+# define ATOMIC_DECL(x) lock_and_incr_int32(&ipf_rw.ipf_lk, &(x), -1)
+# endif
+# define ATOMIC_INC64(x) lock_and_incr_int64(&ipf_rw.ipf_lk, &(x), 1)
+# define ATOMIC_INC32(x) lock_and_incr_int32(&ipf_rw.ipf_lk, &(x), 1)
+# define ATOMIC_INC16(x) lock_and_incr_int16(&ipf_rw.ipf_lk, &(x), 1)
+# define ATOMIC_DEC64(x) lock_and_incr_int64(&ipf_rw.ipf_lk, &(x), -1)
+# define ATOMIC_DEC32(x) lock_and_incr_int32(&ipf_rw.ipf_lk, &(x), -1)
+# define ATOMIC_DEC16(x) lock_and_incr_int16(&ipf_rw.ipf_lk, &(x), -1)
+# else /* 0 */
+# define ATOMIC_INC64(x) { MUTEX_ENTER(&ipf_rw); (x)++; \
+ MUTEX_EXIT(&ipf_rw); }
+# define ATOMIC_DEC64(x) { MUTEX_ENTER(&ipf_rw); (x)--; \
+ MUTEX_EXIT(&ipf_rw); }
+# define ATOMIC_INC32(x) { MUTEX_ENTER(&ipf_rw); (x)++; \
+ MUTEX_EXIT(&ipf_rw); }
+# define ATOMIC_DEC32(x) { MUTEX_ENTER(&ipf_rw); (x)--; \
+ MUTEX_EXIT(&ipf_rw); }
+# define ATOMIC_INCL(x) { MUTEX_ENTER(&ipf_rw); (x)++; \
+ MUTEX_EXIT(&ipf_rw); }
+# define ATOMIC_DECL(x) { MUTEX_ENTER(&ipf_rw); (x)--; \
+ MUTEX_EXIT(&ipf_rw); }
+# define ATOMIC_INC(x) { MUTEX_ENTER(&ipf_rw); (x)++; \
+ MUTEX_EXIT(&ipf_rw); }
+# define ATOMIC_DEC(x) { MUTEX_ENTER(&ipf_rw); (x)--; \
+ MUTEX_EXIT(&ipf_rw); }
+# endif
+# define ip_cksum ip_csuma
+# define memcpy(a,b,c) bcopy((caddr_t)b, (caddr_t)a, c)
+# define USE_MUTEXES
+# define MUTEX_INIT(x, y) initlock(&(x)->ipf_lk, 0, 0, (y))
+# define MUTEX_ENTER(x) spinlock(&(x)->ipf_lk)
+# define MUTEX_EXIT(x) spinunlock(&(x)->ipf_lk) /* no ';' here: the caller supplies it */
+# define MUTEX_DESTROY(x)
+# define MUTEX_NUKE(x) bzero((char *)(x), sizeof(*(x)))
+# define KMUTEX_T lock_t
+# define kmutex_t lock_t /* for pfil.h */
+# define krwlock_t lock_t /* for pfil.h */
+/*
+ * The read-write lock implementation in HP-UX 11.0 is crippled - it can
+ * only be used by threads working in a user context!
+ * This has been fixed! Yipee! (Or at least it does in 11.00, not 11.11..)
+ */
+# if HPUXREV < 1111
+# define MUTEX_DOWNGRADE(x) lock_write_to_read(&(x)->ipf_lk)
+# define KRWLOCK_T struct rw_lock
+# define READ_ENTER(x) lock_read(&(x)->ipf_lk)
+# define WRITE_ENTER(x) lock_write(&(x)->ipf_lk)
+# if HPUXREV >= 1111 /* NOTE(review): never true inside the HPUXREV < 1111 branch */
+# define RWLOCK_INIT(x, y) rwlock_init4(&(x)->ipf_lk, 0, RWLCK_CANSLEEP, 0, y)
+# else
+# define RWLOCK_INIT(x, y) lock_init3(&(x)->ipf_lk, 0, 1, 0, 0, y)
+# endif
+# define RWLOCK_EXIT(x) lock_done(&(x)->ipf_lk)
+# else /* HPUXREV >= 1111: read-write locks map onto the MUTEX_* spinlock macros */
+# define KRWLOCK_T lock_t
+# define KMUTEX_T lock_t
+# define READ_ENTER(x) MUTEX_ENTER(x)
+# define WRITE_ENTER(x) MUTEX_ENTER(x)
+# define MUTEX_DOWNGRADE(x)
+# define RWLOCK_INIT(x, y) initlock(&(x)->ipf_lk, 0, 0, y)
+# define RWLOCK_EXIT(x) MUTEX_EXIT(x)
+# endif
+# define RW_DESTROY(x)
+# define COPYIN(a,b,c) copyin((caddr_t)(a), (caddr_t)(b), (c))
+# define COPYOUT(a,b,c) copyout((caddr_t)(a), (caddr_t)(b), (c))
+# if HPUXREV >= 1111
+/* Single expression yielding 0, so it is safe under if/else and as an rvalue. */
+# define BCOPYIN(a,b,c) (bcopy((caddr_t)(a), (caddr_t)(b), (c)), 0)
+# define BCOPYOUT(a,b,c) (bcopy((caddr_t)(a), (caddr_t)(b), (c)), 0)
+# else
+# define BCOPYIN(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c))
+# define BCOPYOUT(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c))
+# endif
+# define SPL_NET(x) ;
+# define SPL_IMP(x) ;
+# undef SPL_X
+# define SPL_X(x) ;
+extern void *get_unit __P((char *, int));
+# define GETIFP(n, v) get_unit(n, v)
+# define IFNAME(x, b) ((ill_t *)x)->ill_name
+# define COPYIFNAME(x, b) \
+ (void) strncpy(b, ((qif_t *)x)->qf_name, \
+ LIFNAMSIZ)
+# define UIOMOVE(a,b,c,d) uiomove((caddr_t)a,b,c,d)
+# define SLEEP(id, n) { lock_t *_l = get_sleep_lock((caddr_t)id); \
+ sleep(id, PZERO+1); \
+ spinunlock(_l); \
+ }
+# define WAKEUP(id,x) { lock_t *_l = get_sleep_lock((caddr_t)id); \
+ wakeup(id + x); \
+ spinunlock(_l); \
+ }
+# define KMALLOC(a, b) MALLOC((a), b, sizeof(*(a)), M_IOSYS, M_NOWAIT)
+# define KMALLOCS(a, b, c) MALLOC((a), b, (c), M_IOSYS, M_NOWAIT)
+# define KFREE(x) kmem_free((char *)(x), sizeof(*(x)))
+# define KFREES(x,s) kmem_free((char *)(x), (s))
+# define MSGDSIZE(x) msgdsize(x)
+# define M_LEN(x) ((x)->b_wptr - (x)->b_rptr)
+# define M_DUPLICATE(x) dupmsg((x))
+# define MTOD(m,t) ((t)((m)->b_rptr))
+# define MTYPE(m) ((m)->b_datap->db_type)
+# define FREE_MB_T(m) freemsg(m)
+# define m_next b_cont
+# define IPF_PANIC(x,y) if (x) { printf y; panic("ipf_panic"); }
+typedef mblk_t mb_t;
+
+# define CACHE_HASH(x) (((qpktinfo_t *)(x)->fin_qpi)->qpi_num & 7)
+
+# include "qif.h"
+# include "pfil.h"
+
+# else /* _KERNEL */
+
+typedef unsigned char uchar_t;
+
+# ifndef _SYS_STREAM_INCLUDED
+typedef char * mblk_t;
+typedef void * queue_t;
+typedef u_long ulong;
+# endif
+# include <netinet/ip_info.h>
+
+# endif /* _KERNEL */
+
+# ifdef lint
+# define ALIGN32(ptr) (ptr ? 0L : 0L)
+# define ALIGN16(ptr) (ptr ? 0L : 0L)
+# else
+# define ALIGN32(ptr) (ptr)
+# define ALIGN16(ptr) (ptr)
+# endif
+
+typedef struct uio uio_t;
+typedef int ioctlcmd_t;
+typedef int minor_t;
+typedef unsigned int u_32_t;
+# define U_32_T 1
+
+# define OS_RECOGNISED 1
+
+#endif /* __hpux */
+
+/* ----------------------------------------------------------------------- */
+/* I R I X */
+/* ----------------------------------------------------------------------- */
+#ifdef __sgi
+# undef MENTAT
+# if IRIX < 60500
+typedef struct uio uio_t;
+# endif
+typedef int ioctlcmd_t;
+typedef u_int32_t u_32_t;
+# define U_32_T 1
+
+# ifdef INET6
+# define USE_INET6
+# endif
+
+# define hz HZ
+# include <sys/ksynch.h>
+# define IPF_LOCK_PL plhi
+# include <sys/sema.h>
+# undef kmutex_t
+typedef struct {
+ lock_t *l;
+ int pl;
+} kmutex_t;
+
+# ifdef MUTEX_INIT
+# define KMUTEX_T mutex_t
+# else
+# define KMUTEX_T kmutex_t
+# define KRWLOCK_T kmutex_t
+# endif
+
+# ifdef _KERNEL
+# define ATOMIC_INC(x) { MUTEX_ENTER(&ipf_rw); \
+ (x)++; MUTEX_EXIT(&ipf_rw); }
+# define ATOMIC_DEC(x) { MUTEX_ENTER(&ipf_rw); \
+ (x)--; MUTEX_EXIT(&ipf_rw); }
+# define USE_MUTEXES
+# ifdef MUTEX_INIT
+# include <sys/atomic_ops.h>
+# define ATOMIC_INCL(x) atomicAddUlong(&(x), 1)
+# define ATOMIC_INC64(x) atomicAddUint64(&(x), 1)
+# define ATOMIC_INC32(x) atomicAddUint(&(x), 1)
+# define ATOMIC_INC16 ATOMIC_INC
+# define ATOMIC_DECL(x) atomicAddUlong(&(x), -1)
+# define ATOMIC_DEC64(x) atomicAddUint64(&(x), -1)
+# define ATOMIC_DEC32(x) atomicAddUint(&(x), -1)
+# define ATOMIC_DEC16 ATOMIC_DEC
+# undef MUTEX_INIT
+# define MUTEX_INIT(x, y) mutex_init(&(x)->ipf_lk, \
+ MUTEX_DEFAULT, y)
+# undef MUTEX_ENTER
+# define MUTEX_ENTER(x) mutex_lock(&(x)->ipf_lk, 0)
+# undef MUTEX_EXIT
+# define MUTEX_EXIT(x) mutex_unlock(&(x)->ipf_lk)
+# undef MUTEX_DESTROY
+# define MUTEX_DESTROY(x) mutex_destroy(&(x)->ipf_lk)
+# define MUTEX_DOWNGRADE(x) mrdemote(&(x)->ipf_lk)
+# define KRWLOCK_T mrlock_t
+# define RWLOCK_INIT(x, y) mrinit(&(x)->ipf_lk, y)
+# undef RW_DESTROY
+# define RW_DESTROY(x) mrfree(&(x)->ipf_lk)
+# define READ_ENTER(x) RW_RDLOCK(&(x)->ipf_lk)
+# define WRITE_ENTER(x) RW_WRLOCK(&(x)->ipf_lk)
+# define RWLOCK_EXIT(x) RW_UNLOCK(&(x)->ipf_lk)
+# else
+# define READ_ENTER(x) MUTEX_ENTER(x) /* MUTEX_* already dereference ->ipf_lk */
+# define WRITE_ENTER(x) MUTEX_ENTER(x)
+# define MUTEX_DOWNGRADE(x) ;
+# define RWLOCK_EXIT(x) MUTEX_EXIT(x)
+# define MUTEX_EXIT(x) UNLOCK((x)->ipf_lk.l, (x)->ipf_lk.pl)
+# define MUTEX_INIT(x,y) (x)->ipf_lk.l = LOCK_ALLOC((uchar_t)-1, IPF_LOCK_PL, (lkinfo_t *)-1, KM_NOSLEEP)
+# define MUTEX_DESTROY(x) LOCK_DEALLOC((x)->ipf_lk.l)
+# define MUTEX_ENTER(x) (x)->ipf_lk.pl = LOCK((x)->ipf_lk.l, \
+ IPF_LOCK_PL)
+# endif
+# define MUTEX_NUKE(x) bzero((x), sizeof(*(x)))
+# define FREE_MB_T(m) m_freem(m)
+# define MTOD(m,t) mtod(m,t)
+# define COPYIN(a,b,c) (bcopy((caddr_t)(a), (caddr_t)(b), (c)), 0)
+# define COPYOUT(a,b,c) (bcopy((caddr_t)(a), (caddr_t)(b), (c)), 0)
+# define BCOPYIN(a,b,c) (bcopy((caddr_t)(a), (caddr_t)(b), (c)), 0)
+# define BCOPYOUT(a,b,c) (bcopy((caddr_t)(a), (caddr_t)(b), (c)), 0)
+# define UIOMOVE(a,b,c,d) uiomove((caddr_t)a,b,c,d)
+# define SLEEP(id, n) sleep((id), PZERO+1)
+# define WAKEUP(id,x) wakeup(id+x)
+# define KFREE(x) kmem_free((char *)(x), sizeof(*(x)))
+# define KFREES(x,s) kmem_free((char *)(x), (s))
+# define GETIFP(n,v) ifunit(n)
+# include <sys/kmem.h>
+# include <sys/ddi.h>
+# define KMALLOC(a,b) (a) = (b)kmem_alloc(sizeof(*(a)), KM_NOSLEEP)
+# define KMALLOCS(a,b,c) (a) = (b)kmem_alloc((c), KM_NOSLEEP)
+# define GET_MINOR(x) getminor(x)
+# define USE_SPL 1
+# define SPL_IMP(x) (x) = splimp()
+# define SPL_NET(x) (x) = splnet()
+# define SPL_X(x) (void) splx(x)
+extern void m_copydata __P((struct mbuf *, int, int, caddr_t));
+extern void m_copyback __P((struct mbuf *, int, int, caddr_t));
+# define MSGDSIZE(x) mbufchainlen(x)
+# define M_LEN(x) (x)->m_len
+# define M_DUPLICATE(x) m_copy((x), 0, M_COPYALL)
+# define GETKTIME(x) microtime((struct timeval *)x)
+# define CACHE_HASH(x) ((IFNAME(fin->fin_ifp)[0] + \
+ ((struct ifnet *)fin->fin_ifp)->if_unit) & 7)
+# define IPF_PANIC(x,y) if (x) { printf y; panic("ipf_panic"); }
+typedef struct mbuf mb_t;
+# else
+# undef RW_DESTROY
+# undef MUTEX_INIT
+# undef MUTEX_DESTROY
+# endif /* _KERNEL */
+
+# define OS_RECOGNISED 1
+
+#endif /* __sgi */
+
+/* ----------------------------------------------------------------------- */
+/* T R U 6 4 */
+/* ----------------------------------------------------------------------- */
+#ifdef __osf__
+# undef MENTAT
+
+# include <kern/lock.h>
+# include <sys/sysmacros.h>
+
+# ifdef _KERNEL
+# define KMUTEX_T simple_lock_data_t
+# define KRWLOCK_T lock_data_t
+# include <net/net_globals.h>
+# define USE_MUTEXES
+# define READ_ENTER(x) lock_read(&(x)->ipf_lk)
+# define WRITE_ENTER(x) lock_write(&(x)->ipf_lk)
+# define MUTEX_DOWNGRADE(x) lock_write_to_read(&(x)->ipf_lk)
+# define RWLOCK_INIT(x, y) lock_init(&(x)->ipf_lk, TRUE)
+# define RWLOCK_EXIT(x) lock_done(&(x)->ipf_lk)
+# define RW_DESTROY(x) lock_terminate(&(x)->ipf_lk)
+# define MUTEX_ENTER(x) simple_lock(&(x)->ipf_lk)
+# define MUTEX_INIT(x, y) simple_lock_init(&(x)->ipf_lk)
+# define MUTEX_DESTROY(x) simple_lock_terminate(&(x)->ipf_lk)
+# define MUTEX_EXIT(x) simple_unlock(&(x)->ipf_lk)
+# define MUTEX_NUKE(x) bzero(x, sizeof(*(x)))
+# define ATOMIC_INC64(x) atomic_incq((uint64_t*)&(x))
+# define ATOMIC_DEC64(x) atomic_decq((uint64_t*)&(x))
+# define ATOMIC_INC32(x) atomic_incl((uint32_t*)&(x))
+# define ATOMIC_DEC32(x) atomic_decl((uint32_t*)&(x))
+# define ATOMIC_INC16(x) { simple_lock(&ipf_rw); (x)++; \
+ simple_unlock(&ipf_rw); }
+# define ATOMIC_DEC16(x) { simple_lock(&ipf_rw); (x)--; \
+ simple_unlock(&ipf_rw); }
+# define ATOMIC_INCL(x) atomic_incl((uint32_t*)&(x))
+# define ATOMIC_DECL(x) atomic_decl((uint32_t*)&(x))
+# define ATOMIC_INC(x) { simple_lock(&ipf_rw); (x)++; \
+ simple_unlock(&ipf_rw); }
+# define ATOMIC_DEC(x) { simple_lock(&ipf_rw); (x)--; \
+ simple_unlock(&ipf_rw); }
+# define SPL_NET(x) ;
+# define SPL_IMP(x) ;
+# undef SPL_X
+# define SPL_X(x) ;
+# define UIOMOVE(a,b,c,d) uiomove((caddr_t)a, b, d)
+# define FREE_MB_T(m) m_freem(m)
+# define MTOD(m,t) mtod(m,t)
+# define GETIFP(n, v) ifunit(n)
+# define GET_MINOR getminor
+# define WAKEUP(id,x) wakeup(id + x)
+# define COPYIN(a,b,c) copyin((caddr_t)(a), (caddr_t)(b), (c))
+# define COPYOUT(a,b,c) copyout((caddr_t)(a), (caddr_t)(b), (c))
+# define BCOPYIN(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c))
+# define BCOPYOUT(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c))
+# define KMALLOC(a, b) MALLOC((a), b, sizeof(*(a)), M_PFILT, M_NOWAIT)
+# define KMALLOCS(a, b, c) MALLOC((a), b, (c), M_PFILT, \
+ ((c) > 4096) ? M_WAITOK : M_NOWAIT)
+# define KFREE(x) FREE((x), M_PFILT)
+# define KFREES(x,s) FREE((x), M_PFILT)
+# define MSGDSIZE(x) mbufchainlen(x)
+# define M_LEN(x) (x)->m_len
+# define M_DUPLICATE(x) m_copy((x), 0, M_COPYALL)
+# define GETKTIME(x) microtime((struct timeval *)x)
+# define CACHE_HASH(x) ((IFNAME(fin->fin_ifp)[0] + \
+ ((struct ifnet *)fin->fin_ifp)->if_unit) & 7)
+# define IPF_PANIC(x,y) if (x) { printf y; panic("ipf_panic"); }
+typedef struct mbuf mb_t;
+# endif /* _KERNEL */
+
+# if (defined(_KERNEL) || defined(_NO_BITFIELDS) || (__STDC__ == 1))
+# define IP_V(x) ((x)->ip_vhl >> 4)
+# define IP_HL(x) ((x)->ip_vhl & 0xf)
+# define IP_V_A(x,y) (x)->ip_vhl |= (((y) << 4) & 0xf0)
+# define IP_HL_A(x,y) (x)->ip_vhl |= ((y) & 0xf)
+# define TCP_X2(x) ((x)->th_xoff & 0xf)
+# define TCP_X2_A(x,y) (x)->th_xoff |= ((y) & 0xf)
+# define TCP_OFF(x) ((x)->th_xoff >> 4)
+# define TCP_OFF_A(x,y) (x)->th_xoff |= (((y) << 4) & 0xf0)
+# endif
+
+/*
+ * These are from Solaris' #defines for little endian.
+ */
+#define IP6F_MORE_FRAG 0x0100
+#define IP6F_RESERVED_MASK 0x0600
+#define IP6F_OFF_MASK 0xf8ff
+
+struct ip6_ext {
+ u_char ip6e_nxt;
+ u_char ip6e_len;
+};
+
+typedef int ioctlcmd_t;
+/*
+ * Really, any arch where sizeof(long) != sizeof(int).
+ */
+typedef unsigned int u_32_t;
+# define U_32_T 1
+
+# define OS_RECOGNISED 1
+#endif /* __osf__ */
+
+/* ----------------------------------------------------------------------- */
+/* N E T B S D */
+/* ----------------------------------------------------------------------- */
+#ifdef __NetBSD__
+# if defined(_KERNEL) && !defined(IPFILTER_LKM)
+# include "bpfilter.h"
+# if defined(__NetBSD_Version__) && (__NetBSD_Version__ >= 104110000)
+# include "opt_inet.h"
+# endif
+# ifdef INET6
+# define USE_INET6
+# endif
+# if (__NetBSD_Version__ >= 105000000)
+# define HAVE_M_PULLDOWN 1
+# endif
+# endif
+
+# ifdef _KERNEL
+# define MSGDSIZE(x) mbufchainlen(x)
+# define M_LEN(x) (x)->m_len
+# define M_DUPLICATE(x) m_copy((x), 0, M_COPYALL)
+# define GETKTIME(x) microtime((struct timeval *)x)
+# define IPF_PANIC(x,y) if (x) { printf y; panic("ipf_panic"); }
+# define COPYIN(a,b,c) copyin((caddr_t)(a), (caddr_t)(b), (c))
+# define COPYOUT(a,b,c) copyout((caddr_t)(a), (caddr_t)(b), (c))
+# define BCOPYIN(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c))
+# define BCOPYOUT(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c))
+typedef struct mbuf mb_t;
+# endif /* _KERNEL */
+# if (NetBSD <= 1991011) && (NetBSD >= 199606)
+# define IFNAME(x) ((struct ifnet *)x)->if_xname
+# define COPYIFNAME(x, b) \
+ (void) strncpy(b, \
+ ((struct ifnet *)x)->if_xname, \
+ LIFNAMSIZ)
+# define CACHE_HASH(x) ((((struct ifnet *)fin->fin_ifp)->if_index)&7)
+# else
+# define CACHE_HASH(x) ((IFNAME(fin->fin_ifp)[0] + \
+ ((struct ifnet *)fin->fin_ifp)->if_unit) & 7)
+# endif
+
+typedef struct uio uio_t;
+typedef u_long ioctlcmd_t;
+typedef int minor_t;
+typedef u_int32_t u_32_t;
+# define U_32_T 1
+
+# define OS_RECOGNISED 1
+#endif /* __NetBSD__ */
+
+
+/* ----------------------------------------------------------------------- */
+/* F R E E B S D */
+/* ----------------------------------------------------------------------- */
+#ifdef __FreeBSD__
+# if defined(_KERNEL)
+# if (__FreeBSD_version >= 500000)
+# include "opt_bpf.h"
+# else
+# include "bpf.h"
+# endif
+# if defined(__FreeBSD_version) && (__FreeBSD_version >= 400000)
+# include "opt_inet6.h"
+# endif
+# if defined(INET6) && !defined(USE_INET6)
+# define USE_INET6
+# endif
+# endif
+
+# if defined(_KERNEL)
+# if (__FreeBSD_version >= 400000)
+/*
+ * When #define'd, the 5.2.1 kernel panics when used with the ftp proxy.
+ * There may be other, safe, kernels but this is not extensively tested yet.
+ */
+# define HAVE_M_PULLDOWN
+# endif
+# if !defined(IPFILTER_LKM) && (__FreeBSD_version >= 300000)
+# include "opt_ipfilter.h"
+# endif
+# define COPYIN(a,b,c) copyin((caddr_t)(a), (caddr_t)(b), (c))
+# define COPYOUT(a,b,c) copyout((caddr_t)(a), (caddr_t)(b), (c))
+# define BCOPYIN(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c))
+# define BCOPYOUT(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c))
+
+# if (__FreeBSD_version >= 500043)
+# define NETBSD_PF
+# endif
+# endif /* _KERNEL */
+
+# if (__FreeBSD_version >= 500043)
+# include <sys/mutex.h>
+# include <sys/sx.h>
+/*
+ * Whilst the sx(9) locks on FreeBSD have the right semantics and interface
+ * for what we want to use them for, despite testing showing they work -
+ * with a WITNESS kernel, it generates LOR messages.
+ */
+# define KMUTEX_T struct mtx
+# if 1
+# define KRWLOCK_T struct mtx
+# else
+# define KRWLOCK_T struct sx
+# endif
+# endif
+
+# if (__FreeBSD_version >= 501113)
+# include <net/if_var.h>
+# define IFNAME(x) ((struct ifnet *)x)->if_xname
+# define COPYIFNAME(x, b) \
+ (void) strncpy(b, \
+ ((struct ifnet *)x)->if_xname, \
+ LIFNAMSIZ)
+# endif
+# if (__FreeBSD_version >= 500043)
+# define CACHE_HASH(x) ((((struct ifnet *)fin->fin_ifp)->if_index) & 7)
+# else
+# define CACHE_HASH(x) ((IFNAME(fin->fin_ifp)[0] + \
+ ((struct ifnet *)fin->fin_ifp)->if_unit) & 7)
+# endif
+
+# ifdef _KERNEL
+# define GETKTIME(x) microtime((struct timeval *)x)
+
+# if (__FreeBSD_version >= 500002)
+# include <netinet/in_systm.h>
+# include <netinet/ip.h>
+# include <machine/in_cksum.h>
+# endif
+
+# if (__FreeBSD_version >= 500043)
+# define USE_MUTEXES
+# define MUTEX_ENTER(x) mtx_lock(&(x)->ipf_lk)
+# define MUTEX_EXIT(x) mtx_unlock(&(x)->ipf_lk)
+# define MUTEX_INIT(x,y) mtx_init(&(x)->ipf_lk, (y), NULL,\
+ MTX_DEF)
+# define MUTEX_DESTROY(x) mtx_destroy(&(x)->ipf_lk)
+# define MUTEX_NUKE(x) bzero((x), sizeof(*(x)))
+/*
+ * Whilst the sx(9) locks on FreeBSD have the right semantics and interface
+ * for what we want to use them for, despite testing showing they work -
+ * with a WITNESS kernel, it generates LOR messages.
+ */
+# if 1
+# define READ_ENTER(x) mtx_lock(&(x)->ipf_lk)
+# define WRITE_ENTER(x) mtx_lock(&(x)->ipf_lk)
+# define RWLOCK_EXIT(x) mtx_unlock(&(x)->ipf_lk)
+# define MUTEX_DOWNGRADE(x) ;
+# define RWLOCK_INIT(x,y) mtx_init(&(x)->ipf_lk, (y), NULL,\
+ MTX_DEF)
+# define RW_DESTROY(x) mtx_destroy(&(x)->ipf_lk)
+# else
+# define READ_ENTER(x) sx_slock(&(x)->ipf_lk)
+# define WRITE_ENTER(x) sx_xlock(&(x)->ipf_lk)
+# define MUTEX_DOWNGRADE(x) sx_downgrade(&(x)->ipf_lk)
+# define RWLOCK_INIT(x, y) sx_init(&(x)->ipf_lk, (y))
+# define RW_DESTROY(x) sx_destroy(&(x)->ipf_lk)
+# ifdef sx_unlock
+# define RWLOCK_EXIT(x) sx_unlock(x)
+# else
+# define RWLOCK_EXIT(x) do { \
+ if ((x)->ipf_lk.sx_cnt < 0) \
+ sx_xunlock(&(x)->ipf_lk); \
+ else \
+ sx_sunlock(&(x)->ipf_lk); \
+ } while (0)
+# endif
+# endif
+# include <machine/atomic.h>
+# define ATOMIC_INC(x) { mtx_lock(&ipf_rw.ipf_lk); (x)++; \
+ mtx_unlock(&ipf_rw.ipf_lk); }
+# define ATOMIC_DEC(x) { mtx_lock(&ipf_rw.ipf_lk); (x)--; \
+ mtx_unlock(&ipf_rw.ipf_lk); }
+# define ATOMIC_INCL(x) atomic_add_long(&(x), 1)
+# define ATOMIC_INC64(x) ATOMIC_INC(x)
+# define ATOMIC_INC32(x) atomic_add_32(&(x), 1)
+# define ATOMIC_INC16(x) atomic_add_16(&(x), 1)
+# define ATOMIC_DECL(x) atomic_add_long(&(x), -1)
+# define ATOMIC_DEC64(x) ATOMIC_DEC(x)
+# define ATOMIC_DEC32(x) atomic_add_32(&(x), -1)
+# define ATOMIC_DEC16(x) atomic_add_16(&(x), -1)
+# define SPL_X(x) ;
+# define SPL_NET(x) ;
+# define SPL_IMP(x) ;
+extern int in_cksum __P((struct mbuf *, int));
+# endif /* __FreeBSD_version >= 500043 */
+# define MSGDSIZE(x) mbufchainlen(x)
+# define M_LEN(x) (x)->m_len
+# define M_DUPLICATE(x) m_copy((x), 0, M_COPYALL)
+# define IPF_PANIC(x,y) if (x) { printf y; panic("ipf_panic"); }
+typedef struct mbuf mb_t;
+# endif /* _KERNEL */
+
+# if __FreeBSD__ < 3
+# include <machine/spl.h>
+# else
+# if __FreeBSD__ == 3
+# if defined(IPFILTER_LKM) && !defined(ACTUALLY_LKM_NOT_KERNEL)
+# define ACTUALLY_LKM_NOT_KERNEL
+# endif
+# endif
+# endif
+
+# if (__FreeBSD_version >= 300000)
+typedef u_long ioctlcmd_t;
+# else
+typedef int ioctlcmd_t;
+# endif
+typedef struct uio uio_t;
+typedef int minor_t;
+typedef u_int32_t u_32_t;
+# define U_32_T 1
+
+# define OS_RECOGNISED 1
+#endif /* __FreeBSD__ */
+
+
+/* ----------------------------------------------------------------------- */
+/* O P E N B S D */
+/* ----------------------------------------------------------------------- */
+#ifdef __OpenBSD__
+# ifdef INET6
+# define USE_INET6
+# endif
+
+# ifdef _KERNEL
+# if !defined(IPFILTER_LKM)
+# include "bpfilter.h"
+# endif
+# if (OpenBSD >= 200311)
+# define SNPRINTF snprintf
+# if defined(USE_INET6)
+# include "netinet6/in6_var.h"
+# include "netinet6/nd6.h"
+# endif
+# endif
+# if (OpenBSD >= 200012)
+# define HAVE_M_PULLDOWN 1
+# endif
+# define COPYIN(a,b,c) copyin((caddr_t)(a), (caddr_t)(b), (c))
+# define COPYOUT(a,b,c) copyout((caddr_t)(a), (caddr_t)(b), (c))
+# define BCOPYIN(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c))
+# define BCOPYOUT(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c))
+# define GETKTIME(x) microtime((struct timeval *)x)
+# define MSGDSIZE(x) mbufchainlen(x)
+# define M_LEN(x) (x)->m_len
+# define M_DUPLICATE(x) m_copy((x), 0, M_COPYALL)
+# define IPF_PANIC(x,y) if (x) { printf y; panic("ipf_panic"); }
+typedef struct mbuf mb_t;
+# endif /* _KERNEL */
+# if (OpenBSD >= 199603)
+# define IFNAME(x, b) ((struct ifnet *)x)->if_xname
+# define COPYIFNAME(x, b) \
+ (void) strncpy(b, \
+ ((struct ifnet *)x)->if_xname, \
+ LIFNAMSIZ)
+# define CACHE_HASH(x) ((((struct ifnet *)fin->fin_ifp)->if_index)&7)
+# else
+# define CACHE_HASH(x) ((IFNAME(fin->fin_ifp)[0] + \
+ ((struct ifnet *)fin->fin_ifp)->if_unit) & 7)
+# endif
+
+typedef struct uio uio_t;
+typedef u_long ioctlcmd_t;
+typedef int minor_t;
+typedef u_int32_t u_32_t;
+# define U_32_T 1
+
+# define OS_RECOGNISED 1
+#endif /* __OpenBSD__ */
+
+
+/* ----------------------------------------------------------------------- */
+/* B S D O S */
+/* ----------------------------------------------------------------------- */
+#ifdef _BSDI_VERSION
+# ifdef INET6
+# define USE_INET6
+# endif
+
+# ifdef _KERNEL
+# define GETKTIME(x) microtime((struct timeval *)x)
+# define MSGDSIZE(x) mbufchainlen(x)
+# define M_LEN(x) (x)->m_len
+# define M_DUPLICATE(x) m_copy((x), 0, M_COPYALL)
+# define CACHE_HASH(x) ((IFNAME(fin->fin_ifp)[0] + \
+ ((struct ifnet *)fin->fin_ifp)->if_unit) & 7)
+typedef struct mbuf mb_t;
+# endif /* _KERNEL */
+
+# if (_BSDI_VERSION >= 199701)
+typedef u_long ioctlcmd_t;
+# else
+typedef int ioctlcmd_t;
+# endif
+typedef u_int32_t u_32_t;
+# define U_32_T 1
+
+#endif /* _BSDI_VERSION */
+
+
+/* ----------------------------------------------------------------------- */
+/* S U N O S 4 */
+/* ----------------------------------------------------------------------- */
+#if defined(sun) && !defined(OS_RECOGNISED) /* SunOS4 */
+# ifdef _KERNEL
+# include <sys/kmem_alloc.h>
+# define GETKTIME(x) uniqtime((struct timeval *)x)
+# define MSGDSIZE(x) mbufchainlen(x)
+# define M_LEN(x) (x)->m_len
+# define M_DUPLICATE(x) m_copy((x), 0, M_COPYALL)
+# define CACHE_HASH(x) ((IFNAME(fin->fin_ifp)[0] + \
+ ((struct ifnet *)fin->fin_ifp)->if_unit) & 7)
+# define GETIFP(n, v) ifunit(n, IFNAMSIZ)
+# define KFREE(x) kmem_free((char *)(x), sizeof(*(x)))
+# define KFREES(x,s) kmem_free((char *)(x), (s))
+# define SLEEP(id, n) sleep((id), PZERO+1)
+# define WAKEUP(id,x) wakeup(id + x)
+# define UIOMOVE(a,b,c,d) uiomove((caddr_t)a,b,c,d)
+# define IPF_PANIC(x,y) if (x) { printf y; panic("ipf_panic"); }
+
+extern void m_copydata __P((struct mbuf *, int, int, caddr_t));
+extern void m_copyback __P((struct mbuf *, int, int, caddr_t));
+
+typedef struct mbuf mb_t;
+# endif
+
+typedef struct uio uio_t;
+typedef int ioctlcmd_t;
+typedef int minor_t;
+typedef unsigned int u_32_t;
+# define U_32_T 1
+
+# define OS_RECOGNISED 1
+
+#endif /* SunOS 4 */
+
+/* ----------------------------------------------------------------------- */
+/* L I N U X */
+/* ----------------------------------------------------------------------- */
+#if defined(linux) && !defined(OS_RECOGNISED)
+#include <linux/config.h>
+#include <linux/version.h>
+# if LINUX >= 20600
+# define HDR_T_PRIVATE 1
+# endif
+# undef USE_INET6
+# ifdef USE_INET6
+struct ip6_ext {
+ u_char ip6e_nxt;
+ u_char ip6e_len;
+};
+# endif
+
+# ifdef _KERNEL
+# define IPF_PANIC(x,y) if (x) { printf y; panic("ipf_panic"); }
+# define BCOPYIN(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c))
+# define BCOPYOUT(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c))
+# define COPYIN(a,b,c) copy_from_user((caddr_t)(b), (caddr_t)(a), (c))
+# define COPYOUT(a,b,c) copy_to_user((caddr_t)(b), (caddr_t)(a), (c))
+# define FREE_MB_T(m) kfree_skb(m)
+# define GETKTIME(x) do_gettimeofday((struct timeval *)x)
+# define SLEEP(x,s) 0, interruptible_sleep_on(x##_linux)
+# define WAKEUP(x,y) wake_up(x##_linux + y)
+# define UIOMOVE(a,b,c,d) uiomove(a,b,c,d)
+# define USE_MUTEXES
+# define KRWLOCK_T rwlock_t
+# define KMUTEX_T spinlock_t
+# define MUTEX_INIT(x,y) spin_lock_init(&(x)->ipf_lk)
+# define MUTEX_ENTER(x) spin_lock(&(x)->ipf_lk)
+# define MUTEX_EXIT(x) spin_unlock(&(x)->ipf_lk)
+# define MUTEX_DESTROY(x) do { } while (0)
+# define MUTEX_NUKE(x) bzero(&(x)->ipf_lk, sizeof((x)->ipf_lk))
+# define READ_ENTER(x) ipf_read_enter(x)
+# define WRITE_ENTER(x) ipf_write_enter(x)
+# define RWLOCK_INIT(x,y) rwlock_init(&(x)->ipf_lk)
+# define RW_DESTROY(x) do { } while (0)
+# define RWLOCK_EXIT(x) ipf_rw_exit(x)
+# define MUTEX_DOWNGRADE(x) ipf_rw_downgrade(x)
+/*
+ * Counter updates for Linux: each one brackets the increment or decrement
+ * with MUTEX_ENTER(&ipf_rw) / MUTEX_EXIT(&ipf_rw).  The statements are
+ * wrapped in do { } while (0) so that an invocation followed by ';' is a
+ * single statement; without the wrapper only the MUTEX_ENTER() would be
+ * governed by an unbraced if/else or loop in the caller.
+ */
+# define ATOMIC_INCL(x) do { MUTEX_ENTER(&ipf_rw); (x)++; \
+ MUTEX_EXIT(&ipf_rw); } while (0)
+# define ATOMIC_DECL(x) do { MUTEX_ENTER(&ipf_rw); (x)--; \
+ MUTEX_EXIT(&ipf_rw); } while (0)
+# define ATOMIC_INC64(x) do { MUTEX_ENTER(&ipf_rw); (x)++; \
+ MUTEX_EXIT(&ipf_rw); } while (0)
+# define ATOMIC_INC32(x) do { MUTEX_ENTER(&ipf_rw); (x)++; \
+ MUTEX_EXIT(&ipf_rw); } while (0)
+# define ATOMIC_INC16(x) do { MUTEX_ENTER(&ipf_rw); (x)++; \
+ MUTEX_EXIT(&ipf_rw); } while (0)
+# define ATOMIC_DEC64(x) do { MUTEX_ENTER(&ipf_rw); (x)--; \
+ MUTEX_EXIT(&ipf_rw); } while (0)
+# define ATOMIC_DEC32(x) do { MUTEX_ENTER(&ipf_rw); (x)--; \
+ MUTEX_EXIT(&ipf_rw); } while (0)
+# define ATOMIC_DEC16(x) do { MUTEX_ENTER(&ipf_rw); (x)--; \
+ MUTEX_EXIT(&ipf_rw); } while (0)
+# define SPL_IMP(x) do { } while (0)
+# define SPL_NET(x) do { } while (0)
+# define SPL_X(x) do { } while (0)
+# define IFNAME(x) ((struct net_device*)x)->name
+# define CACHE_HASH(x) ((IFNAME(fin->fin_ifp)[0] + \
+ ((struct net_device *)fin->fin_ifp)->ifindex) & 7)
+typedef struct sk_buff mb_t;
+extern void m_copydata __P((mb_t *, int, int, caddr_t));
+extern void m_copyback __P((mb_t *, int, int, caddr_t));
+extern void m_adj __P((mb_t *, int));
+extern mb_t *m_pullup __P((mb_t *, int));
+# define mbuf sk_buff
+
+# define mtod(m, t) ((t)(m)->data)
+# define m_len len
+# define m_next next
+# define M_DUPLICATE(m) skb_clone((m), in_interrupt() ? GFP_ATOMIC : \
+ GFP_KERNEL)
+# define MSGDSIZE(m) (m)->len
+# define M_LEN(m) (m)->len
+
+# define splnet(x) ;
+# define printf printk
+# define bcopy(s,d,z) memmove(d, s, z)
+# define bzero(s,z) memset(s, 0, z)
+# define bcmp(a,b,z) memcmp(a, b, z)
+
+# define ifnet net_device
+# define if_xname name
+# define if_unit ifindex
+
+# define KMALLOC(x,t) (x) = (t)kmalloc(sizeof(*(x)), \
+ in_interrupt() ? GFP_ATOMIC : GFP_KERNEL)
+# define KFREE(x) kfree(x)
+# define KMALLOCS(x,t,s) (x) = (t)kmalloc((s), \
+ in_interrupt() ? GFP_ATOMIC : GFP_KERNEL)
+# define KFREES(x,s) kfree(x)
+
+# define GETIFP(n,v) dev_get_by_name(n)
+
+# else
+# include <net/ethernet.h>
+
+struct mbuf {
+};
+
+# ifndef _NET_ROUTE_H
+struct rtentry {
+};
+# endif
+
+struct ifnet {
+ char if_xname[IFNAMSIZ];
+ int if_unit;
+ int (* if_output) __P((struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *));
+ struct ifaddr *if_addrlist;
+};
+# define IFNAME(x) ((struct ifnet *)x)->if_xname
+
+# endif /* _KERNEL */
+
+# define COPYIFNAME(x, b) \
+ (void) strncpy(b, \
+ ((struct ifnet *)x)->if_xname, \
+ LIFNAMSIZ)
+
+# include <linux/fs.h>
+# define FWRITE FMODE_WRITE
+# define FREAD FMODE_READ
+
+# define __USE_MISC 1
+# define __FAVOR_BSD 1
+
+typedef struct uio {
+ struct iovec *uio_iov;
+ void *uio_file;
+ char *uio_buf;
+ int uio_iovcnt;
+ int uio_offset;
+ size_t uio_resid;
+ int uio_rw;
+} uio_t;
+
+extern int uiomove __P((caddr_t, size_t, int, struct uio *));
+
+# define UIO_READ 1
+# define UIO_WRITE 2
+
+typedef u_long ioctlcmd_t;
+typedef int minor_t;
+typedef u_int32_t u_32_t;
+# define U_32_T 1
+
+# define OS_RECOGNISED 1
+
+#endif
+
+
+/* ----------------------------------------------------------------------- */
+/* A I X */
+/* ----------------------------------------------------------------------- */
+#if defined(_AIX51)
+# undef MENTAT
+
+# include <sys/lock.h>
+# include <sys/sysmacros.h>
+
+# ifdef _KERNEL
+# define rw_read_locked(x) 0
+# include <net/net_globals.h>
+# include <net/net_malloc.h>
+# define KMUTEX_T simple_lock_t
+# define KRWLOCK_T complex_lock_t
+# define USE_MUTEXES 1
+# define USE_SPL 1
+# define READ_ENTER(x) lock_read((x)->ipf_lk)
+# define WRITE_ENTER(x) lock_write((x)->ipf_lk)
+# define MUTEX_DOWNGRADE(x) lock_write_to_read((x)->ipf_lk)
+# define RWLOCK_INIT(x, y) lock_alloc(&(x)->ipf_lk, \
+ LOCK_ALLOC_PIN, \
+ (u_short)y, 0); \
+ lock_init((x)->ipf_lk, TRUE)
+# define RWLOCK_EXIT(x) lock_done((x)->ipf_lk)
+# define RW_DESTROY(x) lock_free(&(x)->ipf_lk)
+# define MUTEX_ENTER(x) simple_lock((x)->ipf_lk)
+# define MUTEX_INIT(x, y) lock_alloc(&(x)->ipf_lk, \
+ LOCK_ALLOC_PIN, \
+ (u_short)y, 0); \
+ simple_lock_init((x)->ipf_lk)
+# define MUTEX_DESTROY(x) lock_free(&(x)->ipf_lk)
+# define MUTEX_EXIT(x) simple_unlock((x)->ipf_lk)
+# define MUTEX_NUKE(x) bzero(&(x)->ipf_lk, sizeof((x)->ipf_lk))
+# define ATOMIC_INC64(x) { MUTEX_ENTER(&ipf_rw); (x)++; \
+ MUTEX_EXIT(&ipf_rw); }
+# define ATOMIC_DEC64(x) { MUTEX_ENTER(&ipf_rw); (x)--; \
+ MUTEX_EXIT(&ipf_rw); }
+# define ATOMIC_INC32(x) { MUTEX_ENTER(&ipf_rw); (x)++; \
+ MUTEX_EXIT(&ipf_rw); }
+# define ATOMIC_DEC32(x) { MUTEX_ENTER(&ipf_rw); (x)--; \
+ MUTEX_EXIT(&ipf_rw); }
+# define ATOMIC_INCL(x) { MUTEX_ENTER(&ipf_rw); (x)++; \
+ MUTEX_EXIT(&ipf_rw); }
+# define ATOMIC_DECL(x) { MUTEX_ENTER(&ipf_rw); (x)--; \
+ MUTEX_EXIT(&ipf_rw); }
+# define ATOMIC_INC(x) { MUTEX_ENTER(&ipf_rw); (x)++; \
+ MUTEX_EXIT(&ipf_rw); }
+# define ATOMIC_DEC(x) { MUTEX_ENTER(&ipf_rw); (x)--; \
+ MUTEX_EXIT(&ipf_rw); }
+# define SPL_NET(x) x = splnet()
+# define SPL_IMP(x) x = splimp()
+# undef SPL_X
+# define SPL_X(x) splx(x)
+# define UIOMOVE(a,b,c,d) uiomove((caddr_t)a,b,c,d)
+extern void* getifp __P((char *, int));
+# define GETIFP(n, v) getifp(n, v)
+# define GET_MINOR minor
+# define SLEEP(id, n) sleepx((id), PZERO+1, 0)
+# define WAKEUP(id,x) wakeup(id)
+# define COPYIN(a,b,c) copyin((caddr_t)(a), (caddr_t)(b), (c))
+# define COPYOUT(a,b,c) copyout((caddr_t)(a), (caddr_t)(b), (c))
+# define BCOPYIN(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c))
+# define BCOPYOUT(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c))
+# define KMALLOC(a, b) MALLOC((a), b, sizeof(*(a)), M_TEMP, M_NOWAIT)
+# define KMALLOCS(a, b, c) MALLOC((a), b, (c), M_TEMP, \
+ ((c) > 4096) ? M_WAITOK : M_NOWAIT)
+# define KFREE(x) FREE((x), M_TEMP)
+# define KFREES(x,s) FREE((x), M_TEMP)
+# define MSGDSIZE(x) mbufchainlen(x)
+# define M_LEN(x) (x)->m_len
+# define M_DUPLICATE(x) m_copy((x), 0, M_COPYALL)
+# define GETKTIME(x)
+# define CACHE_HASH(x) ((IFNAME(fin->fin_ifp)[0] + \
+ ((struct ifnet *)fin->fin_ifp)->if_unit) & 7)
+# define IPF_PANIC(x,y)
+typedef struct mbuf mb_t;
+# endif /* _KERNEL */
+
+/*
+ * These are taken from Solaris' #defines for little endian.
+ */
+#if !defined(IP6F_MORE_FRAG)
+# define IP6F_MORE_FRAG 0x0100
+#endif
+#if !defined(IP6F_RESERVED_MASK)
+# define IP6F_RESERVED_MASK 0x0600
+#endif
+#if !defined(IP6F_OFF_MASK)
+# define IP6F_OFF_MASK 0xf8ff
+#endif
+
+struct ip6_ext {
+ u_char ip6e_nxt;
+ u_char ip6e_len;
+};
+
+typedef int ioctlcmd_t;
+typedef int minor_t;
+/*
+ * Really, any arch where sizeof(long) != sizeof(int).
+ */
+typedef unsigned int u_32_t;
+# define U_32_T 1
+
+# define OS_RECOGNISED 1
+#endif /* _AIX51 */
+
+
+#ifndef OS_RECOGNISED
+#error ip_compat.h does not recognise this platform/OS.
+#endif
+
+
+/* ----------------------------------------------------------------------- */
+/* G E N E R I C */
+/* ----------------------------------------------------------------------- */
+#ifndef OS_RECOGNISED
+#endif
+
+/*
+ * For BSD kernels, if bpf is in the kernel, enable ipfilter to use bpf in
+ * filter rules.
+ */
+#if !defined(IPFILTER_BPF) && ((NBPF > 0) || (NBPFILTER > 0) || (DEV_BPF > 0))
+# define IPFILTER_BPF
+#endif
+
+/*
+ * Userland locking primitives
+ */
+typedef struct { /* object operated on by the eMmutex_*() functions */
+ char *eMm_owner; /* NOTE(review): presumably the name given to eMmutex_init() - confirm */
+ char *eMm_heldin; /* NOTE(review): presumably the __FILE__ passed by MUTEX_ENTER - confirm */
+ u_int eMm_magic;
+ int eMm_held;
+ int eMm_heldat; /* NOTE(review): presumably the __LINE__ passed by MUTEX_ENTER - confirm */
+#ifdef __hpux /* pad member is compiled in only when __hpux is defined */
+ char eMm_fill[8];
+#endif
+} eMmutex_t;
+
+typedef struct { /* object operated on by the eMrwlock_*() functions */
+ char *eMrw_owner; /* NOTE(review): presumably the name given to eMrwlock_init() - confirm */
+ char *eMrw_heldin; /* NOTE(review): presumably the __FILE__ passed by READ_ENTER/WRITE_ENTER - confirm */
+ u_int eMrw_magic;
+ short eMrw_read;
+ short eMrw_write;
+ int eMrw_heldat; /* NOTE(review): presumably the __LINE__ passed by READ_ENTER/WRITE_ENTER - confirm */
+#ifdef __hpux /* pad member is compiled in only when __hpux is defined */
+ char eMm_fill[24]; /* NOTE(review): uses the eMm_ prefix although every other member here is eMrw_ */
+#endif
+} eMrwlock_t;
+
+typedef union { /* mutex storage: platform lock and userland emulation overlaid */
+#ifdef KMUTEX_T /* this member exists only where a platform section defined KMUTEX_T */
+ struct {
+ KMUTEX_T ipf_slk; /* accessed through the ipf_lk macro */
+ char *ipf_lname; /* accessed through the ipf_lname macro */
+ } ipf_lkun_s;
+#endif
+ eMmutex_t ipf_emu; /* operand of the eMmutex_*() calls made by the !_KERNEL MUTEX_* macros */
+} ipfmutex_t;
+
+typedef union { /* rwlock storage: platform lock and userland emulation overlaid */
+#ifdef KRWLOCK_T /* this member exists only where a platform section defined KRWLOCK_T */
+ struct {
+ KRWLOCK_T ipf_slk; /* accessed through the ipf_lk macro */
+ char *ipf_lname; /* accessed through the ipf_lname macro */
+ int ipf_sr; /* accessed through the ipf_isr macro */
+ int ipf_sw; /* accessed through the ipf_isw macro */
+ u_int ipf_magic; /* accessed through the ipf_magic macro */
+ } ipf_lkun_s;
+#endif
+ eMrwlock_t ipf_emu; /* operand of the eMrwlock_*() calls made by the !_KERNEL rwlock macros */
+} ipfrwlock_t;
+
+#define ipf_lk ipf_lkun_s.ipf_slk
+#define ipf_lname ipf_lkun_s.ipf_lname
+#define ipf_isr ipf_lkun_s.ipf_sr
+#define ipf_isw ipf_lkun_s.ipf_sw
+#define ipf_magic ipf_lkun_s.ipf_magic
+
+#if !defined(__GNUC__) || \
+ (defined(__FreeBSD_version) && (__FreeBSD_version >= 503000))
+# ifndef INLINE
+# define INLINE
+# endif
+#else
+# define INLINE __inline__
+#endif
+
+#if defined(linux) && defined(_KERNEL)
+extern INLINE void ipf_read_enter __P((ipfrwlock_t *));
+extern INLINE void ipf_write_enter __P((ipfrwlock_t *));
+extern INLINE void ipf_rw_exit __P((ipfrwlock_t *));
+extern INLINE void ipf_rw_downgrade __P((ipfrwlock_t *));
+#endif
+
+/*
+ * In a non-kernel environment, there are a lot of macros that need to be
+ * filled in to be null-ops or to point to some compatibility function,
+ * somewhere in userland.
+ */
+#ifndef _KERNEL
+typedef struct mb_s { /* definition of mb_t used when _KERNEL is not defined */
+ struct mb_s *mb_next; /* m_next is #defined to this member just below */
+ int mb_len; /* value yielded by both MSGDSIZE() and M_LEN() in this branch */
+ u_long mb_buf[2048]; /* array that MTOD() casts to the requested type */
+} mb_t;
+# undef m_next
+# define m_next mb_next
+# define MSGDSIZE(x) (x)->mb_len /* XXX - from ipt.c */
+# define M_LEN(x) (x)->mb_len
+# define M_DUPLICATE(x) (x)
+# define GETKTIME(x) gettimeofday((struct timeval *)(x), NULL)
+# undef MTOD
+# define MTOD(m, t) ((t)(m)->mb_buf)
+# define FREE_MB_T(x)
+# define SLEEP(x,y) 1;
+# define WAKEUP(x,y) ;
+# define IPF_PANIC(x,y) ;
+# define PANIC(x,y) ;
+# define SPL_NET(x) ;
+# define SPL_IMP(x) ;
+# define SPL_X(x) ;
+# define KMALLOC(a,b) (a) = (b)malloc(sizeof(*a))
+# define KMALLOCS(a,b,c) (a) = (b)malloc(c)
+# define KFREE(x) free(x)
+# define KFREES(x,s) free(x)
+# define GETIFP(x, v) get_unit(x,v)
+# define COPYIN(a,b,c) (bcopy((a), (b), (c)), 0)
+# define COPYOUT(a,b,c) (bcopy((a), (b), (c)), 0)
+# define BCOPYIN(a,b,c) (bcopy((a), (b), (c)), 0)
+# define BCOPYOUT(a,b,c) (bcopy((a), (b), (c)), 0)
+# define COPYDATA(m, o, l, b) bcopy(MTOD((mb_t *)m, char *) + (o), \
+ (b), (l))
+# define COPYBACK(m, o, l, b) bcopy((b), \
+ MTOD((mb_t *)m, char *) + (o), \
+ (l))
+# define UIOMOVE(a,b,c,d) ipfuiomove(a,b,c,d)
+extern void m_copydata __P((mb_t *, int, int, caddr_t));
+extern int ipfuiomove __P((caddr_t, int, int, struct uio *));
+# ifndef CACHE_HASH
+# define CACHE_HASH(x) ((IFNAME(fin->fin_ifp)[0] + \
+ ((struct ifnet *)fin->fin_ifp)->if_unit) & 7)
+# endif
+
+# define MUTEX_DESTROY(x) eMmutex_destroy(&(x)->ipf_emu)
+# define MUTEX_ENTER(x) eMmutex_enter(&(x)->ipf_emu, \
+ __FILE__, __LINE__)
+# define MUTEX_EXIT(x) eMmutex_exit(&(x)->ipf_emu)
+# define MUTEX_INIT(x,y) eMmutex_init(&(x)->ipf_emu, y)
+# define MUTEX_NUKE(x) bzero((x), sizeof(*(x)))
+
+# define MUTEX_DOWNGRADE(x) eMrwlock_downgrade(&(x)->ipf_emu, \
+ __FILE__, __LINE__)
+# define READ_ENTER(x) eMrwlock_read_enter(&(x)->ipf_emu, \
+ __FILE__, __LINE__)
+# define RWLOCK_INIT(x, y) eMrwlock_init(&(x)->ipf_emu, y)
+# define RWLOCK_EXIT(x) eMrwlock_exit(&(x)->ipf_emu)
+# define RW_DESTROY(x) eMrwlock_destroy(&(x)->ipf_emu)
+# define WRITE_ENTER(x) eMrwlock_write_enter(&(x)->ipf_emu, \
+ __FILE__, \
+ __LINE__)
+
+# define USE_MUTEXES 1
+
+extern void eMmutex_destroy __P((eMmutex_t *));
+extern void eMmutex_enter __P((eMmutex_t *, char *, int));
+extern void eMmutex_exit __P((eMmutex_t *));
+extern void eMmutex_init __P((eMmutex_t *, char *));
+extern void eMrwlock_destroy __P((eMrwlock_t *));
+extern void eMrwlock_exit __P((eMrwlock_t *));
+extern void eMrwlock_init __P((eMrwlock_t *, char *));
+extern void eMrwlock_read_enter __P((eMrwlock_t *, char *, int));
+extern void eMrwlock_write_enter __P((eMrwlock_t *, char *, int));
+extern void eMrwlock_downgrade __P((eMrwlock_t *, char *, int));
+
+#endif
+
+#define MAX_IPV4HDR ((0xf << 2) + sizeof(struct icmp) + sizeof(ip_t) + 8)
+
+#ifndef IP_OFFMASK
+# define IP_OFFMASK 0x1fff
+#endif
+
+
+/*
+ * On BSD's use quad_t as a guarantee for getting at least a 64bit sized
+ * object.
+ */
+#if BSD > 199306
+# define USE_QUAD_T
+# define U_QUAD_T u_quad_t
+# define QUAD_T quad_t
+#else /* BSD > 199306 */
+# define U_QUAD_T u_long
+# define QUAD_T long
+#endif /* BSD > 199306 */
+
+
+#ifdef USE_INET6
+# if defined(__NetBSD__) || defined(__OpenBSD__) || defined(__FreeBSD__) || \
+ defined(__osf__) || defined(linux)
+# include <netinet/ip6.h>
+# include <netinet/icmp6.h>
+# if !defined(linux)
+# if defined(_KERNEL) && !defined(__osf__)
+# include <netinet6/ip6_var.h>
+# endif
+# endif
+typedef struct ip6_hdr ip6_t;
+# endif
+#endif
+
+#ifndef MAX
+# define MAX(a,b) (((a) > (b)) ? (a) : (b))
+#endif
+
+#if defined(_KERNEL)
+# ifdef MENTAT
+# define COPYDATA mb_copydata
+# define COPYBACK mb_copyback
+# else
+# define COPYDATA m_copydata
+# define COPYBACK m_copyback
+# endif
+# if (BSD >= 199306) || defined(__FreeBSD__)
+# if (defined(__NetBSD_Version__) && (__NetBSD_Version__ < 105180000)) || \
+ defined(__FreeBSD__) || (defined(OpenBSD) && (OpenBSD < 200206)) || \
+ defined(_BSDI_VERSION)
+# include <vm/vm.h>
+# endif
+# if !defined(__FreeBSD__) || (defined (__FreeBSD_version) && \
+ (__FreeBSD_version >= 300000))
+# if (defined(__NetBSD_Version__) && (__NetBSD_Version__ >= 105180000)) || \
+ (defined(OpenBSD) && (OpenBSD >= 200111))
+# include <uvm/uvm_extern.h>
+# else
+# include <vm/vm_extern.h>
+extern vm_map_t kmem_map;
+# endif
+# include <sys/proc.h>
+# else /* !__FreeBSD__ || (__FreeBSD__ && __FreeBSD_version >= 300000) */
+# include <vm/vm_kern.h>
+# endif /* !__FreeBSD__ || (__FreeBSD__ && __FreeBSD_version >= 300000) */
+
+# ifdef IPFILTER_M_IPFILTER
+# include <sys/malloc.h>
+MALLOC_DECLARE(M_IPFILTER);
+# define _M_IPF M_IPFILTER
+# else /* IPFILTER_M_IPFILTER */
+# ifdef M_PFIL
+# define _M_IPF M_PFIL
+# else
+# ifdef M_IPFILTER
+# define _M_IPF M_IPFILTER
+# else
+# define _M_IPF M_TEMP
+# endif /* M_IPFILTER */
+# endif /* M_PFIL */
+# endif /* IPFILTER_M_IPFILTER */
+# define KMALLOC(a, b) MALLOC((a), b, sizeof(*(a)), _M_IPF, M_NOWAIT)
+# define KMALLOCS(a, b, c) MALLOC((a), b, (c), _M_IPF, M_NOWAIT)
+# define KFREE(x) FREE((x), _M_IPF)
+# define KFREES(x,s) FREE((x), _M_IPF)
+# define UIOMOVE(a,b,c,d) uiomove(a,b,d)
+# define SLEEP(id, n) tsleep((id), PPAUSE|PCATCH, n, 0)
+# define WAKEUP(id,x) wakeup(id+x)
+# define GETIFP(n, v) ifunit(n)
+# endif /* (Free)BSD */
+
+# if !defined(USE_MUTEXES) && !defined(SPL_NET) /* only when no platform section chose mutexes or its own SPL_NET */
+# if (defined(NetBSD) && (NetBSD <= 1991011) && (NetBSD >= 199407)) || \
+ (defined(OpenBSD) && (OpenBSD >= 200006)) /* NOTE(review): 1991011 above has seven digits while 199407 has six - confirm it is not a typo */
+# define SPL_NET(x) x = splsoftnet()
+# else
+# define SPL_IMP(x) x = splimp()
+# define SPL_NET(x) x = splnet()
+# endif /* NetBSD && (NetBSD <= 1991011) && (NetBSD >= 199407) */
+# define SPL_X(x) (void) splx(x)
+# endif /* !USE_MUTEXES */
+
+# ifndef FREE_MB_T
+# define FREE_MB_T(m) m_freem(m)
+# endif
+
+# ifndef MTOD
+# define MTOD(m,t) mtod(m,t)
+# endif
+
+# ifndef COPYIN
+# define COPYIN(a,b,c) (bcopy((caddr_t)(a), (caddr_t)(b), (c)), 0)
+# define COPYOUT(a,b,c) (bcopy((caddr_t)(a), (caddr_t)(b), (c)), 0)
+# define BCOPYIN(a,b,c) (bcopy((caddr_t)(a), (caddr_t)(b), (c)), 0)
+# define BCOPYOUT(a,b,c) (bcopy((caddr_t)(a), (caddr_t)(b), (c)), 0)
+# endif
+
+# ifndef KMALLOC
+# define KMALLOC(a,b) (a) = (b)new_kmem_alloc(sizeof(*(a)), \
+ KMEM_NOSLEEP)
+# define KMALLOCS(a,b,c) (a) = (b)new_kmem_alloc((c), KMEM_NOSLEEP)
+# endif
+
+# ifndef GET_MINOR
+# define GET_MINOR(x) minor(x)
+# endif
+/*
+ * Kernel PANIC(): call panic with the parenthesised argument list y when
+ * x is true.  Wrapped in do { } while (0) so that an else following
+ * "PANIC(...);" in the caller cannot pair with the macro's own if.
+ */
+# define PANIC(x,y) do { if (x) panic y; } while (0)
+#endif /* _KERNEL */
+
+#ifndef IFNAME
+# define IFNAME(x) ((struct ifnet *)x)->if_name
+#endif
+#ifndef COPYIFNAME
+# define NEED_FRGETIFNAME
+extern char *fr_getifname __P((struct ifnet *, char *));
+# define COPYIFNAME(x, b) \
+ fr_getifname((struct ifnet *)x, b)
+#endif
+
+#ifndef ASSERT
+# define ASSERT(x)
+#endif
+
+/*
+ * The ctype(3) POSIX definition, if used "safely" in code everywhere, would
+ * mean all normal code that walks through strings needed casts. Yuck.
+ * These wrappers apply the (u_char) cast in one place instead.
+ */
+#define ISALNUM(x) isalnum((u_char)(x))
+#define ISALPHA(x) isalpha((u_char)(x))
+#define ISASCII(x) isascii((u_char)(x))
+#define ISDIGIT(x) isdigit((u_char)(x))
+#define ISPRINT(x) isprint((u_char)(x))
+#define ISSPACE(x) isspace((u_char)(x))
+#define ISUPPER(x) isupper((u_char)(x))
+#define ISXDIGIT(x) isxdigit((u_char)(x))
+#define ISLOWER(x) islower((u_char)(x))
+#define TOUPPER(x) toupper((u_char)(x))
+#define TOLOWER(x) tolower((u_char)(x))
+
+/*
+ * If mutexes aren't being used, turn all the mutex functions into null-ops.
+ */
+#if !defined(USE_MUTEXES)
+# define USE_SPL 1
+# undef RW_DESTROY
+# undef MUTEX_INIT
+# undef MUTEX_NUKE
+# undef MUTEX_DESTROY
+# define MUTEX_ENTER(x) ;
+# define READ_ENTER(x) ;
+# define WRITE_ENTER(x) ;
+# define MUTEX_DOWNGRADE(x) ;
+# define RWLOCK_INIT(x, y) ;
+# define RWLOCK_EXIT(x) ;
+# define RW_DESTROY(x) ;
+# define MUTEX_EXIT(x) ;
+# define MUTEX_INIT(x,y) ;
+# define MUTEX_DESTROY(x) ;
+# define MUTEX_NUKE(x) ;
+#endif /* !USE_MUTEXES */
+#ifndef ATOMIC_INC
+# define ATOMIC_INC(x) (x)++
+# define ATOMIC_DEC(x) (x)--
+#endif
+
+#if defined(USE_SPL) && defined(_KERNEL)
+# define SPL_INT(x) int x
+#else
+# define SPL_INT(x)
+#endif
+
+/*
+ * If there are no atomic operations for bit sizes defined, define them to all
+ * use a generic one that works for all sizes.
+ */
+#ifndef ATOMIC_INCL
+# define ATOMIC_INCL ATOMIC_INC
+# define ATOMIC_INC64 ATOMIC_INC
+# define ATOMIC_INC32 ATOMIC_INC
+# define ATOMIC_INC16 ATOMIC_INC
+# define ATOMIC_DECL ATOMIC_DEC
+# define ATOMIC_DEC64 ATOMIC_DEC
+# define ATOMIC_DEC32 ATOMIC_DEC
+# define ATOMIC_DEC16 ATOMIC_DEC
+#endif
+
+#ifndef HDR_T_PRIVATE
+typedef struct tcphdr tcphdr_t;
+typedef struct udphdr udphdr_t;
+#endif
+typedef struct icmp icmphdr_t;
+typedef struct ip ip_t;
+typedef struct ether_header ether_header_t;
+typedef struct tcpiphdr tcpiphdr_t;
+
+#ifndef FR_GROUPLEN
+# define FR_GROUPLEN 16
+#endif
+
+/*
+ * Always replace any offsetof() that earlier headers supplied with a
+ * version yielding an int.  The address is converted through unsigned long
+ * first so the pointer is not cast directly to a narrower int where
+ * pointers are wider than int, and the whole expansion is parenthesised so
+ * it can be used inside larger expressions.
+ */
+#ifdef offsetof
+# undef offsetof
+#endif
+#define offsetof(t,m) ((int)(unsigned long)(&((t *)0L)->m))
+
+/*
+ * This set of macros has been brought about because on Tru64 it is not
+ * possible to easily assign or examine values in a structure that are
+ * bit fields.
+ */
+#ifndef IP_V
+# define IP_V(x) (x)->ip_v
+#endif
+#ifndef IP_V_A
+# define IP_V_A(x,y) (x)->ip_v = (y)
+#endif
+#ifndef IP_HL
+# define IP_HL(x) (x)->ip_hl
+#endif
+#ifndef IP_HL_A
+# define IP_HL_A(x,y) (x)->ip_hl = (y)
+#endif
+#ifndef TCP_X2
+# define TCP_X2(x) (x)->th_x2
+#endif
+#ifndef TCP_X2_A
+# define TCP_X2_A(x,y) (x)->th_x2 = (y)
+#endif
+#ifndef TCP_OFF
+# define TCP_OFF(x) (x)->th_off
+#endif
+#ifndef TCP_OFF_A
+# define TCP_OFF_A(x,y) (x)->th_off = (y)
+#endif
+#define IPMINLEN(i, h) ((i)->ip_len >= (IP_HL(i) * 4 + sizeof(struct h)))
+
+
+/*
+ * XXX - This is one of those *awful* hacks which nobody likes
+ */
+#ifdef ultrix
+#define A_A
+#else
+#define A_A &
+#endif
+
+#define TCPF_ALL (TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG|\
+ TH_ECN|TH_CWR)
+
+#if (BSD >= 199306) && !defined(m_act)
+# define m_act m_nextpkt
+#endif
+
+/*
+ * Security Options for Internet Protocol (IPSO) as defined in RFC 1108.
+ *
+ * Basic Option
+ *
+ * 00000001 - (Reserved 4)
+ * 00111101 - Top Secret
+ * 01011010 - Secret
+ * 10010110 - Confidential
+ * 01100110 - (Reserved 3)
+ * 11001100 - (Reserved 2)
+ * 10101011 - Unclassified
+ * 11110001 - (Reserved 1)
+ */
+#define IPSO_CLASS_RES4 0x01
+#define IPSO_CLASS_TOPS 0x3d
+#define IPSO_CLASS_SECR 0x5a
+#define IPSO_CLASS_CONF 0x96
+#define IPSO_CLASS_RES3 0x66
+#define IPSO_CLASS_RES2 0xcc
+#define IPSO_CLASS_UNCL 0xab
+#define IPSO_CLASS_RES1 0xf1
+
+#define IPSO_AUTH_GENSER 0x80
+#define IPSO_AUTH_ESI 0x40
+#define IPSO_AUTH_SCI 0x20
+#define IPSO_AUTH_NSA 0x10
+#define IPSO_AUTH_DOE 0x08
+#define IPSO_AUTH_UN 0x06
+#define IPSO_AUTH_FTE 0x01
+
+/*
+ * IP option #defines
+ */
+#undef IPOPT_RR
+#define IPOPT_RR 7
+#undef IPOPT_ZSU
+#define IPOPT_ZSU 10 /* ZSU */
+#undef IPOPT_MTUP
+#define IPOPT_MTUP 11 /* MTUP */
+#undef IPOPT_MTUR
+#define IPOPT_MTUR 12 /* MTUR */
+#undef IPOPT_ENCODE
+#define IPOPT_ENCODE 15 /* ENCODE */
+#undef IPOPT_TS
+#define IPOPT_TS 68
+#undef IPOPT_TR
+#define IPOPT_TR 82 /* TR */
+#undef IPOPT_SECURITY
+#define IPOPT_SECURITY 130
+#undef IPOPT_LSRR
+#define IPOPT_LSRR 131
+#undef IPOPT_E_SEC
+#define IPOPT_E_SEC 133 /* E-SEC */
+#undef IPOPT_CIPSO
+#define IPOPT_CIPSO 134 /* CIPSO */
+#undef IPOPT_SATID
+#define IPOPT_SATID 136
+#ifndef IPOPT_SID
+# define IPOPT_SID IPOPT_SATID
+#endif
+#undef IPOPT_SSRR
+#define IPOPT_SSRR 137
+#undef IPOPT_ADDEXT
+#define IPOPT_ADDEXT 147 /* ADDEXT */
+#undef IPOPT_VISA
+#define IPOPT_VISA 142 /* VISA */
+#undef IPOPT_IMITD
+#define IPOPT_IMITD 144 /* IMITD */
+#undef IPOPT_EIP
+#define IPOPT_EIP 145 /* EIP */
+#undef IPOPT_RTRALRT
+#define IPOPT_RTRALRT 148 /* RTRALRT */
+#undef IPOPT_SDB
+#define IPOPT_SDB 149
+#undef IPOPT_NSAPA
+#define IPOPT_NSAPA 150
+#undef IPOPT_DPS
+#define IPOPT_DPS 151
+#undef IPOPT_UMP
+#define IPOPT_UMP 152
+#undef IPOPT_FINN
+#define IPOPT_FINN 205 /* FINN */
+
+#ifndef TCPOPT_EOL
+# define TCPOPT_EOL 0
+#endif
+#ifndef TCPOPT_NOP
+# define TCPOPT_NOP 1
+#endif
+#ifndef TCPOPT_MAXSEG
+# define TCPOPT_MAXSEG 2
+#endif
+#ifndef TCPOLEN_MAXSEG
+# define TCPOLEN_MAXSEG 4
+#endif
+#ifndef TCPOPT_WINDOW
+# define TCPOPT_WINDOW 3
+#endif
+#ifndef TCPOLEN_WINDOW
+# define TCPOLEN_WINDOW 3
+#endif
+#ifndef TCPOPT_SACK_PERMITTED
+# define TCPOPT_SACK_PERMITTED 4
+#endif
+#ifndef TCPOLEN_SACK_PERMITTED
+# define TCPOLEN_SACK_PERMITTED 2
+#endif
+#ifndef TCPOPT_SACK
+# define TCPOPT_SACK 5
+#endif
+#ifndef TCPOPT_TIMESTAMP
+# define TCPOPT_TIMESTAMP 8
+#endif
+
+#ifndef ICMP_MINLEN
+# define ICMP_MINLEN 8
+#endif
+#ifndef ICMP_ECHOREPLY
+# define ICMP_ECHOREPLY 0
+#endif
+#ifndef ICMP_UNREACH
+# define ICMP_UNREACH 3
+#endif
+#ifndef ICMP_UNREACH_NET
+# define ICMP_UNREACH_NET 0
+#endif
+#ifndef ICMP_UNREACH_HOST
+# define ICMP_UNREACH_HOST 1
+#endif
+#ifndef ICMP_UNREACH_PROTOCOL
+# define ICMP_UNREACH_PROTOCOL 2
+#endif
+#ifndef ICMP_UNREACH_PORT
+# define ICMP_UNREACH_PORT 3
+#endif
+#ifndef ICMP_UNREACH_NEEDFRAG
+# define ICMP_UNREACH_NEEDFRAG 4
+#endif
+#ifndef ICMP_UNREACH_SRCFAIL
+# define ICMP_UNREACH_SRCFAIL 5
+#endif
+#ifndef ICMP_UNREACH_NET_UNKNOWN
+# define ICMP_UNREACH_NET_UNKNOWN 6
+#endif
+#ifndef ICMP_UNREACH_HOST_UNKNOWN
+# define ICMP_UNREACH_HOST_UNKNOWN 7
+#endif
+#ifndef ICMP_UNREACH_ISOLATED
+# define ICMP_UNREACH_ISOLATED 8
+#endif
+#ifndef ICMP_UNREACH_NET_PROHIB
+# define ICMP_UNREACH_NET_PROHIB 9
+#endif
+#ifndef ICMP_UNREACH_HOST_PROHIB
+# define ICMP_UNREACH_HOST_PROHIB 10
+#endif
+#ifndef ICMP_UNREACH_TOSNET
+# define ICMP_UNREACH_TOSNET 11
+#endif
+#ifndef ICMP_UNREACH_TOSHOST
+# define ICMP_UNREACH_TOSHOST 12
+#endif
+#ifndef ICMP_UNREACH_ADMIN_PROHIBIT
+# define ICMP_UNREACH_ADMIN_PROHIBIT 13
+#endif
+#ifndef ICMP_UNREACH_FILTER
+# define ICMP_UNREACH_FILTER 13
+#endif
+#ifndef ICMP_UNREACH_HOST_PRECEDENCE
+# define ICMP_UNREACH_HOST_PRECEDENCE 14
+#endif
+#ifndef ICMP_UNREACH_PRECEDENCE_CUTOFF
+# define ICMP_UNREACH_PRECEDENCE_CUTOFF 15
+#endif
+#ifndef ICMP_SOURCEQUENCH
+# define ICMP_SOURCEQUENCH 4
+#endif
+#ifndef ICMP_REDIRECT_NET
+# define ICMP_REDIRECT_NET 0
+#endif
+#ifndef ICMP_REDIRECT_HOST
+# define ICMP_REDIRECT_HOST 1
+#endif
+#ifndef ICMP_REDIRECT_TOSNET
+# define ICMP_REDIRECT_TOSNET 2
+#endif
+#ifndef ICMP_REDIRECT_TOSHOST
+# define ICMP_REDIRECT_TOSHOST 3
+#endif
+#ifndef ICMP_ALTHOSTADDR
+# define ICMP_ALTHOSTADDR 6
+#endif
+#ifndef ICMP_TIMXCEED
+# define ICMP_TIMXCEED 11
+#endif
+#ifndef ICMP_TIMXCEED_INTRANS
+# define ICMP_TIMXCEED_INTRANS 0
+#endif
+#ifndef ICMP_TIMXCEED_REASS
+# define ICMP_TIMXCEED_REASS 1
+#endif
+#ifndef ICMP_PARAMPROB
+# define ICMP_PARAMPROB 12
+#endif
+#ifndef ICMP_PARAMPROB_ERRATPTR
+# define ICMP_PARAMPROB_ERRATPTR 0
+#endif
+#ifndef ICMP_PARAMPROB_OPTABSENT
+# define ICMP_PARAMPROB_OPTABSENT 1
+#endif
+#ifndef ICMP_PARAMPROB_LENGTH
+# define ICMP_PARAMPROB_LENGTH 2
+#endif
+#ifndef ICMP_TSTAMP
+# define ICMP_TSTAMP 13
+#endif
+#ifndef ICMP_TSTAMPREPLY
+# define ICMP_TSTAMPREPLY 14
+#endif
+#ifndef ICMP_IREQ
+# define ICMP_IREQ 15
+#endif
+#ifndef ICMP_IREQREPLY
+# define ICMP_IREQREPLY 16
+#endif
+#ifndef ICMP_MASKREQ
+# define ICMP_MASKREQ 17
+#endif
+#ifndef ICMP_MASKREPLY
+# define ICMP_MASKREPLY 18
+#endif
+#ifndef ICMP_TRACEROUTE
+# define ICMP_TRACEROUTE 30
+#endif
+#ifndef ICMP_DATACONVERR
+# define ICMP_DATACONVERR 31
+#endif
+#ifndef ICMP_MOBILE_REDIRECT
+# define ICMP_MOBILE_REDIRECT 32
+#endif
+#ifndef ICMP_IPV6_WHEREAREYOU
+# define ICMP_IPV6_WHEREAREYOU 33
+#endif
+#ifndef ICMP_IPV6_IAMHERE
+# define ICMP_IPV6_IAMHERE 34
+#endif
+#ifndef ICMP_MOBILE_REGREQUEST
+# define ICMP_MOBILE_REGREQUEST 35
+#endif
+#ifndef ICMP_MOBILE_REGREPLY
+# define ICMP_MOBILE_REGREPLY 36
+#endif
+#ifndef ICMP_SKIP
+# define ICMP_SKIP 39
+#endif
+#ifndef ICMP_PHOTURIS
+# define ICMP_PHOTURIS 40
+#endif
+#ifndef ICMP_PHOTURIS_UNKNOWN_INDEX
+# define ICMP_PHOTURIS_UNKNOWN_INDEX 1
+#endif
+#ifndef ICMP_PHOTURIS_AUTH_FAILED
+# define ICMP_PHOTURIS_AUTH_FAILED 2
+#endif
+#ifndef ICMP_PHOTURIS_DECRYPT_FAILED
+# define ICMP_PHOTURIS_DECRYPT_FAILED 3
+#endif
+#ifndef IPVERSION
+# define IPVERSION 4
+#endif
+#ifndef IPOPT_MINOFF
+# define IPOPT_MINOFF 4
+#endif
+#ifndef IPOPT_COPIED
+# define IPOPT_COPIED(x) ((x)&0x80)
+#endif
+#ifndef IPOPT_EOL
+# define IPOPT_EOL 0
+#endif
+#ifndef IPOPT_NOP
+# define IPOPT_NOP 1
+#endif
+#ifndef IP_MF
+# define IP_MF ((u_short)0x2000)
+#endif
+#ifndef ETHERTYPE_IP
+# define ETHERTYPE_IP ((u_short)0x0800)
+#endif
+#ifndef TH_FIN
+# define TH_FIN 0x01
+#endif
+#ifndef TH_SYN
+# define TH_SYN 0x02
+#endif
+#ifndef TH_RST
+# define TH_RST 0x04
+#endif
+#ifndef TH_PUSH
+# define TH_PUSH 0x08
+#endif
+#ifndef TH_ACK
+# define TH_ACK 0x10
+#endif
+#ifndef TH_URG
+# define TH_URG 0x20
+#endif
+#undef TH_ACKMASK
+#define TH_ACKMASK (TH_FIN|TH_SYN|TH_RST|TH_ACK)
+
+#ifndef IPOPT_EOL
+# define IPOPT_EOL 0
+#endif
+#ifndef IPOPT_NOP
+# define IPOPT_NOP 1
+#endif
+#ifndef IPOPT_RR
+# define IPOPT_RR 7
+#endif
+#ifndef IPOPT_TS
+# define IPOPT_TS 68
+#endif
+#ifndef IPOPT_SECURITY
+# define IPOPT_SECURITY 130
+#endif
+#ifndef IPOPT_LSRR
+# define IPOPT_LSRR 131
+#endif
+#ifndef IPOPT_SATID
+# define IPOPT_SATID 136
+#endif
+#ifndef IPOPT_SSRR
+# define IPOPT_SSRR 137
+#endif
+#ifndef IPOPT_SECUR_UNCLASS
+# define IPOPT_SECUR_UNCLASS ((u_short)0x0000)
+#endif
+#ifndef IPOPT_SECUR_CONFID
+# define IPOPT_SECUR_CONFID ((u_short)0xf135)
+#endif
+#ifndef IPOPT_SECUR_EFTO
+# define IPOPT_SECUR_EFTO ((u_short)0x789a)
+#endif
+#ifndef IPOPT_SECUR_MMMM
+# define IPOPT_SECUR_MMMM ((u_short)0xbc4d)
+#endif
+#ifndef IPOPT_SECUR_RESTR
+# define IPOPT_SECUR_RESTR ((u_short)0xaf13)
+#endif
+#ifndef IPOPT_SECUR_SECRET
+# define IPOPT_SECUR_SECRET ((u_short)0xd788)
+#endif
+#ifndef IPOPT_SECUR_TOPSECRET
+# define IPOPT_SECUR_TOPSECRET ((u_short)0x6bc5)
+#endif
+#ifndef IPOPT_OLEN
+# define IPOPT_OLEN 1
+#endif
+#ifndef IPPROTO_HOPOPTS
+# define IPPROTO_HOPOPTS 0
+#endif
+#ifndef IPPROTO_ENCAP
+# define IPPROTO_ENCAP 4
+#endif
+#ifndef IPPROTO_IPV6
+# define IPPROTO_IPV6 41
+#endif
+#ifndef IPPROTO_ROUTING
+# define IPPROTO_ROUTING 43
+#endif
+#ifndef IPPROTO_FRAGMENT
+# define IPPROTO_FRAGMENT 44
+#endif
+#ifndef IPPROTO_GRE
+# define IPPROTO_GRE 47 /* GRE encaps RFC 1701 */
+#endif
+#ifndef IPPROTO_ESP
+# define IPPROTO_ESP 50
+#endif
+#ifndef IPPROTO_AH
+# define IPPROTO_AH 51
+#endif
+#ifndef IPPROTO_ICMPV6
+# define IPPROTO_ICMPV6 58
+#endif
+#ifndef IPPROTO_NONE
+# define IPPROTO_NONE 59
+#endif
+#ifndef IPPROTO_DSTOPTS
+# define IPPROTO_DSTOPTS 60
+#endif
+#ifndef IPPROTO_FRAGMENT
+# define IPPROTO_FRAGMENT 44
+#endif
+#ifndef ICMP_ROUTERADVERT
+# define ICMP_ROUTERADVERT 9
+#endif
+#ifndef ICMP_ROUTERSOLICIT
+# define ICMP_ROUTERSOLICIT 10
+#endif
+#ifndef ICMP6_DST_UNREACH
+# define ICMP6_DST_UNREACH 1
+#endif
+#ifndef ICMP6_PACKET_TOO_BIG
+# define ICMP6_PACKET_TOO_BIG 2
+#endif
+#ifndef ICMP6_TIME_EXCEEDED
+# define ICMP6_TIME_EXCEEDED 3
+#endif
+#ifndef ICMP6_PARAM_PROB
+# define ICMP6_PARAM_PROB 4
+#endif
+
+#ifndef ICMP6_ECHO_REQUEST
+# define ICMP6_ECHO_REQUEST 128
+#endif
+#ifndef ICMP6_ECHO_REPLY
+# define ICMP6_ECHO_REPLY 129
+#endif
+#ifndef ICMP6_MEMBERSHIP_QUERY
+# define ICMP6_MEMBERSHIP_QUERY 130
+#endif
+#ifndef MLD6_LISTENER_QUERY
+# define MLD6_LISTENER_QUERY 130
+#endif
+#ifndef ICMP6_MEMBERSHIP_REPORT
+# define ICMP6_MEMBERSHIP_REPORT 131
+#endif
+#ifndef MLD6_LISTENER_REPORT
+# define MLD6_LISTENER_REPORT 131
+#endif
+#ifndef ICMP6_MEMBERSHIP_REDUCTION
+# define ICMP6_MEMBERSHIP_REDUCTION 132
+#endif
+#ifndef MLD6_LISTENER_DONE
+# define MLD6_LISTENER_DONE 132
+#endif
+#ifndef ND_ROUTER_SOLICIT
+# define ND_ROUTER_SOLICIT 133
+#endif
+#ifndef ND_ROUTER_ADVERT
+# define ND_ROUTER_ADVERT 134
+#endif
+#ifndef ND_NEIGHBOR_SOLICIT
+# define ND_NEIGHBOR_SOLICIT 135
+#endif
+#ifndef ND_NEIGHBOR_ADVERT
+# define ND_NEIGHBOR_ADVERT 136
+#endif
+#ifndef ND_REDIRECT
+# define ND_REDIRECT 137
+#endif
+#ifndef ICMP6_ROUTER_RENUMBERING
+# define ICMP6_ROUTER_RENUMBERING 138
+#endif
+#ifndef ICMP6_WRUREQUEST
+# define ICMP6_WRUREQUEST 139
+#endif
+#ifndef ICMP6_WRUREPLY
+# define ICMP6_WRUREPLY 140
+#endif
+#ifndef ICMP6_FQDN_QUERY
+# define ICMP6_FQDN_QUERY 139
+#endif
+#ifndef ICMP6_FQDN_REPLY
+# define ICMP6_FQDN_REPLY 140
+#endif
+#ifndef ICMP6_NI_QUERY
+# define ICMP6_NI_QUERY 139
+#endif
+#ifndef ICMP6_NI_REPLY
+# define ICMP6_NI_REPLY 140
+#endif
+#ifndef MLD6_MTRACE_RESP
+# define MLD6_MTRACE_RESP 200
+#endif
+#ifndef MLD6_MTRACE
+# define MLD6_MTRACE 201
+#endif
+#ifndef ICMP6_HADISCOV_REQUEST
+# define ICMP6_HADISCOV_REQUEST 202
+#endif
+#ifndef ICMP6_HADISCOV_REPLY
+# define ICMP6_HADISCOV_REPLY 203
+#endif
+#ifndef ICMP6_MOBILEPREFIX_SOLICIT
+# define ICMP6_MOBILEPREFIX_SOLICIT 204
+#endif
+#ifndef ICMP6_MOBILEPREFIX_ADVERT
+# define ICMP6_MOBILEPREFIX_ADVERT 205
+#endif
+#ifndef ICMP6_MAXTYPE
+# define ICMP6_MAXTYPE 205
+#endif
+
+/*
+ * Fallback code values for the ICMPv6 destination-unreachable and
+ * time-exceeded messages.  NOTNEIGHBOR and BEYONDSCOPE share the value 2.
+ */
+#if !defined(ICMP6_DST_UNREACH_NOROUTE)
+# define ICMP6_DST_UNREACH_NOROUTE 0
+#endif
+#if !defined(ICMP6_DST_UNREACH_ADMIN)
+# define ICMP6_DST_UNREACH_ADMIN 1
+#endif
+#if !defined(ICMP6_DST_UNREACH_NOTNEIGHBOR)
+# define ICMP6_DST_UNREACH_NOTNEIGHBOR 2
+#endif
+#if !defined(ICMP6_DST_UNREACH_BEYONDSCOPE)
+# define ICMP6_DST_UNREACH_BEYONDSCOPE 2
+#endif
+#if !defined(ICMP6_DST_UNREACH_ADDR)
+# define ICMP6_DST_UNREACH_ADDR 3
+#endif
+#if !defined(ICMP6_DST_UNREACH_NOPORT)
+# define ICMP6_DST_UNREACH_NOPORT 4
+#endif
+#if !defined(ICMP6_TIME_EXCEED_TRANSIT)
+# define ICMP6_TIME_EXCEED_TRANSIT 0
+#endif
+#if !defined(ICMP6_TIME_EXCEED_REASSEMBLY)
+# define ICMP6_TIME_EXCEED_REASSEMBLY 1
+#endif
+
+/* Fallback codes for node-information replies. */
+#if !defined(ICMP6_NI_SUCCESS)
+# define ICMP6_NI_SUCCESS 0
+#endif
+#if !defined(ICMP6_NI_REFUSED)
+# define ICMP6_NI_REFUSED 1
+#endif
+#if !defined(ICMP6_NI_UNKNOWN)
+# define ICMP6_NI_UNKNOWN 2
+#endif
+
+/* Fallback codes for router-renumbering messages. */
+#if !defined(ICMP6_ROUTER_RENUMBERING_COMMAND)
+# define ICMP6_ROUTER_RENUMBERING_COMMAND 0
+#endif
+#if !defined(ICMP6_ROUTER_RENUMBERING_RESULT)
+# define ICMP6_ROUTER_RENUMBERING_RESULT 1
+#endif
+#if !defined(ICMP6_ROUTER_RENUMBERING_SEQNUM_RESET)
+# define ICMP6_ROUTER_RENUMBERING_SEQNUM_RESET 255
+#endif
+
+/* Fallback codes for parameter-problem messages. */
+#if !defined(ICMP6_PARAMPROB_HEADER)
+# define ICMP6_PARAMPROB_HEADER 0
+#endif
+#if !defined(ICMP6_PARAMPROB_NEXTHEADER)
+# define ICMP6_PARAMPROB_NEXTHEADER 1
+#endif
+#if !defined(ICMP6_PARAMPROB_OPTION)
+# define ICMP6_PARAMPROB_OPTION 2
+#endif
+
+/* Fallback codes for the subject of a node-information query. */
+#if !defined(ICMP6_NI_SUBJ_IPV6)
+# define ICMP6_NI_SUBJ_IPV6 0
+#endif
+#if !defined(ICMP6_NI_SUBJ_FQDN)
+# define ICMP6_NI_SUBJ_FQDN 1
+#endif
+#if !defined(ICMP6_NI_SUBJ_IPV4)
+# define ICMP6_NI_SUBJ_IPV4 2
+#endif
+
+/*
+ * Explicit Congestion Notification bits in the TCP flags byte (RFC 2481).
+ * Each bit is defined here unless a definition already exists; TH_ECNALL
+ * is the union of both.
+ */
+#if !defined(TH_ECN)
+# define TH_ECN 0x40
+#endif
+#if !defined(TH_CWR)
+# define TH_CWR 0x80
+#endif
+#define TH_ECNALL (TH_CWR|TH_ECN)
+
+/*
+ * TCP States
+ *
+ * The numbering is ordered so that ranges of states can be tested with
+ * simple comparisons (see the range notes between the definitions).
+ * IPF_TCP_NSTATES is the number of states defined (0..11).
+ */
+#define IPF_TCPS_CLOSED 0 /* closed */
+#define IPF_TCPS_LISTEN 1 /* listening for connection */
+#define IPF_TCPS_SYN_SENT 2 /* active, have sent syn */
+#define IPF_TCPS_SYN_RECEIVED 3 /* have sent and received syn */
+#define IPF_TCPS_HALF_ESTAB 4 /* for connections not fully "up" */
+/* states < IPF_TCPS_ESTABLISHED are those where connections not established */
+#define IPF_TCPS_ESTABLISHED 5 /* established */
+#define IPF_TCPS_CLOSE_WAIT 6 /* rcvd fin, waiting for close */
+/* states > IPF_TCPS_CLOSE_WAIT are those where user has closed */
+#define IPF_TCPS_FIN_WAIT_1 7 /* have closed, sent fin */
+#define IPF_TCPS_CLOSING 8 /* closed xchd FIN; await FIN ACK */
+#define IPF_TCPS_LAST_ACK 9 /* had fin and close; await FIN ACK */
+/* states > IPF_TCPS_CLOSE_WAIT && < IPF_TCPS_FIN_WAIT_2 await ACK of FIN */
+#define IPF_TCPS_FIN_WAIT_2 10 /* have closed, fin is acked */
+#define IPF_TCPS_TIME_WAIT 11 /* in 2*msl quiet wait after close */
+#define IPF_TCP_NSTATES 12
+
+/* NOTE(review): unit of TCP_MSL is not visible here (seconds or ticks) - confirm. */
+#define TCP_MSL 120
+
+/*
+ * Any existing ICMP_MAX_UNREACH / ICMP_MAXTYPE definitions are discarded
+ * and replaced unconditionally with the values below.
+ */
+#undef ICMP_MAX_UNREACH
+#define ICMP_MAX_UNREACH 14
+#undef ICMP_MAXTYPE
+#define ICMP_MAXTYPE 18
+
+/* Fallback interface-name buffer size, used only if IFNAMSIZ is not defined. */
+#ifndef IFNAMSIZ
+#define IFNAMSIZ 16
+#endif
+
+/*
+ * Fallback syslog facility codes (facility number shifted left by 3),
+ * each defined only when the name does not already exist.  LOG_AUDIT and
+ * LOG_SECURITY share facility 13; LOG_LFMT and LOG_CONSOLE share 14.
+ */
+#if !defined(LOG_FTP)
+# define LOG_FTP (11<<3)
+#endif
+#if !defined(LOG_AUTHPRIV)
+# define LOG_AUTHPRIV (10<<3)
+#endif
+#if !defined(LOG_AUDIT)
+# define LOG_AUDIT (13<<3)
+#endif
+#if !defined(LOG_NTP)
+# define LOG_NTP (12<<3)
+#endif
+#if !defined(LOG_SECURITY)
+# define LOG_SECURITY (13<<3)
+#endif
+#if !defined(LOG_LFMT)
+# define LOG_LFMT (14<<3)
+#endif
+#if !defined(LOG_CONSOLE)
+# define LOG_CONSOLE (14<<3)
+#endif
+
+/*
+ * ICMP error replies have an IP header (20 bytes), 8 bytes of ICMP data,
+ * another IP header and then 64 bits of data, totalling 56. Of course,
+ * the last 64 bits is dependent on that being available.
+ *
+ * The lengths below are built up from one another: 8, 28, 48 and 56 for
+ * IPv4, then 48 and 88 for the two IPv6 constants (40-byte IPv6 header).
+ * NOTE(review): relative to the IPv4 pair the two ICMP6ERR_* names look
+ * swapped (MINPKTLEN is the smaller value here) - confirm against users
+ * before relying on the names.
+ */
+#define ICMPERR_ICMPHLEN 8
+#define ICMPERR_IPICMPHLEN (20 + ICMPERR_ICMPHLEN)
+#define ICMPERR_MINPKTLEN (ICMPERR_IPICMPHLEN + 20)
+#define ICMPERR_MAXPKTLEN (ICMPERR_MINPKTLEN + 8)
+#define ICMP6ERR_MINPKTLEN (40 + ICMPERR_ICMPHLEN)
+#define ICMP6ERR_IPICMPHLEN (ICMP6ERR_MINPKTLEN + 40)
+
+/* MIN(a,b): the smaller of a and b; each argument may be evaluated twice. */
+#if !defined(MIN)
+# define MIN(a,b) ((a) < (b) ? (a) : (b))
+#endif
+
+/*
+ * DPRINT(x): x is a parenthesised printf argument list.  It is handed to
+ * printf when IPF_DEBUG is defined and expands to nothing otherwise.
+ */
+#ifndef IPF_DEBUG
+# define DPRINT(x)
+#else
+# define DPRINT(x) printf x
+#endif
+
+/* A RESCUE build drops IPFILTER_BPF. */
+#if defined(RESCUE)
+# undef IPFILTER_BPF
+#endif
+
+#endif /* __IP_COMPAT_H__ */
diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_fil.h b/usr/src/uts/common/inet/ipf/netinet/ip_fil.h
new file mode 100644
index 0000000000..e7ec154f4f
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/netinet/ip_fil.h
@@ -0,0 +1,1431 @@
+/*
+ * Copyright (C) 1993-2001, 2003 by Darren Reed.
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ *
+ * @(#)ip_fil.h 1.35 6/5/96
+ * $Id: ip_fil.h,v 2.170.2.22 2005/07/16 05:55:35 darrenr Exp $
+ *
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifndef __IP_FIL_H__
+#define __IP_FIL_H__
+
+#include "netinet/ip_compat.h"
+
+/*
+ * SOLARIS is 1 on a SunOS 5.x / Solaris build (sun plus __svr4__ or __SVR4)
+ * and 0 otherwise, unless it was already defined before this point.
+ *
+ * The value is computed with a real #if instead of making the macro expand
+ * to an expression containing `defined': a `defined' token produced by
+ * macro replacement inside #if is undefined behaviour in ISO C, and such a
+ * macro cannot be used in an ordinary C expression at all.
+ */
+#ifndef SOLARIS
+# if defined(sun) && (defined(__svr4__) || defined(__SVR4))
+# define SOLARIS 1
+# else
+# define SOLARIS 0
+# endif
+#endif
+
+/*
+ * __P(x) wraps prototype argument lists: with an ISO C compiler the list is
+ * kept as written, otherwise it collapses to an empty parameter list.
+ */
+#ifndef __P
+# ifndef __STDC__
+# define __P(x) ()
+# else
+# define __P(x) x
+# endif
+#endif
+
+/*
+ * ioctl command codes, all in group 'r', numbered 60 through 90.
+ *
+ * The two branches define the same set of commands.  The first passes the
+ * group as the character constant 'r'; the second passes a bare r.
+ * NOTE(review): the second form presumably targets pre-ISO preprocessors
+ * whose _IO* macros quote the group argument themselves - confirm.
+ *
+ * These values are shared between kernel and userland; do not renumber.
+ */
+#if defined(__STDC__) || defined(__GNUC__) || defined(_AIX51)
+# define SIOCADAFR _IOW('r', 60, struct ipfobj)
+# define SIOCRMAFR _IOW('r', 61, struct ipfobj)
+# define SIOCSETFF _IOW('r', 62, u_int)
+# define SIOCGETFF _IOR('r', 63, u_int)
+# define SIOCGETFS _IOWR('r', 64, struct ipfobj)
+# define SIOCIPFFL _IOWR('r', 65, int)
+# define SIOCIPFFB _IOR('r', 66, int)
+# define SIOCADIFR _IOW('r', 67, struct ipfobj)
+# define SIOCRMIFR _IOW('r', 68, struct ipfobj)
+# define SIOCSWAPA _IOR('r', 69, u_int)
+# define SIOCINAFR _IOW('r', 70, struct ipfobj)
+# define SIOCINIFR _IOW('r', 71, struct ipfobj)
+# define SIOCFRENB _IOW('r', 72, u_int)
+# define SIOCFRSYN _IOW('r', 73, u_int)
+# define SIOCFRZST _IOWR('r', 74, struct ipfobj)
+# define SIOCZRLST _IOWR('r', 75, struct ipfobj)
+# define SIOCAUTHW _IOWR('r', 76, struct ipfobj)
+# define SIOCAUTHR _IOWR('r', 77, struct ipfobj)
+# define SIOCATHST _IOWR('r', 78, struct ipfobj)
+# define SIOCSTLCK _IOWR('r', 79, u_int)
+# define SIOCSTPUT _IOWR('r', 80, struct ipfobj)
+# define SIOCSTGET _IOWR('r', 81, struct ipfobj)
+# define SIOCSTGSZ _IOWR('r', 82, struct ipfobj)
+# define SIOCGFRST _IOWR('r', 83, struct ipfobj)
+# define SIOCSETLG _IOWR('r', 84, int)
+# define SIOCGETLG _IOWR('r', 85, int)
+# define SIOCFUNCL _IOWR('r', 86, struct ipfunc_resolve)
+# define SIOCIPFGETNEXT _IOWR('r', 87, struct ipfobj)
+# define SIOCIPFGET _IOWR('r', 88, struct ipfobj)
+# define SIOCIPFSET _IOWR('r', 89, struct ipfobj)
+# define SIOCIPFL6 _IOWR('r', 90, int)
+#else
+# define SIOCADAFR _IOW(r, 60, struct ipfobj)
+# define SIOCRMAFR _IOW(r, 61, struct ipfobj)
+# define SIOCSETFF _IOW(r, 62, u_int)
+# define SIOCGETFF _IOR(r, 63, u_int)
+# define SIOCGETFS _IOWR(r, 64, struct ipfobj)
+# define SIOCIPFFL _IOWR(r, 65, int)
+# define SIOCIPFFB _IOR(r, 66, int)
+# define SIOCADIFR _IOW(r, 67, struct ipfobj)
+# define SIOCRMIFR _IOW(r, 68, struct ipfobj)
+# define SIOCSWAPA _IOR(r, 69, u_int)
+# define SIOCINAFR _IOW(r, 70, struct ipfobj)
+# define SIOCINIFR _IOW(r, 71, struct ipfobj)
+# define SIOCFRENB _IOW(r, 72, u_int)
+# define SIOCFRSYN _IOW(r, 73, u_int)
+# define SIOCFRZST _IOWR(r, 74, struct ipfobj)
+# define SIOCZRLST _IOWR(r, 75, struct ipfobj)
+# define SIOCAUTHW _IOWR(r, 76, struct ipfobj)
+# define SIOCAUTHR _IOWR(r, 77, struct ipfobj)
+# define SIOCATHST _IOWR(r, 78, struct ipfobj)
+# define SIOCSTLCK _IOWR(r, 79, u_int)
+# define SIOCSTPUT _IOWR(r, 80, struct ipfobj)
+# define SIOCSTGET _IOWR(r, 81, struct ipfobj)
+# define SIOCSTGSZ _IOWR(r, 82, struct ipfobj)
+# define SIOCGFRST _IOWR(r, 83, struct ipfobj)
+# define SIOCSETLG _IOWR(r, 84, int)
+# define SIOCGETLG _IOWR(r, 85, int)
+# define SIOCFUNCL _IOWR(r, 86, struct ipfunc_resolve)
+# define SIOCIPFGETNEXT _IOWR(r, 87, struct ipfobj)
+# define SIOCIPFGET _IOWR(r, 88, struct ipfobj)
+# define SIOCIPFSET _IOWR(r, 89, struct ipfobj)
+# define SIOCIPFL6 _IOWR(r, 90, int)
+#endif
+/* Alternative names for the add / remove / insert rule commands. */
+#define SIOCADDFR SIOCADAFR
+#define SIOCDELFR SIOCRMAFR
+#define SIOCINSFR SIOCINAFR
+
+
+/* Forward declarations: only pointers to these are used in this header. */
+struct ipscan;
+struct ifnet;
+
+
+/* Pointer to a lookup routine taking (void *, int, void *) and returning int. */
+typedef int (* lookupfunc_t) __P((void *, int, void *));
+
+/*
+ * i6addr is used as a container for both IPv4 and IPv6 addresses, as well
+ * as other types of objects, depending on its qualifier.
+ *
+ * The two variants differ only in the in6 member (and the in6_addr8 alias),
+ * which exist only when USE_INET6 is defined.
+ */
+#ifdef USE_INET6
+typedef union i6addr {
+ u_32_t i6[4];
+ struct in_addr in4;
+ struct in6_addr in6;
+ void *vptr[2];
+ lookupfunc_t lptr[2];
+} i6addr_t;
+#define in6_addr8 in6.s6_addr
+#else
+typedef union i6addr {
+ u_32_t i6[4];
+ struct in_addr in4;
+ void *vptr[2];
+ lookupfunc_t lptr[2];
+} i6addr_t;
+#endif
+
+/* Member aliases: IPv4 address word, then lookup number/type in i6[0]/i6[1]. */
+#define in4_addr in4.s_addr
+#define iplookupnum i6[0]
+#define iplookuptype i6[1]
+/*
+ * NOTE: These DO overlap the above on 64bit systems and this IS recognised.
+ */
+#define iplookupptr vptr[0]
+#define iplookupfunc lptr[1]
+
+/*
+ * I6n(x): 32-bit word n of the i6addr_t that x points to, exactly as stored
+ * (usable as an lvalue).  HI6n(x): the same word passed through ntohl().
+ */
+#define I60(x) (((i6addr_t *)(x))->i6[0])
+#define I61(x) (((i6addr_t *)(x))->i6[1])
+#define I62(x) (((i6addr_t *)(x))->i6[2])
+#define I63(x) (((i6addr_t *)(x))->i6[3])
+#define HI60(x) ntohl(I60(x))
+#define HI61(x) ntohl(I61(x))
+#define HI62(x) ntohl(I62(x))
+#define HI63(x) ntohl(I63(x))
+
+/*
+ * Whole-value tests on the four words addressed by a (and b).  Every macro
+ * yields 0 or 1 and may evaluate its arguments several times.
+ * IP6_GT / IP6_LT compare the words in host byte order, word 0 first.
+ */
+#define IP6_EQ(a,b) ((I60(a) == I60(b)) && (I61(a) == I61(b)) && \
+ (I62(a) == I62(b)) && (I63(a) == I63(b)))
+#define IP6_NEQ(a,b) (!IP6_EQ(a,b))
+#define IP6_NOTZERO(a) ((I60(a) | I61(a) | I62(a) | I63(a)) != 0)
+#define IP6_ISZERO(a) (!IP6_NOTZERO(a))
+#define IP6_GT(a,b) (HI60(a) != HI60(b) ? HI60(a) > HI60(b) : \
+ HI61(a) != HI61(b) ? HI61(a) > HI61(b) : \
+ HI62(a) != HI62(b) ? HI62(a) > HI62(b) : \
+ HI63(a) > HI63(b))
+#define IP6_LT(a,b) (IP6_GT(b,a))
+/*
+ * NLADD(n,x): add the host-order value x to the network-order 32-bit word
+ * n; the result is back in network byte order.
+ */
+#define NLADD(n,x) htonl(ntohl(n) + (x))
+/*
+ * IP6_INC(a): add one, in place, to the four-word value that a points to.
+ * Word i6[0] is incremented first; when a word wraps to zero the carry goes
+ * into the next word up (i6[1], then i6[2], then i6[3]).  The carry has to
+ * be stored in the same word that is then tested for wrap-around, otherwise
+ * it is lost and i6[0] is overwritten.
+ *
+ * The expansion is a bare { } block, as callers expect.
+ * NOTE(review): this treats i6[0] as the least significant word, whereas
+ * IP6_GT/IP6_LT compare i6[0] first as the most significant - confirm the
+ * intended word order with the callers.
+ */
+#define IP6_INC(a) \
+ { i6addr_t *_i6 = (i6addr_t *)(a); \
+   _i6->i6[0] = NLADD(_i6->i6[0], 1); \
+   if (_i6->i6[0] == 0) { \
+     _i6->i6[1] = NLADD(_i6->i6[1], 1); \
+     if (_i6->i6[1] == 0) { \
+       _i6->i6[2] = NLADD(_i6->i6[2], 1); \
+       if (_i6->i6[2] == 0) { \
+         _i6->i6[3] = NLADD(_i6->i6[3], 1); \
+       } \
+     } \
+   } \
+ }
+/*
+ * IP6_ADD(a,x,d): store (value at a) + x into d, where x is a host-order
+ * increment added to word i6[0] and any overflow is carried upwards through
+ * i6[1], i6[2] and i6[3].
+ *
+ * The source is copied first, so d may be the same object as a; without the
+ * copy the overflow test would compare a freshly written word with itself
+ * and never fire.  All four destination words are always written, so d need
+ * not be pre-loaded with a copy of a.
+ *
+ * The expansion is a bare { } block, as callers expect.
+ * NOTE(review): as in IP6_INC, i6[0] is treated as the least significant
+ * word, unlike IP6_GT/IP6_LT - confirm the intended word order.
+ */
+#define IP6_ADD(a,x,d) \
+ { i6addr_t _o = *(i6addr_t *)(a); \
+   i6addr_t *_d = (i6addr_t *)(d); \
+   _d->i6[0] = NLADD(_o.i6[0], x); \
+   _d->i6[1] = _o.i6[1]; \
+   _d->i6[2] = _o.i6[2]; \
+   _d->i6[3] = _o.i6[3]; \
+   if (ntohl(_d->i6[0]) < ntohl(_o.i6[0])) { \
+     _d->i6[1] = NLADD(_o.i6[1], 1); \
+     if (_d->i6[1] == 0) { \
+       _d->i6[2] = NLADD(_o.i6[2], 1); \
+       if (_d->i6[2] == 0) { \
+         _d->i6[3] = NLADD(_o.i6[3], 1); \
+       } \
+     } \
+   } \
+ }
+/*
+ * IP6_AND(a,b,d): store the word-by-word bitwise AND of the values at a and
+ * b into d.  The second operand must be read from b; d is only written
+ * (d may alias a or b).  The expansion is a bare { } block.
+ */
+#define IP6_AND(a,b,d) { i6addr_t *_s1 = (i6addr_t *)(a); \
+ i6addr_t *_s2 = (i6addr_t *)(b); \
+ i6addr_t *_d = (i6addr_t *)(d); \
+ _d->i6[0] = _s1->i6[0] & _s2->i6[0]; \
+ _d->i6[1] = _s1->i6[1] & _s2->i6[1]; \
+ _d->i6[2] = _s1->i6[2] & _s2->i6[2]; \
+ _d->i6[3] = _s1->i6[3] & _s2->i6[3]; \
+ }
+/*
+ * IP6_MERGE(a,b,c): for each of the four words, OR into the value at a the
+ * bits of b that are NOT set in c (a |= b & ~c).  Each word of the result
+ * goes to the matching word of a, including the fourth.  The expansion is a
+ * bare { } block.
+ */
+#define IP6_MERGE(a,b,c) \
+ { i6addr_t *_d, *_s1, *_s2; \
+ _d = (i6addr_t *)(a); \
+ _s1 = (i6addr_t *)(b); \
+ _s2 = (i6addr_t *)(c); \
+ _d->i6[0] |= _s1->i6[0] & ~_s2->i6[0]; \
+ _d->i6[1] |= _s1->i6[1] & ~_s2->i6[1]; \
+ _d->i6[2] |= _s1->i6[2] & ~_s2->i6[2]; \
+ _d->i6[3] |= _s1->i6[3] & ~_s2->i6[3]; \
+ }
+
+
+/*
+ * Summary of the IP-level attributes of a packet.  The same layout is used
+ * both for values and for masks (see struct fripf).  The first five members
+ * are bit-fields that together occupy 32 bits.
+ */
+typedef struct fr_ip {
+ u_32_t fi_v:4; /* IP version */
+ u_32_t fi_xx:4; /* spare */
+ u_32_t fi_tos:8; /* IP packet TOS */
+ u_32_t fi_ttl:8; /* IP packet TTL */
+ u_32_t fi_p:8; /* IP packet protocol */
+ u_32_t fi_optmsk; /* bitmask composed from IP options */
+ i6addr_t fi_src; /* source address from packet */
+ i6addr_t fi_dst; /* destination address from packet */
+ u_short fi_secmsk; /* bitmask composed from IP security options */
+ u_short fi_auth; /* authentication code from IP sec. options */
+ u_32_t fi_flx; /* packet flags */
+ u_32_t fi_tcpmsk; /* TCP options set/reset */
+ u_32_t fi_res1; /* RESERVED */
+} fr_ip_t;
+
+/*
+ * For use in fi_flx
+ *
+ * The three composite masks are plain bit sets over the low 16 flags:
+ *   FI_CMP     (0xcfe3) - every flag except FI_FRAG, FI_SHORT, FI_NATED,
+ *                         FI_ICMPERR and FI_FRAGBODY
+ *   FI_ICMPCMP (0x0003) - FI_TCPUDP | FI_OPTIONS
+ *   FI_WITH    (0xeffe) - every flag except FI_TCPUDP and FI_ICMPERR
+ */
+#define FI_TCPUDP 0x0001 /* TCP/UDP implied comparison*/
+#define FI_OPTIONS 0x0002
+#define FI_FRAG 0x0004
+#define FI_SHORT 0x0008
+#define FI_NATED 0x0010
+#define FI_MULTICAST 0x0020
+#define FI_BROADCAST 0x0040
+#define FI_MBCAST 0x0080
+#define FI_STATE 0x0100
+#define FI_BADNAT 0x0200
+#define FI_BAD 0x0400
+#define FI_OOW 0x0800 /* Out of state window, else match */
+#define FI_ICMPERR 0x1000
+#define FI_FRAGBODY 0x2000
+#define FI_BADSRC 0x4000
+#define FI_LOWTTL 0x8000
+#define FI_CMP 0xcfe3 /* see the mask summary above */
+#define FI_ICMPCMP 0x0003 /* Flags we can check for ICMP error packets */
+#define FI_WITH 0xeffe /* see the mask summary above */
+#define FI_V6EXTHDR 0x10000
+#define FI_COALESCE 0x20000
+#define FI_NOCKSUM 0x20000000 /* don't do a L4 checksum validation */
+#define FI_DONTCACHE 0x40000000 /* don't cache the result */
+#define FI_IGNORE 0x80000000
+
+/*
+ * Shorthand member names for fr_ip_t: the IPv4 address words and the
+ * lookup number / type / pointer / function views of fi_src and fi_dst.
+ */
+#define fi_saddr fi_src.in4.s_addr
+#define fi_daddr fi_dst.in4.s_addr
+#define fi_srcnum fi_src.iplookupnum
+#define fi_dstnum fi_dst.iplookupnum
+#define fi_srctype fi_src.iplookuptype
+#define fi_dsttype fi_dst.iplookuptype
+#define fi_srcptr fi_src.iplookupptr
+#define fi_dstptr fi_dst.iplookupptr
+#define fi_srcfunc fi_src.iplookupfunc
+#define fi_dstfunc fi_dst.iplookupfunc
+
+
+/*
+ * These are both used by the state and NAT code to indicate that one port or
+ * the other should be treated as a wildcard.
+ * NOTE: When updating, check bit masks in ip_state.h and update there too.
+ *
+ * SI_WILDP and SI_WILDA are the unions of the two port and the two address
+ * wildcard bits respectively.
+ */
+#define SI_W_SPORT 0x00000100
+#define SI_W_DPORT 0x00000200
+#define SI_WILDP (SI_W_SPORT|SI_W_DPORT)
+#define SI_W_SADDR 0x00000400
+#define SI_W_DADDR 0x00000800
+#define SI_WILDA (SI_W_SADDR|SI_W_DADDR)
+#define SI_NEWFR 0x00001000
+#define SI_CLONE 0x00002000
+#define SI_CLONED 0x00004000
+
+
+/*
+ * Per-packet working structure.  The member order matters: FI_CSIZE,
+ * FI_LCSIZE and FI_COPYSIZE are offsets of fin_icode / fin_dp within this
+ * structure, so members placed before those points take part in the
+ * compare / copy sizes.  fin_qfm and fin_qpi exist only when MENTAT is
+ * defined, fin_hbuf only when __sgi is defined.
+ */
+typedef struct fr_info {
+ void *fin_ifp; /* interface packet is `on' */
+ fr_ip_t fin_fi; /* IP Packet summary */
+ union {
+ u_short fid_16[2]; /* TCP/UDP ports, ICMP code/type */
+ u_32_t fid_32;
+ } fin_dat;
+ int fin_out; /* in or out ? 1 == out, 0 == in */
+ int fin_rev; /* state only: 1 = reverse */
+ u_short fin_hlen; /* length of IP header in bytes */
+ u_char fin_tcpf; /* TCP header flags (SYN, ACK, etc) */
+ u_char fin_icode; /* ICMP error to return */
+ u_32_t fin_rule; /* rule # last matched */
+ char fin_group[FR_GROUPLEN]; /* group number, -1 for none */
+ struct frentry *fin_fr; /* last matching rule */
+ void *fin_dp; /* start of data past IP header */
+ int fin_dlen; /* length of data portion of packet */
+ int fin_plen;
+ int fin_flen; /* length of layer 4 hdr and
+ ipv6 ext hdr after fragment hdr */
+ int fin_ipoff; /* # bytes from buffer start to hdr */
+ u_32_t fin_id; /* IP packet id field */
+ u_short fin_off;
+ int fin_depth; /* Group nesting depth */
+ int fin_error; /* Error code to return */
+ void *fin_nat;
+ void *fin_state;
+ void *fin_nattag;
+ ip_t *fin_ip;
+ mb_t **fin_mp; /* pointer to pointer to mbuf */
+ mb_t *fin_m; /* pointer to mbuf */
+#ifdef MENTAT
+ mb_t *fin_qfm; /* pointer to mblk where pkt starts */
+ void *fin_qpi;
+#endif
+#ifdef __sgi
+ void *fin_hbuf;
+#endif
+} fr_info_t;
+
+/*
+ * Shorthand member names for fr_info_t, reaching into the embedded fin_fi
+ * summary and the fin_dat union (fin_sport / fin_dport are the two 16-bit
+ * halves, fin_ports the combined 32-bit view).
+ */
+#define fin_v fin_fi.fi_v
+#define fin_p fin_fi.fi_p
+#define fin_flx fin_fi.fi_flx
+#define fin_optmsk fin_fi.fi_optmsk
+#define fin_secmsk fin_fi.fi_secmsk
+#define fin_auth fin_fi.fi_auth
+#define fin_src fin_fi.fi_src.in4
+#define fin_src6 fin_fi.fi_src.in6
+#define fin_saddr fin_fi.fi_saddr
+#define fin_dst fin_fi.fi_dst.in4
+#define fin_dst6 fin_fi.fi_dst.in6
+#define fin_daddr fin_fi.fi_daddr
+#define fin_data fin_dat.fid_16
+#define fin_sport fin_dat.fid_16[0]
+#define fin_dport fin_dat.fid_16[1]
+#define fin_ports fin_dat.fid_32
+
+/* Values of fin_out: 0 for inbound, 1 for outbound. */
+#define IPF_IN 0
+#define IPF_OUT 1
+
+/*
+ * ipfunc_t: function taking the packet info and a u_32_t pointer and
+ * returning a rule pointer.  ipfuncinit_t: function taking a rule and
+ * returning int.
+ */
+typedef struct frentry *(*ipfunc_t) __P((fr_info_t *, u_32_t *));
+typedef int (*ipfuncinit_t) __P((struct frentry *));
+
+/*
+ * Pairs a name (up to 32 bytes) with an ipfunc_t and its init routine;
+ * this is the argument type of the SIOCFUNCL ioctl.
+ */
+typedef struct ipfunc_resolve {
+ char ipfu_name[32];
+ ipfunc_t ipfu_addr;
+ ipfuncinit_t ipfu_init;
+} ipfunc_resolve_t;
+
+/*
+ * Size for compares on fr_info structures
+ *
+ * FI_CSIZE covers the members before fin_icode, FI_LCSIZE those before
+ * fin_dp.
+ */
+#define FI_CSIZE offsetof(fr_info_t, fin_icode)
+#define FI_LCSIZE offsetof(fr_info_t, fin_dp)
+
+/*
+ * Size for copying cache fr_info structure
+ * (the same offset as FI_LCSIZE)
+ */
+#define FI_COPYSIZE offsetof(fr_info_t, fin_dp)
+
+/*
+ * Structure for holding IPFilter's tag information
+ *
+ * The tag is IPFTAG_LEN (16) bytes, viewable either as characters
+ * (ipt_tag) or as four 32-bit words (ipt_num).
+ */
+#define IPFTAG_LEN 16
+typedef struct {
+ union {
+ u_32_t iptu_num[4];
+ char iptu_tag[IPFTAG_LEN];
+ } ipt_un;
+ int ipt_not;
+} ipftag_t;
+
+#define ipt_tag ipt_un.iptu_tag
+#define ipt_num ipt_un.iptu_num
+
+
+/*
+ * This structure is used to hold information about the next hop for where
+ * to forward a packet.
+ *
+ * fd_ip is the IPv4 view of the fd_ip6 address container.
+ */
+typedef struct frdest {
+ void *fd_ifp;
+ i6addr_t fd_ip6;
+ char fd_ifname[LIFNAMSIZ];
+} frdest_t;
+
+#define fd_ip fd_ip6.in4
+
+
+/*
+ * This structure holds information about a port comparison.
+ *
+ * frp_cmp holds one of the FR_* comparison codes defined below.
+ */
+typedef struct frpcmp {
+ int frp_cmp; /* data for port comparisons */
+ u_short frp_port; /* port to compare (presumably the bottom port for <> and >< - confirm) */
+ u_short frp_top; /* top port for <> and >< */
+} frpcmp_t;
+
+/* Port comparison codes. */
+#define FR_NONE 0
+#define FR_EQUAL 1
+#define FR_NEQUAL 2
+#define FR_LESST 3
+#define FR_GREATERT 4
+#define FR_LESSTE 5
+#define FR_GREATERTE 6
+#define FR_OUTRANGE 7
+#define FR_INRANGE 8
+#define FR_INCRANGE 9
+
+/*
+ * Structure containing all the relevant TCP things that can be checked in
+ * a filter rule.
+ *
+ * The ftu_* names below reach into the source and destination port
+ * comparisons.  FR_TCPFMAX (0x3f) covers the low six TCP flag bits.
+ */
+typedef struct frtuc {
+ u_char ftu_tcpfm; /* tcp flags mask */
+ u_char ftu_tcpf; /* tcp flags */
+ frpcmp_t ftu_src;
+ frpcmp_t ftu_dst;
+} frtuc_t;
+
+#define ftu_scmp ftu_src.frp_cmp
+#define ftu_dcmp ftu_dst.frp_cmp
+#define ftu_sport ftu_src.frp_port
+#define ftu_dport ftu_dst.frp_port
+#define ftu_stop ftu_src.frp_top
+#define ftu_dtop ftu_dst.frp_top
+
+#define FR_TCPFMAX 0x3f
+
+/*
+ * This structure makes up what is considered to be the IPFilter specific
+ * matching components of a filter rule, as opposed to the data structures
+ * used to define the result which are in frentry_t and not here.
+ */
+typedef struct fripf {
+ fr_ip_t fri_ip;
+ fr_ip_t fri_mip; /* mask structure */
+
+ u_short fri_icmpm; /* data for ICMP packets (mask) */
+ u_short fri_icmp;
+
+ frtuc_t fri_tuc;
+ int fri_satype; /* address type (FRI_* below) */
+ int fri_datype; /* address type (FRI_* below) */
+ int fri_sifpidx; /* index into fr_ifps[] to use when */
+ int fri_difpidx; /* doing dynamic addressing */
+} fripf_t;
+
+/*
+ * NOTE(review): the source aliases go through fri_mip while the destination
+ * aliases go through fri_ip, and the frentry-level fr_srcnum / fr_dstptr
+ * aliases pair these differently - confirm which structure is intended.
+ */
+#define fri_dstnum fri_ip.fi_dstnum
+#define fri_srcnum fri_mip.fi_srcnum
+#define fri_dstptr fri_ip.fi_dstptr
+#define fri_srcptr fri_mip.fi_srcptr
+
+#define FRI_NORMAL 0 /* Normal address */
+#define FRI_DYNAMIC 1 /* dynamic address */
+#define FRI_LOOKUP 2 /* address is a pool # */
+#define FRI_RANGE 3 /* address/mask is a range */
+#define FRI_NETWORK 4 /* network address from if */
+#define FRI_BROADCAST 5 /* broadcast address from if */
+#define FRI_PEERADDR 6 /* Peer address for P-to-P */
+#define FRI_NETMASKED 7 /* network address with netmask from if */
+
+
+/* Function taking the packet info and returning a rule pointer. */
+typedef struct frentry * (* frentfunc_t) __P((fr_info_t *));
+
+/*
+ * A filter rule.  The layout is significant: FR_CMPSIZ is measured from
+ * fr_func to the end of the structure, so members from fr_func onwards form
+ * the region used when rules are compared, and fr_cksum must stay last.
+ */
+typedef struct frentry {
+ ipfmutex_t fr_lock;
+ struct frentry *fr_next;
+ struct frentry **fr_grp;
+ struct ipscan *fr_isc;
+ void *fr_ifas[4];
+ void *fr_ptr; /* for use with fr_arg */
+ char *fr_comment; /* text comment for rule */
+ int fr_ref; /* reference count - for grouping */
+ int fr_statecnt; /* state count - for limit rules */
+ /*
+ * These are only incremented when a packet matches this rule and
+ * it is the last match
+ */
+ U_QUAD_T fr_hits;
+ U_QUAD_T fr_bytes;
+
+ /*
+ * For PPS rate limiting
+ */
+ struct timeval fr_lastpkt;
+ int fr_curpps;
+
+ union {
+ void *fru_data;
+ caddr_t fru_caddr;
+ fripf_t *fru_ipf;
+ frentfunc_t fru_func;
+ } fr_dun;
+
+ /*
+ * Fields after this may not change whilst in the kernel.
+ */
+ ipfunc_t fr_func; /* call this function */
+ int fr_dsize;
+ int fr_pps;
+ int fr_statemax; /* max reference count */
+ int fr_flineno; /* line number from conf file */
+ u_32_t fr_type;
+ u_32_t fr_flags; /* per-rule flags && options (see below) */
+ u_32_t fr_logtag; /* user defined log tag # */
+ u_32_t fr_collect; /* collection number */
+ u_int fr_arg; /* misc. numeric arg for rule */
+ u_int fr_loglevel; /* syslog log facility + priority */
+ u_int fr_age[2]; /* non-TCP timeouts */
+ u_char fr_v;
+ u_char fr_icode; /* return ICMP code */
+ char fr_group[FR_GROUPLEN]; /* group to which this rule belongs */
+ char fr_grhead[FR_GROUPLEN]; /* group # which this rule starts */
+ ipftag_t fr_nattag;
+ char fr_ifnames[4][LIFNAMSIZ];
+ char fr_isctag[16];
+ frdest_t fr_tifs[2]; /* "to"/"reply-to" interface */
+ frdest_t fr_dif; /* duplicate packet interface */
+ /*
+ * This must be last and will change after loaded into the kernel.
+ */
+ u_int fr_cksum; /* checksum on filter rules for performance */
+} frentry_t;
+
+/*
+ * Shorthand member names for frentry_t.  The first four select a view of
+ * the fr_dun union; every alias built on fr_ipf dereferences that pointer,
+ * so it is only meaningful when the union holds a fripf_t pointer.
+ * Value aliases go through fr_ip and mask aliases through fr_mip; note that
+ * the lookup pointer / function aliases (fr_dstptr, fr_srcptr, fr_dstfunc,
+ * fr_srcfunc) are taken from fr_mip.
+ */
+#define fr_caddr fr_dun.fru_caddr
+#define fr_data fr_dun.fru_data
+#define fr_dfunc fr_dun.fru_func
+#define fr_ipf fr_dun.fru_ipf
+#define fr_ip fr_ipf->fri_ip
+#define fr_mip fr_ipf->fri_mip
+#define fr_icmpm fr_ipf->fri_icmpm
+#define fr_icmp fr_ipf->fri_icmp
+#define fr_tuc fr_ipf->fri_tuc
+#define fr_satype fr_ipf->fri_satype
+#define fr_datype fr_ipf->fri_datype
+#define fr_sifpidx fr_ipf->fri_sifpidx
+#define fr_difpidx fr_ipf->fri_difpidx
+#define fr_proto fr_ip.fi_p
+#define fr_mproto fr_mip.fi_p
+#define fr_ttl fr_ip.fi_ttl
+#define fr_mttl fr_mip.fi_ttl
+#define fr_tos fr_ip.fi_tos
+#define fr_mtos fr_mip.fi_tos
+#define fr_tcpfm fr_tuc.ftu_tcpfm
+#define fr_tcpf fr_tuc.ftu_tcpf
+#define fr_scmp fr_tuc.ftu_scmp
+#define fr_dcmp fr_tuc.ftu_dcmp
+#define fr_dport fr_tuc.ftu_dport
+#define fr_sport fr_tuc.ftu_sport
+#define fr_stop fr_tuc.ftu_stop
+#define fr_dtop fr_tuc.ftu_dtop
+#define fr_dst fr_ip.fi_dst.in4
+#define fr_daddr fr_ip.fi_dst.in4.s_addr
+#define fr_src fr_ip.fi_src.in4
+#define fr_saddr fr_ip.fi_src.in4.s_addr
+#define fr_dmsk fr_mip.fi_dst.in4
+#define fr_dmask fr_mip.fi_dst.in4.s_addr
+#define fr_smsk fr_mip.fi_src.in4
+#define fr_smask fr_mip.fi_src.in4.s_addr
+#define fr_dstnum fr_ip.fi_dstnum
+#define fr_srcnum fr_ip.fi_srcnum
+#define fr_dsttype fr_ip.fi_dsttype
+#define fr_srctype fr_ip.fi_srctype
+#define fr_dstptr fr_mip.fi_dstptr
+#define fr_srcptr fr_mip.fi_srcptr
+#define fr_dstfunc fr_mip.fi_dstfunc
+#define fr_srcfunc fr_mip.fi_srcfunc
+#define fr_optbits fr_ip.fi_optmsk
+#define fr_optmask fr_mip.fi_optmsk
+#define fr_secbits fr_ip.fi_secmsk
+#define fr_secmask fr_mip.fi_secmsk
+#define fr_authbits fr_ip.fi_auth
+#define fr_authmask fr_mip.fi_auth
+#define fr_flx fr_ip.fi_flx
+#define fr_mflx fr_mip.fi_flx
+#define fr_ifname fr_ifnames[0]
+#define fr_oifname fr_ifnames[2]
+#define fr_ifa fr_ifas[0]
+#define fr_oifa fr_ifas[2]
+#define fr_tif fr_tifs[0]
+#define fr_rif fr_tifs[1]
+
+#define FR_NOLOGTAG 0
+
+/*
+ * Fallback offsetof for environments that do not provide one; note that it
+ * yields an int.  It is a macro, so earlier uses in this header (FI_CSIZE
+ * etc.) are resolved only where those macros are expanded.
+ */
+#ifndef offsetof
+#define offsetof(t,m) (int)((&((t *)0L)->m))
+#endif
+/* Number of bytes from fr_func to the end of struct frentry. */
+#define FR_CMPSIZ (sizeof(struct frentry) - \
+ offsetof(struct frentry, fr_func))
+
+/*
+ * fr_type
+ *
+ * Values 0-4 are enumerated types; FR_T_BUILTIN is a separate high bit.
+ */
+#define FR_T_NONE 0
+#define FR_T_IPF 1 /* IPF structures */
+#define FR_T_BPFOPC 2 /* BPF opcode */
+#define FR_T_CALLFUNC 3 /* callout to function in fr_func only */
+#define FR_T_COMPIPF 4 /* compiled C code */
+#define FR_T_BUILTIN 0x80000000 /* rule is in kernel space */
+
+/*
+ * fr_flags
+ *
+ * The low four bits (FR_CMDMASK) hold a command number, not independent
+ * bits: FR_CALL..FR_DIVERT are the values 0-7.  FR_LOGB and FR_LOGP are
+ * FR_LOG combined with the block / pass command, and FR_FAKEICMP is
+ * FR_RETRST|FR_RETICMP.  Everything from FR_LOG upwards is otherwise a
+ * single-bit flag.  FR_COPIED and FR_INACTIVE reuse the bit values of
+ * FF_LOGNOMATCH and FF_BLOCKNONIP listed in the comments below.
+ */
+#define FR_CALL 0x00000 /* call rule */
+#define FR_BLOCK 0x00001 /* do not allow packet to pass */
+#define FR_PASS 0x00002 /* allow packet to pass */
+#define FR_AUTH 0x00003 /* use authentication */
+#define FR_PREAUTH 0x00004 /* require preauthentication */
+#define FR_ACCOUNT 0x00005 /* Accounting rule */
+#define FR_SKIP 0x00006 /* skip rule */
+#define FR_DIVERT 0x00007 /* divert rule */
+#define FR_CMDMASK 0x0000f
+#define FR_LOG 0x00010 /* Log */
+#define FR_LOGB 0x00011 /* Log-fail */
+#define FR_LOGP 0x00012 /* Log-pass */
+#define FR_LOGMASK (FR_LOG|FR_CMDMASK)
+#define FR_CALLNOW 0x00020 /* call another function (fr_func) if matches */
+#define FR_NOTSRCIP 0x00040
+#define FR_NOTDSTIP 0x00080
+#define FR_QUICK 0x00100 /* match & stop processing list */
+#define FR_KEEPFRAG 0x00200 /* keep fragment information */
+#define FR_KEEPSTATE 0x00400 /* keep `connection' state information */
+#define FR_FASTROUTE 0x00800 /* bypass normal routing */
+#define FR_RETRST 0x01000 /* Return TCP RST packet - reset connection */
+#define FR_RETICMP 0x02000 /* Return ICMP unreachable packet */
+#define FR_FAKEICMP 0x03000 /* Return ICMP unreachable with fake source */
+#define FR_OUTQUE 0x04000 /* outgoing packets */
+#define FR_INQUE 0x08000 /* ingoing packets */
+#define FR_LOGBODY 0x10000 /* Log the body */
+#define FR_LOGFIRST 0x20000 /* Log the first byte if state held */
+#define FR_LOGORBLOCK 0x40000 /* block the packet if it can't be logged */
+#define FR_DUP 0x80000 /* duplicate packet */
+#define FR_FRSTRICT 0x100000 /* strict frag. cache */
+#define FR_STSTRICT 0x200000 /* strict keep state */
+#define FR_NEWISN 0x400000 /* new ISN for outgoing TCP */
+#define FR_NOICMPERR 0x800000 /* do not match ICMP errors in state */
+#define FR_STATESYNC 0x1000000 /* synchronize state to slave */
+#define FR_NOMATCH 0x8000000 /* no match occurred */
+ /* 0x10000000 FF_LOGPASS */
+ /* 0x20000000 FF_LOGBLOCK */
+ /* 0x40000000 FF_LOGNOMATCH */
+ /* 0x80000000 FF_BLOCKNONIP */
+#define FR_COPIED 0x40000000 /* copied from user space */
+#define FR_INACTIVE 0x80000000 /* only used when flush'ing rules */
+
+/*
+ * FR_RETMASK: all of the "send something back" bits.
+ * FR_ISxxx(x): 1 when the command number in the low bits of x (selected by
+ * FR_CMDMASK) is the named command, else 0.
+ * FR_ISNOMATCH(x): the FR_NOMATCH bit of x - non-zero when set, but not
+ * normalised to 1.
+ * FR_INOUT: both queue-direction bits.
+ */
+#define FR_RETMASK (FR_RETRST|FR_RETICMP|FR_FAKEICMP)
+#define FR_ISBLOCK(x) (FR_BLOCK == ((x) & FR_CMDMASK))
+#define FR_ISPASS(x) (FR_PASS == ((x) & FR_CMDMASK))
+#define FR_ISAUTH(x) (FR_AUTH == ((x) & FR_CMDMASK))
+#define FR_ISPREAUTH(x) (FR_PREAUTH == ((x) & FR_CMDMASK))
+#define FR_ISACCOUNT(x) (FR_ACCOUNT == ((x) & FR_CMDMASK))
+#define FR_ISSKIP(x) (FR_SKIP == ((x) & FR_CMDMASK))
+#define FR_ISNOMATCH(x) (FR_NOMATCH & (x))
+#define FR_INOUT (FR_OUTQUE|FR_INQUE)
+
+/*
+ * recognized flags for SIOCGETFF and SIOCSETFF, and get put in fr_flags
+ *
+ * FF_LOGGING is the union of the three logging bits.
+ */
+#define FF_LOGPASS 0x10000000
+#define FF_LOGBLOCK 0x20000000
+#define FF_LOGNOMATCH 0x40000000
+#define FF_LOGGING (FF_LOGPASS|FF_LOGBLOCK|FF_LOGNOMATCH)
+#define FF_BLOCKNONIP 0x80000000 /* Solaris2 Only */
+
+
+/*
+ * Structure that passes information on what/how to flush to the kernel.
+ */
+typedef struct ipfflush {
+ int ipflu_how;
+ int ipflu_arg;
+} ipfflush_t;
+
+
+/*
+ * Description of one named control: its minimum, current, maximum and
+ * default values and the step between legal values.
+ */
+typedef struct ipfgetctl {
+ u_int ipfg_min; /* min value */
+ u_int ipfg_current; /* current value */
+ u_int ipfg_max; /* max value */
+ u_int ipfg_default; /* default value */
+ u_int ipfg_steps; /* value increments */
+ char ipfg_name[40]; /* tag name for this control */
+} ipfgetctl_t;
+
+/* Request to change one of the values of a named control. */
+typedef struct ipfsetctl {
+ int ipfs_which; /* 0 = min 1 = current 2 = max 3 = default */
+ u_int ipfs_value; /* value (presumably for the item chosen by ipfs_which - confirm) */
+ char ipfs_name[40]; /* tag name for this control */
+} ipfsetctl_t;
+
+
+/*
+ * Some of the statistics below are in their own counters, but most are kept
+ * in this single structure so that they can all easily be collected and
+ * copied back as required.
+ *
+ * NOTE: when changing, keep in sync with kstats (below).
+ * Every counter here has a same-named fks_* member in struct filter_kstats,
+ * in the same order.
+ */
+typedef struct filterstats {
+ u_long fr_pass; /* packets allowed */
+ u_long fr_block; /* packets denied */
+ u_long fr_nom; /* packets which don't match any rule */
+ u_long fr_short; /* packets which are short */
+ u_long fr_ppkl; /* packets allowed and logged */
+ u_long fr_bpkl; /* packets denied and logged */
+ u_long fr_npkl; /* packets unmatched and logged */
+ u_long fr_pkl; /* packets logged */
+ u_long fr_skip; /* packets to be logged but buffer full */
+ u_long fr_ret; /* packets for which a return is sent */
+ u_long fr_acct; /* packets for which counting was performed */
+ u_long fr_bnfr; /* bad attempts to allocate fragment state */
+ u_long fr_nfr; /* new fragment state kept */
+ u_long fr_cfr; /* add new fragment state but complete pkt */
+ u_long fr_bads; /* bad attempts to allocate packet state */
+ u_long fr_ads; /* new packet state kept */
+ u_long fr_chit; /* cached hit */
+ u_long fr_tcpbad; /* TCP checksum check failures */
+ u_long fr_pull[2]; /* good and bad pullup attempts */
+ u_long fr_badsrc; /* source received doesn't match route */
+ u_long fr_badttl; /* TTL in packet doesn't reach minimum */
+ u_long fr_bad; /* bad IP packets to the filter */
+ u_long fr_ipv6; /* IPv6 packets in/out */
+ u_long fr_ppshit; /* dropped because of pps ceiling */
+ u_long fr_ipud; /* IP id update failures */
+} filterstats_t;
+
+/*
+ * kstat "copy" of the above - keep in sync!
+ * also keep in sync with initialisation code in solaris.c, ipf_kstat_init().
+ *
+ * One kstat_named_t per filterstats counter, in the same order; fks_pull
+ * is a two-element array like fr_pull.
+ */
+typedef struct filter_kstats {
+ kstat_named_t fks_pass; /* see above for comments */
+ kstat_named_t fks_block;
+ kstat_named_t fks_nom;
+ kstat_named_t fks_short;
+ kstat_named_t fks_ppkl;
+ kstat_named_t fks_bpkl;
+ kstat_named_t fks_npkl;
+ kstat_named_t fks_pkl;
+ kstat_named_t fks_skip;
+ kstat_named_t fks_ret;
+ kstat_named_t fks_acct;
+ kstat_named_t fks_bnfr;
+ kstat_named_t fks_nfr;
+ kstat_named_t fks_cfr;
+ kstat_named_t fks_bads;
+ kstat_named_t fks_ads;
+ kstat_named_t fks_chit;
+ kstat_named_t fks_tcpbad;
+ kstat_named_t fks_pull[2];
+ kstat_named_t fks_badsrc;
+ kstat_named_t fks_badttl;
+ kstat_named_t fks_bad;
+ kstat_named_t fks_ipv6;
+ kstat_named_t fks_ppshit;
+ kstat_named_t fks_ipud;
+} filter_kstats_t;
+
+/*
+ * Log structure. Each packet header logged is prepended by one of these.
+ * Following this in the log records read from the device will be an ipflog
+ * structure which is then followed by any packet data.
+ *
+ * ipl_magic holds one of the IPL_MAGIC* values below (the ASCII codes for
+ * "IPLM", "IPLN" and "IPLS").  IPLOG_SIZE is the size of this header.
+ */
+typedef struct iplog {
+ u_32_t ipl_magic;
+ u_int ipl_count;
+ struct timeval ipl_time;
+ size_t ipl_dsize;
+ struct iplog *ipl_next;
+} iplog_t;
+
+#define ipl_sec ipl_time.tv_sec
+#define ipl_usec ipl_time.tv_usec
+
+#define IPL_MAGIC 0x49504c4d /* 'IPLM' */
+#define IPL_MAGIC_NAT 0x49504c4e /* 'IPLN' */
+#define IPL_MAGIC_STATE 0x49504c53 /* 'IPLS' */
+#define IPLOG_SIZE sizeof(iplog_t)
+
+/*
+ * Per-packet filter log record.  The fl_unit member is left out on the
+ * NetBSD / OpenBSD versions selected by the #if below (the first branch is
+ * intentionally empty) and present everywhere else.
+ */
+typedef struct ipflog {
+#if (defined(NetBSD) && (NetBSD <= 1991011) && (NetBSD >= 199603)) || \
+ (defined(OpenBSD) && (OpenBSD >= 199603))
+#else
+ u_int fl_unit;
+#endif
+ u_32_t fl_rule;
+ u_32_t fl_flags;
+ u_32_t fl_lflags;
+ u_32_t fl_logtag;
+ ipftag_t fl_nattag;
+ u_short fl_plen; /* extra data after hlen */
+ u_short fl_loglevel; /* syslog log level */
+ char fl_group[FR_GROUPLEN];
+ u_char fl_hlen; /* length of IP headers saved */
+ u_char fl_dir;
+ u_char fl_xxx[2]; /* pad */
+ char fl_ifname[LIFNAMSIZ];
+} ipflog_t;
+
+/* Build-time defaults, overridable by defining the names beforehand. */
+#ifndef IPF_LOGGING
+# define IPF_LOGGING 0
+#endif
+#ifndef IPF_DEFAULT_PASS
+# define IPF_DEFAULT_PASS FR_PASS
+#endif
+
+/*
+ * IPFILTER_LOGSIZE defaults to DEFAULT_IPFLOGSIZE (8192); a value supplied
+ * by the build must not be smaller than that or compilation stops.
+ */
+#define DEFAULT_IPFLOGSIZE 8192
+#ifndef IPFILTER_LOGSIZE
+# define IPFILTER_LOGSIZE DEFAULT_IPFLOGSIZE
+#else
+# if IPFILTER_LOGSIZE < DEFAULT_IPFLOGSIZE
+# error IPFILTER_LOGSIZE too small. Must be >= DEFAULT_IPFLOGSIZE
+# endif
+#endif
+
+#define IPF_OPTCOPY 0x07ff00 /* bit mask of copied options */
+
+/*
+ * Device filenames for reading log information. Use ipf on Solaris2 because
+ * ipl is already a name used by something else.
+ *
+ * IPL_NAME can be overridden by defining it before this point.
+ */
+#ifndef IPL_NAME
+# if SOLARIS
+# define IPL_NAME "/dev/ipf"
+# else
+# define IPL_NAME "/dev/ipl"
+# endif
+#endif
+/*
+ * Pathnames for various IP Filter control devices. Used by LKM
+ * and userland, so defined here.
+ */
+#define IPNAT_NAME "/dev/ipnat"
+#define IPSTATE_NAME "/dev/ipstate"
+#define IPAUTH_NAME "/dev/ipauth"
+#define IPSYNC_NAME "/dev/ipsync"
+#define IPSCAN_NAME "/dev/ipscan"
+#define IPLOOKUP_NAME "/dev/iplookup"
+
+/*
+ * Minor device numbers used to select a log / control device, followed by
+ * the limits derived from them.  IPL_LOGSIZE is one more than IPL_LOGMAX and
+ * is used as an array dimension.  IPL_LOGALL and IPL_LOGNONE are negative
+ * values outside the device range.
+ *
+ * The computed and negative values are parenthesised so that they remain a
+ * single operand wherever they are expanded (e.g. 2 * IPL_LOGSIZE).
+ */
+#define IPL_LOGIPF 0 /* Minor device #'s for accessing logs */
+#define IPL_LOGNAT 1
+#define IPL_LOGSTATE 2
+#define IPL_LOGAUTH 3
+#define IPL_LOGSYNC 4
+#define IPL_LOGSCAN 5
+#define IPL_LOGLOOKUP 6
+#define IPL_LOGCOUNT 7
+#define IPL_LOGMAX 7
+#define IPL_LOGSIZE (IPL_LOGMAX + 1)
+#define IPL_LOGALL (-1)
+#define IPL_LOGNONE (-2)
+
+/*
+ * For SIOCGETFS
+ *
+ * The f_fin / f_fout / f_acctin / f_acctout aliases (and their ...6
+ * variants) select element [0] or [1] of the corresponding rule-list
+ * arrays.  Note that f_groups is dimensioned by IPL_LOGSIZE while f_locks
+ * is dimensioned by IPL_LOGMAX.
+ */
+typedef struct friostat {
+ struct filterstats f_st[2];
+ struct frentry *f_ipf[2][2];
+ struct frentry *f_acct[2][2];
+ struct frentry *f_ipf6[2][2];
+ struct frentry *f_acct6[2][2];
+ struct frentry *f_auth;
+ struct frgroup *f_groups[IPL_LOGSIZE][2];
+ u_long f_froute[2];
+ u_long f_ticks;
+ int f_locks[IPL_LOGMAX];
+ size_t f_kmutex_sz;
+ size_t f_krwlock_sz;
+ int f_defpass; /* default pass - from fr_pass */
+ int f_active; /* 1 or 0 - active rule set */
+ int f_running; /* 1 if running, else 0 */
+ int f_logging; /* 1 if enabled, else 0 */
+ int f_features;
+ char f_version[32]; /* version string */
+} friostat_t;
+
+#define f_fin f_ipf[0]
+#define f_fin6 f_ipf6[0]
+#define f_fout f_ipf[1]
+#define f_fout6 f_ipf6[1]
+#define f_acctin f_acct[0]
+#define f_acctin6 f_acct6[0]
+#define f_acctout f_acct[1]
+#define f_acctout6 f_acct6[1]
+
+#define IPF_FEAT_LKM 0x001
+#define IPF_FEAT_LOG 0x002
+#define IPF_FEAT_LOOKUP 0x004
+#define IPF_FEAT_BPF 0x008
+#define IPF_FEAT_COMPILED 0x010
+#define IPF_FEAT_CKSUM 0x020
+#define IPF_FEAT_SYNC 0x040
+#define IPF_FEAT_SCAN 0x080
+#define IPF_FEAT_IPV6 0x100
+
+typedef struct optlist {
+ u_short ol_val;
+ int ol_bit;
+} optlist_t;
+
+
+/*
+ * Group list structure.
+ */
+typedef struct frgroup {
+ struct frgroup *fg_next;
+ struct frentry *fg_head;
+ struct frentry *fg_start;
+ u_32_t fg_flags;
+ int fg_ref;
+ char fg_name[FR_GROUPLEN];
+} frgroup_t;
+
+#define FG_NAME(g) (*(g)->fg_name == '\0' ? "" : (g)->fg_name)
+
+
+/*
+ * Used by state and NAT tables
+ */
+typedef struct icmpinfo {
+ u_short ici_id;
+ u_short ici_seq;
+ u_char ici_type;
+} icmpinfo_t;
+
+typedef struct udpinfo {
+ u_short us_sport;
+ u_short us_dport;
+} udpinfo_t;
+
+
+typedef struct tcpdata {
+ u_32_t td_end;
+ u_32_t td_maxend;
+ u_32_t td_maxwin;
+ u_32_t td_winscale;
+ u_32_t td_maxseg;
+ int td_winflags;
+} tcpdata_t;
+
+#define TCP_WSCALE_MAX 14
+
+#define TCP_WSCALE_SEEN 0x00000001
+#define TCP_WSCALE_FIRST 0x00000002
+
+
+typedef struct tcpinfo {
+ u_short ts_sport;
+ u_short ts_dport;
+ tcpdata_t ts_data[2];
+} tcpinfo_t;
+
+
+/*
+ * Structures to define a GRE header as seen in a packet.
+ */
+struct grebits {
+ u_32_t grb_C:1;
+ u_32_t grb_R:1;
+ u_32_t grb_K:1;
+ u_32_t grb_S:1;
+ u_32_t grb_s:1;
+ u_32_t grb_recur:1;
+ u_32_t grb_A:1;
+ u_32_t grb_flags:3;
+ u_32_t grb_ver:3;
+ u_short grb_ptype;
+};
+
+typedef struct grehdr {
+ union {
+ struct grebits gru_bits;
+ u_short gru_flags;
+ } gr_un;
+ u_short gr_len;
+ u_short gr_call;
+} grehdr_t;
+
+#define gr_flags gr_un.gru_flags
+#define gr_bits gr_un.gru_bits
+#define gr_ptype gr_bits.grb_ptype
+#define gr_C gr_bits.grb_C
+#define gr_R gr_bits.grb_R
+#define gr_K gr_bits.grb_K
+#define gr_S gr_bits.grb_S
+#define gr_s gr_bits.grb_s
+#define gr_recur gr_bits.grb_recur
+#define gr_A gr_bits.grb_A
+#define gr_ver gr_bits.grb_ver
+
+/*
+ * GRE information tracked by "keep state"
+ */
+typedef struct greinfo {
+ u_short gs_call[2];
+ u_short gs_flags;
+ u_short gs_ptype;
+} greinfo_t;
+
+#define GRE_REV(x) ((ntohs(x) >> 13) & 7)
+
+
+/*
+ * Format of an Authentication header
+ */
+typedef struct authhdr {
+ u_char ah_next;
+ u_char ah_plen;
+ u_short ah_reserved;
+ u_32_t ah_spi;
+ u_32_t ah_seq;
+ /* Following the sequence number field is 0 or more bytes of */
+ /* authentication data, as specified by ah_plen - RFC 2402. */
+} authhdr_t;
+
+
+/*
+ * Timeout tail queue list member
+ */
+typedef struct ipftqent {
+ struct ipftqent **tqe_pnext;
+ struct ipftqent *tqe_next;
+ struct ipftq *tqe_ifq; /* presumably the queue this entry sits on */
+ void *tqe_parent; /* pointer back to NAT/state struct */
+ u_long tqe_die; /* when this entry is to die */
+ u_long tqe_touched;
+ int tqe_flags; /* presumably TQE_* bits, e.g. TQE_RULEBASED */
+ int tqe_state[2]; /* current state of this entry */
+} ipftqent_t;
+
+#define TQE_RULEBASED 0x00000001
+
+
+/*
+ * Timeout tail queue head for IPFilter
+ */
+typedef struct ipftq {
+ ipfmutex_t ifq_lock;
+ u_int ifq_ttl;
+ ipftqent_t *ifq_head;
+ ipftqent_t **ifq_tail;
+ struct ipftq *ifq_next;
+ struct ipftq **ifq_pnext;
+ int ifq_ref;
+ u_int ifq_flags;
+} ipftq_t;
+
+#define IFQF_USER 0x01 /* User defined aging */
+#define IFQF_DELETE 0x02 /* Marked for deletion */
+#define IFQF_PROXY 0x04 /* Timeout queue in use by a proxy */
+
+#define IPF_HZ_MULT 1
+#define IPF_HZ_DIVIDE 2 /* How many times a second ipfilter */
+ /* checks its timeout queues. */
+#define IPF_TTLVAL(x) (((x) / IPF_HZ_MULT) * IPF_HZ_DIVIDE)
+
+/*
+ * Structure to define address for pool lookups.
+ */
+typedef struct {
+ u_char adf_len;
+ sa_family_t adf_family;
+ i6addr_t adf_addr;
+} addrfamily_t;
+
+
+/*
+ * Object structure description. For passing through in ioctls.
+ */
+typedef struct ipfobj {
+ u_32_t ipfo_rev; /* IPFilter version number */
+ u_32_t ipfo_size; /* size of object at ipfo_ptr */
+ void *ipfo_ptr; /* pointer to object */
+ int ipfo_type; /* type of object being pointed to */
+ int ipfo_offset; /* bytes from ipfo_ptr where to start */
+ u_char ipfo_xxxpad[32]; /* reserved for future use */
+} ipfobj_t;
+
+#define IPFOBJ_FRENTRY 0 /* struct frentry */
+#define IPFOBJ_IPFSTAT 1 /* struct friostat */
+#define IPFOBJ_IPFINFO 2 /* struct fr_info */
+#define IPFOBJ_AUTHSTAT 3 /* struct fr_authstat */
+#define IPFOBJ_FRAGSTAT 4 /* struct ipfrstat */
+#define IPFOBJ_IPNAT 5 /* struct ipnat */
+#define IPFOBJ_NATSTAT 6 /* struct natstat */
+#define IPFOBJ_STATESAVE 7 /* struct ipstate_save */
+#define IPFOBJ_NATSAVE 8 /* struct nat_save */
+#define IPFOBJ_NATLOOKUP 9 /* struct natlookup */
+#define IPFOBJ_IPSTATE 10 /* struct ipstate */
+#define IPFOBJ_STATESTAT 11 /* struct ips_stat */
+#define IPFOBJ_FRAUTH 12 /* struct frauth */
+#define IPFOBJ_TUNEABLE 13 /* struct ipftune */
+
+
+typedef union ipftunevalptr {
+ void *ipftp_void;
+ u_long *ipftp_long;
+ u_int *ipftp_int;
+ u_short *ipftp_short;
+ u_char *ipftp_char;
+} ipftunevalptr_t;
+
+/*
+ * Description of one tuneable variable.  Entries are handed to
+ * fr_addipftune()/fr_delipftune().
+ */
+typedef struct ipftuneable {
+ /* pointer to the variable; use the ipft_addr/ipft_p* accessors */
+ ipftunevalptr_t ipft_una;
+ char *ipft_name; /* name of the tuneable */
+ u_long ipft_min; /* presumably the smallest accepted value */
+ u_long ipft_max; /* presumably the largest accepted value */
+ int ipft_sz; /* size in bytes of the variable pointed to */
+ int ipft_flags; /* presumably IPFT_RDONLY / IPFT_WRDISABLED */
+ struct ipftuneable *ipft_next;
+} ipftuneable_t;
+
+#define ipft_addr ipft_una.ipftp_void
+#define ipft_plong ipft_una.ipftp_long
+#define ipft_pint ipft_una.ipftp_int
+#define ipft_pshort ipft_una.ipftp_short
+#define ipft_pchar ipft_una.ipftp_char
+
+#define IPFT_RDONLY 1 /* read-only */
+#define IPFT_WRDISABLED 2 /* write when disabled only */
+
+typedef union ipftuneval {
+ u_long ipftu_long;
+ u_int ipftu_int;
+ u_short ipftu_short;
+ u_char ipftu_char;
+} ipftuneval_t;
+
+typedef struct ipftune {
+ void *ipft_cookie;
+ ipftuneval_t ipft_un;
+ u_long ipft_min;
+ u_long ipft_max;
+ int ipft_sz;
+ int ipft_flags;
+ char ipft_name[80];
+} ipftune_t;
+
+#define ipft_vlong ipft_un.ipftu_long
+#define ipft_vint ipft_un.ipftu_int
+#define ipft_vshort ipft_un.ipftu_short
+#define ipft_vchar ipft_un.ipftu_char
+
+
+/*
+** HPUX Port
+*/
+#ifdef __hpux
+/* HP-UX locking sequence deadlock detection module lock MAJOR ID */
+# define IPF_SMAJ 0 /* temp assignment XXX, not critical */
+#endif
+
+#if !defined(CDEV_MAJOR) && defined (__FreeBSD_version) && \
+ (__FreeBSD_version >= 220000)
+# define CDEV_MAJOR 79
+#endif
+
+/*
+ * Post NetBSD 1.2 has the PFIL interface for packet filters. This turns
+ * on those hooks. We don't need any special mods in non-IP Filter code
+ * with this!
+ */
+#if (defined(NetBSD) && (NetBSD > 199609) && (NetBSD <= 1991011)) || \
+ (defined(NetBSD1_2) && NetBSD1_2 > 1) || \
+ (defined(__FreeBSD__) && (__FreeBSD_version >= 500043))
+# if (NetBSD >= 199905)
+# define PFIL_HOOKS
+# endif
+# ifdef PFIL_HOOKS
+# define NETBSD_PF
+# endif
+#endif
+
+#ifndef _KERNEL
+extern int fr_check __P((struct ip *, int, void *, int, mb_t **));
+extern int (*fr_checkp) __P((ip_t *, int, void *, int, mb_t **));
+extern int ipf_log __P((void));
+extern struct ifnet *get_unit __P((char *, int));
+extern char *get_ifname __P((struct ifnet *));
+# if defined(__NetBSD__) || defined(__OpenBSD__) || \
+ (_BSDI_VERSION >= 199701) || (__FreeBSD_version >= 300000)
+extern int iplioctl __P((int, ioctlcmd_t, caddr_t, int));
+# else
+extern int iplioctl __P((int, ioctlcmd_t, caddr_t, int));
+# endif
+extern int iplopen __P((dev_t, int));
+extern int iplclose __P((dev_t, int));
+extern void m_freem __P((mb_t *));
+#else /* #ifndef _KERNEL */
+# if defined(__NetBSD__) && defined(PFIL_HOOKS)
+extern void ipfilterattach __P((int));
+# endif
+extern int ipl_enable __P((void));
+extern int ipl_disable __P((void));
+# ifdef MENTAT
+extern int fr_check __P((struct ip *, int, void *, int, void *,
+ mblk_t **));
+# if SOLARIS
+# if SOLARIS2 >= 7
+extern int iplioctl __P((dev_t, int, intptr_t, int, cred_t *, int *));
+# else
+extern int iplioctl __P((dev_t, int, int *, int, cred_t *, int *));
+# endif
+extern int iplopen __P((dev_t *, int, int, cred_t *));
+extern int iplclose __P((dev_t, int, int, cred_t *));
+extern int iplread __P((dev_t, uio_t *, cred_t *));
+extern int iplwrite __P((dev_t, uio_t *, cred_t *));
+# endif
+# ifdef __hpux
+extern int iplopen __P((dev_t, int, intptr_t, int));
+extern int iplclose __P((dev_t, int, int));
+extern int iplioctl __P((dev_t, int, caddr_t, int));
+extern int iplread __P((dev_t, uio_t *));
+extern int iplwrite __P((dev_t, uio_t *));
+extern int iplselect __P((dev_t, int));
+# endif
+extern int ipfsync __P((void));
+extern int fr_qout __P((queue_t *, mblk_t *));
+# else /* MENTAT */
+extern int fr_check __P((struct ip *, int, void *, int, mb_t **));
+extern int (*fr_checkp) __P((ip_t *, int, void *, int, mb_t **));
+extern size_t mbufchainlen __P((mb_t *));
+# ifdef __sgi
+# include <sys/cred.h>
+extern int iplioctl __P((dev_t, int, caddr_t, int, cred_t *, int *));
+extern int iplopen __P((dev_t *, int, int, cred_t *));
+extern int iplclose __P((dev_t, int, int, cred_t *));
+extern int iplread __P((dev_t, uio_t *, cred_t *));
+extern int iplwrite __P((dev_t, uio_t *, cred_t *));
+extern int ipfsync __P((void));
+extern int ipfilter_sgi_attach __P((void));
+extern void ipfilter_sgi_detach __P((void));
+extern void ipfilter_sgi_intfsync __P((void));
+# else
+# ifdef IPFILTER_LKM
+extern int iplidentify __P((char *));
+# endif
+# if (_BSDI_VERSION >= 199510) || (__FreeBSD_version >= 220000) || \
+ (NetBSD >= 199511) || defined(__OpenBSD__)
+# if defined(__NetBSD__) || (_BSDI_VERSION >= 199701) || \
+ defined(__OpenBSD__) || (__FreeBSD_version >= 300000)
+# if (__FreeBSD_version >= 500024)
+# if (__FreeBSD_version >= 502116)
+extern int iplioctl __P((struct cdev*, u_long, caddr_t, int, struct thread *));
+# else
+extern int iplioctl __P((dev_t, u_long, caddr_t, int, struct thread *));
+# endif /* __FreeBSD_version >= 502116 */
+# else
+extern int iplioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
+# endif /* __FreeBSD_version >= 500024 */
+# else
+extern int iplioctl __P((dev_t, int, caddr_t, int, struct proc *));
+# endif
+# if (__FreeBSD_version >= 500024)
+# if (__FreeBSD_version >= 502116)
+extern int iplopen __P((struct cdev*, int, int, struct thread *));
+extern int iplclose __P((struct cdev*, int, int, struct thread *));
+# else
+extern int iplopen __P((dev_t, int, int, struct thread *));
+extern int iplclose __P((dev_t, int, int, struct thread *));
+# endif /* __FreeBSD_version >= 502116 */
+# else
+extern int iplopen __P((dev_t, int, int, struct proc *));
+extern int iplclose __P((dev_t, int, int, struct proc *));
+# endif /* __FreeBSD_version >= 500024 */
+# else
+# ifdef linux
+extern int iplioctl __P((struct inode *, struct file *, u_int, u_long));
+# else
+extern int iplopen __P((dev_t, int));
+extern int iplclose __P((dev_t, int));
+extern int iplioctl __P((dev_t, int, caddr_t, int));
+# endif
+# endif /* (_BSDI_VERSION >= 199510) */
+# if BSD >= 199306
+# if (__FreeBSD_version >= 502116)
+extern int iplread __P((struct cdev*, struct uio *, int));
+extern int iplwrite __P((struct cdev*, struct uio *, int));
+# else
+extern int iplread __P((dev_t, struct uio *, int));
+extern int iplwrite __P((dev_t, struct uio *, int));
+# endif /* __FreeBSD_version >= 502116 */
+# else
+# ifndef linux
+extern int iplread __P((dev_t, struct uio *));
+extern int iplwrite __P((dev_t, struct uio *));
+# endif
+# endif /* BSD >= 199306 */
+# endif /* __ sgi */
+# endif /* MENTAT */
+
+#endif /* #ifndef _KERNEL */
+
+extern ipfmutex_t ipl_mutex, ipf_authmx, ipf_rw, ipf_hostmap;
+extern ipfmutex_t ipf_timeoutlock, ipf_stinsert, ipf_natio, ipf_nat_new;
+extern ipfrwlock_t ipf_mutex, ipf_global, ip_poolrw, ipf_ipidfrag;
+extern ipfrwlock_t ipf_frag, ipf_state, ipf_nat, ipf_natfrag, ipf_auth;
+extern ipfrwlock_t ipf_frcache;
+
+extern char *memstr __P((char *, char *, int, int));
+extern int count4bits __P((u_32_t));
+extern int count6bits __P((u_32_t *));
+extern int frrequest __P((int, ioctlcmd_t, caddr_t, int, int));
+extern char *getifname __P((struct ifnet *));
+extern int iplattach __P((void));
+extern int ipldetach __P((void));
+extern u_short ipf_cksum __P((u_short *, int));
+extern int copyinptr __P((void *, void *, size_t));
+extern int copyoutptr __P((void *, void *, size_t));
+extern int fr_fastroute __P((mb_t *, mb_t **, fr_info_t *, frdest_t *));
+extern int fr_inobj __P((void *, void *, int));
+extern int fr_inobjsz __P((void *, void *, int, int));
+extern int fr_ioctlswitch __P((int, void *, ioctlcmd_t, int));
+extern int fr_ipftune __P((ioctlcmd_t, void *));
+extern int fr_outobj __P((void *, void *, int));
+extern int fr_outobjsz __P((void *, void *, int, int));
+extern void *fr_pullup __P((mb_t *, fr_info_t *, int));
+extern void fr_resolvedest __P((struct frdest *, int));
+extern int fr_resolvefunc __P((void *));
+extern void *fr_resolvenic __P((char *, int));
+extern int fr_send_icmp_err __P((int, fr_info_t *, int));
+extern int fr_send_reset __P((fr_info_t *));
+#if (__FreeBSD_version < 490000) || !defined(_KERNEL)
+extern int ppsratecheck __P((struct timeval *, int *, int));
+#endif
+extern ipftq_t *fr_addtimeoutqueue __P((ipftq_t **, u_int));
+extern void fr_deletequeueentry __P((ipftqent_t *));
+extern int fr_deletetimeoutqueue __P((ipftq_t *));
+extern void fr_freetimeoutqueue __P((ipftq_t *));
+extern void fr_movequeue __P((ipftqent_t *, ipftq_t *, ipftq_t *));
+extern void fr_queueappend __P((ipftqent_t *, ipftq_t *, void *));
+extern void fr_queueback __P((ipftqent_t *));
+extern void fr_queuefront __P((ipftqent_t *));
+extern void fr_checkv4sum __P((fr_info_t *));
+extern int fr_checkl4sum __P((fr_info_t *));
+extern int fr_ifpfillv4addr __P((int, struct sockaddr_in *,
+ struct sockaddr_in *, struct in_addr *,
+ struct in_addr *));
+extern int fr_coalesce __P((fr_info_t *));
+#ifdef USE_INET6
+extern void fr_checkv6sum __P((fr_info_t *));
+extern int fr_ifpfillv6addr __P((int, struct sockaddr_in6 *,
+ struct sockaddr_in6 *, struct in_addr *,
+ struct in_addr *));
+#endif
+
+extern int fr_addipftune __P((ipftuneable_t *));
+extern int fr_delipftune __P((ipftuneable_t *));
+
+extern int frflush __P((minor_t, int, int));
+extern void frsync __P((void *));
+extern frgroup_t *fr_addgroup __P((char *, void *, u_32_t, minor_t, int));
+extern int fr_derefrule __P((frentry_t **));
+extern void fr_delgroup __P((char *, minor_t, int));
+extern frgroup_t *fr_findgroup __P((char *, minor_t, int, frgroup_t ***));
+
+extern int fr_loginit __P((void));
+extern int ipflog_clear __P((minor_t));
+extern int ipflog_read __P((minor_t, uio_t *));
+extern int ipflog __P((fr_info_t *, u_int));
+extern int ipllog __P((int, fr_info_t *, void **, size_t *, int *, int));
+extern void fr_logunload __P((void));
+
+extern frentry_t *fr_acctpkt __P((fr_info_t *, u_32_t *));
+extern int fr_copytolog __P((int, char *, int));
+extern u_short fr_cksum __P((mb_t *, ip_t *, int, void *));
+extern void fr_deinitialise __P((void));
+extern frentry_t *fr_dolog __P((fr_info_t *, u_32_t *));
+extern frentry_t *fr_dstgrpmap __P((fr_info_t *, u_32_t *));
+extern void fr_fixskip __P((frentry_t **, frentry_t *, int));
+extern void fr_forgetifp __P((void *));
+extern frentry_t *fr_getrulen __P((int, char *, u_32_t));
+extern void fr_getstat __P((struct friostat *));
+extern int fr_ifpaddr __P((int, int, void *,
+ struct in_addr *, struct in_addr *));
+extern int fr_initialise __P((void));
+extern void fr_lock __P((caddr_t, int *));
+extern int fr_makefrip __P((int, ip_t *, fr_info_t *));
+extern int fr_matchtag __P((ipftag_t *, ipftag_t *));
+extern int fr_matchicmpqueryreply __P((int, icmpinfo_t *,
+ struct icmp *, int));
+extern u_32_t fr_newisn __P((fr_info_t *));
+extern u_short fr_nextipid __P((fr_info_t *));
+extern int fr_rulen __P((int, frentry_t *));
+extern int fr_scanlist __P((fr_info_t *, u_32_t));
+extern frentry_t *fr_srcgrpmap __P((fr_info_t *, u_32_t *));
+extern int fr_tcpudpchk __P((fr_info_t *, frtuc_t *));
+extern int fr_verifysrc __P((fr_info_t *fin));
+extern int fr_zerostats __P((char *));
+
+extern int fr_running;
+extern u_long fr_frouteok[2];
+extern int fr_pass;
+extern int fr_flags;
+extern int fr_active;
+extern int fr_chksrc;
+extern int fr_minttl;
+extern int fr_refcnt;
+extern int fr_control_forwarding;
+extern int fr_update_ipid;
+extern int nat_logging;
+extern int ipstate_logging;
+extern int ipl_suppress;
+extern int ipl_buffer_sz;
+extern int ipl_logmax;
+extern int ipl_logall;
+extern int ipl_logsize;
+extern u_long fr_ticks;
+extern fr_info_t frcache[2][8];
+extern char ipfilter_version[];
+extern iplog_t **iplh[IPL_LOGMAX+1], *iplt[IPL_LOGMAX+1];
+extern int iplused[IPL_LOGMAX + 1];
+extern struct frentry *ipfilter[2][2], *ipacct[2][2];
+#ifdef USE_INET6
+extern struct frentry *ipfilter6[2][2], *ipacct6[2][2];
+extern int icmptoicmp6types[ICMP_MAXTYPE+1];
+extern int icmptoicmp6unreach[ICMP_MAX_UNREACH];
+extern int icmpreplytype6[ICMP6_MAXTYPE + 1];
+#endif
+extern int icmpreplytype4[ICMP_MAXTYPE + 1];
+extern struct frgroup *ipfgroups[IPL_LOGSIZE][2];
+extern struct filterstats frstats[];
+extern frentry_t *ipfrule_match __P((fr_info_t *));
+extern u_char ipf_iss_secret[32];
+extern ipftuneable_t ipf_tuneables[];
+
+#endif /* __IP_FIL_H__ */
diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_frag.h b/usr/src/uts/common/inet/ipf/netinet/ip_frag.h
new file mode 100644
index 0000000000..7eb7399010
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/netinet/ip_frag.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 1993-2001 by Darren Reed.
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ *
+ * @(#)ip_frag.h 1.5 3/24/96
+ * $Id: ip_frag.h,v 2.23.2.2 2005/06/10 18:02:37 darrenr Exp $
+ *
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifndef __IP_FRAG_H__
+#define __IP_FRAG_H__
+
+#define IPFT_SIZE 257
+
+/*
+ * One tracked fragment entry.
+ * NOTE(review): ipfr_hnext/ipfr_hprev and ipfr_next/ipfr_prev look like
+ * hash-chain and list linkage respectively - confirm against ip_frag.c.
+ */
+typedef struct ipfr {
+ struct ipfr *ipfr_hnext, **ipfr_hprev;
+ struct ipfr *ipfr_next, **ipfr_prev;
+ void *ipfr_data;
+ /*
+  * IPFR_CMPSZ (defined after this structure) measures the span that
+  * starts at ipfr_ifp and ends just before ipfr_tos, so the order and
+  * packing of the members in between must not be changed without
+  * revisiting that macro.
+  */
+ void *ipfr_ifp;
+ i6addr_t ipfr_source; /* IPv4 view: ipfr_src */
+ i6addr_t ipfr_dest; /* IPv4 view: ipfr_dst */
+ u_32_t ipfr_optmsk;
+ u_short ipfr_secmsk;
+ u_short ipfr_auth;
+ u_32_t ipfr_id;
+ u_char ipfr_p;
+ u_char ipfr_tos; /* first member outside the IPFR_CMPSZ span */
+ u_32_t ipfr_pass;
+ u_short ipfr_off;
+ u_char ipfr_ttl;
+ u_char ipfr_seen0;
+ u_short ipfr_firstend;
+ frentry_t *ipfr_rule;
+} ipfr_t;
+
+#define ipfr_src ipfr_source.in4
+#define ipfr_dst ipfr_dest.in4
+
+typedef struct ipfrstat {
+ u_long ifs_exists; /* add & already exists */
+ u_long ifs_nomem;
+ u_long ifs_new;
+ u_long ifs_hits;
+ u_long ifs_expire;
+ u_long ifs_inuse;
+ u_long ifs_retrans0;
+ u_long ifs_short;
+ struct ipfr **ifs_table;
+ struct ipfr **ifs_nattab;
+} ipfrstat_t;
+
+#define IPFR_CMPSZ (offsetof(ipfr_t, ipfr_tos) - \
+ offsetof(ipfr_t, ipfr_ifp))
+
+extern int ipfr_size;
+extern int fr_ipfrttl;
+extern int fr_frag_lock;
+extern int fr_fraginit __P((void));
+extern void fr_fragunload __P((void));
+extern ipfrstat_t *fr_fragstats __P((void));
+
+extern int fr_newfrag __P((fr_info_t *, u_32_t));
+extern frentry_t *fr_knownfrag __P((fr_info_t *, u_32_t *));
+
+extern int fr_nat_newfrag __P((fr_info_t *, u_32_t, struct nat *));
+extern nat_t *fr_nat_knownfrag __P((fr_info_t *));
+
+extern int fr_ipid_newfrag __P((fr_info_t *, u_32_t));
+extern u_32_t fr_ipid_knownfrag __P((fr_info_t *));
+
+extern void fr_forget __P((void *));
+extern void fr_forgetnat __P((void *));
+extern void fr_fragclear __P((void));
+extern void fr_fragexpire __P((void));
+
+#if defined(_KERNEL) && ((BSD >= 199306) || SOLARIS || defined(__sgi) \
+ || defined(__osf__) || (defined(__sgi) && (IRIX >= 60500)))
+# if defined(SOLARIS2) && (SOLARIS2 < 7)
+extern void fr_slowtimer __P((void));
+# else
+extern void fr_slowtimer __P((void *));
+# endif
+#else
+# if defined(linux) && defined(_KERNEL)
+extern void fr_slowtimer __P((long));
+# else
+extern int fr_slowtimer __P((void));
+# endif
+#endif
+
+#endif /* __IP_FRAG_H__ */
diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_ftp_pxy.c b/usr/src/uts/common/inet/ipf/netinet/ip_ftp_pxy.c
new file mode 100644
index 0000000000..7d1ed33c96
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/netinet/ip_ftp_pxy.c
@@ -0,0 +1,1458 @@
+/*
+ * Copyright (C) 1997-2003 by Darren Reed
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ *
+ * $Id: ip_ftp_pxy.c,v 2.88.2.15 2005/03/19 19:38:10 darrenr Exp $
+ *
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Simple FTP transparent proxy for in-kernel use. For use with the NAT
+ * code.
+*/
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+
+#define IPF_FTP_PROXY
+
+#define IPF_MINPORTLEN 18
+#define IPF_MAXPORTLEN 30
+#define IPF_MIN227LEN 39
+#define IPF_MAX227LEN 51
+#define IPF_MIN229LEN 47
+#define IPF_MAX229LEN 51
+
+#define FTPXY_GO 0
+#define FTPXY_INIT 1
+#define FTPXY_USER_1 2
+#define FTPXY_USOK_1 3
+#define FTPXY_PASS_1 4
+#define FTPXY_PAOK_1 5
+#define FTPXY_AUTH_1 6
+#define FTPXY_AUOK_1 7
+#define FTPXY_ADAT_1 8
+#define FTPXY_ADOK_1 9
+#define FTPXY_ACCT_1 10
+#define FTPXY_ACOK_1 11
+#define FTPXY_USER_2 12
+#define FTPXY_USOK_2 13
+#define FTPXY_PASS_2 14
+#define FTPXY_PAOK_2 15
+
+/*
+ * Values for FTP commands. Numerics cover 0-999
+ */
+#define FTPXY_C_PASV 1000
+
+int ippr_ftp_client __P((fr_info_t *, ip_t *, nat_t *, ftpinfo_t *, int));
+int ippr_ftp_complete __P((char *, size_t));
+int ippr_ftp_in __P((fr_info_t *, ap_session_t *, nat_t *));
+int ippr_ftp_init __P((void));
+void ippr_ftp_fini __P((void));
+int ippr_ftp_new __P((fr_info_t *, ap_session_t *, nat_t *));
+int ippr_ftp_out __P((fr_info_t *, ap_session_t *, nat_t *));
+int ippr_ftp_pasv __P((fr_info_t *, ip_t *, nat_t *, ftpinfo_t *, int));
+int ippr_ftp_epsv __P((fr_info_t *, ip_t *, nat_t *, ftpside_t *, int));
+int ippr_ftp_port __P((fr_info_t *, ip_t *, nat_t *, ftpside_t *, int));
+int ippr_ftp_process __P((fr_info_t *, nat_t *, ftpinfo_t *, int));
+int ippr_ftp_server __P((fr_info_t *, ip_t *, nat_t *, ftpinfo_t *, int));
+int ippr_ftp_valid __P((ftpinfo_t *, int, char *, size_t));
+int ippr_ftp_server_valid __P((ftpside_t *, char *, size_t));
+int ippr_ftp_client_valid __P((ftpside_t *, char *, size_t));
+u_short ippr_ftp_atoi __P((char **));
+int ippr_ftp_pasvreply __P((fr_info_t *, ip_t *, nat_t *, ftpside_t *,
+ u_int, char *, char *, u_int));
+
+
+int ftp_proxy_init = 0;
+int ippr_ftp_pasvonly = 0;
+int ippr_ftp_insecure = 0; /* Do not require logins before transfers */
+int ippr_ftp_pasvrdr = 0;
+int ippr_ftp_forcepasv = 0; /* PASV must be last command prior to 227 */
+#if defined(_KERNEL)
+int ippr_ftp_debug = 0;
+#else
+int ippr_ftp_debug = 2;
+#endif
+/*
+ * 1 - security
+ * 2 - errors
+ * 3 - error debugging
+ * 4 - parsing errors
+ * 5 - parsing info
+ * 6 - parsing debug
+ */
+
+static frentry_t ftppxyfr;
+/*
+ * Tuneable descriptor that exposes ippr_ftp_debug; registered by
+ * ippr_ftp_init() and removed by ippr_ftp_fini().
+ */
+static ipftuneable_t ftptune = {
+ { &ippr_ftp_debug }, /* ipft_una: address of the variable */
+ "ippr_ftp_debug", /* ipft_name */
+ 0, /* ipft_min */
+ 10, /* ipft_max */
+ sizeof(ippr_ftp_debug), /* ipft_sz */
+ 0, /* ipft_flags */
+ NULL /* ipft_next */
+};
+
+
+/*
+ * Prepare the proxy's private data: clear ftppxyfr (the rule that
+ * ippr_ftp_port() attaches to the fr_info_t it builds for data
+ * connections), give it a reference and its flags, create its mutex,
+ * record that initialisation happened and register the debug tuneable.
+ * Always returns 0.
+ */
+int ippr_ftp_init()
+{
+	bzero((char *)&ftppxyfr, sizeof(ftppxyfr));
+	ftppxyfr.fr_flags = FR_INQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE;
+	ftppxyfr.fr_ref = 1;
+	MUTEX_INIT(&ftppxyfr.fr_lock, "FTP Proxy Mutex");
+	ftp_proxy_init = 1;
+
+	(void) fr_addipftune(&ftptune);
+	return 0;
+}
+
+
+/*
+ * Tear down what ippr_ftp_init() set up.  The tuneable is always
+ * removed; the rule mutex is destroyed only if ftp_proxy_init says it
+ * was created.
+ */
+void ippr_ftp_fini()
+{
+	(void) fr_delipftune(&ftptune);
+
+	if (ftp_proxy_init != 1)
+		return;
+
+	MUTEX_DESTROY(&ftppxyfr.fr_lock);
+	ftp_proxy_init = 0;
+}
+
+
+/*
+ * Allocate and attach the per-session FTP proxy data.
+ *
+ * A zeroed ftpinfo_t is hung off aps->aps_data (with its size in
+ * aps->aps_psiz), the read and write pointers of both sides are aimed at
+ * the start of their buffers and the login state starts at FTPXY_INIT.
+ * fin and nat are not used.
+ *
+ * Returns 0 on success, -1 if the allocation fails.
+ */
+int ippr_ftp_new(fin, aps, nat)
+fr_info_t *fin;
+ap_session_t *aps;
+nat_t *nat;
+{
+	ftpinfo_t *ftp;
+	int side;
+
+	KMALLOC(ftp, ftpinfo_t *);
+	if (ftp == NULL)
+		return -1;
+
+	fin = fin;	/* LINT */
+	nat = nat;	/* LINT */
+
+	aps->aps_data = ftp;
+	aps->aps_psiz = sizeof(ftpinfo_t);
+
+	bzero((char *)ftp, sizeof(*ftp));
+	for (side = 0; side < 2; side++) {
+		ftpside_t *f = &ftp->ftp_side[side];
+
+		f->ftps_rptr = f->ftps_buf;
+		f->ftps_wptr = f->ftps_buf;
+	}
+	ftp->ftp_passok = FTPXY_INIT;
+	ftp->ftp_incok = 0;
+	return 0;
+}
+
+
+/*
+ * Handle a client "PORT a,b,c,d,p1,p2" command found at f->ftps_rptr.
+ *
+ * The address in the command is checked against the NAT session
+ * (nat_inip for outbound sessions, nat_oip for inbound ones).  The
+ * command is then rewritten in the packet using nat_oip for inbound
+ * sessions and ip->ip_src otherwise, the IP and fr_info lengths are
+ * adjusted by the size difference, and a NAT entry plus state entry is
+ * created (or an existing NAT entry refreshed) for the data connection
+ * that is expected to come back to the advertised port.
+ *
+ * Returns 0 if the command is too short, cannot be parsed, names a port
+ * below 1024 or would grow the packet past 65535 bytes; APR_ERR(1) if
+ * the advertised address does not match the session; otherwise
+ * APR_INC(inc), where inc is the change in payload length.
+ */
+int ippr_ftp_port(fin, ip, nat, f, dlen)
+fr_info_t *fin;
+ip_t *ip;
+nat_t *nat;
+ftpside_t *f;
+int dlen;
+{
+ tcphdr_t *tcp, tcph, *tcp2 = &tcph;
+ char newbuf[IPF_FTPBUFSZ], *s;
+ struct in_addr swip, swip2;
+ u_int a1, a2, a3, a4;
+ int inc, off, flags;
+ u_short a5, a6, sp;
+ size_t nlen, olen;
+ fr_info_t fi;
+ nat_t *nat2;
+ mb_t *m;
+
+ m = fin->fin_m;
+ tcp = (tcphdr_t *)fin->fin_dp;
+ /* offset of the TCP payload, used below as the COPYBACK position */
+ off = (char *)tcp - (char *)ip + (TCP_OFF(tcp) << 2) + fin->fin_ipoff;
+
+ /*
+ * Check for client sending out PORT message.
+ */
+ if (dlen < IPF_MINPORTLEN) {
+ if (ippr_ftp_debug > 1)
+ printf("ippr_ftp_port:dlen(%d) < IPF_MINPORTLEN\n",
+ dlen);
+ return 0;
+ }
+ /*
+ * Skip the PORT command + space
+ */
+ s = f->ftps_rptr + 5;
+ /*
+ * Pick out the address components, two at a time.
+ * ippr_ftp_atoi() leaves s set to NULL when it fails.
+ */
+ a1 = ippr_ftp_atoi(&s);
+ if (s == NULL) {
+ if (ippr_ftp_debug > 1)
+ printf("ippr_ftp_port:ippr_ftp_atoi(%d) failed\n", 1);
+ return 0;
+ }
+ a2 = ippr_ftp_atoi(&s);
+ if (s == NULL) {
+ if (ippr_ftp_debug > 1)
+ printf("ippr_ftp_port:ippr_ftp_atoi(%d) failed\n", 2);
+ return 0;
+ }
+
+ /*
+ * Check that IP address in the PORT/PASV reply is the same as the
+ * sender of the command - prevents using PORT for port scanning.
+ */
+ /* combine the two 16 bit halves into one host-order address */
+ a1 <<= 16;
+ a1 |= a2;
+ if (((nat->nat_dir == NAT_OUTBOUND) &&
+ (a1 != ntohl(nat->nat_inip.s_addr))) ||
+ ((nat->nat_dir == NAT_INBOUND) &&
+ (a1 != ntohl(nat->nat_oip.s_addr)))) {
+ /* the message names nat_inip for both directions */
+ if (ippr_ftp_debug > 0)
+ printf("ippr_ftp_port:%s != nat->nat_inip\n", "a1");
+ return APR_ERR(1);
+ }
+
+ /* a5 receives both port bytes; they are split further down */
+ a5 = ippr_ftp_atoi(&s);
+ if (s == NULL) {
+ if (ippr_ftp_debug > 1)
+ printf("ippr_ftp_port:ippr_ftp_atoi(%d) failed\n", 3);
+ return 0;
+ }
+ if (*s == ')')
+ s++;
+
+ /*
+ * check for CR-LF at the end.
+ */
+ /* if s is on the LF, step back so the CR-LF test below can match */
+ if (*s == '\n')
+ s--;
+ if ((*s == '\r') && (*(s + 1) == '\n')) {
+ s += 2;
+ a6 = a5 & 0xff;
+ } else {
+ if (ippr_ftp_debug > 1)
+ printf("ippr_ftp_port:missing %s\n", "cr-lf");
+ return 0;
+ }
+
+ /* a5 becomes the high port byte, a6 already holds the low one */
+ a5 >>= 8;
+ a5 &= 0xff;
+ sp = a5 << 8 | a6;
+ /*
+ * Don't allow the PORT command to specify a port < 1024, for
+ * security reasons.
+ */
+ if (sp < 1024) {
+ if (ippr_ftp_debug > 0)
+ printf("ippr_ftp_port:sp(%d) < 1024\n", sp);
+ return 0;
+ }
+ /*
+ * Calculate new address parts for PORT command
+ */
+ if (nat->nat_dir == NAT_INBOUND)
+ a1 = ntohl(nat->nat_oip.s_addr);
+ else
+ a1 = ntohl(ip->ip_src.s_addr);
+ a2 = (a1 >> 16) & 0xff;
+ a3 = (a1 >> 8) & 0xff;
+ a4 = a1 & 0xff;
+ a1 >>= 24;
+ /* length of the original command, up to and including its CR-LF */
+ olen = s - f->ftps_rptr;
+ /* DO NOT change this to snprintf! */
+#if defined(SNPRINTF) && defined(_KERNEL)
+ (void) SNPRINTF(newbuf, sizeof(newbuf), "%s %u,%u,%u,%u,%u,%u\r\n",
+ "PORT", a1, a2, a3, a4, a5, a6);
+#else
+ (void) sprintf(newbuf, "%s %u,%u,%u,%u,%u,%u\r\n",
+ "PORT", a1, a2, a3, a4, a5, a6);
+#endif
+
+ nlen = strlen(newbuf);
+ /* inc is the change in payload size caused by the rewrite */
+ inc = nlen - olen;
+ if ((inc + ip->ip_len) > 65535) {
+ if (ippr_ftp_debug > 0)
+ printf("ippr_ftp_port:inc(%d) + ip->ip_len > 65535\n",
+ inc);
+ return 0;
+ }
+
+ /*
+ * NOTE(review): in the !_KERNEL build both the bcopy() here and the
+ * COPYBACK() after the #endif are executed - confirm that this
+ * double write is intended.
+ */
+#if !defined(_KERNEL)
+ bcopy(newbuf, MTOD(m, char *) + off, nlen);
+#else
+# if defined(MENTAT)
+ if (inc < 0)
+ (void)adjmsg(m, inc);
+# else /* defined(MENTAT) */
+ /*
+ * m_adj takes care of pkthdr.len, if required and treats inc<0 to
+ * mean remove -len bytes from the end of the packet.
+ * The mbuf chain will be extended if necessary by m_copyback().
+ */
+ if (inc < 0)
+ m_adj(m, inc);
+# endif /* defined(MENTAT) */
+#endif /* !defined(_KERNEL) */
+ COPYBACK(m, off, nlen, newbuf);
+
+ if (inc != 0) {
+ ip->ip_len += inc;
+ fin->fin_dlen += inc;
+ fin->fin_plen += inc;
+ }
+
+ /*
+ * The server may not make the connection back from port 20, but
+ * it is the most likely so use it here to check for a conflicting
+ * mapping.
+ */
+ /* fi is a scratch copy of fin; fin itself is not modified below */
+ bcopy((char *)fin, (char *)&fi, sizeof(fi));
+ fi.fin_state = NULL;
+ fi.fin_nat = NULL;
+ fi.fin_flx |= FI_IGNORE;
+ fi.fin_data[0] = sp;
+ fi.fin_data[1] = fin->fin_data[1] - 1;
+ /*
+ * Add skeleton NAT entry for connection which will come back the
+ * other way.
+ */
+ if (nat->nat_dir == NAT_OUTBOUND)
+ nat2 = nat_outlookup(&fi, NAT_SEARCH|IPN_TCP, nat->nat_p,
+ nat->nat_inip, nat->nat_oip);
+ else
+ nat2 = nat_inlookup(&fi, NAT_SEARCH|IPN_TCP, nat->nat_p,
+ nat->nat_inip, nat->nat_oip);
+ if (nat2 == NULL) {
+ int slen;
+
+ /*
+ * No existing mapping.  ip_len, ip_src and ip_dst of the real
+ * header are changed while the entries are created and are put
+ * back from slen, swip and swip2 at the end of this branch.
+ */
+ slen = ip->ip_len;
+ ip->ip_len = fin->fin_hlen + sizeof(*tcp2);
+ /* tcp2 is a local SYN header from port sp, destination port 0 */
+ bzero((char *)tcp2, sizeof(*tcp2));
+ tcp2->th_win = htons(8192);
+ tcp2->th_sport = htons(sp);
+ TCP_OFF_A(tcp2, 5);
+ tcp2->th_flags = TH_SYN;
+ tcp2->th_dport = 0; /* XXX - don't specify remote port */
+ fi.fin_data[1] = 0;
+ fi.fin_dlen = sizeof(*tcp2);
+ fi.fin_plen = fi.fin_hlen + sizeof(*tcp2);
+ fi.fin_dp = (char *)tcp2;
+ fi.fin_fr = &ftppxyfr;
+ fi.fin_out = nat->nat_dir;
+ fi.fin_flx &= FI_LOWTTL|FI_FRAG|FI_TCPUDP|FI_OPTIONS|FI_IGNORE;
+ swip = ip->ip_src;
+ swip2 = ip->ip_dst;
+ if (nat->nat_dir == NAT_OUTBOUND) {
+ fi.fin_fi.fi_saddr = nat->nat_inip.s_addr;
+ ip->ip_src = nat->nat_inip;
+ } else if (nat->nat_dir == NAT_INBOUND) {
+ fi.fin_fi.fi_saddr = nat->nat_oip.s_addr;
+ ip->ip_src = nat->nat_oip;
+ }
+
+ flags = NAT_SLAVE|IPN_TCP|SI_W_DPORT;
+ if (nat->nat_dir == NAT_INBOUND)
+ flags |= NAT_NOTRULEPORT;
+ nat2 = nat_new(&fi, nat->nat_ptr, NULL, flags, nat->nat_dir);
+
+ if (nat2 != NULL) {
+ (void) nat_proto(&fi, nat2, IPN_TCP);
+ nat_update(&fi, nat2, nat->nat_ptr);
+ fi.fin_ifp = NULL;
+ if (nat->nat_dir == NAT_INBOUND) {
+ fi.fin_fi.fi_daddr = nat->nat_inip.s_addr;
+ ip->ip_dst = nat->nat_inip;
+ }
+ (void) fr_addstate(&fi, &nat2->nat_state, SI_W_DPORT);
+ /* release the reference left in fi.fin_state, if any */
+ if (fi.fin_state != NULL)
+ fr_statederef(&fi, (ipstate_t **)&fi.fin_state);
+ }
+ ip->ip_len = slen;
+ ip->ip_src = swip;
+ ip->ip_dst = swip2;
+ } else {
+ ipstate_t *is;
+
+ /* a mapping already exists: refresh it and any attached state */
+ nat_update(&fi, nat2, nat->nat_ptr);
+ READ_ENTER(&ipf_state);
+ is = nat2->nat_state;
+ if (is != NULL) {
+ MUTEX_ENTER(&is->is_lock);
+ (void)fr_tcp_age(&is->is_sti, &fi, ips_tqtqb,
+ is->is_flags);
+ MUTEX_EXIT(&is->is_lock);
+ }
+ RWLOCK_EXIT(&ipf_state);
+ }
+ return APR_INC(inc);
+}
+
+
+/*
+ * Process one command line sent by the client (ftp_side[0]).
+ *
+ * Up to the first five bytes of the line are upper-cased and matched
+ * against the commands of interest.  USER/XAUT, AUTH, PASS, ADAT and
+ * ACCT move ftp_passok to the matching FTPXY_* value and set ftp_incok
+ * to 1; for every other line ftp_incok is left at 0.  PORT is passed to
+ * ippr_ftp_port() when ftp_passok is FTPXY_GO or ippr_ftp_insecure is
+ * set, provided ippr_ftp_pasvonly is clear.
+ *
+ * On return the side's read pointer has been moved past the first '\n'
+ * (or as far as the write pointer allows).  The return value is
+ * whatever ippr_ftp_port() returned, or 0 if it was not called.
+ */
+int ippr_ftp_client(fin, ip, nat, ftp, dlen)
+fr_info_t *fin;
+ip_t *ip;
+nat_t *nat;
+ftpinfo_t *ftp;
+int dlen;
+{
+	char verb[6], *cur, *end, ch;
+	ftpside_t *side;
+	int delta, n;
+
+	delta = 0;
+	side = &ftp->ftp_side[0];
+	cur = side->ftps_rptr;
+	end = side->ftps_wptr;
+
+	/* Upper-cased copy of at most the first five bytes of the line. */
+	n = 0;
+	while ((n < 5) && (n < dlen)) {
+		ch = cur[n];
+		if (ISALPHA(ch))
+			ch = TOUPPER(ch);
+		verb[n++] = ch;
+	}
+	verb[n] = '\0';
+
+	ftp->ftp_incok = 0;
+	if (!strncmp(verb, "USER ", 5) || !strncmp(verb, "XAUT ", 5)) {
+		if ((ftp->ftp_passok == FTPXY_ADOK_1) ||
+		    (ftp->ftp_passok == FTPXY_AUOK_1))
+			ftp->ftp_passok = FTPXY_USER_2;
+		else
+			ftp->ftp_passok = FTPXY_USER_1;
+		ftp->ftp_incok = 1;
+	} else if (!strncmp(verb, "AUTH ", 5)) {
+		ftp->ftp_passok = FTPXY_AUTH_1;
+		ftp->ftp_incok = 1;
+	} else if (!strncmp(verb, "PASS ", 5)) {
+		/* PASS only counts directly after an accepted USER */
+		if (ftp->ftp_passok == FTPXY_USOK_1) {
+			ftp->ftp_passok = FTPXY_PASS_1;
+			ftp->ftp_incok = 1;
+		} else if (ftp->ftp_passok == FTPXY_USOK_2) {
+			ftp->ftp_passok = FTPXY_PASS_2;
+			ftp->ftp_incok = 1;
+		}
+	} else if ((ftp->ftp_passok == FTPXY_AUOK_1) &&
+		   !strncmp(verb, "ADAT ", 5)) {
+		ftp->ftp_passok = FTPXY_ADAT_1;
+		ftp->ftp_incok = 1;
+	} else if (((ftp->ftp_passok == FTPXY_PAOK_1) ||
+		    (ftp->ftp_passok == FTPXY_PAOK_2)) &&
+		   !strncmp(verb, "ACCT ", 5)) {
+		ftp->ftp_passok = FTPXY_ACCT_1;
+		ftp->ftp_incok = 1;
+	} else if (((ftp->ftp_passok == FTPXY_GO) || ippr_ftp_insecure) &&
+		   !ippr_ftp_pasvonly && !strncmp(verb, "PORT ", 5)) {
+		delta = ippr_ftp_port(fin, ip, nat, side, dlen);
+	}
+
+	/* Consume the line: at least one byte, stopping after '\n'. */
+	do {
+		ch = *cur++;
+	} while ((ch != '\n') && (cur < end));
+	side->ftps_rptr = cur;
+	return delta;
+}
+
+/*
+ * ippr_ftp_pasv - handle a "227 Entering Passive Mode" reply sitting at the
+ * read pointer of the server side buffer (ftp->ftp_side[1]).
+ *
+ * The four address octets and the port are parsed out of the reply, the
+ * address is compared with the one recorded in the NAT session, a
+ * replacement 227 line is formatted into a local buffer and the result is
+ * passed to ippr_ftp_pasvreply().
+ *
+ * Returns 0 whenever the reply is rejected (too short, wrong text, parse
+ * failure, address mismatch, missing CR-LF), otherwise the value returned
+ * by ippr_ftp_pasvreply().
+ */
+int ippr_ftp_pasv(fin, ip, nat, ftp, dlen)
+fr_info_t *fin;
+ip_t *ip;
+nat_t *nat;
+ftpinfo_t *ftp;
+int dlen;
+{
+ u_int a1, a2, a3, a4, data_ip;
+ char newbuf[IPF_FTPBUFSZ];
+ char *s, *brackets[2];
+ u_short a5, a6;
+ ftpside_t *f;
+
+ if (ippr_ftp_forcepasv != 0 &&
+ ftp->ftp_side[0].ftps_cmds != FTPXY_C_PASV) { /* client did not just send PASV */
+ if (ippr_ftp_debug > 0)
+ printf("ippr_ftp_pasv:ftps_cmds(%d) != FTPXY_C_PASV\n",
+ ftp->ftp_side[0].ftps_cmds);
+ return 0;
+ }
+
+ f = &ftp->ftp_side[1];
+
+#define PASV_REPLEN 24
+ /*
+ * Check for PASV reply message.
+ */
+ if (dlen < IPF_MIN227LEN) {
+ if (ippr_ftp_debug > 1)
+ printf("ippr_ftp_pasv:dlen(%d) < IPF_MIN227LEN\n",
+ dlen);
+ return 0;
+ } else if (strncmp(f->ftps_rptr,
+ "227 Entering Passive Mod", PASV_REPLEN)) { /* only the first PASV_REPLEN bytes are compared */
+ if (ippr_ftp_debug > 0)
+ printf("ippr_ftp_pasv:%d reply wrong\n", 227);
+ return 0;
+ }
+
+ brackets[0] = ""; /* emitted around the numbers in the new reply */
+ brackets[1] = "";
+ /*
+ * Skip the PASV reply + space
+ */
+ s = f->ftps_rptr + PASV_REPLEN;
+ while (*s && !ISDIGIT(*s)) {
+ if (*s == '(') { /* original used parentheses, so keep them */
+ brackets[0] = "(";
+ brackets[1] = ")";
+ }
+ s++;
+ }
+
+ /*
+ * Pick out the address components, two at a time.
+ */
+ a1 = ippr_ftp_atoi(&s);
+ if (s == NULL) { /* ippr_ftp_atoi() sets the pointer to NULL on failure */
+ if (ippr_ftp_debug > 1)
+ printf("ippr_ftp_pasv:ippr_ftp_atoi(%d) failed\n", 1);
+ return 0;
+ }
+ a2 = ippr_ftp_atoi(&s);
+ if (s == NULL) {
+ if (ippr_ftp_debug > 1)
+ printf("ippr_ftp_pasv:ippr_ftp_atoi(%d) failed\n", 2);
+ return 0;
+ }
+
+ /*
+ * check that IP address in the PASV reply is the same as the
+ * sender of the command - prevents using PASV for port scanning.
+ */
+ a1 <<= 16; /* a1 now holds all four octets in host order */
+ a1 |= a2;
+
+ if (((nat->nat_dir == NAT_INBOUND) &&
+ (a1 != ntohl(nat->nat_inip.s_addr))) ||
+ ((nat->nat_dir == NAT_OUTBOUND) &&
+ (a1 != ntohl(nat->nat_oip.s_addr)))) {
+ if (ippr_ftp_debug > 0)
+ printf("ippr_ftp_pasv:%s != nat->nat_oip\n", "a1");
+ return 0;
+ }
+
+ a5 = ippr_ftp_atoi(&s); /* port: high byte, low byte */
+ if (s == NULL) {
+ if (ippr_ftp_debug > 1)
+ printf("ippr_ftp_pasv:ippr_ftp_atoi(%d) failed\n", 3);
+ return 0;
+ }
+
+ if (*s == ')')
+ s++;
+ if (*s == '.')
+ s++;
+ if (*s == '\n') /* ippr_ftp_atoi() stepped over the character before it */
+ s--;
+ /*
+ * check for CR-LF at the end.
+ */
+ if ((*s == '\r') && (*(s + 1) == '\n')) {
+ s += 2; /* s now marks the end of the original reply */
+ } else {
+ if (ippr_ftp_debug > 1)
+ printf("ippr_ftp_pasv:missing %s", "cr-lf\n");
+ return 0;
+ }
+
+ a6 = a5 & 0xff;
+ a5 >>= 8;
+ /*
+ * Calculate new address parts for 227 reply
+ */
+ if (nat->nat_dir == NAT_INBOUND) {
+ data_ip = nat->nat_outip.s_addr; /* advertise nat_outip instead */
+ a1 = ntohl(data_ip);
+ } else
+ data_ip = htonl(a1); /* address is left as it was sent */
+
+ a2 = (a1 >> 16) & 0xff;
+ a3 = (a1 >> 8) & 0xff;
+ a4 = a1 & 0xff;
+ a1 >>= 24;
+
+#if defined(SNPRINTF) && defined(_KERNEL)
+ (void) SNPRINTF(newbuf, sizeof(newbuf), "%s %s%u,%u,%u,%u,%u,%u%s\r\n",
+ "227 Entering Passive Mode", brackets[0], a1, a2, a3, a4,
+ a5, a6, brackets[1]);
+#else
+ (void) sprintf(newbuf, "%s %s%u,%u,%u,%u,%u,%u%s\r\n",
+ "227 Entering Passive Mode", brackets[0], a1, a2, a3, a4,
+ a5, a6, brackets[1]);
+#endif
+ return ippr_ftp_pasvreply(fin, ip, nat, f, (a5 << 8 | a6),
+ newbuf, s, data_ip);
+}
+/*
+ * ippr_ftp_pasvreply - common tail of the 227 and 229 reply handling.
+ *
+ * newmsg replaces the bytes between f->ftps_rptr and s in the packet; the
+ * IP length and the fin length fields are adjusted by the size difference.
+ * A NAT entry for the data connection towards "port" is then looked up
+ * with the source port wildcarded; when none exists one is created with
+ * nat_new() and state is added for it, otherwise the existing entry is
+ * refreshed.  ip->ip_len, ip->ip_src and ip->ip_dst are modified while the
+ * new entry is built and restored afterwards.
+ *
+ * Returns the change in payload length (new length minus old length), or 0
+ * if the packet would grow beyond 65535 bytes.
+ */
+int ippr_ftp_pasvreply(fin, ip, nat, f, port, newmsg, s, data_ip)
+fr_info_t *fin;
+ip_t *ip;
+nat_t *nat;
+ftpside_t *f;
+u_int port;
+char *newmsg;
+char *s;
+u_int data_ip;
+{
+ int inc, off, nflags, sflags;
+ tcphdr_t *tcp, tcph, *tcp2;
+ struct in_addr swip, swip2;
+ struct in_addr data_addr;
+ size_t nlen, olen;
+ fr_info_t fi;
+ nat_t *nat2;
+ mb_t *m;
+
+ m = fin->fin_m;
+ tcp = (tcphdr_t *)fin->fin_dp;
+ off = (char *)tcp - (char *)ip + (TCP_OFF(tcp) << 2) + fin->fin_ipoff; /* offset of the TCP payload */
+
+ data_addr.s_addr = data_ip;
+ tcp2 = &tcph; /* scratch TCP header used for the fake SYN below */
+ inc = 0;
+
+
+ olen = s - f->ftps_rptr; /* length of the reply being replaced */
+ nlen = strlen(newmsg);
+ inc = nlen - olen;
+ if ((inc + ip->ip_len) > 65535) {
+ if (ippr_ftp_debug > 0)
+ printf("ippr_ftp_pasv:inc(%d) + ip->ip_len > 65535\n",
+ inc);
+ return 0;
+ }
+
+#if !defined(_KERNEL)
+ bcopy(newmsg, MTOD(m, char *) + off, nlen);
+#else
+# if defined(MENTAT)
+ if (inc < 0)
+ (void)adjmsg(m, inc);
+# else /* defined(MENTAT) */
+ /*
+ * m_adj takes care of pkthdr.len, if required and treats inc<0 to
+ * mean remove -len bytes from the end of the packet.
+ * The mbuf chain will be extended if necessary by m_copyback().
+ */
+ if (inc < 0)
+ m_adj(m, inc);
+# endif /* defined(MENTAT) */
+#endif /* !defined(_KERNEL) */
+ COPYBACK(m, off, nlen, newmsg);
+
+ if (inc != 0) {
+ ip->ip_len += inc;
+ fin->fin_dlen += inc;
+ fin->fin_plen += inc;
+ }
+
+ /*
+ * Add skeleton NAT entry for connection which will come back the
+ * other way.
+ */
+ bcopy((char *)fin, (char *)&fi, sizeof(fi)); /* work on a private copy of fin */
+ fi.fin_state = NULL;
+ fi.fin_nat = NULL;
+ fi.fin_flx |= FI_IGNORE;
+ fi.fin_data[0] = 0;
+ fi.fin_data[1] = port;
+ nflags = IPN_TCP|SI_W_SPORT;
+ if (ippr_ftp_pasvrdr && f->ftps_ifp)
+ nflags |= SI_W_DPORT;
+ if (nat->nat_dir == NAT_OUTBOUND)
+ nat2 = nat_outlookup(&fi, nflags|NAT_SEARCH,
+ nat->nat_p, nat->nat_inip, nat->nat_oip);
+ else
+ nat2 = nat_inlookup(&fi, nflags|NAT_SEARCH,
+ nat->nat_p, nat->nat_inip, nat->nat_oip);
+ if (nat2 == NULL) {
+ int slen;
+
+ slen = ip->ip_len; /* saved, restored once the entry is built */
+ ip->ip_len = fin->fin_hlen + sizeof(*tcp2);
+ bzero((char *)tcp2, sizeof(*tcp2));
+ tcp2->th_win = htons(8192);
+ tcp2->th_sport = 0; /* XXX - fake it for nat_new */
+ TCP_OFF_A(tcp2, 5);
+ tcp2->th_flags = TH_SYN;
+ fi.fin_data[1] = port;
+ fi.fin_dlen = sizeof(*tcp2);
+ tcp2->th_dport = htons(port);
+ fi.fin_data[0] = 0;
+ fi.fin_dp = (char *)tcp2;
+ fi.fin_plen = fi.fin_hlen + sizeof(*tcp);
+ fi.fin_fr = &ftppxyfr;
+ fi.fin_out = nat->nat_dir;
+ fi.fin_flx &= FI_LOWTTL|FI_FRAG|FI_TCPUDP|FI_OPTIONS|FI_IGNORE;
+ swip = ip->ip_src; /* saved, restored once the entry is built */
+ swip2 = ip->ip_dst;
+ if (nat->nat_dir == NAT_OUTBOUND) {
+ fi.fin_fi.fi_daddr = data_addr.s_addr;
+ fi.fin_fi.fi_saddr = nat->nat_inip.s_addr;
+ ip->ip_dst = data_addr;
+ ip->ip_src = nat->nat_inip;
+ } else if (nat->nat_dir == NAT_INBOUND) {
+ fi.fin_fi.fi_saddr = nat->nat_oip.s_addr;
+ fi.fin_fi.fi_daddr = nat->nat_outip.s_addr;
+ ip->ip_src = nat->nat_oip;
+ ip->ip_dst = nat->nat_outip;
+ }
+
+ sflags = nflags; /* flags for fr_addstate(), without the NAT_* additions */
+ nflags |= NAT_SLAVE;
+ if (nat->nat_dir == NAT_INBOUND)
+ nflags |= NAT_NOTRULEPORT;
+ nat2 = nat_new(&fi, nat->nat_ptr, NULL, nflags, nat->nat_dir);
+ if (nat2 != NULL) {
+ (void) nat_proto(&fi, nat2, IPN_TCP);
+ nat_update(&fi, nat2, nat->nat_ptr);
+ fi.fin_ifp = NULL;
+ if (nat->nat_dir == NAT_INBOUND) {
+ fi.fin_fi.fi_daddr = nat->nat_inip.s_addr;
+ ip->ip_dst = nat->nat_inip;
+ }
+ (void) fr_addstate(&fi, &nat2->nat_state, sflags);
+ if (fi.fin_state != NULL)
+ fr_statederef(&fi, (ipstate_t **)&fi.fin_state);
+ }
+
+ ip->ip_len = slen;
+ ip->ip_src = swip;
+ ip->ip_dst = swip2;
+ } else {
+ ipstate_t *is;
+
+ nat_update(&fi, nat2, nat->nat_ptr);
+ READ_ENTER(&ipf_state); /* held while nat2->nat_state is used */
+ is = nat2->nat_state;
+ if (is != NULL) {
+ MUTEX_ENTER(&is->is_lock);
+ (void)fr_tcp_age(&is->is_sti, &fi, ips_tqtqb,
+ is->is_flags);
+ MUTEX_EXIT(&is->is_lock);
+ }
+ RWLOCK_EXIT(&ipf_state);
+ }
+ return inc;
+}
+
+
+/*
+ * ippr_ftp_server - look at one reply line sent by the FTP server.
+ *
+ * A line starting with a space is simply consumed.  A line that does not
+ * start with three digits makes the function return 0 straight away,
+ * leaving the read pointer and ftp_incok untouched.  Otherwise 227 and 229
+ * replies are passed to ippr_ftp_pasv() / ippr_ftp_epsv() when the session
+ * has reached FTPXY_GO (or ippr_ftp_insecure is set), 4xx/5xx replies
+ * reset the login state and, when the last client command set ftp_incok,
+ * 2xx/3xx replies advance it.  Returns the value from the pasv/epsv
+ * handler, or 0.
+ */
+int ippr_ftp_server(fin, ip, nat, ftp, dlen)
+fr_info_t *fin;
+ip_t *ip;
+nat_t *nat;
+ftpinfo_t *ftp;
+int dlen;
+{
+	int delta, is227, is229;
+	char *cur, *end, ch;
+	ftpside_t *srv;
+
+	srv = &ftp->ftp_side[1];
+	cur = srv->ftps_rptr;
+	end = srv->ftps_wptr;
+	delta = 0;
+
+	if (*cur != ' ') {
+		if (!(ISDIGIT(*cur) && ISDIGIT(*(cur + 1)) &&
+		      ISDIGIT(*(cur + 2))))
+			return 0;
+
+		is227 = !strncmp(cur, "227 ", 4);
+		is229 = !strncmp(cur, "229 ", 4);
+
+		if (ftp->ftp_passok == FTPXY_GO) {
+			if (is227)
+				delta = ippr_ftp_pasv(fin, ip, nat, ftp, dlen);
+			else if (is229)
+				delta = ippr_ftp_epsv(fin, ip, nat, srv, dlen);
+		} else if (ippr_ftp_insecure && (is227 || is229)) {
+			if (is227)
+				delta = ippr_ftp_pasv(fin, ip, nat, ftp, dlen);
+			else
+				delta = ippr_ftp_epsv(fin, ip, nat, srv, dlen);
+		} else if ((*cur == '4') || (*cur == '5')) {
+			ftp->ftp_passok = FTPXY_INIT;
+		} else if (ftp->ftp_incok) {
+			switch (*cur)
+			{
+			case '3' :
+				if (ftp->ftp_passok == FTPXY_ACCT_1)
+					ftp->ftp_passok = FTPXY_GO;
+				else
+					ftp->ftp_passok++;
+				break;
+			case '2' :
+				switch (ftp->ftp_passok)
+				{
+				case FTPXY_USER_1 :
+				case FTPXY_USER_2 :
+				case FTPXY_PASS_1 :
+				case FTPXY_PASS_2 :
+				case FTPXY_ACCT_1 :
+					ftp->ftp_passok = FTPXY_GO;
+					break;
+				default :
+					ftp->ftp_passok += 3;
+					break;
+				}
+				break;
+			default :
+				break;
+			}
+		}
+	}
+
+	ftp->ftp_incok = 0;
+
+	/* Consume the line: at least one byte, stop after '\n' or at end. */
+	do {
+		ch = *cur++;
+	} while ((ch != '\n') && (cur < end));
+
+	srv->ftps_rptr = cur;
+	return delta;
+}
+
+
+/*
+ * Look to see if the buffer starts with something which we recognise as
+ * being the correct syntax for the FTP protocol.
+ *
+ * A client line is accepted when it starts with three or four letters
+ * followed by a space or '\r' and a CR-LF pair is found within len bytes.
+ *
+ * Returns:
+ *   0 - a complete command line is present; ftps_cmds is set to
+ *       FTPXY_C_PASV if the command is PASV and to 0 otherwise.
+ *   1 - the data does not look like a command (or ftps_junk was already 1).
+ *   2 - fewer than five bytes, or no CR-LF found yet.
+ */
+int ippr_ftp_client_valid(ftps, buf, len)
+ftpside_t *ftps;
+char *buf;
+size_t len;
+{
+	register char *s, c, pc;
+	register size_t i = len;
+	char cmd[5];
+
+	s = buf;
+	/*
+	 * A three letter command never stores cmd[3]; clear the array so the
+	 * strcmp() below does not read an indeterminate byte.
+	 */
+	bzero((char *)cmd, sizeof(cmd));
+
+	if (ftps->ftps_junk == 1)
+		return 1;
+
+	if (i < 5) {
+		if (ippr_ftp_debug > 3)
+			printf("ippr_ftp_client_valid:i(%d) < 5\n", (int)i);
+		return 2;
+	}
+
+	i--;
+	c = *s++;
+
+	if (ISALPHA(c)) {
+		cmd[0] = TOUPPER(c);
+		c = *s++;
+		i--;
+		if (ISALPHA(c)) {
+			cmd[1] = TOUPPER(c);
+			c = *s++;
+			i--;
+			if (ISALPHA(c)) {
+				cmd[2] = TOUPPER(c);
+				c = *s++;
+				i--;
+				if (ISALPHA(c)) {
+					cmd[3] = TOUPPER(c);
+					c = *s++;
+					i--;
+					if ((c != ' ') && (c != '\r'))
+						goto bad_client_command;
+				} else if ((c != ' ') && (c != '\r'))
+					goto bad_client_command;
+			} else
+				goto bad_client_command;
+		} else
+			goto bad_client_command;
+	} else {
+bad_client_command:
+		if (ippr_ftp_debug > 3)
+			printf("%s:bad:junk %d len %d/%d c 0x%x buf [%*s]\n",
+			       "ippr_ftp_client_valid",
+			       ftps->ftps_junk, (int)len, (int)i, c,
+			       (int)len, buf);
+		return 1;
+	}
+
+	/* Search the remainder of the buffer for the terminating CR-LF. */
+	for (; i; i--) {
+		pc = c;
+		c = *s++;
+		if ((pc == '\r') && (c == '\n')) {
+			cmd[4] = '\0';
+			if (!strcmp(cmd, "PASV"))
+				ftps->ftps_cmds = FTPXY_C_PASV;
+			else
+				ftps->ftps_cmds = 0;
+			return 0;
+		}
+	}
+#if !defined(_KERNEL)
+	printf("ippr_ftp_client_valid:junk after cmd[%*.*s]\n",
+	       (int)len, (int)len, buf);
+#endif
+	return 2;
+}
+
+
+/*
+ * ippr_ftp_server_valid - check that the buffer starts with something that
+ * looks like an FTP server reply.
+ *
+ * A reply is accepted when it starts with a space, or with three digits
+ * followed by '-' or ' ', and a CR-LF pair is found within len bytes.
+ *
+ * Returns:
+ *   0 - a complete reply line is present; ftps_cmds is set to the three
+ *       digit code (0 for a line starting with a space).
+ *   1 - the data does not look like a reply (or ftps_junk was already 1).
+ *   2 - fewer than five bytes, or no CR-LF found yet.
+ */
+int ippr_ftp_server_valid(ftps, buf, len)
+ftpside_t *ftps;
+char *buf;
+size_t len;
+{
+	register char *s, c, pc;
+	register size_t i = len;
+	int cmd;
+
+	s = buf;
+	cmd = 0;
+
+	if (ftps->ftps_junk == 1)
+		return 1;
+
+	if (i < 5) {
+		if (ippr_ftp_debug > 3)
+			printf("ippr_ftp_server_valid:i(%d) < 5\n", (int)i);
+		return 2;
+	}
+
+	c = *s++;
+	i--;
+	if (c == ' ')
+		goto search_eol;
+
+	if (ISDIGIT(c)) {
+		cmd = (c - '0') * 100;
+		c = *s++;
+		i--;
+		if (ISDIGIT(c)) {
+			cmd += (c - '0') * 10;
+			c = *s++;
+			i--;
+			if (ISDIGIT(c)) {
+				cmd += (c - '0');
+				c = *s++;
+				i--;
+				if ((c != '-') && (c != ' '))
+					goto bad_server_command;
+			} else
+				goto bad_server_command;
+		} else
+			goto bad_server_command;
+	} else {
+bad_server_command:
+		if (ippr_ftp_debug > 3)
+			printf("%s:bad:junk %d len %d/%d c 0x%x buf [%*s]\n",
+			       "ippr_ftp_server_valid",
+			       ftps->ftps_junk, (int)len, (int)i,
+			       c, (int)len, buf);
+		return 1;
+	}
+search_eol:
+	/* Search the remainder of the buffer for the terminating CR-LF. */
+	for (; i; i--) {
+		pc = c;
+		c = *s++;
+		if ((pc == '\r') && (c == '\n')) {
+			ftps->ftps_cmds = cmd;
+			return 0;
+		}
+	}
+	if (ippr_ftp_debug > 3)
+		printf("ippr_ftp_server_valid:junk after cmd[%*s]\n",
+		       (int)len, buf);
+	return 2;
+}
+
+
+/*
+ * ippr_ftp_valid - run the syntax check that matches the given side of the
+ * connection: side 0 is checked as client data, any other value as server
+ * data.  The result of the chosen checker is returned unchanged.
+ */
+int ippr_ftp_valid(ftp, side, buf, len)
+ftpinfo_t *ftp;
+int side;
+char *buf;
+size_t len;
+{
+	ftpside_t *which = &ftp->ftp_side[side];
+
+	if (side != 0)
+		return ippr_ftp_server_valid(which, buf, len);
+	return ippr_ftp_client_valid(which, buf, len);
+}
+
+
+/*
+ * ippr_ftp_process - run the TCP payload of one control connection packet
+ * through the proxy.
+ *
+ * The sequence and acknowledgement numbers are checked against what was
+ * last recorded for each side (taking the proxy's own sequence offsets
+ * into account); packets which do not line up are rejected.  The payload
+ * is then copied into the per-side buffer, validated with
+ * ippr_ftp_valid() and each complete line is passed to ippr_ftp_server()
+ * or ippr_ftp_client().
+ *
+ * For map rules, the following applies:
+ * rv == 0 for outbound processing,
+ * rv == 1 for inbound processing.
+ * For rdr rules, the following applies:
+ * rv == 0 for inbound processing,
+ * rv == 1 for outbound processing.
+ *
+ * Returns 0 when there is no payload, APR_ERR(1) for a packet that is out
+ * of sequence, APR_ERR(2) when the very first data seen is junk, and
+ * APR_INC(inc) otherwise, where inc is the sum of the values returned by
+ * the line handlers.
+ */
+int ippr_ftp_process(fin, nat, ftp, rv)
+fr_info_t *fin;
+nat_t *nat;
+ftpinfo_t *ftp;
+int rv;
+{
+ int mlen, len, off, inc, i, sel, sel2, ok, ackoff, seqoff;
+ char *rptr, *wptr, *s;
+ u_32_t thseq, thack;
+ ap_session_t *aps;
+ ftpside_t *f, *t;
+ tcphdr_t *tcp;
+ ip_t *ip;
+ mb_t *m;
+
+ m = fin->fin_m;
+ ip = fin->fin_ip;
+ tcp = (tcphdr_t *)fin->fin_dp;
+ off = (char *)tcp - (char *)ip + (TCP_OFF(tcp) << 2) + fin->fin_ipoff;
+
+ /* f is the side this packet came from, t is the other side. */
+ f = &ftp->ftp_side[rv];
+ t = &ftp->ftp_side[1 - rv];
+ thseq = ntohl(tcp->th_seq);
+ thack = ntohl(tcp->th_ack);
+
+#ifdef __sgi
+ mlen = fin->fin_plen - off;
+#else
+ mlen = MSGDSIZE(m) - off;
+#endif
+ if (ippr_ftp_debug > 4)
+ printf("ippr_ftp_process: mlen %d\n", mlen);
+
+ if (mlen <= 0) {
+ if ((tcp->th_flags & TH_OPENING) == TH_OPENING) {
+ f->ftps_seq[0] = thseq + 1;
+ t->ftps_seq[0] = thack;
+ }
+ return 0;
+ }
+ aps = nat->nat_aps;
+
+ /* Pick the sequence/ack offsets that apply to this packet. */
+ sel = aps->aps_sel[1 - rv];
+ sel2 = aps->aps_sel[rv];
+ if (rv == 0) {
+ seqoff = aps->aps_seqoff[sel];
+ if (aps->aps_seqmin[sel] > seqoff + thseq)
+ seqoff = aps->aps_seqoff[!sel];
+ ackoff = aps->aps_ackoff[sel2];
+ if (aps->aps_ackmin[sel2] > ackoff + thack)
+ ackoff = aps->aps_ackoff[!sel2];
+ } else {
+ seqoff = aps->aps_ackoff[sel];
+ if (ippr_ftp_debug > 2)
+ printf("seqoff %d thseq %x ackmin %x\n", seqoff, thseq,
+ aps->aps_ackmin[sel]);
+ if (aps->aps_ackmin[sel] > seqoff + thseq)
+ seqoff = aps->aps_ackoff[!sel];
+
+ ackoff = aps->aps_seqoff[sel2];
+ if (ippr_ftp_debug > 2)
+ printf("ackoff %d thack %x seqmin %x\n", ackoff, thack,
+ aps->aps_seqmin[sel2]);
+ if (ackoff > 0) {
+ if (aps->aps_seqmin[sel2] > ackoff + thack)
+ ackoff = aps->aps_seqoff[!sel2];
+ } else {
+ if (aps->aps_seqmin[sel2] > thack)
+ ackoff = aps->aps_seqoff[!sel2];
+ }
+ }
+ if (ippr_ftp_debug > 2) {
+ printf("%s: %x seq %x/%d ack %x/%d len %d/%d off %d\n",
+ rv ? "IN" : "OUT", tcp->th_flags, thseq, seqoff,
+ thack, ackoff, mlen, fin->fin_plen, off);
+ printf("sel %d seqmin %x/%x offset %d/%d\n", sel,
+ aps->aps_seqmin[sel], aps->aps_seqmin[sel2],
+ aps->aps_seqoff[sel], aps->aps_seqoff[sel2]);
+ printf("sel %d ackmin %x/%x offset %d/%d\n", sel2,
+ aps->aps_ackmin[sel], aps->aps_ackmin[sel2],
+ aps->aps_ackoff[sel], aps->aps_ackoff[sel2]);
+ }
+
+ /*
+ * XXX - Ideally, this packet should get dropped because we now know
+ * that it is out of order (and there is no real danger in doing so
+ * apart from causing packets to go through here ordered).
+ */
+ if (ippr_ftp_debug > 2) {
+ printf("rv %d t:seq[0] %x seq[1] %x %d/%d\n",
+ rv, t->ftps_seq[0], t->ftps_seq[1], seqoff, ackoff);
+ }
+
+ ok = 0;
+ if (t->ftps_seq[0] == 0) {
+ t->ftps_seq[0] = thack;
+ ok = 1;
+ } else {
+ if (ackoff == 0) {
+ if (t->ftps_seq[0] == thack)
+ ok = 1;
+ else if (t->ftps_seq[1] == thack) {
+ t->ftps_seq[0] = thack;
+ ok = 1;
+ }
+ } else {
+ if (t->ftps_seq[0] + ackoff == thack)
+ ok = 1;
+ else if (t->ftps_seq[0] == thack + ackoff)
+ ok = 1;
+ else if (t->ftps_seq[1] + ackoff == thack) {
+ t->ftps_seq[0] = thack - ackoff;
+ ok = 1;
+ } else if (t->ftps_seq[1] == thack + ackoff) {
+ t->ftps_seq[0] = thack - ackoff;
+ ok = 1;
+ }
+ }
+ }
+
+ if (ippr_ftp_debug > 2) {
+ if (!ok)
+ printf("%s ok\n", "not");
+ }
+
+ /*
+ * NOTE(review): mlen <= 0 has already returned above and mlen is not
+ * changed in between, so this branch cannot be taken as written.
+ */
+ if (!mlen) {
+ if (t->ftps_seq[0] + ackoff != thack) {
+ if (ippr_ftp_debug > 1) {
+ printf("%s:seq[0](%x) + (%x) != (%x)\n",
+ "ippr_ftp_process", t->ftps_seq[0],
+ ackoff, thack);
+ }
+ return APR_ERR(1);
+ }
+
+ if (ippr_ftp_debug > 2) {
+ printf("ippr_ftp_process:f:seq[0] %x seq[1] %x\n",
+ f->ftps_seq[0], f->ftps_seq[1]);
+ }
+
+ if (tcp->th_flags & TH_FIN) {
+ if (thseq == f->ftps_seq[1]) {
+ f->ftps_seq[0] = f->ftps_seq[1] - seqoff;
+ f->ftps_seq[1] = thseq + 1 - seqoff;
+ } else {
+ if (ippr_ftp_debug > 1) {
+ printf("FIN: thseq %x seqoff %d ftps_seq %x\n",
+ thseq, seqoff, f->ftps_seq[0]);
+ }
+ return APR_ERR(1);
+ }
+ }
+ f->ftps_len = 0;
+ return 0;
+ }
+
+ ok = 0;
+ if ((thseq == f->ftps_seq[0]) || (thseq == f->ftps_seq[1])) {
+ ok = 1;
+ /*
+ * Retransmitted data packet.
+ */
+ } else if ((thseq + mlen == f->ftps_seq[0]) ||
+ (thseq + mlen == f->ftps_seq[1])) {
+ ok = 1;
+ }
+
+ if (ok == 0) {
+ inc = thseq - f->ftps_seq[0];
+ if (ippr_ftp_debug > 1) {
+ printf("inc %d sel %d rv %d\n", inc, sel, rv);
+ printf("th_seq %x ftps_seq %x/%x\n",
+ thseq, f->ftps_seq[0], f->ftps_seq[1]);
+ printf("ackmin %x ackoff %d\n", aps->aps_ackmin[sel],
+ aps->aps_ackoff[sel]);
+ printf("seqmin %x seqoff %d\n", aps->aps_seqmin[sel],
+ aps->aps_seqoff[sel]);
+ }
+
+ return APR_ERR(1);
+ }
+
+ inc = 0;
+ rptr = f->ftps_rptr;
+ wptr = f->ftps_wptr;
+ f->ftps_seq[0] = thseq;
+ f->ftps_seq[1] = f->ftps_seq[0] + mlen;
+ f->ftps_len = mlen;
+
+ /* Copy the payload into the side buffer, one buffer-full at a time. */
+ while (mlen > 0) {
+ len = MIN(mlen, sizeof(f->ftps_buf) - (wptr - rptr));
+ COPYDATA(m, off, len, wptr);
+ mlen -= len;
+ off += len;
+ wptr += len;
+
+ if (ippr_ftp_debug > 3)
+ printf("%s:len %d/%d off %d wptr %lx junk %d [%*s]\n",
+ "ippr_ftp_process",
+ len, mlen, off, (u_long)wptr, f->ftps_junk,
+ len, rptr);
+
+ f->ftps_wptr = wptr;
+ if (f->ftps_junk != 0) {
+ i = f->ftps_junk;
+ f->ftps_junk = ippr_ftp_valid(ftp, rv, rptr,
+ wptr - rptr);
+
+ if (ippr_ftp_debug > 5)
+ printf("%s:junk %d -> %d\n",
+ "ippr_ftp_process", i, f->ftps_junk);
+
+ if (f->ftps_junk != 0) {
+ if (wptr - rptr == sizeof(f->ftps_buf)) {
+ if (ippr_ftp_debug > 4)
+ printf("%s:full buffer\n",
+ "ippr_ftp_process");
+ f->ftps_rptr = f->ftps_buf;
+ f->ftps_wptr = f->ftps_buf;
+ rptr = f->ftps_rptr;
+ wptr = f->ftps_wptr;
+ /*
+ * Because we throw away data here that
+ * we would otherwise parse, set the
+ * junk flag to indicate just ignore
+ * any data up to the next CRLF.
+ */
+ f->ftps_junk = 1;
+ continue;
+ }
+ }
+ }
+
+ /* Hand every complete, valid line to the per-side handler. */
+ while ((f->ftps_junk == 0) && (wptr > rptr)) {
+ len = wptr - rptr;
+ f->ftps_junk = ippr_ftp_valid(ftp, rv, rptr, len);
+
+ if (ippr_ftp_debug > 3) {
+ printf("%s=%d len %d rv %d ptr %lx/%lx ",
+ "ippr_ftp_valid",
+ f->ftps_junk, len, rv, (u_long)rptr,
+ (u_long)wptr);
+ printf("buf [%*s]\n", len, rptr);
+ }
+
+ if (f->ftps_junk == 0) {
+ f->ftps_rptr = rptr;
+ if (rv)
+ inc += ippr_ftp_server(fin, ip, nat,
+ ftp, len);
+ else
+ inc += ippr_ftp_client(fin, ip, nat,
+ ftp, len);
+ rptr = f->ftps_rptr;
+ wptr = f->ftps_wptr;
+ }
+ }
+
+ /*
+ * Off to a bad start so lets just forget about using the
+ * ftp proxy for this connection.
+ */
+ if ((f->ftps_cmds == 0) && (f->ftps_junk == 1)) {
+ /* f->ftps_seq[1] += inc; */
+
+ if (ippr_ftp_debug > 1)
+ printf("%s:cmds == 0 junk == 1\n",
+ "ippr_ftp_process");
+ return APR_ERR(2);
+ }
+
+ /* While in junk mode, resynchronise after the next CR-LF. */
+ if ((f->ftps_junk != 0) && (rptr < wptr)) {
+ for (s = rptr; s < wptr; s++) {
+ if ((*s == '\r') && (s + 1 < wptr) &&
+ (*(s + 1) == '\n')) {
+ rptr = s + 2;
+ f->ftps_junk = 0;
+ break;
+ }
+ }
+ }
+
+ if (rptr == wptr) {
+ rptr = wptr = f->ftps_buf;
+ } else {
+ /*
+ * Compact the buffer back to the start. The junk
+ * flag should already be set and because we're not
+ * throwing away any data, it is preserved from its
+ * current state.
+ *
+ * The number of bytes to move is what lies between
+ * rptr and wptr; "len" cannot be used for this as it
+ * still holds the size of the last copy or validity
+ * check, which need not match once rptr has moved.
+ */
+ if (rptr > f->ftps_buf) {
+ bcopy(rptr, f->ftps_buf, wptr - rptr);
+ wptr -= rptr - f->ftps_buf;
+ rptr = f->ftps_buf;
+ }
+ }
+ f->ftps_rptr = rptr;
+ f->ftps_wptr = wptr;
+ }
+
+ /* f->ftps_seq[1] += inc; */
+ if (tcp->th_flags & TH_FIN)
+ f->ftps_seq[1]++;
+ if (ippr_ftp_debug > 3) {
+#ifdef __sgi
+ mlen = fin->fin_plen;
+#else
+ mlen = MSGDSIZE(m);
+#endif
+ mlen -= off;
+ printf("ftps_seq[1] = %x inc %d len %d\n",
+ f->ftps_seq[1], inc, mlen);
+ }
+
+ f->ftps_rptr = rptr;
+ f->ftps_wptr = wptr;
+ return APR_INC(inc);
+}
+
+
+/*
+ * ippr_ftp_out - proxy hook for packets travelling outbound.  Records the
+ * interface on the opposite side's entry if it has none yet, then calls
+ * ippr_ftp_process() with side 0 for an outbound NAT session and side 1
+ * otherwise.  Returns 0 when the session carries no FTP proxy data.
+ */
+int ippr_ftp_out(fin, aps, nat)
+fr_info_t *fin;
+ap_session_t *aps;
+nat_t *nat;
+{
+	ftpinfo_t *ftp = aps->aps_data;
+	ftpside_t *peer;
+	int self;
+
+	if (ftp == NULL)
+		return 0;
+
+	if (nat->nat_dir == NAT_OUTBOUND)
+		self = 0;
+	else
+		self = 1;
+
+	peer = &ftp->ftp_side[1 - self];
+	if (peer->ftps_ifp == NULL)
+		peer->ftps_ifp = fin->fin_ifp;
+
+	return ippr_ftp_process(fin, nat, ftp, self);
+}
+
+
+/*
+ * ippr_ftp_in - proxy hook for packets travelling inbound.  Records the
+ * interface on the side selected by the NAT direction if it has none yet,
+ * then calls ippr_ftp_process() for the opposite side: 1 for an outbound
+ * NAT session and 0 otherwise.  Returns 0 when the session carries no FTP
+ * proxy data.
+ */
+int ippr_ftp_in(fin, aps, nat)
+fr_info_t *fin;
+ap_session_t *aps;
+nat_t *nat;
+{
+	ftpinfo_t *ftp = aps->aps_data;
+	ftpside_t *side;
+	int idx;
+
+	if (ftp == NULL)
+		return 0;
+
+	if (nat->nat_dir == NAT_OUTBOUND)
+		idx = 0;
+	else
+		idx = 1;
+
+	side = &ftp->ftp_side[idx];
+	if (side->ftps_ifp == NULL)
+		side->ftps_ifp = fin->fin_ifp;
+
+	return ippr_ftp_process(fin, nat, ftp, 1 - idx);
+}
+
+
+/*
+ * ippr_ftp_atoi - implement a version of atoi which processes numbers in
+ * pairs separated by commas (which are expected to be in the range 0 - 255),
+ * returning a 16 bit number combining either side of the , as the MSB and
+ * LSB.
+ *
+ * *ptr is the parse position.  On success it is advanced past the
+ * character that ended the second number, unless that character was the
+ * terminating NUL, in which case it is left pointing at the NUL so that
+ * callers never look beyond the end of the string.  If the first number is
+ * not followed by a comma, *ptr is set to NULL and 0 is returned.
+ *
+ * Each number is accumulated in an unsigned char, so values above 255 are
+ * reduced modulo 256.
+ */
+u_short ippr_ftp_atoi(ptr)
+char **ptr;
+{
+	register char *s = *ptr, c;
+	register u_char i = 0, j = 0;
+
+	while (((c = *s++) != '\0') && ISDIGIT(c)) {
+		i *= 10;
+		i += c - '0';
+	}
+	if (c != ',') {
+		*ptr = NULL;
+		return 0;
+	}
+	while (((c = *s++) != '\0') && ISDIGIT(c)) {
+		j *= 10;
+		j += c - '0';
+	}
+	/* Do not step over the end of the string. */
+	if (c == '\0')
+		s--;
+	*ptr = s;
+	i &= 0xff;
+	j &= 0xff;
+	return (i << 8) | j;
+}
+
+
+/*
+ * ippr_ftp_epsv - handle a "229 Entering Extended Passive Mode" reply at
+ * the read pointer of the given side.  The port number is parsed out, a
+ * fresh 229 line is formatted and both are passed on to
+ * ippr_ftp_pasvreply() together with the packet's source address.
+ *
+ * Returns 0 if the reply is too short, has the wrong text or is not
+ * terminated by CR-LF; otherwise the value from ippr_ftp_pasvreply().
+ */
+int ippr_ftp_epsv(fin, ip, nat, f, dlen)
+fr_info_t *fin;
+ip_t *ip;
+nat_t *nat;
+ftpside_t *f;
+int dlen;
+{
+	char reply[IPF_FTPBUFSZ];
+	u_short port;
+	char *p;
+
+#define EPSV_REPLEN 33
+	/*
+	 * Check for EPSV reply message.
+	 */
+	if (dlen < IPF_MIN229LEN)
+		return (0);
+	if (strncmp(f->ftps_rptr,
+		    "229 Entering Extended Passive Mode", EPSV_REPLEN) != 0)
+		return (0);
+
+	/*
+	 * Skip the EPSV reply text, then anything up to the first digit.
+	 */
+	for (p = f->ftps_rptr + EPSV_REPLEN; *p && !ISDIGIT(*p); p++)
+		;
+
+	/*
+	 * As per RFC 2428, there are no address components in the EPSV
+	 * response. So we'll go straight to getting the port.
+	 */
+	for (port = 0; *p && ISDIGIT(*p); p++)
+		port = port * 10 + (*p - '0');
+
+	if (p == NULL)
+		return 0;
+
+	if (*p == '|')
+		p++;
+	if (*p == ')')
+		p++;
+	if (*p == '\n')
+		p--;
+	/*
+	 * check for CR-LF at the end.
+	 */
+	if ((*p != '\r') || (*(p + 1) != '\n'))
+		return 0;
+	p += 2;
+
+#if defined(SNPRINTF) && defined(_KERNEL)
+	(void) SNPRINTF(reply, sizeof(reply), "%s (|||%u|)\r\n",
+			"229 Entering Extended Passive Mode", port);
+#else
+	(void) sprintf(reply, "%s (|||%u|)\r\n",
+		       "229 Entering Extended Passive Mode", port);
+#endif
+
+	return ippr_ftp_pasvreply(fin, ip, nat, f, (u_int)port, reply, p,
+				  ip->ip_src.s_addr);
+}
diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_h323_pxy.c b/usr/src/uts/common/inet/ipf/netinet/ip_h323_pxy.c
new file mode 100644
index 0000000000..7fc62276c5
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/netinet/ip_h323_pxy.c
@@ -0,0 +1,294 @@
+/*
+ * Copyright 2001, QNX Software Systems Ltd. All Rights Reserved
+ *
+ * This source code has been published by QNX Software Systems Ltd. (QSSL).
+ * However, any use, reproduction, modification, distribution or transfer of
+ * this software, or any software which includes or is based upon any of this
+ * code, is only permitted under the terms of the QNX Open Community License
+ * version 1.0 (see licensing.qnx.com for details) or as otherwise expressly
+ * authorized by a written license agreement from QSSL. For more information,
+ * please email licensing@qnx.com.
+ *
+ * For more details, see QNX_OCL.txt provided with this distribution.
+ */
+
+/*
+ * Simple H.323 proxy
+ *
+ * by xtang@canada.com
+ * ported to ipfilter 3.4.20 by Michael Grant mg-ipf@grant.org
+ */
+
+#if __FreeBSD_version >= 220000 && defined(_KERNEL)
+# include <sys/fcntl.h>
+# include <sys/filio.h>
+#else
+# ifndef linux
+# include <sys/ioctl.h>
+# endif
+#endif
+
+#define IPF_H323_PROXY
+/* Entry points of the H.323 proxy. */
+int ippr_h323_init __P((void));
+void ippr_h323_fini __P((void));
+int ippr_h323_new __P((fr_info_t *, ap_session_t *, nat_t *));
+void ippr_h323_del __P((ap_session_t *));
+int ippr_h323_out __P((fr_info_t *, ap_session_t *, nat_t *));
+int ippr_h323_in __P((fr_info_t *, ap_session_t *, nat_t *));
+/* Entry points of the H.245 proxy, which ippr_h323_in() installs rules for. */
+int ippr_h245_new __P((fr_info_t *, ap_session_t *, nat_t *));
+int ippr_h245_out __P((fr_info_t *, ap_session_t *, nat_t *));
+int ippr_h245_in __P((fr_info_t *, ap_session_t *, nat_t *));
+
+static frentry_t h323_fr; /* zeroed and filled in by ippr_h323_init() */
+
+int h323_proxy_init = 0; /* 1 between ippr_h323_init() and ippr_h323_fini() */
+
+static int find_port __P((int, caddr_t, int datlen, int *, u_short *));
+
+
+/*
+ * find_port - search a buffer for an IPv4 address followed by a port.
+ *
+ * ipaddr is the address to look for, in network byte order.  The datlen
+ * bytes at data are scanned one byte at a time for the four address bytes
+ * followed by two more bytes.
+ *
+ * Returns 0 when the address is found, with *off set to its offset within
+ * data and *port set to the following two bytes read as a big-endian
+ * number.  Returns -1 otherwise; if datlen is below 6 neither *off nor
+ * *port is written, else *port is 0 and *off is the offset at which the
+ * scan stopped.  *off is purely an output: its value on entry is ignored.
+ */
+static int find_port(ipaddr, data, datlen, off, port)
+int ipaddr;
+caddr_t data;
+int datlen, *off;
+unsigned short *port;
+{
+	u_32_t addr, netaddr;
+	u_char *dp;
+	int offset;
+
+	if (datlen < 6)
+		return -1;
+
+	*port = 0;
+	dp = (u_char *)data;
+	netaddr = ntohl(ipaddr);
+
+	for (offset = 0; offset <= datlen - 6; offset++, dp++) {
+		/* Widen before shifting so that bit 31 is never shifted
+		 * into a signed int. */
+		addr = ((u_32_t)dp[0] << 24) | ((u_32_t)dp[1] << 16) |
+		       ((u_32_t)dp[2] << 8) | (u_32_t)dp[3];
+		if (netaddr == addr) {
+			*port = (*(dp + 4) << 8) | *(dp + 5);
+			break;
+		}
+	}
+	*off = offset;
+	return (offset > datlen - 6) ? -1 : 0;
+}
+
+/*
+ * ippr_h323_init - prepare the filter rule used by the H.323 proxy: the
+ * rule is cleared, given its flags and a single reference, its lock is
+ * created and the proxy is marked as initialised.  Always returns 0.
+ */
+int ippr_h323_init()
+{
+	frentry_t *rule = &h323_fr;
+
+	bzero((char *)rule, sizeof(*rule));
+	rule->fr_flags = FR_INQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE;
+	rule->fr_ref = 1;
+	MUTEX_INIT(&rule->fr_lock, "H323 proxy rule lock");
+
+	h323_proxy_init = 1;
+	return 0;
+}
+
+
+/*
+ * ippr_h323_fini - undo ippr_h323_init(): destroy the rule lock and clear
+ * the initialised flag.  Does nothing if the flag is not 1.
+ */
+void ippr_h323_fini()
+{
+	if (h323_proxy_init != 1)
+		return;
+
+	MUTEX_DESTROY(&h323_fr.fr_lock);
+	h323_proxy_init = 0;
+}
+
+
+/*
+ * ippr_h323_new - start a new H.323 proxy session with no private data
+ * attached.  fin and nat are not used.  Always returns 0.
+ */
+int ippr_h323_new(fin, aps, nat)
+fr_info_t *fin;
+ap_session_t *aps;
+nat_t *nat;
+{
+	nat = nat;	/* LINT */
+	fin = fin;	/* LINT */
+
+	aps->aps_psiz = 0;
+	aps->aps_data = NULL;
+
+	return 0;
+}
+
+
+/*
+ * ippr_h323_del - tear down an H.323 proxy session.  aps_data holds an
+ * array of ipnat_t rules (aps_psiz bytes of them); each one is removed
+ * with fr_nat_ioctl(SIOCRMNAT) and the array is then freed.
+ */
+void ippr_h323_del(aps)
+ap_session_t *aps;
+{
+	ipnat_t *rule;
+	int n;
+
+	if (!aps->aps_data)
+		return;
+
+	rule = aps->aps_data;
+	for (n = 0; n < (aps->aps_psiz / sizeof(ipnat_t)); n++) {
+		/*
+		 * Check the comment in ippr_h323_in() function,
+		 * just above fr_nat_ioctl() call.
+		 * We are lucky here because this function is not
+		 * called with ipf_nat locked.
+		 *
+		 * A failure to remove the rule is not acted upon.
+		 */
+		(void) fr_nat_ioctl((caddr_t)rule, SIOCRMNAT,
+				    NAT_SYSSPACE|NAT_LOCKHELD|FWRITE);
+		rule = (ipnat_t *)((char *)rule + sizeof(*rule));
+	}
+
+	KFREES(aps->aps_data, aps->aps_psiz);
+	/* avoid double free */
+	aps->aps_data = NULL;
+	aps->aps_psiz = 0;
+}
+
+
+/*
+ * ippr_h323_in - inspect the TCP payload of an H.323 packet for the
+ * packet's source address followed by a port.  When one is found, a copy
+ * of the session's NAT rule is turned into an "h245" proxy rule for that
+ * port, installed with fr_nat_ioctl(SIOCADNAT) and appended to the array
+ * of rules kept in aps_data so that ippr_h323_del() can remove it again.
+ *
+ * Returns 0 normally and -1 if memory cannot be allocated or the rule
+ * cannot be installed; in the latter case the session's existing rule
+ * array is left as it was.
+ */
+int ippr_h323_in(fin, aps, nat)
+fr_info_t *fin;
+ap_session_t *aps;
+nat_t *nat;
+{
+	int ipaddr, off, datlen;
+	unsigned short port;
+	caddr_t data;
+	tcphdr_t *tcp;
+	ip_t *ip;
+
+	ip = fin->fin_ip;
+	tcp = (tcphdr_t *)fin->fin_dp;
+	ipaddr = ip->ip_src.s_addr;
+
+	data = (caddr_t)tcp + (TCP_OFF(tcp) << 2);
+	datlen = fin->fin_dlen - (TCP_OFF(tcp) << 2);
+	if (find_port(ipaddr, data, datlen, &off, &port) == 0) {
+		ipnat_t *ipn;
+		char *newarray;
+
+		/* setup a nat rule to set a h245 proxy on tcp-port "port"
+		 * it's like:
+		 *   map <if> <inter_ip>/<mask> -> <gate_ip>/<mask> proxy port <port> <port>/tcp
+		 */
+		KMALLOCS(newarray, char *, aps->aps_psiz + sizeof(*ipn));
+		if (newarray == NULL) {
+			return -1;
+		}
+		/* The new rule goes in the slot after the existing ones. */
+		ipn = (ipnat_t *)&newarray[aps->aps_psiz];
+		bcopy((caddr_t)nat->nat_ptr, (caddr_t)ipn, sizeof(ipnat_t));
+		(void) strncpy(ipn->in_plabel, "h245", APR_LABELLEN);
+
+		ipn->in_inip = nat->nat_inip.s_addr;
+		ipn->in_inmsk = 0xffffffff;
+		ipn->in_dport = htons(port);
+		/*
+		 * We have a problem here: we need to call fr_nat_ioctl() to
+		 * add the h245 proxy rule, but since we already hold (READ
+		 * locked) the nat table rwlock (ipf_nat), fr_nat_ioctl()
+		 * would try to WRITE lock it.  This would cause a deadlock
+		 * on RTP.
+		 *
+		 * The quick & dirty solution here is to release the read
+		 * lock, call fr_nat_ioctl() and re-lock it.
+		 * A (maybe better) solution is to do an UPGRADE() and,
+		 * instead of calling fr_nat_ioctl(), add the nat rule
+		 * ourselves.
+		 */
+		RWLOCK_EXIT(&ipf_nat);
+		if (fr_nat_ioctl((caddr_t)ipn, SIOCADNAT,
+				 NAT_SYSSPACE|FWRITE) == -1) {
+			READ_ENTER(&ipf_nat);
+			/* The rule was not installed: drop the new array. */
+			KFREES(newarray, aps->aps_psiz + sizeof(*ipn));
+			return -1;
+		}
+		READ_ENTER(&ipf_nat);
+		if (aps->aps_data != NULL && aps->aps_psiz > 0) {
+			bcopy(aps->aps_data, newarray, aps->aps_psiz);
+			KFREES(aps->aps_data, aps->aps_psiz);
+		}
+		aps->aps_data = newarray;
+		aps->aps_psiz += sizeof(*ipn);
+	}
+	return 0;
+}
+
+
+/*
+ * ippr_h245_new - start a new H.245 proxy session with no private data
+ * attached.  fin and nat are not used.  Always returns 0.
+ */
+int ippr_h245_new(fin, aps, nat)
+fr_info_t *fin;
+ap_session_t *aps;
+nat_t *nat;
+{
+	nat = nat;	/* LINT */
+	fin = fin;	/* LINT */
+
+	aps->aps_psiz = 0;
+	aps->aps_data = NULL;
+	return 0;
+}
+
+
+/*
+ * ippr_h245_out - inspect the TCP payload of an outgoing H.245 packet for
+ * the session's internal address (nat_inip) followed by a port.  When one
+ * is found and no UDP NAT entry matches the packet's addresses, a UDP
+ * entry is created with nat_new() and the address and port in the payload
+ * are overwritten with the packet's source address and the new entry's
+ * nat_outport.  aps is not used.  Always returns 0.
+ */
+int ippr_h245_out(fin, aps, nat)
+fr_info_t *fin;
+ap_session_t *aps;
+nat_t *nat;
+{
+	int ipaddr, off, datlen;
+	tcphdr_t *tcp;
+	caddr_t data;
+	u_short port;
+	ip_t *ip;
+
+	aps = aps;	/* LINT */
+
+	ip = fin->fin_ip;
+	tcp = (tcphdr_t *)fin->fin_dp;
+	ipaddr = nat->nat_inip.s_addr;
+	data = (caddr_t)tcp + (TCP_OFF(tcp) << 2);
+	datlen = fin->fin_dlen - (TCP_OFF(tcp) << 2);
+	if (find_port(ipaddr, data, datlen, &off, &port) == 0) {
+		fr_info_t fi;
+		nat_t *nat2;
+
+/*		port = htons(port); */
+		/*
+		 * nat_outlookup() takes the packet information structure
+		 * as its first argument, not the interface pointer.
+		 */
+		nat2 = nat_outlookup(fin, IPN_UDP, IPPROTO_UDP,
+				     ip->ip_src, ip->ip_dst);
+		if (nat2 == NULL) {
+			struct udphdr udp;
+
+			/* Fake UDP header carrying the port just found. */
+			bzero((char *)&udp, sizeof(udp));
+			udp.uh_sport = port;
+
+			bcopy((caddr_t)fin, (caddr_t)&fi, sizeof(fi));
+			fi.fin_fi.fi_p = IPPROTO_UDP;
+			fi.fin_data[0] = port;
+			fi.fin_data[1] = 0;
+			fi.fin_dp = (char *)&udp;
+
+			nat2 = nat_new(&fi, nat->nat_ptr, NULL,
+				       NAT_SLAVE|IPN_UDP|SI_W_DPORT,
+				       NAT_OUTBOUND);
+			if (nat2 != NULL) {
+				(void) nat_proto(&fi, nat2, IPN_UDP);
+				nat_update(&fi, nat2, nat2->nat_ptr);
+
+				nat2->nat_ptr->in_hits++;
+#ifdef IPFILTER_LOG
+				nat_log(nat2, (u_int)(nat->nat_ptr->in_redir));
+#endif
+				/* Rewrite address and port in the payload. */
+				bcopy((caddr_t)&ip->ip_src.s_addr,
+				      data + off, 4);
+				bcopy((caddr_t)&nat2->nat_outport,
+				      data + off + 4, 2);
+			}
+		}
+	}
+	return 0;
+}
diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_htable.h b/usr/src/uts/common/inet/ipf/netinet/ip_htable.h
new file mode 100644
index 0000000000..082959a9c5
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/netinet/ip_htable.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2003 by Darren Reed.
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ *
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifndef __IP_HTABLE_H__
+#define __IP_HTABLE_H__
+/* Declarations for IPFilter hash tables: entries, tables and statistics. */
+#include "netinet/ip_lookup.h"
+/* One entry in a hash table. */
+typedef struct iphtent_s {
+ struct iphtent_s *ipe_next, **ipe_pnext;
+ void *ipe_ptr;
+ sa_family_t ipe_family;
+ i6addr_t ipe_addr;
+ i6addr_t ipe_mask;
+ int ipe_ref;
+ union { /* accessed through ipe_value / ipe_group below */
+ char ipeu_char[16];
+ u_long ipeu_long;
+ u_int ipeu_int;
+ }ipe_un;
+} iphtent_t;
+
+#define ipe_value ipe_un.ipeu_int
+#define ipe_group ipe_un.ipeu_char
+
+#define IPE_HASH_FN(a, m, s) (((a) * (m)) % (s)) /* (a * m) modulo s */
+
+/* A hash table of iphtent_t entries. */
+typedef struct iphtable_s {
+ ipfrwlock_t iph_rwlock;
+ struct iphtable_s *iph_next, **iph_pnext;
+ struct iphtent_s **iph_table;
+ size_t iph_size; /* size of hash table */
+ u_long iph_seed; /* hashing seed */
+ u_32_t iph_flags;
+ u_int iph_unit; /* IPL_LOG* */
+ u_int iph_ref;
+ u_int iph_type; /* lookup or group map - IPHASH_* */
+ u_int iph_masks[4]; /* IPv4 or IPv6 netmasks in use */
+ char iph_name[FR_GROUPLEN]; /* hash table number */
+} iphtable_t;
+
+
+/* iph_type */
+#define IPHASH_LOOKUP 0
+#define IPHASH_GROUPMAP 1
+#define IPHASH_ANON 0x80000000
+
+/* Hash table statistics; see fr_gethtablestat() below. */
+typedef struct iphtstat_s {
+ iphtable_t *iphs_tables;
+ u_long iphs_numtables;
+ u_long iphs_numnodes;
+ u_long iphs_nomem;
+ u_long iphs_pad[16];
+} iphtstat_t;
+
+
+extern iphtable_t *ipf_htables[IPL_LOGSIZE]; /* one slot per IPL_LOGSIZE index */
+
+extern void fr_htable_unload __P((void));
+extern int fr_newhtable __P((iplookupop_t *));
+extern iphtable_t *fr_findhtable __P((int, char *));
+extern int fr_removehtable __P((iplookupop_t *));
+extern size_t fr_flushhtable __P((iplookupflush_t *));
+extern int fr_addhtent __P((iphtable_t *, iphtent_t *));
+extern int fr_delhtent __P((iphtable_t *, iphtent_t *));
+extern void fr_derefhtable __P((iphtable_t *));
+extern void fr_delhtable __P((iphtable_t *));
+extern void *fr_iphmfindgroup __P((void *, int, void *));
+extern int fr_iphmfindip __P((void *, int, void *));
+extern int fr_gethtablestat __P((iplookupop_t *));
+
+#endif /* __IP_HTABLE_H__ */
diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_ipsec_pxy.c b/usr/src/uts/common/inet/ipf/netinet/ip_ipsec_pxy.c
new file mode 100644
index 0000000000..aa7e1afd4d
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/netinet/ip_ipsec_pxy.c
@@ -0,0 +1,346 @@
+/*
+ * Copyright (C) 2001-2003 by Darren Reed
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ *
+ * Simple ISAKMP transparent proxy for in-kernel use. For use with the NAT
+ * code.
+ *
+ * $Id: ip_ipsec_pxy.c,v 2.20.2.7 2005/07/15 21:56:50 darrenr Exp $
+ *
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#define IPF_IPSEC_PROXY
+
+
+/* Entry points of the IPsec (ISAKMP) proxy defined in this file. */
+int ippr_ipsec_init __P((void));
+void ippr_ipsec_fini __P((void));
+int ippr_ipsec_new __P((fr_info_t *, ap_session_t *, nat_t *));
+void ippr_ipsec_del __P((ap_session_t *));
+int ippr_ipsec_inout __P((fr_info_t *, ap_session_t *, nat_t *));
+int ippr_ipsec_match __P((fr_info_t *, ap_session_t *, nat_t *));
+
+/* Rule stored in fin_fr of the synthetic packet info given to fr_addstate(). */
+static frentry_t ipsecfr;
+/* Timeout queues obtained from nat_utqe and ips_utqe by ippr_ipsec_init(). */
+static ipftq_t *ipsecnattqe;
+static ipftq_t *ipsecstatetqe;
+/*
+ * Scratch copy of the packet payload used by ippr_ipsec_new().
+ * NOTE(review): file-scope and shared by every session; no lock is taken
+ * around it in this file - confirm that callers serialise access.
+ */
+static char ipsec_buffer[1500];
+
+/* Set to 1 once ipsecfr.fr_lock has been initialised, 0 after destroy. */
+int ipsec_proxy_init = 0;
+/* TTL handed to fr_addtimeoutqueue() and stored in ipsecfr.fr_age[]. */
+int ipsec_proxy_ttl = 60;
+
+/*
+ * Set up the IPsec proxy: prepare the private filter rule and its lock,
+ * then obtain one timeout queue from the NAT list and one from the state
+ * list.  Returns 0 when both queues exist, -1 otherwise.  On the -1 paths
+ * the rule lock stays initialised and ipsec_proxy_init stays 1, so that
+ * ippr_ipsec_fini() can still destroy the lock.
+ */
+int ippr_ipsec_init()
+{
+	bzero((char *)&ipsecfr, sizeof(ipsecfr));
+	ipsecfr.fr_flags = FR_OUTQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE;
+	ipsecfr.fr_ref = 1;
+	MUTEX_INIT(&ipsecfr.fr_lock, "IPsec proxy rule lock");
+	ipsec_proxy_init = 1;
+
+	ipsecnattqe = fr_addtimeoutqueue(&nat_utqe, ipsec_proxy_ttl);
+	if (ipsecnattqe != NULL) {
+		ipsecstatetqe = fr_addtimeoutqueue(&ips_utqe, ipsec_proxy_ttl);
+		if (ipsecstatetqe != NULL) {
+			/* Both queues are available: finish the setup. */
+			ipsecnattqe->ifq_flags |= IFQF_PROXY;
+			ipsecstatetqe->ifq_flags |= IFQF_PROXY;
+
+			ipsecfr.fr_age[0] = ipsec_proxy_ttl;
+			ipsecfr.fr_age[1] = ipsec_proxy_ttl;
+			return 0;
+		}
+
+		/* No state queue: hand the NAT queue back again. */
+		if (fr_deletetimeoutqueue(ipsecnattqe) == 0)
+			fr_freetimeoutqueue(ipsecnattqe);
+		ipsecnattqe = NULL;
+	}
+
+	return -1;
+}
+
+
+/*
+ * Tear down the IPsec proxy: release both timeout queues (a queue is
+ * freed only when fr_deletetimeoutqueue() returns 0) and destroy the rule
+ * lock if ippr_ipsec_init() had created it.
+ */
+void ippr_ipsec_fini()
+{
+	if ((ipsecnattqe != NULL) &&
+	    (fr_deletetimeoutqueue(ipsecnattqe) == 0))
+		fr_freetimeoutqueue(ipsecnattqe);
+	ipsecnattqe = NULL;
+
+	if ((ipsecstatetqe != NULL) &&
+	    (fr_deletetimeoutqueue(ipsecstatetqe) == 0))
+		fr_freetimeoutqueue(ipsecstatetqe);
+	ipsecstatetqe = NULL;
+
+	if (ipsec_proxy_init != 1)
+		return;
+
+	MUTEX_DESTROY(&ipsecfr.fr_lock);
+	ipsec_proxy_init = 0;
+}
+
+
+/*
+ * Setup for a new IPSEC proxy.
+ *
+ * Copies the start of the payload into ipsec_buffer, allocates the
+ * per-session ipsec_pxy_t (stored in aps->aps_data), fills in a private
+ * NAT rule for IPPROTO_ESP, records the two cookies found at the front of
+ * the payload and then tries to create the ESP NAT entry and state entry.
+ *
+ * Returns -1 when fewer than 16 payload bytes are available in the first
+ * buffer, when nat_outlookup() already finds an ESP mapping, or when the
+ * allocation fails.  Returns 0 otherwise - including when nat_new()
+ * returns NULL.
+ */
+int ippr_ipsec_new(fin, aps, nat)
+fr_info_t *fin;
+ap_session_t *aps;
+nat_t *nat;
+{
+ ipsec_pxy_t *ipsec;
+ fr_info_t fi;
+ ipnat_t *ipn;
+ char *ptr;
+ int p, off, dlen, ttl;
+ mb_t *m;
+ ip_t *ip;
+
+ /* Offset of the payload: header bytes (plen - dlen) plus fin_ipoff. */
+ off = fin->fin_plen - fin->fin_dlen + fin->fin_ipoff;
+ bzero(ipsec_buffer, sizeof(ipsec_buffer));
+ ip = fin->fin_ip;
+ m = fin->fin_m;
+
+ /* 16 bytes are needed: two cookies are copied out of the buffer below. */
+ dlen = M_LEN(m) - off;
+ if (dlen < 16)
+ return -1;
+ COPYDATA(m, off, MIN(sizeof(ipsec_buffer), dlen), ipsec_buffer);
+
+ /* Refuse to set up a session if an ESP mapping is already found. */
+ if (nat_outlookup(fin, 0, IPPROTO_ESP, nat->nat_inip,
+ ip->ip_dst) != NULL)
+ return -1;
+
+ aps->aps_psiz = sizeof(*ipsec);
+ KMALLOCS(aps->aps_data, ipsec_pxy_t *, sizeof(*ipsec));
+ if (aps->aps_data == NULL)
+ return -1;
+
+ ipsec = aps->aps_data;
+ bzero((char *)ipsec, sizeof(*ipsec));
+
+ /*
+ * Create NAT rule against which the tunnel/transport mapping is
+ * created. This is required because the current NAT rule does not
+ * describe ESP but UDP instead.
+ */
+ /*
+ * NOTE(review): ipsecnattqe is dereferenced without a NULL check and the
+ * results of fr_addtimeoutqueue() are stored unchecked - confirm this
+ * cannot run after a failed ippr_ipsec_init().
+ */
+ ipn = &ipsec->ipsc_rule;
+ ttl = IPF_TTLVAL(ipsecnattqe->ifq_ttl);
+ ipn->in_tqehead[0] = fr_addtimeoutqueue(&nat_utqe, ttl);
+ ipn->in_tqehead[1] = fr_addtimeoutqueue(&nat_utqe, ttl);
+ ipn->in_ifps[0] = fin->fin_ifp;
+ ipn->in_apr = NULL;
+ ipn->in_use = 1;
+ ipn->in_hits = 1;
+ ipn->in_nip = ntohl(nat->nat_outip.s_addr);
+ ipn->in_ippip = 1;
+ ipn->in_inip = nat->nat_inip.s_addr;
+ ipn->in_inmsk = 0xffffffff;
+ ipn->in_outip = fin->fin_saddr;
+ ipn->in_outmsk = nat->nat_outip.s_addr;
+ ipn->in_srcip = fin->fin_saddr;
+ ipn->in_srcmsk = 0xffffffff;
+ ipn->in_redir = NAT_MAP;
+ bcopy(nat->nat_ptr->in_ifnames[0], ipn->in_ifnames[0],
+ sizeof(ipn->in_ifnames[0]));
+ ipn->in_p = IPPROTO_ESP;
+
+ /*
+ * Build a private copy of the packet info that describes an ESP packet
+ * with no ports.  The real IP header's protocol field is switched to
+ * ESP as well; the saved value p is written back before returning.
+ */
+ bcopy((char *)fin, (char *)&fi, sizeof(fi));
+ fi.fin_state = NULL;
+ fi.fin_nat = NULL;
+ fi.fin_fi.fi_p = IPPROTO_ESP;
+ fi.fin_fr = &ipsecfr;
+ fi.fin_data[0] = 0;
+ fi.fin_data[1] = 0;
+ p = ip->ip_p;
+ ip->ip_p = IPPROTO_ESP;
+ fi.fin_flx &= ~(FI_TCPUDP|FI_STATE|FI_FRAG);
+ fi.fin_flx |= FI_IGNORE;
+
+ /* Initiator cookie first, responder cookie directly after it. */
+ ptr = ipsec_buffer;
+ bcopy(ptr, (char *)ipsec->ipsc_icookie, sizeof(ipsec_cookie_t));
+ ptr += sizeof(ipsec_cookie_t);
+ bcopy(ptr, (char *)ipsec->ipsc_rcookie, sizeof(ipsec_cookie_t));
+ /*
+ * The responder cookie should only be non-zero if the initiator
+ * cookie is non-zero. Therefore, it is safe to assume(!) that the
+ * cookies are both set after copying if the responder is non-zero.
+ */
+ if ((ipsec->ipsc_rcookie[0]|ipsec->ipsc_rcookie[1]) != 0)
+ ipsec->ipsc_rckset = 1;
+
+ /* The state entry is only attempted once the NAT entry exists. */
+ ipsec->ipsc_nat = nat_new(&fi, ipn, &ipsec->ipsc_nat,
+ NAT_SLAVE|SI_WILDP, NAT_OUTBOUND);
+ if (ipsec->ipsc_nat != NULL) {
+ (void) nat_proto(&fi, ipsec->ipsc_nat, 0);
+ nat_update(&fi, ipsec->ipsc_nat, ipn);
+
+ fi.fin_data[0] = 0;
+ fi.fin_data[1] = 0;
+ ipsec->ipsc_state = fr_addstate(&fi, &ipsec->ipsc_state,
+ SI_WILDP);
+ if (fi.fin_state != NULL)
+ fr_statederef(&fi, (ipstate_t **)&fi.fin_state);
+ }
+ /* Put the original protocol number back into the IP header. */
+ ip->ip_p = p & 0xff;
+ return 0;
+}
+
+
+/*
+ * For IKE packets travelling in the same direction as the NAT entry:
+ * refresh the timeouts of the ESP NAT and state entries if they still
+ * exist, and recreate whichever of them has disappeared.
+ *
+ * Packets going against nat->nat_dir are ignored.  Always returns 0.
+ */
+int ippr_ipsec_inout(fin, aps, nat)
+fr_info_t *fin;
+ap_session_t *aps;
+nat_t *nat;
+{
+ ipsec_pxy_t *ipsec;
+ fr_info_t fi;
+ ip_t *ip;
+ int p;
+
+ if ((fin->fin_out == 1) && (nat->nat_dir == NAT_INBOUND))
+ return 0;
+
+ if ((fin->fin_out == 0) && (nat->nat_dir == NAT_OUTBOUND))
+ return 0;
+
+ ipsec = aps->aps_data;
+
+ if (ipsec != NULL) {
+ ip = fin->fin_ip;
+ p = ip->ip_p;
+
+ /*
+ * fi is only filled in when something has to be recreated; every
+ * use of fi below sits on a path where one of the two is NULL.
+ */
+ if ((ipsec->ipsc_nat == NULL) || (ipsec->ipsc_state == NULL)) {
+ bcopy((char *)fin, (char *)&fi, sizeof(fi));
+ fi.fin_state = NULL;
+ fi.fin_nat = NULL;
+ fi.fin_fi.fi_p = IPPROTO_ESP;
+ fi.fin_fr = &ipsecfr;
+ fi.fin_data[0] = 0;
+ fi.fin_data[1] = 0;
+ ip->ip_p = IPPROTO_ESP;
+ fi.fin_flx &= ~(FI_TCPUDP|FI_STATE|FI_FRAG);
+ fi.fin_flx |= FI_IGNORE;
+ }
+
+ /*
+ * Update NAT timeout/create NAT if missing.
+ */
+ if (ipsec->ipsc_nat != NULL)
+ fr_queueback(&ipsec->ipsc_nat->nat_tqe);
+ else {
+ ipsec->ipsc_nat = nat_new(&fi, &ipsec->ipsc_rule,
+ &ipsec->ipsc_nat,
+ NAT_SLAVE|SI_WILDP,
+ nat->nat_dir);
+ if (ipsec->ipsc_nat != NULL) {
+ (void) nat_proto(&fi, ipsec->ipsc_nat, 0);
+ nat_update(&fi, ipsec->ipsc_nat,
+ &ipsec->ipsc_rule);
+ }
+ }
+
+ /*
+ * Update state timeout/create state if missing.
+ */
+ /* The lock is dropped before fr_addstate() is called. */
+ READ_ENTER(&ipf_state);
+ if (ipsec->ipsc_state != NULL) {
+ fr_queueback(&ipsec->ipsc_state->is_sti);
+ ipsec->ipsc_state->is_die = nat->nat_age;
+ RWLOCK_EXIT(&ipf_state);
+ } else {
+ RWLOCK_EXIT(&ipf_state);
+ fi.fin_data[0] = 0;
+ fi.fin_data[1] = 0;
+ ipsec->ipsc_state = fr_addstate(&fi,
+ &ipsec->ipsc_state,
+ SI_WILDP);
+ if (fi.fin_state != NULL)
+ fr_statederef(&fi, (ipstate_t **)&fi.fin_state);
+ }
+ /* Undo the temporary switch of the header protocol to ESP. */
+ ip->ip_p = p;
+ }
+ return 0;
+}
+
+
+/*
+ * This extends the NAT matching to be based on the cookies associated with
+ * a session and found at the front of IKE packets.  The cookies are always
+ * in the same order (not reversed depending on packet flow direction as with
+ * UDP/TCP port numbers).
+ *
+ * Returns 0 when the packet belongs to this session and -1 when it does
+ * not.  A packet is rejected when it is a fragment, when its payload is
+ * shorter than the two cookies, when the session has no proxy data, or
+ * when a cookie differs.  If the session has not yet seen a responder
+ * cookie, the first non-zero one found in a packet is recorded.
+ */
+int ippr_ipsec_match(fin, aps, nat)
+fr_info_t *fin;
+ap_session_t *aps;
+nat_t *nat;
+{
+	ipsec_pxy_t *ipsec;
+	u_32_t cookies[4];
+	mb_t *m;
+	int off;
+
+	nat = nat;	/* LINT */
+
+	if ((fin->fin_dlen < sizeof(cookies)) || (fin->fin_flx & FI_FRAG))
+		return -1;
+
+	/*
+	 * ippr_ipsec_inout() and ippr_ipsec_del() both allow for a session
+	 * without proxy data; with nothing to compare against, the packet
+	 * cannot be matched to this session.
+	 */
+	ipsec = aps->aps_data;
+	if (ipsec == NULL)
+		return -1;
+
+	/* Payload offset: header bytes (plen - dlen) plus fin_ipoff. */
+	off = fin->fin_plen - fin->fin_dlen + fin->fin_ipoff;
+	m = fin->fin_m;
+	COPYDATA(m, off, sizeof(cookies), (char *)cookies);
+
+	/* cookies[0..1] hold the initiator cookie, cookies[2..3] the responder. */
+	if ((cookies[0] != ipsec->ipsc_icookie[0]) ||
+	    (cookies[1] != ipsec->ipsc_icookie[1]))
+		return -1;
+
+	if (ipsec->ipsc_rckset == 0) {
+		if ((cookies[2]|cookies[3]) == 0) {
+			return 0;
+		}
+		/* First non-zero responder cookie seen: remember it. */
+		ipsec->ipsc_rckset = 1;
+		ipsec->ipsc_rcookie[0] = cookies[2];
+		ipsec->ipsc_rcookie[1] = cookies[3];
+		return 0;
+	}
+
+	if ((cookies[2] != ipsec->ipsc_rcookie[0]) ||
+	    (cookies[3] != ipsec->ipsc_rcookie[1]))
+		return -1;
+	return 0;
+}
+
+
+/*
+ * Session teardown.  If a state entry is attached, it is marked to expire
+ * at fr_ticks + 1, detached from the session (is_me cleared) and moved to
+ * the front of its timeout queue.  The session then forgets both its state
+ * and NAT pointers.
+ */
+void ippr_ipsec_del(aps)
+ap_session_t *aps;
+{
+	ipsec_pxy_t *ipsec = aps->aps_data;
+	ipstate_t *is;
+
+	if (ipsec == NULL)
+		return;
+
+	/*
+	 * Don't bother changing any of the NAT structure details,
+	 * *_del() is on a callback from aps_free(), from nat_delete()
+	 */
+
+	READ_ENTER(&ipf_state);
+	is = ipsec->ipsc_state;
+	if (is != NULL) {
+		is->is_die = fr_ticks + 1;
+		is->is_me = NULL;
+		fr_queuefront(&is->is_sti);
+	}
+	RWLOCK_EXIT(&ipf_state);
+
+	ipsec->ipsc_state = NULL;
+	ipsec->ipsc_nat = NULL;
+}
diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_irc_pxy.c b/usr/src/uts/common/inet/ipf/netinet/ip_irc_pxy.c
new file mode 100644
index 0000000000..4b7a139048
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/netinet/ip_irc_pxy.c
@@ -0,0 +1,438 @@
+/*
+ * Copyright (C) 2000-2003 Darren Reed
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ *
+ * $Id: ip_irc_pxy.c,v 2.39.2.4 2005/02/04 10:22:55 darrenr Exp $
+ *
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#define IPF_IRC_PROXY
+
+#define IPF_IRCBUFSZ 96 /* This *MUST* be >= 64! */
+
+
+/* Entry points and helpers of the IRC (DCC) proxy. */
+int ippr_irc_init __P((void));
+void ippr_irc_fini __P((void));
+int ippr_irc_new __P((fr_info_t *, ap_session_t *, nat_t *));
+int ippr_irc_out __P((fr_info_t *, ap_session_t *, nat_t *));
+int ippr_irc_send __P((fr_info_t *, nat_t *));
+int ippr_irc_complete __P((ircinfo_t *, char *, size_t));
+/* NOTE(review): declared here but not defined in the code that follows. */
+u_short ipf_irc_atoi __P((char **));
+
+/* Rule stored in fin_fr when ippr_irc_send() creates NAT/state entries. */
+static frentry_t ircnatfr;
+
+/* Set to 1 once ircnatfr.fr_lock has been initialised, 0 after destroy. */
+int irc_proxy_init = 0;
+
+
+/*
+ * Prepare the IRC proxy's private filter rule: clear it, give it one
+ * reference and the inbound pass/quick/keep-state flags, create its lock
+ * and note that the lock now exists.  Always returns 0.
+ */
+int ippr_irc_init()
+{
+	bzero((char *)&ircnatfr, sizeof(ircnatfr));
+
+	ircnatfr.fr_flags = FR_INQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE;
+	ircnatfr.fr_ref = 1;
+
+	MUTEX_INIT(&ircnatfr.fr_lock, "IRC proxy rule lock");
+	irc_proxy_init = 1;
+
+	return 0;
+}
+
+
+/*
+ * Undo ippr_irc_init(): destroy the rule lock, but only if it was created.
+ */
+void ippr_irc_fini()
+{
+	if (irc_proxy_init != 1)
+		return;
+
+	MUTEX_DESTROY(&ircnatfr.fr_lock);
+	irc_proxy_init = 0;
+}
+
+
+/*
+ * NULL-terminated list of the DCC sub-commands ippr_irc_complete() accepts.
+ * Each entry ends in a space, so the separator after the keyword is part
+ * of the comparison.
+ */
+char *ippr_irc_dcctypes[] = {
+ "CHAT ", /* CHAT chat ipnumber portnumber */
+ "SEND ", /* SEND filename ipnumber portnumber */
+ "MOVE ",
+ "TSEND ",
+ "SCHAT ",
+ NULL,
+};
+
+
+/*
+ * :A PRIVMSG B :^ADCC CHAT chat 0 0^A\r\n
+ * PRIVMSG B ^ADCC CHAT chat 0 0^A\r\n
+ */
+
+
+/*
+ * Decide whether buf (len bytes) starts with a complete PRIVMSG carrying a
+ * DCC request in one of the two forms shown above.
+ *
+ * Returns 1 on a match, after recording in *ircp pointers into buf for the
+ * source nick, destination nick, DCC type, argument and address fields,
+ * plus the parsed address (irc_ipnum), port (irc_port) and the number of
+ * bytes consumed (irc_len).  Returns 0 otherwise; irc_ipnum and irc_port
+ * are zeroed on entry, the other fields may be partly updated.
+ *
+ * i counts the bytes believed to remain; the "i < N" tests reject input
+ * too short to hold the rest of the message.
+ *
+ * NOTE(review): the scanning loops use "c = *s++", which leaves s one past
+ * the delimiter, and most are followed by a further "s++".  Traced by hand
+ * against the sample messages above, the parser does not appear to accept
+ * either form - verify against real DCC traffic before relying on it.
+ */
+int ippr_irc_complete(ircp, buf, len)
+ircinfo_t *ircp;
+char *buf;
+size_t len;
+{
+ register char *s, c;
+ register size_t i;
+ u_32_t l;
+ int j, k;
+
+ ircp->irc_ipnum = 0;
+ ircp->irc_port = 0;
+
+ if (len < 31)
+ return 0;
+ s = buf;
+ c = *s++;
+ i = len - 1;
+
+ if ((c != ':') && (c != 'P'))
+ return 0;
+
+ if (c == ':') {
+ /*
+ * Loosely check that the source is a nickname of some sort
+ */
+ /* NOTE(review): s already points at buf[1]; this skips to buf[2]. */
+ s++;
+ c = *s;
+ ircp->irc_snick = s;
+ if (!ISALPHA(c))
+ return 0;
+ i--;
+ for (c = *s; !ISSPACE(c) && (i > 0); i--)
+ c = *s++;
+ if (i < 31)
+ return 0;
+ /*
+ * NOTE(review): when the loop stops on whitespace, c is that
+ * whitespace character, so this test rejects the message.
+ */
+ if (c != 'P')
+ return 0;
+ } else
+ ircp->irc_snick = NULL;
+
+ /*
+ * Check command string
+ */
+ /*
+ * NOTE(review): in the no-prefix case s points at buf[1] here, i.e. after
+ * the leading 'P' - confirm this comparison can succeed.
+ */
+ if (strncmp(s, "PRIVMSG ", 8))
+ return 0;
+ i -= 8;
+ s += 8;
+ c = *s;
+ ircp->irc_dnick = s;
+
+ /*
+ * Loosely check that the destination is a nickname of some sort
+ */
+ if (!ISALPHA(c))
+ return 0;
+ for (; !ISSPACE(c) && (i > 0); i--)
+ c = *s++;
+ if (i < 20)
+ return 0;
+ s++,
+ i--;
+
+ /*
+ * Look for a ^A to start the DCC
+ */
+ c = *s;
+ if (c == ':') {
+ s++;
+ c = *s;
+ }
+
+ /*
+ * NOTE(review): the literal is five characters long but only four are
+ * compared and skipped, which leaves s on the space before the DCC
+ * type while the table entries begin with a letter.
+ */
+ if (strncmp(s, "\001DCC ", 4))
+ return 0;
+
+ i -= 4;
+ s += 4;
+
+ /*
+ * Check for a recognised DCC command
+ */
+ for (j = 0, k = 0; ippr_irc_dcctypes[j]; j++) {
+ k = MIN(strlen(ippr_irc_dcctypes[j]), i);
+ if (!strncmp(ippr_irc_dcctypes[j], s, k))
+ break;
+ }
+ if (!ippr_irc_dcctypes[j])
+ return 0;
+
+ ircp->irc_type = s;
+ i -= k;
+ s += k;
+
+ if (i < 11)
+ return 0;
+
+ /*
+ * Check for the arg
+ */
+ c = *s;
+ if (ISSPACE(c))
+ return 0;
+ ircp->irc_arg = s;
+ for (; (c != ' ') && (c != '\001') && (i > 0); i--)
+ c = *s++;
+
+ if (c == '\001') /* In reality a ^A can quote another ^A...*/
+ return 0;
+
+ if (i < 5)
+ return 0;
+
+ s++;
+ i--;
+ c = *s;
+ if (!ISDIGIT(c))
+ return 0;
+ ircp->irc_addr = s;
+ /*
+ * Get the IP#
+ */
+ /* Decimal accumulation into l; no overflow check is made. */
+ for (l = 0; ISDIGIT(c) && (i > 0); i--) {
+ l *= 10;
+ l += c - '0';
+ c = *s++;
+ }
+
+ if (i < 4)
+ return 0;
+
+ if (c != ' ')
+ return 0;
+
+ ircp->irc_ipnum = l;
+ s++;
+ i--;
+ c = *s;
+ if (!ISDIGIT(c))
+ return 0;
+ /*
+ * Get the port#
+ */
+ for (l = 0; ISDIGIT(c) && (i > 0); i--) {
+ l *= 10;
+ l += c - '0';
+ c = *s++;
+ }
+ if (i < 3)
+ return 0;
+ /* The message must end with ^A CR LF. */
+ if (strncmp(s, "\001\r\n", 3))
+ return 0;
+ s += 3;
+ ircp->irc_len = s - buf;
+ ircp->irc_port = l;
+ return 1;
+}
+
+
+/*
+ * Attach a fresh, zero-filled ircinfo_t to a new proxy session.
+ * Returns 0 on success and -1 if the allocation fails; fin and nat are
+ * not used.
+ */
+int ippr_irc_new(fin, aps, nat)
+fr_info_t *fin;
+ap_session_t *aps;
+nat_t *nat;
+{
+	ircinfo_t *irc;
+
+	KMALLOC(irc, ircinfo_t *);
+	if (irc != NULL) {
+		fin = fin;	/* LINT */
+		nat = nat;	/* LINT */
+
+		bzero((char *)irc, sizeof(*irc));
+		aps->aps_psiz = sizeof(ircinfo_t);
+		aps->aps_data = irc;
+		return 0;
+	}
+
+	return -1;
+}
+
+
+/*
+ * Look for a DCC request in an IRC packet and, if one is found whose
+ * advertised address equals the inside address of the NAT entry, rewrite
+ * the address/port text, adjust the packet length and IP checksum, and
+ * add NAT and state entries for the connection expected to come back.
+ *
+ * Returns the change in payload length (possibly 0 or negative) when the
+ * packet was rewritten, and 0 when it was left alone.
+ *
+ * At most sizeof(ctcpbuf) bytes of payload are examined.  The parser is
+ * told exactly how many bytes were copied, so it never scans beyond the
+ * local buffer even when the packet carries more data than that.
+ */
+int ippr_irc_send(fin, nat)
+fr_info_t *fin;
+nat_t *nat;
+{
+	char ctcpbuf[IPF_IRCBUFSZ], newbuf[IPF_IRCBUFSZ];
+	tcphdr_t *tcp, tcph, *tcp2 = &tcph;
+	int off, inc = 0, i, dlen;
+	size_t nlen = 0, olen, clen;
+	struct in_addr swip;
+	u_short a5, sp;
+	ircinfo_t *irc;
+	fr_info_t fi;
+	nat_t *nat2;
+	u_int a1;
+	ip_t *ip;
+	mb_t *m;
+#ifdef MENTAT
+	mb_t *m1;
+#endif
+
+	m = fin->fin_m;
+	ip = fin->fin_ip;
+	tcp = (tcphdr_t *)fin->fin_dp;
+	bzero(ctcpbuf, sizeof(ctcpbuf));
+	off = (char *)tcp - (char *)ip + (TCP_OFF(tcp) << 2) + fin->fin_ipoff;
+
+#ifdef __sgi
+	dlen = fin->fin_plen - off;
+#else
+	dlen = MSGDSIZE(m) - off;
+#endif
+	if (dlen <= 0)
+		return 0;
+
+	/* Number of payload bytes that actually fit in ctcpbuf. */
+	clen = MIN(sizeof(ctcpbuf), dlen);
+	COPYDATA(m, off, clen, ctcpbuf);
+
+	ctcpbuf[sizeof(ctcpbuf) - 1] = '\0';
+	*newbuf = '\0';
+
+	/* Parse only what was copied, not the full payload length. */
+	irc = nat->nat_aps->aps_data;
+	if (ippr_irc_complete(irc, ctcpbuf, clen) == 0)
+		return 0;
+
+	/*
+	 * Check that the IP address in the DCC request is the same as the
+	 * inside address of the sender - prevents using DCC to open
+	 * mappings for other hosts.
+	 */
+	if (irc->irc_ipnum != ntohl(nat->nat_inip.s_addr))
+		return 0;
+
+	a5 = irc->irc_port;
+
+	/*
+	 * Don't allow the DCC request to specify a port < 1024.  This is
+	 * decided before the packet is touched: returning 0 tells the
+	 * caller the length is unchanged, so nothing may have been
+	 * rewritten at that point.
+	 */
+	if (a5 < 1024)
+		return 0;
+
+	/*
+	 * Calculate new address parts for the DCC command
+	 */
+	a1 = ntohl(ip->ip_src.s_addr);
+	olen = irc->irc_len;
+	i = irc->irc_addr - ctcpbuf;
+	i++;
+	/*
+	 * NOTE(review): the prefix copied here is overwritten straight away
+	 * because the formatting call below writes at the start of newbuf,
+	 * not at newbuf + i.  Left as it is until the offsets produced by
+	 * ippr_irc_complete() have been verified.
+	 */
+	(void) strncpy(newbuf, ctcpbuf, i);
+	/* DO NOT change these! */
+#if defined(SNPRINTF) && defined(KERNEL)
+	(void) SNPRINTF(newbuf, sizeof(newbuf) - i, "%u %u\001\r\n", a1, a5);
+#else
+	(void) sprintf(newbuf, "%u %u\001\r\n", a1, a5);
+#endif
+
+	nlen = strlen(newbuf);
+	inc = nlen - olen;
+
+	if ((inc + ip->ip_len) > 65535)
+		return 0;
+
+#ifdef MENTAT
+	/* Find the last block of the message; that is where size changes. */
+	for (m1 = m; m1->b_cont; m1 = m1->b_cont)
+		;
+	if ((inc > 0) && (m1->b_datap->db_lim - m1->b_wptr < inc)) {
+		mblk_t *nm;
+
+		/* alloc enough to keep same trailer space for lower driver */
+		nm = allocb(nlen, BPRI_MED);
+		PANIC((!nm),("ippr_irc_out: allocb failed"));
+
+		nm->b_band = m1->b_band;
+		nm->b_wptr += nlen;
+
+		m1->b_wptr -= olen;
+		PANIC((m1->b_wptr < m1->b_rptr),
+		      ("ippr_irc_out: cannot handle fragmented data block"));
+
+		linkb(m1, nm);
+	} else {
+# if SOLARIS && defined(ICK_VALID)
+		if (m1->b_datap->db_struiolim == m1->b_wptr)
+			m1->b_datap->db_struiolim += inc;
+		m1->b_datap->db_struioflag &= ~STRUIO_IP;
+# endif
+		m1->b_wptr += inc;
+	}
+#else
+	if (inc < 0)
+		m_adj(m, inc);
+	/* the mbuf chain will be extended if necessary by m_copyback() */
+#endif
+	COPYBACK(m, off, nlen, newbuf);
+
+	if (inc != 0) {
+#if defined(MENTAT) || defined(__sgi)
+		register u_32_t sum1, sum2;
+
+		sum1 = ip->ip_len;
+		sum2 = ip->ip_len + inc;
+
+		/* Because ~1 == -2, We really need ~1 == -1 */
+		if (sum1 > sum2)
+			sum2--;
+		sum2 -= sum1;
+		sum2 = (sum2 & 0xffff) + (sum2 >> 16);
+
+		fix_outcksum(fin, &ip->ip_sum, sum2);
+#endif
+		ip->ip_len += inc;
+	}
+
+	/*
+	 * Add skeleton NAT entry for connection which will come back the
+	 * other way.
+	 */
+	sp = htons(a5);
+
+	/*
+	 * Check for a conflicting mapping before creating a new one.
+	 * NOTE(review): fi is prepared here but the lookup is made with
+	 * fin; confirm which of the two was intended.
+	 */
+	bcopy((caddr_t)fin, (caddr_t)&fi, sizeof(fi));
+	fi.fin_data[0] = sp;
+	fi.fin_data[1] = fin->fin_data[1];
+	nat2 = nat_outlookup(fin, IPN_TCP, nat->nat_p, nat->nat_inip,
+			     ip->ip_dst);
+	if (nat2 == NULL) {
+		/*
+		 * Describe a synthetic TCP packet from the advertised
+		 * port with an unspecified destination port.
+		 */
+		bcopy((caddr_t)fin, (caddr_t)&fi, sizeof(fi));
+		bzero((char *)tcp2, sizeof(*tcp2));
+		tcp2->th_win = htons(8192);
+		tcp2->th_sport = sp;
+		tcp2->th_dport = 0; /* XXX - don't specify remote port */
+		fi.fin_state = NULL;
+		fi.fin_nat = NULL;
+		fi.fin_data[0] = ntohs(sp);
+		fi.fin_data[1] = 0;
+		fi.fin_dp = (char *)tcp2;
+		fi.fin_fr = &ircnatfr;
+		fi.fin_dlen = sizeof(*tcp2);
+		fi.fin_plen = fi.fin_hlen + sizeof(*tcp2);
+		/*
+		 * The header's source address is the inside address while
+		 * the entries are created, and is restored afterwards.
+		 */
+		swip = ip->ip_src;
+		ip->ip_src = nat->nat_inip;
+		nat2 = nat_new(&fi, nat->nat_ptr, NULL,
+			       NAT_SLAVE|IPN_TCP|SI_W_DPORT, NAT_OUTBOUND);
+		if (nat2 != NULL) {
+			(void) nat_proto(&fi, nat2, 0);
+			nat_update(&fi, nat2, nat2->nat_ptr);
+
+			(void) fr_addstate(&fi, NULL, SI_W_DPORT);
+			if (fi.fin_state != NULL)
+				fr_statederef(&fi, (ipstate_t **)&fi.fin_state);
+		}
+		ip->ip_src = swip;
+	}
+	return inc;
+}
+
+
+/*
+ * Outbound packet hook for the IRC proxy: hands the packet to
+ * ippr_irc_send() and passes its result back.  aps is not used.
+ */
+int ippr_irc_out(fin, aps, nat)
+fr_info_t *fin;
+ap_session_t *aps;
+nat_t *nat;
+{
+	int inc;
+
+	aps = aps;	/* LINT */
+
+	inc = ippr_irc_send(fin, nat);
+	return inc;
+}
diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_lookup.h b/usr/src/uts/common/inet/ipf/netinet/ip_lookup.h
new file mode 100644
index 0000000000..76cdd8fd0a
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/netinet/ip_lookup.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 1993-2005 by Darren Reed.
+ * See the IPFILTER.LICENCE file for details on licencing.
+ */
+
+#ifndef __IP_LOOKUP_H__
+#define __IP_LOOKUP_H__
+
+/*
+ * ioctl command numbers for the lookup-table interface.  The two branches
+ * differ only in how the group letter is written: as the character
+ * constant 'r' for ANSI-style compilers, or as a bare r otherwise
+ * (presumably for preprocessors whose _IOW/_IOWR quote the argument
+ * themselves - confirm).  The *W variants reuse the number of their
+ * counterpart but are built with _IOW instead of _IOWR.
+ */
+#if defined(__STDC__) || defined(__GNUC__) || defined(_AIX51)
+# define SIOCLOOKUPADDTABLE _IOWR('r', 60, struct iplookupop)
+# define SIOCLOOKUPDELTABLE _IOWR('r', 61, struct iplookupop)
+# define SIOCLOOKUPSTAT _IOWR('r', 64, struct iplookupop)
+# define SIOCLOOKUPSTATW _IOW('r', 64, struct iplookupop)
+# define SIOCLOOKUPFLUSH _IOWR('r', 65, struct iplookupflush)
+# define SIOCLOOKUPADDNODE _IOWR('r', 67, struct iplookupop)
+# define SIOCLOOKUPADDNODEW _IOW('r', 67, struct iplookupop)
+# define SIOCLOOKUPDELNODE _IOWR('r', 68, struct iplookupop)
+# define SIOCLOOKUPDELNODEW _IOW('r', 68, struct iplookupop)
+#else
+# define SIOCLOOKUPADDTABLE _IOWR(r, 60, struct iplookupop)
+# define SIOCLOOKUPDELTABLE _IOWR(r, 61, struct iplookupop)
+# define SIOCLOOKUPSTAT _IOWR(r, 64, struct iplookupop)
+# define SIOCLOOKUPSTATW _IOW(r, 64, struct iplookupop)
+# define SIOCLOOKUPFLUSH _IOWR(r, 65, struct iplookupflush)
+# define SIOCLOOKUPADDNODE _IOWR(r, 67, struct iplookupop)
+# define SIOCLOOKUPADDNODEW _IOW(r, 67, struct iplookupop)
+# define SIOCLOOKUPDELNODE _IOWR(r, 68, struct iplookupop)
+# define SIOCLOOKUPDELNODEW _IOW(r, 68, struct iplookupop)
+#endif
+
+/*
+ * Request structure named by most of the SIOCLOOKUP* ioctls above.  It
+ * identifies a table by type, unit and name, and points at a further
+ * structure of iplo_size bytes.
+ */
+typedef struct iplookupop {
+ int iplo_type; /* IPLT_* */
+ int iplo_unit; /* IPL_LOG* */
+ u_int iplo_arg;
+ char iplo_name[FR_GROUPLEN];
+ size_t iplo_size; /* sizeof struct at iplo_struct */
+ void *iplo_struct;
+} iplookupop_t;
+
+/*
+ * Request structure named by SIOCLOOKUPFLUSH.  iplf_count is presumably
+ * filled in with the number of items flushed - confirm in ip_lookup.c.
+ */
+typedef struct iplookupflush {
+ int iplf_type; /* IPLT_* */
+ int iplf_unit; /* IPL_LOG* */
+ u_int iplf_arg;
+ size_t iplf_count;
+ char iplf_name[FR_GROUPLEN];
+} iplookupflush_t;
+
+/*
+ * Type, unit, number and group name of a lookup link.  No ioctl in this
+ * header names this structure.
+ */
+typedef struct iplookuplink {
+ int ipll_type; /* IPLT_* */
+ int ipll_unit; /* IPL_LOG* */
+ u_int ipll_num;
+ char ipll_group[FR_GROUPLEN];
+} iplookuplink_t;
+
+/* Values for the *_type members of the structures above. */
+#define IPLT_ALL -1
+#define IPLT_NONE 0
+#define IPLT_POOL 1
+#define IPLT_HASH 2
+
+/* High bit; same value as IPHASH_ANON in ip_htable.h. */
+#define IPLT_ANON 0x80000000
+
+/* Lookup subsystem entry points. */
+extern int ip_lookup_init __P((void));
+extern int ip_lookup_ioctl __P((caddr_t, ioctlcmd_t, int));
+extern void ip_lookup_unload __P((void));
+extern void ip_lookup_deref __P((int, void *));
+
+#endif /* __IP_LOOKUP_H__ */
diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_nat.h b/usr/src/uts/common/inet/ipf/netinet/ip_nat.h
new file mode 100644
index 0000000000..930e8aa103
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/netinet/ip_nat.h
@@ -0,0 +1,478 @@
+/*
+ * Copyright (C) 1995-2001, 2003 by Darren Reed.
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ *
+ * @(#)ip_nat.h 1.5 2/4/96
+ * $Id: ip_nat.h,v 2.90.2.11 2005/06/18 02:41:32 darrenr Exp $
+ *
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifndef __IP_NAT_H__
+#define __IP_NAT_H__
+
+/*
+ * SOLARIS expands to a test for "sun" together with an SVR4 marker.
+ * NOTE(review): the expansion contains "defined", so it is only meaningful
+ * inside #if expressions.
+ */
+#ifndef SOLARIS
+#define SOLARIS (defined(sun) && (defined(__svr4__) || defined(__SVR4)))
+#endif
+
+/*
+ * NAT ioctl command numbers.  As with the lookup ioctls, the group letter
+ * is written as 'r' for ANSI-style compilers and as a bare r otherwise.
+ */
+#if defined(__STDC__) || defined(__GNUC__) || defined(_AIX51)
+#define SIOCADNAT _IOW('r', 60, struct ipfobj)
+#define SIOCRMNAT _IOW('r', 61, struct ipfobj)
+#define SIOCGNATS _IOWR('r', 62, struct ipfobj)
+#define SIOCGNATL _IOWR('r', 63, struct ipfobj)
+#else
+#define SIOCADNAT _IOW(r, 60, struct ipfobj)
+#define SIOCRMNAT _IOW(r, 61, struct ipfobj)
+#define SIOCGNATS _IOWR(r, 62, struct ipfobj)
+#define SIOCGNATL _IOWR(r, 63, struct ipfobj)
+#endif
+
+/*
+ * Compile-time defaults for the NAT tables.  Every value below except
+ * NAT_HW_CKSUM and DEF_NAT_AGE can be overridden by defining it before
+ * this header is included; LARGE_NAT selects the larger set of defaults.
+ */
+#undef LARGE_NAT /* define this if you're setting up a system to NAT
+ * LARGE numbers of networks/hosts - i.e. in the
+ * hundreds or thousands. In such a case, you should
+ * also change the RDR_SIZE and NAT_SIZE below to more
+ * appropriate sizes. The figures below were used for
+ * a setup with 1000-2000 networks to NAT.
+ */
+#ifndef NAT_SIZE
+# ifdef LARGE_NAT
+# define NAT_SIZE 2047
+# else
+# define NAT_SIZE 127
+# endif
+#endif
+#ifndef RDR_SIZE
+# ifdef LARGE_NAT
+# define RDR_SIZE 2047
+# else
+# define RDR_SIZE 127
+# endif
+#endif
+#ifndef HOSTMAP_SIZE
+# ifdef LARGE_NAT
+# define HOSTMAP_SIZE 8191
+# else
+# define HOSTMAP_SIZE 2047
+# endif
+#endif
+#ifndef NAT_TABLE_MAX
+/*
+ * This is newly introduced and for the sake of "least surprise", the numbers
+ * present aren't what we'd normally use for creating a proper hash table.
+ */
+# ifdef LARGE_NAT
+# define NAT_TABLE_MAX 180000
+# else
+# define NAT_TABLE_MAX 30000
+# endif
+#endif
+#ifndef NAT_TABLE_SZ
+# ifdef LARGE_NAT
+# define NAT_TABLE_SZ 16383
+# else
+# define NAT_TABLE_SZ 2047
+# endif
+#endif
+#ifndef APR_LABELLEN
+#define APR_LABELLEN 16
+#endif
+#define NAT_HW_CKSUM 0x80000000
+
+/*
+ * Default NAT entry age: 10 minutes (600 seconds).
+ * NOTE(review): the value 1200 implies two ticks per second - confirm the
+ * tick rate used by the timeout code.
+ */
+#define DEF_NAT_AGE 1200
+
+/* Only pointers to these are needed here, so forward declarations do. */
+struct ipstate;
+struct ap_session;
+
+/*
+ * One NAT table entry, as returned by nat_new() and the nat_*lookup()
+ * routines declared at the end of this header.  The addresses are stored
+ * as i6addr_t; the nat_inip/nat_outip/nat_oip macros below select the
+ * IPv4 member.  The nat_un union holds the per-protocol details.
+ */
+typedef struct nat {
+ ipfmutex_t nat_lock;
+ struct nat *nat_next;
+ struct nat **nat_pnext;
+ struct nat *nat_hnext[2];
+ struct nat **nat_phnext[2];
+ struct hostmap *nat_hm;
+ void *nat_data;
+ struct nat **nat_me;
+ struct ipstate *nat_state;
+ struct ap_session *nat_aps; /* proxy session */
+ frentry_t *nat_fr; /* filter rule ptr if appropriate */
+ struct ipnat *nat_ptr; /* pointer back to the rule */
+ void *nat_ifps[2];
+ void *nat_sync;
+ ipftqent_t nat_tqe;
+ u_32_t nat_flags;
+ u_32_t nat_sumd[2]; /* ip checksum delta for data segment*/
+ u_32_t nat_ipsumd; /* ip checksum delta for ip header */
+ u_32_t nat_mssclamp; /* if != zero clamp MSS to this */
+ i6addr_t nat_inip6;
+ i6addr_t nat_outip6;
+ i6addr_t nat_oip6; /* other ip */
+ U_QUAD_T nat_pkts[2];
+ U_QUAD_T nat_bytes[2];
+ union {
+ udpinfo_t nat_unu;
+ tcpinfo_t nat_unt;
+ icmpinfo_t nat_uni;
+ greinfo_t nat_ugre;
+ } nat_un;
+ u_short nat_oport; /* other port */
+ u_short nat_use;
+ u_char nat_p; /* protocol for NAT */
+ int nat_dir;
+ int nat_ref; /* reference count */
+ int nat_hv[2];
+ char nat_ifnames[2][LIFNAMSIZ];
+ int nat_rev; /* 0 = forward, 1 = reverse */
+} nat_t;
+
+/*
+ * Shorthand accessors.  nat_age and nat_tcpstate live in the timeout queue
+ * entry; the port and ICMP accessors reach into the nat_un union.
+ */
+#define nat_inip nat_inip6.in4
+#define nat_outip nat_outip6.in4
+#define nat_oip nat_oip6.in4
+#define nat_age nat_tqe.tqe_die
+#define nat_inport nat_un.nat_unt.ts_sport
+#define nat_outport nat_un.nat_unt.ts_dport
+#define nat_type nat_un.nat_uni.ici_type
+#define nat_seq nat_un.nat_uni.ici_seq
+#define nat_id nat_un.nat_uni.ici_id
+#define nat_tcpstate nat_tqe.tqe_state
+
+/*
+ * Values for nat_dir
+ */
+#define NAT_INBOUND 0
+#define NAT_OUTBOUND 1
+
+/*
+ * Definitions for nat_flags
+ */
+#define NAT_TCP 0x0001 /* IPN_TCP */
+#define NAT_UDP 0x0002 /* IPN_UDP */
+#define NAT_ICMPERR 0x0004 /* IPN_ICMPERR */
+#define NAT_ICMPQUERY 0x0008 /* IPN_ICMPQUERY */
+#define NAT_SEARCH 0x0010
+#define NAT_SLAVE 0x0020 /* Slave connection for a proxy */
+#define NAT_NOTRULEPORT 0x0040
+
+/* Combinations of the bits above; NAT_FROMRULE equals NAT_TCPUDP. */
+#define NAT_TCPUDP (NAT_TCP|NAT_UDP)
+#define NAT_TCPUDPICMP (NAT_TCP|NAT_UDP|NAT_ICMPERR)
+#define NAT_TCPUDPICMPQ (NAT_TCP|NAT_UDP|NAT_ICMPQUERY)
+#define NAT_FROMRULE (NAT_TCP|NAT_UDP)
+
+/* Bits 0x0100-0x8000 are left free for flags defined outside this header. */
+/* 0x0100 reserved for FI_W_SPORT */
+/* 0x0200 reserved for FI_W_DPORT */
+/* 0x0400 reserved for FI_W_SADDR */
+/* 0x0800 reserved for FI_W_DADDR */
+/* 0x1000 reserved for FI_W_NEWFR */
+/* 0x2000 reserved for SI_CLONE */
+/* 0x4000 reserved for SI_CLONED */
+/* 0x8000 reserved for SI_IGNOREPKT */
+
+#define NAT_DEBUG 0x800000
+
+/*
+ * One NAT rule.  The members from in_flags to the end form the part that
+ * IPN_CMPSIZ measures; the members before it are list linkage and
+ * bookkeeping.  Addresses are stored as address/mask pairs of i6addr_t,
+ * with IPv4 accessors defined below.
+ */
+typedef struct ipnat {
+ struct ipnat *in_next; /* NAT rule list next */
+ struct ipnat *in_rnext; /* rdr rule hash next */
+ struct ipnat **in_prnext; /* prior rdr next ptr */
+ struct ipnat *in_mnext; /* map rule hash next */
+ struct ipnat **in_pmnext; /* prior map next ptr */
+ struct ipftq *in_tqehead[2];
+ void *in_ifps[2];
+ void *in_apr;
+ char *in_comment;
+ i6addr_t in_next6;
+ u_long in_space;
+ u_long in_hits;
+ u_int in_use;
+ u_int in_hv;
+ int in_flineno; /* conf. file line number */
+ u_short in_pnext;
+ u_char in_v;
+ u_char in_xxx;
+ /* From here to the end is covered by IPN_CMPSIZ */
+ u_32_t in_flags;
+ u_32_t in_mssclamp; /* if != 0 clamp MSS to this */
+ u_int in_age[2];
+ int in_redir; /* see below for values */
+ int in_p; /* protocol. */
+ i6addr_t in_in[2];
+ i6addr_t in_out[2];
+ i6addr_t in_src[2];
+ frtuc_t in_tuc;
+ u_short in_port[2];
+ u_short in_ppip; /* ports per IP. */
+ u_short in_ippip; /* IP #'s per IP# */
+ char in_ifnames[2][LIFNAMSIZ];
+ char in_plabel[APR_LABELLEN]; /* proxy label. */
+ ipftag_t in_tag;
+} ipnat_t;
+
+/*
+ * Shorthand accessors.  In each address pair, element [0] is the address
+ * and element [1] the mask; the in_tuc accessors reach into frtuc_t.
+ */
+#define in_pmin in_port[0] /* Also holds static redir port */
+#define in_pmax in_port[1]
+#define in_nextip in_next6.in4
+#define in_nip in_next6.in4.s_addr
+#define in_inip in_in[0].in4.s_addr
+#define in_inmsk in_in[1].in4.s_addr
+#define in_outip in_out[0].in4.s_addr
+#define in_outmsk in_out[1].in4.s_addr
+#define in_srcip in_src[0].in4.s_addr
+#define in_srcmsk in_src[1].in4.s_addr
+#define in_scmp in_tuc.ftu_scmp
+#define in_dcmp in_tuc.ftu_dcmp
+#define in_stop in_tuc.ftu_stop
+#define in_dtop in_tuc.ftu_dtop
+#define in_sport in_tuc.ftu_sport
+#define in_dport in_tuc.ftu_dport
+
+/*
+ * Bit definitions for in_flags
+ *
+ * The four lowest bits have the same values as NAT_TCP, NAT_UDP,
+ * NAT_ICMPERR and NAT_ICMPQUERY.  IPN_RF uses IPN_DELETE, which is defined
+ * further down; that is fine because macros are expanded where used.
+ */
+#define IPN_ANY 0x00000
+#define IPN_TCP 0x00001
+#define IPN_UDP 0x00002
+#define IPN_TCPUDP (IPN_TCP|IPN_UDP)
+#define IPN_ICMPERR 0x00004
+#define IPN_TCPUDPICMP (IPN_TCP|IPN_UDP|IPN_ICMPERR)
+#define IPN_ICMPQUERY 0x00008
+#define IPN_TCPUDPICMPQ (IPN_TCP|IPN_UDP|IPN_ICMPQUERY)
+#define IPN_RF (IPN_TCPUDP|IPN_DELETE|IPN_ICMPERR)
+#define IPN_AUTOPORTMAP 0x00010
+#define IPN_IPRANGE 0x00020
+#define IPN_FILTER 0x00040
+#define IPN_SPLIT 0x00080
+#define IPN_ROUNDR 0x00100
+#define IPN_NOTSRC 0x04000
+#define IPN_NOTDST 0x08000
+#define IPN_DYNSRCIP 0x10000 /* dynamic src IP# */
+#define IPN_DYNDSTIP 0x20000 /* dynamic dst IP# */
+#define IPN_DELETE 0x40000
+#define IPN_STICKY 0x80000
+#define IPN_FRAG 0x100000
+#define IPN_FIXEDDPORT 0x200000
+#define IPN_FINDFORWARD 0x400000
+#define IPN_IN 0x800000
+/* Named IPN_USERFLAGS; presumably the bits a rule may set - confirm. */
+#define IPN_USERFLAGS (IPN_TCPUDP|IPN_AUTOPORTMAP|IPN_IPRANGE|IPN_SPLIT|\
+ IPN_ROUNDR|IPN_FILTER|IPN_NOTSRC|IPN_NOTDST|\
+ IPN_FRAG|IPN_STICKY|IPN_FIXEDDPORT|IPN_ICMPQUERY)
+
+/*
+ * Values for in_redir
+ */
+#define NAT_MAP 0x01
+#define NAT_REDIRECT 0x02
+#define NAT_BIMAP (NAT_MAP|NAT_REDIRECT)
+#define NAT_MAPBLK 0x04
+
+#define MAPBLK_MINPORT 1024 /* don't use reserved ports for src port */
+#define USABLE_PORTS (65536 - MAPBLK_MINPORT)
+
+/* Number of bytes from in_flags to the end of ipnat_t. */
+#define IPN_CMPSIZ (sizeof(ipnat_t) - offsetof(ipnat_t, in_flags))
+
+/*
+ * Address/port triple (inside, outside, real) plus flags; this is the
+ * argument type of nat_lookupredir().
+ */
+typedef struct natlookup {
+ struct in_addr nl_inip;
+ struct in_addr nl_outip;
+ struct in_addr nl_realip;
+ int nl_flags;
+ u_short nl_inport;
+ u_short nl_outport;
+ u_short nl_realport;
+} natlookup_t;
+
+
+/*
+ * A NAT entry bundled with copies of its NAT rule and filter rule,
+ * followed by ipn_dsize bytes of extra data.  ipn_data is declared with
+ * four bytes; presumably the structure is over-allocated when more data
+ * follows - confirm where it is allocated.
+ */
+typedef struct nat_save {
+ void *ipn_next;
+ struct nat ipn_nat;
+ struct ipnat ipn_ipnat;
+ struct frentry ipn_fr;
+ int ipn_dsize;
+ char ipn_data[4];
+} nat_save_t;
+
+/* The filter rule pointer held inside the embedded NAT entry. */
+#define ipn_rule ipn_nat.nat_fr
+
+/* A pointer together with a size. */
+typedef struct natget {
+ void *ng_ptr;
+ int ng_sz;
+} natget_t;
+
+
+/* Remove any earlier macro named tr_flags so it can be a member name. */
+#undef tr_flags
+/*
+ * Entry keyed by three address/port pairs (real destination, real source,
+ * local source), with flags and an expiry value, linked on a hash chain
+ * and on a plain list.
+ */
+typedef struct nattrpnt {
+ struct in_addr tr_dstip; /* real destination IP# */
+ struct in_addr tr_srcip; /* real source IP# */
+ struct in_addr tr_locip; /* local source IP# */
+ u_int tr_flags;
+ int tr_expire;
+ u_short tr_dstport; /* real destination port# */
+ u_short tr_srcport; /* real source port# */
+ u_short tr_locport; /* local source port# */
+ struct nattrpnt *tr_hnext;
+ struct nattrpnt **tr_phnext;
+ struct nattrpnt *tr_next;
+ struct nattrpnt **tr_pnext; /* previous next */
+} nattrpnt_t;
+
+/* Size of the leading part of nattrpnt_t, up to the link pointers. */
+#define TN_CMPSIZ offsetof(nattrpnt_t, tr_hnext)
+
+
+/*
+ * This structure gets used to help NAT sessions keep the same NAT rule (and
+ * thus translation for IP address) when:
+ * (a) round-robin redirects are in use
+ * (b) different IP add
+ *
+ * NOTE(review): item (b) is cut off mid-sentence in the original comment;
+ * the intended wording is not recoverable from this file.
+ */
+typedef struct hostmap {
+ struct hostmap *hm_next;
+ struct hostmap **hm_pnext;
+ struct ipnat *hm_ipnat;
+ struct in_addr hm_srcip;
+ struct in_addr hm_dstip;
+ struct in_addr hm_mapip;
+ u_32_t hm_port;
+ int hm_ref;
+} hostmap_t;
+
+
+/*
+ * Structure used to pass information in to nat_newmap and nat_newrdr.
+ * It carries the rule pointer, two sums, two flag words, an address and
+ * four port values.
+ */
+typedef struct natinfo {
+ ipnat_t *nai_np;
+ u_32_t nai_sum1;
+ u_32_t nai_sum2;
+ u_32_t nai_nflags;
+ u_32_t nai_flags;
+ struct in_addr nai_ip;
+ u_short nai_port;
+ u_short nai_nport;
+ u_short nai_sport;
+ u_short nai_dport;
+} natinfo_t;
+
+
+/*
+ * NAT statistics record: counters, table sizes and pointers to the NAT
+ * tables and lists.  The global nat_stats declared at the end of this
+ * header has this type.
+ */
+typedef struct natstat {
+ u_long ns_mapped[2];
+ u_long ns_rules;
+ u_long ns_added;
+ u_long ns_expire;
+ u_long ns_inuse;
+ u_long ns_logged;
+ u_long ns_logfail;
+ u_long ns_memfail;
+ u_long ns_badnat;
+ u_long ns_addtrpnt;
+ nat_t **ns_table[2];
+ hostmap_t **ns_maptable;
+ ipnat_t *ns_list;
+ void *ns_apslist;
+ u_int ns_wilds;
+ u_int ns_nattab_sz;
+ u_int ns_nattab_max;
+ u_int ns_rultab_sz;
+ u_int ns_rdrtab_sz;
+ u_int ns_trpntab_sz;
+ u_int ns_hostmap_sz;
+ nat_t *ns_instances;
+ nattrpnt_t *ns_trpntlist;
+ u_long *ns_bucketlen[2];
+} natstat_t;
+
+/*
+ * NAT log record: three address/port pairs, a record type (one of the
+ * NL_* values below), a rule number, packet/byte counters and the
+ * protocol.
+ */
+typedef struct natlog {
+ struct in_addr nl_origip;
+ struct in_addr nl_outip;
+ struct in_addr nl_inip;
+ u_short nl_origport;
+ u_short nl_outport;
+ u_short nl_inport;
+ u_short nl_type;
+ int nl_rule;
+ U_QUAD_T nl_pkts[2];
+ U_QUAD_T nl_bytes[2];
+ u_char nl_p;
+} natlog_t;
+
+
+/*
+ * Record types.  The first four reuse the in_redir values; the remaining
+ * three sit at the top of the 16-bit range.
+ */
+#define NL_NEWMAP NAT_MAP
+#define NL_NEWRDR NAT_REDIRECT
+#define NL_NEWBIMAP NAT_BIMAP
+#define NL_NEWBLOCK NAT_MAPBLK
+#define NL_CLONE 0xfffd
+#define NL_FLUSH 0xfffe
+#define NL_EXPIRE 0xffff
+
+/*
+ * Hash (k) folded with itself shifted right by 12 bits, plus (l), reduced
+ * modulo (m).  Every argument is parenthesised so that an expression such
+ * as "a | b" passed for l is added as a whole.
+ */
+#define NAT_HASH_FN(k,l,m) (((k) + ((k) >> 12) + (l)) % (m))
+
+/* Fold the upper 16 bits of (in) into the lower 16 bits, once. */
+#define LONG_SUM(in) (((in) & 0xffff) + ((in) >> 16))
+
+/*
+ * Compute into (sd) the folded difference between two sums.  (s1) and (s2)
+ * are folded to 16 bits in place (both are modified), and all three
+ * arguments are evaluated more than once, so pass plain variables.
+ * NOTE(review): the expansion is a bare brace block rather than
+ * do { } while (0), so it cannot be used as the body of an if that has
+ * an else when followed by a semicolon.
+ */
+#define CALC_SUMD(s1, s2, sd) { \
+ (s1) = ((s1) & 0xffff) + ((s1) >> 16); \
+ (s2) = ((s2) & 0xffff) + ((s2) >> 16); \
+ /* Do it twice */ \
+ (s1) = ((s1) & 0xffff) + ((s1) >> 16); \
+ (s2) = ((s2) & 0xffff) + ((s2) >> 16); \
+ /* Because ~1 == -2, We really need ~1 == -1 */ \
+ if ((s1) > (s2)) (s2)--; \
+ (sd) = (s2) - (s1); \
+ (sd) = ((sd) & 0xffff) + ((sd) >> 16); }
+
+/* Two high-bit flags; their users are not visible in this header. */
+#define NAT_SYSSPACE 0x80000000
+#define NAT_LOCKHELD 0x40000000
+
+
+/* NAT tunables and global tables, defined outside this header. */
+extern u_int ipf_nattable_sz;
+extern u_int ipf_nattable_max;
+extern u_int ipf_natrules_sz;
+extern u_int ipf_rdrrules_sz;
+extern u_int ipf_hostmap_sz;
+extern u_int fr_nat_maxbucket;
+extern u_int fr_nat_maxbucket_reset;
+extern int fr_nat_lock;
+extern void fr_natsync __P((void *));
+extern u_long fr_defnatage;
+extern u_long fr_defnaticmpage;
+extern u_long fr_defnatipage;
+ /* nat_table[0] -> hashed list sorted by inside (ip, port) */
+ /* nat_table[1] -> hashed list sorted by outside (ip, port) */
+extern nat_t **nat_table[2];
+extern nat_t *nat_instances;
+extern ipnat_t *nat_list;
+extern ipnat_t **nat_rules;
+extern ipnat_t **rdr_rules;
+extern ipftq_t *nat_utqe;
+extern natstat_t nat_stats;
+
+#if defined(__OpenBSD__)
+extern void nat_ifdetach __P((void *));
+#endif
+/* Setup, entry creation and the lookup routines. */
+extern int fr_nat_ioctl __P((caddr_t, ioctlcmd_t, int));
+extern int fr_natinit __P((void));
+extern nat_t *nat_new __P((fr_info_t *, ipnat_t *, nat_t **, u_int, int));
+extern nat_t *nat_outlookup __P((fr_info_t *, u_int, u_int, struct in_addr,
+ struct in_addr));
+extern void fix_datacksum __P((u_short *, u_32_t));
+extern nat_t *nat_inlookup __P((fr_info_t *, u_int, u_int, struct in_addr,
+ struct in_addr));
+extern nat_t *nat_tnlookup __P((fr_info_t *, int));
+extern nat_t *nat_maplookup __P((void *, u_int, struct in_addr,
+ struct in_addr));
+extern nat_t *nat_lookupredir __P((natlookup_t *));
+extern nat_t *nat_icmperrorlookup __P((fr_info_t *, int));
+extern nat_t *nat_icmperror __P((fr_info_t *, u_int *, int));
+extern int nat_insert __P((nat_t *, int));
+
+/* Packet-path hooks, housekeeping and checksum helpers. */
+extern int fr_checknatout __P((fr_info_t *, u_32_t *));
+extern int fr_natout __P((fr_info_t *, nat_t *, int, u_32_t));
+extern int fr_checknatin __P((fr_info_t *, u_32_t *));
+extern int fr_natin __P((fr_info_t *, nat_t *, int, u_32_t));
+extern void fr_natunload __P((void));
+extern void fr_natexpire __P((void));
+extern void nat_log __P((struct nat *, u_int));
+extern void fix_incksum __P((fr_info_t *, u_short *, u_32_t));
+extern void fix_outcksum __P((fr_info_t *, u_short *, u_32_t));
+extern void fr_natderef __P((nat_t **));
+extern u_short *nat_proto __P((fr_info_t *, nat_t *, u_int));
+extern void nat_update __P((fr_info_t *, nat_t *, ipnat_t *));
+extern void fr_setnatqueue __P((nat_t *, int));
+
+#endif /* __IP_NAT_H__ */
diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_netbios_pxy.c b/usr/src/uts/common/inet/ipf/netinet/ip_netbios_pxy.c
new file mode 100644
index 0000000000..8bfa8fac47
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/netinet/ip_netbios_pxy.c
@@ -0,0 +1,118 @@
+/*
+ * Simple netbios-dgm transparent proxy for in-kernel use.
+ * For use with the NAT code.
+ * $Id: ip_netbios_pxy.c,v 2.8.2.1 2005/07/15 21:56:51 darrenr Exp $
+ */
+
+/*-
+ * Copyright (c) 2002-2003 Paul J. Ledbetter III
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $Id: ip_netbios_pxy.c,v 2.8.2.1 2005/07/15 21:56:51 darrenr Exp $
+ */
+
+#define IPF_NETBIOS_PROXY
+
+int ippr_netbios_init __P((void));
+void ippr_netbios_fini __P((void));
+int ippr_netbios_out __P((fr_info_t *, ap_session_t *, nat_t *));
+
+static frentry_t netbiosfr; /* zeroed and filled in by ippr_netbios_init() */
+
+int netbios_proxy_init = 0; /* 1 after ippr_netbios_init(), 0 again after ippr_netbios_fini() */
+
+/*
+ * Set up the netbios-dgm proxy's private filter rule: clear it, give it one
+ * reference and the FR_INQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE flags, create its
+ * lock and note that initialisation has happened.  Always returns 0.
+ */
+int ippr_netbios_init()
+{
+	frentry_t *fr = &netbiosfr;
+
+	bzero((char *)fr, sizeof(*fr));
+	fr->fr_flags = FR_INQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE;
+	fr->fr_ref = 1;
+	MUTEX_INIT(&fr->fr_lock, "NETBIOS proxy rule lock");
+
+	netbios_proxy_init = 1;
+	return 0;
+}
+
+
+/*
+ * Undo ippr_netbios_init().  The rule lock is destroyed only if the init
+ * flag is set, so a call without a matching init does nothing.
+ */
+void ippr_netbios_fini()
+{
+	if (netbios_proxy_init != 1)
+		return;
+
+	MUTEX_DESTROY(&netbiosfr.fr_lock);
+	netbios_proxy_init = 0;
+}
+
+
+/*
+ * Packet hook for the netbios-dgm proxy: overwrite the source address and
+ * source port carried inside the NetBIOS datagram header with the source
+ * address of the IP header and the source port of the UDP header.
+ *
+ * fin - packet information; fin_m, fin_ip, fin_dp, fin_dlen and fin_ipoff
+ *       are read.
+ * aps - proxy session, unused.
+ * nat - NAT entry, unused.
+ *
+ * Always returns 0.  Packets whose UDP payload is shorter than 11 bytes
+ * are left untouched.
+ */
+int ippr_netbios_out(fin, aps, nat)
+fr_info_t *fin;
+ap_session_t *aps;
+nat_t *nat;
+{
+	char dgmbuf[6];
+	int off, dlen;
+	udphdr_t *udp;
+	ip_t *ip;
+	mb_t *m;
+
+	aps = aps;	/* LINT */
+	nat = nat;	/* LINT */
+
+	m = fin->fin_m;
+	dlen = fin->fin_dlen - sizeof(*udp);
+	/*
+	 * no net bios datagram could possibly be shorter than this
+	 */
+	if (dlen < 11)
+		return 0;
+
+	ip = fin->fin_ip;
+	udp = (udphdr_t *)fin->fin_dp;
+	off = (char *)udp - (char *)ip + sizeof(*udp) + fin->fin_ipoff;
+
+	/*
+	 * move past the
+	 *	ip header;
+	 *	udp header;
+	 *	4 bytes into the net bios dgm header.
+	 *  According to rfc1002, this should be the exact location of
+	 *  the source address/port
+	 */
+	off += 4;
+
+	/*
+	 * Copy the source address/port from the headers.  Both fields are
+	 * held in network byte order, so their bytes are copied exactly as
+	 * they sit in memory.  Extracting them with shifts and masks only
+	 * gives network order on little-endian hosts; on big-endian hosts
+	 * it would write the address and port reversed.
+	 */
+	bcopy((char *)&ip->ip_src.s_addr, dgmbuf, 4);
+	bcopy((char *)&udp->uh_sport, dgmbuf + 4, 2);
+
+	/* replace data in packet */
+	COPYBACK(m, off, sizeof(dgmbuf), dgmbuf);
+
+	return 0;
+}
diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_pool.h b/usr/src/uts/common/inet/ipf/netinet/ip_pool.h
new file mode 100644
index 0000000000..b40ba2b0fb
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/netinet/ip_pool.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 1993-2001, 2003 by Darren Reed.
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ *
+ * $Id: ip_pool.h,v 2.26.2.3 2005/06/12 07:18:27 darrenr Exp $
+ *
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifndef __IP_POOL_H__
+#define __IP_POOL_H__
+
+#if defined(_KERNEL) && !defined(__osf__) && !defined(__hpux) && \
+ !defined(linux) && !defined(sun) && !defined(AIX)
+# include <net/radix.h>
+extern void rn_freehead __P((struct radix_node_head *));
+# define FreeS(p, z) KFREES(p, z)
+extern int max_keylen;
+#else
+# if defined(__osf__) || defined(__hpux)
+# include "radix_ipf_local.h"
+# define radix_mask ipf_radix_mask
+# define radix_node ipf_radix_node
+# define radix_node_head ipf_radix_node_head
+# else
+# include "radix_ipf.h"
+# endif
+#endif
+#include "netinet/ip_lookup.h"
+
+#define IP_POOL_NOMATCH 0
+#define IP_POOL_POSITIVE 1
+
+typedef struct ip_pool_node {
+ struct radix_node ipn_nodes[2];
+ addrfamily_t ipn_addr;
+ addrfamily_t ipn_mask;
+ int ipn_info;
+ char ipn_name[FR_GROUPLEN];
+ u_long ipn_hits;
+ struct ip_pool_node *ipn_next, **ipn_pnext; /* list linkage; presumably the chain headed by ip_pool_t.ipo_list - TODO confirm in ip_pool.c */
+} ip_pool_node_t;
+
+
+typedef struct ip_pool_s {
+ struct ip_pool_s *ipo_next;
+ struct ip_pool_s **ipo_pnext;
+ struct radix_node_head *ipo_head;
+ ip_pool_node_t *ipo_list;
+ u_long ipo_hits;
+ int ipo_unit;
+ int ipo_flags; /* presumably holds IPOOL_ANON (defined just below this struct) - TODO confirm */
+ int ipo_ref;
+ char ipo_name[FR_GROUPLEN];
+} ip_pool_t;
+
+#define IPOOL_ANON 0x80000000
+
+
+typedef struct ip_pool_stat {
+ u_long ipls_pools;
+ u_long ipls_tables;
+ u_long ipls_nodes;
+ ip_pool_t *ipls_list[IPL_LOGSIZE]; /* same dimension as the global ip_pool_list[] */
+} ip_pool_stat_t;
+
+
+extern ip_pool_stat_t ipoolstat;
+extern ip_pool_t *ip_pool_list[IPL_LOGSIZE];
+
+extern int ip_pool_search __P((void *, int, void *));
+extern int ip_pool_init __P((void));
+extern void ip_pool_fini __P((void));
+extern int ip_pool_create __P((iplookupop_t *));
+extern int ip_pool_insert __P((ip_pool_t *, addrfamily_t *,
+ addrfamily_t *, int));
+extern int ip_pool_remove __P((ip_pool_t *, ip_pool_node_t *));
+extern int ip_pool_destroy __P((iplookupop_t *));
+extern void ip_pool_free __P((ip_pool_t *));
+extern void ip_pool_deref __P((ip_pool_t *));
+extern void *ip_pool_find __P((int, char *));
+extern ip_pool_node_t *ip_pool_findeq __P((ip_pool_t *,
+ addrfamily_t *, addrfamily_t *));
+extern int ip_pool_flush __P((iplookupflush_t *));
+extern int ip_pool_statistics __P((iplookupop_t *));
+
+#endif /* __IP_POOL_H__ */
diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_pptp_pxy.c b/usr/src/uts/common/inet/ipf/netinet/ip_pptp_pxy.c
new file mode 100644
index 0000000000..480edf1a00
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/netinet/ip_pptp_pxy.c
@@ -0,0 +1,528 @@
+/*
+ * Copyright (C) 2002-2003 by Darren Reed
+ *
+ * Simple PPTP transparent proxy for in-kernel use. For use with the NAT
+ * code.
+ *
+ * $Id: ip_pptp_pxy.c,v 2.10.2.10 2005/07/15 21:56:52 darrenr Exp $
+ *
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#define IPF_PPTP_PROXY
+
+typedef struct pptp_hdr {
+ u_short pptph_len; /* message length; converted with ntohs() and used as the byte count to buffer */
+ u_short pptph_type; /* message type; PPTP_MSGTYPE_CTL selects control-message handling */
+ u_32_t pptph_cookie; /* must be 0x1a2b3c4d after ntohl() or the message is rejected */
+} pptp_hdr_t;
+
+#define PPTP_MSGTYPE_CTL 1 /* pptph_type value handled by ippr_pptp_message() */
+#define PPTP_MTCTL_STARTREQ 1 /* PPTP_MTCTL_*: control message types switched on in ippr_pptp_mctl() */
+#define PPTP_MTCTL_STARTREP 2
+#define PPTP_MTCTL_STOPREQ 3
+#define PPTP_MTCTL_STOPREP 4
+#define PPTP_MTCTL_ECHOREQ 5
+#define PPTP_MTCTL_ECHOREP 6
+#define PPTP_MTCTL_OUTREQ 7
+#define PPTP_MTCTL_OUTREP 8
+#define PPTP_MTCTL_INREQ 9
+#define PPTP_MTCTL_INREP 10
+#define PPTP_MTCTL_INCONNECT 11
+#define PPTP_MTCTL_CLEAR 12
+#define PPTP_MTCTL_DISCONNECT 13
+#define PPTP_MTCTL_WANERROR 14
+#define PPTP_MTCTL_LINKINFO 15
+
+
+int ippr_pptp_init __P((void));
+void ippr_pptp_fini __P((void));
+int ippr_pptp_new __P((fr_info_t *, ap_session_t *, nat_t *));
+void ippr_pptp_del __P((ap_session_t *));
+int ippr_pptp_inout __P((fr_info_t *, ap_session_t *, nat_t *));
+void ippr_pptp_donatstate __P((fr_info_t *, nat_t *, pptp_pxy_t *));
+int ippr_pptp_message __P((fr_info_t *, nat_t *, pptp_pxy_t *, pptp_side_t *));
+int ippr_pptp_nextmessage __P((fr_info_t *, nat_t *, pptp_pxy_t *, int));
+int ippr_pptp_mctl __P((fr_info_t *, nat_t *, pptp_pxy_t *, pptp_side_t *));
+
+static frentry_t pptpfr; /* set up in ippr_pptp_init(); used as fin_fr in ippr_pptp_donatstate() */
+
+int pptp_proxy_init = 0; /* 1 after ippr_pptp_init(), 0 again after ippr_pptp_fini() */
+int ippr_pptp_debug = 0; /* larger values enable more printf() diagnostics */
+int ippr_pptp_gretimeout = IPF_TTLVAL(120); /* 2 minutes; copied into pptpfr.fr_age[] at init */
+
+
+/*
+ * PPTP proxy start-up: clear the private filter rule, give it one
+ * reference, the FR_OUTQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE flags and
+ * ippr_pptp_gretimeout for both fr_age[] slots, create its lock and
+ * record that initialisation has been done.  Always returns 0.
+ */
+int ippr_pptp_init()
+{
+	frentry_t *fr = &pptpfr;
+
+	bzero((char *)fr, sizeof(*fr));
+	fr->fr_flags = FR_OUTQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE;
+	fr->fr_age[1] = ippr_pptp_gretimeout;
+	fr->fr_age[0] = ippr_pptp_gretimeout;
+	fr->fr_ref = 1;
+	MUTEX_INIT(&fr->fr_lock, "PPTP proxy rule lock");
+
+	pptp_proxy_init = 1;
+	return 0;
+}
+
+
+/*
+ * Undo ippr_pptp_init().  The rule lock is destroyed only if the init flag
+ * is set, so a call without a matching init does nothing.
+ */
+void ippr_pptp_fini()
+{
+	if (pptp_proxy_init != 1)
+		return;
+
+	MUTEX_DESTROY(&pptpfr.fr_lock);
+	pptp_proxy_init = 0;
+}
+
+
+/*
+ * Create the per-session data for a new PPTP control connection.
+ *
+ * Fails (returns -1) if a GRE mapping for the same inside address and
+ * destination already exists, or if the private data cannot be allocated.
+ * On success (returns 0) aps->aps_data points at a zeroed pptp_pxy_t whose
+ * embedded NAT rule has been filled in for GRE and whose two reassembly
+ * buffers are empty.
+ */
+int ippr_pptp_new(fin, aps, nat)
+fr_info_t *fin;
+ap_session_t *aps;
+nat_t *nat;
+{
+	pptp_pxy_t *pxy;
+	ipnat_t *rule;
+	ip_t *iph;
+
+	iph = fin->fin_ip;
+
+	if (nat_outlookup(fin, 0, IPPROTO_GRE, nat->nat_inip,
+			  iph->ip_dst) != NULL) {
+		if (ippr_pptp_debug > 0)
+			printf("ippr_pptp_new: GRE session already exists\n");
+		return -1;
+	}
+
+	aps->aps_psiz = sizeof(*pxy);
+	KMALLOCS(aps->aps_data, pptp_pxy_t *, sizeof(*pxy));
+	if (aps->aps_data == NULL) {
+		if (ippr_pptp_debug > 0)
+			printf("ippr_pptp_new: malloc for aps_data failed\n");
+		return -1;
+	}
+
+	pxy = aps->aps_data;
+	bzero((char *)pxy, sizeof(*pxy));
+
+	/* Both directions start with an empty reassembly buffer. */
+	pxy->pptp_side[0].pptps_wptr = pxy->pptp_side[0].pptps_buffer;
+	pxy->pptp_side[1].pptps_wptr = pxy->pptp_side[1].pptps_buffer;
+
+	/*
+	 * The rule that matched this connection describes TCP, so a separate
+	 * rule is built here for the GRE tunnel/transport mapping to be
+	 * created against.
+	 */
+	rule = &pxy->pptp_rule;
+	rule->in_ifps[0] = fin->fin_ifp;
+	rule->in_apr = NULL;
+	rule->in_use = 1;
+	rule->in_hits = 1;
+	rule->in_ippip = 1;
+
+	switch (nat->nat_dir)
+	{
+	case NAT_OUTBOUND :
+		rule->in_nip = ntohl(nat->nat_outip.s_addr);
+		rule->in_outip = fin->fin_saddr;
+		rule->in_redir = NAT_MAP;
+		break;
+
+	case NAT_INBOUND :
+		rule->in_nip = 0;
+		rule->in_outip = nat->nat_outip.s_addr;
+		rule->in_redir = NAT_REDIRECT;
+		break;
+
+	default :
+		break;
+	}
+
+	rule->in_inip = nat->nat_inip.s_addr;
+	rule->in_inmsk = 0xffffffff;
+	rule->in_outmsk = 0xffffffff;
+	rule->in_srcip = fin->fin_saddr;
+	rule->in_srcmsk = 0xffffffff;
+	bcopy(nat->nat_ptr->in_ifnames[0], rule->in_ifnames[0],
+	      sizeof(rule->in_ifnames[0]));
+	rule->in_p = IPPROTO_GRE;
+
+	return 0;
+}
+
+
+/*
+ * Make sure the GRE side of a PPTP session has both a NAT entry and a
+ * state entry, refreshing their timeouts if they already exist.
+ *
+ * fin  - packet information for the control-connection packet.
+ * nat  - NAT entry of the control connection.
+ * pptp - per-session proxy data; pptp_nat and pptp_state are updated.
+ *
+ * When either entry is missing, a copy of fin is turned into a GRE lookup
+ * key (protocol, call IDs, addresses) and used to create it.  ip_p in the
+ * real packet is switched to GRE for the duration and restored before
+ * returning.
+ */
+void ippr_pptp_donatstate(fin, nat, pptp)
+fr_info_t *fin;
+nat_t *nat;
+pptp_pxy_t *pptp;
+{
+	fr_info_t fi;
+	grehdr_t gre;
+	nat_t *nat2;
+	u_char p;
+	ip_t *ip;
+
+	ip = fin->fin_ip;
+	p = ip->ip_p;
+
+	nat2 = pptp->pptp_nat;
+	if ((nat2 == NULL) || (pptp->pptp_state == NULL)) {
+		bcopy((char *)fin, (char *)&fi, sizeof(fi));
+		bzero((char *)&gre, sizeof(gre));
+		fi.fin_state = NULL;
+		fi.fin_nat = NULL;
+		fi.fin_fi.fi_p = IPPROTO_GRE;
+		fi.fin_fr = &pptpfr;
+		if ((nat->nat_dir == NAT_OUTBOUND && fin->fin_out) ||
+		    (nat->nat_dir == NAT_INBOUND && !fin->fin_out)) {
+			fi.fin_data[0] = pptp->pptp_call[0];
+			fi.fin_data[1] = pptp->pptp_call[1];
+		} else {
+			fi.fin_data[0] = pptp->pptp_call[1];
+			fi.fin_data[1] = pptp->pptp_call[0];
+		}
+		ip = fin->fin_ip;
+		ip->ip_p = IPPROTO_GRE;
+		fi.fin_flx &= ~(FI_TCPUDP|FI_STATE|FI_FRAG);
+		fi.fin_flx |= FI_IGNORE;
+		fi.fin_dp = &gre;
+		gre.gr_flags = htons(1 << 13);
+		if (fin->fin_out && nat->nat_dir == NAT_INBOUND) {
+			fi.fin_fi.fi_saddr = fin->fin_fi.fi_daddr;
+			fi.fin_fi.fi_daddr = nat->nat_outip.s_addr;
+		} else if (!fin->fin_out && nat->nat_dir == NAT_OUTBOUND) {
+			fi.fin_fi.fi_saddr = nat->nat_inip.s_addr;
+			fi.fin_fi.fi_daddr = fin->fin_fi.fi_saddr;
+		}
+	}
+
+	/*
+	 * Update NAT timeout/create NAT if missing.
+	 */
+	if (nat2 != NULL)
+		fr_queueback(&nat2->nat_tqe);
+	else {
+		nat2 = nat_new(&fi, &pptp->pptp_rule, &pptp->pptp_nat,
+			       NAT_SLAVE, nat->nat_dir);
+		pptp->pptp_nat = nat2;
+		if (nat2 != NULL) {
+			(void) nat_proto(&fi, nat2, 0);
+			nat_update(&fi, nat2, nat2->nat_ptr);
+		}
+	}
+
+	READ_ENTER(&ipf_state);
+	if (pptp->pptp_state != NULL) {
+		fr_queueback(&pptp->pptp_state->is_sti);
+		RWLOCK_EXIT(&ipf_state);
+	} else {
+		RWLOCK_EXIT(&ipf_state);
+		/*
+		 * nat_new() above may have failed, leaving nat2 NULL.  Only
+		 * substitute the inside address when a GRE NAT entry really
+		 * exists; otherwise keep the addresses already in fi rather
+		 * than dereference a NULL pointer.
+		 */
+		if (nat2 != NULL) {
+			if (nat->nat_dir == NAT_INBOUND)
+				fi.fin_fi.fi_daddr = nat2->nat_inip.s_addr;
+			else
+				fi.fin_fi.fi_saddr = nat2->nat_inip.s_addr;
+		}
+		fi.fin_ifp = NULL;
+		pptp->pptp_state = fr_addstate(&fi, &pptp->pptp_state,
+					       0);
+		if (fi.fin_state != NULL)
+			fr_statederef(&fi, (ipstate_t **)&fi.fin_state);
+	}
+	ip->ip_p = p;
+	return;
+}
+
+
+/*
+ * Try and build up the next PPTP message in the TCP stream and if we can
+ * build it up completely (fits in our buffer) then pass it off to the message
+ * parsing function.
+ *
+ * fin  - packet information for the TCP segment being examined.
+ * nat  - NAT entry of the control connection, handed on to the message
+ *        handlers.
+ * pptp - per-session proxy data.
+ * rev  - index into pptp->pptp_side[] for the direction being parsed.
+ *
+ * Returns 0 when the segment was consumed or ignored, -1 when it does not
+ * start at the expected sequence number or a complete header carries the
+ * wrong cookie.
+ *
+ * The header's length field comes straight off the wire.  It is rejected
+ * when it is larger than the reassembly buffer and also when it is smaller
+ * than the header itself: in the latter case "pptps_len - pptps_bytes" would
+ * be negative, turn into a huge u_short and make COPYDATA() write far past
+ * the end of pptps_buffer.
+ */
+int ippr_pptp_nextmessage(fin, nat, pptp, rev)
+fr_info_t *fin;
+nat_t *nat;
+pptp_pxy_t *pptp;
+int rev;
+{
+	static char *funcname = "ippr_pptp_nextmessage";
+	pptp_side_t *pptps;
+	u_32_t start, end;
+	pptp_hdr_t *hdr;
+	tcphdr_t *tcp;
+	int dlen, off;
+	u_short len;
+	char *msg;
+
+	tcp = fin->fin_dp;
+	dlen = fin->fin_dlen - (TCP_OFF(tcp) << 2);
+	start = ntohl(tcp->th_seq);
+	pptps = &pptp->pptp_side[rev];
+	off = (char *)tcp - (char *)fin->fin_ip + (TCP_OFF(tcp) << 2) +
+	      fin->fin_ipoff;
+
+	if (dlen <= 0)
+		return 0;
+	/*
+	 * If the complete data packet is before what we expect to see
+	 * "next", just ignore it as the chances are we've already seen it.
+	 * The next if statement following this one really just causes packets
+	 * ahead of what we've seen to be dropped, implying that something in
+	 * the middle went missing and we want to see that first.
+	 */
+	end = start + dlen;
+	if (pptps->pptps_next > end && pptps->pptps_next > start)
+		return 0;
+
+	if (pptps->pptps_next != start) {
+		if (ippr_pptp_debug > 5)
+			printf("%s: next (%x) != start (%x)\n", funcname,
+				pptps->pptps_next, start);
+		return -1;
+	}
+
+	msg = (char *)fin->fin_dp + (TCP_OFF(tcp) << 2);
+
+	while (dlen > 0) {
+		off += pptps->pptps_bytes;
+		if (pptps->pptps_gothdr == 0) {
+			/*
+			 * PPTP has an 8 byte header that includes the cookie.
+			 * The start of every message should include one and
+			 * it should match 1a2b3c4d.  Byte order is ignored,
+			 * deliberately, when printing out the error.
+			 */
+			len = MIN(8 - pptps->pptps_bytes, dlen);
+			COPYDATA(fin->fin_m, off, len, pptps->pptps_wptr);
+			pptps->pptps_bytes += len;
+			pptps->pptps_wptr += len;
+			hdr = (pptp_hdr_t *)pptps->pptps_buffer;
+			if (pptps->pptps_bytes == 8) {
+				pptps->pptps_next += 8;
+				if (ntohl(hdr->pptph_cookie) != 0x1a2b3c4d) {
+					if (ippr_pptp_debug > 1)
+						printf("%s: bad cookie (%x)\n",
+						       funcname,
+						       hdr->pptph_cookie);
+					return -1;
+				}
+			}
+			dlen -= len;
+			msg += len;
+			off += len;
+
+			/*
+			 * NOTE(review): the header is treated as received
+			 * even when fewer than 8 bytes were available in
+			 * this segment, so the length below may be read
+			 * from an incomplete header - confirm whether
+			 * headers split across segments need handling.
+			 */
+			pptps->pptps_gothdr = 1;
+			len = ntohs(hdr->pptph_len);
+			pptps->pptps_len = len;
+			pptps->pptps_nexthdr += len;
+
+			/*
+			 * If a message is too big for the buffer, or claims
+			 * to be shorter than its own header, just set
+			 * the fields for the next message to come along.
+			 * The messages defined in RFC 2637 will not exceed
+			 * 512 bytes (in total length) so this is likely a
+			 * bad data packet, anyway.
+			 */
+			if ((len < sizeof(*hdr)) ||
+			    (len > sizeof(pptps->pptps_buffer))) {
+				if (ippr_pptp_debug > 3) {
+					if (len < sizeof(*hdr))
+						printf("%s: message too small (%d)\n",
+						       funcname, len);
+					else
+						printf("%s: message too big (%d)\n",
+						       funcname, len);
+				}
+				pptps->pptps_next = pptps->pptps_nexthdr;
+				pptps->pptps_wptr = pptps->pptps_buffer;
+				pptps->pptps_gothdr = 0;
+				pptps->pptps_bytes = 0;
+				pptps->pptps_len = 0;
+				break;
+			}
+		}
+
+		len = MIN(pptps->pptps_len - pptps->pptps_bytes, dlen);
+		COPYDATA(fin->fin_m, off, len, pptps->pptps_wptr);
+		pptps->pptps_bytes += len;
+		pptps->pptps_wptr += len;
+		pptps->pptps_next += len;
+
+		if (pptps->pptps_len > pptps->pptps_bytes)
+			break;
+
+		(void) ippr_pptp_message(fin, nat, pptp, pptps);
+		pptps->pptps_wptr = pptps->pptps_buffer;
+		pptps->pptps_gothdr = 0;
+		pptps->pptps_bytes = 0;
+		pptps->pptps_len = 0;
+
+		start += len;
+		msg += len;
+		dlen -= len;
+	}
+
+	return 0;
+}
+
+
+/*
+ * Dispatch one fully buffered PPTP message.  Control messages are handed
+ * to ippr_pptp_mctl(); every other message type is ignored.  Always
+ * returns 0.
+ */
+int ippr_pptp_message(fin, nat, pptp, pptps)
+fr_info_t *fin;
+nat_t *nat;
+pptp_pxy_t *pptp;
+pptp_side_t *pptps;
+{
+	pptp_hdr_t *hdr;
+
+	hdr = (pptp_hdr_t *)pptps->pptps_buffer;
+	if (ntohs(hdr->pptph_type) == PPTP_MSGTYPE_CTL)
+		(void) ippr_pptp_mctl(fin, nat, pptp, pptps);
+
+	return 0;
+}
+
+
+/*
+ * Process one complete PPTP control message held in pptps->pptps_buffer.
+ *
+ * Requests and notifications simply record their type as this side's
+ * state.  A reply is recorded only if the other side's state is the
+ * matching request; an outgoing- or incoming-call reply additionally
+ * stores the two call IDs and sets up the GRE NAT/state entries.
+ * Unknown control types are ignored.  Always returns 0.
+ */
+int ippr_pptp_mctl(fin, nat, pptp, pptps)
+fr_info_t *fin;
+nat_t *nat;
+pptp_pxy_t *pptp;
+pptp_side_t *pptps;
+{
+	u_short *words = (u_short *)(pptps->pptps_buffer);
+	pptp_side_t *peer;
+	int type;
+
+	peer = (pptps == &pptp->pptp_side[0]) ? &pptp->pptp_side[1] :
+						&pptp->pptp_side[0];
+	type = ntohs(words[4]);
+
+	switch (type)
+	{
+	case PPTP_MTCTL_STARTREQ :
+	case PPTP_MTCTL_STOPREQ :
+	case PPTP_MTCTL_ECHOREQ :
+	case PPTP_MTCTL_OUTREQ :
+	case PPTP_MTCTL_INREQ :
+	case PPTP_MTCTL_INCONNECT :
+	case PPTP_MTCTL_CLEAR :
+	case PPTP_MTCTL_DISCONNECT :
+	case PPTP_MTCTL_WANERROR :
+	case PPTP_MTCTL_LINKINFO :
+		pptps->pptps_state = type;
+		break;
+
+	/*
+	 * Each *REP constant is its *REQ constant plus one, so "type - 1"
+	 * names the request this reply answers.
+	 */
+	case PPTP_MTCTL_STARTREP :
+	case PPTP_MTCTL_STOPREP :
+	case PPTP_MTCTL_ECHOREP :
+		if (peer->pptps_state == type - 1)
+			pptps->pptps_state = type;
+		break;
+
+	case PPTP_MTCTL_OUTREP :
+	case PPTP_MTCTL_INREP :
+		if (peer->pptps_state == type - 1) {
+			pptps->pptps_state = type;
+			pptp->pptp_call[0] = words[7];
+			pptp->pptp_call[1] = words[6];
+			ippr_pptp_donatstate(fin, nat, pptp);
+		}
+		break;
+
+	default :
+		break;
+	}
+
+	return 0;
+}
+
+
+/*
+ * Per-packet entry point for the PPTP control connection, used for both
+ * directions.  Works out which side of the session the packet belongs to,
+ * seeds the expected sequence numbers for both sides when the segment has
+ * all of the TH_OPENING flags set, and then feeds the segment to
+ * ippr_pptp_nextmessage(), whose result is returned.
+ */
+int ippr_pptp_inout(fin, aps, nat)
+fr_info_t *fin;
+ap_session_t *aps;
+nat_t *nat;
+{
+	pptp_side_t *mine, *theirs;
+	pptp_pxy_t *pptp;
+	tcphdr_t *tcp;
+	int rev;
+
+	/* rev is 1 for packets travelling against the NAT direction. */
+	if (nat->nat_dir == NAT_INBOUND)
+		rev = (fin->fin_out == 1) ? 1 : 0;
+	else if (nat->nat_dir == NAT_OUTBOUND)
+		rev = (fin->fin_out == 0) ? 1 : 0;
+	else
+		rev = 0;
+
+	tcp = (tcphdr_t *)fin->fin_dp;
+	if ((tcp->th_flags & TH_OPENING) == TH_OPENING) {
+		pptp = (pptp_pxy_t *)aps->aps_data;
+		theirs = &pptp->pptp_side[1 - rev];
+		mine = &pptp->pptp_side[rev];
+
+		theirs->pptps_next = ntohl(tcp->th_ack);
+		theirs->pptps_nexthdr = ntohl(tcp->th_ack);
+		mine->pptps_next = ntohl(tcp->th_seq) + 1;
+		mine->pptps_nexthdr = ntohl(tcp->th_seq) + 1;
+	}
+
+	return ippr_pptp_nextmessage(fin, nat, (pptp_pxy_t *)aps->aps_data,
+				     rev);
+}
+
+
+/*
+ * Session teardown.  If a GRE state entry is attached it is marked to
+ * expire on the next tick, detached from this session and moved to the
+ * front of its timeout queue; the session's state and NAT pointers are
+ * then cleared.  Does nothing when the session has no private data.
+ */
+void ippr_pptp_del(aps)
+ap_session_t *aps;
+{
+	pptp_pxy_t *pxy = aps->aps_data;
+
+	if (pxy == NULL)
+		return;
+
+	/*
+	 * Don't bother changing any of the NAT structure details,
+	 * *_del() is on a callback from aps_free(), from nat_delete()
+	 */
+	READ_ENTER(&ipf_state);
+	if (pxy->pptp_state != NULL) {
+		pxy->pptp_state->is_die = fr_ticks + 1;
+		pxy->pptp_state->is_me = NULL;
+		fr_queuefront(&pxy->pptp_state->is_sti);
+	}
+	RWLOCK_EXIT(&ipf_state);
+
+	pxy->pptp_state = NULL;
+	pxy->pptp_nat = NULL;
+}
diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_proxy.h b/usr/src/uts/common/inet/ipf/netinet/ip_proxy.h
new file mode 100644
index 0000000000..1e0bedef64
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/netinet/ip_proxy.h
@@ -0,0 +1,458 @@
+/*
+ * Copyright (C) 1997-2001 by Darren Reed.
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ *
+ * $Id: ip_proxy.h,v 2.31.2.3 2005/06/18 02:41:33 darrenr Exp $
+ */
+
+#ifndef __IP_PROXY_H__
+#define __IP_PROXY_H__
+
+/*
+ * SOLARIS is 1 when building for Solaris (sun plus an SVR4 marker) and 0
+ * otherwise.  It is evaluated here, once, rather than being defined as an
+ * expression containing "defined": a "defined" operator produced by macro
+ * expansion inside #if has undefined behaviour, and such a macro cannot be
+ * used in ordinary C expressions at all.
+ */
+#ifndef SOLARIS
+# if defined(sun) && (defined(__svr4__) || defined(__SVR4))
+#  define SOLARIS 1
+# else
+#  define SOLARIS 0
+# endif
+#endif
+
+#if defined(__STDC__) || defined(__GNUC__) || defined(_AIX51)
+#define SIOCPROXY _IOWR('r', 64, struct ap_control)
+#else
+#define SIOCPROXY _IOWR(r, 64, struct ap_control)
+#endif
+
+#ifndef APR_LABELLEN
+#define APR_LABELLEN 16
+#endif
+#define AP_SESS_SIZE 53
+
+struct nat;
+struct ipnat;
+struct ipstate;
+
+typedef struct ap_tcp {
+ u_short apt_sport; /* source port */
+ u_short apt_dport; /* destination port */
+ short apt_sel[2]; /* {seq,ack}{off,min} set selector */
+ short apt_seqoff[2]; /* sequence # difference */
+ u_32_t apt_seqmin[2]; /* don't change seq-off until after this */
+ short apt_ackoff[2]; /* ack # difference */
+ u_32_t apt_ackmin[2]; /* don't change ack-off until after this */
+ u_char apt_state[2]; /* connection state */
+} ap_tcp_t;
+
+typedef struct ap_udp {
+ u_short apu_sport; /* source port */
+ u_short apu_dport; /* destination port */
+} ap_udp_t;
+
+typedef struct ap_session {
+ struct aproxy *aps_apr;
+ union {
+ struct ap_tcp apu_tcp;
+ struct ap_udp apu_udp;
+ } aps_un;
+ u_int aps_flags;
+ U_QUAD_T aps_bytes; /* bytes sent */
+ U_QUAD_T aps_pkts; /* packets sent */
+ void *aps_nat; /* pointer back to nat struct */
+ void *aps_data; /* private data */
+ int aps_p; /* protocol */
+ int aps_psiz; /* size of private data */
+ struct ap_session *aps_hnext;
+ struct ap_session *aps_next;
+} ap_session_t;
+
+#define aps_sport aps_un.apu_tcp.apt_sport
+#define aps_dport aps_un.apu_tcp.apt_dport
+#define aps_sel aps_un.apu_tcp.apt_sel
+#define aps_seqoff aps_un.apu_tcp.apt_seqoff
+#define aps_seqmin aps_un.apu_tcp.apt_seqmin
+#define aps_state aps_un.apu_tcp.apt_state
+#define aps_ackoff aps_un.apu_tcp.apt_ackoff
+#define aps_ackmin aps_un.apu_tcp.apt_ackmin
+
+
+typedef struct ap_control {
+ char apc_label[APR_LABELLEN];
+ u_char apc_p;
+ /*
+ * The following fields are up to the proxy's apr_ctl routine to deal
+ * with. When the proxy gets this in kernel space, apc_data will
+ * point to a malloc'd region of memory of apc_dsize bytes. If the
+ * proxy wants to keep that memory, it must set apc_data to NULL
+ * before it returns. It is expected if this happens that it will
+ * take care to free it in apr_fini or otherwise as appropriate.
+ * apc_cmd is provided as a standard place to put simple commands,
+ * with apc_arg being available to put a simple arg.
+ */
+ u_long apc_cmd;
+ u_long apc_arg;
+ void *apc_data;
+ size_t apc_dsize;
+} ap_ctl_t;
+
+
+typedef struct aproxy {
+ struct aproxy *apr_next;
+ char apr_label[APR_LABELLEN]; /* Proxy label # */
+ u_char apr_p; /* protocol */
+ int apr_ref; /* +1 per rule referencing it */
+ int apr_flags;
+ int (* apr_init) __P((void));
+ void (* apr_fini) __P((void));
+ int (* apr_new) __P((fr_info_t *, ap_session_t *, struct nat *));
+ void (* apr_del) __P((ap_session_t *));
+ int (* apr_inpkt) __P((fr_info_t *, ap_session_t *, struct nat *));
+ int (* apr_outpkt) __P((fr_info_t *, ap_session_t *, struct nat *));
+ int (* apr_match) __P((fr_info_t *, ap_session_t *, struct nat *));
+ int (* apr_ctl) __P((struct aproxy *, struct ap_control *));
+} aproxy_t;
+
+#define APR_DELETE 1
+
+#define APR_ERR(x) ((x) << 16)
+#define APR_EXIT(x) (((x) >> 16) & 0xffff)
+#define APR_INC(x) ((x) & 0xffff)
+
+/*
+ * Generic #define's to cover missing things in the kernel.
+ *
+ * These are plain macros and may evaluate their argument more than once,
+ * so do not pass expressions with side effects.
+ */
+#ifndef isdigit
+#define isdigit(x) ((x) >= '0' && (x) <= '9')
+#endif
+#ifndef isupper
+#define isupper(x) (((unsigned)(x) >= 'A') && ((unsigned)(x) <= 'Z'))
+#endif
+#ifndef islower
+#define islower(x) (((unsigned)(x) >= 'a') && ((unsigned)(x) <= 'z'))
+#endif
+#ifndef isalpha
+#define isalpha(x) (isupper(x) || islower(x))
+#endif
+/*
+ * Only lower case letters are converted; everything else, including digits
+ * and punctuation, is returned unchanged.  Testing isupper() instead would
+ * apply the 'a'-to-'A' offset to every non-letter as well.
+ */
+#ifndef toupper
+#define toupper(x) (islower(x) ? (x) - 'a' + 'A' : (x))
+#endif
+#ifndef isspace
+#define isspace(x) (((x) == ' ') || ((x) == '\r') || ((x) == '\n') || \
+ ((x) == '\t') || ((x) == '\b'))
+#endif
+
+/*
+ * This is the scratch buffer size used to hold strings from the TCP stream
+ * that we may want to parse. It's an arbitrary size, really, but it must
+ * be at least as large as IPF_FTPBUFSZ.
+ */
+#define FTP_BUFSZ 120
+
+/*
+ * This buffer, however, doesn't need to be nearly so big. It just needs to
+ * be able to squeeze in the largest command it needs to rewrite. The ones
+ * it rewrites are EPRT, PORT and 227 replies.
+ */
+#define IPF_FTPBUFSZ 80 /* This *MUST* be >= 53! */
+
+typedef struct ftpside {
+ char *ftps_rptr;
+ char *ftps_wptr;
+ void *ftps_ifp;
+ u_32_t ftps_seq[2];
+ u_32_t ftps_len;
+ int ftps_junk; /* 2 = no cr/lf yet, 1 = cannot parse */
+ int ftps_cmds;
+ char ftps_buf[FTP_BUFSZ];
+} ftpside_t;
+
+typedef struct ftpinfo {
+ int ftp_passok;
+ int ftp_incok;
+ ftpside_t ftp_side[2];
+} ftpinfo_t;
+
+
+/*
+ * For the irc proxy.
+ */
+typedef struct ircinfo {
+ size_t irc_len;
+ char *irc_snick;
+ char *irc_dnick;
+ char *irc_type;
+ char *irc_arg;
+ char *irc_addr;
+ u_32_t irc_ipnum;
+ u_short irc_port;
+} ircinfo_t;
+
+
+/*
+ * Real audio proxy structure and #defines
+ */
+typedef struct raudio_s {
+ int rap_seenpna;
+ int rap_seenver;
+ int rap_version;
+ int rap_eos; /* End Of Startup */
+ int rap_gotid;
+ int rap_gotlen;
+ int rap_mode; /* RAP_M_* value; ippr_raudio_new() starts it at RAP_M_TCP */
+ int rap_sdone;
+ u_short rap_plport;
+ u_short rap_prport;
+ u_short rap_srport;
+ char rap_svr[19];
+ u_32_t rap_sbf; /* flag to indicate which of the 19 bytes have
+ * been filled
+ */
+ u_32_t rap_sseq;
+} raudio_t;
+
+#define RA_ID_END 0
+#define RA_ID_UDP 1
+#define RA_ID_ROBUST 7
+
+#define RAP_M_UDP 1
+#define RAP_M_ROBUST 2
+#define RAP_M_TCP 4
+#define RAP_M_UDP_ROBUST (RAP_M_UDP|RAP_M_ROBUST)
+
+
+/*
+ * MSN RPC proxy
+ */
+typedef struct msnrpcinfo {
+ u_int mri_flags;
+ int mri_cmd[2];
+ u_int mri_valid;
+ struct in_addr mri_raddr;
+ u_short mri_rport;
+} msnrpcinfo_t;
+
+
+/*
+ * IPSec proxy
+ */
+typedef u_32_t ipsec_cookie_t[2];
+
+typedef struct ipsec_pxy {
+ ipsec_cookie_t ipsc_icookie;
+ ipsec_cookie_t ipsc_rcookie;
+ int ipsc_rckset;
+ ipnat_t ipsc_rule;
+ nat_t *ipsc_nat;
+ struct ipstate *ipsc_state;
+} ipsec_pxy_t;
+
+/*
+ * PPTP proxy
+ */
+typedef struct pptp_side {
+ u_32_t pptps_nexthdr; /* seeded from the TCP seq/ack in ippr_pptp_inout(), then advanced by each message's declared length */
+ u_32_t pptps_next; /* TCP sequence number the next segment must start at (checked in ippr_pptp_nextmessage()) */
+ int pptps_state; /* last PPTP_MTCTL_* type recorded by ippr_pptp_mctl() */
+ int pptps_gothdr; /* non-zero once the header of the current message has been read */
+ int pptps_len; /* length of the current message as given in its header */
+ int pptps_bytes; /* bytes of the current message stored in pptps_buffer so far */
+ char *pptps_wptr; /* next write position inside pptps_buffer */
+ char pptps_buffer[512]; /* reassembly buffer for one message */
+} pptp_side_t;
+
+typedef struct pptp_pxy {
+ ipnat_t pptp_rule; /* GRE NAT rule filled in by ippr_pptp_new() */
+ nat_t *pptp_nat; /* GRE NAT entry returned by nat_new() in ippr_pptp_donatstate() */
+ struct ipstate *pptp_state; /* GRE state entry returned by fr_addstate() in ippr_pptp_donatstate() */
+ u_short pptp_call[2]; /* call IDs copied from OUTREP/INREP messages in ippr_pptp_mctl() */
+ pptp_side_t pptp_side[2]; /* one per direction, indexed by "rev" in ippr_pptp_inout() */
+} pptp_pxy_t;
+
+
+/*
+ * Sun RPCBIND proxy
+ */
+#define RPCB_MAXMSG 888
+#define RPCB_RES_PMAP 0 /* Response contains a v2 port. */
+#define RPCB_RES_STRING 1 /* " " " v3 (GETADDR) string. */
+#define RPCB_RES_LIST 2 /* " " " v4 (GETADDRLIST) list. */
+#define RPCB_MAXREQS 32 /* Arbitrary limit on tracked transactions */
+
+#define RPCB_REQMIN 40
+#define RPCB_REQMAX 888
+#define RPCB_REPMIN 20
+#define RPCB_REPMAX 604 /* XXX double check this! */
+
+/*
+ * These macros compare the number of bytes between p and the end of the
+ * data in r->rm_msgbuf (rm_msgbuf + rm_buflen) against l.
+ */
+#define RPCB_BUF_END(r) (char *)((r)->rm_msgbuf + (r)->rm_buflen)
+#define RPCB_BUF_GEQ(r, p, l) \
+ ((RPCB_BUF_END((r)) > (char *)(p)) && \
+ ((RPCB_BUF_END((r)) - (char *)(p)) >= (l)))
+#define RPCB_BUF_EQ(r, p, l) \
+ (RPCB_BUF_END((r)) == ((char *)(p) + (l)))
+
+/*
+ * The following correspond to RPC(B) detailed in RFC183[13].
+ */
+#define RPCB_CALL 0
+#define RPCB_REPLY 1
+#define RPCB_MSG_VERSION 2
+#define RPCB_PROG 100000
+#define RPCB_GETPORT 3
+#define RPCB_GETADDR 3
+#define RPCB_GETADDRLIST 11
+#define RPCB_MSG_ACCEPTED 0
+#define RPCB_MSG_DENIED 1
+
+/* BEGIN (Generic XDR structures) */
+typedef struct xdr_string {
+ u_32_t *xs_len;
+ char *xs_str;
+} xdr_string_t;
+
+typedef struct xdr_auth {
+ /* u_32_t xa_flavor; */
+ xdr_string_t xa_string;
+} xdr_auth_t;
+
+typedef struct xdr_uaddr {
+ u_32_t xu_ip;
+ u_short xu_port;
+ xdr_string_t xu_str;
+} xdr_uaddr_t;
+
+typedef struct xdr_proto {
+ u_int xp_proto;
+ xdr_string_t xp_str;
+} xdr_proto_t;
+
+#define xu_xslen xu_str.xs_len
+#define xu_xsstr xu_str.xs_str
+#define xp_xslen xp_str.xs_len
+#define xp_xsstr xp_str.xs_str
+/* END (Generic XDR structures) */
+
+/* BEGIN (RPC call structures) */
+typedef struct pmap_args {
+ /* u_32_t pa_prog; */
+ /* u_32_t pa_vers; */
+ u_32_t *pa_prot;
+ /* u_32_t pa_port; */
+} pmap_args_t;
+
+typedef struct rpcb_args {
+ /* u_32_t *ra_prog; */
+ /* u_32_t *ra_vers; */
+ xdr_proto_t ra_netid;
+ xdr_uaddr_t ra_maddr;
+ /* xdr_string_t ra_owner; */
+} rpcb_args_t;
+
+typedef struct rpc_call {
+ /* u_32_t rc_rpcvers; */
+ /* u_32_t rc_prog; */
+ u_32_t *rc_vers;
+ u_32_t *rc_proc;
+ xdr_auth_t rc_authcred;
+ xdr_auth_t rc_authverf;
+ union {
+ pmap_args_t ra_pmapargs;
+ rpcb_args_t ra_rpcbargs;
+ } rpcb_args;
+} rpc_call_t;
+
+#define rc_pmapargs rpcb_args.ra_pmapargs
+#define rc_rpcbargs rpcb_args.ra_rpcbargs
+/* END (RPC call structures) */
+
+/* BEGIN (RPC reply structures) */
+typedef struct rpcb_entry {
+ xdr_uaddr_t re_maddr;
+ xdr_proto_t re_netid;
+ /* u_32_t re_semantics; */
+ xdr_string_t re_family;
+ xdr_proto_t re_proto;
+ u_32_t *re_more; /* 1 == another entry follows */
+} rpcb_entry_t;
+
+typedef struct rpcb_listp {
+ u_32_t *rl_list; /* 1 == list follows */
+ int rl_cnt;
+ rpcb_entry_t rl_entries[2]; /* TCP / UDP only */
+} rpcb_listp_t;
+
+typedef struct rpc_resp {
+ /* u_32_t rr_acceptdeny; */
+ /* Omitted 'message denied' fork; we don't care about rejects. */
+ xdr_auth_t rr_authverf;
+ /* u_32_t *rr_astat; */
+ union {
+ u_32_t *resp_pmap;
+ xdr_uaddr_t resp_getaddr;
+ rpcb_listp_t resp_getaddrlist;
+ } rpcb_reply;
+} rpc_resp_t;
+
+#define rr_v2 rpcb_reply.resp_pmap
+#define rr_v3 rpcb_reply.resp_getaddr
+#define rr_v4 rpcb_reply.resp_getaddrlist
+/* END (RPC reply structures) */
+
+/* BEGIN (RPC message structure & macros) */
+typedef struct rpc_msg {
+ char rm_msgbuf[RPCB_MAXMSG]; /* RPCB data buffer */
+ u_int rm_buflen;
+ u_32_t *rm_xid;
+ /* u_32_t Call vs Reply */
+ union {
+ rpc_call_t rb_call;
+ rpc_resp_t rb_resp;
+ } rm_body;
+} rpc_msg_t;
+
+#define rm_call rm_body.rb_call
+#define rm_resp rm_body.rb_resp
+/* END (RPC message structure & macros) */
+
+/*
+ * These code paths aren't hot enough to warrant per transaction
+ * mutexes.
+ */
+typedef struct rpcb_xact {
+ struct rpcb_xact *rx_next;
+ struct rpcb_xact **rx_pnext;
+ u_32_t rx_xid; /* RPC transmission ID */
+ u_int rx_type; /* RPCB response type */
+ u_int rx_ref; /* reference count */
+ u_int rx_proto; /* transport protocol (v2 only) */
+} rpcb_xact_t;
+
+typedef struct rpcb_session {
+ ipfmutex_t rs_rxlock;
+ rpcb_xact_t *rs_rxlist;
+} rpcb_session_t;
+
+/*
+ * For an explanation, please see the following:
+ * RFC1832 - Sections 3.11, 4.4, and 4.5.
+ */
+#define XDRALIGN(x) ((((x) % 4) != 0) ? ((((x) + 3) / 4) * 4) : (x))
+
+extern ap_session_t *ap_sess_tab[AP_SESS_SIZE];
+extern ap_session_t *ap_sess_list;
+extern aproxy_t ap_proxies[];
+extern int ippr_ftp_pasvonly;
+
+extern int appr_add __P((aproxy_t *));
+extern int appr_ctl __P((ap_ctl_t *));
+extern int appr_del __P((aproxy_t *));
+extern int appr_init __P((void));
+extern void appr_unload __P((void));
+extern int appr_ok __P((fr_info_t *, tcphdr_t *, struct ipnat *));
+extern int appr_match __P((fr_info_t *, struct nat *));
+extern void appr_free __P((aproxy_t *));
+extern void aps_free __P((ap_session_t *));
+extern int appr_check __P((fr_info_t *, struct nat *));
+extern aproxy_t *appr_lookup __P((u_int, char *));
+extern int appr_new __P((fr_info_t *, struct nat *));
+extern int appr_ioctl __P((caddr_t, ioctlcmd_t, int));
+
+#endif /* __IP_PROXY_H__ */
diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_raudio_pxy.c b/usr/src/uts/common/inet/ipf/netinet/ip_raudio_pxy.c
new file mode 100644
index 0000000000..a9abc5809b
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/netinet/ip_raudio_pxy.c
@@ -0,0 +1,341 @@
+/*
+ * Copyright (C) 1998-2003 by Darren Reed
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ *
+ * $Id: ip_raudio_pxy.c,v 1.40.2.3 2005/02/04 10:22:55 darrenr Exp $
+ *
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#define IPF_RAUDIO_PROXY
+
+/* Entry points of the Real Audio proxy and its file-level state. */
+int ippr_raudio_init __P((void));
+void ippr_raudio_fini __P((void));
+int ippr_raudio_new __P((fr_info_t *, ap_session_t *, nat_t *));
+int ippr_raudio_in __P((fr_info_t *, ap_session_t *, nat_t *));
+int ippr_raudio_out __P((fr_info_t *, ap_session_t *, nat_t *));
+
+static frentry_t raudiofr; /* rule handed to fr_addstate() via fi.fin_fr in ippr_raudio_in() */
+
+int raudio_proxy_init = 0; /* 1 between ippr_raudio_init() and ippr_raudio_fini() */
+
+
+/*
+ * Prepare the Real Audio proxy for use: reset the skeleton rule, give it
+ * one reference and its flags, create its lock and mark the proxy as
+ * initialised.  Always returns 0.
+ */
+int ippr_raudio_init()
+{
+	frentry_t *rule = &raudiofr;
+
+	bzero((char *)rule, sizeof(*rule));
+	rule->fr_flags = FR_INQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE;
+	rule->fr_ref = 1;
+	MUTEX_INIT(&rule->fr_lock, "Real Audio proxy rule lock");
+
+	raudio_proxy_init = 1;
+	return 0;
+}
+
+
+/*
+ * Undo ippr_raudio_init(): destroy the skeleton rule's lock, but only if
+ * the init routine actually ran.
+ */
+void ippr_raudio_fini()
+{
+	if (raudio_proxy_init != 1)
+		return;
+
+	MUTEX_DESTROY(&raudiofr.fr_lock);
+	raudio_proxy_init = 0;
+}
+
+
+/*
+ * Create the per-session data for a new Real Audio proxy session.
+ *
+ * Allocates a zeroed raudio_t, hangs it off aps->aps_data, records its
+ * size in aps->aps_psiz and starts the session in TCP mode.
+ * Returns 0 on success, -1 if the allocation fails.  fin and nat are
+ * not used.
+ */
+int ippr_raudio_new(fin, aps, nat)
+fr_info_t *fin;
+ap_session_t *aps;
+nat_t *nat;
+{
+	raudio_t *rap;
+
+	fin = fin;	/* LINT */
+	nat = nat;	/* LINT */
+
+	KMALLOCS(aps->aps_data, void *, sizeof(raudio_t));
+	rap = aps->aps_data;
+	if (rap == NULL)
+		return -1;
+
+	bzero((char *)rap, sizeof(raudio_t));
+	aps->aps_psiz = sizeof(raudio_t);
+	rap->rap_mode = RAP_M_TCP;	/* default is for TCP */
+	return 0;
+}
+
+
+
+/*
+ * Parse the start-up messages of a Real Audio control connection in the
+ * "out" direction.
+ *
+ * Up to sizeof(membuf) bytes of TCP payload are copied into a local
+ * buffer.  The parser looks for "PNA", the 16-bit version that follows it
+ * and then a sequence of <id, length, data> options, recording the UDP
+ * and "robust" ports in the session's raudio_t.  Parsing stops for good
+ * once an option with id RA_ID_END has been seen (rap_eos).
+ *
+ * fin - packet information, aps - proxy session (aps_data is a raudio_t),
+ * nat - unused.  Every path returns 0.
+ *
+ * The two port bytes that follow an option's length are only read when
+ * they lie inside the copied payload; bytes beyond it count as zero,
+ * which is what the zero-filled buffer supplied whenever the old
+ * unchecked read happened to stay inside membuf.  The pointer is also no
+ * longer advanced past the end of the copied data.
+ *
+ * NOTE(review): rap_gotid persists in the session while id and len are
+ * locals that restart at 0 on every call, so an option split across two
+ * segments is not reassembled correctly.
+ */
+int ippr_raudio_out(fin, aps, nat)
+fr_info_t *fin;
+ap_session_t *aps;
+nat_t *nat;
+{
+	raudio_t *rap = aps->aps_data;
+	unsigned char membuf[512 + 1], *s;
+	u_short id = 0;
+	tcphdr_t *tcp;
+	int off, dlen;
+	int len = 0;
+	int left, port;
+	mb_t *m;
+
+	nat = nat;	/* LINT */
+
+	/*
+	 * If we've already processed the start messages, then nothing left
+	 * for the proxy to do.
+	 */
+	if (rap->rap_eos == 1)
+		return 0;
+
+	m = fin->fin_m;
+	tcp = (tcphdr_t *)fin->fin_dp;
+	off = (char *)tcp - (char *)fin->fin_ip;
+	off += (TCP_OFF(tcp) << 2) + fin->fin_ipoff;
+
+#ifdef __sgi
+	dlen = fin->fin_plen - off;
+#else
+	dlen = MSGDSIZE(m) - off;
+#endif
+	if (dlen <= 0)
+		return 0;
+
+	if (dlen > (int)sizeof(membuf))
+		dlen = sizeof(membuf);
+
+	bzero((char *)membuf, sizeof(membuf));
+	COPYDATA(m, off, dlen, (char *)membuf);
+	/*
+	 * In all the startup parsing, ensure that we don't go outside
+	 * the packet buffer boundary.
+	 */
+	/*
+	 * Look for the start of connection "PNA" string if not seen yet.
+	 */
+	if (rap->rap_seenpna == 0) {
+		s = (u_char *)memstr("PNA", (char *)membuf, 3, dlen);
+		if (s == NULL)
+			return 0;
+		s += 3;
+		rap->rap_seenpna = 1;
+	} else
+		s = membuf;
+
+	/*
+	 * Directly after the PNA will be the version number of this
+	 * connection.
+	 */
+	if (rap->rap_seenpna == 1 && rap->rap_seenver == 0) {
+		if ((s + 1) - membuf < dlen) {
+			rap->rap_version = (*s << 8) | *(s + 1);
+			s += 2;
+			rap->rap_seenver = 1;
+		} else
+			return 0;
+	}
+
+	/*
+	 * Now that we've been past the PNA and version number, we're into the
+	 * startup messages block.  This ends with a message whose ID is
+	 * RA_ID_END.
+	 */
+	while ((rap->rap_eos == 0) && ((s + 1) - membuf < dlen)) {
+		if (rap->rap_gotid == 0) {
+			id = (*s << 8) | *(s + 1);
+			s += 2;
+			rap->rap_gotid = 1;
+			if (id == RA_ID_END) {
+				rap->rap_eos = 1;
+				break;
+			}
+		} else if (rap->rap_gotlen == 0) {
+			len = (*s << 8) | *(s + 1);
+			s += 2;
+			rap->rap_gotlen = 1;
+		}
+
+		if (rap->rap_gotid == 1 && rap->rap_gotlen == 1) {
+			/*
+			 * s is at most dlen bytes into membuf here, so
+			 * left is never negative.
+			 */
+			left = dlen - (int)(s - membuf);
+			port = 0;
+			if (left > 0)
+				port = *s << 8;
+			if (left > 1)
+				port |= *(s + 1);
+
+			if (id == RA_ID_UDP) {
+				rap->rap_mode &= ~RAP_M_TCP;
+				rap->rap_mode |= RAP_M_UDP;
+				rap->rap_plport = port;
+			} else if (id == RA_ID_ROBUST) {
+				rap->rap_mode |= RAP_M_ROBUST;
+				rap->rap_prport = port;
+			}
+
+			/* Skip the option data without leaving the buffer. */
+			if (len > left)
+				s = membuf + dlen;
+			else
+				s += len;
+			rap->rap_gotlen = 0;
+			rap->rap_gotid = 0;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Process a packet travelling in the "in" direction of a Real Audio
+ * control session.
+ *
+ * Up to 19 bytes of payload starting at the "PNA" marker are gathered
+ * into rap->rap_svr, possibly across several segments; rap->rap_sbf
+ * records which of those byte positions have been stored. Once all 19
+ * have been seen and rap_eos is set, the IP header is temporarily
+ * rewritten so that nat_new() and fr_addstate() can be asked for UDP
+ * entries built from the ports recorded in rap; the original header
+ * fields are put back before returning.
+ *
+ * fin - packet information, aps - proxy session (aps_data is a raudio_t),
+ * nat - NAT entry of the control connection.
+ * Every path returns 0.
+ */
+int ippr_raudio_in(fin, aps, nat)
+fr_info_t *fin;
+ap_session_t *aps;
+nat_t *nat;
+{
+ unsigned char membuf[IPF_MAXPORTLEN + 1], *s;
+ tcphdr_t *tcp, tcph, *tcp2 = &tcph;
+ raudio_t *rap = aps->aps_data;
+ struct in_addr swa, swb;
+ int off, dlen, slen;
+ int a1, a2, a3, a4;
+ u_short sp, dp;
+ fr_info_t fi;
+ tcp_seq seq;
+ nat_t *nat2;
+ u_char swp;
+ ip_t *ip;
+ mb_t *m;
+
+ /*
+ * Wait until we've seen the end of the start messages and even then
+ * only proceed further if we're using UDP. If they want to use TCP
+ * then data is sent back on the same channel that is already open.
+ *
+ * rap_sdone is set further down once the set-up has been attempted,
+ * so that work happens at most once per session.
+ */
+ if (rap->rap_sdone != 0)
+ return 0;
+
+ m = fin->fin_m;
+ tcp = (tcphdr_t *)fin->fin_dp;
+ off = (char *)tcp - (char *)fin->fin_ip;
+ off += (TCP_OFF(tcp) << 2) + fin->fin_ipoff; /* offset of the TCP payload */
+
+#ifdef __sgi
+ dlen = fin->fin_plen - off;
+#else
+ dlen = MSGDSIZE(m) - off;
+#endif
+ if (dlen <= 0)
+ return 0;
+
+ if (dlen > sizeof(membuf))
+ dlen = sizeof(membuf);
+
+ bzero((char *)membuf, sizeof(membuf));
+ COPYDATA(m, off, dlen, (char *)membuf);
+
+ seq = ntohl(tcp->th_seq);
+ /*
+ * Check to see if the data in this packet is of interest to us.
+ * We only care for the first 19 bytes coming back from the server.
+ *
+ * a1 is the index into rap_svr at which this segment's data starts
+ * and a2 the number of bytes on offer.
+ */
+ if (rap->rap_sseq == 0) {
+ s = (u_char *)memstr("PNA", (char *)membuf, 3, dlen);
+ if (s == NULL)
+ return 0;
+ a1 = s - membuf;
+ dlen -= a1; /* only count the bytes from "PNA" onwards */
+ a1 = 0;
+ rap->rap_sseq = seq;
+ a2 = MIN(dlen, sizeof(rap->rap_svr));
+ } else if (seq <= rap->rap_sseq + sizeof(rap->rap_svr)) {
+ /*
+ * seq # which is the start of data and from that the offset
+ * into the buffer array.
+ */
+ a1 = seq - rap->rap_sseq;
+ a2 = MIN(dlen, sizeof(rap->rap_svr));
+ a2 -= a1;
+ s = membuf;
+ } else
+ return 0;
+
+ for (a3 = a1, a4 = a2; (a4 > 0) && (a3 < 19) && (a3 >= 0); a4--,a3++) {
+ rap->rap_sbf |= (1 << a3); /* mark byte a3 as received */
+ rap->rap_svr[a3] = *s++;
+ }
+
+ if ((rap->rap_sbf != 0x7ffff) || (!rap->rap_eos)) /* 19 bits */
+ return 0;
+ rap->rap_sdone = 1;
+
+ s = (u_char *)rap->rap_svr + 11;
+ if (((*s << 8) | *(s + 1)) == RA_ID_ROBUST) {
+ s += 2;
+ rap->rap_srport = (*s << 8) | *(s + 1);
+ }
+
+ ip = fin->fin_ip;
+ swp = ip->ip_p; /* saved so the header can be restored on exit */
+ swa = ip->ip_src;
+ swb = ip->ip_dst;
+
+ ip->ip_p = IPPROTO_UDP;
+ ip->ip_src = nat->nat_inip;
+ ip->ip_dst = nat->nat_oip;
+
+ bcopy((char *)fin, (char *)&fi, sizeof(fi)); /* work on a private copy of fin */
+ bzero((char *)tcp2, sizeof(*tcp2));
+ TCP_OFF_A(tcp2, 5);
+ fi.fin_state = NULL;
+ fi.fin_nat = NULL;
+ fi.fin_flx |= FI_IGNORE;
+ fi.fin_dp = (char *)tcp2;
+ fi.fin_fr = &raudiofr;
+ fi.fin_dlen = sizeof(*tcp2);
+ fi.fin_plen = fi.fin_hlen + sizeof(*tcp2);
+ tcp2->th_win = htons(8192);
+ slen = ip->ip_len; /* restored on exit */
+ ip->ip_len = fin->fin_hlen + sizeof(*tcp);
+
+ if (((rap->rap_mode & RAP_M_UDP_ROBUST) == RAP_M_UDP_ROBUST) &&
+ (rap->rap_srport != 0)) {
+ dp = rap->rap_srport;
+ sp = rap->rap_prport;
+ tcp2->th_sport = htons(sp);
+ tcp2->th_dport = htons(dp);
+ fi.fin_data[0] = dp;
+ fi.fin_data[1] = sp;
+ fi.fin_out = 0;
+ nat2 = nat_new(&fi, nat->nat_ptr, NULL,
+ NAT_SLAVE|IPN_UDP | (sp ? 0 : SI_W_SPORT),
+ NAT_OUTBOUND);
+ if (nat2 != NULL) {
+ (void) nat_proto(&fi, nat2, IPN_UDP);
+ nat_update(&fi, nat2, nat2->nat_ptr);
+
+ (void) fr_addstate(&fi, NULL, (sp ? 0 : SI_W_SPORT));
+ if (fi.fin_state != NULL)
+ fr_statederef(&fi, (ipstate_t **)&fi.fin_state);
+ }
+ }
+
+ if ((rap->rap_mode & RAP_M_UDP) == RAP_M_UDP) {
+ sp = rap->rap_plport;
+ tcp2->th_sport = htons(sp);
+ tcp2->th_dport = 0; /* XXX - don't specify remote port */
+ fi.fin_data[0] = sp;
+ fi.fin_data[1] = 0;
+ fi.fin_out = 1;
+ nat2 = nat_new(&fi, nat->nat_ptr, NULL,
+ NAT_SLAVE|IPN_UDP|SI_W_DPORT,
+ NAT_OUTBOUND);
+ if (nat2 != NULL) {
+ (void) nat_proto(&fi, nat2, IPN_UDP);
+ nat_update(&fi, nat2, nat2->nat_ptr);
+
+ (void) fr_addstate(&fi, NULL, SI_W_DPORT);
+ if (fi.fin_state != NULL)
+ fr_statederef(&fi, (ipstate_t **)&fi.fin_state);
+ }
+ }
+
+ ip->ip_p = swp; /* put the real header fields back */
+ ip->ip_len = slen;
+ ip->ip_src = swa;
+ ip->ip_dst = swb;
+ return 0;
+}
diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_rcmd_pxy.c b/usr/src/uts/common/inet/ipf/netinet/ip_rcmd_pxy.c
new file mode 100644
index 0000000000..919c47cb90
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/netinet/ip_rcmd_pxy.c
@@ -0,0 +1,239 @@
+/*
+ * Copyright (C) 1998-2003 by Darren Reed
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ *
+ * $Id: ip_rcmd_pxy.c,v 1.41.2.4 2005/02/04 10:22:55 darrenr Exp $
+ *
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Simple RCMD transparent proxy for in-kernel use. For use with the NAT
+ * code.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#define IPF_RCMD_PROXY
+
+/* Entry points of the RCMD proxy and its file-level state. */
+int ippr_rcmd_init __P((void));
+void ippr_rcmd_fini __P((void));
+int ippr_rcmd_new __P((fr_info_t *, ap_session_t *, nat_t *));
+int ippr_rcmd_out __P((fr_info_t *, ap_session_t *, nat_t *));
+int ippr_rcmd_in __P((fr_info_t *, ap_session_t *, nat_t *));
+u_short ipf_rcmd_atoi __P((char *));
+int ippr_rcmd_portmsg __P((fr_info_t *, ap_session_t *, nat_t *));
+
+static frentry_t rcmdfr; /* rule handed to fr_addstate() via fi.fin_fr in ippr_rcmd_portmsg() */
+
+int rcmd_proxy_init = 0; /* 1 between ippr_rcmd_init() and ippr_rcmd_fini() */
+
+
+/*
+ * Prepare the RCMD proxy for use: reset the skeleton rule, give it one
+ * reference and its flags, create its lock and mark the proxy as
+ * initialised.  Always returns 0.
+ */
+int ippr_rcmd_init()
+{
+	frentry_t *rule = &rcmdfr;
+
+	bzero((char *)rule, sizeof(*rule));
+	rule->fr_flags = FR_INQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE;
+	rule->fr_ref = 1;
+	MUTEX_INIT(&rule->fr_lock, "RCMD proxy rule lock");
+
+	rcmd_proxy_init = 1;
+	return 0;
+}
+
+
+/*
+ * Undo ippr_rcmd_init(): destroy the skeleton rule's lock, but only if
+ * the init routine actually ran.
+ */
+void ippr_rcmd_fini()
+{
+	if (rcmd_proxy_init != 1)
+		return;
+
+	MUTEX_DESTROY(&rcmdfr.fr_lock);
+	rcmd_proxy_init = 0;
+}
+
+
+/*
+ * Setup for a new RCMD proxy.
+ *
+ * Allocates one u_32_t of per-session data (initialised to 0; it later
+ * holds the sequence number recorded by ippr_rcmd_portmsg()) and copies
+ * the TCP source and destination ports of the triggering packet into
+ * the session.
+ *
+ * fin - packet information (fin_dp must point at the TCP header),
+ * aps - proxy session being created, nat - unused.
+ * Returns 0 on success, -1 if the allocation fails.
+ *
+ * aps_psiz is only recorded once the allocation has succeeded, as
+ * ippr_raudio_new() does, and the debug message passes an int to match
+ * its %d conversion.
+ */
+int ippr_rcmd_new(fin, aps, nat)
+fr_info_t *fin;
+ap_session_t *aps;
+nat_t *nat;
+{
+	tcphdr_t *tcp = (tcphdr_t *)fin->fin_dp;
+
+	fin = fin;	/* LINT */
+	nat = nat;	/* LINT */
+
+	KMALLOCS(aps->aps_data, u_32_t *, sizeof(u_32_t));
+	if (aps->aps_data == NULL) {
+#ifdef IP_RCMD_PROXY_DEBUG
+		printf("ippr_rcmd_new:KMALLOCS(%d) failed\n",
+		       (int)sizeof(u_32_t));
+#endif
+		return -1;
+	}
+	aps->aps_psiz = sizeof(u_32_t);
+	*(u_32_t *)aps->aps_data = 0;
+	aps->aps_sport = tcp->th_sport;
+	aps->aps_dport = tcp->th_dport;
+	return 0;
+}
+
+
+/*
+ * ipf_rcmd_atoi - implement a simple version of atoi
+ *
+ * Converts the leading run of decimal digits in ptr to a number and
+ * stops at the first non-digit or at the terminating NUL.
+ *
+ * Returns the value, or 0 when there are no leading digits or when the
+ * value does not fit in 16 bits.  The accumulator used to be a u_short
+ * that wrapped silently, so "65537" was returned as 1; the only caller
+ * in this file treats 0 as "no usable port".
+ */
+u_short ipf_rcmd_atoi(ptr)
+char *ptr;
+{
+	register char *s = ptr, c;
+	register u_int i = 0;
+
+	while (((c = *s++) != '\0') && ISDIGIT(c)) {
+		i = (i * 10) + (u_int)(c - '0');
+		if (i > 0xffff)
+			return 0;
+	}
+	return (u_short)i;
+}
+
+/*
+ * Look for the port number sent as ASCII digits at the start of the data
+ * stream and prepare NAT and state entries for the connection which will
+ * come back the other way.
+ *
+ * A SYN only records th_seq + 1 (kept in network byte order) in aps_data.
+ * Later segments are examined when that recorded value is 0 or equals the
+ * segment's th_seq. At most sizeof(portbuf) bytes of payload are copied
+ * and parsed with ipf_rcmd_atoi(); a result of 0 means nothing is done.
+ *
+ * When no matching NAT entry exists yet, a skeleton TCP SYN header is
+ * built in tcph, ip_len / ip_src / ip_dst of the real packet are changed
+ * while nat_new() and fr_addstate() run, and are restored afterwards.
+ *
+ * fin - packet information, aps - proxy session (aps_data is a u_32_t),
+ * nat - NAT entry of the control connection.
+ * Every path returns 0.
+ */
+int ippr_rcmd_portmsg(fin, aps, nat)
+fr_info_t *fin;
+ap_session_t *aps;
+nat_t *nat;
+{
+ tcphdr_t *tcp, tcph, *tcp2 = &tcph;
+ struct in_addr swip, swip2;
+ int off, dlen, nflags;
+ char portbuf[8], *s;
+ fr_info_t fi;
+ u_short sp;
+ nat_t *nat2;
+ ip_t *ip;
+ mb_t *m;
+
+ tcp = (tcphdr_t *)fin->fin_dp;
+
+ if (tcp->th_flags & TH_SYN) {
+ *(u_32_t *)aps->aps_data = htonl(ntohl(tcp->th_seq) + 1);
+ return 0;
+ }
+
+ if ((*(u_32_t *)aps->aps_data != 0) &&
+ (tcp->th_seq != *(u_32_t *)aps->aps_data))
+ return 0;
+
+ m = fin->fin_m;
+ ip = fin->fin_ip;
+ off = (char *)tcp - (char *)ip + (TCP_OFF(tcp) << 2) + fin->fin_ipoff; /* offset of the TCP payload */
+
+#ifdef __sgi
+ dlen = fin->fin_plen - off;
+#else
+ dlen = MSGDSIZE(m) - off;
+#endif
+ if (dlen <= 0)
+ return 0;
+
+ bzero(portbuf, sizeof(portbuf));
+ COPYDATA(m, off, MIN(sizeof(portbuf), dlen), portbuf);
+
+ portbuf[sizeof(portbuf) - 1] = '\0'; /* guarantee termination for ipf_rcmd_atoi() */
+ s = portbuf;
+ sp = ipf_rcmd_atoi(s);
+ if (sp == 0) {
+#ifdef IP_RCMD_PROXY_DEBUG
+ printf("ippr_rcmd_portmsg:sp == 0 dlen %d [%s]\n",
+ dlen, portbuf);
+#endif
+ return 0;
+ }
+
+ /*
+ * Add skeleton NAT entry for connection which will come back the
+ * other way.
+ */
+ bcopy((char *)fin, (char *)&fi, sizeof(fi)); /* work on a private copy of fin */
+ fi.fin_flx |= FI_IGNORE;
+ fi.fin_data[0] = sp;
+ fi.fin_data[1] = 0;
+ if (nat->nat_dir == NAT_OUTBOUND)
+ nat2 = nat_outlookup(&fi, NAT_SEARCH|IPN_TCP, nat->nat_p,
+ nat->nat_inip, nat->nat_oip);
+ else
+ nat2 = nat_inlookup(&fi, NAT_SEARCH|IPN_TCP, nat->nat_p,
+ nat->nat_inip, nat->nat_oip);
+ if (nat2 == NULL) {
+ int slen;
+
+ slen = ip->ip_len; /* restored below */
+ ip->ip_len = fin->fin_hlen + sizeof(*tcp);
+ bzero((char *)tcp2, sizeof(*tcp2));
+ tcp2->th_win = htons(8192);
+ tcp2->th_sport = htons(sp);
+ tcp2->th_dport = 0; /* XXX - don't specify remote port */
+ TCP_OFF_A(tcp2, 5);
+ tcp2->th_flags = TH_SYN;
+ fi.fin_dp = (char *)tcp2;
+ fi.fin_fr = &rcmdfr;
+ fi.fin_dlen = sizeof(*tcp2);
+ fi.fin_plen = fi.fin_hlen + sizeof(*tcp2);
+ fi.fin_flx &= FI_LOWTTL|FI_FRAG|FI_TCPUDP|FI_OPTIONS|FI_IGNORE;
+ nflags = NAT_SLAVE|IPN_TCP|SI_W_DPORT;
+
+ swip = ip->ip_src; /* saved so the header can be restored below */
+ swip2 = ip->ip_dst;
+
+ if (nat->nat_dir == NAT_OUTBOUND) {
+ fi.fin_fi.fi_saddr = nat->nat_inip.s_addr;
+ ip->ip_src = nat->nat_inip;
+ } else {
+ fi.fin_fi.fi_saddr = nat->nat_oip.s_addr;
+ ip->ip_src = nat->nat_oip;
+ nflags |= NAT_NOTRULEPORT;
+ }
+
+ nat2 = nat_new(&fi, nat->nat_ptr, NULL, nflags, nat->nat_dir);
+
+ if (nat2 != NULL) {
+ (void) nat_proto(&fi, nat2, IPN_TCP);
+ nat_update(&fi, nat2, nat2->nat_ptr);
+ fi.fin_ifp = NULL;
+ if (nat->nat_dir == NAT_INBOUND) {
+ fi.fin_fi.fi_daddr = nat->nat_inip.s_addr;
+ ip->ip_dst = nat->nat_inip;
+ }
+ (void) fr_addstate(&fi, &nat2->nat_state, SI_W_DPORT);
+ if (fi.fin_state != NULL)
+ fr_statederef(&fi, (ipstate_t **)&fi.fin_state);
+ }
+ ip->ip_len = slen; /* put the real header fields back */
+ ip->ip_src = swip;
+ ip->ip_dst = swip2;
+ }
+ return 0;
+}
+
+
+/*
+ * "out" hook of the RCMD proxy: the port message is only examined here
+ * when the NAT entry is an outbound one.  Returns what
+ * ippr_rcmd_portmsg() returns, or 0 when the packet is not examined.
+ */
+int ippr_rcmd_out(fin, aps, nat)
+fr_info_t *fin;
+ap_session_t *aps;
+nat_t *nat;
+{
+	if (nat->nat_dir != NAT_OUTBOUND)
+		return 0;
+
+	return ippr_rcmd_portmsg(fin, aps, nat);
+}
+
+
+/*
+ * "in" hook of the RCMD proxy: the port message is only examined here
+ * when the NAT entry is an inbound one.  Returns what
+ * ippr_rcmd_portmsg() returns, or 0 when the packet is not examined.
+ */
+int ippr_rcmd_in(fin, aps, nat)
+fr_info_t *fin;
+ap_session_t *aps;
+nat_t *nat;
+{
+	if (nat->nat_dir != NAT_INBOUND)
+		return 0;
+
+	return ippr_rcmd_portmsg(fin, aps, nat);
+}
diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_rpcb_pxy.c b/usr/src/uts/common/inet/ipf/netinet/ip_rpcb_pxy.c
new file mode 100644
index 0000000000..f67c01a232
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/netinet/ip_rpcb_pxy.c
@@ -0,0 +1,1452 @@
+/*
+ * Copyright (C) 2002-2003 by Ryan Beasley <ryanb@goddamnbastard.org>
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ */
+/*
+ * Overview:
+ * This is an in-kernel application proxy for Sun's RPCBIND (nee portmap)
+ * protocol as defined in RFC1833. It is far from complete, mostly
+ * lacking in less-likely corner cases, but it's definitely functional.
+ *
+ * Invocation:
+ * rdr <int> <e_ip>/32 port <e_p> -> <i_ip> port <i_p> udp proxy rpcbu
+ *
+ * If the host running IP Filter is the same as the RPC server, it's
+ * perfectly legal for both the internal and external addresses and ports
+ * to match.
+ *
+ * When triggered by appropriate IP NAT rules, this proxy works by
+ * examining data contained in received packets. Requests and replies are
+ * modified, NAT and state table entries created, etc., as necessary.
+ */
+/*
+ * TODO / NOTES
+ *
+ * o Must implement locking to protect proxy session data.
+ * o Fragmentation isn't supported.
+ * o Only supports UDP.
+ * o Doesn't support multiple RPC records in a single request.
+ * o Errors should be more fine-grained. (e.g., malloc failure vs.
+ * illegal RPCB request / reply)
+ * o Even with the limit on the total amount of recorded transactions,
+ * should there be a timeout on transaction removal?
+ * o There is a potential collision between cloning, wildcard NAT and
+ * state entries. There should be an appr_getport routine
+ * to avoid this.
+ * o The enclosed hack of STREAMS support is pretty sick and most likely
+ * broken.
+ *
+ * $Id: ip_rpcb_pxy.c,v 2.25.2.3 2005/02/04 10:22:56 darrenr Exp $
+ */
+
+#define IPF_RPCB_PROXY
+
+/*
+ * Function prototypes
+ *
+ * The first group holds the proxy's externally visible entry points; the
+ * static group holds helpers private to this file.
+ */
+int ippr_rpcb_init __P((void));
+void ippr_rpcb_fini __P((void));
+int ippr_rpcb_new __P((fr_info_t *, ap_session_t *, nat_t *));
+void ippr_rpcb_del __P((ap_session_t *));
+int ippr_rpcb_in __P((fr_info_t *, ap_session_t *, nat_t *));
+int ippr_rpcb_out __P((fr_info_t *, ap_session_t *, nat_t *));
+
+static void ippr_rpcb_flush __P((rpcb_session_t *));
+static int ippr_rpcb_decodereq __P((fr_info_t *, nat_t *,
+ rpcb_session_t *, rpc_msg_t *));
+static int ippr_rpcb_skipauth __P((rpc_msg_t *, xdr_auth_t *, u_32_t **));
+static int ippr_rpcb_insert __P((rpcb_session_t *, rpcb_xact_t *));
+static int ippr_rpcb_xdrrpcb __P((rpc_msg_t *, u_32_t *, rpcb_args_t *));
+static int ippr_rpcb_getuaddr __P((rpc_msg_t *, xdr_uaddr_t *,
+ u_32_t **));
+static u_int ippr_rpcb_atoi __P((char *));
+static int ippr_rpcb_modreq __P((fr_info_t *, nat_t *, rpc_msg_t *,
+ mb_t *, u_int));
+static int ippr_rpcb_decoderep __P((fr_info_t *, nat_t *,
+ rpcb_session_t *, rpc_msg_t *, rpcb_xact_t **));
+static rpcb_xact_t * ippr_rpcb_lookup __P((rpcb_session_t *, u_32_t));
+static void ippr_rpcb_deref __P((rpcb_session_t *, rpcb_xact_t *));
+static int ippr_rpcb_getproto __P((rpc_msg_t *, xdr_proto_t *,
+ u_32_t **));
+static int ippr_rpcb_getnat __P((fr_info_t *, nat_t *, u_int, u_int));
+static int ippr_rpcb_modv3 __P((fr_info_t *, nat_t *, rpc_msg_t *,
+ mb_t *, u_int));
+static int ippr_rpcb_modv4 __P((fr_info_t *, nat_t *, rpc_msg_t *,
+ mb_t *, u_int));
+static void ippr_rpcb_fixlen __P((fr_info_t *, int));
+
+/*
+ * Global variables
+ */
+static frentry_t rpcbfr; /* Skeleton rule for reference by entities
+ this proxy creates. */
+static int rpcbcnt; /* Count of rpcb_xact_t records allocated by
+ ippr_rpcb_insert(), which compares it
+ against RPCB_MAXREQS. */
+ /* XXX rpcbcnt still requires locking. */
+
+int rpcb_proxy_init = 0; /* 1 between ippr_rpcb_init() and ippr_rpcb_fini() */
+
+
+/*
+ * Since rpc_msg contains only pointers, one should use this macro as a
+ * handy way to get to the goods. (In case you're wondering about the name,
+ * this started as BYTEREF -> BREF -> B.)
+ *
+ * B(r) yields the 32-bit word at r converted from network to host order.
+ * NOTE(review): callers write B(p++); that is only safe if ntohl evaluates
+ * its argument exactly once on the target platform - confirm.
+ */
+#define B(r) (u_32_t)ntohl(*(r))
+
+/*
+ * Public subroutines
+ */
+
+/* -------------------------------------------------------------------- */
+/* Function:    ippr_rpcb_init                                          */
+/* Returns:     int - 0 == success (the only value returned)            */
+/* Parameters:  (void)                                                  */
+/*                                                                      */
+/* Reset the skeleton rule (one reference, pass/quick/keep-state),      */
+/* create its lock, zero the transaction counter and flag the proxy as  */
+/* initialised.                                                         */
+/* -------------------------------------------------------------------- */
+int
+ippr_rpcb_init()
+{
+	frentry_t *rule = &rpcbfr;
+
+	bzero((char *)rule, sizeof(*rule));
+	rule->fr_flags = FR_PASS|FR_QUICK|FR_KEEPSTATE;
+	rule->fr_ref = 1;
+	MUTEX_INIT(&rule->fr_lock, "ipf Sun RPCB proxy rule lock");
+
+	rpcbcnt = 0;
+	rpcb_proxy_init = 1;
+
+	return(0);
+}
+
+/* -------------------------------------------------------------------- */
+/* Function:    ippr_rpcb_fini                                          */
+/* Returns:     void                                                    */
+/* Parameters:  (void)                                                  */
+/*                                                                      */
+/* Destroy the skeleton rule's mutex, provided ippr_rpcb_init() ran, so */
+/* that the lock is not leaked.                                         */
+/* -------------------------------------------------------------------- */
+void
+ippr_rpcb_fini()
+{
+	if (rpcb_proxy_init != 1)
+		return;
+
+	MUTEX_DESTROY(&rpcbfr.fr_lock);
+	rpcb_proxy_init = 0;
+}
+
+/* -------------------------------------------------------------------- */
+/* Function:    ippr_rpcb_new                                           */
+/* Returns:     int - -1 == allocation failed, 0 == success             */
+/* Parameters:  fin(I) - pointer to packet information (unused)         */
+/*              aps(I) - pointer to proxy session structure             */
+/*              nat(I) - pointer to NAT session structure (unused)      */
+/*                                                                      */
+/* Allocate a zeroed rpcb_session_t, create its transaction-list lock   */
+/* and attach it to aps->aps_data.                                      */
+/* NOTE(review): aps->aps_psiz is not set here, unlike in               */
+/* ippr_raudio_new() and ippr_rcmd_new(); confirm how aps_free()        */
+/* releases aps_data for this proxy.                                    */
+/* -------------------------------------------------------------------- */
+int
+ippr_rpcb_new(fin, aps, nat)
+	fr_info_t *fin;
+	ap_session_t *aps;
+	nat_t *nat;
+{
+	rpcb_session_t *rs;
+
+	fin = fin;	/* LINT */
+	nat = nat;	/* LINT */
+
+	KMALLOC(rs, rpcb_session_t *);
+	if (rs != NULL) {
+		bzero((char *)rs, sizeof(*rs));
+		MUTEX_INIT(&rs->rs_rxlock, "ipf Sun RPCB proxy session lock");
+		aps->aps_data = rs;
+		return(0);
+	}
+
+	return(-1);
+}
+
+/* -------------------------------------------------------------------- */
+/* Function:    ippr_rpcb_del                                           */
+/* Returns:     void                                                    */
+/* Parameters:  aps(I) - pointer to proxy session structure             */
+/*                                                                      */
+/* Flush the session's list of recorded RPCB requests under the list    */
+/* lock, then destroy that lock.  The rpcb_session_t itself is not      */
+/* freed here.                                                          */
+/* -------------------------------------------------------------------- */
+void
+ippr_rpcb_del(aps)
+	ap_session_t *aps;
+{
+	rpcb_session_t *rs = (rpcb_session_t *)aps->aps_data;
+
+	MUTEX_ENTER(&rs->rs_rxlock);
+	ippr_rpcb_flush(rs);
+	MUTEX_EXIT(&rs->rs_rxlock);
+
+	MUTEX_DESTROY(&rs->rs_rxlock);
+}
+
+/* -------------------------------------------------------------------- */
+/* Function:    ippr_rpcb_in                                            */
+/* Returns:     int - APR_ERR(1) == drop the packet,                    */
+/*                    0 when the request needs no rewrite,              */
+/*                    else whatever ippr_rpcb_modreq() returns          */
+/* Parameters:  fin(I) - pointer to packet information                  */
+/*              aps(I) - pointer to proxy session structure             */
+/*              nat(I) - pointer to NAT session structure               */
+/*                                                                      */
+/* Given a presumed RPCB request, reject fragments, short packets and   */
+/* payloads outside RPCB_REQMIN..RPCB_REQMAX, copy the UDP payload into */
+/* a local rpc_msg_t and hand it to ippr_rpcb_decodereq().  If the      */
+/* decoder asks for it, pass the packet on to be rewritten.             */
+/* -------------------------------------------------------------------- */
+int
+ippr_rpcb_in(fin, aps, nat)
+	fr_info_t *fin;
+	ap_session_t *aps;
+	nat_t *nat;
+{
+	rpc_msg_t rpcmsg;
+	rpc_msg_t *rm = &rpcmsg;
+	rpcb_session_t *rs;
+	u_int off, dlen;
+	mb_t *m;
+	int rv;
+
+	/* Fragments and truncated packets are never examined. */
+	if ((fin->fin_flx & (FI_FRAG|FI_SHORT)) != 0)
+		return(APR_ERR(1));
+
+	rs = (rpcb_session_t *)aps->aps_data;
+	m = fin->fin_m;
+
+	/* Locate the UDP payload and work out its length. */
+	off = (char *)fin->fin_dp - (char *)fin->fin_ip;
+	off += sizeof(udphdr_t) + fin->fin_ipoff;
+	dlen = fin->fin_dlen - sizeof(udphdr_t);
+
+	if ((dlen < RPCB_REQMIN) || (dlen > RPCB_REQMAX))
+		return(APR_ERR(1));
+
+	/* Take a private, zero-padded copy of the message. */
+	bzero((char *)rm, sizeof(*rm));
+	COPYDATA(m, off, dlen, (caddr_t)&rm->rm_msgbuf);
+	rm->rm_buflen = dlen;
+
+	rv = ippr_rpcb_decodereq(fin, nat, rs, rm);
+
+	if (rv == -1)
+		return(APR_ERR(1));
+
+	if (rv == 1)
+		return(ippr_rpcb_modreq(fin, nat, rm, m, off));
+
+	if (rv != 0) {
+		/*CONSTANTCONDITION*/
+		IPF_PANIC(1, ("illegal rv %d (ippr_rpcb_req)", rv));
+	}
+
+	return(rv);
+}
+
+/* -------------------------------------------------------------------- */
+/* Function:    ippr_rpcb_out                                           */
+/* Returns:     int - APR_ERR(1) == drop the packet,                    */
+/*                    else change in packet length (in bytes)           */
+/* Parameters:  fin(I) - pointer to packet information                  */
+/*              aps(I) - pointer to proxy session structure             */
+/*              nat(I) - pointer to NAT session structure               */
+/*                                                                      */
+/* Given a presumed RPCB reply, perform some minor tests and pass off   */
+/* for decoding.  For a positive reply whose embedded address differs   */
+/* between the internal and external side, the packet is rewritten by   */
+/* ippr_rpcb_modv3() or ippr_rpcb_modv4().                              */
+/*                                                                      */
+/* rx starts out as NULL so that the checks after ippr_rpcb_decoderep() */
+/* are well defined even if the decoder fails without storing a         */
+/* transaction pointer.                                                 */
+/* -------------------------------------------------------------------- */
+int
+ippr_rpcb_out(fin, aps, nat)
+	fr_info_t *fin;
+	ap_session_t *aps;
+	nat_t *nat;
+{
+	rpc_msg_t rpcmsg, *rm;
+	rpcb_session_t *rs;
+	rpcb_xact_t *rx;
+	u_int off, dlen;
+	int rv, diff;
+	mb_t *m;
+
+	/* Disallow fragmented or illegally short packets. */
+	if ((fin->fin_flx & (FI_FRAG|FI_SHORT)) != 0)
+		return(APR_ERR(1));
+
+	/* Perform basic variable initialization. */
+	rs = (rpcb_session_t *)aps->aps_data;
+	rx = NULL;
+
+	m = fin->fin_m;
+	off = (char *)fin->fin_dp - (char *)fin->fin_ip;
+	off += sizeof(udphdr_t) + fin->fin_ipoff;
+	dlen = fin->fin_dlen - sizeof(udphdr_t);
+	diff = 0;
+
+	/* Disallow packets outside legal range for supported replies. */
+	if ((dlen < RPCB_REPMIN) || (dlen > RPCB_REPMAX))
+		return(APR_ERR(1));
+
+	/* Copy packet over to convenience buffer. */
+	rm = &rpcmsg;
+	bzero((char *)rm, sizeof(*rm));
+	COPYDATA(m, off, dlen, (caddr_t)&rm->rm_msgbuf);
+	rm->rm_buflen = dlen;
+
+	/* Send off to decode reply. */
+	rv = ippr_rpcb_decoderep(fin, nat, rs, rm, &rx);
+
+	switch(rv)
+	{
+	case -1: /* Bad packet */
+		if (rx != NULL) {
+			/* Drop only the reference taken for this thread. */
+			MUTEX_ENTER(&rs->rs_rxlock);
+			ippr_rpcb_deref(rs, rx);
+			MUTEX_EXIT(&rs->rs_rxlock);
+		}
+		return(APR_ERR(1));
+	case 0: /* Negative reply / request rejected */
+		break;
+	case 1: /* Positive reply */
+		/*
+		 * With the IP address embedded in a GETADDR(LIST) reply,
+		 * we'll need to rewrite the packet in the very possible
+		 * event that the internal & external addresses aren't the
+		 * same.  (i.e., this box is either a router or rpcbind
+		 * only listens on loopback.)
+		 */
+		if (nat->nat_inip.s_addr != nat->nat_outip.s_addr) {
+			if (rx->rx_type == RPCB_RES_STRING)
+				diff = ippr_rpcb_modv3(fin, nat, rm, m, off);
+			else if (rx->rx_type == RPCB_RES_LIST)
+				diff = ippr_rpcb_modv4(fin, nat, rm, m, off);
+		}
+		break;
+	default:
+		/*CONSTANTCONDITION*/
+		IPF_PANIC(1, ("illegal rv %d (ippr_rpcb_decoderep)", rv));
+	}
+
+	if (rx != NULL) {
+		MUTEX_ENTER(&rs->rs_rxlock);
+		/* XXX Gross hack - I'm overloading the reference
+		 * counter to deal with both threads and retransmitted
+		 * requests.  One deref signals that this thread is
+		 * finished with rx, and the other signals that we've
+		 * processed its reply.
+		 */
+		ippr_rpcb_deref(rs, rx);
+		ippr_rpcb_deref(rs, rx);
+		MUTEX_EXIT(&rs->rs_rxlock);
+	}
+
+	return(diff);
+}
+
+/*
+ * Private support subroutines
+ */
+
+/* -------------------------------------------------------------------- */
+/* Function:    ippr_rpcb_flush                                         */
+/* Returns:     void                                                    */
+/* Parameters:  rs(I) - pointer to RPCB session structure               */
+/*                                                                      */
+/* Free every transaction still recorded for the session.               */
+/*                                                                      */
+/* Each record on the list was counted in rpcbcnt when                  */
+/* ippr_rpcb_insert() allocated it, so the counter is given back here;  */
+/* otherwise sessions torn down with requests outstanding would eat     */
+/* into the RPCB_MAXREQS budget for good.  The list head is cleared so  */
+/* that it does not keep pointing at freed memory.                      */
+/* The only caller in view, ippr_rpcb_del(), holds rs_rxlock.           */
+/* -------------------------------------------------------------------- */
+static void
+ippr_rpcb_flush(rs)
+	rpcb_session_t *rs;
+{
+	rpcb_xact_t *r1, *r2;
+
+	r1 = rs->rs_rxlist;
+	rs->rs_rxlist = NULL;
+
+	while (r1 != NULL) {
+		r2 = r1;
+		r1 = r1->rx_next;
+		KFREE(r2);
+		if (rpcbcnt > 0)
+			--rpcbcnt;
+	}
+}
+
+/* -------------------------------------------------------------------- */
+/* Function: ippr_rpcb_decodereq */
+/* Returns: int - -1 == bad request or critical failure, */
+/* 0 == request successfully decoded, */
+/* 1 == request successfully decoded; requires */
+/* address rewrite/modification */
+/* Parameters: fin(I) - pointer to packet information */
+/* nat(I) - pointer to NAT session structure */
+/* rs(I) - pointer to RPCB session structure */
+/* rm(I) - pointer to RPC message structure */
+/* */
+/* Take a presumed RPCB request, decode it, and store the results in */
+/* the transaction list. A return of 1 tells the caller that the */
+/* target address carried in the request has to be rewritten. */
+/* WARNING: It's the responsibility of the caller to make sure there */
+/* is enough room in rm_msgbuf for the basic RPC message "preamble": */
+/* six 32-bit words are consumed before any length check is made here. */
+/* -------------------------------------------------------------------- */
+static int
+ippr_rpcb_decodereq(fin, nat, rs, rm)
+ fr_info_t *fin;
+ nat_t *nat;
+ rpcb_session_t *rs;
+ rpc_msg_t *rm;
+{
+ rpcb_args_t *ra;
+ u_32_t xdr, *p;
+ rpc_call_t *rc;
+ rpcb_xact_t rx;
+ int mod;
+
+ p = (u_32_t *)rm->rm_msgbuf;
+ mod = 0; /* becomes 1 when the request must be rewritten */
+
+ bzero((char *)&rx, sizeof(rx));
+ rc = &rm->rm_call;
+
+ rm->rm_xid = p;
+ rx.rx_xid = B(p++); /* Record this message's XID. */
+
+ /* Parse out and test the RPC header. */
+ if ((B(p++) != RPCB_CALL) ||
+ (B(p++) != RPCB_MSG_VERSION) ||
+ (B(p++) != RPCB_PROG))
+ return(-1);
+
+ /* Record the RPCB version and procedure. */
+ rc->rc_vers = p++;
+ rc->rc_proc = p++;
+
+ /* Bypass RPC authentication stuff. */
+ if (ippr_rpcb_skipauth(rm, &rc->rc_authcred, &p) != 0)
+ return(-1);
+ if (ippr_rpcb_skipauth(rm, &rc->rc_authverf, &p) != 0)
+ return(-1);
+
+ /* Compare RPCB version and procedure numbers. */
+ switch(B(rc->rc_vers))
+ {
+ case 2:
+ /* This proxy only supports PMAP_GETPORT. */
+ if (B(rc->rc_proc) != RPCB_GETPORT)
+ return(-1);
+
+ /* Portmap requests contain four 4 byte parameters. */
+ if (RPCB_BUF_EQ(rm, p, 16) == 0)
+ return(-1);
+
+ p += 2; /* Skip requested program and version numbers. */
+
+ /* Sanity check the requested protocol. */
+ xdr = B(p);
+ if (!(xdr == IPPROTO_UDP || xdr == IPPROTO_TCP))
+ return(-1);
+
+ rx.rx_type = RPCB_RES_PMAP;
+ rx.rx_proto = xdr;
+ break;
+ case 3:
+ case 4:
+ /* GETADDRLIST is exclusive to v4; GETADDR for v3 & v4 */
+ switch(B(rc->rc_proc))
+ {
+ case RPCB_GETADDR:
+ rx.rx_type = RPCB_RES_STRING;
+ rx.rx_proto = (u_int)fin->fin_p;
+ break;
+ case RPCB_GETADDRLIST:
+ if (B(rc->rc_vers) != 4)
+ return(-1);
+ rx.rx_type = RPCB_RES_LIST; /* rx_proto stays 0 from the bzero above */
+ break;
+ default:
+ return(-1);
+ }
+
+ ra = &rc->rc_rpcbargs;
+
+ /* Decode the 'struct rpcb' request. */
+ if (ippr_rpcb_xdrrpcb(rm, p, ra) != 0)
+ return(-1);
+
+ /* Are the target address & port valid? */
+ if ((ra->ra_maddr.xu_ip != nat->nat_outip.s_addr) ||
+ (ra->ra_maddr.xu_port != nat->nat_outport))
+ return(-1);
+
+ /* Do we need to rewrite this packet? */
+ if ((nat->nat_outip.s_addr != nat->nat_inip.s_addr) ||
+ (nat->nat_outport != nat->nat_inport))
+ mod = 1;
+ break;
+ default:
+ return(-1);
+ }
+
+ MUTEX_ENTER(&rs->rs_rxlock);
+ if (ippr_rpcb_insert(rs, &rx) != 0) { /* rx is copied, or an existing XID gains a reference */
+ MUTEX_EXIT(&rs->rs_rxlock);
+ return(-1);
+ }
+ MUTEX_EXIT(&rs->rs_rxlock);
+
+ return(mod);
+}
+
+/* -------------------------------------------------------------------- */
+/* Function:    ippr_rpcb_skipauth                                      */
+/* Returns:     int -- -1 == illegal auth parameters (lengths)          */
+/*                      0 == valid parameters, pointer advanced         */
+/* Parameters:  rm(I)   - pointer to RPC message structure              */
+/*              auth(O) - pointer to RPC auth structure                 */
+/*              buf(IO) - pointer to location within convenience buffer */
+/*                                                                      */
+/* Record auth data length & location of auth data, then advance past   */
+/* it.  *buf is only updated on success.                                */
+/*                                                                      */
+/* The XDR length is a byte count while p is a pointer to 32-bit words, */
+/* so the padded length is converted to words before being added.       */
+/* Adding the byte count directly skipped four times too far whenever   */
+/* the auth body was not empty.                                         */
+/* -------------------------------------------------------------------- */
+static int
+ippr_rpcb_skipauth(rm, auth, buf)
+	rpc_msg_t *rm;
+	xdr_auth_t *auth;
+	u_32_t **buf;
+{
+	u_32_t *p, xdr;
+
+	p = *buf;
+
+	/* Make sure we have enough space for expected fixed auth parms. */
+	if (RPCB_BUF_GEQ(rm, p, 8) == 0)
+		return(-1);
+
+	p++; /* We don't care about auth_flavor. */
+
+	auth->xa_string.xs_len = p;
+	xdr = B(p++);		/* Length of auth_data */
+
+	/* Test for absurdity / illegality of auth_data length. */
+	if ((XDRALIGN(xdr) < xdr) || (RPCB_BUF_GEQ(rm, p, XDRALIGN(xdr)) == 0))
+		return(-1);
+
+	auth->xa_string.xs_str = (char *)p;
+
+	/* Advance our location; XDRALIGN() yields a multiple of 4 bytes. */
+	p += XDRALIGN(xdr) / sizeof(*p);
+
+	*buf = p;
+
+	return(0);
+}
+
+/* -------------------------------------------------------------------- */
+/* Function:    ippr_rpcb_insert                                        */
+/* Returns:     int -- -1 == list insertion failed,                     */
+/*                      0 == item successfully added                    */
+/* Parameters:  rs(I) - pointer to RPCB session structure               */
+/*              rx(I) - pointer to RPCB transaction structure           */
+/*                                                                      */
+/* Record a request.  If a transaction with the same XID is already on  */
+/* the session's list its reference count is bumped; otherwise a copy   */
+/* of *rx is allocated and linked in at the head of the list with one   */
+/* reference.  Fails when RPCB_MAXREQS records exist or when the        */
+/* allocation fails.  The caller in view holds rs_rxlock.               */
+/*                                                                      */
+/* The limit is tested with >= because rpcbcnt is updated without a     */
+/* lock of its own; should it ever overshoot, an equality test would    */
+/* never trigger again.                                                 */
+/* -------------------------------------------------------------------- */
+static int
+ippr_rpcb_insert(rs, rx)
+	rpcb_session_t *rs;
+	rpcb_xact_t *rx;
+{
+	rpcb_xact_t *rxp;
+
+	/* A retransmission simply takes another reference. */
+	rxp = ippr_rpcb_lookup(rs, rx->rx_xid);
+	if (rxp != NULL) {
+		++rxp->rx_ref;
+		return(0);
+	}
+
+	if (rpcbcnt >= RPCB_MAXREQS)
+		return(-1);
+
+	KMALLOC(rxp, rpcb_xact_t *);
+	if (rxp == NULL)
+		return(-1);
+
+	bcopy((char *)rx, (char *)rxp, sizeof(*rx));
+
+	/* The old head is now referred to by the new record's rx_next. */
+	if (rs->rs_rxlist != NULL)
+		rs->rs_rxlist->rx_pnext = &rxp->rx_next;
+
+	rxp->rx_pnext = &rs->rs_rxlist;
+	rxp->rx_next = rs->rs_rxlist;
+	rs->rs_rxlist = rxp;
+
+	rxp->rx_ref = 1;
+
+	++rpcbcnt;
+
+	return(0);
+}
+
+/* -------------------------------------------------------------------- */
+/* Function:    ippr_rpcb_xdrrpcb                                       */
+/* Returns:     int -- -1 == failure to properly decode the request     */
+/*                      0 == rpcb successfully decoded                  */
+/* Parameters:  rm(I) - pointer to RPC message structure                */
+/*              p(I)  - pointer to location within message buffer      */
+/*              ra(O) - pointer to rpcb (xdr type) structure            */
+/*                                                                      */
+/* Decode an XDR encoded rpcb structure: skip the target program and    */
+/* version, decode r_netid and r_maddr into ra, and insist that what    */
+/* follows is a zero-length r_owner occupying the last 4 bytes of the   */
+/* message.                                                             */
+/* -------------------------------------------------------------------- */
+static int
+ippr_rpcb_xdrrpcb(rm, p, ra)
+	rpc_msg_t *rm;
+	u_32_t *p;
+	rpcb_args_t *ra;
+{
+	u_32_t *cur;
+
+	if (RPCB_BUF_GEQ(rm, p, 20) == 0)
+		return(-1);
+
+	/* Step over the target program & version words. */
+	cur = p + 2;
+
+	/* r_netid: must be "tcp" or "udp". */
+	if (ippr_rpcb_getproto(rm, &ra->ra_netid, &cur) != 0)
+		return(-1);
+
+	/* r_maddr: the universal address. */
+	if (ippr_rpcb_getuaddr(rm, &ra->ra_maddr, &cur) != 0)
+		return(-1);
+
+	/* r_owner: exactly one word must remain ... */
+	if (RPCB_BUF_EQ(rm, cur, 4) == 0)
+		return(-1);
+
+	/* ... and it must be an empty string. */
+	if (B(cur) != 0)
+		return(-1);
+
+	return(0);
+}
+
+/* -------------------------------------------------------------------- */
+/* Function: ippr_rpcb_getuaddr */
+/* Returns: int -- -1 == illegal string, */
+/* 0 == string parsed; contents recorded */
+/* Parameters: rm(I) - pointer to RPC message structure */
+/* xu(O) - pointer to universal address structure */
+/* p(IO) - pointer to location within message buffer; */
+/* on entry *p must point at the XDR length word */
+/* */
+/* Decode the IP address / port at p and record them in xu. The string */
+/* must be of the form a.b.c.d.e.f (exactly five periods, each field a */
+/* decimal value no greater than 255). */
+/* -------------------------------------------------------------------- */
+static int
+ippr_rpcb_getuaddr(rm, xu, p)
+	rpc_msg_t *rm;
+	xdr_uaddr_t *xu;
+	u_32_t **p;
+{
+	char *c, *i, *b, *pp;
+	u_int d, dd, l, t;
+	char uastr[24];
+
+	/* Test for string length. */
+	if (!RPCB_BUF_GEQ(rm, *p, 4))
+		return(-1);
+
+	xu->xu_xslen = (*p)++;
+	xu->xu_xsstr = (char *)*p;
+
+	/*
+	 * Length check: "0.0.0.0.0.0" is 11 characters and
+	 * "255.255.255.255.255.255" is 23, which also leaves room for the
+	 * terminator in uastr.
+	 */
+	l = B(xu->xu_xslen);
+	if (l < 11 || l > 23 || !RPCB_BUF_GEQ(rm, *p, XDRALIGN(l)))
+		return(-1);
+
+	/* Advance p */
+	*(char **)p += XDRALIGN(l);
+
+	/* Copy string to local buffer & terminate C style */
+	bcopy(xu->xu_xsstr, uastr, l);
+	uastr[l] = '\0';
+
+	i = (char *)&xu->xu_ip;
+	pp = (char *)&xu->xu_port;
+
+	/*
+	 * Expected format: a.b.c.d.e.f where [a-d] correspond to bytes of
+	 * an IP address and [ef] are the bytes of a L4 port.
+	 */
+	if (!(ISDIGIT(uastr[0]) && ISDIGIT(uastr[l-1])))
+		return(-1);
+	b = uastr;
+	for (c = &uastr[1], d = 0, dd = 0; c < &uastr[l-1]; c++) {
+		if (ISDIGIT(*c)) {
+			dd = 0;
+			continue;
+		}
+		if (*c == '.') {
+			/* Two periods in a row are not allowed. */
+			if (dd != 0)
+				return(-1);
+
+			/*
+			 * Only five periods are legal.  Reject a sixth one
+			 * here, before its field is stored: pp addresses
+			 * the two bytes of xu_port and a store at
+			 * pp[d - 4] with d >= 6 would land beyond them.
+			 */
+			if (d >= 5)
+				return(-1);
+
+			/* Check for ASCII byte. */
+			*c = '\0';
+			t = ippr_rpcb_atoi(b);
+			if (t > 255)
+				return(-1);
+
+			/* Aim b at beginning of the next byte. */
+			b = c + 1;
+
+			/* Switch off IP addr vs port parsing. */
+			if (d < 4)
+				i[d++] = t & 0xff;
+			else
+				pp[d++ - 4] = t & 0xff;
+
+			dd = 1;
+			continue;
+		}
+		return(-1);
+	}
+	if (d != 5)	/* String must contain exactly 5 periods. */
+		return(-1);
+
+	/* Handle the last byte (port low byte) */
+	t = ippr_rpcb_atoi(b);
+	if (t > 255)
+		return(-1);
+	pp[d - 4] = t & 0xff;
+
+	return(0);
+}
+
+/* -------------------------------------------------------------------- */
+/* Function: ippr_rpcb_atoi (XXX should be generic for all proxies) */
+/* Returns: u_int -- integer representation of supplied string; */
+/* the largest u_int value if the number does not fit */
+/* Parameters: ptr(I) - input string */
+/* */
+/* Simple version of atoi(3) ripped from ip_rcmd_pxy.c. Conversion */
+/* stops at the first non-digit. The result saturates rather than */
+/* wrapping, so an over-long digit string cannot wrap around to a */
+/* small value and slip past a caller's range check. */
+/* -------------------------------------------------------------------- */
+static u_int
+ippr_rpcb_atoi(ptr)
+	char *ptr;
+{
+	register char *s = ptr, c;
+	register u_int i = 0, digit;
+	u_int maxval = ~(u_int)0;
+
+	while (((c = *s++) != '\0') && ISDIGIT(c)) {
+		digit = (u_int)(c - '0');
+
+		/* i * 10 + digit would exceed maxval: saturate. */
+		if (i > (maxval - digit) / 10)
+			return(maxval);
+
+		i = (i * 10) + digit;
+	}
+	return(i);
+}
+
+/* -------------------------------------------------------------------- */
+/* Function: ippr_rpcb_modreq */
+/* Returns: int -- change in datagram length */
+/* Parameters: fin(I) - pointer to packet information */
+/* nat(I) - pointer to NAT session */
+/* rm(I) - pointer to RPC message structure */
+/* m(I) - pointer to mbuf chain */
+/* off(I) - current offset within mbuf chain */
+/* */
+/* When external and internal addresses differ, we rewrite the former */
+/* with the latter. (This is exclusive to protocol versions 3 & 4). */
+/* The new universal address is built from nat_inip / nat_inport and */
+/* written over r_maddr, followed by a zero r_owner. */
+/* -------------------------------------------------------------------- */
+static int
+ippr_rpcb_modreq(fin, nat, rm, m, off)
+	fr_info_t *fin;
+	nat_t *nat;
+	rpc_msg_t *rm;
+	mb_t *m;
+	u_int off;
+{
+	u_int len, xlen, pos, bogo;
+	rpcb_args_t *ra;
+	char uaddr[24];
+	char *i, *p;
+	int diff;
+
+	ra = &rm->rm_call.rc_rpcbargs;
+	i = (char *)&nat->nat_inip.s_addr;
+	p = (char *)&nat->nat_inport;
+
+	/*
+	 * Form new string.  Six fields of at most three digits plus five
+	 * periods is 23 characters, so the result always fits in uaddr.
+	 */
+	bzero(uaddr, sizeof(uaddr)); /* Just in case we need padding. */
+#if defined(SNPRINTF) && defined(_KERNEL)
+	(void) SNPRINTF(uaddr, sizeof(uaddr),
+#else
+	(void) sprintf(uaddr,
+#endif
+		       "%u.%u.%u.%u.%u.%u", i[0] & 0xff, i[1] & 0xff,
+		       i[2] & 0xff, i[3] & 0xff, p[0] & 0xff, p[1] & 0xff);
+	len = strlen(uaddr);
+	xlen = XDRALIGN(len);
+
+	/* Determine mbuf offset to start writing to. */
+	pos = (char *)ra->ra_maddr.xu_xslen - rm->rm_msgbuf;
+	off += pos;
+
+	/* Write new string length. */
+	bogo = htonl(len);
+	COPYBACK(m, off, 4, (caddr_t)&bogo);
+	off += 4;
+
+	/* Write new string. */
+	COPYBACK(m, off, xlen, uaddr);
+	off += xlen;
+
+	/* Write in zero r_owner. */
+	bogo = 0;
+	COPYBACK(m, off, 4, (caddr_t)&bogo);
+
+	/* Determine difference in data lengths. */
+	diff = xlen - XDRALIGN(B(ra->ra_maddr.xu_xslen));
+
+	/*
+	 * If our new string has a different length, make necessary
+	 * adjustments.  The shared helper updates the UDP and IP lengths
+	 * exactly as the reply rewriters do.
+	 */
+	if (diff != 0) {
+		ippr_rpcb_fixlen(fin, diff);
+		/* XXX Storage lengths. */
+	}
+
+	return(diff);
+}
+
+/* -------------------------------------------------------------------- */
+/* Function: ippr_rpcb_decoderep */
+/* Returns: int - -1 == bad request or critical failure, */
+/* 0 == valid, negative reply */
+/* 1 == valid, positive reply; needs no changes */
+/* Parameters: fin(I) - pointer to packet information */
+/* nat(I) - pointer to NAT session structure */
+/* rs(I) - pointer to RPCB session structure */
+/* rm(I) - pointer to RPC message structure */
+/* rxp(O) - pointer to RPCB transaction structure */
+/* */
+/* Take a presumed RPCB reply, extract the XID, search for the original */
+/* request information, and determine whether the request was accepted */
+/* or rejected. With a valid accepted reply, go ahead and create NAT */
+/* and state entries, and finish up by rewriting the packet as */
+/* required. */
+/* */
+/* Once the XID has been matched, *rxp is set and a reference is held */
+/* on the transaction, whatever value is subsequently returned. */
+/* */
+/* WARNING: It's the responsibility of the caller to make sure there */
+/* is enough room in rs_buf for the basic RPC message "preamble". */
+/* -------------------------------------------------------------------- */
+static int
+ippr_rpcb_decoderep(fin, nat, rs, rm, rxp)
+	fr_info_t *fin;
+	nat_t *nat;
+	rpcb_session_t *rs;
+	rpc_msg_t *rm;
+	rpcb_xact_t **rxp;
+{
+	rpcb_listp_t *rl;
+	rpcb_entry_t *re;
+	rpcb_xact_t *rx;
+	u_32_t xdr, *p;
+	rpc_resp_t *rr;
+	int rv, cnt;
+
+	p = (u_32_t *)rm->rm_msgbuf;
+
+	rr = &rm->rm_resp;
+
+	rm->rm_xid = p;
+	xdr = B(p++);	/* Record this message's XID. */
+
+	/* Lookup XID */
+	MUTEX_ENTER(&rs->rs_rxlock);
+	if ((rx = ippr_rpcb_lookup(rs, xdr)) == NULL) {
+		MUTEX_EXIT(&rs->rs_rxlock);
+		return(-1);
+	}
+	++rx->rx_ref;	/* per thread reference */
+	MUTEX_EXIT(&rs->rs_rxlock);
+
+	*rxp = rx;
+
+	/* Test call vs reply */
+	if (B(p++) != RPCB_REPLY)
+		return(-1);
+
+	/* Test reply_stat */
+	switch(B(p++))
+	{
+	case RPCB_MSG_DENIED:
+		return(0);
+	case RPCB_MSG_ACCEPTED:
+		break;
+	default:
+		return(-1);
+	}
+
+	/* Bypass RPC authentication stuff. */
+	if (ippr_rpcb_skipauth(rm, &rr->rr_authverf, &p) != 0)
+		return(-1);
+
+	/* Test accept status */
+	if (!RPCB_BUF_GEQ(rm, p, 4))
+		return(-1);
+	if (B(p++) != 0)
+		return(0);
+
+	/* Parse out the expected reply */
+	switch(rx->rx_type)
+	{
+	case RPCB_RES_PMAP:
+		/* There must be only one 4 byte argument. */
+		if (!RPCB_BUF_EQ(rm, p, 4))
+			return(-1);
+
+		rr->rr_v2 = p;
+		xdr = B(rr->rr_v2);
+
+		/* Reply w/ a 0 port indicates service isn't registered */
+		if (xdr == 0)
+			return(0);
+
+		/* Is the value sane? */
+		if (xdr > 65535)
+			return(-1);
+
+		/* Create NAT & state table entries. */
+		if (ippr_rpcb_getnat(fin, nat, rx->rx_proto, (u_int)xdr) != 0)
+			return(-1);
+		break;
+	case RPCB_RES_STRING:
+		/* Expecting a XDR string; need 4 bytes for length */
+		if (!RPCB_BUF_GEQ(rm, p, 4))
+			return(-1);
+
+		rr->rr_v3.xu_str.xs_len = p++;
+		rr->rr_v3.xu_str.xs_str = (char *)p;
+
+		xdr = B(rr->rr_v3.xu_xslen);
+
+		/* A null string indicates an unregistered service */
+		if ((xdr == 0) && RPCB_BUF_EQ(rm, p, 0))
+			return(0);
+
+		/*
+		 * ippr_rpcb_getuaddr() consumes the XDR length word itself,
+		 * so p has to be moved back onto it first; otherwise the
+		 * first four bytes of the string would be read as the
+		 * length.
+		 */
+		p = rr->rr_v3.xu_xslen;
+
+		/* Decode the target IP address / port. */
+		if (ippr_rpcb_getuaddr(rm, &rr->rr_v3, &p) != 0)
+			return(-1);
+
+		/* Validate the IP address and port contained. */
+		if (nat->nat_inip.s_addr != rr->rr_v3.xu_ip)
+			return(-1);
+
+		/* Create NAT & state table entries. */
+		if (ippr_rpcb_getnat(fin, nat, rx->rx_proto,
+				     (u_int)rr->rr_v3.xu_port) != 0)
+			return(-1);
+		break;
+	case RPCB_RES_LIST:
+		if (!RPCB_BUF_GEQ(rm, p, 4))
+			return(-1);
+		/* rpcb_entry_list_ptr */
+		switch(B(p))
+		{
+		case 0:
+			return(0);
+		case 1:
+			break;
+		default:
+			return(-1);
+		}
+		rl = &rr->rr_v4;
+		rl->rl_list = p++;
+		/* Do not rely on the caller having zeroed the counter. */
+		rl->rl_cnt = 0;
+		cnt = 0;
+
+		for(;;) {
+			re = &rl->rl_entries[rl->rl_cnt];
+			if (ippr_rpcb_getuaddr(rm, &re->re_maddr, &p) != 0)
+				return(-1);
+			if (ippr_rpcb_getproto(rm, &re->re_netid, &p) != 0)
+				return(-1);
+			/* re_semantics & re_pfamily length */
+			if (!RPCB_BUF_GEQ(rm, p, 12))
+				return(-1);
+			p++;	/* Skipping re_semantics. */
+			xdr = B(p++);
+			if ((xdr != 4) || strncmp((char *)p, "inet", 4))
+				return(-1);
+			p++;
+			if (ippr_rpcb_getproto(rm, &re->re_proto, &p) != 0)
+				return(-1);
+			if (!RPCB_BUF_GEQ(rm, p, 4))
+				return(-1);
+			re->re_more = p;
+			if (B(re->re_more) > 1)	/* 0,1 only legal values */
+				return(-1);
+			++rl->rl_cnt;
+			++cnt;
+			if (B(re->re_more) == 0)
+				break;
+			/*
+			 * Replies max out at 2 entries; TCP and/or UDP.
+			 * Refuse a further entry before it is decoded so
+			 * that rl_entries is never indexed beyond its
+			 * second slot (rl_entries is assumed to hold two
+			 * entries -- its definition is not visible here).
+			 */
+			if (cnt >= 2)
+				return(-1);
+			p++;
+		}
+
+		/* Create NAT & state table entries for every list entry. */
+		for(rl->rl_cnt = 0; rl->rl_cnt < cnt; rl->rl_cnt++) {
+			re = &rl->rl_entries[rl->rl_cnt];
+			rv = ippr_rpcb_getnat(fin, nat,
+					      re->re_proto.xp_proto,
+					      (u_int)re->re_maddr.xu_port);
+			if (rv != 0)
+				return(-1);
+		}
+		break;
+	default:
+		/*CONSTANTCONDITION*/
+		IPF_PANIC(1, ("illegal rx_type %d", rx->rx_type));
+	}
+
+	return(1);
+}
+
+/* -------------------------------------------------------------------- */
+/* Function: ippr_rpcb_lookup */
+/* Returns: rpcb_xact_t * - NULL == no matching record, */
+/* else pointer to relevant entry */
+/* Parameters: rs(I) - pointer to RPCB session */
+/* xid(I) - XID to look for */
+/* */
+/* Search the session's transaction list for a record carrying xid. */
+/* -------------------------------------------------------------------- */
+static rpcb_xact_t *
+ippr_rpcb_lookup(rs, xid)
+	rpcb_session_t *rs;
+	u_32_t xid;
+{
+	rpcb_xact_t *cur;
+
+	/* An empty list simply leaves cur NULL and skips the walk. */
+	cur = rs->rs_rxlist;
+	while ((cur != NULL) && (cur->rx_xid != xid))
+		cur = cur->rx_next;
+
+	return(cur);
+}
+
+/* -------------------------------------------------------------------- */
+/* Function: ippr_rpcb_deref */
+/* Returns: (void) */
+/* Parameters: rs(I) - pointer to RPCB session */
+/* rx(I) - pointer to RPC transaction struct to release */
+/* Locking: rs->rs_rxlock must be held write only */
+/* */
+/* Drop one reference on rx. When the count reaches zero, unlink the */
+/* record from the chain of entries and free it. */
+/* -------------------------------------------------------------------- */
+static void
+ippr_rpcb_deref(rs, rx)
+	rpcb_session_t *rs;
+	rpcb_xact_t *rx;
+{
+	rpcb_xact_t *succ;
+
+	rs = rs;	/* LINT */
+
+	if (rx == NULL)
+		return;
+
+	/* Other holders remain: nothing more to do. */
+	rx->rx_ref -= 1;
+	if (rx->rx_ref != 0)
+		return;
+
+	/* Splice the record out of the list. */
+	succ = rx->rx_next;
+	if (succ != NULL)
+		succ->rx_pnext = rx->rx_pnext;
+	*rx->rx_pnext = succ;
+
+	KFREE(rx);
+
+	--rpcbcnt;
+}
+
+/* -------------------------------------------------------------------- */
+/* Function: ippr_rpcb_getproto */
+/* Returns: int - -1 == illegal protocol/netid, */
+/* 0 == legal protocol/netid */
+/* Parameters: rm(I) - pointer to RPC message structure */
+/* xp(O) - pointer to netid structure */
+/* p(IO) - pointer to location within packet buffer; */
+/* advanced past the string on success */
+/* */
+/* Decode netid/proto stored at p and record its numeric value. */
+/* -------------------------------------------------------------------- */
+static int
+ippr_rpcb_getproto(rm, xp, p)
+	rpc_msg_t *rm;
+	xdr_proto_t *xp;
+	u_32_t **p;
+{
+	u_int len;
+
+	/*
+	 * Must have 4 bytes for length & 4 bytes for "tcp" or "udp".
+	 * The check has to be made against the buffer position *p, not
+	 * against the address of the caller's pointer variable.
+	 */
+	if (!RPCB_BUF_GEQ(rm, *p, 8))
+		return(-1);
+
+	xp->xp_xslen = (*p)++;
+	xp->xp_xsstr = (char *)*p;
+
+	/* Test the string length. */
+	len = B(xp->xp_xslen);
+	if (len != 3)
+		return(-1);
+
+	/*
+	 * Test the actual string & record the protocol accordingly.  Four
+	 * bytes are compared so that the XDR pad byte must be NUL.
+	 */
+	if (!strncmp((char *)xp->xp_xsstr, "tcp\0", 4))
+		xp->xp_proto = IPPROTO_TCP;
+	else if (!strncmp((char *)xp->xp_xsstr, "udp\0", 4))
+		xp->xp_proto = IPPROTO_UDP;
+	else {
+		return(-1);
+	}
+
+	/* Advance past the string. */
+	(*p)++;
+
+	return(0);
+}
+
+/* -------------------------------------------------------------------- */
+/* Function: ippr_rpcb_getnat */
+/* Returns: int -- -1 == failed to create table entries, */
+/* 0 == success */
+/* Parameters: fin(I) - pointer to packet information */
+/* nat(I) - pointer to NAT table entry */
+/* proto(I) - transport protocol for new entries */
+/* port(I) - new port to use w/ wildcard table entries */
+/* */
+/* Create state and NAT entries to handle an anticipated connection */
+/* attempt between RPC client and server. */
+/* -------------------------------------------------------------------- */
+static int
+ippr_rpcb_getnat(fin, nat, proto, port)
+ fr_info_t *fin;
+ nat_t *nat;
+ u_int proto;
+ u_int port;
+{
+ ipnat_t *ipn, ipnat;
+ tcphdr_t tcp;
+ ipstate_t *is;
+ fr_info_t fi;
+ nat_t *natl;
+ int nflags;
+
+ ipn = nat->nat_ptr;
+
+ /* Generate dummy fr_info */
+ /*
+ * The copy describes an inbound packet (fin_out == 0) from the
+ * original packet's destination to nat_outip, with source port 0
+ * and the destination port given by the caller.
+ */
+ bcopy((char *)fin, (char *)&fi, sizeof(fi));
+ fi.fin_out = 0;
+ fi.fin_src = fin->fin_dst;
+ fi.fin_dst = nat->nat_outip;
+ fi.fin_p = proto;
+ fi.fin_sport = 0;
+ fi.fin_dport = port & 0xffff;
+ fi.fin_flx |= FI_IGNORE;
+
+ /*
+ * A zeroed TCP header serves as the transport header for both
+ * protocols (fin_dp is aimed at it below); for UDP only th_dport
+ * is filled in.
+ */
+ bzero((char *)&tcp, sizeof(tcp));
+ tcp.th_dport = htons(port);
+
+ if (proto == IPPROTO_TCP) {
+ tcp.th_win = htons(8192);
+ TCP_OFF_A(&tcp, sizeof(tcphdr_t) >> 2);
+ fi.fin_dlen = sizeof(tcphdr_t);
+ tcp.th_flags = TH_SYN;
+ nflags = NAT_TCP;
+ } else {
+ fi.fin_dlen = sizeof(udphdr_t);
+ nflags = NAT_UDP;
+ }
+
+ nflags |= SI_W_SPORT|NAT_SEARCH;
+ fi.fin_dp = &tcp;
+ fi.fin_plen = fi.fin_hlen + fi.fin_dlen;
+
+ /*
+ * Search for existing NAT & state entries. Pay close attention to
+ * mutexes / locks grabbed from lookup routines, as not doing so could
+ * lead to bad things.
+ *
+ * If successful, fr_stlookup returns with ipf_state locked. We have
+ * no use for this lock, so simply unlock it if necessary.
+ */
+ is = fr_stlookup(&fi, &tcp, NULL);
+ if (is != NULL)
+ RWLOCK_EXIT(&ipf_state);
+
+ /*
+ * NOTE(review): ipf_nat is released and then re-taken for writing;
+ * presumably the caller entered holding it as a reader -- confirm
+ * against the caller. Every exit taken while it is held for writing
+ * goes through MUTEX_DOWNGRADE first.
+ */
+ RWLOCK_EXIT(&ipf_nat);
+
+ WRITE_ENTER(&ipf_nat);
+ natl = nat_inlookup(&fi, nflags, proto, fi.fin_src, fi.fin_dst);
+
+ /* Both entries already exist; there is nothing to create. */
+ if ((natl != NULL) && (is != NULL)) {
+ MUTEX_DOWNGRADE(&ipf_nat);
+ return(0);
+ }
+
+ /* Slightly modify the following structures for actual use in creating
+ * NAT and/or state entries. We're primarily concerned with stripping
+ * flags that may be detrimental to the creation process or simply
+ * shouldn't be associated with a table entry.
+ */
+ fi.fin_fr = &rpcbfr;
+ fi.fin_flx &= ~FI_IGNORE;
+ nflags &= ~NAT_SEARCH;
+
+ if (natl == NULL) {
+ /* XXX Since we're just copying the original ipn contents
+ * back, would we be better off just sending a pointer to
+ * the 'temp' copy off to nat_new instead?
+ */
+ /* Generate template/bogus NAT rule. */
+ /* ipnat keeps the real rule so it can be put back afterwards. */
+ bcopy((char *)ipn, (char *)&ipnat, sizeof(ipnat));
+ ipn->in_flags = nflags & IPN_TCPUDP;
+ ipn->in_apr = NULL;
+ ipn->in_p = proto;
+ ipn->in_pmin = htons(fi.fin_dport);
+ ipn->in_pmax = htons(fi.fin_dport);
+ ipn->in_pnext = htons(fi.fin_dport);
+ ipn->in_space = 1;
+ ipn->in_ippip = 1;
+ if (ipn->in_flags & IPN_FILTER) {
+ ipn->in_scmp = 0;
+ ipn->in_dcmp = 0;
+ }
+ *ipn->in_plabel = '\0';
+
+ /* Create NAT entry. return NULL if this fails. */
+ natl = nat_new(&fi, ipn, NULL, nflags|SI_CLONE|NAT_SLAVE,
+ NAT_INBOUND);
+
+ /* Restore the rule whether or not nat_new succeeded. */
+ bcopy((char *)&ipnat, (char *)ipn, sizeof(ipnat));
+
+ if (natl == NULL) {
+ MUTEX_DOWNGRADE(&ipf_nat);
+ return(-1);
+ }
+
+ /*
+ * Incremented after the restore above, so the count is applied
+ * to the real rule instead of being overwritten by the copy.
+ */
+ ipn->in_use++;
+ (void) nat_proto(&fi, natl, nflags);
+ nat_update(&fi, natl, natl->nat_ptr);
+ }
+ MUTEX_DOWNGRADE(&ipf_nat);
+
+ if (is == NULL) {
+ /* Create state entry. Return NULL if this fails. */
+ fi.fin_dst = nat->nat_inip;
+ fi.fin_nat = (void *)natl;
+ fi.fin_flx |= FI_NATED;
+ fi.fin_flx &= ~FI_STATE;
+ nflags &= NAT_TCPUDP;
+ nflags |= SI_W_SPORT|SI_CLONE;
+
+ is = fr_addstate(&fi, NULL, nflags);
+ if (is == NULL) {
+ /*
+ * XXX nat_delete is private to ip_nat.c. Should
+ * check w/ Darren about this one.
+ *
+ * nat_delete(natl, NL_EXPIRE);
+ */
+ /* The NAT entry is left in place on this failure path. */
+ return(-1);
+ }
+ if (fi.fin_state != NULL)
+ fr_statederef(&fi, (ipstate_t **)&fi.fin_state);
+ }
+
+ return(0);
+}
+
+/* -------------------------------------------------------------------- */
+/* Function: ippr_rpcb_modv3 */
+/* Returns: int -- change in packet length */
+/* Parameters: fin(I) - pointer to packet information */
+/* nat(I) - pointer to NAT session */
+/* rm(I) - pointer to RPC message structure */
+/* m(I) - pointer to mbuf chain */
+/* off(I) - offset within mbuf chain */
+/* */
+/* Write a new universal address string to this packet, adjusting */
+/* lengths as necessary. */
+/* -------------------------------------------------------------------- */
+static int
+ippr_rpcb_modv3(fin, nat, rm, m, off)
+	fr_info_t *fin;
+	nat_t *nat;
+	rpc_msg_t *rm;
+	mb_t *m;
+	u_int off;
+{
+	u_int slen, padded, word;
+	rpc_resp_t *resp;
+	char text[24];
+	char *ip, *port;
+	int delta;
+
+	resp = &rm->rm_resp;
+	port = (char *)&resp->rr_v3.xu_port;
+	ip = (char *)&nat->nat_outip.s_addr;
+
+	/* Build the replacement string; the zero fill supplies XDR padding. */
+	bzero(text, sizeof(text));
+#if defined(SNPRINTF) && defined(_KERNEL)
+	(void) SNPRINTF(text, sizeof(text),
+#else
+	(void) sprintf(text,
+#endif
+		       "%u.%u.%u.%u.%u.%u", ip[0] & 0xff, ip[1] & 0xff,
+		       ip[2] & 0xff, ip[3] & 0xff, port[0] & 0xff,
+		       port[1] & 0xff);
+	slen = strlen(text);
+	padded = XDRALIGN(slen);
+
+	/* Writing starts at the string's length word within the message. */
+	off += (u_int)((char *)resp->rr_v3.xu_xslen - rm->rm_msgbuf);
+
+	/* Length word first ... */
+	word = htonl(slen);
+	COPYBACK(m, off, 4, (caddr_t)&word);
+	off += 4;
+
+	/* ... then the padded string itself. */
+	COPYBACK(m, off, padded, text);
+
+	/* Change in XDR-aligned length relative to the original string. */
+	delta = padded - XDRALIGN(B(resp->rr_v3.xu_xslen));
+
+	/* Propagate any change in size to the packet length fields. */
+	if (delta != 0)
+		ippr_rpcb_fixlen(fin, delta);
+
+	return(delta);
+}
+
+/* -------------------------------------------------------------------- */
+/* Function: ippr_rpcb_modv4 */
+/* Returns: int -- change in packet length */
+/* Parameters: fin(I) - pointer to packet information */
+/* nat(I) - pointer to NAT session */
+/* rm(I) - pointer to RPC message structure */
+/* m(I) - pointer to mbuf chain */
+/* off(I) - offset within mbuf chain */
+/* */
+/* Write new rpcb_entry list, adjusting lengths as necessary. */
+/* -------------------------------------------------------------------- */
+static int
+ippr_rpcb_modv4(fin, nat, rm, m, off)
+	fr_info_t *fin;
+	nat_t *nat;
+	rpc_msg_t *rm;
+	mb_t *m;
+	u_int off;
+{
+	u_int slen, padded, tail, word;
+	rpcb_listp_t *list;
+	rpcb_entry_t *ent;
+	rpc_resp_t *resp;
+	char text[24];
+	char *ip, *port;
+	int delta, idx;
+
+	delta = 0;
+	resp = &rm->rm_resp;
+	list = &resp->rr_v4;
+
+	ip = (char *)&nat->nat_outip.s_addr;
+
+	/* Writing starts at the first entry's address length word. */
+	ent = &list->rl_entries[0];
+	off += (u_int)((char *)ent->re_maddr.xu_xslen - rm->rm_msgbuf);
+
+	idx = 0;
+	while (idx < list->rl_cnt) {
+		ent = &list->rl_entries[idx];
+		port = (char *)&ent->re_maddr.xu_port;
+
+		/* Build the new string; the zero fill supplies padding. */
+		bzero(text, sizeof(text));
+#if defined(SNPRINTF) && defined(_KERNEL)
+		(void) SNPRINTF(text, sizeof(text),
+#else
+		(void) sprintf(text,
+#endif
+			       "%u.%u.%u.%u.%u.%u", ip[0] & 0xff,
+			       ip[1] & 0xff, ip[2] & 0xff, ip[3] & 0xff,
+			       port[0] & 0xff, port[1] & 0xff);
+		slen = strlen(text);
+		padded = XDRALIGN(slen);
+
+		/* Length word first ... */
+		word = htonl(slen);
+		COPYBACK(m, off, 4, (caddr_t)&word);
+		off += 4;
+
+		/* ... then the padded string. */
+		COPYBACK(m, off, padded, text);
+		off += padded;
+
+		/* Accumulate the change in length across entries. */
+		delta += padded - XDRALIGN(B(ent->re_maddr.xu_xslen));
+
+		/*
+		 * The remainder of the entry runs from its netid length
+		 * word through re_more.  It only needs writing back when
+		 * the accumulated length change is non-zero.
+		 */
+		tail = ((char *)ent->re_more + 4) -
+		       (char *)ent->re_netid.xp_xslen;
+		if (delta != 0) {
+			COPYBACK(m, off, tail, (caddr_t)ent->re_netid.xp_xslen);
+		}
+		off += tail;
+
+		idx++;
+	}
+
+	/* Propagate any change in size to the packet length fields. */
+	if (delta != 0)
+		ippr_rpcb_fixlen(fin, delta);
+
+	return(delta);
+}
+
+
+/* -------------------------------------------------------------------- */
+/* Function: ippr_rpcb_fixlen */
+/* Returns: (void) */
+/* Parameters: fin(I) - pointer to packet information */
+/* len(I) - change in packet length */
+/* */
+/* Adjust various packet related lengths held in structure and packet */
+/* header fields. */
+/* -------------------------------------------------------------------- */
+static void
+ippr_rpcb_fixlen(fin, len)
+	fr_info_t *fin;
+	int len;
+{
+	udphdr_t *uh;
+	int newulen;
+
+	/* Bookkeeping held in the packet information structure. */
+	fin->fin_plen += len;
+	fin->fin_dlen += len;
+
+	/* IP header length field. */
+	fin->fin_ip->ip_len += len;
+
+	/* UDP length field: converted to host order, adjusted, put back. */
+	uh = fin->fin_dp;
+	newulen = ntohs(uh->uh_ulen) + len;
+	uh->uh_ulen = htons(newulen);
+}
+
+#undef B
diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_state.h b/usr/src/uts/common/inet/ipf/netinet/ip_state.h
new file mode 100644
index 0000000000..7b5891d86d
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/netinet/ip_state.h
@@ -0,0 +1,267 @@
+/*
+ * Copyright (C) 1995-2001 by Darren Reed.
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ *
+ * @(#)ip_state.h 1.3 1/12/96 (C) 1995 Darren Reed
+ * $Id: ip_state.h,v 2.68.2.5 2005/08/11 19:58:04 darrenr Exp $
+ *
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifndef __IP_STATE_H__
+#define __IP_STATE_H__
+
+/*
+ * ioctl number 61 in group 'r', carrying a struct ipfobj. The two
+ * variants differ only in whether the group letter is written as a
+ * character constant.
+ */
+#if defined(__STDC__) || defined(__GNUC__) || defined(_AIX51)
+# define SIOCDELST _IOW('r', 61, struct ipfobj)
+#else
+# define SIOCDELST _IOW(r, 61, struct ipfobj)
+#endif
+
+struct ipscan;
+
+/* Defaults that may be overridden by defining the macro beforehand. */
+#ifndef IPSTATE_SIZE
+# define IPSTATE_SIZE 5737
+#endif
+#ifndef IPSTATE_MAX
+# define IPSTATE_MAX 4013 /* Maximum number of states held */
+#endif
+
+/*
+ * PAIRS is true when (s1,d1) equals (s2,d2) either as given or with the
+ * second pair swapped. IPPAIR applies the same test to the s_addr
+ * members of its arguments.
+ */
+#define PAIRS(s1,d1,s2,d2) ((((s1) == (s2)) && ((d1) == (d2))) ||\
+ (((s1) == (d2)) && ((d1) == (s2))))
+#define IPPAIR(s1,d1,s2,d2) PAIRS((s1).s_addr, (d1).s_addr, \
+ (s2).s_addr, (d2).s_addr)
+
+
+/*
+ * State record (ipstate_t). Several members are normally reached
+ * through the is_* alias macros defined after this structure, which
+ * name members nested inside is_sti, is_src/is_dst and the is_ps
+ * union. is_ps holds one of the ICMP, TCP, UDP or GRE variants.
+ * NOTE(review): is_next/is_pnext and is_hnext/is_phnext look like two
+ * separate list linkages -- confirm against ip_state.c.
+ */
+typedef struct ipstate {
+ ipfmutex_t is_lock;
+ struct ipstate *is_next;
+ struct ipstate **is_pnext;
+ struct ipstate *is_hnext;
+ struct ipstate **is_phnext;
+ struct ipstate **is_me;
+ void *is_ifp[4];
+ void *is_sync;
+ struct nat *is_nat[2];
+ frentry_t *is_rule;
+ struct ipftq *is_tqehead[2];
+ struct ipscan *is_isc;
+ U_QUAD_T is_pkts[4];
+ U_QUAD_T is_bytes[4];
+ U_QUAD_T is_icmppkts[4];
+ struct ipftqent is_sti;
+ u_int is_frage[2];
+ int is_ref; /* reference count */
+ int is_isninc[2];
+ u_short is_sumd[2];
+ i6addr_t is_src;
+ i6addr_t is_dst;
+ u_int is_pass;
+ u_char is_p; /* Protocol */
+ u_char is_v;
+ u_32_t is_hv;
+ u_32_t is_tag;
+ u_32_t is_opt[2]; /* packet options set */
+ /* in both directions */
+ u_32_t is_optmsk[2]; /* " " mask */
+ /* in both directions */
+ u_short is_sec; /* security options set */
+ u_short is_secmsk; /* " " mask */
+ u_short is_auth; /* authentication options set */
+ u_short is_authmsk; /* " " mask */
+ union {
+ icmpinfo_t is_ics;
+ tcpinfo_t is_ts;
+ udpinfo_t is_us;
+ greinfo_t is_ug;
+ } is_ps;
+ u_32_t is_flags;
+ int is_flx[2][2];
+ u_32_t is_rulen; /* rule number when created */
+ u_32_t is_s0[2];
+ u_short is_smsk[2];
+ char is_group[FR_GROUPLEN];
+ char is_sbuf[2][16];
+ char is_ifname[4][LIFNAMSIZ];
+} ipstate_t;
+
+/*
+ * Shorthand names for ipstate_t members that live inside nested
+ * structures: the timeout queue entry (is_sti), the IPv4 part of the
+ * addresses (is_src / is_dst), the per-protocol union (is_ps) and
+ * individual slots of is_ifp. Index 0 of ts_data backs the "s" names
+ * and index 1 the "d" names.
+ */
+#define is_die is_sti.tqe_die
+#define is_state is_sti.tqe_state
+#define is_touched is_sti.tqe_touched
+#define is_saddr is_src.in4.s_addr
+#define is_daddr is_dst.in4.s_addr
+#define is_icmp is_ps.is_ics
+#define is_type is_icmp.ici_type
+#define is_code is_icmp.ici_code
+#define is_tcp is_ps.is_ts
+#define is_udp is_ps.is_us
+#define is_send is_tcp.ts_data[0].td_end
+#define is_dend is_tcp.ts_data[1].td_end
+#define is_maxswin is_tcp.ts_data[0].td_maxwin
+#define is_maxdwin is_tcp.ts_data[1].td_maxwin
+#define is_maxsend is_tcp.ts_data[0].td_maxend
+#define is_maxdend is_tcp.ts_data[1].td_maxend
+#define is_swinscale is_tcp.ts_data[0].td_winscale
+#define is_dwinscale is_tcp.ts_data[1].td_winscale
+#define is_swinflags is_tcp.ts_data[0].td_winflags
+#define is_dwinflags is_tcp.ts_data[1].td_winflags
+#define is_sport is_tcp.ts_sport
+#define is_dport is_tcp.ts_dport
+#define is_ifpin is_ifp[0]
+#define is_ifpout is_ifp[2]
+#define is_gre is_ps.is_ug
+#define is_call is_gre.gs_call
+
+/*
+ * Values for is_flags. The first group mirrors the SI_* flags of the
+ * same meaning; the remainder are specific to state entries.
+ */
+#define IS_WSPORT SI_W_SPORT /* 0x00100 */
+#define IS_WDPORT SI_W_DPORT /* 0x00200 */
+#define IS_WSADDR SI_W_SADDR /* 0x00400 */
+#define IS_WDADDR SI_W_DADDR /* 0x00800 */
+#define IS_NEWFR SI_NEWFR /* 0x01000 */
+#define IS_CLONE SI_CLONE /* 0x02000 */
+#define IS_CLONED SI_CLONED /* 0x04000 */
+#define IS_TCPFSM 0x10000
+#define IS_STRICT 0x20000
+#define IS_ISNSYN 0x40000
+#define IS_ISNACK 0x80000
+#define IS_STATESYNC 0x100000
+/*
+ * IS_SC flags are for scan-operations that need to be recognised in state.
+ * IS_SC_MATCHALL covers both match bits (client and server) and IS_SC_ALL
+ * covers all four IS_SC bits.
+ */
+#define IS_SC_CLIENT 0x10000000
+#define IS_SC_SERVER 0x20000000
+#define IS_SC_MATCHC 0x40000000
+#define IS_SC_MATCHS 0x80000000
+#define IS_SC_MATCHALL (IS_SC_MATCHC|IS_SC_MATCHS)
+#define IS_SC_ALL (IS_SC_MATCHC|IS_SC_MATCHS|IS_SC_CLIENT|IS_SC_SERVER)
+
+/*
+ * Flags that can be passed into fr_addstate
+ */
+#define IS_INHERITED 0x0fffff00
+
+#define TH_OPENING (TH_SYN|TH_ACK)
+/*
+ * is_flags:
+ * Bits 0 - 3 are used as a mask with the current packet's bits to check for
+ * whether it is short, tcp/udp, a fragment or the presence of IP options.
+ * Bits 4 - 7 are set from the initial packet and contain what the packet
+ * anded with bits 0-3 must match.
+ * Bits 8,9 are used to indicate wildcard source/destination port matching.
+ * Bits 10,11 are reserved for other wildcard flag compatibility.
+ * Bits 12,13 are for scanning.
+ */
+
+/*
+ * Container holding a complete ipstate together with a complete
+ * frentry, plus an untyped next pointer. ips_rule names the rule
+ * pointer inside the embedded state.
+ */
+typedef struct ipstate_save {
+ void *ips_next;
+ struct ipstate ips_is;
+ struct frentry ips_fr;
+} ipstate_save_t;
+
+#define ips_rule ips_is.is_rule
+
+
+/*
+ * State log record. isl_ps is a union: isl_sport / isl_dport alias the
+ * two isl_ports slots and isl_itype aliases isl_icmp, so only one
+ * interpretation applies to a given record.
+ * NOTE(review): the ISL_* constants below are presumably the values
+ * carried in isl_type -- confirm against ipstate_log().
+ */
+typedef struct ipslog {
+ U_QUAD_T isl_pkts[4];
+ U_QUAD_T isl_bytes[4];
+ i6addr_t isl_src;
+ i6addr_t isl_dst;
+ u_32_t isl_tag;
+ u_short isl_type;
+ union {
+ u_short isl_filler[2];
+ u_short isl_ports[2];
+ u_short isl_icmp;
+ } isl_ps;
+ u_char isl_v;
+ u_char isl_p;
+ u_char isl_flags;
+ u_char isl_state[2];
+ u_32_t isl_rulen;
+ char isl_group[FR_GROUPLEN];
+} ipslog_t;
+
+#define isl_sport isl_ps.isl_ports[0]
+#define isl_dport isl_ps.isl_ports[1]
+#define isl_itype isl_ps.isl_icmp
+
+#define ISL_NEW 0
+#define ISL_CLONE 1
+#define ISL_EXPIRE 0xffff
+#define ISL_FLUSH 0xfffe
+#define ISL_REMOVE 0xfffd
+#define ISL_INTERMEDIATE 0xfffc
+#define ISL_KILLED 0xfffb
+#define ISL_ORPHAN 0xfffa
+#define ISL_ORPHAN 0xfffa
+
+
+/*
+ * State statistics block: a set of u_long counters, the size/maximum
+ * settings, and pointers to the state table, the state list and the
+ * per-bucket length array.
+ * NOTE(review): individual counter meanings are not documented here;
+ * see where each iss_* member is updated in ip_state.c.
+ */
+typedef struct ips_stat {
+ u_long iss_hits;
+ u_long iss_miss;
+ u_long iss_max;
+ u_long iss_maxref;
+ u_long iss_tcp;
+ u_long iss_udp;
+ u_long iss_icmp;
+ u_long iss_nomem;
+ u_long iss_expire;
+ u_long iss_fin;
+ u_long iss_active;
+ u_long iss_logged;
+ u_long iss_logfail;
+ u_long iss_inuse;
+ u_long iss_wild;
+ u_long iss_killed;
+ u_long iss_ticks;
+ u_long iss_bucketfull;
+ int iss_statesize;
+ int iss_statemax;
+ ipstate_t **iss_table;
+ ipstate_t *iss_list;
+ u_long *iss_bucketlen;
+} ips_stat_t;
+
+
+extern u_long fr_tcpidletimeout;
+extern u_long fr_tcpclosewait;
+extern u_long fr_tcplastack;
+extern u_long fr_tcptimeout;
+extern u_long fr_tcpclosed;
+extern u_long fr_tcphalfclosed;
+extern u_long fr_udptimeout;
+extern u_long fr_udpacktimeout;
+extern u_long fr_icmptimeout;
+extern u_long fr_icmpacktimeout;
+extern u_long fr_iptimeout;
+extern int fr_statemax;
+extern int fr_statesize;
+extern int fr_state_lock;
+extern int fr_state_maxbucket;
+extern int fr_state_maxbucket_reset;
+extern ipstate_t *ips_list;
+extern ipftq_t *ips_utqe;
+extern ipftq_t ips_tqtqb[IPF_TCP_NSTATES];
+
+extern int fr_stateinit __P((void));
+extern ipstate_t *fr_addstate __P((fr_info_t *, ipstate_t **, u_int));
+extern frentry_t *fr_checkstate __P((struct fr_info *, u_32_t *));
+extern ipstate_t *fr_stlookup __P((fr_info_t *, tcphdr_t *, ipftq_t **));
+extern void fr_statesync __P((void *));
+extern void fr_timeoutstate __P((void));
+extern int fr_tcp_age __P((struct ipftqent *, struct fr_info *,
+ struct ipftq *, int));
+extern int fr_tcpinwindow __P((struct fr_info *, struct tcpdata *,
+ struct tcpdata *, tcphdr_t *, int));
+extern void fr_stateunload __P((void));
+extern void ipstate_log __P((struct ipstate *, u_int));
+extern int fr_state_ioctl __P((caddr_t, ioctlcmd_t, int));
+extern void fr_stinsert __P((struct ipstate *, int));
+extern void fr_sttab_init __P((struct ipftq *));
+extern void fr_sttab_destroy __P((struct ipftq *));
+extern void fr_updatestate __P((fr_info_t *, ipstate_t *, ipftq_t *));
+extern void fr_statederef __P((fr_info_t *, ipstate_t **));
+extern void fr_setstatequeue __P((ipstate_t *, int));
+
+#endif /* __IP_STATE_H__ */
diff --git a/usr/src/uts/common/inet/ipf/netinet/ipl.h b/usr/src/uts/common/inet/ipf/netinet/ipl.h
new file mode 100644
index 0000000000..df2c0b4f3b
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/netinet/ipl.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 1993-2001, 2003 by Darren Reed.
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ *
+ * @(#)ipl.h 1.21 6/5/96
+ * $Id: ipl.h,v 2.52.2.10 2005/08/13 05:42:49 darrenr Exp $
+ *
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+
+#ifndef __IPL_H__
+#define __IPL_H__
+
+/* Human-readable version string. */
+#define IPL_VERSION "IP Filter: v4.1.9"
+
+/*
+ * Numeric form of the same version. NOTE(review): 4010900 appears to
+ * encode 4.1.9 as two decimal digits per component after the major
+ * number -- confirm before relying on the layout.
+ */
+#define IPFILTER_VERSION 4010900
+
+#endif
diff --git a/usr/src/uts/common/inet/ipf/opts.h b/usr/src/uts/common/inet/ipf/opts.h
new file mode 100644
index 0000000000..d944df6242
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/opts.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2000 by Darren Reed.
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ *
+ * $Id: opts.h,v 2.12 2003/08/14 14:24:27 darrenr Exp $
+ *
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifndef __OPTS_H__
+#define __OPTS_H__
+
+/*
+ * SOLARIS is 1 when building on SunOS 5.x (SVR4) and 0 otherwise. The
+ * test is evaluated here, once, rather than leaving "defined" operators
+ * inside the macro body: a "defined" that only appears through macro
+ * expansion in a #if expression has undefined behaviour.
+ */
+#ifndef SOLARIS
+# if defined(sun) && (defined(__svr4__) || defined(__SVR4))
+#  define SOLARIS 1
+# else
+#  define SOLARIS 0
+# endif
+#endif
+/*
+ * Option bits; each value is a distinct single bit so they can be
+ * combined. OPT_OUTQUE and OPT_INQUE take their values from the FR_*
+ * flags of the same name. The global "opts" declared at the end of this
+ * header is presumably where they are stored -- confirm in the users.
+ */
+#define OPT_REMOVE 0x000001
+#define OPT_DEBUG 0x000002
+#define OPT_AUTHSTATS 0x000004
+#define OPT_RAW 0x000008
+#define OPT_LOG 0x000010
+#define OPT_SHOWLIST 0x000020
+#define OPT_VERBOSE 0x000040
+#define OPT_DONOTHING 0x000080
+#define OPT_HITS 0x000100
+#define OPT_BRIEF 0x000200
+#define OPT_ACCNT 0x000400
+#define OPT_FRSTATES 0x000800
+#define OPT_SHOWLINENO 0x001000
+#define OPT_PRINTFR 0x002000
+#define OPT_OUTQUE FR_OUTQUE /* 0x4000 */
+#define OPT_INQUE FR_INQUE /* 0x8000 */
+#define OPT_ZERORULEST 0x010000
+#define OPT_SAVEOUT 0x020000
+#define OPT_IPSTATES 0x040000
+#define OPT_INACTIVE 0x080000
+#define OPT_NAT 0x100000
+#define OPT_GROUPS 0x200000
+#define OPT_STATETOP 0x400000
+#define OPT_FLUSH 0x800000
+#define OPT_CLEAR 0x1000000
+#define OPT_HEX 0x2000000
+#define OPT_ASCII 0x4000000
+#define OPT_NORESOLVE 0x8000000
+#define OPT_UNDEF 0x10000000
+
+/* Alternative names for two of the bits above. */
+#define OPT_STAT OPT_FRSTATES
+#define OPT_LIST OPT_SHOWLIST
+
+
+/*
+ * __P wraps prototype argument lists: they are kept when __STDC__ is
+ * defined and replaced by an empty list otherwise.
+ */
+#ifndef __P
+# ifdef __STDC__
+# define __P(x) x
+# else
+# define __P(x) ()
+# endif
+#endif
+
+/*
+ * STRERROR maps an errno value to its message: through sys_errlist on
+ * sun systems that are not SOLARIS, through strerror() everywhere else.
+ */
+#if defined(sun) && !SOLARIS
+# define STRERROR(x) sys_errlist[x]
+extern char *sys_errlist[];
+#else
+# define STRERROR(x) strerror(x)
+#endif
+
+extern int opts;
+
+#endif /* __OPTS_H__ */
diff --git a/usr/src/uts/common/inet/ipf/pfil.conf b/usr/src/uts/common/inet/ipf/pfil.conf
deleted file mode 100644
index 018946143b..0000000000
--- a/usr/src/uts/common/inet/ipf/pfil.conf
+++ /dev/null
@@ -1,2 +0,0 @@
-name="pfil" parent="pseudo" instance=0;
-
diff --git a/usr/src/uts/common/inet/ipf/radix.c b/usr/src/uts/common/inet/ipf/radix.c
new file mode 100644
index 0000000000..69b50c062a
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/radix.c
@@ -0,0 +1,1206 @@
+/*
+ * Copyright (c) 1988, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)radix.c 8.6 (Berkeley) 10/17/95
+ */
+
+/*
+ * Routines to build and maintain radix trees for routing lookups.
+ */
+#if defined(KERNEL) || defined(_KERNEL)
+# undef KERNEL
+# undef _KERNEL
+# define KERNEL 1
+# define _KERNEL 1
+#endif
+#define __SYS_ATOMIC_OPS_H__
+#if !defined(__svr4__) && !defined(__SVR4) && !defined(__osf__) && \
+ !defined(__hpux) && !defined(__sgi)
+#include <sys/cdefs.h>
+#endif
+#ifndef __P
+# ifdef __STDC__
+# define __P(x) x
+# else
+# define __P(x) ()
+# endif
+#endif
+#ifdef __osf__
+# define CONST
+# define _IPV6_SWTAB_H
+# define _PROTO_NET_H_
+# define _PROTO_IPV6_H
+# include <sys/malloc.h>
+#endif
+
+#include <sys/param.h>
+#ifdef _KERNEL
+#include <sys/systm.h>
+#else
+void panic __P((char *str));
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#endif
+#ifdef __hpux
+#include <syslog.h>
+#else
+#include <sys/syslog.h>
+#endif
+#include <sys/time.h>
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <net/if.h>
+#include "netinet/ip_compat.h"
+#include "netinet/ip_fil.h"
+/* END OF INCLUDES */
+#include "radix_ipf.h"
+#ifndef min
+# define min MIN
+#endif
+#ifndef max
+# define max MAX
+#endif
+
+int max_keylen = 16; /* largest key/mask length in bytes; rn_init() sizes its buffers from it */
+static struct radix_mask *rn_mkfreelist; /* chain of spare radix_mask structures, linked through rm_mklist; released by rn_fini() */
+static struct radix_node_head *mask_rnhead; /* tree holding the netmasks entered by rn_addmask(); created by rn_init() */
+static char *addmask_key; /* scratch buffer in which rn_addmask() builds the mask it searches for */
+static u_char normal_chars[] = {0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff}; /* normal_chars[n]: byte with its n high-order bits set */
+static char *rn_zeros = NULL, *rn_ones = NULL; /* max_keylen bytes of 0x00 and of 0xff, set up by rn_init() */
+
+#define rn_masktop (mask_rnhead->rnh_treetop) /* top node of the mask tree */
+#undef Bcmp
+#define Bcmp(a, b, l) (l == 0 ? 0 : bcmp((caddr_t)(a), (caddr_t)(b), (u_long)l)) /* yields 0 for a zero length without calling bcmp() */
+
+/* Helpers private to this file. */
+static int rn_satisfies_leaf __P((char *, struct radix_node *, int));
+static int rn_lexobetter __P((void *, void *));
+static struct radix_mask *rn_new_radix_mask __P((struct radix_node *,
+    struct radix_mask *));
+static int rn_freenode __P((struct radix_node *, void *));
+#if defined(AIX) && !defined(_KERNEL)
+struct radix_node *rn_match __P((void *, struct radix_node_head *));
+/*
+ * The parameter order has to agree with the definition and with every
+ * caller in this file: (mask, search, skip).
+ */
+struct radix_node *rn_addmask __P((void *, int, int));
+#define FreeS(x, y) KFREES(x, y)
+#define Bcopy(x, y, z) bcopy(x, y, z)
+#endif
+
+/*
+ * The data structure for the keys is a radix tree with one way
+ * branching removed. The index rn_b at an internal node n represents a bit
+ * position to be tested. The tree is arranged so that all descendants
+ * of a node n have keys whose bits all agree up to position rn_b - 1.
+ * (We say the index of n is rn_b.)
+ *
+ * There is at least one descendant which has a one bit at position rn_b,
+ * and at least one with a zero there.
+ *
+ * A route is determined by a pair of key and mask. We require the
+ * bit-wise logical AND of the key and mask to be the key.
+ * We define the index of a route associated with the mask to be
+ * the first bit number in the mask where 0 occurs (with bit number 0
+ * representing the highest order bit).
+ *
+ * We say a mask is normal if every bit is 0, past the index of the mask.
+ * If a node n has a descendant (k, m) with index(m) == index(n) == rn_b,
+ * and m is a normal mask, then the route applies to every descendant of n.
+ * If the index(m) < rn_b, this implies the trailing last few bits of k
+ * before bit b are all 0, (and hence consequently true of every descendant
+ * of n), so the route applies to all descendants of the node as well.
+ *
+ * Similar logic shows that a non-normal mask m such that
+ * index(m) <= index(n) could potentially apply to many children of n.
+ * Thus, for each non-host route, we attach its mask to a list at an internal
+ * node as high in the tree as we can go.
+ *
+ * The present version of the code makes use of normal routes in short-
+ * circuiting an explicit mask and compare operation when testing whether
+ * a key satisfies a normal route, and also in remembering the unique leaf
+ * that governs a subtree.
+ */
+
+/*
+ * Walk down from `head', at each internal node testing the bit of the key
+ * v_arg that the node selects (set: go right, clear: go left), and return
+ * the leaf the walk ends on.  The leaf's key is not compared with v_arg.
+ */
+struct radix_node *
+rn_search(v_arg, head)
+	void *v_arg;
+	struct radix_node *head;
+{
+	caddr_t key = v_arg;
+	struct radix_node *node = head;
+
+	/* Internal nodes have rn_b >= 0; a negative rn_b marks a leaf. */
+	while (node->rn_b >= 0)
+		node = (node->rn_bmask & key[node->rn_off]) ?
+		    node->rn_r : node->rn_l;
+	return (node);
+}
+
+/*
+ * Masked variant of rn_search(): walk down from `head' and take the right
+ * branch only when the tested bit is set in both the mask m_arg and the
+ * key v_arg; otherwise go left.  Returns the leaf the walk ends on.
+ */
+struct radix_node *
+rn_search_m(v_arg, head, m_arg)
+	struct radix_node *head;
+	void *v_arg, *m_arg;
+{
+	caddr_t key = v_arg, mask = m_arg;
+	struct radix_node *node = head;
+
+	while (node->rn_b >= 0) {
+		/* The key byte is only looked at when the mask bit is set. */
+		if ((node->rn_bmask & mask[node->rn_off]) == 0 ||
+		    (node->rn_bmask & key[node->rn_off]) == 0)
+			node = node->rn_l;
+		else
+			node = node->rn_r;
+	}
+	return node;
+}
+/*
+ * Decide whether mask m_arg is strictly more specific than mask n_arg.
+ * Both arguments are byte strings whose first byte holds their length.
+ * Returns 1 when every bit set in n is also set in m and the two masks
+ * differ; returns 0 when n has a bit that m lacks or when the masks are
+ * the same.
+ */
+int
+rn_refines(m_arg, n_arg)
+ void *m_arg, *n_arg;
+{
+ caddr_t m = m_arg, n = n_arg;
+ caddr_t lim, lim2 = lim = n + *(u_char *)n; /* one past the last byte of n */
+ int longer = (*(u_char *)n++) - (int)(*(u_char *)m++); /* len(n) - len(m); m and n now point past their length bytes */
+ int masks_are_equal = 1;
+
+ if (longer > 0)
+ lim -= longer; /* n is the longer one: first compare only the bytes m also has */
+ while (n < lim) {
+ if (*n & ~(*m))
+ return 0; /* n sets a bit that m does not */
+ if (*n++ != *m++)
+ masks_are_equal = 0;
+ }
+ while (n < lim2)
+ if (*n++)
+ return 0; /* n has a non-zero byte beyond the end of m */
+ if (masks_are_equal && (longer < 0))
+ for (lim2 = m - longer; m < lim2; )
+ if (*m++)
+ return 1; /* m is longer and its extra bytes are not all zero */
+ return (!masks_are_equal);
+}
+
+/*
+ * Look up key v_arg and, when a mask m_arg is supplied, insist on the entry
+ * that carries exactly that mask.  The mask is resolved through
+ * rn_addmask() in search-only mode; if it is not in the mask tree the
+ * result is 0.  Without a mask this is the same as rn_match().
+ */
+struct radix_node *
+rn_lookup(v_arg, m_arg, head)
+	void *v_arg, *m_arg;
+	struct radix_node_head *head;
+{
+	struct radix_node *rn;
+	caddr_t wanted = 0;
+
+	if (m_arg != NULL) {
+		rn = rn_addmask(m_arg, 1, head->rnh_treetop->rn_off);
+		if (rn == 0)
+			return (0);
+		wanted = rn->rn_key;
+	}
+
+	rn = rn_match(v_arg, head);
+	if (wanted == 0)
+		return rn;
+
+	/* Masks are shared, so comparing the pointers is sufficient. */
+	for (; rn != 0; rn = rn->rn_dupedkey)
+		if (rn->rn_mask == wanted)
+			break;
+	return rn;
+}
+
+/*
+ * Test whether key `trial' matches `leaf' under the leaf's mask, looking
+ * only at bytes from offset `skip' onward.  The comparison stops at the
+ * shortest of the trial key, the leaf key and (when there is one) the mask;
+ * a leaf without a mask is compared using rn_ones.
+ * Returns 1 on a match, 0 otherwise.
+ */
+static int
+rn_satisfies_leaf(trial, leaf, skip)
+	char *trial;
+	struct radix_node *leaf;
+	int skip;
+{
+	char *key = leaf->rn_key, *mask = leaf->rn_mask;
+	int len = min(*(u_char *)trial, *(u_char *)key);
+	int i;
+
+	if (mask == 0)
+		mask = rn_ones;
+	else
+		len = min(len, *(u_char *)mask);
+
+	for (i = skip; i < len; i++)
+		if ((trial[i] ^ key[i]) & mask[i])
+			return 0;
+	return 1;
+}
+/*
+ * Find the leaf that best matches the key v_arg in the tree under head.
+ * The leaf reached by bit-testing is first checked for an exact match;
+ * failing that, the leaves on its duplicate-key chain are tried, and then
+ * the mask lists of each ancestor up to the top of the tree.
+ * Returns the matching leaf, or 0 if there is none.
+ */
+struct radix_node *
+rn_match(v_arg, head)
+ void *v_arg;
+ struct radix_node_head *head;
+{
+ caddr_t v = v_arg;
+ struct radix_node *t = head->rnh_treetop, *x;
+ caddr_t cp = v, cp2;
+ caddr_t cplim;
+ struct radix_node *saved_t, *top = t;
+ int off = t->rn_off, vlen = *(u_char *)cp, matched_off;
+ int test, b, rn_b;
+
+ /*
+ * Open code rn_search(v, top) to avoid overhead of extra
+ * subroutine call.
+ */
+ for (; t->rn_b >= 0; ) {
+ if (t->rn_bmask & cp[t->rn_off])
+ t = t->rn_r;
+ else
+ t = t->rn_l;
+ }
+ /*
+ * See if we match exactly as a host destination
+ * or at least learn how many bits match, for normal mask finesse.
+ *
+ * It doesn't hurt us to limit how many bytes to check
+ * to the length of the mask, since if it matches we had a genuine
+ * match and the leaf we have is the most specific one anyway;
+ * if it didn't match with a shorter length it would fail
+ * with a long one. This wins big for class B&C netmasks which
+ * are probably the most common case...
+ */
+ if (t->rn_mask)
+ vlen = *(u_char *)t->rn_mask;
+ cp += off;
+ cp2 = t->rn_key + off;
+ cplim = v + vlen;
+ for (; cp < cplim; cp++, cp2++)
+ if (*cp != *cp2)
+ goto on1;
+ /*
+ * This extra grot is in case we are explicitly asked
+ * to look up the default. Ugh!
+ */
+ if ((t->rn_flags & RNF_ROOT) && t->rn_dupedkey)
+ t = t->rn_dupedkey;
+ return t;
+on1:
+ test = (*cp ^ *cp2) & 0xff; /* find first bit that differs */
+ for (b = 7; (test >>= 1) > 0;)
+ b--;
+ matched_off = cp - v; /* offset of the first byte that differs */
+ b += matched_off << 3; /* b: number of the first differing bit within the whole key */
+ rn_b = -1 - b; /* same -1-index form that leaves and mask entries keep in rn_b / rm_b */
+ /*
+ * If there is a host route in a duped-key chain, it will be first.
+ */
+ if ((saved_t = t)->rn_mask == 0)
+ t = t->rn_dupedkey;
+ for (; t; t = t->rn_dupedkey)
+ /*
+ * Even if we don't match exactly as a host,
+ * we may match if the leaf we wound up at is
+ * a route to a net.
+ */
+ if (t->rn_flags & RNF_NORMAL) {
+ if (rn_b <= t->rn_b)
+ return t;
+ } else if (rn_satisfies_leaf(v, t, matched_off))
+ return t;
+ t = saved_t;
+ /* start searching up the tree */
+ do {
+ struct radix_mask *m;
+ t = t->rn_p;
+ m = t->rn_mklist;
+ if (m) {
+ /*
+ * If non-contiguous masks ever become important
+ * we can restore the masking and open coding of
+ * the search and satisfaction test and put the
+ * calculation of "off" back before the "do".
+ */
+ do {
+ if (m->rm_flags & RNF_NORMAL) {
+ if (rn_b <= m->rm_b)
+ return (m->rm_leaf);
+ } else {
+ off = min(t->rn_off, matched_off);
+ x = rn_search_m(v, t, m->rm_mask);
+ while (x && x->rn_mask != m->rm_mask)
+ x = x->rn_dupedkey;
+ if (x && rn_satisfies_leaf(v, x, off))
+ return x;
+ }
+ m = m->rm_mklist;
+ } while (m);
+ }
+ } while (t != top);
+ return 0;
+}
+
+#ifdef RN_DEBUG
+int rn_nodenum; /* next value handed out as a node's rn_info */
+struct radix_node *rn_clist; /* most recently created leaf; earlier ones follow through rn_ybro */
+int rn_saveinfo; /* NOTE(review): no use visible in this part of the file */
+int rn_debug = 1; /* non-zero turns on the log(LOG_DEBUG, ...) tracing */
+#endif
+
+/*
+ * Initialise a leaf/internal-node pair from the two-element array `nodes'.
+ * nodes[0] becomes a leaf carrying key v; nodes[1] becomes an internal node
+ * that tests bit b and has the leaf as its left child.  The internal node's
+ * parent and right child are not set here.
+ * Returns the internal node.
+ */
+struct radix_node *
+rn_newpair(v, b, nodes)
+	void *v;
+	int b;
+	struct radix_node nodes[2];
+{
+	struct radix_node *leaf = &nodes[0];
+	struct radix_node *inner = &nodes[1];
+
+	/* Internal half: byte offset and bit mask together select bit b. */
+	inner->rn_b = b;
+	inner->rn_bmask = 0x80 >> (b & 7);
+	inner->rn_l = leaf;
+	inner->rn_off = b >> 3;
+
+	/* Leaf half. */
+	leaf->rn_b = -1;
+	leaf->rn_key = (caddr_t)v;
+	leaf->rn_p = inner;
+
+	inner->rn_flags = RNF_ACTIVE;
+	leaf->rn_flags = RNF_ACTIVE;
+#ifdef RN_DEBUG
+	/* Number both nodes and put the leaf on the creation list. */
+	leaf->rn_info = rn_nodenum++;
+	inner->rn_info = rn_nodenum++;
+	leaf->rn_twin = inner;
+	leaf->rn_ybro = rn_clist;
+	rn_clist = leaf;
+#endif
+	return inner;
+}
+/*
+ * Insert key v_arg into the tree under head, using the caller-supplied
+ * pair `nodes' for storage.  If an identical key is already present,
+ * *dupentry is set to 1 and the existing leaf is returned with the tree
+ * unchanged.  Otherwise *dupentry is set to 0, nodes[0] becomes the new
+ * leaf and nodes[1] the new internal node, and the new leaf is returned.
+ */
+struct radix_node *
+rn_insert(v_arg, head, dupentry, nodes)
+ void *v_arg;
+ struct radix_node_head *head;
+ int *dupentry;
+ struct radix_node nodes[2];
+{
+ caddr_t v = v_arg;
+ struct radix_node *top = head->rnh_treetop;
+ int head_off = top->rn_off, vlen = (int)*((u_char *)v);
+ struct radix_node *t = rn_search(v_arg, top);
+ caddr_t cp = v + head_off;
+ int b;
+ struct radix_node *tt;
+
+#ifdef RN_DEBUG
+ if (rn_debug)
+ log(LOG_DEBUG, "rn_insert(%p,%p,%p,%p)\n", v_arg, head, dupentry, nodes);
+#endif
+ /*
+ * Find first bit at which v and t->rn_key differ
+ */
+ {
+ caddr_t cp2 = t->rn_key + head_off;
+ int cmp_res;
+ caddr_t cplim = v + vlen;
+
+ while (cp < cplim)
+ if (*cp2++ != *cp++)
+ goto on1;
+ *dupentry = 1; /* every byte compared equal: the key is already in the tree */
+ return t;
+on1:
+ *dupentry = 0;
+ cmp_res = (cp[-1] ^ cp2[-1]) & 0xff; /* differing bits of the first mismatching byte */
+ for (b = (cp - v) << 3; cmp_res; b--)
+ cmp_res >>= 1;
+ }
+ {
+ struct radix_node *p, *x = top;
+ cp = v;
+ do {
+ p = x;
+ if (cp[x->rn_off] & x->rn_bmask)
+ x = x->rn_r;
+ else
+ x = x->rn_l;
+ } while (b > (unsigned) x->rn_b); /* x->rn_b < b && x->rn_b >= 0 */
+#ifdef RN_DEBUG
+ if (rn_debug)
+ log(LOG_DEBUG, "rn_insert: Going In:\n"); // traverse(p);
+#endif
+ t = rn_newpair(v_arg, b, nodes);
+ tt = t->rn_l; /* the new leaf, nodes[0] */
+ if ((cp[p->rn_off] & p->rn_bmask) == 0)
+ p->rn_l = t;
+ else
+ p->rn_r = t;
+ x->rn_p = t;
+ t->rn_p = p; /* frees x, p as temp vars below */
+ if ((cp[t->rn_off] & t->rn_bmask) == 0) {
+ t->rn_r = x;
+ } else {
+ t->rn_r = tt;
+ t->rn_l = x;
+ }
+#ifdef RN_DEBUG
+ if (rn_debug)
+ log(LOG_DEBUG, "rn_insert: Coming Out:\n"); // traverse(p);
+#endif
+ }
+ return (tt);
+}
+
+/*
+ * Look up, and optionally create, the entry for netmask n_arg in the
+ * shared tree of masks (mask_rnhead).
+ *
+ * n_arg:  the mask; its first byte is its length in bytes
+ * search: non-zero to only look the mask up, zero to insert it if absent
+ * skip:   bytes before this offset are filled with ones instead of being
+ *         copied from n_arg (0 is treated as 1)
+ *
+ * Returns the leaf holding the mask; the first node of the mask tree head
+ * when nothing is left of the mask after trailing zero bytes are trimmed;
+ * or 0 when the mask is absent and `search' is set, or when memory for a
+ * new entry cannot be allocated.
+ */
+struct radix_node *
+rn_addmask(n_arg, search, skip)
+	int search, skip;
+	void *n_arg;
+{
+	caddr_t netmask = (caddr_t)n_arg;
+	struct radix_node *x;
+	caddr_t cp, cplim;
+	int b = 0, mlen, j;
+	int maskduplicated, m0, isnormal;
+	struct radix_node *saved_x;
+	static int last_zeroed = 0;
+
+#ifdef RN_DEBUG
+	if (rn_debug)
+		log(LOG_DEBUG, "rn_addmask(%p,%d,%d)\n", n_arg, search, skip);
+#endif
+	/* Never look at more of the mask than the key buffers can hold. */
+	if ((mlen = *(u_char *)netmask) > max_keylen)
+		mlen = max_keylen;
+	if (skip == 0)
+		skip = 1;
+	if (mlen <= skip)
+		return (mask_rnhead->rnh_nodes);
+	if (skip > 1)
+		Bcopy(rn_ones + 1, addmask_key + 1, skip - 1);
+	if ((m0 = mlen) > skip)
+		Bcopy(netmask + skip, addmask_key + skip, mlen - skip);
+	/*
+	 * Trim trailing zeroes.
+	 */
+	for (cp = addmask_key + mlen; (cp > addmask_key) && cp[-1] == 0;)
+		cp--;
+	mlen = cp - addmask_key;
+	if (mlen <= skip) {
+		if (m0 >= last_zeroed)
+			last_zeroed = mlen;
+		return (mask_rnhead->rnh_nodes);
+	}
+	/* Clear what an earlier, longer mask left behind in the scratch key. */
+	if (m0 < last_zeroed)
+		Bzero(addmask_key + m0, last_zeroed - m0);
+	*addmask_key = last_zeroed = mlen;
+	x = rn_search(addmask_key, rn_masktop);
+	if (Bcmp(addmask_key, x->rn_key, mlen) != 0)
+		x = 0;
+	if (x || search)
+		return (x);
+	/* One allocation holds the node pair followed by the mask bytes. */
+	R_Malloc(x, struct radix_node *, max_keylen + 2 * sizeof (*x));
+	if ((saved_x = x) == 0)
+		return (0);
+	Bzero(x, max_keylen + 2 * sizeof (*x));
+	netmask = cp = (caddr_t)(x + 2);
+	Bcopy(addmask_key, cp, mlen);
+	x = rn_insert(cp, mask_rnhead, &maskduplicated, x);
+	if (maskduplicated) {
+#if 0
+		log(LOG_ERR, "rn_addmask: mask impossibly already in tree\n");
+#endif
+		Free(saved_x);
+		return (x);
+	}
+	/*
+	 * Calculate index of mask, and check for normalcy.
+	 */
+	cplim = netmask + mlen;
+	isnormal = 1;
+	for (cp = netmask + skip; (cp < cplim) && *(u_char *)cp == 0xff;)
+		cp++;
+	if (cp != cplim) {
+		for (j = 0x80; (j & *cp) != 0; j >>= 1)
+			b++;
+		if (*cp != normal_chars[b] || cp != (cplim - 1))
+			isnormal = 0;
+	}
+	b += (cp - netmask) << 3;
+	x->rn_b = -1 - b;
+	if (isnormal)
+		x->rn_flags |= RNF_NORMAL;
+	return (x);
+}
+
+/*
+ * Ordering used to sort masks that rn_refines() cannot rank.
+ * Returns 1 when mask m_arg is longer than n_arg, or when they have the
+ * same length and some byte of m_arg is greater than the byte at the same
+ * position in n_arg; returns 0 otherwise.
+ */
+static int	/* XXX: arbitrary ordering for non-contiguous masks */
+rn_lexobetter(m_arg, n_arg)
+	void *m_arg, *n_arg;
+{
+	u_char *m = m_arg, *n = n_arg;
+	int i, len = m[0];
+
+	if (m[0] > n[0])
+		return 1;  /* not really, but need to check longer one first */
+	if (m[0] < n[0])
+		return 0;
+	/* Same length: the walk starts at the (equal) length byte itself. */
+	for (i = 0; i < len; i++)
+		if (m[i] > n[i])
+			return 1;
+	return 0;
+}
+
+/*
+ * Obtain a radix_mask entry describing leaf tt, link it in front of `next'
+ * and record it in tt->rn_mklist.  The entry copies the leaf's index and
+ * flags; for a normal mask it points back at the leaf, otherwise at the
+ * leaf's mask.  Returns the new entry, or 0 if none could be obtained.
+ */
+static struct radix_mask *
+rn_new_radix_mask(tt, next)
+	struct radix_node *tt;
+	struct radix_mask *next;
+{
+	struct radix_mask *mk;
+
+	MKGet(mk);
+	if (mk == 0) {
+#if 0
+		log(LOG_ERR, "Mask for route not entered\n");
+#endif
+		return (0);
+	}
+	Bzero(mk, sizeof *mk);
+	mk->rm_mklist = next;
+	mk->rm_flags = tt->rn_flags;
+	mk->rm_b = tt->rn_b;
+	if ((tt->rn_flags & RNF_NORMAL) == 0)
+		mk->rm_mask = tt->rn_mask;
+	else
+		mk->rm_leaf = tt;
+	tt->rn_mklist = mk;
+	return mk;
+}
+/*
+ * Add a route, i.e. key v_arg with optional mask n_arg, to the tree under
+ * head, using treenodes[] as storage for the new leaf (and, when the key is
+ * new, its internal node).  Returns the new leaf, or 0 if the mask could
+ * not be entered in the mask tree or an entry with the same key and mask
+ * is already present.
+ */
+struct radix_node *
+rn_addroute(v_arg, n_arg, head, treenodes)
+ void *v_arg, *n_arg;
+ struct radix_node_head *head;
+ struct radix_node treenodes[2];
+{
+ caddr_t v = (caddr_t)v_arg, netmask = (caddr_t)n_arg;
+ struct radix_node *t, *x = NULL, *tt;
+ struct radix_node *saved_tt, *top = head->rnh_treetop;
+ short b = 0, b_leaf = 0;
+ int keyduplicated;
+ caddr_t mmask;
+ struct radix_mask *m, **mp;
+
+#ifdef RN_DEBUG
+ if (rn_debug)
+ log(LOG_DEBUG, "rn_addroute(%p,%p,%p,%p)\n", v_arg, n_arg, head, treenodes);
+#endif
+ /*
+ * In dealing with non-contiguous masks, there may be
+ * many different routes which have the same mask.
+ * We will find it useful to have a unique pointer to
+ * the mask to speed avoiding duplicate references at
+ * nodes and possibly save time in calculating indices.
+ */
+ if (netmask) {
+ if ((x = rn_addmask(netmask, 0, top->rn_off)) == 0)
+ return (0);
+ b_leaf = x->rn_b;
+ b = -1 - x->rn_b;
+ netmask = x->rn_key; /* from here on netmask is the shared copy held in the mask tree */
+ }
+ /*
+ * Deal with duplicated keys: attach node to previous instance
+ */
+ saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes);
+ if (keyduplicated) {
+ for (t = tt; tt; t = tt, tt = tt->rn_dupedkey) {
+ if (tt->rn_mask == netmask)
+ return (0);
+ if (netmask == 0 ||
+ (tt->rn_mask &&
+ ((b_leaf < tt->rn_b) || /* index(netmask) > node */
+ rn_refines(netmask, tt->rn_mask) ||
+ rn_lexobetter(netmask, tt->rn_mask))))
+ break;
+ }
+ /*
+ * If the mask is not duplicated, we wouldn't
+ * find it among possible duplicate key entries
+ * anyway, so the above test doesn't hurt.
+ *
+ * We sort the masks for a duplicated key the same way as
+ * in a masklist -- most specific to least specific.
+ * This may require the unfortunate nuisance of relocating
+ * the head of the list.
+ *
+ * We also reverse, or doubly link the list through the
+ * parent pointer.
+ */
+ if (tt == saved_tt) {
+ struct radix_node *xx = x;
+ /* link in at head of list */
+ (tt = treenodes)->rn_dupedkey = t;
+ tt->rn_flags = t->rn_flags;
+ tt->rn_p = x = t->rn_p;
+ t->rn_p = tt;
+ if (x->rn_l == t)
+ x->rn_l = tt;
+ else
+ x->rn_r = tt;
+ saved_tt = tt;
+ x = xx;
+ } else {
+ (tt = treenodes)->rn_dupedkey = t->rn_dupedkey;
+ t->rn_dupedkey = tt;
+ tt->rn_p = t;
+ if (tt->rn_dupedkey)
+ tt->rn_dupedkey->rn_p = tt;
+ }
+#ifdef RN_DEBUG
+ t=tt+1;
+ tt->rn_info = rn_nodenum++;
+ t->rn_info = rn_nodenum++;
+ tt->rn_twin = t;
+ tt->rn_ybro = rn_clist;
+ rn_clist = tt;
+#endif
+ tt->rn_key = (caddr_t) v;
+ tt->rn_b = -1;
+ tt->rn_flags = RNF_ACTIVE;
+ }
+ /*
+ * Put mask in tree.
+ */
+ if (netmask) {
+ tt->rn_mask = netmask;
+ tt->rn_b = x->rn_b;
+ tt->rn_flags |= x->rn_flags & RNF_NORMAL;
+ }
+ t = saved_tt->rn_p;
+ if (keyduplicated)
+ goto on2;
+ b_leaf = -1 - t->rn_b;
+ if (t->rn_r == saved_tt)
+ x = t->rn_l;
+ else
+ x = t->rn_r;
+ /* Promote general routes from below */
+ if (x->rn_b < 0) {
+ for (mp = &t->rn_mklist; x; x = x->rn_dupedkey)
+ if (x->rn_mask && (x->rn_b >= b_leaf) && x->rn_mklist == 0) {
+ *mp = m = rn_new_radix_mask(x, 0);
+ if (m)
+ mp = &m->rm_mklist;
+ }
+ } else if (x->rn_mklist) {
+ /*
+ * Skip over masks whose index is > that of new node
+ */
+ for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist)
+ if (m->rm_b >= b_leaf)
+ break;
+ t->rn_mklist = m;
+ *mp = 0;
+ }
+on2:
+ /* Add new route to highest possible ancestor's list */
+ if ((netmask == 0) || (b > t->rn_b ))
+ return tt; /* can't lift at all */
+ b_leaf = tt->rn_b;
+ do {
+ x = t;
+ t = t->rn_p;
+ } while (b <= t->rn_b && x != top);
+ /*
+ * Search through routes associated with node to
+ * insert new route according to index.
+ * Need same criteria as when sorting dupedkeys to avoid
+ * double loop on deletion.
+ */
+ for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist) {
+ if (m->rm_b < b_leaf)
+ continue;
+ if (m->rm_b > b_leaf)
+ break;
+ if (m->rm_flags & RNF_NORMAL) {
+ mmask = m->rm_leaf->rn_mask;
+ if (tt->rn_flags & RNF_NORMAL) {
+#if 0
+ log(LOG_ERR, "Non-unique normal route,"
+ " mask not entered\n");
+#endif
+ return tt;
+ }
+ } else
+ mmask = m->rm_mask;
+ if (mmask == netmask) {
+ m->rm_refs++; /* an entry for this mask is already listed here: share it */
+ tt->rn_mklist = m;
+ return tt;
+ }
+ if (rn_refines(netmask, mmask)
+ || rn_lexobetter(netmask, mmask))
+ break;
+ }
+ *mp = rn_new_radix_mask(tt, *mp);
+ return tt;
+}
+/*
+ * Remove the route with key v_arg and optional mask netmask_arg from the
+ * tree under head.  Returns the removed leaf, with RNF_ACTIVE cleared on it
+ * and on the node that follows it in memory, or 0 when no such route is
+ * found, when the leaf is a root node, or when a normal route's mask
+ * annotation is inconsistent.  The storage itself is not freed here.
+ */
+struct radix_node *
+rn_delete(v_arg, netmask_arg, head)
+ void *v_arg, *netmask_arg;
+ struct radix_node_head *head;
+{
+ struct radix_node *t, *p, *x, *tt;
+ struct radix_mask *m, *saved_m, **mp;
+ struct radix_node *dupedkey, *saved_tt, *top;
+ caddr_t v, netmask;
+ int b, head_off, vlen;
+
+ v = v_arg;
+ netmask = netmask_arg;
+ x = head->rnh_treetop;
+ tt = rn_search(v, x);
+ head_off = x->rn_off;
+ vlen = *(u_char *)v;
+ saved_tt = tt;
+ top = x;
+ if (tt == 0 ||
+ Bcmp(v + head_off, tt->rn_key + head_off, vlen - head_off))
+ return (0);
+ /*
+ * Delete our route from mask lists.
+ */
+ if (netmask) {
+ if ((x = rn_addmask(netmask, 1, head_off)) == 0)
+ return (0);
+ netmask = x->rn_key;
+ while (tt->rn_mask != netmask)
+ if ((tt = tt->rn_dupedkey) == 0)
+ return (0);
+ }
+ if (tt->rn_mask == 0 || (saved_m = m = tt->rn_mklist) == 0)
+ goto on1;
+ if (tt->rn_flags & RNF_NORMAL) {
+ if (m->rm_leaf != tt || m->rm_refs > 0) {
+#if 0
+ log(LOG_ERR, "rn_delete: inconsistent annotation\n");
+#endif
+ return 0; /* dangling ref could cause disaster */
+ }
+ } else {
+ if (m->rm_mask != tt->rn_mask) {
+#if 0
+ log(LOG_ERR, "rn_delete: inconsistent annotation\n");
+#endif
+ goto on1;
+ }
+ if (--m->rm_refs >= 0)
+ goto on1;
+ }
+ b = -1 - tt->rn_b;
+ t = saved_tt->rn_p;
+ if (b > t->rn_b)
+ goto on1; /* Wasn't lifted at all */
+ do {
+ x = t;
+ t = t->rn_p;
+ } while (b <= t->rn_b && x != top);
+ for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist)
+ if (m == saved_m) {
+ *mp = m->rm_mklist;
+ MKFree(m);
+ break;
+ }
+ if (m == 0) {
+#if 0
+ log(LOG_ERR, "rn_delete: couldn't find our annotation\n");
+#endif
+ if (tt->rn_flags & RNF_NORMAL)
+ return (0); /* Dangling ref to us */
+ }
+on1:
+ /*
+ * Eliminate us from tree
+ */
+ if (tt->rn_flags & RNF_ROOT)
+ return (0);
+#ifdef RN_DEBUG
+ /* Get us out of the creation list */
+ for (t = rn_clist; t && t->rn_ybro != tt; t = t->rn_ybro)
+ ;
+ if (t) t->rn_ybro = tt->rn_ybro;
+#endif
+ t = tt->rn_p;
+ dupedkey = saved_tt->rn_dupedkey;
+ if (dupedkey) {
+ /*
+ * Here, tt is the deletion target and
+ * saved_tt is the head of the dupedkey chain.
+ */
+ if (tt == saved_tt) {
+ x = dupedkey;
+ x->rn_p = t;
+ if (t->rn_l == tt)
+ t->rn_l = x;
+ else
+ t->rn_r = x;
+ } else {
+ /* find node in front of tt on the chain */
+ for (x = p = saved_tt; p && p->rn_dupedkey != tt;)
+ p = p->rn_dupedkey;
+ if (p) {
+ p->rn_dupedkey = tt->rn_dupedkey;
+ if (tt->rn_dupedkey)
+ tt->rn_dupedkey->rn_p = p;
+ }
+#if 0
+ else
+ log(LOG_ERR, "rn_delete: couldn't find us\n");
+#endif
+ }
+ t = tt + 1; /* the node stored right after tt */
+ if (t->rn_flags & RNF_ACTIVE) {
+#ifndef RN_DEBUG
+ *++x = *t;
+ p = t->rn_p;
+#else
+ b = t->rn_info;
+ *++x = *t;
+ t->rn_info = b;
+ p = t->rn_p;
+#endif
+ if (p->rn_l == t)
+ p->rn_l = x;
+ else
+ p->rn_r = x;
+ x->rn_l->rn_p = x;
+ x->rn_r->rn_p = x;
+ }
+ goto out;
+ }
+ if (t->rn_l == tt)
+ x = t->rn_r;
+ else
+ x = t->rn_l;
+ p = t->rn_p;
+ if (p->rn_r == t)
+ p->rn_r = x;
+ else
+ p->rn_l = x;
+ x->rn_p = p;
+ /*
+ * Demote routes attached to us.
+ */
+ if (t->rn_mklist) {
+ if (x->rn_b >= 0) {
+ for (mp = &x->rn_mklist; (m = *mp) != NULL;)
+ mp = &m->rm_mklist;
+ *mp = t->rn_mklist;
+ } else {
+ /* If there are any key,mask pairs in a sibling
+ duped-key chain, some subset will appear sorted
+ in the same order attached to our mklist */
+ for (m = t->rn_mklist; m && x; x = x->rn_dupedkey)
+ if (m == x->rn_mklist) {
+ struct radix_mask *mm = m->rm_mklist;
+ x->rn_mklist = 0;
+ if (--(m->rm_refs) < 0)
+ MKFree(m);
+ m = mm;
+ }
+#if 0
+ if (m)
+ log(LOG_ERR, "%s %p at %p\n",
+ "rn_delete: Orphaned Mask", m, x);
+#endif
+ }
+ }
+ /*
+ * We may be holding an active internal node in the tree.
+ */
+ x = tt + 1;
+ if (t != x) {
+#ifndef RN_DEBUG
+ *t = *x;
+#else
+ b = t->rn_info;
+ *t = *x;
+ t->rn_info = b;
+#endif
+ t->rn_l->rn_p = t;
+ t->rn_r->rn_p = t;
+ p = x->rn_p;
+ if (p->rn_l == x)
+ p->rn_l = t;
+ else
+ p->rn_r = t;
+ }
+out:
+ tt->rn_flags &= ~RNF_ACTIVE;
+ tt[1].rn_flags &= ~RNF_ACTIVE;
+ return (tt);
+}
+/*
+ * Call f(leaf, w) for every leaf in the tree under h that is not flagged
+ * RNF_ROOT, including the leaves on duplicate-key chains.  The walk stops
+ * and returns f's value the first time that value is non-zero; it returns
+ * 0 once the whole tree has been visited.
+ */
+int
+rn_walktree(h, f, w)
+ struct radix_node_head *h;
+ int (*f) __P((struct radix_node *, void *));
+ void *w;
+{
+ int error;
+ struct radix_node *base, *next;
+ struct radix_node *rn = h->rnh_treetop;
+ /*
+ * This gets complicated because we may delete the node
+ * while applying the function f to it, so we need to calculate
+ * the successor node in advance.
+ */
+ /* First time through node, go left */
+ while (rn->rn_b >= 0)
+ rn = rn->rn_l;
+ for (;;) {
+ base = rn;
+ /* If at right child go back up, otherwise, go right */
+ while (rn->rn_p->rn_r == rn && (rn->rn_flags & RNF_ROOT) == 0)
+ rn = rn->rn_p;
+ /* Find the next *leaf* since next node might vanish, too */
+ for (rn = rn->rn_p->rn_r; rn->rn_b >= 0;)
+ rn = rn->rn_l;
+ next = rn;
+ /* Process leaves */
+ while ((rn = base) != NULL) {
+ base = rn->rn_dupedkey; /* fetched before f runs, since f may delete rn */
+ if (!(rn->rn_flags & RNF_ROOT)
+ && (error = (*f)(rn, w)))
+ return (error);
+ }
+ rn = next;
+ if (rn->rn_flags & RNF_ROOT)
+ return (0);
+ }
+ /* NOTREACHED */
+}
+
+/*
+ * Allocate and initialise a tree head unless *head already points at one.
+ * `off' is the number of the first key bit the tree tests.
+ * Returns 1 on success (or when *head was already set) and 0 if the head
+ * could not be allocated.
+ */
+int
+rn_inithead(head, off)
+	void **head;
+	int off;
+{
+	struct radix_node_head *newhead;
+
+	if (*head != NULL)
+		return (1);
+
+	R_Malloc(newhead, struct radix_node_head *, sizeof (*newhead));
+	if (newhead != 0) {
+		*head = newhead;
+		return rn_inithead0(newhead, off);
+	}
+	return (0);
+}
+
+/*
+ * Initialise an already allocated tree head: clear it, build the starting
+ * tree out of the nodes embedded in it and install the method pointers.
+ * The top node tests bit `off'; its left leaf is keyed by rn_zeros and its
+ * right leaf by rn_ones, and all three carry RNF_ROOT.  Always returns 1.
+ */
+int
+rn_inithead0(rnh, off)
+	struct radix_node_head *rnh;
+	int off;
+{
+	struct radix_node *top, *left, *right;
+
+	Bzero(rnh, sizeof (*rnh));
+
+	top = rn_newpair(rn_zeros, off, rnh->rnh_nodes);
+	left = top->rn_l;
+	right = &rnh->rnh_nodes[2];
+
+	top->rn_r = right;
+	top->rn_p = top;		/* the top node is its own parent */
+	top->rn_flags = RNF_ROOT | RNF_ACTIVE;
+
+	left->rn_flags = RNF_ROOT | RNF_ACTIVE;
+	left->rn_b = -1 - off;
+
+	/* The right leaf starts as a copy of the left one; only the key differs. */
+	*right = *left;
+	right->rn_key = rn_ones;
+
+	rnh->rnh_treetop = top;
+	rnh->rnh_addaddr = rn_addroute;
+	rnh->rnh_deladdr = rn_delete;
+	rnh->rnh_matchaddr = rn_match;
+	rnh->rnh_lookup = rn_lookup;
+	rnh->rnh_walktree = rn_walktree;
+	return (1);
+}
+
+/*
+ * One-time set-up for the radix code.  Allocates a single buffer of
+ * 3 * max_keylen bytes and carves it into rn_zeros (all 0x00), rn_ones
+ * (all 0xff) and the addmask_key scratch area, then creates the tree of
+ * masks.  Does nothing when max_keylen is 0; panics if the buffer or the
+ * mask tree head cannot be set up.
+ */
+void
+rn_init()
+{
+	int i;
+
+	if (max_keylen == 0) {
+#if 0
+		log(LOG_ERR,
+		    "rn_init: radix functions require max_keylen be set\n");
+#endif
+		return;
+	}
+
+	/* A buffer left over from an earlier call is reused. */
+	if (rn_zeros == NULL) {
+		R_Malloc(rn_zeros, char *, 3 * max_keylen);
+	}
+	if (rn_zeros == NULL)
+		panic("rn_init");
+
+	Bzero(rn_zeros, 3 * max_keylen);
+	rn_ones = rn_zeros + max_keylen;
+	addmask_key = rn_ones + max_keylen;
+	for (i = 0; i < max_keylen; i++)
+		rn_ones[i] = -1;
+
+	if (rn_inithead((void *)&mask_rnhead, 0) == 0)
+		panic("rn_init 2");
+}
+
+
+/*
+ * rn_walktree() callback used by rn_freehead(): delete the entry keyed by
+ * n->rn_key (no mask) from the tree whose head is passed in p, and free
+ * the node that the delete routine hands back.  Always returns 0, so the
+ * walk is never cut short.
+ */
+static int
+rn_freenode(struct radix_node *n, void *p)
+{
+	struct radix_node_head *head = p;
+	struct radix_node *gone = head->rnh_deladdr(n->rn_key, NULL, head);
+
+	if (gone == NULL)
+		return 0;
+	FreeS(gone, max_keylen + 2 * sizeof (*gone));
+	return 0;
+}
+
+
+/*
+ * Tear down a tree: delete and free every non-root entry through
+ * rn_freenode(), clear the head's method pointers and free the head.
+ */
+void
+rn_freehead(rnh)
+	struct radix_node_head *rnh;
+{
+	/* rn_freenode() always returns 0, so the result carries no information. */
+	(void)rn_walktree(rnh, rn_freenode, rnh);
+
+	rnh->rnh_walktree = NULL;
+	rnh->rnh_lookup = NULL;
+	rnh->rnh_matchaddr = NULL;
+	rnh->rnh_deladdr = NULL;
+	rnh->rnh_addaddr = NULL;
+
+	Free(rnh);
+}
+
+
+/*
+ * Undo rn_init(): free the tree of masks, the spare radix_mask structures
+ * on rn_mkfreelist, and the buffer behind rn_zeros / rn_ones /
+ * addmask_key.  Safe to call when some or all of these were never set up.
+ */
+void
+rn_fini()
+{
+	struct radix_mask *m;
+
+	/*
+	 * Take the mask tree down before its key storage goes away: the
+	 * root leaves of that tree use rn_zeros and rn_ones as their keys.
+	 */
+	if (mask_rnhead != NULL) {
+		rn_freehead(mask_rnhead);
+		mask_rnhead = NULL;
+	}
+
+	while ((m = rn_mkfreelist) != NULL) {
+		rn_mkfreelist = m->rm_mklist;
+		KFREE(m);
+	}
+
+	if (rn_zeros != NULL) {
+		FreeS(rn_zeros, 3 * max_keylen);
+		rn_zeros = NULL;
+		/* Both pointed into the buffer that was just released. */
+		rn_ones = NULL;
+		addmask_key = NULL;
+	}
+}
+
+
+#ifdef USE_MAIN
+
+typedef struct myst { /* one test route: key, mask and the node pair handed to rnh_addaddr() */
+ addrfamily_t dst; /* key; first member, and main() reads rn_key back through a myst_t pointer */
+ addrfamily_t mask;
+ struct radix_node nodes[2];
+} myst_t;
+
+/*
+ * Stand-alone smoke test, built only when USE_MAIN is defined: adds two
+ * IPv4 routes (127.0.0.0/255.0.0.0 and 127.0.1.0/255.255.255.0) to a new
+ * tree and prints the outcome of three lookups.
+ */
+int
+main(int argc, char *argv[])
+{
+	struct radix_node_head *rnh;
+	struct radix_node *rn;
+	addrfamily_t af, mf;
+	myst_t st1, st2, *stp;
+
+	memset(&st1, 0, sizeof(st1));
+	memset(&st2, 0, sizeof(st2));
+	memset(&af, 0, sizeof(af));
+	/*
+	 * mf.adf_len below declares the whole structure to be mask, but only
+	 * adf_len and the in4 address are assigned; clear the rest so the
+	 * lookup does not read uninitialised stack bytes.
+	 */
+	memset(&mf, 0, sizeof(mf));
+
+	rn_init();
+
+	rnh = NULL;
+	/* Same cast as rn_init() uses: rn_inithead() takes a void **. */
+	if (rn_inithead((void *)&rnh,
+	    offsetof(addrfamily_t, adf_addr) << 3) == 0) {
+		fputs("rn_inithead failed\n", stderr);
+		return 1;
+	}
+
+	/* NOTE(review): adf_len is set to sizeof the whole myst_t, not of the key -- confirm intended. */
+	st1.dst.adf_len = sizeof(st1);
+	st1.mask.adf_len = sizeof(st1);
+	st1.dst.adf_addr.in4.s_addr = inet_addr("127.0.0.0");
+	st1.mask.adf_addr.in4.s_addr = inet_addr("255.0.0.0");
+	rn = rnh->rnh_addaddr(&st1.dst, &st1.mask, rnh, st1.nodes);
+	printf("add.1 %p\n", rn);
+
+	st2.dst.adf_len = sizeof(st2);
+	st2.mask.adf_len = sizeof(st2);
+	st2.dst.adf_addr.in4.s_addr = inet_addr("127.0.1.0");
+	st2.mask.adf_addr.in4.s_addr = inet_addr("255.255.255.0");
+	rn = rnh->rnh_addaddr(&st2.dst, &st2.mask, rnh, st2.nodes);
+	printf("add.2 %p\n", rn);
+
+	/* 1: best match for an address covered by both routes. */
+	af.adf_len = sizeof(af);
+	af.adf_addr.in4.s_addr = inet_addr("127.0.1.0");
+	rn = rnh->rnh_matchaddr(&af, rnh);
+	if (rn != NULL) {
+		printf("1.lookup = %p key %p mask %p\n", rn, rn->rn_key, rn->rn_mask);
+		stp = (myst_t *)rn->rn_key;
+		printf("%s/", inet_ntoa(stp->dst.adf_addr.in4));
+		stp = (myst_t *)rn->rn_mask;
+		printf("%s\n", inet_ntoa(stp->dst.adf_addr.in4));
+	}
+
+	/* 2: lookup of the same address with an explicit mask. */
+	mf.adf_len = sizeof(mf);
+	mf.adf_addr.in4.s_addr = inet_addr("255.255.255.0");
+	rn = rnh->rnh_lookup(&af, &mf, rnh);
+	if (rn != NULL) {
+		printf("2.lookup = %p key %p mask %p\n", rn, rn->rn_key, rn->rn_mask);
+		stp = (myst_t *)rn->rn_key;
+		printf("%s/", inet_ntoa(stp->dst.adf_addr.in4));
+		stp = (myst_t *)rn->rn_mask;
+		printf("%s\n", inet_ntoa(stp->dst.adf_addr.in4));
+	}
+
+	/* 3: an address outside both routes. */
+	af.adf_len = sizeof(af);
+	af.adf_addr.in4.s_addr = inet_addr("126.0.0.1");
+	rn = rnh->rnh_matchaddr(&af, rnh);
+	if (rn != NULL) {
+		printf("3.lookup = %p key %p mask %p\n", rn, rn->rn_key, rn->rn_mask);
+		stp = (myst_t *)rn->rn_key;
+		printf("%s/", inet_ntoa(stp->dst.adf_addr.in4));
+		stp = (myst_t *)rn->rn_mask;
+		printf("%s\n", inet_ntoa(stp->dst.adf_addr.in4));
+	}
+
+	return 0;
+}
+
+
+/*
+ * Stand-in for the kernel's log() used by the USE_MAIN test build: formats
+ * the message to stderr.  The `level' argument is accepted and ignored.
+ */
+void
+log(int level, char *format, ...)
+{
+	va_list args;
+
+	va_start(args, format);
+	(void) vfprintf(stderr, format, args);
+	va_end(args);
+}
+#endif
+
+
+#ifndef _KERNEL
+/*
+ * User-level replacement for panic(), compiled only when _KERNEL is not
+ * defined: write the message to stderr and abort the process.
+ */
+void
+panic(char *str)
+{
+	(void) fputs(str, stderr);
+	abort();
+}
+#endif
diff --git a/usr/src/uts/common/inet/ipf/radix.h b/usr/src/uts/common/inet/ipf/radix.h
new file mode 100644
index 0000000000..0b55412605
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/radix.h
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 1988, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)radix.h 8.2 (Berkeley) 10/31/94
+ */
+
+#ifndef _NET_RADIX_H_
+#define _NET_RADIX_H_
+
+#ifndef __P
+# ifdef __STDC__
+# define __P(x) x
+# else
+# define __P(x) ()
+# endif
+#endif
+
+/*
+ * Radix search tree node layout.
+ *
+ * The rn_u union carries either leaf data (key, mask and a chain of
+ * duplicate keys) or interior-node data (compare offset and the two
+ * children).  The rn_key/rn_mask/rn_dupedkey and rn_off/rn_l/rn_r macros
+ * defined after the structure are shorthands for those union members.
+ * The three trailing fields exist only when RN_DEBUG is defined.
+ */
+
+struct radix_node {
+ struct radix_mask *rn_mklist; /* list of masks contained in subtree */
+ struct radix_node *rn_p; /* parent */
+ short rn_b; /* bit offset; -1-index(netmask) */
+ char rn_bmask; /* node: mask for bit test*/
+ u_char rn_flags; /* enumerated next */
+#define RNF_NORMAL 1 /* leaf contains normal route */
+#define RNF_ROOT 2 /* leaf is root leaf for tree */
+#define RNF_ACTIVE 4 /* This node is alive (for rtfree) */
+ union {
+ struct { /* leaf only data: */
+ caddr_t rn_Key; /* object of search */
+ caddr_t rn_Mask; /* netmask, if present */
+ struct radix_node *rn_Dupedkey;
+ } rn_leaf;
+ struct { /* node only data: */
+ int rn_Off; /* where to start compare */
+ struct radix_node *rn_L;/* progeny */
+ struct radix_node *rn_R;/* progeny */
+ } rn_node;
+ } rn_u;
+#ifdef RN_DEBUG
+ int rn_info;
+ struct radix_node *rn_twin;
+ struct radix_node *rn_ybro;
+#endif
+};
+
+#define rn_dupedkey rn_u.rn_leaf.rn_Dupedkey
+#define rn_key rn_u.rn_leaf.rn_Key
+#define rn_mask rn_u.rn_leaf.rn_Mask
+#define rn_off rn_u.rn_node.rn_Off
+#define rn_l rn_u.rn_node.rn_L
+#define rn_r rn_u.rn_node.rn_R
+
+/*
+ * Annotations to tree concerning potential routes applying to subtrees.
+ *
+ * Entries are chained through rm_mklist.  rm_rmu holds either the mask
+ * itself (rm_mask) or a pointer to a leaf (rm_leaf).  This declaration
+ * also declares, as extern, the rn_mkfreelist pointer that the MKGet and
+ * MKFree macros use as a free list of these structures.
+ */
+
+extern struct radix_mask {
+ short rm_b; /* bit offset; -1-index(netmask) */
+ char rm_unused; /* cf. rn_bmask */
+ u_char rm_flags; /* cf. rn_flags */
+ struct radix_mask *rm_mklist; /* more masks to try */
+ union {
+ caddr_t rmu_mask; /* the mask */
+ struct radix_node *rmu_leaf; /* for normal routes */
+ } rm_rmu;
+ int rm_refs; /* # of references to this struct */
+} *rn_mkfreelist;
+
+#define rm_mask rm_rmu.rmu_mask
+#define rm_leaf rm_rmu.rmu_leaf /* extra field would make 32 bytes */
+
+/*
+ * MKGet(m): store in m a radix_mask taken from the head of rn_mkfreelist
+ * when the list is not empty, otherwise one obtained through R_Malloc().
+ * MKFree(m): push m back onto the head of rn_mkfreelist.
+ * Both expand to a brace-enclosed compound statement, so use them as a
+ * complete statement.  MKGet's last line deliberately carries no line
+ * continuation, so the macro ends with its closing brace.
+ */
+#define MKGet(m) {\
+	if (rn_mkfreelist) {\
+		(m) = rn_mkfreelist; \
+		rn_mkfreelist = (m)->rm_mklist; \
+	} else \
+		R_Malloc(m, struct radix_mask *, sizeof (*(m))); }
+
+#define MKFree(m) { (m)->rm_mklist = rn_mkfreelist; rn_mkfreelist = (m);}
+
+/*
+ * Head of one radix tree: the tree top and leaf list, the rnh_hits,
+ * rnh_number and rnh_ref fields, the optional fixed key sizes, and the
+ * function pointers through which entries are added, deleted, looked up
+ * and walked.  rnh_nodes[] supplies the three nodes of an empty tree.
+ */
+struct radix_node_head {
+ struct radix_node *rnh_treetop;
+ struct radix_node *rnh_leaflist;
+ u_long rnh_hits;
+ u_int rnh_number;
+ u_int rnh_ref;
+ int rnh_addrsize; /* permit, but not require fixed keys */
+ int rnh_pktsize; /* permit, but not require fixed keys */
+ struct radix_node *(*rnh_addaddr) /* add based on sockaddr */
+ __P((void *v, void *mask,
+ struct radix_node_head *head, struct radix_node nodes[]));
+ struct radix_node *(*rnh_addpkt) /* add based on packet hdr */
+ __P((void *v, void *mask,
+ struct radix_node_head *head, struct radix_node nodes[]));
+ struct radix_node *(*rnh_deladdr) /* remove based on sockaddr */
+ __P((void *v, void *mask, struct radix_node_head *head));
+ struct radix_node *(*rnh_delpkt) /* remove based on packet hdr */
+ __P((void *v, void *mask, struct radix_node_head *head));
+ struct radix_node *(*rnh_matchaddr) /* locate based on sockaddr */
+ __P((void *v, struct radix_node_head *head));
+ struct radix_node *(*rnh_lookup) /* locate based on sockaddr */
+ __P((void *v, void *mask, struct radix_node_head *head));
+ struct radix_node *(*rnh_matchpkt) /* locate based on packet hdr */
+ __P((void *v, struct radix_node_head *head));
+ int (*rnh_walktree) /* traverse tree */
+ __P((struct radix_node_head *,
+ int (*)(struct radix_node *, void *), void *));
+ struct radix_node rnh_nodes[3]; /* empty tree for common case */
+};
+
+
+/*
+ * Wrappers mapping the radix code's helper names onto bcmp/bcopy/bzero and
+ * onto the KMALLOCS/KFREES/KFREE macros, which are defined outside this
+ * header.
+ * NOTE(review): Bzero() expands with a trailing ';', so "Bzero(p, n);"
+ * produces an extra empty statement and the macro cannot be placed directly
+ * before an "else" -- check all callers before changing it.
+ */
+#define Bcmp(a, b, n) bcmp(((caddr_t)(a)), ((caddr_t)(b)), (unsigned)(n))
+#define Bcopy(a, b, n) bcopy(((caddr_t)(a)), ((caddr_t)(b)), (unsigned)(n))
+#define Bzero(p, n) bzero((caddr_t)(p), (unsigned)(n));
+#define R_Malloc(p, t, n) KMALLOCS(p, t, n)
+#define FreeS(p, z) KFREES(p, z)
+#define Free(p) KFREE(p)
+
+void rn_init __P((void));
+void rn_fini __P((void));
+int rn_inithead __P((void **, int));
+void rn_freehead __P((struct radix_node_head *));
+int rn_inithead0 __P((struct radix_node_head *, int));
+int rn_refines __P((void *, void *));
+int rn_walktree __P((struct radix_node_head *,
+ int (*)(struct radix_node *, void *), void *));
+struct radix_node
+ *rn_addmask __P((void *, int, int)),
+ *rn_addroute __P((void *, void *, struct radix_node_head *,
+ struct radix_node [2])),
+ *rn_delete __P((void *, void *, struct radix_node_head *)),
+ *rn_insert __P((void *, struct radix_node_head *, int *,
+ struct radix_node [2])),
+ *rn_lookup __P((void *, void *, struct radix_node_head *)),
+ *rn_match __P((void *, struct radix_node_head *)),
+ *rn_newpair __P((void *, int, struct radix_node[2])),
+ *rn_search __P((void *, struct radix_node *)),
+ *rn_search_m __P((void *, struct radix_node *, void *));
+
+#endif /* _NET_RADIX_H_ */
diff --git a/usr/src/uts/common/inet/ipf/radix_ipf.h b/usr/src/uts/common/inet/ipf/radix_ipf.h
new file mode 100644
index 0000000000..357b9c40dc
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/radix_ipf.h
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 1988, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)radix.h 8.2 (Berkeley) 10/31/94
+ */
+
+#if !defined(_NET_RADIX_H_) && !defined(_RADIX_H_)
+#define _NET_RADIX_H_
+#ifndef _RADIX_H_
+#define _RADIX_H_
+#endif /* _RADIX_H_ */
+
+#ifndef __P
+# ifdef __STDC__
+# define __P(x) x
+# else
+# define __P(x) ()
+# endif
+#endif
+
+/*
+ * When __sgi is defined, rename the three structure tags to ipf_-prefixed
+ * ones.  NOTE(review): presumably this avoids a clash with the system's
+ * own radix structures -- confirm.
+ */
+#if defined(__sgi)
+# define radix_mask ipf_radix_mask
+# define radix_node ipf_radix_node
+# define radix_node_head ipf_radix_node_head
+#endif
+
+/*
+ * Radix search tree node layout.
+ *
+ * The rn_u union carries either leaf data (key, mask and a chain of
+ * duplicate keys) or interior-node data (compare offset and the two
+ * children).  The rn_key/rn_mask/rn_dupedkey and rn_off/rn_l/rn_r macros
+ * defined after the structure are shorthands for those union members.
+ * The three trailing fields exist only when RN_DEBUG is defined.
+ */
+
+struct radix_node {
+ struct radix_mask *rn_mklist; /* list of masks contained in subtree */
+ struct radix_node *rn_p; /* parent */
+ short rn_b; /* bit offset; -1-index(netmask) */
+ char rn_bmask; /* node: mask for bit test*/
+ u_char rn_flags; /* enumerated next */
+#define RNF_NORMAL 1 /* leaf contains normal route */
+#define RNF_ROOT 2 /* leaf is root leaf for tree */
+#define RNF_ACTIVE 4 /* This node is alive (for rtfree) */
+ union {
+ struct { /* leaf only data: */
+ caddr_t rn_Key; /* object of search */
+ caddr_t rn_Mask; /* netmask, if present */
+ struct radix_node *rn_Dupedkey;
+ } rn_leaf;
+ struct { /* node only data: */
+ int rn_Off; /* where to start compare */
+ struct radix_node *rn_L;/* progeny */
+ struct radix_node *rn_R;/* progeny */
+ } rn_node;
+ } rn_u;
+#ifdef RN_DEBUG
+ int rn_info;
+ struct radix_node *rn_twin;
+ struct radix_node *rn_ybro;
+#endif
+};
+
+#define rn_dupedkey rn_u.rn_leaf.rn_Dupedkey
+#define rn_key rn_u.rn_leaf.rn_Key
+#define rn_mask rn_u.rn_leaf.rn_Mask
+#define rn_off rn_u.rn_node.rn_Off
+#define rn_l rn_u.rn_node.rn_L
+#define rn_r rn_u.rn_node.rn_R
+
+/*
+ * Annotations to tree concerning potential routes applying to subtrees.
+ *
+ * Entries are chained through rm_mklist.  rm_rmu holds either the mask
+ * itself (rm_mask) or a pointer to a leaf (rm_leaf).  Unlike the MKGet and
+ * MKFree macros further down, this header does not declare rn_mkfreelist;
+ * a file using those macros must have that name in scope.
+ */
+
+struct radix_mask {
+ short rm_b; /* bit offset; -1-index(netmask) */
+ char rm_unused; /* cf. rn_bmask */
+ u_char rm_flags; /* cf. rn_flags */
+ struct radix_mask *rm_mklist; /* more masks to try */
+ union {
+ caddr_t rmu_mask; /* the mask */
+ struct radix_node *rmu_leaf; /* for normal routes */
+ } rm_rmu;
+ int rm_refs; /* # of references to this struct */
+};
+
+#define rm_mask rm_rmu.rmu_mask
+#define rm_leaf rm_rmu.rmu_leaf /* extra field would make 32 bytes */
+
+/*
+ * MKGet(m): store in m a radix_mask taken from the head of rn_mkfreelist
+ * when the list is not empty, otherwise one obtained through R_Malloc().
+ * MKFree(m): push m back onto the head of rn_mkfreelist.
+ * Both expand to a brace-enclosed compound statement, so use them as a
+ * complete statement.  MKGet's last line deliberately carries no line
+ * continuation, so the macro ends with its closing brace.
+ */
+#define MKGet(m) {\
+	if (rn_mkfreelist) {\
+		(m) = rn_mkfreelist; \
+		rn_mkfreelist = (m)->rm_mklist; \
+	} else \
+		R_Malloc(m, struct radix_mask *, sizeof (*(m))); }
+
+#define MKFree(m) { (m)->rm_mklist = rn_mkfreelist; rn_mkfreelist = (m);}
+
+/*
+ * Head of one radix tree: the tree top and leaf list, the rnh_hits,
+ * rnh_number and rnh_ref fields, the optional fixed key sizes, and the
+ * function pointers through which entries are added, deleted, looked up
+ * and walked.  rnh_nodes[] supplies the three nodes of an empty tree.
+ */
+struct radix_node_head {
+ struct radix_node *rnh_treetop;
+ struct radix_node *rnh_leaflist;
+ u_long rnh_hits;
+ u_int rnh_number;
+ u_int rnh_ref;
+ int rnh_addrsize; /* permit, but not require fixed keys */
+ int rnh_pktsize; /* permit, but not require fixed keys */
+ struct radix_node *(*rnh_addaddr) /* add based on sockaddr */
+ __P((void *v, void *mask,
+ struct radix_node_head *head, struct radix_node nodes[]));
+ struct radix_node *(*rnh_addpkt) /* add based on packet hdr */
+ __P((void *v, void *mask,
+ struct radix_node_head *head, struct radix_node nodes[]));
+ struct radix_node *(*rnh_deladdr) /* remove based on sockaddr */
+ __P((void *v, void *mask, struct radix_node_head *head));
+ struct radix_node *(*rnh_delpkt) /* remove based on packet hdr */
+ __P((void *v, void *mask, struct radix_node_head *head));
+ struct radix_node *(*rnh_matchaddr) /* locate based on sockaddr */
+ __P((void *v, struct radix_node_head *head));
+ struct radix_node *(*rnh_lookup) /* locate based on sockaddr */
+ __P((void *v, void *mask, struct radix_node_head *head));
+ struct radix_node *(*rnh_matchpkt) /* locate based on packet hdr */
+ __P((void *v, struct radix_node_head *head));
+ int (*rnh_walktree) /* traverse tree */
+ __P((struct radix_node_head *,
+ int (*)(struct radix_node *, void *), void *));
+ struct radix_node rnh_nodes[3]; /* empty tree for common case */
+};
+
+
+/*
+ * Wrappers mapping the radix code's helper names onto bcmp/bcopy/bzero and
+ * onto the KMALLOCS/KFREES/KFREE macros, which are defined outside this
+ * header.  On AIX any existing Bcmp/Bzero/R_Malloc/Free definitions are
+ * removed first; in a Linux kernel build Bcopy uses memmove() with the
+ * arguments swapped to keep the (source, destination) order of bcopy().
+ * NOTE(review): Bzero() expands with a trailing ';', so "Bzero(p, n);"
+ * produces an extra empty statement and the macro cannot be placed directly
+ * before an "else" -- check all callers before changing it.
+ */
+#if defined(AIX)
+# undef Bcmp
+# undef Bzero
+# undef R_Malloc
+# undef Free
+#endif
+#define Bcmp(a, b, n) bcmp(((caddr_t)(a)), ((caddr_t)(b)), (unsigned)(n))
+#if defined(linux) && defined(_KERNEL)
+# define Bcopy(a, b, n) memmove(((caddr_t)(b)), ((caddr_t)(a)), (unsigned)(n))
+#else
+# define Bcopy(a, b, n) bcopy(((caddr_t)(a)), ((caddr_t)(b)), (unsigned)(n))
+#endif
+#define Bzero(p, n) bzero((caddr_t)(p), (unsigned)(n));
+#define R_Malloc(p, t, n) KMALLOCS(p, t, n)
+#define FreeS(p, z) KFREES(p, z)
+#define Free(p) KFREE(p)
+
+/*
+ * For kernel builds on __osf__, AIX, or IRIX >= 60516, rename every radix
+ * function and global to an ipf_-prefixed name.
+ * NOTE(review): presumably this avoids colliding with radix symbols the
+ * host kernel already exports -- confirm.
+ */
+#if (defined(__osf__) || defined(AIX) || (IRIX >= 60516)) && defined(_KERNEL)
+# define rn_init ipf_rn_init
+# define rn_fini ipf_rn_fini
+# define rn_inithead ipf_rn_inithead
+# define rn_freehead ipf_rn_freehead
+# define rn_inithead0 ipf_rn_inithead0
+# define rn_refines ipf_rn_refines
+# define rn_walktree ipf_rn_walktree
+# define rn_addmask ipf_rn_addmask
+# define rn_addroute ipf_rn_addroute
+# define rn_delete ipf_rn_delete
+# define rn_insert ipf_rn_insert
+# define rn_lookup ipf_rn_lookup
+# define rn_match ipf_rn_match
+# define rn_newpair ipf_rn_newpair
+# define rn_search ipf_rn_search
+# define rn_search_m ipf_rn_search_m
+# define max_keylen ipf_maxkeylen
+# define rn_mkfreelist ipf_rn_mkfreelist
+# define rn_zeros ipf_rn_zeros
+# define rn_ones ipf_rn_ones
+# define rn_satisfies_leaf ipf_rn_satisfies_leaf
+# define rn_lexobetter ipf_rn_lexobetter
+# define rn_new_radix_mask ipf_rn_new_radix_mask
+# define rn_freenode ipf_rn_freenode
+#endif
+
+void rn_init __P((void));
+void rn_fini __P((void));
+int rn_inithead __P((void **, int));
+void rn_freehead __P((struct radix_node_head *));
+int rn_inithead0 __P((struct radix_node_head *, int));
+int rn_refines __P((void *, void *));
+int rn_walktree __P((struct radix_node_head *,
+ int (*)(struct radix_node *, void *), void *));
+struct radix_node
+ *rn_addmask __P((void *, int, int)),
+ *rn_addroute __P((void *, void *, struct radix_node_head *,
+ struct radix_node [2])),
+ *rn_delete __P((void *, void *, struct radix_node_head *)),
+ *rn_insert __P((void *, struct radix_node_head *, int *,
+ struct radix_node [2])),
+ *rn_lookup __P((void *, void *, struct radix_node_head *)),
+ *rn_match __P((void *, struct radix_node_head *)),
+ *rn_newpair __P((void *, int, struct radix_node[2])),
+ *rn_search __P((void *, struct radix_node *)),
+ *rn_search_m __P((void *, struct radix_node *, void *));
+
+#endif /* _NET_RADIX_H_ */
diff --git a/usr/src/uts/common/inet/ipf/solaris.c b/usr/src/uts/common/inet/ipf/solaris.c
new file mode 100644
index 0000000000..0c21d1f15f
--- /dev/null
+++ b/usr/src/uts/common/inet/ipf/solaris.c
@@ -0,0 +1,696 @@
+/*
+ * Copyright (C) 1993-2001, 2003 by Darren Reed.
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ *
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/* #pragma ident "@(#)solaris.c 1.12 6/5/96 (C) 1995 Darren Reed"*/
+#pragma ident "@(#)$Id: solaris.c,v 2.73.2.6 2005/07/13 21:40:47 darrenr Exp $"
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/systm.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/errno.h>
+#include <sys/uio.h>
+#include <sys/buf.h>
+#include <sys/modctl.h>
+#include <sys/open.h>
+#include <sys/kmem.h>
+#include <sys/conf.h>
+#include <sys/cmn_err.h>
+#include <sys/stat.h>
+#include <sys/cred.h>
+#include <sys/dditypes.h>
+#include <sys/stream.h>
+#include <sys/poll.h>
+#include <sys/autoconf.h>
+#include <sys/byteorder.h>
+#include <sys/socket.h>
+#include <sys/dlpi.h>
+#include <sys/stropts.h>
+#include <sys/kstat.h>
+#include <sys/sockio.h>
+#include <net/if.h>
+#if SOLARIS2 >= 6
+# include <net/if_types.h>
+#endif
+#include <net/af.h>
+#include <net/route.h>
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/if_ether.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <netinet/tcpip.h>
+#include <netinet/ip_icmp.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include "netinet/ip_compat.h"
+#include "netinet/ipl.h"
+#include "netinet/ip_fil.h"
+#include "netinet/ip_nat.h"
+#include "netinet/ip_frag.h"
+#include "netinet/ip_auth.h"
+#include "netinet/ip_state.h"
+
+
+extern struct filterstats frstats[];
+extern int fr_running;
+extern int fr_flags;
+extern int iplwrite __P((dev_t, struct uio *, cred_t *));
+
+extern ipnat_t *nat_list;
+
+static int ipf_getinfo __P((dev_info_t *, ddi_info_cmd_t,
+ void *, void **));
+#if SOLARIS2 < 10
+static int ipf_identify __P((dev_info_t *));
+#endif
+static int ipf_attach __P((dev_info_t *, ddi_attach_cmd_t));
+static int ipf_detach __P((dev_info_t *, ddi_detach_cmd_t));
+static int fr_qifsync __P((ip_t *, int, void *, int, void *, mblk_t **));
+static int ipf_property_update __P((dev_info_t *));
+/*
+ * NULL-terminated list of device paths.  ipf_attach() creates one minor
+ * node per entry, named by the text after the last '/' in the path and
+ * using the entry's index in this array as the minor number.
+ */
+static char *ipf_devfiles[] = { IPL_NAME, IPNAT_NAME, IPSTATE_NAME,
+ IPAUTH_NAME, IPSYNC_NAME, IPSCAN_NAME,
+ IPLOOKUP_NAME, NULL };
+
+
+#if SOLARIS2 >= 7
+extern timeout_id_t fr_timer_id;
+#else
+extern int fr_timer_id;
+#endif
+
+/*
+ * Character/block entry points for the driver.  Open, close, read, write
+ * and ioctl are handled by the ipl* routines; the remaining slots are
+ * filled with nodev/nochpoll.  The last three initialisers are present
+ * only when SOLARIS2 > 4.
+ */
+static struct cb_ops ipf_cb_ops = {
+ iplopen,
+ iplclose,
+ nodev, /* strategy */
+ nodev, /* print */
+ nodev, /* dump */
+ iplread,
+ iplwrite, /* write */
+ iplioctl, /* ioctl */
+ nodev, /* devmap */
+ nodev, /* mmap */
+ nodev, /* segmap */
+ nochpoll, /* poll */
+ ddi_prop_op,
+ NULL,
+ D_MTSAFE,
+#if SOLARIS2 > 4
+ CB_REV,
+ nodev, /* aread */
+ nodev, /* awrite */
+#endif
+};
+
+/*
+ * Device operations vector referenced by iplmod.  It wires in
+ * ipf_getinfo(), ipf_attach() and ipf_detach() and points at ipf_cb_ops.
+ * ipf_identify() is used only when SOLARIS2 < 10; otherwise that slot
+ * holds nulldev.
+ */
+static struct dev_ops ipf_ops = {
+ DEVO_REV,
+ 0,
+ ipf_getinfo,
+#if SOLARIS2 >= 10
+ nulldev,
+#else
+ ipf_identify,
+#endif
+ nulldev,
+ ipf_attach,
+ ipf_detach,
+ nodev, /* reset */
+ &ipf_cb_ops,
+ (struct bus_ops *)0
+};
+
+/*
+ * Module linkage: iplmod ties mod_driverops, the IPL_VERSION string and
+ * ipf_ops together; modlink1 is the structure that _init(), _fini() and
+ * _info() hand to mod_install(), mod_remove() and mod_info().
+ */
+extern struct mod_ops mod_driverops;
+static struct modldrv iplmod = {
+ &mod_driverops, IPL_VERSION, &ipf_ops };
+static struct modlinkage modlink1 = { MODREV_1, &iplmod, NULL };
+
+/*
+ * Table of { IFT_* interface type, size } pairs, compiled only when
+ * SOLARIS2 >= 6.  Only IFT_ETHER (14) and IFT_FDDI (24) carry a non-zero
+ * size; presumably the size is the link-layer header length -- confirm.
+ * The array is declared with 57 rows and 55 are initialised here, so the
+ * last two rows are zero.
+ * NOTE(review): nothing else in this file refers to hdrsizes; it looks
+ * like a candidate for removal.
+ */
+#if SOLARIS2 >= 6
+static size_t hdrsizes[57][2] = {
+ { 0, 0 },
+ { IFT_OTHER, 0 },
+ { IFT_1822, 0 },
+ { IFT_HDH1822, 0 },
+ { IFT_X25DDN, 0 },
+ { IFT_X25, 0 },
+ { IFT_ETHER, 14 },
+ { IFT_ISO88023, 0 },
+ { IFT_ISO88024, 0 },
+ { IFT_ISO88025, 0 },
+ { IFT_ISO88026, 0 },
+ { IFT_STARLAN, 0 },
+ { IFT_P10, 0 },
+ { IFT_P80, 0 },
+ { IFT_HY, 0 },
+ { IFT_FDDI, 24 },
+ { IFT_LAPB, 0 },
+ { IFT_SDLC, 0 },
+ { IFT_T1, 0 },
+ { IFT_CEPT, 0 },
+ { IFT_ISDNBASIC, 0 },
+ { IFT_ISDNPRIMARY, 0 },
+ { IFT_PTPSERIAL, 0 },
+ { IFT_PPP, 0 },
+ { IFT_LOOP, 0 },
+ { IFT_EON, 0 },
+ { IFT_XETHER, 0 },
+ { IFT_NSIP, 0 },
+ { IFT_SLIP, 0 },
+ { IFT_ULTRA, 0 },
+ { IFT_DS3, 0 },
+ { IFT_SIP, 0 },
+ { IFT_FRELAY, 0 },
+ { IFT_RS232, 0 },
+ { IFT_PARA, 0 },
+ { IFT_ARCNET, 0 },
+ { IFT_ARCNETPLUS, 0 },
+ { IFT_ATM, 0 },
+ { IFT_MIOX25, 0 },
+ { IFT_SONET, 0 },
+ { IFT_X25PLE, 0 },
+ { IFT_ISO88022LLC, 0 },
+ { IFT_LOCALTALK, 0 },
+ { IFT_SMDSDXI, 0 },
+ { IFT_FRELAYDCE, 0 },
+ { IFT_V35, 0 },
+ { IFT_HSSI, 0 },
+ { IFT_HIPPI, 0 },
+ { IFT_MODEM, 0 },
+ { IFT_AAL5, 0 },
+ { IFT_SONETPATH, 0 },
+ { IFT_SONETVT, 0 },
+ { IFT_SMDSICIP, 0 },
+ { IFT_PROPVIRTUAL, 0 },
+ { IFT_PROPMUX, 0 },
+};
+#endif /* SOLARIS2 >= 6 */
+
+static dev_info_t *ipf_dev_info = NULL;
+
+/*
+ * Template holding the name and data type of every named kstat entry.
+ * ipf_kstat_init() copies it into the ks_data area of each kstat it
+ * creates; ipf_kstat_update() later fills in the values.
+ * NOTE(review): the initialiser order has to follow the member order of
+ * filter_kstats_t, which is declared outside this file -- verify when
+ * either one changes.
+ */
+static const filter_kstats_t ipf_kstat_tmp = {
+ { "pass", KSTAT_DATA_ULONG },
+ { "block", KSTAT_DATA_ULONG },
+ { "nomatch", KSTAT_DATA_ULONG },
+ { "short", KSTAT_DATA_ULONG },
+ { "pass, logged", KSTAT_DATA_ULONG },
+ { "block, logged", KSTAT_DATA_ULONG },
+ { "nomatch, logged", KSTAT_DATA_ULONG },
+ { "logged", KSTAT_DATA_ULONG },
+ { "skip", KSTAT_DATA_ULONG },
+ { "return sent", KSTAT_DATA_ULONG },
+ { "acct", KSTAT_DATA_ULONG },
+ { "bad frag state alloc", KSTAT_DATA_ULONG },
+ { "new frag state kept", KSTAT_DATA_ULONG },
+ { "new frag state compl. pkt", KSTAT_DATA_ULONG },
+ { "bad pkt state alloc", KSTAT_DATA_ULONG },
+ { "new pkt kept state", KSTAT_DATA_ULONG },
+ { "cachehit", KSTAT_DATA_ULONG },
+ { "tcp cksum bad", KSTAT_DATA_ULONG },
+ {{ "pullup ok", KSTAT_DATA_ULONG },
+ { "pullup nok", KSTAT_DATA_ULONG }},
+ { "src != route", KSTAT_DATA_ULONG },
+ { "ttl invalid", KSTAT_DATA_ULONG },
+ { "bad ip pkt", KSTAT_DATA_ULONG },
+ { "ipv6 pkt", KSTAT_DATA_ULONG },
+ { "dropped:pps ceiling", KSTAT_DATA_ULONG },
+ { "ip upd. fail", KSTAT_DATA_ULONG }
+};
+
+kstat_t *ipf_kstatp[2] = {NULL, NULL};
+static int ipf_kstat_update(kstat_t *ksp, int rwflag);
+
+/*
+ * Create and install the two named kstats, "inbound" and "outbound"
+ * (module "ipf", instance 0, class "net").  Each kstat is seeded from
+ * ipf_kstat_tmp, gets ipf_kstat_update() as its update routine and has
+ * ks_private pointed at the matching frstats[] entry.  A kstat that cannot
+ * be created is left as NULL in ipf_kstatp[]; no error is returned.
+ */
+static void
+ipf_kstat_init(void)
+{
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		ipf_kstatp[i] = kstat_create("ipf", 0,
+		    (i==0)?"inbound":"outbound",
+		    "net",
+		    KSTAT_TYPE_NAMED,
+		    sizeof (filter_kstats_t) / sizeof (kstat_named_t),
+		    0);
+		if (ipf_kstatp[i] != NULL) {
+			bcopy(&ipf_kstat_tmp, ipf_kstatp[i]->ks_data,
+			    sizeof (filter_kstats_t));
+			ipf_kstatp[i]->ks_update = ipf_kstat_update;
+			ipf_kstatp[i]->ks_private = &frstats[i];
+			kstat_install(ipf_kstatp[i]);
+		}
+	}
+
+#ifdef IPFDEBUG
+	/* The arguments are pointers, so they need %p rather than %x. */
+	cmn_err(CE_NOTE, "IP Filter: ipf_kstat_init() installed 0x%p, 0x%p",
+	    (void *)ipf_kstatp[0], (void *)ipf_kstatp[1]);
+#endif
+}
+
+/*
+ * Delete whichever of the two kstats in ipf_kstatp[] exist and reset their
+ * slots to NULL.
+ */
+static void
+ipf_kstat_fini(void)
+{
+	kstat_t **kspp;
+
+	for (kspp = &ipf_kstatp[0]; kspp < &ipf_kstatp[2]; kspp++) {
+		if (*kspp == NULL)
+			continue;
+		kstat_delete(*kspp);
+		*kspp = NULL;
+	}
+}
+
+static int
+ipf_kstat_update(kstat_t *ksp, int rwflag)
+{
+ filter_kstats_t *fkp;
+ filterstats_t *fsp;
+
+ if (rwflag == KSTAT_WRITE)
+ return (EACCES);
+
+ fkp = ksp->ks_data;
+ fsp = ksp->ks_private;
+
+ fkp->fks_pass.value.ul = fsp->fr_pass;
+ fkp->fks_block.value.ul = fsp->fr_block;
+ fkp->fks_nom.value.ul = fsp->fr_nom;
+ fkp->fks_short.value.ul = fsp->fr_short;
+ fkp->fks_ppkl.value.ul = fsp->fr_ppkl;
+ fkp->fks_bpkl.value.ul = fsp->fr_bpkl;
+ fkp->fks_npkl.value.ul = fsp->fr_npkl;
+ fkp->fks_pkl.value.ul = fsp->fr_pkl;
+ fkp->fks_skip.value.ul = fsp->fr_skip;
+ fkp->fks_ret.value.ul = fsp->fr_ret;
+ fkp->fks_acct.value.ul = fsp->fr_acct;
+ fkp->fks_bnfr.value.ul = fsp->fr_bnfr;
+ fkp->fks_nfr.value.ul = fsp->fr_nfr;
+ fkp->fks_cfr.value.ul = fsp->fr_cfr;
+ fkp->fks_bads.value.ul = fsp->fr_bads;
+ fkp->fks_ads.value.ul = fsp->fr_ads;
+ fkp->fks_chit.value.ul = fsp->fr_chit;
+ fkp->fks_tcpbad.value.ul = fsp->fr_tcpbad;
+ fkp->fks_pull[0].value.ul = fsp->fr_pull[0];
+ fkp->fks_pull[1].value.ul = fsp->fr_pull[1];
+ fkp->fks_badsrc.value.ul = fsp->fr_badsrc;
+ fkp->fks_badttl.value.ul = fsp->fr_badttl;
+ fkp->fks_bad.value.ul = fsp->fr_bad;
+ fkp->fks_ipv6.value.ul = fsp->fr_ipv6;
+ fkp->fks_ppshit.value.ul = fsp->fr_ppshit;
+ fkp->fks_ipud.value.ul = fsp->fr_ipud;
+
+ return (0);
+}
+
+/*
+ * Module load entry point: set up the kstats, then install the module.
+ * The kstats are torn down again if mod_install() fails.  Returns the
+ * mod_install() result.
+ */
+int _init()
+{
+	int status;
+
+	ipf_kstat_init();
+	if ((status = mod_install(&modlink1)) != 0)
+		ipf_kstat_fini();
+#ifdef IPFDEBUG
+	cmn_err(CE_NOTE, "IP Filter: _init() = %d", status);
+#endif
+	return (status);
+}
+
+
+/*
+ * Module unload entry point: remove the module and, only when that
+ * succeeds, delete the kstats.  Returns the mod_remove() result.
+ */
+int _fini(void)
+{
+	int status = mod_remove(&modlink1);
+
+#ifdef IPFDEBUG
+	cmn_err(CE_NOTE, "IP Filter: _fini() = %d", status);
+#endif
+	if (status != 0)
+		return (status);
+
+	ipf_kstat_fini();
+	return (status);
+}
+
+
+/*
+ * Module information entry point: hands modinfop to mod_info() and returns
+ * its result.
+ */
+int _info(modinfop)
+struct modinfo *modinfop;
+{
+	int ipfinst;
+
+	ipfinst = mod_info(&modlink1, modinfop);
+#ifdef IPFDEBUG
+	/* modinfop is a pointer, so it is printed with %p rather than %x. */
+	cmn_err(CE_NOTE, "IP Filter: _info(%p) = %x", (void *)modinfop,
+	    ipfinst);
+#endif
+	return ipfinst;
+}
+
+
+#if SOLARIS2 < 10
+/*
+ * Identify routine, built only when SOLARIS2 < 10: claims the node when
+ * its name is "ipf".
+ */
+static int ipf_identify(dip)
+dev_info_t *dip;
+{
+# ifdef IPFDEBUG
+	/* dip is a pointer, so it is printed with %p rather than %x. */
+	cmn_err(CE_NOTE, "IP Filter: ipf_identify(%p)", (void *)dip);
+# endif
+	if (strcmp(ddi_get_name(dip), "ipf") == 0)
+		return (DDI_IDENTIFIED);
+	return (DDI_NOT_IDENTIFIED);
+}
+#endif
+
+
+/*
+ * Attach entry point.  Only DDI_ATTACH of instance 0 is supported, and
+ * only while the filter is not already running; any other command simply
+ * returns DDI_FAILURE without touching driver state.
+ *
+ * A successful attach, in order: records the no-autodetach property and
+ * any tuneables (ipf_property_update), creates one minor node per
+ * ipf_devfiles[] entry, initialises the three rwlocks, calls iplattach(),
+ * registers the pfil hooks, starts the slow timer and sets fr_running
+ * to 1.  A hook that fails to register is only warned about.
+ *
+ * Returns DDI_SUCCESS or DDI_FAILURE.
+ */
+static int ipf_attach(dip, cmd)
+dev_info_t *dip;
+ddi_attach_cmd_t cmd;
+{
+	char *s;
+	int i;
+	int instance;
+
+#ifdef IPFDEBUG
+	/* dip is a pointer, so it is printed with %p rather than %x. */
+	cmn_err(CE_NOTE, "IP Filter: ipf_attach(%p,%x)", (void *)dip, cmd);
+#endif
+
+	/* Refuse to run against a pfil with a different interface version. */
+	if ((pfilinterface != PFIL_INTERFACE) || (PFIL_INTERFACE < 2000000)) {
+		cmn_err(CE_NOTE, "pfilinterface(%d) != %d\n", pfilinterface,
+		    PFIL_INTERFACE);
+		return DDI_FAILURE;
+	}
+
+	switch (cmd)
+	{
+	case DDI_ATTACH:
+		instance = ddi_get_instance(dip);
+		/* Only one instance of ipf (instance 0) can be attached. */
+		if (instance > 0)
+			return DDI_FAILURE;
+		if (fr_running != 0)
+			return DDI_FAILURE;
+
+#ifdef IPFDEBUG
+		cmn_err(CE_NOTE, "IP Filter: attach ipf instance %d", instance);
+#endif
+
+		(void) ipf_property_update(dip);
+
+		/* Minor node name is the path's last component. */
+		for (i = 0; ((s = ipf_devfiles[i]) != NULL); i++) {
+			s = strrchr(s, '/');
+			if (s == NULL)
+				continue;
+			s++;
+			if (ddi_create_minor_node(dip, s, S_IFCHR, i,
+			    DDI_PSEUDO, 0) ==
+			    DDI_FAILURE) {
+				ddi_remove_minor_node(dip, NULL);
+				goto attach_failed;
+			}
+		}
+
+		ipf_dev_info = dip;
+		/*
+		 * Initialize the rwlocks.
+		 */
+		RWLOCK_INIT(&ipf_global, "ipf filter load/unload mutex");
+		RWLOCK_INIT(&ipf_mutex, "ipf filter rwlock");
+		RWLOCK_INIT(&ipf_frcache, "ipf cache rwlock");
+
+		/*
+		 * Lock people out while we set things up.
+		 */
+		WRITE_ENTER(&ipf_global);
+		if ((fr_running != 0) || (iplattach() == -1)) {
+			RWLOCK_EXIT(&ipf_global);
+			goto attach_failed;
+		}
+
+		if (pfil_add_hook(fr_check, PFIL_IN|PFIL_OUT, &pfh_inet4))
+			cmn_err(CE_WARN, "IP Filter: %s(pfh_inet4) failed",
+			    "pfil_add_hook");
+#ifdef USE_INET6
+		if (pfil_add_hook(fr_check, PFIL_IN|PFIL_OUT, &pfh_inet6))
+			cmn_err(CE_WARN, "IP Filter: %s(pfh_inet6) failed",
+			    "pfil_add_hook");
+#endif
+		if (pfil_add_hook(fr_qifsync, PFIL_IN|PFIL_OUT, &pfh_sync))
+			cmn_err(CE_WARN, "IP Filter: %s(pfh_sync) failed",
+			    "pfil_add_hook");
+
+		fr_timer_id = timeout(fr_slowtimer, NULL,
+		    drv_usectohz(500000));
+
+		fr_running = 1;
+
+		RWLOCK_EXIT(&ipf_global);
+
+		cmn_err(CE_CONT, "!%s, running.\n", ipfilter_version);
+
+		return DDI_SUCCESS;
+		/* NOTREACHED */
+	default:
+		/*
+		 * Unsupported command: fail without running the teardown
+		 * below, which could otherwise detach a running filter.
+		 */
+		return DDI_FAILURE;
+	}
+
+attach_failed:
+#ifdef IPFDEBUG
+	cmn_err(CE_NOTE, "IP Filter: failed to attach\n");
+#endif
+	/*
+	 * Use our own detach routine to toss
+	 * away any stuff we allocated above.
+	 * NOTE(review): ipf_detach() returns early while fr_running is 0
+	 * or -2, so it may not undo everything done above -- confirm.
+	 */
+	(void) ipf_detach(dip, DDI_DETACH);
+	return DDI_FAILURE;
+}
+
+
+/*
+ * Detach entry point.  Only DDI_DETACH is acted upon; every other command
+ * logs "failed to detach" and returns DDI_FAILURE.
+ *
+ * DDI_DETACH is refused while fr_refcnt is non-zero or the filter is not
+ * running.  Otherwise, in order: fr_running is set to -2, the pfil hooks
+ * are removed (failures are only warned about), the slow timer is
+ * cancelled, the properties and minor nodes are removed and ipldetach()
+ * is called.  The rwlocks are destroyed, and DDI_SUCCESS returned, only
+ * when ipldetach() returns 0.
+ */
+static int ipf_detach(dip, cmd)
+dev_info_t *dip;
+ddi_detach_cmd_t cmd;
+{
+	int i;
+
+#ifdef IPFDEBUG
+	/* dip is a pointer, so it is printed with %p rather than %x. */
+	cmn_err(CE_NOTE, "IP Filter: ipf_detach(%p,%x)", (void *)dip, cmd);
+#endif
+	switch (cmd) {
+	case DDI_DETACH:
+		if (fr_refcnt != 0)
+			return DDI_FAILURE;
+
+		if (fr_running == -2 || fr_running == 0)
+			break;
+		/*
+		 * Make sure we're the only ones modifying things.  With
+		 * this lock others should just fall out of the loop.
+		 */
+		WRITE_ENTER(&ipf_global);
+		if (fr_running <= 0) {
+			RWLOCK_EXIT(&ipf_global);
+			return DDI_FAILURE;
+		}
+		fr_running = -2;
+
+		if (pfil_remove_hook(fr_check, PFIL_IN|PFIL_OUT, &pfh_inet4))
+			cmn_err(CE_WARN, "IP Filter: %s(pfh_inet4) failed",
+			    "pfil_remove_hook");
+#ifdef USE_INET6
+		/* Name the call that actually failed: pfil_remove_hook. */
+		if (pfil_remove_hook(fr_check, PFIL_IN|PFIL_OUT, &pfh_inet6))
+			cmn_err(CE_WARN, "IP Filter: %s(pfh_inet6) failed",
+			    "pfil_remove_hook");
+#endif
+		if (pfil_remove_hook(fr_qifsync, PFIL_IN|PFIL_OUT, &pfh_sync))
+			cmn_err(CE_WARN, "IP Filter: %s(pfh_sync) failed",
+			    "pfil_remove_hook");
+
+		RWLOCK_EXIT(&ipf_global);
+
+		if (fr_timer_id != 0) {
+			(void) untimeout(fr_timer_id);
+			fr_timer_id = 0;
+		}
+
+		/*
+		 * Undo what we did in ipf_attach, freeing resources
+		 * and removing things we installed.  The system
+		 * framework guarantees we are not active with this devinfo
+		 * node in any other entry points at this time.
+		 */
+		ddi_prop_remove_all(dip);
+		i = ddi_get_instance(dip);
+		ddi_remove_minor_node(dip, NULL);
+		if (i > 0) {
+			cmn_err(CE_CONT, "IP Filter: still attached (%d)\n", i);
+			return DDI_FAILURE;
+		}
+
+		WRITE_ENTER(&ipf_global);
+		if (!ipldetach()) {
+			RWLOCK_EXIT(&ipf_global);
+			RW_DESTROY(&ipf_mutex);
+			RW_DESTROY(&ipf_frcache);
+			RW_DESTROY(&ipf_global);
+			cmn_err(CE_CONT, "!%s detached.\n", ipfilter_version);
+			return (DDI_SUCCESS);
+		}
+		RWLOCK_EXIT(&ipf_global);
+		break;
+	default:
+		break;
+	}
+	cmn_err(CE_NOTE, "IP Filter: failed to detach\n");
+	return DDI_FAILURE;
+}
+
+
+/*
+ * getinfo entry point.  Fails while the filter is not running.  Otherwise
+ * DDI_INFO_DEVT2DEVINFO stores the saved dev_info pointer in *result and
+ * DDI_INFO_DEVT2INSTANCE stores instance 0; any other request fails.
+ * dip and arg are used only by the debug message.
+ */
+/*ARGSUSED*/
+static int ipf_getinfo(dip, infocmd, arg, result)
+dev_info_t *dip;
+ddi_info_cmd_t infocmd;
+void *arg, **result;
+{
+	int error;
+
+	if (fr_running <= 0)
+		return DDI_FAILURE;
+	error = DDI_FAILURE;
+#ifdef IPFDEBUG
+	/* dip and arg are pointers, so they are printed with %p. */
+	cmn_err(CE_NOTE, "IP Filter: ipf_getinfo(%p,%x,%p)", (void *)dip,
+	    infocmd, arg);
+#endif
+	switch (infocmd) {
+	case DDI_INFO_DEVT2DEVINFO:
+		*result = ipf_dev_info;
+		error = DDI_SUCCESS;
+		break;
+	case DDI_INFO_DEVT2INSTANCE:
+		*result = (void *)0;
+		error = DDI_SUCCESS;
+		break;
+	default:
+		break;
+	}
+	return (error);
+}
+
+
+/*
+ * Hook registered on pfh_sync by ipf_attach().  Looks for inconsistencies
+ * between the list of interfaces the filter knows about and those which
+ * are currently configured by passing qif to frsync(), fr_natsync() and
+ * fr_statesync().  The other arguments are unused.  Always returns 0.
+ */
+/*ARGSUSED*/
+static int
+fr_qifsync(ip_t *ip, int hlen, void *il, int out, void *qif, mblk_t **mp)
+{
+	/* Filter rules first ... */
+	frsync(qif);
+
+	/* ... then any NAT `connections' and state using this interface. */
+	fr_natsync(qif);
+	fr_statesync(qif);
+
+	return (0);
+}
+
+
+/*
+ * Look for inconsistencies between the list of interfaces the filter knows
+ * about and those which are currently configured, by calling frsync() with
+ * a NULL argument.  Always returns 0.
+ */
+int ipfsync()
+{
+	frsync(NULL);
+
+	return (0);
+}
+
+
+/*
+ * Fetch configuration file values that have been entered into the ipf.conf
+ * driver file.
+ *
+ * First sets the no-autodetach property on dip.  Then, for every entry of
+ * ipf_tuneables whose size is 4 (or 8 when SOLARIS2 > 8), looks up a
+ * property of the same name and, if its first value lies within
+ * [ipft_min, ipft_max], stores it in the tuneable.  A tuneable without a
+ * matching property is skipped and is not treated as an error.
+ *
+ * Returns DDI_SUCCESS when everything found was applied.  Otherwise it
+ * returns DDI_FAILURE (property could not be set), the property lookup
+ * error, or DDI_PROP_CANNOT_DECODE for an out-of-range value; processing
+ * stops at the first error.
+ */
+static int ipf_property_update(dip)
+dev_info_t *dip;
+{
+	ipftuneable_t *ipft;
+	int64_t *i64p;
+	char *name;
+	u_int one;
+	int *i32p;
+	int err;
+
+#ifdef DDI_NO_AUTODETACH
+	if (ddi_prop_update_int(DDI_DEV_T_NONE, dip,
+	    DDI_NO_AUTODETACH, 1) != DDI_PROP_SUCCESS) {
+		cmn_err(CE_WARN, "!updating DDI_NO_AUTODETACH failed");
+		return DDI_FAILURE;
+	}
+#else
+	if (ddi_prop_update_int(DDI_DEV_T_NONE, dip,
+	    "ddi-no-autodetach", 1) != DDI_PROP_SUCCESS) {
+		cmn_err(CE_WARN, "!updating ddi-no-autodetach failed");
+		return DDI_FAILURE;
+	}
+#endif
+
+	err = DDI_SUCCESS;
+	for (ipft = ipf_tuneables; (name = ipft->ipft_name) != NULL; ipft++) {
+		one = 1;
+		switch (ipft->ipft_sz)
+		{
+		case 4 :
+			i32p = NULL;
+			err = ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip,
+			    0, name, &i32p, &one);
+			if (err == DDI_PROP_NOT_FOUND) {
+				/* Absent property: keep it out of the result. */
+				err = DDI_SUCCESS;
+				continue;
+			}
+#ifdef IPFDEBUG
+			cmn_err(CE_CONT, "IP Filter: lookup_int(%s) = %d\n",
+			    name, err);
+#endif
+			if (err != DDI_PROP_SUCCESS)
+				return err;
+			if (*i32p >= ipft->ipft_min && *i32p <= ipft->ipft_max)
+				*ipft->ipft_pint = *i32p;
+			else
+				err = DDI_PROP_CANNOT_DECODE;
+			ddi_prop_free(i32p);
+			break;
+
+#if SOLARIS2 > 8
+		case 8 :
+			i64p = NULL;
+			err = ddi_prop_lookup_int64_array(DDI_DEV_T_ANY, dip,
+			    0, name, &i64p, &one);
+			if (err == DDI_PROP_NOT_FOUND) {
+				/* Absent property: keep it out of the result. */
+				err = DDI_SUCCESS;
+				continue;
+			}
+# ifdef IPFDEBUG
+			cmn_err(CE_CONT, "IP Filter: lookup_int64(%s) = %d\n",
+			    name, err);
+# endif
+			if (err != DDI_PROP_SUCCESS)
+				return err;
+			/*
+			 * NOTE(review): the 64-bit value is stored through
+			 * ipft_pint, the same member the 4-byte case uses;
+			 * check that this is the intended destination for
+			 * an 8-byte tuneable.
+			 */
+			if (*i64p >= ipft->ipft_min && *i64p <= ipft->ipft_max)
+				*ipft->ipft_pint = *i64p;
+			else
+				err = DDI_PROP_CANNOT_DECODE;
+			ddi_prop_free(i64p);
+			break;
+#endif
+
+		default :
+			break;
+		}
+		if (err != DDI_SUCCESS)
+			break;
+	}
+
+	return err;
+}
diff --git a/usr/src/uts/common/inet/ipf/compat.h b/usr/src/uts/common/inet/pfil/compat.h
index 776dd8d0c6..ddaa3354e2 100644
--- a/usr/src/uts/common/inet/ipf/compat.h
+++ b/usr/src/uts/common/inet/pfil/compat.h
@@ -3,7 +3,6 @@
*
* See the IPFILTER.LICENCE file for details on licencing.
*/
-
#ifdef DEBUG
# define PFILDEBUG
#endif
@@ -42,3 +41,38 @@
# define ASSERT(x)
#endif
+/*
+ * The list of SAPs below all come from Sun's <atm/iftypes.h> file. It's not
+ * yet clear whether pfil should deal with any of these or not.
+ */
+#ifndef IFMP_SAP
+# define IFMP_SAP 0x0065
+#endif
+
+#ifndef LANER_SAP
+# define LANER_SAP 0x9999
+#endif
+
+#ifndef SNMP_SAP
+# define SNMP_SAP 0x999a
+#endif
+
+#ifndef ILMI_SAP
+# define ILMI_SAP 0x999b
+#endif
+
+#ifndef SIG_SAP
+# define SIG_SAP 0x999c
+#endif
+
+#ifndef Q93B_MGMT_SAP
+# define Q93B_MGMT_SAP 0x999d
+#endif
+
+#ifndef UTIL_SAP
+# define UTIL_SAP 0x999e
+#endif
+
+#ifndef ERROR_SAP
+# define ERROR_SAP 0x999f
+#endif
diff --git a/usr/src/uts/common/inet/ipf/misc.c b/usr/src/uts/common/inet/pfil/misc.c
index b65ca63837..2a8c84dd4c 100644
--- a/usr/src/uts/common/inet/ipf/misc.c
+++ b/usr/src/uts/common/inet/pfil/misc.c
@@ -1,8 +1,10 @@
/*
- * Copyright (C) 2000 by Darren Reed.
+ * Copyright (C) 2003 by Darren Reed.
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
*/
#ifndef __hpux
-#pragma ident "@(#)$Id: misc.c,v 1.9 2003/07/20 15:36:27 darrenr Exp $"
+#pragma ident "@(#)$Id: misc.c,v 1.12 2003/11/29 07:11:03 darrenr Exp $"
#else
struct uio;
#endif
diff --git a/usr/src/uts/common/inet/ipf/ndd.c b/usr/src/uts/common/inet/pfil/ndd.c
index c733a1d162..ce85cc0091 100644
--- a/usr/src/uts/common/inet/ipf/ndd.c
+++ b/usr/src/uts/common/inet/pfil/ndd.c
@@ -3,7 +3,7 @@
*
* See the IPFILTER.LICENCE file for details on licencing.
*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -30,13 +30,15 @@ static int qif_report(queue_t *, mblk_t *, caddr_t);
static int sill_report(queue_t *, mblk_t *, caddr_t);
static int qif_ipmp_report(queue_t *, mblk_t *, caddr_t);
static int qif_ipmp_set(queue_t *, mblk_t *, char *, caddr_t);
+static int pfil_hl_set(queue_t *, mblk_t *, char *, caddr_t);
extern int pfil_report(queue_t *, mblk_t *, caddr_t);
#else
static int qif_report(queue_t *, mblk_t *, caddr_t, cred_t *);
static int sill_report(queue_t *, mblk_t *, caddr_t, cred_t *);
static int qif_ipmp_report(queue_t *, mblk_t *, caddr_t, cred_t *);
-static int qif_ipmp_set(queue_t *, mblk_t *, char *, caddr_t, cred_t *);
+static int qif_ipmp_set(queue_t *, mblk_t *, char *, caddr_t , cred_t *);
+static int pfil_hl_set(queue_t *, mblk_t *, char *, caddr_t , cred_t *);
extern int pfil_report(queue_t *, mblk_t *, caddr_t, cred_t *);
#endif
@@ -101,16 +103,24 @@ int pfil_nd_set(queue_t *q, mblk_t *mp, char *str, caddr_t ptr, cred_t *cred)
(void *)q, (void *)mp, str, (void *)str,
(void *)ptr));
+#if (SOLARIS2 >= 10)
if (ddi_strtol(str, &end, 10, &i) != 0)
return (EINVAL);
-
+#else
+ i = mi_strtol(str, &end, 10);
+#endif
+
if (ptr == (caddr_t)&pfildebug) {
#ifdef PFILDEBUG
+#if (SOLARIS2 >= 10)
if ((end == str) || (i < 0) || (i > 100))
+#else
+ if (i < 0 || i > 1)
+#endif
#endif
return EINVAL;
} else if (ptr == (caddr_t)&qif_verbose) {
- if (i < 0 || i > 1)
+ if ((end == str) || (i < 0) || (i > 1))
return EINVAL;
}
*((int *)ptr) = i;
@@ -122,7 +132,7 @@ int pfil_nd_set(queue_t *q, mblk_t *mp, char *str, caddr_t ptr, cred_t *cred)
/* ------------------------------------------------------------------------ */
/* Function: pfil_ioctl_nd */
-/* Returns: int - B_TRUE == success, B_FALSE == getset error */
+/* Returns: int - B_TRUE == success, B_FALSE == getset error */
/* Parameters: q(I) - pointer to queue */
/* mp(I) - pointer to mblk */
/* */
@@ -146,11 +156,13 @@ int pfil_ioctl_nd(queue_t *q, mblk_t *mp)
int pfil_nd_init()
{
+#ifdef PFILDEBUG
if (!nd_load(&pfil_nd, "pfildebug", pfil_nd_get, pfil_nd_set,
(caddr_t)&pfildebug)) {
nd_free(&pfil_nd);
return -1;
}
+#endif
if (!nd_load(&pfil_nd, "pfil_delayed_copy", pfil_nd_get, pfil_nd_set,
(caddr_t)&pfil_delayed_copy)) {
@@ -209,6 +221,11 @@ int pfil_nd_init()
return -1;
}
+ if (!nd_load(&pfil_nd, "pfil_hl", NULL, pfil_hl_set, NULL)) {
+ nd_free(&pfil_nd);
+ return -1;
+ }
+
return 0;
}
@@ -267,6 +284,7 @@ static int qif_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *cred)
}
+
/* ------------------------------------------------------------------------ */
/* Function: sill_report */
/* Returns: int */
@@ -303,6 +321,7 @@ static int sill_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *cred)
return 0;
}
+
/* ------------------------------------------------------------------------ */
/* Function: qif_ipmp_report */
/* Returns: int */
@@ -378,3 +397,46 @@ static int qif_ipmp_set(queue_t *q, mblk_t *mp, char *str, caddr_t ptr,
return 0;
}
+
+/* ------------------------------------------------------------------------ */
+/* Function:    pfil_hl_set                                                 */
+/* Returns:     int - always 0                                              */
+/* Parameters:  q(I)    - pointer to queue                                  */
+/*              mp(I)   - pointer to mblk                                   */
+/*              str(I)  - ';' separated list of settings, split in place    */
+/*              ptr(I)  - opaque pointer (only printed in the debug trace)  */
+/*              cred(I) - pointer to credential information (unused)        */
+/*                                                                          */
+/* Handler for explicitly setting the header length (hl) field of the qif   */
+/* structure when pfil cannot determine it from ioctl snooping or passing   */
+/* messages.  Each ';' separated piece of str is given to qif_hl_set().     */
+/* ndd -set /dev/pfil pfil_hl ipmp0=14 or v4:ipmp0=14                       */
+/* ------------------------------------------------------------------------ */
+#if !defined(sun) || SOLARIS2 <= 8
+/*ARGSUSED*/
+static int pfil_hl_set(queue_t *q, mblk_t *mp, char *str, caddr_t ptr)
+#else
+/*ARGSUSED*/
+static int pfil_hl_set(queue_t *q, mblk_t *mp, char *str, caddr_t ptr,
+		       cred_t *cred)
+#endif
+{
+	char *item, *sep;
+
+	/* LINTED: E_CONSTANT_CONDITION */
+	PRINT(2, (CE_CONT, "pfil_hl_set(0x%lx,0x%lx,0x%lx[%s],0x%lx)\n",
+		  (u_long)q, (u_long)mp, (u_long)str, str, (u_long)ptr));
+
+	/* Cut str at each ';' (overwriting it with NUL) and hand the */
+	/* resulting pieces to qif_hl_set() in order. */
+	for (item = str; ; item = sep + 1) {
+		sep = strchr(item, ';');
+		if (sep != NULL)
+			*sep = '\0';
+		qif_hl_set(item);
+		if (sep == NULL)
+			break;
+	}
+
+	return 0;
+}
diff --git a/usr/src/uts/common/inet/ipf/os.h b/usr/src/uts/common/inet/pfil/os.h
index f0c8e7502a..b7a77e130e 100644
--- a/usr/src/uts/common/inet/ipf/os.h
+++ b/usr/src/uts/common/inet/pfil/os.h
@@ -1,3 +1,8 @@
+/*
+ * Copyright (C) 2003 by Darren Reed.
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ */
#include <sys/sunddi.h>
#include <sys/ddi.h>
#if SOLARIS2 >= 6
@@ -28,3 +33,8 @@
#define KMFREE(v, z) kmem_free(v, z)
extern caddr_t pfil_nd;
+
+#if defined(atomic_add_long) && (SOLARIS2 < 7) /* remap an existing macro onto atomic_add_32 */
+# undef atomic_add_long
+# define atomic_add_long(x,y) atomic_add_32((uint32_t *)(x), (y))
+#endif /* atomic_add_long && SOLARIS2 < 7 */
diff --git a/usr/src/uts/common/inet/ipf/pfil.c b/usr/src/uts/common/inet/pfil/pfil.c
index a42055d693..d44f1ac5bc 100644
--- a/usr/src/uts/common/inet/ipf/pfil.c
+++ b/usr/src/uts/common/inet/pfil/pfil.c
@@ -3,12 +3,11 @@
*
* See the IPFILTER.LICENCE file for details on licencing.
*
- * ident "@(#)$Id: pfil.c,v 1.22 2003/08/18 22:13:59 darrenr Exp $"
+ * ident "@(#)$Id: pfil.c,v 1.27 2003/11/30 09:45:57 darrenr Exp $"
*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-
#ifndef __hpux
#pragma ident "%Z%%M% %I% %E% SMI"
#else
@@ -80,11 +79,11 @@ struct pfil_head pfh_sync;
static int pfil_list_add(pfil_list_t *,
int (*) __P((struct ip *, int, void *, int,
- struct qif *, mblk_t **)),
+ void *, mblk_t **)),
int);
static int pfil_list_remove(pfil_list_t *,
int (*) __P((struct ip *, int, void *, int,
- struct qif *, mblk_t **)));
+ void *, mblk_t **)));
/* ------------------------------------------------------------------------ */
@@ -116,14 +115,14 @@ int pfil_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *cred)
(void) mi_mpprintf(mp, "in");
(void) mi_mpprintf(mp, "function\tflags");
for (p = ph->ph_in.pfl_top; p; p = p->pfil_next)
- (void)mi_mpprintf(mp,"%p\t%x",
- (void *)p->pfil_func, p->pfil_flags);
+ (void) mi_mpprintf(mp,"%p\t%x",
+ (void *)p->pfil_func, p->pfil_flags);
(void) mi_mpprintf(mp, "out");
(void) mi_mpprintf(mp, "function\tflags");
for (p = ph->ph_out.pfl_top; p; p = p->pfil_next)
- (void)mi_mpprintf(mp,"%p\t%x",
- (void *)p->pfil_func, p->pfil_flags);
+ (void) mi_mpprintf(mp,"%p\t%x",
+ (void *)p->pfil_func, p->pfil_flags);
RW_EXIT(&ph->ph_lock);
@@ -176,7 +175,7 @@ pfil_init(ph)
int
pfil_add_hook(func, flags, ph)
int (*func) __P((struct ip *, int, void *, int,
- struct qif *, mblk_t **));
+ void *, mblk_t **));
int flags;
struct pfil_head *ph;
{
@@ -216,7 +215,7 @@ static int
pfil_list_add(list, func, flags)
pfil_list_t *list;
int (*func) __P((struct ip *, int, void *, int,
- struct qif *, mblk_t **));
+ void *, mblk_t **));
int flags;
{
struct packet_filter_hook *pfh;
@@ -270,7 +269,7 @@ pfil_list_add(list, func, flags)
int
pfil_remove_hook(func, flags, ph)
int (*func) __P((struct ip *, int, void *, int,
- struct qif *, mblk_t **));
+ void *, mblk_t **));
int flags;
struct pfil_head *ph;
{
@@ -309,7 +308,7 @@ static int
pfil_list_remove(list, func)
pfil_list_t *list;
int (*func) __P((struct ip *, int, void *, int,
- struct qif *, mblk_t **));
+ void *, mblk_t **));
{
struct packet_filter_hook *pfh;
diff --git a/usr/src/uts/common/inet/pfil/pfil.conf b/usr/src/uts/common/inet/pfil/pfil.conf
new file mode 100644
index 0000000000..1cf479a0d6
--- /dev/null
+++ b/usr/src/uts/common/inet/pfil/pfil.conf
@@ -0,0 +1,28 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+
+name="pfil" parent="pseudo" instance=0;
+
diff --git a/usr/src/uts/common/inet/ipf/pfil.h b/usr/src/uts/common/inet/pfil/pfil.h
index 9c3da19714..97f926da8b 100644
--- a/usr/src/uts/common/inet/ipf/pfil.h
+++ b/usr/src/uts/common/inet/pfil/pfil.h
@@ -3,7 +3,7 @@
*
* See the IPFILTER.LICENCE file for details on licencing.
*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -12,8 +12,8 @@
#ifndef _NET_PFIL_H_
#define _NET_PFIL_H_
-#define PFIL_RELEASE "2.1.4"
-#define PFIL_VERSION 2010400
+#define PFIL_RELEASE "2.1.6"
+#define PFIL_VERSION 2010600
#define PFIL_INTERFACE 2000000
#ifndef __P
@@ -24,6 +24,7 @@
# endif
#endif
+#ifdef sun
# include <inet/ip.h>
# if SOLARIS2 < 9
# include <netinet/in_systm.h>
@@ -34,15 +35,19 @@
# undef IPOPT_SSRR
# include <netinet/ip.h>
# endif
+#endif
+#ifdef __hpux
+# include <netinet/in_systm.h>
+# include <netinet/in.h>
+# include <netinet/ip.h>
+#endif
-struct qif;
-struct ip;
typedef struct packet_filter_hook {
struct packet_filter_hook *pfil_next;
struct packet_filter_hook **pfil_pnext;
- int (*pfil_func) __P((struct ip *, int, void *, int, struct qif *,
- mblk_t **));
+ int (*pfil_func) __P((struct ip *, int, void *, int,
+ void *, mblk_t **));
int pfil_flags;
} packet_filter_hook_t;
@@ -82,12 +87,12 @@ typedef struct pfil_head {
void pfil_init __P((struct pfil_head *));
struct packet_filter_hook *pfil_hook_get __P((int, struct pfil_head *));
int pfil_add_hook __P((int (*func) __P((struct ip *, int, void *, int,
- struct qif *, mblk_t **)), int,
+ void *, mblk_t **)), int,
struct pfil_head *));
int pfil_remove_hook __P((int (*func) __P((struct ip *, int, void *, int,
- struct qif *, mblk_t **)), int,
+ void *, mblk_t **)), int,
struct pfil_head *));
-int pfil_sendbuf __P((mblk_t *));
+int pfil_sendbuf(mblk_t *);
mblk_t *pfil_make_dl_packet __P((mblk_t *, struct ip *, void *,
char *, queue_t **));
void pfil_send_dl_packet __P((queue_t *, mblk_t *));
@@ -103,8 +108,6 @@ extern struct pfil_head pfh_sync; /* Notification of interface */
extern krwlock_t qif_rwlock;
extern krwlock_t pfil_rw;
-extern u_int pfil_ip_csum_hdr __P((u_char *));
-
/*
* NOTE: On Solaris, even though pfilwput(), etc, are prototyped as returning
* an int, the return value is never checked and much code ignores it, anyway,
diff --git a/usr/src/uts/common/inet/pfil/pfild.h b/usr/src/uts/common/inet/pfil/pfild.h
new file mode 100644
index 0000000000..e866e514ae
--- /dev/null
+++ b/usr/src/uts/common/inet/pfil/pfild.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2003 by Darren Reed.
+ *
+ * See the IPFILTER.LICENCE file for details on licencing.
+ *
+ * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <net/if.h>
+#include <netinet/in.h>
+
+/*
+ * STREAMS control messages used to communicate between pfild and pfil.
+ * Messages are sent down to /dev/pfil as M_PROTO->M_DATA.
+ * M_PROTO block contains uint32_t command code.
+ * M_DATA block contains [an array of] the corresponding data structure.
+ */
+
+/*
+ * Data structure used to pass interface configuration information from
+ * pfild to the pfil kernel module.
+ */
+#define PFILCMD_IFADDRS 1 /* command code defined alongside struct pfil_ifaddrs */
+struct pfil_ifaddrs {
+ char name[LIFNAMSIZ]; /* interface name */
+ union {
+ struct sockaddr_in in;
+ struct sockaddr_in6 in6;
+ } localaddr; /* local address, as sockaddr_in or sockaddr_in6 */
+ union {
+ struct sockaddr_in in;
+ struct sockaddr_in6 in6;
+ } netmask; /* netmask, as sockaddr_in or sockaddr_in6 */
+ union {
+ struct sockaddr_in in;
+ struct sockaddr_in6 in6;
+ } broadaddr; /* presumably the broadcast address - confirm against pfild */
+ union {
+ struct sockaddr_in in;
+ struct sockaddr_in6 in6;
+ } dstaddr; /* presumably the point-to-point destination - confirm against pfild */
+ uint_t mtu; /* interface MTU */
+};
+
+/*
+ * Data structure used to pass interface valid source address set information
+ * from pfild to the pfil kernel module.
+ */
+#define PFILCMD_IFADDRSET 2 /* command code defined alongside struct pfil_ifaddrset */
+struct pfil_ifaddrset {
+ char name[LIFNAMSIZ]; /* interface name */
+ uint8_t af; /* address family; presumably selects v4span vs v6span - confirm */
+ uint32_t nspans; /* presumably the number of span entries that follow - confirm */
+};
+struct pfil_v4span {
+ uint32_t first, last; /* in host byte order! */
+};
+struct pfil_v6span {
+ struct in6_addr first, last; /* first and last IPv6 address of the span */
+};
diff --git a/usr/src/uts/common/inet/ipf/pfildrv.c b/usr/src/uts/common/inet/pfil/pfildrv.c
index e787b5338e..45d694d304 100644
--- a/usr/src/uts/common/inet/ipf/pfildrv.c
+++ b/usr/src/uts/common/inet/pfil/pfildrv.c
@@ -3,7 +3,7 @@
*
* See the IPFILTER.LICENCE file for details on licencing.
*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -11,6 +11,7 @@
#include <sys/conf.h>
#include <sys/debug.h>
+#include <sys/atomic.h>
#include <sys/ethernet.h>
#include <sys/stream.h>
#include <sys/errno.h>
@@ -97,6 +98,9 @@ struct streamtab pfil_dev_strtab = {
&pfil_rinit, &pfil_winit
};
+extern int nulldev();
+extern int nodev();
+
void pfil_donotip(int, qif_t *, queue_t *, mblk_t *, mblk_t *, struct ip *, size_t);
static int pfil_info(dev_info_t *, ddi_info_cmd_t , void *, void **);
static int pfil_attach(dev_info_t *, ddi_attach_cmd_t);
@@ -105,9 +109,48 @@ static int pfil_identify(dev_info_t *);
#endif
static int pfil_detach(dev_info_t *, ddi_detach_cmd_t);
+#ifdef DDI_DEFINE_STREAM_OPS
DDI_DEFINE_STREAM_OPS(pfil_devops, nulldev, nulldev, pfil_attach, pfil_detach,
nulldev, pfil_info, D_MP, &pfil_dev_strtab);
+#else
+static struct cb_ops pfil_ops = { /* only built when DDI_DEFINE_STREAM_OPS is not defined */
+ nodev, /* cb_open */
+ nodev, /* cb_close */
+ nodev, /* cb_strategy */
+ nodev, /* cb_print */
+ nodev, /* cb_dump */
+ nodev, /* cb_read */
+ nodev, /* cb_write */
+ nodev, /* cb_ioctl */
+ nodev, /* cb_devmap */
+ nodev, /* cb_mmap */
+ nodev, /* cb_segmap */
+ nochpoll, /* cb_chpoll */
+ ddi_prop_op, /* cb_prop_op */
+ &pfilinfo, /* cb_stream; NOTE(review): the DDI_DEFINE_STREAM_OPS branch passes &pfil_dev_strtab - confirm pfilinfo is intended */
+ D_MP /* cb_flag */
+};
+
+static struct dev_ops pfil_devops = /* only built when DDI_DEFINE_STREAM_OPS is not defined */
+{
+ DEVO_REV, /* devo_rev */
+ 0, /* devo_refcnt */
+ pfil_info, /* devo_getinfo */
+#if SOLARIS2 >= 10
+ nulldev, /* devo_identify (nulldev on Solaris 10 and later) */
+#else
+ pfil_identify, /* devo_identify */
+#endif
+ nulldev, /* devo_probe */
+ pfil_attach, /* devo_attach */
+ pfil_detach, /* devo_detach */
+ nodev, /* devo_reset */
+ &pfil_ops, /* devo_cb_ops */
+ NULL /* devo_bus_ops */
+};
+#endif
+
static struct modldrv modldrv = {
&mod_driverops, "pfil Streams driver "/**/PFIL_RELEASE, &pfil_devops
};
@@ -115,9 +158,8 @@ static struct modldrv modldrv = {
/************************************************************************
* STREAMS module information
*/
-static int pfilmodopen(queue_t *q, dev_t *devp, int flag, int sflag,
- cred_t *crp);
-static int pfilmodclose(queue_t *q, int flag, cred_t *crp);
+static int pfilmodopen(queue_t *, dev_t *, int, int, cred_t *);
+static int pfilmodclose(queue_t *, int, cred_t *);
static struct qinit pfilmod_rinit = {
(pfi_t)pfilmodrput, NULL, pfilmodopen, pfilmodclose,
@@ -174,7 +216,11 @@ static int pfil_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
pfil_dev_info = devi;
+#if SOLARIS2 >= 8
return (ddi_create_minor_node(devi, "pfil", S_IFCHR, 0, DDI_PSEUDO, 0));
+#else
+ return (ddi_create_minor_node(devi, "pfil", S_IFCHR, 0, NULL, 0));
+#endif
}
@@ -324,7 +370,6 @@ static int pfildevclose(queue_t *q, int flag, cred_t *crp)
return 0;
}
-
/************************************************************************
* STREAMS module functions
*/
@@ -394,8 +439,11 @@ static int pfilmodclose(queue_t *q, int flag, cred_t *crp)
/* ------------------------------------------------------------------------ */
/* Function: pfil_precheck */
-/* Returns: int - < 0 is error in this function, 0 == pass packet, else */
-/* (> 0) indicates passing prohibited */
+/* Returns: int - < 0 pass packet because it's not a type subject to */
+/* firewall rules (i.e. internal STREAMS messages), */
+/* 0 == pass packet, else > 0 indicates passing */
+/* prohibited (possibly due to an error occurring in */
+/* this function.) */
/* Parameters: q(I) - pointer to STREAMS queue */
/* mp(I) - pointer to STREAMS message */
/* qif(I) - pointer to per-queue interface information */
@@ -413,23 +461,32 @@ int pfil_precheck(queue_t *q, mblk_t **mp, int flags, qif_t *qif)
{
register struct ip *ip;
size_t hlen, len, off, mlen, iphlen, plen;
- int err, out, sap, realigned = 0;
packet_filter_hook_t *pfh;
+ qpktinfo_t qpkt, *qpi;
struct pfil_head *ph;
mblk_t *m, *mt = *mp;
- u_char *bp, *s;
- qif_t qf, *qp;
+ int err, out, sap;
+ u_char *bp;
#if SOLARIS2 >= 8
ip6_t *ip6;
#endif
#ifndef sparc
u_short __ipoff, __iplen;
#endif
- qf = *qif;
- qp = qif;
- qif = &qf;
- qif->qf_next = NULL;
- qif->qf_flags = 0;
+
+ qpi = &qpkt;
+ qpi->qpi_q = q;
+ qpi->qpi_off = 0;
+ qpi->qpi_name = qif->qf_name;
+ qpi->qpi_real = qif;
+ qpi->qpi_ill = qif->qf_ill;
+ qpi->qpi_hl = qif->qf_hl;
+ qpi->qpi_ppa = qif->qf_ppa;
+ qpi->qpi_num = qif->qf_num;
+ qpi->qpi_flags = qif->qf_flags;
+ qpi->qpi_max_frag = qif->qf_max_frag;
+ if ((flags & PFIL_GROUP) != 0)
+ qpi->qpi_flags |= QF_GROUP;
/*
* If there is only M_DATA for a packet going out, then any header
@@ -437,10 +494,9 @@ int pfil_precheck(queue_t *q, mblk_t **mp, int flags, qif_t *qif)
* the M_DATA) is prepended before the IP header. We need to set the
* offset to account for this.
*/
- qif->qf_off = 0;
out = (flags & PFIL_OUT) ? 1 : 0;
- off = (out) ? qif->qf_hl : 0;
-tryagain:
+ off = (out) ? qpi->qpi_hl : 0;
+
ip = NULL;
m = NULL;
#if SOLARIS2 >= 8
@@ -468,13 +524,13 @@ tryagain:
off = 0;
m = mt;
} else {
- atomic_add_long(&qp->qf_notdata, 1);
+ atomic_add_long(&qif->qf_notdata, 1);
return -1;
}
} else {
m = mt->b_cont;
if (m == NULL) {
- atomic_add_long(&qp->qf_nodata, 1);
+ atomic_add_long(&qif->qf_nodata, 1);
return -3; /* No data blocks */
}
}
@@ -484,7 +540,7 @@ tryagain:
m = mt;
break;
default :
- atomic_add_long(&qp->qf_notdata, 1);
+ atomic_add_long(&qif->qf_notdata, 1);
return -2;
}
@@ -497,7 +553,7 @@ tryagain:
off = 0; /* Any non-M_DATA cancels the offset */
if (m == NULL) {
- atomic_add_long(&qp->qf_nodata, 1);
+ atomic_add_long(&qif->qf_nodata, 1);
return -3; /* No data blocks */
}
@@ -510,7 +566,7 @@ tryagain:
if ((dl->dl_primitive == DL_UNITDATA_IND) &&
(dl->dl_group_address == 1)) {
- qif->qf_flags |= QF_GROUP;
+ qpi->qpi_flags |= QF_GROUP;
if (((*((u_char *)m->b_rptr) == 0x0) &&
((*((u_char *)m->b_rptr + 2) == 0x45))))
off += 2;
@@ -519,52 +575,21 @@ tryagain:
}
/*
- * If there is more than one copy of this message traversing the
- * STREAMS stack (ie packet is being used for snoop data) then make a
- * copy of it for our use so we become the sole owner of the new
- * message and do a freemsg() on the one passed in as we're no longer
- * using it or passing it up.
- */
- if ((pfil_delayed_copy == 0) && (m->b_datap->db_ref > 1)) {
- mblk_t *new;
-
-forced_copy:
- new = copymsg(m);
- if (new == NULL) {
- atomic_add_long(&qp->qf_copyfail, 1);
- return -3;
- }
- atomic_add_long(&qp->qf_copy, 1);
-
- if (mt != m)
- mt->b_cont = new;
- else {
- *mp = new;
- mt = new;
- }
- freemsg(m);
- m = new;
- }
-
- ip = (struct ip *)(m->b_rptr + off);
-
- /*
* We might have a 1st data block which is really M_PROTO, i.e. it is
* only big enough for the link layer header
*/
- while ((u_char *)ip >= m->b_wptr) {
- len = (u_char *)ip - m->b_wptr;
+ while ((len = m->b_wptr - m->b_rptr) <= off) {
+ off -= len;
m = m->b_cont;
if (m == NULL) {
- atomic_add_long(&qp->qf_nodata, 1);
+ atomic_add_long(&qif->qf_nodata, 1);
return -4; /* not enough data for IP */
}
- ip = (struct ip *)(m->b_rptr + len);
}
- off = (u_char *)ip - m->b_rptr;
- mlen = msgdsize(m) - off;
- if (mlen == 0)
- mlen = mt->b_wptr - mt->b_rptr;
+
+ ip = (struct ip *)(m->b_rptr + off);
+ len = m->b_wptr - m->b_rptr - off;
+ mlen = msgdsize(m);
#ifdef IRE_ILL_CN
sap = ((s_ill_t *)qif->qf_ill)->ill_sap;
@@ -572,7 +597,105 @@ forced_copy:
sap = ((ill_t *)qif->qf_ill)->ill_sap;
#endif
- if (sap == ETHERTYPE_IP) {
+ if (mlen == 0)
+ mlen = m->b_wptr - m->b_rptr;
+ mlen -= off;
+
+#ifdef PFILDEBUG
+ /*LINTED: E_CONSTANT_CONDITION*/
+ PRINT(10,(CE_CONT,
+ "!IP Filter[%s]: out %d len %ld/%ld sap %d ip %p b_rptr %p off %ld m %p/%d/%d/%p mt %p/%d/%d/%p\n",
+ qif->qf_name, out, len, mlen, sap,
+ (void *)ip, (void *)m->b_rptr, off,
+ (void *)m, MTYPE(m), (int)MLEN(m), (void *)m->b_cont,
+ (void *)mt, MTYPE(mt), (int)MLEN(mt), (void *)mt->b_cont));
+#endif
+
+ /*
+ * If there is more than one copy of this message traversing the
+ * STREAMS stack (ie the packet is being used for snoop data), the
+ * IP header isn't on a 32bit aligned address, or the IP header
+ * isn't contained within a single block, then make a copy which
+ * meets our requirements and do a freemsg on the one passed in
+ * since we're no longer using it or passing it up.
+ */
+
+ if ((pfil_delayed_copy == 0 && m->b_datap->db_ref > 1)
+ || ((uintptr_t)ip & 0x3) || len < sizeof(*ip)
+ || (sap != IP_DL_SAP
+#if SOLARIS2 >= 8
+ && sap != IP6_DL_SAP
+#endif
+ )) {
+ mblk_t *b;
+ mblk_t *nm;
+ mblk_t *nmt;
+ mblk_t *previous_nm;
+
+forced_copy:
+ nmt = NULL;
+ previous_nm = NULL;
+
+ /*
+ * Duplicate the message block descriptors up to (and
+ * including if the offset is non-zero) the block where
+ * IP begins.
+ */
+ for (b = mt; b != m || off; b = b->b_cont) {
+ nm = dupb(b);
+ if (nm == NULL) {
+ atomic_add_long(&qif->qf_copyfail, 1);
+ if (nmt)
+ freemsg(nmt);
+ return ENOBUFS;
+ }
+
+ nm->b_cont = NULL;
+ if (nmt)
+ linkb(previous_nm, nm);
+ else
+ nmt = nm;
+ previous_nm = nm;
+
+ /*
+ * Set the length so the block only contains what
+ * appears before IP.
+ */
+ if (b == m) {
+ nm->b_wptr = nm->b_rptr + off;
+ break;
+ }
+ }
+
+ m->b_rptr += off;
+ nm = msgpullup(m, -1);
+ m->b_rptr -= off;
+
+ if (nm == NULL) {
+ atomic_add_long(&qif->qf_copyfail, 1);
+ if (nmt)
+ freemsg(nmt);
+ return ENOBUFS;
+ }
+
+ if (nmt)
+ linkb(previous_nm, nm);
+ else
+ nmt = nm;
+
+ freemsg(mt);
+
+ *mp = nmt;
+ mt = nmt;
+ m = nm;
+
+ ip = (struct ip *)m->b_rptr;
+ len = m->b_wptr - m->b_rptr;
+ mlen = len;
+ off = 0;
+ }
+
+ if (sap == IP_DL_SAP) {
u_short tlen;
hlen = sizeof(*ip);
@@ -582,7 +705,6 @@ forced_copy:
((char *)&tlen)[1] = ((char *)&ip->ip_len)[1];
plen = ntohs(tlen);
- sap = 0;
ph = &pfh_inet4;
}
#if SOLARIS2 >= 8
@@ -599,165 +721,43 @@ forced_copy:
if (plen == 0)
return EMSGSIZE; /* Jumbo gram */
- sap = IP6_DL_SAP;
ph = &pfh_inet6;
}
#endif
else {
- hlen = 0;
sap = -1;
}
-
- len = m->b_wptr - m->b_rptr - off;
-#ifdef PFILDEBUG
- /*LINTED: E_CONSTANT_CONDITION*/
- PRINT(10,(CE_CONT,
- "!IP Filter[%s]: out %d len %ld/%ld sap %d ip %p b_rptr %p off %ld m %p/%d/%d/%p mt %p/%d/%d/%p\n",
- qif->qf_name, out, len, mlen, sap,
- (void *)ip, (void *)m->b_rptr, off,
- (void *)m, MTYPE(m), (int)MLEN(m), (void *)m->b_cont,
- (void *)mt, MTYPE(mt), (int)MLEN(mt), (void *)mt->b_cont));
-#endif
-
- /*
- * Ok, the IP header isn't on a 32bit aligned address so fix this.
- */
- if (((uintptr_t)ip & 0x3) || (len < sizeof(*ip)) || (sap == -1)) {
- mblk_t *m2, *m1;
- int off2;
-
- if (m->b_datap->db_ref > 1)
- goto forced_copy;
- /*
- * If we have already tried to realign the IP header and we
- * are back here, then the attempt has failed, so stop now
- * rather than try again (could keep on retrying with no
- * benefit.)
- */
- if (realigned) {
- atomic_add_long(&qp->qf_drop, 1);
- return EINVAL;
- }
- realigned = 1;
-
- len = msgdsize(m);
- if (len < sizeof(*ip)) {
- atomic_add_long(&qp->qf_bad, 1);
- return EINVAL;
- }
-
- /*
- * XXX - Now I understand how pullupmsg() & STREAMS messages
- * work better, this can possibly be junked in favour of using
- * pullupmsg() which will preserve all the dblk bits correctly,
- * as is done in fr_pullup in the ipf code.
- */
-
- /*
- * Junk using pullupmsg()
- */
- off2 = (uintptr_t)ip & 0x3;
- if (off2)
- off2 = 4 - off2;
- m2 = allocb(len + off2, BPRI_HI);
- if (m2 == NULL) {
- atomic_add_long(&qp->qf_drop, 1);
- return ENOBUFS;
- }
-
- MTYPE(m2) = M_DATA;
- if (m->b_rptr != (u_char *)ip)
- m2->b_rptr += off2;
- m2->b_wptr = m2->b_rptr + len;
- m1 = m;
- s = (u_char *)m->b_rptr;
- for (bp = m2->b_rptr; m1 && (bp < m2->b_wptr); bp += len) {
- len = MIN(m1->b_wptr - s, m2->b_wptr - bp);
- bcopy(s, bp, len);
- m1 = m1->b_cont;
- if (m1 != NULL)
- s = m1->b_rptr;
- }
-
- if ((mt != m) && (mt->b_cont == m) && (off == 0)) {
- /*
- * check if the buffer we're changing is chained in-
- * between other buffers and unlink/relink as required.
- */
- (void) unlinkb(mt); /* should return 'm' */
- m1 = unlinkb(m);
- if (m1 != NULL)
- linkb(m2, m1);
- freemsg(m);
- linkb(mt, m2);
- } else {
- if (m == mt) {
- m1 = unlinkb(mt);
- if (m1)
- linkb(m2, m1);
- }
- freemsg(mt);
- *mp = m2;
- mt = m2;
- }
-
- off = 0;
- goto tryagain;
- }
-
- if (((sap == 0) && (ip->ip_v != IPVERSION))
+ if (((sap == IP_DL_SAP) && (ip->ip_v != IPVERSION))
#if SOLARIS2 >= 8
|| ((sap == IP6_DL_SAP) && (((ip6->ip6_vfc) & 0xf0) != 0x60))
#endif
+ || sap == -1
) {
- atomic_add_long(&qp->qf_notip, 1);
+ atomic_add_long(&qif->qf_notip, 1);
#ifdef PFILDEBUG
pfil_donotip(out, qif, q, m, mt, ip, off);
#endif
return EINVAL;
}
- /*
- * The code in IPFilter assumes that both the ip_off and ip_len
- * fields are in host byte order, so convert them here to fulfill
- * that expectation.
- *
- * If the target compile host is non-SPARC, assume it is a little
- * endian machine, requiring the conversion of offset/length fields
- * to both be host byte ordered.
- */
-#ifndef sparc
- if (sap == 0) {
- __ipoff = (u_short)ip->ip_off;
- ip->ip_len = plen;
- ip->ip_off = ntohs(__ipoff);
- }
-#endif
- if (sap == 0)
+ if (sap == IP_DL_SAP)
iphlen = ip->ip_hl << 2;
#if SOLARIS2 >= 8
else if (sap == IP6_DL_SAP)
iphlen = sizeof(ip6_t);
#endif
+
if ((
#if SOLARIS2 >= 8
- (sap == IP6_DL_SAP) && (mlen < iphlen + plen)) ||
- ((sap == 0) &&
+ (sap == IP6_DL_SAP) && (mlen < plen)) ||
+ ((sap == IP_DL_SAP) &&
#endif
((iphlen < hlen) || (iphlen > plen) || (mlen < plen)))) {
/*
* Bad IP packet or not enough data/data length mismatches
*/
-#ifndef sparc
- if (sap == 0) {
- __ipoff = (u_short)ip->ip_off;
-
- ip->ip_len = htons(plen);
- ip->ip_off = htons(__ipoff);
- }
-#endif
- atomic_add_long(&qp->qf_bad, 1);
+ atomic_add_long(&qif->qf_bad, 1);
return EINVAL;
}
@@ -769,28 +769,43 @@ forced_copy:
if (m->b_datap->db_ref > 1)
goto forced_copy;
if (!pullupmsg(m, (int)iphlen + off)) {
- atomic_add_long(&qp->qf_nodata, 1);
- return -5;
+ atomic_add_long(&qif->qf_nodata, 1);
+ return ENOBUFS;
}
ip = (struct ip *)ALIGN32(m->b_rptr + off);
}
- if (sap == IP6_DL_SAP) {
- if ((len > iphlen + plen) && (off == 0))
- m->b_wptr -= len - (iphlen + plen);
- } else {
- if ((len > plen) && (off == 0))
- m->b_wptr -= len - plen;
+ /*
+ * Discard any excess data.
+ */
+ if (sap == IP6_DL_SAP && len > iphlen + plen)
+ m->b_wptr = m->b_rptr + off + plen + iphlen;
+ else if (sap == IP_DL_SAP && len > plen)
+ m->b_wptr = m->b_rptr + off + plen;
+
+ /*
+ * The code in IPFilter assumes that both the ip_off and ip_len
+ * fields are in host byte order, so convert them here to fulfill
+ * that expectation.
+ *
+ * If the target compile host is non-SPARC, assume it is a little
+ * endian machine, requiring the conversion of offset/length fields
+ * to both be host byte ordered.
+ */
+#ifndef sparc
+ if (sap == IP_DL_SAP) {
+ __ipoff = (u_short)ip->ip_off;
+ ip->ip_len = plen;
+ ip->ip_off = ntohs(__ipoff);
}
+#endif
- qif->qf_m = m;
- qif->qf_q = q;
- qif->qf_data = ip;
- qif->qf_oq = OTHERQ(q);
- qif->qf_off = off;
+ qpi->qpi_m = m;
+ qpi->qpi_off = off;
+ qpi->qpi_data = ip;
- if (qp->qf_ipmp != NULL)
- qp = qp->qf_ipmp;
+ if (qif->qf_ipmp != NULL)
+ qif = qif->qf_ipmp;
READ_ENTER(&ph->ph_lock);
@@ -802,15 +817,24 @@ forced_copy:
flags, (void *)ph, (void *)pfh));
for (; pfh; pfh = pfh->pfil_next)
if (pfh->pfil_func) {
- err = (*pfh->pfil_func)(ip, iphlen, qif->qf_ill, out,
- qif, mp);
+ err = (*pfh->pfil_func)(ip, iphlen, qif, out, qpi, mp);
if (err || !*mp)
break;
- ip = qif->qf_data;
+ /*
+ * fr_pullup may have allocated a new buffer.
+ */
+ ip = qpi->qpi_data;
}
RW_EXIT(&ph->ph_lock);
/*
+ * Functions called via pfil_func should only return values >= 0, so
+ * convert any that are < 0 to be > 0 and preserve the absolute value.
+ */
+ if (err < 0)
+ err = -err;
+
+ /*
* If we still have a STREAMS message after calling the filtering
* hooks, return the byte order of the fields changed above on
* platforms where this is required. They are refetched from the
@@ -818,8 +842,8 @@ forced_copy:
* them in some way.
*/
#ifndef sparc
- if (*mp != NULL) {
- if (sap == 0) {
+ if ((err == 0) && (*mp != NULL)) {
+ if (sap == IP_DL_SAP) {
__iplen = (u_short)ip->ip_len;
__ipoff = (u_short)ip->ip_off;
ip->ip_len = htons(__iplen);
@@ -827,7 +851,6 @@ forced_copy:
}
}
#endif
-
return err;
}
@@ -1015,6 +1038,9 @@ static void pfil_remif(queue_t *rq)
#endif /* IRE_ILL_CN */
+/************************************************************************
+ *
+ */
#ifdef PFILDEBUG
/* ------------------------------------------------------------------------ */
/* Function: pfil_donotip */
@@ -1090,6 +1116,51 @@ void pfil_donotip(int out, qif_t *qif, queue_t *q, mblk_t *m, mblk_t *mt, struct
#endif
+/* ------------------------------------------------------------------------ */
+/* Function:    pfil_property_update                                        */
+/* Returns:     int - DDI_SUCCESS == success, else failure                  */
+/* Parameters:  dip(I) - pointer to device information (dev_info_t)         */
+/*                                                                          */
+/* Set ddi-no-autodetach on dip, then pass each ';' separated entry of the  */
+/* pfil.conf "qif_ipmp_set" property, if present, to qif_ipmp_update().     */
+/* ------------------------------------------------------------------------ */
+static int pfil_property_update(dev_info_t *dip)
+{
+	char *list, *s, *t;
+	int err;
+
+	/* Updates take DDI_DEV_T_NONE and return DDI_PROP_* codes, never -1. */
+	if (ddi_prop_update_int(DDI_DEV_T_NONE, dip,
+	    "ddi-no-autodetach", 1) != DDI_PROP_SUCCESS) {
+		cmn_err(CE_WARN, "!updating ddi-no-autodetach failed");
+		return DDI_FAILURE;
+	}
+
+	list = NULL;
+	err = ddi_prop_lookup_string(DDI_DEV_T_ANY, dip,
+				     0, "qif_ipmp_set", &list);
+#ifdef IPFDEBUG
+	cmn_err(CE_CONT, "IP Filter: lookup_string(qif_ipmp_set) = %d\n", err);
+#endif
+	if (err == DDI_PROP_SUCCESS) {
+		t = NULL;
+		s = list;
+		do {	/* split the list in place at each ';' */
+			if (t != NULL)
+				s = t + 1;
+			t = strchr(s, ';');
+			if (t != NULL)
+				*t = '\0';
+			qif_ipmp_update(s);
+		} while (t != NULL);
+
+		ddi_prop_free(list);	/* release the looked-up string */
+	}
+
+	return DDI_SUCCESS;
+}
+
+
#if SOLARIS2 == 8
int miocpullup(mblk_t *m, size_t len)
{
@@ -1098,4 +1169,3 @@ int miocpullup(mblk_t *m, size_t len)
return pullupmsg(m->b_cont, len);
}
#endif
-
diff --git a/usr/src/uts/common/inet/ipf/pfilstream.c b/usr/src/uts/common/inet/pfil/pfilstream.c
index 580f60d95a..51d1b30d5a 100644
--- a/usr/src/uts/common/inet/ipf/pfilstream.c
+++ b/usr/src/uts/common/inet/pfil/pfilstream.c
@@ -3,7 +3,7 @@
*
* See the IPFILTER.LICENCE file for details on licencing.
*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -20,6 +20,7 @@ struct uio;
#include <sys/dlpi.h>
#include <sys/cmn_err.h>
#ifdef sun
+# include <sys/atomic.h>
# include <sys/sockio.h>
# include <sys/ksynch.h>
# include <sys/strsubr.h>
@@ -27,6 +28,7 @@ struct uio;
#endif
#ifdef __hpux
# include <sys/dlpi_ext.h>
+# include <net/mtcp.h>
#endif
#include <netinet/in.h>
#include <netinet/in_systm.h>
@@ -37,6 +39,8 @@ struct uio;
# include <inet/common.h>
# if SOLARIS2 >= 8
# include <netinet/ip6.h>
+# else
+# include <net/if_dl.h>
# endif
# if SOLARIS2 >= 10
# include <sys/policy.h>
@@ -50,17 +54,10 @@ struct uio;
# include <inet/ip_if.h>
#endif
-#ifdef sun
-# include <inet/ipf/compat.h>
-# include <inet/ipf/pfil.h>
-# include <inet/ipf/qif.h>
-# include <pfild.h>
-#else
-# include "compat.h"
-# include "pfil.h"
-# include "qif.h"
-# include "pfild.h"
-#endif
+#include "compat.h"
+#include "qif.h"
+#include "pfil.h"
+#include "pfild.h"
#if SOLARIS2 >= 10
extern queue_t *pfildq;
@@ -85,6 +82,7 @@ extern queue_t *pfildq;
static int pfil_drv_priv __P((cred_t *));
+
#ifdef PFILDEBUG
/* ------------------------------------------------------------------------ */
/* Function: pfil_printmchain */
@@ -209,6 +207,7 @@ static void pfil_printioctl(mblk_t *mp)
}
#endif /* PFILDEBUG */
+
/* ------------------------------------------------------------------------ */
/* Function: pfilbind */
/* Returns: int - 0 == success, else error */
@@ -234,7 +233,7 @@ int pfilbind(queue_t *q)
/* ------------------------------------------------------------------------ */
/* Function: pfilwput_ioctl */
-/* Returns: void */
+/* Returns: void */
/* Parameters: q(I) - pointer to queue */
/* mp(I) - pointer to STREAMS message */
/* */
@@ -318,7 +317,21 @@ void pfilwput_ioctl(queue_t *q, mblk_t *mp)
break;
#endif
#endif /* pre-S10 */
- default:
+#ifdef __hpux
+ case ND_SET :
+ case ND_GET :
+ if (pfil_ioctl_nd(q, mp)) {
+ if (iocp->ioc_error)
+ iocp->ioc_count = 0;
+ mp->b_datap->db_type = M_IOCACK;
+ qreply(q, mp);
+ } else {
+ miocnak(q, mp, 0, EINVAL);
+ }
+ return;
+ break;
+#endif
+ default :
break;
}
@@ -408,7 +421,7 @@ static void pfil_update_ifaddrset(mblk_t *mp)
*/
/* ------------------------------------------------------------------------ */
/* Function: pfilwput */
-/* Returns: void */
+/* Returns: void */
/* Parameters: q(I) - pointer to queue */
/* mp(I) - pointer to STREAMS message */
/* */
@@ -416,19 +429,21 @@ static void pfil_update_ifaddrset(mblk_t *mp)
/* /dev/pfil, not the STREAMS module pushed on another queue. As it does */
/* not do any IO, this should never be called except to handle ioctl's and */
/* so all other messages are free'd and no reply sent back. */
-/* The only ioctls handled by the driver are ND_GET/ND_SET. */
-/* pfilwput also handles PFILCMD_IFADDRS and PFILCMD_IFADDRSET messages */
+/* The only ioctls handled by the driver are ND_GET/ND_SET. */
+/* pfilwput also handles PFILCMD_IFADDRS and PFILCMD_IFADDRSET messages. */
+/* NOTE: HP-UX does not need or have pfil implemented as a STREAMS device. */
/* ------------------------------------------------------------------------ */
+#ifdef sun
void pfilwput(queue_t *q, mblk_t *mp)
{
struct iocblk *iocp;
uint32_t cmd;
-#ifdef PFILDEBUG
+# ifdef PFILDEBUG
/* LINTED: E_CONSTANT_CONDITION */
PRINT(9,(CE_CONT, "!pfilwput(%p,%p) [%s] qif %p\n",
(void *)q, (void *)mp, QTONM(q), (void *)q->q_ptr));
-#endif
+# endif
switch (MTYPE(mp))
{
@@ -463,7 +478,8 @@ void pfilwput(queue_t *q, mblk_t *mp)
#endif
case M_IOCTL:
iocp = (struct iocblk *)mp->b_rptr;
- switch (iocp->ioc_cmd) {
+ switch (iocp->ioc_cmd)
+ {
case ND_SET :
case ND_GET :
if (pfil_ioctl_nd(q, mp)) {
@@ -475,17 +491,20 @@ void pfilwput(queue_t *q, mblk_t *mp)
miocnak(q, mp, 0, EINVAL);
}
break;
- default:
+
+ default :
miocnak(q, mp, 0, EINVAL);
break;
}
return;
- default:
+ default :
break;
}
+
freemsg(mp);
}
+#endif
/************************************************************************
@@ -493,7 +512,7 @@ void pfilwput(queue_t *q, mblk_t *mp)
*/
/* ------------------------------------------------------------------------ */
/* Function: pfilmodwput */
-/* Returns: Void. */
+/* Returns: void */
/* Parameters: q(I) - pointer to queue */
/* mp(I) - pointer to STREAMS message */
/* */
@@ -527,7 +546,7 @@ void pfilmodwput(queue_t *q, mblk_t *mp)
break;
}
- /*FALLTHROUGH*/
+ /*FALLTHROUGH*/
case M_DATA :
atomic_add_long(&qif->qf_nw, 1);
@@ -535,6 +554,7 @@ void pfilmodwput(queue_t *q, mblk_t *mp)
int i;
i = pfil_precheck(q, &mp, PFIL_OUT, qif);
+
/* LINTED: E_CONSTANT_CONDITION */
PRINT(9, (CE_CONT, "!%s: pfil_precheck=%d mp %p\n",
"pfilmodwput", i, (void *)mp));
@@ -547,9 +567,10 @@ void pfilmodwput(queue_t *q, mblk_t *mp)
}
break;
- case M_IOCTL:
+ case M_IOCTL :
pfilwput_ioctl(q, mp);
return;
+
default :
break;
}
@@ -572,9 +593,10 @@ void pfilmodrput(queue_t *q, mblk_t *mp)
{
union DL_primitives *dl;
dl_bind_ack_t *b;
- int i;
+ int i, flags;
qif_t *qif;
+ flags = 0;
qif = q->q_ptr;
/* LINTED: E_CONSTANT_CONDITION */
@@ -582,7 +604,9 @@ void pfilmodrput(queue_t *q, mblk_t *mp)
(void *)q, (void *)mp, mp->b_datap->db_type, QTONM(q),
QTONM(OTHERQ(q)), (void *)qif,
(void *)qif->qf_ill));
- switch (MTYPE(mp)) {
+
+ switch (MTYPE(mp))
+ {
#ifdef DL_IOC_HDR_INFO
case M_IOCACK :
{
@@ -602,6 +626,7 @@ void pfilmodrput(queue_t *q, mblk_t *mp)
pfil_printioctl(mp);
#endif
break;
+
case M_PROTO :
case M_PCPROTO :
@@ -616,7 +641,7 @@ void pfilmodrput(queue_t *q, mblk_t *mp)
case DL_UNITDATA_IND :
if ((MLEN(mp) >= sizeof(dl_unitdata_ind_t)) &&
(dl->unitdata_ind.dl_group_address))
- qif->qf_flags |= QF_GROUP;
+ flags |= PFIL_GROUP;
break;
case DL_SUBS_BIND_ACK :
@@ -625,7 +650,6 @@ void pfilmodrput(queue_t *q, mblk_t *mp)
c = (dl_subs_bind_ack_t *)dl;
if (qif->qf_sap == 0) {
-/* XXX: What is this message? */
#if 0
qif->qf_sap = c->dl_sap;
if (qif->qf_sap < 0)
@@ -655,7 +679,9 @@ void pfilmodrput(queue_t *q, mblk_t *mp)
qif->qf_waitack++;
break;
}
- if (!b->dl_sap || b->dl_sap == IP_DL_SAP || b->dl_sap == IP6_DL_SAP)
+
+ if (!b->dl_sap || b->dl_sap == IP_DL_SAP ||
+ b->dl_sap == IP6_DL_SAP)
(void) pfilbind(q);
break;
@@ -672,7 +698,8 @@ void pfilmodrput(queue_t *q, mblk_t *mp)
atomic_add_long(&qif->qf_nr, 1);
if (qif->qf_ill != NULL) {
- i = pfil_precheck(q, &mp, PFIL_IN, qif);
+ flags |= PFIL_IN;
+ i = pfil_precheck(q, &mp, flags, qif);
/* LINTED: E_CONSTANT_CONDITION */
PRINT(9, (CE_CONT,
@@ -686,9 +713,11 @@ void pfilmodrput(queue_t *q, mblk_t *mp)
}
}
break;
+
default :
break;
}
+
putnext(q, mp);
}
@@ -730,4 +759,3 @@ void pfil_startup()
pfil_init(&pfh_inet6);
pfil_init(&pfh_sync);
}
-
diff --git a/usr/src/uts/common/inet/ipf/pkt.c b/usr/src/uts/common/inet/pfil/pkt.c
index 0b4653b5d4..6a9c74b139 100644
--- a/usr/src/uts/common/inet/ipf/pkt.c
+++ b/usr/src/uts/common/inet/pfil/pkt.c
@@ -3,8 +3,6 @@
*
* See the IPFILTER.LICENCE file for details on licencing.
*
- * ident "@(#)$Id: pkt.c,v 1.8 2003/07/28 05:13:58 darrenr Exp $"
- *
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -25,7 +23,9 @@
#include <netinet/in.h>
#include <netinet/ip.h>
#if SOLARIS2 >= 8
-#include <netinet/ip6.h>
+# include <netinet/ip6.h>
+#else
+# include <net/if_dl.h>
#endif
#undef IPOPT_EOL
@@ -253,7 +253,6 @@ mblk_t *mb;
/* Function: pfil_sendbuf */
/* Returns: int - 0 == success, 1 == failure */
/* Parameters: m(I) - pointer to streams message */
-/* v - indicated v4 or v6 */
/* */
/* Output an IPv4 packet to whichever interface has the correct route. */
/* ------------------------------------------------------------------------ */
diff --git a/usr/src/uts/common/inet/ipf/qif.c b/usr/src/uts/common/inet/pfil/qif.c
index 7715098c0f..fc74ca00fc 100644
--- a/usr/src/uts/common/inet/ipf/qif.c
+++ b/usr/src/uts/common/inet/pfil/qif.c
@@ -3,7 +3,7 @@
*
* See the IPFILTER.LICENCE file for details on licencing.
*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -42,10 +42,10 @@
# endif
# include <net/if_dl.h>
#endif
-#include <inet/common.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
+#include <inet/common.h>
#undef IPOPT_EOL
#undef IPOPT_NOP
#undef IPOPT_LSRR
@@ -326,7 +326,7 @@ qif_attach(rq)
(hdrsizes[ill->ill_type][0] == ill->ill_type))
qif->qf_hl = hdrsizes[ill->ill_type][1];
- if (qif->qf_hl == 0) {
+ if (qif->qf_hl == 0 && ill->ill_type != IFT_OTHER) {
cmn_err(CE_WARN,
"!Unknown layer 2 header size for %s type %d sap %x\n",
qif->qf_name, ill->ill_type, ill->ill_sap);
@@ -364,7 +364,7 @@ qif_attach(rq)
pfh = pfil_hook_get(PFIL_IN, &pfh_sync);
for (; pfh; pfh = pfh->pfil_next)
if (pfh->pfil_func)
- (void) (*pfh->pfil_func)(NULL, 0, ill, 0, qif, NULL);
+ (void) (*pfh->pfil_func)(NULL, 0, qif, 0, qif, NULL);
RW_EXIT(&pfh_sync.ph_lock);
@@ -420,6 +420,7 @@ qif_new(q, mflags)
return qif;
}
+
/* ------------------------------------------------------------------------ */
/* Function: qif_delete */
/* Returns: void */
@@ -470,7 +471,7 @@ queue_t *q;
pfh = pfil_hook_get(PFIL_OUT, &pfh_sync);
for (; pfh; pfh = pfh->pfil_next)
if (pfh->pfil_func)
- (void) (*pfh->pfil_func)(NULL, 0, qif->qf_ill,
+ (void) (*pfh->pfil_func)(NULL, 0, qif,
1, qif, NULL);
RW_EXIT(&pfh_sync.ph_lock);
}
@@ -480,9 +481,10 @@ queue_t *q;
freeb(qif->qf_addrset);
mutex_destroy(&qif->qf_ptl.pt_lock);
cv_destroy(&qif->qf_ptl.pt_cv);
- if (qif->qf_qifsz == sizeof(*qif)) {
+
+ if (qif->qf_qifsz == sizeof(*qif))
kmem_cache_free(qif_cache, qif);
- } else {
+ else {
KMFREE(qif, qif->qf_qifsz);
}
}
@@ -604,6 +606,7 @@ qif_t *qif_walk(qif_t **qfp)
return *qfp;
}
+
/* ------------------------------------------------------------------------ */
/* Function: qif_ipmp_update */
/* Returns: void */
@@ -710,7 +713,7 @@ void qif_ipmp_delete(char *qifname)
pfh = pfil_hook_get(PFIL_OUT, &pfh_sync);
for (; pfh; pfh = pfh->pfil_next)
if (pfh->pfil_func)
- (void) (*pfh->pfil_func)(NULL, 0, qif->qf_ill, 1,
+ (void) (*pfh->pfil_func)(NULL, 0, qif, 1,
qif, NULL);
KMFREE(qif, qif->qf_qifsz);
@@ -796,3 +799,59 @@ void qif_ipmp_syncslave(qif_t *target, const int sap)
}
}
+
+/* ------------------------------------------------------------------------ */
+/* Function: qif_hl_set */
+/* Returns: void */
+/* Parameters: ipmpconf(I) - string with header length setting for NIC; */
+/* it is modified in place ('=' becomes '\0') */
+/* */
+/* For NICs that we cannot automatically determine the MAC header length of */
+/* we provide a manual crutch to achieve that with. The input syntax for */
+/* the string is "[v4:|v6:]<ifname>=<length>", where <length> must be a */
+/* non-empty string of decimal digits. Malformed strings and names that */
+/* match no entry on the qif list are ignored without any error report. */
+/* ------------------------------------------------------------------------ */
+void qif_hl_set(char *ipmpconf)
+{
+	qif_t *qf;
+	char *s;
+	int hl;
+
+	/*
+	 * Skip the optional address family prefix.  Note that the prefix is
+	 * only stripped; it plays no part in selecting the interface below.
+	 */
+	if (!strncmp(ipmpconf, "v4:", 3)) {
+		ipmpconf += 3;
+	} else if (!strncmp(ipmpconf, "v6:", 3)) {
+#if SOLARIS2 >= 8
+		ipmpconf += 3;
+#else
+		return;
+#endif
+	}
+
+	/* Split "<ifname>=<length>" in place; a length must be present. */
+	s = strchr(ipmpconf, '=');
+	if (s == NULL)
+		return;
+	*s++ = '\0';
+	if (*s == '\0')
+		return;
+
+	/*
+	 * Parse the length before taking pfil_rw, so that rejecting bad
+	 * input can never return with the lock still held.
+	 */
+	hl = 0;
+	for (; *s != '\0'; s++) {
+		int digit;
+
+		if (*s < '0' || *s > '9')
+			return;
+		digit = *s - '0';
+		/* Refuse values that would overflow a 32-bit signed int. */
+		if (hl > (0x7fffffff - digit) / 10)
+			return;
+		hl = hl * 10 + digit;
+	}
+
+	/*
+	 * NOTE(review): qf_hl is stored while pfil_rw is held only as a
+	 * reader, exactly as before - confirm this matches the locking
+	 * rules for qif fields.
+	 */
+	READ_ENTER(&pfil_rw);
+	for (qf = qif_head; qf != NULL; qf = qf->qf_next) {
+		if (strcmp(qf->qf_name, ipmpconf) == 0) {
+			qf->qf_hl = hl;
+			break;
+		}
+	}
+	RW_EXIT(&pfil_rw);
+}
diff --git a/usr/src/uts/common/inet/ipf/qif.h b/usr/src/uts/common/inet/pfil/qif.h
index 2e3c7cc886..7371ab420a 100644
--- a/usr/src/uts/common/inet/ipf/qif.h
+++ b/usr/src/uts/common/inet/pfil/qif.h
@@ -3,14 +3,17 @@
*
* See the IPFILTER.LICENCE file for details on licencing.
*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
-#include "pfil.h"
-#include <sys/ptms.h>
+#ifdef sun
+# include <sys/dditypes.h>
+# include <sys/ptms.h>
+#endif
+
#ifdef IRE_ILL_CN
typedef union {
@@ -63,7 +66,6 @@ typedef struct s_ill_s {
uint_t mtu;
} s_ill_t;
-
typedef struct qif {
/* for alignment reasons, the lock is first. */
kmutex_t qf_lock;
@@ -103,7 +105,7 @@ typedef struct qif {
char qf_name[LIFNAMSIZ];
char *qf_members;
- /* ON10 specific */
+ /* ON(10, NV) specific */
mblk_t *qf_addrset;
size_t qf_off;
mblk_t *qf_m;
@@ -155,7 +157,15 @@ typedef struct qpktinfo {
#endif
#ifdef __hpux
-# define QF_V4_ADDR(x) ((ifinfot_t *)(x)->qf_ill)->ifi_addr[0]
+# define QF_V4_ADDR(x) ((ifinfo_t *)(x)->qf_ill)->ifi_addr[0]
+# define QF_V4_BROADCAST(x) 0
+# define QF_V4_NETMASK(x) 0xffffffff
+# define QF_V4_PEERADDR(x) 0
+# ifdef USE_INET6
+# define QF_V6_BROADCAST(x) 0
+# define QF_V6_NETMASK(x) 0
+# define QF_V6_PEERADDR(x) 0
+# endif
#endif
@@ -170,14 +180,16 @@ extern int qif_startup(void);
extern void qif_stop(void);
extern void *qif_iflookup(char *, int);
+#ifdef __hpux
struct irinfo_s;
extern void *ir_to_ill(struct irinfo_s *ir);
-
+#endif
extern struct qif *qif_walk(struct qif **);
extern struct qif *qif_head;
extern int qif_verbose;
extern void qif_update(struct qif *, mblk_t *);
extern void qif_nd_init(void);
+extern void qif_hl_set(char *);
extern void qif_ipmp_delete(char *);
extern void qif_ipmp_update(char *);
extern void qif_ipmp_syncmaster(struct qif *, const int);