summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
authorSangeeta Misra <Sangeeta.Misra@Sun.COM>2009-11-03 23:15:19 -0800
committerSangeeta Misra <Sangeeta.Misra@Sun.COM>2009-11-03 23:15:19 -0800
commitdbed73cbda2229fd1aa6dc5743993cae7f0a7ee9 (patch)
tree74a198c6a7ce750aaff09b4d682a53564ca13b58 /usr/src
parentdfe73b3d6f9191b75fe71a92c8854f83c6d16a63 (diff)
downloadillumos-gate-dbed73cbda2229fd1aa6dc5743993cae7f0a7ee9.tar.gz
PSARC 2008/575 ILB: Integrated L3/L4 Load balancer
6882718 in-kernel simple L3/L4 load balancing service should be provided in Solaris 6884202 ipobs_hook() in ip_input() invalidates DB_REF assumption
Diffstat (limited to 'usr/src')
-rw-r--r--usr/src/Makefile.lint3
-rw-r--r--usr/src/Targetdirs1
-rw-r--r--usr/src/cmd/Makefile.check1
-rw-r--r--usr/src/cmd/cmd-inet/usr.lib/Makefile4
-rw-r--r--usr/src/cmd/cmd-inet/usr.lib/ilbd/Makefile112
-rw-r--r--usr/src/cmd/cmd-inet/usr.lib/ilbd/ilb/Makefile55
-rw-r--r--usr/src/cmd/cmd-inet/usr.lib/ilbd/ilb/ilb_probe.c875
-rw-r--r--usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd.h435
-rw-r--r--usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd.xml115
-rw-r--r--usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_hc.c1574
-rw-r--r--usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_main.c1025
-rw-r--r--usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_nat.c295
-rw-r--r--usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_rules.c1357
-rw-r--r--usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_scf.c1692
-rw-r--r--usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_sg.c1644
-rw-r--r--usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_support.c296
-rw-r--r--usr/src/cmd/cmd-inet/usr.sbin/Makefile14
-rw-r--r--usr/src/cmd/cmd-inet/usr.sbin/ilbadm/Makefile82
-rw-r--r--usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm.c249
-rw-r--r--usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm.h242
-rw-r--r--usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_hc.c520
-rw-r--r--usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_import.c441
-rw-r--r--usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_nat.c222
-rw-r--r--usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_rules.c1313
-rw-r--r--usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_sg.c837
-rw-r--r--usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_stats.c1140
-rw-r--r--usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_subr.c1161
-rw-r--r--usr/src/cmd/mdb/common/modules/ip/ip.c503
-rw-r--r--usr/src/cmd/mdb/intel/amd64/ip/Makefile10
-rw-r--r--usr/src/cmd/mdb/intel/ia32/ip/Makefile12
-rw-r--r--usr/src/cmd/mdb/sparc/v9/ip/Makefile12
-rw-r--r--usr/src/head/auth_list.h2
-rw-r--r--usr/src/lib/Makefile4
-rw-r--r--usr/src/lib/libbsm/audit_event.txt15
-rw-r--r--usr/src/lib/libbsm/common/adt.xml444
-rw-r--r--usr/src/lib/libilb/Makefile61
-rw-r--r--usr/src/lib/libilb/Makefile.com73
-rw-r--r--usr/src/lib/libilb/amd64/Makefile36
-rw-r--r--usr/src/lib/libilb/common/ilb_comm.c235
-rw-r--r--usr/src/lib/libilb/common/ilb_hc.c276
-rw-r--r--usr/src/lib/libilb/common/ilb_nat.c180
-rw-r--r--usr/src/lib/libilb/common/ilb_rules.c326
-rw-r--r--usr/src/lib/libilb/common/ilb_sg.c464
-rw-r--r--usr/src/lib/libilb/common/ilb_subr.c424
-rw-r--r--usr/src/lib/libilb/common/libilb.h398
-rw-r--r--usr/src/lib/libilb/common/libilb_impl.h253
-rw-r--r--usr/src/lib/libilb/common/llib-lilb29
-rw-r--r--usr/src/lib/libilb/common/mapfile-vers78
-rw-r--r--usr/src/lib/libilb/i386/Makefile28
-rw-r--r--usr/src/lib/libilb/sparc/Makefile28
-rw-r--r--usr/src/lib/libilb/sparcv9/Makefile34
-rw-r--r--usr/src/lib/libinetutil/common/libinetutil.h2
-rw-r--r--usr/src/lib/libinetutil/common/ofmt.c31
-rw-r--r--usr/src/lib/libinetutil/common/ofmt.h9
-rw-r--r--usr/src/lib/libinetutil/common/tq.c18
-rw-r--r--usr/src/lib/libsecdb/auth_attr.txt3
-rw-r--r--usr/src/lib/libsecdb/help/auths/Makefile3
-rw-r--r--usr/src/lib/libsecdb/help/auths/NetworkILBconf.html41
-rw-r--r--usr/src/lib/libsecdb/help/auths/NetworkILBenable.html41
-rw-r--r--usr/src/lib/libsecdb/help/auths/SmfILBStates.html36
-rw-r--r--usr/src/lib/libsecdb/help/profiles/Makefile1
-rw-r--r--usr/src/lib/libsecdb/help/profiles/RtNetILB.html37
-rw-r--r--usr/src/lib/libsecdb/prof_attr.txt3
-rw-r--r--usr/src/lib/libsecdb/user_attr.txt3
-rw-r--r--usr/src/pkgdefs/Makefile2
-rw-r--r--usr/src/pkgdefs/SUNW0on/prototype_com4
-rw-r--r--usr/src/pkgdefs/SUNWcsu/prototype_com4
-rw-r--r--usr/src/pkgdefs/SUNWilb/Makefile35
-rw-r--r--usr/src/pkgdefs/SUNWilb/depend50
-rw-r--r--usr/src/pkgdefs/SUNWilb/pkginfo.tmpl56
-rw-r--r--usr/src/pkgdefs/SUNWilb/prototype_com59
-rw-r--r--usr/src/pkgdefs/SUNWilb/prototype_i38653
-rw-r--r--usr/src/pkgdefs/SUNWilb/prototype_sparc53
-rw-r--r--usr/src/pkgdefs/SUNWilbr/Makefile37
-rw-r--r--usr/src/pkgdefs/SUNWilbr/depend49
-rw-r--r--usr/src/pkgdefs/SUNWilbr/pkginfo.tmpl58
-rw-r--r--usr/src/pkgdefs/SUNWilbr/prototype_com54
-rw-r--r--usr/src/pkgdefs/SUNWilbr/prototype_i38649
-rw-r--r--usr/src/pkgdefs/SUNWilbr/prototype_sparc48
-rw-r--r--usr/src/uts/common/Makefile.files4
-rw-r--r--usr/src/uts/common/Makefile.rules6
-rw-r--r--usr/src/uts/common/inet/ilb.h239
-rw-r--r--usr/src/uts/common/inet/ilb/ilb.c2153
-rw-r--r--usr/src/uts/common/inet/ilb/ilb_alg.h44
-rw-r--r--usr/src/uts/common/inet/ilb/ilb_alg_hash.c431
-rw-r--r--usr/src/uts/common/inet/ilb/ilb_alg_rr.c232
-rw-r--r--usr/src/uts/common/inet/ilb/ilb_conn.c1531
-rw-r--r--usr/src/uts/common/inet/ilb/ilb_conn.h246
-rw-r--r--usr/src/uts/common/inet/ilb/ilb_impl.h286
-rw-r--r--usr/src/uts/common/inet/ilb/ilb_nat.c609
-rw-r--r--usr/src/uts/common/inet/ilb/ilb_nat.h93
-rw-r--r--usr/src/uts/common/inet/ilb/ilb_stack.h134
-rw-r--r--usr/src/uts/common/inet/ilb_ip.h101
-rw-r--r--usr/src/uts/common/inet/ip/ip.c74
-rw-r--r--usr/src/uts/common/inet/ip/ip6.c35
-rw-r--r--usr/src/uts/common/inet/ip/ip_if.c269
-rw-r--r--usr/src/uts/common/inet/ipnet/ipnet.c21
-rw-r--r--usr/src/uts/common/sys/netstack.h5
-rw-r--r--usr/src/uts/common/sys/sockio.h5
-rw-r--r--usr/src/uts/intel/ip/ip.global-objs.debug6416
-rw-r--r--usr/src/uts/intel/ip/ip.global-objs.obj6416
-rw-r--r--usr/src/uts/sparc/ip/ip.global-objs.debug6416
-rw-r--r--usr/src/uts/sparc/ip/ip.global-objs.obj6416
103 files changed, 26994 insertions, 57 deletions
diff --git a/usr/src/Makefile.lint b/usr/src/Makefile.lint
index 715ab9cf9a..f9148fbbd7 100644
--- a/usr/src/Makefile.lint
+++ b/usr/src/Makefile.lint
@@ -72,6 +72,7 @@ COMMON_SUBDIRS = \
cmd/cmd-inet/usr.bin \
cmd/cmd-inet/usr.lib/bridged \
cmd/cmd-inet/usr.lib/dsvclockd \
+ cmd/cmd-inet/usr.lib/ilbd \
cmd/cmd-inet/usr.lib/in.dhcpd \
cmd/cmd-inet/usr.lib/in.mpathd \
cmd/cmd-inet/usr.lib/in.ndpd \
@@ -82,6 +83,7 @@ COMMON_SUBDIRS = \
cmd/cmd-inet/usr.lib/wanboot \
cmd/cmd-inet/usr.sadm \
cmd/cmd-inet/usr.sbin \
+ cmd/cmd-inet/usr.sbin/ilbadm \
cmd/col \
cmd/compress \
cmd/consadm \
@@ -365,6 +367,7 @@ COMMON_SUBDIRS = \
lib/libgss \
lib/libhotplug \
lib/libidmap \
+ lib/libilb \
lib/libinetcfg \
lib/libinetsvc \
lib/libinetutil \
diff --git a/usr/src/Targetdirs b/usr/src/Targetdirs
index b20207ca5a..4ed1e17291 100644
--- a/usr/src/Targetdirs
+++ b/usr/src/Targetdirs
@@ -234,6 +234,7 @@ DIRS= \
/usr/lib/inet/dhcp/nsu \
/usr/lib/inet/dhcp/svc \
/usr/lib/inet/dhcp/svcadm \
+ /usr/lib/inet/ilb \
/usr/lib/inet/$(MACH32) \
$(XDIRS) \
/usr/lib/krb5 \
diff --git a/usr/src/cmd/Makefile.check b/usr/src/cmd/Makefile.check
index 58c49840c8..a046de9f44 100644
--- a/usr/src/cmd/Makefile.check
+++ b/usr/src/cmd/Makefile.check
@@ -82,6 +82,7 @@ MANIFEST_SUBDIRS= \
agents/snmp/snmprelayd \
boot/scripts \
cmd-crypto/scripts \
+ cmd-inet/usr.lib/ilbd \
cmd-inet/usr.lib/in.chargend \
cmd-inet/usr.lib/in.daytimed \
cmd-inet/usr.lib/in.dhcpd \
diff --git a/usr/src/cmd/cmd-inet/usr.lib/Makefile b/usr/src/cmd/cmd-inet/usr.lib/Makefile
index 8c1e5198ee..d8216e049b 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/Makefile
+++ b/usr/src/cmd/cmd-inet/usr.lib/Makefile
@@ -23,12 +23,12 @@
# Use is subject to license terms.
#
-SUBDIRS= bridged dhcp dsvclockd in.chargend in.daytimed \
+SUBDIRS= bridged dhcp dsvclockd ilbd in.chargend in.daytimed \
in.discardd in.echod in.dhcpd in.mpathd in.ndpd \
in.ripngd in.timed inetd mdnsd ncaconfd pppoe \
slpd wanboot wpad
-MSGSUBDIRS= dsvclockd in.dhcpd inetd ncaconfd wanboot
+MSGSUBDIRS= dsvclockd ilbd in.dhcpd inetd ncaconfd wanboot
include ../../Makefile.cmd
diff --git a/usr/src/cmd/cmd-inet/usr.lib/ilbd/Makefile b/usr/src/cmd/cmd-inet/usr.lib/ilbd/Makefile
new file mode 100644
index 0000000000..91cd41202c
--- /dev/null
+++ b/usr/src/cmd/cmd-inet/usr.lib/ilbd/Makefile
@@ -0,0 +1,112 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+# ilbd: the ILB daemon, delivered together with its SMF manifest.
+ILBD= ilbd
+PROG= $(ILBD)
+MANIFEST= ilbd.xml
+
+ILBD_OBJS= ilbd_main.o ilbd_support.o ilbd_sg.o ilbd_rules.o ilbd_hc.o \
+	ilbd_nat.o ilbd_scf.o
+ILBD_SRCS= $(ILBD_OBJS:%.o=%.c)
+HDRS= ilbd.h
+
+# list.o is built from the kernel list implementation.  The source lives
+# four directories above this one; anchor the path at $(SRC) so that
+# LIST_SRCS and the pattern rule below always agree.
+LIST_OBJS= list.o
+LIST_SRCS= $(LIST_OBJS:%.o=$(SRC)/uts/common/os/%.c)
+
+LIB_INC= $(SRC)/lib/libilb/common
+
+OBJS= $(ILBD_OBJS) $(LIST_OBJS)
+SRCS= $(ILBD_SRCS) $(LIST_SRCS)
+
+# Sub-directory holding the health-check probe helper (ilb_probe).
+ILBSUBDIRS= ilb
+
+include ../../../Makefile.cmd
+include ../../Makefile.cmd-inet
+
+ROOTMANIFESTDIR= $(ROOTSVCNETWORK)/loadbalancer
+
+CHECKHDRS= $(HDRS:%.h=%.check)
+
+CPPFLAGS += -D_FILE_OFFSET_BITS=64 -I$(CMDINETCOMMONDIR) -D_REENTRANT
+CPPFLAGS += -I$(LIB_INC)
+CPPFLAGS += -I$(SRC)/uts/common
+
+C99MODE = $(C99_ENABLE)
+
+# I18n
+POFILE = $(ILBD).po
+POFILES = $(ILBD_SRCS:%.c=%.po)
+
+all:= TARGET= all
+install:= TARGET= install
+clean:= TARGET= clean
+clobber:= TARGET= clobber
+lint:= TARGET= lint
+
+# used for debugging ONLY:
+# NOTE(review): these assignments are unconditional, so they replace the
+# CFLAGS inherited from Makefile.cmd for every build; confirm this is
+# intended before shipping.
+
+CFLAGS = -g -K pic
+STRIP_STABS= :
+CTFCVTFLAGS += -g
+
+
+LDLIBS += -lsocket -lsecdb -lnsl -lilb -lscf -linetutil -lbsm
+
+.KEEP_STATE:
+
+all: $(PROG)
+
+$(ILBD): $(OBJS)
+	$(LINK.c) $(OBJS) -o $@ $(LDLIBS) $(CTFMERGE_HOOK)
+	$(POST_PROCESS)
+
+include ../Makefile.lib
+
+install: all $(ROOTLIBINETPROG) $(ETCILBDFILES) $(ROOTMANIFEST)
+
+check: $(CHKMANIFEST) $(CHECKHDRS)
+
+clean:
+	$(RM) $(OBJS)
+
+# Only the daemon's own sources are linted; list.c is not included.
+lint:
+	$(LINT.c) $(ILBD_SRCS) $(LDLIBS)
+
+$(POFILE): $(POFILES)
+	$(RM) $@
+	$(CAT) $(POFILES) > $@
+
+# Every top-level target is also run in the sub-directories.
+all install clean clobber lint: $(ILBSUBDIRS)
+
+include ../../../Makefile.targ
+
+# the below is needed to get list.o built
+%.o: $(SRC)/uts/common/os/%.c
+	$(COMPILE.c) -o $@ $<
+	$(POST_PROCESS_O)
+
+$(ILBSUBDIRS): FRC
+	@cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
diff --git a/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilb/Makefile b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilb/Makefile
new file mode 100644
index 0000000000..e70830e8e8
--- /dev/null
+++ b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilb/Makefile
@@ -0,0 +1,55 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+# Builds the ilb_probe helper from the single source file ilb_probe.c and
+# installs it under $(ROOTLIBINET)/ilb.
+PROG = ilb_probe
+OBJS = $(PROG:%=%.o)
+SRCS = $(OBJS:%.o=%.c)
+
+include ../../../../Makefile.cmd
+
+LDLIBS += -lsocket -lnsl
+
+.KEEP_STATE:
+
+all: $(PROG)
+
+include ../../Makefile.lib
+
+# Installation directory and the installed path of each program.
+ROOTLIBINETILB = $(ROOTLIBINET)/ilb
+ROOTLIBINETILBPROG = $(PROG:%=$(ROOTLIBINETILB)/%)
+
+# Create the installation directory once its parent exists.
+$(ROOTLIBINETILB): $(ROOTLIBINET)
+ $(INS.dir)
+
+# Install a program into the ilb directory.
+$(ROOTLIBINETILB)/%: % $(ROOTLIBINETILB)
+ $(INS.file)
+
+install: all $(ROOTLIBINETILBPROG)
+
+clean:
+ $(RM) $(OBJS)
+
+lint: lint_SRCS
+
+include ../../../../Makefile.targ
diff --git a/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilb/ilb_probe.c b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilb/ilb_probe.c
new file mode 100644
index 0000000000..0704e76cde
--- /dev/null
+++ b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilb/ilb_probe.c
@@ -0,0 +1,875 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <arpa/inet.h>
+#include <netinet/tcp.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/icmp6.h>
+#include <netinet/udp.h>
+#include <netdb.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <errno.h>
+#include <limits.h>
+#include <signal.h>
+#include <libgen.h>
+#include <fcntl.h>
+
+/*
+ * The following values are what ilbd will set argv[0] to. This determines
+ * what type of probe to send out. Only the base name of argv[0] is
+ * compared against these strings.
+ */
+#define PROBE_PING "ilb_ping"
+#define PROBE_PROTO "ilb_probe"
+
+/*
+ * The transport protocol to use in the probe. Value of argv[3], matched
+ * without regard to case. Only consulted when argv[0] is PROBE_PROTO.
+ */
+#define PROTO_TCP "TCP"
+#define PROTO_UDP "UDP"
+
+/* Kind of probe to run, derived from argv[0] and argv[3]. */
+enum probe_type { ping_probe, tcp_probe, udp_probe };
+
+/* Load balance mode. Value of argv[4], matched without regard to case. */
+#define MODE_DSR "DSR"
+#define MODE_NAT "NAT"
+#define MODE_HALF_NAT "HALF_NAT"
+
+/* Parsed form of the load balance mode string. */
+enum lb_mode { dsr, nat, half_nat };
+
+/* Number of arguments to the command from ilbd, argv[0] included. */
+#define PROG_ARGC 7
+
+/* Size (in bytes) of the buffer used to receive an ICMP packet */
+#define RECV_PKT_SZ 256
+
+/*
+ * Struct to store the probe info (most is passed in using the argv[] array to
+ * the command given by ilbd). The argv[] contains the following.
+ *
+ * argv[0] is either PROBE_PING or PROBE_PROTO
+ * argv[1] is the VIP
+ * argv[2] is the backend server address
+ * argv[3] is the transport protocol used in the rule
+ * argv[4] is the load balance mode, "DSR", "NAT", "HALF_NAT"
+ * argv[5] is the probe port
+ * argv[6] is the probe timeout
+ *
+ * Both addresses are kept as struct in6_addr; IPv4 addresses are stored in
+ * IPv4-mapped form.
+ *
+ * The remaining fields are filled in while a probe is being sent. The first
+ * three are used by the ICMP ECHO probe, the last one by the UDP probe.
+ *
+ * echo_id is the ID set in the probe
+ * echo_seq is the sequence set in the probe
+ * echo_cookie is the random number data in a probe
+ * lport is the local port (in network byte order) used to send the probe
+ */
+typedef struct {
+ enum probe_type probe;
+ struct in6_addr vip; /* argv[1] */
+ struct in6_addr srv_addr; /* argv[2] */
+ int proto; /* argv[3] */
+ enum lb_mode mode; /* argv[4] */
+ in_port_t port; /* argv[5] */
+ uint32_t timeout; /* argv[6] */
+
+ uint16_t echo_id;
+ uint16_t echo_seq;
+ uint32_t echo_cookie;
+ in_port_t lport;
+} probe_param_t;
+
+/*
+ * Global variable to indicate whether a timeout means success.  It is set
+ * by the query routines before they arm a timer and read asynchronously by
+ * the SIGALRM handler, hence volatile.
+ */
+static volatile boolean_t timeout_is_good;
+
+/*
+ * SIGALRM handler: report the outcome of a probe that ran out of time and
+ * terminate the process.  "0" with exit status 0 is reported when a timeout
+ * means success, "-1" with exit status 255 otherwise.
+ *
+ * Only async-signal-safe calls (write(2) and _exit(2)) are used, because
+ * the signal may interrupt the main flow inside stdio or malloc.  Note that
+ * _exit() does not flush stdio, so anything still buffered on stdout at the
+ * time of the timeout is discarded rather than mixed with this result.
+ */
+/* ARGSUSED */
+static void
+probe_exit(int s)
+{
+	if (timeout_is_good) {
+		(void) write(STDOUT_FILENO, "0", 1);
+		_exit(0);
+	} else {
+		(void) write(STDOUT_FILENO, "-1", 2);
+		_exit(255);
+	}
+}
+
+/*
+ * Internet checksum over len bytes starting at addr (C version, derived
+ * from ping.c).  The data is summed as 16 bit words in a 32 bit
+ * accumulator, the carries are folded back into the low 16 bits, and the
+ * one's complement of the result is returned.
+ */
+static ushort_t
+in_cksum(ushort_t *addr, int len)
+{
+	ushort_t	*word = addr;
+	ushort_t	tail = 0;
+	int		remaining;
+	int		acc = 0;
+
+	/* Sum all complete 16 bit words. */
+	for (remaining = len; remaining > 1; remaining -= 2)
+		acc += *word++;
+
+	/* A trailing odd byte is summed as the first byte of a zeroed word. */
+	if (remaining == 1) {
+		*(uchar_t *)(&tail) = *(uchar_t *)word;
+		acc += tail;
+	}
+
+	/* Fold the high 16 bits into the low 16 bits, then add the carry. */
+	acc = (acc & 0xffff) + (acc >> 16);
+	acc += (acc >> 16);
+
+	/* Complement and truncate to 16 bits. */
+	return ((ushort_t)~acc);
+}
+
+/*
+ * Convert the textual address str into addr.  A string containing ':' is
+ * parsed as IPv6; otherwise a string containing '.' is parsed as IPv4 and
+ * stored in IPv4-mapped form.  Returns B_TRUE on success.
+ *
+ * inet_pton() returns 1 only on success (0 for an unparsable string, -1 for
+ * an error), so anything other than 1 is treated as failure.
+ */
+static boolean_t
+parse_probe_addr(const char *str, struct in6_addr *addr)
+{
+	struct in_addr	v4addr;
+
+	if (strchr(str, ':') != NULL) {
+		if (inet_pton(AF_INET6, str, addr) != 1)
+			return (B_FALSE);
+		return (B_TRUE);
+	}
+	if (strchr(str, '.') != NULL) {
+		if (inet_pton(AF_INET, str, &v4addr) != 1)
+			return (B_FALSE);
+		IN6_INADDR_TO_V4MAPPED(&v4addr, addr);
+		return (B_TRUE);
+	}
+	return (B_FALSE);
+}
+
+/*
+ * Fill in param from the command line.  It is assumed that argv[] contains
+ * PROG_ARGC arguments.  Returns B_TRUE if every argument is valid and
+ * B_FALSE otherwise; param may be partially written on failure.
+ *
+ * param->proto is only set for the TCP and UDP probes.
+ */
+static boolean_t
+parse_probe_param(char *argv[], probe_param_t *param)
+{
+	char	*prog, *end;
+	long	port;
+	int64_t	timeout;
+
+	/* argv[0] (and, for ilb_probe, argv[3]) selects the probe type. */
+	prog = basename(argv[0]);
+	if (strcmp(prog, PROBE_PING) == 0) {
+		param->probe = ping_probe;
+	} else if (strcmp(prog, PROBE_PROTO) != 0) {
+		return (B_FALSE);
+	} else if (strcasecmp(argv[3], PROTO_TCP) == 0) {
+		param->probe = tcp_probe;
+		param->proto = IPPROTO_TCP;
+	} else if (strcasecmp(argv[3], PROTO_UDP) == 0) {
+		param->probe = udp_probe;
+		param->proto = IPPROTO_UDP;
+	} else {
+		return (B_FALSE);
+	}
+
+	/*
+	 * The address family of vip and srv_addr should be the same for
+	 * now.  But in future, we may allow them to be different...  So
+	 * we don't do a check here.
+	 */
+	if (!parse_probe_addr(argv[1], &param->vip))
+		return (B_FALSE);
+	if (!parse_probe_addr(argv[2], &param->srv_addr))
+		return (B_FALSE);
+
+	if (strcasecmp(argv[4], MODE_DSR) == 0)
+		param->mode = dsr;
+	else if (strcasecmp(argv[4], MODE_NAT) == 0)
+		param->mode = nat;
+	else if (strcasecmp(argv[4], MODE_HALF_NAT) == 0)
+		param->mode = half_nat;
+	else
+		return (B_FALSE);
+
+	/* The port must be a complete decimal number in 1..USHRT_MAX. */
+	port = strtol(argv[5], &end, 10);
+	if (end == argv[5] || *end != '\0' || port <= 0 || port > USHRT_MAX)
+		return (B_FALSE);
+	param->port = (in_port_t)port;
+
+	/* The timeout must be a complete decimal number in 1..UINT_MAX. */
+	timeout = strtoll(argv[6], &end, 10);
+	if (end == argv[6] || *end != '\0' || timeout <= 0 ||
+	    timeout > UINT_MAX)
+		return (B_FALSE);
+	param->timeout = (uint32_t)timeout;
+
+	return (B_TRUE);
+}
+
+/*
+ * Set up the destination address to be used to send a probe based on
+ * param.
+ *
+ * In DSR mode the probe is addressed to the VIP and the backend server is
+ * returned through *next_hop; in NAT and half-NAT mode the probe is
+ * addressed to the backend server and *next_hop is set to NULL.
+ *
+ * addr      filled in with the destination (address, family and port)
+ * addr_len  set to the size of the sockaddr type stored in addr
+ * next_hop  set to NULL, or to an ipaddr_t (AF_INET) or a struct
+ *           sockaddr_in6 (AF_INET6) describing the next hop
+ *
+ * The next hop is kept in static storage owned by this function, so the
+ * caller must not free it and a later call overwrites it.  This removes the
+ * unchecked allocations and leaves no field of the next hop uninitialized.
+ *
+ * Returns the address family (AF_INET or AF_INET6) of the destination.
+ */
+static int
+set_sockaddr(struct sockaddr_storage *addr, socklen_t *addr_len,
+    void **next_hop, probe_param_t *param)
+{
+	static ipaddr_t			nh_v4;
+	static struct sockaddr_in6	nh_v6;
+	struct in6_addr			*param_addr;
+	boolean_t			nh;
+	int				af;
+
+	if (param->mode == dsr) {
+		param_addr = &param->vip;
+		nh = B_TRUE;
+	} else {
+		/* nat and half_nat */
+		param_addr = &param->srv_addr;
+		nh = B_FALSE;
+	}
+
+	/* Do not hand unset padding or unused fields to the stack. */
+	bzero(addr, sizeof (*addr));
+
+	if (IN6_IS_ADDR_V4MAPPED(param_addr)) {
+		struct sockaddr_in *v4_addr = (struct sockaddr_in *)addr;
+
+		af = AF_INET;
+		IN6_V4MAPPED_TO_INADDR(param_addr, &v4_addr->sin_addr);
+		v4_addr->sin_family = AF_INET;
+		v4_addr->sin_port = htons(param->port);
+		*addr_len = sizeof (*v4_addr);
+	} else {
+		struct sockaddr_in6 *v6_addr = (struct sockaddr_in6 *)addr;
+
+		af = AF_INET6;
+		v6_addr->sin6_family = AF_INET6;
+		v6_addr->sin6_addr = *param_addr;
+		v6_addr->sin6_port = htons(param->port);
+		v6_addr->sin6_flowinfo = 0;
+		v6_addr->sin6_scope_id = 0;
+		*addr_len = sizeof (*v6_addr);
+	}
+
+	if (!nh) {
+		*next_hop = NULL;
+		return (af);
+	}
+
+	/* The next hop is always the backend server. */
+	if (af == AF_INET) {
+		IN6_V4MAPPED_TO_IPADDR(&param->srv_addr, nh_v4);
+		*next_hop = &nh_v4;
+	} else {
+		bzero(&nh_v6, sizeof (nh_v6));
+		nh_v6.sin6_family = AF_INET6;
+		nh_v6.sin6_addr = param->srv_addr;
+		*next_hop = &nh_v6;
+	}
+
+	return (af);
+}
+
+/*
+ * Use TCP to check if the peer server is alive. Create a TCP socket and
+ * then call connect() to reach the peer server. If connect() does not
+ * return within the timeout period, the SIGALRM handler will be invoked
+ * and tell ilbd that the peer server is not alive.
+ *
+ * On success the time connect() took is printed on stdout (the division by
+ * NANOSEC / MICROSEC suggests microseconds, never reported as 0) and 0 is
+ * returned. On any failure -1 is returned and nothing is printed here.
+ */
+static int
+tcp_query(probe_param_t *param)
+{
+ int ret;
+ int sd, af;
+ struct sockaddr_storage dst_addr;
+ socklen_t dst_addr_len;
+ void *next_hop;
+ hrtime_t start, end;
+ uint32_t rtt;
+
+ ret = 0;
+ next_hop = NULL;
+
+ af = set_sockaddr(&dst_addr, &dst_addr_len, &next_hop, param);
+
+ if ((sd = socket(af, SOCK_STREAM, param->proto)) == -1)
+ return (-1);
+
+ /* DSR mode, need to set the next hop */
+ if (next_hop != NULL) {
+ if (af == AF_INET) {
+ if (setsockopt(sd, IPPROTO_IP, IP_NEXTHOP, next_hop,
+ sizeof (ipaddr_t)) < 0) {
+ ret = -1;
+ goto out;
+ }
+ } else {
+ if (setsockopt(sd, IPPROTO_IPV6, IPV6_NEXTHOP,
+ next_hop, sizeof (struct sockaddr_in6)) < 0) {
+ ret = -1;
+ goto out;
+ }
+ }
+ }
+
+ /* A timeout while connecting means the server is not alive. */
+ timeout_is_good = B_FALSE;
+ (void) alarm(param->timeout);
+ start = gethrtime();
+ if (connect(sd, (struct sockaddr *)&dst_addr, dst_addr_len) != 0) {
+ ret = -1;
+ goto out;
+ }
+ end = gethrtime();
+
+ /* Report at least 1 so that a successful probe never prints "0". */
+ rtt = (end - start) / (NANOSEC / MICROSEC);
+ if (rtt == 0)
+ rtt = 1;
+ (void) printf("%u", rtt);
+
+out:
+ (void) close(sd);
+ return (ret);
+}
+
+/*
+ * Check if the ICMP packet is a port unreachable message in response to
+ * our UDP probe: the embedded packet must be UDP, sent to the probed port
+ * from our local port.  Return -1 if no, 0 if yes.
+ */
+static int
+check_icmp_unreach_v4(struct icmp *icmph, probe_param_t *param)
+{
+	struct ip	*inner_ip;
+	struct udphdr	*inner_udp;
+
+	if (icmph->icmp_type != ICMP_UNREACH ||
+	    icmph->icmp_code != ICMP_UNREACH_PORT)
+		return (-1);
+
+	/* The offending IP header follows the fixed ICMP header. */
+	/* LINTED E_BAD_PTR_CAST_ALIGN */
+	inner_ip = (struct ip *)((char *)icmph + ICMP_MINLEN);
+	if (inner_ip->ip_p != IPPROTO_UDP)
+		return (-1);
+
+	/* LINTED E_BAD_PTR_CAST_ALIGN */
+	inner_udp = (struct udphdr *)((char *)inner_ip +
+	    (inner_ip->ip_hl << 2));
+
+	/* Both ports must match for this to be a response to our probe. */
+	if (inner_udp->uh_dport == htons(param->port) &&
+	    inner_udp->uh_sport == param->lport)
+		return (0);
+
+	return (-1);
+}
+
+/*
+ * Check if the ICMP packet is a reply to our echo request.  The ID, the
+ * sequence, and the cookie and port carried in the data must all match
+ * what was sent.  Return 0 if they do, -1 otherwise.
+ */
+static int
+check_icmp_echo_v4(struct icmp *icmph, probe_param_t *param)
+{
+	uint32_t	rcvd_cookie;
+	in_port_t	rcvd_port;
+
+	if (icmph->icmp_type != ICMP_ECHOREPLY ||
+	    icmph->icmp_id != param->echo_id ||
+	    icmph->icmp_seq != param->echo_seq)
+		return (-1);
+
+	/* The data holds the cookie immediately followed by the port. */
+	bcopy(icmph->icmp_data, &rcvd_cookie, sizeof (rcvd_cookie));
+	if (rcvd_cookie != param->echo_cookie)
+		return (-1);
+
+	bcopy(icmph->icmp_data + sizeof (rcvd_cookie), &rcvd_port,
+	    sizeof (rcvd_port));
+	if (rcvd_port != param->port)
+		return (-1);
+
+	/* All matched, it is a response to the echo we sent. */
+	return (0);
+}
+
+/*
+ * Verify if an ICMP packet is what we expect.
+ *
+ * buf holds rcvd bytes starting with the IPv4 header.  If we sent a UDP
+ * probe, check if the packet is a port unreachable message in response to
+ * our probe.  If we sent an ICMP echo request, check if the packet is a
+ * reply to our echo request.  Return 0 on a match, -1 otherwise.
+ *
+ * All lengths are validated against the actual header lengths found in the
+ * packet before anything beyond them is examined, so the checkers never
+ * look at bytes that were not received.
+ */
+static int
+check_icmp_v4(char *buf, ssize_t rcvd, probe_param_t *param)
+{
+	struct ip	*iph, *inner;
+	struct icmp	*icmph;
+	size_t		len, hlen, inner_hlen;
+
+	if (rcvd < 0 || (size_t)rcvd < sizeof (struct ip))
+		return (-1);
+	len = (size_t)rcvd;
+
+	/* LINTED E_BAD_PTR_CAST_ALIGN */
+	iph = (struct ip *)buf;
+	hlen = iph->ip_hl << 2;
+	if (hlen < sizeof (struct ip) || len < hlen + ICMP_MINLEN)
+		return (-1);
+
+	/* LINTED E_BAD_PTR_CAST_ALIGN */
+	icmph = (struct icmp *)(buf + hlen);
+
+	if (param->probe == udp_probe) {
+		/* The embedded IP header must be present in full... */
+		if (len < hlen + ICMP_MINLEN + sizeof (struct ip))
+			return (-1);
+
+		/* LINTED E_BAD_PTR_CAST_ALIGN */
+		inner = (struct ip *)((char *)icmph + ICMP_MINLEN);
+		inner_hlen = inner->ip_hl << 2;
+
+		/* ...and so must the UDP header that follows it. */
+		if (inner_hlen < sizeof (struct ip) ||
+		    len < hlen + ICMP_MINLEN + inner_hlen +
+		    sizeof (struct udphdr))
+			return (-1);
+
+		return (check_icmp_unreach_v4(icmph, param));
+	}
+
+	/* An echo reply must carry the cookie and the port we sent. */
+	if (len < hlen + ICMP_MINLEN + sizeof (uint32_t) + sizeof (in_port_t))
+		return (-1);
+
+	return (check_icmp_echo_v4(icmph, param));
+}
+
+/*
+ * Check if the ICMPv6 packet is a port unreachable message in response to
+ * our UDP probe: the embedded packet must have UDP as its next header and
+ * carry the probed port and our local port.  Return -1 if no, 0 if yes.
+ */
+static int
+check_icmp_unreach_v6(icmp6_t *icmp6h, probe_param_t *param)
+{
+	ip6_t		*inner_ip6;
+	struct udphdr	*inner_udp;
+
+	if (icmp6h->icmp6_type != ICMP6_DST_UNREACH ||
+	    icmp6h->icmp6_code != ICMP6_DST_UNREACH_NOPORT)
+		return (-1);
+
+	/* The offending IPv6 header follows the fixed ICMPv6 header. */
+	/* LINTED E_BAD_PTR_CAST_ALIGN */
+	inner_ip6 = (ip6_t *)((char *)icmp6h + ICMP6_MINLEN);
+	if (inner_ip6->ip6_nxt != IPPROTO_UDP)
+		return (-1);
+
+	/* The UDP header is taken to directly follow the IPv6 header. */
+	inner_udp = (struct udphdr *)(inner_ip6 + 1);
+
+	if (inner_udp->uh_dport == htons(param->port) &&
+	    inner_udp->uh_sport == param->lport)
+		return (0);
+
+	return (-1);
+}
+
+/*
+ * Check if the ICMPv6 packet is a reply to our echo request.  The ID, the
+ * sequence, and the cookie and port carried in the data must all match
+ * what was sent.  Return 0 if they do, -1 otherwise.
+ */
+static int
+check_icmp_echo_v6(icmp6_t *icmp6h, probe_param_t *param)
+{
+	char		*data;
+	uint32_t	rcvd_cookie;
+	in_port_t	rcvd_port;
+
+	if (icmp6h->icmp6_type != ICMP6_ECHO_REPLY ||
+	    icmp6h->icmp6_id != param->echo_id ||
+	    icmp6h->icmp6_seq != param->echo_seq)
+		return (-1);
+
+	/* The data holds the cookie immediately followed by the port. */
+	data = (char *)icmp6h + ICMP6_MINLEN;
+	bcopy(data, &rcvd_cookie, sizeof (rcvd_cookie));
+	if (rcvd_cookie != param->echo_cookie)
+		return (-1);
+
+	bcopy(data + sizeof (rcvd_cookie), &rcvd_port, sizeof (rcvd_port));
+	if (rcvd_port != param->port)
+		return (-1);
+
+	/* All matched, it is a response to the echo we sent. */
+	return (0);
+}
+
+/*
+ * Verify if an ICMPv6 packet is what we expect.
+ *
+ * buf holds rcvd bytes starting with the ICMPv6 header.  If we sent a UDP
+ * probe, check if the packet is a port unreachable message.  If we sent an
+ * ICMPv6 echo request, check if the packet is a reply.  Return 0 on a
+ * match, -1 otherwise.
+ *
+ * The length is checked against everything the checkers will read, which
+ * for an echo reply includes the cookie and the port stored in the data.
+ */
+static int
+check_icmp_v6(char *buf, ssize_t rcvd, probe_param_t *param)
+{
+	icmp6_t	*icmp6h;
+	size_t	len, need;
+
+	if (rcvd < 0)
+		return (-1);
+	len = (size_t)rcvd;
+
+	/* LINTED E_BAD_PTR_CAST_ALIGN */
+	icmp6h = (icmp6_t *)(buf);
+
+	if (param->probe == udp_probe) {
+		/* ICMPv6 header + embedded IPv6 header + UDP header. */
+		need = ICMP6_MINLEN + sizeof (ip6_t) + sizeof (struct udphdr);
+		if (len < need)
+			return (-1);
+		return (check_icmp_unreach_v6(icmp6h, param));
+	}
+
+	/* ICMPv6 header + cookie + port. */
+	need = ICMP6_MINLEN + sizeof (uint32_t) + sizeof (in_port_t);
+	if (len < need)
+		return (-1);
+	return (check_icmp_echo_v6(icmp6h, param));
+}
+
+/*
+ * Wait for an ICMP reply indefinitely. If we get what we expect, return 0.
+ * If an error happens, return -1.
+ *
+ * af is the address family of recv_sd, exp_from is the address the reply
+ * must come from. Packets from any other address, and packets that fail
+ * the check_icmp_v4()/check_icmp_v6() verification, are silently dropped
+ * and the wait continues. There is no timeout in this function itself.
+ */
+static int
+wait_icmp_reply(int af, int recv_sd, struct sockaddr_storage *exp_from,
+ probe_param_t *param)
+{
+ char buf[RECV_PKT_SZ];
+ socklen_t from_len;
+ ssize_t rcvd;
+ int ret;
+
+ for (;;) {
+ if (af == AF_INET) {
+ struct sockaddr_in v4_from;
+
+ from_len = sizeof (v4_from);
+ if ((rcvd = recvfrom(recv_sd, buf, RECV_PKT_SZ, 0,
+ (struct sockaddr *)&v4_from, &from_len)) < 0) {
+ ret = -1;
+ break;
+ }
+
+ /* Packet not from our peer, ignore it. */
+ if ((((struct sockaddr_in *)exp_from)->sin_addr.s_addr)
+ != v4_from.sin_addr.s_addr) {
+ continue;
+ }
+ if (check_icmp_v4(buf, rcvd, param) == 0) {
+ ret = 0;
+ break;
+ }
+ } else {
+ struct sockaddr_in6 v6_from;
+
+ from_len = sizeof (struct sockaddr_in6);
+ if ((rcvd = recvfrom(recv_sd, buf, RECV_PKT_SZ, 0,
+ (struct sockaddr *)&v6_from, &from_len)) < 0) {
+ ret = -1;
+ break;
+ }
+
+ /* Packet not from our peer, ignore it. */
+ if (!IN6_ARE_ADDR_EQUAL(&(v6_from.sin6_addr),
+ &((struct sockaddr_in6 *)exp_from)->sin6_addr)) {
+ continue;
+ }
+ if (check_icmp_v6(buf, rcvd, param) == 0) {
+ ret = 0;
+ break;
+ }
+ }
+ }
+ return (ret);
+}
+
+/*
+ * Store in *lport the local port (network byte order) that socket sd is
+ * bound to.  Return 0 on success, -1 if the socket name cannot be obtained.
+ */
+static int
+get_lport(int sd, in_port_t *lport)
+{
+	struct sockaddr_storage	local;
+	socklen_t		local_len = sizeof (local);
+
+	if (getsockname(sd, (struct sockaddr *)&local, &local_len) != 0)
+		return (-1);
+
+	/* Anything that is not AF_INET is read as a sockaddr_in6. */
+	*lport = (local.ss_family == AF_INET) ?
+	    ((struct sockaddr_in *)&local)->sin_port :
+	    ((struct sockaddr_in6 *)&local)->sin6_port;
+	return (0);
+}
+
+/*
+ * Use UDP to check if the peer server is alive.  Send a 0 length UDP packet
+ * to the peer server.  If there is no one listening, the peer IP stack
+ * should send back a port unreachable ICMP(v4/v6) packet.  If the peer
+ * server is alive, there should be no response.  So if we get SIGALRM,
+ * the peer is alive.
+ *
+ * Consequently this function only ever returns -1: either something went
+ * wrong or a port unreachable message was received.  Success is reported
+ * by the SIGALRM handler, which terminates the process.
+ */
+static int
+udp_query(probe_param_t *param)
+{
+	int			ret;
+	int			send_sd, recv_sd, af;
+	struct sockaddr_storage	dst_addr;
+	socklen_t		addr_len;
+	void			*next_hop;
+	char			buf[1];
+	struct itimerval	timeout;
+	uint64_t		tm;
+
+	ret = 0;
+	next_hop = NULL;
+
+	af = set_sockaddr(&dst_addr, &addr_len, &next_hop, param);
+
+	if ((send_sd = socket(af, SOCK_DGRAM, param->proto)) == -1)
+		return (-1);
+	if ((recv_sd = socket(af, SOCK_RAW, (af == AF_INET) ? IPPROTO_ICMP :
+	    IPPROTO_ICMPV6)) == -1) {
+		/* Do not leak the socket that was already opened. */
+		(void) close(send_sd);
+		return (-1);
+	}
+
+	/* DSR mode, need to set the next hop */
+	if (next_hop != NULL) {
+		if (af == AF_INET) {
+			if (setsockopt(send_sd, IPPROTO_IP, IP_NEXTHOP,
+			    next_hop, sizeof (ipaddr_t)) < 0) {
+				ret = -1;
+				goto out;
+			}
+		} else {
+			if (setsockopt(send_sd, IPPROTO_IPV6, IPV6_NEXTHOP,
+			    next_hop, sizeof (struct sockaddr_in6)) < 0) {
+				ret = -1;
+				goto out;
+			}
+		}
+	}
+
+	/*
+	 * If ilbd asks us to wait at most t, we will wait for at most
+	 * t', which is 3/4 of t.  If we wait for too long, ilbd may
+	 * timeout and kill us.
+	 *
+	 * The product is computed in 64 bits; in 32 bits it would wrap for
+	 * timeouts above roughly 4294 seconds.  Splitting with / and %
+	 * keeps tv_usec below MICROSEC for every value of tm.
+	 */
+	tm = (((uint64_t)param->timeout * MICROSEC) >> 2) * 3;
+	timeout.it_interval.tv_sec = 0;
+	timeout.it_interval.tv_usec = 0;
+	timeout.it_value.tv_sec = tm / MICROSEC;
+	timeout.it_value.tv_usec = tm % MICROSEC;
+
+	/* From here on, running out of time means the server is alive. */
+	timeout_is_good = B_TRUE;
+	if (setitimer(ITIMER_REAL, &timeout, NULL) != 0) {
+		ret = -1;
+		goto out;
+	}
+
+	/* A zero length datagram: sendto() returns 0 on success. */
+	if (sendto(send_sd, buf, 0, 0, (struct sockaddr *)&dst_addr,
+	    addr_len) != 0) {
+		ret = -1;
+		goto out;
+	}
+
+	/* The local port is needed to match the ICMP error to our probe. */
+	if ((ret = get_lport(send_sd, &param->lport)) != 0)
+		goto out;
+
+	/*
+	 * If the server app is listening, we should not get back a
+	 * response.  So if wait_icmp_reply() returns, either there
+	 * is an error or we get back something.
+	 */
+	(void) wait_icmp_reply(af, recv_sd, &dst_addr, param);
+	ret = -1;
+
+out:
+	(void) close(send_sd);
+	(void) close(recv_sd);
+	return (ret);
+}
+
+/*
+ * Size (in uint32_t) of the ping packet to be sent to server. It includes
+ * a cookie (random number) + the target port. The cookie and port are used
+ * for matching ping request since there can be many such ping packets sent
+ * to different servers from the same source address and using the same VIP.
+ * The data following the ICMP header is 2 * sizeof (uint32_t) bytes long:
+ * the cookie, the port, and two bytes of padding at the end.
+ */
+#define PING_PKT_LEN \
+ ((ICMP_MINLEN + 2 * sizeof (uint32_t)) / sizeof (uint32_t))
+
+/*
+ * Return a 32 bit random number read from the pseudo random number device
+ * /dev/urandom.  If the device cannot be opened or does not supply a full
+ * value, fall back to (uint32_t)gethrtime().
+ */
+static uint32_t
+get_random(void)
+{
+	uint32_t	value;
+	ssize_t		got;
+	int		fd;
+
+	fd = open("/dev/urandom", O_RDONLY);
+	if (fd != -1) {
+		got = read(fd, &value, sizeof (value));
+		(void) close(fd);
+		if (got == sizeof (value))
+			return (value);
+	}
+
+	/* Back up: use the low bits of the high resolution timer. */
+	return ((uint32_t)gethrtime());
+}
+
+/*
+ * Use ICMP(v4/v6) echo request to check if the peer server machine is
+ * reachable. Send a echo request and expect to get back a echo reply.
+ *
+ * The ID, sequence and cookie of the request are recorded in param so that
+ * the reply can be matched. On success the elapsed time is printed on
+ * stdout (never as 0) and 0 is returned; on failure -1 is returned. If no
+ * matching reply arrives before the alarm fires, the SIGALRM handler ends
+ * the process.
+ */
+static int
+ping_query(probe_param_t *param)
+{
+ int ret;
+ int sd, af;
+ struct sockaddr_storage dst_addr;
+ socklen_t dst_addr_len;
+ void *next_hop;
+ hrtime_t start, end;
+ uint32_t rtt;
+ uint32_t buf[PING_PKT_LEN];
+ struct icmp *icmph;
+
+ ret = 0;
+ next_hop = NULL;
+
+ af = set_sockaddr(&dst_addr, &dst_addr_len, &next_hop, param);
+
+ if ((sd = socket(af, SOCK_RAW, (af == AF_INET) ? IPPROTO_ICMP :
+ IPPROTO_ICMPV6)) == -1) {
+ return (-1);
+ }
+
+ /* DSR mode, need to set the next hop */
+ if (next_hop != NULL) {
+ if (af == AF_INET) {
+ if (setsockopt(sd, IPPROTO_IP, IP_NEXTHOP, next_hop,
+ sizeof (ipaddr_t)) < 0) {
+ ret = -1;
+ goto out;
+ }
+ } else {
+ if (setsockopt(sd, IPPROTO_IPV6, IPV6_NEXTHOP,
+ next_hop, sizeof (struct sockaddr_in6)) < 0) {
+ ret = -1;
+ goto out;
+ }
+ }
+ }
+
+ /*
+ * Build the echo request. The struct icmp layout is used for both
+ * address families; only the type value differs.
+ */
+ bzero(buf, sizeof (buf));
+ icmph = (struct icmp *)buf;
+ icmph->icmp_type = af == AF_INET ? ICMP_ECHO : ICMP6_ECHO_REQUEST;
+ icmph->icmp_code = 0;
+ icmph->icmp_cksum = 0;
+ icmph->icmp_id = htons(gethrtime() % USHRT_MAX);
+ icmph->icmp_seq = htons(gethrtime() % USHRT_MAX);
+
+ /*
+ * The data is the cookie followed by the port. The port is copied as
+ * stored in param (no byte order conversion), which is also how the
+ * reply checkers compare it.
+ */
+ param->echo_cookie = get_random();
+ bcopy(&param->echo_cookie, icmph->icmp_data,
+ sizeof (param->echo_cookie));
+ bcopy(&param->port, icmph->icmp_data + sizeof (param->echo_cookie),
+ sizeof (param->port));
+ /*
+ * NOTE(review): the same checksum is computed for ICMPv6, without a
+ * pseudo-header; presumably the stack fills in the real ICMPv6
+ * checksum on raw sockets - confirm.
+ */
+ icmph->icmp_cksum = in_cksum((ushort_t *)buf, sizeof (buf));
+ param->echo_id = icmph->icmp_id;
+ param->echo_seq = icmph->icmp_seq;
+
+ /* A timeout while waiting for the reply means failure. */
+ timeout_is_good = B_FALSE;
+ (void) alarm(param->timeout);
+ start = gethrtime();
+ if (sendto(sd, buf, sizeof (buf), 0, (struct sockaddr *)&dst_addr,
+ dst_addr_len) != sizeof (buf)) {
+ ret = -1;
+ goto out;
+ }
+ if (wait_icmp_reply(af, sd, &dst_addr, param) != 0) {
+ ret = -1;
+ goto out;
+ }
+ end = gethrtime();
+
+ /* Report at least 1 so that a successful probe never prints "0". */
+ rtt = (end - start) / (NANOSEC / MICROSEC);
+ if (rtt == 0)
+ rtt = 1;
+ (void) printf("%u", rtt);
+
+out:
+ (void) close(sd);
+ return (ret);
+}
+
+/*
+ * Entry point.  Validate the PROG_ARGC arguments passed in by ilbd, install
+ * the SIGALRM handler and run the requested probe.  Whenever the result is
+ * a failure, "-1" is printed on stdout and -1 is returned.
+ */
+int
+main(int argc, char *argv[])
+{
+	probe_param_t	param;
+	int		ret;
+
+	/*
+	 * The checks are evaluated in order and stop at the first failure:
+	 * argument count, handler installation, argument parsing.
+	 */
+	if (argc != PROG_ARGC ||
+	    signal(SIGALRM, probe_exit) == SIG_ERR ||
+	    !parse_probe_param(argv, &param)) {
+		(void) printf("-1");
+		return (-1);
+	}
+
+	switch (param.probe) {
+	case ping_probe:
+		ret = ping_query(&param);
+		break;
+	case tcp_probe:
+		ret = tcp_query(&param);
+		break;
+	case udp_probe:
+		ret = udp_query(&param);
+		break;
+	}
+
+	/* The query routines print the result themselves on success. */
+	if (ret == -1)
+		(void) printf("-1");
+
+	return (ret);
+}
diff --git a/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd.h b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd.h
new file mode 100644
index 0000000000..eda185efab
--- /dev/null
+++ b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd.h
@@ -0,0 +1,435 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+#ifndef _ILBD_H
+#define _ILBD_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdio.h>
+#include <ucred.h>
+#include <pwd.h>
+#include <priv.h>
+#include <stdarg.h>
+#include <syslog.h>
+#include <sys/list.h>
+#include <libscf.h>
+#include <libintl.h>
+#include <locale.h>
+#include <libinetutil.h>
+#include <auth_list.h>
+#include <bsm/adt.h>
+#include <bsm/adt_event.h>
+
+#define SGNAME_SZ 80
+#define ILB_FMRI "svc:/network/loadbalancer/ilb:default"
+
+#define HC_ACTION ILB_SRV_DISABLED_HC
+#define ADMIN_ACTION ILB_SRV_DISABLED_ADMIN
+
+/* Max name and value length for scf properties */
+#define ILBD_MAX_NAME_LEN ilbd_scf_limit(SCF_LIMIT_MAX_NAME_LENGTH)
+#define ILBD_MAX_VALUE_LEN ilbd_scf_limit(SCF_LIMIT_MAX_VALUE_LENGTH)
+
+/* Different events ILBD is interested in. */
+typedef enum {
+ ILBD_EVENT_NEW_REQ, /* New client request */
+ ILBD_EVENT_REQ, /* Client request comes in */
+ ILBD_EVENT_REP_OK, /* Reply channel to client is writeable */
+ ILBD_EVENT_PROBE, /* A HC returns some result */
+ ILBD_EVENT_TIMER /* ilbd_timer_q fired */
+} ilbd_event_t;
+
+typedef enum {
+ ILBD_SCF_RULE, /* prop group for rules */
+ ILBD_SCF_SG, /* prop group for servergroups */
+ ILBD_SCF_HC /* prop group for healthchecks */
+} ilbd_scf_pg_type_t;
+
+/*
+ * Kind of operation applied to the persistent (SCF) configuration.
+ * NOTE(review): the consumers of this enum are not in this header;
+ * the meanings below are taken from the names only - confirm in ilbd_scf.c.
+ */
+typedef enum {
+ ILBD_SCF_CREATE, /* presumably: create an object */
+ ILBD_SCF_DESTROY, /* presumably: destroy an object */
+ ILBD_SCF_ENABLE_DISABLE /* presumably: toggle enabled state */
+} ilbd_scf_cmd_t;
+
+typedef enum {
+ ILBD_STRING, /* string */
+ ILBD_INT, /* int */
+ ILBD_ADDR_V4, /* ipv4 addr */
+ ILBD_ADDR_V6 /* ipv6 addr */
+} ilbd_scf_data_type_t;
+
+/*
+ * Server status indication; this is the type of the last argument of
+ * ilbd_k_Xable_server().
+ * NOTE(review): per-value meanings are inferred from the names - confirm.
+ */
+typedef enum {
+ stat_enable_server,
+ stat_disable_server,
+ stat_declare_srv_dead,
+ stat_declare_srv_alive
+} ilbd_srv_status_ind_t;
+
+/*
+ * All user struct pointer passed to port_associate() should have the first
+ * field as ilbd_event_t. The following struct can be used to find the
+ * event.
+ */
+typedef struct {
+ ilbd_event_t ev;
+} ilbd_event_obj_t;
+
+typedef struct {
+ ilbd_event_t ev;
+ timer_t timerid;
+} ilbd_timer_event_obj_t;
+
+/*
+ * A server as kept by ilbd: the libilb server description (isv_srv) plus
+ * list linkage. The isv_* macros below are short hands for the fields
+ * inside isv_srv.
+ */
+typedef struct ilbd_srv {
+ list_node_t isv_srv_link; /* list linkage */
+ ilb_sg_srv_t isv_srv; /* server description */
+#define isv_addr isv_srv.sgs_addr
+#define isv_minport isv_srv.sgs_minport
+#define isv_maxport isv_srv.sgs_maxport
+#define isv_flags isv_srv.sgs_flags
+#define isv_id isv_srv.sgs_id
+#define isv_srvID isv_srv.sgs_srvID
+} ilbd_srv_t;
+
+#define MAX_SRVCOUNT 1000
+#define MAX_SRVID (MAX_SRVCOUNT - 1)
+#define BAD_SRVID (-1)
+
+/*
+ * A server group as kept by ilbd. It holds the list of its servers and
+ * the list of rules attached to it.
+ */
+typedef struct ilbd_sg {
+ list_t isg_srvlist; /* list of ilbd_srv_t */
+ char isg_name[ILB_SGNAME_SZ];
+ int32_t isg_srvcount; /* presumably # of servers - confirm */
+ int32_t isg_max_id; /* NOTE(review): likely highest ID used */
+ list_t isg_rulelist; /* list of ilbd_rule_t */
+ char isg_id_arr[MAX_SRVCOUNT]; /* for server ID allocation */
+
+ list_node_t isg_link; /* linkage for sg list */
+} ilbd_sg_t;
+
+/*
+ * A rule as kept by ilbd: the libilb rule description (irl_info), a
+ * pointer to the server group it uses and list linkage. The irl_* macros
+ * below are short hands for the fields inside irl_info.
+ */
+typedef struct ilbd_rule {
+ list_node_t irl_link; /* list linkage */
+ list_node_t irl_sglink; /* presumably linkage on isg_rulelist */
+ ilbd_sg_t *irl_sg; /* server group of this rule */
+ ilb_rule_info_t irl_info; /* rule description */
+#define irl_flags irl_info.rl_flags
+#define irl_name irl_info.rl_name
+#define irl_vip irl_info.rl_vip
+#define irl_proto irl_info.rl_proto
+#define irl_ipversion irl_info.rl_ipversion
+#define irl_minport irl_info.rl_minport
+#define irl_maxport irl_info.rl_maxport
+#define irl_algo irl_info.rl_algo
+#define irl_topo irl_info.rl_topo
+#define irl_nat_src_start irl_info.rl_nat_src_start
+#define irl_nat_src_end irl_info.rl_nat_src_end
+#define irl_stickymask irl_info.rl_stickymask
+#define irl_conndrain irl_info.rl_conndrain
+#define irl_nat_timeout irl_info.rl_nat_timeout
+#define irl_sticky_timeout irl_info.rl_sticky_timeout
+#define irl_hcport irl_info.rl_hcport
+#define irl_hcpflag irl_info.rl_hcpflag
+#define irl_sgname irl_info.rl_sgname
+#define irl_hcname irl_info.rl_hcname
+} ilbd_rule_t;
+
+/*
+ * Health check related definitions
+ */
+
+/* Default health check probe program provided */
+#define ILB_PROBE_PROTO "/usr/lib/inet/ilb/ilb_probe"
+
+/* Command name (argv[0]) passed to ilb_probe to indicate a ping test */
+#define ILB_PROBE_PING "ilb_ping"
+
+/* Use the first character of the rule's hcname to decide if rule has HC. */
+#define RULE_HAS_HC(irl) ((irl)->irl_info.rl_hcname[0] != '\0')
+
+/* Type of probe test */
+typedef enum {
+ ILBD_HC_PING = 1, /* ICMP Echo probe */
+ ILBD_HC_TCP, /* TCP connect probe */
+ ILBD_HC_UDP, /* UDP packet probe */
+ ILBD_HC_USER /* User supplied probe */
+} ilbd_hc_test_t;
+
+/* Struct representing a hc object in ilbd */
+typedef struct {
+ list_node_t ihc_link; /* List linkage */
+
+ ilb_hc_info_t ihc_info;
+/* Short hand for the fields inside ilb_hc_info_t */
+#define ihc_name ihc_info.hci_name
+#define ihc_test ihc_info.hci_test
+#define ihc_timeout ihc_info.hci_timeout
+#define ihc_count ihc_info.hci_count
+#define ihc_interval ihc_info.hci_interval
+#define ihc_def_ping ihc_info.hci_def_ping
+
+ ilbd_hc_test_t ihc_test_type; /* Type of probe test */
+ int ihc_rule_cnt; /* Num of rules associated with hc */
+ list_t ihc_rules; /* Rules associated with this hc */
+} ilbd_hc_t;
+
+struct ilbd_hc_srv_s;
+
+/*
+ * Struct representing a hc rule object
+ *
+ * hcr_link: list linkage
+ * hcr_rule: pointer to the ilbd rule object
+ * hcr_servers: list of servers of this rule
+ */
+typedef struct {
+ list_node_t hcr_link;
+ ilbd_rule_t const *hcr_rule;
+ list_t hcr_servers;
+} ilbd_hc_rule_t;
+
+struct ilbd_hc_srv_s;
+
+/*
+ * Struct representing an event of the probe process
+ *
+ * ihp_ev: the event type, which is ILBD_EVENT_PROBE
+ * ihp_srv: pointer to the hc server object
+ * ihp_pid: pid of the probe process
+ * ihp_done: is ilbd done reading the output of the probe process
+ */
+typedef struct {
+ ilbd_event_t ihp_ev;
+ struct ilbd_hc_srv_s *ihp_srv;
+ pid_t ihp_pid;
+ boolean_t ihp_done;
+} ilbd_hc_probe_event_t;
+
+/*
+ * ilbd_hc_srv_t state
+ *
+ * ilbd_hc_def_pinging: the default ping should be run
+ * ilbd_hc_probing: the probe process should be started
+ */
+enum ilbd_hc_state {
+ ilbd_hc_def_pinging,
+ ilbd_hc_probing
+};
+
+/*
+ * Struct representing a server associated with a hc object
+ *
+ * shc_srv_link: list linkage
+ * shc_hc: pointer to the hc object
+ * shc_hc_rule: pointer to the hc rule object
+ * shc_sg_srv: pointer to the server (ilb_sg_srv_t) of the server group
+ * shc_tid: timeout ID
+ * shc_cur_cnt: number of times the hc probe has been run
+ * shc_fail_cnt: number of consecutive probe failures
+ * shc_status: health status
+ * shc_rtt: rtt (in micro sec) to the backend server
+ * shc_lasttime: last time a probe sequence is executed
+ * shc_nexttime: next time a probe sequence is executed
+ * shc_state: hc probe state
+ * shc_child_pid: pid of the probe process
+ * shc_child_fd: fd to the output of the probe process
+ * shc_ev: event object of the probe process
+ * shc_ev_port: event port of the event object
+ */
+typedef struct ilbd_hc_srv_s {
+ list_node_t shc_srv_link;
+ ilbd_hc_t *shc_hc;
+ ilbd_hc_rule_t *shc_hc_rule;
+ ilb_sg_srv_t const *shc_sg_srv;
+
+ iu_timer_id_t shc_tid;
+ uint_t shc_cur_cnt;
+ uint_t shc_fail_cnt;
+ ilb_hc_srv_status_t shc_status;
+ uint32_t shc_rtt;
+ time_t shc_lasttime;
+ time_t shc_nexttime;
+
+ enum ilbd_hc_state shc_state;
+ pid_t shc_child_pid;
+ int shc_child_fd;
+ ilbd_hc_probe_event_t *shc_ev;
+ int shc_ev_port;
+} ilbd_hc_srv_t;
+
+/*
+ * Structure for holding audit server and servergroup event
+ * data. Not all events use all members of the structure.
+ */
+typedef struct audit_sg_event_data {
+ char *ed_server_address; /* server's IP address */
+ char *ed_serverid; /* serverid. */
+ uint16_t ed_minport; /* server's minport */
+ uint16_t ed_maxport; /* server's maxport */
+ char *ed_sgroup; /* servergroup */
+} audit_sg_event_data_t;
+
+/* Struct to store client info */
+typedef struct {
+ ilbd_event_t cli_ev; /* event type; must be the first field */
+ int cli_sd; /* presumably the client socket */
+ struct passwd cli_pw; /* passwd entry of the client */
+ size_t cli_pw_bufsz; /* presumably size of cli_pw_buf */
+ char *cli_pw_buf; /* presumably storage used by cli_pw */
+ ilbd_cmd_t cli_cmd; /* command of the client */
+ ilb_comm_t *cli_saved_reply; /* NOTE(review): likely pending reply */
+ size_t cli_saved_size; /* presumably size of cli_saved_reply */
+ ucred_t *cli_peer_ucredp; /* needed for auditing */
+} ilbd_client_t;
+
+void ilbd_reply_ok(uint32_t *, size_t *);
+void ilbd_reply_err(uint32_t *, size_t *, ilb_status_t);
+
+ilb_status_t ilbd_check_client_config_auth(const struct passwd *);
+ilb_status_t ilbd_check_client_enable_auth(const struct passwd *);
+ilb_status_t ilbd_retrieve_names(ilbd_cmd_t, uint32_t *, size_t *);
+void i_setup_sg_hlist(void);
+void i_setup_rule_hlist(void);
+void logperror(const char *);
+ilb_status_t ilbd_add_server_to_group(ilb_sg_info_t *, int,
+ const struct passwd *, ucred_t *);
+ilb_status_t ilbd_rem_server_from_group(ilb_sg_info_t *, int,
+ const struct passwd *, ucred_t *);
+ilb_status_t ilbd_create_sg(ilb_sg_info_t *, int,
+ const struct passwd *, ucred_t *);
+
+ilb_status_t ilbd_destroy_sg(const char *, const struct passwd *,
+ ucred_t *);
+ilb_status_t ilbd_retrieve_sg_hosts(const char *, uint32_t *, size_t *);
+
+ilb_status_t ilbd_enable_server(ilb_sg_info_t *, const struct passwd *,
+ ucred_t *);
+ilb_status_t ilbd_disable_server(ilb_sg_info_t *, const struct passwd *,
+ ucred_t *);
+ilb_status_t ilbd_k_Xable_server(const struct in6_addr *, const char *,
+ ilbd_srv_status_ind_t);
+
+ilb_status_t i_add_srv2krules(list_t *, ilb_sg_srv_t *, int);
+ilb_status_t i_rem_srv_frm_krules(list_t *, ilb_sg_srv_t *, int);
+int ilbd_get_num_krules(void);
+ilb_status_t ilbd_get_krule_names(ilbd_namelist_t **, int);
+ilb_status_t ilb_get_krule_servers(ilb_sg_info_t *);
+ilbd_sg_t *i_find_sg_byname(const char *);
+ilb_status_t i_check_srv2rules(list_t *, ilb_sg_srv_t *);
+
+ilb_status_t ilbd_address_to_srvID(ilb_sg_info_t *, uint32_t *, size_t *);
+ilb_status_t ilbd_srvID_to_address(ilb_sg_info_t *, uint32_t *, size_t *);
+
+ilb_status_t do_ioctl(void *, ssize_t);
+
+ilb_status_t ilbd_create_rule(ilb_rule_info_t *, int, const struct passwd *,
+ ucred_t *);
+ilb_status_t ilbd_retrieve_rule(ilbd_name_t, uint32_t *, size_t *);
+
+ilb_status_t ilbd_destroy_rule(ilbd_name_t, const struct passwd *,
+ ucred_t *);
+ilb_status_t ilbd_enable_rule(ilbd_name_t, const struct passwd *, ucred_t *);
+ilb_status_t ilbd_disable_rule(ilbd_name_t, const struct passwd *,
+ ucred_t *);
+
+boolean_t is_debugging_on(void);
+ilb_status_t ilbd_sg_check_rule_port(ilbd_sg_t *, ilb_rule_info_t *);
+
+void ilbd_enable_debug(void);
+ilb_status_t ilb_map_errno2ilbstat(int);
+
+ilb_status_t i_attach_rule2sg(ilbd_sg_t *, ilbd_rule_t *);
+
+/*
+ * Logging routine and macros. ilbd_log() takes a syslog priority followed
+ * by a printf style format and its arguments; logerr() and logdebug() are
+ * wrappers supplying LOG_ERR and LOG_DEBUG respectively.
+ */
+void ilbd_log(int, const char *, ...);
+#define logerr(...) ilbd_log(LOG_ERR, __VA_ARGS__)
+#define logdebug(...) ilbd_log(LOG_DEBUG, __VA_ARGS__)
+
+/* Health check manipulation routines */
+void i_ilbd_setup_hc_list(void);
+ilb_status_t ilbd_create_hc(const ilb_hc_info_t *, int,
+ const struct passwd *, ucred_t *);
+ilb_status_t ilbd_destroy_hc(const char *, const struct passwd *, ucred_t *);
+ilbd_hc_t *ilbd_get_hc(const char *);
+ilb_status_t ilbd_get_hc_info(const char *, uint32_t *, size_t *);
+ilb_status_t ilbd_get_hc_srvs(const char *, uint32_t *, size_t *);
+ilb_status_t ilbd_hc_associate_rule(const ilbd_rule_t *, int);
+ilb_status_t ilbd_hc_dissociate_rule(const ilbd_rule_t *);
+ilb_status_t ilbd_hc_add_server(const ilbd_rule_t *, const ilb_sg_srv_t *,
+ int);
+ilb_status_t ilbd_hc_del_server(const ilbd_rule_t *, const ilb_sg_srv_t *);
+ilb_status_t ilbd_hc_enable_rule(const ilbd_rule_t *);
+ilb_status_t ilbd_hc_disable_rule(const ilbd_rule_t *);
+ilb_status_t ilbd_hc_enable_server(const ilbd_rule_t *,
+ const ilb_sg_srv_t *);
+ilb_status_t ilbd_hc_disable_server(const ilbd_rule_t *,
+ const ilb_sg_srv_t *);
+
+/* Health check timer routines */
+void ilbd_hc_probe_return(int, int, int, ilbd_hc_probe_event_t *);
+void ilbd_hc_timer_init(int, ilbd_timer_event_obj_t *);
+void ilbd_hc_timeout(void);
+void ilbd_hc_timer_update(ilbd_timer_event_obj_t *);
+
+/* Show NAT info routines */
+ilb_status_t ilbd_show_nat(void *, const ilb_comm_t *, uint32_t *,
+ size_t *);
+void ilbd_show_nat_cleanup(void);
+
+
+/* Show sticky info routines */
+ilb_status_t ilbd_show_sticky(void *, const ilb_comm_t *, uint32_t *,
+ size_t *);
+void ilbd_show_sticky_cleanup(void);
+
+ilb_status_t ilbd_create_pg(ilbd_scf_pg_type_t, void *);
+ilb_status_t ilbd_destroy_pg(ilbd_scf_pg_type_t, const char *);
+ilb_status_t ilbd_change_prop(ilbd_scf_pg_type_t, const char *,
+ const char *, void *);
+void ilbd_scf_str_to_ip(int, char *, struct in6_addr *);
+ilb_status_t ilbd_scf_ip_to_str(uint16_t, struct in6_addr *, scf_type_t *,
+ char *);
+ilb_status_t ilbd_scf_add_srv(ilbd_sg_t *, ilbd_srv_t *);
+ilb_status_t ilbd_scf_del_srv(ilbd_sg_t *, ilbd_srv_t *);
+int ilbd_scf_limit(int);
+
+ilb_status_t ilbd_walk_rule_pgs(ilb_status_t (*)(ilb_rule_info_t *, int,
+ const struct passwd *, ucred_t *), void *, void *);
+ilb_status_t ilbd_walk_sg_pgs(ilb_status_t (*)(ilb_sg_info_t *, int,
+ const struct passwd *, ucred_t *), void *, void *);
+ilb_status_t ilbd_walk_hc_pgs(ilb_status_t (*)(const ilb_hc_info_t *, int,
+ const struct passwd *, ucred_t *), void *, void *);
+void ilbd_addr2str(struct in6_addr *, char *, size_t);
+void addr2str(ilb_ip_addr_t, char *, size_t);
+void ilbd_algo_to_str(ilb_algo_t, char *);
+void ilbd_topo_to_str(ilb_topo_t, char *);
+void ilbd_ip_to_str(uint16_t, struct in6_addr *, char *);
+int ilberror2auditerror(ilb_status_t);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ILBD_H */
diff --git a/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd.xml b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd.xml
new file mode 100644
index 0000000000..567fb9439d
--- /dev/null
+++ b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd.xml
@@ -0,0 +1,115 @@
+<?xml version="1.0"?>
+<!DOCTYPE service_bundle SYSTEM "/usr/share/lib/xml/dtd/service_bundle.dtd.1">
+<!--
+ Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ Use is subject to license terms.
+
+ CDDL HEADER START
+
+ The contents of this file are subject to the terms of the
+ Common Development and Distribution License (the "License").
+ You may not use this file except in compliance with the License.
+
+ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ or http://www.opensolaris.org/os/licensing.
+ See the License for the specific language governing permissions
+ and limitations under the License.
+
+ When distributing Covered Code, include this CDDL HEADER in each
+ file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ If applicable, add the following below this CDDL HEADER, with the
+ fields enclosed by brackets "[]" replaced with your own identifying
+ information: Portions Copyright [yyyy] [name of copyright owner]
+
+ CDDL HEADER END
+
+ NOTE: This service manifest is not editable; its contents will
+ be overwritten by package or patch operations, including
+ operating system upgrade. Make customizations in a different
+ file.
+
+ Service manifest for Integrated Load balancer(ILB).
+-->
+
+<service_bundle type='manifest' name='SUNWilbr:ilb'>
+
+<service
+ name='network/loadbalancer/ilb'
+ type='service'
+ version='1'>
+
+ <create_default_instance enabled='false' />
+
+ <single_instance />
+
+ <!--
+ Ensure that name services is enabled before ILB service begins.
+ -->
+ <dependency
+ name='name-services'
+ grouping='require_all'
+ restart_on='error'
+ type='service'>
+ <service_fmri value='svc:/milestone/name-services' />
+ </dependency>
+
+
+ <!--
+ Ensure that forwarding service is enabled before load balancing
+ service begins. The forwarding service is dependent on
+ the filesystem and devices services.
+ -->
+ <dependency
+ name='network'
+ grouping='require_any'
+ restart_on='error'
+ type='service'>
+ <service_fmri value='svc:/network/ipv4-forwarding' />
+ <service_fmri value='svc:/network/ipv6-forwarding' />
+ </dependency>
+
+ <exec_method
+ type='method'
+ name='stop'
+ exec=':kill'
+ timeout_seconds='60' >
+ </exec_method>
+
+ <exec_method
+ type='method'
+ name='start'
+ exec='/usr/lib/inet/ilbd'
+ timeout_seconds='60' >
+ <method_context>
+ <method_credential user='root' group='root' />
+ </method_context>
+ </exec_method>
+
+ <!-- authorization needed to administer (e.g. start/stop) the ILB service -->
+ <property_group name='general' type='framework'>
+ <propval name='action_authorization' type='astring'
+ value='solaris.smf.manage.ilb' />
+ <propval name='value_authorization' type='astring'
+ value='solaris.smf.manage.ilb' />
+ </property_group>
+
+
+ <stability value='Unstable' />
+
+ <template>
+
+ <common_name>
+ <loctext xml:lang='C'> Integrated layer 3/4 load balancer
+ </loctext>
+ </common_name>
+ <documentation>
+ <manpage title='ilbd' section='1M'
+ manpath='/usr/share/man' />
+ <manpage title='ilbadm' section='1M'
+ manpath='/usr/share/man' />
+ </documentation>
+
+ </template>
+</service>
+
+</service_bundle>
diff --git a/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_hc.c b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_hc.c
new file mode 100644
index 0000000000..da1ab63efe
--- /dev/null
+++ b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_hc.c
@@ -0,0 +1,1574 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/list.h>
+#include <sys/stropts.h>
+#include <sys/siginfo.h>
+#include <sys/wait.h>
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <strings.h>
+#include <stddef.h>
+#include <unistd.h>
+#include <libilb.h>
+#include <port.h>
+#include <time.h>
+#include <signal.h>
+#include <assert.h>
+#include <errno.h>
+#include <spawn.h>
+#include <fcntl.h>
+#include <limits.h>
+#include "libilb_impl.h"
+#include "ilbd.h"
+
+/* Global list of HC objects */
+list_t ilbd_hc_list;
+
+/* Timer queue for all hc related timers. */
+static iu_tq_t *ilbd_hc_timer_q;
+
+/* Indicate whether the timer needs to be updated */
+static boolean_t hc_timer_restarted;
+
+static void ilbd_hc_probe_timer(iu_tq_t *, void *);
+static ilb_status_t ilbd_hc_restart_timer(ilbd_hc_t *, ilbd_hc_srv_t *);
+static boolean_t ilbd_run_probe(ilbd_hc_srv_t *);
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+
+/*
+ * Number of arguments passed to a probe. argv[0] is the path name of
+ * the probe.
+ */
+#define HC_PROBE_ARGC 8
+
+/*
+ * Max number of characters to be read from the output of a probe. It
+ * is long enough to read in a 64 bit integer.
+ */
+#define HC_MAX_PROBE_OUTPUT 24
+
+/*
+ * Initialize the global list of hc objects (ilbd_hc_list). Its entries
+ * are ilbd_hc_t structs linked through their ihc_link field.
+ */
+void
+i_ilbd_setup_hc_list(void)
+{
+ list_create(&ilbd_hc_list, sizeof (ilbd_hc_t),
+ offsetof(ilbd_hc_t, ihc_link));
+}
+
+/*
+ * Look up a hc object by name on the global hc list. The comparison
+ * ignores case. Returns the matching object, or NULL if there is none.
+ */
+ilbd_hc_t *
+ilbd_get_hc(const char *name)
+{
+	ilbd_hc_t *cur = list_head(&ilbd_hc_list);
+
+	while (cur != NULL) {
+		if (strcasecmp(cur->ihc_name, name) == 0)
+			break;
+		cur = list_next(&ilbd_hc_list, cur);
+	}
+	/* cur is NULL here when the whole list was walked without a match */
+	return (cur);
+}
+
+/*
+ * Generate an audit record for the create-healthcheck and
+ * delete-healthcheck subcommands.
+ *
+ * audit_hcname: name of the hc object; only read for ILBD_DESTROY_HC
+ * audit_hcinfo: hc info; only read for ILBD_CREATE_HC
+ * cmd: ILBD_CREATE_HC or ILBD_DESTROY_HC; anything else is logged
+ *	and no record is generated
+ * rc: status of the operation being audited
+ * ucredp: credential of the requesting client
+ *
+ * A failure of any audit library call terminates the daemon.
+ */
+static void
+ilbd_audit_hc_event(const char *audit_hcname,
+    const ilb_hc_info_t *audit_hcinfo, ilbd_cmd_t cmd,
+    ilb_status_t rc, ucred_t *ucredp)
+{
+	adt_session_data_t *ah;
+	adt_event_data_t *event;
+	au_event_t flag;
+	int audit_error;
+
+	if ((ucredp == NULL) && (cmd == ILBD_CREATE_HC)) {
+		/*
+		 * we came here from the path where ilbd incorporates
+		 * the configuration that is listed in SCF:
+		 * i_ilbd_read_config->ilbd_walk_hc_pgs->
+		 * ->ilbd_scf_instance_walk_pg->ilbd_create_hc
+		 * We skip auditing in that case
+		 */
+		logdebug("ilbd_audit_hc_event: skipping auditing");
+		return;
+	}
+
+	/*
+	 * Pick the audit event ID up front, so that an unexpected command
+	 * can never reach adt_alloc_event() with an uninitialized ID.
+	 */
+	switch (cmd) {
+	case ILBD_CREATE_HC:
+		flag = ADT_ilb_create_healthcheck;
+		break;
+	case ILBD_DESTROY_HC:
+		flag = ADT_ilb_delete_healthcheck;
+		break;
+	default:
+		logerr("ilbd_audit_hc_event: unexpected command %d",
+		    (int)cmd);
+		return;
+	}
+
+	if (adt_start_session(&ah, NULL, 0) != 0) {
+		logerr("ilbd_audit_hc_event: adt_start_session failed");
+		exit(EXIT_FAILURE);
+	}
+	if (adt_set_from_ucred(ah, ucredp, ADT_NEW) != 0) {
+		(void) adt_end_session(ah);
+		logerr("ilbd_audit_hc_event: adt_set_from_ucred failed");
+		exit(EXIT_FAILURE);
+	}
+
+	if ((event = adt_alloc_event(ah, flag)) == NULL) {
+		logerr("ilbd_audit_hc_event: adt_alloc_event failed");
+		exit(EXIT_FAILURE);
+	}
+	(void) memset((char *)event, 0, sizeof (adt_event_data_t));
+
+	switch (cmd) {
+	case ILBD_CREATE_HC:
+		event->adt_ilb_create_healthcheck.auth_used =
+		    NET_ILB_CONFIG_AUTH;
+		event->adt_ilb_create_healthcheck.hc_test =
+		    (char *)audit_hcinfo->hci_test;
+		event->adt_ilb_create_healthcheck.hc_name =
+		    (char *)audit_hcinfo->hci_name;
+
+		/*
+		 * If the value 0 is stored, the default values are
+		 * set in the kernel. User land does not know about them
+		 * So if the user does not specify them, audit record
+		 * will show them as 0
+		 */
+		event->adt_ilb_create_healthcheck.hc_timeout =
+		    audit_hcinfo->hci_timeout;
+		event->adt_ilb_create_healthcheck.hc_count =
+		    audit_hcinfo->hci_count;
+		event->adt_ilb_create_healthcheck.hc_interval =
+		    audit_hcinfo->hci_interval;
+		break;
+	case ILBD_DESTROY_HC:
+		event->adt_ilb_delete_healthcheck.auth_used =
+		    NET_ILB_CONFIG_AUTH;
+		event->adt_ilb_delete_healthcheck.hc_name =
+		    (char *)audit_hcname;
+		break;
+	}
+
+	/* Fill in success/failure */
+	if (rc == ILB_STATUS_OK) {
+		if (adt_put_event(event, ADT_SUCCESS, ADT_SUCCESS) != 0) {
+			logerr("ilbd_audit_hc_event: adt_put_event failed");
+			exit(EXIT_FAILURE);
+		}
+	} else {
+		audit_error = ilberror2auditerror(rc);
+		if (adt_put_event(event, ADT_FAILURE, audit_error) != 0) {
+			logerr("ilbd_audit_hc_event: adt_put_event failed");
+			exit(EXIT_FAILURE);
+		}
+	}
+	adt_free_event(event);
+	(void) adt_end_session(ah);
+}
+
+/*
+ * Given the ilb_hc_info_t passed in (from the libilb), create a hc object
+ * in ilbd. The parameter ev_port is not used, refer to comments of
+ * ilbd_create_sg() in ilbd_sg.c
+ *
+ * hc_info: description of the hc object to create
+ * ps: passwd entry of the requesting client, or NULL when the daemon
+ *	loads its configuration at start up; in the latter case neither
+ *	the authorization check nor the SCF update is done
+ * ucredp: credential of the client, used for auditing
+ *
+ * Returns ILB_STATUS_OK on success. Every outcome is handed to
+ * ilbd_audit_hc_event() before returning.
+ */
+/* ARGSUSED */
+ilb_status_t
+ilbd_create_hc(const ilb_hc_info_t *hc_info, int ev_port,
+    const struct passwd *ps, ucred_t *ucredp)
+{
+	ilbd_hc_t *hc;
+	ilb_status_t ret = ILB_STATUS_OK;
+
+	/*
+	 * ps == NULL is from the daemon when it starts and load configuration
+	 * ps != NULL is from client.
+	 */
+	if (ps != NULL) {
+		ret = ilbd_check_client_config_auth(ps);
+		if (ret != ILB_STATUS_OK) {
+			ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC,
+			    ret, ucredp);
+			return (ret);
+		}
+	}
+
+	if (hc_info->hci_name[0] == '\0') {
+		logdebug("ilbd_create_hc: missing healthcheck info");
+		ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC,
+		    ILB_STATUS_ENOHCINFO, ucredp);
+		return (ILB_STATUS_ENOHCINFO);
+	}
+
+	hc = ilbd_get_hc(hc_info->hci_name);
+	if (hc != NULL) {
+		logdebug("ilbd_create_hc: healthcheck name %s already"
+		    " exists", hc_info->hci_name);
+		ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC,
+		    ILB_STATUS_EEXIST, ucredp);
+		return (ILB_STATUS_EEXIST);
+	}
+
+	/*
+	 * Sanity check on user supplied probe. The given path name
+	 * must be a full path name (starts with '/') and is
+	 * executable.
+	 */
+	if (strcasecmp(hc_info->hci_test, ILB_HC_STR_TCP) != 0 &&
+	    strcasecmp(hc_info->hci_test, ILB_HC_STR_UDP) != 0 &&
+	    strcasecmp(hc_info->hci_test, ILB_HC_STR_PING) != 0) {
+		if (hc_info->hci_test[0] != '/') {
+			/*
+			 * Not a full path name. access() is not called in
+			 * this case, so errno must not be looked at.
+			 */
+			ret = ILB_STATUS_EINVAL;
+		} else if (access(hc_info->hci_test, X_OK) == -1) {
+			ret = (errno == ENOENT) ? ILB_STATUS_ENOENT :
+			    ILB_STATUS_EINVAL;
+		}
+
+		if (ret != ILB_STATUS_OK) {
+			if (ret == ILB_STATUS_ENOENT) {
+				logdebug("ilbd_create_hc: user script %s "
+				    "doesn't exist", hc_info->hci_test);
+			} else {
+				logdebug("ilbd_create_hc: user script %s is "
+				    "invalid", hc_info->hci_test);
+			}
+			ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC,
+			    ret, ucredp);
+			return (ret);
+		}
+	}
+
+	/* Create and add the hc object */
+	hc = calloc(1, sizeof (ilbd_hc_t));
+	if (hc == NULL) {
+		ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC,
+		    ILB_STATUS_ENOMEM, ucredp);
+		return (ILB_STATUS_ENOMEM);
+	}
+	(void) memcpy(&hc->ihc_info, hc_info, sizeof (ilb_hc_info_t));
+	if (strcasecmp(hc->ihc_test, ILB_HC_STR_TCP) == 0)
+		hc->ihc_test_type = ILBD_HC_TCP;
+	else if (strcasecmp(hc->ihc_test, ILB_HC_STR_UDP) == 0)
+		hc->ihc_test_type = ILBD_HC_UDP;
+	else if (strcasecmp(hc->ihc_test, ILB_HC_STR_PING) == 0)
+		hc->ihc_test_type = ILBD_HC_PING;
+	else
+		hc->ihc_test_type = ILBD_HC_USER;
+	list_create(&hc->ihc_rules, sizeof (ilbd_hc_rule_t),
+	    offsetof(ilbd_hc_rule_t, hcr_link));
+
+	/* Update SCF */
+	if (ps != NULL) {
+		if ((ret = ilbd_create_pg(ILBD_SCF_HC, (void *)hc)) !=
+		    ILB_STATUS_OK) {
+			ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC,
+			    ret, ucredp);
+			free(hc);
+			return (ret);
+		}
+	}
+
+	/* Everything is fine, now add it to the global list. */
+	list_insert_tail(&ilbd_hc_list, hc);
+	ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC, ret, ucredp);
+	return (ret);
+}
+
+/*
+ * Destroy the hc object with the given name.
+ *
+ * The request is refused if the client is not authorized, if no hc
+ * object has that name, or if the hc object is still used by a rule.
+ * The property group is destroyed first; only when that succeeds is the
+ * object taken off the global list and freed. Whatever the outcome, it
+ * is handed to ilbd_audit_hc_event() before returning.
+ */
+ilb_status_t
+ilbd_destroy_hc(const char *hc_name, const struct passwd *ps,
+    ucred_t *ucredp)
+{
+	ilbd_hc_t *target;
+	ilb_status_t status;
+
+	/*
+	 * ps is never NULL here: the daemon does not call any destroy
+	 * function when it starts up.
+	 */
+	status = ilbd_check_client_config_auth(ps);
+	if (status != ILB_STATUS_OK)
+		goto audit;
+
+	if ((target = ilbd_get_hc(hc_name)) == NULL) {
+		logdebug("ilbd_destroy_hc: healthcheck %s does not exist",
+		    hc_name);
+		status = ILB_STATUS_ENOENT;
+		goto audit;
+	}
+
+	/* A hc object which is in use cannot be deleted. */
+	if (target->ihc_rule_cnt > 0) {
+		logdebug("ilbd_destroy_hc: healthcheck %s is associated"
+		    " with a rule - cannot remove", hc_name);
+		status = ILB_STATUS_INUSE;
+		goto audit;
+	}
+
+	status = ilbd_destroy_pg(ILBD_SCF_HC, hc_name);
+	if (status != ILB_STATUS_OK) {
+		logdebug("ilbd_destroy_hc: cannot destroy healthcheck %s "
+		    "property group", hc_name);
+		goto audit;
+	}
+
+	list_remove(&ilbd_hc_list, target);
+	free(target);
+
+audit:
+	ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC, status, ucredp);
+	return (status);
+}
+
+/*
+ * Fill the reply buffer with the information of the named hc object.
+ * Used by libilb to get hc info.
+ *
+ * hc_name: name of the hc object
+ * rbuf: reply buffer; an OK reply is set up in it and the
+ *	ilb_hc_info_t is stored in its data part
+ * rbufsz: updated to the size of the reply
+ *
+ * Returns ILB_STATUS_ENOENT if there is no such hc object (the reply
+ * buffer is left untouched), ILB_STATUS_OK otherwise.
+ */
+ilb_status_t
+ilbd_get_hc_info(const char *hc_name, uint32_t *rbuf, size_t *rbufsz)
+{
+	ilb_comm_t *reply = (ilb_comm_t *)rbuf;
+	ilb_hc_info_t *out;
+	ilbd_hc_t *found;
+
+	if ((found = ilbd_get_hc(hc_name)) == NULL) {
+		logdebug("%s: healthcheck %s does not exist", __func__,
+		    hc_name);
+		return (ILB_STATUS_ENOENT);
+	}
+
+	ilbd_reply_ok(rbuf, rbufsz);
+	out = (ilb_hc_info_t *)&reply->ic_data;
+
+	out->hci_def_ping = found->ihc_def_ping;
+	out->hci_interval = found->ihc_interval;
+	out->hci_count = found->ihc_count;
+	out->hci_timeout = found->ihc_timeout;
+	(void) strlcpy(out->hci_test, found->ihc_test,
+	    sizeof (found->ihc_test));
+	(void) strlcpy(out->hci_name, found->ihc_name,
+	    sizeof (found->ihc_name));
+
+	/* Account for the info just appended after the reply header. */
+	*rbufsz += sizeof (ilb_hc_info_t);
+
+	return (ILB_STATUS_OK);
+}
+
+/*
+ * Copy the hc status of the servers of a hc rule into the reply buffer.
+ *
+ * rbuf: reply buffer; an OK reply is set up in it, followed by an
+ *	ilb_hc_rule_srv_t and one ilb_hc_srv_t per server copied
+ * rbufsz: on entry the size of rbuf, on return the size of the reply
+ * hc_rule: hc rule object whose servers are to be reported
+ * rulename: rule name stored in every copied entry
+ *
+ * Only as many servers as fit in the buffer are copied; rs_num_srvs
+ * tells how many that was.
+ */
+static void
+ilbd_hc_copy_srvs(uint32_t *rbuf, size_t *rbufsz, ilbd_hc_rule_t *hc_rule,
+    const char *rulename)
+{
+	ilbd_hc_srv_t *tmp_srv;
+	ilb_hc_srv_t *dst_srv;
+	ilb_hc_rule_srv_t *srvs;
+	size_t tmp_rbufsz;
+	int i;
+
+	tmp_rbufsz = *rbufsz;
+	/* Set up the reply buffer. rbufsz will be set to the new size. */
+	ilbd_reply_ok(rbuf, rbufsz);
+
+	/*
+	 * Calculate how much space is left for holding server info. The
+	 * sizes are unsigned, so a buffer smaller than the headers must
+	 * yield 0 here instead of wrapping around to a huge value.
+	 */
+	*rbufsz += sizeof (ilb_hc_rule_srv_t);
+	if (tmp_rbufsz > *rbufsz)
+		tmp_rbufsz -= *rbufsz;
+	else
+		tmp_rbufsz = 0;
+
+	srvs = (ilb_hc_rule_srv_t *)&((ilb_comm_t *)rbuf)->ic_data;
+
+	tmp_srv = list_head(&hc_rule->hcr_servers);
+	for (i = 0; tmp_srv != NULL && tmp_rbufsz >= sizeof (*dst_srv); i++) {
+		dst_srv = &srvs->rs_srvs[i];
+
+		(void) strlcpy(dst_srv->hcs_rule_name, rulename, ILB_NAMESZ);
+		(void) strlcpy(dst_srv->hcs_ID, tmp_srv->shc_sg_srv->sgs_srvID,
+		    ILB_NAMESZ);
+		(void) strlcpy(dst_srv->hcs_hc_name,
+		    tmp_srv->shc_hc->ihc_name, ILB_NAMESZ);
+		dst_srv->hcs_IP = tmp_srv->shc_sg_srv->sgs_addr;
+		dst_srv->hcs_fail_cnt = tmp_srv->shc_fail_cnt;
+		dst_srv->hcs_status = tmp_srv->shc_status;
+		dst_srv->hcs_rtt = tmp_srv->shc_rtt;
+		dst_srv->hcs_lasttime = tmp_srv->shc_lasttime;
+		dst_srv->hcs_nexttime = tmp_srv->shc_nexttime;
+
+		tmp_srv = list_next(&hc_rule->hcr_servers, tmp_srv);
+		tmp_rbufsz -= sizeof (*dst_srv);
+	}
+	srvs->rs_num_srvs = i;
+	*rbufsz += i * sizeof (*dst_srv);
+}
+
+/*
+ * Report the hc status of the servers of the named rule.
+ *
+ * Every hc object's rule list is searched (ignoring case) for the rule.
+ * On the first match the reply buffer is filled by ilbd_hc_copy_srvs()
+ * and ILB_STATUS_OK is returned. ILB_STATUS_RULE_NO_HC is returned when
+ * no hc object has such a rule.
+ */
+ilb_status_t
+ilbd_get_hc_srvs(const char *rulename, uint32_t *rbuf, size_t *rbufsz)
+{
+	ilbd_hc_t *cur_hc;
+	ilbd_hc_rule_t *cur_rule;
+
+	cur_hc = list_head(&ilbd_hc_list);
+	while (cur_hc != NULL) {
+		cur_rule = list_head(&cur_hc->ihc_rules);
+		while (cur_rule != NULL) {
+			if (strcasecmp(cur_rule->hcr_rule->irl_name,
+			    rulename) == 0) {
+				ilbd_hc_copy_srvs(rbuf, rbufsz, cur_rule,
+				    rulename);
+				return (ILB_STATUS_OK);
+			}
+			cur_rule = list_next(&cur_hc->ihc_rules, cur_rule);
+		}
+		cur_hc = list_next(&ilbd_hc_list, cur_hc);
+	}
+	return (ILB_STATUS_RULE_NO_HC);
+}
+
+/*
+ * Create the hc timer queue and the timer which drives it. Expiration
+ * of the timer is delivered to the event port ev_port with ev_obj as
+ * the user pointer. ev_obj is initialized here; on return its timerid
+ * identifies the new timer. Failure to create either the queue or the
+ * timer terminates the daemon.
+ */
+void
+ilbd_hc_timer_init(int ev_port, ilbd_timer_event_obj_t *ev_obj)
+{
+	port_notify_t pnotify;
+	struct sigevent sev;
+
+	ilbd_hc_timer_q = iu_tq_create();
+	if (ilbd_hc_timer_q == NULL) {
+		logerr("%s: cannot create hc timer queue", __func__);
+		exit(EXIT_FAILURE);
+	}
+	hc_timer_restarted = B_FALSE;
+
+	ev_obj->timerid = -1;
+	ev_obj->ev = ILBD_EVENT_TIMER;
+
+	/* Ask for timer expirations to be queued on the event port. */
+	pnotify.portnfy_user = ev_obj;
+	pnotify.portnfy_port = ev_port;
+	sev.sigev_value.sival_ptr = &pnotify;
+	sev.sigev_notify = SIGEV_PORT;
+
+	if (timer_create(CLOCK_REALTIME, &sev, &ev_obj->timerid) == -1) {
+		logerr("%s: cannot create timer", __func__);
+		exit(EXIT_FAILURE);
+	}
+}
+
+/*
+ * HC timeout handler. Runs iu_expire_timers() on the hc timer queue and
+ * then sets hc_timer_restarted, so that the next call to
+ * ilbd_hc_timer_update() does not return early and sets up the timer
+ * again.
+ */
+void
+ilbd_hc_timeout(void)
+{
+ (void) iu_expire_timers(ilbd_hc_timer_q);
+ hc_timer_restarted = B_TRUE;
+}
+
+/*
+ * Arm the timer of ev_obj so that it fires at the earliest timeout on
+ * the hc timer queue. Nothing is done unless hc_timer_restarted is set;
+ * the flag is cleared before returning.
+ */
+void
+ilbd_hc_timer_update(ilbd_timer_event_obj_t *ev_obj)
+{
+	itimerspec_t when;
+	int earliest;
+
+	/* No change on the timer list means the timer is still good. */
+	if (!hc_timer_restarted)
+		return;
+
+	for (;;) {
+		earliest = iu_earliest_timer(ilbd_hc_timer_q);
+		if (earliest == INFTIM) {
+			/* No timer is pending, so there is nothing to arm. */
+			hc_timer_restarted = B_FALSE;
+			return;
+		}
+		if (earliest != 0)
+			break;
+		/*
+		 * Some timers are already due. Handle them right away,
+		 * then look again to see whether any timer is still
+		 * running.
+		 */
+		(void) iu_expire_timers(ilbd_hc_timer_q);
+	}
+
+	/* One shot timer, rounded up to the next second. */
+	when.it_interval.tv_sec = 0;
+	when.it_interval.tv_nsec = 0;
+	when.it_value.tv_sec = earliest / MILLISEC + 1;
+	when.it_value.tv_nsec = 0;
+
+	/*
+	 * A failure here is tolerated: later events will lead to another
+	 * call of timer_settime(), so only some timeouts are missed. In
+	 * the worst case no event ever happens and ilbd gets stuck.
+	 */
+	if (timer_settime(ev_obj->timerid, 0, &when, NULL) == -1)
+		logerr("%s: cannot set timer", __func__);
+	hc_timer_restarted = B_FALSE;
+}
+
+/*
+ * Kill the probe process of a server.
+ *
+ * The probe's pipe fd is removed from the event port and closed, the probe
+ * event object is freed, and the child is sent SIGKILL and reaped with a
+ * blocking waitpid().  On return srv->shc_ev is NULL and srv->shc_child_pid
+ * is 0.
+ */
+static void
+ilbd_hc_kill_probe(ilbd_hc_srv_t *srv)
+{
+	pid_t	child = srv->shc_child_pid;
+	int	probe_fd = srv->shc_child_fd;
+
+	/* Detach the fd from the event port first.  It should not fail. */
+	if (port_dissociate(srv->shc_ev_port, PORT_SOURCE_FD, probe_fd) != 0)
+		logdebug("%s: port_dissociate: %s", __func__, strerror(errno));
+	(void) close(probe_fd);
+	free(srv->shc_ev);
+	srv->shc_ev = NULL;
+
+	/* Now get rid of the probe process itself. */
+	if (kill(child, SIGKILL) != 0) {
+		logerr("%s: rule %s server %s: %s", __func__,
+		    srv->shc_hc_rule->hcr_rule->irl_name,
+		    srv->shc_sg_srv->sgs_srvID, strerror(errno));
+	}
+	/* Reaping the child should not fail either... */
+	if (waitpid(child, NULL, 0) != child) {
+		logdebug("%s: waitpid: rule %s server %s", __func__,
+		    srv->shc_hc_rule->hcr_rule->irl_name,
+		    srv->shc_sg_srv->sgs_srvID);
+	}
+	srv->shc_child_pid = 0;
+}
+
+/*
+ * Mark a server as disabled, either because the server is dead or because
+ * a timer cannot be started for it.  The in-memory health check status is
+ * set to ILB_HCS_DISABLED and the kernel is asked to stop using the server.
+ * Only the transient (in memory) configuration changes; the persistent
+ * configuration is left alone.
+ */
+static void
+ilbd_mark_server_disabled(ilbd_hc_srv_t *srv)
+{
+	srv->shc_status = ILB_HCS_DISABLED;
+
+	/* Tell the kernel; a failure is only logged. */
+	if (ilbd_k_Xable_server(&srv->shc_sg_srv->sgs_addr,
+	    srv->shc_hc_rule->hcr_rule->irl_name,
+	    stat_declare_srv_dead) == ILB_STATUS_OK)
+		return;
+
+	logerr("%s: cannot disable server in kernel: rule %s "
+	    "server %s", __func__,
+	    srv->shc_hc_rule->hcr_rule->irl_name,
+	    srv->shc_sg_srv->sgs_srvID);
+}
+
+/*
+ * Account for a failed probe of a server.
+ *
+ * The failure counter is bumped.  While it is below the hc object's
+ * ihc_count the probe is simply run again.  Once the limit is reached the
+ * server is marked unreachable (ping test or default pinging) or dead
+ * (any other test), disabled in the kernel, and the periodic timer is
+ * restarted so that a recovery can be noticed later.
+ */
+static void
+ilbd_set_fail_state(ilbd_hc_srv_t *srv)
+{
+	boolean_t	ping_failed;
+
+	srv->shc_fail_cnt++;
+	if (srv->shc_fail_cnt < srv->shc_hc->ihc_count) {
+		/* Not enough failures yet, probe again. */
+		ilbd_hc_probe_timer(ilbd_hc_timer_q, srv);
+		return;
+	}
+
+	logdebug("%s: rule %s server %s fails %u", __func__,
+	    srv->shc_hc_rule->hcr_rule->irl_name, srv->shc_sg_srv->sgs_srvID,
+	    srv->shc_fail_cnt);
+
+	/* A failed ping means "unreachable" rather than "dead". */
+	ping_failed = (srv->shc_hc->ihc_test_type == ILBD_HC_PING ||
+	    srv->shc_state == ilbd_hc_def_pinging);
+	srv->shc_status = ping_failed ? ILB_HCS_UNREACH : ILB_HCS_DEAD;
+
+	/* Disable the server in kernel. */
+	if (ilbd_k_Xable_server(&srv->shc_sg_srv->sgs_addr,
+	    srv->shc_hc_rule->hcr_rule->irl_name,
+	    stat_declare_srv_dead) != ILB_STATUS_OK) {
+		logerr("%s: cannot disable server in kernel: rule %s "
+		    "server %s", __func__,
+		    srv->shc_hc_rule->hcr_rule->irl_name,
+		    srv->shc_sg_srv->sgs_srvID);
+	}
+
+	/* Still keep probing in case the server is alive again. */
+	if (ilbd_hc_restart_timer(srv->shc_hc, srv) == ILB_STATUS_OK)
+		return;
+
+	/* Without a timer the only thing left is to disable the server. */
+	logerr("%s: cannot restart timer: rule %s server %s", __func__,
+	    srv->shc_hc_rule->hcr_rule->irl_name,
+	    srv->shc_sg_srv->sgs_srvID);
+	srv->shc_status = ILB_HCS_DISABLED;
+}
+
+/*
+ * Timer callback used when a probe process has not returned within the
+ * ihc_timeout period.  arg is the ilbd_hc_srv_t being probed: its probe
+ * process is killed and the probe is counted as a failure.
+ */
+/* ARGSUSED */
+static void
+ilbd_hc_kill_timer(iu_tq_t *tq, void *arg)
+{
+	ilbd_hc_srv_t	*hc_srv = arg;
+
+	ilbd_hc_kill_probe(hc_srv);
+	ilbd_set_fail_state(hc_srv);
+}
+
+/*
+ * Probe timeout handler.  arg is the ilbd_hc_srv_t to probe.
+ *
+ * Starts the probe for the server and, when that works, schedules the kill
+ * timer (ihc_timeout) that bounds how long the probe may run.  When the
+ * probe cannot be started the periodic timer is restarted instead.  If the
+ * needed timer cannot be set up, the server is disabled.
+ */
+/* ARGSUSED */
+static void
+ilbd_hc_probe_timer(iu_tq_t *tq, void *arg)
+{
+	ilbd_hc_srv_t	*hc_srv = arg;
+
+	if (ilbd_run_probe(hc_srv)) {
+		/*
+		 * The probe is running.  If its kill timer cannot be
+		 * started, disable the server.
+		 */
+		hc_srv->shc_tid = iu_schedule_timer(ilbd_hc_timer_q,
+		    hc_srv->shc_hc->ihc_timeout, ilbd_hc_kill_timer, hc_srv);
+		if (hc_srv->shc_tid == -1) {
+			logerr("%s: cannot start kill timer: rule %s server %s, "
+			    "disabling it", __func__,
+			    hc_srv->shc_hc_rule->hcr_rule->irl_name,
+			    hc_srv->shc_sg_srv->sgs_srvID);
+			ilbd_mark_server_disabled(hc_srv);
+		}
+		hc_timer_restarted = B_TRUE;
+		return;
+	}
+
+	/*
+	 * Starting the probe failed; just pretend that the timeout has been
+	 * extended and try again later.
+	 */
+	if (ilbd_hc_restart_timer(hc_srv->shc_hc, hc_srv) == ILB_STATUS_OK)
+		return;
+
+	/*
+	 * If we cannot restart the timer, the only thing we can do is to
+	 * disable this server.  Hopefully the sys admin will notice this
+	 * and enable this server again later.
+	 */
+	logerr("%s: cannot restart timer: rule %s server %s, "
+	    "disabling it", __func__,
+	    hc_srv->shc_hc_rule->hcr_rule->irl_name,
+	    hc_srv->shc_sg_srv->sgs_srvID);
+	ilbd_mark_server_disabled(hc_srv);
+}
+
+/*
+ * Restart the periodic timer for a given server.
+ *
+ * The timeout is half of the hc object's ihc_interval plus a pseudo random
+ * amount in [0, ihc_interval] derived from gethrtime(), and never less
+ * than 1.  The server's probe state is reset to default pinging (when the
+ * hc object asks for it and the test is not itself a ping) or to probing.
+ *
+ * Returns ILB_STATUS_TIMER if the timer cannot be scheduled; in that case
+ * srv->shc_tid is -1.  Otherwise shc_lasttime/shc_nexttime are updated,
+ * hc_timer_restarted is set and ILB_STATUS_OK is returned.
+ */
+static ilb_status_t
+ilbd_hc_restart_timer(ilbd_hc_t *hc, ilbd_hc_srv_t *srv)
+{
+	hrtime_t	jittered;
+	time_t		now;
+	int		timeout;
+
+	/*
+	 * Compute the jittered interval exactly once.  Passing the
+	 * expression straight to a MAX() style macro would evaluate
+	 * gethrtime() more than once, so the value compared against the
+	 * minimum would not be the value actually used.
+	 */
+	jittered = (hc->ihc_interval >> 1) +
+	    (gethrtime() % (hc->ihc_interval + 1));
+
+	/* Don't allow the timeout interval to be less than 1s */
+	timeout = (jittered < 1) ? 1 : (int)jittered;
+
+	/*
+	 * If the probe is actually a ping probe, there is no need to
+	 * do default pinging.  Just skip the step.
+	 */
+	if (hc->ihc_def_ping && hc->ihc_test_type != ILBD_HC_PING)
+		srv->shc_state = ilbd_hc_def_pinging;
+	else
+		srv->shc_state = ilbd_hc_probing;
+
+	srv->shc_tid = iu_schedule_timer(ilbd_hc_timer_q, timeout,
+	    ilbd_hc_probe_timer, srv);
+	if (srv->shc_tid == -1)
+		return (ILB_STATUS_TIMER);
+
+	/* Take one timestamp so that both fields agree with each other. */
+	now = time(NULL);
+	srv->shc_lasttime = now;
+	srv->shc_nexttime = now + timeout;
+
+	hc_timer_restarted = B_TRUE;
+	return (ILB_STATUS_OK);
+}
+
+/*
+ * Helper routine to associate a server with its hc object.
+ *
+ * Allocates the per-server health check state and appends it to the rule's
+ * server list.  Probing is started right away only if both the rule and the
+ * server are enabled; otherwise the server starts out as ILB_HCS_DISABLED.
+ *
+ * Returns ILB_STATUS_ENOMEM if the state cannot be allocated, the status of
+ * ilbd_hc_restart_timer() if the timer cannot be started, and ILB_STATUS_OK
+ * otherwise.
+ */
+static ilb_status_t
+ilbd_hc_srv_add(ilbd_hc_t *hc, ilbd_hc_rule_t *hc_rule,
+    const ilb_sg_srv_t *srv, int ev_port)
+{
+	ilbd_hc_srv_t	*hc_srv;
+	ilb_status_t	status;
+
+	hc_srv = calloc(1, sizeof (*hc_srv));
+	if (hc_srv == NULL)
+		return (ILB_STATUS_ENOMEM);
+
+	hc_srv->shc_hc = hc;
+	hc_srv->shc_hc_rule = hc_rule;
+	hc_srv->shc_sg_srv = srv;
+	hc_srv->shc_ev_port = ev_port;
+	hc_srv->shc_tid = -1;
+	hc_srv->shc_lasttime = hc_srv->shc_nexttime = time(NULL);
+
+	if (!(hc_rule->hcr_rule->irl_flags & ILB_FLAGS_RULE_ENABLED) ||
+	    !ILB_IS_SRV_ENABLED(srv->sgs_flags)) {
+		/* Rule or server is disabled: no probing for now. */
+		hc_srv->shc_status = ILB_HCS_DISABLED;
+	} else {
+		hc_srv->shc_status = ILB_HCS_UNINIT;
+		status = ilbd_hc_restart_timer(hc, hc_srv);
+		if (status != ILB_STATUS_OK) {
+			free(hc_srv);
+			return (status);
+		}
+	}
+
+	list_insert_tail(&hc_rule->hcr_servers, hc_srv);
+	return (ILB_STATUS_OK);
+}
+
+/* Handy macro to cancel a server's timer. */
+#define HC_CANCEL_TIMER(srv) \
+{ \
+ void *arg; \
+ int ret; \
+ if ((srv)->shc_tid != -1) { \
+ ret = iu_cancel_timer(ilbd_hc_timer_q, (srv)->shc_tid, &arg); \
+ (srv)->shc_tid = -1; \
+ assert(ret == 1); \
+ assert(arg == (srv)); \
+ } \
+ hc_timer_restarted = B_TRUE; \
+}
+
+/*
+ * Helper routine to dissociate a server from its hc object.
+ *
+ * Looks up the health check state that refers to srv in the rule's server
+ * list, unlinks it, cancels its timer, kills a probe that may still be
+ * running and frees the state.
+ *
+ * Returns ILB_STATUS_ENOENT if the server is not on the list.
+ */
+static ilb_status_t
+ilbd_hc_srv_rem(ilbd_hc_rule_t *hc_rule, const ilb_sg_srv_t *srv)
+{
+	ilbd_hc_srv_t	*cur;
+
+	cur = list_head(&hc_rule->hcr_servers);
+	while (cur != NULL && cur->shc_sg_srv != srv)
+		cur = list_next(&hc_rule->hcr_servers, cur);
+
+	if (cur == NULL)
+		return (ILB_STATUS_ENOENT);
+
+	list_remove(&hc_rule->hcr_servers, cur);
+	HC_CANCEL_TIMER(cur);
+	if (cur->shc_child_pid != 0)
+		ilbd_hc_kill_probe(cur);
+	free(cur);
+	return (ILB_STATUS_OK);
+}
+
+/*
+ * Helper routine to dissociate all servers of a rule from its hc object.
+ * Each server is taken off the rule's server list, its timer is cancelled,
+ * a running probe is killed and its health check state is freed.
+ */
+static void
+ilbd_hc_srv_rem_all(ilbd_hc_rule_t *hc_rule)
+{
+	ilbd_hc_srv_t	*hc_srv;
+
+	for (;;) {
+		hc_srv = list_remove_head(&hc_rule->hcr_servers);
+		if (hc_srv == NULL)
+			break;
+
+		HC_CANCEL_TIMER(hc_srv);
+		if (hc_srv->shc_child_pid != 0)
+			ilbd_hc_kill_probe(hc_srv);
+		free(hc_srv);
+	}
+}
+
+/*
+ * Associate a rule with its hc object.
+ *
+ * The hc object is looked up by the name stored in the rule.  A TCP test
+ * can only be used with a TCP rule and a UDP test only with a UDP rule.
+ * Every server of the rule's server group is then added to health checking,
+ * using ev_port as the event port for probe events.
+ *
+ * Returns ILB_STATUS_ENOHCINFO if the hc object does not exist,
+ * ILB_STATUS_RULE_HC_MISMATCH on a protocol mismatch, ILB_STATUS_ENOMEM when
+ * out of memory, or the error from adding a server (in which case all
+ * servers added so far are removed again).
+ */
+ilb_status_t
+ilbd_hc_associate_rule(const ilbd_rule_t *rule, int ev_port)
+{
+	ilbd_hc_t	*hc;
+	ilbd_hc_rule_t	*new_rule;
+	ilbd_sg_t	*sg;
+	ilbd_srv_t	*sg_srv;
+	ilb_status_t	status;
+
+	/* The rule is assumed to be initialized appropriately. */
+	hc = ilbd_get_hc(rule->irl_hcname);
+	if (hc == NULL) {
+		logdebug("ilbd_hc_associate_rule: healthcheck %s does not "
+		    "exist", rule->irl_hcname);
+		return (ILB_STATUS_ENOHCINFO);
+	}
+
+	/* A transport specific test must match the rule's protocol. */
+	if (hc->ihc_test_type == ILBD_HC_TCP) {
+		if (rule->irl_proto != IPPROTO_TCP)
+			return (ILB_STATUS_RULE_HC_MISMATCH);
+	} else if (hc->ihc_test_type == ILBD_HC_UDP) {
+		if (rule->irl_proto != IPPROTO_UDP)
+			return (ILB_STATUS_RULE_HC_MISMATCH);
+	}
+
+	new_rule = calloc(1, sizeof (*new_rule));
+	if (new_rule == NULL) {
+		logdebug("ilbd_hc_associate_rule: out of memory");
+		return (ILB_STATUS_ENOMEM);
+	}
+	new_rule->hcr_rule = rule;
+	list_create(&new_rule->hcr_servers, sizeof (ilbd_hc_srv_t),
+	    offsetof(ilbd_hc_srv_t, shc_srv_link));
+
+	/* Add all the servers. */
+	sg = rule->irl_sg;
+	sg_srv = list_head(&sg->isg_srvlist);
+	while (sg_srv != NULL) {
+		status = ilbd_hc_srv_add(hc, new_rule, &sg_srv->isv_srv,
+		    ev_port);
+		if (status != ILB_STATUS_OK) {
+			/* Remove all previously added servers */
+			ilbd_hc_srv_rem_all(new_rule);
+			free(new_rule);
+			return (status);
+		}
+		sg_srv = list_next(&sg->isg_srvlist, sg_srv);
+	}
+
+	list_insert_tail(&hc->ihc_rules, new_rule);
+	hc->ihc_rule_cnt++;
+
+	return (ILB_STATUS_OK);
+}
+
+/*
+ * Dissociate a rule from its hc object.
+ *
+ * All servers of the rule are removed from health checking, the rule's
+ * entry is unlinked from the hc object's rule list and released, and the
+ * hc object's rule count is decremented.
+ *
+ * Returns ILB_STATUS_ENOENT if the hc object named in the rule does not
+ * exist or the rule is not associated with it, ILB_STATUS_OK otherwise.
+ */
+ilb_status_t
+ilbd_hc_dissociate_rule(const ilbd_rule_t *rule)
+{
+	ilbd_hc_t	*hc;
+	ilbd_hc_rule_t	*hc_rule;
+
+	/* The rule is assumed to be initialized appropriately. */
+	if ((hc = ilbd_get_hc(rule->irl_hcname)) == NULL) {
+		logdebug("ilbd_hc_dissociate_rule: healthcheck %s does not "
+		    "exist", rule->irl_hcname);
+		return (ILB_STATUS_ENOENT);
+	}
+	for (hc_rule = list_head(&hc->ihc_rules); hc_rule != NULL;
+	    hc_rule = list_next(&hc->ihc_rules, hc_rule)) {
+		if (hc_rule->hcr_rule == rule)
+			break;
+	}
+	if (hc_rule == NULL) {
+		/* Report the rule by its own name, not by its hc name. */
+		logdebug("ilbd_hc_dissociate_rule: rule %s is not associated "
+		    "with healthcheck %s", rule->irl_name, hc->ihc_name);
+		return (ILB_STATUS_ENOENT);
+	}
+	ilbd_hc_srv_rem_all(hc_rule);
+	list_remove(&hc->ihc_rules, hc_rule);
+	hc->ihc_rule_cnt--;
+
+	/*
+	 * The entry was allocated by ilbd_hc_associate_rule() and nothing
+	 * refers to it anymore, so release it here instead of leaking it.
+	 */
+	list_destroy(&hc_rule->hcr_servers);
+	free(hc_rule);
+	return (ILB_STATUS_OK);
+}
+
+/*
+ * Check whether a rule is associated with the hc object called hc_name.
+ *
+ * On a match B_TRUE is returned, *hc is set to the hc object and *hc_rule to
+ * the rule's ilbd_hc_rule_t.  If the hc object does not exist or the rule is
+ * not on its rule list, B_FALSE is returned and *hc and *hc_rule are left
+ * untouched.
+ */
+static boolean_t
+ilbd_hc_check_rule(const char *hc_name, const ilbd_rule_t *rule,
+    ilbd_hc_t **hc, ilbd_hc_rule_t **hc_rule)
+{
+	ilbd_hc_t	*found_hc;
+	ilbd_hc_rule_t	*cur;
+
+	found_hc = ilbd_get_hc(hc_name);
+	if (found_hc == NULL)
+		return (B_FALSE);
+
+	cur = list_head(&found_hc->ihc_rules);
+	while (cur != NULL && cur->hcr_rule != rule)
+		cur = list_next(&found_hc->ihc_rules, cur);
+
+	if (cur == NULL)
+		return (B_FALSE);
+
+	*hc = found_hc;
+	*hc_rule = cur;
+	return (B_TRUE);
+}
+
+/*
+ * Associate a server with the hc object of its rule.  Returns
+ * ILB_STATUS_ENOENT if the rule is not associated with the hc object it
+ * names, otherwise the result of adding the server.
+ */
+ilb_status_t
+ilbd_hc_add_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv,
+    int ev_port)
+{
+	ilbd_hc_t	*hc;
+	ilbd_hc_rule_t	*hc_rule;
+
+	if (ilbd_hc_check_rule(rule->irl_hcname, rule, &hc, &hc_rule))
+		return (ilbd_hc_srv_add(hc, hc_rule, srv, ev_port));
+
+	return (ILB_STATUS_ENOENT);
+}
+
+/*
+ * Dissociate a server from the hc object of its rule.  Returns
+ * ILB_STATUS_ENOENT if the rule is not associated with the hc object it
+ * names, otherwise the result of removing the server.
+ */
+ilb_status_t
+ilbd_hc_del_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv)
+{
+	ilbd_hc_t	*hc;
+	ilbd_hc_rule_t	*hc_rule;
+
+	if (ilbd_hc_check_rule(rule->irl_hcname, rule, &hc, &hc_rule))
+		return (ilbd_hc_srv_rem(hc_rule, srv));
+
+	return (ILB_STATUS_ENOENT);
+}
+
+/*
+ * Helper routine to enable/disable a server's hc probe.
+ *
+ * Enabling only acts on a server whose status is ILB_HCS_DISABLED: its
+ * timer is restarted and its status, RTT and failure count are reset.
+ * Disabling only acts on a server that is not yet disabled: its timer is
+ * cancelled and a running probe is killed.
+ *
+ * Returns ILB_STATUS_ENOENT if the rule is not associated with its hc
+ * object or the server is not known to the rule, the error of
+ * ilbd_hc_restart_timer() if the timer cannot be started, and
+ * ILB_STATUS_OK otherwise.
+ */
+static ilb_status_t
+ilbd_hc_toggle_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv,
+    boolean_t enable)
+{
+	ilbd_hc_t	*hc;
+	ilbd_hc_rule_t	*hc_rule;
+	ilbd_hc_srv_t	*cur;
+	ilb_status_t	status;
+
+	if (!ilbd_hc_check_rule(rule->irl_hcname, rule, &hc, &hc_rule))
+		return (ILB_STATUS_ENOENT);
+
+	/* Find the health check state of the given server. */
+	cur = list_head(&hc_rule->hcr_servers);
+	while (cur != NULL && cur->shc_sg_srv != srv)
+		cur = list_next(&hc_rule->hcr_servers, cur);
+	if (cur == NULL)
+		return (ILB_STATUS_ENOENT);
+
+	if (!enable) {
+		if (cur->shc_status != ILB_HCS_DISABLED) {
+			cur->shc_status = ILB_HCS_DISABLED;
+			HC_CANCEL_TIMER(cur);
+			if (cur->shc_child_pid != 0)
+				ilbd_hc_kill_probe(cur);
+		}
+		return (ILB_STATUS_OK);
+	}
+
+	/* Enabling a server that is not disabled is a no-op. */
+	if (cur->shc_status != ILB_HCS_DISABLED)
+		return (ILB_STATUS_OK);
+
+	status = ilbd_hc_restart_timer(hc, cur);
+	if (status != ILB_STATUS_OK) {
+		logerr("%s: cannot start timers for "
+		    "rule %s server %s", __func__,
+		    rule->irl_name,
+		    cur->shc_sg_srv->sgs_srvID);
+		return (status);
+	}
+
+	/* Start from fresh... */
+	cur->shc_status = ILB_HCS_UNINIT;
+	cur->shc_rtt = 0;
+	cur->shc_fail_cnt = 0;
+	return (ILB_STATUS_OK);
+}
+
+/* Enable the hc probe of a server; see ilbd_hc_toggle_server(). */
+ilb_status_t
+ilbd_hc_enable_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv)
+{
+	ilb_status_t	status;
+
+	status = ilbd_hc_toggle_server(rule, srv, B_TRUE);
+	return (status);
+}
+
+/* Disable the hc probe of a server; see ilbd_hc_toggle_server(). */
+ilb_status_t
+ilbd_hc_disable_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv)
+{
+	ilb_status_t	status;
+
+	status = ilbd_hc_toggle_server(rule, srv, B_FALSE);
+	return (status);
+}
+
+/*
+ * Helper routine to enable/disable a rule's hc probe (including all its
+ * servers).
+ *
+ * When enabling, every server that is currently ILB_HCS_DISABLED and is
+ * enabled in the server group gets its timer restarted and its status, RTT
+ * and failure count reset.  If a timer cannot be started, the servers
+ * handled before the failing one are walked in reverse order and disabled
+ * again, and the error is returned.  When disabling, every server that is
+ * not yet disabled gets its timer cancelled and a running probe killed.
+ *
+ * Returns ILB_STATUS_ENOENT if the rule is not associated with its hc
+ * object, the error of ilbd_hc_restart_timer() on failure and
+ * ILB_STATUS_OK otherwise.
+ */
+static ilb_status_t
+ilbd_hc_toggle_rule(const ilbd_rule_t *rule, boolean_t enable)
+{
+	ilbd_hc_t	*hc;
+	ilbd_hc_rule_t	*hc_rule;
+	ilbd_hc_srv_t	*tmp_srv;
+	/* Holds (and is returned as) an ilb_status_t, so declare it as one. */
+	ilb_status_t	ret;
+
+	if (!ilbd_hc_check_rule(rule->irl_hcname, rule, &hc, &hc_rule))
+		return (ILB_STATUS_ENOENT);
+
+	for (tmp_srv = list_head(&hc_rule->hcr_servers); tmp_srv != NULL;
+	    tmp_srv = list_next(&hc_rule->hcr_servers, tmp_srv)) {
+		if (enable) {
+			/*
+			 * If the server is disabled in the rule, do not
+			 * restart its timer.
+			 */
+			if (tmp_srv->shc_status == ILB_HCS_DISABLED &&
+			    ILB_IS_SRV_ENABLED(
+			    tmp_srv->shc_sg_srv->sgs_flags)) {
+				ret = ilbd_hc_restart_timer(hc, tmp_srv);
+				if (ret != ILB_STATUS_OK) {
+					logerr("%s: cannot start timers for "
+					    "rule %s server %s", __func__,
+					    rule->irl_name,
+					    tmp_srv->shc_sg_srv->sgs_srvID);
+					goto rollback;
+				} else {
+					/* Start from fresh... */
+					tmp_srv->shc_status = ILB_HCS_UNINIT;
+					tmp_srv->shc_rtt = 0;
+					tmp_srv->shc_fail_cnt = 0;
+				}
+			}
+		} else {
+			if (tmp_srv->shc_status != ILB_HCS_DISABLED) {
+				HC_CANCEL_TIMER(tmp_srv);
+				tmp_srv->shc_status = ILB_HCS_DISABLED;
+				if (tmp_srv->shc_child_pid != 0)
+					ilbd_hc_kill_probe(tmp_srv);
+			}
+		}
+	}
+	return (ILB_STATUS_OK);
+rollback:
+	/*
+	 * Undo the operation on the servers that precede the failing one by
+	 * applying the opposite operation to them, walking backwards.
+	 */
+	enable = !enable;
+	for (tmp_srv = list_prev(&hc_rule->hcr_servers, tmp_srv);
+	    tmp_srv != NULL;
+	    tmp_srv = list_prev(&hc_rule->hcr_servers, tmp_srv)) {
+		if (enable) {
+			if (tmp_srv->shc_status == ILB_HCS_DISABLED &&
+			    ILB_IS_SRV_ENABLED(
+			    tmp_srv->shc_sg_srv->sgs_flags)) {
+				(void) ilbd_hc_restart_timer(hc, tmp_srv);
+				tmp_srv->shc_status = ILB_HCS_UNINIT;
+				tmp_srv->shc_rtt = 0;
+				tmp_srv->shc_fail_cnt = 0;
+			}
+		} else {
+			if (tmp_srv->shc_status != ILB_HCS_DISABLED) {
+				HC_CANCEL_TIMER(tmp_srv);
+				tmp_srv->shc_status = ILB_HCS_DISABLED;
+				if (tmp_srv->shc_child_pid != 0)
+					ilbd_hc_kill_probe(tmp_srv);
+			}
+		}
+	}
+	return (ret);
+}
+
+/* Enable the hc probes of all servers of a rule; see ilbd_hc_toggle_rule(). */
+ilb_status_t
+ilbd_hc_enable_rule(const ilbd_rule_t *rule)
+{
+	ilb_status_t	status;
+
+	status = ilbd_hc_toggle_rule(rule, B_TRUE);
+	return (status);
+}
+
+/* Disable the hc probes of all servers of a rule; see ilbd_hc_toggle_rule(). */
+ilb_status_t
+ilbd_hc_disable_rule(const ilbd_rule_t *rule)
+{
+	ilb_status_t	status;
+
+	status = ilbd_hc_toggle_rule(rule, B_FALSE);
+	return (status);
+}
+
+/*
+ * Map a topology value to the string handed to a probe command: "DSR",
+ * "NAT" or "HALF_NAT".  An unknown value is logged and mapped to the empty
+ * string.
+ */
+static const char *
+topo_2_str(ilb_topo_t topo)
+{
+	const char	*name = "";
+
+	switch (topo) {
+	case ILB_TOPO_DSR:
+		name = "DSR";
+		break;
+	case ILB_TOPO_NAT:
+		name = "NAT";
+		break;
+	case ILB_TOPO_HALF_NAT:
+		name = "HALF_NAT";
+		break;
+	default:
+		/* Should not happen. */
+		logerr("%s: unknown topology", __func__);
+		break;
+	}
+	return (name);
+}
+
+/*
+ * Create the argument list to be passed to a hc probe command.
+ *
+ * The passed in argv is assumed to have HC_PROBE_ARGC elements, all of them
+ * NULL on entry (the failure path frees every non-NULL slot).  Slots 0 to 6
+ * are filled with heap allocated strings and slot 7 with the terminating
+ * NULL:
+ *
+ *	argv[0]	probe name
+ *	argv[1]	VIP of the rule
+ *	argv[2]	backend server address
+ *	argv[3]	transport protocol of the rule ("TCP" or "UDP")
+ *	argv[4]	load balance mode
+ *	argv[5]	port to probe
+ *	argv[6]	probe timeout
+ *
+ * Returns B_TRUE on success; the caller then owns the strings and releases
+ * them with destroy_argv().  Returns B_FALSE on failure, in which case
+ * everything allocated here has been freed and all slots of argv are NULL
+ * again, so that the caller may safely run destroy_argv() on it.
+ */
+static boolean_t
+create_argv(ilbd_hc_srv_t *srv, char *argv[])
+{
+	char buf[INET6_ADDRSTRLEN];
+	ilbd_rule_t const *rule;
+	ilb_sg_srv_t const *sg_srv;
+	struct in_addr v4_addr;
+	in_port_t port;
+	int i;
+
+	rule = srv->shc_hc_rule->hcr_rule;
+	sg_srv = srv->shc_sg_srv;
+
+	if (srv->shc_state == ilbd_hc_def_pinging) {
+		if ((argv[0] = strdup(ILB_PROBE_PING)) == NULL)
+			return (B_FALSE);
+	} else {
+		switch (srv->shc_hc->ihc_test_type) {
+		case ILBD_HC_USER:
+			if ((argv[0] = strdup(srv->shc_hc->ihc_test)) == NULL)
+				return (B_FALSE);
+			break;
+		case ILBD_HC_TCP:
+		case ILBD_HC_UDP:
+			if ((argv[0] = strdup(ILB_PROBE_PROTO)) == NULL)
+				return (B_FALSE);
+			break;
+		case ILBD_HC_PING:
+			if ((argv[0] = strdup(ILB_PROBE_PING)) == NULL)
+				return (B_FALSE);
+			break;
+		default:
+			/*
+			 * Without a probe name argv[0] would stay NULL and
+			 * the list would look empty to everybody else.
+			 */
+			logerr("%s: unknown test type", __func__);
+			return (B_FALSE);
+		}
+	}
+
+	/*
+	 * argv[1] is the VIP.
+	 *
+	 * Right now, the VIP and the backend server addresses should be
+	 * in the same IP address family.  Here we don't do that in case
+	 * this assumption is changed in future.
+	 */
+	if (IN6_IS_ADDR_V4MAPPED(&rule->irl_vip)) {
+		IN6_V4MAPPED_TO_INADDR(&rule->irl_vip, &v4_addr);
+		if (inet_ntop(AF_INET, &v4_addr, buf, sizeof (buf)) == NULL)
+			goto cleanup;
+	} else {
+		if (inet_ntop(AF_INET6, &rule->irl_vip, buf,
+		    sizeof (buf)) == NULL) {
+			goto cleanup;
+		}
+	}
+	if ((argv[1] = strdup(buf)) == NULL)
+		goto cleanup;
+
+	/*
+	 * argv[2] is the backend server address.
+	 */
+	if (IN6_IS_ADDR_V4MAPPED(&sg_srv->sgs_addr)) {
+		IN6_V4MAPPED_TO_INADDR(&sg_srv->sgs_addr, &v4_addr);
+		if (inet_ntop(AF_INET, &v4_addr, buf, sizeof (buf)) == NULL)
+			goto cleanup;
+	} else {
+		if (inet_ntop(AF_INET6, &sg_srv->sgs_addr, buf,
+		    sizeof (buf)) == NULL) {
+			goto cleanup;
+		}
+	}
+	if ((argv[2] = strdup(buf)) == NULL)
+		goto cleanup;
+
+	/*
+	 * argv[3] is the transport protocol used in the rule.
+	 */
+	switch (rule->irl_proto) {
+	case IPPROTO_TCP:
+		argv[3] = strdup("TCP");
+		break;
+	case IPPROTO_UDP:
+		argv[3] = strdup("UDP");
+		break;
+	default:
+		logerr("%s: unknown protocol", __func__);
+		goto cleanup;
+	}
+	if (argv[3] == NULL)
+		goto cleanup;
+
+	/*
+	 * argv[4] is the load balance mode, DSR, NAT, HALF-NAT.
+	 */
+	if ((argv[4] = strdup(topo_2_str(rule->irl_topo))) == NULL)
+		goto cleanup;
+
+	/*
+	 * argv[5] is the port range.  Right now, there should only be 1 port.
+	 */
+	switch (rule->irl_hcpflag) {
+	case ILB_HCI_PROBE_FIX:
+		port = ntohs(rule->irl_hcport);
+		break;
+	case ILB_HCI_PROBE_ANY: {
+		in_port_t min, max;
+
+		/* Use the rule's port range if the server has none. */
+		if (ntohs(sg_srv->sgs_minport) == 0) {
+			min = ntohs(rule->irl_minport);
+			max = ntohs(rule->irl_maxport);
+		} else {
+			min = ntohs(sg_srv->sgs_minport);
+			max = ntohs(sg_srv->sgs_maxport);
+		}
+		/* Pick a pseudo random port within [min, max]. */
+		if (max > min)
+			port = min + gethrtime() % (max - min + 1);
+		else
+			port = min;
+		break;
+	}
+	default:
+		logerr("%s: unknown HC flag", __func__);
+		goto cleanup;
+	}
+	(void) snprintf(buf, sizeof (buf), "%d", port);
+	if ((argv[5] = strdup(buf)) == NULL)
+		goto cleanup;
+
+	/*
+	 * argv[6] is the probe timeout.
+	 */
+	(void) snprintf(buf, sizeof (buf), "%d", srv->shc_hc->ihc_timeout);
+	if ((argv[6] = strdup(buf)) == NULL)
+		goto cleanup;
+
+	argv[7] = NULL;
+	return (B_TRUE);
+
+cleanup:
+	/*
+	 * Clear every slot that is freed.  The caller runs destroy_argv()
+	 * on this array in its own error path, which would free the same
+	 * strings a second time if stale pointers were left behind.
+	 */
+	for (i = 0; i < HC_PROBE_ARGC; i++) {
+		free(argv[i]);
+		argv[i] = NULL;
+	}
+	return (B_FALSE);
+}
+
+/*
+ * Free the strings of a NULL terminated argument list built by
+ * create_argv().  Every freed slot is reset to NULL, so the list is empty
+ * afterwards and calling destroy_argv() on it again is harmless instead of
+ * freeing the same strings twice.
+ */
+static void
+destroy_argv(char *argv[])
+{
+	int i;
+
+	for (i = 0; argv[i] != NULL; i++) {
+		free(argv[i]);
+		argv[i] = NULL;
+	}
+}
+
+/*
+ * Spawn a process to run the hc probe on the given server.
+ *
+ * A pipe is created to collect the probe's output: the child gets the write
+ * end as its stdout, ilbd keeps the read end, makes it non-blocking and
+ * associates it with the server's event port.  The child has all signal
+ * handling reset to default.
+ *
+ * On success B_TRUE is returned and srv->shc_child_pid, srv->shc_child_fd
+ * and srv->shc_ev describe the running probe.  On failure B_FALSE is
+ * returned and every resource acquired here has been released.
+ */
+static boolean_t
+ilbd_run_probe(ilbd_hc_srv_t *srv)
+{
+	posix_spawn_file_actions_t fd_actions;
+	posix_spawnattr_t attr;
+	sigset_t child_sigset;
+	int fds[2];
+	int fdflags;
+	int spawn_err;
+	pid_t pid;
+	char *child_argv[HC_PROBE_ARGC];
+	ilbd_hc_probe_event_t *probe_ev;
+	char *probe_name;
+	boolean_t have_actions = B_FALSE;
+	boolean_t have_attr = B_FALSE;
+
+	bzero(child_argv, HC_PROBE_ARGC * sizeof (char *));
+	if ((probe_ev = calloc(1, sizeof (*probe_ev))) == NULL) {
+		logdebug("ilbd_run_probe: calloc");
+		return (B_FALSE);
+	}
+
+	/* Set up a pipe to get output from probe command. */
+	if (pipe(fds) < 0) {
+		logdebug("ilbd_run_probe: cannot create pipe");
+		free(probe_ev);
+		return (B_FALSE);
+	}
+	/* Set our side of the pipe to be non-blocking */
+	if ((fdflags = fcntl(fds[0], F_GETFL, 0)) == -1) {
+		logdebug("ilbd_run_probe: fcntl(F_GETFL)");
+		goto cleanup;
+	}
+	if (fcntl(fds[0], F_SETFL, fdflags | O_NONBLOCK) == -1) {
+		logdebug("ilbd_run_probe: fcntl(F_SETFL)");
+		goto cleanup;
+	}
+
+	if (posix_spawn_file_actions_init(&fd_actions) != 0) {
+		logdebug("ilbd_run_probe: posix_spawn_file_actions_init");
+		goto cleanup;
+	}
+	have_actions = B_TRUE;
+	if (posix_spawnattr_init(&attr) != 0) {
+		logdebug("ilbd_run_probe: posix_spawnattr_init");
+		goto cleanup;
+	}
+	have_attr = B_TRUE;
+
+	/* In the child: drop the read end, make the write end stdout. */
+	if (posix_spawn_file_actions_addclose(&fd_actions, fds[0]) != 0) {
+		logdebug("ilbd_run_probe: posix_spawn_file_actions_addclose");
+		goto cleanup;
+	}
+	if (posix_spawn_file_actions_adddup2(&fd_actions, fds[1],
+	    STDOUT_FILENO) != 0) {
+		logdebug("ilbd_run_probe: posix_spawn_file_actions_dup2");
+		goto cleanup;
+	}
+	if (posix_spawn_file_actions_addclose(&fd_actions, fds[1]) != 0) {
+		logdebug("ilbd_run_probe: posix_spawn_file_actions_addclose");
+		goto cleanup;
+	}
+
+	/* Reset all signal handling of the child to default. */
+	(void) sigfillset(&child_sigset);
+	if (posix_spawnattr_setsigdefault(&attr, &child_sigset) != 0) {
+		logdebug("ilbd_run_probe: posix_spawnattr_setsigdefault");
+		goto cleanup;
+	}
+	/* Don't want SIGCHLD. */
+	if (posix_spawnattr_setflags(&attr, POSIX_SPAWN_NOSIGCHLD_NP|
+	    POSIX_SPAWN_SETSIGDEF) != 0) {
+		logdebug("ilbd_run_probe: posix_spawnattr_setflags");
+		goto cleanup;
+	}
+
+	if (!create_argv(srv, child_argv)) {
+		logdebug("ilbd_run_probe: create_argv");
+		/*
+		 * create_argv() has already released whatever it had
+		 * allocated.  Make sure the cleanup code below does not
+		 * free any of it a second time.
+		 */
+		bzero(child_argv, HC_PROBE_ARGC * sizeof (char *));
+		goto cleanup;
+	}
+
+	/*
+	 * If we are doing default pinging or not using a user supplied
+	 * probe, we should execute our standard supplied probe.  The
+	 * supplied probe command handles all types of probes.  And the
+	 * type used depends on argv[0], as filled in by create_argv().
+	 */
+	if (srv->shc_state == ilbd_hc_def_pinging ||
+	    srv->shc_hc->ihc_test_type != ILBD_HC_USER) {
+		probe_name = ILB_PROBE_PROTO;
+	} else {
+		probe_name = srv->shc_hc->ihc_test;
+	}
+
+	/* posix_spawn() returns the error number; it does not set errno. */
+	spawn_err = posix_spawn(&pid, probe_name, &fd_actions, &attr,
+	    child_argv, NULL);
+	if (spawn_err != 0) {
+		logerr("%s: posix_spawn: %s for server %s: %s", __func__,
+		    srv->shc_hc->ihc_test, srv->shc_sg_srv->sgs_srvID,
+		    strerror(spawn_err));
+		goto cleanup;
+	}
+
+	/* The child is running; the spawn set-up is not needed anymore. */
+	(void) posix_spawnattr_destroy(&attr);
+	(void) posix_spawn_file_actions_destroy(&fd_actions);
+	(void) close(fds[1]);
+	destroy_argv(child_argv);
+	srv->shc_child_pid = pid;
+	srv->shc_child_fd = fds[0];
+	srv->shc_ev = probe_ev;
+
+	probe_ev->ihp_ev = ILBD_EVENT_PROBE;
+	probe_ev->ihp_srv = srv;
+	probe_ev->ihp_pid = pid;
+	if (port_associate(srv->shc_ev_port, PORT_SOURCE_FD, fds[0],
+	    POLLRDNORM, probe_ev) != 0) {
+		/*
+		 * Need to kill the child.  ilbd_hc_kill_probe() closes
+		 * srv->shc_child_fd (our end of the pipe) and frees
+		 * srv->shc_ev (probe_ev).  The other end of the pipe, the
+		 * argument list and the spawn set-up have been released
+		 * above, so nothing is left for the cleanup code to do.
+		 */
+		ilbd_hc_kill_probe(srv);
+		return (B_FALSE);
+	}
+
+	return (B_TRUE);
+
+cleanup:
+	if (have_attr)
+		(void) posix_spawnattr_destroy(&attr);
+	if (have_actions)
+		(void) posix_spawn_file_actions_destroy(&fd_actions);
+	(void) close(fds[0]);
+	(void) close(fds[1]);
+	destroy_argv(child_argv);
+	free(probe_ev);
+	return (B_FALSE);
+}
+
+/*
+ * Called by ilbd_hc_probe_return() and ilbd_hc_child_hup() to re-associate
+ * the fd to a child to the event port.
+ *
+ * If the association fails, the child is killed and reaped with a blocking
+ * waitpid(), and both the fd and the event object are released, since no
+ * further event will be delivered for them.
+ */
+static void
+reassociate_port(int ev_port, int fd, ilbd_hc_probe_event_t *ev)
+{
+	if (port_associate(ev_port, PORT_SOURCE_FD, fd,
+	    POLLRDNORM, ev) != 0) {
+		/*
+		 * If we cannot reassociate with the port, the only
+		 * thing we can do now is to kill the child and
+		 * do a blocking wait here...
+		 */
+		logdebug("%s: port_associate: %s", __func__, strerror(errno));
+		if (kill(ev->ihp_pid, SIGKILL) != 0)
+			logerr("%s: kill: %s", __func__, strerror(errno));
+		if (waitpid(ev->ihp_pid, NULL, 0) != ev->ihp_pid)
+			logdebug("%s: waitpid: %s", __func__, strerror(errno));
+		/* Nobody will look at this fd again; do not leak it. */
+		(void) close(fd);
+		free(ev);
+	}
+}
+
+/*
+ * To handle a child probe process hanging up.
+ *
+ * ev_port is the event port, fd the read end of the probe's pipe and ev the
+ * probe event delivered with the port event.  If the probe had not been
+ * marked done yet, it is counted as a failed probe.  The child is then
+ * reaped without blocking: once it has exited, fd is closed and ev is
+ * freed; otherwise fd is associated with the event port again so that the
+ * reaping is retried on the next event.
+ */
+static void
+ilbd_hc_child_hup(int ev_port, int fd, ilbd_hc_probe_event_t *ev)
+{
+ ilbd_hc_srv_t *srv;
+ pid_t ret_pid;
+ int ret;
+
+ srv = ev->ihp_srv;
+
+ if (!ev->ihp_done) {
+ /* ilbd does not care about this process anymore ... */
+ ev->ihp_done = B_TRUE;
+ /* Detach the server from this probe before handling the failure. */
+ srv->shc_ev = NULL;
+ srv->shc_child_pid = 0;
+ HC_CANCEL_TIMER(srv);
+ ilbd_set_fail_state(srv);
+ }
+ /* WNOHANG: never block the event loop waiting for the child. */
+ ret_pid = waitpid(ev->ihp_pid, &ret, WNOHANG);
+ switch (ret_pid) {
+ case -1:
+ /*
+ * NOTE(review): a waitpid() error is only logged and then handled
+ * like "child still running"; confirm that this cannot retry forever
+ * when the child can no longer be waited for.
+ */
+ logperror("ilbd_hc_child_hup: waitpid");
+ /* FALLTHROUGH */
+ case 0:
+ /* The child has not completed the exit. Wait again. */
+ reassociate_port(ev_port, fd, ev);
+ break;
+ default:
+ /* Right now, we just ignore the exit status. */
+ if (WIFEXITED(ret))
+ ret = WEXITSTATUS(ret);
+ /* The child is gone; release the pipe and the event object. */
+ (void) close(fd);
+ free(ev);
+ }
+}
+
+/*
+ * To read the output of a child probe process.
+ *
+ * fd is the read end of the probe's pipe and ev the probe event.  Only the
+ * first chunk of output of a probe is interpreted; it is parsed as a
+ * decimal RTT.  -1 means the probe failed.  A value in (0, UINT_MAX] is
+ * folded into the server's smoothed RTT.  After a successful default ping
+ * the real probe is started; after a successful probe the server is marked
+ * alive (and re-enabled in the kernel if it was unreachable or dead) and the
+ * periodic timer is restarted.
+ */
+static void
+ilbd_hc_child_data(int fd, ilbd_hc_probe_event_t *ev)
+{
+	ilbd_hc_srv_t	*srv = ev->ihp_srv;
+	char		out[HC_MAX_PROBE_OUTPUT];
+	int		nread;
+	int64_t		rtt;
+	boolean_t	was_down;
+
+	bzero(out, sizeof (out));
+	nread = read(fd, out, HC_MAX_PROBE_OUTPUT - 1);
+	/* Should not happen since event port should have caught this. */
+	assert(nread > 0);
+
+	/*
+	 * We expect the probe command to print out the RTT only.  But
+	 * the command may misbehave and print out more than what we intend
+	 * to read in.  Anything read after the first chunk only serves to
+	 * "flush" the output of the command and is thrown away.
+	 */
+	if (ev->ihp_done)
+		return;
+
+	ev->ihp_done = B_TRUE;
+	/* We don't need to know about this event anymore. */
+	srv->shc_ev = NULL;
+	srv->shc_child_pid = 0;
+	HC_CANCEL_TIMER(srv);
+
+	rtt = strtoll(out, NULL, 10);
+
+	/*
+	 * -1 means the server is dead or the probe somehow fails.  Treat
+	 * them both as server is dead.
+	 */
+	if (rtt == -1) {
+		ilbd_set_fail_state(srv);
+		return;
+	}
+
+	/* If the returned RTT value is not valid, just ignore it. */
+	if (rtt > 0 && rtt <= UINT_MAX) {
+		/* Set rtt to be the simple smoothed average. */
+		if (srv->shc_rtt == 0) {
+			srv->shc_rtt = rtt;
+		} else {
+			srv->shc_rtt = 3 * ((srv)->shc_rtt >> 2) +
+			    (rtt >> 2);
+		}
+	}
+
+	if (srv->shc_state == ilbd_hc_def_pinging) {
+		srv->shc_state = ilbd_hc_probing;
+
+		/* Ping is OK, now start the probe. */
+		ilbd_hc_probe_timer(ilbd_hc_timer_q, srv);
+	} else if (srv->shc_state == ilbd_hc_probing) {
+		srv->shc_fail_cnt = 0;
+
+		/*
+		 * A server that was unreachable or dead has to be
+		 * re-enabled in the kernel.  If that fails now, the status
+		 * is left alone; hopefully when the timer fires again
+		 * later, the enabling can be done.
+		 */
+		was_down = (srv->shc_status == ILB_HCS_UNREACH ||
+		    srv->shc_status == ILB_HCS_DEAD);
+		if (was_down &&
+		    ilbd_k_Xable_server(&srv->shc_sg_srv->sgs_addr,
+		    srv->shc_hc_rule->hcr_rule->irl_name,
+		    stat_declare_srv_alive) != ILB_STATUS_OK) {
+			logerr("%s: cannot enable server in kernel: "
+			    " rule %s server %s", __func__,
+			    srv->shc_hc_rule->hcr_rule->irl_name,
+			    srv->shc_sg_srv->sgs_srvID);
+		} else {
+			srv->shc_status = ILB_HCS_ALIVE;
+		}
+
+		if (ilbd_hc_restart_timer(srv->shc_hc, srv) != ILB_STATUS_OK) {
+			logerr("%s: cannot restart timer: rule %s server %s",
+			    __func__, srv->shc_hc_rule->hcr_rule->irl_name,
+			    srv->shc_sg_srv->sgs_srvID);
+			ilbd_mark_server_disabled(srv);
+		}
+	} else {
+		logdebug("%s: unknown state", __func__);
+	}
+}
+
+/*
+ * Handle the return event of a child probe fd.
+ *
+ * port_events holds the events reported for fd.  More than one event can be
+ * delivered at the same time, so each of them is checked: readable data is
+ * consumed first, then a hang-up or error is handled.  If the child has not
+ * hung up, fd is associated with the event port again so that the child's
+ * exit can be noticed and its status reaped.
+ */
+void
+ilbd_hc_probe_return(int ev_port, int fd, int port_events,
+    ilbd_hc_probe_event_t *ev)
+{
+	if (port_events & POLLRDNORM)
+		ilbd_hc_child_data(fd, ev);
+
+	if ((port_events & (POLLHUP|POLLERR)) == 0) {
+		/* Keep watching the fd until the child goes away. */
+		reassociate_port(ev_port, fd, ev);
+		return;
+	}
+
+	ilbd_hc_child_hup(ev_port, fd, ev);
+}
diff --git a/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_main.c b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_main.c
new file mode 100644
index 0000000000..2668f5681f
--- /dev/null
+++ b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_main.c
@@ -0,0 +1,1025 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * The core of ilbd daemon is a single-threaded event loop using
+ * event completion framework; it receives requests from client using
+ * the libilb functions, handles timeouts, initiates health checks, and
+ * populates the kernel state.
+ *
+ * The daemon has the following privileges (in addition to the basic ones):
+ *
+ * PRIV_PROC_OWNER, PRIV_NET_ICMPACCESS,
+ * PRIV_SYS_IP_CONFIG, PRIV_PROC_AUDIT
+ *
+ * The aforementioned privileges will be specified in the SMF manifest.
+ *
+ * AF_UNIX socket is used for IPC between libilb and this daemon as
+ * both processes will run on the same machine.
+ *
+ * To do health check, the daemon will create a timer for every health
+ * check probe. Each of these timers will be associated with the
+ * event port. When a timer goes off, the daemon will initiate a
+ * pipe to a separate process to execute the specific health check
+ * probe. This new process will run with the same user-id as that of
+ * ilbd daemon and will inherit all the privileges from the ilbd
+ * daemon parent process except the following:
+ *
+ * PRIV_PROC_OWNER, PRIV_PROC_AUDIT
+ *
+ * All health checks, will be implemented as external methods
+ * (binary or script). The following arguments will be passed
+ * to external methods:
+ *
+ * $1 VIP (literal IPv4 or IPv6 address)
+ * $2 Server IP (literal IPv4 or IPv6 address)
+ * $3 Protocol (UDP, TCP as a string)
+ * $4 The load balance mode, "DSR", "NAT", "HALF_NAT"
+ * $5 Numeric port range
+ * $6 maximum time (in seconds) the method
+ * should wait before returning failure. If the method runs for
+ * longer, it may be killed, and the test considered failed.
+ *
+ * Upon success, a health check method should print the RTT it
+ * finds to its STDOUT for ilbd to consume. The implicit unit
+ * is microseconds but only the number needs to be printed. If it
+ * cannot find the RTT, it should print 0. If the method decides
+ * that the server is dead, it should print -1 to its STDOUT.
+ *
+ * By default, a user-supplied health check probe process will
+ * also run with the same set of privileges as ILB's built-in
+ * probes. If the administrator has a user-supplied health check
+ * program that requires a larger privilege set, he/she will have
+ * to implement a setuid program.
+ *
+ * Each health check will have a timeout, such that if the health
+ * check process is hung, it will be killed after the timeout interval
+ * and the daemon will notify the kernel ILB engine of the server's
+ * unresponsiveness, so that load distribution can be appropriately
+ * adjusted. If on the other hand the health check is successful
+ * the timeout timer is cancelled.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <libgen.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <signal.h>
+#include <port.h>
+#include <ctype.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <sys/note.h>
+#include <sys/resource.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <errno.h>
+#include <ucred.h>
+#include <priv_utils.h>
+#include <net/if.h>
+#include <libilb.h>
+#include <assert.h>
+#include <inet/ilb.h>
+#include <libintl.h>
+#include <fcntl.h>
+#include <rpcsvc/daemon_utils.h>
+#include "libilb_impl.h"
+#include "ilbd.h"
+
+/*
+ * NOTE: The following needs to be kept up to date.
+ */
+#define ILBD_VERSION "1.0"
+#define ILBD_COPYRIGHT \
+ "Copyright 2009 Sun Microsystems, Inc. All rights reserved.\n" \
+ "Use is subject to license terms.\n"
+
+/*
+ * Global reply buffer to client request. Note that ilbd is single threaded,
+ * so a global buffer is OK. If ilbd becomes multi-threaded, this needs to
+ * be changed.
+ */
+static uint32_t reply_buf[ILBD_MSG_SIZE / sizeof (uint32_t)];
+
+ /*
+  * Tear down a client: close its socket, release a show-nat/show-persist
+  * listing if that was the client's last command, and free everything
+  * hanging off the ilbd_client_t, including the struct itself.
+  *
+  * cli must not be used after this call.
+  */
+ static void
+ ilbd_free_cli(ilbd_client_t *cli)
+ {
+ 	(void) close(cli->cli_sd);
+ 	if (cli->cli_cmd == ILBD_SHOW_NAT)
+ 		ilbd_show_nat_cleanup();
+ 	if (cli->cli_cmd == ILBD_SHOW_PERSIST)
+ 		ilbd_show_sticky_cleanup();
+
+ 	/*
+ 	 * The peer credential is obtained with getpeerucred() in new_req()
+ 	 * and has to be released with ucred_free(); free(cli) alone would
+ 	 * leak it.
+ 	 */
+ 	if (cli->cli_peer_ucredp != NULL)
+ 		ucred_free(cli->cli_peer_ucredp);
+
+ 	/* free(NULL) is a no-op, so no guards are needed here. */
+ 	free(cli->cli_saved_reply);
+ 	free(cli->cli_pw_buf);
+ 	free(cli);
+ }
+
+ /*
+  * Ask the kernel to destroy all ILB rules.  A failure is only reported
+  * through logdebug(); nothing is returned to the caller.
+  */
+ static void
+ ilbd_reset_kernel_state(void)
+ {
+ 	ilb_name_cmd_t destroy_all;
+
+ 	/* No rule name is given: ILB_RULE_ALLRULES selects every rule. */
+ 	destroy_all.name[0] = '\0';
+ 	destroy_all.flags = ILB_RULE_ALLRULES;
+ 	destroy_all.cmd = ILB_DESTROY_RULE;
+
+ 	if (do_ioctl(&destroy_all, 0) == ILB_STATUS_OK)
+ 		return;
+
+ 	logdebug("ilbd_reset_kernel_state: do_ioctl failed: %s",
+ 	    strerror(errno));
+ }
+
+ /*
+ * Signal handler to do clean up: removes the client socket file at
+ * SOCKET_PATH, asks the kernel to drop its ILB state through
+ * ilbd_reset_kernel_state(), then exits with status 0. It is installed
+ * for SIGINT, SIGTERM and SIGQUIT by ilbd_create_client_socket(). The
+ * sig argument is not used.
+ *
+ * NOTE(review): exit() and the logging that ilbd_reset_kernel_state() may
+ * do are not async-signal-safe -- TODO confirm this is acceptable here.
+ */
+ /* ARGSUSED */
+ static void
+ ilbd_cleanup(int sig)
+ {
+ (void) remove(SOCKET_PATH);
+ ilbd_reset_kernel_state();
+ exit(0);
+ }
+
+/*
+ * Create a socket and return it to caller. If there is a failure, this
+ * function calls exit(2). Hence it always returns a valid listener socket.
+ *
+ * Note that this function is called before ilbd becomes a daemon. So
+ * we call perror(3C) to print out error message directly so that SMF can
+ * catch them.
+ */
+static int
+ilbd_create_client_socket(void)
+{
+ int s;
+ mode_t omask;
+ struct sockaddr_un sa;
+ int sobufsz;
+
+ s = socket(PF_UNIX, SOCK_SEQPACKET, 0);
+ if (s == -1) {
+ perror("ilbd_create_client_socket: socket to"
+ " client failed");
+ exit(errno);
+ }
+ if (fcntl(s, F_SETFD, FD_CLOEXEC) == -1) {
+ perror("ilbd_create_client_socket: fcntl(FD_CLOEXEC)");
+ exit(errno);
+ }
+
+ sobufsz = ILBD_MSG_SIZE;
+ if (setsockopt(s, SOL_SOCKET, SO_SNDBUF, &sobufsz,
+ sizeof (sobufsz)) != 0) {
+ perror("ilbd_creat_client_socket: setsockopt(SO_SNDBUF) "
+ "failed");
+ exit(errno);
+ }
+ if (setsockopt(s, SOL_SOCKET, SO_RCVBUF, &sobufsz,
+ sizeof (sobufsz)) != 0) {
+ perror("ilbd_creat_client_socket: setsockopt(SO_RCVBUF) "
+ "failed");
+ exit(errno);
+ }
+
+ /*
+ * since everybody can talk to us, we need to open up permissions
+ * we check peer privileges on a per-operation basis.
+ * This is no security issue as long as we're single-threaded.
+ */
+ omask = umask(0);
+
+ /* just in case we didn't clean up properly after last exit */
+ (void) remove(SOCKET_PATH);
+
+ bzero(&sa, sizeof (sa));
+ sa.sun_family = AF_UNIX;
+ (void) strlcpy(sa.sun_path, SOCKET_PATH, sizeof (sa.sun_path));
+
+ if (bind(s, (struct sockaddr *)&sa, sizeof (sa)) != 0) {
+ perror("ilbd_create_client_socket(): bind to client"
+ " socket failed");
+ exit(errno);
+ }
+
+ /* re-instate old umask */
+ (void) umask(omask);
+
+#define QLEN 16
+
+ if (listen(s, QLEN) != 0) {
+ perror("ilbd_create_client_socket: listen to client"
+ " socket failed");
+ exit(errno);
+ }
+
+ (void) signal(SIGHUP, SIG_IGN);
+ (void) signal(SIGPIPE, SIG_IGN);
+ (void) signal(SIGSTOP, SIG_IGN);
+ (void) signal(SIGTSTP, SIG_IGN);
+ (void) signal(SIGTTIN, SIG_IGN);
+ (void) signal(SIGTTOU, SIG_IGN);
+
+ (void) signal(SIGINT, ilbd_cleanup);
+ (void) signal(SIGTERM, ilbd_cleanup);
+ (void) signal(SIGQUIT, ilbd_cleanup);
+
+ return (s);
+}
+
+/*
+ * Return the minimum size of a given request. The returned size does not
+ * include the variable part of a request.
+ */
+static size_t
+ilbd_cmd_size(const ilb_comm_t *ic)
+{
+ size_t cmd_sz;
+
+ cmd_sz = sizeof (*ic);
+ switch (ic->ic_cmd) {
+ case ILBD_RETRIEVE_SG_NAMES:
+ case ILBD_RETRIEVE_RULE_NAMES:
+ case ILBD_RETRIEVE_HC_NAMES:
+ case ILBD_CMD_OK:
+ break;
+ case ILBD_CMD_ERROR:
+ cmd_sz += sizeof (ilb_status_t);
+ break;
+ case ILBD_RETRIEVE_SG_HOSTS:
+ case ILBD_CREATE_SERVERGROUP:
+ case ILBD_DESTROY_SERVERGROUP:
+ case ILBD_DESTROY_RULE:
+ case ILBD_ENABLE_RULE:
+ case ILBD_DISABLE_RULE:
+ case ILBD_RETRIEVE_RULE:
+ case ILBD_DESTROY_HC:
+ case ILBD_GET_HC_INFO:
+ case ILBD_GET_HC_SRVS:
+ cmd_sz += sizeof (ilbd_name_t);
+ break;
+ case ILBD_ENABLE_SERVER:
+ case ILBD_DISABLE_SERVER:
+ case ILBD_ADD_SERVER_TO_GROUP:
+ case ILBD_REM_SERVER_FROM_GROUP:
+ cmd_sz += sizeof (ilb_sg_info_t);
+ break;
+ case ILBD_SRV_ADDR2ID:
+ case ILBD_SRV_ID2ADDR:
+ cmd_sz += sizeof (ilb_sg_info_t) + sizeof (ilb_sg_srv_t);
+ break;
+ case ILBD_CREATE_RULE:
+ cmd_sz += sizeof (ilb_rule_info_t);
+ break;
+ case ILBD_CREATE_HC:
+ cmd_sz += sizeof (ilb_hc_info_t);
+ break;
+ case ILBD_SHOW_NAT:
+ case ILBD_SHOW_PERSIST:
+ cmd_sz += sizeof (ilb_show_info_t);
+ break;
+ }
+
+ return (cmd_sz);
+}
+
+/*
+ * Given a request and its size, check that the size is big enough to
+ * contain the variable part of a request.
+ */
+static ilb_status_t
+ilbd_check_req_size(ilb_comm_t *ic, size_t ic_sz)
+{
+ ilb_status_t rc = ILB_STATUS_OK;
+ ilb_sg_info_t *sg_info;
+ ilbd_namelist_t *nlist;
+
+ switch (ic->ic_cmd) {
+ case ILBD_CREATE_SERVERGROUP:
+ case ILBD_ENABLE_SERVER:
+ case ILBD_DISABLE_SERVER:
+ case ILBD_ADD_SERVER_TO_GROUP:
+ case ILBD_REM_SERVER_FROM_GROUP:
+ sg_info = (ilb_sg_info_t *)&ic->ic_data;
+
+ if (ic_sz < ilbd_cmd_size(ic) + sg_info->sg_srvcount *
+ sizeof (ilb_sg_srv_t)) {
+ rc = ILB_STATUS_EINVAL;
+ }
+ break;
+ case ILBD_ENABLE_RULE:
+ case ILBD_DISABLE_RULE:
+ case ILBD_DESTROY_RULE:
+ nlist = (ilbd_namelist_t *)&ic->ic_data;
+
+ if (ic_sz < ilbd_cmd_size(ic) + nlist->ilbl_count *
+ sizeof (ilbd_name_t)) {
+ rc = ILB_STATUS_EINVAL;
+ }
+ break;
+ }
+ return (rc);
+}
+
+/*
+ * this function *relies* on a complete message/data struct
+ * being passed in (currently via the SOCK_SEQPACKET socket type).
+ *
+ * Note that the size of ip is at most ILBD_MSG_SIZE.
+ */
+static ilb_status_t
+consume_common_struct(ilb_comm_t *ic, size_t ic_sz, ilbd_client_t *cli,
+ int ev_port)
+{
+ ilb_status_t rc;
+ struct passwd *ps;
+ size_t rbufsz;
+ ssize_t ret;
+ boolean_t standard_reply = B_TRUE;
+ ilbd_name_t name;
+
+ /*
+ * cli_ev must be overridden during handling of individual commands,
+ * if there's a special need; otherwise, leave this for
+ * the "default" case
+ */
+ cli->cli_ev = ILBD_EVENT_REQ;
+
+ ps = &cli->cli_pw;
+ rbufsz = ILBD_MSG_SIZE;
+
+ /* Sanity check on the size of the static part of a request. */
+ if (ic_sz < ilbd_cmd_size(ic)) {
+ rc = ILB_STATUS_EINVAL;
+ goto out;
+ }
+
+ switch (ic->ic_cmd) {
+ case ILBD_CREATE_SERVERGROUP: {
+ ilb_sg_info_t sg_info;
+
+ /*
+ * ilbd_create_sg() only needs the sg_name field. But it
+ * takes in a ilb_sg_info_t because it is used as a callback
+ * in ilbd_walk_sg_pgs().
+ */
+ (void) strlcpy(sg_info.sg_name, (char *)&(ic->ic_data),
+ sizeof (sg_info.sg_name));
+ rc = ilbd_create_sg(&sg_info, ev_port, ps,
+ cli->cli_peer_ucredp);
+ break;
+ }
+
+ case ILBD_DESTROY_SERVERGROUP:
+ (void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name));
+ rc = ilbd_destroy_sg(name, ps, cli->cli_peer_ucredp);
+ break;
+
+ case ILBD_ADD_SERVER_TO_GROUP:
+ if ((rc = ilbd_check_req_size(ic, ic_sz)) != ILB_STATUS_OK)
+ break;
+ rc = ilbd_add_server_to_group((ilb_sg_info_t *)&ic->ic_data,
+ ev_port, ps, cli->cli_peer_ucredp);
+ break;
+
+ case ILBD_REM_SERVER_FROM_GROUP:
+ if ((rc = ilbd_check_req_size(ic, ic_sz)) != ILB_STATUS_OK)
+ break;
+ rc = ilbd_rem_server_from_group((ilb_sg_info_t *)&ic->ic_data,
+ ev_port, ps, cli->cli_peer_ucredp);
+ break;
+
+ case ILBD_ENABLE_SERVER:
+ if ((rc = ilbd_check_req_size(ic, ic_sz)) != ILB_STATUS_OK)
+ break;
+ rc = ilbd_enable_server((ilb_sg_info_t *)&ic->ic_data, ps,
+ cli->cli_peer_ucredp);
+ break;
+
+ case ILBD_DISABLE_SERVER:
+ if ((rc = ilbd_check_req_size(ic, ic_sz)) != ILB_STATUS_OK)
+ break;
+ rc = ilbd_disable_server((ilb_sg_info_t *)&ic->ic_data, ps,
+ cli->cli_peer_ucredp);
+ break;
+
+ case ILBD_SRV_ADDR2ID:
+ rc = ilbd_address_to_srvID((ilb_sg_info_t *)&ic->ic_data,
+ reply_buf, &rbufsz);
+ if (rc == ILB_STATUS_OK)
+ standard_reply = B_FALSE;
+ break;
+
+ case ILBD_SRV_ID2ADDR:
+ rc = ilbd_srvID_to_address((ilb_sg_info_t *)&ic->ic_data,
+ reply_buf, &rbufsz);
+ if (rc == ILB_STATUS_OK)
+ standard_reply = B_FALSE;
+ break;
+
+ case ILBD_RETRIEVE_SG_HOSTS:
+ (void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name));
+ rc = ilbd_retrieve_sg_hosts(name, reply_buf, &rbufsz);
+ if (rc == ILB_STATUS_OK)
+ standard_reply = B_FALSE;
+ break;
+
+ case ILBD_RETRIEVE_SG_NAMES:
+ case ILBD_RETRIEVE_RULE_NAMES:
+ case ILBD_RETRIEVE_HC_NAMES:
+ rc = ilbd_retrieve_names(ic->ic_cmd, reply_buf, &rbufsz);
+ if (rc == ILB_STATUS_OK)
+ standard_reply = B_FALSE;
+ break;
+
+ case ILBD_CREATE_RULE:
+ rc = ilbd_create_rule((ilb_rule_info_t *)&ic->ic_data, ev_port,
+ ps, cli->cli_peer_ucredp);
+ break;
+
+ case ILBD_DESTROY_RULE:
+ /* Copy the name to ensure that name is NULL terminated. */
+ (void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name));
+ rc = ilbd_destroy_rule(name, ps, cli->cli_peer_ucredp);
+ break;
+
+ case ILBD_ENABLE_RULE:
+ (void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name));
+ rc = ilbd_enable_rule(name, ps, cli->cli_peer_ucredp);
+ break;
+
+ case ILBD_DISABLE_RULE:
+ (void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name));
+ rc = ilbd_disable_rule(name, ps, cli->cli_peer_ucredp);
+ break;
+
+ case ILBD_RETRIEVE_RULE:
+ (void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name));
+ rc = ilbd_retrieve_rule(name, reply_buf, &rbufsz);
+ if (rc == ILB_STATUS_OK)
+ standard_reply = B_FALSE;
+ break;
+
+ case ILBD_CREATE_HC:
+ rc = ilbd_create_hc((ilb_hc_info_t *)&ic->ic_data, ev_port, ps,
+ cli->cli_peer_ucredp);
+ break;
+
+ case ILBD_DESTROY_HC:
+ (void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name));
+ rc = ilbd_destroy_hc(name, ps, cli->cli_peer_ucredp);
+ break;
+
+ case ILBD_GET_HC_INFO:
+ (void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name));
+ rc = ilbd_get_hc_info(name, reply_buf, &rbufsz);
+ if (rc == ILB_STATUS_OK)
+ standard_reply = B_FALSE;
+ break;
+
+ case ILBD_GET_HC_SRVS:
+ (void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name));
+ rc = ilbd_get_hc_srvs(name, reply_buf, &rbufsz);
+ if (rc == ILB_STATUS_OK)
+ standard_reply = B_FALSE;
+ break;
+
+ case ILBD_SHOW_NAT:
+ rc = ilbd_show_nat(cli, ic, reply_buf, &rbufsz);
+ if (rc == ILB_STATUS_OK)
+ standard_reply = B_FALSE;
+ break;
+
+ case ILBD_SHOW_PERSIST:
+ rc = ilbd_show_sticky(cli, ic, reply_buf, &rbufsz);
+ if (rc == ILB_STATUS_OK)
+ standard_reply = B_FALSE;
+ break;
+
+ default:
+ logdebug("consume_common_struct: unknown command");
+ rc = ILB_STATUS_INVAL_CMD;
+ break;
+ }
+
+out:
+ /*
+ * The message exchange is always in pairs, request/response. If
+ * a transaction requires multiple exchanges, the client will send
+ * in multiple requests to get multiple responses. The show-nat and
+ * show-persist request are examples of this. The end of transaction
+ * is marked with ic_flags set to ILB_COMM_END.
+ */
+
+ /* This is the standard reply. */
+ if (standard_reply) {
+ if (rc == ILB_STATUS_OK)
+ ilbd_reply_ok(reply_buf, &rbufsz);
+ else
+ ilbd_reply_err(reply_buf, &rbufsz, rc);
+ }
+
+ if ((ret = send(cli->cli_sd, reply_buf, rbufsz, 0)) != rbufsz) {
+ if (ret == -1) {
+ if (errno != EWOULDBLOCK) {
+ logdebug("consume_common_struct: send: %s",
+ strerror(errno));
+ rc = ILB_STATUS_SEND;
+ goto err_out;
+ }
+ /*
+ * The reply is blocked, save the reply. handle_req()
+ * will associate the event port for the re-send.
+ */
+ assert(cli->cli_saved_reply == NULL);
+ if ((cli->cli_saved_reply = malloc(rbufsz)) == NULL) {
+ /*
+ * Set the error to ILB_STATUS_SEND so that
+ * handle_req() will free the client.
+ */
+ logdebug("consume_common_struct: failure to "
+ "allocate memory to save reply");
+ rc = ILB_STATUS_SEND;
+ goto err_out;
+ }
+ bcopy(reply_buf, cli->cli_saved_reply, rbufsz);
+ cli->cli_saved_size = rbufsz;
+ return (ILB_STATUS_EWOULDBLOCK);
+ }
+ }
+err_out:
+ return (rc);
+}
+
+/*
+ * Accept a new client request. A struct ilbd_client_t is allocated to
+ * store the client info. The accepted socket is port_associate() with
+ * the given port. And the allocated ilbd_client_t struct is passed as
+ * the user pointer.
+ */
+static void
+new_req(int ev_port, int listener, void *ev_obj)
+{
+ struct sockaddr sa;
+ int sa_len;
+ int new_sd;
+ int sflags;
+ ilbd_client_t *cli;
+ int res;
+ uid_t uid;
+
+ sa_len = sizeof (sa);
+ if ((new_sd = accept(listener, &sa, &sa_len)) == -1) {
+ /* don't log if we're out of file descriptors */
+ if (errno != EINTR && errno != EMFILE)
+ logperror("new_req: accept failed");
+ goto done;
+ }
+
+ /* Set the new socket to be non-blocking. */
+ if ((sflags = fcntl(new_sd, F_GETFL, 0)) == -1) {
+ logperror("new_req: fcntl(F_GETFL)");
+ goto clean_up;
+ }
+ if (fcntl(new_sd, F_SETFL, sflags | O_NONBLOCK) == -1) {
+ logperror("new_req: fcntl(F_SETFL)");
+ goto clean_up;
+ }
+ if (fcntl(new_sd, F_SETFD, FD_CLOEXEC) == -1) {
+ logperror("new_req: fcntl(FD_CLOEXEC)");
+ goto clean_up;
+ }
+ if ((cli = calloc(1, sizeof (ilbd_client_t))) == NULL) {
+ logerr("new_req: malloc(ilbd_client_t)");
+ goto clean_up;
+ }
+ res = getpeerucred(new_sd, &cli->cli_peer_ucredp);
+ if (res == -1) {
+ logperror("new_req: getpeerucred failed");
+ goto clean_up;
+ }
+ if ((uid = ucred_getruid(cli->cli_peer_ucredp)) == (uid_t)-1) {
+ logperror("new_req: ucred_getruid failed");
+ goto clean_up;
+ }
+ cli->cli_pw_bufsz = (size_t)sysconf(_SC_GETPW_R_SIZE_MAX);
+ if ((cli->cli_pw_buf = malloc(cli->cli_pw_bufsz)) == NULL) {
+ free(cli);
+ logerr("new_req: malloc(cli_pw_buf)");
+ goto clean_up;
+ }
+ if (getpwuid_r(uid, &cli->cli_pw, cli->cli_pw_buf,
+ cli->cli_pw_bufsz) == NULL) {
+ free(cli->cli_pw_buf);
+ free(cli);
+ logperror("new_req: invalid user");
+ goto clean_up;
+ }
+ cli->cli_ev = ILBD_EVENT_REQ;
+ cli->cli_sd = new_sd;
+ cli->cli_cmd = ILBD_BAD_CMD;
+ cli->cli_saved_reply = NULL;
+ cli->cli_saved_size = 0;
+ if (port_associate(ev_port, PORT_SOURCE_FD, new_sd, POLLRDNORM,
+ cli) == -1) {
+ logperror("new_req: port_associate(cli) failed");
+ free(cli->cli_pw_buf);
+ free(cli);
+clean_up:
+ (void) close(new_sd);
+ }
+
+done:
+ /* Re-associate the listener with the event port. */
+ if (port_associate(ev_port, PORT_SOURCE_FD, listener, POLLRDNORM,
+ ev_obj) == -1) {
+ logperror("new_req: port_associate(listener) failed");
+ exit(1);
+ }
+}
+
+ /*
+  * Service an event on a client socket.
+  *
+  * ev_port: the event port the client socket is re-associated with.
+  * event: ILBD_EVENT_REQ when a new request is readable, otherwise
+  *	ILBD_EVENT_REP_OK when a previously blocked reply can be re-sent.
+  * cli: the client.
+  *
+  * On return the client is either re-associated with the port (for
+  * POLLRDNORM, or POLLWRNORM while a reply is still blocked) or has been
+  * freed with ilbd_free_cli().
+  */
+ static void
+ handle_req(int ev_port, ilbd_event_t event, ilbd_client_t *cli)
+ {
+ 	/* All request should be smaller than ILBD_MSG_SIZE */
+ 	union {
+ 		ilb_comm_t ic;
+ 		uint32_t buf[ILBD_MSG_SIZE / sizeof (uint32_t)];
+ 	} ic_u;
+ 	int rc = ILB_STATUS_OK;
+ 	ssize_t r;
+
+ 	if (event == ILBD_EVENT_REQ) {
+ 		/*
+ 		 * Something is wrong with the client since there is a
+ 		 * pending reply, the client should not send us another
+ 		 * request.  Kill this client.
+ 		 */
+ 		if (cli->cli_saved_reply != NULL) {
+ 			logerr("handle_req: misbehaving client, more than one "
+ 			    "outstanding request");
+ 			rc = ILB_STATUS_INTERNAL;
+ 			goto err_out;
+ 		}
+
+ 		/*
+ 		 * Clear the request header first.  A message shorter than
+ 		 * the header (including an empty one) would otherwise leave
+ 		 * ic_cmd holding whatever was on the stack, and that value
+ 		 * is read below and by consume_common_struct().
+ 		 */
+ 		bzero(&ic_u.ic, sizeof (ic_u.ic));
+
+ 		/*
+ 		 * Our socket is message based so we should be able
+ 		 * to get the request in one single read.
+ 		 */
+ 		r = recv(cli->cli_sd, (void *)ic_u.buf, sizeof (ic_u.buf), 0);
+ 		if (r < 0) {
+ 			if (errno != EINTR) {
+ 				logperror("handle_req: read failed");
+ 				rc = ILB_STATUS_READ;
+ 				goto err_out;
+ 			}
+ 			/*
+ 			 * If interrupted, just re-associate the cli_sd
+ 			 * with the port.
+ 			 */
+ 			goto done;
+ 		}
+
+ 		/*
+ 		 * Only remember the command when a whole header arrived;
+ 		 * cli_cmd decides what ilbd_free_cli() cleans up.  A short
+ 		 * request is still handed on so that the client gets the
+ 		 * usual error reply.
+ 		 */
+ 		if ((size_t)r >= sizeof (ic_u.ic))
+ 			cli->cli_cmd = ic_u.ic.ic_cmd;
+
+ 		rc = consume_common_struct(&ic_u.ic, r, cli, ev_port);
+ 		if (rc == ILB_STATUS_EWOULDBLOCK)
+ 			goto blocked;
+ 		/* Fatal error communicating with client, free it. */
+ 		if (rc == ILB_STATUS_SEND)
+ 			goto err_out;
+ 	} else {
+ 		assert(event == ILBD_EVENT_REP_OK);
+ 		assert(cli->cli_saved_reply != NULL);
+
+ 		/*
+ 		 * The reply to client was previously blocked, we will
+ 		 * send again.
+ 		 */
+ 		if (send(cli->cli_sd, cli->cli_saved_reply,
+ 		    cli->cli_saved_size, 0) != cli->cli_saved_size) {
+ 			if (errno != EWOULDBLOCK) {
+ 				logdebug("handle_req: send: %s",
+ 				    strerror(errno));
+ 				rc = ILB_STATUS_SEND;
+ 				goto err_out;
+ 			}
+ 			goto blocked;
+ 		}
+ 		free(cli->cli_saved_reply);
+ 		cli->cli_saved_reply = NULL;
+ 		cli->cli_saved_size = 0;
+ 	}
+ done:
+ 	/* Re-associate with the event port for more requests. */
+ 	cli->cli_ev = ILBD_EVENT_REQ;
+ 	if (port_associate(ev_port, PORT_SOURCE_FD, cli->cli_sd,
+ 	    POLLRDNORM, cli) == -1) {
+ 		logperror("handle_req: port_associate(POLLRDNORM)");
+ 		rc = ILB_STATUS_INTERNAL;
+ 		goto err_out;
+ 	}
+ 	return;
+
+ blocked:
+ 	/* Re-associate with the event port. */
+ 	cli->cli_ev = ILBD_EVENT_REP_OK;
+ 	if (port_associate(ev_port, PORT_SOURCE_FD, cli->cli_sd, POLLWRNORM,
+ 	    cli) == -1) {
+ 		logperror("handle_req: port_associate(POLLWRNORM)");
+ 		rc = ILB_STATUS_INTERNAL;
+ 		goto err_out;
+ 	}
+ 	return;
+
+ err_out:
+ 	ilbd_free_cli(cli);
+ }
+
+ /*
+ * Read the stored configuration: walk the server groups, then the health
+ * checks, then the rules, handing each entry to the matching
+ * ilbd_create_*() function. The walkers receive a pointer to ev_port;
+ * their return values are ignored.
+ *
+ * NOTE(review): the order presumably matters because rules refer to
+ * server groups and health checks -- confirm before reordering.
+ */
+ static void
+ i_ilbd_read_config(int ev_port)
+ {
+ logdebug("i_ilbd_read_config: port %d", ev_port);
+ (void) ilbd_walk_sg_pgs(ilbd_create_sg, &ev_port, NULL);
+ (void) ilbd_walk_hc_pgs(ilbd_create_hc, &ev_port, NULL);
+ (void) ilbd_walk_rule_pgs(ilbd_create_rule, &ev_port, NULL);
+ }
+
+/*
+ * main event loop for ilbd
+ * asserts that argument 'listener' is a server socket ready to accept() on.
+ */
+static void
+main_loop(int listener)
+{
+ port_event_t p_ev;
+ int ev_port, ev_port_obj;
+ ilbd_event_obj_t ev_obj;
+ ilbd_timer_event_obj_t timer_ev_obj;
+
+ ev_port = port_create();
+ if (ev_port == -1) {
+ logperror("main_loop: port_create failed");
+ exit(-1);
+ }
+ ilbd_hc_timer_init(ev_port, &timer_ev_obj);
+
+ ev_obj.ev = ILBD_EVENT_NEW_REQ;
+ if (port_associate(ev_port, PORT_SOURCE_FD, listener, POLLRDNORM,
+ &ev_obj) == -1) {
+ logperror("main_loop: port_associate failed");
+ exit(1);
+ }
+
+ i_ilbd_read_config(ev_port);
+ ilbd_hc_timer_update(&timer_ev_obj);
+
+ _NOTE(CONSTCOND)
+ while (B_TRUE) {
+ int r;
+ ilbd_event_t event;
+ ilbd_client_t *cli;
+
+ r = port_get(ev_port, &p_ev, NULL);
+ if (r == -1) {
+ if (errno == EINTR)
+ continue;
+ logperror("main_loop: port_get failed");
+ break;
+ }
+
+ ev_port_obj = p_ev.portev_object;
+ event = ((ilbd_event_obj_t *)p_ev.portev_user)->ev;
+
+ switch (event) {
+ case ILBD_EVENT_TIMER:
+ ilbd_hc_timeout();
+ break;
+
+ case ILBD_EVENT_PROBE:
+ ilbd_hc_probe_return(ev_port, ev_port_obj,
+ p_ev.portev_events,
+ (ilbd_hc_probe_event_t *)p_ev.portev_user);
+ break;
+
+ case ILBD_EVENT_NEW_REQ:
+ assert(ev_port_obj == listener);
+ /*
+ * An error happens in the listener. Exit
+ * for now....
+ */
+ if (p_ev.portev_events & (POLLHUP|POLLERR)) {
+ logerr("main_loop: listener error");
+ exit(1);
+ }
+ new_req(ev_port, ev_port_obj, &ev_obj);
+ break;
+
+ case ILBD_EVENT_REP_OK:
+ case ILBD_EVENT_REQ:
+ cli = (ilbd_client_t *)p_ev.portev_user;
+ assert(ev_port_obj == cli->cli_sd);
+
+ /*
+ * An error happens in the newly accepted
+ * client request. Clean up the client.
+ * this also happens when client closes socket,
+ * so not necessarily a reason for alarm
+ */
+ if (p_ev.portev_events & (POLLHUP|POLLERR)) {
+ ilbd_free_cli(cli);
+ break;
+ }
+
+ handle_req(ev_port, event, cli);
+ break;
+
+ default:
+ logerr("main_loop: unknown event %d", event);
+ exit(EXIT_FAILURE);
+ break;
+ }
+
+ ilbd_hc_timer_update(&timer_ev_obj);
+ }
+}
+
+ /*
+ * Run the three list set-up routines: server groups, rules and health
+ * checks. main() calls this before entering main_loop().
+ */
+ static void
+ i_ilbd_setup_lists(void)
+ {
+ i_setup_sg_hlist();
+ i_setup_rule_hlist();
+ i_ilbd_setup_hc_list();
+ }
+
+/*
+ * Usage message - call only during startup. it will print its
+ * message on stderr and exit
+ */
+static void
+Usage(char *name)
+{
+ (void) fprintf(stderr, gettext("Usage: %s [-d|--debug]\n"), name);
+ exit(1);
+}
+
+ /*
+  * Print the program name (basename of 'name'), ILBD_VERSION and the
+  * translated copyright notice on stdout, then exit with status 0.
+  */
+ static void
+ print_version(char *name)
+ {
+ 	(void) printf("%s %s\n", basename(name), ILBD_VERSION);
+ 	/*
+ 	 * The translated text comes from a message catalog, so it is printed
+ 	 * as data and never used as the format string.
+ 	 */
+ 	(void) printf("%s", gettext(ILBD_COPYRIGHT));
+ 	exit(0);
+ }
+
+/*
+ * Increase the file descriptor limit for handling a lot of health check
+ * processes (each requires a pipe).
+ *
+ * Note that this function is called before ilbd becomes a daemon. So
+ * we call perror(3C) to print out error message directly so that SMF
+ * can catch them.
+ */
+static void
+set_rlim(void)
+{
+ struct rlimit rlp;
+
+ if (getrlimit(RLIMIT_NOFILE, &rlp) == -1) {
+ perror("ilbd: getrlimit");
+ exit(errno);
+ }
+ rlp.rlim_cur = rlp.rlim_max;
+ if (setrlimit(RLIMIT_NOFILE, &rlp) == -1) {
+ perror("ilbd: setrlimit");
+ exit(errno);
+ }
+}
+
+ /*
+ * ilbd entry point.
+ *
+ * Parses the command line (-V prints the version, -d enables debugging,
+ * anything else prints the usage), clears the kernel ILB state, raises
+ * the file descriptor limit, makes sure the daemon directory exists,
+ * switches to the daemon uid/gid with a reduced privilege set, creates
+ * the client socket, daemonizes unless debugging is on, and finally runs
+ * main_loop(). When main_loop() returns, the socket file is removed and
+ * the kernel state is cleared again before returning 0.
+ */
+ int
+ main(int argc, char **argv)
+ {
+ int s;
+ int c;
+
+ (void) setlocale(LC_ALL, "");
+ #if !defined(TEXT_DOMAIN)
+ #define TEXT_DOMAIN "SYS_TEST"
+ #endif
+ static const char daemon_dir[] = DAEMON_DIR;
+
+ (void) textdomain(TEXT_DOMAIN);
+
+ while ((c = getopt(argc, argv, ":V?d(debug)")) != -1) {
+ switch ((char)c) {
+ case '?': Usage(argv[0]);
+ /* not reached */
+ break;
+ case 'V': print_version(argv[0]);
+ /* not reached */
+ break;
+ case 'd': ilbd_enable_debug();
+ break;
+ default: Usage(argv[0]);
+ /* not reached */
+ break;
+ }
+ }
+
+ /*
+ * Whenever the daemon starts, it needs to start with a clean
+ * slate in the kernel. We need sys_ip_config privilege for
+ * this.
+ */
+ ilbd_reset_kernel_state();
+
+ /* Increase the limit on the number of file descriptors. */
+ set_rlim();
+
+ /*
+ * ilbd daemon starts off as root, just so it can create
+ * /var/run/daemon if one does not exist. After that is done
+ * the daemon switches to "daemon" uid. This is similar to what
+ * rpcbind does.
+ */
+ if (mkdir(daemon_dir, DAEMON_DIR_MODE) == 0 || errno == EEXIST) {
+ (void) chmod(daemon_dir, DAEMON_DIR_MODE);
+ (void) chown(daemon_dir, DAEMON_UID, DAEMON_GID);
+ } else {
+ /*
+ * NOTE(review): errno is read after perror(), which may have
+ * changed it -- confirm the exit status is still the intended one.
+ */
+ perror("main: mkdir failed");
+ exit(errno);
+ }
+ /*
+ * Now lets switch ilbd as uid = daemon, gid = daemon with a
+ * trimmed down privilege set
+ */
+ if (__init_daemon_priv(PU_RESETGROUPS | PU_LIMITPRIVS | PU_INHERITPRIVS,
+ DAEMON_UID, DAEMON_GID, PRIV_PROC_OWNER, PRIV_PROC_AUDIT,
+ PRIV_NET_ICMPACCESS, PRIV_SYS_IP_CONFIG, NULL) == -1) {
+ (void) fprintf(stderr, "Insufficient privileges\n");
+ exit(EXIT_FAILURE);
+ }
+
+ /*
+ * Opens a PF_UNIX socket to the client. No privilege needed
+ * for this.
+ */
+ s = ilbd_create_client_socket();
+
+ /*
+ * Daemonify if ilbd is not running with -d option
+ * Need proc_fork privilege for this
+ */
+ if (!is_debugging_on()) {
+ logdebug("daemonizing...");
+ if (daemon(0, 0) != 0) {
+ logperror("daemon failed");
+ exit(EXIT_FAILURE);
+ }
+ }
+ /*
+ * Take PRIV_PROC_OWNER and PRIV_PROC_AUDIT out of the inheritable set;
+ * these are the two privileges the file header says probe processes
+ * must not inherit.
+ */
+ (void) priv_set(PRIV_OFF, PRIV_INHERITABLE, PRIV_PROC_OWNER,
+ PRIV_PROC_AUDIT, NULL);
+
+ /* if daemonified then set up syslog */
+ if (!is_debugging_on())
+ openlog("ilbd", LOG_PID, LOG_DAEMON);
+
+ i_ilbd_setup_lists();
+
+ main_loop(s);
+
+ /*
+ * if we come here, then we experienced an error or a shutdown
+ * indicator, so clean up after ourselves.
+ */
+ logdebug("main(): terminating");
+
+ (void) remove(SOCKET_PATH);
+ ilbd_reset_kernel_state();
+
+ return (0);
+ }
diff --git a/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_nat.c b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_nat.c
new file mode 100644
index 0000000000..cdb114c795
--- /dev/null
+++ b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_nat.c
@@ -0,0 +1,295 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <libilb.h>
+#include <inet/ilb.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <errno.h>
+#include <assert.h>
+#include <macros.h>
+#include "libilb_impl.h"
+#include "ilbd.h"
+
+/*
+ * We only allow one show nat/persist command running at any time. Note that
+ * there is no lock for this since ilbd is single threaded. And we only care
+ * about the pointer value of client, not its type.
+ *
+ * The following variables store the current client making the request.
+ */
+static void *nat_cur_cli;
+static void *sticky_cur_cli;
+
+/* Maximum number of NAT/sticky entries to request from kernel. */
+#define NUM_ENTRIES 500
+
+/*
+ * Clear the current requesting client. This will allow a new client
+ * to make a request.
+ */
+void
+ilbd_show_nat_cleanup(void)
+{
+ nat_cur_cli = NULL;
+}
+
+ /*
+ * Clear the current show-persist requesting client (sticky_cur_cli), so
+ * that a new client can start a listing of the sticky table.
+ */
+ void
+ ilbd_show_sticky_cleanup(void)
+ {
+ sticky_cur_cli = NULL;
+ }
+
+/*
+ * To show the kernel NAT table.
+ *
+ * cli: the client pointer making the request.
+ * ic: the client request.
+ * rbuf: reply buffer to be filled in.
+ * rbufsz: reply buffer size.
+ */
+ilb_status_t
+ilbd_show_nat(void *cli, const ilb_comm_t *ic, uint32_t *rbuf, size_t *rbufsz)
+{
+ ilb_show_info_t *req_si = (ilb_show_info_t *)&ic->ic_data;
+ ilb_list_nat_cmd_t *kcmd;
+ boolean_t start;
+ size_t tmp_rbufsz, kbufsz;
+ uint32_t max_num;
+ ilb_status_t ret;
+ int i;
+ ilb_show_info_t *reply;
+ ilb_nat_info_t *nat_ret;
+
+ /* For new client request, start from the beginning of the table. */
+ if (nat_cur_cli == NULL) {
+ nat_cur_cli = cli;
+ start = B_TRUE;
+ } else if (cli == nat_cur_cli) {
+ /*
+ * Another request from client. If the client does not
+ * want to continue, reset the current client and reply OK.
+ */
+ if (ic->ic_flags & ILB_COMM_END) {
+ ilbd_show_nat_cleanup();
+ ilbd_reply_ok(rbuf, rbufsz);
+ return (ILB_STATUS_OK);
+ }
+ start = B_FALSE;
+ } else {
+ /* A request is on-going, so reject a new client. */
+ return (ILB_STATUS_INPROGRESS);
+ }
+
+ tmp_rbufsz = *rbufsz;
+ ilbd_reply_ok(rbuf, rbufsz);
+ reply = (ilb_show_info_t *)&((ilb_comm_t *)rbuf)->ic_data;
+
+ /*
+ * Calculate the max number of ilb_nat_info_t can be fitted in the
+ * reply.
+ */
+ *rbufsz += sizeof (ilb_show_info_t *);
+ tmp_rbufsz -= *rbufsz;
+ max_num = tmp_rbufsz / sizeof (ilb_nat_info_t);
+
+ /*
+ * Calculate the exact number of entries we should request from kernel.
+ */
+ max_num = min(req_si->sn_num, min(NUM_ENTRIES, max_num));
+
+ kbufsz = max_num * sizeof (ilb_nat_entry_t) +
+ offsetof(ilb_list_nat_cmd_t, entries);
+ if ((kcmd = malloc(kbufsz)) == NULL) {
+ logdebug("ilbd_show_nat: malloc(cmd)");
+ ilbd_reply_err(rbuf, rbufsz, ILB_STATUS_ENOMEM);
+ return (ILB_STATUS_ENOMEM);
+ }
+
+ kcmd->cmd = ILB_LIST_NAT_TABLE;
+ kcmd->flags = start ? ILB_LIST_BEGIN : ILB_LIST_CONT;
+ kcmd->num_nat = max_num;
+ if ((ret = do_ioctl(kcmd, kbufsz)) != ILB_STATUS_OK) {
+ logperror("ilbd_show_nat: ioctl(ILB_LIST_NAT_TABLE)");
+ ilbd_reply_err(rbuf, rbufsz, ret);
+ free(kcmd);
+ return (ret);
+ }
+
+ reply->sn_num = kcmd->num_nat;
+ *rbufsz += reply->sn_num * sizeof (ilb_nat_info_t);
+
+ /*
+ * It is the end of table, let the client know. And the transaction
+ * is done.
+ */
+ if (kcmd->flags & ILB_LIST_END) {
+ nat_cur_cli = NULL;
+ } else {
+ /*
+ * ilbd_reply_ok() sets ic_flags to ILB_COMM_END by default.
+ * Need to clear it here.
+ */
+ ((ilb_comm_t *)rbuf)->ic_flags = 0;
+ }
+
+ nat_ret = (ilb_nat_info_t *)&reply->sn_data;
+
+ for (i = 0; i < kcmd->num_nat; i++) {
+ ilb_nat_entry_t *nat;
+
+ nat = &kcmd->entries[i];
+
+ nat_ret->nat_proto = nat->proto;
+
+ nat_ret->nat_in_local = nat->in_local;
+ nat_ret->nat_in_global = nat->in_global;
+ nat_ret->nat_out_local = nat->out_local;
+ nat_ret->nat_out_global = nat->out_global;
+
+ nat_ret->nat_in_local_port = nat->in_local_port;
+ nat_ret->nat_in_global_port = nat->in_global_port;
+ nat_ret->nat_out_local_port = nat->out_local_port;
+ nat_ret->nat_out_global_port = nat->out_global_port;
+
+ nat_ret++;
+ }
+
+end:
+ free(kcmd);
+ return (ret);
+}
+
+/*
+ * To show the kernel sticky table.
+ *
+ * cli: the client pointer making the request.
+ * req_si: information about the show-persist request.
+ * rbuf: reply buffer to be filled in.
+ * rbufsz: reply buffer size.
+ */
+ilb_status_t
+ilbd_show_sticky(void *cli, const ilb_comm_t *ic, uint32_t *rbuf,
+ size_t *rbufsz)
+{
+ ilb_show_info_t *req_si = (ilb_show_info_t *)&ic->ic_data;
+ ilb_list_sticky_cmd_t *kcmd;
+ boolean_t start;
+ size_t tmp_rbufsz, kbufsz;
+ uint32_t max_num;
+ ilb_status_t ret;
+ int i;
+ ilb_show_info_t *reply;
+ ilb_persist_info_t *st_ret;
+
+ /* For new client request, start from the beginning of the table. */
+ if (sticky_cur_cli == NULL) {
+ sticky_cur_cli = cli;
+ start = B_TRUE;
+ } else if (cli == sticky_cur_cli) {
+ /*
+ * Another request from client. If the client does not
+ * want to continue, reset the current client and reply OK.
+ */
+ if (ic->ic_flags & ILB_COMM_END) {
+ ilbd_show_sticky_cleanup();
+ ilbd_reply_ok(rbuf, rbufsz);
+ return (ILB_STATUS_OK);
+ }
+ start = B_FALSE;
+ } else {
+ /* A request is on-going, so reject a new client. */
+ return (ILB_STATUS_INPROGRESS);
+ }
+
+ tmp_rbufsz = *rbufsz;
+ ilbd_reply_ok(rbuf, rbufsz);
+ reply = (ilb_show_info_t *)&((ilb_comm_t *)rbuf)->ic_data;
+
+ /*
+ * Calculate the max number of ilb_persist_info_t can be fitted in the
+ * reply.
+ */
+ *rbufsz += sizeof (ilb_show_info_t *);
+ tmp_rbufsz -= *rbufsz;
+ max_num = tmp_rbufsz / sizeof (ilb_persist_info_t);
+
+ /*
+ * Calculate the exact number of entries we should request from kernel.
+ */
+ max_num = min(req_si->sn_num, min(NUM_ENTRIES, max_num));
+
+ kbufsz = max_num * sizeof (ilb_sticky_entry_t) +
+ offsetof(ilb_list_sticky_cmd_t, entries);
+ if ((kcmd = malloc(kbufsz)) == NULL) {
+ logdebug("ilbd_show_nat: malloc(cmd)");
+ ilbd_reply_err(rbuf, rbufsz, ILB_STATUS_ENOMEM);
+ return (ILB_STATUS_ENOMEM);
+ }
+
+ kcmd->cmd = ILB_LIST_STICKY_TABLE;
+ kcmd->flags = start ? ILB_LIST_BEGIN : ILB_LIST_CONT;
+ kcmd->num_sticky = max_num;
+ if ((ret = do_ioctl(kcmd, kbufsz)) != ILB_STATUS_OK) {
+ logperror("ilbd_show_nat: ioctl(ILB_LIST_STICKY_TABLE)");
+ ilbd_reply_err(rbuf, rbufsz, ret);
+ free(kcmd);
+ return (ret);
+ }
+
+ reply->sn_num = kcmd->num_sticky;
+ *rbufsz += reply->sn_num * sizeof (ilb_persist_info_t);
+
+ if (kcmd->flags & ILB_LIST_END) {
+ sticky_cur_cli = NULL;
+ } else {
+ /*
+ * ilbd_reply_ok() sets ic_flags to ILB_COMM_END by default.
+ * Need to clear it here.
+ */
+ ((ilb_comm_t *)rbuf)->ic_flags = 0;
+ }
+
+ st_ret = (ilb_persist_info_t *)&reply->sn_data;
+
+ for (i = 0; i < kcmd->num_sticky; i++) {
+ ilb_sticky_entry_t *st;
+
+ st = &kcmd->entries[i];
+
+ (void) strlcpy(st_ret->persist_rule_name, st->rule_name,
+ ILB_NAMESZ);
+ st_ret->persist_req_addr = st->req_addr;
+ st_ret->persist_srv_addr = st->srv_addr;
+ st_ret++;
+ }
+
+end:
+ free(kcmd);
+ return (ret);
+}
diff --git a/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_rules.c b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_rules.c
new file mode 100644
index 0000000000..7a67c03283
--- /dev/null
+++ b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_rules.c
@@ -0,0 +1,1357 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <stdlib.h>
+#include <strings.h>
+#include <stddef.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <sys/list.h>
+#include <net/if.h>
+#include <assert.h>
+#include <errno.h>
+#include <libintl.h>
+#include <libilb.h>
+#include <inet/ilb.h>
+#include "libilb_impl.h"
+#include "ilbd.h"
+
+/*
+ * until we all use AF_* macros ...
+ *
+ * Translate between an address family (AF_INET/AF_INET6) and the IP
+ * version encoding used in kernel commands (IPPROTO_IP/IPPROTO_IPV6).
+ * Arguments and expansions are fully parenthesized so that each macro
+ * stays a single operand when it is used inside a larger expression.
+ */
+#define	AF_2_IPPROTO(_af)	(((_af) == AF_INET) ? IPPROTO_IP : IPPROTO_IPV6)
+#define	IPPROTO_2_AF(_i)	(((_i) == IPPROTO_IP) ? AF_INET : AF_INET6)
+
+static ilb_status_t ilbd_disable_one_rule(ilbd_rule_t *, boolean_t);
+static uint32_t i_flags_d2k(int);
+
+/*
+ * Fill the kernel server entry (k) from the server group entry (s).
+ * The expansion is a single comma expression rather than a sequence of
+ * statements, so the macro is safe to use as the unbraced body of an
+ * if/else or a loop.
+ */
+#define	ILB_SGSRV_2_KSRV(s, k)						\
+	((k)->addr = (s)->sgs_addr,					\
+	(k)->min_port = (s)->sgs_minport,				\
+	(k)->max_port = (s)->sgs_maxport,				\
+	(k)->flags = i_flags_d2k((s)->sgs_flags),			\
+	(k)->err = 0,							\
+	(void) strlcpy((k)->name, (s)->sgs_srvID, sizeof ((k)->name)))
+
+list_t ilbd_rule_hlist;
+
+/*
+ * Translate a kernel-side algorithm value into the library's ilb_algo_t.
+ * Values that are not listed map to 0.
+ */
+static ilb_algo_t
+algo_impl2lib(ilb_algo_impl_t a)
+{
+	ilb_algo_t	lib_algo = 0;
+
+	switch (a) {
+	case ILB_ALG_IMPL_ROUNDROBIN:
+		lib_algo = ILB_ALG_ROUNDROBIN;
+		break;
+	case ILB_ALG_IMPL_HASH_IP:
+		lib_algo = ILB_ALG_HASH_IP;
+		break;
+	case ILB_ALG_IMPL_HASH_IP_SPORT:
+		lib_algo = ILB_ALG_HASH_IP_SPORT;
+		break;
+	case ILB_ALG_IMPL_HASH_IP_VIP:
+		lib_algo = ILB_ALG_HASH_IP_VIP;
+		break;
+	}
+
+	return (lib_algo);
+}
+
+/*
+ * Translate a kernel-side topology value into the library's ilb_topo_t.
+ * Values that are not listed map to 0.
+ */
+static ilb_topo_t
+topo_impl2lib(ilb_topo_impl_t t)
+{
+	ilb_topo_t	lib_topo = 0;
+
+	switch (t) {
+	case ILB_TOPO_IMPL_DSR:
+		lib_topo = ILB_TOPO_DSR;
+		break;
+	case ILB_TOPO_IMPL_NAT:
+		lib_topo = ILB_TOPO_NAT;
+		break;
+	case ILB_TOPO_IMPL_HALF_NAT:
+		lib_topo = ILB_TOPO_HALF_NAT;
+		break;
+	}
+
+	return (lib_topo);
+}
+
+/*
+ * Translate a library algorithm value (ilb_algo_t) into the kernel-side
+ * ilb_algo_impl_t. Values that are not listed map to 0.
+ */
+ilb_algo_impl_t
+algo_lib2impl(ilb_algo_t a)
+{
+	ilb_algo_impl_t	impl_algo = 0;
+
+	switch (a) {
+	case ILB_ALG_ROUNDROBIN:
+		impl_algo = ILB_ALG_IMPL_ROUNDROBIN;
+		break;
+	case ILB_ALG_HASH_IP:
+		impl_algo = ILB_ALG_IMPL_HASH_IP;
+		break;
+	case ILB_ALG_HASH_IP_SPORT:
+		impl_algo = ILB_ALG_IMPL_HASH_IP_SPORT;
+		break;
+	case ILB_ALG_HASH_IP_VIP:
+		impl_algo = ILB_ALG_IMPL_HASH_IP_VIP;
+		break;
+	}
+
+	return (impl_algo);
+}
+
+/*
+ * Translate a library topology value (ilb_topo_t) into the kernel-side
+ * ilb_topo_impl_t. Values that are not listed map to 0.
+ */
+ilb_topo_impl_t
+topo_lib2impl(ilb_topo_t t)
+{
+	ilb_topo_impl_t	impl_topo = 0;
+
+	switch (t) {
+	case ILB_TOPO_DSR:
+		impl_topo = ILB_TOPO_IMPL_DSR;
+		break;
+	case ILB_TOPO_NAT:
+		impl_topo = ILB_TOPO_IMPL_NAT;
+		break;
+	case ILB_TOPO_HALF_NAT:
+		impl_topo = ILB_TOPO_IMPL_HALF_NAT;
+		break;
+	}
+
+	return (impl_topo);
+}
+
+/*
+ * Walk the list of rules hanging off of a server group and check whether
+ * it is safe to add the server to each of them.
+ *
+ * Returns ILB_STATUS_OK if srv is NULL or if no rule on rlist conflicts
+ * with it. Otherwise the walk stops at the first incompatible rule and
+ * ILB_STATUS_INVAL_SRVR or ILB_STATUS_BADPORT is returned.
+ *
+ * A server without a port (sgs_minport == 0) matches neither branch
+ * below and is therefore accepted by every rule.
+ */
+ilb_status_t
+i_check_srv2rules(list_t *rlist, ilb_sg_srv_t *srv)
+{
+ ilb_status_t rc = ILB_STATUS_OK;
+ ilbd_rule_t *rl;
+ int server_portrange, rule_portrange;
+ int srv_minport, srv_maxport;
+ int r_minport, r_maxport;
+
+ if (srv == NULL)
+ return (ILB_STATUS_OK);
+
+ /* all port comparisons below are done on ntohs()-converted values */
+ srv_minport = ntohs(srv->sgs_minport);
+ srv_maxport = ntohs(srv->sgs_maxport);
+
+ for (rl = list_head(rlist); rl != NULL; rl = list_next(rlist, rl)) {
+ r_minport = ntohs(rl->irl_minport);
+ r_maxport = ntohs(rl->irl_maxport);
+
+ if ((srv_minport != 0) && (srv_minport == srv_maxport)) {
+ /* server has single port */
+ if (rl->irl_topo == ILB_TOPO_DSR) {
+ /*
+ * either we have a DSR rule with a port
+ * range, or both server and rule
+ * have single ports but their values
+ * don't match - this is incompatible
+ */
+ if (r_maxport > r_minport) {
+ rc = ILB_STATUS_INVAL_SRVR;
+ break;
+ } else if (srv_minport != r_minport) {
+ rc = ILB_STATUS_BADPORT;
+ break;
+ }
+ }
+ /*
+ * a fixed health check port must be the server's port
+ * NOTE(review): irl_hcport is compared without ntohs();
+ * presumably it is kept in host byte order - confirm
+ */
+ if (rl->irl_hcpflag == ILB_HCI_PROBE_FIX &&
+ rl->irl_hcport != srv_minport) {
+ rc = ILB_STATUS_BADPORT;
+ break;
+ }
+ } else if (srv_maxport > srv_minport) {
+ /* server has a port range */
+ if ((rl->irl_topo == ILB_TOPO_DSR) &&
+ (r_maxport > r_minport)) {
+ if ((r_minport != srv_minport) ||
+ (r_maxport != srv_maxport)) {
+ /*
+ * we have a DSR rule with a port range
+ * and its min and max port values
+ * do not match those of the server
+ * - this is incompatible
+ */
+ rc = ILB_STATUS_BADPORT;
+ break;
+ }
+ } else if ((rl->irl_topo == ILB_TOPO_DSR) &&
+ (r_maxport == r_minport)) {
+ /*
+ * we have a DSR rule with a single
+ * port and a server with a port range
+ * - this is incompatible
+ */
+ rc = ILB_STATUS_INVAL_SRVR;
+ break;
+ } else if (((rl->irl_topo == ILB_TOPO_NAT) ||
+ (rl->irl_topo == ILB_TOPO_HALF_NAT)) &&
+ (r_maxport > r_minport)) {
+ server_portrange = srv_maxport - srv_minport;
+ rule_portrange = r_maxport - r_minport;
+ if (rule_portrange != server_portrange) {
+ /*
+ * we have a NAT/Half-NAT rule with
+ * a port range and server with a port
+ * range and there is a mismatch in the
+ * sizes of the port ranges - this is
+ * incompatible
+ */
+ rc = ILB_STATUS_INVAL_SRVR;
+ break;
+ }
+ }
+ /* a fixed health check port must lie in the server's range */
+ if (rl->irl_hcpflag == ILB_HCI_PROBE_FIX &&
+ (rl->irl_hcport > srv_maxport ||
+ rl->irl_hcport < srv_minport)) {
+ rc = ILB_STATUS_BADPORT;
+ break;
+ }
+ }
+ }
+
+ return (rc);
+}
+
+/*
+ * Initialize ilbd_rule_hlist, the daemon's list of ilbd_rule_t entries,
+ * which are linked through their irl_link member.
+ */
+void
+i_setup_rule_hlist(void)
+{
+	size_t	link_offset = offsetof(ilbd_rule_t, irl_link);
+
+	list_create(&ilbd_rule_hlist, sizeof (ilbd_rule_t), link_offset);
+}
+
+/*
+ * Apply one persistent-configuration operation to a rule:
+ *   ILBD_SCF_CREATE		create the rule's property group
+ *   ILBD_SCF_DESTROY		destroy the rule's property group
+ *   ILBD_SCF_ENABLE_DISABLE	store the rule's current enabled state in
+ *				its "status" property
+ * Any other command is logged and rejected with ILB_STATUS_INVAL_CMD.
+ */
+ilb_status_t
+i_ilbd_save_rule(ilbd_rule_t *irl, ilbd_scf_cmd_t scf_cmd)
+{
+	boolean_t	enable;
+
+	switch (scf_cmd) {
+	case ILBD_SCF_CREATE:
+		return (ilbd_create_pg(ILBD_SCF_RULE, (void *)irl));
+	case ILBD_SCF_DESTROY:
+		return (ilbd_destroy_pg(ILBD_SCF_RULE, irl->irl_name));
+	case ILBD_SCF_ENABLE_DISABLE:
+		/*
+		 * Normalize to B_TRUE/B_FALSE instead of storing the raw
+		 * masked bit, so the saved value does not depend on which
+		 * bit ILB_FLAGS_RULE_ENABLED happens to occupy.
+		 */
+		enable = ((irl->irl_flags & ILB_FLAGS_RULE_ENABLED) != 0) ?
+		    B_TRUE : B_FALSE;
+		return (ilbd_change_prop(ILBD_SCF_RULE, irl->irl_name,
+		    "status", &enable));
+	default:
+		logdebug("i_ilbd_save_rule: invalid scf cmd %d", scf_cmd);
+		return (ILB_STATUS_INVAL_CMD);
+	}
+}
+
+/*
+ * Allocate a zero-filled daemon rule. When a template is supplied in *r,
+ * it is copied into the new rule's irl_info member.
+ * Returns NULL if the allocation fails.
+ */
+static ilbd_rule_t *
+i_alloc_ilbd_rule(ilb_rule_info_t *r)
+{
+	ilbd_rule_t	*new_rule = calloc(1, sizeof (*new_rule));
+
+	if (new_rule == NULL || r == NULL)
+		return (new_rule);
+
+	bcopy(r, &new_rule->irl_info, sizeof (*r));
+	return (new_rule);
+}
+
+/*
+ * Look up a rule by name in ilbd_rule_hlist.
+ * Returns the first matching rule, or NULL if there is none.
+ */
+static ilbd_rule_t *
+i_find_rule_byname(const char *name)
+{
+	ilbd_rule_t	*cur;
+
+	for (cur = list_head(&ilbd_rule_hlist); cur != NULL;
+	    cur = list_next(&ilbd_rule_hlist, cur)) {
+		if (strncmp(cur->irl_name, name,
+		    sizeof (cur->irl_name)) == 0)
+			break;
+	}
+
+	return (cur);
+}
+
+/*
+ * Get the data of exactly one rule (named in rl->rl_name) from the kernel
+ * and store it in *rl, converting the IP version, algorithm and topology
+ * to their library representation.
+ * Returns the do_ioctl() status; *rl is only modified on ILB_STATUS_OK.
+ */
+static ilb_status_t
+ilb_get_krule(ilb_rule_info_t *rl)
+{
+	ilb_rule_cmd_t	req;
+	ilb_status_t	status;
+
+	req.cmd = ILB_LIST_RULE;
+	req.flags = 0;
+	(void) strlcpy(req.name, rl->rl_name, sizeof (req.name));
+
+	if ((status = do_ioctl(&req, 0)) != ILB_STATUS_OK)
+		return (status);
+
+	/* identity and addressing */
+	(void) strlcpy(rl->rl_name, req.name, sizeof (rl->rl_name));
+	rl->rl_flags = req.flags;
+	rl->rl_ipversion = IPPROTO_2_AF(req.ip_ver);
+	rl->rl_vip = req.vip;
+	rl->rl_proto = req.proto;
+	rl->rl_minport = req.min_port;
+	rl->rl_maxport = req.max_port;
+
+	/* balancing behaviour */
+	rl->rl_algo = algo_impl2lib(req.algo);
+	rl->rl_topo = topo_impl2lib(req.topo);
+	rl->rl_stickymask = req.sticky_mask;
+	rl->rl_nat_src_start = req.nat_src_start;
+	rl->rl_nat_src_end = req.nat_src_end;
+
+	/* timers */
+	rl->rl_conndrain = req.conn_drain_timeout;
+	rl->rl_nat_timeout = req.nat_expiry;
+	rl->rl_sticky_timeout = req.sticky_expiry;
+
+	return (ILB_STATUS_OK);
+}
+
+/*
+ * Build the reply for a "retrieve rule" request.
+ *
+ * rl_name	name of the rule to look up in the daemon's rule list
+ * rbuf		reply buffer; an ilb_comm_t header followed by one
+ *		ilb_rule_info_t is written into it
+ * rbufsz	in/out reply size, maintained by ilbd_reply_ok() and then
+ *		increased by sizeof (ilb_rule_info_t)
+ *
+ * Returns ILB_STATUS_ENOENT if the rule is unknown, the status of the
+ * kernel query if the default timeouts cannot be obtained, otherwise
+ * ILB_STATUS_OK.
+ */
+ilb_status_t
+ilbd_retrieve_rule(ilbd_name_t rl_name, uint32_t *rbuf, size_t *rbufsz)
+{
+	ilbd_rule_t	*irl;
+	ilb_status_t	rc;
+	ilb_rule_info_t	*rinfo;
+
+	irl = i_find_rule_byname(rl_name);
+	if (irl == NULL)
+		return (ILB_STATUS_ENOENT);
+
+	ilbd_reply_ok(rbuf, rbufsz);
+	rinfo = (ilb_rule_info_t *)&((ilb_comm_t *)rbuf)->ic_data;
+	bcopy(&irl->irl_info, rinfo, sizeof (*rinfo));
+
+	/*
+	 * Check if the various timeout values are 0. If one is, get the
+	 * default values from kernel.
+	 */
+	if (rinfo->rl_conndrain == 0 || rinfo->rl_nat_timeout == 0 ||
+	    rinfo->rl_sticky_timeout == 0) {
+		ilb_rule_info_t tmp_info;
+
+		/* bounded copy; the name is the only input to the query */
+		(void) strlcpy(tmp_info.rl_name, rinfo->rl_name,
+		    sizeof (tmp_info.rl_name));
+		rc = ilb_get_krule(&tmp_info);
+		if (rc != ILB_STATUS_OK)
+			return (rc);
+		if (rinfo->rl_conndrain == 0)
+			rinfo->rl_conndrain = tmp_info.rl_conndrain;
+		/*
+		 * rl_topo holds a library ilb_topo_t value, so it must be
+		 * compared against ILB_TOPO_*, not the kernel's
+		 * ILB_TOPO_IMPL_* constants.
+		 */
+		if ((rinfo->rl_topo == ILB_TOPO_NAT ||
+		    rinfo->rl_topo == ILB_TOPO_HALF_NAT) &&
+		    rinfo->rl_nat_timeout == 0) {
+			rinfo->rl_nat_timeout = tmp_info.rl_nat_timeout;
+		}
+		if ((rinfo->rl_flags & ILB_FLAGS_RULE_STICKY) &&
+		    rinfo->rl_sticky_timeout == 0) {
+			rinfo->rl_sticky_timeout = tmp_info.rl_sticky_timeout;
+		}
+	}
+	*rbufsz += sizeof (ilb_rule_info_t);
+
+	return (ILB_STATUS_OK);
+}
+
+/*
+ * Tear down one rule: remove it from the kernel (unless the rule carries
+ * ILB_FLAGS_RULE_ALLRULES, in which case the kernel is not contacted
+ * here), unlink it from its server group's rule list and from
+ * ilbd_rule_hlist, dissociate it from its hc object if it has one, request
+ * destruction of its saved configuration and free it.
+ *
+ * If the kernel request fails, the rule is left untouched and that status
+ * is returned. Otherwise irl is always freed - the caller must not use it
+ * afterwards - and the return value is the status of i_ilbd_save_rule().
+ */
+static ilb_status_t
+ilbd_destroy_one_rule(ilbd_rule_t *irl)
+{
+ ilb_status_t rc;
+ ilb_name_cmd_t kcmd;
+
+ /*
+ * as far as talking to the kernel is concerned, "all rules"
+ * is handled in one go somewhere else, so we only
+ * tell the kernel about single rules here.
+ */
+ if ((irl->irl_flags & ILB_FLAGS_RULE_ALLRULES) == 0) {
+ kcmd.cmd = ILB_DESTROY_RULE;
+ (void) strlcpy(kcmd.name, irl->irl_name, sizeof (kcmd.name));
+ kcmd.flags = 0;
+
+ rc = do_ioctl(&kcmd, 0);
+ if (rc != ILB_STATUS_OK)
+ return (rc);
+
+ }
+ /* from here on the rule goes away regardless of later errors */
+ list_remove(&irl->irl_sg->isg_rulelist, irl);
+ list_remove(&ilbd_rule_hlist, irl);
+
+ /*
+ * When dissociating a rule, only two errors can happen. The hc
+ * name is incorrect or the rule is not associated with the hc
+ * object. Both should not happen.... The check is for debugging
+ * purpose.
+ */
+ if (RULE_HAS_HC(irl) && (rc = ilbd_hc_dissociate_rule(irl)) !=
+ ILB_STATUS_OK) {
+ logerr("ilbd_destroy_one_rule: cannot "
+ "dissociate %s from hc object %s: %d",
+ irl->irl_name, irl->irl_hcname, rc);
+ }
+
+ /* this status, not the dissociation result, is what gets returned */
+ rc = i_ilbd_save_rule(irl, ILBD_SCF_DESTROY);
+ if (rc != ILB_STATUS_OK)
+ logdebug("ilbd_destroy_rule: save rule failed");
+
+ free(irl);
+ return (rc);
+}
+
+/*
+ * the following two functions are the other's opposite, and can
+ * call into each other for roll back purposes in case of error.
+ * To avoid endless recursion, the 'is_rollback' parameter must be
+ * set to B_TRUE in the roll back case.
+ */
+/*
+ * Enable one rule: mark it enabled in memory, tell the kernel (unless the
+ * rule carries ILB_FLAGS_RULE_ALLRULES, in which case the kernel is not
+ * contacted here), enable its health check if it has one and, unless this
+ * call is itself a rollback, save the new state.
+ *
+ * If the kernel or health check step fails, the in-memory flag is put
+ * back and only a kernel request issued by this call is undone. If saving
+ * fails, the rule is disabled again via ilbd_disable_one_rule().
+ * Returns ILB_STATUS_OK if the rule already was enabled.
+ */
+static ilb_status_t
+ilbd_enable_one_rule(ilbd_rule_t *irl, boolean_t is_rollback)
+{
+	ilb_status_t	rc = ILB_STATUS_OK;
+	ilb_name_cmd_t	kcmd;
+	boolean_t	told_kernel = B_FALSE;
+
+	/* no use sending a no-op to the kernel */
+	if ((irl->irl_flags & ILB_FLAGS_RULE_ENABLED) != 0)
+		return (ILB_STATUS_OK);
+
+	irl->irl_flags |= ILB_FLAGS_RULE_ENABLED;
+
+	/* "all rules" is handled in one go somewhere else, not here */
+	if ((irl->irl_flags & ILB_FLAGS_RULE_ALLRULES) == 0) {
+		kcmd.cmd = ILB_ENABLE_RULE;
+		(void) strlcpy(kcmd.name, irl->irl_name, sizeof (kcmd.name));
+		kcmd.flags = 0;
+
+		rc = do_ioctl(&kcmd, 0);
+		if (rc != ILB_STATUS_OK) {
+			/* nothing changed, so don't claim it is enabled */
+			irl->irl_flags &= ~ILB_FLAGS_RULE_ENABLED;
+			return (rc);
+		}
+		told_kernel = B_TRUE;
+	}
+	if (RULE_HAS_HC(irl) && (rc = ilbd_hc_enable_rule(irl)) !=
+	    ILB_STATUS_OK) {
+		/*
+		 * Undo the kernel work, but only if we did any: in the
+		 * "all rules" case kcmd was never filled in.
+		 */
+		if (told_kernel) {
+			kcmd.cmd = ILB_DISABLE_RULE;
+			/* Cannot do much if ioctl fails... */
+			(void) do_ioctl(&kcmd, 0);
+		}
+		irl->irl_flags &= ~ILB_FLAGS_RULE_ENABLED;
+		return (rc);
+	}
+
+	if (!is_rollback) {
+		if (rc == ILB_STATUS_OK)
+			rc = i_ilbd_save_rule(irl, ILBD_SCF_ENABLE_DISABLE);
+		if (rc != ILB_STATUS_OK)
+			/* ignore rollback return code */
+			(void) ilbd_disable_one_rule(irl, B_TRUE);
+	}
+
+	return (rc);
+}
+
+/*
+ * Disable one rule: mark it disabled in memory, tell the kernel (unless
+ * the rule carries ILB_FLAGS_RULE_ALLRULES, in which case the kernel is
+ * not contacted here), disable its health check if it has one and, unless
+ * this call is itself a rollback, save the new state.
+ *
+ * If the kernel or health check step fails, the in-memory flag is put
+ * back and only a kernel request issued by this call is undone. If saving
+ * fails, the rule is enabled again via ilbd_enable_one_rule().
+ * Returns ILB_STATUS_OK if the rule already was disabled.
+ */
+static ilb_status_t
+ilbd_disable_one_rule(ilbd_rule_t *irl, boolean_t is_rollback)
+{
+	ilb_status_t	rc = ILB_STATUS_OK;
+	ilb_name_cmd_t	kcmd;
+	boolean_t	told_kernel = B_FALSE;
+
+	/* no use sending a no-op to the kernel */
+	if ((irl->irl_flags & ILB_FLAGS_RULE_ENABLED) == 0)
+		return (ILB_STATUS_OK);
+
+	irl->irl_flags &= ~ILB_FLAGS_RULE_ENABLED;
+
+	/* "all rules" is handled in one go somewhere else, not here */
+	if ((irl->irl_flags & ILB_FLAGS_RULE_ALLRULES) == 0) {
+		kcmd.cmd = ILB_DISABLE_RULE;
+		(void) strlcpy(kcmd.name, irl->irl_name, sizeof (kcmd.name));
+		kcmd.flags = 0;
+
+		rc = do_ioctl(&kcmd, 0);
+		if (rc != ILB_STATUS_OK) {
+			/* nothing changed, so the rule is still enabled */
+			irl->irl_flags |= ILB_FLAGS_RULE_ENABLED;
+			return (rc);
+		}
+		told_kernel = B_TRUE;
+	}
+
+	if (RULE_HAS_HC(irl) && (rc = ilbd_hc_disable_rule(irl)) !=
+	    ILB_STATUS_OK) {
+		/*
+		 * Undo the kernel work, but only if we did any: in the
+		 * "all rules" case kcmd was never filled in.
+		 */
+		if (told_kernel) {
+			kcmd.cmd = ILB_ENABLE_RULE;
+			/* Cannot do much if ioctl fails... */
+			(void) do_ioctl(&kcmd, 0);
+		}
+		irl->irl_flags |= ILB_FLAGS_RULE_ENABLED;
+		return (rc);
+	}
+
+	if (!is_rollback) {
+		if (rc == ILB_STATUS_OK)
+			rc = i_ilbd_save_rule(irl, ILBD_SCF_ENABLE_DISABLE);
+		if (rc != ILB_STATUS_OK)
+			/* ignore rollback return code */
+			(void) ilbd_enable_one_rule(irl, B_TRUE);
+	}
+
+	return (rc);
+}
+
+/*
+ * Generates an audit record for a supplied rule name
+ * Used for enable_rule, disable_rule, delete_rule,
+ * and create_rule subcommands
+ *
+ * audit_rule_name	rule name to record for enable/disable/delete;
+ *			not used for create
+ * rlinfo		rule description to record for create; not used
+ *			for the other commands
+ * cmd			the subcommand being audited; any other value is
+ *			logged and ignored
+ * rc			outcome of the subcommand; ILB_STATUS_OK produces a
+ *			success record, anything else a failure record
+ * ucredp		credentials of the client that issued the command
+ *
+ * A failure of the audit library itself terminates the daemon.
+ */
+static void
+ilbd_audit_rule_event(const char *audit_rule_name,
+    ilb_rule_info_t *rlinfo, ilbd_cmd_t cmd, ilb_status_t rc,
+    ucred_t *ucredp)
+{
+	adt_session_data_t	*ah;
+	adt_event_data_t	*event;
+	au_event_t		flag;
+	char	aobuf[ILBD_MAX_VALUE_LEN];	/* algo:topo */
+	char	pbuf[ILBD_MAX_VALUE_LEN];	/* protocol */
+	char	pxbuf[ILBD_MAX_VALUE_LEN];	/* prxy src range */
+	char	hcpbuf[ILBD_MAX_VALUE_LEN];	/* hcport: num or "ANY" */
+	char	valstr1[ILBD_MAX_VALUE_LEN];
+	char	valstr2[ILBD_MAX_VALUE_LEN];
+	char	vipstr_buf[INET6_ADDRSTRLEN];	/* virtual IP address */
+	char	addrstr_buf[INET6_ADDRSTRLEN];	/* proxy-src start */
+	char	addrstr_buf1[INET6_ADDRSTRLEN];	/* proxy-src end */
+	int	audit_error;
+
+	if ((ucredp == NULL) && (cmd == ILBD_CREATE_RULE)) {
+		/*
+		 * we came here from the path where ilbd incorporates
+		 * the configuration that is listed in SCF :
+		 * i_ilbd_read_config->ilbd_walk_rule_pgs->
+		 * ->ilbd_scf_instance_walk_pg->ilbd_create_rule
+		 * We skip auditing in that case
+		 */
+		return;
+	}
+
+	/*
+	 * Pick the audit event first, so that an unexpected command can
+	 * never reach adt_alloc_event() with an uninitialized event id.
+	 */
+	switch (cmd) {
+	case ILBD_ENABLE_RULE:
+		flag = ADT_ilb_enable_rule;
+		break;
+	case ILBD_DISABLE_RULE:
+		flag = ADT_ilb_disable_rule;
+		break;
+	case ILBD_DESTROY_RULE:
+		flag = ADT_ilb_delete_rule;
+		break;
+	case ILBD_CREATE_RULE:
+		flag = ADT_ilb_create_rule;
+		break;
+	default:
+		logdebug("ilbd_audit_rule_event: unexpected cmd %d", cmd);
+		return;
+	}
+
+	if (adt_start_session(&ah, NULL, 0) != 0) {
+		logerr("ilbd_audit_rule_event: adt_start_session failed");
+		exit(EXIT_FAILURE);
+	}
+	if (adt_set_from_ucred(ah, ucredp, ADT_NEW) != 0) {
+		(void) adt_end_session(ah);
+		logerr("ilbd_audit_rule_event: adt_set_from_ucred failed");
+		exit(EXIT_FAILURE);
+	}
+
+	if ((event = adt_alloc_event(ah, flag)) == NULL) {
+		logerr("ilbd_audit_rule_event: adt_alloc_event failed");
+		exit(EXIT_FAILURE);
+	}
+
+	(void) memset((char *)event, 0, sizeof (adt_event_data_t));
+
+	switch (cmd) {
+	case ILBD_DESTROY_RULE:
+		event->adt_ilb_delete_rule.auth_used = NET_ILB_CONFIG_AUTH;
+		event->adt_ilb_delete_rule.rule_name = (char *)audit_rule_name;
+		break;
+	case ILBD_ENABLE_RULE:
+		event->adt_ilb_enable_rule.auth_used = NET_ILB_ENABLE_AUTH;
+		event->adt_ilb_enable_rule.rule_name = (char *)audit_rule_name;
+		break;
+	case ILBD_DISABLE_RULE:
+		event->adt_ilb_disable_rule.auth_used = NET_ILB_ENABLE_AUTH;
+		event->adt_ilb_disable_rule.rule_name = (char *)audit_rule_name;
+		break;
+	case ILBD_CREATE_RULE:
+		aobuf[0] = '\0';
+		pbuf[0] = '\0';
+		pxbuf[0] = '\0';
+		valstr1[0] = '\0';
+		valstr2[0] = '\0';
+		hcpbuf[0] = '\0';
+
+		event->adt_ilb_create_rule.auth_used = NET_ILB_CONFIG_AUTH;
+
+		/*
+		 * Fill in virtual IP address. The event only stores a
+		 * pointer, so the VIP gets a buffer of its own that is not
+		 * reused for the proxy-src addresses below.
+		 */
+		vipstr_buf[0] = '\0';
+		ilbd_addr2str(&rlinfo->rl_vip, vipstr_buf,
+		    sizeof (vipstr_buf));
+		event->adt_ilb_create_rule.virtual_ipaddress = vipstr_buf;
+
+		/* Fill in port - could be a single value or a range */
+		event->adt_ilb_create_rule.min_port = ntohs(rlinfo->rl_minport);
+		if (ntohs(rlinfo->rl_maxport) > ntohs(rlinfo->rl_minport)) {
+			/* port range */
+			event->adt_ilb_create_rule.max_port =
+			    ntohs(rlinfo->rl_maxport);
+		} else {
+			/* in audit record, max=min when single port */
+			event->adt_ilb_create_rule.max_port =
+			    ntohs(rlinfo->rl_minport);
+		}
+
+		/*
+		 * Fill in protocol - if user does not specify it,
+		 * its TCP by default
+		 */
+		if (rlinfo->rl_proto == IPPROTO_UDP)
+			(void) snprintf(pbuf, sizeof (pbuf), "UDP");
+		else
+			(void) snprintf(pbuf, sizeof (pbuf), "TCP");
+		event->adt_ilb_create_rule.protocol = pbuf;
+
+		/* Fill in algorithm and operation type */
+		ilbd_algo_to_str(rlinfo->rl_algo, valstr1);
+		ilbd_topo_to_str(rlinfo->rl_topo, valstr2);
+		(void) snprintf(aobuf, sizeof (aobuf), "%s:%s",
+		    valstr1, valstr2);
+		event->adt_ilb_create_rule.algo_optype = aobuf;
+
+		/* Fill in proxy-src for the NAT case */
+		if (rlinfo->rl_topo == ILB_TOPO_NAT) {
+			addrstr_buf[0] = '\0';
+			ilbd_addr2str(&rlinfo->rl_nat_src_start, addrstr_buf,
+			    sizeof (addrstr_buf));
+			/*
+			 * Test the address itself; the address of the
+			 * member can never be 0.
+			 */
+			if (IN6_IS_ADDR_UNSPECIFIED(&rlinfo->rl_nat_src_end)) {
+				/* Single address */
+				(void) snprintf(pxbuf, sizeof (pxbuf),
+				    "%s", addrstr_buf);
+			} else {
+				/* address range */
+				addrstr_buf1[0] = '\0';
+				ilbd_addr2str(&rlinfo->rl_nat_src_end,
+				    addrstr_buf1, sizeof (addrstr_buf1));
+				(void) snprintf(pxbuf, sizeof (pxbuf),
+				    "%s-%s", addrstr_buf, addrstr_buf1);
+			}
+			event->adt_ilb_create_rule.proxy_src = pxbuf;
+		}
+
+		/*
+		 * Fill in pmask if user has specified one - 0 means
+		 * no persistence
+		 */
+		valstr1[0] = '\0';
+		ilbd_ip_to_str(rlinfo->rl_ipversion, &rlinfo->rl_stickymask,
+		    valstr1);
+		event->adt_ilb_create_rule.persist_mask = valstr1;
+
+		/* If there is a hcname */
+		if (rlinfo->rl_hcname[0] != '\0')
+			event->adt_ilb_create_rule.hcname = rlinfo->rl_hcname;
+
+		/* Fill in hcport */
+		if (rlinfo->rl_hcpflag == ILB_HCI_PROBE_FIX) {
+			/* hcport is specified by user */
+			(void) snprintf(hcpbuf, sizeof (hcpbuf), "%d",
+			    rlinfo->rl_hcport);
+			event->adt_ilb_create_rule.hcport = hcpbuf;
+		} else if (rlinfo->rl_hcpflag == ILB_HCI_PROBE_ANY) {
+			/* user has specified "ANY" */
+			(void) snprintf(hcpbuf, sizeof (hcpbuf), "ANY");
+			event->adt_ilb_create_rule.hcport = hcpbuf;
+		}
+
+		/*
+		 * Fill out the conndrain, nat_timeout and persist_timeout
+		 * If the user does not specify them, the default value
+		 * is set in the kernel. Userland does not know what
+		 * the values are. So if the user
+		 * does not specify these values they will show up as
+		 * 0 in the audit record.
+		 */
+		event->adt_ilb_create_rule.conndrain_timeout =
+		    rlinfo->rl_conndrain;
+		event->adt_ilb_create_rule.nat_timeout =
+		    rlinfo->rl_nat_timeout;
+		event->adt_ilb_create_rule.persist_timeout =
+		    rlinfo->rl_sticky_timeout;
+
+		/* Fill out servergroup and rule name */
+		event->adt_ilb_create_rule.server_group = rlinfo->rl_sgname;
+		event->adt_ilb_create_rule.rule_name = rlinfo->rl_name;
+		break;
+	default:
+		/* not reached: unknown commands were rejected above */
+		break;
+	}
+	if (rc == ILB_STATUS_OK) {
+		if (adt_put_event(event, ADT_SUCCESS, ADT_SUCCESS) != 0) {
+			logerr("ilbd_audit_rule_event:adt_put_event failed");
+			exit(EXIT_FAILURE);
+		}
+	} else {
+		audit_error = ilberror2auditerror(rc);
+		if (adt_put_event(event, ADT_FAILURE, audit_error) != 0) {
+			logerr("ilbd_audit_rule_event: adt_put_event failed");
+			exit(EXIT_FAILURE);
+		}
+	}
+	adt_free_event(event);
+	(void) adt_end_session(ah);
+}
+
+/*
+ * Perform one of the destroy/enable/disable commands on a single rule
+ * and, unless the call is part of a rollback, generate the audit record
+ * for it. Any other command yields ILB_STATUS_INVAL_CMD.
+ *
+ * After ILBD_DESTROY_RULE the caller must not touch irl again, since
+ * ilbd_destroy_one_rule() may have freed it.
+ */
+static ilb_status_t
+i_ilbd_action_switch(ilbd_rule_t *irl, ilbd_cmd_t cmd,
+    boolean_t is_rollback, ucred_t *ucredp)
+{
+	ilb_status_t	rc;
+	char		rulename[sizeof (irl->irl_name)];
+
+	switch (cmd) {
+	case ILBD_DESTROY_RULE:
+		/*
+		 * Keep a private copy of the name for the audit record;
+		 * reading irl->irl_name after the destroy would be a
+		 * use-after-free.
+		 */
+		(void) strlcpy(rulename, irl->irl_name, sizeof (rulename));
+		rc = ilbd_destroy_one_rule(irl);
+		if (!is_rollback) {
+			ilbd_audit_rule_event(rulename, NULL,
+			    cmd, rc, ucredp);
+		}
+		return (rc);
+	case ILBD_ENABLE_RULE:
+		rc = ilbd_enable_one_rule(irl, is_rollback);
+		if (!is_rollback) {
+			ilbd_audit_rule_event(irl->irl_name, NULL, cmd,
+			    rc, ucredp);
+		}
+		return (rc);
+	case ILBD_DISABLE_RULE:
+		rc = ilbd_disable_one_rule(irl, is_rollback);
+		if (!is_rollback) {
+			ilbd_audit_rule_event(irl->irl_name, NULL, cmd,
+			    rc, ucredp);
+		}
+		return (rc);
+	}
+	return (ILB_STATUS_INVAL_CMD);
+}
+
+/*
+ * Map a daemon rule command (ilbd_cmd_t) to the corresponding kernel
+ * command (ilb_cmd_t). Only the four rule commands listed below are
+ * translated.
+ * NOTE(review): for any other value 'r' is returned without ever having
+ * been assigned; callers must pass one of these four commands.
+ */
+static ilb_cmd_t
+i_ilbd2ilb_cmd(ilbd_cmd_t c)
+{
+ ilb_cmd_t r;
+
+ switch (c) {
+ case ILBD_CREATE_RULE:
+ r = ILB_CREATE_RULE;
+ break;
+ case ILBD_DESTROY_RULE:
+ r = ILB_DESTROY_RULE;
+ break;
+ case ILBD_ENABLE_RULE:
+ r = ILB_ENABLE_RULE;
+ break;
+ case ILBD_DISABLE_RULE:
+ r = ILB_DISABLE_RULE;
+ break;
+ }
+ return (r);
+}
+
+/*
+ * Return the command that reverts 'cmd': enable and disable undo each
+ * other. Destroy - and any command not listed here - cannot be undone,
+ * which is reported as ILBD_BAD_CMD.
+ */
+static ilbd_cmd_t
+get_undo_cmd(ilbd_cmd_t cmd)
+{
+	ilbd_cmd_t u_cmd;
+
+	switch (cmd) {
+	case ILBD_ENABLE_RULE:
+		u_cmd = ILBD_DISABLE_RULE;
+		break;
+	case ILBD_DISABLE_RULE:
+		u_cmd = ILBD_ENABLE_RULE;
+		break;
+	case ILBD_DESTROY_RULE:
+	default:
+		/* never hand back an uninitialized value */
+		u_cmd = ILBD_BAD_CMD;
+		break;
+	}
+
+	return (u_cmd);
+}
+
+/*
+ * Common implementation of the destroy/enable/disable rule requests.
+ *
+ * rule_name	name of the rule to act on; an empty string means "all
+ *		rules"
+ * ps		client identity used for the authorization check; the
+ *		check is skipped when NULL
+ * cmd		ILBD_DESTROY_RULE, ILBD_ENABLE_RULE or ILBD_DISABLE_RULE
+ * ucredp	client credentials for the audit records
+ *
+ * For a single rule the work is delegated to i_ilbd_action_switch().
+ * For "all rules" the kernel is told once, then the daemon's list is
+ * walked. If one rule fails and the command can be undone, the kernel
+ * request and the rules already processed are rolled back; the status
+ * returned is always that of the step that failed, never that of the
+ * rollback.
+ */
+static ilb_status_t
+i_ilbd_rule_action(const char *rule_name, const struct passwd *ps,
+    ilbd_cmd_t cmd, ucred_t *ucredp)
+{
+	ilbd_rule_t	*irl, *irl_next;
+	boolean_t	is_all_rules = B_FALSE;
+	ilb_status_t	rc = ILB_STATUS_OK;
+	ilb_name_cmd_t	kcmd;
+	ilbd_cmd_t	u_cmd;
+	char		rulename[ILB_NAMESZ];
+
+	if (ps != NULL) {
+		if ((cmd == ILBD_ENABLE_RULE) || (cmd == ILBD_DISABLE_RULE))
+			rc = ilbd_check_client_enable_auth(ps);
+		else
+			rc = ilbd_check_client_config_auth(ps);
+		/* generate the audit record before bailing out */
+		if (rc != ILB_STATUS_OK) {
+			/* look at the first character, not the pointer */
+			if (rule_name != NULL && rule_name[0] != '\0') {
+				ilbd_audit_rule_event(rule_name, NULL,
+				    cmd, rc, ucredp);
+			} else {
+				(void) snprintf(rulename, sizeof (rulename),
+				    "all");
+				ilbd_audit_rule_event(rulename, NULL, cmd, rc,
+				    ucredp);
+			}
+			goto out;
+		}
+	}
+	is_all_rules = rule_name[0] == 0;
+
+	/* just one rule */
+	if (!is_all_rules) {
+		irl = i_find_rule_byname(rule_name);
+		if (irl == NULL) {
+			rc = ILB_STATUS_ENORULE;
+			ilbd_audit_rule_event(rule_name, NULL, cmd, rc, ucredp);
+			goto out;
+		}
+		/* auditing will be done by i_ilbd_action_switch() */
+		rc = i_ilbd_action_switch(irl, cmd, B_FALSE, ucredp);
+		goto out;
+	}
+
+	/* all rules: first tell the kernel, then walk the daemon's list */
+	kcmd.cmd = i_ilbd2ilb_cmd(cmd);
+	kcmd.flags = ILB_RULE_ALLRULES;
+	/* no rule name applies; don't send stack garbage to the kernel */
+	kcmd.name[0] = '\0';
+
+	rc = do_ioctl(&kcmd, 0);
+	if (rc != ILB_STATUS_OK) {
+		(void) snprintf(rulename, sizeof (rulename), "all");
+		ilbd_audit_rule_event(rulename, NULL, cmd, rc, ucredp);
+		goto out;
+	}
+
+	irl = list_head(&ilbd_rule_hlist);
+	while (irl != NULL) {
+		irl_next = list_next(&ilbd_rule_hlist, irl);
+		irl->irl_flags |= ILB_FLAGS_RULE_ALLRULES;
+		/* auditing will be done by i_ilbd_action_switch() */
+		rc = i_ilbd_action_switch(irl, cmd, B_FALSE, ucredp);
+		/*
+		 * With ILB_FLAGS_RULE_ALLRULES set, a destroy always frees
+		 * the rule, so it must not be touched afterwards.
+		 */
+		if (cmd != ILBD_DESTROY_RULE)
+			irl->irl_flags &= ~ILB_FLAGS_RULE_ALLRULES;
+		if (rc != ILB_STATUS_OK)
+			goto rollback_list;
+		irl = irl_next;
+	}
+	return (rc);
+
+rollback_list:
+	u_cmd = get_undo_cmd(cmd);
+	if (u_cmd == ILBD_BAD_CMD)
+		return (rc);
+
+	if (is_all_rules) {
+		kcmd.cmd = i_ilbd2ilb_cmd(u_cmd);
+		(void) do_ioctl(&kcmd, 0);
+	}
+	/* current list element failed, so we start with previous one */
+	irl = list_prev(&ilbd_rule_hlist, irl);
+	while (irl != NULL) {
+		if (is_all_rules)
+			irl->irl_flags |= ILB_FLAGS_RULE_ALLRULES;
+
+		/*
+		 * When the processing of a command consists of
+		 * multiple sequential steps, and one of them fails,
+		 * ilbd performs rollback to undo the steps taken before the
+		 * failing step. Since ilbd is initiating these steps
+		 * there is no need to audit them. The result of the
+		 * rollback is ignored so that rc keeps reporting the
+		 * failure that triggered it.
+		 */
+		(void) i_ilbd_action_switch(irl, u_cmd, B_TRUE, NULL);
+		irl->irl_flags &= ~ILB_FLAGS_RULE_ALLRULES;
+
+		irl = list_prev(&ilbd_rule_hlist, irl);
+	}
+out:
+	return (rc);
+}
+
+/*
+ * Destroy the rule called rule_name; an empty name selects all rules.
+ * See i_ilbd_rule_action() for the meaning of ps and ucredp.
+ */
+ilb_status_t
+ilbd_destroy_rule(ilbd_name_t rule_name, const struct passwd *ps,
+    ucred_t *ucredp)
+{
+	ilb_status_t	rc;
+
+	rc = i_ilbd_rule_action(rule_name, ps, ILBD_DESTROY_RULE, ucredp);
+	return (rc);
+}
+
+/*
+ * Enable the rule called rule_name; an empty name selects all rules.
+ * See i_ilbd_rule_action() for the meaning of ps and ucredp.
+ */
+ilb_status_t
+ilbd_enable_rule(ilbd_name_t rule_name, const struct passwd *ps,
+    ucred_t *ucredp)
+{
+	ilb_status_t	rc;
+
+	rc = i_ilbd_rule_action(rule_name, ps, ILBD_ENABLE_RULE, ucredp);
+	return (rc);
+}
+
+/*
+ * Disable the rule called rule_name; an empty name selects all rules.
+ * See i_ilbd_rule_action() for the meaning of ps and ucredp.
+ */
+ilb_status_t
+ilbd_disable_rule(ilbd_name_t rule_name, const struct passwd *ps,
+    ucred_t *ucredp)
+{
+	ilb_status_t	rc;
+
+	rc = i_ilbd_rule_action(rule_name, ps, ILBD_DISABLE_RULE, ucredp);
+	return (rc);
+}
+
+/*
+ * Allocate a zero-filled kernel rule command. If a "template" irl is
+ * given, the rule's attributes are copied into the command, with the IP
+ * version, algorithm and topology converted to their kernel encoding.
+ * The caller sets the cmd member and frees the result.
+ * Returns NULL if the allocation fails.
+ */
+static ilb_rule_cmd_t *
+i_alloc_kernel_rule_cmd(ilbd_rule_t *irl)
+{
+	ilb_rule_cmd_t	*rcmd;
+
+	rcmd = (ilb_rule_cmd_t *)calloc(1, sizeof (*rcmd));
+	if (rcmd == NULL || irl == NULL)
+		return (rcmd);
+
+	rcmd->flags = irl->irl_flags;
+	rcmd->ip_ver = AF_2_IPPROTO(irl->irl_ipversion);
+	rcmd->vip = irl->irl_vip;
+	rcmd->proto = irl->irl_proto;
+	rcmd->min_port = irl->irl_minport;
+	rcmd->max_port = irl->irl_maxport;
+	rcmd->algo = algo_lib2impl(irl->irl_algo);
+	rcmd->topo = topo_lib2impl(irl->irl_topo);
+	rcmd->sticky_mask = irl->irl_stickymask;
+	rcmd->nat_src_start = irl->irl_nat_src_start;
+	rcmd->nat_src_end = irl->irl_nat_src_end;
+	rcmd->conn_drain_timeout = irl->irl_conndrain;
+	rcmd->nat_expiry = irl->irl_nat_timeout;
+	rcmd->sticky_expiry = irl->irl_sticky_timeout;
+	(void) strlcpy(rcmd->name, irl->irl_name, sizeof (rcmd->name));
+
+	return (rcmd);
+}
+
+/*
+ * Make sure *kcmdp has room for servers[index].
+ * index is the next to be used index into (*kcmdp)->servers.
+ *
+ * Nothing is done if the command exists and its num_servers is larger
+ * than index + 1. Otherwise the command is (re)allocated, num_servers is
+ * set to index and *kcmdp is updated. On allocation failure
+ * ILB_STATUS_ENOMEM is returned and *kcmdp is left as it was.
+ */
+static ilb_status_t
+adjust_srv_info_cmd(ilb_servers_info_cmd_t **kcmdp, int index)
+{
+	ilb_servers_info_cmd_t	*cur = *kcmdp;
+	ilb_servers_info_cmd_t	*grown;
+	size_t			need;
+
+	if (cur != NULL && cur->num_servers > index + 1)
+		return (ILB_STATUS_OK);
+
+	/*
+	 * One ilb_server_info_t is already part of the command structure,
+	 * so adding room for 'index' more entries (one less than the
+	 * total needed) gives exactly the required size.
+	 */
+	need = sizeof (*cur) + (index * sizeof (ilb_server_info_t));
+	grown = (ilb_servers_info_cmd_t *)realloc(cur, need);
+	if (grown == NULL)
+		return (ILB_STATUS_ENOMEM);
+
+	/* the slot that was just added is not counted yet */
+	grown->num_servers = index;
+	*kcmdp = grown;
+
+	return (ILB_STATUS_OK);
+}
+
+/*
+ * this function adds all servers of server group sg to the kernel(!) rule
+ * the name of which is passed as argument.
+ *
+ * name	name of the kernel rule
+ * sg	server group whose isg_srvlist is added
+ * rl	daemon rule; supplies the port range for servers that have no
+ *	port of their own
+ *
+ * Returns ILB_STATUS_OK if there is nothing to add or all servers were
+ * added. If the kernel reports an error for any server, the servers are
+ * removed again and the mapped error is returned. The command buffer is
+ * released on every path.
+ */
+static ilb_status_t
+i_update_ksrv_rules(char *name, ilbd_sg_t *sg, ilbd_rule_t *rl)
+{
+	ilb_status_t		rc = ILB_STATUS_OK;
+	ilbd_srv_t		*srvp;
+	ilb_servers_info_cmd_t	*kcmd = NULL;
+	int			i;
+
+	/*
+	 * If the servergroup doesn't have any servers associated with
+	 * it yet, there's nothing more to do here.
+	 */
+	if (sg->isg_srvcount == 0)
+		return (ILB_STATUS_OK);
+
+	/*
+	 * walk the list of servers attached to this SG
+	 */
+	srvp = list_head(&sg->isg_srvlist);
+	for (i = 0; srvp != NULL; srvp = list_next(&sg->isg_srvlist, srvp)) {
+		rc = adjust_srv_info_cmd(&kcmd, i);
+		if (rc != ILB_STATUS_OK)
+			goto out;
+
+		ILB_SGSRV_2_KSRV(&srvp->isv_srv, &kcmd->servers[i]);
+		/*
+		 * "no port" means "copy rule's port" (for kernel rule)
+		 */
+		if (kcmd->servers[i].min_port == 0) {
+			kcmd->servers[i].min_port = rl->irl_minport;
+			kcmd->servers[i].max_port = rl->irl_maxport;
+		}
+		i++;
+	}
+
+	/* the list was empty despite the count: nothing was allocated */
+	if (kcmd == NULL)
+		return (ILB_STATUS_OK);
+
+	kcmd->cmd = ILB_ADD_SERVERS;
+	kcmd->num_servers = i;
+	(void) strlcpy(kcmd->name, name, sizeof (kcmd->name));
+
+	rc = do_ioctl(kcmd, 0);
+	if (rc != ILB_STATUS_OK)
+		goto out;
+
+	for (i = 0; i < kcmd->num_servers; i++) {
+		int e;
+
+		if ((e = kcmd->servers[i].err) != 0) {
+			logerr("i_update_ksrv_rules "
+			    "ioctl indicates failure: %s", strerror(e));
+			rc = ilb_map_errno2ilbstat(e);
+			/*
+			 * if adding even a single server failed, we need to
+			 * roll back the whole wad. We ignore any errors and
+			 * return the one that was returned by the first ioctl.
+			 */
+			kcmd->cmd = ILB_DEL_SERVERS;
+			(void) do_ioctl(kcmd, 0);
+			goto out;
+		}
+	}
+
+out:
+	/* free(NULL) is a no-op, so this covers every path */
+	free(kcmd);
+	return (rc);
+}
+
+/*
+ * convert a struct in6_addr to valstr
+ *
+ * ipversion	AF_INET or AF_INET6; selects which form of the address is
+ *		printed
+ * addr		the address to convert
+ * valstr	output buffer; must hold at least INET_ADDRSTRLEN bytes for
+ *		AF_INET and INET6_ADDRSTRLEN bytes otherwise
+ *
+ * A conversion failure is logged; valstr is then left as the caller
+ * passed it in.
+ */
+void
+ilbd_ip_to_str(uint16_t ipversion, struct in6_addr *addr, char *valstr)
+{
+	size_t		vallen;
+	ilb_ip_addr_t	ipaddr;
+	void		*addrptr;
+
+	vallen = (ipversion == AF_INET) ? INET_ADDRSTRLEN : INET6_ADDRSTRLEN;
+
+	IP_COPY_IMPL_2_CLI(addr, &ipaddr);
+	addrptr = (ipversion == AF_INET) ?
+	    (void *)&ipaddr.ia_v4 : (void *)&ipaddr.ia_v6;
+	/*
+	 * The buffer size is the fourth argument of inet_ntop() and the
+	 * NULL check applies to its return value.
+	 */
+	if (inet_ntop(ipversion, addrptr, valstr, vallen) == NULL)
+		logerr("ilbd_ip_to_str: inet_ntop failed");
+}
+
+/*
+ * Create a new rule from the description in *rl.
+ *
+ * rl		the rule to create; names the server group it uses
+ * ev_port	passed on to ilbd_hc_associate_rule() when the rule has a
+ *		health check
+ * ps		client identity; when NULL no authorization check is done
+ *		and the rule is not saved to the persistent configuration
+ * ucredp	client credentials for the audit record
+ *
+ * Every outcome is audited through ilbd_audit_rule_event(). When a step
+ * fails, the steps already taken - health check association, kernel rule,
+ * in-memory rule - are undone before the error is returned.
+ */
+ilb_status_t
+ilbd_create_rule(ilb_rule_info_t *rl, int ev_port,
+    const struct passwd *ps, ucred_t *ucredp)
+{
+	ilb_status_t	rc;
+	ilbd_rule_t	*irl = NULL;
+	ilbd_sg_t	*sg;
+	ilb_rule_cmd_t	*kcmd = NULL;
+	ilb_name_cmd_t	undo_kcmd;
+
+	if (ps != NULL) {
+		if ((rc = ilbd_check_client_config_auth(ps)) != ILB_STATUS_OK)
+			goto out;
+	}
+
+	if (i_find_rule_byname(rl->rl_name) != NULL) {
+		logdebug("ilbd_create_rule: rule %s"
+		    " already exists", rl->rl_name);
+		ilbd_audit_rule_event(NULL, rl, ILBD_CREATE_RULE,
+		    ILB_STATUS_DUP_RULE, ucredp);
+		return (ILB_STATUS_DUP_RULE);
+	}
+
+	sg = i_find_sg_byname(rl->rl_sgname);
+	if (sg == NULL) {
+		logdebug("ilbd_create_rule: rule %s uses non-existent"
+		    " servergroup name %s", rl->rl_name, rl->rl_sgname);
+		ilbd_audit_rule_event(NULL, rl, ILBD_CREATE_RULE,
+		    ILB_STATUS_SGUNAVAIL, ucredp);
+		return (ILB_STATUS_SGUNAVAIL);
+	}
+
+	if ((rc = ilbd_sg_check_rule_port(sg, rl)) != ILB_STATUS_OK) {
+		ilbd_audit_rule_event(NULL, rl, ILBD_CREATE_RULE, rc, ucredp);
+		return (rc);
+	}
+
+	/* allocs and copies contents of arg (if != NULL) into new rule */
+	irl = i_alloc_ilbd_rule(rl);
+	if (irl == NULL) {
+		ilbd_audit_rule_event(NULL, rl, ILBD_CREATE_RULE,
+		    ILB_STATUS_ENOMEM, ucredp);
+		return (ILB_STATUS_ENOMEM);
+	}
+
+	/* make sure rule's IPversion (via vip) and SG's match */
+	if (sg->isg_srvcount > 0) {
+		ilbd_srv_t	*srv = list_head(&sg->isg_srvlist);
+		int32_t		r_af = rl->rl_ipversion;
+		int32_t		s_af = GET_AF(&srv->isv_addr);
+
+		if (r_af != s_af) {
+			logdebug("address family mismatch with servergroup");
+			rc = ILB_STATUS_MISMATCHSG;
+			goto out;
+		}
+	}
+	irl->irl_sg = sg;
+
+	/* Try associating the rule with the given hc object. */
+	if (RULE_HAS_HC(irl)) {
+		if ((rc = ilbd_hc_associate_rule(irl, ev_port)) !=
+		    ILB_STATUS_OK)
+			goto out;
+	}
+
+	/*
+	 * checks are done, now:
+	 * 1. create rule in kernel
+	 * 2. tell it about the backend server (which we maintain in SG)
+	 * 3. attach the rule in memory
+	 */
+	/* 1. */
+	/* allocs and copies contents of arg (if != NULL) into new rule */
+	kcmd = i_alloc_kernel_rule_cmd(irl);
+	if (kcmd == NULL) {
+		rc = ILB_STATUS_ENOMEM;
+		goto rollback_hc;
+	}
+	kcmd->cmd = ILB_CREATE_RULE;
+
+	rc = do_ioctl(kcmd, 0);
+	if (rc != ILB_STATUS_OK)
+		goto rollback_kcmd;
+
+	/* 2. */
+	rc = i_update_ksrv_rules(kcmd->name, sg, irl);
+	if (rc != ILB_STATUS_OK)
+		goto rollback_krule;
+
+	/* 3. */
+	(void) i_attach_rule2sg(sg, irl);
+	list_insert_tail(&ilbd_rule_hlist, irl);
+
+	if (ps != NULL) {
+		rc = i_ilbd_save_rule(irl, ILBD_SCF_CREATE);
+		if (rc != ILB_STATUS_OK)
+			goto rollback_rule;
+	}
+
+	free(kcmd);
+	ilbd_audit_rule_event(NULL, rl, ILBD_CREATE_RULE,
+	    ILB_STATUS_OK, ucredp);
+	return (ILB_STATUS_OK);
+
+rollback_rule:
+	/*
+	 * ilbd_destroy_one_rule() also frees irl, as well as dissociate
+	 * rule and HC, so all we need to do afterwards is free the kcmd
+	 * and return.
+	 */
+	(void) ilbd_destroy_one_rule(irl);
+	ilbd_audit_rule_event(NULL, rl, ILBD_CREATE_RULE, rc, ucredp);
+	free(kcmd);
+	return (rc);
+
+rollback_krule:
+	/*
+	 * Step 1 succeeded but step 2 did not: remove the rule from the
+	 * kernel again, or it would stay there without the daemon knowing
+	 * about it. Cannot do much if the ioctl fails...
+	 */
+	undo_kcmd.cmd = ILB_DESTROY_RULE;
+	(void) strlcpy(undo_kcmd.name, irl->irl_name,
+	    sizeof (undo_kcmd.name));
+	undo_kcmd.flags = 0;
+	(void) do_ioctl(&undo_kcmd, 0);
+	/* FALLTHROUGH */
+rollback_kcmd:
+	free(kcmd);
+rollback_hc:
+	/* Cannot fail since the rule is just associated with the hc object. */
+	if (RULE_HAS_HC(irl))
+		(void) ilbd_hc_dissociate_rule(irl);
+out:
+	ilbd_audit_rule_event(NULL, rl, ILBD_CREATE_RULE, rc, ucredp);
+	free(irl);
+	return (rc);
+}
+
+/*
+ * Convert the daemon's server flags into the flag bits used in kernel
+ * server entries. Only the "enabled" state is carried over so far.
+ */
+static uint32_t
+i_flags_d2k(int f)
+{
+	uint32_t	kflags = 0;
+
+	/* more as they are defined */
+	kflags |= ILB_IS_SRV_ENABLED(f) ? ILB_SERVER_ENABLED : 0;
+
+	return (kflags);
+}
+
+/*
+ * walk the list of rules and add srv to the *kernel* rule
+ * (this is a list of rules hanging off of a server group)
+ *
+ * For every rule on rlist the server is added to the kernel rule and,
+ * if the rule has a health check, registered with it (ev_port is passed
+ * on to ilbd_hc_add_server()). If any step fails, the server is removed
+ * again from the rules handled so far and the error of the failing step
+ * is returned.
+ */
+ilb_status_t
+i_add_srv2krules(list_t *rlist, ilb_sg_srv_t *srv, int ev_port)
+{
+ ilb_status_t rc = ILB_STATUS_OK;
+ ilbd_rule_t *rl, *del_rl;
+ ilb_servers_info_cmd_t kcmd;
+ ilb_servers_cmd_t del_kcmd;
+
+ /* these members are the same for every rule; set them up once */
+ kcmd.cmd = ILB_ADD_SERVERS;
+ kcmd.num_servers = 1;
+ kcmd.servers[0].err = 0;
+ kcmd.servers[0].addr = srv->sgs_addr;
+ kcmd.servers[0].flags = i_flags_d2k(srv->sgs_flags);
+ (void) strlcpy(kcmd.servers[0].name, srv->sgs_srvID,
+ sizeof (kcmd.servers[0].name));
+
+ /*
+ * a note about rollback: since we need to start rollback with the
+ * current list element in some case, and with the previous one
+ * in others, we must "go back" in this latter case before
+ * we jump to the rollback code.
+ */
+ for (rl = list_head(rlist); rl != NULL; rl = list_next(rlist, rl)) {
+ (void) strlcpy(kcmd.name, rl->irl_name, sizeof (kcmd.name));
+ /*
+ * sgs_minport == 0 means "no port specified"; this
+ * indicates that the server matches anything the rule
+ * provides.
+ * NOTE: this can be different for different rules
+ * using the same server group, therefore we don't modify
+ * this information in the servergroup, but *only* in
+ * the kernel's rule.
+ */
+ if (srv->sgs_minport == 0) {
+ kcmd.servers[0].min_port = rl->irl_minport;
+ kcmd.servers[0].max_port = rl->irl_maxport;
+ } else {
+ kcmd.servers[0].min_port = srv->sgs_minport;
+ kcmd.servers[0].max_port = srv->sgs_maxport;
+ }
+ rc = do_ioctl((void *)&kcmd, 0);
+ if (rc != ILB_STATUS_OK) {
+ logdebug("i_add_srv2krules: do_ioctl call failed");
+ /* nothing was added to this rule; undo from the previous one */
+ del_rl = list_prev(rlist, rl);
+ goto rollback;
+ }
+
+ /*
+ * if ioctl() returns != 0, it doesn't perform the copyout
+ * necessary to indicate *which* server failed (we could be
+ * adding more than one); therefore we must check this
+ * 'err' field even if ioctl() returns 0.
+ */
+ if (kcmd.servers[0].err != 0) {
+ logerr("i_add_srv2krules: SIOCILB ioctl returned"
+ " error %d", kcmd.servers[0].err);
+ rc = ilb_map_errno2ilbstat(kcmd.servers[0].err);
+ /* nothing was added to this rule; undo from the previous one */
+ del_rl = list_prev(rlist, rl);
+ goto rollback;
+ }
+ if (RULE_HAS_HC(rl)) {
+ if ((rc = ilbd_hc_add_server(rl, srv, ev_port)) !=
+ ILB_STATUS_OK) {
+ logerr("i_add_srv2krules: cannot start timer "
+ " for rules %s server %s", rl->irl_name,
+ srv->sgs_srvID);
+
+ /* the kernel already has the server for this rule */
+ del_rl = rl;
+ goto rollback;
+ }
+ }
+ }
+
+ return (rc);
+
+rollback:
+ /*
+ * this is almost, but not quite, the same as i_rem_srv_frm_krules()
+ * therefore we keep it separate.
+ */
+ del_kcmd.cmd = ILB_DEL_SERVERS;
+ del_kcmd.num_servers = 1;
+ del_kcmd.servers[0].addr = srv->sgs_addr;
+ /* walk backwards from del_rl; errors are ignored, rc is kept */
+ while (del_rl != NULL) {
+ if (RULE_HAS_HC(del_rl))
+ (void) ilbd_hc_del_server(del_rl, srv);
+ (void) strlcpy(del_kcmd.name, del_rl->irl_name,
+ sizeof (del_kcmd.name));
+ (void) do_ioctl((void *)&del_kcmd, 0);
+ del_rl = list_prev(rlist, del_rl);
+ }
+
+ return (rc);
+}
+
+/*
+ * Walk the list of rules and remove srv from each *kernel* rule; for rules
+ * with a health check the server is also removed from the health check.
+ *
+ * ev_port is only used for rollback purposes in this function: on failure
+ * the server is re-added to the rules already processed, unless ev_port is
+ * -1, in which case no rollback is attempted.
+ *
+ * Returns ILB_STATUS_OK if the server was removed from every rule,
+ * otherwise the status of the step that failed.
+ */
+ilb_status_t
+i_rem_srv_frm_krules(list_t *rlist, ilb_sg_srv_t *srv, int ev_port)
+{
+ ilb_status_t rc = ILB_STATUS_OK;
+ ilbd_rule_t *rl, *add_rl;
+ ilb_servers_cmd_t kcmd;
+ ilb_servers_info_cmd_t add_kcmd;
+
+ kcmd.cmd = ILB_DEL_SERVERS;
+ kcmd.num_servers = 1;
+ kcmd.servers[0].err = 0;
+ kcmd.servers[0].addr = srv->sgs_addr;
+
+ for (rl = list_head(rlist); rl != NULL; rl = list_next(rlist, rl)) {
+ (void) strlcpy(kcmd.name, rl->irl_name, sizeof (kcmd.name));
+ rc = do_ioctl((void *)&kcmd, 0);
+ if (rc != ILB_STATUS_OK) {
+ /* keep the separating blank between the two literals */
+ logdebug("i_rem_srv_frm_krules: do_ioctl "
+ "call failed");
+ /* nothing was removed here: start with the previous rule */
+ add_rl = list_prev(rlist, rl);
+ goto rollback;
+ }
+ /*
+ * if ioctl() returns != 0, it doesn't perform the copyout
+ * necessary to indicate *which* server failed (we could be
+ * removing more than one); therefore we must check this
+ * 'err' field even if ioctl() returns 0.
+ */
+ if (kcmd.servers[0].err != 0) {
+ logerr("i_rem_srv_frm_krules: SIOCILB ioctl"
+ " returned error %s",
+ strerror(kcmd.servers[0].err));
+ rc = ilb_map_errno2ilbstat(kcmd.servers[0].err);
+ add_rl = list_prev(rlist, rl);
+ goto rollback;
+ }
+ if (RULE_HAS_HC(rl) &&
+ (rc = ilbd_hc_del_server(rl, srv)) != ILB_STATUS_OK) {
+ logerr("i_rem_srv_frm_krules: cannot delete "
+ "timer for rules %s server %s", rl->irl_name,
+ srv->sgs_srvID);
+ /* the ioctl succeeded, so rollback includes this rule */
+ add_rl = rl;
+ goto rollback;
+ }
+ }
+
+ return (rc);
+
+rollback:
+ /* Don't do roll back if ev_port == -1. */
+ if (ev_port == -1)
+ return (rc);
+
+ add_kcmd.cmd = ILB_ADD_SERVERS;
+ add_kcmd.num_servers = 1;
+ add_kcmd.servers[0].err = 0;
+ add_kcmd.servers[0].addr = srv->sgs_addr;
+ add_kcmd.servers[0].flags = i_flags_d2k(srv->sgs_flags);
+ (void) strlcpy(add_kcmd.servers[0].name, srv->sgs_srvID,
+ sizeof (add_kcmd.servers[0].name));
+ /* walk back towards the list head; failures here are ignored */
+ while (add_rl != NULL) {
+ /* a server without its own port range inherits the rule's */
+ if (srv->sgs_minport == 0) {
+ add_kcmd.servers[0].min_port = add_rl->irl_minport;
+ add_kcmd.servers[0].max_port = add_rl->irl_maxport;
+ } else {
+ add_kcmd.servers[0].min_port = srv->sgs_minport;
+ add_kcmd.servers[0].max_port = srv->sgs_maxport;
+ }
+ if (RULE_HAS_HC(add_rl))
+ (void) ilbd_hc_add_server(add_rl, srv, ev_port);
+ (void) strlcpy(add_kcmd.name, add_rl->irl_name,
+ sizeof (add_kcmd.name));
+ (void) do_ioctl((void *)&add_kcmd, 0);
+ add_rl = list_prev(rlist, add_rl);
+ }
+
+ return (rc);
+}
diff --git a/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_scf.c b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_scf.c
new file mode 100644
index 0000000000..6dcd03a24a
--- /dev/null
+++ b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_scf.c
@@ -0,0 +1,1692 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <strings.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <sys/list.h>
+#include <libilb.h>
+#include <assert.h>
+#include <libscf.h>
+#include "libilb_impl.h"
+#include "ilbd.h"
+
+/*
+ * Prefixes prepended to a rule, servergroup or health check name to form
+ * the name of its SCF property group, followed by the FMRI of the service
+ * and the name of the instance under which those groups are looked up.
+ */
+#define ILBD_PG_NAME_RULE "rule_"
+#define ILBD_PG_NAME_SG "sg_"
+#define ILBD_PG_NAME_HC "hc_"
+#define ILBD_SVC_FMRI "svc:/network/loadbalancer/ilb"
+#define ILBD_INST_NAME "default"
+
+/*
+ * Identifies one persisted property.  The values are grouped as rule
+ * properties, servergroup properties and health check properties, in that
+ * order; the *_VAR_NUM macros derive index ranges into prop_tbl from these
+ * values, so the order here has to match the order of prop_tbl.
+ */
+typedef enum {
+ ILBD_RULE_STATUS,
+ ILBD_RULE_VIP,
+ ILBD_RULE_PROTO,
+ ILBD_RULE_PORT,
+ ILBD_RULE_ALGO,
+ ILBD_RULE_TOPO,
+ ILBD_RULE_NAT_STR,
+ ILBD_RULE_NAT_END,
+ ILBD_RULE_STI_MASK,
+ ILBD_RULE_SGNAME,
+ ILBD_RULE_HCNAME,
+ ILBD_RULE_HCPORT,
+ ILBD_RULE_HCPFLAG,
+ ILBD_RULE_DRAINTIME,
+ ILBD_RULE_NAT_TO,
+ ILBD_RULE_PERS_TO,
+
+ ILBD_SG_SERVER,
+
+ ILBD_HC_TEST,
+ ILBD_HC_TIMEOUT,
+ ILBD_HC_INTERVAL,
+ ILBD_HC_DEF_PING,
+ ILBD_HC_COUNT,
+
+ ILBD_VAR_INVALID
+} ilbd_var_type_t;
+
+/*
+ * One row of prop_tbl: ties an ilbd property identifier to the name and
+ * the value type under which it is stored in SCF.
+ */
+typedef struct prop_tbl_entry {
+ ilbd_var_type_t val_type; /* which ilbd property this row describes */
+ const char *scf_propname; /* property name used in SCF */
+ scf_type_t scf_proptype; /* SCF value type of the property */
+} prop_tbl_entry_t;
+
+/*
+ * this table contains a map of all SCF properties, including rules,
+ * servergroups and health checks. The place to add new property needs to be
+ * watched carefully. When new properties are added, corresponding *VAR_NUM
+ * needs to be adjusted to reflect the correct index of the table
+ *
+ * Rows listed with SCF_TYPE_INVALID hold IP addresses; their actual type
+ * (SCF_TYPE_NET_ADDR_V4 or SCF_TYPE_NET_ADDR_V6) is chosen from the IP
+ * version when the value is converted by ilbd_scf_ip_to_str().
+ */
+prop_tbl_entry_t prop_tbl[] = {
+ /* entries for rule */
+ {ILBD_RULE_STATUS, "status", SCF_TYPE_BOOLEAN},
+ /* SCF_TYPE_NET_ADDR_V4 or SCF_TYPE_NET_ADDR_V6 */
+ {ILBD_RULE_VIP, "vip", SCF_TYPE_INVALID},
+ {ILBD_RULE_PROTO, "protocol", SCF_TYPE_ASTRING},
+ {ILBD_RULE_PORT, "port", SCF_TYPE_ASTRING},
+ {ILBD_RULE_ALGO, "ilb-algo", SCF_TYPE_ASTRING},
+ {ILBD_RULE_TOPO, "ilb-type", SCF_TYPE_ASTRING},
+ {ILBD_RULE_NAT_STR, "ilb-nat-start", SCF_TYPE_INVALID},
+ {ILBD_RULE_NAT_END, "ilb-nat-end", SCF_TYPE_INVALID},
+ {ILBD_RULE_STI_MASK, "ilb-sti-mask", SCF_TYPE_INVALID},
+ {ILBD_RULE_SGNAME, "servergroup", SCF_TYPE_ASTRING},
+ {ILBD_RULE_HCNAME, "healthcheck", SCF_TYPE_ASTRING},
+ {ILBD_RULE_HCPORT, "hc-port", SCF_TYPE_INTEGER},
+ {ILBD_RULE_HCPFLAG, "hcp-flag", SCF_TYPE_INTEGER},
+ {ILBD_RULE_DRAINTIME, "drain-time", SCF_TYPE_INTEGER},
+ {ILBD_RULE_NAT_TO, "nat-timeout", SCF_TYPE_INTEGER},
+ {ILBD_RULE_PERS_TO, "pers-timeout", SCF_TYPE_INTEGER},
+ /* add new rule related prop here */
+ /* entries for sg */
+ {ILBD_SG_SERVER, "server", SCF_TYPE_ASTRING},
+ /* add new sg related prop here */
+ /* entries for hc */
+ {ILBD_HC_TEST, "test", SCF_TYPE_ASTRING},
+ {ILBD_HC_TIMEOUT, "timeout", SCF_TYPE_INTEGER},
+ {ILBD_HC_INTERVAL, "interval", SCF_TYPE_INTEGER},
+ {ILBD_HC_DEF_PING, "ping", SCF_TYPE_BOOLEAN},
+ /* add new hc related prop here */
+ {ILBD_HC_COUNT, "count", SCF_TYPE_INTEGER}
+};
+
+/*
+ * Sizes of the sections of prop_tbl, derived from the enum values:
+ * total number of rows, then the number of rule, servergroup and health
+ * check rows.  ilbd_create_pg() turns these into start/end indices.
+ */
+#define ILBD_PROP_VAR_NUM (ILBD_HC_COUNT + 1)
+#define ILBD_RULE_VAR_NUM (ILBD_SG_SERVER)
+#define ILBD_SG_VAR_NUM (ILBD_HC_TEST - ILBD_SG_SERVER)
+#define ILBD_HC_VAR_NUM (ILBD_PROP_VAR_NUM - ILBD_HC_TEST)
+
+/* forward declarations of file-local helpers defined further down */
+static ilb_status_t ilbd_scf_set_prop(scf_propertygroup_t *, const char *,
+ scf_type_t, scf_value_t *);
+static ilb_status_t ilbd_scf_retrieve_pg(const char *, scf_propertygroup_t **,
+ boolean_t);
+static ilb_status_t ilbd_scf_delete_pg(scf_propertygroup_t *);
+static ilb_status_t ilbd_scf_get_prop_val(scf_propertygroup_t *, const char *,
+ scf_value_t **);
+
+/* NOTE: function-like macro; each argument may be evaluated twice */
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+/*
+ * Return the libscf limit for "type", capped at 120.
+ *
+ * scf_limit() is called exactly once; passing the call straight to the
+ * MIN() macro would evaluate it twice.
+ */
+int
+ilbd_scf_limit(int type)
+{
+ ssize_t limit = scf_limit(type);
+
+ return ((int)(limit < 120 ? limit : 120));
+}
+
+/*
+ * Map the error currently reported by scf_error() onto a libilb status.
+ * Errors without a dedicated mapping become ILB_STATUS_INTERNAL.
+ */
+ilb_status_t
+ilbd_scf_err_to_ilb_err()
+{
+ ilb_status_t status;
+
+ switch (scf_error()) {
+ case SCF_ERROR_NONE:
+ status = ILB_STATUS_OK;
+ break;
+ /* misuse of the API or bad arguments */
+ case SCF_ERROR_HANDLE_MISMATCH:
+ case SCF_ERROR_HANDLE_DESTROYED:
+ case SCF_ERROR_VERSION_MISMATCH:
+ case SCF_ERROR_NOT_BOUND:
+ case SCF_ERROR_CONSTRAINT_VIOLATED:
+ case SCF_ERROR_NOT_SET:
+ case SCF_ERROR_TYPE_MISMATCH:
+ case SCF_ERROR_INVALID_ARGUMENT:
+ status = ILB_STATUS_EINVAL;
+ break;
+ case SCF_ERROR_NO_MEMORY:
+ case SCF_ERROR_NO_RESOURCES:
+ status = ILB_STATUS_ENOMEM;
+ break;
+ case SCF_ERROR_NOT_FOUND:
+ case SCF_ERROR_DELETED:
+ status = ILB_STATUS_ENOENT;
+ break;
+ case SCF_ERROR_EXISTS:
+ status = ILB_STATUS_EEXIST;
+ break;
+ case SCF_ERROR_PERMISSION_DENIED:
+ status = ILB_STATUS_PERMIT;
+ break;
+ case SCF_ERROR_CALLBACK_FAILED:
+ status = ILB_STATUS_CALLBACK;
+ break;
+ case SCF_ERROR_IN_USE:
+ status = ILB_STATUS_INUSE;
+ break;
+ default:
+ status = ILB_STATUS_INTERNAL;
+ break;
+ }
+
+ return (status);
+}
+
+/*
+ * Build the SCF property group name for an object by prepending the prefix
+ * that belongs to its type ("rule_", "sg_" or "hc_") to pgname.  At most
+ * ILBD_MAX_NAME_LEN bytes are written to scf_pgname.
+ *
+ * An unknown pg_type is not expected: it is logged, the service is put
+ * into maintenance and the daemon exits.
+ */
+static void
+ilbd_name_to_scfpgname(ilbd_scf_pg_type_t pg_type, const char *pgname,
+ char *scf_pgname)
+{
+ if (pg_type == ILBD_SCF_RULE) {
+ (void) snprintf(scf_pgname, ILBD_MAX_NAME_LEN,
+ ILBD_PG_NAME_RULE "%s", pgname);
+ } else if (pg_type == ILBD_SCF_SG) {
+ (void) snprintf(scf_pgname, ILBD_MAX_NAME_LEN,
+ ILBD_PG_NAME_SG "%s", pgname);
+ } else if (pg_type == ILBD_SCF_HC) {
+ (void) snprintf(scf_pgname, ILBD_MAX_NAME_LEN,
+ ILBD_PG_NAME_HC "%s", pgname);
+ } else {
+ /* Should not happen. Log it and put ILB service in maintenance. */
+ logerr("ilbd_name_to_scfpgname: invalid pg type %d for pg %s",
+ pg_type, pgname);
+ (void) smf_maintain_instance(ILB_FMRI, SMF_IMMEDIATE);
+ exit(EXIT_FAILURE);
+ }
+}
+
+/*
+ * Release whichever of the given libscf objects are non-NULL.  The
+ * property group, instance and service are released before the handle.
+ */
+static void
+ilbd_scf_destroy(scf_handle_t *h, scf_service_t *s, scf_instance_t *inst,
+ scf_propertygroup_t *pg)
+{
+ if (pg != NULL) {
+ scf_pg_destroy(pg);
+ }
+
+ if (inst != NULL) {
+ scf_instance_destroy(inst);
+ }
+
+ if (s != NULL) {
+ scf_service_destroy(s);
+ }
+
+ /* the handle goes last */
+ if (h != NULL) {
+ scf_handle_destroy(h);
+ }
+}
+
+
+/*
+ * Create and bind a libscf handle, then look up the ILB service
+ * (ILBD_SVC_FMRI) and its ILBD_INST_NAME instance.  On success the handle,
+ * service and instance are returned through h, svc and inst and the caller
+ * owns them.  On failure everything created so far is destroyed and an
+ * ilb status is returned.
+ */
+static ilb_status_t
+ilbd_scf_get_inst(scf_handle_t **h, scf_service_t **svc, scf_instance_t **inst)
+{
+ /* objects that the failure path has to release, NULL until created */
+ scf_service_t *made_svc = NULL;
+ scf_instance_t *made_inst = NULL;
+
+ *h = scf_handle_create(SCF_VERSION);
+ if (*h == NULL)
+ return (ILB_STATUS_INTERNAL);
+
+ if (scf_handle_bind(*h) != 0)
+ goto fail;
+
+ *svc = scf_service_create(*h);
+ if (*svc == NULL)
+ goto fail;
+ made_svc = *svc;
+
+ if (scf_handle_decode_fmri(*h, ILBD_SVC_FMRI, NULL, *svc, NULL, NULL,
+ NULL, SCF_DECODE_FMRI_EXACT) != 0)
+ goto fail;
+
+ *inst = scf_instance_create(*h);
+ if (*inst == NULL)
+ goto fail;
+ made_inst = *inst;
+
+ if (scf_service_get_instance(*svc, ILBD_INST_NAME, *inst) != 0)
+ goto fail;
+
+ return (ILB_STATUS_OK);
+
+fail:
+ ilbd_scf_destroy(*h, made_svc, made_inst, NULL);
+ return (ilbd_scf_err_to_ilb_err());
+}
+
+/*
+ * If create is set, create a new prop group, destroy the old one if exists.
+ * If create not set, try to find the prop group with given name.
+ * The created or found entry is returned as *pg.
+ * Caller frees *pg and its handle scf_pg_handle(pg)
+ *
+ * Return values:
+ * - create set: ILB_STATUS_OK once the new group has been added.
+ * - create not set: ILB_STATUS_EEXIST if the group was found (this is the
+ *   "success" case for lookups), otherwise the mapped libscf error.
+ * On every other failure *pg is not usable and nothing is left for the
+ * caller to free.
+ */
+static ilb_status_t
+ilbd_scf_retrieve_pg(const char *pgname, scf_propertygroup_t **pg,
+ boolean_t create)
+{
+ scf_instance_t *inst;
+ scf_handle_t *h;
+ scf_service_t *svc;
+ ilb_status_t ret;
+
+ ret = ilbd_scf_get_inst(&h, &svc, &inst);
+ if (ret != ILB_STATUS_OK)
+ return (ret);
+
+ *pg = scf_pg_create(h);
+ if (*pg == NULL) {
+ /* don't leak what ilbd_scf_get_inst() handed to us */
+ ilbd_scf_destroy(h, svc, inst, NULL);
+ return (ILB_STATUS_INTERNAL);
+ }
+
+ if (scf_instance_get_pg(inst, pgname, *pg) != 0) {
+ /* "not found" is only acceptable when we are about to create */
+ if (scf_error() != SCF_ERROR_NOT_FOUND || !create) {
+ /* read the error before teardown can disturb it */
+ ret = ilbd_scf_err_to_ilb_err();
+ ilbd_scf_destroy(h, svc, inst, *pg);
+ *pg = NULL;
+ return (ret);
+ }
+ } else {
+ /*
+ * Found pg, don't want to create, return EEXIST. Note that
+ * h cannot be destroyed here since the caller needs to use it.
+ * The caller gets it by calling scf_pg_handle().
+ */
+ if (!create) {
+ ilbd_scf_destroy(NULL, svc, inst, NULL);
+ return (ILB_STATUS_EEXIST);
+ }
+ /* found pg, need to create, destroy the existing one */
+ else
+ (void) ilbd_scf_delete_pg(*pg);
+ }
+
+ if (create) {
+ if (scf_instance_add_pg(inst, pgname,
+ SCF_GROUP_APPLICATION, 0, *pg) != 0) {
+ ret = ilbd_scf_err_to_ilb_err();
+ ilbd_scf_destroy(h, svc, inst, *pg);
+ *pg = NULL;
+ return (ret);
+ }
+ }
+
+ /*
+ * Note that handle cannot be destroyed here, caller sometimes needs
+ * to use it. It gets the handle by calling scf_pg_handle().
+ */
+ ilbd_scf_destroy(NULL, svc, inst, NULL);
+ return (ILB_STATUS_OK);
+}
+
+/*
+ * Maps each load balancing algorithm to the string under which it is
+ * stored; used in both directions by ilbd_algo_to_str() and
+ * ilbd_scf_str_to_algo().
+ */
+struct algo_tbl_entry {
+ ilb_algo_t algo_type;
+ const char *algo_str;
+} algo_tbl[] = {
+ {ILB_ALG_ROUNDROBIN, "ROUNDROBIN"},
+ {ILB_ALG_HASH_IP, "HASH-IP"},
+ {ILB_ALG_HASH_IP_SPORT, "HASH-IP-PORT"},
+ {ILB_ALG_HASH_IP_VIP, "HASH-IP-VIP"}
+};
+
+/* number of rows in algo_tbl */
+#define ILBD_ALGO_TBL_SIZE (sizeof (algo_tbl) / \
+ sizeof (*algo_tbl))
+
+/*
+ * Copy the string form of algo_type into valstr (at most
+ * ILBD_MAX_VALUE_LEN bytes).  If the algorithm is not in algo_tbl an error
+ * is logged and valstr is left untouched.
+ */
+void
+ilbd_algo_to_str(ilb_algo_t algo_type, char *valstr)
+{
+ const struct algo_tbl_entry *ent = algo_tbl;
+ const struct algo_tbl_entry *end = algo_tbl + ILBD_ALGO_TBL_SIZE;
+
+ for (; ent < end; ent++) {
+ if (ent->algo_type != algo_type)
+ continue;
+ (void) strlcpy(valstr, ent->algo_str, ILBD_MAX_VALUE_LEN);
+ return;
+ }
+
+ logerr("ilbd_algo_to_str: algo not found");
+}
+
+/*
+ * Look up valstr in algo_tbl and store the matching algorithm in
+ * *algo_type.  If there is no match an error is logged and *algo_type is
+ * left untouched.
+ */
+static void
+ilbd_scf_str_to_algo(ilb_algo_t *algo_type, char *valstr)
+{
+ const struct algo_tbl_entry *ent = algo_tbl;
+ const struct algo_tbl_entry *end = algo_tbl + ILBD_ALGO_TBL_SIZE;
+
+ for (; ent < end; ent++) {
+ if (strcmp(valstr, ent->algo_str) != 0)
+ continue;
+ *algo_type = ent->algo_type;
+ return;
+ }
+
+ logerr("ilbd_scf_str_to_algo: algo not found");
+}
+
+/*
+ * Maps each topology to the string under which it is stored; used in both
+ * directions by ilbd_topo_to_str() and ilbd_scf_str_to_topo().
+ */
+struct topo_tbl_entry {
+ ilb_topo_t topo_type;
+ const char *topo_str;
+} topo_tbl[] = {
+ {ILB_TOPO_DSR, "DSR"},
+ {ILB_TOPO_NAT, "NAT"},
+ {ILB_TOPO_HALF_NAT, "HALF-NAT"}
+};
+
+/* number of rows in topo_tbl */
+#define ILBD_TOPO_TBL_SIZE (sizeof (topo_tbl) / \
+ sizeof (*topo_tbl))
+
+/*
+ * Copy the string form of topo_type into valstr (at most
+ * ILBD_MAX_VALUE_LEN bytes).  If the topology is not in topo_tbl an error
+ * is logged and valstr is left untouched.
+ */
+void
+ilbd_topo_to_str(ilb_topo_t topo_type, char *valstr)
+{
+ const struct topo_tbl_entry *ent = topo_tbl;
+ const struct topo_tbl_entry *end = topo_tbl + ILBD_TOPO_TBL_SIZE;
+
+ for (; ent < end; ent++) {
+ if (ent->topo_type != topo_type)
+ continue;
+ (void) strlcpy(valstr, ent->topo_str, ILBD_MAX_VALUE_LEN);
+ return;
+ }
+
+ logerr("ilbd_scf_topo_to_str: topo not found");
+}
+
+/*
+ * Look up valstr in topo_tbl and store the matching topology in
+ * *topo_type.  If there is no match an error is logged and *topo_type is
+ * left untouched.
+ */
+static void
+ilbd_scf_str_to_topo(ilb_topo_t *topo_type, char *valstr)
+{
+ const struct topo_tbl_entry *ent = topo_tbl;
+ const struct topo_tbl_entry *end = topo_tbl + ILBD_TOPO_TBL_SIZE;
+
+ for (; ent < end; ent++) {
+ if (strcmp(valstr, ent->topo_str) != 0)
+ continue;
+ *topo_type = ent->topo_type;
+ return;
+ }
+
+ logerr("ilbd_scf_str_to_topo: topo not found");
+}
+
+/*
+ * Split a stored server value of the form
+ * "ipaddr;ipversion;minport[-maxport];flags" into its parts.
+ *
+ * valstr is modified in place by strtok().  The address is returned in
+ * sgs_addr (IPv4 addresses as v4-mapped), the ports in min_port/max_port
+ * and the flags in sgs_flags.  *max_port is only written when the port
+ * field contains a second number.
+ *
+ * A value with fewer than four fields is treated as fatal: the service is
+ * put into maintenance and the daemon exits.  An address or port that
+ * cannot be parsed is logged and the function returns early, leaving the
+ * outputs not reached so far untouched.
+ */
+static void
+ilbd_get_svr_field(char *valstr, struct in6_addr *sgs_addr,
+ int32_t *min_port, int32_t *max_port, int32_t *sgs_flags)
+{
+ char *ipaddr, *ipverstr, *portstr, *flagstr;
+ int ip_ver;
+ ilb_ip_addr_t temp_ip;
+ void *addrptr;
+ char *min_portstr, *max_portstr;
+
+ ipaddr = strtok(valstr, ";");
+ ipverstr = strtok(NULL, ";");
+ portstr = strtok(NULL, ";");
+ flagstr = strtok(NULL, ";");
+
+ if (ipaddr == NULL || ipverstr == NULL || portstr == NULL ||
+ flagstr == NULL) {
+ logerr("%s: invalid server fields", __func__);
+ (void) smf_maintain_instance(ILB_FMRI, SMF_IMMEDIATE);
+ exit(EXIT_FAILURE);
+ }
+ ip_ver = atoi(ipverstr);
+ addrptr = (ip_ver == AF_INET) ? (void *)&temp_ip.ia_v4 :
+ (void *)&temp_ip.ia_v6;
+ /*
+ * inet_pton() returns 1 on success, 0 for an unparsable address and
+ * -1 for an unsupported address family; only 1 leaves a usable
+ * address in temp_ip.
+ */
+ if (inet_pton(ip_ver, ipaddr, addrptr) != 1) {
+ logerr("ilbd_get_svr_field: inet_pton failed");
+ return;
+ }
+
+ if (ip_ver == AF_INET) {
+ IN6_INADDR_TO_V4MAPPED(&(temp_ip.ia_v4), sgs_addr);
+ } else {
+ (void) memcpy(sgs_addr, &(temp_ip.ia_v6),
+ sizeof (struct in6_addr));
+ }
+
+ *sgs_flags = atoi(flagstr);
+
+ /* a port field made up only of '-' yields no token at all */
+ min_portstr = strtok(portstr, "-");
+ if (min_portstr == NULL) {
+ logerr("ilbd_get_svr_field: invalid port field");
+ return;
+ }
+ /* same byte swap that ilbd_srv_scf_val() applied when storing */
+ *min_port = atoi(min_portstr);
+ *min_port = ntohs(*min_port);
+ max_portstr = strtok(NULL, "-");
+ if (max_portstr != NULL) {
+ *max_port = atoi(max_portstr);
+ *max_port = ntohs(*max_port);
+ }
+}
+
+/*
+ * Format one server as the string that is stored for it:
+ * "ipaddr;ipversion;minport-maxport;flags".  v4-mapped addresses are
+ * written in IPv4 notation.  The caller's buffer "value" must hold
+ * ILBD_MAX_VALUE_LEN bytes.
+ */
+static void
+ilbd_srv_scf_val(ilbd_srv_t *srv, char *value)
+{
+ char ipstr[INET6_ADDRSTRLEN];
+ struct in_addr v4_addr;
+ const void *src;
+ int ipver;
+
+ /* pick the address family and the bytes to print */
+ if (GET_AF(&srv->isv_addr) == AF_INET) {
+ IN6_V4MAPPED_TO_INADDR(&srv->isv_addr, &v4_addr);
+ src = &v4_addr;
+ ipver = AF_INET;
+ } else {
+ src = &srv->isv_addr;
+ ipver = AF_INET6;
+ }
+ (void) inet_ntop(ipver, src, ipstr, sizeof (ipstr));
+
+ (void) snprintf(value, ILBD_MAX_VALUE_LEN, "%s;%d;%d-%d;%d",
+ ipstr, ipver, ntohs(srv->isv_minport), ntohs(srv->isv_maxport),
+ srv->isv_flags);
+}
+
+/*
+ * Describe the server at position "num" (counting from 0) of the
+ * servergroup's server list.  If valstr is not NULL it receives the
+ * server's stored string form ("ipaddr;ipversion;minport-maxport;flags");
+ * if svrname is not NULL it receives the property name "server<id>".
+ *
+ * Returns ILB_STATUS_ENOENT if the servergroup has no servers at all,
+ * ILB_STATUS_OK otherwise.  num must be a valid position in the list.
+ */
+ilb_status_t
+ilbd_get_svr_info(ilbd_sg_t *sg, int num, char *valstr, char *svrname)
+{
+ ilbd_srv_t *cur = list_head(&sg->isg_srvlist);
+ int steps = num;
+
+ if (cur == NULL)
+ return (ILB_STATUS_ENOENT);
+
+ /* advance to the requested position */
+ while (steps-- > 0)
+ cur = list_next(&sg->isg_srvlist, cur);
+
+ assert(cur != NULL);
+
+ if (valstr != NULL)
+ ilbd_srv_scf_val(cur, valstr);
+
+ if (svrname != NULL) {
+ (void) snprintf(svrname, ILBD_MAX_NAME_LEN, "server%d",
+ cur->isv_id);
+ }
+
+ return (ILB_STATUS_OK);
+}
+
+/*
+ * Write the textual form of addr into valstr, as an IPv4 address when
+ * ipversion is AF_INET and as an IPv6 address otherwise.  If scftype is
+ * not NULL it is set to the matching SCF network address type.
+ * Always returns ILB_STATUS_OK.
+ */
+ilb_status_t
+ilbd_scf_ip_to_str(uint16_t ipversion, struct in6_addr *addr,
+ scf_type_t *scftype, char *valstr)
+{
+ ilb_ip_addr_t ipaddr;
+ boolean_t is_v4 = (ipversion == AF_INET);
+
+ if (scftype != NULL) {
+ if (is_v4)
+ *scftype = SCF_TYPE_NET_ADDR_V4;
+ else
+ *scftype = SCF_TYPE_NET_ADDR_V6;
+ }
+
+ IP_COPY_IMPL_2_CLI(addr, &ipaddr);
+
+ if (is_v4) {
+ (void) inet_ntop(ipversion, (void *)&ipaddr.ia_v4, valstr,
+ INET_ADDRSTRLEN);
+ } else {
+ (void) inet_ntop(ipversion, (void *)&ipaddr.ia_v6, valstr,
+ INET6_ADDRSTRLEN);
+ }
+
+ return (ILB_STATUS_OK);
+}
+
+/*
+ * This function takes a ilbd internal data struct and translate its value to
+ * scf value. The data struct is passed in within "data".
+ * Upon successful return, the scf val will be stored in "val" and the scf type
+ * will be returned in "scftype", the number of values
+ * translated will be in "numval"
+ * If it failed, no data will be written to SCF
+ *
+ * pg_type selects how "data" is interpreted (rule, servergroup or health
+ * check) and "type" selects the field to translate.  On input *scftype
+ * holds the type from prop_tbl; for IP address fields it is replaced by
+ * the V4/V6 address type.  scftype is dereferenced, so it must not be
+ * NULL.
+ *
+ * On success *val is a calloc()ed array of *numval scf values; the caller
+ * destroys each value and frees the array.  For a non-empty servergroup
+ * one value per server is returned, otherwise exactly one value.
+ */
+static ilb_status_t
+ilbd_data_to_scfval(ilbd_scf_pg_type_t pg_type, ilbd_var_type_t type,
+ scf_handle_t *h, void *data, scf_value_t ***val, scf_type_t *scftype,
+ int *numval)
+{
+ scf_value_t *v, **varray = NULL;
+ int ret = ILB_STATUS_OK;
+ int i;
+ int scf_val_len = ILBD_MAX_VALUE_LEN;
+ char valstr[scf_val_len];
+ int valint = 0;
+ uint8_t valbool = 0;
+ ilbd_rule_t *r_ent = NULL;
+ ilbd_sg_t *s_ent = NULL;
+ ilbd_hc_t *h_ent = NULL;
+
+ switch (pg_type) {
+ case ILBD_SCF_RULE:
+ r_ent = (ilbd_rule_t *)data;
+ break;
+ case ILBD_SCF_SG:
+ s_ent = (ilbd_sg_t *)data;
+ break;
+ case ILBD_SCF_HC:
+ h_ent = (ilbd_hc_t *)data;
+ break;
+ }
+
+ v = scf_value_create(h);
+ if (v == NULL)
+ return (ILB_STATUS_INTERNAL);
+
+ /* step 1: pull the field out of the struct into valstr/valint/valbool */
+ switch (type) {
+ case ILBD_RULE_STATUS:
+ valbool = r_ent->irl_flags & ILB_FLAGS_RULE_ENABLED;
+ break;
+ case ILBD_RULE_VIP:
+ ret = ilbd_scf_ip_to_str(r_ent->irl_ipversion, &r_ent->irl_vip,
+ scftype, valstr);
+ if (ret != ILB_STATUS_OK) {
+ scf_value_destroy(v);
+ return (ret);
+ }
+ break;
+ case ILBD_RULE_PROTO: {
+ struct protoent *protoent;
+
+ protoent = getprotobynumber(r_ent->irl_proto);
+ if (protoent == NULL) {
+ /* no name for this protocol number: nothing to store */
+ logerr("ilbd_data_to_scfval: unknown protocol %d",
+ (int)r_ent->irl_proto);
+ scf_value_destroy(v);
+ return (ILB_STATUS_EINVAL);
+ }
+ (void) strlcpy(valstr, protoent->p_name, sizeof (valstr));
+ break;
+ }
+ case ILBD_RULE_PORT:
+ (void) snprintf(valstr, sizeof (valstr), "%d-%d",
+ r_ent->irl_minport, r_ent->irl_maxport);
+ break;
+ case ILBD_RULE_ALGO:
+ ilbd_algo_to_str(r_ent->irl_algo, valstr);
+ break;
+ case ILBD_RULE_TOPO:
+ ilbd_topo_to_str(r_ent->irl_topo, valstr);
+ break;
+ case ILBD_RULE_NAT_STR:
+ ret = ilbd_scf_ip_to_str(r_ent->irl_ipversion,
+ &r_ent->irl_nat_src_start, scftype, valstr);
+ if (ret != ILB_STATUS_OK) {
+ scf_value_destroy(v);
+ return (ret);
+ }
+ break;
+ case ILBD_RULE_NAT_END:
+ ret = ilbd_scf_ip_to_str(r_ent->irl_ipversion,
+ &r_ent->irl_nat_src_end, scftype, valstr);
+ if (ret != ILB_STATUS_OK) {
+ scf_value_destroy(v);
+ return (ret);
+ }
+ break;
+ case ILBD_RULE_STI_MASK:
+ ret = ilbd_scf_ip_to_str(r_ent->irl_ipversion,
+ &r_ent->irl_stickymask, scftype, valstr);
+ if (ret != ILB_STATUS_OK) {
+ scf_value_destroy(v);
+ return (ret);
+ }
+ break;
+ case ILBD_RULE_SGNAME:
+ (void) strlcpy(valstr, r_ent->irl_sgname, sizeof (valstr));
+ break;
+ case ILBD_RULE_HCNAME:
+ if (r_ent->irl_hcname[0] != '\0')
+ (void) strlcpy(valstr, r_ent->irl_hcname,
+ sizeof (valstr));
+ else
+ bzero(valstr, ILBD_MAX_VALUE_LEN);
+ break;
+ case ILBD_RULE_HCPORT:
+ valint = r_ent->irl_hcport;
+ break;
+ case ILBD_RULE_HCPFLAG:
+ valint = r_ent->irl_hcpflag;
+ break;
+ case ILBD_RULE_DRAINTIME:
+ valint = r_ent->irl_conndrain;
+ break;
+ case ILBD_RULE_NAT_TO:
+ valint = r_ent->irl_nat_timeout;
+ break;
+ case ILBD_RULE_PERS_TO:
+ valint = r_ent->irl_sticky_timeout;
+ break;
+
+ case ILBD_SG_SERVER:
+ /* an empty servergroup is stored as a single marker string */
+ if (s_ent->isg_srvcount == 0) {
+ (void) strlcpy(valstr, "EMPTY_SERVERGROUP",
+ sizeof (valstr));
+ break;
+ }
+
+ varray = calloc(s_ent->isg_srvcount, sizeof (*varray));
+ if (varray == NULL) {
+ scf_value_destroy(v);
+ return (ILB_STATUS_ENOMEM);
+ }
+
+ /*
+ * One value per server.  'v' always holds a value created
+ * ahead of time; the NULL check at the top of the loop catches
+ * a failed scf_value_create() from the previous iteration.
+ */
+ for (i = 0; i < s_ent->isg_srvcount; i++) {
+ if (v == NULL) {
+ for (i--; i >= 0; i--)
+ scf_value_destroy(varray[i]);
+ free(varray);
+ return (ILB_STATUS_ENOMEM);
+ }
+
+ ret = ilbd_get_svr_info(s_ent, i, valstr, NULL);
+ if (ret != ILB_STATUS_OK) {
+ scf_value_destroy(v);
+ for (i--; i >= 0; i--)
+ scf_value_destroy(varray[i]);
+ free(varray);
+ return (ret);
+ }
+ (void) scf_value_set_astring(v, valstr);
+ varray[i] = v;
+ v = scf_value_create(h);
+ }
+ /* the last 'v' we created will go unused, so drop it */
+ scf_value_destroy(v);
+ *numval = s_ent->isg_srvcount;
+ *val = varray;
+ return (ret);
+ case ILBD_HC_TEST:
+ (void) strlcpy(valstr, h_ent->ihc_test, sizeof (valstr));
+ break;
+ case ILBD_HC_TIMEOUT:
+ valint = h_ent->ihc_timeout;
+ break;
+ case ILBD_HC_INTERVAL:
+ valint = h_ent->ihc_interval;
+ break;
+ case ILBD_HC_DEF_PING:
+ valbool = h_ent->ihc_def_ping;
+ break;
+ case ILBD_HC_COUNT:
+ valint = h_ent->ihc_count;
+ break;
+ }
+
+ /* step 2: store it in the scf value according to its SCF type */
+ switch (*scftype) {
+ case SCF_TYPE_BOOLEAN:
+ scf_value_set_boolean(v, valbool);
+ break;
+ case SCF_TYPE_ASTRING:
+ (void) scf_value_set_astring(v, valstr);
+ break;
+ case SCF_TYPE_INTEGER:
+ scf_value_set_integer(v, valint);
+ break;
+ case SCF_TYPE_NET_ADDR_V4:
+ (void) scf_value_set_from_string(v, SCF_TYPE_NET_ADDR_V4,
+ valstr);
+ break;
+ case SCF_TYPE_NET_ADDR_V6:
+ (void) scf_value_set_from_string(v, SCF_TYPE_NET_ADDR_V6,
+ valstr);
+ break;
+ }
+
+ /* single-value result: wrap it in a one element array */
+ varray = calloc(1, sizeof (*varray));
+ if (varray == NULL) {
+ scf_value_destroy(v);
+ return (ILB_STATUS_ENOMEM);
+ }
+ varray[0] = v;
+ *val = varray;
+ *numval = 1;
+
+ return (ret);
+}
+
+/*
+ * create a scf property group
+ *
+ * "data" is an ilbd_rule_t, ilbd_sg_t or ilbd_hc_t, as selected by
+ * pg_type.  The property group named after the object is (re)created and
+ * every property in the matching section of prop_tbl is written to it.
+ * For a servergroup each server is written under its own property name.
+ *
+ * Returns ILB_STATUS_EINVAL for an unknown pg_type, the failure status if
+ * the group cannot be created or a value cannot be translated, and
+ * otherwise the status of the last property written.
+ */
+ilb_status_t
+ilbd_create_pg(ilbd_scf_pg_type_t pg_type, void *data)
+{
+ ilb_status_t ret;
+ char *pgname;
+ scf_propertygroup_t *pg = NULL;
+ scf_value_t **val;
+ scf_handle_t *h;
+ int scf_name_len = ILBD_MAX_NAME_LEN;
+ char scfpgname[scf_name_len];
+ int i, i_st, i_end;
+
+ /* pick the object's name and its [i_st, i_end) slice of prop_tbl */
+ switch (pg_type) {
+ case ILBD_SCF_RULE: {
+ ilbd_rule_t *r_ent = (ilbd_rule_t *)data;
+
+ pgname = r_ent->irl_name;
+ i_st = 0;
+ i_end = ILBD_RULE_VAR_NUM;
+ break;
+ }
+ case ILBD_SCF_SG: {
+ ilbd_sg_t *s_ent = (ilbd_sg_t *)data;
+
+ pgname = s_ent->isg_name;
+ i_st = ILBD_RULE_VAR_NUM;
+ i_end = ILBD_RULE_VAR_NUM + ILBD_SG_VAR_NUM;
+ break;
+ }
+ case ILBD_SCF_HC: {
+ ilbd_hc_t *h_ent = (ilbd_hc_t *)data;
+
+ pgname = h_ent->ihc_name;
+ i_st = ILBD_RULE_VAR_NUM + ILBD_SG_VAR_NUM;
+ i_end = ILBD_PROP_VAR_NUM;
+ break;
+ }
+ default:
+ /* pgname is not known for an invalid type, so don't print it */
+ logdebug("ilbd_create_pg: invalid pg type %d", pg_type);
+ return (ILB_STATUS_EINVAL);
+ }
+
+ ilbd_name_to_scfpgname(pg_type, pgname, scfpgname);
+
+ ret = ilbd_scf_retrieve_pg(scfpgname, &pg, B_TRUE);
+ if (ret != ILB_STATUS_OK)
+ return (ret);
+ h = scf_pg_handle(pg);
+
+ /* fill in props */
+ for (i = i_st; i < i_end; i++) {
+ int num, j;
+ char propname[scf_name_len];
+ scf_type_t scftype = prop_tbl[i].scf_proptype;
+
+ ret = ilbd_data_to_scfval(pg_type, prop_tbl[i].val_type, h,
+ data, &val, &scftype, &num);
+ if (ret != ILB_STATUS_OK)
+ goto done;
+
+ /*
+ * NOTE(review): the status of each ilbd_scf_set_prop() call is
+ * overwritten by the next one, so only a failure of the last
+ * property is reported to the caller -- confirm that this is
+ * intended.
+ */
+ for (j = 0; j < num; j++) {
+ if (pg_type == ILBD_SCF_SG) {
+ /* servers are stored under "server<id>" */
+ ret = ilbd_get_svr_info(data, j, NULL,
+ propname);
+ if (ret == ILB_STATUS_ENOENT) {
+ (void) strlcpy(propname, "EMPTY_SERVER",
+ ILBD_MAX_NAME_LEN);
+ }
+ ret = ilbd_scf_set_prop(pg, propname,
+ scftype, val[j]);
+ } else {
+ ret = ilbd_scf_set_prop(pg,
+ prop_tbl[i].scf_propname, scftype, val[j]);
+ }
+ scf_value_destroy(val[j]);
+ }
+ free(val);
+ }
+
+done:
+ ilbd_scf_destroy(h, NULL, NULL, pg);
+ return (ret);
+}
+
+/*
+ * Delete a property group via scf_pg_delete().  Returns ILB_STATUS_OK on
+ * success, otherwise the mapped libscf error.
+ */
+static ilb_status_t
+ilbd_scf_delete_pg(scf_propertygroup_t *pg)
+{
+ return (scf_pg_delete(pg) != 0 ?
+ ilbd_scf_err_to_ilb_err() : ILB_STATUS_OK);
+}
+
+/*
+ * Remove the property group that belongs to the named object.  The type
+ * prefix is part of the group name, so a servergroup can have the same
+ * name as a rule.
+ *
+ * Returns the lookup status if the group cannot be found, otherwise the
+ * result of deleting it.
+ */
+ilb_status_t
+ilbd_destroy_pg(ilbd_scf_pg_type_t pg_t, const char *pgname)
+{
+ scf_propertygroup_t *pg;
+ scf_handle_t *hdl;
+ ilb_status_t status;
+ int scf_name_len = ILBD_MAX_NAME_LEN;
+ char scfname[scf_name_len];
+
+ ilbd_name_to_scfpgname(pg_t, pgname, scfname);
+
+ /* a lookup without create reports a found group as EEXIST */
+ status = ilbd_scf_retrieve_pg(scfname, &pg, B_FALSE);
+ if (status != ILB_STATUS_EEXIST)
+ return (status);
+
+ status = ilbd_scf_delete_pg(pg);
+
+ /* fetch the handle while pg is still valid, then release both */
+ hdl = scf_pg_handle(pg);
+ ilbd_scf_destroy(hdl, NULL, NULL, pg);
+ return (status);
+}
+
+/*
+ * Set named property to scf value specified. If property is new,
+ * create it.
+ *
+ * The change is made in a single transaction on pg.  Returns
+ * ILB_STATUS_OK if the transaction was committed, ILB_STATUS_INUSE if
+ * the commit reported that pg was not the most recent version,
+ * ILB_STATUS_EINVAL for a missing handle or property name, and otherwise
+ * the mapped libscf error.  "val" is not destroyed here.
+ */
+static ilb_status_t
+ilbd_scf_set_prop(scf_propertygroup_t *pg, const char *propname,
+ scf_type_t proptype, scf_value_t *val)
+{
+ scf_handle_t *h = NULL;
+ scf_property_t *prop = NULL;
+ scf_value_t *oldval = NULL;
+ scf_transaction_t *tx = NULL;
+ scf_transaction_entry_t *ent = NULL;
+ boolean_t new = B_FALSE;
+ ilb_status_t ret = ILB_STATUS_OK;
+ int commit_ret;
+
+ h = scf_pg_handle(pg);
+ if (h == NULL || propname == NULL)
+ return (ILB_STATUS_EINVAL);
+
+ /* the old value is only fetched to learn whether the property exists */
+ ret = ilbd_scf_get_prop_val(pg, propname, &oldval);
+ if (oldval != NULL)
+ scf_value_destroy(oldval);
+ if (ret == ILB_STATUS_ENOENT)
+ new = B_TRUE;
+ else if (ret != ILB_STATUS_OK)
+ return (ret);
+
+ /* NOTE(review): prop is created and destroyed but not otherwise used */
+ if ((prop = scf_property_create(h)) == NULL)
+ return (ilbd_scf_err_to_ilb_err());
+ if ((tx = scf_transaction_create(h)) == NULL ||
+ (ent = scf_entry_create(h)) == NULL) {
+ ret = ilbd_scf_err_to_ilb_err();
+ logdebug("ilbd_scf_set_prop: create scf transaction failed\n");
+ goto out;
+ }
+
+ if (scf_transaction_start(tx, pg) == -1) {
+ ret = ilbd_scf_err_to_ilb_err();
+ logdebug("ilbd_scf_set_prop: start scf transaction failed\n");
+ goto out;
+ }
+
+ /* a property that does not exist yet must be added, not changed */
+ if (new) {
+ if (scf_transaction_property_new(tx, ent, propname,
+ proptype) == -1) {
+ ret = ilbd_scf_err_to_ilb_err();
+ logdebug("ilbd_scf_set_prop: create scf prop failed\n");
+ goto out;
+ }
+ } else {
+ if (scf_transaction_property_change(tx, ent, propname, proptype)
+ == -1) {
+ ret = ilbd_scf_err_to_ilb_err();
+ logdebug("ilbd_scf_set_prop: change scf prop failed\n");
+ goto out;
+ }
+ }
+
+ if (scf_entry_add_value(ent, val) != 0) {
+ logdebug("ilbd_scf_set_prop: add scf entry failed\n");
+ ret = ilbd_scf_err_to_ilb_err();
+ goto out;
+ }
+
+ commit_ret = scf_transaction_commit(tx);
+ switch (commit_ret) {
+ case 1:
+ ret = ILB_STATUS_OK;
+ /* update pg here, so subsequent property setting succeeds */
+ (void) scf_pg_update(pg);
+ break;
+ case 0:
+ /* transaction failed due to not having most recent pg */
+ ret = ILB_STATUS_INUSE;
+ break;
+ default:
+ ret = ilbd_scf_err_to_ilb_err();
+ break;
+ }
+out:
+ /* common cleanup for the success and the failure paths */
+ if (tx != NULL)
+ scf_transaction_destroy(tx);
+ if (ent != NULL)
+ scf_entry_destroy(ent);
+ if (prop != NULL)
+ scf_property_destroy(prop);
+
+ return (ret);
+}
+
+/*
+ * Fetch the value of property "propname" from pg.
+ *
+ * On success a newly created scf value is stored in *val and the caller
+ * has to destroy it; on failure *val is not touched.  Returns
+ * ILB_STATUS_EINVAL for a missing handle or property name, otherwise
+ * ILB_STATUS_OK or the mapped libscf error.
+ */
+static ilb_status_t
+ilbd_scf_get_prop_val(scf_propertygroup_t *pg, const char *propname,
+ scf_value_t **val)
+{
+ scf_handle_t *h = scf_pg_handle(pg);
+ scf_property_t *prop;
+ scf_value_t *value;
+ ilb_status_t ret;
+
+ if (h == NULL || propname == NULL)
+ return (ILB_STATUS_EINVAL);
+
+ prop = scf_property_create(h);
+ if (prop == NULL)
+ return (ilbd_scf_err_to_ilb_err());
+
+ if (scf_pg_get_property(pg, propname, prop) != 0) {
+ ret = ilbd_scf_err_to_ilb_err();
+ } else if ((value = scf_value_create(h)) == NULL) {
+ ret = ilbd_scf_err_to_ilb_err();
+ } else if (scf_property_get_value(prop, value) != 0) {
+ scf_value_destroy(value);
+ ret = ilbd_scf_err_to_ilb_err();
+ } else {
+ /* hand the value over to the caller */
+ *val = value;
+ ret = ILB_STATUS_OK;
+ }
+
+ scf_property_destroy(prop);
+ return (ret);
+}
+
+/*
+ * Tagged pointer to the structure that is being filled in from SCF:
+ * pg_type tells which member of the union is valid.  The sg_data, hc_data
+ * and rule_data macros are shorthands for the union members.
+ */
+typedef struct ilbd_data
+{
+ union {
+ ilb_sg_info_t *sg_info;
+ ilb_hc_info_t *hc_info;
+ ilb_rule_info_t *rule_info;
+ } data;
+ ilbd_scf_pg_type_t pg_type; /* type of data */
+#define sg_data data.sg_info
+#define hc_data data.hc_info
+#define rule_data data.rule_info
+} ilbd_data_t;
+
+/*
+ * Parse the textual address ipstr (IPv4 if ipversion is AF_INET, IPv6
+ * otherwise) and store it in addr; IPv4 addresses are stored v4-mapped.
+ *
+ * The function cannot report failure.  If ipstr cannot be parsed, an error
+ * is logged and addr is derived from an all-zero address instead of from
+ * uninitialized stack contents.
+ */
+void
+ilbd_scf_str_to_ip(int ipversion, char *ipstr, struct in6_addr *addr)
+{
+ ilb_ip_addr_t ipaddr;
+ void *addrptr;
+
+ /* inet_pton() leaves its output untouched when it fails */
+ bzero(&ipaddr, sizeof (ipaddr));
+
+ addrptr = (ipversion == AF_INET) ?
+ (void *)&ipaddr.ia_v4 : (void *)&ipaddr.ia_v6;
+ if (inet_pton(ipversion, ipstr, addrptr) != 1)
+ logerr("ilbd_scf_str_to_ip: inet_pton failed");
+ if (ipversion == AF_INET) {
+ IN6_INADDR_TO_V4MAPPED(&(ipaddr.ia_v4), addr);
+ } else {
+ (void) memcpy(addr, &(ipaddr.ia_v6),
+ sizeof (struct in6_addr));
+ }
+}
+
+/*
+ * Take one SCF property value and store it in the matching field of the
+ * in-memory structure (rule, server group or health check) that ilb_data
+ * carries for the property group being loaded.
+ *
+ * propname	name of the property; only consulted for "serverN" properties,
+ *		where the digits following the 6-character prefix are used
+ *		as the server ID
+ * ilb_type	selects the destination field
+ * val		the SCF value to decode
+ * ilb_data	destination; for ILBD_SG_SERVER the sg_data pointer may be
+ *		replaced because the server array is grown with realloc()
+ *
+ * Returns ILB_STATUS_OK on success, ILB_STATUS_ENOMEM if the server array
+ * cannot be grown (the existing sg_data stays valid and unchanged), and
+ * ILB_STATUS_INTERNAL if the value cannot be read or is malformed.
+ */
+static ilb_status_t
+ilbd_scfval_to_data(const char *propname, ilbd_var_type_t ilb_type,
+    scf_value_t *val, ilbd_data_t *ilb_data)
+{
+	scf_type_t scf_type = scf_value_type(val);
+	ilbd_scf_pg_type_t pg_type = ilb_data->pg_type;
+	int ret = 0;
+	ilb_rule_info_t *r_ent = NULL;
+	ilb_sg_info_t *s_ent = NULL;
+	ilb_hc_info_t *h_ent = NULL;
+	char ipstr[INET6_ADDRSTRLEN];
+	int scf_val_len = ILBD_MAX_VALUE_LEN;
+	char valstr[scf_val_len];
+	int64_t valint = 0;
+	uint8_t valbool = 0;
+	int ipversion = AF_UNSPEC;
+
+	/*
+	 * Only one of the decode buffers below is filled in, depending on
+	 * the SCF type of the value.  Give all of them a defined content so
+	 * that a property stored with an unexpected type can never make us
+	 * read uninitialized memory.
+	 */
+	ipstr[0] = '\0';
+	valstr[0] = '\0';
+
+	switch (pg_type) {
+	case ILBD_SCF_RULE:
+		r_ent = ilb_data->rule_data;
+		break;
+	case ILBD_SCF_HC:
+		h_ent = ilb_data->hc_data;
+		break;
+	case ILBD_SCF_SG:
+		s_ent = ilb_data->sg_data;
+		break;
+	}
+
+	/* get scf value out */
+	switch (scf_type) {
+	case SCF_TYPE_NET_ADDR_V4:
+		if (scf_value_get_as_string_typed(val,
+		    SCF_TYPE_NET_ADDR_V4, ipstr, INET_ADDRSTRLEN) < 0)
+			return (ILB_STATUS_INTERNAL);
+		ipversion = AF_INET;
+		break;
+	case SCF_TYPE_NET_ADDR_V6:
+		if (scf_value_get_as_string_typed(val,
+		    SCF_TYPE_NET_ADDR_V6, ipstr, INET6_ADDRSTRLEN) < 0)
+			return (ILB_STATUS_INTERNAL);
+		ipversion = AF_INET6;
+		break;
+	case SCF_TYPE_BOOLEAN:
+		if (scf_value_get_boolean(val, &valbool) < 0)
+			return (ILB_STATUS_INTERNAL);
+		break;
+	case SCF_TYPE_ASTRING:
+		if (scf_value_get_astring(val, valstr, sizeof (valstr))
+		    < 0)
+			return (ILB_STATUS_INTERNAL);
+		break;
+	case SCF_TYPE_INTEGER:
+		if (scf_value_get_integer(val, &valint) < 0)
+			return (ILB_STATUS_INTERNAL);
+		break;
+	default:
+		return (ILB_STATUS_INTERNAL);
+	}
+
+	ret = ILB_STATUS_OK;
+	switch (ilb_type) {
+	case ILBD_RULE_STATUS:
+		if (valbool)
+			r_ent->rl_flags |= ILB_FLAGS_RULE_ENABLED;
+		break;
+	case ILBD_RULE_VIP:
+		/* an address property must have been stored as an address */
+		if (ipversion == AF_UNSPEC)
+			return (ILB_STATUS_INTERNAL);
+		r_ent->rl_ipversion = ipversion;
+		ilbd_scf_str_to_ip(ipversion, ipstr, &r_ent->rl_vip);
+		break;
+	case ILBD_RULE_PROTO: {
+		struct protoent *protoent;
+
+		/* getprotobyname() returns NULL for an unknown name */
+		protoent = getprotobyname(valstr);
+		if (protoent == NULL)
+			return (ILB_STATUS_INTERNAL);
+		r_ent->rl_proto = protoent->p_proto;
+		break;
+	}
+	case ILBD_RULE_PORT: {
+		char *minstr, *maxstr, *lasts;
+
+		/*
+		 * The value is "minport-maxport".  A value without any
+		 * token is malformed; a value with a single token is
+		 * treated as a one-port range.
+		 */
+		minstr = strtok_r(valstr, "-", &lasts);
+		if (minstr == NULL)
+			return (ILB_STATUS_INTERNAL);
+		maxstr = strtok_r(NULL, "-", &lasts);
+		r_ent->rl_minport = atoi(minstr);
+		if (maxstr != NULL)
+			r_ent->rl_maxport = atoi(maxstr);
+		else
+			r_ent->rl_maxport = r_ent->rl_minport;
+		break;
+	}
+	case ILBD_RULE_ALGO:
+		ilbd_scf_str_to_algo(&(r_ent->rl_algo), valstr);
+		break;
+	case ILBD_RULE_TOPO:
+		ilbd_scf_str_to_topo(&(r_ent->rl_topo), valstr);
+		break;
+	case ILBD_RULE_NAT_STR:
+		if (ipversion == AF_UNSPEC)
+			return (ILB_STATUS_INTERNAL);
+		ilbd_scf_str_to_ip(ipversion, ipstr, &r_ent->rl_nat_src_start);
+		break;
+	case ILBD_RULE_NAT_END:
+		if (ipversion == AF_UNSPEC)
+			return (ILB_STATUS_INTERNAL);
+		ilbd_scf_str_to_ip(ipversion, ipstr, &r_ent->rl_nat_src_end);
+		break;
+	case ILBD_RULE_STI_MASK:
+		if (ipversion == AF_UNSPEC)
+			return (ILB_STATUS_INTERNAL);
+		ilbd_scf_str_to_ip(ipversion, ipstr, &r_ent->rl_stickymask);
+		/* a non-zero mask is what turns stickiness on */
+		if (ipversion == AF_INET) {
+			if (!IN6_IS_ADDR_V4MAPPED_ANY(&r_ent->rl_stickymask))
+				r_ent->rl_flags |= ILB_FLAGS_RULE_STICKY;
+		} else {
+			if (!IN6_IS_ADDR_UNSPECIFIED(&r_ent->rl_stickymask))
+				r_ent->rl_flags |= ILB_FLAGS_RULE_STICKY;
+		}
+		break;
+	case ILBD_RULE_SGNAME:
+		(void) strlcpy(r_ent->rl_sgname, valstr,
+		    sizeof (r_ent->rl_sgname));
+		break;
+	case ILBD_RULE_HCNAME:
+		(void) strlcpy(r_ent->rl_hcname, valstr,
+		    sizeof (r_ent->rl_hcname));
+		break;
+	case ILBD_RULE_HCPORT:
+		r_ent->rl_hcport = valint;
+		break;
+	case ILBD_RULE_HCPFLAG:
+		r_ent->rl_hcpflag = valint;
+		break;
+	case ILBD_RULE_DRAINTIME:
+		r_ent->rl_conndrain = valint;
+		break;
+	case ILBD_RULE_NAT_TO:
+		r_ent->rl_nat_timeout = valint;
+		break;
+	case ILBD_RULE_PERS_TO:
+		r_ent->rl_sticky_timeout = valint;
+		break;
+
+	case ILBD_SG_SERVER: {
+		int svr_cnt = s_ent->sg_srvcount;
+		ilb_sg_info_t *grown;
+
+		/*
+		 * valstr contains information of one server in the servergroup
+		 * valstr is in the format of "ip:minport-maxport:enable"
+		 *
+		 * Found a new server: grow the array by one slot.  The
+		 * result goes into a temporary first so that on failure the
+		 * old block is still owned (and later freed) through
+		 * ilb_data->sg_data.
+		 */
+		grown = realloc(s_ent, sizeof (ilb_sg_info_t) +
+		    (svr_cnt + 1) * sizeof (ilb_sg_srv_t));
+		if (grown == NULL)
+			return (ILB_STATUS_ENOMEM);
+		s_ent = grown;
+		/* the old pointer may be stale now; publish the new one */
+		ilb_data->sg_data = s_ent;
+		s_ent->sg_srvcount = svr_cnt + 1;
+
+		/* sgs_srvID is the sg name, leave it blank */
+		/*
+		 * sgs_id is the digit in propname, propname is in a format of
+		 * "server" + the digital serverID. We get the serverID by
+		 * reading from the 7th char of propname.
+		 */
+		s_ent->sg_servers[svr_cnt].sgs_id = atoi(&propname[6]);
+
+		ilbd_get_svr_field(valstr,
+		    &s_ent->sg_servers[svr_cnt].sgs_addr,
+		    &s_ent->sg_servers[svr_cnt].sgs_minport,
+		    &s_ent->sg_servers[svr_cnt].sgs_maxport,
+		    &s_ent->sg_servers[svr_cnt].sgs_flags);
+
+		break;
+	}
+	case ILBD_HC_TEST:
+		(void) strlcpy(h_ent->hci_test, valstr,
+		    sizeof (h_ent->hci_test));
+		break;
+	case ILBD_HC_TIMEOUT:
+		h_ent->hci_timeout = valint;
+		break;
+	case ILBD_HC_INTERVAL:
+		h_ent->hci_interval = valint;
+		break;
+	case ILBD_HC_DEF_PING:
+		h_ent->hci_def_ping = valbool;
+		break;
+	case ILBD_HC_COUNT:
+		h_ent->hci_count = valint;
+		break;
+	case ILBD_VAR_INVALID:
+		/*
+		 * An empty server group is represented by an invalid
+		 * SCF property. So when loading a server group, this
+		 * case can be hit. But it should happen only for this
+		 * single case. So if it happens in another case, move
+		 * the service into maintenance mode.
+		 */
+		if (pg_type != ILBD_SCF_SG || scf_type != SCF_TYPE_ASTRING) {
+			logerr("%s: invalid ilb type", __func__);
+			(void) smf_maintain_instance(ILB_FMRI, SMF_IMMEDIATE);
+		} else {
+			logdebug("%s: invalid ilb type", __func__);
+		}
+		break;
+	}
+
+	return (ret);
+}
+
+/*
+ * Map a property name to its ilbd_var_type_t by looking for a prop_tbl
+ * entry whose name is a prefix of prop_name.  The first matching entry
+ * wins; ILBD_VAR_INVALID is returned (and a debug message logged) when no
+ * entry matches.
+ */
+static ilbd_var_type_t
+ilbd_name_to_valtype(const char *prop_name)
+{
+	int idx = 0;
+
+	while (idx < ILBD_PROP_VAR_NUM) {
+		const char *known = prop_tbl[idx].scf_propname;
+
+		/* prefix comparison: only strlen(known) bytes are checked */
+		if (strncmp(prop_name, known, strlen(known)) == 0)
+			return (prop_tbl[idx].val_type);
+		idx++;
+	}
+
+	logdebug("ilbd_name_to_valtype: couldn't find prop %s", prop_name);
+	return (ILBD_VAR_INVALID);
+}
+
+/*
+ * Per-property callback for ilbd_scf_pg_walk_props(); arg is the
+ * ilbd_data_t being filled in.  A property that cannot be found
+ * (ILB_STATUS_ENOENT) is silently skipped; any other lookup failure is
+ * returned to the walker.
+ */
+static ilb_status_t
+ilbd_scf_load_prop(scf_propertygroup_t *pg, const char *prop_name, void *arg)
+{
+	ilbd_data_t *dest = (ilbd_data_t *)arg;
+	ilbd_var_type_t kind = ilbd_name_to_valtype(prop_name);
+	scf_value_t *propval;
+	ilb_status_t status;
+
+	/* a property group without a handle cannot be read */
+	if (scf_pg_handle(pg) == NULL)
+		return (ILB_STATUS_EINVAL);
+
+	status = ilbd_scf_get_prop_val(pg, prop_name, &propval);
+	if (status == ILB_STATUS_ENOENT)
+		return (ILB_STATUS_OK);
+	if (status != ILB_STATUS_OK)
+		return (status);
+
+	/* decode the value into the destination structure */
+	status = ilbd_scfval_to_data(prop_name, kind, propval, dest);
+
+	if (propval != NULL)
+		scf_value_destroy(propval);
+
+	return (status);
+}
+
+/*
+ * Iterate over all properties of one property group and hand each
+ * property name to cb together with pg and arg (an ilbd_data_t when cb is
+ * ilbd_scf_load_prop()).  The walk stops at the first callback that does
+ * not return ILB_STATUS_OK and that status is returned.  Failures to set up
+ * or advance the iterator are translated from the current SCF error.
+ */
+static ilb_status_t
+ilbd_scf_pg_walk_props(scf_propertygroup_t *pg,
+    ilb_status_t (*cb)(scf_propertygroup_t *, const char *, void *),
+    void *arg)
+{
+	scf_handle_t *hdl;
+	scf_iter_t *iter;
+	scf_property_t *cur;
+	int scf_name_len = ILBD_MAX_NAME_LEN;
+	char name[scf_name_len];
+	ilb_status_t status = ILB_STATUS_OK;
+	int iter_rc = -1;	/* stays -1 if the iterator is never advanced */
+
+	hdl = scf_pg_handle(pg);
+	if (hdl == NULL)
+		return (ILB_STATUS_EINVAL);
+
+	cur = scf_property_create(hdl);
+	iter = scf_iter_create(hdl);
+
+	if (cur != NULL && iter != NULL &&
+	    scf_iter_pg_properties(iter, pg) == 0) {
+		for (;;) {
+			iter_rc = scf_iter_next_property(iter, cur);
+			if (iter_rc != 1)
+				break;
+
+			if (scf_property_get_name(cur, name,
+			    sizeof (name)) < 0) {
+				status = ilbd_scf_err_to_ilb_err();
+				break;
+			}
+
+			status = cb(pg, name, arg);
+			if (status != ILB_STATUS_OK)
+				break;
+		}
+	}
+
+	/* -1 means either setup failed or the iterator reported an error */
+	if (iter_rc == -1)
+		status = ilbd_scf_err_to_ilb_err();
+	if (cur != NULL)
+		scf_property_destroy(cur);
+	if (iter != NULL)
+		scf_iter_destroy(iter);
+
+	return (status);
+}
+
+/*
+ * Walk every property group of the given SMF instance.  For each group
+ * whose name starts with the prefix that belongs to pg_type, load its
+ * properties into a freshly allocated rule / server group / health check
+ * structure, set the object's name from the remainder of the group name and
+ * pass the structure to cb (one of the ilbd_create_X functions).
+ *
+ * arg1 points to an int that is passed to cb as its second argument; arg2
+ * is passed as its third argument.  For server groups that contain
+ * servers, ilbd_add_server_to_group() is called after cb succeeds.
+ *
+ * The temporary structure is always freed before moving on, and the SCF
+ * iterator and property group are always destroyed before returning.
+ * Returns ILB_STATUS_OK when all matching groups were processed (including
+ * the case where there are none), otherwise the first error encountered.
+ */
+static ilb_status_t
+ilbd_scf_instance_walk_pg(scf_instance_t *inst,
+    ilbd_scf_pg_type_t pg_type,
+    ilb_status_t (*cb)(void *, int, struct passwd *, ucred_t *),
+    void *arg1, void *arg2)
+{
+	int scf_ret;
+	ilb_status_t ret = ILB_STATUS_OK;
+	scf_handle_t *h;
+	scf_iter_t *pgiter;
+	scf_propertygroup_t *newpg;
+	int port = *((int *)arg1);
+
+	if (inst == NULL)
+		return (ILB_STATUS_EINVAL);
+
+	h = scf_instance_handle(inst);
+	if (h == NULL)
+		return (ILB_STATUS_EINVAL);
+
+	if ((newpg = scf_pg_create(h)) == NULL)
+		return (ilbd_scf_err_to_ilb_err());
+
+	if ((pgiter = scf_iter_create(h)) == NULL) {
+		scf_pg_destroy(newpg);
+		return (ilbd_scf_err_to_ilb_err());
+	}
+
+	if ((scf_ret = scf_iter_instance_pgs(pgiter, inst)) < 0)
+		goto out;
+
+	while ((scf_ret = scf_iter_next_pg(pgiter, newpg)) > 0) {
+		ilbd_data_t data;
+		int scf_name_len = ILBD_MAX_NAME_LEN;
+		char pg_name[scf_name_len];
+
+		if (scf_pg_get_name(newpg, pg_name, sizeof (pg_name)) < 0) {
+			ret = ilbd_scf_err_to_ilb_err();
+			goto out;
+		}
+
+		/*
+		 * if pg name indicates it's a ilb configuration, walk its prop
+		 */
+		data.pg_type = pg_type;
+		data.hc_data = NULL;
+		data.sg_data = NULL;
+		data.rule_data = NULL;
+
+		switch (pg_type) {
+		case ILBD_SCF_RULE:
+			if (strncmp(ILBD_PG_NAME_RULE, pg_name,
+			    strlen(ILBD_PG_NAME_RULE)) != 0)
+				break;
+
+			data.rule_data = calloc(1, sizeof (ilb_rule_info_t));
+			if (data.rule_data == NULL) {
+				ret = ILB_STATUS_ENOMEM;
+				goto out;
+			}
+			ret = ilbd_scf_pg_walk_props(newpg,
+			    ilbd_scf_load_prop, &data);
+			if (ret == ILB_STATUS_OK) {
+				assert(data.rule_data != NULL);
+				/* set rule name */
+				(void) strlcpy(data.rule_data->rl_name,
+				    &pg_name[strlen(ILBD_PG_NAME_RULE)],
+				    sizeof (data.rule_data->rl_name));
+
+				ret = cb(data.rule_data, port, arg2, NULL);
+			}
+			/* the temporary copy is released on every path */
+			free(data.rule_data);
+			if (ret != ILB_STATUS_OK)
+				goto out;
+			break;
+		case ILBD_SCF_SG:
+			if (strncmp(ILBD_PG_NAME_SG, pg_name,
+			    strlen(ILBD_PG_NAME_SG)) != 0)
+				break;
+
+			data.sg_data = calloc(1, sizeof (ilb_sg_info_t));
+			if (data.sg_data == NULL) {
+				ret = ILB_STATUS_ENOMEM;
+				goto out;
+			}
+			/*
+			 * The walk may replace data.sg_data while growing
+			 * the server array, so always use the pointer
+			 * stored in data afterwards.
+			 */
+			ret = ilbd_scf_pg_walk_props(newpg,
+			    ilbd_scf_load_prop, &data);
+			if (ret == ILB_STATUS_OK) {
+				assert(data.sg_data != NULL);
+				/* set sg name */
+				(void) strlcpy(data.sg_data->sg_name,
+				    &pg_name[strlen(ILBD_PG_NAME_SG)],
+				    sizeof (data.sg_data->sg_name));
+				ret = cb(data.sg_data, port, arg2, NULL);
+			}
+			/*
+			 * create a servergroup is two-step operation.
+			 * 1. create an empty servergroup.
+			 * 2. add server(s) to the group.
+			 *
+			 * since we are here from:
+			 * main_loop()->ilbd_read_config()->
+			 * ilbd_walk_sg_pgs()
+			 * there is no cli to send. So in this
+			 * path auditing will skip the
+			 * adt_set_from_ucred() check
+			 */
+			if (ret == ILB_STATUS_OK &&
+			    data.sg_data->sg_srvcount > 0) {
+				ret = ilbd_add_server_to_group(
+				    data.sg_data, port, NULL, NULL);
+			}
+			/* released whether or not the group had servers */
+			free(data.sg_data);
+			if (ret != ILB_STATUS_OK)
+				goto out;
+			break;
+		case ILBD_SCF_HC:
+			if (strncmp(ILBD_PG_NAME_HC, pg_name,
+			    strlen(ILBD_PG_NAME_HC)) != 0)
+				break;
+
+			data.hc_data = calloc(1, sizeof (ilb_hc_info_t));
+			if (data.hc_data == NULL) {
+				ret = ILB_STATUS_ENOMEM;
+				goto out;
+			}
+			ret = ilbd_scf_pg_walk_props(newpg,
+			    ilbd_scf_load_prop, &data);
+			if (ret == ILB_STATUS_OK) {
+				assert(data.hc_data != NULL);
+				/* set hc name */
+				(void) strlcpy(data.hc_data->hci_name,
+				    &pg_name[strlen(ILBD_PG_NAME_HC)],
+				    sizeof (data.hc_data->hci_name));
+				ret = cb(data.hc_data, port, arg2, NULL);
+			}
+			free(data.hc_data);
+			if (ret != ILB_STATUS_OK)
+				goto out;
+			break;
+		}
+	}
+
+out:
+	/* a negative value means the SCF iterator itself failed */
+	if (scf_ret < 0)
+		ret = ilbd_scf_err_to_ilb_err();
+	scf_pg_destroy(newpg);
+	scf_iter_destroy(pgiter);
+	return (ret);
+}
+
+typedef ilb_status_t (*ilbd_scf_walker_fn)(void *, int, struct passwd *,
+ ucred_t *); /* callback shape taken by ilbd_scf_instance_walk_pg(); the typed ilbd_walk_*_pgs() callbacks are cast to it */
+
+/*
+ * Invoke func once for every rule found in the persistent (SCF)
+ * configuration.  arg1 and arg2 are forwarded unchanged to
+ * ilbd_scf_instance_walk_pg().
+ */
+ilb_status_t
+ilbd_walk_rule_pgs(ilb_status_t (*func)(ilb_rule_info_t *, int,
+    const struct passwd *, ucred_t *), void *arg1, void *arg2)
+{
+	scf_handle_t *hdl;
+	scf_service_t *service;
+	scf_instance_t *instance;
+	ilb_status_t status;
+
+	status = ilbd_scf_get_inst(&hdl, &service, &instance);
+	if (status == ILB_STATUS_OK) {
+		/* get rule prop group, transfer it to ilb_lrule_info_t */
+		status = ilbd_scf_instance_walk_pg(instance, ILBD_SCF_RULE,
+		    (ilbd_scf_walker_fn)func, arg1, arg2);
+		ilbd_scf_destroy(hdl, service, instance, NULL);
+	}
+	return (status);
+}
+
+/*
+ * Invoke func once for every server group found in the persistent (SCF)
+ * configuration.  arg1 and arg2 are forwarded unchanged to
+ * ilbd_scf_instance_walk_pg().
+ */
+ilb_status_t
+ilbd_walk_sg_pgs(ilb_status_t (*func)(ilb_sg_info_t *, int,
+    const struct passwd *, ucred_t *), void *arg1, void *arg2)
+{
+	scf_handle_t *hdl;
+	scf_service_t *service;
+	scf_instance_t *instance;
+	ilb_status_t status;
+
+	status = ilbd_scf_get_inst(&hdl, &service, &instance);
+	if (status == ILB_STATUS_OK) {
+		status = ilbd_scf_instance_walk_pg(instance, ILBD_SCF_SG,
+		    (ilbd_scf_walker_fn)func, arg1, arg2);
+		ilbd_scf_destroy(hdl, service, instance, NULL);
+	}
+	return (status);
+}
+
+/*
+ * Invoke func once for every health check found in the persistent (SCF)
+ * configuration.  arg1 and arg2 are forwarded unchanged to
+ * ilbd_scf_instance_walk_pg().
+ */
+ilb_status_t
+ilbd_walk_hc_pgs(ilb_status_t (*func)(const ilb_hc_info_t *, int,
+    const struct passwd *, ucred_t *), void *arg1, void *arg2)
+{
+	scf_handle_t *hdl;
+	scf_service_t *service;
+	scf_instance_t *instance;
+	ilb_status_t status;
+
+	status = ilbd_scf_get_inst(&hdl, &service, &instance);
+	if (status == ILB_STATUS_OK) {
+		status = ilbd_scf_instance_walk_pg(instance, ILBD_SCF_HC,
+		    (ilbd_scf_walker_fn)func, arg1, arg2);
+		ilbd_scf_destroy(hdl, service, instance, NULL);
+	}
+	return (status);
+}
+
+/*
+ * Change a single property of an existing rule or server group in the
+ * persistent configuration.
+ *
+ * pg_type	ILBD_SCF_RULE: new_val points to a boolean_t
+ *		ILBD_SCF_SG: new_val is a NUL-terminated string
+ *		any other type is rejected with ILB_STATUS_EINVAL
+ * pg_name	name of the rule / server group
+ * prop_name	name of the property to set
+ *
+ * If the property group cannot be retrieved, the status from
+ * ilbd_scf_retrieve_pg() is returned as is; otherwise the result of
+ * ilbd_scf_set_prop() or an error describing the failed setup step.
+ */
+ilb_status_t
+ilbd_change_prop(ilbd_scf_pg_type_t pg_type, const char *pg_name,
+    const char *prop_name, void *new_val)
+{
+	int ret;
+	scf_propertygroup_t *scfpg = NULL;
+	int scf_name_len = ILBD_MAX_NAME_LEN;
+	char scf_pgname[scf_name_len];
+	scf_type_t scftype;
+	/* must start out NULL: the cleanup code below tests it */
+	scf_value_t *scfval = NULL;
+	scf_handle_t *h = NULL;
+
+	ilbd_name_to_scfpgname(pg_type, pg_name, scf_pgname);
+	ret = ilbd_scf_retrieve_pg(scf_pgname, &scfpg, B_FALSE);
+	if (ret != ILB_STATUS_EEXIST)
+		return (ret);
+
+	assert(scfpg != NULL);
+
+	h = scf_pg_handle(scfpg);
+	if (h == NULL) {
+		ret = ILB_STATUS_EINVAL;
+		goto done;
+	}
+
+	if ((scfval = scf_value_create(h)) == NULL) {
+		ret = ILB_STATUS_ENOMEM;
+		goto done;
+	}
+
+	switch (pg_type) {
+	case ILBD_SCF_RULE:
+		scftype = SCF_TYPE_BOOLEAN;
+		scf_value_set_boolean(scfval, *(boolean_t *)new_val);
+		break;
+	case ILBD_SCF_SG:
+		scftype = SCF_TYPE_ASTRING;
+		if (scf_value_set_astring(scfval, (char *)new_val) != 0) {
+			ret = ilbd_scf_err_to_ilb_err();
+			goto done;
+		}
+		break;
+	default:
+		/* no value type is defined for other property groups */
+		ret = ILB_STATUS_EINVAL;
+		goto done;
+	}
+	ret = ilbd_scf_set_prop(scfpg, prop_name, scftype, scfval);
+
+done:
+	if (h != NULL)
+		scf_handle_destroy(h);
+	if (scfpg != NULL)
+		scf_pg_destroy(scfpg);
+	if (scfval != NULL)
+		scf_value_destroy(scfval);
+	return (ret);
+}
+
+/*
+ * Record a server, srv, that was added to server group sg in the
+ * persistent configuration.  The server is stored as an astring property
+ * named "server<ID>" in the group's property group.
+ */
+ilb_status_t
+ilbd_scf_add_srv(ilbd_sg_t *sg, ilbd_srv_t *srv)
+{
+	int scf_name_len = ILBD_MAX_NAME_LEN;
+	char scratch[scf_name_len];
+	char srvprop[scf_name_len];
+	scf_propertygroup_t *sgpg;
+	scf_handle_t *hdl;
+	scf_value_t *srvval;
+	ilb_status_t status;
+
+	ilbd_name_to_scfpgname(ILBD_SCF_SG, sg->isg_name, scratch);
+	status = ilbd_scf_retrieve_pg(scratch, &sgpg, B_FALSE);
+	if (status != ILB_STATUS_EEXIST) {
+		/*
+		 * The server group does not exist in persistent storage.
+		 * This cannot happen. Should probably transition the
+		 * service to maintenance since it should be there.
+		 */
+		logerr("ilbd_scf_add_srv: SCF update failed - entering"
+		    " maintenance mode");
+		(void) smf_maintain_instance(ILB_FMRI, SMF_IMMEDIATE);
+		return (ILB_STATUS_INTERNAL);
+	}
+
+	hdl = scf_pg_handle(sgpg);
+	if (hdl == NULL) {
+		ilbd_scf_destroy(NULL, NULL, NULL, sgpg);
+		return (ilbd_scf_err_to_ilb_err());
+	}
+
+	srvval = scf_value_create(hdl);
+	if (srvval == NULL) {
+		ilbd_scf_destroy(hdl, NULL, NULL, sgpg);
+		return (ILB_STATUS_ENOMEM);
+	}
+
+	/* scratch is reused: it now receives the server's string form */
+	ilbd_srv_scf_val(srv, scratch);
+	(void) scf_value_set_astring(srvval, scratch);
+	(void) snprintf(srvprop, sizeof (srvprop), "server%d", srv->isv_id);
+	status = ilbd_scf_set_prop(sgpg, srvprop, SCF_TYPE_ASTRING, srvval);
+
+	ilbd_scf_destroy(hdl, NULL, NULL, sgpg);
+	scf_value_destroy(srvval);
+
+	return (status);
+}
+
+/*
+ * Delete a server, srv, of a server group, sg, from the persistent
+ * configuration by removing the "server<ID>" property from the group's
+ * property group inside a single SCF transaction.
+ *
+ * Returns ILB_STATUS_OK on success, ILB_STATUS_INTERNAL if the server
+ * group has no property group, or the ILB translation of the SCF error
+ * for any failed SCF step.
+ */
+ilb_status_t
+ilbd_scf_del_srv(ilbd_sg_t *sg, ilbd_srv_t *srv)
+{
+	ilb_status_t ret;
+	scf_propertygroup_t *pg;
+	scf_handle_t *h;
+	int scf_name_len = ILBD_MAX_NAME_LEN;
+	char buf[scf_name_len];
+	scf_transaction_t *tx = NULL;
+	scf_transaction_entry_t *entry = NULL;
+
+	ilbd_name_to_scfpgname(ILBD_SCF_SG, sg->isg_name, buf);
+	ret = ilbd_scf_retrieve_pg(buf, &pg, B_FALSE);
+	/*
+	 * The server group does not exist in persistent storage. This
+	 * cannot happen. The caller of this function puts service in
+	 * maintenance mode.
+	 */
+	if (ret != ILB_STATUS_EEXIST)
+		return (ILB_STATUS_INTERNAL);
+	ret = ILB_STATUS_OK;
+
+	if ((h = scf_pg_handle(pg)) == NULL) {
+		logdebug("ilbd_scf_del_srv: scf_pg_handle: %s\n",
+		    scf_strerror(scf_error()));
+		ilbd_scf_destroy(NULL, NULL, NULL, pg);
+		return (ilbd_scf_err_to_ilb_err());
+	}
+
+	if ((tx = scf_transaction_create(h)) == NULL ||
+	    (entry = scf_entry_create(h)) == NULL) {
+		logdebug("ilbd_scf_del_srv: create scf transaction failed: "
+		    "%s\n", scf_strerror(scf_error()));
+		ret = ilbd_scf_err_to_ilb_err();
+		goto out;
+	}
+
+	/* buf is reused: it now holds the name of the property to delete */
+	(void) snprintf(buf, sizeof (buf), "server%d", srv->isv_id);
+
+	if (scf_transaction_start(tx, pg) == -1) {
+		logdebug("ilbd_scf_del_srv: start scf transaction failed: "
+		    "%s\n", scf_strerror(scf_error()));
+		ret = ilbd_scf_err_to_ilb_err();
+		goto out;
+	}
+	if (scf_transaction_property_delete(tx, entry, buf) == -1) {
+		logdebug("ilbd_scf_del_srv: delete property failed: %s\n",
+		    scf_strerror(scf_error()));
+		ret = ilbd_scf_err_to_ilb_err();
+		goto out;
+	}
+	if (scf_transaction_commit(tx) != 1) {
+		logdebug("ilbd_scf_del_srv: commit transaction failed: %s\n",
+		    scf_strerror(scf_error()));
+		ret = ilbd_scf_err_to_ilb_err();
+	}
+
+out:
+	if (entry != NULL)
+		scf_entry_destroy(entry);
+	if (tx != NULL)
+		scf_transaction_destroy(tx);
+	ilbd_scf_destroy(h, NULL, NULL, pg);
+
+	return (ret);
+}
diff --git a/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_sg.c b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_sg.c
new file mode 100644
index 0000000000..c308dc5525
--- /dev/null
+++ b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_sg.c
@@ -0,0 +1,1644 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <stdlib.h>
+#include <strings.h>
+#include <stddef.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/list.h>
+#include <assert.h>
+#include <errno.h>
+#include <libilb.h>
+#include <net/if.h>
+#include <inet/ilb.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include "libilb_impl.h"
+#include "ilbd.h"
+
+typedef enum { /* progress of the sorted-insert search done by i_add_srv2sg() */
+ not_searched, /* initial value: no list element has been compared yet */
+ stop_found, /* a place was found; don't continue searching */
+ cont_search, /* continue with the next element in the list */
+ fail_search /* search failed; caller translates to ILB_STATUS_EEXIST */
+} srch_ind_t;
+
+static list_t ilbd_sg_hlist; /* list of ilbd_sg_t, linked through isg_link; set up by i_setup_sg_hlist() */
+
+static ilb_status_t i_delete_srv(ilbd_sg_t *, ilbd_srv_t *, int);
+static void i_ilbd_free_srvID(ilbd_sg_t *, int32_t);
+
+/* Last parameter to pass to i_find_srv(), specifying the matching mode */
+#define MODE_ADDR 1 /* presumably: match on server address -- confirm in i_find_srv() */
+#define MODE_SRVID 2 /* presumably: match on server ID -- confirm in i_find_srv() */
+
+static ilbd_srv_t *i_find_srv(list_t *, ilb_sg_srv_t *, int);
+
+/*
+ * Initialize the file-wide list of server groups.  Its elements are
+ * ilbd_sg_t structures chained through their isg_link member.
+ */
+void
+i_setup_sg_hlist(void)
+{
+	const size_t elem_size = sizeof (ilbd_sg_t);
+	const size_t link_off = offsetof(ilbd_sg_t, isg_link);
+
+	list_create(&ilbd_sg_hlist, elem_size, link_off);
+}
+
+/*
+ * Allocate a zero-filled daemon-internal server group called name, create
+ * its (empty) server and rule lists and append it to the list of all
+ * server groups.  Returns NULL if memory cannot be allocated.
+ */
+static ilbd_sg_t *
+i_ilbd_alloc_sg(char *name)
+{
+	ilbd_sg_t *new_sg = calloc(1, sizeof (ilbd_sg_t));
+
+	if (new_sg == NULL)
+		return (NULL);
+
+	(void) strlcpy(new_sg->isg_name, name, sizeof (new_sg->isg_name));
+
+	/* servers are linked via isv_srv_link, rules via irl_sglink */
+	list_create(&new_sg->isg_srvlist, sizeof (ilbd_srv_t),
+	    offsetof(ilbd_srv_t, isv_srv_link));
+	list_create(&new_sg->isg_rulelist, sizeof (ilbd_rule_t),
+	    offsetof(ilbd_rule_t, irl_sglink));
+
+	list_insert_tail(&ilbd_sg_hlist, new_sg);
+	return (new_sg);
+}
+
+/*
+ * Apply one change for server group d_sg to the persistent configuration.
+ * scf_cmd selects the operation: create the group's property group,
+ * destroy it, or change property prop_name to valstr.  A NULL prop_name
+ * for the change operation, or an unknown scf_cmd, yields
+ * ILB_STATUS_EINVAL.
+ */
+static ilb_status_t
+i_ilbd_save_sg(ilbd_sg_t *d_sg, ilbd_scf_cmd_t scf_cmd, const char *prop_name,
+    char *valstr)
+{
+	if (scf_cmd == ILBD_SCF_CREATE)
+		return (ilbd_create_pg(ILBD_SCF_SG, (void *)d_sg));
+
+	if (scf_cmd == ILBD_SCF_DESTROY)
+		return (ilbd_destroy_pg(ILBD_SCF_SG, d_sg->isg_name));
+
+	if (scf_cmd == ILBD_SCF_ENABLE_DISABLE) {
+		if (prop_name == NULL)
+			return (ILB_STATUS_EINVAL);
+		return (ilbd_change_prop(ILBD_SCF_SG, d_sg->isg_name,
+		    prop_name, valstr));
+	}
+
+	logdebug("i_ilbd_save_sg: invalid scf cmd %d", scf_cmd);
+	return (ILB_STATUS_EINVAL);
+}
+
+/*
+ * Append rule irl to the rule list of server group sg.  No duplicate
+ * check is made here; always returns ILB_STATUS_OK.
+ */
+ilb_status_t
+i_attach_rule2sg(ilbd_sg_t *sg, ilbd_rule_t *irl)
+{
+	list_t *rules = &sg->isg_rulelist;
+
+	/* assert: the same rule is attached to any sg only once */
+	list_insert_tail(rules, irl);
+	return (ILB_STATUS_OK);
+}
+
+/*
+ * Unlink server group sg from the list of all server groups, release the
+ * ID and the memory of each of its servers, then free the group itself.
+ * A NULL sg is ignored.
+ */
+static void
+i_ilbd_free_sg(ilbd_sg_t *sg)
+{
+	ilbd_srv_t *victim;
+
+	if (sg == NULL)
+		return;
+
+	list_remove(&ilbd_sg_hlist, sg);
+
+	/* drain the server list from the tail */
+	for (victim = list_remove_tail(&sg->isg_srvlist); victim != NULL;
+	    victim = list_remove_tail(&sg->isg_srvlist)) {
+		i_ilbd_free_srvID(sg, victim->isv_id);
+		free(victim);
+		sg->isg_srvcount--;
+	}
+	free(sg);
+}
+
+/*
+ * Look up a server group by name in the list of all server groups.
+ * Returns the matching group, or NULL if there is none.
+ */
+ilbd_sg_t *
+i_find_sg_byname(const char *name)
+{
+	ilbd_sg_t *cur = list_head(&ilbd_sg_hlist);
+
+	/* find position of sg in list */
+	while (cur != NULL) {
+		if (strncmp(cur->isg_name, name, sizeof (cur->isg_name)) == 0)
+			return (cur);
+		cur = list_next(&ilbd_sg_hlist, cur);
+	}
+	return (NULL);
+}
+
+/*
+ * Generates an audit record for enable-server, disable-server, remove-server
+ * delete-servergroup, create-servergroup and add-server subcommands.
+ *
+ * data		values to put into the record; which members are used
+ *		depends on cmd
+ * cmd		the subcommand being audited
+ * rc		outcome of the subcommand; ILB_STATUS_OK is recorded as
+ *		success, anything else as failure with the code returned by
+ *		ilberror2auditerror()
+ * ucredp	credentials of the requesting client; when NULL for
+ *		add-server or create-servergroup, nothing is audited
+ *
+ * A command that is not one of the six listed above is logged and not
+ * audited.  Failure of any audit library call terminates the daemon.
+ */
+static void
+ilbd_audit_server_event(audit_sg_event_data_t *data,
+    ilbd_cmd_t cmd, ilb_status_t rc, ucred_t *ucredp)
+{
+	adt_session_data_t *ah;
+	adt_event_data_t *event;
+	au_event_t flag;
+	int put_status;
+	int put_retval;
+
+	if ((ucredp == NULL) && ((cmd == ILBD_ADD_SERVER_TO_GROUP) ||
+	    (cmd == ILBD_CREATE_SERVERGROUP))) {
+		/*
+		 * We came here from the path where ilbd is
+		 * incorporating the ILB configuration from
+		 * SCF. In that case, we skip auditing
+		 */
+		return;
+	}
+
+	if (adt_start_session(&ah, NULL, 0) != 0) {
+		logerr("ilbd_audit_server_event: adt_start_session failed");
+		exit(EXIT_FAILURE);
+	}
+
+	if (adt_set_from_ucred(ah, ucredp, ADT_NEW) != 0) {
+		(void) adt_end_session(ah);
+		logerr("ilbd_audit_server_event: adt_set_from_ucred failed");
+		exit(EXIT_FAILURE);
+	}
+
+	/* pick the audit event that corresponds to the subcommand */
+	switch (cmd) {
+	case ILBD_ENABLE_SERVER:
+		flag = ADT_ilb_enable_server;
+		break;
+	case ILBD_DISABLE_SERVER:
+		flag = ADT_ilb_disable_server;
+		break;
+	case ILBD_REM_SERVER_FROM_GROUP:
+		flag = ADT_ilb_remove_server;
+		break;
+	case ILBD_ADD_SERVER_TO_GROUP:
+		flag = ADT_ilb_add_server;
+		break;
+	case ILBD_CREATE_SERVERGROUP:
+		flag = ADT_ilb_create_servergroup;
+		break;
+	case ILBD_DESTROY_SERVERGROUP:
+		flag = ADT_ilb_delete_servergroup;
+		break;
+	default:
+		/*
+		 * There is no audit event for this command; never hand an
+		 * undefined event ID to adt_alloc_event().
+		 */
+		logerr("ilbd_audit_server_event: unexpected command %d",
+		    (int)cmd);
+		(void) adt_end_session(ah);
+		return;
+	}
+
+	if ((event = adt_alloc_event(ah, flag)) == NULL) {
+		logerr("ilbd_audit_server_event: adt_alloc_event failed");
+		exit(EXIT_FAILURE);
+	}
+	(void) memset((char *)event, 0, sizeof (adt_event_data_t));
+
+	switch (cmd) {
+	case ILBD_ENABLE_SERVER:
+		event->adt_ilb_enable_server.auth_used =
+		    NET_ILB_ENABLE_AUTH;
+		event->adt_ilb_enable_server.server_id =
+		    data->ed_serverid;
+		event->adt_ilb_enable_server.server_ipaddress =
+		    data->ed_server_address;
+		break;
+	case ILBD_DISABLE_SERVER:
+		event->adt_ilb_disable_server.auth_used =
+		    NET_ILB_ENABLE_AUTH;
+		event->adt_ilb_disable_server.server_id =
+		    data->ed_serverid;
+		event->adt_ilb_disable_server.server_ipaddress =
+		    data->ed_server_address;
+		break;
+	case ILBD_REM_SERVER_FROM_GROUP:
+		event->adt_ilb_remove_server.auth_used =
+		    NET_ILB_CONFIG_AUTH;
+		event->adt_ilb_remove_server.server_id =
+		    data->ed_serverid;
+		event->adt_ilb_remove_server.server_group = data->ed_sgroup;
+		event->adt_ilb_remove_server.server_ipaddress =
+		    data->ed_server_address;
+		break;
+	case ILBD_CREATE_SERVERGROUP:
+		event->adt_ilb_create_servergroup.auth_used =
+		    NET_ILB_CONFIG_AUTH;
+		event->adt_ilb_create_servergroup.server_group =
+		    data->ed_sgroup;
+		break;
+	case ILBD_ADD_SERVER_TO_GROUP:
+		event->adt_ilb_add_server.auth_used =
+		    NET_ILB_CONFIG_AUTH;
+		event->adt_ilb_add_server.server_ipaddress =
+		    data->ed_server_address;
+		event->adt_ilb_add_server.server_id =
+		    data->ed_serverid;
+		event->adt_ilb_add_server.server_group =
+		    data->ed_sgroup;
+		/* ports arrive in network byte order */
+		event->adt_ilb_add_server.server_minport =
+		    ntohs(data->ed_minport);
+		event->adt_ilb_add_server.server_maxport =
+		    ntohs(data->ed_maxport);
+		break;
+	case ILBD_DESTROY_SERVERGROUP:
+		event->adt_ilb_delete_servergroup.auth_used =
+		    NET_ILB_CONFIG_AUTH;
+		event->adt_ilb_delete_servergroup.server_group =
+		    data->ed_sgroup;
+		break;
+	default:
+		/* not reached: unknown commands returned above */
+		break;
+	}
+
+	/* Fill in success/failure */
+	if (rc == ILB_STATUS_OK) {
+		put_status = ADT_SUCCESS;
+		put_retval = ADT_SUCCESS;
+	} else {
+		put_status = ADT_FAILURE;
+		put_retval = ilberror2auditerror(rc);
+	}
+	if (adt_put_event(event, put_status, put_retval) != 0) {
+		logerr("ilbd_audit_server_event:"
+		    " adt_put_event failed");
+		exit(EXIT_FAILURE);
+	}
+	adt_free_event(event);
+	(void) adt_end_session(ah);
+}
+
+/*
+ * Destroy the server group called sg_name on behalf of the client
+ * described by ps / ucredp.  The group must exist and must not be
+ * referenced by any rule.  When ps is not NULL the group is removed from
+ * the persistent configuration before the in-memory copy is freed.  Every
+ * outcome, success or failure, is audited exactly once.
+ */
+ilb_status_t
+ilbd_destroy_sg(const char *sg_name, const struct passwd *ps,
+    ucred_t *ucredp)
+{
+	audit_sg_event_data_t audit_sg_data;
+	ilbd_sg_t *victim;
+	ilb_status_t rc;
+
+	(void) memset(&audit_sg_data, 0, sizeof (audit_sg_event_data_t));
+	audit_sg_data.ed_sgroup = (char *)sg_name;
+
+	rc = ilbd_check_client_config_auth(ps);
+	if (rc != ILB_STATUS_OK)
+		goto audit;
+
+	victim = i_find_sg_byname(sg_name);
+	if (victim == NULL) {
+		logdebug("ilbd_destroy_sg: cannot find specified server"
+		    " group %s", sg_name);
+		rc = ILB_STATUS_SGUNAVAIL;
+		goto audit;
+	}
+
+	/*
+	 * we only destroy SGs that don't have any rules associated with
+	 * them anymore.
+	 */
+	if (list_head(&victim->isg_rulelist) != NULL) {
+		logdebug("ilbd_destroy_sg: server group %s has rules"
+		    " associated with it and thus cannot be"
+		    " removed", victim->isg_name);
+		rc = ILB_STATUS_SGINUSE;
+		goto audit;
+	}
+
+	if (ps != NULL) {
+		rc = i_ilbd_save_sg(victim, ILBD_SCF_DESTROY, NULL, NULL);
+		if (rc != ILB_STATUS_OK)
+			goto audit;
+	}
+	i_ilbd_free_sg(victim);
+
+audit:
+	ilbd_audit_server_event(&audit_sg_data, ILBD_DESTROY_SERVERGROUP,
+	    rc, ucredp);
+	return (rc);
+}
+
+/* ARGSUSED */
+/*
+ * Create an (empty) server group named sg->sg_name.
+ *
+ * ev_port is unused; it exists because ilbd_create_sg(), ilbd_create_hc()
+ * and ilbd_create_rule() are callbacks for ilbd_scf_instance_walk_pg(),
+ * which requires a common signature.
+ *
+ * When ps is not NULL the client's authorization is checked and the new
+ * group is also written to the persistent configuration.  Every outcome is
+ * passed to ilbd_audit_server_event() exactly once.
+ */
+ilb_status_t
+ilbd_create_sg(ilb_sg_info_t *sg, int ev_port, const struct passwd *ps,
+    ucred_t *ucredp)
+{
+	audit_sg_event_data_t audit_sg_data;
+	ilbd_sg_t *new_sg;
+	ilb_status_t rc = ILB_STATUS_OK;
+
+	(void) memset(&audit_sg_data, 0, sizeof (audit_sg_event_data_t));
+	audit_sg_data.ed_sgroup = sg->sg_name;
+
+	if (ps != NULL) {
+		rc = ilbd_check_client_config_auth(ps);
+		if (rc != ILB_STATUS_OK)
+			goto audit;
+	}
+
+	if (i_find_sg_byname(sg->sg_name) != NULL) {
+		logdebug("ilbd_create_sg: server group %s already exists",
+		    sg->sg_name);
+		rc = ILB_STATUS_SGEXISTS;
+		goto audit;
+	}
+
+	new_sg = i_ilbd_alloc_sg(sg->sg_name);
+	if (new_sg == NULL) {
+		rc = ILB_STATUS_ENOMEM;
+		goto audit;
+	}
+
+	/*
+	 * we've successfully created the sg in memory. Before we can
+	 * return "success", we need to reflect this in persistent
+	 * storage
+	 */
+	if (ps != NULL) {
+		rc = i_ilbd_save_sg(new_sg, ILBD_SCF_CREATE, NULL, NULL);
+		if (rc != ILB_STATUS_OK)
+			i_ilbd_free_sg(new_sg);
+	}
+
+audit:
+	ilbd_audit_server_event(&audit_sg_data,
+	    ILBD_CREATE_SERVERGROUP, rc, ucredp);
+	return (rc);
+}
+
+/*
+ * Decide, by comparing addresses, whether the new server tsrv belongs in
+ * front of list element lsrv, and insert it there if so.  The list is kept
+ * in ascending order so that it need not be searched in its entirety for
+ * overlap on every insertion.
+ *
+ * Return code:
+ *	stop_found: tsrv was inserted before lsrv; stop searching
+ *	cont_search: tsrv compares as 1 against lsrv; try the next element
+ *	fail_search: the addresses are equal, i.e. the server already
+ *	    exists (caller translates to ILB_STATUS_EEXIST)
+ */
+static srch_ind_t
+i_test_and_insert(ilbd_srv_t *tsrv, ilbd_srv_t *lsrv, list_t *srvlist)
+{
+	int cmp = ilb_cmp_in6_addr(&tsrv->isv_addr, &lsrv->isv_addr, NULL);
+
+	if (cmp == 1)
+		return (cont_search);	/* search can continue */
+
+	if (cmp == 0) {
+		logdebug("i_test_and_insert: specified server already exists");
+		return (fail_search);
+	}
+
+	/* the list is kept in ascending order */
+	list_insert_before(srvlist, lsrv, tsrv);
+	return (stop_found);
+}
+
+
+/*
+ * copy a server description [ip1,ip2,port1,port2,srvID,flags]
+ */
+#define COPY_SERVER(src, dest) \
+ (dest)->sgs_addr = (src)->sgs_addr; \
+ (dest)->sgs_minport = (src)->sgs_minport; \
+ (dest)->sgs_maxport = (src)->sgs_maxport; \
+ (dest)->sgs_id = (src)->sgs_id; \
+ (void) strlcpy((dest)->sgs_srvID, (src)->sgs_srvID, \
+ sizeof ((dest)->sgs_srvID)); \
+ (dest)->sgs_flags = (src)->sgs_flags
+
+/*
+ * Create a daemon-internal copy of server description srv and insert it
+ * into the address-sorted server list of group dsg.
+ *
+ * On success the group's server count is incremented, the new entry is
+ * returned through *ret_srv and ILB_STATUS_OK is returned.  Returns
+ * ILB_STATUS_ENOMEM if the copy cannot be allocated and ILB_STATUS_EEXIST
+ * if a server with the same address is already in the list; *ret_srv is
+ * left untouched in both cases.
+ */
+static ilb_status_t
+i_add_srv2sg(ilbd_sg_t *dsg, ilb_sg_srv_t *srv, ilbd_srv_t **ret_srv)
+{
+	list_t *servers = &dsg->isg_srvlist;
+	srch_ind_t outcome = not_searched;
+	ilb_sg_srv_t *copy;
+	ilbd_srv_t *fresh, *cursor;
+	in_port_t lo_port, hi_port;
+
+	fresh = calloc(1, sizeof (*fresh));
+	if (fresh == NULL)
+		return (ILB_STATUS_ENOMEM);
+	copy = &fresh->isv_srv;
+	COPY_SERVER(srv, copy);
+
+	/*
+	 * port info is in network byte order - we need host byte order
+	 * for comparisons purposes
+	 */
+	lo_port = ntohs(copy->sgs_minport);
+	hi_port = ntohs(copy->sgs_maxport);
+	if (lo_port != 0 && lo_port > hi_port)
+		copy->sgs_maxport = copy->sgs_minport;
+
+	/* walk the list until a place is found or a duplicate is hit */
+	for (cursor = list_head(servers); cursor != NULL;
+	    cursor = list_next(servers, cursor)) {
+		outcome = i_test_and_insert(fresh, cursor, servers);
+		if (outcome != cont_search)
+			break;
+	}
+
+	if (cursor == NULL) {
+		if (outcome == not_searched) {
+			/* the list was empty */
+			list_insert_head(servers, fresh);
+		} else {
+			/* if reaches the end of list, insert to the tail */
+			list_insert_tail(servers, fresh);
+		}
+	}
+
+	if (outcome == fail_search) {
+		free(fresh);
+		return (ILB_STATUS_EEXIST);
+	}
+
+	dsg->isg_srvcount++;
+	*ret_srv = fresh;
+	return (ILB_STATUS_OK);
+}
+
+/*
+ * Allocate a server ID from the ID array of server group sg.
+ *
+ * If set_id is not NULL, *set_id is an ID that has already been chosen
+ * (this is the case when ilbd reads from the persistent configuration);
+ * it is only marked as used and returned.  Otherwise the array is scanned
+ * circularly, starting at the most recently handed out ID, for an unused
+ * entry.  BAD_SRVID is returned when the group already holds
+ * MAX_SRVCOUNT servers.
+ */
+static int32_t
+i_ilbd_alloc_srvID(ilbd_sg_t *sg, int32_t *set_id)
+{
+	int32_t start;
+	int32_t step;
+	int32_t slot;
+
+	/* The server ID is already allocated, just update the ID array. */
+	if (set_id != NULL) {
+		assert(sg->isg_id_arr[*set_id] == 0);
+		sg->isg_id_arr[*set_id] = 1;
+		return (*set_id);
+	}
+
+	/* if we're "full up", give back something invalid */
+	if (sg->isg_srvcount == MAX_SRVCOUNT)
+		return (BAD_SRVID);
+
+	start = sg->isg_max_id;
+	step = 0;
+	while (step < MAX_SRVCOUNT &&
+	    sg->isg_id_arr[(step + start) % MAX_SRVCOUNT] != 0)
+		step++;
+
+	slot = (step + start) % MAX_SRVCOUNT;
+	sg->isg_max_id = slot;
+	sg->isg_id_arr[slot] = 1;
+	return (sg->isg_max_id);
+}
+
+/*
+ * Give server ID id back to server group sg by clearing its entry in the
+ * group's ID array.  The ID is asserted to be in use.
+ */
+static void
+i_ilbd_free_srvID(ilbd_sg_t *sg, int32_t id)
+{
+	const int32_t slot = id;
+
+	assert(sg->isg_id_arr[slot] == 1);
+	sg->isg_id_arr[slot] = 0;
+}
+
+/*
+ * Generate one audit record per server listed in sg for an add-server or
+ * remove-server command that is being rejected with status rc.
+ *
+ * For ILBD_ADD_SERVER_TO_GROUP the record carries the server's address
+ * string and port range; for ILBD_REM_SERVER_FROM_GROUP it carries the
+ * server ID string.  Both carry the server group name.  For any other cmd
+ * audit_sg_data is passed on as received.
+ */
+static void
+fill_audit_record(ilb_sg_info_t *sg, audit_sg_event_data_t *audit_sg_data,
+    ilbd_cmd_t cmd, ilb_status_t rc, ucred_t *ucredp)
+{
+	ilb_sg_srv_t *tsrv;
+	int i;
+	/*
+	 * Declared at function scope: ed_server_address points into this
+	 * buffer and is read by ilbd_audit_server_event() below, so the
+	 * buffer must outlive the block that fills it in.
+	 */
+	char addrstr_buf[INET6_ADDRSTRLEN];
+
+	for (i = 0; i < sg->sg_srvcount; i++) {
+		tsrv = &sg->sg_servers[i];
+		if (cmd == ILBD_ADD_SERVER_TO_GROUP) {
+			audit_sg_data->ed_serverid = NULL;
+			ilbd_addr2str(&tsrv->sgs_addr, addrstr_buf,
+			    sizeof (addrstr_buf));
+			audit_sg_data->ed_server_address = addrstr_buf;
+			audit_sg_data->ed_minport = tsrv->sgs_minport;
+			audit_sg_data->ed_maxport = tsrv->sgs_maxport;
+			audit_sg_data->ed_sgroup = sg->sg_name;
+		} else if (cmd == ILBD_REM_SERVER_FROM_GROUP) {
+			audit_sg_data->ed_serverid = tsrv->sgs_srvID;
+			audit_sg_data->ed_sgroup = sg->sg_name;
+			audit_sg_data->ed_server_address = NULL;
+			audit_sg_data->ed_minport = 0;
+			audit_sg_data->ed_maxport = 0;
+		}
+		ilbd_audit_server_event(audit_sg_data, cmd, rc, ucredp);
+	}
+}
+
+/*
+ * Add the server(s) listed in `sg_info' to the server group named
+ * sg_info->sg_name; the name(s) of the server(s) are encoded in the sg.
+ *
+ *	sg_info	group name plus the servers to add; sgs_id and sgs_srvID of
+ *		every processed server are filled in here
+ *	ev_port	event port handed to i_add_srv2krules()
+ *	ps	requesting user; NULL means the daemon is loading its
+ *		configuration from SCF, in which case no authorization
+ *		check is made, server IDs are taken from `sg_info' and
+ *		nothing is written back to SCF
+ *	ucredp	credentials handed through to the audit layer
+ *
+ * Returns ILB_STATUS_OK if all servers were added.  On failure the status
+ * of the failing step is returned; for an interactive request (ps != NULL)
+ * the servers added so far are removed again, while a failure during SCF
+ * loading puts the service into maintenance mode.
+ */
+ilb_status_t
+ilbd_add_server_to_group(ilb_sg_info_t *sg_info, int ev_port,
+    const struct passwd *ps, ucred_t *ucredp)
+{
+	ilb_status_t	rc = ILB_STATUS_OK;
+	ilbd_sg_t	*tmp_sg;
+	int		i, j;
+	int32_t		new_id = BAD_SRVID;
+	int32_t		af = AF_UNSPEC;
+	ilbd_srv_t	*nsrv;
+	ilb_sg_srv_t	*srv;
+	audit_sg_event_data_t	audit_sg_data;
+	char		addrstr_buf[INET6_ADDRSTRLEN];
+
+	/*
+	 * Start from a defined record: the success path below audits
+	 * audit_sg_data even if the request carries no server at all.
+	 */
+	(void) memset(&audit_sg_data, 0, sizeof (audit_sg_data));
+
+	if (ps != NULL) {
+		rc = ilbd_check_client_config_auth(ps);
+		if (rc != ILB_STATUS_OK) {
+			fill_audit_record(sg_info, &audit_sg_data,
+			    ILBD_ADD_SERVER_TO_GROUP, rc, ucredp);
+			return (rc);
+		}
+	}
+
+	tmp_sg = i_find_sg_byname(sg_info->sg_name);
+	if (tmp_sg == NULL) {
+		logdebug("ilbd_add_server_to_group: server"
+		    " group %s does not exist", sg_info->sg_name);
+		fill_audit_record(sg_info, &audit_sg_data,
+		    ILBD_ADD_SERVER_TO_GROUP, ILB_STATUS_ENOENT, ucredp);
+		return (ILB_STATUS_ENOENT);
+	}
+
+	/*
+	 * we do the dance with address family below to make sure only
+	 * IP addresses in the same AF get into an SG; the first one to get
+	 * in sets the "tone"
+	 * if this is the first server to join a group, check whether
+	 * there's no mismatch with any *rules* already attached
+	 */
+	if (tmp_sg->isg_srvcount > 0) {
+		ilbd_srv_t *tsrv = list_head(&tmp_sg->isg_srvlist);
+
+		af = GET_AF(&tsrv->isv_addr);
+	} else {
+		ilbd_rule_t *irl = list_head(&tmp_sg->isg_rulelist);
+
+		if (irl != NULL)
+			af = GET_AF(&irl->irl_vip);
+	}
+
+	for (i = 0; i < sg_info->sg_srvcount; i++) {
+		srv = &sg_info->sg_servers[i];
+
+		(void) memset(&audit_sg_data, 0, sizeof (audit_sg_data));
+		ilbd_addr2str(&srv->sgs_addr, addrstr_buf,
+		    sizeof (addrstr_buf));
+		audit_sg_data.ed_server_address = addrstr_buf;
+		audit_sg_data.ed_minport = srv->sgs_minport;
+		audit_sg_data.ed_maxport = srv->sgs_maxport;
+		audit_sg_data.ed_sgroup = sg_info->sg_name;
+
+		/* only test if we have sth to test against */
+		if (af != AF_UNSPEC) {
+			int32_t	sgs_af = GET_AF(&srv->sgs_addr);
+
+			if (af != sgs_af) {
+				logdebug("address family mismatch with previous"
+				    " hosts in servergroup or with rule");
+				rc = ILB_STATUS_MISMATCHH;
+				ilbd_audit_server_event(&audit_sg_data,
+				    ILBD_ADD_SERVER_TO_GROUP, rc, ucredp);
+				goto rollback;
+			}
+		}
+
+		/*
+		 * PS: NULL means daemon is loading configure from scf.
+		 * ServerID is already assigned, just update the ID array.
+		 */
+		if (ps != NULL) {
+			new_id = i_ilbd_alloc_srvID(tmp_sg, NULL);
+			if (new_id == BAD_SRVID) {
+				logdebug("ilbd_add_server_to_group: server"
+				    "group %s is full, no more servers"
+				    " can be added", sg_info->sg_name);
+				rc = ILB_STATUS_SGFULL;
+				ilbd_audit_server_event(&audit_sg_data,
+				    ILBD_ADD_SERVER_TO_GROUP, rc, ucredp);
+				goto rollback;
+			}
+			srv->sgs_id = new_id;
+		} else {
+			new_id = i_ilbd_alloc_srvID(tmp_sg, &srv->sgs_id);
+		}
+
+		/*
+		 * here we implement the requirement that server IDs start
+		 * with a character that is not legal in hostnames - in our
+		 * case, a "_" (underscore).
+		 */
+		(void) snprintf(srv->sgs_srvID,
+		    sizeof (srv->sgs_srvID), "%c%s.%d", ILB_SRVID_PREFIX,
+		    tmp_sg->isg_name, srv->sgs_id);
+		audit_sg_data.ed_serverid = srv->sgs_srvID;
+
+		/*
+		 * Before we update the kernel rules by adding the server,
+		 * we need to make checks and fail if any of the
+		 * following is true:
+		 *
+		 * o if the server has single port and the servergroup
+		 *   is associated to a DSR rule with a port range
+		 * o if the server has a port range and the servergroup
+		 *   is associated to a DSR rule with a port range and
+		 *   the rule's min and max port does not exactly
+		 *   match that of the server's.
+		 * o if the server has a port range and the servergroup
+		 *   is associated to a NAT/Half-NAT rule with a port range
+		 *   and the rule's port range size does not match that
+		 *   of the server's.
+		 * o if the rule has a fixed hc port, check that this port
+		 *   is valid in the server's port specification.
+		 */
+		rc = i_check_srv2rules(&tmp_sg->isg_rulelist, srv);
+		if (rc != ILB_STATUS_OK) {
+			ilbd_audit_server_event(&audit_sg_data,
+			    ILBD_ADD_SERVER_TO_GROUP, rc, ucredp);
+			/*
+			 * The ID allocated above for this request is not
+			 * attached to any server yet, so nothing else will
+			 * ever release it; give it back here.
+			 */
+			if (ps != NULL)
+				i_ilbd_free_srvID(tmp_sg, new_id);
+			goto rollback;
+		}
+
+		/*
+		 * NOTE(review): whether i_add_srv2sg() releases the server
+		 * ID when it fails cannot be told from here, so the ID is
+		 * left alone on this path - confirm.
+		 */
+		if ((rc = i_add_srv2sg(tmp_sg, srv, &nsrv)) != ILB_STATUS_OK) {
+			ilbd_audit_server_event(&audit_sg_data,
+			    ILBD_ADD_SERVER_TO_GROUP, rc, ucredp);
+			goto rollback;
+		}
+
+		rc = i_add_srv2krules(&tmp_sg->isg_rulelist, &nsrv->isv_srv,
+		    ev_port);
+		if (rc != ILB_STATUS_OK) {
+			ilbd_audit_server_event(&audit_sg_data,
+			    ILBD_ADD_SERVER_TO_GROUP, rc, ucredp);
+			/*
+			 * The failure may be due to the serverid being on
+			 * hold in kernel for connection draining. But ilbd
+			 * has no way of knowing that. So we are freeing up
+			 * the serverid, and may run into the risk of
+			 * having this failure again, if we choose this
+			 * serverid  when processing the next add-server
+			 * command for this servergroup, while connection
+			 * draining is underway. We assume that the user
+			 * will read the man page after he/she encounters
+			 * this failure, and learn to not add any server
+			 * to the servergroup until connection draining of
+			 * all servers in the  servergroup is complete.
+			 * XXX Need to revisit this when connection draining
+			 * is reworked
+			 */
+			list_remove(&tmp_sg->isg_srvlist, nsrv);
+			i_ilbd_free_srvID(tmp_sg, nsrv->isv_id);
+			free(nsrv);
+			tmp_sg->isg_srvcount--;
+			goto rollback;
+		}
+		if (ps != NULL) {
+			rc = ilbd_scf_add_srv(tmp_sg, nsrv);
+			if (rc != ILB_STATUS_OK) {
+				/*
+				 * The following should not fail since the
+				 * server is just added.  Just in case, we
+				 * pass in -1 as the event port to avoid
+				 * roll back in i_rem_srv_frm_krules() called
+				 * by i_delete_srv().
+				 */
+				ilbd_audit_server_event(&audit_sg_data,
+				    ILBD_ADD_SERVER_TO_GROUP, rc, ucredp);
+				(void) i_delete_srv(tmp_sg, nsrv, -1);
+				break;
+			}
+		}
+	}
+
+	if (rc == ILB_STATUS_OK) {
+		ilbd_audit_server_event(&audit_sg_data,
+		    ILBD_ADD_SERVER_TO_GROUP, rc, ucredp);
+		return (rc);
+	}
+
+rollback:
+	/*
+	 * If ilbd is initializing based on the SCF data and something fails,
+	 * the only choice is to transition the service to maintenance mode...
+	 */
+	if (ps == NULL) {
+		logerr("%s: failure during initialization -"
+		    " entering maintenance mode", __func__);
+		(void) smf_maintain_instance(ILB_FMRI, SMF_IMMEDIATE);
+		return (rc);
+	}
+
+	/*
+	 * we need to roll back all servers previous to the one
+	 * that just caused the failure
+	 */
+	for (j = i-1; j >= 0; j--) {
+		srv = &sg_info->sg_servers[j];
+
+		/* We should be able to find those servers just added. */
+		nsrv = i_find_srv(&tmp_sg->isg_srvlist, srv, MODE_SRVID);
+		assert(nsrv != NULL);
+		(void) i_delete_srv(tmp_sg, nsrv, -1);
+	}
+	return (rc);
+}
+
+/*
+ * Search predicate used by i_find_srv(): yields stop_found when the
+ * server ID string of `sg_srv' equals the one stored in `lsrv', and
+ * cont_search otherwise.  At most sizeof (sgs_srvID) bytes are compared.
+ */
+static srch_ind_t
+i_match_srvID(ilb_sg_srv_t *sg_srv, ilbd_srv_t *lsrv)
+{
+	int	differ;
+
+	differ = strncmp(sg_srv->sgs_srvID, lsrv->isv_srvID,
+	    sizeof (sg_srv->sgs_srvID));
+
+	return (differ == 0 ? stop_found : cont_search);
+}
+
+/*
+ * Validate the port specification of rule `rl' against every server of
+ * server group `sg'.  All ports are converted with ntohs() before they
+ * are compared.
+ *
+ * - A group without servers is refused with ILB_STATUS_SGEMPTY.
+ * - With a fixed health check probe port (ILB_HCI_PROBE_FIX), that port
+ *   has to lie within the server's port range, or within the rule's range
+ *   when the server specifies no port (minport of 0).
+ * - A server without port specification cannot conflict any further.
+ * - For a DSR rule, a server's range must be identical to the rule's.
+ * - For all other topologies, a server's range must either have the same
+ *   size as the rule's or consist of one single port (port collapsing).
+ *
+ * Returns ILB_STATUS_OK when no server conflicts, ILB_STATUS_BADSG at the
+ * first server that does.
+ */
+ilb_status_t
+ilbd_sg_check_rule_port(ilbd_sg_t *sg, ilb_rule_info_t *rl)
+{
+	ilbd_srv_t	*cur;
+	in_port_t	rule_lo, rule_hi;
+
+	/* Don't allow adding a rule to a sg with no server, for now... */
+	if (sg->isg_srvcount == 0)
+		return (ILB_STATUS_SGEMPTY);
+
+	rule_lo = ntohs(rl->rl_minport);
+	rule_hi = ntohs(rl->rl_maxport);
+
+	cur = list_head(&sg->isg_srvlist);
+	while (cur != NULL) {
+		in_port_t	lo = ntohs(cur->isv_minport);
+		in_port_t	hi = ntohs(cur->isv_maxport);
+		int		has_ports = (lo != 0);
+
+		if (rl->rl_hcpflag == ILB_HCI_PROBE_FIX) {
+			in_port_t	probe = ntohs(rl->rl_hcport);
+			/* fall back to the rule's range for port-less server */
+			in_port_t	lim_lo = has_ports ? lo : rule_lo;
+			in_port_t	lim_hi = has_ports ? hi : rule_hi;
+
+			if (probe < lim_lo || probe > lim_hi)
+				return (ILB_STATUS_BADSG);
+		}
+
+		if (has_ports) {
+			if (rl->rl_topo == ILB_TOPO_DSR) {
+				if (!(rule_lo == lo && rule_hi == hi))
+					return (ILB_STATUS_BADSG);
+			} else {
+				int	span = hi - lo;
+
+				/* a span of 0 means port collapsing */
+				if (span != 0 && span != rule_hi - rule_lo)
+					return (ILB_STATUS_BADSG);
+			}
+		}
+
+		cur = list_next(&sg->isg_srvlist, cur);
+	}
+
+	return (ILB_STATUS_OK);
+}
+
+/*
+ * Search predicate used by i_find_srv(): yields stop_found when the
+ * address of `sg_srv' equals the address of `lsrv' (compared with
+ * IN6_ARE_ADDR_EQUAL()), and cont_search otherwise.
+ */
+static srch_ind_t
+i_match_srvIP(ilb_sg_srv_t *sg_srv, ilbd_srv_t *lsrv)
+{
+	return (IN6_ARE_ADDR_EQUAL(&sg_srv->sgs_addr, &lsrv->isv_addr) ?
+	    stop_found : cont_search);
+}
+
+/*
+ * Walk `srvlist' and return the first server matching `sg_srv', or NULL
+ * if there is none.  `cmpmode' picks the comparison: MODE_ADDR matches on
+ * the IP address, MODE_SRVID on the server ID string.  With any other
+ * mode nothing ever matches and NULL is returned.
+ */
+static ilbd_srv_t *
+i_find_srv(list_t *srvlist, ilb_sg_srv_t *sg_srv, int cmpmode)
+{
+	ilbd_srv_t	*cand;
+
+	for (cand = list_head(srvlist); cand != NULL;
+	    cand = list_next(srvlist, cand)) {
+		srch_ind_t	verdict = cont_search;
+
+		if (cmpmode == MODE_ADDR)
+			verdict = i_match_srvIP(sg_srv, cand);
+		else if (cmpmode == MODE_SRVID)
+			verdict = i_match_srvID(sg_srv, cand);
+
+		if (verdict == stop_found)
+			return (cand);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Remove server `srv' from server group `sg'.  The server is first taken
+ * out of the kernel rules attached to the group; if that fails, its
+ * status is returned and the daemon's own state is left untouched.
+ * Otherwise the server is unlinked from the group's list, its server ID
+ * is released, the server count is decremented and `srv' is freed, so
+ * the caller must not use `srv' after a return of ILB_STATUS_OK.
+ */
+static ilb_status_t
+i_delete_srv(ilbd_sg_t *sg, ilbd_srv_t *srv, int ev_port)
+{
+	ilb_status_t	krc;
+
+	krc = i_rem_srv_frm_krules(&sg->isg_rulelist, &srv->isv_srv, ev_port);
+	if (krc == ILB_STATUS_OK) {
+		list_remove(&sg->isg_srvlist, srv);
+		sg->isg_srvcount--;
+		i_ilbd_free_srvID(sg, srv->isv_id);
+		free(srv);
+	}
+
+	return (krc);
+}
+
+/*
+ * Remove the one server identified by sg_info->sg_servers[0].sgs_srvID
+ * from the server group named sg_info->sg_name.
+ *
+ *	sg_info	group name and the server to remove; exactly one server
+ *		must be listed (asserted)
+ *	ev_port	event port handed to i_delete_srv()
+ *	ps	requesting user; checked for configuration authorization.
+ *		The persistent configuration is only updated if ps is not
+ *		NULL.
+ *	ucredp	credentials handed through to the audit layer
+ *
+ * Exactly one audit record is written per server for each outcome.
+ * Returns ILB_STATUS_OK on success, ILB_STATUS_SGUNAVAIL if the group
+ * does not exist, ILB_STATUS_SRVUNAVAIL if the server is not part of it,
+ * otherwise the status of the failing step.  If the server was removed
+ * but the SCF update failed, the service is put into maintenance mode.
+ *
+ * some people argue that returning anything here is
+ * useless - what *do* you do if you can't remove/destroy
+ * something anyway?
+ */
+ilb_status_t
+ilbd_rem_server_from_group(ilb_sg_info_t *sg_info, int ev_port,
+    const struct passwd *ps, ucred_t *ucredp)
+{
+	ilb_status_t	rc = ILB_STATUS_OK;
+	ilbd_sg_t	*tmp_sg;
+	ilbd_srv_t	*srv, tmp_srv;
+	ilb_sg_srv_t	*tsrv;
+	audit_sg_event_data_t	audit_sg_data;
+	char		addrstr_buf[INET6_ADDRSTRLEN];
+
+	/* the port fields are never set below; don't audit stack garbage */
+	(void) memset(&audit_sg_data, 0, sizeof (audit_sg_data));
+
+	rc = ilbd_check_client_config_auth(ps);
+	if (rc != ILB_STATUS_OK) {
+		fill_audit_record(sg_info, &audit_sg_data,
+		    ILBD_REM_SERVER_FROM_GROUP, rc, ucredp);
+		return (rc);
+	}
+
+	tmp_sg = i_find_sg_byname(sg_info->sg_name);
+	if (tmp_sg == NULL) {
+		logdebug("%s: server group %s\n does not exist", __func__,
+		    sg_info->sg_name);
+		fill_audit_record(sg_info, &audit_sg_data,
+		    ILBD_REM_SERVER_FROM_GROUP, ILB_STATUS_SGUNAVAIL, ucredp);
+		return (ILB_STATUS_SGUNAVAIL);
+	}
+
+	/* check the request's shape before sg_servers[0] is looked at */
+	assert(sg_info->sg_srvcount == 1);
+	tsrv = &sg_info->sg_servers[0];
+	audit_sg_data.ed_serverid = tsrv->sgs_srvID;
+	audit_sg_data.ed_sgroup = sg_info->sg_name;
+	audit_sg_data.ed_server_address = NULL;
+
+	srv = i_find_srv(&tmp_sg->isg_srvlist, &sg_info->sg_servers[0],
+	    MODE_SRVID);
+	if (srv == NULL) {
+		logdebug("%s: cannot find server in server group %s", __func__,
+		    sg_info->sg_name);
+		ilbd_audit_server_event(&audit_sg_data,
+		    ILBD_REM_SERVER_FROM_GROUP, ILB_STATUS_SRVUNAVAIL, ucredp);
+		return (ILB_STATUS_SRVUNAVAIL);
+	}
+	tsrv = &srv->isv_srv;
+	ilbd_addr2str(&tsrv->sgs_addr, addrstr_buf,
+	    sizeof (addrstr_buf));
+	audit_sg_data.ed_server_address = addrstr_buf;
+	/*
+	 * i_delete_srv frees srv, therefore we need to save
+	 * this information for ilbd_scf_del_srv
+	 */
+	(void) memcpy(&tmp_srv, srv, sizeof (tmp_srv));
+
+	rc = i_delete_srv(tmp_sg, srv, ev_port);
+	if (rc != ILB_STATUS_OK) {
+		ilbd_audit_server_event(&audit_sg_data,
+		    ILBD_REM_SERVER_FROM_GROUP, rc, ucredp);
+		return (rc);
+	}
+
+	if (ps != NULL) {
+		rc = ilbd_scf_del_srv(tmp_sg, &tmp_srv);
+		if (rc != ILB_STATUS_OK) {
+			/*
+			 * Audit before requesting maintenance mode, and
+			 * return right away so that this failure is not
+			 * recorded a second time below.
+			 */
+			ilbd_audit_server_event(&audit_sg_data,
+			    ILBD_REM_SERVER_FROM_GROUP, rc, ucredp);
+			logerr("%s: SCF update failed - entering maintenance"
+			    " mode", __func__);
+			(void) smf_maintain_instance(ILB_FMRI, SMF_IMMEDIATE);
+			return (rc);
+		}
+	}
+	ilbd_audit_server_event(&audit_sg_data,
+	    ILBD_REM_SERVER_FROM_GROUP, rc, ucredp);
+	return (rc);
+}
+
+/*
+ * Fill the reply buffer with the names of all server groups, rules or
+ * health checks, depending on `cmd' (ILBD_RETRIEVE_SG_NAMES,
+ * ILBD_RETRIEVE_RULE_NAMES or ILBD_RETRIEVE_HC_NAMES).
+ *
+ *	rbuf	reply buffer; receives an OK reply header followed by an
+ *		ilbd_namelist_t
+ *	rbufsz	in: capacity of rbuf in bytes; out: number of bytes used
+ *
+ * Names that do not fit into the buffer any more are silently left out.
+ * Returns ILB_STATUS_OK, or ILB_STATUS_INVAL_CMD for any other `cmd'.
+ */
+ilb_status_t
+ilbd_retrieve_names(ilbd_cmd_t cmd, uint32_t *rbuf, size_t *rbufsz)
+{
+	ilb_status_t	rc = ILB_STATUS_OK;
+	ilbd_namelist_t	*nlist;
+	size_t		tmp_rbufsz;
+
+	tmp_rbufsz = *rbufsz;
+	/* Set up the reply buffer.  rbufsz will be set to the new size. */
+	ilbd_reply_ok(rbuf, rbufsz);
+
+	/* Calculate how much space is left for holding name info. */
+	*rbufsz += sizeof (ilbd_namelist_t);
+	tmp_rbufsz -= *rbufsz;
+
+	nlist = (ilbd_namelist_t *)&((ilb_comm_t *)rbuf)->ic_data;
+	nlist->ilbl_count = 0;
+
+	switch (cmd) {
+	case ILBD_RETRIEVE_SG_NAMES: {
+		ilbd_sg_t	*sg;
+
+		for (sg = list_head(&ilbd_sg_hlist);
+		    sg != NULL && tmp_rbufsz >= sizeof (ilbd_name_t);
+		    sg = list_next(&ilbd_sg_hlist, sg),
+		    tmp_rbufsz -= sizeof (ilbd_name_t)) {
+			(void) strlcpy(nlist->ilbl_name[nlist->ilbl_count++],
+			    sg->isg_name, sizeof (ilbd_name_t));
+		}
+		break;
+	}
+	case ILBD_RETRIEVE_RULE_NAMES: {
+		ilbd_rule_t	*irl;
+		extern list_t	ilbd_rule_hlist;
+
+		for (irl = list_head(&ilbd_rule_hlist);
+		    irl != NULL && tmp_rbufsz >= sizeof (ilbd_name_t);
+		    irl = list_next(&ilbd_rule_hlist, irl),
+		    tmp_rbufsz -= sizeof (ilbd_name_t)) {
+			(void) strlcpy(nlist->ilbl_name[nlist->ilbl_count++],
+			    irl->irl_name, sizeof (ilbd_name_t));
+		}
+		break;
+	}
+	case ILBD_RETRIEVE_HC_NAMES: {
+		extern list_t	ilbd_hc_list;
+		ilbd_hc_t	*hc;
+
+		/*
+		 * Account for every name copied, exactly as the two loops
+		 * above do; without that the space check never triggers
+		 * and names are written past the end of the reply buffer.
+		 */
+		for (hc = list_head(&ilbd_hc_list);
+		    hc != NULL && tmp_rbufsz >= sizeof (ilbd_name_t);
+		    hc = list_next(&ilbd_hc_list, hc),
+		    tmp_rbufsz -= sizeof (ilbd_name_t)) {
+			(void) strlcpy(nlist->ilbl_name[nlist->ilbl_count++],
+			    hc->ihc_name, sizeof (ilbd_name_t));
+		}
+		break;
+	}
+	default:
+		logdebug("ilbd_retrieve_names: unknown command");
+		return (ILB_STATUS_INVAL_CMD);
+	}
+
+	*rbufsz += nlist->ilbl_count * sizeof (ilbd_name_t);
+	return (rc);
+}
+
+/*
+ * Fill the reply buffer with the servers of the server group `sg_name'.
+ *
+ *	rbuf	reply buffer; receives an OK reply header followed by an
+ *		ilb_sg_info_t whose sg_servers[] holds a copy of every
+ *		server that fits
+ *	rbufsz	in: capacity of rbuf in bytes; out: number of bytes used
+ *
+ * Returns ILB_STATUS_SGUNAVAIL if the group does not exist (rbuf and
+ * rbufsz are not touched then), ILB_STATUS_OK otherwise.
+ */
+ilb_status_t
+ilbd_retrieve_sg_hosts(const char *sg_name, uint32_t *rbuf, size_t *rbufsz)
+{
+	ilbd_sg_t	*grp;
+	ilbd_srv_t	*member;
+	list_t		*members;
+	ilb_sg_info_t	*reply;
+	size_t		space_left;
+
+	grp = i_find_sg_byname(sg_name);
+	if (grp == NULL) {
+		logdebug("ilbd_retrieve_sg_hosts: server group"
+		    " %s not found", sg_name);
+		return (ILB_STATUS_SGUNAVAIL);
+	}
+	members = &grp->isg_srvlist;
+
+	space_left = *rbufsz;
+	ilbd_reply_ok(rbuf, rbufsz);
+
+	/* What remains after header and fixed part is for the servers. */
+	*rbufsz += sizeof (ilb_sg_info_t);
+	space_left -= *rbufsz;
+
+	reply = (ilb_sg_info_t *)&((ilb_comm_t *)rbuf)->ic_data;
+	(void) strlcpy(reply->sg_name, sg_name, sizeof (reply->sg_name));
+	reply->sg_srvcount = 0;
+
+	for (member = list_head(members);
+	    member != NULL && space_left >= sizeof (ilb_sg_srv_t);
+	    member = list_next(members, member)) {
+		reply->sg_servers[reply->sg_srvcount] = member->isv_srv;
+		reply->sg_srvcount++;
+		space_left -= sizeof (ilb_sg_srv_t);
+	}
+
+	*rbufsz += reply->sg_srvcount * sizeof (ilb_sg_srv_t);
+	return (ILB_STATUS_OK);
+}
+
+/*
+ * Translate a daemon-side server status indication into the kernel
+ * command to issue: enabling a server and declaring it alive both map to
+ * ILB_ENABLE_SERVERS, disabling it and declaring it dead both map to
+ * ILB_DISABLE_SERVERS.  For any other value the result is not set.
+ *
+ * this mapping function works on the assumption that HC only is
+ * active when a server is enabled.
+ */
+static ilb_cmd_t
+i_srvcmd_d2k(ilbd_srv_status_ind_t dcmd)
+{
+	ilb_cmd_t	kcmd;
+
+	if (dcmd == stat_enable_server || dcmd == stat_declare_srv_alive)
+		kcmd = ILB_ENABLE_SERVERS;
+	else if (dcmd == stat_disable_server || dcmd == stat_declare_srv_dead)
+		kcmd = ILB_DISABLE_SERVERS;
+
+	return (kcmd);
+}
+
+/*
+ * Ask the kernel to enable or disable, as selected by `cmd', the single
+ * server with address `addr' for the rule named `rlname'.
+ *
+ * Returns the status of do_ioctl() if that is not ILB_STATUS_OK.  If the
+ * ioctl went through but the kernel flagged an error for the server,
+ * that errno value is logged and returned mapped to an ilb_status_t.
+ */
+ilb_status_t
+ilbd_k_Xable_server(const struct in6_addr *addr, const char *rlname,
+    ilbd_srv_status_ind_t cmd)
+{
+	ilb_servers_cmd_t	req;
+	ilb_status_t		ioc_rc;
+	int			srv_err;
+
+	/* a request for exactly one server */
+	req.cmd = i_srvcmd_d2k(cmd);
+	req.num_servers = 1;
+	(void) strlcpy(req.name, rlname, sizeof (req.name));
+	req.servers[0].addr = *addr;
+	req.servers[0].err = 0;
+
+	ioc_rc = do_ioctl(&req, 0);
+	if (ioc_rc != ILB_STATUS_OK)
+		return (ioc_rc);
+
+	srv_err = req.servers[0].err;
+	if (srv_err == 0)
+		return (ioc_rc);
+
+	logdebug("ilbd_k_Xable_server: error %s occurred",
+	    strerror(srv_err));
+	return (ilb_map_errno2ilbstat(srv_err));
+}
+
+/*
+ * Shorthands that test or update the enabled/disabled state kept in the
+ * sgs_flags member of the structure `s' points to; they forward to the
+ * ILB_IS_SRV_ENABLED(), ILB_SET_ENABLED() and ILB_SET_DISABLED() macros.
+ */
+#define IS_SRV_ENABLED(s) ILB_IS_SRV_ENABLED((s)->sgs_flags)
+#define IS_SRV_DISABLED(s) (!(IS_SRV_ENABLED(s)))
+
+#define SET_SRV_ENABLED(s) ILB_SET_ENABLED((s)->sgs_flags)
+#define SET_SRV_DISABLED(s) ILB_SET_DISABLED((s)->sgs_flags)
+
+/*
+ * Write the audit record for an enable-server or disable-server request:
+ * stat_disable_server is recorded as ILBD_DISABLE_SERVER and
+ * stat_enable_server as ILBD_ENABLE_SERVER.  Nothing is recorded for any
+ * other value of `cmd'.
+ */
+static void
+i_audit_Xable_event(audit_sg_event_data_t *audit_sg_data,
+    ilbd_srv_status_ind_t cmd, ilb_status_t rc, ucred_t *ucredp)
+{
+	switch (cmd) {
+	case stat_disable_server:
+		ilbd_audit_server_event(audit_sg_data,
+		    ILBD_DISABLE_SERVER, rc, ucredp);
+		break;
+	case stat_enable_server:
+		ilbd_audit_server_event(audit_sg_data,
+		    ILBD_ENABLE_SERVER, rc, ucredp);
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Common implementation of ilbd_enable_server() and
+ * ilbd_disable_server().
+ *
+ *	sg	request; sg_servers[0].sgs_srvID names the server, exactly
+ *		one server must be listed (asserted)
+ *	ps	requesting user, checked for enable authorization
+ *	cmd	stat_enable_server or stat_disable_server
+ *	ucredp	credentials handed through to the audit layer
+ *
+ * The server group is derived from the server ID, which has the form
+ * "_"<SG name>"."<number>.  The server is then enabled/disabled in the
+ * kernel, and in the health check subsystem, for every rule attached to
+ * the group.  If that changes the state recorded for the server, the new
+ * state is made persistent.  When a step fails, the rules already
+ * handled are switched back.
+ *
+ * Every outcome is audited under the command that was actually requested.
+ *
+ * Returns ILB_STATUS_OK on success; ILB_STATUS_EINVAL for a malformed
+ * server ID; ILB_STATUS_ENOENT if group or server is unknown;
+ * ILB_STATUS_INVAL_ENBSRVR if no rule is attached to the group;
+ * otherwise the status of the failing step.
+ */
+static ilb_status_t
+ilbd_Xable_server(ilb_sg_info_t *sg, const struct passwd *ps,
+    ilbd_srv_status_ind_t cmd, ucred_t *ucredp)
+{
+	ilb_status_t	rc = ILB_STATUS_OK;
+	ilbd_sg_t	*isg;
+	ilbd_srv_t	*tmp_srv;
+	ilb_sg_srv_t	*srv;
+	ilbd_rule_t	*irl;
+	char		*dot;
+	int		scf_name_len = ILBD_MAX_NAME_LEN;
+	int		scf_val_len = ILBD_MAX_VALUE_LEN;
+	char		prop_name[scf_name_len];
+	ilb_ip_addr_t	ipaddr;
+	void		*addrptr;
+	char		ipstr[INET6_ADDRSTRLEN], valstr[scf_val_len];
+	int		ipver, vallen;
+	char		sgname[ILB_NAMESZ];
+	uint32_t	nflags;
+	ilbd_srv_status_ind_t	u_cmd;
+	audit_sg_event_data_t	audit_sg_data;
+	char		addrstr_buf[INET6_ADDRSTRLEN];
+
+	(void) memset(&audit_sg_data, 0, sizeof (audit_sg_data));
+
+	/* we currently only implement a "list" of one */
+	assert(sg->sg_srvcount == 1);
+
+	srv = &sg->sg_servers[0];
+	audit_sg_data.ed_serverid = srv->sgs_srvID;
+	audit_sg_data.ed_server_address = NULL;
+
+	rc = ilbd_check_client_enable_auth(ps);
+	if (rc != ILB_STATUS_OK) {
+		/* record the refusal under the command really requested */
+		i_audit_Xable_event(&audit_sg_data, cmd, rc, ucredp);
+		return (rc);
+	}
+
+	if (srv->sgs_srvID[0] != ILB_SRVID_PREFIX) {
+		i_audit_Xable_event(&audit_sg_data, cmd, ILB_STATUS_EINVAL,
+		    ucredp);
+		return (ILB_STATUS_EINVAL);
+	}
+
+	/*
+	 * the following asserts that serverIDs are constructed
+	 * along the pattern "_"<SG name>"."<number>
+	 * so we look for the final "." to recreate the SG name.
+	 */
+	(void) strlcpy(sgname, srv->sgs_srvID + 1, sizeof (sgname));
+	dot = strrchr(sgname, (int)'.');
+	if (dot == NULL) {
+		i_audit_Xable_event(&audit_sg_data, cmd, ILB_STATUS_EINVAL,
+		    ucredp);
+		return (ILB_STATUS_EINVAL);
+	}
+
+	/* make the non-sg_name part "invisible" */
+	*dot = '\0';
+	isg = i_find_sg_byname(sgname);
+	if (isg == NULL) {
+		i_audit_Xable_event(&audit_sg_data, cmd, ILB_STATUS_ENOENT,
+		    ucredp);
+		return (ILB_STATUS_ENOENT);
+	}
+
+	tmp_srv = i_find_srv(&isg->isg_srvlist, srv, MODE_SRVID);
+	if (tmp_srv == NULL) {
+		i_audit_Xable_event(&audit_sg_data, cmd, ILB_STATUS_ENOENT,
+		    ucredp);
+		return (ILB_STATUS_ENOENT);
+	}
+
+	/*
+	 * if server's servergroup is not associated with
+	 * a rule, do not enable it.
+	 */
+	irl = list_head(&isg->isg_rulelist);
+	if (irl == NULL) {
+		i_audit_Xable_event(&audit_sg_data, cmd,
+		    ILB_STATUS_INVAL_ENBSRVR, ucredp);
+		return (ILB_STATUS_INVAL_ENBSRVR);
+	}
+	/* Fill in the server IP address for audit record */
+	ilbd_addr2str(&tmp_srv->isv_addr, addrstr_buf,
+	    sizeof (addrstr_buf));
+	audit_sg_data.ed_server_address = addrstr_buf;
+
+	/*
+	 * We have found the server in memory, perform the following
+	 * tasks.
+	 *
+	 * 1. For every rule associated with this SG,
+	 *    - tell the kernel
+	 *    - tell the hc
+	 * 2. Update our internal state and persistent configuration
+	 *    if the new state is not the same as the old one.
+	 */
+	/* 1. */
+	for (; irl != NULL; irl = list_next(&isg->isg_rulelist, irl)) {
+		rc = ilbd_k_Xable_server(&tmp_srv->isv_addr,
+		    irl->irl_name, cmd);
+		if (rc != ILB_STATUS_OK) {
+			i_audit_Xable_event(&audit_sg_data, cmd, rc, ucredp);
+			goto rollback_rules;
+		}
+		if (!RULE_HAS_HC(irl))
+			continue;
+
+		if (cmd == stat_disable_server) {
+			rc = ilbd_hc_disable_server(irl,
+			    &tmp_srv->isv_srv);
+		} else {
+			assert(cmd == stat_enable_server);
+			rc = ilbd_hc_enable_server(irl,
+			    &tmp_srv->isv_srv);
+		}
+		/* a health check failure is logged but does not abort */
+		if (rc != ILB_STATUS_OK) {
+			logdebug("ilbd_Xable_server: cannot toggle srv "
+			    "timer, rc =%d, srv =%s%d\n", rc,
+			    tmp_srv->isv_srvID,
+			    tmp_srv->isv_id);
+		}
+	}
+
+	/* 2. */
+	if ((cmd == stat_disable_server &&
+	    IS_SRV_DISABLED(&tmp_srv->isv_srv)) ||
+	    (cmd == stat_enable_server &&
+	    IS_SRV_ENABLED(&tmp_srv->isv_srv))) {
+		/* recorded state is already the requested one */
+		i_audit_Xable_event(&audit_sg_data, cmd, ILB_STATUS_OK,
+		    ucredp);
+		return (ILB_STATUS_OK);
+	}
+
+	nflags = tmp_srv->isv_flags;
+	if (cmd == stat_enable_server)
+		ILB_SET_ENABLED(nflags);
+	else
+		ILB_SET_DISABLED(nflags);
+
+	IP_COPY_IMPL_2_CLI(&tmp_srv->isv_addr, &ipaddr);
+	ipver = GET_AF(&tmp_srv->isv_addr);
+	vallen = (ipver == AF_INET) ? INET_ADDRSTRLEN :
+	    INET6_ADDRSTRLEN;
+	addrptr = (ipver == AF_INET) ? (void *)&ipaddr.ia_v4 :
+	    (void *)&ipaddr.ia_v6;
+	if (inet_ntop(ipver, addrptr, ipstr, vallen) == NULL) {
+		logerr("ilbd_Xable_server: failed transfer ip addr to"
+		    " str");
+		if (errno == ENOSPC)
+			rc = ILB_STATUS_ENOMEM;
+		else
+			rc = ILB_STATUS_GENERIC;
+		i_audit_Xable_event(&audit_sg_data, cmd, rc, ucredp);
+		goto rollback_rules;
+	}
+
+	/* property value: address;family;minport-maxport;flags */
+	(void) snprintf(valstr, sizeof (valstr), "%s;%d;%d-%d;%d",
+	    ipstr, ipver,
+	    ntohs(tmp_srv->isv_minport),
+	    ntohs(tmp_srv->isv_maxport), nflags);
+	(void) snprintf(prop_name, sizeof (prop_name), "server%d",
+	    tmp_srv->isv_id);
+
+	/* the in-memory state only follows a successful save */
+	switch (cmd) {
+	case stat_disable_server:
+		rc = i_ilbd_save_sg(isg, ILBD_SCF_ENABLE_DISABLE,
+		    prop_name, valstr);
+		if (rc == ILB_STATUS_OK)
+			SET_SRV_DISABLED(&tmp_srv->isv_srv);
+		break;
+	case stat_enable_server:
+		rc = i_ilbd_save_sg(isg, ILBD_SCF_ENABLE_DISABLE,
+		    prop_name, valstr);
+		if (rc == ILB_STATUS_OK)
+			SET_SRV_ENABLED(&tmp_srv->isv_srv);
+		break;
+	}
+	if (rc == ILB_STATUS_OK) {
+		i_audit_Xable_event(&audit_sg_data, cmd, ILB_STATUS_OK,
+		    ucredp);
+		return (ILB_STATUS_OK);
+	}
+
+	/* saving failed: audit that as well before undoing the change */
+	i_audit_Xable_event(&audit_sg_data, cmd, rc, ucredp);
+
+rollback_rules:
+	if (cmd == stat_disable_server)
+		u_cmd = stat_enable_server;
+	else
+		u_cmd = stat_disable_server;
+
+	/*
+	 * irl is the rule that failed, or NULL if all rules had been
+	 * handled; undo the rules before it, walking backwards.
+	 */
+	if (irl == NULL)
+		irl = list_tail(&isg->isg_rulelist);
+	else
+		irl = list_prev(&isg->isg_rulelist, irl);
+
+	for (; irl != NULL; irl = list_prev(&isg->isg_rulelist, irl)) {
+		(void) ilbd_k_Xable_server(&tmp_srv->isv_addr,
+		    irl->irl_name, u_cmd);
+		if (!RULE_HAS_HC(irl))
+			continue;
+
+		if (u_cmd == stat_disable_server)
+			(void) ilbd_hc_disable_server(irl, &tmp_srv->isv_srv);
+		else
+			(void) ilbd_hc_enable_server(irl, &tmp_srv->isv_srv);
+	}
+
+	return (rc);
+}
+
+/*
+ * Disable the server named in `sg': forwards to ilbd_Xable_server() with
+ * stat_disable_server and returns its status.
+ */
+ilb_status_t
+ilbd_disable_server(ilb_sg_info_t *sg, const struct passwd *ps,
+ ucred_t *ucredp)
+{
+ return (ilbd_Xable_server(sg, ps, stat_disable_server, ucredp));
+}
+
+/*
+ * Enable the server named in `sg': forwards to ilbd_Xable_server() with
+ * stat_enable_server and returns its status.
+ */
+ilb_status_t
+ilbd_enable_server(ilb_sg_info_t *sg, const struct passwd *ps,
+ ucred_t *ucredp)
+{
+ return (ilbd_Xable_server(sg, ps, stat_enable_server, ucredp));
+}
+
+/*
+ * Look up, within the server group sg->sg_name, the server that has the
+ * IP address given in sg->sg_servers[0], and return its server ID.
+ *
+ *	rbuf	reply buffer; receives an OK reply header followed by an
+ *		ilb_sg_srv_t carrying the address and the server ID found
+ *	rbufsz	updated by ilbd_reply_ok() and then enlarged by the size
+ *		of that ilb_sg_srv_t - this happens ahead of the lookups,
+ *		so it also holds on the error returns
+ *
+ * Returns ILB_STATUS_SGUNAVAIL if the group does not exist,
+ * ILB_STATUS_ENOENT if no server of the group has that address,
+ * ILB_STATUS_OK otherwise.
+ */
+ilb_status_t
+ilbd_address_to_srvID(ilb_sg_info_t *sg, uint32_t *rbuf, size_t *rbufsz)
+{
+	ilb_sg_srv_t	*answer;
+	ilbd_sg_t	*grp;
+	ilbd_srv_t	*hit;
+
+	ilbd_reply_ok(rbuf, rbufsz);
+	*rbufsz += sizeof (ilb_sg_srv_t);
+	answer = (ilb_sg_srv_t *)&((ilb_comm_t *)rbuf)->ic_data;
+
+	if ((grp = i_find_sg_byname(sg->sg_name)) == NULL)
+		return (ILB_STATUS_SGUNAVAIL);
+
+	/* the reply slot doubles as search key */
+	answer->sgs_addr = sg->sg_servers[0].sgs_addr;
+	hit = i_find_srv(&grp->isg_srvlist, answer, MODE_ADDR);
+	if (hit == NULL)
+		return (ILB_STATUS_ENOENT);
+
+	(void) strlcpy(answer->sgs_srvID, hit->isv_srvID,
+	    sizeof (answer->sgs_srvID));
+	return (ILB_STATUS_OK);
+}
+
+/*
+ * Look up, within the server group sg->sg_name, the server that has the
+ * server ID given in sg->sg_servers[0], and return its IP address.
+ *
+ *	rbuf	reply buffer; receives an OK reply header followed by an
+ *		ilb_sg_srv_t carrying the server ID and the address found
+ *	rbufsz	updated by ilbd_reply_ok(); enlarged by the size of that
+ *		ilb_sg_srv_t only if the server was found
+ *
+ * Returns ILB_STATUS_SGUNAVAIL if the group does not exist,
+ * ILB_STATUS_ENOENT if no server of the group has that ID,
+ * ILB_STATUS_OK otherwise.
+ */
+ilb_status_t
+ilbd_srvID_to_address(ilb_sg_info_t *sg, uint32_t *rbuf, size_t *rbufsz)
+{
+	ilb_sg_srv_t	*answer;
+	ilbd_sg_t	*grp;
+	ilbd_srv_t	*hit;
+
+	ilbd_reply_ok(rbuf, rbufsz);
+	answer = (ilb_sg_srv_t *)&((ilb_comm_t *)rbuf)->ic_data;
+
+	if ((grp = i_find_sg_byname(sg->sg_name)) == NULL)
+		return (ILB_STATUS_SGUNAVAIL);
+
+	/* the reply slot doubles as search key */
+	(void) strlcpy(answer->sgs_srvID, sg->sg_servers[0].sgs_srvID,
+	    sizeof (answer->sgs_srvID));
+	hit = i_find_srv(&grp->isg_srvlist, answer, MODE_SRVID);
+	if (hit == NULL)
+		return (ILB_STATUS_ENOENT);
+
+	answer->sgs_addr = hit->isv_addr;
+	*rbufsz += sizeof (ilb_sg_srv_t);
+	return (ILB_STATUS_OK);
+}
+
+/*
+ * Write the string form of `ipaddr' into addrstr_buf, which is `sz' bytes
+ * large: the address is copied into an ilb_ip_addr_t with
+ * IP_COPY_IMPL_2_CLI() and then handed to addr2str().
+ */
+void
+ilbd_addr2str(struct in6_addr *ipaddr, char *addrstr_buf, size_t sz)
+{
+ ilb_ip_addr_t ilb_ip;
+
+ IP_COPY_IMPL_2_CLI(ipaddr, &ilb_ip);
+ addr2str(ilb_ip, addrstr_buf, sz);
+}
+
+/*
+ * Convert an IP address to an address string.
+ *
+ *	ip	address to convert; ia_af selects between ia_v4 and ia_v6
+ *	buf	receives the string
+ *	sz	size of buf in bytes; at least one byte is required
+ *
+ * buf is set to the empty string if the address is the unspecified
+ * address (all zero) of its family, if the family is neither AF_INET nor
+ * AF_INET6, or if the conversion fails.
+ */
+void
+addr2str(ilb_ip_addr_t ip, char *buf, size_t sz)
+{
+
+	switch (ip.ia_af) {
+	case AF_INET:
+		/* look at the address itself, not at where it is stored */
+		if (*(uint32_t *)&(ip).ia_v4 == 0) {
+			buf[0] = '\0';
+			break;
+		}
+		if (inet_ntop(AF_INET, (void *)&(ip).ia_v4, buf, sz) == NULL)
+			buf[0] = '\0';
+		break;
+	case AF_INET6:
+		if (IN6_IS_ADDR_UNSPECIFIED(&(ip).ia_v6)) {
+			buf[0] = '\0';
+			break;
+		}
+		if (inet_ntop(ip.ia_af, (void *)&(ip).ia_v6, buf, sz) == NULL)
+			buf[0] = '\0';
+		break;
+	default:
+		buf[0] = '\0';
+		break;
+	}
+}
+
+/*
+ * Translate an ilb_status_t into the value stored in an audit record:
+ * either a similar errno value from errno.h or one of the ADT_FAIL_VALUE_*
+ * codes from adt_event.h.  Every status without an entry below is
+ * reported as ADT_FAIL_VALUE_UNKNOWN.
+ */
+int
+ilberror2auditerror(ilb_status_t rc)
+{
+	switch (rc) {
+	case ILB_STATUS_CFGAUTH:
+		return (ADT_FAIL_VALUE_AUTH);
+
+	case ILB_STATUS_ENOMEM:
+		return (ENOMEM);
+
+	case ILB_STATUS_ENOENT:
+	case ILB_STATUS_ENOHCINFO:
+	case ILB_STATUS_INVAL_HCTESTTYPE:
+	case ILB_STATUS_INVAL_CMD:
+	case ILB_STATUS_DUP_RULE:
+	case ILB_STATUS_ENORULE:
+	case ILB_STATUS_SGUNAVAIL:
+		return (ENOENT);
+
+	case ILB_STATUS_EINVAL:
+	case ILB_STATUS_MISMATCHSG:
+	case ILB_STATUS_MISMATCHH:
+	case ILB_STATUS_BADSG:
+	case ILB_STATUS_INVAL_SRVR:
+	case ILB_STATUS_INVAL_ENBSRVR:
+	case ILB_STATUS_BADPORT:
+		return (EINVAL);
+
+	case ILB_STATUS_EEXIST:
+	case ILB_STATUS_SGEXISTS:
+		return (EEXIST);
+
+	case ILB_STATUS_EWOULDBLOCK:
+		return (EWOULDBLOCK);
+
+	case ILB_STATUS_INPROGRESS:
+		return (EINPROGRESS);
+
+	case ILB_STATUS_INTERNAL:
+	case ILB_STATUS_CALLBACK:
+	case ILB_STATUS_PERMIT:
+	case ILB_STATUS_RULE_NO_HC:
+		return (ADT_FAIL_VALUE_PROGRAM);
+
+	case ILB_STATUS_SOCKET:
+		return (ENOTSOCK);
+
+	case ILB_STATUS_READ:
+	case ILB_STATUS_WRITE:
+		return (ENOTCONN);
+
+	case ILB_STATUS_SGINUSE:
+		return (EADDRINUSE);
+
+	case ILB_STATUS_SEND:
+		return (ECOMM);
+
+	case ILB_STATUS_SGFULL:
+		return (EOVERFLOW);
+
+	case ILB_STATUS_NAMETOOLONG:
+		return (ENAMETOOLONG);
+
+	case ILB_STATUS_SRVUNAVAIL:
+		return (EHOSTUNREACH);
+
+	default:
+		return (ADT_FAIL_VALUE_UNKNOWN);
+	}
+}
diff --git a/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_support.c b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_support.c
new file mode 100644
index 0000000000..a5fe6223fd
--- /dev/null
+++ b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_support.c
@@ -0,0 +1,296 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+#include <stropts.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/stropts.h>
+#include <sys/sockio.h>
+#include <errno.h>
+#include <sys/list.h>
+#include <auth_attr.h>
+#include <auth_list.h>
+#include <secdb.h>
+#include <libilb.h>
+#include "libilb_impl.h"
+#include "ilbd.h"
+
+/*
+ * When B_TRUE, ilbd_log() and logperror() write to stderr instead of
+ * syslog.  Only ilbd_enable_debug() sets it (the -d option); nothing in
+ * this file clears it again.
+ */
+static boolean_t ilbd_debugging = B_FALSE;
+
+/*
+ * Socket to issue ioctl() to the kernel; stays -1 until do_ioctl()
+ * creates it on first use.
+ */
+static int ksock = -1;
+
+void
+ilbd_enable_debug(void)
+{
+ ilbd_debugging = B_TRUE;
+}
+
+boolean_t
+is_debugging_on(void)
+{
+ return (ilbd_debugging);
+}
+
+/*
+ * All routines log to syslog, unless the daemon is running in
+ * the foreground, in which case the logging goes to stderr.
+ * The following logging functions are available:
+ *
+ *
+ * logdebug(): A printf-like function for outputting debug messages
+ * (messages at LOG_DEBUG) that are only of use to developers.
+ *
+ * logerr(): A printf-like function for outputting error messages
+ * (messages at LOG_ERR) from the daemon.
+ *
+ * logperror*(): A set of functions used to output error messages
+ * (messages at LOG_ERR); these automatically append strerror(errno)
+ * and a newline to the message passed to them.
+ *
+ * NOTE: since the logging functions write to syslog, the messages passed
+ * to them are not eligible for localization. Thus, gettext() must
+ * *not* be used.
+ *
+ */
+/*
+ * Emit a printf-style message.  With debugging enabled the text goes to
+ * stderr followed by a newline and 'pri' is ignored; otherwise the
+ * message is handed to vsyslog() at priority 'pri'.
+ */
+/* PRINTFLIKE2 */
+void
+ilbd_log(int pri, const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	if (ilbd_debugging != B_TRUE) {
+		vsyslog(pri, fmt, args);
+	} else {
+		(void) vfprintf(stderr, fmt, args);
+		(void) fprintf(stderr, "\n");
+	}
+	va_end(args);
+}
+
+/*
+ * Report 'str' together with the text for the current errno, either on
+ * stderr (debugging enabled) or via syslog at LOG_ERR.  'str' is printed
+ * verbatim through "%s"; it is not interpreted as a format string.
+ */
+void
+logperror(const char *str)
+{
+	if (ilbd_debugging != B_TRUE) {
+		syslog(LOG_ERR, "%s: %m", str);
+		return;
+	}
+	(void) fprintf(stderr, "%s: %s\n", str, strerror(errno));
+}
+
+
+/*
+ * Check that the user described by 'pwd' holds NET_ILB_CONFIG_AUTH.
+ * Returns ILB_STATUS_OK when chkauthattr() grants the authorization and
+ * ILB_STATUS_CFGAUTH (after a debug log entry) when it does not.
+ */
+ilb_status_t
+ilbd_check_client_config_auth(const struct passwd *pwd)
+{
+	const char *user = pwd->pw_name;
+
+	if (chkauthattr(NET_ILB_CONFIG_AUTH, user) != 0)
+		return (ILB_STATUS_OK);
+
+	logdebug("user %s is not authorized for"
+	    " configuration operation", user);
+	return (ILB_STATUS_CFGAUTH);
+}
+
+/*
+ * Check that the user described by 'pwd' holds NET_ILB_ENABLE_AUTH.
+ * Returns ILB_STATUS_OK when chkauthattr() grants the authorization and
+ * ILB_STATUS_CFGAUTH (after a debug log entry) when it does not.
+ */
+ilb_status_t
+ilbd_check_client_enable_auth(const struct passwd *pwd)
+{
+	const char *user = pwd->pw_name;
+
+	if (chkauthattr(NET_ILB_ENABLE_AUTH, user) != 0)
+		return (ILB_STATUS_OK);
+
+	logdebug("user %s is not authorized for"
+	    " enable/disable operation", user);
+	return (ILB_STATUS_CFGAUTH);
+}
+
+/*
+ * Convert an errno value (see /usr/include/sys/errno.h) into the
+ * corresponding ilb_status_t.  Only a handful of errnos are recognised;
+ * every other value, including ones that might deserve their own status,
+ * is reported as ILB_STATUS_INTERNAL.
+ */
+ilb_status_t
+ilb_map_errno2ilbstat(int err)
+{
+	switch (err) {
+	case 0:
+		/* for completeness' sake */
+		return (ILB_STATUS_OK);
+	case EINVAL:
+		return (ILB_STATUS_EINVAL);
+	case ENOENT:
+		return (ILB_STATUS_ENOENT);
+	case ENOMEM:
+		return (ILB_STATUS_ENOMEM);
+	case EINPROGRESS:
+		return (ILB_STATUS_INPROGRESS);
+	case EEXIST:
+		return (ILB_STATUS_EEXIST);
+	default:
+		return (ILB_STATUS_INTERNAL);
+	}
+}
+
+/*
+ * Derive the size in bytes of the kernel command at 'cmdp' from the
+ * command code stored in its 'cmd' member.
+ *
+ * Fixed-size commands use sizeof of their structure.  Commands carrying
+ * an array add (count - 1) times the size of the structure's array
+ * member to the structure size.
+ *
+ * Returns -1 if the command code is not one handled here.
+ */
+static int
+i_get_kcmd_sz(void *cmdp)
+{
+	switch (((ilb_rule_cmd_t *)cmdp)->cmd) {
+	case ILB_DESTROY_RULE:
+	case ILB_ENABLE_RULE:
+	case ILB_DISABLE_RULE:
+		return (sizeof (ilb_name_cmd_t));
+
+	case ILB_CREATE_RULE:
+	case ILB_LIST_RULE:
+		return (sizeof (ilb_rule_cmd_t));
+
+	case ILB_NUM_RULES:
+		return (sizeof (ilb_num_rules_cmd_t));
+
+	case ILB_NUM_SERVERS:
+		return (sizeof (ilb_num_servers_cmd_t));
+
+	case ILB_ADD_SERVERS: {
+		ilb_servers_info_cmd_t *sic = (ilb_servers_info_cmd_t *)cmdp;
+
+		return (sizeof (*sic) +
+		    ((sic->num_servers - 1) * sizeof (sic->servers)));
+	}
+
+	case ILB_RULE_NAMES: {
+		ilb_rule_names_cmd_t *rnc = (ilb_rule_names_cmd_t *)cmdp;
+
+		return (sizeof (*rnc) +
+		    ((rnc->num_names - 1) * sizeof (rnc->buf)));
+	}
+
+	case ILB_DEL_SERVERS:
+	case ILB_ENABLE_SERVERS:
+	case ILB_DISABLE_SERVERS: {
+		ilb_servers_cmd_t *sc = (ilb_servers_cmd_t *)cmdp;
+
+		return (sizeof (*sc) +
+		    ((sc->num_servers - 1) * sizeof (sc->servers)));
+	}
+
+	default:
+		return (-1);
+	}
+}
+
+/*
+ * Send the ILB kernel command at 'cmdp' to the kernel with an I_STR
+ * ioctl carrying SIOCILB.  The socket used for this is created on the
+ * first call and kept in 'ksock' for later calls.
+ *
+ * parameter 'sz' is optional (indicated by == 0); if it's not set
+ * we try to derive it from cmdp->cmd
+ *
+ * Returns ILB_STATUS_OK on success, ILB_STATUS_INTERNAL if the socket
+ * cannot be created, ILB_STATUS_INVAL_CMD if the size cannot be derived,
+ * and otherwise the ilb_status_t mapped from the errno left by ioctl().
+ */
+ilb_status_t
+do_ioctl(void *cmdp, ssize_t sz)
+{
+	struct strioctl ioc;
+	int i_rc;
+	int err;
+
+	if (ksock == -1) {
+		ksock = socket(AF_INET, SOCK_DGRAM, 0);
+		if (ksock == -1) {
+			logperror("do_ioctl: AF_INET socket call"
+			    " failed");
+			return (ILB_STATUS_INTERNAL);
+		}
+	}
+
+	(void) memset(&ioc, 0, sizeof (ioc));
+	ioc.ic_cmd = SIOCILB;
+	ioc.ic_timout = 0;
+	ioc.ic_dp = cmdp;
+
+	if (sz == 0) {
+		sz = i_get_kcmd_sz(cmdp);
+
+		if (sz == -1) {
+			logdebug("do_ioctl: unknown command");
+			return (ILB_STATUS_INVAL_CMD);
+		}
+	}
+
+	ioc.ic_len = sz;
+
+	i_rc = ioctl(ksock, I_STR, (caddr_t)&ioc);
+	if (i_rc == -1) {
+		/*
+		 * Capture errno before logging: the logging path (stdio
+		 * or syslog) is allowed to change it, and the value is
+		 * needed again for the status mapping below.
+		 */
+		err = errno;
+		logdebug("do_ioctl: SIOCILB ioctl (%d) failed: %s",
+		    *(ilb_cmd_t *)cmdp, strerror(err));
+		return (ilb_map_errno2ilbstat(err));
+	}
+
+	return (ILB_STATUS_OK);
+}
+
+/*
+ * Fill 'rbuf' with a success reply for a client request and store the
+ * reply length in *rbufsz.  The caller must supply a buffer of at least
+ * sizeof (ilb_comm_t) bytes; no bounds check is done here.
+ */
+void
+ilbd_reply_ok(uint32_t *rbuf, size_t *rbufsz)
+{
+	ilb_comm_t *reply = (ilb_comm_t *)rbuf;
+
+	/* A single request/response exchange is the default. */
+	reply->ic_flags = ILB_COMM_END;
+	reply->ic_cmd = ILBD_CMD_OK;
+	*rbufsz = sizeof (ilb_comm_t);
+}
+
+/*
+ * Fill 'rbuf' with an error reply carrying 'status' and store the reply
+ * length in *rbufsz.  The status is written at the reply's ic_data
+ * member, so the caller must supply a buffer of at least
+ * sizeof (ilb_comm_t) + sizeof (ilb_status_t) bytes; no bounds check is
+ * done here.
+ */
+void
+ilbd_reply_err(uint32_t *rbuf, size_t *rbufsz, ilb_status_t status)
+{
+	ilb_comm_t *reply = (ilb_comm_t *)rbuf;
+	ilb_status_t *payload = (ilb_status_t *)&reply->ic_data;
+
+	/* A single request/response exchange is the default. */
+	reply->ic_flags = ILB_COMM_END;
+	reply->ic_cmd = ILBD_CMD_ERROR;
+	*payload = status;
+	*rbufsz = sizeof (ilb_comm_t) + sizeof (ilb_status_t);
+}
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/Makefile b/usr/src/cmd/cmd-inet/usr.sbin/Makefile
index 386fedfe7f..61a3f6e4ec 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/Makefile
+++ b/usr/src/cmd/cmd-inet/usr.sbin/Makefile
@@ -65,14 +65,14 @@ K5RSHDOBJS= in.rshd.o
K5TELNETOBJS= in.telnetd.o
SRCS= $(PROGSRCS) $(OTHERSRC)
-SUBDIRS= bootconfchk htable ifconfig in.ftpd in.rdisc in.routed \
+SUBDIRS= bootconfchk htable ifconfig ilbadm in.ftpd in.rdisc in.routed \
in.talkd inetadm inetconv ipmpstat ipqosconf ipsecutils \
kssl/kssladm kssl/ksslcfg ping routeadm snoop sppptun \
traceroute wificonfig
-MSGSUBDIRS= bootconfchk htable ifconfig in.ftpd in.routed in.talkd inetadm \
- inetconv ipmpstat ipqosconf ipsecutils kssl/ksslcfg routeadm \
- sppptun snoop wificonfig
+MSGSUBDIRS= bootconfchk htable ifconfig ilbadm in.ftpd in.routed in.talkd \
+ inetadm inetconv ipmpstat ipqosconf ipsecutils kssl/ksslcfg \
+ routeadm sppptun snoop wificonfig
# As programs get lint-clean, add them here and to the 'lint' target.
# Eventually this hack should go away, and all in PROG should be
@@ -84,9 +84,9 @@ LINTCLEAN= 6to4relay arp in.rlogind in.rshd in.telnetd in.tftpd \
# they're all clean, replace the dependency of the lint target
# with SUBDIRS. Also (sigh) deal with the commented-out build lines
# for the lint rule.
-LINTSUBDIRS= bootconfchk in.rdisc in.routed in.talkd inetadm inetconv \
- ipmpstat ipqosconf ipsecutils ping routeadm sppptun traceroute \
- wificonfig
+LINTSUBDIRS= bootconfchk ilbadm in.rdisc in.routed in.talkd inetadm \
+ inetconv ipmpstat ipqosconf ipsecutils ping routeadm sppptun \
+ traceroute wificonfig
# And as programs are verified not to attempt to write into constants,
# -xstrconst should be used to ensure they stay that way.
CONSTCLEAN=
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/Makefile b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/Makefile
new file mode 100644
index 0000000000..7330464458
--- /dev/null
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/Makefile
@@ -0,0 +1,82 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+PROG= ilbadm
+
+ILB_OBJS = ilbadm.o ilbadm_sg.o ilbadm_rules.o ilbadm_hc.o
+ILB_OBJS += ilbadm_subr.o ilbadm_import.o ilbadm_nat.o ilbadm_stats.o
+LIST_OBJS = list.o
+OBJS = $(ILB_OBJS) $(LIST_OBJS)
+
+# ilbadm's own sources sit next to this Makefile; list.c is taken from
+# ../../../../uts/common/os.
+ILB_SRCS= $(ILB_OBJS:.o=.c)
+LIST_SRCS= $(LIST_OBJS:%.o=../../../../uts/common/os/%.c)
+
+# The variable holding ilbadm's sources is ILB_SRCS; ILB_SRC is not
+# defined anywhere in this Makefile and would expand to nothing.
+SRCS= $(ILB_SRCS) $(LIST_SRCS)
+
+include ../../../Makefile.cmd
+include ../../Makefile.cmd-inet
+
+LDLIBS += -lsocket -lnsl -lilb -linetutil -lkstat
+CPPFLAGS += -I$(SRC)/lib/libilb/common -I$(SRC)/uts/common
+
+C99MODE = $(C99_ENABLE)
+
+# for debug:
+# NOTE(review): this assigns CFLAGS outright instead of appending to it,
+# and sets STRIP_STABS to the no-op ":" -- confirm both are meant to stay
+# for non-debug builds.
+CFLAGS = -g
+STRIP_STABS= :
+
+# one message file per ilbadm object, concatenated into $(POFILE) below
+POFILES = $(ILB_OBJS:%.o=%.po)
+POFILE = $(PROG)_all.po
+
+.KEEP_STATE:
+.PARALLEL:
+
+all: $(PROG)
+
+$(PROG): $(OBJS)
+ $(LINK.c) -o $@ $(OBJS) $(LDLIBS)
+ $(POST_PROCESS)
+
+$(POFILE): $(POFILES)
+ $(RM) $@
+ cat $(POFILES) > $@
+
+install: all $(ROOTUSRSBINPROG)
+
+clean:
+ $(RM) $(OBJS) $(POFILES)
+
+# lint and check cover only ilbadm's own sources, not list.c
+lint: $(ILB_SRCS)
+ $(LINT.c) $(ILB_SRCS) $(LDLIBS)
+
+check: $(ILB_SRCS) $(PROG).h
+ $(CSTYLE) -pP $(ILB_SRCS) $(PROG).h
+ $(HDRCHK) $(PROG).h
+
+include ../../../Makefile.targ
+
+# the below is needed to get list.o built
+%.o: ../../../../uts/common/os/%.c
+ $(COMPILE.c) -o $@ $<
+ $(POST_PROCESS_O)
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm.c b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm.c
new file mode 100644
index 0000000000..949fcc064c
--- /dev/null
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm.c
@@ -0,0 +1,249 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <libgen.h>
+#include <libilb.h>
+#include "ilbadm.h"
+
+/*
+ * Return the localized, human-readable message for an ilbadm_status_t.
+ * Values without a dedicated message yield "unknown error".  Every
+ * message is passed through gettext() as a literal.
+ */
+const char *
+ilbadm_errstr(ilbadm_status_t rc)
+{
+	const char *msg;
+
+	switch (rc) {
+	case ILBADM_OK:
+		msg = gettext("no error");
+		break;
+	case ILBADM_FAIL:
+		msg = gettext("processing of command failed");
+		break;
+	case ILBADM_ENOMEM:
+		msg = gettext("memory allocation failure");
+		break;
+	case ILBADM_EINVAL:
+		msg = gettext("invalid value - refer to ilbadm(1M)");
+		break;
+	case ILBADM_HCPRINT:
+		msg = gettext("failed to print healthcheck values");
+		break;
+	case ILBADM_INVAL_AF:
+		msg = gettext("address family is invalid");
+		break;
+	case ILBADM_INVAL_PORT:
+		msg = gettext("port value is invalid");
+		break;
+	case ILBADM_INVAL_SRVID:
+		msg = gettext("server ID is invalid");
+		break;
+	case ILBADM_INVAL_ADDR:
+		msg = gettext("address is invalid");
+		break;
+	case ILBADM_INVAL_ARGS:
+		msg = gettext("invalid/incompatible keywords - refer to"
+		    " ilbadm(1M)");
+		break;
+	case ILBADM_ENOSGNAME:
+		msg = gettext("servergroup name missing");
+		break;
+	case ILBADM_ENORULE:
+		msg = gettext("rule name missing or specified"
+		    " rule not found");
+		break;
+	case ILBADM_ENOSERVER:
+		msg = gettext("server name missing or specified"
+		    " server not found");
+		break;
+	case ILBADM_INVAL_ALG:
+		msg = gettext("LB algorithm is invalid");
+		break;
+	case ILBADM_ENOPROTO:
+		msg = gettext("protocol does not exist in"
+		    " protocol database");
+		break;
+	case ILBADM_ENOSERVICE:
+		msg = gettext("servicename does not exist in nameservices");
+		break;
+	case ILBADM_INVAL_OPER:
+		msg = gettext("operation type is invalid");
+		break;
+	case ILBADM_INVAL_KEYWORD:
+		msg = gettext("keyword is invalid - please refer"
+		    " to ilbadm(1M)");
+		break;
+	case ILBADM_ASSIGNREQ:
+		msg = gettext("assignment '=' missing");
+		break;
+	case ILBADM_NORECURSIVE:
+		msg = gettext("recursive import not allowed");
+		break;
+	case ILBADM_INVAL_COMMAND:
+		msg = gettext("subcommand is invalid - please refer"
+		    " to ilbadm(1M)");
+		break;
+	case ILBADM_ENOPROXY:
+		msg = gettext("proxy-src is missing");
+		break;
+	case ILBADM_INVAL_PROXY:
+		msg = gettext("proxy-src not allowed");
+		break;
+	case ILBADM_ENOOPTION:
+		msg = gettext("mandatory argument(s) missing - refer"
+		    " to ilbadm(1M)");
+		break;
+	case ILBADM_TOOMANYIPADDR:
+		msg = gettext("address range contains more than 255"
+		    " IP addresses");
+		break;
+	case ILBADM_EXPORTFAIL:
+		msg = gettext("could not export servergroup because"
+		    " of lack of space");
+		break;
+	case ILBADM_INVAL_SYNTAX:
+		msg = gettext("syntax failure - refer to ilbadm(1M)");
+		break;
+	case ILBADM_NOKEYWORD_VAL:
+		msg = gettext("missing value");
+		break;
+	case ILBADM_LIBERR:
+		msg = gettext("library error");
+		break;
+	default:
+		msg = gettext("unknown error");
+		break;
+	}
+	return (msg);
+}
+
+/*
+ * Print a printf-style diagnostic on stderr, framed as
+ * "ilbadm: <message>\n" (modelled on warn() of dladm.c).
+ */
+/* PRINTFLIKE1 */
+void
+ilbadm_err(const char *format, ...)
+{
+	va_list ap;
+
+	/* prefix first, so the caller's text follows it on the same line */
+	(void) fprintf(stderr, "ilbadm: ");
+
+	va_start(ap, format);
+	(void) vfprintf(stderr, format, ap);
+	va_end(ap);
+
+	/* callers pass messages without a trailing newline */
+	(void) fprintf(stderr, "\n");
+}
+
+/*
+ * Print the usage heading and the short command list for the program
+ * named by 'name' on stderr, then exit with status 1.  Does not return.
+ */
+void
+Usage(char *name)
+{
+	/*
+	 * The translated heading is data, not a format: write it with
+	 * fputs() so a '%' in a message catalog cannot be interpreted as
+	 * a conversion specification.
+	 */
+	(void) fputs(gettext("Usage:\n"), stderr);
+	print_cmdlist_short(basename(name), stderr);
+	exit(1);
+}
+
+/*
+ * Print "<program> <ILBADM_VERSION>" and the copyright notice on stdout,
+ * then exit with status 0.  Does not return.
+ */
+static void
+print_version(char *name)
+{
+	(void) printf("%s %s\n", basename(name), ILBADM_VERSION);
+	/*
+	 * Print the translated notice through "%s" rather than using it
+	 * as the format itself; a '%' in a translation must not be read
+	 * as a conversion specification.
+	 */
+	(void) printf("%s", gettext(ILBADM_COPYRIGHT));
+	exit(0);
+}
+
+/*
+ * Complain about the option at argv[optind] and exit with status 1.
+ * 'optind' here is the parameter, not getopt()'s global of the same
+ * name.  Does not return.
+ */
+void
+unknown_opt(char **argv, int optind)
+{
+	char *culprit = argv[optind];
+
+	ilbadm_err(gettext("bad or misplaced option %s"), culprit);
+	exit(1);
+}
+
+/*
+ * Report that more arguments were expected, then hand over to Usage(),
+ * which prints the command list for 'name' and exits with status 1.
+ */
+void
+incomplete_cmdline(char *name)
+{
+	ilbadm_err(gettext("the command line is incomplete"
+	    " (more arguments expected)"));
+	Usage(name);
+}
+
+/*
+ * Report that the import file 'filename' could not be opened, then hand
+ * over to Usage(), which prints the command list for 'name' and exits
+ * with status 1.
+ */
+static void
+bad_importfile(char *name, char *filename)
+{
+	char *path = filename;
+
+	ilbadm_err(gettext("file %s cannot be opened for reading"), path);
+	Usage(name);
+}
+
+/*
+ * ilbadm entry point.
+ *
+ * Handles the global options -V (print version) and -? (print usage),
+ * opens the input for the import-config/import-cf subcommand, and then
+ * passes the command line, minus argv[0], to ilbadm_import().
+ *
+ * Returns 0 if ilbadm_import() reports ILBADM_OK and 1 otherwise.
+ */
+int
+main(int argc, char *argv[])
+{
+	ilbadm_status_t rc;
+	int c;
+	int fd = -1;
+	int flags = 0;
+
+	(void) setlocale(LC_ALL, "");
+
+#if !defined(TEXT_DOMAIN)
+#define	TEXT_DOMAIN "SYS_TEST"
+#endif
+	(void) textdomain(TEXT_DOMAIN);
+
+	/*
+	 * handle global options (-?, -V) first.  Neither option takes an
+	 * argument, so 'V' must not be followed by ':' in the option
+	 * string; with "V:" a bare "-V" is treated as missing its argument
+	 * and ends up in the default branch below.
+	 */
+	while ((c = getopt(argc, argv, ":V?")) != -1) {
+		switch ((char)c) {
+		case 'V':
+			print_version(argv[0]);
+			/* not reached */
+			break;
+		case '?':
+			Usage(argv[0]);
+			/* not reached */
+			break;
+		default:
+			unknown_opt(argv, optind - 1);
+			/* not reached */
+			break;
+		}
+	}
+
+	if (optind >= argc)
+		incomplete_cmdline(argv[0]);
+
+	/*
+	 * we can import from a given file (argv[2]) or from
+	 * stdin (if no file given)
+	 */
+	if (strcasecmp(argv[1], "import-config") == 0 ||
+	    strcasecmp(argv[1], "import-cf") == 0) {
+		/* 1 if "-p" precedes the file name, else 0 */
+		int shift = 0;
+
+		if (argc > 2 && strcmp(argv[2], "-p") == 0) {
+			shift++;
+			flags |= ILBADM_IMPORT_PRESERVE;
+		}
+
+		if (argc - shift < 3) {
+			/* no file name given: read from stdin */
+			fd = 0;
+		} else if ((fd = open(argv[2 + shift], O_RDONLY)) == -1) {
+			bad_importfile(argv[0], argv[2 + shift]);
+		}
+	}
+
+	argv++;
+	argc--;
+
+	/*
+	 * re-set optind for next callers of getopt() - they all believe they're
+	 * the first.
+	 */
+	optind = 1;
+	optopt = 0;
+
+	rc = ilbadm_import(fd, argc, argv, flags);
+
+	/*
+	 * The error messages have been printed out, using
+	 * ilbadm_errstr() and ilb_errstr(), before we get here.
+	 * So just set the exit value
+	 */
+	if (rc != ILBADM_OK)
+		return (1);
+	/* success */
+	return (0);
+}
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm.h b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm.h
new file mode 100644
index 0000000000..6168ba0090
--- /dev/null
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm.h
@@ -0,0 +1,242 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _ILBADM_H
+#define _ILBADM_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdio.h>
+#include <sys/list.h>
+#include <net/if.h>
+#include <stdarg.h>
+#include <inttypes.h>
+#include <libilb.h>
+#include <libintl.h>
+#include <locale.h>
+
+#define ILBADM_VERSION "1.0"
+#define ILBADM_COPYRIGHT \
+ "Copyright 2009 Sun Microsystems, Inc. All rights reserved.\n" \
+ "Use is subject to license terms.\n"
+
+/*
+ * flag values
+ */
+#define OPT_VALUE_LIST 0x0001
+#define OPT_IP_RANGE 0x0002
+#define OPT_PORTS 0x0004
+#define OPT_PORTS_ONLY 0x0008
+#define OPT_NAT 0x0010
+#define OPT_NUMERIC_ONLY 0x0020
+
+#define ILBD_BAD_VAL (-1)
+
+#define ILBADM_LIST_FULL 0x0001
+#define ILBADM_LIST_PARSE 0x0002
+#define ILBADM_LIST_ENABLED 0x0004
+#define ILBADM_LIST_NOENABLED (~ILBADM_LIST_ENABLED)
+#define ILBADM_LIST_DISABLED 0x0008
+#define ILBADM_LIST_NODISABLED (~ILBADM_LIST_DISABLED)
+
+#define ILBADM_IMPORT_PRESERVE 0x1000
+
+#define V6_ADDRONLY 0x1 /* don't print surrounding "[]"s */
+
+#define ILB_SRVID_SZ (ILB_NAMESZ - 5)
+#define ILBD_NAMESZ ILB_NAMESZ
+
+#define ILB_MAX_PORT UINT16_MAX
+
+typedef enum {
+ ILBADM_OK = 0,
+ ILBADM_ASSIGNREQ, /* assignment '=' required */
+ ILBADM_EINVAL, /* invalid value */
+ ILBADM_ENOMEM, /* malloc failed */
+ ILBADM_ENOOPTION, /* mandatory option missing */
+ ILBADM_ENOPROTO, /* protocol not found in database */
+ ILBADM_ENOPROXY, /* proxy-src is missing */
+ ILBADM_ENOSERVICE, /* servicename not found in database */
+ ILBADM_ENOSGNAME, /* servergroup name missing */
+ ILBADM_ENORULE, /* rulename missing or no such rule */
+ ILBADM_ENOSERVER, /* servername missing or no such server */
+ ILBADM_EXPORTFAIL, /* too little space to do export servergroup */
+ ILBADM_FAIL, /* processing of command failed */
+ ILBADM_HCPRINT, /* failed to print healthcheck */
+ ILBADM_INVAL_ADDR, /* invalid address */
+ ILBADM_INVAL_AF, /* invalid address family */
+ ILBADM_INVAL_ALG, /* invalid LB algorithm */
+ ILBADM_INVAL_ARGS, /* invalid arguments to command */
+ ILBADM_INVAL_COMMAND, /* invalid command */
+ ILBADM_INVAL_KEYWORD, /* invalid keyword */
+ ILBADM_INVAL_OPER, /* invalid operation type */
+ ILBADM_INVAL_PORT, /* invalid value specified for port */
+ ILBADM_INVAL_PROXY, /* proxy-src not allowed */
+ ILBADM_INVAL_SYNTAX, /* syntax error */
+ ILBADM_INVAL_SRVID, /* server id is invalid (missing "_" ?) */
+ ILBADM_LIBERR, /* translation of libilb errors. We also */
+ /* set it in ilbadm functions to indicate */
+ /* printing of non-generic error messages */
+ ILBADM_NORECURSIVE, /* recursive import not allowed */
+ ILBADM_TOOMANYIPADDR, /* too many addresses */
+ ILBADM_NOKEYWORD_VAL /* no value specified for a keyword */
+} ilbadm_status_t;
+
+
+typedef enum {
+ ILB_KEY_BAD = -1,
+ ILB_KEY_SERVER,
+ ILB_KEY_SERVRANGE, /* pseudo-key for SG creation */
+ ILB_KEY_SERVERID,
+ ILB_KEY_VIP,
+ ILB_KEY_PORT,
+ ILB_KEY_PROTOCOL,
+ ILB_KEY_IPVERSION,
+ ILB_KEY_ALGORITHM,
+ ILB_KEY_TYPE,
+ ILB_KEY_SERVERGROUP,
+ ILB_KEY_HEALTHCHECK,
+ ILB_KEY_HCPORT,
+ ILB_KEY_SRC,
+ ILB_KEY_STICKY,
+ ILB_KEY_CONNDRAIN, /* optional timers ... */
+ ILB_KEY_NAT_TO,
+ ILB_KEY_STICKY_TO,
+ ILB_KEY_HC_TEST,
+ ILB_KEY_HC_COUNT,
+ ILB_KEY_HC_INTERVAL,
+ ILB_KEY_HC_TIMEOUT
+} ilbadm_key_code_t;
+
+/*
+ * we need a few codes for commands, can't use libilb ones
+ */
+typedef enum {
+ cmd_create_sg,
+ cmd_add_srv,
+ cmd_rem_srv,
+ cmd_enable_rule,
+ cmd_disable_rule,
+ cmd_enable_server,
+ cmd_disable_server
+} ilbadm_cmd_t;
+
+/* filched from snoop_ether.c */
+typedef struct val_type {
+ int v_type;
+ char v_name[20];
+ char v_alias[8]; /* undocumented */
+} ilbadm_val_type_t;
+
+typedef struct key_names {
+ ilbadm_key_code_t k_key;
+ char k_name[20]; /* keyword text, at most 19 characters + NUL */
+ char k_alias[12]; /* undocumented */
+} ilbadm_key_name_t;
+
+typedef struct servnode {
+ list_node_t s_link; /* presumably the linkage for sg_serv_list - confirm */
+ ilb_server_data_t s_spec;
+} ilbadm_servnode_t;
+
+typedef struct sgroup {
+ list_t sg_serv_list; /* list of servnode_t elements */
+ int sg_count; /* presumably the length of sg_serv_list - confirm */
+ char *sg_name;
+} ilbadm_sgroup_t;
+
+typedef struct cmd_hlp {
+ char *h_help;
+} ilbadm_cmd_help_t;
+
+typedef ilbadm_status_t (* cmdfunc_t)(int, char **); /* same shape as the ilbadm_*(int, char **) handlers */
+
+typedef struct cmd_names {
+ char c_name[25];
+ char c_alias[20]; /* undocumented */
+ cmdfunc_t c_action;
+ ilbadm_cmd_help_t *c_help; /* for "usage" */
+} ilbadm_cmd_desc_t;
+
+ilbadm_status_t ilbadm_add_server_to_group(int, char **);
+ilbadm_status_t ilbadm_create_servergroup(int, char **);
+ilbadm_status_t ilbadm_destroy_servergroup(int, char **);
+ilbadm_status_t ilbadm_rem_server_from_group(int, char **);
+
+ilbadm_status_t ilbadm_create_rule(int, char **);
+ilbadm_status_t ilbadm_destroy_rule(int, char **);
+ilbadm_status_t ilbadm_enable_rule(int, char **);
+ilbadm_status_t ilbadm_disable_rule(int, char **);
+ilbadm_status_t ilbadm_show_server(int, char **);
+ilbadm_status_t ilbadm_enable_server(int, char **);
+ilbadm_status_t ilbadm_disable_server(int, char **);
+
+ilbadm_status_t ilbadm_show_servergroups(int, char **);
+ilbadm_status_t ilbadm_show_rules(int, char **);
+ilbadm_status_t ilbadm_show_stats(int, char **);
+
+ilbadm_status_t ilbadm_create_hc(int, char **);
+ilbadm_status_t ilbadm_destroy_hc(int, char **);
+ilbadm_status_t ilbadm_show_hc(int, char **);
+ilbadm_status_t ilbadm_show_hc_result(int, char **);
+
+ilbadm_status_t ilbadm_noimport(int, char **);
+
+ilbadm_status_t ilbadm_show_nat(int, char **);
+ilbadm_status_t ilbadm_show_persist(int, char **);
+
+ilbadm_status_t i_parse_optstring(char *, void *, ilbadm_key_name_t *,
+ int, int *);
+ilbadm_servnode_t *i_new_sg_elem(ilbadm_sgroup_t *);
+ilbadm_status_t ilbadm_import(int, int, char *[], int);
+ilbadm_status_t ilbadm_export(int, char *[]);
+ilbadm_status_t ilbadm_export_servergroups(ilb_handle_t h, FILE *);
+ilbadm_status_t ilbadm_export_hc(ilb_handle_t h, FILE *);
+ilbadm_status_t ilbadm_export_rules(ilb_handle_t h, FILE *);
+
+ilbadm_status_t i_check_rule_spec(ilb_rule_data_t *);
+ilbadm_status_t ilbadm_set_netmask(char *, ilb_ip_addr_t *, int);
+int ilbadm_mask_to_prefixlen(ilb_ip_addr_t *);
+
+void print_cmdlist_short(char *, FILE *);
+extern int ilb_cmp_ipaddr(ilb_ip_addr_t *, ilb_ip_addr_t *,
+ longlong_t *);
+
+void ip2str(ilb_ip_addr_t *, char *, size_t, int);
+char *i_str_from_val(int, ilbadm_val_type_t *);
+char *ilbadm_key_to_opt(ilbadm_key_code_t);
+
+void Usage(char *);
+void unknown_opt(char **, int);
+const char *ilbadm_errstr(ilbadm_status_t);
+void ilbadm_err(const char *format, ...);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ILBADM_H */
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_hc.c b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_hc.c
new file mode 100644
index 0000000000..018470e4d0
--- /dev/null
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_hc.c
@@ -0,0 +1,520 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/list.h>
+#include <netinet/in.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <errno.h>
+#include <ofmt.h>
+#include <libilb.h>
+#include "ilbadm.h"
+
+extern int optind, optopt, opterr;
+extern char *optarg;
+
+typedef struct hc_export_arg {
+ FILE *fp;
+} hc_export_arg_t;
+
+/* Maximum columns for printing hc output. */
+#define SHOW_HC_COLS 80
+
+/* OFMT call back to print out a hc server result field. */
+static boolean_t print_hc_result(ofmt_arg_t *, char *, uint_t);
+
+/* ID to indicate which field to be printed. */
+enum hc_print_id {
+ hc_of_rname, hc_of_hname, hc_of_sname, hc_of_status, hc_of_fail_cnt,
+ hc_of_lasttime, hc_of_nexttime, hc_of_rtt,
+ hc_of_name, hc_of_timeout, hc_of_count, hc_of_interval, hc_of_def_ping,
+ hc_of_test
+};
+
+/*
+ * Fields of a hc server result. The sum of all fields' width
+ * (14 + 14 + 14 + 9 + 5 + 9 + 9 + 6) is SHOW_HC_COLS.  Every field is
+ * rendered by print_hc_result(), which tells them apart by the
+ * hc_print_id in the third column.  The all-NULL/zero entry ends the
+ * table.
+ */
+static ofmt_field_t hc_results[] = {
+ {"RULENAME", 14, hc_of_rname, print_hc_result},
+ {"HCNAME", 14, hc_of_hname, print_hc_result},
+ {"SERVERID", 14, hc_of_sname, print_hc_result},
+ {"STATUS", 9, hc_of_status, print_hc_result},
+ {"FAIL", 5, hc_of_fail_cnt, print_hc_result},
+ {"LAST", 9, hc_of_lasttime, print_hc_result},
+ {"NEXT", 9, hc_of_nexttime, print_hc_result},
+ {"RTT", 6, hc_of_rtt, print_hc_result},
+ {NULL, 0, 0, NULL}
+};
+
+/* OFMT call back to print out a hc info field. */
+static boolean_t print_hc(ofmt_arg_t *, char *, uint_t);
+
+/*
+ * Fields of a hc info. The sum of all fields' width
+ * (14 + 8 + 8 + 9 + 9 + 32) is SHOW_HC_COLS.  Every field is rendered
+ * by print_hc().  The all-NULL/zero entry ends the table.
+ */
+static ofmt_field_t hc_fields[] = {
+ {"HCNAME", 14, hc_of_name, print_hc},
+ {"TIMEOUT", 8, hc_of_timeout, print_hc},
+ {"COUNT", 8, hc_of_count, print_hc},
+ {"INTERVAL", 9, hc_of_interval, print_hc},
+ {"DEF_PING", 9, hc_of_def_ping, print_hc},
+ {"TEST", 32, hc_of_test, print_hc},
+ {NULL, 0, 0, NULL}
+};
+
+/*
+ * OFMT callback for the hc_fields table.  Renders the field selected by
+ * of_arg->ofmt_id from the ilb_hc_info_t in of_arg->ofmt_cbarg into
+ * 'buf' (at most 'bufsize' bytes).  An id not listed below leaves 'buf'
+ * untouched.  Always returns B_TRUE.
+ */
+static boolean_t
+print_hc(ofmt_arg_t *of_arg, char *buf, uint_t bufsize)
+{
+	ilb_hc_info_t *hc = (ilb_hc_info_t *)of_arg->ofmt_cbarg;
+	enum hc_print_id field = of_arg->ofmt_id;
+
+	switch (field) {
+	case hc_of_name:
+		(void) strlcpy(buf, hc->hci_name, bufsize);
+		break;
+	case hc_of_timeout:
+		(void) snprintf(buf, bufsize, "%d", hc->hci_timeout);
+		break;
+	case hc_of_count:
+		(void) snprintf(buf, bufsize, "%d", hc->hci_count);
+		break;
+	case hc_of_interval:
+		(void) snprintf(buf, bufsize, "%d", hc->hci_interval);
+		break;
+	case hc_of_def_ping: {
+		char yn = hc->hci_def_ping ? 'Y' : 'N';
+
+		(void) snprintf(buf, bufsize, "%c", yn);
+		break;
+	}
+	case hc_of_test:
+		(void) snprintf(buf, bufsize, "%s", hc->hci_test);
+		break;
+	}
+	return (B_TRUE);
+}
+
+/*
+ * Callback for ilb_walk_hc(): print one health check object through the
+ * ofmt handle passed in 'arg'.  The library handle 'h' is not used.
+ * Always returns ILB_STATUS_OK.
+ */
+/* ARGSUSED */
+static ilb_status_t
+ilbadm_print_hc(ilb_handle_t h, ilb_hc_info_t *hc_info, void *arg)
+{
+	ofmt_print((ofmt_handle_t)arg, hc_info);
+	return (ILB_STATUS_OK);
+}
+
+/*
+ * Print out health check objects given their name.
+ * Or print out all health check objects if no name given.
+ *
+ * argv[1] .. argv[argc - 1] are the names; argc == 1 means "all".
+ * Processing stops at the first name libilb cannot look up.
+ *
+ * Returns ILBADM_OK on success.  On failure the error has already been
+ * printed and ILBADM_LIBERR is returned.
+ */
+/* ARGSUSED */
+ilbadm_status_t
+ilbadm_show_hc(int argc, char *argv[])
+{
+	ilb_handle_t h = ILB_INVALID_HANDLE;
+	ilb_status_t rclib;
+	ofmt_handle_t ofmt_h;
+	ofmt_status_t ofmt_ret;
+
+	if ((ofmt_ret = ofmt_open("all", hc_fields, 0, SHOW_HC_COLS,
+	    &ofmt_h)) != OFMT_SUCCESS) {
+		char err_buf[SHOW_HC_COLS];
+
+		ilbadm_err(gettext("ofmt_open failed: %s"),
+		    ofmt_strerror(ofmt_h, ofmt_ret, err_buf, SHOW_HC_COLS));
+		return (ILBADM_LIBERR);
+	}
+	rclib = ilb_open(&h);
+	if (rclib != ILB_STATUS_OK)
+		goto out;
+
+	if (argc == 1) {
+		rclib = ilb_walk_hc(h, ilbadm_print_hc, ofmt_h);
+	} else {
+		ilb_hc_info_t hc_info;
+		int i;
+
+		for (i = 1; i < argc; i++) {
+			rclib = ilb_get_hc_info(h, argv[i], &hc_info);
+			if (rclib == ILB_STATUS_OK)
+				ofmt_print(ofmt_h, &hc_info);
+			else
+				break;
+		}
+	}
+out:
+	ofmt_close(ofmt_h);
+
+	if (h != ILB_INVALID_HANDLE)
+		(void) ilb_close(h);
+
+	if (rclib != ILB_STATUS_OK) {
+		/*
+		 * ilbadm_err() takes a printf format; pass the library's
+		 * message as an argument so it is never parsed as one.
+		 */
+		ilbadm_err("%s", ilb_errstr(rclib));
+		return (ILBADM_LIBERR);
+	}
+
+	return (ILBADM_OK);
+}
+
+/*
+ * OFMT callback for the hc_results table.  Renders the field selected by
+ * of_arg->ofmt_id from the ilb_hc_srv_t in of_arg->ofmt_cbarg into
+ * 'buf' (at most 'bufsize' bytes).
+ *
+ * 'buf' is left untouched for a status value not listed below, for the
+ * NEXT field of a disabled server, and for an id not listed below.
+ *
+ * Returns B_FALSE only if localtime_r() fails for one of the time
+ * fields; B_TRUE otherwise.
+ */
+static boolean_t
+print_hc_result(ofmt_arg_t *of_arg, char *buf, uint_t bufsize)
+{
+	ilb_hc_srv_t *res = (ilb_hc_srv_t *)of_arg->ofmt_cbarg;
+	enum hc_print_id field = of_arg->ofmt_id;
+	const char *state = NULL;	/* text for the STATUS field */
+	const time_t *stamp = NULL;	/* time to print for LAST/NEXT */
+	struct tm tm;
+
+	switch (field) {
+	case hc_of_rname:
+		(void) strlcpy(buf, res->hcs_rule_name, bufsize);
+		break;
+	case hc_of_hname:
+		(void) strlcpy(buf, res->hcs_hc_name, bufsize);
+		break;
+	case hc_of_sname:
+		(void) strlcpy(buf, res->hcs_ID, bufsize);
+		break;
+	case hc_of_status:
+		switch (res->hcs_status) {
+		case ILB_HCS_UNINIT:
+			state = "un-init";
+			break;
+		case ILB_HCS_UNREACH:
+			state = "unreach";
+			break;
+		case ILB_HCS_ALIVE:
+			state = "alive";
+			break;
+		case ILB_HCS_DEAD:
+			state = "dead";
+			break;
+		case ILB_HCS_DISABLED:
+			state = "disabled";
+			break;
+		}
+		if (state != NULL)
+			(void) strlcpy(buf, state, bufsize);
+		break;
+	case hc_of_fail_cnt:
+		(void) snprintf(buf, bufsize, "%u", res->hcs_fail_cnt);
+		break;
+	case hc_of_lasttime:
+		stamp = &res->hcs_lasttime;
+		break;
+	case hc_of_nexttime:
+		/* a disabled server gets no NEXT time printed */
+		if (res->hcs_status != ILB_HCS_DISABLED)
+			stamp = &res->hcs_nexttime;
+		break;
+	case hc_of_rtt:
+		(void) snprintf(buf, bufsize, "%u", res->hcs_rtt);
+		break;
+	}
+
+	/* shared HH:MM:SS formatting for the two time fields */
+	if (stamp != NULL) {
+		if (localtime_r(stamp, &tm) == NULL)
+			return (B_FALSE);
+		(void) snprintf(buf, bufsize, "%02d:%02d:%02d", tm.tm_hour,
+		    tm.tm_min, tm.tm_sec);
+	}
+	return (B_TRUE);
+}
+
+/*
+ * Walker callback handed to ilb_walk_hc_srvs(): prints one server's
+ * health check result through the ofmt handle passed in arg.  It always
+ * returns ILB_STATUS_OK.
+ */
+/* ARGSUSED */
+static ilb_status_t
+ilbadm_print_hc_result(ilb_handle_t h, ilb_hc_srv_t *srv, void *arg)
+{
+	ofmt_print((ofmt_handle_t)arg, srv);
+
+	return (ILB_STATUS_OK);
+}
+
+/*
+ * Implements "ilbadm show-hc-result [rule-name ...]": walks the health
+ * check results of every named rule, or of all rules when no name is
+ * given, printing each server through ofmt.
+ *
+ * Returns ILBADM_OK on success and ILBADM_LIBERR (after printing a
+ * message) on any failure.
+ */
+ilbadm_status_t
+ilbadm_show_hc_result(int argc, char *argv[])
+{
+	ofmt_handle_t	oh;
+	ofmt_status_t	oerr;
+	ilb_handle_t	h = ILB_INVALID_HANDLE;
+	ilb_status_t	rclib;
+	int		idx;
+
+	/* ilbadm show-hc-result [rule-name] */
+	if (argc < 1) {
+		ilbadm_err(gettext("usage: ilbadm show-hc-result"
+		    " [rule-name]"));
+		return (ILBADM_LIBERR);
+	}
+
+	oerr = ofmt_open("all", hc_results, 0, SHOW_HC_COLS, &oh);
+	if (oerr != OFMT_SUCCESS) {
+		char	err_buf[SHOW_HC_COLS];
+
+		ilbadm_err(gettext("ofmt_open failed: %s"),
+		    ofmt_strerror(oh, oerr, err_buf, SHOW_HC_COLS));
+		return (ILBADM_LIBERR);
+	}
+
+	rclib = ilb_open(&h);
+	if (rclib == ILB_STATUS_OK) {
+		if (argc == 1) {
+			/* no rule name: a NULL name walks every rule */
+			rclib = ilb_walk_hc_srvs(h, ilbadm_print_hc_result,
+			    NULL, oh);
+		} else {
+			/* argc >= 2 here; stop at the first failing rule */
+			idx = 1;
+			do {
+				rclib = ilb_walk_hc_srvs(h,
+				    ilbadm_print_hc_result, argv[idx], oh);
+			} while (rclib == ILB_STATUS_OK && ++idx < argc);
+		}
+	}
+
+	ofmt_close(oh);
+
+	if (h != ILB_INVALID_HANDLE)
+		(void) ilb_close(h);
+
+	if (rclib == ILB_STATUS_OK)
+		return (ILBADM_OK);
+
+	ilbadm_err(ilb_errstr(rclib));
+	return (ILBADM_LIBERR);
+}
+
+/*
+ * Values ilbadm_hc_parse_arg() stores in hci_count, hci_interval and
+ * hci_timeout before the -h option string is parsed.
+ */
+#define ILBADM_DEF_HC_COUNT 3
+#define ILBADM_DEF_HC_INTERVAL 30 /* in sec */
+#define ILBADM_DEF_HC_TIMEOUT 5 /* in sec */
+
+/*
+ * Key table passed to i_parse_optstring() for the create-healthcheck
+ * -h argument.  Each entry carries a key code followed by two spellings
+ * of the key; the ILB_KEY_BAD entry with empty names ends the table.
+ */
+static ilbadm_key_name_t hc_parse_keys[] = {
+ {ILB_KEY_HC_TEST, "hc-test", "hc-test"},
+ {ILB_KEY_HC_COUNT, "hc-count", "hc-count"},
+ {ILB_KEY_HC_TIMEOUT, "hc-timeout", "hc-tout"},
+ {ILB_KEY_HC_INTERVAL, "hc-interval", "hc-intl"},
+ {ILB_KEY_BAD, "", ""}
+};
+
+/*
+ * Parse the argument of "create-healthcheck -h" into *hc.
+ *
+ * count, interval and timeout are preset to their defaults and the test
+ * name to the empty string, then i_parse_optstring() fills in whatever
+ * the user supplied.  An ILBADM_LIBERR result from the parser is handed
+ * back unchanged without a further message; every other failure,
+ * including a missing hc-test, is reported here and mapped to
+ * ILBADM_LIBERR.
+ */
+static ilbadm_status_t
+ilbadm_hc_parse_arg(char *arg, ilb_hc_info_t *hc)
+{
+	ilbadm_status_t	rc;
+
+	/* set default value for count, interval, timeout */
+	hc->hci_test[0] = '\0';
+	hc->hci_timeout = ILBADM_DEF_HC_TIMEOUT;
+	hc->hci_interval = ILBADM_DEF_HC_INTERVAL;
+	hc->hci_count = ILBADM_DEF_HC_COUNT;
+
+	rc = i_parse_optstring(arg, hc, hc_parse_keys, 0, NULL);
+
+	if (rc == ILBADM_LIBERR)
+		return (rc);
+
+	if (rc != ILBADM_OK) {
+		ilbadm_err(ilbadm_errstr(rc));
+		return (ILBADM_LIBERR);
+	}
+
+	/* the test name is the one mandatory key */
+	if (hc->hci_test[0] == '\0') {
+		ilbadm_err("hc-test: missing");
+		return (ILBADM_LIBERR);
+	}
+
+	return (rc);
+}
+
+/*
+ * Implements "ilbadm create-healthcheck [-n] -h hc-test=...[,...] hc-name".
+ *
+ * -h supplies the health check parameters (see ilbadm_hc_parse_arg()),
+ * -n turns off the default ping.  Disabling the default ping is refused
+ * when the test itself is the UDP or PING test.
+ *
+ * Returns ILBADM_OK on success; ILBADM_FAIL for a missing or over-long
+ * object name; ILBADM_LIBERR (or the parser's status) otherwise.
+ */
+/* ARGSUSED */
+ilbadm_status_t
+ilbadm_create_hc(int argc, char *argv[])
+{
+	ilb_handle_t	h = ILB_INVALID_HANDLE;
+	ilb_hc_info_t	hc_info;
+	ilbadm_status_t	ret = ILBADM_OK;
+	ilb_status_t	rclib;
+	/*
+	 * getopt() returns an int.  Keeping it in a plain char breaks the
+	 * comparison against -1 on platforms where char is unsigned.
+	 */
+	int		c;
+
+	/*
+	 * Start from a fully defined object: without -h nothing else
+	 * initializes hci_test, which is read by strcasecmp() below.
+	 */
+	(void) memset(&hc_info, 0, sizeof (hc_info));
+	hc_info.hci_def_ping = B_TRUE;
+
+	while ((c = getopt(argc, argv, ":h:n")) != -1) {
+		if (c == 'h') {
+			ret = ilbadm_hc_parse_arg(optarg, &hc_info);
+			if (ret != ILBADM_OK)
+				return (ret);
+		} else if (c == 'n') {
+			hc_info.hci_def_ping = B_FALSE;
+		} else {
+			ilbadm_err(gettext("bad argument %c"), c);
+			return (ILBADM_LIBERR);
+		}
+	}
+
+	/* the health check object name must follow the options */
+	if (optind >= argc) {
+		ilbadm_err(gettext("usage: ilbadm"
+		    " create-healthcheck [-n] -h"
+		    " hc-test=val[,hc-timeout=val][,hc-count=va]"
+		    "[,hc-interval=val] hc-name"));
+		return (ILBADM_FAIL);
+	}
+
+	if (strlen(argv[optind]) > ILBD_NAMESZ - 1) {
+		ilbadm_err(gettext("health check object name %s is too long - "
+		    "must not exceed %d chars"), argv[optind],
+		    ILBD_NAMESZ - 1);
+		return (ILBADM_FAIL);
+	}
+
+	if (((strcasecmp(hc_info.hci_test, ILB_HC_STR_UDP) == 0) ||
+	    (strcasecmp(hc_info.hci_test, ILB_HC_STR_PING) == 0)) &&
+	    !(hc_info.hci_def_ping)) {
+		ilbadm_err(gettext("cannot disable default PING"
+		    " for this test"));
+		return (ILBADM_LIBERR);
+	}
+
+	rclib = ilb_open(&h);
+	if (rclib != ILB_STATUS_OK)
+		goto out;
+
+	(void) strlcpy(hc_info.hci_name, argv[optind],
+	    sizeof (hc_info.hci_name));
+	rclib = ilb_create_hc(h, &hc_info);
+out:
+	if (h != ILB_INVALID_HANDLE)
+		(void) ilb_close(h);
+
+	if (rclib != ILB_STATUS_OK) {
+		ilbadm_err(ilb_errstr(rclib));
+		ret = ILBADM_LIBERR;
+	}
+	return (ret);
+}
+
+/*
+ * Implements "ilbadm delete-healthcheck hc-name ...": destroys each
+ * named health check object in turn, stopping at the first failure.
+ * Returns ILBADM_OK, or ILBADM_LIBERR after printing a message.
+ */
+ilbadm_status_t
+ilbadm_destroy_hc(int argc, char *argv[])
+{
+	ilb_handle_t	h = ILB_INVALID_HANDLE;
+	ilb_status_t	rclib;
+	int		idx;
+
+	if (argc < 2) {
+		ilbadm_err(gettext("usage: ilbadm"
+		    " delete-healthcheck hc-name ..."));
+		return (ILBADM_LIBERR);
+	}
+
+	rclib = ilb_open(&h);
+
+	/* the loop body is never entered when ilb_open() failed */
+	for (idx = 1; rclib == ILB_STATUS_OK && idx < argc; idx++)
+		rclib = ilb_destroy_hc(h, argv[idx]);
+
+	if (h != ILB_INVALID_HANDLE)
+		(void) ilb_close(h);
+
+	if (rclib == ILB_STATUS_OK)
+		return (ILBADM_OK);
+
+	ilbadm_err(ilb_errstr(rclib));
+	return (ILBADM_LIBERR);
+}
+
+/*
+ * Walker callback that writes one health check object to the export
+ * file as a "create-healthcheck" command line.  arg points to a
+ * hc_export_arg_t holding the output stream.
+ *
+ * Since this function is used by libilb function, it
+ * must return libilb errors: ILB_STATUS_WRITE when the final write or
+ * the flush fails, ILB_STATUS_OK otherwise.
+ */
+/* ARGSUSED */
+ilb_status_t
+ilbadm_export_hcinfo(ilb_handle_t h, ilb_hc_info_t *hc_info, void *arg)
+{
+	FILE		*out = ((hc_export_arg_t *)arg)->fp;
+	boolean_t	need_comma = B_FALSE;
+
+	/*
+	 * a test name "PING" implies "no default ping", so we only
+	 * print -n if the test is NOT "PING"
+	 */
+	if (hc_info->hci_def_ping != B_FALSE ||
+	    strncasecmp(hc_info->hci_test, "PING", 5) == 0)
+		(void) fprintf(out, "create-healthcheck -h ");
+	else
+		(void) fprintf(out, "create-healthcheck -n -h ");
+
+	/* each key is preceded by a comma once any key has been written */
+	if (*hc_info->hci_test != '\0') {
+		(void) fprintf(out, "hc-test=%s", hc_info->hci_test);
+		need_comma = B_TRUE;
+	}
+	if (hc_info->hci_timeout != 0) {
+		if (need_comma)
+			(void) fprintf(out, ",");
+		(void) fprintf(out, "hc-timeout=%d", hc_info->hci_timeout);
+		need_comma = B_TRUE;
+	}
+	if (hc_info->hci_count != 0) {
+		if (need_comma)
+			(void) fprintf(out, ",");
+		(void) fprintf(out, "hc-count=%d", hc_info->hci_count);
+		need_comma = B_TRUE;
+	}
+	if (hc_info->hci_interval != 0) {
+		if (need_comma)
+			(void) fprintf(out, ",");
+		(void) fprintf(out, "hc-interval=%d", hc_info->hci_interval);
+	}
+
+	/*
+	 * if any of the above writes fails, then, we assume, so will
+	 * this one; so it's sufficient to test once
+	 */
+	if (fprintf(out, " %s\n", hc_info->hci_name) < 0 || fflush(out) < 0)
+		return (ILB_STATUS_WRITE);
+
+	return (ILB_STATUS_OK);
+}
+
+/*
+ * Write every health check object known to the library to fp, one
+ * "create-healthcheck" line each, by walking them with
+ * ilbadm_export_hcinfo().  Returns ILBADM_OK, or ILBADM_LIBERR after
+ * printing the library error.
+ */
+ilbadm_status_t
+ilbadm_export_hc(ilb_handle_t h, FILE *fp)
+{
+	hc_export_arg_t	ctx;
+	ilb_status_t	st;
+
+	ctx.fp = fp;
+	st = ilb_walk_hc(h, ilbadm_export_hcinfo, (void *)&ctx);
+	if (st == ILB_STATUS_OK)
+		return (ILBADM_OK);
+
+	ilbadm_err(ilb_errstr(st));
+	return (ILBADM_LIBERR);
+}
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_import.c b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_import.c
new file mode 100644
index 0000000000..22e7b6a13b
--- /dev/null
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_import.c
@@ -0,0 +1,441 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <strings.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <libilb.h>
+#include "ilbadm.h"
+
+/*
+ * Usage strings, one object per subcommand.  Each is referenced from the
+ * ilbadm_cmds[] table below and printed after the command name and alias
+ * by print_cmd_short().
+ */
+static ilbadm_cmd_help_t create_sg_help = {
+"[-s server=hostspec[:portspec...]] groupname"
+};
+
+static ilbadm_cmd_help_t create_rule_help = {
+"[-e] [-p] -i vip=value,port=value[,protocol=value] \n" \
+" -m lbalg=value,type=value[,proxy-src=ip-range][,pmask=mask] \n"\
+" -h hc-name=value[,hc-port=value]] \n" \
+" [-t [conn-drain=N][,nat-timeout=N][,persist-timeout=N]] \n" \
+" -o servergroup=value name"
+};
+
+static ilbadm_cmd_help_t destroy_rule_help = {
+"-a | name ..."
+};
+
+static ilbadm_cmd_help_t add_server_help = {
+"-s server=value[,value ...] servergroup"
+};
+
+static ilbadm_cmd_help_t remove_server_help = {
+"-s server=value[,value ...] servergroup"
+};
+
+
+static ilbadm_cmd_help_t disable_server_help = {
+"server ... "
+};
+
+static ilbadm_cmd_help_t enable_server_help = {
+"server ..."
+};
+
+static ilbadm_cmd_help_t enable_rule_help = {
+"[name ... ]"
+};
+
+static ilbadm_cmd_help_t disable_rule_help = {
+"[name ... ]"
+};
+
+static ilbadm_cmd_help_t show_server_help = {
+"[[-p] -o field[,field...]] [rulename ... ]"
+};
+
+static ilbadm_cmd_help_t showstats_help = {
+"[-p] -o field[,...]] [-tdAvi]\n" \
+" [-r rulename|-s servername] [interval [count]]"
+};
+
+static ilbadm_cmd_help_t show_nat_help = {
+"[count]"
+};
+
+static ilbadm_cmd_help_t show_persist_help = {
+"[count]"
+};
+
+static ilbadm_cmd_help_t show_hc_help = {
+"[hc-name]"
+};
+
+static ilbadm_cmd_help_t create_hc_help = {
+"[-n] -h hc-test=value[,hc-timeout=value]\n" \
+" [,hc-count=value][,hc-interval=value] hcname"
+};
+
+static ilbadm_cmd_help_t destroy_hc_help = {
+"name ..."
+};
+
+static ilbadm_cmd_help_t show_hc_result_help = {
+"[rule-name]"
+};
+
+static ilbadm_cmd_help_t show_rule_help = {
+"[-e|-d] [-f |[-p] -o key[,key ...]] [name ...]"
+};
+
+static ilbadm_cmd_help_t destroy_servergroup_help = {
+"groupname"
+};
+
+static ilbadm_cmd_help_t show_servergroup_help = {
+"[[-p] -o field[,field]] [name]"
+};
+
+static ilbadm_cmd_help_t export_config_help = {
+"[filename]"
+};
+
+static ilbadm_cmd_help_t import_config_help = {
+"[-p] [filename]"
+};
+/*
+ * Subcommand dispatch table used by match_cmd(): full command name,
+ * alias, handler function and usage text.  The entry with empty names
+ * and a NULL handler terminates the table; match_cmd() returns that NULL
+ * handler for an unknown command.
+ *
+ * Note that in this table "import-config" is bound to ilbadm_noimport(),
+ * which only reports an error.
+ */
+static ilbadm_cmd_desc_t ilbadm_cmds[] = {
+ {"create-rule", "create-rl", ilbadm_create_rule, &create_rule_help},
+ {"delete-rule", "delete-rl", ilbadm_destroy_rule, &destroy_rule_help},
+ {"enable-rule", "enable-rl", ilbadm_enable_rule, &enable_rule_help},
+ {"disable-rule", "disable-rl", ilbadm_disable_rule,
+ &disable_rule_help},
+ {"show-rule", "show-rl", ilbadm_show_rules, &show_rule_help},
+
+ {"create-servergroup", "create-sg", ilbadm_create_servergroup,
+ &create_sg_help},
+ {"delete-servergroup", "delete-sg", ilbadm_destroy_servergroup,
+ &destroy_servergroup_help},
+ {"show-servergroup", "show-sg", ilbadm_show_servergroups,
+ &show_servergroup_help},
+
+ {"add-server", "add-srv", ilbadm_add_server_to_group,
+ &add_server_help},
+ {"remove-server", "remove-srv", ilbadm_rem_server_from_group,
+ &remove_server_help},
+ {"disable-server", "disable-srv", ilbadm_disable_server,
+ &disable_server_help},
+ {"enable-server", "enable-srv", ilbadm_enable_server,
+ &enable_server_help},
+ {"show-server", "show-srv", ilbadm_show_server,
+ &show_server_help},
+
+ {"show-healthcheck", "show-hc", ilbadm_show_hc, &show_hc_help},
+ {"create-healthcheck", "create-hc", ilbadm_create_hc, &create_hc_help},
+ {"delete-healthcheck", "delete-hc", ilbadm_destroy_hc,
+ &destroy_hc_help},
+ {"show-hc-result", "show-hc-res", ilbadm_show_hc_result,
+ &show_hc_result_help},
+
+ {"export-config", "export-cf", ilbadm_export, &export_config_help},
+ {"import-config", "import-cf", ilbadm_noimport, &import_config_help},
+
+ {"show-statistics", "show-stats", ilbadm_show_stats, &showstats_help},
+ {"show-nat", "show-nat", ilbadm_show_nat, &show_nat_help},
+ {"show-persist", "show-pt", ilbadm_show_persist,
+ &show_persist_help},
+ {"", "", NULL, NULL}
+};
+
+
+/*
+ * Handler bound to "import-config" in ilbadm_cmds[]: it performs no
+ * import, it only prints the ILBADM_NORECURSIVE message and fails with
+ * ILBADM_LIBERR.
+ */
+/* ARGSUSED */
+ilbadm_status_t
+ilbadm_noimport(int argc, char *argv[])
+{
+	char	*msg = ilbadm_errstr(ILBADM_NORECURSIVE);
+
+	ilbadm_err(msg);
+
+	return (ILBADM_LIBERR);
+}
+
+/*
+ * Print one line per entry of the command table cmd to fp, in the form
+ * "<name> <command>|<alias> <usage>"; the usage part is left out for an
+ * entry that has no help text.  The table ends at the first entry whose
+ * command name is empty.
+ */
+static void
+print_cmd_short(char *name, FILE *fp, ilbadm_cmd_desc_t *cmd)
+{
+	ilbadm_cmd_desc_t	*ent;
+
+	for (ent = cmd; ent->c_name[0] != '\0'; ent++) {
+		char	*usage = NULL;
+
+		if (ent->c_help != NULL)
+			usage = ent->c_help->h_help;
+
+		if (usage == NULL) {
+			(void) fprintf(fp, "%s %s|%s\n", name, ent->c_name,
+			    ent->c_alias);
+			continue;
+		}
+
+		(void) fprintf(fp, "%s %s|%s %s\n", name,
+		    ent->c_name, ent->c_alias, usage);
+	}
+}
+
+/*
+ * Print the short usage of every ilbadm subcommand to fp; name is the
+ * program name put in front of each line.
+ */
+void
+print_cmdlist_short(char *name, FILE *fp)
+{
+	ilbadm_cmd_desc_t	*table = ilbadm_cmds;
+
+	print_cmd_short(name, fp, table);
+}
+
+/* flag for match_cmd(): the command was read from an import file */
+#define	IMPORT_FILE	0x1
+
+/*
+ * Look name up (ignoring case) among the command names and aliases of
+ * cmds and store the matching handler in *action.  When nothing matches,
+ * the handler of the terminating table entry is stored instead.
+ *
+ * With IMPORT_FILE set in flags, "export-config"/"export-cf" is refused
+ * and the program exits.
+ */
+static void
+match_cmd(char *name, ilbadm_cmd_desc_t *cmds, cmdfunc_t *action, int flags)
+{
+	ilbadm_cmd_desc_t	*ent = cmds;
+
+	if ((flags & IMPORT_FILE) != 0 &&
+	    (strcasecmp(name, "export-config") == 0 ||
+	    strcasecmp(name, "export-cf") == 0)) {
+		ilbadm_err(gettext("export from import file"
+		    " not allowed"));
+		exit(1);
+	}
+
+	while (ent->c_name[0] != '\0') {
+		if (strncasecmp(ent->c_name, name, sizeof (ent->c_name)) == 0)
+			break;
+		if (strncasecmp(ent->c_alias, name,
+		    sizeof (ent->c_alias)) == 0)
+			break;
+		ent++;
+	}
+
+	/* ent is either the match or the table terminator */
+	*action = ent->c_action;
+}
+
+/*
+ * read and parse commandline: argv[0] names the subcommand, which is
+ * looked up in ilbadm_cmds[] and invoked with the full argument vector.
+ * An unknown subcommand is reported and yields ILBADM_INVAL_COMMAND.
+ */
+static ilbadm_status_t
+ilb_import_cmdline(int argc, char *argv[], int flags)
+{
+	cmdfunc_t	handler;
+	ilbadm_status_t	rc;
+
+	match_cmd(argv[0], ilbadm_cmds, &handler, flags);
+
+	if (handler == NULL) {
+		rc = ILBADM_INVAL_COMMAND;
+		ilbadm_err(ilbadm_errstr(rc));
+		return (rc);
+	}
+
+	rc = handler(argc, argv);
+	return (rc);
+}
+
+#define	CHUNK	10
+#define	LINESZ	1024
+
+/* argument vector that grows in steps of CHUNK entries */
+typedef struct {
+	int	listsz;		/* number of slots available in arglist */
+	char	*arglist[1];	/* really listsz entries */
+} arg_t;
+
+/*
+ * Read the next command line from fp and split it at blanks, tabs and
+ * newlines into (*ap)->arglist.  *ap is allocated on first use and grown
+ * as needed; the caller keeps passing the same pointer.  The strings
+ * point into a static line buffer and are only valid until the next
+ * call.
+ *
+ * Lines starting with '#' are skipped, and so are lines that contain no
+ * token at all: returning 0 for a blank line would make the caller stop
+ * importing in the middle of the file.
+ *
+ * Returns the number of arguments found; 0 at end of input or when
+ * memory cannot be allocated.
+ *
+ * NOTE(review): the continuation test below only fires when the '\'
+ * token is the very last thing in the buffer (no trailing newline) -
+ * confirm that this is the intended behaviour.
+ */
+static int
+i_getln_to_argv(FILE *fp, arg_t **ap)
+{
+	static char	*linebuf = NULL;
+	char		*stringp, *currp;
+	char		delim[] = " \t\n";
+	int		i;
+	arg_t		*a = *ap;
+
+#define	STR_DIFF(s1, s2)	(int)((char *)s2 - (char *)s1)
+#define	STR_ADJ_SZ(sz, buf, s)	(sz - STR_DIFF(buf, s))
+
+	if (linebuf == NULL)
+		if ((linebuf = (char *)malloc(LINESZ)) == NULL)
+			return (0);
+
+restart:
+	stringp = currp = linebuf;
+	i = 0;
+read_next:
+	if (fgets(currp, STR_ADJ_SZ(LINESZ, linebuf, currp), fp) == NULL)
+		return (i);
+
+	/* ignore lines starting with a # character */
+	if (*currp == '#')
+		goto read_next;
+
+	for (; stringp != NULL && currp != NULL; i++) {
+		currp = strsep(&stringp, delim);
+		/*
+		 * if there's more than one adjacent delimiters ...
+		 */
+		if (*currp == '\0') {
+			i--;
+			continue;
+		}
+		/*
+		 * if we find a '\' at the end of a line, treat
+		 * it as a continuation character.
+		 */
+		if (*currp == '\\' && stringp == NULL) {
+			stringp = currp;
+			goto read_next;
+		}
+		if (a == NULL) {
+			/* zeroed, so listsz starts at 0 and we grow below */
+			a = (arg_t *)calloc(1, sizeof (*a));
+			if (a == NULL)
+				return (0);
+			*ap = a;
+		}
+		if (a->listsz <= i) {
+			int	newsz = a->listsz + CHUNK;
+			arg_t	*grown;
+
+			/*
+			 * Keep the old vector (and *ap) intact when
+			 * realloc() fails instead of losing it.
+			 */
+			grown = (arg_t *)realloc(a, sizeof (*a) +
+			    ((newsz - 1) * sizeof (a->arglist)));
+			if (grown == NULL)
+				return (0);
+			a = grown;
+			a->listsz = newsz;
+			*ap = a;
+		}
+		a->arglist[i] = currp;
+	}
+
+	/* nothing but delimiters on this line: go on with the next one */
+	if (i == 0)
+		goto restart;
+
+	return (i);
+}
+
+/*
+ * Execute the commands found in the file open on fd, one per line.
+ * Unless ILBADM_IMPORT_PRESERVE is set in flags, the current
+ * configuration is reset first (a failure to do so is ignored).
+ * Processing stops at the first command that does not return ILBADM_OK;
+ * that status is returned.
+ */
+static ilbadm_status_t
+ilb_import_file(int fd, int flags)
+{
+	ilbadm_status_t	rc = ILBADM_OK;
+	arg_t		*args = NULL;
+	FILE		*in;
+	int		nargs;
+
+	in = fdopen(fd, "r");
+	if (in == NULL) {
+		ilbadm_err(gettext("cannot import file for reading"));
+		exit(1);
+	}
+
+	if (!(flags & ILBADM_IMPORT_PRESERVE)) {
+		ilb_handle_t	h = ILB_INVALID_HANDLE;
+
+		if (ilb_open(&h) == ILB_STATUS_OK)
+			(void) ilb_reset_config(h);
+		if (h != ILB_INVALID_HANDLE)
+			(void) ilb_close(h);
+	}
+
+	for (;;) {
+		nargs = i_getln_to_argv(in, &args);
+		if (nargs <= 0)
+			break;
+
+		/* every command parses its own options from scratch */
+		optind = 1;
+		rc = ilb_import_cmdline(nargs, args->arglist, IMPORT_FILE);
+		if (rc != ILBADM_OK)
+			break;
+	}
+
+	return (rc);
+}
+
+/*
+ * this is the wrapper around everything to do with importing and
+ * parsing either commandline or persistent storage.
+ * if (fd == -1), parse commandline, otherwise use the given fd as input.
+ * flags is only looked at in the file case.
+ */
+/* ARGSUSED */
+ilbadm_status_t
+ilbadm_import(int fd, int argc, char *argv[], int flags)
+{
+	if (fd != -1)
+		return (ilb_import_file(fd, flags));
+
+	return (ilb_import_cmdline(argc, argv, 0));
+}
+
+/*
+ * Implements "ilbadm export-config [filename]": writes the server
+ * groups, health checks and rules as ilbadm commands, in that order.
+ *
+ * Without a filename the output goes to stdout.  With one, it is written
+ * to a temporary file created with mkstemp() next to the target and
+ * renamed over the target once everything was exported; on failure the
+ * temporary file is removed.
+ *
+ * Returns ILBADM_OK on success.  A failure to open the library handle is
+ * reported and returned as ILBADM_LIBERR rather than being passed off as
+ * success.
+ */
+ilbadm_status_t
+ilbadm_export(int argc, char *argv[])
+{
+	ilb_handle_t	h = ILB_INVALID_HANDLE;
+	ilbadm_status_t	rc = ILBADM_OK;
+	ilb_status_t	rclib = ILB_STATUS_OK;
+	int		fd;
+	FILE		*fp;
+	char		*fname = NULL;
+	char		tmpfile[MAXPATHLEN];
+
+	if (argc < 2) {
+		fd = 1;		/* stdout */
+		*tmpfile = '\0';
+	} else {
+		fname = argv[1];
+		(void) snprintf(tmpfile, sizeof (tmpfile), "%sXXXXXX", fname);
+		fd = mkstemp(tmpfile);
+
+		if (fd == -1) {
+			ilbadm_err(gettext("cannot create working file"));
+			exit(1);
+		}
+	}
+	fp = fdopen(fd, "w");
+	if (fp == NULL) {
+		ilbadm_err(gettext("cannot open file for writing"));
+		/* do not leave the working file behind */
+		if (*tmpfile != '\0')
+			(void) unlink(tmpfile);
+		exit(1);
+	}
+
+	rclib = ilb_open(&h);
+	if (rclib != ILB_STATUS_OK) {
+		ilbadm_err(ilb_errstr(rclib));
+		rc = ILBADM_LIBERR;
+		goto out;
+	}
+
+	rc = ilbadm_export_servergroups(h, fp);
+	if (rc != ILBADM_OK)
+		goto out;
+
+	rc = ilbadm_export_hc(h, fp);
+	if (rc != ILBADM_OK)
+		goto out;
+
+	rc = ilbadm_export_rules(h, fp);
+	if (rc != ILBADM_OK)
+		goto out;
+
+	if (fname != NULL) {
+		if (rename(tmpfile, fname) == -1) {
+			ilbadm_err(gettext("cannot create %s: %s"), fname,
+			    strerror(errno));
+			exit(1);
+		}
+		/* renamed: nothing left to unlink below */
+		*tmpfile = '\0';
+	}
+
+out:
+	if (h != ILB_INVALID_HANDLE)
+		(void) ilb_close(h);
+
+	/* ILBADM_LIBERR has already been reported where it occurred */
+	if ((rc != ILBADM_OK) && (rc != ILBADM_LIBERR))
+		ilbadm_err(ilbadm_errstr(rc));
+	(void) fclose(fp);
+	if (*tmpfile != '\0')
+		(void) unlink(tmpfile);
+	return (rc);
+}
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_nat.c b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_nat.c
new file mode 100644
index 0000000000..dad0f1cb53
--- /dev/null
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_nat.c
@@ -0,0 +1,222 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <strings.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <libilb.h>
+#include "ilbadm.h"
+
+/*
+ * For each iteration through the kernel table, ask for at most NUM_ENTRIES
+ * entries to be returned.
+ */
+#define NUM_ENTRIES 500
+
+/*
+ * Print one NAT table entry on stdout as
+ *	PROTO: out_global > in_global >>> out_local > in_local
+ * where every endpoint is written as "address.port".  The entry is shown
+ * in IPv4 notation when nat_out_global is a v4-mapped address and in
+ * IPv6 notation otherwise.
+ */
+static void
+print_nat_info(ilb_nat_info_t *info)
+{
+	char		abuf[INET6_ADDRSTRLEN];
+	const char	*txt;
+	char		*proto;
+	ipaddr_t	v4;
+
+	switch (info->nat_proto) {
+	case IPPROTO_TCP:
+		proto = "TCP";
+		break;
+	case IPPROTO_UDP:
+		proto = "UDP";
+		break;
+	default:
+		proto = "Unknown";
+		break;
+	}
+	(void) printf("%4s: ", proto);
+
+	if (!IN6_IS_ADDR_V4MAPPED(&info->nat_out_global)) {
+		txt = inet_ntop(AF_INET6, &info->nat_out_global, abuf,
+		    INET6_ADDRSTRLEN);
+		(void) printf("%s.%d > ", txt,
+		    ntohs(info->nat_out_global_port));
+
+		txt = inet_ntop(AF_INET6, &info->nat_in_global, abuf,
+		    INET6_ADDRSTRLEN);
+		(void) printf("%s.%d >>> ", txt,
+		    ntohs(info->nat_in_global_port));
+
+		txt = inet_ntop(AF_INET6, &info->nat_out_local, abuf,
+		    INET6_ADDRSTRLEN);
+		(void) printf("%s.%d > ", txt,
+		    ntohs(info->nat_out_local_port));
+
+		txt = inet_ntop(AF_INET6, &info->nat_in_local, abuf,
+		    INET6_ADDRSTRLEN);
+		(void) printf("%s.%d\n", txt,
+		    ntohs(info->nat_in_local_port));
+		return;
+	}
+
+	/* v4-mapped: extract the IPv4 address before formatting it */
+	IN6_V4MAPPED_TO_IPADDR(&info->nat_out_global, v4);
+	txt = inet_ntop(AF_INET, &v4, abuf, INET6_ADDRSTRLEN);
+	(void) printf("%s.%d > ", txt, ntohs(info->nat_out_global_port));
+
+	IN6_V4MAPPED_TO_IPADDR(&info->nat_in_global, v4);
+	txt = inet_ntop(AF_INET, &v4, abuf, INET6_ADDRSTRLEN);
+	(void) printf("%s.%d >>> ", txt, ntohs(info->nat_in_global_port));
+
+	IN6_V4MAPPED_TO_IPADDR(&info->nat_out_local, v4);
+	txt = inet_ntop(AF_INET, &v4, abuf, INET6_ADDRSTRLEN);
+	(void) printf("%s.%d > ", txt, ntohs(info->nat_out_local_port));
+
+	IN6_V4MAPPED_TO_IPADDR(&info->nat_in_local, v4);
+	txt = inet_ntop(AF_INET, &v4, abuf, INET6_ADDRSTRLEN);
+	(void) printf("%s.%d\n", txt, ntohs(info->nat_in_local_port));
+}
+
+/*
+ * Print one persistence table entry on stdout as
+ *	rule-name: requesting-address --> server-address
+ * in IPv4 notation when persist_req_addr is a v4-mapped address, in
+ * IPv6 notation otherwise.
+ */
+static void
+print_persist_info(ilb_persist_info_t *info)
+{
+	char		abuf[INET6_ADDRSTRLEN];
+	const char	*txt;
+
+	(void) printf("%s: ", info->persist_rule_name);
+
+	if (!IN6_IS_ADDR_V4MAPPED(&info->persist_req_addr)) {
+		txt = inet_ntop(AF_INET6, &info->persist_req_addr, abuf,
+		    INET6_ADDRSTRLEN);
+		(void) printf("%s --> ", txt);
+
+		txt = inet_ntop(AF_INET6, &info->persist_srv_addr, abuf,
+		    INET6_ADDRSTRLEN);
+		(void) printf("%s\n", txt);
+	} else {
+		ipaddr_t	v4;
+
+		IN6_V4MAPPED_TO_IPADDR(&info->persist_req_addr, v4);
+		txt = inet_ntop(AF_INET, &v4, abuf, INET6_ADDRSTRLEN);
+		(void) printf("%s --> ", txt);
+
+		IN6_V4MAPPED_TO_IPADDR(&info->persist_srv_addr, v4);
+		txt = inet_ntop(AF_INET, &v4, abuf, INET6_ADDRSTRLEN);
+		(void) printf("%s\n", txt);
+	}
+}
+
+/* Tell ilbadm_show_info() which table to show. */
+enum which_tbl {
+ show_nat = 1,
+ show_persist
+};
+
+/*
+ * The single result buffer of ilbadm_show_info(), viewed either as an
+ * array of NAT entries (nbuf), an array of persistence entries (pbuf) or
+ * as raw memory for malloc()/bzero()/free() (buf).
+ */
+typedef union {
+ ilb_nat_info_t *nbuf;
+ ilb_persist_info_t *pbuf;
+ char *buf;
+} show_buf_t;
+
+/*
+ * Common implementation of "show-nat [count]" and "show-persist [count]".
+ * tbl selects the table.  Entries are fetched from the library in
+ * batches and printed until the library signals the end of the table or
+ * the requested number of entries has been shown.
+ *
+ * Returns ILBADM_OK on success, ILBADM_EINVAL for a bad argument list,
+ * ILBADM_ENOMEM when the buffer cannot be allocated and ILBADM_LIBERR
+ * for library failures; a message is printed for every error.
+ */
+static ilbadm_status_t
+ilbadm_show_info(int argc, char *argv[], enum which_tbl tbl)
+{
+	ilb_handle_t	h = ILB_INVALID_HANDLE;
+	show_buf_t	buf;
+	ilb_status_t	rclib = ILB_STATUS_OK;
+	ilbadm_status_t	rc = ILBADM_OK;
+	int32_t		num_entries;
+	size_t		i, num;
+	boolean_t	end;
+	size_t		entry_sz;
+
+	/* so that the common exit path can free it unconditionally */
+	buf.buf = NULL;
+
+	/*
+	 * If the user does not specify a count, return the whole table.
+	 * This requires setting the fourth param to ilb_show_nat/persist()
+	 * end to B_FALSE. Otherwise, set end to B_TRUE;
+	 */
+
+	switch (argc) {
+	case 1:
+		num_entries = -1;
+		end = B_FALSE;
+		break;
+	case 2:
+		num_entries = atoi(argv[1]);
+		if (num_entries < 1) {
+			rc = ILBADM_EINVAL;
+			goto out;
+		}
+		end = B_TRUE;
+		break;
+	default:
+		rc = ILBADM_EINVAL;
+		goto out;
+	}
+
+	if (tbl == show_nat)
+		entry_sz = sizeof (ilb_nat_info_t);
+	else
+		entry_sz = sizeof (ilb_persist_info_t);
+	if ((buf.buf = malloc((num_entries > 0 ? num_entries : NUM_ENTRIES) *
+	    entry_sz)) == NULL) {
+		rc = ILBADM_ENOMEM;
+		goto out;
+	}
+
+	rclib = ilb_open(&h);
+	if (rclib != ILB_STATUS_OK)
+		goto out;
+
+	do {
+		/* on input num is the capacity, on return the fill count */
+		num = num_entries > 0 ? num_entries : NUM_ENTRIES;
+		bzero(buf.buf, num * entry_sz);
+
+		if (tbl == show_nat)
+			rclib = ilb_show_nat(h, buf.nbuf, &num, &end);
+		else
+			rclib = ilb_show_persist(h, buf.pbuf, &num, &end);
+
+		if (rclib != ILB_STATUS_OK)
+			break;
+
+		for (i = 0; i < num; i++) {
+			if (tbl == show_nat)
+				print_nat_info(&buf.nbuf[i]);
+			else
+				print_persist_info(&buf.pbuf[i]);
+		}
+		if (num_entries > 0) {
+			num_entries -= num;
+			if (num_entries <= 0)
+				break;
+		}
+	} while (!end);
+out:
+	/* also reached when ilb_open() fails, so release the buffer here */
+	free(buf.buf);
+	if (h != ILB_INVALID_HANDLE)
+		(void) ilb_close(h);
+	if (rclib != ILB_STATUS_OK) {
+		ilbadm_err(ilb_errstr(rclib));
+		rc = ILBADM_LIBERR;
+	}
+	if ((rc != ILBADM_OK) && (rc != ILBADM_LIBERR))
+		ilbadm_err(ilbadm_errstr(rc));
+	return (rc);
+}
+
+
+/* Implements "ilbadm show-nat [count]"; see ilbadm_show_info(). */
+ilbadm_status_t
+ilbadm_show_nat(int argc, char *argv[])
+{
+	ilbadm_status_t	rc;
+
+	rc = ilbadm_show_info(argc, argv, show_nat);
+	return (rc);
+}
+
+/* Implements "ilbadm show-persist [count]"; see ilbadm_show_info(). */
+ilbadm_status_t
+ilbadm_show_persist(int argc, char *argv[])
+{
+	ilbadm_status_t	rc;
+
+	rc = ilbadm_show_info(argc, argv, show_persist);
+	return (rc);
+}
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_rules.c b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_rules.c
new file mode 100644
index 0000000000..dc428c4998
--- /dev/null
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_rules.c
@@ -0,0 +1,1313 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <sys/list.h>
+#include <netdb.h>
+#include <ofmt.h>
+#include <assert.h>
+#include <libilb.h>
+#include "ilbadm.h"
+
+/*
+ * Key tables for the rule options.  Every entry holds a key code and two
+ * spellings of the key (the second one may be empty); each table ends
+ * with an ILB_KEY_BAD entry whose names are empty.
+ */
+static ilbadm_key_name_t rl_incoming_keys[] = {
+ {ILB_KEY_VIP, "vip", ""},
+ {ILB_KEY_PORT, "port", ""},
+ {ILB_KEY_PROTOCOL, "protocol", "prot"},
+ {ILB_KEY_BAD, "", ""}
+};
+static ilbadm_key_name_t rl_method_keys[] = {
+ {ILB_KEY_ALGORITHM, "lbalg", "algo"},
+ {ILB_KEY_TYPE, "type", "topo"},
+ {ILB_KEY_SRC, "proxy-src", "nat-src"},
+ {ILB_KEY_STICKY, "pmask", "persist"},
+ {ILB_KEY_BAD, "", ""}
+};
+static ilbadm_key_name_t rl_outgoing_keys[] = {
+ {ILB_KEY_SERVERGROUP, "servergroup", "sg"},
+ {ILB_KEY_BAD, "", ""}
+};
+static ilbadm_key_name_t rl_healthchk_keys[] = {
+ {ILB_KEY_HEALTHCHECK, "hc-name", "hcn"},
+ {ILB_KEY_HCPORT, "hc-port", "hcp"},
+ {ILB_KEY_BAD, "", ""}
+};
+static ilbadm_key_name_t rl_timer_keys[] = {
+ {ILB_KEY_CONNDRAIN, "conn-drain", ""},
+ {ILB_KEY_NAT_TO, "nat-timeout", ""},
+ {ILB_KEY_STICKY_TO, "persist-timeout", ""},
+ {ILB_KEY_BAD, "", ""}
+};
+
+/* NULL-terminated list of all tables above, searched by ilbadm_key_to_opt() */
+static ilbadm_key_name_t *all_keys[] = {
+ rl_incoming_keys, rl_method_keys, rl_outgoing_keys,
+ rl_healthchk_keys, rl_timer_keys, NULL
+};
+
+
+/*
+ * field ids for of_* functions
+ *
+ * The ids are only unique within one group; each group is used as the
+ * id column of the ofmt field tables below for one callback.
+ */
+/* address fields, see of_rl_ip() and of_rl_mask() */
+#define OF_IP_VIP 0
+#define OF_IP_PROXYSRC 1
+#define OF_IP_STICKYMASK 2
+
+/* string fields, see of_str() */
+#define OF_STR_RNAME 0
+#define OF_STR_HCNAME 1
+#define OF_STR_SGNAME 2
+#define OF_STR_INTERFACE 3
+
+/* port fields, used with of_rport */
+#define OF_PORT 0
+#define OF_HCPORT 1
+
+/* timer fields, used with of_time */
+#define OF_T_CONN 0
+#define OF_T_NAT 1
+#define OF_T_STICKY 2
+
+/* server fields, used with of_srv2str */
+#define OF_SRV_ID 0
+#define OF_SRV_ADDR 1
+#define OF_SRV_PORT 2
+#define OF_SRV_STATUS 3
+#define OF_SRV_RNAME 4
+#define OF_SRV_SGNAME 5
+#define OF_SRV_HOSTNAME 6
+
+/* some field sizes of ofmt_field_t arrays */
+#define IPv4_FIELDWIDTH 16
+#define IPv6_FIELDWIDTH 39
+#define ILB_HOSTNAMELEN 20
+#define ILB_STATUSFIELD_LEN 7
+
+typedef struct arg_struct {
+ int flags;
+ char *o_str;
+ ofmt_field_t *o_fields;
+ ofmt_handle_t oh;
+} ilbadm_sh_rl_arg_t;
+
+/* carries the output stream; presumably for the rule export - confirm */
+typedef struct ilbadm_rl_exp_arg {
+ FILE *fp;
+} ilbadm_rl_exp_arg_t;
+
+/* callback argument read by the rule ofmt callbacks (of_str() etc.) */
+typedef struct ilbadm_rl_list_arg {
+ ilb_handle_t h;
+ ilb_rule_data_t *rd;
+} ilbadm_rl_list_arg_t;
+
+typedef struct ilbadm_rl_srvlist_arg {
+ char *sgname;
+ ilb_server_data_t *sd;
+ ilb_rule_data_t *rd;
+ int flags;
+ char *o_str;
+ ofmt_field_t *o_fields;
+ ofmt_handle_t oh;
+} ilbadm_rl_srvlist_arg_t;
+
+/* forward declarations of the ofmt callbacks named in the tables below */
+static ofmt_cb_t of_algo;
+static ofmt_cb_t of_proto;
+static ofmt_cb_t of_rl_ip;
+static ofmt_cb_t of_rl_mask;
+static ofmt_cb_t of_rport;
+static ofmt_cb_t of_rstatus;
+static ofmt_cb_t of_str;
+static ofmt_cb_t of_time;
+static ofmt_cb_t of_topo;
+static ofmt_cb_t of_rl_srvlist;
+
+static boolean_t of_srv2str(ofmt_arg_t *, char *, uint_t);
+static boolean_t of_port2str(in_port_t, in_port_t, char *, uint_t);
+
+/*
+ * Rule columns: name, width, field id handed to the callback, callback.
+ * rfields_v4 and rfields_v6 differ only in the widths of the PROXY-SRC
+ * and VIP columns.
+ */
+static ofmt_field_t rfields_v4[] = {
+ {"RULENAME", ILB_NAMESZ, OF_STR_RNAME, of_str},
+ {"STATUS", ILB_STATUSFIELD_LEN, 0, of_rstatus},
+ {"PORT", 10, OF_PORT, of_rport},
+ {"PROTOCOL", 5, 0, of_proto},
+ {"LBALG", 12, 0, of_algo},
+ {"TYPE", 8, 0, of_topo},
+ {"PROXY-SRC", 2*IPv4_FIELDWIDTH+1, OF_IP_PROXYSRC, of_rl_ip},
+ {"PMASK", 6, OF_IP_STICKYMASK, of_rl_mask},
+ {"HC-NAME", ILB_NAMESZ, OF_STR_HCNAME, of_str},
+ {"HC-PORT", 8, OF_HCPORT, of_rport},
+ {"CONN-DRAIN", 11, OF_T_CONN, of_time},
+ {"NAT-TIMEOUT", 12, OF_T_NAT, of_time},
+ {"PERSIST-TIMEOUT", 16, OF_T_STICKY, of_time},
+ {"SERVERGROUP", ILB_SGNAME_SZ, OF_STR_SGNAME, of_str},
+ {"VIP", IPv4_FIELDWIDTH, OF_IP_VIP, of_rl_ip},
+ {"SERVERS", 20, 0, of_rl_srvlist},
+ {NULL, 0, 0, NULL}
+};
+
+static ofmt_field_t rfields_v6[] = {
+ {"RULENAME", ILB_NAMESZ, OF_STR_RNAME, of_str},
+ {"STATUS", ILB_STATUSFIELD_LEN, 0, of_rstatus},
+ {"PORT", 10, OF_PORT, of_rport},
+ {"PROTOCOL", 5, 0, of_proto},
+ {"LBALG", 12, 0, of_algo},
+ {"TYPE", 8, 0, of_topo},
+ {"PROXY-SRC", IPv6_FIELDWIDTH, OF_IP_PROXYSRC, of_rl_ip},
+ {"PMASK", 6, OF_IP_STICKYMASK, of_rl_mask},
+ {"HC-NAME", ILB_NAMESZ, OF_STR_HCNAME, of_str},
+ {"HC-PORT", 8, OF_HCPORT, of_rport},
+ {"CONN-DRAIN", 11, OF_T_CONN, of_time},
+ {"NAT-TIMEOUT", 12, OF_T_NAT, of_time},
+ {"PERSIST-TIMEOUT", 16, OF_T_STICKY, of_time},
+ {"SERVERGROUP", ILB_SGNAME_SZ, OF_STR_SGNAME, of_str},
+ {"VIP", IPv6_FIELDWIDTH, OF_IP_VIP, of_rl_ip},
+ {"SERVERS", 20, 0, of_rl_srvlist},
+ {NULL, 0, 0, NULL}
+};
+
+/*
+ * Server columns, all rendered by of_srv2str().  ssfields_v4 and
+ * ssfields_v6 differ only in the width of the ADDRESS column.
+ */
+static ofmt_field_t ssfields_v4[] = {
+ {"SERVERID", ILB_NAMESZ, OF_SRV_ID, of_srv2str},
+ {"ADDRESS", IPv4_FIELDWIDTH, OF_SRV_ADDR, of_srv2str},
+ {"PORT", 5, OF_SRV_PORT, of_srv2str},
+ {"RULENAME", ILB_NAMESZ, OF_SRV_RNAME, of_srv2str},
+ {"STATUS", ILB_STATUSFIELD_LEN, OF_SRV_STATUS, of_srv2str},
+ {"SERVERGROUP", ILB_SGNAME_SZ, OF_SRV_SGNAME, of_srv2str},
+ {"HOSTNAME", ILB_HOSTNAMELEN, OF_SRV_HOSTNAME, of_srv2str},
+ {NULL, 0, 0, NULL}
+};
+
+static ofmt_field_t ssfields_v6[] = {
+ {"SERVERID", ILB_NAMESZ, OF_SRV_ID, of_srv2str},
+ {"ADDRESS", IPv6_FIELDWIDTH, OF_SRV_ADDR, of_srv2str},
+ {"PORT", 5, OF_SRV_PORT, of_srv2str},
+ {"RULENAME", ILB_NAMESZ, OF_SRV_RNAME, of_srv2str},
+ {"STATUS", ILB_STATUSFIELD_LEN, OF_SRV_STATUS, of_srv2str},
+ {"SERVERGROUP", ILB_SGNAME_SZ, OF_SRV_SGNAME, of_srv2str},
+ {"HOSTNAME", ILB_HOSTNAMELEN, OF_SRV_HOSTNAME, of_srv2str},
+ {NULL, 0, 0, NULL}
+};
+
+extern int optind, optopt, opterr;
+extern char *optarg;
+
+extern ilbadm_val_type_t algo_types[];
+extern ilbadm_val_type_t topo_types[];
+
+/*
+ * Return the option name that the key table n lists for key code k.
+ * When k is not in the table, the name of the terminating ILB_KEY_BAD
+ * entry is returned.
+ */
+static char *
+i_key_to_opt(ilbadm_key_name_t *n, ilbadm_key_code_t k)
+{
+	ilbadm_key_name_t	*ent = n;
+
+	while (ent->k_key != ILB_KEY_BAD && ent->k_key != k)
+		ent++;
+
+	return (ent->k_name);
+}
+
+/*
+ * Return the option name for key code k, searching every table listed
+ * in all_keys[] in order; NULL when no table yields a non-empty name.
+ */
+char *
+ilbadm_key_to_opt(ilbadm_key_code_t k)
+{
+	ilbadm_key_name_t	**tbl;
+
+	for (tbl = all_keys; *tbl != NULL; tbl++) {
+		char	*found = i_key_to_opt(*tbl, k);
+
+		if (*found != '\0')
+			return (found);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Format a port ("port=N") or a port range ("port=N-M") into buf, which
+ * holds sz bytes.  A range is printed only when port2 is larger than
+ * port1.
+ *
+ * ports are in HOST byte order
+ *
+ * The parameters are declared short, so a port above 32767 arrives here
+ * as a negative number.  The values are therefore taken as unsigned
+ * 16-bit quantities before they are compared and printed.
+ */
+static void
+ports2str(short port1, short port2, char *buf, const int sz)
+{
+	int	lo = (unsigned short)port1;
+	int	hi = (unsigned short)port2;
+
+	if (hi <= lo)
+		(void) snprintf(buf, sz, "port=%d", lo);
+	else
+		(void) snprintf(buf, sz, "port=%d-%d", lo, hi);
+}
+
+/*
+ * Format protocol number proto as "protocol=<name>" into buf, which
+ * holds sz bytes, using the name getprotobynumber() reports.  A number
+ * that cannot be resolved is written as "(bad proto N)".  Both forms are
+ * limited to sz bytes.
+ */
+static void
+proto2str(short proto, char *buf, int sz)
+{
+	struct protoent	*pe;
+
+	pe = getprotobynumber((int)proto);
+	if (pe != NULL)
+		(void) snprintf(buf, sz, "protocol=%s", pe->p_name);
+	else
+		(void) snprintf(buf, sz, "(bad proto %d)", proto);
+}
+
+/*
+ * Write "lbalg=<name>" for algo into buf (sz bytes); the name comes from
+ * the algo_types[] table, "(bad algo)" is used when there is none.
+ */
+static void
+algo2str(ilb_algo_t algo, char *buf, int sz)
+{
+	char	*name = i_str_from_val((int)algo, &algo_types[0]);
+
+	if (name == NULL || *name == '\0')
+		name = "(bad algo)";
+
+	(void) snprintf(buf, sz, "lbalg=%s", name);
+}
+
+/*
+ * Write just the name of algo from algo_types[] into buf (sz bytes), or
+ * an empty string when there is none.  Returns what snprintf() returns.
+ */
+static int
+algo2bare_str(ilb_algo_t algo, char *buf, int sz)
+{
+	char	*name = i_str_from_val((int)algo, &algo_types[0]);
+
+	if (name == NULL || *name == '\0')
+		name = "";
+
+	return (snprintf(buf, sz, "%s", name));
+}
+
+/*
+ * Write "type=<name>" for topo into buf (sz bytes); the name comes from
+ * the topo_types[] table, "(bad type)" is used when there is none.
+ */
+static void
+topo2str(ilb_topo_t topo, char *buf, int sz)
+{
+	char	*name = i_str_from_val((int)topo, &topo_types[0]);
+
+	if (name == NULL || *name == '\0')
+		name = "(bad type)";
+
+	(void) snprintf(buf, sz, "type=%s", name);
+}
+
+/*
+ * Write just the name of topo from topo_types[] into buf (sz bytes), or
+ * an empty string when there is none.  Returns what snprintf() returns.
+ */
+static int
+topo2bare_str(ilb_topo_t topo, char *buf, int sz)
+{
+	char	*name = i_str_from_val((int)topo, &topo_types[0]);
+
+	if (name == NULL || *name == '\0')
+		name = "";
+
+	return (snprintf(buf, sz, "%s", name));
+}
+
+/*
+ * ofmt callback for the string columns of a rule: copies the rule name,
+ * server group name or health check name - selected by
+ * of_arg->ofmt_id - into buf.  An unset health check name and an
+ * unknown id leave buf untouched.  Always returns B_TRUE.
+ */
+static boolean_t
+of_str(ofmt_arg_t *of_arg, char *buf, uint_t bufsize)
+{
+	ilbadm_rl_list_arg_t	*la;
+	ilb_rule_data_t		*rule;
+
+	la = (ilbadm_rl_list_arg_t *)of_arg->ofmt_cbarg;
+	rule = (ilb_rule_data_t *)la->rd;
+
+	if (of_arg->ofmt_id == OF_STR_RNAME) {
+		(void) strlcpy(buf, rule->r_name, bufsize);
+	} else if (of_arg->ofmt_id == OF_STR_SGNAME) {
+		(void) strlcpy(buf, rule->r_sgname, bufsize);
+	} else if (of_arg->ofmt_id == OF_STR_HCNAME) {
+		if (rule->r_hcname != NULL && *(rule->r_hcname) != '\0')
+			(void) strlcpy(buf, rule->r_hcname, bufsize);
+	}
+
+	return (B_TRUE);
+}
+
+/* ARGSUSED */
+/*
+ * ofmt callback for the rule's protocol column: writes "TCP" or "UDP".  For
+ * any other protocol nothing is written and B_FALSE is returned.
+ */
+static boolean_t
+of_proto(ofmt_arg_t *of_arg, char *buf, uint_t bufsize)
+{
+	ilbadm_rl_list_arg_t	*ra = (ilbadm_rl_list_arg_t *)of_arg->ofmt_cbarg;
+	ilb_rule_data_t		*rd = (ilb_rule_data_t *)ra->rd;
+	const char		*label = NULL;
+
+	if (rd->r_proto == IPPROTO_TCP)
+		label = "TCP";
+	else if (rd->r_proto == IPPROTO_UDP)
+		label = "UDP";
+
+	if (label == NULL)
+		return (B_FALSE);
+
+	(void) strlcpy(buf, label, bufsize);
+	return (B_TRUE);
+}
+
+/*
+ * ofmt callback for the address-valued rule columns: the VIP, the proxy
+ * source range (printed as "start-end") or the sticky mask, selected by the
+ * field id.  Addresses whose family is neither AF_INET nor AF_INET6 are not
+ * printed.  Always returns B_TRUE.
+ */
+static boolean_t
+of_rl_ip(ofmt_arg_t *of_arg, char *buf, uint_t bufsize)
+{
+	ilbadm_rl_list_arg_t	*ra = (ilbadm_rl_list_arg_t *)of_arg->ofmt_cbarg;
+	ilb_rule_data_t		*rd = (ilb_rule_data_t *)ra->rd;
+	ilb_ip_addr_t		*first = NULL, *last = NULL;
+	int			used;
+
+	switch (of_arg->ofmt_id) {
+	case OF_IP_VIP:
+		first = &rd->r_vip;
+		break;
+	case OF_IP_PROXYSRC:
+		first = &rd->r_nat_src_start;
+		last = &rd->r_nat_src_end;
+		break;
+	case OF_IP_STICKYMASK:
+		first = &rd->r_stickymask;
+		break;
+	}
+
+	/* only print something valid */
+	if (first != NULL &&
+	    (first->ia_af == AF_INET || first->ia_af == AF_INET6))
+		ip2str(first, buf, bufsize, V6_ADDRONLY);
+
+	/* the range end is only added behind a start address in buf */
+	if (last == NULL || buf[0] == '\0')
+		return (B_TRUE);
+	if (last->ia_af != AF_INET && last->ia_af != AF_INET6)
+		return (B_TRUE);
+
+	used = strlen(buf);
+	buf[used] = '-';
+	ip2str(last, buf + used + 1, bufsize - used - 1, V6_ADDRONLY);
+
+	return (B_TRUE);
+}
+
+/*
+ * ofmt callback for the sticky-mask column: prints the mask as "/<prefix
+ * length>" for rules that have ILB_FLAGS_RULE_STICKY set and writes nothing
+ * for all other rules.  Always returns B_TRUE.
+ */
+static boolean_t
+of_rl_mask(ofmt_arg_t *of_arg, char *buf, uint_t bufsize)
+{
+	ilbadm_rl_list_arg_t	*ra = (ilbadm_rl_list_arg_t *)of_arg->ofmt_cbarg;
+	ilb_rule_data_t		*rd = (ilb_rule_data_t *)ra->rd;
+
+	assert(of_arg->ofmt_id == OF_IP_STICKYMASK);
+
+	if ((rd->r_flags & ILB_FLAGS_RULE_STICKY) != 0) {
+		(void) snprintf(buf, bufsize, "/%d",
+		    ilbadm_mask_to_prefixlen(&rd->r_stickymask));
+	}
+	return (B_TRUE);
+}
+
+/*
+ * Render the rule's healthcheck port into buf: the port number (converted
+ * with ntohs()) when one is set, "ANY" when the probe flag is
+ * ILB_HCI_PROBE_ANY, and the empty string otherwise.
+ */
+static void
+hcport_print(ilb_rule_data_t *rd, char *buf, uint_t bufsize)
+{
+	if (rd->r_hcport != 0) {
+		(void) snprintf(buf, bufsize, "%d", ntohs(rd->r_hcport));
+		return;
+	}
+
+	if (rd->r_hcpflag == ILB_HCI_PROBE_ANY) {
+		(void) snprintf(buf, bufsize, "ANY");
+		return;
+	}
+
+	buf[0] = '\0';
+}
+/*
+ * ofmt callback for the port-valued rule columns.  OF_PORT prints the
+ * rule's port (range) via of_port2str() and passes on its result;
+ * OF_HCPORT prints the healthcheck port, but only for rules that carry a
+ * healthcheck name.  Every other path returns B_TRUE.
+ */
+static boolean_t
+of_rport(ofmt_arg_t *of_arg, char *buf, uint_t bufsize)
+{
+	ilbadm_rl_list_arg_t	*ra = (ilbadm_rl_list_arg_t *)of_arg->ofmt_cbarg;
+	ilb_rule_data_t		*rd = (ilb_rule_data_t *)ra->rd;
+
+	if (of_arg->ofmt_id == OF_PORT) {
+		return (of_port2str(rd->r_minport, rd->r_maxport, buf,
+		    bufsize));
+	}
+
+	if (of_arg->ofmt_id != OF_HCPORT)
+		return (B_TRUE);
+
+	/* only print a hcport if there's a hc name as well */
+	if (rd->r_hcname[0] == '\0')
+		return (B_TRUE);
+
+	hcport_print(rd, buf, bufsize);
+	return (B_TRUE);
+}
+
+/* ARGSUSED */
+/*
+ * ofmt callback for the rule status column: "E" when the rule has
+ * ILB_FLAGS_RULE_ENABLED set, "D" otherwise.  bufsize is not consulted.
+ */
+static boolean_t
+of_rstatus(ofmt_arg_t *of_arg, char *buf, uint_t bufsize)
+{
+	ilbadm_rl_list_arg_t	*ra = (ilbadm_rl_list_arg_t *)of_arg->ofmt_cbarg;
+	ilb_rule_data_t		*rd = (ilb_rule_data_t *)ra->rd;
+	int			is_on;
+
+	is_on = ((rd->r_flags & ILB_FLAGS_RULE_ENABLED) ==
+	    ILB_FLAGS_RULE_ENABLED);
+
+	buf[0] = is_on ? 'E' : 'D';
+	buf[1] = '\0';
+	return (B_TRUE);
+}
+
+/*
+ * ofmt callback for the algorithm column.  Returns B_FALSE when
+ * algo2bare_str() produced no characters, B_TRUE otherwise.
+ */
+static boolean_t
+of_algo(ofmt_arg_t *of_arg, char *buf, uint_t bufsize)
+{
+	ilbadm_rl_list_arg_t	*ra = (ilbadm_rl_list_arg_t *)of_arg->ofmt_cbarg;
+	ilb_rule_data_t		*rd = (ilb_rule_data_t *)ra->rd;
+	int			written;
+
+	written = algo2bare_str(rd->r_algo, buf, bufsize);
+	return (written == 0 ? B_FALSE : B_TRUE);
+}
+
+/*
+ * ofmt callback for the topology ("TYPE") column.  Returns B_FALSE when
+ * topo2bare_str() produced no characters, B_TRUE otherwise.
+ */
+static boolean_t
+of_topo(ofmt_arg_t *of_arg, char *buf, uint_t bufsize)
+{
+	ilbadm_rl_list_arg_t	*ra = (ilbadm_rl_list_arg_t *)of_arg->ofmt_cbarg;
+	ilb_rule_data_t		*rd = (ilb_rule_data_t *)ra->rd;
+	int			written;
+
+	written = topo2bare_str(rd->r_topo, buf, bufsize);
+	return (written == 0 ? B_FALSE : B_TRUE);
+}
+
+/*
+ * ofmt callback for the timer columns: prints the connection-drain, NAT or
+ * sticky timeout of the rule as an unsigned number, selected by the field
+ * id.  An id that matches none of them leaves buf untouched.  Always
+ * returns B_TRUE.
+ */
+static boolean_t
+of_time(ofmt_arg_t *of_arg, char *buf, uint_t bufsize)
+{
+	ilbadm_rl_list_arg_t	*ra = (ilbadm_rl_list_arg_t *)of_arg->ofmt_cbarg;
+	ilb_rule_data_t		*rd = (ilb_rule_data_t *)ra->rd;
+	uint_t			id = of_arg->ofmt_id;
+
+	if (id == OF_T_CONN)
+		(void) snprintf(buf, bufsize, "%u", rd->r_conndrain);
+	else if (id == OF_T_NAT)
+		(void) snprintf(buf, bufsize, "%u", rd->r_nat_timeout);
+	else if (id == OF_T_STICKY)
+		(void) snprintf(buf, bufsize, "%u", rd->r_sticky_timeout);
+
+	return (B_TRUE);
+}
+
+/*
+ * Output cursor shared between of_rl_srvlist() and its ilb_walk_servers()
+ * callback srv2srvID(), which appends one server ID per call.
+ */
+typedef struct rl_showlist_arg {
+ char *buf; /* next position to write to */
+ uint_t bufsize; /* space left, counted from buf */
+} rl_showlist_arg_t;
+
+/* ARGSUSED */
+/*
+ * Called by ilb_walk_servers(), so the unused "h" and "sgname" arguments
+ * cannot be dropped.  Appends "<serverID>," at the cursor passed in "arg"
+ * (an rl_showlist_arg_t) and advances the cursor past what was written.
+ *
+ * The cursor only ever moves by the number of characters that actually fit:
+ * once the buffer is full, later server IDs are silently dropped instead of
+ * pushing the cursor beyond the buffer and wrapping the unsigned space
+ * counter.  Always returns ILB_STATUS_OK so the walk continues.
+ */
+static ilb_status_t
+srv2srvID(ilb_handle_t h, ilb_server_data_t *sd, const char *sgname, void *arg)
+{
+	rl_showlist_arg_t	*sla = (rl_showlist_arg_t *)arg;
+	int			len;
+
+	if (sla->bufsize == 0)
+		return (ILB_STATUS_OK);
+
+	len = snprintf(sla->buf, sla->bufsize, "%s,", sd->sd_srvID);
+	if (len < 0)
+		return (ILB_STATUS_OK);
+
+	/* snprintf() reports the untruncated length; clamp to what fit */
+	if ((uint_t)len >= sla->bufsize)
+		len = sla->bufsize - 1;
+
+	sla->buf += len;
+	sla->bufsize -= len;
+
+	return (ILB_STATUS_OK);
+}
+
+/*
+ * ofmt callback for the rule's server list column: walks the servers of the
+ * rule's servergroup and prints their IDs as a comma-separated list.
+ *
+ * When the walk appended nothing (empty servergroup, or the walk failed
+ * before reaching a server) the result is the empty string; the trailing
+ * separator is only stripped when something was actually written, so no
+ * byte in front of buf is ever touched.  Always returns B_TRUE.
+ */
+static boolean_t
+of_rl_srvlist(ofmt_arg_t *of_arg, char *buf, uint_t bufsize)
+{
+	ilbadm_rl_list_arg_t	*ra = (ilbadm_rl_list_arg_t *)of_arg->ofmt_cbarg;
+	ilb_rule_data_t		*rd = (ilb_rule_data_t *)ra->rd;
+	rl_showlist_arg_t	sla;
+
+	sla.buf = buf;
+	sla.bufsize = bufsize;
+
+	(void) ilb_walk_servers(ra->h, srv2srvID, rd->r_sgname,
+	    (void *)&sla);
+
+	if (sla.buf == buf) {
+		/* nothing was appended */
+		if (bufsize > 0)
+			buf[0] = '\0';
+	} else {
+		/* we're trailing a ',' which we need to remove */
+		*--sla.buf = '\0';
+	}
+
+	return (B_TRUE);
+}
+
+/* column limit handed to ofmt_open() by the rule and rule-server listings */
+#define RMAXCOLS 120 /* enough? */
+/* NOTE(review): no user of SERVER_WIDTH is visible in this part of the file */
+#define SERVER_WIDTH (ILB_NAMESZ+1) /* 1st guess */
+
+/*
+ * Print a port or port range, given in network byte order, as "N" or "N-M"
+ * into buf.  Returns B_FALSE without writing anything when the minimum port
+ * is 0 (unspecified, i.e. all ports), B_TRUE otherwise.
+ */
+static boolean_t
+of_port2str(in_port_t minport, in_port_t maxport, char *buf, uint_t bufsize)
+{
+	in_port_t	first = ntohs(minport);
+	in_port_t	last = ntohs(maxport);
+	int		n;
+
+	if (first == 0)
+		return (B_FALSE);	/* print "unspec" == "all ports" */
+
+	n = snprintf(buf, bufsize, "%d", first);
+	if (last <= first)
+		return (B_TRUE);
+
+	(void) snprintf(buf + n, bufsize - n, "-%d", last);
+	return (B_TRUE);
+}
+
+/*
+ * Resolve address "ip" to a host name and copy it into buf (at most bufsize
+ * bytes).  An address that cannot be resolved yields the empty string and
+ * still counts as success.
+ *
+ * Returns ILBADM_INVAL_AF when the address family is neither AF_INET nor
+ * AF_INET6 (buf is left untouched then), ILBADM_OK otherwise.
+ */
+static ilbadm_status_t
+ip2hostname(ilb_ip_addr_t *ip, char *buf, uint_t bufsize)
+{
+	int		ret;
+	struct hostent	*he;
+
+	switch (ip->ia_af) {
+	case AF_INET:
+		he = getipnodebyaddr((char *)&ip->ia_v4, sizeof (ip->ia_v4),
+		    ip->ia_af, &ret);
+		break;
+	case AF_INET6:
+		he = getipnodebyaddr((char *)&ip->ia_v6, sizeof (ip->ia_v6),
+		    ip->ia_af, &ret);
+		break;
+	default:
+		return (ILBADM_INVAL_AF);
+	}
+
+	/* if we can't resolve this, just return an empty name */
+	if (he == NULL) {
+		buf[0] = '\0';
+		return (ILBADM_OK);
+	}
+
+	(void) strlcpy(buf, he->h_name, bufsize);
+	/* the hostent is allocated by getipnodebyaddr() and owned by us */
+	freehostent(he);
+
+	return (ILBADM_OK);
+}
+
+/* ARGSUSED */
+/*
+ * ilb_walk_rules() callback that prints one rule through ofmt; being called
+ * by libilb, it must return libilb errors.  "arg" is the ilbadm_sh_rl_arg_t
+ * carrying the list flags and the field string.  The ofmt handle is opened
+ * when the first rule is printed and kept in a function-local static for
+ * all later calls.
+ */
+static ilb_status_t
+ilbadm_show_onerule(ilb_handle_t h, ilb_rule_data_t *rd, void *arg)
+{
+	static ofmt_handle_t	oh = (ofmt_handle_t)NULL;
+	ilbadm_sh_rl_arg_t	*larg = (ilbadm_sh_rl_arg_t *)arg;
+	ilbadm_rl_list_arg_t	ra;
+	boolean_t		r_enabled =
+	    rd->r_flags & ILB_FLAGS_RULE_ENABLED;
+
+	if (larg->o_str == NULL) {
+		ilbadm_err(gettext("internal error"));
+		return (ILB_STATUS_GENERIC);
+	}
+
+	/*
+	 * Both LIST_*ABLED flags may be set at once while a rule has one
+	 * state only: skip the rule unless the flag matching its state was
+	 * asked for.
+	 */
+	if (r_enabled) {
+		if (!(larg->flags & ILBADM_LIST_ENABLED))
+			return (ILB_STATUS_OK);
+	} else {
+		if (!(larg->flags & ILBADM_LIST_DISABLED))
+			return (ILB_STATUS_OK);
+	}
+
+	if (oh == NULL) {
+		ofmt_field_t	*fields;
+		ofmt_status_t	oerr;
+		int		oflags = 0;
+
+		if (larg->flags & ILBADM_LIST_PARSE)
+			oflags |= OFMT_PARSABLE;
+		if (larg->flags & ILBADM_LIST_FULL)
+			oflags |= OFMT_MULTILINE;
+
+		fields = (rd->r_vip.ia_af == AF_INET) ? rfields_v4 : rfields_v6;
+
+		oerr = ofmt_open(larg->o_str, fields, oflags, RMAXCOLS, &oh);
+		if (oerr != OFMT_SUCCESS) {
+			char	e[80];
+
+			ilbadm_err(gettext("ofmt_open failed: %s"),
+			    ofmt_strerror(oh, oerr, e, sizeof (e)));
+			return (ILB_STATUS_GENERIC);
+		}
+	}
+
+	bzero(&ra, sizeof (ra));
+	ra.h = h;
+	ra.rd = rd;
+	ofmt_print(oh, &ra);
+
+	return (ILB_STATUS_OK);
+}
+
+/* field list ilbadm_show_rules() uses when -f (full listing) is given */
+static char *full_list_rule_hdrs =
+ "RULENAME,STATUS,PORT,PROTOCOL,LBALG,TYPE,PROXY-SRC,PMASK,"
+ "HC-NAME,HC-PORT,CONN-DRAIN,NAT-TIMEOUT,"
+ "PERSIST-TIMEOUT,SERVERGROUP,VIP,SERVERS";
+/* field list ilbadm_show_rules() uses when neither -o nor -f is given */
+static char *def_list_rule_hdrs =
+ "RULENAME,STATUS,LBALG,TYPE,PROTOCOL,VIP,PORT";
+
+/* ARGSUSED */
+/*
+ * Implements the rule listing subcommand.  Options:
+ *   -f  full listing (all fields, multi-line); excludes -o
+ *   -p  parsable output; requires -o with explicit field names
+ *   -o  comma-separated list of fields to print
+ *   -e  list enabled rules only / -d  list disabled rules only
+ * Remaining arguments are rule names; without any, all rules are walked.
+ * Returns ILBADM_OK or ILBADM_LIBERR; option conflicts exit(1) directly.
+ */
+ilbadm_status_t
+ilbadm_show_rules(int argc, char *argv[])
+{
+	ilbadm_sh_rl_arg_t	larg = {0, NULL, NULL, NULL};
+	ilb_handle_t		h = ILB_INVALID_HANDLE;
+	ilb_status_t		rclib = ILB_STATUS_OK;
+	ilbadm_status_t		rc = ILBADM_OK;
+	boolean_t		f_opt = B_FALSE;
+	boolean_t		o_opt = B_FALSE;
+	boolean_t		p_opt = B_FALSE;
+	int			c;
+
+	larg.flags = ILBADM_LIST_ENABLED | ILBADM_LIST_DISABLED;
+	while ((c = getopt(argc, argv, ":fpedo:")) != -1) {
+		switch ((char)c) {
+		case 'f':
+			larg.flags |= ILBADM_LIST_FULL;
+			larg.o_str = full_list_rule_hdrs;
+			f_opt = B_TRUE;
+			break;
+		case 'p':
+			larg.flags |= ILBADM_LIST_PARSE;
+			p_opt = B_TRUE;
+			break;
+		case 'o':
+			larg.o_str = optarg;
+			o_opt = B_TRUE;
+			break;
+		/* -e and -d may be repeated - make sure the last one wins */
+		case 'e':
+			larg.flags &= ILBADM_LIST_NODISABLED;
+			larg.flags |= ILBADM_LIST_ENABLED;
+			break;
+		case 'd':
+			larg.flags &= ILBADM_LIST_NOENABLED;
+			larg.flags |= ILBADM_LIST_DISABLED;
+			break;
+		case ':':
+			ilbadm_err(gettext("missing option argument for %c"),
+			    (char)optopt);
+			rc = ILBADM_LIBERR;
+			goto out;
+		case '?':
+		default:
+			unknown_opt(argv, optind-1);
+			/* not reached */
+			break;
+		}
+	}
+
+	if (o_opt && f_opt) {
+		ilbadm_err(gettext("options -o and -f are mutually"
+		    " exclusive"));
+		exit(1);
+	}
+
+	if (p_opt) {
+		if (!o_opt) {
+			ilbadm_err(gettext("option -p requires -o"));
+			exit(1);
+		}
+		if (larg.o_str != NULL &&
+		    strcasecmp(larg.o_str, "all") == 0) {
+			ilbadm_err(gettext("option -p requires explicit field"
+			    " names for -o"));
+			exit(1);
+		}
+	}
+
+	/* no -o option, so we use std. fields */
+	if (!(o_opt || f_opt))
+		larg.o_str = def_list_rule_hdrs;
+
+	rclib = ilb_open(&h);
+	if (rclib != ILB_STATUS_OK)
+		goto out;
+
+	if (optind >= argc) {
+		/* no rule names: walk them all */
+		rclib = ilb_walk_rules(h, ilbadm_show_onerule, NULL,
+		    (void*)&larg);
+	} else {
+		/* one walk per named rule, stopping at the first failure */
+		do {
+			rclib = ilb_walk_rules(h, ilbadm_show_onerule,
+			    argv[optind++], (void*)&larg);
+		} while (rclib == ILB_STATUS_OK && optind < argc);
+	}
+out:
+	if (h != ILB_INVALID_HANDLE)
+		(void) ilb_close(h);
+
+	if (rclib == ILB_STATUS_OK)
+		return (rc);
+
+	/*
+	 * The show function returns ILB_STATUS_GENERIC after printing
+	 * out an error message. So we don't need to print it again.
+	 */
+	if (rclib != ILB_STATUS_GENERIC)
+		ilbadm_err(ilb_errstr(rclib));
+	return (ILBADM_LIBERR);
+}
+
+/*
+ * ofmt callback for all columns of the per-rule server listing; the field
+ * id selects server ID, status ("E"/"D"), rule name, servergroup name, host
+ * name, port (range) or address.
+ *
+ * Returns B_FALSE when there is no server data, when the host name lookup
+ * reports an error, or when of_port2str() does; B_TRUE otherwise.
+ */
+static boolean_t
+of_srv2str(ofmt_arg_t *of_arg, char *buf, uint_t bufsize)
+{
+	ilbadm_rl_srvlist_arg_t	*larg =
+	    (ilbadm_rl_srvlist_arg_t *)of_arg->ofmt_cbarg;
+	ilb_server_data_t	*sd = larg->sd;
+	uint_t			op = of_arg->ofmt_id;
+	ilbadm_status_t		rc;
+
+	if (sd == NULL)
+		return (B_FALSE);
+
+	switch (op) {
+	case OF_SRV_ID:
+		(void) strlcpy(buf, sd->sd_srvID, bufsize);
+		break;
+	case OF_SRV_STATUS:
+		buf[0] = ILB_IS_SRV_ENABLED(sd->sd_flags) ? 'E' : 'D';
+		buf[1] = '\0';
+		break;
+	case OF_SRV_RNAME:
+		(void) strlcpy(buf, larg->rd->r_name, bufsize);
+		break;
+	case OF_SRV_SGNAME:
+		(void) strlcpy(buf, larg->sgname, bufsize);
+		break;
+	case OF_SRV_HOSTNAME:
+		rc = ip2hostname(&sd->sd_addr, buf, bufsize);
+		if (rc != ILBADM_OK) {
+			buf[0] = '\0';
+			return (B_FALSE);
+		}
+		break;
+	case OF_SRV_PORT:
+		return (of_port2str(sd->sd_minport, sd->sd_maxport,
+		    buf, bufsize));
+	case OF_SRV_ADDR:
+		ip2str(&sd->sd_addr, buf, bufsize, V6_ADDRONLY);
+		break;
+	}
+
+	return (B_TRUE);
+}
+
+/* ARGSUSED */
+/*
+ * ilb_walk_servers() callback: records the current server in the listing
+ * argument and prints one output line for it.  "h" and "sgname" are unused.
+ */
+static ilb_status_t
+i_show_rl_srv(ilb_handle_t h, ilb_server_data_t *sd, const char *sgname,
+    void *arg)
+{
+	ilbadm_rl_srvlist_arg_t	*line = (ilbadm_rl_srvlist_arg_t *)arg;
+
+	line->sd = sd;
+	ofmt_print(line->oh, line);
+
+	return (ILB_STATUS_OK);
+}
+
+/* ARGSUSED */
+/*
+ * ilb_walk_rules() callback that prints one line for every server attached
+ * to rule "rd"; being called by libilb, it must return libilb errors.
+ * "arg" is the ilbadm_rl_srvlist_arg_t with the list flags and the field
+ * string.  The ofmt handle is opened on the first call, kept in a
+ * function-local static, and stored in the argument for the per-server
+ * callback.
+ */
+ilb_status_t
+ilbadm_show_rl_servers(ilb_handle_t h, ilb_rule_data_t *rd, void *arg)
+{
+	static ofmt_handle_t	oh = (ofmt_handle_t)NULL;
+	ilbadm_rl_srvlist_arg_t	*larg = (ilbadm_rl_srvlist_arg_t *)arg;
+
+	if (larg->o_str == NULL) {
+		ilbadm_err(gettext("internal error"));
+		return (ILB_STATUS_GENERIC);
+	}
+
+	if (oh == NULL) {
+		ofmt_field_t	*fields;
+		ofmt_status_t	oerr;
+		int		oflags = 0;
+
+		if (larg->flags & ILBADM_LIST_PARSE)
+			oflags |= OFMT_PARSABLE;
+
+		fields = (rd->r_vip.ia_af == AF_INET) ?
+		    ssfields_v4 : ssfields_v6;
+
+		oerr = ofmt_open(larg->o_str, fields, oflags, RMAXCOLS, &oh);
+		if (oerr != OFMT_SUCCESS) {
+			char	e[80];
+
+			ilbadm_err(gettext("ofmt_open failed: %s"),
+			    ofmt_strerror(oh, oerr, e, sizeof (e)));
+			return (ILB_STATUS_GENERIC);
+		}
+		larg->oh = oh;
+	}
+
+	larg->sgname = rd->r_sgname;
+	larg->rd = rd;
+
+	return (ilb_walk_servers(h, i_show_rl_srv, rd->r_sgname, (void *)larg));
+}
+
+/* field list ilbadm_show_server() uses when no -o option is given */
+static char *def_show_srv_hdrs =
+ "SERVERID,ADDRESS,PORT,RULENAME,STATUS,SERVERGROUP";
+
+/* ARGSUSED */
+/*
+ * Implements the per-rule server listing subcommand.  Options:
+ *   -p  parsable output; requires -o with explicit field names
+ *   -o  comma-separated list of fields to print
+ * Remaining arguments are rule names; without any, all rules are walked.
+ * Returns ILBADM_OK or ILBADM_LIBERR; option conflicts exit(1) directly.
+ */
+ilbadm_status_t
+ilbadm_show_server(int argc, char *argv[])
+{
+	ilbadm_rl_srvlist_arg_t	larg;
+	ilb_handle_t		h = ILB_INVALID_HANDLE;
+	ilb_status_t		rclib = ILB_STATUS_OK;
+	ilbadm_status_t		rc = ILBADM_OK;
+	boolean_t		o_opt = B_FALSE;
+	boolean_t		p_opt = B_FALSE;
+	int			c;
+
+	bzero(&larg, sizeof (larg));
+	while ((c = getopt(argc, argv, ":po:")) != -1) {
+		switch ((char)c) {
+		case 'p':
+			larg.flags |= ILBADM_LIST_PARSE;
+			p_opt = B_TRUE;
+			break;
+		case 'o':
+			larg.o_str = optarg;
+			o_opt = B_TRUE;
+			break;
+		case ':':
+			ilbadm_err(gettext("missing option argument for %c"),
+			    (char)optopt);
+			rc = ILBADM_LIBERR;
+			goto out;
+		case '?':
+		default:
+			unknown_opt(argv, optind-1);
+			/* not reached */
+			break;
+		}
+	}
+
+	if (p_opt) {
+		if (!o_opt) {
+			ilbadm_err(gettext("option -p requires -o"));
+			exit(1);
+		}
+		if (larg.o_str != NULL &&
+		    strcasecmp(larg.o_str, "all") == 0) {
+			ilbadm_err(gettext("option -p requires explicit"
+			    "  field names for -o"));
+			exit(1);
+		}
+	}
+
+	/* no -o option, so we use default fields */
+	if (!o_opt)
+		larg.o_str = def_show_srv_hdrs;
+
+	rclib = ilb_open(&h);
+	if (rclib != ILB_STATUS_OK)
+		goto out;
+
+	if (optind >= argc) {
+		/* no rule names: walk them all */
+		rclib = ilb_walk_rules(h, ilbadm_show_rl_servers, NULL,
+		    (void*)&larg);
+	} else {
+		/* one walk per named rule, stopping at the first failure */
+		do {
+			rclib = ilb_walk_rules(h, ilbadm_show_rl_servers,
+			    argv[optind++], (void*)&larg);
+		} while (rclib == ILB_STATUS_OK && optind < argc);
+	}
+out:
+	if (h != ILB_INVALID_HANDLE)
+		(void) ilb_close(h);
+
+	if (rclib == ILB_STATUS_OK)
+		return (rc);
+
+	/*
+	 * The show function returns ILB_STATUS_GENERIC after printing
+	 * out an error message. So we don't need to print it again.
+	 */
+	if (rclib != ILB_STATUS_GENERIC)
+		ilbadm_err(ilb_errstr(rclib));
+	return (ILBADM_LIBERR);
+}
+
+/*
+ * Parse one create-rule option argument "arg" against the key table
+ * "keylist", storing the results in "rd".  Returns whatever
+ * i_parse_optstring() returns.
+ */
+static ilbadm_status_t
+i_parse_rl_arg(char *arg, ilb_rule_data_t *rd, ilbadm_key_name_t *keylist)
+{
+	return (i_parse_optstring(arg, (void *) rd, keylist,
+	    OPT_PORTS, NULL));
+}
+
+/*
+ * Allocate a zero-filled rule and hand it back through *rdp with the
+ * protocol preset to TCP.  On allocation failure *rdp is set to NULL.
+ */
+static void
+i_ilbadm_alloc_rule(ilb_rule_data_t **rdp)
+{
+	ilb_rule_data_t	*newrd;
+
+	newrd = (ilb_rule_data_t *)calloc(sizeof (*newrd), 1);
+	*rdp = newrd;
+	if (newrd != NULL)
+		newrd->r_proto = IPPROTO_TCP;
+}
+
+/* release a rule obtained from i_ilbadm_alloc_rule() */
+static void
+i_ilbadm_free_rule(ilb_rule_data_t *rd)
+{
+ free(rd);
+}
+
+/* ARGSUSED */
+/*
+ * Implements rule deletion: either "-a" to destroy all rules, or one or
+ * more rule names, but not both.  Named rules are destroyed in order until
+ * the first failure.  Returns ILBADM_OK, ILBADM_INVAL_ARGS (after printing
+ * its generic message) or ILBADM_LIBERR (after printing the specific one).
+ */
+ilbadm_status_t
+ilbadm_destroy_rule(int argc, char *argv[])
+{
+	ilb_handle_t	h = ILB_INVALID_HANDLE;
+	ilb_status_t	rclib = ILB_STATUS_OK;
+	ilbadm_status_t	rc = ILBADM_OK;
+	boolean_t	all_rules = B_FALSE;
+	int		c, i;
+
+	while ((c = getopt(argc, argv, ":a")) != -1) {
+		if ((char)c == 'a') {
+			all_rules = B_TRUE;
+			continue;
+		}
+		unknown_opt(argv, optind-1);
+		/* not reached */
+	}
+
+	if (all_rules) {
+		/* either "-a" or rulename, not both */
+		if (optind < argc) {
+			rc = ILBADM_INVAL_ARGS;
+			goto out;
+		}
+	} else if (optind >= argc) {
+		ilbadm_err(gettext("usage: delete-rule -a | name"));
+		return (ILBADM_LIBERR);
+	}
+
+	rclib = ilb_open(&h);
+	if (rclib != ILB_STATUS_OK)
+		goto out;
+
+	if (all_rules) {
+		rclib = ilb_destroy_rule(h, NULL);
+	} else {
+		for (i = optind; i < argc && rclib == ILB_STATUS_OK; i++)
+			rclib = ilb_destroy_rule(h, argv[i]);
+	}
+
+out:
+	if (h != ILB_INVALID_HANDLE)
+		(void) ilb_close(h);
+
+	/* This prints the specific errors */
+	if (rclib != ILB_STATUS_OK) {
+		ilbadm_err(ilb_errstr(rclib));
+		rc = ILBADM_LIBERR;
+	}
+	/* This prints the generic errors */
+	if (rc != ILBADM_OK && rc != ILBADM_LIBERR)
+		ilbadm_err(ilbadm_errstr(rc));
+	return (rc);
+}
+
+/* ARGSUSED */
+/*
+ * Common implementation of enable-rule and disable-rule; "cmd" selects
+ * which one (cmd_enable_rule enables, anything else disables).
+ *
+ * Without a rule name (argc < 2) the operation applies to all rules.
+ * Otherwise the named rules are processed in order and processing stops at
+ * the first failure, so that the error reported is the one that actually
+ * occurred and is not overwritten by a later rule that succeeds.
+ *
+ * Returns ILBADM_OK, or ILBADM_LIBERR after printing the libilb error.
+ */
+static ilbadm_status_t
+ilbadm_Xable_rule(int argc, char *argv[], ilbadm_cmd_t cmd)
+{
+	ilb_handle_t	h = ILB_INVALID_HANDLE;
+	ilb_status_t	rclib = ILB_STATUS_OK;
+	ilbadm_status_t	rc = ILBADM_OK;
+	int		i;
+
+	rclib = ilb_open(&h);
+	if (rclib != ILB_STATUS_OK)
+		goto out;
+	/*
+	 * by default, en/disable-rule mean "all", and not using
+	 * a rule name will cause this behaviour to kick in
+	 */
+	if (argc < 2) {
+		if (cmd == cmd_enable_rule)
+			rclib = ilb_enable_rule(h, NULL);
+		else
+			rclib = ilb_disable_rule(h, NULL);
+	} else {
+		for (i = optind; i < argc && rclib == ILB_STATUS_OK; i++) {
+			if (cmd == cmd_enable_rule)
+				rclib = ilb_enable_rule(h, argv[i]);
+			else
+				rclib = ilb_disable_rule(h, argv[i]);
+		}
+	}
+out:
+	if (h != ILB_INVALID_HANDLE)
+		(void) ilb_close(h);
+
+	if (rclib != ILB_STATUS_OK) {
+		ilbadm_err(ilb_errstr(rclib));
+		rc = ILBADM_LIBERR;
+	}
+	return (rc);
+}
+
+/* enable-rule subcommand: ilbadm_Xable_rule() with cmd_enable_rule */
+ilbadm_status_t
+ilbadm_enable_rule(int argc, char *argv[])
+{
+
+ return (ilbadm_Xable_rule(argc, argv, cmd_enable_rule));
+}
+
+/* disable-rule subcommand: ilbadm_Xable_rule() with cmd_disable_rule */
+ilbadm_status_t
+ilbadm_disable_rule(int argc, char *argv[])
+{
+ return (ilbadm_Xable_rule(argc, argv, cmd_disable_rule));
+}
+
+/*
+ * parse and create a rule
+ *
+ * Options: -e (create enabled), -p (persistent/sticky), and -i, -m, -o, -t,
+ * -h, each taking a key=value list that is parsed against the matching key
+ * table.  The rule name is the first non-option argument.
+ *
+ * Returns ILBADM_OK on success.  ILBADM_LIBERR means the error has already
+ * been reported; any other status is reported here through
+ * ilbadm_errstr() before returning.
+ */
+ilbadm_status_t
+ilbadm_create_rule(int argc, char *argv[])
+{
+	ilb_handle_t	h = ILB_INVALID_HANDLE;
+	int		c;
+	ilb_status_t	rclib = ILB_STATUS_OK;
+	ilbadm_status_t	rc = ILBADM_OK;
+	ilb_rule_data_t	*rd;
+	boolean_t	p_opt = B_FALSE;
+
+	i_ilbadm_alloc_rule(&rd);
+	if (rd == NULL) {
+		/* nothing to clean up yet, and rd must not be dereferenced */
+		ilbadm_err(gettext("out of memory"));
+		return (ILBADM_LIBERR);
+	}
+
+	while ((c = getopt(argc, argv, ":ei:m:o:t:h:p")) != -1) {
+		switch ((char)c) {
+		case 'e':
+			rd->r_flags |= ILB_FLAGS_RULE_ENABLED;
+			break;
+		case 'h':
+			/*
+			 * Default value of r_hcpflag means that if there
+			 * is a port range, probe any port.  If there is only
+			 * one port, probe that port.
+			 */
+			rd->r_hcpflag = ILB_HCI_PROBE_ANY;
+			rc = i_parse_rl_arg(optarg, rd, &rl_healthchk_keys[0]);
+			break;
+		case 'o':
+			rc = i_parse_rl_arg(optarg, rd, &rl_outgoing_keys[0]);
+			break;
+		case 'm':
+			rc = i_parse_rl_arg(optarg, rd, &rl_method_keys[0]);
+			break;
+		case 't':
+			rc = i_parse_rl_arg(optarg, rd, &rl_timer_keys[0]);
+			break;
+		case 'i':
+			rc = i_parse_rl_arg(optarg, rd, &rl_incoming_keys[0]);
+			break;
+		case 'p':
+			p_opt = B_TRUE;
+			break;
+		case ':':
+			ilbadm_err(gettext("missing option-argument"
+			    " for %c"), (char)optopt);
+			rc = ILBADM_LIBERR;
+			break;
+		case '?':
+		default:
+			unknown_opt(argv, optind-1);
+			/* not reached */
+			break;
+
+		}
+		if (rc != ILBADM_OK)
+			goto out;
+	}
+
+	if (optind >= argc) {
+		ilbadm_err(gettext("missing mandatory arguments - please refer"
+		    " to 'ilbadm create-rule' subcommand description in"
+		    " ilbadm(1M)"));
+		rc = ILBADM_LIBERR;
+		goto out;
+
+	}
+
+	if (p_opt) {
+		/*
+		 * if user hasn't specified a mask, apply default
+		 */
+		if ((rd->r_flags & ILB_FLAGS_RULE_STICKY) == 0) {
+			char	*maskstr = NULL;
+
+			switch (rd->r_vip.ia_af) {
+			case AF_INET:
+				maskstr = "32";
+				break;
+			case AF_INET6:
+				maskstr = "128";
+				break;
+			default:
+				/*
+				 * without a v4/v6 vip there is no address
+				 * family to derive a default mask from
+				 */
+				rc = ILBADM_INVAL_AF;
+				goto out;
+			}
+			rc = ilbadm_set_netmask(maskstr, &rd->r_stickymask,
+			    rd->r_vip.ia_af);
+			if (rc != ILBADM_OK) {
+				ilbadm_err(gettext("trouble seting default"
+				    " persistence mask"));
+				rc = ILBADM_LIBERR;
+				goto out;
+			}
+		}
+	} else {
+		/* use of sticky mask currently mandates "-p" */
+		if ((rd->r_flags & ILB_FLAGS_RULE_STICKY) != 0) {
+			ilbadm_err(gettext("use of stickymask requires"
+			    " -p option"));
+			rc = ILBADM_LIBERR;
+			goto out;
+		}
+	}
+
+	if (strlen(argv[optind]) > ILBD_NAMESZ -1) {
+		ilbadm_err(gettext("rule name %s is too long -"
+		    " must not exceed %d chars"), argv[optind],
+		    ILBD_NAMESZ - 1);
+		rc = ILBADM_LIBERR;
+		goto out;
+	}
+
+	(void) strlcpy(rd->r_name, argv[optind], sizeof (rd->r_name));
+
+	rc = i_check_rule_spec(rd);
+	if (rc != ILBADM_OK)
+		goto out;
+
+	rclib = ilb_open(&h);
+	if (rclib != ILB_STATUS_OK)
+		goto out;
+
+	rclib = ilb_create_rule(h, rd);
+
+out:
+	i_ilbadm_free_rule(rd);
+
+	if (h != ILB_INVALID_HANDLE)
+		(void) ilb_close(h);
+
+	if (rclib != ILB_STATUS_OK) {
+		ilbadm_err(ilb_errstr(rclib));
+		rc = ILBADM_LIBERR;
+	}
+	if ((rc != ILBADM_OK) && (rc != ILBADM_LIBERR))
+		ilbadm_err(ilbadm_errstr(rc));
+
+	return (rc);
+}
+
+/* ARGSUSED */
+
+/*
+ * ilb_walk_rules() callback used for exporting: writes one complete
+ * "create-rule ..." command line for rule "rd" to the FILE found in "arg".
+ * Being called by libilb, it must return libilb errors: ILB_STATUS_WRITE
+ * when the final fprintf() or the fflush() fails, ILB_STATUS_OK otherwise.
+ */
+static ilb_status_t
+ilbadm_export_rl(ilb_handle_t h, ilb_rule_data_t *rd, void *arg)
+{
+	FILE		*fp = ((ilbadm_rl_exp_arg_t *)arg)->fp;
+	char		linebuf[128];	/* should be enough */
+	int		sz = sizeof (linebuf);
+	uint32_t	conndrain = rd->r_conndrain;
+	uint32_t	nat_timeout = rd->r_nat_timeout;
+	uint32_t	sticky_timeout = rd->r_sticky_timeout;
+
+	/* subcommand and flag-only options */
+	(void) fprintf(fp, "create-rule ");
+	if (rd->r_flags & ILB_FLAGS_RULE_ENABLED)
+		(void) fprintf(fp, "-e ");
+	if (rd->r_flags & ILB_FLAGS_RULE_STICKY)
+		(void) fprintf(fp, "-p ");
+
+	/* -i: vip, port(s), protocol */
+	ip2str(&rd->r_vip, linebuf, sz, V6_ADDRONLY);
+	(void) fprintf(fp, "-i vip=%s,", linebuf);
+
+	(void) ports2str(ntohs(rd->r_minport), ntohs(rd->r_maxport),
+	    linebuf, sz);
+	(void) fprintf(fp, "%s,", linebuf);
+
+	proto2str(rd->r_proto, linebuf, sz);
+	(void) fprintf(fp, "%s ", linebuf);
+
+	/* -m: algorithm, topology, optional proxy-src range and pmask */
+	algo2str(rd->r_algo, linebuf, sz);
+	(void) fprintf(fp, "-m %s,", linebuf);
+
+	topo2str(rd->r_topo, linebuf, sz);
+	(void) fprintf(fp, "%s", linebuf);
+
+	if (rd->r_nat_src_start.ia_af != AF_UNSPEC) {
+		ip2str(&rd->r_nat_src_start, linebuf, sz, V6_ADDRONLY);
+		/* if the address is unspecified, skip it */
+		if (linebuf[0] != '\0') {
+			(void) fprintf(fp, ",proxy-src=%s", linebuf);
+			ip2str(&rd->r_nat_src_end, linebuf, sz, V6_ADDRONLY);
+			(void) fprintf(fp, "-%s", linebuf);
+		}
+	}
+
+	if (rd->r_flags & ILB_FLAGS_RULE_STICKY) {
+		(void) fprintf(fp, ",pmask=/%d",
+		    ilbadm_mask_to_prefixlen(&rd->r_stickymask));
+	}
+
+	(void) fprintf(fp, " ");
+
+	/* -h: only for rules that have a healthcheck attached */
+	if (*rd->r_hcname != '\0') {
+		(void) fprintf(fp, "-h hc-name=%s", rd->r_hcname);
+		hcport_print(rd, linebuf, sizeof (linebuf));
+
+		if (linebuf[0] != '\0')
+			(void) fprintf(fp, ",hc-port=%s", linebuf);
+		(void) fprintf(fp, " ");
+	}
+
+	/* -t: only the timers that are set, separated by commas */
+	if (conndrain != 0 || nat_timeout != 0 || sticky_timeout != 0) {
+		boolean_t	need_comma = B_FALSE;
+
+		(void) fprintf(fp, "-t ");
+		if (conndrain != 0) {
+			(void) fprintf(fp, "conn-drain=%u", conndrain);
+			need_comma = B_TRUE;
+		}
+		if (nat_timeout != 0) {
+			if (need_comma)
+				(void) fprintf(fp, ",");
+			(void) fprintf(fp, "nat-timeout=%u", nat_timeout);
+			need_comma = B_TRUE;
+		}
+		if (sticky_timeout != 0) {
+			if (need_comma)
+				(void) fprintf(fp, ",");
+			(void) fprintf(fp, "persist-timeout=%u",
+			    sticky_timeout);
+		}
+		(void) fprintf(fp, " ");
+	}
+
+	/* -o and the rule name end the line; this is where errors surface */
+	if (fprintf(fp, "-o servergroup=%s %s\n", rd->r_sgname,
+	    rd->r_name) < 0)
+		return (ILB_STATUS_WRITE);
+	if (fflush(fp) == EOF)
+		return (ILB_STATUS_WRITE);
+
+	return (ILB_STATUS_OK);
+}
+
+/*
+ * Write a "create-rule" line for every rule known to handle "h" to "fp".
+ * Returns ILBADM_LIBERR if the walk reports any failure, else ILBADM_OK.
+ */
+ilbadm_status_t
+ilbadm_export_rules(ilb_handle_t h, FILE *fp)
+{
+	ilbadm_rl_exp_arg_t	arg;
+
+	arg.fp = fp;
+
+	if (ilb_walk_rules(h, ilbadm_export_rl, NULL, (void *)&arg) !=
+	    ILB_STATUS_OK)
+		return (ILBADM_LIBERR);
+
+	return (ILBADM_OK);
+}
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_sg.c b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_sg.c
new file mode 100644
index 0000000000..d7009bf811
--- /dev/null
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_sg.c
@@ -0,0 +1,837 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <stddef.h>
+#include <assert.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <sys/list.h>
+#include <ofmt.h>
+#include <libilb.h>
+#include "ilbadm.h"
+
+/*
+ * Key table handed to i_parse_optstring() by i_parse_servrange_list(); the
+ * ILB_KEY_BAD entry terminates it.  Both key codes share the same names.
+ * NOTE(review): the third member looks like an alternate (plural) spelling
+ * of the key name - confirm against ilbadm_key_name_t.
+ */
+static ilbadm_key_name_t servrange_keys[] = {
+ {ILB_KEY_SERVER, "server", "servers"},
+ {ILB_KEY_SERVRANGE, "server", "servers"},
+ {ILB_KEY_BAD, "", ""}
+};
+
+/*
+ * Key table handed to i_parse_optstring() by i_parse_serverIDs(); the
+ * ILB_KEY_BAD entry terminates it.
+ */
+static ilbadm_key_name_t serverID_keys[] = {
+ {ILB_KEY_SERVERID, "server", ""},
+ {ILB_KEY_BAD, "", ""}
+};
+
+/*
+ * NOTE(review): no user of this type is visible in this part of the file;
+ * by its name it presumably carries the state of a servergroup export -
+ * confirm against the export code.
+ */
+typedef struct sg_export_arg {
+ FILE *fp;
+ ilbadm_sgroup_t *sg;
+} sg_export_arg_t;
+
+/* state passed through the servergroup walk of ilbadm_show_servergroups() */
+typedef struct arg_struct {
+ int flags; /* ILBADM_LIST_* flags */
+ char *o_str; /* field list given to ofmt_open() */
+ ofmt_field_t *o_fields; /* NOTE(review): not used in the code visible here */
+ ofmt_handle_t oh; /* set by ilbadm_list_sg_srv() once ofmt is open */
+} list_arg_t;
+
+/* per-line argument given to ofmt_print() and read by the of_*() callbacks */
+typedef struct sg_srv_o_struct {
+ char *sgname; /* name of the servergroup being listed */
+ ilb_server_data_t *sd; /* server printed on this line */
+} sg_srv_o_arg_t;
+
+static ofmt_cb_t of_sgname;
+static ofmt_cb_t of_srvID;
+static ofmt_cb_t of_port;
+static ofmt_cb_t of_ip;
+
+/*
+ * Output fields of the servergroup listing, with the address column sized
+ * for IPv4 (15 characters, the longest dotted-quad form).  The third member
+ * is the field id that of_port() checks: 0 selects the minimum port, any
+ * other value the maximum port.
+ */
+static ofmt_field_t sgfields_v4[] = {
+ {"SGNAME", ILB_SGNAME_SZ, 0, of_sgname},
+ {"SERVERID", ILB_NAMESZ, 0, of_srvID},
+ {"MINPORT", 8, 0, of_port},
+ {"MAXPORT", 8, 1, of_port},
+ {"IP_ADDRESS", 15, 0, of_ip},
+ {NULL, 0, 0, NULL}
+};
+/*
+ * Same fields as sgfields_v4, with the address column sized for IPv6 (39
+ * characters, the longest fully written-out form).
+ */
+static ofmt_field_t sgfields_v6[] = {
+ {"SGNAME", ILB_SGNAME_SZ, 0, of_sgname},
+ {"SERVERID", ILB_NAMESZ, 0, of_srvID},
+ {"MINPORT", 8, 0, of_port},
+ {"MAXPORT", 8, 1, of_port},
+ {"IP_ADDRESS", 39, 0, of_ip},
+ {NULL, 0, 0, NULL}
+};
+
+#define MAXCOLS 80 /* make flexible? */
+
+extern int optind, optopt, opterr;
+extern char *optarg;
+
+/* ofmt callback: copy the servergroup name of the current line into buf */
+static boolean_t
+of_sgname(ofmt_arg_t *of_arg, char *buf, uint_t bufsize)
+{
+	sg_srv_o_arg_t	*line = (sg_srv_o_arg_t *)of_arg->ofmt_cbarg;
+	const char	*name = line->sgname;
+
+	(void) strlcpy(buf, name, bufsize);
+	return (B_TRUE);
+}
+
+/* ofmt callback: copy the server ID of the current line into buf */
+static boolean_t
+of_srvID(ofmt_arg_t *of_arg, char *buf, uint_t bufsize)
+{
+	sg_srv_o_arg_t		*line = (sg_srv_o_arg_t *)of_arg->ofmt_cbarg;
+	ilb_server_data_t	*srv = line->sd;
+
+	(void) strlcpy(buf, srv->sd_srvID, bufsize);
+	return (B_TRUE);
+}
+
+/*
+ * ofmt callback for the MINPORT (field id 0) and MAXPORT (any other id)
+ * columns: prints the selected port in host byte order, or an empty string
+ * when that port is 0.  Always returns B_TRUE.
+ */
+static boolean_t
+of_port(ofmt_arg_t *of_arg, char *buf, uint_t bufsize)
+{
+	sg_srv_o_arg_t	*line = (sg_srv_o_arg_t *)of_arg->ofmt_cbarg;
+	int		port;
+
+	if (of_arg->ofmt_id == 0)
+		port = ntohs(line->sd->sd_minport);
+	else
+		port = ntohs(line->sd->sd_maxport);
+
+	if (port != 0)
+		(void) snprintf(buf, bufsize, "%d", port);
+	else
+		*buf = '\0';
+
+	return (B_TRUE);
+}
+
+/* ofmt callback: print the address of the current line's server into buf */
+static boolean_t
+of_ip(ofmt_arg_t *of_arg, char *buf, uint_t bufsize)
+{
+	sg_srv_o_arg_t		*line = (sg_srv_o_arg_t *)of_arg->ofmt_cbarg;
+	ilb_server_data_t	*srv = line->sd;
+
+	ip2str(&srv->sd_addr, buf, bufsize, V6_ADDRONLY);
+	return (B_TRUE);
+}
+
+/*
+ * Print one servergroup/server line through the ofmt handle stored in the
+ * list_arg_t passed as "arg".  Always returns ILBADM_OK.
+ */
+ilbadm_status_t
+i_list_sg_srv_ofmt(char *sgname, ilb_server_data_t *sd, void *arg)
+{
+	list_arg_t	*larg = (list_arg_t *)arg;
+	sg_srv_o_arg_t	line;
+
+	line.sd = sd;
+	line.sgname = sgname;
+	ofmt_print(larg->oh, &line);
+
+	return (ILBADM_OK);
+}
+
+/*
+ * This function is always called via ilb_walk_servergroups()
+ * and so must return libilb errors.
+ * That's why we need to retain currently unused "h" argument
+ *
+ * Prints one server of servergroup "sgname".  With a field list in the
+ * list_arg_t passed as "arg", output goes through ofmt: the handle is
+ * opened on the first call, using the field table that matches the address
+ * family of that first server (sgfields_v4 for AF_INET, sgfields_v6
+ * otherwise), and is stored in the argument so the caller can close it.
+ * Without a field list a plain "sgname: id:<id> <addr>[:port[-port]]" line
+ * is printed.
+ */
+/* ARGSUSED */
+static ilb_status_t
+ilbadm_list_sg_srv(ilb_handle_t h, ilb_server_data_t *sd, const char *sgname,
+    void *arg)
+{
+	char		ip_str[2*INET6_ADDRSTRLEN + 3] = "";
+	char		port_str[INET6_ADDRSTRLEN];
+	list_arg_t	*larg = (list_arg_t *)arg;
+	ofmt_status_t	oerr;
+	int		oflags = 0;
+	int		ocols = MAXCOLS;
+	int		h_minport, h_maxport;
+	static ofmt_handle_t	oh = (ofmt_handle_t)NULL;
+	ofmt_field_t	*ofp;
+
+	if (larg->o_str != NULL) {
+		if (oh == NULL) {
+			/* pick the table whose address column fits */
+			if (sd->sd_addr.ia_af == AF_INET)
+				ofp = sgfields_v4;
+			else
+				ofp = sgfields_v6;
+
+			if (larg->flags & ILBADM_LIST_PARSE)
+				oflags |= OFMT_PARSABLE;
+
+			oerr = ofmt_open(larg->o_str, ofp, oflags, ocols, &oh);
+			if (oerr != OFMT_SUCCESS) {
+				char	e[80];
+
+				ilbadm_err(gettext("ofmt_open failed: %s"),
+				    ofmt_strerror(oh, oerr, e, sizeof (e)));
+				return (ILB_STATUS_GENERIC);
+			}
+			larg->oh = oh;
+		}
+
+
+		(void) i_list_sg_srv_ofmt((char *)sgname, sd, arg);
+		return (ILB_STATUS_OK);
+	}
+
+	ip2str(&sd->sd_addr, ip_str, sizeof (ip_str), 0);
+
+	h_minport = ntohs(sd->sd_minport);
+	h_maxport = ntohs(sd->sd_maxport);
+	if (h_minport == 0)
+		*port_str = '\0';
+	else if (h_maxport > h_minport)
+		(void) snprintf(port_str, sizeof (port_str), ":%d-%d",
+		    h_minport, h_maxport);
+	else
+		(void) snprintf(port_str, sizeof (port_str), ":%d",
+		    h_minport);
+
+	(void) printf("%s: id:%s %s%s\n", sgname,
+	    sd->sd_srvID?sd->sd_srvID:"(null)", ip_str, port_str);
+	return (ILB_STATUS_OK);
+}
+
+/*
+ * Servergroup walker callback: lists every server of group "sg".  A group
+ * without servers is still listed once, using an all-zero server entry.
+ */
+ilb_status_t
+ilbadm_list_sg(ilb_handle_t h, ilb_sg_data_t *sg, void *arg)
+{
+	ilb_server_data_t	empty_srv;
+
+	if (sg->sgd_srvcount != 0) {
+		return (ilb_walk_servers(h, ilbadm_list_sg_srv, sg->sgd_name,
+		    arg));
+	}
+
+	bzero(&empty_srv, sizeof (empty_srv));
+	return (ilbadm_list_sg_srv(h, &empty_srv, sg->sgd_name, arg));
+}
+
+/* field list ilbadm_show_servergroups() starts out with (-o replaces it) */
+static char *def_fields = "SGNAME,SERVERID,MINPORT,MAXPORT,IP_ADDRESS";
+
+/* ARGSUSED */
+/*
+ * Implements the servergroup listing subcommand.  Options:
+ *   -p  parsable output; requires -o with explicit field names
+ *   -o  comma-separated list of fields to print
+ * Remaining arguments are servergroup names; without any, all groups are
+ * walked.  Returns ILBADM_OK or ILBADM_LIBERR; option conflicts exit(1)
+ * directly.
+ */
+ilbadm_status_t
+ilbadm_show_servergroups(int argc, char *argv[])
+{
+	list_arg_t	larg = {0, def_fields, NULL, NULL};
+	char		optstr[] = ":po:";
+	ilb_handle_t	h = ILB_INVALID_HANDLE;
+	ilb_status_t	rclib = ILB_STATUS_OK;
+	ilbadm_status_t	rc = ILBADM_OK;
+	boolean_t	o_opt = B_FALSE;
+	boolean_t	p_opt = B_FALSE;
+	int		c;
+
+	while ((c = getopt(argc, argv, optstr)) != -1) {
+		switch ((char)c) {
+		case 'p':
+			p_opt = B_TRUE;
+			larg.flags |= ILBADM_LIST_PARSE;
+			break;
+		case 'o':
+			larg.o_str = optarg;
+			o_opt = B_TRUE;
+			break;
+		case ':':
+			ilbadm_err(gettext("missing option argument"
+			    " for %c"), (char)optopt);
+			rc = ILBADM_LIBERR;
+			goto out;
+		default:
+			unknown_opt(argv, optind-1);
+			/* not reached */
+			break;
+		}
+	}
+
+	if (p_opt) {
+		if (!o_opt) {
+			ilbadm_err(gettext("option -p requires -o"));
+			exit(1);
+		}
+		if (larg.o_str != NULL &&
+		    strcasecmp(larg.o_str, "all") == 0) {
+			ilbadm_err(gettext("option -p requires explicit field"
+			    " names for -o"));
+			exit(1);
+		}
+	}
+
+	rclib = ilb_open(&h);
+	if (rclib != ILB_STATUS_OK)
+		goto out;
+
+	if (optind >= argc) {
+		/* no group names: walk them all */
+		rclib = ilb_walk_servergroups(h, ilbadm_list_sg, NULL,
+		    (void*)&larg);
+	} else {
+		/* one walk per named group, stopping at the first failure */
+		do {
+			rclib = ilb_walk_servergroups(h, ilbadm_list_sg,
+			    argv[optind++], (void*)&larg);
+		} while (rclib == ILB_STATUS_OK && optind < argc);
+	}
+	if (rclib != ILB_STATUS_OK)
+		rc = ILBADM_LIBERR;
+
+	if (larg.oh != NULL)
+		ofmt_close(larg.oh);
+out:
+	if (h != ILB_INVALID_HANDLE)
+		(void) ilb_close(h);
+
+	if (rclib == ILB_STATUS_OK)
+		return (rc);
+
+	/*
+	 * The show function returns ILB_STATUS_GENERIC after printing
+	 * out an error message. So we don't need to print it again.
+	 */
+	if (rclib != ILB_STATUS_GENERIC)
+		ilbadm_err(ilb_errstr(rclib));
+	return (ILBADM_LIBERR);
+}
+
+/*
+ * Allocate a zero-filled server node, append it to the server list of
+ * "sgp" and bump the group's server count.  Returns the new node, or NULL
+ * (leaving the group untouched) when the allocation fails.
+ */
+ilbadm_servnode_t *
+i_new_sg_elem(ilbadm_sgroup_t *sgp)
+{
+	ilbadm_servnode_t	*node;
+
+	node = (ilbadm_servnode_t *)calloc(sizeof (*node), 1);
+	if (node == NULL)
+		return (NULL);
+
+	list_insert_tail(&sgp->sg_serv_list, node);
+	sgp->sg_count++;
+	return (node);
+}
+
+/*
+ * Parse "arg" against servrange_keys into the servergroup "sgp"; the
+ * flags passed allow a value list, IP ranges and ports.
+ * Returns whatever i_parse_optstring() returns.
+ */
+static ilbadm_status_t
+i_parse_servrange_list(char *arg, ilbadm_sgroup_t *sgp)
+{
+	int nparsed;	/* demanded by i_parse_optstring(), not used here */
+
+	return (i_parse_optstring(arg, (void *) sgp, servrange_keys,
+	    OPT_VALUE_LIST|OPT_IP_RANGE|OPT_PORTS, &nparsed));
+}
+
+/*
+ * Parse "arg" against serverID_keys into the servergroup "sgp"; the
+ * flags passed allow a value list and ports.
+ * Returns whatever i_parse_optstring() returns.
+ */
+static ilbadm_status_t
+i_parse_serverIDs(char *arg, ilbadm_sgroup_t *sgp)
+{
+	int nparsed;	/* demanded by i_parse_optstring(), not used here */
+
+	return (i_parse_optstring(arg, (void *) sgp, serverID_keys,
+	    OPT_VALUE_LIST|OPT_PORTS, &nparsed));
+}
+
+/*
+ * Apply "cmd" to every server on sgp's server list.
+ * cmd_create_sg and cmd_add_srv add the server to the group named
+ * sgp->sg_name; anything else is asserted to be cmd_rem_srv and removes
+ * it.  "flags" is OR-ed into each server's sd_flags beforehand.
+ * Each failure is reported through ilbadm_err().  A failed add during
+ * cmd_create_sg ends the walk at once; all other failures are recorded
+ * and the walk continues.
+ * Returns ILBADM_OK, or ILBADM_LIBERR if any operation failed or the
+ * handle is invalid.
+ */
+static ilbadm_status_t
+i_mod_sg(ilb_handle_t h, ilbadm_sgroup_t *sgp, ilbadm_cmd_t cmd,
+    int flags)
+{
+	ilbadm_servnode_t *node;
+	ilbadm_status_t result = ILBADM_OK;
+	boolean_t adding = (cmd == cmd_create_sg || cmd == cmd_add_srv);
+
+	if (h == ILB_INVALID_HANDLE && cmd != cmd_enable_server &&
+	    cmd != cmd_disable_server)
+		return (ILBADM_LIBERR);
+
+	/* list_next() yields NULL past the tail, which ends the loop */
+	for (node = list_head(&sgp->sg_serv_list); node != NULL;
+	    node = list_next(&sgp->sg_serv_list, node)) {
+		ilb_server_data_t *spec = &node->s_spec;
+		ilb_status_t libres;
+
+		spec->sd_flags |= flags;
+
+		if (!adding) {
+			assert(cmd == cmd_rem_srv);
+			libres = ilb_rem_server_from_group(h, sgp->sg_name,
+			    spec);
+			/* a failed removal is reported, then we carry on */
+			if (libres != ILB_STATUS_OK) {
+				result = ILBADM_LIBERR;
+				ilbadm_err(
+				    gettext("cannot remove %s from %s: %s"),
+				    spec->sd_srvID, sgp->sg_name,
+				    ilb_errstr(libres));
+			}
+			continue;
+		}
+
+		libres = ilb_add_server_to_group(h, sgp->sg_name, spec);
+		if (libres != ILB_STATUS_OK) {
+			char addr[INET6_ADDRSTRLEN + 1];
+
+			result = ILBADM_LIBERR;
+			ip2str(&spec->sd_addr, addr, sizeof (addr),
+			    V6_ADDRONLY);
+			ilbadm_err(gettext("cannot add %s to %s: %s"),
+			    addr, sgp->sg_name, ilb_errstr(libres));
+			/* a group we just created is abandoned right away */
+			if (cmd == cmd_create_sg)
+				return (result);
+		}
+	}
+
+	return (result);
+}
+
+/*
+ * Allocate a zero-filled servergroup descriptor and initialise its (empty)
+ * server list.  The result is stored in *sgp; it is NULL when the
+ * allocation failed, so callers must check *sgp.
+ */
+static void
+i_ilbadm_alloc_sgroup(ilbadm_sgroup_t **sgp)
+{
+	ilbadm_sgroup_t *group = calloc(1, sizeof (*group));
+
+	*sgp = group;
+	if (group != NULL) {
+		list_create(&group->sg_serv_list, sizeof (ilbadm_servnode_t),
+		    offsetof(ilbadm_servnode_t, s_link));
+	}
+}
+
+/*
+ * Release a servergroup descriptor obtained from i_ilbadm_alloc_sgroup():
+ * every server node still on its list, the list itself and finally the
+ * descriptor.  sg_name is not freed; the callers visible here point it
+ * at an argv[] element.
+ * A NULL "sg" (failed allocation) is accepted and ignored.  The pointer
+ * must not be used after this call.
+ */
+static void
+i_ilbadm_free_sgroup(ilbadm_sgroup_t *sg)
+{
+	ilbadm_servnode_t *s;
+
+	if (sg == NULL)
+		return;
+
+	while ((s = list_remove_head(&sg->sg_serv_list)) != NULL)
+		free(s);
+
+	list_destroy(&sg->sg_serv_list);
+	/* the descriptor itself was calloc'ed and used to be leaked */
+	free(sg);
+}
+
+/*
+ * create-servergroup subcommand: "[-s server-spec-list] groupname".
+ * Creates the group, then adds every server given with -s in the enabled
+ * state.  If any server cannot be added the freshly created group is
+ * destroyed again.
+ * Returns ILBADM_OK on success; ILBADM_LIBERR after a reported usage or
+ * library error; another ilbadm_status_t (e.g. ILBADM_ENOMEM or a parse
+ * error) after printing its ilbadm_errstr() text.
+ */
+ilbadm_status_t
+ilbadm_create_servergroup(int argc, char *argv[])
+{
+	ilb_handle_t h = ILB_INVALID_HANDLE;
+	ilb_status_t rclib = ILB_STATUS_OK;
+	ilbadm_status_t rc = ILBADM_OK;
+	ilbadm_sgroup_t *sg;
+	int c;
+	int flags = 0;
+
+	i_ilbadm_alloc_sgroup(&sg);
+	/* the allocator reports failure only through a NULL result */
+	if (sg == NULL) {
+		rc = ILBADM_ENOMEM;
+		goto out;
+	}
+
+	while ((c = getopt(argc, argv, ":s:")) != -1) {
+		switch ((char)c) {
+		case 's':
+			rc = i_parse_servrange_list(optarg, sg);
+			break;
+		case ':':
+			ilbadm_err(gettext("missing option-argument for"
+			    " %c"), (char)optopt);
+			rc = ILBADM_LIBERR;
+			break;
+		case '?':
+		default:
+			unknown_opt(argv, optind-1);
+			/* not reached */
+			break;
+		}
+
+		if (rc != ILBADM_OK)
+			goto out;
+	}
+
+	if (optind >= argc) {
+		ilbadm_err(gettext("missing mandatory arguments - please refer"
+		    " to 'create-servergroup' subcommand"
+		    "  description in ilbadm(1M)"));
+		rc = ILBADM_LIBERR;
+		goto out;
+	}
+
+	if (strlen(argv[optind]) > ILB_SGNAME_SZ - 1) {
+		ilbadm_err(gettext("servergroup name %s is too long -"
+		    " must not exceed %d chars"), argv[optind],
+		    ILB_SGNAME_SZ - 1);
+		rc = ILBADM_LIBERR;
+		goto out;
+	}
+
+	sg->sg_name = argv[optind];
+
+	rclib = ilb_open(&h);
+	if (rclib != ILB_STATUS_OK)
+		goto out;
+
+	rclib = ilb_create_servergroup(h, sg->sg_name);
+	if (rclib != ILB_STATUS_OK)
+		goto out;
+
+	/* we create a servergroup with all servers enabled */
+	ILB_SET_ENABLED(flags);
+	rc = i_mod_sg(h, sg, cmd_create_sg, flags);
+
+	/* do not leave a half-populated group behind */
+	if (rc != ILBADM_OK)
+		(void) ilb_destroy_servergroup(h, sg->sg_name);
+
+out:
+	if (sg != NULL)
+		i_ilbadm_free_sgroup(sg);
+	if (h != ILB_INVALID_HANDLE)
+		(void) ilb_close(h);
+
+	if (rclib != ILB_STATUS_OK) {
+		ilbadm_err(ilb_errstr(rclib));
+		rc = ILBADM_LIBERR;
+	}
+	/* ILBADM_LIBERR means the message has already been printed */
+	if ((rc != ILBADM_OK) && (rc != ILBADM_LIBERR))
+		ilbadm_err(ilbadm_errstr(rc));
+
+	return (rc);
+}
+
+/*
+ * add-server subcommand: "-s server-spec-list groupname".
+ * Adds every server given with -s to the named group, enabled.
+ * Returns ILBADM_OK on success; ILBADM_LIBERR after a reported usage or
+ * library error (including a failure to open the library handle);
+ * another ilbadm_status_t after printing its ilbadm_errstr() text.
+ */
+ilbadm_status_t
+ilbadm_add_server_to_group(int argc, char **argv)
+{
+	ilb_handle_t h = ILB_INVALID_HANDLE;
+	ilb_status_t rclib = ILB_STATUS_OK;
+	ilbadm_status_t rc = ILBADM_OK;
+	ilbadm_sgroup_t *sg;
+	int c;
+	int flags = 0;
+
+	i_ilbadm_alloc_sgroup(&sg);
+	/* the allocator reports failure only through a NULL result */
+	if (sg == NULL) {
+		rc = ILBADM_ENOMEM;
+		goto out;
+	}
+
+	while ((c = getopt(argc, argv, ":s:")) != -1) {
+		switch ((char)c) {
+		case 's':
+			rc = i_parse_servrange_list(optarg, sg);
+			break;
+		case ':':
+			ilbadm_err(gettext("missing option-argument for"
+			    " %c"), (char)optopt);
+			rc = ILBADM_LIBERR;
+			break;
+		case '?':
+		default:
+			unknown_opt(argv, optind-1);
+			/* not reached */
+			break;
+		}
+
+		if (rc != ILBADM_OK)
+			goto out;
+	}
+
+	if (optind >= argc) {
+		ilbadm_err(gettext("missing mandatory arguments - please refer"
+		    " to 'add-server' subcommand description in ilbadm(1M)"));
+		rc = ILBADM_LIBERR;
+		goto out;
+	}
+
+	sg->sg_name = argv[optind];
+
+	rclib = ilb_open(&h);
+	if (rclib != ILB_STATUS_OK)
+		goto out;
+
+	/* A server is added enabled */
+	ILB_SET_ENABLED(flags);
+	rc = i_mod_sg(h, sg, cmd_add_srv, flags);
+out:
+	if (sg != NULL)
+		i_ilbadm_free_sgroup(sg);
+	if (h != ILB_INVALID_HANDLE)
+		(void) ilb_close(h);
+
+	/*
+	 * Only ilb_open() can leave rclib set here; without this check
+	 * that failure was returned to the caller as ILBADM_OK.
+	 */
+	if (rclib != ILB_STATUS_OK) {
+		ilbadm_err(ilb_errstr(rclib));
+		rc = ILBADM_LIBERR;
+	}
+	if ((rc != ILBADM_OK) && (rc != ILBADM_LIBERR))
+		ilbadm_err(ilbadm_errstr(rc));
+	return (rc);
+}
+
+/*
+ * Common body of enable-server and disable-server.
+ * argv[1..argc-1] are server IDs; each must start with ILB_SRVID_PREFIX
+ * and fit into ilb_server_data_t's sd_srvID, otherwise processing stops
+ * with ILBADM_INVAL_SRVID.  "cmd" selects the operation
+ * (cmd_enable_server or cmd_disable_server).
+ * An unknown server ID is reported and skipped; any other library error
+ * ends processing.
+ * Returns ILBADM_OK, ILBADM_LIBERR after a reported error, or
+ * ILBADM_INVAL_SRVID (after printing its message).
+ */
+/* ARGSUSED */
+static ilbadm_status_t
+ilbadm_Xable_server(int argc, char *argv[], ilbadm_cmd_t cmd)
+{
+	ilb_handle_t h = ILB_INVALID_HANDLE;
+	ilbadm_status_t rc = ILBADM_OK;
+	ilb_status_t rclib = ILB_STATUS_OK;
+	int i;
+
+	if (argc < 2) {
+		ilbadm_err(gettext("missing required argument"
+		    " (server specification)"));
+		rc = ILBADM_LIBERR;
+		goto out;
+	}
+
+	rclib = ilb_open(&h);
+	if (rclib != ILB_STATUS_OK)
+		goto out;
+
+	/* enable-server and disable-server only accepts serverids */
+	for (i = 1; i < argc && rclib == ILB_STATUS_OK; i++) {
+		ilb_server_data_t srv;
+
+		if (argv[i][0] != ILB_SRVID_PREFIX) {
+			rc = ILBADM_INVAL_SRVID;
+			goto out;
+		}
+
+		bzero(&srv, sizeof (srv));
+		/*
+		 * strlcpy() returns the length of the source; a result
+		 * that does not fit means the ID was truncated, and a
+		 * truncated ID could name a different server.
+		 */
+		if (strlcpy(srv.sd_srvID, argv[i], sizeof (srv.sd_srvID)) >=
+		    sizeof (srv.sd_srvID)) {
+			rc = ILBADM_INVAL_SRVID;
+			goto out;
+		}
+
+		switch (cmd) {
+		case cmd_enable_server:
+			rclib = ilb_enable_server(h, &srv, NULL);
+			break;
+		case cmd_disable_server:
+			rclib = ilb_disable_server(h, &srv, NULL);
+			break;
+		default:
+			/* callers in this file pass only the two above */
+			break;
+		}
+
+		/* if we can't find a given server ID, just plough on */
+		if (rclib == ILB_STATUS_ENOENT) {
+			const char *msg = ilb_errstr(rclib);
+
+			rc = ILBADM_LIBERR;
+			ilbadm_err("%s: %s", msg, argv[i]);
+			rclib = ILB_STATUS_OK;
+			continue;
+		}
+		if (rclib != ILB_STATUS_OK)
+			break;
+	}
+out:
+	if (h != ILB_INVALID_HANDLE)
+		(void) ilb_close(h);
+
+	if (rclib != ILB_STATUS_OK) {
+		ilbadm_err(ilb_errstr(rclib));
+		rc = ILBADM_LIBERR;
+	}
+
+	if ((rc != ILBADM_OK) && (rc != ILBADM_LIBERR))
+		ilbadm_err(ilbadm_errstr(rc));
+	return (rc);
+}
+
+/* disable-server subcommand: hand off to the shared enable/disable body */
+ilbadm_status_t
+ilbadm_disable_server(int argc, char *argv[])
+{
+	ilbadm_status_t status;
+
+	status = ilbadm_Xable_server(argc, argv, cmd_disable_server);
+	return (status);
+}
+
+/* enable-server subcommand: hand off to the shared enable/disable body */
+ilbadm_status_t
+ilbadm_enable_server(int argc, char *argv[])
+{
+	ilbadm_status_t status;
+
+	status = ilbadm_Xable_server(argc, argv, cmd_enable_server);
+	return (status);
+}
+
+/*
+ * remove-server subcommand: "-s serverID-list groupname".
+ * Removes every listed server ID from the named group; individual
+ * failures are reported by i_mod_sg() and the rest are still attempted.
+ * Returns ILBADM_OK on success; ILBADM_LIBERR after a reported error
+ * (including a failure to open the library handle); ILBADM_ENOOPTION
+ * when the group name or the server IDs are missing; another
+ * ilbadm_status_t after printing its ilbadm_errstr() text.
+ */
+/* ARGSUSED */
+ilbadm_status_t
+ilbadm_rem_server_from_group(int argc, char *argv[])
+{
+	ilb_handle_t h = ILB_INVALID_HANDLE;
+	ilb_status_t rclib = ILB_STATUS_OK;
+	ilbadm_status_t rc = ILBADM_OK;
+	ilbadm_sgroup_t *sg;
+	int c;
+
+	i_ilbadm_alloc_sgroup(&sg);
+	/* the allocator reports failure only through a NULL result */
+	if (sg == NULL) {
+		rc = ILBADM_ENOMEM;
+		goto out;
+	}
+
+	while ((c = getopt(argc, argv, ":s:")) != -1) {
+		switch ((char)c) {
+		case 's':
+			rc = i_parse_serverIDs(optarg, sg);
+			break;
+		case ':':
+			ilbadm_err(gettext("missing option-argument for"
+			    " %c"), (char)optopt);
+			rc = ILBADM_LIBERR;
+			break;
+		case '?':
+		default:
+			unknown_opt(argv, optind-1);
+			/* not reached */
+			break;
+		}
+		if (rc != ILBADM_OK)
+			goto out;
+	}
+
+	/* we need servergroup name and at least one serverID to remove */
+	if (optind >= argc || sg->sg_count == 0) {
+		rc = ILBADM_ENOOPTION;
+		goto out;
+	}
+
+	sg->sg_name = argv[optind];
+
+	rclib = ilb_open(&h);
+	if (rclib != ILB_STATUS_OK)
+		goto out;
+
+	rc = i_mod_sg(h, sg, cmd_rem_srv, 0);
+out:
+	if (sg != NULL)
+		i_ilbadm_free_sgroup(sg);
+
+	if (h != ILB_INVALID_HANDLE)
+		(void) ilb_close(h);
+
+	/*
+	 * Only ilb_open() can leave rclib set here; without this check
+	 * that failure was returned to the caller as ILBADM_OK.
+	 */
+	if (rclib != ILB_STATUS_OK) {
+		ilbadm_err(ilb_errstr(rclib));
+		rc = ILBADM_LIBERR;
+	}
+	if ((rc != ILBADM_OK) && (rc != ILBADM_LIBERR))
+		ilbadm_err(ilbadm_errstr(rc));
+	return (rc);
+}
+
+/*
+ * delete-servergroup subcommand: expects exactly one operand, the group
+ * name, in argv[1].
+ * Returns ILBADM_OK on success, ILBADM_LIBERR after a reported usage or
+ * library error.
+ */
+ilbadm_status_t
+ilbadm_destroy_servergroup(int argc, char *argv[])
+{
+	ilb_handle_t h = ILB_INVALID_HANDLE;
+	ilb_status_t libres;
+
+	if (argc != 2) {
+		ilbadm_err(gettext("usage:ilbadm"
+		    " delete-servergroup groupname"));
+		return (ILBADM_LIBERR);
+	}
+
+	libres = ilb_open(&h);
+	if (libres == ILB_STATUS_OK)
+		libres = ilb_destroy_servergroup(h, argv[1]);
+
+	if (h != ILB_INVALID_HANDLE)
+		(void) ilb_close(h);
+
+	if (libres == ILB_STATUS_OK)
+		return (ILBADM_OK);
+
+	ilbadm_err(ilb_errstr(libres));
+	return (ILBADM_LIBERR);
+}
+
+#define BUFSZ 1024
+
+/*
+ * Format a server specification into "buf" (capacity "bufsize"): the
+ * address as produced by ip2str(), followed by ":port" or ":min-max"
+ * when sd_minport is non-zero.  Ports are converted from network byte
+ * order.
+ * Returns the length of the resulting string, or -1 if the port suffix
+ * could not be formatted or did not fit.
+ */
+static int
+export_srv_spec(ilb_server_data_t *srv, char *buf, const int bufsize)
+{
+	int len = 0, bufsz = (int)bufsize;
+
+	ip2str(&srv->sd_addr, buf, bufsz, 0);
+
+	len += strlen(buf);
+	bufsz -= len;
+
+	if (srv->sd_minport != 0) {
+		in_port_t h_min, h_max;
+		int inc;
+
+		h_min = ntohs(srv->sd_minport);
+		h_max = ntohs(srv->sd_maxport);
+
+		/* to do: if service name was given, print that, not number */
+		if (h_max <= h_min)
+			inc = snprintf(buf+len, bufsz, ":%d", h_min);
+		else
+			inc = snprintf(buf+len, bufsz, ":%d-%d", h_min, h_max);
+
+		/*
+		 * snprintf() returns the length it wanted to write, not
+		 * counting the terminating NUL, so a result equal to the
+		 * remaining space already means truncation.  A negative
+		 * result is an output error.
+		 */
+		if (inc < 0 || inc >= bufsz)
+			return (-1);
+		len += inc;
+	}
+
+	return (len);
+}
+
+
+/*
+ * Callback for ilb_walk_servers(): that is why it returns ilb_status_t
+ * rather than ilbadm_status_t and keeps the unused handle argument.
+ * Writes one "add-server" line for "srv" to the FILE found in the
+ * sg_export_arg_t passed as "arg".  A server whose specification cannot
+ * be formatted is skipped without an error.  Always returns
+ * ILB_STATUS_OK.
+ */
+/* ARGSUSED */
+ilb_status_t
+ilbadm_export_a_srv(ilb_handle_t h, ilb_server_data_t *srv, const char *sgname,
+    void *arg)
+{
+	char spec[BUFSZ];	/* XXXms make that dynamic */
+	FILE *out = ((sg_export_arg_t *)arg)->fp;
+
+	if (export_srv_spec(srv, spec, BUFSZ) != -1) {
+		(void) fprintf(out, "add-server -s server=");
+		(void) fprintf(out, "%s %s\n", spec, sgname);
+	}
+
+	return (ILB_STATUS_OK);
+}
+
+/*
+ * Servergroup walk callback used for export: writes a
+ * "create-servergroup" line for "sg" to the FILE in the sg_export_arg_t
+ * passed as "arg" and, if the group has servers, walks them with
+ * ilbadm_export_a_srv() and flushes the stream.
+ * Returns ILB_STATUS_OK, the status of a failed server walk, or
+ * ILB_STATUS_WRITE if the flush fails.
+ */
+ilb_status_t
+ilbadm_export_sg(ilb_handle_t h, ilb_sg_data_t *sg, void *arg)
+{
+	FILE *out = ((sg_export_arg_t *)arg)->fp;
+	ilb_status_t walkres;
+
+	(void) fprintf(out, "create-servergroup %s\n", sg->sgd_name);
+
+	/* nothing more to write for an empty group */
+	if (sg->sgd_srvcount == 0)
+		return (ILB_STATUS_OK);
+
+	walkres = ilb_walk_servers(h, ilbadm_export_a_srv, sg->sgd_name, arg);
+	if (walkres != ILB_STATUS_OK)
+		return (walkres);
+
+	return (fflush(out) == EOF ? ILB_STATUS_WRITE : ILB_STATUS_OK);
+}
+
+/*
+ * Write every servergroup known through handle "h" to "fp" by walking
+ * all groups with ilbadm_export_sg().
+ * Returns ILBADM_OK, or ILBADM_LIBERR after printing the library error.
+ */
+ilbadm_status_t
+ilbadm_export_servergroups(ilb_handle_t h, FILE *fp)
+{
+	sg_export_arg_t walk_arg;
+	ilb_status_t libres;
+
+	walk_arg.fp = fp;
+	walk_arg.sg = NULL;
+
+	libres = ilb_walk_servergroups(h, ilbadm_export_sg, NULL,
+	    (void *)&walk_arg);
+	if (libres == ILB_STATUS_OK)
+		return (ILBADM_OK);
+
+	ilbadm_err(ilb_errstr(libres));
+	return (ILBADM_LIBERR);
+}
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_stats.c b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_stats.c
new file mode 100644
index 0000000000..648ec1409e
--- /dev/null
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_stats.c
@@ -0,0 +1,1140 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/sysmacros.h>
+#include <sys/note.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <assert.h>
+#include <libgen.h>
+#include <kstat.h>
+#include <ofmt.h>
+#include <libilb.h>
+#include "ilbadm.h"
+
+#define ILBST_TIMESTAMP_HEADER 0x01 /* a timestamp w. every header */
+#define ILBST_DELTA_INTERVAL 0x02 /* delta over specified interval */
+#define ILBST_ABS_NUMBERS 0x04 /* print absolute numbers, no d's */
+#define ILBST_ITEMIZE 0x08 /* itemize */
+#define ILBST_VERBOSE 0x10 /* verbose error info */
+
+#define ILBST_OLD_VALUES 0x20 /* for internal processing */
+#define ILBST_RULES_CHANGED 0x40
+
+/* one statistic: the name of the kstat entry and its last copied value */
+typedef struct {
+ char is_name[KSTAT_STRLEN];
+ uint64_t is_value;
+} ilbst_stat_t;
+
+/*
+ * Names of the per-rule kstat entries this file copies (matched by
+ * i_stat_index()).  The order must stay in step with the RLSTA_* index
+ * macros defined right after this table.
+ */
+static ilbst_stat_t rulestats[] = {
+ {"num_servers", 0},
+ {"bytes_not_processed", 0},
+ {"pkt_not_processed", 0},
+ {"bytes_dropped", 0},
+ {"pkt_dropped", 0},
+ {"nomem_bytes_dropped", 0},
+ {"nomem_pkt_dropped", 0},
+ {"noport_bytes_dropped", 0},
+ {"noport_pkt_dropped", 0},
+ {"icmp_echo_processed", 0},
+ {"icmp_dropped", 0},
+ {"icmp_too_big_processed", 0},
+ {"icmp_too_big_dropped", 0}
+};
+
+/* indices into array above, to avoid searching */
+#define RLSTA_NUM_SRV 0
+#define RLSTA_BYTES_U 1
+#define RLSTA_PKT_U 2
+#define RLSTA_BYTES_D 3
+#define RLSTA_PKT_D 4
+#define RLSTA_NOMEMBYTES_D 5
+#define RLSTA_NOMEMPKT_D 6
+#define RLSTA_NOPORTBYTES_D 7
+#define RLSTA_NOPORTPKT_D 8
+#define RLSTA_ICMP_P 9
+#define RLSTA_ICMP_D 10
+#define RLSTA_ICMP2BIG_P 11
+#define RLSTA_ICMP2BIG_D 12
+
+/*
+ * Names of the per-server kstat entries this file copies.  The order
+ * must stay in step with the SRVST_* index macros that follow.
+ */
+static ilbst_stat_t servstats[] = {
+ {"bytes_processed", 0},
+ {"pkt_processed", 0}
+};
+/* indices into array above, to avoid searching */
+#define SRVST_BYTES_P 0
+#define SRVST_PKT_P 1
+
+/* values used for of_* commands as id */
+#define ILBST_PKT_P 0
+#define ILBST_BYTES_P 1
+#define ILBST_PKT_U 2
+#define ILBST_BYTES_U 3
+#define ILBST_PKT_D 4
+#define ILBST_BYTES_D 5
+#define ILBST_ICMP_P 6
+#define ILBST_ICMP_D 7
+#define ILBST_ICMP2BIG_P 8
+#define ILBST_ICMP2BIG_D 9
+#define ILBST_NOMEMP_D 10
+#define ILBST_NOPORTP_D 11
+#define ILBST_NOMEMB_D 12
+#define ILBST_NOPORTB_D 13
+
+#define ILBST_ITEMIZE_SNAME 97
+#define ILBST_ITEMIZE_RNAME 98
+#define ILBST_TIMESTAMP 99
+
+/* approx field widths */
+#define ILBST_PKTCTR_W 8
+#define ILBST_BYTECTR_W 10
+#define ILBST_TIME_W 15
+
+static boolean_t of_rule_stats(ofmt_arg_t *, char *, uint_t);
+static boolean_t of_server_stats(ofmt_arg_t *, char *, uint_t);
+static boolean_t of_itemize_stats(ofmt_arg_t *, char *, uint_t);
+static boolean_t of_timestamp(ofmt_arg_t *, char *, uint_t);
+
+/*
+ * Output fields for itemized display.  Each entry appears to be
+ * {header, width, id, callback}; the id is what the callback sees as
+ * ofmt_id (confirm the layout against ofmt.h).
+ */
+static ofmt_field_t stat_itemize_fields[] = {
+ {"RULENAME", ILB_NAMESZ, ILBST_ITEMIZE_RNAME, of_itemize_stats},
+ {"SERVERNAME", ILB_NAMESZ, ILBST_ITEMIZE_SNAME, of_itemize_stats},
+ {"PKT_P", ILBST_PKTCTR_W, ILBST_PKT_P, of_itemize_stats},
+ {"BYTES_P", ILBST_BYTECTR_W, ILBST_BYTES_P, of_itemize_stats},
+ {"TIME", ILBST_TIME_W, ILBST_TIMESTAMP, of_timestamp},
+ {NULL, 0, 0, NULL}
+};
+/*
+ * Output fields for the aggregated (non-itemized) display: the *_P
+ * columns are summed from server statistics, the others from rule
+ * statistics.  Each entry appears to be {header, width, id, callback}
+ * (confirm the layout against ofmt.h).
+ * NOTE(review): NOMEMB_D and NOPORTB_D are byte counters but use the
+ * packet-counter width - confirm whether that is intended.
+ */
+static ofmt_field_t stat_stdfields[] = {
+ {"PKT_P", ILBST_PKTCTR_W, ILBST_PKT_P, of_server_stats},
+ {"BYTES_P", ILBST_BYTECTR_W, ILBST_BYTES_P, of_server_stats},
+ {"PKT_U", ILBST_PKTCTR_W, ILBST_PKT_U, of_rule_stats},
+ {"BYTES_U", ILBST_BYTECTR_W, ILBST_BYTES_U, of_rule_stats},
+ {"PKT_D", ILBST_PKTCTR_W, ILBST_PKT_D, of_rule_stats},
+ {"BYTES_D", ILBST_BYTECTR_W, ILBST_BYTES_D, of_rule_stats},
+ {"ICMP_P", ILBST_PKTCTR_W, ILBST_ICMP_P, of_rule_stats},
+ {"ICMP_D", ILBST_PKTCTR_W, ILBST_ICMP_D, of_rule_stats},
+ {"ICMP2BIG_P", 11, ILBST_ICMP2BIG_P, of_rule_stats},
+ {"ICMP2BIG_D", 11, ILBST_ICMP2BIG_D, of_rule_stats},
+ {"NOMEMP_D", ILBST_PKTCTR_W, ILBST_NOMEMP_D, of_rule_stats},
+ {"NOPORTP_D", ILBST_PKTCTR_W, ILBST_NOPORTP_D, of_rule_stats},
+ {"NOMEMB_D", ILBST_PKTCTR_W, ILBST_NOMEMB_D, of_rule_stats},
+ {"NOPORTB_D", ILBST_PKTCTR_W, ILBST_NOPORTB_D, of_rule_stats},
+ {"TIME", ILBST_TIME_W, ILBST_TIMESTAMP, of_timestamp},
+ {NULL, 0, 0, NULL}
+};
+
+static char stat_stdhdrs[] = "PKT_P,BYTES_P,PKT_U,BYTES_U,PKT_D,BYTES_D";
+static char stat_stdv_hdrs[] = "PKT_P,BYTES_P,PKT_U,BYTES_U,PKT_D,BYTES_D,"
+ "ICMP_P,ICMP_D,ICMP2BIG_P,ICMP2BIG_D,NOMEMP_D,NOPORTP_D";
+static char stat_itemize_rule_hdrs[] = "SERVERNAME,PKT_P,BYTES_P";
+static char stat_itemize_server_hdrs[] = "RULENAME,PKT_P,BYTES_P";
+
+#define RSTAT_SZ (sizeof (rulestats)/sizeof (rulestats[0]))
+#define SSTAT_SZ (sizeof (servstats)/sizeof (servstats[0]))
+
+/*
+ * Statistics of one server within one rule, filled from that server's
+ * kstat: its name, the copied counters (indexed by SRVST_*) and the
+ * kstat's creation time.
+ */
+typedef struct {
+ char isd_servername[KSTAT_STRLEN]; /* serverID */
+ ilbst_stat_t isd_serverstats[SSTAT_SZ];
+ hrtime_t isd_crtime; /* save for comparison purpose */
+} ilbst_srv_desc_t;
+
+/*
+ * this data structure stores statistics for a rule - both an old set
+ * and a current/new set. we use pointers to the actual stores and switch
+ * the pointers for every round. old_is_old in ilbst_arg_t indicates
+ * which pointer points to the "old" data struct (ie, if true, _o pointer
+ * points to old)
+ *
+ * Fields with an _o suffix describe the previous sample.  ird_srvlist and
+ * ird_srvlist_o are separately allocated arrays whose element counts are
+ * kept in ird_num_servers and ird_num_servers_o; i_swap_rl_pointers()
+ * swaps list and count together.  ird_rulestats and ird_rulestats_o point
+ * at ird_rstats / ird_rstats_o (see i_set_rlstats_ptr()).
+ */
+typedef struct {
+ char ird_rulename[KSTAT_STRLEN];
+ int ird_num_servers;
+ int ird_num_servers_o;
+ /* index of the server currently being filled in or printed */
+ int ird_srv_ind;
+ hrtime_t ird_crtime; /* save for comparison */
+ hrtime_t ird_crtime_o; /* save for comparison */
+ ilbst_srv_desc_t *ird_srvlist;
+ ilbst_srv_desc_t *ird_srvlist_o;
+ ilbst_stat_t ird_rstats[RSTAT_SZ];
+ ilbst_stat_t ird_rstats_o[RSTAT_SZ];
+ ilbst_stat_t *ird_rulestats;
+ ilbst_stat_t *ird_rulestats_o;
+} ilbst_rule_desc_t;
+
+/*
+ * overall "container" for information pertaining to statistics, and
+ * how to display them.  It is the callback argument (ofmt_cbarg) seen by
+ * the of_*() output functions.
+ */
+typedef struct {
+ /* combination of the ILBST_* flag bits */
+ int ilbst_flags;
+ /* fields representing user input */
+ char *ilbst_rulename; /* optional */
+ char *ilbst_server; /* optional */
+ /* divisor for "per second" output in the of_*() functions */
+ int ilbst_interval;
+ int ilbst_count;
+ /* "internal" fields for data and data presentation */
+ ofmt_handle_t ilbst_oh;
+ boolean_t ilbst_old_is_old;
+ ilbst_rule_desc_t *ilbst_rlist;
+ int ilbst_rcount; /* current list count */
+ int ilbst_rcount_prev; /* prev (different) count */
+ int ilbst_rlist_sz; /* number of alloc'ed rules */
+ int ilbst_rule_index; /* for itemized display */
+} ilbst_arg_t;
+
+/*
+ * ofmt callback for the TIME field: formats the current local time as
+ * "%F:%H.%M.%S" into "buf".  The ofmt argument is not consulted.
+ * Always returns B_TRUE.
+ */
+/* ARGSUSED */
+static boolean_t
+of_timestamp(ofmt_arg_t *of_arg, char *buf, uint_t bufsize)
+{
+	time_t stamp = time(NULL);
+
+	(void) strftime(buf, bufsize, "%F:%H.%M.%S", localtime(&stamp));
+	return (B_TRUE);
+}
+
+/*
+ * Sum server statistic "index" (a SRVST_* value) over all servers of
+ * rule "rp" and store the sum in *resp.
+ * With ILBST_OLD_VALUES in "flags" the previous sample (ird_srvlist_o,
+ * ird_num_servers_o) is summed, otherwise the current one.
+ * Returns B_TRUE if the sum can be used for a delta.  B_FALSE is only
+ * returned when summing current values without ILBST_ABS_NUMBERS and the
+ * server count or a server's creation time differs from the previous
+ * sample; in the creation-time case the loop stops early, so *resp is
+ * then a partial sum.
+ */
+static boolean_t
+i_sum_per_rule_processed(ilbst_rule_desc_t *rp, uint64_t *resp, int index,
+ int flags)
+{
+ int i, num_servers;
+ ilbst_srv_desc_t *srv, *o_srv, *n_srv;
+ uint64_t res = 0;
+ boolean_t valid = B_TRUE;
+ boolean_t old = flags & ILBST_OLD_VALUES;
+ boolean_t check_valid;
+
+ /* if we do abs. numbers, we never look at the _o fields */
+ assert((old && (flags & ILBST_ABS_NUMBERS)) == B_FALSE);
+
+ /* we only check for validity under certain conditions */
+ check_valid = !(old || (flags & ILBST_ABS_NUMBERS));
+
+ if (check_valid && rp->ird_num_servers != rp->ird_num_servers_o)
+ valid = B_FALSE;
+
+ num_servers = old ? rp->ird_num_servers_o : rp->ird_num_servers;
+
+ for (i = 0; i < num_servers; i++) {
+ /* both addresses are formed; only "srv" is always dereferenced */
+ n_srv = &rp->ird_srvlist[i];
+ o_srv = &rp->ird_srvlist_o[i];
+
+ if (old)
+ srv = o_srv;
+ else
+ srv = n_srv;
+
+ res += srv->isd_serverstats[index].is_value;
+ /*
+ * if creation times don't match, comparison is wrong; if
+ * we already know something is invalid, we don't
+ * need to compare again.
+ */
+ if (check_valid && valid == B_TRUE &&
+ o_srv->isd_crtime != n_srv->isd_crtime) {
+ valid = B_FALSE;
+ break;
+ }
+ }
+ /*
+ * save the result even though it may be imprecise - let the
+ * caller decide what to do
+ */
+ *resp = res;
+
+ return (valid);
+}
+
+typedef boolean_t (*sumfunc_t)(ilbst_rule_desc_t *, uint64_t *, int);
+
+/* sumfunc_t adapter: per-rule sum of the servers' processed packets */
+static boolean_t
+i_sum_per_rule_pkt_p(ilbst_rule_desc_t *rp, uint64_t *resp, int flags)
+{
+	boolean_t usable;
+
+	usable = i_sum_per_rule_processed(rp, resp, SRVST_PKT_P, flags);
+	return (usable);
+}
+
+/* sumfunc_t adapter: per-rule sum of the servers' processed bytes */
+static boolean_t
+i_sum_per_rule_bytes_p(ilbst_rule_desc_t *rp, uint64_t *resp, int flags)
+{
+	boolean_t usable;
+
+	usable = i_sum_per_rule_processed(rp, resp, SRVST_BYTES_P, flags);
+	return (usable);
+}
+
+/*
+ * ofmt callback for the aggregated PKT_P and BYTES_P fields: sums the
+ * selected server statistic over all rules in the ilbst_arg_t passed as
+ * callback argument and prints it into "buf".
+ * With ILBST_ABS_NUMBERS the absolute sum is printed; otherwise the old
+ * sums are subtracted and, unless ILBST_DELTA_INTERVAL is set, the
+ * result is divided by ilbst_interval to give a per-second figure.
+ * Returns B_FALSE (nothing written) if the field id is not one of the
+ * two handled here or if any rule's current sum is flagged invalid;
+ * B_TRUE otherwise.
+ */
+static boolean_t
+of_server_stats(ofmt_arg_t *of_arg, char *buf, uint_t bufsize)
+{
+	ilbst_arg_t *sta = (ilbst_arg_t *)of_arg->ofmt_cbarg;
+	uint64_t count = 0, val;
+	int i;
+	boolean_t valid = B_TRUE;
+	sumfunc_t sumfunc;
+
+	switch (of_arg->ofmt_id) {
+	case ILBST_PKT_P:
+		sumfunc = i_sum_per_rule_pkt_p;
+		break;
+	case ILBST_BYTES_P:
+		sumfunc = i_sum_per_rule_bytes_p;
+		break;
+	default:
+		/* never call through an unset function pointer */
+		return (B_FALSE);
+	}
+
+	for (i = 0; i < sta->ilbst_rcount; i++) {
+		valid = sumfunc(&sta->ilbst_rlist[i], &val, sta->ilbst_flags);
+		if (!valid)
+			return (valid);
+		count += val;
+	}
+
+	if ((sta->ilbst_flags & ILBST_ABS_NUMBERS) != 0)
+		goto out;
+
+	/* validity was established above; here only the sums matter */
+	for (i = 0; i < sta->ilbst_rcount; i++) {
+		(void) sumfunc(&sta->ilbst_rlist[i], &val,
+		    sta->ilbst_flags | ILBST_OLD_VALUES);
+		count -= val;
+	}
+
+out:
+	/*
+	 * normally, we print "change per second", which we calculate
+	 * here. otherwise, we print "change over interval"
+	 */
+	if ((sta->ilbst_flags & (ILBST_DELTA_INTERVAL|ILBST_ABS_NUMBERS)) == 0)
+		count /= sta->ilbst_interval;
+
+	(void) snprintf(buf, bufsize, "%llu", count);
+	return (B_TRUE);
+}
+
+/*
+ * this function is called when user wants itemized stats of every
+ * server for a named rule, or vice versa.
+ * i_do_print sets sta->ilbst_rule_index and the proper ird_srv_ind so
+ * we don't have to differentiate between these two cases here.
+ *
+ * The rule-name and server-name fields are printed as strings and
+ * always succeed.  For PKT_P and BYTES_P the selected server's counter
+ * is printed: absolute with ILBST_ABS_NUMBERS, otherwise as the
+ * difference to the previous sample, divided by ilbst_interval unless
+ * ILBST_DELTA_INTERVAL is set.
+ * Returns B_FALSE if the field id is unknown (nothing written) or if the
+ * server's creation time differs from the previous sample (the value is
+ * still written); B_TRUE otherwise.
+ */
+static boolean_t
+of_itemize_stats(ofmt_arg_t *of_arg, char *buf, uint_t bufsize)
+{
+	ilbst_arg_t *sta = (ilbst_arg_t *)of_arg->ofmt_cbarg;
+	int stat_ind;
+	uint64_t count;
+	int rule_index = sta->ilbst_rule_index;
+	int srv_ind = sta->ilbst_rlist[rule_index].ird_srv_ind;
+	boolean_t ret = B_TRUE;
+	ilbst_srv_desc_t *srv, *osrv;
+
+	srv = &sta->ilbst_rlist[rule_index].ird_srvlist[srv_ind];
+
+	switch (of_arg->ofmt_id) {
+	case ILBST_PKT_P:
+		stat_ind = SRVST_PKT_P;
+		break;
+	case ILBST_BYTES_P:
+		stat_ind = SRVST_BYTES_P;
+		break;
+	case ILBST_ITEMIZE_RNAME:
+		(void) snprintf(buf, bufsize, "%s",
+		    sta->ilbst_rlist[rule_index].ird_rulename);
+		return (B_TRUE);
+	case ILBST_ITEMIZE_SNAME:
+		(void) snprintf(buf, bufsize, "%s", srv->isd_servername);
+		return (B_TRUE);
+	default:
+		/* never index the statistics with an unset value */
+		return (B_FALSE);
+	}
+
+	count = srv->isd_serverstats[stat_ind].is_value;
+
+	if ((sta->ilbst_flags & ILBST_ABS_NUMBERS) != 0)
+		goto out;
+
+	/*
+	 * NOTE(review): assumes ird_srvlist_o holds at least srv_ind + 1
+	 * entries - confirm against the caller.
+	 */
+	osrv = &sta->ilbst_rlist[rule_index].ird_srvlist_o[srv_ind];
+	if (srv->isd_crtime != osrv->isd_crtime)
+		ret = B_FALSE;
+
+	count -= osrv->isd_serverstats[stat_ind].is_value;
+out:
+	/*
+	 * normally, we print "change per second", which we calculate
+	 * here. otherwise, we print "change over interval" or absolute
+	 * values.
+	 */
+	if ((sta->ilbst_flags & (ILBST_DELTA_INTERVAL|ILBST_ABS_NUMBERS)) == 0)
+		count /= sta->ilbst_interval;
+
+	(void) snprintf(buf, bufsize, "%llu", count);
+	return (ret);
+
+}
+
+/*
+ * ofmt callback for the aggregated rule statistics: maps the field id to
+ * an RLSTA_* index, sums that statistic over all rules and prints it
+ * into "buf".
+ * With ILBST_ABS_NUMBERS the absolute sum is printed; otherwise the old
+ * values are subtracted and, unless ILBST_DELTA_INTERVAL is set, the
+ * result is divided by ilbst_interval.
+ * Returns B_FALSE (nothing written) if the field id is unknown, if
+ * ILBST_RULES_CHANGED is set, or if a rule's creation time differs from
+ * a non-zero previous one; B_TRUE otherwise.
+ */
+static boolean_t
+of_rule_stats(ofmt_arg_t *of_arg, char *buf, uint_t bufsize)
+{
+	ilbst_arg_t *sta = (ilbst_arg_t *)of_arg->ofmt_cbarg;
+	int i, ind;
+	uint64_t count = 0;
+
+	switch (of_arg->ofmt_id) {
+	case ILBST_PKT_U:
+		ind = RLSTA_PKT_U;
+		break;
+	case ILBST_BYTES_U:
+		ind = RLSTA_BYTES_U;
+		break;
+	case ILBST_PKT_D:
+		ind = RLSTA_PKT_D;
+		break;
+	case ILBST_BYTES_D:
+		ind = RLSTA_BYTES_D;
+		break;
+	case ILBST_ICMP_P:
+		ind = RLSTA_ICMP_P;
+		break;
+	case ILBST_ICMP_D:
+		ind = RLSTA_ICMP_D;
+		break;
+	case ILBST_ICMP2BIG_P:
+		ind = RLSTA_ICMP2BIG_P;
+		break;
+	case ILBST_ICMP2BIG_D:
+		ind = RLSTA_ICMP2BIG_D;
+		break;
+	case ILBST_NOMEMP_D:
+		ind = RLSTA_NOMEMPKT_D;
+		break;
+	case ILBST_NOPORTP_D:
+		ind = RLSTA_NOPORTPKT_D;
+		break;
+	case ILBST_NOMEMB_D:
+		ind = RLSTA_NOMEMBYTES_D;
+		break;
+	case ILBST_NOPORTB_D:
+		ind = RLSTA_NOPORTBYTES_D;
+		break;
+	default:
+		/* never index the statistics with an unset value */
+		return (B_FALSE);
+	}
+
+	for (i = 0; i < sta->ilbst_rcount; i++)
+		count += sta->ilbst_rlist[i].ird_rulestats[ind].is_value;
+
+	if ((sta->ilbst_flags & ILBST_ABS_NUMBERS) != 0)
+		goto out;
+
+	/*
+	 * the purist approach: if we can't say 100% that what we
+	 * calculate is correct, don't.
+	 */
+	if (sta->ilbst_flags & ILBST_RULES_CHANGED)
+		return (B_FALSE);
+
+	for (i = 0; i < sta->ilbst_rcount; i++) {
+		if (sta->ilbst_rlist[i].ird_crtime_o != 0 &&
+		    sta->ilbst_rlist[i].ird_crtime !=
+		    sta->ilbst_rlist[i].ird_crtime_o)
+			return (B_FALSE);
+
+		count -= sta->ilbst_rlist[i].ird_rulestats_o[ind].is_value;
+	}
+out:
+	/*
+	 * normally, we print "change per second", which we calculate
+	 * here. otherwise, we print "change over interval"
+	 */
+	if ((sta->ilbst_flags & (ILBST_DELTA_INTERVAL|ILBST_ABS_NUMBERS)) == 0)
+		count /= sta->ilbst_interval;
+
+	(void) snprintf(buf, bufsize, "%llu", count);
+	return (B_TRUE);
+}
+
+/*
+ * Get the number of kstat instances. Note that when rules are being
+ * drained the number of kstat instances may be different from the
+ * kstat counter num_rules ("ilb:0:global:num_rules").
+ *
+ * Also there can be multiple instances of a rule in the following
+ * scenario:
+ *
+ * A rule named rule A has been deleted but remains in kstats because
+ * it's undergoing connection draining. During this time, the user adds
+ * a new rule with the same name (rule A). In this case, there would
+ * be two kstat instances for rule A. Currently ilbadm's aggregate
+ * results will include data from both instances of rule A. In
+ * future we should have ilbadm stats only consider the latest instance
+ * of the rule (ie only consider the instance that corresponds
+ * to the rule that was just added).
+ *
+ * A kstat is counted when its module name starts with "ilb" and its
+ * class starts with "rulestat".
+ */
+static int
+i_get_num_kinstances(kstat_ctl_t *kctl)
+{
+	kstat_t *ksp = kctl->kc_chain;
+	int found = 0;	/* nothing found, 0 rules */
+
+	while (ksp != NULL) {
+		if (strncmp("ilb", ksp->ks_module, 3) == 0 &&
+		    strncmp("rulestat", ksp->ks_class, 8) == 0)
+			found++;
+		ksp = ksp->ks_next;
+	}
+
+	return (found);
+}
+
+
+/*
+ * A server kstat's class name is "<rulename>-sstat", so for every rule
+ * in "rlist" that string is built and compared with "class".
+ * Return: pointer to the first rule whose name matches the class
+ *	   NULL if no match
+ */
+static ilbst_rule_desc_t *
+match_2_rnames(char *class, ilbst_rule_desc_t *rlist, int rcount)
+{
+	char wanted[KSTAT_STRLEN];
+	int idx = 0;
+
+	while (idx < rcount) {
+		ilbst_rule_desc_t *rule = &rlist[idx];
+
+		(void) snprintf(wanted, sizeof (wanted), "%s-sstat",
+		    rule->ird_rulename);
+		if (strncmp(wanted, class, sizeof (wanted)) == 0)
+			return (rule);
+		idx++;
+	}
+
+	return (NULL);
+}
+
+/*
+ * Find the named kstat entry "knp" in the table "stats" of "count"
+ * elements, comparing names without regard to case.
+ * Returns the index of the first match, or -1 if there is none.
+ */
+static int
+i_stat_index(kstat_named_t *knp, ilbst_stat_t *stats, int count)
+{
+	int idx = 0;
+
+	while (idx < count) {
+		if (strcasecmp(stats[idx].is_name, knp->name) == 0)
+			return (idx);
+		idx++;
+	}
+
+	return (-1);
+}
+
+/*
+ * Copy every named entry of server kstat "kp" that is listed in
+ * servstats[] into the matching slot of sp->isd_serverstats.  The
+ * kstat's creation time is recorded in "sp" whenever an entry matched.
+ */
+static void
+i_copy_sstats(ilbst_srv_desc_t *sp, kstat_t *kp)
+{
+	kstat_named_t *entries = KSTAT_NAMED_PTR(kp);
+	int n;
+
+	for (n = 0; n < kp->ks_ndata; n++) {
+		kstat_named_t *entry = &entries[n];
+		int slot = i_stat_index(entry, servstats, SSTAT_SZ);
+
+		if (slot != -1) {
+			ilbst_stat_t *dst = &sp->isd_serverstats[slot];
+
+			(void) strlcpy(dst->is_name, entry->name,
+			    sizeof (dst->is_name));
+			dst->is_value = entry->value.ui64;
+			sp->isd_crtime = kp->ks_crtime;
+		}
+	}
+}
+
+
+/*
+ * Walk the kstat chain and copy the statistics of every "ilb" server
+ * kstat that belongs to one of the rules in sta->ilbst_rlist (or only
+ * those named sta->ilbst_server, if set) into that rule's current
+ * server list.  Each rule's ird_srv_ind is used as the fill index and is
+ * reset to 0 before returning.
+ * Returns: ILBADM_OK
+ *	    ILBADM_ENOMEM	a server list could not be grown
+ *	    ILBADM_ENOSERVER	a server name was given but no kstat for
+ *				it was found in any rule
+ */
+static ilbadm_status_t
+i_get_server_descs(ilbst_arg_t *sta, kstat_ctl_t *kctl)
+{
+	ilbadm_status_t rc = ILBADM_OK;
+	kstat_t *kp;
+	int i;
+	boolean_t found = B_FALSE;
+	ilbst_rule_desc_t *rp;
+	ilbst_rule_desc_t *rlist = sta->ilbst_rlist;
+	int rcount = sta->ilbst_rcount;
+
+	/*
+	 * find all "server" kstats, or the one specified in
+	 * sta->server
+	 */
+	for (kp = kctl->kc_chain; kp != NULL; kp = kp->ks_next) {
+		if (strncmp("ilb", kp->ks_module, 3) != 0)
+			continue;
+		if (sta->ilbst_server != NULL &&
+		    strcasecmp(sta->ilbst_server, kp->ks_name) != 0)
+			continue;
+		rp = match_2_rnames(kp->ks_class, rlist, rcount);
+		if (rp == NULL)
+			continue;
+
+		(void) kstat_read(kctl, kp, NULL);
+		i = rp->ird_srv_ind++;
+		found = B_TRUE;
+
+		/*
+		 * This means that a server is added after we check last
+		 * time... Just make the array bigger.
+		 */
+		if (i+1 > rp->ird_num_servers) {
+			ilbst_srv_desc_t *srvlist;
+
+			if ((srvlist = realloc(rp->ird_srvlist, (i+1) *
+			    sizeof (*srvlist))) == NULL) {
+				rc = ILBADM_ENOMEM;
+				break;
+			}
+			rp->ird_srvlist = srvlist;
+			/*
+			 * NOTE(review): the array now holds i + 1 entries
+			 * but the count is set to i, so the new server is
+			 * stored yet not counted - confirm whether that is
+			 * intended.
+			 */
+			rp->ird_num_servers = i;
+		}
+
+		(void) strlcpy(rp->ird_srvlist[i].isd_servername, kp->ks_name,
+		    sizeof (rp->ird_srvlist[i].isd_servername));
+		i_copy_sstats(&rp->ird_srvlist[i], kp);
+	}
+
+	for (i = 0; i < rcount; i++)
+		rlist[i].ird_srv_ind = 0;
+
+	/*
+	 * "found" is tracked separately: the loop index cannot tell us,
+	 * as the reset loop above has reused it.
+	 */
+	if (rc == ILBADM_OK && sta->ilbst_server != NULL && !found)
+		rc = ILBADM_ENOSERVER;
+	return (rc);
+}
+
+/*
+ * Copy every named entry of rule kstat "kp" that is listed in
+ * rulestats[] into the matching slot of the rule's current statistics
+ * (rp->ird_rulestats).
+ */
+static void
+i_copy_rstats(ilbst_rule_desc_t *rp, kstat_t *kp)
+{
+	kstat_named_t *entries = KSTAT_NAMED_PTR(kp);
+	int n;
+
+	for (n = 0; n < kp->ks_ndata; n++) {
+		kstat_named_t *entry = &entries[n];
+		int slot = i_stat_index(entry, rulestats, RSTAT_SZ);
+
+		if (slot != -1) {
+			ilbst_stat_t *dst = &rp->ird_rulestats[slot];
+
+			(void) strlcpy(dst->is_name, entry->name,
+			    sizeof (dst->is_name));
+			dst->is_value = entry->value.ui64;
+		}
+	}
+}
+
+/*
+ * Point the rule's current/old statistics pointers at its two embedded
+ * arrays: with "old_is_old" true, ird_rstats is current and ird_rstats_o
+ * is old; otherwise the roles are reversed.
+ */
+static void
+i_set_rlstats_ptr(ilbst_rule_desc_t *rp, boolean_t old_is_old)
+{
+	rp->ird_rulestats = old_is_old ? rp->ird_rstats : rp->ird_rstats_o;
+	rp->ird_rulestats_o = old_is_old ? rp->ird_rstats_o : rp->ird_rstats;
+}
+/*
+ * For each of the first "rcount" rules: exchange the current and old
+ * server lists together with their element counts (the old count is
+ * kept so that reallocations can be avoided), remember the creation
+ * time as the old one, re-aim the rule statistics pointers according to
+ * sta->ilbst_old_is_old and reset the server fill index.
+ */
+static void
+i_swap_rl_pointers(ilbst_arg_t *sta, int rcount)
+{
+	int idx;
+
+	for (idx = 0; idx < rcount; idx++) {
+		ilbst_rule_desc_t *rule = &sta->ilbst_rlist[idx];
+		ilbst_srv_desc_t *cur_list = rule->ird_srvlist;
+		int cur_count = rule->ird_num_servers;
+
+		rule->ird_srvlist = rule->ird_srvlist_o;
+		rule->ird_srvlist_o = cur_list;
+
+		rule->ird_num_servers = rule->ird_num_servers_o;
+		rule->ird_num_servers_o = cur_count;
+
+		/* preserve creation time */
+		rule->ird_crtime_o = rule->ird_crtime;
+
+		i_set_rlstats_ptr(rule, sta->ilbst_old_is_old);
+		rule->ird_srv_ind = 0;
+	}
+}
+
+/*
+ * Initial setup of the first "rcount" rules: current statistics point at
+ * ird_rstats, old ones at ird_rstats_o, and the server fill index is 0.
+ */
+static void
+i_init_rulelist(ilbst_arg_t *sta, int rcount)
+{
+	int idx;
+
+	for (idx = 0; idx < rcount; idx++) {
+		ilbst_rule_desc_t *rule = &sta->ilbst_rlist[idx];
+
+		rule->ird_srv_ind = 0;
+		rule->ird_rulestats = rule->ird_rstats;
+		rule->ird_rulestats_o = rule->ird_rstats_o;
+	}
+}
+
+
+/*
+ * this function searches for kstats describing individual rules and
+ * saves name, # of servers, creation time and the rule statistics of
+ * each (this is for sta->rulename == NULL);
+ * if sta->rulename != NULL, it names the rule we're looking for
+ * and this function will fill in the other data (like the all_rules case)
+ * At most sta->ilbst_rcount entries of sta->ilbst_rlist are filled; the
+ * current server list of each is (re)allocated when its size changes.
+ * Returns: ILBADM_OK
+ * ILBADM_ENORULE named rule not found
+ * ILBADM_ENOMEM no mem. available
+ * ILBADM_LIBERR a rule kstat has no "num_servers" entry
+ */
+static ilbadm_status_t
+i_get_rule_descs(ilbst_arg_t *sta, kstat_ctl_t *kctl)
+{
+ ilbadm_status_t rc = ILBADM_OK;
+ kstat_t *kp;
+ kstat_named_t *knp;
+ int i;
+ int num_servers;
+ ilbst_rule_desc_t *rlist = sta->ilbst_rlist;
+ int rcount = sta->ilbst_rcount;
+
+ /*
+ * find all "rule" kstats, or the one specified in
+ * sta->ilbst_rulename.
+ */
+ /* "i" counts filled rlist slots; it only advances on a stored rule */
+ for (i = 0, kp = kctl->kc_chain; i < rcount && kp != NULL;
+ kp = kp->ks_next) {
+ if (strncmp("rulestat", kp->ks_class, 8) != 0 ||
+ strncmp("ilb", kp->ks_module, 3) != 0)
+ continue;
+
+ (void) kstat_read(kctl, kp, NULL);
+
+ knp = kstat_data_lookup(kp, "num_servers");
+ if (knp == NULL) {
+ ilbadm_err(gettext("kstat_data_lookup() failed: %s"),
+ strerror(errno));
+ rc = ILBADM_LIBERR;
+ break;
+ }
+ if (sta->ilbst_rulename != NULL) {
+ if (strcasecmp(kp->ks_name, sta->ilbst_rulename)
+ != 0)
+ continue;
+ }
+ (void) strlcpy(rlist[i].ird_rulename, kp->ks_name,
+ sizeof (rlist[i].ird_rulename));
+
+ /* only alloc the space we need, set counter here ... */
+ if (sta->ilbst_server != NULL)
+ num_servers = 1;
+ else
+ num_servers = (int)knp->value.ui64;
+
+ /* ... furthermore, only reallocate if necessary */
+ if (num_servers != rlist[i].ird_num_servers) {
+ ilbst_srv_desc_t *srvlist;
+
+ /*
+ * NOTE(review): the count is updated before the allocation
+ * is known to have succeeded.
+ */
+ rlist[i].ird_num_servers = num_servers;
+
+ if (rlist[i].ird_srvlist == NULL)
+ srvlist = calloc(num_servers,
+ sizeof (*srvlist));
+ else
+ srvlist = realloc(rlist[i].ird_srvlist,
+ sizeof (*srvlist) * num_servers);
+ if (srvlist == NULL) {
+ rc = ILBADM_ENOMEM;
+ break;
+ }
+ rlist[i].ird_srvlist = srvlist;
+ }
+ rlist[i].ird_srv_ind = 0;
+ rlist[i].ird_crtime = kp->ks_crtime;
+
+ i_copy_rstats(&rlist[i], kp);
+ i++;
+
+ /* if we know we're done, return */
+ if (sta->ilbst_rulename != NULL || i == rcount) {
+ rc = ILBADM_OK;
+ break;
+ }
+ }
+
+ /* no slot filled although a specific rule was asked for */
+ if (sta->ilbst_rulename != NULL && i == 0)
+ rc = ILBADM_ENORULE;
+ return (rc);
+}
+
+/*
+ * Print the current sample through the ofmt handle in sta.
+ * Without ILBST_ITEMIZE a single ofmt_print() call is made.  With it,
+ * one call is made per server of the (single) named rule, or - when no
+ * rule name was given - one call per rule whose first server slot
+ * carries a server name.
+ */
+static void
+i_do_print(ilbst_arg_t *sta)
+{
+	int	n;
+
+	/* non-itemized display can go right ahead */
+	if (!(sta->ilbst_flags & ILBST_ITEMIZE)) {
+		ofmt_print(sta->ilbst_oh, sta);
+		return;
+	}
+
+	if (sta->ilbst_rulename == NULL) {
+		/* list one line for every rule for a given server */
+		for (n = 0; n < sta->ilbst_rcount; n++) {
+			ilbst_rule_desc_t	*rule = &sta->ilbst_rlist[n];
+
+			/*
+			 * a rule that doesn't contain the given server
+			 * has no name in its first server slot; skip it
+			 */
+			if (rule->ird_srvlist[0].isd_servername[0] != '\0') {
+				sta->ilbst_rule_index = n;
+				rule->ird_srv_ind = 0;
+				ofmt_print(sta->ilbst_oh, sta);
+			}
+		}
+		sta->ilbst_rule_index = 0;
+		return;
+	}
+
+	/*
+	 * rulename is given, list a line per server:
+	 * the _ITEMIZE flag tells the print function (called from
+	 * ofmt_print()) to look at server [ird_srv_ind] only.
+	 */
+	sta->ilbst_rule_index = 0;
+	n = 0;
+	while (n < sta->ilbst_rlist->ird_num_servers) {
+		sta->ilbst_rlist->ird_srv_ind = n;
+		ofmt_print(sta->ilbst_oh, sta);
+		n++;
+	}
+	sta->ilbst_rlist->ird_srv_ind = 0;
+}
+/*
+ * Sampling and printing loop used by ilbadm_show_stats().
+ * Opens the kstat chain, sizes sta->ilbst_rlist for the number of rules
+ * to report (one if sta->ilbst_rulename is set, otherwise whatever
+ * i_get_num_kinstances() reports), then repeatedly gathers rule and
+ * server statistics, prints them and sleeps for sta->ilbst_interval.
+ * The loop ends once sta->ilbst_count samples were printed (a count of
+ * -1 never ends it) or when a step fails.  Between samples the rule
+ * array is resized if the kstat chain changed and, unless absolute
+ * numbers were requested, "old" and "new" statistics are swapped.
+ * Returns ILBADM_OK or the status of the step that failed; for statuses
+ * other than ILBADM_LIBERR the matching message is printed here.
+ * NOTE(review): only the rule array itself is freed on exit; the
+ * per-rule server lists allocated in i_get_rule_descs() are not.
+ */
+static ilbadm_status_t
+i_do_show_stats(ilbst_arg_t *sta)
+{
+ kstat_ctl_t *kctl;
+ kid_t nkid; /* result of kstat_chain_update() */
+ int rcount = 1, i; /* one slot suffices when a rule name is given */
+ ilbadm_status_t rc = ILBADM_OK;
+ ilbst_rule_desc_t *rlist, *rp;
+ boolean_t pseudo_abs = B_FALSE; /* for first pass */
+
+ if ((kctl = kstat_open()) == NULL) {
+ ilbadm_err(gettext("kstat_open() failed: %s"), strerror(errno));
+ return (ILBADM_LIBERR);
+ }
+
+
+ if (sta->ilbst_rulename == NULL)
+ rcount = i_get_num_kinstances(kctl);
+
+ rlist = calloc(sizeof (*rlist), rcount); /* NOTE(review): (size, count) order is swapped; the product is the same */
+ if (rlist == NULL) {
+ rc = ILBADM_ENOMEM;
+ goto out;
+ }
+
+ sta->ilbst_old_is_old = B_TRUE;
+ sta->ilbst_rlist = rlist;
+ sta->ilbst_rcount = sta->ilbst_rcount_prev = rcount;
+ sta->ilbst_rlist_sz = rcount;
+
+ /*
+ * in the first pass, we always print absolute numbers. We
+ * need to remember whether we wanted abs. numbers for
+ * other samples as well
+ * (pseudo_abs records that the flag was only set by us, so it can
+ * be cleared again after the first sample, at swap_old_new below)
+ */
+ if ((sta->ilbst_flags & ILBST_ABS_NUMBERS) == 0) {
+ sta->ilbst_flags |= ILBST_ABS_NUMBERS;
+ pseudo_abs = B_TRUE;
+ }
+
+ i_init_rulelist(sta, rcount);
+ do {
+ rc = i_get_rule_descs(sta, kctl);
+ if (rc != ILBADM_OK)
+ goto out;
+
+ rc = i_get_server_descs(sta, kctl);
+ if (rc != ILBADM_OK)
+ goto out;
+
+ i_do_print(sta);
+
+ if (sta->ilbst_count == -1 || --(sta->ilbst_count) > 0) /* -1: keep sampling indefinitely */
+ (void) sleep(sta->ilbst_interval);
+ else
+ break;
+
+ nkid = kstat_chain_update(kctl);
+ sta->ilbst_flags &= ~ILBST_RULES_CHANGED;
+ /*
+ * we only need to continue with most of the rest of this if
+ * the kstat chain id has changed
+ * (0 is taken to mean "unchanged", -1 is treated as failure)
+ */
+ if (nkid == 0)
+ goto swap_old_new;
+ if (nkid == -1) {
+ ilbadm_err(gettext("kstat_chain_update() failed: %s"),
+ strerror(errno));
+ rc = ILBADM_LIBERR;
+ break;
+ }
+
+ /*
+ * find out whether the number of rules has changed.
+ * if so, adjust rcount and _o; if number has increased,
+ * expand array to hold all rules.
+ * we only shrink if rlist_sz is larger than both rcount and
+ * rcount_prev;
+ */
+ if (sta->ilbst_rulename == NULL)
+ rcount = i_get_num_kinstances(kctl);
+ if (rcount != sta->ilbst_rcount) {
+ sta->ilbst_flags |= ILBST_RULES_CHANGED;
+ sta->ilbst_rcount_prev = sta->ilbst_rcount;
+ sta->ilbst_rcount = rcount;
+
+ if (rcount > sta->ilbst_rcount_prev) {
+ rlist = realloc(sta->ilbst_rlist,
+ sizeof (*sta->ilbst_rlist) * rcount);
+ if (rlist == NULL) {
+ rc = ILBADM_ENOMEM;
+ break;
+ }
+ sta->ilbst_rlist = rlist;
+ /* realloc doesn't zero out memory */
+ for (i = sta->ilbst_rcount_prev;
+ i < rcount; i++) {
+ rp = &sta->ilbst_rlist[i];
+ bzero(rp, sizeof (*rp));
+ i_set_rlstats_ptr(rp,
+ sta->ilbst_old_is_old);
+ }
+ /*
+ * even if rlist_sz was > rcount, it's now
+ * shrunk to rcount
+ */
+ sta->ilbst_rlist_sz = sta->ilbst_rcount;
+ }
+ }
+
+ /*
+ * we may need to shrink the allocated slots down to the
+ * actually required number - we need to make sure we
+ * don't delete old or new stats.
+ */
+ if (sta->ilbst_rlist_sz > MAX(sta->ilbst_rcount,
+ sta->ilbst_rcount_prev)) {
+ sta->ilbst_rlist_sz =
+ MAX(sta->ilbst_rcount, sta->ilbst_rcount_prev);
+ rlist = realloc(sta->ilbst_rlist,
+ sizeof (*sta->ilbst_rlist) * sta->ilbst_rlist_sz);
+ if (rlist == NULL) {
+ rc = ILBADM_ENOMEM;
+ break;
+ }
+ sta->ilbst_rlist = rlist;
+ }
+
+ /*
+ * move pointers around so what used to point to "old"
+ * stats now points to new, and vice versa
+ * if we're printing absolute numbers, this rigmarole is
+ * not necessary.
+ */
+swap_old_new:
+ if (pseudo_abs)
+ sta->ilbst_flags &= ~ILBST_ABS_NUMBERS;
+
+ if ((sta->ilbst_flags & ILBST_ABS_NUMBERS) == 0) {
+ sta->ilbst_old_is_old = !sta->ilbst_old_is_old;
+ i_swap_rl_pointers(sta, rcount);
+ }
+ _NOTE(CONSTCOND)
+ } while (B_TRUE);
+
+out:
+ (void) kstat_close(kctl);
+ if ((rc != ILBADM_OK) && (rc != ILBADM_LIBERR)) /* ILBADM_LIBERR paths above print their own message */
+ ilbadm_err(ilbadm_errstr(rc));
+
+ if (sta->ilbst_rlist != NULL)
+ free(sta->ilbst_rlist);
+
+ return (rc);
+}
+
+/*
+ * read ilb's kernel statistics and (periodically) display
+ * them.
+ * Options (see the getopt() string below): -t adds a TIME column,
+ * -d / -A select delta-per-interval or absolute numbers, -r / -s pick a
+ * rule or a server, -i itemizes, -v is verbose, -o names the fields to
+ * show and -p requests parsable output.  Up to two operands follow: the
+ * sampling interval and the number of samples; an interval without a
+ * count means "no limit".
+ * Invalid option combinations or operands print a message and exit(1).
+ * Returns the status of i_do_show_stats(), ILBADM_ENOMEM if the field
+ * list for -t cannot be allocated, or ILBADM_LIBERR if ofmt_open() fails.
+ */
+/* ARGSUSED */
+ilbadm_status_t
+ilbadm_show_stats(int argc, char *argv[])
+{
+	ilbadm_status_t	rc;
+	int		c;
+	ilbst_arg_t	sta;
+	int		oflags = 0;
+	char		*fieldnames = stat_stdhdrs;
+	char		*fnames = NULL;	/* heap copy of fieldnames for -t */
+	ofmt_field_t	*fields = stat_stdfields;
+	boolean_t	r_opt = B_FALSE, s_opt = B_FALSE, i_opt = B_FALSE;
+	boolean_t	o_opt = B_FALSE, p_opt = B_FALSE, t_opt = B_FALSE;
+	boolean_t	v_opt = B_FALSE, A_opt = B_FALSE, d_opt = B_FALSE;
+	ofmt_status_t	oerr;
+	ofmt_handle_t	oh = NULL;
+
+	bzero(&sta, sizeof (sta));
+	sta.ilbst_interval = 1;
+	sta.ilbst_count = 1;
+
+	while ((c = getopt(argc, argv, ":tdAr:s:ivo:p")) != -1) {
+		switch ((char)c) {
+		case 't':	sta.ilbst_flags |= ILBST_TIMESTAMP_HEADER;
+			t_opt = B_TRUE;
+			break;
+		case 'd':	sta.ilbst_flags |= ILBST_DELTA_INTERVAL;
+			d_opt = B_TRUE;
+			break;
+		case 'A':	sta.ilbst_flags |= ILBST_ABS_NUMBERS;
+			A_opt = B_TRUE;
+			break;
+		case 'r':	sta.ilbst_rulename = optarg;
+			r_opt = B_TRUE;
+			break;
+		case 's':	sta.ilbst_server = optarg;
+			s_opt = B_TRUE;
+			break;
+		case 'i':	sta.ilbst_flags |= ILBST_ITEMIZE;
+			i_opt = B_TRUE;
+			break;
+		case 'o':	fieldnames = optarg;
+			o_opt = B_TRUE;
+			break;
+		case 'p':	oflags |= OFMT_PARSABLE;
+			p_opt = B_TRUE;
+			break;
+		case 'v':	sta.ilbst_flags |= ILBST_VERBOSE;
+			v_opt = B_TRUE;
+			fieldnames = stat_stdv_hdrs;
+			break;
+		case ':':	ilbadm_err(gettext("missing option-argument"
+			    " detected for %c"), (char)optopt);
+			exit(1);
+			/* not reached */
+			break;
+		case '?': /* fallthrough */
+		default:
+			unknown_opt(argv, optind-1);
+			/* not reached */
+			break;
+		}
+	}
+
+	if (s_opt && r_opt) {
+		ilbadm_err(gettext("options -s and -r are mutually exclusive"));
+		exit(1);
+	}
+
+	if (i_opt) {
+		if (!(s_opt || r_opt)) {
+			ilbadm_err(gettext("option -i requires"
+			    " either -r or -s"));
+			exit(1);
+		}
+		if (v_opt) {
+			ilbadm_err(gettext("option -i and -v are mutually"
+			    " exclusive"));
+			exit(1);
+		}
+		/* only use "std" headers if none are specified */
+		if (!o_opt) {
+			if (r_opt)
+				fieldnames = stat_itemize_rule_hdrs;
+			else /* must be s_opt */
+				fieldnames = stat_itemize_server_hdrs;
+		}
+		fields = stat_itemize_fields;
+	}
+
+	if (p_opt) {
+		if (!o_opt) {
+			ilbadm_err(gettext("option -p requires -o"));
+			exit(1);
+		}
+		if (v_opt) {
+			ilbadm_err(gettext("option -o and -v are mutually"
+			    " exclusive"));
+			exit(1);
+		}
+		if (strcasecmp(fieldnames, "all") == 0) {
+			ilbadm_err(gettext("option -p requires"
+			    " explicit field names"));
+			exit(1);
+		}
+	}
+
+	if (t_opt) {
+		if (v_opt) {
+			fieldnames = "all";
+		} else {
+			/* room for ",TIME" and the terminating NUL */
+			int len = strlen(fieldnames) + 6;
+
+			fnames = malloc(len);
+			if (fnames == NULL) {
+				rc = ILBADM_ENOMEM;
+				return (rc);
+			}
+			(void) snprintf(fnames, len, "%s,TIME", fieldnames);
+			fieldnames = fnames;
+		}
+	}
+
+	if (A_opt && d_opt) {
+		ilbadm_err(gettext("options -d and -A are mutually exclusive"));
+		exit(1);
+	}
+
+	/* find and parse interval and count arguments if present */
+	if (optind < argc) {
+		sta.ilbst_interval = atoi(argv[optind]);
+		if (sta.ilbst_interval < 1) {
+			ilbadm_err(gettext("illegal interval spec %s"),
+			    argv[optind]);
+			exit(1);
+		}
+		/* an interval without a count means: no limit */
+		sta.ilbst_count = -1;
+		if (++optind < argc) {
+			sta.ilbst_count = atoi(argv[optind]);
+			if (sta.ilbst_count < 1) {
+				ilbadm_err(gettext("illegal count spec %s"),
+				    argv[optind]);
+				exit(1);
+			}
+		}
+	}
+
+	oerr = ofmt_open(fieldnames, fields, oflags, 80, &oh);
+	if (oerr != OFMT_SUCCESS) {
+		char	e[80];
+
+		ilbadm_err(gettext("ofmt_open failed: %s"),
+		    ofmt_strerror(oh, oerr, e, sizeof (e)));
+		free(fnames);
+		return (ILBADM_LIBERR);
+	}
+
+	sta.ilbst_oh = oh;
+
+	rc = i_do_show_stats(&sta);
+
+	ofmt_close(oh);
+	/* our own field list copy is not needed past this point */
+	free(fnames);
+	return (rc);
+}
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_subr.c b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_subr.c
new file mode 100644
index 0000000000..940f82bc12
--- /dev/null
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_subr.c
@@ -0,0 +1,1161 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <netdb.h>
+#include <errno.h>
+#include <ctype.h>
+#include <assert.h>
+#include <limits.h>
+#include <libilb.h>
+#include <libilb_impl.h>
+#include "ilbadm.h"
+
+#define PORT_SEP ':' /* separates the address part from the port part */
+
+typedef enum {
+ numeric = 1, /* address is (or must be) a numeric literal */
+ non_numeric /* address was resolved through a name lookup */
+} addr_type_t;
+/*
+ * Lookup tables used with i_str_from_val() and i_val_from_str().
+ * Entries are presumably { v_type, v_name, v_alias } - confirm against
+ * the ilbadm_val_type_t declaration; an entry whose value is
+ * ILBD_BAD_VAL terminates a table.  This one covers the ILB_ALG_* values.
+ */
+ilbadm_val_type_t algo_types[] = {
+ {(int)ILB_ALG_ROUNDROBIN, "roundrobin", "rr"},
+ {(int)ILB_ALG_HASH_IP, "hash-ip", "hip"},
+ {(int)ILB_ALG_HASH_IP_SPORT, "hash-ip-port", "hipp"},
+ {(int)ILB_ALG_HASH_IP_VIP, "hash-ip-vip", "hipv"},
+ {ILBD_BAD_VAL, NULL, NULL}
+};
+/* same layout as algo_types, for the ILB_TOPO_* values */
+ilbadm_val_type_t topo_types[] = {
+ {(int)ILB_TOPO_DSR, "DSR", "d"},
+ {(int)ILB_TOPO_NAT, "NAT", "n"},
+ {(int)ILB_TOPO_HALF_NAT, "HALF-NAT", "h"},
+ {ILBD_BAD_VAL, NULL, NULL}
+};
+
+/*
+ * Format the address in "ip" as a string into "buf", which holds "sz"
+ * bytes.  An all-zero (unspecified) address, an unknown address family
+ * or a buffer that is too small all yield the empty string.
+ * IPv6 addresses are wrapped in '[' and ']' unless V6_ADDRONLY is set
+ * in "flags".
+ */
+void
+ip2str(ilb_ip_addr_t *ip, char *buf, size_t sz, int flags)
+{
+	size_t	len;
+
+	if (sz == 0)
+		return;
+	buf[0] = '\0';
+
+	switch (ip->ia_af) {
+	case AF_INET:
+		if (ip->ia_v4.s_addr == 0)
+			break;
+		if (inet_ntop(AF_INET, &ip->ia_v4, buf, sz) == NULL)
+			buf[0] = '\0';
+		break;
+	case AF_INET6:
+		if (IN6_IS_ADDR_UNSPECIFIED(&ip->ia_v6))
+			break;
+		if (flags & V6_ADDRONLY) {
+			if (inet_ntop(AF_INET6, &ip->ia_v6, buf, sz) == NULL)
+				buf[0] = '\0';
+			break;
+		}
+		/*
+		 * the address is written after the (not yet stored) '[';
+		 * two bytes are held back, one for each bracket, so that
+		 * ']' and the terminator still fit behind the address
+		 */
+		if (sz < 3 ||
+		    inet_ntop(AF_INET6, &ip->ia_v6, buf + 1, sz - 2) == NULL) {
+			buf[0] = '\0';
+			break;
+		}
+		buf[0] = '[';
+		len = strlen(buf);
+		buf[len] = ']';
+		buf[len + 1] = '\0';
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Return the v_name of the entry in "types" whose v_type equals "val".
+ * If there is none, the v_name of the terminating ILBD_BAD_VAL entry is
+ * returned instead.
+ */
+char *
+i_str_from_val(int val, ilbadm_val_type_t *types)
+{
+	ilbadm_val_type_t	*entry = types;
+
+	while (entry->v_type != ILBD_BAD_VAL && entry->v_type != val)
+		entry++;
+
+	/* either the match or the terminating entry */
+	return (entry->v_name);
+}
+
+/*
+ * Return the v_type of the first entry in "types" whose name or alias
+ * matches "name" (case is ignored, at most sizeof the respective member
+ * bytes are compared).  Without a match the v_type of the terminating
+ * entry, i.e. ILBD_BAD_VAL, is returned.
+ */
+int
+i_val_from_str(char *name, ilbadm_val_type_t *types)
+{
+	ilbadm_val_type_t	*entry;
+
+	for (entry = types; entry->v_type != ILBD_BAD_VAL; entry++) {
+		if (strncasecmp(name, entry->v_name,
+		    sizeof (entry->v_name)) == 0)
+			return (entry->v_type);
+		if (strncasecmp(name, entry->v_alias,
+		    sizeof (entry->v_alias)) == 0)
+			return (entry->v_type);
+	}
+	return (entry->v_type);
+}
+
+/*
+ * Return the k_key of the first entry in "keylist" whose name or alias
+ * matches "key" (case is ignored, at most sizeof the respective member
+ * bytes are compared).  Without a match the k_key of the terminating
+ * entry, i.e. ILB_KEY_BAD, is returned.
+ */
+ilbadm_key_code_t
+i_match_key(char *key, ilbadm_key_name_t *keylist)
+{
+	ilbadm_key_name_t	*cand = keylist;
+
+	while (cand->k_key != ILB_KEY_BAD) {
+		if (strncasecmp(key, cand->k_name,
+		    sizeof (cand->k_name)) == 0)
+			break;
+		if (strncasecmp(key, cand->k_alias,
+		    sizeof (cand->k_alias)) == 0)
+			break;
+		cand++;
+	}
+	return (cand->k_key);
+}
+
+/*
+ * try to match:
+ * 1) IPv4 address
+ * 2) IPv6 address
+ * 3) a hostname
+ * and store the resulting address in *ip.
+ *
+ * If a_type is not NULL and *a_type == numeric on entry, only a numeric
+ * address is accepted and no name lookup is attempted.  On success,
+ * *a_type (if given) tells whether "val" was numeric or had to be
+ * looked up.
+ * Returns:	ILBADM_OK		success
+ *		ILBADM_INVAL_ADDR	"val" could not be converted
+ *		ILBADM_INVAL_AF		result is neither IPv4 nor IPv6
+ */
+static ilbadm_status_t
+i_match_onehost(const char *val, ilb_ip_addr_t *ip, addr_type_t *a_type)
+{
+	struct addrinfo	*ai = NULL;
+	struct addrinfo	hints;
+	addr_type_t	at = numeric;
+	ilbadm_status_t	rc = ILBADM_OK;
+
+	(void) memset((void *)&hints, 0, sizeof (hints));
+	hints.ai_flags |= AI_NUMERICHOST;
+
+	/*
+	 * if *a_type == numeric, we only want to check whether this
+	 * is a (valid) numeric IP address. If we do and it is NOT,
+	 * we return ILBADM_INVAL_ADDR.
+	 */
+	if (getaddrinfo(val, NULL, &hints, &ai) != 0) {
+		if (a_type != NULL && (*a_type == numeric))
+			return (ILBADM_INVAL_ADDR);
+
+		at = non_numeric;
+		if (getaddrinfo(val, NULL, NULL, &ai) != 0)
+			return (ILBADM_INVAL_ADDR);
+	}
+
+	ip->ia_af = ai->ai_family;
+	switch (ip->ia_af) {
+	case AF_INET: {
+		struct sockaddr_in	sa;
+
+		assert(ai->ai_addrlen == sizeof (sa));
+		(void) memcpy(&sa, ai->ai_addr, sizeof (sa));
+		ip->ia_v4 = sa.sin_addr;
+		break;
+	}
+	case AF_INET6: {
+		struct sockaddr_in6	sa;
+
+		assert(ai->ai_addrlen == sizeof (sa));
+		(void) memcpy(&sa, ai->ai_addr, sizeof (sa));
+		ip->ia_v6 = sa.sin6_addr;
+		break;
+	}
+	default:
+		rc = ILBADM_INVAL_AF;
+		break;
+	}
+
+	/* the result list is ours to release, on every path */
+	freeaddrinfo(ai);
+
+	if (rc == ILBADM_OK && a_type != NULL)
+		*a_type = at;
+	return (rc);
+}
+
+/*
+ * Copy the server ID "val" into the server spec of the node "store"
+ * points to (an ilbadm_servnode_t).  Always returns ILBADM_OK.
+ */
+static ilbadm_status_t
+i_store_serverID(void *store, char *val)
+{
+	ilb_server_data_t	*srv = &((ilbadm_servnode_t *)store)->s_spec;
+
+	/*
+	 * no length check is done: an over-long name is cut off by
+	 * strlcpy() and won't exist in the system anyway.
+	 */
+	(void) strlcpy(srv->sd_srvID, val, sizeof (srv->sd_srvID));
+	return (ILBADM_OK);
+}
+
+/*
+ * Return the IPv4 address following (dir == 1) or preceding (any other
+ * dir) "*a", treating the address as a 32 bit number.  Input and result
+ * are in network byte order.
+ */
+static struct in_addr
+i_next_in_addr(struct in_addr *a, int dir)
+{
+	struct in_addr	result;
+	uint32_t	host = ntohl(a->s_addr);
+
+	host = (dir == 1) ? host + 1 : host - 1;
+	result.s_addr = htonl(host);
+	return (result);
+}
+
+/*
+ * Add one server node to "sg" for every IPv4 address after ip1 up to
+ * and including ip2; each node inherits the port range of "srv".
+ * Returns ILBADM_OK, or ILBADM_ENOMEM if a node cannot be obtained.
+ */
+static ilbadm_status_t
+i_expand_ipv4range(ilbadm_sgroup_t *sg, ilb_server_data_t *srv,
+    ilb_ip_addr_t *ip1, ilb_ip_addr_t *ip2)
+{
+	ilbadm_servnode_t	*sn_new;
+	ilb_ip_addr_t		new_ip;
+	int			cmp;
+
+	new_ip.ia_af = AF_INET;
+	new_ip.ia_v4 = i_next_in_addr(&ip1->ia_v4, 1);
+	while ((cmp = ilb_cmp_ipaddr(&new_ip, ip2, NULL)) < 1) {
+		sn_new = i_new_sg_elem(sg);
+		if (sn_new == NULL)
+			return (ILBADM_ENOMEM);
+		sn_new->s_spec.sd_addr = new_ip;
+		sn_new->s_spec.sd_minport = srv->sd_minport;
+		sn_new->s_spec.sd_maxport = srv->sd_maxport;
+
+		/*
+		 * stop at ip2 instead of stepping past it: if ip2 is the
+		 * highest address, the increment would wrap around to 0
+		 * and the loop would never end
+		 */
+		if (cmp == 0)
+			break;
+		new_ip.ia_v4 = i_next_in_addr(&new_ip.ia_v4, 1);
+	}
+	return (ILBADM_OK);
+}
+
+/*
+ * Return the IPv6 address following (dir == 1) or preceding (any other
+ * dir) "*a", treating the address as a 128 bit number made up of two
+ * 64 bit halves.
+ */
+static struct in6_addr
+i_next_in6_addr(struct in6_addr *a, int dir)
+{
+	struct in6_addr	ia6;
+	uint64_t	al, ah;
+
+	ah = INV6_N2H_MSB64(a);
+	al = INV6_N2H_LSB64(a);
+
+	if (dir == 1) {
+		/* carry into the upper half when the lower one wraps to 0 */
+		if (++al == 0)
+			ah++;
+	} else {
+		/* borrow from the upper half when the lower one was 0 */
+		if (al-- == 0)
+			ah--;
+	}
+
+	INV6_H2N_MSB64(&ia6, ah);
+	INV6_H2N_LSB64(&ia6, al);
+	return (ia6);
+}
+
+
+/*
+ * Add one server node to "sg" for every IPv6 address after ip1 up to
+ * and including ip2; each node inherits the port range of "srv".
+ * Returns ILBADM_OK, or ILBADM_ENOMEM if a node cannot be obtained.
+ */
+static ilbadm_status_t
+i_expand_ipv6range(ilbadm_sgroup_t *sg, ilb_server_data_t *srv,
+    ilb_ip_addr_t *ip1, ilb_ip_addr_t *ip2)
+{
+	ilbadm_servnode_t	*sn_new;
+	ilb_ip_addr_t		new_ip;
+	int			cmp;
+
+	new_ip.ia_af = AF_INET6;
+	new_ip.ia_v6 = i_next_in6_addr(&ip1->ia_v6, 1);
+	while ((cmp = ilb_cmp_ipaddr(&new_ip, ip2, NULL)) < 1) {
+		sn_new = i_new_sg_elem(sg);
+		if (sn_new == NULL)
+			return (ILBADM_ENOMEM);
+		sn_new->s_spec.sd_addr = new_ip;
+		sn_new->s_spec.sd_minport = srv->sd_minport;
+		sn_new->s_spec.sd_maxport = srv->sd_maxport;
+
+		/*
+		 * stop at ip2 instead of stepping past it: if ip2 is the
+		 * highest address, the increment would wrap around to ::
+		 * and the loop would never end
+		 */
+		if (cmp == 0)
+			break;
+		new_ip.ia_v6 = i_next_in6_addr(&new_ip.ia_v6, 1);
+	}
+	return (ILBADM_OK);
+}
+
+
+/*
+ * we create a list node in the servergroup for every ip address
+ * in the range (ip1, ip2], where we interpret the ip addresses as
+ * numbers.
+ * ip1 itself is not added here - the expansion starts at the address
+ * following it - so the caller is expected to have stored it already.
+ * An ip2 with address family 0 means "no range given" and is not an
+ * error.
+ * Returns:	ILBADM_OK		nothing to do, or range expanded
+ *		ILBADM_LIBERR		families differ or ip1 > ip2
+ *					(message already printed)
+ *		ILBADM_TOOMANYIPADDR	range spans more than MAX_IP_SPREAD
+ *		ILBADM_INVAL_AF		family is neither IPv4 nor IPv6
+ *		otherwise the status of the per-family expansion
+ */
+static ilbadm_status_t
+i_expand_iprange(ilbadm_sgroup_t *sg, ilb_server_data_t *sr,
+    ilb_ip_addr_t *ip1, ilb_ip_addr_t *ip2)
+{
+	int	cmp;
+	int64_t	delta;
+	int64_t	spread = MAX_IP_SPREAD;
+
+	if (ip2->ia_af == 0)
+		return (ILBADM_OK);
+
+	if (ip1->ia_af != ip2->ia_af) {
+		ilbadm_err(gettext("IP address mismatch"));
+		return (ILBADM_LIBERR);
+	}
+
+	/* if ip addresses are the same, we're done */
+	if ((cmp = ilb_cmp_ipaddr(ip1, ip2, &delta)) == 0)
+		return (ILBADM_OK);
+	if (cmp == 1) {
+		ilbadm_err(gettext("starting IP address is must be less"
+		    " than ending ip address in ip range specification"));
+		return (ILBADM_LIBERR);
+	}
+
+	/*
+	 * if the implicit number of IPs is too large, stop.  The
+	 * comparison is done on the full 64 bit distance; narrowing it
+	 * to int first would let very large ranges slip through.
+	 */
+	if (delta > spread || delta < -spread)
+		return (ILBADM_TOOMANYIPADDR);
+
+	switch (ip1->ia_af) {
+	case AF_INET:
+		return (i_expand_ipv4range(sg, sr, ip1, ip2));
+	case AF_INET6:
+		return (i_expand_ipv6range(sg, sr, ip1, ip2));
+	default:
+		break;
+	}
+	return (ILBADM_INVAL_AF);
+}
+
+/*
+ * parse a port spec (number or by service name) and
+ * return the numeric port in *host* byte order, or -1 if "port" is
+ * neither a well-formed decimal number nor a service name known for
+ * "proto".  Range checking of the number is left to the caller.
+ *
+ * Upon return, *flags contains ILB_FLAGS_SRV_PORTNAME if a service name
+ * matches; the bit is cleared if "port" starts with a digit.
+ */
+static int
+i_parseport(char *port, char *proto, int *flags)
+{
+	struct servent	*se;
+
+	/* assumption: port names start with a non-digit */
+	if (isdigit((unsigned char)port[0])) {
+		char	*end;
+		long	num;
+
+		if (flags != NULL)
+			*flags &= ~ILB_FLAGS_SRV_PORTNAME;
+
+		/* reject trailing garbage and values that don't fit */
+		errno = 0;
+		num = strtol(port, &end, 10);
+		if (errno != 0 || *end != '\0' || num > INT_MAX)
+			return (-1);
+		return ((int)num);
+	}
+
+	se = getservbyname(port, proto);
+	if (se == NULL)
+		return (-1);
+
+	if (flags != NULL)
+		*flags |= ILB_FLAGS_SRV_PORTNAME;
+
+	/*
+	 * we need to convert to host byte order to be in sync with
+	 * numerical ports. since result needs to be compared, this
+	 * is preferred to returning NW byte order
+	 */
+	return ((int)(ntohs(se->s_port)));
+}
+
+/*
+ * matches one hostname or IP address and stores it in "store".
+ * space must have been pre-allocated to accept data
+ * "sg" != NULL only for cases where ip ranges may be coming in.
+ *
+ * "flags" selects what is acceptable in "val":
+ *	OPT_IP_RANGE		ip1-ip2 is allowed
+ *	OPT_NUMERIC_ONLY	no hostname lookup is attempted
+ *	OPT_PORTS		a ":port[-port]" suffix is parsed
+ *	OPT_PORTS_ONLY		"val" consists of "port[-port]" only
+ *	OPT_NAT			"store" is an ilb_rule_data_t whose
+ *				r_nat_src_start/_end receive the
+ *				address(es); otherwise "store" is an
+ *				ilbadm_servnode_t
+ * "val" is modified while parsing, but is put back together before
+ * returning, except that the port separator is left unchanged in
+ * OPT_PORTS_ONLY mode (there is none to restore).
+ * Returns ILBADM_OK or the status describing the failure; for
+ * ILBADM_LIBERR a message has already been printed.
+ */
+static ilbadm_status_t
+i_match_hostorip(void *store, ilbadm_sgroup_t *sg, char *val,
+    int flags, ilbadm_key_code_t keyword)
+{
+	boolean_t	is_ip_range_ok = flags & OPT_IP_RANGE;
+	boolean_t	is_addr_numeric = flags & OPT_NUMERIC_ONLY;
+	boolean_t	is_ports_ok = flags & OPT_PORTS;
+	boolean_t	ports_only = flags & OPT_PORTS_ONLY;
+	boolean_t	is_nat_src = flags & OPT_NAT;
+	char		*port_pref = NULL;	/* port separator found */
+	char		*dash = NULL;		/* '-' of an IP range */
+	char		*port_dash = NULL;	/* '-' of a port range */
+	char		*port1p = NULL, *port2p = NULL;
+	char		*host1p = NULL, *host2p = NULL;
+	char		*close1 = NULL, *close2 = NULL;
+	ilb_ip_addr_t	ip2store;
+	ilb_ip_addr_t	*ip1, *ip2;
+	int		p1 = 0, p2;
+	ilb_server_data_t *s = NULL;
+	ilbadm_status_t	rc = ILBADM_OK;
+	int		af = AF_INET;
+	addr_type_t	at = 0;
+	int		p_flg = 0;
+	struct in6_addr	v6nameaddr;
+
+	errno = 0;
+
+	if (is_nat_src) {
+		ilb_rule_data_t *rd = (ilb_rule_data_t *)store;
+
+		ip1 = &rd->r_nat_src_start;
+		ip2 = &rd->r_nat_src_end;
+	} else {
+		ilbadm_servnode_t *sn = (ilbadm_servnode_t *)store;
+
+		s = &sn->s_spec;
+		ip1 = &s->sd_addr;
+		ip2 = &ip2store;
+		bzero(ip2, sizeof (*ip2));
+	}
+
+	if (ports_only) {
+		/*
+		 * the whole of "val" is the port spec; there is no
+		 * separator in front of it that we could (or may) touch
+		 */
+		is_ports_ok = B_TRUE;
+		port1p = val;
+		goto ports;
+	}
+
+	/*
+	 * we parse the syntax ip[-ip][:port[-port]]
+	 * since IPv6 addresses contain ':'s as well, they need to be
+	 * enclosed in "[]" to be distinct from a potential port spec.
+	 * therefore, we need to first check whether we're dealing with
+	 * IPv6 addresses before we can go search for the port separator
+	 * and ipv6 range could look like this: [ff::0]-[ff::255]:80
+	 */
+	if ((keyword == ILB_KEY_SERVER) && (strchr(val, ':') != NULL) &&
+	    (*val != '[') && (inet_pton(AF_INET6, val, &v6nameaddr) == 1)) {
+		/*
+		 * V6 addresses must be enclosed within
+		 * brackets when specifying server addresses
+		 */
+		rc = ILBADM_INVAL_SYNTAX;
+		goto err_out;
+	}
+
+	if (*val == '[') {
+		af = AF_INET6;
+
+		val++;
+		host1p = val;
+
+		close1 = strchr(val, (int)']');
+		if (close1 == NULL) {
+			rc = ILBADM_INVAL_SYNTAX;
+			goto err_out;
+		}
+		*close1 = '\0';
+		at = 0;
+		rc = i_match_onehost(host1p, ip1, &at);
+		if (rc != ILBADM_OK)
+			goto err_out;
+		if (at != numeric) {
+			rc = ILBADM_INVAL_ADDR;
+			goto err_out;
+		}
+		if (ip1->ia_af != af) {
+			rc = ILBADM_INVAL_AF;
+			goto err_out;
+		}
+		val = close1 + 1;
+
+		if (*val == PORT_SEP) {
+			port_pref = val;
+			goto ports;
+		}
+		if (*val == '-') {
+			dash = val;
+			if (!is_ip_range_ok) {
+				ilbadm_err(gettext("IP address ranges not"
+				    " allowed"));
+				rc = ILBADM_LIBERR;
+				goto err_out;
+			}
+			val++;
+			if (*val != '[') {
+				rc = ILBADM_INVAL_SYNTAX;
+				goto err_out;
+			}
+			val++;
+			close2 = strchr(val, (int)']');
+			if (close2 == NULL) {
+				rc = ILBADM_INVAL_SYNTAX;
+				goto err_out;
+			}
+			*close2 = '\0';
+			host2p = val;
+			at = 0;
+			rc = i_match_onehost(host2p, ip2, &at);
+			if (rc != ILBADM_OK)
+				goto err_out;
+			if (at != numeric) {
+				rc = ILBADM_INVAL_ADDR;
+				goto err_out;
+			}
+			if (ip2->ia_af != af) {
+				rc = ILBADM_INVAL_AF;
+				goto err_out;
+			}
+			val = close2+1;
+		}
+	}
+
+	/* ports always potentially allow ranges - XXXms: check? */
+	port_pref = strchr(val, (int)PORT_SEP);
+ports:
+	if (!ports_only && port_pref != NULL && is_ports_ok) {
+		/* the \0 also terminates the ip address(es) before it */
+		port1p = port_pref + 1;
+		*port_pref = '\0';
+	}
+	if (port1p != NULL) {
+		/*
+		 * the port range's '-' is tracked separately from the one
+		 * of an IP range, so that neither hides the other
+		 */
+		port_dash = strchr(port1p, (int)'-');
+		if (port_dash != NULL) {
+			port2p = port_dash + 1;
+			*port_dash = '\0';
+		}
+
+		p1 = i_parseport(port1p, NULL, &p_flg);
+		if (p1 == -1 || p1 == 0 || p1 > ILB_MAX_PORT) {
+			ilbadm_err(gettext("invalid port value %s"
+			    " specified"), port1p);
+			rc = ILBADM_LIBERR;
+			goto err_out;
+		}
+		s->sd_minport = htons((in_port_t)p1);
+		if (p_flg & ILB_FLAGS_SRV_PORTNAME)
+			s->sd_flags |= ILB_FLAGS_SRV_PORTNAME;
+
+		if (port2p != NULL) {
+			/* ranges are only allowed for numeric ports */
+			if (p_flg & ILB_FLAGS_SRV_PORTNAME) {
+				ilbadm_err(gettext("ranges are only allowed"
+				    " for numeric ports"));
+				rc = ILBADM_LIBERR;
+				goto err_out;
+			}
+			p2 = i_parseport(port2p, NULL, &p_flg);
+			if (p2 == -1 || p2 <= p1 || p2 > ILB_MAX_PORT ||
+			    (p_flg & ILB_FLAGS_SRV_PORTNAME) ==
+			    ILB_FLAGS_SRV_PORTNAME) {
+				ilbadm_err(gettext("invalid port value %s"
+				    " specified"), port2p);
+				rc = ILBADM_LIBERR;
+				goto err_out;
+			}
+			s->sd_maxport = htons((in_port_t)p2);
+		}
+		/*
+		 * we fill the '-' back in, but not the port separator,
+		 * as the \0 in its place terminates the ip address(es)
+		 */
+		if (port_dash != NULL)
+			*port_dash = '-';
+		if (ports_only)
+			goto out;
+	}
+
+	if (af == AF_INET6)
+		goto out;
+
+	/*
+	 * we need to handle these situations for hosts:
+	 * a. ip address
+	 * b. ip address range (ip1-ip2)
+	 * c. a hostname (may include '-' or start with a digit)
+	 *
+	 * We want to do hostname lookup only if we're quite sure that
+	 * we actually are looking at neither a single IP address nor a
+	 * range of same, as this can hang if name service is not set up
+	 * (sth. likely in a LB environment).
+	 *
+	 * here's how we proceed:
+	 * 1. try to match numeric only. If that succeeds, we're done.
+	 *    (getaddrinfo, which we call in i_match_onehost(), fails if
+	 *    it encounters a '-')
+	 * 2. search for a '-'; if we find one, try numeric match for
+	 *    both sides. if this fails:
+	 * 3. re-insert '-' and try for a legal hostname.
+	 */
+	/* 1. */
+	at = numeric;
+	rc = i_match_onehost(val, ip1, &at);
+	if (rc == ILBADM_OK)
+		goto out;
+
+	/* 2. */
+	dash = strchr(val, (int)'-');
+	if (dash != NULL && is_ip_range_ok) {
+		host2p = dash + 1;
+		*dash = '\0';
+		at = numeric;
+		rc = i_match_onehost(host2p, ip2, &at);
+		if (rc != ILBADM_OK || at != numeric) {
+			*dash = '-';
+			dash = NULL;
+			bzero(ip2, sizeof (*ip2));
+			goto hostname;
+		}
+		/*
+		 * if the RHS of '-' is an IP but LHS is not, we might
+		 * have a hostname of form x-y where y is just a number
+		 * (this seems a valid IPv4 address), so we need to
+		 * try a complete hostname
+		 */
+		rc = i_match_onehost(val, ip1, &at);
+		if (rc != ILBADM_OK || at != numeric) {
+			*dash = '-';
+			dash = NULL;
+			goto hostname;
+		}
+		goto out;
+	}
+hostname:
+	/* 3. */
+
+	if (is_addr_numeric)
+		at = numeric;
+	else
+		at = 0;
+	rc = i_match_onehost(val, ip1, &at);
+	if (rc != ILBADM_OK) {
+		goto out;
+	}
+	if (s != NULL) {
+		s->sd_flags |= ILB_FLAGS_SRV_HOSTNAME;
+		/* XXX: todo: save hostname for re-display for admin */
+	}
+
+out:
+	/* only a successfully parsed IP range gets expanded */
+	if (rc == ILBADM_OK && dash != NULL && !is_nat_src) {
+		rc = i_expand_iprange(sg, s, ip1, ip2);
+		if (rc != ILBADM_OK)
+			goto err_out;
+	}
+
+	if (is_nat_src && host2p == NULL)
+		*ip2 = *ip1;
+
+err_out:
+	/*
+	 * we re-insert what we overwrote, especially in the error case
+	 */
+	if (close2 != NULL)
+		*close2 = ']';
+	if (close1 != NULL)
+		*close1 = ']';
+	if (dash != NULL)
+		*dash = '-';
+	if (port_dash != NULL)
+		*port_dash = '-';
+	if (port_pref != NULL && !ports_only)
+		*port_pref = PORT_SEP;
+
+	return (rc);
+}
+
+/*
+ * type-agnostic helper: hand back storage suitable for a value of
+ * keyword "key".  For the server-related keywords this is whatever
+ * i_new_sg_elem() returns for "store"; for every other keyword no
+ * storage is provided and NULL is returned.
+ */
+static void *
+i_new_storep(void *store, ilbadm_key_code_t key)
+{
+	if (key == ILB_KEY_SERVER || key == ILB_KEY_SERVRANGE ||
+	    key == ILB_KEY_SERVERID)
+		return ((void *) i_new_sg_elem(store));
+
+	return (NULL);
+}
+
+/*
+ * make sure everything that needs to be there is there:
+ * the VIP must be IPv4 or IPv6, a servergroup name must be set,
+ * algorithm and topology must be non-zero, and for ILB_TOPO_NAT the
+ * start of the proxy-src range must share the VIP's address family.
+ * Returns ILBADM_OK if so, otherwise ILBADM_INVAL_AF, ILBADM_ENOSGNAME
+ * or (after printing a message) ILBADM_LIBERR.
+ */
+ilbadm_status_t
+i_check_rule_spec(ilb_rule_data_t *rd)
+{
+	int32_t	vip_af = rd->r_vip.ia_af;
+
+	switch (vip_af) {
+	case AF_INET:
+	case AF_INET6:
+		break;
+	default:
+		return (ILBADM_INVAL_AF);
+	}
+
+	if (rd->r_sgname[0] == '\0')
+		return (ILBADM_ENOSGNAME);
+
+	if (rd->r_algo == 0 || rd->r_topo == 0) {
+		ilbadm_err(gettext("lbalg or type is unspecified"));
+		return (ILBADM_LIBERR);
+	}
+
+	if (rd->r_topo == ILB_TOPO_NAT &&
+	    rd->r_nat_src_start.ia_af != vip_af) {
+		ilbadm_err(gettext("proxy-src is either missing"
+		    " or its address family does not"
+		    " match that of the VIP address"));
+		return (ILBADM_LIBERR);
+	}
+	/* extend as necessary */
+
+	return (ILBADM_OK);
+}
+
+/*
+ * Count the leading one-bits of "mask", i.e. its prefix length.
+ * in parameter "sz" describes size (in bytes) of mask
+ */
+static int
+mask_to_prefixlen(const uchar_t *mask, const int sz)
+{
+	int	bits = 0;
+	int	byte;
+
+	/*
+	 * bytes are scanned in order, each from its most significant
+	 * bit downwards; the first unset bit ends the count
+	 */
+	for (byte = 0; byte < sz; byte++) {
+		uchar_t	probe;
+
+		for (probe = 0x80; probe != 0; probe >>= 1) {
+			if ((mask[byte] & probe) == 0)
+				return (bits);
+			bits++;
+		}
+	}
+	return (bits);
+}
+
+/*
+ * Return the prefix length of the netmask stored in "ip", whose
+ * address family must be AF_INET or AF_INET6 (asserted); 0 is returned
+ * for any other family if the assertion is compiled out.
+ */
+int
+ilbadm_mask_to_prefixlen(ilb_ip_addr_t *ip)
+{
+	int	af = ip->ia_af;
+
+	assert(af == AF_INET || af == AF_INET6);
+
+	if (af == AF_INET) {
+		return (mask_to_prefixlen((uchar_t *)&ip->ia_v4.s_addr,
+		    sizeof (ip->ia_v4)));
+	}
+	if (af == AF_INET6) {
+		return (mask_to_prefixlen((uchar_t *)&ip->ia_v6.s6_addr,
+		    sizeof (ip->ia_v6)));
+	}
+	return (0);
+}
+
+/* copied from ifconfig.c, changed to return symbolic constants */
+/*
+ * Convert a prefix length to a mask.
+ * Returns B_TRUE if ok, B_FALSE if prefixlen is negative or exceeds
+ * maxlen.
+ * Assumes the mask array is zero'ed by the caller.
+ */
+static boolean_t
+in_prefixlentomask(int prefixlen, int maxlen, uchar_t *mask)
+{
+	int	bit;
+
+	if (prefixlen < 0 || prefixlen > maxlen)
+		return (B_FALSE);
+
+	/* whole bytes first ... */
+	for (; prefixlen >= 8; prefixlen -= 8)
+		*mask++ = 0xFF;
+
+	/* ... then the top bits of the remaining partial byte, if any */
+	for (bit = prefixlen; bit > 0; bit--)
+		*mask |= 1 << (8 - bit);
+
+	return (B_TRUE);
+}
+
+/*
+ * Parse the prefix length in "val" (an optional leading '/' is
+ * skipped) and store the corresponding netmask for address family
+ * "af" (AF_INET or AF_INET6) in "ip".
+ * Returns ILBADM_OK on success; otherwise a message is printed and
+ * ILBADM_LIBERR is returned.
+ */
+ilbadm_status_t
+ilbadm_set_netmask(char *val, ilb_ip_addr_t *ip, int af)
+{
+	long		prefixlen;
+	int		maxval;
+	boolean_t	r = B_FALSE;
+	char		*end;
+
+	assert(af == AF_INET || af == AF_INET6);
+
+	maxval = (af == AF_INET) ? 32 : 128;
+
+	if (*val == '/')
+		val++;
+	errno = 0;
+	prefixlen = strtol(val, &end, 10);
+	if ((val == end) || (*end != '\0')) {
+		ilbadm_err(gettext("invalid pmask provided"));
+		return (ILBADM_LIBERR);
+	}
+
+	/* checked as a long, so an overlong number can't wrap into range */
+	if (errno != 0 || prefixlen < 1 || prefixlen > maxval) {
+		ilbadm_err(gettext("invalid pmask provided (AF mismatch?)"));
+		return (ILBADM_LIBERR);
+	}
+
+	/*
+	 * in_prefixlentomask() only sets bits, so the mask has to be
+	 * cleared first or stale bits would survive
+	 */
+	switch (af) {
+	case AF_INET:
+		bzero(&ip->ia_v4, sizeof (ip->ia_v4));
+		r = in_prefixlentomask((int)prefixlen, maxval,
+		    (uchar_t *)&ip->ia_v4.s_addr);
+		break;
+	case AF_INET6:
+		bzero(&ip->ia_v6, sizeof (ip->ia_v6));
+		r = in_prefixlentomask((int)prefixlen, maxval,
+		    (uchar_t *)&ip->ia_v6.s6_addr);
+		break;
+	default:
+		break;
+	}
+	if (r != B_TRUE) {
+		ilbadm_err(gettext("cannot convert %s to a netmask"), val);
+		return (ILBADM_LIBERR);
+	}
+	ip->ia_af = af;
+	return (ILBADM_OK);
+}
+
+static ilbadm_status_t
+i_store_val(char *val, void *store, ilbadm_key_code_t keyword)
+{
+ ilbadm_status_t rc = ILBADM_OK;
+ void *storep = store;
+ ilb_rule_data_t *rd = NULL;
+ ilbadm_sgroup_t *sg = NULL;
+ ilb_hc_info_t *hc_info = NULL;
+ struct protoent *pe;
+ int64_t tmp_val;
+
+ if (*val == '\0')
+ return (ILBADM_NOKEYWORD_VAL);
+
+ /* some types need new storage, others don't */
+ switch (keyword) {
+ case ILB_KEY_SERVER:
+ case ILB_KEY_SERVERID:
+ sg = (ilbadm_sgroup_t *)store;
+ storep = i_new_storep(store, keyword);
+ break;
+ case ILB_KEY_HEALTHCHECK:
+ case ILB_KEY_SERVERGROUP:
+ rd = (ilb_rule_data_t *)store;
+ break;
+ case ILB_KEY_VIP: /* fallthrough */
+ case ILB_KEY_PORT: /* fallthrough */
+ case ILB_KEY_HCPORT: /* fallthrough */
+ case ILB_KEY_CONNDRAIN: /* fallthrough */
+ case ILB_KEY_NAT_TO: /* fallthrough */
+ case ILB_KEY_STICKY_TO: /* fallthrough */
+ case ILB_KEY_PROTOCOL: /* fallthrough */
+ case ILB_KEY_ALGORITHM: /* fallthrough */
+ case ILB_KEY_STICKY: /* fallthrough */
+ case ILB_KEY_TYPE: /* fallthrough */
+ case ILB_KEY_SRC: /* fallthrough */
+ rd = (ilb_rule_data_t *)store;
+ break;
+ case ILB_KEY_HC_TEST:
+ case ILB_KEY_HC_COUNT:
+ case ILB_KEY_HC_INTERVAL:
+ case ILB_KEY_HC_TIMEOUT:
+ hc_info = (ilb_hc_info_t *)store;
+ default: /* do nothing */
+ ;
+ }
+
+ switch (keyword) {
+ case ILB_KEY_SRC:
+ /*
+ * the proxy-src keyword is only valid for full NAT topology
+ * the value is either a single or a range of IP addresses.
+ */
+ if (rd->r_topo != ILB_TOPO_NAT) {
+ rc = ILBADM_INVAL_PROXY;
+ break;
+ }
+ rc = i_match_hostorip(storep, sg, val, OPT_NUMERIC_ONLY |
+ OPT_IP_RANGE | OPT_NAT, ILB_KEY_SRC);
+ break;
+ case ILB_KEY_SERVER:
+ rc = i_match_hostorip(storep, sg, val,
+ OPT_IP_RANGE | OPT_PORTS, ILB_KEY_SERVER);
+ break;
+ case ILB_KEY_SERVERID:
+ if (val[0] != ILB_SRVID_PREFIX)
+ rc = ILBADM_INVAL_SRVID;
+ else
+ rc = i_store_serverID(storep, val);
+ break;
+ case ILB_KEY_VIP: {
+ ilb_ip_addr_t *vip = &rd->r_vip;
+ addr_type_t at = numeric;
+ char *close = NULL;
+
+ /*
+ * we duplicate some functionality of i_match_hostorip
+ * here; that function is geared to mandate '[]' for IPv6
+ * addresses, which we want to relax here, so as not to
+ * make i_match_hostorip even longer, we do what we need
+ * here.
+ */
+ if (*val == '[') {
+ val++;
+ if ((close = strchr(val, (int)']')) == NULL) {
+ rc = ILBADM_INVAL_SYNTAX;
+ break;
+ }
+ *close = NULL;
+ }
+ rc = i_match_onehost(val, vip, &at);
+ /* re-assemble string as we found it */
+ if (close != NULL) {
+ *close = ']';
+ if (rc == ILBADM_OK && vip->ia_af != AF_INET6) {
+ ilbadm_err(gettext("use of '[]' only valid"
+ " with IPv6 addresses"));
+ rc = ILBADM_LIBERR;
+ }
+ }
+ break;
+ }
+ case ILB_KEY_CONNDRAIN:
+ tmp_val = strtoll(val, NULL, 10);
+ if (tmp_val <= 0 || tmp_val > UINT_MAX) {
+ rc = ILBADM_EINVAL;
+ break;
+ }
+ rd->r_conndrain = tmp_val;
+ break;
+ case ILB_KEY_NAT_TO:
+ tmp_val = strtoll(val, NULL, 10);
+ if (tmp_val < 0 || tmp_val > UINT_MAX) {
+ rc = ILBADM_EINVAL;
+ break;
+ }
+ rd->r_nat_timeout = tmp_val;
+ break;
+ case ILB_KEY_STICKY_TO:
+ tmp_val = strtoll(val, NULL, 10);
+ if (tmp_val <= 0 || tmp_val > UINT_MAX) {
+ rc = ILBADM_EINVAL;
+ break;
+ }
+ rd->r_sticky_timeout = tmp_val;
+ break;
+ case ILB_KEY_PORT:
+ if (isdigit(*val)) {
+ ilbadm_servnode_t sn;
+
+ bzero(&sn, sizeof (sn));
+ rc = i_match_hostorip((void *)&sn, sg, val,
+ OPT_PORTS_ONLY, ILB_KEY_PORT);
+ if (rc != ILBADM_OK)
+ break;
+ rd->r_minport = sn.s_spec.sd_minport;
+ rd->r_maxport = sn.s_spec.sd_maxport;
+ } else {
+ struct servent *se;
+
+ se = getservbyname(val, NULL);
+ if (se == NULL) {
+ rc = ILBADM_ENOSERVICE;
+ break;
+ }
+ rd->r_minport = se->s_port;
+ rd->r_maxport = 0;
+ }
+ break;
+ case ILB_KEY_HCPORT:
+ if (isdigit(*val)) {
+ int hcport = atoi(val);
+
+ if (hcport < 1 || hcport > 65535) {
+ ilbadm_err(gettext("illegal number for"
+ " hcport %s"), val);
+ rc = ILBADM_LIBERR;
+ break;
+ }
+ rd->r_hcport = htons(hcport);
+ rd->r_hcpflag = ILB_HCI_PROBE_FIX;
+ } else if (strcasecmp(val, "ANY") == 0) {
+ rd->r_hcport = 0;
+ rd->r_hcpflag = ILB_HCI_PROBE_ANY;
+ } else {
+ return (ILBADM_EINVAL);
+ }
+ break;
+ case ILB_KEY_PROTOCOL:
+ pe = getprotobyname(val);
+ if (pe == NULL)
+ rc = ILBADM_ENOPROTO;
+ else
+ rd->r_proto = pe->p_proto;
+ break;
+ case ILB_KEY_ALGORITHM:
+ rd->r_algo = i_val_from_str(val, &algo_types[0]);
+ if (rd->r_algo == ILBD_BAD_VAL)
+ rc = ILBADM_INVAL_ALG;
+ break;
+ case ILB_KEY_STICKY:
+ rd->r_flags |= ILB_FLAGS_RULE_STICKY;
+ /*
+ * CAVEAT: the use of r_vip.ia_af implies that the VIP
+ * *must* be specified on the commandline *before*
+ * the sticky mask.
+ */
+ if (AF_UNSPEC == rd->r_vip.ia_af) {
+ ilbadm_err(gettext("option '%s' requires that VIP be "
+ "specified first"), ilbadm_key_to_opt(keyword));
+ rc = ILBADM_LIBERR;
+ break;
+ }
+ rc = ilbadm_set_netmask(val, &rd->r_stickymask,
+ rd->r_vip.ia_af);
+ break;
+ case ILB_KEY_TYPE:
+ rd->r_topo = i_val_from_str(val, &topo_types[0]);
+ if (rd->r_topo == ILBD_BAD_VAL)
+ rc = ILBADM_INVAL_OPER;
+ break;
+ case ILB_KEY_SERVERGROUP:
+ (void) strlcpy(rd->r_sgname, (char *)val,
+ sizeof (rd->r_sgname));
+ break;
+ case ILB_KEY_HEALTHCHECK:
+ (void) strlcpy(rd->r_hcname, (char *)val,
+ sizeof (rd->r_hcname));
+ break;
+ case ILB_KEY_HC_TEST:
+ (void) strlcpy(hc_info->hci_test, (char *)val,
+ sizeof (hc_info->hci_test));
+ break;
+ case ILB_KEY_HC_COUNT:
+ if (isdigit(*val))
+ hc_info->hci_count = atoi(val);
+ else
+ return (ILBADM_EINVAL);
+ break;
+ case ILB_KEY_HC_INTERVAL:
+ if (isdigit(*val))
+ hc_info->hci_interval = atoi(val);
+ else
+ return (ILBADM_EINVAL);
+ break;
+ case ILB_KEY_HC_TIMEOUT:
+ if (isdigit(*val))
+ hc_info->hci_timeout = atoi(val);
+ else
+ return (ILBADM_EINVAL);
+ break;
+ default: rc = ILBADM_INVAL_KEYWORD;
+ break;
+ }
+
+ return (rc);
+}
+
+/*
+ * Generic option-string parser.
+ *
+ * "arg" holds comma-separated items of the form "key=value".  When
+ * OPT_VALUE_LIST is set in "flags", every item following the first
+ * assignment is taken as a further bare value for the keyword matched last.
+ * Key text is translated through i_match_key() using "keylist"; every value
+ * is handed to i_store_val() together with "store".
+ *
+ * "arg" is modified while parsing: separators are overwritten with NUL, and
+ * only the separators of the item handled last are put back before
+ * returning.  If "count" is not NULL it receives the item counter, which
+ * starts at 1 and is incremented after each successfully stored item.
+ *
+ * Returns ILBADM_OK, or ILBADM_LIBERR once an error message was printed.
+ * XXXms: looks like "key=value[,value]" violates spec. needs a fix
+ */
+ilbadm_status_t
+i_parse_optstring(char *arg, void *store, ilbadm_key_name_t *keylist,
+    int flags, int *count)
+{
+	ilbadm_status_t status = ILBADM_OK;
+	ilbadm_key_code_t keyword;
+	boolean_t want_list = flags & OPT_VALUE_LIST;
+	boolean_t have_assign = B_FALSE;
+	char *sep = NULL, *eq = NULL;
+	char *item, *next_item, *val;
+	int seen = 1;
+
+	/*
+	 * For each item:
+	 *   1. cut it off at the next comma (if any)
+	 *   2. if a value list is expected and the assignment was already
+	 *      seen, the whole item is the next value
+	 *   3. otherwise split at '=' and match the keyword
+	 *   4. store the value
+	 * Leaving the loop via "break" skips the counter increment.
+	 */
+	for (item = arg; item != NULL && *item != '\0';
+	    item = next_item, seen++) {
+		sep = eq = NULL;
+
+		/* 1 */
+		next_item = strchr(item, ',');
+		if (next_item != NULL) {
+			sep = next_item++;
+			*sep = '\0';
+		}
+
+		if (!want_list || !have_assign) {
+			/* 3 */
+			eq = strchr(item, '=');
+			if (eq == NULL) {
+				ilbadm_err("%s: %s", item,
+				    ilbadm_errstr(ILBADM_ASSIGNREQ));
+				status = ILBADM_LIBERR;
+				break;
+			}
+			*eq = '\0';
+			val = eq + 1;
+			have_assign = B_TRUE;
+
+			keyword = i_match_key(item, keylist);
+			if (keyword == ILB_KEY_BAD) {
+				ilbadm_err(gettext("bad keyword %s"), item);
+				status = ILBADM_LIBERR;
+				break;
+			}
+		} else {
+			/* 2 */
+			val = item;
+		}
+
+		/* 4 */
+		status = i_store_val(val, store, keyword);
+		if (status != ILBADM_OK) {
+			ilbadm_err("%s: %s", item, ilbadm_errstr(status));
+			/* Change to ILBADM_LIBERR to avoid more err msgs. */
+			status = ILBADM_LIBERR;
+			break;
+		}
+	}
+
+	/* put back the separators of the item we looked at last */
+	if (sep != NULL)
+		*sep = ',';
+	if (eq != NULL)
+		*eq = '=';
+	if (count != NULL)
+		*count = seen;
+	return (status);
+}
diff --git a/usr/src/cmd/mdb/common/modules/ip/ip.c b/usr/src/cmd/mdb/common/modules/ip/ip.c
index 9b901119ad..28f21efe1f 100644
--- a/usr/src/cmd/mdb/common/modules/ip/ip.c
+++ b/usr/src/cmd/mdb/common/modules/ip/ip.c
@@ -46,6 +46,11 @@
#include <sys/modhash_impl.h>
#include <inet/ip_ndp.h>
#include <inet/ip_if.h>
+#include <ilb.h>
+#include <ilb/ilb_impl.h>
+#include <ilb/ilb_stack.h>
+#include <ilb/ilb_nat.h>
+#include <ilb/ilb_conn.h>
#include <sys/dlpi.h>
#include <mdb/mdb_modapi.h>
@@ -223,6 +228,19 @@ static void conn_status_help(void);
static int srcid_status(uintptr_t, uint_t, int, const mdb_arg_t *);
+static int ilb_stacks_walk_step(mdb_walk_state_t *);
+static int ilb_rules_walk_init(mdb_walk_state_t *);
+static int ilb_rules_walk_step(mdb_walk_state_t *);
+static int ilb_servers_walk_init(mdb_walk_state_t *);
+static int ilb_servers_walk_step(mdb_walk_state_t *);
+static int ilb_nat_src_walk_init(mdb_walk_state_t *);
+static int ilb_nat_src_walk_step(mdb_walk_state_t *);
+static int ilb_conn_walk_init(mdb_walk_state_t *);
+static int ilb_conn_walk_step(mdb_walk_state_t *);
+static int ilb_sticky_walk_init(mdb_walk_state_t *);
+static int ilb_sticky_walk_step(mdb_walk_state_t *);
+static void ilb_common_walk_fini(mdb_walk_state_t *);
+
/*
* Given the kernel address of an ip_stack_t, return the stackid
*/
@@ -1412,6 +1430,20 @@ static const mdb_walker_t walkers[] = {
"ips_ipcl_proto_fanout_v6",
ipcl_hash_walk_init, ipcl_hash_walk_step,
ipcl_hash_walk_fini, &proto_v6_hash_arg},
+ { "ilb_stacks", "walk ilb_stack_t",
+ ip_stacks_walk_init, ilb_stacks_walk_step, NULL },
+ { "ilb_rules", "walk ilb rules in a given ilb_stack_t",
+ ilb_rules_walk_init, ilb_rules_walk_step, NULL },
+ { "ilb_servers", "walk server in a given ilb_rule_t",
+ ilb_servers_walk_init, ilb_servers_walk_step, NULL },
+ { "ilb_nat_src", "walk NAT source table of a given ilb_stack_t",
+ ilb_nat_src_walk_init, ilb_nat_src_walk_step,
+ ilb_common_walk_fini },
+ { "ilb_conns", "walk NAT table of a given ilb_stack_t",
+ ilb_conn_walk_init, ilb_conn_walk_step, ilb_common_walk_fini },
+ { "ilb_stickys", "walk sticky table of a given ilb_stack_t",
+ ilb_sticky_walk_init, ilb_sticky_walk_step,
+ ilb_common_walk_fini },
{ NULL }
};
@@ -2543,3 +2575,474 @@ srcid_status(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
}
return (DCMD_OK);
}
+
+/*
+ * Step function of the ilb_stacks walker: read the netstack_t found at the
+ * current walk address and pass its NS_ILB module pointer to the callback.
+ */
+static int
+ilb_stacks_walk_step(mdb_walk_state_t *wsp)
+{
+	netstack_t ns;
+
+	if (mdb_vread(&ns, sizeof (ns), wsp->walk_addr) == -1) {
+		mdb_warn("can't read netstack at %p", wsp->walk_addr);
+		return (WALK_ERR);
+	}
+
+	return (wsp->walk_callback((uintptr_t)ns.netstack_modules[NS_ILB],
+	    wsp->walk_layer, wsp->walk_cbdata));
+}
+
+/*
+ * Init function of the ilb_rules walker.  wsp->walk_addr must hold the
+ * address of an ilb_stack_t; the walk starts at its ilbs_rule_head.
+ * Returns WALK_ERR without an address or if the read fails, WALK_DONE if
+ * the rule list is empty, WALK_NEXT otherwise.
+ */
+static int
+ilb_rules_walk_init(mdb_walk_state_t *wsp)
+{
+	ilb_stack_t ilbs;
+
+	/* walk_addr is a uintptr_t, so test it against 0 rather than NULL */
+	if (wsp->walk_addr == 0)
+		return (WALK_ERR);
+
+	if (mdb_vread(&ilbs, sizeof (ilbs), wsp->walk_addr) == -1) {
+		mdb_warn("failed to read ilb_stack_t at %p", wsp->walk_addr);
+		return (WALK_ERR);
+	}
+
+	wsp->walk_addr = (uintptr_t)ilbs.ilbs_rule_head;
+	return (wsp->walk_addr != 0 ? WALK_NEXT : WALK_DONE);
+}
+
+/*
+ * Step function of the ilb_rules walker: read the ilb_rule_t at the current
+ * walk address, invoke the callback on it and advance along ir_next.
+ * A callback status other than WALK_NEXT is returned unchanged.
+ */
+static int
+ilb_rules_walk_step(mdb_walk_state_t *wsp)
+{
+	ilb_rule_t rule;
+	int status;
+
+	if (mdb_vread(&rule, sizeof (rule), wsp->walk_addr) == -1) {
+		mdb_warn("failed to read ilb_rule_t at %p", wsp->walk_addr);
+		return (WALK_ERR);
+	}
+	status = wsp->walk_callback(wsp->walk_addr, &rule, wsp->walk_cbdata);
+	if (status != WALK_NEXT)
+		return (status);
+
+	/* walk_addr is a uintptr_t, so test it against 0 rather than NULL */
+	wsp->walk_addr = (uintptr_t)rule.ir_next;
+	return (wsp->walk_addr == 0 ? WALK_DONE : WALK_NEXT);
+}
+
+/*
+ * Init function of the ilb_servers walker.  wsp->walk_addr must hold the
+ * address of an ilb_rule_t; the walk starts at its ir_servers list.
+ * Returns WALK_ERR without an address or if the read fails, WALK_DONE if
+ * the rule has no servers, WALK_NEXT otherwise.
+ */
+static int
+ilb_servers_walk_init(mdb_walk_state_t *wsp)
+{
+	ilb_rule_t rule;
+
+	/* walk_addr is a uintptr_t, so test it against 0 rather than NULL */
+	if (wsp->walk_addr == 0)
+		return (WALK_ERR);
+
+	if (mdb_vread(&rule, sizeof (rule), wsp->walk_addr) == -1) {
+		mdb_warn("failed to read ilb_rule_t at %p", wsp->walk_addr);
+		return (WALK_ERR);
+	}
+
+	wsp->walk_addr = (uintptr_t)rule.ir_servers;
+	return (wsp->walk_addr != 0 ? WALK_NEXT : WALK_DONE);
+}
+
+/*
+ * Step function of the ilb_servers walker: read the ilb_server_t at the
+ * current walk address, invoke the callback on it and advance along
+ * iser_next.  A callback status other than WALK_NEXT is returned unchanged.
+ */
+static int
+ilb_servers_walk_step(mdb_walk_state_t *wsp)
+{
+	ilb_server_t server;
+	int status;
+
+	if (mdb_vread(&server, sizeof (server), wsp->walk_addr) == -1) {
+		mdb_warn("failed to read ilb_server_t at %p", wsp->walk_addr);
+		return (WALK_ERR);
+	}
+	status = wsp->walk_callback(wsp->walk_addr, &server, wsp->walk_cbdata);
+	if (status != WALK_NEXT)
+		return (status);
+
+	/* walk_addr is a uintptr_t, so test it against 0 rather than NULL */
+	wsp->walk_addr = (uintptr_t)server.iser_next;
+	return (wsp->walk_addr == 0 ? WALK_DONE : WALK_NEXT);
+}
+
+/*
+ * Walk state shared by the ilb_nat_src, ilb_conns and ilb_stickys walkers:
+ * a local copy of the ilb_stack_t and the index of the hash bucket in use.
+ */
+typedef struct {
+ ilb_stack_t ilbs;
+ int idx;
+} ilb_walk_t;
+
+/*
+ * Copy from list.c: map the address of an embedded list node back to the
+ * start of the object containing it.  Both arguments are parenthesized so
+ * that an arbitrary expression may be passed for either of them.
+ */
+#define list_object(a, node) ((void *)(((char *)(node)) - (a)->list_offset))
+
+/*
+ * Init function of the ilb_nat_src walker.  wsp->walk_addr must hold the
+ * address of an ilb_stack_t.  A copy of that structure is kept in a freshly
+ * allocated ilb_walk_t, and the walk is positioned on the first entry of the
+ * first non-empty bucket of the ilbs_nat_src table.
+ *
+ * Returns WALK_NEXT when an entry was found (walk state is then stored in
+ * wsp->walk_data), WALK_DONE when there is nothing to walk, and WALK_ERR on
+ * a missing address or read failure.  The walk state is freed on every path
+ * that does not return WALK_NEXT.
+ */
+static int
+ilb_nat_src_walk_init(mdb_walk_state_t *wsp)
+{
+	int i;
+	ilb_walk_t *ns_walk;
+	ilb_nat_src_entry_t *entry = NULL;
+
+	/* walk_addr is a uintptr_t, so test it against 0 rather than NULL */
+	if (wsp->walk_addr == 0)
+		return (WALK_ERR);
+
+	ns_walk = mdb_alloc(sizeof (ilb_walk_t), UM_SLEEP);
+	if (mdb_vread(&ns_walk->ilbs, sizeof (ns_walk->ilbs),
+	    wsp->walk_addr) == -1) {
+		mdb_warn("failed to read ilb_stack_t at %p", wsp->walk_addr);
+		mdb_free(ns_walk, sizeof (ilb_walk_t));
+		return (WALK_ERR);
+	}
+
+	if (ns_walk->ilbs.ilbs_nat_src == NULL) {
+		mdb_free(ns_walk, sizeof (ilb_walk_t));
+		return (WALK_DONE);
+	}
+
+	for (i = 0; i < ns_walk->ilbs.ilbs_nat_src_hash_size; i++) {
+		list_t head;
+		char *khead;
+
+		/* Read in the list head at the start of the i-th bucket. */
+		khead = (char *)ns_walk->ilbs.ilbs_nat_src + i *
+		    sizeof (ilb_nat_src_hash_t);
+		if (mdb_vread(&head, sizeof (list_t), (uintptr_t)khead) == -1) {
+			mdb_warn("failed to read ilbs_nat_src at %p\n", khead);
+			/* free the state here as the earlier exits do */
+			mdb_free(ns_walk, sizeof (ilb_walk_t));
+			return (WALK_ERR);
+		}
+
+		/*
+		 * Note that list_next points to a kernel address and we need
+		 * to compare list_next with the kernel address of the list
+		 * head.  So we need to calculate the address manually.
+		 */
+		if ((char *)head.list_head.list_next != khead +
+		    offsetof(list_t, list_head)) {
+			entry = list_object(&head, head.list_head.list_next);
+			break;
+		}
+	}
+
+	if (entry == NULL) {
+		/* every bucket is empty; nothing will use the state */
+		mdb_free(ns_walk, sizeof (ilb_walk_t));
+		return (WALK_DONE);
+	}
+
+	/* publish the state only once the walk is really going to run */
+	wsp->walk_data = ns_walk;
+	wsp->walk_addr = (uintptr_t)entry;
+	ns_walk->idx = i;
+	return (WALK_NEXT);
+}
+
+/*
+ * Step function of the ilb_nat_src walker: read the ilb_nat_src_entry_t at
+ * the current walk address, invoke the callback on it, then move on to the
+ * next entry of the same bucket or, failing that, to the first entry of the
+ * next non-empty bucket.  Returns WALK_DONE once all buckets are exhausted.
+ */
+static int
+ilb_nat_src_walk_step(mdb_walk_state_t *wsp)
+{
+	ilb_nat_src_entry_t cur, *found = NULL;
+	ilb_walk_t *state;
+	ilb_stack_t *stack;
+	list_t bucket;
+	char *kbucket;
+	int rv, b;
+
+	if (mdb_vread(&cur, sizeof (ilb_nat_src_entry_t),
+	    wsp->walk_addr) == -1) {
+		mdb_warn("failed to read ilb_nat_src_entry_t at %p",
+		    wsp->walk_addr);
+		return (WALK_ERR);
+	}
+	rv = wsp->walk_callback(wsp->walk_addr, &cur, wsp->walk_cbdata);
+	if (rv != WALK_NEXT)
+		return (rv);
+
+	state = (ilb_walk_t *)wsp->walk_data;
+	stack = &state->ilbs;
+	b = state->idx;
+
+	/* Fetch the list head at the start of the bucket we are in. */
+	kbucket = (char *)stack->ilbs_nat_src +
+	    b * sizeof (ilb_nat_src_hash_t);
+	if (mdb_vread(&bucket, sizeof (list_t), (uintptr_t)kbucket) == -1) {
+		mdb_warn("failed to read ilbs_nat_src at %p\n", kbucket);
+		return (WALK_ERR);
+	}
+
+	/*
+	 * list_next is a kernel address; the list ends when it points back
+	 * at the kernel address of the bucket's list head, which has to be
+	 * computed by hand.  If it does not, there is another entry here.
+	 */
+	if ((char *)cur.nse_link.list_next !=
+	    kbucket + offsetof(list_t, list_head)) {
+		wsp->walk_addr = (uintptr_t)list_object(&bucket,
+		    cur.nse_link.list_next);
+		return (WALK_NEXT);
+	}
+
+	/* This bucket is done; look for the next one holding an entry. */
+	while (++b < stack->ilbs_nat_src_hash_size) {
+		kbucket = (char *)stack->ilbs_nat_src +
+		    b * sizeof (ilb_nat_src_hash_t);
+		if (mdb_vread(&bucket, sizeof (list_t),
+		    (uintptr_t)kbucket) == -1) {
+			mdb_warn("failed to read ilbs_nat_src at %p\n",
+			    kbucket);
+			return (WALK_ERR);
+		}
+
+		if ((char *)bucket.list_head.list_next !=
+		    kbucket + offsetof(list_t, list_head)) {
+			found = list_object(&bucket,
+			    bucket.list_head.list_next);
+			break;
+		}
+	}
+
+	if (found == NULL)
+		return (WALK_DONE);
+
+	wsp->walk_addr = (uintptr_t)found;
+	state->idx = b;
+	return (WALK_NEXT);
+}
+
+/*
+ * Fini function shared by the ilb_nat_src, ilb_conns and ilb_stickys
+ * walkers: release the ilb_walk_t stored in wsp->walk_data, if any.
+ */
+static void
+ilb_common_walk_fini(mdb_walk_state_t *wsp)
+{
+	ilb_walk_t *walk;
+
+	walk = (ilb_walk_t *)wsp->walk_data;
+	if (walk == NULL)
+		return;
+	/*
+	 * The init functions allocate sizeof (ilb_walk_t) bytes, so hand
+	 * that same size back (not the size of a pointer).
+	 */
+	mdb_free(walk, sizeof (ilb_walk_t));
+}
+
+/*
+ * Init function of the ilb_conns walker.  wsp->walk_addr must hold the
+ * address of an ilb_stack_t.  A copy of that structure is kept in a freshly
+ * allocated ilb_walk_t, and the walk is positioned on the first connection
+ * of the first non-empty bucket of the ilbs_c2s_conn_hash table.
+ *
+ * Returns WALK_NEXT when a connection was found (walk state is then stored
+ * in wsp->walk_data), WALK_DONE when there is nothing to walk, and WALK_ERR
+ * on a missing address or read failure.  The walk state is freed on every
+ * path that does not return WALK_NEXT.
+ */
+static int
+ilb_conn_walk_init(mdb_walk_state_t *wsp)
+{
+	int i;
+	ilb_walk_t *conn_walk;
+	ilb_conn_hash_t head;
+	uintptr_t first = 0;
+
+	/* walk_addr is a uintptr_t, so test it against 0 rather than NULL */
+	if (wsp->walk_addr == 0)
+		return (WALK_ERR);
+
+	conn_walk = mdb_alloc(sizeof (ilb_walk_t), UM_SLEEP);
+	if (mdb_vread(&conn_walk->ilbs, sizeof (conn_walk->ilbs),
+	    wsp->walk_addr) == -1) {
+		mdb_warn("failed to read ilb_stack_t at %p", wsp->walk_addr);
+		mdb_free(conn_walk, sizeof (ilb_walk_t));
+		return (WALK_ERR);
+	}
+
+	if (conn_walk->ilbs.ilbs_c2s_conn_hash == NULL) {
+		mdb_free(conn_walk, sizeof (ilb_walk_t));
+		return (WALK_DONE);
+	}
+
+	for (i = 0; i < conn_walk->ilbs.ilbs_conn_hash_size; i++) {
+		char *khead;
+
+		/* Read in the i-th bucket of the c2s hash table. */
+		khead = (char *)conn_walk->ilbs.ilbs_c2s_conn_hash + i *
+		    sizeof (ilb_conn_hash_t);
+		if (mdb_vread(&head, sizeof (ilb_conn_hash_t),
+		    (uintptr_t)khead) == -1) {
+			mdb_warn("failed to read ilbs_c2s_conn_hash at %p\n",
+			    khead);
+			/* free the state here as the earlier exits do */
+			mdb_free(conn_walk, sizeof (ilb_walk_t));
+			return (WALK_ERR);
+		}
+
+		if (head.ilb_connp != NULL) {
+			first = (uintptr_t)head.ilb_connp;
+			break;
+		}
+	}
+
+	/*
+	 * Test the result kept in "first" instead of "head": if the table
+	 * size is 0 the loop never runs and "head" was never filled in.
+	 */
+	if (first == 0) {
+		mdb_free(conn_walk, sizeof (ilb_walk_t));
+		return (WALK_DONE);
+	}
+
+	/* publish the state only once the walk is really going to run */
+	wsp->walk_data = conn_walk;
+	wsp->walk_addr = first;
+	conn_walk->idx = i;
+	return (WALK_NEXT);
+}
+
+/*
+ * Step function of the ilb_conns walker: read the ilb_conn_t at the current
+ * walk address, invoke the callback on it, then follow conn_c2s_next or,
+ * at the end of the chain, move to the first connection of the next
+ * non-empty bucket.  Returns WALK_DONE once all buckets are exhausted.
+ */
+static int
+ilb_conn_walk_step(mdb_walk_state_t *wsp)
+{
+	int status;
+	ilb_conn_t conn;
+	ilb_walk_t *conn_walk;
+	ilb_stack_t *ilbs;
+	ilb_conn_hash_t head;
+	uintptr_t next = 0;
+	char *khead;
+	int i;
+
+	if (mdb_vread(&conn, sizeof (ilb_conn_t), wsp->walk_addr) == -1) {
+		mdb_warn("failed to read ilb_conn_t at %p", wsp->walk_addr);
+		return (WALK_ERR);
+	}
+
+	status = wsp->walk_callback(wsp->walk_addr, &conn, wsp->walk_cbdata);
+	if (status != WALK_NEXT)
+		return (status);
+
+	conn_walk = (ilb_walk_t *)wsp->walk_data;
+	ilbs = &conn_walk->ilbs;
+	i = conn_walk->idx;
+
+	/* Check if there is still entry in the current list. */
+	if (conn.conn_c2s_next != NULL) {
+		wsp->walk_addr = (uintptr_t)conn.conn_c2s_next;
+		return (WALK_NEXT);
+	}
+
+	/* Start with the next bucket in the array. */
+	for (i++; i < ilbs->ilbs_conn_hash_size; i++) {
+		khead = (char *)ilbs->ilbs_c2s_conn_hash + i *
+		    sizeof (ilb_conn_hash_t);
+		if (mdb_vread(&head, sizeof (ilb_conn_hash_t),
+		    (uintptr_t)khead) == -1) {
+			mdb_warn("failed to read ilbs_c2s_conn_hash at %p\n",
+			    khead);
+			return (WALK_ERR);
+		}
+
+		if (head.ilb_connp != NULL) {
+			next = (uintptr_t)head.ilb_connp;
+			break;
+		}
+	}
+
+	/*
+	 * Test the result kept in "next" instead of "head": when the bucket
+	 * just finished was the last one, the loop above never runs and
+	 * "head" was never filled in.
+	 */
+	if (next == 0)
+		return (WALK_DONE);
+
+	wsp->walk_addr = next;
+	conn_walk->idx = i;
+	return (WALK_NEXT);
+}
+
+/*
+ * Init function of the ilb_stickys walker.  wsp->walk_addr must hold the
+ * address of an ilb_stack_t.  A copy of that structure is kept in a freshly
+ * allocated ilb_walk_t, and the walk is positioned on the first entry of the
+ * first non-empty bucket of the ilbs_sticky_hash table.
+ *
+ * Returns WALK_NEXT when an entry was found (walk state is then stored in
+ * wsp->walk_data), WALK_DONE when there is nothing to walk, and WALK_ERR on
+ * a missing address or read failure.  The walk state is freed on every path
+ * that does not return WALK_NEXT.
+ */
+static int
+ilb_sticky_walk_init(mdb_walk_state_t *wsp)
+{
+	int i;
+	ilb_walk_t *sticky_walk;
+	ilb_sticky_t *st = NULL;
+
+	/* walk_addr is a uintptr_t, so test it against 0 rather than NULL */
+	if (wsp->walk_addr == 0)
+		return (WALK_ERR);
+
+	sticky_walk = mdb_alloc(sizeof (ilb_walk_t), UM_SLEEP);
+	if (mdb_vread(&sticky_walk->ilbs, sizeof (sticky_walk->ilbs),
+	    wsp->walk_addr) == -1) {
+		mdb_warn("failed to read ilb_stack_t at %p", wsp->walk_addr);
+		mdb_free(sticky_walk, sizeof (ilb_walk_t));
+		return (WALK_ERR);
+	}
+
+	if (sticky_walk->ilbs.ilbs_sticky_hash == NULL) {
+		mdb_free(sticky_walk, sizeof (ilb_walk_t));
+		return (WALK_DONE);
+	}
+
+	for (i = 0; i < sticky_walk->ilbs.ilbs_sticky_hash_size; i++) {
+		list_t head;
+		char *khead;
+
+		/* Read in the list head at the start of the i-th bucket. */
+		khead = (char *)sticky_walk->ilbs.ilbs_sticky_hash + i *
+		    sizeof (ilb_sticky_hash_t);
+		if (mdb_vread(&head, sizeof (list_t), (uintptr_t)khead) == -1) {
+			mdb_warn("failed to read ilbs_sticky_hash at %p\n",
+			    khead);
+			/* free the state here as the earlier exits do */
+			mdb_free(sticky_walk, sizeof (ilb_walk_t));
+			return (WALK_ERR);
+		}
+
+		/*
+		 * Note that list_next points to a kernel address and we need
+		 * to compare list_next with the kernel address of the list
+		 * head.  So we need to calculate the address manually.
+		 */
+		if ((char *)head.list_head.list_next != khead +
+		    offsetof(list_t, list_head)) {
+			st = list_object(&head, head.list_head.list_next);
+			break;
+		}
+	}
+
+	if (st == NULL) {
+		/* every bucket is empty; nothing will use the state */
+		mdb_free(sticky_walk, sizeof (ilb_walk_t));
+		return (WALK_DONE);
+	}
+
+	/* publish the state only once the walk is really going to run */
+	wsp->walk_data = sticky_walk;
+	wsp->walk_addr = (uintptr_t)st;
+	sticky_walk->idx = i;
+	return (WALK_NEXT);
+}
+
+/*
+ * Step function of the ilb_stickys walker: read the ilb_sticky_t at the
+ * current walk address, invoke the callback on it, then move on to the next
+ * entry of the same bucket or, failing that, to the first entry of the next
+ * non-empty bucket.  Returns WALK_DONE once all buckets are exhausted.
+ */
+static int
+ilb_sticky_walk_step(mdb_walk_state_t *wsp)
+{
+	int status;
+	ilb_sticky_t st, *st_next;
+	ilb_walk_t *sticky_walk;
+	ilb_stack_t *ilbs;
+	list_t head;
+	char *khead;
+	int i;
+
+	if (mdb_vread(&st, sizeof (ilb_sticky_t), wsp->walk_addr) == -1) {
+		mdb_warn("failed to read ilb_sticky_t at %p", wsp->walk_addr);
+		return (WALK_ERR);
+	}
+
+	status = wsp->walk_callback(wsp->walk_addr, &st, wsp->walk_cbdata);
+	if (status != WALK_NEXT)
+		return (status);
+
+	sticky_walk = (ilb_walk_t *)wsp->walk_data;
+	ilbs = &sticky_walk->ilbs;
+	i = sticky_walk->idx;
+
+	/* Read in the list head at the start of the i-th bucket. */
+	khead = (char *)ilbs->ilbs_sticky_hash + i * sizeof (ilb_sticky_hash_t);
+	if (mdb_vread(&head, sizeof (list_t), (uintptr_t)khead) == -1) {
+		mdb_warn("failed to read ilbs_sticky_hash at %p\n", khead);
+		return (WALK_ERR);
+	}
+
+	/*
+	 * Check if there is still entry in the current list.
+	 *
+	 * Note that list_next points to a kernel address and we need to
+	 * compare list_next with the kernel address of the list head.
+	 * So we need to calculate the address manually.
+	 */
+	if ((char *)st.list.list_next != khead + offsetof(list_t,
+	    list_head)) {
+		wsp->walk_addr = (uintptr_t)list_object(&head,
+		    st.list.list_next);
+		return (WALK_NEXT);
+	}
+
+	/*
+	 * Start with the next bucket in the array.  The bound is the size of
+	 * the sticky table itself (the same one ilb_sticky_walk_init() uses),
+	 * not the size of the NAT source table.
+	 */
+	st_next = NULL;
+	for (i++; i < ilbs->ilbs_sticky_hash_size; i++) {
+		khead = (char *)ilbs->ilbs_sticky_hash + i *
+		    sizeof (ilb_sticky_hash_t);
+		if (mdb_vread(&head, sizeof (list_t), (uintptr_t)khead) == -1) {
+			mdb_warn("failed to read ilbs_sticky_hash at %p\n",
+			    khead);
+			return (WALK_ERR);
+		}
+
+		if ((char *)head.list_head.list_next != khead +
+		    offsetof(list_t, list_head)) {
+			st_next = list_object(&head,
+			    head.list_head.list_next);
+			break;
+		}
+	}
+
+	if (st_next == NULL)
+		return (WALK_DONE);
+
+	wsp->walk_addr = (uintptr_t)st_next;
+	sticky_walk->idx = i;
+	return (WALK_NEXT);
+}
diff --git a/usr/src/cmd/mdb/intel/amd64/ip/Makefile b/usr/src/cmd/mdb/intel/amd64/ip/Makefile
index 6789adbba9..5112aeaac0 100644
--- a/usr/src/cmd/mdb/intel/amd64/ip/Makefile
+++ b/usr/src/cmd/mdb/intel/amd64/ip/Makefile
@@ -2,9 +2,8 @@
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -20,10 +19,9 @@
# CDDL HEADER END
#
#
-# Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-#ident "%Z%%M% %I% %E% SMI"
MODULE = ip.so
MDBTGT = kvm
@@ -34,3 +32,5 @@ include ../../../../Makefile.cmd
include ../../../../Makefile.cmd.64
include ../../Makefile.amd64
include ../../../Makefile.module
+
+CPPFLAGS += -I$(SRC)/uts/common/inet
diff --git a/usr/src/cmd/mdb/intel/ia32/ip/Makefile b/usr/src/cmd/mdb/intel/ia32/ip/Makefile
index b655ee078f..062b2b6cf2 100644
--- a/usr/src/cmd/mdb/intel/ia32/ip/Makefile
+++ b/usr/src/cmd/mdb/intel/ia32/ip/Makefile
@@ -2,9 +2,8 @@
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -20,10 +19,9 @@
# CDDL HEADER END
#
#
-# Copyright (c) 1998-1999 by Sun Microsystems, Inc.
-# All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
#
-#ident "%Z%%M% %I% %E% SMI"
MODULE = ip.so
MDBTGT = kvm
@@ -33,3 +31,5 @@ MODSRCS = ip.c
include ../../../../Makefile.cmd
include ../../Makefile.ia32
include ../../../Makefile.module
+
+CPPFLAGS += -I$(SRC)/uts/common/inet
diff --git a/usr/src/cmd/mdb/sparc/v9/ip/Makefile b/usr/src/cmd/mdb/sparc/v9/ip/Makefile
index 1694830450..7d5ab01461 100644
--- a/usr/src/cmd/mdb/sparc/v9/ip/Makefile
+++ b/usr/src/cmd/mdb/sparc/v9/ip/Makefile
@@ -2,9 +2,8 @@
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -20,10 +19,9 @@
# CDDL HEADER END
#
#
-# Copyright (c) 1998-2000 by Sun Microsystems, Inc.
-# All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
#
-#ident "%Z%%M% %I% %E% SMI"
MODULE = ip.so
MDBTGT = kvm
@@ -34,3 +32,5 @@ include ../../../../Makefile.cmd
include ../../../../Makefile.cmd.64
include ../../Makefile.sparcv9
include ../../../Makefile.module
+
+CPPFLAGS += -I$(SRC)/uts/common/inet
diff --git a/usr/src/head/auth_list.h b/usr/src/head/auth_list.h
index d1ac5bc55e..c9b68fd1ef 100644
--- a/usr/src/head/auth_list.h
+++ b/usr/src/head/auth_list.h
@@ -45,6 +45,8 @@ extern "C" {
#define LINK_SEC_AUTH "solaris.network.link.security"
#define MAILQ_AUTH "solaris.mail.mailq"
#define NET_AUTOCONF_AUTH "solaris.network.autoconf"
+#define NET_ILB_CONFIG_AUTH "solaris.network.ilb.config"
+#define NET_ILB_ENABLE_AUTH "solaris.network.ilb.enable"
#define SET_DATE_AUTH "solaris.system.date"
#define WIFI_CONFIG_AUTH "solaris.network.wifi.config"
#define WIFI_WEP_AUTH "solaris.network.wifi.wep"
diff --git a/usr/src/lib/Makefile b/usr/src/lib/Makefile
index 0fac076985..32e259be65 100644
--- a/usr/src/lib/Makefile
+++ b/usr/src/lib/Makefile
@@ -255,6 +255,7 @@ SUBDIRS += \
libvscan \
libgrubmgmt \
smbsrv \
+ libilb \
scsi \
mms \
libima \
@@ -329,6 +330,7 @@ MSGSUBDIRS= \
libidmap \
libinetcfg \
libipmp \
+ libilb \
libinetutil \
libinstzones \
libnsl \
@@ -487,6 +489,7 @@ HDRSUBDIRS= \
libvscan \
libgrubmgmt \
smbsrv \
+ libilb \
scsi \
hbaapi \
smhba \
@@ -644,6 +647,7 @@ pyzfs: libnvpair libsec libidmap libzfs
libreparse: libnvpair
libhotplug: libnvpair
cfgadm_plugins: libhotplug
+libilb: libsocket
#
# The reason this rule checks for the existence of the
diff --git a/usr/src/lib/libbsm/audit_event.txt b/usr/src/lib/libbsm/audit_event.txt
index 219ca21a69..3bdacb0477 100644
--- a/usr/src/lib/libbsm/audit_event.txt
+++ b/usr/src/lib/libbsm/audit_event.txt
@@ -511,6 +511,21 @@
6300:AUE_nwam_attach:attach nwam user:ss
6301:AUE_nwam_detach:detach nwam user:ss
#
+# ilbd(1M) events
+#
+6310:AUE_ilb_create_healthcheck:create ILB health check:as
+6311:AUE_ilb_delete_healthcheck:delete ILB health check:as
+6312:AUE_ilb_create_rule:create ILB rule:as
+6313:AUE_ilb_delete_rule:delete ILB rule:as
+6314:AUE_ilb_disable_rule:disable ILB rule:as
+6315:AUE_ilb_enable_rule:enable ILB rule:as
+6316:AUE_ilb_add_server:add ILB server:as
+6317:AUE_ilb_disable_server:disable ILB server:as
+6318:AUE_ilb_enable_server:enable ILB server:as
+6319:AUE_ilb_remove_server:remove ILB server:as
+6320:AUE_ilb_create_servergroup:create ILB server group:as
+6321:AUE_ilb_delete_servergroup:delete ILB server group:as
+#
# TCSD(8) events
#
6400:AUE_tpm_takeownership:take ownership of TPM:as
diff --git a/usr/src/lib/libbsm/common/adt.xml b/usr/src/lib/libbsm/common/adt.xml
index 8ab6e24422..6acabbff70 100644
--- a/usr/src/lib/libbsm/common/adt.xml
+++ b/usr/src/lib/libbsm/common/adt.xml
@@ -2091,8 +2091,450 @@ Use is subject to license terms.
</entry>
</event>
+ <event id="AUE_ilb_create_healthcheck" header="0" idNo="120" omit="JNI">
+ <title>Integrated Loadbalancer</title>
+ <program>/usr/sbin/ilbadm</program>
+ <see>ilbadm(1m)</see>
+ <entry id="subject">
+ <internal token="subject"/>
+ <external opt="none"/>
+ </entry>
+ <entry id="auth_used">
+ <internal token="uauth"/>
+ <external opt="required" type="char *"/>
+ <comment>authorization used</comment>
+ </entry>
+ <entry id="hc_test">
+ <internal token="path"/>
+ <external opt="required" type="char *"/>
+ <comment>healthcheck type-PING,TCP,UDP or 3rd party script</comment>
+ </entry>
+ <entry id="hc_name">
+ <internal token="text"/>
+ <external opt="required" type="char *"/>
+ <comment>healthcheck name</comment>
+ </entry>
+ <entry id="hc_timeout">
+ <internal token="text"/>
+ <external opt="required" type="int32_t" />
+ <comment>timeout(secs) to kill a hung healthcheck probe
+ - 0 means default value (see man page)
+ </comment>
+ </entry>
+ <entry id="hc_count">
+ <internal token="text"/>
+ <external opt="required" type="int"/>
+ <comment>number of times to run a health check probe
+ before declaring a server to be dead - 0 means
+ default value (see man page)
+ </comment>
+ </entry>
+ <entry id="hc_interval">
+ <internal token="text"/>
+ <external opt="required" type="int32_t"/>
+ <comment>time(secs) between 2 healthcheck events -
+ 0 means default value(see man page)
+ </comment>
+ </entry>
+ <entry id="return">
+ <internal token="return"/>
+ <external opt="none"/>
+ </entry>
+ </event>
+
+ <event id="AUE_ilb_delete_healthcheck" header="0" idNo="121" omit="JNI">
+ <title>Integrated Loadbalancer</title>
+ <program>/usr/sbin/ilbadm</program>
+ <see>ilbadm(1m)</see>
+ <entry id="subject">
+ <internal token="subject"/>
+ <external opt="none"/>
+ </entry>
+ <entry id="auth_used">
+ <internal token="uauth"/>
+ <external opt="required" type="char *"/>
+ <comment>authorization used</comment>
+ </entry>
+ <entry id="hc_name">
+ <internal token="text"/>
+ <external opt="required" type="char *"/>
+ <comment>healthcheck name</comment>
+ </entry>
+ <entry id="return">
+ <internal token="return"/>
+ <external opt="none"/>
+ </entry>
+ </event>
+
+ <!--
+ The virtual_ipaddress and proxy_src tokens are set to be char *,
+ but they should be in6_addr. See audit bug 6864075.
+ -->
+ <event id="AUE_ilb_create_rule" header="0" idNo="122" omit="JNI">
+ <title>Integrated Loadbalancer</title>
+ <program>/usr/sbin/ilbadm</program>
+ <see>ilbadm(1m)</see>
+ <entry id="subject">
+ <internal token="subject"/>
+ <external opt="none"/>
+ </entry>
+ <entry id="auth_used">
+ <internal token="uauth"/>
+ <external opt="required" type="char *"/>
+ <comment>authorization used</comment>
+ </entry>
+ <entry id="virtual_ipaddress">
+ <internal token="text"/>
+ <external opt="required" type="char *"/>
+ <comment>LB virtual IP address</comment>
+ </entry>
+ <entry id="min_port">
+ <internal token="text"/>
+ <external opt="required" type="uint32_t"/>
+ <comment>minimum value in port range</comment>
+ </entry>
+ <entry id="max_port">
+ <internal token="text"/>
+ <external opt="required" type="uint32_t"/>
+ <comment>maximum value in port range - max=min means single
+ port is specified
+ </comment>
+ </entry>
+ <entry id="protocol">
+ <internal token="text"/>
+ <external opt="required" type="char *"/>
+ <comment>protocol</comment>
+ </entry>
+ <entry id="algo_optype">
+ <internal token="text"/>
+ <external opt="required" type="char *"/>
+ <comment>[rr,hip,hipp,hipv],[dsr,nat,half-nat]</comment>
+ </entry>
+ <entry id="proxy_src">
+ <internal token="text"/>
+ <external opt="optional" type="char *"/>
+ <comment>proxy source address for NAT - may be a single
+ address or an address range
+ </comment>
+ </entry>
+ <entry id="persist_mask">
+ <internal token="text"/>
+ <external opt="required" type="char *"/>
+ <comment>prefix length</comment>
+ </entry>
+ <entry id="hcname">
+ <internal token="text"/>
+ <external opt="optional" type="char *"/>
+ <comment>healthcheck name</comment>
+ </entry>
+ <entry id="hcport">
+ <internal token="text"/>
+ <external opt="optional" type="char *"/>
+ <comment>healthcheck port - ANY(dynamically determined by ilbd)
+ or a positive integer
+ </comment>
+ </entry>
+ <entry id="conndrain_timeout">
+ <internal token="text"/>
+ <external opt="required" type="uint32_t"/>
+ <comment>connection timeout for NAT/half-NAT in sec - 0 means
+ no forced removal
+ </comment>
+ </entry>
+ <entry id="nat_timeout">
+ <internal token="text"/>
+ <external opt="required" type="uint32_t"/>
+ <comment>nat entry timeout for NAT/half-NAT in sec - 0 means
+ default value(see man page)
+ </comment>
+ </entry>
+ <entry id="persist_timeout">
+ <internal token="text"/>
+ <external opt="required" type="uint32_t"/>
+ <comment>session persistence mapping in sec - 0 means no
+ persistence
+ </comment>
+ </entry>
+ <entry id="server_group">
+ <internal token="text"/>
+ <external opt="required" type="char *"/>
+ <comment>server group name</comment>
+ </entry>
+ <entry id="rule_name">
+ <internal token="text"/>
+ <external opt="required" type="char *"/>
+ <comment>rule name</comment>
+ </entry>
+ <entry id="return">
+ <internal token="return"/>
+ <external opt="none"/>
+ </entry>
+ </event>
+
+<!-- generic ILB rule event -->
+
+ <event id="AUE_generic_ILB_rule" type="generic" omit="always">
+ <entry id="subject">
+ <internal token="subject"/>
+ <external opt="none"/>
+ </entry>
+ <entry id="auth_used">
+ <internal token="uauth"/>
+ <external opt="required" type="char *"/>
+ <comment>authorization used</comment>
+ </entry>
+ <entry id="rule_name">
+ <internal token="text"/>
+ <external opt="required" type="char *"/>
+ <comment>rule name - "all" means all rules</comment>
+ </entry>
+ <entry id="return">
+ <internal token="return"/>
+ <external opt="none"/>
+ </entry>
+ </event>
+
+<!-- instances of the ILB generic rule event. -->
+ <event id="AUE_ilb_delete_rule" instance_of="AUE_generic_ILB_rule"
+ header="0" idNo="123">
+ <title>Integrated Loadbalancer</title>
+ <program>/usr/sbin/ilbadm</program>
+ <see>ilbadm(1m)</see>
+ </event>
+
+ <event id="AUE_ilb_disable_rule" instance_of="AUE_generic_ILB_rule"
+ header="0" idNo="124">
+ <title>Integrated Loadbalancer</title>
+ <program>/usr/sbin/ilbadm</program>
+ <see>ilbadm(1m)</see>
+ </event>
+
+ <event id="AUE_ilb_enable_rule" instance_of="AUE_generic_ILB_rule"
+ header="0" idNo="125">
+ <title>Integrated Loadbalancer</title>
+ <program>/usr/sbin/ilbadm</program>
+ <see>ilbadm(1m)</see>
+ </event>
+
+ <!--
+ server_ipaddress token is set to be char *. But it should be
+ in6_addr See audit bug 6864075.
+ -->
+ <event id="AUE_ilb_add_server" header="0" idNo="126" omit="JNI">
+ <title>Integrated Loadbalancer</title>
+ <program>/usr/sbin/ilbadm</program>
+ <see>ilbadm(1m)</see>
+ <entry id="subject">
+ <internal token="subject"/>
+ <external opt="none"/>
+ </entry>
+ <entry id="auth_used">
+ <internal token="uauth"/>
+ <external opt="required" type="char *"/>
+ <comment>authorization used</comment>
+ </entry>
+ <entry id="server_ipaddress">
+ <internal token="text"/>
+ <external opt="required" type="char *"/>
+ <comment>IP address</comment>
+ </entry>
+ <entry id="server_id">
+ <internal token="text"/>
+ <external opt="optional" type="char *"/>
+ <comment>serverid that corresponds to the IP address - empty
+ if authorization fails, user specified IP address
+ is invalid or server cannot be added because
+ server group is full
+ </comment>
+ </entry>
+ <entry id="server_group">
+ <internal token="text"/>
+ <external opt="required" type="char *"/>
+ <comment>server group name</comment>
+ </entry>
+ <entry id="server_minport">
+ <internal token="text"/>
+ <external opt="optional" type="uint32_t" />
+ <comment>server's minimum value in port range - empty
+ means default value (see man page)
+ </comment>
+ </entry>
+ <entry id="server_maxport">
+ <internal token="text"/>
+ <external opt="optional" type="uint32_t" />
+ <comment>server's maximum value in port range - empty
+ means default value(see man page)
+ </comment>
+ </entry>
+ <entry id="return">
+ <internal token="return"/>
+ <external opt="none"/>
+ </entry>
+ </event>
+
+ <!--
+ server_ipaddress token is set to be char *. But it should be
+ in6_addr See audit bug 6864075.
+ -->
+ <event id="AUE_ilb_disable_server" header="0" idNo="127" omit="JNI">
+ <title>Integrated Loadbalancer</title>
+ <program>/usr/sbin/ilbadm</program>
+ <see>ilbadm(1m)</see>
+ <entry id="subject">
+ <internal token="subject"/>
+ <external opt="none"/>
+ </entry>
+ <entry id="auth_used">
+ <internal token="uauth"/>
+ <external opt="required" type="char *"/>
+ <comment>authorization used</comment>
+ </entry>
+ <entry id="server_id">
+ <internal token="text"/>
+ <external opt="required" type="char *"/>
+ <comment>serverid</comment>
+ </entry>
+ <entry id="server_ipaddress">
+ <internal token="text"/>
+ <external opt="optional" type="char *"/>
+ <comment>IPaddr corresponding to the serverid - empty
+ if authorization fails, or user specified serverid
+ is nonexistent
+ </comment>
+ </entry>
+ <entry id="return">
+ <internal token="return"/>
+ <external opt="none"/>
+ </entry>
+ </event>
+
+ <!--
+ server_ipaddress token is set to be char *. But it should be
+ in6_addr See audit bug 6864075.
+ -->
+ <event id="AUE_ilb_enable_server" header="0" idNo="128" omit="JNI">
+ <title>Integrated Loadbalancer</title>
+ <program>/usr/sbin/ilbadm</program>
+ <see>ilbadm(1m)</see>
+ <entry id="subject">
+ <internal token="subject"/>
+ <external opt="none"/>
+ </entry>
+ <entry id="auth_used">
+ <internal token="uauth"/>
+ <external opt="required" type="char *"/>
+ <comment>authorization used</comment>
+ </entry>
+ <entry id="server_id">
+ <internal token="text"/>
+ <external opt="required" type="char *"/>
+ <comment>serverid</comment>
+ </entry>
+ <entry id="server_ipaddress">
+ <internal token="text"/>
+ <external opt="optional" type="char *"/>
+ <comment>IPaddr corresponding to the serverid - empty
+ if authorization fails, or user specified serverid
+ is nonexistent
+ </comment>
+ </entry>
+ <entry id="return">
+ <internal token="return"/>
+ <external opt="none"/>
+ </entry>
+ </event>
+
+ <!--
+ server_ipaddress token is set to be char *. But it should be
+ in6_addr See audit bug 6864075 .
+ -->
+ <event id="AUE_ilb_remove_server" header="0" idNo="129" omit="JNI">
+ <title>Integrated Loadbalancer</title>
+ <program>/usr/sbin/ilbadm</program>
+ <see>ilbadm(1m)</see>
+ <entry id="subject">
+ <internal token="subject"/>
+ <external opt="none"/>
+ </entry>
+ <entry id="auth_used">
+ <internal token="uauth"/>
+ <external opt="required" type="char *"/>
+ <comment>authorization used</comment>
+ </entry>
+ <entry id="server_id">
+ <internal token="text"/>
+ <external opt="required" type="char *"/>
+ <comment>serverid</comment>
+ </entry>
+ <entry id="server_group">
+ <internal token="text"/>
+ <external opt="required" type="char *"/>
+ <comment>server group name</comment>
+ </entry>
+ <entry id="server_ipaddress">
+ <internal token="text"/>
+ <external opt="optional" type="char *"/>
+ <comment>IPaddr corresponding to serverid - empty
+ if authorization fails or user specified serverid
+ is nonexistent
+ </comment>
+ </entry>
+ <entry id="return">
+ <internal token="return"/>
+ <external opt="none"/>
+ </entry>
+ </event>
+
+ <event id="AUE_ilb_create_servergroup" header="0" idNo="130" omit="JNI">
+ <title>Integrated Loadbalancer</title>
+ <program>/usr/sbin/ilbadm</program>
+ <see>ilbadm(1m)</see>
+ <entry id="subject">
+ <internal token="subject"/>
+ <external opt="none"/>
+ </entry>
+ <entry id="auth_used">
+ <internal token="uauth"/>
+ <external opt="required" type="char *"/>
+ <comment>authorization used</comment>
+ </entry>
+ <entry id="server_group">
+ <internal token="text"/>
+ <external opt="required" type="char *"/>
+ <comment>server group name</comment>
+ </entry>
+ <entry id="return">
+ <internal token="return"/>
+ <external opt="none"/>
+ </entry>
+ </event>
+
+ <event id="AUE_ilb_delete_servergroup" header="0" idNo="131" omit="JNI">
+ <title>Integrated Loadbalancer</title>
+ <program>/usr/sbin/ilbadm</program>
+ <see>ilbadm(1m)</see>
+ <entry id="subject">
+ <internal token="subject"/>
+ <external opt="none"/>
+ </entry>
+ <entry id="auth_used">
+ <internal token="uauth"/>
+ <external opt="required" type="char *"/>
+ <comment>authorization used</comment>
+ </entry>
+ <entry id="server_group">
+ <internal token="text"/>
+ <external opt="required" type="char *"/>
+ <comment>server group name</comment>
+ </entry>
+ <entry id="return">
+ <internal token="return"/>
+ <external opt="none"/>
+ </entry>
+ </event>
+
<!-- add new events here with the next higher idNo -->
-<!-- Highest idNo is 119, so next is 120, then fix this comment -->
+<!-- Highest idNo is 131, so next is 132, then fix this comment -->
<!-- end of C Only events -->
<!--
diff --git a/usr/src/lib/libilb/Makefile b/usr/src/lib/libilb/Makefile
new file mode 100644
index 0000000000..af3afb9ebe
--- /dev/null
+++ b/usr/src/lib/libilb/Makefile
@@ -0,0 +1,61 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+include $(SRC)/lib/Makefile.lib
+
+HDRS = libilb.h
+HDRDIR = common
+SUBDIRS = $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+POFILE = libilb.po
+MSGFILES = common/ilb_subr.c
+
+all := TARGET = all
+clean := TARGET = clean
+clobber := TARGET = clobber
+install := TARGET = install
+lint := TARGET = lint
+
+.KEEP_STATE:
+
+all clean clobber install lint: $(SUBDIRS)
+
+install_h: $(ROOTHDRS)
+
+check: $(CHECKHDRS)
+
+$(POFILE): $(MSGFILES)
+ $(BUILDPO.msgfiles)
+
+_msg: $(MSGDOMAINPOFILE)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include $(SRC)/Makefile.msg.targ
+include $(SRC)/lib/Makefile.targ
diff --git a/usr/src/lib/libilb/Makefile.com b/usr/src/lib/libilb/Makefile.com
new file mode 100644
index 0000000000..39f32889b0
--- /dev/null
+++ b/usr/src/lib/libilb/Makefile.com
@@ -0,0 +1,73 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+LIBRARY = libilb.a
+VERS = .1
+
+LIB_OBJS = ilb_sg.o ilb_comm.o ilb_subr.o ilb_rules.o
+LIB_OBJS += ilb_hc.o ilb_nat.o
+
+# We don't have a userland-accessible implementation of list handling
+# so we use the one in uts (filched off libzpool)
+LIST_OBJS = list.o
+OBJECTS = $(LIB_OBJS) $(LIST_OBJS)
+
+include ../../Makefile.lib
+
+LIB_SRCS= $(LIB_OBJS:%.o=$(SRCDIR)/%.c)
+LIBS = $(DYNLIB) $(LINTLIB)
+INCS += -I../common -I$(SRC)/uts/common
+LDLIBS += -lc
+
+SRCDIR = ../common
+$(LINTLIB) := SRCS = $(SRCDIR)/$(LINTSRC)
+
+C99MODE = $(C99_ENABLE)
+
+# use for prod:
+CFLAGS += -mt $(CCVERBOSE)
+CPPFLAGS += $(INCS)
+LDLIBS += -lsocket
+
+# use for debug:
+CFLAGS += -g
+STRIP_STABS= :
+CTFCVTFLAGS += -g
+CTFMERGE_LIB = $(CTFMERGE) -g -t -f -L VERSION -o $@ $(PICS)
+DYNFLAGS += -g
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+lint: $(LIB_SRCS)
+ $(LINT.c) $(LINTCHECKFLAGS) $(LIB_SRCS) $(LDLIBS)
+
+include ../../Makefile.targ
+
+# the below is needed to get list.o built
+pics/%.o: ../../../uts/common/os/%.c
+ $(COMPILE.c) -o $@ $<
+ $(POST_PROCESS_O)
diff --git a/usr/src/lib/libilb/amd64/Makefile b/usr/src/lib/libilb/amd64/Makefile
new file mode 100644
index 0000000000..5924246775
--- /dev/null
+++ b/usr/src/lib/libilb/amd64/Makefile
@@ -0,0 +1,36 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#
+# lib/libilb/amd64/Makefile
+#
+
+include ../Makefile.com
+include ../../Makefile.lib.64
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64)
diff --git a/usr/src/lib/libilb/common/ilb_comm.c b/usr/src/lib/libilb/common/ilb_comm.c
new file mode 100644
index 0000000000..ff8cdca3a1
--- /dev/null
+++ b/usr/src/lib/libilb/common/ilb_comm.c
@@ -0,0 +1,235 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+#include <stddef.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <thread.h>
+#include <synch.h>
+#include <libilb_impl.h>
+#include <libilb.h>
+
+/*
+ * Record a socket failure on the handle: remember the error and flag the
+ * handle as no longer valid.  An invalid handle is ignored.
+ *
+ * Assertion: the calling thread has a hold on the handle.
+ */
+static void
+i_ilb_socket_set_err(ilb_handle_t h, ilb_status_t err)
+{
+	ilb_handle_impl_t *impl;
+
+	if (h != ILB_INVALID_HANDLE) {
+		impl = (ilb_handle_impl_t *)h;
+		impl->h_error = err;
+		impl->h_valid = B_FALSE;
+	}
+}
+
+/*
+ * Open a connection to ilbd and hand back a handle for it in *hp.
+ *
+ * Returns ILB_STATUS_OK on success.  Otherwise:
+ *	ILB_STATUS_EINVAL	hp is NULL
+ *	ILB_STATUS_ENOMEM	the handle could not be allocated
+ *				(*hp is left untouched)
+ *	ILB_STATUS_INTERNAL	the handle's condvar or lock could not be
+ *				initialised
+ *	ILB_STATUS_SOCKET	the socket could not be created, connected
+ *				or configured
+ * For the last two, *hp is set to ILB_INVALID_HANDLE and everything that
+ * was set up so far is torn down again.
+ */
+ilb_status_t
+ilb_open(ilb_handle_t *hp)
+{
+	ilb_handle_impl_t	*hi = NULL;
+	int			s = -1;
+	struct sockaddr_un	sa = {AF_UNIX, SOCKET_PATH};
+	ilb_status_t		rc = ILB_STATUS_OK;
+	int			sobufsz;
+	boolean_t		cv_inited = B_FALSE;
+	boolean_t		lock_inited = B_FALSE;
+
+	if (hp == NULL)
+		return (ILB_STATUS_EINVAL);
+
+	hi = calloc(1, sizeof (*hi));
+	if (hi == NULL)
+		return (ILB_STATUS_ENOMEM);
+
+	if (cond_init(&hi->h_cv, USYNC_THREAD, NULL) != 0) {
+		rc = ILB_STATUS_INTERNAL;
+		goto out;
+	}
+	cv_inited = B_TRUE;
+
+	if (mutex_init(&hi->h_lock, USYNC_THREAD | LOCK_ERRORCHECK, NULL)
+	    != 0) {
+		rc = ILB_STATUS_INTERNAL;
+		goto out;
+	}
+	lock_inited = B_TRUE;
+
+	hi->h_busy = B_FALSE;
+
+	/* The address length is that of the whole sockaddr_un. */
+	if ((s = socket(PF_UNIX, SOCK_SEQPACKET, 0)) == -1 ||
+	    connect(s, (struct sockaddr *)&sa, sizeof (sa)) == -1) {
+		rc = ILB_STATUS_SOCKET;
+		goto out;
+	}
+
+	/*
+	 * The socket buffer must be at least the max size of a message.
+	 * On failure the socket is closed exactly once, at "out".
+	 */
+	sobufsz = ILBD_MSG_SIZE;
+	if (setsockopt(s, SOL_SOCKET, SO_SNDBUF, &sobufsz,
+	    sizeof (sobufsz)) != 0 ||
+	    setsockopt(s, SOL_SOCKET, SO_RCVBUF, &sobufsz,
+	    sizeof (sobufsz)) != 0) {
+		rc = ILB_STATUS_SOCKET;
+		goto out;
+	}
+
+	hi->h_socket = s;
+	hi->h_valid = B_TRUE;
+
+out:
+	if (rc == ILB_STATUS_OK) {
+		*hp = (ilb_handle_t)hi;
+		return (rc);
+	}
+
+	/* Undo whatever was set up before the failure. */
+	if (s != -1)
+		(void) close(s);
+	if (lock_inited)
+		(void) mutex_destroy(&hi->h_lock);
+	if (cv_inited)
+		(void) cond_destroy(&hi->h_cv);
+	free(hi);
+	*hp = ILB_INVALID_HANDLE;
+	return (rc);
+}
+
+/*
+ * Close a handle obtained from ilb_open() and free it.
+ *
+ * The first caller marks the handle as closing, waits until no thread is
+ * left inside i_ilb_do_comm(), then closes the socket and frees the handle.
+ * A caller that finds a close already in progress returns ILB_STATUS_OK
+ * without doing anything.
+ *
+ * Returns ILB_STATUS_EINVAL for an invalid handle and ILB_STATUS_INTERNAL
+ * if the lock cannot be taken or waiting on the condvar fails.
+ */
+ilb_status_t
+ilb_close(ilb_handle_t h)
+{
+	ilb_handle_impl_t *hi = (ilb_handle_impl_t *)h;
+
+	if (h == ILB_INVALID_HANDLE)
+		return (ILB_STATUS_EINVAL);
+
+	if (mutex_lock(&hi->h_lock) != 0)
+		return (ILB_STATUS_INTERNAL);
+
+	/*
+	 * Somebody has done a close, no need to do anything.  The lock
+	 * must be dropped, or the closing thread could never get it back
+	 * after its cond_wait().
+	 */
+	if (hi->h_closing) {
+		(void) mutex_unlock(&hi->h_lock);
+		return (ILB_STATUS_OK);
+	}
+	hi->h_closing = B_TRUE;
+	hi->h_error = ILB_STATUS_HANDLE_CLOSING;
+
+	/* Wait until there is nobody waiting. */
+	while (hi->h_waiter > 0) {
+		if (cond_wait(&hi->h_cv, &hi->h_lock) != 0) {
+			(void) mutex_unlock(&hi->h_lock);
+			return (ILB_STATUS_INTERNAL);
+		}
+	}
+
+	/*
+	 * No one is waiting, proceed to free the handle.  The lock is
+	 * released first so that it is not destroyed while held.
+	 */
+	(void) mutex_unlock(&hi->h_lock);
+
+	(void) close(hi->h_socket);
+	(void) mutex_destroy(&hi->h_lock);
+	(void) cond_destroy(&hi->h_cv);
+	free(hi);
+	return (ILB_STATUS_OK);
+}
+
+/*
+ * Unified routine to communicate with ilbd: send one request and wait
+ * for one reply.
+ *
+ * h:		handle obtained from ilb_open().
+ * ic, ic_sz:	the request and its size; the request is always sent.
+ * rbuf:	caller-supplied buffer for the reply; must not be NULL.
+ *		It may be the same buffer as ic.
+ * rbufsz:	in: capacity of rbuf; out: number of bytes received.
+ *
+ * Only one thread at a time talks on the handle's socket; others sleep
+ * on the handle's condvar until it is free.
+ *
+ * Returns ILB_STATUS_OK on success.  Otherwise:
+ *	ILB_STATUS_EINVAL	invalid handle
+ *	ILB_STATUS_INTERNAL	lock or condvar failure
+ *	the handle's h_error	handle already marked invalid or closing
+ *	ILB_STATUS_WRITE	the request could not be sent completely
+ *	ILB_STATUS_READ		no reply could be received
+ * A send or receive failure also marks the handle invalid.  This function
+ * does not close() the socket; i_ilb_close_comm() does that.
+ */
+ilb_status_t
+i_ilb_do_comm(ilb_handle_t h, ilb_comm_t *ic, size_t ic_sz, ilb_comm_t *rbuf,
+    size_t *rbufsz)
+{
+	ilb_status_t		rc = ILB_STATUS_OK;
+	ssize_t			r;
+	int			s;
+	ilb_handle_impl_t	*hi = (ilb_handle_impl_t *)h;
+
+	assert(rbuf != NULL);
+	if (h == ILB_INVALID_HANDLE)
+		return (ILB_STATUS_EINVAL);
+
+	if (mutex_lock(&hi->h_lock) != 0)
+		return (ILB_STATUS_INTERNAL);
+
+	hi->h_waiter++;
+	while (hi->h_busy) {
+		if (cond_wait(&hi->h_cv, &hi->h_lock) != 0) {
+			rc = ILB_STATUS_INTERNAL;
+			goto unwait;
+		}
+	}
+
+	if (!hi->h_valid || hi->h_closing) {
+		/* Pick up the error while the lock is still held. */
+		rc = hi->h_error;
+		goto unwait;
+	}
+
+	hi->h_busy = B_TRUE;
+	(void) mutex_unlock(&hi->h_lock);
+
+	s = hi->h_socket;
+
+	/*
+	 * Check for failure before comparing against the unsigned ic_sz;
+	 * otherwise -1 would be converted to a huge value and a failed
+	 * send() would go unnoticed.
+	 */
+	r = send(s, ic, ic_sz, 0);
+	if (r < 0 || (size_t)r < ic_sz) {
+		rc = ILB_STATUS_WRITE;
+	} else if ((r = recv(s, rbuf, *rbufsz, 0)) <= 0) {
+		rc = ILB_STATUS_READ;
+	} else {
+		*rbufsz = (size_t)r;
+	}
+
+	(void) mutex_lock(&hi->h_lock);
+	/* Mark the handle broken under the lock that guards its state. */
+	if (rc != ILB_STATUS_OK)
+		i_ilb_socket_set_err(h, rc);
+	hi->h_busy = B_FALSE;
+
+unwait:
+	hi->h_waiter--;
+	/*
+	 * Both threads waiting for !h_busy and a thread in ilb_close()
+	 * waiting for h_waiter to reach zero sleep on this condvar.  Wake
+	 * them all so that the one whose condition now holds can proceed.
+	 */
+	(void) cond_broadcast(&hi->h_cv);
+	(void) mutex_unlock(&hi->h_lock);
+
+	return (rc);
+}
+
+/*
+ * Convenience wrapper that closes the handle via ilb_close(); the status
+ * returned by ilb_close() is discarded.
+ */
+void
+i_ilb_close_comm(ilb_handle_t h)
+{
+ (void) ilb_close(h);
+}
diff --git a/usr/src/lib/libilb/common/ilb_hc.c b/usr/src/lib/libilb/common/ilb_hc.c
new file mode 100644
index 0000000000..dc813320f2
--- /dev/null
+++ b/usr/src/lib/libilb/common/ilb_hc.c
@@ -0,0 +1,276 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <stdlib.h>
+#include <strings.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <stddef.h>
+#include <libilb_impl.h>
+#include <libilb.h>
+
+/*
+ * Ask ilbd to create the health check described by hc.
+ *
+ * The description is rejected with ILB_STATUS_EINVAL if the handle is
+ * invalid, hc is NULL, the name is empty, timeout or count is negative,
+ * or the interval is not larger than timeout * count.
+ *
+ * Returns ILB_STATUS_OK on success, ILB_STATUS_ENOMEM if the request
+ * cannot be allocated, a communication error from i_ilb_do_comm(), or
+ * the status reported by ilbd.
+ */
+ilb_status_t
+ilb_create_hc(ilb_handle_t h, const ilb_hc_info_t *hc)
+{
+	ilb_comm_t	*req;
+	size_t		req_sz;
+	ilb_status_t	status;
+
+	if (h == ILB_INVALID_HANDLE || hc == NULL)
+		return (ILB_STATUS_EINVAL);
+	if (*hc->hci_name == '\0')
+		return (ILB_STATUS_EINVAL);
+	if (hc->hci_timeout < 0 || hc->hci_count < 0)
+		return (ILB_STATUS_EINVAL);
+	if (hc->hci_interval <= hc->hci_timeout * hc->hci_count)
+		return (ILB_STATUS_EINVAL);
+
+	req = i_ilb_alloc_req(ILBD_CREATE_HC, &req_sz);
+	if (req == NULL)
+		return (ILB_STATUS_ENOMEM);
+
+	(void) memcpy(&req->ic_data, hc, sizeof (ilb_hc_info_t));
+
+	/* The request buffer doubles as the reply buffer. */
+	status = i_ilb_do_comm(h, req, req_sz, req, &req_sz);
+	if (status == ILB_STATUS_OK && req->ic_cmd != ILBD_CMD_OK)
+		status = *(ilb_status_t *)&req->ic_data;
+
+	free(req);
+	return (status);
+}
+
+/*
+ * Ask ilbd to destroy the health check named hcname.
+ *
+ * Returns ILB_STATUS_EINVAL for an invalid handle or a NULL/empty name,
+ * ILB_STATUS_ENOMEM if the request cannot be allocated, a communication
+ * error from i_ilb_do_comm(), or the status reported by ilbd.
+ */
+ilb_status_t
+ilb_destroy_hc(ilb_handle_t h, const char *hcname)
+{
+	ilb_comm_t	*req;
+	size_t		req_sz;
+	ilb_status_t	status;
+
+	if (h == ILB_INVALID_HANDLE)
+		return (ILB_STATUS_EINVAL);
+	if (hcname == NULL || *hcname == '\0')
+		return (ILB_STATUS_EINVAL);
+
+	req = i_ilb_alloc_req(ILBD_DESTROY_HC, &req_sz);
+	if (req == NULL)
+		return (ILB_STATUS_ENOMEM);
+
+	(void) strlcpy((char *)&req->ic_data, hcname, sizeof (ilbd_name_t));
+
+	/* The request buffer doubles as the reply buffer. */
+	status = i_ilb_do_comm(h, req, req_sz, req, &req_sz);
+	if (status == ILB_STATUS_OK && req->ic_cmd != ILBD_CMD_OK)
+		status = *(ilb_status_t *)&req->ic_data;
+
+	free(req);
+	return (status);
+}
+
+/*
+ * Look up the health check called name and copy its description to *hcp.
+ *
+ * Returns ILB_STATUS_EINVAL for an invalid handle or a NULL name/hcp,
+ * ILB_STATUS_ENOMEM if a buffer cannot be allocated, a communication
+ * error from i_ilb_do_comm(), or the status reported by ilbd.  *hcp is
+ * only written on success.
+ */
+ilb_status_t
+ilb_get_hc_info(ilb_handle_t h, const char *name, ilb_hc_info_t *hcp)
+{
+	ilb_comm_t	*req, *reply;
+	size_t		req_sz, reply_sz;
+	ilb_status_t	status;
+
+	if (h == ILB_INVALID_HANDLE || name == NULL || hcp == NULL)
+		return (ILB_STATUS_EINVAL);
+
+	req = i_ilb_alloc_req(ILBD_GET_HC_INFO, &req_sz);
+	if (req == NULL)
+		return (ILB_STATUS_ENOMEM);
+
+	/* The reply carries one ilb_hc_info_t after the message header. */
+	reply_sz = sizeof (ilb_comm_t) + sizeof (ilb_hc_info_t);
+	reply = malloc(reply_sz);
+	if (reply == NULL) {
+		free(req);
+		return (ILB_STATUS_ENOMEM);
+	}
+
+	(void) strlcpy((char *)&req->ic_data, name, sizeof (ilbd_name_t));
+
+	status = i_ilb_do_comm(h, req, req_sz, reply, &reply_sz);
+	if (status == ILB_STATUS_OK) {
+		if (reply->ic_cmd == ILBD_CMD_OK)
+			(void) memcpy(hcp, &reply->ic_data, sizeof (*hcp));
+		else
+			status = *(ilb_status_t *)&reply->ic_data;
+	}
+
+	free(req);
+	free(reply);
+	return (status);
+}
+
+/*
+ * Walk through all health checks, calling func(h, info, arg) for each.
+ *
+ * The list of names is retrieved from ilbd first; each health check is
+ * then looked up individually with ilb_get_hc_info().
+ *
+ * Returns ILB_STATUS_OK if every health check was visited.  The walk
+ * stops at the first lookup or callback failure and returns that status.
+ * ILB_STATUS_ENOMEM is returned if the reply buffer cannot be allocated.
+ */
+ilb_status_t
+ilb_walk_hc(ilb_handle_t h, hc_walkerfunc_t func, void *arg)
+{
+	ilb_status_t	rc;
+	ilb_hc_info_t	hc_info;
+	ilbd_namelist_t	*hc_names;
+	ilb_comm_t	ic, *rbuf;
+	size_t		rbufsz;
+	int		i;
+
+	rbufsz = ILBD_MSG_SIZE;
+	if ((rbuf = malloc(rbufsz)) == NULL)
+		return (ILB_STATUS_ENOMEM);
+
+	/* Only ic_cmd is set below; don't send stack garbage to ilbd. */
+	bzero(&ic, sizeof (ic));
+	ic.ic_cmd = ILBD_RETRIEVE_HC_NAMES;
+
+	rc = i_ilb_do_comm(h, &ic, sizeof (ic), rbuf, &rbufsz);
+	if (rc != ILB_STATUS_OK)
+		goto out;
+	if (rbuf->ic_cmd != ILBD_CMD_OK) {
+		rc = *(ilb_status_t *)&rbuf->ic_data;
+		goto out;
+	}
+
+	hc_names = (ilbd_namelist_t *)&rbuf->ic_data;
+	for (i = 0; i < hc_names->ilbl_count; i++) {
+		rc = ilb_get_hc_info(h, hc_names->ilbl_name[i], &hc_info);
+		/*
+		 * Since getting the list of hc names and getting the info
+		 * of each of them are not atomic, some hc objects may have
+		 * been deleted. If this is the case, just skip them.
+		 */
+		if (rc == ILB_STATUS_ENOENT) {
+			rc = ILB_STATUS_OK;
+			continue;
+		} else if (rc != ILB_STATUS_OK) {
+			break;
+		}
+
+		/*
+		 * Stop as soon as the callback fails, so that its status
+		 * is returned rather than overwritten by the next lookup.
+		 */
+		rc = func(h, &hc_info, arg);
+		if (rc != ILB_STATUS_OK)
+			break;
+	}
+
+out:
+	free(rbuf);
+	return (rc);
+}
+
+/*
+ * Fetch the health check server list of the rule called rulename.
+ *
+ * On success ILB_STATUS_OK is returned, *rbuf points to a malloc()ed
+ * reply which the caller must free(), and *rbufsz holds the number of
+ * bytes received.  On failure *rbuf is set to NULL and the status from
+ * i_ilb_do_comm() or from ilbd is returned; ILB_STATUS_ENOMEM is returned
+ * (with *rbuf untouched) if a buffer cannot be allocated.
+ */
+static ilb_status_t
+ilb_get_hc_srvs(ilb_handle_t h, const char *rulename, ilb_comm_t **rbuf,
+    size_t *rbufsz)
+{
+	ilb_status_t	rc;
+	ilb_comm_t	*ic, *tmp_rbuf;
+	size_t		ic_sz;
+
+	if ((ic = i_ilb_alloc_req(ILBD_GET_HC_SRVS, &ic_sz)) == NULL)
+		return (ILB_STATUS_ENOMEM);
+	*rbufsz = ILBD_MSG_SIZE;
+	if ((tmp_rbuf = malloc(*rbufsz)) == NULL) {
+		free(ic);
+		return (ILB_STATUS_ENOMEM);
+	}
+
+	(void) strlcpy((char *)&ic->ic_data, rulename,
+	    sizeof (ilbd_name_t));
+
+	rc = i_ilb_do_comm(h, ic, ic_sz, tmp_rbuf, rbufsz);
+
+	/* The request is done with on every path, including success. */
+	free(ic);
+
+	if (rc == ILB_STATUS_OK) {
+		if (tmp_rbuf->ic_cmd == ILBD_CMD_OK) {
+			*rbuf = tmp_rbuf;
+			return (rc);
+		}
+		rc = *(ilb_status_t *)&tmp_rbuf->ic_data;
+	}
+
+	free(tmp_rbuf);
+	*rbuf = NULL;
+	return (rc);
+}
+
+/*
+ * Call fn for every server in a reply obtained from ilb_get_hc_srvs().
+ * Stops at, and returns, the first status from fn that is not OK.
+ */
+static ilb_status_t
+i_ilb_walk_hc_srv_reply(ilb_handle_t h, hc_srvwalkerfunc_t fn,
+    ilb_comm_t *rbuf, void *arg)
+{
+	ilb_hc_rule_srv_t	*srvs = (ilb_hc_rule_srv_t *)&rbuf->ic_data;
+	ilb_status_t		rc = ILB_STATUS_OK;
+	int			i;
+
+	for (i = 0; i < srvs->rs_num_srvs; i++) {
+		rc = fn(h, &srvs->rs_srvs[i], arg);
+		if (rc != ILB_STATUS_OK)
+			break;
+	}
+	return (rc);
+}
+
+/*
+ * Walk the health check servers, calling fn(h, server, arg) for each.
+ *
+ * If rulename is not NULL only that rule's servers are visited.  If it is
+ * NULL the servers of every rule are visited; rules without a health
+ * check (ILB_STATUS_RULE_NO_HC) are skipped.
+ *
+ * Returns ILB_STATUS_OK if everything was visited.  The walk stops at the
+ * first retrieval or callback failure and returns that status.
+ */
+ilb_status_t
+ilb_walk_hc_srvs(ilb_handle_t h, hc_srvwalkerfunc_t fn, const char *rulename,
+    void *arg)
+{
+	ilb_status_t	rc;
+	int		i;
+	ilb_comm_t	*rbuf;
+	size_t		rbufsz;
+
+	if (rulename != NULL) {
+		rc = ilb_get_hc_srvs(h, rulename, &rbuf, &rbufsz);
+		if (rc != ILB_STATUS_OK)
+			return (rc);
+		rc = i_ilb_walk_hc_srv_reply(h, fn, rbuf, arg);
+		free(rbuf);
+	} else {
+		ilbd_namelist_t	*names;
+		ilb_comm_t	*srv_rbuf;
+		size_t		srv_rbufsz;
+
+		rc = i_ilb_retrieve_rule_names(h, &rbuf, &rbufsz);
+		if (rc != ILB_STATUS_OK)
+			return (rc);
+		names = (ilbd_namelist_t *)&rbuf->ic_data;
+
+		for (i = 0; i < names->ilbl_count; i++) {
+			rc = ilb_get_hc_srvs(h, names->ilbl_name[i],
+			    &srv_rbuf, &srv_rbufsz);
+
+			/* Not all rules have HC, so reset the error to OK. */
+			if (rc == ILB_STATUS_RULE_NO_HC) {
+				rc = ILB_STATUS_OK;
+				continue;
+			} else if (rc != ILB_STATUS_OK) {
+				break;
+			}
+
+			rc = i_ilb_walk_hc_srv_reply(h, fn, srv_rbuf, arg);
+			free(srv_rbuf);
+
+			/*
+			 * A callback failure ends the whole walk, not just
+			 * the current rule; otherwise the next rule would
+			 * overwrite the status.
+			 */
+			if (rc != ILB_STATUS_OK)
+				break;
+		}
+		free(rbuf);
+	}
+	return (rc);
+}
diff --git a/usr/src/lib/libilb/common/ilb_nat.c b/usr/src/lib/libilb/common/ilb_nat.c
new file mode 100644
index 0000000000..192d7b6d59
--- /dev/null
+++ b/usr/src/lib/libilb/common/ilb_nat.c
@@ -0,0 +1,180 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+#include <stddef.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include "libilb.h"
+#include "libilb_impl.h"
+
+/*
+ * Selects which table ilb_show_info() requests from ilbd: show_nat maps to
+ * the ILBD_SHOW_NAT command, show_persist to ILBD_SHOW_PERSIST.
+ */
+enum which_tbl {
+ show_nat = 1,
+ show_persist
+};
+
+/* The common function to show kernel info. */
+static ilb_status_t ilb_show_info(ilb_handle_t, char *, size_t *, boolean_t *,
+ enum which_tbl);
+
+/*
+ * To get the ILB NAT table.
+ *
+ * h: The handle passed on to ilb_show_info() for the exchange with ilbd.
+ * buf: The buffer to return the NAT table entries.
+ * num: The caller sets it to the number of ilb_nat_info_t entries buf can
+ * hold. On return, it contains the actual number of entries put in buf.
+ * end: The caller sets it to B_TRUE if it only wants at most num entries to
+ * be returned. The transaction to ilbd will be terminated when this
+ * call returns.
+ * The caller sets it to B_FALSE if it intends to get the whole table.
+ * If the whole table has more than num entries, the caller can call
+ * this function again to retrieve the rest of the table.
+ * On return, end is set to B_TRUE if end of table is reached; B_FALSE
+ * if there are still remaining entries.
+ *
+ * Returns the status produced by ilb_show_info().
+ */
+ilb_status_t
+ilb_show_nat(ilb_handle_t h, ilb_nat_info_t buf[], size_t *num,
+ boolean_t *end)
+{
+ return (ilb_show_info(h, (char *)buf, num, end, show_nat));
+}
+
+/*
+ * To get the ILB persistent entry table.
+ *
+ * h: The handle passed on to ilb_show_info() for the exchange with ilbd.
+ * buf: The buffer to return the persistent table entries.
+ * num: The caller sets it to the number of ilb_persist_info_t entries buf can
+ * hold. On return, it contains the actual number of entries put in buf.
+ * end: The caller sets it to B_TRUE if it only wants at most num entries to
+ * be returned. The transaction to ilbd will be terminated when this
+ * call returns.
+ * The caller sets it to B_FALSE if it intends to get the whole table.
+ * If the whole table has more than num entries, the caller can call
+ * this function again to retrieve the rest of the table.
+ * On return, end is set to B_TRUE if end of table is reached; B_FALSE
+ * if there are still remaining entries.
+ *
+ * Returns the status produced by ilb_show_info().
+ */
+ilb_status_t
+ilb_show_persist(ilb_handle_t h, ilb_persist_info_t buf[], size_t *num,
+ boolean_t *end)
+{
+ return (ilb_show_info(h, (char *)buf, num, end, show_persist));
+}
+
+/*
+ * The function doing the work...  The tbl parameter determines which table
+ * to show.
+ *
+ * h:   handle used for the exchange with ilbd.
+ * buf: caller's buffer; receives the table entries without the message
+ *      header.
+ * num: in: number of entries buf can hold; out: number of entries copied.
+ *      Only updated when the exchange succeeds.
+ * end: in: B_TRUE to terminate the transaction after this call;
+ *      out: set to B_TRUE when ilbd flagged the end of the table.
+ * tbl: show_nat or show_persist.
+ *
+ * Returns ILB_STATUS_EINVAL for NULL arguments, *num == 0 or a *num so
+ * large that the receive buffer size would overflow; ILB_STATUS_ENOMEM on
+ * allocation failure; ILB_STATUS_INTERNAL if ilbd's reply claims more
+ * entries than were requested; otherwise the status from i_ilb_do_comm()
+ * or the error carried in ilbd's reply.
+ */
+static ilb_status_t
+ilb_show_info(ilb_handle_t h, char *buf, size_t *num, boolean_t *end,
+    enum which_tbl tbl)
+{
+	ilb_comm_t *req, *rbuf;
+	ilb_show_info_t *req_si, *tmp_si;
+	size_t reqsz, rbufsz, tmp_rbufsz, cur_num;
+	size_t entry_sz, hdr_sz;
+	ilb_status_t rc;
+
+	if (buf == NULL || num == NULL || end == NULL || *num == 0)
+		return (ILB_STATUS_EINVAL);
+
+	if (tbl == show_nat)
+		entry_sz = sizeof (ilb_nat_info_t);
+	else
+		entry_sz = sizeof (ilb_persist_info_t);
+
+	/* Reject a count whose receive buffer size would wrap around. */
+	hdr_sz = sizeof (ilb_comm_t) + sizeof (ilb_show_info_t);
+	if (*num > ((size_t)-1 - hdr_sz) / entry_sz)
+		return (ILB_STATUS_EINVAL);
+
+	reqsz = sizeof (ilb_comm_t) + sizeof (ilb_show_info_t);
+	if ((req = malloc(reqsz)) == NULL)
+		return (ILB_STATUS_ENOMEM);
+	req_si = (ilb_show_info_t *)&req->ic_data;
+
+	/*
+	 * Need to allocate a receive buffer and then copy the buffer
+	 * content to the passed in buf.  The reason is that the
+	 * communication to ilbd is message based and the protocol
+	 * includes a header in the reply.  We need to remove this header
+	 * from the message, hence the copying...
+	 */
+	rbufsz = *num * entry_sz + hdr_sz;
+	if ((rbuf = malloc(rbufsz)) == NULL) {
+		free(req);
+		return (ILB_STATUS_ENOMEM);
+	}
+
+	if (tbl == show_nat)
+		req->ic_cmd = ILBD_SHOW_NAT;
+	else
+		req->ic_cmd = ILBD_SHOW_PERSIST;
+	req->ic_flags = 0;
+	req_si->sn_num = *num;
+	cur_num = 0;
+
+	do {
+		tmp_rbufsz = rbufsz;
+		rc = i_ilb_do_comm(h, req, reqsz, rbuf, &tmp_rbufsz);
+		if (rc != ILB_STATUS_OK)
+			goto out;
+		if (rbuf->ic_cmd != ILBD_CMD_OK) {
+			rc = *(ilb_status_t *)&rbuf->ic_data;
+			goto out;
+		}
+
+		tmp_si = (ilb_show_info_t *)&rbuf->ic_data;
+
+		/*
+		 * Never trust the count in the reply: copying more than
+		 * what is left would overrun the caller's buffer.
+		 */
+		if (tmp_si->sn_num > *num - cur_num) {
+			rc = ILB_STATUS_INTERNAL;
+			goto out;
+		}
+
+		cur_num += tmp_si->sn_num;
+		bcopy(&tmp_si->sn_data, buf, tmp_si->sn_num * entry_sz);
+		buf += tmp_si->sn_num * entry_sz;
+
+		/*
+		 * Buffer is filled, regardless of this is the end of table or
+		 * not, we need to stop.
+		 */
+		if (cur_num == *num)
+			break;
+		/* Try to fill in the rest. */
+		req_si->sn_num = *num - cur_num;
+	} while (!(rbuf->ic_flags & ILB_COMM_END));
+
+	*num = cur_num;
+
+	/* End of transaction, let the caller know. */
+	if (rbuf->ic_flags & ILB_COMM_END) {
+		*end = B_TRUE;
+	} else if (*end) {
+		/* The user wants to terminate the transaction */
+		req->ic_flags = ILB_COMM_END;
+		tmp_rbufsz = rbufsz;
+		rc = i_ilb_do_comm(h, req, reqsz, rbuf, &tmp_rbufsz);
+	}
+out:
+	free(req);
+	free(rbuf);
+	return (rc);
+}
diff --git a/usr/src/lib/libilb/common/ilb_rules.c b/usr/src/lib/libilb/common/ilb_rules.c
new file mode 100644
index 0000000000..df01041e4c
--- /dev/null
+++ b/usr/src/lib/libilb/common/ilb_rules.c
@@ -0,0 +1,326 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+#include <stdlib.h>
+#include <strings.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/stropts.h>
+#include <netinet/in.h>
+#include <stddef.h>
+#include "libilb.h"
+#include "libilb_impl.h"
+
+/*
+ * Walker callback passed to ilb_walk_hc() by ilb_reset_config(): destroys
+ * the health check named by hc->hci_name.  arg is not used.
+ */
+/* ARGSUSED */
+static ilb_status_t
+i_drop_hc(ilb_handle_t h, ilb_hc_info_t *hc, void *arg)
+{
+ return (ilb_destroy_hc(h, hc->hci_name));
+}
+
+/*
+ * Walker callback passed to ilb_walk_rules() by ilb_reset_config(): destroys
+ * the rule named by rd->r_name.  arg is not used.
+ */
+/* ARGSUSED */
+static ilb_status_t
+i_drop_rule(ilb_handle_t h, ilb_rule_data_t *rd, void *arg)
+{
+ return (ilb_destroy_rule(h, rd->r_name));
+}
+
+/*
+ * Server walker callback passed to ilb_walk_servers() by i_drop_sg():
+ * removes server srv from the server group sgname.  arg is not used.
+ */
+/* ARGSUSED */
+static ilb_status_t
+i_drop_sg_srvs(ilb_handle_t h, ilb_server_data_t *srv, const char *sgname,
+ void *arg)
+{
+ return (ilb_rem_server_from_group(h, sgname, srv));
+}
+
+/*
+ * Server group walker callback passed to ilb_walk_servergroups() by
+ * ilb_reset_config(): first removes every server of the group sg, then,
+ * if that succeeded, destroys the group itself.  arg is not used.
+ */
+/* ARGSUSED */
+static ilb_status_t
+i_drop_sg(ilb_handle_t h, ilb_sg_data_t *sg, void *arg)
+{
+	ilb_status_t status;
+
+	status = ilb_walk_servers(h, i_drop_sg_srvs, sg->sgd_name, (void *)sg);
+	if (status == ILB_STATUS_OK)
+		status = ilb_destroy_servergroup(h, sg->sgd_name);
+
+	return (status);
+}
+
+/*
+ * Remove the whole configuration: all rules, then all server groups
+ * (including their servers), then all health checks.  The first step that
+ * fails ends the operation and its status is returned.
+ *
+ * Returns ILB_STATUS_EINVAL if h is NULL.
+ */
+ilb_status_t
+ilb_reset_config(ilb_handle_t h)
+{
+	ilb_status_t status;
+
+	if (h == NULL)
+		return (ILB_STATUS_EINVAL);
+
+	status = ilb_walk_rules(h, i_drop_rule, NULL, NULL);
+	if (status == ILB_STATUS_OK)
+		status = ilb_walk_servergroups(h, i_drop_sg, NULL, NULL);
+	if (status == ILB_STATUS_OK)
+		status = ilb_walk_hc(h, i_drop_hc, NULL);
+
+	return (status);
+}
+
+/*
+ * Ask ilbd to create the rule described by rd.
+ *
+ * Returns ILB_STATUS_EINVAL if h is ILB_INVALID_HANDLE, rd is NULL or the
+ * rule name is empty; ILB_STATUS_ENOMEM if the request cannot be allocated;
+ * otherwise the status from i_ilb_do_comm() or the error carried in ilbd's
+ * reply.
+ */
+ilb_status_t
+ilb_create_rule(ilb_handle_t h, const ilb_rule_data_t *rd)
+{
+	ilb_comm_t *req;
+	size_t reqsz;
+	ilb_rule_info_t *info;
+	ilb_status_t status;
+
+	if (h == ILB_INVALID_HANDLE || rd == NULL || *rd->r_name == '\0')
+		return (ILB_STATUS_EINVAL);
+
+	req = i_ilb_alloc_req(ILBD_CREATE_RULE, &reqsz);
+	if (req == NULL)
+		return (ILB_STATUS_ENOMEM);
+	info = (ilb_rule_info_t *)&req->ic_data;
+
+	/*
+	 * ilb_rule_data_t and ilb_rule_info_t represent IP addresses
+	 * differently, so the rule is converted field by field.
+	 */
+	(void) strlcpy(info->rl_name, rd->r_name, sizeof (info->rl_name));
+	(void) strlcpy(info->rl_sgname, rd->r_sgname,
+	    sizeof (info->rl_sgname));
+	(void) strlcpy(info->rl_hcname, rd->r_hcname,
+	    sizeof (info->rl_hcname));
+	info->rl_flags = rd->r_flags;
+	info->rl_proto = rd->r_proto;
+	info->rl_ipversion = rd->r_vip.ia_af;
+	info->rl_algo = rd->r_algo;
+	info->rl_topo = rd->r_topo;
+	info->rl_conndrain = rd->r_conndrain;
+	info->rl_nat_timeout = rd->r_nat_timeout;
+	info->rl_sticky_timeout = rd->r_sticky_timeout;
+	info->rl_hcport = rd->r_hcport;
+	info->rl_hcpflag = rd->r_hcpflag;
+
+	/* A max port below the min port collapses the range to min port. */
+	info->rl_minport = rd->r_minport;
+	info->rl_maxport = rd->r_maxport;
+	if (ntohs(rd->r_maxport) < ntohs(rd->r_minport))
+		info->rl_maxport = rd->r_minport;
+
+	IP_COPY_CLI_2_IMPL(&rd->r_vip, &info->rl_vip);
+	IP_COPY_CLI_2_IMPL(&rd->r_stickymask, &info->rl_stickymask);
+	IP_COPY_CLI_2_IMPL(&rd->r_nat_src_start, &info->rl_nat_src_start);
+	IP_COPY_CLI_2_IMPL(&rd->r_nat_src_end, &info->rl_nat_src_end);
+
+	/* The request buffer doubles as the reply buffer. */
+	status = i_ilb_do_comm(h, req, reqsz, req, &reqsz);
+	if (status == ILB_STATUS_OK && req->ic_cmd != ILBD_CMD_OK)
+		status = *(ilb_status_t *)&req->ic_data;
+
+	free(req);
+	return (status);
+}
+
+/*
+ * Send the rule command cmd for the rule called name to ilbd.  A NULL name
+ * is sent as an all-zero name.
+ *
+ * Returns ILB_STATUS_EINVAL if h is ILB_INVALID_HANDLE, ILB_STATUS_ENOMEM
+ * if the request cannot be allocated, otherwise the status from
+ * i_ilb_do_comm() or the error carried in ilbd's reply.
+ */
+static ilb_status_t
+i_ilb_rule_action(ilb_handle_t h, const char *name, ilbd_cmd_t cmd)
+{
+	ilb_comm_t *req;
+	size_t reqsz;
+	ilb_status_t status;
+
+	if (h == ILB_INVALID_HANDLE)
+		return (ILB_STATUS_EINVAL);
+
+	req = i_ilb_alloc_req(cmd, &reqsz);
+	if (req == NULL)
+		return (ILB_STATUS_ENOMEM);
+
+	if (name != NULL) {
+		(void) strlcpy((char *)&req->ic_data, name,
+		    sizeof (ilbd_name_t));
+	} else {
+		bzero(&req->ic_data, sizeof (ilbd_name_t));
+	}
+
+	/* The request buffer doubles as the reply buffer. */
+	status = i_ilb_do_comm(h, req, reqsz, req, &reqsz);
+	if (status == ILB_STATUS_OK && req->ic_cmd != ILBD_CMD_OK)
+		status = *(ilb_status_t *)&req->ic_data;
+
+	free(req);
+	return (status);
+}
+
+/*
+ * Send ILBD_DESTROY_RULE for the rule called name; returns the status of
+ * i_ilb_rule_action().
+ */
+ilb_status_t
+ilb_destroy_rule(ilb_handle_t h, const char *name)
+{
+ return (i_ilb_rule_action(h, name, ILBD_DESTROY_RULE));
+}
+
+/*
+ * Send ILBD_ENABLE_RULE for the rule called name; returns the status of
+ * i_ilb_rule_action().
+ */
+ilb_status_t
+ilb_enable_rule(ilb_handle_t h, const char *name)
+{
+ return (i_ilb_rule_action(h, name, ILBD_ENABLE_RULE));
+}
+
+/*
+ * Send ILBD_DISABLE_RULE for the rule called name; returns the status of
+ * i_ilb_rule_action().
+ */
+ilb_status_t
+ilb_disable_rule(ilb_handle_t h, const char *name)
+{
+ return (i_ilb_rule_action(h, name, ILBD_DISABLE_RULE));
+}
+
+/*
+ * Retrieve the list of rule names from ilbd.
+ *
+ * h:      handle used for the exchange with ilbd.
+ * rbuf:   on success, set to a malloc()ed reply of ILBD_MSG_SIZE bytes
+ *         that the caller must free(); set to NULL on every failure.
+ * rbufsz: set to ILBD_MSG_SIZE before the exchange and passed to
+ *         i_ilb_do_comm() as the reply size.
+ *
+ * Returns ILB_STATUS_ENOMEM if the reply buffer cannot be allocated,
+ * otherwise the status from i_ilb_do_comm() or the error carried in ilbd's
+ * reply.
+ */
+ilb_status_t
+i_ilb_retrieve_rule_names(ilb_handle_t h, ilb_comm_t **rbuf, size_t *rbufsz)
+{
+	ilb_status_t rc;
+	ilb_comm_t ic, *tmp_rbuf;
+
+	*rbuf = NULL;
+	*rbufsz = ILBD_MSG_SIZE;
+	if ((tmp_rbuf = malloc(*rbufsz)) == NULL)
+		return (ILB_STATUS_ENOMEM);
+
+	/*
+	 * The whole structure is sent to ilbd, so clear it first rather
+	 * than passing along uninitialized stack contents (e.g. ic_flags).
+	 */
+	bzero(&ic, sizeof (ic));
+	ic.ic_cmd = ILBD_RETRIEVE_RULE_NAMES;
+
+	rc = i_ilb_do_comm(h, &ic, sizeof (ic), tmp_rbuf, rbufsz);
+	if (rc != ILB_STATUS_OK)
+		goto out;
+
+	if (tmp_rbuf->ic_cmd == ILBD_CMD_OK) {
+		*rbuf = tmp_rbuf;
+		return (rc);
+	}
+	rc = *(ilb_status_t *)&tmp_rbuf->ic_data;
+out:
+	free(tmp_rbuf);
+	return (rc);
+}
+
+/*
+ * Retrieve the rule called name from ilbd, convert it to an
+ * ilb_rule_data_t and hand it to the walker f together with arg.
+ *
+ * Returns ILB_STATUS_ENOMEM on allocation failure, the status from
+ * i_ilb_do_comm() or the error carried in ilbd's reply if the retrieval
+ * fails, otherwise whatever f returns.
+ */
+static ilb_status_t
+i_ilb_walk_one_rule(ilb_handle_t h, rule_walkerfunc_t f, const char *name,
+    void *arg)
+{
+	ilb_comm_t *req, *reply;
+	size_t reqsz, replysz;
+	ilb_rule_info_t *info;
+	ilb_rule_data_t rd;
+	ilb_status_t status;
+
+	req = i_ilb_alloc_req(ILBD_RETRIEVE_RULE, &reqsz);
+	if (req == NULL)
+		return (ILB_STATUS_ENOMEM);
+
+	replysz = sizeof (ilb_comm_t) + sizeof (ilb_rule_info_t);
+	reply = malloc(replysz);
+	if (reply == NULL) {
+		free(req);
+		return (ILB_STATUS_ENOMEM);
+	}
+
+	(void) strlcpy((char *)&req->ic_data, name, sizeof (ilbd_name_t));
+	status = i_ilb_do_comm(h, req, reqsz, reply, &replysz);
+	if (status == ILB_STATUS_OK && reply->ic_cmd != ILBD_CMD_OK)
+		status = *(ilb_status_t *)&reply->ic_data;
+	if (status != ILB_STATUS_OK)
+		goto done;
+
+	info = (ilb_rule_info_t *)&reply->ic_data;
+
+	/*
+	 * ilb_rule_info_t and ilb_rule_data_t represent IP addresses
+	 * differently, so the rule is converted field by field.
+	 */
+	(void) strlcpy(rd.r_name, info->rl_name, sizeof (rd.r_name));
+	(void) strlcpy(rd.r_hcname, info->rl_hcname, sizeof (rd.r_hcname));
+	(void) strlcpy(rd.r_sgname, info->rl_sgname, sizeof (rd.r_sgname));
+	rd.r_flags = info->rl_flags;
+	rd.r_proto = info->rl_proto;
+	rd.r_minport = info->rl_minport;
+	rd.r_maxport = info->rl_maxport;
+	rd.r_algo = info->rl_algo;
+	rd.r_topo = info->rl_topo;
+	rd.r_conndrain = info->rl_conndrain;
+	rd.r_nat_timeout = info->rl_nat_timeout;
+	rd.r_sticky_timeout = info->rl_sticky_timeout;
+	rd.r_hcport = info->rl_hcport;
+	rd.r_hcpflag = info->rl_hcpflag;
+
+	IP_COPY_IMPL_2_CLI(&info->rl_vip, &rd.r_vip);
+	IP_COPY_IMPL_2_CLI(&info->rl_nat_src_start, &rd.r_nat_src_start);
+	IP_COPY_IMPL_2_CLI(&info->rl_nat_src_end, &rd.r_nat_src_end);
+	IP_COPY_IMPL_2_CLI(&info->rl_stickymask, &rd.r_stickymask);
+
+	status = f(h, &rd, arg);
+
+done:
+	free(req);
+	free(reply);
+	return (status);
+}
+
+/*
+ * Invoke the walker f on the rule called name or, if name is NULL, on
+ * every rule whose name ilbd reports.
+ *
+ * Returns ILB_STATUS_EINVAL if h is NULL.  When walking all rules, a rule
+ * that yields ILB_STATUS_ENOENT is skipped; any other failure stops the
+ * walk and is returned.
+ */
+ilb_status_t
+ilb_walk_rules(ilb_handle_t h, rule_walkerfunc_t f, const char *name,
+    void *arg)
+{
+	ilb_comm_t *reply;
+	size_t replysz;
+	ilbd_namelist_t *list;
+	ilb_status_t status;
+	int idx;
+
+	if (h == NULL)
+		return (ILB_STATUS_EINVAL);
+
+	if (name != NULL)
+		return (i_ilb_walk_one_rule(h, f, name, arg));
+
+	status = i_ilb_retrieve_rule_names(h, &reply, &replysz);
+	if (status != ILB_STATUS_OK)
+		return (status);
+
+	list = (ilbd_namelist_t *)&reply->ic_data;
+	for (idx = 0; idx < list->ilbl_count; idx++) {
+		status = i_ilb_walk_one_rule(h, f, list->ilbl_name[idx], arg);
+
+		/*
+		 * Another process may have removed the rule after the
+		 * names were retrieved; that is not an error.
+		 */
+		if (status == ILB_STATUS_ENOENT)
+			status = ILB_STATUS_OK;
+		if (status != ILB_STATUS_OK)
+			break;
+	}
+
+	free(reply);
+	return (status);
+}
diff --git a/usr/src/lib/libilb/common/ilb_sg.c b/usr/src/lib/libilb/common/ilb_sg.c
new file mode 100644
index 0000000000..62990f0f09
--- /dev/null
+++ b/usr/src/lib/libilb/common/ilb_sg.c
@@ -0,0 +1,464 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <stdlib.h>
+#include <strings.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <stddef.h>
+#include <libilb_impl.h>
+#include <libilb.h>
+
+/*
+ * Send the server group command cmd for the group sgname to ilbd.
+ *
+ * Returns ILB_STATUS_EINVAL if h is ILB_INVALID_HANDLE or sgname is NULL
+ * or empty, ILB_STATUS_NAMETOOLONG if sgname does not fit in
+ * ILB_SGNAME_SZ - 1 characters, ILB_STATUS_ENOMEM if the request cannot be
+ * allocated, otherwise the status from i_ilb_do_comm() or the error carried
+ * in ilbd's reply.
+ */
+static ilb_status_t
+i_ilb_addrem_sg(ilb_handle_t h, const char *sgname, ilbd_cmd_t cmd)
+{
+	ilb_comm_t *req;
+	size_t reqsz;
+	ilb_status_t status;
+
+	if (h == ILB_INVALID_HANDLE || sgname == NULL || *sgname == '\0')
+		return (ILB_STATUS_EINVAL);
+
+	if (strlen(sgname) > ILB_SGNAME_SZ - 1)
+		return (ILB_STATUS_NAMETOOLONG);
+
+	req = i_ilb_alloc_req(cmd, &reqsz);
+	if (req == NULL)
+		return (ILB_STATUS_ENOMEM);
+
+	(void) strlcpy((char *)&req->ic_data, sgname, sizeof (ilbd_name_t));
+
+	/* The request buffer doubles as the reply buffer. */
+	status = i_ilb_do_comm(h, req, reqsz, req, &reqsz);
+	if (status == ILB_STATUS_OK && req->ic_cmd != ILBD_CMD_OK)
+		status = *(ilb_status_t *)&req->ic_data;
+
+	free(req);
+	return (status);
+}
+
+/*
+ * Send ILBD_DESTROY_SERVERGROUP for the group sgname; returns the status of
+ * i_ilb_addrem_sg().
+ */
+ilb_status_t
+ilb_destroy_servergroup(ilb_handle_t h, const char *sgname)
+{
+ return (i_ilb_addrem_sg(h, sgname, ILBD_DESTROY_SERVERGROUP));
+}
+
+/*
+ * Send ILBD_CREATE_SERVERGROUP for the group sgname; returns the status of
+ * i_ilb_addrem_sg().
+ */
+ilb_status_t
+ilb_create_servergroup(ilb_handle_t h, const char *sgname)
+{
+ return (i_ilb_addrem_sg(h, sgname, ILBD_CREATE_SERVERGROUP));
+}
+
+/*
+ * Send cmd (add or remove) for the single server srv of the server group
+ * sgname to ilbd.
+ *
+ * Returns ILB_STATUS_EINVAL if h is ILB_INVALID_HANDLE, sgname is NULL or
+ * empty, or srv is NULL; ILB_STATUS_NAMETOOLONG if sgname does not fit in
+ * ILB_SGNAME_SZ - 1 characters; ILB_STATUS_ENOMEM if the request cannot be
+ * allocated; otherwise the status from i_ilb_do_comm() or the error carried
+ * in ilbd's reply.
+ */
+static ilb_status_t
+i_ilb_addrem_server_to_group(ilb_handle_t h, const char *sgname,
+    ilb_server_data_t *srv, ilbd_cmd_t cmd)
+{
+	ilb_comm_t *req;
+	size_t reqsz;
+	ilb_sg_info_t *info;
+	ilb_sg_srv_t *dst;
+	in_port_t lo, hi;
+	ilb_status_t status;
+
+	if (h == ILB_INVALID_HANDLE || sgname == NULL ||
+	    *sgname == '\0' || srv == NULL)
+		return (ILB_STATUS_EINVAL);
+
+	if (strlen(sgname) > ILB_SGNAME_SZ - 1)
+		return (ILB_STATUS_NAMETOOLONG);
+
+	req = i_ilb_alloc_req(cmd, &reqsz);
+	if (req == NULL)
+		return (ILB_STATUS_ENOMEM);
+
+	/* The message carries the group name and exactly one server. */
+	info = (ilb_sg_info_t *)&req->ic_data;
+	info->sg_srvcount = 1;
+	(void) strlcpy(info->sg_name, sgname, sizeof (info->sg_name));
+	dst = &info->sg_servers[0];
+
+	IP_COPY_CLI_2_IMPL(&srv->sd_addr, &dst->sgs_addr);
+
+	/* Ports are compared in host byte order, stored as given. */
+	lo = ntohs(srv->sd_minport);
+	hi = ntohs(srv->sd_maxport);
+	dst->sgs_minport = srv->sd_minport;
+	dst->sgs_maxport = (lo != 0 && hi < lo) ?
+	    srv->sd_minport : srv->sd_maxport;
+
+	dst->sgs_flags = srv->sd_flags;
+
+	/* Only a server ID carrying the ID prefix is passed on. */
+	if (srv->sd_srvID[0] == ILB_SRVID_PREFIX) {
+		(void) strlcpy(dst->sgs_srvID, srv->sd_srvID,
+		    sizeof (dst->sgs_srvID));
+	}
+
+	/* The request buffer doubles as the reply buffer. */
+	status = i_ilb_do_comm(h, req, reqsz, req, &reqsz);
+	if (status == ILB_STATUS_OK && req->ic_cmd != ILBD_CMD_OK)
+		status = *(ilb_status_t *)&req->ic_data;
+
+	free(req);
+	return (status);
+}
+
+/*
+ * Send ILBD_ADD_SERVER_TO_GROUP for server srv and group sgname; returns
+ * the status of i_ilb_addrem_server_to_group().
+ */
+ilb_status_t
+ilb_add_server_to_group(ilb_handle_t h, const char *sgname,
+ ilb_server_data_t *srv)
+{
+ return (i_ilb_addrem_server_to_group(h, sgname, srv,
+ ILBD_ADD_SERVER_TO_GROUP));
+}
+
+/*
+ * Send ILBD_REM_SERVER_FROM_GROUP for server srv and group sgname; returns
+ * the status of i_ilb_addrem_server_to_group().
+ */
+ilb_status_t
+ilb_rem_server_from_group(ilb_handle_t h, const char *sgname,
+ ilb_server_data_t *srv)
+{
+ return (i_ilb_addrem_server_to_group(h, sgname, srv,
+ ILBD_REM_SERVER_FROM_GROUP));
+}
+
+/*
+ * Retrieve the list of server group names from ilbd.
+ *
+ * h:      handle used for the exchange with ilbd.
+ * rbuf:   on success, set to a malloc()ed reply of ILBD_MSG_SIZE bytes
+ *         that the caller must free(); set to NULL on every failure.
+ * rbufsz: set to ILBD_MSG_SIZE before the exchange and passed to
+ *         i_ilb_do_comm() as the reply size.
+ *
+ * Returns ILB_STATUS_ENOMEM if the reply buffer cannot be allocated,
+ * otherwise the status from i_ilb_do_comm() or the error carried in ilbd's
+ * reply.
+ */
+static ilb_status_t
+i_ilb_retrieve_sg_names(ilb_handle_t h, ilb_comm_t **rbuf, size_t *rbufsz)
+{
+	ilb_status_t rc;
+	ilb_comm_t ic, *tmp_rbuf;
+
+	*rbuf = NULL;
+	*rbufsz = ILBD_MSG_SIZE;
+	if ((tmp_rbuf = malloc(*rbufsz)) == NULL)
+		return (ILB_STATUS_ENOMEM);
+
+	/*
+	 * The whole structure is sent to ilbd, so clear it first rather
+	 * than passing along uninitialized stack contents (e.g. ic_flags).
+	 */
+	bzero(&ic, sizeof (ic));
+	ic.ic_cmd = ILBD_RETRIEVE_SG_NAMES;
+
+	rc = i_ilb_do_comm(h, &ic, sizeof (ic), tmp_rbuf, rbufsz);
+	if (rc != ILB_STATUS_OK)
+		goto out;
+
+	if (tmp_rbuf->ic_cmd == ILBD_CMD_OK) {
+		*rbuf = tmp_rbuf;
+		return (rc);
+	}
+	rc = *(ilb_status_t *)&tmp_rbuf->ic_data;
+out:
+	free(tmp_rbuf);
+	return (rc);
+}
+
+/*
+ * Retrieve the servers of the server group sgname from ilbd.
+ *
+ * rbuf:   on success, set to a malloc()ed reply of ILBD_MSG_SIZE bytes
+ *         that the caller must free(); set to NULL when the reply buffer
+ *         cannot be allocated or the exchange fails.
+ * rbufsz: set to ILBD_MSG_SIZE before the exchange and passed to
+ *         i_ilb_do_comm() as the reply size.
+ *
+ * Returns ILB_STATUS_ENOMEM on allocation failure, otherwise the status
+ * from i_ilb_do_comm() or the error carried in ilbd's reply.
+ */
+static ilb_status_t
+i_ilb_retrieve_sg_hosts(ilb_handle_t h, const char *sgname, ilb_comm_t **rbuf,
+    size_t *rbufsz)
+{
+	ilb_comm_t *req, *reply;
+	size_t reqsz;
+	ilb_status_t status;
+
+	req = i_ilb_alloc_req(ILBD_RETRIEVE_SG_HOSTS, &reqsz);
+	if (req == NULL)
+		return (ILB_STATUS_ENOMEM);
+
+	*rbufsz = ILBD_MSG_SIZE;
+	reply = malloc(*rbufsz);
+	if (reply == NULL) {
+		free(req);
+		*rbuf = NULL;
+		return (ILB_STATUS_ENOMEM);
+	}
+
+	(void) strlcpy((char *)&req->ic_data, sgname, sizeof (ilbd_name_t));
+	status = i_ilb_do_comm(h, req, reqsz, reply, rbufsz);
+	if (status == ILB_STATUS_OK && reply->ic_cmd != ILBD_CMD_OK)
+		status = *(ilb_status_t *)&reply->ic_data;
+
+	free(req);
+
+	/* Ownership of the reply moves to the caller only on success. */
+	if (status != ILB_STATUS_OK) {
+		free(reply);
+		reply = NULL;
+	}
+	*rbuf = reply;
+	return (status);
+}
+
+/*
+ * Tells i_ilb_walk_one_sg() how to use its callback: walk_servers calls it
+ * once per server of the group, walk_sg calls it once for the group itself.
+ */
+typedef enum {
+ walk_servers,
+ walk_sg
+} sgwalk_t;
+
+/*
+ * Retrieve the data of the server group sgname and run the callback f on
+ * it.  With walktype == walk_sg, f is treated as a sg_walkerfunc_t and is
+ * called once with the group's data; otherwise f is treated as a
+ * srv_walkerfunc_t and is called once per server until it returns
+ * something other than ILB_STATUS_OK.
+ *
+ * Returns the status of the retrieval if that fails, otherwise the last
+ * status returned by f (ILB_STATUS_OK if there was no server to walk).
+ */
+static ilb_status_t
+i_ilb_walk_one_sg(ilb_handle_t h, void *f, const char *sgname, void *arg,
+    sgwalk_t walktype)
+{
+	ilb_comm_t *reply;
+	size_t replysz;
+	ilb_sg_info_t *info;
+	ilb_status_t status;
+	int idx;
+
+	status = i_ilb_retrieve_sg_hosts(h, sgname, &reply, &replysz);
+	if (status != ILB_STATUS_OK)
+		return (status);
+	info = (ilb_sg_info_t *)&reply->ic_data;
+
+	if (walktype == walk_sg) {
+		ilb_sg_data_t sgd;
+
+		(void) strlcpy(sgd.sgd_name, info->sg_name,
+		    sizeof (sgd.sgd_name));
+		sgd.sgd_srvcount = info->sg_srvcount;
+		sgd.sgd_flags = info->sg_flags;
+		status = ((sg_walkerfunc_t)f)(h, &sgd, arg);
+	} else {
+		srv_walkerfunc_t srv_cb = (srv_walkerfunc_t)f;
+
+		for (idx = 0; idx < info->sg_srvcount; idx++) {
+			ilb_sg_srv_t *src = &info->sg_servers[idx];
+			ilb_server_data_t sd;
+
+			IP_COPY_IMPL_2_CLI(&src->sgs_addr, &sd.sd_addr);
+			sd.sd_minport = src->sgs_minport;
+			sd.sd_maxport = src->sgs_maxport;
+			sd.sd_flags = src->sgs_flags;
+			(void) strlcpy(sd.sd_srvID, src->sgs_srvID,
+			    sizeof (sd.sd_srvID));
+
+			status = srv_cb(h, &sd, info->sg_name, arg);
+			if (status != ILB_STATUS_OK)
+				break;
+		}
+	}
+
+	free(reply);
+	return (status);
+}
+
+/*
+ * Wrapper around i_ilb_walk_one_sg(): walks the server group sgname or, if
+ * sgname is NULL, retrieves the list of server group names and walks each
+ * of them.  A group that yields ILB_STATUS_SGUNAVAIL is skipped; any other
+ * failure stops the walk and is returned.
+ */
+static ilb_status_t
+i_walk_sgs(ilb_handle_t h, void *f, const char *sgname,
+    void *arg, sgwalk_t walktype)
+{
+	ilb_comm_t *reply;
+	size_t replysz;
+	ilbd_namelist_t *list;
+	ilb_status_t status;
+	int idx;
+
+	if (sgname != NULL)
+		return (i_ilb_walk_one_sg(h, f, sgname, arg, walktype));
+
+	status = i_ilb_retrieve_sg_names(h, &reply, &replysz);
+	if (status != ILB_STATUS_OK)
+		return (status);
+
+	list = (ilbd_namelist_t *)&reply->ic_data;
+	for (idx = 0; idx < list->ilbl_count; idx++) {
+		status = i_ilb_walk_one_sg(h, f, list->ilbl_name[idx], arg,
+		    walktype);
+
+		/*
+		 * Another process may have removed the server group in
+		 * the meantime; that is not an error.
+		 */
+		if (status == ILB_STATUS_SGUNAVAIL)
+			status = ILB_STATUS_OK;
+		if (status != ILB_STATUS_OK)
+			break;
+	}
+
+	free(reply);
+	return (status);
+}
+
+/*
+ * Call f once per server group: for the group sgname or, if sgname is
+ * NULL, for every group.  Implemented by i_walk_sgs() with walk_sg.
+ */
+ilb_status_t
+ilb_walk_servergroups(ilb_handle_t h, sg_walkerfunc_t f, const char *sgname,
+ void *arg)
+{
+ return (i_walk_sgs(h, (void *)f, sgname, arg, walk_sg));
+}
+
+/*
+ * Call f once per server: for the servers of group sgname or, if sgname is
+ * NULL, for the servers of every group.  Implemented by i_walk_sgs() with
+ * walk_servers.
+ */
+ilb_status_t
+ilb_walk_servers(ilb_handle_t h, srv_walkerfunc_t f, const char *sgname,
+ void *arg)
+{
+ return (i_walk_sgs(h, (void *)f, sgname, arg, walk_servers));
+}
+
+/*
+ * Common worker for ilb_enable_server() and ilb_disable_server(): sends
+ * cmd for the single server srv to ilbd.
+ *
+ * h:        handle used for the exchange with ilbd.
+ * srv:      server to act on; its address, ports, flags and server ID are
+ *           copied into the request.
+ * reserved: must be NULL.
+ * cmd:      command to send.
+ *
+ * Returns ILB_STATUS_EINVAL if h or srv is NULL or reserved is not NULL,
+ * ILB_STATUS_ENOMEM if the request cannot be allocated, otherwise the
+ * status from i_ilb_do_comm() or the error carried in ilbd's reply.
+ */
+static ilb_status_t
+ilb_Xable_server(ilb_handle_t h, ilb_server_data_t *srv, void *reserved,
+    ilbd_cmd_t cmd)
+{
+	ilb_status_t rc;
+	ilb_sg_info_t *sg_info;
+	ilb_sg_srv_t *sgs;
+	in_port_t h_maxport, h_minport;
+	ilb_comm_t *ic;
+	size_t ic_sz;
+
+	if (h == NULL)
+		return (ILB_STATUS_EINVAL);
+
+	/*
+	 * In this implementation, this needs to be NULL, so
+	 * there's no ugly surprises with old apps once we attach
+	 * meaning to this parameter.
+	 */
+	if (reserved != NULL)
+		return (ILB_STATUS_EINVAL);
+
+	/* srv is dereferenced below, so it must be present. */
+	if (srv == NULL)
+		return (ILB_STATUS_EINVAL);
+
+	/* now all the checks have passed, we can pass on the goods */
+	if ((ic = i_ilb_alloc_req(cmd, &ic_sz)) == NULL)
+		return (ILB_STATUS_ENOMEM);
+
+	sg_info = (ilb_sg_info_t *)&ic->ic_data;
+	sg_info->sg_srvcount = 1;
+
+	sgs = &sg_info->sg_servers[0];
+
+	/*
+	 * make sure min_port <= max_port; comparison in host byte order!
+	 * The fallback takes the caller's min port directly: the request's
+	 * own sgs_minport must not be read before it has been assigned.
+	 */
+	h_maxport = ntohs(srv->sd_maxport);
+	h_minport = ntohs(srv->sd_minport);
+	sgs->sgs_minport = srv->sd_minport;
+	if (h_maxport != 0 && h_maxport < h_minport)
+		sgs->sgs_maxport = srv->sd_minport;
+	else
+		sgs->sgs_maxport = srv->sd_maxport;
+
+	sgs->sgs_flags = srv->sd_flags;
+	(void) strlcpy(sgs->sgs_srvID, srv->sd_srvID, sizeof (sgs->sgs_srvID));
+	IP_COPY_CLI_2_IMPL(&srv->sd_addr, &sgs->sgs_addr);
+
+	/* The request buffer doubles as the reply buffer. */
+	rc = i_ilb_do_comm(h, ic, ic_sz, ic, &ic_sz);
+	if (rc != ILB_STATUS_OK)
+		goto out;
+
+	if (ic->ic_cmd != ILBD_CMD_OK)
+		rc = *(ilb_status_t *)&ic->ic_data;
+out:
+	free(ic);
+	return (rc);
+}
+
+/*
+ * Send ILBD_ENABLE_SERVER for server srv; reserved must be NULL (see
+ * ilb_Xable_server()).  Returns the status of ilb_Xable_server().
+ */
+ilb_status_t
+ilb_enable_server(ilb_handle_t h, ilb_server_data_t *srv, void *reserved)
+{
+ return (ilb_Xable_server(h, srv, reserved, ILBD_ENABLE_SERVER));
+}
+
+/*
+ * Send ILBD_DISABLE_SERVER for server srv; reserved must be NULL (see
+ * ilb_Xable_server()).  Returns the status of ilb_Xable_server().
+ */
+ilb_status_t
+ilb_disable_server(ilb_handle_t h, ilb_server_data_t *srv, void *reserved)
+{
+ return (ilb_Xable_server(h, srv, reserved, ILBD_DISABLE_SERVER));
+}
+
+/*
+ * Ask ilbd to complete the description of a server in the group sgname.
+ *
+ * cmd == ILBD_SRV_ID2ADDR: srv->sd_srvID is sent and srv->sd_addr is
+ * filled in from the reply.
+ * Any other cmd (callers use ILBD_SRV_ADDR2ID): srv->sd_addr is sent and
+ * srv->sd_srvID is filled in from the reply.
+ *
+ * Returns ILB_STATUS_EINVAL for an invalid handle, a NULL or empty sgname,
+ * a NULL srv, an empty server ID (ID2ADDR) or an invalid address family
+ * (ADDR2ID); ILB_STATUS_ENOMEM on allocation failure; otherwise the status
+ * from i_ilb_do_comm() or the error carried in ilbd's reply.
+ */
+static ilb_status_t
+i_ilb_fillin_srvdata(ilb_handle_t h, ilb_server_data_t *srv, const char *sgname,
+    ilbd_cmd_t cmd)
+{
+	ilb_status_t rc;
+	ilb_sg_info_t *sg_info;
+	ilb_sg_srv_t *sgs;
+	ilb_comm_t *ic;
+	size_t ic_sz;
+	ilb_comm_t *rbuf;
+	size_t rbufsz;
+
+	if (h == ILB_INVALID_HANDLE || sgname == NULL ||
+	    *sgname == '\0' || srv == NULL)
+		return (ILB_STATUS_EINVAL);
+
+	if (cmd == ILBD_SRV_ID2ADDR && srv->sd_srvID[0] == '\0')
+		return (ILB_STATUS_EINVAL);
+	if (cmd == ILBD_SRV_ADDR2ID && !IS_AF_VALID(srv->sd_addr.ia_af))
+		return (ILB_STATUS_EINVAL);
+
+	if ((ic = i_ilb_alloc_req(cmd, &ic_sz)) == NULL)
+		return (ILB_STATUS_ENOMEM);
+	rbufsz = sizeof (ilb_comm_t) + sizeof (ilb_sg_srv_t);
+	if ((rbuf = malloc(rbufsz)) == NULL) {
+		free(ic);
+		return (ILB_STATUS_ENOMEM);
+	}
+
+	sg_info = (ilb_sg_info_t *)&ic->ic_data;
+	sg_info->sg_srvcount = 1;
+	(void) strlcpy(sg_info->sg_name, sgname, sizeof (sg_info->sg_name));
+
+	sgs = &sg_info->sg_servers[0];
+
+	if (cmd == ILBD_SRV_ID2ADDR) {
+		(void) strlcpy(sgs->sgs_srvID, srv->sd_srvID,
+		    sizeof (sgs->sgs_srvID));
+	} else {
+		IP_COPY_CLI_2_IMPL(&srv->sd_addr, &sgs->sgs_addr);
+	}
+
+	rc = i_ilb_do_comm(h, ic, ic_sz, rbuf, &rbufsz);
+	if (rc != ILB_STATUS_OK)
+		goto out;
+
+	if (rbuf->ic_cmd != ILBD_CMD_OK) {
+		rc = *(ilb_status_t *)&rbuf->ic_data;
+		goto out;
+	}
+
+	sgs = (ilb_sg_srv_t *)&rbuf->ic_data;
+	if (cmd == ILBD_SRV_ID2ADDR) {
+		IP_COPY_IMPL_2_CLI(&sgs->sgs_addr, &srv->sd_addr);
+	} else {
+		/* Bound the copy by the destination, not the source. */
+		(void) strlcpy(srv->sd_srvID, sgs->sgs_srvID,
+		    sizeof (srv->sd_srvID));
+	}
+
+out:
+	/* Both buffers are local to this call; release them on all paths. */
+	free(ic);
+	free(rbuf);
+	return (rc);
+}
+
+/*
+ * Look up the address of the server identified by srv->sd_srvID in the
+ * group sgname; on success i_ilb_fillin_srvdata() stores it in
+ * srv->sd_addr.
+ */
+ilb_status_t
+ilb_srvID_to_address(ilb_handle_t h, ilb_server_data_t *srv, const char *sgname)
+{
+ return (i_ilb_fillin_srvdata(h, srv, sgname, ILBD_SRV_ID2ADDR));
+
+}
+
+/*
+ * Look up the server ID of the server with address srv->sd_addr in the
+ * group sgname; on success i_ilb_fillin_srvdata() stores it in
+ * srv->sd_srvID.
+ */
+ilb_status_t
+ilb_address_to_srvID(ilb_handle_t h, ilb_server_data_t *srv, const char *sgname)
+{
+ return (i_ilb_fillin_srvdata(h, srv, sgname, ILBD_SRV_ADDR2ID));
+}
diff --git a/usr/src/lib/libilb/common/ilb_subr.c b/usr/src/lib/libilb/common/ilb_subr.c
new file mode 100644
index 0000000000..25f990d239
--- /dev/null
+++ b/usr/src/lib/libilb/common/ilb_subr.c
@@ -0,0 +1,424 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <stdlib.h>
+#include <strings.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <inttypes.h>
+#include <assert.h>
+#include <libilb.h>
+#include <libilb_impl.h>
+#include <locale.h>
+
+/*
+ * Tells i_cmp_addr_impl() how its address arguments are represented:
+ * internal - struct in6_addr; external - ilb_ip_addr_t.
+ */
+typedef enum {
+ internal,
+ external
+} ip_addr_type_t;
+
+/* Sign of a 64-bit value: -1 for negative numbers, 1 otherwise (also 0). */
+static int
+sign64(int64_t n)
+{
+	return ((n < 0) ? -1 : 1);
+}
+
+/* Sign of a 32-bit value: -1 for negative numbers, 1 otherwise (also 0). */
+static int
+sign32(int32_t n)
+{
+	return ((n < 0) ? -1 : 1);
+}
+
+/*
+ * Signed difference x - y of two unsigned 64-bit values.  The true
+ * difference may not fit into an int64_t, so the result is capped at
+ * +/- INT64_MAX.
+ * return: < 0: x < y, 0: x == y, > 0: x > y
+ */
+static int64_t
+signed_diff64(uint64_t x, uint64_t y)
+{
+	uint64_t magnitude;
+	int direction;
+
+	if (x == y)
+		return (0);
+
+	/* Always subtract the smaller from the larger value. */
+	if (x > y) {
+		direction = 1;
+		magnitude = x - y;
+	} else {
+		direction = -1;
+		magnitude = y - x;
+	}
+
+	if (magnitude > INT64_MAX)
+		return (INT64_MAX * direction);
+
+	return ((int64_t)magnitude * direction);
+}
+
+/*
+ * Absolute difference |x - y| of two unsigned 64-bit values.
+ *
+ * sgn: receives the direction of the difference: -1 if x < y, 0 if
+ *      x == y, 1 if x > y.  It is always written, so callers never read
+ *      an indeterminate value when the inputs are equal.
+ */
+static uint64_t
+unsigned_diff64(uint64_t x, uint64_t y, int *sgn)
+{
+	if (x == y) {
+		*sgn = 0;
+		return (0);
+	}
+
+	if (x > y) {
+		*sgn = 1;
+		return (x - y);
+	}
+
+	*sgn = -1;
+	return (y - x);
+}
+
+/*
+ * Compare the IP addresses ip1 and ip2 as unsigned integers.
+ *
+ * ip1, ip2: addresses in network byte order.  With atype == internal they
+ *	point to struct in6_addr; the family is obtained with GET_AF() and
+ *	IPv4 values are extracted with IN6_V4MAPPED_TO_IPADDR().  Otherwise
+ *	they point to ilb_ip_addr_t.  The address family of ip1 is used for
+ *	both addresses.
+ * diff: if not NULL, receives ip1 - ip2 with the same sign as the
+ *	comparison result.  Since an IPv6 difference can be larger than
+ *	what a 64-bit value can express, it is capped at +/- INT64_MAX.
+ *
+ * return: -1: ip1 < ip2, 0: ip1 == ip2, 1: ip1 > ip2
+ */
+static int
+i_cmp_addr_impl(void *ip1, void *ip2, ip_addr_type_t atype, int64_t *diff)
+{
+	struct in6_addr *a6_1 = NULL, *a6_2 = NULL;
+	uint32_t i1, i2;
+	uint32_t l1 = 0, l2 = 0;
+	int af, sgn;
+	int64_t d;
+
+	if (atype == internal) {
+		af = GET_AF((struct in6_addr *)ip1);
+		if (af == AF_INET) {
+			IN6_V4MAPPED_TO_IPADDR((struct in6_addr *)ip1, i1);
+			IN6_V4MAPPED_TO_IPADDR((struct in6_addr *)ip2, i2);
+
+			l1 = ntohl(i1);
+			l2 = ntohl(i2);
+		} else {
+			a6_1 = (struct in6_addr *)ip1;
+			a6_2 = (struct in6_addr *)ip2;
+		}
+	} else {
+		af = ((ilb_ip_addr_t *)ip1)->ia_af;
+		if (af == AF_INET) {
+			struct in_addr *a1, *a2;
+
+			a1 = &((ilb_ip_addr_t *)ip1)->ia_v4;
+			a2 = &((ilb_ip_addr_t *)ip2)->ia_v4;
+
+			l1 = ntohl((uint32_t)a1->s_addr);
+			l2 = ntohl((uint32_t)a2->s_addr);
+		} else {
+			a6_1 = &((ilb_ip_addr_t *)ip1)->ia_v6;
+			a6_2 = &((ilb_ip_addr_t *)ip2)->ia_v6;
+		}
+	}
+
+	if (af == AF_INET) {
+		/*
+		 * Subtracting in uint32_t would wrap around for l1 < l2;
+		 * compute the difference in 64 bits instead, where any
+		 * difference of two 32-bit values fits without capping.
+		 */
+		d = signed_diff64(l1, l2);
+		sgn = sign64(d);
+	} else {
+		/*
+		 * 128-bit addresses are larger than the largest integer
+		 * type, so we work on two 64-bit halves:
+		 * 1. separate high-order and low-order bits (64 each).
+		 * 2. calculate magnitude and direction of the difference
+		 *    of each half.
+		 * 3. if the high-order halves are equal, the low-order
+		 *    difference is the result.
+		 * 4. otherwise the high-order half decides the sign.  The
+		 *    magnitude is dh * 2^64 +/- dl, which only drops below
+		 *    2^64 if dh == 1 and the low-order difference points
+		 *    the other way; it is then 2^64 - dl.  In all other
+		 *    cases the magnitude is at least 2^64 and gets capped.
+		 * 5. cap the magnitude at INT64_MAX and apply the sign.
+		 */
+		uint64_t i1h, i1l;
+		uint64_t i2h, i2l;
+		uint64_t dh, dl, mag;
+		int h_sign = 0, l_sign = 0;
+
+		/* 1. */
+		i1h = INV6_N2H_MSB64(a6_1);
+		i1l = INV6_N2H_LSB64(a6_1);
+		i2h = INV6_N2H_MSB64(a6_2);
+		i2l = INV6_N2H_LSB64(a6_2);
+
+		/* 2. */
+		dh = unsigned_diff64(i1h, i2h, &h_sign);
+		dl = unsigned_diff64(i1l, i2l, &l_sign);
+
+		if (dh == 0) {
+			/* 3. */
+			sgn = l_sign;
+			mag = dl;
+		} else {
+			/* 4. */
+			sgn = h_sign;
+			if (dh == 1 && dl != 0 && l_sign != h_sign)
+				mag = UINT64_MAX - dl + 1;
+			else
+				mag = UINT64_MAX;
+		}
+
+		/* 5. */
+		if (mag > INT64_MAX)
+			mag = INT64_MAX;
+		d = (int64_t)mag * sgn;
+	}
+	if (diff != NULL)
+		*diff = d;
+	if (d == 0)
+		return (0);
+	return (sgn);
+}
+
+/*
+ * Compare two addresses given as struct in6_addr; see i_cmp_addr_impl()
+ * (atype internal) for the meaning of the result and of diff.
+ */
+int
+ilb_cmp_in6_addr(struct in6_addr *ip1, struct in6_addr *ip2, int64_t *diff)
+{
+	return (i_cmp_addr_impl(ip1, ip2, internal, diff));
+}
+
+/*
+ * Compare two addresses given as ilb_ip_addr_t; see i_cmp_addr_impl()
+ * (atype external) for the meaning of the result and of diff.
+ */
+int
+ilb_cmp_ipaddr(ilb_ip_addr_t *ip1, ilb_ip_addr_t *ip2, int64_t *diff)
+{
+	return (i_cmp_addr_impl(ip1, ip2, external, diff));
+}
+
+/*
+ * Error strings for error values returned by libilb functions.
+ *
+ * Returns the dgettext()-translated message that belongs to "rc".  Status
+ * values that have no case of their own below (ILB_STATUS_EWOULDBLOCK and
+ * ILB_STATUS_GENERIC among them) yield the "unknown error" text.
+ */
+const char *
+ilb_errstr(ilb_status_t rc)
+{
+	switch (rc) {
+	case ILB_STATUS_OK:
+		return (dgettext(TEXT_DOMAIN, "no error"));
+	case ILB_STATUS_INTERNAL:
+		return (dgettext(TEXT_DOMAIN, "error internal to the library"));
+	case ILB_STATUS_EINVAL:
+		return (dgettext(TEXT_DOMAIN, "invalid argument(s) - see"
+		    " man page"));
+	case ILB_STATUS_ENOMEM:
+		return (dgettext(TEXT_DOMAIN, "not enough memory"
+		    " for operation"));
+	case ILB_STATUS_ENOENT:
+		return (dgettext(TEXT_DOMAIN, "no such/no more element(s)"));
+	case ILB_STATUS_SOCKET:
+		return (dgettext(TEXT_DOMAIN, "socket() failed"));
+	case ILB_STATUS_READ:
+		return (dgettext(TEXT_DOMAIN, "read() failed"));
+	case ILB_STATUS_WRITE:
+		return (dgettext(TEXT_DOMAIN, "fflush() or send() failed"));
+	case ILB_STATUS_TIMER:
+		return (dgettext(TEXT_DOMAIN, "health check timer"
+		    " create/setup error"));
+	case ILB_STATUS_INUSE:
+		return (dgettext(TEXT_DOMAIN, "object is in use,"
+		    " cannot destroy"));
+	case ILB_STATUS_EEXIST:
+		return (dgettext(TEXT_DOMAIN, "object already exists"));
+	case ILB_STATUS_PERMIT:
+		return (dgettext(TEXT_DOMAIN, "no scf permit"));
+	case ILB_STATUS_CALLBACK:
+		return (dgettext(TEXT_DOMAIN, "scf callback error"));
+	case ILB_STATUS_INPROGRESS:
+		/* was "operation is progress": the word "in" was missing */
+		return (dgettext(TEXT_DOMAIN, "operation is in progress"));
+	case ILB_STATUS_SEND:
+		return (dgettext(TEXT_DOMAIN, "send() failed"));
+	case ILB_STATUS_ENOHCINFO:
+		return (dgettext(TEXT_DOMAIN, "missing healthcheck info"));
+	case ILB_STATUS_INVAL_HCTESTTYPE:
+		return (dgettext(TEXT_DOMAIN, "invalid health check"
+		    " test type"));
+	case ILB_STATUS_INVAL_CMD:
+		return (dgettext(TEXT_DOMAIN, "invalid command"));
+	case ILB_STATUS_DUP_RULE:
+		return (dgettext(TEXT_DOMAIN, "specified rule name already"
+		    " exists"));
+	case ILB_STATUS_ENORULE:
+		return (dgettext(TEXT_DOMAIN, "specified rule does not exist"));
+	case ILB_STATUS_MISMATCHSG:
+		return (dgettext(TEXT_DOMAIN, "address family mismatch with"
+		    " servergroup"));
+	case ILB_STATUS_MISMATCHH:
+		return (dgettext(TEXT_DOMAIN, "address family mismatch"
+		    " with previous hosts in servergroup or with rule"));
+	case ILB_STATUS_SGUNAVAIL:
+		return (dgettext(TEXT_DOMAIN, "cannot find specified"
+		    " server group"));
+	case ILB_STATUS_SGINUSE:
+		return (dgettext(TEXT_DOMAIN, "cannot remove server"
+		    " group - its in use with other active rules"));
+	case ILB_STATUS_SGEXISTS:
+		return (dgettext(TEXT_DOMAIN, "servergroup already exists"));
+	case ILB_STATUS_SGFULL:
+		return (dgettext(TEXT_DOMAIN, "servergroup is full - cannot"
+		    " add any more servers to this servergroup"));
+	case ILB_STATUS_SGEMPTY:
+		return (dgettext(TEXT_DOMAIN, "servergroup does not contain"
+		    " any servers"));
+	case ILB_STATUS_NAMETOOLONG:
+		return (dgettext(TEXT_DOMAIN, "servergroup name can"
+		    " only contain a maximum of 14 characters"));
+	case ILB_STATUS_CFGAUTH:
+		return (dgettext(TEXT_DOMAIN, "user is not authorized to"
+		    " execute command"));
+	case ILB_STATUS_CFGUPDATE:
+		return (dgettext(TEXT_DOMAIN, "a failure occurred while trying"
+		    " to update persistent config. Panic?"));
+	case ILB_STATUS_BADSG:
+		return (dgettext(TEXT_DOMAIN, "the rule's port range"
+		    " does not match that of the servers' in associated"
+		    " servergroup"));
+	case ILB_STATUS_INVAL_SRVR:
+		/*
+		 * The last fragment needs its own leading blank; without it
+		 * the concatenated literal read "incompatiblewith".
+		 */
+		return (dgettext(TEXT_DOMAIN, "server cannot be added to the"
+		    " servergroup, as the servergroup is associated to rule(s)"
+		    " with port/port range that is incompatible"
+		    " with the server's port"));
+	case ILB_STATUS_INVAL_ENBSRVR:
+		return (dgettext(TEXT_DOMAIN, "server cannot be enabled"
+		    " because it's not associated with any rule"));
+	case ILB_STATUS_BADPORT:
+		return (dgettext(TEXT_DOMAIN, "the rule's port value does"
+		    " not match that of the servers' in"
+		    " associated servergroup"));
+	case ILB_STATUS_SRVUNAVAIL:
+		return (dgettext(TEXT_DOMAIN, "cannot find specified server"));
+	case ILB_STATUS_RULE_NO_HC:
+		return (dgettext(TEXT_DOMAIN, "rule does not have health "
+		    "check enabled"));
+	case ILB_STATUS_RULE_HC_MISMATCH:
+		return (dgettext(TEXT_DOMAIN, "protocol used in rule and "
+		    "health check does not match"));
+	case ILB_STATUS_HANDLE_CLOSING:
+		return (dgettext(TEXT_DOMAIN, "handle is being closed"));
+
+	default:
+		return (dgettext(TEXT_DOMAIN, "unknown error"));
+	}
+}
+
+/*
+ * Allocate space for a specified request to be sent to ilbd.
+ *
+ * The buffer is zero-filled by calloc() and consists of an ilb_comm_t
+ * header followed by room for the payload that goes with "cmd".  On
+ * success the total size is stored in *ic_sz and the message is returned
+ * with ic_cmd set to "cmd".  If calloc() fails, NULL is returned and
+ * *ic_sz is not written.
+ */
+ilb_comm_t *
+i_ilb_alloc_req(ilbd_cmd_t cmd, size_t *ic_sz)
+{
+	ilb_comm_t	*req;
+	size_t		payload = 0;
+	size_t		total;
+
+	switch (cmd) {
+	/* requests carrying a full rule or health check description */
+	case ILBD_CREATE_RULE:
+		payload = sizeof (ilb_rule_info_t);
+		break;
+
+	case ILBD_CREATE_HC:
+		payload = sizeof (ilb_hc_info_t);
+		break;
+
+	/* requests carrying a single name */
+	case ILBD_RETRIEVE_RULE:
+	case ILBD_DESTROY_RULE:
+	case ILBD_ENABLE_RULE:
+	case ILBD_DISABLE_RULE:
+	case ILBD_RETRIEVE_SG_HOSTS:
+	case ILBD_DESTROY_SERVERGROUP:
+	case ILBD_CREATE_SERVERGROUP:
+	case ILBD_DESTROY_HC:
+	case ILBD_GET_HC_INFO:
+	case ILBD_GET_HC_SRVS:
+		payload = sizeof (ilbd_name_t);
+		break;
+
+	/* requests carrying a server group header plus one server entry */
+	case ILBD_ENABLE_SERVER:
+	case ILBD_DISABLE_SERVER:
+	case ILBD_ADD_SERVER_TO_GROUP:
+	case ILBD_REM_SERVER_FROM_GROUP:
+	case ILBD_SRV_ADDR2ID:
+	case ILBD_SRV_ID2ADDR:
+		payload = sizeof (ilb_sg_info_t) + sizeof (ilb_sg_srv_t);
+		break;
+
+	default:
+		/* Should not reach here. */
+		assert(0);
+		break;
+	}
+
+	total = sizeof (ilb_comm_t) + payload;
+	req = calloc(1, total);
+	if (req == NULL)
+		return (NULL);
+
+	req->ic_cmd = cmd;
+	req->ic_flags = 0;
+	*ic_sz = total;
+	return (req);
+}
diff --git a/usr/src/lib/libilb/common/libilb.h b/usr/src/lib/libilb/common/libilb.h
new file mode 100644
index 0000000000..1fc4d1954d
--- /dev/null
+++ b/usr/src/lib/libilb/common/libilb.h
@@ -0,0 +1,398 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBILB_H
+#define _LIBILB_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <net/if.h>
+
+/*
+ * Rule flags.  Make sure these values stay in sync with the definitions
+ * in ilb.h!
+ */
+#define ILB_FLAGS_RULE_ENABLED 0x01
+#define ILB_FLAGS_RULE_STICKY 0x02
+#define ILB_FLAGS_RULE_ALLRULES 0x04
+#define ILB_FLAGS_RESERVED 0x08 /* in use by kernel, don't overlay */
+
+/*
+ * Server flags: whether a server was specified by name or by number
+ */
+#define ILB_FLAGS_SRV_HOSTNAME 0x01 /* a server's hostname was given */
+#define ILB_FLAGS_SRV_PORTNAME 0x02 /* a port was spec'd by name */
+
+/*
+ * server status information
+ */
+#define ILB_FLAGS_SRV_ENABLED 0x10
+
+/*
+ * macros to determine, and for some cases, set status of server
+ *
+ * "f" is a flags word.  The IS macros only read it; the SET macros assign
+ * to it, so they need a modifiable lvalue.  The argument is parenthesized
+ * in every expansion so that an expression such as (a | b) can be passed
+ * to the IS macros without operator precedence changing the result.
+ */
+#define	ILB_IS_SRV_ENABLED(f)	\
+	(((f) & ILB_FLAGS_SRV_ENABLED) == ILB_FLAGS_SRV_ENABLED)
+#define	ILB_IS_SRV_DISABLED(f)	(((f) & ILB_FLAGS_SRV_ENABLED) == 0)
+
+#define	ILB_SET_ENABLED(f)	((f) |= ILB_FLAGS_SRV_ENABLED)
+#define	ILB_SET_DISABLED(f)	((f) &= ~ILB_FLAGS_SRV_ENABLED)
+
+#define MAX_IP_SPREAD 0xff /* largest ip addr. range */
+
+/* protocol/test name strings; presumably values for hci_test - confirm */
+#define ILB_HC_STR_UDP "udp"
+#define ILB_HC_STR_TCP "tcp"
+#define ILB_HC_STR_PING "ping"
+
+/*
+ * Name buffer sizes.  A server group name buffer is 5 bytes shorter than
+ * a generic name buffer.
+ */
+#define ILB_NAMESZ 20 /* keep in sync with kernel definition */
+#define ILB_SGNAME_SZ (ILB_NAMESZ - 5) /* 3 numeric digits, "." and "_" */
+
+#define ILB_SRVID_PREFIX '_' /* a valid serverID starts with this */
+
+/*
+ * Status codes; producers of these statuses are libilb and ilbd functions.
+ * ilb_errstr() turns a status into a message.  Values it has no case for
+ * (ILB_STATUS_EWOULDBLOCK and ILB_STATUS_GENERIC) are reported as
+ * "unknown error".
+ */
+typedef enum {
+ ILB_STATUS_OK = 0,
+ ILB_STATUS_INTERNAL, /* an error internal to the library */
+ ILB_STATUS_EINVAL, /* invalid argument(s) */
+ ILB_STATUS_ENOMEM, /* not enough memory for operation */
+ ILB_STATUS_ENOENT, /* no such/no more element(s) */
+ ILB_STATUS_SOCKET, /* socket related failure */
+ ILB_STATUS_READ, /* read related failure */
+ ILB_STATUS_WRITE, /* write related failure */
+ ILB_STATUS_TIMER, /* healthcheck timer error */
+ ILB_STATUS_INUSE, /* item in use, cannot delete */
+ ILB_STATUS_EEXIST, /* scf item already exists */
+ ILB_STATUS_PERMIT, /* no scf permit */
+ ILB_STATUS_CALLBACK, /* scf callback error */
+ ILB_STATUS_EWOULDBLOCK, /* operation is blocked - no error string */
+ ILB_STATUS_INPROGRESS, /* operation already in progress */
+ ILB_STATUS_SEND, /* send related failure */
+ ILB_STATUS_GENERIC, /* generic failure - no error string */
+ ILB_STATUS_ENOHCINFO, /* missing healthcheck info */
+ ILB_STATUS_INVAL_HCTESTTYPE, /* invalid health check test type */
+ ILB_STATUS_INVAL_CMD, /* unknown command */
+ ILB_STATUS_DUP_RULE, /* rule name exists */
+ ILB_STATUS_ENORULE, /* rule does not exist */
+ ILB_STATUS_MISMATCHSG, /* addr family mismatch with sgroup */
+ ILB_STATUS_MISMATCHH, /* addr family mismatch with hosts/rule */
+ ILB_STATUS_SGUNAVAIL, /* cannot find specified sgroup */
+ ILB_STATUS_SGINUSE, /* sgroup is in use, cannot remove */
+ ILB_STATUS_SGEXISTS, /* sgroup already exists */
+ ILB_STATUS_SGFULL, /* cannot add any more servers */
+ ILB_STATUS_SGEMPTY, /* sgroup is empty */
+ ILB_STATUS_NAMETOOLONG, /* a name is longer than allowed */
+ ILB_STATUS_CFGAUTH, /* config authorization denied */
+ ILB_STATUS_CFGUPDATE, /* failed to update config! */
+ ILB_STATUS_BADSG, /* rules port range size does not match */
+ /* that of the servers */
+ ILB_STATUS_INVAL_SRVR, /* server port is incompatible with */
+ /* rule port */
+ ILB_STATUS_INVAL_ENBSRVR, /* server cannot be enabled since it's */
+ /* not being used by a rule */
+ ILB_STATUS_BADPORT, /* rules port value does not match */
+ /* server's */
+ ILB_STATUS_SRVUNAVAIL, /* cannot find specified server */
+ ILB_STATUS_RULE_NO_HC, /* rule does not have hc info */
+ ILB_STATUS_RULE_HC_MISMATCH, /* rule and hc object mismatch */
+ ILB_STATUS_HANDLE_CLOSING /* library handle is being closed */
+} ilb_status_t;
+
+/*
+ * An IP address together with its address family.  ia_af says which
+ * member of the union is meaningful: ia_v4 or ia_v6.
+ */
+typedef struct {
+ int32_t ia_af; /* AF_INET or AF_INET6 */
+ union {
+ struct in_addr v4; /* network byte order */
+ struct in6_addr v6; /* network byte order */
+ } _au;
+#define ia_v4 _au.v4
+#define ia_v6 _au.v6
+} ilb_ip_addr_t;
+
+/*
+ * Supported load balancing algorithm type.  Numbering starts at 1, so 0 is
+ * not a valid algorithm value.
+ */
+typedef enum {
+ ILB_ALG_ROUNDROBIN = 1,
+ ILB_ALG_HASH_IP,
+ ILB_ALG_HASH_IP_SPORT,
+ ILB_ALG_HASH_IP_VIP
+} ilb_algo_t;
+
+/*
+ * Supported load balancing method (topology).  Numbering starts at 1, so 0
+ * is not a valid topology value.
+ */
+typedef enum {
+ ILB_TOPO_DSR = 1,
+ ILB_TOPO_NAT,
+ ILB_TOPO_HALF_NAT
+} ilb_topo_t;
+
+/* value of a handle that does not refer to any session */
+#define ILB_INVALID_HANDLE ((void *) NULL)
+
+/*
+ * Opaque session handle.
+ * note: pointer to a non-existent struct; "struct ilb_handle" is never
+ * defined in this header, so callers cannot look inside a handle.
+ */
+typedef struct ilb_handle *ilb_handle_t;
+
+/*
+ * Health check related information
+ */
+
+/* HC state of a server, as stored in hcs_status of ilb_hc_srv_t */
+typedef enum {
+ ILB_HCS_UNINIT = -1, /* Uninitialized */
+ ILB_HCS_UNREACH = 0, /* Unreachable, ping fails */
+ ILB_HCS_ALIVE, /* Probe succeeds */
+ ILB_HCS_DEAD, /* Probe fails */
+ ILB_HCS_DISABLED /* Server is disabled */
+} ilb_hc_srv_status_t;
+
+/*
+ * Struct representing a server in a hc object
+ *
+ * hcs_rule_name: rule using this server
+ * hcs_ID: server ID
+ * hcs_hc_name: hc object this server is associated with
+ * hcs_IP: IP address of the server
+ * hcs_fail_cnt: number of failed hc probes
+ * hcs_status: hc status of the server
+ * hcs_rtt: (in microsec) smoothed average RTT to the server
+ * hcs_lasttime: last time hc test was done (as returned by time(2))
+ * hcs_nexttime: next time hc test will be done (as returned by time(2))
+ */
+typedef struct {
+ char hcs_rule_name[ILB_NAMESZ];
+ char hcs_ID[ILB_NAMESZ];
+ char hcs_hc_name[ILB_NAMESZ];
+ struct in6_addr hcs_IP;
+ uint32_t hcs_fail_cnt;
+ ilb_hc_srv_status_t hcs_status;
+ uint32_t hcs_rtt;
+ time_t hcs_lasttime;
+ time_t hcs_nexttime;
+} ilb_hc_srv_t;
+
+/* Probe flags to be used in r_hcpflag in struct rule_data. */
+typedef enum {
+ ILB_HCI_PROBE_ANY = 0, /* Probe any port in the server port range */
+ ILB_HCI_PROBE_FIX /* Probe a fixed port */
+} ilb_hcp_flags_t;
+
+/*
+ * Struct representing a hc object
+ *
+ * hci_name: name of the hc object
+ * hci_test: hc test to be done, TCP, UDP, or user supplied path name
+ * hci_timeout: (in sec) test time out
+ * hci_count: NOTE(review): not described by the original comment;
+ *	presumably the number of probe attempts - confirm
+ * hci_interval: (in sec) test execution interval
+ * hci_def_ping: true if default ping is done; false otherwise
+ *
+ * NOTE(review): MAXPATHLEN is not defined by any header included directly
+ * here; presumably it arrives through one of them - confirm.
+ */
+typedef struct {
+ char hci_name[ILB_NAMESZ];
+ char hci_test[MAXPATHLEN];
+ int32_t hci_timeout;
+ int32_t hci_count;
+ int32_t hci_interval;
+ boolean_t hci_def_ping;
+} ilb_hc_info_t;
+
+/*
+ * Description of a rule as exchanged with callers of libilb: it is the
+ * argument of ilb_create_rule() and of the rule_walkerfunc_t call back.
+ * Fields marked "opt" are optional.
+ */
+typedef struct rule_data {
+ char r_name[ILB_NAMESZ]; /* name of this rule */
+ int32_t r_flags; /* opt: ILB_FLAGS_RULE_ENABLED etc. */
+ ilb_ip_addr_t r_vip; /* vip, required for rule creation */
+ uint16_t r_proto; /* protocol (tcp, udp) */
+ in_port_t r_minport; /* port this rule refers to */
+ in_port_t r_maxport; /* if != 0, defines port range */
+ ilb_algo_t r_algo; /* round-robin, hash-ip, etc. */
+ ilb_topo_t r_topo; /* dsr, NAT, etc */
+ ilb_ip_addr_t r_nat_src_start; /* required for NAT */
+ ilb_ip_addr_t r_nat_src_end; /* required for NAT */
+ ilb_ip_addr_t r_stickymask; /* netmask for persistence */
+ uint32_t r_conndrain; /* opt: time for conn. draining (s) */
+ uint32_t r_nat_timeout; /* opt: timeout for nat connections */
+ uint32_t r_sticky_timeout; /* opt: timeout for persistence */
+ ilb_hcp_flags_t r_hcpflag; /* HC port flag */
+ in_port_t r_hcport; /* opt with HC */
+ char r_sgname[ILB_SGNAME_SZ]; /* this rule's server grp. */
+ char r_hcname[ILB_NAMESZ]; /* HC name: optional */
+} ilb_rule_data_t;
+
+/*
+ * Description of a single server as exchanged with callers of libilb.
+ * Not all fields are valid in all calls where this is used.
+ */
+typedef struct server_data {
+ ilb_ip_addr_t sd_addr; /* a server's ip address */
+ in_port_t sd_minport; /* port information */
+ in_port_t sd_maxport; /* ... if != 0, defines a port range */
+ uint32_t sd_flags; /* enabled, dis- (ILB_FLAGS_SRV_*) */
+ char sd_srvID[ILB_NAMESZ]; /* "name" for server */
+ /* assigned by system, not user */
+} ilb_server_data_t;
+
+/*
+ * Struct to represent a server group.
+ *
+ * sgd_name: server group name
+ * sgd_flags: flags
+ * sgd_srvcount: number of servers in the group (not used in server group
+ * creation); filled in when used by call back function for
+ * ilb_walk_servergroups().
+ */
+typedef struct sg_data {
+ char sgd_name[ILB_SGNAME_SZ];
+ int32_t sgd_flags;
+ int32_t sgd_srvcount;
+} ilb_sg_data_t;
+
+/*
+ * Struct to represent a NAT entry in kernel.
+ *
+ * nat_proto: transport protocol used in this NAT entry
+ *
+ * nat_out_global: IP address of client's request (*)
+ * nat_out_global_port: port number of client's request
+ * nat_in_global: VIP of a rule for the NAT entry (*)
+ * nat_in_global_port: port of a rule for the NAT entry
+ *
+ * nat_out_local: half NAT: IP address of client's request (*)
+ * full NAT: NAT'ed IP address of client's request (*)
+ * nat_out_local_port: half NAT: port number of client's request
+ * full NAT: NAT'ed port number of client's request
+ * nat_in_local: IP address of back end server handling this request (*)
+ * nat_in_local_port: port number in back end server handling this request
+ *
+ * (*) IPv4 address is represented as IPv4 mapped IPv6 address.
+ */
+typedef struct {
+ uint32_t nat_proto;
+
+ in6_addr_t nat_in_local;
+ in6_addr_t nat_in_global;
+ in6_addr_t nat_out_local;
+ in6_addr_t nat_out_global;
+
+ in_port_t nat_in_local_port;
+ in_port_t nat_in_global_port;
+ in_port_t nat_out_local_port;
+ in_port_t nat_out_global_port;
+} ilb_nat_info_t;
+
+/*
+ * Struct to represent a persistent entry in kernel.
+ *
+ * persist_rule_name: the name of rule for a persistent entry
+ * persist_req_addr: the client's IP address (*)
+ * persist_srv_addr: the server's IP address (*) handling the client's
+ * request
+ *
+ * (*) IPv4 address is represented as IPv4 mapped IPv6 address.
+ */
+typedef struct {
+ char persist_rule_name[ILB_NAMESZ];
+ in6_addr_t persist_req_addr;
+ in6_addr_t persist_srv_addr;
+} ilb_persist_info_t;
+
+/*
+ * Function prototype of the call back function of those walker functions.
+ *
+ * Each call back receives the session handle, a pointer to one data item
+ * and an opaque void * argument; srv_walkerfunc_t additionally receives a
+ * const char * argument.
+ *
+ * Note: the storage of the data item parameter (ilb_sg_data_t/
+ * ilb_server_data_t/ilb_rule_data_t/ilb_hc_info_t/ilb_hc_srv_t) will be
+ * freed after calling the call back function. If the call back function
+ * needs to keep a copy of the data, it must copy the data content.
+ */
+typedef ilb_status_t (* sg_walkerfunc_t)(ilb_handle_t, ilb_sg_data_t *,
+ void *);
+typedef ilb_status_t (* srv_walkerfunc_t)(ilb_handle_t, ilb_server_data_t *,
+ const char *, void *);
+typedef ilb_status_t (* rule_walkerfunc_t)(ilb_handle_t, ilb_rule_data_t *,
+ void *);
+typedef ilb_status_t (* hc_walkerfunc_t)(ilb_handle_t, ilb_hc_info_t *,
+ void *);
+typedef ilb_status_t (* hc_srvwalkerfunc_t)(ilb_handle_t, ilb_hc_srv_t *,
+ void *);
+
+/*
+ * ilb_open creates a session handle that every caller into
+ * libilb needs to use
+ */
+ilb_status_t ilb_open(ilb_handle_t *);
+
+/*
+ * relinquish the session handle
+ */
+ilb_status_t ilb_close(ilb_handle_t);
+
+/* support and general functions */
+ilb_status_t ilb_reset_config(ilb_handle_t);
+const char *ilb_errstr(ilb_status_t);
+
+/* rule-related functions */
+ilb_status_t ilb_create_rule(ilb_handle_t, const ilb_rule_data_t *);
+ilb_status_t ilb_destroy_rule(ilb_handle_t, const char *);
+ilb_status_t ilb_disable_rule(ilb_handle_t, const char *);
+ilb_status_t ilb_enable_rule(ilb_handle_t, const char *);
+ilb_status_t ilb_walk_rules(ilb_handle_t, rule_walkerfunc_t, const char *,
+ void *);
+
+/* servergroup functionality */
+ilb_status_t ilb_create_servergroup(ilb_handle_t, const char *);
+ilb_status_t ilb_destroy_servergroup(ilb_handle_t, const char *);
+ilb_status_t ilb_add_server_to_group(ilb_handle_t, const char *,
+ ilb_server_data_t *);
+ilb_status_t ilb_rem_server_from_group(ilb_handle_t, const char *,
+ ilb_server_data_t *);
+ilb_status_t ilb_walk_servergroups(ilb_handle_t, sg_walkerfunc_t,
+ const char *, void *);
+ilb_status_t ilb_walk_servers(ilb_handle_t, srv_walkerfunc_t,
+ const char *, void *);
+
+/* functions for individual servers */
+ilb_status_t ilb_enable_server(ilb_handle_t, ilb_server_data_t *, void *);
+ilb_status_t ilb_disable_server(ilb_handle_t, ilb_server_data_t *, void *);
+ilb_status_t ilb_srvID_to_address(ilb_handle_t, ilb_server_data_t *,
+ const char *);
+ilb_status_t ilb_address_to_srvID(ilb_handle_t, ilb_server_data_t *,
+ const char *);
+
+/* health check-related functions */
+ilb_status_t ilb_create_hc(ilb_handle_t, const ilb_hc_info_t *);
+ilb_status_t ilb_destroy_hc(ilb_handle_t, const char *);
+ilb_status_t ilb_get_hc_info(ilb_handle_t, const char *, ilb_hc_info_t *);
+ilb_status_t ilb_walk_hc(ilb_handle_t, hc_walkerfunc_t, void *);
+ilb_status_t ilb_walk_hc_srvs(ilb_handle_t, hc_srvwalkerfunc_t,
+ const char *, void *);
+
+/* To show NAT table entries of ILB */
+ilb_status_t ilb_show_nat(ilb_handle_t, ilb_nat_info_t[], size_t *,
+ boolean_t *);
+
+/* To show persistent table entries of ILB */
+ilb_status_t ilb_show_persist(ilb_handle_t, ilb_persist_info_t[], size_t *,
+ boolean_t *);
+
+/* PRIVATE */
+int ilb_cmp_ipaddr(ilb_ip_addr_t *, ilb_ip_addr_t *, int64_t *);
+int ilb_cmp_in6_addr(struct in6_addr *, struct in6_addr *, int64_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBILB_H */
diff --git a/usr/src/lib/libilb/common/libilb_impl.h b/usr/src/lib/libilb/common/libilb_impl.h
new file mode 100644
index 0000000000..7636c37c56
--- /dev/null
+++ b/usr/src/lib/libilb/common/libilb_impl.h
@@ -0,0 +1,253 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBILB_IMPL_H
+#define _LIBILB_IMPL_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/note.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include <net/if.h>
+#include <inet/ilb.h>
+#include <libilb.h>
+#include <thread.h>
+#include <synch.h>
+
+/*
+ * Fallback definition of max(), used only if none exists yet.
+ * Note that each argument is evaluated twice.
+ */
+#if !defined max
+#define max(a, b) ((a) > (b) ? (a) : (b))
+#endif
+
+/* The UNIX domain socket path to talk to ilbd. */
+#define SOCKET_PATH "/var/run/daemon/ilb_sock"
+
+/* The max message size for communicating with ilbd */
+#define ILBD_MSG_SIZE 102400
+
+/*
+ * moral equivalent of ntohl for IPv6 addresses: each macro yields one
+ * 64-bit half of the address in host byte order, for assignment to a
+ * uint64_t variable.  32-bit words 0 and 1 form the MSB half, words 2 and
+ * 3 the LSB half.  "addr" is a pointer to an address that has the
+ * _S6_un._S6_u32[] member.
+ */
+#define	INV6_N2H_MSB64(addr)					\
+	(((uint64_t)ntohl((addr)->_S6_un._S6_u32[0]) << 32) |	\
+	(uint64_t)ntohl((addr)->_S6_un._S6_u32[1]))
+
+#define	INV6_N2H_LSB64(addr)					\
+	(((uint64_t)ntohl((addr)->_S6_un._S6_u32[2]) << 32) |	\
+	(uint64_t)ntohl((addr)->_S6_un._S6_u32[3]))
+
+/*
+ * moral equiv. of htonl of MSB and LSB 64-bit portions to an IPv6 address
+ */
+#define INV6_H2N_MSB64(addr, msb) \
+ (addr)->_S6_un._S6_u32[0] = htonl((msb) >> 32); \
+ (addr)->_S6_un._S6_u32[1] = htonl((msb) & 0xffffffff)
+
+#define INV6_H2N_LSB64(addr, lsb) \
+ (addr)->_S6_un._S6_u32[2] = htonl((lsb) >> 32); \
+ (addr)->_S6_un._S6_u32[3] = htonl((lsb) & 0xffffffff)
+
+#define IP_COPY_CLI_2_IMPL(_e, _i) \
+ bzero(_i, sizeof (*(_i))); \
+ if ((_e)->ia_af == AF_INET6) \
+ (void) memcpy((_i), &(_e)->ia_v6, sizeof (*(_i))); \
+ else \
+ IN6_INADDR_TO_V4MAPPED(&(_e)->ia_v4, (_i))
+
+/*
+ * Copy the address _i points to into the ilb_ip_addr_t _e points to.
+ * *_e is zeroed first.  A v4-mapped address is stored as AF_INET in
+ * ia_v4; every other address is stored as AF_INET6 in ia_v6.
+ */
+#define IP_COPY_IMPL_2_CLI(_i, _e) \
+ do { \
+ bzero(_e, sizeof (*(_e))); \
+ if (IN6_IS_ADDR_V4MAPPED(_i)) { \
+ (_e)->ia_af = AF_INET; \
+ IN6_V4MAPPED_TO_INADDR((_i), &(_e)->ia_v4); \
+ } else { \
+ (_e)->ia_af = AF_INET6; \
+ (void) memcpy(&(_e)->ia_v6, (_i), \
+ sizeof ((_e)->ia_v6)); \
+ } \
+ _NOTE(CONSTCOND) \
+ } while (0)
+
+/*
+ * GET_AF yields AF_INET for a v4-mapped address and AF_INET6 otherwise;
+ * IS_AF_VALID is true for exactly those two families.  Both expansions
+ * are fully parenthesized so they can be used inside larger expressions
+ * (e.g. "af == GET_AF(a)").  IS_AF_VALID evaluates its argument twice.
+ */
+#define	GET_AF(_a)	(IN6_IS_ADDR_V4MAPPED(_a) ? AF_INET : AF_INET6)
+#define	IS_AF_VALID(_af)	((_af) == AF_INET || (_af) == AF_INET6)
+
+/*
+ * Message types carried in ic_cmd of ilb_comm_t.  Besides the request
+ * codes, the list ends with the two result codes ILBD_CMD_OK and
+ * ILBD_CMD_ERROR.
+ */
+typedef enum {
+ ILBD_BAD_CMD = 0,
+ /* servergroup commands */
+ ILBD_CREATE_SERVERGROUP,
+ ILBD_ADD_SERVER_TO_GROUP,
+ ILBD_REM_SERVER_FROM_GROUP,
+ ILBD_ENABLE_SERVER,
+ ILBD_DISABLE_SERVER,
+ ILBD_DESTROY_SERVERGROUP,
+ ILBD_RETRIEVE_SG_NAMES, /* names of all SGs registered */
+ ILBD_RETRIEVE_SG_HOSTS, /* all hosts for a given SG (hndl) */
+ ILBD_SRV_ADDR2ID, /* fill in serverID for given address */
+ ILBD_SRV_ID2ADDR, /* fill in address from given serverID */
+ /* rule commands */
+ ILBD_CREATE_RULE,
+ ILBD_DESTROY_RULE,
+ ILBD_ENABLE_RULE,
+ ILBD_DISABLE_RULE,
+ ILBD_RETRIEVE_RULE_NAMES,
+ ILBD_RETRIEVE_RULE,
+
+ /* health check commands */
+ ILBD_CREATE_HC,
+ ILBD_DESTROY_HC,
+ ILBD_GET_HC_INFO,
+ ILBD_GET_HC_SRVS,
+ ILBD_GET_HC_RULES,
+ ILBD_RETRIEVE_HC_NAMES,
+
+ ILBD_SHOW_NAT, /* list the NAT table */
+ ILBD_SHOW_PERSIST, /* list the sticky table */
+
+ ILBD_CMD_OK, /* Requested operation succeeds. */
+ ILBD_CMD_ERROR /* Requested operation fails. */
+} ilbd_cmd_t;
+
+/*
+ * One server of a server group in the library-internal representation;
+ * elements of this type make up sg_servers[] in ilb_sg_info_t.
+ */
+typedef struct sg_srv {
+ int32_t sgs_flags; /* enabled, dis- */
+ struct in6_addr sgs_addr;
+ int32_t sgs_minport;
+ int32_t sgs_maxport;
+ int32_t sgs_id; /* numerical part of srvID */
+ char sgs_srvID[ILB_NAMESZ]; /* "name" given to server */
+} ilb_sg_srv_t;
+
+/*
+ * A server group in the library-internal representation.  sg_servers is a
+ * flexible array member, so sizeof (ilb_sg_info_t) does not include any
+ * server entries; presumably sg_srvcount is the number of entries that
+ * follow - confirm.
+ */
+typedef struct sg_info {
+ int32_t sg_flags;
+ char sg_name[ILB_SGNAME_SZ];
+ int32_t sg_srvcount;
+ ilb_sg_srv_t sg_servers[];
+} ilb_sg_info_t;
+
+/* fixed-size buffer for one name */
+typedef char ilbd_name_t[ILB_NAMESZ];
+
+/*
+ * A list of names.  ilbl_name is a flexible array member; presumably
+ * ilbl_count is the number of names that follow - confirm.
+ */
+typedef struct ilbd_namelist {
+ int32_t ilbl_flags;
+ int32_t ilbl_count;
+ ilbd_name_t ilbl_name[];
+} ilbd_namelist_t;
+
+/*
+ * Byte offset of the first name within an ilbd_namelist_t.
+ * NOTE(review): offsetof() comes from <stddef.h>, which is not among the
+ * headers included directly here - confirm it is pulled in indirectly.
+ */
+#define ILBL_NAME_OFFSET (offsetof(ilbd_namelist_t, ilbl_name))
+
+/*
+ * A rule in the library-internal representation.  The fields mirror those
+ * of ilb_rule_data_t, but every address is a plain struct in6_addr and
+ * the address family is carried separately in rl_ipversion.
+ */
+typedef struct rule_info {
+ int32_t rl_flags;
+ char rl_name[ILB_NAMESZ];
+ struct in6_addr rl_vip;
+ uint16_t rl_proto;
+ uint16_t rl_ipversion;
+ int32_t rl_minport;
+ int32_t rl_maxport;
+ ilb_algo_t rl_algo;
+ ilb_topo_t rl_topo;
+ struct in6_addr rl_nat_src_start;
+ struct in6_addr rl_nat_src_end;
+ struct in6_addr rl_stickymask;
+ uint32_t rl_conndrain;
+ uint32_t rl_nat_timeout;
+ uint32_t rl_sticky_timeout;
+ in_port_t rl_hcport;
+ ilb_hcp_flags_t rl_hcpflag;
+ char rl_sgname[ILB_SGNAME_SZ];
+ char rl_hcname[ILB_NAMESZ];
+} ilb_rule_info_t;
+
+/*
+ * Struct to represent show NAT/persist request and reply.
+ *
+ * sn_num: (request) indicates the number of entries wanted;
+ * (reply) the number of entries returned;
+ * sn_data: NAT/persist table entries (is uint32_t aligned).
+ */
+typedef struct {
+ uint32_t sn_num;
+ uint32_t sn_data[];
+} ilb_show_info_t;
+
+/*
+ * Struct to represent the set of servers associated with a hc object.
+ *
+ * rs_num_srvs: number of servers in this struct.
+ * rs_srvs: array of servers (flexible array member, not counted by
+ * sizeof).
+ */
+typedef struct {
+ uint32_t rs_num_srvs;
+ ilb_hc_srv_t rs_srvs[];
+} ilb_hc_rule_srv_t;
+
+/*
+ * Library-private per-session state; presumably what an ilb_handle_t
+ * actually points to - confirm.  It holds a mutex and condition variable,
+ * state flags, a waiter count, a socket descriptor and the status that
+ * caused the handle to be invalidated.
+ */
+typedef struct ilb_handle_impl {
+ mutex_t h_lock;
+ cond_t h_cv;
+ boolean_t h_busy;
+ boolean_t h_valid;
+ boolean_t h_closing;
+ uint32_t h_waiter;
+ int h_socket;
+ ilb_status_t h_error; /* ... that caused invalidation */
+} ilb_handle_impl_t;
+
+/*
+ * Communication flags used in ilb_comm_t.
+ *
+ * ILB_COMM_END: end of communication
+ */
+#define ILB_COMM_END 0x1
+
+/*
+ * The message structure used to communicate with ilbd.
+ *
+ * ic_cmd: the message type.
+ * ic_flags: communication flags
+ * ic_data: message data (is uint32_t aligned).  This is a flexible array
+ * member, so sizeof (ilb_comm_t) covers the header only.
+ */
+typedef struct {
+ ilbd_cmd_t ic_cmd;
+ int32_t ic_flags;
+ uint32_t ic_data[];
+} ilb_comm_t;
+
+ilb_status_t i_check_ip_range(ilb_ip_addr_t *, ilb_ip_addr_t *);
+ilb_status_t i_ilb_do_comm(ilb_handle_t, ilb_comm_t *, size_t, ilb_comm_t *,
+ size_t *);
+void i_ilb_close_comm(ilb_handle_t);
+struct in6_addr i_next_ip_addr(struct in6_addr *, int);
+
+ilb_status_t i_ilb_retrieve_rule_names(ilb_handle_t, ilb_comm_t **,
+ size_t *);
+ilb_comm_t *i_ilb_alloc_req(ilbd_cmd_t, size_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBILB_IMPL_H */
diff --git a/usr/src/lib/libilb/common/llib-lilb b/usr/src/lib/libilb/common/llib-lilb
new file mode 100644
index 0000000000..a57ef8dd3e
--- /dev/null
+++ b/usr/src/lib/libilb/common/llib-lilb
@@ -0,0 +1,29 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*LINTLIBRARY*/
+/*PROTOLIB1*/
+
+#include <libilb.h>
diff --git a/usr/src/lib/libilb/common/mapfile-vers b/usr/src/lib/libilb/common/mapfile-vers
new file mode 100644
index 0000000000..353a0a6958
--- /dev/null
+++ b/usr/src/lib/libilb/common/mapfile-vers
@@ -0,0 +1,78 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+
+# Symbols exported under the SUNW_1.1 version name.
+SUNW_1.1 {
+ global:
+ ilb_add_server_to_group;
+ ilb_address_to_srvID;
+ ilb_close;
+ ilb_create_hc;
+ ilb_create_rule;
+ ilb_create_servergroup;
+ ilb_destroy_hc;
+ ilb_destroy_rule;
+ ilb_destroy_servergroup;
+ ilb_disable_rule;
+ ilb_disable_server;
+ ilb_enable_rule;
+ ilb_enable_server;
+ ilb_errstr;
+ ilb_get_hc_info;
+ ilb_open;
+ ilb_rem_server_from_group;
+ ilb_reset_config;
+ ilb_srvID_to_address;
+ ilb_show_nat;
+ ilb_show_persist;
+ ilb_walk_hc;
+ ilb_walk_hc_srvs;
+ ilb_walk_rules;
+ ilb_walk_servergroups;
+ ilb_walk_servers;
+};
+
+# Private symbols; every symbol not named in this file is local.
+SUNWprivate {
+ global:
+ ilb_cmp_in6_addr;
+ ilb_cmp_ipaddr;
+ local:
+ *;
+};
+
diff --git a/usr/src/lib/libilb/i386/Makefile b/usr/src/lib/libilb/i386/Makefile
new file mode 100644
index 0000000000..c86be4377c
--- /dev/null
+++ b/usr/src/lib/libilb/i386/Makefile
@@ -0,0 +1,28 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+# Pull in the definitions kept one directory up.
+include ../Makefile.com
+
+# $(ROOTLIBS), $(ROOTLINKS) and $(ROOTLINT) are not defined in this file.
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/libilb/sparc/Makefile b/usr/src/lib/libilb/sparc/Makefile
new file mode 100644
index 0000000000..c86be4377c
--- /dev/null
+++ b/usr/src/lib/libilb/sparc/Makefile
@@ -0,0 +1,28 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/libilb/sparcv9/Makefile b/usr/src/lib/libilb/sparcv9/Makefile
new file mode 100644
index 0000000000..f1fa7d3c45
--- /dev/null
+++ b/usr/src/lib/libilb/sparcv9/Makefile
@@ -0,0 +1,34 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#
+
+include ../Makefile.com
+include ../../Makefile.lib.64
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64)
diff --git a/usr/src/lib/libinetutil/common/libinetutil.h b/usr/src/lib/libinetutil/common/libinetutil.h
index bacf64938a..a285103af6 100644
--- a/usr/src/lib/libinetutil/common/libinetutil.h
+++ b/usr/src/lib/libinetutil/common/libinetutil.h
@@ -158,7 +158,7 @@ typedef struct iu_timer_queue iu_tq_t;
typedef int iu_timer_id_t;
-#define IU_TIMER_ID_MAX 1024 /* max number of concurrent timers */
+#define IU_TIMER_ID_MAX 4096 /* max number of concurrent timers */
/*
* a iu_tq_callback_t is a function that is called back in response to a
diff --git a/usr/src/lib/libinetutil/common/ofmt.c b/usr/src/lib/libinetutil/common/ofmt.c
index 2bfc4aafa4..c0fce1839e 100644
--- a/usr/src/lib/libinetutil/common/ofmt.c
+++ b/usr/src/lib/libinetutil/common/ofmt.c
@@ -66,6 +66,8 @@ typedef struct ofmt_state_s {
boolean_t os_wrap;
int os_nbad;
char **os_badfields;
+ boolean_t os_multiline;
+ int os_maxnamelen; /* longest name (f. multiline) */
} ofmt_state_t;
/*
* A B_TRUE return value from the callback function will print out the contents
@@ -187,9 +189,12 @@ ofmt_open(const char *str, const ofmt_field_t *template, uint_t flags,
ofmt_status_t err = OFMT_SUCCESS;
boolean_t parsable = ((flags & OFMT_PARSABLE) != 0);
boolean_t wrap = ((flags & OFMT_WRAP) != 0);
+ boolean_t multiline = (flags & OFMT_MULTILINE);
*ofmt = NULL;
if (parsable) {
+ if (multiline)
+ return (OFMT_EPARSEMULTI);
/*
* For parsable output mode, the caller always needs
* to specify precisely which fields are to be selected,
@@ -229,6 +234,7 @@ ofmt_open(const char *str, const ofmt_field_t *template, uint_t flags,
os->os_parsable = parsable;
os->os_wrap = wrap;
+ os->os_multiline = multiline;
of = os->os_fields;
of_index = 0;
/*
@@ -260,6 +266,11 @@ ofmt_open(const char *str, const ofmt_field_t *template, uint_t flags,
of[of_index].of_name = strdup(template[j].of_name);
if (of[of_index].of_name == NULL)
goto nomem;
+ if (multiline) {
+ int n = strlen(of[of_index].of_name);
+
+ os->os_maxnamelen = MAX(n, os->os_maxnamelen);
+ }
of[of_index].of_width = template[j].of_width;
of[of_index].of_id = template[j].of_id;
of[of_index].of_cb = template[j].of_cb;
@@ -310,14 +321,13 @@ ofmt_print_field(ofmt_state_t *os, ofmt_field_t *ofp, const char *value,
uint_t width = ofp->of_width;
uint_t valwidth;
uint_t compress;
- boolean_t parsable = os->os_parsable;
char c;
/*
* Parsable fields are separated by ':'. If such a field contains
* a ':' or '\', this character is prefixed by a '\'.
*/
- if (parsable) {
+ if (os->os_parsable) {
if (os->os_nfields == 1) {
(void) printf("%s", value);
return;
@@ -329,7 +339,13 @@ ofmt_print_field(ofmt_state_t *os, ofmt_field_t *ofp, const char *value,
}
if (!os->os_lastfield)
(void) putchar(':');
- return;
+ } else if (os->os_multiline) {
+ if (value[0] == '\0')
+ value = OFMT_VAL_UNDEF;
+ (void) printf("%*.*s: %s", os->os_maxnamelen,
+ os->os_maxnamelen, ofp->of_name, value);
+ if (!os->os_lastfield)
+ (void) putchar('\n');
} else {
if (os->os_lastfield) {
(void) printf("%s", value);
@@ -408,11 +424,15 @@ ofmt_print(ofmt_handle_t ofmt, void *arg)
return;
}
- if ((os->os_nrow++ % os->os_winsize.ws_row) == 0 && !os->os_parsable) {
+ if ((os->os_nrow++ % os->os_winsize.ws_row) == 0 && !os->os_parsable &&
+ !os->os_multiline) {
ofmt_print_header(os);
os->os_nrow++;
}
+ if (os->os_multiline && os->os_nrow > 1)
+ (void) putchar('\n');
+
of = os->os_fields;
escsep = (os->os_nfields > 1);
more_rows = B_FALSE;
@@ -549,6 +569,9 @@ ofmt_strerror(ofmt_handle_t ofmt, ofmt_status_t err, char *buf, uint_t bufsize)
case OFMT_ENOFIELDS:
s = "no valid output fields";
break;
+ case OFMT_EPARSEMULTI:
+ s = "multiline mode incompatible with parsable mode";
+ break;
case OFMT_EPARSEALL:
s = "output field `all' invalid in parsable mode";
break;
diff --git a/usr/src/lib/libinetutil/common/ofmt.h b/usr/src/lib/libinetutil/common/ofmt.h
index d3e4142578..81693ae325 100644
--- a/usr/src/lib/libinetutil/common/ofmt.h
+++ b/usr/src/lib/libinetutil/common/ofmt.h
@@ -35,6 +35,8 @@
* are separated by ':', with the ':' character itself escaped by a \
* (e.g., IPv6 addresses may be printed as "fe80\:\:1"); single field output
* is printed as-is.
+ * In multiline mode, every [field,value] pair is printed on a line of
+ * its own, thus: "field: value".
*
* The caller must open a handle for each set of fields to be printed by
* invoking ofmt_open(). The invocation to ofmt_open must provide the list of
@@ -57,6 +59,9 @@
* (non machine-parsable) mode, a NULL fields_str, or a value of "all" for
* fields_str, is treated as a request to print all allowable fields that fit
* other applicable constraints.
+ * To achieve multiline mode, OFMT_MULTILINE needs to be specified in oflags.
+ * Specifying both OFMT_MULTILINE and OFMT_PARSABLE will result in
+ * OFMT_EPARSEMULTI.
*
* Thus a typical invocation to open the ofmt_handle would be:
*
@@ -135,7 +140,8 @@ typedef enum {
OFMT_EPARSEALL, /* 'all' invalid in parsable mode */
OFMT_EPARSENONE, /* output fields missing in parsable mode */
OFMT_EPARSEWRAP, /* parsable mode incompatible with wrap mode */
- OFMT_ENOTEMPLATE /* no template provided for fields */
+ OFMT_ENOTEMPLATE, /* no template provided for fields */
+ OFMT_EPARSEMULTI /* parsable and multiline don't mix */
} ofmt_status_t;
/*
@@ -171,6 +177,7 @@ extern ofmt_status_t ofmt_open(const char *, const ofmt_field_t *, uint_t,
#define OFMT_PARSABLE 0x00000001 /* machine parsable mode */
#define OFMT_WRAP 0x00000002 /* wrap output if field width is exceeded */
+#define OFMT_MULTILINE 0x00000004 /* "long" output: "name: value" lines */
/*
* ofmt_close() must be called to free resources associated
diff --git a/usr/src/lib/libinetutil/common/tq.c b/usr/src/lib/libinetutil/common/tq.c
index 78505462bd..e809b3289d 100644
--- a/usr/src/lib/libinetutil/common/tq.c
+++ b/usr/src/lib/libinetutil/common/tq.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <stdlib.h>
#include <limits.h>
#include <sys/time.h>
@@ -342,7 +339,14 @@ iu_expire_timers(iu_tq_t *tq)
for (node = tq->iutq_head; node != NULL;
node = node->iutn_expire_next) {
- if (node->iutn_abs_timeout > current_time)
+ /*
+ * If the timeout is within 1 millisec of current time,
+ * consider it as expired already. We do this because
+ * iu_earliest_timer() only has millisec granularity.
+ * So we should also use millisec granularity in
+ * comparing timeout values.
+ */
+ if (node->iutn_abs_timeout - current_time > 1000000)
break;
/*
diff --git a/usr/src/lib/libsecdb/auth_attr.txt b/usr/src/lib/libsecdb/auth_attr.txt
index 12123ba3de..8929f8a694 100644
--- a/usr/src/lib/libsecdb/auth_attr.txt
+++ b/usr/src/lib/libsecdb/auth_attr.txt
@@ -92,6 +92,8 @@ solaris.mms.io.write:::Read and Write Permission for MMS Volumes::help=AuthMMSIO
#
solaris.network.:::Network::help=NetworkHeader.html
solaris.network.autoconf:::Network Auto-Magic Configuration::help=NetworkAutoconf.html
+solaris.network.ilb.config:::Network ILB Configuration::help=NetworkILBconf.html
+solaris.network.ilb.enable:::Network ILB Enable Configuration::help=NetworkILBenable.html
solaris.network.link.security:::Link Security::help=LinkSecurity.html
solaris.network.wifi.config:::Wifi Config::help=WifiConfig.html
solaris.network.wifi.wep:::Wifi Wep::help=WifiWep.html
@@ -134,6 +136,7 @@ solaris.smf.manage.extended-accounting.flow:::Manage Task Extended Accounting Se
solaris.smf.manage.hal:::Manage HAL Service States::help=SmfHALStates.html
solaris.smf.manage.hotplug:::Manage Hotplug Service::help=SmfManageHotplug.html
solaris.smf.manage.idmap:::Manage Identity Mapping Service States::help=SmfIdmapStates.html
+solaris.smf.manage.ilb:::Manage Integrated Load Balancer Service States::help=SmfILBStates.html
solaris.smf.manage.inetd:::Manage inetd and inetd managed services States::help=SmfIntedStates.html
solaris.smf.manage.ipsec:::Manage IPsec Service States::help=SmfIPsecStates.html
solaris.smf.manage.labels:::Manage label server::help=LabelServer.html
diff --git a/usr/src/lib/libsecdb/help/auths/Makefile b/usr/src/lib/libsecdb/help/auths/Makefile
index 1d1f131d1d..4c30de414d 100644
--- a/usr/src/lib/libsecdb/help/auths/Makefile
+++ b/usr/src/lib/libsecdb/help/auths/Makefile
@@ -74,6 +74,7 @@ HTMLENTS = \
SmfExAcctTaskStates.html \
SmfExAcctNetStates.html \
SmfHeader.html \
+ SmfILBStates.html \
SmfInetdStates.html \
SmfIPsecStates.html \
SmfManageHeader.html \
@@ -120,6 +121,8 @@ HTMLENTS = \
SmfValueVt.html \
SmfWpaStates.html \
NetworkAutoconf.html \
+ NetworkILBconf.html \
+ NetworkILBenable.html \
NetworkHeader.html \
WifiConfig.html \
WifiWep.html \
diff --git a/usr/src/lib/libsecdb/help/auths/NetworkILBconf.html b/usr/src/lib/libsecdb/help/auths/NetworkILBconf.html
new file mode 100644
index 0000000000..b0a175258d
--- /dev/null
+++ b/usr/src/lib/libsecdb/help/auths/NetworkILBconf.html
@@ -0,0 +1,41 @@
+<html>
+
+<!--
+ Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ Use is subject to license terms.
+
+ CDDL HEADER START
+
+ The contents of this file are subject to the terms of the
+ Common Development and Distribution License (the "License").
+ You may not use this file except in compliance with the License.
+
+ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ or http://www.opensolaris.org/os/licensing.
+ See the License for the specific language governing permissions
+ and limitations under the License.
+
+ When distributing Covered Code, include this CDDL HEADER in each
+ file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ If applicable, add the following below this CDDL HEADER, with the
+ fields enclosed by brackets "[]" replaced with your own identifying
+ information: Portions Copyright [yyyy] [name of copyright owner]
+
+ CDDL HEADER END
+-->
+
+<head>
+<!--
+meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"
+-->
+</head>
+<body>
+When Network ILB Configuration is in the Authorizations
+Included column, it grants permission to administrate the add,
+create and destroy subcommands of ilbadm(1M).
+<p>
+Note that retrieving configuration and state information about ILB
+by using ilbadm(1M) does not require the Network ILB Configuration
+authorization.
+</body>
+</html>
diff --git a/usr/src/lib/libsecdb/help/auths/NetworkILBenable.html b/usr/src/lib/libsecdb/help/auths/NetworkILBenable.html
new file mode 100644
index 0000000000..3cb93b237e
--- /dev/null
+++ b/usr/src/lib/libsecdb/help/auths/NetworkILBenable.html
@@ -0,0 +1,41 @@
+<html>
+
+<!--
+ Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ Use is subject to license terms.
+
+ CDDL HEADER START
+
+ The contents of this file are subject to the terms of the
+ Common Development and Distribution License (the "License").
+ You may not use this file except in compliance with the License.
+
+ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ or http://www.opensolaris.org/os/licensing.
+ See the License for the specific language governing permissions
+ and limitations under the License.
+
+ When distributing Covered Code, include this CDDL HEADER in each
+ file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ If applicable, add the following below this CDDL HEADER, with the
+ fields enclosed by brackets "[]" replaced with your own identifying
+ information: Portions Copyright [yyyy] [name of copyright owner]
+
+ CDDL HEADER END
+-->
+
+<head>
+<!--
+meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"
+-->
+</head>
+<body>
+When Network ILB Enable Configuration is in the Authorizations
+Included column, it grants permission to administrate the enable
+and disable subcommands of ilbadm(1M).
+<p>
+Note that retrieving configuration and state information about ILB
+by using ilbadm(1M) does not require the Network ILB Configuration
+authorization.
+</body>
+</html>
diff --git a/usr/src/lib/libsecdb/help/auths/SmfILBStates.html b/usr/src/lib/libsecdb/help/auths/SmfILBStates.html
new file mode 100644
index 0000000000..5e4bc62bf3
--- /dev/null
+++ b/usr/src/lib/libsecdb/help/auths/SmfILBStates.html
@@ -0,0 +1,36 @@
+<HTML>
+<!--
+ CDDL HEADER START
+
+ The contents of this file are subject to the terms of the
+ Common Development and Distribution License (the "License").
+ You may not use this file except in compliance with the License.
+
+ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ or http://www.opensolaris.org/os/licensing.
+ See the License for the specific language governing permissions
+ and limitations under the License.
+
+ When distributing Covered Code, include this CDDL HEADER in each
+ file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ If applicable, add the following below this CDDL HEADER, with the
+ fields enclosed by brackets "[]" replaced with your own identifying
+ information: Portions Copyright [yyyy] [name of copyright owner]
+
+ CDDL HEADER END
+
+Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+Use is subject to license terms.
+-->
+<!-- <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
+-->
+<BODY>
+When Integrated Load Balancer Service States is in the Authorizations
+Included column, it grants the authorization to enable, disable, or restart
+the Integrated Load Balancer service.
+<p>
+If Integrated Load Balancer Service States is grayed, then you are not entitled
+to Add or Remove this authorization.
+<BR>&nbsp;
+</BODY>
+</HTML>
diff --git a/usr/src/lib/libsecdb/help/profiles/Makefile b/usr/src/lib/libsecdb/help/profiles/Makefile
index fab13a3051..654138efd6 100644
--- a/usr/src/lib/libsecdb/help/profiles/Makefile
+++ b/usr/src/lib/libsecdb/help/profiles/Makefile
@@ -60,6 +60,7 @@ HTMLENTS = \
RtNameServiceAdmin.html \
RtNameServiceSecure.html \
RtNetAutoconf.html \
+ RtNetILB.html \
RtNetIPsec.html \
RtNetMngmnt.html \
RtNetObservability.html \
diff --git a/usr/src/lib/libsecdb/help/profiles/RtNetILB.html b/usr/src/lib/libsecdb/help/profiles/RtNetILB.html
new file mode 100644
index 0000000000..95a7f99bef
--- /dev/null
+++ b/usr/src/lib/libsecdb/help/profiles/RtNetILB.html
@@ -0,0 +1,37 @@
+<HTML>
+<!--
+ CDDL HEADER START
+
+ The contents of this file are subject to the terms of the
+ Common Development and Distribution License (the "License").
+ You may not use this file except in compliance with the License.
+
+ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ or http://www.opensolaris.org/os/licensing.
+ See the License for the specific language governing permissions
+ and limitations under the License.
+
+ When distributing Covered Code, include this CDDL HEADER in each
+ file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ If applicable, add the following below this CDDL HEADER, with the
+ fields enclosed by brackets "[]" replaced with your own identifying
+ information: Portions Copyright [yyyy] [name of copyright owner]
+
+ CDDL HEADER END
+
+-- Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+-- Use is subject to license terms.
+-->
+<head>
+<title></title>
+</head>
+<body>
+When Network ILB is in the Rights Included column, it grants the
+right to manage the configuration of the ILB feature by using the
+ilbadm(1M) command.
+<p>
+If Network ILB is grayed, then you are not entitled to Add or
+Remove this right.
+<p>
+</body>
+</html>
diff --git a/usr/src/lib/libsecdb/prof_attr.txt b/usr/src/lib/libsecdb/prof_attr.txt
index ad497ee369..c20338c570 100644
--- a/usr/src/lib/libsecdb/prof_attr.txt
+++ b/usr/src/lib/libsecdb/prof_attr.txt
@@ -62,7 +62,8 @@ MMS Operator:::MMS Media Manager Operator:auths=solaris.smf.manage.mms,solaris.m
MMS User:::MMS Tape User:auths=solaris.mms.io.*;help=RtMMSUser.html
NDMP Management:::Manage the NDMP service:auths=solaris.smf.manage.ndmp,solaris.smf.value.ndmp,solaris.smf.read.ndmp;help=RtNdmpMngmnt.html
Network Autoconf:::Manage network auto-magic configuration via nwamd:auths=solaris.network.autoconf;help=RtNetAutoconf.html
-Network Management:::Manage the host and network configuration:auths=solaris.smf.manage.name-service-cache,solaris.smf.manage.bind,solaris.smf.value.routing,solaris.smf.manage.routing,solaris.smf.value.nwam,solaris.smf.manage.nwam,solaris.smf.manage.tnd,solaris.smf.manage.tnctl,solaris.smf.manage.wpa,solaris.smf.value.mdns,solaris.smf.manage.mdns;profiles=Network Wifi Management,Inetd Management,Network Autoconf,Network Observability;help=RtNetMngmnt.html
+Network ILB:::Manage ILB configuration via ilbadm:auths=solaris.network.ilb.config,solaris.network.ilb.enable;help=RtNetILB.html
+Network Management:::Manage the host and network configuration:auths=solaris.smf.manage.name-service-cache,solaris.smf.manage.bind,solaris.smf.value.routing,solaris.smf.manage.routing,solaris.smf.value.nwam,solaris.smf.manage.nwam,solaris.smf.manage.tnd,solaris.smf.manage.tnctl,solaris.smf.manage.wpa,solaris.smf.value.mdns,solaris.smf.manage.mdns,solaris.smf.manage.ilb;profiles=Network Wifi Management,Inetd Management,Network Autoconf,Network Observability;help=RtNetMngmnt.html
Network Observability:::Allow access to observability devices:privs=net_observability;help=RtNetObservability.html
Network Security:::Manage network and host security:auths=solaris.smf.manage.ssh,solaris.smf.value.tnd;profiles=Network Wifi Security,Network Link Security,Network IPsec Management;help=RtNetSecure.html
Network Wifi Management:::Manage wifi network configuration:auths=solaris.network.wifi.config;help=RtNetWifiMngmnt.html
diff --git a/usr/src/lib/libsecdb/user_attr.txt b/usr/src/lib/libsecdb/user_attr.txt
index 244b202415..f48558b72c 100644
--- a/usr/src/lib/libsecdb/user_attr.txt
+++ b/usr/src/lib/libsecdb/user_attr.txt
@@ -1,5 +1,5 @@
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
# CDDL HEADER START
@@ -30,4 +30,5 @@ root::::auths=solaris.*,solaris.grant;profiles=All;lock_after_retries=no;min_lab
lp::::profiles=Printer Management
adm::::profiles=Log Management
dladm::::auths=solaris.smf.manage.wpa,solaris.smf.modify
+daemon::::auths=solaris.smf.manage.ilb,solaris.smf.modify.application
zfssnap::::type=role;auths=solaris.smf.manage.zfs-auto-snapshot;profiles=ZFS File System Management
diff --git a/usr/src/pkgdefs/Makefile b/usr/src/pkgdefs/Makefile
index 7421c57412..a61a50bcd0 100644
--- a/usr/src/pkgdefs/Makefile
+++ b/usr/src/pkgdefs/Makefile
@@ -292,6 +292,8 @@ COMMON_SUBDIRS= \
SUNWiir \
SUNWiiu \
SUNWigb \
+ SUNWilb \
+ SUNWilbr \
SUNWima \
SUNWimac \
SUNWimar \
diff --git a/usr/src/pkgdefs/SUNW0on/prototype_com b/usr/src/pkgdefs/SUNW0on/prototype_com
index 6aa1273d9e..b1d1f9e711 100644
--- a/usr/src/pkgdefs/SUNW0on/prototype_com
+++ b/usr/src/pkgdefs/SUNW0on/prototype_com
@@ -295,6 +295,8 @@ f none usr/lib/help/auths/locale/SmfValueVt.html 444 root bin
f none usr/lib/help/auths/locale/SmfWpaStates.html 444 root bin
f none usr/lib/help/auths/locale/NetworkAutoconf.html 444 root bin
f none usr/lib/help/auths/locale/NetworkHeader.html 444 root bin
+f none usr/lib/help/auths/locale/NetworkILBconf.html 444 root bin
+f none usr/lib/help/auths/locale/NetworkILBenable.html 444 root bin
f none usr/lib/help/auths/locale/WifiConfig.html 444 root bin
f none usr/lib/help/auths/locale/WifiWep.html 444 root bin
f none usr/lib/help/auths/locale/LinkSecurity.html 444 root bin
@@ -303,6 +305,7 @@ f none usr/lib/help/auths/locale/MailQueue.html 0444 root bin
f none usr/lib/help/auths/locale/DevCDRW.html 0444 root bin
f none usr/lib/help/auths/locale/IdmapRules.html 0444 root bin
f none usr/lib/help/auths/locale/SmfIdmapStates.html 0444 root bin
+f none usr/lib/help/auths/locale/SmfILBStates.html 0444 root bin
f none usr/lib/help/auths/locale/SmfValueIdmap.html 0444 root bin
f none usr/lib/help/auths/locale/FileChown.html 444 root bin
f none usr/lib/help/auths/locale/FileHeader.html 444 root bin
@@ -376,6 +379,7 @@ f none usr/lib/help/profiles/locale/RtMediaRestore.html 444 root bin
f none usr/lib/help/profiles/locale/RtNameServiceAdmin.html 444 root bin
f none usr/lib/help/profiles/locale/RtNameServiceSecure.html 444 root bin
f none usr/lib/help/profiles/locale/RtNetAutoconf.html 444 root bin
+f none usr/lib/help/profiles/locale/RtNetILB.html 444 root bin
f none usr/lib/help/profiles/locale/RtNetIPsec.html 444 root bin
f none usr/lib/help/profiles/locale/RtNetMngmnt.html 444 root bin
f none usr/lib/help/profiles/locale/RtNetObservability.html 444 root bin
diff --git a/usr/src/pkgdefs/SUNWcsu/prototype_com b/usr/src/pkgdefs/SUNWcsu/prototype_com
index 1e0a68fe8d..03cf229c6c 100644
--- a/usr/src/pkgdefs/SUNWcsu/prototype_com
+++ b/usr/src/pkgdefs/SUNWcsu/prototype_com
@@ -484,6 +484,8 @@ f none usr/lib/help/auths/locale/C/LoginHeader.html 444 root bin
f none usr/lib/help/auths/locale/C/LoginRemote.html 444 root bin
f none usr/lib/help/auths/locale/C/NetworkAutoconf.html 444 root bin
f none usr/lib/help/auths/locale/C/NetworkHeader.html 444 root bin
+f none usr/lib/help/auths/locale/C/NetworkILBconf.html 444 root bin
+f none usr/lib/help/auths/locale/C/NetworkILBenable.html 444 root bin
f none usr/lib/help/auths/locale/C/PriAdmin.html 444 root bin
f none usr/lib/help/auths/locale/C/ProfmgrHeader.html 444 root bin
f none usr/lib/help/auths/locale/C/RoleHeader.html 444 root bin
@@ -503,6 +505,7 @@ f none usr/lib/help/auths/locale/C/SmfModifyDepend.html 444 root bin
f none usr/lib/help/auths/locale/C/SmfModifyFramework.html 444 root bin
f none usr/lib/help/auths/locale/C/SmfModifyHeader.html 444 root bin
f none usr/lib/help/auths/locale/C/SmfModifyMethod.html 444 root bin
+f none usr/lib/help/auths/locale/C/SmfILBStates.html 444 root bin
f none usr/lib/help/auths/locale/C/SmfInetdStates.html 444 root bin
f none usr/lib/help/auths/locale/C/SmfIPsecStates.html 444 root bin
f none usr/lib/help/auths/locale/C/SmfNscdStates.html 444 root bin
@@ -604,6 +607,7 @@ f none usr/lib/help/profiles/locale/C/RtMediaRestore.html 444 root bin
f none usr/lib/help/profiles/locale/C/RtNameServiceAdmin.html 444 root bin
f none usr/lib/help/profiles/locale/C/RtNameServiceSecure.html 444 root bin
f none usr/lib/help/profiles/locale/C/RtNetAutoconf.html 444 root bin
+f none usr/lib/help/profiles/locale/C/RtNetILB.html 444 root bin
f none usr/lib/help/profiles/locale/C/RtNetIPsec.html 444 root bin
f none usr/lib/help/profiles/locale/C/RtNetMngmnt.html 444 root bin
f none usr/lib/help/profiles/locale/C/RtNetObservability.html 444 root bin
diff --git a/usr/src/pkgdefs/SUNWilb/Makefile b/usr/src/pkgdefs/SUNWilb/Makefile
new file mode 100644
index 0000000000..1182b25256
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWilb/Makefile
@@ -0,0 +1,35 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#
+
+include ../Makefile.com
+
+.KEEP_STATE:
+
+all: $(FILES)
+
+install: all pkg
+
+include ../Makefile.targ
diff --git a/usr/src/pkgdefs/SUNWilb/depend b/usr/src/pkgdefs/SUNWilb/depend
new file mode 100644
index 0000000000..0934573a20
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWilb/depend
@@ -0,0 +1,50 @@
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# This package information file defines software dependencies associated
+# with the pkg. You can define three types of pkg dependencies with this file:
+# P indicates a prerequisite for installation
+# I indicates an incompatible package
+# R indicates a reverse dependency
+# <pkg.abbr> see pkginfo(4), PKG parameter
+# <name> see pkginfo(4), NAME parameter
+# <version> see pkginfo(4), VERSION parameter
+# <arch> see pkginfo(4), ARCH parameter
+# <type> <pkg.abbr> <name>
+# (<arch>)<version>
+# (<arch>)<version>
+# ...
+# <type> <pkg.abbr> <name>
+# ...
+#
+
+P SUNWcar Core Architecture, (Root)
+P SUNWcakr Core Solaris Kernel Architecture (Root)
+P SUNWkvm Core Architecture, (Kvm)
+P SUNWcsr Core Solaris, (Root)
+P SUNWckr Core Solaris Kernel (Root)
+P SUNWcnetr Core Solaris Network Infrastructure (Root)
+P SUNWcsu Core Solaris, (Usr)
+P SUNWcsd Core Solaris Devices
+P SUNWcsl Core Solaris Libraries
+P SUNWilbr ILB IP L3/L4 load balancer(Root)
diff --git a/usr/src/pkgdefs/SUNWilb/pkginfo.tmpl b/usr/src/pkgdefs/SUNWilb/pkginfo.tmpl
new file mode 100644
index 0000000000..b696544117
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWilb/pkginfo.tmpl
@@ -0,0 +1,56 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#
+#
+# This required package information file describes characteristics of the
+# package, such as package abbreviation, full package name, package version,
+# and package architecture.
+#
+PKG="SUNWilb"
+NAME="Integrated IP layer 3/4 load balancer for Solaris (usr)"
+ARCH="ISA"
+VERSION="ONVERS,REV=0.0.0"
+SUNW_PRODNAME="SunOS"
+SUNW_PRODVERS="RELEASE/VERSION"
+SUNW_PKGTYPE="usr"
+MAXINST="1000"
+CATEGORY="system"
+DESC="Integrated IP layer 3/4 load balancer for Solaris (usr)"
+VENDOR="Sun Microsystems, Inc."
+HOTLINE="Please contact your local service provider"
+EMAIL=""
+CLASSES="none"
+BASEDIR=/
+SUNW_PKGVERS="1.0"
+SUNW_PKG_ALLZONES="true"
+SUNW_PKG_HOLLOW="false"
+SUNW_PKG_THISZONE="false"
+#VSTOCK="<reserved by Release Engineering for package part #>"
+#ISTATES="<developer defined>"
+#RSTATES='<developer defined>'
+#ULIMIT="<developer defined>"
+#ORDER="<developer defined>"
+#PSTAMP="<developer defined>"
+#INTONLY="<developer defined>"
diff --git a/usr/src/pkgdefs/SUNWilb/prototype_com b/usr/src/pkgdefs/SUNWilb/prototype_com
new file mode 100644
index 0000000000..8282e1f77c
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWilb/prototype_com
@@ -0,0 +1,59 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#
+# This required package information file contains a list of package contents.
+# The 'pkgmk' command uses this file to identify the contents of a package
+# and their location on the development machine when building the package.
+# Can be created via a text editor or through use of the 'pkgproto' command.
+
+#!search <pathname pathname ...> # where to find pkg objects
+#!include <filename> # include another 'prototype' file
+#!default <mode> <owner> <group> # default used if not specified on entry
+#!<param>=<value> # puts parameter in pkg environment
+
+# packaging files
+i pkginfo
+i copyright
+i depend
+
+#
+# source locations relative to the prototype file
+#
+# SUNWilb
+#
+d none usr 0755 root sys
+d none usr/lib 0755 root bin
+d none usr/lib/inet 0755 root bin
+d none usr/lib/inet/ilb 0755 root bin
+f none usr/lib/inet/ilbd 555 root bin
+f none usr/lib/inet/ilb/ilb_probe 555 root bin
+f none usr/lib/libilb.so.1 755 root bin
+s none usr/lib/libilb.so=./libilb.so.1
+f none usr/lib/llib-lilb 644 root bin
+f none usr/lib/llib-lilb.ln 644 root bin
+d none usr/include 0755 root bin
+f none usr/include/libilb.h 0644 root bin
+d none usr/sbin 0755 root bin
+f none usr/sbin/ilbadm 555 root bin
diff --git a/usr/src/pkgdefs/SUNWilb/prototype_i386 b/usr/src/pkgdefs/SUNWilb/prototype_i386
new file mode 100644
index 0000000000..8edad55543
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWilb/prototype_i386
@@ -0,0 +1,53 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#
+# This required package information file contains a list of package contents.
+# The 'pkgmk' command uses this file to identify the contents of a package
+# and their location on the development machine when building the package.
+# Can be created via a text editor or through use of the 'pkgproto' command.
+
+#!search <pathname pathname ...> # where to find pkg objects
+#!include <filename> # include another 'prototype' file
+#!default <mode> <owner> <group> # default used if not specified on entry
+#!<param>=<value> # puts parameter in pkg environment
+
+#
+# Include ISA independent files (prototype_com)
+#
+!include prototype_com
+#
+#
+#
+# List files which are I386 specific here
+#
+# source locations relative to the prototype file
+#
+#
+# SUNWilb
+#
+d none usr/lib/amd64 755 root bin
+f none usr/lib/amd64/libilb.so.1 755 root bin
+s none usr/lib/amd64/libilb.so=./libilb.so.1
+f none usr/lib/amd64/llib-lilb.ln 644 root bin
diff --git a/usr/src/pkgdefs/SUNWilb/prototype_sparc b/usr/src/pkgdefs/SUNWilb/prototype_sparc
new file mode 100644
index 0000000000..ccd5491878
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWilb/prototype_sparc
@@ -0,0 +1,53 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#
+# This required package information file contains a list of package contents.
+# The 'pkgmk' command uses this file to identify the contents of a package
+# and their location on the development machine when building the package.
+# Can be created via a text editor or through use of the 'pkgproto' command.
+
+#!search <pathname pathname ...> # where to find pkg objects
+#!include <filename> # include another 'prototype' file
+#!default <mode> <owner> <group> # default used if not specified on entry
+#!<param>=<value> # puts parameter in pkg environment
+
+#
+# Include ISA independent files (prototype_com)
+#
+!include prototype_com
+#
+#
+#
+# List files which are SPARC specific here
+#
+# source locations relative to the prototype file
+#
+#
+# SUNWilb
+#
+d none usr/lib/sparcv9 755 root bin
+f none usr/lib/sparcv9/libilb.so.1 755 root bin
+s none usr/lib/sparcv9/libilb.so=./libilb.so.1
+f none usr/lib/sparcv9/llib-lilb.ln 644 root bin
diff --git a/usr/src/pkgdefs/SUNWilbr/Makefile b/usr/src/pkgdefs/SUNWilbr/Makefile
new file mode 100644
index 0000000000..377cf88897
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWilbr/Makefile
@@ -0,0 +1,37 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#
+
+include ../Makefile.com
+
+DATAFILES += i.manifest r.manifest
+
+.KEEP_STATE:
+
+all: $(FILES)
+install: all pkg
+
+include ../Makefile.targ
+include ../Makefile.prtarg
diff --git a/usr/src/pkgdefs/SUNWilbr/depend b/usr/src/pkgdefs/SUNWilbr/depend
new file mode 100644
index 0000000000..051afae5f5
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWilbr/depend
@@ -0,0 +1,49 @@
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# This package information file defines software dependencies associated
+# with the pkg. You can define three types of pkg dependencies with this file:
+# P indicates a prerequisite for installation
+# I indicates an incompatible package
+# R indicates a reverse dependency
+# <pkg.abbr> see pkginfo(4), PKG parameter
+# <name> see pkginfo(4), NAME parameter
+# <version> see pkginfo(4), VERSION parameter
+# <arch> see pkginfo(4), ARCH parameter
+# <type> <pkg.abbr> <name>
+# (<arch>)<version>
+# (<arch>)<version>
+# ...
+# <type> <pkg.abbr> <name>
+# ...
+#
+
+P SUNWcar Core Architecture, (Root)
+P SUNWcakr Core Solaris Kernel Architecture (Root)
+P SUNWkvm Core Architecture, (Kvm)
+P SUNWcsr Core Solaris, (Root)
+P SUNWckr Core Solaris Kernel (Root)
+P SUNWcnetr Core Solaris Network Infrastructure (Root)
+P SUNWcsu Core Solaris, (Usr)
+P SUNWcsd Core Solaris Devices
+P SUNWcsl Core Solaris Libraries
diff --git a/usr/src/pkgdefs/SUNWilbr/pkginfo.tmpl b/usr/src/pkgdefs/SUNWilbr/pkginfo.tmpl
new file mode 100644
index 0000000000..0975fee69f
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWilbr/pkginfo.tmpl
@@ -0,0 +1,58 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#
+
+#
+# This required package information file describes characteristics of the
+# package, such as package abbreviation, full package name, package version,
+# and package architecture.
+#
+PKG="SUNWilbr"
+NAME="Integrated IP layer 3/4 load balancer for Solaris(root)"
+ARCH="ISA"
+VERSION="ONVERS,REV=0.0.0"
+SUNW_PRODNAME="SunOS"
+SUNW_PRODVERS="RELEASE/VERSION"
+SUNW_PKGTYPE="root"
+SUNW_PKGVERS="1.0"
+MAXINST="1000"
+CATEGORY="system"
+DESC="Integrated IP layer 3/4 load balancer for Solaris(root)"
+VENDOR="Sun Microsystems, Inc."
+HOTLINE="Please contact your local service provider"
+EMAIL=""
+CLASSES="none manifest"
+BASEDIR=/
+SUNW_PKG_ALLZONES="true"
+SUNW_PKG_HOLLOW="false"
+SUNW_PKG_THISZONE="false"
+#VSTOCK="<reserved by Release Engineering for package part #>"
+#ISTATES="<developer defined>"
+#RSTATES='<developer defined>'
+#ULIMIT="<developer defined>"
+#ORDER="<developer defined>"
+#PSTAMP="<developer defined>"
+#INTONLY="<developer defined>"
diff --git a/usr/src/pkgdefs/SUNWilbr/prototype_com b/usr/src/pkgdefs/SUNWilbr/prototype_com
new file mode 100644
index 0000000000..d70a8ca5ca
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWilbr/prototype_com
@@ -0,0 +1,54 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#
+# This required package information file contains a list of package contents.
+# The 'pkgmk' command uses this file to identify the contents of a package
+# and their location on the development machine when building the package.
+# Can be created via a text editor or through use of the 'pkgproto' command.
+#
+
+#!search <pathname pathname ...> # where to find pkg objects
+#!include <filename> # include another 'prototype' file
+#!default <mode> <owner> <group> # default used if not specified on entry
+#!<param>=<value> # puts parameter in pkg environment
+
+# packaging files
+i pkginfo
+i copyright
+i depend
+i i.manifest
+i r.manifest
+#
+#
+# source locations relative to the prototype file
+#
+# SUNWilbr
+#
+d none var 755 root sys
+d none var/svc 755 root sys
+d none var/svc/manifest 755 root sys
+d none var/svc/manifest/network 755 root sys
+d none var/svc/manifest/network/loadbalancer 755 root sys
+f manifest var/svc/manifest/network/loadbalancer/ilbd.xml 0444 root sys
diff --git a/usr/src/pkgdefs/SUNWilbr/prototype_i386 b/usr/src/pkgdefs/SUNWilbr/prototype_i386
new file mode 100644
index 0000000000..9213d36b1e
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWilbr/prototype_i386
@@ -0,0 +1,49 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#
+# This required package information file contains a list of package contents.
+# The 'pkgmk' command uses this file to identify the contents of a package
+# and their location on the development machine when building the package.
+# Can be created via a text editor or through use of the 'pkgproto' command.
+
+#!search <pathname pathname ...> # where to find pkg objects
+#!include <filename> # include another 'prototype' file
+#!default <mode> <owner> <group> # default used if not specified on entry
+#!<param>=<value> # puts parameter in pkg environment
+
+#
+# Include ISA independent files (prototype_com)
+#
+!include prototype_com
+#
+#
+#
+# List files which are I386 specific here
+#
+# source locations relative to the prototype file
+#
+#
+# SUNWilbr
+#
diff --git a/usr/src/pkgdefs/SUNWilbr/prototype_sparc b/usr/src/pkgdefs/SUNWilbr/prototype_sparc
new file mode 100644
index 0000000000..52e38b20aa
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWilbr/prototype_sparc
@@ -0,0 +1,48 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#
+# This required package information file contains a list of package contents.
+# The 'pkgmk' command uses this file to identify the contents of a package
+# and their location on the development machine when building the package.
+# Can be created via a text editor or through use of the 'pkgproto' command.
+
+#!search <pathname pathname ...> # where to find pkg objects
+#!include <filename> # include another 'prototype' file
+#!default <mode> <owner> <group> # default used if not specified on entry
+#!<param>=<value> # puts parameter in pkg environment
+
+#
+# Include ISA independent files (prototype_com)
+#
+!include prototype_com
+#
+#
+#
+# List files which are SPARC specific here
+#
+# source locations relative to the prototype file
+#
+#
+# SUNWilbr
+#
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files
index aa1b921b69..ded59fafd2 100644
--- a/usr/src/uts/common/Makefile.files
+++ b/usr/src/uts/common/Makefile.files
@@ -531,6 +531,7 @@ IP_SCTP_OBJS = sctp.o sctp_opt_data.o sctp_output.o \
sctp_timer.o sctp_heartbeat.o sctp_hash.o \
sctp_ioc.o sctp_bind.o sctp_notify.o sctp_asconf.o \
sctp_addr.o tn_ipopt.o tnet.o ip_netinfo.o
+IP_ILB_OBJS = ilb.o ilb_nat.o ilb_conn.o ilb_alg_hash.o ilb_alg_rr.o
IP_OBJS += igmp.o ipmp.o ip.o ip6.o ip6_asp.o ip6_if.o ip6_ire.o ip6_rts.o \
ip_if.o ip_ire.o ip_listutils.o ip_mroute.o \
@@ -543,7 +544,8 @@ IP_OBJS += igmp.o ipmp.o ip.o ip6.o ip6_asp.o ip6_if.o ip6_ire.o ip6_rts.o \
$(IP_RTS_OBJS) \
$(IP_TCP_OBJS) \
$(IP_UDP_OBJS) \
- $(IP_SCTP_OBJS)
+ $(IP_SCTP_OBJS) \
+ $(IP_ILB_OBJS)
IP6_OBJS += ip6ddi.o
diff --git a/usr/src/uts/common/Makefile.rules b/usr/src/uts/common/Makefile.rules
index ebb286f385..277ac46685 100644
--- a/usr/src/uts/common/Makefile.rules
+++ b/usr/src/uts/common/Makefile.rules
@@ -491,6 +491,9 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/inet/tcp/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/inet/ilb/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
$(OBJS_DIR)/%.o: $(UTSBASE)/common/inet/ipf/%.c
$(COMPILE.c) -o $@ $<
@@ -1833,6 +1836,9 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/common/inet/sctp/%.c
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/inet/tcp/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/inet/ilb/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/inet/nca/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
diff --git a/usr/src/uts/common/inet/ilb.h b/usr/src/uts/common/inet/ilb.h
new file mode 100644
index 0000000000..81213bfd4c
--- /dev/null
+++ b/usr/src/uts/common/inet/ilb.h
@@ -0,0 +1,239 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+#ifndef _INET_ILB_H
+#define _INET_ILB_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * This file contains the private interface to IP to configure ILB in
+ * the system. Note that this is not a supported interface, and is
+ * subject to change without notice. User level apps should instead
+ * use the libilb library to interface with ILB.
+ */
+
+/* ioctl cmds to IP to configure ILB */
+typedef enum {
+ ILB_CREATE_RULE, /* 0: no explicit values, so numbering starts at 0 */
+ ILB_DESTROY_RULE,
+ ILB_ENABLE_RULE,
+ ILB_DISABLE_RULE,
+ ILB_NUM_RULES,
+ ILB_NUM_SERVERS,
+ ILB_RULE_NAMES,
+ ILB_LIST_RULE,
+ ILB_LIST_SERVERS,
+ ILB_ADD_SERVERS,
+ ILB_DEL_SERVERS,
+ ILB_ENABLE_SERVERS,
+ ILB_DISABLE_SERVERS,
+ ILB_LIST_NAT_TABLE,
+ ILB_LIST_STICKY_TABLE /* 14: reordering or inserting renumbers commands */
+} ilb_cmd_t;
+
+/* Supported load balancing algorithm types */
+typedef enum {
+ ILB_ALG_IMPL_ROUNDROBIN = 1, /* numbering starts at 1; no enumerator is 0 */
+ ILB_ALG_IMPL_HASH_IP,
+ ILB_ALG_IMPL_HASH_IP_SPORT,
+ ILB_ALG_IMPL_HASH_IP_VIP
+} ilb_algo_impl_t;
+
+/* Supported load balancing methods (topologies) */
+typedef enum {
+ ILB_TOPO_IMPL_DSR = 1, /* numbering starts at 1; no enumerator is 0 */
+ ILB_TOPO_IMPL_NAT,
+ ILB_TOPO_IMPL_HALF_NAT
+} ilb_topo_impl_t;
+
+/* Max ILB rule name length */
+#define ILB_RULE_NAMESZ 20
+
+/* Max kstat server name length */
+#define ILB_SERVER_NAMESZ 20
+
+/* Rule destroy/enable/disable command struct */
+typedef struct {
+ ilb_cmd_t cmd;
+ char name[ILB_RULE_NAMESZ]; /* rule name */
+ uint32_t flags; /* presumably ILB_RULE_* bits -- TODO confirm */
+} ilb_name_cmd_t;
+
+/* Flags for rule creation command */
+/* these are echoed in lib/libilb/common/libilb.h - please keep in sync */
+#define ILB_RULE_ENABLED 0x1
+#define ILB_RULE_STICKY 0x2
+#define ILB_RULE_ALLRULES 0x4
+#define ILB_RULE_BUSY 0x8
+
+/* Rule creation/retrieval command struct */
+typedef struct {
+ ilb_cmd_t cmd;
+ char name[ILB_RULE_NAMESZ]; /* rule name */
+ uint32_t ip_ver;
+ in6_addr_t vip;
+ char vip_itf[LIFNAMSIZ];
+ uint32_t proto;
+ in_port_t min_port; /* In network byte order */
+ in_port_t max_port; /* presumably network byte order too -- confirm */
+ ilb_algo_impl_t algo;
+ ilb_topo_impl_t topo;
+ char servers_itf[LIFNAMSIZ];
+ in6_addr_t nat_src_start;
+ in6_addr_t nat_src_end;
+ uint32_t flags; /* ILB_RULE_* flags */
+ in6_addr_t sticky_mask;
+ uint32_t conn_drain_timeout; /* Time value is in seconds */
+ uint32_t nat_expiry; /* presumably seconds as well -- confirm */
+ uint32_t sticky_expiry; /* presumably seconds as well -- confirm */
+} ilb_rule_cmd_t;
+
+/* Get number of servers command struct */
+typedef struct {
+ ilb_cmd_t cmd;
+ char name[ILB_RULE_NAMESZ]; /* rule name */
+ uint32_t num; /* presumably the server count on return -- confirm */
+} ilb_num_servers_cmd_t;
+
+/* Get number of rules command struct */
+typedef struct {
+ ilb_cmd_t cmd;
+ uint32_t num; /* presumably the rule count on return -- confirm */
+} ilb_num_rules_cmd_t;
+
+/* Get all rule names command struct */
+typedef struct {
+ ilb_cmd_t cmd;
+ uint32_t num_names;
+ /* real buf size is (num_names * ILB_RULE_NAMESZ); one slot is declared */
+ char buf[ILB_RULE_NAMESZ];
+} ilb_rule_names_cmd_t;
+
+/* Flags for ilb_server_info_t */
+#define ILB_SERVER_ENABLED 0x1
+
+/* Struct to represent a backend server for add/list command */
+typedef struct {
+ char name[ILB_SERVER_NAMESZ]; /* kstat server name */
+ in6_addr_t addr;
+ in_port_t min_port; /* In network byte order */
+ in_port_t max_port; /* presumably network byte order too -- confirm */
+ uint32_t flags; /* ILB_SERVER_* flags */
+ int err; /* On return, a non-zero value indicates an error */
+} ilb_server_info_t;
+
+/* Add/list servers command struct */
+typedef struct {
+ ilb_cmd_t cmd;
+ char name[ILB_RULE_NAMESZ]; /* rule name */
+ uint32_t num_servers;
+ ilb_server_info_t servers[1]; /* presumably num_servers entries -- confirm */
+} ilb_servers_info_cmd_t;
+
+/*
+ * Struct to represent a backend server for delete/enable/disable
+ * command
+ */
+typedef struct {
+ in6_addr_t addr;
+ int err; /* On return, a non-zero value indicates an error */
+} ilb_server_arg_t;
+
+/* Delete/enable/disable a server command struct */
+typedef struct {
+ ilb_cmd_t cmd;
+ char name[ILB_RULE_NAMESZ]; /* rule name */
+ uint32_t num_servers;
+ ilb_server_arg_t servers[1]; /* presumably num_servers entries -- confirm */
+} ilb_servers_cmd_t;
+
+/*
+ * Flags for listing NAT/persistence table entries
+ *
+ * ILB_LIST_BEGIN: start from the beginning of the table
+ * ILB_LIST_CONT: start from the last reply
+ * ILB_LIST_END: on return, this flag indicates the end of the table
+ */
+#define ILB_LIST_BEGIN 0x1
+#define ILB_LIST_CONT 0x2
+#define ILB_LIST_END 0x4
+
+#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
+#pragma pack(4)
+#endif
+
+typedef struct { /* one NAT table entry, carried in ilb_list_nat_cmd_t.entries */
+ uint32_t proto;
+
+ in6_addr_t in_local;
+ in6_addr_t in_global;
+ in6_addr_t out_local;
+ in6_addr_t out_global;
+
+ in_port_t in_local_port; /* byte order of the ports not stated -- confirm */
+ in_port_t in_global_port;
+ in_port_t out_local_port;
+ in_port_t out_global_port;
+
+ int64_t create_time; /* time base/units not stated here -- confirm */
+ int64_t last_access_time;
+ uint64_t pkt_cnt;
+} ilb_nat_entry_t;
+
+/* List NAT table entries command struct */
+typedef struct {
+ ilb_cmd_t cmd;
+ uint32_t flags; /* ILB_LIST_* flags */
+ uint32_t num_nat;
+ ilb_nat_entry_t entries[1]; /* presumably num_nat entries -- confirm */
+} ilb_list_nat_cmd_t;
+
+typedef struct { /* one sticky entry, carried in ilb_list_sticky_cmd_t.entries */
+ char rule_name[ILB_RULE_NAMESZ];
+ in6_addr_t req_addr;
+ in6_addr_t srv_addr;
+ int64_t expiry_time; /* time base/units not stated here -- confirm */
+} ilb_sticky_entry_t;
+
+/* List sticky table entries command struct */
+typedef struct {
+ ilb_cmd_t cmd;
+ uint32_t flags; /* ILB_LIST_* flags */
+ uint32_t num_sticky;
+ ilb_sticky_entry_t entries[1]; /* presumably num_sticky entries -- confirm */
+} ilb_list_sticky_cmd_t;
+
+#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
+#pragma pack()
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _INET_ILB_H */
diff --git a/usr/src/uts/common/inet/ilb/ilb.c b/usr/src/uts/common/inet/ilb/ilb.c
new file mode 100644
index 0000000000..9bfe3fb0e0
--- /dev/null
+++ b/usr/src/uts/common/inet/ilb/ilb.c
@@ -0,0 +1,2153 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/kmem.h>
+#include <sys/ksynch.h>
+#include <sys/systm.h>
+#include <sys/socket.h>
+#include <sys/disp.h>
+#include <sys/taskq.h>
+#include <sys/cmn_err.h>
+#include <sys/strsun.h>
+#include <sys/sdt.h>
+#include <sys/atomic.h>
+#include <netinet/in.h>
+#include <inet/ip.h>
+#include <inet/ip6.h>
+#include <inet/tcp.h>
+#include <inet/udp_impl.h>
+#include <inet/kstatcom.h>
+
+#include <inet/ilb_ip.h>
+#include "ilb_alg.h"
+#include "ilb_nat.h"
+#include "ilb_conn.h"
+
+/* ILB kmem cache flag */
+int ilb_kmem_flags = 0;
+
+/*
+ * The default size for the different hash tables. Global for all stacks.
+ * But each stack has its own table, just that their sizes are the same.
+ */
+static size_t ilb_rule_hash_size = 2048;
+
+static size_t ilb_conn_hash_size = 262144;
+
+static size_t ilb_sticky_hash_size = 262144;
+
+/* This should be a prime number. */
+static size_t ilb_nat_src_hash_size = 97;
+
+/* Default NAT cache entry expiry time. */
+static uint32_t ilb_conn_tcp_expiry = 120;
+static uint32_t ilb_conn_udp_expiry = 60;
+
+/* Default sticky entry expiry time. */
+static uint32_t ilb_sticky_expiry = 60;
+
+/*
+ * Hash the 4 bytes starting at addr into a rule hash bucket index.
+ *
+ * addr is assumed to be a uint8_t * to an ipaddr_t. hash_size must be a
+ * power of 2 because the result is masked with (hash_size - 1);
+ * ilb_rule_hash_init() rounds the table size up accordingly.
+ */
+#define ILB_RULE_HASH(addr, hash_size) \
+ ((*((addr) + 3) * 29791 + *((addr) + 2) * 961 + *((addr) + 1) * 31 + \
+ *(addr)) & ((hash_size) - 1))
+
+/*
+ * Note on ILB delayed processing
+ *
+ * To avoid in line removal on some of the data structures, such as rules,
+ * servers and ilb_conn_hash entries, ILB delays such processing to a taskq.
+ * There are three types of ILB taskq:
+ *
+ * 1. rule handling: created at stack initialization time, ilb_stack_init()
+ * 2. conn hash handling: created at conn hash initialization time,
+ * ilb_conn_hash_init()
+ * 3. sticky hash handling: created at sticky hash initialization time,
+ * ilb_sticky_hash_init()
+ *
+ * The rule taskq is for processing rule and server removal. When a user
+ * land rule/server removal request comes in, a taskq is dispatched after
+ * removing the rule/server from all related hashes. This taskq will wait
+ * until all references to the rule/server are gone before removing it.
+ * So the user land thread requesting the removal does not need to wait
+ * for the removal completion.
+ *
+ * The conn hash/sticky hash taskq is for processing ilb_conn_hash and
+ * ilb_sticky_hash table entry removal. There are ilb_conn_timer_size timers
+ * and ilb_sticky_timer_size timers running for ilb_conn_hash and
+ * ilb_sticky_hash cleanup respectively. Each timer is responsible for one
+ * portion (same size) of the hash table. When a timer fires, it dispatches
+ * a conn hash taskq to clean up its portion of the table. This avoids in
+ * line processing of the removal.
+ *
+ * There is another delayed processing, the clean up of NAT source address
+ * table. We just use the timer to directly handle it instead of using
+ * a taskq. The reason is that the table is small so it is OK to use the
+ * timer.
+ */
+
+/* ILB rule taskq constants. */
+#define ILB_RULE_TASKQ_NUM_THR 20
+
+/*
+ * Argument passed to ILB rule taskq routines. It is allocated by the
+ * dispatcher (ilb_rule_del(), ilb_rule_del_all()) and freed by the
+ * taskq routine ilb_rule_del_tq().
+ */
+typedef struct {
+ ilb_stack_t *ilbs; /* Stack the rule belongs to */
+ ilb_rule_t *rule; /* Rule to be deleted */
+} ilb_rule_tq_t;
+
+/* kstat handling routines. */
+static kstat_t *ilb_kstat_g_init(netstackid_t, ilb_stack_t *);
+static void ilb_kstat_g_fini(netstackid_t, ilb_stack_t *);
+static kstat_t *ilb_rule_kstat_init(netstackid_t, ilb_rule_t *);
+static kstat_t *ilb_server_kstat_init(netstackid_t, ilb_rule_t *,
+ ilb_server_t *);
+
+/* Rule hash handling routines. */
+static void ilb_rule_hash_init(ilb_stack_t *);
+static void ilb_rule_hash_fini(ilb_stack_t *);
+static void ilb_rule_hash_add(ilb_stack_t *, ilb_rule_t *, const in6_addr_t *);
+static void ilb_rule_hash_del(ilb_rule_t *);
+static ilb_rule_t *ilb_rule_hash(ilb_stack_t *, int, int, in6_addr_t *,
+ in_port_t, zoneid_t, uint32_t, boolean_t *);
+
+/* Global rule list handling routines. */
+static void ilb_rule_g_add(ilb_stack_t *, ilb_rule_t *);
+static void ilb_rule_g_del(ilb_stack_t *, ilb_rule_t *);
+/* The name must match the definition, ilb_rule_del_common(). */
+static void ilb_rule_del_common(ilb_stack_t *, ilb_rule_t *);
+static ilb_rule_t *ilb_find_rule_locked(ilb_stack_t *, zoneid_t, const char *,
+    int *);
+static boolean_t ilb_match_rule(ilb_stack_t *, zoneid_t, const char *, int,
+    int, in_port_t, in_port_t, const in6_addr_t *);
+
+/* Back end server handling routines. */
+static void ilb_server_free(ilb_server_t *);
+
+/* Network stack handling routines. */
+static void *ilb_stack_init(netstackid_t, netstack_t *);
+static void ilb_stack_shutdown(netstackid_t, void *);
+static void ilb_stack_fini(netstackid_t, void *);
+
+/* Sticky connection handling routines. */
+static void ilb_rule_sticky_init(ilb_rule_t *);
+static void ilb_rule_sticky_fini(ilb_rule_t *);
+
+/* Handy macro to check for unspecified address. */
+#define IS_ADDR_UNSPEC(addr) \
+ (IN6_IS_ADDR_V4MAPPED(addr) ? IN6_IS_ADDR_V4MAPPED_ANY(addr) : \
+ IN6_IS_ADDR_UNSPECIFIED(addr))
+
+/*
+ * Global kstat instance counter. When a rule is created, its kstat instance
+ * number is assigned by ilb_kstat_instance and ilb_kstat_instance is
+ * incremented.
+ */
+static uint_t ilb_kstat_instance = 0;
+
+/*
+ * The ILB global kstat has name ILB_G_KS_NAME and class name ILB_G_KS_CNAME.
+ * A rule's kstat has ILB_RULE_KS_CNAME class name.
+ */
+#define ILB_G_KS_NAME "global"
+#define ILB_G_KS_CNAME "kstat"
+#define ILB_RULE_KS_CNAME "rulestat"
+
+/*
+ * Create and install the global ILB kstat of a stack. The named counters
+ * are kept in ilbs->ilbs_kstat and the stack ID is remembered in
+ * ks_private. Returns the kstat, or NULL if it cannot be created.
+ */
+static kstat_t *
+ilb_kstat_g_init(netstackid_t stackid, ilb_stack_t *ilbs)
+{
+	ilb_g_kstat_t g_tmpl = {
+		{ "num_rules",		KSTAT_DATA_UINT64, 0 },
+		{ "ip_frag_in",		KSTAT_DATA_UINT64, 0 },
+		{ "ip_frag_dropped",	KSTAT_DATA_UINT64, 0 }
+	};
+	kstat_t *g_ksp;
+
+	g_ksp = kstat_create_netstack(ILB_KSTAT_MOD_NAME, 0, ILB_G_KS_NAME,
+	    ILB_G_KS_CNAME, KSTAT_TYPE_NAMED, NUM_OF_FIELDS(ilb_g_kstat_t),
+	    KSTAT_FLAG_VIRTUAL, stackid);
+	if (g_ksp != NULL) {
+		/* Seed the counters from the template, then publish. */
+		bcopy(&g_tmpl, ilbs->ilbs_kstat, sizeof (g_tmpl));
+		g_ksp->ks_data = ilbs->ilbs_kstat;
+		g_ksp->ks_private = (void *)(uintptr_t)stackid;
+		kstat_install(g_ksp);
+	}
+	return (g_ksp);
+}
+
+/*
+ * Delete the global ILB kstat of a stack, if there is one, and clear
+ * ilbs->ilbs_ksp.
+ */
+static void
+ilb_kstat_g_fini(netstackid_t stackid, ilb_stack_t *ilbs)
+{
+	if (ilbs->ilbs_ksp == NULL)
+		return;
+
+	/* ks_private holds the stack ID stored by ilb_kstat_g_init(). */
+	ASSERT(stackid == (netstackid_t)(uintptr_t)
+	    ilbs->ilbs_ksp->ks_private);
+	kstat_delete_netstack(ilbs->ilbs_ksp, stackid);
+	ilbs->ilbs_ksp = NULL;
+}
+
+/*
+ * Create and install the kstat of a rule. The kstat is named after the
+ * rule, uses the rule's kstat instance number and keeps its counters in
+ * rule->ir_kstat. Returns the kstat, or NULL if it cannot be created.
+ */
+static kstat_t *
+ilb_rule_kstat_init(netstackid_t stackid, ilb_rule_t *rule)
+{
+	ilb_rule_kstat_t rule_tmpl = {
+		{ "num_servers",		KSTAT_DATA_UINT64, 0 },
+		{ "bytes_not_processed",	KSTAT_DATA_UINT64, 0 },
+		{ "pkt_not_processed",		KSTAT_DATA_UINT64, 0 },
+		{ "bytes_dropped",		KSTAT_DATA_UINT64, 0 },
+		{ "pkt_dropped",		KSTAT_DATA_UINT64, 0 },
+		{ "nomem_bytes_dropped",	KSTAT_DATA_UINT64, 0 },
+		{ "nomem_pkt_dropped",		KSTAT_DATA_UINT64, 0 },
+		{ "noport_bytes_dropped",	KSTAT_DATA_UINT64, 0 },
+		{ "noport_pkt_dropped",		KSTAT_DATA_UINT64, 0 },
+		{ "icmp_echo_processed",	KSTAT_DATA_UINT64, 0 },
+		{ "icmp_dropped",		KSTAT_DATA_UINT64, 0 },
+		{ "icmp_too_big_processed",	KSTAT_DATA_UINT64, 0 },
+		{ "icmp_too_big_dropped",	KSTAT_DATA_UINT64, 0 }
+	};
+	kstat_t *rule_ksp;
+
+	rule_ksp = kstat_create_netstack(ILB_KSTAT_MOD_NAME,
+	    rule->ir_ks_instance, rule->ir_name, ILB_RULE_KS_CNAME,
+	    KSTAT_TYPE_NAMED, NUM_OF_FIELDS(ilb_rule_kstat_t),
+	    KSTAT_FLAG_VIRTUAL, stackid);
+	if (rule_ksp != NULL) {
+		/* Seed the counters from the template, then publish. */
+		bcopy(&rule_tmpl, &rule->ir_kstat, sizeof (rule_tmpl));
+		rule_ksp->ks_data = &rule->ir_kstat;
+		rule_ksp->ks_private = (void *)(uintptr_t)stackid;
+		kstat_install(rule_ksp);
+	}
+	return (rule_ksp);
+}
+
+/*
+ * Create and install the kstat of a back end server. The kstat is named
+ * after the server, its class name is "<rule name>-sstat" and it shares
+ * the kstat instance number of the rule. The counters are kept in
+ * server->iser_kstat. Returns the kstat, or NULL if it cannot be created.
+ */
+static kstat_t *
+ilb_server_kstat_init(netstackid_t stackid, ilb_rule_t *rule,
+    ilb_server_t *server)
+{
+	kstat_t *ksp;
+	ilb_server_kstat_t template = {
+		{ "bytes_processed",	KSTAT_DATA_UINT64, 0 },
+		{ "pkt_processed",	KSTAT_DATA_UINT64, 0 },
+		{ "ip_address",		KSTAT_DATA_STRING, 0 }
+	};
+	char cname_buf[KSTAT_STRLEN];
+
+	/*
+	 * 7 is strlen("-sstat") plus the terminating NUL. The ASSERT only
+	 * exists on DEBUG kernels, so bound the write with snprintf() to
+	 * make sure that an over-long rule name can never overrun
+	 * cname_buf on a non-DEBUG kernel.
+	 */
+	ASSERT(strlen(rule->ir_name) + 7 < KSTAT_STRLEN);
+	(void) snprintf(cname_buf, sizeof (cname_buf), "%s-sstat",
+	    rule->ir_name);
+	ksp = kstat_create_netstack(ILB_KSTAT_MOD_NAME, rule->ir_ks_instance,
+	    server->iser_name, cname_buf, KSTAT_TYPE_NAMED,
+	    NUM_OF_FIELDS(ilb_server_kstat_t), KSTAT_FLAG_VIRTUAL, stackid);
+	if (ksp == NULL)
+		return (NULL);
+
+	bcopy(&template, &server->iser_kstat, sizeof (template));
+	ksp->ks_data = &server->iser_kstat;
+	ksp->ks_private = (void *)(uintptr_t)stackid;
+
+	kstat_named_setstr(&server->iser_kstat.ip_address,
+	    server->iser_ip_addr);
+	/*
+	 * We never change the IP address, so the string's size can be
+	 * accounted for once here.
+	 */
+	ksp->ks_data_size += strlen(server->iser_ip_addr) + 1;
+
+	kstat_install(ksp);
+	return (ksp);
+}
+
+/*
+ * Allocate the rule hash table of a stack and initialize the lock of
+ * every bucket. ILB_RULE_HASH() masks with (size - 1), so the table size
+ * is first rounded up to a power of 2 if necessary.
+ */
+static void
+ilb_rule_hash_init(ilb_stack_t *ilbs)
+{
+	int shift;
+	int bucket;
+
+	if ((ilbs->ilbs_rule_hash_size &
+	    (ilbs->ilbs_rule_hash_size - 1)) != 0) {
+		/* Find the first power of 2 larger than the current size. */
+		shift = 0;
+		while (shift < 31 &&
+		    ilbs->ilbs_rule_hash_size >= (1 << shift)) {
+			shift++;
+		}
+		ilbs->ilbs_rule_hash_size = 1 << shift;
+	}
+
+	ilbs->ilbs_g_hash = kmem_zalloc(sizeof (ilb_hash_t) *
+	    ilbs->ilbs_rule_hash_size, KM_SLEEP);
+	for (bucket = 0; bucket < ilbs->ilbs_rule_hash_size; bucket++) {
+		mutex_init(&ilbs->ilbs_g_hash[bucket].ilb_hash_lock, NULL,
+		    MUTEX_DEFAULT, NULL);
+	}
+}
+
+/*
+ * Clean up the rule hash table. The lock of every bucket is destroyed
+ * before the table is freed, and ilbs_g_hash is reset to NULL since a
+ * NULL ilbs_g_hash is what ilb_rule_add() takes to mean "no table yet".
+ * Nothing is done if the table was never allocated.
+ */
+static void
+ilb_rule_hash_fini(ilb_stack_t *ilbs)
+{
+	int i;
+
+	if (ilbs->ilbs_g_hash == NULL)
+		return;
+
+	/* Pair every mutex_init() done in ilb_rule_hash_init(). */
+	for (i = 0; i < ilbs->ilbs_rule_hash_size; i++)
+		mutex_destroy(&ilbs->ilbs_g_hash[i].ilb_hash_lock);
+
+	kmem_free(ilbs->ilbs_g_hash, sizeof (ilb_hash_t) *
+	    ilbs->ilbs_rule_hash_size);
+	ilbs->ilbs_g_hash = NULL;
+}
+
+/*
+ * Insert a rule at the head of its rule hash bucket. The bucket is
+ * chosen by hashing the last 32 bits of addr, and the rule remembers the
+ * bucket in ir_hash so that ilb_rule_hash_del() can find it again.
+ */
+static void
+ilb_rule_hash_add(ilb_stack_t *ilbs, ilb_rule_t *rule, const in6_addr_t *addr)
+{
+	ilb_hash_t *bucket;
+	int idx;
+
+	idx = ILB_RULE_HASH((uint8_t *)&addr->s6_addr32[3],
+	    ilbs->ilbs_rule_hash_size);
+	DTRACE_PROBE2(ilb__rule__hash__add, ilb_rule_t *, rule, int, idx);
+
+	bucket = &ilbs->ilbs_g_hash[idx];
+	mutex_enter(&bucket->ilb_hash_lock);
+	rule->ir_hash_prev = NULL;
+	rule->ir_hash_next = bucket->ilb_hash_rule;
+	if (rule->ir_hash_next != NULL)
+		rule->ir_hash_next->ir_hash_prev = rule;
+	bucket->ilb_hash_rule = rule;
+	rule->ir_hash = bucket;
+	mutex_exit(&bucket->ilb_hash_lock);
+}
+
+/*
+ * Unlink a rule from the rule hash bucket recorded in ir_hash and clear
+ * its hash linkage. The rule itself is not freed here.
+ */
+static void
+ilb_rule_hash_del(ilb_rule_t *rule)
+{
+	ilb_hash_t *bucket = rule->ir_hash;
+
+	mutex_enter(&bucket->ilb_hash_lock);
+	if (bucket->ilb_hash_rule != rule) {
+		/* Somewhere behind the head: splice the neighbours. */
+		if (rule->ir_hash_prev != NULL) {
+			rule->ir_hash_prev->ir_hash_next =
+			    rule->ir_hash_next;
+		}
+		if (rule->ir_hash_next != NULL) {
+			rule->ir_hash_next->ir_hash_prev =
+			    rule->ir_hash_prev;
+		}
+	} else {
+		/* The rule is the head: the next one becomes the head. */
+		bucket->ilb_hash_rule = rule->ir_hash_next;
+		if (rule->ir_hash_next != NULL)
+			rule->ir_hash_next->ir_hash_prev = NULL;
+	}
+	mutex_exit(&bucket->ilb_hash_lock);
+
+	rule->ir_hash = NULL;
+	rule->ir_hash_prev = NULL;
+	rule->ir_hash_next = NULL;
+}
+
+/*
+ * Given the info of a packet, look for a match in the rule hash table.
+ *
+ * l3 and l4 are compared with the rule's ir_ipver and ir_proto. port is
+ * converted with ntohs() before it is compared with the rule's port (or
+ * port range). len is only used to update the byte counters.
+ *
+ * The first rule matching port, IP version, protocol, zone and address
+ * decides the outcome:
+ * - rule not enabled: the "not processed" counters are updated and NULL
+ *   is returned;
+ * - rule busy: the "dropped" counters are updated, *busy is set to
+ *   B_TRUE and NULL is returned;
+ * - otherwise the rule's ir_refcnt is incremented and the rule is
+ *   returned.
+ * NULL is also returned, with *busy set to B_FALSE, if nothing matches.
+ */
+static ilb_rule_t *
+ilb_rule_hash(ilb_stack_t *ilbs, int l3, int l4, in6_addr_t *addr,
+ in_port_t port, zoneid_t zoneid, uint32_t len, boolean_t *busy)
+{
+ int i;
+ ilb_rule_t *rule;
+ ipaddr_t v4_addr;
+
+ *busy = B_FALSE;
+ /*
+ * The bucket index must agree with the one ilb_rule_hash_add()
+ * derives from s6_addr32[3] of the rule's address.
+ */
+ IN6_V4MAPPED_TO_IPADDR(addr, v4_addr);
+ i = ILB_RULE_HASH((uint8_t *)&v4_addr, ilbs->ilbs_rule_hash_size);
+ port = ntohs(port);
+
+ mutex_enter(&ilbs->ilbs_g_hash[i].ilb_hash_lock);
+ for (rule = ilbs->ilbs_g_hash[i].ilb_hash_rule; rule != NULL;
+ rule = rule->ir_hash_next) {
+ if (!rule->ir_port_range) {
+ if (rule->ir_min_port != port)
+ continue;
+ } else {
+ if (port < rule->ir_min_port ||
+ port > rule->ir_max_port) {
+ continue;
+ }
+ }
+ if (rule->ir_ipver != l3 || rule->ir_proto != l4 ||
+ rule->ir_zoneid != zoneid) {
+ continue;
+ }
+
+ /* A rule with an unspecified target matches any address. */
+ if (l3 == IPPROTO_IP) {
+ if (rule->ir_target_v4 != INADDR_ANY &&
+ rule->ir_target_v4 != v4_addr) {
+ continue;
+ }
+ } else {
+ if (!IN6_IS_ADDR_UNSPECIFIED(&rule->ir_target_v6) &&
+ !IN6_ARE_ADDR_EQUAL(addr, &rule->ir_target_v6)) {
+ continue;
+ }
+ }
+
+ /*
+ * Just update the stats if the rule is disabled.
+ */
+ mutex_enter(&rule->ir_lock);
+ if (!(rule->ir_flags & ILB_RULE_ENABLED)) {
+ ILB_R_KSTAT(rule, pkt_not_processed);
+ ILB_R_KSTAT_UPDATE(rule, bytes_not_processed, len);
+ mutex_exit(&rule->ir_lock);
+ rule = NULL;
+ break;
+ } else if (rule->ir_flags & ILB_RULE_BUSY) {
+ /*
+ * If we are busy...
+ *
+ * XXX we should have a queue to postpone the
+ * packet processing. But this requires a
+ * mechanism in IP to re-start the packet
+ * processing. So for now, just drop the packet.
+ */
+ ILB_R_KSTAT(rule, pkt_dropped);
+ ILB_R_KSTAT_UPDATE(rule, bytes_dropped, len);
+ mutex_exit(&rule->ir_lock);
+ *busy = B_TRUE;
+ rule = NULL;
+ break;
+ } else {
+ /* Hold the rule for the caller. */
+ rule->ir_refcnt++;
+ ASSERT(rule->ir_refcnt != 1);
+ mutex_exit(&rule->ir_lock);
+ break;
+ }
+ }
+ mutex_exit(&ilbs->ilbs_g_hash[i].ilb_hash_lock);
+ return (rule);
+}
+
+/*
+ * Link a rule at the head of the global rule list of a stack and count
+ * it in the num_rules kstat. The global list is used to find all rules
+ * of an IP stack. The caller must hold ilbs_g_lock.
+ */
+static void
+ilb_rule_g_add(ilb_stack_t *ilbs, ilb_rule_t *rule)
+{
+	ASSERT(mutex_owned(&ilbs->ilbs_g_lock));
+
+	rule->ir_next = ilbs->ilbs_rule_head;
+	ilbs->ilbs_rule_head = rule;
+
+	ILB_KSTAT_UPDATE(ilbs, num_rules, 1);
+}
+
+/*
+ * Unlink a rule from the global rule list of a stack and update the
+ * num_rules kstat. Nothing is done if the rule is not on the list.
+ *
+ * The caller is assumed to hold the ilbs_g_lock. The lock is still held
+ * on return in every case; the callers release it themselves, so it must
+ * never be dropped here.
+ */
+static void
+ilb_rule_g_del(ilb_stack_t *ilbs, ilb_rule_t *rule)
+{
+	ilb_rule_t **linkp;
+
+	ASSERT(mutex_owned(&ilbs->ilbs_g_lock));
+
+	/* linkp always points at the link that leads to the current rule. */
+	for (linkp = &ilbs->ilbs_rule_head; *linkp != NULL;
+	    linkp = &(*linkp)->ir_next) {
+		if (*linkp == rule) {
+			*linkp = rule->ir_next;
+			ILB_KSTAT_UPDATE(ilbs, num_rules, -1);
+			return;
+		}
+	}
+}
+
+/*
+ * Helper routine to calculate how many source addresses are in a given
+ * range [a1, a2] (both ends included).
+ *
+ * Returns -1 if the range is not valid, i.e. if the most significant 2
+ * s6_addr32 of the two addresses differ or if a1 is larger than a2.
+ * Otherwise returns the number of addresses, capped at the largest
+ * int64_t value.
+ */
+static int64_t
+num_nat_src_v6(const in6_addr_t *a1, const in6_addr_t *a2)
+{
+	uint64_t low1, low2, span;
+
+	/*
+	 * Here we assume that the max number of NAT source cannot be
+	 * large such that the most significant 2 s6_addr32 must be
+	 * equal.
+	 */
+	if (a1->s6_addr32[0] != a2->s6_addr32[0] ||
+	    a1->s6_addr32[1] != a2->s6_addr32[1]) {
+		return (-1);
+	}
+
+	/*
+	 * Addresses are stored in network byte order, so the low 64 bits
+	 * must be converted to host order before they can be compared or
+	 * subtracted.
+	 */
+	low1 = ((uint64_t)ntohl(a1->s6_addr32[2]) << 32) |
+	    ntohl(a1->s6_addr32[3]);
+	low2 = ((uint64_t)ntohl(a2->s6_addr32[2]) << 32) |
+	    ntohl(a2->s6_addr32[3]);
+	if (low1 > low2)
+		return (-1);
+
+	/* Do the arithmetic in 64 bits and keep the result positive. */
+	span = low2 - low1;
+	if (span >= 0x7fffffffffffffffULL)
+		return (0x7fffffffffffffffLL);
+	return ((int64_t)(span + 1));
+}
+
+/*
+ * Add an ILB rule described by cmd to the given stack and zone.
+ *
+ * The rule hash table, the connection hash tables, the NAT source table
+ * and (for sticky rules) the sticky hash table are created on first use.
+ *
+ * Returns 0 on success, or
+ * EINVAL if a parameter in cmd is not acceptable,
+ * EEXIST if the new rule duplicates or conflicts with an existing one,
+ * ENOMEM if memory, the rule kstat or the algorithm state cannot be
+ * allocated.
+ */
+int
+ilb_rule_add(ilb_stack_t *ilbs, zoneid_t zoneid, const ilb_rule_cmd_t *cmd)
+{
+	ilb_rule_t *rule;
+	netstackid_t stackid;
+	int ret;
+	in_port_t min_port, max_port;
+	int64_t num_src;
+
+	/* Sanity checks. */
+	if (cmd->ip_ver != IPPROTO_IP && cmd->ip_ver != IPPROTO_IPV6)
+		return (EINVAL);
+
+	/* Need to support SCTP... */
+	if (cmd->proto != IPPROTO_TCP && cmd->proto != IPPROTO_UDP)
+		return (EINVAL);
+
+	/* For full NAT, the NAT source must be supplied. */
+	if (cmd->topo == ILB_TOPO_IMPL_NAT) {
+		if (IS_ADDR_UNSPEC(&cmd->nat_src_start) ||
+		    IS_ADDR_UNSPEC(&cmd->nat_src_end)) {
+			return (EINVAL);
+		}
+	}
+
+	/* Check invalid mask */
+	if ((cmd->flags & ILB_RULE_STICKY) &&
+	    IS_ADDR_UNSPEC(&cmd->sticky_mask)) {
+		return (EINVAL);
+	}
+
+	/* Port is passed in network byte order. */
+	min_port = ntohs(cmd->min_port);
+	max_port = ntohs(cmd->max_port);
+	if (min_port > max_port)
+		return (EINVAL);
+
+	/* min_port == 0 means "all ports". Make it so */
+	if (min_port == 0) {
+		min_port = 1;
+		max_port = 65535;
+	}
+
+	/* Funny address checking. */
+	if (cmd->ip_ver == IPPROTO_IP) {
+		in_addr_t v4_addr1, v4_addr2;
+
+		v4_addr1 = cmd->vip.s6_addr32[3];
+		if ((*(uchar_t *)&v4_addr1) == IN_LOOPBACKNET ||
+		    CLASSD(v4_addr1) || v4_addr1 == INADDR_BROADCAST ||
+		    v4_addr1 == INADDR_ANY ||
+		    !IN6_IS_ADDR_V4MAPPED(&cmd->vip)) {
+			return (EINVAL);
+		}
+
+		if (cmd->topo == ILB_TOPO_IMPL_NAT) {
+			/*
+			 * As for the VIP above, the loopback and class D
+			 * checks look at the first octet of the address,
+			 * so they must be done on the address as stored,
+			 * i.e. in network byte order.
+			 */
+			v4_addr1 = cmd->nat_src_start.s6_addr32[3];
+			v4_addr2 = cmd->nat_src_end.s6_addr32[3];
+			if ((*(uchar_t *)&v4_addr1) == IN_LOOPBACKNET ||
+			    (*(uchar_t *)&v4_addr2) == IN_LOOPBACKNET ||
+			    v4_addr1 == INADDR_BROADCAST ||
+			    v4_addr2 == INADDR_BROADCAST ||
+			    v4_addr1 == INADDR_ANY || v4_addr2 == INADDR_ANY ||
+			    CLASSD(v4_addr1) || CLASSD(v4_addr2) ||
+			    !IN6_IS_ADDR_V4MAPPED(&cmd->nat_src_start) ||
+			    !IN6_IS_ADDR_V4MAPPED(&cmd->nat_src_end)) {
+				return (EINVAL);
+			}
+
+			/* The range arithmetic needs host byte order. */
+			v4_addr1 = ntohl(v4_addr1);
+			v4_addr2 = ntohl(v4_addr2);
+			if (v4_addr1 > v4_addr2)
+				return (EINVAL);
+			num_src = (int64_t)v4_addr2 - (int64_t)v4_addr1 + 1;
+			if (num_src > ILB_MAX_NAT_SRC)
+				return (EINVAL);
+		}
+	} else {
+		if (IN6_IS_ADDR_LOOPBACK(&cmd->vip) ||
+		    IN6_IS_ADDR_MULTICAST(&cmd->vip) ||
+		    IN6_IS_ADDR_UNSPECIFIED(&cmd->vip) ||
+		    IN6_IS_ADDR_V4MAPPED(&cmd->vip)) {
+			return (EINVAL);
+		}
+
+		if (cmd->topo == ILB_TOPO_IMPL_NAT) {
+			if (IN6_IS_ADDR_LOOPBACK(&cmd->nat_src_start) ||
+			    IN6_IS_ADDR_LOOPBACK(&cmd->nat_src_end) ||
+			    IN6_IS_ADDR_MULTICAST(&cmd->nat_src_start) ||
+			    IN6_IS_ADDR_MULTICAST(&cmd->nat_src_end) ||
+			    IN6_IS_ADDR_UNSPECIFIED(&cmd->nat_src_start) ||
+			    IN6_IS_ADDR_UNSPECIFIED(&cmd->nat_src_end) ||
+			    IN6_IS_ADDR_V4MAPPED(&cmd->nat_src_start) ||
+			    IN6_IS_ADDR_V4MAPPED(&cmd->nat_src_end)) {
+				return (EINVAL);
+			}
+
+			if ((num_src = num_nat_src_v6(&cmd->nat_src_start,
+			    &cmd->nat_src_end)) < 0 ||
+			    num_src > ILB_MAX_NAT_SRC) {
+				return (EINVAL);
+			}
+		}
+	}
+
+	mutex_enter(&ilbs->ilbs_g_lock);
+	if (ilbs->ilbs_g_hash == NULL)
+		ilb_rule_hash_init(ilbs);
+	if (ilbs->ilbs_c2s_conn_hash == NULL) {
+		ASSERT(ilbs->ilbs_s2c_conn_hash == NULL);
+		ilb_conn_hash_init(ilbs);
+		ilb_nat_src_init(ilbs);
+	}
+
+	/* Make sure that the new rule does not duplicate an existing one. */
+	if (ilb_match_rule(ilbs, zoneid, cmd->name, cmd->ip_ver, cmd->proto,
+	    min_port, max_port, &cmd->vip)) {
+		mutex_exit(&ilbs->ilbs_g_lock);
+		return (EEXIST);
+	}
+
+	rule = kmem_zalloc(sizeof (ilb_rule_t), KM_NOSLEEP);
+	if (rule == NULL) {
+		mutex_exit(&ilbs->ilbs_g_lock);
+		return (ENOMEM);
+	}
+
+	/*
+	 * Set up the lock and the condition variable right away so that
+	 * the error path can always destroy them before freeing the rule.
+	 */
+	mutex_init(&rule->ir_lock, NULL, MUTEX_DEFAULT, NULL);
+	cv_init(&rule->ir_cv, NULL, CV_DEFAULT, NULL);
+
+	/* ir_name is all 0 to begin with */
+	(void) memcpy(rule->ir_name, cmd->name, ILB_RULE_NAMESZ - 1);
+
+	rule->ir_ks_instance = atomic_add_int_nv(&ilb_kstat_instance, 1);
+	stackid = (netstackid_t)(uintptr_t)ilbs->ilbs_ksp->ks_private;
+	if ((rule->ir_ksp = ilb_rule_kstat_init(stackid, rule)) == NULL) {
+		ret = ENOMEM;
+		goto error;
+	}
+
+	if (cmd->topo == ILB_TOPO_IMPL_NAT) {
+		rule->ir_nat_src_start = cmd->nat_src_start;
+		rule->ir_nat_src_end = cmd->nat_src_end;
+	}
+
+	rule->ir_ipver = cmd->ip_ver;
+	rule->ir_proto = cmd->proto;
+	rule->ir_topo = cmd->topo;
+
+	/* The rule keeps its ports in host byte order. */
+	rule->ir_min_port = min_port;
+	rule->ir_max_port = max_port;
+	if (rule->ir_min_port != rule->ir_max_port)
+		rule->ir_port_range = B_TRUE;
+	else
+		rule->ir_port_range = B_FALSE;
+
+	rule->ir_zoneid = zoneid;
+
+	rule->ir_target_v6 = cmd->vip;
+	rule->ir_servers = NULL;
+
+	/*
+	 * The default connection drain timeout is indefinite (value 0),
+	 * meaning we will wait for all connections to finish. So we
+	 * can assign cmd->conn_drain_timeout to it directly.
+	 */
+	rule->ir_conn_drain_timeout = cmd->conn_drain_timeout;
+	if (cmd->nat_expiry != 0) {
+		rule->ir_nat_expiry = cmd->nat_expiry;
+	} else {
+		switch (rule->ir_proto) {
+		case IPPROTO_TCP:
+			rule->ir_nat_expiry = ilb_conn_tcp_expiry;
+			break;
+		case IPPROTO_UDP:
+			rule->ir_nat_expiry = ilb_conn_udp_expiry;
+			break;
+		default:
+			cmn_err(CE_PANIC, "data corruption: wrong ir_proto: %p",
+			    (void *)rule);
+			break;
+		}
+	}
+	if (cmd->sticky_expiry != 0)
+		rule->ir_sticky_expiry = cmd->sticky_expiry;
+	else
+		rule->ir_sticky_expiry = ilb_sticky_expiry;
+
+	if (cmd->flags & ILB_RULE_STICKY) {
+		rule->ir_flags |= ILB_RULE_STICKY;
+		rule->ir_sticky_mask = cmd->sticky_mask;
+		if (ilbs->ilbs_sticky_hash == NULL)
+			ilb_sticky_hash_init(ilbs);
+	}
+	if (cmd->flags & ILB_RULE_ENABLED)
+		rule->ir_flags |= ILB_RULE_ENABLED;
+
+	/* This reference is dropped only when the rule is deleted. */
+	rule->ir_refcnt = 1;
+
+	switch (cmd->algo) {
+	case ILB_ALG_IMPL_ROUNDROBIN:
+		if ((rule->ir_alg = ilb_alg_rr_init(rule, NULL)) == NULL) {
+			ret = ENOMEM;
+			goto error;
+		}
+		rule->ir_alg_type = ILB_ALG_IMPL_ROUNDROBIN;
+		break;
+	case ILB_ALG_IMPL_HASH_IP:
+	case ILB_ALG_IMPL_HASH_IP_SPORT:
+	case ILB_ALG_IMPL_HASH_IP_VIP:
+		if ((rule->ir_alg = ilb_alg_hash_init(rule,
+		    &cmd->algo)) == NULL) {
+			ret = ENOMEM;
+			goto error;
+		}
+		rule->ir_alg_type = cmd->algo;
+		break;
+	default:
+		ret = EINVAL;
+		goto error;
+	}
+
+	/* Add it to the global list and hash array at the end. */
+	ilb_rule_g_add(ilbs, rule);
+	ilb_rule_hash_add(ilbs, rule, &cmd->vip);
+
+	mutex_exit(&ilbs->ilbs_g_lock);
+
+	return (0);
+
+error:
+	mutex_exit(&ilbs->ilbs_g_lock);
+	if (rule->ir_ksp != NULL) {
+		/* stackid must be initialized if ir_ksp != NULL */
+		kstat_delete_netstack(rule->ir_ksp, stackid);
+	}
+	/* The rule was never published, so nobody else can be using these. */
+	cv_destroy(&rule->ir_cv);
+	mutex_destroy(&rule->ir_lock);
+	kmem_free(rule, sizeof (ilb_rule_t));
+	return (ret);
+}
+
+/*
+ * The final part in deleting a rule. Either called directly or by the
+ * taskq dispatched.
+ *
+ * The rule must already be unreachable (removed from the rule hash and
+ * from the global list) and no longer referenced by anyone else. This
+ * routine finishes the algorithm state, waits for and frees every back
+ * end server of the rule, deletes the kstats and frees the rule.
+ */
+static void
+ilb_rule_del_common(ilb_stack_t *ilbs, ilb_rule_t *tmp_rule)
+{
+	netstackid_t stackid;
+	ilb_server_t *server;
+
+	stackid = (netstackid_t)(uintptr_t)ilbs->ilbs_ksp->ks_private;
+
+	/*
+	 * Let the algorithm know that the rule is going away. The
+	 * algorithm fini routine will free all its resources with this
+	 * rule.
+	 */
+	tmp_rule->ir_alg->ilb_alg_fini(&tmp_rule->ir_alg);
+
+	while ((server = tmp_rule->ir_servers) != NULL) {
+		mutex_enter(&server->iser_lock);
+		ilb_destroy_nat_src(&server->iser_nat_src);
+		if (tmp_rule->ir_conn_drain_timeout != 0) {
+			/*
+			 * The garbage collection thread checks this value
+			 * without grabbing a lock. So we need to use
+			 * atomic_swap_64() to make sure that the value seen
+			 * by gc thread is intact.
+			 */
+			(void) atomic_swap_64(
+			    (uint64_t *)&server->iser_die_time, lbolt64 +
+			    SEC_TO_TICK(tmp_rule->ir_conn_drain_timeout));
+		}
+		/* Wait until ours is the only reference left. */
+		while (server->iser_refcnt > 1)
+			cv_wait(&server->iser_cv, &server->iser_lock);
+		tmp_rule->ir_servers = server->iser_next;
+		/*
+		 * Nobody else references the server any more. Drop and
+		 * destroy its lock rather than freeing a held mutex.
+		 */
+		mutex_exit(&server->iser_lock);
+		kstat_delete_netstack(server->iser_ksp, stackid);
+		cv_destroy(&server->iser_cv);
+		mutex_destroy(&server->iser_lock);
+		kmem_free(server, sizeof (ilb_server_t));
+	}
+
+	ASSERT(tmp_rule->ir_ksp != NULL);
+	kstat_delete_netstack(tmp_rule->ir_ksp, stackid);
+
+	cv_destroy(&tmp_rule->ir_cv);
+	mutex_destroy(&tmp_rule->ir_lock);
+	kmem_free(tmp_rule, sizeof (ilb_rule_t));
+}
+
+/*
+ * The routine executed by the delayed rule taskq. arg is the
+ * ilb_rule_tq_t allocated by the dispatcher; it is freed here. The
+ * routine waits until the rule's own reference is the only one left and
+ * then finishes the deletion.
+ */
+static void
+ilb_rule_del_tq(void *arg)
+{
+	ilb_stack_t *ilbs = ((ilb_rule_tq_t *)arg)->ilbs;
+	ilb_rule_t *rule = ((ilb_rule_tq_t *)arg)->rule;
+
+	mutex_enter(&rule->ir_lock);
+	while (rule->ir_refcnt > 1)
+		cv_wait(&rule->ir_cv, &rule->ir_lock);
+	/*
+	 * The rule cannot be found any more and nobody else holds it, so
+	 * the lock can be dropped before the rule is torn down.
+	 */
+	mutex_exit(&rule->ir_lock);
+	ilb_rule_del_common(ilbs, rule);
+	kmem_free(arg, sizeof (ilb_rule_tq_t));
+}
+
+/*
+ * Delete the rule with the given name in the given zone.
+ *
+ * Returns 0 once the deletion has been handed to the rule taskq, or the
+ * error set by ilb_find_rule_locked() if the rule cannot be used.
+ */
+int
+ilb_rule_del(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name)
+{
+	ilb_rule_t *rule;
+	ilb_rule_tq_t *tq_arg;
+	int err;
+
+	mutex_enter(&ilbs->ilbs_g_lock);
+	rule = ilb_find_rule_locked(ilbs, zoneid, name, &err);
+	if (rule == NULL) {
+		mutex_exit(&ilbs->ilbs_g_lock);
+		return (err);
+	}
+
+	/*
+	 * Take the rule off the hash array and the global list first, so
+	 * that nobody can look it up from now on. Then give back the
+	 * reference taken by the lookup above.
+	 */
+	ilb_rule_hash_del(rule);
+	ilb_rule_g_del(ilbs, rule);
+	mutex_exit(&ilbs->ilbs_g_lock);
+	ILB_RULE_REFRELE(rule);
+
+	/*
+	 * The rule can be removed once all references to it and to its
+	 * list of servers are gone. A task is dispatched to wait for that
+	 * and finish the deletion, instead of leaving the work to whoever
+	 * drops the last reference. That may be the interrupt thread,
+	 * which should do as little as possible, and rule deletion is not
+	 * a critical task so it can be delayed.
+	 */
+	tq_arg = kmem_alloc(sizeof (ilb_rule_tq_t), KM_SLEEP);
+	tq_arg->rule = rule;
+	tq_arg->ilbs = ilbs;
+	(void) taskq_dispatch(ilbs->ilbs_rule_taskq, ilb_rule_del_tq, tq_arg,
+	    TQ_SLEEP);
+
+	return (0);
+}
+
+/*
+ * Given an IPv6 address, check to see if there is a rule using it as
+ * the VIP. It can be used to check if we need to drop a fragment.
+ *
+ * Only the first rule in the hash bucket whose target equals vip is
+ * considered. B_TRUE is returned if that rule is not busy; if ret_rule
+ * is not NULL, the rule is then also held (ir_refcnt incremented) and
+ * returned in *ret_rule. Otherwise B_FALSE is returned and *ret_rule is
+ * left untouched.
+ */
+boolean_t
+ilb_rule_match_vip_v6(ilb_stack_t *ilbs, in6_addr_t *vip, ilb_rule_t **ret_rule)
+{
+	ilb_hash_t *bucket;
+	ilb_rule_t *rule;
+	boolean_t found = B_FALSE;
+	int idx;
+
+	idx = ILB_RULE_HASH((uint8_t *)&vip->s6_addr32[3],
+	    ilbs->ilbs_rule_hash_size);
+	bucket = &ilbs->ilbs_g_hash[idx];
+
+	mutex_enter(&bucket->ilb_hash_lock);
+	rule = bucket->ilb_hash_rule;
+	while (rule != NULL &&
+	    !IN6_ARE_ADDR_EQUAL(vip, &rule->ir_target_v6)) {
+		rule = rule->ir_hash_next;
+	}
+	if (rule != NULL) {
+		mutex_enter(&rule->ir_lock);
+		if (!(rule->ir_flags & ILB_RULE_BUSY)) {
+			if (ret_rule != NULL) {
+				rule->ir_refcnt++;
+				*ret_rule = rule;
+			}
+			found = B_TRUE;
+		}
+		mutex_exit(&rule->ir_lock);
+	}
+	mutex_exit(&bucket->ilb_hash_lock);
+	return (found);
+}
+
+/*
+ * IPv4 version of ilb_rule_match_vip_v6(): check to see if there is a
+ * rule whose target has addr as its last 32 bits.
+ *
+ * Only the first such rule in the hash bucket is considered. B_TRUE is
+ * returned if that rule is not busy; if ret_rule is not NULL, the rule
+ * is then also held (ir_refcnt incremented) and returned in *ret_rule.
+ * Otherwise B_FALSE is returned and *ret_rule is left untouched.
+ */
+boolean_t
+ilb_rule_match_vip_v4(ilb_stack_t *ilbs, ipaddr_t addr, ilb_rule_t **ret_rule)
+{
+	ilb_hash_t *bucket;
+	ilb_rule_t *rule;
+	boolean_t found = B_FALSE;
+	int idx;
+
+	idx = ILB_RULE_HASH((uint8_t *)&addr, ilbs->ilbs_rule_hash_size);
+	bucket = &ilbs->ilbs_g_hash[idx];
+
+	mutex_enter(&bucket->ilb_hash_lock);
+	rule = bucket->ilb_hash_rule;
+	while (rule != NULL && rule->ir_target_v6.s6_addr32[3] != addr)
+		rule = rule->ir_hash_next;
+	if (rule != NULL) {
+		mutex_enter(&rule->ir_lock);
+		if (!(rule->ir_flags & ILB_RULE_BUSY)) {
+			if (ret_rule != NULL) {
+				rule->ir_refcnt++;
+				*ret_rule = rule;
+			}
+			found = B_TRUE;
+		}
+		mutex_exit(&rule->ir_lock);
+	}
+	mutex_exit(&bucket->ilb_hash_lock);
+	return (found);
+}
+
+/*
+ * Find the rule with the given name (compared ignoring case) and zone
+ * in the global rule list. The caller must hold ilbs_g_lock.
+ *
+ * On success the rule is returned held (ir_refcnt incremented) and *err
+ * is 0. Otherwise NULL is returned and *err is set to ENOENT if there is
+ * no such rule, or to EINPROGRESS if the rule is busy.
+ */
+static ilb_rule_t *
+ilb_find_rule_locked(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name,
+    int *err)
+{
+	ilb_rule_t *cur;
+
+	ASSERT(mutex_owned(&ilbs->ilbs_g_lock));
+
+	for (cur = ilbs->ilbs_rule_head; cur != NULL; cur = cur->ir_next) {
+		if (cur->ir_zoneid == zoneid &&
+		    strcasecmp(cur->ir_name, name) == 0) {
+			break;
+		}
+	}
+	if (cur == NULL) {
+		*err = ENOENT;
+		return (NULL);
+	}
+
+	mutex_enter(&cur->ir_lock);
+	if (cur->ir_flags & ILB_RULE_BUSY) {
+		mutex_exit(&cur->ir_lock);
+		*err = EINPROGRESS;
+		return (NULL);
+	}
+	cur->ir_refcnt++;
+	mutex_exit(&cur->ir_lock);
+
+	*err = 0;
+	return (cur);
+}
+
+/*
+ * Find the rule with the given name and zone in the global rule list.
+ * This takes ilbs_g_lock around ilb_find_rule_locked(); see that routine
+ * for the meaning of the return value and of *err.
+ */
+ilb_rule_t *
+ilb_find_rule(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name,
+    int *err)
+{
+	ilb_rule_t *found;
+
+	mutex_enter(&ilbs->ilbs_g_lock);
+	found = ilb_find_rule_locked(ilbs, zoneid, name, err);
+	mutex_exit(&ilbs->ilbs_g_lock);
+
+	return (found);
+}
+
+/*
+ * Check whether a rule with the given attributes would clash with a rule
+ * that already exists in the same zone. The caller must hold
+ * ilbs_g_lock. Returns B_TRUE if there is a clash, B_FALSE otherwise.
+ */
+static boolean_t
+ilb_match_rule(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name, int l3,
+    int l4, in_port_t min_port, in_port_t max_port, const in6_addr_t *addr)
+{
+	ilb_rule_t *cur;
+
+	ASSERT(mutex_owned(&ilbs->ilbs_g_lock));
+
+	for (cur = ilbs->ilbs_rule_head; cur != NULL; cur = cur->ir_next) {
+		if (cur->ir_zoneid != zoneid)
+			continue;
+
+		/*
+		 * A name can only be used once, no matter how different
+		 * the other rule components are.
+		 */
+		if (strcasecmp(cur->ir_name, name) == 0)
+			return (B_TRUE);
+
+		/*
+		 * Otherwise there is a clash when the IP version and the
+		 * protocol are the same, the port ranges overlap (a rule
+		 * without a port range has ir_min_port == ir_max_port)
+		 * and the addresses are equal or one of them is the
+		 * unspecified address. If l3 is IPv4, addr is assumed to
+		 * be a mapped address.
+		 */
+		if (cur->ir_ipver == l3 && cur->ir_proto == l4 &&
+		    cur->ir_max_port >= min_port &&
+		    cur->ir_min_port <= max_port &&
+		    (V6_OR_V4_INADDR_ANY(*addr) ||
+		    V6_OR_V4_INADDR_ANY(cur->ir_target_v6) ||
+		    IN6_ARE_ADDR_EQUAL(addr, &cur->ir_target_v6))) {
+			return (B_TRUE);
+		}
+	}
+	return (B_FALSE);
+}
+
+/*
+ * Enable a rule. The rule is given either by name (rule_name, with
+ * in_rule == NULL) or directly (in_rule, with rule_name == NULL).
+ *
+ * Returns 0 on success, or the error set by ilb_find_rule() if the rule
+ * has to be looked up and the lookup fails.
+ */
+int
+ilb_rule_enable(ilb_stack_t *ilbs, zoneid_t zoneid,
+    const char *rule_name, ilb_rule_t *in_rule)
+{
+	boolean_t looked_up = (in_rule == NULL);
+	ilb_rule_t *rule = in_rule;
+	int err;
+
+	ASSERT((in_rule == NULL && rule_name != NULL) ||
+	    (in_rule != NULL && rule_name == NULL));
+
+	if (looked_up) {
+		rule = ilb_find_rule(ilbs, zoneid, rule_name, &err);
+		if (rule == NULL)
+			return (err);
+	}
+
+	mutex_enter(&rule->ir_lock);
+	rule->ir_flags |= ILB_RULE_ENABLED;
+	mutex_exit(&rule->ir_lock);
+
+	/* Only drop the reference if it was taken by the lookup above. */
+	if (looked_up)
+		ILB_RULE_REFRELE(rule);
+	return (0);
+}
+
+/*
+ * Disable a rule. The rule is given either by name (rule_name, with
+ * in_rule == NULL) or directly (in_rule, with rule_name == NULL).
+ *
+ * Returns 0 on success, or the error set by ilb_find_rule() if the rule
+ * has to be looked up and the lookup fails.
+ */
+int
+ilb_rule_disable(ilb_stack_t *ilbs, zoneid_t zoneid,
+    const char *rule_name, ilb_rule_t *in_rule)
+{
+	boolean_t looked_up = (in_rule == NULL);
+	ilb_rule_t *rule = in_rule;
+	int err;
+
+	ASSERT((in_rule == NULL && rule_name != NULL) ||
+	    (in_rule != NULL && rule_name == NULL));
+
+	if (looked_up) {
+		rule = ilb_find_rule(ilbs, zoneid, rule_name, &err);
+		if (rule == NULL)
+			return (err);
+	}
+
+	mutex_enter(&rule->ir_lock);
+	rule->ir_flags &= ~ILB_RULE_ENABLED;
+	mutex_exit(&rule->ir_lock);
+
+	/* Only drop the reference if it was taken by the lookup above. */
+	if (looked_up)
+		ILB_RULE_REFRELE(rule);
+	return (0);
+}
+
+/*
+ * XXX We should probably have a walker function to walk all rules. For
+ * now, just add a simple loop for enable/disable/del.
+ */
+/* Enable every rule of the given zone. */
+void
+ilb_rule_enable_all(ilb_stack_t *ilbs, zoneid_t zoneid)
+{
+	ilb_rule_t *rule;
+
+	mutex_enter(&ilbs->ilbs_g_lock);
+	rule = ilbs->ilbs_rule_head;
+	while (rule != NULL) {
+		/*
+		 * The rule does not need to be held: the global lock is
+		 * held, so the rule won't go away. The return value is
+		 * ignored as the call cannot fail when the rule itself
+		 * is provided.
+		 */
+		if (rule->ir_zoneid == zoneid)
+			(void) ilb_rule_enable(ilbs, zoneid, NULL, rule);
+		rule = rule->ir_next;
+	}
+	mutex_exit(&ilbs->ilbs_g_lock);
+}
+
+/* Disable every rule of the given zone. */
+void
+ilb_rule_disable_all(ilb_stack_t *ilbs, zoneid_t zoneid)
+{
+	ilb_rule_t *rule;
+
+	mutex_enter(&ilbs->ilbs_g_lock);
+	rule = ilbs->ilbs_rule_head;
+	while (rule != NULL) {
+		/*
+		 * The global lock keeps the rule around, and the call
+		 * cannot fail when the rule itself is provided.
+		 */
+		if (rule->ir_zoneid == zoneid)
+			(void) ilb_rule_disable(ilbs, zoneid, NULL, rule);
+		rule = rule->ir_next;
+	}
+	mutex_exit(&ilbs->ilbs_g_lock);
+}
+
+/*
+ * Delete every rule of the given zone. Each rule is removed from the
+ * rule hash and from the global list, and a task is dispatched to the
+ * rule taskq to finish the deletion. Rules of other zones are left
+ * alone.
+ */
+void
+ilb_rule_del_all(ilb_stack_t *ilbs, zoneid_t zoneid)
+{
+	ilb_rule_t *rule;
+	ilb_rule_tq_t *arg;
+
+	mutex_enter(&ilbs->ilbs_g_lock);
+	rule = ilbs->ilbs_rule_head;
+	while (rule != NULL) {
+		if (rule->ir_zoneid != zoneid) {
+			/*
+			 * Step over rules of other zones. Looking at the
+			 * list head again here would loop forever with
+			 * the global lock held.
+			 */
+			rule = rule->ir_next;
+			continue;
+		}
+		ilb_rule_hash_del(rule);
+		ilb_rule_g_del(ilbs, rule);
+		mutex_exit(&ilbs->ilbs_g_lock);
+
+		arg = kmem_alloc(sizeof (ilb_rule_tq_t), KM_SLEEP);
+		arg->ilbs = ilbs;
+		arg->rule = rule;
+		(void) taskq_dispatch(ilbs->ilbs_rule_taskq, ilb_rule_del_tq,
+		    arg, TQ_SLEEP);
+
+		/*
+		 * The list may have changed while the lock was dropped,
+		 * so start over from the head.
+		 */
+		mutex_enter(&ilbs->ilbs_g_lock);
+		rule = ilbs->ilbs_rule_head;
+	}
+	mutex_exit(&ilbs->ilbs_g_lock);
+}
+
+/*
+ * Tell whether the stack has any rule. This is just an optimization, so
+ * the global lock is deliberately not taken. The worst case is that we
+ * missed a couple packets.
+ */
+boolean_t
+ilb_has_rules(ilb_stack_t *ilbs)
+{
+	return (ilbs->ilbs_rule_head == NULL ? B_FALSE : B_TRUE);
+}
+
+
+/*
+ * Enable or disable the back end server with address addr in a rule.  The
+ * rule is given either by name (rule_name) or directly (rule), never both.
+ * When it is looked up by name, the reference obtained from the lookup is
+ * released before returning.
+ *
+ * Returns the lookup error if the rule cannot be found, ENOENT if the rule
+ * has no server with that address, or whatever the algorithm's
+ * enable/disable routine returns.
+ */
+static int
+ilb_server_toggle(ilb_stack_t *ilbs, zoneid_t zoneid, const char *rule_name,
+    ilb_rule_t *rule, in6_addr_t *addr, boolean_t enable)
+{
+	ilb_server_t *srv;
+	int ret;
+
+	ASSERT((rule == NULL && rule_name != NULL) ||
+	    (rule != NULL && rule_name == NULL));
+
+	if (rule == NULL) {
+		rule = ilb_find_rule(ilbs, zoneid, rule_name, &ret);
+		if (rule == NULL)
+			return (ret);
+	}
+
+	/* Once we get a hold on the rule, no server can be added/deleted. */
+	srv = rule->ir_servers;
+	while (srv != NULL &&
+	    !(IN6_ARE_ADDR_EQUAL(&srv->iser_addr_v6, addr))) {
+		srv = srv->iser_next;
+	}
+
+	if (srv == NULL) {
+		ret = ENOENT;
+	} else if (enable) {
+		ret = rule->ir_alg->ilb_alg_server_enable(srv,
+		    rule->ir_alg->ilb_alg_data);
+		if (ret == 0) {
+			srv->iser_enabled = B_TRUE;
+			srv->iser_die_time = 0;
+		}
+	} else {
+		ret = rule->ir_alg->ilb_alg_server_disable(srv,
+		    rule->ir_alg->ilb_alg_data);
+		if (ret == 0) {
+			srv->iser_enabled = B_FALSE;
+			/* Only arm the die time if draining is bounded. */
+			if (rule->ir_conn_drain_timeout != 0) {
+				(void) atomic_swap_64(
+				    (uint64_t *)&srv->iser_die_time,
+				    lbolt64 + SEC_TO_TICK(
+				    rule->ir_conn_drain_timeout));
+			}
+		}
+	}
+
+	/* Drop the hold taken by the name lookup, if there was one. */
+	if (rule_name != NULL)
+		ILB_RULE_REFRELE(rule);
+	return (ret);
+}
+/* Enable a back end server of a rule; see ilb_server_toggle(). */
+int
+ilb_server_enable(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name,
+    ilb_rule_t *rule, in6_addr_t *addr)
+{
+	int ret;
+
+	ret = ilb_server_toggle(ilbs, zoneid, name, rule, addr, B_TRUE);
+	return (ret);
+}
+
+/* Disable a back end server of a rule; see ilb_server_toggle(). */
+int
+ilb_server_disable(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name,
+    ilb_rule_t *rule, in6_addr_t *addr)
+{
+	int ret;
+
+	ret = ilb_server_toggle(ilbs, zoneid, name, rule, addr, B_FALSE);
+	return (ret);
+}
+
+/*
+ * Add a back end server to a rule.  If the address is IPv4, it is assumed
+ * to be passed in as a mapped address.
+ *
+ * The ports in info are in network byte order; a min_port of 0 means "all
+ * ports" and is turned into the range 1-65535.  The rule is flagged
+ * ILB_RULE_BUSY while the server list is modified, and the flag is cleared
+ * (and a waiter signalled) on every path taken after it has been set.
+ *
+ * Returns 0 on success, otherwise:
+ *	EINVAL	bad port range, address family not matching the rule, or
+ *		the server kstat could not be created
+ *	EINTR	interrupted by a signal while waiting for the rule
+ *	EEXIST	a server with the same address or name is already present
+ *	ENOMEM	no memory for the new server
+ *	or the error returned by ilb_create_nat_src() or by the load
+ *	balancing algorithm's server add routine.
+ */
+int
+ilb_server_add(ilb_stack_t *ilbs, ilb_rule_t *rule, ilb_server_info_t *info)
+{
+	ilb_server_t *server;
+	netstackid_t stackid;
+	int ret = 0;
+	in_port_t min_port, max_port;
+	in_port_t range;
+
+	/* Port is passed in network byte order. */
+	min_port = ntohs(info->min_port);
+	max_port = ntohs(info->max_port);
+	if (min_port > max_port)
+		return (EINVAL);
+
+	/* min_port == 0 means "all ports". Make it so */
+	if (min_port == 0) {
+		min_port = 1;
+		max_port = 65535;
+	}
+	range = max_port - min_port;
+
+	mutex_enter(&rule->ir_lock);
+	/* If someone is already doing server add/del, sleep and wait. */
+	while (rule->ir_flags & ILB_RULE_BUSY) {
+		if (cv_wait_sig(&rule->ir_cv, &rule->ir_lock) == 0) {
+			/* BUSY belongs to someone else; do not clear it. */
+			mutex_exit(&rule->ir_lock);
+			return (EINTR);
+		}
+	}
+
+	/*
+	 * Set the rule to be busy to make sure that no new packet can
+	 * use this rule.
+	 */
+	rule->ir_flags |= ILB_RULE_BUSY;
+
+	/* Now wait for all other guys to finish their work. */
+	while (rule->ir_refcnt > 2) {
+		if (cv_wait_sig(&rule->ir_cv, &rule->ir_lock) == 0) {
+			mutex_exit(&rule->ir_lock);
+			ret = EINTR;
+			goto end;
+		}
+	}
+	mutex_exit(&rule->ir_lock);
+
+	/* Sanity checks... */
+	if ((IN6_IS_ADDR_V4MAPPED(&info->addr) &&
+	    rule->ir_ipver != IPPROTO_IP) ||
+	    (!IN6_IS_ADDR_V4MAPPED(&info->addr) &&
+	    rule->ir_ipver != IPPROTO_IPV6)) {
+		ret = EINVAL;
+		goto end;
+	}
+
+	/*
+	 * Check for valid port range.
+	 *
+	 * For DSR, there can be no port shifting.  Hence the server
+	 * specification must be the same as the rule's.
+	 *
+	 * For half-NAT/NAT, the range must either be 0 (port collapsing) or
+	 * it must be equal to the same value as the rule port range.
+	 */
+	if (rule->ir_topo == ILB_TOPO_IMPL_DSR) {
+		if (rule->ir_max_port != max_port ||
+		    rule->ir_min_port != min_port) {
+			ret = EINVAL;
+			goto end;
+		}
+	} else {
+		if ((range != rule->ir_max_port - rule->ir_min_port) &&
+		    range != 0) {
+			ret = EINVAL;
+			goto end;
+		}
+	}
+
+	/* Check for duplicate. */
+	for (server = rule->ir_servers; server != NULL;
+	    server = server->iser_next) {
+		if (IN6_ARE_ADDR_EQUAL(&server->iser_addr_v6, &info->addr) ||
+		    strcasecmp(server->iser_name, info->name) == 0) {
+			break;
+		}
+	}
+	if (server != NULL) {
+		ret = EEXIST;
+		goto end;
+	}
+
+	if ((server = kmem_zalloc(sizeof (ilb_server_t), KM_NOSLEEP)) == NULL) {
+		ret = ENOMEM;
+		goto end;
+	}
+
+	/* The structure is zeroed, so the copied name stays terminated. */
+	(void) memcpy(server->iser_name, info->name, ILB_SERVER_NAMESZ - 1);
+	(void) inet_ntop(AF_INET6, &info->addr, server->iser_ip_addr,
+	    sizeof (server->iser_ip_addr));
+	stackid = (netstackid_t)(uintptr_t)ilbs->ilbs_ksp->ks_private;
+	server->iser_ksp = ilb_server_kstat_init(stackid, rule, server);
+	if (server->iser_ksp == NULL) {
+		kmem_free(server, sizeof (ilb_server_t));
+		ret = EINVAL;
+		goto end;
+	}
+
+	server->iser_stackid = stackid;
+	server->iser_addr_v6 = info->addr;
+	server->iser_min_port = min_port;
+	server->iser_max_port = max_port;
+	if (min_port != max_port)
+		server->iser_port_range = B_TRUE;
+	else
+		server->iser_port_range = B_FALSE;
+
+	/*
+	 * If the rule uses NAT, find/create the NAT source entry to use
+	 * for this server.
+	 */
+	if (rule->ir_topo == ILB_TOPO_IMPL_NAT) {
+		in_port_t port;
+
+		/*
+		 * If the server uses a port range, our port allocation
+		 * scheme needs to treat it as a wildcard.  Refer to the
+		 * comments in ilb_nat.c about the scheme.
+		 */
+		if (server->iser_port_range)
+			port = 0;
+		else
+			port = server->iser_min_port;
+
+		if ((ret = ilb_create_nat_src(ilbs, &server->iser_nat_src,
+		    &server->iser_addr_v6, port, &rule->ir_nat_src_start,
+		    num_nat_src_v6(&rule->ir_nat_src_start,
+		    &rule->ir_nat_src_end))) != 0) {
+			kstat_delete_netstack(server->iser_ksp, stackid);
+			kmem_free(server, sizeof (ilb_server_t));
+			goto end;
+		}
+	}
+
+	/*
+	 * The iser_lock is only used to protect iser_refcnt.  All the other
+	 * fields in ilb_server_t should not change, except for iser_enabled.
+	 * The worst thing that can happen if iser_enabled is messed up is
+	 * that one or two packets may not be load balanced to a server
+	 * correctly.
+	 */
+	server->iser_refcnt = 1;
+	server->iser_enabled = info->flags & ILB_SERVER_ENABLED ? B_TRUE :
+	    B_FALSE;
+	mutex_init(&server->iser_lock, NULL, MUTEX_DEFAULT, NULL);
+	cv_init(&server->iser_cv, NULL, CV_DEFAULT, NULL);
+
+	/* Let the load balancing algorithm know about the addition. */
+	ASSERT(rule->ir_alg != NULL);
+	if ((ret = rule->ir_alg->ilb_alg_server_add(server,
+	    rule->ir_alg->ilb_alg_data)) != 0) {
+		/*
+		 * Undo everything set up above: the NAT source created for
+		 * a NAT rule, the lock and cv, the kstat and finally the
+		 * server itself.  Nothing else can see the server yet.
+		 */
+		if (rule->ir_topo == ILB_TOPO_IMPL_NAT)
+			ilb_destroy_nat_src(&server->iser_nat_src);
+		cv_destroy(&server->iser_cv);
+		mutex_destroy(&server->iser_lock);
+		kstat_delete_netstack(server->iser_ksp, stackid);
+		kmem_free(server, sizeof (ilb_server_t));
+		goto end;
+	}
+
+	/*
+	 * No need to hold ir_lock since no other thread should manipulate
+	 * the following fields until ILB_RULE_BUSY is cleared.  The new
+	 * server goes to the front of the list (iser_next ends up NULL when
+	 * the list was empty).
+	 */
+	server->iser_next = rule->ir_servers;
+	rule->ir_servers = server;
+	ILB_R_KSTAT(rule, num_servers);
+
+end:
+	mutex_enter(&rule->ir_lock);
+	rule->ir_flags &= ~ILB_RULE_BUSY;
+	cv_signal(&rule->ir_cv);
+	mutex_exit(&rule->ir_lock);
+	return (ret);
+}
+
+/*
+ * The routine executed by the delayed rule processing taskq.  It waits
+ * until only one reference to the server (arg) is left, then destroys the
+ * server's kstat and synchronization objects and frees it.  The lock is
+ * released before it is destroyed so that the memory is never freed with
+ * the mutex still held.
+ */
+static void
+ilb_server_del_tq(void *arg)
+{
+	ilb_server_t *server = (ilb_server_t *)arg;
+
+	mutex_enter(&server->iser_lock);
+	while (server->iser_refcnt > 1)
+		cv_wait(&server->iser_cv, &server->iser_lock);
+	mutex_exit(&server->iser_lock);
+
+	kstat_delete_netstack(server->iser_ksp, server->iser_stackid);
+	cv_destroy(&server->iser_cv);
+	mutex_destroy(&server->iser_lock);
+	kmem_free(server, sizeof (ilb_server_t));
+}
+
+/*
+ * Delete a back end server from a rule.  If the address is IPv4, it is
+ * assumed to be passed in as a mapped address.
+ *
+ * The rule is given either by name (rule_name) or directly (rule), never
+ * both.  When it is looked up by name, the reference obtained from the
+ * lookup is dropped again before returning.  The rule is flagged
+ * ILB_RULE_BUSY while the server list is modified.
+ *
+ * If other references to the server remain, the final tear down is handed
+ * to the rule taskq (ilb_server_del_tq); otherwise the server is destroyed
+ * right here.
+ *
+ * Returns 0 on success, the lookup error if the rule is not found, EINTR
+ * if interrupted by a signal while waiting, ENOENT if the rule has no
+ * server with that address, or the error returned by the load balancing
+ * algorithm's server delete routine.
+ */
+int
+ilb_server_del(ilb_stack_t *ilbs, zoneid_t zoneid, const char *rule_name,
+    ilb_rule_t *rule, in6_addr_t *addr)
+{
+	ilb_server_t *server;
+	ilb_server_t *prev_server;
+	int ret = 0;
+
+	ASSERT((rule == NULL && rule_name != NULL) ||
+	    (rule != NULL && rule_name == NULL));
+	if (rule == NULL) {
+		if ((rule = ilb_find_rule(ilbs, zoneid, rule_name,
+		    &ret)) == NULL) {
+			return (ret);
+		}
+	}
+
+	mutex_enter(&rule->ir_lock);
+	/* If someone is already doing server add/del, sleep and wait. */
+	while (rule->ir_flags & ILB_RULE_BUSY) {
+		if (cv_wait_sig(&rule->ir_cv, &rule->ir_lock) == 0) {
+			/*
+			 * BUSY belongs to someone else, so only give back
+			 * the hold taken by the name lookup.
+			 */
+			if (rule_name != NULL) {
+				if (--rule->ir_refcnt <= 2)
+					cv_signal(&rule->ir_cv);
+			}
+			mutex_exit(&rule->ir_lock);
+			return (EINTR);
+		}
+	}
+	/*
+	 * Set the rule to be busy to make sure that no new packet can
+	 * use this rule.
+	 */
+	rule->ir_flags |= ILB_RULE_BUSY;
+
+	/* Now wait for all other guys to finish their work. */
+	while (rule->ir_refcnt > 2) {
+		if (cv_wait_sig(&rule->ir_cv, &rule->ir_lock) == 0) {
+			mutex_exit(&rule->ir_lock);
+			ret = EINTR;
+			goto end;
+		}
+	}
+	mutex_exit(&rule->ir_lock);
+
+	prev_server = NULL;
+	for (server = rule->ir_servers; server != NULL;
+	    prev_server = server, server = server->iser_next) {
+		if (IN6_ARE_ADDR_EQUAL(&server->iser_addr_v6, addr))
+			break;
+	}
+	if (server == NULL) {
+		ret = ENOENT;
+		goto end;
+	}
+
+	/*
+	 * Let the load balancing algorithm know about the removal.
+	 * The algorithm may disallow the removal...
+	 */
+	if ((ret = rule->ir_alg->ilb_alg_server_del(server,
+	    rule->ir_alg->ilb_alg_data)) != 0) {
+		goto end;
+	}
+
+	/* Unlink the server from the rule's list. */
+	if (prev_server == NULL)
+		rule->ir_servers = server->iser_next;
+	else
+		prev_server->iser_next = server->iser_next;
+
+	ILB_R_KSTAT_UPDATE(rule, num_servers, -1);
+
+	/*
+	 * Mark the server as disabled so that if there is any sticky cache
+	 * using this server around, it won't be used.
+	 */
+	server->iser_enabled = B_FALSE;
+
+	mutex_enter(&server->iser_lock);
+
+	/*
+	 * De-allocate the NAT source array.  The individual
+	 * ilb_nat_src_entry_t may not go away if there is still a conn
+	 * using it.  The NAT source timer will do the garbage collection.
+	 */
+	ilb_destroy_nat_src(&server->iser_nat_src);
+
+	/* If there is a hard limit on when a server should die, set it. */
+	if (rule->ir_conn_drain_timeout != 0) {
+		(void) atomic_swap_64((uint64_t *)&server->iser_die_time,
+		    lbolt64 + SEC_TO_TICK(rule->ir_conn_drain_timeout));
+	}
+
+	if (server->iser_refcnt > 1) {
+		/* Still referenced: let the taskq wait and free it later. */
+		(void) taskq_dispatch(ilbs->ilbs_rule_taskq, ilb_server_del_tq,
+		    server, TQ_SLEEP);
+		mutex_exit(&server->iser_lock);
+	} else {
+		/*
+		 * Nobody else holds the server.  Release the lock before
+		 * destroying it so that the memory is not freed with the
+		 * mutex still held.
+		 */
+		mutex_exit(&server->iser_lock);
+		kstat_delete_netstack(server->iser_ksp, server->iser_stackid);
+		cv_destroy(&server->iser_cv);
+		mutex_destroy(&server->iser_lock);
+		kmem_free(server, sizeof (ilb_server_t));
+	}
+
+end:
+	mutex_enter(&rule->ir_lock);
+	rule->ir_flags &= ~ILB_RULE_BUSY;
+	if (rule_name != NULL)
+		rule->ir_refcnt--;
+	cv_signal(&rule->ir_cv);
+	mutex_exit(&rule->ir_lock);
+	return (ret);
+}
+
+/*
+ * First check if the destination of the ICMP message matches a VIP of
+ * a rule.  If it does not, just return ILB_PASSED.
+ *
+ * If the destination matches a VIP:
+ *
+ * For ICMP_ECHO_REQUEST, generate a response on behalf of the back end
+ * server.
+ *
+ * For ICMP_DEST_UNREACHABLE fragmentation needed, check inside the payload
+ * and see which back end server we should send this message to.  And we
+ * need to do NAT on both the payload message and the outside IP packet.
+ *
+ * For other ICMP messages, drop them.
+ *
+ * *lb_dst is only written when ILB_BALANCED is returned; on every other
+ * return it is left untouched.  The hold on the matched rule is released
+ * on every path.
+ */
+/* ARGSUSED */
+static int
+ilb_icmp_v4(ilb_stack_t *ilbs, ill_t *ill, mblk_t *mp, ipha_t *ipha,
+    icmph_t *icmph, ipaddr_t *lb_dst)
+{
+	ipaddr_t vip;
+	ilb_rule_t *rule;
+	in6_addr_t addr6;
+
+	if (!ilb_rule_match_vip_v4(ilbs, ipha->ipha_dst, &rule))
+		return (ILB_PASSED);
+
+	/* The ICMP header must lie completely inside this mblk. */
+	if ((uint8_t *)icmph + sizeof (icmph_t) > mp->b_wptr) {
+		ILB_R_KSTAT(rule, icmp_dropped);
+		ILB_RULE_REFRELE(rule);
+		return (ILB_DROPPED);
+	}
+
+	switch (icmph->icmph_type) {
+	case ICMP_ECHO_REQUEST:
+		ILB_R_KSTAT(rule, icmp_echo_processed);
+		ILB_RULE_REFRELE(rule);
+
+		/* Turn the request into a reply in place. */
+		icmph->icmph_type = ICMP_ECHO_REPLY;
+		icmph->icmph_checksum = 0;
+		icmph->icmph_checksum = IP_CSUM(mp, IPH_HDR_LENGTH(ipha), 0);
+		ipha->ipha_ttl =
+		    ilbs->ilbs_netstack->netstack_ip->ips_ip_def_ttl;
+		/* Send it back to the sender: swap source and destination. */
+		*lb_dst = ipha->ipha_src;
+		vip = ipha->ipha_dst;
+		ipha->ipha_dst = ipha->ipha_src;
+		ipha->ipha_src = vip;
+		return (ILB_BALANCED);
+	case ICMP_DEST_UNREACHABLE: {
+		int ret;
+
+		if (icmph->icmph_code != ICMP_FRAGMENTATION_NEEDED) {
+			ILB_R_KSTAT(rule, icmp_dropped);
+			ILB_RULE_REFRELE(rule);
+			return (ILB_DROPPED);
+		}
+		if (ilb_check_icmp_conn(ilbs, mp, IPPROTO_IP, ipha, icmph,
+		    &addr6)) {
+			ILB_R_KSTAT(rule, icmp_2big_processed);
+			/*
+			 * addr6 is only used after a successful lookup, so
+			 * it is converted here and not on the drop path.
+			 */
+			IN6_V4MAPPED_TO_IPADDR(&addr6, *lb_dst);
+			ret = ILB_BALANCED;
+		} else {
+			ILB_R_KSTAT(rule, icmp_2big_dropped);
+			ret = ILB_DROPPED;
+		}
+		ILB_RULE_REFRELE(rule);
+		return (ret);
+	}
+	default:
+		ILB_R_KSTAT(rule, icmp_dropped);
+		ILB_RULE_REFRELE(rule);
+		return (ILB_DROPPED);
+	}
+}
+
+/*
+ * IPv6 counterpart of the ICMP handling: pass the message on if its
+ * destination is not a VIP of a rule, answer echo requests on behalf of
+ * the back end server, try to map "packet too big" messages to an existing
+ * conn, and drop everything else (including truncated ICMPv6 headers).
+ */
+/* ARGSUSED */
+static int
+ilb_icmp_v6(ilb_stack_t *ilbs, ill_t *ill, mblk_t *mp, ip6_t *ip6h,
+    icmp6_t *icmp6, in6_addr_t *lb_dst)
+{
+	ilb_rule_t *rule;
+	int result;
+	int hdr_len;
+
+	if (!ilb_rule_match_vip_v6(ilbs, &ip6h->ip6_dst, &rule))
+		return (ILB_PASSED);
+
+	/*
+	 * A header that does not fit in the mblk and a message type we do
+	 * not handle are treated alike.  The type is only examined once the
+	 * header is known to be complete.
+	 */
+	if ((uint8_t *)icmp6 + sizeof (icmp6_t) > mp->b_wptr ||
+	    (icmp6->icmp6_type != ICMP6_ECHO_REQUEST &&
+	    icmp6->icmp6_type != ICMP6_PACKET_TOO_BIG)) {
+		ILB_R_KSTAT(rule, icmp_dropped);
+		ILB_RULE_REFRELE(rule);
+		return (ILB_DROPPED);
+	}
+
+	if (icmp6->icmp6_type == ICMP6_PACKET_TOO_BIG) {
+		if (ilb_check_icmp_conn(ilbs, mp, IPPROTO_IPV6, ip6h, icmp6,
+		    lb_dst)) {
+			ILB_R_KSTAT(rule, icmp_2big_processed);
+			result = ILB_BALANCED;
+		} else {
+			ILB_R_KSTAT(rule, icmp_2big_dropped);
+			result = ILB_DROPPED;
+		}
+		ILB_RULE_REFRELE(rule);
+		return (result);
+	}
+
+	/* What is left is an echo request: rewrite it into a reply. */
+	ILB_R_KSTAT(rule, icmp_echo_processed);
+	ILB_RULE_REFRELE(rule);
+
+	icmp6->icmp6_type = ICMP6_ECHO_REPLY;
+	icmp6->icmp6_cksum = ip6h->ip6_plen;
+	hdr_len = (char *)icmp6 - (char *)ip6h;
+	icmp6->icmp6_cksum = IP_CSUM(mp, hdr_len,
+	    ilb_pseudo_sum_v6(ip6h, IPPROTO_ICMPV6));
+	ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL;
+	ip6h->ip6_hops =
+	    ilbs->ilbs_netstack->netstack_ip->ips_ipv6_def_hops;
+	/* Swap source and destination, using *lb_dst as the temporary. */
+	*lb_dst = ip6h->ip6_src;
+	ip6h->ip6_src = ip6h->ip6_dst;
+	ip6h->ip6_dst = *lb_dst;
+	return (ILB_BALANCED);
+}
+
+/*
+ * Common routine to check an incoming packet and decide what to do with it.
+ * Called by ilb_check_v4|v6().
+ *
+ * Returns ILB_PASSED when the packet is neither TCP nor UDP or when it
+ * matches no rule, ILB_BALANCED when an existing conn matched or a server
+ * of a matching rule was picked (for the latter *lb_dst is set to the
+ * server's address; for the former lb_dst is handed to ilb_check_conn(),
+ * which presumably fills it in), and ILB_DROPPED when the transport header
+ * is truncated, the matching rule is busy, no server is available or a
+ * needed resource could not be allocated.
+ *
+ * The hold on the rule returned by ilb_rule_hash() is released on every
+ * path that got a rule.
+ */
+static int
+ilb_check(ilb_stack_t *ilbs, ill_t *ill, mblk_t *mp, in6_addr_t *src,
+ in6_addr_t *dst, int l3, int l4, void *iph, uint8_t *tph, uint32_t pkt_len,
+ in6_addr_t *lb_dst)
+{
+ in_port_t sport, dport;
+ tcpha_t *tcph;
+ udpha_t *udph;
+ ilb_rule_t *rule;
+ ilb_server_t *server;
+ boolean_t balanced;
+ struct ilb_sticky_s *s = NULL;
+ int ret;
+ uint32_t ip_sum, tp_sum;
+ ilb_nat_info_t info;
+ uint16_t nat_src_idx;
+ boolean_t busy;
+
+ /*
+ * We don't really need to switch here since both protocols'
+ * ports are at the same offset. Just prepare for future protocol
+ * specific processing.
+ *
+ * Either way, the transport header must lie completely inside
+ * this mblk or the packet is dropped. The ports are kept as
+ * found in the header; they are only converted with ntohs() where
+ * arithmetic is done on them further down.
+ */
+ switch (l4) {
+ case IPPROTO_TCP:
+ if (tph + TCP_MIN_HEADER_LENGTH > mp->b_wptr)
+ return (ILB_DROPPED);
+ tcph = (tcpha_t *)tph;
+ sport = tcph->tha_lport;
+ dport = tcph->tha_fport;
+ break;
+ case IPPROTO_UDP:
+ if (tph + sizeof (udpha_t) > mp->b_wptr)
+ return (ILB_DROPPED);
+ udph = (udpha_t *)tph;
+ sport = udph->uha_src_port;
+ dport = udph->uha_dst_port;
+ break;
+ default:
+ return (ILB_PASSED);
+ }
+
+ /* Fast path, there is an existing conn. */
+ if (ilb_check_conn(ilbs, l3, iph, l4, tph, src, dst, sport, dport,
+ pkt_len, lb_dst)) {
+ return (ILB_BALANCED);
+ }
+
+ /*
+ * If there is no existing connection for the incoming packet, check
+ * to see if the packet matches a rule. If not, just let IP decide
+ * what to do with it.
+ *
+ * Note: a reply from back end server should not match a rule. A
+ * reply should match one existing conn.
+ */
+ rule = ilb_rule_hash(ilbs, l3, l4, dst, dport, ill->ill_zoneid,
+ pkt_len, &busy);
+ if (rule == NULL) {
+ /* If the rule is busy, just drop the packet. */
+ if (busy)
+ return (ILB_DROPPED);
+ else
+ return (ILB_PASSED);
+ }
+
+ /*
+ * The packet matches a rule, use the rule load balance algorithm
+ * to find a server.
+ */
+ balanced = rule->ir_alg->ilb_alg_lb(src, sport, dst, dport,
+ rule->ir_alg->ilb_alg_data, &server);
+ /*
+ * This can only happen if there is no server in a rule or all
+ * the servers are currently disabled.
+ */
+ if (!balanced)
+ goto no_server;
+
+ /*
+ * If the rule is sticky enabled, we need to check the sticky table.
+ * If there is a sticky entry for the client, use the previous server
+ * instead of the one found above (note that both can be the same).
+ * If there is no entry for that client, add an entry to the sticky
+ * table. Both the find and add are done in ilb_sticky_find_add()
+ * to avoid checking for duplicate when adding an entry.
+ */
+ if (rule->ir_flags & ILB_RULE_STICKY) {
+ in6_addr_t addr;
+
+ V6_MASK_COPY(*src, rule->ir_sticky_mask, addr);
+ if ((server = ilb_sticky_find_add(ilbs, rule, &addr, server,
+ &s, &nat_src_idx)) == NULL) {
+ ILB_R_KSTAT(rule, nomem_pkt_dropped);
+ ILB_R_KSTAT_UPDATE(rule, nomem_bytes_dropped, pkt_len);
+ goto no_server;
+ }
+ }
+
+ /*
+ * We are holding a reference on the rule, so the server
+ * cannot go away.
+ */
+ *lb_dst = server->iser_addr_v6;
+ ILB_S_KSTAT(server, pkt_processed);
+ ILB_S_KSTAT_UPDATE(server, bytes_processed, pkt_len);
+
+ switch (rule->ir_topo) {
+ case ILB_TOPO_IMPL_NAT: {
+ ilb_nat_src_entry_t *src_ent;
+ uint16_t *src_idx;
+
+ /*
+ * We create a cache even if it is not a SYN segment.
+ * The server should return a RST. When we see the
+ * RST, we will destroy this cache. But by having
+ * a cache, we know how to NAT the returned RST.
+ */
+ info.vip = *dst;
+ info.dport = dport;
+ info.src = *src;
+ info.sport = sport;
+
+ /* If stickiness is enabled, use the same source address */
+ if (s != NULL)
+ src_idx = &nat_src_idx;
+ else
+ src_idx = NULL;
+
+ if ((src_ent = ilb_alloc_nat_addr(server->iser_nat_src,
+ &info.nat_src, &info.nat_sport, src_idx)) == NULL) {
+ if (s != NULL)
+ ilb_sticky_refrele(s);
+ ILB_R_KSTAT(rule, pkt_dropped);
+ ILB_R_KSTAT_UPDATE(rule, bytes_dropped, pkt_len);
+ ILB_R_KSTAT(rule, noport_pkt_dropped);
+ ILB_R_KSTAT_UPDATE(rule, noport_bytes_dropped, pkt_len);
+ ret = ILB_DROPPED;
+ break;
+ }
+ info.src_ent = src_ent;
+ info.nat_dst = server->iser_addr_v6;
+ if (rule->ir_port_range && server->iser_port_range) {
+ info.nat_dport = htons(ntohs(dport) -
+ rule->ir_min_port + server->iser_min_port);
+ } else {
+ info.nat_dport = htons(server->iser_min_port);
+ }
+
+ /*
+ * If ilb_conn_add() fails, it will release the reference on
+ * sticky info and de-allocate the NAT source port allocated
+ * above.
+ */
+ if (ilb_conn_add(ilbs, rule, server, src, sport, dst,
+ dport, &info, &ip_sum, &tp_sum, s) != 0) {
+ ILB_R_KSTAT(rule, pkt_dropped);
+ ILB_R_KSTAT_UPDATE(rule, bytes_dropped, pkt_len);
+ ILB_R_KSTAT(rule, nomem_pkt_dropped);
+ ILB_R_KSTAT_UPDATE(rule, nomem_bytes_dropped, pkt_len);
+ ret = ILB_DROPPED;
+ break;
+ }
+ ilb_full_nat(l3, iph, l4, tph, &info, ip_sum, tp_sum, B_TRUE);
+ ret = ILB_BALANCED;
+ break;
+ }
+ case ILB_TOPO_IMPL_HALF_NAT:
+ info.vip = *dst;
+ info.nat_dst = server->iser_addr_v6;
+ info.dport = dport;
+ if (rule->ir_port_range && server->iser_port_range) {
+ info.nat_dport = htons(ntohs(dport) -
+ rule->ir_min_port + server->iser_min_port);
+ } else {
+ info.nat_dport = htons(server->iser_min_port);
+ }
+
+ if (ilb_conn_add(ilbs, rule, server, src, sport, dst,
+ dport, &info, &ip_sum, &tp_sum, s) != 0) {
+ ILB_R_KSTAT(rule, pkt_dropped);
+ ILB_R_KSTAT_UPDATE(rule, bytes_dropped, pkt_len);
+ ILB_R_KSTAT(rule, nomem_pkt_dropped);
+ ILB_R_KSTAT_UPDATE(rule, nomem_bytes_dropped, pkt_len);
+ ret = ILB_DROPPED;
+ break;
+ }
+ ilb_half_nat(l3, iph, l4, tph, &info, ip_sum, tp_sum, B_TRUE);
+
+ ret = ILB_BALANCED;
+ break;
+ case ILB_TOPO_IMPL_DSR:
+ /*
+ * By decrementing the sticky refcnt, the period of
+ * stickiness (life time of ilb_sticky_t) will be
+ * from now to (now + default expiry time).
+ */
+ if (s != NULL)
+ ilb_sticky_refrele(s);
+ ret = ILB_BALANCED;
+ break;
+ default:
+ cmn_err(CE_PANIC, "data corruption unknown topology: %p",
+ (void *) rule);
+ break;
+ }
+ ILB_RULE_REFRELE(rule);
+ return (ret);
+
+no_server:
+ /* This can only happen if there is no server available. */
+ ILB_R_KSTAT(rule, pkt_dropped);
+ ILB_R_KSTAT_UPDATE(rule, bytes_dropped, pkt_len);
+ ILB_RULE_REFRELE(rule);
+ return (ILB_DROPPED);
+}
+
+/*
+ * IPv4 entry point for checking a packet.  ICMP is handed to ilb_icmp_v4();
+ * everything else goes through ilb_check() with the addresses converted to
+ * IPv4-mapped form.  *lb_dst is only filled in from ilb_check() when the
+ * packet was balanced.
+ */
+int
+ilb_check_v4(ilb_stack_t *ilbs, ill_t *ill, mblk_t *mp, ipha_t *ipha, int l4,
+    uint8_t *tph, ipaddr_t *lb_dst)
+{
+	in6_addr_t mapped_src, mapped_dst, mapped_lb_dst;
+	int rc;
+
+	ASSERT(DB_REF(mp) == 1);
+
+	if (l4 != IPPROTO_ICMP) {
+		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &mapped_src);
+		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &mapped_dst);
+		rc = ilb_check(ilbs, ill, mp, &mapped_src, &mapped_dst,
+		    IPPROTO_IP, l4, ipha, tph, ntohs(ipha->ipha_length),
+		    &mapped_lb_dst);
+		if (rc == ILB_BALANCED)
+			IN6_V4MAPPED_TO_IPADDR(&mapped_lb_dst, *lb_dst);
+		return (rc);
+	}
+
+	return (ilb_icmp_v4(ilbs, ill, mp, ipha, (icmph_t *)tph, lb_dst));
+}
+
+/*
+ * IPv6 entry point for checking a packet.  ICMPv6 is handed to
+ * ilb_icmp_v6(); everything else goes through ilb_check() with the packet
+ * length taken as the payload length plus the fixed IPv6 header.
+ */
+int
+ilb_check_v6(ilb_stack_t *ilbs, ill_t *ill, mblk_t *mp, ip6_t *ip6h, int l4,
+    uint8_t *tph, in6_addr_t *lb_dst)
+{
+	uint32_t total_len;
+
+	ASSERT(DB_REF(mp) == 1);
+
+	if (l4 != IPPROTO_ICMPV6) {
+		total_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
+		return (ilb_check(ilbs, ill, mp, &ip6h->ip6_src,
+		    &ip6h->ip6_dst, IPPROTO_IPV6, l4, ip6h, tph, total_len,
+		    lb_dst));
+	}
+
+	return (ilb_icmp_v6(ilbs, ill, mp, ip6h, (icmp6_t *)tph, lb_dst));
+}
+
+/* Count the rules on the stack's global list that belong to zoneid. */
+void
+ilb_get_num_rules(ilb_stack_t *ilbs, zoneid_t zoneid, uint32_t *num_rules)
+{
+	ilb_rule_t *cur;
+	uint32_t matched = 0;
+
+	mutex_enter(&ilbs->ilbs_g_lock);
+	cur = ilbs->ilbs_rule_head;
+	while (cur != NULL) {
+		if (cur->ir_zoneid == zoneid)
+			matched++;
+		cur = cur->ir_next;
+	}
+	*num_rules = matched;
+	mutex_exit(&ilbs->ilbs_g_lock);
+}
+
+/*
+ * Report the number of servers of the named rule, as kept in the rule's
+ * num_servers kstat.  Returns 0, or the lookup error if the rule is not
+ * found.
+ */
+int
+ilb_get_num_servers(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name,
+    uint32_t *num_servers)
+{
+	ilb_rule_t *rule;
+	int err;
+
+	rule = ilb_find_rule(ilbs, zoneid, name, &err);
+	if (rule == NULL)
+		return (err);
+
+	*num_servers = rule->ir_kstat.num_servers.value.ui64;
+	ILB_RULE_REFRELE(rule);
+	return (0);
+}
+
+/*
+ * Copy information about the servers of the named rule into the servers
+ * array.  On entry *num_servers is the capacity of the array; on return it
+ * is the number of entries filled in.  Ports are reported in network byte
+ * order.  Returns 0, or the lookup error if the rule is not found.
+ */
+int
+ilb_get_servers(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name,
+    ilb_server_info_t *servers, uint32_t *num_servers)
+{
+	ilb_rule_t *rule;
+	ilb_server_t *srv;
+	ilb_server_info_t *out = servers;
+	size_t room;
+	int err;
+
+	rule = ilb_find_rule(ilbs, zoneid, name, &err);
+	if (rule == NULL)
+		return (err);
+
+	room = *num_servers;
+	srv = rule->ir_servers;
+	while (srv != NULL && room > 0) {
+		(void) memcpy(out->name, srv->iser_name, ILB_SERVER_NAMESZ);
+		out->addr = srv->iser_addr_v6;
+		out->min_port = htons(srv->iser_min_port);
+		out->max_port = htons(srv->iser_max_port);
+		out->flags = srv->iser_enabled ? ILB_SERVER_ENABLED : 0;
+		out->err = 0;
+
+		srv = srv->iser_next;
+		room--;
+		out++;
+	}
+	ILB_RULE_REFRELE(rule);
+
+	/* Capacity minus what is left over is the number copied. */
+	*num_servers -= room;
+
+	return (0);
+}
+
+/*
+ * Copy the names of the rules belonging to zoneid into buf, one fixed size
+ * slot of ILB_RULE_NAMESZ bytes per rule.  On entry *num_names is the
+ * number of slots available; on return it is the number of names copied.
+ * Nothing is touched when no slot is available.
+ */
+void
+ilb_get_rulenames(ilb_stack_t *ilbs, zoneid_t zoneid, uint32_t *num_names,
+    char *buf)
+{
+	ilb_rule_t *cur;
+	int copied = 0;
+
+	if (*num_names == 0)
+		return;
+
+	mutex_enter(&ilbs->ilbs_g_lock);
+	for (cur = ilbs->ilbs_rule_head;
+	    cur != NULL && copied != *num_names; cur = cur->ir_next) {
+		if (cur->ir_zoneid == zoneid) {
+			(void) memcpy(buf, cur->ir_name, ILB_RULE_NAMESZ);
+			buf += ILB_RULE_NAMESZ;
+			copied++;
+		}
+	}
+	mutex_exit(&ilbs->ilbs_g_lock);
+	*num_names = copied;
+}
+
+/*
+ * Fill in cmd with the configuration of the rule named cmd->name.  Ports
+ * are reported in network byte order.  Returns 0, or the lookup error if
+ * the rule is not found.
+ */
+int
+ilb_rule_list(ilb_stack_t *ilbs, zoneid_t zoneid, ilb_rule_cmd_t *cmd)
+{
+	ilb_rule_t *rule;
+	int err;
+
+	rule = ilb_find_rule(ilbs, zoneid, cmd->name, &err);
+	if (rule == NULL)
+		return (err);
+
+	/*
+	 * Apart from the enabled flag, nothing read below changes during
+	 * the life time of a rule, so the rule's mutex is not taken.  The
+	 * worst outcome is a stale enabled flag in the report.
+	 */
+
+	/* What traffic the rule matches. */
+	cmd->ip_ver = rule->ir_ipver;
+	cmd->proto = rule->ir_proto;
+	cmd->vip = rule->ir_target_v6;
+	cmd->min_port = htons(rule->ir_min_port);
+	cmd->max_port = htons(rule->ir_max_port);
+
+	/* How it is balanced. */
+	cmd->algo = rule->ir_alg_type;
+	cmd->topo = rule->ir_topo;
+	cmd->nat_src_start = rule->ir_nat_src_start;
+	cmd->nat_src_end = rule->ir_nat_src_end;
+
+	/* Timers. */
+	cmd->conn_drain_timeout = rule->ir_conn_drain_timeout;
+	cmd->nat_expiry = rule->ir_nat_expiry;
+	cmd->sticky_expiry = rule->ir_sticky_expiry;
+
+	/* Flags; the sticky mask is only reported for sticky rules. */
+	cmd->flags = 0;
+	if (rule->ir_flags & ILB_RULE_ENABLED)
+		cmd->flags |= ILB_RULE_ENABLED;
+	if (rule->ir_flags & ILB_RULE_STICKY) {
+		cmd->flags |= ILB_RULE_STICKY;
+		cmd->sticky_mask = rule->ir_sticky_mask;
+	}
+
+	ILB_RULE_REFRELE(rule);
+	return (0);
+}
+
+/*
+ * Netstack create callback (registered in ilb_ddi_g_init()): allocate and
+ * initialize the per-stack ILB state.
+ *
+ * Returns the new ilb_stack_t, or NULL if the global kstat cannot be
+ * created.  In that case everything allocated or initialized up to that
+ * point (the kstat data and the global lock) is released again.
+ */
+static void *
+ilb_stack_init(netstackid_t stackid, netstack_t *ns)
+{
+	ilb_stack_t *ilbs;
+	char tq_name[TASKQ_NAMELEN];
+
+	ilbs = kmem_alloc(sizeof (ilb_stack_t), KM_SLEEP);
+	ilbs->ilbs_netstack = ns;
+
+	ilbs->ilbs_rule_head = NULL;
+	ilbs->ilbs_g_hash = NULL;
+	mutex_init(&ilbs->ilbs_g_lock, NULL, MUTEX_DEFAULT, NULL);
+
+	ilbs->ilbs_kstat = kmem_alloc(sizeof (ilb_g_kstat_t), KM_SLEEP);
+	if ((ilbs->ilbs_ksp = ilb_kstat_g_init(stackid, ilbs)) == NULL) {
+		/* Do not leak the kstat data or the initialized lock. */
+		kmem_free(ilbs->ilbs_kstat, sizeof (ilb_g_kstat_t));
+		mutex_destroy(&ilbs->ilbs_g_lock);
+		kmem_free(ilbs, sizeof (ilb_stack_t));
+		return (NULL);
+	}
+
+	/*
+	 * ilbs_conn/sticky_hash related info is initialized in
+	 * ilb_conn/sticky_hash_init().
+	 */
+	ilbs->ilbs_conn_taskq = NULL;
+	ilbs->ilbs_rule_hash_size = ilb_rule_hash_size;
+	ilbs->ilbs_conn_hash_size = ilb_conn_hash_size;
+	ilbs->ilbs_c2s_conn_hash = NULL;
+	ilbs->ilbs_s2c_conn_hash = NULL;
+	ilbs->ilbs_conn_timer_list = NULL;
+
+	ilbs->ilbs_sticky_hash = NULL;
+	ilbs->ilbs_sticky_hash_size = ilb_sticky_hash_size;
+	ilbs->ilbs_sticky_timer_list = NULL;
+	ilbs->ilbs_sticky_taskq = NULL;
+
+	/* The allocation is done later when there is a rule using NAT mode. */
+	ilbs->ilbs_nat_src = NULL;
+	ilbs->ilbs_nat_src_hash_size = ilb_nat_src_hash_size;
+	mutex_init(&ilbs->ilbs_nat_src_lock, NULL, MUTEX_DEFAULT, NULL);
+	ilbs->ilbs_nat_src_tid = 0;
+
+	/* For listing the conn hash table */
+	mutex_init(&ilbs->ilbs_conn_list_lock, NULL, MUTEX_DEFAULT, NULL);
+	cv_init(&ilbs->ilbs_conn_list_cv, NULL, CV_DEFAULT, NULL);
+	ilbs->ilbs_conn_list_busy = B_FALSE;
+	ilbs->ilbs_conn_list_cur = 0;
+	ilbs->ilbs_conn_list_connp = NULL;
+
+	/* For listing the sticky hash table */
+	mutex_init(&ilbs->ilbs_sticky_list_lock, NULL, MUTEX_DEFAULT, NULL);
+	cv_init(&ilbs->ilbs_sticky_list_cv, NULL, CV_DEFAULT, NULL);
+	ilbs->ilbs_sticky_list_busy = B_FALSE;
+	ilbs->ilbs_sticky_list_cur = 0;
+	ilbs->ilbs_sticky_list_curp = NULL;
+
+	/* The taskq name embeds the netstack pointer. */
+	(void) snprintf(tq_name, sizeof (tq_name), "ilb_rule_taskq_%p", ns);
+	ilbs->ilbs_rule_taskq = taskq_create(tq_name, ILB_RULE_TASKQ_NUM_THR,
+	    minclsyspri, 1, INT_MAX, TASKQ_PREPOPULATE|TASKQ_DYNAMIC);
+
+	return (ilbs);
+}
+
+/*
+ * Netstack shutdown callback: tear down the sticky and conn hash tables,
+ * delete every remaining rule and finally release the NAT source table if
+ * one was allocated.
+ */
+/* ARGSUSED */
+static void
+ilb_stack_shutdown(netstackid_t stackid, void *arg)
+{
+	ilb_stack_t *ilbs = (ilb_stack_t *)arg;
+	ilb_rule_t *victim;
+
+	ilb_sticky_hash_fini(ilbs);
+	ilb_conn_hash_fini(ilbs);
+
+	mutex_enter(&ilbs->ilbs_g_lock);
+	for (;;) {
+		victim = ilbs->ilbs_rule_head;
+		if (victim == NULL)
+			break;
+		ilb_rule_hash_del(victim);
+		ilb_rule_g_del(ilbs, victim);
+		/* The common delete routine runs without the global lock. */
+		mutex_exit(&ilbs->ilbs_g_lock);
+		ilb_rule_del_common(ilbs, victim);
+		mutex_enter(&ilbs->ilbs_g_lock);
+	}
+	mutex_exit(&ilbs->ilbs_g_lock);
+
+	if (ilbs->ilbs_nat_src != NULL)
+		ilb_nat_src_fini(ilbs);
+}
+
+/*
+ * Netstack destroy callback: release the rule hash, the rule taskq, the
+ * global kstat and its data, and finally the per-stack state itself.
+ */
+static void
+ilb_stack_fini(netstackid_t stackid, void * arg)
+{
+	ilb_stack_t *stack = (ilb_stack_t *)arg;
+
+	ilb_rule_hash_fini(stack);
+	taskq_destroy(stack->ilbs_rule_taskq);
+
+	ilb_kstat_g_fini(stackid, stack);
+	kmem_free(stack->ilbs_kstat, sizeof (ilb_g_kstat_t));
+
+	kmem_free(stack, sizeof (ilb_stack_t));
+}
+
+/* Register the ILB create/shutdown/destroy callbacks with netstack. */
+void
+ilb_ddi_g_init(void)
+{
+	netstack_register(NS_ILB,
+	    ilb_stack_init, ilb_stack_shutdown, ilb_stack_fini);
+}
+
+/*
+ * Unregister ILB from netstack, then tear down the conn and sticky caches.
+ */
+void
+ilb_ddi_g_destroy(void)
+{
+	netstack_unregister(NS_ILB);
+
+	ilb_conn_cache_fini();
+	ilb_sticky_cache_fini();
+}
diff --git a/usr/src/uts/common/inet/ilb/ilb_alg.h b/usr/src/uts/common/inet/ilb/ilb_alg.h
new file mode 100644
index 0000000000..5e94b8b666
--- /dev/null
+++ b/usr/src/uts/common/inet/ilb/ilb_alg.h
@@ -0,0 +1,44 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _INET_ILB_ALG_H
+#define _INET_ILB_ALG_H
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Load balancing algorithm initialization routines. Each takes the rule
+ * it is set up for plus an opaque argument, and returns a pointer to an
+ * ilb_alg_data_t.
+ *
+ * NOTE(review): this header does not itself declare ilb_rule_t or
+ * ilb_alg_data_t; the includer appears to be expected to include
+ * "ilb_impl.h" first, as ilb_alg_hash.c does -- confirm.
+ */
+ilb_alg_data_t *ilb_alg_rr_init(ilb_rule_t *, void *);
+ilb_alg_data_t *ilb_alg_hash_init(ilb_rule_t *, const void *);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _INET_ILB_ALG_H */
diff --git a/usr/src/uts/common/inet/ilb/ilb_alg_hash.c b/usr/src/uts/common/inet/ilb/ilb_alg_hash.c
new file mode 100644
index 0000000000..94140b7d8e
--- /dev/null
+++ b/usr/src/uts/common/inet/ilb/ilb_alg_hash.c
@@ -0,0 +1,431 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/cmn_err.h>
+#include <netinet/in.h>
+#include <inet/ip.h>
+#include <inet/ip6.h>
+#include <sys/crc32.h>
+
+#include <inet/ilb.h>
+#include "ilb_impl.h"
+#include "ilb_alg.h"
+
+/*
+ * Hash helpers.  Each macro runs CRC32 over a 32-bit key, stores the result
+ * in `hash' and then reduces it modulo `size', so `size' must not be 0.
+ * The _V6 variants use only the last 32 bits (s6_addr32[3]) of each
+ * address.
+ */
+
+/* Key: the address alone. */
+#define	HASH_IP_V4(hash, addr, size) \
+{ \
+	CRC32((hash), &(addr), sizeof (in_addr_t), -1U, crc32_table); \
+	(hash) %= (size); \
+}
+#define	HASH_IP_V6(hash, addr, size) \
+	HASH_IP_V4((hash), (addr)->s6_addr32[3], (size))
+
+/*
+ * Key: the address mixed with the port.  The port is widened to uint32_t
+ * before it is shifted: an in_port_t would otherwise be promoted to int,
+ * and shifting a value >= 0x8000 left by 16 overflows a signed int.
+ */
+#define	HASH_IP_PORT_V4(hash, addr, port, size) \
+{ \
+	uint32_t val = (addr) ^ ((uint32_t)(port) << 16) ^ (port); \
+	CRC32((hash), &val, sizeof (uint32_t), -1U, crc32_table); \
+	(hash) %= (size); \
+}
+#define	HASH_IP_PORT_V6(hash, addr, port, size) \
+	HASH_IP_PORT_V4((hash), (addr)->s6_addr32[3], (port), (size))
+
+/* Key: source address XOR destination (VIP) address. */
+#define	HASH_IP_VIP_V4(hash, saddr, daddr, size) \
+{ \
+	uint32_t val = (saddr) ^ (daddr); \
+	CRC32((hash), &val, sizeof (uint32_t), -1U, crc32_table); \
+	(hash) %= (size); \
+}
+#define	HASH_IP_VIP_V6(hash, saddr, daddr, size) \
+	HASH_IP_VIP_V4((hash), (saddr)->s6_addr32[3], (daddr)->s6_addr32[3], \
+	(size))
+
+#define INIT_HASH_TBL_SIZE 10
+
+/* One slot of a hash table: a back-end server and its enabled state. */
+typedef struct {
+ ilb_server_t *server;
+ boolean_t enabled;
+} hash_server_t;
+
+/*
+ * There are two hash tables. The hash_tbl holds all servers, both enabled
+ * and disabled. The hash_enabled_tbl only holds enabled servers. Having
+ * two tables allows the hash on a client request to remain the same even
+ * when some servers are disabled. If a server is disabled and a client's
+ * request hashes to it, we will do another hash. This time the hash is on
+ * the enabled server table.
+ *
+ * hash_lock is held by the lb, add, del, enable and disable routines while
+ * they read or update the tables and counters.
+ */
+typedef struct hash_s {
+ kmutex_t hash_lock;
+ size_t hash_servers; /* Total # of servers */
+ size_t hash_tbl_size; /* All server table size */
+ size_t hash_enabled_servers; /* # of enabled servers */
+ size_t hash_enabled_tbl_size; /* Enabled server table size */
+ hash_server_t *hash_tbl; /* All servers */
+ hash_server_t *hash_enabled_tbl; /* Enabled servers only */
+ ilb_algo_impl_t hash_type; /* Selects the hash key in hash_lb() */
+} hash_t;
+
+static void hash_fini(ilb_alg_data_t **);
+
+/*
+ * Choose a back-end server for a request.  The request is first hashed
+ * over the table of all servers; if the chosen server is disabled, it is
+ * hashed again over the table of enabled servers only.  Returns B_TRUE and
+ * sets *ret_server on success.  Returns B_FALSE, with *ret_server NULL,
+ * when there is no server to pick or the hash type is not recognised.
+ */
+/* ARGSUSED */
+static boolean_t
+hash_lb(in6_addr_t *saddr, in_port_t sport, in6_addr_t *daddr,
+    in_port_t dport, void *alg_data, ilb_server_t **ret_server)
+{
+	hash_t *hp = (hash_t *)alg_data;
+	boolean_t picked = B_FALSE;
+	uint32_t idx;
+
+	ASSERT(ret_server != NULL);
+	*ret_server = NULL;
+
+	mutex_enter(&hp->hash_lock);
+
+	if (hp->hash_servers == 0)
+		goto done;
+
+	/* First attempt: hash over every configured server. */
+	switch (hp->hash_type) {
+	case ILB_ALG_IMPL_HASH_IP:
+		HASH_IP_V6(idx, saddr, hp->hash_servers);
+		break;
+	case ILB_ALG_IMPL_HASH_IP_SPORT:
+		HASH_IP_PORT_V6(idx, saddr, sport, hp->hash_servers);
+		break;
+	case ILB_ALG_IMPL_HASH_IP_VIP:
+		HASH_IP_VIP_V6(idx, saddr, daddr, hp->hash_servers);
+		break;
+	default:
+		goto done;
+	}
+	if (hp->hash_tbl[idx].enabled) {
+		*ret_server = hp->hash_tbl[idx].server;
+		picked = B_TRUE;
+		goto done;
+	}
+
+	/* The chosen server is disabled; fall back to the enabled table. */
+	if (hp->hash_enabled_servers == 0)
+		goto done;
+
+	switch (hp->hash_type) {
+	case ILB_ALG_IMPL_HASH_IP:
+		HASH_IP_V6(idx, saddr, hp->hash_enabled_servers);
+		break;
+	case ILB_ALG_IMPL_HASH_IP_SPORT:
+		HASH_IP_PORT_V6(idx, saddr, sport, hp->hash_enabled_servers);
+		break;
+	case ILB_ALG_IMPL_HASH_IP_VIP:
+		HASH_IP_VIP_V6(idx, saddr, daddr, hp->hash_enabled_servers);
+		break;
+	default:
+		ASSERT(0);
+		break;
+	}
+	*ret_server = hp->hash_enabled_tbl[idx].server;
+	picked = B_TRUE;
+
+done:
+	mutex_exit(&hp->hash_lock);
+	return (picked);
+}
+
+/*
+ * Remove host from the first hash_size slots of tbl, keeping the remaining
+ * entries packed at the front and clearing the slot that becomes free at
+ * the end.  Returns B_FALSE if host is not in the table.  The caller is
+ * responsible for adjusting its own entry count.
+ */
+static boolean_t
+del_server(hash_server_t *tbl, size_t hash_size, ilb_server_t *host)
+{
+	size_t pos;
+
+	/* Locate the slot that refers to host. */
+	for (pos = 0; pos < hash_size; pos++) {
+		if (tbl[pos].server == host)
+			break;
+	}
+	if (pos == hash_size)
+		return (B_FALSE);
+
+	/* Close the gap by sliding every later slot down by one. */
+	while (pos + 1 < hash_size) {
+		tbl[pos] = tbl[pos + 1];
+		pos++;
+	}
+
+	/* pos now indexes the last used slot, which is vacated. */
+	tbl[pos].server = NULL;
+	tbl[pos].enabled = B_FALSE;
+	return (B_TRUE);
+}
+
+/*
+ * Remove host from the hash algorithm's tables and drop the reference the
+ * algorithm held on it.  Returns EINVAL if host is not in the table of all
+ * servers, 0 otherwise.
+ */
+static int
+hash_server_del(ilb_server_t *host, void *alg_data)
+{
+	hash_t *hp = (hash_t *)alg_data;
+
+	mutex_enter(&hp->hash_lock);
+
+	if (del_server(hp->hash_tbl, hp->hash_servers, host)) {
+		hp->hash_servers--;
+
+		/* A disabled server is absent from the enabled table. */
+		if (del_server(hp->hash_enabled_tbl, hp->hash_enabled_servers,
+		    host)) {
+			hp->hash_enabled_servers--;
+		}
+
+		mutex_exit(&hp->hash_lock);
+		ILB_SERVER_REFRELE(host);
+		return (0);
+	}
+
+	mutex_exit(&hp->hash_lock);
+	return (EINVAL);
+}
+
+/*
+ * Enlarge *hash_tbl by INIT_HASH_TBL_SIZE slots.  The existing entries are
+ * copied to a new zero-filled array, the old array is freed, and both
+ * *hash_tbl and *tbl_size are updated.  Returns ENOMEM, leaving everything
+ * untouched, if the new array cannot be allocated without sleeping.
+ */
+static int
+grow_tbl(hash_server_t **hash_tbl, size_t *tbl_size)
+{
+	size_t old_cnt = *tbl_size;
+	size_t new_cnt = old_cnt + INIT_HASH_TBL_SIZE;
+	size_t old_bytes = old_cnt * sizeof (hash_server_t);
+	hash_server_t *bigger;
+
+	bigger = kmem_zalloc(sizeof (hash_server_t) * new_cnt, KM_NOSLEEP);
+	if (bigger == NULL)
+		return (ENOMEM);
+
+	bcopy(*hash_tbl, bigger, old_bytes);
+	kmem_free(*hash_tbl, old_bytes);
+	*hash_tbl = bigger;
+	*tbl_size = new_cnt;
+	return (0);
+}
+
+/*
+ * Add host to the hash algorithm.  The server always goes into the table
+ * of all servers and, if it is enabled, into the table of enabled servers
+ * as well.  On success a reference is taken on host and 0 is returned.
+ *
+ * Room is secured in every table that will be written before any entry is
+ * stored.  This way an allocation failure returns ENOMEM with the tables,
+ * the counters and the reference count exactly as they were; a table that
+ * was already grown merely keeps its spare capacity.
+ */
+static int
+hash_server_add(ilb_server_t *host, void *alg_data)
+{
+	hash_t *hash_alg = (hash_t *)alg_data;
+	boolean_t enabled;
+
+	mutex_enter(&hash_alg->hash_lock);
+
+	/* Sample the state once so both tables see the same answer. */
+	enabled = host->iser_enabled;
+
+	if (hash_alg->hash_servers + 1 > hash_alg->hash_tbl_size &&
+	    grow_tbl(&hash_alg->hash_tbl, &hash_alg->hash_tbl_size) != 0) {
+		mutex_exit(&hash_alg->hash_lock);
+		return (ENOMEM);
+	}
+	if (enabled &&
+	    hash_alg->hash_enabled_servers + 1 >
+	    hash_alg->hash_enabled_tbl_size &&
+	    grow_tbl(&hash_alg->hash_enabled_tbl,
+	    &hash_alg->hash_enabled_tbl_size) != 0) {
+		mutex_exit(&hash_alg->hash_lock);
+		return (ENOMEM);
+	}
+
+	/* Nothing below can fail.  First add the server to the hash_tbl. */
+	hash_alg->hash_tbl[hash_alg->hash_servers].server = host;
+	hash_alg->hash_tbl[hash_alg->hash_servers].enabled = enabled;
+	hash_alg->hash_servers++;
+
+	/* If the server is enabled, add it to the hash_enabled_tbl. */
+	if (enabled) {
+		hash_alg->hash_enabled_tbl[hash_alg->hash_enabled_servers].
+		    server = host;
+		hash_alg->hash_enabled_tbl[hash_alg->hash_enabled_servers].
+		    enabled = B_TRUE;
+		hash_alg->hash_enabled_servers++;
+	}
+
+	mutex_exit(&hash_alg->hash_lock);
+	ILB_SERVER_REFHOLD(host);
+	return (0);
+}
+
+/*
+ * Mark host as enabled and append it to the table of enabled servers.
+ * Returns 0 on success or if host is already enabled, EINVAL if host is
+ * not known to the algorithm, and ENOMEM if the enabled table has to grow
+ * and cannot.
+ */
+static int
+hash_server_enable(ilb_server_t *host, void *alg_data)
+{
+	hash_t *alg = (hash_t *)alg_data;
+	hash_server_t *slot;
+	size_t idx;
+	int rc = 0;
+
+	mutex_enter(&alg->hash_lock);
+
+	for (idx = 0; idx < alg->hash_servers; idx++) {
+		if (alg->hash_tbl[idx].server == host)
+			break;
+	}
+	if (idx == alg->hash_servers) {
+		rc = EINVAL;
+		goto out;
+	}
+	if (alg->hash_tbl[idx].enabled)
+		goto out;
+
+#if DEBUG
+	/* The server should not be in the enabled table. */
+	{
+		size_t j;
+
+		for (j = 0; j < alg->hash_enabled_servers; j++) {
+			if (alg->hash_enabled_tbl[j].server == host) {
+				cmn_err(CE_PANIC, "Corrupted ILB enabled hash "
+				    "table");
+			}
+		}
+	}
+#endif
+
+	if (alg->hash_enabled_servers + 1 > alg->hash_enabled_tbl_size &&
+	    grow_tbl(&alg->hash_enabled_tbl,
+	    &alg->hash_enabled_tbl_size) != 0) {
+		rc = ENOMEM;
+		goto out;
+	}
+
+	alg->hash_tbl[idx].enabled = B_TRUE;
+	slot = &alg->hash_enabled_tbl[alg->hash_enabled_servers];
+	slot->server = host;
+	slot->enabled = B_TRUE;
+	alg->hash_enabled_servers++;
+
+out:
+	mutex_exit(&alg->hash_lock);
+	return (rc);
+}
+
+/*
+ * Mark host as disabled and take it out of the table of enabled servers.
+ * Returns 0 on success or if host is already disabled, EINVAL if host is
+ * not known to the algorithm.
+ */
+static int
+hash_server_disable(ilb_server_t *host, void *alg_data)
+{
+	hash_t *alg = (hash_t *)alg_data;
+	size_t idx;
+	int rc = 0;
+
+	mutex_enter(&alg->hash_lock);
+
+	for (idx = 0; idx < alg->hash_servers; idx++) {
+		if (alg->hash_tbl[idx].server == host)
+			break;
+	}
+
+	if (idx == alg->hash_servers) {
+		rc = EINVAL;
+	} else if (alg->hash_tbl[idx].enabled) {
+		alg->hash_tbl[idx].enabled = B_FALSE;
+#if DEBUG
+		ASSERT(del_server(alg->hash_enabled_tbl,
+		    alg->hash_enabled_servers, host));
+#else
+		(void) del_server(alg->hash_enabled_tbl,
+		    alg->hash_enabled_servers, host);
+#endif
+		alg->hash_enabled_servers--;
+	}
+
+	mutex_exit(&alg->hash_lock);
+	return (rc);
+}
+
+/*
+ * Create the per-rule state of the hash load balancing algorithm.  arg
+ * points to an int that selects the hash type.  Returns the new
+ * ilb_alg_data_t, or NULL if any allocation fails, in which case nothing
+ * is left allocated.
+ *
+ * All memory is obtained before the mutex is initialized, so the failure
+ * paths only have memory to give back.
+ */
+/* ARGSUSED */
+ilb_alg_data_t *
+ilb_alg_hash_init(ilb_rule_t *rule, const void *arg)
+{
+	ilb_alg_data_t *alg;
+	hash_t *hash_alg;
+	int flags = *(const int *)arg;
+	size_t tbl_bytes = sizeof (hash_server_t) * INIT_HASH_TBL_SIZE;
+
+	if ((alg = kmem_alloc(sizeof (ilb_alg_data_t), KM_NOSLEEP)) == NULL)
+		return (NULL);
+	if ((hash_alg = kmem_alloc(sizeof (hash_t), KM_NOSLEEP)) == NULL) {
+		kmem_free(alg, sizeof (ilb_alg_data_t));
+		return (NULL);
+	}
+
+	/* Table of all servers */
+	hash_alg->hash_tbl = kmem_zalloc(tbl_bytes, KM_NOSLEEP);
+	if (hash_alg->hash_tbl == NULL) {
+		kmem_free(hash_alg, sizeof (hash_t));
+		kmem_free(alg, sizeof (ilb_alg_data_t));
+		return (NULL);
+	}
+
+	/* Table of only enabled servers */
+	hash_alg->hash_enabled_tbl = kmem_zalloc(tbl_bytes, KM_NOSLEEP);
+	if (hash_alg->hash_enabled_tbl == NULL) {
+		/* Free with the same size the table was allocated with. */
+		kmem_free(hash_alg->hash_tbl, tbl_bytes);
+		kmem_free(hash_alg, sizeof (hash_t));
+		kmem_free(alg, sizeof (ilb_alg_data_t));
+		return (NULL);
+	}
+
+	hash_alg->hash_servers = 0;
+	hash_alg->hash_tbl_size = INIT_HASH_TBL_SIZE;
+	hash_alg->hash_enabled_servers = 0;
+	hash_alg->hash_enabled_tbl_size = INIT_HASH_TBL_SIZE;
+	hash_alg->hash_type = flags;
+	mutex_init(&hash_alg->hash_lock, NULL, MUTEX_DEFAULT, NULL);
+
+	alg->ilb_alg_lb = hash_lb;
+	alg->ilb_alg_server_del = hash_server_del;
+	alg->ilb_alg_server_add = hash_server_add;
+	alg->ilb_alg_server_enable = hash_server_enable;
+	alg->ilb_alg_server_disable = hash_server_disable;
+	alg->ilb_alg_fini = hash_fini;
+	alg->ilb_alg_data = hash_alg;
+
+	return (alg);
+}
+
+/*
+ * Destroy the state created by ilb_alg_hash_init().  Drops the reference
+ * held on every server still in the table of all servers, destroys the
+ * lock, frees both tables and the state itself, and sets *alg to NULL.
+ */
+static void
+hash_fini(ilb_alg_data_t **alg)
+{
+	hash_t *hash_alg;
+	size_t i;
+
+	hash_alg = (*alg)->ilb_alg_data;
+	for (i = 0; i < hash_alg->hash_servers; i++) {
+		ILB_SERVER_REFRELE(hash_alg->hash_tbl[i].server);
+	}
+
+	kmem_free(hash_alg->hash_tbl, sizeof (hash_server_t) *
+	    hash_alg->hash_tbl_size);
+	kmem_free(hash_alg->hash_enabled_tbl, sizeof (hash_server_t) *
+	    hash_alg->hash_enabled_tbl_size);
+
+	/* The lock must be destroyed before its memory is freed. */
+	mutex_destroy(&hash_alg->hash_lock);
+	kmem_free(hash_alg, sizeof (hash_t));
+	kmem_free(*alg, sizeof (ilb_alg_data_t));
+	*alg = NULL;
+}
diff --git a/usr/src/uts/common/inet/ilb/ilb_alg_rr.c b/usr/src/uts/common/inet/ilb/ilb_alg_rr.c
new file mode 100644
index 0000000000..e88712e19f
--- /dev/null
+++ b/usr/src/uts/common/inet/ilb/ilb_alg_rr.c
@@ -0,0 +1,232 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/errno.h>
+#include <sys/sysmacros.h>
+#include <sys/list.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <inet/ilb.h>
+#include "ilb_impl.h"
+#include "ilb_alg.h"
+
+/* One entry of the round robin list: a back-end server and its state. */
+typedef struct {
+ ilb_server_t *server;
+ boolean_t enabled;
+ list_node_t list; /* Linkage on rr_t's rr_servers */
+} rr_server_t;
+
+/*
+ * Per-rule round robin state.  rr_lock is held by the lb, add, del, enable
+ * and disable routines while they walk or modify the list.
+ */
+typedef struct rr_s {
+ kmutex_t rr_lock;
+ list_t rr_servers; /* List of rr_server_t */
+ rr_server_t *rr_next; /* Where rr_lb() starts; NULL means list head */
+} rr_t;
+
+static void rr_fini(ilb_alg_data_t **);
+
+/*
+ * Choose the next enabled server in round robin order.  The search starts
+ * at rr_next (or the list head when rr_next is NULL) and wraps around at
+ * the end of the list.  Returns B_TRUE and sets *ret_server on success.
+ * Returns B_FALSE, with *ret_server NULL, if the list is empty or no
+ * server is enabled.
+ */
+/* ARGSUSED */
+static boolean_t
+rr_lb(in6_addr_t *saddr, in_port_t sport, in6_addr_t *daddr,
+    in_port_t dport, void *alg_data, ilb_server_t **ret_server)
+{
+	rr_t *rr = (rr_t *)alg_data;
+	list_t *servers = &rr->rr_servers;
+	rr_server_t *first, *cur;
+
+	ASSERT(ret_server != NULL);
+	*ret_server = NULL;
+
+	mutex_enter(&rr->rr_lock);
+	if (list_is_empty(servers)) {
+		mutex_exit(&rr->rr_lock);
+		return (B_FALSE);
+	}
+
+	cur = rr->rr_next;
+	if (cur == NULL)
+		cur = list_head(servers);
+	first = cur;
+
+	/* Skip disabled servers, giving up after one full lap. */
+	while (!cur->enabled) {
+		cur = list_next(servers, cur);
+		if (cur == NULL)
+			cur = list_head(servers);
+		if (cur == first) {
+			rr->rr_next = first;
+			mutex_exit(&rr->rr_lock);
+			return (B_FALSE);
+		}
+	}
+
+	*ret_server = cur->server;
+	/* May become NULL; the next call then restarts from the head. */
+	rr->rr_next = list_next(servers, cur);
+	mutex_exit(&rr->rr_lock);
+	return (B_TRUE);
+}
+
+/*
+ * Remove host from the round robin list, free its list entry and drop the
+ * reference the algorithm held on it.  Returns EINVAL if host is not on
+ * the list, 0 otherwise.
+ */
+static int
+rr_server_del(ilb_server_t *host, void *alg_data)
+{
+	rr_t *rr = (rr_t *)alg_data;
+	list_t *servers = &rr->rr_servers;
+	rr_server_t *ent;
+
+	mutex_enter(&rr->rr_lock);
+
+	ent = list_head(servers);
+	while (ent != NULL && ent->server != host)
+		ent = list_next(servers, ent);
+
+	if (ent == NULL) {
+		mutex_exit(&rr->rr_lock);
+		return (EINVAL);
+	}
+
+	/* Do not leave rr_next pointing at the entry being removed. */
+	if (rr->rr_next == ent)
+		rr->rr_next = list_next(servers, ent);
+	list_remove(servers, ent);
+	mutex_exit(&rr->rr_lock);
+
+	kmem_free(ent, sizeof (rr_server_t));
+	ILB_SERVER_REFRELE(host);
+	return (0);
+}
+
+/*
+ * Add host at the head of the round robin list and take a reference on
+ * it.  The entry inherits host's current enabled state.  Returns ENOMEM if
+ * the list entry cannot be allocated without sleeping, 0 otherwise.
+ */
+static int
+rr_server_add(ilb_server_t *host, void *alg_data)
+{
+	rr_t *rr = (rr_t *)alg_data;
+	rr_server_t *ent;
+
+	if ((ent = kmem_alloc(sizeof (rr_server_t), KM_NOSLEEP)) == NULL)
+		return (ENOMEM);
+
+	ent->server = host;
+	ent->enabled = host->iser_enabled;
+
+	mutex_enter(&rr->rr_lock);
+	list_insert_head(&rr->rr_servers, ent);
+	mutex_exit(&rr->rr_lock);
+
+	ILB_SERVER_REFHOLD(host);
+	return (0);
+}
+
+/*
+ * Set the enabled flag of the first list entry that refers to host to
+ * value.  Returns 0 if such an entry exists and EINVAL otherwise; an empty
+ * list simply falls through to EINVAL.  The caller holds the list's lock.
+ */
+static int
+rr_server_toggle(list_t *servers, ilb_server_t *host, boolean_t value)
+{
+	rr_server_t *ent = list_head(servers);
+
+	while (ent != NULL) {
+		if (ent->server == host) {
+			ent->enabled = value;
+			return (0);
+		}
+		ent = list_next(servers, ent);
+	}
+	return (EINVAL);
+}
+
+/*
+ * Mark host as enabled in the round robin list.  Returns 0 on success and
+ * EINVAL if host is not on the list.
+ */
+static int
+rr_server_enable(ilb_server_t *host, void *alg_data)
+{
+	rr_t *rr = (rr_t *)alg_data;
+	int rc;
+
+	mutex_enter(&rr->rr_lock);
+	rc = rr_server_toggle(&rr->rr_servers, host, B_TRUE);
+	mutex_exit(&rr->rr_lock);
+
+	return (rc);
+}
+
+/*
+ * Mark host as disabled in the round robin list.  Returns 0 on success and
+ * EINVAL if host is not on the list.
+ */
+static int
+rr_server_disable(ilb_server_t *host, void *alg_data)
+{
+	rr_t *rr = (rr_t *)alg_data;
+	int rc;
+
+	mutex_enter(&rr->rr_lock);
+	rc = rr_server_toggle(&rr->rr_servers, host, B_FALSE);
+	mutex_exit(&rr->rr_lock);
+
+	return (rc);
+}
+
+/*
+ * Create the per-rule state of the round robin algorithm.  Neither
+ * argument is used.  Returns the new ilb_alg_data_t, or NULL if memory
+ * cannot be allocated without sleeping.
+ */
+/* ARGSUSED */
+ilb_alg_data_t *
+ilb_alg_rr_init(ilb_rule_t *rule, void *arg)
+{
+	ilb_alg_data_t *alg;
+	rr_t *rr;
+
+	alg = kmem_alloc(sizeof (ilb_alg_data_t), KM_NOSLEEP);
+	if (alg == NULL)
+		return (NULL);
+
+	rr = kmem_alloc(sizeof (rr_t), KM_NOSLEEP);
+	if (rr == NULL) {
+		kmem_free(alg, sizeof (ilb_alg_data_t));
+		return (NULL);
+	}
+
+	/* Private state: an empty server list and no starting point yet. */
+	mutex_init(&rr->rr_lock, NULL, MUTEX_DEFAULT, NULL);
+	list_create(&rr->rr_servers, sizeof (rr_server_t),
+	    offsetof(rr_server_t, list));
+	rr->rr_next = NULL;
+
+	/* Entry points handed back to the caller. */
+	alg->ilb_alg_data = rr;
+	alg->ilb_alg_lb = rr_lb;
+	alg->ilb_alg_server_add = rr_server_add;
+	alg->ilb_alg_server_del = rr_server_del;
+	alg->ilb_alg_server_enable = rr_server_enable;
+	alg->ilb_alg_server_disable = rr_server_disable;
+	alg->ilb_alg_fini = rr_fini;
+
+	return (alg);
+}
+
+/*
+ * Destroy the state created by ilb_alg_rr_init().  Every entry still on
+ * the list is removed, the reference on its server is dropped and the
+ * entry is freed.  Then the list and the lock are destroyed, the state is
+ * freed and *alg is set to NULL.
+ */
+static void
+rr_fini(ilb_alg_data_t **alg)
+{
+	rr_t *rr_alg;
+	rr_server_t *tmp_server;
+	list_t *servers;
+
+	rr_alg = (*alg)->ilb_alg_data;
+	servers = &rr_alg->rr_servers;
+	while ((tmp_server = list_head(servers)) != NULL) {
+		list_remove(servers, tmp_server);
+		ILB_SERVER_REFRELE(tmp_server->server);
+		kmem_free(tmp_server, sizeof (rr_server_t));
+	}
+	list_destroy(servers);
+
+	/* The lock must be destroyed before its memory is freed. */
+	mutex_destroy(&rr_alg->rr_lock);
+	kmem_free(rr_alg, sizeof (rr_t));
+	kmem_free(*alg, sizeof (ilb_alg_data_t));
+	*alg = NULL;
+}
diff --git a/usr/src/uts/common/inet/ilb/ilb_conn.c b/usr/src/uts/common/inet/ilb/ilb_conn.c
new file mode 100644
index 0000000000..d4e88260cf
--- /dev/null
+++ b/usr/src/uts/common/inet/ilb/ilb_conn.c
@@ -0,0 +1,1531 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/conf.h>
+#include <sys/time.h>
+#include <sys/taskq.h>
+#include <sys/cmn_err.h>
+#include <sys/sdt.h>
+#include <sys/atomic.h>
+#include <netinet/in.h>
+#include <inet/ip.h>
+#include <inet/ip6.h>
+#include <inet/tcp.h>
+#include <inet/udp_impl.h>
+#include <inet/ilb.h>
+
+#include "ilb_stack.h"
+#include "ilb_impl.h"
+#include "ilb_conn.h"
+#include "ilb_nat.h"
+
+/*
+ * Timer struct for ilb_conn_t and ilb_sticky_t garbage collection
+ *
+ * start: starting index into the hash table to do gc
+ * end: ending index into the hash table to do gc (not itself visited)
+ * ilbs: pointer to the ilb_stack_t of the IP stack
+ * tid_lock: mutex to protect the timer id.
+ * tid: timer id of the timer.  A value of 0 tells the timer handler not
+ *	to restart the timer.
+ */
+typedef struct ilb_timer_s {
+ uint32_t start;
+ uint32_t end;
+ ilb_stack_t *ilbs;
+ kmutex_t tid_lock;
+ timeout_id_t tid;
+} ilb_timer_t;
+
+/*
+ * Hash macro for finding the index to the conn hash table.  saddr and
+ * daddr are dereferenced at offsets 0 through 3.  The result is masked
+ * with (hash_size - 1); ilb_conn_hash_init() rounds the conn hash size up
+ * to a power of 2, which is presumably what makes this mask usable.
+ */
+#define ILB_CONN_HASH(saddr, sport, daddr, dport, hash_size) \
+ (((*((saddr) + 3) ^ *((daddr) + 3)) * 50653 + \
+ (*((saddr) + 2) ^ *((daddr) + 2)) * 1369 + \
+ (*((saddr) + 1) ^ *((daddr) + 1)) * 37 + \
+ (*(saddr) ^ *(daddr)) + (sport) * 37 + (dport)) & \
+ ((hash_size) - 1))
+
+/* Kmem cache for the conn hash entry */
+static struct kmem_cache *ilb_conn_cache = NULL;
+
+/*
+ * There are 60 timers running to do conn cache garbage collection. Each
+ * gc thread is responsible for 1/60 of the conn hash table.
+ */
+static int ilb_conn_timer_size = 60;
+
+/* Each of the above gc timers wake up every 15s to do the gc. */
+static int ilb_conn_cache_timeout = 15;
+
+/*
+ * Hash macro for finding the index to the sticky hash table.  saddr is
+ * dereferenced at offsets 0 through 3 and mixed with successive bytes of
+ * rule.  The result is masked with (hash_size - 1), so hash_size is
+ * presumably expected to be a power of 2 -- confirm against the code that
+ * sizes the sticky hash table.
+ */
+#define ILB_STICKY_HASH(saddr, rule, hash_size) \
+ (((*((saddr) + 3) ^ ((rule) >> 24)) * 29791 + \
+ (*((saddr) + 2) ^ ((rule) >> 16)) * 961 + \
+ (*((saddr) + 1) ^ ((rule) >> 8)) * 31 + \
+ (*(saddr) ^ (rule))) & ((hash_size) - 1))
+
+static struct kmem_cache *ilb_sticky_cache = NULL;
+
+/*
+ * There are 60 timers running to do sticky cache garbage collection. Each
+ * gc thread is responsible for 1/60 of the sticky hash table.
+ */
+static int ilb_sticky_timer_size = 60;
+
+/* Each of the above gc timers wake up every 15s to do the gc. */
+static int ilb_sticky_timeout = 15;
+
+/*
+ * Drop one reference on sticky entry `s' and stamp its access time with
+ * the current lbolt64, both under the sticky_lock reached through
+ * s->hash.  Every use of the argument is parenthesized so that any pointer
+ * expression can be passed.
+ */
+#define	ILB_STICKY_REFRELE(s) \
+{ \
+	mutex_enter(&(s)->hash->sticky_lock); \
+	(s)->refcnt--; \
+	(s)->atime = lbolt64; \
+	mutex_exit(&(s)->hash->sticky_lock); \
+}
+
+
+/*
+ * Create the kmem cache, named "ilb_conn_cache", from which ilb_conn_t
+ * entries are allocated.  ilb_conn_hash_init() calls this only while
+ * ilb_conn_cache is still NULL.
+ */
+static void
+ilb_conn_cache_init(void)
+{
+ ilb_conn_cache = kmem_cache_create("ilb_conn_cache",
+ sizeof (ilb_conn_t), 0, NULL, NULL, NULL, NULL, NULL,
+ ilb_kmem_flags);
+}
+
+/*
+ * Destroy the conn kmem cache if it was ever created, and forget it so
+ * that a later ilb_conn_hash_init() creates a fresh one.
+ */
+void
+ilb_conn_cache_fini(void)
+{
+	if (ilb_conn_cache == NULL)
+		return;
+
+	kmem_cache_destroy(ilb_conn_cache);
+	ilb_conn_cache = NULL;
+}
+
+/*
+ * Unlink connp from one of the two hash chains it is on and decrement the
+ * entry count of that chain's bucket.  c2s selects the client-to-server
+ * chain (B_TRUE) or the server-to-client chain (B_FALSE).  The lock of the
+ * affected bucket must already be held.  connp itself is not freed.
+ */
+static void
+ilb_conn_remove_common(ilb_conn_t *connp, boolean_t c2s)
+{
+ ilb_conn_hash_t *hash;
+ ilb_conn_t **next, **prev;
+ ilb_conn_t **next_prev, **prev_next;
+
+ /*
+ * Pick the link fields for the requested direction. next_prev and
+ * prev_next are only set, and only used below, when the matching
+ * neighbour exists.
+ */
+ if (c2s) {
+ hash = connp->conn_c2s_hash;
+ ASSERT(MUTEX_HELD(&hash->ilb_conn_hash_lock));
+ next = &connp->conn_c2s_next;
+ prev = &connp->conn_c2s_prev;
+ if (*next != NULL)
+ next_prev = &(*next)->conn_c2s_prev;
+ if (*prev != NULL)
+ prev_next = &(*prev)->conn_c2s_next;
+ } else {
+ hash = connp->conn_s2c_hash;
+ ASSERT(MUTEX_HELD(&hash->ilb_conn_hash_lock));
+ next = &connp->conn_s2c_next;
+ prev = &connp->conn_s2c_prev;
+ if (*next != NULL)
+ next_prev = &(*next)->conn_s2c_prev;
+ if (*prev != NULL)
+ prev_next = &(*prev)->conn_s2c_next;
+ }
+
+ if (hash->ilb_connp == connp) {
+ /* connp heads the chain: its successor becomes the new head. */
+ hash->ilb_connp = *next;
+ if (*next != NULL)
+ *next_prev = NULL;
+ } else {
+ /* Otherwise splice the two neighbours together. */
+ if (*prev != NULL)
+ *prev_next = *next;
+ if (*next != NULL)
+ *next_prev = *prev;
+ }
+ ASSERT(hash->ilb_conn_cnt > 0);
+ hash->ilb_conn_cnt--;
+
+ /* Leave no stale links behind in the removed entry. */
+ *next = NULL;
+ *prev = NULL;
+}
+
+/*
+ * Take connp off both hash chains and free it.  If the rule topology is
+ * ILB_TOPO_IMPL_NAT, the NAT source port goes back to its port arena.  The
+ * references held on the sticky entry (if any) and on the server are
+ * dropped.  Both of connp's hash bucket locks must be held by the caller.
+ */
+static void
+ilb_conn_remove(ilb_conn_t *connp)
+{
+	/* Unlink from the client-to-server chain, then the reverse one. */
+	ASSERT(MUTEX_HELD(&connp->conn_c2s_hash->ilb_conn_hash_lock));
+	ilb_conn_remove_common(connp, B_TRUE);
+	ASSERT(MUTEX_HELD(&connp->conn_s2c_hash->ilb_conn_hash_lock));
+	ilb_conn_remove_common(connp, B_FALSE);
+
+	if (connp->conn_rule_cache.topo == ILB_TOPO_IMPL_NAT) {
+		ilb_nat_src_entry_t *ent = connp->conn_rule_cache.info.src_ent;
+		in_port_t port = ntohs(connp->conn_rule_cache.info.nat_sport);
+
+		vmem_free(ent->nse_port_arena, (void *)(uintptr_t)port, 1);
+	}
+
+	if (connp->conn_sticky != NULL) {
+		ILB_STICKY_REFRELE(connp->conn_sticky);
+	}
+	ILB_SERVER_REFRELE(connp->conn_server);
+	kmem_cache_free(ilb_conn_cache, connp);
+}
+
+/*
+ * Routine to do periodic garbage collection of conn hash entries.  When
+ * a conn hash timer fires, it dispatches a taskq to call this function
+ * to do the gc.  Note that each taskq is responsible for a portion of
+ * the table.  The portion is stored in timer->start, timer->end.
+ *
+ * An entry is removed when it is flagged for gc, when its server's die
+ * time has passed, when both directions have been idle for longer than
+ * conn_expiry seconds, or (TCP only) when the FIN of each direction has
+ * been acknowledged.  Whenever an entry is removed, the NAT listing cursor
+ * ilbs_conn_list_connp is moved off it first, because ilb_conn_remove()
+ * frees the entry.
+ */
+static void
+ilb_conn_cleanup(void *arg)
+{
+	ilb_timer_t *timer = (ilb_timer_t *)arg;
+	uint32_t i;
+	ilb_stack_t *ilbs;
+	ilb_conn_hash_t *c2s_hash, *s2c_hash;
+	ilb_conn_t *connp, *nxt_connp;
+	int64_t now;
+	int64_t expiry;
+	boolean_t die_now;
+
+	ilbs = timer->ilbs;
+	c2s_hash = ilbs->ilbs_c2s_conn_hash;
+	ASSERT(c2s_hash != NULL);
+
+	now = lbolt64;
+	for (i = timer->start; i < timer->end; i++) {
+		mutex_enter(&c2s_hash[i].ilb_conn_hash_lock);
+		if ((connp = c2s_hash[i].ilb_connp) == NULL) {
+			ASSERT(c2s_hash[i].ilb_conn_cnt == 0);
+			mutex_exit(&c2s_hash[i].ilb_conn_hash_lock);
+			continue;
+		}
+		do {
+			ASSERT(c2s_hash[i].ilb_conn_cnt > 0);
+			ASSERT(connp->conn_c2s_hash == &c2s_hash[i]);
+			/* Saved now because connp may be freed below. */
+			nxt_connp = connp->conn_c2s_next;
+			expiry = now - SEC_TO_TICK(connp->conn_expiry);
+			if (connp->conn_server->iser_die_time != 0 &&
+			    connp->conn_server->iser_die_time < now)
+				die_now = B_TRUE;
+			else
+				die_now = B_FALSE;
+			s2c_hash = connp->conn_s2c_hash;
+			mutex_enter(&s2c_hash->ilb_conn_hash_lock);
+
+			if (connp->conn_gc || die_now ||
+			    (connp->conn_c2s_atime < expiry &&
+			    connp->conn_s2c_atime < expiry)) {
+				/* Need to update the nat list cur_connp */
+				if (connp == ilbs->ilbs_conn_list_connp) {
+					ilbs->ilbs_conn_list_connp =
+					    connp->conn_c2s_next;
+				}
+				ilb_conn_remove(connp);
+				goto nxt_connp;
+			}
+
+			if (connp->conn_l4 != IPPROTO_TCP)
+				goto nxt_connp;
+
+			/* Update and check TCP related conn info */
+			if (connp->conn_c2s_tcp_fin_sent &&
+			    SEQ_GT(connp->conn_s2c_tcp_ack,
+			    connp->conn_c2s_tcp_fss)) {
+				connp->conn_c2s_tcp_fin_acked = B_TRUE;
+			}
+			if (connp->conn_s2c_tcp_fin_sent &&
+			    SEQ_GT(connp->conn_c2s_tcp_ack,
+			    connp->conn_s2c_tcp_fss)) {
+				connp->conn_s2c_tcp_fin_acked = B_TRUE;
+			}
+			if (connp->conn_c2s_tcp_fin_acked &&
+			    connp->conn_s2c_tcp_fin_acked) {
+				/*
+				 * Same cursor fix-up as on the expiry path
+				 * above: the entry is about to be freed.
+				 */
+				if (connp == ilbs->ilbs_conn_list_connp) {
+					ilbs->ilbs_conn_list_connp =
+					    connp->conn_c2s_next;
+				}
+				ilb_conn_remove(connp);
+			}
+nxt_connp:
+			mutex_exit(&s2c_hash->ilb_conn_hash_lock);
+			connp = nxt_connp;
+		} while (connp != NULL);
+		mutex_exit(&c2s_hash[i].ilb_conn_hash_lock);
+	}
+}
+
+/*
+ * Conn hash timer routine.  It dispatches ilb_conn_cleanup() to the conn
+ * taskq and then restarts the timer, unless the timer id has been set to
+ * 0, which means the timer is being stopped.
+ */
+static void
+ilb_conn_timer(void *arg)
+{
+	ilb_timer_t *timer = (ilb_timer_t *)arg;
+
+	(void) taskq_dispatch(timer->ilbs->ilbs_conn_taskq, ilb_conn_cleanup,
+	    arg, TQ_SLEEP);
+
+	mutex_enter(&timer->tid_lock);
+	if (timer->tid != 0) {
+		timer->tid = timeout(ilb_conn_timer, arg,
+		    SEC_TO_TICK(ilb_conn_cache_timeout));
+	}
+	mutex_exit(&timer->tid_lock);
+}
+
+/*
+ * Set up the conn hash tables of an ILB stack instance: round the table
+ * size up to a power of 2, allocate both tables and initialize their
+ * bucket locks, create the conn kmem cache if it does not exist yet,
+ * create the gc taskq, and start ilb_conn_timer_size gc timers, each of
+ * which covers its own slice of the table.
+ */
+void
+ilb_conn_hash_init(ilb_stack_t *ilbs)
+{
+	extern pri_t minclsyspri;
+	int i, part;
+	ilb_timer_t *tm;
+	char tq_name[TASKQ_NAMELEN];
+
+	/*
+	 * If ilbs->ilbs_conn_hash_size is not a power of 2, bump it up to
+	 * the next power of 2.
+	 */
+	if ((ilbs->ilbs_conn_hash_size &
+	    (ilbs->ilbs_conn_hash_size - 1)) != 0) {
+		i = 0;
+		while (i < 31 && ilbs->ilbs_conn_hash_size >= (1 << i))
+			i++;
+		ilbs->ilbs_conn_hash_size = 1 << i;
+	}
+
+	/*
+	 * Can sleep since this should be called when a rule is being added,
+	 * hence we are not in interrupt context.
+	 */
+	ilbs->ilbs_c2s_conn_hash = kmem_zalloc(sizeof (ilb_conn_hash_t) *
+	    ilbs->ilbs_conn_hash_size, KM_SLEEP);
+	ilbs->ilbs_s2c_conn_hash = kmem_zalloc(sizeof (ilb_conn_hash_t) *
+	    ilbs->ilbs_conn_hash_size, KM_SLEEP);
+
+	/* One lock per bucket, in both directions. */
+	for (i = 0; i < ilbs->ilbs_conn_hash_size; i++) {
+		mutex_init(&ilbs->ilbs_c2s_conn_hash[i].ilb_conn_hash_lock,
+		    NULL, MUTEX_DEFAULT, NULL);
+		mutex_init(&ilbs->ilbs_s2c_conn_hash[i].ilb_conn_hash_lock,
+		    NULL, MUTEX_DEFAULT, NULL);
+	}
+
+	if (ilb_conn_cache == NULL)
+		ilb_conn_cache_init();
+
+	(void) snprintf(tq_name, sizeof (tq_name), "ilb_conn_taskq_%p",
+	    ilbs->ilbs_netstack);
+	ASSERT(ilbs->ilbs_conn_taskq == NULL);
+	ilbs->ilbs_conn_taskq = taskq_create(tq_name,
+	    ilb_conn_timer_size * 2, minclsyspri, ilb_conn_timer_size,
+	    ilb_conn_timer_size * 2, TASKQ_PREPOPULATE|TASKQ_DYNAMIC);
+
+	ASSERT(ilbs->ilbs_conn_timer_list == NULL);
+	ilbs->ilbs_conn_timer_list = kmem_zalloc(sizeof (ilb_timer_t) *
+	    ilb_conn_timer_size, KM_SLEEP);
+
+	/*
+	 * The hash table is divided in equal partition for those timers
+	 * to do garbage collection.
+	 */
+	part = ilbs->ilbs_conn_hash_size / ilb_conn_timer_size + 1;
+	for (i = 0; i < ilb_conn_timer_size; i++) {
+		tm = &ilbs->ilbs_conn_timer_list[i];
+		tm->ilbs = ilbs;
+		tm->start = i * part;
+		tm->end = tm->start + part;
+		/* Never run past the end of the table. */
+		if (tm->end > ilbs->ilbs_conn_hash_size)
+			tm->end = ilbs->ilbs_conn_hash_size;
+		mutex_init(&tm->tid_lock, NULL, MUTEX_DEFAULT, NULL);
+		/* Spread out the starting execution time of all the timers. */
+		tm->tid = timeout(ilb_conn_timer, tm,
+		    SEC_TO_TICK(ilb_conn_cache_timeout + i));
+	}
+}
+
+/*
+ * Tear down what ilb_conn_hash_init() set up.  Does nothing if the conn
+ * hash tables were never allocated.
+ *
+ * The order matters: the timers are stopped first so that no new gc task
+ * is dispatched; then the taskq is destroyed, which waits for gc tasks
+ * that were already dispatched; only after that is the timer array freed,
+ * because those tasks read their ilb_timer_t argument.  Finally every
+ * bucket of the server-to-client table is walked to free the remaining
+ * conns (each conn is on both tables, so one walk is enough).
+ */
+void
+ilb_conn_hash_fini(ilb_stack_t *ilbs)
+{
+	uint32_t i;
+	ilb_conn_t *connp;
+	ilb_conn_hash_t *s2c;
+
+	if (ilbs->ilbs_c2s_conn_hash == NULL) {
+		ASSERT(ilbs->ilbs_s2c_conn_hash == NULL);
+		return;
+	}
+
+	/* Stop all the timers first. */
+	for (i = 0; i < ilb_conn_timer_size; i++) {
+		timeout_id_t tid;
+
+		/* Setting tid to 0 tells the timer handler not to restart. */
+		mutex_enter(&ilbs->ilbs_conn_timer_list[i].tid_lock);
+		tid = ilbs->ilbs_conn_timer_list[i].tid;
+		ilbs->ilbs_conn_timer_list[i].tid = 0;
+		mutex_exit(&ilbs->ilbs_conn_timer_list[i].tid_lock);
+		(void) untimeout(tid);
+	}
+
+	/* Let outstanding gc tasks finish before freeing their arguments. */
+	taskq_destroy(ilbs->ilbs_conn_taskq);
+	ilbs->ilbs_conn_taskq = NULL;
+
+	for (i = 0; i < ilb_conn_timer_size; i++)
+		mutex_destroy(&ilbs->ilbs_conn_timer_list[i].tid_lock);
+	kmem_free(ilbs->ilbs_conn_timer_list, sizeof (ilb_timer_t) *
+	    ilb_conn_timer_size);
+
+	/* Then remove all the conns, one bucket at a time. */
+	for (i = 0; i < ilbs->ilbs_conn_hash_size; i++) {
+		s2c = &ilbs->ilbs_s2c_conn_hash[i];
+		while ((connp = s2c->ilb_connp) != NULL) {
+			s2c->ilb_connp = connp->conn_s2c_next;
+			ILB_SERVER_REFRELE(connp->conn_server);
+			if (connp->conn_rule_cache.topo == ILB_TOPO_IMPL_NAT) {
+				ilb_nat_src_entry_t *ent;
+				in_port_t port;
+
+				/*
+				 * src_ent will be freed in ilb_nat_src_fini().
+				 */
+				port = ntohs(
+				    connp->conn_rule_cache.info.nat_sport);
+				ent = connp->conn_rule_cache.info.src_ent;
+				vmem_free(ent->nse_port_arena,
+				    (void *)(uintptr_t)port, 1);
+			}
+			kmem_cache_free(ilb_conn_cache, connp);
+		}
+	}
+
+	/* Nothing uses the bucket locks any more. */
+	for (i = 0; i < ilbs->ilbs_conn_hash_size; i++) {
+		mutex_destroy(&ilbs->ilbs_c2s_conn_hash[i].ilb_conn_hash_lock);
+		mutex_destroy(&ilbs->ilbs_s2c_conn_hash[i].ilb_conn_hash_lock);
+	}
+	kmem_free(ilbs->ilbs_c2s_conn_hash, sizeof (ilb_conn_hash_t) *
+	    ilbs->ilbs_conn_hash_size);
+	kmem_free(ilbs->ilbs_s2c_conn_hash, sizeof (ilb_conn_hash_t) *
+	    ilbs->ilbs_conn_hash_size);
+}
+
+/*
+ * Internet checksum adjustment calculation routines. We pre-calculate
+ * checksum adjustment so that we don't need to compute the checksum on
+ * the whole packet when we change address/port in the packet.
+ */
+
+/*
+ * Checksum adjustment for rewriting one IPv4 address and one port.  The
+ * two 16-bit words of the old address and the old port are summed and
+ * folded to 16 bits; the 16-bit complement of that, plus the words of the
+ * new address and the new port, is stored in *adj_sum.
+ */
+static void
+hnat_cksum_v4(uint16_t *oaddr, uint16_t *naddr, in_port_t old_port,
+    in_port_t new_port, uint32_t *adj_sum)
+{
+	uint32_t old_sum = (uint32_t)oaddr[0] + oaddr[1] + old_port;
+	uint32_t new_sum = (uint32_t)naddr[0] + naddr[1] + new_port;
+
+	/* Fold the carries back into the low 16 bits. */
+	while ((old_sum >> 16) != 0)
+		old_sum = (old_sum & 0xffff) + (old_sum >> 16);
+
+	*adj_sum = (uint16_t)~old_sum + new_sum;
+}
+
+/*
+ * Checksum adjustment for rewriting one IPv6 address and one port.  The
+ * eight 16-bit words of the old address and the old port are summed and
+ * folded to 16 bits; the 16-bit complement of that, plus the words of the
+ * new address and the new port, is stored in *adj_sum.
+ */
+static void
+hnat_cksum_v6(uint16_t *oaddr, uint16_t *naddr, in_port_t old_port,
+    in_port_t new_port, uint32_t *adj_sum)
+{
+	uint32_t old_sum = old_port;
+	uint32_t new_sum = new_port;
+	int w;
+
+	for (w = 0; w < 8; w++) {
+		old_sum += oaddr[w];
+		new_sum += naddr[w];
+	}
+
+	/* Fold the carries back into the low 16 bits. */
+	while ((old_sum >> 16) != 0)
+		old_sum = (old_sum & 0xffff) + (old_sum >> 16);
+
+	*adj_sum = (uint16_t)~old_sum + new_sum;
+}
+
+/*
+ * Checksum adjustment for rewriting two IPv4 addresses and two ports.  The
+ * words of both old addresses and both old ports are summed and folded to
+ * 16 bits; the 16-bit complement of that, plus the words of both new
+ * addresses and both new ports, is stored in *adj_sum.
+ */
+static void
+fnat_cksum_v4(uint16_t *oaddr1, uint16_t *oaddr2, uint16_t *naddr1,
+    uint16_t *naddr2, in_port_t old_port1, in_port_t old_port2,
+    in_port_t new_port1, in_port_t new_port2, uint32_t *adj_sum)
+{
+	uint32_t old_sum, new_sum;
+
+	old_sum = (uint32_t)oaddr1[0] + oaddr1[1] + old_port1;
+	old_sum += (uint32_t)oaddr2[0] + oaddr2[1] + old_port2;
+
+	new_sum = (uint32_t)naddr1[0] + naddr1[1] + new_port1;
+	new_sum += (uint32_t)naddr2[0] + naddr2[1] + new_port2;
+
+	/* Fold the carries back into the low 16 bits. */
+	while ((old_sum >> 16) != 0)
+		old_sum = (old_sum & 0xffff) + (old_sum >> 16);
+
+	*adj_sum = (uint16_t)~old_sum + new_sum;
+}
+
+/*
+ * Checksum adjustment for rewriting two IPv6 addresses and two ports.  The
+ * words of both old addresses and both old ports are summed and folded to
+ * 16 bits; the 16-bit complement of that, plus the words of both new
+ * addresses and both new ports, is stored in *adj_sum.
+ */
+static void
+fnat_cksum_v6(uint16_t *oaddr1, uint16_t *oaddr2, uint16_t *naddr1,
+    uint16_t *naddr2, in_port_t old_port1, in_port_t old_port2,
+    in_port_t new_port1, in_port_t new_port2, uint32_t *adj_sum)
+{
+	uint32_t old_sum = (uint32_t)old_port1 + old_port2;
+	uint32_t new_sum = (uint32_t)new_port1 + new_port2;
+	int w;
+
+	for (w = 0; w < 8; w++) {
+		old_sum += (uint32_t)oaddr1[w] + oaddr2[w];
+		new_sum += (uint32_t)naddr1[w] + naddr2[w];
+	}
+
+	/* Fold the carries back into the low 16 bits. */
+	while ((old_sum >> 16) != 0)
+		old_sum = (old_sum & 0xffff) + (old_sum >> 16);
+
+	*adj_sum = (uint16_t)~old_sum + new_sum;
+}
+
+/*
+ * Add a conn hash entry to the tables. Note that a conn hash entry
+ * (ilb_conn_t) contains info on both directions. And there are two hash
+ * tables, one for client to server and the other for server to client.
+ * So the same entry is added to both tables and can be accessed by two
+ * threads simultaneously. But each thread will only access data on one
+ * direction, so there is no conflict.
+ *
+ * On success, 0 is returned, a reference is taken on server, and for the
+ * half-NAT and full-NAT topologies *ip_sum and *tp_sum are set to the
+ * client to server checksum adjustments stored in the new entry.
+ *
+ * If the entry cannot be allocated, ENOMEM is returned. In that case,
+ * when s is not NULL, the reference on s is released and, for full NAT,
+ * the port info->nat_sport is returned to the port arena of the NAT
+ * source entry selected by s.
+ */
+int
+ilb_conn_add(ilb_stack_t *ilbs, ilb_rule_t *rule, ilb_server_t *server,
+ in6_addr_t *src, in_port_t sport, in6_addr_t *dst, in_port_t dport,
+ ilb_nat_info_t *info, uint32_t *ip_sum, uint32_t *tp_sum, ilb_sticky_t *s)
+{
+ ilb_conn_t *connp;
+ ilb_conn_hash_t *hash;
+ int i;
+
+ /* KM_NOSLEEP: the allocation may fail instead of blocking. */
+ connp = kmem_cache_alloc(ilb_conn_cache, KM_NOSLEEP);
+ if (connp == NULL) {
+ if (s != NULL) {
+ if (rule->ir_topo == ILB_TOPO_IMPL_NAT) {
+ ilb_nat_src_entry_t **entry;
+
+ entry = s->server->iser_nat_src->src_list;
+ vmem_free(entry[s->nat_src_idx]->nse_port_arena,
+ (void *)(uintptr_t)ntohs(info->nat_sport),
+ 1);
+ }
+ ILB_STICKY_REFRELE(s);
+ }
+ return (ENOMEM);
+ }
+
+ connp->conn_l4 = rule->ir_proto;
+
+ connp->conn_server = server;
+ ILB_SERVER_REFHOLD(server);
+ connp->conn_sticky = s;
+
+ /* Keep a private copy so lookups do not need to touch the rule. */
+ connp->conn_rule_cache.topo = rule->ir_topo;
+ connp->conn_rule_cache.info = *info;
+
+ connp->conn_gc = B_FALSE;
+
+ connp->conn_expiry = rule->ir_nat_expiry;
+ connp->conn_cr_time = lbolt64;
+
+ /* Client to server info. */
+ connp->conn_c2s_saddr = *src;
+ connp->conn_c2s_sport = sport;
+ connp->conn_c2s_daddr = *dst;
+ connp->conn_c2s_dport = dport;
+
+ connp->conn_c2s_atime = lbolt64;
+ /* The packet that triggers this creation should be counted */
+ connp->conn_c2s_pkt_cnt = 1;
+ connp->conn_c2s_tcp_fin_sent = B_FALSE;
+ connp->conn_c2s_tcp_fin_acked = B_FALSE;
+
+ /* Server to client info, before NAT */
+ switch (rule->ir_topo) {
+ case ILB_TOPO_IMPL_HALF_NAT:
+ /* Half NAT: only the destination is rewritten. */
+ connp->conn_s2c_saddr = info->nat_dst;
+ connp->conn_s2c_sport = info->nat_dport;
+ connp->conn_s2c_daddr = *src;
+ connp->conn_s2c_dport = sport;
+
+ /* Pre-calculate checksum changes for both directions */
+ if (rule->ir_ipver == IPPROTO_IP) {
+ /* IP header adjustment: addresses only, ports are 0. */
+ hnat_cksum_v4((uint16_t *)&dst->s6_addr32[3],
+ (uint16_t *)&info->nat_dst.s6_addr32[3], 0, 0,
+ &connp->conn_c2s_ip_sum);
+ hnat_cksum_v4((uint16_t *)&dst->s6_addr32[3],
+ (uint16_t *)&info->nat_dst.s6_addr32[3], dport,
+ info->nat_dport, &connp->conn_c2s_tp_sum);
+ *ip_sum = connp->conn_c2s_ip_sum;
+ *tp_sum = connp->conn_c2s_tp_sum;
+
+ hnat_cksum_v4(
+ (uint16_t *)&info->nat_dst.s6_addr32[3],
+ (uint16_t *)&dst->s6_addr32[3], 0, 0,
+ &connp->conn_s2c_ip_sum);
+ hnat_cksum_v4(
+ (uint16_t *)&info->nat_dst.s6_addr32[3],
+ (uint16_t *)&dst->s6_addr32[3],
+ info->nat_dport, dport,
+ &connp->conn_s2c_tp_sum);
+ } else {
+ /* For IPv6 only the transport adjustment is computed. */
+ connp->conn_c2s_ip_sum = 0;
+ hnat_cksum_v6((uint16_t *)dst,
+ (uint16_t *)&info->nat_dst, dport,
+ info->nat_dport, &connp->conn_c2s_tp_sum);
+ *ip_sum = 0;
+ *tp_sum = connp->conn_c2s_tp_sum;
+
+ connp->conn_s2c_ip_sum = 0;
+ hnat_cksum_v6((uint16_t *)&info->nat_dst,
+ (uint16_t *)dst, info->nat_dport, dport,
+ &connp->conn_s2c_tp_sum);
+ }
+ break;
+ case ILB_TOPO_IMPL_NAT:
+ /* Full NAT: both source and destination are rewritten. */
+ connp->conn_s2c_saddr = info->nat_dst;
+ connp->conn_s2c_sport = info->nat_dport;
+ connp->conn_s2c_daddr = info->nat_src;
+ connp->conn_s2c_dport = info->nat_sport;
+
+ if (rule->ir_ipver == IPPROTO_IP) {
+ fnat_cksum_v4((uint16_t *)&src->s6_addr32[3],
+ (uint16_t *)&dst->s6_addr32[3],
+ (uint16_t *)&info->nat_src.s6_addr32[3],
+ (uint16_t *)&info->nat_dst.s6_addr32[3],
+ 0, 0, 0, 0, &connp->conn_c2s_ip_sum);
+ fnat_cksum_v4((uint16_t *)&src->s6_addr32[3],
+ (uint16_t *)&dst->s6_addr32[3],
+ (uint16_t *)&info->nat_src.s6_addr32[3],
+ (uint16_t *)&info->nat_dst.s6_addr32[3],
+ sport, dport, info->nat_sport,
+ info->nat_dport, &connp->conn_c2s_tp_sum);
+ *ip_sum = connp->conn_c2s_ip_sum;
+ *tp_sum = connp->conn_c2s_tp_sum;
+
+ fnat_cksum_v4(
+ (uint16_t *)&info->nat_src.s6_addr32[3],
+ (uint16_t *)&info->nat_dst.s6_addr32[3],
+ (uint16_t *)&src->s6_addr32[3],
+ (uint16_t *)&dst->s6_addr32[3],
+ 0, 0, 0, 0, &connp->conn_s2c_ip_sum);
+ fnat_cksum_v4(
+ (uint16_t *)&info->nat_src.s6_addr32[3],
+ (uint16_t *)&info->nat_dst.s6_addr32[3],
+ (uint16_t *)&src->s6_addr32[3],
+ (uint16_t *)&dst->s6_addr32[3],
+ info->nat_sport, info->nat_dport,
+ sport, dport, &connp->conn_s2c_tp_sum);
+ } else {
+ fnat_cksum_v6((uint16_t *)src, (uint16_t *)dst,
+ (uint16_t *)&info->nat_src,
+ (uint16_t *)&info->nat_dst,
+ sport, dport, info->nat_sport,
+ info->nat_dport, &connp->conn_c2s_tp_sum);
+ connp->conn_c2s_ip_sum = 0;
+ *ip_sum = 0;
+ *tp_sum = connp->conn_c2s_tp_sum;
+
+ fnat_cksum_v6((uint16_t *)&info->nat_src,
+ (uint16_t *)&info->nat_dst, (uint16_t *)src,
+ (uint16_t *)dst, info->nat_sport,
+ info->nat_dport, sport, dport,
+ &connp->conn_s2c_tp_sum);
+ connp->conn_s2c_ip_sum = 0;
+ }
+ break;
+ }
+
+ connp->conn_s2c_atime = lbolt64;
+ connp->conn_s2c_pkt_cnt = 1;
+ connp->conn_s2c_tcp_fin_sent = B_FALSE;
+ connp->conn_s2c_tcp_fin_acked = B_FALSE;
+
+ /* Add it to the s2c hash table. */
+ hash = ilbs->ilbs_s2c_conn_hash;
+ i = ILB_CONN_HASH((uint8_t *)&connp->conn_s2c_saddr.s6_addr32[3],
+ ntohs(connp->conn_s2c_sport),
+ (uint8_t *)&connp->conn_s2c_daddr.s6_addr32[3],
+ ntohs(connp->conn_s2c_dport), ilbs->ilbs_conn_hash_size);
+ connp->conn_s2c_hash = &hash[i];
+ DTRACE_PROBE2(ilb__conn__hash__add__s2c, ilb_conn_t *, connp, int, i);
+
+ /* Insert at the head of the bucket's doubly linked list. */
+ mutex_enter(&hash[i].ilb_conn_hash_lock);
+ hash[i].ilb_conn_cnt++;
+ connp->conn_s2c_next = hash[i].ilb_connp;
+ if (hash[i].ilb_connp != NULL)
+ hash[i].ilb_connp->conn_s2c_prev = connp;
+ connp->conn_s2c_prev = NULL;
+ hash[i].ilb_connp = connp;
+ mutex_exit(&hash[i].ilb_conn_hash_lock);
+
+ /* Add it to the c2s hash table. */
+ hash = ilbs->ilbs_c2s_conn_hash;
+ i = ILB_CONN_HASH((uint8_t *)&src->s6_addr32[3], ntohs(sport),
+ (uint8_t *)&dst->s6_addr32[3], ntohs(dport),
+ ilbs->ilbs_conn_hash_size);
+ connp->conn_c2s_hash = &hash[i];
+ DTRACE_PROBE2(ilb__conn__hash__add__c2s, ilb_conn_t *, connp, int, i);
+
+ /* Insert at the head of the bucket's doubly linked list. */
+ mutex_enter(&hash[i].ilb_conn_hash_lock);
+ hash[i].ilb_conn_cnt++;
+ connp->conn_c2s_next = hash[i].ilb_connp;
+ if (hash[i].ilb_connp != NULL)
+ hash[i].ilb_connp->conn_c2s_prev = connp;
+ connp->conn_c2s_prev = NULL;
+ hash[i].ilb_connp = connp;
+ mutex_exit(&hash[i].ilb_conn_hash_lock);
+
+ return (0);
+}
+
+/*
+ * If a connection is using TCP, we keep track of simple TCP state transition
+ * so that we know when to clean up an entry.
+ *
+ * connp:	conn hash entry the segment belongs to
+ * iph:		start of the IP header of the packet
+ * tcpha:	TCP header of the packet
+ * pkt_len:	packet length; the segment length is derived from it by
+ *		subtracting the offset of tcpha from iph and the TCP header
+ *		length
+ * c2s:		B_TRUE if the segment travels from client to server
+ *
+ * The hash bucket lock of the given direction must be held by the caller.
+ *
+ * Returns B_FALSE if the segment has the RST flag set, B_TRUE otherwise.
+ */
+static boolean_t
+update_conn_tcp(ilb_conn_t *connp, void *iph, tcpha_t *tcpha, int32_t pkt_len,
+    boolean_t c2s)
+{
+	/*
+	 * ack stays 0 when the segment does not carry the ACK flag, so the
+	 * value recorded in the conn entry below is never indeterminate.
+	 */
+	uint32_t ack = 0;
+	uint32_t seq;
+	int32_t seg_len;
+
+	if (tcpha->tha_flags & TH_RST)
+		return (B_FALSE);
+
+	seg_len = pkt_len - ((uint8_t *)tcpha - (uint8_t *)iph) -
+	    TCP_HDR_LENGTH((tcph_t *)tcpha);
+
+	if (tcpha->tha_flags & TH_ACK)
+		ack = ntohl(tcpha->tha_ack);
+	seq = ntohl(tcpha->tha_seq);
+	if (c2s) {
+		ASSERT(MUTEX_HELD(&connp->conn_c2s_hash->ilb_conn_hash_lock));
+		if (tcpha->tha_flags & TH_FIN) {
+			/* Remember the sequence number following the data. */
+			connp->conn_c2s_tcp_fss = seq + seg_len;
+			connp->conn_c2s_tcp_fin_sent = B_TRUE;
+		}
+		connp->conn_c2s_tcp_ack = ack;
+
+		/* Port reuse by the client, restart the conn. */
+		if (connp->conn_c2s_tcp_fin_sent &&
+		    SEQ_GT(seq, connp->conn_c2s_tcp_fss + 1)) {
+			connp->conn_c2s_tcp_fin_sent = B_FALSE;
+			connp->conn_c2s_tcp_fin_acked = B_FALSE;
+		}
+	} else {
+		ASSERT(MUTEX_HELD(&connp->conn_s2c_hash->ilb_conn_hash_lock));
+		if (tcpha->tha_flags & TH_FIN) {
+			/* Remember the sequence number following the data. */
+			connp->conn_s2c_tcp_fss = seq + seg_len;
+			connp->conn_s2c_tcp_fin_sent = B_TRUE;
+		}
+		connp->conn_s2c_tcp_ack = ack;
+
+		/* Sequence moved past the recorded FIN, restart the conn. */
+		if (connp->conn_s2c_tcp_fin_sent &&
+		    SEQ_GT(seq, connp->conn_s2c_tcp_fss + 1)) {
+			connp->conn_s2c_tcp_fin_sent = B_FALSE;
+			connp->conn_s2c_tcp_fin_acked = B_FALSE;
+		}
+	}
+
+	return (B_TRUE);
+}
+
+/*
+ * Helper routine to find conn hash entry given some packet information and
+ * the traffic direction (c2s, client to server?)
+ *
+ * The bucket is chosen by hashing the packet's addresses and ports; the
+ * c2s table is searched when c2s is B_TRUE, the s2c table otherwise. An
+ * entry matches when protocol, both ports and both addresses are equal.
+ *
+ * On a match the entry's access time and packet count for that direction
+ * are updated, *rule_cache, *ip_sum and *tp_sum are filled in from the
+ * entry, the server's kstats are updated, and B_TRUE is returned. For TCP
+ * the tracking state is also updated; if update_conn_tcp() returns
+ * B_FALSE the entry is flagged with conn_gc. Without a match, B_FALSE is
+ * returned and the output arguments are left untouched.
+ */
+static boolean_t
+ilb_find_conn(ilb_stack_t *ilbs, void *iph, void *tph, int l4, in6_addr_t *src,
+ in_port_t sport, in6_addr_t *dst, in_port_t dport,
+ ilb_rule_info_t *rule_cache, uint32_t *ip_sum, uint32_t *tp_sum,
+ int32_t pkt_len, boolean_t c2s)
+{
+ ilb_conn_hash_t *hash;
+ uint_t i;
+ ilb_conn_t *connp;
+ boolean_t tcp_alive;
+ boolean_t ret = B_FALSE;
+
+ i = ILB_CONN_HASH((uint8_t *)&src->s6_addr32[3], ntohs(sport),
+ (uint8_t *)&dst->s6_addr32[3], ntohs(dport),
+ ilbs->ilbs_conn_hash_size);
+ if (c2s) {
+ hash = ilbs->ilbs_c2s_conn_hash;
+ mutex_enter(&hash[i].ilb_conn_hash_lock);
+ for (connp = hash[i].ilb_connp; connp != NULL;
+ connp = connp->conn_c2s_next) {
+ if (connp->conn_l4 == l4 &&
+ connp->conn_c2s_dport == dport &&
+ connp->conn_c2s_sport == sport &&
+ IN6_ARE_ADDR_EQUAL(src, &connp->conn_c2s_saddr) &&
+ IN6_ARE_ADDR_EQUAL(dst, &connp->conn_c2s_daddr)) {
+ connp->conn_c2s_atime = lbolt64;
+ connp->conn_c2s_pkt_cnt++;
+ *rule_cache = connp->conn_rule_cache;
+ *ip_sum = connp->conn_c2s_ip_sum;
+ *tp_sum = connp->conn_c2s_tp_sum;
+ ret = B_TRUE;
+ break;
+ }
+ }
+ } else {
+ hash = ilbs->ilbs_s2c_conn_hash;
+ mutex_enter(&hash[i].ilb_conn_hash_lock);
+ for (connp = hash[i].ilb_connp; connp != NULL;
+ connp = connp->conn_s2c_next) {
+ if (connp->conn_l4 == l4 &&
+ connp->conn_s2c_dport == dport &&
+ connp->conn_s2c_sport == sport &&
+ IN6_ARE_ADDR_EQUAL(src, &connp->conn_s2c_saddr) &&
+ IN6_ARE_ADDR_EQUAL(dst, &connp->conn_s2c_daddr)) {
+ connp->conn_s2c_atime = lbolt64;
+ connp->conn_s2c_pkt_cnt++;
+ *rule_cache = connp->conn_rule_cache;
+ *ip_sum = connp->conn_s2c_ip_sum;
+ *tp_sum = connp->conn_s2c_tp_sum;
+ ret = B_TRUE;
+ break;
+ }
+ }
+ }
+ /* The bucket lock is still held here, whichever table was searched. */
+ if (ret) {
+ ILB_S_KSTAT(connp->conn_server, pkt_processed);
+ ILB_S_KSTAT_UPDATE(connp->conn_server, bytes_processed,
+ pkt_len);
+
+ switch (l4) {
+ case (IPPROTO_TCP):
+ tcp_alive = update_conn_tcp(connp, iph, tph, pkt_len,
+ c2s);
+ if (!tcp_alive) {
+ connp->conn_gc = B_TRUE;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ mutex_exit(&hash[i].ilb_conn_hash_lock);
+
+ return (ret);
+}
+
+/*
+ * Check whether a given packet matches an existing conn hash entry and,
+ * if it does, perform the NAT recorded in that entry.
+ *
+ * The client to server table is consulted first; only when it has no
+ * match is the server to client table consulted.
+ *
+ * Returns B_TRUE when a matching entry with a full-NAT or half-NAT
+ * topology was found; the packet has then been rewritten and *lb_dst
+ * holds the resulting destination. Returns B_FALSE otherwise.
+ */
+boolean_t
+ilb_check_conn(ilb_stack_t *ilbs, int l3, void *iph, int l4, void *tph,
+    in6_addr_t *src, in6_addr_t *dst, in_port_t sport, in_port_t dport,
+    uint32_t pkt_len, in6_addr_t *lb_dst)
+{
+	ilb_rule_info_t cached;
+	uint32_t ip_adj, tp_adj;
+	boolean_t c2s;
+
+	/* Work out which direction, if any, knows about this packet. */
+	if (ilb_find_conn(ilbs, iph, tph, l4, src, sport, dst, dport,
+	    &cached, &ip_adj, &tp_adj, pkt_len, B_TRUE)) {
+		c2s = B_TRUE;
+	} else if (ilb_find_conn(ilbs, iph, tph, l4, src, sport, dst, dport,
+	    &cached, &ip_adj, &tp_adj, pkt_len, B_FALSE)) {
+		c2s = B_FALSE;
+	} else {
+		return (B_FALSE);
+	}
+
+	if (cached.topo == ILB_TOPO_IMPL_NAT) {
+		if (c2s)
+			*lb_dst = cached.info.nat_dst;
+		else
+			*lb_dst = cached.info.src;
+		ilb_full_nat(l3, iph, l4, tph, &cached.info, ip_adj, tp_adj,
+		    c2s);
+		return (B_TRUE);
+	}
+
+	if (cached.topo == ILB_TOPO_IMPL_HALF_NAT) {
+		if (c2s)
+			*lb_dst = cached.info.nat_dst;
+		else
+			*lb_dst = *dst;
+		ilb_half_nat(l3, iph, l4, tph, &cached.info, ip_adj, tp_adj,
+		    c2s);
+		return (B_TRUE);
+	}
+
+	/* Any other topology is not handled through the conn hash. */
+	return (B_FALSE);
+}
+
+/*
+ * To check if an ICMP packet belongs to a connection in one of the conn
+ * hash entries.
+ *
+ * The packet embedded after the ICMP header is examined. Only embedded
+ * TCP and UDP packets are handled. The embedded source/destination are
+ * swapped before looking in the client to server table, i.e. the embedded
+ * destination is matched against the entry's client side source.
+ *
+ * Returns B_TRUE if a matching full-NAT or half-NAT entry was found, in
+ * which case the ICMP packet has been passed to ilb_nat_icmpv4() or
+ * ilb_nat_icmpv6() and *lb_dst is set to the entry's nat_dst. Returns
+ * B_FALSE otherwise.
+ */
+boolean_t
+ilb_check_icmp_conn(ilb_stack_t *ilbs, mblk_t *mp, int l3, void *out_iph,
+ void *icmph, in6_addr_t *lb_dst)
+{
+ ilb_conn_hash_t *hash;
+ ipha_t *in_iph4;
+ ip6_t *in_iph6;
+ icmph_t *icmph4;
+ icmp6_t *icmph6;
+ in6_addr_t *in_src_p, *in_dst_p;
+ in_port_t *sport, *dport;
+ int l4;
+ uint_t i;
+ ilb_conn_t *connp;
+ ilb_rule_info_t rule_cache;
+ uint32_t adj_ip_sum;
+ boolean_t full_nat;
+
+ if (l3 == IPPROTO_IP) {
+ in6_addr_t in_src, in_dst;
+
+ icmph4 = (icmph_t *)icmph;
+ /* The embedded IP header directly follows the ICMP header. */
+ in_iph4 = (ipha_t *)&icmph4[1];
+
+ /*
+ * Make sure the embedded IP header plus the minimum transport
+ * header fit in this mblk.
+ * NOTE(review): IPH_HDR_LENGTH() reads the embedded header
+ * before this check has passed - presumably the caller has
+ * already verified that much of the packet; confirm.
+ */
+ if ((uint8_t *)in_iph4 + IPH_HDR_LENGTH(in_iph4) +
+ ICMP_MIN_TP_HDR_LEN > mp->b_wptr) {
+ return (B_FALSE);
+ }
+
+ IN6_IPADDR_TO_V4MAPPED(in_iph4->ipha_src, &in_src);
+ in_src_p = &in_src;
+ IN6_IPADDR_TO_V4MAPPED(in_iph4->ipha_dst, &in_dst);
+ in_dst_p = &in_dst;
+
+ l4 = in_iph4->ipha_protocol;
+ if (l4 != IPPROTO_TCP && l4 != IPPROTO_UDP)
+ return (B_FALSE);
+
+ /* The ports are the first two 16-bit fields after the IP header. */
+ sport = (in_port_t *)((char *)in_iph4 +
+ IPH_HDR_LENGTH(in_iph4));
+ dport = sport + 1;
+
+ DTRACE_PROBE4(ilb__chk__icmp__conn__v4, uint32_t,
+ in_iph4->ipha_src, uint32_t, in_iph4->ipha_dst, uint16_t,
+ ntohs(*sport), uint16_t, ntohs(*dport));
+ } else {
+ ASSERT(l3 == IPPROTO_IPV6);
+
+ icmph6 = (icmp6_t *)icmph;
+ in_iph6 = (ip6_t *)&icmph6[1];
+ in_src_p = &in_iph6->ip6_src;
+ in_dst_p = &in_iph6->ip6_dst;
+
+ if ((uint8_t *)in_iph6 + sizeof (ip6_t) +
+ ICMP_MIN_TP_HDR_LEN > mp->b_wptr) {
+ return (B_FALSE);
+ }
+
+ l4 = in_iph6->ip6_nxt;
+ /* We don't go deep inside an IPv6 packet yet. */
+ if (l4 != IPPROTO_TCP && l4 != IPPROTO_UDP)
+ return (B_FALSE);
+
+ sport = (in_port_t *)&in_iph6[1];
+ dport = sport + 1;
+
+ DTRACE_PROBE4(ilb__chk__icmp__conn__v6, in6_addr_t *,
+ &in_iph6->ip6_src, in6_addr_t *, &in_iph6->ip6_dst,
+ uint16_t, ntohs(*sport), uint16_t, ntohs(*dport));
+ }
+
+ /* Hash with source and destination swapped (see above). */
+ i = ILB_CONN_HASH((uint8_t *)&in_dst_p->s6_addr32[3], ntohs(*dport),
+ (uint8_t *)&in_src_p->s6_addr32[3], ntohs(*sport),
+ ilbs->ilbs_conn_hash_size);
+ hash = ilbs->ilbs_c2s_conn_hash;
+
+ mutex_enter(&hash[i].ilb_conn_hash_lock);
+ for (connp = hash[i].ilb_connp; connp != NULL;
+ connp = connp->conn_c2s_next) {
+ if (connp->conn_l4 == l4 &&
+ connp->conn_c2s_dport == *sport &&
+ connp->conn_c2s_sport == *dport &&
+ IN6_ARE_ADDR_EQUAL(in_dst_p, &connp->conn_c2s_saddr) &&
+ IN6_ARE_ADDR_EQUAL(in_src_p, &connp->conn_c2s_daddr)) {
+ connp->conn_c2s_atime = lbolt64;
+ connp->conn_c2s_pkt_cnt++;
+ /* Copy what is needed while the lock is held. */
+ rule_cache = connp->conn_rule_cache;
+ adj_ip_sum = connp->conn_c2s_ip_sum;
+ break;
+ }
+ }
+ mutex_exit(&hash[i].ilb_conn_hash_lock);
+
+ /* connp is only used as a found/not found indication from here on. */
+ if (connp == NULL) {
+ DTRACE_PROBE(ilb__chk__icmp__conn__failed);
+ return (B_FALSE);
+ }
+
+ switch (rule_cache.topo) {
+ case ILB_TOPO_IMPL_NAT:
+ full_nat = B_TRUE;
+ break;
+ case ILB_TOPO_IMPL_HALF_NAT:
+ full_nat = B_FALSE;
+ break;
+ default:
+ return (B_FALSE);
+ }
+
+ *lb_dst = rule_cache.info.nat_dst;
+ if (l3 == IPPROTO_IP) {
+ ilb_nat_icmpv4(mp, out_iph, icmph4, in_iph4, sport, dport,
+ &rule_cache.info, adj_ip_sum, full_nat);
+ } else {
+ ilb_nat_icmpv6(mp, out_iph, icmph6, in_iph6, sport, dport,
+ &rule_cache.info, full_nat);
+ }
+ return (B_TRUE);
+}
+
+/*
+ * This routine sends up the conn hash table to user land. Note that the
+ * request is an ioctl, hence we cannot really differentiate requests
+ * from different clients. There is no context shared between different
+ * ioctls. Here we make the assumption that the user land ilbd will
+ * only allow one client to show the conn hash table at any time.
+ * Otherwise, the results will be "very" inconsistent.
+ *
+ * In each ioctl, a flag (ILB_LIST_BEGIN) indicates whether the client wants
+ * to read from the beginning of the table. After a certain number of
+ * entries are reported, the kernel remembers the position of the last
+ * returned entry. When the next ioctl comes in with the ILB_LIST_CONT flag,
+ * it will return entries starting from where it was left off. When
+ * the end of table is reached, a flag (ILB_LIST_END) is set to tell
+ * the client that there is no more entry.
+ *
+ * It is assumed that the caller has checked the size of nat so that it
+ * can hold num entries.
+ *
+ * On entry *num is the maximum number of entries to return; on return it
+ * is the number of entries actually stored in nat. Returns 0 on success,
+ * EINTR if the wait for another lister was interrupted by a signal, and
+ * EINVAL if neither ILB_LIST_BEGIN nor ILB_LIST_CONT is set in *flags.
+ */
+/* ARGSUSED */
+int
+ilb_list_nat(ilb_stack_t *ilbs, zoneid_t zoneid, ilb_nat_entry_t *nat,
+ uint32_t *num, uint32_t *flags)
+{
+ ilb_conn_hash_t *hash;
+ ilb_conn_t *cur_connp;
+ uint32_t i, j;
+ int ret = 0;
+
+ /* Only one listing may be in progress at a time. */
+ mutex_enter(&ilbs->ilbs_conn_list_lock);
+ while (ilbs->ilbs_conn_list_busy) {
+ if (cv_wait_sig(&ilbs->ilbs_conn_list_cv,
+ &ilbs->ilbs_conn_list_lock) == 0) {
+ mutex_exit(&ilbs->ilbs_conn_list_lock);
+ return (EINTR);
+ }
+ }
+ /* No table at all: report an empty, finished listing. */
+ if ((hash = ilbs->ilbs_c2s_conn_hash) == NULL) {
+ ASSERT(ilbs->ilbs_s2c_conn_hash == NULL);
+ mutex_exit(&ilbs->ilbs_conn_list_lock);
+ *num = 0;
+ *flags |= ILB_LIST_END;
+ return (0);
+ }
+ ilbs->ilbs_conn_list_busy = B_TRUE;
+ mutex_exit(&ilbs->ilbs_conn_list_lock);
+
+ if (*flags & ILB_LIST_BEGIN) {
+ i = 0;
+ mutex_enter(&hash[0].ilb_conn_hash_lock);
+ cur_connp = hash[0].ilb_connp;
+ } else if (*flags & ILB_LIST_CONT) {
+ /* The previous call already walked past the last bucket. */
+ if (ilbs->ilbs_conn_list_cur == ilbs->ilbs_conn_hash_size) {
+ *num = 0;
+ *flags |= ILB_LIST_END;
+ goto done;
+ }
+ i = ilbs->ilbs_conn_list_cur;
+ mutex_enter(&hash[i].ilb_conn_hash_lock);
+ /*
+ * NOTE(review): the saved entry pointer is reused without being
+ * revalidated here - presumably the conn removal path keeps
+ * ilbs_conn_list_connp valid; confirm.
+ */
+ cur_connp = ilbs->ilbs_conn_list_connp;
+ } else {
+ ret = EINVAL;
+ goto done;
+ }
+
+ /* The lock of bucket i is held at the top of every iteration. */
+ j = 0;
+ while (j < *num) {
+ if (cur_connp == NULL) {
+ /* End of this bucket, move on to the next one. */
+ mutex_exit(&hash[i].ilb_conn_hash_lock);
+ if (++i == ilbs->ilbs_conn_hash_size) {
+ *flags |= ILB_LIST_END;
+ break;
+ }
+ mutex_enter(&hash[i].ilb_conn_hash_lock);
+ cur_connp = hash[i].ilb_connp;
+ continue;
+ }
+ nat[j].proto = cur_connp->conn_l4;
+
+ nat[j].in_global = cur_connp->conn_c2s_daddr;
+ nat[j].in_global_port = cur_connp->conn_c2s_dport;
+ nat[j].out_global = cur_connp->conn_c2s_saddr;
+ nat[j].out_global_port = cur_connp->conn_c2s_sport;
+
+ nat[j].in_local = cur_connp->conn_s2c_saddr;
+ nat[j].in_local_port = cur_connp->conn_s2c_sport;
+ nat[j].out_local = cur_connp->conn_s2c_daddr;
+ nat[j].out_local_port = cur_connp->conn_s2c_dport;
+
+ nat[j].create_time = TICK_TO_MSEC(cur_connp->conn_cr_time);
+ nat[j].last_access_time =
+ TICK_TO_MSEC(cur_connp->conn_c2s_atime);
+
+ /*
+ * The conn_s2c_pkt_cnt may not be accurate since we are not
+ * holding the s2c hash lock.
+ */
+ nat[j].pkt_cnt = cur_connp->conn_c2s_pkt_cnt +
+ cur_connp->conn_s2c_pkt_cnt;
+ j++;
+
+ cur_connp = cur_connp->conn_c2s_next;
+ }
+ ilbs->ilbs_conn_list_connp = cur_connp;
+ /*
+ * If the loop ended because the buffer is full, the bucket lock is
+ * still held. If it ended at the end of the table, the lock has
+ * already been dropped before the break.
+ */
+ if (j == *num)
+ mutex_exit(&hash[i].ilb_conn_hash_lock);
+
+ ilbs->ilbs_conn_list_cur = i;
+
+ *num = j;
+done:
+ /* Let the next lister in. */
+ mutex_enter(&ilbs->ilbs_conn_list_lock);
+ ilbs->ilbs_conn_list_busy = B_FALSE;
+ cv_signal(&ilbs->ilbs_conn_list_cv);
+ mutex_exit(&ilbs->ilbs_conn_list_lock);
+
+ return (ret);
+}
+
+
+/*
+ * Stickiness (persistence) handling routines.
+ */
+
+
+/*
+ * Create the kmem cache from which sticky entries (ilb_sticky_t) are
+ * allocated. No constructor, destructor or reclaim callback is used.
+ */
+static void
+ilb_sticky_cache_init(void)
+{
+ ilb_sticky_cache = kmem_cache_create("ilb_sticky_cache",
+ sizeof (ilb_sticky_t), 0, NULL, NULL, NULL, NULL, NULL,
+ ilb_kmem_flags);
+}
+
+/*
+ * Destroy the sticky entry kmem cache if it exists and forget about it.
+ * Calling this when the cache was never created is a no-op.
+ */
+void
+ilb_sticky_cache_fini(void)
+{
+	if (ilb_sticky_cache == NULL)
+		return;
+
+	kmem_cache_destroy(ilb_sticky_cache);
+	ilb_sticky_cache = NULL;
+}
+
+/* Function wrapper around the ILB_STICKY_REFRELE() macro. */
+void
+ilb_sticky_refrele(ilb_sticky_t *s)
+{
+ ILB_STICKY_REFRELE(s);
+}
+
+/*
+ * Search one sticky hash bucket for the entry created for the given rule
+ * (compared by kstat instance) and client source address. The bucket's
+ * sticky_lock must be held by the caller.
+ *
+ * Returns the first matching entry, or NULL if there is none.
+ */
+static ilb_sticky_t *
+ilb_sticky_lookup(ilb_sticky_hash_t *hash, ilb_rule_t *rule, in6_addr_t *src)
+{
+	ilb_sticky_t *ent;
+
+	ASSERT(mutex_owned(&hash->sticky_lock));
+
+	ent = list_head(&hash->sticky_head);
+	while (ent != NULL) {
+		if (ent->rule_instance == rule->ir_ks_instance &&
+		    IN6_ARE_ADDR_EQUAL(src, &ent->src))
+			break;
+		ent = list_next(&hash->sticky_head, ent);
+	}
+	return (ent);
+}
+
+/*
+ * Allocate a sticky entry for the given rule, server and client source
+ * address and insert it at the head of the given bucket. The bucket's
+ * sticky_lock must be held by the caller.
+ *
+ * The new entry starts with a reference count of 1 and holds a reference
+ * on server. Returns the new entry, or NULL if the allocation failed.
+ */
+static ilb_sticky_t *
+ilb_sticky_add(ilb_sticky_hash_t *hash, ilb_rule_t *rule, ilb_server_t *server,
+ in6_addr_t *src)
+{
+ ilb_sticky_t *s;
+
+ ASSERT(mutex_owned(&hash->sticky_lock));
+
+ /* KM_NOSLEEP: the allocation may fail instead of blocking. */
+ if ((s = kmem_cache_alloc(ilb_sticky_cache, KM_NOSLEEP)) == NULL)
+ return (NULL);
+
+ /*
+ * The rule instance is for handling the scenario when the same
+ * client talks to different rules at the same time. Stickiness
+ * is per rule so we can use the rule instance to differentiate
+ * the client's request.
+ */
+ s->rule_instance = rule->ir_ks_instance;
+ /*
+ * Copy the rule name for listing all sticky cache entry. ir_name
+ * is guaranteed to be NULL terminated.
+ */
+ (void) strcpy(s->rule_name, rule->ir_name);
+ s->server = server;
+
+ /*
+ * Grab a ref cnt on the server so that it won't go away while
+ * it is still in the sticky table.
+ */
+ ILB_SERVER_REFHOLD(server);
+ s->src = *src;
+ s->expiry = rule->ir_sticky_expiry;
+ s->refcnt = 1;
+ s->hash = hash;
+
+ /*
+ * There is no need to set atime here since the refcnt is not
+ * zero. A sticky entry is removed only when the refcnt is
+ * zero. But just set it here for debugging purpose. The
+ * atime is set when a refrele is done on a sticky entry.
+ */
+ s->atime = lbolt64;
+
+ list_insert_head(&hash->sticky_head, s);
+ hash->sticky_cnt++;
+ return (s);
+}
+
+/*
+ * This routine checks if there is an existing sticky entry which matches
+ * a given packet. If there is one, return it. If there is not, create
+ * a sticky entry using the packet's info.
+ *
+ * server is the back end server picked for this packet by the caller; it
+ * is used when a new entry has to be created.
+ *
+ * Returns the server to use and stores the sticky entry, with a reference
+ * held for the caller, in *res. If the server has NAT source addresses,
+ * *src_ent_idx is set to the index of the source entry to use. Returns
+ * NULL, with *res set to NULL, if a new entry cannot be allocated or if
+ * the matching entry points at the given server and it is disabled.
+ */
+ilb_server_t *
+ilb_sticky_find_add(ilb_stack_t *ilbs, ilb_rule_t *rule, in6_addr_t *src,
+ ilb_server_t *server, ilb_sticky_t **res, uint16_t *src_ent_idx)
+{
+ int i;
+ ilb_sticky_hash_t *hash;
+ ilb_sticky_t *s;
+
+ ASSERT(server != NULL);
+
+ *res = NULL;
+
+ i = ILB_STICKY_HASH((uint8_t *)&src->s6_addr32[3],
+ (uint32_t)(uintptr_t)rule, ilbs->ilbs_sticky_hash_size);
+ hash = &ilbs->ilbs_sticky_hash[i];
+
+ /* First check if there is already an entry. */
+ mutex_enter(&hash->sticky_lock);
+ s = ilb_sticky_lookup(hash, rule, src);
+
+ /* No sticky entry, add one. */
+ if (s == NULL) {
+add_new_entry:
+ /* Also reached by goto below, with sticky_lock still held. */
+ s = ilb_sticky_add(hash, rule, server, src);
+ if (s == NULL) {
+ mutex_exit(&hash->sticky_lock);
+ return (NULL);
+ }
+ /*
+ * Find a source for this server. All subsequent requests from
+ * the same client matching this sticky entry will use this
+ * source address in doing NAT. The current algorithm is
+ * simple, rotate the source address. Note that the
+ * source address array does not change after it's created, so
+ * it is OK to just increment the cur index.
+ */
+ if (server->iser_nat_src != NULL) {
+ /* It is a hint, does not need to be atomic. */
+ *src_ent_idx = (server->iser_nat_src->cur++ %
+ server->iser_nat_src->num_src);
+ s->nat_src_idx = *src_ent_idx;
+ }
+ mutex_exit(&hash->sticky_lock);
+ /* The new entry was created with refcnt 1, owned by the caller. */
+ *res = s;
+ return (server);
+ }
+
+ /*
+ * We don't hold any lock accessing iser_enabled. Refer to the
+ * comment in ilb_server_add() about iser_lock.
+ */
+ if (!s->server->iser_enabled) {
+ /*
+ * s->server == server can only happen if there is a race in
+ * toggling the iser_enabled flag (we don't hold a lock doing
+ * that) so that the load balance algorithm still returns a
+ * disabled server. In this case, just drop the packet...
+ */
+ if (s->server == server) {
+ mutex_exit(&hash->sticky_lock);
+ return (NULL);
+ }
+
+ /*
+ * The old server is disabled and there is a new server, use
+ * the new one to create a sticky entry. Since we will
+ * add the entry at the beginning, subsequent lookup will
+ * find this new entry instead of the old one.
+ */
+ goto add_new_entry;
+ }
+
+ /* Existing entry with an enabled server: take a reference on it. */
+ s->refcnt++;
+ *res = s;
+ mutex_exit(&hash->sticky_lock);
+ if (server->iser_nat_src != NULL)
+ *src_ent_idx = s->nat_src_idx;
+ return (s->server);
+}
+
+/*
+ * Task queue job that reaps expired sticky entries. arg is the ilb_timer_t
+ * describing which range of hash buckets, [start, end), this job covers.
+ *
+ * An entry is removed when nobody holds a reference on it and its last
+ * access time is older than its expiry period. Removing an entry drops
+ * the reference it holds on its server.
+ */
+static void
+ilb_sticky_cleanup(void *arg)
+{
+	ilb_timer_t *timer = (ilb_timer_t *)arg;
+	ilb_stack_t *ilbs = timer->ilbs;
+	ilb_sticky_hash_t *table = ilbs->ilbs_sticky_hash;
+	ilb_sticky_hash_t *bucket;
+	ilb_sticky_t *ent, *next;
+	int64_t now, cutoff;
+	uint32_t idx;
+
+	ASSERT(table != NULL);
+
+	now = lbolt64;
+	for (idx = timer->start; idx < timer->end; idx++) {
+		bucket = &table[idx];
+
+		mutex_enter(&bucket->sticky_lock);
+		ent = list_head(&bucket->sticky_head);
+		while (ent != NULL) {
+			/* Fetch the successor first, ent may be freed. */
+			next = list_next(&bucket->sticky_head, ent);
+			if (ent->refcnt == 0) {
+				cutoff = now - SEC_TO_TICK(ent->expiry);
+				if (ent->atime < cutoff) {
+					ILB_SERVER_REFRELE(ent->server);
+					list_remove(&bucket->sticky_head, ent);
+					kmem_cache_free(ilb_sticky_cache, ent);
+					bucket->sticky_cnt--;
+				}
+			}
+			ent = next;
+		}
+		mutex_exit(&bucket->sticky_lock);
+	}
+}
+
+/*
+ * Timeout handler for one sticky cleanup timer. It hands the actual
+ * cleanup work to the sticky task queue and then re-arms itself, unless
+ * the timer's tid has been cleared to 0, which means the timer is being
+ * stopped.
+ */
+static void
+ilb_sticky_timer(void *arg)
+{
+	ilb_timer_t *timer = (ilb_timer_t *)arg;
+
+	(void) taskq_dispatch(timer->ilbs->ilbs_sticky_taskq,
+	    ilb_sticky_cleanup, arg, TQ_SLEEP);
+
+	mutex_enter(&timer->tid_lock);
+	if (timer->tid != 0) {
+		timer->tid = timeout(ilb_sticky_timer, arg,
+		    SEC_TO_TICK(ilb_sticky_timeout));
+	}
+	mutex_exit(&timer->tid_lock);
+}
+
+/*
+ * Set up the sticky hash table of an ILB stack instance: allocate and
+ * initialize the buckets, create the sticky kmem cache if it does not
+ * exist yet, create the cleanup task queue and start the cleanup timers.
+ * Each timer is responsible for a contiguous range of buckets.
+ */
+void
+ilb_sticky_hash_init(ilb_stack_t *ilbs)
+{
+ extern pri_t minclsyspri;
+ int i, part;
+ char tq_name[TASKQ_NAMELEN];
+ ilb_timer_t *tm;
+
+ /*
+ * If the size is not a power of 2, round it up to the next one.
+ * NOTE(review): if no power of 2 below 1 << 31 is larger than the
+ * size, the loop ends with i == 31 and 1 << 31 overflows a signed
+ * int - presumably such sizes are never configured; confirm.
+ */
+ if (ilbs->ilbs_sticky_hash_size & (ilbs->ilbs_sticky_hash_size - 1)) {
+ for (i = 0; i < 31; i++) {
+ if (ilbs->ilbs_sticky_hash_size < (1 << i))
+ break;
+ }
+ ilbs->ilbs_sticky_hash_size = 1 << i;
+ }
+
+ ilbs->ilbs_sticky_hash = kmem_zalloc(sizeof (ilb_sticky_hash_t) *
+ ilbs->ilbs_sticky_hash_size, KM_SLEEP);
+ for (i = 0; i < ilbs->ilbs_sticky_hash_size; i++) {
+ mutex_init(&ilbs->ilbs_sticky_hash[i].sticky_lock, NULL,
+ MUTEX_DEFAULT, NULL);
+ list_create(&ilbs->ilbs_sticky_hash[i].sticky_head,
+ sizeof (ilb_sticky_t),
+ offsetof(ilb_sticky_t, list));
+ }
+
+ if (ilb_sticky_cache == NULL)
+ ilb_sticky_cache_init();
+
+ /* The netstack pointer makes the task queue name unique per stack. */
+ (void) snprintf(tq_name, sizeof (tq_name), "ilb_sticky_taskq_%p",
+ ilbs->ilbs_netstack);
+ ASSERT(ilbs->ilbs_sticky_taskq == NULL);
+ ilbs->ilbs_sticky_taskq = taskq_create(tq_name,
+ ilb_sticky_timer_size * 2, minclsyspri, ilb_sticky_timer_size,
+ ilb_sticky_timer_size * 2, TASKQ_PREPOPULATE|TASKQ_DYNAMIC);
+
+ ASSERT(ilbs->ilbs_sticky_timer_list == NULL);
+ ilbs->ilbs_sticky_timer_list = kmem_zalloc(sizeof (ilb_timer_t) *
+ ilb_sticky_timer_size, KM_SLEEP);
+ /* Number of buckets handled by each timer. */
+ part = ilbs->ilbs_sticky_hash_size / ilb_sticky_timer_size + 1;
+ for (i = 0; i < ilb_sticky_timer_size; i++) {
+ tm = ilbs->ilbs_sticky_timer_list + i;
+ tm->start = i * part;
+ tm->end = i * part + part;
+ /* Clamp the range of the last timer(s) to the table size. */
+ if (tm->end > ilbs->ilbs_sticky_hash_size)
+ tm->end = ilbs->ilbs_sticky_hash_size;
+ tm->ilbs = ilbs;
+ mutex_init(&tm->tid_lock, NULL, MUTEX_DEFAULT, NULL);
+ /* Spread out the starting execution time of all the timers. */
+ tm->tid = timeout(ilb_sticky_timer, tm,
+ SEC_TO_TICK(ilb_sticky_timeout + i));
+ }
+}
+
+/*
+ * Tear down the sticky hash table of an ILB stack instance: stop the
+ * cleanup timers, destroy the cleanup task queue, release every sticky
+ * entry still in the table (dropping its server reference) and free the
+ * table itself. Does nothing if the table was never set up.
+ */
+void
+ilb_sticky_hash_fini(ilb_stack_t *ilbs)
+{
+	int i;
+	ilb_sticky_t *s;
+
+	if (ilbs->ilbs_sticky_hash == NULL)
+		return;
+
+	/* Stop all the timers first. */
+	for (i = 0; i < ilb_sticky_timer_size; i++) {
+		timeout_id_t tid;
+
+		/* Setting tid to 0 tells the timer handler not to restart. */
+		mutex_enter(&ilbs->ilbs_sticky_timer_list[i].tid_lock);
+		tid = ilbs->ilbs_sticky_timer_list[i].tid;
+		ilbs->ilbs_sticky_timer_list[i].tid = 0;
+		mutex_exit(&ilbs->ilbs_sticky_timer_list[i].tid_lock);
+		(void) untimeout(tid);
+	}
+
+	/*
+	 * Destroy the task queue before freeing the timer list: cleanup
+	 * jobs already dispatched by the timers take a pointer into the
+	 * timer list as their argument, so the list must stay valid until
+	 * the queue is gone.
+	 */
+	taskq_destroy(ilbs->ilbs_sticky_taskq);
+	ilbs->ilbs_sticky_taskq = NULL;
+	kmem_free(ilbs->ilbs_sticky_timer_list, sizeof (ilb_timer_t) *
+	    ilb_sticky_timer_size);
+	ilbs->ilbs_sticky_timer_list = NULL;
+
+	for (i = 0; i < ilbs->ilbs_sticky_hash_size; i++) {
+		while ((s = list_head(&ilbs->ilbs_sticky_hash[i].sticky_head))
+		    != NULL) {
+			list_remove(&ilbs->ilbs_sticky_hash[i].sticky_head, s);
+			ILB_SERVER_REFRELE(s->server);
+			/*
+			 * Sticky entries come from ilb_sticky_cache, so
+			 * they must go back to that cache.
+			 */
+			kmem_cache_free(ilb_sticky_cache, s);
+		}
+	}
+	kmem_free(ilbs->ilbs_sticky_hash, ilbs->ilbs_sticky_hash_size *
+	    sizeof (ilb_sticky_hash_t));
+	/* Do not leave a dangling pointer; other code tests it for NULL. */
+	ilbs->ilbs_sticky_hash = NULL;
+}
+
+/*
+ * This routine sends up the sticky hash table to user land. Refer to
+ * the comments before ilb_list_nat(). Both routines assume similar
+ * conditions.
+ *
+ * It is assumed that the caller has checked the size of st so that it
+ * can hold num entries.
+ *
+ * On entry *num is the maximum number of entries to return; on return it
+ * is the number of entries actually stored in st. Returns 0 on success,
+ * EINTR if the wait for another lister was interrupted by a signal, and
+ * EINVAL if neither ILB_LIST_BEGIN nor ILB_LIST_CONT is set in *flags.
+ */
+/* ARGSUSED */
+int
+ilb_list_sticky(ilb_stack_t *ilbs, zoneid_t zoneid, ilb_sticky_entry_t *st,
+ uint32_t *num, uint32_t *flags)
+{
+ ilb_sticky_hash_t *hash;
+ ilb_sticky_t *curp;
+ uint32_t i, j;
+ int ret = 0;
+
+ /* Only one listing may be in progress at a time. */
+ mutex_enter(&ilbs->ilbs_sticky_list_lock);
+ while (ilbs->ilbs_sticky_list_busy) {
+ if (cv_wait_sig(&ilbs->ilbs_sticky_list_cv,
+ &ilbs->ilbs_sticky_list_lock) == 0) {
+ mutex_exit(&ilbs->ilbs_sticky_list_lock);
+ return (EINTR);
+ }
+ }
+ /* No table at all: report an empty, finished listing. */
+ if ((hash = ilbs->ilbs_sticky_hash) == NULL) {
+ mutex_exit(&ilbs->ilbs_sticky_list_lock);
+ *num = 0;
+ *flags |= ILB_LIST_END;
+ return (0);
+ }
+ ilbs->ilbs_sticky_list_busy = B_TRUE;
+ mutex_exit(&ilbs->ilbs_sticky_list_lock);
+
+ if (*flags & ILB_LIST_BEGIN) {
+ i = 0;
+ mutex_enter(&hash[0].sticky_lock);
+ curp = list_head(&hash[0].sticky_head);
+ } else if (*flags & ILB_LIST_CONT) {
+ /* The previous call already walked past the last bucket. */
+ if (ilbs->ilbs_sticky_list_cur == ilbs->ilbs_sticky_hash_size) {
+ *num = 0;
+ *flags |= ILB_LIST_END;
+ goto done;
+ }
+ i = ilbs->ilbs_sticky_list_cur;
+ mutex_enter(&hash[i].sticky_lock);
+ /*
+ * NOTE(review): the saved entry pointer is reused without being
+ * revalidated here - presumably something keeps
+ * ilbs_sticky_list_curp valid between calls; confirm.
+ */
+ curp = ilbs->ilbs_sticky_list_curp;
+ } else {
+ ret = EINVAL;
+ goto done;
+ }
+
+ /* The lock of bucket i is held at the top of every iteration. */
+ j = 0;
+ while (j < *num) {
+ if (curp == NULL) {
+ /* End of this bucket, move on to the next one. */
+ mutex_exit(&hash[i].sticky_lock);
+ if (++i == ilbs->ilbs_sticky_hash_size) {
+ *flags |= ILB_LIST_END;
+ break;
+ }
+ mutex_enter(&hash[i].sticky_lock);
+ curp = list_head(&hash[i].sticky_head);
+ continue;
+ }
+ (void) strcpy(st[j].rule_name, curp->rule_name);
+ st[j].req_addr = curp->src;
+ st[j].srv_addr = curp->server->iser_addr_v6;
+ /*
+ * NOTE(review): ilb_sticky_cleanup() converts the entry's
+ * expiry with SEC_TO_TICK(), i.e. treats it as seconds, while
+ * it is converted with TICK_TO_MSEC() here - confirm the
+ * intended unit of expiry_time.
+ */
+ st[j].expiry_time = TICK_TO_MSEC(curp->expiry);
+ j++;
+ curp = list_next(&hash[i].sticky_head, curp);
+ }
+ ilbs->ilbs_sticky_list_curp = curp;
+ /*
+ * If the loop ended because the buffer is full, the bucket lock is
+ * still held. If it ended at the end of the table, the lock has
+ * already been dropped before the break.
+ */
+ if (j == *num)
+ mutex_exit(&hash[i].sticky_lock);
+
+ ilbs->ilbs_sticky_list_cur = i;
+
+ *num = j;
+done:
+ /* Let the next lister in. */
+ mutex_enter(&ilbs->ilbs_sticky_list_lock);
+ ilbs->ilbs_sticky_list_busy = B_FALSE;
+ cv_signal(&ilbs->ilbs_sticky_list_cv);
+ mutex_exit(&ilbs->ilbs_sticky_list_lock);
+
+ return (ret);
+}
diff --git a/usr/src/uts/common/inet/ilb/ilb_conn.h b/usr/src/uts/common/inet/ilb/ilb_conn.h
new file mode 100644
index 0000000000..6be3b7a8d3
--- /dev/null
+++ b/usr/src/uts/common/inet/ilb/ilb_conn.h
@@ -0,0 +1,246 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _INET_ILB_CONN_H
+#define _INET_ILB_CONN_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct ilb_conn_s;
+
+/*
+ * Struct of the conn hash table bucket
+ *
+ * ilb_connp: the first conn hash entry in the bucket
+ * ilb_conn_hash_lock: mutex to protect the list in the bucket
+ * ilb_conn_cnt: number of conn hash entries in this bucket
+ * ilb_conn_hash_pad: explicit padding whose size depends on the data model
+ * selected by the #if below. Presumably it rounds the bucket up to a
+ * fixed size (e.g. one cache line) -- TODO confirm against the sizes
+ * of the other members.
+ */
+typedef struct ilb_conn_hash_s {
+ struct ilb_conn_s *ilb_connp;
+ kmutex_t ilb_conn_hash_lock;
+ uint32_t ilb_conn_cnt;
+#if defined(_LP64) || defined(_I32LPx)
+ char ilb_conn_hash_pad[44];
+#else
+ char ilb_conn_hash_pad[52];
+#endif
+} ilb_conn_hash_t;
+
+/*
+ * Extracted rule/server info for faster access without holding a reference
+ * to a rule or server.
+ *
+ * topo: the ilb_topo_impl_t value cached for the connection
+ * info: the NAT addresses and ports (ilb_nat_info_t) cached for the
+ * connection
+ */
+typedef struct ilb_rule_info_s {
+ ilb_topo_impl_t topo;
+ ilb_nat_info_t info;
+} ilb_rule_info_t;
+
+/*
+ * Info about a TCP connection for tracking
+ *
+ * One instance is kept per direction (see conn_tcp_track in
+ * struct ilb_conn_info).
+ *
+ * NOTE(review): the field meanings below are inferred from the names only;
+ * confirm against the code that updates them.
+ * ack: presumably the last ACK number seen
+ * fss: presumably the sequence number of the FIN that was sent
+ * fin_sent: presumably set once a FIN has been seen in this direction
+ * fin_acked: presumably set once that FIN has been acknowledged
+ */
+struct ilb_tcp_track {
+ uint32_t ack;
+ uint32_t fss;
+ boolean_t fin_sent;
+ boolean_t fin_acked;
+};
+
+/*
+ * Struct to store NAT info of a connection (one direction)
+ *
+ * conn_daddr: destination address to be matched to find this info
+ * conn_saddr: source address to be matched
+ * conn_dport: destination port to be matched
+ * conn_sport: source port to be matched
+ * conn_ip_sum: IP checksum adjustment for NAT
+ * conn_tp_sum: transport checksum adjustment for NAT
+ * conn_tcp_track: TCP connection tracking info
+ * conn_atime: last access time of this conn cache
+ * conn_pkt_cnt: packets processed using this conn
+ * conn_next: next conn info (for conn info linked list)
+ * conn_prev: previous conn info (for conn info linked list)
+ * conn_hash: back pointer to the conn hash table bucket
+ *
+ * ilb_conn_t embeds two of these, one per direction (conn_c2s and conn_s2c).
+ */
+struct ilb_conn_info {
+ in6_addr_t conn_daddr;
+ in6_addr_t conn_saddr;
+ in_port_t conn_dport;
+ in_port_t conn_sport;
+ uint32_t conn_ip_sum;
+ uint32_t conn_tp_sum;
+
+ struct ilb_tcp_track conn_tcp_track;
+
+ /* Last access time */
+ int64_t conn_atime;
+ uint64_t conn_pkt_cnt;
+
+ struct ilb_conn_s *conn_next;
+ struct ilb_conn_s *conn_prev;
+ ilb_conn_hash_t *conn_hash;
+};
+
+/*
+ * Struct (an entry in the conn hash table) to store a NAT info of a
+ * connection (both directions, client to server and server to client)
+ *
+ * conn_l4: transport protocol used in this NAT connection
+ * conn_expiry: expiry time of this entry
+ * conn_cr_time: creation time of this entry
+ * conn_c2s: client to back end server info
+ * conn_s2c: back end server to client info
+ * conn_server: pointer to the back end server structure
+ * conn_rule_cache: rule information needed for this entry (copied from
+ * the ilb_rule_t struct)
+ * conn_sticky: pointer to the sticky info of this client, used to do
+ * reference counting on the sticky info.
+ * conn_gc: indicates whether this entry needs to be garbage collected
+ *
+ * The conn_c2s_xxx and conn_s2c_xxx macros defined inside the struct are
+ * shorthands for the corresponding members of conn_c2s and conn_s2c.
+ */
+typedef struct ilb_conn_s {
+ int conn_l4;
+
+ int64_t conn_expiry;
+ int64_t conn_cr_time;
+
+ /* Client to server, hash and check info */
+ struct ilb_conn_info conn_c2s;
+#define conn_c2s_daddr conn_c2s.conn_daddr
+#define conn_c2s_saddr conn_c2s.conn_saddr
+#define conn_c2s_dport conn_c2s.conn_dport
+#define conn_c2s_sport conn_c2s.conn_sport
+#define conn_c2s_next conn_c2s.conn_next
+#define conn_c2s_prev conn_c2s.conn_prev
+#define conn_c2s_hash conn_c2s.conn_hash
+#define conn_c2s_atime conn_c2s.conn_atime
+#define conn_c2s_pkt_cnt conn_c2s.conn_pkt_cnt
+#define conn_c2s_ip_sum conn_c2s.conn_ip_sum
+#define conn_c2s_tp_sum conn_c2s.conn_tp_sum
+#define conn_c2s_tcp_ack conn_c2s.conn_tcp_track.ack
+#define conn_c2s_tcp_fss conn_c2s.conn_tcp_track.fss
+#define conn_c2s_tcp_fin_sent conn_c2s.conn_tcp_track.fin_sent
+#define conn_c2s_tcp_fin_acked conn_c2s.conn_tcp_track.fin_acked
+
+ /* Server to client, hash and check info */
+ struct ilb_conn_info conn_s2c;
+#define conn_s2c_daddr conn_s2c.conn_daddr
+#define conn_s2c_saddr conn_s2c.conn_saddr
+#define conn_s2c_dport conn_s2c.conn_dport
+#define conn_s2c_sport conn_s2c.conn_sport
+#define conn_s2c_next conn_s2c.conn_next
+#define conn_s2c_prev conn_s2c.conn_prev
+#define conn_s2c_hash conn_s2c.conn_hash
+#define conn_s2c_atime conn_s2c.conn_atime
+#define conn_s2c_pkt_cnt conn_s2c.conn_pkt_cnt
+#define conn_s2c_ip_sum conn_s2c.conn_ip_sum
+#define conn_s2c_tp_sum conn_s2c.conn_tp_sum
+#define conn_s2c_tcp_ack conn_s2c.conn_tcp_track.ack
+#define conn_s2c_tcp_fss conn_s2c.conn_tcp_track.fss
+#define conn_s2c_tcp_fin_sent conn_s2c.conn_tcp_track.fin_sent
+#define conn_s2c_tcp_fin_acked conn_s2c.conn_tcp_track.fin_acked
+
+ ilb_server_t *conn_server;
+ ilb_rule_info_t conn_rule_cache;
+
+ /*
+ * If the rule is sticky enabled, all ilb_conn_t created from this
+ * rule will have conn_sticky set to the ilb_sticky_t entry. Otherwise
+ * conn_sticky is NULL.
+ */
+ struct ilb_sticky_s *conn_sticky;
+
+ boolean_t conn_gc;
+} ilb_conn_t;
+
+/*
+ * Struct of the sticky hash table bucket
+ *
+ * sticky_head: the sticky hash list of this bucket
+ * sticky_lock: mutex to protect the list
+ * sticky_cnt: number of sticky hash entries in this bucket
+ * sticky_pad: explicit padding whose size depends on the data model
+ * selected by the #if below. Presumably it rounds the bucket up to a
+ * fixed size -- TODO confirm against sizeof (list_t) and
+ * sizeof (kmutex_t).
+ */
+typedef struct ilb_sticky_hash_s {
+ list_t sticky_head;
+ kmutex_t sticky_lock;
+ uint32_t sticky_cnt;
+#if defined(_LP64) || defined(_I32LPx)
+ char sticky_pad[20];
+#else
+ char sticky_pad[36];
+#endif
+} ilb_sticky_hash_t;
+
+/*
+ * Struct to store sticky info of a client.
+ *
+ * rule_instance: the rule instance for this entry, for look up purpose
+ * rule_name: the rule name for this entry
+ * server: the back end server for this client
+ * src: the client source address
+ * expiry: the expiry time of this entry
+ * atime: the last access time of this entry
+ * nat_src_idx: the index to the NAT source array for this client
+ * refcnt: reference count
+ * list: linked list node
+ * hash: back pointer to the sticky hash bucket of this entry
+ */
+typedef struct ilb_sticky_s {
+ uint_t rule_instance;
+ char rule_name[ILB_RULE_NAMESZ];
+ ilb_server_t *server;
+ in6_addr_t src;
+ int64_t expiry;
+ int64_t atime;
+ int nat_src_idx;
+
+ uint32_t refcnt;
+ list_node_t list;
+ ilb_sticky_hash_t *hash;
+} ilb_sticky_t;
+
+extern void ilb_conn_hash_init(ilb_stack_t *);
+extern void ilb_conn_hash_fini(ilb_stack_t *);
+extern void ilb_conn_cache_fini(void);
+extern void ilb_sticky_hash_init(ilb_stack_t *);
+extern void ilb_sticky_hash_fini(ilb_stack_t *);
+extern void ilb_sticky_cache_fini(void);
+
+extern boolean_t ilb_check_conn(ilb_stack_t *, int, void *, int, void *,
+ in6_addr_t *, in6_addr_t *, in_port_t, in_port_t, uint32_t, in6_addr_t *);
+extern boolean_t ilb_check_icmp_conn(ilb_stack_t *, mblk_t *, int, void *,
+ void *, in6_addr_t *);
+extern int ilb_conn_add(ilb_stack_t *, ilb_rule_t *, ilb_server_t *,
+ in6_addr_t *, in_port_t, in6_addr_t *, in_port_t, ilb_nat_info_t *,
+ uint32_t *, uint32_t *, struct ilb_sticky_s *);
+
+extern ilb_server_t *ilb_sticky_find_add(ilb_stack_t *, ilb_rule_t *,
+ in6_addr_t *, ilb_server_t *, struct ilb_sticky_s **, uint16_t *);
+void ilb_sticky_refrele(struct ilb_sticky_s *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _INET_ILB_CONN_H */
diff --git a/usr/src/uts/common/inet/ilb/ilb_impl.h b/usr/src/uts/common/inet/ilb/ilb_impl.h
new file mode 100644
index 0000000000..226aa77d81
--- /dev/null
+++ b/usr/src/uts/common/inet/ilb/ilb_impl.h
@@ -0,0 +1,286 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _INET_ILB_IMPL_H
+#define _INET_ILB_IMPL_H
+
+#include <sys/types.h>
+#include <sys/kstat.h>
+#include <sys/netstack.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Statistics in ILB is stored in several kstat structures. ilb_g_kstat
+ * represents the global statistics. ilb_rule_kstat represents the statistics
+ * of a rule. ilb_server_kstat represents the statistics of a server.
+ *
+ * Each structure comes with update macros. They all take the bare member
+ * name of the statistic as x, fire a DTrace probe whose name is built from
+ * x, and then update the ui64 value of that statistic:
+ *
+ * ILB_KSTAT_UPDATE(ilbs, x, y): add y to global stat x of stack ilbs
+ * ILB_R_KSTAT(rule, x): add 1 to stat x of the given rule
+ * ILB_R_KSTAT_UPDATE(rule, x, y): add y to stat x of the given rule
+ * ILB_S_KSTAT(host, x): add 1 to stat x of the given server
+ * ILB_S_KSTAT_UPDATE(host, x, y): add y to stat x of the given server
+ *
+ * The macros expand to a brace enclosed block (not do/while(0)) and do not
+ * take any lock themselves.
+ */
+#define ILB_KSTAT_MOD_NAME "ilb"
+
+typedef struct ilb_g_kstat_s {
+ kstat_named_t num_rules; /* Number of rules */
+ kstat_named_t ip_frag_in; /* Number of input fragments */
+ kstat_named_t ip_frag_dropped; /* Number of fragments dropped */
+} ilb_g_kstat_t;
+
+#define ILB_KSTAT_UPDATE(ilbs, x, y) \
+{ \
+ DTRACE_PROBE1(ilb__g__kstat__##x, ilb_stack_t *, \
+ (ilbs)); \
+ ((ilbs)->ilbs_kstat->x.value.ui64 += (y)); \
+}
+
+typedef struct ilb_rule_kstat {
+ kstat_named_t num_servers; /* Number of back end servers */
+ kstat_named_t bytes_not_processed; /* Num of bytes not processed. */
+ kstat_named_t pkt_not_processed; /* Num of packets not processed. */
+ kstat_named_t bytes_dropped; /* Number of bytes dropped */
+ kstat_named_t pkt_dropped; /* Number of packets dropped */
+ kstat_named_t nomem_bytes_dropped; /* Bytes dropped due to nomem */
+ kstat_named_t nomem_pkt_dropped; /* Packets dropped due to nomem */
+ kstat_named_t noport_bytes_dropped; /* No NAT sport bytes drop */
+ kstat_named_t noport_pkt_dropped; /* No NAT sport packet drop */
+ kstat_named_t icmp_echo_processed; /* No of ICMP echo processed */
+ kstat_named_t icmp_dropped; /* No of ICMP packets dropped */
+ kstat_named_t icmp_2big_processed; /* No of ICMP 2big processed */
+ kstat_named_t icmp_2big_dropped; /* No of ICMP 2big dropped */
+} ilb_rule_kstat_t;
+
+#define ILB_R_KSTAT(rule, x) \
+{ \
+ DTRACE_PROBE1(ilb__r__kstat__##x, ilb_rule_t *, \
+ (rule)); \
+ ((rule)->ir_kstat.x.value.ui64++); \
+}
+#define ILB_R_KSTAT_UPDATE(rule, x, y) \
+{ \
+ DTRACE_PROBE1(ilb__r__kstat__##x, ilb_rule_t *, \
+ (rule)); \
+ ((rule)->ir_kstat.x.value.ui64 += (y)); \
+}
+
+typedef struct ilb_server_kstat {
+ kstat_named_t bytes_processed; /* Number of bytes processed */
+ kstat_named_t pkt_processed; /* Number of packets processed */
+ kstat_named_t ip_address; /* IP address of the server */
+} ilb_server_kstat_t;
+
+#define ILB_S_KSTAT(host, x) \
+{ \
+ DTRACE_PROBE1(ilb__s__kstat__##x, ilb_server_t *, \
+ (host)); \
+ ((host)->iser_kstat.x.value.ui64++); \
+}
+#define ILB_S_KSTAT_UPDATE(host, x, y) \
+{ \
+ DTRACE_PROBE1(ilb__s__kstat__##x, ilb_server_t *, \
+ (host)); \
+ ((host)->iser_kstat.x.value.ui64 += (y)); \
+}
+
+/*
+ * The maximum port range, meaning all ports (65535 - 1).
+ *
+ * NOTE(review): presumably this is compared with (max port - min port) of a
+ * rule or server -- confirm against the users of this constant.
+ */
+#define ILB_ALL_PORTS_RANGE 65534
+
+struct ilb_nat_src_s;
+
+/*
+ * This structure represents a server.
+ *
+ * iser_addr_v4 and iser_prefix_v4 are shorthands for the last 32 bits of
+ * the corresponding IPv6 fields.
+ *
+ * Reference counting is done with the two macros below, both of which take
+ * iser_lock for the duration of the update:
+ *
+ * ILB_SERVER_REFHOLD(host): increment iser_refcnt. It asserts that the
+ * count was not 0 before the increment.
+ * ILB_SERVER_REFRELE(host): decrement iser_refcnt and signal iser_cv when
+ * the count drops to exactly 1.
+ */
+typedef struct ilb_server_s {
+ in6_addr_t iser_addr_v6;
+ in6_addr_t iser_prefix_v6;
+#define iser_addr_v4 iser_addr_v6.s6_addr32[3]
+#define iser_prefix_v4 iser_prefix_v6.s6_addr32[3]
+
+ boolean_t iser_port_range;
+ in_port_t iser_min_port; /* In host byte order */
+ in_port_t iser_max_port;
+
+ char iser_name[ILB_SERVER_NAMESZ];
+ char iser_ip_addr[INET6_ADDRSTRLEN];
+ netstackid_t iser_stackid;
+ kstat_t *iser_ksp;
+ ilb_server_kstat_t iser_kstat; /* updated by ILB_S_KSTAT*() */
+ struct ilb_server_s *iser_next;
+
+ boolean_t iser_enabled;
+ kmutex_t iser_lock; /* taken by the REFHOLD/REFRELE macros */
+ kcondvar_t iser_cv; /* signalled by ILB_SERVER_REFRELE() */
+ uint64_t iser_refcnt;
+
+ int64_t iser_die_time;
+
+ struct ilb_nat_src_s *iser_nat_src;
+} ilb_server_t;
+
+#define ILB_SERVER_REFHOLD(host) \
+{ \
+ mutex_enter(&(host)->iser_lock); \
+ (host)->iser_refcnt++; \
+ ASSERT((host)->iser_refcnt != 1); \
+ mutex_exit(&(host)->iser_lock); \
+}
+
+#define ILB_SERVER_REFRELE(host) \
+{ \
+ mutex_enter(&(host)->iser_lock); \
+ (host)->iser_refcnt--; \
+ if ((host)->iser_refcnt == 1) \
+ cv_signal(&(host)->iser_cv); \
+ mutex_exit(&(host)->iser_lock); \
+}
+
+struct ilb_rule_s;
+struct ilb_hash_s;
+
+typedef struct ilb_alg_data_s { /* per algorithm ops; see ir_alg in ilb_rule_t */
+ boolean_t (*ilb_alg_lb)(in6_addr_t *, in_port_t, in6_addr_t *,
+ in_port_t, void *, ilb_server_t **); /* presumably picks a server -- confirm */
+ int (*ilb_alg_server_add)(ilb_server_t *, void *);
+ int (*ilb_alg_server_del)(ilb_server_t *, void *);
+ int (*ilb_alg_server_enable)(ilb_server_t *, void *);
+ int (*ilb_alg_server_disable)(ilb_server_t *, void *);
+ void (*ilb_alg_fini)(struct ilb_alg_data_s **);
+
+ void *ilb_alg_data; /* untyped data; presumably the void * passed to the ops */
+} ilb_alg_data_t;
+
+/*
+ * A load balance rule has
+ *
+ * 1. a name
+ * 2. a network protocol
+ * 3. a transport protocol
+ * 4. a load balance mechanism (DSR, NAT, ...)
+ * 5. a target address (VIP)
+ * 6. a target port (or port ranges)
+ * 7. a pool of back end servers
+ * 8. a load balance algorithm (round robin, hashing, ...)
+ *
+ * ir_target_v4 and ir_target_prefix_v4 are shorthands for the last 32 bits
+ * of ir_target_v6 and ir_prefix_v6.
+ *
+ * Reference counting is done with the two macros below, both of which take
+ * ir_lock for the duration of the update:
+ *
+ * ILB_RULE_REFHOLD(rule): increment ir_refcnt. It asserts that the count
+ * was not 0 before the increment.
+ * ILB_RULE_REFRELE(rule): assert that ir_refcnt is at least 2, decrement
+ * it, and signal ir_cv when the new count is 2 or less.
+ */
+typedef struct ilb_rule_s {
+ char ir_name[ILB_RULE_NAMESZ];
+ uint8_t ir_ipver;
+ uint8_t ir_proto;
+ ilb_topo_impl_t ir_topo;
+ zoneid_t ir_zoneid;
+ uint32_t ir_flags;
+
+ in6_addr_t ir_target_v6;
+#define ir_target_v4 ir_target_v6.s6_addr32[3]
+ in6_addr_t ir_prefix_v6;
+#define ir_target_prefix_v4 ir_prefix_v6.s6_addr32[3]
+
+ boolean_t ir_port_range;
+ in_port_t ir_min_port; /* In host byte order */
+ in_port_t ir_max_port;
+
+ ilb_server_t *ir_servers;
+
+ uint32_t ir_nat_expiry;
+ uint32_t ir_conn_drain_timeout;
+ in6_addr_t ir_nat_src_start;
+ in6_addr_t ir_nat_src_end;
+
+ boolean_t ir_sticky;
+ in6_addr_t ir_sticky_mask;
+ uint32_t ir_sticky_expiry;
+
+ struct ilb_rule_s *ir_next;
+
+ struct ilb_rule_s *ir_hash_next;
+ struct ilb_rule_s *ir_hash_prev;
+ struct ilb_hash_s *ir_hash;
+
+ ilb_algo_impl_t ir_alg_type;
+ ilb_alg_data_t *ir_alg;
+
+ kstat_t *ir_ksp;
+ ilb_rule_kstat_t ir_kstat; /* updated by ILB_R_KSTAT*() */
+ uint_t ir_ks_instance;
+
+ kmutex_t ir_lock; /* taken by the REFHOLD/REFRELE macros */
+ kcondvar_t ir_cv; /* signalled by ILB_RULE_REFRELE() */
+ uint32_t ir_refcnt;
+} ilb_rule_t;
+
+#define ILB_RULE_REFHOLD(rule) \
+{ \
+ mutex_enter(&(rule)->ir_lock); \
+ (rule)->ir_refcnt++; \
+ ASSERT((rule)->ir_refcnt != 1); \
+ mutex_exit(&(rule)->ir_lock); \
+}
+
+#define ILB_RULE_REFRELE(rule) \
+{ \
+ mutex_enter(&(rule)->ir_lock); \
+ ASSERT((rule)->ir_refcnt >= 2); \
+ if (--(rule)->ir_refcnt <= 2) \
+ cv_signal(&(rule)->ir_cv); \
+ mutex_exit(&(rule)->ir_lock); \
+}
+
+
+typedef struct ilb_hash_s { /* bucket type that ir_hash in ilb_rule_t points to */
+ ilb_rule_t *ilb_hash_rule; /* presumably head of the ir_hash_next chain -- confirm */
+ kmutex_t ilb_hash_lock;
+#if defined(_LP64) || defined(_I32LPx)
+ char ilb_hash_pad[48]; /* explicit padding, sized per data model */
+#else
+ char ilb_hash_pad[56]; /* explicit padding, sized per data model */
+#endif
+} ilb_hash_t;
+
+struct ilb_nat_src_entry_s;
+
+/*
+ * Structure to store NAT info.
+ *
+ * Half NAT only uses the first 4 fields in the structure.
+ *
+ * The per field notes below describe how ilb_full_nat() and ilb_half_nat()
+ * use the fields. The port fields are copied into the transport header
+ * without any byte swapping.
+ */
+typedef struct {
+ in6_addr_t vip; /* source address written on server to client */
+ in6_addr_t nat_dst; /* dest address written on client to server */
+ in_port_t dport; /* source port written on server to client */
+ in_port_t nat_dport; /* dest port written on client to server */
+
+ in6_addr_t src; /* dest address written on server to client */
+ in6_addr_t nat_src; /* source address written on client to server */
+ in_port_t sport; /* dest port written on server to client */
+ in_port_t nat_sport; /* source port written on client to server */
+
+ struct ilb_nat_src_entry_s *src_ent;
+} ilb_nat_info_t;
+
+extern int ilb_kmem_flags;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _INET_ILB_IMPL_H */
diff --git a/usr/src/uts/common/inet/ilb/ilb_nat.c b/usr/src/uts/common/inet/ilb/ilb_nat.c
new file mode 100644
index 0000000000..0be473fb12
--- /dev/null
+++ b/usr/src/uts/common/inet/ilb/ilb_nat.c
@@ -0,0 +1,609 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/cmn_err.h>
+#include <sys/crc32.h>
+#include <netinet/in.h>
+#include <inet/ip.h>
+#include <inet/ip6.h>
+#include <inet/tcp.h>
+#include <inet/udp_impl.h>
+#include <inet/ilb.h>
+
+#include "ilb_impl.h"
+#include "ilb_stack.h"
+#include "ilb_nat.h"
+
+/*
+ * NAT source entry garbage collection timeout. The actual timeout value
+ * includes a jitter bounded by the ILB_NAT_SRC_TIMEOUT_JITTER. Both values
+ * are in seconds (they are passed through SEC_TO_TICK() when the timer is
+ * armed), and the jitter is derived from gethrtime().
+ */
+#define ILB_NAT_SRC_TIMEOUT 30
+#define ILB_NAT_SRC_TIMEOUT_JITTER 5
+
+/*
+ * key1/2 are assumed to be pointers to uint32_t.
+ *
+ * Computes a CRC32 over key1 (seeded with -1U), continues it over key2,
+ * and reduces the result modulo hash_size. The final value is left in
+ * hash, which must be an lvalue.
+ */
+#define ILB_NAT_SRC_HASH(hash, key1, key2, hash_size) \
+{ \
+ CRC32((hash), (key1), sizeof (uint32_t), -1U, crc32_table); \
+ CRC32((hash), (key2), sizeof (uint32_t), (hash), crc32_table); \
+ (hash) %= (hash_size); \
+}
+
+/*
+ * NAT source port space instance number. It is bumped atomically and used
+ * to give each port arena a unique name.
+ */
+static uint32_t ilb_nat_src_instance = 0;
+
+/*
+ * Increment the address *a by one, in place.
+ *
+ * For an IPv4-mapped address only the last 32 bits (the IPv4 address) are
+ * incremented. For any other address the whole 128 bit value is
+ * incremented, carrying from the least significant 32 bit word
+ * (s6_addr32[3]) towards the most significant one (s6_addr32[0]).
+ *
+ * The words are kept in network byte order, so each one is converted to
+ * host order for the arithmetic and back again when stored. Wrapping the
+ * IPv4 address, or all 128 bits, trips an ASSERT.
+ */
+static void
+incr_addr(in6_addr_t *a)
+{
+	uint32_t word;
+	int idx;
+
+	if (IN6_IS_ADDR_V4MAPPED(a)) {
+		word = ntohl(a->s6_addr32[3]) + 1;
+		a->s6_addr32[3] = htonl(word);
+		ASSERT(word != 0);
+		return;
+	}
+
+	/*
+	 * Bump the low order words first. A word that wraps to 0 is
+	 * stored as 0 and the carry moves on to the next word up.
+	 */
+	for (idx = 3; idx > 0; idx--) {
+		word = ntohl(a->s6_addr32[idx]) + 1;
+		a->s6_addr32[idx] = htonl(word);
+		if (word != 0)
+			return;
+	}
+
+	/* The carry reached the most significant word. */
+	word = ntohl(a->s6_addr32[0]) + 1;
+	a->s6_addr32[0] = htonl(word);
+	ASSERT(word != 0);
+}
+
+/*
+ * When ILB does full NAT, it first picks one source address from the rule's
+ * specified NAT source address list (currently done in round robin fashion).
+ * Then it needs to allocate a port. This source port must make the tuple
+ * (source address:source port:destination address:destination port)
+ * unique. The destination part of the tuple is determined by the back
+ * end server, and could not be changed.
+ *
+ * To handle the above source port number allocation, ILB sets up a table
+ * of entries identified by source address:back end server address:server port
+ * tuple. This table is used by all rules for NAT source port allocation.
+ * Each tuple has an associated vmem arena used for managing the NAT source
+ * port space between the source address and back end server address/port.
+ * Each back end server (ilb_server_t) has an array of pointers (iser_nat_src)
+ * to the different entries in this table for NAT source port allocation.
+ * When ILB needs to allocate a NAT source address and port to talk to a back
+ * end server, it picks a source address and uses the array pointer to get
+ * to an entry. Then it calls vmem_alloc() on the associated vmem arena to
+ * find an unused port.
+ *
+ * When a back end server is added, ILB sets up the aforementioned array.
+ * For each source address specified in the rule, ILB checks if there is any
+ * existing entry which matches this source address:back end server address:
+ * port tuple. The server port is either a specific port or 0 (meaning wild
+ * card port). Normally, a back end server uses the same port as in the rule.
+ * If a back end server is used to serve two different rules, there will be
+ * two different ports. Source port allocation for these two rules does not
+ * conflict, hence we can use two vmem arenas (two different entries in the
+ * table). But if a server uses port range in one rule, we will treat it as
+ * a wild card port. A wild card port matches any port. If this server
+ * is used to serve more than one rules and those rules use the same set of
+ * NAT source addresses, this means that they must share the same set of vmem
+ * arenas (source port spaces). We do this for simplicity reason. If not,
+ * we need to partition the port range so that we can identify different forms
+ * of source port number collision.
+ */
+
+/*
+ * NAT source address initialization routine.
+ *
+ * Allocates the NAT source hash table of the given stack
+ * (ilbs_nat_src_hash_size buckets), initializes the list and the lock of
+ * every bucket, and then arms the garbage collection timer.
+ *
+ * The timer is armed last because ilb_nat_src_timer() walks the hash
+ * table. The table allocation may sleep (KM_SLEEP), so the table must
+ * exist and be fully set up before the handler can possibly run.
+ */
+void
+ilb_nat_src_init(ilb_stack_t *ilbs)
+{
+	int i;
+
+	ilbs->ilbs_nat_src = kmem_zalloc(sizeof (ilb_nat_src_hash_t) *
+	    ilbs->ilbs_nat_src_hash_size, KM_SLEEP);
+	for (i = 0; i < ilbs->ilbs_nat_src_hash_size; i++) {
+		list_create(&ilbs->ilbs_nat_src[i].nsh_head,
+		    sizeof (ilb_nat_src_entry_t),
+		    offsetof(ilb_nat_src_entry_t, nse_link));
+		mutex_init(&ilbs->ilbs_nat_src[i].nsh_lock, NULL,
+		    MUTEX_DEFAULT, NULL);
+	}
+
+	/* Timeout in seconds plus a jitter derived from gethrtime(). */
+	ilbs->ilbs_nat_src_tid = timeout(ilb_nat_src_timer, ilbs,
+	    SEC_TO_TICK(ILB_NAT_SRC_TIMEOUT +
+	    gethrtime() % ILB_NAT_SRC_TIMEOUT_JITTER));
+}
+
+/*
+ * NAT source address clean up routine.
+ *
+ * Cancels the garbage collection timer, then destroys every entry that is
+ * still in the NAT source hash table (together with its port arena) and
+ * finally frees the table itself. Entries are destroyed regardless of
+ * their nse_refcnt.
+ */
+void
+ilb_nat_src_fini(ilb_stack_t *ilbs)
+{
+ ilb_nat_src_entry_t *cur;
+ timeout_id_t tid;
+ int i;
+
+ /*
+ * By setting ilbs_nat_src_tid to 0, the timer handler will not
+ * restart the timer.
+ */
+ mutex_enter(&ilbs->ilbs_nat_src_lock);
+ tid = ilbs->ilbs_nat_src_tid;
+ ilbs->ilbs_nat_src_tid = 0;
+ mutex_exit(&ilbs->ilbs_nat_src_lock);
+ if (tid != 0)
+ (void) untimeout(tid); /* called without the lock held */
+
+ mutex_destroy(&ilbs->ilbs_nat_src_lock);
+
+ for (i = 0; i < ilbs->ilbs_nat_src_hash_size; i++) {
+ while ((cur = list_remove_head(&ilbs->ilbs_nat_src[i].nsh_head))
+ != NULL) {
+ vmem_destroy(cur->nse_port_arena);
+ kmem_free(cur, sizeof (ilb_nat_src_entry_t));
+ }
+ mutex_destroy(&ilbs->ilbs_nat_src[i].nsh_lock);
+ }
+
+ kmem_free(ilbs->ilbs_nat_src, sizeof (ilb_nat_src_hash_t) *
+ ilbs->ilbs_nat_src_hash_size);
+ ilbs->ilbs_nat_src = NULL;
+}
+
+/*
+ * An arena name is "ilb_ns" + "_xxxxxxxxxx", i.e. 6 characters, an
+ * underscore, up to 10 decimal digits of a uint32_t and the terminating
+ * NUL.
+ */
+#define	ARENA_NAMESZ	18
+
+/*
+ * Port space handed to each NAT source port arena: NAT_PORT_SIZE ports
+ * starting at NAT_PORT_START. NAT_PORT_SIZE is parenthesized so that it
+ * can safely be used inside a larger expression.
+ */
+#define	NAT_PORT_START	4096
+#define	NAT_PORT_SIZE	(65535 - NAT_PORT_START)
+
+/*
+ * Check if the NAT source and back end server pair ilb_nat_src_entry_t
+ * exists. If it does, increment the refcnt and return it. If not, create
+ * one and return it.
+ *
+ * An existing entry matches when both addresses are equal and either the
+ * ports are equal or one of the two ports is 0 (wild card).
+ *
+ * Returns NULL when a new entry or its port arena cannot be allocated.
+ * The hash bucket lock is held across the lookup and the insertion and is
+ * dropped on every return path.
+ */
+static ilb_nat_src_entry_t *
+ilb_find_nat_src(ilb_stack_t *ilbs, const in6_addr_t *nat_src,
+    const in6_addr_t *serv_addr, in_port_t port)
+{
+	ilb_nat_src_entry_t *tmp;
+	uint32_t idx;
+	char arena_name[ARENA_NAMESZ];
+	list_t *head;
+
+	/* Only the last 32 bits of each address feed the bucket hash. */
+	ILB_NAT_SRC_HASH(idx, &nat_src->s6_addr32[3], &serv_addr->s6_addr32[3],
+	    ilbs->ilbs_nat_src_hash_size);
+	mutex_enter(&ilbs->ilbs_nat_src[idx].nsh_lock);
+	head = &ilbs->ilbs_nat_src[idx].nsh_head;
+	for (tmp = list_head(head); tmp != NULL; tmp = list_next(head, tmp)) {
+		if (IN6_ARE_ADDR_EQUAL(&tmp->nse_src_addr, nat_src) &&
+		    IN6_ARE_ADDR_EQUAL(&tmp->nse_serv_addr, serv_addr) &&
+		    (port == tmp->nse_port || port == 0 ||
+		    tmp->nse_port == 0)) {
+			break;
+		}
+	}
+	/* Found one, return it. */
+	if (tmp != NULL) {
+		tmp->nse_refcnt++;
+		mutex_exit(&ilbs->ilbs_nat_src[idx].nsh_lock);
+		return (tmp);
+	}
+
+	tmp = kmem_alloc(sizeof (ilb_nat_src_entry_t), KM_NOSLEEP);
+	if (tmp == NULL) {
+		mutex_exit(&ilbs->ilbs_nat_src[idx].nsh_lock);
+		return (NULL);
+	}
+	tmp->nse_src_addr = *nat_src;
+	tmp->nse_serv_addr = *serv_addr;
+	tmp->nse_port = port;
+	/* Remembered so that the entry can be locked without rehashing. */
+	tmp->nse_nsh_lock = &ilbs->ilbs_nat_src[idx].nsh_lock;
+	tmp->nse_refcnt = 1;
+
+	(void) snprintf(arena_name, ARENA_NAMESZ, "ilb_ns_%u",
+	    atomic_add_32_nv(&ilb_nat_src_instance, 1));
+	if ((tmp->nse_port_arena = vmem_create(arena_name,
+	    (void *)NAT_PORT_START, NAT_PORT_SIZE, 1, NULL, NULL, NULL, 1,
+	    VM_SLEEP | VMC_IDENTIFIER)) == NULL) {
+		/*
+		 * The new entry was never linked into the bucket, so it
+		 * can be freed after the bucket lock has been dropped.
+		 */
+		mutex_exit(&ilbs->ilbs_nat_src[idx].nsh_lock);
+		kmem_free(tmp, sizeof (*tmp));
+		return (NULL);
+	}
+
+	list_insert_tail(head, tmp);
+	mutex_exit(&ilbs->ilbs_nat_src[idx].nsh_lock);
+
+	return (tmp);
+}
+
+/*
+ * Create ilb_nat_src_t struct for a ilb_server_t struct.
+ *
+ * Starting at *start, up to num consecutive addresses (but never more
+ * than ILB_MAX_NAT_SRC) are each paired with the back end server
+ * srv_addr/port through ilb_find_nat_src(). On success the new struct
+ * is stored in *nat_src and 0 is returned. On allocation failure
+ * *nat_src is set to NULL and ENOMEM is returned.
+ */
+int
+ilb_create_nat_src(ilb_stack_t *ilbs, ilb_nat_src_t **nat_src,
+    const in6_addr_t *srv_addr, in_port_t port, const in6_addr_t *start,
+    int num)
+{
+	ilb_nat_src_t *new_src;
+	in6_addr_t addr;
+	int limit, n;
+
+	new_src = kmem_zalloc(sizeof (ilb_nat_src_t), KM_NOSLEEP);
+	if (new_src == NULL) {
+		*nat_src = NULL;
+		return (ENOMEM);
+	}
+
+	limit = (num < ILB_MAX_NAT_SRC) ? num : ILB_MAX_NAT_SRC;
+	addr = *start;
+	for (n = 0; n < limit; n++) {
+		ilb_nat_src_entry_t *ent;
+
+		ent = ilb_find_nat_src(ilbs, &addr, srv_addr, port);
+		new_src->src_list[n] = ent;
+		if (ent == NULL) {
+			/*
+			 * num_src only counts the entries obtained so
+			 * far, so this releases exactly those and frees
+			 * new_src.
+			 */
+			ilb_destroy_nat_src(&new_src);
+			*nat_src = NULL;
+			return (ENOMEM);
+		}
+		new_src->num_src++;
+		incr_addr(&addr);
+	}
+
+	*nat_src = new_src;
+	return (0);
+}
+
+/*
+ * Timer routine for garbage collecting unneeded NAT source entry. We
+ * don't use a taskq for this since the table should be relatively small
+ * and should be OK for a timer to handle.
+ *
+ * arg is the ilb_stack_t whose table is to be scanned. The timer is
+ * re-armed at the end unless ilbs_nat_src_tid has been cleared.
+ */
+void
+ilb_nat_src_timer(void *arg)
+{
+	ilb_stack_t *ilbs = (ilb_stack_t *)arg;
+	ilb_nat_src_entry_t *ent, *next;
+	kmutex_t *lock;
+	list_t *head;
+	int b;
+
+	for (b = 0; b < ilbs->ilbs_nat_src_hash_size; b++) {
+		lock = &ilbs->ilbs_nat_src[b].nsh_lock;
+		head = &ilbs->ilbs_nat_src[b].nsh_head;
+
+		mutex_enter(lock);
+		for (ent = list_head(head); ent != NULL; ent = next) {
+			/* Fetch the successor before ent may be freed. */
+			next = list_next(head, ent);
+
+			/*
+			 * An entry can go only when no server refers to
+			 * it any more and no conn still uses one of its
+			 * ports, hence the check of the arena size.
+			 */
+			if (ent->nse_refcnt == 0 &&
+			    vmem_size(ent->nse_port_arena, VMEM_ALLOC) == 0) {
+				list_remove(head, ent);
+				vmem_destroy(ent->nse_port_arena);
+				kmem_free(ent, sizeof (ilb_nat_src_entry_t));
+			}
+		}
+		mutex_exit(lock);
+	}
+
+	mutex_enter(&ilbs->ilbs_nat_src_lock);
+	if (ilbs->ilbs_nat_src_tid != 0) {
+		ilbs->ilbs_nat_src_tid = timeout(ilb_nat_src_timer, ilbs,
+		    SEC_TO_TICK(ILB_NAT_SRC_TIMEOUT +
+		    gethrtime() % ILB_NAT_SRC_TIMEOUT_JITTER));
+	}
+	mutex_exit(&ilbs->ilbs_nat_src_lock);
+}
+
+/*
+ * Destroy a given ilb_nat_src_t struct and set *nat_src to NULL. The
+ * reference held on each of its ilb_nat_src_entry_t is released. The
+ * entries themselves are not freed here; the garbage collection timer
+ * reclaims them. Nothing is done when *nat_src is already NULL.
+ */
+void
+ilb_destroy_nat_src(ilb_nat_src_t **nat_src)
+{
+	ilb_nat_src_t *victim = *nat_src;
+	ilb_nat_src_entry_t *ent;
+	int n, count;
+
+	if (victim == NULL)
+		return;
+
+	count = victim->num_src;
+	for (n = 0; n < count; n++) {
+		ent = victim->src_list[n];
+
+		/* The refcnt is protected by the entry's bucket lock. */
+		mutex_enter(ent->nse_nsh_lock);
+		ent->nse_refcnt--;
+		mutex_exit(ent->nse_nsh_lock);
+	}
+
+	kmem_free(victim, sizeof (ilb_nat_src_t));
+	*nat_src = NULL;
+}
+
+/*
+ * Given a backend server address and its ilb_nat_src_t, allocate a source
+ * address and port for NAT usage.
+ *
+ * src: the NAT source entries of the back end server
+ * addr: set to the chosen NAT source address
+ * port: set to the allocated port, in network byte order
+ * nat_src_idx: if not NULL, the index of the only entry of src to try.
+ *	If NULL, the entries are tried in turn, starting after the one
+ *	used last.
+ *
+ * Returns the entry the port was allocated from, or NULL when no port
+ * could be allocated. NULL is also returned, instead of dividing by
+ * zero or indexing past the entries, when src has no entry or when
+ * *nat_src_idx is not a valid index.
+ */
+ilb_nat_src_entry_t *
+ilb_alloc_nat_addr(ilb_nat_src_t *src, in6_addr_t *addr, in_port_t *port,
+    uint16_t *nat_src_idx)
+{
+	int i, try, size;
+	in_port_t p;
+
+	size = src->num_src;
+	if (size == 0)
+		return (NULL);
+
+	/* Increment of cur does not need to be atomic. It is just a hint. */
+	if (nat_src_idx == NULL) {
+		i = (++src->cur) % size;
+	} else {
+		i = *nat_src_idx;
+		if (i >= size)
+			return (NULL);
+	}
+
+	for (try = 0; try < size; try++) {
+		/* The arena hands out port numbers; 0 means failure. */
+		p = (in_port_t)(uintptr_t)vmem_alloc(
+		    src->src_list[i]->nse_port_arena, 1, VM_NOSLEEP);
+		if (p != 0)
+			break;
+		/*
+		 * If an index is given and we cannot allocate a port using
+		 * that entry, return NULL.
+		 */
+		if (nat_src_idx != NULL)
+			return (NULL);
+		i = (i + 1) % size;
+	}
+	if (try == size)
+		return (NULL);
+	*addr = src->src_list[i]->nse_src_addr;
+	*port = htons(p);
+	return (src->src_list[i]);
+}
+
+/*
+ * Use the pre-calculated checksum to adjust the checksum of a packet after
+ * NAT.
+ *
+ * The complement of *chksum is added to adj_sum, the carries are folded
+ * back into the low 16 bits, and the complement of the result is stored
+ * back in *chksum.
+ */
+static void
+adj_cksum(uint16_t *chksum, uint32_t adj_sum)
+{
+	uint16_t inverted = (uint16_t)~(*chksum);
+	uint32_t folded = adj_sum + inverted;
+
+	/* Fold until nothing is left above the low 16 bits. */
+	while (folded > 0xffff)
+		folded = (folded >> 16) + (folded & 0xffff);
+
+	*chksum = (uint16_t)~folded;
+}
+
+/*
+ * Do full NAT (replace both source and destination info) on a packet.
+ *
+ * l3/iph: IPPROTO_IP with a pointer to an ipha_t, or IPPROTO_IPV6 with a
+ * pointer to an ip6_t
+ * l4/tph: IPPROTO_TCP with a pointer to a tcpha_t, or IPPROTO_UDP with a
+ * pointer to a udpha_t
+ * info: the addresses and ports to write into the headers
+ * adj_ip_sum: adjustment applied to the IPv4 header checksum (not used for
+ * IPv6)
+ * adj_tp_sum: adjustment applied to the TCP/UDP checksum
+ * c2s: if true, source and destination become nat_src/nat_sport and
+ * nat_dst/nat_dport. Otherwise they become vip/dport and src/sport.
+ *
+ * Any other l3 or l4 value trips an ASSERT.
+ */
+void
+ilb_full_nat(int l3, void *iph, int l4, void *tph, ilb_nat_info_t *info,
+ uint32_t adj_ip_sum, uint32_t adj_tp_sum, boolean_t c2s)
+{
+ in_port_t *orig_sport, *orig_dport;
+ uint16_t *tp_cksum;
+
+ switch (l4) { /* locate the ports and the checksum in the header */
+ case IPPROTO_TCP:
+ orig_sport = &((tcpha_t *)tph)->tha_lport;
+ orig_dport = &((tcpha_t *)tph)->tha_fport;
+ tp_cksum = &((tcpha_t *)tph)->tha_sum;
+ break;
+ case IPPROTO_UDP:
+ orig_sport = &((udpha_t *)tph)->uha_src_port;
+ orig_dport = &((udpha_t *)tph)->uha_dst_port;
+ tp_cksum = &((udpha_t *)tph)->uha_checksum;
+ break;
+ default:
+ ASSERT(0);
+ return;
+ }
+
+ switch (l3) {
+ case IPPROTO_IP: {
+ ipha_t *ipha;
+
+ ipha = iph;
+ if (c2s) {
+ IN6_V4MAPPED_TO_IPADDR(&info->nat_src,
+ ipha->ipha_src);
+ IN6_V4MAPPED_TO_IPADDR(&info->nat_dst,
+ ipha->ipha_dst);
+ *orig_sport = info->nat_sport;
+ *orig_dport = info->nat_dport;
+ } else {
+ IN6_V4MAPPED_TO_IPADDR(&info->vip, ipha->ipha_src);
+ IN6_V4MAPPED_TO_IPADDR(&info->src, ipha->ipha_dst);
+ *orig_sport = info->dport;
+ *orig_dport = info->sport;
+ }
+ adj_cksum(&ipha->ipha_hdr_checksum, adj_ip_sum);
+ adj_cksum(tp_cksum, adj_tp_sum);
+ break;
+ }
+ case IPPROTO_IPV6: {
+ ip6_t *ip6h;
+
+ ip6h = iph;
+ if (c2s) {
+ ip6h->ip6_src = info->nat_src;
+ ip6h->ip6_dst = info->nat_dst;
+ *orig_sport = info->nat_sport;
+ *orig_dport = info->nat_dport;
+ } else {
+ ip6h->ip6_src = info->vip;
+ ip6h->ip6_dst = info->src;
+ *orig_sport = info->dport;
+ *orig_dport = info->sport;
+ }
+ /* No checksum for IPv6 header */
+ adj_cksum(tp_cksum, adj_tp_sum);
+ break;
+ }
+ default:
+ ASSERT(0);
+ break;
+ }
+}
+
+/*
+ * Do half NAT (only replace the destination info) on a packet.
+ *
+ * l3/iph: IPPROTO_IP with a pointer to an ipha_t, or IPPROTO_IPV6 with a
+ * pointer to an ip6_t
+ * l4/tph: IPPROTO_TCP with a pointer to a tcpha_t, or IPPROTO_UDP with a
+ * pointer to a udpha_t
+ * info: the addresses and ports to write into the headers
+ * adj_ip_sum: adjustment applied to the IPv4 header checksum (not used for
+ * IPv6)
+ * adj_tp_sum: adjustment applied to the TCP/UDP checksum
+ * c2s: if true, the destination address and port become
+ * nat_dst/nat_dport. Otherwise the source address and port become
+ * vip/dport.
+ *
+ * Any other l3 or l4 value trips an ASSERT.
+ */
+void
+ilb_half_nat(int l3, void *iph, int l4, void *tph, ilb_nat_info_t *info,
+ uint32_t adj_ip_sum, uint32_t adj_tp_sum, boolean_t c2s)
+{
+ in_port_t *orig_port;
+ uint16_t *tp_cksum;
+
+ switch (l4) { /* pick the one port to rewrite, and the checksum */
+ case IPPROTO_TCP:
+ if (c2s)
+ orig_port = &((tcpha_t *)tph)->tha_fport;
+ else
+ orig_port = &((tcpha_t *)tph)->tha_lport;
+ tp_cksum = &((tcpha_t *)tph)->tha_sum;
+ break;
+ case IPPROTO_UDP:
+ if (c2s)
+ orig_port = &((udpha_t *)tph)->uha_dst_port;
+ else
+ orig_port = &((udpha_t *)tph)->uha_src_port;
+ tp_cksum = &((udpha_t *)tph)->uha_checksum;
+ break;
+ default:
+ ASSERT(0);
+ return;
+ }
+
+ switch (l3) {
+ case IPPROTO_IP: {
+ ipha_t *ipha;
+
+ ipha = iph;
+ if (c2s) {
+ IN6_V4MAPPED_TO_IPADDR(&info->nat_dst,
+ ipha->ipha_dst);
+ *orig_port = info->nat_dport;
+ } else {
+ IN6_V4MAPPED_TO_IPADDR(&info->vip, ipha->ipha_src);
+ *orig_port = info->dport;
+ }
+ adj_cksum(&ipha->ipha_hdr_checksum, adj_ip_sum);
+ adj_cksum(tp_cksum, adj_tp_sum);
+ break;
+ }
+ case IPPROTO_IPV6: {
+ ip6_t *ip6h;
+
+ ip6h = iph;
+ if (c2s) {
+ ip6h->ip6_dst = info->nat_dst;
+ *orig_port = info->nat_dport;
+ } else {
+ ip6h->ip6_src = info->vip;
+ *orig_port = info->dport;
+ }
+ /* No checksum for IPv6 header */
+ adj_cksum(tp_cksum, adj_tp_sum);
+ break;
+ }
+ default:
+ ASSERT(0);
+ break;
+ }
+}
+
+/*
+ * Calculate the IPv6 pseudo checksum, used for ICMPv6 NAT.
+ *
+ * Adds up the sixteen 16 bit words that start at ip6_src, plus nxt_hdr
+ * in network byte order, and folds the carry into the low 16 bits once.
+ * This relies on the destination address directly following the source
+ * address in ip6_t.
+ */
+uint32_t
+ilb_pseudo_sum_v6(ip6_t *ip6h, uint8_t nxt_hdr)
+{
+	uint16_t *words = (uint16_t *)&ip6h->ip6_src;
+	uint32_t total = htons(nxt_hdr);
+	int w;
+
+	for (w = 0; w < 16; w++)
+		total += words[w];
+
+	return ((total >> 16) + (total & 0xffff));
+}
+
+/*
+ * Do NAT on an ICMPv4 packet.
+ *
+ * mp: the message the ICMP checksum is recomputed over
+ * out_iph: IP header whose destination becomes nat_dst and, for full NAT,
+ * whose source becomes nat_src. Its header checksum is adjusted with
+ * sum.
+ * icmph: the ICMP header whose checksum is recomputed
+ * in_iph: IP header whose source becomes nat_dst and, for full NAT, whose
+ * destination becomes nat_src. NOTE(review): presumably the header of
+ * the original packet carried inside the ICMP message -- confirm with
+ * the caller.
+ * sport/dport: ports that go with in_iph. *sport becomes nat_dport and,
+ * for full NAT, *dport becomes nat_sport.
+ * info: the NAT addresses and ports to use
+ * sum: adjustment applied to the header checksum of out_iph
+ * full_nat: if true, the NAT source info is rewritten as well
+ */
+void
+ilb_nat_icmpv4(mblk_t *mp, ipha_t *out_iph, icmph_t *icmph, ipha_t *in_iph,
+ in_port_t *sport, in_port_t *dport, ilb_nat_info_t *info, uint32_t sum,
+ boolean_t full_nat)
+{
+ if (full_nat) {
+ IN6_V4MAPPED_TO_IPADDR(&info->nat_src, out_iph->ipha_src);
+ IN6_V4MAPPED_TO_IPADDR(&info->nat_src, in_iph->ipha_dst);
+ *dport = info->nat_sport;
+ }
+ IN6_V4MAPPED_TO_IPADDR(&info->nat_dst, out_iph->ipha_dst);
+ adj_cksum(&out_iph->ipha_hdr_checksum, sum);
+ IN6_V4MAPPED_TO_IPADDR(&info->nat_dst, in_iph->ipha_src);
+ *sport = info->nat_dport;
+
+ icmph->icmph_checksum = 0; /* cleared before it is recomputed */
+ icmph->icmph_checksum = IP_CSUM(mp, IPH_HDR_LENGTH(out_iph), 0);
+}
+
+/*
+ * Do NAT on an ICMPv6 packet.
+ *
+ * mp: the message the ICMPv6 checksum is recomputed over
+ * out_ip6h: IPv6 header whose destination becomes nat_dst and, for full
+ * NAT, whose source becomes nat_src
+ * icmp6h: the ICMPv6 header whose checksum is recomputed
+ * in_ip6h: IPv6 header whose source becomes nat_dst and, for full NAT,
+ * whose destination becomes nat_src. NOTE(review): presumably the
+ * header of the original packet carried inside the ICMPv6 message --
+ * confirm with the caller.
+ * sport/dport: ports that go with in_ip6h. *sport becomes nat_dport and,
+ * for full NAT, *dport becomes nat_sport.
+ * info: the NAT addresses and ports to use
+ * full_nat: if true, the NAT source info is rewritten as well
+ */
+void
+ilb_nat_icmpv6(mblk_t *mp, ip6_t *out_ip6h, icmp6_t *icmp6h, ip6_t *in_ip6h,
+ in_port_t *sport, in_port_t *dport, ilb_nat_info_t *info,
+ boolean_t full_nat)
+{
+ int hdr_len;
+
+ if (full_nat) {
+ out_ip6h->ip6_src = info->nat_src;
+ in_ip6h->ip6_dst = info->nat_src;
+ *dport = info->nat_sport;
+ }
+ out_ip6h->ip6_dst = info->nat_dst;
+ in_ip6h->ip6_src = info->nat_dst;
+ *sport = info->nat_dport;
+
+ icmp6h->icmp6_cksum = out_ip6h->ip6_plen; /* seed the field with the payload length */
+ hdr_len = (char *)icmp6h - (char *)out_ip6h; /* offset of the ICMPv6 header */
+ icmp6h->icmp6_cksum = IP_CSUM(mp, hdr_len,
+ ilb_pseudo_sum_v6(out_ip6h, IPPROTO_ICMPV6));
+}
diff --git a/usr/src/uts/common/inet/ilb/ilb_nat.h b/usr/src/uts/common/inet/ilb/ilb_nat.h
new file mode 100644
index 0000000000..6adaf965ec
--- /dev/null
+++ b/usr/src/uts/common/inet/ilb/ilb_nat.h
@@ -0,0 +1,93 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _INET_ILB_NAT_H
+#define _INET_ILB_NAT_H
+
+#include <sys/vmem.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Maximum number of NAT source addresses of a rule. */
+#define ILB_MAX_NAT_SRC 10
+
+/*
+ * NAT source address hash table bucket: a list head and the lock that goes
+ * with it.  nsh_pad is sized so that list_t + kmutex_t + pad add up to 64
+ * bytes (presumably to keep each bucket on its own cache line -- confirm).
+ * NOTE(review): the list presumably links ilb_nat_src_entry_t via nse_link.
+ */
+typedef struct ilb_nat_src_hash_s {
+ list_t nsh_head;
+ kmutex_t nsh_lock;
+ char nsh_pad[64 - sizeof (list_t) - sizeof (kmutex_t)];
+} ilb_nat_src_hash_t;
+
+/*
+ * NAT source entry. Hold the port space for a source addr/back end server
+ * pair.
+ *
+ * nse_src_addr: NAT source address of the pair
+ * nse_serv_addr: back end server address of the pair
+ * nse_port: NOTE(review): presumably the back end server port -- confirm
+ * nse_port_arena: presumably the vmem arena backing the port space -- confirm
+ * nse_refcnt: reference count on this entry
+ * nse_nsh_lock: presumably the nsh_lock of the hash bucket holding this
+ *	entry -- confirm
+ * nse_link: list linkage
+ */
+typedef struct ilb_nat_src_entry_s {
+ in6_addr_t nse_src_addr;
+ in6_addr_t nse_serv_addr;
+ in_port_t nse_port;
+ vmem_t *nse_port_arena;
+ uint32_t nse_refcnt;
+ kmutex_t *nse_nsh_lock;
+ list_node_t nse_link;
+} ilb_nat_src_entry_t;
+
+/*
+ * Struct to hold all NAT source entries of a back end server.
+ *
+ * src_list has room for ILB_MAX_NAT_SRC entries.
+ * NOTE(review): num_src is presumably the number of valid slots in src_list
+ * and cur the slot to try next when allocating -- confirm against
+ * ilb_alloc_nat_addr().
+ */
+typedef struct ilb_nat_src_s {
+ uint16_t cur;
+ uint16_t num_src;
+ ilb_nat_src_entry_t *src_list[ILB_MAX_NAT_SRC];
+} ilb_nat_src_t;
+
+/* Set up/tear down of NAT source address state, and its timer callback. */
+extern int ilb_create_nat_src(ilb_stack_t *ilbs, ilb_nat_src_t **,
+ const in6_addr_t *, in_port_t, const in6_addr_t *, int);
+extern void ilb_destroy_nat_src(ilb_nat_src_t **);
+extern void ilb_nat_src_timer(void *);
+extern void ilb_nat_src_init(ilb_stack_t *);
+extern void ilb_nat_src_fini(ilb_stack_t *);
+
+/* NOTE(review): presumably picks a NAT source address/port -- confirm. */
+extern ilb_nat_src_entry_t *ilb_alloc_nat_addr(ilb_nat_src_t *, in6_addr_t *,
+ in_port_t *, uint16_t *);
+
+/* Full and half NAT packet rewriting. */
+extern void ilb_full_nat(int, void *, int, void *, ilb_nat_info_t *, uint32_t,
+ uint32_t, boolean_t);
+extern void ilb_half_nat(int, void *, int, void *, ilb_nat_info_t *, uint32_t,
+ uint32_t, boolean_t);
+
+/* NAT on ICMP messages; the last argument selects full NAT. */
+extern void ilb_nat_icmpv4(mblk_t *, ipha_t *, icmph_t *, ipha_t *,
+ in_port_t *, in_port_t *, ilb_nat_info_t *, uint32_t, boolean_t);
+extern void ilb_nat_icmpv6(mblk_t *, ip6_t *, icmp6_t *, ip6_t *,
+ in_port_t *, in_port_t *, ilb_nat_info_t *, boolean_t);
+
+/* IPv6 pseudo checksum (addresses and next header), used for ICMPv6 NAT. */
+extern uint32_t ilb_pseudo_sum_v6(ip6_t *, uint8_t);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _INET_ILB_NAT_H */
diff --git a/usr/src/uts/common/inet/ilb/ilb_stack.h b/usr/src/uts/common/inet/ilb/ilb_stack.h
new file mode 100644
index 0000000000..6cb034c7f6
--- /dev/null
+++ b/usr/src/uts/common/inet/ilb/ilb_stack.h
@@ -0,0 +1,134 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _INET_ILB_STACK_H
+#define _INET_ILB_STACK_H
+
+#include <sys/netstack.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct ilb_rule_s;
+struct ilb_hash_s;
+struct ilb_timer_s;
+struct ilb_conn_s;
+struct ilb_conn_hash_s;
+struct ilb_sticky_s;
+struct ilb_sticky_hash_s;
+struct ilb_g_kstat_s;
+struct ilb_nat_src_hash_s;
+
+/* Per network stack ILB information */
+typedef struct ilb_stack {
+ netstack_t *ilbs_netstack;
+
+ /*
+ * Rule info in a network stack.
+ *
+ * ilbs_rule_head: list of all rules
+ * ilbs_g_lock: lock to protect the rule list
+ * ilbs_rule_hash_size: size of the rule hash table
+ * ilbs_g_hash: the rule hash table
+ * ilbs_rule_taskq: taskq for rule related delayed processing
+ */
+ struct ilb_rule_s *ilbs_rule_head;
+ kmutex_t ilbs_g_lock;
+ size_t ilbs_rule_hash_size;
+ struct ilb_hash_s *ilbs_g_hash;
+ taskq_t *ilbs_rule_taskq;
+
+ /*
+ * NAT connection cache info
+ *
+ * ilbs_conn_hash_size: size of the conn cache hash table
+ * ilbs_c2s_conn_hash: client to server conn cache hash table
+ * ilbs_s2c_conn_hash: server to client conn cache hash table
+ * ilbs_conn_timer_list: list of all timers for handling conn cache
+ * ilbs_conn_taskq: taskq for conn cache related delayed processing
+ */
+ size_t ilbs_conn_hash_size;
+ struct ilb_conn_hash_s *ilbs_c2s_conn_hash;
+ struct ilb_conn_hash_s *ilbs_s2c_conn_hash;
+ struct ilb_timer_s *ilbs_conn_timer_list;
+ taskq_t *ilbs_conn_taskq;
+
+ /*
+ * Sticky (persistent) cache info
+ *
+ * ilbs_sticky_hash_size: size of the sticky cache hash table
+ * ilbs_sticky_hash: sticky cache hash table
+ * ilbs_sticky_timer_list: list of all timers for handling sticky cache
+ * ilbs_sticky_taskq: taskq for sticky cache related delayed processing
+ */
+ size_t ilbs_sticky_hash_size;
+ struct ilb_sticky_hash_s *ilbs_sticky_hash;
+ struct ilb_timer_s *ilbs_sticky_timer_list;
+ taskq_t *ilbs_sticky_taskq;
+
+ /*
+ * NAT source address info
+ *
+ * ilbs_nat_src: NAT source hash table
+ * ilbs_nat_src_hash_size: size of the NAT source hash table
+ * ilbs_nat_src_lock: lock for protecting ilbs_nat_src_tid
+ * ilbs_nat_src_tid: ID of the timer handling garbage collection
+ */
+ struct ilb_nat_src_hash_s *ilbs_nat_src;
+ size_t ilbs_nat_src_hash_size;
+ kmutex_t ilbs_nat_src_lock;
+ timeout_id_t ilbs_nat_src_tid;
+
+ /* NAT conn cache and sticky cache listing related info */
+
+ /* Lock to ensure that all nat listing ops are serialized */
+ kmutex_t ilbs_conn_list_lock;
+ kcondvar_t ilbs_conn_list_cv;
+ boolean_t ilbs_conn_list_busy;
+ /* Current position for listing all conn hash entries */
+ size_t ilbs_conn_list_cur;
+ struct ilb_conn_s *ilbs_conn_list_connp;
+
+ /* Lock to ensure that all sticky listing ops are serialized */
+ kmutex_t ilbs_sticky_list_lock;
+ kcondvar_t ilbs_sticky_list_cv;
+ boolean_t ilbs_sticky_list_busy;
+ /* Current position for listing all sticky hash entries */
+ size_t ilbs_sticky_list_cur;
+ struct ilb_sticky_s *ilbs_sticky_list_curp;
+
+ /* Stack wide ILB kstat */
+ kstat_t *ilbs_ksp;
+ struct ilb_g_kstat_s *ilbs_kstat;
+} ilb_stack_t;
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _INET_ILB_STACK_H */
diff --git a/usr/src/uts/common/inet/ilb_ip.h b/usr/src/uts/common/inet/ilb_ip.h
new file mode 100644
index 0000000000..16dddbb427
--- /dev/null
+++ b/usr/src/uts/common/inet/ilb_ip.h
@@ -0,0 +1,101 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+#ifndef _INET_ILB_IP_H
+#define _INET_ILB_IP_H
+
+#include <inet/ilb.h>
+#include <inet/ilb/ilb_stack.h>
+#include <inet/ilb/ilb_impl.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Global ILB set up and tear down; called from ip_ddi_init() and
+ * ip_ddi_destroy() respectively.
+ */
+extern void ilb_ddi_g_init(void);
+extern void ilb_ddi_g_destroy(void);
+
+/* Return values of ilb_check_*() */
+#define ILB_DROPPED 1 /* Caller should drop the packet. */
+#define ILB_PASSED 2 /* No load balanced rule is matched. */
+#define ILB_BALANCED 3 /* A rule is matched. */
+
+/* Used by the IP input path to decide whether any ILB processing is needed. */
+extern boolean_t ilb_has_rules(ilb_stack_t *);
+
+/*
+ * Packet checks on the IP input path.  ilb_check_*() return one of
+ * ILB_DROPPED, ILB_PASSED or ILB_BALANCED; when the result is ILB_BALANCED
+ * the caller uses the address stored through the last argument as the new
+ * destination.  ilb_rule_match_vip_*() tell whether an address is a VIP.
+ */
+extern int ilb_check_v4(ilb_stack_t *, ill_t *, mblk_t *, ipha_t *, int,
+ uint8_t *, ipaddr_t *);
+extern int ilb_check_v6(ilb_stack_t *, ill_t *, mblk_t *, ip6_t *, int,
+ uint8_t *, in6_addr_t *);
+extern boolean_t ilb_rule_match_vip_v4(ilb_stack_t *, ipaddr_t, ilb_rule_t **);
+extern boolean_t ilb_rule_match_vip_v6(ilb_stack_t *, in6_addr_t *,
+ ilb_rule_t **);
+
+/* Handler for the SIOCILB ioctl. */
+extern int ip_sioctl_ilb_cmd(ipif_t *, sin_t *, queue_t *, mblk_t *,
+ ip_ioctl_cmd_t *, void *);
+
+/* Rule operations, invoked from ip_sioctl_ilb_cmd(). */
+extern int ilb_rule_add(ilb_stack_t *, zoneid_t, const ilb_rule_cmd_t *);
+extern int ilb_rule_del(ilb_stack_t *, zoneid_t, const char *);
+extern void ilb_rule_del_all(ilb_stack_t *, zoneid_t);
+extern int ilb_rule_enable(ilb_stack_t *, zoneid_t, const char *,
+ ilb_rule_t *);
+extern void ilb_rule_enable_all(ilb_stack_t *, zoneid_t);
+extern int ilb_rule_disable(ilb_stack_t *, zoneid_t, const char *,
+ ilb_rule_t *);
+extern void ilb_rule_disable_all(ilb_stack_t *, zoneid_t);
+extern int ilb_rule_list(ilb_stack_t *, zoneid_t, ilb_rule_cmd_t *);
+
+/* Rule and server queries, invoked from ip_sioctl_ilb_cmd(). */
+extern void ilb_get_num_rules(ilb_stack_t *, zoneid_t, uint32_t *);
+extern int ilb_get_num_servers(ilb_stack_t *, zoneid_t, const char *,
+ uint32_t *);
+extern ilb_rule_t *ilb_find_rule(ilb_stack_t *, zoneid_t, const char *, int *);
+extern void ilb_get_rulenames(ilb_stack_t *, zoneid_t, uint32_t *,
+ char *);
+extern int ilb_get_servers(ilb_stack_t *, zoneid_t, const char *,
+ ilb_server_info_t *, uint32_t *);
+
+/* Back end server operations, invoked from ip_sioctl_ilb_cmd(). */
+extern int ilb_server_add(ilb_stack_t *, ilb_rule_t *, ilb_server_info_t *);
+extern int ilb_server_del(ilb_stack_t *, zoneid_t, const char *,
+ ilb_rule_t *, in6_addr_t *);
+extern int ilb_server_enable(ilb_stack_t *, zoneid_t, const char *,
+ ilb_rule_t *, in6_addr_t *);
+extern int ilb_server_disable(ilb_stack_t *, zoneid_t, const char *,
+ ilb_rule_t *, in6_addr_t *);
+
+/* NAT and sticky table listing, invoked from ip_sioctl_ilb_cmd(). */
+extern int ilb_list_nat(ilb_stack_t *, zoneid_t, ilb_nat_entry_t *,
+ uint32_t *, uint32_t *);
+extern int ilb_list_sticky(ilb_stack_t *, zoneid_t, ilb_sticky_entry_t *,
+ uint32_t *, uint32_t *);
+
+/*
+ * Currently supported transport protocol.
+ *
+ * Evaluates to non-zero when proto is TCP, UDP, ICMP or ICMPv6.  Note that
+ * proto may be evaluated up to four times, so it must be free of side
+ * effects.
+ */
+#define ILB_SUPP_L4(proto) \
+ ((proto) == IPPROTO_TCP || (proto) == IPPROTO_UDP || \
+ (proto) == IPPROTO_ICMP || (proto) == IPPROTO_ICMPV6)
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _INET_ILB_IP_H */
diff --git a/usr/src/uts/common/inet/ip/ip.c b/usr/src/uts/common/inet/ip/ip.c
index b72218e471..ebb89e3172 100644
--- a/usr/src/uts/common/inet/ip/ip.c
+++ b/usr/src/uts/common/inet/ip/ip.c
@@ -101,6 +101,7 @@
#include <inet/iptun/iptun_impl.h>
#include <inet/ipdrop.h>
#include <inet/ip_netinfo.h>
+#include <inet/ilb_ip.h>
#include <sys/ethernet.h>
#include <net/if_types.h>
@@ -1345,6 +1346,10 @@ ip_ioctl_cmd_t ip_ndx_ioctl_table[] = {
/* SIOCSENABLESDP is handled by SDP */
/* 183 */ { IPI_DONTCARE /* SIOCSENABLESDP */, 0, 0, 0, NULL, NULL },
/* 184 */ { IPI_DONTCARE /* SIOCSQPTR */, 0, 0, 0, NULL, NULL },
+ /* 185 */ { IPI_DONTCARE /* SIOCGIFHWADDR */, 0, 0, 0, NULL, NULL },
+ /* 186 */ { IPI_DONTCARE /* SIOCGSTAMP */, 0, 0, 0, NULL, NULL },
+ /* 187 */ { SIOCILB, 0, IPI_PRIV | IPI_GET_CMD, MISC_CMD,
+ ip_sioctl_ilb_cmd, NULL },
};
int ip_ndx_ioctl_count = sizeof (ip_ndx_ioctl_table) / sizeof (ip_ioctl_cmd_t);
@@ -5661,6 +5666,7 @@ ip_ddi_destroy(void)
udp_ddi_g_destroy();
sctp_ddi_g_destroy();
tcp_ddi_g_destroy();
+ ilb_ddi_g_destroy();
ipsec_policy_g_destroy();
ipcl_g_destroy();
ip_net_g_destroy();
@@ -5927,6 +5933,7 @@ ip_ddi_init(void)
udp_ddi_g_init();
rts_ddi_g_init();
icmp_ddi_g_init();
+ ilb_ddi_g_init();
}
/*
@@ -14829,6 +14836,8 @@ ip_input(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain,
mblk_t *mp;
mblk_t *dmp;
uint8_t tag;
+ ilb_stack_t *ilbs;
+ ipaddr_t lb_dst;
ASSERT(mp_chain != NULL);
ASSERT(ill != NULL);
@@ -14839,6 +14848,7 @@ ip_input(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain,
#define rptr ((uchar_t *)ipha)
+ ilbs = ipst->ips_netstack->netstack_ilb;
while (mp_chain != NULL) {
mp = mp_chain;
mp_chain = mp_chain->b_next;
@@ -15065,6 +15075,62 @@ ip_input(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain,
}
/*
+ * Here we check to see if the machine is set up as an
+ * L3 load balancer and if the incoming packet is for a VIP.
+ *
+ * Check the following:
+ * - there is at least a rule
+ * - protocol of the packet is supported
+ */
+ if (ilb_has_rules(ilbs) && ILB_SUPP_L4(ipha->ipha_protocol)) {
+ int lb_ret;
+
+ /* For convenience, we pull up the mblk. */
+ if (mp->b_cont != NULL) {
+ if (pullupmsg(mp, -1) == 0) {
+ BUMP_MIB(ill->ill_ip_mib,
+ ipIfStatsInDiscards);
+ freemsg(first_mp);
+ continue;
+ }
+ ipha = (ipha_t *)mp->b_rptr;
+ }
+
+ /*
+ * We just drop all fragments going to any VIP, at
+ * least for now....
+ */
+ if (ntohs(ipha->ipha_fragment_offset_and_flags) &
+ (IPH_MF | IPH_OFFSET)) {
+ if (!ilb_rule_match_vip_v4(ilbs,
+ ipha->ipha_dst, NULL)) {
+ goto after_ilb;
+ }
+
+ ILB_KSTAT_UPDATE(ilbs, ip_frag_in, 1);
+ ILB_KSTAT_UPDATE(ilbs, ip_frag_dropped, 1);
+ BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
+ freemsg(first_mp);
+ continue;
+ }
+ lb_ret = ilb_check_v4(ilbs, ill, mp, ipha,
+ ipha->ipha_protocol, (uint8_t *)ipha +
+ IPH_HDR_LENGTH(ipha), &lb_dst);
+
+ if (lb_ret == ILB_DROPPED) {
+ /* Is this the right counter to increase? */
+ BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
+ freemsg(first_mp);
+ continue;
+ } else if (lb_ret == ILB_BALANCED) {
+ /* Set the dst to that of the chosen server */
+ dst = lb_dst;
+ DB_CKSUMFLAGS(mp) = 0;
+ }
+ }
+
+after_ilb:
+ /*
* Reuse the cached ire only if the ipha_dst of the previous
* packet is the same as the current packet AND it is not
* INADDR_ANY.
@@ -15399,6 +15465,7 @@ ip_accept_tcp(ill_t *ill, ill_rx_ring_t *ip_ring, squeue_t *target_sqp,
mblk_t *uhead = NULL; /* Unaccepted tail */
uint_t ucnt = 0; /* Unaccepted cnt */
ip_stack_t *ipst = ill->ill_ipst;
+ ilb_stack_t *ilbs = ipst->ips_netstack->netstack_ilb;
*cnt = 0;
@@ -15407,6 +15474,12 @@ ip_accept_tcp(ill_t *ill, ill_rx_ring_t *ip_ring, squeue_t *target_sqp,
TRACE_1(TR_FAC_IP, TR_IP_RPUT_START, "ip_accept_tcp: q %p", q);
+ /* If ILB is enabled, don't do fast processing. */
+ if (ilb_has_rules(ilbs)) {
+ uhead = mp_chain;
+ goto all_reject;
+ }
+
#define rptr ((uchar_t *)ipha)
while (mp_chain != NULL) {
@@ -15574,6 +15647,7 @@ local_accept:
if (ire != NULL)
ire_refrele(ire);
+all_reject:
if (uhead != NULL)
ip_input(ill, ip_ring, uhead, NULL);
diff --git a/usr/src/uts/common/inet/ip/ip6.c b/usr/src/uts/common/inet/ip/ip6.c
index 6fca667f63..38fe7b2562 100644
--- a/usr/src/uts/common/inet/ip/ip6.c
+++ b/usr/src/uts/common/inet/ip/ip6.c
@@ -81,6 +81,7 @@
#include <inet/udp_impl.h>
#include <inet/sctp/sctp_impl.h>
#include <inet/ipp_common.h>
+#include <inet/ilb_ip.h>
#include <inet/ip_multi.h>
#include <inet/ip_if.h>
@@ -6922,6 +6923,9 @@ ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h,
boolean_t cksum_err;
mblk_t *mp1;
ip_stack_t *ipst = inill->ill_ipst;
+ ilb_stack_t *ilbs = ipst->ips_netstack->netstack_ilb;
+ in6_addr_t lb_dst;
+ int lb_ret = ILB_PASSED;
EXTRACT_PKT_MP(mp, first_mp, mctl_present);
@@ -7087,8 +7091,32 @@ drop_pkt: BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
IRE_CACHE|IRE_LOCAL, ill->ill_ipif, ALL_ZONES, NULL,
MATCH_IRE_TYPE | MATCH_IRE_ILL, ipst);
} else {
- ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES,
- msg_getlabel(mp), ipst);
+ if (ilb_has_rules(ilbs) && ILB_SUPP_L4(nexthdr)) {
+ /* For convenience, we just pull up the mblk. */
+ if (mp->b_cont != NULL) {
+ if (pullupmsg(mp, -1) == 0) {
+ BUMP_MIB(ill->ill_ip_mib,
+ ipIfStatsInDiscards);
+ freemsg(hada_mp);
+ freemsg(first_mp);
+ return;
+ }
+ hdr_len = pkt_len - remlen;
+ ip6h = (ip6_t *)mp->b_rptr;
+ whereptr = (uint8_t *)ip6h + hdr_len;
+ }
+ lb_ret = ilb_check_v6(ilbs, ill, mp, ip6h, nexthdr,
+ whereptr, &lb_dst);
+ if (lb_ret == ILB_DROPPED) {
+ BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
+ freemsg(hada_mp);
+ freemsg(first_mp);
+ return;
+ }
+ }
+
+ ire = ire_cache_lookup_v6((lb_ret == ILB_BALANCED) ? &lb_dst :
+ &ip6h->ip6_dst, ALL_ZONES, msg_getlabel(mp), ipst);
if (ire != NULL && ire->ire_stq != NULL &&
ire->ire_zoneid != GLOBAL_ZONEID &&
@@ -7139,7 +7167,8 @@ drop_pkt: BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
}
mp->b_prev = (mblk_t *)(uintptr_t)
ill->ill_phyint->phyint_ifindex;
- ip_newroute_v6(q, mp, &ip6h->ip6_dst, &ip6h->ip6_src,
+ ip_newroute_v6(q, mp, (lb_ret == ILB_BALANCED) ? &lb_dst :
+ &ip6h->ip6_dst, &ip6h->ip6_src,
IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ? ill : NULL,
GLOBAL_ZONEID, ipst);
return;
diff --git a/usr/src/uts/common/inet/ip/ip_if.c b/usr/src/uts/common/inet/ip/ip_if.c
index ce8f7e6371..b175f4530f 100644
--- a/usr/src/uts/common/inet/ip/ip_if.c
+++ b/usr/src/uts/common/inet/ip/ip_if.c
@@ -86,6 +86,7 @@
#include <inet/ip_impl.h>
#include <inet/sctp_ip.h>
#include <inet/ip_netinfo.h>
+#include <inet/ilb_ip.h>
#include <net/pfkeyv2.h>
#include <inet/ipsec_info.h>
@@ -10192,6 +10193,15 @@ ip_sioctl_copyin_setup(queue_t *q, mblk_t *mp)
case IP_IOCTL:
ip_wput_ioctl(q, mp);
return;
+
+ case SIOCILB:
+ /* The ioctl length varies depending on the ILB command. */
+ copyin_size = iocp->ioc_count;
+ if (copyin_size < sizeof (ilb_cmd_t))
+ goto nak;
+ mi_copyin(q, mp, NULL, copyin_size);
+ return;
+
default:
cmn_err(CE_PANIC, "should not happen ");
}
@@ -20341,3 +20351,262 @@ ipif_up_notify(ipif_t *ipif)
ill_nic_event_dispatch(ipif->ipif_ill, MAP_IPIF_ID(ipif->ipif_id),
NE_LIF_UP, NULL, 0);
}
+
+/*
+ * Return B_TRUE iff the ILB command block cmd_mp consists of exactly
+ * array_off bytes of header followed by cnt elements of elem_size bytes.
+ *
+ * cnt comes straight from user land.  The element count is therefore derived
+ * from the message length by division instead of multiplying cnt by
+ * elem_size: the product could wrap around and make a huge count appear to
+ * match a short message, after which the callee would index past the end of
+ * the buffer.
+ */
+static boolean_t
+ilb_cmd_array_len_ok(mblk_t *cmd_mp, size_t array_off, size_t elem_size,
+    size_t cnt)
+{
+	size_t len = MBLKL(cmd_mp);
+	size_t array_len;
+
+	if (len < array_off)
+		return (B_FALSE);
+	array_len = len - array_off;
+	if (array_len % elem_size != 0 || array_len / elem_size != cnt)
+		return (B_FALSE);
+	return (B_TRUE);
+}
+
+/*
+ * ILB ioctl uses cv_wait (such as deleting a rule or adding a server) and
+ * this assumes the context is cv_wait'able. Hence it shouldn't be used on
+ * TPI end points with STREAMS modules pushed above. This is assured by not
+ * having the IPI_MODOK flag for the ioctl. And IP ensures the ILB ioctl
+ * never ends up on an ipsq, otherwise we may end up processing the ioctl
+ * while unwinding from the ipsq and that could be a thread from the bottom.
+ *
+ * Handler for SIOCILB.  The copied in command block is the mblk at
+ * mp->b_cont->b_cont and starts with an ilb_cmd_t which selects the
+ * operation; ip_sioctl_copyin_setup() has already rejected requests shorter
+ * than an ilb_cmd_t.  q identifies the conn, and through it the IP stack and
+ * zone, the request is for; ipif, sin, ipip and arg are unused.
+ *
+ * Fixed size commands must match the size of their structure exactly.
+ * Variable size commands must be at least as large as their structure and
+ * must consist of exactly the header plus the number of trailing elements
+ * announced in the command.
+ *
+ * Returns 0 on success, EINVAL for an unknown or malformed command, or the
+ * error returned by the ILB routine called.  For the per server commands
+ * the individual results are stored in the err field of each server element
+ * of the command block.
+ */
+/* ARGSUSED */
+int
+ip_sioctl_ilb_cmd(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp,
+    ip_ioctl_cmd_t *ipip, void *arg)
+{
+	mblk_t *cmd_mp = mp->b_cont->b_cont;
+	ilb_cmd_t command = *((ilb_cmd_t *)cmd_mp->b_rptr);
+	int ret = 0;
+	int i;
+	ip_stack_t *ipst;
+	zoneid_t zoneid;
+	ilb_stack_t *ilbs;
+
+	ipst = CONNQ_TO_IPST(q);
+	ilbs = ipst->ips_netstack->netstack_ilb;
+	zoneid = Q_TO_CONN(q)->conn_zoneid;
+
+	switch (command) {
+	case ILB_CREATE_RULE: {
+		ilb_rule_cmd_t *cmd = (ilb_rule_cmd_t *)cmd_mp->b_rptr;
+
+		if (MBLKL(cmd_mp) != sizeof (ilb_rule_cmd_t)) {
+			ret = EINVAL;
+			break;
+		}
+
+		ret = ilb_rule_add(ilbs, zoneid, cmd);
+		break;
+	}
+	case ILB_DESTROY_RULE:
+	case ILB_ENABLE_RULE:
+	case ILB_DISABLE_RULE: {
+		ilb_name_cmd_t *cmd = (ilb_name_cmd_t *)cmd_mp->b_rptr;
+
+		if (MBLKL(cmd_mp) != sizeof (ilb_name_cmd_t)) {
+			ret = EINVAL;
+			break;
+		}
+
+		/* The *_all() variants cannot fail, so ret stays 0. */
+		if (cmd->flags & ILB_RULE_ALLRULES) {
+			if (command == ILB_DESTROY_RULE) {
+				ilb_rule_del_all(ilbs, zoneid);
+			} else if (command == ILB_ENABLE_RULE) {
+				ilb_rule_enable_all(ilbs, zoneid);
+			} else {
+				ilb_rule_disable_all(ilbs, zoneid);
+			}
+		} else {
+			if (command == ILB_DESTROY_RULE) {
+				ret = ilb_rule_del(ilbs, zoneid, cmd->name);
+			} else if (command == ILB_ENABLE_RULE) {
+				ret = ilb_rule_enable(ilbs, zoneid, cmd->name,
+				    NULL);
+			} else {
+				ret = ilb_rule_disable(ilbs, zoneid, cmd->name,
+				    NULL);
+			}
+		}
+		break;
+	}
+	case ILB_NUM_RULES: {
+		ilb_num_rules_cmd_t *cmd;
+
+		if (MBLKL(cmd_mp) != sizeof (ilb_num_rules_cmd_t)) {
+			ret = EINVAL;
+			break;
+		}
+		cmd = (ilb_num_rules_cmd_t *)cmd_mp->b_rptr;
+		ilb_get_num_rules(ilbs, zoneid, &(cmd->num));
+		break;
+	}
+	case ILB_RULE_NAMES: {
+		ilb_rule_names_cmd_t *cmd;
+
+		cmd = (ilb_rule_names_cmd_t *)cmd_mp->b_rptr;
+		if (MBLKL(cmd_mp) < sizeof (ilb_rule_names_cmd_t) ||
+		    cmd->num_names == 0 ||
+		    !ilb_cmd_array_len_ok(cmd_mp,
+		    offsetof(ilb_rule_names_cmd_t, buf), ILB_RULE_NAMESZ,
+		    cmd->num_names)) {
+			ret = EINVAL;
+			break;
+		}
+		ilb_get_rulenames(ilbs, zoneid, &cmd->num_names, cmd->buf);
+		break;
+	}
+	case ILB_NUM_SERVERS: {
+		ilb_num_servers_cmd_t *cmd;
+
+		if (MBLKL(cmd_mp) != sizeof (ilb_num_servers_cmd_t)) {
+			ret = EINVAL;
+			break;
+		}
+		cmd = (ilb_num_servers_cmd_t *)cmd_mp->b_rptr;
+		ret = ilb_get_num_servers(ilbs, zoneid, cmd->name,
+		    &(cmd->num));
+		break;
+	}
+	case ILB_LIST_RULE: {
+		ilb_rule_cmd_t *cmd = (ilb_rule_cmd_t *)cmd_mp->b_rptr;
+
+		if (MBLKL(cmd_mp) != sizeof (ilb_rule_cmd_t)) {
+			ret = EINVAL;
+			break;
+		}
+		ret = ilb_rule_list(ilbs, zoneid, cmd);
+		break;
+	}
+	case ILB_LIST_SERVERS: {
+		ilb_servers_info_cmd_t *cmd;
+
+		cmd = (ilb_servers_info_cmd_t *)cmd_mp->b_rptr;
+		if (MBLKL(cmd_mp) < sizeof (ilb_servers_info_cmd_t) ||
+		    cmd->num_servers == 0 ||
+		    !ilb_cmd_array_len_ok(cmd_mp,
+		    offsetof(ilb_servers_info_cmd_t, servers),
+		    sizeof (ilb_server_info_t), cmd->num_servers)) {
+			ret = EINVAL;
+			break;
+		}
+
+		ret = ilb_get_servers(ilbs, zoneid, cmd->name, cmd->servers,
+		    &cmd->num_servers);
+		break;
+	}
+	case ILB_ADD_SERVERS: {
+		ilb_servers_info_cmd_t *cmd;
+		ilb_rule_t *rule;
+
+		cmd = (ilb_servers_info_cmd_t *)cmd_mp->b_rptr;
+		if (MBLKL(cmd_mp) < sizeof (ilb_servers_info_cmd_t) ||
+		    !ilb_cmd_array_len_ok(cmd_mp,
+		    offsetof(ilb_servers_info_cmd_t, servers),
+		    sizeof (ilb_server_info_t), cmd->num_servers)) {
+			ret = EINVAL;
+			break;
+		}
+		rule = ilb_find_rule(ilbs, zoneid, cmd->name, &ret);
+		if (rule == NULL) {
+			ASSERT(ret != 0);
+			break;
+		}
+		/* Per server status is reported through each element's err. */
+		for (i = 0; i < cmd->num_servers; i++) {
+			ilb_server_info_t *s;
+
+			s = &cmd->servers[i];
+			s->err = ilb_server_add(ilbs, rule, s);
+		}
+		ILB_RULE_REFRELE(rule);
+		break;
+	}
+	case ILB_DEL_SERVERS:
+	case ILB_ENABLE_SERVERS:
+	case ILB_DISABLE_SERVERS: {
+		ilb_servers_cmd_t *cmd;
+		ilb_rule_t *rule;
+		int (*f)();
+
+		cmd = (ilb_servers_cmd_t *)cmd_mp->b_rptr;
+		if (MBLKL(cmd_mp) < sizeof (ilb_servers_cmd_t) ||
+		    !ilb_cmd_array_len_ok(cmd_mp,
+		    offsetof(ilb_servers_cmd_t, servers),
+		    sizeof (ilb_server_arg_t), cmd->num_servers)) {
+			ret = EINVAL;
+			break;
+		}
+
+		/* Only the three commands above get here. */
+		if (command == ILB_DEL_SERVERS)
+			f = ilb_server_del;
+		else if (command == ILB_ENABLE_SERVERS)
+			f = ilb_server_enable;
+		else
+			f = ilb_server_disable;
+
+		rule = ilb_find_rule(ilbs, zoneid, cmd->name, &ret);
+		if (rule == NULL) {
+			ASSERT(ret != 0);
+			break;
+		}
+
+		/* Per server status is reported through each element's err. */
+		for (i = 0; i < cmd->num_servers; i++) {
+			ilb_server_arg_t *s;
+
+			s = &cmd->servers[i];
+			s->err = f(ilbs, zoneid, NULL, rule, &s->addr);
+		}
+		ILB_RULE_REFRELE(rule);
+		break;
+	}
+	case ILB_LIST_NAT_TABLE: {
+		ilb_list_nat_cmd_t *cmd;
+
+		cmd = (ilb_list_nat_cmd_t *)cmd_mp->b_rptr;
+		if (MBLKL(cmd_mp) < sizeof (ilb_list_nat_cmd_t) ||
+		    !ilb_cmd_array_len_ok(cmd_mp,
+		    offsetof(ilb_list_nat_cmd_t, entries),
+		    sizeof (ilb_nat_entry_t), cmd->num_nat)) {
+			ret = EINVAL;
+			break;
+		}
+
+		ret = ilb_list_nat(ilbs, zoneid, cmd->entries, &cmd->num_nat,
+		    &cmd->flags);
+		break;
+	}
+	case ILB_LIST_STICKY_TABLE: {
+		ilb_list_sticky_cmd_t *cmd;
+
+		cmd = (ilb_list_sticky_cmd_t *)cmd_mp->b_rptr;
+		if (MBLKL(cmd_mp) < sizeof (ilb_list_sticky_cmd_t) ||
+		    !ilb_cmd_array_len_ok(cmd_mp,
+		    offsetof(ilb_list_sticky_cmd_t, entries),
+		    sizeof (ilb_sticky_entry_t), cmd->num_sticky)) {
+			ret = EINVAL;
+			break;
+		}
+
+		ret = ilb_list_sticky(ilbs, zoneid, cmd->entries,
+		    &cmd->num_sticky, &cmd->flags);
+		break;
+	}
+	default:
+		ret = EINVAL;
+		break;
+	}
+	return (ret);
+}
diff --git a/usr/src/uts/common/inet/ipnet/ipnet.c b/usr/src/uts/common/inet/ipnet/ipnet.c
index f3a3c73374..d9b7cf7768 100644
--- a/usr/src/uts/common/inet/ipnet/ipnet.c
+++ b/usr/src/uts/common/inet/ipnet/ipnet.c
@@ -1951,16 +1951,17 @@ ipobs_bounce_func(hook_event_token_t token, hook_data_t info, void *arg)
mblk_t *mp;
hdr = (hook_pkt_observe_t *)info;
- mp = dupmsg(hdr->hpo_pkt);
- if (mp == NULL) {
- mp = copymsg(hdr->hpo_pkt);
- if (mp == NULL) {
- netstack_t *ns = hdr->hpo_ctx;
- ipnet_stack_t *ips = ns->netstack_ipnet;
-
- IPSK_BUMP(ips, ik_dispatchDupDrop);
- return (0);
- }
+ /*
+ * Code in ip_input() expects that it is the only one accessing the
+ * packet.
+ */
+ mp = copymsg(hdr->hpo_pkt);
+ if (mp == NULL) {
+ netstack_t *ns = hdr->hpo_ctx;
+ ipnet_stack_t *ips = ns->netstack_ipnet;
+
+ IPSK_BUMP(ips, ik_dispatchDupDrop);
+ return (0);
}
hdr = (hook_pkt_observe_t *)mp->b_rptr;
diff --git a/usr/src/uts/common/sys/netstack.h b/usr/src/uts/common/sys/netstack.h
index 8b13b66599..2c77e1be96 100644
--- a/usr/src/uts/common/sys/netstack.h
+++ b/usr/src/uts/common/sys/netstack.h
@@ -80,7 +80,8 @@ typedef id_t netstackid_t;
#define NS_IPSECAH 15
#define NS_IPSECESP 16
#define NS_IPNET 17
-#define NS_MAX (NS_IPNET+1)
+#define NS_ILB 18
+#define NS_MAX (NS_ILB+1)
/*
* State maintained for each module which tracks the state of
@@ -155,6 +156,7 @@ struct netstack {
struct ipsecah_stack *nu_ipsecah;
struct ipsecesp_stack *nu_ipsecesp;
struct ipnet_stack *nu_ipnet;
+ struct ilb_stack *nu_ilb;
} nu_s;
} netstack_u;
#define netstack_modules netstack_u.nu_modules
@@ -176,6 +178,7 @@ struct netstack {
#define netstack_ipsecah netstack_u.nu_s.nu_ipsecah
#define netstack_ipsecesp netstack_u.nu_s.nu_ipsecesp
#define netstack_ipnet netstack_u.nu_s.nu_ipnet
+#define netstack_ilb netstack_u.nu_s.nu_ilb
nm_state_t netstack_m_state[NS_MAX]; /* module state */
diff --git a/usr/src/uts/common/sys/sockio.h b/usr/src/uts/common/sys/sockio.h
index 06b63d2969..0fca7c5c43 100644
--- a/usr/src/uts/common/sys/sockio.h
+++ b/usr/src/uts/common/sys/sockio.h
@@ -313,6 +313,11 @@ extern "C" {
#define SIOCGIFHWADDR _IOWR('i', 185, int) /* PF_PACKET */
#define SIOCGSTAMP _IOWR('i', 186, struct timeval) /* PF_PACKET */
+/*
+ * Private ioctl for Integrated Load Balancer. The ioctl length varies.
+ */
+#define SIOCILB _IOWR('i', 187, 0)
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/intel/ip/ip.global-objs.debug64 b/usr/src/uts/intel/ip/ip.global-objs.debug64
index a45c7e5ae9..6009f5b006 100644
--- a/usr/src/uts/intel/ip/ip.global-objs.debug64
+++ b/usr/src/uts/intel/ip/ip.global-objs.debug64
@@ -77,6 +77,22 @@ icmpinfov6
icmprinitv4
icmprinitv6
icmpwinit
+ilb_conn_cache
+ilb_conn_cache_timeout
+ilb_conn_hash_size
+ilb_conn_tcp_expiry
+ilb_conn_timer_size
+ilb_conn_udp_expiry
+ilb_kstat_instance
+ilb_kmem_flags
+ilb_nat_src_hash_size
+ilb_nat_src_instance
+ilb_rule_hash_size
+ilb_sticky_cache
+ilb_sticky_hash_size
+ilb_sticky_expiry
+ilb_sticky_timer_size
+ilb_sticky_timeout
ill_no_arena
ill_null
inet_dev_info
diff --git a/usr/src/uts/intel/ip/ip.global-objs.obj64 b/usr/src/uts/intel/ip/ip.global-objs.obj64
index b3fb7df1b5..1706a82aa7 100644
--- a/usr/src/uts/intel/ip/ip.global-objs.obj64
+++ b/usr/src/uts/intel/ip/ip.global-objs.obj64
@@ -77,6 +77,22 @@ icmpinfov6
icmprinitv4
icmprinitv6
icmpwinit
+ilb_conn_cache
+ilb_conn_cache_timeout
+ilb_conn_hash_size
+ilb_conn_tcp_expiry
+ilb_conn_timer_size
+ilb_conn_udp_expiry
+ilb_kstat_instance
+ilb_kmem_flags
+ilb_nat_src_hash_size
+ilb_nat_src_instance
+ilb_rule_hash_size
+ilb_sticky_cache
+ilb_sticky_hash_size
+ilb_sticky_expiry
+ilb_sticky_timer_size
+ilb_sticky_timeout
ill_no_arena
ill_null
inet_dev_info
diff --git a/usr/src/uts/sparc/ip/ip.global-objs.debug64 b/usr/src/uts/sparc/ip/ip.global-objs.debug64
index 419a412037..8df87d813d 100644
--- a/usr/src/uts/sparc/ip/ip.global-objs.debug64
+++ b/usr/src/uts/sparc/ip/ip.global-objs.debug64
@@ -77,6 +77,22 @@ icmpinfov6
icmprinitv4
icmprinitv6
icmpwinit
+ilb_conn_cache
+ilb_conn_cache_timeout
+ilb_conn_hash_size
+ilb_conn_tcp_expiry
+ilb_conn_timer_size
+ilb_conn_udp_expiry
+ilb_kstat_instance
+ilb_kmem_flags
+ilb_nat_src_hash_size
+ilb_nat_src_instance
+ilb_rule_hash_size
+ilb_sticky_cache
+ilb_sticky_hash_size
+ilb_sticky_expiry
+ilb_sticky_timer_size
+ilb_sticky_timeout
ill_no_arena
ill_null
inet_dev_info
diff --git a/usr/src/uts/sparc/ip/ip.global-objs.obj64 b/usr/src/uts/sparc/ip/ip.global-objs.obj64
index db3c18e64e..3df973b8f9 100644
--- a/usr/src/uts/sparc/ip/ip.global-objs.obj64
+++ b/usr/src/uts/sparc/ip/ip.global-objs.obj64
@@ -77,6 +77,22 @@ icmpinfov6
icmprinitv4
icmprinitv6
icmpwinit
+ilb_conn_cache
+ilb_conn_cache_timeout
+ilb_conn_hash_size
+ilb_conn_tcp_expiry
+ilb_conn_timer_size
+ilb_conn_udp_expiry
+ilb_kstat_instance
+ilb_kmem_flags
+ilb_nat_src_hash_size
+ilb_nat_src_instance
+ilb_rule_hash_size
+ilb_sticky_cache
+ilb_sticky_hash_size
+ilb_sticky_expiry
+ilb_sticky_timer_size
+ilb_sticky_timeout
ill_no_arena
ill_null
inet_dev_info