diff options
author | Sangeeta Misra <Sangeeta.Misra@Sun.COM> | 2009-11-03 23:15:19 -0800 |
---|---|---|
committer | Sangeeta Misra <Sangeeta.Misra@Sun.COM> | 2009-11-03 23:15:19 -0800 |
commit | dbed73cbda2229fd1aa6dc5743993cae7f0a7ee9 (patch) | |
tree | 74a198c6a7ce750aaff09b4d682a53564ca13b58 /usr/src | |
parent | dfe73b3d6f9191b75fe71a92c8854f83c6d16a63 (diff) | |
download | illumos-gate-dbed73cbda2229fd1aa6dc5743993cae7f0a7ee9.tar.gz |
PSARC 2008/575 ILB: Integrated L3/L4 Load balancer
6882718 in-kernel simple L3/L4 load balancing service should be provided in Solaris
6884202 ipobs_hook() in ip_input() invalidates DB_REF assumption
Diffstat (limited to 'usr/src')
103 files changed, 26994 insertions, 57 deletions
diff --git a/usr/src/Makefile.lint b/usr/src/Makefile.lint index 715ab9cf9a..f9148fbbd7 100644 --- a/usr/src/Makefile.lint +++ b/usr/src/Makefile.lint @@ -72,6 +72,7 @@ COMMON_SUBDIRS = \ cmd/cmd-inet/usr.bin \ cmd/cmd-inet/usr.lib/bridged \ cmd/cmd-inet/usr.lib/dsvclockd \ + cmd/cmd-inet/usr.lib/ilbd \ cmd/cmd-inet/usr.lib/in.dhcpd \ cmd/cmd-inet/usr.lib/in.mpathd \ cmd/cmd-inet/usr.lib/in.ndpd \ @@ -82,6 +83,7 @@ COMMON_SUBDIRS = \ cmd/cmd-inet/usr.lib/wanboot \ cmd/cmd-inet/usr.sadm \ cmd/cmd-inet/usr.sbin \ + cmd/cmd-inet/usr.sbin/ilbadm \ cmd/col \ cmd/compress \ cmd/consadm \ @@ -365,6 +367,7 @@ COMMON_SUBDIRS = \ lib/libgss \ lib/libhotplug \ lib/libidmap \ + lib/libilb \ lib/libinetcfg \ lib/libinetsvc \ lib/libinetutil \ diff --git a/usr/src/Targetdirs b/usr/src/Targetdirs index b20207ca5a..4ed1e17291 100644 --- a/usr/src/Targetdirs +++ b/usr/src/Targetdirs @@ -234,6 +234,7 @@ DIRS= \ /usr/lib/inet/dhcp/nsu \ /usr/lib/inet/dhcp/svc \ /usr/lib/inet/dhcp/svcadm \ + /usr/lib/inet/ilb \ /usr/lib/inet/$(MACH32) \ $(XDIRS) \ /usr/lib/krb5 \ diff --git a/usr/src/cmd/Makefile.check b/usr/src/cmd/Makefile.check index 58c49840c8..a046de9f44 100644 --- a/usr/src/cmd/Makefile.check +++ b/usr/src/cmd/Makefile.check @@ -82,6 +82,7 @@ MANIFEST_SUBDIRS= \ agents/snmp/snmprelayd \ boot/scripts \ cmd-crypto/scripts \ + cmd-inet/usr.lib/ilbd \ cmd-inet/usr.lib/in.chargend \ cmd-inet/usr.lib/in.daytimed \ cmd-inet/usr.lib/in.dhcpd \ diff --git a/usr/src/cmd/cmd-inet/usr.lib/Makefile b/usr/src/cmd/cmd-inet/usr.lib/Makefile index 8c1e5198ee..d8216e049b 100644 --- a/usr/src/cmd/cmd-inet/usr.lib/Makefile +++ b/usr/src/cmd/cmd-inet/usr.lib/Makefile @@ -23,12 +23,12 @@ # Use is subject to license terms. 
# -SUBDIRS= bridged dhcp dsvclockd in.chargend in.daytimed \ +SUBDIRS= bridged dhcp dsvclockd ilbd in.chargend in.daytimed \ in.discardd in.echod in.dhcpd in.mpathd in.ndpd \ in.ripngd in.timed inetd mdnsd ncaconfd pppoe \ slpd wanboot wpad -MSGSUBDIRS= dsvclockd in.dhcpd inetd ncaconfd wanboot +MSGSUBDIRS= dsvclockd ilbd in.dhcpd inetd ncaconfd wanboot include ../../Makefile.cmd diff --git a/usr/src/cmd/cmd-inet/usr.lib/ilbd/Makefile b/usr/src/cmd/cmd-inet/usr.lib/ilbd/Makefile new file mode 100644 index 0000000000..91cd41202c --- /dev/null +++ b/usr/src/cmd/cmd-inet/usr.lib/ilbd/Makefile @@ -0,0 +1,112 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# + +ILBD= ilbd +PROG= $(ILBD) +MANIFEST= ilbd.xml + +ILBD_OBJS= ilbd_main.o ilbd_support.o ilbd_sg.o ilbd_rules.o ilbd_hc.o \ + ilbd_nat.o ilbd_scf.o +ILBD_SRCS= $(ILBD_OBJS:%.o=%.c) +HDRS= ilbd.h + +LIST_OBJS= list.o +LIST_SRCS= $(LIST_OBJS:%.o=../../../uts/common/os/%.c) + +LIB_INC= $(SRC)/lib/libilb/common + +OBJS= $(ILBD_OBJS) $(LIST_OBJS) +SRCS= $(ILBD_SRCS) $(LIST_SRCS) + +ILBSUBDIRS= ilb + +include ../../../Makefile.cmd +include ../../Makefile.cmd-inet + +ROOTMANIFESTDIR= $(ROOTSVCNETWORK)/loadbalancer + +CHECKHDRS= $(HDRS:%.h=%.check) + +CPPFLAGS += -D_FILE_OFFSET_BITS=64 -I$(CMDINETCOMMONDIR) -D_REENTRANT +CPPFLAGS += -I$(LIB_INC) +CPPFLAGS += -I$(SRC)/uts/common + +C99MODE = $(C99_ENABLE) + +# I18n +POFILE = $(ILBD).po +POFILES = $(ILBD_SRCS:%.c=%.po) + +all:= TARGET= all +install:= TARGET= install +clean:= TARGET= clean +clobber:= TARGET= clobber +lint:= TARGET= lint + +# used for debugging ONLY: + +CFLAGS = -g -K pic +STRIP_STABS= : +CTFCVTFLAGS += -g + + +LDLIBS += -lsocket -lsecdb -lnsl -lilb -lscf -linetutil -lbsm + +.KEEP_STATE: + +all: $(PROG) + +$(ILBD): $(OBJS) + $(LINK.c) $(OBJS) -o $@ $(LDLIBS) $(CTFMERGE_HOOK) + $(POST_PROCESS) + +include ../Makefile.lib + +install: all $(ROOTLIBINETPROG) $(ETCILBDFILES) $(ROOTMANIFEST) + +check: $(CHKMANIFEST) $(CHECKHDRS) + +clean: + $(RM) $(OBJS) + +lint: + $(LINT.c) $(ILBD_SRCS) $(LDLIBS) + +$(POFILE): $(POFILES) + $(RM) $@ + $(CAT) $(POFILES) > $@ + +all install clean clobber lint: $(ILBSUBDIRS) + +include ../../../Makefile.targ + +# the below is needed to get list.o built +%.o: ../../../../uts/common/os/%.c + $(COMPILE.c) -o $@ $< + $(POST_PROCESS_O) + +$(ILBSUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(TARGET) + +FRC: diff --git a/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilb/Makefile b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilb/Makefile new file mode 100644 index 0000000000..e70830e8e8 --- /dev/null +++ b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilb/Makefile @@ -0,0 +1,55 @@ +# +# CDDL HEADER START +# +# The contents of this 
file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +PROG = ilb_probe +OBJS = $(PROG:%=%.o) +SRCS = $(OBJS:%.o=%.c) + +include ../../../../Makefile.cmd + +LDLIBS += -lsocket -lnsl + +.KEEP_STATE: + +all: $(PROG) + +include ../../Makefile.lib + +ROOTLIBINETILB = $(ROOTLIBINET)/ilb +ROOTLIBINETILBPROG = $(PROG:%=$(ROOTLIBINETILB)/%) + +$(ROOTLIBINETILB): $(ROOTLIBINET) + $(INS.dir) + +$(ROOTLIBINETILB)/%: % $(ROOTLIBINETILB) + $(INS.file) + +install: all $(ROOTLIBINETILBPROG) + +clean: + $(RM) $(OBJS) + +lint: lint_SRCS + +include ../../../../Makefile.targ diff --git a/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilb/ilb_probe.c b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilb/ilb_probe.c new file mode 100644 index 0000000000..0704e76cde --- /dev/null +++ b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilb/ilb_probe.c @@ -0,0 +1,875 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/time.h> + +#include <netinet/in_systm.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <arpa/inet.h> +#include <netinet/tcp.h> +#include <netinet/ip_icmp.h> +#include <netinet/icmp6.h> +#include <netinet/udp.h> +#include <netdb.h> +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> +#include <errno.h> +#include <limits.h> +#include <signal.h> +#include <libgen.h> +#include <fcntl.h> + +/* + * The following values are what ilbd will set argv[0] to. This determines + * what type of probe to send out. + */ +#define PROBE_PING "ilb_ping" +#define PROBE_PROTO "ilb_probe" + +/* The transport protocol to use in the probe. Value of argv[3]. */ +#define PROTO_TCP "TCP" +#define PROTO_UDP "UDP" + +enum probe_type { ping_probe, tcp_probe, udp_probe }; + +/* Load balance mode. Value of argv[4]. */ +#define MODE_DSR "DSR" +#define MODE_NAT "NAT" +#define MODE_HALF_NAT "HALF_NAT" + +enum lb_mode { dsr, nat, half_nat }; + +/* Number of arguments to the command from ilbd. */ +#define PROG_ARGC 7 + +/* Size of buffer used to receive ICMP packet */ +#define RECV_PKT_SZ 256 + +/* + * Struct to store the probe info (most is passed in using the argv[] array to + * the command given by ilbd). The argv[] contains the following. 
+ * + * argv[0] is either PROBE_PING or PROBE_PROTO + * argv[1] is the VIP + * argv[2] is the backend server address + * argv[3] is the transport protocol used in the rule + * argv[4] is the load balance mode, "DSR", "NAT", "HALF_NAT" + * argv[5] is the probe port + * argv[6] is the probe timeout + * + * The following fields are filled in when a probe is sent. + * + * echo_id is the ID set in the probe + * echo_seq is the sequence set in the probe + * echo_cookie is the random number data in a probe + * lport is the local port (in network byte order) used to send the probe + */ +typedef struct { + enum probe_type probe; + struct in6_addr vip; /* argv[1] */ + struct in6_addr srv_addr; /* argv[2] */ + int proto; /* argv[3] */ + enum lb_mode mode; /* argv[4] */ + in_port_t port; /* argv[5] */ + uint32_t timeout; /* argv[6] */ + + uint16_t echo_id; + uint16_t echo_seq; + uint32_t echo_cookie; + in_port_t lport; +} probe_param_t; + +/* Global variable to indicate whether a timeout means success. */ +static boolean_t timeout_is_good; + +/* SIGALRM handler */ +/* ARGSUSED */ +static void +probe_exit(int s) +{ + if (timeout_is_good) { + (void) printf("0"); + exit(0); + } else { + (void) printf("-1"); + exit(255); + } +} + +/* + * Checksum routine for Internet Protocol family headers (C Version) + * (copied from ping.c) + */ +static ushort_t +in_cksum(ushort_t *addr, int len) +{ + int nleft = len; + ushort_t *w = addr; + ushort_t answer; + ushort_t odd_byte = 0; + int sum = 0; + + /* + * Our algorithm is simple, using a 32 bit accumulator (sum), + * we add sequential 16 bit words to it, and at the end, fold + * back all the carry bits from the top 16 bits into the lower + * 16 bits.
+ */ + while (nleft > 1) { + sum += *w++; + nleft -= 2; + } + + /* mop up an odd byte, if necessary */ + if (nleft == 1) { + *(uchar_t *)(&odd_byte) = *(uchar_t *)w; + sum += odd_byte; + } + + /* + * add back carry outs from top 16 bits to low 16 bits + */ + sum = (sum >> 16) + (sum & 0xffff); /* add hi 16 to low 16 */ + sum += (sum >> 16); /* add carry */ + answer = ~sum; /* truncate to 16 bits */ + return (answer); +} + +/* It is assumed that argv[] contains PROG_ARGC arguments. */ +static boolean_t +parse_probe_param(char *argv[], probe_param_t *param) +{ + int32_t port; + int64_t timeout; + struct in_addr v4addr; + + if (strcmp(basename(argv[0]), PROBE_PING) == 0) { + param->probe = ping_probe; + } else { + if (strcmp(basename(argv[0]), PROBE_PROTO) != 0) + return (B_FALSE); + + if (strcasecmp(argv[3], PROTO_TCP) == 0) { + param->probe = tcp_probe; + param->proto = IPPROTO_TCP; + } else if (strcasecmp(argv[3], PROTO_UDP) == 0) { + param->probe = udp_probe; + param->proto = IPPROTO_UDP; + } else { + return (B_FALSE); + } + } + + if (strchr(argv[1], ':') != NULL) { + if (inet_pton(AF_INET6, argv[1], &param->vip) == 0) + return (B_FALSE); + } else if (strchr(argv[1], '.') != NULL) { + if (inet_pton(AF_INET, argv[1], &v4addr) == 0) + return (B_FALSE); + IN6_INADDR_TO_V4MAPPED(&v4addr, &param->vip); + } else { + return (B_FALSE); + } + + /* + * The address family of vip and srv_addr should be the same for + * now. But in future, we may allow them to be different... So + * we don't do a check here.
+ */ + if (strchr(argv[2], ':') != NULL) { + if (inet_pton(AF_INET6, argv[2], &param->srv_addr) == 0) + return (B_FALSE); + } else if (strchr(argv[2], '.') != NULL) { + if (inet_pton(AF_INET, argv[2], &v4addr) == 0) + return (B_FALSE); + IN6_INADDR_TO_V4MAPPED(&v4addr, &param->srv_addr); + } else { + return (B_FALSE); + } + + if (strcasecmp(argv[4], MODE_DSR) == 0) + param->mode = dsr; + else if (strcasecmp(argv[4], MODE_NAT) == 0) + param->mode = nat; + else if (strcasecmp(argv[4], MODE_HALF_NAT) == 0) + param->mode = half_nat; + else + return (B_FALSE); + + if ((port = atoi(argv[5])) <= 0 || port > USHRT_MAX) + return (B_FALSE); + param->port = port; + + if ((timeout = strtoll(argv[6], NULL, 10)) <= 0 || timeout > UINT_MAX) + return (B_FALSE); + param->timeout = timeout; + + return (B_TRUE); +} + +/* + * Set up the destination address to be used to send a probe based on + * param. + */ +static int +set_sockaddr(struct sockaddr_storage *addr, socklen_t *addr_len, + void **next_hop, probe_param_t *param) +{ + int af; + struct in6_addr *param_addr; + struct sockaddr_in *v4_addr; + struct sockaddr_in6 *v6_addr; + boolean_t nh = B_FALSE; + + switch (param->mode) { + case dsr: + param_addr = &param->vip; + nh = B_TRUE; + break; + case nat: + case half_nat: + param_addr = &param->srv_addr; + break; + } + if (IN6_IS_ADDR_V4MAPPED(param_addr)) { + af = AF_INET; + v4_addr = (struct sockaddr_in *)addr; + IN6_V4MAPPED_TO_INADDR(param_addr, &v4_addr->sin_addr); + v4_addr->sin_family = AF_INET; + v4_addr->sin_port = htons(param->port); + + *addr_len = sizeof (*v4_addr); + } else { + af = AF_INET6; + v6_addr = (struct sockaddr_in6 *)addr; + v6_addr->sin6_family = AF_INET6; + v6_addr->sin6_addr = *param_addr; + v6_addr->sin6_port = htons(param->port); + v6_addr->sin6_flowinfo = 0; + v6_addr->sin6_scope_id = 0; + + *addr_len = sizeof (*v6_addr); + } + + if (!nh) { + *next_hop = NULL; + return (af); + } + + if (af == AF_INET) { + ipaddr_t *nh_addr; + + nh_addr = malloc(sizeof (ipaddr_t)); +
IN6_V4MAPPED_TO_IPADDR(&param->srv_addr, *nh_addr); + *next_hop = nh_addr; + } else { + struct sockaddr_in6 *nh_addr; + + nh_addr = malloc(sizeof (*nh_addr)); + nh_addr->sin6_family = AF_INET6; + nh_addr->sin6_addr = param->srv_addr; + nh_addr->sin6_flowinfo = 0; + nh_addr->sin6_scope_id = 0; + *next_hop = nh_addr; + } + + return (af); +} + +/* + * Use TCP to check if the peer server is alive. Create a TCP socket and + * then call connect() to reach the peer server. If connect() does not + * return within the timeout period, the SIGALRM handler will be invoked + * and tell ilbd that the peer server is not alive. + */ +static int +tcp_query(probe_param_t *param) +{ + int ret; + int sd, af; + struct sockaddr_storage dst_addr; + socklen_t dst_addr_len; + void *next_hop; + hrtime_t start, end; + uint32_t rtt; + + ret = 0; + next_hop = NULL; + + af = set_sockaddr(&dst_addr, &dst_addr_len, &next_hop, param); + + if ((sd = socket(af, SOCK_STREAM, param->proto)) == -1) + return (-1); + + /* DSR mode, need to set the next hop */ + if (next_hop != NULL) { + if (af == AF_INET) { + if (setsockopt(sd, IPPROTO_IP, IP_NEXTHOP, next_hop, + sizeof (ipaddr_t)) < 0) { + ret = -1; + goto out; + } + } else { + if (setsockopt(sd, IPPROTO_IPV6, IPV6_NEXTHOP, + next_hop, sizeof (struct sockaddr_in6)) < 0) { + ret = -1; + goto out; + } + } + } + + timeout_is_good = B_FALSE; + (void) alarm(param->timeout); + start = gethrtime(); + if (connect(sd, (struct sockaddr *)&dst_addr, dst_addr_len) != 0) { + ret = -1; + goto out; + } + end = gethrtime(); + + rtt = (end - start) / (NANOSEC / MICROSEC); + if (rtt == 0) + rtt = 1; + (void) printf("%u", rtt); + +out: + (void) close(sd); + return (ret); +} + +/* + * Check if the ICMP packet is a port unreachable message in response to + * our probe. Return -1 if no, 0 if yes.
+ */ +static int +check_icmp_unreach_v4(struct icmp *icmph, probe_param_t *param) +{ + struct udphdr *udph; + struct ip *iph; + + if (icmph->icmp_type != ICMP_UNREACH) + return (-1); + if (icmph->icmp_code != ICMP_UNREACH_PORT) + return (-1); + + /* LINTED E_BAD_PTR_CAST_ALIGN */ + iph = (struct ip *)((char *)icmph + ICMP_MINLEN); + if (iph->ip_p != IPPROTO_UDP) + return (-1); + + /* LINTED E_BAD_PTR_CAST_ALIGN */ + udph = (struct udphdr *)((char *)iph + (iph->ip_hl << 2)); + if (udph->uh_dport != htons(param->port)) + return (-1); + if (udph->uh_sport != param->lport) + return (-1); + + /* All matched, it is a response to the probe we sent. */ + return (0); +} + +/* + * Check if the ICMP packet is a reply to our echo request. Need to match + * the ID and sequence. + */ +static int +check_icmp_echo_v4(struct icmp *icmph, probe_param_t *param) +{ + uint32_t cookie; + in_port_t port; + + if (icmph->icmp_type != ICMP_ECHOREPLY) + return (-1); + if (icmph->icmp_id != param->echo_id) + return (-1); + if (icmph->icmp_seq != param->echo_seq) + return (-1); + + bcopy(icmph->icmp_data, &cookie, sizeof (cookie)); + if (cookie != param->echo_cookie) + return (-1); + bcopy(icmph->icmp_data + sizeof (cookie), &port, sizeof (port)); + if (port != param->port) + return (-1); + + /* All matched, it is a response to the echo we sent. */ + return (0); +} + +/* Verify if an ICMP packet is what we expect. */ +static int +check_icmp_v4(char *buf, ssize_t rcvd, probe_param_t *param) +{ + struct ip *iph; + struct icmp *icmph; + + /* + * We can dereference the length field without worry since the stack + * should not have sent up the packet if it is smaller than a normal + * ICMPv4 packet. + */ + /* LINTED E_BAD_PTR_CAST_ALIGN */ + iph = (struct ip *)buf; + /* LINTED E_BAD_PTR_CAST_ALIGN */ + icmph = (struct icmp *)((char *)iph + (iph->ip_hl << 2)); + + /* + * If we sent an UDP probe, check if the packet is a port + * unreachable message in response to our probe. 
+ * + * If we sent an ICMP echo request, check if the packet is a reply + * to our echo request. + */ + if (param->probe == udp_probe) { + /* Is the packet large enough for further checking? */ + if (rcvd < 2 * sizeof (struct ip) + ICMP_MINLEN + + sizeof (struct udphdr)) { + return (-1); + } + return (check_icmp_unreach_v4(icmph, param)); + } else { + if (rcvd < sizeof (struct ip) + ICMP_MINLEN) + return (-1); + return (check_icmp_echo_v4(icmph, param)); + } +} + +/* + * Check if the ICMPv6 packet is a port unreachable message in response to + * our probe. Return -1 if no, 0 if yes. + */ +static int +check_icmp_unreach_v6(icmp6_t *icmp6h, probe_param_t *param) +{ + ip6_t *ip6h; + struct udphdr *udph; + + if (icmp6h->icmp6_type != ICMP6_DST_UNREACH) + return (-1); + if (icmp6h->icmp6_code != ICMP6_DST_UNREACH_NOPORT) + return (-1); + + /* LINTED E_BAD_PTR_CAST_ALIGN */ + ip6h = (ip6_t *)((char *)icmp6h + ICMP6_MINLEN); + if (ip6h->ip6_nxt != IPPROTO_UDP) + return (-1); + + udph = (struct udphdr *)(ip6h + 1); + + if (udph->uh_dport != htons(param->port)) + return (-1); + if (udph->uh_sport != param->lport) + return (-1); + + return (0); +} + +/* + * Check if the ICMPv6 packet is a reply to our echo request. Need to match + * the ID and sequence. + */ +static int +check_icmp_echo_v6(icmp6_t *icmp6h, probe_param_t *param) +{ + char *tmp; + uint32_t cookie; + in_port_t port; + + if (icmp6h->icmp6_type != ICMP6_ECHO_REPLY) + return (-1); + if (icmp6h->icmp6_id != param->echo_id) + return (-1); + if (icmp6h->icmp6_seq != param->echo_seq) + return (-1); + tmp = (char *)icmp6h + ICMP6_MINLEN; + bcopy(tmp, &cookie, sizeof (cookie)); + if (cookie != param->echo_cookie) + return (-1); + tmp += sizeof (cookie); + bcopy(tmp, &port, sizeof (port)); + if (port != param->port) + return (-1); + + /* All matched, it is a response to the echo we sent. */ + return (0); +} + +/* Verify if an ICMPv6 packet is what we expect.
*/ +static int +check_icmp_v6(char *buf, ssize_t rcvd, probe_param_t *param) +{ + icmp6_t *icmp6h; + + /* LINTED E_BAD_PTR_CAST_ALIGN */ + icmp6h = (icmp6_t *)(buf); + + /* + * If we sent an UDP probe, check if the packet is a port + * unreachable message. + * + * If we sent an ICMPv6 echo request, check if the packet is a reply. + */ + if (param->probe == udp_probe) { + /* Is the packet large enough for further checking? */ + if (rcvd < sizeof (ip6_t) + ICMP6_MINLEN + + sizeof (struct udphdr)) { + return (-1); + } + return (check_icmp_unreach_v6(icmp6h, param)); + } else { + if (rcvd < ICMP6_MINLEN) + return (-1); + return (check_icmp_echo_v6(icmp6h, param)); + } +} + +/* + * Wait for an ICMP reply indefinitely. If we get what we expect, return 0. + * If an error happens, return -1. + */ +static int +wait_icmp_reply(int af, int recv_sd, struct sockaddr_storage *exp_from, + probe_param_t *param) +{ + char buf[RECV_PKT_SZ]; + socklen_t from_len; + ssize_t rcvd; + int ret; + + for (;;) { + if (af == AF_INET) { + struct sockaddr_in v4_from; + + from_len = sizeof (v4_from); + if ((rcvd = recvfrom(recv_sd, buf, RECV_PKT_SZ, 0, + (struct sockaddr *)&v4_from, &from_len)) < 0) { + ret = -1; + break; + } + + /* Packet not from our peer, ignore it. */ + if ((((struct sockaddr_in *)exp_from)->sin_addr.s_addr) + != v4_from.sin_addr.s_addr) { + continue; + } + if (check_icmp_v4(buf, rcvd, param) == 0) { + ret = 0; + break; + } + } else { + struct sockaddr_in6 v6_from; + + from_len = sizeof (struct sockaddr_in6); + if ((rcvd = recvfrom(recv_sd, buf, RECV_PKT_SZ, 0, + (struct sockaddr *)&v6_from, &from_len)) < 0) { + ret = -1; + break; + } + + if (!IN6_ARE_ADDR_EQUAL(&(v6_from.sin6_addr), + &((struct sockaddr_in6 *)exp_from)->sin6_addr)) { + continue; + } + if (check_icmp_v6(buf, rcvd, param) == 0) { + ret = 0; + break; + } + } + } + return (ret); +} + +/* Return the local port used (network byte order) in a socket.
*/ +static int +get_lport(int sd, in_port_t *lport) +{ + struct sockaddr_storage addr; + socklen_t addr_sz; + + addr_sz = sizeof (addr); + if (getsockname(sd, (struct sockaddr *)&addr, &addr_sz) != 0) + return (-1); + if (addr.ss_family == AF_INET) + *lport = ((struct sockaddr_in *)&addr)->sin_port; + else + *lport = ((struct sockaddr_in6 *)&addr)->sin6_port; + return (0); +} + +/* + * Use UDP to check if the peer server is alive. Send a 0 length UDP packet + * to the peer server. If there is no one listening, the peer IP stack + * should send back a port unreachable ICMP(v4/v6) packet. If the peer + * server is alive, there should be no response. So if we get SIGALRM, + * the peer is alive. + */ +static int +udp_query(probe_param_t *param) +{ + int ret; + int send_sd, recv_sd, af; + struct sockaddr_storage dst_addr; + socklen_t addr_len; + void *next_hop; + char buf[1]; + struct itimerval timeout; + uint64_t tm; + + ret = 0; + next_hop = NULL; + + af = set_sockaddr(&dst_addr, &addr_len, &next_hop, param); + + if ((send_sd = socket(af, SOCK_DGRAM, param->proto)) == -1) + return (-1); + if ((recv_sd = socket(af, SOCK_RAW, (af == AF_INET) ? IPPROTO_ICMP : + IPPROTO_ICMPV6)) == -1) { + return (-1); + } + + /* DSR mode, need to set the next hop */ + if (next_hop != NULL) { + if (af == AF_INET) { + if (setsockopt(send_sd, IPPROTO_IP, IP_NEXTHOP, + next_hop, sizeof (ipaddr_t)) < 0) { + ret = -1; + goto out; + } + } else { + if (setsockopt(send_sd, IPPROTO_IPV6, IPV6_NEXTHOP, + next_hop, sizeof (struct sockaddr_in6)) < 0) { + ret = -1; + goto out; + } + } + } + + /* + * If ilbd asks us to wait at most t, we will wait for at most + * t', which is 3/4 of t. If we wait for too long, ilbd may + * timeout and kill us. 
+ */ + timeout.it_interval.tv_sec = 0; + timeout.it_interval.tv_usec = 0; + tm = (param->timeout * MICROSEC >> 2) * 3; + if (tm > MICROSEC) { + timeout.it_value.tv_sec = tm / MICROSEC; + timeout.it_value.tv_usec = tm - (timeout.it_value.tv_sec * + MICROSEC); + } else { + timeout.it_value.tv_sec = 0; + timeout.it_value.tv_usec = tm; + } + timeout_is_good = B_TRUE; + if (setitimer(ITIMER_REAL, &timeout, NULL) != 0) { + ret = -1; + goto out; + } + + if (sendto(send_sd, buf, 0, 0, (struct sockaddr *)&dst_addr, + addr_len) != 0) { + ret = -1; + goto out; + } + if ((ret = get_lport(send_sd, &param->lport)) != 0) + goto out; + + /* + * If the server app is listening, we should not get back a + * response. So if wait_icmp_reply() returns, either there + * is an error or we get back something. + */ + (void) wait_icmp_reply(af, recv_sd, &dst_addr, param); + ret = -1; + +out: + (void) close(send_sd); + (void) close(recv_sd); + return (ret); +} + +/* + * Size (in uint32_t) of the ping packet to be sent to server. It includes + * a cookie (random number) + the target port. The cookie and port are used + * for matching ping request since there can be many such ping packets sent + * to different servers from the same source address and using the same VIP. + * The last two bytes are for padding. + * + */ +#define PING_PKT_LEN \ + ((ICMP_MINLEN + 2 * sizeof (uint32_t)) / sizeof (uint32_t)) + +/* + * Try to get a random number from the pseudo random number device + * /dev/urandom. If there is any error, return (uint32_t)gethrtime() + * as a back up. + */ +static uint32_t +get_random(void) +{ + int fd; + uint32_t num; + + if ((fd = open("/dev/urandom", O_RDONLY)) == -1) + return ((uint32_t)gethrtime()); + + if (read(fd, &num, sizeof (num)) != sizeof (num)) + num = ((uint32_t)gethrtime()); + + (void) close(fd); + return (num); +} + +/* + * Use ICMP(v4/v6) echo request to check if the peer server machine is + * reachable. Send a echo request and expect to get back a echo reply.
+ */ +static int +ping_query(probe_param_t *param) +{ + int ret; + int sd, af; + struct sockaddr_storage dst_addr; + socklen_t dst_addr_len; + void *next_hop; + hrtime_t start, end; + uint32_t rtt; + uint32_t buf[PING_PKT_LEN]; + struct icmp *icmph; + + ret = 0; + next_hop = NULL; + + af = set_sockaddr(&dst_addr, &dst_addr_len, &next_hop, param); + + if ((sd = socket(af, SOCK_RAW, (af == AF_INET) ? IPPROTO_ICMP : + IPPROTO_ICMPV6)) == -1) { + return (-1); + } + + /* DSR mode, need to set the next hop */ + if (next_hop != NULL) { + if (af == AF_INET) { + if (setsockopt(sd, IPPROTO_IP, IP_NEXTHOP, next_hop, + sizeof (ipaddr_t)) < 0) { + ret = -1; + goto out; + } + } else { + if (setsockopt(sd, IPPROTO_IPV6, IPV6_NEXTHOP, + next_hop, sizeof (struct sockaddr_in6)) < 0) { + ret = -1; + goto out; + } + } + } + + bzero(buf, sizeof (buf)); + icmph = (struct icmp *)buf; + icmph->icmp_type = af == AF_INET ? ICMP_ECHO : ICMP6_ECHO_REQUEST; + icmph->icmp_code = 0; + icmph->icmp_cksum = 0; + icmph->icmp_id = htons(gethrtime() % USHRT_MAX); + icmph->icmp_seq = htons(gethrtime() % USHRT_MAX); + + param->echo_cookie = get_random(); + bcopy(&param->echo_cookie, icmph->icmp_data, + sizeof (param->echo_cookie)); + bcopy(&param->port, icmph->icmp_data + sizeof (param->echo_cookie), + sizeof (param->port)); + icmph->icmp_cksum = in_cksum((ushort_t *)buf, sizeof (buf)); + param->echo_id = icmph->icmp_id; + param->echo_seq = icmph->icmp_seq; + + timeout_is_good = B_FALSE; + (void) alarm(param->timeout); + start = gethrtime(); + if (sendto(sd, buf, sizeof (buf), 0, (struct sockaddr *)&dst_addr, + dst_addr_len) != sizeof (buf)) { + ret = -1; + goto out; + } + if (wait_icmp_reply(af, sd, &dst_addr, param) != 0) { + ret = -1; + goto out; + } + end = gethrtime(); + + rtt = (end - start) / (NANOSEC / MICROSEC); + if (rtt == 0) + rtt = 1; + (void) printf("%u", rtt); + +out: + (void) close(sd); + return (ret); +} + +int +main(int argc, char *argv[]) +{ + probe_param_t param; + int ret; + + /* ilbd
should pass in PROG_ARGC parameters. */ + if (argc != PROG_ARGC) { + (void) printf("-1"); + return (-1); + } + + if (signal(SIGALRM, probe_exit) == SIG_ERR) { + (void) printf("-1"); + return (-1); + } + + if (!parse_probe_param(argv, &param)) { + (void) printf("-1"); + return (-1); + } + + switch (param.probe) { + case ping_probe: + ret = ping_query(&param); + break; + case tcp_probe: + ret = tcp_query(&param); + break; + case udp_probe: + ret = udp_query(&param); + break; + } + + if (ret == -1) + (void) printf("-1"); + + return (ret); +} diff --git a/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd.h b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd.h new file mode 100644 index 0000000000..eda185efab --- /dev/null +++ b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd.h @@ -0,0 +1,435 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ +#ifndef _ILBD_H +#define _ILBD_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdio.h> +#include <ucred.h> +#include <pwd.h> +#include <priv.h> +#include <stdarg.h> +#include <syslog.h> +#include <sys/list.h> +#include <libscf.h> +#include <libintl.h> +#include <locale.h> +#include <libinetutil.h> +#include <auth_list.h> +#include <bsm/adt.h> +#include <bsm/adt_event.h> + +#define SGNAME_SZ 80 +#define ILB_FMRI "svc:/network/loadbalancer/ilb:default" + +#define HC_ACTION ILB_SRV_DISABLED_HC +#define ADMIN_ACTION ILB_SRV_DISABLED_ADMIN + +/* Max name and value length for scf properties */ +#define ILBD_MAX_NAME_LEN ilbd_scf_limit(SCF_LIMIT_MAX_NAME_LENGTH) +#define ILBD_MAX_VALUE_LEN ilbd_scf_limit(SCF_LIMIT_MAX_VALUE_LENGTH) + +/* Different events ILBD is interested in. */ +typedef enum { + ILBD_EVENT_NEW_REQ, /* New client request */ + ILBD_EVENT_REQ, /* Client request comes in */ + ILBD_EVENT_REP_OK, /* Reply channel to client is writeable */ + ILBD_EVENT_PROBE, /* A HC returns some result */ + ILBD_EVENT_TIMER /* ilbd_timer_q fired */ +} ilbd_event_t; + +typedef enum { + ILBD_SCF_RULE, /* prop group for rules */ + ILBD_SCF_SG, /* prop group for servergroups */ + ILBD_SCF_HC /* prop group for healthchecks */ +} ilbd_scf_pg_type_t; + +typedef enum { + ILBD_SCF_CREATE, + ILBD_SCF_DESTROY, + ILBD_SCF_ENABLE_DISABLE +} ilbd_scf_cmd_t; + +typedef enum { + ILBD_STRING, /* string */ + ILBD_INT, /* int */ + ILBD_ADDR_V4, /* ipv4 addr */ + ILBD_ADDR_V6 /* ipv6 addr */ +} ilbd_scf_data_type_t; + +typedef enum { + stat_enable_server, + stat_disable_server, + stat_declare_srv_dead, + stat_declare_srv_alive +} ilbd_srv_status_ind_t; + +/* + * All user struct pointer passed to port_associate() should have the first + * field as ilbd_event_t. The following struct can be used to find the + * event. 
+ */ +typedef struct { + ilbd_event_t ev; +} ilbd_event_obj_t; + +typedef struct { + ilbd_event_t ev; + timer_t timerid; +} ilbd_timer_event_obj_t; + +typedef struct ilbd_srv { + list_node_t isv_srv_link; + ilb_sg_srv_t isv_srv; +#define isv_addr isv_srv.sgs_addr +#define isv_minport isv_srv.sgs_minport +#define isv_maxport isv_srv.sgs_maxport +#define isv_flags isv_srv.sgs_flags +#define isv_id isv_srv.sgs_id +#define isv_srvID isv_srv.sgs_srvID +} ilbd_srv_t; + +#define MAX_SRVCOUNT 1000 +#define MAX_SRVID (MAX_SRVCOUNT - 1) +#define BAD_SRVID (-1) + +typedef struct ilbd_sg { + list_t isg_srvlist; /* list of ilbd_srv_t */ + char isg_name[ILB_SGNAME_SZ]; + int32_t isg_srvcount; + int32_t isg_max_id; + list_t isg_rulelist; /* list of ilbd_rule_t */ + char isg_id_arr[MAX_SRVCOUNT]; /* for server ID allocation */ + + list_node_t isg_link; /* linkage for sg list */ +} ilbd_sg_t; + +typedef struct ilbd_rule { + list_node_t irl_link; + list_node_t irl_sglink; + ilbd_sg_t *irl_sg; + ilb_rule_info_t irl_info; +#define irl_flags irl_info.rl_flags +#define irl_name irl_info.rl_name +#define irl_vip irl_info.rl_vip +#define irl_proto irl_info.rl_proto +#define irl_ipversion irl_info.rl_ipversion +#define irl_minport irl_info.rl_minport +#define irl_maxport irl_info.rl_maxport +#define irl_algo irl_info.rl_algo +#define irl_topo irl_info.rl_topo +#define irl_nat_src_start irl_info.rl_nat_src_start +#define irl_nat_src_end irl_info.rl_nat_src_end +#define irl_stickymask irl_info.rl_stickymask +#define irl_conndrain irl_info.rl_conndrain +#define irl_nat_timeout irl_info.rl_nat_timeout +#define irl_sticky_timeout irl_info.rl_sticky_timeout +#define irl_hcport irl_info.rl_hcport +#define irl_hcpflag irl_info.rl_hcpflag +#define irl_sgname irl_info.rl_sgname +#define irl_hcname irl_info.rl_hcname +} ilbd_rule_t; + +/* + * Health check related definitions + */ + +/* Default health check probe program provided */ +#define ILB_PROBE_PROTO "/usr/lib/inet/ilb/ilb_probe" + +/* Command 
name (argv[0]) passed to ilb_probe to indicate a ping test */ +#define ILB_PROBE_PING "ilb_ping" + +/* Use the first character of the rule's hcname to decide if rule has HC. */ +#define RULE_HAS_HC(irl) ((irl)->irl_info.rl_hcname[0] != '\0') + +/* Type of probe test */ +typedef enum { + ILBD_HC_PING = 1, /* ICMP Echo probe */ + ILBD_HC_TCP, /* TCP connect probe */ + ILBD_HC_UDP, /* UDP packet probe */ + ILBD_HC_USER /* User supplied probe */ +} ilbd_hc_test_t; + +/* Struct representing a hc object in ilbd */ +typedef struct { + list_node_t ihc_link; /* List linkage */ + + ilb_hc_info_t ihc_info; +/* Short hand for the fields inside ilb_hc_info_t */ +#define ihc_name ihc_info.hci_name +#define ihc_test ihc_info.hci_test +#define ihc_timeout ihc_info.hci_timeout +#define ihc_count ihc_info.hci_count +#define ihc_interval ihc_info.hci_interval +#define ihc_def_ping ihc_info.hci_def_ping + + ilbd_hc_test_t ihc_test_type; /* Type of probe test */ + int ihc_rule_cnt; /* Num of rules associated with hc */ + list_t ihc_rules; /* Rules associated with this hc */ +} ilbd_hc_t; + +struct ilbd_hc_srv_s; + +/* + * Struct representing a hc rule object + * + * hcr_link: list linkage + * hcr_rule: pointer to the ilbd rule object + * hcr_servers: list of servers of this rule + */ +typedef struct { + list_node_t hcr_link; + ilbd_rule_t const *hcr_rule; + list_t hcr_servers; +} ilbd_hc_rule_t; + +struct ilbd_hc_srv_s; + +/* + * Struct representing an event of the probe process + * + * ihp_ev: the event type, which is ILBD_EVENT_PROBE + * ihp_srv: pointer to the hc server object + * ihp_pid: pid of the probe process + * ihp_done: is ilbd done reading the output of the probe process + */ +typedef struct { + ilbd_event_t ihp_ev; + struct ilbd_hc_srv_s *ihp_srv; + pid_t ihp_pid; + boolean_t ihp_done; +} ilbd_hc_probe_event_t; + +/* + * ilbd_hc_srv_t state + * + * ilbd_hc_def_pinging: the default ping should be run + * ilbd_hc_probing: the probe process should be started + */ +enum 
ilbd_hc_state { + ilbd_hc_def_pinging, + ilbd_hc_probing +}; + +/* + * Struct representing a server associated with a hc object + * + * shc_srv_link: list linkage + * shc_hc: pointer to the hc object + * shc_hc_rule: pointer to the hc rule object + * shc_sg_srv: pointer to the server group object + * shc_tid: timeout ID + * shc_cur_cnt: number of times the hc probe has been run + * shc_fail_cnt: number of consecutive probe failures + * shc_status: health status + * shc_rtt: rtt (in micro sec) to the backend server + * shc_lasttime: last time a probe sequence is executed + * shc_nexttime: next time a probe sequence is executed + * shc_state: hc probe state + * shc_child_pid: pid of the probe process + * shc_child_fd: fd to the output of the probe process + * shc_ev: event object of the probe process + * shc_ev_port: event port of the event object + */ +typedef struct ilbd_hc_srv_s { + list_node_t shc_srv_link; + ilbd_hc_t *shc_hc; + ilbd_hc_rule_t *shc_hc_rule; + ilb_sg_srv_t const *shc_sg_srv; + + iu_timer_id_t shc_tid; + uint_t shc_cur_cnt; + uint_t shc_fail_cnt; + ilb_hc_srv_status_t shc_status; + uint32_t shc_rtt; + time_t shc_lasttime; + time_t shc_nexttime; + + enum ilbd_hc_state shc_state; + pid_t shc_child_pid; + int shc_child_fd; + ilbd_hc_probe_event_t *shc_ev; + int shc_ev_port; +} ilbd_hc_srv_t; + +/* + * Structure for holding audit server and servergroup event + * data. Not all events use all members of the structure. + */ +typedef struct audit_sg_event_data { + char *ed_server_address; /* server's IP address */ + char *ed_serverid; /* serverid. 
*/ + uint16_t ed_minport; /* server's minport */ + uint16_t ed_maxport; /* server's maxport */ + char *ed_sgroup; /* servergroup */ +} audit_sg_event_data_t; + +/* Struct to store client info */ +typedef struct { + ilbd_event_t cli_ev; + int cli_sd; + struct passwd cli_pw; + size_t cli_pw_bufsz; + char *cli_pw_buf; + ilbd_cmd_t cli_cmd; + ilb_comm_t *cli_saved_reply; + size_t cli_saved_size; + ucred_t *cli_peer_ucredp; /* needed for auditing */ +} ilbd_client_t; + +void ilbd_reply_ok(uint32_t *, size_t *); +void ilbd_reply_err(uint32_t *, size_t *, ilb_status_t); + +ilb_status_t ilbd_check_client_config_auth(const struct passwd *); +ilb_status_t ilbd_check_client_enable_auth(const struct passwd *); +ilb_status_t ilbd_retrieve_names(ilbd_cmd_t, uint32_t *, size_t *); +void i_setup_sg_hlist(void); +void i_setup_rule_hlist(void); +void logperror(const char *); +ilb_status_t ilbd_add_server_to_group(ilb_sg_info_t *, int, + const struct passwd *, ucred_t *); +ilb_status_t ilbd_rem_server_from_group(ilb_sg_info_t *, int, + const struct passwd *, ucred_t *); +ilb_status_t ilbd_create_sg(ilb_sg_info_t *, int, + const struct passwd *, ucred_t *); + +ilb_status_t ilbd_destroy_sg(const char *, const struct passwd *, + ucred_t *); +ilb_status_t ilbd_retrieve_sg_hosts(const char *, uint32_t *, size_t *); + +ilb_status_t ilbd_enable_server(ilb_sg_info_t *, const struct passwd *, + ucred_t *); +ilb_status_t ilbd_disable_server(ilb_sg_info_t *, const struct passwd *, + ucred_t *); +ilb_status_t ilbd_k_Xable_server(const struct in6_addr *, const char *, + ilbd_srv_status_ind_t); + +ilb_status_t i_add_srv2krules(list_t *, ilb_sg_srv_t *, int); +ilb_status_t i_rem_srv_frm_krules(list_t *, ilb_sg_srv_t *, int); +int ilbd_get_num_krules(void); +ilb_status_t ilbd_get_krule_names(ilbd_namelist_t **, int); +ilb_status_t ilb_get_krule_servers(ilb_sg_info_t *); +ilbd_sg_t *i_find_sg_byname(const char *); +ilb_status_t i_check_srv2rules(list_t *, ilb_sg_srv_t *); + +ilb_status_t 
ilbd_address_to_srvID(ilb_sg_info_t *, uint32_t *, size_t *); +ilb_status_t ilbd_srvID_to_address(ilb_sg_info_t *, uint32_t *, size_t *); + +ilb_status_t do_ioctl(void *, ssize_t); + +ilb_status_t ilbd_create_rule(ilb_rule_info_t *, int, const struct passwd *, + ucred_t *); +ilb_status_t ilbd_retrieve_rule(ilbd_name_t, uint32_t *, size_t *); + +ilb_status_t ilbd_destroy_rule(ilbd_name_t, const struct passwd *, + ucred_t *); +ilb_status_t ilbd_enable_rule(ilbd_name_t, const struct passwd *, ucred_t *); +ilb_status_t ilbd_disable_rule(ilbd_name_t, const struct passwd *, + ucred_t *); + +boolean_t is_debugging_on(void); +ilb_status_t ilbd_sg_check_rule_port(ilbd_sg_t *, ilb_rule_info_t *); + +void ilbd_enable_debug(void); +ilb_status_t ilb_map_errno2ilbstat(int); + +ilb_status_t i_attach_rule2sg(ilbd_sg_t *, ilbd_rule_t *); + +/* Logging routine and macros */ +void ilbd_log(int, const char *, ...); +#define logerr(...) ilbd_log(LOG_ERR, __VA_ARGS__) +#define logdebug(...) ilbd_log(LOG_DEBUG, __VA_ARGS__) + +/* Health check manipulation routines */ +void i_ilbd_setup_hc_list(void); +ilb_status_t ilbd_create_hc(const ilb_hc_info_t *, int, + const struct passwd *, ucred_t *); +ilb_status_t ilbd_destroy_hc(const char *, const struct passwd *, ucred_t *); +ilbd_hc_t *ilbd_get_hc(const char *); +ilb_status_t ilbd_get_hc_info(const char *, uint32_t *, size_t *); +ilb_status_t ilbd_get_hc_srvs(const char *, uint32_t *, size_t *); +ilb_status_t ilbd_hc_associate_rule(const ilbd_rule_t *, int); +ilb_status_t ilbd_hc_dissociate_rule(const ilbd_rule_t *); +ilb_status_t ilbd_hc_add_server(const ilbd_rule_t *, const ilb_sg_srv_t *, + int); +ilb_status_t ilbd_hc_del_server(const ilbd_rule_t *, const ilb_sg_srv_t *); +ilb_status_t ilbd_hc_enable_rule(const ilbd_rule_t *); +ilb_status_t ilbd_hc_disable_rule(const ilbd_rule_t *); +ilb_status_t ilbd_hc_enable_server(const ilbd_rule_t *, + const ilb_sg_srv_t *); +ilb_status_t ilbd_hc_disable_server(const ilbd_rule_t *, + const 
ilb_sg_srv_t *); + +/* Health check timer routines */ +void ilbd_hc_probe_return(int, int, int, ilbd_hc_probe_event_t *); +void ilbd_hc_timer_init(int, ilbd_timer_event_obj_t *); +void ilbd_hc_timeout(void); +void ilbd_hc_timer_update(ilbd_timer_event_obj_t *); + +/* Show NAT info routines */ +ilb_status_t ilbd_show_nat(void *, const ilb_comm_t *, uint32_t *, + size_t *); +void ilbd_show_nat_cleanup(void); + + +/* Show sticky info routines */ +ilb_status_t ilbd_show_sticky(void *, const ilb_comm_t *, uint32_t *, + size_t *); +void ilbd_show_sticky_cleanup(void); + +ilb_status_t ilbd_create_pg(ilbd_scf_pg_type_t, void *); +ilb_status_t ilbd_destroy_pg(ilbd_scf_pg_type_t, const char *); +ilb_status_t ilbd_change_prop(ilbd_scf_pg_type_t, const char *, + const char *, void *); +void ilbd_scf_str_to_ip(int, char *, struct in6_addr *); +ilb_status_t ilbd_scf_ip_to_str(uint16_t, struct in6_addr *, scf_type_t *, + char *); +ilb_status_t ilbd_scf_add_srv(ilbd_sg_t *, ilbd_srv_t *); +ilb_status_t ilbd_scf_del_srv(ilbd_sg_t *, ilbd_srv_t *); +int ilbd_scf_limit(int); + +ilb_status_t ilbd_walk_rule_pgs(ilb_status_t (*)(ilb_rule_info_t *, int, + const struct passwd *, ucred_t *), void *, void *); +ilb_status_t ilbd_walk_sg_pgs(ilb_status_t (*)(ilb_sg_info_t *, int, + const struct passwd *, ucred_t *), void *, void *); +ilb_status_t ilbd_walk_hc_pgs(ilb_status_t (*)(const ilb_hc_info_t *, int, + const struct passwd *, ucred_t *), void *, void *); +void ilbd_addr2str(struct in6_addr *, char *, size_t); +void addr2str(ilb_ip_addr_t, char *, size_t); +void ilbd_algo_to_str(ilb_algo_t, char *); +void ilbd_topo_to_str(ilb_topo_t, char *); +void ilbd_ip_to_str(uint16_t, struct in6_addr *, char *); +int ilberror2auditerror(ilb_status_t); + +#ifdef __cplusplus +} +#endif + +#endif /* _ILBD_H */ diff --git a/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd.xml b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd.xml new file mode 100644 index 0000000000..567fb9439d --- /dev/null +++ 
b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd.xml @@ -0,0 +1,115 @@ +<?xml version="1.0"?> +<!DOCTYPE service_bundle SYSTEM "/usr/share/lib/xml/dtd/service_bundle.dtd.1"> +<!-- + Copyright 2009 Sun Microsystems, Inc. All rights reserved. + Use is subject to license terms. + + CDDL HEADER START + + The contents of this file are subject to the terms of the + Common Development and Distribution License (the "License"). + You may not use this file except in compliance with the License. + + You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + or http://www.opensolaris.org/os/licensing. + See the License for the specific language governing permissions + and limitations under the License. + + When distributing Covered Code, include this CDDL HEADER in each + file and include the License file at usr/src/OPENSOLARIS.LICENSE. + If applicable, add the following below this CDDL HEADER, with the + fields enclosed by brackets "[]" replaced with your own identifying + information: Portions Copyright [yyyy] [name of copyright owner] + + CDDL HEADER END + + NOTE: This service manifest is not editable; its contents will + be overwritten by package or patch operations, including + operating system upgrade. Make customizations in a different + file. + + Service manifest for Integrated Load balancer(ILB). +--> + +<service_bundle type='manifest' name='SUNWilbr:ilb'> + +<service + name='network/loadbalancer/ilb' + type='service' + version='1'> + + <create_default_instance enabled='false' /> + + <single_instance /> + + <!-- + Ensure that name services is enabled before ILB service begins. + --> + <dependency + name='name-services' + grouping='require_all' + restart_on='error' + type='service'> + <service_fmri value='svc:/milestone/name-services' /> + </dependency> + + + <!-- + Ensure that forwarding service is enabled before load balancing + service begins. The forwarding service is dependent on + the filesystem and devices services. 
+ --> + <dependency + name='network' + grouping='require_any' + restart_on='error' + type='service'> + <service_fmri value='svc:/network/ipv4-forwarding' /> + <service_fmri value='svc:/network/ipv6-forwarding' /> + </dependency> + + <exec_method + type='method' + name='stop' + exec=':kill' + timeout_seconds='60' > + </exec_method> + + <exec_method + type='method' + name='start' + exec='/usr/lib/inet/ilbd' + timeout_seconds='60' > + <method_context> + <method_credential user='root' group='root' /> + </method_context> + </exec_method> + + <!-- to start stop routing services --> + <property_group name='general' type='framework'> + <propval name='action_authorization' type='astring' + value='solaris.smf.manage.ilb' /> + <propval name='value_authorization' type='astring' + value='solaris.smf.manage.ilb' /> + </property_group> + + + <stability value='Unstable' /> + + <template> + + <common_name> + <loctext xml:lang='C'> Integrated layer 3/4 load balancer + </loctext> + </common_name> + <documentation> + <manpage title='ilbd' section='1M' + manpath='/usr/share/man' /> + <manpage title='ilbadm' section='1M' + manpath='/usr/share/man' /> + </documentation> + + </template> +</service> + +</service_bundle> diff --git a/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_hc.c b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_hc.c new file mode 100644 index 0000000000..da1ab63efe --- /dev/null +++ b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_hc.c @@ -0,0 +1,1574 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/list.h> +#include <sys/stropts.h> +#include <sys/siginfo.h> +#include <sys/wait.h> +#include <arpa/inet.h> +#include <netinet/in.h> +#include <stdlib.h> +#include <stdio.h> +#include <strings.h> +#include <stddef.h> +#include <unistd.h> +#include <libilb.h> +#include <port.h> +#include <time.h> +#include <signal.h> +#include <assert.h> +#include <errno.h> +#include <spawn.h> +#include <fcntl.h> +#include <limits.h> +#include "libilb_impl.h" +#include "ilbd.h" + +/* Global list of HC objects */ +list_t ilbd_hc_list; + +/* Timer queue for all hc related timers. */ +static iu_tq_t *ilbd_hc_timer_q; + +/* Indicate whether the timer needs to be updated */ +static boolean_t hc_timer_restarted; + +static void ilbd_hc_probe_timer(iu_tq_t *, void *); +static ilb_status_t ilbd_hc_restart_timer(ilbd_hc_t *, ilbd_hc_srv_t *); +static boolean_t ilbd_run_probe(ilbd_hc_srv_t *); + +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +/* + * Number of arguments passed to a probe. argc[0] is the path name of + * the probe. + */ +#define HC_PROBE_ARGC 8 + +/* + * Max number of characters to be read from the output of a probe. It + * is long enough to read in a 64 bit integer. + */ +#define HC_MAX_PROBE_OUTPUT 24 + +void +i_ilbd_setup_hc_list(void) +{ + list_create(&ilbd_hc_list, sizeof (ilbd_hc_t), + offsetof(ilbd_hc_t, ihc_link)); +} + +/* + * Given a hc object name, return a pointer to hc object if found. 
+ */ +ilbd_hc_t * +ilbd_get_hc(const char *name) +{ + ilbd_hc_t *hc; + + for (hc = list_head(&ilbd_hc_list); hc != NULL; + hc = list_next(&ilbd_hc_list, hc)) { + if (strcasecmp(hc->ihc_name, name) == 0) + return (hc); + } + return (NULL); +} + +/* + * Generates an audit record for create-healthcheck, + * delete-healthcheck subcommands. + */ +static void +ilbd_audit_hc_event(const char *audit_hcname, + const ilb_hc_info_t *audit_hcinfo, ilbd_cmd_t cmd, + ilb_status_t rc, ucred_t *ucredp) +{ + adt_session_data_t *ah; + adt_event_data_t *event; + au_event_t flag; + int audit_error; + + if ((ucredp == NULL) && (cmd == ILBD_CREATE_HC)) { + /* + * we came here from the path where ilbd incorporates + * the configuration that is listed in SCF: + * i_ilbd_read_config->ilbd_walk_hc_pgs-> + * ->ilbd_scf_instance_walk_pg->ilbd_create_hc + * We skip auditing in that case + */ + logdebug("ilbd_audit_hc_event: skipping auditing"); + return; + } + + if (adt_start_session(&ah, NULL, 0) != 0) { + logerr("ilbd_audit_hc_event: adt_start_session failed"); + exit(EXIT_FAILURE); + } + if (adt_set_from_ucred(ah, ucredp, ADT_NEW) != 0) { + (void) adt_end_session(ah); + logerr("ilbd_audit_hc_event: adt_set_from_ucred failed"); + exit(EXIT_FAILURE); + } + if (cmd == ILBD_CREATE_HC) + flag = ADT_ilb_create_healthcheck; + else if (cmd == ILBD_DESTROY_HC) + flag = ADT_ilb_delete_healthcheck; + + if ((event = adt_alloc_event(ah, flag)) == NULL) { + logerr("ilbd_audit_hc_event: adt_alloc_event failed"); + exit(EXIT_FAILURE); + } + (void) memset((char *)event, 0, sizeof (adt_event_data_t)); + + switch (cmd) { + case ILBD_CREATE_HC: + event->adt_ilb_create_healthcheck.auth_used = + NET_ILB_CONFIG_AUTH; + event->adt_ilb_create_healthcheck.hc_test = + (char *)audit_hcinfo->hci_test; + event->adt_ilb_create_healthcheck.hc_name = + (char *)audit_hcinfo->hci_name; + + /* + * If the value 0 is stored, the default values are + * set in the kernel. 
User land does not know about them + * So if the user does not specify them, audit record + * will show them as 0 + */ + event->adt_ilb_create_healthcheck.hc_timeout = + audit_hcinfo->hci_timeout; + event->adt_ilb_create_healthcheck.hc_count = + audit_hcinfo->hci_count; + event->adt_ilb_create_healthcheck.hc_interval = + audit_hcinfo->hci_interval; + break; + case ILBD_DESTROY_HC: + event->adt_ilb_delete_healthcheck.auth_used = + NET_ILB_CONFIG_AUTH; + event->adt_ilb_delete_healthcheck.hc_name = + (char *)audit_hcname; + break; + } + + /* Fill in success/failure */ + if (rc == ILB_STATUS_OK) { + if (adt_put_event(event, ADT_SUCCESS, ADT_SUCCESS) != 0) { + logerr("ilbd_audit_hc_event: adt_put_event failed"); + exit(EXIT_FAILURE); + } + } else { + audit_error = ilberror2auditerror(rc); + if (adt_put_event(event, ADT_FAILURE, audit_error) != 0) { + logerr("ilbd_audit_hc_event: adt_put_event failed"); + exit(EXIT_FAILURE); + } + } + adt_free_event(event); + (void) adt_end_session(ah); +} + +/* + * Given the ilb_hc_info_t passed in (from the libilb), create a hc object + * in ilbd. The parameter ev_port is not used, refer to comments of + * ilbd_create_sg() in ilbd_sg.c + */ +/* ARGSUSED */ +ilb_status_t +ilbd_create_hc(const ilb_hc_info_t *hc_info, int ev_port, + const struct passwd *ps, ucred_t *ucredp) +{ + ilbd_hc_t *hc; + ilb_status_t ret = ILB_STATUS_OK; + + /* + * ps == NULL is from the daemon when it starts and load configuration + * ps != NULL is from client. 
+ */ + if (ps != NULL) { + ret = ilbd_check_client_config_auth(ps); + if (ret != ILB_STATUS_OK) { + ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC, + ret, ucredp); + return (ret); + } + } + + if (hc_info->hci_name[0] == '\0') { + logdebug("ilbd_create_hc: missing healthcheck info"); + ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC, + ILB_STATUS_ENOHCINFO, ucredp); + return (ILB_STATUS_ENOHCINFO); + } + + hc = ilbd_get_hc(hc_info->hci_name); + if (hc != NULL) { + logdebug("ilbd_create_hc: healthcheck name %s already" + " exists", hc_info->hci_name); + ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC, + ILB_STATUS_EEXIST, ucredp); + return (ILB_STATUS_EEXIST); + } + + /* + * Sanity check on user supplied probe. The path name must be + * a full path name (starts with '/') and executable; errno is + * only examined when access() was actually called. + */ + if (strcasecmp(hc_info->hci_test, ILB_HC_STR_TCP) != 0 && + strcasecmp(hc_info->hci_test, ILB_HC_STR_UDP) != 0 && + strcasecmp(hc_info->hci_test, ILB_HC_STR_PING) != 0 && + (hc_info->hci_test[0] != '/' || + access(hc_info->hci_test, X_OK) == -1)) { + if (hc_info->hci_test[0] == '/' && errno == ENOENT) { + logdebug("ilbd_create_hc: user script %s doesn't " + "exist", hc_info->hci_test); + ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC, + ILB_STATUS_ENOENT, ucredp); + return (ILB_STATUS_ENOENT); + } else { + logdebug("ilbd_create_hc: user script %s is " + "invalid", hc_info->hci_test); + ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC, + ILB_STATUS_EINVAL, ucredp); + return (ILB_STATUS_EINVAL); + } + } + + /* Create and add the hc object */ + hc = calloc(1, sizeof (ilbd_hc_t)); + if (hc == NULL) { + ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC, + ILB_STATUS_ENOMEM, ucredp); + return (ILB_STATUS_ENOMEM); + } + (void) memcpy(&hc->ihc_info, hc_info, sizeof (ilb_hc_info_t)); + if (strcasecmp(hc->ihc_test, ILB_HC_STR_TCP) == 0) + hc->ihc_test_type = ILBD_HC_TCP; + else if (strcasecmp(hc->ihc_test, ILB_HC_STR_UDP) == 0) + hc->ihc_test_type = ILBD_HC_UDP; + else if 
(strcasecmp(hc->ihc_test, ILB_HC_STR_PING) == 0) + hc->ihc_test_type = ILBD_HC_PING; + else + hc->ihc_test_type = ILBD_HC_USER; + list_create(&hc->ihc_rules, sizeof (ilbd_hc_rule_t), + offsetof(ilbd_hc_rule_t, hcr_link)); + + /* Update SCF */ + if (ps != NULL) { + if ((ret = ilbd_create_pg(ILBD_SCF_HC, (void *)hc)) != + ILB_STATUS_OK) { + ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC, + ret, ucredp); + free(hc); + return (ret); + } + } + + /* Everything is fine, now add it to the global list. */ + list_insert_tail(&ilbd_hc_list, hc); + ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC, ret, ucredp); + return (ret); +} + +/* + * Given a name of a hc object, destroy it. + */ +ilb_status_t +ilbd_destroy_hc(const char *hc_name, const struct passwd *ps, + ucred_t *ucredp) +{ + ilb_status_t ret; + ilbd_hc_t *hc; + + /* + * No need to check ps == NULL, daemon won't call any destroy func + * at start up. + */ + ret = ilbd_check_client_config_auth(ps); + if (ret != ILB_STATUS_OK) { + ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC, + ret, ucredp); + return (ret); + } + + hc = ilbd_get_hc(hc_name); + if (hc == NULL) { + logdebug("ilbd_destroy_hc: healthcheck %s does not exist", + hc_name); + ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC, + ILB_STATUS_ENOENT, ucredp); + return (ILB_STATUS_ENOENT); + } + + /* If hc is in use, cannot delete it */ + if (hc->ihc_rule_cnt > 0) { + logdebug("ilbd_destroy_hc: healthcheck %s is associated" + " with a rule - cannot remove", hc_name); + ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC, + ILB_STATUS_INUSE, ucredp); + return (ILB_STATUS_INUSE); + } + + if ((ret = ilbd_destroy_pg(ILBD_SCF_HC, hc_name)) != + ILB_STATUS_OK) { + logdebug("ilbd_destroy_hc: cannot destroy healthcheck %s " + "property group", hc_name); + ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC, + ret, ucredp); + return (ret); + } + + list_remove(&ilbd_hc_list, hc); + free(hc); + ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC, ret, ucredp); + return 
(ret); +} + +/* + * Given a hc object name, return its information. Used by libilb to + * get hc info. + */ +ilb_status_t +ilbd_get_hc_info(const char *hc_name, uint32_t *rbuf, size_t *rbufsz) +{ + ilbd_hc_t *hc; + ilb_hc_info_t *hc_info; + ilb_comm_t *ic = (ilb_comm_t *)rbuf; + + hc = ilbd_get_hc(hc_name); + if (hc == NULL) { + logdebug("%s: healthcheck %s does not exist", __func__, + hc_name); + return (ILB_STATUS_ENOENT); + } + ilbd_reply_ok(rbuf, rbufsz); + hc_info = (ilb_hc_info_t *)&ic->ic_data; + + (void) strlcpy(hc_info->hci_name, hc->ihc_name, sizeof (hc->ihc_name)); + (void) strlcpy(hc_info->hci_test, hc->ihc_test, sizeof (hc->ihc_test)); + hc_info->hci_timeout = hc->ihc_timeout; + hc_info->hci_count = hc->ihc_count; + hc_info->hci_interval = hc->ihc_interval; + hc_info->hci_def_ping = hc->ihc_def_ping; + + *rbufsz += sizeof (ilb_hc_info_t); + + return (ILB_STATUS_OK); +} + +static void +ilbd_hc_copy_srvs(uint32_t *rbuf, size_t *rbufsz, ilbd_hc_rule_t *hc_rule, + const char *rulename) +{ + ilbd_hc_srv_t *tmp_srv; + ilb_hc_srv_t *dst_srv; + ilb_hc_rule_srv_t *srvs; + size_t tmp_rbufsz; + int i; + + tmp_rbufsz = *rbufsz; + /* Set up the reply buffer. rbufsz will be set to the new size. */ + ilbd_reply_ok(rbuf, rbufsz); + + /* Calculate how much space is left for holding server info. 
*/ + *rbufsz += sizeof (ilb_hc_rule_srv_t); + tmp_rbufsz -= *rbufsz; + + srvs = (ilb_hc_rule_srv_t *)&((ilb_comm_t *)rbuf)->ic_data; + + tmp_srv = list_head(&hc_rule->hcr_servers); + for (i = 0; tmp_srv != NULL && tmp_rbufsz >= sizeof (*dst_srv); i++) { + dst_srv = &srvs->rs_srvs[i]; + + (void) strlcpy(dst_srv->hcs_rule_name, rulename, ILB_NAMESZ); + (void) strlcpy(dst_srv->hcs_ID, tmp_srv->shc_sg_srv->sgs_srvID, + ILB_NAMESZ); + (void) strlcpy(dst_srv->hcs_hc_name, + tmp_srv->shc_hc->ihc_name, ILB_NAMESZ); + dst_srv->hcs_IP = tmp_srv->shc_sg_srv->sgs_addr; + dst_srv->hcs_fail_cnt = tmp_srv->shc_fail_cnt; + dst_srv->hcs_status = tmp_srv->shc_status; + dst_srv->hcs_rtt = tmp_srv->shc_rtt; + dst_srv->hcs_lasttime = tmp_srv->shc_lasttime; + dst_srv->hcs_nexttime = tmp_srv->shc_nexttime; + + tmp_srv = list_next(&hc_rule->hcr_servers, tmp_srv); + tmp_rbufsz -= sizeof (*dst_srv); + } + srvs->rs_num_srvs = i; + *rbufsz += i * sizeof (*dst_srv); +} + +/* + * Given a rule name, return the hc status of its servers. + */ +ilb_status_t +ilbd_get_hc_srvs(const char *rulename, uint32_t *rbuf, size_t *rbufsz) +{ + ilbd_hc_t *hc; + ilbd_hc_rule_t *hc_rule; + + for (hc = list_head(&ilbd_hc_list); hc != NULL; + hc = list_next(&ilbd_hc_list, hc)) { + for (hc_rule = list_head(&hc->ihc_rules); hc_rule != NULL; + hc_rule = list_next(&hc->ihc_rules, hc_rule)) { + if (strcasecmp(hc_rule->hcr_rule->irl_name, + rulename) != 0) { + continue; + } + ilbd_hc_copy_srvs(rbuf, rbufsz, hc_rule, rulename); + return (ILB_STATUS_OK); + } + } + return (ILB_STATUS_RULE_NO_HC); +} + +/* + * Initialize the hc timer and associate the notification of timeout to + * the given event port. 
+ */ +void +ilbd_hc_timer_init(int ev_port, ilbd_timer_event_obj_t *ev_obj) +{ + struct sigevent sigev; + port_notify_t notify; + + if ((ilbd_hc_timer_q = iu_tq_create()) == NULL) { + logerr("%s: cannot create hc timer queue", __func__); + exit(EXIT_FAILURE); + } + hc_timer_restarted = B_FALSE; + + ev_obj->ev = ILBD_EVENT_TIMER; + ev_obj->timerid = -1; + + notify.portnfy_port = ev_port; + notify.portnfy_user = ev_obj; + sigev.sigev_notify = SIGEV_PORT; + sigev.sigev_value.sival_ptr = &notify; + if (timer_create(CLOCK_REALTIME, &sigev, &ev_obj->timerid) == -1) { + logerr("%s: cannot create timer", __func__); + exit(EXIT_FAILURE); + } +} + +/* + * HC timeout handler. + */ +void +ilbd_hc_timeout(void) +{ + (void) iu_expire_timers(ilbd_hc_timer_q); + hc_timer_restarted = B_TRUE; +} + +/* + * Set up the timer to fire at the earliest timeout. + */ +void +ilbd_hc_timer_update(ilbd_timer_event_obj_t *ev_obj) +{ + itimerspec_t itimeout; + int timeout; + + /* + * There is no change on the timer list, so no need to set up the + * timer again. + */ + if (!hc_timer_restarted) + return; + +restart: + if ((timeout = iu_earliest_timer(ilbd_hc_timer_q)) == INFTIM) { + hc_timer_restarted = B_FALSE; + return; + } else if (timeout == 0) { + /* + * Handle the timeout immediately. After that (clearing all + * the expired timers), check to see if there are still + * timers running. If yes, start them. + */ + (void) iu_expire_timers(ilbd_hc_timer_q); + goto restart; + } + + itimeout.it_value.tv_sec = timeout / MILLISEC + 1; + itimeout.it_value.tv_nsec = 0; + itimeout.it_interval.tv_sec = 0; + itimeout.it_interval.tv_nsec = 0; + + /* + * Failure to set a timeout is "OK" since hopefully there will be + * other events and timer_settime() will be called again. So + * we will only miss some timeouts. But in the worst case, no event + * will happen and ilbd will get stuck... 
+ */ + if (timer_settime(ev_obj->timerid, 0, &itimeout, NULL) == -1) + logerr("%s: cannot set timer", __func__); + hc_timer_restarted = B_FALSE; +} + +/* + * Kill the probe process of a server. + */ +static void +ilbd_hc_kill_probe(ilbd_hc_srv_t *srv) +{ + /* + * First dissociate the fd from the event port. It should not + * fail. + */ + if (port_dissociate(srv->shc_ev_port, PORT_SOURCE_FD, + srv->shc_child_fd) != 0) { + logdebug("%s: port_dissociate: %s", __func__, strerror(errno)); + } + (void) close(srv->shc_child_fd); + free(srv->shc_ev); + srv->shc_ev = NULL; + + /* Then kill the probe process. */ + if (kill(srv->shc_child_pid, SIGKILL) != 0) { + logerr("%s: rule %s server %s: %s", __func__, + srv->shc_hc_rule->hcr_rule->irl_name, + srv->shc_sg_srv->sgs_srvID, strerror(errno)); + } + /* Should not fail... */ + if (waitpid(srv->shc_child_pid, NULL, 0) != srv->shc_child_pid) { + logdebug("%s: waitpid: rule %s server %s", __func__, + srv->shc_hc_rule->hcr_rule->irl_name, + srv->shc_sg_srv->sgs_srvID); + } + srv->shc_child_pid = 0; +} + +/* + * Disable the server, either because the server is dead or because a timer + * cannot be started for this server. Note that this only affects the + * transient configuration, meaning only in memory. The persistent + * configuration is not affected. + */ +static void +ilbd_mark_server_disabled(ilbd_hc_srv_t *srv) +{ + srv->shc_status = ILB_HCS_DISABLED; + + /* Disable the server in kernel. */ + if (ilbd_k_Xable_server(&srv->shc_sg_srv->sgs_addr, + srv->shc_hc_rule->hcr_rule->irl_name, + stat_declare_srv_dead) != ILB_STATUS_OK) { + logerr("%s: cannot disable server in kernel: rule %s " + "server %s", __func__, + srv->shc_hc_rule->hcr_rule->irl_name, + srv->shc_sg_srv->sgs_srvID); + } +} + +/* + * A probe fails, set the state of the server. 
+ */ +static void +ilbd_set_fail_state(ilbd_hc_srv_t *srv) +{ + if (++srv->shc_fail_cnt < srv->shc_hc->ihc_count) { + /* Probe again */ + ilbd_hc_probe_timer(ilbd_hc_timer_q, srv); + return; + } + + logdebug("%s: rule %s server %s fails %u", __func__, + srv->shc_hc_rule->hcr_rule->irl_name, srv->shc_sg_srv->sgs_srvID, + srv->shc_fail_cnt); + + /* + * If this is a ping test, mark the server as + * unreachable instead of dead. + */ + if (srv->shc_hc->ihc_test_type == ILBD_HC_PING || + srv->shc_state == ilbd_hc_def_pinging) { + srv->shc_status = ILB_HCS_UNREACH; + } else { + srv->shc_status = ILB_HCS_DEAD; + } + + /* Disable the server in kernel. */ + if (ilbd_k_Xable_server(&srv->shc_sg_srv->sgs_addr, + srv->shc_hc_rule->hcr_rule->irl_name, stat_declare_srv_dead) != + ILB_STATUS_OK) { + logerr("%s: cannot disable server in kernel: rule %s " + "server %s", __func__, + srv->shc_hc_rule->hcr_rule->irl_name, + srv->shc_sg_srv->sgs_srvID); + } + + /* Still keep probing in case the server is alive again. */ + if (ilbd_hc_restart_timer(srv->shc_hc, srv) != ILB_STATUS_OK) { + /* Only thing to do is to disable the server... */ + logerr("%s: cannot restart timer: rule %s server %s", __func__, + srv->shc_hc_rule->hcr_rule->irl_name, + srv->shc_sg_srv->sgs_srvID); + srv->shc_status = ILB_HCS_DISABLED; + } +} + +/* + * A probe process has not returned for the ihc_timeout period, we should + * kill it. This function is the handler of this. + */ +/* ARGSUSED */ +static void +ilbd_hc_kill_timer(iu_tq_t *tq, void *arg) +{ + ilbd_hc_srv_t *srv = (ilbd_hc_srv_t *)arg; + + ilbd_hc_kill_probe(srv); + ilbd_set_fail_state(srv); +} + +/* + * Probe timeout handler. Send out the appropriate probe. + */ +/* ARGSUSED */ +static void +ilbd_hc_probe_timer(iu_tq_t *tq, void *arg) +{ + ilbd_hc_srv_t *srv = (ilbd_hc_srv_t *)arg; + + /* + * If starting the probe fails, just pretend that the timeout has + * extended. 
+ */ + if (!ilbd_run_probe(srv)) { + /* + * If we cannot restart the timer, the only thing we can do + * is to disable this server. Hopefully the sys admin will + * notice this and enable this server again later. + */ + if (ilbd_hc_restart_timer(srv->shc_hc, srv) != ILB_STATUS_OK) { + logerr("%s: cannot restart timer: rule %s server %s, " + "disabling it", __func__, + srv->shc_hc_rule->hcr_rule->irl_name, + srv->shc_sg_srv->sgs_srvID); + ilbd_mark_server_disabled(srv); + } + return; + } + + /* + * Similar to above, if kill timer cannot be started, disable the + * server. + */ + if ((srv->shc_tid = iu_schedule_timer(ilbd_hc_timer_q, + srv->shc_hc->ihc_timeout, ilbd_hc_kill_timer, srv)) == -1) { + logerr("%s: cannot start kill timer: rule %s server %s, " + "disabling it", __func__, + srv->shc_hc_rule->hcr_rule->irl_name, + srv->shc_sg_srv->sgs_srvID); + ilbd_mark_server_disabled(srv); + } + hc_timer_restarted = B_TRUE; +} + +/* Restart the periodic timer for a given server. */ +static ilb_status_t +ilbd_hc_restart_timer(ilbd_hc_t *hc, ilbd_hc_srv_t *srv) +{ + int timeout; + + /* Don't allow the timeout interval to be less than 1s */ + timeout = MAX((hc->ihc_interval >> 1) + (gethrtime() % + (hc->ihc_interval + 1)), 1); + + /* + * If the probe is actually a ping probe, there is no need to + * do default pinging. Just skip the step. + */ + if (hc->ihc_def_ping && hc->ihc_test_type != ILBD_HC_PING) + srv->shc_state = ilbd_hc_def_pinging; + else + srv->shc_state = ilbd_hc_probing; + srv->shc_tid = iu_schedule_timer(ilbd_hc_timer_q, timeout, + ilbd_hc_probe_timer, srv); + + if (srv->shc_tid == -1) + return (ILB_STATUS_TIMER); + srv->shc_lasttime = time(NULL); + srv->shc_nexttime = time(NULL) + timeout; + + hc_timer_restarted = B_TRUE; + return (ILB_STATUS_OK); +} + +/* Helper routine to associate a server with its hc object. 
*/ +static ilb_status_t +ilbd_hc_srv_add(ilbd_hc_t *hc, ilbd_hc_rule_t *hc_rule, + const ilb_sg_srv_t *srv, int ev_port) +{ + ilbd_hc_srv_t *new_srv; + ilb_status_t ret; + + if ((new_srv = calloc(1, sizeof (ilbd_hc_srv_t))) == NULL) + return (ILB_STATUS_ENOMEM); + new_srv->shc_hc = hc; + new_srv->shc_hc_rule = hc_rule; + new_srv->shc_sg_srv = srv; + new_srv->shc_ev_port = ev_port; + new_srv->shc_tid = -1; + new_srv->shc_nexttime = time(NULL); + new_srv->shc_lasttime = new_srv->shc_nexttime; + + if ((hc_rule->hcr_rule->irl_flags & ILB_FLAGS_RULE_ENABLED) && + ILB_IS_SRV_ENABLED(srv->sgs_flags)) { + new_srv->shc_status = ILB_HCS_UNINIT; + ret = ilbd_hc_restart_timer(hc, new_srv); + if (ret != ILB_STATUS_OK) { + free(new_srv); + return (ret); + } + } else { + new_srv->shc_status = ILB_HCS_DISABLED; + } + + list_insert_tail(&hc_rule->hcr_servers, new_srv); + return (ILB_STATUS_OK); +} + +/* Handy macro to cancel a server's timer. */ +#define HC_CANCEL_TIMER(srv) \ +{ \ + void *arg; \ + int ret; \ + if ((srv)->shc_tid != -1) { \ + ret = iu_cancel_timer(ilbd_hc_timer_q, (srv)->shc_tid, &arg); \ + (srv)->shc_tid = -1; \ + assert(ret == 1); \ + assert(arg == (srv)); \ + } \ + hc_timer_restarted = B_TRUE; \ +} + +/* Helper routine to dissociate a server from its hc object. */ +static ilb_status_t +ilbd_hc_srv_rem(ilbd_hc_rule_t *hc_rule, const ilb_sg_srv_t *srv) +{ + ilbd_hc_srv_t *tmp_srv; + + for (tmp_srv = list_head(&hc_rule->hcr_servers); tmp_srv != NULL; + tmp_srv = list_next(&hc_rule->hcr_servers, tmp_srv)) { + if (tmp_srv->shc_sg_srv == srv) { + list_remove(&hc_rule->hcr_servers, tmp_srv); + HC_CANCEL_TIMER(tmp_srv); + if (tmp_srv->shc_child_pid != 0) + ilbd_hc_kill_probe(tmp_srv); + free(tmp_srv); + return (ILB_STATUS_OK); + } + } + return (ILB_STATUS_ENOENT); +} + +/* Helper routine to dissociate all servers of a rule from its hc object. 
*/ +static void +ilbd_hc_srv_rem_all(ilbd_hc_rule_t *hc_rule) +{ + ilbd_hc_srv_t *srv; + + while ((srv = list_remove_head(&hc_rule->hcr_servers)) != NULL) { + HC_CANCEL_TIMER(srv); + if (srv->shc_child_pid != 0) + ilbd_hc_kill_probe(srv); + free(srv); + } +} + +/* Associate a rule with its hc object. */ +ilb_status_t +ilbd_hc_associate_rule(const ilbd_rule_t *rule, int ev_port) +{ + ilbd_hc_t *hc; + ilbd_hc_rule_t *hc_rule; + ilb_status_t ret; + ilbd_sg_t *sg; + ilbd_srv_t *ilbd_srv; + + /* The rule is assumed to be initialized appropriately. */ + if ((hc = ilbd_get_hc(rule->irl_hcname)) == NULL) { + logdebug("ilbd_hc_associate_rule: healthcheck %s does not " + "exist", rule->irl_hcname); + return (ILB_STATUS_ENOHCINFO); + } + if ((hc->ihc_test_type == ILBD_HC_TCP && + rule->irl_proto != IPPROTO_TCP) || + (hc->ihc_test_type == ILBD_HC_UDP && + rule->irl_proto != IPPROTO_UDP)) { + return (ILB_STATUS_RULE_HC_MISMATCH); + } + if ((hc_rule = calloc(1, sizeof (ilbd_hc_rule_t))) == NULL) { + logdebug("ilbd_hc_associate_rule: out of memory"); + return (ILB_STATUS_ENOMEM); + } + + hc_rule->hcr_rule = rule; + list_create(&hc_rule->hcr_servers, sizeof (ilbd_hc_srv_t), + offsetof(ilbd_hc_srv_t, shc_srv_link)); + + /* Add all the servers. */ + sg = rule->irl_sg; + for (ilbd_srv = list_head(&sg->isg_srvlist); ilbd_srv != NULL; + ilbd_srv = list_next(&sg->isg_srvlist, ilbd_srv)) { + if ((ret = ilbd_hc_srv_add(hc, hc_rule, &ilbd_srv->isv_srv, + ev_port)) != ILB_STATUS_OK) { + /* Remove all previously added servers */ + ilbd_hc_srv_rem_all(hc_rule); + free(hc_rule); + return (ret); + } + } + list_insert_tail(&hc->ihc_rules, hc_rule); + hc->ihc_rule_cnt++; + + return (ILB_STATUS_OK); +} + +/* Dissociate a rule from its hc object. */ +ilb_status_t +ilbd_hc_dissociate_rule(const ilbd_rule_t *rule) +{ + ilbd_hc_t *hc; + ilbd_hc_rule_t *hc_rule; + + /* The rule is assumed to be initialized appropriately. 
*/ + if ((hc = ilbd_get_hc(rule->irl_hcname)) == NULL) { + logdebug("ilbd_hc_dissociate_rule: healthcheck %s does not " + "exist", rule->irl_hcname); + return (ILB_STATUS_ENOENT); + } + for (hc_rule = list_head(&hc->ihc_rules); hc_rule != NULL; + hc_rule = list_next(&hc->ihc_rules, hc_rule)) { + if (hc_rule->hcr_rule == rule) + break; + } + if (hc_rule == NULL) { + logdebug("ilbd_hc_dissociate_rule: rule %s is not associated " + "with healtcheck %s", rule->irl_hcname, hc->ihc_name); + return (ILB_STATUS_ENOENT); + } + ilbd_hc_srv_rem_all(hc_rule); + list_remove(&hc->ihc_rules, hc_rule); + hc->ihc_rule_cnt--; + return (ILB_STATUS_OK); +} + +/* + * Given a hc object name and a rule, check to see if the rule is associated + * with the hc object. If it is, the hc object is returned in **hc and the + * ilbd_hc_rule_t is returned in **hc_rule. + */ +static boolean_t +ilbd_hc_check_rule(const char *hc_name, const ilbd_rule_t *rule, + ilbd_hc_t **hc, ilbd_hc_rule_t **hc_rule) +{ + ilbd_hc_t *tmp_hc; + ilbd_hc_rule_t *tmp_hc_rule; + + if ((tmp_hc = ilbd_get_hc(hc_name)) == NULL) + return (B_FALSE); + for (tmp_hc_rule = list_head(&tmp_hc->ihc_rules); tmp_hc_rule != NULL; + tmp_hc_rule = list_next(&tmp_hc->ihc_rules, tmp_hc_rule)) { + if (tmp_hc_rule->hcr_rule == rule) { + *hc = tmp_hc; + *hc_rule = tmp_hc_rule; + return (B_TRUE); + } + } + return (B_FALSE); +} + +/* Associate a server with its hc object. */ +ilb_status_t +ilbd_hc_add_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv, + int ev_port) +{ + ilbd_hc_t *hc; + ilbd_hc_rule_t *hc_rule; + + if (!ilbd_hc_check_rule(rule->irl_hcname, rule, &hc, &hc_rule)) + return (ILB_STATUS_ENOENT); + return (ilbd_hc_srv_add(hc, hc_rule, srv, ev_port)); +} + +/* Dissociate a server from its hc object. 
*/ +ilb_status_t +ilbd_hc_del_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv) +{ + ilbd_hc_t *hc; + ilbd_hc_rule_t *hc_rule; + + if (!ilbd_hc_check_rule(rule->irl_hcname, rule, &hc, &hc_rule)) + return (ILB_STATUS_ENOENT); + return (ilbd_hc_srv_rem(hc_rule, srv)); +} + +/* Helper routine to enable/disable a server's hc probe. */ +static ilb_status_t +ilbd_hc_toggle_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv, + boolean_t enable) +{ + ilbd_hc_t *hc; + ilbd_hc_rule_t *hc_rule; + ilbd_hc_srv_t *tmp_srv; + ilb_status_t ret; + + if (!ilbd_hc_check_rule(rule->irl_hcname, rule, &hc, &hc_rule)) + return (ILB_STATUS_ENOENT); + for (tmp_srv = list_head(&hc_rule->hcr_servers); tmp_srv != NULL; + tmp_srv = list_next(&hc_rule->hcr_servers, tmp_srv)) { + if (tmp_srv->shc_sg_srv != srv) { + continue; + } + if (enable) { + if (tmp_srv->shc_status == ILB_HCS_DISABLED) { + ret = ilbd_hc_restart_timer(hc, tmp_srv); + if (ret != ILB_STATUS_OK) { + logerr("%s: cannot start timers for " + "rule %s server %s", __func__, + rule->irl_name, + tmp_srv->shc_sg_srv->sgs_srvID); + return (ret); + } + /* Start from fresh... */ + tmp_srv->shc_status = ILB_HCS_UNINIT; + tmp_srv->shc_rtt = 0; + tmp_srv->shc_fail_cnt = 0; + } + } else { + if (tmp_srv->shc_status != ILB_HCS_DISABLED) { + tmp_srv->shc_status = ILB_HCS_DISABLED; + HC_CANCEL_TIMER(tmp_srv); + if (tmp_srv->shc_child_pid != 0) + ilbd_hc_kill_probe(tmp_srv); + } + } + return (ILB_STATUS_OK); + } + return (ILB_STATUS_ENOENT); +} + +ilb_status_t +ilbd_hc_enable_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv) +{ + return (ilbd_hc_toggle_server(rule, srv, B_TRUE)); +} + +ilb_status_t +ilbd_hc_disable_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv) +{ + return (ilbd_hc_toggle_server(rule, srv, B_FALSE)); +} + +/* + * Helper routine to enable/disable a rule's hc probe (including all its + * servers). 
+ */ +static ilb_status_t +ilbd_hc_toggle_rule(const ilbd_rule_t *rule, boolean_t enable) +{ + ilbd_hc_t *hc; + ilbd_hc_rule_t *hc_rule; + ilbd_hc_srv_t *tmp_srv; + int ret; + + if (!ilbd_hc_check_rule(rule->irl_hcname, rule, &hc, &hc_rule)) + return (ILB_STATUS_ENOENT); + + for (tmp_srv = list_head(&hc_rule->hcr_servers); tmp_srv != NULL; + tmp_srv = list_next(&hc_rule->hcr_servers, tmp_srv)) { + if (enable) { + /* + * If the server is disabled in the rule, do not + * restart its timer. + */ + if (tmp_srv->shc_status == ILB_HCS_DISABLED && + ILB_IS_SRV_ENABLED( + tmp_srv->shc_sg_srv->sgs_flags)) { + ret = ilbd_hc_restart_timer(hc, tmp_srv); + if (ret != ILB_STATUS_OK) { + logerr("%s: cannot start timers for " + "rule %s server %s", __func__, + rule->irl_name, + tmp_srv->shc_sg_srv->sgs_srvID); + goto rollback; + } else { + /* Start from fresh... */ + tmp_srv->shc_status = ILB_HCS_UNINIT; + tmp_srv->shc_rtt = 0; + tmp_srv->shc_fail_cnt = 0; + } + } + } else { + if (tmp_srv->shc_status != ILB_HCS_DISABLED) { + HC_CANCEL_TIMER(tmp_srv); + tmp_srv->shc_status = ILB_HCS_DISABLED; + if (tmp_srv->shc_child_pid != 0) + ilbd_hc_kill_probe(tmp_srv); + } + } + } + return (ILB_STATUS_OK); +rollback: + enable = !enable; + for (tmp_srv = list_prev(&hc_rule->hcr_servers, tmp_srv); + tmp_srv != NULL; + tmp_srv = list_prev(&hc_rule->hcr_servers, tmp_srv)) { + if (enable) { + if (tmp_srv->shc_status == ILB_HCS_DISABLED && + ILB_IS_SRV_ENABLED( + tmp_srv->shc_sg_srv->sgs_flags)) { + (void) ilbd_hc_restart_timer(hc, tmp_srv); + tmp_srv->shc_status = ILB_HCS_UNINIT; + tmp_srv->shc_rtt = 0; + tmp_srv->shc_fail_cnt = 0; + } + } else { + if (tmp_srv->shc_status != ILB_HCS_DISABLED) { + HC_CANCEL_TIMER(tmp_srv); + tmp_srv->shc_status = ILB_HCS_DISABLED; + if (tmp_srv->shc_child_pid != 0) + ilbd_hc_kill_probe(tmp_srv); + } + } + } + return (ret); +} + +ilb_status_t +ilbd_hc_enable_rule(const ilbd_rule_t *rule) +{ + return (ilbd_hc_toggle_rule(rule, B_TRUE)); +} + +ilb_status_t 
+ilbd_hc_disable_rule(const ilbd_rule_t *rule) +{ + return (ilbd_hc_toggle_rule(rule, B_FALSE)); +} + +static const char * +topo_2_str(ilb_topo_t topo) +{ + switch (topo) { + case ILB_TOPO_DSR: + return ("DSR"); + break; + case ILB_TOPO_NAT: + return ("NAT"); + break; + case ILB_TOPO_HALF_NAT: + return ("HALF_NAT"); + break; + default: + /* Should not happen. */ + logerr("%s: unknown topology", __func__); + break; + } + return (""); +} + +/* + * Create the argument list to be passed to a hc probe command. + * The passed in argv is assumed to have HC_PROBE_ARGC elements. + */ +static boolean_t +create_argv(ilbd_hc_srv_t *srv, char *argv[]) +{ + char buf[INET6_ADDRSTRLEN]; + ilbd_rule_t const *rule; + ilb_sg_srv_t const *sg_srv; + struct in_addr v4_addr; + in_port_t port; + int i; + + rule = srv->shc_hc_rule->hcr_rule; + sg_srv = srv->shc_sg_srv; + + if (srv->shc_state == ilbd_hc_def_pinging) { + if ((argv[0] = strdup(ILB_PROBE_PING)) == NULL) + return (B_FALSE); + } else { + switch (srv->shc_hc->ihc_test_type) { + case ILBD_HC_USER: + if ((argv[0] = strdup(srv->shc_hc->ihc_test)) == NULL) + return (B_FALSE); + break; + case ILBD_HC_TCP: + case ILBD_HC_UDP: + if ((argv[0] = strdup(ILB_PROBE_PROTO)) == + NULL) { + return (B_FALSE); + } + break; + case ILBD_HC_PING: + if ((argv[0] = strdup(ILB_PROBE_PING)) == NULL) { + return (B_FALSE); + } + break; + } + } + + /* + * argv[1] is the VIP. + * + * Right now, the VIP and the backend server addresses should be + * in the same IP address family. Here we don't do that in case + * this assumption is changed in future. + */ + if (IN6_IS_ADDR_V4MAPPED(&rule->irl_vip)) { + IN6_V4MAPPED_TO_INADDR(&rule->irl_vip, &v4_addr); + if (inet_ntop(AF_INET, &v4_addr, buf, sizeof (buf)) == NULL) + goto cleanup; + } else { + if (inet_ntop(AF_INET6, &rule->irl_vip, buf, + sizeof (buf)) == NULL) { + goto cleanup; + } + } + if ((argv[1] = strdup(buf)) == NULL) + goto cleanup; + + /* + * argv[2] is the backend server address. 
+ */ + if (IN6_IS_ADDR_V4MAPPED(&sg_srv->sgs_addr)) { + IN6_V4MAPPED_TO_INADDR(&sg_srv->sgs_addr, &v4_addr); + if (inet_ntop(AF_INET, &v4_addr, buf, sizeof (buf)) == NULL) + goto cleanup; + } else { + if (inet_ntop(AF_INET6, &sg_srv->sgs_addr, buf, + sizeof (buf)) == NULL) { + goto cleanup; + } + } + if ((argv[2] = strdup(buf)) == NULL) + goto cleanup; + + /* + * argv[3] is the transport protocol used in the rule. + */ + switch (rule->irl_proto) { + case IPPROTO_TCP: + argv[3] = strdup("TCP"); + break; + case IPPROTO_UDP: + argv[3] = strdup("UDP"); + break; + default: + logerr("%s: unknown protocol", __func__); + goto cleanup; + break; + } + if (argv[3] == NULL) + goto cleanup; + + /* + * argv[4] is the load balance mode, DSR, NAT, HALF-NAT. + */ + if ((argv[4] = strdup(topo_2_str(rule->irl_topo))) == NULL) + goto cleanup; + + /* + * argv[5] is the port range. Right now, there should only be 1 port. + */ + switch (rule->irl_hcpflag) { + case ILB_HCI_PROBE_FIX: + port = ntohs(rule->irl_hcport); + break; + case ILB_HCI_PROBE_ANY: { + in_port_t min, max; + + if (ntohs(sg_srv->sgs_minport) == 0) { + min = ntohs(rule->irl_minport); + max = ntohs(rule->irl_maxport); + } else { + min = ntohs(sg_srv->sgs_minport); + max = ntohs(sg_srv->sgs_maxport); + } + if (max > min) + port = min + gethrtime() % (max - min + 1); + else + port = min; + break; + } + default: + logerr("%s: unknown HC flag", __func__); + goto cleanup; + break; + } + (void) sprintf(buf, "%d", port); + if ((argv[5] = strdup(buf)) == NULL) + goto cleanup; + + /* + * argv[6] is the probe timeout. 
+ */ + (void) sprintf(buf, "%d", srv->shc_hc->ihc_timeout); + if ((argv[6] = strdup(buf)) == NULL) + goto cleanup; + + argv[7] = NULL; + return (B_TRUE); + +cleanup: + for (i = 0; i < HC_PROBE_ARGC; i++) { + if (argv[i] != NULL) + free(argv[i]); + } + return (B_FALSE); +} + +static void +destroy_argv(char *argv[]) +{ + int i; + + for (i = 0; argv[i] != NULL; i++) + free(argv[i]); +} + +/* Spawn a process to run the hc probe on the given server. */ +static boolean_t +ilbd_run_probe(ilbd_hc_srv_t *srv) +{ + posix_spawn_file_actions_t fd_actions; + posix_spawnattr_t attr; + sigset_t child_sigset; + int fds[2]; + int fdflags; + pid_t pid; + char *child_argv[HC_PROBE_ARGC]; + ilbd_hc_probe_event_t *probe_ev; + char *probe_name; + + bzero(child_argv, HC_PROBE_ARGC * sizeof (char *)); + if ((probe_ev = calloc(1, sizeof (*probe_ev))) == NULL) { + logdebug("ilbd_run_probe: calloc"); + return (B_FALSE); + } + + /* Set up a pipe to get output from probe command. */ + if (pipe(fds) < 0) { + logdebug("ilbd_run_probe: cannot create pipe"); + free(probe_ev); + return (B_FALSE); + } + /* Set our side of the pipe to be non-blocking */ + if ((fdflags = fcntl(fds[0], F_GETFL, 0)) == -1) { + logdebug("ilbd_run_probe: fcntl(F_GETFL)"); + goto cleanup; + } + if (fcntl(fds[0], F_SETFL, fdflags | O_NONBLOCK) == -1) { + logdebug("ilbd_run_probe: fcntl(F_SETFL)"); + goto cleanup; + } + + if (posix_spawn_file_actions_init(&fd_actions) != 0) { + logdebug("ilbd_run_probe: posix_spawn_file_actions_init"); + goto cleanup; + } + if (posix_spawnattr_init(&attr) != 0) { + logdebug("ilbd_run_probe: posix_spawnattr_init"); + goto cleanup; + } + if (posix_spawn_file_actions_addclose(&fd_actions, fds[0]) != 0) { + logdebug("ilbd_run_probe: posix_spawn_file_actions_addclose"); + goto cleanup; + } + if (posix_spawn_file_actions_adddup2(&fd_actions, fds[1], + STDOUT_FILENO) != 0) { + logdebug("ilbd_run_probe: posix_spawn_file_actions_dup2"); + goto cleanup; + } + if 
(posix_spawn_file_actions_addclose(&fd_actions, fds[1]) != 0) { + logdebug("ilbd_run_probe: posix_spawn_file_actions_addclose"); + goto cleanup; + } + + /* Reset all signal handling of the child to default. */ + (void) sigfillset(&child_sigset); + if (posix_spawnattr_setsigdefault(&attr, &child_sigset) != 0) { + logdebug("ilbd_run_probe: posix_spawnattr_setsigdefault"); + goto cleanup; + } + /* Don't want SIGCHLD. */ + if (posix_spawnattr_setflags(&attr, POSIX_SPAWN_NOSIGCHLD_NP| + POSIX_SPAWN_SETSIGDEF) != 0) { + logdebug("ilbd_run_probe: posix_spawnattr_setflags"); + goto cleanup; + } + + if (!create_argv(srv, child_argv)) { + logdebug("ilbd_run_probe: create_argv"); + goto cleanup; + } + + /* + * If we are doing default pinging or not using a user supplied + * probe, we should execute our standard supplied probe. The + * supplied probe command handles all types of probes. And the + * type used depends on argv[0], as filled in by create_argv(). + */ + if (srv->shc_state == ilbd_hc_def_pinging || + srv->shc_hc->ihc_test_type != ILBD_HC_USER) { + probe_name = ILB_PROBE_PROTO; + } else { + probe_name = srv->shc_hc->ihc_test; + } + if (posix_spawn(&pid, probe_name, &fd_actions, &attr, child_argv, + NULL) != 0) { + logerr("%s: posix_spawn: %s for server %s: %s", __func__, + srv->shc_hc->ihc_test, srv->shc_sg_srv->sgs_srvID, + strerror(errno)); + goto cleanup; + } + + (void) close(fds[1]); + destroy_argv(child_argv); + srv->shc_child_pid = pid; + srv->shc_child_fd = fds[0]; + srv->shc_ev = probe_ev; + + probe_ev->ihp_ev = ILBD_EVENT_PROBE; + probe_ev->ihp_srv = srv; + probe_ev->ihp_pid = pid; + if (port_associate(srv->shc_ev_port, PORT_SOURCE_FD, fds[0], + POLLRDNORM, probe_ev) != 0) { + /* + * Need to kill the child. It will free the srv->shc_ev, + * which is probe_ev. So set probe_ev to NULL. 
+ */ + ilbd_hc_kill_probe(srv); + probe_ev = NULL; + goto cleanup; + } + + return (B_TRUE); + +cleanup: + (void) close(fds[0]); + (void) close(fds[1]); + destroy_argv(child_argv); + if (probe_ev != NULL) + free(probe_ev); + return (B_FALSE); +} + +/* + * Called by ild_hc_probe_return() to re-associate the fd to a child to + * the event port. + */ +static void +reassociate_port(int ev_port, int fd, ilbd_hc_probe_event_t *ev) +{ + if (port_associate(ev_port, PORT_SOURCE_FD, fd, + POLLRDNORM, ev) != 0) { + /* + * If we cannot reassociate with the port, the only + * thing we can do now is to kill the child and + * do a blocking wait here... + */ + logdebug("%s: port_associate: %s", __func__, strerror(errno)); + if (kill(ev->ihp_pid, SIGKILL) != 0) + logerr("%s: kill: %s", __func__, strerror(errno)); + if (waitpid(ev->ihp_pid, NULL, 0) != ev->ihp_pid) + logdebug("%s: waitpid: %s", __func__, strerror(errno)); + free(ev); + } +} + +/* + * To handle a child probe process hanging up. + */ +static void +ilbd_hc_child_hup(int ev_port, int fd, ilbd_hc_probe_event_t *ev) +{ + ilbd_hc_srv_t *srv; + pid_t ret_pid; + int ret; + + srv = ev->ihp_srv; + + if (!ev->ihp_done) { + /* ilbd does not care about this process anymore ... */ + ev->ihp_done = B_TRUE; + srv->shc_ev = NULL; + srv->shc_child_pid = 0; + HC_CANCEL_TIMER(srv); + ilbd_set_fail_state(srv); + } + ret_pid = waitpid(ev->ihp_pid, &ret, WNOHANG); + switch (ret_pid) { + case -1: + logperror("ilbd_hc_child_hup: waitpid"); + /* FALLTHROUGH */ + case 0: + /* The child has not completed the exit. Wait again. */ + reassociate_port(ev_port, fd, ev); + break; + default: + /* Right now, we just ignore the exit status. */ + if (WIFEXITED(ret)) + ret = WEXITSTATUS(ret); + (void) close(fd); + free(ev); + } +} + +/* + * To read the output of a child probe process. 
+ */ +static void +ilbd_hc_child_data(int fd, ilbd_hc_probe_event_t *ev) +{ + ilbd_hc_srv_t *srv; + char buf[HC_MAX_PROBE_OUTPUT]; + int ret; + int64_t rtt; + + srv = ev->ihp_srv; + + bzero(buf, HC_MAX_PROBE_OUTPUT); + ret = read(fd, buf, HC_MAX_PROBE_OUTPUT - 1); + /* Should not happen since event port should have caught this. */ + assert(ret > 0); + + /* + * We expect the probe command to print out the RTT only. But + * the command may misbehave and print out more than what we intend to + * read in. So need to do this check below to "flush" out all the + * output from the command. + */ + if (!ev->ihp_done) { + ev->ihp_done = B_TRUE; + /* We don't need to know about this event anymore. */ + srv->shc_ev = NULL; + srv->shc_child_pid = 0; + HC_CANCEL_TIMER(srv); + } else { + return; + } + + rtt = strtoll(buf, NULL, 10); + + /* + * -1 means the server is dead or the probe somehow fails. Treat + * them both as server is dead. + */ + if (rtt == -1) { + ilbd_set_fail_state(srv); + return; + } else if (rtt > 0) { + /* If the returned RTT value is not valid, just ignore it. */ + if (rtt > 0 && rtt <= UINT_MAX) { + /* Set rtt to be the simple smoothed average. */ + if (srv->shc_rtt == 0) { + srv->shc_rtt = rtt; + } else { + srv->shc_rtt = 3 * ((srv)->shc_rtt >> 2) + + (rtt >> 2); + } + } + + } + + switch (srv->shc_state) { + case ilbd_hc_def_pinging: + srv->shc_state = ilbd_hc_probing; + + /* Ping is OK, now start the probe. */ + ilbd_hc_probe_timer(ilbd_hc_timer_q, srv); + break; + case ilbd_hc_probing: + srv->shc_fail_cnt = 0; + + /* Server is dead before, re-enable it. */ + if (srv->shc_status == ILB_HCS_UNREACH || + srv->shc_status == ILB_HCS_DEAD) { + /* + * If enabling the server in kernel fails now, + * hopefully when the timer fires again later, the + * enabling can be done. 
+ */ + if (ilbd_k_Xable_server(&srv->shc_sg_srv->sgs_addr, + srv->shc_hc_rule->hcr_rule->irl_name, + stat_declare_srv_alive) != ILB_STATUS_OK) { + logerr("%s: cannot enable server in kernel: " + " rule %s server %s", __func__, + srv->shc_hc_rule->hcr_rule->irl_name, + srv->shc_sg_srv->sgs_srvID); + } else { + srv->shc_status = ILB_HCS_ALIVE; + } + } else { + srv->shc_status = ILB_HCS_ALIVE; + } + if (ilbd_hc_restart_timer(srv->shc_hc, srv) != ILB_STATUS_OK) { + logerr("%s: cannot restart timer: rule %s server %s", + __func__, srv->shc_hc_rule->hcr_rule->irl_name, + srv->shc_sg_srv->sgs_srvID); + ilbd_mark_server_disabled(srv); + } + break; + default: + logdebug("%s: unknown state", __func__); + break; + } +} + +/* + * Handle the return event of a child probe fd. + */ +void +ilbd_hc_probe_return(int ev_port, int fd, int port_events, + ilbd_hc_probe_event_t *ev) +{ + /* + * Note that there can be more than one events delivered to us at + * the same time. So we need to check them individually. + */ + if (port_events & POLLRDNORM) + ilbd_hc_child_data(fd, ev); + + if (port_events & (POLLHUP|POLLERR)) { + ilbd_hc_child_hup(ev_port, fd, ev); + return; + } + + /* + * Re-associate the fd with the port so that when the child + * exits, we can reap the status. + */ + reassociate_port(ev_port, fd, ev); +} diff --git a/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_main.c b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_main.c new file mode 100644 index 0000000000..2668f5681f --- /dev/null +++ b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_main.c @@ -0,0 +1,1025 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * The core of ilbd daemon is a single-threaded event loop using + * event completion framework; it receives requests from client using + * the libilb functions, handles timeouts, initiates health checks, and + * populates the kernel state. + * + * The daemon has the following privileges (in addition to the basic ones): + * + * PRIV_PROC_OWNER, PRIV_NET_ICMPACCESS, + * PRIV_SYS_IP_CONFIG, PRIV_PROC_AUDIT + * + * The aforementioned privileges will be specified in the SMF manifest. + * + * AF_UNIX socket is used for IPC between libilb and this daemon as + * both processes will run on the same machine. + * + * To do health check, the daemon will create a timer for every health + * check probe. Each of these timers will be associated with the + * event port. When a timer goes off, the daemon will initiate a + * pipe to a separate process to execute the specific health check + * probe. This new process will run with the same user-id as that of + * ilbd daemon and will inherit all the privileges from the ilbd + * daemon parent process except the following: + * + * PRIV_PROC_OWNER, PRIV_PROC_AUDIT + * + * All health checks, will be implemented as external methods + * (binary or script). 
The following arguments will be passed + * to external methods: + * + * $1 VIP (literal IPv4 or IPv6 address) + * $2 Server IP (literal IPv4 or IPv6 address) + * $3 Protocol (UDP, TCP as a string) + * $4 The load balance mode, "DSR", "NAT", "HALF_NAT" + * $5 Numeric port range + * $6 maximum time (in seconds) the method + * should wait before returning failure. If the method runs for + * longer, it may be killed, and the test considered failed. + * + * Upon success, a health check method should print the RTT + * it finds to its STDOUT for ilbd to consume. The implicit unit + * is microseconds but only the number needs to be printed. If it + * cannot find the RTT, it should print 0. If the method decides + * that the server is dead, it should print -1 to its STDOUT. + * + * By default, a user-supplied health check probe process will + * also run with the same set of privileges as ILB's built-in + * probes. If the administrator has a user-supplied health check + * program that requires a larger privilege set, he/she will have + * to implement a setuid program. + * + * Each health check will have a timeout, such that if the health + * check process is hung, it will be killed after the timeout interval + * and the daemon will notify the kernel ILB engine of the server's + * unresponsiveness, so that load distribution can be appropriately + * adjusted. If on the other hand the health check is successful + * the timeout timer is cancelled.
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> +#include <libgen.h> +#include <fcntl.h> +#include <stddef.h> +#include <signal.h> +#include <port.h> +#include <ctype.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <sys/stat.h> +#include <sys/note.h> +#include <sys/resource.h> +#include <unistd.h> +#include <sys/socket.h> +#include <errno.h> +#include <ucred.h> +#include <priv_utils.h> +#include <net/if.h> +#include <libilb.h> +#include <assert.h> +#include <inet/ilb.h> +#include <libintl.h> +#include <fcntl.h> +#include <rpcsvc/daemon_utils.h> +#include "libilb_impl.h" +#include "ilbd.h" + +/* + * NOTE: The following needs to be kept up to date. + */ +#define ILBD_VERSION "1.0" +#define ILBD_COPYRIGHT \ + "Copyright 2009 Sun Microsystems, Inc. All rights reserved.\n" \ + "Use is subject to license terms.\n" + +/* + * Global reply buffer to client request. Note that ilbd is single threaded, + * so a global buffer is OK. If ilbd becomes multi-threaded, this needs to + * be changed. + */ +static uint32_t reply_buf[ILBD_MSG_SIZE / sizeof (uint32_t)]; + +static void +ilbd_free_cli(ilbd_client_t *cli) +{ + (void) close(cli->cli_sd); + if (cli->cli_cmd == ILBD_SHOW_NAT) + ilbd_show_nat_cleanup(); + if (cli->cli_cmd == ILBD_SHOW_PERSIST) + ilbd_show_sticky_cleanup(); + if (cli->cli_saved_reply != NULL) + free(cli->cli_saved_reply); + free(cli->cli_pw_buf); + free(cli); +} + +static void +ilbd_reset_kernel_state(void) +{ + ilb_status_t rc; + ilb_name_cmd_t kcmd; + + kcmd.cmd = ILB_DESTROY_RULE; + kcmd.flags = ILB_RULE_ALLRULES; + kcmd.name[0] = '\0'; + + rc = do_ioctl(&kcmd, 0); + if (rc != ILB_STATUS_OK) + logdebug("ilbd_reset_kernel_state: do_ioctl failed: %s", + strerror(errno)); +} + +/* Signal handler to do clean up. */ +/* ARGSUSED */ +static void +ilbd_cleanup(int sig) +{ + (void) remove(SOCKET_PATH); + ilbd_reset_kernel_state(); + exit(0); +} + +/* + * Create a socket and return it to caller. 
If there is a failure, this + * function calls exit(2). Hence it always returns a valid listener socket. + * + * Note that this function is called before ilbd becomes a daemon. So + * we call perror(3C) to print out error message directly so that SMF can + * catch them. + */ +static int +ilbd_create_client_socket(void) +{ + int s; + mode_t omask; + struct sockaddr_un sa; + int sobufsz; + + s = socket(PF_UNIX, SOCK_SEQPACKET, 0); + if (s == -1) { + perror("ilbd_create_client_socket: socket to" + " client failed"); + exit(errno); + } + if (fcntl(s, F_SETFD, FD_CLOEXEC) == -1) { + perror("ilbd_create_client_socket: fcntl(FD_CLOEXEC)"); + exit(errno); + } + + sobufsz = ILBD_MSG_SIZE; + if (setsockopt(s, SOL_SOCKET, SO_SNDBUF, &sobufsz, + sizeof (sobufsz)) != 0) { + perror("ilbd_creat_client_socket: setsockopt(SO_SNDBUF) " + "failed"); + exit(errno); + } + if (setsockopt(s, SOL_SOCKET, SO_RCVBUF, &sobufsz, + sizeof (sobufsz)) != 0) { + perror("ilbd_creat_client_socket: setsockopt(SO_RCVBUF) " + "failed"); + exit(errno); + } + + /* + * since everybody can talk to us, we need to open up permissions + * we check peer privileges on a per-operation basis. + * This is no security issue as long as we're single-threaded. 
+ */ + omask = umask(0); + + /* just in case we didn't clean up properly after last exit */ + (void) remove(SOCKET_PATH); + + bzero(&sa, sizeof (sa)); + sa.sun_family = AF_UNIX; + (void) strlcpy(sa.sun_path, SOCKET_PATH, sizeof (sa.sun_path)); + + if (bind(s, (struct sockaddr *)&sa, sizeof (sa)) != 0) { + perror("ilbd_create_client_socket(): bind to client" + " socket failed"); + exit(errno); + } + + /* re-instate old umask */ + (void) umask(omask); + +#define QLEN 16 + + if (listen(s, QLEN) != 0) { + perror("ilbd_create_client_socket: listen to client" + " socket failed"); + exit(errno); + } + + (void) signal(SIGHUP, SIG_IGN); + (void) signal(SIGPIPE, SIG_IGN); + (void) signal(SIGSTOP, SIG_IGN); + (void) signal(SIGTSTP, SIG_IGN); + (void) signal(SIGTTIN, SIG_IGN); + (void) signal(SIGTTOU, SIG_IGN); + + (void) signal(SIGINT, ilbd_cleanup); + (void) signal(SIGTERM, ilbd_cleanup); + (void) signal(SIGQUIT, ilbd_cleanup); + + return (s); +} + +/* + * Return the minimum size of a given request. The returned size does not + * include the variable part of a request. 
+ */ +static size_t +ilbd_cmd_size(const ilb_comm_t *ic) +{ + size_t cmd_sz; + + cmd_sz = sizeof (*ic); + switch (ic->ic_cmd) { + case ILBD_RETRIEVE_SG_NAMES: + case ILBD_RETRIEVE_RULE_NAMES: + case ILBD_RETRIEVE_HC_NAMES: + case ILBD_CMD_OK: + break; + case ILBD_CMD_ERROR: + cmd_sz += sizeof (ilb_status_t); + break; + case ILBD_RETRIEVE_SG_HOSTS: + case ILBD_CREATE_SERVERGROUP: + case ILBD_DESTROY_SERVERGROUP: + case ILBD_DESTROY_RULE: + case ILBD_ENABLE_RULE: + case ILBD_DISABLE_RULE: + case ILBD_RETRIEVE_RULE: + case ILBD_DESTROY_HC: + case ILBD_GET_HC_INFO: + case ILBD_GET_HC_SRVS: + cmd_sz += sizeof (ilbd_name_t); + break; + case ILBD_ENABLE_SERVER: + case ILBD_DISABLE_SERVER: + case ILBD_ADD_SERVER_TO_GROUP: + case ILBD_REM_SERVER_FROM_GROUP: + cmd_sz += sizeof (ilb_sg_info_t); + break; + case ILBD_SRV_ADDR2ID: + case ILBD_SRV_ID2ADDR: + cmd_sz += sizeof (ilb_sg_info_t) + sizeof (ilb_sg_srv_t); + break; + case ILBD_CREATE_RULE: + cmd_sz += sizeof (ilb_rule_info_t); + break; + case ILBD_CREATE_HC: + cmd_sz += sizeof (ilb_hc_info_t); + break; + case ILBD_SHOW_NAT: + case ILBD_SHOW_PERSIST: + cmd_sz += sizeof (ilb_show_info_t); + break; + } + + return (cmd_sz); +} + +/* + * Given a request and its size, check that the size is big enough to + * contain the variable part of a request. 
+ */ +static ilb_status_t +ilbd_check_req_size(ilb_comm_t *ic, size_t ic_sz) +{ + ilb_status_t rc = ILB_STATUS_OK; + ilb_sg_info_t *sg_info; + ilbd_namelist_t *nlist; + + switch (ic->ic_cmd) { + case ILBD_CREATE_SERVERGROUP: + case ILBD_ENABLE_SERVER: + case ILBD_DISABLE_SERVER: + case ILBD_ADD_SERVER_TO_GROUP: + case ILBD_REM_SERVER_FROM_GROUP: + sg_info = (ilb_sg_info_t *)&ic->ic_data; + + if (ic_sz < ilbd_cmd_size(ic) + sg_info->sg_srvcount * + sizeof (ilb_sg_srv_t)) { + rc = ILB_STATUS_EINVAL; + } + break; + case ILBD_ENABLE_RULE: + case ILBD_DISABLE_RULE: + case ILBD_DESTROY_RULE: + nlist = (ilbd_namelist_t *)&ic->ic_data; + + if (ic_sz < ilbd_cmd_size(ic) + nlist->ilbl_count * + sizeof (ilbd_name_t)) { + rc = ILB_STATUS_EINVAL; + } + break; + } + return (rc); +} + +/* + * this function *relies* on a complete message/data struct + * being passed in (currently via the SOCK_SEQPACKET socket type). + * + * Note that the size of ip is at most ILBD_MSG_SIZE. + */ +static ilb_status_t +consume_common_struct(ilb_comm_t *ic, size_t ic_sz, ilbd_client_t *cli, + int ev_port) +{ + ilb_status_t rc; + struct passwd *ps; + size_t rbufsz; + ssize_t ret; + boolean_t standard_reply = B_TRUE; + ilbd_name_t name; + + /* + * cli_ev must be overridden during handling of individual commands, + * if there's a special need; otherwise, leave this for + * the "default" case + */ + cli->cli_ev = ILBD_EVENT_REQ; + + ps = &cli->cli_pw; + rbufsz = ILBD_MSG_SIZE; + + /* Sanity check on the size of the static part of a request. */ + if (ic_sz < ilbd_cmd_size(ic)) { + rc = ILB_STATUS_EINVAL; + goto out; + } + + switch (ic->ic_cmd) { + case ILBD_CREATE_SERVERGROUP: { + ilb_sg_info_t sg_info; + + /* + * ilbd_create_sg() only needs the sg_name field. But it + * takes in a ilb_sg_info_t because it is used as a callback + * in ilbd_walk_sg_pgs(). 
+ */ + (void) strlcpy(sg_info.sg_name, (char *)&(ic->ic_data), + sizeof (sg_info.sg_name)); + rc = ilbd_create_sg(&sg_info, ev_port, ps, + cli->cli_peer_ucredp); + break; + } + + case ILBD_DESTROY_SERVERGROUP: + (void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name)); + rc = ilbd_destroy_sg(name, ps, cli->cli_peer_ucredp); + break; + + case ILBD_ADD_SERVER_TO_GROUP: + if ((rc = ilbd_check_req_size(ic, ic_sz)) != ILB_STATUS_OK) + break; + rc = ilbd_add_server_to_group((ilb_sg_info_t *)&ic->ic_data, + ev_port, ps, cli->cli_peer_ucredp); + break; + + case ILBD_REM_SERVER_FROM_GROUP: + if ((rc = ilbd_check_req_size(ic, ic_sz)) != ILB_STATUS_OK) + break; + rc = ilbd_rem_server_from_group((ilb_sg_info_t *)&ic->ic_data, + ev_port, ps, cli->cli_peer_ucredp); + break; + + case ILBD_ENABLE_SERVER: + if ((rc = ilbd_check_req_size(ic, ic_sz)) != ILB_STATUS_OK) + break; + rc = ilbd_enable_server((ilb_sg_info_t *)&ic->ic_data, ps, + cli->cli_peer_ucredp); + break; + + case ILBD_DISABLE_SERVER: + if ((rc = ilbd_check_req_size(ic, ic_sz)) != ILB_STATUS_OK) + break; + rc = ilbd_disable_server((ilb_sg_info_t *)&ic->ic_data, ps, + cli->cli_peer_ucredp); + break; + + case ILBD_SRV_ADDR2ID: + rc = ilbd_address_to_srvID((ilb_sg_info_t *)&ic->ic_data, + reply_buf, &rbufsz); + if (rc == ILB_STATUS_OK) + standard_reply = B_FALSE; + break; + + case ILBD_SRV_ID2ADDR: + rc = ilbd_srvID_to_address((ilb_sg_info_t *)&ic->ic_data, + reply_buf, &rbufsz); + if (rc == ILB_STATUS_OK) + standard_reply = B_FALSE; + break; + + case ILBD_RETRIEVE_SG_HOSTS: + (void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name)); + rc = ilbd_retrieve_sg_hosts(name, reply_buf, &rbufsz); + if (rc == ILB_STATUS_OK) + standard_reply = B_FALSE; + break; + + case ILBD_RETRIEVE_SG_NAMES: + case ILBD_RETRIEVE_RULE_NAMES: + case ILBD_RETRIEVE_HC_NAMES: + rc = ilbd_retrieve_names(ic->ic_cmd, reply_buf, &rbufsz); + if (rc == ILB_STATUS_OK) + standard_reply = B_FALSE; + break; + + case ILBD_CREATE_RULE: + rc = 
ilbd_create_rule((ilb_rule_info_t *)&ic->ic_data, ev_port, + ps, cli->cli_peer_ucredp); + break; + + case ILBD_DESTROY_RULE: + /* Copy the name to ensure that name is NULL terminated. */ + (void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name)); + rc = ilbd_destroy_rule(name, ps, cli->cli_peer_ucredp); + break; + + case ILBD_ENABLE_RULE: + (void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name)); + rc = ilbd_enable_rule(name, ps, cli->cli_peer_ucredp); + break; + + case ILBD_DISABLE_RULE: + (void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name)); + rc = ilbd_disable_rule(name, ps, cli->cli_peer_ucredp); + break; + + case ILBD_RETRIEVE_RULE: + (void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name)); + rc = ilbd_retrieve_rule(name, reply_buf, &rbufsz); + if (rc == ILB_STATUS_OK) + standard_reply = B_FALSE; + break; + + case ILBD_CREATE_HC: + rc = ilbd_create_hc((ilb_hc_info_t *)&ic->ic_data, ev_port, ps, + cli->cli_peer_ucredp); + break; + + case ILBD_DESTROY_HC: + (void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name)); + rc = ilbd_destroy_hc(name, ps, cli->cli_peer_ucredp); + break; + + case ILBD_GET_HC_INFO: + (void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name)); + rc = ilbd_get_hc_info(name, reply_buf, &rbufsz); + if (rc == ILB_STATUS_OK) + standard_reply = B_FALSE; + break; + + case ILBD_GET_HC_SRVS: + (void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name)); + rc = ilbd_get_hc_srvs(name, reply_buf, &rbufsz); + if (rc == ILB_STATUS_OK) + standard_reply = B_FALSE; + break; + + case ILBD_SHOW_NAT: + rc = ilbd_show_nat(cli, ic, reply_buf, &rbufsz); + if (rc == ILB_STATUS_OK) + standard_reply = B_FALSE; + break; + + case ILBD_SHOW_PERSIST: + rc = ilbd_show_sticky(cli, ic, reply_buf, &rbufsz); + if (rc == ILB_STATUS_OK) + standard_reply = B_FALSE; + break; + + default: + logdebug("consume_common_struct: unknown command"); + rc = ILB_STATUS_INVAL_CMD; + break; + } + +out: + /* + * The message exchange is always in pairs, request/response. 
If + * a transaction requires multiple exchanges, the client will send + * in multiple requests to get multiple responses. The show-nat and + * show-persist request are examples of this. The end of transaction + * is marked with ic_flags set to ILB_COMM_END. + */ + + /* This is the standard reply. */ + if (standard_reply) { + if (rc == ILB_STATUS_OK) + ilbd_reply_ok(reply_buf, &rbufsz); + else + ilbd_reply_err(reply_buf, &rbufsz, rc); + } + + if ((ret = send(cli->cli_sd, reply_buf, rbufsz, 0)) != rbufsz) { + if (ret == -1) { + if (errno != EWOULDBLOCK) { + logdebug("consume_common_struct: send: %s", + strerror(errno)); + rc = ILB_STATUS_SEND; + goto err_out; + } + /* + * The reply is blocked, save the reply. handle_req() + * will associate the event port for the re-send. + */ + assert(cli->cli_saved_reply == NULL); + if ((cli->cli_saved_reply = malloc(rbufsz)) == NULL) { + /* + * Set the error to ILB_STATUS_SEND so that + * handle_req() will free the client. + */ + logdebug("consume_common_struct: failure to " + "allocate memory to save reply"); + rc = ILB_STATUS_SEND; + goto err_out; + } + bcopy(reply_buf, cli->cli_saved_reply, rbufsz); + cli->cli_saved_size = rbufsz; + return (ILB_STATUS_EWOULDBLOCK); + } + } +err_out: + return (rc); +} + +/* + * Accept a new client request. A struct ilbd_client_t is allocated to + * store the client info. The accepted socket is port_associate() with + * the given port. And the allocated ilbd_client_t struct is passed as + * the user pointer. + */ +static void +new_req(int ev_port, int listener, void *ev_obj) +{ + struct sockaddr sa; + int sa_len; + int new_sd; + int sflags; + ilbd_client_t *cli; + int res; + uid_t uid; + + sa_len = sizeof (sa); + if ((new_sd = accept(listener, &sa, &sa_len)) == -1) { + /* don't log if we're out of file descriptors */ + if (errno != EINTR && errno != EMFILE) + logperror("new_req: accept failed"); + goto done; + } + + /* Set the new socket to be non-blocking. 
*/ + if ((sflags = fcntl(new_sd, F_GETFL, 0)) == -1) { + logperror("new_req: fcntl(F_GETFL)"); + goto clean_up; + } + if (fcntl(new_sd, F_SETFL, sflags | O_NONBLOCK) == -1) { + logperror("new_req: fcntl(F_SETFL)"); + goto clean_up; + } + if (fcntl(new_sd, F_SETFD, FD_CLOEXEC) == -1) { + logperror("new_req: fcntl(FD_CLOEXEC)"); + goto clean_up; + } + if ((cli = calloc(1, sizeof (ilbd_client_t))) == NULL) { + logerr("new_req: malloc(ilbd_client_t)"); + goto clean_up; + } + res = getpeerucred(new_sd, &cli->cli_peer_ucredp); + if (res == -1) { + logperror("new_req: getpeerucred failed"); + goto clean_up; + } + if ((uid = ucred_getruid(cli->cli_peer_ucredp)) == (uid_t)-1) { + logperror("new_req: ucred_getruid failed"); + goto clean_up; + } + cli->cli_pw_bufsz = (size_t)sysconf(_SC_GETPW_R_SIZE_MAX); + if ((cli->cli_pw_buf = malloc(cli->cli_pw_bufsz)) == NULL) { + free(cli); + logerr("new_req: malloc(cli_pw_buf)"); + goto clean_up; + } + if (getpwuid_r(uid, &cli->cli_pw, cli->cli_pw_buf, + cli->cli_pw_bufsz) == NULL) { + free(cli->cli_pw_buf); + free(cli); + logperror("new_req: invalid user"); + goto clean_up; + } + cli->cli_ev = ILBD_EVENT_REQ; + cli->cli_sd = new_sd; + cli->cli_cmd = ILBD_BAD_CMD; + cli->cli_saved_reply = NULL; + cli->cli_saved_size = 0; + if (port_associate(ev_port, PORT_SOURCE_FD, new_sd, POLLRDNORM, + cli) == -1) { + logperror("new_req: port_associate(cli) failed"); + free(cli->cli_pw_buf); + free(cli); +clean_up: + (void) close(new_sd); + } + +done: + /* Re-associate the listener with the event port. 
*/ + if (port_associate(ev_port, PORT_SOURCE_FD, listener, POLLRDNORM, + ev_obj) == -1) { + logperror("new_req: port_associate(listener) failed"); + exit(1); + } +} + +static void +handle_req(int ev_port, ilbd_event_t event, ilbd_client_t *cli) +{ + /* All request should be smaller than ILBD_MSG_SIZE */ + union { + ilb_comm_t ic; + uint32_t buf[ILBD_MSG_SIZE / sizeof (uint32_t)]; + } ic_u; + int rc = ILB_STATUS_OK; + ssize_t r; + + if (event == ILBD_EVENT_REQ) { + /* + * Something is wrong with the client since there is a + * pending reply, the client should not send us another + * request. Kill this client. + */ + if (cli->cli_saved_reply != NULL) { + logerr("handle_req: misbehaving client, more than one " + "outstanding request"); + rc = ILB_STATUS_INTERNAL; + goto err_out; + } + + /* + * Our socket is message based so we should be able + * to get the request in one single read. + */ + r = recv(cli->cli_sd, (void *)ic_u.buf, sizeof (ic_u.buf), 0); + if (r < 0) { + if (errno != EINTR) { + logperror("handle_req: read failed"); + rc = ILB_STATUS_READ; + goto err_out; + } + /* + * If interrupted, just re-associate the cli_sd + * with the port. + */ + goto done; + } + cli->cli_cmd = ic_u.ic.ic_cmd; + + rc = consume_common_struct(&ic_u.ic, r, cli, ev_port); + if (rc == ILB_STATUS_EWOULDBLOCK) + goto blocked; + /* Fatal error communicating with client, free it. */ + if (rc == ILB_STATUS_SEND) + goto err_out; + } else { + assert(event == ILBD_EVENT_REP_OK); + assert(cli->cli_saved_reply != NULL); + + /* + * The reply to client was previously blocked, we will + * send again. 
+ */ + if (send(cli->cli_sd, cli->cli_saved_reply, + cli->cli_saved_size, 0) != cli->cli_saved_size) { + if (errno != EWOULDBLOCK) { + logdebug("handle_req: send: %s", + strerror(errno)); + rc = ILB_STATUS_SEND; + goto err_out; + } + goto blocked; + } + free(cli->cli_saved_reply); + cli->cli_saved_reply = NULL; + cli->cli_saved_size = 0; + } +done: + /* Re-associate with the event port for more requests. */ + cli->cli_ev = ILBD_EVENT_REQ; + if (port_associate(ev_port, PORT_SOURCE_FD, cli->cli_sd, + POLLRDNORM, cli) == -1) { + logperror("handle_req: port_associate(POLLRDNORM)"); + rc = ILB_STATUS_INTERNAL; + goto err_out; + } + return; + +blocked: + /* Re-associate with the event port. */ + cli->cli_ev = ILBD_EVENT_REP_OK; + if (port_associate(ev_port, PORT_SOURCE_FD, cli->cli_sd, POLLWRNORM, + cli) == -1) { + logperror("handle_req: port_associate(POLLWRNORM)"); + rc = ILB_STATUS_INTERNAL; + goto err_out; + } + return; + +err_out: + ilbd_free_cli(cli); +} + +static void +i_ilbd_read_config(int ev_port) +{ + logdebug("i_ilbd_read_config: port %d", ev_port); + (void) ilbd_walk_sg_pgs(ilbd_create_sg, &ev_port, NULL); + (void) ilbd_walk_hc_pgs(ilbd_create_hc, &ev_port, NULL); + (void) ilbd_walk_rule_pgs(ilbd_create_rule, &ev_port, NULL); +} + +/* + * main event loop for ilbd + * asserts that argument 'listener' is a server socket ready to accept() on. 
+ */ +static void +main_loop(int listener) +{ + port_event_t p_ev; + int ev_port, ev_port_obj; + ilbd_event_obj_t ev_obj; + ilbd_timer_event_obj_t timer_ev_obj; + + ev_port = port_create(); + if (ev_port == -1) { + logperror("main_loop: port_create failed"); + exit(-1); + } + ilbd_hc_timer_init(ev_port, &timer_ev_obj); + + ev_obj.ev = ILBD_EVENT_NEW_REQ; + if (port_associate(ev_port, PORT_SOURCE_FD, listener, POLLRDNORM, + &ev_obj) == -1) { + logperror("main_loop: port_associate failed"); + exit(1); + } + + i_ilbd_read_config(ev_port); + ilbd_hc_timer_update(&timer_ev_obj); + + _NOTE(CONSTCOND) + while (B_TRUE) { + int r; + ilbd_event_t event; + ilbd_client_t *cli; + + r = port_get(ev_port, &p_ev, NULL); + if (r == -1) { + if (errno == EINTR) + continue; + logperror("main_loop: port_get failed"); + break; + } + + ev_port_obj = p_ev.portev_object; + event = ((ilbd_event_obj_t *)p_ev.portev_user)->ev; + + switch (event) { + case ILBD_EVENT_TIMER: + ilbd_hc_timeout(); + break; + + case ILBD_EVENT_PROBE: + ilbd_hc_probe_return(ev_port, ev_port_obj, + p_ev.portev_events, + (ilbd_hc_probe_event_t *)p_ev.portev_user); + break; + + case ILBD_EVENT_NEW_REQ: + assert(ev_port_obj == listener); + /* + * An error happens in the listener. Exit + * for now.... + */ + if (p_ev.portev_events & (POLLHUP|POLLERR)) { + logerr("main_loop: listener error"); + exit(1); + } + new_req(ev_port, ev_port_obj, &ev_obj); + break; + + case ILBD_EVENT_REP_OK: + case ILBD_EVENT_REQ: + cli = (ilbd_client_t *)p_ev.portev_user; + assert(ev_port_obj == cli->cli_sd); + + /* + * An error happens in the newly accepted + * client request. Clean up the client. 
+ * this also happens when client closes socket, + * so not necessarily a reason for alarm + */ + if (p_ev.portev_events & (POLLHUP|POLLERR)) { + ilbd_free_cli(cli); + break; + } + + handle_req(ev_port, event, cli); + break; + + default: + logerr("main_loop: unknown event %d", event); + exit(EXIT_FAILURE); + break; + } + + ilbd_hc_timer_update(&timer_ev_obj); + } +} + +static void +i_ilbd_setup_lists(void) +{ + i_setup_sg_hlist(); + i_setup_rule_hlist(); + i_ilbd_setup_hc_list(); +} + +/* + * Usage message - call only during startup. it will print its + * message on stderr and exit + */ +static void +Usage(char *name) +{ + (void) fprintf(stderr, gettext("Usage: %s [-d|--debug]\n"), name); + exit(1); +} + +static void +print_version(char *name) +{ + (void) printf("%s %s\n", basename(name), ILBD_VERSION); + (void) printf(gettext(ILBD_COPYRIGHT)); + exit(0); +} + +/* + * Increase the file descriptor limit for handling a lot of health check + * processes (each requires a pipe). + * + * Note that this function is called before ilbd becomes a daemon. So + * we call perror(3C) to print out error message directly so that SMF + * can catch them. 
+ */ +static void +set_rlim(void) +{ + struct rlimit rlp; + + if (getrlimit(RLIMIT_NOFILE, &rlp) == -1) { + perror("ilbd: getrlimit"); + exit(errno); + } + rlp.rlim_cur = rlp.rlim_max; + if (setrlimit(RLIMIT_NOFILE, &rlp) == -1) { + perror("ilbd: setrlimit"); + exit(errno); + } +} + +int +main(int argc, char **argv) +{ + int s; + int c; + + (void) setlocale(LC_ALL, ""); +#if !defined(TEXT_DOMAIN) +#define TEXT_DOMAIN "SYS_TEST" +#endif + static const char daemon_dir[] = DAEMON_DIR; + + (void) textdomain(TEXT_DOMAIN); + + while ((c = getopt(argc, argv, ":V?d(debug)")) != -1) { + switch ((char)c) { + case '?': Usage(argv[0]); + /* not reached */ + break; + case 'V': print_version(argv[0]); + /* not reached */ + break; + case 'd': ilbd_enable_debug(); + break; + default: Usage(argv[0]); + /* not reached */ + break; + } + } + + /* + * Whenever the daemon starts, it needs to start with a clean + * slate in the kernel. We need sys_ip_config privilege for + * this. + */ + ilbd_reset_kernel_state(); + + /* Increase the limit on the number of file descriptors. */ + set_rlim(); + + /* + * ilbd daemon starts off as root, just so it can create + * /var/run/daemon if one does not exist. After that is done + * the daemon switches to "daemon" uid. This is similar to what + * rpcbind does. + */ + if (mkdir(daemon_dir, DAEMON_DIR_MODE) == 0 || errno == EEXIST) { + (void) chmod(daemon_dir, DAEMON_DIR_MODE); + (void) chown(daemon_dir, DAEMON_UID, DAEMON_GID); + } else { + perror("main: mkdir failed"); + exit(errno); + } + /* + * Now lets switch ilbd as uid = daemon, gid = daemon with a + * trimmed down privilege set + */ + if (__init_daemon_priv(PU_RESETGROUPS | PU_LIMITPRIVS | PU_INHERITPRIVS, + DAEMON_UID, DAEMON_GID, PRIV_PROC_OWNER, PRIV_PROC_AUDIT, + PRIV_NET_ICMPACCESS, PRIV_SYS_IP_CONFIG, NULL) == -1) { + (void) fprintf(stderr, "Insufficient privileges\n"); + exit(EXIT_FAILURE); + } + + /* + * Opens a PF_UNIX socket to the client. No privilege needed + * for this. 
+ */ + s = ilbd_create_client_socket(); + + /* + * Daemonify if ilbd is not running with -d option + * Need proc_fork privilege for this + */ + if (!is_debugging_on()) { + logdebug("daemonizing..."); + if (daemon(0, 0) != 0) { + logperror("daemon failed"); + exit(EXIT_FAILURE); + } + } + (void) priv_set(PRIV_OFF, PRIV_INHERITABLE, PRIV_PROC_OWNER, + PRIV_PROC_AUDIT, NULL); + + /* if daemonified then set up syslog */ + if (!is_debugging_on()) + openlog("ilbd", LOG_PID, LOG_DAEMON); + + i_ilbd_setup_lists(); + + main_loop(s); + + /* + * if we come here, then we experienced an error or a shutdown + * indicator, so clean up after ourselves. + */ + logdebug("main(): terminating"); + + (void) remove(SOCKET_PATH); + ilbd_reset_kernel_state(); + + return (0); +} diff --git a/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_nat.c b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_nat.c new file mode 100644 index 0000000000..cdb114c795 --- /dev/null +++ b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_nat.c @@ -0,0 +1,295 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include <sys/types.h> +#include <libilb.h> +#include <inet/ilb.h> +#include <stddef.h> +#include <stdlib.h> +#include <strings.h> +#include <errno.h> +#include <assert.h> +#include <macros.h> +#include "libilb_impl.h" +#include "ilbd.h" + +/* + * We only allow one show nat/persist command running at any time. Note that + * there is no lock for this since ilbd is single threaded. And we only care + * about the pointer value of client, not its type. + * + * The following variables store the current client making the request. + */ +static void *nat_cur_cli; +static void *sticky_cur_cli; + +/* Maximum number of NAT/sticky entries to request from kernel. */ +#define NUM_ENTRIES 500 + +/* + * Clear the current requesting client. This will allow a new client + * to make a request. + */ +void +ilbd_show_nat_cleanup(void) +{ + nat_cur_cli = NULL; +} + +void +ilbd_show_sticky_cleanup(void) +{ + sticky_cur_cli = NULL; +} + +/* + * To show the kernel NAT table. + * + * cli: the client pointer making the request. + * ic: the client request. + * rbuf: reply buffer to be filled in. + * rbufsz: reply buffer size. + */ +ilb_status_t +ilbd_show_nat(void *cli, const ilb_comm_t *ic, uint32_t *rbuf, size_t *rbufsz) +{ + ilb_show_info_t *req_si = (ilb_show_info_t *)&ic->ic_data; + ilb_list_nat_cmd_t *kcmd; + boolean_t start; + size_t tmp_rbufsz, kbufsz; + uint32_t max_num; + ilb_status_t ret; + int i; + ilb_show_info_t *reply; + ilb_nat_info_t *nat_ret; + + /* For new client request, start from the beginning of the table. */ + if (nat_cur_cli == NULL) { + nat_cur_cli = cli; + start = B_TRUE; + } else if (cli == nat_cur_cli) { + /* + * Another request from client. If the client does not + * want to continue, reset the current client and reply OK. + */ + if (ic->ic_flags & ILB_COMM_END) { + ilbd_show_nat_cleanup(); + ilbd_reply_ok(rbuf, rbufsz); + return (ILB_STATUS_OK); + } + start = B_FALSE; + } else { + /* A request is on-going, so reject a new client. 
*/ + return (ILB_STATUS_INPROGRESS); + } + + tmp_rbufsz = *rbufsz; + ilbd_reply_ok(rbuf, rbufsz); + reply = (ilb_show_info_t *)&((ilb_comm_t *)rbuf)->ic_data; + + /* + * Calculate the max number of ilb_nat_info_t can be fitted in the + * reply. + */ + *rbufsz += sizeof (ilb_show_info_t *); + tmp_rbufsz -= *rbufsz; + max_num = tmp_rbufsz / sizeof (ilb_nat_info_t); + + /* + * Calculate the exact number of entries we should request from kernel. + */ + max_num = min(req_si->sn_num, min(NUM_ENTRIES, max_num)); + + kbufsz = max_num * sizeof (ilb_nat_entry_t) + + offsetof(ilb_list_nat_cmd_t, entries); + if ((kcmd = malloc(kbufsz)) == NULL) { + logdebug("ilbd_show_nat: malloc(cmd)"); + ilbd_reply_err(rbuf, rbufsz, ILB_STATUS_ENOMEM); + return (ILB_STATUS_ENOMEM); + } + + kcmd->cmd = ILB_LIST_NAT_TABLE; + kcmd->flags = start ? ILB_LIST_BEGIN : ILB_LIST_CONT; + kcmd->num_nat = max_num; + if ((ret = do_ioctl(kcmd, kbufsz)) != ILB_STATUS_OK) { + logperror("ilbd_show_nat: ioctl(ILB_LIST_NAT_TABLE)"); + ilbd_reply_err(rbuf, rbufsz, ret); + free(kcmd); + return (ret); + } + + reply->sn_num = kcmd->num_nat; + *rbufsz += reply->sn_num * sizeof (ilb_nat_info_t); + + /* + * It is the end of table, let the client know. And the transaction + * is done. + */ + if (kcmd->flags & ILB_LIST_END) { + nat_cur_cli = NULL; + } else { + /* + * ilbd_reply_ok() sets ic_flags to ILB_COMM_END by default. + * Need to clear it here. 
+ */ + ((ilb_comm_t *)rbuf)->ic_flags = 0; + } + + nat_ret = (ilb_nat_info_t *)&reply->sn_data; + + for (i = 0; i < kcmd->num_nat; i++) { + ilb_nat_entry_t *nat; + + nat = &kcmd->entries[i]; + + nat_ret->nat_proto = nat->proto; + + nat_ret->nat_in_local = nat->in_local; + nat_ret->nat_in_global = nat->in_global; + nat_ret->nat_out_local = nat->out_local; + nat_ret->nat_out_global = nat->out_global; + + nat_ret->nat_in_local_port = nat->in_local_port; + nat_ret->nat_in_global_port = nat->in_global_port; + nat_ret->nat_out_local_port = nat->out_local_port; + nat_ret->nat_out_global_port = nat->out_global_port; + + nat_ret++; + } + +end: + free(kcmd); + return (ret); +} + +/* + * To show the kernel sticky table. + * + * cli: the client pointer making the request. + * req_si: information about the show-persist request. + * rbuf: reply buffer to be filled in. + * rbufsz: reply buffer size. + */ +ilb_status_t +ilbd_show_sticky(void *cli, const ilb_comm_t *ic, uint32_t *rbuf, + size_t *rbufsz) +{ + ilb_show_info_t *req_si = (ilb_show_info_t *)&ic->ic_data; + ilb_list_sticky_cmd_t *kcmd; + boolean_t start; + size_t tmp_rbufsz, kbufsz; + uint32_t max_num; + ilb_status_t ret; + int i; + ilb_show_info_t *reply; + ilb_persist_info_t *st_ret; + + /* For new client request, start from the beginning of the table. */ + if (sticky_cur_cli == NULL) { + sticky_cur_cli = cli; + start = B_TRUE; + } else if (cli == sticky_cur_cli) { + /* + * Another request from client. If the client does not + * want to continue, reset the current client and reply OK. + */ + if (ic->ic_flags & ILB_COMM_END) { + ilbd_show_sticky_cleanup(); + ilbd_reply_ok(rbuf, rbufsz); + return (ILB_STATUS_OK); + } + start = B_FALSE; + } else { + /* A request is on-going, so reject a new client. 
*/ + return (ILB_STATUS_INPROGRESS); + } + + tmp_rbufsz = *rbufsz; + ilbd_reply_ok(rbuf, rbufsz); + reply = (ilb_show_info_t *)&((ilb_comm_t *)rbuf)->ic_data; + + /* + * Calculate the max number of ilb_persist_info_t can be fitted in the + * reply. + */ + *rbufsz += sizeof (ilb_show_info_t *); + tmp_rbufsz -= *rbufsz; + max_num = tmp_rbufsz / sizeof (ilb_persist_info_t); + + /* + * Calculate the exact number of entries we should request from kernel. + */ + max_num = min(req_si->sn_num, min(NUM_ENTRIES, max_num)); + + kbufsz = max_num * sizeof (ilb_sticky_entry_t) + + offsetof(ilb_list_sticky_cmd_t, entries); + if ((kcmd = malloc(kbufsz)) == NULL) { + logdebug("ilbd_show_nat: malloc(cmd)"); + ilbd_reply_err(rbuf, rbufsz, ILB_STATUS_ENOMEM); + return (ILB_STATUS_ENOMEM); + } + + kcmd->cmd = ILB_LIST_STICKY_TABLE; + kcmd->flags = start ? ILB_LIST_BEGIN : ILB_LIST_CONT; + kcmd->num_sticky = max_num; + if ((ret = do_ioctl(kcmd, kbufsz)) != ILB_STATUS_OK) { + logperror("ilbd_show_nat: ioctl(ILB_LIST_STICKY_TABLE)"); + ilbd_reply_err(rbuf, rbufsz, ret); + free(kcmd); + return (ret); + } + + reply->sn_num = kcmd->num_sticky; + *rbufsz += reply->sn_num * sizeof (ilb_persist_info_t); + + if (kcmd->flags & ILB_LIST_END) { + sticky_cur_cli = NULL; + } else { + /* + * ilbd_reply_ok() sets ic_flags to ILB_COMM_END by default. + * Need to clear it here. 
+ */ + ((ilb_comm_t *)rbuf)->ic_flags = 0; + } + + st_ret = (ilb_persist_info_t *)&reply->sn_data; + + for (i = 0; i < kcmd->num_sticky; i++) { + ilb_sticky_entry_t *st; + + st = &kcmd->entries[i]; + + (void) strlcpy(st_ret->persist_rule_name, st->rule_name, + ILB_NAMESZ); + st_ret->persist_req_addr = st->req_addr; + st_ret->persist_srv_addr = st->srv_addr; + st_ret++; + } + +end: + free(kcmd); + return (ret); +} diff --git a/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_rules.c b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_rules.c new file mode 100644 index 0000000000..7a67c03283 --- /dev/null +++ b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_rules.c @@ -0,0 +1,1357 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include <stdlib.h> +#include <strings.h> +#include <stddef.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <sys/list.h> +#include <net/if.h> +#include <assert.h> +#include <errno.h> +#include <libintl.h> +#include <libilb.h> +#include <inet/ilb.h> +#include "libilb_impl.h" +#include "ilbd.h" + +/* until we all use AF_* macros ... */ +#define AF_2_IPPROTO(_af) (_af == AF_INET)?IPPROTO_IP:IPPROTO_IPV6 +#define IPPROTO_2_AF(_i) (_i == IPPROTO_IP)?AF_INET:AF_INET6 + +static ilb_status_t ilbd_disable_one_rule(ilbd_rule_t *, boolean_t); +static uint32_t i_flags_d2k(int); + +#define ILB_SGSRV_2_KSRV(s, k) \ + (k)->addr = (s)->sgs_addr; \ + (k)->min_port = (s)->sgs_minport; \ + (k)->max_port = (s)->sgs_maxport; \ + (k)->flags = i_flags_d2k((s)->sgs_flags); \ + (k)->err = 0; \ + (void) strlcpy((k)->name, (s)->sgs_srvID, sizeof ((k)->name)) + +list_t ilbd_rule_hlist; + +static ilb_algo_t +algo_impl2lib(ilb_algo_impl_t a) +{ + switch (a) { + case ILB_ALG_IMPL_ROUNDROBIN: + return (ILB_ALG_ROUNDROBIN); + case ILB_ALG_IMPL_HASH_IP: + return (ILB_ALG_HASH_IP); + case ILB_ALG_IMPL_HASH_IP_SPORT: + return (ILB_ALG_HASH_IP_SPORT); + case ILB_ALG_IMPL_HASH_IP_VIP: + return (ILB_ALG_HASH_IP_VIP); + } + return (0); +} + +static ilb_topo_t +topo_impl2lib(ilb_topo_impl_t t) +{ + switch (t) { + case ILB_TOPO_IMPL_DSR: + return (ILB_TOPO_DSR); + case ILB_TOPO_IMPL_NAT: + return (ILB_TOPO_NAT); + case ILB_TOPO_IMPL_HALF_NAT: + return (ILB_TOPO_HALF_NAT); + } + return (0); +} + +ilb_algo_impl_t +algo_lib2impl(ilb_algo_t a) +{ + switch (a) { + case ILB_ALG_ROUNDROBIN: + return (ILB_ALG_IMPL_ROUNDROBIN); + case ILB_ALG_HASH_IP: + return (ILB_ALG_IMPL_HASH_IP); + case ILB_ALG_HASH_IP_SPORT: + return (ILB_ALG_IMPL_HASH_IP_SPORT); + case ILB_ALG_HASH_IP_VIP: + return (ILB_ALG_IMPL_HASH_IP_VIP); + } + return (0); +} + +ilb_topo_impl_t +topo_lib2impl(ilb_topo_t t) +{ + switch (t) { + case ILB_TOPO_DSR: + 
return (ILB_TOPO_IMPL_DSR); + case ILB_TOPO_NAT: + return (ILB_TOPO_IMPL_NAT); + case ILB_TOPO_HALF_NAT: + return (ILB_TOPO_IMPL_HALF_NAT); + } + return (0); +} + +/* + * Walk the list of rules and check if its safe to add the + * the server to the rule (this is a list of rules hanging + * off of a server group) + */ +ilb_status_t +i_check_srv2rules(list_t *rlist, ilb_sg_srv_t *srv) +{ + ilb_status_t rc = ILB_STATUS_OK; + ilbd_rule_t *rl; + int server_portrange, rule_portrange; + int srv_minport, srv_maxport; + int r_minport, r_maxport; + + if (srv == NULL) + return (ILB_STATUS_OK); + + srv_minport = ntohs(srv->sgs_minport); + srv_maxport = ntohs(srv->sgs_maxport); + + for (rl = list_head(rlist); rl != NULL; rl = list_next(rlist, rl)) { + r_minport = ntohs(rl->irl_minport); + r_maxport = ntohs(rl->irl_maxport); + + if ((srv_minport != 0) && (srv_minport == srv_maxport)) { + /* server has single port */ + if (rl->irl_topo == ILB_TOPO_DSR) { + /* + * either we have a DSR rule with a port + * range, or both server and rule + * have single ports but their values + * don't match - this is incompatible + */ + if (r_maxport > r_minport) { + rc = ILB_STATUS_INVAL_SRVR; + break; + } else if (srv_minport != r_minport) { + rc = ILB_STATUS_BADPORT; + break; + } + } + if (rl->irl_hcpflag == ILB_HCI_PROBE_FIX && + rl->irl_hcport != srv_minport) { + rc = ILB_STATUS_BADPORT; + break; + } + } else if (srv_maxport > srv_minport) { + /* server has a port range */ + if ((rl->irl_topo == ILB_TOPO_DSR) && + (r_maxport > r_minport)) { + if ((r_minport != srv_minport) || + (r_maxport != srv_maxport)) { + /* + * we have a DSR rule with a port range + * and its min and max port values + * does not meet that of server's + * - this is incompatible + */ + rc = ILB_STATUS_BADPORT; + break; + } + } else if ((rl->irl_topo == ILB_TOPO_DSR) && + (r_maxport == r_minport)) { + /* + * we have a DSR rule with a single + * port and a server with a port range + * - this is incompatible + */ + rc = 
ILB_STATUS_INVAL_SRVR; + break; + } else if (((rl->irl_topo == ILB_TOPO_NAT) || + (rl->irl_topo == ILB_TOPO_HALF_NAT)) && + (r_maxport > r_minport)) { + server_portrange = srv_maxport - srv_minport; + rule_portrange = r_maxport - r_minport; + if (rule_portrange != server_portrange) { + /* + * we have a NAT/Half-NAT rule with + * a port range and server with a port + * range and there is a mismatch in the + * sizes of the port ranges - this is + * incompatible + */ + rc = ILB_STATUS_INVAL_SRVR; + break; + } + } + if (rl->irl_hcpflag == ILB_HCI_PROBE_FIX && + (rl->irl_hcport > srv_maxport || + rl->irl_hcport < srv_minport)) { + rc = ILB_STATUS_BADPORT; + break; + } + } + } + + return (rc); +} + +void +i_setup_rule_hlist(void) +{ + list_create(&ilbd_rule_hlist, sizeof (ilbd_rule_t), + offsetof(ilbd_rule_t, irl_link)); +} + +ilb_status_t +i_ilbd_save_rule(ilbd_rule_t *irl, ilbd_scf_cmd_t scf_cmd) +{ + boolean_t enable = irl->irl_flags & ILB_FLAGS_RULE_ENABLED; + + switch (scf_cmd) { + case ILBD_SCF_CREATE: + return (ilbd_create_pg(ILBD_SCF_RULE, (void *)irl)); + case ILBD_SCF_DESTROY: + return (ilbd_destroy_pg(ILBD_SCF_RULE, irl->irl_name)); + case ILBD_SCF_ENABLE_DISABLE: + return (ilbd_change_prop(ILBD_SCF_RULE, irl->irl_name, + "status", &enable)); + default: + logdebug("i_ilbd_save_rule: invalid scf cmd %d", scf_cmd); + return (ILB_STATUS_INVAL_CMD); + } +} + +/* + * allocate a new daemon-specific rule from the "template" passed + * in in *r + */ +static ilbd_rule_t * +i_alloc_ilbd_rule(ilb_rule_info_t *r) +{ + ilbd_rule_t *rl; + + rl = calloc(sizeof (*rl), 1); + if (rl != NULL && r != NULL) + bcopy(r, &rl->irl_info, sizeof (*r)); + + return (rl); +} + +static ilbd_rule_t * +i_find_rule_byname(const char *name) +{ + ilbd_rule_t *rl; + + /* find position of rule in list */ + rl = list_head(&ilbd_rule_hlist); + while (rl != NULL && + strncmp(rl->irl_name, name, sizeof (rl->irl_name)) != 0) { + rl = list_next(&ilbd_rule_hlist, rl); + } + + return (rl); +} + +/* + * 
get exactly one rule (named in rl->irl_name) data from kernel + */ +static ilb_status_t +ilb_get_krule(ilb_rule_info_t *rl) +{ + ilb_status_t rc; + ilb_rule_cmd_t kcmd; + + kcmd.cmd = ILB_LIST_RULE; + (void) strlcpy(kcmd.name, rl->rl_name, sizeof (kcmd.name)); + kcmd.flags = 0; + + rc = do_ioctl(&kcmd, 0); + if (rc != ILB_STATUS_OK) + return (rc); + + rl->rl_flags = kcmd.flags; + rl->rl_ipversion = IPPROTO_2_AF(kcmd.ip_ver); + rl->rl_vip = kcmd.vip; + rl->rl_proto = kcmd.proto; + rl->rl_minport = kcmd.min_port; + rl->rl_maxport = kcmd.max_port; + rl->rl_algo = algo_impl2lib(kcmd.algo); + rl->rl_topo = topo_impl2lib(kcmd.topo); + rl->rl_stickymask = kcmd.sticky_mask; + rl->rl_nat_src_start = kcmd.nat_src_start; + rl->rl_nat_src_end = kcmd.nat_src_end; + (void) strlcpy(rl->rl_name, kcmd.name, sizeof (rl->rl_name)); + rl->rl_conndrain = kcmd.conn_drain_timeout; + rl->rl_nat_timeout = kcmd.nat_expiry; + rl->rl_sticky_timeout = kcmd.sticky_expiry; + + return (ILB_STATUS_OK); +} + +ilb_status_t +ilbd_retrieve_rule(ilbd_name_t rl_name, uint32_t *rbuf, size_t *rbufsz) +{ + ilbd_rule_t *irl = NULL; + ilb_status_t rc; + ilb_rule_info_t *rinfo; + + irl = i_find_rule_byname(rl_name); + if (irl == NULL) + return (ILB_STATUS_ENOENT); + + ilbd_reply_ok(rbuf, rbufsz); + rinfo = (ilb_rule_info_t *)&((ilb_comm_t *)rbuf)->ic_data; + bcopy(&irl->irl_info, rinfo, sizeof (*rinfo)); + + /* + * Check if the various timeout values are 0. If one is, get the + * default values from kernel. 
+ */ + if (rinfo->rl_conndrain == 0 || rinfo->rl_nat_timeout == 0 || + rinfo->rl_sticky_timeout == 0) { + ilb_rule_info_t tmp_info; + + (void) strcpy(tmp_info.rl_name, rinfo->rl_name); + rc = ilb_get_krule(&tmp_info); + if (rc != ILB_STATUS_OK) + return (rc); + if (rinfo->rl_conndrain == 0) + rinfo->rl_conndrain = tmp_info.rl_conndrain; + if ((rinfo->rl_topo == ILB_TOPO_IMPL_NAT || + rinfo->rl_topo == ILB_TOPO_IMPL_HALF_NAT) && + rinfo->rl_nat_timeout == 0) { + rinfo->rl_nat_timeout = tmp_info.rl_nat_timeout; + } + if ((rinfo->rl_flags & ILB_FLAGS_RULE_STICKY) && + rinfo->rl_sticky_timeout == 0) { + rinfo->rl_sticky_timeout = tmp_info.rl_sticky_timeout; + } + } + *rbufsz += sizeof (ilb_rule_info_t); + + return (ILB_STATUS_OK); +} + +static ilb_status_t +ilbd_destroy_one_rule(ilbd_rule_t *irl) +{ + ilb_status_t rc; + ilb_name_cmd_t kcmd; + + /* + * as far as talking to the kernel is concerned, "all rules" + * is handled in one go somewhere else, so we only + * tell the kernel about single rules here. + */ + if ((irl->irl_flags & ILB_FLAGS_RULE_ALLRULES) == 0) { + kcmd.cmd = ILB_DESTROY_RULE; + (void) strlcpy(kcmd.name, irl->irl_name, sizeof (kcmd.name)); + kcmd.flags = 0; + + rc = do_ioctl(&kcmd, 0); + if (rc != ILB_STATUS_OK) + return (rc); + + } + list_remove(&irl->irl_sg->isg_rulelist, irl); + list_remove(&ilbd_rule_hlist, irl); + + /* + * When dissociating a rule, only two errors can happen. The hc + * name is incorrect or the rule is not associated with the hc + * object. Both should not happen.... The check is for debugging + * purpose. 
+ */ + if (RULE_HAS_HC(irl) && (rc = ilbd_hc_dissociate_rule(irl)) != + ILB_STATUS_OK) { + logerr("ilbd_destroy_one_rule: cannot " + "dissociate %s from hc object %s: %d", + irl->irl_name, irl->irl_hcname, rc); + } + + rc = i_ilbd_save_rule(irl, ILBD_SCF_DESTROY); + if (rc != ILB_STATUS_OK) + logdebug("ilbd_destroy_rule: save rule failed"); + + free(irl); + return (rc); +} + +/* + * the following two functions are the other's opposite, and can + * call into each other for roll back purposes in case of error. + * To avoid endless recursion, the 'is_rollback' parameter must be + * set to B_TRUE in the roll back case. + */ +static ilb_status_t +ilbd_enable_one_rule(ilbd_rule_t *irl, boolean_t is_rollback) +{ + ilb_status_t rc = ILB_STATUS_OK; + ilb_name_cmd_t kcmd; + + /* no use sending a no-op to the kernel */ + if ((irl->irl_flags & ILB_FLAGS_RULE_ENABLED) != 0) + return (ILB_STATUS_OK); + + irl->irl_flags |= ILB_FLAGS_RULE_ENABLED; + + /* "all rules" is handled in one go somewhere else, not here */ + if ((irl->irl_flags & ILB_FLAGS_RULE_ALLRULES) == 0) { + kcmd.cmd = ILB_ENABLE_RULE; + (void) strlcpy(kcmd.name, irl->irl_name, sizeof (kcmd.name)); + kcmd.flags = 0; + + rc = do_ioctl(&kcmd, 0); + if (rc != ILB_STATUS_OK) + return (rc); + } + if (RULE_HAS_HC(irl) && (rc = ilbd_hc_enable_rule(irl)) != + ILB_STATUS_OK) { + /* Undo the kernel work */ + kcmd.cmd = ILB_DISABLE_RULE; + /* Cannot do much if ioctl fails... 
*/ + (void) do_ioctl(&kcmd, 0); + return (rc); + } + + if (!is_rollback) { + if (rc == ILB_STATUS_OK) + rc = i_ilbd_save_rule(irl, ILBD_SCF_ENABLE_DISABLE); + if (rc != ILB_STATUS_OK) + /* ignore rollback return code */ + (void) ilbd_disable_one_rule(irl, B_TRUE); + } + + return (rc); +} + +static ilb_status_t +ilbd_disable_one_rule(ilbd_rule_t *irl, boolean_t is_rollback) +{ + ilb_status_t rc = ILB_STATUS_OK; + ilb_name_cmd_t kcmd; + + /* no use sending a no-op to the kernel */ + if ((irl->irl_flags & ILB_FLAGS_RULE_ENABLED) == 0) + return (ILB_STATUS_OK); + + irl->irl_flags &= ~ILB_FLAGS_RULE_ENABLED; + + /* "all rules" is handled in one go somewhere else, not here */ + if ((irl->irl_flags & ILB_FLAGS_RULE_ALLRULES) == 0) { + kcmd.cmd = ILB_DISABLE_RULE; + (void) strlcpy(kcmd.name, irl->irl_name, sizeof (kcmd.name)); + kcmd.flags = 0; + + rc = do_ioctl(&kcmd, 0); + if (rc != ILB_STATUS_OK) + return (rc); + } + + if (RULE_HAS_HC(irl) && (rc = ilbd_hc_disable_rule(irl)) != + ILB_STATUS_OK) { + /* Undo the kernel work */ + kcmd.cmd = ILB_ENABLE_RULE; + /* Cannot do much if ioctl fails... 
*/ + (void) do_ioctl(&kcmd, 0); + return (rc); + } + + if (!is_rollback) { + if (rc == ILB_STATUS_OK) + rc = i_ilbd_save_rule(irl, ILBD_SCF_ENABLE_DISABLE); + if (rc != ILB_STATUS_OK) + /* ignore rollback return code */ + (void) ilbd_enable_one_rule(irl, B_TRUE); + } + + return (rc); +} + +/* + * Generates an audit record for a supplied rule name + * Used for enable_rule, disable_rule, delete_rule, + * and create_rule subcommands + */ +static void +ilbd_audit_rule_event(const char *audit_rule_name, + ilb_rule_info_t *rlinfo, ilbd_cmd_t cmd, ilb_status_t rc, + ucred_t *ucredp) +{ + adt_session_data_t *ah; + adt_event_data_t *event; + au_event_t flag; + int scf_val_len = ILBD_MAX_VALUE_LEN; + char aobuf[scf_val_len]; /* algo:topo */ + char pbuf[scf_val_len]; /* protocol */ + char pxbuf[scf_val_len]; /* prxy src range */ + char hcpbuf[scf_val_len]; /* hcport: num or "ANY" */ + char valstr1[scf_val_len]; + char valstr2[scf_val_len]; + char addrstr_buf[INET6_ADDRSTRLEN]; + char addrstr_buf1[INET6_ADDRSTRLEN]; + int audit_error; + + if ((ucredp == NULL) && (cmd == ILBD_CREATE_RULE)) { + /* + * we came here from the path where ilbd incorporates + * the configuration that is listed in SCF : + * i_ilbd_read_config->ilbd_walk_rule_pgs-> + * ->ilbd_scf_instance_walk_pg->ilbd_create_rule + * We skip auditing in that case + */ + return; + } + if (adt_start_session(&ah, NULL, 0) != 0) { + logerr("ilbd_audit_rule_event: adt_start_session failed"); + exit(EXIT_FAILURE); + } + if (adt_set_from_ucred(ah, ucredp, ADT_NEW) != 0) { + (void) adt_end_session(ah); + logerr("ilbd_audit_rule_event: adt_set_from_ucred failed"); + exit(EXIT_FAILURE); + } + if (cmd == ILBD_ENABLE_RULE) + flag = ADT_ilb_enable_rule; + else if (cmd == ILBD_DISABLE_RULE) + flag = ADT_ilb_disable_rule; + else if (cmd == ILBD_DESTROY_RULE) + flag = ADT_ilb_delete_rule; + else if (cmd == ILBD_CREATE_RULE) + flag = ADT_ilb_create_rule; + + if ((event = adt_alloc_event(ah, flag)) == NULL) { + 
logerr("ilbd_audit_rule_event: adt_alloc_event failed"); + exit(EXIT_FAILURE); + } + + (void) memset((char *)event, 0, sizeof (adt_event_data_t)); + + switch (cmd) { + case ILBD_DESTROY_RULE: + event->adt_ilb_delete_rule.auth_used = NET_ILB_CONFIG_AUTH; + event->adt_ilb_delete_rule.rule_name = (char *)audit_rule_name; + break; + case ILBD_ENABLE_RULE: + event->adt_ilb_enable_rule.auth_used = NET_ILB_ENABLE_AUTH; + event->adt_ilb_enable_rule.rule_name = (char *)audit_rule_name; + break; + case ILBD_DISABLE_RULE: + event->adt_ilb_disable_rule.auth_used = NET_ILB_ENABLE_AUTH; + event->adt_ilb_disable_rule.rule_name = (char *)audit_rule_name; + break; + case ILBD_CREATE_RULE: + aobuf[0] = '\0'; + pbuf[0] = '\0'; + valstr1[0] = '\0'; + valstr2[0] = '\0'; + hcpbuf[0] = '\0'; + + event->adt_ilb_create_rule.auth_used = NET_ILB_CONFIG_AUTH; + + /* Fill in virtual IP address */ + addrstr_buf[0] = '\0'; + ilbd_addr2str(&rlinfo->rl_vip, addrstr_buf, + sizeof (addrstr_buf)); + event->adt_ilb_create_rule.virtual_ipaddress = addrstr_buf; + + /* Fill in port - could be a single value or a range */ + event->adt_ilb_create_rule.min_port = ntohs(rlinfo->rl_minport); + if (ntohs(rlinfo->rl_maxport) > ntohs(rlinfo->rl_minport)) { + /* port range */ + event->adt_ilb_create_rule.max_port = + ntohs(rlinfo->rl_maxport); + } else { + /* in audit record, max=min when single port */ + event->adt_ilb_create_rule.max_port = + ntohs(rlinfo->rl_minport); + } + + /* + * Fill in protocol - if user does not specify it, + * its TCP by default + */ + if (rlinfo->rl_proto == IPPROTO_UDP) + (void) snprintf(pbuf, sizeof (pbuf), "UDP"); + else + (void) snprintf(pbuf, sizeof (pbuf), "TCP"); + event->adt_ilb_create_rule.protocol = pbuf; + + /* Fill in algorithm and operation type */ + ilbd_algo_to_str(rlinfo->rl_algo, valstr1); + ilbd_topo_to_str(rlinfo->rl_topo, valstr2); + (void) snprintf(aobuf, sizeof (aobuf), "%s:%s", + valstr1, valstr2); + event->adt_ilb_create_rule.algo_optype = aobuf; + + /* Fill in 
proxy-src for the NAT case */ + if (rlinfo->rl_topo == ILB_TOPO_NAT) { + ilbd_addr2str(&rlinfo->rl_nat_src_start, addrstr_buf, + sizeof (addrstr_buf)); + if (&rlinfo->rl_nat_src_end == 0) { + /* Single address */ + (void) snprintf(pxbuf, sizeof (pxbuf), + "%s", addrstr_buf); + } else { + /* address range */ + ilbd_addr2str(&rlinfo->rl_nat_src_end, + addrstr_buf1, sizeof (addrstr_buf1)); + (void) snprintf(pxbuf, sizeof (pxbuf), + "%s-%s", addrstr_buf, addrstr_buf1); + } + event->adt_ilb_create_rule.proxy_src = pxbuf; + } + + /* + * Fill in pmask if user has specified one - 0 means + * no persistence + */ + valstr1[0] = '\0'; + ilbd_ip_to_str(rlinfo->rl_ipversion, &rlinfo->rl_stickymask, + valstr1); + event->adt_ilb_create_rule.persist_mask = valstr1; + + /* If there is a hcname */ + if (rlinfo->rl_hcname[0] != '\0') + event->adt_ilb_create_rule.hcname = rlinfo->rl_hcname; + + /* Fill in hcport */ + if (rlinfo->rl_hcpflag == ILB_HCI_PROBE_FIX) { + /* hcport is specified by user */ + (void) snprintf(hcpbuf, sizeof (hcpbuf), "%d", + rlinfo->rl_hcport); + event->adt_ilb_create_rule.hcport = hcpbuf; + } else if (rlinfo->rl_hcpflag == ILB_HCI_PROBE_ANY) { + /* user has specified "ANY" */ + (void) snprintf(hcpbuf, sizeof (hcpbuf), "ANY"); + event->adt_ilb_create_rule.hcport = hcpbuf; + } + + /* + * Fill out the conndrain, nat_timeout and persist_timeout + * If the user does not specify them, the default value + * is set in the kernel. Userland does not know what + * the values are. So if the user + * does not specify these values they will show up as + * 0 in the audit record. 
+ */ + event->adt_ilb_create_rule.conndrain_timeout = + rlinfo->rl_conndrain; + event->adt_ilb_create_rule.nat_timeout = + rlinfo->rl_nat_timeout; + event->adt_ilb_create_rule.persist_timeout = + rlinfo->rl_sticky_timeout; + + /* Fill out servergroup and rule name */ + event->adt_ilb_create_rule.server_group = rlinfo->rl_sgname; + event->adt_ilb_create_rule.rule_name = rlinfo->rl_name; + break; + } + if (rc == ILB_STATUS_OK) { + if (adt_put_event(event, ADT_SUCCESS, ADT_SUCCESS) != 0) { + logerr("ilbd_audit_rule_event:adt_put_event failed"); + exit(EXIT_FAILURE); + } + } else { + audit_error = ilberror2auditerror(rc); + if (adt_put_event(event, ADT_FAILURE, audit_error) != 0) { + logerr("ilbd_audit_rule_event: adt_put_event failed"); + exit(EXIT_FAILURE); + } + } + adt_free_event(event); + (void) adt_end_session(ah); +} + +static ilb_status_t +i_ilbd_action_switch(ilbd_rule_t *irl, ilbd_cmd_t cmd, + boolean_t is_rollback, ucred_t *ucredp) +{ + ilb_status_t rc; + + switch (cmd) { + case ILBD_DESTROY_RULE: + rc = ilbd_destroy_one_rule(irl); + if (!is_rollback) { + ilbd_audit_rule_event(irl->irl_name, NULL, + cmd, rc, ucredp); + } + return (rc); + case ILBD_ENABLE_RULE: + rc = ilbd_enable_one_rule(irl, is_rollback); + if (!is_rollback) { + ilbd_audit_rule_event(irl->irl_name, NULL, cmd, + rc, ucredp); + } + return (rc); + case ILBD_DISABLE_RULE: + rc = ilbd_disable_one_rule(irl, is_rollback); + if (!is_rollback) { + ilbd_audit_rule_event(irl->irl_name, NULL, cmd, + rc, ucredp); + } + return (rc); + } + return (ILB_STATUS_INVAL_CMD); +} + +static ilb_cmd_t +i_ilbd2ilb_cmd(ilbd_cmd_t c) +{ + ilb_cmd_t r; + + switch (c) { + case ILBD_CREATE_RULE: + r = ILB_CREATE_RULE; + break; + case ILBD_DESTROY_RULE: + r = ILB_DESTROY_RULE; + break; + case ILBD_ENABLE_RULE: + r = ILB_ENABLE_RULE; + break; + case ILBD_DISABLE_RULE: + r = ILB_DISABLE_RULE; + break; + } + return (r); +} + +static ilbd_cmd_t +get_undo_cmd(ilbd_cmd_t cmd) +{ + ilbd_cmd_t u_cmd; + + switch (cmd) { + case 
ILBD_DESTROY_RULE: + u_cmd = ILBD_BAD_CMD; + break; + case ILBD_ENABLE_RULE: + u_cmd = ILBD_DISABLE_RULE; + break; + case ILBD_DISABLE_RULE: + u_cmd = ILBD_ENABLE_RULE; + break; + } + + return (u_cmd); +} + +static ilb_status_t +i_ilbd_rule_action(const char *rule_name, const struct passwd *ps, + ilbd_cmd_t cmd, ucred_t *ucredp) +{ + ilbd_rule_t *irl, *irl_next; + boolean_t is_all_rules = B_FALSE; + ilb_status_t rc = ILB_STATUS_OK; + ilb_name_cmd_t kcmd; + ilbd_cmd_t u_cmd; + char rulename[ILB_NAMESZ]; + + if (ps != NULL) { + if ((cmd == ILBD_ENABLE_RULE) || (cmd == ILBD_DISABLE_RULE)) + rc = ilbd_check_client_enable_auth(ps); + else + rc = ilbd_check_client_config_auth(ps); + /* generate the audit record before bailing out */ + if (rc != ILB_STATUS_OK) { + if (rule_name != '\0') { + ilbd_audit_rule_event(rule_name, NULL, + cmd, rc, ucredp); + } else { + (void) snprintf(rulename, sizeof (rulename), + "all"); + ilbd_audit_rule_event(rulename, NULL, cmd, rc, + ucredp); + } + goto out; + } + } + is_all_rules = rule_name[0] == 0; + + /* just one rule */ + if (!is_all_rules) { + irl = i_find_rule_byname(rule_name); + if (irl == NULL) { + rc = ILB_STATUS_ENORULE; + ilbd_audit_rule_event(rule_name, NULL, cmd, rc, ucredp); + goto out; + } + /* auditing will be done by i_ilbd_action_switch() */ + rc = i_ilbd_action_switch(irl, cmd, B_FALSE, ucredp); + goto out; + } + + /* all rules: first tell the kernel, then walk the daemon's list */ + kcmd.cmd = i_ilbd2ilb_cmd(cmd); + kcmd.flags = ILB_RULE_ALLRULES; + + rc = do_ioctl(&kcmd, 0); + if (rc != ILB_STATUS_OK) { + (void) snprintf(rulename, sizeof (rulename), "all"); + ilbd_audit_rule_event(rulename, NULL, cmd, rc, ucredp); + goto out; + } + + irl = list_head(&ilbd_rule_hlist); + while (irl != NULL) { + irl_next = list_next(&ilbd_rule_hlist, irl); + irl->irl_flags |= ILB_FLAGS_RULE_ALLRULES; + /* auditing will be done by i_ilbd_action_switch() */ + rc = i_ilbd_action_switch(irl, cmd, B_FALSE, ucredp); + irl->irl_flags &= 
~ILB_FLAGS_RULE_ALLRULES; + if (rc != ILB_STATUS_OK) + goto rollback_list; + irl = irl_next; + } + return (rc); + +rollback_list: + u_cmd = get_undo_cmd(cmd); + if (u_cmd == ILBD_BAD_CMD) + return (rc); + + if (is_all_rules) { + kcmd.cmd = i_ilbd2ilb_cmd(u_cmd); + (void) do_ioctl(&kcmd, 0); + } + /* current list element failed, so we start with previous one */ + irl = list_prev(&ilbd_rule_hlist, irl); + while (irl != NULL) { + if (is_all_rules) + irl->irl_flags |= ILB_FLAGS_RULE_ALLRULES; + + /* + * When the processing of a command consists of + * multiple sequential steps, and one of them fails, + * ilbd performs rollback to undo the steps taken before the + * failing step. Since ilbd is initiating these steps + * there is not need to audit them. + */ + rc = i_ilbd_action_switch(irl, u_cmd, B_TRUE, NULL); + irl->irl_flags &= ~ILB_FLAGS_RULE_ALLRULES; + + irl = list_prev(&ilbd_rule_hlist, irl); + } +out: + return (rc); +} + +ilb_status_t +ilbd_destroy_rule(ilbd_name_t rule_name, const struct passwd *ps, + ucred_t *ucredp) +{ + return (i_ilbd_rule_action(rule_name, ps, ILBD_DESTROY_RULE, ucredp)); +} + +ilb_status_t +ilbd_enable_rule(ilbd_name_t rule_name, const struct passwd *ps, + ucred_t *ucredp) +{ + return (i_ilbd_rule_action(rule_name, ps, ILBD_ENABLE_RULE, ucredp)); + +} + +ilb_status_t +ilbd_disable_rule(ilbd_name_t rule_name, const struct passwd *ps, + ucred_t *ucredp) +{ + return (i_ilbd_rule_action(rule_name, ps, ILBD_DISABLE_RULE, ucredp)); +} + +/* + * allocate storage for a kernel rule command and fill from + * "template" irl, if non-NULL + */ +static ilb_rule_cmd_t * +i_alloc_kernel_rule_cmd(ilbd_rule_t *irl) +{ + ilb_rule_cmd_t *kcmd; + + kcmd = (ilb_rule_cmd_t *)malloc(sizeof (*kcmd)); + if (kcmd == NULL) + return (kcmd); + + bzero(kcmd, sizeof (*kcmd)); + + if (irl != NULL) { + kcmd->flags = irl->irl_flags; + kcmd->ip_ver = AF_2_IPPROTO(irl->irl_ipversion); + kcmd->vip = irl->irl_vip; + kcmd->proto = irl->irl_proto; + kcmd->min_port = 
irl->irl_minport; + kcmd->max_port = irl->irl_maxport; + kcmd->algo = algo_lib2impl(irl->irl_algo); + kcmd->topo = topo_lib2impl(irl->irl_topo); + kcmd->sticky_mask = irl->irl_stickymask; + kcmd->nat_src_start = irl->irl_nat_src_start; + kcmd->nat_src_end = irl->irl_nat_src_end; + kcmd->conn_drain_timeout = irl->irl_conndrain; + kcmd->nat_expiry = irl->irl_nat_timeout; + kcmd->sticky_expiry = irl->irl_sticky_timeout; + (void) strlcpy(kcmd->name, irl->irl_name, + sizeof (kcmd->name)); + } + return (kcmd); +} + +/* + * ncount is the next to be used index into (*kcmdp)->servers + */ +static ilb_status_t +adjust_srv_info_cmd(ilb_servers_info_cmd_t **kcmdp, int index) +{ + ilb_servers_info_cmd_t *kcmd = *kcmdp; + size_t sz; + + if (kcmd != NULL && kcmd->num_servers > index + 1) + return (ILB_STATUS_OK); + + /* + * the first ilb_server_info_t is part of *kcmd, so + * by using index (which is one less than the total needed) here, + * we allocate exactly the amount we need. + */ + sz = sizeof (*kcmd) + (index * sizeof (ilb_server_info_t)); + kcmd = (ilb_servers_info_cmd_t *)realloc(kcmd, sz); + if (kcmd == NULL) + return (ILB_STATUS_ENOMEM); + + /* + * we don't count the slot we newly allocated yet. + */ + kcmd->num_servers = index; + *kcmdp = kcmd; + + return (ILB_STATUS_OK); +} + +/* + * this function adds all servers in srvlist to the kernel(!) rule + * the name of which is passed as argument. + */ +static ilb_status_t +i_update_ksrv_rules(char *name, ilbd_sg_t *sg, ilbd_rule_t *rl) +{ + ilb_status_t rc; + ilbd_srv_t *srvp; + ilb_servers_info_cmd_t *kcmd = NULL; + int i; + + /* + * If the servergroup doesn't have any servers associated with + * it yet, there's nothing more to do here. 
+ */ + if (sg->isg_srvcount == 0) + return (ILB_STATUS_OK); + + /* + * walk the list of servers attached to this SG + */ + srvp = list_head(&sg->isg_srvlist); + for (i = 0; srvp != NULL; srvp = list_next(&sg->isg_srvlist, srvp)) { + rc = adjust_srv_info_cmd(&kcmd, i); + if (rc != ILB_STATUS_OK) + return (rc); + + ILB_SGSRV_2_KSRV(&srvp->isv_srv, &kcmd->servers[i]); + /* + * "no port" means "copy rule's port" (for kernel rule) + */ + if (kcmd->servers[i].min_port == 0) { + kcmd->servers[i].min_port = rl->irl_minport; + kcmd->servers[i].max_port = rl->irl_maxport; + } + i++; + } + + kcmd->cmd = ILB_ADD_SERVERS; + kcmd->num_servers = i; + (void) strlcpy(kcmd->name, name, sizeof (kcmd->name)); + + rc = do_ioctl(kcmd, 0); + if (rc != ILB_STATUS_OK) + return (rc); + + for (i = 0; i < kcmd->num_servers; i++) { + int e; + + if ((e = kcmd->servers[i].err) != 0) { + logerr("i_update_ksrv_rules " + "ioctl indicates failure: %s", strerror(e)); + rc = ilb_map_errno2ilbstat(e); + /* + * if adding even a single server failed, we need to + * roll back the whole wad. We ignore any errors and + * return the one that was returned by the first ioctl. + */ + kcmd->cmd = ILB_DEL_SERVERS; + (void) do_ioctl(kcmd, 0); + return (rc); + } + } + + return (ILB_STATUS_OK); +} + +/* convert a struct in6_addr to valstr */ +void +ilbd_ip_to_str(uint16_t ipversion, struct in6_addr *addr, char *valstr) +{ + size_t vallen; + ilb_ip_addr_t ipaddr; + void *addrptr; + + vallen = (ipversion == AF_INET) ? INET_ADDRSTRLEN : INET6_ADDRSTRLEN; + + IP_COPY_IMPL_2_CLI(addr, &ipaddr); + addrptr = (ipversion == AF_INET) ? 
+ (void *)&ipaddr.ia_v4 : (void *)&ipaddr.ia_v6; + if (inet_ntop(ipversion, (void *)addrptr, valstr, vallen == NULL)) + logerr("ilbd_ip_to_str: inet_ntop failed"); + return; + +} + +ilb_status_t +ilbd_create_rule(ilb_rule_info_t *rl, int ev_port, + const struct passwd *ps, ucred_t *ucredp) +{ + ilb_status_t rc; + ilbd_rule_t *irl = NULL; + ilbd_sg_t *sg; + ilb_rule_cmd_t *kcmd = NULL; + + if (ps != NULL) { + if ((rc = ilbd_check_client_config_auth(ps)) != ILB_STATUS_OK) + goto out; + } + + if (i_find_rule_byname(rl->rl_name) != NULL) { + logdebug("ilbd_create_rule: rule %s" + " already exists", rl->rl_name); + ilbd_audit_rule_event(NULL, rl, ILBD_CREATE_RULE, + ILB_STATUS_DUP_RULE, ucredp); + return (ILB_STATUS_DUP_RULE); + } + + sg = i_find_sg_byname(rl->rl_sgname); + if (sg == NULL) { + logdebug("ilbd_create_rule: rule %s uses non-existent" + " servergroup name %s", rl->rl_name, rl->rl_sgname); + ilbd_audit_rule_event(NULL, rl, ILBD_CREATE_RULE, + ILB_STATUS_SGUNAVAIL, ucredp); + return (ILB_STATUS_SGUNAVAIL); + } + + if ((rc = ilbd_sg_check_rule_port(sg, rl)) != ILB_STATUS_OK) { + ilbd_audit_rule_event(NULL, rl, ILBD_CREATE_RULE, rc, ucredp); + return (rc); + } + + /* allocs and copies contents of arg (if != NULL) into new rule */ + irl = i_alloc_ilbd_rule(rl); + if (irl == NULL) { + ilbd_audit_rule_event(NULL, rl, ILBD_CREATE_RULE, + ILB_STATUS_ENOMEM, ucredp); + return (ILB_STATUS_ENOMEM); + } + + /* make sure rule's IPversion (via vip) and SG's match */ + if (sg->isg_srvcount > 0) { + ilbd_srv_t *srv = list_head(&sg->isg_srvlist); + int32_t r_af = rl->rl_ipversion; + int32_t s_af = GET_AF(&srv->isv_addr); + + if (r_af != s_af) { + logdebug("address family mismatch with servergroup"); + rc = ILB_STATUS_MISMATCHSG; + goto out; + } + } + irl->irl_sg = sg; + + /* Try associating the rule with the given hc oject. 
*/ + if (RULE_HAS_HC(irl)) { + if ((rc = ilbd_hc_associate_rule(irl, ev_port)) != + ILB_STATUS_OK) + goto out; + } + + /* + * checks are done, now: + * 1. create rule in kernel + * 2. tell it about the backend server (which we maintain in SG) + * 3. attach the rule in memory + */ + /* 1. */ + /* allocs and copies contents of arg (if != NULL) into new rule */ + kcmd = i_alloc_kernel_rule_cmd(irl); + if (kcmd == NULL) { + rc = ILB_STATUS_ENOMEM; + goto rollback_hc; + } + kcmd->cmd = ILB_CREATE_RULE; + + rc = do_ioctl(kcmd, 0); + if (rc != ILB_STATUS_OK) + goto rollback_kcmd; + + /* 2. */ + rc = i_update_ksrv_rules(kcmd->name, sg, irl); + if (rc != ILB_STATUS_OK) + goto rollback_kcmd; + + /* 3. */ + (void) i_attach_rule2sg(sg, irl); + list_insert_tail(&ilbd_rule_hlist, irl); + + if (ps != NULL) { + rc = i_ilbd_save_rule(irl, ILBD_SCF_CREATE); + if (rc != ILB_STATUS_OK) + goto rollback_rule; + } + + free(kcmd); + ilbd_audit_rule_event(NULL, rl, ILBD_CREATE_RULE, + ILB_STATUS_OK, ucredp); + return (ILB_STATUS_OK); + +rollback_rule: + /* + * ilbd_destroy_one_rule() also frees irl, as well as dissociate + * rule and HC, so all we need to do afterwards is free the kcmd + * and return. + */ + (void) ilbd_destroy_one_rule(irl); + ilbd_audit_rule_event(NULL, rl, ILBD_CREATE_RULE, rc, ucredp); + free(kcmd); + return (rc); + +rollback_kcmd: + free(kcmd); +rollback_hc: + /* Cannot fail since the rule is just associated with the hc object. 
*/ + if (RULE_HAS_HC(irl)) + (void) ilbd_hc_dissociate_rule(irl); +out: + ilbd_audit_rule_event(NULL, rl, ILBD_CREATE_RULE, rc, ucredp); + free(irl); + return (rc); +} + +static uint32_t +i_flags_d2k(int f) +{ + uint32_t r = 0; + + if (ILB_IS_SRV_ENABLED(f)) + r |= ILB_SERVER_ENABLED; + /* more as they are defined */ + + return (r); +} + +/* + * walk the list of rules and add srv to the *kernel* rule + * (this is a list of rules hanging off of a server group) + */ +ilb_status_t +i_add_srv2krules(list_t *rlist, ilb_sg_srv_t *srv, int ev_port) +{ + ilb_status_t rc = ILB_STATUS_OK; + ilbd_rule_t *rl, *del_rl; + ilb_servers_info_cmd_t kcmd; + ilb_servers_cmd_t del_kcmd; + + kcmd.cmd = ILB_ADD_SERVERS; + kcmd.num_servers = 1; + kcmd.servers[0].err = 0; + kcmd.servers[0].addr = srv->sgs_addr; + kcmd.servers[0].flags = i_flags_d2k(srv->sgs_flags); + (void) strlcpy(kcmd.servers[0].name, srv->sgs_srvID, + sizeof (kcmd.servers[0].name)); + + /* + * a note about rollback: since we need to start rollback with the + * current list element in some case, and with the previous one + * in others, we must "go back" in this latter case before + * we jump to the rollback code. + */ + for (rl = list_head(rlist); rl != NULL; rl = list_next(rlist, rl)) { + (void) strlcpy(kcmd.name, rl->irl_name, sizeof (kcmd.name)); + /* + * sgs_minport == 0 means "no port specified"; this + * indicates that the server matches anything the rule + * provides. + * NOTE: this can be different for different rules + * using the same server group, therefore we don't modify + * this information in the servergroup, but *only* in + * the kernel's rule. 
+ */ + if (srv->sgs_minport == 0) { + kcmd.servers[0].min_port = rl->irl_minport; + kcmd.servers[0].max_port = rl->irl_maxport; + } else { + kcmd.servers[0].min_port = srv->sgs_minport; + kcmd.servers[0].max_port = srv->sgs_maxport; + } + rc = do_ioctl((void *)&kcmd, 0); + if (rc != ILB_STATUS_OK) { + logdebug("i_add_srv2krules: do_ioctl call failed"); + del_rl = list_prev(rlist, rl); + goto rollback; + } + + /* + * if ioctl() returns != 0, it doesn't perform the copyout + * necessary to indicate *which* server failed (we could be + * adding more than one); therefore we must check this + * 'err' field even if ioctl() returns 0. + */ + if (kcmd.servers[0].err != 0) { + logerr("i_add_srv2krules: SIOCILB ioctl returned" + " error %d", kcmd.servers[0].err); + rc = ilb_map_errno2ilbstat(kcmd.servers[0].err); + del_rl = list_prev(rlist, rl); + goto rollback; + } + if (RULE_HAS_HC(rl)) { + if ((rc = ilbd_hc_add_server(rl, srv, ev_port)) != + ILB_STATUS_OK) { + logerr("i_add_srv2krules: cannot start timer " + " for rules %s server %s", rl->irl_name, + srv->sgs_srvID); + + del_rl = rl; + goto rollback; + } + } + } + + return (rc); + +rollback: + /* + * this is almost, but not quite, the same as i_rem_srv_frm_krules() + * therefore we keep it seperate. 
+ */ + del_kcmd.cmd = ILB_DEL_SERVERS; + del_kcmd.num_servers = 1; + del_kcmd.servers[0].addr = srv->sgs_addr; + while (del_rl != NULL) { + if (RULE_HAS_HC(del_rl)) + (void) ilbd_hc_del_server(del_rl, srv); + (void) strlcpy(del_kcmd.name, del_rl->irl_name, + sizeof (del_kcmd.name)); + (void) do_ioctl((void *)&del_kcmd, 0); + del_rl = list_prev(rlist, del_rl); + } + + return (rc); +} + +/* + * ev_port is only used for rollback purposes in this function + */ +ilb_status_t +i_rem_srv_frm_krules(list_t *rlist, ilb_sg_srv_t *srv, int ev_port) +{ + ilb_status_t rc = ILB_STATUS_OK; + ilbd_rule_t *rl, *add_rl; + ilb_servers_cmd_t kcmd; + ilb_servers_info_cmd_t add_kcmd; + + kcmd.cmd = ILB_DEL_SERVERS; + kcmd.num_servers = 1; + kcmd.servers[0].err = 0; + kcmd.servers[0].addr = srv->sgs_addr; + + for (rl = list_head(rlist); rl != NULL; rl = list_next(rlist, rl)) { + (void) strlcpy(kcmd.name, rl->irl_name, sizeof (kcmd.name)); + rc = do_ioctl((void *)&kcmd, 0); + if (rc != ILB_STATUS_OK) { + logdebug("i_rem_srv_frm_krules: do_ioctl" + "call failed"); + add_rl = list_prev(rlist, rl); + goto rollback; + } + /* + * if ioctl() returns != 0, it doesn't perform the copyout + * necessary to indicate *which* server failed (we could be + * removing more than one); therefore we must check this + * 'err' field even if ioctl() returns 0. + */ + if (kcmd.servers[0].err != 0) { + logerr("i_rem_srv_frm_krules: SIOCILB ioctl" + " returned error %s", + strerror(kcmd.servers[0].err)); + rc = ilb_map_errno2ilbstat(kcmd.servers[0].err); + add_rl = list_prev(rlist, rl); + goto rollback; + } + if (RULE_HAS_HC(rl) && + (rc = ilbd_hc_del_server(rl, srv)) != ILB_STATUS_OK) { + logerr("i_rem_srv_frm_krules: cannot delete " + "timer for rules %s server %s", rl->irl_name, + srv->sgs_srvID); + add_rl = rl; + goto rollback; + } + } + + return (rc); + +rollback: + /* Don't do roll back if ev_port == -1. 
*/ + if (ev_port == -1) + return (rc); + + add_kcmd.cmd = ILB_ADD_SERVERS; + add_kcmd.num_servers = 1; + add_kcmd.servers[0].err = 0; + add_kcmd.servers[0].addr = srv->sgs_addr; + add_kcmd.servers[0].flags = i_flags_d2k(srv->sgs_flags); + (void) strlcpy(add_kcmd.servers[0].name, srv->sgs_srvID, + sizeof (add_kcmd.servers[0].name)); + while (add_rl != NULL) { + if (srv->sgs_minport == 0) { + add_kcmd.servers[0].min_port = add_rl->irl_minport; + add_kcmd.servers[0].max_port = add_rl->irl_maxport; + } else { + add_kcmd.servers[0].min_port = srv->sgs_minport; + add_kcmd.servers[0].max_port = srv->sgs_maxport; + } + if (RULE_HAS_HC(add_rl)) + (void) ilbd_hc_add_server(add_rl, srv, ev_port); + (void) strlcpy(add_kcmd.name, add_rl->irl_name, + sizeof (add_kcmd.name)); + (void) do_ioctl((void *)&add_kcmd, 0); + add_rl = list_prev(rlist, add_rl); + } + + return (rc); +} diff --git a/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_scf.c b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_scf.c new file mode 100644 index 0000000000..6dcd03a24a --- /dev/null +++ b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_scf.c @@ -0,0 +1,1692 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <stdlib.h> +#include <stdio.h> +#include <strings.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <sys/list.h> +#include <libilb.h> +#include <assert.h> +#include <libscf.h> +#include "libilb_impl.h" +#include "ilbd.h" + +#define ILBD_PG_NAME_RULE "rule_" +#define ILBD_PG_NAME_SG "sg_" +#define ILBD_PG_NAME_HC "hc_" +#define ILBD_SVC_FMRI "svc:/network/loadbalancer/ilb" +#define ILBD_INST_NAME "default" + +typedef enum { + ILBD_RULE_STATUS, + ILBD_RULE_VIP, + ILBD_RULE_PROTO, + ILBD_RULE_PORT, + ILBD_RULE_ALGO, + ILBD_RULE_TOPO, + ILBD_RULE_NAT_STR, + ILBD_RULE_NAT_END, + ILBD_RULE_STI_MASK, + ILBD_RULE_SGNAME, + ILBD_RULE_HCNAME, + ILBD_RULE_HCPORT, + ILBD_RULE_HCPFLAG, + ILBD_RULE_DRAINTIME, + ILBD_RULE_NAT_TO, + ILBD_RULE_PERS_TO, + + ILBD_SG_SERVER, + + ILBD_HC_TEST, + ILBD_HC_TIMEOUT, + ILBD_HC_INTERVAL, + ILBD_HC_DEF_PING, + ILBD_HC_COUNT, + + ILBD_VAR_INVALID +} ilbd_var_type_t; + +typedef struct prop_tbl_entry { + ilbd_var_type_t val_type; + const char *scf_propname; + scf_type_t scf_proptype; +} prop_tbl_entry_t; + +/* + * this table contains a map of all SCF properties, including rules, + * servergroups and health checks. The place to add new property needs to be + * watched carefully. 
When new properties are added, corresponding *VAR_NUM + * needs to be adjusted to reflect the correct index of the table + */ +prop_tbl_entry_t prop_tbl[] = { + /* entried for rule */ + {ILBD_RULE_STATUS, "status", SCF_TYPE_BOOLEAN}, + /* SCF_TYPE_NET_ADDR_V4 or SCF_TYPE_NET_ADDR_V6 */ + {ILBD_RULE_VIP, "vip", SCF_TYPE_INVALID}, + {ILBD_RULE_PROTO, "protocol", SCF_TYPE_ASTRING}, + {ILBD_RULE_PORT, "port", SCF_TYPE_ASTRING}, + {ILBD_RULE_ALGO, "ilb-algo", SCF_TYPE_ASTRING}, + {ILBD_RULE_TOPO, "ilb-type", SCF_TYPE_ASTRING}, + {ILBD_RULE_NAT_STR, "ilb-nat-start", SCF_TYPE_INVALID}, + {ILBD_RULE_NAT_END, "ilb-nat-end", SCF_TYPE_INVALID}, + {ILBD_RULE_STI_MASK, "ilb-sti-mask", SCF_TYPE_INVALID}, + {ILBD_RULE_SGNAME, "servergroup", SCF_TYPE_ASTRING}, + {ILBD_RULE_HCNAME, "healthcheck", SCF_TYPE_ASTRING}, + {ILBD_RULE_HCPORT, "hc-port", SCF_TYPE_INTEGER}, + {ILBD_RULE_HCPFLAG, "hcp-flag", SCF_TYPE_INTEGER}, + {ILBD_RULE_DRAINTIME, "drain-time", SCF_TYPE_INTEGER}, + {ILBD_RULE_NAT_TO, "nat-timeout", SCF_TYPE_INTEGER}, + {ILBD_RULE_PERS_TO, "pers-timeout", SCF_TYPE_INTEGER}, + /* add new rule related prop here */ + /* entries for sg */ + {ILBD_SG_SERVER, "server", SCF_TYPE_ASTRING}, + /* add new sg related prop here */ + /* entries for hc */ + {ILBD_HC_TEST, "test", SCF_TYPE_ASTRING}, + {ILBD_HC_TIMEOUT, "timeout", SCF_TYPE_INTEGER}, + {ILBD_HC_INTERVAL, "interval", SCF_TYPE_INTEGER}, + {ILBD_HC_DEF_PING, "ping", SCF_TYPE_BOOLEAN}, + /* add new hc related prop here */ + {ILBD_HC_COUNT, "count", SCF_TYPE_INTEGER} +}; + +#define ILBD_PROP_VAR_NUM (ILBD_HC_COUNT + 1) +#define ILBD_RULE_VAR_NUM (ILBD_SG_SERVER) +#define ILBD_SG_VAR_NUM (ILBD_HC_TEST - ILBD_SG_SERVER) +#define ILBD_HC_VAR_NUM (ILBD_PROP_VAR_NUM - ILBD_HC_TEST) + +static ilb_status_t ilbd_scf_set_prop(scf_propertygroup_t *, const char *, + scf_type_t, scf_value_t *); +static ilb_status_t ilbd_scf_retrieve_pg(const char *, scf_propertygroup_t **, + boolean_t); +static ilb_status_t 
ilbd_scf_delete_pg(scf_propertygroup_t *); +static ilb_status_t ilbd_scf_get_prop_val(scf_propertygroup_t *, const char *, + scf_value_t **); + +#define MIN(a, b) ((a) < (b) ? (a) : (b)) + +int +ilbd_scf_limit(int type) +{ + return (MIN(scf_limit(type), 120)); +} + +/* + * Translate libscf error to libilb status + */ +ilb_status_t +ilbd_scf_err_to_ilb_err() +{ + switch (scf_error()) { + case SCF_ERROR_NONE: + return (ILB_STATUS_OK); + case SCF_ERROR_HANDLE_MISMATCH: + case SCF_ERROR_HANDLE_DESTROYED: + case SCF_ERROR_VERSION_MISMATCH: + case SCF_ERROR_NOT_BOUND: + case SCF_ERROR_CONSTRAINT_VIOLATED: + case SCF_ERROR_NOT_SET: + case SCF_ERROR_TYPE_MISMATCH: + case SCF_ERROR_INVALID_ARGUMENT: + return (ILB_STATUS_EINVAL); + case SCF_ERROR_NO_MEMORY: + case SCF_ERROR_NO_RESOURCES: + return (ILB_STATUS_ENOMEM); + case SCF_ERROR_NOT_FOUND: + case SCF_ERROR_DELETED: + return (ILB_STATUS_ENOENT); + case SCF_ERROR_EXISTS: + return (ILB_STATUS_EEXIST); + case SCF_ERROR_PERMISSION_DENIED: + return (ILB_STATUS_PERMIT); + case SCF_ERROR_CALLBACK_FAILED: + return (ILB_STATUS_CALLBACK); + case SCF_ERROR_IN_USE: + return (ILB_STATUS_INUSE); + default: + return (ILB_STATUS_INTERNAL); + } +} + +static void +ilbd_name_to_scfpgname(ilbd_scf_pg_type_t pg_type, const char *pgname, + char *scf_pgname) +{ + switch (pg_type) { + case ILBD_SCF_RULE: + (void) snprintf(scf_pgname, ILBD_MAX_NAME_LEN, + ILBD_PG_NAME_RULE "%s", pgname); + return; + case ILBD_SCF_SG: + (void) snprintf(scf_pgname, ILBD_MAX_NAME_LEN, + ILBD_PG_NAME_SG "%s", pgname); + return; + case ILBD_SCF_HC: + (void) snprintf(scf_pgname, ILBD_MAX_NAME_LEN, + ILBD_PG_NAME_HC "%s", pgname); + return; + /* Should not happen. Log it and put ILB service in maintenance. 
*/ + default: + logerr("ilbd_name_to_scfpgname: invalid pg type %d for pg %s", + pg_type, pgname); + (void) smf_maintain_instance(ILB_FMRI, SMF_IMMEDIATE); + exit(EXIT_FAILURE); + return; + } +} + +static void +ilbd_scf_destroy(scf_handle_t *h, scf_service_t *s, scf_instance_t *inst, + scf_propertygroup_t *pg) +{ + if (pg != NULL) + scf_pg_destroy(pg); + if (inst != NULL) + scf_instance_destroy(inst); + if (s != NULL) + scf_service_destroy(s); + if (h != NULL) + scf_handle_destroy(h); +} + + +static ilb_status_t +ilbd_scf_get_inst(scf_handle_t **h, scf_service_t **svc, scf_instance_t **inst) +{ + if ((*h = scf_handle_create(SCF_VERSION)) == NULL) + return (ILB_STATUS_INTERNAL); + + if (scf_handle_bind(*h) != 0) { + ilbd_scf_destroy(*h, NULL, NULL, NULL); + return (ilbd_scf_err_to_ilb_err()); + } + + if ((*svc = scf_service_create(*h)) == NULL) { + ilbd_scf_destroy(*h, NULL, NULL, NULL); + return (ilbd_scf_err_to_ilb_err()); + } + + if (scf_handle_decode_fmri(*h, ILBD_SVC_FMRI, NULL, *svc, NULL, NULL, + NULL, SCF_DECODE_FMRI_EXACT) != 0) { + ilbd_scf_destroy(*h, *svc, NULL, NULL); + return (ilbd_scf_err_to_ilb_err()); + } + + if ((*inst = scf_instance_create(*h)) == NULL) { + ilbd_scf_destroy(*h, *svc, NULL, NULL); + return (ilbd_scf_err_to_ilb_err()); + } + + if (scf_service_get_instance(*svc, ILBD_INST_NAME, *inst) != 0) { + ilbd_scf_destroy(*h, *svc, *inst, NULL); + return (ilbd_scf_err_to_ilb_err()); + } + return (ILB_STATUS_OK); +} + +/* + * If create is set, create a new prop group, destroy the old one if exists. + * If create not set, try to find the prop group with given name. + * The created or found entry is returned as *pg. 
+ * Caller frees *pg and its handle scf_pg_handle(pg) + */ +static ilb_status_t +ilbd_scf_retrieve_pg(const char *pgname, scf_propertygroup_t **pg, + boolean_t create) +{ + scf_instance_t *inst; + scf_handle_t *h; + scf_service_t *svc; + ilb_status_t ret; + + ret = ilbd_scf_get_inst(&h, &svc, &inst); + if (ret != ILB_STATUS_OK) + return (ret); + + *pg = scf_pg_create(h); + if (*pg == NULL) + return (ILB_STATUS_INTERNAL); + + if (scf_instance_get_pg(inst, pgname, *pg) != 0) { + if (scf_error() != SCF_ERROR_NOT_FOUND || + (scf_error() == SCF_ERROR_NOT_FOUND && (!create))) { + ilbd_scf_destroy(h, svc, inst, *pg); + *pg = NULL; + return (ilbd_scf_err_to_ilb_err()); + } + } else { + /* + * Found pg, don't want to create, return EEXIST. Note that + * h cannot be destroyed here since the caller needs to use it. + * The caller gets it by calling scf_pg_handle(). + */ + if (!create) { + ilbd_scf_destroy(NULL, svc, inst, NULL); + return (ILB_STATUS_EEXIST); + } + /* found pg, need to create, destroy the existing one */ + else + (void) ilbd_scf_delete_pg(*pg); + } + + if (create) { + if (scf_instance_add_pg(inst, pgname, + SCF_GROUP_APPLICATION, 0, *pg) != 0) { + ilbd_scf_destroy(h, svc, inst, *pg); + *pg = NULL; + return (ilbd_scf_err_to_ilb_err()); + } + } + + /* + * Note that handle cannot be destroyed here, caller sometimes needs + * to use it. It gets the handle by calling scf_pg_handle(). 
+ */ + ilbd_scf_destroy(NULL, svc, inst, NULL); + return (ILB_STATUS_OK); +} + +struct algo_tbl_entry { + ilb_algo_t algo_type; + const char *algo_str; +} algo_tbl[] = { + {ILB_ALG_ROUNDROBIN, "ROUNDROBIN"}, + {ILB_ALG_HASH_IP, "HASH-IP"}, + {ILB_ALG_HASH_IP_SPORT, "HASH-IP-PORT"}, + {ILB_ALG_HASH_IP_VIP, "HASH-IP-VIP"} +}; + +#define ILBD_ALGO_TBL_SIZE (sizeof (algo_tbl) / \ + sizeof (*algo_tbl)) + +void +ilbd_algo_to_str(ilb_algo_t algo_type, char *valstr) +{ + int i; + + for (i = 0; i < ILBD_ALGO_TBL_SIZE; i++) { + if (algo_type == algo_tbl[i].algo_type) { + (void) strlcpy(valstr, algo_tbl[i].algo_str, + ILBD_MAX_VALUE_LEN); + return; + } + } + logerr("ilbd_algo_to_str: algo not found"); +} + +static void +ilbd_scf_str_to_algo(ilb_algo_t *algo_type, char *valstr) +{ + int i; + + for (i = 0; i < ILBD_ALGO_TBL_SIZE; i++) { + if (strcmp(valstr, algo_tbl[i].algo_str) == 0) { + *algo_type = algo_tbl[i].algo_type; + return; + } + } + logerr("ilbd_scf_str_to_algo: algo not found"); +} + +struct topo_tbl_entry { + ilb_topo_t topo_type; + const char *topo_str; +} topo_tbl[] = { + {ILB_TOPO_DSR, "DSR"}, + {ILB_TOPO_NAT, "NAT"}, + {ILB_TOPO_HALF_NAT, "HALF-NAT"} +}; + +#define ILBD_TOPO_TBL_SIZE (sizeof (topo_tbl) / \ + sizeof (*topo_tbl)) + +void +ilbd_topo_to_str(ilb_topo_t topo_type, char *valstr) +{ + int i; + + for (i = 0; i < ILBD_TOPO_TBL_SIZE; i++) { + if (topo_type == topo_tbl[i].topo_type) { + (void) strlcpy(valstr, topo_tbl[i].topo_str, + ILBD_MAX_VALUE_LEN); + return; + } + } + logerr("ilbd_scf_topo_to_str: topo not found"); +} + +static void +ilbd_scf_str_to_topo(ilb_topo_t *topo_type, char *valstr) +{ + int i; + + for (i = 0; i < ILBD_TOPO_TBL_SIZE; i++) { + if (strcmp(valstr, topo_tbl[i].topo_str) == 0) { + *topo_type = topo_tbl[i].topo_type; + return; + } + } + logerr("ilbd_scf_str_to_topo: topo not found"); +} + +static void +ilbd_get_svr_field(char *valstr, struct in6_addr *sgs_addr, + int32_t *min_port, int32_t *max_port, int32_t *sgs_flags) +{ + char 
*ipaddr, *ipverstr, *portstr, *flagstr; + int ip_ver; + ilb_ip_addr_t temp_ip; + void *addrptr; + char *max_portstr; + + ipaddr = strtok(valstr, ";"); + ipverstr = strtok(NULL, ";"); + portstr = strtok(NULL, ";"); + flagstr = strtok(NULL, ";"); + + if (ipaddr == NULL || ipverstr == NULL || portstr == NULL || + flagstr == NULL) { + logerr("%s: invalid server fields", __func__); + (void) smf_maintain_instance(ILB_FMRI, SMF_IMMEDIATE); + exit(EXIT_FAILURE); + } + ip_ver = atoi(ipverstr); + addrptr = (ip_ver == AF_INET) ? (void *)&temp_ip.ia_v4 : + (void *)&temp_ip.ia_v6; + if (inet_pton(ip_ver, ipaddr, addrptr) == NULL) { + logerr("ilbd_get_svr_field: inet_pton failed"); + return; + } + + if (ip_ver == AF_INET) { + IN6_INADDR_TO_V4MAPPED(&(temp_ip.ia_v4), sgs_addr); + } else { + (void) memcpy(sgs_addr, &(temp_ip.ia_v6), + sizeof (struct in6_addr)); + } + + *sgs_flags = atoi(flagstr); + *min_port = atoi(strtok(portstr, "-")); + *min_port = ntohs(*min_port); + max_portstr = strtok(NULL, "-"); + if (max_portstr != NULL) { + *max_port = atoi(max_portstr); + *max_port = ntohs(*max_port); + } +} + +/* + * Convert the info of a server to its SCF string value representation. + * Argument value is assumed to be of size ILBD_MAX_VALUE_LEN. 
+ */ +static void +ilbd_srv_scf_val(ilbd_srv_t *srv, char *value) +{ + char ipstr[INET6_ADDRSTRLEN]; + int ipver; + + if (GET_AF(&srv->isv_addr) == AF_INET) { + struct in_addr v4_addr; + + IN6_V4MAPPED_TO_INADDR(&srv->isv_addr, &v4_addr); + (void) inet_ntop(AF_INET, &v4_addr, ipstr, sizeof (ipstr)); + ipver = AF_INET; + } else { + (void) inet_ntop(AF_INET6, &srv->isv_addr, ipstr, + sizeof (ipstr)); + ipver = AF_INET6; + } + (void) snprintf(value, ILBD_MAX_VALUE_LEN, "%s;%d;%d-%d;%d", + ipstr, ipver, ntohs(srv->isv_minport), ntohs(srv->isv_maxport), + srv->isv_flags); +} + +/* get the "ip:port:status" str of the #num server in the servergroup */ +ilb_status_t +ilbd_get_svr_info(ilbd_sg_t *sg, int num, char *valstr, char *svrname) +{ + int i; + ilbd_srv_t *tmp_srv = NULL; + + tmp_srv = list_head(&sg->isg_srvlist); + if (tmp_srv == NULL) + return (ILB_STATUS_ENOENT); + + for (i = 0; i < num; i++) + tmp_srv = list_next(&sg->isg_srvlist, tmp_srv); + + assert(tmp_srv != NULL); + if (valstr != NULL) + ilbd_srv_scf_val(tmp_srv, valstr); + + if (svrname != NULL) { + (void) snprintf(svrname, ILBD_MAX_NAME_LEN, "server%d", + tmp_srv->isv_id); + } + + return (ILB_STATUS_OK); +} + +/* convert a struct in6_addr to valstr */ +ilb_status_t +ilbd_scf_ip_to_str(uint16_t ipversion, struct in6_addr *addr, + scf_type_t *scftype, char *valstr) +{ + size_t vallen; + ilb_ip_addr_t ipaddr; + void *addrptr; + + vallen = (ipversion == AF_INET) ? INET_ADDRSTRLEN : + INET6_ADDRSTRLEN; + if (scftype != NULL) + *scftype = (ipversion == AF_INET) ? SCF_TYPE_NET_ADDR_V4 : + SCF_TYPE_NET_ADDR_V6; + + IP_COPY_IMPL_2_CLI(addr, &ipaddr); + addrptr = (ipversion == AF_INET) ? + (void *)&ipaddr.ia_v4 : (void *)&ipaddr.ia_v6; + (void) inet_ntop(ipversion, (void *)addrptr, valstr, vallen); + return (ILB_STATUS_OK); +} + +/* + * This function takes a ilbd internal data struct and translate its value to + * scf value. The data struct is passed in within "data". 
+ * Upon successful return, the scf val will be stored in "val" and the scf type + * will be returned in "scftype" if scftype != NULL, the number of values + * translated will be in "numval" + * If it failed, no data will be written to SCF + */ +static ilb_status_t +ilbd_data_to_scfval(ilbd_scf_pg_type_t pg_type, ilbd_var_type_t type, + scf_handle_t *h, void *data, scf_value_t ***val, scf_type_t *scftype, + int *numval) +{ + scf_value_t *v, **varray = NULL; + int ret = ILB_STATUS_OK; + int i; + int scf_val_len = ILBD_MAX_VALUE_LEN; + char valstr[scf_val_len]; + int valint; + uint8_t valbool = 0; + ilbd_rule_t *r_ent = NULL; + ilbd_sg_t *s_ent = NULL; + ilbd_hc_t *h_ent = NULL; + + switch (pg_type) { + case ILBD_SCF_RULE: + r_ent = (ilbd_rule_t *)data; + break; + case ILBD_SCF_SG: + s_ent = (ilbd_sg_t *)data; + break; + case ILBD_SCF_HC: + h_ent = (ilbd_hc_t *)data; + break; + } + + v = scf_value_create(h); + if (v == NULL) + return (ILB_STATUS_INTERNAL); + + switch (type) { + case ILBD_RULE_STATUS: + valbool = r_ent->irl_flags & ILB_FLAGS_RULE_ENABLED; + break; + case ILBD_RULE_VIP: + ret = ilbd_scf_ip_to_str(r_ent->irl_ipversion, &r_ent->irl_vip, + scftype, valstr); + if (ret != ILB_STATUS_OK) { + scf_value_destroy(v); + return (ret); + } + break; + case ILBD_RULE_PROTO: { + struct protoent *protoent; + + protoent = getprotobynumber(r_ent->irl_proto); + (void) strlcpy(valstr, protoent->p_name, sizeof (valstr)); + break; + } + case ILBD_RULE_PORT: + (void) snprintf(valstr, sizeof (valstr), "%d-%d", + r_ent->irl_minport, r_ent->irl_maxport); + break; + case ILBD_RULE_ALGO: + ilbd_algo_to_str(r_ent->irl_algo, valstr); + break; + case ILBD_RULE_TOPO: + ilbd_topo_to_str(r_ent->irl_topo, valstr); + break; + case ILBD_RULE_NAT_STR: + ret = ilbd_scf_ip_to_str(r_ent->irl_ipversion, + &r_ent->irl_nat_src_start, scftype, valstr); + if (ret != ILB_STATUS_OK) { + scf_value_destroy(v); + return (ret); + } + break; + case ILBD_RULE_NAT_END: + ret = 
ilbd_scf_ip_to_str(r_ent->irl_ipversion, + &r_ent->irl_nat_src_end, scftype, valstr); + if (ret != ILB_STATUS_OK) { + scf_value_destroy(v); + return (ret); + } + break; + case ILBD_RULE_STI_MASK: + ret = ilbd_scf_ip_to_str(r_ent->irl_ipversion, + &r_ent->irl_stickymask, scftype, valstr); + if (ret != ILB_STATUS_OK) { + scf_value_destroy(v); + return (ret); + } + break; + case ILBD_RULE_SGNAME: + (void) strlcpy(valstr, r_ent->irl_sgname, sizeof (valstr)); + break; + case ILBD_RULE_HCNAME: + if (r_ent->irl_hcname[0] != '\0') + (void) strlcpy(valstr, r_ent->irl_hcname, + sizeof (valstr)); + else + bzero(valstr, ILBD_MAX_VALUE_LEN); + break; + case ILBD_RULE_HCPORT: + valint = r_ent->irl_hcport; + break; + case ILBD_RULE_HCPFLAG: + valint = r_ent->irl_hcpflag; + break; + case ILBD_RULE_DRAINTIME: + valint = r_ent->irl_conndrain; + break; + case ILBD_RULE_NAT_TO: + valint = r_ent->irl_nat_timeout; + break; + case ILBD_RULE_PERS_TO: + valint = r_ent->irl_sticky_timeout; + break; + + case ILBD_SG_SERVER: + if (s_ent->isg_srvcount == 0) { + (void) strlcpy(valstr, "EMPTY_SERVERGROUP", + sizeof (valstr)); + break; + } + + varray = calloc(sizeof (*varray), s_ent->isg_srvcount); + if (varray == NULL) { + scf_value_destroy(v); + return (ILB_STATUS_ENOMEM); + } + + for (i = 0; i < s_ent->isg_srvcount; i++) { + if (v == NULL) { + for (i--; i >= 0; i--) + scf_value_destroy(varray[i]); + free(varray); + return (ILB_STATUS_ENOMEM); + } + + ret = ilbd_get_svr_info(s_ent, i, valstr, NULL); + if (ret != ILB_STATUS_OK) { + scf_value_destroy(v); + for (i--; i >= 0; i--) + scf_value_destroy(varray[i]); + free(varray); + return (ret); + } + (void) scf_value_set_astring(v, valstr); + varray[i] = v; + v = scf_value_create(h); + } + /* the last 'v' we created will go unused, so drop it */ + scf_value_destroy(v); + *numval = s_ent->isg_srvcount; + *val = varray; + return (ret); + case ILBD_HC_TEST: + (void) strlcpy(valstr, h_ent->ihc_test, sizeof (valstr)); + break; + case ILBD_HC_TIMEOUT: + 
valint = h_ent->ihc_timeout; + break; + case ILBD_HC_INTERVAL: + valint = h_ent->ihc_interval; + break; + case ILBD_HC_DEF_PING: + valbool = h_ent->ihc_def_ping; + break; + case ILBD_HC_COUNT: + valint = h_ent->ihc_count; + break; + } + + switch (*scftype) { + case SCF_TYPE_BOOLEAN: + scf_value_set_boolean(v, valbool); + break; + case SCF_TYPE_ASTRING: + (void) scf_value_set_astring(v, valstr); + break; + case SCF_TYPE_INTEGER: + scf_value_set_integer(v, valint); + break; + case SCF_TYPE_NET_ADDR_V4: + (void) scf_value_set_from_string(v, SCF_TYPE_NET_ADDR_V4, + valstr); + break; + case SCF_TYPE_NET_ADDR_V6: + (void) scf_value_set_from_string(v, SCF_TYPE_NET_ADDR_V6, + valstr); + break; + } + + varray = calloc(1, sizeof (*varray)); + if (varray == NULL) { + scf_value_destroy(v); + return (ILB_STATUS_ENOMEM); + } + varray[0] = v; + *val = varray; + *numval = 1; + + return (ret); +} + +/* + * create a scf property group + */ +ilb_status_t +ilbd_create_pg(ilbd_scf_pg_type_t pg_type, void *data) +{ + ilb_status_t ret; + char *pgname; + scf_propertygroup_t *pg = NULL; + scf_value_t **val; + scf_handle_t *h; + int scf_name_len = ILBD_MAX_NAME_LEN; + char scfpgname[scf_name_len]; + int i, i_st, i_end; + + switch (pg_type) { + case ILBD_SCF_RULE: { + ilbd_rule_t *r_ent = (ilbd_rule_t *)data; + + pgname = r_ent->irl_name; + i_st = 0; + i_end = ILBD_RULE_VAR_NUM; + break; + } + case ILBD_SCF_SG: { + ilbd_sg_t *s_ent = (ilbd_sg_t *)data; + + pgname = s_ent->isg_name; + i_st = ILBD_RULE_VAR_NUM; + i_end = ILBD_RULE_VAR_NUM + ILBD_SG_VAR_NUM; + break; + } + case ILBD_SCF_HC: { + ilbd_hc_t *h_ent = (ilbd_hc_t *)data; + + pgname = h_ent->ihc_name; + i_st = ILBD_RULE_VAR_NUM + ILBD_SG_VAR_NUM; + i_end = ILBD_PROP_VAR_NUM; + break; + } + default: + logdebug("ilbd_create_pg: invalid pg type %d for pg %s", + pg_type, pgname); + return (ILB_STATUS_EINVAL); + } + + ilbd_name_to_scfpgname(pg_type, pgname, scfpgname); + + ret = ilbd_scf_retrieve_pg(scfpgname, &pg, B_TRUE); + if (ret != 
ILB_STATUS_OK) + return (ret); + h = scf_pg_handle(pg); + + /* fill in props */ + for (i = i_st; i < i_end; i++) { + int num, j; + int scf_name_len = ILBD_MAX_NAME_LEN; + char propname[scf_name_len]; + scf_type_t scftype = prop_tbl[i].scf_proptype; + + ret = ilbd_data_to_scfval(pg_type, prop_tbl[i].val_type, h, + data, &val, &scftype, &num); + if (ret != ILB_STATUS_OK) + goto done; + + for (j = 0; j < num; j++) { + if (pg_type == ILBD_SCF_SG) { + ret = ilbd_get_svr_info(data, j, NULL, + propname); + if (ret == ILB_STATUS_ENOENT) { + (void) strlcpy(propname, "EMPTY_SERVER", + ILBD_MAX_NAME_LEN); + } + ret = ilbd_scf_set_prop(pg, propname, + scftype, val[j]); + } else { + ret = ilbd_scf_set_prop(pg, + prop_tbl[i].scf_propname, scftype, val[j]); + } + scf_value_destroy(val[j]); + } + free(val); + } + +done: + ilbd_scf_destroy(h, NULL, NULL, pg); + return (ret); +} + +/* + * destroy a scf property group + */ +static ilb_status_t +ilbd_scf_delete_pg(scf_propertygroup_t *pg) +{ + if (scf_pg_delete(pg) != 0) + return (ilbd_scf_err_to_ilb_err()); + return (ILB_STATUS_OK); +} + +/* sg can have same name as rule */ +ilb_status_t +ilbd_destroy_pg(ilbd_scf_pg_type_t pg_t, const char *pgname) +{ + ilb_status_t ret; + scf_propertygroup_t *pg; + int scf_name_len = ILBD_MAX_NAME_LEN; + char scfname[scf_name_len]; + + ilbd_name_to_scfpgname(pg_t, pgname, scfname); + + ret = ilbd_scf_retrieve_pg(scfname, &pg, B_FALSE); + if (ret != ILB_STATUS_EEXIST) + return (ret); + + ret = ilbd_scf_delete_pg(pg); + ilbd_scf_destroy(scf_pg_handle(pg), NULL, NULL, pg); + return (ret); +} + +/* + * Set named property to scf value specified. If property is new, + * create it. 
+ */ +static ilb_status_t +ilbd_scf_set_prop(scf_propertygroup_t *pg, const char *propname, + scf_type_t proptype, scf_value_t *val) +{ + scf_handle_t *h = NULL; + scf_property_t *prop = NULL; + scf_value_t *oldval = NULL; + scf_transaction_t *tx = NULL; + scf_transaction_entry_t *ent = NULL; + boolean_t new = B_FALSE; + ilb_status_t ret = ILB_STATUS_OK; + int commit_ret; + + h = scf_pg_handle(pg); + if (h == NULL || propname == NULL) + return (ILB_STATUS_EINVAL); + + ret = ilbd_scf_get_prop_val(pg, propname, &oldval); + if (oldval != NULL) + scf_value_destroy(oldval); + if (ret == ILB_STATUS_ENOENT) + new = B_TRUE; + else if (ret != ILB_STATUS_OK) + return (ret); + + if ((prop = scf_property_create(h)) == NULL) + return (ilbd_scf_err_to_ilb_err()); + if ((tx = scf_transaction_create(h)) == NULL || + (ent = scf_entry_create(h)) == NULL) { + ret = ilbd_scf_err_to_ilb_err(); + logdebug("ilbd_scf_set_prop: create scf transaction failed\n"); + goto out; + } + + if (scf_transaction_start(tx, pg) == -1) { + ret = ilbd_scf_err_to_ilb_err(); + logdebug("ilbd_scf_set_prop: start scf transaction failed\n"); + goto out; + } + + if (new) { + if (scf_transaction_property_new(tx, ent, propname, + proptype) == -1) { + ret = ilbd_scf_err_to_ilb_err(); + logdebug("ilbd_scf_set_prop: create scf prop failed\n"); + goto out; + } + } else { + if (scf_transaction_property_change(tx, ent, propname, proptype) + == -1) { + ret = ilbd_scf_err_to_ilb_err(); + logdebug("ilbd_scf_set_prop: change scf prop failed\n"); + goto out; + } + } + + if (scf_entry_add_value(ent, val) != 0) { + logdebug("ilbd_scf_set_prop: add scf entry failed\n"); + ret = ilbd_scf_err_to_ilb_err(); + goto out; + } + + commit_ret = scf_transaction_commit(tx); + switch (commit_ret) { + case 1: + ret = ILB_STATUS_OK; + /* update pg here, so subsequent property setting succeeds */ + (void) scf_pg_update(pg); + break; + case 0: + /* transaction failed due to not having most recent pg */ + ret = ILB_STATUS_INUSE; + break; + 
default: + ret = ilbd_scf_err_to_ilb_err(); + break; + } +out: + if (tx != NULL) + scf_transaction_destroy(tx); + if (ent != NULL) + scf_entry_destroy(ent); + if (prop != NULL) + scf_property_destroy(prop); + + return (ret); +} + +/* + * get a prop's scf val + */ +static ilb_status_t +ilbd_scf_get_prop_val(scf_propertygroup_t *pg, const char *propname, + scf_value_t **val) +{ + scf_handle_t *h = NULL; + scf_property_t *prop = NULL; + scf_value_t *value = NULL; + ilb_status_t ret = ILB_STATUS_OK; + + h = scf_pg_handle(pg); + if (h == NULL || propname == NULL) + return (ILB_STATUS_EINVAL); + + if ((prop = scf_property_create(h)) == NULL) + return (ilbd_scf_err_to_ilb_err()); + + if (scf_pg_get_property(pg, propname, prop) != 0) { + ret = ilbd_scf_err_to_ilb_err(); + goto out; + } + + if ((value = scf_value_create(h)) == NULL) { + ret = ilbd_scf_err_to_ilb_err(); + goto out; + } + + if (scf_property_get_value(prop, value) != 0) { + scf_value_destroy(value); + ret = ilbd_scf_err_to_ilb_err(); + goto out; + } + + *val = value; +out: + if (prop != NULL) + scf_property_destroy(prop); + + return (ret); +} + +typedef struct ilbd_data +{ + union { + ilb_sg_info_t *sg_info; + ilb_hc_info_t *hc_info; + ilb_rule_info_t *rule_info; + } data; + ilbd_scf_pg_type_t pg_type; /* type of data */ +#define sg_data data.sg_info +#define hc_data data.hc_info +#define rule_data data.rule_info +} ilbd_data_t; + +void +ilbd_scf_str_to_ip(int ipversion, char *ipstr, struct in6_addr *addr) +{ + ilb_ip_addr_t ipaddr; + void *addrptr; + + addrptr = (ipversion == AF_INET) ? + (void *)&ipaddr.ia_v4 : (void *)&ipaddr.ia_v6; + (void) inet_pton(ipversion, ipstr, addrptr); + if (ipversion == AF_INET) { + IN6_INADDR_TO_V4MAPPED(&(ipaddr.ia_v4), addr); + } else { + (void) memcpy(addr, &(ipaddr.ia_v6), + sizeof (struct in6_addr)); + } +} + +/* + * This function takes a scf value and writes it to the correct field of the + * corresponding data struct. 
+ */ +static ilb_status_t +ilbd_scfval_to_data(const char *propname, ilbd_var_type_t ilb_type, + scf_value_t *val, ilbd_data_t *ilb_data) +{ + + scf_type_t scf_type = scf_value_type(val); + ilbd_scf_pg_type_t pg_type = ilb_data->pg_type; + int ret = 0; + ilb_rule_info_t *r_ent = NULL; + ilb_sg_info_t *s_ent = NULL; + ilb_hc_info_t *h_ent = NULL; + char ipstr[INET6_ADDRSTRLEN]; + int scf_val_len = ILBD_MAX_VALUE_LEN; + char valstr[scf_val_len]; + int64_t valint; + uint8_t valbool; + int ipversion; + + switch (pg_type) { + case ILBD_SCF_RULE: + r_ent = ilb_data->rule_data; + break; + case ILBD_SCF_HC: + h_ent = ilb_data->hc_data; + break; + case ILBD_SCF_SG: + s_ent = ilb_data->sg_data; + break; + } + + /* get scf value out */ + switch (scf_type) { + case SCF_TYPE_NET_ADDR_V4: + if (scf_value_get_as_string_typed(val, + SCF_TYPE_NET_ADDR_V4, ipstr, INET_ADDRSTRLEN) < 0) + return (ILB_STATUS_INTERNAL); + ipversion = AF_INET; + break; + case SCF_TYPE_NET_ADDR_V6: + if (scf_value_get_as_string_typed(val, + SCF_TYPE_NET_ADDR_V6, ipstr, INET6_ADDRSTRLEN) < 0) + return (ILB_STATUS_INTERNAL); + ipversion = AF_INET6; + break; + case SCF_TYPE_BOOLEAN: + if (scf_value_get_boolean(val, &valbool) < 0) + return (ILB_STATUS_INTERNAL); + break; + case SCF_TYPE_ASTRING: + if (scf_value_get_astring(val, valstr, sizeof (valstr)) + < 0) + return (ILB_STATUS_INTERNAL); + break; + case SCF_TYPE_INTEGER: + if (scf_value_get_integer(val, &valint) < 0) + return (ILB_STATUS_INTERNAL); + break; + default: + return (ILB_STATUS_INTERNAL); + } + + ret = ILB_STATUS_OK; + switch (ilb_type) { + case ILBD_RULE_STATUS: + if (valbool) + r_ent->rl_flags |= ILB_FLAGS_RULE_ENABLED; + break; + case ILBD_RULE_VIP: + r_ent->rl_ipversion = ipversion; + ilbd_scf_str_to_ip(ipversion, ipstr, &r_ent->rl_vip); + break; + case ILBD_RULE_PROTO: { + struct protoent *protoent; + + protoent = getprotobyname(valstr); + r_ent->rl_proto = protoent->p_proto; + break; + } + case ILBD_RULE_PORT: { + char *token1, *token2; + 
+ token1 = strtok(valstr, "-"); + token2 = strtok(NULL, "-"); + r_ent->rl_minport = atoi(token1); + r_ent->rl_maxport = atoi(token2); + break; + } + case ILBD_RULE_ALGO: + ilbd_scf_str_to_algo(&(r_ent->rl_algo), valstr); + break; + case ILBD_RULE_TOPO: + ilbd_scf_str_to_topo(&(r_ent->rl_topo), valstr); + break; + case ILBD_RULE_NAT_STR: + ilbd_scf_str_to_ip(ipversion, ipstr, &r_ent->rl_nat_src_start); + break; + case ILBD_RULE_NAT_END: + ilbd_scf_str_to_ip(ipversion, ipstr, &r_ent->rl_nat_src_end); + break; + case ILBD_RULE_STI_MASK: + ilbd_scf_str_to_ip(ipversion, ipstr, &r_ent->rl_stickymask); + if (ipversion == AF_INET) { + if (!IN6_IS_ADDR_V4MAPPED_ANY(&r_ent->rl_stickymask)) + r_ent->rl_flags |= ILB_FLAGS_RULE_STICKY; + } else { + if (!IN6_IS_ADDR_UNSPECIFIED(&r_ent->rl_stickymask)) + r_ent->rl_flags |= ILB_FLAGS_RULE_STICKY; + } + break; + case ILBD_RULE_SGNAME: + (void) strlcpy(r_ent->rl_sgname, valstr, + sizeof (r_ent->rl_sgname)); + break; + case ILBD_RULE_HCNAME: + (void) strlcpy(r_ent->rl_hcname, valstr, + sizeof (r_ent->rl_hcname)); + break; + case ILBD_RULE_HCPORT: + r_ent->rl_hcport = valint; + break; + case ILBD_RULE_HCPFLAG: + r_ent->rl_hcpflag = valint; + break; + case ILBD_RULE_DRAINTIME: + r_ent->rl_conndrain = valint; + break; + case ILBD_RULE_NAT_TO: + r_ent->rl_nat_timeout = valint; + break; + case ILBD_RULE_PERS_TO: + r_ent->rl_sticky_timeout = valint; + break; + + case ILBD_SG_SERVER: { + int svr_cnt = s_ent->sg_srvcount; + + /* found a new server, increase the svr count of this sg */ + s_ent->sg_srvcount++; + + /* + * valstr contains information of one server in the servergroup + * valstr is in the format of "ip:minport-maxport:enable" + */ + s_ent = realloc(s_ent, sizeof (ilb_sg_info_t) + + s_ent->sg_srvcount * sizeof (ilb_sg_srv_t)); + + /* sgs_srvID is the sg name, leave it blank */ + /* + * sgs_id is the digit in propname, propname is in a format of + * "server" + the digital serverID. 
We get the serverID by + * reading from the 7th char of propname. + */ + s_ent->sg_servers[svr_cnt].sgs_id = atoi(&propname[6]); + + ilbd_get_svr_field(valstr, + &s_ent->sg_servers[svr_cnt].sgs_addr, + &s_ent->sg_servers[svr_cnt].sgs_minport, + &s_ent->sg_servers[svr_cnt].sgs_maxport, + &s_ent->sg_servers[svr_cnt].sgs_flags); + ilb_data->sg_data = s_ent; + + break; + } + case ILBD_HC_TEST: + (void) strlcpy(h_ent->hci_test, valstr, + sizeof (h_ent->hci_test)); + break; + case ILBD_HC_TIMEOUT: + h_ent->hci_timeout = valint; + break; + case ILBD_HC_INTERVAL: + h_ent->hci_interval = valint; + break; + case ILBD_HC_DEF_PING: + h_ent->hci_def_ping = valbool; + break; + case ILBD_HC_COUNT: + h_ent->hci_count = valint; + break; + case ILBD_VAR_INVALID: + /* + * An empty server group is represented by an invalid + * SCF property. So when loading a server group, this + * case can be hit. But it should happen only for this + * single case. So if it happens in another case, move + * the service into maintenance mode. 
+ */ + if (pg_type != ILBD_SCF_SG || scf_type != SCF_TYPE_ASTRING) { + logerr("%s: invalid ilb type", __func__); + (void) smf_maintain_instance(ILB_FMRI, SMF_IMMEDIATE); + } else { + logdebug("%s: invalid ilb type", __func__); + } + break; + } + + return (ret); +} + +static ilbd_var_type_t +ilbd_name_to_valtype(const char *prop_name) +{ + int i; + + for (i = 0; i < ILBD_PROP_VAR_NUM; i++) + if (strncmp(prop_name, prop_tbl[i].scf_propname, + strlen(prop_tbl[i].scf_propname)) == 0) + return (prop_tbl[i].val_type); + + logdebug("ilbd_name_to_valtype: couldn't find prop %s", prop_name); + return (ILBD_VAR_INVALID); +} + +/* callback for pg_walk_prop, arg is ilbd_data_t */ +static ilb_status_t +ilbd_scf_load_prop(scf_propertygroup_t *pg, const char *prop_name, void *arg) +{ + scf_handle_t *h; + scf_value_t *val; + ilb_status_t ret; + ilbd_data_t *ilb_data = (ilbd_data_t *)arg; + ilbd_var_type_t val_type = ilbd_name_to_valtype(prop_name); + + h = scf_pg_handle(pg); + if (h == NULL) + return (ILB_STATUS_EINVAL); + + ret = ilbd_scf_get_prop_val(pg, prop_name, &val); + if (ret == ILB_STATUS_ENOENT) + return (ILB_STATUS_OK); + else if (ret != ILB_STATUS_OK) + return (ret); + + /* + * Load value to ilb_data. 
+ */ + ret = ilbd_scfval_to_data(prop_name, val_type, val, ilb_data); + +out: + if (val != NULL) + scf_value_destroy(val); + + return (ret); +} + +/* + * walk properties in one prop group, arg is ilbd_data + * cb is ilbd_scf_load_prop() + */ +static ilb_status_t +ilbd_scf_pg_walk_props(scf_propertygroup_t *pg, + ilb_status_t (*cb)(scf_propertygroup_t *, const char *, void *), + void *arg) +{ + scf_handle_t *h; + scf_iter_t *propiter; + scf_property_t *prop; + int scf_name_len = ILBD_MAX_NAME_LEN; + char prop_name[scf_name_len]; + ilb_status_t ret = ILB_STATUS_OK; + int scf_ret = -1; + + h = scf_pg_handle(pg); + if (h == NULL) + return (ILB_STATUS_EINVAL); + + prop = scf_property_create(h); + propiter = scf_iter_create(h); + if (prop == NULL || propiter == NULL) + goto out; + + if (scf_iter_pg_properties(propiter, pg) != 0) + goto out; + + while ((scf_ret = scf_iter_next_property(propiter, prop)) == 1) { + if (scf_property_get_name(prop, prop_name, sizeof (prop_name)) + < 0) { + ret = ilbd_scf_err_to_ilb_err(); + goto out; + } + ret = cb(pg, prop_name, arg); + if (ret != ILB_STATUS_OK) + break; + } +out: + if (scf_ret == -1) + ret = ilbd_scf_err_to_ilb_err(); + if (prop != NULL) + scf_property_destroy(prop); + if (propiter != NULL) + scf_iter_destroy(propiter); + + return (ret); +} + +/* cbs are libd_create_X */ +static ilb_status_t +ilbd_scf_instance_walk_pg(scf_instance_t *inst, + ilbd_scf_pg_type_t pg_type, + ilb_status_t (*cb)(void *, int, struct passwd *, ucred_t *), + void *arg1, void *arg2) +{ + int scf_ret; + ilb_status_t ret; + scf_handle_t *h; + scf_iter_t *pgiter; + scf_propertygroup_t *newpg; + int port = *((int *)arg1); + + if (inst == NULL) + return (ILB_STATUS_EINVAL); + + h = scf_instance_handle(inst); + if (h == NULL) + return (ILB_STATUS_EINVAL); + + if ((newpg = scf_pg_create(h)) == NULL) + return (ilbd_scf_err_to_ilb_err()); + + if ((pgiter = scf_iter_create(h)) == NULL) { + scf_pg_destroy(newpg); + return (ilbd_scf_err_to_ilb_err()); + } + + if 
((scf_ret = scf_iter_instance_pgs(pgiter, inst)) < 0) + goto out; + + while ((scf_ret = scf_iter_next_pg(pgiter, newpg)) > 0) { + ilbd_data_t data; + int scf_name_len = ILBD_MAX_NAME_LEN; + char pg_name[scf_name_len]; + + if (scf_pg_get_name(newpg, pg_name, sizeof (pg_name)) < 0) { + ret = ilbd_scf_err_to_ilb_err(); + goto out; + } + + /* + * if pg name indicates it's a ilb configuration, walk its prop + */ + data.pg_type = pg_type; + data.hc_data = NULL; + data.sg_data = NULL; + data.rule_data = NULL; + + switch (pg_type) { + case ILBD_SCF_RULE: + if (strncmp(ILBD_PG_NAME_RULE, pg_name, + strlen(ILBD_PG_NAME_RULE)) == 0) { + data.rule_data = calloc(1, + sizeof (ilb_rule_info_t)); + if (data.rule_data == NULL) { + ret = ILB_STATUS_ENOMEM; + goto out; + } + ret = ilbd_scf_pg_walk_props(newpg, + ilbd_scf_load_prop, &data); + if (ret != ILB_STATUS_OK) + goto out; + assert(data.rule_data != NULL); + /* set rule name */ + (void) strlcpy(data.rule_data->rl_name, + &pg_name[strlen(ILBD_PG_NAME_RULE)], + sizeof (data.rule_data->rl_name)); + + ret = cb(data.rule_data, port, arg2, NULL); + free(data.rule_data); + if (ret != ILB_STATUS_OK) + goto out; + } + break; + case ILBD_SCF_SG: + if (strncmp(ILBD_PG_NAME_SG, pg_name, + strlen(ILBD_PG_NAME_SG)) == 0) { + data.sg_data = calloc(1, + sizeof (ilb_sg_info_t)); + if (data.sg_data == NULL) + return (ILB_STATUS_ENOMEM); + ret = ilbd_scf_pg_walk_props(newpg, + ilbd_scf_load_prop, &data); + if (ret != ILB_STATUS_OK) { + free(data.sg_data); + goto out; + } + assert(data.sg_data != NULL); + /* set sg name */ + (void) strlcpy(data.sg_data->sg_name, + &pg_name[strlen(ILBD_PG_NAME_SG)], + sizeof (data.sg_data->sg_name)); + ret = cb(data.sg_data, port, arg2, NULL); + if (ret != ILB_STATUS_OK) { + free(data.sg_data); + goto out; + } + /* + * create a servergroup is two-step operation. + * 1. create an empty servergroup. + * 2. add server(s) to the group. 
+ * + * since we are here from: + * main_loop()->ilbd_read_config()-> + * ilbd_walk_sg_pgs() + * there is no cli to send. So in this + * path auditing will skip the + * adt_set_from_ucred() check + */ + if (data.sg_data->sg_srvcount > 0) { + ret = ilbd_add_server_to_group( + data.sg_data, port, NULL, NULL); + if (ret != ILB_STATUS_OK) { + free(data.sg_data); + goto out; + } + free(data.sg_data); + } + } + break; + case ILBD_SCF_HC: + if (strncmp(ILBD_PG_NAME_HC, pg_name, + strlen(ILBD_PG_NAME_HC)) == 0) { + data.hc_data = calloc(1, + sizeof (ilb_hc_info_t)); + if (data.hc_data == NULL) + return (ILB_STATUS_ENOMEM); + ret = ilbd_scf_pg_walk_props(newpg, + ilbd_scf_load_prop, &data); + if (ret != ILB_STATUS_OK) + goto out; + assert(data.hc_data != NULL); + /* set hc name */ + (void) strlcpy(data.hc_data->hci_name, + &pg_name[strlen(ILBD_PG_NAME_HC)], + sizeof (data.hc_data->hci_name)); + ret = cb(data.hc_data, port, arg2, NULL); + free(data.hc_data); + if (ret != ILB_STATUS_OK) + goto out; + } + break; + } + } + +out: + if (scf_ret < 0) + ret = ilbd_scf_err_to_ilb_err(); + scf_pg_destroy(newpg); + scf_iter_destroy(pgiter); + return (ret); +} + +typedef ilb_status_t (*ilbd_scf_walker_fn)(void *, int, struct passwd *, + ucred_t *); + +ilb_status_t +ilbd_walk_rule_pgs(ilb_status_t (*func)(ilb_rule_info_t *, int, + const struct passwd *, ucred_t *), void *arg1, void *arg2) +{ + scf_instance_t *inst; + scf_handle_t *h; + scf_service_t *svc; + ilb_status_t ret; + + ret = ilbd_scf_get_inst(&h, &svc, &inst); + if (ret != ILB_STATUS_OK) + return (ret); + + /* get rule prop group, transfer it to ilb_lrule_info_t */ + ret = ilbd_scf_instance_walk_pg(inst, ILBD_SCF_RULE, + (ilbd_scf_walker_fn)func, arg1, arg2); + ilbd_scf_destroy(h, svc, inst, NULL); + return (ret); +} + +ilb_status_t +ilbd_walk_sg_pgs(ilb_status_t (*func)(ilb_sg_info_t *, int, + const struct passwd *, ucred_t *), void *arg1, void *arg2) +{ + scf_instance_t *inst; + scf_handle_t *h; + scf_service_t *svc; + 
ilb_status_t ret; + + ret = ilbd_scf_get_inst(&h, &svc, &inst); + if (ret != ILB_STATUS_OK) + return (ret); + + ret = ilbd_scf_instance_walk_pg(inst, ILBD_SCF_SG, + (ilbd_scf_walker_fn)func, arg1, arg2); + ilbd_scf_destroy(h, svc, inst, NULL); + return (ret); +} + +ilb_status_t +ilbd_walk_hc_pgs(ilb_status_t (*func)(const ilb_hc_info_t *, int, + const struct passwd *, ucred_t *), void *arg1, void *arg2) +{ + scf_instance_t *inst; + scf_handle_t *h; + scf_service_t *svc; + ilb_status_t ret; + + ret = ilbd_scf_get_inst(&h, &svc, &inst); + if (ret != ILB_STATUS_OK) + return (ret); + + ret = ilbd_scf_instance_walk_pg(inst, ILBD_SCF_HC, + (ilbd_scf_walker_fn)func, arg1, arg2); + ilbd_scf_destroy(h, svc, inst, NULL); + return (ret); +} + +ilb_status_t +ilbd_change_prop(ilbd_scf_pg_type_t pg_type, const char *pg_name, + const char *prop_name, void *new_val) +{ + int ret; + scf_propertygroup_t *scfpg = NULL; + int scf_name_len = ILBD_MAX_NAME_LEN; + char scf_pgname[scf_name_len]; + scf_type_t scftype; + scf_value_t *scfval; + scf_handle_t *h; + + ilbd_name_to_scfpgname(pg_type, pg_name, scf_pgname); + ret = ilbd_scf_retrieve_pg(scf_pgname, &scfpg, B_FALSE); + if (ret != ILB_STATUS_EEXIST) + return (ret); + + assert(scfpg != NULL); + + h = scf_pg_handle(scfpg); + if (h == NULL) { + ret = ILB_STATUS_EINVAL; + goto done; + } + + if ((scfval = scf_value_create(h)) == NULL) { + ret = ILB_STATUS_ENOMEM; + goto done; + } + + if (pg_type == ILBD_SCF_RULE) { + scftype = SCF_TYPE_BOOLEAN; + scf_value_set_boolean(scfval, *(boolean_t *)new_val); + } else if (pg_type == ILBD_SCF_SG) { + scftype = SCF_TYPE_ASTRING; + (void) scf_value_set_astring(scfval, (char *)new_val); + } + ret = ilbd_scf_set_prop(scfpg, prop_name, scftype, scfval); + +done: + if (scf_pg_handle(scfpg) != NULL) + scf_handle_destroy(scf_pg_handle(scfpg)); + if (scfpg != NULL) + scf_pg_destroy(scfpg); + if (scfval != NULL) + scf_value_destroy(scfval); + return (ret); +} + +/* + * Update the persistent configuration with 
a new server, srv, added to a + * server group, sg. + */ +ilb_status_t +ilbd_scf_add_srv(ilbd_sg_t *sg, ilbd_srv_t *srv) +{ + scf_propertygroup_t *pg; + scf_handle_t *h; + scf_value_t *val; + ilb_status_t ret; + int scf_name_len = ILBD_MAX_NAME_LEN; + char buf[scf_name_len]; + char propname[scf_name_len]; + + ilbd_name_to_scfpgname(ILBD_SCF_SG, sg->isg_name, buf); + ret = ilbd_scf_retrieve_pg(buf, &pg, B_FALSE); + /* + * The server group does not exist in persistent storage. This + * cannot happen. Should probably transition the service to + * maintenance since it should be there. + */ + if (ret != ILB_STATUS_EEXIST) { + logerr("ilbd_scf_add_srv: SCF update failed - entering" + " maintenance mode"); + (void) smf_maintain_instance(ILB_FMRI, SMF_IMMEDIATE); + return (ILB_STATUS_INTERNAL); + } + + if ((h = scf_pg_handle(pg)) == NULL) { + ilbd_scf_destroy(NULL, NULL, NULL, pg); + return (ilbd_scf_err_to_ilb_err()); + } + + if ((val = scf_value_create(h)) == NULL) { + ilbd_scf_destroy(h, NULL, NULL, pg); + return (ILB_STATUS_ENOMEM); + } + ilbd_srv_scf_val(srv, buf); + (void) scf_value_set_astring(val, buf); + (void) snprintf(propname, sizeof (propname), "server%d", srv->isv_id); + ret = ilbd_scf_set_prop(pg, propname, SCF_TYPE_ASTRING, val); + + ilbd_scf_destroy(h, NULL, NULL, pg); + scf_value_destroy(val); + + return (ret); +} + +/* + * Delete a server, srv, of a server group, sg, from the persistent + * configuration. + */ +ilb_status_t +ilbd_scf_del_srv(ilbd_sg_t *sg, ilbd_srv_t *srv) +{ + ilb_status_t ret; + scf_propertygroup_t *pg; + scf_handle_t *h; + int scf_name_len = ILBD_MAX_NAME_LEN; + char buf[scf_name_len]; + scf_transaction_t *tx = NULL; + scf_transaction_entry_t *entry = NULL; + + ilbd_name_to_scfpgname(ILBD_SCF_SG, sg->isg_name, buf); + ret = ilbd_scf_retrieve_pg(buf, &pg, B_FALSE); + /* + * The server group does not exist in persistent storage. This + * cannot happen. THe caller of this function puts service in + * maintenance mode. 
+ */ + if (ret != ILB_STATUS_EEXIST) + return (ILB_STATUS_INTERNAL); + ret = ILB_STATUS_OK; + + if ((h = scf_pg_handle(pg)) == NULL) { + logdebug("ilbd_scf_del_srv: scf_pg_handle: %s\n", + scf_strerror(scf_error())); + ilbd_scf_destroy(NULL, NULL, NULL, pg); + return (ilbd_scf_err_to_ilb_err()); + } + + if ((tx = scf_transaction_create(h)) == NULL || + (entry = scf_entry_create(h)) == NULL) { + logdebug("ilbd_scf_del_srv: create scf transaction failed: " + "%s\n", scf_strerror(scf_error())); + ret = ilbd_scf_err_to_ilb_err(); + goto out; + } + + (void) snprintf(buf, sizeof (buf), "server%d", srv->isv_id); + + if (scf_transaction_start(tx, pg) == -1) { + logdebug("ilbd_scf_set_prop: start scf transaction failed: " + "%s\n", scf_strerror(scf_error())); + ret = ilbd_scf_err_to_ilb_err(); + goto out; + } + if (scf_transaction_property_delete(tx, entry, buf) == -1) { + logdebug("ilbd_scf_set_prop: delete property failed: %s\n", + scf_strerror(scf_error())); + ret = ilbd_scf_err_to_ilb_err(); + goto out; + } + if (scf_transaction_commit(tx) != 1) { + logdebug("ilbd_scf_set_prop: commit transaction failed: %s\n", + scf_strerror(scf_error())); + ret = ilbd_scf_err_to_ilb_err(); + } + +out: + if (entry != NULL) + scf_entry_destroy(entry); + if (tx != NULL) + scf_transaction_destroy(tx); + ilbd_scf_destroy(h, NULL, NULL, pg); + + return (ret); +} diff --git a/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_sg.c b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_sg.c new file mode 100644 index 0000000000..c308dc5525 --- /dev/null +++ b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_sg.c @@ -0,0 +1,1644 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <stdlib.h> +#include <strings.h> +#include <stddef.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/list.h> +#include <assert.h> +#include <errno.h> +#include <libilb.h> +#include <net/if.h> +#include <inet/ilb.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include "libilb_impl.h" +#include "ilbd.h" + +typedef enum { + not_searched, + stop_found, + cont_search, + fail_search +} srch_ind_t; + +static list_t ilbd_sg_hlist; + +static ilb_status_t i_delete_srv(ilbd_sg_t *, ilbd_srv_t *, int); +static void i_ilbd_free_srvID(ilbd_sg_t *, int32_t); + +/* Last parameter to pass to i_find_srv(), specifying the matching mode */ +#define MODE_ADDR 1 +#define MODE_SRVID 2 + +static ilbd_srv_t *i_find_srv(list_t *, ilb_sg_srv_t *, int); + +void +i_setup_sg_hlist(void) +{ + list_create(&ilbd_sg_hlist, sizeof (ilbd_sg_t), + offsetof(ilbd_sg_t, isg_link)); +} + +/* + * allocate storage for a daemon-internal server group, init counters + */ +static ilbd_sg_t * +i_ilbd_alloc_sg(char *name) +{ + ilbd_sg_t *d_sg; + + d_sg = calloc(sizeof (*d_sg), 1); + if (d_sg == NULL) + goto out; + + (void) strlcpy(d_sg->isg_name, name, sizeof (d_sg->isg_name)); + + list_create(&d_sg->isg_srvlist, sizeof (ilbd_srv_t), + offsetof(ilbd_srv_t, isv_srv_link)); + list_create(&d_sg->isg_rulelist, sizeof (ilbd_rule_t), + offsetof(ilbd_rule_t, irl_sglink)); + + 
list_insert_tail(&ilbd_sg_hlist, d_sg); +out: + return (d_sg); +} + +static ilb_status_t +i_ilbd_save_sg(ilbd_sg_t *d_sg, ilbd_scf_cmd_t scf_cmd, const char *prop_name, + char *valstr) +{ + switch (scf_cmd) { + case ILBD_SCF_CREATE: + return (ilbd_create_pg(ILBD_SCF_SG, (void *)d_sg)); + case ILBD_SCF_DESTROY: + return (ilbd_destroy_pg(ILBD_SCF_SG, d_sg->isg_name)); + case ILBD_SCF_ENABLE_DISABLE: + if (prop_name == NULL) + return (ILB_STATUS_EINVAL); + return (ilbd_change_prop(ILBD_SCF_SG, d_sg->isg_name, + prop_name, valstr)); + default: + logdebug("i_ilbd_save_sg: invalid scf cmd %d", scf_cmd); + return (ILB_STATUS_EINVAL); + } +} + +ilb_status_t +i_attach_rule2sg(ilbd_sg_t *sg, ilbd_rule_t *irl) +{ + /* assert: the same rule is attached to any sg only once */ + list_insert_tail(&sg->isg_rulelist, irl); + return (ILB_STATUS_OK); +} + +static void +i_ilbd_free_sg(ilbd_sg_t *sg) +{ + ilbd_srv_t *tmp_srv; + + if (sg == NULL) + return; + list_remove(&ilbd_sg_hlist, sg); + while ((tmp_srv = list_remove_tail(&sg->isg_srvlist)) != NULL) { + i_ilbd_free_srvID(sg, tmp_srv->isv_id); + free(tmp_srv); + sg->isg_srvcount--; + } + free(sg); +} + +ilbd_sg_t * +i_find_sg_byname(const char *name) +{ + ilbd_sg_t *sg; + + /* find position of sg in list */ + for (sg = list_head(&ilbd_sg_hlist); sg != NULL; + sg = list_next(&ilbd_sg_hlist, sg)) { + if (strncmp(sg->isg_name, name, sizeof (sg->isg_name)) == 0) + return (sg); + } + return (sg); +} + +/* + * Generates an audit record for enable-server, disable-server, remove-server + * delete-servergroup, create-servergroup and add-server subcommands. 
+ */ +static void +ilbd_audit_server_event(audit_sg_event_data_t *data, + ilbd_cmd_t cmd, ilb_status_t rc, ucred_t *ucredp) +{ + adt_session_data_t *ah; + adt_event_data_t *event; + au_event_t flag; + int audit_error; + + if ((ucredp == NULL) && ((cmd == ILBD_ADD_SERVER_TO_GROUP) || + (cmd == ILBD_CREATE_SERVERGROUP))) { + /* + * We came here from the path where ilbd is + * incorporating the ILB configuration from + * SCF. In that case, we skip auditing + */ + return; + } + + if (adt_start_session(&ah, NULL, 0) != 0) { + logerr("ilbd_audit_server_event: adt_start_session failed"); + exit(EXIT_FAILURE); + } + + if (adt_set_from_ucred(ah, ucredp, ADT_NEW) != 0) { + (void) adt_end_session(ah); + logerr("ilbd_audit_server_event: adt_set_from_ucred failed"); + exit(EXIT_FAILURE); + } + + if (cmd == ILBD_ENABLE_SERVER) + flag = ADT_ilb_enable_server; + else if (cmd == ILBD_DISABLE_SERVER) + flag = ADT_ilb_disable_server; + else if (cmd == ILBD_REM_SERVER_FROM_GROUP) + flag = ADT_ilb_remove_server; + else if (cmd == ILBD_ADD_SERVER_TO_GROUP) + flag = ADT_ilb_add_server; + else if (cmd == ILBD_CREATE_SERVERGROUP) + flag = ADT_ilb_create_servergroup; + else if (cmd == ILBD_DESTROY_SERVERGROUP) + flag = ADT_ilb_delete_servergroup; + + if ((event = adt_alloc_event(ah, flag)) == NULL) { + logerr("ilbd_audit_server_event: adt_alloc_event failed"); + exit(EXIT_FAILURE); + } + (void) memset((char *)event, 0, sizeof (adt_event_data_t)); + + switch (cmd) { + case ILBD_ENABLE_SERVER: + event->adt_ilb_enable_server.auth_used = + NET_ILB_ENABLE_AUTH; + event->adt_ilb_enable_server.server_id = + data->ed_serverid; + event->adt_ilb_enable_server.server_ipaddress = + data->ed_server_address; + break; + case ILBD_DISABLE_SERVER: + event->adt_ilb_disable_server.auth_used = + NET_ILB_ENABLE_AUTH; + event->adt_ilb_disable_server.server_id = + data->ed_serverid; + event->adt_ilb_disable_server.server_ipaddress = + data->ed_server_address; + break; + case ILBD_REM_SERVER_FROM_GROUP: + 
event->adt_ilb_remove_server.auth_used = + NET_ILB_CONFIG_AUTH; + event->adt_ilb_remove_server.server_id = + data->ed_serverid; + event->adt_ilb_remove_server.server_group = data->ed_sgroup; + event->adt_ilb_remove_server.server_ipaddress = + data->ed_server_address; + break; + case ILBD_CREATE_SERVERGROUP: + event->adt_ilb_create_servergroup.auth_used = + NET_ILB_CONFIG_AUTH; + event->adt_ilb_create_servergroup.server_group = + data->ed_sgroup; + break; + case ILBD_ADD_SERVER_TO_GROUP: + event->adt_ilb_add_server.auth_used = + NET_ILB_CONFIG_AUTH; + event->adt_ilb_add_server.server_ipaddress = + data->ed_server_address; + event->adt_ilb_add_server.server_id = + data->ed_serverid; + event->adt_ilb_add_server.server_group = + data->ed_sgroup; + event->adt_ilb_add_server.server_minport = + ntohs(data->ed_minport); + event->adt_ilb_add_server.server_maxport = + ntohs(data->ed_maxport); + break; + case ILBD_DESTROY_SERVERGROUP: + event->adt_ilb_delete_servergroup.auth_used = + NET_ILB_CONFIG_AUTH; + event->adt_ilb_delete_servergroup.server_group = + data->ed_sgroup; + break; + } + + /* Fill in success/failure */ + if (rc == ILB_STATUS_OK) { + if (adt_put_event(event, ADT_SUCCESS, ADT_SUCCESS) != 0) { + logerr("ilbd_audit_server_event:" + " adt_put_event failed"); + exit(EXIT_FAILURE); + } + } else { + audit_error = ilberror2auditerror(rc); + if (adt_put_event(event, ADT_FAILURE, audit_error) != 0) { + logerr("ilbd_audit_server_event:" + " adt_put_event failed"); + exit(EXIT_FAILURE); + } + } + adt_free_event(event); + (void) adt_end_session(ah); +} + +ilb_status_t +ilbd_destroy_sg(const char *sg_name, const struct passwd *ps, + ucred_t *ucredp) +{ + ilb_status_t rc; + ilbd_sg_t *tmp_sg; + audit_sg_event_data_t audit_sg_data; + + (void) memset(&audit_sg_data, 0, sizeof (audit_sg_event_data_t)); + audit_sg_data.ed_sgroup = (char *)sg_name; + + rc = ilbd_check_client_config_auth(ps); + if (rc != ILB_STATUS_OK) { + ilbd_audit_server_event(&audit_sg_data, + 
ILBD_DESTROY_SERVERGROUP, rc, ucredp); + return (rc); + } + + tmp_sg = i_find_sg_byname(sg_name); + if (tmp_sg == NULL) { + logdebug("ilbd_destroy_sg: cannot find specified server" + " group %s", sg_name); + ilbd_audit_server_event(&audit_sg_data, + ILBD_DESTROY_SERVERGROUP, ILB_STATUS_SGUNAVAIL, ucredp); + return (ILB_STATUS_SGUNAVAIL); + } + + /* + * we only destroy SGs that don't have any rules associated with + * them anymore. + */ + if (list_head(&tmp_sg->isg_rulelist) != NULL) { + logdebug("ilbd_destroy_sg: server group %s has rules" + " associated with it and thus cannot be" + " removed", tmp_sg->isg_name); + ilbd_audit_server_event(&audit_sg_data, + ILBD_DESTROY_SERVERGROUP, ILB_STATUS_SGINUSE, ucredp); + return (ILB_STATUS_SGINUSE); + } + + if (ps != NULL) { + rc = i_ilbd_save_sg(tmp_sg, ILBD_SCF_DESTROY, NULL, NULL); + if (rc != ILB_STATUS_OK) { + ilbd_audit_server_event(&audit_sg_data, + ILBD_DESTROY_SERVERGROUP, rc, ucredp); + return (rc); + } + } + i_ilbd_free_sg(tmp_sg); + ilbd_audit_server_event(&audit_sg_data, ILBD_DESTROY_SERVERGROUP, + rc, ucredp); + return (rc); +} + +/* ARGSUSED */ +/* + * Parameter ev_port is not used but has to have for read persistent configure + * ilbd_create_sg(), ilbd_create_hc() and ilbd_create_rule() are callbacks + * for ilbd_scf_instance_walk_pg() which requires the same signature. 
+ */ +ilb_status_t +ilbd_create_sg(ilb_sg_info_t *sg, int ev_port, const struct passwd *ps, + ucred_t *ucredp) +{ + ilb_status_t rc = ILB_STATUS_OK; + ilbd_sg_t *d_sg; + audit_sg_event_data_t audit_sg_data; + + (void) memset(&audit_sg_data, 0, sizeof (audit_sg_event_data_t)); + audit_sg_data.ed_sgroup = sg->sg_name; + + if (ps != NULL) { + rc = ilbd_check_client_config_auth(ps); + if (rc != ILB_STATUS_OK) { + ilbd_audit_server_event(&audit_sg_data, + ILBD_CREATE_SERVERGROUP, rc, ucredp); + return (rc); + } + } + + if (i_find_sg_byname(sg->sg_name) != NULL) { + logdebug("ilbd_create_sg: server group %s already exists", + sg->sg_name); + ilbd_audit_server_event(&audit_sg_data, + ILBD_CREATE_SERVERGROUP, ILB_STATUS_SGEXISTS, ucredp); + return (ILB_STATUS_SGEXISTS); + } + + d_sg = i_ilbd_alloc_sg(sg->sg_name); + if (d_sg == NULL) { + ilbd_audit_server_event(&audit_sg_data, + ILBD_CREATE_SERVERGROUP, ILB_STATUS_ENOMEM, ucredp); + return (ILB_STATUS_ENOMEM); + } + + /* + * we've successfully created the sg in memory. Before we can + * return "success", we need to reflect this in persistent + * storage + */ + if (ps != NULL) { + rc = i_ilbd_save_sg(d_sg, ILBD_SCF_CREATE, NULL, NULL); + if (rc != ILB_STATUS_OK) { + i_ilbd_free_sg(d_sg); + ilbd_audit_server_event(&audit_sg_data, + ILBD_CREATE_SERVERGROUP, rc, ucredp); + return (rc); + } + } + ilbd_audit_server_event(&audit_sg_data, + ILBD_CREATE_SERVERGROUP, rc, ucredp); + return (rc); +} + +/* + * This function checks whether tsrv should/can be inserted before lsrv + * and does so if possible. + * We keep the list in sorted order so we don't have to search it + * in its entirety for overlap every time we insert a new server. 
+ * Return code: + * stop_found: don't continue searching because we found a place + * cont_search: continue with next element in the list + * fail_search: search failed (caller translates to ILB_STATUS_EEXIST) + */ +static srch_ind_t +i_test_and_insert(ilbd_srv_t *tsrv, ilbd_srv_t *lsrv, list_t *srvlist) +{ + struct in6_addr *t1, *l1; + int fnd; + + t1 = &tsrv->isv_addr; + l1 = &lsrv->isv_addr; + + if ((fnd = ilb_cmp_in6_addr(t1, l1, NULL)) == 1) + return (cont_search); /* search can continue */ + + if (fnd == 0) { + logdebug("i_test_and_insert: specified server already exists"); + return (fail_search); + } + /* the list is kept in ascending order */ + list_insert_before(srvlist, lsrv, tsrv); + return (stop_found); +} + + +/* + * copy a server description [ip1,ip2,port1,port2,srvID,flags] + */ +#define COPY_SERVER(src, dest) \ + (dest)->sgs_addr = (src)->sgs_addr; \ + (dest)->sgs_minport = (src)->sgs_minport; \ + (dest)->sgs_maxport = (src)->sgs_maxport; \ + (dest)->sgs_id = (src)->sgs_id; \ + (void) strlcpy((dest)->sgs_srvID, (src)->sgs_srvID, \ + sizeof ((dest)->sgs_srvID)); \ + (dest)->sgs_flags = (src)->sgs_flags + +static ilb_status_t +i_add_srv2sg(ilbd_sg_t *dsg, ilb_sg_srv_t *srv, ilbd_srv_t **ret_srv) +{ + ilb_sg_srv_t *n_sg_srv; + list_t *srvlist; + srch_ind_t search = not_searched; + ilb_status_t rc = ILB_STATUS_OK; + ilbd_srv_t *nsrv, *lsrv; + in_port_t h_minport, h_maxport; + + nsrv = calloc(sizeof (*nsrv), 1); + if (nsrv == NULL) + return (ILB_STATUS_ENOMEM); + n_sg_srv = &nsrv->isv_srv; + COPY_SERVER(srv, n_sg_srv); + + /* + * port info is in network byte order - we need host byte order + * for comparisons purposes + */ + h_minport = ntohs(n_sg_srv->sgs_minport); + h_maxport = ntohs(n_sg_srv->sgs_maxport); + if (h_minport != 0 && h_minport > h_maxport) + n_sg_srv->sgs_maxport = n_sg_srv->sgs_minport; + + srvlist = &dsg->isg_srvlist; + + lsrv = list_head(srvlist); + if (lsrv == NULL) { + list_insert_head(srvlist, nsrv); + } else { + while (lsrv != 
NULL) { + search = i_test_and_insert(nsrv, lsrv, + srvlist); + + if (search != cont_search) + break; + lsrv = list_next(srvlist, lsrv); + + /* if reaches the end of list, insert to the tail */ + if (search == cont_search && lsrv == NULL) + list_insert_tail(srvlist, nsrv); + } + if (search == fail_search) + rc = ILB_STATUS_EEXIST; + } + + if (rc == ILB_STATUS_OK) { + dsg->isg_srvcount++; + *ret_srv = nsrv; + } else { + free(nsrv); + } + + return (rc); +} + +/* + * Allocate a server ID. The algorithm is simple. Just check the ID array + * of the server group and find an unused ID. If *set_id is given, it + * means that the ID is already allocated and the ID array needs to be + * updated. This is the case when ilbd reads from the persistent + * configuration. + */ +static int32_t +i_ilbd_alloc_srvID(ilbd_sg_t *sg, int32_t *set_id) +{ + int32_t id; + int32_t i; + + /* The server ID is already allocated, just update the ID array. */ + if (set_id != NULL) { + assert(sg->isg_id_arr[*set_id] == 0); + sg->isg_id_arr[*set_id] = 1; + return (*set_id); + } + + /* if we're "full up", give back something invalid */ + if (sg->isg_srvcount == MAX_SRVCOUNT) + return (BAD_SRVID); + + i = sg->isg_max_id; + for (id = 0; id < MAX_SRVCOUNT; id++) { + if (sg->isg_id_arr[(id + i) % MAX_SRVCOUNT] == 0) + break; + } + + sg->isg_max_id = (id + i) % MAX_SRVCOUNT; + sg->isg_id_arr[sg->isg_max_id] = 1; + return (sg->isg_max_id); +} + +/* + * Free a server ID by updating the server group's ID array. 
+ */ +static void +i_ilbd_free_srvID(ilbd_sg_t *sg, int32_t id) +{ + assert(sg->isg_id_arr[id] == 1); + sg->isg_id_arr[id] = 0; +} + +/* + * This function is called by ilbd_add_server_to_group() and + * ilb_remove_server_group() to create a audit record for a + * failed servicing of add-server/remove-server command + */ +static void +fill_audit_record(ilb_sg_info_t *sg, audit_sg_event_data_t *audit_sg_data, + ilbd_cmd_t cmd, ilb_status_t rc, ucred_t *ucredp) +{ + ilb_sg_srv_t *tsrv; + int i; + + for (i = 0; i < sg->sg_srvcount; i++) { + tsrv = &sg->sg_servers[i]; + if (cmd == ILBD_ADD_SERVER_TO_GROUP) { + char addrstr_buf[INET6_ADDRSTRLEN]; + + audit_sg_data->ed_serverid = NULL; + ilbd_addr2str(&tsrv->sgs_addr, addrstr_buf, + sizeof (addrstr_buf)); + audit_sg_data->ed_server_address = addrstr_buf; + audit_sg_data->ed_minport = tsrv->sgs_minport; + audit_sg_data->ed_maxport = tsrv->sgs_maxport; + audit_sg_data->ed_sgroup = sg->sg_name; + } else if (cmd == ILBD_REM_SERVER_FROM_GROUP) { + audit_sg_data->ed_serverid = tsrv->sgs_srvID; + audit_sg_data->ed_sgroup = sg->sg_name; + audit_sg_data->ed_server_address = NULL; + audit_sg_data->ed_minport = 0; + audit_sg_data->ed_maxport = 0; + } + ilbd_audit_server_event(audit_sg_data, cmd, rc, ucredp); + } +} + +/* + * the name(s) of the server(s) are encoded in the sg. 
+ */ +ilb_status_t +ilbd_add_server_to_group(ilb_sg_info_t *sg_info, int ev_port, + const struct passwd *ps, ucred_t *ucredp) +{ + ilb_status_t rc = ILB_STATUS_OK; + ilbd_sg_t *tmp_sg; + int i, j; + int32_t new_id = BAD_SRVID; + int32_t af = AF_UNSPEC; + ilbd_srv_t *nsrv; + ilb_sg_srv_t *srv; + audit_sg_event_data_t audit_sg_data; + char addrstr_buf[INET6_ADDRSTRLEN]; + + if (ps != NULL) { + rc = ilbd_check_client_config_auth(ps); + if (rc != ILB_STATUS_OK) { + fill_audit_record(sg_info, &audit_sg_data, + ILBD_ADD_SERVER_TO_GROUP, rc, ucredp); + return (rc); + } + } + + tmp_sg = i_find_sg_byname(sg_info->sg_name); + if (tmp_sg == NULL) { + logdebug("ilbd_add_server_to_group: server" + " group %s does not exist", sg_info->sg_name); + fill_audit_record(sg_info, &audit_sg_data, + ILBD_ADD_SERVER_TO_GROUP, ILB_STATUS_ENOENT, ucredp); + return (ILB_STATUS_ENOENT); + } + + /* + * we do the dance with address family below to make sure only + * IP addresses in the same AF get into an SG; the first one to get + * in sets the "tone" + * if this is the first server to join a group, check whether + * there's no mismatch with any *rules* already attached + */ + if (tmp_sg->isg_srvcount > 0) { + ilbd_srv_t *tsrv = list_head(&tmp_sg->isg_srvlist); + + af = GET_AF(&tsrv->isv_addr); + } else { + ilbd_rule_t *irl = list_head(&tmp_sg->isg_rulelist); + + if (irl != NULL) + af = GET_AF(&irl->irl_vip); + } + + for (i = 0; i < sg_info->sg_srvcount; i++) { + srv = &sg_info->sg_servers[i]; + + (void) memset(&audit_sg_data, 0, sizeof (audit_sg_data)); + ilbd_addr2str(&srv->sgs_addr, addrstr_buf, + sizeof (addrstr_buf)); + audit_sg_data.ed_server_address = addrstr_buf; + audit_sg_data.ed_minport = srv->sgs_minport; + audit_sg_data.ed_maxport = srv->sgs_maxport; + audit_sg_data.ed_sgroup = sg_info->sg_name; + + /* only test if we have sth to test against */ + if (af != AF_UNSPEC) { + int32_t sgs_af = GET_AF(&srv->sgs_addr); + + if (af != sgs_af) { + logdebug("address family mismatch with 
previous" + " hosts in servergroup or with rule"); + rc = ILB_STATUS_MISMATCHH; + ilbd_audit_server_event(&audit_sg_data, + ILBD_ADD_SERVER_TO_GROUP, rc, ucredp); + goto rollback; + } + } + + /* + * PS: NULL means daemon is loading configure from scf. + * ServerID is already assigned, just update the ID array. + */ + if (ps != NULL) { + new_id = i_ilbd_alloc_srvID(tmp_sg, NULL); + if (new_id == BAD_SRVID) { + logdebug("ilbd_add_server_to_group: server" + "group %s is full, no more servers" + " can be added", sg_info->sg_name); + rc = ILB_STATUS_SGFULL; + ilbd_audit_server_event(&audit_sg_data, + ILBD_ADD_SERVER_TO_GROUP, rc, ucredp); + goto rollback; + } + srv->sgs_id = new_id; + } else { + new_id = i_ilbd_alloc_srvID(tmp_sg, &srv->sgs_id); + } + + /* + * here we implement the requirement that server IDs start + * with a character that is not legal in hostnames - in our + * case, a "_" (underscore). + */ + (void) snprintf(srv->sgs_srvID, + sizeof (srv->sgs_srvID), "%c%s.%d", ILB_SRVID_PREFIX, + tmp_sg->isg_name, srv->sgs_id); + audit_sg_data.ed_serverid = srv->sgs_srvID; + + /* + * Before we update the kernel rules by adding the server, + * we need to make checks and fail if any of the + * following is true: + * + * o if the server has single port and the servergroup + * is associated to a DSR rule with a port range + * o if the server has a port range and the servergroup + * is associated to a DSR rule with a port range and + * the rule's min and max port does not exactly + * match that of the server's. + * o if the the server has a port range and the servergroup + * is associated to a NAT/Half-NAT rule with a port range + * and the rule's port range size does not match that + * of the server's. + * o if the rule has a fixed hc port, check that this port + * is valid in the server's port specification. 
+ */ + rc = i_check_srv2rules(&tmp_sg->isg_rulelist, srv); + if (rc != ILB_STATUS_OK) { + ilbd_audit_server_event(&audit_sg_data, + ILBD_ADD_SERVER_TO_GROUP, rc, ucredp); + goto rollback; + } + + if ((rc = i_add_srv2sg(tmp_sg, srv, &nsrv)) != ILB_STATUS_OK) { + ilbd_audit_server_event(&audit_sg_data, + ILBD_ADD_SERVER_TO_GROUP, rc, ucredp); + goto rollback; + } + + rc = i_add_srv2krules(&tmp_sg->isg_rulelist, &nsrv->isv_srv, + ev_port); + if (rc != ILB_STATUS_OK) { + ilbd_audit_server_event(&audit_sg_data, + ILBD_ADD_SERVER_TO_GROUP, rc, ucredp); + /* + * The failure may be due to the serverid being on + * hold in kernel for connection draining. But ilbd + * has no way of knowing that. So we are freeing up + * the serverid, and may run into the risk of + * having this failure again, if we choose this + * serverid when processing the next add-server + * command for this servergroup, while connection + * draining is underway. We assume that the user + * will read the man page after he/she encounters + * this failure, and learn to not add any server + * to the servergroup until connection draining of + * all servers in the servergroup is complete. + * XXX Need to revisit this when connection draining + * is reworked + */ + list_remove(&tmp_sg->isg_srvlist, nsrv); + i_ilbd_free_srvID(tmp_sg, nsrv->isv_id); + free(nsrv); + tmp_sg->isg_srvcount--; + goto rollback; + } + if (ps != NULL) { + rc = ilbd_scf_add_srv(tmp_sg, nsrv); + if (rc != ILB_STATUS_OK) { + /* + * The following should not fail since the + * server is just added. Just in case, we + * pass in -1 as the event port to avoid + * roll back in i_rem_srv_frm_krules() called + * by i_delete_srv(). 
+ */ + ilbd_audit_server_event(&audit_sg_data, + ILBD_ADD_SERVER_TO_GROUP, rc, ucredp); + (void) i_delete_srv(tmp_sg, nsrv, -1); + break; + } + } + } + + if (rc == ILB_STATUS_OK) { + ilbd_audit_server_event(&audit_sg_data, + ILBD_ADD_SERVER_TO_GROUP, rc, ucredp); + return (rc); + } + +rollback: + /* + * If ilbd is initializing based on the SCF data and something fails, + * the only choice is to transition the service to maintanence mode... + */ + if (ps == NULL) { + logerr("%s: failure during initialization -" + " entering maintenance mode", __func__); + (void) smf_maintain_instance(ILB_FMRI, SMF_IMMEDIATE); + return (rc); + } + + /* + * we need to roll back all servers previous to the one + * that just caused the failure + */ + for (j = i-1; j >= 0; j--) { + srv = &sg_info->sg_servers[j]; + + /* We should be able to find those servers just added. */ + nsrv = i_find_srv(&tmp_sg->isg_srvlist, srv, MODE_SRVID); + assert(nsrv != NULL); + (void) i_delete_srv(tmp_sg, nsrv, -1); + } + return (rc); +} + +static srch_ind_t +i_match_srvID(ilb_sg_srv_t *sg_srv, ilbd_srv_t *lsrv) +{ + if (strncmp(sg_srv->sgs_srvID, lsrv->isv_srvID, + sizeof (sg_srv->sgs_srvID)) == 0) { + return (stop_found); + } + return (cont_search); +} + +/* + * Sanity check on a rule's port specification against all the servers' + * specification in its associated server group. + * + * 1. If the health check's probe port (hcport) is specified. + * - if server port range is specified, check if hcport is inside + * the range + * - if no server port is specified (meaning the port range is the same as + * the rule's port range), check if hcport is inside the rule's range. + * + * 2. If a server has no port specification, there is no conflict. + * + * 3. If the rule's load balance mode is DSR, a server port specification must + * be exactly the same as the rule's. + * + * 4. 
In other modes (NAT and half-NAT), the server's port range must be + * the same as the rule's, unless it is doing port collapsing (the server's + * port range is only 1). + */ +ilb_status_t +ilbd_sg_check_rule_port(ilbd_sg_t *sg, ilb_rule_info_t *rl) +{ + ilbd_srv_t *srv; + in_port_t r_minport, r_maxport; + + /* Don't allow adding a rule to a sg with no server, for now... */ + if (sg->isg_srvcount == 0) + return (ILB_STATUS_SGEMPTY); + + r_minport = ntohs(rl->rl_minport); + r_maxport = ntohs(rl->rl_maxport); + + for (srv = list_head(&sg->isg_srvlist); srv != NULL; + srv = list_next(&sg->isg_srvlist, srv)) { + in_port_t srv_minport, srv_maxport; + int range; + + srv_minport = ntohs(srv->isv_minport); + srv_maxport = ntohs(srv->isv_maxport); + range = srv_maxport - srv_minport; + + /* + * If the rule has a specific probe port, check if that port is + * valid in all the servers' port specification. + */ + if (rl->rl_hcpflag == ILB_HCI_PROBE_FIX) { + in_port_t hcport = ntohs(rl->rl_hcport); + + /* No server port specified. */ + if (srv_minport == 0) { + if (hcport > r_maxport || hcport < r_minport) { + return (ILB_STATUS_BADSG); + } + } else { + if (hcport > srv_maxport || + hcport < srv_minport) { + return (ILB_STATUS_BADSG); + } + } + } + + /* + * There is no server port specification, so there cannot be + * any conflict. 
+ */ + if (srv_minport == 0) + continue; + + if (rl->rl_topo == ILB_TOPO_DSR) { + if (r_minport != srv_minport || + r_maxport != srv_maxport) { + return (ILB_STATUS_BADSG); + } + } else { + if ((range != r_maxport - r_minport) && range != 0) + return (ILB_STATUS_BADSG); + } + } + + return (ILB_STATUS_OK); +} + +static srch_ind_t +i_match_srvIP(ilb_sg_srv_t *sg_srv, ilbd_srv_t *lsrv) +{ + if (IN6_ARE_ADDR_EQUAL(&sg_srv->sgs_addr, &lsrv->isv_addr)) + return (stop_found); + return (cont_search); +} + +static ilbd_srv_t * +i_find_srv(list_t *srvlist, ilb_sg_srv_t *sg_srv, int cmpmode) +{ + ilbd_srv_t *tmp_srv; + srch_ind_t srch_res = cont_search; + + for (tmp_srv = list_head(srvlist); tmp_srv != NULL; + tmp_srv = list_next(srvlist, tmp_srv)) { + switch (cmpmode) { + case MODE_ADDR: + srch_res = i_match_srvIP(sg_srv, tmp_srv); + break; + case MODE_SRVID: + srch_res = i_match_srvID(sg_srv, tmp_srv); + break; + } + if (srch_res == stop_found) + break; + } + + if (srch_res == stop_found) + return (tmp_srv); + return (NULL); +} + +static ilb_status_t +i_delete_srv(ilbd_sg_t *sg, ilbd_srv_t *srv, int ev_port) +{ + ilb_status_t rc; + + rc = i_rem_srv_frm_krules(&sg->isg_rulelist, &srv->isv_srv, ev_port); + if (rc != ILB_STATUS_OK) + return (rc); + list_remove(&sg->isg_srvlist, srv); + i_ilbd_free_srvID(sg, srv->isv_id); + free(srv); + sg->isg_srvcount--; + return (ILB_STATUS_OK); +} + +/* + * some people argue that returning anything here is + * useless - what *do* you do if you can't remove/destroy + * something anyway? 
+ */ +ilb_status_t +ilbd_rem_server_from_group(ilb_sg_info_t *sg_info, int ev_port, + const struct passwd *ps, ucred_t *ucredp) +{ + ilb_status_t rc = ILB_STATUS_OK; + ilbd_sg_t *tmp_sg; + ilbd_srv_t *srv, tmp_srv; + ilb_sg_srv_t *tsrv; + audit_sg_event_data_t audit_sg_data; + char addrstr_buf[INET6_ADDRSTRLEN]; + + rc = ilbd_check_client_config_auth(ps); + if (rc != ILB_STATUS_OK) { + fill_audit_record(sg_info, &audit_sg_data, + ILBD_REM_SERVER_FROM_GROUP, rc, ucredp); + return (rc); + } + + tmp_sg = i_find_sg_byname(sg_info->sg_name); + if (tmp_sg == NULL) { + logdebug("%s: server group %s\n does not exist", __func__, + sg_info->sg_name); + fill_audit_record(sg_info, &audit_sg_data, + ILBD_REM_SERVER_FROM_GROUP, ILB_STATUS_SGUNAVAIL, ucredp); + return (ILB_STATUS_SGUNAVAIL); + } + tsrv = &sg_info->sg_servers[0]; + audit_sg_data.ed_serverid = tsrv->sgs_srvID; + audit_sg_data.ed_sgroup = sg_info->sg_name; + audit_sg_data.ed_server_address = NULL; + + assert(sg_info->sg_srvcount == 1); + srv = i_find_srv(&tmp_sg->isg_srvlist, &sg_info->sg_servers[0], + MODE_SRVID); + if (srv == NULL) { + logdebug("%s: cannot find server in server group %s", __func__, + sg_info->sg_name); + ilbd_audit_server_event(&audit_sg_data, + ILBD_REM_SERVER_FROM_GROUP, ILB_STATUS_SRVUNAVAIL, ucredp); + return (ILB_STATUS_SRVUNAVAIL); + } + tsrv = &srv->isv_srv; + ilbd_addr2str(&tsrv->sgs_addr, addrstr_buf, + sizeof (addrstr_buf)); + audit_sg_data.ed_server_address = addrstr_buf; + /* + * i_delete_srv frees srv, therefore we need to save + * this information for ilbd_scf_del_srv + */ + (void) memcpy(&tmp_srv, srv, sizeof (tmp_srv)); + + rc = i_delete_srv(tmp_sg, srv, ev_port); + if (rc != ILB_STATUS_OK) { + ilbd_audit_server_event(&audit_sg_data, + ILBD_REM_SERVER_FROM_GROUP, rc, ucredp); + return (rc); + } + + if (ps != NULL) { + if ((rc = ilbd_scf_del_srv(tmp_sg, &tmp_srv)) != + ILB_STATUS_OK) { + ilbd_audit_server_event(&audit_sg_data, + ILBD_REM_SERVER_FROM_GROUP, rc, ucredp); + logerr("%s: 
SCF update failed - entering maintenance" + " mode", __func__); + (void) smf_maintain_instance(ILB_FMRI, SMF_IMMEDIATE); + } + } + ilbd_audit_server_event(&audit_sg_data, + ILBD_REM_SERVER_FROM_GROUP, rc, ucredp); + return (rc); +} + +/* + * Copy the names of all server groups, rules or health checks (selected + * by cmd) into the reply buffer, stopping as soon as the space left in + * the buffer cannot hold another name. On return *rbufsz holds the size + * of the reply. Returns ILB_STATUS_INVAL_CMD for any other cmd. + */ +ilb_status_t +ilbd_retrieve_names(ilbd_cmd_t cmd, uint32_t *rbuf, size_t *rbufsz) +{ + ilb_status_t rc = ILB_STATUS_OK; + ilbd_namelist_t *nlist; + size_t tmp_rbufsz; + + tmp_rbufsz = *rbufsz; + /* Set up the reply buffer. rbufsz will be set to the new size. */ + ilbd_reply_ok(rbuf, rbufsz); + + /* Calculate how much space is left for holding name info. */ + *rbufsz += sizeof (ilbd_namelist_t); + tmp_rbufsz -= *rbufsz; + + nlist = (ilbd_namelist_t *)&((ilb_comm_t *)rbuf)->ic_data; + nlist->ilbl_count = 0; + + switch (cmd) { + case ILBD_RETRIEVE_SG_NAMES: { + ilbd_sg_t *sg; + + for (sg = list_head(&ilbd_sg_hlist); + sg != NULL && tmp_rbufsz >= sizeof (ilbd_name_t); + sg = list_next(&ilbd_sg_hlist, sg), + tmp_rbufsz -= sizeof (ilbd_name_t)) { + (void) strlcpy(nlist->ilbl_name[nlist->ilbl_count++], + sg->isg_name, sizeof (ilbd_name_t)); + } + break; + } + case ILBD_RETRIEVE_RULE_NAMES: { + ilbd_rule_t *irl; + extern list_t ilbd_rule_hlist; + + for (irl = list_head(&ilbd_rule_hlist); + irl != NULL && tmp_rbufsz >= sizeof (ilbd_name_t); + irl = list_next(&ilbd_rule_hlist, irl), + tmp_rbufsz -= sizeof (ilbd_name_t)) { + (void) strlcpy(nlist->ilbl_name[nlist->ilbl_count++], + irl->irl_name, sizeof (ilbd_name_t)); + } + break; + } + case ILBD_RETRIEVE_HC_NAMES: { + extern list_t ilbd_hc_list; + ilbd_hc_t *hc; + + /* + * Charge every copied name against the remaining space, + * as the two cases above do; otherwise the bound check + * never trips and the reply buffer can be overrun. + */ + for (hc = list_head(&ilbd_hc_list); + hc != NULL && tmp_rbufsz >= sizeof (ilbd_name_t); + hc = list_next(&ilbd_hc_list, hc), + tmp_rbufsz -= sizeof (ilbd_name_t)) { + (void) strlcpy(nlist->ilbl_name[nlist->ilbl_count++], + hc->ihc_name, sizeof (ilbd_name_t)); + } + break; + } + default: + logdebug("ilbd_retrieve_names: unknown command"); + return (ILB_STATUS_INVAL_CMD); + } + + *rbufsz += nlist->ilbl_count * sizeof (ilbd_name_t); + return (rc); +} + +ilb_status_t
+ilbd_retrieve_sg_hosts(const char *sg_name, uint32_t *rbuf, size_t *rbufsz) +{ + ilbd_sg_t *dsg; + ilbd_srv_t *dsrv; + list_t *srvlist; + ilb_sg_info_t *sg_info; + size_t tmp_rbufsz; + + dsg = i_find_sg_byname(sg_name); + if (dsg == NULL) { + logdebug("ilbd_retrieve_sg_hosts: server group" + " %s not found", sg_name); + return (ILB_STATUS_SGUNAVAIL); + } + + srvlist = &dsg->isg_srvlist; + dsrv = list_head(srvlist); + + tmp_rbufsz = *rbufsz; + ilbd_reply_ok(rbuf, rbufsz); + + /* Calculate the size to hold all the hosts info. */ + *rbufsz += sizeof (ilb_sg_info_t); + tmp_rbufsz -= *rbufsz; + + sg_info = (ilb_sg_info_t *)&((ilb_comm_t *)rbuf)->ic_data; + (void) strlcpy(sg_info->sg_name, sg_name, sizeof (sg_info->sg_name)); + sg_info->sg_srvcount = 0; + + while (dsrv != NULL && tmp_rbufsz >= sizeof (ilb_sg_srv_t)) { + sg_info->sg_servers[sg_info->sg_srvcount++] = dsrv->isv_srv; + dsrv = list_next(srvlist, dsrv); + tmp_rbufsz -= sizeof (ilb_sg_srv_t); + } + *rbufsz += sg_info->sg_srvcount * sizeof (ilb_sg_srv_t); + return (ILB_STATUS_OK); +} + +/* + * this mapping function works on the assumption that HC only is + * active when a server is enabled. 
+ */ +static ilb_cmd_t +i_srvcmd_d2k(ilbd_srv_status_ind_t dcmd) +{ + ilb_cmd_t cmd; + + switch (dcmd) { + case stat_enable_server: + case stat_declare_srv_alive: + cmd = ILB_ENABLE_SERVERS; + break; + case stat_disable_server: + case stat_declare_srv_dead: + cmd = ILB_DISABLE_SERVERS; + break; + } + + return (cmd); +} + +ilb_status_t +ilbd_k_Xable_server(const struct in6_addr *addr, const char *rlname, + ilbd_srv_status_ind_t cmd) +{ + ilb_status_t rc; + ilb_servers_cmd_t kcmd; + int e; + + kcmd.cmd = i_srvcmd_d2k(cmd); + (void) strlcpy(kcmd.name, rlname, sizeof (kcmd.name)); + kcmd.num_servers = 1; + + kcmd.servers[0].addr = *addr; + kcmd.servers[0].err = 0; + + rc = do_ioctl(&kcmd, 0); + if (rc != ILB_STATUS_OK) + return (rc); + + if ((e = kcmd.servers[0].err) != 0) { + logdebug("ilbd_k_Xable_server: error %s occurred", + strerror(e)); + return (ilb_map_errno2ilbstat(e)); + } + + return (rc); +} + +#define IS_SRV_ENABLED(s) ILB_IS_SRV_ENABLED((s)->sgs_flags) +#define IS_SRV_DISABLED(s) (!(IS_SRV_ENABLED(s))) + +#define SET_SRV_ENABLED(s) ILB_SET_ENABLED((s)->sgs_flags) +#define SET_SRV_DISABLED(s) ILB_SET_DISABLED((s)->sgs_flags) + +static ilb_status_t +ilbd_Xable_server(ilb_sg_info_t *sg, const struct passwd *ps, + ilbd_srv_status_ind_t cmd, ucred_t *ucredp) +{ + ilb_status_t rc = ILB_STATUS_OK; + ilbd_sg_t *isg; + ilbd_srv_t *tmp_srv; + ilb_sg_srv_t *srv; + ilbd_rule_t *irl; + char *dot; + int scf_name_len = ILBD_MAX_NAME_LEN; + int scf_val_len = ILBD_MAX_VALUE_LEN; + char prop_name[scf_name_len]; + ilb_ip_addr_t ipaddr; + void *addrptr; + char ipstr[INET6_ADDRSTRLEN], valstr[scf_val_len]; + int ipver, vallen; + char sgname[ILB_NAMESZ]; + uint32_t nflags; + ilbd_srv_status_ind_t u_cmd; + audit_sg_event_data_t audit_sg_data; + char addrstr_buf[INET6_ADDRSTRLEN]; + + (void) memset(&audit_sg_data, 0, sizeof (audit_sg_data)); + + /* we currently only implement a "list" of one */ + assert(sg->sg_srvcount == 1); + + srv = &sg->sg_servers[0]; + 
audit_sg_data.ed_serverid = srv->sgs_srvID; + audit_sg_data.ed_server_address = NULL; + + rc = ilbd_check_client_enable_auth(ps); + if (rc != ILB_STATUS_OK) { + ilbd_audit_server_event(&audit_sg_data, + ILBD_ENABLE_SERVER, rc, ucredp); + return (rc); + } + + if (srv->sgs_srvID[0] != ILB_SRVID_PREFIX) { + switch (cmd) { + case stat_disable_server: + ilbd_audit_server_event(&audit_sg_data, + ILBD_DISABLE_SERVER, + ILB_STATUS_EINVAL, ucredp); + break; + case stat_enable_server: + ilbd_audit_server_event(&audit_sg_data, + ILBD_ENABLE_SERVER, + ILB_STATUS_EINVAL, ucredp); + break; + } + return (ILB_STATUS_EINVAL); + } + + /* + * the following asserts that serverIDs are constructed + * along the pattern "_"<SG name>"."<number> + * so we look for the final "." to recreate the SG name. + */ + (void) strlcpy(sgname, srv->sgs_srvID + 1, sizeof (sgname)); + dot = strrchr(sgname, (int)'.'); + if (dot == NULL) { + switch (cmd) { + case stat_disable_server: + ilbd_audit_server_event(&audit_sg_data, + ILBD_DISABLE_SERVER, + ILB_STATUS_EINVAL, ucredp); + break; + case stat_enable_server: + ilbd_audit_server_event(&audit_sg_data, + ILBD_ENABLE_SERVER, + ILB_STATUS_EINVAL, ucredp); + break; + } + return (ILB_STATUS_EINVAL); + } + + /* make the non-sg_name part "invisible" */ + *dot = '\0'; + isg = i_find_sg_byname(sgname); + if (isg == NULL) { + switch (cmd) { + case stat_disable_server: + ilbd_audit_server_event(&audit_sg_data, + ILBD_DISABLE_SERVER, + ILB_STATUS_ENOENT, ucredp); + break; + case stat_enable_server: + ilbd_audit_server_event(&audit_sg_data, + ILBD_ENABLE_SERVER, + ILB_STATUS_ENOENT, ucredp); + break; + } + return (ILB_STATUS_ENOENT); + } + + tmp_srv = i_find_srv(&isg->isg_srvlist, srv, MODE_SRVID); + if (tmp_srv == NULL) { + switch (cmd) { + case stat_disable_server: + ilbd_audit_server_event(&audit_sg_data, + ILBD_DISABLE_SERVER, + ILB_STATUS_ENOENT, ucredp); + break; + case stat_enable_server: + ilbd_audit_server_event(&audit_sg_data, + ILBD_ENABLE_SERVER, + 
ILB_STATUS_ENOENT, ucredp); + break; + } + return (ILB_STATUS_ENOENT); + } + + /* + * if server's servergroup is not associated with + * a rule, do not enable it. + */ + irl = list_head(&isg->isg_rulelist); + if (irl == NULL) { + switch (cmd) { + case stat_disable_server: + ilbd_audit_server_event(&audit_sg_data, + ILBD_DISABLE_SERVER, + ILB_STATUS_INVAL_ENBSRVR, ucredp); + break; + case stat_enable_server: + ilbd_audit_server_event(&audit_sg_data, + ILBD_ENABLE_SERVER, + ILB_STATUS_INVAL_ENBSRVR, ucredp); + break; + } + return (ILB_STATUS_INVAL_ENBSRVR); + } + /* Fill in the server IP address for audit record */ + ilbd_addr2str(&tmp_srv->isv_addr, addrstr_buf, + sizeof (addrstr_buf)); + audit_sg_data.ed_server_address = addrstr_buf; + + /* + * We have found the server in memory, perform the following + * tasks. + * + * 1. For every rule associated with this SG, + * - tell the kernel + * - tell the hc + * 2. Update our internal state and persistent configuration + * if the new state is not the same as the old one. + */ + /* 1. */ + for (; irl != NULL; irl = list_next(&isg->isg_rulelist, irl)) { + rc = ilbd_k_Xable_server(&tmp_srv->isv_addr, + irl->irl_name, cmd); + if (rc != ILB_STATUS_OK) { + switch (cmd) { + case stat_disable_server: + ilbd_audit_server_event(&audit_sg_data, + ILBD_DISABLE_SERVER, rc, ucredp); + break; + case stat_enable_server: + ilbd_audit_server_event(&audit_sg_data, + ILBD_ENABLE_SERVER, rc, ucredp); + break; + } + goto rollback_rules; + } + if (!RULE_HAS_HC(irl)) + continue; + + if (cmd == stat_disable_server) { + rc = ilbd_hc_disable_server(irl, + &tmp_srv->isv_srv); + } else { + assert(cmd == stat_enable_server); + rc = ilbd_hc_enable_server(irl, + &tmp_srv->isv_srv); + } + if (rc != ILB_STATUS_OK) { + logdebug("ilbd_Xable_server: cannot toggle srv " + "timer, rc =%d, srv =%s%d\n", rc, + tmp_srv->isv_srvID, + tmp_srv->isv_id); + } + } + + /* 2. 
*/ + if ((cmd == stat_disable_server && + IS_SRV_DISABLED(&tmp_srv->isv_srv)) || + (cmd == stat_enable_server && + IS_SRV_ENABLED(&tmp_srv->isv_srv))) { + switch (cmd) { + case stat_disable_server: + ilbd_audit_server_event(&audit_sg_data, + ILBD_DISABLE_SERVER, ILB_STATUS_OK, ucredp); + break; + case stat_enable_server: + ilbd_audit_server_event(&audit_sg_data, + ILBD_ENABLE_SERVER, ILB_STATUS_OK, ucredp); + break; + } + return (ILB_STATUS_OK); + } + + nflags = tmp_srv->isv_flags; + if (cmd == stat_enable_server) + ILB_SET_ENABLED(nflags); + else + ILB_SET_DISABLED(nflags); + + IP_COPY_IMPL_2_CLI(&tmp_srv->isv_addr, &ipaddr); + ipver = GET_AF(&tmp_srv->isv_addr); + vallen = (ipver == AF_INET) ? INET_ADDRSTRLEN : + INET6_ADDRSTRLEN; + addrptr = (ipver == AF_INET) ? (void *)&ipaddr.ia_v4 : + (void *)&ipaddr.ia_v6; + if (inet_ntop(ipver, addrptr, ipstr, vallen) == NULL) { + logerr("ilbd_Xable_server: failed transfer ip addr to" + " str"); + if (errno == ENOSPC) + rc = ILB_STATUS_ENOMEM; + else + rc = ILB_STATUS_GENERIC; + switch (cmd) { + case stat_disable_server: + ilbd_audit_server_event(&audit_sg_data, + ILBD_DISABLE_SERVER, rc, ucredp); + break; + case stat_enable_server: + ilbd_audit_server_event(&audit_sg_data, + ILBD_ENABLE_SERVER, rc, ucredp); + break; + } + goto rollback_rules; + } + + (void) snprintf(valstr, sizeof (valstr), "%s;%d;%d-%d;%d", + ipstr, ipver, + ntohs(tmp_srv->isv_minport), + ntohs(tmp_srv->isv_maxport), nflags); + (void) snprintf(prop_name, sizeof (prop_name), "server%d", + tmp_srv->isv_id); + + switch (cmd) { + case stat_disable_server: + rc = i_ilbd_save_sg(isg, ILBD_SCF_ENABLE_DISABLE, + prop_name, valstr); + if (rc == ILB_STATUS_OK) + SET_SRV_DISABLED(&tmp_srv->isv_srv); + break; + case stat_enable_server: + rc = i_ilbd_save_sg(isg, ILBD_SCF_ENABLE_DISABLE, + prop_name, valstr); + if (rc == ILB_STATUS_OK) + SET_SRV_ENABLED(&tmp_srv->isv_srv); + break; + } + if (rc == ILB_STATUS_OK) { + switch (cmd) { + case stat_disable_server: + 
ilbd_audit_server_event(&audit_sg_data, + ILBD_DISABLE_SERVER, ILB_STATUS_OK, ucredp); + break; + case stat_enable_server: + ilbd_audit_server_event(&audit_sg_data, + ILBD_ENABLE_SERVER, ILB_STATUS_OK, ucredp); + break; + } + return (ILB_STATUS_OK); + } + +rollback_rules: + if (cmd == stat_disable_server) + u_cmd = stat_enable_server; + else + u_cmd = stat_disable_server; + + if (irl == NULL) + irl = list_tail(&isg->isg_rulelist); + else + irl = list_prev(&isg->isg_rulelist, irl); + + for (; irl != NULL; irl = list_prev(&isg->isg_rulelist, irl)) { + (void) ilbd_k_Xable_server(&tmp_srv->isv_addr, + irl->irl_name, u_cmd); + if (!RULE_HAS_HC(irl)) + continue; + + if (u_cmd == stat_disable_server) + (void) ilbd_hc_disable_server(irl, &tmp_srv->isv_srv); + else + (void) ilbd_hc_enable_server(irl, &tmp_srv->isv_srv); + } + + return (rc); +} + +ilb_status_t +ilbd_disable_server(ilb_sg_info_t *sg, const struct passwd *ps, + ucred_t *ucredp) +{ + return (ilbd_Xable_server(sg, ps, stat_disable_server, ucredp)); +} + +ilb_status_t +ilbd_enable_server(ilb_sg_info_t *sg, const struct passwd *ps, + ucred_t *ucredp) +{ + return (ilbd_Xable_server(sg, ps, stat_enable_server, ucredp)); +} + +/* + * fill in the srvID for the given IP address in the 0th server + */ +ilb_status_t +ilbd_address_to_srvID(ilb_sg_info_t *sg, uint32_t *rbuf, size_t *rbufsz) +{ + ilbd_srv_t *tmp_srv; + ilb_sg_srv_t *tsrv; + ilbd_sg_t *tmp_sg; + + ilbd_reply_ok(rbuf, rbufsz); + tsrv = (ilb_sg_srv_t *)&((ilb_comm_t *)rbuf)->ic_data; + *rbufsz += sizeof (ilb_sg_srv_t); + + tmp_sg = i_find_sg_byname(sg->sg_name); + if (tmp_sg == NULL) + return (ILB_STATUS_SGUNAVAIL); + tsrv->sgs_addr = sg->sg_servers[0].sgs_addr; + + tmp_srv = i_find_srv(&tmp_sg->isg_srvlist, tsrv, MODE_ADDR); + if (tmp_srv == NULL) + return (ILB_STATUS_ENOENT); + + (void) strlcpy(tsrv->sgs_srvID, tmp_srv->isv_srvID, + sizeof (tsrv->sgs_srvID)); + + return (ILB_STATUS_OK); +} + +/* + * fill in the address for the given serverID in the 0th 
server + */ +ilb_status_t +ilbd_srvID_to_address(ilb_sg_info_t *sg, uint32_t *rbuf, size_t *rbufsz) +{ + ilbd_srv_t *tmp_srv; + ilb_sg_srv_t *tsrv; + ilbd_sg_t *tmp_sg; + + ilbd_reply_ok(rbuf, rbufsz); + tsrv = (ilb_sg_srv_t *)&((ilb_comm_t *)rbuf)->ic_data; + + tmp_sg = i_find_sg_byname(sg->sg_name); + if (tmp_sg == NULL) + return (ILB_STATUS_SGUNAVAIL); + (void) strlcpy(tsrv->sgs_srvID, sg->sg_servers[0].sgs_srvID, + sizeof (tsrv->sgs_srvID)); + + tmp_srv = i_find_srv(&tmp_sg->isg_srvlist, tsrv, MODE_SRVID); + if (tmp_srv == NULL) + return (ILB_STATUS_ENOENT); + + tsrv->sgs_addr = tmp_srv->isv_addr; + *rbufsz += sizeof (ilb_sg_srv_t); + + return (ILB_STATUS_OK); +} + +void +ilbd_addr2str(struct in6_addr *ipaddr, char *addrstr_buf, size_t sz) +{ + ilb_ip_addr_t ilb_ip; + + IP_COPY_IMPL_2_CLI(ipaddr, &ilb_ip); + addr2str(ilb_ip, addrstr_buf, sz); +} + +/* + * Convert an IP address to an address string of at most sz bytes in buf. + * An all-zero (unspecified) IPv4 or IPv6 address, or an unknown address + * family, yields an empty string. + */ +void +addr2str(ilb_ip_addr_t ip, char *buf, size_t sz) +{ + + switch (ip.ia_af) { + case AF_INET: + /* test the address value, not the address of the local copy */ + if (*(uint32_t *)&(ip).ia_v4 == 0) + buf[0] = '\0'; + else + (void) inet_ntop(AF_INET, (void *)&(ip).ia_v4, buf, sz); + break; + case AF_INET6: + if (IN6_IS_ADDR_UNSPECIFIED(&(ip).ia_v6)) { + buf[0] = '\0'; + break; + } + (void) inet_ntop(ip.ia_af, (void *)&(ip).ia_v6, buf, sz); + break; + default: buf[0] = '\0'; + } +} + +/* + * Map ilb_status errors to similar errno values from errno.h or + * adt_event.h to be used for audit record + */ +int +ilberror2auditerror(ilb_status_t rc) +{ + int audit_error; + + switch (rc) { + case ILB_STATUS_CFGAUTH: + audit_error = ADT_FAIL_VALUE_AUTH; + break; + case ILB_STATUS_ENOMEM: + audit_error = ENOMEM; + break; + case ILB_STATUS_ENOENT: + case ILB_STATUS_ENOHCINFO: + case ILB_STATUS_INVAL_HCTESTTYPE: + case ILB_STATUS_INVAL_CMD: + case ILB_STATUS_DUP_RULE: + case ILB_STATUS_ENORULE: + case ILB_STATUS_SGUNAVAIL: + audit_error = ENOENT; + break; + case ILB_STATUS_EINVAL: + case ILB_STATUS_MISMATCHSG: + case ILB_STATUS_MISMATCHH: + case
ILB_STATUS_BADSG: + case ILB_STATUS_INVAL_SRVR: + case ILB_STATUS_INVAL_ENBSRVR: + case ILB_STATUS_BADPORT: + audit_error = EINVAL; + break; + case ILB_STATUS_EEXIST: + case ILB_STATUS_SGEXISTS: + audit_error = EEXIST; + break; + case ILB_STATUS_EWOULDBLOCK: + audit_error = EWOULDBLOCK; + break; + case ILB_STATUS_INPROGRESS: + audit_error = EINPROGRESS; + break; + case ILB_STATUS_INTERNAL: + case ILB_STATUS_CALLBACK: + case ILB_STATUS_PERMIT: + case ILB_STATUS_RULE_NO_HC: + audit_error = ADT_FAIL_VALUE_PROGRAM; + break; + case ILB_STATUS_SOCKET: + audit_error = ENOTSOCK; + break; + case ILB_STATUS_READ: + case ILB_STATUS_WRITE: + audit_error = ENOTCONN; + break; + case ILB_STATUS_SGINUSE: + audit_error = EADDRINUSE; + break; + case ILB_STATUS_SEND: + audit_error = ECOMM; + break; + case ILB_STATUS_SGFULL: + audit_error = EOVERFLOW; + break; + case ILB_STATUS_NAMETOOLONG: + audit_error = ENAMETOOLONG; + break; + case ILB_STATUS_SRVUNAVAIL: + audit_error = EHOSTUNREACH; + break; + default: + audit_error = ADT_FAIL_VALUE_UNKNOWN; + break; + } + return (audit_error); +} diff --git a/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_support.c b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_support.c new file mode 100644 index 0000000000..a5fe6223fd --- /dev/null +++ b/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_support.c @@ -0,0 +1,296 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> +#include <unistd.h> +#include <stropts.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/stropts.h> +#include <sys/sockio.h> +#include <errno.h> +#include <sys/list.h> +#include <auth_attr.h> +#include <auth_list.h> +#include <secdb.h> +#include <libilb.h> +#include "libilb_impl.h" +#include "ilbd.h" + +/* + * logs error messages, either to stderr or syslog, depending on + * the -d option + */ +static boolean_t ilbd_debugging = B_FALSE; + +/* Socket to issue ioctl() to the kernel */ +static int ksock = -1; + +void +ilbd_enable_debug(void) +{ + ilbd_debugging = B_TRUE; +} + +boolean_t +is_debugging_on(void) +{ + return (ilbd_debugging); +} + +/* + * All routines log to syslog, unless the daemon is running in + * the foreground, in which case the logging goes to stderr. + * The following logging functions are available: + * + * + * logdebug(): A printf-like function for outputting debug messages + * (messages at LOG_DEBUG) that are only of use to developers. + * + * logerr(): A printf-like function for outputting error messages + * (messages at LOG_ERR) from the daemon. + * + * logperror*(): A set of functions used to output error messages + * (messages at LOG_ERR); these automatically append strerror(errno) + * and a newline to the message passed to them. + * + * NOTE: since the logging functions write to syslog, the messages passed + * to them are not eligible for localization. Thus, gettext() must + * *not* be used. + * + */ +/* PRINTFLIKE2 */ +void +ilbd_log(int pri, const char *fmt, ...) 
+{ + va_list ap; + va_start(ap, fmt); + + if (ilbd_debugging == B_TRUE) { + (void) vfprintf(stderr, fmt, ap); + (void) fprintf(stderr, "\n"); + } else { + vsyslog(pri, fmt, ap); + } + va_end(ap); + +} + +/* PRINTFLIKE1 */ +void +logperror(const char *str) +{ + if (ilbd_debugging == B_TRUE) + (void) fprintf(stderr, "%s: %s\n", str, strerror(errno)); + else + syslog(LOG_ERR, "%s: %m", str); +} + + +ilb_status_t +ilbd_check_client_config_auth(const struct passwd *pwd) +{ + if (chkauthattr(NET_ILB_CONFIG_AUTH, pwd->pw_name) == 0) { + logdebug("user %s is not authorized for" + " configuration operation", pwd->pw_name); + return (ILB_STATUS_CFGAUTH); + } + return (ILB_STATUS_OK); + +} + +ilb_status_t +ilbd_check_client_enable_auth(const struct passwd *pwd) +{ + if (chkauthattr(NET_ILB_ENABLE_AUTH, pwd->pw_name) == 0) { + logdebug("user %s is not authorized for" + " enable/disable operation", pwd->pw_name); + return (ILB_STATUS_CFGAUTH); + } + return (ILB_STATUS_OK); + +} + +/* + * input param. "err" should be one of the errnos defined in + * /usr/include/sys/errno.h + * this list is NOT complete. 
+ */ +ilb_status_t +ilb_map_errno2ilbstat(int err) +{ + ilb_status_t rc = ILB_STATUS_INTERNAL; + + switch (err) { + case 0: + rc = ILB_STATUS_OK; /* for completeness' sake */ + break; + case EINVAL: + rc = ILB_STATUS_EINVAL; + break; + case ENOENT: + rc = ILB_STATUS_ENOENT; + break; + case ENOMEM: + rc = ILB_STATUS_ENOMEM; + break; + case EINPROGRESS: + rc = ILB_STATUS_INPROGRESS; + break; + case EEXIST: + rc = ILB_STATUS_EEXIST; + break; + } + return (rc); +} + +static int +i_get_kcmd_sz(void *cmdp) +{ + int sz; + + switch (((ilb_rule_cmd_t *)cmdp)->cmd) { + case ILB_DESTROY_RULE: + case ILB_ENABLE_RULE: + case ILB_DISABLE_RULE: + sz = sizeof (ilb_name_cmd_t); + break; + case ILB_CREATE_RULE: + case ILB_LIST_RULE: + sz = sizeof (ilb_rule_cmd_t); + break; + case ILB_NUM_RULES: + sz = sizeof (ilb_num_rules_cmd_t); + break; + case ILB_NUM_SERVERS: + sz = sizeof (ilb_num_servers_cmd_t); + break; + case ILB_ADD_SERVERS: { + ilb_servers_info_cmd_t *kcmd = (ilb_servers_info_cmd_t *)cmdp; + + sz = sizeof (*kcmd) + ((kcmd->num_servers - 1) * + sizeof (kcmd->servers)); + break; + } + case ILB_RULE_NAMES: { + ilb_rule_names_cmd_t *kcmd = (ilb_rule_names_cmd_t *)cmdp; + + sz = sizeof (*kcmd) + + ((kcmd->num_names - 1) * sizeof (kcmd->buf)); + break; + } + case ILB_DEL_SERVERS: + case ILB_ENABLE_SERVERS: + case ILB_DISABLE_SERVERS: { + ilb_servers_cmd_t *kcmd = (ilb_servers_cmd_t *)cmdp; + + sz = sizeof (*kcmd) + + ((kcmd->num_servers - 1) * sizeof (kcmd->servers)); + break; + } + default: sz = -1; + break; + } + return (sz); +} + +/* + * parameter 'sz' is optional (indicated by == 0); if it's not set + * we try to derive it from cmdp->cmd + */ +ilb_status_t +do_ioctl(void *cmdp, ssize_t sz) +{ + struct strioctl ioc; + int i_rc; + + if (ksock == -1) { + ksock = socket(AF_INET, SOCK_DGRAM, 0); + if (ksock == -1) { + logperror("do_ioctl: AF_INET socket call" + " failed"); + return (ILB_STATUS_INTERNAL); + } + } + + (void) memset(&ioc, 0, sizeof (ioc)); + ioc.ic_cmd = SIOCILB; + 
ioc.ic_timout = 0; + ioc.ic_dp = cmdp; + + if (sz == 0) { + sz = i_get_kcmd_sz(cmdp); + + if (sz == -1) { + logdebug("do_ioctl: unknown command"); + return (ILB_STATUS_INVAL_CMD); + } + } + + ioc.ic_len = sz; + + i_rc = ioctl(ksock, I_STR, (caddr_t)&ioc); + if (i_rc == -1) { + logdebug("do_ioctl: SIOCILB ioctl (%d) failed: %s", + *(ilb_cmd_t *)cmdp, strerror(errno)); + return (ilb_map_errno2ilbstat(errno)); + } + + return (ILB_STATUS_OK); +} + +/* + * Create an OK reply to a client request. It is assumed that the passed + * in buffer is large enough to hold the reply. + */ +void +ilbd_reply_ok(uint32_t *rbuf, size_t *rbufsz) +{ + ilb_comm_t *ic = (ilb_comm_t *)rbuf; + + ic->ic_cmd = ILBD_CMD_OK; + /* Default is one exchange of request/response. */ + ic->ic_flags = ILB_COMM_END; + *rbufsz = sizeof (ilb_comm_t); +} + +/* + * Create an error reply to a client request. It is assumed that the passed + * in buffer is large enough to hold the reply. + */ +void +ilbd_reply_err(uint32_t *rbuf, size_t *rbufsz, ilb_status_t status) +{ + ilb_comm_t *ic = (ilb_comm_t *)rbuf; + + ic->ic_cmd = ILBD_CMD_ERROR; + /* Default is one exchange of request/response. 
*/ + ic->ic_flags = ILB_COMM_END; + *(ilb_status_t *)&ic->ic_data = status; + *rbufsz = sizeof (ilb_comm_t) + sizeof (ilb_status_t); +} diff --git a/usr/src/cmd/cmd-inet/usr.sbin/Makefile b/usr/src/cmd/cmd-inet/usr.sbin/Makefile index 386fedfe7f..61a3f6e4ec 100644 --- a/usr/src/cmd/cmd-inet/usr.sbin/Makefile +++ b/usr/src/cmd/cmd-inet/usr.sbin/Makefile @@ -65,14 +65,14 @@ K5RSHDOBJS= in.rshd.o K5TELNETOBJS= in.telnetd.o SRCS= $(PROGSRCS) $(OTHERSRC) -SUBDIRS= bootconfchk htable ifconfig in.ftpd in.rdisc in.routed \ +SUBDIRS= bootconfchk htable ifconfig ilbadm in.ftpd in.rdisc in.routed \ in.talkd inetadm inetconv ipmpstat ipqosconf ipsecutils \ kssl/kssladm kssl/ksslcfg ping routeadm snoop sppptun \ traceroute wificonfig -MSGSUBDIRS= bootconfchk htable ifconfig in.ftpd in.routed in.talkd inetadm \ - inetconv ipmpstat ipqosconf ipsecutils kssl/ksslcfg routeadm \ - sppptun snoop wificonfig +MSGSUBDIRS= bootconfchk htable ifconfig ilbadm in.ftpd in.routed in.talkd \ + inetadm inetconv ipmpstat ipqosconf ipsecutils kssl/ksslcfg \ + routeadm sppptun snoop wificonfig # As programs get lint-clean, add them here and to the 'lint' target. # Eventually this hack should go away, and all in PROG should be @@ -84,9 +84,9 @@ LINTCLEAN= 6to4relay arp in.rlogind in.rshd in.telnetd in.tftpd \ # they're all clean, replace the dependency of the lint target # with SUBDIRS. Also (sigh) deal with the commented-out build lines # for the lint rule. -LINTSUBDIRS= bootconfchk in.rdisc in.routed in.talkd inetadm inetconv \ - ipmpstat ipqosconf ipsecutils ping routeadm sppptun traceroute \ - wificonfig +LINTSUBDIRS= bootconfchk ilbadm in.rdisc in.routed in.talkd inetadm \ + inetconv ipmpstat ipqosconf ipsecutils ping routeadm sppptun \ + traceroute wificonfig # And as programs are verified not to attempt to write into constants, # -xstrconst should be used to ensure they stay that way. 
CONSTCLEAN= diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/Makefile b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/Makefile new file mode 100644 index 0000000000..7330464458 --- /dev/null +++ b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/Makefile @@ -0,0 +1,82 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# + +PROG= ilbadm + +ILB_OBJS = ilbadm.o ilbadm_sg.o ilbadm_rules.o ilbadm_hc.o +ILB_OBJS += ilbadm_subr.o ilbadm_import.o ilbadm_nat.o ilbadm_stats.o +LIST_OBJS = list.o +OBJS = $(ILB_OBJS) $(LIST_OBJS) + +ILB_SRCS= $(ILB_OBJS:.o=.c) +LIST_SRCS= $(LIST_OBJS:%.o=../../../../uts/common/os/%.c) + +SRCS= $(ILB_SRCS) $(LIST_SRCS) + +include ../../../Makefile.cmd +include ../../Makefile.cmd-inet + +LDLIBS += -lsocket -lnsl -lilb -linetutil -lkstat +CPPFLAGS += -I$(SRC)/lib/libilb/common -I$(SRC)/uts/common + +C99MODE = $(C99_ENABLE) + +# for debug: +CFLAGS = -g +STRIP_STABS= : + +POFILES = $(ILB_OBJS:%.o=%.po) +POFILE = $(PROG)_all.po + +.KEEP_STATE: +.PARALLEL: + +all: $(PROG) + +$(PROG): $(OBJS) + $(LINK.c) -o $@ $(OBJS) $(LDLIBS) + $(POST_PROCESS) + +$(POFILE): $(POFILES) + $(RM) $@ + cat $(POFILES) > $@ + +install: all $(ROOTUSRSBINPROG) + +clean: + $(RM) $(OBJS) $(POFILES) + +lint: $(ILB_SRCS) + $(LINT.c) $(ILB_SRCS) $(LDLIBS) + +check: $(ILB_SRCS) $(PROG).h + $(CSTYLE) -pP $(ILB_SRCS) $(PROG).h + $(HDRCHK) $(PROG).h + +include ../../../Makefile.targ + +# the below is needed to get list.o built +%.o: ../../../../uts/common/os/%.c + $(COMPILE.c) -o $@ $< + $(POST_PROCESS_O) diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm.c b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm.c new file mode 100644 index 0000000000..949fcc064c --- /dev/null +++ b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm.c @@ -0,0 +1,249 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License.
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <strings.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <fcntl.h> +#include <errno.h> +#include <libgen.h> +#include <libilb.h> +#include "ilbadm.h" + +/* + * Error strings for error values returned by ilbadm functions + */ +const char * +ilbadm_errstr(ilbadm_status_t rc) +{ + switch (rc) { + case ILBADM_OK: + return (gettext("no error")); + case ILBADM_FAIL: + return (gettext("processing of command failed")); + case ILBADM_ENOMEM: + return (gettext("memory allocation failure")); + case ILBADM_EINVAL: + return (gettext("invalid value - refer to ilbadm(1M)")); + case ILBADM_HCPRINT: + return (gettext("failed to print healthcheck values")); + case ILBADM_INVAL_AF: + return (gettext("address family is invalid")); + case ILBADM_INVAL_PORT: + return (gettext("port value is invalid")); + case ILBADM_INVAL_SRVID: + return (gettext("server ID is invalid")); + case ILBADM_INVAL_ADDR: + return (gettext("address is invalid")); + case ILBADM_INVAL_ARGS: + return (gettext("invalid/incompatible keywords - refer to" + " ilbadm(1M)")); + case ILBADM_ENOSGNAME: + return (gettext("servergroup name missing")); + case ILBADM_ENORULE: + return (gettext("rule name missing or specified" + " rule not found")); + case ILBADM_ENOSERVER: + return (gettext("server name missing or specified" + " server not found")); + case ILBADM_INVAL_ALG: + return (gettext("LB algorithm is invalid")); + case ILBADM_ENOPROTO: + return 
(gettext("protocol does not exist in" + " protocol database")); + case ILBADM_ENOSERVICE: + return (gettext("servicename does not exist in nameservices")); + case ILBADM_INVAL_OPER: + return (gettext("operation type is invalid")); + case ILBADM_INVAL_KEYWORD: + return (gettext("keyword is invalid - please refer" + " to ilbadm(1M)")); + case ILBADM_ASSIGNREQ: + return (gettext("assignment '=' missing")); + case ILBADM_NORECURSIVE: + return (gettext("recursive import not allowed")); + case ILBADM_INVAL_COMMAND: + return (gettext("subcommand is invalid - please refer" + " to ilbadm(1M)")); + case ILBADM_ENOPROXY: + return (gettext("proxy-src is missing")); + case ILBADM_INVAL_PROXY: + return (gettext("proxy-src not allowed")); + case ILBADM_ENOOPTION: + return (gettext("mandatory argument(s) missing - refer" + " to ilbadm(1M)")); + case ILBADM_TOOMANYIPADDR: + return (gettext("address range contains more than 255" + " IP addresses")); + case ILBADM_EXPORTFAIL: + return (gettext("could not export servergroup because" + " of lack of space")); + case ILBADM_INVAL_SYNTAX: + return (gettext("syntax failure - refer to ilbadm(1M)")); + case ILBADM_NOKEYWORD_VAL: + return (gettext("missing value")); + case ILBADM_LIBERR: + return (gettext("library error")); + default: + return (gettext("unknown error")); + + + } +} + +/* PRINTFLIKE1 */ +void +ilbadm_err(const char *format, ...) 
+{ + /* similar to warn() of dladm.c */ + va_list alist; + + (void) fprintf(stderr, "ilbadm: "); + + va_start(alist, format); + (void) vfprintf(stderr, format, alist); + va_end(alist); + + (void) fprintf(stderr, "\n"); +} + +void +Usage(char *name) +{ + (void) fprintf(stderr, gettext("Usage:\n")); + print_cmdlist_short(basename(name), stderr); + exit(1); +} + +/* print program name, version and copyright notice, then exit(0) */ +static void +print_version(char *name) +{ + (void) printf("%s %s\n", basename(name), ILBADM_VERSION); + /* never use a translated (catalog-supplied) string as the format */ + (void) printf("%s", gettext(ILBADM_COPYRIGHT)); + exit(0); +} + +void +unknown_opt(char **argv, int optind) +{ + ilbadm_err(gettext("bad or misplaced option %s"), argv[optind]); + exit(1); +} + +void +incomplete_cmdline(char *name) +{ + ilbadm_err(gettext("the command line is incomplete " + "(more arguments expected)")); + Usage(name); +} + +static void +bad_importfile(char *name, char *filename) +{ + ilbadm_err(gettext("file %s cannot be opened for reading"), filename); + Usage(name); +} + +int +main(int argc, char *argv[]) +{ + ilbadm_status_t rc; + int c; + int fd = -1; + int flags = 0; + + (void) setlocale(LC_ALL, ""); + +#if !defined(TEXT_DOMAIN) +#define TEXT_DOMAIN "SYS_TEST" +#endif + (void) textdomain(TEXT_DOMAIN); + + /* + * handle global options (-?, -V) first; neither takes an + * argument, so -V must not be followed by ':' in the optstring + */ + while ((c = getopt(argc, argv, ":V?")) != -1) { + switch ((char)c) { + case 'V': print_version(argv[0]); + /* not reached */ + break; + case '?': + Usage(argv[0]); + /* not reached */ + break; + default: unknown_opt(argv, optind - 1); + /* not reached */ + break; + } + } + + if (optind >= argc) + incomplete_cmdline(argv[0]); + + /* + * we can import from a given file (argv[2]) or from + * stdin (if no file given) + */ + if (strcasecmp(argv[1], "import-config") == 0 || + strcasecmp(argv[1], "import-cf") == 0) { + int shift = 0; + + if (argc > 2 && strcmp(argv[2], "-p") == 0) { + shift++; + flags |= ILBADM_IMPORT_PRESERVE; + } + + if (argc - shift < 3) + fd = 0; + else + if ((fd = open(argv[2+shift], O_RDONLY)) == -1) + bad_importfile(argv[0],
argv[2+shift]); + } + + argv++; + argc--; + + /* + * re-set optind for next callers of getopt() - they all believe they're + * the first. + */ + optind = 1; + optopt = 0; + + rc = ilbadm_import(fd, argc, argv, flags); + + /* + * The error messages have been printed out, using + * ilbadm_errstr() and ilb_errstr(), before we get here. + * So just set the exit value + */ + if (rc != ILBADM_OK) + return (1); + /* success */ + return (0); +} diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm.h b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm.h new file mode 100644 index 0000000000..6168ba0090 --- /dev/null +++ b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm.h @@ -0,0 +1,242 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _ILBADM_H +#define _ILBADM_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdio.h> +#include <sys/list.h> +#include <net/if.h> +#include <stdarg.h> +#include <inttypes.h> +#include <libilb.h> +#include <libintl.h> +#include <locale.h> + +#define ILBADM_VERSION "1.0" +#define ILBADM_COPYRIGHT \ + "Copyright 2009 Sun Microsystems, Inc. 
All rights reserved.\n" \ + "Use is subject to license terms.\n" + +/* + * flag values + */ +#define OPT_VALUE_LIST 0x0001 +#define OPT_IP_RANGE 0x0002 +#define OPT_PORTS 0x0004 +#define OPT_PORTS_ONLY 0x0008 +#define OPT_NAT 0x0010 +#define OPT_NUMERIC_ONLY 0x0020 + +#define ILBD_BAD_VAL (-1) + +#define ILBADM_LIST_FULL 0x0001 +#define ILBADM_LIST_PARSE 0x0002 +#define ILBADM_LIST_ENABLED 0x0004 +#define ILBADM_LIST_NOENABLED (~ILBADM_LIST_ENABLED) +#define ILBADM_LIST_DISABLED 0x0008 +#define ILBADM_LIST_NODISABLED (~ILBADM_LIST_DISABLED) + +#define ILBADM_IMPORT_PRESERVE 0x1000 + +#define V6_ADDRONLY 0x1 /* don't print surrounding "[]"s */ + +#define ILB_SRVID_SZ (ILB_NAMESZ - 5) +#define ILBD_NAMESZ ILB_NAMESZ + +#define ILB_MAX_PORT UINT16_MAX + +typedef enum { + ILBADM_OK = 0, + ILBADM_ASSIGNREQ, /* assignment '=' required */ + ILBADM_EINVAL, /* invalid value */ + ILBADM_ENOMEM, /* malloc failed */ + ILBADM_ENOOPTION, /* mandatory option missing */ + ILBADM_ENOPROTO, /* protocol not found in database */ + ILBADM_ENOPROXY, /* proxy-src is missing */ + ILBADM_ENOSERVICE, /* servicename not found in database */ + ILBADM_ENOSGNAME, /* servergroup name missing */ + ILBADM_ENORULE, /* rulename missing or no such rule */ + ILBADM_ENOSERVER, /* rulename missing or no such rule */ + ILBADM_EXPORTFAIL, /* too little space to do export servergroup */ + ILBADM_FAIL, /* processing of command failed */ + ILBADM_HCPRINT, /* failed to print healthcheck */ + ILBADM_INVAL_ADDR, /* invalid address */ + ILBADM_INVAL_AF, /* invalid address family */ + ILBADM_INVAL_ALG, /* LB algorithm failure */ + ILBADM_INVAL_ARGS, /* invalid arguments to command */ + ILBADM_INVAL_COMMAND, /* invalid command */ + ILBADM_INVAL_KEYWORD, /* invalid keyword */ + ILBADM_INVAL_OPER, /* invalid operation type */ + ILBADM_INVAL_PORT, /* invalid value specified for port */ + ILBADM_INVAL_PROXY, /* proxy-src not allowed */ + ILBADM_INVAL_SYNTAX, /* syntax error */ + ILBADM_INVAL_SRVID, /* server id is 
invalid (missing "_" ?) */ + ILBADM_LIBERR, /* translation of libilb errors. We also */ + /* set it in ilbadm fuctions to indicate */ + /* printing of non-generic error messages */ + ILBADM_NORECURSIVE, /* recursive import not allowed */ + ILBADM_TOOMANYIPADDR, /* too many addresses */ + ILBADM_NOKEYWORD_VAL /* no value specified for a keyword */ +} ilbadm_status_t; + + +typedef enum { + ILB_KEY_BAD = -1, + ILB_KEY_SERVER, + ILB_KEY_SERVRANGE, /* pseudo-key for SG creation */ + ILB_KEY_SERVERID, + ILB_KEY_VIP, + ILB_KEY_PORT, + ILB_KEY_PROTOCOL, + ILB_KEY_IPVERSION, + ILB_KEY_ALGORITHM, + ILB_KEY_TYPE, + ILB_KEY_SERVERGROUP, + ILB_KEY_HEALTHCHECK, + ILB_KEY_HCPORT, + ILB_KEY_SRC, + ILB_KEY_STICKY, + ILB_KEY_CONNDRAIN, /* otional timers ... */ + ILB_KEY_NAT_TO, + ILB_KEY_STICKY_TO, + ILB_KEY_HC_TEST, + ILB_KEY_HC_COUNT, + ILB_KEY_HC_INTERVAL, + ILB_KEY_HC_TIMEOUT +} ilbadm_key_code_t; + +/* + * we need a few codes for commands, can't use libilb ones + */ +typedef enum { + cmd_create_sg, + cmd_add_srv, + cmd_rem_srv, + cmd_enable_rule, + cmd_disable_rule, + cmd_enable_server, + cmd_disable_server +} ilbadm_cmd_t; + +/* filched from snoop_ether.c */ +typedef struct val_type { + int v_type; + char v_name[20]; + char v_alias[8]; /* undocumented */ +} ilbadm_val_type_t; + +typedef struct key_names { + ilbadm_key_code_t k_key; + char k_name[20]; + char k_alias[12]; /* undocumented */ +} ilbadm_key_name_t; + +typedef struct servnode { + list_node_t s_link; + ilb_server_data_t s_spec; +} ilbadm_servnode_t; + +typedef struct sgroup { + list_t sg_serv_list; /* list of servnode_t elements */ + int sg_count; + char *sg_name; +} ilbadm_sgroup_t; + +typedef struct cmd_hlp { + char *h_help; +} ilbadm_cmd_help_t; + +typedef ilbadm_status_t (* cmdfunc_t)(int, char **); + +typedef struct cmd_names { + char c_name[25]; + char c_alias[20]; /* undocumented */ + cmdfunc_t c_action; + ilbadm_cmd_help_t *c_help; /* for "usage" */ +} ilbadm_cmd_desc_t; + +ilbadm_status_t 
ilbadm_add_server_to_group(int, char **); +ilbadm_status_t ilbadm_create_servergroup(int, char **); +ilbadm_status_t ilbadm_destroy_servergroup(int, char **); +ilbadm_status_t ilbadm_rem_server_from_group(int, char **); + +ilbadm_status_t ilbadm_create_rule(int, char **); +ilbadm_status_t ilbadm_destroy_rule(int, char **); +ilbadm_status_t ilbadm_enable_rule(int, char **); +ilbadm_status_t ilbadm_disable_rule(int, char **); +ilbadm_status_t ilbadm_show_server(int, char **); +ilbadm_status_t ilbadm_enable_server(int, char **); +ilbadm_status_t ilbadm_disable_server(int, char **); + +ilbadm_status_t ilbadm_show_servergroups(int, char **); +ilbadm_status_t ilbadm_show_rules(int, char **); +ilbadm_status_t ilbadm_show_stats(int, char **); + +ilbadm_status_t ilbadm_create_hc(int, char **); +ilbadm_status_t ilbadm_destroy_hc(int, char **); +ilbadm_status_t ilbadm_show_hc(int, char **); +ilbadm_status_t ilbadm_show_hc_result(int, char **); + +ilbadm_status_t ilbadm_noimport(int, char **); + +ilbadm_status_t ilbadm_show_nat(int, char **); +ilbadm_status_t ilbadm_show_persist(int, char **); + +ilbadm_status_t i_parse_optstring(char *, void *, ilbadm_key_name_t *, + int, int *); +ilbadm_servnode_t *i_new_sg_elem(ilbadm_sgroup_t *); +ilbadm_status_t ilbadm_import(int, int, char *[], int); +ilbadm_status_t ilbadm_export(int, char *[]); +ilbadm_status_t ilbadm_export_servergroups(ilb_handle_t h, FILE *); +ilbadm_status_t ilbadm_export_hc(ilb_handle_t h, FILE *); +ilbadm_status_t ilbadm_export_rules(ilb_handle_t h, FILE *); + +ilbadm_status_t i_check_rule_spec(ilb_rule_data_t *); +ilbadm_status_t ilbadm_set_netmask(char *, ilb_ip_addr_t *, int); +int ilbadm_mask_to_prefixlen(ilb_ip_addr_t *); + +void print_cmdlist_short(char *, FILE *); +extern int ilb_cmp_ipaddr(ilb_ip_addr_t *, ilb_ip_addr_t *, + longlong_t *); + +void ip2str(ilb_ip_addr_t *, char *, size_t, int); +char *i_str_from_val(int, ilbadm_val_type_t *); +char *ilbadm_key_to_opt(ilbadm_key_code_t); + +void Usage(char 
*); +void unknown_opt(char **, int); +const char *ilbadm_errstr(ilbadm_status_t); +void ilbadm_err(const char *format, ...); + +#ifdef __cplusplus +} +#endif + +#endif /* _ILBADM_H */ diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_hc.c b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_hc.c new file mode 100644 index 0000000000..018470e4d0 --- /dev/null +++ b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_hc.c @@ -0,0 +1,520 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/list.h> +#include <netinet/in.h> +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <strings.h> +#include <errno.h> +#include <ofmt.h> +#include <libilb.h> +#include "ilbadm.h" + +extern int optind, optopt, opterr; +extern char *optarg; + +typedef struct hc_export_arg { + FILE *fp; +} hc_export_arg_t; + +/* Maximum columns for printing hc output. */ +#define SHOW_HC_COLS 80 + +/* OFMT call back to print out a hc server result field. 
*/
+static boolean_t print_hc_result(ofmt_arg_t *, char *, uint_t);
+
+/*
+ * ID to indicate which field is to be printed. The first two rows are the
+ * columns of hc_results[]; the last two rows are the columns of hc_fields[].
+ */
+enum hc_print_id {
+	hc_of_rname, hc_of_hname, hc_of_sname, hc_of_status, hc_of_fail_cnt,
+	hc_of_lasttime, hc_of_nexttime, hc_of_rtt,
+	hc_of_name, hc_of_timeout, hc_of_count, hc_of_interval, hc_of_def_ping,
+	hc_of_test
+};
+
+/*
+ * Fields of a hc server result. The sum of all fields' width is SHOW_HC_COLS.
+ */
+static ofmt_field_t hc_results[] = {
+	{"RULENAME", 14, hc_of_rname, print_hc_result},
+	{"HCNAME", 14, hc_of_hname, print_hc_result},
+	{"SERVERID", 14, hc_of_sname, print_hc_result},
+	{"STATUS", 9, hc_of_status, print_hc_result},
+	{"FAIL", 5, hc_of_fail_cnt, print_hc_result},
+	{"LAST", 9, hc_of_lasttime, print_hc_result},
+	{"NEXT", 9, hc_of_nexttime, print_hc_result},
+	{"RTT", 6, hc_of_rtt, print_hc_result},
+	{NULL, 0, 0, NULL}
+};
+
+/* OFMT call back to print out a hc info field. */
+static boolean_t print_hc(ofmt_arg_t *, char *, uint_t);
+
+/*
+ * Fields of a hc info. The sum of all fields' width is SHOW_HC_COLS.
+ */ +static ofmt_field_t hc_fields[] = { + {"HCNAME", 14, hc_of_name, print_hc}, + {"TIMEOUT", 8, hc_of_timeout, print_hc}, + {"COUNT", 8, hc_of_count, print_hc}, + {"INTERVAL", 9, hc_of_interval, print_hc}, + {"DEF_PING", 9, hc_of_def_ping, print_hc}, + {"TEST", 32, hc_of_test, print_hc}, + {NULL, 0, 0, NULL} +}; + +static boolean_t +print_hc(ofmt_arg_t *of_arg, char *buf, uint_t bufsize) +{ + enum hc_print_id id = of_arg->ofmt_id; + ilb_hc_info_t *info = (ilb_hc_info_t *)of_arg->ofmt_cbarg; + + switch (id) { + case hc_of_name: + (void) strlcpy(buf, info->hci_name, bufsize); + break; + case hc_of_timeout: + (void) snprintf(buf, bufsize, "%d", info->hci_timeout); + break; + case hc_of_count: + (void) snprintf(buf, bufsize, "%d", info->hci_count); + break; + case hc_of_interval: + (void) snprintf(buf, bufsize, "%d", info->hci_interval); + break; + case hc_of_def_ping: + (void) snprintf(buf, bufsize, "%c", + info->hci_def_ping ? 'Y' : 'N'); + break; + case hc_of_test: + (void) snprintf(buf, bufsize, "%s", info->hci_test); + break; + } + return (B_TRUE); +} + +/* Call back to ilb_walk_hc(). */ +/* ARGSUSED */ +static ilb_status_t +ilbadm_print_hc(ilb_handle_t h, ilb_hc_info_t *hc_info, void *arg) +{ + ofmt_handle_t ofmt_h = arg; + + ofmt_print(ofmt_h, hc_info); + return (ILB_STATUS_OK); +} + +/* + * Print out health check objects given their name. + * Or print out all health check objects if no name given. 
+ */ +/* ARGSUSED */ +ilbadm_status_t +ilbadm_show_hc(int argc, char *argv[]) +{ + ilb_handle_t h = ILB_INVALID_HANDLE; + ilb_status_t rclib; + ofmt_handle_t ofmt_h; + ofmt_status_t ofmt_ret; + + if ((ofmt_ret = ofmt_open("all", hc_fields, 0, SHOW_HC_COLS, + &ofmt_h)) != OFMT_SUCCESS) { + char err_buf[SHOW_HC_COLS]; + + ilbadm_err(gettext("ofmt_open failed: %s"), + ofmt_strerror(ofmt_h, ofmt_ret, err_buf, SHOW_HC_COLS)); + return (ILBADM_LIBERR); + } + rclib = ilb_open(&h); + if (rclib != ILB_STATUS_OK) + goto out; + + if (argc == 1) { + rclib = ilb_walk_hc(h, ilbadm_print_hc, ofmt_h); + } else { + ilb_hc_info_t hc_info; + int i; + + for (i = 1; i < argc; i++) { + rclib = ilb_get_hc_info(h, argv[i], &hc_info); + if (rclib == ILB_STATUS_OK) + ofmt_print(ofmt_h, &hc_info); + else + break; + } + } +out: + ofmt_close(ofmt_h); + + if (h != ILB_INVALID_HANDLE) + (void) ilb_close(h); + + if (rclib != ILB_STATUS_OK) { + ilbadm_err(ilb_errstr(rclib)); + return (ILBADM_LIBERR); + } + + return (ILBADM_OK); +} + +static boolean_t +print_hc_result(ofmt_arg_t *of_arg, char *buf, uint_t bufsize) +{ + enum hc_print_id id = of_arg->ofmt_id; + ilb_hc_srv_t *srv = (ilb_hc_srv_t *)of_arg->ofmt_cbarg; + struct tm tv; + + switch (id) { + case hc_of_rname: + (void) strlcpy(buf, srv->hcs_rule_name, bufsize); + break; + case hc_of_hname: + (void) strlcpy(buf, srv->hcs_hc_name, bufsize); + break; + case hc_of_sname: + (void) strlcpy(buf, srv->hcs_ID, bufsize); + break; + case hc_of_status: + switch (srv->hcs_status) { + case ILB_HCS_UNINIT: + (void) strlcpy(buf, "un-init", bufsize); + break; + case ILB_HCS_UNREACH: + (void) strlcpy(buf, "unreach", bufsize); + break; + case ILB_HCS_ALIVE: + (void) strlcpy(buf, "alive", bufsize); + break; + case ILB_HCS_DEAD: + (void) strlcpy(buf, "dead", bufsize); + break; + case ILB_HCS_DISABLED: + (void) strlcpy(buf, "disabled", bufsize); + break; + } + break; + case hc_of_fail_cnt: + (void) snprintf(buf, bufsize, "%u", srv->hcs_fail_cnt); + break; + case 
hc_of_lasttime: + if (localtime_r(&srv->hcs_lasttime, &tv) == NULL) + return (B_FALSE); + (void) snprintf(buf, bufsize, "%02d:%02d:%02d", tv.tm_hour, + tv.tm_min, tv.tm_sec); + break; + case hc_of_nexttime: + if (srv->hcs_status == ILB_HCS_DISABLED) + break; + if (localtime_r(&srv->hcs_nexttime, &tv) == NULL) + return (B_FALSE); + (void) snprintf(buf, bufsize, "%02d:%02d:%02d", tv.tm_hour, + tv.tm_min, tv.tm_sec); + break; + case hc_of_rtt: + (void) snprintf(buf, bufsize, "%u", srv->hcs_rtt); + break; + } + return (B_TRUE); +} + +/* Call back to ilbd_walk_hc_srvs(). */ +/* ARGSUSED */ +static ilb_status_t +ilbadm_print_hc_result(ilb_handle_t h, ilb_hc_srv_t *srv, void *arg) +{ + ofmt_handle_t ofmt_h = arg; + + ofmt_print(ofmt_h, srv); + return (ILB_STATUS_OK); +} + +/* + * Output hc result of a specified rule or all rules. + */ +ilbadm_status_t +ilbadm_show_hc_result(int argc, char *argv[]) +{ + ilb_handle_t h = ILB_INVALID_HANDLE; + ilb_status_t rclib = ILB_STATUS_OK; + int i; + ofmt_handle_t ofmt_h; + ofmt_status_t ofmt_ret; + + /* ilbadm show-hc-result [rule-name] */ + if (argc < 1) { + ilbadm_err(gettext("usage: ilbadm show-hc-result" + " [rule-name]")); + return (ILBADM_LIBERR); + } + + if ((ofmt_ret = ofmt_open("all", hc_results, 0, SHOW_HC_COLS, + &ofmt_h)) != OFMT_SUCCESS) { + char err_buf[SHOW_HC_COLS]; + + ilbadm_err(gettext("ofmt_open failed: %s"), + ofmt_strerror(ofmt_h, ofmt_ret, err_buf, SHOW_HC_COLS)); + return (ILBADM_LIBERR); + } + + rclib = ilb_open(&h); + if (rclib != ILB_STATUS_OK) + goto out; + + /* If no rule name is given, show results for all rules. 
*/ + if (argc == 1) { + rclib = ilb_walk_hc_srvs(h, ilbadm_print_hc_result, NULL, + ofmt_h); + } else { + for (i = 1; i < argc; i++) { + rclib = ilb_walk_hc_srvs(h, ilbadm_print_hc_result, + argv[i], ofmt_h); + if (rclib != ILB_STATUS_OK) + break; + } + } +out: + ofmt_close(ofmt_h); + + if (h != ILB_INVALID_HANDLE) + (void) ilb_close(h); + + if (rclib != ILB_STATUS_OK) { + ilbadm_err(ilb_errstr(rclib)); + return (ILBADM_LIBERR); + } + return (ILBADM_OK); +} + +#define ILBADM_DEF_HC_COUNT 3 +#define ILBADM_DEF_HC_INTERVAL 30 /* in sec */ +#define ILBADM_DEF_HC_TIMEOUT 5 /* in sec */ + +static ilbadm_key_name_t hc_parse_keys[] = { + {ILB_KEY_HC_TEST, "hc-test", "hc-test"}, + {ILB_KEY_HC_COUNT, "hc-count", "hc-count"}, + {ILB_KEY_HC_TIMEOUT, "hc-timeout", "hc-tout"}, + {ILB_KEY_HC_INTERVAL, "hc-interval", "hc-intl"}, + {ILB_KEY_BAD, "", ""} +}; + +static ilbadm_status_t +ilbadm_hc_parse_arg(char *arg, ilb_hc_info_t *hc) +{ + ilbadm_status_t ret; + + /* set default value for count, interval, timeout */ + hc->hci_count = ILBADM_DEF_HC_COUNT; + hc->hci_interval = ILBADM_DEF_HC_INTERVAL; + hc->hci_timeout = ILBADM_DEF_HC_TIMEOUT; + hc->hci_test[0] = '\0'; + + ret = i_parse_optstring(arg, hc, hc_parse_keys, 0, NULL); + if (ret != ILBADM_OK && ret != ILBADM_LIBERR) { + ilbadm_err(ilbadm_errstr(ret)); + return (ILBADM_LIBERR); + } + if (hc->hci_test[0] == '\0' && ret != ILBADM_LIBERR) { + ilbadm_err("hc-test: missing"); + return (ILBADM_LIBERR); + } + return (ret); +} + +/* ARGSUSED */ +ilbadm_status_t +ilbadm_create_hc(int argc, char *argv[]) +{ + ilb_handle_t h = ILB_INVALID_HANDLE; + ilb_hc_info_t hc_info; + ilbadm_status_t ret = ILBADM_OK; + ilb_status_t rclib; + char c; + + + hc_info.hci_def_ping = B_TRUE; + while ((c = getopt(argc, argv, ":h:n")) != -1) { + if (c == 'h') { + ret = ilbadm_hc_parse_arg(optarg, &hc_info); + if (ret != ILBADM_OK) + return (ret); + } else if (c == 'n') { + hc_info.hci_def_ping = B_FALSE; + } else { + ilbadm_err(gettext("bad argument %c"), 
c); + return (ILBADM_LIBERR); + } + } + + if (optind >= argc) { + ilbadm_err(gettext("usage: ilbadm" + " create-healthcheck [-n] -h" + " hc-test=val[,hc-timeout=val][,hc-count=va]" + "[,hc-interval=val] hc-name")); + return (ILBADM_FAIL); + } + + if (strlen(argv[optind]) > ILBD_NAMESZ - 1) { + ilbadm_err(gettext("health check object name %s is too long - " + "must not exceed %d chars"), argv[optind], + ILBD_NAMESZ - 1); + return (ILBADM_FAIL); + } + + if (((strcasecmp(hc_info.hci_test, ILB_HC_STR_UDP) == 0) || + (strcasecmp(hc_info.hci_test, ILB_HC_STR_PING) == 0)) && + !(hc_info.hci_def_ping)) { + ilbadm_err(gettext("cannot disable default PING" + " for this test")); + return (ILBADM_LIBERR); + } + + rclib = ilb_open(&h); + if (rclib != ILB_STATUS_OK) + goto out; + + (void) strlcpy(hc_info.hci_name, argv[optind], + sizeof (hc_info.hci_name)); + rclib = ilb_create_hc(h, &hc_info); +out: + if (h != ILB_INVALID_HANDLE) + (void) ilb_close(h); + + if (rclib != ILB_STATUS_OK) { + ilbadm_err(ilb_errstr(rclib)); + ret = ILBADM_LIBERR; + } + return (ret); +} + +ilbadm_status_t +ilbadm_destroy_hc(int argc, char *argv[]) +{ + ilb_handle_t h = ILB_INVALID_HANDLE; + ilb_status_t rclib; + ilbadm_status_t ret = ILBADM_OK; + int i; + + if (argc < 2) { + ilbadm_err(gettext("usage: ilbadm" + " delete-healthcheck hc-name ...")); + return (ILBADM_LIBERR); + } + + rclib = ilb_open(&h); + if (rclib != ILB_STATUS_OK) + goto out; + + for (i = 1; i < argc; i++) { + rclib = ilb_destroy_hc(h, argv[i]); + if (rclib != ILB_STATUS_OK) + break; + } +out: + if (h != ILB_INVALID_HANDLE) + (void) ilb_close(h); + + if (rclib != ILB_STATUS_OK) { + ilbadm_err(ilb_errstr(rclib)); + ret = ILBADM_LIBERR; + } + return (ret); +} + +/* + * Since this function is used by libilb function, it + * must return libilb errors + */ +/* ARGSUSED */ +ilb_status_t +ilbadm_export_hcinfo(ilb_handle_t h, ilb_hc_info_t *hc_info, void *arg) +{ + FILE *fp = ((hc_export_arg_t *)arg)->fp; + int count = 0; + int ret; + + /* + 
* a test name "PING" implies "no default ping", so we only + * print -n if the test is NOT "PING" + */ + if (hc_info->hci_def_ping == B_FALSE && + strncasecmp(hc_info->hci_test, "PING", 5) != 0) + (void) fprintf(fp, "create-healthcheck -n -h "); + else + (void) fprintf(fp, "create-healthcheck -h "); + + if (*hc_info->hci_test != '\0') { + (void) fprintf(fp, "hc-test=%s", hc_info->hci_test); + count++; + } + if (hc_info->hci_timeout != 0) { + if (count++ > 0) + (void) fprintf(fp, ","); + (void) fprintf(fp, "hc-timeout=%d", hc_info->hci_timeout); + } + if (hc_info->hci_count != 0) { + if (count++ > 0) + (void) fprintf(fp, ","); + (void) fprintf(fp, "hc-count=%d", hc_info->hci_count); + } + if (hc_info->hci_interval != 0) { + if (count > 0) + (void) fprintf(fp, ","); + (void) fprintf(fp, "hc-interval=%d", hc_info->hci_interval); + } + + /* + * if any of the above writes fails, then, we assume, so will + * this one; so it's sufficient to test once + */ + ret = fprintf(fp, " %s\n", hc_info->hci_name); + if (ret < 0) + goto out_fail; + ret = fflush(fp); + +out_fail: + if (ret < 0) + return (ILB_STATUS_WRITE); + return (ILB_STATUS_OK); +} + +ilbadm_status_t +ilbadm_export_hc(ilb_handle_t h, FILE *fp) +{ + ilb_status_t rclib; + ilbadm_status_t ret = ILBADM_OK; + hc_export_arg_t arg; + + arg.fp = fp; + rclib = ilb_walk_hc(h, ilbadm_export_hcinfo, (void *)&arg); + if (rclib != ILB_STATUS_OK) { + ilbadm_err(ilb_errstr(rclib)); + ret = ILBADM_LIBERR; + } + return (ret); +} diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_import.c b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_import.c new file mode 100644 index 0000000000..22e7b6a13b --- /dev/null +++ b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_import.c @@ -0,0 +1,441 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <strings.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <libilb.h> +#include "ilbadm.h" + +static ilbadm_cmd_help_t create_sg_help = { +"[-s server=hostspec[:portspec...]] groupname" +}; + +static ilbadm_cmd_help_t create_rule_help = { +"[-e] [-p] -i vip=value,port=value[,protocol=value] \n" \ +" -m lbalg=value,type=value[,proxy-src=ip-range][,pmask=mask] \n"\ +" -h hc-name=value[,hc-port=value]] \n" \ +" [-t [conn-drain=N][,nat-timeout=N][,persist-timeout=N]] \n" \ +" -o servergroup=value name" +}; + +static ilbadm_cmd_help_t destroy_rule_help = { +"-a | name ..." +}; + +static ilbadm_cmd_help_t add_server_help = { +"-s server=value[,value ...] servergroup" +}; + +static ilbadm_cmd_help_t remove_server_help = { +"-s server=value[,value ...] servergroup" +}; + + +static ilbadm_cmd_help_t disable_server_help = { +"server ... " +}; + +static ilbadm_cmd_help_t enable_server_help = { +"server ..." +}; + +static ilbadm_cmd_help_t enable_rule_help = { +"[name ... ]" +}; + +static ilbadm_cmd_help_t disable_rule_help = { +"[name ... 
]" +}; + +static ilbadm_cmd_help_t show_server_help = { +"[[-p] -o field[,field...]] [rulename ... ]" +}; + +static ilbadm_cmd_help_t showstats_help = { +"[-p] -o field[,...]] [-tdAvi]\n" \ +" [-r rulename|-s servername] [interval [count]]" +}; + +static ilbadm_cmd_help_t show_nat_help = { +"[count]" +}; + +static ilbadm_cmd_help_t show_persist_help = { +"[count]" +}; + +static ilbadm_cmd_help_t show_hc_help = { +"[hc-name]" +}; + +static ilbadm_cmd_help_t create_hc_help = { +"[-n] -h hc-test=value[,hc-timeout=value]\n" \ +" [,hc-count=value][,hc-interval=value] hcname" +}; + +static ilbadm_cmd_help_t destroy_hc_help = { +"name ..." +}; + +static ilbadm_cmd_help_t show_hc_result_help = { +"[rule-name]" +}; + +static ilbadm_cmd_help_t show_rule_help = { +"[-e|-d] [-f |[-p] -o key[,key ...]] [name ...]" +}; + +static ilbadm_cmd_help_t destroy_servergroup_help = { +"groupname" +}; + +static ilbadm_cmd_help_t show_servergroup_help = { +"[[-p] -o field[,field]] [name]" +}; + +static ilbadm_cmd_help_t export_config_help = { +"[filename]" +}; + +static ilbadm_cmd_help_t import_config_help = { +"[-p] [filename]" +}; +static ilbadm_cmd_desc_t ilbadm_cmds[] = { + {"create-rule", "create-rl", ilbadm_create_rule, &create_rule_help}, + {"delete-rule", "delete-rl", ilbadm_destroy_rule, &destroy_rule_help}, + {"enable-rule", "enable-rl", ilbadm_enable_rule, &enable_rule_help}, + {"disable-rule", "disable-rl", ilbadm_disable_rule, + &disable_rule_help}, + {"show-rule", "show-rl", ilbadm_show_rules, &show_rule_help}, + + {"create-servergroup", "create-sg", ilbadm_create_servergroup, + &create_sg_help}, + {"delete-servergroup", "delete-sg", ilbadm_destroy_servergroup, + &destroy_servergroup_help}, + {"show-servergroup", "show-sg", ilbadm_show_servergroups, + &show_servergroup_help}, + + {"add-server", "add-srv", ilbadm_add_server_to_group, + &add_server_help}, + {"remove-server", "remove-srv", ilbadm_rem_server_from_group, + &remove_server_help}, + {"disable-server", "disable-srv", 
ilbadm_disable_server, + &disable_server_help}, + {"enable-server", "enable-srv", ilbadm_enable_server, + &enable_server_help}, + {"show-server", "show-srv", ilbadm_show_server, + &show_server_help}, + + {"show-healthcheck", "show-hc", ilbadm_show_hc, &show_hc_help}, + {"create-healthcheck", "create-hc", ilbadm_create_hc, &create_hc_help}, + {"delete-healthcheck", "delete-hc", ilbadm_destroy_hc, + &destroy_hc_help}, + {"show-hc-result", "show-hc-res", ilbadm_show_hc_result, + &show_hc_result_help}, + + {"export-config", "export-cf", ilbadm_export, &export_config_help}, + {"import-config", "import-cf", ilbadm_noimport, &import_config_help}, + + {"show-statistics", "show-stats", ilbadm_show_stats, &showstats_help}, + {"show-nat", "show-nat", ilbadm_show_nat, &show_nat_help}, + {"show-persist", "show-pt", ilbadm_show_persist, + &show_persist_help}, + {"", "", NULL, NULL} +}; + + +/* ARGSUSED */ +ilbadm_status_t +ilbadm_noimport(int argc, char *argv[]) +{ + ilbadm_err(ilbadm_errstr(ILBADM_NORECURSIVE)); + return (ILBADM_LIBERR); +} + +static void +print_cmd_short(char *name, FILE *fp, ilbadm_cmd_desc_t *cmd) +{ + char *h; + + while (cmd->c_name[0] != '\0') { + if (cmd->c_help != NULL && + (h = cmd->c_help->h_help) != NULL) + (void) fprintf(fp, "%s %s|%s %s\n", name, + cmd->c_name, cmd->c_alias, h); + else + (void) fprintf(fp, "%s %s|%s\n", name, cmd->c_name, + cmd->c_alias); + cmd++; + } +} + +void +print_cmdlist_short(char *name, FILE *fp) +{ + print_cmd_short(name, fp, ilbadm_cmds); +} + +#define IMPORT_FILE 0x1 + +static void +match_cmd(char *name, ilbadm_cmd_desc_t *cmds, cmdfunc_t *action, int flags) +{ + ilbadm_cmd_desc_t *cmd; + + if ((flags & IMPORT_FILE) == IMPORT_FILE) { + if (strcasecmp(name, "export-config") == 0 || + strcasecmp(name, "export-cf") == 0) { + ilbadm_err(gettext("export from import file" + " not allowed")); + exit(1); + } + } + + for (cmd = &cmds[0]; cmd->c_name[0] != '\0'; cmd++) { + if (strncasecmp(cmd->c_name, name, sizeof (cmd->c_name)) == 
0 || + strncasecmp(cmd->c_alias, name, sizeof (cmd->c_alias)) == 0) + break; + } + *action = cmd->c_action; +} + +/* + * read and parse commandline + */ +static ilbadm_status_t +ilb_import_cmdline(int argc, char *argv[], int flags) +{ + ilbadm_status_t rc = ILBADM_OK; + cmdfunc_t cmd; + + match_cmd(argv[0], ilbadm_cmds, &cmd, flags); + + if (*cmd != NULL) { + rc = cmd(argc, argv); + } else { + rc = ILBADM_INVAL_COMMAND; + ilbadm_err(ilbadm_errstr(rc)); + } + + return (rc); +} + +#define CHUNK 10 +#define LINESZ 1024 + +typedef struct { + int listsz; + char *arglist[1]; +} arg_t; + +static int +i_getln_to_argv(FILE *fp, arg_t **ap) +{ + static char *linebuf = NULL; + char *stringp, *currp; + char delim[] = " \t\n"; + int i; + arg_t *a = *ap; + +#define STR_DIFF(s1, s2) (int)((char *)s2 - (char *)s1) +#define STR_ADJ_SZ(sz, buf, s) (sz - STR_DIFF(buf, s)) + + if (linebuf == NULL) + if ((linebuf = (char *)malloc(LINESZ)) == NULL) + return (0); + + stringp = currp = linebuf; + i = 0; +read_next: + if (fgets(currp, STR_ADJ_SZ(LINESZ, linebuf, currp), fp) == NULL) + return (i); + + /* ignore lines starting with a # character */ + if (*currp == '#') + goto read_next; + + for (; stringp != NULL && currp != NULL; i++) { + currp = strsep(&stringp, delim); + /* + * if there's more than one adjacent delimiters ... + */ + if (*currp == '\0') { + i--; + continue; + } + /* + * if we find a '\' at the end of a line, treat + * it as a continuation character. 
+ */ + if (*currp == '\\' && stringp == NULL) { + stringp = currp; + goto read_next; + } + if (a == NULL) { + a = (arg_t *)malloc(sizeof (*a)); + bzero(a, sizeof (*a)); + } + if (a->listsz <= i) { + int sz; + + a->listsz += CHUNK; + sz = sizeof (*a) + + ((a->listsz - 1) * sizeof (a->arglist)); + a = (arg_t *)realloc(a, sz); + *ap = a; + } + a->arglist[i] = currp; + } + return (i); +} + +static ilbadm_status_t +ilb_import_file(int fd, int flags) +{ + FILE *fp; + arg_t *a = NULL; + int argcount; + ilbadm_status_t rc = ILBADM_OK; + + if ((fp = fdopen(fd, "r")) == NULL) { + ilbadm_err(gettext("cannot import file for reading")); + exit(1); + } + + if ((flags & ILBADM_IMPORT_PRESERVE) == 0) { + ilb_handle_t h = ILB_INVALID_HANDLE; + ilb_status_t rclib; + + rclib = ilb_open(&h); + if (rclib == ILB_STATUS_OK) + (void) ilb_reset_config(h); + if (h != ILB_INVALID_HANDLE) + (void) ilb_close(h); + } + + while ((argcount = i_getln_to_argv(fp, &a)) > 0) { + optind = 1; + rc = ilb_import_cmdline(argcount, a->arglist, IMPORT_FILE); + if (rc != ILBADM_OK) + break; + } + + return (rc); +} + +/* + * this is the wrapper around everything to do with importing and + * parsing either commandline or persistent storage. + * if (fd == -1), parse commandline, otherwise use the given fd as input. 
+ */ +/* ARGSUSED */ +ilbadm_status_t +ilbadm_import(int fd, int argc, char *argv[], int flags) +{ + ilbadm_status_t rc; + + if (fd == -1) + rc = ilb_import_cmdline(argc, argv, 0); + else + rc = ilb_import_file(fd, flags); + + return (rc); +} + +ilbadm_status_t +ilbadm_export(int argc, char *argv[]) +{ + ilb_handle_t h = ILB_INVALID_HANDLE; + ilbadm_status_t rc = ILBADM_OK; + ilb_status_t rclib = ILB_STATUS_OK; + int fd; + FILE *fp; + char *fname = NULL; + char tmpfile[MAXPATHLEN]; + + if (argc < 2) { + fd = 1; /* stdout */ + *tmpfile = '\0'; + } else { + fname = argv[1]; + (void) snprintf(tmpfile, sizeof (tmpfile), "%sXXXXXX", fname); + fd = mkstemp(tmpfile); + + if (fd == -1) { + ilbadm_err(gettext("cannot create working file")); + exit(1); + } + } + fp = fdopen(fd, "w"); + if (fp == NULL) { + ilbadm_err(gettext("cannot open file for writing"), fd); + exit(1); + } + + rclib = ilb_open(&h); + if (rclib != ILB_STATUS_OK) + goto out; + + rc = ilbadm_export_servergroups(h, fp); + if (rc != ILBADM_OK) + goto out; + + rc = ilbadm_export_hc(h, fp); + if (rc != ILBADM_OK) + goto out; + + rc = ilbadm_export_rules(h, fp); + if (rc != ILBADM_OK) + goto out; + + if (fname != NULL) { + if (rename(tmpfile, fname) == -1) { + ilbadm_err(gettext("cannot create %s: %s"), fname, + strerror(errno)); + exit(1); + } + *tmpfile = '\0'; + } + +out: + if (h != ILB_INVALID_HANDLE) + (void) ilb_close(h); + + if ((rc != ILBADM_OK) && (rc != ILBADM_LIBERR)) + ilbadm_err(ilbadm_errstr(rc)); + (void) fclose(fp); + if (*tmpfile != '\0') + (void) unlink(tmpfile); + return (rc); +} diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_nat.c b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_nat.c new file mode 100644 index 0000000000..dad0f1cb53 --- /dev/null +++ b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_nat.c @@ -0,0 +1,222 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/types.h> +#include <sys/socket.h> +#include <stdlib.h> +#include <stdio.h> +#include <strings.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <libilb.h> +#include "ilbadm.h" + +/* + * For each iteration through the kernel table, ask for at most NUM_ENTRIES + * entries to be returned. 
+ */ +#define NUM_ENTRIES 500 + +static void +print_nat_info(ilb_nat_info_t *info) +{ + char *tmp; + ipaddr_t addr_v4; + char addr[INET6_ADDRSTRLEN]; + + if (info->nat_proto == IPPROTO_TCP) + tmp = "TCP"; + else if (info->nat_proto == IPPROTO_UDP) + tmp = "UDP"; + else + tmp = "Unknown"; + (void) printf("%4s: ", tmp); + + if (IN6_IS_ADDR_V4MAPPED(&info->nat_out_global)) { + IN6_V4MAPPED_TO_IPADDR(&info->nat_out_global, addr_v4); + (void) printf("%s.%d > ", inet_ntop(AF_INET, &addr_v4, addr, + INET6_ADDRSTRLEN), ntohs(info->nat_out_global_port)); + IN6_V4MAPPED_TO_IPADDR(&info->nat_in_global, addr_v4); + (void) printf("%s.%d >>> ", inet_ntop(AF_INET, &addr_v4, addr, + INET6_ADDRSTRLEN), ntohs(info->nat_in_global_port)); + + IN6_V4MAPPED_TO_IPADDR(&info->nat_out_local, addr_v4); + (void) printf("%s.%d > ", inet_ntop(AF_INET, &addr_v4, addr, + INET6_ADDRSTRLEN), ntohs(info->nat_out_local_port)); + IN6_V4MAPPED_TO_IPADDR(&info->nat_in_local, addr_v4); + (void) printf("%s.%d\n", inet_ntop(AF_INET, &addr_v4, addr, + INET6_ADDRSTRLEN), ntohs(info->nat_in_local_port)); + } else { + (void) printf("%s.%d > ", inet_ntop(AF_INET6, + &info->nat_out_global, addr, INET6_ADDRSTRLEN), + ntohs(info->nat_out_global_port)); + (void) printf("%s.%d >>> ", inet_ntop(AF_INET6, + &info->nat_in_global, addr, INET6_ADDRSTRLEN), + ntohs(info->nat_in_global_port)); + + (void) printf("%s.%d > ", inet_ntop(AF_INET6, + &info->nat_out_local, addr, INET6_ADDRSTRLEN), + ntohs(info->nat_out_local_port)); + (void) printf("%s.%d\n", inet_ntop(AF_INET6, + &info->nat_in_local, addr, INET6_ADDRSTRLEN), + ntohs(info->nat_in_local_port)); + } +} + +static void +print_persist_info(ilb_persist_info_t *info) +{ + char addr[INET6_ADDRSTRLEN]; + + (void) printf("%s: ", info->persist_rule_name); + if (IN6_IS_ADDR_V4MAPPED(&info->persist_req_addr)) { + ipaddr_t addr_v4; + + IN6_V4MAPPED_TO_IPADDR(&info->persist_req_addr, addr_v4); + (void) printf("%s --> ", inet_ntop(AF_INET, &addr_v4, addr, + INET6_ADDRSTRLEN)); 
+ IN6_V4MAPPED_TO_IPADDR(&info->persist_srv_addr, addr_v4); + (void) printf("%s\n", inet_ntop(AF_INET, &addr_v4, addr, + INET6_ADDRSTRLEN)); + } else { + (void) printf("%s --> ", inet_ntop(AF_INET6, + &info->persist_req_addr, addr, INET6_ADDRSTRLEN)); + (void) printf("%s\n", inet_ntop(AF_INET6, + &info->persist_srv_addr, addr, INET6_ADDRSTRLEN)); + } +} + +/* Tell ilbadm_show_info() which table to show. */ +enum which_tbl { + show_nat = 1, + show_persist +}; + +typedef union { + ilb_nat_info_t *nbuf; + ilb_persist_info_t *pbuf; + char *buf; +} show_buf_t; + +static ilbadm_status_t +ilbadm_show_info(int argc, char *argv[], enum which_tbl tbl) +{ + ilb_handle_t h = ILB_INVALID_HANDLE; + show_buf_t buf; + ilb_status_t rclib = ILB_STATUS_OK; + ilbadm_status_t rc = ILBADM_OK; + int32_t i, num_entries; + size_t num; + boolean_t end; + size_t entry_sz; + + /* + * If the user does not specify a count, return the whole table. + * This requires setting the fourth param to ilb_show_nat/persist() + * end to B_FALSE. Otherwise, set end to B_TRUE; + */ + + switch (argc) { + case 1: + num_entries = -1; + end = B_FALSE; + break; + case 2: + num_entries = atoi(argv[1]); + if (num_entries < 1) { + rc = ILBADM_EINVAL; + goto out; + } + end = B_TRUE; + break; + default: + rc = ILBADM_EINVAL; + goto out; + } + + if (tbl == show_nat) + entry_sz = sizeof (ilb_nat_info_t); + else + entry_sz = sizeof (ilb_persist_info_t); + if ((buf.buf = malloc((num_entries > 0 ? num_entries : NUM_ENTRIES) * + entry_sz)) == NULL) { + rc = ILBADM_ENOMEM; + goto out; + } + + rclib = ilb_open(&h); + if (rclib != ILB_STATUS_OK) + goto out; + + do { + num = num_entries > 0 ? 
num_entries : NUM_ENTRIES; + bzero(buf.buf, num * entry_sz); + + if (tbl == show_nat) + rclib = ilb_show_nat(h, buf.nbuf, &num, &end); + else + rclib = ilb_show_persist(h, buf.pbuf, &num, &end); + + if (rclib != ILB_STATUS_OK) + break; + + for (i = 0; i < num; i++) { + if (tbl == show_nat) + print_nat_info(&buf.nbuf[i]); + else + print_persist_info(&buf.pbuf[i]); + } + if (num_entries > 0) { + num_entries -= num; + if (num_entries <= 0) + break; + } + } while (!end); + free(buf.buf); +out: + if (h != ILB_INVALID_HANDLE) + (void) ilb_close(h); + if (rclib != ILB_STATUS_OK) { + ilbadm_err(ilb_errstr(rclib)); + rc = ILBADM_LIBERR; + } + if ((rc != ILBADM_OK) && (rc != ILBADM_LIBERR)) + ilbadm_err(ilbadm_errstr(rc)); + return (rc); +} + + +ilbadm_status_t +ilbadm_show_nat(int argc, char *argv[]) +{ + return (ilbadm_show_info(argc, argv, show_nat)); +} + +ilbadm_status_t +ilbadm_show_persist(int argc, char *argv[]) +{ + return (ilbadm_show_info(argc, argv, show_persist)); +} diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_rules.c b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_rules.c new file mode 100644 index 0000000000..dc428c4998 --- /dev/null +++ b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_rules.c @@ -0,0 +1,1313 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <strings.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <sys/list.h> +#include <netdb.h> +#include <ofmt.h> +#include <assert.h> +#include <libilb.h> +#include "ilbadm.h" + +static ilbadm_key_name_t rl_incoming_keys[] = { + {ILB_KEY_VIP, "vip", ""}, + {ILB_KEY_PORT, "port", ""}, + {ILB_KEY_PROTOCOL, "protocol", "prot"}, + {ILB_KEY_BAD, "", ""} +}; +static ilbadm_key_name_t rl_method_keys[] = { + {ILB_KEY_ALGORITHM, "lbalg", "algo"}, + {ILB_KEY_TYPE, "type", "topo"}, + {ILB_KEY_SRC, "proxy-src", "nat-src"}, + {ILB_KEY_STICKY, "pmask", "persist"}, + {ILB_KEY_BAD, "", ""} +}; +static ilbadm_key_name_t rl_outgoing_keys[] = { + {ILB_KEY_SERVERGROUP, "servergroup", "sg"}, + {ILB_KEY_BAD, "", ""} +}; +static ilbadm_key_name_t rl_healthchk_keys[] = { + {ILB_KEY_HEALTHCHECK, "hc-name", "hcn"}, + {ILB_KEY_HCPORT, "hc-port", "hcp"}, + {ILB_KEY_BAD, "", ""} +}; +static ilbadm_key_name_t rl_timer_keys[] = { + {ILB_KEY_CONNDRAIN, "conn-drain", ""}, + {ILB_KEY_NAT_TO, "nat-timeout", ""}, + {ILB_KEY_STICKY_TO, "persist-timeout", ""}, + {ILB_KEY_BAD, "", ""} +}; + +static ilbadm_key_name_t *all_keys[] = { + rl_incoming_keys, rl_method_keys, rl_outgoing_keys, + rl_healthchk_keys, rl_timer_keys, NULL +}; + + +/* field ids for of_* functions */ +#define OF_IP_VIP 0 +#define OF_IP_PROXYSRC 1 +#define OF_IP_STICKYMASK 2 + +#define OF_STR_RNAME 0 +#define OF_STR_HCNAME 1 +#define OF_STR_SGNAME 2 +#define OF_STR_INTERFACE 3 + +#define OF_PORT 0 +#define OF_HCPORT 1 + +#define OF_T_CONN 0 
+#define OF_T_NAT 1 +#define OF_T_STICKY 2 + +#define OF_SRV_ID 0 +#define OF_SRV_ADDR 1 +#define OF_SRV_PORT 2 +#define OF_SRV_STATUS 3 +#define OF_SRV_RNAME 4 +#define OF_SRV_SGNAME 5 +#define OF_SRV_HOSTNAME 6 + +/* some field sizes of ofmt_field_t arrays */ +#define IPv4_FIELDWIDTH 16 +#define IPv6_FIELDWIDTH 39 +#define ILB_HOSTNAMELEN 20 +#define ILB_STATUSFIELD_LEN 7 + +typedef struct arg_struct { + int flags; + char *o_str; + ofmt_field_t *o_fields; + ofmt_handle_t oh; +} ilbadm_sh_rl_arg_t; + +typedef struct ilbadm_rl_exp_arg { + FILE *fp; +} ilbadm_rl_exp_arg_t; + +typedef struct ilbadm_rl_list_arg { + ilb_handle_t h; + ilb_rule_data_t *rd; +} ilbadm_rl_list_arg_t; + +typedef struct ilbadm_rl_srvlist_arg { + char *sgname; + ilb_server_data_t *sd; + ilb_rule_data_t *rd; + int flags; + char *o_str; + ofmt_field_t *o_fields; + ofmt_handle_t oh; +} ilbadm_rl_srvlist_arg_t; + +static ofmt_cb_t of_algo; +static ofmt_cb_t of_proto; +static ofmt_cb_t of_rl_ip; +static ofmt_cb_t of_rl_mask; +static ofmt_cb_t of_rport; +static ofmt_cb_t of_rstatus; +static ofmt_cb_t of_str; +static ofmt_cb_t of_time; +static ofmt_cb_t of_topo; +static ofmt_cb_t of_rl_srvlist; + +static boolean_t of_srv2str(ofmt_arg_t *, char *, uint_t); +static boolean_t of_port2str(in_port_t, in_port_t, char *, uint_t); + +static ofmt_field_t rfields_v4[] = { + {"RULENAME", ILB_NAMESZ, OF_STR_RNAME, of_str}, + {"STATUS", ILB_STATUSFIELD_LEN, 0, of_rstatus}, + {"PORT", 10, OF_PORT, of_rport}, + {"PROTOCOL", 5, 0, of_proto}, + {"LBALG", 12, 0, of_algo}, + {"TYPE", 8, 0, of_topo}, + {"PROXY-SRC", 2*IPv4_FIELDWIDTH+1, OF_IP_PROXYSRC, of_rl_ip}, + {"PMASK", 6, OF_IP_STICKYMASK, of_rl_mask}, + {"HC-NAME", ILB_NAMESZ, OF_STR_HCNAME, of_str}, + {"HC-PORT", 8, OF_HCPORT, of_rport}, + {"CONN-DRAIN", 11, OF_T_CONN, of_time}, + {"NAT-TIMEOUT", 12, OF_T_NAT, of_time}, + {"PERSIST-TIMEOUT", 16, OF_T_STICKY, of_time}, + {"SERVERGROUP", ILB_SGNAME_SZ, OF_STR_SGNAME, of_str}, + {"VIP", IPv4_FIELDWIDTH, OF_IP_VIP, 
of_rl_ip}, + {"SERVERS", 20, 0, of_rl_srvlist}, + {NULL, 0, 0, NULL} +}; + +static ofmt_field_t rfields_v6[] = { + {"RULENAME", ILB_NAMESZ, OF_STR_RNAME, of_str}, + {"STATUS", ILB_STATUSFIELD_LEN, 0, of_rstatus}, + {"PORT", 10, OF_PORT, of_rport}, + {"PROTOCOL", 5, 0, of_proto}, + {"LBALG", 12, 0, of_algo}, + {"TYPE", 8, 0, of_topo}, + {"PROXY-SRC", IPv6_FIELDWIDTH, OF_IP_PROXYSRC, of_rl_ip}, + {"PMASK", 6, OF_IP_STICKYMASK, of_rl_mask}, + {"HC-NAME", ILB_NAMESZ, OF_STR_HCNAME, of_str}, + {"HC-PORT", 8, OF_HCPORT, of_rport}, + {"CONN-DRAIN", 11, OF_T_CONN, of_time}, + {"NAT-TIMEOUT", 12, OF_T_NAT, of_time}, + {"PERSIST-TIMEOUT", 16, OF_T_STICKY, of_time}, + {"SERVERGROUP", ILB_SGNAME_SZ, OF_STR_SGNAME, of_str}, + {"VIP", IPv6_FIELDWIDTH, OF_IP_VIP, of_rl_ip}, + {"SERVERS", 20, 0, of_rl_srvlist}, + {NULL, 0, 0, NULL} +}; + +static ofmt_field_t ssfields_v4[] = { + {"SERVERID", ILB_NAMESZ, OF_SRV_ID, of_srv2str}, + {"ADDRESS", IPv4_FIELDWIDTH, OF_SRV_ADDR, of_srv2str}, + {"PORT", 5, OF_SRV_PORT, of_srv2str}, + {"RULENAME", ILB_NAMESZ, OF_SRV_RNAME, of_srv2str}, + {"STATUS", ILB_STATUSFIELD_LEN, OF_SRV_STATUS, of_srv2str}, + {"SERVERGROUP", ILB_SGNAME_SZ, OF_SRV_SGNAME, of_srv2str}, + {"HOSTNAME", ILB_HOSTNAMELEN, OF_SRV_HOSTNAME, of_srv2str}, + {NULL, 0, 0, NULL} +}; + +static ofmt_field_t ssfields_v6[] = { + {"SERVERID", ILB_NAMESZ, OF_SRV_ID, of_srv2str}, + {"ADDRESS", IPv6_FIELDWIDTH, OF_SRV_ADDR, of_srv2str}, + {"PORT", 5, OF_SRV_PORT, of_srv2str}, + {"RULENAME", ILB_NAMESZ, OF_SRV_RNAME, of_srv2str}, + {"STATUS", ILB_STATUSFIELD_LEN, OF_SRV_STATUS, of_srv2str}, + {"SERVERGROUP", ILB_SGNAME_SZ, OF_SRV_SGNAME, of_srv2str}, + {"HOSTNAME", ILB_HOSTNAMELEN, OF_SRV_HOSTNAME, of_srv2str}, + {NULL, 0, 0, NULL} +}; + +extern int optind, optopt, opterr; +extern char *optarg; + +extern ilbadm_val_type_t algo_types[]; +extern ilbadm_val_type_t topo_types[]; + +static char * +i_key_to_opt(ilbadm_key_name_t *n, ilbadm_key_code_t k) +{ + int i; + + for (i = 0; n[i].k_key != 
ILB_KEY_BAD; i++) + if (n[i].k_key == k) + break; + + return (n[i].k_name); +} + +char * +ilbadm_key_to_opt(ilbadm_key_code_t k) +{ + char *name; + int i; + + for (i = 0; all_keys[i] != NULL; i++) { + name = i_key_to_opt(all_keys[i], k); + if (*name != '\0') + return (name); + } + + return (NULL); +} + +/* + * ports are in HOST byte order + */ +static void +ports2str(short port1, short port2, char *buf, const int sz) +{ + if (port2 <= port1) + (void) snprintf(buf, sz, "port=%d", port1); + else + (void) snprintf(buf, sz, "port=%d-%d", port1, port2); +} + +static void +proto2str(short proto, char *buf, int sz) +{ + struct protoent *pe; + + pe = getprotobynumber((int)proto); + if (pe != NULL) + (void) snprintf(buf, sz, "protocol=%s", pe->p_name); + else + (void) sprintf(buf, "(bad proto %d)", proto); +} + +static void +algo2str(ilb_algo_t algo, char *buf, int sz) +{ + char *s = i_str_from_val((int)algo, &algo_types[0]); + + (void) snprintf(buf, sz, "lbalg=%s", (s && *s) ? s : "(bad algo)"); +} + +static int +algo2bare_str(ilb_algo_t algo, char *buf, int sz) +{ + char *s = i_str_from_val((int)algo, &algo_types[0]); + + return (snprintf(buf, sz, "%s", (s && *s) ? s : "")); +} + +static void +topo2str(ilb_topo_t topo, char *buf, int sz) +{ + char *s = i_str_from_val((int)topo, &topo_types[0]); + + (void) snprintf(buf, sz, "type=%s", (s && *s) ? s : "(bad type)"); +} + +static int +topo2bare_str(ilb_topo_t topo, char *buf, int sz) +{ + char *s = i_str_from_val((int)topo, &topo_types[0]); + + return (snprintf(buf, sz, "%s", (s && *s) ? 
s : "")); +} + +static boolean_t +of_str(ofmt_arg_t *of_arg, char *buf, uint_t bufsize) +{ + ilbadm_rl_list_arg_t *ra = (ilbadm_rl_list_arg_t *)of_arg->ofmt_cbarg; + ilb_rule_data_t *rd = (ilb_rule_data_t *)ra->rd; + + switch (of_arg->ofmt_id) { + case OF_STR_RNAME: + (void) strlcpy(buf, rd->r_name, bufsize); + break; + case OF_STR_SGNAME: + (void) strlcpy(buf, rd->r_sgname, bufsize); + break; + case OF_STR_HCNAME: + if (rd->r_hcname != NULL && *(rd->r_hcname) != '\0') + (void) strlcpy(buf, rd->r_hcname, bufsize); + break; + } + return (B_TRUE); +} + +/* ARGSUSED */ +static boolean_t +of_proto(ofmt_arg_t *of_arg, char *buf, uint_t bufsize) +{ + ilbadm_rl_list_arg_t *ra = (ilbadm_rl_list_arg_t *)of_arg->ofmt_cbarg; + ilb_rule_data_t *rd = (ilb_rule_data_t *)ra->rd; + + if (rd->r_proto == IPPROTO_TCP) + (void) strlcpy(buf, "TCP", bufsize); + else if (rd->r_proto == IPPROTO_UDP) + (void) strlcpy(buf, "UDP", bufsize); + else + return (B_FALSE); + return (B_TRUE); +} + +static boolean_t +of_rl_ip(ofmt_arg_t *of_arg, char *buf, uint_t bufsize) +{ + ilbadm_rl_list_arg_t *ra = (ilbadm_rl_list_arg_t *)of_arg->ofmt_cbarg; + ilb_rule_data_t *rd = (ilb_rule_data_t *)ra->rd; + ilb_ip_addr_t *ip = NULL, *ip2 = NULL; + + switch (of_arg->ofmt_id) { + case OF_IP_VIP: + ip = &rd->r_vip; + break; + case OF_IP_PROXYSRC: + ip = &rd->r_nat_src_start; + ip2 = &rd->r_nat_src_end; + break; + case OF_IP_STICKYMASK: + ip = &rd->r_stickymask; + break; + } + + /* only print something valid */ + if (ip != NULL && (ip->ia_af == AF_INET || ip->ia_af == AF_INET6)) + ip2str(ip, buf, bufsize, V6_ADDRONLY); + if (ip2 != NULL && (ip2->ia_af == AF_INET || ip2->ia_af == AF_INET6) && + buf[0] != '\0') { + int sl = strlen(buf); + + buf += sl; bufsize -= sl; + *buf++ = '-'; bufsize--; + ip2str(ip2, buf, bufsize, V6_ADDRONLY); + } + + return (B_TRUE); +} + +static boolean_t +of_rl_mask(ofmt_arg_t *of_arg, char *buf, uint_t bufsize) +{ + ilbadm_rl_list_arg_t *ra = (ilbadm_rl_list_arg_t *)of_arg->ofmt_cbarg; 
+ ilb_rule_data_t *rd = (ilb_rule_data_t *)ra->rd; + ilb_ip_addr_t *ip = NULL; + + assert(of_arg->ofmt_id == OF_IP_STICKYMASK); + if (!(rd->r_flags & ILB_FLAGS_RULE_STICKY)) + return (B_TRUE); + ip = &rd->r_stickymask; + + (void) snprintf(buf, bufsize, "/%d", ilbadm_mask_to_prefixlen(ip)); + return (B_TRUE); +} + +static void +hcport_print(ilb_rule_data_t *rd, char *buf, uint_t bufsize) +{ + if (rd->r_hcport != 0) + (void) snprintf(buf, bufsize, "%d", ntohs(rd->r_hcport)); + else if (rd->r_hcpflag == ILB_HCI_PROBE_ANY) + (void) snprintf(buf, bufsize, "ANY"); + else + buf[0] = '\0'; +} +static boolean_t +of_rport(ofmt_arg_t *of_arg, char *buf, uint_t bufsize) +{ + ilbadm_rl_list_arg_t *ra = (ilbadm_rl_list_arg_t *)of_arg->ofmt_cbarg; + ilb_rule_data_t *rd = (ilb_rule_data_t *)ra->rd; + + if (of_arg->ofmt_id == OF_PORT) + return (of_port2str(rd->r_minport, rd->r_maxport, buf, + bufsize)); + + /* only print a hcport if there's a hc name as well */ + if (of_arg->ofmt_id == OF_HCPORT && rd->r_hcname[0] != '\0') + hcport_print(rd, buf, bufsize); + + return (B_TRUE); +} + +/* ARGSUSED */ +static boolean_t +of_rstatus(ofmt_arg_t *of_arg, char *buf, uint_t bufsize) +{ + ilbadm_rl_list_arg_t *ra = (ilbadm_rl_list_arg_t *)of_arg->ofmt_cbarg; + ilb_rule_data_t *rd = (ilb_rule_data_t *)ra->rd; + + if ((rd->r_flags & ILB_FLAGS_RULE_ENABLED) == ILB_FLAGS_RULE_ENABLED) + buf[0] = 'E'; + else + buf[0] = 'D'; + buf[1] = '\0'; + return (B_TRUE); +} + +static boolean_t +of_algo(ofmt_arg_t *of_arg, char *buf, uint_t bufsize) +{ + ilbadm_rl_list_arg_t *ra = (ilbadm_rl_list_arg_t *)of_arg->ofmt_cbarg; + ilb_rule_data_t *rd = (ilb_rule_data_t *)ra->rd; + + if (algo2bare_str(rd->r_algo, buf, bufsize) == 0) + return (B_FALSE); + return (B_TRUE); +} + +static boolean_t +of_topo(ofmt_arg_t *of_arg, char *buf, uint_t bufsize) +{ + ilbadm_rl_list_arg_t *ra = (ilbadm_rl_list_arg_t *)of_arg->ofmt_cbarg; + ilb_rule_data_t *rd = (ilb_rule_data_t *)ra->rd; + + if (topo2bare_str(rd->r_topo, buf, 
bufsize) == 0) + return (B_FALSE); + return (B_TRUE); +} + +static boolean_t +of_time(ofmt_arg_t *of_arg, char *buf, uint_t bufsize) +{ + ilbadm_rl_list_arg_t *ra = (ilbadm_rl_list_arg_t *)of_arg->ofmt_cbarg; + ilb_rule_data_t *rd = (ilb_rule_data_t *)ra->rd; + + switch (of_arg->ofmt_id) { + case OF_T_CONN: + (void) snprintf(buf, bufsize, "%u", rd->r_conndrain); + break; + case OF_T_NAT: + (void) snprintf(buf, bufsize, "%u", rd->r_nat_timeout); + break; + case OF_T_STICKY: + (void) snprintf(buf, bufsize, "%u", rd->r_sticky_timeout); + break; + } + return (B_TRUE); +} + +typedef struct rl_showlist_arg { + char *buf; + uint_t bufsize; +} rl_showlist_arg_t; + +/* ARGSUSED */ +/* called by ilb_walk_servers(), cannot get rid of unused args */ +static ilb_status_t +srv2srvID(ilb_handle_t h, ilb_server_data_t *sd, const char *sgname, void *arg) +{ + rl_showlist_arg_t *sla = (rl_showlist_arg_t *)arg; + int len; + + (void) snprintf(sla->buf, sla->bufsize, "%s,", sd->sd_srvID); + len = strlen(sd->sd_srvID) + 1; + sla->buf += len; + sla->bufsize -= len; + + return (ILB_STATUS_OK); +} + +static boolean_t +of_rl_srvlist(ofmt_arg_t *of_arg, char *buf, uint_t bufsize) +{ + ilbadm_rl_list_arg_t *ra = (ilbadm_rl_list_arg_t *)of_arg->ofmt_cbarg; + ilb_rule_data_t *rd = (ilb_rule_data_t *)ra->rd; + rl_showlist_arg_t sla; + + sla.buf = buf; + sla.bufsize = bufsize; + + (void) ilb_walk_servers(ra->h, srv2srvID, rd->r_sgname, + (void *)&sla); + /* we're trailing a ',' which we need to remove */ + *--sla.buf = '\0'; + + return (B_TRUE); +} + +#define RMAXCOLS 120 /* enough? 
*/ +#define SERVER_WIDTH (ILB_NAMESZ+1) /* 1st guess */ + +static boolean_t +of_port2str(in_port_t minport, in_port_t maxport, char *buf, uint_t bufsize) +{ + in_port_t h_min, h_max; + int len; + + h_min = ntohs(minport); + h_max = ntohs(maxport); + + if (h_min == 0) + return (B_FALSE); /* print "unspec" == "all ports" */ + + len = snprintf(buf, bufsize, "%d", h_min); + if (h_max > h_min) + (void) snprintf(buf + len, bufsize - len, "-%d", h_max); + return (B_TRUE); +} + +static ilbadm_status_t +ip2hostname(ilb_ip_addr_t *ip, char *buf, uint_t bufsize) +{ + int ret; + struct hostent *he; + + switch (ip->ia_af) { + case AF_INET: + he = getipnodebyaddr((char *)&ip->ia_v4, sizeof (ip->ia_v4), + ip->ia_af, &ret); + break; + case AF_INET6: + he = getipnodebyaddr((char *)&ip->ia_v6, sizeof (ip->ia_v6), + ip->ia_af, &ret); + break; + default: return (ILBADM_INVAL_AF); + } + + /* if we can't resolve this, just return an empty name */ + if (he == NULL) + buf[0] = '\0'; + else + (void) strlcpy(buf, he->h_name, bufsize); + + return (ILBADM_OK); +} + +/* ARGSUSED */ +/* + * Since this function is used by libilb routine ilb_walk_rules() + * it must return libilb errors + */ +static ilb_status_t +ilbadm_show_onerule(ilb_handle_t h, ilb_rule_data_t *rd, void *arg) +{ + ilbadm_sh_rl_arg_t *larg = (ilbadm_sh_rl_arg_t *)arg; + ofmt_status_t oerr; + int oflags = 0; + int ocols = RMAXCOLS; + ilbadm_rl_list_arg_t ra; + static ofmt_handle_t oh = (ofmt_handle_t)NULL; + ofmt_field_t *fields; + boolean_t r_enabled = rd->r_flags & ILB_FLAGS_RULE_ENABLED; + + if (larg->o_str == NULL) { + ilbadm_err(gettext("internal error")); + return (ILB_STATUS_GENERIC); + } + + /* + * only print rules (enabled/dis-) we're asked to + * note: both LIST_**ABLED flags can be set at the same time, + * whereas a rule has one state only. therefore the complicated + * statement. 
+ */ + if (!((r_enabled && (larg->flags & ILBADM_LIST_ENABLED)) || + (!r_enabled && (larg->flags & ILBADM_LIST_DISABLED)))) + return (ILB_STATUS_OK); + + if (larg->flags & ILBADM_LIST_PARSE) + oflags |= OFMT_PARSABLE; + + if (larg->flags & ILBADM_LIST_FULL) + oflags |= OFMT_MULTILINE; + + bzero(&ra, sizeof (ra)); + ra.rd = rd; + ra.h = h; + + if (oh == NULL) { + if (rd->r_vip.ia_af == AF_INET) + fields = rfields_v4; + else + fields = rfields_v6; + + oerr = ofmt_open(larg->o_str, fields, oflags, ocols, &oh); + if (oerr != OFMT_SUCCESS) { + char e[80]; + + ilbadm_err(gettext("ofmt_open failed: %s"), + ofmt_strerror(oh, oerr, e, sizeof (e))); + return (ILB_STATUS_GENERIC); + } + } + + ofmt_print(oh, &ra); + + return (ILB_STATUS_OK); +} + +static char *full_list_rule_hdrs = + "RULENAME,STATUS,PORT,PROTOCOL,LBALG,TYPE,PROXY-SRC,PMASK," + "HC-NAME,HC-PORT,CONN-DRAIN,NAT-TIMEOUT," + "PERSIST-TIMEOUT,SERVERGROUP,VIP,SERVERS"; +static char *def_list_rule_hdrs = + "RULENAME,STATUS,LBALG,TYPE,PROTOCOL,VIP,PORT"; + +/* ARGSUSED */ +ilbadm_status_t +ilbadm_show_rules(int argc, char *argv[]) +{ + ilb_handle_t h = ILB_INVALID_HANDLE; + int c; + ilb_status_t rclib = ILB_STATUS_OK; + ilbadm_status_t rc = ILBADM_OK; + boolean_t o_opt = B_FALSE, p_opt = B_FALSE; + boolean_t f_opt = B_FALSE; + ilbadm_sh_rl_arg_t larg = {0, NULL, NULL, NULL}; + + larg.flags = ILBADM_LIST_ENABLED | ILBADM_LIST_DISABLED; + while ((c = getopt(argc, argv, ":fpedo:")) != -1) { + switch ((char)c) { + case 'f': larg.flags |= ILBADM_LIST_FULL; + larg.o_str = full_list_rule_hdrs; + f_opt = B_TRUE; + break; + case 'p': larg.flags |= ILBADM_LIST_PARSE; + p_opt = B_TRUE; + break; + case 'o': larg.o_str = optarg; + o_opt = B_TRUE; + break; + /* -e and -d may be repeated - make sure the last one wins */ + case 'e': larg.flags &= ILBADM_LIST_NODISABLED; + larg.flags |= ILBADM_LIST_ENABLED; + break; + case 'd': larg.flags &= ILBADM_LIST_NOENABLED; + larg.flags |= ILBADM_LIST_DISABLED; + break; + case ':': 
ilbadm_err(gettext("missing option argument for %c"), + (char)optopt); + rc = ILBADM_LIBERR; + goto out; + /* not reached */ + break; + case '?': + default: + unknown_opt(argv, optind-1); + /* not reached */ + break; + } + } + + if (f_opt && o_opt) { + ilbadm_err(gettext("options -o and -f are mutually" + " exclusive")); + exit(1); + } + + if (p_opt && !o_opt) { + ilbadm_err(gettext("option -p requires -o")); + exit(1); + } + + if (p_opt && larg.o_str != NULL && + (strcasecmp(larg.o_str, "all") == 0)) { + ilbadm_err(gettext("option -p requires explicit field" + " names for -o")); + exit(1); + } + + /* no -o option, so we use std. fields */ + if (!o_opt && !f_opt) + larg.o_str = def_list_rule_hdrs; + + rclib = ilb_open(&h); + if (rclib != ILB_STATUS_OK) + goto out; + + if (optind >= argc) { + rclib = ilb_walk_rules(h, ilbadm_show_onerule, NULL, + (void*)&larg); + } else { + while (optind < argc) { + rclib = ilb_walk_rules(h, ilbadm_show_onerule, + argv[optind++], (void*)&larg); + if (rclib != ILB_STATUS_OK) + break; + } + } +out: + if (h != ILB_INVALID_HANDLE) + (void) ilb_close(h); + + if (rclib != ILB_STATUS_OK) { + /* + * The show function returns ILB_STATUS_GENERIC after printing + * out an error message. So we don't need to print it again. 
+ */ + if (rclib != ILB_STATUS_GENERIC) + ilbadm_err(ilb_errstr(rclib)); + rc = ILBADM_LIBERR; + } + return (rc); +} + +static boolean_t +of_srv2str(ofmt_arg_t *of_arg, char *buf, uint_t bufsize) +{ + ilbadm_rl_srvlist_arg_t *larg = + (ilbadm_rl_srvlist_arg_t *)of_arg->ofmt_cbarg; + ilb_server_data_t *sd = larg->sd; + uint_t op = of_arg->ofmt_id; + boolean_t ret = B_TRUE; + ilbadm_status_t rc; + + if (sd == NULL) + return (B_FALSE); + + switch (op) { + case OF_SRV_ID: + (void) strlcpy(buf, sd->sd_srvID, bufsize); + break; + case OF_SRV_STATUS: + if (ILB_IS_SRV_ENABLED(sd->sd_flags)) + buf[0] = 'E'; + else + buf[0] = 'D'; + buf[1] = '\0'; + break; + case OF_SRV_RNAME: + (void) strlcpy(buf, larg->rd->r_name, bufsize); + break; + case OF_SRV_SGNAME: + (void) strlcpy(buf, larg->sgname, bufsize); + break; + case OF_SRV_HOSTNAME: + rc = ip2hostname(&sd->sd_addr, buf, bufsize); + if (rc != ILBADM_OK) { + buf[0] = '\0'; + ret = B_FALSE; + } + break; + case OF_SRV_PORT: + ret = of_port2str(sd->sd_minport, sd->sd_maxport, + buf, bufsize); + break; + case OF_SRV_ADDR: + ip2str(&sd->sd_addr, buf, bufsize, V6_ADDRONLY); + break; + } + + return (ret); +} + +/* ARGSUSED */ +static ilb_status_t +i_show_rl_srv(ilb_handle_t h, ilb_server_data_t *sd, const char *sgname, + void *arg) +{ + ilbadm_rl_srvlist_arg_t *larg = (ilbadm_rl_srvlist_arg_t *)arg; + + larg->sd = sd; + ofmt_print(larg->oh, larg); + return (ILB_STATUS_OK); +} + +/* ARGSUSED */ +/* + * Since this function is used by libilb routine ilb_walk_rules() + * it must return libilb errors + */ +ilb_status_t +ilbadm_show_rl_servers(ilb_handle_t h, ilb_rule_data_t *rd, void *arg) +{ + ofmt_status_t oerr; + int oflags = 0; + int ocols = RMAXCOLS; + ofmt_field_t *fields; + static ofmt_handle_t oh = (ofmt_handle_t)NULL; + ilbadm_rl_srvlist_arg_t *larg = (ilbadm_rl_srvlist_arg_t *)arg; + + /* + * in full mode, we currently re-open ofmt() for every rule; we use + * a variable number of lines, as we print one for every server + * 
attached to a rule. + */ + if (larg->o_str == NULL) { + ilbadm_err(gettext("internal error")); + return (ILB_STATUS_GENERIC); + } + + if (larg->flags & ILBADM_LIST_PARSE) + oflags |= OFMT_PARSABLE; + + if (rd->r_vip.ia_af == AF_INET) + fields = ssfields_v4; + else + fields = ssfields_v6; + + if (oh == NULL) { + oerr = ofmt_open(larg->o_str, fields, oflags, ocols, &oh); + if (oerr != OFMT_SUCCESS) { + char e[80]; + + ilbadm_err(gettext("ofmt_open failed: %s"), + ofmt_strerror(oh, oerr, e, sizeof (e))); + return (ILB_STATUS_GENERIC); + } + larg->oh = oh; + } + + larg->rd = rd; + larg->sgname = rd->r_sgname; + + return (ilb_walk_servers(h, i_show_rl_srv, rd->r_sgname, (void *)larg)); +} + +static char *def_show_srv_hdrs = + "SERVERID,ADDRESS,PORT,RULENAME,STATUS,SERVERGROUP"; + +/* ARGSUSED */ +ilbadm_status_t +ilbadm_show_server(int argc, char *argv[]) +{ + ilb_handle_t h = ILB_INVALID_HANDLE; + int c; + ilb_status_t rclib = ILB_STATUS_OK; + ilbadm_status_t rc = ILBADM_OK; + boolean_t o_opt = B_FALSE, p_opt = B_FALSE; + ilbadm_rl_srvlist_arg_t larg; + + bzero(&larg, sizeof (larg)); + while ((c = getopt(argc, argv, ":po:")) != -1) { + switch ((char)c) { + case 'p': larg.flags |= ILBADM_LIST_PARSE; + p_opt = B_TRUE; + break; + case 'o': larg.o_str = optarg; + o_opt = B_TRUE; + break; + case ':': ilbadm_err(gettext("missing option argument for %c"), + (char)optopt); + rc = ILBADM_LIBERR; + goto out; + /* not reached */ + break; + case '?': + default: + unknown_opt(argv, optind-1); + /* not reached */ + break; + } + } + + if (p_opt && !o_opt) { + ilbadm_err(gettext("option -p requires -o")); + exit(1); + } + + if (p_opt && larg.o_str != NULL && + (strcasecmp(larg.o_str, "all") == 0)) { + ilbadm_err(gettext("option -p requires explicit" + " field names for -o")); + exit(1); + } + + /* no -o option, so we use default fields */ + if (!o_opt) + larg.o_str = def_show_srv_hdrs; + + rclib = ilb_open(&h); + if (rclib != ILB_STATUS_OK) + goto out; + + if (optind >= argc) { + 
rclib = ilb_walk_rules(h, ilbadm_show_rl_servers, NULL, + (void*)&larg); + } else { + while (optind < argc) { + rclib = ilb_walk_rules(h, ilbadm_show_rl_servers, + argv[optind++], (void*)&larg); + if (rclib != ILB_STATUS_OK) + break; + } + } +out: + if (h != ILB_INVALID_HANDLE) + (void) ilb_close(h); + + if (rclib != ILB_STATUS_OK) { + /* + * The show function returns ILB_STATUS_GENERIC after printing + * out an error message. So we don't need to print it again. + */ + if (rclib != ILB_STATUS_GENERIC) + ilbadm_err(ilb_errstr(rclib)); + rc = ILBADM_LIBERR; + } + return (rc); +} + +static ilbadm_status_t +i_parse_rl_arg(char *arg, ilb_rule_data_t *rd, ilbadm_key_name_t *keylist) +{ + ilbadm_status_t rc; + + rc = i_parse_optstring(arg, (void *) rd, keylist, + OPT_PORTS, NULL); + return (rc); +} + +static void +i_ilbadm_alloc_rule(ilb_rule_data_t **rdp) +{ + ilb_rule_data_t *rd; + + *rdp = rd = (ilb_rule_data_t *)calloc(sizeof (*rd), 1); + if (rd == NULL) + return; + rd->r_proto = IPPROTO_TCP; +} + +static void +i_ilbadm_free_rule(ilb_rule_data_t *rd) +{ + free(rd); +} + +/* ARGSUSED */ +ilbadm_status_t +ilbadm_destroy_rule(int argc, char *argv[]) +{ + ilb_handle_t h = ILB_INVALID_HANDLE; + ilbadm_status_t rc = ILBADM_OK; + ilb_status_t rclib = ILB_STATUS_OK; + boolean_t all_rules = B_FALSE; + int c, i; + + while ((c = getopt(argc, argv, ":a")) != -1) { + switch ((char)c) { + case 'a': + all_rules = B_TRUE; + break; + case '?': + default: + unknown_opt(argv, optind-1); + /* not reached */ + break; + } + } + + if (optind >= argc && !all_rules) { + ilbadm_err(gettext("usage: delete-rule -a | name")); + return (ILBADM_LIBERR); + } + + /* either "-a" or rulename, not both */ + if (optind < argc && all_rules) { + rc = ILBADM_INVAL_ARGS; + goto out; + } + + rclib = ilb_open(&h); + if (rclib != ILB_STATUS_OK) + goto out; + + if (all_rules) { + rclib = ilb_destroy_rule(h, NULL); + goto out; + } + + for (i = optind; i < argc && rclib == ILB_STATUS_OK; i++) + rclib = 
ilb_destroy_rule(h, argv[i]); + +out: + if (h != ILB_INVALID_HANDLE) + (void) ilb_close(h); + + /* This prints the specific errors */ + if (rclib != ILB_STATUS_OK) { + ilbadm_err(ilb_errstr(rclib)); + rc = ILBADM_LIBERR; + } + /* This prints the generic errors */ + if ((rc != ILBADM_OK) && (rc != ILBADM_LIBERR)) + ilbadm_err(ilbadm_errstr(rc)); + return (rc); +} + +/* ARGSUSED */ +static ilbadm_status_t +ilbadm_Xable_rule(int argc, char *argv[], ilbadm_cmd_t cmd) +{ + ilb_handle_t h = ILB_INVALID_HANDLE; + ilb_status_t rclib = ILB_STATUS_OK; + ilbadm_status_t rc = ILBADM_OK; + int i; + + rclib = ilb_open(&h); + if (rclib != ILB_STATUS_OK) + goto out; + /* + * by default, en/disable-rule mean "all", and not using + * a rule name will cause this behaviour to kick in + */ + if (argc < 2) { + if (cmd == cmd_enable_rule) + rclib = ilb_enable_rule(h, NULL); + else + rclib = ilb_disable_rule(h, NULL); + } else { + + for (i = optind; i < argc && rc == ILBADM_OK; i++) { + if (cmd == cmd_enable_rule) + rclib = ilb_enable_rule(h, argv[i]); + else + rclib = ilb_disable_rule(h, argv[i]); + } + } +out: + if (h != ILB_INVALID_HANDLE) + (void) ilb_close(h); + + if (rclib != ILB_STATUS_OK) { + ilbadm_err(ilb_errstr(rclib)); + rc = ILBADM_LIBERR; + } + return (rc); +} + +ilbadm_status_t +ilbadm_enable_rule(int argc, char *argv[]) +{ + + return (ilbadm_Xable_rule(argc, argv, cmd_enable_rule)); +} + +ilbadm_status_t +ilbadm_disable_rule(int argc, char *argv[]) +{ + return (ilbadm_Xable_rule(argc, argv, cmd_disable_rule)); +} + +/* + * parse and create a rule + */ +ilbadm_status_t +ilbadm_create_rule(int argc, char *argv[]) +{ + ilb_handle_t h = ILB_INVALID_HANDLE; + int c; + ilb_status_t rclib = ILB_STATUS_OK; + ilbadm_status_t rc = ILBADM_OK; + ilb_rule_data_t *rd; + boolean_t p_opt = B_FALSE; + + i_ilbadm_alloc_rule(&rd); + + while ((c = getopt(argc, argv, ":ei:m:o:t:h:p")) != -1) { + switch ((char)c) { + case 'e': + rd->r_flags |= ILB_FLAGS_RULE_ENABLED; + break; + case 'h': + /* + 
* Default value of of r_hcpflag means that if there + * is a port range, probe any port. If there is only + * one port, probe that port. + */ + rd->r_hcpflag = ILB_HCI_PROBE_ANY; + rc = i_parse_rl_arg(optarg, rd, &rl_healthchk_keys[0]); + break; + case 'o': + rc = i_parse_rl_arg(optarg, rd, &rl_outgoing_keys[0]); + break; + case 'm': + rc = i_parse_rl_arg(optarg, rd, &rl_method_keys[0]); + break; + case 't': + rc = i_parse_rl_arg(optarg, rd, &rl_timer_keys[0]); + break; + case 'i': + rc = i_parse_rl_arg(optarg, rd, &rl_incoming_keys[0]); + break; + case 'p': + p_opt = B_TRUE; + break; + case ':': + ilbadm_err(gettext("missing option-argument" + " for %c"), (char)optopt); + rc = ILBADM_LIBERR; + break; + case '?': + default: + unknown_opt(argv, optind-1); + /* not reached */ + break; + + } + if (rc != ILBADM_OK) + goto out; + } + + if (optind >= argc) { + ilbadm_err(gettext("missing mandatory arguments - please refer" + " to 'ilbadm create-rule' subcommand description in" + " ilbadm(1M)")); + rc = ILBADM_LIBERR; + goto out; + + } + + if (p_opt) { + /* + * if user hasn't specified a mask, apply default + */ + if ((rd->r_flags & ILB_FLAGS_RULE_STICKY) == 0) { + char *maskstr; + + switch (rd->r_vip.ia_af) { + case AF_INET: + maskstr = "32"; + break; + case AF_INET6: + maskstr = "128"; + break; + } + rc = ilbadm_set_netmask(maskstr, &rd->r_stickymask, + rd->r_vip.ia_af); + if (rc != ILBADM_OK) { + ilbadm_err(gettext("trouble seting default" + " persistence mask")); + rc = ILBADM_LIBERR; + goto out; + } + } + } else { + /* use of sticky mask currently mandates "-p" */ + if ((rd->r_flags & ILB_FLAGS_RULE_STICKY) != 0) { + ilbadm_err(gettext("use of stickymask requires" + " -p option")); + rc = ILBADM_LIBERR; + goto out; + } + } + + if (strlen(argv[optind]) > ILBD_NAMESZ -1) { + ilbadm_err(gettext("rule name %s is too long -" + " must not exceed %d chars"), argv[optind], + ILBD_NAMESZ - 1); + rc = ILBADM_LIBERR; + goto out; + } + + (void) strlcpy(rd->r_name, argv[optind], 
sizeof (rd->r_name)); + + rc = i_check_rule_spec(rd); + if (rc != ILBADM_OK) + goto out; + + rclib = ilb_open(&h); + if (rclib != ILB_STATUS_OK) + goto out; + + rclib = ilb_create_rule(h, rd); + +out: + i_ilbadm_free_rule(rd); + + if (h != ILB_INVALID_HANDLE) + (void) ilb_close(h); + + if (rclib != ILB_STATUS_OK) { + ilbadm_err(ilb_errstr(rclib)); + rc = ILBADM_LIBERR; + } + if ((rc != ILBADM_OK) && (rc != ILBADM_LIBERR)) + ilbadm_err(ilbadm_errstr(rc)); + + return (rc); +} + +/* ARGSUSED */ + +/* + * Since this function is used by libilb function, ilb_walk_rules() + * it must return libilb errors + */ +static ilb_status_t +ilbadm_export_rl(ilb_handle_t h, ilb_rule_data_t *rd, void *arg) +{ + char linebuf[128]; /* should be enough */ + int sz = sizeof (linebuf); + FILE *fp = ((ilbadm_rl_exp_arg_t *)arg)->fp; + uint32_t conndrain, nat_timeout, sticky_timeout; + + (void) fprintf(fp, "create-rule "); + if (rd->r_flags & ILB_FLAGS_RULE_ENABLED) + (void) fprintf(fp, "-e "); + if (rd->r_flags & ILB_FLAGS_RULE_STICKY) + (void) fprintf(fp, "-p "); + + ip2str(&rd->r_vip, linebuf, sz, V6_ADDRONLY); + (void) fprintf(fp, "-i vip=%s,", linebuf); + + (void) ports2str(ntohs(rd->r_minport), ntohs(rd->r_maxport), + linebuf, sz); + (void) fprintf(fp, "%s,", linebuf); + + proto2str(rd->r_proto, linebuf, sz); + (void) fprintf(fp, "%s ", linebuf); + + algo2str(rd->r_algo, linebuf, sz); + (void) fprintf(fp, "-m %s,", linebuf); + + topo2str(rd->r_topo, linebuf, sz); + (void) fprintf(fp, "%s", linebuf); + + if (rd->r_nat_src_start.ia_af != AF_UNSPEC) { + ip2str(&rd->r_nat_src_start, linebuf, sz, V6_ADDRONLY); + /* if the address is unspecified, skip it */ + if (linebuf[0] != '\0') { + (void) fprintf(fp, ",proxy-src=%s", linebuf); + ip2str(&rd->r_nat_src_end, linebuf, sz, V6_ADDRONLY); + (void) fprintf(fp, "-%s", linebuf); + } + } + + if (rd->r_flags & ILB_FLAGS_RULE_STICKY) { + (void) fprintf(fp, ",pmask=/%d", + ilbadm_mask_to_prefixlen(&rd->r_stickymask)); + } + + (void) fprintf(fp, " 
"); + + if (*rd->r_hcname != '\0') { + (void) fprintf(fp, "-h hc-name=%s", rd->r_hcname); + hcport_print(rd, linebuf, sizeof (linebuf)); + + if (linebuf[0] != '\0') + (void) fprintf(fp, ",hc-port=%s", linebuf); + (void) fprintf(fp, " "); + } + + conndrain = rd->r_conndrain; + nat_timeout = rd->r_nat_timeout; + sticky_timeout = rd->r_sticky_timeout; + if (conndrain != 0 || nat_timeout != 0 || sticky_timeout != 0) { + int cnt = 0; + + (void) fprintf(fp, "-t "); + if (conndrain != 0) { + cnt++; + (void) fprintf(fp, "conn-drain=%u", conndrain); + } + if (nat_timeout != 0) { + if (cnt > 0) + (void) fprintf(fp, ","); + cnt++; + (void) fprintf(fp, "nat-timeout=%u", nat_timeout); + } + if (sticky_timeout != 0) { + if (cnt > 0) + (void) fprintf(fp, ","); + (void) fprintf(fp, "persist-timeout=%u", + sticky_timeout); + } + (void) fprintf(fp, " "); + } + + if (fprintf(fp, "-o servergroup=%s %s\n", rd->r_sgname, rd->r_name) + < 0 || fflush(fp) == EOF) + return (ILB_STATUS_WRITE); + + return (ILB_STATUS_OK); +} + +ilbadm_status_t +ilbadm_export_rules(ilb_handle_t h, FILE *fp) +{ + ilb_status_t rclib; + ilbadm_status_t rc = ILBADM_OK; + ilbadm_rl_exp_arg_t arg; + + arg.fp = fp; + + rclib = ilb_walk_rules(h, ilbadm_export_rl, NULL, (void *)&arg); + if (rclib != ILB_STATUS_OK) + rc = ILBADM_LIBERR; + return (rc); +} diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_sg.c b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_sg.c new file mode 100644 index 0000000000..d7009bf811 --- /dev/null +++ b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_sg.c @@ -0,0 +1,837 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <strings.h> +#include <stddef.h> +#include <assert.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <sys/list.h> +#include <ofmt.h> +#include <libilb.h> +#include "ilbadm.h" + +static ilbadm_key_name_t servrange_keys[] = { + {ILB_KEY_SERVER, "server", "servers"}, + {ILB_KEY_SERVRANGE, "server", "servers"}, + {ILB_KEY_BAD, "", ""} +}; + +static ilbadm_key_name_t serverID_keys[] = { + {ILB_KEY_SERVERID, "server", ""}, + {ILB_KEY_BAD, "", ""} +}; + +typedef struct sg_export_arg { + FILE *fp; + ilbadm_sgroup_t *sg; +} sg_export_arg_t; + +typedef struct arg_struct { + int flags; + char *o_str; + ofmt_field_t *o_fields; + ofmt_handle_t oh; +} list_arg_t; + +typedef struct sg_srv_o_struct { + char *sgname; + ilb_server_data_t *sd; +} sg_srv_o_arg_t; + +static ofmt_cb_t of_sgname; +static ofmt_cb_t of_srvID; +static ofmt_cb_t of_port; +static ofmt_cb_t of_ip; + +static ofmt_field_t sgfields_v4[] = { + {"SGNAME", ILB_SGNAME_SZ, 0, of_sgname}, + {"SERVERID", ILB_NAMESZ, 0, of_srvID}, + {"MINPORT", 8, 0, of_port}, + {"MAXPORT", 8, 1, of_port}, + {"IP_ADDRESS", 15, 0, of_ip}, + {NULL, 0, 0, NULL} +}; +static ofmt_field_t sgfields_v6[] = { + {"SGNAME", ILB_SGNAME_SZ, 0, of_sgname}, + {"SERVERID", ILB_NAMESZ, 0, of_srvID}, + {"MINPORT", 
8, 0, of_port}, + {"MAXPORT", 8, 1, of_port}, + {"IP_ADDRESS", 39, 0, of_ip}, + {NULL, 0, 0, NULL} +}; + +#define MAXCOLS 80 /* make flexible? */ + +extern int optind, optopt, opterr; +extern char *optarg; + +static boolean_t +of_sgname(ofmt_arg_t *of_arg, char *buf, uint_t bufsize) +{ + sg_srv_o_arg_t *l = (sg_srv_o_arg_t *)of_arg->ofmt_cbarg; + + (void) strlcpy(buf, l->sgname, bufsize); + return (B_TRUE); +} + +static boolean_t +of_srvID(ofmt_arg_t *of_arg, char *buf, uint_t bufsize) +{ + sg_srv_o_arg_t *l = (sg_srv_o_arg_t *)of_arg->ofmt_cbarg; + + (void) strlcpy(buf, l->sd->sd_srvID, bufsize); + return (B_TRUE); +} + +static boolean_t +of_port(ofmt_arg_t *of_arg, char *buf, uint_t bufsize) +{ + sg_srv_o_arg_t *l = (sg_srv_o_arg_t *)of_arg->ofmt_cbarg; + int port; + + if (of_arg->ofmt_id == 0) { + port = ntohs(l->sd->sd_minport); + if (port == 0) + *buf = '\0'; + else + (void) snprintf(buf, bufsize, "%d", port); + } else { + port = ntohs(l->sd->sd_maxport); + if (port == 0) + *buf = '\0'; + else + (void) snprintf(buf, bufsize, "%d", port); + } + return (B_TRUE); +} + +static boolean_t +of_ip(ofmt_arg_t *of_arg, char *buf, uint_t bufsize) +{ + sg_srv_o_arg_t *l = (sg_srv_o_arg_t *)of_arg->ofmt_cbarg; + + ip2str(&l->sd->sd_addr, buf, bufsize, V6_ADDRONLY); + return (B_TRUE); +} + +ilbadm_status_t +i_list_sg_srv_ofmt(char *sgname, ilb_server_data_t *sd, void *arg) +{ + list_arg_t *larg = (list_arg_t *)arg; + sg_srv_o_arg_t line_arg; + + line_arg.sgname = sgname; + line_arg.sd = sd; + ofmt_print(larg->oh, &line_arg); + return (ILBADM_OK); +} + +/* + * This function is always called via ilb_walk_servergroups() + * and so must return libilb errors. 
+ * That's why we need to retain currently unused "h" argument
+ *
+ * With -o (larg->o_str set) the entry is printed through ofmt; the ofmt
+ * handle is opened on the first call and cached in a function-static.
+ * Without -o a plain "sgname: id:ID addr[:port[-port]]" line is printed.
+ */
+/* ARGSUSED */
+static ilb_status_t
+ilbadm_list_sg_srv(ilb_handle_t h, ilb_server_data_t *sd, const char *sgname,
+    void *arg)
+{
+	char		ip_str[2*INET6_ADDRSTRLEN + 3] = "";
+	char		port_str[INET6_ADDRSTRLEN];
+	list_arg_t	*larg = (list_arg_t *)arg;
+	ofmt_status_t	oerr;
+	int		oflags = 0;
+	int		ocols = MAXCOLS;
+	int		h_minport, h_maxport;
+	static ofmt_handle_t	oh = (ofmt_handle_t)NULL;
+	ofmt_field_t	*ofp;
+
+	if (larg->o_str != NULL) {
+		if (oh == NULL) {
+			/*
+			 * Pick the field table by address family: only
+			 * IPv6 needs the wide (39 column) IP_ADDRESS
+			 * field.  Anything else, including the zeroed
+			 * entry ilbadm_list_sg() passes for an empty
+			 * group, uses the IPv4 table.
+			 */
+			if (sd->sd_addr.ia_af == AF_INET6)
+				ofp = sgfields_v6;
+			else
+				ofp = sgfields_v4;
+
+			if (larg->flags & ILBADM_LIST_PARSE)
+				oflags |= OFMT_PARSABLE;
+
+			oerr = ofmt_open(larg->o_str, ofp, oflags, ocols, &oh);
+			if (oerr != OFMT_SUCCESS) {
+				char	e[80];
+
+				ilbadm_err(gettext("ofmt_open failed: %s"),
+				    ofmt_strerror(oh, oerr, e, sizeof (e)));
+				return (ILB_STATUS_GENERIC);
+			}
+			larg->oh = oh;
+		}
+
+
+		(void) i_list_sg_srv_ofmt((char *)sgname, sd, arg);
+		return (ILB_STATUS_OK);
+	}
+
+	ip2str(&sd->sd_addr, ip_str, sizeof (ip_str), 0);
+
+	/* ports are kept in network byte order; 0 means "no port" */
+	h_minport = ntohs(sd->sd_minport);
+	h_maxport = ntohs(sd->sd_maxport);
+	if (h_minport == 0)
+		*port_str = '\0';
+	else if (h_maxport > h_minport)
+		(void) sprintf(port_str, ":%d-%d", h_minport, h_maxport);
+	else
+		(void) sprintf(port_str, ":%d", h_minport);
+
+	(void) printf("%s: id:%s %s%s\n", sgname,
+	    sd->sd_srvID?sd->sd_srvID:"(null)", ip_str, port_str);
+	return (ILB_STATUS_OK);
+}
+
+ilb_status_t
+ilbadm_list_sg(ilb_handle_t h, ilb_sg_data_t *sg, void *arg)
+{
+	if (sg->sgd_srvcount == 0) {
+		ilb_server_data_t	tmp_srv;
+
+		bzero(&tmp_srv, sizeof (tmp_srv));
+		return (ilbadm_list_sg_srv(h, &tmp_srv, sg->sgd_name, arg));
+	}
+
+	return (ilb_walk_servers(h, ilbadm_list_sg_srv, sg->sgd_name, arg));
+}
+
+static char *def_fields = "SGNAME,SERVERID,MINPORT,MAXPORT,IP_ADDRESS";
+
+/* ARGSUSED */
+ilbadm_status_t
+ilbadm_show_servergroups(int argc, char *argv[])
+{
+	ilb_handle_t	h = ILB_INVALID_HANDLE;
+ ilb_status_t rclib = ILB_STATUS_OK; + ilbadm_status_t rc = ILBADM_OK; + int c; + char optstr[] = ":po:"; + + boolean_t o_opt = B_FALSE, p_opt = B_FALSE; + list_arg_t larg = {0, def_fields, NULL, NULL}; + + while ((c = getopt(argc, argv, optstr)) != -1) { + switch ((char)c) { + case 'p': p_opt = B_TRUE; + larg.flags |= ILBADM_LIST_PARSE; + break; + case 'o': larg.o_str = optarg; + o_opt = B_TRUE; + break; + case ':': ilbadm_err(gettext("missing option argument" + " for %c"), (char)optopt); + rc = ILBADM_LIBERR; + goto out; + /* not reached */ + break; + default: unknown_opt(argv, optind-1); + /* not reached */ + break; + } + } + + if (p_opt && !o_opt) { + ilbadm_err(gettext("option -p requires -o")); + exit(1); + } + + if (p_opt && larg.o_str != NULL && + (strcasecmp(larg.o_str, "all") == 0)) { + ilbadm_err(gettext("option -p requires explicit field" + " names for -o")); + exit(1); + } + + rclib = ilb_open(&h); + if (rclib != ILB_STATUS_OK) + goto out; + + if (optind >= argc) { + rclib = ilb_walk_servergroups(h, ilbadm_list_sg, NULL, + (void*)&larg); + if (rclib != ILB_STATUS_OK) + rc = ILBADM_LIBERR; + } else { + while (optind < argc) { + rclib = ilb_walk_servergroups(h, ilbadm_list_sg, + argv[optind++], (void*)&larg); + if (rclib != ILB_STATUS_OK) { + rc = ILBADM_LIBERR; + break; + } + } + } + + if (larg.oh != NULL) + ofmt_close(larg.oh); +out: + if (h != ILB_INVALID_HANDLE) + (void) ilb_close(h); + + if (rclib != ILB_STATUS_OK) { + /* + * The show function returns ILB_STATUS_GENERIC after printing + * out an error message. So we don't need to print it again. 
+ */ + if (rclib != ILB_STATUS_GENERIC) + ilbadm_err(ilb_errstr(rclib)); + rc = ILBADM_LIBERR; + } + + return (rc); +} + +ilbadm_servnode_t * +i_new_sg_elem(ilbadm_sgroup_t *sgp) +{ + ilbadm_servnode_t *s; + + s = (ilbadm_servnode_t *)calloc(sizeof (*s), 1); + if (s != NULL) { + list_insert_tail(&sgp->sg_serv_list, s); + sgp->sg_count++; + } + return (s); +} + +static ilbadm_status_t +i_parse_servrange_list(char *arg, ilbadm_sgroup_t *sgp) +{ + ilbadm_status_t rc; + int count; + + rc = i_parse_optstring(arg, (void *) sgp, servrange_keys, + OPT_VALUE_LIST|OPT_IP_RANGE|OPT_PORTS, &count); + return (rc); +} + +static ilbadm_status_t +i_parse_serverIDs(char *arg, ilbadm_sgroup_t *sgp) +{ + ilbadm_status_t rc; + int count; + + rc = i_parse_optstring(arg, (void *) sgp, serverID_keys, + OPT_VALUE_LIST|OPT_PORTS, &count); + return (rc); +} + +static ilbadm_status_t +i_mod_sg(ilb_handle_t h, ilbadm_sgroup_t *sgp, ilbadm_cmd_t cmd, + int flags) +{ + ilbadm_servnode_t *sn; + ilb_server_data_t *srv; + ilb_status_t rclib = ILB_STATUS_OK; + ilbadm_status_t rc = ILBADM_OK; + + if (h == ILB_INVALID_HANDLE && cmd != cmd_enable_server && + cmd != cmd_disable_server) + return (ILBADM_LIBERR); + + sn = list_head(&sgp->sg_serv_list); + while (sn != NULL) { + srv = &sn->s_spec; + + srv->sd_flags |= flags; + if (cmd == cmd_create_sg || cmd == cmd_add_srv) { + rclib = ilb_add_server_to_group(h, sgp->sg_name, + srv); + if (rclib != ILB_STATUS_OK) { + char buf[INET6_ADDRSTRLEN + 1]; + + rc = ILBADM_LIBERR; + ip2str(&srv->sd_addr, buf, sizeof (buf), + V6_ADDRONLY); + ilbadm_err(gettext("cannot add %s to %s: %s"), + buf, sgp->sg_name, ilb_errstr(rclib)); + /* if we created the SG, we bail out */ + if (cmd == cmd_create_sg) + return (rc); + } + } else { + assert(cmd == cmd_rem_srv); + rclib = ilb_rem_server_from_group(h, sgp->sg_name, + srv); + /* if we fail, we tell user and continue */ + if (rclib != ILB_STATUS_OK) { + rc = ILBADM_LIBERR; + ilbadm_err( + gettext("cannot remove %s from %s: 
%s"), + srv->sd_srvID, sgp->sg_name, + ilb_errstr(rclib)); + } + } + + /* + * list_next returns NULL instead of cycling back to head + * so we don't have to check for list_head explicitly. + */ + sn = list_next(&sgp->sg_serv_list, sn); + }; + + return (rc); +} + +static void +i_ilbadm_alloc_sgroup(ilbadm_sgroup_t **sgp) +{ + ilbadm_sgroup_t *sg; + + *sgp = sg = (ilbadm_sgroup_t *)calloc(sizeof (*sg), 1); + if (sg == NULL) + return; + list_create(&sg->sg_serv_list, sizeof (ilbadm_servnode_t), + offsetof(ilbadm_servnode_t, s_link)); +} + +static void +i_ilbadm_free_sgroup(ilbadm_sgroup_t *sg) +{ + ilbadm_servnode_t *s; + + while ((s = list_remove_head(&sg->sg_serv_list)) != NULL) + free(s); + + list_destroy(&sg->sg_serv_list); +} + +ilbadm_status_t +ilbadm_create_servergroup(int argc, char *argv[]) +{ + ilb_handle_t h = ILB_INVALID_HANDLE; + ilb_status_t rclib = ILB_STATUS_OK; + ilbadm_status_t rc = ILBADM_OK; + ilbadm_sgroup_t *sg; + int c; + int flags = 0; + + i_ilbadm_alloc_sgroup(&sg); + + while ((c = getopt(argc, argv, ":s:")) != -1) { + switch ((char)c) { + case 's': + rc = i_parse_servrange_list(optarg, sg); + break; + case ':': + ilbadm_err(gettext("missing option-argument for" + " %c"), (char)optopt); + rc = ILBADM_LIBERR; + break; + case '?': + default: + unknown_opt(argv, optind-1); + /* not reached */ + break; + } + + if (rc != ILBADM_OK) + goto out; + } + + if (optind >= argc) { + ilbadm_err(gettext("missing mandatory arguments - please refer" + " to 'create-servergroup' subcommand" + " description in ilbadm(1M)")); + rc = ILBADM_LIBERR; + goto out; + } + + if (strlen(argv[optind]) > ILB_SGNAME_SZ - 1) { + ilbadm_err(gettext("servergroup name %s is too long -" + " must not exceed %d chars"), argv[optind], + ILB_SGNAME_SZ - 1); + rc = ILBADM_LIBERR; + goto out; + } + + sg->sg_name = argv[optind]; + + rclib = ilb_open(&h); + if (rclib != ILB_STATUS_OK) + goto out; + + rclib = ilb_create_servergroup(h, sg->sg_name); + if (rclib != ILB_STATUS_OK) + goto out; 
+
+	/* we create a servergroup with all servers enabled */
+	ILB_SET_ENABLED(flags);
+	rc = i_mod_sg(h, sg, cmd_create_sg, flags);
+
+	if (rc != ILBADM_OK)
+		(void) ilb_destroy_servergroup(h, sg->sg_name);
+
+out:
+	i_ilbadm_free_sgroup(sg);
+	if (h != ILB_INVALID_HANDLE)
+		(void) ilb_close(h);
+
+	if (rclib != ILB_STATUS_OK) {
+		ilbadm_err(ilb_errstr(rclib));
+		rc = ILBADM_LIBERR;
+	}
+	if ((rc != ILBADM_OK) && (rc != ILBADM_LIBERR))
+		ilbadm_err(ilbadm_errstr(rc));
+
+	return (rc);
+}
+
+/*
+ * add-server: parse the "-s" server specifications and add them, marked
+ * enabled, to the servergroup named by the first operand.  Returns
+ * ILBADM_OK on success; on failure an error message has been printed.
+ */
+ilbadm_status_t
+ilbadm_add_server_to_group(int argc, char **argv)
+{
+	ilb_handle_t	h = ILB_INVALID_HANDLE;
+	ilb_status_t	rclib = ILB_STATUS_OK;
+	ilbadm_status_t	rc = ILBADM_OK;
+	ilbadm_sgroup_t	*sg;
+	int		c;
+	int		flags = 0;
+
+	i_ilbadm_alloc_sgroup(&sg);
+
+	while ((c = getopt(argc, argv, ":s:")) != -1) {
+		switch ((char)c) {
+		case 's':
+			rc = i_parse_servrange_list(optarg, sg);
+			break;
+		case ':':
+			ilbadm_err(gettext("missing option-argument for"
+			    " %c"), (char)optopt);
+			rc = ILBADM_LIBERR;
+			break;
+		case '?':
+		default:	unknown_opt(argv, optind-1);
+			/* not reached */
+			break;
+		}
+
+		if (rc != ILBADM_OK)
+			goto out;
+	}
+
+	if (optind >= argc) {
+		ilbadm_err(gettext("missing mandatory arguments - please refer"
+		    " to 'add-server' subcommand description in ilbadm(1M)"));
+		rc = ILBADM_LIBERR;
+		goto out;
+	}
+
+	sg->sg_name = argv[optind];
+
+	rclib = ilb_open(&h);
+	if (rclib != ILB_STATUS_OK)
+		goto out;
+
+	/* A server is added enabled */
+	ILB_SET_ENABLED(flags);
+	rc = i_mod_sg(h, sg, cmd_add_srv, flags);
+out:
+	i_ilbadm_free_sgroup(sg);
+	if (h != ILB_INVALID_HANDLE)
+		(void) ilb_close(h);
+
+	/*
+	 * rclib is only set by ilb_open() here; without this check a
+	 * failed open would leave rc at ILBADM_OK and the command would
+	 * silently report success.
+	 */
+	if (rclib != ILB_STATUS_OK) {
+		ilbadm_err(ilb_errstr(rclib));
+		rc = ILBADM_LIBERR;
+	}
+	if ((rc != ILBADM_OK) && (rc != ILBADM_LIBERR))
+		ilbadm_err(ilbadm_errstr(rc));
+	return (rc);
+}
+
+/* ARGSUSED */
+static ilbadm_status_t
+ilbadm_Xable_server(int argc, char *argv[], ilbadm_cmd_t cmd)
+{
+	ilb_handle_t	h = ILB_INVALID_HANDLE;
+	ilbadm_status_t	rc = ILBADM_OK;
+	ilb_status_t	rclib = ILB_STATUS_OK;
+	int		i;
+
+	if (argc < 2) {
+		
ilbadm_err(gettext("missing required argument" + " (server specification)")); + rc = ILBADM_LIBERR; + goto out; + } + + rclib = ilb_open(&h); + if (rclib != ILB_STATUS_OK) + goto out; + + /* enable-server and disable-server only accepts serverids */ + for (i = 1; i < argc && rclib == ILB_STATUS_OK; i++) { + ilb_server_data_t srv; + + if (argv[i][0] != ILB_SRVID_PREFIX) { + rc = ILBADM_INVAL_SRVID; + goto out; + } + + bzero(&srv, sizeof (srv)); + /* to do: check length */ + (void) strlcpy(srv.sd_srvID, argv[i], sizeof (srv.sd_srvID)); + switch (cmd) { + case cmd_enable_server: + rclib = ilb_enable_server(h, &srv, NULL); + break; + case cmd_disable_server: + rclib = ilb_disable_server(h, &srv, NULL); + break; + } + + /* if we can't find a given server ID, just plough on */ + if (rclib == ILB_STATUS_ENOENT) { + const char *msg = ilb_errstr(rclib); + + rc = ILBADM_LIBERR; + ilbadm_err("%s: %s", msg, argv[i]); + rclib = ILB_STATUS_OK; + continue; + } + if (rclib != ILB_STATUS_OK) + break; + } +out: + if (h != ILB_INVALID_HANDLE) + (void) ilb_close(h); + + if (rclib != ILB_STATUS_OK) { + ilbadm_err(ilb_errstr(rclib)); + rc = ILBADM_LIBERR; + } + + if ((rc != ILBADM_OK) && (rc != ILBADM_LIBERR)) + ilbadm_err(ilbadm_errstr(rc)); + return (rc); +} + +ilbadm_status_t +ilbadm_disable_server(int argc, char *argv[]) +{ + return (ilbadm_Xable_server(argc, argv, cmd_disable_server)); +} + +ilbadm_status_t +ilbadm_enable_server(int argc, char *argv[]) +{ + return (ilbadm_Xable_server(argc, argv, cmd_enable_server)); +} + +/* ARGSUSED */ +ilbadm_status_t +ilbadm_rem_server_from_group(int argc, char *argv[]) +{ + ilb_handle_t h = ILB_INVALID_HANDLE; + ilb_status_t rclib = ILB_STATUS_OK; + ilbadm_status_t rc = ILBADM_OK; + ilbadm_sgroup_t *sg; + int c; + + i_ilbadm_alloc_sgroup(&sg); + + while ((c = getopt(argc, argv, ":s:")) != -1) { + switch ((char)c) { + case 's': + rc = i_parse_serverIDs(optarg, sg); + break; + case ':': + ilbadm_err(gettext("missing option-argument for" + " 
%c"), (char)optopt);
+			rc = ILBADM_LIBERR;
+			break;
+		case '?':
+		default:	unknown_opt(argv, optind-1);
+			/* not reached */
+			break;
+		}
+		if (rc != ILBADM_OK)
+			goto out;
+	}
+
+	/* we need servergroup name and at least one serverID to remove */
+	if (optind >= argc || sg->sg_count == 0) {
+		rc = ILBADM_ENOOPTION;
+		goto out;
+	}
+
+	sg->sg_name = argv[optind];
+
+	rclib = ilb_open(&h);
+	if (rclib != ILB_STATUS_OK)
+		goto out;
+
+	rc = i_mod_sg(h, sg, cmd_rem_srv, 0);
+out:
+	i_ilbadm_free_sgroup(sg);
+
+	if (h != ILB_INVALID_HANDLE)
+		(void) ilb_close(h);
+	/*
+	 * rclib is only set by ilb_open() here; report a failed open
+	 * instead of returning ILBADM_OK without any message.
+	 */
+	if (rclib != ILB_STATUS_OK) {
+		ilbadm_err(ilb_errstr(rclib));
+		rc = ILBADM_LIBERR;
+	}
+	if ((rc != ILBADM_OK) && (rc != ILBADM_LIBERR))
+		ilbadm_err(ilbadm_errstr(rc));
+	return (rc);
+}
+
+ilbadm_status_t
+ilbadm_destroy_servergroup(int argc, char *argv[])
+{
+	ilb_handle_t	h = ILB_INVALID_HANDLE;
+	ilb_status_t	rclib = ILB_STATUS_OK;
+	ilbadm_status_t	rc = ILBADM_OK;
+	char		*sgname;
+
+	if (argc != 2) {
+		ilbadm_err(gettext("usage:ilbadm"
+		    " delete-servergroup groupname"));
+		rc = ILBADM_LIBERR;
+		goto out;
+	}
+
+	sgname = argv[1];
+
+	rclib = ilb_open(&h);
+	if (rclib != ILB_STATUS_OK)
+		goto out;
+
+	rclib = ilb_destroy_servergroup(h, sgname);
+out:
+	if (h != ILB_INVALID_HANDLE)
+		(void) ilb_close(h);
+
+	if (rclib != ILB_STATUS_OK) {
+		ilbadm_err(ilb_errstr(rclib));
+		rc = ILBADM_LIBERR;
+	}
+
+	return (rc);
+}
+
+#define	BUFSZ	1024
+
+/*
+ * Write the textual form of a server into buf: whatever ip2str() produces
+ * for the address, followed by ":minport" or ":minport-maxport" when a
+ * port is set.  Returns the length of the resulting string, or -1 if the
+ * port part did not fit into the remaining space.
+ */
+static int
+export_srv_spec(ilb_server_data_t *srv, char *buf, const int bufsize)
+{
+	int	len = 0, bufsz = (int)bufsize;
+
+	ip2str(&srv->sd_addr, buf, bufsz, 0);
+
+	len += strlen(buf);
+	bufsz -= len;
+
+	if (srv->sd_minport != 0) {
+		in_port_t	h_min, h_max;
+		int		inc;
+
+		h_min = ntohs(srv->sd_minport);
+		h_max = ntohs(srv->sd_maxport);
+
+		/* to do: if service name was given, print that, not number */
+		if (h_max <= h_min)
+			inc = snprintf(buf+len, bufsz, ":%d", h_min);
+		else
+			inc = snprintf(buf+len, bufsz, ":%d-%d", h_min, h_max);
+
+		/*
+		 * snprintf() returns the length it wanted to write, not
+		 * counting the terminating NUL, so the output was
+		 * truncated whenever that is >= the space available;
+		 * a negative value signals an output error.
+		 */
+		if (inc < 0 || inc >= bufsz) /* too little space */
+			return (-1);
+		len += inc;
+	}
+
+	return (len);
+}
+
+
+/*
+ * this is 
called by ilb_walk_servers(), therefore we return ilb_status_t + * not ilbadm_status, and retain an unused function argument + */ +/* ARGSUSED */ +ilb_status_t +ilbadm_export_a_srv(ilb_handle_t h, ilb_server_data_t *srv, const char *sgname, + void *arg) +{ + sg_export_arg_t *larg = (sg_export_arg_t *)arg; + FILE *fp = larg->fp; + char linebuf[BUFSZ]; /* XXXms make that dynamic */ + int sz = BUFSZ; + + if (export_srv_spec(srv, linebuf, sz) == -1) + return (ILB_STATUS_OK); + + (void) fprintf(fp, "add-server -s server="); + + (void) fprintf(fp, "%s %s\n", linebuf, sgname); + return (ILB_STATUS_OK); +} + +ilb_status_t +ilbadm_export_sg(ilb_handle_t h, ilb_sg_data_t *sg, void *arg) +{ + ilb_status_t rc = ILB_STATUS_OK; + sg_export_arg_t *larg = (sg_export_arg_t *)arg; + FILE *fp = larg->fp; + + (void) fprintf(fp, "create-servergroup %s\n", sg->sgd_name); + if (sg->sgd_srvcount == 0) + return (ILB_STATUS_OK); + + rc = ilb_walk_servers(h, ilbadm_export_a_srv, sg->sgd_name, arg); + if (rc != ILB_STATUS_OK) + goto out; + + if (fflush(fp) == EOF) + rc = ILB_STATUS_WRITE; + +out: + return (rc); +} + +ilbadm_status_t +ilbadm_export_servergroups(ilb_handle_t h, FILE *fp) +{ + ilb_status_t rclib = ILB_STATUS_OK; + ilbadm_status_t rc = ILBADM_OK; + sg_export_arg_t arg; + + arg.fp = fp; + arg.sg = NULL; + + rclib = ilb_walk_servergroups(h, ilbadm_export_sg, NULL, (void *)&arg); + if (rclib != ILB_STATUS_OK) { + ilbadm_err(ilb_errstr(rclib)); + rc = ILBADM_LIBERR; + } + + return (rc); +} diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_stats.c b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_stats.c new file mode 100644 index 0000000000..648ec1409e --- /dev/null +++ b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_stats.c @@ -0,0 +1,1140 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <strings.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/sysmacros.h> +#include <sys/note.h> +#include <fcntl.h> +#include <errno.h> +#include <assert.h> +#include <libgen.h> +#include <kstat.h> +#include <ofmt.h> +#include <libilb.h> +#include "ilbadm.h" + +#define ILBST_TIMESTAMP_HEADER 0x01 /* a timestamp w. 
every header */ +#define ILBST_DELTA_INTERVAL 0x02 /* delta over specified interval */ +#define ILBST_ABS_NUMBERS 0x04 /* print absolute numbers, no d's */ +#define ILBST_ITEMIZE 0x08 /* itemize */ +#define ILBST_VERBOSE 0x10 /* verbose error info */ + +#define ILBST_OLD_VALUES 0x20 /* for internal processing */ +#define ILBST_RULES_CHANGED 0x40 + +typedef struct { + char is_name[KSTAT_STRLEN]; + uint64_t is_value; +} ilbst_stat_t; + +static ilbst_stat_t rulestats[] = { + {"num_servers", 0}, + {"bytes_not_processed", 0}, + {"pkt_not_processed", 0}, + {"bytes_dropped", 0}, + {"pkt_dropped", 0}, + {"nomem_bytes_dropped", 0}, + {"nomem_pkt_dropped", 0}, + {"noport_bytes_dropped", 0}, + {"noport_pkt_dropped", 0}, + {"icmp_echo_processed", 0}, + {"icmp_dropped", 0}, + {"icmp_too_big_processed", 0}, + {"icmp_too_big_dropped", 0} +}; + +/* indices into array above, to avoid searching */ +#define RLSTA_NUM_SRV 0 +#define RLSTA_BYTES_U 1 +#define RLSTA_PKT_U 2 +#define RLSTA_BYTES_D 3 +#define RLSTA_PKT_D 4 +#define RLSTA_NOMEMBYTES_D 5 +#define RLSTA_NOMEMPKT_D 6 +#define RLSTA_NOPORTBYTES_D 7 +#define RLSTA_NOPORTPKT_D 8 +#define RLSTA_ICMP_P 9 +#define RLSTA_ICMP_D 10 +#define RLSTA_ICMP2BIG_P 11 +#define RLSTA_ICMP2BIG_D 12 + +static ilbst_stat_t servstats[] = { + {"bytes_processed", 0}, + {"pkt_processed", 0} +}; +/* indices into array above, to avoid searching */ +#define SRVST_BYTES_P 0 +#define SRVST_PKT_P 1 + +/* values used for of_* commands as id */ +#define ILBST_PKT_P 0 +#define ILBST_BYTES_P 1 +#define ILBST_PKT_U 2 +#define ILBST_BYTES_U 3 +#define ILBST_PKT_D 4 +#define ILBST_BYTES_D 5 +#define ILBST_ICMP_P 6 +#define ILBST_ICMP_D 7 +#define ILBST_ICMP2BIG_P 8 +#define ILBST_ICMP2BIG_D 9 +#define ILBST_NOMEMP_D 10 +#define ILBST_NOPORTP_D 11 +#define ILBST_NOMEMB_D 12 +#define ILBST_NOPORTB_D 13 + +#define ILBST_ITEMIZE_SNAME 97 +#define ILBST_ITEMIZE_RNAME 98 +#define ILBST_TIMESTAMP 99 + +/* approx field widths */ +#define ILBST_PKTCTR_W 8 +#define 
ILBST_BYTECTR_W 10 +#define ILBST_TIME_W 15 + +static boolean_t of_rule_stats(ofmt_arg_t *, char *, uint_t); +static boolean_t of_server_stats(ofmt_arg_t *, char *, uint_t); +static boolean_t of_itemize_stats(ofmt_arg_t *, char *, uint_t); +static boolean_t of_timestamp(ofmt_arg_t *, char *, uint_t); + +static ofmt_field_t stat_itemize_fields[] = { + {"RULENAME", ILB_NAMESZ, ILBST_ITEMIZE_RNAME, of_itemize_stats}, + {"SERVERNAME", ILB_NAMESZ, ILBST_ITEMIZE_SNAME, of_itemize_stats}, + {"PKT_P", ILBST_PKTCTR_W, ILBST_PKT_P, of_itemize_stats}, + {"BYTES_P", ILBST_BYTECTR_W, ILBST_BYTES_P, of_itemize_stats}, + {"TIME", ILBST_TIME_W, ILBST_TIMESTAMP, of_timestamp}, + {NULL, 0, 0, NULL} +}; +static ofmt_field_t stat_stdfields[] = { + {"PKT_P", ILBST_PKTCTR_W, ILBST_PKT_P, of_server_stats}, + {"BYTES_P", ILBST_BYTECTR_W, ILBST_BYTES_P, of_server_stats}, + {"PKT_U", ILBST_PKTCTR_W, ILBST_PKT_U, of_rule_stats}, + {"BYTES_U", ILBST_BYTECTR_W, ILBST_BYTES_U, of_rule_stats}, + {"PKT_D", ILBST_PKTCTR_W, ILBST_PKT_D, of_rule_stats}, + {"BYTES_D", ILBST_BYTECTR_W, ILBST_BYTES_D, of_rule_stats}, + {"ICMP_P", ILBST_PKTCTR_W, ILBST_ICMP_P, of_rule_stats}, + {"ICMP_D", ILBST_PKTCTR_W, ILBST_ICMP_D, of_rule_stats}, + {"ICMP2BIG_P", 11, ILBST_ICMP2BIG_P, of_rule_stats}, + {"ICMP2BIG_D", 11, ILBST_ICMP2BIG_D, of_rule_stats}, + {"NOMEMP_D", ILBST_PKTCTR_W, ILBST_NOMEMP_D, of_rule_stats}, + {"NOPORTP_D", ILBST_PKTCTR_W, ILBST_NOPORTP_D, of_rule_stats}, + {"NOMEMB_D", ILBST_PKTCTR_W, ILBST_NOMEMB_D, of_rule_stats}, + {"NOPORTB_D", ILBST_PKTCTR_W, ILBST_NOPORTB_D, of_rule_stats}, + {"TIME", ILBST_TIME_W, ILBST_TIMESTAMP, of_timestamp}, + {NULL, 0, 0, NULL} +}; + +static char stat_stdhdrs[] = "PKT_P,BYTES_P,PKT_U,BYTES_U,PKT_D,BYTES_D"; +static char stat_stdv_hdrs[] = "PKT_P,BYTES_P,PKT_U,BYTES_U,PKT_D,BYTES_D," + "ICMP_P,ICMP_D,ICMP2BIG_P,ICMP2BIG_D,NOMEMP_D,NOPORTP_D"; +static char stat_itemize_rule_hdrs[] = "SERVERNAME,PKT_P,BYTES_P"; +static char stat_itemize_server_hdrs[] = 
"RULENAME,PKT_P,BYTES_P"; + +#define RSTAT_SZ (sizeof (rulestats)/sizeof (rulestats[0])) +#define SSTAT_SZ (sizeof (servstats)/sizeof (servstats[0])) + +typedef struct { + char isd_servername[KSTAT_STRLEN]; /* serverID */ + ilbst_stat_t isd_serverstats[SSTAT_SZ]; + hrtime_t isd_crtime; /* save for comparison purpose */ +} ilbst_srv_desc_t; + +/* + * this data structure stores statistics for a rule - both an old set + * and a current/new set. we use pointers to the actual stores and switch + * the pointers for every round. old_is_old in ilbst_arg_t indicates + * which pointer points to the "old" data struct (ie, if true, _o pointer + * points to old) + */ +typedef struct { + char ird_rulename[KSTAT_STRLEN]; + int ird_num_servers; + int ird_num_servers_o; + int ird_srv_ind; + hrtime_t ird_crtime; /* save for comparison */ + hrtime_t ird_crtime_o; /* save for comparison */ + ilbst_srv_desc_t *ird_srvlist; + ilbst_srv_desc_t *ird_srvlist_o; + ilbst_stat_t ird_rstats[RSTAT_SZ]; + ilbst_stat_t ird_rstats_o[RSTAT_SZ]; + ilbst_stat_t *ird_rulestats; + ilbst_stat_t *ird_rulestats_o; +} ilbst_rule_desc_t; + +/* + * overall "container" for information pertaining to statistics, and + * how to display them. 
+ */ +typedef struct { + int ilbst_flags; + /* fields representing user input */ + char *ilbst_rulename; /* optional */ + char *ilbst_server; /* optional */ + int ilbst_interval; + int ilbst_count; + /* "internal" fields for data and data presentation */ + ofmt_handle_t ilbst_oh; + boolean_t ilbst_old_is_old; + ilbst_rule_desc_t *ilbst_rlist; + int ilbst_rcount; /* current list count */ + int ilbst_rcount_prev; /* prev (different) count */ + int ilbst_rlist_sz; /* number of alloc'ed rules */ + int ilbst_rule_index; /* for itemizes display */ +} ilbst_arg_t; + +/* ARGSUSED */ +static boolean_t +of_timestamp(ofmt_arg_t *of_arg, char *buf, uint_t bufsize) +{ + time_t now; + struct tm *now_tm; + + now = time(NULL); + now_tm = localtime(&now); + + (void) strftime(buf, bufsize, "%F:%H.%M.%S", now_tm); + return (B_TRUE); +} + +static boolean_t +i_sum_per_rule_processed(ilbst_rule_desc_t *rp, uint64_t *resp, int index, + int flags) +{ + int i, num_servers; + ilbst_srv_desc_t *srv, *o_srv, *n_srv; + uint64_t res = 0; + boolean_t valid = B_TRUE; + boolean_t old = flags & ILBST_OLD_VALUES; + boolean_t check_valid; + + /* if we do abs. numbers, we never look at the _o fields */ + assert((old && (flags & ILBST_ABS_NUMBERS)) == B_FALSE); + + /* we only check for validity under certain conditions */ + check_valid = !(old || (flags & ILBST_ABS_NUMBERS)); + + if (check_valid && rp->ird_num_servers != rp->ird_num_servers_o) + valid = B_FALSE; + + num_servers = old ? rp->ird_num_servers_o : rp->ird_num_servers; + + for (i = 0; i < num_servers; i++) { + n_srv = &rp->ird_srvlist[i]; + o_srv = &rp->ird_srvlist_o[i]; + + if (old) + srv = o_srv; + else + srv = n_srv; + + res += srv->isd_serverstats[index].is_value; + /* + * if creation times don't match, comparison is wrong; if + * if we already know something is invalid, we don't + * need to compare again. 
+ */ + if (check_valid && valid == B_TRUE && + o_srv->isd_crtime != n_srv->isd_crtime) { + valid = B_FALSE; + break; + } + } + /* + * save the result even though it may be imprecise - let the + * caller decide what to do + */ + *resp = res; + + return (valid); +} + +typedef boolean_t (*sumfunc_t)(ilbst_rule_desc_t *, uint64_t *, int); + +static boolean_t +i_sum_per_rule_pkt_p(ilbst_rule_desc_t *rp, uint64_t *resp, int flags) +{ + return (i_sum_per_rule_processed(rp, resp, SRVST_PKT_P, flags)); +} + +static boolean_t +i_sum_per_rule_bytes_p(ilbst_rule_desc_t *rp, uint64_t *resp, int flags) +{ + return (i_sum_per_rule_processed(rp, resp, SRVST_BYTES_P, flags)); +} + +static boolean_t +of_server_stats(ofmt_arg_t *of_arg, char *buf, uint_t bufsize) +{ + ilbst_arg_t *sta = (ilbst_arg_t *)of_arg->ofmt_cbarg; + uint64_t count = 0, val; + int i; + boolean_t valid = B_TRUE; + sumfunc_t sumfunc; + + switch (of_arg->ofmt_id) { + case ILBST_PKT_P: sumfunc = i_sum_per_rule_pkt_p; + break; + case ILBST_BYTES_P: sumfunc = i_sum_per_rule_bytes_p; + break; + } + + for (i = 0; i < sta->ilbst_rcount; i++) { + valid = sumfunc(&sta->ilbst_rlist[i], &val, sta->ilbst_flags); + if (!valid) + return (valid); + count += val; + } + + if ((sta->ilbst_flags & ILBST_ABS_NUMBERS) != 0) + goto out; + + for (i = 0; i < sta->ilbst_rcount; i++) { + (void) sumfunc(&sta->ilbst_rlist[i], &val, + sta->ilbst_flags | ILBST_OLD_VALUES); + count -= val; + } + +out: + /* + * normally, we print "change per second", which we calculate + * here. otherwise, we print "change over interval" + */ + if ((sta->ilbst_flags & (ILBST_DELTA_INTERVAL|ILBST_ABS_NUMBERS)) == 0) + count /= sta->ilbst_interval; + + (void) snprintf(buf, bufsize, "%llu", count); + return (B_TRUE); +} + +/* + * this function is called when user wants itemized stats of every + * server for a named rule, or vice vera. + * i_do_print sets sta->rule_index and the proper ird_srv_ind so + * we don't have to differentiate between these two cases here. 
+ */ +static boolean_t +of_itemize_stats(ofmt_arg_t *of_arg, char *buf, uint_t bufsize) +{ + ilbst_arg_t *sta = (ilbst_arg_t *)of_arg->ofmt_cbarg; + int stat_ind; + uint64_t count; + int rule_index = sta->ilbst_rule_index; + int srv_ind = sta->ilbst_rlist[rule_index].ird_srv_ind; + boolean_t ret = B_TRUE; + ilbst_srv_desc_t *srv, *osrv; + + srv = &sta->ilbst_rlist[rule_index].ird_srvlist[srv_ind]; + + switch (of_arg->ofmt_id) { + case ILBST_PKT_P: stat_ind = SRVST_PKT_P; + break; + case ILBST_BYTES_P: stat_ind = SRVST_BYTES_P; + break; + case ILBST_ITEMIZE_RNAME: + (void) snprintf(buf, bufsize, "%s", + sta->ilbst_rlist[rule_index].ird_rulename); + return (B_TRUE); + /* not reached */ + break; + case ILBST_ITEMIZE_SNAME: + (void) snprintf(buf, bufsize, "%s", srv->isd_servername); + return (B_TRUE); + /* not reached */ + break; + } + + count = srv->isd_serverstats[stat_ind].is_value; + + if ((sta->ilbst_flags & ILBST_ABS_NUMBERS) != 0) + goto out; + + osrv = &sta->ilbst_rlist[rule_index].ird_srvlist_o[srv_ind]; + if (srv->isd_crtime != osrv->isd_crtime) + ret = B_FALSE; + + count -= osrv->isd_serverstats[stat_ind].is_value; +out: + /* + * normally, we print "change per second", which we calculate + * here. otherwise, we print "change over interval" or absolute + * values. 
+ */ + if ((sta->ilbst_flags & (ILBST_DELTA_INTERVAL|ILBST_ABS_NUMBERS)) == 0) + count /= sta->ilbst_interval; + + (void) snprintf(buf, bufsize, "%llu", count); + return (ret); + +} + +static boolean_t +of_rule_stats(ofmt_arg_t *of_arg, char *buf, uint_t bufsize) +{ + ilbst_arg_t *sta = (ilbst_arg_t *)of_arg->ofmt_cbarg; + int i, ind; + uint64_t count = 0; + + switch (of_arg->ofmt_id) { + case ILBST_PKT_U: ind = RLSTA_PKT_U; + break; + case ILBST_BYTES_U: ind = RLSTA_BYTES_U; + break; + case ILBST_PKT_D: ind = RLSTA_PKT_D; + break; + case ILBST_BYTES_D: ind = RLSTA_BYTES_D; + break; + case ILBST_ICMP_P: ind = RLSTA_ICMP_P; + break; + case ILBST_ICMP_D: ind = RLSTA_ICMP_D; + break; + case ILBST_ICMP2BIG_P: ind = RLSTA_ICMP2BIG_P; + break; + case ILBST_ICMP2BIG_D: ind = RLSTA_ICMP2BIG_D; + break; + case ILBST_NOMEMP_D: ind = RLSTA_NOMEMPKT_D; + break; + case ILBST_NOPORTP_D: ind = RLSTA_NOPORTPKT_D; + break; + case ILBST_NOMEMB_D: ind = RLSTA_NOMEMBYTES_D; + break; + case ILBST_NOPORTB_D: ind = RLSTA_NOPORTBYTES_D; + break; + } + + for (i = 0; i < sta->ilbst_rcount; i++) + count += sta->ilbst_rlist[i].ird_rulestats[ind].is_value; + + if ((sta->ilbst_flags & ILBST_ABS_NUMBERS) != 0) + goto out; + + /* + * the purist approach: if we can't say 100% that what we + * calculate is correct, don't. + */ + if (sta->ilbst_flags & ILBST_RULES_CHANGED) + return (B_FALSE); + + for (i = 0; i < sta->ilbst_rcount; i++) { + if (sta->ilbst_rlist[i].ird_crtime_o != 0 && + sta->ilbst_rlist[i].ird_crtime != + sta->ilbst_rlist[i].ird_crtime_o) + return (B_FALSE); + + count -= sta->ilbst_rlist[i].ird_rulestats_o[ind].is_value; + } +out: + /* + * normally, we print "change per second", which we calculate + * here. otherwise, we print "change over interval" + */ + if ((sta->ilbst_flags & (ILBST_DELTA_INTERVAL|ILBST_ABS_NUMBERS)) == 0) + count /= sta->ilbst_interval; + + (void) snprintf(buf, bufsize, "%llu", count); + return (B_TRUE); +} + +/* + * Get the number of kstat instances. 
Note that when rules are being + * drained the number of kstats instances may be different than the + * kstat counter num_rules (ilb:0:global:num_rules"). + * + * Also there can be multiple instances of a rule in the following + * scenario: + * + * A rule named rule A has been deleted but remains in kstats because + * its undergoing connection draining. During this time, the user adds + * a new rule with the same name(rule A). In this case, there would + * be two kstats instances for rule A. Currently ilbadm's aggregate + * results will include data from both instances of rule A. In, + * future we should have ilbadm stats only consider the latest instance + * of the rule (ie only consider the the instance that corresponds + * to the rule that was just added). + * + */ +static int +i_get_num_kinstances(kstat_ctl_t *kctl) +{ + kstat_t *kp; + int num_instances = 0; /* nothing found, 0 rules */ + + for (kp = kctl->kc_chain; kp != NULL; kp = kp->ks_next) { + if (strncmp("rulestat", kp->ks_class, 8) == 0 && + strncmp("ilb", kp->ks_module, 3) == 0) { + num_instances++; + } + } + + return (num_instances); +} + + +/* + * since server stat's classname is made up of <rulename>-sstat, + * we walk the rule list to construct the comparison + * Return: pointer to rule whose name matches the class + * NULL if no match + */ +static ilbst_rule_desc_t * +match_2_rnames(char *class, ilbst_rule_desc_t *rlist, int rcount) +{ + int i; + char classname[KSTAT_STRLEN]; + + for (i = 0; i < rcount; i++) { + (void) snprintf(classname, sizeof (classname), "%s-sstat", + rlist[i].ird_rulename); + if (strncmp(classname, class, sizeof (classname)) == 0) + return (&rlist[i]); + } + return (NULL); +} + +static int +i_stat_index(kstat_named_t *knp, ilbst_stat_t *stats, int count) +{ + int i; + + for (i = 0; i < count; i++) { + if (strcasecmp(stats[i].is_name, knp->name) == 0) + return (i); + } + + return (-1); +} + +static void +i_copy_sstats(ilbst_srv_desc_t *sp, kstat_t *kp) +{ + kstat_named_t *knp; 
+ int i, ind; + + knp = KSTAT_NAMED_PTR(kp); + for (i = 0; i < kp->ks_ndata; i++, knp++) { + ind = i_stat_index(knp, servstats, SSTAT_SZ); + if (ind == -1) + continue; + (void) strlcpy(sp->isd_serverstats[ind].is_name, knp->name, + sizeof (sp->isd_serverstats[ind].is_name)); + sp->isd_serverstats[ind].is_value = knp->value.ui64; + sp->isd_crtime = kp->ks_crtime; + } +} + + +static ilbadm_status_t +i_get_server_descs(ilbst_arg_t *sta, kstat_ctl_t *kctl) +{ + ilbadm_status_t rc = ILBADM_OK; + kstat_t *kp; + int i = -1; + ilbst_rule_desc_t *rp; + ilbst_rule_desc_t *rlist = sta->ilbst_rlist; + int rcount = sta->ilbst_rcount; + + /* + * find all "server" kstats, or the one specified in + * sta->server + */ + for (kp = kctl->kc_chain; kp != NULL; kp = kp->ks_next) { + if (strncmp("ilb", kp->ks_module, 3) != 0) + continue; + if (sta->ilbst_server != NULL && + strcasecmp(sta->ilbst_server, kp->ks_name) != 0) + continue; + rp = match_2_rnames(kp->ks_class, rlist, rcount); + if (rp == NULL) + continue; + + (void) kstat_read(kctl, kp, NULL); + i = rp->ird_srv_ind++; + + rc = ILBADM_OK; + /* + * This means that a server is added after we check last + * time... Just make the array bigger. 
+		 */
+		if (i + 1 > rp->ird_num_servers) {
+			ilbst_srv_desc_t	*srvlist;
+
+			if ((srvlist = realloc(rp->ird_srvlist, (i + 1) *
+			    sizeof (*srvlist))) == NULL) {
+				rc = ILBADM_ENOMEM;
+				break;
+			}
+			rp->ird_srvlist = srvlist;
+			/*
+			 * slots 0..i are valid now, so the list holds
+			 * i + 1 servers (storing i would drop the last one)
+			 */
+			rp->ird_num_servers = i + 1;
+		}
+
+		(void) strlcpy(rp->ird_srvlist[i].isd_servername, kp->ks_name,
+		    sizeof (rp->ird_srvlist[i].isd_servername));
+		i_copy_sstats(&rp->ird_srvlist[i], kp);
+	}
+
+	/*
+	 * reset the per-rule server cursors for the next consumer. we walk
+	 * the list with rp (no longer needed at this point) instead of i,
+	 * because i must keep its initial -1 if no server kstat matched;
+	 * reusing i as the loop counter made the check below always false.
+	 */
+	for (rp = rlist; rp < rlist + rcount; rp++)
+		rp->ird_srv_ind = 0;
+
+	/* a server was asked for by name, but no kstat matched it */
+	if (sta->ilbst_server != NULL && i == -1)
+		rc = ILBADM_ENOSERVER;
+	return (rc);
+}
+
+/*
+ * copy the values of all named kstats in kp whose name is found in the
+ * "rulestats" table (looked up via i_stat_index()) into the current
+ * rule statistics of rp; kstats with unknown names are skipped.
+ */
+static void
+i_copy_rstats(ilbst_rule_desc_t *rp, kstat_t *kp)
+{
+	kstat_named_t	*knp;
+	int		i, ind;
+
+	knp = KSTAT_NAMED_PTR(kp);
+	for (i = 0; i < kp->ks_ndata; i++, knp++) {
+		ind = i_stat_index(knp, rulestats, RSTAT_SZ);
+		if (ind == -1)
+			continue;
+
+		(void) strlcpy(rp->ird_rulestats[ind].is_name, knp->name,
+		    sizeof (rp->ird_rulestats[ind].is_name));
+		rp->ird_rulestats[ind].is_value = knp->value.ui64;
+	}
+}
+
+/*
+ * point ird_rulestats ("current") and ird_rulestats_o ("old") at the two
+ * embedded stat arrays of rp; which array plays which role is selected
+ * by old_is_old (B_TRUE: ird_rstats_o holds the old values).
+ */
+static void
+i_set_rlstats_ptr(ilbst_rule_desc_t *rp, boolean_t old_is_old)
+{
+	if (old_is_old) {
+		rp->ird_rulestats = rp->ird_rstats;
+		rp->ird_rulestats_o = rp->ird_rstats_o;
+	} else {
+		rp->ird_rulestats = rp->ird_rstats_o;
+		rp->ird_rulestats_o = rp->ird_rstats;
+	}
+}
+/*
+ * this function walks the array of rules and switches pointer to old
+ * and new stats as well as serverlists.
+ */ +static void +i_swap_rl_pointers(ilbst_arg_t *sta, int rcount) +{ + int i, tmp_num; + ilbst_rule_desc_t *rlist = sta->ilbst_rlist; + ilbst_srv_desc_t *tmp_srv; + + for (i = 0; i < rcount; i++) { + /* swap srvlist pointers */ + tmp_srv = rlist[i].ird_srvlist; + rlist[i].ird_srvlist = rlist[i].ird_srvlist_o; + rlist[i].ird_srvlist_o = tmp_srv; + + /* + * swap server counts - we need the old one to + * save reallocation calls + */ + tmp_num = rlist[i].ird_num_servers_o; + rlist[i].ird_num_servers_o = rlist[i].ird_num_servers; + rlist[i].ird_num_servers = tmp_num; + + /* preserve creation time */ + rlist[i].ird_crtime_o = rlist[i].ird_crtime; + + i_set_rlstats_ptr(&rlist[i], sta->ilbst_old_is_old); + rlist[i].ird_srv_ind = 0; + } +} + +static void +i_init_rulelist(ilbst_arg_t *sta, int rcount) +{ + int i; + ilbst_rule_desc_t *rlist = sta->ilbst_rlist; + + for (i = 0; i < rcount; i++) { + rlist[i].ird_rulestats = rlist[i].ird_rstats; + rlist[i].ird_rulestats_o = rlist[i].ird_rstats_o; + rlist[i].ird_srv_ind = 0; + } +} + + +/* + * this function searches for kstats describing individual rules and + * saves name, # of servers, and the kstat_t * describing them (this is + * for sta->rulename == NULL); + * if sta->rulename != NULL, it names the rule we're looking for + * and this function will fill in the other data (like the all_rules case) + * Returns: ILBADM_ENORULE named rule not found + * ILBADM_ENOMEM no mem. available + */ +static ilbadm_status_t +i_get_rule_descs(ilbst_arg_t *sta, kstat_ctl_t *kctl) +{ + ilbadm_status_t rc = ILBADM_OK; + kstat_t *kp; + kstat_named_t *knp; + int i; + int num_servers; + ilbst_rule_desc_t *rlist = sta->ilbst_rlist; + int rcount = sta->ilbst_rcount; + + /* + * find all "rule" kstats, or the one specified in + * sta->ilbst_rulename. 
+ */ + for (i = 0, kp = kctl->kc_chain; i < rcount && kp != NULL; + kp = kp->ks_next) { + if (strncmp("rulestat", kp->ks_class, 8) != 0 || + strncmp("ilb", kp->ks_module, 3) != 0) + continue; + + (void) kstat_read(kctl, kp, NULL); + + knp = kstat_data_lookup(kp, "num_servers"); + if (knp == NULL) { + ilbadm_err(gettext("kstat_data_lookup() failed: %s"), + strerror(errno)); + rc = ILBADM_LIBERR; + break; + } + if (sta->ilbst_rulename != NULL) { + if (strcasecmp(kp->ks_name, sta->ilbst_rulename) + != 0) + continue; + } + (void) strlcpy(rlist[i].ird_rulename, kp->ks_name, + sizeof (rlist[i].ird_rulename)); + + /* only alloc the space we need, set counter here ... */ + if (sta->ilbst_server != NULL) + num_servers = 1; + else + num_servers = (int)knp->value.ui64; + + /* ... furthermore, only reallocate if necessary */ + if (num_servers != rlist[i].ird_num_servers) { + ilbst_srv_desc_t *srvlist; + + rlist[i].ird_num_servers = num_servers; + + if (rlist[i].ird_srvlist == NULL) + srvlist = calloc(num_servers, + sizeof (*srvlist)); + else + srvlist = realloc(rlist[i].ird_srvlist, + sizeof (*srvlist) * num_servers); + if (srvlist == NULL) { + rc = ILBADM_ENOMEM; + break; + } + rlist[i].ird_srvlist = srvlist; + } + rlist[i].ird_srv_ind = 0; + rlist[i].ird_crtime = kp->ks_crtime; + + i_copy_rstats(&rlist[i], kp); + i++; + + /* if we know we're done, return */ + if (sta->ilbst_rulename != NULL || i == rcount) { + rc = ILBADM_OK; + break; + } + } + + if (sta->ilbst_rulename != NULL && i == 0) + rc = ILBADM_ENORULE; + return (rc); +} + +static void +i_do_print(ilbst_arg_t *sta) +{ + int i; + + /* non-itemized display can go right ahead */ + if ((sta->ilbst_flags & ILBST_ITEMIZE) == 0) { + ofmt_print(sta->ilbst_oh, sta); + return; + } + + /* + * rulename is given, list a line per server + * here's how we do it: + * the _ITEMIZE flag indicates to the print function (called + * from ofmt_print()) to look at server [ird_srv_ind] only. 
+ */ + if (sta->ilbst_rulename != NULL) { + sta->ilbst_rule_index = 0; + for (i = 0; i < sta->ilbst_rlist->ird_num_servers; i++) { + sta->ilbst_rlist->ird_srv_ind = i; + ofmt_print(sta->ilbst_oh, sta); + } + sta->ilbst_rlist->ird_srv_ind = 0; + return; + } + + /* list one line for every rule for a given server */ + for (i = 0; i < sta->ilbst_rcount; i++) { + /* + * if a rule doesn't contain a given server, there's no + * need to print it. Luckily, we can check that + * fairly easily + */ + if (sta->ilbst_rlist[i].ird_srvlist[0].isd_servername[0] == + '\0') + continue; + + sta->ilbst_rule_index = i; + sta->ilbst_rlist[i].ird_srv_ind = 0; + ofmt_print(sta->ilbst_oh, sta); + } + sta->ilbst_rule_index = 0; +} + +static ilbadm_status_t +i_do_show_stats(ilbst_arg_t *sta) +{ + kstat_ctl_t *kctl; + kid_t nkid; + int rcount = 1, i; + ilbadm_status_t rc = ILBADM_OK; + ilbst_rule_desc_t *rlist, *rp; + boolean_t pseudo_abs = B_FALSE; /* for first pass */ + + if ((kctl = kstat_open()) == NULL) { + ilbadm_err(gettext("kstat_open() failed: %s"), strerror(errno)); + return (ILBADM_LIBERR); + } + + + if (sta->ilbst_rulename == NULL) + rcount = i_get_num_kinstances(kctl); + + rlist = calloc(sizeof (*rlist), rcount); + if (rlist == NULL) { + rc = ILBADM_ENOMEM; + goto out; + } + + sta->ilbst_old_is_old = B_TRUE; + sta->ilbst_rlist = rlist; + sta->ilbst_rcount = sta->ilbst_rcount_prev = rcount; + sta->ilbst_rlist_sz = rcount; + + /* + * in the first pass, we always print absolute numbers. We + * need to remember whether we wanted abs. 
numbers for + * other samples as well + */ + if ((sta->ilbst_flags & ILBST_ABS_NUMBERS) == 0) { + sta->ilbst_flags |= ILBST_ABS_NUMBERS; + pseudo_abs = B_TRUE; + } + + i_init_rulelist(sta, rcount); + do { + rc = i_get_rule_descs(sta, kctl); + if (rc != ILBADM_OK) + goto out; + + rc = i_get_server_descs(sta, kctl); + if (rc != ILBADM_OK) + goto out; + + i_do_print(sta); + + if (sta->ilbst_count == -1 || --(sta->ilbst_count) > 0) + (void) sleep(sta->ilbst_interval); + else + break; + + nkid = kstat_chain_update(kctl); + sta->ilbst_flags &= ~ILBST_RULES_CHANGED; + /* + * we only need to continue with most of the rest of this if + * the kstat chain id has changed + */ + if (nkid == 0) + goto swap_old_new; + if (nkid == -1) { + ilbadm_err(gettext("kstat_chain_update() failed: %s"), + strerror(errno)); + rc = ILBADM_LIBERR; + break; + } + + /* + * find out whether the number of rules has changed. + * if so, adjust rcount and _o; if number has increased, + * expand array to hold all rules. + * we only shrink if rlist_sz is larger than both rcount and + * rcount_prev; + */ + if (sta->ilbst_rulename == NULL) + rcount = i_get_num_kinstances(kctl); + if (rcount != sta->ilbst_rcount) { + sta->ilbst_flags |= ILBST_RULES_CHANGED; + sta->ilbst_rcount_prev = sta->ilbst_rcount; + sta->ilbst_rcount = rcount; + + if (rcount > sta->ilbst_rcount_prev) { + rlist = realloc(sta->ilbst_rlist, + sizeof (*sta->ilbst_rlist) * rcount); + if (rlist == NULL) { + rc = ILBADM_ENOMEM; + break; + } + sta->ilbst_rlist = rlist; + /* realloc doesn't zero out memory */ + for (i = sta->ilbst_rcount_prev; + i < rcount; i++) { + rp = &sta->ilbst_rlist[i]; + bzero(rp, sizeof (*rp)); + i_set_rlstats_ptr(rp, + sta->ilbst_old_is_old); + } + /* + * even if rlist_sz was > rcount, it's now + * shrunk to rcount + */ + sta->ilbst_rlist_sz = sta->ilbst_rcount; + } + } + + /* + * we may need to shrink the allocated slots down to the + * actually required number - we need to make sure we + * don't delete old or new 
stats. + */ + if (sta->ilbst_rlist_sz > MAX(sta->ilbst_rcount, + sta->ilbst_rcount_prev)) { + sta->ilbst_rlist_sz = + MAX(sta->ilbst_rcount, sta->ilbst_rcount_prev); + rlist = realloc(sta->ilbst_rlist, + sizeof (*sta->ilbst_rlist) * sta->ilbst_rlist_sz); + if (rlist == NULL) { + rc = ILBADM_ENOMEM; + break; + } + sta->ilbst_rlist = rlist; + } + + /* + * move pointers around so what used to point to "old" + * stats now points to new, and vice versa + * if we're printing absolute numbers, this rigmarole is + * not necessary. + */ +swap_old_new: + if (pseudo_abs) + sta->ilbst_flags &= ~ILBST_ABS_NUMBERS; + + if ((sta->ilbst_flags & ILBST_ABS_NUMBERS) == 0) { + sta->ilbst_old_is_old = !sta->ilbst_old_is_old; + i_swap_rl_pointers(sta, rcount); + } + _NOTE(CONSTCOND) + } while (B_TRUE); + +out: + (void) kstat_close(kctl); + if ((rc != ILBADM_OK) && (rc != ILBADM_LIBERR)) + ilbadm_err(ilbadm_errstr(rc)); + + if (sta->ilbst_rlist != NULL) + free(sta->ilbst_rlist); + + return (rc); +} + +/* + * read ilb's kernel statistics and (periodically) display + * them. 
+ */ +/* ARGSUSED */ +ilbadm_status_t +ilbadm_show_stats(int argc, char *argv[]) +{ + ilbadm_status_t rc; + int c; + ilbst_arg_t sta; + int oflags = 0; + char *fieldnames = stat_stdhdrs; + ofmt_field_t *fields = stat_stdfields; + boolean_t r_opt = B_FALSE, s_opt = B_FALSE, i_opt = B_FALSE; + boolean_t o_opt = B_FALSE, p_opt = B_FALSE, t_opt = B_FALSE; + boolean_t v_opt = B_FALSE, A_opt = B_FALSE, d_opt = B_FALSE; + ofmt_status_t oerr; + ofmt_handle_t oh = NULL; + + bzero(&sta, sizeof (sta)); + sta.ilbst_interval = 1; + sta.ilbst_count = 1; + + while ((c = getopt(argc, argv, ":tdAr:s:ivo:p")) != -1) { + switch ((char)c) { + case 't': sta.ilbst_flags |= ILBST_TIMESTAMP_HEADER; + t_opt = B_TRUE; + break; + case 'd': sta.ilbst_flags |= ILBST_DELTA_INTERVAL; + d_opt = B_TRUE; + break; + case 'A': sta.ilbst_flags |= ILBST_ABS_NUMBERS; + A_opt = B_TRUE; + break; + case 'r': sta.ilbst_rulename = optarg; + r_opt = B_TRUE; + break; + case 's': sta.ilbst_server = optarg; + s_opt = B_TRUE; + break; + case 'i': sta.ilbst_flags |= ILBST_ITEMIZE; + i_opt = B_TRUE; + break; + case 'o': fieldnames = optarg; + o_opt = B_TRUE; + break; + case 'p': oflags |= OFMT_PARSABLE; + p_opt = B_TRUE; + break; + case 'v': sta.ilbst_flags |= ILBST_VERBOSE; + v_opt = B_TRUE; + fieldnames = stat_stdv_hdrs; + break; + case ':': ilbadm_err(gettext("missing option-argument" + " detected for %c"), (char)optopt); + exit(1); + /* not reached */ + break; + case '?': /* fallthrough */ + default: + unknown_opt(argv, optind-1); + /* not reached */ + break; + } + } + + if (s_opt && r_opt) { + ilbadm_err(gettext("options -s and -r are mutually exclusive")); + exit(1); + } + + if (i_opt) { + if (!(s_opt || r_opt)) { + ilbadm_err(gettext("option -i requires" + " either -r or -s")); + exit(1); + } + if (v_opt) { + ilbadm_err(gettext("option -i and -v are mutually" + " exclusive")); + exit(1); + } + /* only use "std" headers if none are specified */ + if (!o_opt) + if (r_opt) + fieldnames = stat_itemize_rule_hdrs; 
+ else /* must be s_opt */ + fieldnames = stat_itemize_server_hdrs; + fields = stat_itemize_fields; + } + + if (p_opt) { + if (!o_opt) { + ilbadm_err(gettext("option -p requires -o")); + exit(1); + } + if (v_opt) { + ilbadm_err(gettext("option -o and -v are mutually" + " exclusive")); + exit(1); + } + if (strcasecmp(fieldnames, "all") == 0) { + ilbadm_err(gettext("option -p requires" + " explicit field names")); + exit(1); + } + } + + if (t_opt) { + if (v_opt) { + fieldnames = "all"; + } else { + int len = strlen(fieldnames) + 6; + char *fnames; + + fnames = malloc(len); + if (fnames == NULL) { + rc = ILBADM_ENOMEM; + return (rc); + } + (void) snprintf(fnames, len, "%s,TIME", fieldnames); + fieldnames = fnames; + } + } + + if (A_opt && d_opt) { + ilbadm_err(gettext("options -d and -A are mutually exclusive")); + exit(1); + } + + /* find and parse interval and count arguments if present */ + if (optind < argc) { + sta.ilbst_interval = atoi(argv[optind]); + if (sta.ilbst_interval < 1) { + ilbadm_err(gettext("illegal interval spec %s"), + argv[optind]); + exit(1); + } + sta.ilbst_count = -1; + if (++optind < argc) { + sta.ilbst_count = atoi(argv[optind]); + if (sta.ilbst_count < 1) { + ilbadm_err(gettext("illegal count spec %s"), + argv[optind]); + exit(1); + } + } + } + + oerr = ofmt_open(fieldnames, fields, oflags, 80, &oh); + if (oerr != OFMT_SUCCESS) { + char e[80]; + + ilbadm_err(gettext("ofmt_open failed: %s"), + ofmt_strerror(oh, oerr, e, sizeof (e))); + return (ILBADM_LIBERR); + } + + sta.ilbst_oh = oh; + + rc = i_do_show_stats(&sta); + + ofmt_close(oh); + return (rc); +} diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_subr.c b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_subr.c new file mode 100644 index 0000000000..940f82bc12 --- /dev/null +++ b/usr/src/cmd/cmd-inet/usr.sbin/ilbadm/ilbadm_subr.c @@ -0,0 +1,1161 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution 
License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <strings.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <netdb.h> +#include <errno.h> +#include <ctype.h> +#include <assert.h> +#include <limits.h> +#include <libilb.h> +#include <libilb_impl.h> +#include "ilbadm.h" + +#define PORT_SEP ':' + +typedef enum { + numeric = 1, + non_numeric +} addr_type_t; + +ilbadm_val_type_t algo_types[] = { + {(int)ILB_ALG_ROUNDROBIN, "roundrobin", "rr"}, + {(int)ILB_ALG_HASH_IP, "hash-ip", "hip"}, + {(int)ILB_ALG_HASH_IP_SPORT, "hash-ip-port", "hipp"}, + {(int)ILB_ALG_HASH_IP_VIP, "hash-ip-vip", "hipv"}, + {ILBD_BAD_VAL, NULL, NULL} +}; + +ilbadm_val_type_t topo_types[] = { + {(int)ILB_TOPO_DSR, "DSR", "d"}, + {(int)ILB_TOPO_NAT, "NAT", "n"}, + {(int)ILB_TOPO_HALF_NAT, "HALF-NAT", "h"}, + {ILBD_BAD_VAL, NULL, NULL} +}; + +void +ip2str(ilb_ip_addr_t *ip, char *buf, size_t sz, int flags) +{ + int len; + + switch (ip->ia_af) { + case AF_INET: + if (*(uint32_t *)&ip->ia_v4 == 0) + buf[0] = '\0'; + else + (void) inet_ntop(AF_INET, (void *)&ip->ia_v4, buf, sz); + break; + case AF_INET6: + if 
(IN6_IS_ADDR_UNSPECIFIED(&ip->ia_v6)) { + buf[0] = '\0'; + break; + } + if (!(flags & V6_ADDRONLY)) + *buf++ = '['; + sz--; + (void) inet_ntop(ip->ia_af, (void *)&ip->ia_v6, buf, sz); + if (!(flags & V6_ADDRONLY)) { + len = strlen(buf); + buf[len] = ']'; + buf[++len] = '\0'; + } + break; + default: buf[0] = '\0'; + } +} + +char * +i_str_from_val(int val, ilbadm_val_type_t *types) +{ + ilbadm_val_type_t *v; + + for (v = types; v->v_type != ILBD_BAD_VAL; v++) { + if (v->v_type == val) + break; + } + /* we return this in all cases */ + return (v->v_name); +} + +int +i_val_from_str(char *name, ilbadm_val_type_t *types) +{ + ilbadm_val_type_t *v; + + for (v = types; v->v_type != ILBD_BAD_VAL; v++) { + if (strncasecmp(name, v->v_name, sizeof (v->v_name)) == 0 || + strncasecmp(name, v->v_alias, sizeof (v->v_alias)) == 0) + break; + } + /* we return this in all cases */ + return (v->v_type); +} + +ilbadm_key_code_t +i_match_key(char *key, ilbadm_key_name_t *keylist) +{ + ilbadm_key_name_t *t_key; + + for (t_key = keylist; t_key->k_key != ILB_KEY_BAD; t_key++) { + if (strncasecmp(key, t_key->k_name, + sizeof (t_key->k_name)) == 0 || + strncasecmp(key, t_key->k_alias, + sizeof (t_key->k_alias)) == 0) + break; + } + return (t_key->k_key); +} + +/* + * try to match: + * 1) IPv4 address + * 2) IPv6 address + * 3) a hostname + */ +static ilbadm_status_t +i_match_onehost(const char *val, ilb_ip_addr_t *ip, addr_type_t *a_type) +{ + struct addrinfo *ai = NULL; + struct addrinfo hints; + addr_type_t at = numeric; + + (void) memset((void *)&hints, 0, sizeof (hints)); + hints.ai_flags |= AI_NUMERICHOST; + + /* + * if *a_type == numeric, we only want to check whether this + * is a (valid) numeric IP address. If we do and it is NOT, + * we return _ENOENT. 
+ */ + if (getaddrinfo(val, NULL, &hints, &ai) != 0) { + if (a_type != NULL && (*a_type == numeric)) + return (ILBADM_INVAL_ADDR); + + at = non_numeric; + if (getaddrinfo(val, NULL, NULL, &ai) != 0) + return (ILBADM_INVAL_ADDR); + } + + ip->ia_af = ai->ai_family; + switch (ip->ia_af) { + case AF_INET: { + struct sockaddr_in sa; + + assert(ai->ai_addrlen == sizeof (sa)); + (void) memcpy(&sa, ai->ai_addr, sizeof (sa)); + ip->ia_v4 = sa.sin_addr; + break; + } + case AF_INET6: { + struct sockaddr_in6 sa; + + assert(ai->ai_addrlen == sizeof (sa)); + (void) memcpy(&sa, ai->ai_addr, sizeof (sa)); + ip->ia_v6 = sa.sin6_addr; + break; + } + default: + return (ILBADM_INVAL_AF); + break; + } + + if (a_type != NULL) + *a_type = at; + return (ILBADM_OK); +} + +static ilbadm_status_t +i_store_serverID(void *store, char *val) +{ + ilbadm_servnode_t *s = (ilbadm_servnode_t *)store; + ilb_server_data_t *sn = &s->s_spec; + + /* + * we shouldn't need to check for length here, as a name that's + * too long won't exist in the system anyway. 
+ */ + (void) strlcpy(sn->sd_srvID, val, sizeof (sn->sd_srvID)); + return (ILBADM_OK); +} + +static struct in_addr +i_next_in_addr(struct in_addr *a, int dir) +{ + struct in_addr new_in; + uint32_t iah; + + iah = ntohl(a->s_addr); + if (dir == 1) + iah++; + else + iah--; + new_in.s_addr = htonl(iah); + return (new_in); +} + +static ilbadm_status_t +i_expand_ipv4range(ilbadm_sgroup_t *sg, ilb_server_data_t *srv, + ilb_ip_addr_t *ip1, ilb_ip_addr_t *ip2) +{ + struct in_addr *a1; + ilbadm_servnode_t *sn_new; + ilb_ip_addr_t new_ip; + + a1 = &ip1->ia_v4; + + new_ip.ia_af = AF_INET; + new_ip.ia_v4 = i_next_in_addr(a1, 1); + while (ilb_cmp_ipaddr(&new_ip, ip2, NULL) < 1) { + sn_new = i_new_sg_elem(sg); + sn_new->s_spec.sd_addr = new_ip; + sn_new->s_spec.sd_minport = srv->sd_minport; + sn_new->s_spec.sd_maxport = srv->sd_maxport; + new_ip.ia_v4 = i_next_in_addr(&new_ip.ia_v4, 1); + } + return (ILBADM_OK); +} + +static struct in6_addr +i_next_in6_addr(struct in6_addr *a, int dir) +{ + struct in6_addr ia6; + uint64_t al, ah; + + ah = INV6_N2H_MSB64(a); + al = INV6_N2H_LSB64(a); + + if (dir == 1) { + /* overflow */ + if (++al == 0) + ah++; + } else { + /* underflow */ + if (--al == 0xffffffff) + ah--; + } + + INV6_H2N_MSB64(&ia6, ah); + INV6_H2N_LSB64(&ia6, al); + return (ia6); +} + + +static ilbadm_status_t +i_expand_ipv6range(ilbadm_sgroup_t *sg, ilb_server_data_t *srv, + ilb_ip_addr_t *ip1, ilb_ip_addr_t *ip2) +{ + struct in6_addr *a1; + ilbadm_servnode_t *sn_new; + ilb_ip_addr_t new_ip; + + a1 = &ip1->ia_v6; + + new_ip.ia_af = AF_INET6; + new_ip.ia_v6 = i_next_in6_addr(a1, 1); + while (ilb_cmp_ipaddr(&new_ip, ip2, NULL) < 1) { + sn_new = i_new_sg_elem(sg); + sn_new->s_spec.sd_addr = new_ip; + sn_new->s_spec.sd_minport = srv->sd_minport; + sn_new->s_spec.sd_maxport = srv->sd_maxport; + new_ip.ia_v6 = i_next_in6_addr(&new_ip.ia_v6, 1); + } + return (ILBADM_OK); +} + + +/* + * we create a list node in the servergroup for every ip address + * in the range [ip1, ip2], where 
we interpret the ip addresses as + * numbers + * the first ip address is already stored in "sn" + */ +static ilbadm_status_t +i_expand_iprange(ilbadm_sgroup_t *sg, ilb_server_data_t *sr, + ilb_ip_addr_t *ip1, ilb_ip_addr_t *ip2) +{ + int cmp; + int64_t delta; + + if (ip2->ia_af == 0) + return (ILBADM_OK); + + if (ip1->ia_af != ip2->ia_af) { + ilbadm_err(gettext("IP address mismatch")); + return (ILBADM_LIBERR); + } + + /* if ip addresses are the same, we're done */ + if ((cmp = ilb_cmp_ipaddr(ip1, ip2, &delta)) == 0) + return (ILBADM_OK); + if (cmp == 1) { + ilbadm_err(gettext("starting IP address is must be less" + " than ending ip address in ip range specification")); + return (ILBADM_LIBERR); + } + + /* if the implicit number of IPs is too large, stop */ + if (abs((int)delta) > MAX_IP_SPREAD) + return (ILBADM_TOOMANYIPADDR); + + switch (ip1->ia_af) { + case AF_INET: return (i_expand_ipv4range(sg, sr, ip1, ip2)); + /* not reached */ + break; + case AF_INET6: return (i_expand_ipv6range(sg, sr, ip1, ip2)); + /* not reached */ + break; + } + return (ILBADM_INVAL_AF); +} + +/* + * parse a port spec (number or by service name) and + * return the numeric port in *host* byte order + * + * Upon return, *flags contains ILB_FLAGS_SRV_PORTNAME if a service name matches + */ +static int +i_parseport(char *port, char *proto, int *flags) +{ + struct servent *se; + + /* assumption: port names start with a non-digit */ + if (isdigit(port[0])) { + if (flags != NULL) + *flags &= ~ILB_FLAGS_SRV_PORTNAME; + return ((int)strtol(port, NULL, 10)); + } + + se = getservbyname(port, proto); + if (se == NULL) + return (-1); + + if (flags != NULL) + *flags |= ILB_FLAGS_SRV_PORTNAME; + + /* + * we need to convert to host byte order to be in sync with + * numerical ports. since result needs to be compared, this + * is preferred to returning NW byte order + */ + return ((int)(ntohs(se->s_port))); +} + +/* + * matches one hostname or IP address and stores it in "store". 
+ * space must have been pre-allocated to accept data + * "sg" != NULL only for cases where ip ranges may be coming in. + */ +static ilbadm_status_t +i_match_hostorip(void *store, ilbadm_sgroup_t *sg, char *val, + int flags, ilbadm_key_code_t keyword) +{ + boolean_t is_ip_range_ok = flags & OPT_IP_RANGE; + boolean_t is_addr_numeric = flags & OPT_NUMERIC_ONLY; + boolean_t is_ports_ok = flags & OPT_PORTS; + boolean_t ports_only = flags & OPT_PORTS_ONLY; + boolean_t is_nat_src = flags & OPT_NAT; + char *port_pref, *dash; + char *port1p, *port2p, *host2p, *host1p; + char *close1, *close2; + ilb_ip_addr_t ip2store; + ilb_ip_addr_t *ip1, *ip2; + int p1, p2; + ilb_server_data_t *s = NULL; + ilbadm_status_t rc = ILBADM_OK; + int af = AF_INET; + addr_type_t at = 0; + int p_flg; + struct in6_addr v6nameaddr; + + port1p = port2p = host2p = host1p = NULL; + port_pref = dash = NULL; + close1 = close2 = NULL; + errno = 0; + + if (is_nat_src) { + ilb_rule_data_t *rd = (ilb_rule_data_t *)store; + + ip1 = &rd->r_nat_src_start; + ip2 = &rd->r_nat_src_end; + } else { + ilbadm_servnode_t *sn = (ilbadm_servnode_t *)store; + + s = &sn->s_spec; + ip1 = &s->sd_addr; + ip2 = &ip2store; + bzero(ip2, sizeof (*ip2)); + } + + if (ports_only) { + is_ports_ok = B_TRUE; + port_pref = val - 1; /* we increment again later on */ + goto ports; + } + + /* + * we parse the syntax ip[-ip][:port[-port]] + * since IPv6 addresses contain ':'s as well, they need to be + * enclosed in "[]" to be distinct from a potential port spec. 
+ * therefore, we need to first check whether we're dealing with + * IPv6 addresses before we can go search for the port seperator + * and ipv6 range could look like this: [ff::0]-[ff::255]:80 + */ + if ((keyword == ILB_KEY_SERVER) && (strchr(val, ':') != NULL) && + (*val != '[') && ((inet_pton(AF_INET6, val, &v6nameaddr)) != 0)) { + /* + * V6 addresses must be enclosed within + * brackets when specifying server addresses + */ + rc = ILBADM_INVAL_SYNTAX; + goto err_out; + } + + if (*val == '[') { + af = AF_INET6; + + val++; + host1p = val; + + close1 = strchr(val, (int)']'); + if (close1 == NULL) { + rc = ILBADM_INVAL_SYNTAX; + goto err_out; + } + *close1 = '\0'; + at = 0; + rc = i_match_onehost(host1p, ip1, &at); + if (rc != ILBADM_OK) + goto err_out; + if (at != numeric) { + rc = ILBADM_INVAL_ADDR; + goto err_out; + } + if (ip1->ia_af != af) { + rc = ILBADM_INVAL_AF; + goto err_out; + } + val = close1 + 1; + + if (*val == PORT_SEP) { + port_pref = val; + goto ports; + } + if (*val == '-') { + dash = val; + if (!is_ip_range_ok) { + ilbadm_err(gettext("port ranges not allowed")); + rc = ILBADM_LIBERR; + goto err_out; + } + val++; + if (*val != '[') { + rc = ILBADM_INVAL_SYNTAX; + goto err_out; + } + val++; + close2 = strchr(val, (int)']'); + if (close2 == NULL) { + rc = ILBADM_INVAL_SYNTAX; + goto err_out; + } + *close2 = '\0'; + host2p = val; + at = 0; + rc = i_match_onehost(host2p, ip2, &at); + if (rc != ILBADM_OK) + goto err_out; + if (at != numeric) { + rc = ILBADM_INVAL_ADDR; + goto err_out; + } + if (ip2->ia_af != af) { + rc = ILBADM_INVAL_AF; + goto err_out; + } + val = close2+1; + } + } + + /* ports always potentially allow ranges - XXXms: check? 
*/ + port_pref = strchr(val, (int)PORT_SEP); +ports: + if (port_pref != NULL && is_ports_ok) { + port1p = port_pref + 1; + *port_pref = '\0'; + + dash = strchr(port1p, (int)'-'); + if (dash != NULL) { + port2p = dash + 1; + *dash = '\0'; + } + if (port1p != NULL) { + p1 = i_parseport(port1p, NULL, &p_flg); + if (p1 == -1 || p1 == 0 || p1 > ILB_MAX_PORT) { + ilbadm_err(gettext("invalid port value %s" + " specified"), port1p); + rc = ILBADM_LIBERR; + goto err_out; + } + s->sd_minport = htons((in_port_t)p1); + if (p_flg & ILB_FLAGS_SRV_PORTNAME) + s->sd_flags |= ILB_FLAGS_SRV_PORTNAME; + } + if (port2p != NULL) { + /* ranges are only allowed for numeric ports */ + if (p_flg & ILB_FLAGS_SRV_PORTNAME) { + ilbadm_err(gettext("ranges are only allowed" + " for numeric ports")); + rc = ILBADM_LIBERR; + goto err_out; + } + p2 = i_parseport(port2p, NULL, &p_flg); + if (p2 == -1 || p2 <= p1 || p2 > ILB_MAX_PORT || + (p_flg & ILB_FLAGS_SRV_PORTNAME) == + ILB_FLAGS_SRV_PORTNAME) { + ilbadm_err(gettext("invalid port value %s" + " specified"), port2p); + rc = ILBADM_LIBERR; + goto err_out; + } + s->sd_maxport = htons((in_port_t)p2); + } + /* + * we fill the '-' back in, but not the port seperator, + * as the \0 in its place terminates the ip address(es) + */ + if (dash != NULL) + *dash = '-'; + if (ports_only) + goto out; + } + + if (af == AF_INET6) + goto out; + + /* + * we need to handle these situations for hosts: + * a. ip address + * b. ip address range (ip1-ip2) + * c. a hostname (may include '-' or start with a digit) + * + * We want to do hostname lookup only if we're quite sure that + * we actually are looking at neither a single IP address nor a + * range of same, as this can hang if name service is not set up + * (sth. likely in a LB environment). + * + * here's how we proceed: + * 1. try to match numeric only. If that succeeds, we're done. + * (getaddrinfo, which we call in i_match_onehost(), fails if + * it encounters a '-') + * 2. 
search for a '-'; if we find one, try numeric match for + * both sides. if this fails: + * 3. re-insert '-' and try for a legal hostname. + */ + /* 1. */ + at = numeric; + rc = i_match_onehost(val, ip1, &at); + if (rc == ILBADM_OK) + goto out; + + /* 2. */ + dash = strchr(val, (int)'-'); + if (dash != NULL && is_ip_range_ok) { + host2p = dash + 1; + *dash = '\0'; + at = numeric; + rc = i_match_onehost(host2p, ip2, &at); + if (rc != ILBADM_OK || at != numeric) { + *dash = '-'; + dash = NULL; + bzero(ip2, sizeof (*ip2)); + goto hostname; + } + /* + * if the RHS of '-' is an IP but LHS is not, we might + * have a hostname of form x-y where y is just a number + * (this seems a valid IPv4 address), so we need to + * try a complete hostname + */ + rc = i_match_onehost(val, ip1, &at); + if (rc != ILBADM_OK || at != numeric) { + *dash = '-'; + dash = NULL; + goto hostname; + } + goto out; + } +hostname: + /* 3. */ + + if (is_addr_numeric) + at = numeric; + else + at = 0; + rc = i_match_onehost(val, ip1, &at); + if (rc != ILBADM_OK) { + goto out; + } + if (s != NULL) { + s->sd_flags |= ILB_FLAGS_SRV_HOSTNAME; + /* XXX: todo: save hostname for re-display for admin */ + } + +out: + if (dash != NULL && !is_nat_src) { + rc = i_expand_iprange(sg, s, ip1, ip2); + if (rc != ILBADM_OK) + goto err_out; + } + + if (is_nat_src && host2p == NULL) + *ip2 = *ip1; + +err_out: + /* + * we re-insert what we overwrote, especially in the error case + */ + if (close2 != NULL) + *close2 = ']'; + if (close1 != NULL) + *close1 = '['; + if (dash != NULL) + *dash = '-'; + if (port_pref != NULL && !ports_only) + *port_pref = PORT_SEP; + + return (rc); +} + +/* + * type-agnostic helper function to return a pointer to a + * pristine (and maybe freshly allocated) piece of storage + * ready for something fitting "key" + */ +static void * +i_new_storep(void *store, ilbadm_key_code_t key) +{ + void *res; + + switch (key) { + case ILB_KEY_SERVER: + case ILB_KEY_SERVRANGE: + case ILB_KEY_SERVERID: + res = 
(void *) i_new_sg_elem(store); + break; + default: res = NULL; + break; + } + + return (res); +} + +/* + * make sure everything that needs to be there is there + */ +ilbadm_status_t +i_check_rule_spec(ilb_rule_data_t *rd) +{ + int32_t vip_af = rd->r_vip.ia_af; + ilb_ip_addr_t *prxy_src; + + if (vip_af != AF_INET && vip_af != AF_INET6) + return (ILBADM_INVAL_AF); + + if (*rd->r_sgname == '\0') + return (ILBADM_ENOSGNAME); + + if (rd->r_algo == 0 || rd->r_topo == 0) { + ilbadm_err(gettext("lbalg or type is unspecified")); + return (ILBADM_LIBERR); + } + + if (rd->r_topo == ILB_TOPO_NAT) { + prxy_src = &rd->r_nat_src_start; + if (prxy_src->ia_af != vip_af) { + ilbadm_err(gettext("proxy-src is either missing" + " or its address family does not" + " match that of the VIP address")); + return (ILBADM_LIBERR); + } + } + /* extend as necessary */ + + return (ILBADM_OK); +} + +/* + * in parameter "sz" describes size (in bytes) of mask + */ +static int +mask_to_prefixlen(const uchar_t *mask, const int sz) +{ + uchar_t c; + int i, j; + int len = 0; + int tmask; + + /* + * for every byte in the mask, we start with most significant + * bit and work our way down to the least significant bit; as + * long as we find the bit set, we add 1 to the length. 
the + * first unset bit we encounter terminates this process + */ + for (i = 0; i < sz; i++) { + c = mask[i]; + tmask = 1 << 7; + for (j = 7; j >= 0; j--) { + if ((c & tmask) == 0) + return (len); + len++; + tmask >>= 1; + } + } + return (len); +} + +int +ilbadm_mask_to_prefixlen(ilb_ip_addr_t *ip) +{ + int af = ip->ia_af; + int len = 0; + + assert(af == AF_INET || af == AF_INET6); + switch (af) { + case AF_INET: + len = mask_to_prefixlen((uchar_t *)&ip->ia_v4.s_addr, + sizeof (ip->ia_v4)); + break; + case AF_INET6: + len = mask_to_prefixlen((uchar_t *)&ip->ia_v6.s6_addr, + sizeof (ip->ia_v6)); + break; + } + return (len); +} + +/* copied from ifconfig.c, changed to return symbolic constants */ +/* + * Convert a prefix length to a mask. + * Returns 1 if ok. 0 otherwise. + * Assumes the mask array is zero'ed by the caller. + */ +static boolean_t +in_prefixlentomask(int prefixlen, int maxlen, uchar_t *mask) +{ + if (prefixlen < 0 || prefixlen > maxlen) + return (B_FALSE); + + while (prefixlen > 0) { + if (prefixlen >= 8) { + *mask++ = 0xFF; + prefixlen -= 8; + continue; + } + *mask |= 1 << (8 - prefixlen); + prefixlen--; + } + return (B_TRUE); +} + +ilbadm_status_t +ilbadm_set_netmask(char *val, ilb_ip_addr_t *ip, int af) +{ + int prefixlen, maxval; + boolean_t r; + char *end; + + assert(af == AF_INET || af == AF_INET6); + + maxval = (af == AF_INET) ? 
32 : 128; + + if (*val == '/') + val++; + prefixlen = strtol(val, &end, 10); + if ((val == end) || (*end != '\0')) { + ilbadm_err(gettext("invalid pmask provided")); + return (ILBADM_LIBERR); + } + + if (prefixlen < 1 || prefixlen > maxval) { + ilbadm_err(gettext("invalid pmask provided (AF mismatch?)")); + return (ILBADM_LIBERR); + } + + switch (af) { + case AF_INET: + r = in_prefixlentomask(prefixlen, maxval, + (uchar_t *)&ip->ia_v4.s_addr); + break; + case AF_INET6: + r = in_prefixlentomask(prefixlen, maxval, + (uchar_t *)&ip->ia_v6.s6_addr); + break; + } + if (r != B_TRUE) { + ilbadm_err(gettext("cannot convert %s to a netmask"), val); + return (ILBADM_LIBERR); + } + ip->ia_af = af; + return (ILBADM_OK); +} + +static ilbadm_status_t +i_store_val(char *val, void *store, ilbadm_key_code_t keyword) +{ + ilbadm_status_t rc = ILBADM_OK; + void *storep = store; + ilb_rule_data_t *rd = NULL; + ilbadm_sgroup_t *sg = NULL; + ilb_hc_info_t *hc_info = NULL; + struct protoent *pe; + int64_t tmp_val; + + if (*val == '\0') + return (ILBADM_NOKEYWORD_VAL); + + /* some types need new storage, others don't */ + switch (keyword) { + case ILB_KEY_SERVER: + case ILB_KEY_SERVERID: + sg = (ilbadm_sgroup_t *)store; + storep = i_new_storep(store, keyword); + break; + case ILB_KEY_HEALTHCHECK: + case ILB_KEY_SERVERGROUP: + rd = (ilb_rule_data_t *)store; + break; + case ILB_KEY_VIP: /* fallthrough */ + case ILB_KEY_PORT: /* fallthrough */ + case ILB_KEY_HCPORT: /* fallthrough */ + case ILB_KEY_CONNDRAIN: /* fallthrough */ + case ILB_KEY_NAT_TO: /* fallthrough */ + case ILB_KEY_STICKY_TO: /* fallthrough */ + case ILB_KEY_PROTOCOL: /* fallthrough */ + case ILB_KEY_ALGORITHM: /* fallthrough */ + case ILB_KEY_STICKY: /* fallthrough */ + case ILB_KEY_TYPE: /* fallthrough */ + case ILB_KEY_SRC: /* fallthrough */ + rd = (ilb_rule_data_t *)store; + break; + case ILB_KEY_HC_TEST: + case ILB_KEY_HC_COUNT: + case ILB_KEY_HC_INTERVAL: + case ILB_KEY_HC_TIMEOUT: + hc_info = (ilb_hc_info_t *)store; + 
default: /* do nothing */ + ; + } + + switch (keyword) { + case ILB_KEY_SRC: + /* + * the proxy-src keyword is only valid for full NAT topology + * the value is either a single or a range of IP addresses. + */ + if (rd->r_topo != ILB_TOPO_NAT) { + rc = ILBADM_INVAL_PROXY; + break; + } + rc = i_match_hostorip(storep, sg, val, OPT_NUMERIC_ONLY | + OPT_IP_RANGE | OPT_NAT, ILB_KEY_SRC); + break; + case ILB_KEY_SERVER: + rc = i_match_hostorip(storep, sg, val, + OPT_IP_RANGE | OPT_PORTS, ILB_KEY_SERVER); + break; + case ILB_KEY_SERVERID: + if (val[0] != ILB_SRVID_PREFIX) + rc = ILBADM_INVAL_SRVID; + else + rc = i_store_serverID(storep, val); + break; + case ILB_KEY_VIP: { + ilb_ip_addr_t *vip = &rd->r_vip; + addr_type_t at = numeric; + char *close = NULL; + + /* + * we duplicate some functionality of i_match_hostorip + * here; that function is geared to mandate '[]' for IPv6 + * addresses, which we want to relax here, so as not to + * make i_match_hostorip even longer, we do what we need + * here. 
+ */ + if (*val == '[') { + val++; + if ((close = strchr(val, (int)']')) == NULL) { + rc = ILBADM_INVAL_SYNTAX; + break; + } + /* terminate the address at ']'; put back below */ + *close = '\0'; + } + rc = i_match_onehost(val, vip, &at); + /* re-assemble string as we found it */ + if (close != NULL) { + *close = ']'; + if (rc == ILBADM_OK && vip->ia_af != AF_INET6) { + ilbadm_err(gettext("use of '[]' only valid" + " with IPv6 addresses")); + rc = ILBADM_LIBERR; + } + } + break; + } + case ILB_KEY_CONNDRAIN: + tmp_val = strtoll(val, NULL, 10); + if (tmp_val <= 0 || tmp_val > UINT_MAX) { + rc = ILBADM_EINVAL; + break; + } + rd->r_conndrain = tmp_val; + break; + case ILB_KEY_NAT_TO: + tmp_val = strtoll(val, NULL, 10); + if (tmp_val < 0 || tmp_val > UINT_MAX) { + rc = ILBADM_EINVAL; + break; + } + rd->r_nat_timeout = tmp_val; + break; + case ILB_KEY_STICKY_TO: + tmp_val = strtoll(val, NULL, 10); + if (tmp_val <= 0 || tmp_val > UINT_MAX) { + rc = ILBADM_EINVAL; + break; + } + rd->r_sticky_timeout = tmp_val; + break; + case ILB_KEY_PORT: + if (isdigit(*val)) { + ilbadm_servnode_t sn; + + bzero(&sn, sizeof (sn)); + rc = i_match_hostorip((void *)&sn, sg, val, + OPT_PORTS_ONLY, ILB_KEY_PORT); + if (rc != ILBADM_OK) + break; + rd->r_minport = sn.s_spec.sd_minport; + rd->r_maxport = sn.s_spec.sd_maxport; + } else { + struct servent *se; + + se = getservbyname(val, NULL); + if (se == NULL) { + rc = ILBADM_ENOSERVICE; + break; + } + rd->r_minport = se->s_port; + rd->r_maxport = 0; + } + break; + case ILB_KEY_HCPORT: + if (isdigit(*val)) { + int hcport = atoi(val); + + if (hcport < 1 || hcport > 65535) { + ilbadm_err(gettext("illegal number for" + " hcport %s"), val); + rc = ILBADM_LIBERR; + break; + } + rd->r_hcport = htons(hcport); + rd->r_hcpflag = ILB_HCI_PROBE_FIX; + } else if (strcasecmp(val, "ANY") == 0) { + rd->r_hcport = 0; + rd->r_hcpflag = ILB_HCI_PROBE_ANY; + } else { + return (ILBADM_EINVAL); + } + break; + case ILB_KEY_PROTOCOL: + pe = getprotobyname(val); + if (pe == NULL) + rc = ILBADM_ENOPROTO; + else + 
rd->r_proto = pe->p_proto; + break; + case ILB_KEY_ALGORITHM: + rd->r_algo = i_val_from_str(val, &algo_types[0]); + if (rd->r_algo == ILBD_BAD_VAL) + rc = ILBADM_INVAL_ALG; + break; + case ILB_KEY_STICKY: + rd->r_flags |= ILB_FLAGS_RULE_STICKY; + /* + * CAVEAT: the use of r_vip.ia_af implies that the VIP + * *must* be specified on the commandline *before* + * the sticky mask. + */ + if (AF_UNSPEC == rd->r_vip.ia_af) { + ilbadm_err(gettext("option '%s' requires that VIP be " + "specified first"), ilbadm_key_to_opt(keyword)); + rc = ILBADM_LIBERR; + break; + } + rc = ilbadm_set_netmask(val, &rd->r_stickymask, + rd->r_vip.ia_af); + break; + case ILB_KEY_TYPE: + rd->r_topo = i_val_from_str(val, &topo_types[0]); + if (rd->r_topo == ILBD_BAD_VAL) + rc = ILBADM_INVAL_OPER; + break; + case ILB_KEY_SERVERGROUP: + (void) strlcpy(rd->r_sgname, (char *)val, + sizeof (rd->r_sgname)); + break; + case ILB_KEY_HEALTHCHECK: + (void) strlcpy(rd->r_hcname, (char *)val, + sizeof (rd->r_hcname)); + break; + case ILB_KEY_HC_TEST: + (void) strlcpy(hc_info->hci_test, (char *)val, + sizeof (hc_info->hci_test)); + break; + case ILB_KEY_HC_COUNT: + if (isdigit(*val)) + hc_info->hci_count = atoi(val); + else + return (ILBADM_EINVAL); + break; + case ILB_KEY_HC_INTERVAL: + if (isdigit(*val)) + hc_info->hci_interval = atoi(val); + else + return (ILBADM_EINVAL); + break; + case ILB_KEY_HC_TIMEOUT: + if (isdigit(*val)) + hc_info->hci_timeout = atoi(val); + else + return (ILBADM_EINVAL); + break; + default: rc = ILBADM_INVAL_KEYWORD; + break; + } + + return (rc); +} + +/* + * generic parsing function. + * parses "key=value[,value]" strings in "arg". keylist determines the + * list of valid keys in the LHS. keycode determines interpretation and + * storage in store + * XXXms: looks like "key=value[,value]" violates spec. 
needs a fix + */ +ilbadm_status_t +i_parse_optstring(char *arg, void *store, ilbadm_key_name_t *keylist, + int flags, int *count) +{ + ilbadm_status_t rc = ILBADM_OK; + char *comma = NULL, *equals = NULL; + char *key, *nextkey, *val; + ilbadm_key_code_t keyword; + boolean_t is_value_list = flags & OPT_VALUE_LIST; + boolean_t assign_seen = B_FALSE; + int n; + + key = arg; + n = 1; + /* + * Algorithm: + * 1. find any commas indicating and seperating current value + * from a following value + * 2. if we're expecting a list of values (seperated by commas) + * and have already seen the assignment, then + * get the next "value" + * 3. else (we're looking at the first element of the RHS) + * 4. find the '=' + * 5. match the keyword to the list we were passed in + * 6. store the value. + */ + while (key != NULL && *key != '\0') { + comma = equals = NULL; + + /* 2 */ + nextkey = strchr(key, (int)','); + if (nextkey != NULL) { + comma = nextkey++; + *comma = '\0'; + } + + /* 3a */ + if (is_value_list && assign_seen) { + val = key; + /* 3b */ + } else { + /* 4 */ + equals = strchr(key, (int)'='); + if (equals == NULL) { + ilbadm_err("%s: %s", key, + ilbadm_errstr(ILBADM_ASSIGNREQ)); + rc = ILBADM_LIBERR; + goto out; + } + val = equals + 1; + *equals = '\0'; + assign_seen = B_TRUE; + + /* 5 */ + keyword = i_match_key(key, keylist); + if (keyword == ILB_KEY_BAD) { + ilbadm_err(gettext("bad keyword %s"), key); + rc = ILBADM_LIBERR; + goto out; + } + } + + /* 6 */ + rc = i_store_val(val, store, keyword); + if (rc != ILBADM_OK) { + ilbadm_err("%s: %s", key, ilbadm_errstr(rc)); + /* Change to ILBADM_ILBERR to avoid more err msgs. 
*/ + rc = ILBADM_LIBERR; + goto out; + } + + key = nextkey; + n++; + } + +out: + if (comma != NULL) + *comma = ','; + if (equals != NULL) + *equals = '='; + if (count != NULL) + *count = n; + return (rc); +} diff --git a/usr/src/cmd/mdb/common/modules/ip/ip.c b/usr/src/cmd/mdb/common/modules/ip/ip.c index 9b901119ad..28f21efe1f 100644 --- a/usr/src/cmd/mdb/common/modules/ip/ip.c +++ b/usr/src/cmd/mdb/common/modules/ip/ip.c @@ -46,6 +46,11 @@ #include <sys/modhash_impl.h> #include <inet/ip_ndp.h> #include <inet/ip_if.h> +#include <ilb.h> +#include <ilb/ilb_impl.h> +#include <ilb/ilb_stack.h> +#include <ilb/ilb_nat.h> +#include <ilb/ilb_conn.h> #include <sys/dlpi.h> #include <mdb/mdb_modapi.h> @@ -223,6 +228,19 @@ static void conn_status_help(void); static int srcid_status(uintptr_t, uint_t, int, const mdb_arg_t *); +static int ilb_stacks_walk_step(mdb_walk_state_t *); +static int ilb_rules_walk_init(mdb_walk_state_t *); +static int ilb_rules_walk_step(mdb_walk_state_t *); +static int ilb_servers_walk_init(mdb_walk_state_t *); +static int ilb_servers_walk_step(mdb_walk_state_t *); +static int ilb_nat_src_walk_init(mdb_walk_state_t *); +static int ilb_nat_src_walk_step(mdb_walk_state_t *); +static int ilb_conn_walk_init(mdb_walk_state_t *); +static int ilb_conn_walk_step(mdb_walk_state_t *); +static int ilb_sticky_walk_init(mdb_walk_state_t *); +static int ilb_sticky_walk_step(mdb_walk_state_t *); +static void ilb_common_walk_fini(mdb_walk_state_t *); + /* * Given the kernel address of an ip_stack_t, return the stackid */ @@ -1412,6 +1430,20 @@ static const mdb_walker_t walkers[] = { "ips_ipcl_proto_fanout_v6", ipcl_hash_walk_init, ipcl_hash_walk_step, ipcl_hash_walk_fini, &proto_v6_hash_arg}, + { "ilb_stacks", "walk ilb_stack_t", + ip_stacks_walk_init, ilb_stacks_walk_step, NULL }, + { "ilb_rules", "walk ilb rules in a given ilb_stack_t", + ilb_rules_walk_init, ilb_rules_walk_step, NULL }, + { "ilb_servers", "walk server in a given ilb_rule_t", + 
ilb_servers_walk_init, ilb_servers_walk_step, NULL }, + { "ilb_nat_src", "walk NAT source table of a given ilb_stack_t", + ilb_nat_src_walk_init, ilb_nat_src_walk_step, + ilb_common_walk_fini }, + { "ilb_conns", "walk NAT table of a given ilb_stack_t", + ilb_conn_walk_init, ilb_conn_walk_step, ilb_common_walk_fini }, + { "ilb_stickys", "walk sticky table of a given ilb_stack_t", + ilb_sticky_walk_init, ilb_sticky_walk_step, + ilb_common_walk_fini }, { NULL } }; @@ -2543,3 +2575,474 @@ srcid_status(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) } return (DCMD_OK); } + +static int +ilb_stacks_walk_step(mdb_walk_state_t *wsp) +{ + uintptr_t kaddr; + netstack_t nss; + + if (mdb_vread(&nss, sizeof (nss), wsp->walk_addr) == -1) { + mdb_warn("can't read netstack at %p", wsp->walk_addr); + return (WALK_ERR); + } + kaddr = (uintptr_t)nss.netstack_modules[NS_ILB]; + + return (wsp->walk_callback(kaddr, wsp->walk_layer, wsp->walk_cbdata)); +} + +static int +ilb_rules_walk_init(mdb_walk_state_t *wsp) +{ + ilb_stack_t ilbs; + + if (wsp->walk_addr == NULL) + return (WALK_ERR); + + if (mdb_vread(&ilbs, sizeof (ilbs), wsp->walk_addr) == -1) { + mdb_warn("failed to read ilb_stack_t at %p", wsp->walk_addr); + return (WALK_ERR); + } + if ((wsp->walk_addr = (uintptr_t)ilbs.ilbs_rule_head) != NULL) + return (WALK_NEXT); + else + return (WALK_DONE); +} + +static int +ilb_rules_walk_step(mdb_walk_state_t *wsp) +{ + ilb_rule_t rule; + int status; + + if (mdb_vread(&rule, sizeof (rule), wsp->walk_addr) == -1) { + mdb_warn("failed to read ilb_rule_t at %p", wsp->walk_addr); + return (WALK_ERR); + } + status = wsp->walk_callback(wsp->walk_addr, &rule, wsp->walk_cbdata); + if (status != WALK_NEXT) + return (status); + if ((wsp->walk_addr = (uintptr_t)rule.ir_next) == NULL) + return (WALK_DONE); + else + return (WALK_NEXT); +} + +static int +ilb_servers_walk_init(mdb_walk_state_t *wsp) +{ + ilb_rule_t rule; + + if (wsp->walk_addr == NULL) + return (WALK_ERR); + + if 
(mdb_vread(&rule, sizeof (rule), wsp->walk_addr) == -1) { + mdb_warn("failed to read ilb_rule_t at %p", wsp->walk_addr); + return (WALK_ERR); + } + if ((wsp->walk_addr = (uintptr_t)rule.ir_servers) != NULL) + return (WALK_NEXT); + else + return (WALK_DONE); +} + +static int +ilb_servers_walk_step(mdb_walk_state_t *wsp) +{ + ilb_server_t server; + int status; + + if (mdb_vread(&server, sizeof (server), wsp->walk_addr) == -1) { + mdb_warn("failed to read ilb_server_t at %p", wsp->walk_addr); + return (WALK_ERR); + } + status = wsp->walk_callback(wsp->walk_addr, &server, wsp->walk_cbdata); + if (status != WALK_NEXT) + return (status); + if ((wsp->walk_addr = (uintptr_t)server.iser_next) == NULL) + return (WALK_DONE); + else + return (WALK_NEXT); +} + +/* + * Helper structure for ilb_nat_src walker. It stores the current index of the + * nat src table. + */ +typedef struct { + ilb_stack_t ilbs; + int idx; +} ilb_walk_t; + +/* Copy from list.c */ +#define list_object(a, node) ((void *)(((char *)node) - (a)->list_offset)) + +static int +ilb_nat_src_walk_init(mdb_walk_state_t *wsp) +{ + int i; + ilb_walk_t *ns_walk; + ilb_nat_src_entry_t *entry = NULL; + + if (wsp->walk_addr == NULL) + return (WALK_ERR); + + ns_walk = mdb_alloc(sizeof (ilb_walk_t), UM_SLEEP); + if (mdb_vread(&ns_walk->ilbs, sizeof (ns_walk->ilbs), + wsp->walk_addr) == -1) { + mdb_warn("failed to read ilb_stack_t at %p", wsp->walk_addr); + mdb_free(ns_walk, sizeof (ilb_walk_t)); + return (WALK_ERR); + } + + if (ns_walk->ilbs.ilbs_nat_src == NULL) { + mdb_free(ns_walk, sizeof (ilb_walk_t)); + return (WALK_DONE); + } + + wsp->walk_data = ns_walk; + for (i = 0; i < ns_walk->ilbs.ilbs_nat_src_hash_size; i++) { + list_t head; + char *khead; + + /* Read in the nsh_head in the i-th element of the array. 
*/ + khead = (char *)ns_walk->ilbs.ilbs_nat_src + i * + sizeof (ilb_nat_src_hash_t); + if (mdb_vread(&head, sizeof (list_t), (uintptr_t)khead) == -1) { + mdb_warn("failed to read ilbs_nat_src at %p\n", khead); + return (WALK_ERR); + } + + /* + * Note that list_next points to a kernel address and we need + * to compare list_next with the kernel address of the list + * head. So we need to calculate the address manually. + */ + if ((char *)head.list_head.list_next != khead + + offsetof(list_t, list_head)) { + entry = list_object(&head, head.list_head.list_next); + break; + } + } + + if (entry == NULL) + return (WALK_DONE); + + wsp->walk_addr = (uintptr_t)entry; + ns_walk->idx = i; + return (WALK_NEXT); +} + +static int +ilb_nat_src_walk_step(mdb_walk_state_t *wsp) +{ + int status; + ilb_nat_src_entry_t entry, *next_entry; + ilb_walk_t *ns_walk; + ilb_stack_t *ilbs; + list_t head; + char *khead; + int i; + + if (mdb_vread(&entry, sizeof (ilb_nat_src_entry_t), + wsp->walk_addr) == -1) { + mdb_warn("failed to read ilb_nat_src_entry_t at %p", + wsp->walk_addr); + return (WALK_ERR); + } + status = wsp->walk_callback(wsp->walk_addr, &entry, wsp->walk_cbdata); + if (status != WALK_NEXT) + return (status); + + ns_walk = (ilb_walk_t *)wsp->walk_data; + ilbs = &ns_walk->ilbs; + i = ns_walk->idx; + + /* Read in the nsh_head in the i-th element of the array. */ + khead = (char *)ilbs->ilbs_nat_src + i * sizeof (ilb_nat_src_hash_t); + if (mdb_vread(&head, sizeof (list_t), (uintptr_t)khead) == -1) { + mdb_warn("failed to read ilbs_nat_src at %p\n", khead); + return (WALK_ERR); + } + + /* + * Check if there is still entry in the current list. + * + * Note that list_next points to a kernel address and we need to + * compare list_next with the kernel address of the list head. + * So we need to calculate the address manually. 
+ */ + if ((char *)entry.nse_link.list_next != khead + offsetof(list_t, + list_head)) { + wsp->walk_addr = (uintptr_t)list_object(&head, + entry.nse_link.list_next); + return (WALK_NEXT); + } + + /* Start with the next bucket in the array. */ + next_entry = NULL; + for (i++; i < ilbs->ilbs_nat_src_hash_size; i++) { + khead = (char *)ilbs->ilbs_nat_src + i * + sizeof (ilb_nat_src_hash_t); + if (mdb_vread(&head, sizeof (list_t), (uintptr_t)khead) == -1) { + mdb_warn("failed to read ilbs_nat_src at %p\n", khead); + return (WALK_ERR); + } + + if ((char *)head.list_head.list_next != khead + + offsetof(list_t, list_head)) { + next_entry = list_object(&head, + head.list_head.list_next); + break; + } + } + + if (next_entry == NULL) + return (WALK_DONE); + + wsp->walk_addr = (uintptr_t)next_entry; + ns_walk->idx = i; + return (WALK_NEXT); +} + +/* + * Release the ilb_walk_t that a walk_init routine stored in walk_data. + * The size handed to mdb_free() must be the size that was allocated, + * i.e. that of the structure and not that of a pointer to it. + */ +static void +ilb_common_walk_fini(mdb_walk_state_t *wsp) +{ + ilb_walk_t *walk; + + walk = (ilb_walk_t *)wsp->walk_data; + if (walk == NULL) + return; + mdb_free(walk, sizeof (ilb_walk_t)); +} + +static int +ilb_conn_walk_init(mdb_walk_state_t *wsp) +{ + int i; + ilb_walk_t *conn_walk; + ilb_conn_hash_t head; + + if (wsp->walk_addr == NULL) + return (WALK_ERR); + + conn_walk = mdb_alloc(sizeof (ilb_walk_t), UM_SLEEP); + if (mdb_vread(&conn_walk->ilbs, sizeof (conn_walk->ilbs), + wsp->walk_addr) == -1) { + mdb_warn("failed to read ilb_stack_t at %p", wsp->walk_addr); + mdb_free(conn_walk, sizeof (ilb_walk_t)); + return (WALK_ERR); + } + + if (conn_walk->ilbs.ilbs_c2s_conn_hash == NULL) { + mdb_free(conn_walk, sizeof (ilb_walk_t)); + return (WALK_DONE); + } + + wsp->walk_data = conn_walk; + for (i = 0; i < conn_walk->ilbs.ilbs_conn_hash_size; i++) { + char *khead; + + /* Read in the nsh_head in the i-th element of the array. 
*/ + khead = (char *)conn_walk->ilbs.ilbs_c2s_conn_hash + i * + sizeof (ilb_conn_hash_t); + if (mdb_vread(&head, sizeof (ilb_conn_hash_t), + (uintptr_t)khead) == -1) { + mdb_warn("failed to read ilbs_c2s_conn_hash at %p\n", + khead); + return (WALK_ERR); + } + + if (head.ilb_connp != NULL) + break; + } + + if (head.ilb_connp == NULL) + return (WALK_DONE); + + wsp->walk_addr = (uintptr_t)head.ilb_connp; + conn_walk->idx = i; + return (WALK_NEXT); +} + +static int +ilb_conn_walk_step(mdb_walk_state_t *wsp) +{ + int status; + ilb_conn_t conn; + ilb_walk_t *conn_walk; + ilb_stack_t *ilbs; + ilb_conn_hash_t head; + char *khead; + int i; + + if (mdb_vread(&conn, sizeof (ilb_conn_t), wsp->walk_addr) == -1) { + mdb_warn("failed to read ilb_conn_t at %p", wsp->walk_addr); + return (WALK_ERR); + } + + status = wsp->walk_callback(wsp->walk_addr, &conn, wsp->walk_cbdata); + if (status != WALK_NEXT) + return (status); + + conn_walk = (ilb_walk_t *)wsp->walk_data; + ilbs = &conn_walk->ilbs; + i = conn_walk->idx; + + /* Check if there is still entry in the current list. */ + if (conn.conn_c2s_next != NULL) { + wsp->walk_addr = (uintptr_t)conn.conn_c2s_next; + return (WALK_NEXT); + } + + /* Start with the next bucket in the array. 
*/ + for (i++; i < ilbs->ilbs_conn_hash_size; i++) { + khead = (char *)ilbs->ilbs_c2s_conn_hash + i * + sizeof (ilb_conn_hash_t); + if (mdb_vread(&head, sizeof (ilb_conn_hash_t), + (uintptr_t)khead) == -1) { + mdb_warn("failed to read ilbs_c2s_conn_hash at %p\n", + khead); + return (WALK_ERR); + } + + if (head.ilb_connp != NULL) + break; + } + + if (head.ilb_connp == NULL) + return (WALK_DONE); + + wsp->walk_addr = (uintptr_t)head.ilb_connp; + conn_walk->idx = i; + return (WALK_NEXT); +} + +static int +ilb_sticky_walk_init(mdb_walk_state_t *wsp) +{ + int i; + ilb_walk_t *sticky_walk; + ilb_sticky_t *st = NULL; + + if (wsp->walk_addr == NULL) + return (WALK_ERR); + + sticky_walk = mdb_alloc(sizeof (ilb_walk_t), UM_SLEEP); + if (mdb_vread(&sticky_walk->ilbs, sizeof (sticky_walk->ilbs), + wsp->walk_addr) == -1) { + mdb_warn("failed to read ilb_stack_t at %p", wsp->walk_addr); + mdb_free(sticky_walk, sizeof (ilb_walk_t)); + return (WALK_ERR); + } + + if (sticky_walk->ilbs.ilbs_sticky_hash == NULL) { + mdb_free(sticky_walk, sizeof (ilb_walk_t)); + return (WALK_DONE); + } + + wsp->walk_data = sticky_walk; + for (i = 0; i < sticky_walk->ilbs.ilbs_sticky_hash_size; i++) { + list_t head; + char *khead; + + /* Read in the nsh_head in the i-th element of the array. */ + khead = (char *)sticky_walk->ilbs.ilbs_sticky_hash + i * + sizeof (ilb_sticky_hash_t); + if (mdb_vread(&head, sizeof (list_t), (uintptr_t)khead) == -1) { + mdb_warn("failed to read ilbs_sticky_hash at %p\n", + khead); + return (WALK_ERR); + } + + /* + * Note that list_next points to a kernel address and we need + * to compare list_next with the kernel address of the list + * head. So we need to calculate the address manually. 
+ */ + if ((char *)head.list_head.list_next != khead + + offsetof(list_t, list_head)) { + st = list_object(&head, head.list_head.list_next); + break; + } + } + + if (st == NULL) + return (WALK_DONE); + + wsp->walk_addr = (uintptr_t)st; + sticky_walk->idx = i; + return (WALK_NEXT); +} + +static int +ilb_sticky_walk_step(mdb_walk_state_t *wsp) +{ + int status; + ilb_sticky_t st, *st_next; + ilb_walk_t *sticky_walk; + ilb_stack_t *ilbs; + list_t head; + char *khead; + int i; + + if (mdb_vread(&st, sizeof (ilb_sticky_t), wsp->walk_addr) == -1) { + mdb_warn("failed to read ilb_sticky_t at %p", wsp->walk_addr); + return (WALK_ERR); + } + + status = wsp->walk_callback(wsp->walk_addr, &st, wsp->walk_cbdata); + if (status != WALK_NEXT) + return (status); + + sticky_walk = (ilb_walk_t *)wsp->walk_data; + ilbs = &sticky_walk->ilbs; + i = sticky_walk->idx; + + /* Read in the nsh_head in the i-th element of the array. */ + khead = (char *)ilbs->ilbs_sticky_hash + i * sizeof (ilb_sticky_hash_t); + if (mdb_vread(&head, sizeof (list_t), (uintptr_t)khead) == -1) { + mdb_warn("failed to read ilbs_sticky_hash at %p\n", khead); + return (WALK_ERR); + } + + /* + * Check if there is still entry in the current list. + * + * Note that list_next points to a kernel address and we need to + * compare list_next with the kernel address of the list head. + * So we need to calculate the address manually. + */ + if ((char *)st.list.list_next != khead + offsetof(list_t, + list_head)) { + wsp->walk_addr = (uintptr_t)list_object(&head, + st.list.list_next); + return (WALK_NEXT); + } + + /* Start with the next bucket in the array. 
*/ + st_next = NULL; + /* bound the scan by the sticky table's own size, not the NAT one */ + for (i++; i < ilbs->ilbs_sticky_hash_size; i++) { + khead = (char *)ilbs->ilbs_sticky_hash + i * + sizeof (ilb_sticky_hash_t); + if (mdb_vread(&head, sizeof (list_t), (uintptr_t)khead) == -1) { + mdb_warn("failed to read ilbs_sticky_hash at %p\n", + khead); + return (WALK_ERR); + } + + if ((char *)head.list_head.list_next != khead + + offsetof(list_t, list_head)) { + st_next = list_object(&head, + head.list_head.list_next); + break; + } + } + + if (st_next == NULL) + return (WALK_DONE); + + wsp->walk_addr = (uintptr_t)st_next; + sticky_walk->idx = i; + return (WALK_NEXT); +} diff --git a/usr/src/cmd/mdb/intel/amd64/ip/Makefile b/usr/src/cmd/mdb/intel/amd64/ip/Makefile index 6789adbba9..5112aeaac0 100644 --- a/usr/src/cmd/mdb/intel/amd64/ip/Makefile +++ b/usr/src/cmd/mdb/intel/amd64/ip/Makefile @@ -2,9 +2,8 @@ # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. @@ -20,10 +19,9 @@ # CDDL HEADER END # # -# Copyright 2004 Sun Microsystems, Inc. All rights reserved. +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# -#ident "%Z%%M% %I% %E% SMI" MODULE = ip.so MDBTGT = kvm @@ -34,3 +32,5 @@ include ../../../../Makefile.cmd include ../../../../Makefile.cmd.64 include ../../Makefile.amd64 include ../../../Makefile.module + +CPPFLAGS += -I$(SRC)/uts/common/inet diff --git a/usr/src/cmd/mdb/intel/ia32/ip/Makefile b/usr/src/cmd/mdb/intel/ia32/ip/Makefile index b655ee078f..062b2b6cf2 100644 --- a/usr/src/cmd/mdb/intel/ia32/ip/Makefile +++ b/usr/src/cmd/mdb/intel/ia32/ip/Makefile @@ -2,9 +2,8 @@ # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. @@ -20,10 +19,9 @@ # CDDL HEADER END # # -# Copyright (c) 1998-1999 by Sun Microsystems, Inc. -# All rights reserved. +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. # -#ident "%Z%%M% %I% %E% SMI" MODULE = ip.so MDBTGT = kvm @@ -33,3 +31,5 @@ MODSRCS = ip.c include ../../../../Makefile.cmd include ../../Makefile.ia32 include ../../../Makefile.module + +CPPFLAGS += -I$(SRC)/uts/common/inet diff --git a/usr/src/cmd/mdb/sparc/v9/ip/Makefile b/usr/src/cmd/mdb/sparc/v9/ip/Makefile index 1694830450..7d5ab01461 100644 --- a/usr/src/cmd/mdb/sparc/v9/ip/Makefile +++ b/usr/src/cmd/mdb/sparc/v9/ip/Makefile @@ -2,9 +2,8 @@ # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). 
+# You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. @@ -20,10 +19,9 @@ # CDDL HEADER END # # -# Copyright (c) 1998-2000 by Sun Microsystems, Inc. -# All rights reserved. +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. # -#ident "%Z%%M% %I% %E% SMI" MODULE = ip.so MDBTGT = kvm @@ -34,3 +32,5 @@ include ../../../../Makefile.cmd include ../../../../Makefile.cmd.64 include ../../Makefile.sparcv9 include ../../../Makefile.module + +CPPFLAGS += -I$(SRC)/uts/common/inet diff --git a/usr/src/head/auth_list.h b/usr/src/head/auth_list.h index d1ac5bc55e..c9b68fd1ef 100644 --- a/usr/src/head/auth_list.h +++ b/usr/src/head/auth_list.h @@ -45,6 +45,8 @@ extern "C" { #define LINK_SEC_AUTH "solaris.network.link.security" #define MAILQ_AUTH "solaris.mail.mailq" #define NET_AUTOCONF_AUTH "solaris.network.autoconf" +#define NET_ILB_CONFIG_AUTH "solaris.network.ilb.config" +#define NET_ILB_ENABLE_AUTH "solaris.network.ilb.enable" #define SET_DATE_AUTH "solaris.system.date" #define WIFI_CONFIG_AUTH "solaris.network.wifi.config" #define WIFI_WEP_AUTH "solaris.network.wifi.wep" diff --git a/usr/src/lib/Makefile b/usr/src/lib/Makefile index 0fac076985..32e259be65 100644 --- a/usr/src/lib/Makefile +++ b/usr/src/lib/Makefile @@ -255,6 +255,7 @@ SUBDIRS += \ libvscan \ libgrubmgmt \ smbsrv \ + libilb \ scsi \ mms \ libima \ @@ -329,6 +330,7 @@ MSGSUBDIRS= \ libidmap \ libinetcfg \ libipmp \ + libilb \ libinetutil \ libinstzones \ libnsl \ @@ -487,6 +489,7 @@ HDRSUBDIRS= \ libvscan \ libgrubmgmt \ smbsrv \ + libilb \ scsi \ hbaapi \ smhba \ @@ -644,6 +647,7 @@ pyzfs: libnvpair libsec libidmap libzfs libreparse: libnvpair libhotplug: libnvpair cfgadm_plugins: libhotplug +libilb: libsocket # # The reason this rule checks for the existence of the diff --git a/usr/src/lib/libbsm/audit_event.txt 
b/usr/src/lib/libbsm/audit_event.txt index 219ca21a69..3bdacb0477 100644 --- a/usr/src/lib/libbsm/audit_event.txt +++ b/usr/src/lib/libbsm/audit_event.txt @@ -511,6 +511,21 @@ 6300:AUE_nwam_attach:attach nwam user:ss 6301:AUE_nwam_detach:detach nwam user:ss # +# ilbd(1M) events +# +6310:AUE_ilb_create_healthcheck:create ILB health check:as +6311:AUE_ilb_delete_healthcheck:delete ILB health check:as +6312:AUE_ilb_create_rule:create ILB rule:as +6313:AUE_ilb_delete_rule:delete ILB rule:as +6314:AUE_ilb_disable_rule:disable ILB rule:as +6315:AUE_ilb_enable_rule:enable ILB rule:as +6316:AUE_ilb_add_server:add ILB server:as +6317:AUE_ilb_disable_server:disable ILB server:as +6318:AUE_ilb_enable_server:enable ILB server:as +6319:AUE_ilb_remove_server:remove ILB server:as +6320:AUE_ilb_create_servergroup:create ILB server group:as +6321:AUE_ilb_delete_servergroup:delete ILB server group:as +# # TCSD(8) events # 6400:AUE_tpm_takeownership:take ownership of TPM:as diff --git a/usr/src/lib/libbsm/common/adt.xml b/usr/src/lib/libbsm/common/adt.xml index 8ab6e24422..6acabbff70 100644 --- a/usr/src/lib/libbsm/common/adt.xml +++ b/usr/src/lib/libbsm/common/adt.xml @@ -2091,8 +2091,450 @@ Use is subject to license terms. 
</entry> </event> + <event id="AUE_ilb_create_healthcheck" header="0" idNo="120" omit="JNI"> + <title>Integrated Loadbalancer</title> + <program>/usr/sbin/ilbadm</program> + <see>ilbadm(1m)</see> + <entry id="subject"> + <internal token="subject"/> + <external opt="none"/> + </entry> + <entry id="auth_used"> + <internal token="uauth"/> + <external opt="required" type="char *"/> + <comment>authorization used</comment> + </entry> + <entry id="hc_test"> + <internal token="path"/> + <external opt="required" type="char *"/> + <comment>healthcheck type-PING,TCP,UDP or 3rd party script</comment> + </entry> + <entry id="hc_name"> + <internal token="text"/> + <external opt="required" type="char *"/> + <comment>healthcheck name</comment> + </entry> + <entry id="hc_timeout"> + <internal token="text"/> + <external opt="required" type="int32_t" /> + <comment>timeout(secs) to kill a hung healthcheck probe + - 0 means default value (see man page) + </comment> + </entry> + <entry id="hc_count"> + <internal token="text"/> + <external opt="required" type="int"/> + <comment>number of times to run a health check probe + before declaring a server to be dead - 0 means + default value (see man page) + </comment> + </entry> + <entry id="hc_interval"> + <internal token="text"/> + <external opt="required" type="int32_t"/> + <comment>time(secs) between 2 healthcheck events - + 0 means default value(see man page) + </comment> + </entry> + <entry id="return"> + <internal token="return"/> + <external opt="none"/> + </entry> + </event> + + <event id="AUE_ilb_delete_healthcheck" header="0" idNo="121" omit="JNI"> + <title>Integrated Loadbalancer</title> + <program>/usr/sbin/ilbadm</program> + <see>ilbadm(1m)</see> + <entry id="subject"> + <internal token="subject"/> + <external opt="none"/> + </entry> + <entry id="auth_used"> + <internal token="uauth"/> + <external opt="required" type="char *"/> + <comment>authorization used</comment> + </entry> + <entry id="hc_name"> + <internal token="text"/> + 
<external opt="required" type="char *"/> + <comment>healthcheck name</comment> + </entry> + <entry id="return"> + <internal token="return"/> + <external opt="none"/> + </entry> + </event> + + <!-- + virtual_address and proxy-src token are set to be char *. + But they should be in6_addr See audit bug 6864075 . + --> + <event id="AUE_ilb_create_rule" header="0" idNo="122" omit="JNI"> + <title>Integrated Loadbalancer</title> + <program>/usr/sbin/ilbadm</program> + <see>ilbadm(1m)</see> + <entry id="subject"> + <internal token="subject"/> + <external opt="none"/> + </entry> + <entry id="auth_used"> + <internal token="uauth"/> + <external opt="required" type="char *"/> + <comment>authorization used</comment> + </entry> + <entry id="virtual_ipaddress"> + <internal token="text"/> + <external opt="required" type="char *"/> + <comment>LB virtual IP address</comment> + </entry> + <entry id="min_port"> + <internal token="text"/> + <external opt="required" type="uint32_t"/> + <comment>minimum value in port range</comment> + </entry> + <entry id="max_port"> + <internal token="text"/> + <external opt="required" type="uint32_t"/> + <comment>maximum value in port range - max=min means single + port is specified + </comment> + </entry> + <entry id="protocol"> + <internal token="text"/> + <external opt="required" type="char *"/> + <comment>protocol</comment> + </entry> + <entry id="algo_optype"> + <internal token="text"/> + <external opt="required" type="char *"/> + <comment>[rr,hip,hipp,hipv],[dsr,nat,half-nat]</comment> + </entry> + <entry id="proxy_src"> + <internal token="text"/> + <external opt="optional" type="char *"/> + <comment>proxy source address for NAT - may be single + address or a address range + </comment> + </entry> + <entry id="persist_mask"> + <internal token="text"/> + <external opt="required" type="char *"/> + <comment>prefix length</comment> + </entry> + <entry id="hcname"> + <internal token="text"/> + <external opt="optional" type="char *"/> + 
<comment>healthcheck name</comment> + </entry> + <entry id="hcport"> + <internal token="text"/> + <external opt="optional" type="char *"/> + <comment>healthcheck port - ANY(dynamically determined by ilbd) + or a positive integer + </comment> + </entry> + <entry id="conndrain_timeout"> + <internal token="text"/> + <external opt="required" type="uint32_t"/> + <comment>connection timeout for NAT/half-NAT in sec. - 0 means + no forced removal) + </comment> + </entry> + <entry id="nat_timeout"> + <internal token="text"/> + <external opt="required" type="uint32_t"/> + <comment>nat entry timeout for NAT/half-NAT in sec - 0 means + default value(see man page) + </comment> + </entry> + <entry id="persist_timeout"> + <internal token="text"/> + <external opt="required" type="uint32_t"/> + <comment>session persistence mapping in sec - 0 means no + persistence + </comment> + </entry> + <entry id="server_group"> + <internal token="text"/> + <external opt="required" type="char *"/> + <comment>server group name</comment> + </entry> + <entry id="rule_name"> + <internal token="text"/> + <external opt="required" type="char *"/> + <comment>rule name</comment> + </entry> + <entry id="return"> + <internal token="return"/> + <external opt="none"/> + </entry> + </event> + +<!-- generic ILB rule event --> + + <event id="AUE_generic_ILB_rule" type="generic" omit="always"> + <entry id="subject"> + <internal token="subject"/> + <external opt="none"/> + </entry> + <entry id="auth_used"> + <internal token="uauth"/> + <external opt="required" type="char *"/> + <comment>authorization used</comment> + </entry> + <entry id="rule_name"> + <internal token="text"/> + <external opt="required" type="char *"/> + <comment>rule name - "all" means all rules</comment> + </entry> + <entry id="return"> + <internal token="return"/> + <external opt="none"/> + </entry> + </event> + +<!-- instances of the ILB generic rule event. 
--> + <event id="AUE_ilb_delete_rule" instance_of="AUE_generic_ILB_rule" + header="0" idNo="123"> + <title>Integrated Loadbalancer</title> + <program>/usr/sbin/ilbadm</program> + <see>ilbadm(1m)</see> + </event> + + <event id="AUE_ilb_disable_rule" instance_of="AUE_generic_ILB_rule" + header="0" idNo="124"> + <title>Integrated Loadbalancer</title> + <program>/usr/sbin/ilbadm</program> + <see>ilbadm(1m)</see> + </event> + + <event id="AUE_ilb_enable_rule" instance_of="AUE_generic_ILB_rule" + header="0" idNo="125"> + <title>Integrated Loadbalancer</title> + <program>/usr/sbin/ilbadm</program> + <see>ilbadm(1m)</see> + </event> + + <!-- + server_ipaddress token is set to be char *. But it should be + in6_addr See audit bug 6864075. + --> + <event id="AUE_ilb_add_server" header="0" idNo="126" omit="JNI"> + <title>Integrated Loadbalancer</title> + <program>/usr/sbin/ilbadm</program> + <see>ilbadm(1m)</see> + <entry id="subject"> + <internal token="subject"/> + <external opt="none"/> + </entry> + <entry id="auth_used"> + <internal token="uauth"/> + <external opt="required" type="char *"/> + <comment>authorization used</comment> + </entry> + <entry id="server_ipaddress"> + <internal token="text"/> + <external opt="required" type="char *"/> + <comment>IP address</comment> + </entry> + <entry id="server_id"> + <internal token="text"/> + <external opt="optional" type="char *"/> + <comment>serverid that corresponds IP address - empty + if authorization fails, user specified IP address + is invalid or server cannot be added because + server group is full + </comment> + </entry> + <entry id="server_group"> + <internal token="text"/> + <external opt="required" type="char *"/> + <comment>server group name</comment> + </entry> + <entry id="server_minport"> + <internal token="text"/> + <external opt="optional" type="uint32_t" /> + <comment>server's minimum value in port range - empty + means default value (see man page) + </comment> + </entry> + <entry id="server_maxport"> + 
<internal token="text"/> + <external opt="optional" type="uint32_t" /> + <comment>server's maximum value in port range - empty + means default value(see man page) + </comment> + </entry> + <entry id="return"> + <internal token="return"/> + <external opt="none"/> + </entry> + </event> + + <!-- + server_ipaddress token is set to be char *. But it should be + in6_addr See audit bug 6864075. + --> + <event id="AUE_ilb_disable_server" header="0" idNo="127" omit="JNI"> + <title>Integrated Loadbalancer</title> + <program>/usr/sbin/ilbadm</program> + <see>ilbadm(1m)</see> + <entry id="subject"> + <internal token="subject"/> + <external opt="none"/> + </entry> + <entry id="auth_used"> + <internal token="uauth"/> + <external opt="required" type="char *"/> + <comment>authorization used</comment> + </entry> + <entry id="server_id"> + <internal token="text"/> + <external opt="required" type="char *"/> + <comment>serverid</comment> + </entry> + <entry id="server_ipaddress"> + <internal token="text"/> + <external opt="optional" type="char *"/> + <comment>IPaddr corresponding to the serverid - empty + if authorization fails, or user specified serverid + is nonexistent + </comment> + </entry> + <entry id="return"> + <internal token="return"/> + <external opt="none"/> + </entry> + </event> + + <!-- + server_ipaddress token is set to be char *. But it should be + in6_addr See audit bug 6864075. 
+ --> + <event id="AUE_ilb_enable_server" header="0" idNo="128" omit="JNI"> + <title>Integrated Loadbalancer</title> + <program>/usr/sbin/ilbadm</program> + <see>ilbadm(1m)</see> + <entry id="subject"> + <internal token="subject"/> + <external opt="none"/> + </entry> + <entry id="auth_used"> + <internal token="uauth"/> + <external opt="required" type="char *"/> + <comment>authorization used</comment> + </entry> + <entry id="server_id"> + <internal token="text"/> + <external opt="required" type="char *"/> + <comment>serverid</comment> + </entry> + <entry id="server_ipaddress"> + <internal token="text"/> + <external opt="optional" type="char *"/> + <comment>IPaddr corresponding to the serverid - empty + if authorization fails, or user specified serverid + is nonexistent + </comment> + </entry> + <entry id="return"> + <internal token="return"/> + <external opt="none"/> + </entry> + </event> + + <!-- + server_ipaddress token is set to be char *. But it should be + in6_addr See audit bug 6864075 . 
+ --> + <event id="AUE_ilb_remove_server" header="0" idNo="129" omit="JNI"> + <title>Integrated Loadbalancer</title> + <program>/usr/sbin/ilbadm</program> + <see>ilbadm(1m)</see> + <entry id="subject"> + <internal token="subject"/> + <external opt="none"/> + </entry> + <entry id="auth_used"> + <internal token="uauth"/> + <external opt="required" type="char *"/> + <comment>authorization used</comment> + </entry> + <entry id="server_id"> + <internal token="text"/> + <external opt="required" type="char *"/> + <comment>serverid</comment> + </entry> + <entry id="server_group"> + <internal token="text"/> + <external opt="required" type="char *"/> + <comment>server group name</comment> + </entry> + <entry id="server_ipaddress"> + <internal token="text"/> + <external opt="optional" type="char *"/> + <comment>IPaddr corresponding to serverid - empty + if authorization fails or user specified serverid + serverid is nonexistent + </comment> + </entry> + <entry id="return"> + <internal token="return"/> + <external opt="none"/> + </entry> + </event> + + <event id="AUE_ilb_create_servergroup" header="0" idNo="130" omit="JNI"> + <title>Integrated Loadbalancer</title> + <program>/usr/sbin/ilbadm</program> + <see>ilbadm(1m)</see> + <entry id="subject"> + <internal token="subject"/> + <external opt="none"/> + </entry> + <entry id="auth_used"> + <internal token="uauth"/> + <external opt="required" type="char *"/> + <comment>authorization used</comment> + </entry> + <entry id="server_group"> + <internal token="text"/> + <external opt="required" type="char *"/> + <comment>server group name</comment> + </entry> + <entry id="return"> + <internal token="return"/> + <external opt="none"/> + </entry> + </event> + + <event id="AUE_ilb_delete_servergroup" header="0" idNo="131" omit="JNI"> + <title>Integrated Loadbalancer</title> + <program>/usr/sbin/ilbadm</program> + <see>ilbadm(1m)</see> + <entry id="subject"> + <internal token="subject"/> + <external opt="none"/> + </entry> + <entry 
id="auth_used"> + <internal token="uauth"/> + <external opt="required" type="char *"/> + <comment>authorization used</comment> + </entry> + <entry id="server_group"> + <internal token="text"/> + <external opt="required" type="char *"/> + <comment>server group name</comment> + </entry> + <entry id="return"> + <internal token="return"/> + <external opt="none"/> + </entry> + </event> + <!-- add new events here with the next higher idNo --> -<!-- Highest idNo is 119, so next is 120, then fix this comment --> +<!-- Highest idNo is 131, so next is 132, then fix this comment --> <!-- end of C Only events --> <!-- diff --git a/usr/src/lib/libilb/Makefile b/usr/src/lib/libilb/Makefile new file mode 100644 index 0000000000..af3afb9ebe --- /dev/null +++ b/usr/src/lib/libilb/Makefile @@ -0,0 +1,61 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# + +include $(SRC)/lib/Makefile.lib + +HDRS = libilb.h +HDRDIR = common +SUBDIRS = $(MACH) +$(BUILD64)SUBDIRS += $(MACH64) + +POFILE = libilb.po +MSGFILES = common/ilb_subr.c + +all := TARGET = all +clean := TARGET = clean +clobber := TARGET = clobber +install := TARGET = install +lint := TARGET = lint + +.KEEP_STATE: + +all clean clobber install lint: $(SUBDIRS) + +install_h: $(ROOTHDRS) + +check: $(CHECKHDRS) + +$(POFILE): $(MSGFILES) + $(BUILDPO.msgfiles) + +_msg: $(MSGDOMAINPOFILE) + +$(SUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(TARGET) + +FRC: + +include $(SRC)/Makefile.msg.targ +include $(SRC)/lib/Makefile.targ diff --git a/usr/src/lib/libilb/Makefile.com b/usr/src/lib/libilb/Makefile.com new file mode 100644 index 0000000000..39f32889b0 --- /dev/null +++ b/usr/src/lib/libilb/Makefile.com @@ -0,0 +1,73 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# + +LIBRARY = libilb.a +VERS = .1 + +LIB_OBJS = ilb_sg.o ilb_comm.o ilb_subr.o ilb_rules.o +LIB_OBJS += ilb_hc.o ilb_nat.o + +# We don't have a userland-accessible implementation of list handling +# so we use the one in uts (filched off libzpool) +LIST_OBJS = list.o +OBJECTS = $(LIB_OBJS) $(LIST_OBJS) + +include ../../Makefile.lib + +LIB_SRCS= $(LIB_OBJS:%.o=$(SRCDIR)/%.c) +LIBS = $(DYNLIB) $(LINTLIB) +INCS += -I../common -I$(SRC)/uts/common +LDLIBS += -lc + +SRCDIR = ../common +$(LINTLIB) := SRCS = $(SRCDIR)/$(LINTSRC) + +C99MODE = $(C99_ENABLE) + +# use for prod: +CFLAGS += -mt $(CCVERBOSE) +CPPFLAGS += $(INCS) +LDLIBS += -lsocket + +# use for debug: +CFLAGS += -g +STRIP_STABS= : +CTFCVTFLAGS += -g +CTFMERGE_LIB = $(CTFMERGE) -g -t -f -L VERSION -o $@ $(PICS) +DYNFLAGS += -g + +.KEEP_STATE: + +all: $(LIBS) + +lint: $(LIB_SRCS) + $(LINT.c) $(LINTCHECKFLAGS) $(LIB_SRCS) $(LDLIBS) + +include ../../Makefile.targ + +# the below is needed to get list.o built +pics/%.o: ../../../uts/common/os/%.c + $(COMPILE.c) -o $@ $< + $(POST_PROCESS_O) diff --git a/usr/src/lib/libilb/amd64/Makefile b/usr/src/lib/libilb/amd64/Makefile new file mode 100644 index 0000000000..5924246775 --- /dev/null +++ b/usr/src/lib/libilb/amd64/Makefile @@ -0,0 +1,36 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# +# lib/libilb/amd64/Makefile +# + +include ../Makefile.com +include ../../Makefile.lib.64 + +.KEEP_STATE: + +all: $(LIBS) + +install: all $(ROOTLIBS64) $(ROOTLINKS64) diff --git a/usr/src/lib/libilb/common/ilb_comm.c b/usr/src/lib/libilb/common/ilb_comm.c new file mode 100644 index 0000000000..ff8cdca3a1 --- /dev/null +++ b/usr/src/lib/libilb/common/ilb_comm.c @@ -0,0 +1,235 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include <stdlib.h> +#include <strings.h> +#include <unistd.h> +#include <stddef.h> +#include <assert.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <thread.h> +#include <synch.h> +#include <libilb_impl.h> +#include <libilb.h> + +/* Assertion: the calling thread has a hold on the handle */ +static void +i_ilb_socket_set_err(ilb_handle_t h, ilb_status_t err) +{ + ilb_handle_impl_t *hi = (ilb_handle_impl_t *)h; + + if (h == ILB_INVALID_HANDLE) + return; + hi->h_valid = B_FALSE; + hi->h_error = err; +} + +/* + * Allocate a handle and connect it to the SOCKET_PATH UNIX socket. + * On success *hp receives the new handle; on failure *hp is set to + * ILB_INVALID_HANDLE and the socket, if any, is closed. + */ +ilb_status_t +ilb_open(ilb_handle_t *hp) +{ + ilb_handle_impl_t *hi = NULL; + int s = -1; + struct sockaddr_un sa = {AF_UNIX, SOCKET_PATH}; + ilb_status_t rc = ILB_STATUS_OK; + int sobufsz; + + if (hp == NULL) + return (ILB_STATUS_EINVAL); + + hi = calloc(sizeof (*hi), 1); + if (hi == NULL) + return (ILB_STATUS_ENOMEM); + + if (cond_init(&hi->h_cv, USYNC_THREAD, NULL) != 0) { + rc = ILB_STATUS_INTERNAL; + goto out; + } + + if (mutex_init(&hi->h_lock, USYNC_THREAD | LOCK_ERRORCHECK, NULL) + != 0) { + rc = ILB_STATUS_INTERNAL; + goto out; + } + + hi->h_busy = B_FALSE; + + if ((s = socket(PF_UNIX, SOCK_SEQPACKET, 0)) == -1 || + connect(s, (struct sockaddr *)&sa, sizeof (sa.sun_path)) + == -1) { + rc = ILB_STATUS_SOCKET; + goto out; + } + + /* + * The socket buffer must be at least the max size of a message. + * On failure the socket is closed exactly once, at "out". 
+ */ + sobufsz = ILBD_MSG_SIZE; + if (setsockopt(s, SOL_SOCKET, SO_SNDBUF, &sobufsz, + sizeof (sobufsz)) != 0) { + rc = ILB_STATUS_SOCKET; + goto out; + } + if (setsockopt(s, SOL_SOCKET, SO_RCVBUF, &sobufsz, + sizeof (sobufsz)) != 0) { + rc = ILB_STATUS_SOCKET; + goto out; + } + + hi->h_socket = s; + hi->h_valid = B_TRUE; + +out: + if (rc != ILB_STATUS_OK && s != -1) + (void) close(s); + + if (rc == ILB_STATUS_OK) { + *hp = (ilb_handle_t)hi; + } else { + free(hi); + *hp = ILB_INVALID_HANDLE; + } + return (rc); +} + +/* + * Mark the handle as closing, wait until no i_ilb_do_comm() caller is + * left on it, then close the socket and free the handle. + */ +ilb_status_t +ilb_close(ilb_handle_t h) +{ + ilb_handle_impl_t *hi = (ilb_handle_impl_t *)h; + + if 
(h == ILB_INVALID_HANDLE) + return (ILB_STATUS_EINVAL); + + if (mutex_lock(&hi->h_lock) != 0) + return (ILB_STATUS_INTERNAL); + + /* Somebody has done a close, no need to do anything. */ + if (hi->h_closing) { + /* Drop the lock taken above before bailing out. */ + (void) mutex_unlock(&hi->h_lock); + return (ILB_STATUS_OK); + } else { + hi->h_closing = B_TRUE; + hi->h_error = ILB_STATUS_HANDLE_CLOSING; + } + + /* Wait until there is nobody waiting. */ + while (hi->h_waiter > 0) { + if (cond_wait(&hi->h_cv, &hi->h_lock) != 0) { + (void) mutex_unlock(&hi->h_lock); + return (ILB_STATUS_INTERNAL); + } + } + /* No one is waiting, proceed to free the handle. */ + + (void) close(hi->h_socket); + (void) mutex_destroy(&hi->h_lock); + (void) cond_destroy(&hi->h_cv); + free(hi); + return (ILB_STATUS_OK); +} + +/* + * Unified routine to communicate with ilbd. + * + * If ic is non-NULL, it means that the caller wants to send something + * to ilbd and expects a reply. If ic is NULL, it means that the caller + * only expects to receive from ilbd. + * + * The rbuf is the buffer supplied by the caller for receiving. If it + * is NULL, it means that there is no reply expected. + * + * This function will not close() the socket to kernel unless there is + * an error. If the transaction only consists of one exchange, the caller + * can use i_ilb_close_comm() to close() the socket when done. 
+ */ +ilb_status_t +i_ilb_do_comm(ilb_handle_t h, ilb_comm_t *ic, size_t ic_sz, ilb_comm_t *rbuf, + size_t *rbufsz) +{ + ilb_status_t rc = ILB_STATUS_OK; + int r, s; + ilb_handle_impl_t *hi = (ilb_handle_impl_t *)h; + + assert(rbuf != NULL); + if (h == ILB_INVALID_HANDLE) + return (ILB_STATUS_EINVAL); + + if (mutex_lock(&hi->h_lock) != 0) + return (ILB_STATUS_INTERNAL); + + hi->h_waiter++; + while (hi->h_busy) { + if (cond_wait(&hi->h_cv, &hi->h_lock) != 0) { + hi->h_waiter--; + (void) cond_signal(&hi->h_cv); + (void) mutex_unlock(&hi->h_lock); + return (ILB_STATUS_INTERNAL); + } + } + + if (!hi->h_valid || hi->h_closing) { + hi->h_waiter--; + (void) cond_signal(&hi->h_cv); + (void) mutex_unlock(&hi->h_lock); + return (hi->h_error); + } + + hi->h_busy = B_TRUE; + (void) mutex_unlock(&hi->h_lock); + + s = hi->h_socket; + + r = send(s, ic, ic_sz, 0); + if (r < ic_sz) { + rc = ILB_STATUS_WRITE; + goto socket_error; + } + rc = ILB_STATUS_OK; + + if ((r = recv(s, rbuf, *rbufsz, 0)) <= 0) { + rc = ILB_STATUS_READ; + } else { + *rbufsz = r; + goto out; + } + +socket_error: + i_ilb_socket_set_err(h, rc); + +out: + (void) mutex_lock(&hi->h_lock); + hi->h_busy = B_FALSE; + hi->h_waiter--; + (void) cond_signal(&hi->h_cv); + (void) mutex_unlock(&hi->h_lock); + + return (rc); +} + +void +i_ilb_close_comm(ilb_handle_t h) +{ + (void) ilb_close(h); +} diff --git a/usr/src/lib/libilb/common/ilb_hc.c b/usr/src/lib/libilb/common/ilb_hc.c new file mode 100644 index 0000000000..dc813320f2 --- /dev/null +++ b/usr/src/lib/libilb/common/ilb_hc.c @@ -0,0 +1,276 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <stdlib.h> +#include <strings.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <stddef.h> +#include <libilb_impl.h> +#include <libilb.h> + +/* + * Create a health check, returning a health check handle upon success. + * Health check created will be recorded in persistent datastore. + */ +ilb_status_t +ilb_create_hc(ilb_handle_t h, const ilb_hc_info_t *hc) +{ + ilb_status_t rc; + ilb_comm_t *ic; + size_t ic_sz; + + if (h == ILB_INVALID_HANDLE || hc == NULL || *hc->hci_name == '\0' || + hc->hci_timeout < 0 || hc->hci_count < 0 || + hc->hci_interval <= hc->hci_timeout * hc->hci_count) + return (ILB_STATUS_EINVAL); + + if ((ic = i_ilb_alloc_req(ILBD_CREATE_HC, &ic_sz)) == NULL) + return (ILB_STATUS_ENOMEM); + + (void) memcpy(&ic->ic_data, hc, sizeof (ilb_hc_info_t)); + + rc = i_ilb_do_comm(h, ic, ic_sz, ic, &ic_sz); + if (rc != ILB_STATUS_OK) + goto out; + + if (ic->ic_cmd != ILBD_CMD_OK) + rc = *(ilb_status_t *)&ic->ic_data; + +out: + free(ic); + return (rc); +} + +/* + * Given a health check handle, destroy the corresponding health check. + * Persistent datastore will be updated as well. 
+ */ +ilb_status_t +ilb_destroy_hc(ilb_handle_t h, const char *hcname) +{ + ilb_status_t rc; + ilb_comm_t *ic; + size_t ic_sz; + + if (h == ILB_INVALID_HANDLE || hcname == NULL || *hcname == '\0') + return (ILB_STATUS_EINVAL); + + if ((ic = i_ilb_alloc_req(ILBD_DESTROY_HC, &ic_sz)) == NULL) + return (ILB_STATUS_ENOMEM); + + (void) strlcpy((char *)&ic->ic_data, hcname, sizeof (ilbd_name_t)); + + rc = i_ilb_do_comm(h, ic, ic_sz, ic, &ic_sz); + if (rc != ILB_STATUS_OK) + goto out; + + if (ic->ic_cmd != ILBD_CMD_OK) + rc = *(ilb_status_t *)&ic->ic_data; + +out: + free(ic); + return (rc); +} + +/* + * Given a health check name, get hc info associated with this handle + */ +ilb_status_t +ilb_get_hc_info(ilb_handle_t h, const char *name, ilb_hc_info_t *hcp) +{ + ilb_status_t rc; + ilb_comm_t *ic, *rbuf; + size_t ic_sz, rbufsz; + + if (h == ILB_INVALID_HANDLE || name == NULL || hcp == NULL) + return (ILB_STATUS_EINVAL); + + if ((ic = i_ilb_alloc_req(ILBD_GET_HC_INFO, &ic_sz)) == NULL) + return (ILB_STATUS_ENOMEM); + rbufsz = sizeof (ilb_comm_t) + sizeof (ilb_hc_info_t); + if ((rbuf = malloc(rbufsz)) == NULL) { + free(ic); + return (ILB_STATUS_ENOMEM); + } + + (void) strlcpy((char *)&ic->ic_data, name, sizeof (ilbd_name_t)); + + rc = i_ilb_do_comm(h, ic, ic_sz, rbuf, &rbufsz); + if (rc != ILB_STATUS_OK) + goto out; + + if (rbuf->ic_cmd != ILBD_CMD_OK) { + rc = *(ilb_status_t *)&rbuf->ic_data; + goto out; + } + (void) memcpy(hcp, &rbuf->ic_data, sizeof (*hcp)); + +out: + free(ic); + free(rbuf); + return (rc); +} + +/* + * Walk through all health checks, will need if we implement list-hc + */ +ilb_status_t +ilb_walk_hc(ilb_handle_t h, hc_walkerfunc_t func, void *arg) +{ + ilb_status_t rc; + ilb_hc_info_t hc_info; + ilbd_namelist_t *hc_names; + ilb_comm_t ic, *rbuf; + size_t rbufsz; + int i; + + rbufsz = ILBD_MSG_SIZE; + if ((rbuf = malloc(rbufsz)) == NULL) + return (ILB_STATUS_ENOMEM); + ic.ic_cmd = ILBD_RETRIEVE_HC_NAMES; + + rc = i_ilb_do_comm(h, &ic, sizeof (ic), rbuf, 
&rbufsz); + if (rc != ILB_STATUS_OK) + goto out; + if (rbuf->ic_cmd != ILBD_CMD_OK) { + rc = *(ilb_status_t *)&rbuf->ic_data; + goto out; + } + + hc_names = (ilbd_namelist_t *)&rbuf->ic_data; + for (i = 0; i < hc_names->ilbl_count; i++) { + rc = ilb_get_hc_info(h, hc_names->ilbl_name[i], &hc_info); + /* + * Since getting the list of hc names and getting the info + * of each of them are not atomic, some hc objects may have + * been deleted. If this is the case, just skip them. + */ + if (rc == ILB_STATUS_ENOENT) { + rc = ILB_STATUS_OK; + continue; + } else if (rc != ILB_STATUS_OK) { + break; + } + rc = func(h, &hc_info, arg); + } + +out: + free(rbuf); + return (rc); +} + +static ilb_status_t +ilb_get_hc_srvs(ilb_handle_t h, const char *rulename, ilb_comm_t **rbuf, + size_t *rbufsz) +{ + ilb_status_t rc; + ilb_comm_t *ic, *tmp_rbuf; + size_t ic_sz; + + if ((ic = i_ilb_alloc_req(ILBD_GET_HC_SRVS, &ic_sz)) == NULL) + return (ILB_STATUS_ENOMEM); + *rbufsz = ILBD_MSG_SIZE; + if ((tmp_rbuf = malloc(*rbufsz)) == NULL) { + free(ic); + return (ILB_STATUS_ENOMEM); + } + + (void) strlcpy((char *)&ic->ic_data, rulename, + sizeof (ilbd_name_t)); + + rc = i_ilb_do_comm(h, ic, ic_sz, tmp_rbuf, rbufsz); + if (rc != ILB_STATUS_OK) + goto out; + + if (tmp_rbuf->ic_cmd == ILBD_CMD_OK) { + *rbuf = tmp_rbuf; + return (rc); + } + rc = *(ilb_status_t *)&tmp_rbuf->ic_data; +out: + free(ic); + free(tmp_rbuf); + *rbuf = NULL; + return (rc); +} + +ilb_status_t +ilb_walk_hc_srvs(ilb_handle_t h, hc_srvwalkerfunc_t fn, const char *rulename, + void *arg) +{ + ilb_status_t rc; + ilb_hc_rule_srv_t *srvs; + int i, j; + ilb_comm_t *rbuf; + size_t rbufsz; + + if (rulename != NULL) { + rc = ilb_get_hc_srvs(h, rulename, &rbuf, &rbufsz); + if (rc != ILB_STATUS_OK) + return (rc); + srvs = (ilb_hc_rule_srv_t *)&rbuf->ic_data; + for (i = 0; i < srvs->rs_num_srvs; i++) { + rc = fn(h, &srvs->rs_srvs[i], arg); + if (rc != ILB_STATUS_OK) + break; + } + free(rbuf); + } else { + ilbd_namelist_t *names; + 
ilb_comm_t *srv_rbuf; + size_t srv_rbufsz; + + rc = i_ilb_retrieve_rule_names(h, &rbuf, &rbufsz); + if (rc != ILB_STATUS_OK) + return (rc); + names = (ilbd_namelist_t *)&rbuf->ic_data; + + for (i = 0; i < names->ilbl_count; i++) { + rc = ilb_get_hc_srvs(h, names->ilbl_name[i], + &srv_rbuf, &srv_rbufsz); + + /* Not all rules have HC, so reset the error to OK. */ + if (rc == ILB_STATUS_RULE_NO_HC) { + rc = ILB_STATUS_OK; + continue; + } else if (rc != ILB_STATUS_OK) { + break; + } + + srvs = (ilb_hc_rule_srv_t *)&srv_rbuf->ic_data; + for (j = 0; j < srvs->rs_num_srvs; j++) { + rc = fn(h, &srvs->rs_srvs[j], arg); + if (rc != ILB_STATUS_OK) + break; + } + free(srv_rbuf); + } + free(rbuf); + } + return (rc); +} diff --git a/usr/src/lib/libilb/common/ilb_nat.c b/usr/src/lib/libilb/common/ilb_nat.c new file mode 100644 index 0000000000..192d7b6d59 --- /dev/null +++ b/usr/src/lib/libilb/common/ilb_nat.c @@ -0,0 +1,180 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include <stdlib.h> +#include <strings.h> +#include <unistd.h> +#include <stddef.h> +#include <sys/types.h> +#include <sys/socket.h> +#include "libilb.h" +#include "libilb_impl.h" + +enum which_tbl { + show_nat = 1, + show_persist +}; + +/* The common function to show kernel info. */ +static ilb_status_t ilb_show_info(ilb_handle_t, char *, size_t *, boolean_t *, + enum which_tbl); + +/* + * To get the ILB NAT table. + * + * buf: The buffer to return the NAT table entries. + * num: The caller sets it to the number of ilb_nat_info_t entries buf can + * hold. On return, it contains the actual number of entries put in buf. + * end: The caller sets it to B_TRUE if it only wants at most num entries to + * be returned. The transaction to ilbd will be termianted when this + * call returns. + * The caller sets it to B_FALSE if it intends to get the whole table. + * If the whole table has more than num entries, the caller can call + * this function again to retrieve the rest of the table. + * On return, end is set to B_TRUE if end of table is reached; B_FALSE + * if there are still remaining entries. + */ +ilb_status_t +ilb_show_nat(ilb_handle_t h, ilb_nat_info_t buf[], size_t *num, + boolean_t *end) +{ + return (ilb_show_info(h, (char *)buf, num, end, show_nat)); +} + +/* + * To get the ILB persistent entry table. + * + * buf: The buffer to return the persistent table entries. + * num: The caller sets it to the number of ilb_persist_info_t entries buf can + * hold. On return, it contains the actual number of entries put in buf. + * end: The caller sets it to B_TRUE if it only wants at most num entries to + * be returned. The transaction to ilbd will be termianted when this + * call returns. + * The caller sets it to B_FALSE if it intends to get the whole table. + * If the whole table has more than num entries, the caller can call + * this function again to retrieve the rest of the table. 
+ * On return, end is set to B_TRUE if end of table is reached; B_FALSE + * if there are still remaining entries. + */ +ilb_status_t +ilb_show_persist(ilb_handle_t h, ilb_persist_info_t buf[], size_t *num, + boolean_t *end) +{ + return (ilb_show_info(h, (char *)buf, num, end, show_persist)); +} + +/* + * The function doing the work... The tbl parameter determines whith table + * to show. + */ +static ilb_status_t +ilb_show_info(ilb_handle_t h, char *buf, size_t *num, boolean_t *end, + enum which_tbl tbl) +{ + ilb_comm_t *req, *rbuf; + ilb_show_info_t *req_si, *tmp_si; + size_t reqsz, rbufsz, tmp_rbufsz, cur_num; + size_t entry_sz; + ilb_status_t rc; + + if (*num == 0) + return (ILB_STATUS_EINVAL); + + reqsz = sizeof (ilb_comm_t) + sizeof (ilb_show_info_t); + if ((req = malloc(reqsz)) == NULL) + return (ILB_STATUS_ENOMEM); + req_si = (ilb_show_info_t *)&req->ic_data; + + /* + * Need to allocate a receive buffer and then copy the buffer + * content to the passed in buf. The reason is that the + * communication to ilbd is message based and the protocol + * includes a header in the reply. We need to remove this header + * from the message, hence the copying... 
+ */ + if (tbl == show_nat) + entry_sz = sizeof (ilb_nat_info_t); + else + entry_sz = sizeof (ilb_persist_info_t); + rbufsz = *num * entry_sz + sizeof (ilb_comm_t) + + sizeof (ilb_show_info_t); + if ((rbuf = malloc(rbufsz)) == NULL) { + free(req); + return (ILB_STATUS_ENOMEM); + } + + if (tbl == show_nat) + req->ic_cmd = ILBD_SHOW_NAT; + else + req->ic_cmd = ILBD_SHOW_PERSIST; + req->ic_flags = 0; + req_si->sn_num = *num; + cur_num = 0; + + do { + tmp_rbufsz = rbufsz; + rc = i_ilb_do_comm(h, req, reqsz, rbuf, &tmp_rbufsz); + if (rc != ILB_STATUS_OK) + goto out; + if (rbuf->ic_cmd != ILBD_CMD_OK) { + rc = *(ilb_status_t *)&rbuf->ic_data; + goto out; + } + + tmp_si = (ilb_show_info_t *)&rbuf->ic_data; + + cur_num += tmp_si->sn_num; + bcopy(&tmp_si->sn_data, buf, tmp_si->sn_num * entry_sz); + buf += tmp_si->sn_num * entry_sz; + + /* + * Buffer is filled, regardless of this is the end of table or + * not, we need to stop. + */ + if (cur_num == *num) + break; + /* Try to fill in the rest. */ + req_si->sn_num = *num - cur_num; + } while (!(rbuf->ic_flags & ILB_COMM_END)); + + *num = cur_num; + + /* End of transaction, let the caller know. */ + if (rbuf->ic_flags & ILB_COMM_END) { + *end = B_TRUE; + } else { + /* The user wants to terminate the transaction */ + if (*end) { + req->ic_flags = ILB_COMM_END; + tmp_rbufsz = rbufsz; + rc = i_ilb_do_comm(h, req, reqsz, rbuf, &tmp_rbufsz); + } + } +out: + free(req); + free(rbuf); + return (rc); +} diff --git a/usr/src/lib/libilb/common/ilb_rules.c b/usr/src/lib/libilb/common/ilb_rules.c new file mode 100644 index 0000000000..df01041e4c --- /dev/null +++ b/usr/src/lib/libilb/common/ilb_rules.c @@ -0,0 +1,326 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + +#include <stdlib.h> +#include <strings.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/stropts.h> +#include <netinet/in.h> +#include <stddef.h> +#include "libilb.h" +#include "libilb_impl.h" + +/* ARGSUSED */ +static ilb_status_t +i_drop_hc(ilb_handle_t h, ilb_hc_info_t *hc, void *arg) +{ + return (ilb_destroy_hc(h, hc->hci_name)); +} + +/* ARGSUSED */ +static ilb_status_t +i_drop_rule(ilb_handle_t h, ilb_rule_data_t *rd, void *arg) +{ + return (ilb_destroy_rule(h, rd->r_name)); +} + +/* ARGSUSED */ +static ilb_status_t +i_drop_sg_srvs(ilb_handle_t h, ilb_server_data_t *srv, const char *sgname, + void *arg) +{ + return (ilb_rem_server_from_group(h, sgname, srv)); +} + +/* ARGSUSED */ +static ilb_status_t +i_drop_sg(ilb_handle_t h, ilb_sg_data_t *sg, void *arg) +{ + ilb_status_t rc; + + rc = ilb_walk_servers(h, i_drop_sg_srvs, sg->sgd_name, (void *)sg); + if (rc != ILB_STATUS_OK) + return (rc); + + return (ilb_destroy_servergroup(h, sg->sgd_name)); +} + +ilb_status_t +ilb_reset_config(ilb_handle_t h) +{ + ilb_status_t rc; + + if (h == NULL) + return (ILB_STATUS_EINVAL); + + rc = ilb_walk_rules(h, i_drop_rule, NULL, NULL); + if (rc != ILB_STATUS_OK) + goto out; + + rc = ilb_walk_servergroups(h, i_drop_sg, NULL, NULL); + if (rc != 
ILB_STATUS_OK) + goto out; + + rc = ilb_walk_hc(h, i_drop_hc, NULL); +out: + return (rc); +} + +ilb_status_t +ilb_create_rule(ilb_handle_t h, const ilb_rule_data_t *rd) +{ + ilb_status_t rc; + ilb_comm_t *ic; + size_t ic_sz; + ilb_rule_info_t *rl; + + if (h == ILB_INVALID_HANDLE || rd == NULL || *rd->r_name == '\0') + return (ILB_STATUS_EINVAL); + + if ((ic = i_ilb_alloc_req(ILBD_CREATE_RULE, &ic_sz)) == NULL) + return (ILB_STATUS_ENOMEM); + rl = (ilb_rule_info_t *)&ic->ic_data; + + /* + * Since the IP address representation in ilb_rule_data_t and + * ilb_rule_info_t is different, we need to convert between + * them. + */ + (void) strlcpy(rl->rl_name, rd->r_name, sizeof (rl->rl_name)); + (void) strlcpy(rl->rl_sgname, rd->r_sgname, sizeof (rl->rl_sgname)); + (void) strlcpy(rl->rl_hcname, rd->r_hcname, sizeof (rl->rl_hcname)); + rl->rl_flags = rd->r_flags; + rl->rl_proto = rd->r_proto; + rl->rl_ipversion = rd->r_vip.ia_af; + rl->rl_minport = rd->r_minport; + if (ntohs(rd->r_maxport) < ntohs(rd->r_minport)) + rl->rl_maxport = rd->r_minport; + else + rl->rl_maxport = rd->r_maxport; + rl->rl_algo = rd->r_algo; + rl->rl_topo = rd->r_topo; + rl->rl_conndrain = rd->r_conndrain; + rl->rl_nat_timeout = rd->r_nat_timeout; + rl->rl_sticky_timeout = rd->r_sticky_timeout; + rl->rl_hcport = rd->r_hcport; + rl->rl_hcpflag = rd->r_hcpflag; + + IP_COPY_CLI_2_IMPL(&rd->r_vip, &rl->rl_vip); + IP_COPY_CLI_2_IMPL(&rd->r_stickymask, &rl->rl_stickymask); + IP_COPY_CLI_2_IMPL(&rd->r_nat_src_start, &rl->rl_nat_src_start); + IP_COPY_CLI_2_IMPL(&rd->r_nat_src_end, &rl->rl_nat_src_end); + + rc = i_ilb_do_comm(h, ic, ic_sz, ic, &ic_sz); + if (rc != ILB_STATUS_OK) + goto out; + + if (ic->ic_cmd != ILBD_CMD_OK) + rc = *(ilb_status_t *)&ic->ic_data; + +out: + free(ic); + return (rc); +} + +static ilb_status_t +i_ilb_rule_action(ilb_handle_t h, const char *name, ilbd_cmd_t cmd) +{ + ilb_status_t rc; + ilb_comm_t *ic; + size_t ic_sz; + + if (h == ILB_INVALID_HANDLE) + return (ILB_STATUS_EINVAL); + + 
if ((ic = i_ilb_alloc_req(cmd, &ic_sz)) == NULL) + return (ILB_STATUS_ENOMEM); + + if (name == NULL) { + bzero(&ic->ic_data, sizeof (ilbd_name_t)); + } else { + (void) strlcpy((char *)&ic->ic_data, name, + sizeof (ilbd_name_t)); + } + + rc = i_ilb_do_comm(h, ic, ic_sz, ic, &ic_sz); + if (rc != ILB_STATUS_OK) + goto out; + + if (ic->ic_cmd != ILBD_CMD_OK) + rc = *(ilb_status_t *)&ic->ic_data; + +out: + free(ic); + return (rc); +} + +ilb_status_t +ilb_destroy_rule(ilb_handle_t h, const char *name) +{ + return (i_ilb_rule_action(h, name, ILBD_DESTROY_RULE)); +} + +ilb_status_t +ilb_enable_rule(ilb_handle_t h, const char *name) +{ + return (i_ilb_rule_action(h, name, ILBD_ENABLE_RULE)); +} + +ilb_status_t +ilb_disable_rule(ilb_handle_t h, const char *name) +{ + return (i_ilb_rule_action(h, name, ILBD_DISABLE_RULE)); +} + +ilb_status_t +i_ilb_retrieve_rule_names(ilb_handle_t h, ilb_comm_t **rbuf, size_t *rbufsz) +{ + ilb_status_t rc; + ilb_comm_t ic, *tmp_rbuf; + + *rbufsz = ILBD_MSG_SIZE; + if ((tmp_rbuf = malloc(*rbufsz)) == NULL) + return (ILB_STATUS_ENOMEM); + + ic.ic_cmd = ILBD_RETRIEVE_RULE_NAMES; + + rc = i_ilb_do_comm(h, &ic, sizeof (ic), tmp_rbuf, rbufsz); + if (rc != ILB_STATUS_OK) + goto out; + + if (tmp_rbuf->ic_cmd == ILBD_CMD_OK) { + *rbuf = tmp_rbuf; + return (rc); + } + rc = *(ilb_status_t *)&tmp_rbuf->ic_data; +out: + free(tmp_rbuf); + *rbuf = NULL; + return (rc); +} + +static ilb_status_t +i_ilb_walk_one_rule(ilb_handle_t h, rule_walkerfunc_t f, const char *name, + void *arg) +{ + ilb_status_t rc = ILB_STATUS_OK; + ilb_rule_info_t *rl = NULL; + ilb_rule_data_t rd; + ilb_comm_t *ic, *rbuf; + size_t ic_sz, rbufsz; + + + if ((ic = i_ilb_alloc_req(ILBD_RETRIEVE_RULE, &ic_sz)) == NULL) + return (ILB_STATUS_ENOMEM); + rbufsz = sizeof (ilb_comm_t) + sizeof (ilb_rule_info_t); + if ((rbuf = malloc(rbufsz)) == NULL) { + free(ic); + return (ILB_STATUS_ENOMEM); + } + + (void) strlcpy((char *)&ic->ic_data, name, sizeof (ilbd_name_t)); + rc = i_ilb_do_comm(h, ic, 
ic_sz, rbuf, &rbufsz); + if (rc != ILB_STATUS_OK) + goto out; + if (rbuf->ic_cmd != ILBD_CMD_OK) { + rc = *(ilb_status_t *)&rbuf->ic_data; + goto out; + } + rl = (ilb_rule_info_t *)&rbuf->ic_data; + + /* + * Since the IP address representation in ilb_rule_data_t and + * ilb_rule_info_t is different, we need to convert between + * them. + */ + (void) strlcpy(rd.r_name, rl->rl_name, sizeof (rd.r_name)); + (void) strlcpy(rd.r_hcname, rl->rl_hcname, sizeof (rd.r_hcname)); + (void) strlcpy(rd.r_sgname, rl->rl_sgname, sizeof (rd.r_sgname)); + rd.r_flags = rl->rl_flags; + rd.r_proto = rl->rl_proto; + rd.r_minport = rl->rl_minport; + rd.r_maxport = rl->rl_maxport; + rd.r_algo = rl->rl_algo; + rd.r_topo = rl->rl_topo; + rd.r_conndrain = rl->rl_conndrain; + rd.r_nat_timeout = rl->rl_nat_timeout; + rd.r_sticky_timeout = rl->rl_sticky_timeout; + rd.r_hcport = rl->rl_hcport; + rd.r_hcpflag = rl->rl_hcpflag; + + IP_COPY_IMPL_2_CLI(&rl->rl_vip, &rd.r_vip); + IP_COPY_IMPL_2_CLI(&rl->rl_nat_src_start, &rd.r_nat_src_start); + IP_COPY_IMPL_2_CLI(&rl->rl_nat_src_end, &rd.r_nat_src_end); + IP_COPY_IMPL_2_CLI(&rl->rl_stickymask, &rd.r_stickymask); + + rc = f(h, &rd, arg); + +out: + free(ic); + free(rbuf); + return (rc); +} + +ilb_status_t +ilb_walk_rules(ilb_handle_t h, rule_walkerfunc_t f, const char *name, + void *arg) +{ + ilb_status_t rc; + ilbd_namelist_t *names; + ilb_comm_t *rbuf; + size_t rbufsz; + int i; + + if (h == NULL) + return (ILB_STATUS_EINVAL); + + if (name != NULL) + return (i_ilb_walk_one_rule(h, f, name, arg)); + + rc = i_ilb_retrieve_rule_names(h, &rbuf, &rbufsz); + if (rc != ILB_STATUS_OK) + return (rc); + + names = (ilbd_namelist_t *)&rbuf->ic_data; + for (i = 0; i < names->ilbl_count; i++) { + rc = i_ilb_walk_one_rule(h, f, names->ilbl_name[i], arg); + /* + * The rule may have been removed by another process since + * we retrieve all the rule names, just continue. 
+ */ + if (rc == ILB_STATUS_ENOENT) { + rc = ILB_STATUS_OK; + continue; + } + if (rc != ILB_STATUS_OK) + break; + } + + free(rbuf); + return (rc); +} diff --git a/usr/src/lib/libilb/common/ilb_sg.c b/usr/src/lib/libilb/common/ilb_sg.c new file mode 100644 index 0000000000..62990f0f09 --- /dev/null +++ b/usr/src/lib/libilb/common/ilb_sg.c @@ -0,0 +1,464 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include <stdlib.h> +#include <strings.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <stddef.h> +#include <libilb_impl.h> +#include <libilb.h> + +static ilb_status_t +i_ilb_addrem_sg(ilb_handle_t h, const char *sgname, ilbd_cmd_t cmd) +{ + ilb_status_t rc; + ilb_comm_t *ic; + size_t ic_sz; + + if (h == ILB_INVALID_HANDLE || sgname == NULL || *sgname == '\0') + return (ILB_STATUS_EINVAL); + + if (strlen(sgname) > ILB_SGNAME_SZ - 1) + return (ILB_STATUS_NAMETOOLONG); + + if ((ic = i_ilb_alloc_req(cmd, &ic_sz)) == NULL) + return (ILB_STATUS_ENOMEM); + + (void) strlcpy((char *)&ic->ic_data, sgname, sizeof (ilbd_name_t)); + + rc = i_ilb_do_comm(h, ic, ic_sz, ic, &ic_sz); + if (rc != ILB_STATUS_OK) + goto out; + + if (ic->ic_cmd != ILBD_CMD_OK) + rc = *(ilb_status_t *)&ic->ic_data; +out: + free(ic); + return (rc); +} + +ilb_status_t +ilb_destroy_servergroup(ilb_handle_t h, const char *sgname) +{ + return (i_ilb_addrem_sg(h, sgname, ILBD_DESTROY_SERVERGROUP)); +} + +ilb_status_t +ilb_create_servergroup(ilb_handle_t h, const char *sgname) +{ + return (i_ilb_addrem_sg(h, sgname, ILBD_CREATE_SERVERGROUP)); +} + +static ilb_status_t +i_ilb_addrem_server_to_group(ilb_handle_t h, const char *sgname, + ilb_server_data_t *srv, ilbd_cmd_t cmd) +{ + ilb_status_t rc = ILB_STATUS_OK; + ilb_sg_info_t *sg; + ilb_sg_srv_t *sgs; + in_port_t h_maxport, h_minport; + ilb_comm_t *ic; + size_t ic_sz; + + if (h == ILB_INVALID_HANDLE || sgname == NULL || + *sgname == '\0' || srv == NULL) + return (ILB_STATUS_EINVAL); + + if (strlen(sgname) > ILB_SGNAME_SZ - 1) + return (ILB_STATUS_NAMETOOLONG); + + /* now all the checks have passed, we can pass on the goods */ + if ((ic = i_ilb_alloc_req(cmd, &ic_sz)) == NULL) + return (ILB_STATUS_ENOMEM); + + sg = (ilb_sg_info_t *)&ic->ic_data; + sg->sg_srvcount = 1; + (void) strlcpy(sg->sg_name, sgname, sizeof (sg->sg_name)); + + sgs = &sg->sg_servers[0]; + + IP_COPY_CLI_2_IMPL(&srv->sd_addr, &sgs->sgs_addr); + 
h_minport = ntohs(srv->sd_minport); + h_maxport = ntohs(srv->sd_maxport); + sgs->sgs_minport = srv->sd_minport; + if (h_minport != 0 && h_maxport < h_minport) + sgs->sgs_maxport = srv->sd_minport; + else + sgs->sgs_maxport = srv->sd_maxport; + + sgs->sgs_flags = srv->sd_flags; + if (srv->sd_srvID[0] == ILB_SRVID_PREFIX) + (void) strlcpy(sgs->sgs_srvID, srv->sd_srvID, + sizeof (sgs->sgs_srvID)); + + rc = i_ilb_do_comm(h, ic, ic_sz, ic, &ic_sz); + if (rc != ILB_STATUS_OK) + goto out; + + if (ic->ic_cmd != ILBD_CMD_OK) + rc = *(ilb_status_t *)&ic->ic_data; + +out: + free(ic); + return (rc); +} + +ilb_status_t +ilb_add_server_to_group(ilb_handle_t h, const char *sgname, + ilb_server_data_t *srv) +{ + return (i_ilb_addrem_server_to_group(h, sgname, srv, + ILBD_ADD_SERVER_TO_GROUP)); +} + +ilb_status_t +ilb_rem_server_from_group(ilb_handle_t h, const char *sgname, + ilb_server_data_t *srv) +{ + return (i_ilb_addrem_server_to_group(h, sgname, srv, + ILBD_REM_SERVER_FROM_GROUP)); +} + +static ilb_status_t +i_ilb_retrieve_sg_names(ilb_handle_t h, ilb_comm_t **rbuf, size_t *rbufsz) +{ + ilb_status_t rc; + ilb_comm_t ic, *tmp_rbuf; + + *rbufsz = ILBD_MSG_SIZE; + if ((tmp_rbuf = malloc(*rbufsz)) == NULL) + return (ILB_STATUS_ENOMEM); + + ic.ic_cmd = ILBD_RETRIEVE_SG_NAMES; + rc = i_ilb_do_comm(h, &ic, sizeof (ic), tmp_rbuf, rbufsz); + if (rc != ILB_STATUS_OK) + goto out; + + if (tmp_rbuf->ic_cmd == ILBD_CMD_OK) { + *rbuf = tmp_rbuf; + return (rc); + } + rc = *(ilb_status_t *)&tmp_rbuf->ic_data; +out: + free(tmp_rbuf); + *rbuf = NULL; + return (rc); +} + +static ilb_status_t +i_ilb_retrieve_sg_hosts(ilb_handle_t h, const char *sgname, ilb_comm_t **rbuf, + size_t *rbufsz) +{ + ilb_status_t rc; + ilb_comm_t *ic, *tmp_rbuf; + size_t ic_sz; + + if ((ic = i_ilb_alloc_req(ILBD_RETRIEVE_SG_HOSTS, &ic_sz)) == NULL) + return (ILB_STATUS_ENOMEM); + *rbufsz = ILBD_MSG_SIZE; + if ((tmp_rbuf = malloc(*rbufsz)) == NULL) { + free(ic); + *rbuf = NULL; + return (ILB_STATUS_ENOMEM); + } + + 
(void) strlcpy((char *)&ic->ic_data, sgname, sizeof (ilbd_name_t)); + rc = i_ilb_do_comm(h, ic, ic_sz, tmp_rbuf, rbufsz); + if (rc != ILB_STATUS_OK) + goto out; + + if (tmp_rbuf->ic_cmd == ILBD_CMD_OK) { + *rbuf = tmp_rbuf; + free(ic); + return (rc); + } + rc = *(ilb_status_t *)&tmp_rbuf->ic_data; +out: + free(ic); + free(tmp_rbuf); + *rbuf = NULL; + return (rc); +} + +typedef enum { + walk_servers, + walk_sg +} sgwalk_t; + +/* + * "walks" one sg (retrieves data) and depending on "walktype" argument + * call servergroup function once per sg or server function once + * for every server. in both cases, the argument "f" is cast to + * be the proper function pointer type + */ +static ilb_status_t +i_ilb_walk_one_sg(ilb_handle_t h, void *f, const char *sgname, void *arg, + sgwalk_t walktype) +{ + ilb_status_t rc = ILB_STATUS_OK; + ilb_sg_info_t *sg_info; + ilb_sg_srv_t *srv; + int i; + ilb_comm_t *rbuf; + size_t rbufsz; + + rc = i_ilb_retrieve_sg_hosts(h, sgname, &rbuf, &rbufsz); + if (rc != ILB_STATUS_OK) + return (rc); + sg_info = (ilb_sg_info_t *)&rbuf->ic_data; + + if (walktype == walk_sg) { + sg_walkerfunc_t sg_func = (sg_walkerfunc_t)f; + ilb_sg_data_t sgd; + + (void) strlcpy(sgd.sgd_name, sg_info->sg_name, + sizeof (sgd.sgd_name)); + sgd.sgd_srvcount = sg_info->sg_srvcount; + sgd.sgd_flags = sg_info->sg_flags; + rc = sg_func(h, &sgd, arg); + goto out; + } + + for (i = 0; i < sg_info->sg_srvcount; i++) { + srv_walkerfunc_t srv_func = (srv_walkerfunc_t)f; + ilb_server_data_t sd; + + srv = &sg_info->sg_servers[i]; + IP_COPY_IMPL_2_CLI(&srv->sgs_addr, &sd.sd_addr); + sd.sd_minport = srv->sgs_minport; + sd.sd_maxport = srv->sgs_maxport; + sd.sd_flags = srv->sgs_flags; + (void) strlcpy(sd.sd_srvID, srv->sgs_srvID, + sizeof (sd.sd_srvID)); + + rc = srv_func(h, &sd, sg_info->sg_name, arg); + if (rc != ILB_STATUS_OK) + break; + } + +out: + free(rbuf); + return (rc); +} + +/* + * wrapper function for i_walk_one_sg; if necessary, gets list of + * SG names and calles 
i_walk_one_sg with every name + */ +static ilb_status_t +i_walk_sgs(ilb_handle_t h, void *f, const char *sgname, + void *arg, sgwalk_t walktype) +{ + ilb_status_t rc; + ilbd_namelist_t *sgl; + ilb_comm_t *rbuf; + size_t rbufsz; + int i; + + if (sgname != NULL) { + rc = i_ilb_walk_one_sg(h, f, sgname, arg, walktype); + return (rc); + } + + rc = i_ilb_retrieve_sg_names(h, &rbuf, &rbufsz); + if (rc != ILB_STATUS_OK) + return (rc); + sgl = (ilbd_namelist_t *)&rbuf->ic_data; + + for (i = 0; i < sgl->ilbl_count; i++) { + rc = i_ilb_walk_one_sg(h, f, sgl->ilbl_name[i], arg, walktype); + /* + * The server group may have been removed by another + * process, just continue. + */ + if (rc == ILB_STATUS_SGUNAVAIL) { + rc = ILB_STATUS_OK; + continue; + } + if (rc != ILB_STATUS_OK) + break; + } + free(rbuf); + return (rc); +} + +ilb_status_t +ilb_walk_servergroups(ilb_handle_t h, sg_walkerfunc_t f, const char *sgname, + void *arg) +{ + return (i_walk_sgs(h, (void *)f, sgname, arg, walk_sg)); +} + +ilb_status_t +ilb_walk_servers(ilb_handle_t h, srv_walkerfunc_t f, const char *sgname, + void *arg) +{ + return (i_walk_sgs(h, (void *)f, sgname, arg, walk_servers)); +} + +static ilb_status_t +ilb_Xable_server(ilb_handle_t h, ilb_server_data_t *srv, void *reserved, + ilbd_cmd_t cmd) +{ + ilb_status_t rc; + ilb_sg_info_t *sg_info; + ilb_sg_srv_t *sgs; + in_port_t h_maxport, h_minport; + ilb_comm_t *ic; + size_t ic_sz; + + if (h == NULL) + return (ILB_STATUS_EINVAL); + + /* + * In this implementation, this needs to be NULL, so + * there's no ugly surprises with old apps once we attach + * meaning to this parameter. 
+ */ + if (reserved != NULL) + return (ILB_STATUS_EINVAL); + + /* now all the checks have passed, we can pass on the goods */ + if ((ic = i_ilb_alloc_req(cmd, &ic_sz)) == NULL) + return (ILB_STATUS_ENOMEM); + + sg_info = (ilb_sg_info_t *)&ic->ic_data; + sg_info->sg_srvcount = 1; + + sgs = &sg_info->sg_servers[0]; + + /* make sure min_port <= max_port; comparison in host byte order! */ + h_maxport = ntohs(srv->sd_maxport); + h_minport = ntohs(srv->sd_minport); + if (h_maxport != 0 && h_maxport < h_minport) + sgs->sgs_maxport = sgs->sgs_minport; + else + sgs->sgs_maxport = srv->sd_maxport; + sgs->sgs_minport = srv->sd_minport; + + sgs->sgs_flags = srv->sd_flags; + (void) strlcpy(sgs->sgs_srvID, srv->sd_srvID, sizeof (sgs->sgs_srvID)); + IP_COPY_CLI_2_IMPL(&srv->sd_addr, &sgs->sgs_addr); + + rc = i_ilb_do_comm(h, ic, ic_sz, ic, &ic_sz); + if (rc != ILB_STATUS_OK) + goto out; + + if (ic->ic_cmd != ILBD_CMD_OK) + rc = *(ilb_status_t *)&ic->ic_data; +out: + free(ic); + return (rc); +} + +ilb_status_t +ilb_enable_server(ilb_handle_t h, ilb_server_data_t *srv, void *reserved) +{ + return (ilb_Xable_server(h, srv, reserved, ILBD_ENABLE_SERVER)); +} + +ilb_status_t +ilb_disable_server(ilb_handle_t h, ilb_server_data_t *srv, void *reserved) +{ + return (ilb_Xable_server(h, srv, reserved, ILBD_DISABLE_SERVER)); +} + +static ilb_status_t +i_ilb_fillin_srvdata(ilb_handle_t h, ilb_server_data_t *srv, const char *sgname, + ilbd_cmd_t cmd) +{ + ilb_status_t rc; + ilb_sg_info_t *sg_info; + ilb_sg_srv_t *sgs; + ilb_comm_t *ic; + size_t ic_sz; + ilb_comm_t *rbuf; + size_t rbufsz; + + if (h == ILB_INVALID_HANDLE || sgname == NULL || + *sgname == '\0' || srv == NULL) + return (ILB_STATUS_EINVAL); + + if (cmd == ILBD_SRV_ID2ADDR && srv->sd_srvID[0] == '\0') + return (ILB_STATUS_EINVAL); + if (cmd == ILBD_SRV_ADDR2ID && !IS_AF_VALID(srv->sd_addr.ia_af)) + return (ILB_STATUS_EINVAL); + + if ((ic = i_ilb_alloc_req(cmd, &ic_sz)) == NULL) + return (ILB_STATUS_ENOMEM); + rbufsz = sizeof 
(ilb_comm_t) + sizeof (ilb_sg_srv_t); + if ((rbuf = malloc(rbufsz)) == NULL) { + free(ic); + return (ILB_STATUS_ENOMEM); + } + + sg_info = (ilb_sg_info_t *)&ic->ic_data; + sg_info->sg_srvcount = 1; + (void) strlcpy(sg_info->sg_name, sgname, sizeof (sg_info->sg_name)); + + sgs = &sg_info->sg_servers[0]; + + if (cmd == ILBD_SRV_ID2ADDR) + (void) strlcpy(sgs->sgs_srvID, srv->sd_srvID, + sizeof (sgs->sgs_srvID)); + else + IP_COPY_CLI_2_IMPL(&srv->sd_addr, &sgs->sgs_addr); + + rc = i_ilb_do_comm(h, ic, ic_sz, rbuf, &rbufsz); + if (rc != ILB_STATUS_OK) + goto out; + + if (rbuf->ic_cmd == ILBD_CMD_OK) { + sgs = (ilb_sg_srv_t *)&rbuf->ic_data; + if (cmd == ILBD_SRV_ID2ADDR) { + IP_COPY_IMPL_2_CLI(&sgs->sgs_addr, &srv->sd_addr); + } else { + (void) strlcpy(srv->sd_srvID, sgs->sgs_srvID, + sizeof (sgs->sgs_srvID)); + } + return (rc); + } + + rc = *(ilb_status_t *)&rbuf->ic_data; +out: + free(ic); + return (rc); +} + +ilb_status_t +ilb_srvID_to_address(ilb_handle_t h, ilb_server_data_t *srv, const char *sgname) +{ + return (i_ilb_fillin_srvdata(h, srv, sgname, ILBD_SRV_ID2ADDR)); + +} + +ilb_status_t +ilb_address_to_srvID(ilb_handle_t h, ilb_server_data_t *srv, const char *sgname) +{ + return (i_ilb_fillin_srvdata(h, srv, sgname, ILBD_SRV_ADDR2ID)); +} diff --git a/usr/src/lib/libilb/common/ilb_subr.c b/usr/src/lib/libilb/common/ilb_subr.c new file mode 100644 index 0000000000..25f990d239 --- /dev/null +++ b/usr/src/lib/libilb/common/ilb_subr.c @@ -0,0 +1,424 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <stdlib.h> +#include <strings.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <inttypes.h> +#include <assert.h> +#include <libilb.h> +#include <libilb_impl.h> +#include <locale.h> +
typedef enum {
	internal,
	external
} ip_addr_type_t;

/* Sign of n as +1 or -1; zero counts as positive. */
static int
sign64(int64_t n)
{
	if (n >= 0)
		return (1);
	return (-1);
}

/* Sign of n as +1 or -1; zero counts as positive. */
static int
sign32(int32_t n)
{
	if (n >= 0)
		return (1);
	return (-1);
}

/*
 * since the difference between two uint64_ts can be greater than
 * what a int64_t can hold, we need to cap the result at +/- INT64_MAX
 * return: < 0: x < y, 0: x == y, > 0: x > y
 */
static int64_t
signed_diff64(uint64_t x, uint64_t y)
{
	uint64_t	ud;
	int		s = -1;

	if (x == y)
		return (0);

	/* make sure we have x < y */
	if (x > y) {
		uint64_t	t;

		s = 1;
		t = x; x = y; y = t;
	}

	ud = y - x;
	if (ud > INT64_MAX)
		return (INT64_MAX * s);

	return ((int64_t)ud * s);
}

/*
 * Return |x - y| and report the direction through *sgn:
 * -1: x < y, 0: x == y, 1: x > y.
 * *sgn is written on every path so that callers never read an
 * indeterminate value when the two inputs are equal.
 */
static uint64_t
unsigned_diff64(uint64_t x, uint64_t y, int *sgn)
{
	int	s = -1;

	if (x == y) {
		*sgn = 0;
		return (0);
	}

	/* make sure we have x < y */
	if (x > y) {
		uint64_t	t;

		s = 1;
		t = x; x = y; y = t;
	}
	*sgn = s;
	return (y - x);
}

+/* + * compare ip addresses ip1 and ip2 (as unsigned integers) + * return: -1: ip1 < ip2, 0: ip1 == ip2, 1: ip1 > ip2 + * input addresses are assumed to be in network byte order + * diff contains the difference between the two with the same + * sign as the comparison result; + * NOTE: since ipv6 address 
(difference)s can be more than a 64bit + * value can express, the difference is capped at +/- INT64_MAX + */ +static int +i_cmp_addr_impl(void *ip1, void *ip2, ip_addr_type_t atype, int64_t *diff) +{ + struct in6_addr *a6_1, *a6_2; + uint32_t i1, i2; + uint32_t l1, l2; + int af, sgn; + int64_t d; + + if (atype == internal) { + af = GET_AF((struct in6_addr *)ip1); + if (af == AF_INET) { + IN6_V4MAPPED_TO_IPADDR((struct in6_addr *)ip1, i1); + IN6_V4MAPPED_TO_IPADDR((struct in6_addr *)ip2, i2); + + l1 = ntohl(i1); + l2 = ntohl(i2); + } else { + a6_1 = (struct in6_addr *)ip1; + a6_2 = (struct in6_addr *)ip2; + } + } else { + af = ((ilb_ip_addr_t *)ip1)->ia_af; + if (af == AF_INET) { + struct in_addr *a1, *a2; + + a1 = &((ilb_ip_addr_t *)ip1)->ia_v4; + a2 = &((ilb_ip_addr_t *)ip2)->ia_v4; + + l1 = ntohl((uint32_t)a1->s_addr); + l2 = ntohl((uint32_t)a2->s_addr); + } else { + a6_1 = &((ilb_ip_addr_t *)ip1)->ia_v6; + a6_2 = &((ilb_ip_addr_t *)ip2)->ia_v6; + } + } + + if (af == AF_INET) { + d = l1 - l2; + sgn = sign32((int32_t)d); + } else { + /* + * we're facing the dilemma that 128-bit ipv6 addresses are + * larger than the largest integer type - int64_t. + * we handle this thus: + * 1. seperate high-order and low-order bits (64 each) into + * *h and *l variables (unsigned). + * 2. calculate difference for *h and *l: + * low: unsigned + * high: signed + * 3. if high-order diff == 0, we can take low-order + * diff, if necessary cap it, convert it to signed + * and be done + * 4. if high-order and low-order signs are the same, the low- + * order bits won't significantly impact high-order + * difference, so we know that we've overflowed an int64_t; + * if high-order diff is > 1, any low-order difference won't + * change the overflow. + * 5. (dh == 1 and l_sign <= 0) or (dh == -1 and l_sign > 0), + * ie, dh == +/- 2^64 + * 5a. if dl < INT64_MAX, the result is still > INT64_MAX, so + * we cap again. + * 5b. 
dl >= INT64_MAX + * we need to express (for dh == 1): + * (2^64) + x (where x < 0). + * Since the largest number we have is + * 2^64 - 1 == UINT64_MAX + * we use + * (2^64 - 1) + x + 1 + * + * for dh == -1, all we have is + * -(2^63 - 1), so to express + * -(2^64) + x, + * we first do (dl - (2^63-1)) (which is then also < 2^63), + * si we can then add that to -(2^63 - 1); + */ + uint64_t i1h, i1l; + uint64_t i2h, i2l; + uint64_t dl; + int64_t dh; + int l_sign; + + /* 1. */ + i1h = INV6_N2H_MSB64(a6_1); + i1l = INV6_N2H_LSB64(a6_1); + i2h = INV6_N2H_MSB64(a6_2); + i2l = INV6_N2H_LSB64(a6_2); + + /* 2. */ + dh = signed_diff64(i1h, i2h); + dl = unsigned_diff64(i1l, i2l, &l_sign); + + /* 3. */ + if (dh == 0) { + if (dl > INT64_MAX) + dl = INT64_MAX; + + d = dl * l_sign; + /* 4, */ + } else if (l_sign == sign64(dh) || abs(dh) > 1) { + if (dh > 0) + d = INT64_MAX; + else + d = -INT64_MAX; + /* 5. */ + } else { + if (dl < INT64_MAX) { + d = INT64_MAX; + } else { + if (dh == 1) + d = UINT64_MAX - dl + 1; + else + d = -INT64_MAX - (dl - INT64_MAX) - 1; + } + } + sgn = sign64(d); + } + if (diff != NULL) + *diff = d; + if (d == 0) + return (0); + return (sgn); +} + +int +ilb_cmp_in6_addr(struct in6_addr *ip1, struct in6_addr *ip2, int64_t *diff) +{ + int res; + + res = i_cmp_addr_impl(ip1, ip2, internal, diff); + return (res); +} + +int +ilb_cmp_ipaddr(ilb_ip_addr_t *ip1, ilb_ip_addr_t *ip2, int64_t *diff) +{ + int res; + + res = i_cmp_addr_impl(ip1, ip2, external, diff); + return (res); +} + +/* + * Error strings for error values returned by libilb functions + */ +const char * +ilb_errstr(ilb_status_t rc) +{ + switch (rc) { + case ILB_STATUS_OK: + return (dgettext(TEXT_DOMAIN, "no error")); + case ILB_STATUS_INTERNAL: + return (dgettext(TEXT_DOMAIN, "error internal to the library")); + case ILB_STATUS_EINVAL: + return (dgettext(TEXT_DOMAIN, "invalid argument(s) - see" + " man page")); + case ILB_STATUS_ENOMEM: + return (dgettext(TEXT_DOMAIN, "not enough memory" + " for 
operation")); + case ILB_STATUS_ENOENT: + return (dgettext(TEXT_DOMAIN, "no such/no more element(s)")); + case ILB_STATUS_SOCKET: + return (dgettext(TEXT_DOMAIN, "socket() failed")); + case ILB_STATUS_READ: + return (dgettext(TEXT_DOMAIN, "read() failed")); + case ILB_STATUS_WRITE: + return (dgettext(TEXT_DOMAIN, "fflush() or send() failed")); + case ILB_STATUS_TIMER: + return (dgettext(TEXT_DOMAIN, "health check timer" + " create/setup error")); + case ILB_STATUS_INUSE: + return (dgettext(TEXT_DOMAIN, "object is in use," + " cannot destroy")); + case ILB_STATUS_EEXIST: + return (dgettext(TEXT_DOMAIN, "object already exists")); + case ILB_STATUS_PERMIT: + return (dgettext(TEXT_DOMAIN, "no scf permit")); + case ILB_STATUS_CALLBACK: + return (dgettext(TEXT_DOMAIN, "scf callback error")); + case ILB_STATUS_INPROGRESS: + return (dgettext(TEXT_DOMAIN, "operation is progress")); + case ILB_STATUS_SEND: + return (dgettext(TEXT_DOMAIN, "send() failed")); + case ILB_STATUS_ENOHCINFO: + return (dgettext(TEXT_DOMAIN, "missing healthcheck info")); + case ILB_STATUS_INVAL_HCTESTTYPE: + return (dgettext(TEXT_DOMAIN, "invalid health check" + " test type")); + case ILB_STATUS_INVAL_CMD: + return (dgettext(TEXT_DOMAIN, "invalid command")); + case ILB_STATUS_DUP_RULE: + return (dgettext(TEXT_DOMAIN, "specified rule name already" + " exists")); + case ILB_STATUS_ENORULE: + return (dgettext(TEXT_DOMAIN, "specified rule does not exist")); + case ILB_STATUS_MISMATCHSG: + return (dgettext(TEXT_DOMAIN, "address family mismatch with" + " servergroup")); + case ILB_STATUS_MISMATCHH: + return (dgettext(TEXT_DOMAIN, "address family mismatch" + " with previous hosts in servergroup or with rule")); + case ILB_STATUS_SGUNAVAIL: + return (dgettext(TEXT_DOMAIN, "cannot find specified" + " server group")); + case ILB_STATUS_SGINUSE: + return (dgettext(TEXT_DOMAIN, "cannot remove server" + " group - its in use with other active rules")); + case ILB_STATUS_SGEXISTS: + return (dgettext(TEXT_DOMAIN, 
"servergroup already exists")); + case ILB_STATUS_SGFULL: + return (dgettext(TEXT_DOMAIN, "servergroup is full - cannot" + " add any more servers to this servergroup")); + case ILB_STATUS_SGEMPTY: + return (dgettext(TEXT_DOMAIN, "servergroup does not contain" + " any servers")); + case ILB_STATUS_NAMETOOLONG: + return (dgettext(TEXT_DOMAIN, "servergroup name can" + " only contain a maximum of 14 characters")); + case ILB_STATUS_CFGAUTH: + return (dgettext(TEXT_DOMAIN, "user is not authorized to" + " execute command")); + case ILB_STATUS_CFGUPDATE: + return (dgettext(TEXT_DOMAIN, "a failure occurred while trying" + " to update persistent config. Panic?")); + case ILB_STATUS_BADSG: + return (dgettext(TEXT_DOMAIN, "the rule's port range" + " does not match that of the servers' in associated" + " servergroup")); + case ILB_STATUS_INVAL_SRVR: + return (dgettext(TEXT_DOMAIN, "server cannot be added to the" + " servergroup, as the servergroup is associated to rule(s)" + " with port/port range that is incompatible" + "with the server's port")); + case ILB_STATUS_INVAL_ENBSRVR: + return (dgettext(TEXT_DOMAIN, "server cannot be enabled" + " because it's not associated with any rule")); + case ILB_STATUS_BADPORT: + return (dgettext(TEXT_DOMAIN, "the rule's port value does" + " not match that of the servers' in" + " associated servergroup")); + case ILB_STATUS_SRVUNAVAIL: + return (dgettext(TEXT_DOMAIN, "cannot find specified server")); + case ILB_STATUS_RULE_NO_HC: + return (dgettext(TEXT_DOMAIN, "rule does not have health " + "check enabled")); + case ILB_STATUS_RULE_HC_MISMATCH: + return (dgettext(TEXT_DOMAIN, "protocol used in rule and " + "health check does not match")); + case ILB_STATUS_HANDLE_CLOSING: + return (dgettext(TEXT_DOMAIN, "handle is being closed")); + + default: + return (dgettext(TEXT_DOMAIN, "unknown error")); + } +} + +/* Allocate space for a specified request to be sent to ilbd. 
*/ +ilb_comm_t * +i_ilb_alloc_req(ilbd_cmd_t cmd, size_t *ic_sz) +{ + ilb_comm_t *ic; + size_t sz; + + sz = sizeof (ilb_comm_t); + + switch (cmd) { + case ILBD_CREATE_RULE: + sz += sizeof (ilb_rule_info_t); + break; + + case ILBD_RETRIEVE_RULE: + case ILBD_DESTROY_RULE: + case ILBD_ENABLE_RULE: + case ILBD_DISABLE_RULE: + case ILBD_RETRIEVE_SG_HOSTS: + case ILBD_DESTROY_SERVERGROUP: + case ILBD_CREATE_SERVERGROUP: + case ILBD_DESTROY_HC: + case ILBD_GET_HC_INFO: + case ILBD_GET_HC_SRVS: + sz += sizeof (ilbd_name_t); + break; + + case ILBD_ENABLE_SERVER: + case ILBD_DISABLE_SERVER: + case ILBD_ADD_SERVER_TO_GROUP: + case ILBD_REM_SERVER_FROM_GROUP: + case ILBD_SRV_ADDR2ID: + case ILBD_SRV_ID2ADDR: + sz += sizeof (ilb_sg_info_t) + sizeof (ilb_sg_srv_t); + break; + + case ILBD_CREATE_HC: + sz += sizeof (ilb_hc_info_t); + break; + + default: + /* Should not reach here. */ + assert(0); + break; + } + + if ((ic = calloc(1, sz)) == NULL) + return (NULL); + + *ic_sz = sz; + ic->ic_cmd = cmd; + ic->ic_flags = 0; + return (ic); +} diff --git a/usr/src/lib/libilb/common/libilb.h b/usr/src/lib/libilb/common/libilb.h new file mode 100644 index 0000000000..1fc4d1954d --- /dev/null +++ b/usr/src/lib/libilb/common/libilb.h @@ -0,0 +1,398 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LIBILB_H +#define _LIBILB_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/types.h> +#include <netinet/in.h> +#include <net/if.h> + +/* make sure these values stay in sync with definitions in ilb.h! */ +#define ILB_FLAGS_RULE_ENABLED 0x01 +#define ILB_FLAGS_RULE_STICKY 0x02 +#define ILB_FLAGS_RULE_ALLRULES 0x04 +#define ILB_FLAGS_RESERVED 0x08 /* in use by kernel, don't overlay */ + +/* + * information whether we're interested in names or numerical information + */ +#define ILB_FLAGS_SRV_HOSTNAME 0x01 /* a servers hostname was given */ +#define ILB_FLAGS_SRV_PORTNAME 0x02 /* a port was spec'd by name */ + +/* + * server status information + */ +#define ILB_FLAGS_SRV_ENABLED 0x10 + +/* + * macros to determine, and for some cases, set status of server + */ +#define ILB_IS_SRV_ENABLED(f) \ + ((f & ILB_FLAGS_SRV_ENABLED) == ILB_FLAGS_SRV_ENABLED) +#define ILB_IS_SRV_DISABLED(f) ((f & ILB_FLAGS_SRV_ENABLED) == 0) + +#define ILB_SET_ENABLED(f) (f |= ILB_FLAGS_SRV_ENABLED) +#define ILB_SET_DISABLED(f) (f &= ~ILB_FLAGS_SRV_ENABLED) + +#define MAX_IP_SPREAD 0xff /* largest ip addr. range */ + +#define ILB_HC_STR_UDP "udp" +#define ILB_HC_STR_TCP "tcp" +#define ILB_HC_STR_PING "ping" + +#define ILB_NAMESZ 20 /* keep in sync with kernel definition */ +#define ILB_SGNAME_SZ (ILB_NAMESZ - 5) /* 3 numeric digits, "." 
and "_" */ + +#define ILB_SRVID_PREFIX '_' /* a valid serverID starts with this */ + +/* producers of these statuses are libilb and ilbd functions */ +typedef enum { + ILB_STATUS_OK = 0, + ILB_STATUS_INTERNAL, /* an error internal to the library */ + ILB_STATUS_EINVAL, /* invalid argument(s) */ + ILB_STATUS_ENOMEM, /* not enough memory for operation */ + ILB_STATUS_ENOENT, /* no such/no more element(s) */ + ILB_STATUS_SOCKET, /* socket related failure */ + ILB_STATUS_READ, /* read related failure */ + ILB_STATUS_WRITE, /* write related failure */ + ILB_STATUS_TIMER, /* healthcheck timer error */ + ILB_STATUS_INUSE, /* item in use, cannot delete */ + ILB_STATUS_EEXIST, /* scf item exist */ + ILB_STATUS_PERMIT, /* no scf permit */ + ILB_STATUS_CALLBACK, /* scf callback error */ + ILB_STATUS_EWOULDBLOCK, /* operation is blocked - no error string */ + ILB_STATUS_INPROGRESS, /* operation already in progress */ + ILB_STATUS_SEND, /* send related failure */ + ILB_STATUS_GENERIC, /* generic failure - no error string */ + ILB_STATUS_ENOHCINFO, /* missing healthcheck info */ + ILB_STATUS_INVAL_HCTESTTYPE, /* invalid health check */ + ILB_STATUS_INVAL_CMD, /* unknown command */ + ILB_STATUS_DUP_RULE, /* rule name exists */ + ILB_STATUS_ENORULE, /* rule does not exist */ + ILB_STATUS_MISMATCHSG, /* addr family mismatch with sgroup */ + ILB_STATUS_MISMATCHH, /* addr family mismatch with hosts/rule */ + ILB_STATUS_SGUNAVAIL, /* cannot find sgroup in sggroup list */ + ILB_STATUS_SGINUSE, /* server is un use, cannot remove */ + ILB_STATUS_SGEXISTS, /* server exists */ + ILB_STATUS_SGFULL, /* cannot add any more servers */ + ILB_STATUS_SGEMPTY, /* sgroup is empty */ + ILB_STATUS_NAMETOOLONG, /* a name is longer than allowed */ + ILB_STATUS_CFGAUTH, /* config authoriz denied -no error string */ + ILB_STATUS_CFGUPDATE, /* failed to update config! 
*/ + ILB_STATUS_BADSG, /* rules port range size does not match */ + /* that of the servers */ + ILB_STATUS_INVAL_SRVR, /* server port is incompatible with */ + /* rule port */ + ILB_STATUS_INVAL_ENBSRVR, /* server cannot be enabled since it's */ + /* not being used by a rule */ + ILB_STATUS_BADPORT, /* rules port value does not match */ + /* server's */ + ILB_STATUS_SRVUNAVAIL, /* cannot find specified server */ + ILB_STATUS_RULE_NO_HC, /* rule does not have hc info */ + ILB_STATUS_RULE_HC_MISMATCH, /* rule and hc object mismatch */ + ILB_STATUS_HANDLE_CLOSING /* library handle is being closed */ +} ilb_status_t; + +typedef struct { + int32_t ia_af; /* AF_INET or AF_INET6 */ + union { + struct in_addr v4; /* network byte order */ + struct in6_addr v6; /* network byte order */ + } _au; +#define ia_v4 _au.v4 +#define ia_v6 _au.v6 +} ilb_ip_addr_t; + +/* Supported load balancing algorithm type */ +typedef enum { + ILB_ALG_ROUNDROBIN = 1, + ILB_ALG_HASH_IP, + ILB_ALG_HASH_IP_SPORT, + ILB_ALG_HASH_IP_VIP +} ilb_algo_t; + +/* Supported load balancing method */ +typedef enum { + ILB_TOPO_DSR = 1, + ILB_TOPO_NAT, + ILB_TOPO_HALF_NAT +} ilb_topo_t; + +#define ILB_INVALID_HANDLE ((void *) NULL) + +/* + * note: pointer to a non-existant struct + */ +typedef struct ilb_handle *ilb_handle_t; + +/* + * Health check related information + */ + +/* HC state of a server */ +typedef enum { + ILB_HCS_UNINIT = -1, /* Uninitialized */ + ILB_HCS_UNREACH = 0, /* Unreachable, ping fails */ + ILB_HCS_ALIVE, /* Probe succeeds */ + ILB_HCS_DEAD, /* Probe fails */ + ILB_HCS_DISABLED /* Server is disabled */ +} ilb_hc_srv_status_t; + +/* + * Struct representing a server in a hc object + * + * hcs_rule_name: rule using this server + * hcs_ID: server ID + * hcs_hc_name: hc object this server is associated with + * hcs_IP: IP address of the server + * hcs_fail_cnt: number of fail hc probe + * hcs_status: hc status of the server + * hcs_rtt: (in microsec) smoothed average RTT to the server + * 
hcs_lasttime: last time hc test was done (as returned by time(2)) + * hcs_nexttime: next time hc test will be done (as returned by (time(2)) + */ +typedef struct { + char hcs_rule_name[ILB_NAMESZ]; + char hcs_ID[ILB_NAMESZ]; + char hcs_hc_name[ILB_NAMESZ]; + struct in6_addr hcs_IP; + uint32_t hcs_fail_cnt; + ilb_hc_srv_status_t hcs_status; + uint32_t hcs_rtt; + time_t hcs_lasttime; + time_t hcs_nexttime; +} ilb_hc_srv_t; + +/* Probe flags to be used in r_hcpflag in struct rule data. */ +typedef enum { + ILB_HCI_PROBE_ANY = 0, /* Probe any port in the server port range */ + ILB_HCI_PROBE_FIX /* Probe a fixed port */ +} ilb_hcp_flags_t; + +/* + * Struct representing a hc object + * + * hci_name: name of the hc object + * hci_test: hc test to be done, TCP, UDP, or user supplied path name + * hci_timeout: (in sec) test time out + * hci_interval: (in sec) test execution interval + * hci_def_ping: true if default ping is done; false otherwise + */ +typedef struct { + char hci_name[ILB_NAMESZ]; + char hci_test[MAXPATHLEN]; + int32_t hci_timeout; + int32_t hci_count; + int32_t hci_interval; + boolean_t hci_def_ping; +} ilb_hc_info_t; + +typedef struct rule_data { + char r_name[ILB_NAMESZ]; /* name of this rule */ + int32_t r_flags; /* opt: ILB_FLAGS_RULE_ENABLED etc. */ + ilb_ip_addr_t r_vip; /* vip, required for rule creation */ + uint16_t r_proto; /* protocol (tcp, udp) */ + in_port_t r_minport; /* port this rule refers to */ + in_port_t r_maxport; /* if != 0, defines port range */ + ilb_algo_t r_algo; /* round-robin, hash-ip, etc. */ + ilb_topo_t r_topo; /* dsr, NAT, etc */ + ilb_ip_addr_t r_nat_src_start; /* required for NAT */ + ilb_ip_addr_t r_nat_src_end; /* required for NAT */ + ilb_ip_addr_t r_stickymask; /* netmask for persistence */ + uint32_t r_conndrain; /* opt: time for conn. 
draining (s) */ + uint32_t r_nat_timeout; /* opt: timeout for nat connections */ + uint32_t r_sticky_timeout; /* opt: timeout for persistence */ + ilb_hcp_flags_t r_hcpflag; /* HC port flag */ + in_port_t r_hcport; /* opt with HC */ + char r_sgname[ILB_SGNAME_SZ]; /* this rule's server grp. */ + char r_hcname[ILB_NAMESZ]; /* HC name: optional */ +} ilb_rule_data_t; + +/* not all fields are valid in all calls where this is used */ +typedef struct server_data { + ilb_ip_addr_t sd_addr; /* a server's ip address */ + in_port_t sd_minport; /* port information */ + in_port_t sd_maxport; /* ... if != 0, defines a port range */ + uint32_t sd_flags; /* enabled, dis- */ + char sd_srvID[ILB_NAMESZ]; /* "name" for server */ + /* assigned by system, not user */ +} ilb_server_data_t; + +/* + * Struct to represent a server group. + * + * sgd_name: server group name + * sgd_flags: flags + * sgd_srvcount: number of servers in the group (not used in sever group + * creation); filled in when used by call back function for + * ilb_walk_servergroups(). + */ +typedef struct sg_data { + char sgd_name[ILB_SGNAME_SZ]; + int32_t sgd_flags; + int32_t sgd_srvcount; +} ilb_sg_data_t; + +/* + * Struct to represent a NAT entry in kernel. + * + * nat_proto: transport protocol used in this NAT entry + * + * nat_out_global: IP address of client's request + * nat_out_global_port: port number of client's request + * nat_in_global: VIP of a rule for the NAT entry + * nat_in_global_port: port of a rule for the NAT entry + * + * nat_out_local: half NAT: IP address of client's request + * full NAT: NAT'ed IP addres of client' request + * nat_out_local_port: half NAT: port number of client's request + * full NAT: NAT'ed port number of client's request + * nat_in_local: IP address of back end server handling this request + * nat_in_local_port: port number in back end server handling thi request + * + * (*) IPv4 address is represented as IPv4 mapped IPv6 address. 
+ */ +typedef struct { + uint32_t nat_proto; + + in6_addr_t nat_in_local; + in6_addr_t nat_in_global; + in6_addr_t nat_out_local; + in6_addr_t nat_out_global; + + in_port_t nat_in_local_port; + in_port_t nat_in_global_port; + in_port_t nat_out_local_port; + in_port_t nat_out_global_port; +} ilb_nat_info_t; + +/* + * Struct to represet a persistent entry in kernel. + * + * rule_name: the name of rule for a persistent entry + * req_addr: the client's IP address (*) + * srv_addr: the server's IP address (*) handling the client's request + * + * (*) IPv4 address is represented as IPv4 mapped IPv6 address. + */ +typedef struct { + char persist_rule_name[ILB_NAMESZ]; + in6_addr_t persist_req_addr; + in6_addr_t persist_srv_addr; +} ilb_persist_info_t; + +/* + * Function prototype of the call back function of those walker functions. + * + * Note: the storage of the data item parameter (ilb_sg_data_t/ + * ilb_server_data_/ilb_rule_data_t/ilb_hc_info_t/ilb_hc_srv_t) will be + * freed after calling the call back function. If the call back function + * needs to keep a copy of the data, it must copy the data content. 
+ */ +typedef ilb_status_t (* sg_walkerfunc_t)(ilb_handle_t, ilb_sg_data_t *, + void *); +typedef ilb_status_t (* srv_walkerfunc_t)(ilb_handle_t, ilb_server_data_t *, + const char *, void *); +typedef ilb_status_t (* rule_walkerfunc_t)(ilb_handle_t, ilb_rule_data_t *, + void *); +typedef ilb_status_t (* hc_walkerfunc_t)(ilb_handle_t, ilb_hc_info_t *, + void *); +typedef ilb_status_t (* hc_srvwalkerfunc_t)(ilb_handle_t, ilb_hc_srv_t *, + void *); + +/* + * ilb_open creates a session handle that every caller into + * libilb needs to use + */ +ilb_status_t ilb_open(ilb_handle_t *); + +/* + * relinquish the session handle + */ +ilb_status_t ilb_close(ilb_handle_t); + +/* support and general functions */ +ilb_status_t ilb_reset_config(ilb_handle_t); +const char *ilb_errstr(ilb_status_t); + +/* rule-related functions */ +ilb_status_t ilb_create_rule(ilb_handle_t, const ilb_rule_data_t *); +ilb_status_t ilb_destroy_rule(ilb_handle_t, const char *); +ilb_status_t ilb_disable_rule(ilb_handle_t, const char *); +ilb_status_t ilb_enable_rule(ilb_handle_t, const char *); +ilb_status_t ilb_walk_rules(ilb_handle_t, rule_walkerfunc_t, const char *, + void *); + +/* servergroup functionality */ +ilb_status_t ilb_create_servergroup(ilb_handle_t, const char *); +ilb_status_t ilb_destroy_servergroup(ilb_handle_t, const char *); +ilb_status_t ilb_add_server_to_group(ilb_handle_t, const char *, + ilb_server_data_t *); +ilb_status_t ilb_rem_server_from_group(ilb_handle_t, const char *, + ilb_server_data_t *); +ilb_status_t ilb_walk_servergroups(ilb_handle_t, sg_walkerfunc_t, + const char *, void *); +ilb_status_t ilb_walk_servers(ilb_handle_t, srv_walkerfunc_t, + const char *, void *); + +/* functions for individual servers */ +ilb_status_t ilb_enable_server(ilb_handle_t, ilb_server_data_t *, void *); +ilb_status_t ilb_disable_server(ilb_handle_t, ilb_server_data_t *, void *); +ilb_status_t ilb_srvID_to_address(ilb_handle_t, ilb_server_data_t *, + const char *); +ilb_status_t 
ilb_address_to_srvID(ilb_handle_t, ilb_server_data_t *, + const char *); + +/* health check-related functions */ +ilb_status_t ilb_create_hc(ilb_handle_t, const ilb_hc_info_t *); +ilb_status_t ilb_destroy_hc(ilb_handle_t, const char *); +ilb_status_t ilb_get_hc_info(ilb_handle_t, const char *, ilb_hc_info_t *); +ilb_status_t ilb_walk_hc(ilb_handle_t, hc_walkerfunc_t, void *); +ilb_status_t ilb_walk_hc_srvs(ilb_handle_t, hc_srvwalkerfunc_t, + const char *, void *); + +/* To show NAT table entries of ILB */ +ilb_status_t ilb_show_nat(ilb_handle_t, ilb_nat_info_t[], size_t *, + boolean_t *); + +/* To show persistent table entries of ILB */ +ilb_status_t ilb_show_persist(ilb_handle_t, ilb_persist_info_t[], size_t *, + boolean_t *); + +/* PRIVATE */ +int ilb_cmp_ipaddr(ilb_ip_addr_t *, ilb_ip_addr_t *, int64_t *); +int ilb_cmp_in6_addr(struct in6_addr *, struct in6_addr *, int64_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBILB_H */ diff --git a/usr/src/lib/libilb/common/libilb_impl.h b/usr/src/lib/libilb/common/libilb_impl.h new file mode 100644 index 0000000000..7636c37c56 --- /dev/null +++ b/usr/src/lib/libilb/common/libilb_impl.h @@ -0,0 +1,253 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LIBILB_IMPL_H +#define _LIBILB_IMPL_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/note.h> +#include <netinet/in.h> +#include <netdb.h> +#include <net/if.h> +#include <inet/ilb.h> +#include <libilb.h> +#include <thread.h> +#include <synch.h> + +#if !defined max +#define max(a, b) ((a) > (b) ? (a) : (b)) +#endif + +/* The UNIX domain socket path to talk to ilbd. */ +#define SOCKET_PATH "/var/run/daemon/ilb_sock" + +/* The max message size for communicating with ilbd */ +#define ILBD_MSG_SIZE 102400 + +/* + * moral equivalent of ntohl for IPv6 addresses, MSB and LSB (64 bit each), + * assign to uint64_t variables + */ +#define INV6_N2H_MSB64(addr) \ + (((uint64_t)ntohl((addr)->_S6_un._S6_u32[0]) << 32) + \ + (ntohl((addr)->_S6_un._S6_u32[1]))) + +#define INV6_N2H_LSB64(addr) \ + (((uint64_t)ntohl((addr)->_S6_un._S6_u32[2]) << 32) + \ + (ntohl((addr)->_S6_un._S6_u32[3]))) + +/* + * moral equiv. 
of htonl of MSB and LSB 64-bit portions to an IPv6 address + */ +#define INV6_H2N_MSB64(addr, msb) \ + (addr)->_S6_un._S6_u32[0] = htonl((msb) >> 32); \ + (addr)->_S6_un._S6_u32[1] = htonl((msb) & 0xffffffff) + +#define INV6_H2N_LSB64(addr, lsb) \ + (addr)->_S6_un._S6_u32[2] = htonl((lsb) >> 32); \ + (addr)->_S6_un._S6_u32[3] = htonl((lsb) & 0xffffffff) + +#define IP_COPY_CLI_2_IMPL(_e, _i) \ + bzero(_i, sizeof (*(_i))); \ + if ((_e)->ia_af == AF_INET6) \ + (void) memcpy((_i), &(_e)->ia_v6, sizeof (*(_i))); \ + else \ + IN6_INADDR_TO_V4MAPPED(&(_e)->ia_v4, (_i)) + +#define IP_COPY_IMPL_2_CLI(_i, _e) \ + do { \ + bzero(_e, sizeof (*(_e))); \ + if (IN6_IS_ADDR_V4MAPPED(_i)) { \ + (_e)->ia_af = AF_INET; \ + IN6_V4MAPPED_TO_INADDR((_i), &(_e)->ia_v4); \ + } else { \ + (_e)->ia_af = AF_INET6; \ + (void) memcpy(&(_e)->ia_v6, (_i), \ + sizeof ((_e)->ia_v6)); \ + } \ + _NOTE(CONSTCOND) \ + } while (0) + +#define GET_AF(_a) IN6_IS_ADDR_V4MAPPED(_a)?AF_INET:AF_INET6 +#define IS_AF_VALID(_af) (_af == AF_INET || _af == AF_INET6) + +typedef enum { + ILBD_BAD_CMD = 0, + /* servergroup commands */ + ILBD_CREATE_SERVERGROUP, + ILBD_ADD_SERVER_TO_GROUP, + ILBD_REM_SERVER_FROM_GROUP, + ILBD_ENABLE_SERVER, + ILBD_DISABLE_SERVER, + ILBD_DESTROY_SERVERGROUP, + ILBD_RETRIEVE_SG_NAMES, /* names of all SGs registered */ + ILBD_RETRIEVE_SG_HOSTS, /* all hosts for a given SG (hndl) */ + ILBD_SRV_ADDR2ID, /* fill in serverID for given address */ + ILBD_SRV_ID2ADDR, /* fill in address from given serverID */ + /* rule commands */ + ILBD_CREATE_RULE, + ILBD_DESTROY_RULE, + ILBD_ENABLE_RULE, + ILBD_DISABLE_RULE, + ILBD_RETRIEVE_RULE_NAMES, + ILBD_RETRIEVE_RULE, + + ILBD_CREATE_HC, + ILBD_DESTROY_HC, + ILBD_GET_HC_INFO, + ILBD_GET_HC_SRVS, + ILBD_GET_HC_RULES, + ILBD_RETRIEVE_HC_NAMES, + + ILBD_SHOW_NAT, /* list the NAT table */ + ILBD_SHOW_PERSIST, /* list the sticky table */ + + ILBD_CMD_OK, /* Requested operation succeeds. */ + ILBD_CMD_ERROR /* Rquested operation fails. 
*/ +} ilbd_cmd_t; + +typedef struct sg_srv { + int32_t sgs_flags; /* enabled, dis- */ + struct in6_addr sgs_addr; + int32_t sgs_minport; + int32_t sgs_maxport; + int32_t sgs_id; /* numerical part of srvID */ + char sgs_srvID[ILB_NAMESZ]; /* "name" given to server */ +} ilb_sg_srv_t; + +typedef struct sg_info { + int32_t sg_flags; + char sg_name[ILB_SGNAME_SZ]; + int32_t sg_srvcount; + ilb_sg_srv_t sg_servers[]; +} ilb_sg_info_t; + +typedef char ilbd_name_t[ILB_NAMESZ]; + +typedef struct ilbd_namelist { + int32_t ilbl_flags; + int32_t ilbl_count; + ilbd_name_t ilbl_name[]; +} ilbd_namelist_t; + +#define ILBL_NAME_OFFSET (offsetof(ilbd_namelist_t, ilbl_name)) + +typedef struct rule_info { + int32_t rl_flags; + char rl_name[ILB_NAMESZ]; + struct in6_addr rl_vip; + uint16_t rl_proto; + uint16_t rl_ipversion; + int32_t rl_minport; + int32_t rl_maxport; + ilb_algo_t rl_algo; + ilb_topo_t rl_topo; + struct in6_addr rl_nat_src_start; + struct in6_addr rl_nat_src_end; + struct in6_addr rl_stickymask; + uint32_t rl_conndrain; + uint32_t rl_nat_timeout; + uint32_t rl_sticky_timeout; + in_port_t rl_hcport; + ilb_hcp_flags_t rl_hcpflag; + char rl_sgname[ILB_SGNAME_SZ]; + char rl_hcname[ILB_NAMESZ]; +} ilb_rule_info_t; + +/* + * Struct to represent show NAT request and reply. + * + * sn_num: (request) indicates the number of entries wanted; + * (reply) the number of entries returned; + * sn_data: NAT/persist able entries (is uint32_t aligned). + */ +typedef struct { + uint32_t sn_num; + uint32_t sn_data[]; +} ilb_show_info_t; + +/* + * Struct to represent the set of servers associated with a hc object. + * + * rs_num_srvs: number of servers in this struct. + * rs_srvs: array of servers. + */ +typedef struct { + uint32_t rs_num_srvs; + ilb_hc_srv_t rs_srvs[]; +} ilb_hc_rule_srv_t; + +typedef struct ilb_handle_impl { + mutex_t h_lock; + cond_t h_cv; + boolean_t h_busy; + boolean_t h_valid; + boolean_t h_closing; + uint32_t h_waiter; + int h_socket; + ilb_status_t h_error; /* ... 
that caused invalidation */ +} ilb_handle_impl_t; + +/* + * Communication flags used in ilb_comm_t. + * + * ILB_COMM_END: end of communication + */ +#define ILB_COMM_END 0x1 + +/* + * The message structure used to communicate with ilbd. + * + * ic_cmd: the message type. + * ic_flags: communication flags + * ic_data: message data (is uint32_t aligned). + */ +typedef struct { + ilbd_cmd_t ic_cmd; + int32_t ic_flags; + uint32_t ic_data[]; +} ilb_comm_t; + +ilb_status_t i_check_ip_range(ilb_ip_addr_t *, ilb_ip_addr_t *); +ilb_status_t i_ilb_do_comm(ilb_handle_t, ilb_comm_t *, size_t, ilb_comm_t *, + size_t *); +void i_ilb_close_comm(ilb_handle_t); +struct in6_addr i_next_ip_addr(struct in6_addr *, int); + +ilb_status_t i_ilb_retrieve_rule_names(ilb_handle_t, ilb_comm_t **, + size_t *); +ilb_comm_t *i_ilb_alloc_req(ilbd_cmd_t, size_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBILB_IMPL_H */ diff --git a/usr/src/lib/libilb/common/llib-lilb b/usr/src/lib/libilb/common/llib-lilb new file mode 100644 index 0000000000..a57ef8dd3e --- /dev/null +++ b/usr/src/lib/libilb/common/llib-lilb @@ -0,0 +1,29 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/*LINTLIBRARY*/ +/*PROTOLIB1*/ + +#include <libilb.h> diff --git a/usr/src/lib/libilb/common/mapfile-vers b/usr/src/lib/libilb/common/mapfile-vers new file mode 100644 index 0000000000..353a0a6958 --- /dev/null +++ b/usr/src/lib/libilb/common/mapfile-vers @@ -0,0 +1,78 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# MAPFILE HEADER START +# +# WARNING: STOP NOW. DO NOT MODIFY THIS FILE. +# Object versioning must comply with the rules detailed in +# +# usr/src/lib/README.mapfiles +# +# You should not be making modifications here until you've read the most current +# copy of that file. If you need help, contact a gatekeeper for guidance. 
+# +# MAPFILE HEADER END +# + + +SUNW_1.1 { + global: + ilb_add_server_to_group; + ilb_address_to_srvID; + ilb_close; + ilb_create_hc; + ilb_create_rule; + ilb_create_servergroup; + ilb_destroy_hc; + ilb_destroy_rule; + ilb_destroy_servergroup; + ilb_disable_rule; + ilb_disable_server; + ilb_enable_rule; + ilb_enable_server; + ilb_errstr; + ilb_get_hc_info; + ilb_open; + ilb_rem_server_from_group; + ilb_reset_config; + ilb_srvID_to_address; + ilb_show_nat; + ilb_show_persist; + ilb_walk_hc; + ilb_walk_hc_srvs; + ilb_walk_rules; + ilb_walk_servergroups; + ilb_walk_servers; +}; + +SUNWprivate { + global: + ilb_cmp_in6_addr; + ilb_cmp_ipaddr; + local: + *; +}; + diff --git a/usr/src/lib/libilb/i386/Makefile b/usr/src/lib/libilb/i386/Makefile new file mode 100644 index 0000000000..c86be4377c --- /dev/null +++ b/usr/src/lib/libilb/i386/Makefile @@ -0,0 +1,28 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# + +include ../Makefile.com + +install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT) diff --git a/usr/src/lib/libilb/sparc/Makefile b/usr/src/lib/libilb/sparc/Makefile new file mode 100644 index 0000000000..c86be4377c --- /dev/null +++ b/usr/src/lib/libilb/sparc/Makefile @@ -0,0 +1,28 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +include ../Makefile.com + +install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT) diff --git a/usr/src/lib/libilb/sparcv9/Makefile b/usr/src/lib/libilb/sparcv9/Makefile new file mode 100644 index 0000000000..f1fa7d3c45 --- /dev/null +++ b/usr/src/lib/libilb/sparcv9/Makefile @@ -0,0 +1,34 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# + +include ../Makefile.com +include ../../Makefile.lib.64 + +.KEEP_STATE: + +all: $(LIBS) + +install: all $(ROOTLIBS64) $(ROOTLINKS64) diff --git a/usr/src/lib/libinetutil/common/libinetutil.h b/usr/src/lib/libinetutil/common/libinetutil.h index bacf64938a..a285103af6 100644 --- a/usr/src/lib/libinetutil/common/libinetutil.h +++ b/usr/src/lib/libinetutil/common/libinetutil.h @@ -158,7 +158,7 @@ typedef struct iu_timer_queue iu_tq_t; typedef int iu_timer_id_t; -#define IU_TIMER_ID_MAX 1024 /* max number of concurrent timers */ +#define IU_TIMER_ID_MAX 4096 /* max number of concurrent timers */ /* * a iu_tq_callback_t is a function that is called back in response to a diff --git a/usr/src/lib/libinetutil/common/ofmt.c b/usr/src/lib/libinetutil/common/ofmt.c index 2bfc4aafa4..c0fce1839e 100644 --- a/usr/src/lib/libinetutil/common/ofmt.c +++ b/usr/src/lib/libinetutil/common/ofmt.c @@ -66,6 +66,8 @@ typedef struct ofmt_state_s { boolean_t os_wrap; int os_nbad; char **os_badfields; + boolean_t os_multiline; + int os_maxnamelen; /* longest name (f. 
multiline) */ } ofmt_state_t; /* * A B_TRUE return value from the callback function will print out the contents @@ -187,9 +189,12 @@ ofmt_open(const char *str, const ofmt_field_t *template, uint_t flags, ofmt_status_t err = OFMT_SUCCESS; boolean_t parsable = ((flags & OFMT_PARSABLE) != 0); boolean_t wrap = ((flags & OFMT_WRAP) != 0); + boolean_t multiline = (flags & OFMT_MULTILINE); *ofmt = NULL; if (parsable) { + if (multiline) + return (OFMT_EPARSEMULTI); /* * For parsable output mode, the caller always needs * to specify precisely which fields are to be selected, @@ -229,6 +234,7 @@ ofmt_open(const char *str, const ofmt_field_t *template, uint_t flags, os->os_parsable = parsable; os->os_wrap = wrap; + os->os_multiline = multiline; of = os->os_fields; of_index = 0; /* @@ -260,6 +266,11 @@ ofmt_open(const char *str, const ofmt_field_t *template, uint_t flags, of[of_index].of_name = strdup(template[j].of_name); if (of[of_index].of_name == NULL) goto nomem; + if (multiline) { + int n = strlen(of[of_index].of_name); + + os->os_maxnamelen = MAX(n, os->os_maxnamelen); + } of[of_index].of_width = template[j].of_width; of[of_index].of_id = template[j].of_id; of[of_index].of_cb = template[j].of_cb; @@ -310,14 +321,13 @@ ofmt_print_field(ofmt_state_t *os, ofmt_field_t *ofp, const char *value, uint_t width = ofp->of_width; uint_t valwidth; uint_t compress; - boolean_t parsable = os->os_parsable; char c; /* * Parsable fields are separated by ':'. If such a field contains * a ':' or '\', this character is prefixed by a '\'. 
*/ - if (parsable) { + if (os->os_parsable) { if (os->os_nfields == 1) { (void) printf("%s", value); return; @@ -329,7 +339,13 @@ ofmt_print_field(ofmt_state_t *os, ofmt_field_t *ofp, const char *value, } if (!os->os_lastfield) (void) putchar(':'); - return; + } else if (os->os_multiline) { + if (value[0] == '\0') + value = OFMT_VAL_UNDEF; + (void) printf("%*.*s: %s", os->os_maxnamelen, + os->os_maxnamelen, ofp->of_name, value); + if (!os->os_lastfield) + (void) putchar('\n'); } else { if (os->os_lastfield) { (void) printf("%s", value); @@ -408,11 +424,15 @@ ofmt_print(ofmt_handle_t ofmt, void *arg) return; } - if ((os->os_nrow++ % os->os_winsize.ws_row) == 0 && !os->os_parsable) { + if ((os->os_nrow++ % os->os_winsize.ws_row) == 0 && !os->os_parsable && + !os->os_multiline) { ofmt_print_header(os); os->os_nrow++; } + if (os->os_multiline && os->os_nrow > 1) + (void) putchar('\n'); + of = os->os_fields; escsep = (os->os_nfields > 1); more_rows = B_FALSE; @@ -549,6 +569,9 @@ ofmt_strerror(ofmt_handle_t ofmt, ofmt_status_t err, char *buf, uint_t bufsize) case OFMT_ENOFIELDS: s = "no valid output fields"; break; + case OFMT_EPARSEMULTI: + s = "multiline mode incompatible with parsable mode"; + break; case OFMT_EPARSEALL: s = "output field `all' invalid in parsable mode"; break; diff --git a/usr/src/lib/libinetutil/common/ofmt.h b/usr/src/lib/libinetutil/common/ofmt.h index d3e4142578..81693ae325 100644 --- a/usr/src/lib/libinetutil/common/ofmt.h +++ b/usr/src/lib/libinetutil/common/ofmt.h @@ -35,6 +35,8 @@ * are separated by ':', with the ':' character itself escaped by a \ * (e.g., IPv6 addresses may be printed as "fe80\:\:1"); single field output * is printed as-is. + * In multiline mode, every [field,value] pair is printed in a line of + * its own, thus: "field: value". * * The caller must open a handle for each set of fields to be printed by * invoking ofmt_open(). 
The invocation to ofmt_open must provide the list of @@ -57,6 +59,9 @@ * (non machine-parsable) mode, a NULL fields_str, or a value of "all" for * fields_str, is treated as a request to print all allowable fields that fit * other applicable constraints. + * To achieve multiline mode, OFMT_MULTILINE needs to be specified in oflags. + * Specifying both OFMT_MULTILINE and OFMT_PARSABLE will result in + * OFMT_EPARSEMULTI. * * Thus a typical invocation to open the ofmt_handle would be: * @@ -135,7 +140,8 @@ typedef enum { OFMT_EPARSEALL, /* 'all' invalid in parsable mode */ OFMT_EPARSENONE, /* output fields missing in parsable mode */ OFMT_EPARSEWRAP, /* parsable mode incompatible with wrap mode */ - OFMT_ENOTEMPLATE /* no template provided for fields */ + OFMT_ENOTEMPLATE, /* no template provided for fields */ + OFMT_EPARSEMULTI /* parsable and multiline don't mix */ } ofmt_status_t; /* @@ -171,6 +177,7 @@ extern ofmt_status_t ofmt_open(const char *, const ofmt_field_t *, uint_t, #define OFMT_PARSABLE 0x00000001 /* machine parsable mode */ #define OFMT_WRAP 0x00000002 /* wrap output if field width is exceeded */ +#define OFMT_MULTILINE 0x00000004 /* "long" output: "name: value" lines */ /* * ofmt_close() must be called to free resources associated diff --git a/usr/src/lib/libinetutil/common/tq.c b/usr/src/lib/libinetutil/common/tq.c index 78505462bd..e809b3289d 100644 --- a/usr/src/lib/libinetutil/common/tq.c +++ b/usr/src/lib/libinetutil/common/tq.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. 
@@ -20,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <stdlib.h> #include <limits.h> #include <sys/time.h> @@ -342,7 +339,14 @@ iu_expire_timers(iu_tq_t *tq) for (node = tq->iutq_head; node != NULL; node = node->iutn_expire_next) { - if (node->iutn_abs_timeout > current_time) + /* + * If the timeout is within 1 millisec of current time, + * consider it as expired already. We do this because + * iu_earliest_timer() only has millisec granularity. + * So we should also use millisec granularity in + * comparing timeout values. + */ + if (node->iutn_abs_timeout - current_time > 1000000) break; /* diff --git a/usr/src/lib/libsecdb/auth_attr.txt b/usr/src/lib/libsecdb/auth_attr.txt index 12123ba3de..8929f8a694 100644 --- a/usr/src/lib/libsecdb/auth_attr.txt +++ b/usr/src/lib/libsecdb/auth_attr.txt @@ -92,6 +92,8 @@ solaris.mms.io.write:::Read and Write Permission for MMS Volumes::help=AuthMMSIO # solaris.network.:::Network::help=NetworkHeader.html solaris.network.autoconf:::Network Auto-Magic Configuration::help=NetworkAutoconf.html +solaris.network.ilb.config:::Network ILB Configuration::help=NetworkILBconf.html +solaris.network.ilb.enable:::Network ILB Enable Configuration::help=NetworkILBenable.html solaris.network.link.security:::Link Security::help=LinkSecurity.html solaris.network.wifi.config:::Wifi Config::help=WifiConfig.html solaris.network.wifi.wep:::Wifi Wep::help=WifiWep.html @@ -134,6 +136,7 @@ solaris.smf.manage.extended-accounting.flow:::Manage Task Extended Accounting Se solaris.smf.manage.hal:::Manage HAL Service States::help=SmfHALStates.html solaris.smf.manage.hotplug:::Manage Hotplug Service::help=SmfManageHotplug.html solaris.smf.manage.idmap:::Manage Identity Mapping Service States::help=SmfIdmapStates.html +solaris.smf.manage.ilb:::Manage Integrated
Load Balancer Service States::help=SmfILBStates.html solaris.smf.manage.inetd:::Manage inetd and inetd managed services States::help=SmfIntedStates.html solaris.smf.manage.ipsec:::Manage IPsec Service States::help=SmfIPsecStates.html solaris.smf.manage.labels:::Manage label server::help=LabelServer.html diff --git a/usr/src/lib/libsecdb/help/auths/Makefile b/usr/src/lib/libsecdb/help/auths/Makefile index 1d1f131d1d..4c30de414d 100644 --- a/usr/src/lib/libsecdb/help/auths/Makefile +++ b/usr/src/lib/libsecdb/help/auths/Makefile @@ -74,6 +74,7 @@ HTMLENTS = \ SmfExAcctTaskStates.html \ SmfExAcctNetStates.html \ SmfHeader.html \ + SmfILBStates.html \ SmfInetdStates.html \ SmfIPsecStates.html \ SmfManageHeader.html \ @@ -120,6 +121,8 @@ HTMLENTS = \ SmfValueVt.html \ SmfWpaStates.html \ NetworkAutoconf.html \ + NetworkILBconf.html \ + NetworkILBenable.html \ NetworkHeader.html \ WifiConfig.html \ WifiWep.html \ diff --git a/usr/src/lib/libsecdb/help/auths/NetworkILBconf.html b/usr/src/lib/libsecdb/help/auths/NetworkILBconf.html new file mode 100644 index 0000000000..b0a175258d --- /dev/null +++ b/usr/src/lib/libsecdb/help/auths/NetworkILBconf.html @@ -0,0 +1,41 @@ +<html> + +<!-- + Copyright 2009 Sun Microsystems, Inc. All rights reserved. + Use is subject to license terms. + + CDDL HEADER START + + The contents of this file are subject to the terms of the + Common Development and Distribution License (the "License"). + You may not use this file except in compliance with the License. + + You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + or http://www.opensolaris.org/os/licensing. + See the License for the specific language governing permissions + and limitations under the License. + + When distributing Covered Code, include this CDDL HEADER in each + file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ If applicable, add the following below this CDDL HEADER, with the + fields enclosed by brackets "[]" replaced with your own identifying + information: Portions Copyright [yyyy] [name of copyright owner] + + CDDL HEADER END +--> + +<head> +<!-- +meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" +--> +</head> +<body> +When Network ILB Configuration is in the Authorizations +Included column, it grants permission to administrate the add, +create and destroy subcommands of ilbadm(1M) +<p> +Note, to retrieve configuration and state information about ILB by +using ilbadm(1M) doesn't require Network ILB Configuration +authorization. +</body> +</html> diff --git a/usr/src/lib/libsecdb/help/auths/NetworkILBenable.html b/usr/src/lib/libsecdb/help/auths/NetworkILBenable.html new file mode 100644 index 0000000000..3cb93b237e --- /dev/null +++ b/usr/src/lib/libsecdb/help/auths/NetworkILBenable.html @@ -0,0 +1,41 @@ +<html> + +<!-- + Copyright 2009 Sun Microsystems, Inc. All rights reserved. + Use is subject to license terms. + + CDDL HEADER START + + The contents of this file are subject to the terms of the + Common Development and Distribution License (the "License"). + You may not use this file except in compliance with the License. + + You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + or http://www.opensolaris.org/os/licensing. + See the License for the specific language governing permissions + and limitations under the License. + + When distributing Covered Code, include this CDDL HEADER in each + file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ If applicable, add the following below this CDDL HEADER, with the + fields enclosed by brackets "[]" replaced with your own identifying + information: Portions Copyright [yyyy] [name of copyright owner] + + CDDL HEADER END +--> + +<head> +<!-- +meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" +--> +</head> +<body> +When Network ILB Enable Configuration is in the Authorizations +Included column, it grants permission to administrate the enable +and disable subcommands of ilbadm(1M). +<p> +Note, to retrieve configuration and state information about ILB by +using ilbadm(1M) doesn't require Network ILB Configuration +authorization. +</body> +</html> diff --git a/usr/src/lib/libsecdb/help/auths/SmfILBStates.html b/usr/src/lib/libsecdb/help/auths/SmfILBStates.html new file mode 100644 index 0000000000..5e4bc62bf3 --- /dev/null +++ b/usr/src/lib/libsecdb/help/auths/SmfILBStates.html @@ -0,0 +1,36 @@ +<HTML> +<!-- + CDDL HEADER START + + The contents of this file are subject to the terms of the + Common Development and Distribution License (the "License"). + You may not use this file except in compliance with the License. + + You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + or http://www.opensolaris.org/os/licensing. + See the License for the specific language governing permissions + and limitations under the License. + + When distributing Covered Code, include this CDDL HEADER in each + file and include the License file at usr/src/OPENSOLARIS.LICENSE. + If applicable, add the following below this CDDL HEADER, with the + fields enclosed by brackets "[]" replaced with your own identifying + information: Portions Copyright [yyyy] [name of copyright owner] + + CDDL HEADER END + +Copyright 2009 Sun Microsystems, Inc. All rights reserved. +Use is subject to license terms. 
+--> + <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1"> + +<BODY> +When Integrated Load Balancer Service States is in the Authorizations Included +column, it grants the authorization to enable, disable, or restart +Integrated Load Balancer service. +<p> +If Integrated Load Balancer Service States is grayed, then you are not entitled +to Add or Remove this authorization. +<BR> +</BODY> +</HTML> diff --git a/usr/src/lib/libsecdb/help/profiles/Makefile b/usr/src/lib/libsecdb/help/profiles/Makefile index fab13a3051..654138efd6 100644 --- a/usr/src/lib/libsecdb/help/profiles/Makefile +++ b/usr/src/lib/libsecdb/help/profiles/Makefile @@ -60,6 +60,7 @@ HTMLENTS = \ RtNameServiceAdmin.html \ RtNameServiceSecure.html \ RtNetAutoconf.html \ + RtNetILB.html \ RtNetIPsec.html \ RtNetMngmnt.html \ RtNetObservability.html \ diff --git a/usr/src/lib/libsecdb/help/profiles/RtNetILB.html b/usr/src/lib/libsecdb/help/profiles/RtNetILB.html new file mode 100644 index 0000000000..95a7f99bef --- /dev/null +++ b/usr/src/lib/libsecdb/help/profiles/RtNetILB.html @@ -0,0 +1,37 @@ +<HTML> +<!-- + CDDL HEADER START + + The contents of this file are subject to the terms of the + Common Development and Distribution License (the "License"). + You may not use this file except in compliance with the License. + + You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + or http://www.opensolaris.org/os/licensing. + See the License for the specific language governing permissions + and limitations under the License. + + When distributing Covered Code, include this CDDL HEADER in each + file and include the License file at usr/src/OPENSOLARIS.LICENSE. + If applicable, add the following below this CDDL HEADER, with the + fields enclosed by brackets "[]" replaced with your own identifying + information: Portions Copyright [yyyy] [name of copyright owner] + + CDDL HEADER END + +-- Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+-- Use is subject to license terms. +--> +<head> +<title></title> +</head> +<body> +When Network ILB is in the Rights Included column, it grants the +right to manage the configuration of ILB feature by using ilbadm(1M) +command. +<p> +If Network ILB is grayed, then you are not entitled to Add or +Remove this right. +<p> +</body> +</html> diff --git a/usr/src/lib/libsecdb/prof_attr.txt b/usr/src/lib/libsecdb/prof_attr.txt index ad497ee369..c20338c570 100644 --- a/usr/src/lib/libsecdb/prof_attr.txt +++ b/usr/src/lib/libsecdb/prof_attr.txt @@ -62,7 +62,8 @@ MMS Operator:::MMS Media Manager Operator:auths=solaris.smf.manage.mms,solaris.m MMS User:::MMS Tape User:auths=solaris.mms.io.*;help=RtMMSUser.html NDMP Management:::Manage the NDMP service:auths=solaris.smf.manage.ndmp,solaris.smf.value.ndmp,solaris.smf.read.ndmp;help=RtNdmpMngmnt.html Network Autoconf:::Manage network auto-magic configuration via nwamd:auths=solaris.network.autoconf;help=RtNetAutoconf.html -Network Management:::Manage the host and network configuration:auths=solaris.smf.manage.name-service-cache,solaris.smf.manage.bind,solaris.smf.value.routing,solaris.smf.manage.routing,solaris.smf.value.nwam,solaris.smf.manage.nwam,solaris.smf.manage.tnd,solaris.smf.manage.tnctl,solaris.smf.manage.wpa,solaris.smf.value.mdns,solaris.smf.manage.mdns;profiles=Network Wifi Management,Inetd Management,Network Autoconf,Network Observability;help=RtNetMngmnt.html +Network ILB:::Manage ILB configuration via ilbadm:auths=solaris.network.ilb.config,solaris.network.ilb.enable;help=RtNetILB.html +Network Management:::Manage the host and network configuration:auths=solaris.smf.manage.name-service-cache,solaris.smf.manage.bind,solaris.smf.value.routing,solaris.smf.manage.routing,solaris.smf.value.nwam,solaris.smf.manage.nwam,solaris.smf.manage.tnd,solaris.smf.manage.tnctl,solaris.smf.manage.wpa,solaris.smf.value.mdns,solaris.smf.manage.mdns,solaris.smf.manage.ilb;profiles=Network Wifi Management,Inetd Management,Network 
Autoconf,Network Observability;help=RtNetMngmnt.html Network Observability:::Allow access to observability devices:privs=net_observability;help=RtNetObservability.html Network Security:::Manage network and host security:auths=solaris.smf.manage.ssh,solaris.smf.value.tnd;profiles=Network Wifi Security,Network Link Security,Network IPsec Management;help=RtNetSecure.html Network Wifi Management:::Manage wifi network configuration:auths=solaris.network.wifi.config;help=RtNetWifiMngmnt.html diff --git a/usr/src/lib/libsecdb/user_attr.txt b/usr/src/lib/libsecdb/user_attr.txt index 244b202415..f48558b72c 100644 --- a/usr/src/lib/libsecdb/user_attr.txt +++ b/usr/src/lib/libsecdb/user_attr.txt @@ -1,5 +1,5 @@ # -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # CDDL HEADER START @@ -30,4 +30,5 @@ root::::auths=solaris.*,solaris.grant;profiles=All;lock_after_retries=no;min_lab lp::::profiles=Printer Management adm::::profiles=Log Management dladm::::auths=solaris.smf.manage.wpa,solaris.smf.modify +daemon::::auths=solaris.smf.manage.ilb,solaris.smf.modify.application zfssnap::::type=role;auths=solaris.smf.manage.zfs-auto-snapshot;profiles=ZFS File System Management diff --git a/usr/src/pkgdefs/Makefile b/usr/src/pkgdefs/Makefile index 7421c57412..a61a50bcd0 100644 --- a/usr/src/pkgdefs/Makefile +++ b/usr/src/pkgdefs/Makefile @@ -292,6 +292,8 @@ COMMON_SUBDIRS= \ SUNWiir \ SUNWiiu \ SUNWigb \ + SUNWilb \ + SUNWilbr \ SUNWima \ SUNWimac \ SUNWimar \ diff --git a/usr/src/pkgdefs/SUNW0on/prototype_com b/usr/src/pkgdefs/SUNW0on/prototype_com index 6aa1273d9e..b1d1f9e711 100644 --- a/usr/src/pkgdefs/SUNW0on/prototype_com +++ b/usr/src/pkgdefs/SUNW0on/prototype_com @@ -295,6 +295,8 @@ f none usr/lib/help/auths/locale/SmfValueVt.html 444 root bin f none usr/lib/help/auths/locale/SmfWpaStates.html 444 root bin f none usr/lib/help/auths/locale/NetworkAutoconf.html 444 root 
bin f none usr/lib/help/auths/locale/NetworkHeader.html 444 root bin +f none usr/lib/help/auths/locale/NetworkILBconf.html 444 root bin +f none usr/lib/help/auths/locale/NetworkILBenable.html 444 root bin f none usr/lib/help/auths/locale/WifiConfig.html 444 root bin f none usr/lib/help/auths/locale/WifiWep.html 444 root bin f none usr/lib/help/auths/locale/LinkSecurity.html 444 root bin @@ -303,6 +305,7 @@ f none usr/lib/help/auths/locale/MailQueue.html 0444 root bin f none usr/lib/help/auths/locale/DevCDRW.html 0444 root bin f none usr/lib/help/auths/locale/IdmapRules.html 0444 root bin f none usr/lib/help/auths/locale/SmfIdmapStates.html 0444 root bin +f none usr/lib/help/auths/locale/SmfILBStates.html 0444 root bin f none usr/lib/help/auths/locale/SmfValueIdmap.html 0444 root bin f none usr/lib/help/auths/locale/FileChown.html 444 root bin f none usr/lib/help/auths/locale/FileHeader.html 444 root bin @@ -376,6 +379,7 @@ f none usr/lib/help/profiles/locale/RtMediaRestore.html 444 root bin f none usr/lib/help/profiles/locale/RtNameServiceAdmin.html 444 root bin f none usr/lib/help/profiles/locale/RtNameServiceSecure.html 444 root bin f none usr/lib/help/profiles/locale/RtNetAutoconf.html 444 root bin +f none usr/lib/help/profiles/locale/RtNetILB.html 444 root bin f none usr/lib/help/profiles/locale/RtNetIPsec.html 444 root bin f none usr/lib/help/profiles/locale/RtNetMngmnt.html 444 root bin f none usr/lib/help/profiles/locale/RtNetObservability.html 444 root bin diff --git a/usr/src/pkgdefs/SUNWcsu/prototype_com b/usr/src/pkgdefs/SUNWcsu/prototype_com index 1e0a68fe8d..03cf229c6c 100644 --- a/usr/src/pkgdefs/SUNWcsu/prototype_com +++ b/usr/src/pkgdefs/SUNWcsu/prototype_com @@ -484,6 +484,8 @@ f none usr/lib/help/auths/locale/C/LoginHeader.html 444 root bin f none usr/lib/help/auths/locale/C/LoginRemote.html 444 root bin f none usr/lib/help/auths/locale/C/NetworkAutoconf.html 444 root bin f none usr/lib/help/auths/locale/C/NetworkHeader.html 444 root bin +f none 
usr/lib/help/auths/locale/C/NetworkILBconf.html 444 root bin +f none usr/lib/help/auths/locale/C/NetworkILBenable.html 444 root bin f none usr/lib/help/auths/locale/C/PriAdmin.html 444 root bin f none usr/lib/help/auths/locale/C/ProfmgrHeader.html 444 root bin f none usr/lib/help/auths/locale/C/RoleHeader.html 444 root bin @@ -503,6 +505,7 @@ f none usr/lib/help/auths/locale/C/SmfModifyDepend.html 444 root bin f none usr/lib/help/auths/locale/C/SmfModifyFramework.html 444 root bin f none usr/lib/help/auths/locale/C/SmfModifyHeader.html 444 root bin f none usr/lib/help/auths/locale/C/SmfModifyMethod.html 444 root bin +f none usr/lib/help/auths/locale/C/SmfILBStates.html 444 root bin f none usr/lib/help/auths/locale/C/SmfInetdStates.html 444 root bin f none usr/lib/help/auths/locale/C/SmfIPsecStates.html 444 root bin f none usr/lib/help/auths/locale/C/SmfNscdStates.html 444 root bin @@ -604,6 +607,7 @@ f none usr/lib/help/profiles/locale/C/RtMediaRestore.html 444 root bin f none usr/lib/help/profiles/locale/C/RtNameServiceAdmin.html 444 root bin f none usr/lib/help/profiles/locale/C/RtNameServiceSecure.html 444 root bin f none usr/lib/help/profiles/locale/C/RtNetAutoconf.html 444 root bin +f none usr/lib/help/profiles/locale/C/RtNetILB.html 444 root bin f none usr/lib/help/profiles/locale/C/RtNetIPsec.html 444 root bin f none usr/lib/help/profiles/locale/C/RtNetMngmnt.html 444 root bin f none usr/lib/help/profiles/locale/C/RtNetObservability.html 444 root bin diff --git a/usr/src/pkgdefs/SUNWilb/Makefile b/usr/src/pkgdefs/SUNWilb/Makefile new file mode 100644 index 0000000000..1182b25256 --- /dev/null +++ b/usr/src/pkgdefs/SUNWilb/Makefile @@ -0,0 +1,35 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. 
+# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# + +include ../Makefile.com + +.KEEP_STATE: + +all: $(FILES) + +install: all pkg + +include ../Makefile.targ diff --git a/usr/src/pkgdefs/SUNWilb/depend b/usr/src/pkgdefs/SUNWilb/depend new file mode 100644 index 0000000000..0934573a20 --- /dev/null +++ b/usr/src/pkgdefs/SUNWilb/depend @@ -0,0 +1,50 @@ +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# This package information file defines software dependencies associated +# with the pkg. You can define three types of pkg dependencies with this file: +# P indicates a prerequisite for installation +# I indicates an incompatible package +# R indicates a reverse dependency +# <pkg.abbr> see pkginfo(4), PKG parameter +# <name> see pkginfo(4), NAME parameter +# <version> see pkginfo(4), VERSION parameter +# <arch> see pkginfo(4), ARCH parameter +# <type> <pkg.abbr> <name> +# (<arch>)<version> +# (<arch>)<version> +# ... +# <type> <pkg.abbr> <name> +# ... +# + +P SUNWcar Core Architecture, (Root) +P SUNWcakr Core Solaris Kernel Architecture (Root) +P SUNWkvm Core Architecture, (Kvm) +P SUNWcsr Core Solaris, (Root) +P SUNWckr Core Solaris Kernel (Root) +P SUNWcnetr Core Solaris Network Infrastructure (Root) +P SUNWcsu Core Solaris, (Usr) +P SUNWcsd Core Solaris Devices +P SUNWcsl Core Solaris Libraries +P SUNWilbr ILB IP L3/L4 load balancer(Root) diff --git a/usr/src/pkgdefs/SUNWilb/pkginfo.tmpl b/usr/src/pkgdefs/SUNWilb/pkginfo.tmpl new file mode 100644 index 0000000000..b696544117 --- /dev/null +++ b/usr/src/pkgdefs/SUNWilb/pkginfo.tmpl @@ -0,0 +1,56 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# +# +# This required package information file describes characteristics of the +# package, such as package abbreviation, full package name, package version, +# and package architecture. +# +PKG="SUNWilb" +NAME="Integrated IP layer 3/4 load balancer for Solaris (usr)" +ARCH="ISA" +VERSION="ONVERS,REV=0.0.0" +SUNW_PRODNAME="SunOS" +SUNW_PRODVERS="RELEASE/VERSION" +SUNW_PKGTYPE="usr" +MAXINST="1000" +CATEGORY="system" +DESC="Integrated IP layer 3/4 load balancer for Solaris (usr)" +VENDOR="Sun Microsystems, Inc." +HOTLINE="Please contact your local service provider" +EMAIL="" +CLASSES="none" +BASEDIR=/ +SUNW_PKGVERS="1.0" +SUNW_PKG_ALLZONES="true" +SUNW_PKG_HOLLOW="false" +SUNW_PKG_THISZONE="false" +#VSTOCK="<reserved by Release Engineering for package part #>" +#ISTATES="<developer defined>" +#RSTATES='<developer defined>' +#ULIMIT="<developer defined>" +#ORDER="<developer defined>" +#PSTAMP="<developer defined>" +#INTONLY="<developer defined>" diff --git a/usr/src/pkgdefs/SUNWilb/prototype_com b/usr/src/pkgdefs/SUNWilb/prototype_com new file mode 100644 index 0000000000..8282e1f77c --- /dev/null +++ b/usr/src/pkgdefs/SUNWilb/prototype_com @@ -0,0 +1,59 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# +# This required package information file contains a list of package contents. +# The 'pkgmk' command uses this file to identify the contents of a package +# and their location on the development machine when building the package. +# Can be created via a text editor or through use of the 'pkgproto' command. + +#!search <pathname pathname ...> # where to find pkg objects +#!include <filename> # include another 'prototype' file +#!default <mode> <owner> <group> # default used if not specified on entry +#!<param>=<value> # puts parameter in pkg environment + +# packaging files +i pkginfo +i copyright +i depend + +# +# source locations relative to the prototype file +# +# SUNWilb +# +d none usr 0755 root sys +d none usr/lib 0755 root bin +d none usr/lib/inet 0755 root bin +d none usr/lib/inet/ilb 0755 root bin +f none usr/lib/inet/ilbd 555 root bin +f none usr/lib/inet/ilb/ilb_probe 555 root bin +f none usr/lib/libilb.so.1 755 root bin +s none usr/lib/libilb.so=./libilb.so.1 +f none usr/lib/llib-lilb 644 root bin +f none usr/lib/llib-lilb.ln 644 root bin +d none usr/include 0755 root bin +f none usr/include/libilb.h 0644 root bin +d none usr/sbin 0755 root bin +f none usr/sbin/ilbadm 555 root bin diff --git a/usr/src/pkgdefs/SUNWilb/prototype_i386 b/usr/src/pkgdefs/SUNWilb/prototype_i386 new file mode 100644 index 0000000000..8edad55543 --- /dev/null +++ b/usr/src/pkgdefs/SUNWilb/prototype_i386 @@ -0,0 +1,53 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the 
+# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# +# This required package information file contains a list of package contents. +# The 'pkgmk' command uses this file to identify the contents of a package +# and their location on the development machine when building the package. +# Can be created via a text editor or through use of the 'pkgproto' command. 
+ +#!search <pathname pathname ...> # where to find pkg objects +#!include <filename> # include another 'prototype' file +#!default <mode> <owner> <group> # default used if not specified on entry +#!<param>=<value> # puts parameter in pkg environment + +# +# Include ISA independent files (prototype_com) +# +!include prototype_com +# +# +# +# List files which are I386 specific here +# +# source locations relative to the prototype file +# +# +# SUNWilb +# +d none usr/lib/amd64 755 root bin +f none usr/lib/amd64/libilb.so.1 755 root bin +s none usr/lib/amd64/libilb.so=./libilb.so.1 +f none usr/lib/amd64/llib-lilb.ln 644 root bin diff --git a/usr/src/pkgdefs/SUNWilb/prototype_sparc b/usr/src/pkgdefs/SUNWilb/prototype_sparc new file mode 100644 index 0000000000..ccd5491878 --- /dev/null +++ b/usr/src/pkgdefs/SUNWilb/prototype_sparc @@ -0,0 +1,53 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# +# This required package information file contains a list of package contents. 
+# The 'pkgmk' command uses this file to identify the contents of a package +# and their location on the development machine when building the package. +# Can be created via a text editor or through use of the 'pkgproto' command. + +#!search <pathname pathname ...> # where to find pkg objects +#!include <filename> # include another 'prototype' file +#!default <mode> <owner> <group> # default used if not specified on entry +#!<param>=<value> # puts parameter in pkg environment + +# +# Include ISA independent files (prototype_com) +# +!include prototype_com +# +# +# +# List files which are SPARC specific here +# +# source locations relative to the prototype file +# +# +# SUNWilb +# +d none usr/lib/sparcv9 755 root bin +f none usr/lib/sparcv9/libilb.so.1 755 root bin +s none usr/lib/sparcv9/libilb.so=./libilb.so.1 +f none usr/lib/sparcv9/llib-lilb.ln 644 root bin diff --git a/usr/src/pkgdefs/SUNWilbr/Makefile b/usr/src/pkgdefs/SUNWilbr/Makefile new file mode 100644 index 0000000000..377cf88897 --- /dev/null +++ b/usr/src/pkgdefs/SUNWilbr/Makefile @@ -0,0 +1,37 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# + +include ../Makefile.com + +DATAFILES += i.manifest r.manifest + +.KEEP_STATE: + +all: $(FILES) +install: all pkg + +include ../Makefile.targ +include ../Makefile.prtarg diff --git a/usr/src/pkgdefs/SUNWilbr/depend b/usr/src/pkgdefs/SUNWilbr/depend new file mode 100644 index 0000000000..051afae5f5 --- /dev/null +++ b/usr/src/pkgdefs/SUNWilbr/depend @@ -0,0 +1,49 @@ +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# This package information file defines software dependencies associated +# with the pkg. You can define three types of pkg dependencies with this file: +# P indicates a prerequisite for installation +# I indicates an incompatible package +# R indicates a reverse dependency +# <pkg.abbr> see pkginfo(4), PKG parameter +# <name> see pkginfo(4), NAME parameter +# <version> see pkginfo(4), VERSION parameter +# <arch> see pkginfo(4), ARCH parameter +# <type> <pkg.abbr> <name> +# (<arch>)<version> +# (<arch>)<version> +# ... +# <type> <pkg.abbr> <name> +# ... 
+# + +P SUNWcar Core Architecture, (Root) +P SUNWcakr Core Solaris Kernel Architecture (Root) +P SUNWkvm Core Architecture, (Kvm) +P SUNWcsr Core Solaris, (Root) +P SUNWckr Core Solaris Kernel (Root) +P SUNWcnetr Core Solaris Network Infrastructure (Root) +P SUNWcsu Core Solaris, (Usr) +P SUNWcsd Core Solaris Devices +P SUNWcsl Core Solaris Libraries diff --git a/usr/src/pkgdefs/SUNWilbr/pkginfo.tmpl b/usr/src/pkgdefs/SUNWilbr/pkginfo.tmpl new file mode 100644 index 0000000000..0975fee69f --- /dev/null +++ b/usr/src/pkgdefs/SUNWilbr/pkginfo.tmpl @@ -0,0 +1,58 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# + +# +# This required package information file describes characteristics of the +# package, such as package abbreviation, full package name, package version, +# and package architecture. 
+# +PKG="SUNWilbr" +NAME="Integrated IP layer 3/4 load balancer for Solaris(root)" +ARCH="ISA" +VERSION="ONVERS,REV=0.0.0" +SUNW_PRODNAME="SunOS" +SUNW_PRODVERS="RELEASE/VERSION" +SUNW_PKGTYPE="root" +SUNW_PKGVERS="1.0" +MAXINST="1000" +CATEGORY="system" +DESC="Integrated IP layer 3/4 load balancer for Solaris(root)" +VENDOR="Sun Microsystems, Inc." +HOTLINE="Please contact your local service provider" +EMAIL="" +CLASSES="none manifest" +BASEDIR=/ +SUNW_PKG_ALLZONES="true" +SUNW_PKG_HOLLOW="false" +SUNW_PKG_THISZONE="false" +#VSTOCK="<reserved by Release Engineering for package part #>" +#ISTATES="<developer defined>" +#RSTATES='<developer defined>' +#ULIMIT="<developer defined>" +#ORDER="<developer defined>" +#PSTAMP="<developer defined>" +#INTONLY="<developer defined>" diff --git a/usr/src/pkgdefs/SUNWilbr/prototype_com b/usr/src/pkgdefs/SUNWilbr/prototype_com new file mode 100644 index 0000000000..d70a8ca5ca --- /dev/null +++ b/usr/src/pkgdefs/SUNWilbr/prototype_com @@ -0,0 +1,54 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# +# This required package information file contains a list of package contents. +# The 'pkgmk' command uses this file to identify the contents of a package +# and their location on the development machine when building the package. +# Can be created via a text editor or through use of the 'pkgproto' command. +# + +#!search <pathname pathname ...> # where to find pkg objects +#!include <filename> # include another 'prototype' file +#!default <mode> <owner> <group> # default used if not specified on entry +#!<param>=<value> # puts parameter in pkg environment + +# packaging files +i pkginfo +i copyright +i depend +i i.manifest +i r.manifest +# +# +# source locations relative to the prototype file +# +# SUNWilbr +# +d none var 755 root sys +d none var/svc 755 root sys +d none var/svc/manifest 755 root sys +d none var/svc/manifest/network 755 root sys +d none var/svc/manifest/network/loadbalancer 755 root sys +f manifest var/svc/manifest/network/loadbalancer/ilbd.xml 0444 root sys diff --git a/usr/src/pkgdefs/SUNWilbr/prototype_i386 b/usr/src/pkgdefs/SUNWilbr/prototype_i386 new file mode 100644 index 0000000000..9213d36b1e --- /dev/null +++ b/usr/src/pkgdefs/SUNWilbr/prototype_i386 @@ -0,0 +1,49 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# +# This required package information file contains a list of package contents. +# The 'pkgmk' command uses this file to identify the contents of a package +# and their location on the development machine when building the package. +# Can be created via a text editor or through use of the 'pkgproto' command. + +#!search <pathname pathname ...> # where to find pkg objects +#!include <filename> # include another 'prototype' file +#!default <mode> <owner> <group> # default used if not specified on entry +#!<param>=<value> # puts parameter in pkg environment + +# +# Include ISA independent files (prototype_com) +# +!include prototype_com +# +# +# +# List files which are I386 specific here +# +# source locations relative to the prototype file +# +# +# SUNWilbr +# diff --git a/usr/src/pkgdefs/SUNWilbr/prototype_sparc b/usr/src/pkgdefs/SUNWilbr/prototype_sparc new file mode 100644 index 0000000000..52e38b20aa --- /dev/null +++ b/usr/src/pkgdefs/SUNWilbr/prototype_sparc @@ -0,0 +1,48 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# +# This required package information file contains a list of package contents. +# The 'pkgmk' command uses this file to identify the contents of a package +# and their location on the development machine when building the package. +# Can be created via a text editor or through use of the 'pkgproto' command. + +#!search <pathname pathname ...> # where to find pkg objects +#!include <filename> # include another 'prototype' file +#!default <mode> <owner> <group> # default used if not specified on entry +#!<param>=<value> # puts parameter in pkg environment + +# +# Include ISA independent files (prototype_com) +# +!include prototype_com +# +# +# +# List files which are SPARC specific here +# +# source locations relative to the prototype file +# +# +# SUNWilbr +# diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files index aa1b921b69..ded59fafd2 100644 --- a/usr/src/uts/common/Makefile.files +++ b/usr/src/uts/common/Makefile.files @@ -531,6 +531,7 @@ IP_SCTP_OBJS = sctp.o sctp_opt_data.o sctp_output.o \ sctp_timer.o sctp_heartbeat.o sctp_hash.o \ sctp_ioc.o sctp_bind.o sctp_notify.o sctp_asconf.o \ sctp_addr.o tn_ipopt.o tnet.o ip_netinfo.o +IP_ILB_OBJS = ilb.o ilb_nat.o ilb_conn.o ilb_alg_hash.o ilb_alg_rr.o IP_OBJS += igmp.o ipmp.o ip.o ip6.o ip6_asp.o ip6_if.o ip6_ire.o ip6_rts.o \ ip_if.o ip_ire.o ip_listutils.o ip_mroute.o \ @@ -543,7 +544,8 @@ IP_OBJS += igmp.o ipmp.o ip.o ip6.o ip6_asp.o ip6_if.o ip6_ire.o ip6_rts.o \ $(IP_RTS_OBJS) \ $(IP_TCP_OBJS) \ $(IP_UDP_OBJS) \ - $(IP_SCTP_OBJS) + $(IP_SCTP_OBJS) \ + $(IP_ILB_OBJS) IP6_OBJS += ip6ddi.o diff --git a/usr/src/uts/common/Makefile.rules 
b/usr/src/uts/common/Makefile.rules index ebb286f385..277ac46685 100644 --- a/usr/src/uts/common/Makefile.rules +++ b/usr/src/uts/common/Makefile.rules @@ -491,6 +491,9 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/inet/tcp/%.c $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) +$(OBJS_DIR)/%.o: $(UTSBASE)/common/inet/ilb/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) $(OBJS_DIR)/%.o: $(UTSBASE)/common/inet/ipf/%.c $(COMPILE.c) -o $@ $< @@ -1833,6 +1836,9 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/common/inet/sctp/%.c $(LINTS_DIR)/%.ln: $(UTSBASE)/common/inet/tcp/%.c @($(LHEAD) $(LINT.c) $< $(LTAIL)) +$(LINTS_DIR)/%.ln: $(UTSBASE)/common/inet/ilb/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) + $(LINTS_DIR)/%.ln: $(UTSBASE)/common/inet/nca/%.c @($(LHEAD) $(LINT.c) $< $(LTAIL)) diff --git a/usr/src/uts/common/inet/ilb.h b/usr/src/uts/common/inet/ilb.h new file mode 100644 index 0000000000..81213bfd4c --- /dev/null +++ b/usr/src/uts/common/inet/ilb.h @@ -0,0 +1,239 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ +#ifndef _INET_ILB_H +#define _INET_ILB_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * This file contains the private interface to IP to configure ILB in + * the system. Note that this is not a supported interface, and is + * subject to be changed without notice. User level apps should instead + * use the libilb library to interface with ILB. + */ + +/* ioctl cmds to IP to configure ILB */ +typedef enum { + ILB_CREATE_RULE, + ILB_DESTROY_RULE, + ILB_ENABLE_RULE, + ILB_DISABLE_RULE, + ILB_NUM_RULES, + ILB_NUM_SERVERS, + ILB_RULE_NAMES, + ILB_LIST_RULE, + ILB_LIST_SERVERS, + ILB_ADD_SERVERS, + ILB_DEL_SERVERS, + ILB_ENABLE_SERVERS, + ILB_DISABLE_SERVERS, + ILB_LIST_NAT_TABLE, + ILB_LIST_STICKY_TABLE +} ilb_cmd_t; + +/* Supported load balancing algorithm type */ +typedef enum { + ILB_ALG_IMPL_ROUNDROBIN = 1, + ILB_ALG_IMPL_HASH_IP, + ILB_ALG_IMPL_HASH_IP_SPORT, + ILB_ALG_IMPL_HASH_IP_VIP +} ilb_algo_impl_t; + +/* Supported load balancing method */ +typedef enum { + ILB_TOPO_IMPL_DSR = 1, + ILB_TOPO_IMPL_NAT, + ILB_TOPO_IMPL_HALF_NAT +} ilb_topo_impl_t; + +/* Max ILB rule name length */ +#define ILB_RULE_NAMESZ 20 + +/* Max kstat server name length */ +#define ILB_SERVER_NAMESZ 20 + +/* Rule destroy/enable/disable command struct */ +typedef struct { + ilb_cmd_t cmd; + char name[ILB_RULE_NAMESZ]; + uint32_t flags; +} ilb_name_cmd_t; + +/* Flags for rule creation command */ +/* these are echoed in lib/libilb/common/libilb.h - please keep in sync */ +#define ILB_RULE_ENABLED 0x1 +#define ILB_RULE_STICKY 0x2 +#define ILB_RULE_ALLRULES 0x4 +#define ILB_RULE_BUSY 0x8 + +/* Rule creation/retrieval command struct */ +typedef struct { + ilb_cmd_t cmd; + char name[ILB_RULE_NAMESZ]; + uint32_t ip_ver; + in6_addr_t vip; + char vip_itf[LIFNAMSIZ]; + uint32_t proto; + in_port_t min_port; /* In network byte order */ + in_port_t max_port; + ilb_algo_impl_t algo; + ilb_topo_impl_t topo; + char servers_itf[LIFNAMSIZ]; + in6_addr_t nat_src_start; + in6_addr_t nat_src_end; + 
uint32_t flags; + in6_addr_t sticky_mask; + uint32_t conn_drain_timeout; /* Time value is in seconds */ + uint32_t nat_expiry; + uint32_t sticky_expiry; +} ilb_rule_cmd_t; + +/* Get number of servers command struct */ +typedef struct { + ilb_cmd_t cmd; + char name[ILB_RULE_NAMESZ]; + uint32_t num; +} ilb_num_servers_cmd_t; + +/* Get number of rules command struct */ +typedef struct { + ilb_cmd_t cmd; + uint32_t num; +} ilb_num_rules_cmd_t; + +/* Get all rule names command struct */ +typedef struct { + ilb_cmd_t cmd; + uint32_t num_names; + /* buf size is (num_names * ILB_RULE_NAMESZ) */ + char buf[ILB_RULE_NAMESZ]; +} ilb_rule_names_cmd_t; + +/* Flags for ilb_server_info_t */ +#define ILB_SERVER_ENABLED 0x1 + +/* Struct to represent a backend server for add/list command */ +typedef struct { + char name[ILB_SERVER_NAMESZ]; + in6_addr_t addr; + in_port_t min_port; /* In network byte order */ + in_port_t max_port; + uint32_t flags; + int err; /* In return, non zero value indicates error */ +} ilb_server_info_t; + +/* Add/list servers command struct */ +typedef struct { + ilb_cmd_t cmd; + char name[ILB_RULE_NAMESZ]; + uint32_t num_servers; + ilb_server_info_t servers[1]; +} ilb_servers_info_cmd_t; + +/* + * Struct to represent a backend server for delete/enable/disable + * command + */ +typedef struct { + in6_addr_t addr; + int err; /* In return, non zero value indicates error */ +} ilb_server_arg_t; + +/* Delete/enable/disable a server command struct */ +typedef struct { + ilb_cmd_t cmd; + char name[ILB_RULE_NAMESZ]; + uint32_t num_servers; + ilb_server_arg_t servers[1]; +} ilb_servers_cmd_t; + +/* + * Flags for listing NAT/persistence table entries + * + * ILB_LIST_BEGIN: start from the beginning of the table + * ILB_LIST_CONT: start from the last reply + * ILB_LIST_END: on return, this flag indicates the end of the table + */ +#define ILB_LIST_BEGIN 0x1 +#define ILB_LIST_CONT 0x2 +#define ILB_LIST_END 0x4 + +#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 
== 4 +#pragma pack(4) +#endif + +typedef struct { + uint32_t proto; + + in6_addr_t in_local; + in6_addr_t in_global; + in6_addr_t out_local; + in6_addr_t out_global; + + in_port_t in_local_port; + in_port_t in_global_port; + in_port_t out_local_port; + in_port_t out_global_port; + + int64_t create_time; + int64_t last_access_time; + uint64_t pkt_cnt; +} ilb_nat_entry_t; + +/* List NAT table entries command struct */ +typedef struct { + ilb_cmd_t cmd; + uint32_t flags; + uint32_t num_nat; + ilb_nat_entry_t entries[1]; +} ilb_list_nat_cmd_t; + +typedef struct { + char rule_name[ILB_RULE_NAMESZ]; + in6_addr_t req_addr; + in6_addr_t srv_addr; + int64_t expiry_time; +} ilb_sticky_entry_t; + +/* List sticky table entries command struct */ +typedef struct { + ilb_cmd_t cmd; + uint32_t flags; + uint32_t num_sticky; + ilb_sticky_entry_t entries[1]; +} ilb_list_sticky_cmd_t; + +#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 +#pragma pack() +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _INET_ILB_H */ diff --git a/usr/src/uts/common/inet/ilb/ilb.c b/usr/src/uts/common/inet/ilb/ilb.c new file mode 100644 index 0000000000..9bfe3fb0e0 --- /dev/null +++ b/usr/src/uts/common/inet/ilb/ilb.c @@ -0,0 +1,2153 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/kmem.h> +#include <sys/ksynch.h> +#include <sys/systm.h> +#include <sys/socket.h> +#include <sys/disp.h> +#include <sys/taskq.h> +#include <sys/cmn_err.h> +#include <sys/strsun.h> +#include <sys/sdt.h> +#include <sys/atomic.h> +#include <netinet/in.h> +#include <inet/ip.h> +#include <inet/ip6.h> +#include <inet/tcp.h> +#include <inet/udp_impl.h> +#include <inet/kstatcom.h> + +#include <inet/ilb_ip.h> +#include "ilb_alg.h" +#include "ilb_nat.h" +#include "ilb_conn.h" + +/* ILB kmem cache flag */ +int ilb_kmem_flags = 0; + +/* + * The default size for the different hash tables. Global for all stacks. + * But each stack has its own table, just that their sizes are the same. + */ +static size_t ilb_rule_hash_size = 2048; + +static size_t ilb_conn_hash_size = 262144; + +static size_t ilb_sticky_hash_size = 262144; + +/* This should be a prime number. */ +static size_t ilb_nat_src_hash_size = 97; + +/* Default NAT cache entry expiry time. */ +static uint32_t ilb_conn_tcp_expiry = 120; +static uint32_t ilb_conn_udp_expiry = 60; + +/* Default sticky entry expiry time. */ +static uint32_t ilb_sticky_expiry = 60; + +/* addr is assumed to be a uint8_t * to an ipaddr_t. */ +#define ILB_RULE_HASH(addr, hash_size) \ + ((*((addr) + 3) * 29791 + *((addr) + 2) * 961 + *((addr) + 1) * 31 + \ + *(addr)) & ((hash_size) - 1)) + +/* + * Note on ILB delayed processing + * + * To avoid in line removal on some of the data structures, such as rules, + * servers and ilb_conn_hash entries, ILB delays such processing to a taskq. + * There are three types of ILB taskq: + * + * 1. 
rule handling: created at stack initialization time, ilb_stack_init() + * 2. conn hash handling: created at conn hash initialization time, + * ilb_conn_hash_init() + * 3. sticky hash handling: created at sticky hash initialization time, + * ilb_sticky_hash_init() + * + * The rule taskq is for processing rule and server removal. When a user + * land rule/server removal request comes in, a taskq is dispatched after + * removing the rule/server from all related hashes. This taskq will wait + * until all references to the rule/server are gone before removing it. + * So the user land thread requesting the removal does not need to wait + * for the removal completion. + * + * The conn hash/sticky hash taskq is for processing ilb_conn_hash and + * ilb_sticky_hash table entry removal. There are ilb_conn_timer_size timers + * and ilb_sticky_timer_size timers running for ilb_conn_hash and + * ilb_sticky_hash cleanup respectively. Each timer is responsible for one + * portion (same size) of the hash table. When a timer fires, it dispatches + * a conn hash taskq to clean up its portion of the table. This avoids in + * line processing of the removal. + * + * There is another delayed processing, the clean up of NAT source address + * table. We just use the timer to directly handle it instead of using + * a taskq. The reason is that the table is small so it is OK to use the + * timer. + */ + +/* ILB rule taskq constants. */ +#define ILB_RULE_TASKQ_NUM_THR 20 + +/* Argument passed to ILB rule taskq routines. */ +typedef struct { + ilb_stack_t *ilbs; + ilb_rule_t *rule; +} ilb_rule_tq_t; + +/* kstat handling routines. */ +static kstat_t *ilb_kstat_g_init(netstackid_t, ilb_stack_t *); +static void ilb_kstat_g_fini(netstackid_t, ilb_stack_t *); +static kstat_t *ilb_rule_kstat_init(netstackid_t, ilb_rule_t *); +static kstat_t *ilb_server_kstat_init(netstackid_t, ilb_rule_t *, + ilb_server_t *); + +/* Rule hash handling routines. 
*/ +static void ilb_rule_hash_init(ilb_stack_t *); +static void ilb_rule_hash_fini(ilb_stack_t *); +static void ilb_rule_hash_add(ilb_stack_t *, ilb_rule_t *, const in6_addr_t *); +static void ilb_rule_hash_del(ilb_rule_t *); +static ilb_rule_t *ilb_rule_hash(ilb_stack_t *, int, int, in6_addr_t *, + in_port_t, zoneid_t, uint32_t, boolean_t *); + +static void ilb_rule_g_add(ilb_stack_t *, ilb_rule_t *); +static void ilb_rule_g_del(ilb_stack_t *, ilb_rule_t *); +static void ilb_del_rule_common(ilb_stack_t *, ilb_rule_t *); +static ilb_rule_t *ilb_find_rule_locked(ilb_stack_t *, zoneid_t, const char *, + int *); +static boolean_t ilb_match_rule(ilb_stack_t *, zoneid_t, const char *, int, + int, in_port_t, in_port_t, const in6_addr_t *); + +/* Back end server handling routines. */ +static void ilb_server_free(ilb_server_t *); + +/* Network stack handling routines. */ +static void *ilb_stack_init(netstackid_t, netstack_t *); +static void ilb_stack_shutdown(netstackid_t, void *); +static void ilb_stack_fini(netstackid_t, void *); + +/* Sticky connection handling routines. */ +static void ilb_rule_sticky_init(ilb_rule_t *); +static void ilb_rule_sticky_fini(ilb_rule_t *); + +/* Handy macro to check for unspecified address. */ +#define IS_ADDR_UNSPEC(addr) \ + (IN6_IS_ADDR_V4MAPPED(addr) ? IN6_IS_ADDR_V4MAPPED_ANY(addr) : \ + IN6_IS_ADDR_UNSPECIFIED(addr)) + +/* + * Global kstat instance counter. When a rule is created, its kstat instance + * number is assigned by ilb_kstat_instance and ilb_kstat_instance is + * incremented. + */ +static uint_t ilb_kstat_instance = 0; + +/* + * The ILB global kstat has name ILB_G_KS_NAME and class name ILB_G_KS_CNAME. + * A rule's kstat has ILB_RULE_KS_CNAME class name. 
+ */ +#define ILB_G_KS_NAME "global" +#define ILB_G_KS_CNAME "kstat" +#define ILB_RULE_KS_CNAME "rulestat" + +static kstat_t * +ilb_kstat_g_init(netstackid_t stackid, ilb_stack_t *ilbs) +{ + kstat_t *ksp; + ilb_g_kstat_t template = { + { "num_rules", KSTAT_DATA_UINT64, 0 }, + { "ip_frag_in", KSTAT_DATA_UINT64, 0 }, + { "ip_frag_dropped", KSTAT_DATA_UINT64, 0 } + }; + + ksp = kstat_create_netstack(ILB_KSTAT_MOD_NAME, 0, ILB_G_KS_NAME, + ILB_G_KS_CNAME, KSTAT_TYPE_NAMED, NUM_OF_FIELDS(ilb_g_kstat_t), + KSTAT_FLAG_VIRTUAL, stackid); + if (ksp == NULL) + return (NULL); + bcopy(&template, ilbs->ilbs_kstat, sizeof (template)); + ksp->ks_data = ilbs->ilbs_kstat; + ksp->ks_private = (void *)(uintptr_t)stackid; + + kstat_install(ksp); + return (ksp); +} + +static void +ilb_kstat_g_fini(netstackid_t stackid, ilb_stack_t *ilbs) +{ + if (ilbs->ilbs_ksp != NULL) { + ASSERT(stackid == (netstackid_t)(uintptr_t) + ilbs->ilbs_ksp->ks_private); + kstat_delete_netstack(ilbs->ilbs_ksp, stackid); + ilbs->ilbs_ksp = NULL; + } +} + +static kstat_t * +ilb_rule_kstat_init(netstackid_t stackid, ilb_rule_t *rule) +{ + kstat_t *ksp; + ilb_rule_kstat_t template = { + { "num_servers", KSTAT_DATA_UINT64, 0 }, + { "bytes_not_processed", KSTAT_DATA_UINT64, 0 }, + { "pkt_not_processed", KSTAT_DATA_UINT64, 0 }, + { "bytes_dropped", KSTAT_DATA_UINT64, 0 }, + { "pkt_dropped", KSTAT_DATA_UINT64, 0 }, + { "nomem_bytes_dropped", KSTAT_DATA_UINT64, 0 }, + { "nomem_pkt_dropped", KSTAT_DATA_UINT64, 0 }, + { "noport_bytes_dropped", KSTAT_DATA_UINT64, 0 }, + { "noport_pkt_dropped", KSTAT_DATA_UINT64, 0 }, + { "icmp_echo_processed", KSTAT_DATA_UINT64, 0 }, + { "icmp_dropped", KSTAT_DATA_UINT64, 0 }, + { "icmp_too_big_processed", KSTAT_DATA_UINT64, 0 }, + { "icmp_too_big_dropped", KSTAT_DATA_UINT64, 0 } + }; + + ksp = kstat_create_netstack(ILB_KSTAT_MOD_NAME, rule->ir_ks_instance, + rule->ir_name, ILB_RULE_KS_CNAME, KSTAT_TYPE_NAMED, + NUM_OF_FIELDS(ilb_rule_kstat_t), KSTAT_FLAG_VIRTUAL, stackid); + if (ksp == 
NULL) + return (NULL); + + bcopy(&template, &rule->ir_kstat, sizeof (template)); + ksp->ks_data = &rule->ir_kstat; + ksp->ks_private = (void *)(uintptr_t)stackid; + + kstat_install(ksp); + return (ksp); +} + +static kstat_t * +ilb_server_kstat_init(netstackid_t stackid, ilb_rule_t *rule, + ilb_server_t *server) +{ + kstat_t *ksp; + ilb_server_kstat_t template = { + { "bytes_processed", KSTAT_DATA_UINT64, 0 }, + { "pkt_processed", KSTAT_DATA_UINT64, 0 }, + { "ip_address", KSTAT_DATA_STRING, 0 } + }; + char cname_buf[KSTAT_STRLEN]; + + /* 7 is "-sstat" */ + ASSERT(strlen(rule->ir_name) + 7 < KSTAT_STRLEN); + (void) sprintf(cname_buf, "%s-sstat", rule->ir_name); + ksp = kstat_create_netstack(ILB_KSTAT_MOD_NAME, rule->ir_ks_instance, + server->iser_name, cname_buf, KSTAT_TYPE_NAMED, + NUM_OF_FIELDS(ilb_server_kstat_t), KSTAT_FLAG_VIRTUAL, stackid); + if (ksp == NULL) + return (NULL); + + bcopy(&template, &server->iser_kstat, sizeof (template)); + ksp->ks_data = &server->iser_kstat; + ksp->ks_private = (void *)(uintptr_t)stackid; + + kstat_named_setstr(&server->iser_kstat.ip_address, + server->iser_ip_addr); + /* We never change the IP address */ + ksp->ks_data_size += strlen(server->iser_ip_addr) + 1; + + kstat_install(ksp); + return (ksp); +} + +/* Initialize the rule hash table. */ +static void +ilb_rule_hash_init(ilb_stack_t *ilbs) +{ + int i; + + /* + * If ilbs->ilbs_rule_hash_size is not a power of 2, bump it up to + * the next power of 2. + */ + if (ilbs->ilbs_rule_hash_size & (ilbs->ilbs_rule_hash_size - 1)) { + for (i = 0; i < 31; i++) { + if (ilbs->ilbs_rule_hash_size < (1 << i)) + break; + } + ilbs->ilbs_rule_hash_size = 1 << i; + } + ilbs->ilbs_g_hash = kmem_zalloc(sizeof (ilb_hash_t) * + ilbs->ilbs_rule_hash_size, KM_SLEEP); + for (i = 0; i < ilbs->ilbs_rule_hash_size; i++) { + mutex_init(&ilbs->ilbs_g_hash[i].ilb_hash_lock, NULL, + MUTEX_DEFAULT, NULL); + } +} + +/* Clean up the rule hash table. 
*/ +static void +ilb_rule_hash_fini(ilb_stack_t *ilbs) +{ + if (ilbs->ilbs_g_hash == NULL) + return; + kmem_free(ilbs->ilbs_g_hash, sizeof (ilb_hash_t) * + ilbs->ilbs_rule_hash_size); +} + +/* Add a rule to the rule hash table. */ +static void +ilb_rule_hash_add(ilb_stack_t *ilbs, ilb_rule_t *rule, const in6_addr_t *addr) +{ + int i; + + i = ILB_RULE_HASH((uint8_t *)&addr->s6_addr32[3], + ilbs->ilbs_rule_hash_size); + DTRACE_PROBE2(ilb__rule__hash__add, ilb_rule_t *, rule, int, i); + mutex_enter(&ilbs->ilbs_g_hash[i].ilb_hash_lock); + rule->ir_hash_next = ilbs->ilbs_g_hash[i].ilb_hash_rule; + if (ilbs->ilbs_g_hash[i].ilb_hash_rule != NULL) + ilbs->ilbs_g_hash[i].ilb_hash_rule->ir_hash_prev = rule; + rule->ir_hash_prev = NULL; + ilbs->ilbs_g_hash[i].ilb_hash_rule = rule; + + rule->ir_hash = &ilbs->ilbs_g_hash[i]; + mutex_exit(&ilbs->ilbs_g_hash[i].ilb_hash_lock); +} + +/* + * Remove a rule from the rule hash table. Note that the rule is not freed + * in this routine. + */ +static void +ilb_rule_hash_del(ilb_rule_t *rule) +{ + mutex_enter(&rule->ir_hash->ilb_hash_lock); + if (rule->ir_hash->ilb_hash_rule == rule) { + rule->ir_hash->ilb_hash_rule = rule->ir_hash_next; + if (rule->ir_hash_next != NULL) + rule->ir_hash_next->ir_hash_prev = NULL; + } else { + if (rule->ir_hash_prev != NULL) + rule->ir_hash_prev->ir_hash_next = + rule->ir_hash_next; + if (rule->ir_hash_next != NULL) { + rule->ir_hash_next->ir_hash_prev = + rule->ir_hash_prev; + } + } + mutex_exit(&rule->ir_hash->ilb_hash_lock); + + rule->ir_hash_next = NULL; + rule->ir_hash_prev = NULL; + rule->ir_hash = NULL; +} + +/* + * Given the info of a packet, look for a match in the rule hash table. 
+ */ +static ilb_rule_t * +ilb_rule_hash(ilb_stack_t *ilbs, int l3, int l4, in6_addr_t *addr, + in_port_t port, zoneid_t zoneid, uint32_t len, boolean_t *busy) +{ + int i; + ilb_rule_t *rule; + ipaddr_t v4_addr; + + *busy = B_FALSE; + IN6_V4MAPPED_TO_IPADDR(addr, v4_addr); + i = ILB_RULE_HASH((uint8_t *)&v4_addr, ilbs->ilbs_rule_hash_size); + port = ntohs(port); + + mutex_enter(&ilbs->ilbs_g_hash[i].ilb_hash_lock); + for (rule = ilbs->ilbs_g_hash[i].ilb_hash_rule; rule != NULL; + rule = rule->ir_hash_next) { + if (!rule->ir_port_range) { + if (rule->ir_min_port != port) + continue; + } else { + if (port < rule->ir_min_port || + port > rule->ir_max_port) { + continue; + } + } + if (rule->ir_ipver != l3 || rule->ir_proto != l4 || + rule->ir_zoneid != zoneid) { + continue; + } + + if (l3 == IPPROTO_IP) { + if (rule->ir_target_v4 != INADDR_ANY && + rule->ir_target_v4 != v4_addr) { + continue; + } + } else { + if (!IN6_IS_ADDR_UNSPECIFIED(&rule->ir_target_v6) && + !IN6_ARE_ADDR_EQUAL(addr, &rule->ir_target_v6)) { + continue; + } + } + + /* + * Just update the stats if the rule is disabled. + */ + mutex_enter(&rule->ir_lock); + if (!(rule->ir_flags & ILB_RULE_ENABLED)) { + ILB_R_KSTAT(rule, pkt_not_processed); + ILB_R_KSTAT_UPDATE(rule, bytes_not_processed, len); + mutex_exit(&rule->ir_lock); + rule = NULL; + break; + } else if (rule->ir_flags & ILB_RULE_BUSY) { + /* + * If we are busy... + * + * XXX we should have a queue to postpone the + * packet processing. But this requires a + * mechanism in IP to re-start the packet + * processing. So for now, just drop the packet. + */ + ILB_R_KSTAT(rule, pkt_dropped); + ILB_R_KSTAT_UPDATE(rule, bytes_dropped, len); + mutex_exit(&rule->ir_lock); + *busy = B_TRUE; + rule = NULL; + break; + } else { + rule->ir_refcnt++; + ASSERT(rule->ir_refcnt != 1); + mutex_exit(&rule->ir_lock); + break; + } + } + mutex_exit(&ilbs->ilbs_g_hash[i].ilb_hash_lock); + return (rule); +} + +/* + * Add a rule to the global rule list. 
This list is for finding all rules + * in an IP stack. The caller is assumed to hold the ilbs_g_lock. + */ +static void +ilb_rule_g_add(ilb_stack_t *ilbs, ilb_rule_t *rule) +{ + ASSERT(mutex_owned(&ilbs->ilbs_g_lock)); + rule->ir_next = ilbs->ilbs_rule_head; + ilbs->ilbs_rule_head = rule; + ILB_KSTAT_UPDATE(ilbs, num_rules, 1); +} + +/* The call is assumed to hold the ilbs_g_lock. */ +static void +ilb_rule_g_del(ilb_stack_t *ilbs, ilb_rule_t *rule) +{ + ilb_rule_t *tmp_rule; + ilb_rule_t *prev_rule; + + ASSERT(mutex_owned(&ilbs->ilbs_g_lock)); + prev_rule = NULL; + for (tmp_rule = ilbs->ilbs_rule_head; tmp_rule != NULL; + prev_rule = tmp_rule, tmp_rule = tmp_rule->ir_next) { + if (tmp_rule == rule) + break; + } + if (tmp_rule == NULL) { + mutex_exit(&ilbs->ilbs_g_lock); + return; + } + if (prev_rule == NULL) + ilbs->ilbs_rule_head = tmp_rule->ir_next; + else + prev_rule->ir_next = tmp_rule->ir_next; + ILB_KSTAT_UPDATE(ilbs, num_rules, -1); +} + +/* + * Helper routine to calculate how many source addresses are in a given + * range. + */ +static int64_t +num_nat_src_v6(const in6_addr_t *a1, const in6_addr_t *a2) +{ + int64_t ret; + uint32_t addr1, addr2; + + /* + * Here we assume that the max number of NAT source cannot be + * large such that the most significant 2 s6_addr32 must be + * equal. + */ + addr1 = ntohl(a1->s6_addr32[3]); + addr2 = ntohl(a2->s6_addr32[3]); + if (a1->s6_addr32[0] != a2->s6_addr32[0] || + a1->s6_addr32[1] != a2->s6_addr32[1] || + a1->s6_addr32[2] > a2->s6_addr32[2] || + (a1->s6_addr32[2] == a2->s6_addr32[2] && addr1 > addr2)) { + return (-1); + } + if (a1->s6_addr32[2] == a2->s6_addr32[2]) { + return (addr2 - addr1 + 1); + } else { + ret = (ntohl(a2->s6_addr32[2]) - ntohl(a1->s6_addr32[2])); + ret <<= 32; + ret = ret + addr1 - addr2; + return (ret + 1); + } +} + +/* + * Add an ILB rule. 
+ */ +int +ilb_rule_add(ilb_stack_t *ilbs, zoneid_t zoneid, const ilb_rule_cmd_t *cmd) +{ + ilb_rule_t *rule; + netstackid_t stackid; + int ret; + in_port_t min_port, max_port; + int64_t num_src; + + /* Sanity checks. */ + if (cmd->ip_ver != IPPROTO_IP && cmd->ip_ver != IPPROTO_IPV6) + return (EINVAL); + + /* Need to support SCTP... */ + if (cmd->proto != IPPROTO_TCP && cmd->proto != IPPROTO_UDP) + return (EINVAL); + + /* For full NAT, the NAT source must be supplied. */ + if (cmd->topo == ILB_TOPO_IMPL_NAT) { + if (IS_ADDR_UNSPEC(&cmd->nat_src_start) || + IS_ADDR_UNSPEC(&cmd->nat_src_end)) { + return (EINVAL); + } + } + + /* Check invalid mask */ + if ((cmd->flags & ILB_RULE_STICKY) && + IS_ADDR_UNSPEC(&cmd->sticky_mask)) { + return (EINVAL); + } + + /* Port is passed in network byte order. */ + min_port = ntohs(cmd->min_port); + max_port = ntohs(cmd->max_port); + if (min_port > max_port) + return (EINVAL); + + /* min_port == 0 means "all ports". Make it so */ + if (min_port == 0) { + min_port = 1; + max_port = 65535; + } + + /* Funny address checking. 
*/ + if (cmd->ip_ver == IPPROTO_IP) { + in_addr_t v4_addr1, v4_addr2; + + v4_addr1 = cmd->vip.s6_addr32[3]; + if ((*(uchar_t *)&v4_addr1) == IN_LOOPBACKNET || + CLASSD(v4_addr1) || v4_addr1 == INADDR_BROADCAST || + v4_addr1 == INADDR_ANY || + !IN6_IS_ADDR_V4MAPPED(&cmd->vip)) { + return (EINVAL); + } + + if (cmd->topo == ILB_TOPO_IMPL_NAT) { + v4_addr1 = ntohl(cmd->nat_src_start.s6_addr32[3]); + v4_addr2 = ntohl(cmd->nat_src_end.s6_addr32[3]); + if ((*(uchar_t *)&v4_addr1) == IN_LOOPBACKNET || + (*(uchar_t *)&v4_addr2) == IN_LOOPBACKNET || + v4_addr1 == INADDR_BROADCAST || + v4_addr2 == INADDR_BROADCAST || + v4_addr1 == INADDR_ANY || v4_addr2 == INADDR_ANY || + CLASSD(v4_addr1) || CLASSD(v4_addr2) || + !IN6_IS_ADDR_V4MAPPED(&cmd->nat_src_start) || + !IN6_IS_ADDR_V4MAPPED(&cmd->nat_src_end)) { + return (EINVAL); + } + + num_src = v4_addr2 - v4_addr1 + 1; + if (v4_addr1 > v4_addr2 || num_src > ILB_MAX_NAT_SRC) + return (EINVAL); + } + } else { + if (IN6_IS_ADDR_LOOPBACK(&cmd->vip) || + IN6_IS_ADDR_MULTICAST(&cmd->vip) || + IN6_IS_ADDR_UNSPECIFIED(&cmd->vip) || + IN6_IS_ADDR_V4MAPPED(&cmd->vip)) { + return (EINVAL); + } + + if (cmd->topo == ILB_TOPO_IMPL_NAT) { + if (IN6_IS_ADDR_LOOPBACK(&cmd->nat_src_start) || + IN6_IS_ADDR_LOOPBACK(&cmd->nat_src_end) || + IN6_IS_ADDR_MULTICAST(&cmd->nat_src_start) || + IN6_IS_ADDR_MULTICAST(&cmd->nat_src_end) || + IN6_IS_ADDR_UNSPECIFIED(&cmd->nat_src_start) || + IN6_IS_ADDR_UNSPECIFIED(&cmd->nat_src_end) || + IN6_IS_ADDR_V4MAPPED(&cmd->nat_src_start) || + IN6_IS_ADDR_V4MAPPED(&cmd->nat_src_end)) { + return (EINVAL); + } + + if ((num_src = num_nat_src_v6(&cmd->nat_src_start, + &cmd->nat_src_end)) < 0 || + num_src > ILB_MAX_NAT_SRC) { + return (EINVAL); + } + } + } + + mutex_enter(&ilbs->ilbs_g_lock); + if (ilbs->ilbs_g_hash == NULL) + ilb_rule_hash_init(ilbs); + if (ilbs->ilbs_c2s_conn_hash == NULL) { + ASSERT(ilbs->ilbs_s2c_conn_hash == NULL); + ilb_conn_hash_init(ilbs); + ilb_nat_src_init(ilbs); + } + + /* Make sure that the new 
rule does not duplicate an existing one. */ + if (ilb_match_rule(ilbs, zoneid, cmd->name, cmd->ip_ver, cmd->proto, + min_port, max_port, &cmd->vip)) { + mutex_exit(&ilbs->ilbs_g_lock); + return (EEXIST); + } + + rule = kmem_zalloc(sizeof (ilb_rule_t), KM_NOSLEEP); + if (rule == NULL) { + mutex_exit(&ilbs->ilbs_g_lock); + return (ENOMEM); + } + + /* ir_name is all 0 to begin with */ + (void) memcpy(rule->ir_name, cmd->name, ILB_RULE_NAMESZ - 1); + + rule->ir_ks_instance = atomic_add_int_nv(&ilb_kstat_instance, 1); + stackid = (netstackid_t)(uintptr_t)ilbs->ilbs_ksp->ks_private; + if ((rule->ir_ksp = ilb_rule_kstat_init(stackid, rule)) == NULL) { + ret = ENOMEM; + goto error; + } + + if (cmd->topo == ILB_TOPO_IMPL_NAT) { + rule->ir_nat_src_start = cmd->nat_src_start; + rule->ir_nat_src_end = cmd->nat_src_end; + } + + rule->ir_ipver = cmd->ip_ver; + rule->ir_proto = cmd->proto; + rule->ir_topo = cmd->topo; + + rule->ir_min_port = min_port; + rule->ir_max_port = max_port; + if (rule->ir_min_port != rule->ir_max_port) + rule->ir_port_range = B_TRUE; + else + rule->ir_port_range = B_FALSE; + + rule->ir_zoneid = zoneid; + + rule->ir_target_v6 = cmd->vip; + rule->ir_servers = NULL; + + /* + * The default connection drain timeout is indefinite (value 0), + * meaning we will wait for all connections to finish. So we + * can assign cmd->conn_drain_timeout to it directly. 
+ */ + rule->ir_conn_drain_timeout = cmd->conn_drain_timeout; + if (cmd->nat_expiry != 0) { + rule->ir_nat_expiry = cmd->nat_expiry; + } else { + switch (rule->ir_proto) { + case IPPROTO_TCP: + rule->ir_nat_expiry = ilb_conn_tcp_expiry; + break; + case IPPROTO_UDP: + rule->ir_nat_expiry = ilb_conn_udp_expiry; + break; + default: + cmn_err(CE_PANIC, "data corruption: wrong ir_proto: %p", + (void *)rule); + break; + } + } + if (cmd->sticky_expiry != 0) + rule->ir_sticky_expiry = cmd->sticky_expiry; + else + rule->ir_sticky_expiry = ilb_sticky_expiry; + + if (cmd->flags & ILB_RULE_STICKY) { + rule->ir_flags |= ILB_RULE_STICKY; + rule->ir_sticky_mask = cmd->sticky_mask; + if (ilbs->ilbs_sticky_hash == NULL) + ilb_sticky_hash_init(ilbs); + } + if (cmd->flags & ILB_RULE_ENABLED) + rule->ir_flags |= ILB_RULE_ENABLED; + + mutex_init(&rule->ir_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&rule->ir_cv, NULL, CV_DEFAULT, NULL); + + rule->ir_refcnt = 1; + + switch (cmd->algo) { + case ILB_ALG_IMPL_ROUNDROBIN: + if ((rule->ir_alg = ilb_alg_rr_init(rule, NULL)) == NULL) { + ret = ENOMEM; + goto error; + } + rule->ir_alg_type = ILB_ALG_IMPL_ROUNDROBIN; + break; + case ILB_ALG_IMPL_HASH_IP: + case ILB_ALG_IMPL_HASH_IP_SPORT: + case ILB_ALG_IMPL_HASH_IP_VIP: + if ((rule->ir_alg = ilb_alg_hash_init(rule, + &cmd->algo)) == NULL) { + ret = ENOMEM; + goto error; + } + rule->ir_alg_type = cmd->algo; + break; + default: + ret = EINVAL; + goto error; + } + + /* Add it to the global list and hash array at the end. */ + ilb_rule_g_add(ilbs, rule); + ilb_rule_hash_add(ilbs, rule, &cmd->vip); + + mutex_exit(&ilbs->ilbs_g_lock); + + return (0); + +error: + mutex_exit(&ilbs->ilbs_g_lock); + if (rule->ir_ksp != NULL) { + /* stackid must be initialized if ir_ksp != NULL */ + kstat_delete_netstack(rule->ir_ksp, stackid); + } + kmem_free(rule, sizeof (ilb_rule_t)); + return (ret); +} + +/* + * The final part in deleting a rule. Either called directly or by the + * taskq dispatched. 
+ */ +static void +ilb_rule_del_common(ilb_stack_t *ilbs, ilb_rule_t *tmp_rule) +{ + netstackid_t stackid; + ilb_server_t *server; + + stackid = (netstackid_t)(uintptr_t)ilbs->ilbs_ksp->ks_private; + + /* + * Let the algorithm know that the rule is going away. The + * algorithm fini routine will free all its resources with this + * rule. + */ + tmp_rule->ir_alg->ilb_alg_fini(&tmp_rule->ir_alg); + + while ((server = tmp_rule->ir_servers) != NULL) { + mutex_enter(&server->iser_lock); + ilb_destroy_nat_src(&server->iser_nat_src); + if (tmp_rule->ir_conn_drain_timeout != 0) { + /* + * The garbage collection thread checks this value + * without grabing a lock. So we need to use + * atomic_swap_64() to make sure that the value seen + * by gc thread is intact. + */ + (void) atomic_swap_64( + (uint64_t *)&server->iser_die_time, lbolt64 + + SEC_TO_TICK(tmp_rule->ir_conn_drain_timeout)); + } + while (server->iser_refcnt > 1) + cv_wait(&server->iser_cv, &server->iser_lock); + tmp_rule->ir_servers = server->iser_next; + kstat_delete_netstack(server->iser_ksp, stackid); + kmem_free(server, sizeof (ilb_server_t)); + } + + ASSERT(tmp_rule->ir_ksp != NULL); + kstat_delete_netstack(tmp_rule->ir_ksp, stackid); + + kmem_free(tmp_rule, sizeof (ilb_rule_t)); +} + +/* The routine executed by the delayed rule taskq. */ +static void +ilb_rule_del_tq(void *arg) +{ + ilb_stack_t *ilbs = ((ilb_rule_tq_t *)arg)->ilbs; + ilb_rule_t *rule = ((ilb_rule_tq_t *)arg)->rule; + + mutex_enter(&rule->ir_lock); + while (rule->ir_refcnt > 1) + cv_wait(&rule->ir_cv, &rule->ir_lock); + ilb_rule_del_common(ilbs, rule); + kmem_free(arg, sizeof (ilb_rule_tq_t)); +} + +/* Routine to delete a rule. 
*/
/*
 * Looks up the rule by zone and name, unlinks it from the hash table and
 * the global list, and hands the actual destruction to the rule taskq.
 * Returns 0 on success, or the error reported by ilb_find_rule_locked()
 * when the rule cannot be used.
 */
int
ilb_rule_del(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name)
{
	ilb_rule_t *rule;
	ilb_rule_tq_t *tq_arg;
	int err;

	mutex_enter(&ilbs->ilbs_g_lock);
	rule = ilb_find_rule_locked(ilbs, zoneid, name, &err);
	if (rule == NULL) {
		mutex_exit(&ilbs->ilbs_g_lock);
		return (err);
	}

	/*
	 * First remove the rule from the hash array and the global list so
	 * that no one can find this rule any more.
	 */
	ilb_rule_hash_del(rule);
	ilb_rule_g_del(ilbs, rule);
	mutex_exit(&ilbs->ilbs_g_lock);
	/* Give back the reference taken by the lookup above. */
	ILB_RULE_REFRELE(rule);

	/*
	 * Now no one can find this rule, we can remove it once all
	 * references to it are dropped and all references to the list
	 * of servers are dropped.  So dispatch a task to finish the deletion.
	 * We do this instead of letting the last one referencing the
	 * rule do it.  The reason is that the last one may be the
	 * interrupt thread.  We want to minimize the work it needs to
	 * do.  Rule deletion is not a critical task so it can be delayed.
	 */
	tq_arg = kmem_alloc(sizeof (ilb_rule_tq_t), KM_SLEEP);
	tq_arg->ilbs = ilbs;
	tq_arg->rule = rule;
	(void) taskq_dispatch(ilbs->ilbs_rule_taskq, ilb_rule_del_tq, tq_arg,
	    TQ_SLEEP);

	return (0);
}

/*
 * Given an IP address, check to see if there is a rule using this
 * as the VIP.  It can be used to check if we need to drop a fragment.
+ */ +boolean_t +ilb_rule_match_vip_v6(ilb_stack_t *ilbs, in6_addr_t *vip, ilb_rule_t **ret_rule) +{ + int i; + ilb_rule_t *rule; + boolean_t ret = B_FALSE; + + i = ILB_RULE_HASH((uint8_t *)&vip->s6_addr32[3], + ilbs->ilbs_rule_hash_size); + mutex_enter(&ilbs->ilbs_g_hash[i].ilb_hash_lock); + for (rule = ilbs->ilbs_g_hash[i].ilb_hash_rule; rule != NULL; + rule = rule->ir_hash_next) { + if (IN6_ARE_ADDR_EQUAL(vip, &rule->ir_target_v6)) { + mutex_enter(&rule->ir_lock); + if (rule->ir_flags & ILB_RULE_BUSY) { + mutex_exit(&rule->ir_lock); + break; + } + if (ret_rule != NULL) { + rule->ir_refcnt++; + mutex_exit(&rule->ir_lock); + *ret_rule = rule; + } else { + mutex_exit(&rule->ir_lock); + } + ret = B_TRUE; + break; + } + } + mutex_exit(&ilbs->ilbs_g_hash[i].ilb_hash_lock); + return (ret); +} + +boolean_t +ilb_rule_match_vip_v4(ilb_stack_t *ilbs, ipaddr_t addr, ilb_rule_t **ret_rule) +{ + int i; + ilb_rule_t *rule; + boolean_t ret = B_FALSE; + + i = ILB_RULE_HASH((uint8_t *)&addr, ilbs->ilbs_rule_hash_size); + mutex_enter(&ilbs->ilbs_g_hash[i].ilb_hash_lock); + for (rule = ilbs->ilbs_g_hash[i].ilb_hash_rule; rule != NULL; + rule = rule->ir_hash_next) { + if (rule->ir_target_v6.s6_addr32[3] == addr) { + mutex_enter(&rule->ir_lock); + if (rule->ir_flags & ILB_RULE_BUSY) { + mutex_exit(&rule->ir_lock); + break; + } + if (ret_rule != NULL) { + rule->ir_refcnt++; + mutex_exit(&rule->ir_lock); + *ret_rule = rule; + } else { + mutex_exit(&rule->ir_lock); + } + ret = B_TRUE; + break; + } + } + mutex_exit(&ilbs->ilbs_g_hash[i].ilb_hash_lock); + return (ret); +} + +static ilb_rule_t * +ilb_find_rule_locked(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name, + int *err) +{ + ilb_rule_t *tmp_rule; + + ASSERT(mutex_owned(&ilbs->ilbs_g_lock)); + + for (tmp_rule = ilbs->ilbs_rule_head; tmp_rule != NULL; + tmp_rule = tmp_rule->ir_next) { + if (tmp_rule->ir_zoneid != zoneid) + continue; + if (strcasecmp(tmp_rule->ir_name, name) == 0) { + mutex_enter(&tmp_rule->ir_lock); + if 
(tmp_rule->ir_flags & ILB_RULE_BUSY) { + mutex_exit(&tmp_rule->ir_lock); + *err = EINPROGRESS; + return (NULL); + } + tmp_rule->ir_refcnt++; + mutex_exit(&tmp_rule->ir_lock); + *err = 0; + return (tmp_rule); + } + } + *err = ENOENT; + return (NULL); +} + +/* To find a rule with a given name and zone in the global rule list. */ +ilb_rule_t * +ilb_find_rule(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name, + int *err) +{ + ilb_rule_t *tmp_rule; + + mutex_enter(&ilbs->ilbs_g_lock); + tmp_rule = ilb_find_rule_locked(ilbs, zoneid, name, err); + mutex_exit(&ilbs->ilbs_g_lock); + return (tmp_rule); +} + +/* Try to match the given packet info and zone ID with a rule. */ +static boolean_t +ilb_match_rule(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name, int l3, + int l4, in_port_t min_port, in_port_t max_port, const in6_addr_t *addr) +{ + ilb_rule_t *tmp_rule; + + ASSERT(mutex_owned(&ilbs->ilbs_g_lock)); + + for (tmp_rule = ilbs->ilbs_rule_head; tmp_rule != NULL; + tmp_rule = tmp_rule->ir_next) { + if (tmp_rule->ir_zoneid != zoneid) + continue; + + /* + * We don't allow the same name in different rules even if all + * the other rule components are different. + */ + if (strcasecmp(tmp_rule->ir_name, name) == 0) + return (B_TRUE); + + if (tmp_rule->ir_ipver != l3 || tmp_rule->ir_proto != l4) + continue; + + /* + * ir_min_port and ir_max_port are the same if ir_port_range + * is false. In this case, if the ir_min|max_port (same) is + * outside of the given port range, it is OK. In other cases, + * check if min and max port are outside a rule's range. + */ + if (tmp_rule->ir_max_port < min_port || + tmp_rule->ir_min_port > max_port) { + continue; + } + + /* + * If l3 is IPv4, the addr passed in is assumed to be + * mapped address. 
+ */ + if (V6_OR_V4_INADDR_ANY(*addr) || + V6_OR_V4_INADDR_ANY(tmp_rule->ir_target_v6) || + IN6_ARE_ADDR_EQUAL(addr, &tmp_rule->ir_target_v6)) { + return (B_TRUE); + } + } + return (B_FALSE); +} + +int +ilb_rule_enable(ilb_stack_t *ilbs, zoneid_t zoneid, + const char *rule_name, ilb_rule_t *in_rule) +{ + ilb_rule_t *rule; + int err; + + ASSERT((in_rule == NULL && rule_name != NULL) || + (in_rule != NULL && rule_name == NULL)); + if ((rule = in_rule) == NULL) { + if ((rule = ilb_find_rule(ilbs, zoneid, rule_name, + &err)) == NULL) { + return (err); + } + } + mutex_enter(&rule->ir_lock); + rule->ir_flags |= ILB_RULE_ENABLED; + mutex_exit(&rule->ir_lock); + + /* Only refrele if the rule is passed in. */ + if (in_rule == NULL) + ILB_RULE_REFRELE(rule); + return (0); +} + +int +ilb_rule_disable(ilb_stack_t *ilbs, zoneid_t zoneid, + const char *rule_name, ilb_rule_t *in_rule) +{ + ilb_rule_t *rule; + int err; + + ASSERT((in_rule == NULL && rule_name != NULL) || + (in_rule != NULL && rule_name == NULL)); + if ((rule = in_rule) == NULL) { + if ((rule = ilb_find_rule(ilbs, zoneid, rule_name, + &err)) == NULL) { + return (err); + } + } + mutex_enter(&rule->ir_lock); + rule->ir_flags &= ~ILB_RULE_ENABLED; + mutex_exit(&rule->ir_lock); + + /* Only refrele if the rule is passed in. */ + if (in_rule == NULL) + ILB_RULE_REFRELE(rule); + return (0); +} + +/* + * XXX We should probably have a walker function to walk all rules. For + * now, just add a simple loop for enable/disable/del. + */ +void +ilb_rule_enable_all(ilb_stack_t *ilbs, zoneid_t zoneid) +{ + ilb_rule_t *rule; + + mutex_enter(&ilbs->ilbs_g_lock); + for (rule = ilbs->ilbs_rule_head; rule != NULL; rule = rule->ir_next) { + if (rule->ir_zoneid != zoneid) + continue; + /* + * No need to hold the rule as we are holding the global + * lock so it won't go away. Ignore the return value here + * as the rule is provided so the call cannot fail. 
+ */ + (void) ilb_rule_enable(ilbs, zoneid, NULL, rule); + } + mutex_exit(&ilbs->ilbs_g_lock); +} + +void +ilb_rule_disable_all(ilb_stack_t *ilbs, zoneid_t zoneid) +{ + ilb_rule_t *rule; + + mutex_enter(&ilbs->ilbs_g_lock); + for (rule = ilbs->ilbs_rule_head; rule != NULL; + rule = rule->ir_next) { + if (rule->ir_zoneid != zoneid) + continue; + (void) ilb_rule_disable(ilbs, zoneid, NULL, rule); + } + mutex_exit(&ilbs->ilbs_g_lock); +} + +void +ilb_rule_del_all(ilb_stack_t *ilbs, zoneid_t zoneid) +{ + ilb_rule_t *rule; + ilb_rule_tq_t *arg; + + mutex_enter(&ilbs->ilbs_g_lock); + while ((rule = ilbs->ilbs_rule_head) != NULL) { + if (rule->ir_zoneid != zoneid) + continue; + ilb_rule_hash_del(rule); + ilb_rule_g_del(ilbs, rule); + mutex_exit(&ilbs->ilbs_g_lock); + + arg = kmem_alloc(sizeof (ilb_rule_tq_t), KM_SLEEP); + arg->ilbs = ilbs; + arg->rule = rule; + (void) taskq_dispatch(ilbs->ilbs_rule_taskq, ilb_rule_del_tq, + arg, TQ_SLEEP); + + mutex_enter(&ilbs->ilbs_g_lock); + } + mutex_exit(&ilbs->ilbs_g_lock); +} + +/* + * This is just an optimization, so don't grab the global lock. The + * worst case is that we missed a couple packets. + */ +boolean_t +ilb_has_rules(ilb_stack_t *ilbs) +{ + return (ilbs->ilbs_rule_head != NULL); +} + + +static int +ilb_server_toggle(ilb_stack_t *ilbs, zoneid_t zoneid, const char *rule_name, + ilb_rule_t *rule, in6_addr_t *addr, boolean_t enable) +{ + ilb_server_t *tmp_server; + int ret; + + ASSERT((rule == NULL && rule_name != NULL) || + (rule != NULL && rule_name == NULL)); + + if (rule == NULL) { + if ((rule = ilb_find_rule(ilbs, zoneid, rule_name, + &ret)) == NULL) { + return (ret); + } + } + + /* Once we get a hold on the rule, no server can be added/deleted. 
*/ + for (tmp_server = rule->ir_servers; tmp_server != NULL; + tmp_server = tmp_server->iser_next) { + if (IN6_ARE_ADDR_EQUAL(&tmp_server->iser_addr_v6, addr)) + break; + } + if (tmp_server == NULL) { + ret = ENOENT; + goto done; + } + + if (enable) { + ret = rule->ir_alg->ilb_alg_server_enable(tmp_server, + rule->ir_alg->ilb_alg_data); + if (ret == 0) { + tmp_server->iser_enabled = B_TRUE; + tmp_server->iser_die_time = 0; + } + } else { + ret = rule->ir_alg->ilb_alg_server_disable(tmp_server, + rule->ir_alg->ilb_alg_data); + if (ret == 0) { + tmp_server->iser_enabled = B_FALSE; + if (rule->ir_conn_drain_timeout != 0) { + (void) atomic_swap_64( + (uint64_t *)&tmp_server->iser_die_time, + lbolt64 + SEC_TO_TICK( + rule->ir_conn_drain_timeout)); + } + } + } + +done: + if (rule_name != NULL) + ILB_RULE_REFRELE(rule); + return (ret); +} +int +ilb_server_enable(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name, + ilb_rule_t *rule, in6_addr_t *addr) +{ + return (ilb_server_toggle(ilbs, zoneid, name, rule, addr, B_TRUE)); +} + +int +ilb_server_disable(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name, + ilb_rule_t *rule, in6_addr_t *addr) +{ + return (ilb_server_toggle(ilbs, zoneid, name, rule, addr, B_FALSE)); +} + +/* + * Add a back end server to a rule. If the address is IPv4, it is assumed + * to be passed in as a mapped address. + */ +int +ilb_server_add(ilb_stack_t *ilbs, ilb_rule_t *rule, ilb_server_info_t *info) +{ + ilb_server_t *server; + netstackid_t stackid; + int ret = 0; + in_port_t min_port, max_port; + in_port_t range; + + /* Port is passed in network byte order. */ + min_port = ntohs(info->min_port); + max_port = ntohs(info->max_port); + if (min_port > max_port) + return (EINVAL); + + /* min_port == 0 means "all ports". Make it so */ + if (min_port == 0) { + min_port = 1; + max_port = 65535; + } + range = max_port - min_port; + + mutex_enter(&rule->ir_lock); + /* If someone is already doing server add/del, sleeps and wait. 
*/ + while (rule->ir_flags & ILB_RULE_BUSY) { + if (cv_wait_sig(&rule->ir_cv, &rule->ir_lock) == 0) { + mutex_exit(&rule->ir_lock); + return (EINTR); + } + } + + /* + * Set the rule to be busy to make sure that no new packet can + * use this rule. + */ + rule->ir_flags |= ILB_RULE_BUSY; + + /* Now wait for all other guys to finish their work. */ + while (rule->ir_refcnt > 2) { + if (cv_wait_sig(&rule->ir_cv, &rule->ir_lock) == 0) { + mutex_exit(&rule->ir_lock); + ret = EINTR; + goto end; + } + } + mutex_exit(&rule->ir_lock); + + /* Sanity checks... */ + if ((IN6_IS_ADDR_V4MAPPED(&info->addr) && + rule->ir_ipver != IPPROTO_IP) || + (!IN6_IS_ADDR_V4MAPPED(&info->addr) && + rule->ir_ipver != IPPROTO_IPV6)) { + ret = EINVAL; + goto end; + } + + /* + * Check for valid port range. + * + * For DSR, there can be no port shifting. Hence the server + * specification must be the same as the rule's. + * + * For half-NAT/NAT, the range must either be 0 (port collapsing) or + * it must be equal to the same value as the rule port range. + * + */ + if (rule->ir_topo == ILB_TOPO_IMPL_DSR) { + if (rule->ir_max_port != max_port || + rule->ir_min_port != min_port) { + ret = EINVAL; + goto end; + } + } else { + if ((range != rule->ir_max_port - rule->ir_min_port) && + range != 0) { + ret = EINVAL; + goto end; + } + } + + /* Check for duplicate. 
*/ + for (server = rule->ir_servers; server != NULL; + server = server->iser_next) { + if (IN6_ARE_ADDR_EQUAL(&server->iser_addr_v6, &info->addr) || + strcasecmp(server->iser_name, info->name) == 0) { + break; + } + } + if (server != NULL) { + ret = EEXIST; + goto end; + } + + if ((server = kmem_zalloc(sizeof (ilb_server_t), KM_NOSLEEP)) == NULL) { + ret = ENOMEM; + goto end; + } + + (void) memcpy(server->iser_name, info->name, ILB_SERVER_NAMESZ - 1); + (void) inet_ntop(AF_INET6, &info->addr, server->iser_ip_addr, + sizeof (server->iser_ip_addr)); + stackid = (netstackid_t)(uintptr_t)ilbs->ilbs_ksp->ks_private; + server->iser_ksp = ilb_server_kstat_init(stackid, rule, server); + if (server->iser_ksp == NULL) { + kmem_free(server, sizeof (ilb_server_t)); + ret = EINVAL; + goto end; + } + + server->iser_stackid = stackid; + server->iser_addr_v6 = info->addr; + server->iser_min_port = min_port; + server->iser_max_port = max_port; + if (min_port != max_port) + server->iser_port_range = B_TRUE; + else + server->iser_port_range = B_FALSE; + + /* + * If the rule uses NAT, find/create the NAT source entry to use + * for this server. + */ + if (rule->ir_topo == ILB_TOPO_IMPL_NAT) { + in_port_t port; + + /* + * If the server uses a port range, our port allocation + * scheme needs to treat it as a wildcard. Refer to the + * comments in ilb_nat.c about the scheme. + */ + if (server->iser_port_range) + port = 0; + else + port = server->iser_min_port; + + if ((ret = ilb_create_nat_src(ilbs, &server->iser_nat_src, + &server->iser_addr_v6, port, &rule->ir_nat_src_start, + num_nat_src_v6(&rule->ir_nat_src_start, + &rule->ir_nat_src_end))) != 0) { + kstat_delete_netstack(server->iser_ksp, stackid); + kmem_free(server, sizeof (ilb_server_t)); + goto end; + } + } + + /* + * The iser_lock is only used to protect iser_refcnt. All the other + * fields in ilb_server_t should not change, except for iser_enabled. 
+ * The worst thing that can happen if iser_enabled is messed up is + * that one or two packets may not be load balanced to a server + * correctly. + */ + server->iser_refcnt = 1; + server->iser_enabled = info->flags & ILB_SERVER_ENABLED ? B_TRUE : + B_FALSE; + mutex_init(&server->iser_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&server->iser_cv, NULL, CV_DEFAULT, NULL); + + /* Let the load balancing algorithm know about the addition. */ + ASSERT(rule->ir_alg != NULL); + if ((ret = rule->ir_alg->ilb_alg_server_add(server, + rule->ir_alg->ilb_alg_data)) != 0) { + kstat_delete_netstack(server->iser_ksp, stackid); + kmem_free(server, sizeof (ilb_server_t)); + goto end; + } + + /* + * No need to hold ir_lock since no other thread should manipulate + * the following fields until ILB_RULE_BUSY is cleared. + */ + if (rule->ir_servers == NULL) { + server->iser_next = NULL; + } else { + server->iser_next = rule->ir_servers; + } + rule->ir_servers = server; + ILB_R_KSTAT(rule, num_servers); + +end: + mutex_enter(&rule->ir_lock); + rule->ir_flags &= ~ILB_RULE_BUSY; + cv_signal(&rule->ir_cv); + mutex_exit(&rule->ir_lock); + return (ret); +} + +/* The routine executed by the delayed rule processing taskq. */ +static void +ilb_server_del_tq(void *arg) +{ + ilb_server_t *server = (ilb_server_t *)arg; + + mutex_enter(&server->iser_lock); + while (server->iser_refcnt > 1) + cv_wait(&server->iser_cv, &server->iser_lock); + kstat_delete_netstack(server->iser_ksp, server->iser_stackid); + kmem_free(server, sizeof (ilb_server_t)); +} + +/* + * Delete a back end server from a rule. If the address is IPv4, it is assumed + * to be passed in as a mapped address. 
+ */ +int +ilb_server_del(ilb_stack_t *ilbs, zoneid_t zoneid, const char *rule_name, + ilb_rule_t *rule, in6_addr_t *addr) +{ + ilb_server_t *server; + ilb_server_t *prev_server; + int ret = 0; + + ASSERT((rule == NULL && rule_name != NULL) || + (rule != NULL && rule_name == NULL)); + if (rule == NULL) { + if ((rule = ilb_find_rule(ilbs, zoneid, rule_name, + &ret)) == NULL) { + return (ret); + } + } + + mutex_enter(&rule->ir_lock); + /* If someone is already doing server add/del, sleeps and wait. */ + while (rule->ir_flags & ILB_RULE_BUSY) { + if (cv_wait_sig(&rule->ir_cv, &rule->ir_lock) == 0) { + if (rule_name != NULL) { + if (--rule->ir_refcnt <= 2) + cv_signal(&rule->ir_cv); + } + mutex_exit(&rule->ir_lock); + return (EINTR); + } + } + /* + * Set the rule to be busy to make sure that no new packet can + * use this rule. + */ + rule->ir_flags |= ILB_RULE_BUSY; + + /* Now wait for all other guys to finish their work. */ + while (rule->ir_refcnt > 2) { + if (cv_wait_sig(&rule->ir_cv, &rule->ir_lock) == 0) { + mutex_exit(&rule->ir_lock); + ret = EINTR; + goto end; + } + } + mutex_exit(&rule->ir_lock); + + prev_server = NULL; + for (server = rule->ir_servers; server != NULL; + prev_server = server, server = server->iser_next) { + if (IN6_ARE_ADDR_EQUAL(&server->iser_addr_v6, addr)) + break; + } + if (server == NULL) { + ret = ENOENT; + goto end; + } + + /* + * Let the load balancing algorithm know about the removal. + * The algorithm may disallow the removal... + */ + if ((ret = rule->ir_alg->ilb_alg_server_del(server, + rule->ir_alg->ilb_alg_data)) != 0) { + goto end; + } + + if (prev_server == NULL) + rule->ir_servers = server->iser_next; + else + prev_server->iser_next = server->iser_next; + + ILB_R_KSTAT_UPDATE(rule, num_servers, -1); + + /* + * Mark the server as disabled so that if there is any sticky cache + * using this server around, it won't be used. 
+ */ + server->iser_enabled = B_FALSE; + + mutex_enter(&server->iser_lock); + + /* + * De-allocate the NAT source array. The indiviual ilb_nat_src_entry_t + * may not go away if there is still a conn using it. The NAT source + * timer will do the garbage collection. + */ + ilb_destroy_nat_src(&server->iser_nat_src); + + /* If there is a hard limit on when a server should die, set it. */ + if (rule->ir_conn_drain_timeout != 0) { + (void) atomic_swap_64((uint64_t *)&server->iser_die_time, + lbolt64 + SEC_TO_TICK(rule->ir_conn_drain_timeout)); + } + + if (server->iser_refcnt > 1) { + (void) taskq_dispatch(ilbs->ilbs_rule_taskq, ilb_server_del_tq, + server, TQ_SLEEP); + mutex_exit(&server->iser_lock); + } else { + kstat_delete_netstack(server->iser_ksp, server->iser_stackid); + kmem_free(server, sizeof (ilb_server_t)); + } + +end: + mutex_enter(&rule->ir_lock); + rule->ir_flags &= ~ILB_RULE_BUSY; + if (rule_name != NULL) + rule->ir_refcnt--; + cv_signal(&rule->ir_cv); + mutex_exit(&rule->ir_lock); + return (ret); +} + +/* + * First check if the destination of the ICMP message matches a VIP of + * a rule. If it does not, just return ILB_PASSED. + * + * If the destination matches a VIP: + * + * For ICMP_ECHO_REQUEST, generate a response on behalf of the back end + * server. + * + * For ICMP_DEST_UNREACHABLE fragmentation needed, check inside the payload + * and see which back end server we should send this message to. And we + * need to do NAT on both the payload message and the outside IP packet. + * + * For other ICMP messages, drop them. 
+ */
+/* ARGSUSED */
+static int
+ilb_icmp_v4(ilb_stack_t *ilbs, ill_t *ill, mblk_t *mp, ipha_t *ipha,
+    icmph_t *icmph, ipaddr_t *lb_dst)
+{
+	ipaddr_t vip;
+	ilb_rule_t *rule;
+	in6_addr_t addr6;
+
+	/* A successful match returns with a reference held on the rule. */
+	if (!ilb_rule_match_vip_v4(ilbs, ipha->ipha_dst, &rule))
+		return (ILB_PASSED);
+
+
+	/* The fixed part of the ICMP header must be within this mblk. */
+	if ((uint8_t *)icmph + sizeof (icmph_t) > mp->b_wptr) {
+		ILB_R_KSTAT(rule, icmp_dropped);
+		ILB_RULE_REFRELE(rule);
+		return (ILB_DROPPED);
+	}
+
+	switch (icmph->icmph_type) {
+	case ICMP_ECHO_REQUEST:
+		ILB_R_KSTAT(rule, icmp_echo_processed);
+		ILB_RULE_REFRELE(rule);
+
+		/*
+		 * Turn the request into a reply in place: rewrite the type,
+		 * redo the ICMP checksum and swap the IP source and
+		 * destination so that the reply goes back to the sender.
+		 */
+		icmph->icmph_type = ICMP_ECHO_REPLY;
+		icmph->icmph_checksum = 0;
+		icmph->icmph_checksum = IP_CSUM(mp, IPH_HDR_LENGTH(ipha), 0);
+		ipha->ipha_ttl =
+		    ilbs->ilbs_netstack->netstack_ip->ips_ip_def_ttl;
+		*lb_dst = ipha->ipha_src;
+		vip = ipha->ipha_dst;
+		ipha->ipha_dst = ipha->ipha_src;
+		ipha->ipha_src = vip;
+		return (ILB_BALANCED);
+	case ICMP_DEST_UNREACHABLE: {
+		int ret;
+
+		if (icmph->icmph_code != ICMP_FRAGMENTATION_NEEDED) {
+			ILB_R_KSTAT(rule, icmp_dropped);
+			ILB_RULE_REFRELE(rule);
+			return (ILB_DROPPED);
+		}
+		if (ilb_check_icmp_conn(ilbs, mp, IPPROTO_IP, ipha, icmph,
+		    &addr6)) {
+			ILB_R_KSTAT(rule, icmp_2big_processed);
+			ret = ILB_BALANCED;
+		} else {
+			ILB_R_KSTAT(rule, icmp_2big_dropped);
+			ret = ILB_DROPPED;
+		}
+		ILB_RULE_REFRELE(rule);
+		/*
+		 * Only hand back a destination when the message is balanced,
+		 * as ilb_check_v4() does.  addr6 is not initialized in this
+		 * function, so it must not be read unless
+		 * ilb_check_icmp_conn() reported success.
+		 */
+		if (ret == ILB_BALANCED)
+			IN6_V4MAPPED_TO_IPADDR(&addr6, *lb_dst);
+		return (ret);
+	}
+	default:
+		ILB_R_KSTAT(rule, icmp_dropped);
+		ILB_RULE_REFRELE(rule);
+		return (ILB_DROPPED);
+	}
+}
+
+/*
+ * ICMPv6 counterpart of ilb_icmp_v4(): messages whose destination is not
+ * the VIP of a rule are passed, truncated ones are dropped.
+ */
+/* ARGSUSED */
+static int
+ilb_icmp_v6(ilb_stack_t *ilbs, ill_t *ill, mblk_t *mp, ip6_t *ip6h,
+    icmp6_t *icmp6, in6_addr_t *lb_dst)
+{
+	ilb_rule_t *rule;
+
+	if (!ilb_rule_match_vip_v6(ilbs, &ip6h->ip6_dst, &rule))
+		return (ILB_PASSED);
+
+	if ((uint8_t *)icmp6 + sizeof (icmp6_t) > mp->b_wptr) {
+		ILB_R_KSTAT(rule, icmp_dropped);
+		ILB_RULE_REFRELE(rule);
+		return (ILB_DROPPED);
+	}
+
+	switch (icmp6->icmp6_type) {
+	case ICMP6_ECHO_REQUEST: {
+		int hdr_len;
+
+		ILB_R_KSTAT(rule,
icmp_echo_processed); + ILB_RULE_REFRELE(rule); + + icmp6->icmp6_type = ICMP6_ECHO_REPLY; + icmp6->icmp6_cksum = ip6h->ip6_plen; + hdr_len = (char *)icmp6 - (char *)ip6h; + icmp6->icmp6_cksum = IP_CSUM(mp, hdr_len, + ilb_pseudo_sum_v6(ip6h, IPPROTO_ICMPV6)); + ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; + ip6h->ip6_hops = + ilbs->ilbs_netstack->netstack_ip->ips_ipv6_def_hops; + *lb_dst = ip6h->ip6_src; + ip6h->ip6_src = ip6h->ip6_dst; + ip6h->ip6_dst = *lb_dst; + return (ILB_BALANCED); + } + case ICMP6_PACKET_TOO_BIG: { + int ret; + + if (ilb_check_icmp_conn(ilbs, mp, IPPROTO_IPV6, ip6h, icmp6, + lb_dst)) { + ILB_R_KSTAT(rule, icmp_2big_processed); + ret = ILB_BALANCED; + } else { + ILB_R_KSTAT(rule, icmp_2big_dropped); + ret = ILB_DROPPED; + } + ILB_RULE_REFRELE(rule); + return (ret); + } + default: + ILB_R_KSTAT(rule, icmp_dropped); + ILB_RULE_REFRELE(rule); + return (ILB_DROPPED); + } +} + +/* + * Common routine to check an incoming packet and decide what to do with it. + * called by ilb_check_v4|v6(). + */ +static int +ilb_check(ilb_stack_t *ilbs, ill_t *ill, mblk_t *mp, in6_addr_t *src, + in6_addr_t *dst, int l3, int l4, void *iph, uint8_t *tph, uint32_t pkt_len, + in6_addr_t *lb_dst) +{ + in_port_t sport, dport; + tcpha_t *tcph; + udpha_t *udph; + ilb_rule_t *rule; + ilb_server_t *server; + boolean_t balanced; + struct ilb_sticky_s *s = NULL; + int ret; + uint32_t ip_sum, tp_sum; + ilb_nat_info_t info; + uint16_t nat_src_idx; + boolean_t busy; + + /* + * We don't really need to switch here since both protocols's + * ports are at the same offset. Just prepare for future protocol + * specific processing. 
+ */ + switch (l4) { + case IPPROTO_TCP: + if (tph + TCP_MIN_HEADER_LENGTH > mp->b_wptr) + return (ILB_DROPPED); + tcph = (tcpha_t *)tph; + sport = tcph->tha_lport; + dport = tcph->tha_fport; + break; + case IPPROTO_UDP: + if (tph + sizeof (udpha_t) > mp->b_wptr) + return (ILB_DROPPED); + udph = (udpha_t *)tph; + sport = udph->uha_src_port; + dport = udph->uha_dst_port; + break; + default: + return (ILB_PASSED); + } + + /* Fast path, there is an existing conn. */ + if (ilb_check_conn(ilbs, l3, iph, l4, tph, src, dst, sport, dport, + pkt_len, lb_dst)) { + return (ILB_BALANCED); + } + + /* + * If there is no existing connection for the incoming packet, check + * to see if the packet matches a rule. If not, just let IP decide + * what to do with it. + * + * Note: a reply from back end server should not match a rule. A + * reply should match one existing conn. + */ + rule = ilb_rule_hash(ilbs, l3, l4, dst, dport, ill->ill_zoneid, + pkt_len, &busy); + if (rule == NULL) { + /* If the rule is busy, just drop the packet. */ + if (busy) + return (ILB_DROPPED); + else + return (ILB_PASSED); + } + + /* + * The packet matches a rule, use the rule load balance algorithm + * to find a server. + */ + balanced = rule->ir_alg->ilb_alg_lb(src, sport, dst, dport, + rule->ir_alg->ilb_alg_data, &server); + /* + * This can only happen if there is no server in a rule or all + * the servers are currently disabled. + */ + if (!balanced) + goto no_server; + + /* + * If the rule is sticky enabled, we need to check the sticky table. + * If there is a sticky entry for the client, use the previous server + * instead of the one found above (note that both can be the same). + * If there is no entry for that client, add an entry to the sticky + * table. Both the find and add are done in ilb_sticky_find_add() + * to avoid checking for duplicate when adding an entry. 
+ */ + if (rule->ir_flags & ILB_RULE_STICKY) { + in6_addr_t addr; + + V6_MASK_COPY(*src, rule->ir_sticky_mask, addr); + if ((server = ilb_sticky_find_add(ilbs, rule, &addr, server, + &s, &nat_src_idx)) == NULL) { + ILB_R_KSTAT(rule, nomem_pkt_dropped); + ILB_R_KSTAT_UPDATE(rule, nomem_bytes_dropped, pkt_len); + goto no_server; + } + } + + /* + * We are holding a reference on the rule, so the server + * cannot go away. + */ + *lb_dst = server->iser_addr_v6; + ILB_S_KSTAT(server, pkt_processed); + ILB_S_KSTAT_UPDATE(server, bytes_processed, pkt_len); + + switch (rule->ir_topo) { + case ILB_TOPO_IMPL_NAT: { + ilb_nat_src_entry_t *src_ent; + uint16_t *src_idx; + + /* + * We create a cache even if it is not a SYN segment. + * The server should return a RST. When we see the + * RST, we will destroy this cache. But by having + * a cache, we know how to NAT the returned RST. + */ + info.vip = *dst; + info.dport = dport; + info.src = *src; + info.sport = sport; + + /* If stickiness is enabled, use the same source address */ + if (s != NULL) + src_idx = &nat_src_idx; + else + src_idx = NULL; + + if ((src_ent = ilb_alloc_nat_addr(server->iser_nat_src, + &info.nat_src, &info.nat_sport, src_idx)) == NULL) { + if (s != NULL) + ilb_sticky_refrele(s); + ILB_R_KSTAT(rule, pkt_dropped); + ILB_R_KSTAT_UPDATE(rule, bytes_dropped, pkt_len); + ILB_R_KSTAT(rule, noport_pkt_dropped); + ILB_R_KSTAT_UPDATE(rule, noport_bytes_dropped, pkt_len); + ret = ILB_DROPPED; + break; + } + info.src_ent = src_ent; + info.nat_dst = server->iser_addr_v6; + if (rule->ir_port_range && server->iser_port_range) { + info.nat_dport = htons(ntohs(dport) - + rule->ir_min_port + server->iser_min_port); + } else { + info.nat_dport = htons(server->iser_min_port); + } + + /* + * If ilb_conn_add() fails, it will release the reference on + * sticky info and de-allocate the NAT source port allocated + * above. 
+ */ + if (ilb_conn_add(ilbs, rule, server, src, sport, dst, + dport, &info, &ip_sum, &tp_sum, s) != 0) { + ILB_R_KSTAT(rule, pkt_dropped); + ILB_R_KSTAT_UPDATE(rule, bytes_dropped, pkt_len); + ILB_R_KSTAT(rule, nomem_pkt_dropped); + ILB_R_KSTAT_UPDATE(rule, nomem_bytes_dropped, pkt_len); + ret = ILB_DROPPED; + break; + } + ilb_full_nat(l3, iph, l4, tph, &info, ip_sum, tp_sum, B_TRUE); + ret = ILB_BALANCED; + break; + } + case ILB_TOPO_IMPL_HALF_NAT: + info.vip = *dst; + info.nat_dst = server->iser_addr_v6; + info.dport = dport; + if (rule->ir_port_range && server->iser_port_range) { + info.nat_dport = htons(ntohs(dport) - + rule->ir_min_port + server->iser_min_port); + } else { + info.nat_dport = htons(server->iser_min_port); + } + + if (ilb_conn_add(ilbs, rule, server, src, sport, dst, + dport, &info, &ip_sum, &tp_sum, s) != 0) { + ILB_R_KSTAT(rule, pkt_dropped); + ILB_R_KSTAT_UPDATE(rule, bytes_dropped, pkt_len); + ILB_R_KSTAT(rule, nomem_pkt_dropped); + ILB_R_KSTAT_UPDATE(rule, nomem_bytes_dropped, pkt_len); + ret = ILB_DROPPED; + break; + } + ilb_half_nat(l3, iph, l4, tph, &info, ip_sum, tp_sum, B_TRUE); + + ret = ILB_BALANCED; + break; + case ILB_TOPO_IMPL_DSR: + /* + * By decrementing the sticky refcnt, the period of + * stickiness (life time of ilb_sticky_t) will be + * from now to (now + default expiry time). + */ + if (s != NULL) + ilb_sticky_refrele(s); + ret = ILB_BALANCED; + break; + default: + cmn_err(CE_PANIC, "data corruption unknown topology: %p", + (void *) rule); + break; + } + ILB_RULE_REFRELE(rule); + return (ret); + +no_server: + /* This can only happen if there is no server available. 
*/ + ILB_R_KSTAT(rule, pkt_dropped); + ILB_R_KSTAT_UPDATE(rule, bytes_dropped, pkt_len); + ILB_RULE_REFRELE(rule); + return (ILB_DROPPED); +} + +int +ilb_check_v4(ilb_stack_t *ilbs, ill_t *ill, mblk_t *mp, ipha_t *ipha, int l4, + uint8_t *tph, ipaddr_t *lb_dst) +{ + in6_addr_t v6_src, v6_dst, v6_lb_dst; + int ret; + + ASSERT(DB_REF(mp) == 1); + + if (l4 == IPPROTO_ICMP) { + return (ilb_icmp_v4(ilbs, ill, mp, ipha, (icmph_t *)tph, + lb_dst)); + } + + IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &v6_src); + IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &v6_dst); + ret = ilb_check(ilbs, ill, mp, &v6_src, &v6_dst, IPPROTO_IP, l4, ipha, + tph, ntohs(ipha->ipha_length), &v6_lb_dst); + if (ret == ILB_BALANCED) + IN6_V4MAPPED_TO_IPADDR(&v6_lb_dst, *lb_dst); + return (ret); +} + +int +ilb_check_v6(ilb_stack_t *ilbs, ill_t *ill, mblk_t *mp, ip6_t *ip6h, int l4, + uint8_t *tph, in6_addr_t *lb_dst) +{ + uint32_t pkt_len; + + ASSERT(DB_REF(mp) == 1); + + if (l4 == IPPROTO_ICMPV6) { + return (ilb_icmp_v6(ilbs, ill, mp, ip6h, (icmp6_t *)tph, + lb_dst)); + } + + pkt_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; + return (ilb_check(ilbs, ill, mp, &ip6h->ip6_src, &ip6h->ip6_dst, + IPPROTO_IPV6, l4, ip6h, tph, pkt_len, lb_dst)); +} + +void +ilb_get_num_rules(ilb_stack_t *ilbs, zoneid_t zoneid, uint32_t *num_rules) +{ + ilb_rule_t *tmp_rule; + + mutex_enter(&ilbs->ilbs_g_lock); + *num_rules = 0; + for (tmp_rule = ilbs->ilbs_rule_head; tmp_rule != NULL; + tmp_rule = tmp_rule->ir_next) { + if (tmp_rule->ir_zoneid == zoneid) + *num_rules += 1; + } + mutex_exit(&ilbs->ilbs_g_lock); +} + +int +ilb_get_num_servers(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name, + uint32_t *num_servers) +{ + ilb_rule_t *rule; + int err; + + if ((rule = ilb_find_rule(ilbs, zoneid, name, &err)) == NULL) + return (err); + *num_servers = rule->ir_kstat.num_servers.value.ui64; + ILB_RULE_REFRELE(rule); + return (0); +} + +int +ilb_get_servers(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name, + ilb_server_info_t 
*servers, uint32_t *num_servers) +{ + ilb_rule_t *rule; + ilb_server_t *server; + size_t cnt; + int err; + + if ((rule = ilb_find_rule(ilbs, zoneid, name, &err)) == NULL) + return (err); + for (server = rule->ir_servers, cnt = *num_servers; + server != NULL && cnt > 0; + server = server->iser_next, cnt--, servers++) { + (void) memcpy(servers->name, server->iser_name, + ILB_SERVER_NAMESZ); + servers->addr = server->iser_addr_v6; + servers->min_port = htons(server->iser_min_port); + servers->max_port = htons(server->iser_max_port); + servers->flags = server->iser_enabled ? ILB_SERVER_ENABLED : 0; + servers->err = 0; + } + ILB_RULE_REFRELE(rule); + *num_servers -= cnt; + + return (0); +} + +void +ilb_get_rulenames(ilb_stack_t *ilbs, zoneid_t zoneid, uint32_t *num_names, + char *buf) +{ + ilb_rule_t *tmp_rule; + int cnt; + + if (*num_names == 0) + return; + + mutex_enter(&ilbs->ilbs_g_lock); + for (cnt = 0, tmp_rule = ilbs->ilbs_rule_head; tmp_rule != NULL; + tmp_rule = tmp_rule->ir_next) { + if (tmp_rule->ir_zoneid != zoneid) + continue; + + (void) memcpy(buf, tmp_rule->ir_name, ILB_RULE_NAMESZ); + buf += ILB_RULE_NAMESZ; + if (++cnt == *num_names) + break; + } + mutex_exit(&ilbs->ilbs_g_lock); + *num_names = cnt; +} + +int +ilb_rule_list(ilb_stack_t *ilbs, zoneid_t zoneid, ilb_rule_cmd_t *cmd) +{ + ilb_rule_t *rule; + int err; + + if ((rule = ilb_find_rule(ilbs, zoneid, cmd->name, &err)) == NULL) { + return (err); + } + + /* + * Except the enabled flags, none of the following will change + * in the life time of a rule. So we don't hold the mutex when + * reading them. The worst is to report a wrong enabled flags. 
+ */ + cmd->ip_ver = rule->ir_ipver; + cmd->proto = rule->ir_proto; + cmd->min_port = htons(rule->ir_min_port); + cmd->max_port = htons(rule->ir_max_port); + + cmd->vip = rule->ir_target_v6; + cmd->algo = rule->ir_alg_type; + cmd->topo = rule->ir_topo; + + cmd->nat_src_start = rule->ir_nat_src_start; + cmd->nat_src_end = rule->ir_nat_src_end; + + cmd->conn_drain_timeout = rule->ir_conn_drain_timeout; + cmd->nat_expiry = rule->ir_nat_expiry; + cmd->sticky_expiry = rule->ir_sticky_expiry; + + cmd->flags = 0; + if (rule->ir_flags & ILB_RULE_ENABLED) + cmd->flags |= ILB_RULE_ENABLED; + if (rule->ir_flags & ILB_RULE_STICKY) { + cmd->flags |= ILB_RULE_STICKY; + cmd->sticky_mask = rule->ir_sticky_mask; + } + + ILB_RULE_REFRELE(rule); + return (0); +} + +static void * +ilb_stack_init(netstackid_t stackid, netstack_t *ns) +{ + ilb_stack_t *ilbs; + char tq_name[TASKQ_NAMELEN]; + + ilbs = kmem_alloc(sizeof (ilb_stack_t), KM_SLEEP); + ilbs->ilbs_netstack = ns; + + ilbs->ilbs_rule_head = NULL; + ilbs->ilbs_g_hash = NULL; + mutex_init(&ilbs->ilbs_g_lock, NULL, MUTEX_DEFAULT, NULL); + + ilbs->ilbs_kstat = kmem_alloc(sizeof (ilb_g_kstat_t), KM_SLEEP); + if ((ilbs->ilbs_ksp = ilb_kstat_g_init(stackid, ilbs)) == NULL) { + kmem_free(ilbs, sizeof (ilb_stack_t)); + return (NULL); + } + + /* + * ilbs_conn/sticky_hash related info is initialized in + * ilb_conn/sticky_hash_init(). + */ + ilbs->ilbs_conn_taskq = NULL; + ilbs->ilbs_rule_hash_size = ilb_rule_hash_size; + ilbs->ilbs_conn_hash_size = ilb_conn_hash_size; + ilbs->ilbs_c2s_conn_hash = NULL; + ilbs->ilbs_s2c_conn_hash = NULL; + ilbs->ilbs_conn_timer_list = NULL; + + ilbs->ilbs_sticky_hash = NULL; + ilbs->ilbs_sticky_hash_size = ilb_sticky_hash_size; + ilbs->ilbs_sticky_timer_list = NULL; + ilbs->ilbs_sticky_taskq = NULL; + + /* The allocation is done later when there is a rule using NAT mode. 
*/ + ilbs->ilbs_nat_src = NULL; + ilbs->ilbs_nat_src_hash_size = ilb_nat_src_hash_size; + mutex_init(&ilbs->ilbs_nat_src_lock, NULL, MUTEX_DEFAULT, NULL); + ilbs->ilbs_nat_src_tid = 0; + + /* For listing the conn hash table */ + mutex_init(&ilbs->ilbs_conn_list_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&ilbs->ilbs_conn_list_cv, NULL, CV_DEFAULT, NULL); + ilbs->ilbs_conn_list_busy = B_FALSE; + ilbs->ilbs_conn_list_cur = 0; + ilbs->ilbs_conn_list_connp = NULL; + + /* For listing the sticky hash table */ + mutex_init(&ilbs->ilbs_sticky_list_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&ilbs->ilbs_sticky_list_cv, NULL, CV_DEFAULT, NULL); + ilbs->ilbs_sticky_list_busy = B_FALSE; + ilbs->ilbs_sticky_list_cur = 0; + ilbs->ilbs_sticky_list_curp = NULL; + + (void) snprintf(tq_name, sizeof (tq_name), "ilb_rule_taskq_%p", ns); + ilbs->ilbs_rule_taskq = taskq_create(tq_name, ILB_RULE_TASKQ_NUM_THR, + minclsyspri, 1, INT_MAX, TASKQ_PREPOPULATE|TASKQ_DYNAMIC); + + return (ilbs); +} + +/* ARGSUSED */ +static void +ilb_stack_shutdown(netstackid_t stackid, void *arg) +{ + ilb_stack_t *ilbs = (ilb_stack_t *)arg; + ilb_rule_t *tmp_rule; + + ilb_sticky_hash_fini(ilbs); + ilb_conn_hash_fini(ilbs); + mutex_enter(&ilbs->ilbs_g_lock); + while ((tmp_rule = ilbs->ilbs_rule_head) != NULL) { + ilb_rule_hash_del(tmp_rule); + ilb_rule_g_del(ilbs, tmp_rule); + mutex_exit(&ilbs->ilbs_g_lock); + ilb_rule_del_common(ilbs, tmp_rule); + mutex_enter(&ilbs->ilbs_g_lock); + } + mutex_exit(&ilbs->ilbs_g_lock); + if (ilbs->ilbs_nat_src != NULL) + ilb_nat_src_fini(ilbs); +} + +static void +ilb_stack_fini(netstackid_t stackid, void * arg) +{ + ilb_stack_t *ilbs = (ilb_stack_t *)arg; + + ilb_rule_hash_fini(ilbs); + taskq_destroy(ilbs->ilbs_rule_taskq); + ilb_kstat_g_fini(stackid, ilbs); + kmem_free(ilbs->ilbs_kstat, sizeof (ilb_g_kstat_t)); + kmem_free(ilbs, sizeof (ilb_stack_t)); +} + +void +ilb_ddi_g_init(void) +{ + netstack_register(NS_ILB, ilb_stack_init, ilb_stack_shutdown, + ilb_stack_fini); +} + 
+void +ilb_ddi_g_destroy(void) +{ + netstack_unregister(NS_ILB); + ilb_conn_cache_fini(); + ilb_sticky_cache_fini(); +} diff --git a/usr/src/uts/common/inet/ilb/ilb_alg.h b/usr/src/uts/common/inet/ilb/ilb_alg.h new file mode 100644 index 0000000000..5e94b8b666 --- /dev/null +++ b/usr/src/uts/common/inet/ilb/ilb_alg.h @@ -0,0 +1,44 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _INET_ILB_ALG_H +#define _INET_ILB_ALG_H + + +#ifdef __cplusplus +extern "C" { +#endif + +/* Load balance algorithms initialization routines. 
*/ +ilb_alg_data_t *ilb_alg_rr_init(ilb_rule_t *, void *); +ilb_alg_data_t *ilb_alg_hash_init(ilb_rule_t *, const void *); + + +#ifdef __cplusplus +} +#endif + +#endif /* _INET_ILB_ALG_H */ diff --git a/usr/src/uts/common/inet/ilb/ilb_alg_hash.c b/usr/src/uts/common/inet/ilb/ilb_alg_hash.c new file mode 100644 index 0000000000..94140b7d8e --- /dev/null +++ b/usr/src/uts/common/inet/ilb/ilb_alg_hash.c @@ -0,0 +1,431 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */
+
+#include <sys/types.h>
+#include <sys/cmn_err.h>
+#include <netinet/in.h>
+#include <inet/ip.h>
+#include <inet/ip6.h>
+#include <sys/crc32.h>
+
+#include <inet/ilb.h>
+#include "ilb_impl.h"
+#include "ilb_alg.h"
+
+/*
+ * Hash macros.  Each one runs CRC32 over a 32-bit value, leaves the result
+ * in (hash) and then reduces it modulo (size), so (size) must not be 0.
+ * The _V6 variants take in6_addr_t pointers but only feed the last 32 bits
+ * of the address (s6_addr32[3]) to the corresponding _V4 macro.
+ *
+ * HASH_IP_*		hash on the address only
+ * HASH_IP_PORT_*	hash on the address mixed with the port
+ * HASH_IP_VIP_*	hash on the source address XORed with the VIP
+ */
+#define	HASH_IP_V4(hash, addr, size) 					\
+{									\
+	CRC32((hash), &(addr), sizeof (in_addr_t), -1U, crc32_table);	\
+	(hash) %= (size);						\
+}
+#define	HASH_IP_V6(hash, addr, size)					\
+	HASH_IP_V4((hash), (addr)->s6_addr32[3], (size))
+
+#define	HASH_IP_PORT_V4(hash, addr, port, size) 			\
+{									\
+	uint32_t val = (addr) ^ ((port) << 16) ^ (port);		\
+	CRC32((hash), &val, sizeof (uint32_t), -1U, crc32_table);	\
+	(hash) %= (size);						\
+}
+#define	HASH_IP_PORT_V6(hash, addr, port, size)				\
+	HASH_IP_PORT_V4((hash), (addr)->s6_addr32[3], (port), (size))
+
+#define	HASH_IP_VIP_V4(hash, saddr, daddr, size)			\
+{									\
+	uint32_t val = (saddr) ^ (daddr);				\
+	CRC32((hash), &val, sizeof (uint32_t), -1U, crc32_table);	\
+	(hash) %= (size);						\
+}
+#define	HASH_IP_VIP_V6(hash, saddr, daddr, size)			\
+	HASH_IP_VIP_V4((hash), (saddr)->s6_addr32[3], (daddr)->s6_addr32[3], \
+	(size))
+
+/* Initial number of table entries, and the increment used by grow_tbl(). */
+#define	INIT_HASH_TBL_SIZE	10
+
+/* One slot of a server table. */
+typedef struct {
+	ilb_server_t	*server;
+	boolean_t	enabled;
+} hash_server_t;
+
+/*
+ * There are two hash tables.  The hash_tbl holds all servers, both enabled
+ * and disabled.  The hash_enabled_tbl only holds enabled servers.  Having
+ * two tables allows the hash of a client request to remain the same even when
+ * some servers are disabled.  If a server is disabled and a client's request
+ * hashes to it, we will do another hash.  This time the hash is on the enabled
+ * server table.
+ */
+typedef struct hash_s {
+	kmutex_t	hash_lock;	/* Held while the tables are used */
+	size_t		hash_servers;		/* Total # of servers */
+	size_t		hash_tbl_size;		/* All server table size */
+	size_t		hash_enabled_servers;	/* # of enabled servers */
+	size_t		hash_enabled_tbl_size;	/* Enabled server table size */
+	hash_server_t	*hash_tbl;
+	hash_server_t	*hash_enabled_tbl;
+	ilb_algo_impl_t	hash_type;	/* Selects the hash used by hash_lb() */
+} hash_t;
+
+static void hash_fini(ilb_alg_data_t **);
+
+/*
+ * Load balance callback of the hash algorithm.  The hash selected by
+ * hash_type is first computed over all hash_servers entries of hash_tbl.  If
+ * the chosen slot is enabled, its server is returned.  Otherwise the same
+ * hash is recomputed over the hash_enabled_servers entries of
+ * hash_enabled_tbl and that server is returned instead.
+ *
+ * Returns B_TRUE with *ret_server set, or B_FALSE (and *ret_server NULL)
+ * when there is no server, no enabled server, or hash_type is unknown.
+ * Which arguments are used depends on hash_type; dport is never used.
+ */
+/* ARGSUSED */
+static boolean_t
+hash_lb(in6_addr_t *saddr, in_port_t sport, in6_addr_t *daddr,
+    in_port_t dport, void *alg_data, ilb_server_t **ret_server)
+{
+	hash_t *hash_alg = (hash_t *)alg_data;
+	uint32_t i;
+
+	ASSERT(ret_server != NULL);
+	*ret_server = NULL;
+
+	mutex_enter(&hash_alg->hash_lock);
+
+	/* The hash macros take the table size as modulus; avoid 0. */
+	if (hash_alg->hash_servers == 0) {
+		mutex_exit(&hash_alg->hash_lock);
+		return (B_FALSE);
+	}
+
+	/* First try: hash over the table of all servers. */
+	switch (hash_alg->hash_type) {
+	case ILB_ALG_IMPL_HASH_IP:
+		HASH_IP_V6(i, saddr, hash_alg->hash_servers);
+		break;
+	case ILB_ALG_IMPL_HASH_IP_SPORT:
+		HASH_IP_PORT_V6(i, saddr, sport, hash_alg->hash_servers);
+		break;
+	case ILB_ALG_IMPL_HASH_IP_VIP:
+		HASH_IP_VIP_V6(i, saddr, daddr, hash_alg->hash_servers);
+		break;
+	default:
+		mutex_exit(&hash_alg->hash_lock);
+		return (B_FALSE);
+	}
+	if (hash_alg->hash_tbl[i].enabled) {
+		*ret_server = hash_alg->hash_tbl[i].server;
+		mutex_exit(&hash_alg->hash_lock);
+		return (B_TRUE);
+	}
+
+	if (hash_alg->hash_enabled_servers == 0) {
+		mutex_exit(&hash_alg->hash_lock);
+		return (B_FALSE);
+	}
+
+	/* Second try: the slot was disabled, hash over enabled servers only. */
+	switch (hash_alg->hash_type) {
+	case ILB_ALG_IMPL_HASH_IP:
+		HASH_IP_V6(i, saddr, hash_alg->hash_enabled_servers);
+		break;
+	case ILB_ALG_IMPL_HASH_IP_SPORT:
+		HASH_IP_PORT_V6(i, saddr, sport,
+		    hash_alg->hash_enabled_servers);
+		break;
+	case ILB_ALG_IMPL_HASH_IP_VIP:
+		HASH_IP_VIP_V6(i, saddr, daddr,
+		    hash_alg->hash_enabled_servers);
+		break;
+	default:
+		/* Unknown types already returned in the first switch. */
+		ASSERT(0);
+		break;
+	}
+	*ret_server = hash_alg->hash_enabled_tbl[i].server;
+	mutex_exit(&hash_alg->hash_lock);
+	return (B_TRUE);
+}
+
+static
boolean_t
+del_server(hash_server_t *tbl, size_t hash_size, ilb_server_t *host)
+{
+	/*
+	 * Remove host from the first hash_size entries of tbl.  Entries after
+	 * it are moved down one slot so that the used part of the table stays
+	 * contiguous, and the slot freed at the end is cleared.  Returns
+	 * B_FALSE if host is not in the table.  The caller is responsible for
+	 * decrementing its entry count.
+	 */
+	size_t i, j;
+
+	for (i = 0; i < hash_size; i++) {
+		if (tbl[i].server == host) {
+			/* Last entry: nothing to shift. */
+			if (i == hash_size - 1)
+				break;
+			for (j = i; j < hash_size - 1; j++)
+				tbl[j] = tbl[j + 1];
+			break;
+		}
+	}
+	/* Not found... */
+	if (i == hash_size)
+		return (B_FALSE);
+	tbl[hash_size - 1].server = NULL;
+	tbl[hash_size - 1].enabled = B_FALSE;
+	return (B_TRUE);
+}
+
+/*
+ * Server delete callback of the hash algorithm.  Removes host from hash_tbl
+ * and, if it is there, from hash_enabled_tbl, then drops the reference taken
+ * by hash_server_add().  Returns EINVAL if host is not in hash_tbl.
+ */
+static int
+hash_server_del(ilb_server_t *host, void *alg_data)
+{
+	hash_t *hash_alg = (hash_t *)alg_data;
+	boolean_t ret;
+
+	mutex_enter(&hash_alg->hash_lock);
+
+	ret = del_server(hash_alg->hash_tbl, hash_alg->hash_servers, host);
+	if (!ret) {
+		mutex_exit(&hash_alg->hash_lock);
+		return (EINVAL);
+	}
+	hash_alg->hash_servers--;
+
+	/* The server may not be enabled. */
+	ret = del_server(hash_alg->hash_enabled_tbl,
+	    hash_alg->hash_enabled_servers, host);
+	if (ret)
+		hash_alg->hash_enabled_servers--;
+
+	mutex_exit(&hash_alg->hash_lock);
+	ILB_SERVER_REFRELE(host);
+	return (0);
+}
+
+/*
+ * Replace *hash_tbl by a zeroed table that is INIT_HASH_TBL_SIZE entries
+ * larger, copying the old contents over and freeing the old table.
+ * *tbl_size is updated to the new size.  The allocation does not sleep; on
+ * failure ENOMEM is returned and the table and its size are left untouched.
+ */
+static int
+grow_tbl(hash_server_t **hash_tbl, size_t *tbl_size)
+{
+	size_t mem_size;
+	hash_server_t *new_tbl;
+
+	if ((new_tbl = kmem_zalloc(sizeof (hash_server_t) *
+	    (*tbl_size + INIT_HASH_TBL_SIZE), KM_NOSLEEP)) == NULL) {
+		return (ENOMEM);
+	}
+	/* Size in bytes of the old table. */
+	mem_size = *tbl_size * sizeof (hash_server_t);
+	bcopy(*hash_tbl, new_tbl, mem_size);
+	kmem_free(*hash_tbl, mem_size);
+	*hash_tbl = new_tbl;
+	*tbl_size += INIT_HASH_TBL_SIZE;
+	return (0);
+}
+
+/*
+ * Server add callback of the hash algorithm.  Appends host to hash_tbl and,
+ * if host is enabled, to hash_enabled_tbl as well.  On success a reference
+ * is held on host and 0 is returned.  ENOMEM is returned if a table cannot
+ * be grown.
+ */
+static int
+hash_server_add(ilb_server_t *host, void *alg_data)
+{
+	hash_t *hash_alg = (hash_t *)alg_data;
+	size_t new_size;
+
+	mutex_enter(&hash_alg->hash_lock);
+
+	/* First add the server to the hash_tbl.
*/ + new_size = hash_alg->hash_servers + 1; + if (new_size > hash_alg->hash_tbl_size) { + if (grow_tbl(&hash_alg->hash_tbl, &hash_alg->hash_tbl_size) != + 0) { + mutex_exit(&hash_alg->hash_lock); + return (ENOMEM); + } + } + + hash_alg->hash_tbl[hash_alg->hash_servers].server = host; + hash_alg->hash_tbl[hash_alg->hash_servers].enabled = host->iser_enabled; + hash_alg->hash_servers++; + + if (!host->iser_enabled) { + mutex_exit(&hash_alg->hash_lock); + ILB_SERVER_REFHOLD(host); + return (0); + } + + /* If the server is enabled, add it to the hasn_enabled_tbl. */ + new_size = hash_alg->hash_enabled_servers + 1; + if (new_size > hash_alg->hash_enabled_tbl_size) { + if (grow_tbl(&hash_alg->hash_enabled_tbl, + &hash_alg->hash_enabled_tbl_size) != 0) { + mutex_exit(&hash_alg->hash_lock); + return (ENOMEM); + } + } + hash_alg->hash_enabled_tbl[hash_alg->hash_enabled_servers].server = + host; + hash_alg->hash_enabled_tbl[hash_alg->hash_enabled_servers].enabled = + B_TRUE; + hash_alg->hash_enabled_servers++; + + mutex_exit(&hash_alg->hash_lock); + ILB_SERVER_REFHOLD(host); + return (0); +} + +static int +hash_server_enable(ilb_server_t *host, void *alg_data) +{ + hash_t *alg = (hash_t *)alg_data; + size_t new_size, i; + + mutex_enter(&alg->hash_lock); + + for (i = 0; i < alg->hash_servers; i++) { + if (alg->hash_tbl[i].server == host) { + if (alg->hash_tbl[i].enabled) { + mutex_exit(&alg->hash_lock); + return (0); + } else { + break; + } + } + } + if (i == alg->hash_servers) { + mutex_exit(&alg->hash_lock); + return (EINVAL); + } + +#if DEBUG + /* The server should not be in the enabled tabled. 
*/
+	{
+		size_t j;
+
+		for (j = 0; j < alg->hash_enabled_servers; j++) {
+			if (alg->hash_enabled_tbl[j].server == host) {
+				cmn_err(CE_PANIC, "Corrupted ILB enabled hash "
+				    "table");
+			}
+		}
+	}
+#endif
+
+	/* Make room for one more entry before changing any state. */
+	new_size = alg->hash_enabled_servers + 1;
+	if (new_size > alg->hash_enabled_tbl_size) {
+		if (grow_tbl(&alg->hash_enabled_tbl,
+		    &alg->hash_enabled_tbl_size) != 0) {
+			mutex_exit(&alg->hash_lock);
+			return (ENOMEM);
+		}
+	}
+	alg->hash_tbl[i].enabled = B_TRUE;
+	alg->hash_enabled_tbl[alg->hash_enabled_servers].server = host;
+	alg->hash_enabled_tbl[alg->hash_enabled_servers].enabled = B_TRUE;
+	alg->hash_enabled_servers++;
+
+	mutex_exit(&alg->hash_lock);
+	return (0);
+}
+
+/*
+ * Server disable callback of the hash algorithm.  Marks host as disabled in
+ * hash_tbl and removes it from hash_enabled_tbl.  Returns 0 if host is (or
+ * already was) disabled and EINVAL if host is not in hash_tbl.
+ */
+static int
+hash_server_disable(ilb_server_t *host, void *alg_data)
+{
+	hash_t *alg = (hash_t *)alg_data;
+	size_t i;
+
+	mutex_enter(&alg->hash_lock);
+
+	/* Find host in the table of all servers; i is its index on exit. */
+	for (i = 0; i < alg->hash_servers; i++) {
+		if (alg->hash_tbl[i].server == host) {
+			if (!alg->hash_tbl[i].enabled) {
+				mutex_exit(&alg->hash_lock);
+				return (0);
+			} else {
+				break;
+			}
+		}
+	}
+	if (i == alg->hash_servers) {
+		mutex_exit(&alg->hash_lock);
+		return (EINVAL);
+	}
+
+	alg->hash_tbl[i].enabled = B_FALSE;
+	/*
+	 * del_server() has a side effect, so it is called in both variants;
+	 * only the DEBUG one checks that the server was really found.
+	 */
+#if DEBUG
+	ASSERT(del_server(alg->hash_enabled_tbl, alg->hash_enabled_servers,
+	    host));
+#else
+	(void) del_server(alg->hash_enabled_tbl, alg->hash_enabled_servers,
+	    host);
+#endif
+	alg->hash_enabled_servers--;
+
+	mutex_exit(&alg->hash_lock);
+	return (0);
+}
+
+/*
+ * Create the state of the hash algorithm for a rule.  arg points to an int
+ * that is read as the hash type.  Returns the new ilb_alg_data_t with all
+ * callbacks set, or NULL if memory cannot be allocated without sleeping.
+ */
+/* ARGSUSED */
+ilb_alg_data_t *
+ilb_alg_hash_init(ilb_rule_t *rule, const void *arg)
+{
+	ilb_alg_data_t	*alg;
+	hash_t		*hash_alg;
+	int		flags = *(int *)arg;
+
+	if ((alg = kmem_alloc(sizeof (ilb_alg_data_t), KM_NOSLEEP)) == NULL)
+		return (NULL);
+	if ((hash_alg = kmem_alloc(sizeof (hash_t), KM_NOSLEEP)) == NULL) {
+		kmem_free(alg, sizeof (ilb_alg_data_t));
+		return (NULL);
+	}
+	alg->ilb_alg_lb = hash_lb;
+	alg->ilb_alg_server_del = hash_server_del;
+	alg->ilb_alg_server_add = hash_server_add;
+	alg->ilb_alg_server_enable = hash_server_enable;
+ 
alg->ilb_alg_server_disable = hash_server_disable; + alg->ilb_alg_fini = hash_fini; + alg->ilb_alg_data = hash_alg; + + mutex_init(&hash_alg->hash_lock, NULL, MUTEX_DEFAULT, NULL); + hash_alg->hash_type = flags; + + /* Table of all servers */ + hash_alg->hash_servers = 0; + hash_alg->hash_tbl_size = INIT_HASH_TBL_SIZE; + hash_alg->hash_tbl = kmem_zalloc(sizeof (hash_server_t) * + INIT_HASH_TBL_SIZE, KM_NOSLEEP); + if (hash_alg->hash_tbl == NULL) { + kmem_free(hash_alg, sizeof (hash_t)); + kmem_free(alg, sizeof (ilb_alg_data_t)); + return (NULL); + } + + /* Table of only enabled servers */ + hash_alg->hash_enabled_servers = 0; + hash_alg->hash_enabled_tbl_size = INIT_HASH_TBL_SIZE; + hash_alg->hash_enabled_tbl = kmem_zalloc(sizeof (hash_server_t) * + INIT_HASH_TBL_SIZE, KM_NOSLEEP); + if (hash_alg->hash_tbl == NULL) { + kmem_free(hash_alg->hash_tbl, INIT_HASH_TBL_SIZE * + sizeof (ilb_server_t *)); + kmem_free(hash_alg, sizeof (hash_t)); + kmem_free(alg, sizeof (ilb_alg_data_t)); + return (NULL); + } + + return (alg); +} + +static void +hash_fini(ilb_alg_data_t **alg) +{ + hash_t *hash_alg; + int i; + + hash_alg = (*alg)->ilb_alg_data; + for (i = 0; i < hash_alg->hash_servers; i++) + ILB_SERVER_REFRELE(hash_alg->hash_tbl[i].server); + + kmem_free(hash_alg->hash_tbl, sizeof (hash_server_t) * + hash_alg->hash_tbl_size); + kmem_free(hash_alg->hash_enabled_tbl, sizeof (hash_server_t) * + hash_alg->hash_enabled_tbl_size); + kmem_free(hash_alg, sizeof (hash_t)); + kmem_free(*alg, sizeof (ilb_alg_data_t)); + *alg = NULL; +} diff --git a/usr/src/uts/common/inet/ilb/ilb_alg_rr.c b/usr/src/uts/common/inet/ilb/ilb_alg_rr.c new file mode 100644 index 0000000000..e88712e19f --- /dev/null +++ b/usr/src/uts/common/inet/ilb/ilb_alg_rr.c @@ -0,0 +1,232 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/errno.h> +#include <sys/sysmacros.h> +#include <sys/list.h> +#include <net/if.h> +#include <netinet/in.h> +#include <inet/ilb.h> +#include "ilb_impl.h" +#include "ilb_alg.h" + +typedef struct { + ilb_server_t *server; + boolean_t enabled; + list_node_t list; +} rr_server_t; + +typedef struct rr_s { + kmutex_t rr_lock; + list_t rr_servers; + rr_server_t *rr_next; +} rr_t; + +static void rr_fini(ilb_alg_data_t **); + +/* ARGSUSED */ +static boolean_t +rr_lb(in6_addr_t *saddr, in_port_t sport, in6_addr_t *daddr, + in_port_t dport, void *alg_data, ilb_server_t **ret_server) +{ + rr_t *rr_alg = (rr_t *)alg_data; + list_t *servers; + rr_server_t *start; + + ASSERT(ret_server != NULL); + *ret_server = NULL; + + mutex_enter(&rr_alg->rr_lock); + servers = &rr_alg->rr_servers; + if (list_is_empty(servers)) { + mutex_exit(&rr_alg->rr_lock); + return (B_FALSE); + } + if (rr_alg->rr_next == NULL) + rr_alg->rr_next = list_head(servers); + start = rr_alg->rr_next; + while (!rr_alg->rr_next->enabled) { + rr_alg->rr_next = list_next(servers, rr_alg->rr_next); + if (rr_alg->rr_next == NULL) + rr_alg->rr_next = list_head(servers); + if (rr_alg->rr_next == start) { + mutex_exit(&rr_alg->rr_lock); + return (B_FALSE); + } + } + + *ret_server = 
rr_alg->rr_next->server; + rr_alg->rr_next = list_next(servers, rr_alg->rr_next); + mutex_exit(&rr_alg->rr_lock); + return (B_TRUE); +} + +static int +rr_server_del(ilb_server_t *host, void *alg_data) +{ + rr_t *rr_alg = (rr_t *)alg_data; + list_t *servers = &rr_alg->rr_servers; + rr_server_t *tmp_server; + + mutex_enter(&rr_alg->rr_lock); + for (tmp_server = list_head(servers); tmp_server != NULL; + tmp_server = list_next(servers, tmp_server)) { + if (tmp_server->server == host) { + if (rr_alg->rr_next == tmp_server) { + rr_alg->rr_next = list_next(servers, + tmp_server); + } + list_remove(servers, tmp_server); + break; + } + } + mutex_exit(&rr_alg->rr_lock); + if (tmp_server == NULL) + return (EINVAL); + kmem_free(tmp_server, sizeof (rr_server_t)); + + ILB_SERVER_REFRELE(host); + return (0); +} + +static int +rr_server_add(ilb_server_t *host, void *alg_data) +{ + rr_t *rr_alg = (rr_t *)alg_data; + rr_server_t *new_server; + + new_server = kmem_alloc(sizeof (rr_server_t), KM_NOSLEEP); + if (new_server == NULL) + return (ENOMEM); + new_server->server = host; + new_server->enabled = host->iser_enabled; + + mutex_enter(&rr_alg->rr_lock); + list_insert_head(&rr_alg->rr_servers, new_server); + mutex_exit(&rr_alg->rr_lock); + + ILB_SERVER_REFHOLD(host); + return (0); +} + +static int +rr_server_toggle(list_t *servers, ilb_server_t *host, boolean_t value) +{ + rr_server_t *tmp_server; + + if (list_is_empty(servers)) + return (EINVAL); + + for (tmp_server = list_head(servers); tmp_server != NULL; + tmp_server = list_next(servers, tmp_server)) { + if (tmp_server->server == host) { + tmp_server->enabled = value; + break; + } + } + if (tmp_server != NULL) + return (0); + else + return (EINVAL); +} + +static int +rr_server_enable(ilb_server_t *host, void *alg_data) +{ + rr_t *rr_alg = (rr_t *)alg_data; + list_t *servers; + int ret; + + mutex_enter(&rr_alg->rr_lock); + servers = &rr_alg->rr_servers; + ret = rr_server_toggle(servers, host, B_TRUE); + 
mutex_exit(&rr_alg->rr_lock); + return (ret); +} + +static int +rr_server_disable(ilb_server_t *host, void *alg_data) +{ + rr_t *rr_alg = (rr_t *)alg_data; + list_t *servers; + int ret; + + mutex_enter(&rr_alg->rr_lock); + servers = &rr_alg->rr_servers; + ret = rr_server_toggle(servers, host, B_FALSE); + mutex_exit(&rr_alg->rr_lock); + return (ret); +} + +/* ARGSUSED */ +ilb_alg_data_t * +ilb_alg_rr_init(ilb_rule_t *rule, void *arg) +{ + ilb_alg_data_t *alg; + rr_t *rr_alg; + + if ((alg = kmem_alloc(sizeof (ilb_alg_data_t), KM_NOSLEEP)) == NULL) + return (NULL); + if ((rr_alg = kmem_alloc(sizeof (rr_t), KM_NOSLEEP)) == NULL) { + kmem_free(alg, sizeof (ilb_alg_data_t)); + return (NULL); + } + + alg->ilb_alg_lb = rr_lb; + alg->ilb_alg_server_del = rr_server_del; + alg->ilb_alg_server_add = rr_server_add; + alg->ilb_alg_server_enable = rr_server_enable; + alg->ilb_alg_server_disable = rr_server_disable; + alg->ilb_alg_fini = rr_fini; + alg->ilb_alg_data = rr_alg; + + mutex_init(&rr_alg->rr_lock, NULL, MUTEX_DEFAULT, NULL); + list_create(&rr_alg->rr_servers, sizeof (rr_server_t), + offsetof(rr_server_t, list)); + rr_alg->rr_next = NULL; + + return (alg); +} + +static void +rr_fini(ilb_alg_data_t **alg) +{ + rr_t *rr_alg; + rr_server_t *tmp_server; + list_t *servers; + + rr_alg = (*alg)->ilb_alg_data; + servers = &rr_alg->rr_servers; + while ((tmp_server = list_head(servers)) != NULL) { + list_remove(servers, tmp_server); + ILB_SERVER_REFRELE(tmp_server->server); + kmem_free(tmp_server, sizeof (rr_server_t)); + } + list_destroy(servers); + kmem_free(rr_alg, sizeof (rr_t)); + kmem_free(*alg, sizeof (ilb_alg_data_t)); + *alg = NULL; +} diff --git a/usr/src/uts/common/inet/ilb/ilb_conn.c b/usr/src/uts/common/inet/ilb/ilb_conn.c new file mode 100644 index 0000000000..d4e88260cf --- /dev/null +++ b/usr/src/uts/common/inet/ilb/ilb_conn.c @@ -0,0 +1,1531 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and 
Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/types.h> +#include <sys/conf.h> +#include <sys/time.h> +#include <sys/taskq.h> +#include <sys/cmn_err.h> +#include <sys/sdt.h> +#include <sys/atomic.h> +#include <netinet/in.h> +#include <inet/ip.h> +#include <inet/ip6.h> +#include <inet/tcp.h> +#include <inet/udp_impl.h> +#include <inet/ilb.h> + +#include "ilb_stack.h" +#include "ilb_impl.h" +#include "ilb_conn.h" +#include "ilb_nat.h" + +/* + * Timer struct for ilb_conn_t and ilb_sticky_t garbage collection + * + * start: starting index into the hash table to do gc + * end: ending index into the hash table to do gc + * ilbs: pointer to the ilb_stack_t of the IP stack + * tid_lock: mutex to protect the timer id. 
+ * tid: timer id of the timer + */ +typedef struct ilb_timer_s { + uint32_t start; + uint32_t end; + ilb_stack_t *ilbs; + kmutex_t tid_lock; + timeout_id_t tid; +} ilb_timer_t; + +/* Hash macro for finding the index to the conn hash table */ +#define ILB_CONN_HASH(saddr, sport, daddr, dport, hash_size) \ + (((*((saddr) + 3) ^ *((daddr) + 3)) * 50653 + \ + (*((saddr) + 2) ^ *((daddr) + 2)) * 1369 + \ + (*((saddr) + 1) ^ *((daddr) + 1)) * 37 + \ + (*(saddr) ^ *(daddr)) + (sport) * 37 + (dport)) & \ + ((hash_size) - 1)) + +/* Kmem cache for the conn hash entry */ +static struct kmem_cache *ilb_conn_cache = NULL; + +/* + * There are 60 timers running to do conn cache garbage collection. Each + * gc thread is responsible for 1/60 of the conn hash table. + */ +static int ilb_conn_timer_size = 60; + +/* Each of the above gc timers wake up every 15s to do the gc. */ +static int ilb_conn_cache_timeout = 15; + +#define ILB_STICKY_HASH(saddr, rule, hash_size) \ + (((*((saddr) + 3) ^ ((rule) >> 24)) * 29791 + \ + (*((saddr) + 2) ^ ((rule) >> 16)) * 961 + \ + (*((saddr) + 1) ^ ((rule) >> 8)) * 31 + \ + (*(saddr) ^ (rule))) & ((hash_size) - 1)) + +static struct kmem_cache *ilb_sticky_cache = NULL; + +/* + * There are 60 timers running to do sticky cache garbage collection. Each + * gc thread is responsible for 1/60 of the sticky hash table. + */ +static int ilb_sticky_timer_size = 60; + +/* Each of the above gc timers wake up every 15s to do the gc. 
*/ +static int ilb_sticky_timeout = 15; + +#define ILB_STICKY_REFRELE(s) \ +{ \ + mutex_enter(&(s)->hash->sticky_lock); \ + (s)->refcnt--; \ + (s)->atime = lbolt64; \ + mutex_exit(&s->hash->sticky_lock); \ +} + + +static void +ilb_conn_cache_init(void) +{ + ilb_conn_cache = kmem_cache_create("ilb_conn_cache", + sizeof (ilb_conn_t), 0, NULL, NULL, NULL, NULL, NULL, + ilb_kmem_flags); +} + +void +ilb_conn_cache_fini(void) +{ + if (ilb_conn_cache != NULL) { + kmem_cache_destroy(ilb_conn_cache); + ilb_conn_cache = NULL; + } +} + +static void +ilb_conn_remove_common(ilb_conn_t *connp, boolean_t c2s) +{ + ilb_conn_hash_t *hash; + ilb_conn_t **next, **prev; + ilb_conn_t **next_prev, **prev_next; + + if (c2s) { + hash = connp->conn_c2s_hash; + ASSERT(MUTEX_HELD(&hash->ilb_conn_hash_lock)); + next = &connp->conn_c2s_next; + prev = &connp->conn_c2s_prev; + if (*next != NULL) + next_prev = &(*next)->conn_c2s_prev; + if (*prev != NULL) + prev_next = &(*prev)->conn_c2s_next; + } else { + hash = connp->conn_s2c_hash; + ASSERT(MUTEX_HELD(&hash->ilb_conn_hash_lock)); + next = &connp->conn_s2c_next; + prev = &connp->conn_s2c_prev; + if (*next != NULL) + next_prev = &(*next)->conn_s2c_prev; + if (*prev != NULL) + prev_next = &(*prev)->conn_s2c_next; + } + + if (hash->ilb_connp == connp) { + hash->ilb_connp = *next; + if (*next != NULL) + *next_prev = NULL; + } else { + if (*prev != NULL) + *prev_next = *next; + if (*next != NULL) + *next_prev = *prev; + } + ASSERT(hash->ilb_conn_cnt > 0); + hash->ilb_conn_cnt--; + + *next = NULL; + *prev = NULL; +} + +static void +ilb_conn_remove(ilb_conn_t *connp) +{ + ASSERT(MUTEX_HELD(&connp->conn_c2s_hash->ilb_conn_hash_lock)); + ilb_conn_remove_common(connp, B_TRUE); + ASSERT(MUTEX_HELD(&connp->conn_s2c_hash->ilb_conn_hash_lock)); + ilb_conn_remove_common(connp, B_FALSE); + + if (connp->conn_rule_cache.topo == ILB_TOPO_IMPL_NAT) { + in_port_t port; + + port = ntohs(connp->conn_rule_cache.info.nat_sport); + 
vmem_free(connp->conn_rule_cache.info.src_ent->nse_port_arena, + (void *)(uintptr_t)port, 1); + } + + if (connp->conn_sticky != NULL) + ILB_STICKY_REFRELE(connp->conn_sticky); + ILB_SERVER_REFRELE(connp->conn_server); + kmem_cache_free(ilb_conn_cache, connp); +} + +/* + * Routine to do periodic garbage collection of conn hash entries. When + * a conn hash timer fires, it dispatches a taskq to call this function + * to do the gc. Note that each taskq is responisble for a portion of + * the table. The portion is stored in timer->start, timer->end. + */ +static void +ilb_conn_cleanup(void *arg) +{ + ilb_timer_t *timer = (ilb_timer_t *)arg; + uint32_t i; + ilb_stack_t *ilbs; + ilb_conn_hash_t *c2s_hash, *s2c_hash; + ilb_conn_t *connp, *nxt_connp; + int64_t now; + int64_t expiry; + boolean_t die_now; + + ilbs = timer->ilbs; + c2s_hash = ilbs->ilbs_c2s_conn_hash; + ASSERT(c2s_hash != NULL); + + now = lbolt64; + for (i = timer->start; i < timer->end; i++) { + mutex_enter(&c2s_hash[i].ilb_conn_hash_lock); + if ((connp = c2s_hash[i].ilb_connp) == NULL) { + ASSERT(c2s_hash[i].ilb_conn_cnt == 0); + mutex_exit(&c2s_hash[i].ilb_conn_hash_lock); + continue; + } + do { + ASSERT(c2s_hash[i].ilb_conn_cnt > 0); + ASSERT(connp->conn_c2s_hash == &c2s_hash[i]); + nxt_connp = connp->conn_c2s_next; + expiry = now - SEC_TO_TICK(connp->conn_expiry); + if (connp->conn_server->iser_die_time != 0 && + connp->conn_server->iser_die_time < now) + die_now = B_TRUE; + else + die_now = B_FALSE; + s2c_hash = connp->conn_s2c_hash; + mutex_enter(&s2c_hash->ilb_conn_hash_lock); + + if (connp->conn_gc || die_now || + (connp->conn_c2s_atime < expiry && + connp->conn_s2c_atime < expiry)) { + /* Need to update the nat list cur_connp */ + if (connp == ilbs->ilbs_conn_list_connp) { + ilbs->ilbs_conn_list_connp = + connp->conn_c2s_next; + } + ilb_conn_remove(connp); + goto nxt_connp; + } + + if (connp->conn_l4 != IPPROTO_TCP) + goto nxt_connp; + + /* Update and check TCP related conn info */ + if 
(connp->conn_c2s_tcp_fin_sent && + SEQ_GT(connp->conn_s2c_tcp_ack, + connp->conn_c2s_tcp_fss)) { + connp->conn_c2s_tcp_fin_acked = B_TRUE; + } + if (connp->conn_s2c_tcp_fin_sent && + SEQ_GT(connp->conn_c2s_tcp_ack, + connp->conn_s2c_tcp_fss)) { + connp->conn_s2c_tcp_fin_acked = B_TRUE; + } + if (connp->conn_c2s_tcp_fin_acked && + connp->conn_s2c_tcp_fin_acked) { + ilb_conn_remove(connp); + } +nxt_connp: + mutex_exit(&s2c_hash->ilb_conn_hash_lock); + connp = nxt_connp; + } while (connp != NULL); + mutex_exit(&c2s_hash[i].ilb_conn_hash_lock); + } +} + +/* Conn hash timer routine. It dispatches a taskq and restart the timer */ +static void +ilb_conn_timer(void *arg) +{ + ilb_timer_t *timer = (ilb_timer_t *)arg; + + (void) taskq_dispatch(timer->ilbs->ilbs_conn_taskq, ilb_conn_cleanup, + arg, TQ_SLEEP); + mutex_enter(&timer->tid_lock); + if (timer->tid == 0) { + mutex_exit(&timer->tid_lock); + } else { + timer->tid = timeout(ilb_conn_timer, arg, + SEC_TO_TICK(ilb_conn_cache_timeout)); + mutex_exit(&timer->tid_lock); + } +} + +void +ilb_conn_hash_init(ilb_stack_t *ilbs) +{ + extern pri_t minclsyspri; + int i, part; + ilb_timer_t *tm; + char tq_name[TASKQ_NAMELEN]; + + /* + * If ilbs->ilbs_conn_hash_size is not a power of 2, bump it up to + * the next power of 2. + */ + if (ilbs->ilbs_conn_hash_size & (ilbs->ilbs_conn_hash_size - 1)) { + for (i = 0; i < 31; i++) { + if (ilbs->ilbs_conn_hash_size < (1 << i)) + break; + } + ilbs->ilbs_conn_hash_size = 1 << i; + } + + /* + * Can sleep since this should be called when a rule is being added, + * hence we are not in interrupt context. 
+ */ + ilbs->ilbs_c2s_conn_hash = kmem_zalloc(sizeof (ilb_conn_hash_t) * + ilbs->ilbs_conn_hash_size, KM_SLEEP); + ilbs->ilbs_s2c_conn_hash = kmem_zalloc(sizeof (ilb_conn_hash_t) * + ilbs->ilbs_conn_hash_size, KM_SLEEP); + + for (i = 0; i < ilbs->ilbs_conn_hash_size; i++) { + mutex_init(&ilbs->ilbs_c2s_conn_hash[i].ilb_conn_hash_lock, + NULL, MUTEX_DEFAULT, NULL); + } + for (i = 0; i < ilbs->ilbs_conn_hash_size; i++) { + mutex_init(&ilbs->ilbs_s2c_conn_hash[i].ilb_conn_hash_lock, + NULL, MUTEX_DEFAULT, NULL); + } + + if (ilb_conn_cache == NULL) + ilb_conn_cache_init(); + + (void) snprintf(tq_name, sizeof (tq_name), "ilb_conn_taskq_%p", + ilbs->ilbs_netstack); + ASSERT(ilbs->ilbs_conn_taskq == NULL); + ilbs->ilbs_conn_taskq = taskq_create(tq_name, + ilb_conn_timer_size * 2, minclsyspri, ilb_conn_timer_size, + ilb_conn_timer_size * 2, TASKQ_PREPOPULATE|TASKQ_DYNAMIC); + + ASSERT(ilbs->ilbs_conn_timer_list == NULL); + ilbs->ilbs_conn_timer_list = kmem_zalloc(sizeof (ilb_timer_t) * + ilb_conn_timer_size, KM_SLEEP); + + /* + * The hash table is divided in equal partition for those timers + * to do garbage collection. + */ + part = ilbs->ilbs_conn_hash_size / ilb_conn_timer_size + 1; + for (i = 0; i < ilb_conn_timer_size; i++) { + tm = ilbs->ilbs_conn_timer_list + i; + tm->start = i * part; + tm->end = i * part + part; + if (tm->end > ilbs->ilbs_conn_hash_size) + tm->end = ilbs->ilbs_conn_hash_size; + tm->ilbs = ilbs; + mutex_init(&tm->tid_lock, NULL, MUTEX_DEFAULT, NULL); + /* Spread out the starting execution time of all the timers. */ + tm->tid = timeout(ilb_conn_timer, tm, + SEC_TO_TICK(ilb_conn_cache_timeout + i)); + } +} + +void +ilb_conn_hash_fini(ilb_stack_t *ilbs) +{ + uint32_t i; + ilb_conn_t *connp; + + if (ilbs->ilbs_c2s_conn_hash == NULL) { + ASSERT(ilbs->ilbs_s2c_conn_hash == NULL); + return; + } + + /* Stop all the timers first. 
*/ + for (i = 0; i < ilb_conn_timer_size; i++) { + timeout_id_t tid; + + /* Setting tid to 0 tells the timer handler not to restart. */ + mutex_enter(&ilbs->ilbs_conn_timer_list[i].tid_lock); + tid = ilbs->ilbs_conn_timer_list[i].tid; + ilbs->ilbs_conn_timer_list[i].tid = 0; + mutex_exit(&ilbs->ilbs_conn_timer_list[i].tid_lock); + (void) untimeout(tid); + } + kmem_free(ilbs->ilbs_conn_timer_list, sizeof (ilb_timer_t) * + ilb_conn_timer_size); + taskq_destroy(ilbs->ilbs_conn_taskq); + ilbs->ilbs_conn_taskq = NULL; + + /* Then remove all the conns. */ + for (i = 0; i < ilbs->ilbs_conn_hash_size; i++) { + while ((connp = ilbs->ilbs_s2c_conn_hash->ilb_connp) != NULL) { + ilbs->ilbs_s2c_conn_hash->ilb_connp = + connp->conn_s2c_next; + ILB_SERVER_REFRELE(connp->conn_server); + if (connp->conn_rule_cache.topo == ILB_TOPO_IMPL_NAT) { + ilb_nat_src_entry_t *ent; + in_port_t port; + + /* + * src_ent will be freed in ilb_nat_src_fini(). + */ + port = ntohs( + connp->conn_rule_cache.info.nat_sport); + ent = connp->conn_rule_cache.info.src_ent; + vmem_free(ent->nse_port_arena, + (void *)(uintptr_t)port, 1); + } + kmem_cache_free(ilb_conn_cache, connp); + } + } + kmem_free(ilbs->ilbs_c2s_conn_hash, sizeof (ilb_conn_hash_t) * + ilbs->ilbs_conn_hash_size); + kmem_free(ilbs->ilbs_s2c_conn_hash, sizeof (ilb_conn_hash_t) * + ilbs->ilbs_conn_hash_size); +} + +/* + * Internet checksum adjustment calculation routines. We pre-calculate + * checksum adjustment so that we don't need to compute the checksum on + * the whole packet when we change address/port in the packet. 
+ */ + +static void +hnat_cksum_v4(uint16_t *oaddr, uint16_t *naddr, in_port_t old_port, + in_port_t new_port, uint32_t *adj_sum) +{ + uint32_t sum; + + sum = *oaddr + *(oaddr + 1) + old_port; + while ((sum >> 16) != 0) + sum = (sum & 0xffff) + (sum >> 16); + *adj_sum = (uint16_t)~sum + *naddr + *(naddr + 1) + new_port; +} + +static void +hnat_cksum_v6(uint16_t *oaddr, uint16_t *naddr, in_port_t old_port, + in_port_t new_port, uint32_t *adj_sum) +{ + uint32_t sum = 0; + + sum = *oaddr + *(oaddr + 1) + *(oaddr + 2) + *(oaddr + 3) + + *(oaddr + 4) + *(oaddr + 5) + *(oaddr + 6) + *(oaddr + 7) + + old_port; + while ((sum >> 16) != 0) + sum = (sum & 0xffff) + (sum >> 16); + *adj_sum = (uint16_t)~sum + *naddr + *(naddr + 1) + + *(naddr + 2) + *(naddr + 3) + *(naddr + 4) + *(naddr + 5) + + *(naddr + 6) + *(naddr + 7) + new_port; +} + +static void +fnat_cksum_v4(uint16_t *oaddr1, uint16_t *oaddr2, uint16_t *naddr1, + uint16_t *naddr2, in_port_t old_port1, in_port_t old_port2, + in_port_t new_port1, in_port_t new_port2, uint32_t *adj_sum) +{ + uint32_t sum; + + sum = *oaddr1 + *(oaddr1 + 1) + old_port1 + *oaddr2 + *(oaddr2 + 1) + + old_port2; + while ((sum >> 16) != 0) + sum = (sum & 0xffff) + (sum >> 16); + *adj_sum = (uint16_t)~sum + *naddr1 + *(naddr1 + 1) + new_port1 + + *naddr2 + *(naddr2 + 1) + new_port2; +} + +static void +fnat_cksum_v6(uint16_t *oaddr1, uint16_t *oaddr2, uint16_t *naddr1, + uint16_t *naddr2, in_port_t old_port1, in_port_t old_port2, + in_port_t new_port1, in_port_t new_port2, uint32_t *adj_sum) +{ + uint32_t sum = 0; + + sum = *oaddr1 + *(oaddr1 + 1) + *(oaddr1 + 2) + *(oaddr1 + 3) + + *(oaddr1 + 4) + *(oaddr1 + 5) + *(oaddr1 + 6) + *(oaddr1 + 7) + + old_port1; + sum += *oaddr2 + *(oaddr2 + 1) + *(oaddr2 + 2) + *(oaddr2 + 3) + + *(oaddr2 + 4) + *(oaddr2 + 5) + *(oaddr2 + 6) + *(oaddr2 + 7) + + old_port2; + while ((sum >> 16) != 0) + sum = (sum & 0xffff) + (sum >> 16); + sum = (uint16_t)~sum + *naddr1 + *(naddr1 + 1) + *(naddr1 + 2) + + *(naddr1 + 3) 
+ *(naddr1 + 4) + *(naddr1 + 5) + *(naddr1 + 6) + + *(naddr1 + 7) + new_port1; + *adj_sum = sum + *naddr2 + *(naddr2 + 1) + *(naddr2 + 2) + + *(naddr2 + 3) + *(naddr2 + 4) + *(naddr2 + 5) + *(naddr2 + 6) + + *(naddr2 + 7) + new_port2; +} + +/* + * Add a conn hash entry to the tables. Note that a conn hash entry + * (ilb_conn_t) contains info on both directions. And there are two hash + * tables, one for client to server and the other for server to client. + * So the same entry is added to both tables and can be ccessed by two + * thread simultaneously. But each thread will only access data on one + * direction, so there is no conflict. + */ +int +ilb_conn_add(ilb_stack_t *ilbs, ilb_rule_t *rule, ilb_server_t *server, + in6_addr_t *src, in_port_t sport, in6_addr_t *dst, in_port_t dport, + ilb_nat_info_t *info, uint32_t *ip_sum, uint32_t *tp_sum, ilb_sticky_t *s) +{ + ilb_conn_t *connp; + ilb_conn_hash_t *hash; + int i; + + connp = kmem_cache_alloc(ilb_conn_cache, KM_NOSLEEP); + if (connp == NULL) { + if (s != NULL) { + if (rule->ir_topo == ILB_TOPO_IMPL_NAT) { + ilb_nat_src_entry_t **entry; + + entry = s->server->iser_nat_src->src_list; + vmem_free(entry[s->nat_src_idx]->nse_port_arena, + (void *)(uintptr_t)ntohs(info->nat_sport), + 1); + } + ILB_STICKY_REFRELE(s); + } + return (ENOMEM); + } + + connp->conn_l4 = rule->ir_proto; + + connp->conn_server = server; + ILB_SERVER_REFHOLD(server); + connp->conn_sticky = s; + + connp->conn_rule_cache.topo = rule->ir_topo; + connp->conn_rule_cache.info = *info; + + connp->conn_gc = B_FALSE; + + connp->conn_expiry = rule->ir_nat_expiry; + connp->conn_cr_time = lbolt64; + + /* Client to server info. 
*/ + connp->conn_c2s_saddr = *src; + connp->conn_c2s_sport = sport; + connp->conn_c2s_daddr = *dst; + connp->conn_c2s_dport = dport; + + connp->conn_c2s_atime = lbolt64; + /* The packet ths triggers this creation should be counted */ + connp->conn_c2s_pkt_cnt = 1; + connp->conn_c2s_tcp_fin_sent = B_FALSE; + connp->conn_c2s_tcp_fin_acked = B_FALSE; + + /* Server to client info, before NAT */ + switch (rule->ir_topo) { + case ILB_TOPO_IMPL_HALF_NAT: + connp->conn_s2c_saddr = info->nat_dst; + connp->conn_s2c_sport = info->nat_dport; + connp->conn_s2c_daddr = *src; + connp->conn_s2c_dport = sport; + + /* Pre-calculate checksum changes for both directions */ + if (rule->ir_ipver == IPPROTO_IP) { + hnat_cksum_v4((uint16_t *)&dst->s6_addr32[3], + (uint16_t *)&info->nat_dst.s6_addr32[3], 0, 0, + &connp->conn_c2s_ip_sum); + hnat_cksum_v4((uint16_t *)&dst->s6_addr32[3], + (uint16_t *)&info->nat_dst.s6_addr32[3], dport, + info->nat_dport, &connp->conn_c2s_tp_sum); + *ip_sum = connp->conn_c2s_ip_sum; + *tp_sum = connp->conn_c2s_tp_sum; + + hnat_cksum_v4( + (uint16_t *)&info->nat_dst.s6_addr32[3], + (uint16_t *)&dst->s6_addr32[3], 0, 0, + &connp->conn_s2c_ip_sum); + hnat_cksum_v4( + (uint16_t *)&info->nat_dst.s6_addr32[3], + (uint16_t *)&dst->s6_addr32[3], + info->nat_dport, dport, + &connp->conn_s2c_tp_sum); + } else { + connp->conn_c2s_ip_sum = 0; + hnat_cksum_v6((uint16_t *)dst, + (uint16_t *)&info->nat_dst, dport, + info->nat_dport, &connp->conn_c2s_tp_sum); + *ip_sum = 0; + *tp_sum = connp->conn_c2s_tp_sum; + + connp->conn_s2c_ip_sum = 0; + hnat_cksum_v6((uint16_t *)&info->nat_dst, + (uint16_t *)dst, info->nat_dport, dport, + &connp->conn_s2c_tp_sum); + } + break; + case ILB_TOPO_IMPL_NAT: + connp->conn_s2c_saddr = info->nat_dst; + connp->conn_s2c_sport = info->nat_dport; + connp->conn_s2c_daddr = info->nat_src; + connp->conn_s2c_dport = info->nat_sport; + + if (rule->ir_ipver == IPPROTO_IP) { + fnat_cksum_v4((uint16_t *)&src->s6_addr32[3], + (uint16_t 
*)&dst->s6_addr32[3], + (uint16_t *)&info->nat_src.s6_addr32[3], + (uint16_t *)&info->nat_dst.s6_addr32[3], + 0, 0, 0, 0, &connp->conn_c2s_ip_sum); + fnat_cksum_v4((uint16_t *)&src->s6_addr32[3], + (uint16_t *)&dst->s6_addr32[3], + (uint16_t *)&info->nat_src.s6_addr32[3], + (uint16_t *)&info->nat_dst.s6_addr32[3], + sport, dport, info->nat_sport, + info->nat_dport, &connp->conn_c2s_tp_sum); + *ip_sum = connp->conn_c2s_ip_sum; + *tp_sum = connp->conn_c2s_tp_sum; + + fnat_cksum_v4( + (uint16_t *)&info->nat_src.s6_addr32[3], + (uint16_t *)&info->nat_dst.s6_addr32[3], + (uint16_t *)&src->s6_addr32[3], + (uint16_t *)&dst->s6_addr32[3], + 0, 0, 0, 0, &connp->conn_s2c_ip_sum); + fnat_cksum_v4( + (uint16_t *)&info->nat_src.s6_addr32[3], + (uint16_t *)&info->nat_dst.s6_addr32[3], + (uint16_t *)&src->s6_addr32[3], + (uint16_t *)&dst->s6_addr32[3], + info->nat_sport, info->nat_dport, + sport, dport, &connp->conn_s2c_tp_sum); + } else { + fnat_cksum_v6((uint16_t *)src, (uint16_t *)dst, + (uint16_t *)&info->nat_src, + (uint16_t *)&info->nat_dst, + sport, dport, info->nat_sport, + info->nat_dport, &connp->conn_c2s_tp_sum); + connp->conn_c2s_ip_sum = 0; + *ip_sum = 0; + *tp_sum = connp->conn_c2s_tp_sum; + + fnat_cksum_v6((uint16_t *)&info->nat_src, + (uint16_t *)&info->nat_dst, (uint16_t *)src, + (uint16_t *)dst, info->nat_sport, + info->nat_dport, sport, dport, + &connp->conn_s2c_tp_sum); + connp->conn_s2c_ip_sum = 0; + } + break; + } + + connp->conn_s2c_atime = lbolt64; + connp->conn_s2c_pkt_cnt = 1; + connp->conn_s2c_tcp_fin_sent = B_FALSE; + connp->conn_s2c_tcp_fin_acked = B_FALSE; + + /* Add it to the s2c hash table. 
*/ + hash = ilbs->ilbs_s2c_conn_hash; + i = ILB_CONN_HASH((uint8_t *)&connp->conn_s2c_saddr.s6_addr32[3], + ntohs(connp->conn_s2c_sport), + (uint8_t *)&connp->conn_s2c_daddr.s6_addr32[3], + ntohs(connp->conn_s2c_dport), ilbs->ilbs_conn_hash_size); + connp->conn_s2c_hash = &hash[i]; + DTRACE_PROBE2(ilb__conn__hash__add__s2c, ilb_conn_t *, connp, int, i); + + mutex_enter(&hash[i].ilb_conn_hash_lock); + hash[i].ilb_conn_cnt++; + connp->conn_s2c_next = hash[i].ilb_connp; + if (hash[i].ilb_connp != NULL) + hash[i].ilb_connp->conn_s2c_prev = connp; + connp->conn_s2c_prev = NULL; + hash[i].ilb_connp = connp; + mutex_exit(&hash[i].ilb_conn_hash_lock); + + /* Add it to the c2s hash table. */ + hash = ilbs->ilbs_c2s_conn_hash; + i = ILB_CONN_HASH((uint8_t *)&src->s6_addr32[3], ntohs(sport), + (uint8_t *)&dst->s6_addr32[3], ntohs(dport), + ilbs->ilbs_conn_hash_size); + connp->conn_c2s_hash = &hash[i]; + DTRACE_PROBE2(ilb__conn__hash__add__c2s, ilb_conn_t *, connp, int, i); + + mutex_enter(&hash[i].ilb_conn_hash_lock); + hash[i].ilb_conn_cnt++; + connp->conn_c2s_next = hash[i].ilb_connp; + if (hash[i].ilb_connp != NULL) + hash[i].ilb_connp->conn_c2s_prev = connp; + connp->conn_c2s_prev = NULL; + hash[i].ilb_connp = connp; + mutex_exit(&hash[i].ilb_conn_hash_lock); + + return (0); +} + +/* + * If a connection is using TCP, we keep track of simple TCP state transition + * so that we know when to clean up an entry. 
+ */ +static boolean_t +update_conn_tcp(ilb_conn_t *connp, void *iph, tcpha_t *tcpha, int32_t pkt_len, + boolean_t c2s) +{ + uint32_t ack, seq; + int32_t seg_len; + + if (tcpha->tha_flags & TH_RST) + return (B_FALSE); + + seg_len = pkt_len - ((uint8_t *)tcpha - (uint8_t *)iph) - + TCP_HDR_LENGTH((tcph_t *)tcpha); + + if (tcpha->tha_flags & TH_ACK) + ack = ntohl(tcpha->tha_ack); + seq = ntohl(tcpha->tha_seq); + if (c2s) { + ASSERT(MUTEX_HELD(&connp->conn_c2s_hash->ilb_conn_hash_lock)); + if (tcpha->tha_flags & TH_FIN) { + connp->conn_c2s_tcp_fss = seq + seg_len; + connp->conn_c2s_tcp_fin_sent = B_TRUE; + } + connp->conn_c2s_tcp_ack = ack; + + /* Port reuse by the client, restart the conn. */ + if (connp->conn_c2s_tcp_fin_sent && + SEQ_GT(seq, connp->conn_c2s_tcp_fss + 1)) { + connp->conn_c2s_tcp_fin_sent = B_FALSE; + connp->conn_c2s_tcp_fin_acked = B_FALSE; + } + } else { + ASSERT(MUTEX_HELD(&connp->conn_s2c_hash->ilb_conn_hash_lock)); + if (tcpha->tha_flags & TH_FIN) { + connp->conn_s2c_tcp_fss = seq + seg_len; + connp->conn_s2c_tcp_fin_sent = B_TRUE; + } + connp->conn_s2c_tcp_ack = ack; + + /* Port reuse by the client, restart the conn. */ + if (connp->conn_s2c_tcp_fin_sent && + SEQ_GT(seq, connp->conn_s2c_tcp_fss + 1)) { + connp->conn_s2c_tcp_fin_sent = B_FALSE; + connp->conn_s2c_tcp_fin_acked = B_FALSE; + } + } + + return (B_TRUE); +} + +/* + * Helper routint to find conn hash entry given some packet information and + * the traffic direction (c2s, client to server?) 
+ */ +static boolean_t +ilb_find_conn(ilb_stack_t *ilbs, void *iph, void *tph, int l4, in6_addr_t *src, + in_port_t sport, in6_addr_t *dst, in_port_t dport, + ilb_rule_info_t *rule_cache, uint32_t *ip_sum, uint32_t *tp_sum, + int32_t pkt_len, boolean_t c2s) +{ + ilb_conn_hash_t *hash; + uint_t i; + ilb_conn_t *connp; + boolean_t tcp_alive; + boolean_t ret = B_FALSE; + + i = ILB_CONN_HASH((uint8_t *)&src->s6_addr32[3], ntohs(sport), + (uint8_t *)&dst->s6_addr32[3], ntohs(dport), + ilbs->ilbs_conn_hash_size); + if (c2s) { + hash = ilbs->ilbs_c2s_conn_hash; + mutex_enter(&hash[i].ilb_conn_hash_lock); + for (connp = hash[i].ilb_connp; connp != NULL; + connp = connp->conn_c2s_next) { + if (connp->conn_l4 == l4 && + connp->conn_c2s_dport == dport && + connp->conn_c2s_sport == sport && + IN6_ARE_ADDR_EQUAL(src, &connp->conn_c2s_saddr) && + IN6_ARE_ADDR_EQUAL(dst, &connp->conn_c2s_daddr)) { + connp->conn_c2s_atime = lbolt64; + connp->conn_c2s_pkt_cnt++; + *rule_cache = connp->conn_rule_cache; + *ip_sum = connp->conn_c2s_ip_sum; + *tp_sum = connp->conn_c2s_tp_sum; + ret = B_TRUE; + break; + } + } + } else { + hash = ilbs->ilbs_s2c_conn_hash; + mutex_enter(&hash[i].ilb_conn_hash_lock); + for (connp = hash[i].ilb_connp; connp != NULL; + connp = connp->conn_s2c_next) { + if (connp->conn_l4 == l4 && + connp->conn_s2c_dport == dport && + connp->conn_s2c_sport == sport && + IN6_ARE_ADDR_EQUAL(src, &connp->conn_s2c_saddr) && + IN6_ARE_ADDR_EQUAL(dst, &connp->conn_s2c_daddr)) { + connp->conn_s2c_atime = lbolt64; + connp->conn_s2c_pkt_cnt++; + *rule_cache = connp->conn_rule_cache; + *ip_sum = connp->conn_s2c_ip_sum; + *tp_sum = connp->conn_s2c_tp_sum; + ret = B_TRUE; + break; + } + } + } + if (ret) { + ILB_S_KSTAT(connp->conn_server, pkt_processed); + ILB_S_KSTAT_UPDATE(connp->conn_server, bytes_processed, + pkt_len); + + switch (l4) { + case (IPPROTO_TCP): + tcp_alive = update_conn_tcp(connp, iph, tph, pkt_len, + c2s); + if (!tcp_alive) { + connp->conn_gc = B_TRUE; + } + break; + 
default: + break; + } + } + mutex_exit(&hash[i].ilb_conn_hash_lock); + + return (ret); +} + +/* + * To check if a give packet matches an existing conn hash entry. If it + * does, return the information about this entry so that the caller can + * do the proper NAT. + */ +boolean_t +ilb_check_conn(ilb_stack_t *ilbs, int l3, void *iph, int l4, void *tph, + in6_addr_t *src, in6_addr_t *dst, in_port_t sport, in_port_t dport, + uint32_t pkt_len, in6_addr_t *lb_dst) +{ + ilb_rule_info_t rule_cache; + uint32_t adj_ip_sum, adj_tp_sum; + boolean_t ret; + + /* Check the incoming hash table. */ + if (ilb_find_conn(ilbs, iph, tph, l4, src, sport, dst, dport, + &rule_cache, &adj_ip_sum, &adj_tp_sum, pkt_len, B_TRUE)) { + switch (rule_cache.topo) { + case ILB_TOPO_IMPL_NAT: + *lb_dst = rule_cache.info.nat_dst; + ilb_full_nat(l3, iph, l4, tph, &rule_cache.info, + adj_ip_sum, adj_tp_sum, B_TRUE); + ret = B_TRUE; + break; + case ILB_TOPO_IMPL_HALF_NAT: + *lb_dst = rule_cache.info.nat_dst; + ilb_half_nat(l3, iph, l4, tph, &rule_cache.info, + adj_ip_sum, adj_tp_sum, B_TRUE); + ret = B_TRUE; + break; + default: + ret = B_FALSE; + break; + } + return (ret); + } + if (ilb_find_conn(ilbs, iph, tph, l4, src, sport, dst, dport, + &rule_cache, &adj_ip_sum, &adj_tp_sum, pkt_len, B_FALSE)) { + switch (rule_cache.topo) { + case ILB_TOPO_IMPL_NAT: + *lb_dst = rule_cache.info.src; + ilb_full_nat(l3, iph, l4, tph, &rule_cache.info, + adj_ip_sum, adj_tp_sum, B_FALSE); + ret = B_TRUE; + break; + case ILB_TOPO_IMPL_HALF_NAT: + *lb_dst = *dst; + ilb_half_nat(l3, iph, l4, tph, &rule_cache.info, + adj_ip_sum, adj_tp_sum, B_FALSE); + ret = B_TRUE; + break; + default: + ret = B_FALSE; + break; + } + return (ret); + } + + return (B_FALSE); +} + +/* + * To check if an ICMP packet belongs to a connection in one of the conn + * hash entries. 
+ */ +boolean_t +ilb_check_icmp_conn(ilb_stack_t *ilbs, mblk_t *mp, int l3, void *out_iph, + void *icmph, in6_addr_t *lb_dst) +{ + ilb_conn_hash_t *hash; + ipha_t *in_iph4; + ip6_t *in_iph6; + icmph_t *icmph4; + icmp6_t *icmph6; + in6_addr_t *in_src_p, *in_dst_p; + in_port_t *sport, *dport; + int l4; + uint_t i; + ilb_conn_t *connp; + ilb_rule_info_t rule_cache; + uint32_t adj_ip_sum; + boolean_t full_nat; + + if (l3 == IPPROTO_IP) { + in6_addr_t in_src, in_dst; + + icmph4 = (icmph_t *)icmph; + in_iph4 = (ipha_t *)&icmph4[1]; + + if ((uint8_t *)in_iph4 + IPH_HDR_LENGTH(in_iph4) + + ICMP_MIN_TP_HDR_LEN > mp->b_wptr) { + return (B_FALSE); + } + + IN6_IPADDR_TO_V4MAPPED(in_iph4->ipha_src, &in_src); + in_src_p = &in_src; + IN6_IPADDR_TO_V4MAPPED(in_iph4->ipha_dst, &in_dst); + in_dst_p = &in_dst; + + l4 = in_iph4->ipha_protocol; + if (l4 != IPPROTO_TCP && l4 != IPPROTO_UDP) + return (B_FALSE); + + sport = (in_port_t *)((char *)in_iph4 + + IPH_HDR_LENGTH(in_iph4)); + dport = sport + 1; + + DTRACE_PROBE4(ilb__chk__icmp__conn__v4, uint32_t, + in_iph4->ipha_src, uint32_t, in_iph4->ipha_dst, uint16_t, + ntohs(*sport), uint16_t, ntohs(*dport)); + } else { + ASSERT(l3 == IPPROTO_IPV6); + + icmph6 = (icmp6_t *)icmph; + in_iph6 = (ip6_t *)&icmph6[1]; + in_src_p = &in_iph6->ip6_src; + in_dst_p = &in_iph6->ip6_dst; + + if ((uint8_t *)in_iph6 + sizeof (ip6_t) + + ICMP_MIN_TP_HDR_LEN > mp->b_wptr) { + return (B_FALSE); + } + + l4 = in_iph6->ip6_nxt; + /* We don't go deep inside an IPv6 packet yet. 
*/ + if (l4 != IPPROTO_TCP && l4 != IPPROTO_UDP) + return (B_FALSE); + + sport = (in_port_t *)&in_iph6[1]; + dport = sport + 1; + + DTRACE_PROBE4(ilb__chk__icmp__conn__v6, in6_addr_t *, + &in_iph6->ip6_src, in6_addr_t *, &in_iph6->ip6_dst, + uint16_t, ntohs(*sport), uint16_t, ntohs(*dport)); + } + + i = ILB_CONN_HASH((uint8_t *)&in_dst_p->s6_addr32[3], ntohs(*dport), + (uint8_t *)&in_src_p->s6_addr32[3], ntohs(*sport), + ilbs->ilbs_conn_hash_size); + hash = ilbs->ilbs_c2s_conn_hash; + + mutex_enter(&hash[i].ilb_conn_hash_lock); + for (connp = hash[i].ilb_connp; connp != NULL; + connp = connp->conn_c2s_next) { + if (connp->conn_l4 == l4 && + connp->conn_c2s_dport == *sport && + connp->conn_c2s_sport == *dport && + IN6_ARE_ADDR_EQUAL(in_dst_p, &connp->conn_c2s_saddr) && + IN6_ARE_ADDR_EQUAL(in_src_p, &connp->conn_c2s_daddr)) { + connp->conn_c2s_atime = lbolt64; + connp->conn_c2s_pkt_cnt++; + rule_cache = connp->conn_rule_cache; + adj_ip_sum = connp->conn_c2s_ip_sum; + break; + } + } + mutex_exit(&hash[i].ilb_conn_hash_lock); + + if (connp == NULL) { + DTRACE_PROBE(ilb__chk__icmp__conn__failed); + return (B_FALSE); + } + + switch (rule_cache.topo) { + case ILB_TOPO_IMPL_NAT: + full_nat = B_TRUE; + break; + case ILB_TOPO_IMPL_HALF_NAT: + full_nat = B_FALSE; + break; + default: + return (B_FALSE); + } + + *lb_dst = rule_cache.info.nat_dst; + if (l3 == IPPROTO_IP) { + ilb_nat_icmpv4(mp, out_iph, icmph4, in_iph4, sport, dport, + &rule_cache.info, adj_ip_sum, full_nat); + } else { + ilb_nat_icmpv6(mp, out_iph, icmph6, in_iph6, sport, dport, + &rule_cache.info, full_nat); + } + return (B_TRUE); +} + +/* + * This routine sends up the conn hash table to user land. Note that the + * request is an ioctl, hence we cannot really differentiate requests + * from different clients. There is no context shared between different + * ioctls. Here we make the assumption that the user land ilbd will + * only allow one client to show the conn hash table at any time. 
+ * Otherwise, the results will be "very" inconsistent. + * + * In each ioctl, a flag (ILB_LIST_BEGIN) indicates whether the client wants + * to read from the beginning of the able. After a certain entries + * are reported, the kernel remembers the position of the last returned + * entry. When the next ioctl comes in with the ILB_LIST_BEGIN flag, + * it will return entries starting from where it was left off. When + * the end of table is reached, a flag (ILB_LIST_END) is set to tell + * the client that there is no more entry. + * + * It is assumed that the caller has checked the size of nat so that it + * can hold num entries. + */ +/* ARGSUSED */ +int +ilb_list_nat(ilb_stack_t *ilbs, zoneid_t zoneid, ilb_nat_entry_t *nat, + uint32_t *num, uint32_t *flags) +{ + ilb_conn_hash_t *hash; + ilb_conn_t *cur_connp; + uint32_t i, j; + int ret = 0; + + mutex_enter(&ilbs->ilbs_conn_list_lock); + while (ilbs->ilbs_conn_list_busy) { + if (cv_wait_sig(&ilbs->ilbs_conn_list_cv, + &ilbs->ilbs_conn_list_lock) == 0) { + mutex_exit(&ilbs->ilbs_conn_list_lock); + return (EINTR); + } + } + if ((hash = ilbs->ilbs_c2s_conn_hash) == NULL) { + ASSERT(ilbs->ilbs_s2c_conn_hash == NULL); + mutex_exit(&ilbs->ilbs_conn_list_lock); + *num = 0; + *flags |= ILB_LIST_END; + return (0); + } + ilbs->ilbs_conn_list_busy = B_TRUE; + mutex_exit(&ilbs->ilbs_conn_list_lock); + + if (*flags & ILB_LIST_BEGIN) { + i = 0; + mutex_enter(&hash[0].ilb_conn_hash_lock); + cur_connp = hash[0].ilb_connp; + } else if (*flags & ILB_LIST_CONT) { + if (ilbs->ilbs_conn_list_cur == ilbs->ilbs_conn_hash_size) { + *num = 0; + *flags |= ILB_LIST_END; + goto done; + } + i = ilbs->ilbs_conn_list_cur; + mutex_enter(&hash[i].ilb_conn_hash_lock); + cur_connp = ilbs->ilbs_conn_list_connp; + } else { + ret = EINVAL; + goto done; + } + + j = 0; + while (j < *num) { + if (cur_connp == NULL) { + mutex_exit(&hash[i].ilb_conn_hash_lock); + if (++i == ilbs->ilbs_conn_hash_size) { + *flags |= ILB_LIST_END; + break; + } + 
mutex_enter(&hash[i].ilb_conn_hash_lock); + cur_connp = hash[i].ilb_connp; + continue; + } + nat[j].proto = cur_connp->conn_l4; + + nat[j].in_global = cur_connp->conn_c2s_daddr; + nat[j].in_global_port = cur_connp->conn_c2s_dport; + nat[j].out_global = cur_connp->conn_c2s_saddr; + nat[j].out_global_port = cur_connp->conn_c2s_sport; + + nat[j].in_local = cur_connp->conn_s2c_saddr; + nat[j].in_local_port = cur_connp->conn_s2c_sport; + nat[j].out_local = cur_connp->conn_s2c_daddr; + nat[j].out_local_port = cur_connp->conn_s2c_dport; + + nat[j].create_time = TICK_TO_MSEC(cur_connp->conn_cr_time); + nat[j].last_access_time = + TICK_TO_MSEC(cur_connp->conn_c2s_atime); + + /* + * The conn_s2c_pkt_cnt may not be accurate since we are not + * holding the s2c hash lock. + */ + nat[j].pkt_cnt = cur_connp->conn_c2s_pkt_cnt + + cur_connp->conn_s2c_pkt_cnt; + j++; + + cur_connp = cur_connp->conn_c2s_next; + } + ilbs->ilbs_conn_list_connp = cur_connp; + if (j == *num) + mutex_exit(&hash[i].ilb_conn_hash_lock); + + ilbs->ilbs_conn_list_cur = i; + + *num = j; +done: + mutex_enter(&ilbs->ilbs_conn_list_lock); + ilbs->ilbs_conn_list_busy = B_FALSE; + cv_signal(&ilbs->ilbs_conn_list_cv); + mutex_exit(&ilbs->ilbs_conn_list_lock); + + return (ret); +} + + +/* + * Stickiness (persistence) handling routines. 
+ */ + + +static void +ilb_sticky_cache_init(void) +{ + ilb_sticky_cache = kmem_cache_create("ilb_sticky_cache", + sizeof (ilb_sticky_t), 0, NULL, NULL, NULL, NULL, NULL, + ilb_kmem_flags); +} + +void +ilb_sticky_cache_fini(void) +{ + if (ilb_sticky_cache != NULL) { + kmem_cache_destroy(ilb_sticky_cache); + ilb_sticky_cache = NULL; + } +} + +void +ilb_sticky_refrele(ilb_sticky_t *s) +{ + ILB_STICKY_REFRELE(s); +} + +static ilb_sticky_t * +ilb_sticky_lookup(ilb_sticky_hash_t *hash, ilb_rule_t *rule, in6_addr_t *src) +{ + ilb_sticky_t *s; + + ASSERT(mutex_owned(&hash->sticky_lock)); + + for (s = list_head(&hash->sticky_head); s != NULL; + s = list_next(&hash->sticky_head, s)) { + if (s->rule_instance == rule->ir_ks_instance) { + if (IN6_ARE_ADDR_EQUAL(src, &s->src)) + return (s); + } + } + return (NULL); +} + +static ilb_sticky_t * +ilb_sticky_add(ilb_sticky_hash_t *hash, ilb_rule_t *rule, ilb_server_t *server, + in6_addr_t *src) +{ + ilb_sticky_t *s; + + ASSERT(mutex_owned(&hash->sticky_lock)); + + if ((s = kmem_cache_alloc(ilb_sticky_cache, KM_NOSLEEP)) == NULL) + return (NULL); + + /* + * The rule instance is for handling the scenario when the same + * client talks to different rules at the same time. Stickiness + * is per rule so we can use the rule instance to differentiate + * the client's request. + */ + s->rule_instance = rule->ir_ks_instance; + /* + * Copy the rule name for listing all sticky cache entry. ir_name + * is guaranteed to be NULL terminated. + */ + (void) strcpy(s->rule_name, rule->ir_name); + s->server = server; + + /* + * Grab a ref cnt on the server so that it won't go away while + * it is still in the sticky table. + */ + ILB_SERVER_REFHOLD(server); + s->src = *src; + s->expiry = rule->ir_sticky_expiry; + s->refcnt = 1; + s->hash = hash; + + /* + * There is no need to set atime here since the refcnt is not + * zero. A sticky entry is removed only when the refcnt is + * zero. But just set it here for debugging purpose. 
The + * atime is set when a refrele is done on a sticky entry. + */ + s->atime = lbolt64; + + list_insert_head(&hash->sticky_head, s); + hash->sticky_cnt++; + return (s); +} + +/* + * This routine checks if there is an existing sticky entry which matches + * a given packet. If there is one, return it. If there is not, create + * a sticky entry using the packet's info. + */ +ilb_server_t * +ilb_sticky_find_add(ilb_stack_t *ilbs, ilb_rule_t *rule, in6_addr_t *src, + ilb_server_t *server, ilb_sticky_t **res, uint16_t *src_ent_idx) +{ + int i; + ilb_sticky_hash_t *hash; + ilb_sticky_t *s; + + ASSERT(server != NULL); + + *res = NULL; + + i = ILB_STICKY_HASH((uint8_t *)&src->s6_addr32[3], + (uint32_t)(uintptr_t)rule, ilbs->ilbs_sticky_hash_size); + hash = &ilbs->ilbs_sticky_hash[i]; + + /* First check if there is already an entry. */ + mutex_enter(&hash->sticky_lock); + s = ilb_sticky_lookup(hash, rule, src); + + /* No sticky entry, add one. */ + if (s == NULL) { +add_new_entry: + s = ilb_sticky_add(hash, rule, server, src); + if (s == NULL) { + mutex_exit(&hash->sticky_lock); + return (NULL); + } + /* + * Find a source for this server. All subseqent requests from + * the same client matching this sticky entry will use this + * source address in doing NAT. The current algorithm is + * simple, rotate the source address. Note that the + * source address array does not change after it's created, so + * it is OK to just increment the cur index. + */ + if (server->iser_nat_src != NULL) { + /* It is a hint, does not need to be atomic. */ + *src_ent_idx = (server->iser_nat_src->cur++ % + server->iser_nat_src->num_src); + s->nat_src_idx = *src_ent_idx; + } + mutex_exit(&hash->sticky_lock); + *res = s; + return (server); + } + + /* + * We don't hold any lock accessing iser_enabled. Refer to the + * comment in ilb_server_add() about iser_lock. 
+ */ + if (!s->server->iser_enabled) { + /* + * s->server == server can only happen if there is a race in + * toggling the iser_enabled flag (we don't hold a lock doing + * that) so that the load balance algorithm still returns a + * disabled server. In this case, just drop the packet... + */ + if (s->server == server) { + mutex_exit(&hash->sticky_lock); + return (NULL); + } + + /* + * The old server is disabled and there is a new server, use + * the new one to create a sticky entry. Since we will + * add the entry at the beginning, subsequent lookup will + * find this new entry instead of the old one. + */ + goto add_new_entry; + } + + s->refcnt++; + *res = s; + mutex_exit(&hash->sticky_lock); + if (server->iser_nat_src != NULL) + *src_ent_idx = s->nat_src_idx; + return (s->server); +} + +static void +ilb_sticky_cleanup(void *arg) +{ + ilb_timer_t *timer = (ilb_timer_t *)arg; + uint32_t i; + ilb_stack_t *ilbs; + ilb_sticky_hash_t *hash; + ilb_sticky_t *s, *nxt_s; + int64_t now, expiry; + + ilbs = timer->ilbs; + hash = ilbs->ilbs_sticky_hash; + ASSERT(hash != NULL); + + now = lbolt64; + for (i = timer->start; i < timer->end; i++) { + mutex_enter(&hash[i].sticky_lock); + for (s = list_head(&hash[i].sticky_head); s != NULL; + s = nxt_s) { + nxt_s = list_next(&hash[i].sticky_head, s); + if (s->refcnt != 0) + continue; + expiry = now - SEC_TO_TICK(s->expiry); + if (s->atime < expiry) { + ILB_SERVER_REFRELE(s->server); + list_remove(&hash[i].sticky_head, s); + kmem_cache_free(ilb_sticky_cache, s); + hash[i].sticky_cnt--; + } + } + mutex_exit(&hash[i].sticky_lock); + } +} + +static void +ilb_sticky_timer(void *arg) +{ + ilb_timer_t *timer = (ilb_timer_t *)arg; + + (void) taskq_dispatch(timer->ilbs->ilbs_sticky_taskq, + ilb_sticky_cleanup, arg, TQ_SLEEP); + mutex_enter(&timer->tid_lock); + if (timer->tid == 0) { + mutex_exit(&timer->tid_lock); + } else { + timer->tid = timeout(ilb_sticky_timer, arg, + SEC_TO_TICK(ilb_sticky_timeout)); + mutex_exit(&timer->tid_lock); + } 
+} + +void +ilb_sticky_hash_init(ilb_stack_t *ilbs) +{ + extern pri_t minclsyspri; + int i, part; + char tq_name[TASKQ_NAMELEN]; + ilb_timer_t *tm; + + if (ilbs->ilbs_sticky_hash_size & (ilbs->ilbs_sticky_hash_size - 1)) { + for (i = 0; i < 31; i++) { + if (ilbs->ilbs_sticky_hash_size < (1 << i)) + break; + } + ilbs->ilbs_sticky_hash_size = 1 << i; + } + + ilbs->ilbs_sticky_hash = kmem_zalloc(sizeof (ilb_sticky_hash_t) * + ilbs->ilbs_sticky_hash_size, KM_SLEEP); + for (i = 0; i < ilbs->ilbs_sticky_hash_size; i++) { + mutex_init(&ilbs->ilbs_sticky_hash[i].sticky_lock, NULL, + MUTEX_DEFAULT, NULL); + list_create(&ilbs->ilbs_sticky_hash[i].sticky_head, + sizeof (ilb_sticky_t), + offsetof(ilb_sticky_t, list)); + } + + if (ilb_sticky_cache == NULL) + ilb_sticky_cache_init(); + + (void) snprintf(tq_name, sizeof (tq_name), "ilb_sticky_taskq_%p", + ilbs->ilbs_netstack); + ASSERT(ilbs->ilbs_sticky_taskq == NULL); + ilbs->ilbs_sticky_taskq = taskq_create(tq_name, + ilb_sticky_timer_size * 2, minclsyspri, ilb_sticky_timer_size, + ilb_sticky_timer_size * 2, TASKQ_PREPOPULATE|TASKQ_DYNAMIC); + + ASSERT(ilbs->ilbs_sticky_timer_list == NULL); + ilbs->ilbs_sticky_timer_list = kmem_zalloc(sizeof (ilb_timer_t) * + ilb_sticky_timer_size, KM_SLEEP); + part = ilbs->ilbs_sticky_hash_size / ilb_sticky_timer_size + 1; + for (i = 0; i < ilb_sticky_timer_size; i++) { + tm = ilbs->ilbs_sticky_timer_list + i; + tm->start = i * part; + tm->end = i * part + part; + if (tm->end > ilbs->ilbs_sticky_hash_size) + tm->end = ilbs->ilbs_sticky_hash_size; + tm->ilbs = ilbs; + mutex_init(&tm->tid_lock, NULL, MUTEX_DEFAULT, NULL); + /* Spread out the starting execution time of all the timers. */ + tm->tid = timeout(ilb_sticky_timer, tm, + SEC_TO_TICK(ilb_sticky_timeout + i)); + } +} + +void +ilb_sticky_hash_fini(ilb_stack_t *ilbs) +{ + int i; + ilb_sticky_t *s; + + if (ilbs->ilbs_sticky_hash == NULL) + return; + + /* Stop all the timers first. 
*/ + for (i = 0; i < ilb_sticky_timer_size; i++) { + timeout_id_t tid; + + /* Setting tid to 0 tells the timer handler not to restart. */ + mutex_enter(&ilbs->ilbs_sticky_timer_list[i].tid_lock); + tid = ilbs->ilbs_sticky_timer_list[i].tid; + ilbs->ilbs_sticky_timer_list[i].tid = 0; + mutex_exit(&ilbs->ilbs_sticky_timer_list[i].tid_lock); + (void) untimeout(tid); + } + kmem_free(ilbs->ilbs_sticky_timer_list, sizeof (ilb_timer_t) * + ilb_sticky_timer_size); + taskq_destroy(ilbs->ilbs_sticky_taskq); + ilbs->ilbs_sticky_taskq = NULL; + + for (i = 0; i < ilbs->ilbs_sticky_hash_size; i++) { + while ((s = list_head(&ilbs->ilbs_sticky_hash[i].sticky_head)) + != NULL) { + list_remove(&ilbs->ilbs_sticky_hash[i].sticky_head, s); + ILB_SERVER_REFRELE(s->server); + kmem_free(s, sizeof (ilb_sticky_t)); + } + } + kmem_free(ilbs->ilbs_sticky_hash, ilbs->ilbs_sticky_hash_size * + sizeof (ilb_sticky_hash_t)); +} + +/* + * This routine sends up the sticky hash table to user land. Refer to + * the comments before ilb_list_nat(). Both routines assume similar + * conditions. + * + * It is assumed that the caller has checked the size of st so that it + * can hold num entries. 
+ */ +/* ARGSUSED */ +int +ilb_list_sticky(ilb_stack_t *ilbs, zoneid_t zoneid, ilb_sticky_entry_t *st, + uint32_t *num, uint32_t *flags) +{ + ilb_sticky_hash_t *hash; + ilb_sticky_t *curp; + uint32_t i, j; + int ret = 0; + + mutex_enter(&ilbs->ilbs_sticky_list_lock); + while (ilbs->ilbs_sticky_list_busy) { + if (cv_wait_sig(&ilbs->ilbs_sticky_list_cv, + &ilbs->ilbs_sticky_list_lock) == 0) { + mutex_exit(&ilbs->ilbs_sticky_list_lock); + return (EINTR); + } + } + if ((hash = ilbs->ilbs_sticky_hash) == NULL) { + mutex_exit(&ilbs->ilbs_sticky_list_lock); + *num = 0; + *flags |= ILB_LIST_END; + return (0); + } + ilbs->ilbs_sticky_list_busy = B_TRUE; + mutex_exit(&ilbs->ilbs_sticky_list_lock); + + if (*flags & ILB_LIST_BEGIN) { + i = 0; + mutex_enter(&hash[0].sticky_lock); + curp = list_head(&hash[0].sticky_head); + } else if (*flags & ILB_LIST_CONT) { + if (ilbs->ilbs_sticky_list_cur == ilbs->ilbs_sticky_hash_size) { + *num = 0; + *flags |= ILB_LIST_END; + goto done; + } + i = ilbs->ilbs_sticky_list_cur; + mutex_enter(&hash[i].sticky_lock); + curp = ilbs->ilbs_sticky_list_curp; + } else { + ret = EINVAL; + goto done; + } + + j = 0; + while (j < *num) { + if (curp == NULL) { + mutex_exit(&hash[i].sticky_lock); + if (++i == ilbs->ilbs_sticky_hash_size) { + *flags |= ILB_LIST_END; + break; + } + mutex_enter(&hash[i].sticky_lock); + curp = list_head(&hash[i].sticky_head); + continue; + } + (void) strcpy(st[j].rule_name, curp->rule_name); + st[j].req_addr = curp->src; + st[j].srv_addr = curp->server->iser_addr_v6; + st[j].expiry_time = TICK_TO_MSEC(curp->expiry); + j++; + curp = list_next(&hash[i].sticky_head, curp); + } + ilbs->ilbs_sticky_list_curp = curp; + if (j == *num) + mutex_exit(&hash[i].sticky_lock); + + ilbs->ilbs_sticky_list_cur = i; + + *num = j; +done: + mutex_enter(&ilbs->ilbs_sticky_list_lock); + ilbs->ilbs_sticky_list_busy = B_FALSE; + cv_signal(&ilbs->ilbs_sticky_list_cv); + mutex_exit(&ilbs->ilbs_sticky_list_lock); + + return (ret); +} diff --git 
a/usr/src/uts/common/inet/ilb/ilb_conn.h b/usr/src/uts/common/inet/ilb/ilb_conn.h new file mode 100644 index 0000000000..6be3b7a8d3 --- /dev/null +++ b/usr/src/uts/common/inet/ilb/ilb_conn.h @@ -0,0 +1,246 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _INET_ILB_CONN_H +#define _INET_ILB_CONN_H + +#ifdef __cplusplus +extern "C" { +#endif + +struct ilb_conn_s; + +/* + * Struct of the conn hash table bucket + * + * ilb_connp: the first conn hash entry in the bucket + * ilb_conn_hash_lock: mutex to protect the list in the bucket + * ilb_conn_cnt: number of conn hash entries in this bucket + */ +typedef struct ilb_conn_hash_s { + struct ilb_conn_s *ilb_connp; + kmutex_t ilb_conn_hash_lock; + uint32_t ilb_conn_cnt; +#if defined(_LP64) || defined(_I32LPx) + char ilb_conn_hash_pad[44]; +#else + char ilb_conn_hash_pad[52]; +#endif +} ilb_conn_hash_t; + +/* + * Extracted rule/server info for faster access without holding a reference + * to a rule or server. 
+ */ +typedef struct ilb_rule_info_s { + ilb_topo_impl_t topo; + ilb_nat_info_t info; +} ilb_rule_info_t; + +/* Info about a TCP connection for tracking */ +struct ilb_tcp_track { + uint32_t ack; + uint32_t fss; + boolean_t fin_sent; + boolean_t fin_acked; +}; + +/* + * Struct to store NAT info of a connection (one direction) + * + * conn_daddr: destination address to be matched to find this info + * conn_saddr: source address to be matched + * conn_dport: destination port to be matched + * conn_sport: source port to be matched + * conn_ip_sum: IP checksum adjustment for NAT + * conn_tp_sum: transport checksum adjustment for NAT + * conn_tcp_track: TCP connection tracking info + * conn_atime: last access time of this conn cache + * conn_pkt_cnt: packets processed using this conn + * conn_next: next conn info (for conn info linked list) + * conn_prev: previous conn info (for conn info linked list) + * conn_hash: back pointer to the conn hash table bucket + */ +struct ilb_conn_info { + in6_addr_t conn_daddr; + in6_addr_t conn_saddr; + in_port_t conn_dport; + in_port_t conn_sport; + uint32_t conn_ip_sum; + uint32_t conn_tp_sum; + + struct ilb_tcp_track conn_tcp_track; + + /* Last access time */ + int64_t conn_atime; + uint64_t conn_pkt_cnt; + + struct ilb_conn_s *conn_next; + struct ilb_conn_s *conn_prev; + ilb_conn_hash_t *conn_hash; +}; + +/* + * Struct (an entry in the conn hash table) to store a NAT info of a + * connection (both directions, client to server and server to client) + * + * conn_l4: transport protocol used in this NAT connection + * conn_expiry: expiry time of this entry + * conn_cr_time: creation time of this entry + * conn_c2s: client to back end server info + * conn_s2c: back end server to client info + * conn_server: pointer to the back end server structure + * conn_rule_cache: rule information needed for this entry (copied from + * the ilb_rule_t struct) + * conn_sticky: pointer to the sticky info of this client, used to do + * reference counting 
on the sticky info. + * conn_gc: indicates whether this entry needs to be garbage collected + */ +typedef struct ilb_conn_s { + int conn_l4; + + int64_t conn_expiry; + int64_t conn_cr_time; + + /* Client to server, hash and check info */ + struct ilb_conn_info conn_c2s; +#define conn_c2s_daddr conn_c2s.conn_daddr +#define conn_c2s_saddr conn_c2s.conn_saddr +#define conn_c2s_dport conn_c2s.conn_dport +#define conn_c2s_sport conn_c2s.conn_sport +#define conn_c2s_next conn_c2s.conn_next +#define conn_c2s_prev conn_c2s.conn_prev +#define conn_c2s_hash conn_c2s.conn_hash +#define conn_c2s_atime conn_c2s.conn_atime +#define conn_c2s_pkt_cnt conn_c2s.conn_pkt_cnt +#define conn_c2s_ip_sum conn_c2s.conn_ip_sum +#define conn_c2s_tp_sum conn_c2s.conn_tp_sum +#define conn_c2s_tcp_ack conn_c2s.conn_tcp_track.ack +#define conn_c2s_tcp_fss conn_c2s.conn_tcp_track.fss +#define conn_c2s_tcp_fin_sent conn_c2s.conn_tcp_track.fin_sent +#define conn_c2s_tcp_fin_acked conn_c2s.conn_tcp_track.fin_acked + + /* Server to client, hash and check info */ + struct ilb_conn_info conn_s2c; +#define conn_s2c_daddr conn_s2c.conn_daddr +#define conn_s2c_saddr conn_s2c.conn_saddr +#define conn_s2c_dport conn_s2c.conn_dport +#define conn_s2c_sport conn_s2c.conn_sport +#define conn_s2c_next conn_s2c.conn_next +#define conn_s2c_prev conn_s2c.conn_prev +#define conn_s2c_hash conn_s2c.conn_hash +#define conn_s2c_atime conn_s2c.conn_atime +#define conn_s2c_pkt_cnt conn_s2c.conn_pkt_cnt +#define conn_s2c_ip_sum conn_s2c.conn_ip_sum +#define conn_s2c_tp_sum conn_s2c.conn_tp_sum +#define conn_s2c_tcp_ack conn_s2c.conn_tcp_track.ack +#define conn_s2c_tcp_fss conn_s2c.conn_tcp_track.fss +#define conn_s2c_tcp_fin_sent conn_s2c.conn_tcp_track.fin_sent +#define conn_s2c_tcp_fin_acked conn_s2c.conn_tcp_track.fin_acked + + ilb_server_t *conn_server; + ilb_rule_info_t conn_rule_cache; + + /* + * If the rule is sticky enabled, all ilb_conn_t created from this + * rule will have conn_sticky set to the ilb_sticky_t 
entry. Otherwise + * conn_sticky is NULL. + */ + struct ilb_sticky_s *conn_sticky; + + boolean_t conn_gc; +} ilb_conn_t; + +/* + * Struct of the sticky hash table bucket + * + * sticky_head: the sticky hash list of this bucket + * sticky_lock: mutex to protect the list + * sticky_cnt: number of sticky hash entries in this bucket + */ +typedef struct ilb_sticky_hash_s { + list_t sticky_head; + kmutex_t sticky_lock; + uint32_t sticky_cnt; +#if defined(_LP64) || defined(_I32LPx) + char sticky_pad[20]; +#else + char sticky_pad[36]; +#endif +} ilb_sticky_hash_t; + +/* + * Struct to store sticky info of a client. + * + * rule_instance: the rule instance for this entry, for look up purpose + * rule_name: the rule name for this entry + * server: the back end server for this client + * src: the client source address + * expiry: the expiry time of this entry + * atime: the last access time of this entry + * nat_src_idx: the index to the NAT source array for this client + * refcnt: reference count + * list: linked list node + * hash: back pointer to the sticky hash bucket of this entry + */ +typedef struct ilb_sticky_s { + uint_t rule_instance; + char rule_name[ILB_RULE_NAMESZ]; + ilb_server_t *server; + in6_addr_t src; + int64_t expiry; + int64_t atime; + int nat_src_idx; + + uint32_t refcnt; + list_node_t list; + ilb_sticky_hash_t *hash; +} ilb_sticky_t; + +extern void ilb_conn_hash_init(ilb_stack_t *); +extern void ilb_conn_hash_fini(ilb_stack_t *); +extern void ilb_conn_cache_fini(void); +extern void ilb_sticky_hash_init(ilb_stack_t *); +extern void ilb_sticky_hash_fini(ilb_stack_t *); +extern void ilb_sticky_cache_fini(void); + +extern boolean_t ilb_check_conn(ilb_stack_t *, int, void *, int, void *, + in6_addr_t *, in6_addr_t *, in_port_t, in_port_t, uint32_t, in6_addr_t *); +extern boolean_t ilb_check_icmp_conn(ilb_stack_t *, mblk_t *, int, void *, + void *, in6_addr_t *); +extern int ilb_conn_add(ilb_stack_t *, ilb_rule_t *, ilb_server_t *, + in6_addr_t *, in_port_t, 
in6_addr_t *, in_port_t, ilb_nat_info_t *, + uint32_t *, uint32_t *, struct ilb_sticky_s *); + +extern ilb_server_t *ilb_sticky_find_add(ilb_stack_t *, ilb_rule_t *, + in6_addr_t *, ilb_server_t *, struct ilb_sticky_s **, uint16_t *); +void ilb_sticky_refrele(struct ilb_sticky_s *); + +#ifdef __cplusplus +} +#endif + +#endif /* _INET_ILB_CONN_H */ diff --git a/usr/src/uts/common/inet/ilb/ilb_impl.h b/usr/src/uts/common/inet/ilb/ilb_impl.h new file mode 100644 index 0000000000..226aa77d81 --- /dev/null +++ b/usr/src/uts/common/inet/ilb/ilb_impl.h @@ -0,0 +1,286 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _INET_ILB_IMPL_H +#define _INET_ILB_IMPL_H + +#include <sys/types.h> +#include <sys/kstat.h> +#include <sys/netstack.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Statistics in ILB is stored in several kstat structures. ilb_g_kstat + * represents the global statistics. ilb_rule_kstat represents the statistics + * of a rule. ilb_server_kstat represents the statistics of a server. 
+ */ +#define ILB_KSTAT_MOD_NAME "ilb" + +typedef struct ilb_g_kstat_s { + kstat_named_t num_rules; /* Number of rules */ + kstat_named_t ip_frag_in; /* Number of input fragments */ + kstat_named_t ip_frag_dropped; /* Number of fragments dropped */ +} ilb_g_kstat_t; + +#define ILB_KSTAT_UPDATE(ilbs, x, y) \ +{ \ + DTRACE_PROBE1(ilb__g__kstat__##x, ilb_stack_t *, \ + (ilbs)); \ + ((ilbs)->ilbs_kstat->x.value.ui64 += (y)); \ +} + +typedef struct ilb_rule_kstat { + kstat_named_t num_servers; /* Number of back end servers */ + kstat_named_t bytes_not_processed; /* Num of bytes not processed. */ + kstat_named_t pkt_not_processed; /* Num of packets not processed. */ + kstat_named_t bytes_dropped; /* Number of bytes dropped */ + kstat_named_t pkt_dropped; /* Number of packets dropped */ + kstat_named_t nomem_bytes_dropped; /* Bytes dropped due to nomem */ + kstat_named_t nomem_pkt_dropped; /* Packets dropped due to nomem */ + kstat_named_t noport_bytes_dropped; /* No NAT sport bytes drop */ + kstat_named_t noport_pkt_dropped; /* No NAT sport packet drop */ + kstat_named_t icmp_echo_processed; /* No of ICMP echo processed */ + kstat_named_t icmp_dropped; /* No of ICMP packets dropped */ + kstat_named_t icmp_2big_processed; /* No of ICMP 2big processed */ + kstat_named_t icmp_2big_dropped; /* No of ICMP 2big dropped */ +} ilb_rule_kstat_t; + +#define ILB_R_KSTAT(rule, x) \ +{ \ + DTRACE_PROBE1(ilb__r__kstat__##x, ilb_rule_t *, \ + (rule)); \ + ((rule)->ir_kstat.x.value.ui64++); \ +} +#define ILB_R_KSTAT_UPDATE(rule, x, y) \ +{ \ + DTRACE_PROBE1(ilb__r__kstat__##x, ilb_rule_t *, \ + (rule)); \ + ((rule)->ir_kstat.x.value.ui64 += (y)); \ +} + +typedef struct ilb_server_kstat { + kstat_named_t bytes_processed; /* Number of bytes processed */ + kstat_named_t pkt_processed; /* Number of packets processed */ + kstat_named_t ip_address; /* IP address of the server */ +} ilb_server_kstat_t; + +#define ILB_S_KSTAT(host, x) \ +{ \ + DTRACE_PROBE1(ilb__s__kstat__##x, ilb_server_t *, \ 
+ (host)); \ + ((host)->iser_kstat.x.value.ui64++); \ +} +#define ILB_S_KSTAT_UPDATE(host, x, y) \ +{ \ + DTRACE_PROBE1(ilb__s__kstat__##x, ilb_server_t *, \ + (host)); \ + ((host)->iser_kstat.x.value.ui64 += (y)); \ +} + +/* The maximum port range, meaning all ports (65535 - 1). */ +#define ILB_ALL_PORTS_RANGE 65534 + +struct ilb_nat_src_s; + +/* + * This structure reprensents a server. + */ +typedef struct ilb_server_s { + in6_addr_t iser_addr_v6; + in6_addr_t iser_prefix_v6; +#define iser_addr_v4 iser_addr_v6.s6_addr32[3] +#define iser_prefix_v4 iser_prefix_v6.s6_addr32[3] + + boolean_t iser_port_range; + in_port_t iser_min_port; /* In host byte order */ + in_port_t iser_max_port; + + char iser_name[ILB_SERVER_NAMESZ]; + char iser_ip_addr[INET6_ADDRSTRLEN]; + netstackid_t iser_stackid; + kstat_t *iser_ksp; + ilb_server_kstat_t iser_kstat; + struct ilb_server_s *iser_next; + + boolean_t iser_enabled; + kmutex_t iser_lock; + kcondvar_t iser_cv; + uint64_t iser_refcnt; + + int64_t iser_die_time; + + struct ilb_nat_src_s *iser_nat_src; +} ilb_server_t; + +#define ILB_SERVER_REFHOLD(host) \ +{ \ + mutex_enter(&(host)->iser_lock); \ + (host)->iser_refcnt++; \ + ASSERT((host)->iser_refcnt != 1); \ + mutex_exit(&(host)->iser_lock); \ +} + +#define ILB_SERVER_REFRELE(host) \ +{ \ + mutex_enter(&(host)->iser_lock); \ + (host)->iser_refcnt--; \ + if ((host)->iser_refcnt == 1) \ + cv_signal(&(host)->iser_cv); \ + mutex_exit(&(host)->iser_lock); \ +} + +struct ilb_rule_s; +struct ilb_hash_s; + +typedef struct ilb_alg_data_s { + boolean_t (*ilb_alg_lb)(in6_addr_t *, in_port_t, in6_addr_t *, + in_port_t, void *, ilb_server_t **); + int (*ilb_alg_server_add)(ilb_server_t *, void *); + int (*ilb_alg_server_del)(ilb_server_t *, void *); + int (*ilb_alg_server_enable)(ilb_server_t *, void *); + int (*ilb_alg_server_disable)(ilb_server_t *, void *); + void (*ilb_alg_fini)(struct ilb_alg_data_s **); + + void *ilb_alg_data; +} ilb_alg_data_t; + +/* + * A load balance rule has + * + * 
1. a name + * 2. a network protocol + * 3. a transport protocol + * 4. a load balance mechanism (DSR, NAT, ...) + * 5. a target address (VIP) + * 6. a target port (or port ranges) + * 7. a pool of back end servers + * 8. a load balance algorithm (round robin, hashing, ...) + */ +typedef struct ilb_rule_s { + char ir_name[ILB_RULE_NAMESZ]; + uint8_t ir_ipver; + uint8_t ir_proto; + ilb_topo_impl_t ir_topo; + zoneid_t ir_zoneid; + uint32_t ir_flags; + + in6_addr_t ir_target_v6; +#define ir_target_v4 ir_target_v6.s6_addr32[3] + in6_addr_t ir_prefix_v6; +#define ir_target_prefix_v4 ir_prefix_v6.s6_addr32[3] + + boolean_t ir_port_range; + in_port_t ir_min_port; /* In host byte order */ + in_port_t ir_max_port; + + ilb_server_t *ir_servers; + + uint32_t ir_nat_expiry; + uint32_t ir_conn_drain_timeout; + in6_addr_t ir_nat_src_start; + in6_addr_t ir_nat_src_end; + + boolean_t ir_sticky; + in6_addr_t ir_sticky_mask; + uint32_t ir_sticky_expiry; + + struct ilb_rule_s *ir_next; + + struct ilb_rule_s *ir_hash_next; + struct ilb_rule_s *ir_hash_prev; + struct ilb_hash_s *ir_hash; + + ilb_algo_impl_t ir_alg_type; + ilb_alg_data_t *ir_alg; + + kstat_t *ir_ksp; + ilb_rule_kstat_t ir_kstat; + uint_t ir_ks_instance; + + kmutex_t ir_lock; + kcondvar_t ir_cv; + uint32_t ir_refcnt; +} ilb_rule_t; + +#define ILB_RULE_REFHOLD(rule) \ +{ \ + mutex_enter(&(rule)->ir_lock); \ + (rule)->ir_refcnt++; \ + ASSERT((rule)->ir_refcnt != 1); \ + mutex_exit(&(rule)->ir_lock); \ +} + +#define ILB_RULE_REFRELE(rule) \ +{ \ + mutex_enter(&(rule)->ir_lock); \ + ASSERT((rule)->ir_refcnt >= 2); \ + if (--(rule)->ir_refcnt <= 2) \ + cv_signal(&(rule)->ir_cv); \ + mutex_exit(&(rule)->ir_lock); \ +} + + +typedef struct ilb_hash_s { + ilb_rule_t *ilb_hash_rule; + kmutex_t ilb_hash_lock; +#if defined(_LP64) || defined(_I32LPx) + char ilb_hash_pad[48]; +#else + char ilb_hash_pad[56]; +#endif +} ilb_hash_t; + +struct ilb_nat_src_entry_s; + +/* + * Structure to store NAT info. 
+ * + * Half NAT only uses the first 4 fields in the structure. + */ +typedef struct { + in6_addr_t vip; + in6_addr_t nat_dst; + in_port_t dport; + in_port_t nat_dport; + + in6_addr_t src; + in6_addr_t nat_src; + in_port_t sport; + in_port_t nat_sport; + + struct ilb_nat_src_entry_s *src_ent; +} ilb_nat_info_t; + +extern int ilb_kmem_flags; + +#ifdef __cplusplus +} +#endif + +#endif /* _INET_ILB_IMPL_H */ diff --git a/usr/src/uts/common/inet/ilb/ilb_nat.c b/usr/src/uts/common/inet/ilb/ilb_nat.c new file mode 100644 index 0000000000..0be473fb12 --- /dev/null +++ b/usr/src/uts/common/inet/ilb/ilb_nat.c @@ -0,0 +1,609 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/types.h> +#include <sys/cmn_err.h> +#include <sys/crc32.h> +#include <netinet/in.h> +#include <inet/ip.h> +#include <inet/ip6.h> +#include <inet/tcp.h> +#include <inet/udp_impl.h> +#include <inet/ilb.h> + +#include "ilb_impl.h" +#include "ilb_stack.h" +#include "ilb_nat.h" + +/* + * NAT source entry garbarge collection timeout. 
The actual timeout value + * includes a random jitter bounded by the ILB_NAT_SRC_TIMEOUT_JITTER. + */ +#define ILB_NAT_SRC_TIMEOUT 30 +#define ILB_NAT_SRC_TIMEOUT_JITTER 5 + +/* key1/2 are assumed to be uint32_t. */ +#define ILB_NAT_SRC_HASH(hash, key1, key2, hash_size) \ +{ \ + CRC32((hash), (key1), sizeof (uint32_t), -1U, crc32_table); \ + CRC32((hash), (key2), sizeof (uint32_t), (hash), crc32_table); \ + (hash) %= (hash_size); \ +} + +/* NAT source port space instance number. */ +static uint32_t ilb_nat_src_instance = 0; + +static void +incr_addr(in6_addr_t *a) +{ + uint32_t i; + + i = ntohl(a->s6_addr32[3]); + if (IN6_IS_ADDR_V4MAPPED(a)) { + a->s6_addr32[3] = htonl(++i); + ASSERT(i != 0); + return; + } + + if (++i != 0) { + a->s6_addr32[3] = htonl(i); + return; + } + a->s6_addr32[3] = 0; + i = ntohl(a->s6_addr[2]); + if (++i != 0) { + a->s6_addr32[2] = htonl(i); + return; + } + a->s6_addr32[2] = 0; + i = ntohl(a->s6_addr[1]); + if (++i != 0) { + a->s6_addr32[1] = htonl(i); + return; + } + a->s6_addr32[1] = 0; + i = ntohl(a->s6_addr[0]); + a->s6_addr[0] = htonl(++i); + ASSERT(i != 0); +} + +/* + * When ILB does full NAT, it first picks one source address from the rule's + * specified NAT source address list (currently done in round robin fashion). + * Then it needs to allocate a port. This source port must make the tuple + * (source address:source port:destination address:destination port) + * unique. The destination part of the tuple is determined by the back + * end server, and could not be changed. + * + * To handle the above source port number allocation, ILB sets up a table + * of entries identified by source address:back end server address:server port + * tuple. This table is used by all rules for NAT source port allocation. + * Each tuple has an associated vmem arena used for managing the NAT source + * port space between the source address and back end server address/port. 
+ * Each back end server (ilb_server_t) has an array of pointers (iser_nat_src)
+ * to the different entries in this table for NAT source port allocation.
+ * When ILB needs to allocate a NAT source address and port to talk to a back
+ * end server, it picks a source address and uses the array pointer to get
+ * to an entry. Then it calls vmem_alloc() on the associated vmem arena to
+ * find an unused port.
+ *
+ * When a back end server is added, ILB sets up the aforementioned array.
+ * For each source address specified in the rule, ILB checks if there is any
+ * existing entry which matches this source address:back end server address:
+ * port tuple. The server port is either a specific port or 0 (meaning wild
+ * card port). Normally, a back end server uses the same port as in the rule.
+ * If a back end server is used to serve two different rules, there will be
+ * two different ports. Source port allocation for these two rules does not
+ * conflict, hence we can use two vmem arenas (two different entries in the
+ * table). But if a server uses a port range in one rule, we will treat it as
+ * a wild card port. A wild card port matches any port. If this server
+ * is used to serve more than one rule and those rules use the same set of
+ * NAT source addresses, this means that they must share the same set of vmem
+ * arenas (source port spaces). We do this for simplicity. Otherwise,
+ * we would need to partition the port range so that we can identify different
+ * forms of source port number collision.
+ */
+
+/*
+ * NAT source address initialization routine.
+ * The hash table, with one list and one lock per bucket, is set up first.
+ * The garbage collection timer is armed last because its handler walks the
+ * table and takes the bucket locks.
+ */
+void
+ilb_nat_src_init(ilb_stack_t *ilbs)
+{
+	int i;
+
+	ilbs->ilbs_nat_src = kmem_zalloc(sizeof (ilb_nat_src_hash_t) *
+	    ilbs->ilbs_nat_src_hash_size, KM_SLEEP);
+	for (i = 0; i < ilbs->ilbs_nat_src_hash_size; i++) {
+		list_create(&ilbs->ilbs_nat_src[i].nsh_head,
+		    sizeof (ilb_nat_src_entry_t),
+		    offsetof(ilb_nat_src_entry_t, nse_link));
+		mutex_init(&ilbs->ilbs_nat_src[i].nsh_lock, NULL,
+		    MUTEX_DEFAULT, NULL);
+	}
+
+	/* Everything ilb_nat_src_timer() touches now exists; start it. */
+	ilbs->ilbs_nat_src_tid = timeout(ilb_nat_src_timer, ilbs,
+	    SEC_TO_TICK(ILB_NAT_SRC_TIMEOUT +
+	    gethrtime() % ILB_NAT_SRC_TIMEOUT_JITTER));
+}
+
+/*
+ * NAT source address clean up routine.
+ *
+ * Stops the garbage collection timer, then destroys the port arena of and
+ * frees every entry still in the table, destroys the bucket locks and
+ * finally frees the table itself.
+ */
+void
+ilb_nat_src_fini(ilb_stack_t *ilbs)
+{
+	ilb_nat_src_entry_t *cur;
+	timeout_id_t tid;
+	int i;
+
+	/*
+	 * By setting ilbs_nat_src_tid to 0, the timer handler will not
+	 * restart the timer.
+	 */
+	mutex_enter(&ilbs->ilbs_nat_src_lock);
+	tid = ilbs->ilbs_nat_src_tid;
+	ilbs->ilbs_nat_src_tid = 0;
+	mutex_exit(&ilbs->ilbs_nat_src_lock);
+	if (tid != 0)
+		(void) untimeout(tid);
+
+	mutex_destroy(&ilbs->ilbs_nat_src_lock);
+
+	for (i = 0; i < ilbs->ilbs_nat_src_hash_size; i++) {
+		while ((cur = list_remove_head(&ilbs->ilbs_nat_src[i].nsh_head))
+		    != NULL) {
+			vmem_destroy(cur->nse_port_arena);
+			kmem_free(cur, sizeof (ilb_nat_src_entry_t));
+		}
+		mutex_destroy(&ilbs->ilbs_nat_src[i].nsh_lock);
+	}
+
+	kmem_free(ilbs->ilbs_nat_src, sizeof (ilb_nat_src_hash_t) *
+	    ilbs->ilbs_nat_src_hash_size);
+	ilbs->ilbs_nat_src = NULL;
+}
+
+/* An arena name is "ilb_ns" + "_xxxxxxxxxx" */
+#define	ARENA_NAMESZ	18
+#define	NAT_PORT_START	4096
+/* Parenthesized so the expansion stays intact inside a larger expression. */
+#define	NAT_PORT_SIZE	(65535 - NAT_PORT_START)
+
+/*
+ * Check if the NAT source and back end server pair ilb_nat_src_entry_t
+ * exists. If it does, increment the refcnt and return it. If not, create
+ * one and return it.
+ *
+ * The bucket is chosen by hashing the low 32 bits of the two addresses, and
+ * the whole lookup/insert runs under that bucket's nsh_lock.  A port of 0,
+ * either in the request or in an existing entry, matches any port.
+ *
+ * Returns NULL if the entry or its port arena cannot be allocated.  The
+ * bucket lock is never held on return.
+ */
+static ilb_nat_src_entry_t *
+ilb_find_nat_src(ilb_stack_t *ilbs, const in6_addr_t *nat_src,
+    const in6_addr_t *serv_addr, in_port_t port)
+{
+	ilb_nat_src_entry_t *tmp;
+	uint32_t idx;
+	char arena_name[ARENA_NAMESZ];
+	list_t *head;
+
+	ILB_NAT_SRC_HASH(idx, &nat_src->s6_addr32[3], &serv_addr->s6_addr32[3],
+	    ilbs->ilbs_nat_src_hash_size);
+	mutex_enter(&ilbs->ilbs_nat_src[idx].nsh_lock);
+	head = &ilbs->ilbs_nat_src[idx].nsh_head;
+	for (tmp = list_head(head); tmp != NULL; tmp = list_next(head, tmp)) {
+		if (IN6_ARE_ADDR_EQUAL(&tmp->nse_src_addr, nat_src) &&
+		    IN6_ARE_ADDR_EQUAL(&tmp->nse_serv_addr, serv_addr) &&
+		    (port == tmp->nse_port || port == 0 ||
+		    tmp->nse_port == 0)) {
+			break;
+		}
+	}
+	/* Found one, return it. */
+	if (tmp != NULL) {
+		tmp->nse_refcnt++;
+		mutex_exit(&ilbs->ilbs_nat_src[idx].nsh_lock);
+		return (tmp);
+	}
+
+	tmp = kmem_alloc(sizeof (ilb_nat_src_entry_t), KM_NOSLEEP);
+	if (tmp == NULL) {
+		mutex_exit(&ilbs->ilbs_nat_src[idx].nsh_lock);
+		return (NULL);
+	}
+	tmp->nse_src_addr = *nat_src;
+	tmp->nse_serv_addr = *serv_addr;
+	tmp->nse_port = port;
+	/* Remember the bucket lock so the refcnt can be dropped later. */
+	tmp->nse_nsh_lock = &ilbs->ilbs_nat_src[idx].nsh_lock;
+	tmp->nse_refcnt = 1;
+
+	(void) snprintf(arena_name, ARENA_NAMESZ, "ilb_ns_%u",
+	    atomic_add_32_nv(&ilb_nat_src_instance, 1));
+	if ((tmp->nse_port_arena = vmem_create(arena_name,
+	    (void *)NAT_PORT_START, NAT_PORT_SIZE, 1, NULL, NULL, NULL, 1,
+	    VM_SLEEP | VMC_IDENTIFIER)) == NULL) {
+		kmem_free(tmp, sizeof (*tmp));
+		/* Do not return with the bucket lock still held. */
+		mutex_exit(&ilbs->ilbs_nat_src[idx].nsh_lock);
+		return (NULL);
+	}
+
+	list_insert_tail(head, tmp);
+	mutex_exit(&ilbs->ilbs_nat_src[idx].nsh_lock);
+
+	return (tmp);
+}
+
+/*
+ * Create ilb_nat_src_t struct for a ilb_server_t struct.
+ *
+ * One NAT source entry is looked up (or created) for each of num consecutive
+ * addresses starting at *start, capped at ILB_MAX_NAT_SRC, each paired with
+ * srv_addr and port.  On success *nat_src points to the new struct and 0 is
+ * returned.  If any allocation fails, the references taken so far are
+ * released, *nat_src is set to NULL and ENOMEM is returned.
+ */
+int
+ilb_create_nat_src(ilb_stack_t *ilbs, ilb_nat_src_t **nat_src,
+    const in6_addr_t *srv_addr, in_port_t port, const in6_addr_t *start,
+    int num)
+{
+	ilb_nat_src_t *src;
+	in6_addr_t cur_addr;
+	int i;
+
+	if ((src = kmem_zalloc(sizeof (ilb_nat_src_t), KM_NOSLEEP)) == NULL) {
+		*nat_src = NULL;
+		return (ENOMEM);
+	}
+	cur_addr = *start;
+	for (i = 0; i < num && i < ILB_MAX_NAT_SRC; i++) {
+		src->src_list[i] = ilb_find_nat_src(ilbs, &cur_addr, srv_addr,
+		    port);
+		if (src->src_list[i] == NULL) {
+			ilb_destroy_nat_src(&src);
+			*nat_src = NULL;
+			return (ENOMEM);
+		}
+		incr_addr(&cur_addr);
+		/*
+		 * Increment num_src here so that we can call
+		 * ilb_destroy_nat_src() when we need to do cleanup.
+		 */
+		src->num_src++;
+	}
+	*nat_src = src;
+	return (0);
+}
+
+/*
+ * Timer routine for garbage collecting unneeded NAT source entry. We
+ * don't use a taskq for this since the table should be relatively small
+ * and should be OK for a timer to handle.
+ *
+ * Every bucket is scanned under its nsh_lock.  An entry is removed and freed
+ * only when its refcnt is 0 and nothing is allocated from its port arena.
+ * After the scan the timer re-arms itself, unless ilbs_nat_src_tid has been
+ * cleared to 0 (checked under ilbs_nat_src_lock).
+ */
+void
+ilb_nat_src_timer(void *arg)
+{
+	ilb_stack_t *ilbs = (ilb_stack_t *)arg;
+	ilb_nat_src_entry_t *cur, *tmp;
+	list_t *head;
+	int i;
+
+	for (i = 0; i < ilbs->ilbs_nat_src_hash_size; i++) {
+		mutex_enter(&ilbs->ilbs_nat_src[i].nsh_lock);
+		head = &ilbs->ilbs_nat_src[i].nsh_head;
+		cur = list_head(head);
+		while (cur != NULL) {
+			/*
+			 * When a server is removed, it will release its
+			 * reference on an entry. But there may still be
+			 * conn using some ports. So check the size also.
+			 */
+			if (cur->nse_refcnt != 0 ||
+			    vmem_size(cur->nse_port_arena, VMEM_ALLOC) != 0) {
+				cur = list_next(head, cur);
+				continue;
+			}
+			tmp = cur;
+			cur = list_next(head, cur);
+			list_remove(head, tmp);
+			vmem_destroy(tmp->nse_port_arena);
+			kmem_free(tmp, sizeof (ilb_nat_src_entry_t));
+		}
+		mutex_exit(&ilbs->ilbs_nat_src[i].nsh_lock);
+	}
+
+	mutex_enter(&ilbs->ilbs_nat_src_lock);
+	if (ilbs->ilbs_nat_src_tid == 0) {
+		mutex_exit(&ilbs->ilbs_nat_src_lock);
+	} else {
+		ilbs->ilbs_nat_src_tid = timeout(ilb_nat_src_timer, ilbs,
+		    SEC_TO_TICK(ILB_NAT_SRC_TIMEOUT +
+		    gethrtime() % ILB_NAT_SRC_TIMEOUT_JITTER));
+		mutex_exit(&ilbs->ilbs_nat_src_lock);
+	}
+}
+
+/*
+ * Destroy a given ilb_nat_src_t struct. It will also release the reference
+ * hold on all its ilb_nat_src_entry_t.
+ *
+ * The entries themselves are not freed here; only their refcnt is dropped,
+ * under the bucket lock recorded in nse_nsh_lock.  A NULL *nat_src is a
+ * no-op; otherwise *nat_src is set to NULL before returning.
+ */
+void
+ilb_destroy_nat_src(ilb_nat_src_t **nat_src)
+{
+	int i, size;
+	ilb_nat_src_t *src;
+	ilb_nat_src_entry_t *entry;
+
+	src = *nat_src;
+	if (src == NULL)
+		return;
+	size = src->num_src;
+	/*
+	 * Set each entry to be condemned and the garbage collector will
+	 * clean them up.
+	 */
+	for (i = 0; i < size; i++) {
+		entry = src->src_list[i];
+		mutex_enter(entry->nse_nsh_lock);
+		entry->nse_refcnt--;
+		mutex_exit(entry->nse_nsh_lock);
+	}
+	kmem_free(src, sizeof (ilb_nat_src_t));
+	*nat_src = NULL;
+}
+
+/*
+ * Given a backend server address and its ilb_nat_src_t, allocate a source
+ * address and port for NAT usage.
+ *
+ * If nat_src_idx is NULL, the entries of src are tried in turn, starting
+ * from the one after src->cur, until a port can be allocated.  Otherwise
+ * only the entry at index *nat_src_idx is tried.  On success the chosen
+ * source address is stored in *addr, the port (converted with htons()) in
+ * *port, and the entry used is returned.  NULL is returned if no port could
+ * be allocated.
+ */
+ilb_nat_src_entry_t *
+ilb_alloc_nat_addr(ilb_nat_src_t *src, in6_addr_t *addr, in_port_t *port,
+    uint16_t *nat_src_idx)
+{
+	int i, try, size;
+	in_port_t p;
+
+	size = src->num_src;
+	/* Increment of cur does not need to be atomic. It is just a hint. */
+	if (nat_src_idx == NULL)
+		i = (++src->cur) % size;
+	else
+		i = *nat_src_idx;
+
+	for (try = 0; try < size; try++) {
+		p = (in_port_t)(uintptr_t)vmem_alloc(
+		    src->src_list[i]->nse_port_arena, 1, VM_NOSLEEP);
+		if (p != 0)
+			break;
+		/*
+		 * If an index is given and we cannot allocate a port using
+		 * that entry, return NULL.
+		 */
+		if (nat_src_idx != NULL)
+			return (NULL);
+		i = (i + 1) % size;
+	}
+	if (try == size)
+		return (NULL);
+	*addr = src->src_list[i]->nse_src_addr;
+	*port = htons(p);
+	return (src->src_list[i]);
+}
+
+/*
+ * Use the pre-calculated checksum to adjust the checksum of a packet after
+ * NAT.
+ *
+ * adj_sum is added to the ones' complement of *chksum, the carries above
+ * bit 15 are folded back in until none remain, and the complement of the
+ * result is stored back in *chksum.
+ */
+static void
+adj_cksum(uint16_t *chksum, uint32_t adj_sum)
+{
+	adj_sum += (uint16_t)~(*chksum);
+	while ((adj_sum >> 16) != 0)
+		adj_sum = (adj_sum & 0xffff) + (adj_sum >> 16);
+	*chksum = (uint16_t)~adj_sum;
+}
+
+/*
+ * Do full NAT (replace both source and destination info) on a packet.
+ *
+ * l3 is IPPROTO_IP or IPPROTO_IPV6 and iph points to the matching IP header;
+ * l4 is IPPROTO_TCP or IPPROTO_UDP and tph points to the matching transport
+ * header.  Any other value trips an ASSERT and leaves the addresses alone.
+ *
+ * When c2s is set, the source becomes nat_src/nat_sport and the destination
+ * becomes nat_dst/nat_dport.  Otherwise the source becomes vip/dport and the
+ * destination becomes src/sport.  The transport checksum is then adjusted
+ * with adj_tp_sum and, for IPv4 only, the header checksum with adj_ip_sum.
+ */
+void
+ilb_full_nat(int l3, void *iph, int l4, void *tph, ilb_nat_info_t *info,
+    uint32_t adj_ip_sum, uint32_t adj_tp_sum, boolean_t c2s)
+{
+	in_port_t *orig_sport, *orig_dport;
+	uint16_t *tp_cksum;
+
+	switch (l4) {
+	case IPPROTO_TCP:
+		orig_sport = &((tcpha_t *)tph)->tha_lport;
+		orig_dport = &((tcpha_t *)tph)->tha_fport;
+		tp_cksum = &((tcpha_t *)tph)->tha_sum;
+		break;
+	case IPPROTO_UDP:
+		orig_sport = &((udpha_t *)tph)->uha_src_port;
+		orig_dport = &((udpha_t *)tph)->uha_dst_port;
+		tp_cksum = &((udpha_t *)tph)->uha_checksum;
+		break;
+	default:
+		ASSERT(0);
+		return;
+	}
+
+	switch (l3) {
+	case IPPROTO_IP: {
+		ipha_t *ipha;
+
+		ipha = iph;
+		if (c2s) {
+			IN6_V4MAPPED_TO_IPADDR(&info->nat_src,
+			    ipha->ipha_src);
+			IN6_V4MAPPED_TO_IPADDR(&info->nat_dst,
+			    ipha->ipha_dst);
+			*orig_sport = info->nat_sport;
+			*orig_dport = info->nat_dport;
+		} else {
+			IN6_V4MAPPED_TO_IPADDR(&info->vip, ipha->ipha_src);
+			IN6_V4MAPPED_TO_IPADDR(&info->src, ipha->ipha_dst);
+			*orig_sport = info->dport;
+			*orig_dport = info->sport;
+		}
+		adj_cksum(&ipha->ipha_hdr_checksum, adj_ip_sum);
+		adj_cksum(tp_cksum, adj_tp_sum);
+		break;
+	}
+	case IPPROTO_IPV6: {
+		ip6_t *ip6h;
+
+		ip6h = iph;
+		if (c2s) {
+			ip6h->ip6_src = info->nat_src;
+			ip6h->ip6_dst = info->nat_dst;
+			*orig_sport = info->nat_sport;
+			*orig_dport = info->nat_dport;
+		} else {
+			ip6h->ip6_src = info->vip;
+			ip6h->ip6_dst = info->src;
+			*orig_sport = info->dport;
+			*orig_dport = info->sport;
+		}
+		/* No checksum for IPv6 header */
+		adj_cksum(tp_cksum, adj_tp_sum);
+		break;
+	}
+	default:
+		ASSERT(0);
+		break;
+	}
+}
+
+/*
+ * Do half NAT (only replace the destination info) on a packet.
+ *
+ * The arguments are the same as for ilb_full_nat().  When c2s is set, the
+ * destination address and port are replaced with nat_dst/nat_dport.
+ * Otherwise it is the source address and port that are replaced, with
+ * vip/dport.  Checksums are adjusted as in ilb_full_nat().
+ */
+void
+ilb_half_nat(int l3, void *iph, int l4, void *tph, ilb_nat_info_t *info,
+    uint32_t adj_ip_sum, uint32_t adj_tp_sum, boolean_t c2s)
+{
+	in_port_t *orig_port;
+	uint16_t *tp_cksum;
+
+	switch (l4) {
+	case IPPROTO_TCP:
+		if (c2s)
+			orig_port = &((tcpha_t *)tph)->tha_fport;
+		else
+			orig_port = &((tcpha_t *)tph)->tha_lport;
+		tp_cksum = &((tcpha_t *)tph)->tha_sum;
+		break;
+	case IPPROTO_UDP:
+		if (c2s)
+			orig_port = &((udpha_t *)tph)->uha_dst_port;
+		else
+			orig_port = &((udpha_t *)tph)->uha_src_port;
+		tp_cksum = &((udpha_t *)tph)->uha_checksum;
+		break;
+	default:
+		ASSERT(0);
+		return;
+	}
+
+	switch (l3) {
+	case IPPROTO_IP: {
+		ipha_t *ipha;
+
+		ipha = iph;
+		if (c2s) {
+			IN6_V4MAPPED_TO_IPADDR(&info->nat_dst,
+			    ipha->ipha_dst);
+			*orig_port = info->nat_dport;
+		} else {
+			IN6_V4MAPPED_TO_IPADDR(&info->vip, ipha->ipha_src);
+			*orig_port = info->dport;
+		}
+		adj_cksum(&ipha->ipha_hdr_checksum, adj_ip_sum);
+		adj_cksum(tp_cksum, adj_tp_sum);
+		break;
+	}
+	case IPPROTO_IPV6: {
+		ip6_t *ip6h;
+
+		ip6h = iph;
+		if (c2s) {
+			ip6h->ip6_dst = info->nat_dst;
+			*orig_port = info->nat_dport;
+		} else {
+			ip6h->ip6_src = info->vip;
+			*orig_port = info->dport;
+		}
+		/* No checksum for IPv6 header */
+		adj_cksum(tp_cksum, adj_tp_sum);
+		break;
+	}
+	default:
+		ASSERT(0);
+		break;
+	}
+}
+
+/* Calculate the IPv6 pseudo checksum, used for ICMPv6 NAT.
The sum covers
+ * the sixteen 16-bit words that start at ip6_src (32 bytes; presumably the
+ * source and destination addresses, TODO confirm against ip6_t) plus
+ * htons(nxt_hdr).  The carry out of the low 16 bits is folded back in once
+ * before the value is returned.
+ */
+uint32_t
+ilb_pseudo_sum_v6(ip6_t *ip6h, uint8_t nxt_hdr)
+{
+	uint32_t sum;
+	uint16_t *cur;
+
+	cur = (uint16_t *)&ip6h->ip6_src;
+	sum = cur[0] + cur[1] + cur[2] + cur[3] + cur[4] + cur[5] + cur[6] +
+	    cur[7] + cur[8] + cur[9] + cur[10] + cur[11] + cur[12] + cur[13] +
+	    cur[14] + cur[15] + htons(nxt_hdr);
+	return ((sum & 0xffff) + (sum >> 16));
+}
+
+/*
+ * Do NAT on an ICMPv4 packet.
+ *
+ * out_iph is the IP header of the ICMP packet itself and icmph its ICMP
+ * header; in_iph, sport and dport presumably point into the packet quoted
+ * inside the ICMP message (verify against the caller).
+ *
+ * The outer destination and the inner source are set to nat_dst and *sport
+ * to nat_dport.  With full_nat, the outer source and the inner destination
+ * are also set to nat_src and *dport to nat_sport.  The outer IP header
+ * checksum is adjusted with sum, and the ICMP checksum is zeroed and then
+ * recomputed with IP_CSUM().
+ */
+void
+ilb_nat_icmpv4(mblk_t *mp, ipha_t *out_iph, icmph_t *icmph, ipha_t *in_iph,
+    in_port_t *sport, in_port_t *dport, ilb_nat_info_t *info, uint32_t sum,
+    boolean_t full_nat)
+{
+	if (full_nat) {
+		IN6_V4MAPPED_TO_IPADDR(&info->nat_src, out_iph->ipha_src);
+		IN6_V4MAPPED_TO_IPADDR(&info->nat_src, in_iph->ipha_dst);
+		*dport = info->nat_sport;
+	}
+	IN6_V4MAPPED_TO_IPADDR(&info->nat_dst, out_iph->ipha_dst);
+	adj_cksum(&out_iph->ipha_hdr_checksum, sum);
+	IN6_V4MAPPED_TO_IPADDR(&info->nat_dst, in_iph->ipha_src);
+	*sport = info->nat_dport;
+
+	icmph->icmph_checksum = 0;
+	icmph->icmph_checksum = IP_CSUM(mp, IPH_HDR_LENGTH(out_iph), 0);
+}
+
+/* Do NAT on an ICMPv6 packet.
The outer destination and
+ * the inner source are set to nat_dst and *sport to nat_dport.  With
+ * full_nat, the outer source and the inner destination are also set to
+ * nat_src and *dport to nat_sport.  The ICMPv6 checksum field is then seeded
+ * with ip6_plen and recomputed by IP_CSUM(), starting at the ICMPv6 header,
+ * with ilb_pseudo_sum_v6() of the outer header as the initial sum.
+ */
+void
+ilb_nat_icmpv6(mblk_t *mp, ip6_t *out_ip6h, icmp6_t *icmp6h, ip6_t *in_ip6h,
+    in_port_t *sport, in_port_t *dport, ilb_nat_info_t *info,
+    boolean_t full_nat)
+{
+	int hdr_len;
+
+	if (full_nat) {
+		out_ip6h->ip6_src = info->nat_src;
+		in_ip6h->ip6_dst = info->nat_src;
+		*dport = info->nat_sport;
+	}
+	out_ip6h->ip6_dst = info->nat_dst;
+	in_ip6h->ip6_src = info->nat_dst;
+	*sport = info->nat_dport;
+
+	icmp6h->icmp6_cksum = out_ip6h->ip6_plen;
+	hdr_len = (char *)icmp6h - (char *)out_ip6h;
+	icmp6h->icmp6_cksum = IP_CSUM(mp, hdr_len,
+	    ilb_pseudo_sum_v6(out_ip6h, IPPROTO_ICMPV6));
+}
diff --git a/usr/src/uts/common/inet/ilb/ilb_nat.h b/usr/src/uts/common/inet/ilb/ilb_nat.h
new file mode 100644
index 0000000000..6adaf965ec
--- /dev/null
+++ b/usr/src/uts/common/inet/ilb/ilb_nat.h
@@ -0,0 +1,93 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _INET_ILB_NAT_H
+#define _INET_ILB_NAT_H
+
+#include <sys/vmem.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Maximum number of NAT source address of a rule.
*/ +#define ILB_MAX_NAT_SRC 10 + +/* NAT source address hash table. */ +typedef struct ilb_nat_src_hash_s { + list_t nsh_head; + kmutex_t nsh_lock; + char nsh_pad[64 - sizeof (list_t) - sizeof (kmutex_t)]; +} ilb_nat_src_hash_t; + +/* + * NAT source entry. Hold the port space for a source addr/back end server + * pair. + */ +typedef struct ilb_nat_src_entry_s { + in6_addr_t nse_src_addr; + in6_addr_t nse_serv_addr; + in_port_t nse_port; + vmem_t *nse_port_arena; + uint32_t nse_refcnt; + kmutex_t *nse_nsh_lock; + list_node_t nse_link; +} ilb_nat_src_entry_t; + +/* Struct to hold all NAT source entry of a back end server. */ +typedef struct ilb_nat_src_s { + uint16_t cur; + uint16_t num_src; + ilb_nat_src_entry_t *src_list[ILB_MAX_NAT_SRC]; +} ilb_nat_src_t; + +extern int ilb_create_nat_src(ilb_stack_t *ilbs, ilb_nat_src_t **, + const in6_addr_t *, in_port_t, const in6_addr_t *, int); +extern void ilb_destroy_nat_src(ilb_nat_src_t **); +extern void ilb_nat_src_timer(void *); +extern void ilb_nat_src_init(ilb_stack_t *); +extern void ilb_nat_src_fini(ilb_stack_t *); + +extern ilb_nat_src_entry_t *ilb_alloc_nat_addr(ilb_nat_src_t *, in6_addr_t *, + in_port_t *, uint16_t *); + +extern void ilb_full_nat(int, void *, int, void *, ilb_nat_info_t *, uint32_t, + uint32_t, boolean_t); +extern void ilb_half_nat(int, void *, int, void *, ilb_nat_info_t *, uint32_t, + uint32_t, boolean_t); + +extern void ilb_nat_icmpv4(mblk_t *, ipha_t *, icmph_t *, ipha_t *, + in_port_t *, in_port_t *, ilb_nat_info_t *, uint32_t, boolean_t); +extern void ilb_nat_icmpv6(mblk_t *, ip6_t *, icmp6_t *, ip6_t *, + in_port_t *, in_port_t *, ilb_nat_info_t *, boolean_t); + +extern uint32_t ilb_pseudo_sum_v6(ip6_t *, uint8_t); + +#ifdef __cplusplus +} +#endif + +#endif /* _INET_ILB_NAT_H */ diff --git a/usr/src/uts/common/inet/ilb/ilb_stack.h b/usr/src/uts/common/inet/ilb/ilb_stack.h new file mode 100644 index 0000000000..6cb034c7f6 --- /dev/null +++ b/usr/src/uts/common/inet/ilb/ilb_stack.h @@ -0,0 
+1,134 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _INET_ILB_STACK_H +#define _INET_ILB_STACK_H + +#include <sys/netstack.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct ilb_rule_s; +struct ilb_hash_s; +struct ilb_timer_s; +struct ilb_conn_s; +struct ilb_conn_hash_s; +struct ilb_sticky_s; +struct ilb_sticky_hash_s; +struct ilb_g_kstat_s; +struct ilb_nat_src_hash_s; + +/* Per network stack ILB information */ +typedef struct ilb_stack { + netstack_t *ilbs_netstack; + + /* + * Rule info in a network stack. 
+ * + * ilbs_rule_head: list of all rules + * ilbs_g_lock: lock to protect the rule list + * ilbs_rule_hash_size: size of the rule hash table + * ilbs_g_hash: the rule hash table + * ilbs_rule_taskq: taskq for rule related delayed processing + */ + struct ilb_rule_s *ilbs_rule_head; + kmutex_t ilbs_g_lock; + size_t ilbs_rule_hash_size; + struct ilb_hash_s *ilbs_g_hash; + taskq_t *ilbs_rule_taskq; + + /* + * NAT connection cache info + * + * ilbs_conn_hash_szie: size of the conn cache hash table + * ilbs_c2s_conn_hash: client to server conn cache hash table + * ilbs_s2c_conn_hash: server to client conn cache hash table + * ilbs_conn_timer_list: list of all timers for handling conn cache + * ilbs_conn_taskq: taskq for conn cache related delayed processing + */ + size_t ilbs_conn_hash_size; + struct ilb_conn_hash_s *ilbs_c2s_conn_hash; + struct ilb_conn_hash_s *ilbs_s2c_conn_hash; + struct ilb_timer_s *ilbs_conn_timer_list; + taskq_t *ilbs_conn_taskq; + + /* + * Sticky (persistent) cache info + * + * ilbs_sticky_hash_szie: size of the sticky cache hash table + * ilbs_sticky_hash: sticky cache hash table + * ilbs_sticky_timer_list: list of all timers for handling sticky cache + * ilbs_sticky_taskq: taskq for sticky cache related delayed processing + */ + size_t ilbs_sticky_hash_size; + struct ilb_sticky_hash_s *ilbs_sticky_hash; + struct ilb_timer_s *ilbs_sticky_timer_list; + taskq_t *ilbs_sticky_taskq; + + /* + * Info of NAT source address for + * + * ilbs_nat_src: NAT source hash table + * ilbs_nat_src_hash_size: size of the NAT source hash table + * ilbs_nat_src_lock: lock for protecting ilbs_nat_src_tid + * ilbs_nat_src_tid: ID of the timer handling garbage colllection + */ + struct ilb_nat_src_hash_s *ilbs_nat_src; + size_t ilbs_nat_src_hash_size; + kmutex_t ilbs_nat_src_lock; + timeout_id_t ilbs_nat_src_tid; + + /* NAT conn cache and sticky cache listing related info */ + + /* Lock to ensure that all nat listing ops are serialized */ + kmutex_t 
ilbs_conn_list_lock; + kcondvar_t ilbs_conn_list_cv; + boolean_t ilbs_conn_list_busy; + /* Current position for listing all conn hash entries */ + size_t ilbs_conn_list_cur; + struct ilb_conn_s *ilbs_conn_list_connp; + + /* Lock to ensure that all sticky listing ops are serialized */ + kmutex_t ilbs_sticky_list_lock; + kcondvar_t ilbs_sticky_list_cv; + boolean_t ilbs_sticky_list_busy; + /* Current position for listing all sticky hash entries */ + size_t ilbs_sticky_list_cur; + struct ilb_sticky_s *ilbs_sticky_list_curp; + + /* Stack wide ILB kstat */ + kstat_t *ilbs_ksp; + struct ilb_g_kstat_s *ilbs_kstat; +} ilb_stack_t; + + +#ifdef __cplusplus +} +#endif + +#endif /* _INET_ILB_STACK_H */ diff --git a/usr/src/uts/common/inet/ilb_ip.h b/usr/src/uts/common/inet/ilb_ip.h new file mode 100644 index 0000000000..16dddbb427 --- /dev/null +++ b/usr/src/uts/common/inet/ilb_ip.h @@ -0,0 +1,101 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ +#ifndef _INET_ILB_IP_H +#define _INET_ILB_IP_H + +#include <inet/ilb.h> +#include <inet/ilb/ilb_stack.h> +#include <inet/ilb/ilb_impl.h> + +#ifdef __cplusplus +extern "C" { +#endif + +extern void ilb_ddi_g_init(void); +extern void ilb_ddi_g_destroy(void); + +/* Return values of ilb_check_*() */ +#define ILB_DROPPED 1 /* Caller should drop the packet. */ +#define ILB_PASSED 2 /* No load balanced rule is matched. */ +#define ILB_BALANCED 3 /* A rule is matached. */ + +extern boolean_t ilb_has_rules(ilb_stack_t *); + +extern int ilb_check_v4(ilb_stack_t *, ill_t *, mblk_t *, ipha_t *, int, + uint8_t *, ipaddr_t *); +extern int ilb_check_v6(ilb_stack_t *, ill_t *, mblk_t *, ip6_t *, int, + uint8_t *, in6_addr_t *); +extern boolean_t ilb_rule_match_vip_v4(ilb_stack_t *, ipaddr_t, ilb_rule_t **); +extern boolean_t ilb_rule_match_vip_v6(ilb_stack_t *, in6_addr_t *, + ilb_rule_t **); + +extern int ip_sioctl_ilb_cmd(ipif_t *, sin_t *, queue_t *, mblk_t *, + ip_ioctl_cmd_t *, void *); + +extern int ilb_rule_add(ilb_stack_t *, zoneid_t, const ilb_rule_cmd_t *); +extern int ilb_rule_del(ilb_stack_t *, zoneid_t, const char *); +extern void ilb_rule_del_all(ilb_stack_t *, zoneid_t); +extern int ilb_rule_enable(ilb_stack_t *, zoneid_t, const char *, + ilb_rule_t *); +extern void ilb_rule_enable_all(ilb_stack_t *, zoneid_t); +extern int ilb_rule_disable(ilb_stack_t *, zoneid_t, const char *, + ilb_rule_t *); +extern void ilb_rule_disable_all(ilb_stack_t *, zoneid_t); +extern int ilb_rule_list(ilb_stack_t *, zoneid_t, ilb_rule_cmd_t *); + +extern void ilb_get_num_rules(ilb_stack_t *, zoneid_t, uint32_t *); +extern int ilb_get_num_servers(ilb_stack_t *, zoneid_t, const char *, + uint32_t *); +extern ilb_rule_t *ilb_find_rule(ilb_stack_t *, zoneid_t, const char *, int *); +extern void ilb_get_rulenames(ilb_stack_t *, zoneid_t, uint32_t *, + char *); +extern int ilb_get_servers(ilb_stack_t *, zoneid_t, const char *, + ilb_server_info_t *, uint32_t *); + +extern int 
ilb_server_add(ilb_stack_t *, ilb_rule_t *, ilb_server_info_t *); +extern int ilb_server_del(ilb_stack_t *, zoneid_t, const char *, + ilb_rule_t *, in6_addr_t *); +extern int ilb_server_enable(ilb_stack_t *, zoneid_t, const char *, + ilb_rule_t *, in6_addr_t *); +extern int ilb_server_disable(ilb_stack_t *, zoneid_t, const char *, + ilb_rule_t *, in6_addr_t *); + +extern int ilb_list_nat(ilb_stack_t *, zoneid_t, ilb_nat_entry_t *, + uint32_t *, uint32_t *); +extern int ilb_list_sticky(ilb_stack_t *, zoneid_t, ilb_sticky_entry_t *, + uint32_t *, uint32_t *); + +/* Currently supported transport protocol. */ +#define ILB_SUPP_L4(proto) \ + ((proto) == IPPROTO_TCP || (proto) == IPPROTO_UDP || \ + (proto) == IPPROTO_ICMP || (proto) == IPPROTO_ICMPV6) + + +#ifdef __cplusplus +} +#endif + +#endif /* _INET_ILB_IP_H */ diff --git a/usr/src/uts/common/inet/ip/ip.c b/usr/src/uts/common/inet/ip/ip.c index b72218e471..ebb89e3172 100644 --- a/usr/src/uts/common/inet/ip/ip.c +++ b/usr/src/uts/common/inet/ip/ip.c @@ -101,6 +101,7 @@ #include <inet/iptun/iptun_impl.h> #include <inet/ipdrop.h> #include <inet/ip_netinfo.h> +#include <inet/ilb_ip.h> #include <sys/ethernet.h> #include <net/if_types.h> @@ -1345,6 +1346,10 @@ ip_ioctl_cmd_t ip_ndx_ioctl_table[] = { /* SIOCSENABLESDP is handled by SDP */ /* 183 */ { IPI_DONTCARE /* SIOCSENABLESDP */, 0, 0, 0, NULL, NULL }, /* 184 */ { IPI_DONTCARE /* SIOCSQPTR */, 0, 0, 0, NULL, NULL }, + /* 185 */ { IPI_DONTCARE /* SIOCGIFHWADDR */, 0, 0, 0, NULL, NULL }, + /* 186 */ { IPI_DONTCARE /* SIOCGSTAMP */, 0, 0, 0, NULL, NULL }, + /* 187 */ { SIOCILB, 0, IPI_PRIV | IPI_GET_CMD, MISC_CMD, + ip_sioctl_ilb_cmd, NULL }, }; int ip_ndx_ioctl_count = sizeof (ip_ndx_ioctl_table) / sizeof (ip_ioctl_cmd_t); @@ -5661,6 +5666,7 @@ ip_ddi_destroy(void) udp_ddi_g_destroy(); sctp_ddi_g_destroy(); tcp_ddi_g_destroy(); + ilb_ddi_g_destroy(); ipsec_policy_g_destroy(); ipcl_g_destroy(); ip_net_g_destroy(); @@ -5927,6 +5933,7 @@ ip_ddi_init(void) udp_ddi_g_init(); 
rts_ddi_g_init(); icmp_ddi_g_init(); + ilb_ddi_g_init(); } /* @@ -14829,6 +14836,8 @@ ip_input(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain, mblk_t *mp; mblk_t *dmp; uint8_t tag; + ilb_stack_t *ilbs; + ipaddr_t lb_dst; ASSERT(mp_chain != NULL); ASSERT(ill != NULL); @@ -14839,6 +14848,7 @@ ip_input(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain, #define rptr ((uchar_t *)ipha) + ilbs = ipst->ips_netstack->netstack_ilb; while (mp_chain != NULL) { mp = mp_chain; mp_chain = mp_chain->b_next; @@ -15065,6 +15075,62 @@ ip_input(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain, } /* + * Here we check to see if we machine is setup as + * L3 loadbalancer and if the incoming packet is for a VIP + * + * Check the following: + * - there is at least a rule + * - protocol of the packet is supported + */ + if (ilb_has_rules(ilbs) && ILB_SUPP_L4(ipha->ipha_protocol)) { + int lb_ret; + + /* For convenience, we pull up the mblk. */ + if (mp->b_cont != NULL) { + if (pullupmsg(mp, -1) == 0) { + BUMP_MIB(ill->ill_ip_mib, + ipIfStatsInDiscards); + freemsg(first_mp); + continue; + } + ipha = (ipha_t *)mp->b_rptr; + } + + /* + * We just drop all fragments going to any VIP, at + * least for now.... + */ + if (ntohs(ipha->ipha_fragment_offset_and_flags) & + (IPH_MF | IPH_OFFSET)) { + if (!ilb_rule_match_vip_v4(ilbs, + ipha->ipha_dst, NULL)) { + goto after_ilb; + } + + ILB_KSTAT_UPDATE(ilbs, ip_frag_in, 1); + ILB_KSTAT_UPDATE(ilbs, ip_frag_dropped, 1); + BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); + freemsg(first_mp); + continue; + } + lb_ret = ilb_check_v4(ilbs, ill, mp, ipha, + ipha->ipha_protocol, (uint8_t *)ipha + + IPH_HDR_LENGTH(ipha), &lb_dst); + + if (lb_ret == ILB_DROPPED) { + /* Is this the right counter to increase? 
*/ + BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); + freemsg(first_mp); + continue; + } else if (lb_ret == ILB_BALANCED) { + /* Set the dst to that of the chosen server */ + dst = lb_dst; + DB_CKSUMFLAGS(mp) = 0; + } + } + +after_ilb: + /* * Reuse the cached ire only if the ipha_dst of the previous * packet is the same as the current packet AND it is not * INADDR_ANY. @@ -15399,6 +15465,7 @@ ip_accept_tcp(ill_t *ill, ill_rx_ring_t *ip_ring, squeue_t *target_sqp, mblk_t *uhead = NULL; /* Unaccepted tail */ uint_t ucnt = 0; /* Unaccepted cnt */ ip_stack_t *ipst = ill->ill_ipst; + ilb_stack_t *ilbs = ipst->ips_netstack->netstack_ilb; *cnt = 0; @@ -15407,6 +15474,12 @@ ip_accept_tcp(ill_t *ill, ill_rx_ring_t *ip_ring, squeue_t *target_sqp, TRACE_1(TR_FAC_IP, TR_IP_RPUT_START, "ip_accept_tcp: q %p", q); + /* If ILB is enabled, don't do fast processing. */ + if (ilb_has_rules(ilbs)) { + uhead = mp_chain; + goto all_reject; + } + #define rptr ((uchar_t *)ipha) while (mp_chain != NULL) { @@ -15574,6 +15647,7 @@ local_accept: if (ire != NULL) ire_refrele(ire); +all_reject: if (uhead != NULL) ip_input(ill, ip_ring, uhead, NULL); diff --git a/usr/src/uts/common/inet/ip/ip6.c b/usr/src/uts/common/inet/ip/ip6.c index 6fca667f63..38fe7b2562 100644 --- a/usr/src/uts/common/inet/ip/ip6.c +++ b/usr/src/uts/common/inet/ip/ip6.c @@ -81,6 +81,7 @@ #include <inet/udp_impl.h> #include <inet/sctp/sctp_impl.h> #include <inet/ipp_common.h> +#include <inet/ilb_ip.h> #include <inet/ip_multi.h> #include <inet/ip_if.h> @@ -6922,6 +6923,9 @@ ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, boolean_t cksum_err; mblk_t *mp1; ip_stack_t *ipst = inill->ill_ipst; + ilb_stack_t *ilbs = ipst->ips_netstack->netstack_ilb; + in6_addr_t lb_dst; + int lb_ret = ILB_PASSED; EXTRACT_PKT_MP(mp, first_mp, mctl_present); @@ -7087,8 +7091,32 @@ drop_pkt: BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); IRE_CACHE|IRE_LOCAL, ill->ill_ipif, ALL_ZONES, NULL, MATCH_IRE_TYPE | MATCH_IRE_ILL, ipst); } 
else { - ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, - msg_getlabel(mp), ipst); + if (ilb_has_rules(ilbs) && ILB_SUPP_L4(nexthdr)) { + /* For convenience, we just pull up the mblk. */ + if (mp->b_cont != NULL) { + if (pullupmsg(mp, -1) == 0) { + BUMP_MIB(ill->ill_ip_mib, + ipIfStatsInDiscards); + freemsg(hada_mp); + freemsg(first_mp); + return; + } + hdr_len = pkt_len - remlen; + ip6h = (ip6_t *)mp->b_rptr; + whereptr = (uint8_t *)ip6h + hdr_len; + } + lb_ret = ilb_check_v6(ilbs, ill, mp, ip6h, nexthdr, + whereptr, &lb_dst); + if (lb_ret == ILB_DROPPED) { + BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); + freemsg(hada_mp); + freemsg(first_mp); + return; + } + } + + ire = ire_cache_lookup_v6((lb_ret == ILB_BALANCED) ? &lb_dst : + &ip6h->ip6_dst, ALL_ZONES, msg_getlabel(mp), ipst); if (ire != NULL && ire->ire_stq != NULL && ire->ire_zoneid != GLOBAL_ZONEID && @@ -7139,7 +7167,8 @@ drop_pkt: BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); } mp->b_prev = (mblk_t *)(uintptr_t) ill->ill_phyint->phyint_ifindex; - ip_newroute_v6(q, mp, &ip6h->ip6_dst, &ip6h->ip6_src, + ip_newroute_v6(q, mp, (lb_ret == ILB_BALANCED) ? &lb_dst : + &ip6h->ip6_dst, &ip6h->ip6_src, IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ? ill : NULL, GLOBAL_ZONEID, ipst); return; diff --git a/usr/src/uts/common/inet/ip/ip_if.c b/usr/src/uts/common/inet/ip/ip_if.c index ce8f7e6371..b175f4530f 100644 --- a/usr/src/uts/common/inet/ip/ip_if.c +++ b/usr/src/uts/common/inet/ip/ip_if.c @@ -86,6 +86,7 @@ #include <inet/ip_impl.h> #include <inet/sctp_ip.h> #include <inet/ip_netinfo.h> +#include <inet/ilb_ip.h> #include <net/pfkeyv2.h> #include <inet/ipsec_info.h> @@ -10192,6 +10193,15 @@ ip_sioctl_copyin_setup(queue_t *q, mblk_t *mp) case IP_IOCTL: ip_wput_ioctl(q, mp); return; + + case SIOCILB: + /* The ioctl length varies depending on the ILB command. 
*/
+		copyin_size = iocp->ioc_count;
+		if (copyin_size < sizeof (ilb_cmd_t))
+			goto nak;
+		mi_copyin(q, mp, NULL, copyin_size);
+		return;
+
 	default:
 		cmn_err(CE_PANIC, "should not happen ");
 	}
@@ -20341,3 +20351,297 @@ ipif_up_notify(ipif_t *ipif)
 	ill_nic_event_dispatch(ipif->ipif_ill, MAP_IPIF_ID(ipif->ipif_id),
 	    NE_LIF_UP, NULL, 0);
 }
+
+/*
+ * Check that the variable-length array at the end of an ILB command exactly
+ * fills the rest of the copied-in message block: the bytes that follow
+ * arr_off must form a whole number of elem_size-sized entries, and that
+ * number must equal count, the entry count taken from the command itself.
+ *
+ * The count is supplied by the ioctl caller, so the bytes actually present
+ * are divided instead of multiplying the count by the entry size; such a
+ * product can wrap around and make an oversized count look valid.
+ *
+ * Returns nonzero if the layout is consistent, zero otherwise.
+ */
+static int
+ilb_cmd_array_ok(mblk_t *mp, size_t arr_off, size_t elem_size, size_t count)
+{
+	size_t	len = MBLKL(mp);
+	size_t	avail;
+
+	if (len < arr_off)
+		return (0);
+	avail = len - arr_off;
+	return (avail % elem_size == 0 && avail / elem_size == count);
+}
+
+/*
+ * Handler for the SIOCILB ioctl.  The command code at the start of the
+ * copied-in buffer selects the ILB operation; the buffer size is checked
+ * against what that command expects before the ILB routine is called.
+ *
+ * ILB ioctl uses cv_wait (such as deleting a rule or adding a server) and
+ * this assumes the context is cv_wait'able.  Hence it shouldn't be used on
+ * TPI end points with STREAMS modules pushed above.  This is assured by not
+ * having the IPI_MODOK flag for the ioctl.  And IP ensures the ILB ioctl
+ * never ends up on an ipsq, otherwise we may end up processing the ioctl
+ * while unwinding from the ipsq and that could be a thread from the bottom.
+ *
+ * Only q and mp are used; ipif, sin, ipip and arg are ignored.  The command
+ * buffer is mp->b_cont->b_cont.  ip_sioctl_copyin_setup() refuses SIOCILB
+ * requests shorter than sizeof (ilb_cmd_t), so the command code can be read
+ * before any per-command size check.
+ *
+ * Returns 0 or an errno value: EINVAL for an unknown command or a buffer
+ * whose size does not fit the command, otherwise the result of the ILB
+ * routine.  The per-server commands record each server's result in that
+ * server's err field of the command buffer; their return value reflects
+ * only the size check and the rule lookup.
+ */
+/* ARGSUSED */
+int
+ip_sioctl_ilb_cmd(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp,
+    ip_ioctl_cmd_t *ipip, void *arg)
+{
+	mblk_t *cmd_mp = mp->b_cont->b_cont;
+	ilb_cmd_t command = *((ilb_cmd_t *)cmd_mp->b_rptr);
+	int ret = 0;
+	int i;
+	ip_stack_t *ipst;
+	zoneid_t zoneid;
+	ilb_stack_t *ilbs;
+
+	ipst = CONNQ_TO_IPST(q);
+	ilbs = ipst->ips_netstack->netstack_ilb;
+	zoneid = Q_TO_CONN(q)->conn_zoneid;
+
+	switch (command) {
+	case ILB_CREATE_RULE: {
+		ilb_rule_cmd_t *cmd = (ilb_rule_cmd_t *)cmd_mp->b_rptr;
+
+		if (MBLKL(cmd_mp) != sizeof (ilb_rule_cmd_t)) {
+			ret = EINVAL;
+			break;
+		}
+
+		ret = ilb_rule_add(ilbs, zoneid, cmd);
+		break;
+	}
+	case ILB_DESTROY_RULE:
+	case ILB_ENABLE_RULE:
+	case ILB_DISABLE_RULE: {
+		ilb_name_cmd_t *cmd = (ilb_name_cmd_t *)cmd_mp->b_rptr;
+
+		if (MBLKL(cmd_mp) != sizeof (ilb_name_cmd_t)) {
+			ret = EINVAL;
+			break;
+		}
+
+		if (cmd->flags & ILB_RULE_ALLRULES) {
+			/* The *_all() calls leave ret at 0. */
+			if (command == ILB_DESTROY_RULE)
+				ilb_rule_del_all(ilbs, zoneid);
+			else if (command == ILB_ENABLE_RULE)
+				ilb_rule_enable_all(ilbs, zoneid);
+			else
+				ilb_rule_disable_all(ilbs, zoneid);
+		} else {
+			if (command == ILB_DESTROY_RULE) {
+				ret = ilb_rule_del(ilbs, zoneid, cmd->name);
+			} else if (command == ILB_ENABLE_RULE) {
+				ret = ilb_rule_enable(ilbs, zoneid, cmd->name,
+				    NULL);
+			} else {
+				ret = ilb_rule_disable(ilbs, zoneid, cmd->name,
+				    NULL);
+			}
+		}
+		break;
+	}
+	case ILB_NUM_RULES: {
+		ilb_num_rules_cmd_t *cmd;
+
+		if (MBLKL(cmd_mp) != sizeof (ilb_num_rules_cmd_t)) {
+			ret = EINVAL;
+			break;
+		}
+		cmd = (ilb_num_rules_cmd_t *)cmd_mp->b_rptr;
+		ilb_get_num_rules(ilbs, zoneid, &(cmd->num));
+		break;
+	}
+	case ILB_RULE_NAMES: {
+		ilb_rule_names_cmd_t *cmd;
+
+		cmd = (ilb_rule_names_cmd_t *)cmd_mp->b_rptr;
+		if (MBLKL(cmd_mp) < sizeof (ilb_rule_names_cmd_t) ||
+		    cmd->num_names == 0) {
+			ret = EINVAL;
+			break;
+		}
+		if (!ilb_cmd_array_ok(cmd_mp,
+		    offsetof(ilb_rule_names_cmd_t, buf), ILB_RULE_NAMESZ,
+		    cmd->num_names)) {
+			ret = EINVAL;
+			break;
+		}
+		ilb_get_rulenames(ilbs, zoneid, &cmd->num_names, cmd->buf);
+		break;
+	}
+	case ILB_NUM_SERVERS: {
+		ilb_num_servers_cmd_t *cmd;
+
+		if (MBLKL(cmd_mp) != sizeof (ilb_num_servers_cmd_t)) {
+			ret = EINVAL;
+			break;
+		}
+		cmd = (ilb_num_servers_cmd_t *)cmd_mp->b_rptr;
+		ret = ilb_get_num_servers(ilbs, zoneid, cmd->name,
+		    &(cmd->num));
+		break;
+	}
+	case ILB_LIST_RULE: {
+		ilb_rule_cmd_t *cmd = (ilb_rule_cmd_t *)cmd_mp->b_rptr;
+
+		if (MBLKL(cmd_mp) != sizeof (ilb_rule_cmd_t)) {
+			ret = EINVAL;
+			break;
+		}
+		ret = ilb_rule_list(ilbs, zoneid, cmd);
+		break;
+	}
+	case ILB_LIST_SERVERS: {
+		ilb_servers_info_cmd_t *cmd;
+
+		cmd = (ilb_servers_info_cmd_t *)cmd_mp->b_rptr;
+		if (MBLKL(cmd_mp) < sizeof (ilb_servers_info_cmd_t) ||
+		    cmd->num_servers == 0) {
+			ret = EINVAL;
+			break;
+		}
+		if (!ilb_cmd_array_ok(cmd_mp,
+		    offsetof(ilb_servers_info_cmd_t, servers),
+		    sizeof (ilb_server_info_t), cmd->num_servers)) {
+			ret = EINVAL;
+			break;
+		}
+
+		ret = ilb_get_servers(ilbs, zoneid, cmd->name, cmd->servers,
+		    &cmd->num_servers);
+		break;
+	}
+	case ILB_ADD_SERVERS: {
+		ilb_servers_info_cmd_t *cmd;
+		ilb_rule_t *rule;
+
+		cmd = (ilb_servers_info_cmd_t *)cmd_mp->b_rptr;
+		if (MBLKL(cmd_mp) < sizeof (ilb_servers_info_cmd_t)) {
+			ret = EINVAL;
+			break;
+		}
+		if (!ilb_cmd_array_ok(cmd_mp,
+		    offsetof(ilb_servers_info_cmd_t, servers),
+		    sizeof (ilb_server_info_t), cmd->num_servers)) {
+			ret = EINVAL;
+			break;
+		}
+		rule = ilb_find_rule(ilbs, zoneid, cmd->name, &ret);
+		if (rule == NULL) {
+			ASSERT(ret != 0);
+			break;
+		}
+		for (i = 0; i < cmd->num_servers; i++) {
+			ilb_server_info_t *s;
+
+			s = &cmd->servers[i];
+			s->err = ilb_server_add(ilbs, rule, s);
+		}
+		ILB_RULE_REFRELE(rule);
+		break;
+	}
+	case ILB_DEL_SERVERS:
+	case ILB_ENABLE_SERVERS:
+	case ILB_DISABLE_SERVERS: {
+		ilb_servers_cmd_t *cmd;
+		ilb_rule_t *rule;
+		int (*f)();
+
+		cmd = (ilb_servers_cmd_t *)cmd_mp->b_rptr;
+		if (MBLKL(cmd_mp) < sizeof (ilb_servers_cmd_t)) {
+			ret = EINVAL;
+			break;
+		}
+		if (!ilb_cmd_array_ok(cmd_mp,
+		    offsetof(ilb_servers_cmd_t, servers),
+		    sizeof (ilb_server_arg_t), cmd->num_servers)) {
+			ret = EINVAL;
+			break;
+		}
+
+		/* The case labels leave exactly these three commands. */
+		if (command == ILB_DEL_SERVERS)
+			f = ilb_server_del;
+		else if (command == ILB_ENABLE_SERVERS)
+			f = ilb_server_enable;
+		else
+			f = ilb_server_disable;
+
+		rule = ilb_find_rule(ilbs, zoneid, cmd->name, &ret);
+		if (rule == NULL) {
+			ASSERT(ret != 0);
+			break;
+		}
+
+		for (i = 0; i < cmd->num_servers; i++) {
+			ilb_server_arg_t *s;
+
+			s = &cmd->servers[i];
+			s->err = f(ilbs, zoneid, NULL, rule, &s->addr);
+		}
+		ILB_RULE_REFRELE(rule);
+		break;
+	}
+	case ILB_LIST_NAT_TABLE: {
+		ilb_list_nat_cmd_t *cmd;
+
+		cmd = (ilb_list_nat_cmd_t *)cmd_mp->b_rptr;
+		if (MBLKL(cmd_mp) < sizeof (ilb_list_nat_cmd_t)) {
+			ret = EINVAL;
+			break;
+		}
+		if (!ilb_cmd_array_ok(cmd_mp,
+		    offsetof(ilb_list_nat_cmd_t, entries),
+		    sizeof (ilb_nat_entry_t), cmd->num_nat)) {
+			ret = EINVAL;
+			break;
+		}
+
+		ret = ilb_list_nat(ilbs, zoneid, cmd->entries, &cmd->num_nat,
+		    &cmd->flags);
+		break;
+	}
+	case ILB_LIST_STICKY_TABLE: {
+		ilb_list_sticky_cmd_t *cmd;
+
+		cmd = (ilb_list_sticky_cmd_t *)cmd_mp->b_rptr;
+		if (MBLKL(cmd_mp) < sizeof (ilb_list_sticky_cmd_t)) {
+			ret = EINVAL;
+			break;
+		}
+		if (!ilb_cmd_array_ok(cmd_mp,
+		    offsetof(ilb_list_sticky_cmd_t, entries),
+		    sizeof (ilb_sticky_entry_t), cmd->num_sticky)) {
+			ret = EINVAL;
+			break;
+		}
+
+		ret = ilb_list_sticky(ilbs, zoneid, cmd->entries,
+		    &cmd->num_sticky, &cmd->flags);
+		break;
+	}
+	default:
+		ret = EINVAL;
+		break;
+	}
+	return (ret);
+}
diff --git a/usr/src/uts/common/inet/ipnet/ipnet.c b/usr/src/uts/common/inet/ipnet/ipnet.c
index f3a3c73374..d9b7cf7768 100644
--- a/usr/src/uts/common/inet/ipnet/ipnet.c
+++ b/usr/src/uts/common/inet/ipnet/ipnet.c
@@ -1951,16 +1951,17 @@ ipobs_bounce_func(hook_event_token_t token, hook_data_t info, void *arg)
 	mblk_t *mp;
 
 	hdr = (hook_pkt_observe_t *)info;
-	mp = dupmsg(hdr->hpo_pkt);
-	if (mp == NULL) {
-		mp = copymsg(hdr->hpo_pkt);
-		if (mp == NULL) {
-			netstack_t *ns = hdr->hpo_ctx;
-			ipnet_stack_t *ips = ns->netstack_ipnet;
-
-			IPSK_BUMP(ips, ik_dispatchDupDrop);
-			return (0);
-		}
+	/*
+	 * Code in ip_input() expects that it is the only one accessing the
+	 * packet.
+ */ + mp = copymsg(hdr->hpo_pkt); + if (mp == NULL) { + netstack_t *ns = hdr->hpo_ctx; + ipnet_stack_t *ips = ns->netstack_ipnet; + + IPSK_BUMP(ips, ik_dispatchDupDrop); + return (0); } hdr = (hook_pkt_observe_t *)mp->b_rptr; diff --git a/usr/src/uts/common/sys/netstack.h b/usr/src/uts/common/sys/netstack.h index 8b13b66599..2c77e1be96 100644 --- a/usr/src/uts/common/sys/netstack.h +++ b/usr/src/uts/common/sys/netstack.h @@ -80,7 +80,8 @@ typedef id_t netstackid_t; #define NS_IPSECAH 15 #define NS_IPSECESP 16 #define NS_IPNET 17 -#define NS_MAX (NS_IPNET+1) +#define NS_ILB 18 +#define NS_MAX (NS_ILB+1) /* * State maintained for each module which tracks the state of @@ -155,6 +156,7 @@ struct netstack { struct ipsecah_stack *nu_ipsecah; struct ipsecesp_stack *nu_ipsecesp; struct ipnet_stack *nu_ipnet; + struct ilb_stack *nu_ilb; } nu_s; } netstack_u; #define netstack_modules netstack_u.nu_modules @@ -176,6 +178,7 @@ struct netstack { #define netstack_ipsecah netstack_u.nu_s.nu_ipsecah #define netstack_ipsecesp netstack_u.nu_s.nu_ipsecesp #define netstack_ipnet netstack_u.nu_s.nu_ipnet +#define netstack_ilb netstack_u.nu_s.nu_ilb nm_state_t netstack_m_state[NS_MAX]; /* module state */ diff --git a/usr/src/uts/common/sys/sockio.h b/usr/src/uts/common/sys/sockio.h index 06b63d2969..0fca7c5c43 100644 --- a/usr/src/uts/common/sys/sockio.h +++ b/usr/src/uts/common/sys/sockio.h @@ -313,6 +313,11 @@ extern "C" { #define SIOCGIFHWADDR _IOWR('i', 185, int) /* PF_PACKET */ #define SIOCGSTAMP _IOWR('i', 186, struct timeval) /* PF_PACKET */ +/* + * Private ioctl for Integrated Load Balancer. The ioctl length varies. 
+ */ +#define SIOCILB _IOWR('i', 187, 0) + #ifdef __cplusplus } #endif diff --git a/usr/src/uts/intel/ip/ip.global-objs.debug64 b/usr/src/uts/intel/ip/ip.global-objs.debug64 index a45c7e5ae9..6009f5b006 100644 --- a/usr/src/uts/intel/ip/ip.global-objs.debug64 +++ b/usr/src/uts/intel/ip/ip.global-objs.debug64 @@ -77,6 +77,22 @@ icmpinfov6 icmprinitv4 icmprinitv6 icmpwinit +ilb_conn_cache +ilb_conn_cache_timeout +ilb_conn_hash_size +ilb_conn_tcp_expiry +ilb_conn_timer_size +ilb_conn_udp_expiry +ilb_kstat_instance +ilb_kmem_flags +ilb_nat_src_hash_size +ilb_nat_src_instance +ilb_rule_hash_size +ilb_sticky_cache +ilb_sticky_hash_size +ilb_sticky_expiry +ilb_sticky_timer_size +ilb_sticky_timeout ill_no_arena ill_null inet_dev_info diff --git a/usr/src/uts/intel/ip/ip.global-objs.obj64 b/usr/src/uts/intel/ip/ip.global-objs.obj64 index b3fb7df1b5..1706a82aa7 100644 --- a/usr/src/uts/intel/ip/ip.global-objs.obj64 +++ b/usr/src/uts/intel/ip/ip.global-objs.obj64 @@ -77,6 +77,22 @@ icmpinfov6 icmprinitv4 icmprinitv6 icmpwinit +ilb_conn_cache +ilb_conn_cache_timeout +ilb_conn_hash_size +ilb_conn_tcp_expiry +ilb_conn_timer_size +ilb_conn_udp_expiry +ilb_kstat_instance +ilb_kmem_flags +ilb_nat_src_hash_size +ilb_nat_src_instance +ilb_rule_hash_size +ilb_sticky_cache +ilb_sticky_hash_size +ilb_sticky_expiry +ilb_sticky_timer_size +ilb_sticky_timeout ill_no_arena ill_null inet_dev_info diff --git a/usr/src/uts/sparc/ip/ip.global-objs.debug64 b/usr/src/uts/sparc/ip/ip.global-objs.debug64 index 419a412037..8df87d813d 100644 --- a/usr/src/uts/sparc/ip/ip.global-objs.debug64 +++ b/usr/src/uts/sparc/ip/ip.global-objs.debug64 @@ -77,6 +77,22 @@ icmpinfov6 icmprinitv4 icmprinitv6 icmpwinit +ilb_conn_cache +ilb_conn_cache_timeout +ilb_conn_hash_size +ilb_conn_tcp_expiry +ilb_conn_timer_size +ilb_conn_udp_expiry +ilb_kstat_instance +ilb_kmem_flags +ilb_nat_src_hash_size +ilb_nat_src_instance +ilb_rule_hash_size +ilb_sticky_cache +ilb_sticky_hash_size +ilb_sticky_expiry 
+ilb_sticky_timer_size +ilb_sticky_timeout ill_no_arena ill_null inet_dev_info diff --git a/usr/src/uts/sparc/ip/ip.global-objs.obj64 b/usr/src/uts/sparc/ip/ip.global-objs.obj64 index db3c18e64e..3df973b8f9 100644 --- a/usr/src/uts/sparc/ip/ip.global-objs.obj64 +++ b/usr/src/uts/sparc/ip/ip.global-objs.obj64 @@ -77,6 +77,22 @@ icmpinfov6 icmprinitv4 icmprinitv6 icmpwinit +ilb_conn_cache +ilb_conn_cache_timeout +ilb_conn_hash_size +ilb_conn_tcp_expiry +ilb_conn_timer_size +ilb_conn_udp_expiry +ilb_kstat_instance +ilb_kmem_flags +ilb_nat_src_hash_size +ilb_nat_src_instance +ilb_rule_hash_size +ilb_sticky_cache +ilb_sticky_hash_size +ilb_sticky_expiry +ilb_sticky_timer_size +ilb_sticky_timeout ill_no_arena ill_null inet_dev_info |