diff options
author | Sebastien Roy <Sebastien.Roy@Sun.COM> | 2009-09-22 22:04:45 -0400 |
---|---|---|
committer | Sebastien Roy <Sebastien.Roy@Sun.COM> | 2009-09-22 22:04:45 -0400 |
commit | 2b24ab6b3865caeede9eeb9db6b83e1d89dcd1ea (patch) | |
tree | 72c0d7d4e1c44843a86bab6e3ed6f82cfa7356af /usr/src | |
parent | 51fc88a818087605a0e5f11eddb8b66576f72c23 (diff) | |
download | illumos-joyent-2b24ab6b3865caeede9eeb9db6b83e1d89dcd1ea.tar.gz |
PSARC 2009/373 Clearview IP Tunneling
PSARC 2009/410 Datalink Administration from Non-Global Zones
6858533 Clearview IP Tunneling
4861777 *snoop* cannot snoop on tunnel interfaces
5010680 M_IOCTL interface between ip and tun is horribly wrong
5029727 tun prints bogus debug messages when receiving multicast packets on 6to4 tunnels
6835873 dlpi_walk() silently fails in an exclusive zone
4152864 must not allow two tunnels to have the same tsrc/tdst pair
6855902 link and flow kstats are too promiscuous
6218826 need to be able to tunnel into a zone
4505468 network interface names can confuse, lie, and deceive
4524756 tun_wproc() takes up too much stack
6417373 tun_wproc_mdata assertion failures
4627970 scalability problems with IP in IP tunnels
4674797 ifparse_ifspec() will not correctly parse ipv6 tunnels
6509231 dladm should show links in exclusive stack zone
4793233 tun driver should include addr in DL_PHYS_ADDR_ACK for non-zero lengths
6795831 ZONE_*_DATALINK syscalls should take datalink_id_t as argument
6791472 mac module doesn't allow MAC addresses < 6 bytes
6618091 Race condition trips ASSERT() in tun.c's SIOCSLIFNAME path
6837580 bogus mi_active check in mac_set_mtu()
6868083 libinetutil: ofmt_open()'s template argument should be const
6870313 libdladm: needless dladm_init_linkprop() in i_dladm_aggr_up()
6872221 panic in dls_devnet_close() if "mtu" property is being set
4289774 Change to the interface-id does not change IPv6 link-local address
6873561 unable to create links with 31 character link names
6874666 changing a link property can accidentally destroy it
6874682 removing a link attribute corrupts the attribute list
6875167 IPCL_ISV6 conn flag is set but never used
6881764 itp reference leak in ipsec_construct_inverse_acquire()
6881951 dladm delete-vlan can no longer delete persistent-only VLANs
--HG--
rename : usr/src/uts/common/inet/tun.h => usr/src/uts/common/inet/iptun.h
rename : usr/src/uts/common/inet/ip/tun.c => usr/src/uts/common/inet/iptun/iptun.c
rename : usr/src/uts/intel/tun/Makefile => usr/src/uts/intel/iptun/Makefile
rename : usr/src/uts/sparc/tun/Makefile => usr/src/uts/sparc/iptun/Makefile
Diffstat (limited to 'usr/src')
194 files changed, 11732 insertions, 11913 deletions
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/6to4relay.c b/usr/src/cmd/cmd-inet/usr.sbin/6to4relay.c index f0547145b2..34bf605432 100644 --- a/usr/src/cmd/cmd-inet/usr.sbin/6to4relay.c +++ b/usr/src/cmd/cmd-inet/usr.sbin/6to4relay.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2002 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/socket.h> #include <sys/stream.h> #include <sys/param.h> @@ -34,7 +31,6 @@ #include <net/if.h> #include <netinet/in.h> #include <arpa/inet.h> -#include <inet/tun.h> #include <locale.h> @@ -46,82 +42,15 @@ #include <string.h> #include <stropts.h> #include <fcntl.h> +#include <libdliptun.h> -/* - * Converts an IPv4 address to a 6to4 /64 route. Address is of the form - * 2002:<V4ADDR>:<SUBNETID>::/64 where SUBNETID will always be 0 and V4ADDR - * equals the input IPv4 address. IN6_V4ADDR_TO_6TO4(v4, v6) creates an - * address of form 2002:<V4ADDR>:<SUBNETID>::<HOSTID>, where SUBNETID equals 0 - * and HOSTID equals 1. For this route, we are not concerned about the - * HOSTID portion of the address, thus it can be set to 0. 
- * - * void V4ADDR_TO_6TO4_RT(const struct in_addr *v4, in6_addr_t *v6) - */ -#define V4ADDR_TO_6TO4_RT(v4, v6) \ - (IN6_V4ADDR_TO_6TO4(v4, v6), (v6)->_S6_un._S6_u32[3] = 0) - -static void strioctl(int, void *, size_t); -static void getkstatus(ipaddr_t *); -static void printkstatus(void); -static void modifyroute(unsigned int, in6_addr_t *); -static void setkrraddr(ipaddr_t); -static void printerror(char *); static void usage(void); +static dladm_handle_t handle; /* booleans corresponding to command line flags */ -static boolean_t eflag = B_FALSE; -static boolean_t dflag = B_FALSE; -static boolean_t aflag = B_FALSE; - -static int fd = -1; - -/* - * srtioctl(cmd, buf, size) - * - * Passes the contents of 'buf' using the ioctl specified by 'cmd', by way of - * the I_STR ioctl mechanism. The response of the ioctl will be stored in buf - * when this function returns. The input 'size' specifies the size of the - * buffer to be passed. - */ -static void -strioctl(int cmd, void *buf, size_t size) -{ - struct strioctl ioc; - - (void) memset(&ioc, 0, sizeof (ioc)); - - ioc.ic_cmd = cmd; - ioc.ic_timout = 0; - ioc.ic_len = size; - ioc.ic_dp = (char *)buf; - - if (ioctl(fd, I_STR, &ioc) < 0) { - printerror("ioctl (I_STR)"); - (void) close(fd); - exit(EXIT_FAILURE); - /* NOTREACHED */ - } -} - - -/* - * getkstatus(out_addr) - * - * Queries the kernel for the 6to4 Relay Router destination address by sending - * the SIOCG6TO4TUNRRADDR ioctl to the tunnel module using the I_STR ioctl - * mechanism. The value returned, through the ioctl, will be an ipaddr_t - * embedded in a strioctl. Output parameter is set with result. 
- */ -static void -getkstatus(ipaddr_t *out_addr) -{ - ipaddr_t an_addr; - - /* Get the Relay Router address from the kernel */ - strioctl(SIOCG6TO4TUNRRADDR, &an_addr, sizeof (an_addr)); - - *out_addr = an_addr; /* set output parameter */ -} +static boolean_t eflag = B_FALSE; +static boolean_t dflag = B_FALSE; +static boolean_t aflag = B_FALSE; /* @@ -135,12 +64,20 @@ getkstatus(ipaddr_t *out_addr) static void printkstatus(void) { - ipaddr_t rr_addr; - char buf[INET6_ADDRSTRLEN]; - - getkstatus(&rr_addr); /* get value from kernel */ + struct in_addr rr_addr; + char buf[INET6_ADDRSTRLEN]; + char errstr[DLADM_STRSIZE]; + dladm_status_t status; + + status = dladm_iptun_get6to4relay(handle, &rr_addr); + if (status != DLADM_STATUS_OK) { + (void) fprintf(stderr, gettext("6to4relay: unable to get " + "6to4 relay status: %s\n"), + dladm_status2str(status, errstr)); + return; + } (void) printf("6to4relay: "); - if (rr_addr == INADDR_ANY) { + if (rr_addr.s_addr == INADDR_ANY) { (void) printf(gettext("6to4 Relay Router communication " "support is disabled.\n")); } else { @@ -177,10 +114,9 @@ modifyroute(unsigned int cmd, in6_addr_t *in_gw) /* Open a routing socket for passing route commands */ if ((rtsock = socket(AF_ROUTE, SOCK_RAW, AF_INET)) < 0) { - printerror("socket"); - (void) close(fd); - exit(EXIT_FAILURE); - /* NOTREACHED */ + (void) fprintf(stderr, gettext("6to4relay: unable to modify " + "default IPv6 route: socket: %s\n"), strerror(errno)); + return; } (void) memset(&rt_msg, 0, sizeof (rt_msg)); @@ -222,48 +158,24 @@ modifyroute(unsigned int cmd, in6_addr_t *in_gw) (void) close(rtsock); } -/* - * setkrraddr(in_addr) - * - * Sets the 6to4 Relay Router destination address value in the kernel using - * the SIOCS6TO4TUNRRADDR ioctl using the I_STR ioctl mechanism. - * The address is sent to the kernel, as an ipaddr_t, embedded in an strioctl. 
- */ -static void -setkrraddr(ipaddr_t in_addr) -{ - /* set Relay Router address */ - strioctl(SIOCS6TO4TUNRRADDR, &in_addr, sizeof (in_addr)); -} - -static void -printerror(char *s) -{ - int sverrno = errno; - - (void) fprintf(stderr, "6to4relay: "); - if (s != NULL) - (void) fprintf(stderr, "%s: ", s); - (void) fprintf(stderr, "%s\n", strerror(sverrno)); -} - static void usage(void) { (void) fprintf(stderr, gettext("usage:\n" - "\t6to4relay\n" - "\t6to4relay -e [-a <addr>]\n" - "\t6to4relay -d\n" - "\t6to4relay -h\n")); + "\t6to4relay\n" + "\t6to4relay -e [-a <addr>]\n" + "\t6to4relay -d\n" + "\t6to4relay -h\n")); } int main(int argc, char **argv) { - int ch; - char *in_addr = NULL; - int ret = EXIT_SUCCESS; + int ch; + char *relay_arg = NULL; + dladm_status_t status; + char errstr[DLADM_STRSIZE]; (void) setlocale(LC_ALL, ""); @@ -272,23 +184,18 @@ main(int argc, char **argv) #endif (void) textdomain(TEXT_DOMAIN); - /* open /dev/ip for use */ - if ((fd = open("/dev/ip", O_RDWR)) == -1) { - printerror(gettext("can't open /dev/ip")); - exit(EXIT_FAILURE); - } - - if (ioctl(fd, I_PUSH, TUN_NAME) < 0) { - printerror("ioctl (I_PUSH)"); - ret = EXIT_FAILURE; - goto done; + if ((status = dladm_open(&handle)) != DLADM_STATUS_OK) { + (void) fprintf(stderr, gettext("6to4relay: error opening " + "dladm handle: %s\n"), dladm_status2str(status, errstr)); + return (EXIT_FAILURE); } - /* If no args are specified, print status as queried from kernel */ + /* If no args are specified, print the current status. 
*/ if (argc < 2) { printkstatus(); - goto done; + return (EXIT_SUCCESS); } + while ((ch = getopt(argc, argv, "ea:dh")) != EOF) { switch (ch) { case 'e': @@ -299,15 +206,14 @@ main(int argc, char **argv) break; case 'a': aflag = B_TRUE; - in_addr = optarg; + relay_arg = optarg; break; case 'h': usage(); - goto done; + return (EXIT_SUCCESS); default: usage(); - ret = EXIT_FAILURE; - goto done; + return (EXIT_FAILURE); } } /* @@ -316,16 +222,15 @@ main(int argc, char **argv) */ if ((aflag && !eflag) || (eflag && dflag)) { usage(); - ret = EXIT_FAILURE; - goto done; + return (EXIT_FAILURE); } /* * Enable Relay Router communication support in the kernel. */ if (eflag) { - struct in_addr current_addr; /* addr currently set in kernel */ - struct in_addr new_addr; /* new addr we plan to set */ + struct in_addr current_addr; + struct in_addr new_addr; in6_addr_t v6_rt; /* @@ -334,92 +239,79 @@ main(int argc, char **argv) */ if (!aflag) { new_addr.s_addr = htonl(INADDR_6TO4RRANYCAST); - - } else if (inet_pton(AF_INET, in_addr, &new_addr) <= 0) { + } else if (inet_pton(AF_INET, relay_arg, &new_addr) <= 0) { (void) fprintf(stderr, gettext("6to4relay: input " "address (%s) is not a valid IPv4 dotted-decimal " - "string.\n"), in_addr); - ret = EXIT_FAILURE; - goto done; + "string.\n"), relay_arg); + return (EXIT_FAILURE); } - /* - * INADDR_ANY has special meaning in the kernel, reject this - * input and exit. - */ - if (new_addr.s_addr == INADDR_ANY) { - (void) fprintf(stderr, gettext("6to4relay: input " - "(0.0.0.0) is not a valid IPv4 unicast " - "address.\n")); - ret = EXIT_FAILURE; - goto done; + status = dladm_iptun_get6to4relay(handle, &current_addr); + if (status != DLADM_STATUS_OK) { + (void) fprintf(stderr, gettext("6to4relay: " + "unable to obtain current 6to4 relay address: %s"), + dladm_status2str(status, errstr)); + return (EXIT_FAILURE); } - /* - * get the current Relay Router address from the kernel. - * - * 1.
If the current address is INADDR_ANY, set the new - * address in the kernel and add a default IPv6 route using - * the new address. - * - * 2. If the current address is different than the new address, - * set the new address in the kernel, delete the - * old default IPv6 route and add a new default IPv6 route - * (using the new address). - * - * 3. If the kernel address is the same as the one we are - * adding, no additional processing is needed. - */ - getkstatus(&current_addr.s_addr); - - if (current_addr.s_addr == INADDR_ANY) { - setkrraddr(new_addr.s_addr); - V4ADDR_TO_6TO4_RT(&new_addr, &v6_rt); - modifyroute(RTM_ADD, &v6_rt); - } else if (new_addr.s_addr != current_addr.s_addr) { - setkrraddr(new_addr.s_addr); + if (current_addr.s_addr == new_addr.s_addr) + return (EXIT_SUCCESS); + + status = dladm_iptun_set6to4relay(handle, &new_addr); + if (status != DLADM_STATUS_OK) { + (void) fprintf(stderr, gettext("6to4relay: " + "unable to set the 6to4 relay router address: " + "%s\n"), dladm_status2str(status, errstr)); + return (EXIT_FAILURE); + } + + if (current_addr.s_addr != INADDR_ANY) { /* remove old default IPv6 route */ - V4ADDR_TO_6TO4_RT(&current_addr, &v6_rt); + IN6_V4ADDR_TO_6TO4(&current_addr, &v6_rt); modifyroute(RTM_DELETE, &v6_rt); - /* - * Add new default IPv6 route using a 6to4 address - * created from the address we just set in the kernel. - */ - V4ADDR_TO_6TO4_RT(&new_addr, &v6_rt); - modifyroute(RTM_ADD, &v6_rt); } + + IN6_V4ADDR_TO_6TO4(&new_addr, &v6_rt); + modifyroute(RTM_ADD, &v6_rt); } /* * Disable Relay Router communication support in kernel. */ if (dflag) { - struct in_addr current_addr; /* addr currently set in kernel */ + struct in_addr rr_addr; in6_addr_t v6_rt; /* * get Relay Router address from the kernel and delete * default IPv6 route that was added for it. */ - getkstatus(&current_addr.s_addr); - if (current_addr.s_addr == INADDR_ANY) { - /* - * Feature is already disabled in kernel, no - * additional processing is needed.
- */ - goto done; + status = dladm_iptun_get6to4relay(handle, &rr_addr); + if (status != DLADM_STATUS_OK) { + (void) fprintf(stderr, gettext("6to4relay: " + "unable to obtain current 6to4 relay address: %s"), + dladm_status2str(status, errstr)); + return (EXIT_FAILURE); } + if (rr_addr.s_addr == INADDR_ANY) + return (EXIT_SUCCESS); - V4ADDR_TO_6TO4_RT(&current_addr, &v6_rt); + IN6_V4ADDR_TO_6TO4(&rr_addr, &v6_rt); modifyroute(RTM_DELETE, &v6_rt); /* * INADDR_ANY (0.0.0.0) is used by the kernel to disable Relay * Router communication support. */ - setkrraddr(INADDR_ANY); + rr_addr.s_addr = INADDR_ANY; + status = dladm_iptun_set6to4relay(handle, &rr_addr); + if (status != DLADM_STATUS_OK) { + (void) fprintf(stderr, gettext("6to4relay: " + "unable to disable tunneling to 6to4 relay router: " + "%s\n"), dladm_status2str(status, errstr)); + return (EXIT_FAILURE); + } } -done: - (void) close(fd); - return (ret); + + return (EXIT_SUCCESS); } diff --git a/usr/src/cmd/cmd-inet/usr.sbin/Makefile b/usr/src/cmd/cmd-inet/usr.sbin/Makefile index e29c1765ec..386fedfe7f 100644 --- a/usr/src/cmd/cmd-inet/usr.sbin/Makefile +++ b/usr/src/cmd/cmd-inet/usr.sbin/Makefile @@ -54,6 +54,7 @@ NSLPROG= 6to4relay arp gettable hostconfig in.comsat in.rarpd \ CMDPROG= in.telnetd K5PROGS= in.telnetd in.rlogind in.rshd TSNETPROG= route +DLADMPROG= 6to4relay ndd DEFAULTFILES= telnetd.dfl PROGSRCS= $(PROG:%=%.c) @@ -144,12 +145,12 @@ $(K5PROGS) := CPPFLAGS += -I$(SRC)/head \ -I$(SRC)/lib/pam_modules/krb5 LDLIBS += $(K5LIBS) $(TSNETPROG) := LDLIBS += -ltsnet +$(DLADMPROG) := LDLIBS += -ldladm in.rarpd := LDLIBS += -linetutil -ldlpi if_mpadm := LDLIBS += -linetutil -lipmp if_mpadm.po := XGETFLAGS += -a route := CPPFLAGS += -DNDEBUG -ndd := LDLIBS += -ldladm gettable in.comsat := LDFLAGS += $(MAPFILE.NGB:%=-M%) .KEEP_STATE: @@ -227,7 +228,7 @@ clobber: $(SUBDIRS) -$(RM) $(CLEANFILES) $(CLOBBERFILES) lint: $(LINTSUBDIRS) - $(LINT.c) 6to4relay.c $(LDLIBS) -lsocket -lnsl + $(LINT.c) 6to4relay.c $(LDLIBS)
-lsocket -ldladm $(LINT.c) arp.c $(LDLIBS) -lsocket -lnsl @# $(LINT.c) in.rexecd.c $(LDLIBS) -lbsm -lpam $(LINT.c) -erroff=E_NAME_USED_NOT_DEF2 -erroff=E_NAME_DEF_NOT_USED2 \ diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/Makefile b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/Makefile index e99f2945a7..64fe8e8ae7 100644 --- a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/Makefile +++ b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/Makefile @@ -37,7 +37,7 @@ COMMONSRCS= $(CMDINETCOMMONDIR)/$(COMMONOBJS:%.o=%.c) SRCS= $(LOCALSRCS) $(COMMONSRCS) CPPFLAGS += -I$(CMDINETCOMMONDIR) -I$(SRC)/common/net/dhcp -LDLIBS += -ldhcpagent -ldlpi -linetutil -linetcfg -lipmp -ldladm +LDLIBS += -ldhcpagent -ldlpi -linetutil -lipmp -ldladm LINTFLAGS += -m ROOTUSRSBINLINKS = $(PROG:%=$(ROOTUSRSBIN)/%) diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/defs.h b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/defs.h index 4aa1aa0ed7..1b3361feed 100644 --- a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/defs.h +++ b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/defs.h @@ -54,7 +54,6 @@ extern "C" { #include <ipmp_mpathd.h> #include <ipmp_admin.h> -#include <inetcfg.h> #include <libinetutil.h> #include <alloca.h> diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.c b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.c index 2ac64b312e..506b15a307 100644 --- a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.c +++ b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.c @@ -14,15 +14,14 @@ #include <compat.h> #include <libdlpi.h> #include <libdllink.h> +#include <libdliptun.h> +#include <libdllink.h> #include <inet/ip.h> #include <inet/ipsec_impl.h> #define LOOPBACK_IF "lo0" #define NONE_STR "none" #define ARP_MOD_NAME "arp" -#define TUN_NAME "tun" -#define ATUN_NAME "atun" -#define TUN6TO4_NAME "6to4tun" #define IPMPSTUB (void *)-1 typedef struct if_flags { @@ -84,12 +83,16 @@ static const if_appflags_t if_appflags_tbl[] = { { NULL, 0, 0 } }; -static struct lifreq lifr; +static dladm_handle_t dlh; +boolean_t dlh_opened; 
+static struct lifreq lifr; /* current interface name a particular function is accessing */ -static char name[LIFNAMSIZ]; +static char name[LIFNAMSIZ]; /* foreach interface saved name */ -static char origname[LIFNAMSIZ]; -static int setaddr; +static char origname[LIFNAMSIZ]; +static int setaddr; +static boolean_t ipsec_policy_set; +static boolean_t ipsec_auth_covered; /* * Make sure the algorithm variables hold more than the sizeof an algorithm @@ -176,10 +179,9 @@ static boolean_t in_getmask(struct sockaddr_in *saddr, static int in_getprefixlen(char *addr, boolean_t slash, int plen); static boolean_t in_prefixlentomask(int prefixlen, int maxlen, uchar_t *mask); -static int settaddr(char *, int (*)(icfg_handle_t, - const struct sockaddr *, socklen_t)); static void status(void); static void ifstatus(const char *); +static void tun_status(datalink_id_t); static void usage(void); static int strioctl(int s, int cmd, void *buf, int buflen); static int setifdhcp(const char *caller, const char *ifname, @@ -187,9 +189,7 @@ static int setifdhcp(const char *caller, const char *ifname, static int ip_domux2fd(int *, int *, int *, int *, int *); static int ip_plink(int, int, int, int, int); static int modop(char *arg, char op); -static int find_all_global_interfaces(struct lifconf *lifcp, char **buf, - int64_t lifc_flags); -static int find_all_zone_interfaces(struct lifconf *lifcp, char **buf, +static int find_all_interfaces(struct lifconf *lifcp, char **buf, int64_t lifc_flags); static int create_ipmp(const char *grname, int af, const char *ifname, boolean_t implicit); @@ -197,6 +197,9 @@ static int create_ipmp_peer(int af, const char *ifname); static void start_ipmp_daemon(void); static boolean_t ifaddr_up(ifaddrlistx_t *ifaddrp); static boolean_t ifaddr_down(ifaddrlistx_t *ifaddrp); +static dladm_status_t ifconfig_dladm_open(const char *, datalink_class_t, + datalink_id_t *); +static void dladmerr_exit(dladm_status_t status, const char *str); #define max(a, b) ((a) < (b) ? 
(b) : (a)) @@ -359,8 +362,8 @@ struct afswtch *afp; /* the address family being set or asked about */ int main(int argc, char *argv[]) { - int64_t lifc_flags; - char *default_ip_str; + int64_t lifc_flags; + char *default_ip_str; lifc_flags = LIFC_NOXMIT|LIFC_TEMPORARY|LIFC_ALLZONES|LIFC_UNDER_IPMP; @@ -510,23 +513,12 @@ foreachinterface(void (*func)(), int argc, char *argv[], int af, buf = NULL; /* * Special case: - * ifconfig -a plumb should find all network interfaces - * in the machine for the global zone. - * For non-global zones, only find the assigned interfaces. - * Also, there is no need to SIOCGLIF* ioctls, since - * those interfaces have already been plumbed + * ifconfig -a plumb should find all network interfaces in the current + * zone. */ if (argc > 0 && (strcmp(*argv, "plumb") == 0)) { - if (getzoneid() == GLOBAL_ZONEID) { - if (find_all_global_interfaces(&lifc, &buf, - lifc_flags) != 0) - return; - } else { - if (find_all_zone_interfaces(&lifc, &buf, - lifc_flags) != 0) - return; - } - if (lifc.lifc_len == 0) + if (find_all_interfaces(&lifc, &buf, lifc_flags) != 0 || + lifc.lifc_len == 0) return; plumball = 1; } else { @@ -659,38 +651,6 @@ foreachinterface(void (*func)(), int argc, char *argv[], int af, free(buf); } -static void -tun_reality_check(void) -{ - struct iftun_req treq; - ipsec_req_t *ipsr; - - (void) strncpy(treq.ifta_lifr_name, name, sizeof (treq.ifta_lifr_name)); - if (strchr(name, ':') != NULL) { - /* Return, we don't need to check. */ - return; - } - if (ioctl(s, SIOCGTUNPARAM, (caddr_t)&treq) < 0 || - !(treq.ifta_flags & IFTUN_SECURITY) || - (treq.ifta_flags & IFTUN_COMPLEX_SECURITY)) { - /* - * Either not a tunnel (the SIOCGTUNPARAM fails on - * non-tunnels), the security flag is not set, or - * this is a tunnel with ipsecconf(1M)-set policy. - * Regardless, return. 
- */ - return; - } - - ipsr = (ipsec_req_t *)&treq.ifta_secinfo; - - if (ipsr->ipsr_esp_req != 0 && - ipsr->ipsr_esp_auth_alg == SADB_AALG_NONE && - ipsr->ipsr_ah_req == 0) - (void) fprintf(stderr, "ifconfig: WARNING - tunnel with " - "only ESP and no authentication.\n"); -} - /* * for the specified interface call (*func)(argc, argv, af, lifrp). */ @@ -869,7 +829,10 @@ ifconfig(int argc, char *argv[], int af, struct lifreq *lifrp) } /* Check to see if there's a security hole in the tunnel setup. */ - tun_reality_check(); + if (ipsec_policy_set && !ipsec_auth_covered) { + (void) fprintf(stderr, "ifconfig: WARNING: tunnel with only " + "ESP and no authentication.\n"); + } } /* ARGSUSED */ @@ -1061,7 +1024,7 @@ parsenum(char *num) * this isn't common to ipseckey.c. * * NOTE: Static buffer in this function for the return value. Since ifconfig - * isn't multithreaded, this isn't a huge problem. + * isn't multithreaded, this isn't a huge problem. */ #define NBUF_SIZE 20 /* Enough to print a large integer. */ @@ -1146,52 +1109,34 @@ parsealg(char *algname, int proto_num) enum ipsec_alg_type { ESP_ENCR_ALG = 1, ESP_AUTH_ALG, AH_AUTH_ALG }; -boolean_t first_set_tun = _B_TRUE; -boolean_t encr_alg_set = _B_FALSE; - -/* - * Need global for multiple calls to set_tun_algs - * because we accumulate algorithm selections over - * the lifetime of this ifconfig(1M) invocation. 
- */ -static struct iftun_req treq_tun; - static int set_tun_algs(int which_alg, int alg) { - ipsec_req_t *ipsr; + boolean_t encr_alg_set = _B_FALSE; + iptun_params_t params; + dladm_status_t status; + ipsec_req_t *ipsr; - (void) strncpy(treq_tun.ifta_lifr_name, name, - sizeof (treq_tun.ifta_lifr_name)); - if (strchr(name, ':') != NULL) { - errno = EPERM; - Perror0_exit("Tunnel params on logical interfaces"); - } - if (ioctl(s, SIOCGTUNPARAM, (caddr_t)&treq_tun) < 0) { - if (errno == EOPNOTSUPP || errno == EINVAL) - Perror0_exit("Not a tunnel"); - else Perror0_exit("SIOCGTUNPARAM"); - } + if ((status = ifconfig_dladm_open(name, DATALINK_CLASS_IPTUN, + &params.iptun_param_linkid)) != DLADM_STATUS_OK) + goto done; - ipsr = (ipsec_req_t *)&treq_tun.ifta_secinfo; + status = dladm_iptun_getparams(dlh, &params, DLADM_OPT_ACTIVE); + if (status != DLADM_STATUS_OK) + goto done; - if (treq_tun.ifta_vers != IFTUN_VERSION) { - (void) fprintf(stderr, - "Kernel tunnel secinfo version mismatch.\n"); - exit(1); - } + ipsr = &params.iptun_param_secinfo; /* * If I'm just starting off this ifconfig, I want a clean slate, * otherwise, I've captured the current tunnel security settings. * In the case of continuation, I merely add to the settings.
*/ - if (first_set_tun) { - first_set_tun = _B_FALSE; + if (!(params.iptun_param_flags & IPTUN_PARAM_SECINFO)) (void) memset(ipsr, 0, sizeof (*ipsr)); - } - treq_tun.ifta_flags = IFTUN_SECURITY; + /* We're only modifying the IPsec information */ + params.iptun_param_flags = IPTUN_PARAM_SECINFO; switch (which_alg) { case ESP_ENCR_ALG: @@ -1243,11 +1188,19 @@ set_tun_algs(int which_alg, int alg) /* Will never hit DEFAULT */ } - if (ioctl(s, SIOCSTUNPARAM, (caddr_t)&treq_tun) < 0) { - Perror2_exit("set tunnel security properties", - treq_tun.ifta_lifr_name); - } + status = dladm_iptun_modify(dlh, &params, DLADM_OPT_ACTIVE); +done: + if (status != DLADM_STATUS_OK) + dladmerr_exit(status, name); + else { + ipsec_policy_set = _B_TRUE; + if ((ipsr->ipsr_esp_req != 0 && + ipsr->ipsr_esp_auth_alg != SADB_AALG_NONE) || + (ipsr->ipsr_ah_req != 0 && + ipsr->ipsr_auth_alg != SADB_AALG_NONE)) + ipsec_auth_covered = _B_TRUE; + } return (0); } @@ -1678,10 +1631,7 @@ setifether(char *addr, int64_t param) static void print_ifether(char *ifname) { - int protocol; - icfg_if_t interface; - icfg_handle_t handle; - int fd; + int fd; (void) strncpy(lifr.lifr_name, name, sizeof (lifr.lifr_name)); @@ -1704,36 +1654,19 @@ print_ifether(char *ifname) if (lifr.lifr_flags & (IFF_VIRTUAL|IFF_IPMP)) return; - /* - * We must be careful to set if_protocol based on the current - * properties of the interface. For instance, if "ip.tun0" is - * configured only as an IPv6 tunnel, then if_protocol must be - * set to AF_INET6 or icfg_get_tunnel_lower() will fail and - * we will falsely conclude that it's not a tunnel.
- */ - interface.if_protocol = AF_INET; - if (lifr.lifr_flags & IFF_IPV6) - interface.if_protocol = AF_INET6; - - (void) strncpy(interface.if_name, ifname, sizeof (interface.if_name)); - - if (icfg_open(&handle, &interface) == ICFG_SUCCESS) { - if (icfg_get_tunnel_lower(handle, &protocol) == ICFG_SUCCESS) { - /* Tunnel op succeeded -- it's a tunnel so skip */ - icfg_close(handle); - return; - } - icfg_close(handle); - } + /* IP tunnels also don't have Ethernet-like MAC addresses */ + if (ifconfig_dladm_open(ifname, DATALINK_CLASS_IPTUN, NULL) == + DLADM_STATUS_OK) + return; dlpi_print_address(ifname); } /* - * static int find_all_global_interfaces(struct lifconf *lifcp, char **buf, + * static int find_all_interfaces(struct lifconf *lifcp, char **buf, * int64_t lifc_flags) * - * It finds all data links for the global zone. + * It finds all active data links. * * It takes in input a pointer to struct lifconf to receive interfaces * informations, a **char to hold allocated buffer, and a lifc_flags. 
@@ -1743,32 +1676,23 @@ print_ifether(char *ifname) * -1 = problem */ static int -find_all_global_interfaces(struct lifconf *lifcp, char **buf, - int64_t lifc_flags) +find_all_interfaces(struct lifconf *lifcp, char **buf, int64_t lifc_flags) { unsigned bufsize; int n; ni_t *nip; struct lifreq *lifrp; - dladm_handle_t dld_handle; dladm_status_t status; - char errmsg[DLADM_STRSIZE]; - if ((status = dladm_open(&dld_handle)) != DLADM_STATUS_OK) { - (void) fprintf(stderr, - "ifconfig: find_all_global_interfaces failed: %s\n", - dladm_status2str(status, errmsg)); - return (-1); + if (!dlh_opened) { + status = ifconfig_dladm_open(NULL, 0, NULL); + if (status != DLADM_STATUS_OK) + dladmerr_exit(status, "unable to open dladm handle"); } - (void) dlpi_walk(ni_entry, dld_handle, 0); - - dladm_close(dld_handle); + (void) dlpi_walk(ni_entry, dlh, 0); - /* - * Now, translate the linked list into - * a struct lifreq buffer - */ + /* Now, translate the linked list into a struct lifreq buffer */ if (num_ni == 0) { lifcp->lifc_family = AF_UNSPEC; lifcp->lifc_flags = lifc_flags; @@ -1797,92 +1721,6 @@ find_all_global_interfaces(struct lifconf *lifcp, char **buf, } /* - * static int find_all_zone_interfaces(struct lifconf *lifcp, char **buf, - * int64_t lifc_flags) - * - * It finds all interfaces for an exclusive-IP zone, that is all the interfaces - * assigned to it. - * - * It takes in input a pointer to struct lifconf to receive interfaces - * informations, a **char to hold allocated buffer, and a lifc_flags. 
- * - * Return values: - * 0 = everything OK - * -1 = problem - */ -static int -find_all_zone_interfaces(struct lifconf *lifcp, char **buf, int64_t lifc_flags) -{ - zoneid_t zoneid; - unsigned bufsize; - char *dlnames, *ptr; - struct lifreq *lifrp; - int num_ni_saved, i; - - zoneid = getzoneid(); - - num_ni = 0; - if (zone_list_datalink(zoneid, &num_ni, NULL) != 0) - Perror0_exit("find_all_interfaces: list interfaces failed"); -again: - /* this zone doesn't have any data-links */ - if (num_ni == 0) { - lifcp->lifc_family = AF_UNSPEC; - lifcp->lifc_flags = lifc_flags; - lifcp->lifc_len = 0; - lifcp->lifc_buf = NULL; - return (0); - } - - dlnames = malloc(num_ni * LIFNAMSIZ); - if (dlnames == NULL) - Perror0_exit("find_all_interfaces: out of memory"); - num_ni_saved = num_ni; - - if (zone_list_datalink(zoneid, &num_ni, dlnames) != 0) - Perror0_exit("find_all_interfaces: list interfaces failed"); - - if (num_ni_saved < num_ni) { - /* list increased, try again */ - free(dlnames); - goto again; - } - - /* this zone doesn't have any data-links now */ - if (num_ni == 0) { - free(dlnames); - lifcp->lifc_family = AF_UNSPEC; - lifcp->lifc_flags = lifc_flags; - lifcp->lifc_len = 0; - lifcp->lifc_buf = NULL; - return (0); - } - - bufsize = num_ni * sizeof (struct lifreq); - if ((*buf = malloc(bufsize)) == NULL) { - free(dlnames); - Perror0_exit("find_all_interfaces: malloc failed"); - } - - lifrp = (struct lifreq *)*buf; - ptr = dlnames; - for (i = 0; i < num_ni; i++) { - if (strlcpy(lifrp->lifr_name, ptr, LIFNAMSIZ) >= - LIFNAMSIZ) - Perror0_exit("find_all_interfaces: overflow"); - ptr += LIFNAMSIZ; - lifrp++; - } - - free(dlnames); - lifcp->lifc_family = AF_UNSPEC; - lifcp->lifc_flags = lifc_flags; - lifcp->lifc_len = bufsize; - lifcp->lifc_buf = *buf; - return (0); -} - -/* * Create the next unused logical interface using the original name * and assign the address (and mask if '/<n>' is part of the address). 
* Use the new logical interface for subsequent subcommands by updating @@ -2619,10 +2457,7 @@ modop(char *arg, char op) */ if (op == MODREMOVE_OP && (strcmp(mod.mod_name, ARP_MOD_NAME) == 0 || - strcmp(mod.mod_name, IP_MOD_NAME) == 0 || - strcmp(mod.mod_name, TUN_NAME) == 0 || - strcmp(mod.mod_name, ATUN_NAME) == 0 || - strcmp(mod.mod_name, TUN6TO4_NAME) == 0)) { + strcmp(mod.mod_name, IP_MOD_NAME) == 0)) { (void) fprintf(stderr, "ifconfig: cannot remove %s\n", mod.mod_name); exit(1); @@ -2668,6 +2503,19 @@ modop(char *arg, char op) orig_arpid)); } +static int +modify_tun(iptun_params_t *params) +{ + dladm_status_t status; + + if ((status = ifconfig_dladm_open(name, DATALINK_CLASS_IPTUN, + &params->iptun_param_linkid)) == DLADM_STATUS_OK) + status = dladm_iptun_modify(dlh, params, DLADM_OPT_ACTIVE); + if (status != DLADM_STATUS_OK) + dladmerr_exit(status, name); + return (0); +} + /* * Set tunnel source address */ @@ -2675,7 +2523,12 @@ modop(char *arg, char op) static int setiftsrc(char *addr, int64_t param) { - return (settaddr(addr, icfg_set_tunnel_src)); + iptun_params_t params; + + params.iptun_param_flags = IPTUN_PARAM_LADDR; + (void) strlcpy(params.iptun_param_laddr, addr, + sizeof (params.iptun_param_laddr)); + return (modify_tun(&params)); } /* @@ -2685,56 +2538,27 @@ setiftsrc(char *addr, int64_t param) static int setiftdst(char *addr, int64_t param) { - return (settaddr(addr, icfg_set_tunnel_dest)); + iptun_params_t params; + + params.iptun_param_flags = IPTUN_PARAM_RADDR; + (void) strlcpy(params.iptun_param_raddr, addr, + sizeof (params.iptun_param_raddr)); + return (modify_tun(&params)); } -/* - * sets tunnels src|dst address. settaddr() expects the following: - * addr: Points to a printable string containing the address to be - * set, e.g. 129.153.128.110. - * fn: Pointer to a libinetcfg routine that will do the actual work. - * The only valid functions are icfg_set_tunnel_src and - * icfg_set_tunnel_dest.
- */ static int -settaddr(char *addr, - int (*fn)(icfg_handle_t, const struct sockaddr *, socklen_t)) +set_tun_prop(const char *propname, char *value) { - icfg_handle_t handle; - icfg_if_t interface; - struct sockaddr_storage laddr; - int lower; - int rc; + dladm_status_t status; + datalink_id_t linkid; - if (strchr(name, ':') != NULL) { - errno = EPERM; - Perror0_exit("Tunnel params on logical interfaces"); + status = ifconfig_dladm_open(name, DATALINK_CLASS_IPTUN, &linkid); + if (status == DLADM_STATUS_OK) { + status = dladm_set_linkprop(dlh, linkid, propname, &value, 1, + DLADM_OPT_ACTIVE); } - (void) strncpy(interface.if_name, name, sizeof (interface.if_name)); - interface.if_protocol = SOCKET_AF(af); - - /* Open interface. */ - if ((rc = icfg_open(&handle, &interface)) != ICFG_SUCCESS) - Perror0_exit((char *)icfg_errmsg(rc)); - - rc = icfg_get_tunnel_lower(handle, &lower); - if (rc != ICFG_SUCCESS) - Perror0_exit((char *)icfg_errmsg(rc)); - - if (lower == AF_INET) { - in_getaddr(addr, (struct sockaddr *)&laddr, NULL); - } else { - in6_getaddr(addr, (struct sockaddr *)&laddr, NULL); - } - - /* Call fn to do the real work, and close the interface. */ - rc = (*fn)(handle, (struct sockaddr *)&laddr, - sizeof (struct sockaddr_storage)); - icfg_close(handle); - - if (rc != ICFG_SUCCESS) - Perror0_exit((char *)icfg_errmsg(rc)); - + if (status != DLADM_STATUS_OK) + dladmerr_exit(status, name); return (0); } @@ -2743,35 +2567,7 @@ settaddr(char *addr, static int set_tun_encap_limit(char *arg, int64_t param) { - short limit; - icfg_if_t interface; - icfg_handle_t handle; - int rc; - - if (strchr(name, ':') != NULL) { - errno = EPERM; - Perror0_exit("Tunnel params on logical interfaces"); - } - - if ((sscanf(arg, "%hd", &limit) != 1) || (limit < 0) || - (limit > 255)) { - errno = EINVAL; - Perror0_exit("Invalid encapsulation limit"); - } - - /* Open interface for configuration. 
*/ - (void) strncpy(interface.if_name, name, sizeof (interface.if_name)); - interface.if_protocol = SOCKET_AF(af); - if (icfg_open(&handle, &interface) != ICFG_SUCCESS) - Perror0_exit("couldn't open interface"); - - rc = icfg_set_tunnel_encaplimit(handle, (int)limit); - icfg_close(handle); - - if (rc != ICFG_SUCCESS) - Perror0_exit("Could not configure tunnel encapsulation limit"); - - return (0); + return (set_tun_prop("encaplimit", arg)); } /* Disable encapsulation limit. */ @@ -2779,28 +2575,7 @@ set_tun_encap_limit(char *arg, int64_t param) static int clr_tun_encap_limit(char *arg, int64_t param) { - icfg_if_t interface; - icfg_handle_t handle; - int rc; - - if (strchr(name, ':') != NULL) { - errno = EPERM; - Perror0_exit("Tunnel params on logical interfaces"); - } - - /* Open interface for configuration. */ - (void) strncpy(interface.if_name, name, sizeof (interface.if_name)); - interface.if_protocol = SOCKET_AF(af); - if (icfg_open(&handle, &interface) != ICFG_SUCCESS) - Perror0_exit("couldn't open interface"); - - rc = icfg_set_tunnel_encaplimit(handle, -1); - icfg_close(handle); - - if (rc != ICFG_SUCCESS) - Perror0_exit((char *)icfg_errmsg(rc)); - - return (0); + return (set_tun_encap_limit("-1", 0)); } /* Set tunnel hop limit. */ @@ -2808,37 +2583,7 @@ clr_tun_encap_limit(char *arg, int64_t param) static int set_tun_hop_limit(char *arg, int64_t param) { - unsigned short limit; - icfg_if_t interface; - icfg_handle_t handle; - int rc; - - if (strchr(name, ':') != NULL) { - errno = EPERM; - Perror0_exit("Tunnel params on logical interfaces"); - } - - /* - * Check limit here since it's really only an 8-bit unsigned quantity. - */ - if ((sscanf(arg, "%hu", &limit) != 1) || (limit > 255)) { - errno = EINVAL; - Perror0_exit("Invalid hop limit"); - } - - /* Open interface for configuration. 
*/ - (void) strncpy(interface.if_name, name, sizeof (interface.if_name)); - interface.if_protocol = SOCKET_AF(af); - if (icfg_open(&handle, &interface) != ICFG_SUCCESS) - Perror0_exit("couldn't open interface"); - - rc = icfg_set_tunnel_hoplimit(handle, (uint8_t)limit); - icfg_close(handle); - - if (rc != ICFG_SUCCESS) - Perror0_exit("Could not configure tunnel hop limit"); - - return (0); + return (set_tun_prop("hoplimit", arg)); } /* Set zone ID */ @@ -3066,8 +2811,9 @@ ifstatus(const char *ifname) static void status(void) { - struct afswtch *p = afp; - uint64_t flags; + struct afswtch *p = afp; + uint64_t flags; + datalink_id_t linkid; (void) strncpy(lifr.lifr_name, name, sizeof (lifr.lifr_name)); if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) { @@ -3094,6 +2840,10 @@ status(void) ifstatus(name); + if (ifconfig_dladm_open(name, DATALINK_CLASS_IPTUN, &linkid) == + DLADM_STATUS_OK) + tun_status(linkid); + if (p != NULL) { (*p->af_status)(1, flags); } else { @@ -3219,28 +2969,20 @@ configinfo(char *null, int64_t param) } } - (void) printf("\n"); + (void) putchar('\n'); return (0); } static void -print_tsec(struct iftun_req *tparams) +print_tsec(iptun_params_t *params) { ipsec_req_t *ipsr; (void) printf("\ttunnel security settings "); - /* - * Deal with versioning, for now just point - * an ipsec_req_t at ifta_secinfo. If versions - * change, something else will overlay ifta_secinfo. 
- */ - assert(tparams->ifta_vers == IFTUN_VERSION); - - if (tparams->ifta_flags & IFTUN_COMPLEX_SECURITY) { - (void) printf("--> use 'ipsecconf -ln -i %s'", - tparams->ifta_lifr_name); + if (!(params->iptun_param_flags & IPTUN_PARAM_SECINFO)) { + (void) printf("--> use 'ipsecconf -ln -i %s'", name); } else { - ipsr = (ipsec_req_t *)(&tparams->ifta_secinfo); + ipsr = ¶ms->iptun_param_secinfo; if (ipsr->ipsr_ah_req & IPSEC_PREF_REQUIRED) { (void) printf("ah (%s) ", rparsealg(ipsr->ipsr_auth_alg, IPSEC_PROTO_AH)); @@ -3256,117 +2998,81 @@ print_tsec(struct iftun_req *tparams) } static void -tun_status(void) +tun_status(datalink_id_t linkid) { - icfg_if_t interface; - int rc; - icfg_handle_t handle; - int protocol; - char srcbuf[INET6_ADDRSTRLEN]; - char dstbuf[INET6_ADDRSTRLEN]; - boolean_t tabbed; - uint8_t hoplimit; - int16_t encaplimit; - struct sockaddr_storage taddr; - socklen_t socklen = sizeof (taddr); - - (void) strncpy(interface.if_name, name, sizeof (interface.if_name)); - interface.if_protocol = SOCKET_AF(af); - if ((rc = icfg_open(&handle, &interface)) != ICFG_SUCCESS) - Perror0_exit((char *)icfg_errmsg(rc)); - - /* - * only print tunnel info for lun 0. If ioctl fails, assume - * we are not a tunnel - */ - if (strchr(name, ':') != NULL || - icfg_get_tunnel_lower(handle, &protocol) != ICFG_SUCCESS) { - icfg_close(handle); + iptun_params_t params; + char propval[DLADM_PROP_VAL_MAX]; + char *valptr[1]; + uint_t valcnt = 1; + boolean_t tabbed = _B_FALSE; + + params.iptun_param_linkid = linkid; + + /* If dladm_iptun_getparams() fails, assume we are not a tunnel. 
*/ + assert(dlh_opened); + if (dladm_iptun_getparams(dlh, ¶ms, DLADM_OPT_ACTIVE) != + DLADM_STATUS_OK) return; - } - switch (protocol) { - case AF_INET: + switch (params.iptun_param_type) { + case IPTUN_TYPE_IPV4: + case IPTUN_TYPE_6TO4: (void) printf("\tinet"); break; - case AF_INET6: + case IPTUN_TYPE_IPV6: (void) printf("\tinet6"); break; default: - Perror0_exit("\ttunnel: Illegal lower stream\n\t"); + dladmerr_exit(DLADM_STATUS_IPTUNTYPE, name); break; } - rc = icfg_get_tunnel_src(handle, (struct sockaddr *)&taddr, &socklen); - if (rc == ICFG_NOT_SET) { - (void) strlcpy(srcbuf, (protocol == AF_INET) ? "0.0.0.0" : - "::", sizeof (srcbuf)); - } else if (rc != ICFG_SUCCESS) { - Perror0_exit((char *)icfg_errmsg(rc)); - } else { - rc = icfg_sockaddr_to_str(protocol, (struct sockaddr *)&taddr, - srcbuf, sizeof (srcbuf)); - if (rc != ICFG_SUCCESS) { - Perror0_exit((char *)icfg_errmsg(rc)); - } - } - - (void) printf(" tunnel src %s ", srcbuf); - - rc = icfg_get_tunnel_dest(handle, (struct sockaddr *)&taddr, &socklen); - if (rc == ICFG_NOT_SET) { - (void) printf("\n"); - } else { - rc = icfg_sockaddr_to_str(protocol, (struct sockaddr *)&taddr, - dstbuf, sizeof (dstbuf)); - if (rc != ICFG_SUCCESS) { - Perror0_exit((char *)icfg_errmsg(rc)); - } - (void) printf("tunnel dst %s\n", dstbuf); - } - - if (handle->ifh_tunnel_params != NULL && - (handle->ifh_tunnel_params->ifta_flags & IFTUN_SECURITY)) - print_tsec(handle->ifh_tunnel_params); - /* - * tabbed indicates tabbed and printed. Use it tell us whether - * to tab and that we've printed something here, so we need a - * newline + * There is always a source address. If it hasn't been explicitly + * set, the API will pass back a buffer containing the unspecified + * address. 
*/ - tabbed = _B_FALSE; + (void) printf(" tunnel src %s ", params.iptun_param_laddr); - if (icfg_get_tunnel_hoplimit(handle, &hoplimit) == ICFG_SUCCESS) { - (void) printf("\ttunnel hop limit %d ", hoplimit); + if (params.iptun_param_flags & IPTUN_PARAM_RADDR) + (void) printf("tunnel dst %s\n", params.iptun_param_raddr); + else + (void) putchar('\n'); + + if (params.iptun_param_flags & IPTUN_PARAM_IPSECPOL) + print_tsec(¶ms); + + valptr[0] = propval; + if (dladm_get_linkprop(dlh, linkid, DLADM_PROP_VAL_CURRENT, "hoplimit", + (char **)valptr, &valcnt) == DLADM_STATUS_OK) { + (void) printf("\ttunnel hop limit %s ", propval); tabbed = _B_TRUE; } - if ((protocol == AF_INET6) && - (icfg_get_tunnel_encaplimit(handle, &encaplimit) == - ICFG_SUCCESS)) { + if (dladm_get_linkprop(dlh, linkid, DLADM_PROP_VAL_CURRENT, + "encaplimit", (char **)valptr, &valcnt) == DLADM_STATUS_OK) { + uint32_t elim; + if (!tabbed) { - (void) printf("\t"); + (void) putchar('\t'); tabbed = _B_TRUE; } - if (encaplimit >= 0) { - (void) printf("tunnel encapsulation limit %d", - encaplimit); - } else { + elim = strtol(propval, NULL, 10); + if (elim > 0) + (void) printf("tunnel encapsulation limit %s", propval); + else (void) printf("tunnel encapsulation limit disabled"); - } } if (tabbed) - (void) printf("\n"); - - icfg_close(handle); + (void) putchar('\n'); } static void in_status(int force, uint64_t flags) { - struct sockaddr_in *sin, *laddr; - struct sockaddr_in netmask = { AF_INET }; + struct sockaddr_in *sin, *laddr; + struct sockaddr_in netmask = { AF_INET }; if (debug) (void) printf("in_status(%s) flags 0x%llx\n", name, flags); @@ -3375,9 +3081,6 @@ in_status(int force, uint64_t flags) if (!(flags & IFF_IPV4)) return; - /* if the interface is a tunnel, print the tunnel status */ - tun_status(); - if (!(flags & IFF_NOLOCAL)) { (void) strncpy(lifr.lifr_name, name, sizeof (lifr.lifr_name)); if (ioctl(s, SIOCGLIFADDR, (caddr_t)&lifr) < 0) { @@ -3468,8 +3171,8 @@ in_status(int force, uint64_t flags) 
static void in6_status(int force, uint64_t flags) { - char abuf[INET6_ADDRSTRLEN]; - struct sockaddr_in6 *sin6, *laddr6; + char abuf[INET6_ADDRSTRLEN]; + struct sockaddr_in6 *sin6, *laddr6; if (debug) (void) printf("in6_status(%s) flags 0x%llx\n", name, flags); @@ -3477,9 +3180,6 @@ in6_status(int force, uint64_t flags) if (!(flags & IFF_IPV6)) return; - /* if the interface is a tunnel, print the tunnel status */ - tun_status(); - if (!(flags & IFF_NOLOCAL)) { (void) strncpy(lifr.lifr_name, name, sizeof (lifr.lifr_name)); if (ioctl(s, SIOCGLIFADDR, (caddr_t)&lifr) < 0) { @@ -4014,9 +3714,6 @@ ifplumb(const char *linkname, const char *ifname, boolean_t genppa, int af) /* * This interface does use ARP, so set up a separate stream * from the interface to ARP. - * - * Note: modules specified by the user are pushed - * only on the interface stream, not on the ARP stream. */ if (debug) (void) printf("ifconfig: ifplumb: interface %s", ifname); @@ -4251,6 +3948,7 @@ inetplumb(char *arg, int64_t param) char *strptr; boolean_t islo; zoneid_t zoneid; + datalink_id_t linkid; strptr = strchr(name, ':'); islo = (strcmp(name, LOOPBACK_IF) == 0); @@ -4280,16 +3978,19 @@ inetplumb(char *arg, int64_t param) } /* - * For global zone, check if the interface is used by a non-global - * zone, note that the non-global zones doesn't need this check, - * because zoneadm has taken care of this when the zone boots. + * If we're in the global zone and we're plumbing a datalink, make + * sure that the datalink is not assigned to a non-global zone. Note + * that the non-global zones don't need this check, because zoneadm + * has taken care of this when the zones boot. 
*/ zoneid = getzoneid(); - if (zoneid == GLOBAL_ZONEID) { + if (zoneid == GLOBAL_ZONEID && + ifconfig_dladm_open(name, DATALINK_CLASS_ALL, &linkid) == + DLADM_STATUS_OK) { int ret; zoneid = ALL_ZONES; - ret = zone_check_datalink(&zoneid, name); + ret = zone_check_datalink(&zoneid, linkid); if (ret == 0) { char zonename[ZONENAME_MAX]; @@ -4531,6 +4232,44 @@ ifaddr_down(ifaddrlistx_t *ifaddrp) return (ifaddr_op(ifaddrp, _B_FALSE)); } +/* + * Open the global libdladm handle "dlh" if it isn't already opened. The + * caller may optionally supply a link name to obtain its linkid. If a link + * of a specific class or classes is required, reqclass specifies the class + * mask. + */ +static dladm_status_t +ifconfig_dladm_open(const char *name, datalink_class_t reqclass, + datalink_id_t *linkid) +{ + dladm_status_t status = DLADM_STATUS_OK; + datalink_class_t class; + + if (!dlh_opened) { + if ((status = dladm_open(&dlh)) != DLADM_STATUS_OK) + return (status); + dlh_opened = _B_TRUE; + } + if (name != NULL) { + status = dladm_name2info(dlh, name, linkid, NULL, &class, NULL); + if (status == DLADM_STATUS_OK) { + if (!(class & reqclass)) + status = DLADM_STATUS_LINKINVAL; + } + } + return (status); +} + +void +dladmerr_exit(dladm_status_t status, const char *str) +{ + char errstr[DLADM_STRSIZE]; + + (void) fprintf(stderr, "%s: %s\n", str, + dladm_status2str(status, errstr)); + exit(1); +} + void Perror0(const char *cmd) { @@ -4542,7 +4281,6 @@ Perror0_exit(const char *cmd) { Perror0(cmd); exit(1); - /* NOTREACHED */ } void diff --git a/usr/src/cmd/cmd-inet/usr.sbin/in.rarpd.c b/usr/src/cmd/cmd-inet/usr.sbin/in.rarpd.c index 6a391d4b8c..81039c3e86 100644 --- a/usr/src/cmd/cmd-inet/usr.sbin/in.rarpd.c +++ b/usr/src/cmd/cmd-inet/usr.sbin/in.rarpd.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ @@ -30,8 +30,6 @@ * under license from the Regents of the University of California. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * rarpd.c Reverse-ARP server. * Refer to RFC 903 "A Reverse Address Resolution Protocol". @@ -240,9 +238,8 @@ main(int argc, char *argv[]) error("out of memory"); } - if (!ifparse_ifspec(buf, &ifsp) || ifsp.ifsp_modcnt != 0) { + if (!ifparse_ifspec(buf, &ifsp)) error("invalid interface specification"); - } if (ifsp.ifsp_lunvalid) { (void) snprintf(ifdev->ldevice, diff --git a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.c b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.c index 2daf77bbdf..097dd6ee90 100644 --- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.c +++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.c @@ -89,7 +89,6 @@ struct Pf_ext_packetfilt pf; static int vlanid = 0; static void usage(void); -void show_count(); static void snoop_sigrecover(int sig, siginfo_t *info, void *p); static char *protmalloc(size_t); static void resetperm(void); diff --git a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.h b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.h index 9eb27bafaa..263e45c361 100644 --- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.h +++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.h @@ -125,11 +125,13 @@ extern char *prot_nest_prefix; extern char *get_sum_line(void); extern char *get_detail_line(int, int); +extern int want_packet(uchar_t *, int, int); extern void set_vlan_id(int); extern struct timeval prev_time; extern void process_pkt(struct sb_hdr *, char *, int, int); extern char *getflag(int, int, char *, char *); extern void show_header(char *, char *, int); +extern void show_count(void); extern void xdr_init(char *, int); extern char *get_line(int, int); extern int get_line_remain(void); @@ -263,6 +265,7 @@ extern char *tohex(char *p, int len); extern char *printether(struct ether_addr *); extern char *print_ethertype(int); extern const char *arp_htype(int); +extern int 
valid_rpc(char *, int); /* * Describes characteristics of the Media Access Layer. @@ -283,6 +286,8 @@ extern const char *arp_htype(int); * and only use a user space filter if the filter expression * cannot be expressed in kernel space. */ +typedef uint_t (interpreter_fn_t)(int, char *, int, int); +typedef uint_t (headerlen_fn_t)(char *, size_t); typedef struct interface { uint_t mac_type; uint_t mtu_size; @@ -290,8 +295,8 @@ typedef struct interface { size_t network_type_len; uint_t network_type_ip; uint_t network_type_ipv6; - uint_t (*header_len)(char *); - uint_t (*interpreter)(int, char *, int, int); + headerlen_fn_t *header_len; + interpreter_fn_t *interpreter; boolean_t try_kernel_filter; } interface_t; diff --git a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_capture.c b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_capture.c index 20d0f1c63b..d6ad9b2e03 100644 --- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_capture.c +++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_capture.c @@ -448,7 +448,7 @@ scan(char *buf, int len, int filter, int cap, int old, void (*proc)(), header_okay = 1; if (!filter || - want_packet(pktp, + want_packet((uchar_t *)pktp, nhdrp->sbh_msglen, nhdrp->sbh_origlen)) { count++; diff --git a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ether.c b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ether.c index d2e9b8fe42..2fcb69bf8a 100644 --- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ether.c +++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ether.c @@ -42,6 +42,7 @@ #include <sys/ethernet.h> #include <sys/vlan.h> #include <sys/zone.h> +#include <inet/iptun.h> #include <sys/byteorder.h> #include <limits.h> #include <inet/ip.h> @@ -51,12 +52,12 @@ #include "at.h" #include "snoop.h" -static uint_t ether_header_len(char *), fddi_header_len(char *), - tr_header_len(char *), ib_header_len(char *), ipnet_header_len(char *); -static uint_t interpret_ether(), interpret_fddi(), interpret_tr(); -static uint_t interpret_ib(int, char *, int, int), - interpret_ipnet(int, 
char *, int, int); +static headerlen_fn_t ether_header_len, fddi_header_len, tr_header_len, + ib_header_len, ipnet_header_len, ipv4_header_len, ipv6_header_len; +static interpreter_fn_t interpret_ether, interpret_fddi, interpret_tr, + interpret_ib, interpret_ipnet, interpret_iptun; static void addr_copy_swap(struct ether_addr *, struct ether_addr *); +static int tr_machdr_len(char *, int *, int *); interface_t *interface; interface_t INTERFACES[] = { @@ -85,6 +86,18 @@ interface_t INTERFACES[] = { { DL_IPNET, INT_MAX, 1, 1, IPV4_VERSION, IPV6_VERSION, ipnet_header_len, interpret_ipnet, B_TRUE }, + /* IPv4 tunnel */ + { DL_IPV4, 0, 9, 1, IPPROTO_ENCAP, IPPROTO_IPV6, + ipv4_header_len, interpret_iptun, B_FALSE }, + + /* IPv6 tunnel */ + { DL_IPV6, 0, 40, 1, IPPROTO_ENCAP, IPPROTO_IPV6, + ipv6_header_len, interpret_iptun, B_FALSE }, + + /* 6to4 tunnel */ + { DL_6TO4, 0, 9, 1, IPPROTO_ENCAP, IPPROTO_IPV6, + ipv4_header_len, interpret_iptun, B_FALSE }, + { (uint_t)-1, 0, 0, 0, 0, NULL, NULL, B_FALSE } }; @@ -110,11 +123,9 @@ static int datalen; /* current data buffer length */ static const struct ether_addr all_isis_rbridges = ALL_ISIS_RBRIDGES; uint_t -interpret_ether(flags, e, elen, origlen) - int flags; - struct ether_header *e; - int elen, origlen; +interpret_ether(int flags, char *header, int elen, int origlen) { + struct ether_header *e = (struct ether_header *)header; uchar_t *off, *ieeestart; int len; int ieee8023 = 0; @@ -138,10 +149,11 @@ interpret_ether(flags, e, elen, origlen) } inner_pkt: if (origlen < 14) { - if (flags & F_SUM) + if (flags & F_SUM) { (void) sprintf(get_sum_line(), - "RUNT (short packet - %d bytes)", - origlen); + "RUNT (short packet - %d bytes)", + origlen); + } if (flags & F_DTAIL) show_header("RUNT: ", "Short packet", origlen); return (elen); @@ -373,10 +385,13 @@ inner_pkt: * a VLAN header otherwise. 
*/ uint_t -ether_header_len(e) -char *e; +ether_header_len(char *e, size_t msgsize) { uint16_t ether_type = 0; + + if (msgsize < sizeof (struct ether_header)) + return (0); + e += (offsetof(struct ether_header, ether_type)); GETINT16(ether_type, e); @@ -623,8 +638,7 @@ ETHERTYPE_PUP, "Xerox PUP", }; char * -print_fc(type) -uint_t type; +print_fc(uint_t type) { switch (type) { @@ -636,8 +650,7 @@ uint_t type; } char * -print_smtclass(type) -uint_t type; +print_smtclass(uint_t type) { switch (type) { case 0x01: return ("NIF"); @@ -657,8 +670,7 @@ uint_t type; } char * -print_smttype(type) -uint_t type; +print_smttype(uint_t type) { switch (type) { case 0x01: return ("Announce"); @@ -669,8 +681,7 @@ uint_t type; } char * -print_ethertype(type) - int type; +print_ethertype(int type) { int i; @@ -793,10 +804,7 @@ print_sr(struct tr_ri *rh) } uint_t -interpret_tr(flags, e, elen, origlen) - int flags; - caddr_t e; - int elen, origlen; +interpret_tr(int flags, caddr_t e, int elen, int origlen) { struct tr_header *mh; struct tr_ri *rh; @@ -808,7 +816,6 @@ interpret_tr(flags, e, elen, origlen) extern char *dst_name, *src_name; int ethertype; int is_llc = 0, is_snap = 0, source_routing = 0; - int tr_machdr_len(char *, int *, int *); int blen = MAX(origlen, 17800); if (data != NULL && datalen != 0 && datalen < blen) { @@ -824,10 +831,11 @@ interpret_tr(flags, e, elen, origlen) } if (origlen < ACFCDASA_LEN) { - if (flags & F_SUM) + if (flags & F_SUM) { (void) sprintf(get_sum_line(), - "RUNT (short packet - %d bytes)", - origlen); + "RUNT (short packet - %d bytes)", + origlen); + } if (flags & F_DTAIL) show_header("RUNT: ", "Short packet", origlen); return (elen); @@ -842,8 +850,8 @@ interpret_tr(flags, e, elen, origlen) if (is_llc = tr_machdr_len(e, &maclen, &source_routing)) { snaphdr = (struct llc_snap_hdr *)(e + maclen); if (snaphdr->d_lsap == LSAP_SNAP && - snaphdr->s_lsap == LSAP_SNAP && - snaphdr->control == CNTL_LLC_UI) { + snaphdr->s_lsap == LSAP_SNAP && + 
snaphdr->control == CNTL_LLC_UI) { is_snap = 1; } } @@ -852,7 +860,7 @@ interpret_tr(flags, e, elen, origlen) sizeof (struct ether_addr)) == 0) dst_name = "(broadcast)"; else if (memcmp(&mh->dhost, &tokenbroadcastaddr2, - sizeof (struct ether_addr)) == 0) + sizeof (struct ether_addr)) == 0) dst_name = "(mac broadcast)"; else if (mh->dhost.ether_addr_octet[0] & TR_FN_ADDR) dst_name = "(functional)"; @@ -894,72 +902,74 @@ interpret_tr(flags, e, elen, origlen) if (is_llc) { if (is_snap) { - (void) sprintf(get_sum_line(), - "TR LLC w/SNAP Type=%04X (%s), size=%d bytes", - ethertype, - print_ethertype(ethertype), - origlen); + (void) sprintf(get_sum_line(), "TR LLC w/SNAP " + "Type=%04X (%s), size=%d bytes", + ethertype, + print_ethertype(ethertype), + origlen); } else { - (void) sprintf(get_sum_line(), - "TR LLC, but no SNAP encoding, size = %d bytes", - origlen); + (void) sprintf(get_sum_line(), "TR LLC, but no " + "SNAP encoding, size = %d bytes", + origlen); } } else { (void) sprintf(get_sum_line(), - "TR MAC FC=%02X (%s), size = %d bytes", - fc, print_fc(fc), origlen); + "TR MAC FC=%02X (%s), size = %d bytes", + fc, print_fc(fc), origlen); } } if (flags & F_DTAIL) { - show_header("TR: ", "TR Header", elen); - show_space(); - (void) sprintf(get_line(0, 0), - "Packet %d arrived at %d:%02d:%d.%05d", - pi_frame, - pi_time_hour, pi_time_min, pi_time_sec, - pi_time_usec / 10); - (void) sprintf(get_line(0, 0), - "Packet size = %d bytes", - elen); - (void) sprintf(get_line(0, 1), - "Frame Control = %02x (%s)", - fc, print_fc(fc)); - (void) sprintf(get_line(2, 6), - "Destination = %s, %s", - printether(&mh->dhost), - print_etherinfo(&mh->dhost)); - (void) sprintf(get_line(8, 6), - "Source = %s, %s", - printether(&mh->shost), - print_etherinfo(&mh->shost)); - - if (source_routing) - sprintf(get_line(ACFCDASA_LEN, rh->len), print_sr(rh)); + show_header("TR: ", "TR Header", elen); + show_space(); + (void) sprintf(get_line(0, 0), + "Packet %d arrived at %d:%02d:%d.%05d", + 
pi_frame, + pi_time_hour, pi_time_min, pi_time_sec, + pi_time_usec / 10); + (void) sprintf(get_line(0, 0), + "Packet size = %d bytes", + elen); + (void) sprintf(get_line(0, 1), + "Frame Control = %02x (%s)", + fc, print_fc(fc)); + (void) sprintf(get_line(2, 6), + "Destination = %s, %s", + printether(&mh->dhost), + print_etherinfo(&mh->dhost)); + (void) sprintf(get_line(8, 6), + "Source = %s, %s", + printether(&mh->shost), + print_etherinfo(&mh->shost)); - if (is_llc) { - (void) sprintf(get_line(maclen, 1), - "Dest Service Access Point = %02x", - snaphdr->d_lsap); - (void) sprintf(get_line(maclen+1, 1), - "Source Service Access Point = %02x", - snaphdr->s_lsap); - (void) sprintf(get_line(maclen+2, 1), - "Control = %02x", - snaphdr->control); - if (is_snap) - (void) sprintf(get_line(maclen+3, 3), - "SNAP Protocol Id = %02x%02x%02x", - snaphdr->org[0], snaphdr->org[1], - snaphdr->org[2]); - } + if (source_routing) + sprintf(get_line(ACFCDASA_LEN, rh->len), print_sr(rh)); - if (is_snap) - (void) sprintf(get_line(maclen+6, 2), - "SNAP Type = %04X (%s)", - ethertype, print_ethertype(ethertype)); + if (is_llc) { + (void) sprintf(get_line(maclen, 1), + "Dest Service Access Point = %02x", + snaphdr->d_lsap); + (void) sprintf(get_line(maclen+1, 1), + "Source Service Access Point = %02x", + snaphdr->s_lsap); + (void) sprintf(get_line(maclen+2, 1), + "Control = %02x", + snaphdr->control); + if (is_snap) { + (void) sprintf(get_line(maclen+3, 3), + "SNAP Protocol Id = %02x%02x%02x", + snaphdr->org[0], snaphdr->org[1], + snaphdr->org[2]); + } + } - show_space(); + if (is_snap) { + (void) sprintf(get_line(maclen+6, 2), + "SNAP Type = %04X (%s)", + ethertype, print_ethertype(ethertype)); + } + + show_space(); } /* go to the next protocol layer */ @@ -997,7 +1007,7 @@ interpret_tr(flags, e, elen, origlen) * 0: mac frame * 1: llc frame */ -int +static int tr_machdr_len(char *e, int *lenp, int *source_routing) { struct tr_header *mh; @@ -1023,8 +1033,7 @@ tr_machdr_len(char *e, int 
*lenp, int *source_routing) } uint_t -tr_header_len(e) -char *e; +tr_header_len(char *e, size_t msgsize) { struct llc_snap_hdr *snaphdr; int len = 0, source_routing; @@ -1032,10 +1041,13 @@ char *e; if (tr_machdr_len(e, &len, &source_routing) == 0) return (len); /* it's a MAC frame */ + if (msgsize < sizeof (struct llc_snap_hdr)) + return (0); + snaphdr = (struct llc_snap_hdr *)(e + len); if (snaphdr->d_lsap == LSAP_SNAP && - snaphdr->s_lsap == LSAP_SNAP && - snaphdr->control == CNTL_LLC_UI) + snaphdr->s_lsap == LSAP_SNAP && + snaphdr->control == CNTL_LLC_UI) len += LLC_SNAP_HDR_LEN; /* it's a SNAP frame */ else len += LLC_HDR1_LEN; @@ -1051,10 +1063,7 @@ struct fddi_header { }; uint_t -interpret_fddi(flags, e, elen, origlen) - int flags; - caddr_t e; - int elen, origlen; +interpret_fddi(int flags, caddr_t e, int elen, int origlen) { struct fddi_header fhdr, *f = &fhdr; char *off; @@ -1078,10 +1087,11 @@ interpret_fddi(flags, e, elen, origlen) } if (origlen < 13) { - if (flags & F_SUM) + if (flags & F_SUM) { (void) sprintf(get_sum_line(), - "RUNT (short packet - %d bytes)", - origlen); + "RUNT (short packet - %d bytes)", + origlen); + } if (flags & F_DTAIL) show_header("RUNT: ", "Short packet", origlen); return (elen); @@ -1151,86 +1161,89 @@ interpret_fddi(flags, e, elen, origlen) if (is_llc) { if (is_snap) { (void) sprintf(get_sum_line(), - "FDDI LLC Type=%04X (%s), size = %d bytes", - ethertype, - print_ethertype(ethertype), - origlen); + "FDDI LLC Type=%04X (%s), size = %d bytes", + ethertype, + print_ethertype(ethertype), + origlen); } else { - (void) sprintf(get_sum_line(), - "LLC, but no SNAP encoding, size = %d bytes", - origlen); + (void) sprintf(get_sum_line(), "LLC, but no " + "SNAP encoding, size = %d bytes", + origlen); } } else if (is_smt) { - (void) sprintf(get_sum_line(), - "SMT Type=%02X (%s), Class = %02X (%s), size = %d bytes", - *(uchar_t *)(data+1), print_smttype(*(data+1)), *data, - print_smtclass(*data), origlen); + (void) 
sprintf(get_sum_line(), "SMT Type=%02X (%s), " + "Class = %02X (%s), size = %d bytes", + *(uchar_t *)(data+1), print_smttype(*(data+1)), + *data, print_smtclass(*data), origlen); } else { (void) sprintf(get_sum_line(), - "FC=%02X (%s), size = %d bytes", - f->fc, print_fc(f->fc), origlen); + "FC=%02X (%s), size = %d bytes", + f->fc, print_fc(f->fc), origlen); } } if (flags & F_DTAIL) { - show_header("FDDI: ", "FDDI Header", elen); - show_space(); - (void) sprintf(get_line(0, 0), - "Packet %d arrived at %d:%02d:%d.%05d", - pi_frame, - pi_time_hour, pi_time_min, pi_time_sec, - pi_time_usec / 10); - (void) sprintf(get_line(0, 0), - "Packet size = %d bytes", - elen, elen); - (void) sprintf(get_line(0, 6), - "Destination = %s, %s", - printether(&f->dhost), - print_etherinfo(&f->dhost)); - (void) sprintf(get_line(6, 6), - "Source = %s, %s", - printether(&f->shost), - print_etherinfo(&f->shost)); + show_header("FDDI: ", "FDDI Header", elen); + show_space(); + (void) sprintf(get_line(0, 0), + "Packet %d arrived at %d:%02d:%d.%05d", + pi_frame, + pi_time_hour, pi_time_min, pi_time_sec, + pi_time_usec / 10); + (void) sprintf(get_line(0, 0), + "Packet size = %d bytes", + elen, elen); + (void) sprintf(get_line(0, 6), + "Destination = %s, %s", + printether(&f->dhost), + print_etherinfo(&f->dhost)); + (void) sprintf(get_line(6, 6), + "Source = %s, %s", + printether(&f->shost), + print_etherinfo(&f->shost)); - if (is_llc) { - (void) sprintf(get_line(12, 2), - "Frame Control = %02x (%s)", - f->fc, print_fc(f->fc)); - (void) sprintf(get_line(12, 2), - "Dest Service Access Point = %02x", - f->dsap); - (void) sprintf(get_line(12, 2), - "Source Service Access Point = %02x", - f->ssap); - (void) sprintf(get_line(12, 2), - "Control = %02x", - f->ctl); - if (is_snap) + if (is_llc) { (void) sprintf(get_line(12, 2), - "Protocol Id = %02x%02x%02x", - f->proto_id[0], f->proto_id[1], f->proto_id[2]); - } else if (is_smt) { - (void) sprintf(get_line(12, 2), - "Frame Control = %02x (%s)", - 
f->fc, print_fc(f->fc)); - (void) sprintf(get_line(12, 2), - "Class = %02x (%s)", - (uchar_t)*data, print_smtclass(*data)); - (void) sprintf(get_line(12, 2), - "Type = %02x (%s)", - *(uchar_t *)(data+1), print_smttype(*(data+1))); - } else { - (void) sprintf(get_line(12, 2), - "FC=%02X (%s), size = %d bytes", - f->fc, print_fc(f->fc), origlen); - } + "Frame Control = %02x (%s)", + f->fc, print_fc(f->fc)); + (void) sprintf(get_line(12, 2), + "Dest Service Access Point = %02x", + f->dsap); + (void) sprintf(get_line(12, 2), + "Source Service Access Point = %02x", + f->ssap); + (void) sprintf(get_line(12, 2), + "Control = %02x", + f->ctl); + if (is_snap) { + (void) sprintf(get_line(12, 2), + "Protocol Id = %02x%02x%02x", + f->proto_id[0], f->proto_id[1], + f->proto_id[2]); + } + } else if (is_smt) { + (void) sprintf(get_line(12, 2), + "Frame Control = %02x (%s)", + f->fc, print_fc(f->fc)); + (void) sprintf(get_line(12, 2), + "Class = %02x (%s)", + (uchar_t)*data, print_smtclass(*data)); + (void) sprintf(get_line(12, 2), + "Type = %02x (%s)", + *(uchar_t *)(data+1), print_smttype(*(data+1))); + } else { + (void) sprintf(get_line(12, 2), + "FC=%02X (%s), size = %d bytes", + f->fc, print_fc(f->fc), origlen); + } - if (is_snap) - (void) sprintf(get_line(12, 2), - "LLC Type = %04X (%s)", - ethertype, print_ethertype(ethertype)); + if (is_snap) { + (void) sprintf(get_line(12, 2), + "LLC Type = %04X (%s)", + ethertype, print_ethertype(ethertype)); + } - show_space(); + show_space(); } /* go to the next protocol layer */ @@ -1257,10 +1270,13 @@ interpret_fddi(flags, e, elen, origlen) } uint_t -fddi_header_len(char *e) +fddi_header_len(char *e, size_t msgsize) { struct fddi_header fhdr, *f = &fhdr; + if (msgsize < sizeof (struct fddi_header)) + return (0); + (void) memcpy(&f->fc, e, sizeof (f->fc)); (void) memcpy(&f->dhost, e+1, sizeof (struct ether_addr)); (void) memcpy(&f->shost, e+7, sizeof (struct ether_addr)); @@ -1286,18 +1302,17 @@ fddi_header_len(char *e) * Print the 
given Ethernet address */ char * -printether(p) - struct ether_addr *p; +printether(struct ether_addr *p) { static char buf[256]; sprintf(buf, "%x:%x:%x:%x:%x:%x", - p->ether_addr_octet[0], - p->ether_addr_octet[1], - p->ether_addr_octet[2], - p->ether_addr_octet[3], - p->ether_addr_octet[4], - p->ether_addr_octet[5]); + p->ether_addr_octet[0], + p->ether_addr_octet[1], + p->ether_addr_octet[2], + p->ether_addr_octet[3], + p->ether_addr_octet[4], + p->ether_addr_octet[5]); return (buf); } @@ -1489,8 +1504,7 @@ ether_ouiname(uint32_t oui) * Print the additional Ethernet address info */ static char * -print_etherinfo(eaddr) - struct ether_addr *eaddr; +print_etherinfo(struct ether_addr *eaddr) { uint_t addr = 0; char *p = (char *)&addr + 1; @@ -1546,9 +1560,7 @@ static uchar_t endianswap[] = { }; static void -addr_copy_swap(pd, ps) - struct ether_addr *pd; - struct ether_addr *ps; +addr_copy_swap(struct ether_addr *pd, struct ether_addr *ps) { pd->ether_addr_octet[0] = endianswap[ps->ether_addr_octet[0]]; pd->ether_addr_octet[1] = endianswap[ps->ether_addr_octet[1]]; @@ -1560,7 +1572,7 @@ addr_copy_swap(pd, ps) /* ARGSUSED */ uint_t -ib_header_len(char *hdr) +ib_header_len(char *hdr, size_t msgsize) { return (IPOIB_HDRSIZE); } @@ -1571,7 +1583,6 @@ interpret_ib(int flags, char *header, int elen, int origlen) struct ipoib_header *hdr = (struct ipoib_header *)header; char *off; int len; - extern char *dst_name; unsigned short ethertype; int blen = MAX(origlen, 4096); @@ -1647,8 +1658,9 @@ interpret_ib(int flags, char *header, int elen, int origlen) return (elen); } +/* ARGSUSED */ uint_t -ipnet_header_len(char *hdr) +ipnet_header_len(char *hdr, size_t msgsize) { return (sizeof (dl_ipnetinfo_t)); } @@ -1728,3 +1740,63 @@ interpret_ipnet(int flags, char *header, int elen, int origlen) return (0); } + +uint_t +ipv4_header_len(char *hdr, size_t msgsize) +{ + return (msgsize < sizeof (ipha_t) ? 
0 : IPH_HDR_LENGTH((ipha_t *)hdr)); +} + +/* + * The header length needs to include all potential extension headers, as the + * caller expects to use this length as an offset to the inner network layer + * header to be used as a filter offset. IPsec headers aren't passed up here, + * and neither are fragmentation headers. + */ +uint_t +ipv6_header_len(char *hdr, size_t msgsize) +{ + ip6_t *ip6hdr = (ip6_t *)hdr; + ip6_hbh_t *exthdr; + uint_t hdrlen = sizeof (ip6_t), exthdrlen; + char *pptr; + uint8_t nxt; + + if (msgsize < sizeof (ip6_t)) + return (0); + + nxt = ip6hdr->ip6_nxt; + pptr = (char *)(ip6hdr + 1); + + while (nxt != IPPROTO_ENCAP && nxt != IPPROTO_IPV6) { + switch (nxt) { + case IPPROTO_HOPOPTS: + case IPPROTO_DSTOPTS: + case IPPROTO_ROUTING: + if (msgsize < hdrlen + sizeof (ip6_hbh_t)) + return (0); + exthdr = (ip6_hbh_t *)pptr; + exthdrlen = 8 + exthdr->ip6h_len * 8; + hdrlen += exthdrlen; + pptr += exthdrlen; + nxt = exthdr->ip6h_nxt; + break; + default: + /* + * This is garbage, there's no way to know where the + * inner IP header is. + */ + return (0); + } + } + + return (hdrlen); +} + +/* ARGSUSED */ +uint_t +interpret_iptun(int flags, char *header, int elen, int origlen) +{ + (void) interpret_ip(flags, (struct ip *)header, elen); + return (elen); +} diff --git a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_filter.c b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_filter.c index 386274ec32..fab3922f14 100644 --- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_filter.c +++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_filter.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -32,7 +32,8 @@ #include <sys/types.h> #include <sys/time.h> #include <stddef.h> - +#include <unistd.h> +#include <stropts.h> #include <sys/socket.h> #include <sys/sockio.h> #include <sys/vlan.h> @@ -557,14 +558,13 @@ want_packet(uchar_t *pkt, int len, int origlen) uchar_t **offp; /* current offset */ uchar_t *opkt = NULL; uint_t olen; - uint_t ethertype = 0; sp = stack; *sp = 1; base = pkt; offp = offstack; - header_size = (*interface->header_len)((char *)pkt); + header_size = (*interface->header_len)((char *)pkt, len); for (op = oplist; *op; op++) { switch ((enum optype) *op) { @@ -839,7 +839,7 @@ want_packet(uchar_t *pkt, int len, int origlen) } /* align */ (void) memcpy(&rpcmsg, rpc, 24); - if (!valid_rpc(&rpcmsg, 24)) { + if (!valid_rpc((char *)&rpcmsg, 24)) { if (sp >= &stack[MAXSS]) return (0); *(++sp) = 0; @@ -1379,6 +1379,27 @@ static match_type_t ipnet_match_types[] = { 0, 0, 0, 0, 0, 0 }; +static match_type_t iptun_match_types[] = { + "ip", 0, 1, IPPROTO_ENCAP, -1, OP_OFFSET_ETHERTYPE, + "ip6", 0, 1, IPPROTO_IPV6, -1, OP_OFFSET_ETHERTYPE, + "tcp", 9, 1, IPPROTO_TCP, 0, OP_OFFSET_LINK, + "tcp", 6, 1, IPPROTO_TCP, 1, OP_OFFSET_LINK, + "udp", 9, 1, IPPROTO_UDP, 0, OP_OFFSET_LINK, + "udp", 6, 1, IPPROTO_UDP, 1, OP_OFFSET_LINK, + "icmp", 9, 1, IPPROTO_ICMP, 0, OP_OFFSET_LINK, + "icmp6", 6, 1, IPPROTO_ICMPV6, 1, OP_OFFSET_LINK, + "ospf", 9, 1, IPPROTO_OSPF, 0, OP_OFFSET_LINK, + "ospf", 6, 1, IPPROTO_OSPF, 1, OP_OFFSET_LINK, + "ip-in-ip", 9, 1, IPPROTO_ENCAP, 0, OP_OFFSET_LINK, + "esp", 9, 1, IPPROTO_ESP, 0, OP_OFFSET_LINK, + "esp", 6, 1, IPPROTO_ESP, 1, OP_OFFSET_LINK, + "ah", 9, 1, IPPROTO_AH, 0, OP_OFFSET_LINK, + "ah", 6, 1, IPPROTO_AH, 1, OP_OFFSET_LINK, + "sctp", 9, 1, IPPROTO_SCTP, 0, OP_OFFSET_LINK, + "sctp", 6, 1, IPPROTO_SCTP, 1, OP_OFFSET_LINK, + 0, 0, 0, 0, 0, 0 +}; + static void generate_check(match_type_t match_types[], int index, int type) { @@ -1422,6 +1443,11 @@ comparison(char *s) case DL_IPNET: match_types = ipnet_match_types; break; + 
case DL_IPV4: + case DL_IPV6: + case DL_6TO4: + match_types = iptun_match_types; + break; default: return (0); } @@ -1894,15 +1920,6 @@ ethertype_match(int val) } } -static void -ipnettype_match(int val) -{ - int ipnet_offset = interface->network_type_offset; - - emitop(OP_OFFSET_ETHERTYPE); - compare_value(ipnet_offset, 2, val); -} - /* * Match a network address. The host part * is masked out. The network address may diff --git a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ip.c b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ip.c index 4eaa51817e..816dc7be03 100644 --- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ip.c +++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ip.c @@ -19,13 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - - #include <stdio.h> #include <string.h> #include <fcntl.h> @@ -97,7 +94,6 @@ interpret_ip(int flags, const struct ip *ip, int fraglen) return (iplen); } - /* XXX Should this count for mix-and-match v4/v6 encapsulations? */ if (encap_levels == 0) total_encap_levels = 0; encap_levels++; @@ -124,10 +120,8 @@ interpret_ip(int flags, const struct ip *ip, int fraglen) if (morefrag || fragoffset != 0) isfrag = B_TRUE; - if (encap_levels == 1) { - src_name = addrtoname(AF_INET, &ip->ip_src); - dst_name = addrtoname(AF_INET, &ip->ip_dst); - } /* Else we already have the src_name and dst_name we want! 
*/ + src_name = addrtoname(AF_INET, &ip->ip_src); + dst_name = addrtoname(AF_INET, &ip->ip_dst); if (flags & F_SUM) { if (isfrag) { @@ -370,17 +364,19 @@ interpret_ipv6(int flags, const ip6_t *ip6h, int fraglen) version = ntohl(ip6h->ip6_vcf) >> 28; - if (strcmp(src_name, src_addrstr) == 0) + if (strcmp(src_name, src_addrstr) == 0) { print_srcname[0] = '\0'; - else + } else { snprintf(print_srcname, sizeof (print_srcname), - ", %s", src_name); + ", %s", src_name); + } - if (strcmp(dst_name, dst_addrstr) == 0) + if (strcmp(dst_name, dst_addrstr) == 0) { print_dstname[0] = '\0'; - else + } else { snprintf(print_dstname, sizeof (print_dstname), - ", %s", dst_name); + ", %s", dst_name); + } show_header("IPv6: ", "IPv6 Header", iplen); show_space(); diff --git a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_rpc.c b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_rpc.c index 8812758e5a..b317bd21a4 100644 --- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_rpc.c +++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_rpc.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/types.h> #include <sys/errno.h> #include <setjmp.h> @@ -64,7 +61,6 @@ static void rpc_detail_reply(int, int, struct cache_struct *, char *, int len); static void print_creds(int); static void print_verif(int); static void stash_xid(ulong_t, int, int, int, int); -int valid_rpc(char *, int); #define LAST_FRAG ((ulong_t)1 << 31) @@ -87,7 +83,7 @@ interpret_rpc(int flags, char *rpc, int fraglen, int type) if (setjmp(xdr_err)) { if (flags & F_DTAIL) (void) sprintf(get_line(0, 0), - "---- short frame ---"); + "---- short frame ---"); return (fraglen); } @@ -106,9 +102,10 @@ interpret_rpc(int flags, char *rpc, int fraglen, int type) vers = getxdr_long(); proc = getxdr_long(); stash_xid(xid, pi_frame, prog, vers, proc); - if (!(flags & (F_SUM | F_DTAIL))) + if (!(flags & (F_SUM | F_DTAIL))) { protoprint(flags, CALL, xid, prog, vers, proc, - rpc, fraglen); + rpc, fraglen); + } } else { x = find_xid(xid); } @@ -117,10 +114,10 @@ interpret_rpc(int flags, char *rpc, int fraglen, int type) switch (direction) { case CALL: (void) sprintf(get_sum_line(), - "RPC C XID=%lu PROG=%d (%s) VERS=%d PROC=%d", - xid, - prog, nameof_prog(prog), - vers, proc); + "RPC C XID=%lu PROG=%d (%s) VERS=%d PROC=%d", + xid, + prog, nameof_prog(prog), + vers, proc); if (getxdr_long() == RPCSEC_GSS) { /* Cred auth type */ extract_rpcsec_gss_cred_info(xid); /* RPCSEC_GSS cred auth data */ @@ -132,7 +129,7 @@ interpret_rpc(int flags, char *rpc, int fraglen, int type) xdr_skip(RNDUP(getxdr_long())); /* Verf auth data */ protoprint(flags, CALL, xid, prog, vers, proc, - rpc, fraglen); + rpc, fraglen); break; case REPLY: @@ -141,7 +138,7 @@ interpret_rpc(int flags, char *rpc, int fraglen, int type) (void) sprintf(lp, "RPC R XID=%lu", xid); else (void) sprintf(lp, "RPC R (#%d) XID=%lu", - x->xid_frame, xid); + x->xid_frame, xid); lp += strlen(lp); status = getxdr_long(); @@ -152,18 +149,18 @@ interpret_rpc(int flags, char *rpc, int fraglen, int type) 
xdr_skip(RNDUP(getxdr_long())); astat = getxdr_long(); (void) sprintf(lp, " %s", - nameof_astat(astat)); + nameof_astat(astat)); lp += strlen(lp); switch (astat) { case SUCCESS: if (x) { protoprint(flags, REPLY, - xid, - x->xid_prog, - x->xid_vers, - x->xid_proc, - rpc, fraglen); + xid, + x->xid_prog, + x->xid_vers, + x->xid_proc, + rpc, fraglen); } break; @@ -173,8 +170,8 @@ interpret_rpc(int flags, char *rpc, int fraglen, int type) lo = getxdr_long(); hi = getxdr_long(); (void) sprintf(lp, - " (low=%d, high=%d)", - lo, hi); + " (low=%d, high=%d)", + lo, hi); break; case GARBAGE_ARGS: @@ -193,14 +190,14 @@ interpret_rpc(int flags, char *rpc, int fraglen, int type) hi = getxdr_long(); (void) sprintf(lp, " Vers mismatch (low=%d, high=%d)", - lo, hi); + lo, hi); break; case AUTH_ERROR: why = getxdr_u_long(); (void) sprintf(lp, - " Can't authenticate (%s)", - nameof_why(why)); + " Can't authenticate (%s)", + nameof_why(why)); break; } } @@ -213,23 +210,23 @@ interpret_rpc(int flags, char *rpc, int fraglen, int type) show_space(); if (type == IPPROTO_TCP) { /* record mark */ (void) sprintf(get_line(markpos, markpos+4), - "Record Mark: %s fragment, length = %d", - recmark & LAST_FRAG ? "last" : "", - recmark & ~LAST_FRAG); + "Record Mark: %s fragment, length = %d", + recmark & LAST_FRAG ? "last" : "", + recmark & ~LAST_FRAG); } (void) sprintf(get_line(0, 0), - "Transaction id = %lu", - xid); + "Transaction id = %lu", + xid); (void) sprintf(get_line(0, 0), - "Type = %d (%s)", - direction, - direction == CALL ? "Call":"Reply"); + "Type = %d (%s)", + direction, + direction == CALL ? 
"Call":"Reply"); switch (direction) { case CALL: rpc_detail_call(flags, xid, rpcvers, - prog, vers, proc, rpc, fraglen); + prog, vers, proc, rpc, fraglen); break; case REPLY: rpc_detail_reply(flags, xid, x, rpc, fraglen); @@ -248,11 +245,11 @@ rpc_detail_call(int flags, int xid, int rpcvers, int prog, int vers, int proc, char *nameof_prog(); (void) sprintf(get_line(pos, getxdr_pos()), - "RPC version = %d", - rpcvers); + "RPC version = %d", + rpcvers); (void) sprintf(get_line(pos, getxdr_pos()), - "Program = %d (%s), version = %d, procedure = %d", - prog, nameof_prog(prog), vers, proc); + "Program = %d (%s), version = %d, procedure = %d", + prog, nameof_prog(prog), vers, proc); print_creds(xid); print_verif(CALL); show_trailer(); @@ -314,8 +311,8 @@ print_creds(int xid) flavor = getxdr_long(); authlen = getxdr_long(); (void) sprintf(get_line(pos, getxdr_pos()), - "Credentials: Flavor = %d (%s), len = %d bytes", - flavor, nameof_flavor(flavor), authlen); + "Credentials: Flavor = %d (%s), len = %d bytes", + flavor, nameof_flavor(flavor), authlen); if (authlen <= 0) return; @@ -327,8 +324,8 @@ print_creds(int xid) uid = getxdr_u_long(); gid = getxdr_u_long(); (void) sprintf(get_line(pos, getxdr_pos()), - " Uid = %d, Gid = %d", - uid, gid); + " Uid = %d, Gid = %d", + uid, gid); len = getxdr_u_long(); line = get_line(pos, len * 4); if (len == 0) @@ -347,24 +344,24 @@ print_creds(int xid) case AUTH_DES: namekind = getxdr_u_long(); (void) sprintf(get_line(pos, getxdr_pos()), - " Name kind = %d (%s)", - namekind, - namekind == ADN_FULLNAME ? - "fullname" : "nickname"); - switch (namekind) { - case ADN_FULLNAME: - (void) showxdr_string(64, - " Network name = %s"); - (void) showxdr_hex(8, - " Conversation key = 0x%s (DES encrypted)"); - (void) showxdr_hex(4, - " Window = 0x%s (DES encrypted)"); - break; + " Name kind = %d (%s)", + namekind, + namekind == ADN_FULLNAME ? 
+ "fullname" : "nickname"); + switch (namekind) { + case ADN_FULLNAME: + (void) showxdr_string(64, + " Network name = %s"); + (void) showxdr_hex(8, + " Conversation key = 0x%s (DES encrypted)"); + (void) showxdr_hex(4, + " Window = 0x%s (DES encrypted)"); + break; - case ADN_NICKNAME: - (void) showxdr_hex(4, " Nickname = 0x%s"); - break; - }; + case ADN_NICKNAME: + (void) showxdr_hex(4, " Nickname = 0x%s"); + break; + }; break; case RPCSEC_GSS: @@ -386,8 +383,8 @@ print_verif(int direction) flavor = getxdr_long(); verlen = getxdr_long(); (void) sprintf(get_line(pos, getxdr_pos()), - "Verifier : Flavor = %d (%s), len = %d bytes", - flavor, nameof_flavor(flavor), verlen); + "Verifier : Flavor = %d (%s), len = %d bytes", + flavor, nameof_flavor(flavor), verlen); if (verlen == 0) return; @@ -396,7 +393,7 @@ print_verif(int direction) (void) showxdr_hex(8, " Timestamp = 0x%s (DES encrypted)"); if (direction == CALL) (void) showxdr_hex(4, - " Window = 0x%s (DES encrypted)"); + " Window = 0x%s (DES encrypted)"); else (void) showxdr_hex(4, " Nickname = 0x%s"); break; @@ -622,14 +619,14 @@ rpc_detail_reply(int flags, int xid, struct cache_struct *x, char *data, if (x) { (void) sprintf(get_line(0, 0), - "This is a reply to frame %d", - x->xid_frame); + "This is a reply to frame %d", + x->xid_frame); } pos = getxdr_pos(); status = getxdr_long(); (void) sprintf(get_line(pos, getxdr_pos()), - "Status = %d (%s)", - status, status ? "Denied" : "Accepted"); + "Status = %d (%s)", + status, status ? 
"Denied" : "Accepted"); switch (status) { case MSG_ACCEPTED: @@ -637,16 +634,16 @@ rpc_detail_reply(int flags, int xid, struct cache_struct *x, char *data, pos = getxdr_pos(); astat = getxdr_long(); (void) sprintf(get_line(pos, getxdr_pos()), - "Accept status = %d (%s)", - astat, nameof_astat(astat)); + "Accept status = %d (%s)", + astat, nameof_astat(astat)); switch (astat) { case SUCCESS: if (x) { show_trailer(); protoprint(flags, REPLY, xid, - x->xid_prog, x->xid_vers, x->xid_proc, - data, len); + x->xid_prog, x->xid_vers, x->xid_proc, + data, len); } break; case PROG_UNAVAIL : @@ -668,10 +665,10 @@ rpc_detail_reply(int flags, int xid, struct cache_struct *x, char *data, pos = getxdr_pos(); rstat = getxdr_long(); (void) sprintf(get_line(pos, getxdr_pos()), - "Reject status = %d (%s)", - rstat, - rstat ? "can't authenticate" - : "version mismatch"); + "Reject status = %d (%s)", + rstat, + rstat ? "can't authenticate" + : "version mismatch"); switch (rstat) { case RPC_MISMATCH: @@ -681,8 +678,8 @@ rpc_detail_reply(int flags, int xid, struct cache_struct *x, char *data, case AUTH_ERROR: why = getxdr_u_long(); (void) sprintf(get_line(pos, getxdr_pos()), - " Why = %d (%s)", - why, nameof_why(why)); + " Why = %d (%s)", + why, nameof_why(why)); break; } break; @@ -712,7 +709,7 @@ valid_rpc(char *rpc, int rpclen) break; case REPLY: if (xdr_u_int(&xdrm, - (uint_t *)&msg.rm_reply.rp_stat) && + (uint_t *)&msg.rm_reply.rp_stat) && (msg.rm_reply.rp_stat == MSG_ACCEPTED || msg.rm_reply.rp_stat == MSG_DENIED)) return (1); diff --git a/usr/src/cmd/devfsadm/misc_link.c b/usr/src/cmd/devfsadm/misc_link.c index a36312da91..c8cbfaeb10 100644 --- a/usr/src/cmd/devfsadm/misc_link.c +++ b/usr/src/cmd/devfsadm/misc_link.c @@ -104,7 +104,7 @@ static devfsadm_create_t misc_cbt[] = { "(^ip$)|(^tcp$)|(^udp$)|(^icmp$)|(^sctp$)|" "(^ip6$)|(^tcp6$)|(^udp6$)|(^icmp6$)|(^sctp6$)|" "(^rts$)|(^arp$)|(^ipsecah$)|(^ipsecesp$)|(^keysock$)|(^spdsock$)|" - 
"(^nca$)|(^rds$)|(^sdp$)|(^ipnet$)|(^dlpistub$)", + "(^nca$)|(^rds$)|(^sdp$)|(^ipnet$)|(^dlpistub$)|(^iptunq)", TYPE_EXACT | DRV_RE, ILEVEL_1, minor_name }, { "pseudo", "ddi_pseudo", diff --git a/usr/src/cmd/dladm/dladm.c b/usr/src/cmd/dladm/dladm.c index a2cef21b09..1a7e8f0f42 100644 --- a/usr/src/cmd/dladm/dladm.c +++ b/usr/src/cmd/dladm/dladm.c @@ -40,6 +40,7 @@ #include <getopt.h> #include <unistd.h> #include <priv.h> +#include <limits.h> #include <termios.h> #include <pwd.h> #include <auth_attr.h> @@ -55,6 +56,7 @@ #include <libdlvlan.h> #include <libdlvnic.h> #include <libdlether.h> +#include <libdliptun.h> #include <libdlsim.h> #include <libdlbridge.h> #include <libinetutil.h> @@ -192,6 +194,8 @@ static cmdfunc_t do_delete_simnet, do_show_simnet, do_up_simnet; static cmdfunc_t do_show_usage; static cmdfunc_t do_create_bridge, do_modify_bridge, do_delete_bridge; static cmdfunc_t do_add_bridge, do_remove_bridge, do_show_bridge; +static cmdfunc_t do_create_iptun, do_modify_iptun, do_delete_iptun; +static cmdfunc_t do_show_iptun, do_up_iptun, do_down_iptun; static void do_up_vnic_common(int, char **, const char *, boolean_t); @@ -210,6 +214,12 @@ static uint64_t get_ifspeed(const char *, boolean_t); static const char *get_linkstate(const char *, boolean_t, char *); static const char *get_linkduplex(const char *, boolean_t, char *); +static iptun_type_t iptun_gettypebyname(char *); +static const char *iptun_gettypebyvalue(iptun_type_t); +static dladm_status_t print_iptun(dladm_handle_t, datalink_id_t, + show_state_t *); +static int print_iptun_walker(dladm_handle_t, datalink_id_t, void *); + static int show_etherprop(dladm_handle_t, datalink_id_t, void *); static void show_ether_xprop(void *, dladm_ether_info_t *); static boolean_t link_is_ether(const char *, datalink_id_t *); @@ -288,6 +298,17 @@ static cmd_t cmds[] = { { "show-vlan", do_show_vlan, " show-vlan [-pP] [-o <field>,..] 
[<link>]\n" }, { "up-vlan", do_up_vlan, NULL }, + { "create-iptun", do_create_iptun, + " create-iptun [-t] -T <type> " + "[-a {local|remote}=<addr>,...] <link>]" }, + { "delete-iptun", do_delete_iptun, + " delete-iptun [-t] <link>" }, + { "modify-iptun", do_modify_iptun, + " modify-iptun [-t] -a {local|remote}=<addr>,... <link>" }, + { "show-iptun", do_show_iptun, + " show-iptun [-pP] [-o <field>,..] [<link>]\n" }, + { "up-iptun", do_up_iptun, NULL }, + { "down-iptun", do_down_iptun, NULL }, { "delete-phys", do_delete_phys, " delete-phys <link>" }, { "show-phys", do_show_phys, @@ -380,6 +401,34 @@ static const struct option show_lopts[] = { { 0, 0, 0, 0 } }; +static const struct option iptun_lopts[] = { + {"output", required_argument, 0, 'o'}, + {"tunnel-type", required_argument, 0, 'T'}, + {"address", required_argument, 0, 'a'}, + {"root-dir", required_argument, 0, 'R'}, + {"parsable", no_argument, 0, 'p'}, + {"parseable", no_argument, 0, 'p'}, + {"persistent", no_argument, 0, 'P'}, + { 0, 0, 0, 0 } +}; + +static char * const iptun_addropts[] = { +#define IPTUN_LOCAL 0 + "local", +#define IPTUN_REMOTE 1 + "remote", + NULL}; + +static const struct { + const char *type_name; + iptun_type_t type_value; +} iptun_types[] = { + {"ipv4", IPTUN_TYPE_IPV4}, + {"ipv6", IPTUN_TYPE_IPV6}, + {"6to4", IPTUN_TYPE_6TO4}, + {NULL, 0} +}; + static const struct option prop_longopts[] = { {"temporary", no_argument, 0, 't' }, {"output", required_argument, 0, 'o' }, @@ -410,6 +459,7 @@ static const struct option wifi_longopts[] = { {"file", required_argument, 0, 'f' }, { 0, 0, 0, 0 } }; + static const struct option showeth_lopts[] = { {"parsable", no_argument, 0, 'p' }, {"parseable", no_argument, 0, 'p' }, @@ -492,7 +542,7 @@ typedef struct ether_fields_buf_s char eth_rem_fault[16]; } ether_fields_buf_t; -static ofmt_field_t ether_fields[] = { +static const ofmt_field_t ether_fields[] = { /* name, field width, offset callback */ { "LINK", 16, offsetof(ether_fields_buf_t, eth_link), 
print_default_cb}, @@ -534,7 +584,7 @@ typedef enum { LINK_S_OERRORS } link_s_field_index_t; -static ofmt_field_t link_s_fields[] = { +static const ofmt_field_t link_s_fields[] = { /* name, field width, index, callback */ { "LINK", 15, LINK_S_LINK, print_link_stats_cb}, { "IPACKETS", 10, LINK_S_IPKTS, print_link_stats_cb}, @@ -572,7 +622,7 @@ typedef struct link_fields_buf_s { /* * structures for 'dladm show-link' */ -static ofmt_field_t link_fields[] = { +static const ofmt_field_t link_fields[] = { /* name, field width, index, callback */ { "LINK", 12, offsetof(link_fields_buf_t, link_name), print_default_cb}, @@ -611,7 +661,7 @@ typedef struct laggr_args_s { boolean_t laggr_parsable; } laggr_args_t; -static ofmt_field_t laggr_fields[] = { +static const ofmt_field_t laggr_fields[] = { /* name, field width, offset, callback */ { "LINK", 16, offsetof(laggr_fields_buf_t, laggr_name), print_default_cb}, @@ -641,7 +691,7 @@ typedef enum { AGGR_X_PORTSTATE } aggr_x_field_index_t; -static ofmt_field_t aggr_x_fields[] = { +static const ofmt_field_t aggr_x_fields[] = { /* name, field width, index callback */ { "LINK", 12, AGGR_X_LINK, print_xaggr_cb}, { "PORT", 15, AGGR_X_PORT, print_xaggr_cb}, @@ -667,7 +717,7 @@ typedef enum { AGGR_S_OPKTDIST } aggr_s_field_index_t; -static ofmt_field_t aggr_s_fields[] = { +static const ofmt_field_t aggr_s_fields[] = { { "LINK", 12, AGGR_S_LINK, print_aggr_stats_cb}, { "PORT", 10, AGGR_S_PORT, print_aggr_stats_cb}, { "IPACKETS", 8, AGGR_S_IPKTS, print_aggr_stats_cb}, @@ -693,7 +743,7 @@ typedef enum { AGGR_L_EXPIRED } aggr_l_field_index_t; -static ofmt_field_t aggr_l_fields[] = { +static const ofmt_field_t aggr_l_fields[] = { /* name, field width, index */ { "LINK", 12, AGGR_L_LINK, print_lacp_cb}, { "PORT", 13, AGGR_L_PORT, print_lacp_cb}, @@ -710,7 +760,7 @@ static ofmt_field_t aggr_l_fields[] = { * structures for 'dladm show-phys' */ -static ofmt_field_t phys_fields[] = { +static const ofmt_field_t phys_fields[] = { /* name, field 
width, offset */ { "LINK", 13, offsetof(link_fields_buf_t, link_name), print_default_cb}, @@ -741,7 +791,7 @@ typedef enum { PHYS_M_CLIENT } phys_m_field_index_t; -static ofmt_field_t phys_m_fields[] = { +static const ofmt_field_t phys_m_fields[] = { /* name, field width, offset */ { "LINK", 13, PHYS_M_LINK, print_phys_one_mac_cb}, { "SLOT", 9, PHYS_M_SLOT, print_phys_one_mac_cb}, @@ -763,7 +813,7 @@ typedef enum { PHYS_H_CLIENTS } phys_h_field_index_t; -static ofmt_field_t phys_h_fields[] = { +static const ofmt_field_t phys_h_fields[] = { { "LINK", 13, PHYS_H_LINK, print_phys_one_hwgrp_cb}, { "GROUP", 9, PHYS_H_GROUP, print_phys_one_hwgrp_cb}, { "GROUPTYPE", 7, PHYS_H_GRPTYPE, print_phys_one_hwgrp_cb}, @@ -775,7 +825,7 @@ static ofmt_field_t phys_h_fields[] = { /* * structures for 'dladm show-vlan' */ -static ofmt_field_t vlan_fields[] = { +static const ofmt_field_t vlan_fields[] = { { "LINK", 16, offsetof(link_fields_buf_t, link_name), print_default_cb}, { "VID", 9, @@ -834,7 +884,7 @@ typedef enum { LINKPROP_POSSIBLE } linkprop_field_index_t; -static ofmt_field_t linkprop_fields[] = { +static const ofmt_field_t linkprop_fields[] = { /* name, field width, index */ { "LINK", 13, LINKPROP_LINK, print_linkprop_cb}, { "PROPERTY", 16, LINKPROP_PROPERTY, print_linkprop_cb}, @@ -882,7 +932,7 @@ typedef struct secobj_fields_buf_s { char ss_val[30]; } secobj_fields_buf_t; -static ofmt_field_t secobj_fields[] = { +static const ofmt_field_t secobj_fields[] = { { "OBJECT", 21, offsetof(secobj_fields_buf_t, ss_obj_name), print_default_cb}, { "CLASS", 21, @@ -905,7 +955,7 @@ typedef struct vnic_fields_buf_s char vnic_vid[6]; } vnic_fields_buf_t; -static ofmt_field_t vnic_fields[] = { +static const ofmt_field_t vnic_fields[] = { { "LINK", 13, offsetof(vnic_fields_buf_t, vnic_link), print_default_cb}, { "OVER", 13, @@ -932,7 +982,7 @@ typedef struct simnet_fields_buf_s char simnet_otherlink[DLPI_LINKNAME_MAX]; } simnet_fields_buf_t; -static ofmt_field_t simnet_fields[] = { 
+static const ofmt_field_t simnet_fields[] = { { "LINK", 12, offsetof(simnet_fields_buf_t, simnet_name), print_default_cb}, { "MEDIA", 20, @@ -958,7 +1008,7 @@ typedef struct usage_fields_buf_s { char usage_bandwidth[14]; } usage_fields_buf_t; -static ofmt_field_t usage_fields[] = { +static const ofmt_field_t usage_fields[] = { { "LINK", 13, offsetof(usage_fields_buf_t, usage_link), print_default_cb}, { "DURATION", 11, @@ -990,7 +1040,7 @@ typedef struct usage_l_fields_buf_s { char usage_l_bandwidth[14]; } usage_l_fields_buf_t; -static ofmt_field_t usage_l_fields[] = { +static const ofmt_field_t usage_l_fields[] = { /* name, field width, offset */ { "LINK", 13, offsetof(usage_l_fields_buf_t, usage_l_link), print_default_cb}, @@ -1007,6 +1057,34 @@ static ofmt_field_t usage_l_fields[] = { { NULL, 0, 0, NULL}} ; +/* IPTUN_*FLAG_INDEX values are indices into iptun_flags below. */ +enum { IPTUN_SFLAG_INDEX, IPTUN_IFLAG_INDEX, IPTUN_NUM_FLAGS }; + +/* + * structures for 'dladm show-iptun' + */ +typedef struct iptun_fields_buf_s { + char iptun_name[MAXLINKNAMELEN]; + char iptun_type[5]; + char iptun_laddr[NI_MAXHOST]; + char iptun_raddr[NI_MAXHOST]; + char iptun_flags[IPTUN_NUM_FLAGS + 1]; +} iptun_fields_buf_t; + +static const ofmt_field_t iptun_fields[] = { +{ "LINK", 16, + offsetof(iptun_fields_buf_t, iptun_name), print_default_cb }, +{ "TYPE", 6, + offsetof(iptun_fields_buf_t, iptun_type), print_default_cb }, +{ "FLAGS", 7, + offsetof(iptun_fields_buf_t, iptun_flags), print_default_cb }, +{ "LOCAL", 20, + offsetof(iptun_fields_buf_t, iptun_laddr), print_default_cb }, +{ "REMOTE", 20, + offsetof(iptun_fields_buf_t, iptun_raddr), print_default_cb }, +{ NULL, 0, 0, NULL} +}; + /* * structures for 'dladm show-bridge'. These are based on sections 14.8.1.1.3 * and 14.8.1.2.2 of IEEE 802.1D-2004. 
@@ -1265,7 +1343,7 @@ usage(void) if (handle != NULL) dladm_close(handle); - exit(1); + exit(EXIT_FAILURE); } int @@ -1298,15 +1376,14 @@ main(int argc, char *argv[]) cmdp->c_fn(argc - 1, &argv[1], cmdp->c_usage); dladm_close(handle); - exit(0); + return (EXIT_SUCCESS); } } (void) fprintf(stderr, gettext("%s: unknown subcommand '%s'\n"), progname, argv[1]); usage(); - - return (0); + return (EXIT_FAILURE); } /*ARGSUSED*/ @@ -1932,12 +2009,8 @@ done: * and should be removed once 6399681 is fixed. */ if (status == DLADM_STATUS_NOTSUP) { - (void) fprintf(stderr, - gettext("%s: add operation failed: %s\n"), - progname, - gettext("link capabilities don't match")); - dladm_close(handle); - exit(ENOTSUP); + die("add operation failed: link capabilities don't " + "match"); } else if (status == DLADM_STATUS_NONOTIF) { die("not all links have link up/down detection; must " "use -f (see dladm(1M))"); @@ -2439,11 +2512,10 @@ do_init_phys(int argc, char *argv[], const char *use) DATALINK_CLASS_PHYS, DATALINK_ANY_MEDIATYPE, DLADM_OPT_PERSIST); } - /* * Print the active topology information. 
*/ -static dladm_status_t +void print_link_topology(show_state_t *state, datalink_id_t linkid, datalink_class_t class, link_fields_buf_t *lbuf) { @@ -2460,46 +2532,43 @@ print_link_topology(show_state_t *state, datalink_id_t linkid, case DATALINK_CLASS_ETHERSTUB: status = dladm_bridge_getlink(handle, linkid, lbuf->link_bridge, sizeof (lbuf->link_bridge)); - if (status == DLADM_STATUS_OK) - break; - if (status != DLADM_STATUS_NOTFOUND) - return (status); + if (status != DLADM_STATUS_OK && + status != DLADM_STATUS_NOTFOUND) + (void) strcpy(lbuf->link_bridge, "?"); break; } - status = DLADM_STATUS_OK; switch (class) { case DATALINK_CLASS_VLAN: { dladm_vlan_attr_t vinfo; - status = dladm_vlan_info(handle, linkid, &vinfo, flags); - if (status != DLADM_STATUS_OK) + if (dladm_vlan_info(handle, linkid, &vinfo, flags) != + DLADM_STATUS_OK) { + (void) strcpy(lbuf->link_over, "?"); break; - status = dladm_datalink_id2info(handle, vinfo.dv_linkid, NULL, - NULL, NULL, lbuf->link_over, sizeof (lbuf->link_over)); + } + if (dladm_datalink_id2info(handle, vinfo.dv_linkid, NULL, NULL, + NULL, lbuf->link_over, sizeof (lbuf->link_over)) != + DLADM_STATUS_OK) + (void) strcpy(lbuf->link_over, "?"); break; } - case DATALINK_CLASS_AGGR: { dladm_aggr_grp_attr_t ginfo; int i; - lbuf->link_over[0] = '\0'; - - status = dladm_aggr_info(handle, linkid, &ginfo, flags); - if (status != DLADM_STATUS_OK) - break; - - if (ginfo.lg_nports == 0) { - status = DLADM_STATUS_BADVAL; + if (dladm_aggr_info(handle, linkid, &ginfo, flags) != + DLADM_STATUS_OK || ginfo.lg_nports == 0) { + (void) strcpy(lbuf->link_over, "?"); break; } for (i = 0; i < ginfo.lg_nports; i++) { - status = dladm_datalink_id2info(handle, + if (dladm_datalink_id2info(handle, ginfo.lg_ports[i].lp_linkid, NULL, NULL, NULL, - tmpbuf, sizeof (tmpbuf)); - if (status != DLADM_STATUS_OK) + tmpbuf, sizeof (tmpbuf)) != DLADM_STATUS_OK) { + (void) strcpy(lbuf->link_over, "?"); break; + } (void) strlcat(lbuf->link_over, tmpbuf, sizeof 
(lbuf->link_over)); if (i != (ginfo.lg_nports - 1)) { @@ -2510,39 +2579,43 @@ print_link_topology(show_state_t *state, datalink_id_t linkid, free(ginfo.lg_ports); break; } - case DATALINK_CLASS_VNIC: { dladm_vnic_attr_t vinfo; - status = dladm_vnic_info(handle, linkid, &vinfo, flags); - if (status == DLADM_STATUS_OK) - status = dladm_datalink_id2info(handle, - vinfo.va_link_id, NULL, NULL, NULL, lbuf->link_over, - sizeof (lbuf->link_over)); + if (dladm_vnic_info(handle, linkid, &vinfo, flags) != + DLADM_STATUS_OK) { + (void) strcpy(lbuf->link_over, "?"); + break; + } + if (dladm_datalink_id2info(handle, vinfo.va_link_id, NULL, NULL, + NULL, lbuf->link_over, sizeof (lbuf->link_over)) != + DLADM_STATUS_OK) + (void) strcpy(lbuf->link_over, "?"); break; } - case DATALINK_CLASS_BRIDGE: { datalink_id_t *dlp; uint_t i, nports; - status = dladm_datalink_id2info(handle, linkid, NULL, NULL, - NULL, tmpbuf, sizeof (tmpbuf)); - if (status != DLADM_STATUS_OK) + if (dladm_datalink_id2info(handle, linkid, NULL, NULL, + NULL, tmpbuf, sizeof (tmpbuf)) != DLADM_STATUS_OK) { + (void) strcpy(lbuf->link_over, "?"); break; + } if (tmpbuf[0] != '\0') tmpbuf[strlen(tmpbuf) - 1] = '\0'; dlp = dladm_bridge_get_portlist(tmpbuf, &nports); if (dlp == NULL) { - status = DLADM_STATUS_BADVAL; + (void) strcpy(lbuf->link_over, "?"); break; } - lbuf->link_over[0] = '\0'; for (i = 0; i < nports; i++) { - status = dladm_datalink_id2info(handle, dlp[i], NULL, - NULL, NULL, tmpbuf, sizeof (tmpbuf)); - if (status != DLADM_STATUS_OK) + if (dladm_datalink_id2info(handle, dlp[i], NULL, + NULL, NULL, tmpbuf, sizeof (tmpbuf)) != + DLADM_STATUS_OK) { + (void) strcpy(lbuf->link_over, "?"); break; + } (void) strlcat(lbuf->link_over, tmpbuf, sizeof (lbuf->link_over)); if (i != nports - 1) { @@ -2557,17 +2630,21 @@ print_link_topology(show_state_t *state, datalink_id_t linkid, case DATALINK_CLASS_SIMNET: { dladm_simnet_attr_t slinfo; - status = dladm_simnet_info(handle, linkid, &slinfo, flags); - if (status == 
DLADM_STATUS_OK && - slinfo.sna_peer_link_id != DATALINK_INVALID_LINKID) - status = dladm_datalink_id2info(handle, + if (dladm_simnet_info(handle, linkid, &slinfo, flags) != + DLADM_STATUS_OK) { + (void) strcpy(lbuf->link_over, "?"); + break; + } + if (slinfo.sna_peer_link_id != DATALINK_INVALID_LINKID) { + if (dladm_datalink_id2info(handle, slinfo.sna_peer_link_id, NULL, NULL, NULL, - lbuf->link_over, sizeof (lbuf->link_over)); + lbuf->link_over, sizeof (lbuf->link_over)) != + DLADM_STATUS_OK) + (void) strcpy(lbuf->link_over, "?"); + } break; } } - - return (status); } static dladm_status_t @@ -2641,10 +2718,7 @@ link_mtu: (void) get_linkstate(link, B_TRUE, lbuf->link_state); } - status = print_link_topology(state, linkid, class, lbuf); - if (status != DLADM_STATUS_OK) - goto done; - + print_link_topology(state, linkid, class, lbuf); done: return (status); } @@ -2661,14 +2735,8 @@ show_link(dladm_handle_t dh, datalink_id_t linkid, void *arg) * first get all the link attributes into lbuf; */ bzero(&lbuf, sizeof (link_fields_buf_t)); - status = print_link(state, linkid, &lbuf); - - if (status != DLADM_STATUS_OK) - goto done; - - ofmt_print(state->ls_ofmt, &lbuf); - -done: + if ((status = print_link(state, linkid, &lbuf)) == DLADM_STATUS_OK) + ofmt_print(state->ls_ofmt, &lbuf); state->ls_status = status; return (DLADM_WALK_CONTINUE); } @@ -2713,7 +2781,7 @@ show_link_stats(dladm_handle_t dh, datalink_id_t linkid, void *arg) { char link[DLPI_LINKNAME_MAX]; datalink_class_t class; - show_state_t *state = (show_state_t *)arg; + show_state_t *state = arg; pktsum_t stats, diff_stats; dladm_phys_attr_t dpa; link_args_t largs; @@ -2776,7 +2844,6 @@ print_aggr_info(show_grp_state_t *state, const char *link, sizeof (lbuf.laggr_addrpolicy), "auto"); } - (void) dladm_aggr_lacpmode2str(ginfop->lg_lacp_mode, lbuf.laggr_lacpactivity); (void) dladm_aggr_lacptimer2str(ginfop->lg_lacp_timer, @@ -2793,28 +2860,21 @@ static boolean_t print_xaggr_cb(ofmt_arg_t *ofarg, char *buf, uint_t 
bufsize) { const laggr_args_t *l = ofarg->ofmt_cbarg; - int portnum; boolean_t is_port = (l->laggr_lport >= 0); - static char tmpbuf[DLADM_STRSIZE]; - dladm_aggr_port_attr_t *portp; + char tmpbuf[DLADM_STRSIZE]; + const char *objname; + dladm_aggr_port_attr_t *portp; dladm_phys_attr_t dpa; - dladm_status_t *stat, status = DLADM_STATUS_OK; - - stat = l->laggr_status; if (is_port) { - portnum = l->laggr_lport; - portp = &(l->laggr_ginfop->lg_ports[portnum]); - if ((status = dladm_datalink_id2info(handle, - portp->lp_linkid, NULL, NULL, NULL, buf, bufsize)) != - DLADM_STATUS_OK) { - goto err; - } - - if ((status = dladm_phys_info(handle, portp->lp_linkid, - &dpa, DLADM_OPT_ACTIVE)) != DLADM_STATUS_OK) { - goto err; - } + portp = &(l->laggr_ginfop->lg_ports[l->laggr_lport]); + if (dladm_phys_info(handle, portp->lp_linkid, &dpa, + DLADM_OPT_ACTIVE) != DLADM_STATUS_OK) + objname = "?"; + else + objname = dpa.dp_dev; + } else { + objname = l->laggr_link; } switch (ofarg->ofmt_id) { @@ -2823,36 +2883,25 @@ print_xaggr_cb(ofmt_arg_t *ofarg, char *buf, uint_t bufsize) (is_port && !l->laggr_parsable ? 
" " : l->laggr_link)); break; case AGGR_X_PORT: - if (is_port) - break; - *stat = DLADM_STATUS_OK; - return (B_TRUE); - - case AGGR_X_SPEED: if (is_port) { - (void) snprintf(buf, bufsize, "%uMb", - (uint_t)((get_ifspeed(dpa.dp_dev, - B_FALSE)) / 1000000ull)); - } else { - (void) snprintf(buf, bufsize, "%uMb", - (uint_t)((get_ifspeed(l->laggr_link, - B_TRUE)) / 1000000ull)); + if (dladm_datalink_id2info(handle, portp->lp_linkid, + NULL, NULL, NULL, buf, bufsize) != DLADM_STATUS_OK) + (void) sprintf(buf, "?"); } break; + case AGGR_X_SPEED: + (void) snprintf(buf, bufsize, "%uMb", + (uint_t)((get_ifspeed(objname, !is_port)) / 1000000ull)); + break; + case AGGR_X_DUPLEX: - if (is_port) - (void) get_linkduplex(dpa.dp_dev, B_FALSE, tmpbuf); - else - (void) get_linkduplex(l->laggr_link, B_TRUE, tmpbuf); + (void) get_linkduplex(objname, !is_port, tmpbuf); (void) strlcpy(buf, tmpbuf, bufsize); break; case AGGR_X_STATE: - if (is_port) - (void) get_linkstate(dpa.dp_dev, B_FALSE, tmpbuf); - else - (void) get_linkstate(l->laggr_link, B_TRUE, tmpbuf); + (void) get_linkstate(objname, !is_port, tmpbuf); (void) strlcpy(buf, tmpbuf, bufsize); break; case AGGR_X_ADDRESS: @@ -2870,7 +2919,7 @@ print_xaggr_cb(ofmt_arg_t *ofarg, char *buf, uint_t bufsize) break; } err: - *stat = status; + *(l->laggr_status) = DLADM_STATUS_OK; return (B_TRUE); } @@ -2912,22 +2961,13 @@ print_lacp_cb(ofmt_arg_t *ofarg, char *buf, uint_t bufsize) int portnum; boolean_t is_port = (l->laggr_lport >= 0); dladm_aggr_port_attr_t *portp; - dladm_status_t *stat, status; aggr_lacp_state_t *lstate; - if (!is_port) { + if (!is_port) return (B_FALSE); /* cannot happen! 
*/ - } - - stat = l->laggr_status; portnum = l->laggr_lport; portp = &(l->laggr_ginfop->lg_ports[portnum]); - - if ((status = dladm_datalink_id2info(handle, portp->lp_linkid, - NULL, NULL, NULL, buf, bufsize)) != DLADM_STATUS_OK) { - goto err; - } lstate = &(portp->lp_lacp_state); switch (ofarg->ofmt_id) { @@ -2937,10 +2977,9 @@ print_lacp_cb(ofmt_arg_t *ofarg, char *buf, uint_t bufsize) break; case AGGR_L_PORT: - /* - * buf already contains portname as a result of the - * earlier call to dladm_datalink_id2info(). - */ + if (dladm_datalink_id2info(handle, portp->lp_linkid, NULL, NULL, + NULL, buf, bufsize) != DLADM_STATUS_OK) + (void) sprintf(buf, "?"); break; case AGGR_L_AGGREGATABLE: @@ -2974,11 +3013,7 @@ print_lacp_cb(ofmt_arg_t *ofarg, char *buf, uint_t bufsize) break; } - *stat = DLADM_STATUS_OK; - return (B_TRUE); - -err: - *stat = status; + *(l->laggr_status) = DLADM_STATUS_OK; return (B_TRUE); } @@ -3309,14 +3344,9 @@ do_show_link(int argc, char *argv[], const char *use) if (optind == (argc-1)) { uint32_t f; - if (strlcpy(linkname, argv[optind], MAXLINKNAMELEN) - >= MAXLINKNAMELEN) { - (void) fprintf(stderr, - gettext("%s: link name too long\n"), - progname); - dladm_close(handle); - exit(1); - } + if (strlcpy(linkname, argv[optind], MAXLINKNAMELEN) >= + MAXLINKNAMELEN) + die("link name too long"); if ((status = dladm_name2info(handle, linkname, &linkid, &f, NULL, NULL)) != DLADM_STATUS_OK) { die_dlerr(status, "link %s is not valid", linkname); @@ -3403,7 +3433,7 @@ do_show_aggr(int argc, char *argv[], const char *use) "link,port,ipackets,rbytes,opackets,obytes,ipktdist,opktdist"; char *all_extended_fields = "link,port,speed,duplex,state,address,portstate"; - ofmt_field_t *pf; + const ofmt_field_t *pf; ofmt_handle_t ofmt; ofmt_status_t oferr; uint_t ofmtflags = 0; @@ -3747,6 +3777,338 @@ print_phys_hwgrp(show_state_t *state, datalink_id_t linkid, char *link) print_phys_hwgrp_callback)); } +/* + * Parse the "local=<laddr>,remote=<raddr>" sub-options for the 
-a option of + * *-iptun subcommands. + */ +static void +iptun_process_addrarg(char *addrarg, iptun_params_t *params) +{ + char *addrval; + + while (*addrarg != '\0') { + switch (getsubopt(&addrarg, iptun_addropts, &addrval)) { + case IPTUN_LOCAL: + params->iptun_param_flags |= IPTUN_PARAM_LADDR; + if (strlcpy(params->iptun_param_laddr, addrval, + sizeof (params->iptun_param_laddr)) >= + sizeof (params->iptun_param_laddr)) + die("tunnel source address is too long"); + break; + case IPTUN_REMOTE: + params->iptun_param_flags |= IPTUN_PARAM_RADDR; + if (strlcpy(params->iptun_param_raddr, addrval, + sizeof (params->iptun_param_raddr)) >= + sizeof (params->iptun_param_raddr)) + die("tunnel destination address is too long"); + break; + default: + die("invalid address type: %s", addrval); + break; + } + } +} + +/* + * Convenience routine to process iptun-create/modify/delete subcommand + * arguments. + */ +static void +iptun_process_args(int argc, char *argv[], const char *opts, + iptun_params_t *params, uint32_t *flags, char *name, const char *use) +{ + int option; + char *altroot = NULL; + + if (params != NULL) + bzero(params, sizeof (*params)); + *flags = DLADM_OPT_ACTIVE | DLADM_OPT_PERSIST; + + opterr = 0; + while ((option = getopt_long(argc, argv, opts, iptun_lopts, NULL)) != + -1) { + switch (option) { + case 'a': + iptun_process_addrarg(optarg, params); + break; + case 'R': + altroot = optarg; + break; + case 't': + *flags &= ~DLADM_OPT_PERSIST; + break; + case 'T': + params->iptun_param_type = iptun_gettypebyname(optarg); + if (params->iptun_param_type == IPTUN_TYPE_UNKNOWN) + die("unknown tunnel type: %s", optarg); + params->iptun_param_flags |= IPTUN_PARAM_TYPE; + break; + default: + die_opterr(optopt, option, use); + break; + } + } + + /* Get the required tunnel name argument. 
*/ + if (argc - optind != 1) + usage(); + + if (strlcpy(name, argv[optind], MAXLINKNAMELEN) >= MAXLINKNAMELEN) + die("tunnel name is too long"); + + if (altroot != NULL) + altroot_cmd(altroot, argc, argv); +} + +static void +do_create_iptun(int argc, char *argv[], const char *use) +{ + iptun_params_t params; + dladm_status_t status; + uint32_t flags; + char name[MAXLINKNAMELEN]; + + iptun_process_args(argc, argv, ":a:R:tT:", &params, &flags, name, + use); + + status = dladm_iptun_create(handle, name, &params, flags); + if (status != DLADM_STATUS_OK) + die_dlerr(status, "could not create tunnel"); +} + +static void +do_delete_iptun(int argc, char *argv[], const char *use) +{ + uint32_t flags; + datalink_id_t linkid; + dladm_status_t status; + char name[MAXLINKNAMELEN]; + + iptun_process_args(argc, argv, ":R:t", NULL, &flags, name, use); + + status = dladm_name2info(handle, name, &linkid, NULL, NULL, NULL); + if (status != DLADM_STATUS_OK) + die_dlerr(status, "could not delete tunnel"); + status = dladm_iptun_delete(handle, linkid, flags); + if (status != DLADM_STATUS_OK) + die_dlerr(status, "could not delete tunnel"); +} + +static void +do_modify_iptun(int argc, char *argv[], const char *use) +{ + iptun_params_t params; + uint32_t flags; + dladm_status_t status; + char name[MAXLINKNAMELEN]; + + iptun_process_args(argc, argv, ":a:R:t", &params, &flags, name, use); + + if ((status = dladm_name2info(handle, name, &params.iptun_param_linkid, + NULL, NULL, NULL)) != DLADM_STATUS_OK) + die_dlerr(status, "could not modify tunnel"); + status = dladm_iptun_modify(handle, &params, flags); + if (status != DLADM_STATUS_OK) + die_dlerr(status, "could not modify tunnel"); +} + +static void +do_show_iptun(int argc, char *argv[], const char *use) +{ + char option; + datalink_id_t linkid; + uint32_t flags = DLADM_OPT_ACTIVE; + char *name = NULL; + dladm_status_t status; + const char *fields_str = NULL; + show_state_t state; + ofmt_handle_t ofmt; + ofmt_status_t oferr; + uint_t ofmtflags = 0; + + 
bzero(&state, sizeof (state)); + opterr = 0; + while ((option = getopt_long(argc, argv, ":pPo:", + iptun_lopts, NULL)) != -1) { + switch (option) { + case 'o': + fields_str = optarg; + break; + case 'p': + state.ls_parsable = B_TRUE; + ofmtflags = OFMT_PARSABLE; + break; + case 'P': + flags = DLADM_OPT_PERSIST; + break; + default: + die_opterr(optopt, option, use); + break; + } + } + + /* + * Get the optional tunnel name argument. If there is one, it must + * be the last thing remaining on the command-line. + */ + if (argc - optind > 1) + die(gettext(use)); + if (argc - optind == 1) + name = argv[optind]; + + oferr = ofmt_open(fields_str, iptun_fields, ofmtflags, + DLADM_DEFAULT_COL, &ofmt); + dladm_ofmt_check(oferr, state.ls_parsable, ofmt); + + state.ls_ofmt = ofmt; + state.ls_flags = flags; + + if (name == NULL) { + (void) dladm_walk_datalink_id(print_iptun_walker, handle, + &state, DATALINK_CLASS_IPTUN, DATALINK_ANY_MEDIATYPE, + flags); + status = state.ls_status; + } else { + if ((status = dladm_name2info(handle, name, &linkid, NULL, NULL, + NULL)) == DLADM_STATUS_OK) + status = print_iptun(handle, linkid, &state); + } + + if (status != DLADM_STATUS_OK) + die_dlerr(status, "unable to obtain tunnel status"); +} + +/* ARGSUSED */ +static void +do_up_iptun(int argc, char *argv[], const char *use) +{ + datalink_id_t linkid = DATALINK_ALL_LINKID; + dladm_status_t status = DLADM_STATUS_OK; + + /* + * Get the optional tunnel name argument. If there is one, it must + * be the last thing remaining on the command-line. 
+ */ + if (argc - optind > 1) + usage(); + if (argc - optind == 1) { + status = dladm_name2info(handle, argv[optind], &linkid, NULL, + NULL, NULL); + } + if (status == DLADM_STATUS_OK) + status = dladm_iptun_up(handle, linkid); + if (status != DLADM_STATUS_OK) + die_dlerr(status, "unable to configure IP tunnel links"); +} + +/* ARGSUSED */ +static void +do_down_iptun(int argc, char *argv[], const char *use) +{ + datalink_id_t linkid = DATALINK_ALL_LINKID; + dladm_status_t status = DLADM_STATUS_OK; + + /* + * Get the optional tunnel name argument. If there is one, it must + * be the last thing remaining on the command-line. + */ + if (argc - optind > 1) + usage(); + if (argc - optind == 1) { + status = dladm_name2info(handle, argv[optind], &linkid, NULL, + NULL, NULL); + } + if (status == DLADM_STATUS_OK) + status = dladm_iptun_down(handle, linkid); + if (status != DLADM_STATUS_OK) + die_dlerr(status, "unable to bring down IP tunnel links"); +} + +static iptun_type_t +iptun_gettypebyname(char *typestr) +{ + int i; + + for (i = 0; iptun_types[i].type_name != NULL; i++) { + if (strncmp(iptun_types[i].type_name, typestr, + strlen(iptun_types[i].type_name)) == 0) { + return (iptun_types[i].type_value); + } + } + return (IPTUN_TYPE_UNKNOWN); +} + +static const char * +iptun_gettypebyvalue(iptun_type_t type) +{ + int i; + + for (i = 0; iptun_types[i].type_name != NULL; i++) { + if (iptun_types[i].type_value == type) + return (iptun_types[i].type_name); + } + return (NULL); +} + +static dladm_status_t +print_iptun(dladm_handle_t dh, datalink_id_t linkid, show_state_t *state) +{ + dladm_status_t status; + iptun_params_t params; + iptun_fields_buf_t lbuf; + const char *laddr; + const char *raddr; + + params.iptun_param_linkid = linkid; + status = dladm_iptun_getparams(dh, &params, state->ls_flags); + if (status != DLADM_STATUS_OK) + return (status); + + /* LINK */ + status = dladm_datalink_id2info(dh, linkid, NULL, NULL, NULL, + lbuf.iptun_name, sizeof (lbuf.iptun_name)); + if 
(status != DLADM_STATUS_OK) + return (status); + + /* TYPE */ + (void) strlcpy(lbuf.iptun_type, + iptun_gettypebyvalue(params.iptun_param_type), + sizeof (lbuf.iptun_type)); + + /* FLAGS */ + (void) memset(lbuf.iptun_flags, '-', IPTUN_NUM_FLAGS); + lbuf.iptun_flags[IPTUN_NUM_FLAGS] = '\0'; + if (params.iptun_param_flags & IPTUN_PARAM_IPSECPOL) + lbuf.iptun_flags[IPTUN_SFLAG_INDEX] = 's'; + if (params.iptun_param_flags & IPTUN_PARAM_IMPLICIT) + lbuf.iptun_flags[IPTUN_IFLAG_INDEX] = 'i'; + + /* LOCAL */ + if (params.iptun_param_flags & IPTUN_PARAM_LADDR) + laddr = params.iptun_param_laddr; + else + laddr = (state->ls_parsable) ? "" : "--"; + (void) strlcpy(lbuf.iptun_laddr, laddr, sizeof (lbuf.iptun_laddr)); + + /* REMOTE */ + if (params.iptun_param_flags & IPTUN_PARAM_RADDR) + raddr = params.iptun_param_raddr; + else + raddr = (state->ls_parsable) ? "" : "--"; + (void) strlcpy(lbuf.iptun_raddr, raddr, sizeof (lbuf.iptun_raddr)); + + ofmt_print(state->ls_ofmt, &lbuf); + + return (DLADM_STATUS_OK); +} + +static int +print_iptun_walker(dladm_handle_t dh, datalink_id_t linkid, void *arg) +{ + ((show_state_t *)arg)->ls_status = print_iptun(dh, linkid, arg); + return (DLADM_WALK_CONTINUE); +} + static dladm_status_t print_phys(show_state_t *state, datalink_id_t linkid) { @@ -3868,7 +4230,7 @@ do_show_phys(int argc, char *argv[], const char *use) char *all_mac_fields = "link,slot,address,inuse,client"; char *all_hwgrp_fields = "link,group,grouptype,rings,clients"; - ofmt_field_t *pf; + const ofmt_field_t *pf; ofmt_handle_t ofmt; ofmt_status_t oferr; uint_t ofmtflags = 0; @@ -4414,7 +4776,7 @@ print_vnic(show_vnic_state_t *state, datalink_id_t linkid) if (!is_etherstub && dladm_datalink_id2info(handle, vnic->va_link_id, NULL, NULL, NULL, devname, sizeof (devname)) != DLADM_STATUS_OK) - return (DLADM_STATUS_BADARG); + (void) sprintf(devname, "?"); state->vs_found = B_TRUE; if (state->vs_stats) { @@ -4517,7 +4879,7 @@ do_show_vnic_common(int argc, char *argv[], const char 
*use, dladm_status_t status; boolean_t o_arg = B_FALSE; char *fields_str = NULL; - ofmt_field_t *pf; + const ofmt_field_t *pf; char *all_e_fields = "link"; ofmt_handle_t ofmt; ofmt_status_t oferr; @@ -6401,7 +6763,7 @@ done: dladm_free_props(proplist); if (status != DLADM_STATUS_OK) { dladm_close(handle); - exit(1); + exit(EXIT_FAILURE); } } @@ -6803,7 +7165,7 @@ do_delete_secobj(int argc, char **argv, const char *use) if (status != DLADM_STATUS_OK || pstatus != DLADM_STATUS_OK) { dladm_close(handle); - exit(1); + exit(EXIT_FAILURE); } } @@ -8265,7 +8627,7 @@ altroot_cmd(char *altroot, int argc, char *argv[]) (void) fprintf(fp, "%s\n", SMF_DLADM_UPGRADE_MSG); (void) fclose(fp); dladm_close(handle); - exit(0); + exit(EXIT_SUCCESS); } /* diff --git a/usr/src/cmd/dladm/dladm.xcl b/usr/src/cmd/dladm/dladm.xcl index 44dc98a1e1..6a35f108da 100644 --- a/usr/src/cmd/dladm/dladm.xcl +++ b/usr/src/cmd/dladm/dladm.xcl @@ -90,12 +90,15 @@ msgid "0x" msgid "100M" msgid "10M" msgid "1G" +msgid "6to4" msgid ": %s\n" msgid ":L:l:P:R:tu:T:" msgid ":LpPxsi:o:" msgid ":R:" msgid ":R:t" msgid ":a" +msgid ":a:R:t" +msgid ":a:R:tT:" msgid ":d:l:L:P:R:tfu:T:" msgid ":d:l:R:t" msgid ":d:l:R:tf" @@ -167,6 +170,7 @@ msgid "LACPTIMER" msgid "LINK" msgid "LINKID" msgid "LINK\n" +msgid "LOCAL" msgid "MACADDRESS" msgid "MACADDRTYPE" msgid "MAXAGE" @@ -199,6 +203,7 @@ msgid "PRIORITY" msgid "PROPERTY" msgid "PROTECT" msgid "PTYPE" +msgid "REMOTE" msgid "RBYTES" msgid "RECV" msgid "REM_FAULT" @@ -223,6 +228,7 @@ msgid "TCHANGE" msgid "TCNBPDU" msgid "TCTIME" msgid "TXBPDU" +msgid "TYPE" msgid "Total" msgid "UNKNOWN" msgid "UPTIME" @@ -287,6 +293,7 @@ msgid "create-aggr" msgid "create-bridge" msgid "create-etherstub" msgid "create-ibss" +msgid "create-iptun" msgid "create-secobj" msgid "create-simnet" msgid "create-vlan" @@ -297,6 +304,7 @@ msgid "defaulted" msgid "delete-aggr" msgid "delete-bridge" msgid "delete-etherstub" +msgid "delete-iptun" msgid "delete-phys" msgid "delete-secobj" msgid 
"delete-simnet" @@ -307,6 +315,7 @@ msgid "dev" msgid "device" msgid "disconnect-wifi" msgid "dist" +msgid "down-iptun" msgid "down-vnic" msgid "drops" msgid "duplex" @@ -338,6 +347,8 @@ msgid "interval" msgid "inuse" msgid "ipackets" msgid "ipktdist" +msgid "ipv4" +msgid "ipv6" msgid "key" msgid "lacp" msgid "lacp-mode" @@ -376,6 +387,7 @@ msgid "link_autoneg" msgid "link_duplex" msgid "link_pause" msgid "link_state" +msgid "local" msgid "lp_cap_10" msgid "lp_cap_100" msgid "lp_cap_1000" @@ -391,6 +403,7 @@ msgid "media" msgid "mode" msgid "modify-aggr" msgid "modify-bridge" +msgid "modify-iptun" msgid "modify-simnet" msgid "mtu" msgid "nick,flags,link,nexthop" @@ -428,6 +441,7 @@ msgid "random" msgid "rbytes" msgid "recv" msgid "rem_fault" +msgid "remote" msgid "remove-aggr" msgid "remove-bridge" msgid "rename-link" @@ -444,6 +458,7 @@ msgid "show-bridge" msgid "show-dev" msgid "show-ether" msgid "show-etherstub" +msgid "show-iptun" msgid "show-link" msgid "show-linkmap" msgid "show-linkprop" @@ -470,10 +485,12 @@ msgid "tdps:e:f:" msgid "temporary" msgid "timeout" msgid "trill" +msgid "tunnel-type" msgid "tx" msgid "unicast" msgid "unknown" msgid "up-aggr" +msgid "up-iptun" msgid "up-simnet" msgid "up-vlan" msgid "up-vnic" diff --git a/usr/src/cmd/dlmgmtd/Makefile b/usr/src/cmd/dlmgmtd/Makefile index 2df2508f80..1eb82565c9 100644 --- a/usr/src/cmd/dlmgmtd/Makefile +++ b/usr/src/cmd/dlmgmtd/Makefile @@ -38,14 +38,22 @@ ROOTCFGFILES= $(CFGFILES:%=$(ROOTCFGDIR)/%) $(ROOTCFGDIR)/datalink.conf := FILEMODE= 644 -LDLIBS += -ldladm -ldlpi -lavl -lnvpair -lsysevent +LDLIBS += -ldladm -ldlpi -lavl -lnvpair -lsysevent -lcontract + +# +# Instrument dlmgmtd with CTF data to ease debugging. 
+# +CTFCONVERT_HOOK = && $(CTFCONVERT_O) +CTFMERGE_HOOK = && $(CTFMERGE) -L VERSION -o $@ $(OBJS) +$(OBJS) := CFLAGS += $(CTF_FLAGS) + .KEEP_STATE: all: $(PROG) $(PROG): $(OBJS) - $(LINK.c) -o $@ $(OBJS) $(LDLIBS) + $(LINK.c) -o $@ $(OBJS) $(LDLIBS) $(CTFMERGE_HOOK) $(POST_PROCESS) install: all $(ROOTSBINPROG) $(ROOTMANIFEST) $(ROOTSVCMETHOD) $(ROOTCFGDIR) \ diff --git a/usr/src/cmd/dlmgmtd/dlmgmt_db.c b/usr/src/cmd/dlmgmtd/dlmgmt_db.c index e65722613b..807b912d1e 100644 --- a/usr/src/cmd/dlmgmtd/dlmgmt_db.c +++ b/usr/src/cmd/dlmgmtd/dlmgmt_db.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -30,11 +30,19 @@ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> +#include <string.h> #include <strings.h> #include <syslog.h> +#include <zone.h> +#include <sys/types.h> #include <sys/stat.h> +#include <stropts.h> +#include <sys/conf.h> #include <pthread.h> #include <unistd.h> +#include <wait.h> +#include <libcontract.h> +#include <sys/contract/process.h> #include "dlmgmt_impl.h" typedef enum dlmgmt_db_op { @@ -46,7 +54,9 @@ typedef enum dlmgmt_db_op { typedef struct dlmgmt_db_req_s { struct dlmgmt_db_req_s *ls_next; dlmgmt_db_op_t ls_op; + char ls_link[MAXLINKNAMELEN]; datalink_id_t ls_linkid; + zoneid_t ls_zoneid; uint32_t ls_flags; /* Either DLMGMT_ACTIVE or */ /* DLMGMT_PERSIST, not both. */ } dlmgmt_db_req_t; @@ -57,19 +67,28 @@ typedef struct dlmgmt_db_req_s { static dlmgmt_db_req_t *dlmgmt_db_req_head = NULL; static dlmgmt_db_req_t *dlmgmt_db_req_tail = NULL; -static int dlmgmt_db_update(dlmgmt_db_op_t, datalink_id_t, - uint32_t); +/* + * rewrite_needed is set to B_TRUE by process_link_line() if it encounters a + * line with an old format. This will cause the file being read to be + * re-written with the current format. 
+ */ +static boolean_t rewrite_needed; + +static int dlmgmt_db_update(dlmgmt_db_op_t, const char *, + dlmgmt_link_t *, uint32_t); static int dlmgmt_process_db_req(dlmgmt_db_req_t *); static int dlmgmt_process_db_onereq(dlmgmt_db_req_t *, boolean_t); static void *dlmgmt_db_update_thread(void *); -static boolean_t process_link_line(char *, dlmgmt_link_t **); +static boolean_t process_link_line(char *, dlmgmt_link_t *); static int process_db_write(dlmgmt_db_req_t *, FILE *, FILE *); -static int process_db_read(dlmgmt_db_req_t *, FILE *, FILE *); +static int process_db_read(dlmgmt_db_req_t *, FILE *); static void generate_link_line(dlmgmt_link_t *, boolean_t, char *); #define BUFLEN(lim, ptr) (((lim) > (ptr)) ? ((lim) - (ptr)) : 0) #define MAXLINELEN 1024 +typedef void db_walk_func_t(dlmgmt_link_t *); + /* * Translator functions to go from dladm_datatype_t to character strings. * Each function takes a pointer to a buffer, the size of the buffer, @@ -127,13 +146,262 @@ static size_t ntranslators = sizeof (translators) / sizeof (translator_t); /* * Name of the cache file to keep the active <link name, linkid> mapping */ -static char cachefile[MAXPATHLEN]; +char cachefile[MAXPATHLEN]; #define DLMGMT_PERSISTENT_DB_PATH "/etc/dladm/datalink.conf" #define DLMGMT_MAKE_FILE_DB_PATH(buffer, persistent) \ (void) snprintf((buffer), MAXPATHLEN, "%s", \ (persistent) ? DLMGMT_PERSISTENT_DB_PATH : cachefile); +typedef struct zopen_arg { + const char *zopen_modestr; + int *zopen_pipe; + int zopen_fd; +} zopen_arg_t; + +typedef struct zrename_arg { + const char *zrename_newname; +} zrename_arg_t; + +typedef union zfoparg { + zopen_arg_t zfop_openarg; + zrename_arg_t zfop_renamearg; +} zfoparg_t; + +typedef struct zfcbarg { + boolean_t zfarg_inglobalzone; /* is callback in global zone? */ + zoneid_t zfarg_finglobalzone; /* is file in global zone? 
*/ + const char *zfarg_filename; + zfoparg_t *zfarg_oparg; +} zfarg_t; +#define zfarg_openarg zfarg_oparg->zfop_openarg +#define zfarg_renamearg zfarg_oparg->zfop_renamearg + +/* zone file callback */ +typedef int zfcb_t(zfarg_t *); + +/* + * Execute an operation on filename relative to zoneid's zone root. If the + * file is in the global zone, then the zfcb() callback will simply be called + * directly. If the file is in a non-global zone, then zfcb() will be called + * both from the global zone's context, and from the non-global zone's context + * (from a fork()'ed child that has entered the non-global zone). This is + * done to allow the callback to communicate with itself if needed (e.g. to + * pass back the file descriptor of an opened file). + */ +static int +dlmgmt_zfop(const char *filename, zoneid_t zoneid, zfcb_t *zfcb, + zfoparg_t *zfoparg) +{ + int ctfd; + int err; + pid_t childpid; + siginfo_t info; + zfarg_t zfarg; + + if (zoneid != GLOBAL_ZONEID) { + /* + * We need to access a file that isn't in the global zone. + * Accessing non-global zone files from the global zone is + * unsafe (due to symlink attacks), we'll need to fork a child + * that enters the zone in question and executes the callback + * that will operate on the file. + * + * Before we proceed with this zone tango, we need to create a + * new process contract for the child, as required by + * zone_enter(). 
+ */ + errno = 0; + ctfd = open64("/system/contract/process/template", O_RDWR); + if (ctfd == -1) + return (errno); + if ((err = ct_tmpl_set_critical(ctfd, 0)) != 0 || + (err = ct_tmpl_set_informative(ctfd, 0)) != 0 || + (err = ct_pr_tmpl_set_fatal(ctfd, CT_PR_EV_HWERR)) != 0 || + (err = ct_pr_tmpl_set_param(ctfd, CT_PR_PGRPONLY)) != 0 || + (err = ct_tmpl_activate(ctfd)) != 0) { + return (err); + } + childpid = fork(); + (void) ct_tmpl_clear(ctfd); + (void) close(ctfd); + switch (childpid) { + case -1: + return (err); + case 0: + /* + * Elevate our privileges as zone_enter() requires all + * privileges. + */ + if ((err = dlmgmt_elevate_privileges()) != 0) + _exit(err); + if (zone_enter(zoneid) == -1) + _exit(errno); + if ((err = dlmgmt_drop_privileges()) != 0) + _exit(err); + break; + default: + if (waitid(P_PID, childpid, &info, WEXITED) == -1) + return (errno); + if (info.si_status != 0) + return (info.si_status); + } + } + + zfarg.zfarg_inglobalzone = (zoneid == GLOBAL_ZONEID || childpid != 0); + zfarg.zfarg_finglobalzone = (zoneid == GLOBAL_ZONEID); + zfarg.zfarg_filename = filename; + zfarg.zfarg_oparg = zfoparg; + err = zfcb(&zfarg); + if (!zfarg.zfarg_inglobalzone) + _exit(err); + return (err); +} + +static int +dlmgmt_zopen_cb(zfarg_t *zfarg) +{ + struct strrecvfd recvfd; + boolean_t newfile = B_FALSE; + boolean_t inglobalzone = zfarg->zfarg_inglobalzone; + zoneid_t finglobalzone = zfarg->zfarg_finglobalzone; + const char *filename = zfarg->zfarg_filename; + const char *modestr = zfarg->zfarg_openarg.zopen_modestr; + int *p = zfarg->zfarg_openarg.zopen_pipe; + struct stat statbuf; + int oflags; + mode_t mode; + int fd = -1; + int err; + + /* We only ever open a file for reading or writing, not both. */ + oflags = (modestr[0] == 'r') ? O_RDONLY : O_WRONLY | O_CREAT | O_TRUNC; + mode = (modestr[0] == 'r') ? 0 : S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH; + + /* Open the file if we're in the same zone as the file. 
*/ + if (inglobalzone == finglobalzone) { + /* + * First determine if we will be creating the file as part of + * opening it. If so, then we'll need to ensure that it has + * the proper ownership after having opened it. + */ + if (oflags & O_CREAT) { + if (stat(filename, &statbuf) == -1) { + if (errno == ENOENT) + newfile = B_TRUE; + else + return (errno); + } + } + if ((fd = open(filename, oflags, mode)) == -1) + return (errno); + if (newfile) { + if (chown(filename, UID_DLADM, GID_SYS) == -1) { + err = errno; + (void) close(fd); + return (err); + } + } + } + + /* + * If we're not in the global zone, send the file-descriptor back to + * our parent in the global zone. + */ + if (!inglobalzone) { + assert(!finglobalzone); + assert(fd != -1); + return (ioctl(p[1], I_SENDFD, fd) == -1 ? errno : 0); + } + + /* + * At this point, we know we're in the global zone. If the file was + * in a non-global zone, receive the file-descriptor from our child in + * the non-global zone. + */ + if (!finglobalzone) { + if (ioctl(p[0], I_RECVFD, &recvfd) == -1) + return (errno); + fd = recvfd.fd; + } + + zfarg->zfarg_openarg.zopen_fd = fd; + return (0); +} + +static int +dlmgmt_zunlink_cb(zfarg_t *zfarg) +{ + if (zfarg->zfarg_inglobalzone != zfarg->zfarg_finglobalzone) + return (0); + return (unlink(zfarg->zfarg_filename) == 0 ? 0 : errno); +} + +static int +dlmgmt_zrename_cb(zfarg_t *zfarg) +{ + if (zfarg->zfarg_inglobalzone != zfarg->zfarg_finglobalzone) + return (0); + return (rename(zfarg->zfarg_filename, + zfarg->zfarg_renamearg.zrename_newname) == 0 ? 0 : errno); +} + +/* + * Same as fopen(3C), except that it opens the file relative to zoneid's zone + * root. 
+ */ +static FILE * +dlmgmt_zfopen(const char *filename, const char *modestr, zoneid_t zoneid, + int *err) +{ + int p[2]; + zfoparg_t zfoparg; + FILE *fp = NULL; + + if (zoneid != GLOBAL_ZONEID && pipe(p) == -1) { + *err = errno; + return (NULL); + } + + zfoparg.zfop_openarg.zopen_modestr = modestr; + zfoparg.zfop_openarg.zopen_pipe = p; + *err = dlmgmt_zfop(filename, zoneid, dlmgmt_zopen_cb, &zfoparg); + if (zoneid != GLOBAL_ZONEID) { + (void) close(p[0]); + (void) close(p[1]); + } + if (*err == 0) { + fp = fdopen(zfoparg.zfop_openarg.zopen_fd, modestr); + if (fp == NULL) { + *err = errno; + (void) close(zfoparg.zfop_openarg.zopen_fd); + } + } + return (fp); +} + +/* + * Same as rename(2), except that old and new are relative to zoneid's zone + * root. + */ +static int +dlmgmt_zrename(const char *old, const char *new, zoneid_t zoneid) +{ + zfoparg_t zfoparg; + + zfoparg.zfop_renamearg.zrename_newname = new; + return (dlmgmt_zfop(old, zoneid, dlmgmt_zrename_cb, &zfoparg)); +} + +/* + * Same as unlink(2), except that filename is relative to zoneid's zone root. + */ +static int +dlmgmt_zunlink(const char *filename, zoneid_t zoneid) +{ + return (dlmgmt_zfop(filename, zoneid, dlmgmt_zunlink_cb, NULL)); +} + static size_t write_str(char *buffer, size_t buffer_length, char *name, void *value) { @@ -237,24 +505,41 @@ read_int64(char *buffer, void **value) return (sizeof (int64_t)); } +static dlmgmt_db_req_t * +dlmgmt_db_req_alloc(dlmgmt_db_op_t op, const char *linkname, + datalink_id_t linkid, zoneid_t zoneid, uint32_t flags, int *err) +{ + dlmgmt_db_req_t *req; + + if ((req = calloc(1, sizeof (dlmgmt_db_req_t))) == NULL) { + *err = errno; + } else { + req->ls_op = op; + if (linkname != NULL) + (void) strlcpy(req->ls_link, linkname, MAXLINKNAMELEN); + req->ls_linkid = linkid; + req->ls_zoneid = zoneid; + req->ls_flags = flags; + } + return (req); +} + +/* + * Update the db entry with name "entryname" using information from "linkp". 
+ */ static int -dlmgmt_db_update(dlmgmt_db_op_t op, datalink_id_t linkid, uint32_t flags) +dlmgmt_db_update(dlmgmt_db_op_t op, const char *entryname, dlmgmt_link_t *linkp, + uint32_t flags) { dlmgmt_db_req_t *req; int err; - /* - * It is either a persistent request or an active request, not both. - */ + /* It is either a persistent request or an active request, not both. */ assert((flags == DLMGMT_PERSIST) || (flags == DLMGMT_ACTIVE)); - if ((req = malloc(sizeof (dlmgmt_db_req_t))) == NULL) - return (ENOMEM); - - req->ls_next = NULL; - req->ls_op = op; - req->ls_linkid = linkid; - req->ls_flags = flags; + if ((req = dlmgmt_db_req_alloc(op, entryname, linkp->ll_linkid, + linkp->ll_zoneid, flags, &err)) == NULL) + return (err); /* * If the return error is EINPROGRESS, this request is handled @@ -297,15 +582,11 @@ dlmgmt_process_db_req(dlmgmt_db_req_t *req) } err = dlmgmt_process_db_onereq(req, writeop); - if (err != EINPROGRESS && err != 0 && - (req->ls_flags != DLMGMT_ACTIVE || errno != ENOENT)) { - + if (err != EINPROGRESS && err != 0 && err != ENOENT) { /* - * Log the error unless the request processing: - * - is successful; - * - is still in progress; - * - has failed with ENOENT because the active configuration - * file is not created yet; + * Log the error unless the request processing is still in + * progress or if the configuration file hasn't been created + * yet (ENOENT). */ dlmgmt_log(LOG_WARNING, "dlmgmt_process_db_onereq() %s " "operation on %s configuration failed: %s", @@ -331,79 +612,44 @@ dlmgmt_process_db_onereq(dlmgmt_db_req_t *req, boolean_t writeop) FILE *fp, *nfp = NULL; char file[MAXPATHLEN]; char newfile[MAXPATHLEN]; - int nfd; DLMGMT_MAKE_FILE_DB_PATH(file, (req->ls_flags == DLMGMT_PERSIST)); - if ((fp = fopen(file, (writeop ? "r+" : "r"))) == NULL) { - if (writeop && errno == EROFS) { - /* - * This can happen at boot when the file system is - * read-only. So add this request to the pending - * request list and start a retry thread. 
- */ - return (EINPROGRESS); - } else if (req->ls_flags == DLMGMT_ACTIVE && errno == ENOENT) { - /* - * It is fine if the file keeping active configuration - * does not exist. This happens during a new reboot. - */ - if (!writeop) - return (ENOENT); - /* - * If this is an update request for the active - * configuration, create the file. - */ - if ((fp = fopen(file, "w")) == NULL) - return (errno == EROFS ? EINPROGRESS : errno); - } else { - return (errno); - } - } + fp = dlmgmt_zfopen(file, "r", req->ls_zoneid, &err); + /* + * Note that it is not an error if the file doesn't exist. If we're + * reading, we treat this case the same way as an empty file. If + * we're writing, the file will be created when we open the file for + * writing below. + */ + if (fp == NULL && !writeop) + return (err); if (writeop) { (void) snprintf(newfile, MAXPATHLEN, "%s.new", file); - if ((nfd = open(newfile, O_WRONLY | O_CREAT | O_TRUNC, - S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)) < 0) { - err = errno; - (void) fclose(fp); - return (err); - } - - if ((nfp = fdopen(nfd, "w")) == NULL) { - err = errno; - (void) close(nfd); - (void) fclose(fp); - (void) unlink(newfile); - return (err); + nfp = dlmgmt_zfopen(newfile, "w", req->ls_zoneid, &err); + if (nfp == NULL) { + /* + * EROFS can happen at boot when the file system is + * read-only. Return EINPROGRESS so that the caller + * can add this request to the pending request list + * and start a retry thread. + */ + err = (errno == EROFS ? 
EINPROGRESS : errno); + goto done; } } - if (writeop) - err = process_db_write(req, fp, nfp); - else - err = process_db_read(req, fp, nfp); - if (!writeop || err != 0) - goto done; - - if (fflush(nfp) == EOF) { - err = errno; - goto done; - } - (void) fclose(fp); - (void) fclose(nfp); - - if (rename(newfile, file) < 0) { - err = errno; - (void) unlink(newfile); - return (err); + if (writeop) { + if ((err = process_db_write(req, fp, nfp)) == 0) + err = dlmgmt_zrename(newfile, file, req->ls_zoneid); + } else { + err = process_db_read(req, fp); } - return (0); - done: if (nfp != NULL) { (void) fclose(nfp); if (err != 0) - (void) unlink(newfile); + (void) dlmgmt_zunlink(newfile, req->ls_zoneid); } (void) fclose(fp); return (err); @@ -414,15 +660,13 @@ static void * dlmgmt_db_update_thread(void *arg) { dlmgmt_db_req_t *req; - int err = 0; dlmgmt_table_lock(B_TRUE); assert(dlmgmt_db_req_head != NULL); while ((req = dlmgmt_db_req_head) != NULL) { assert(req->ls_flags == DLMGMT_PERSIST); - err = dlmgmt_process_db_onereq(req, B_TRUE); - if (err == EINPROGRESS) { + if (dlmgmt_process_db_onereq(req, B_TRUE) == EINPROGRESS) { /* * The filesystem is still read only. Go to sleep and * try again. 
@@ -494,7 +738,11 @@ parse_linkprops(char *buf, dlmgmt_link_t *linkp) if (c == '=') goto parse_fail; - if (strcmp(attr_name, "name") == 0) { + if (strcmp(attr_name, "linkid") == 0) { + (void) read_int64(curr, &attr_buf); + linkp->ll_linkid = + (datalink_class_t)*(int64_t *)attr_buf; + } else if (strcmp(attr_name, "name") == 0) { (void) read_str(curr, &attr_buf); (void) snprintf(linkp->ll_link, MAXLINKNAMELEN, "%s", attr_buf); @@ -553,12 +801,14 @@ parse_fail: } static boolean_t -process_link_line(char *buf, dlmgmt_link_t **linkpp) +process_link_line(char *buf, dlmgmt_link_t *linkp) { - dlmgmt_link_t *linkp; - int i, len, llen; - char *str, *lasts; - char tmpbuf[MAXLINELEN]; + int i, len, llen; + char *str, *lasts; + char tmpbuf[MAXLINELEN]; + + bzero(linkp, sizeof (*linkp)); + linkp->ll_linkid = DATALINK_INVALID_LINKID; /* * Use a copy of buf for parsing so that we can do whatever we want. @@ -573,24 +823,33 @@ process_link_line(char *buf, dlmgmt_link_t **linkpp) if (!isspace(tmpbuf[i])) break; } - if (i == len || tmpbuf[i] == '#') { - *linkpp = NULL; + if (i == len || tmpbuf[i] == '#') return (B_TRUE); - } - - linkp = calloc(1, sizeof (dlmgmt_link_t)); - if (linkp == NULL) - goto fail; str = tmpbuf + i; /* - * Find the link id and assign it to the link structure. + * Find the link name and assign it to the link structure. */ if (strtok_r(str, " \n\t", &lasts) == NULL) goto fail; llen = strlen(str); - linkp->ll_linkid = atoi(str); + /* + * Note that a previous version of the persistent datalink.conf file + * stored the linkid as the first field. In that case, the name will + * be obtained through parse_linkprops from a property with the format + * "name=<linkname>". If we encounter such a format, we set + * rewrite_needed so that dlmgmt_db_init() can rewrite the file with + * the new format after it's done reading in the data. 
+ */ + if (isdigit(str[0])) { + linkp->ll_linkid = atoi(str); + rewrite_needed = B_TRUE; + } else { + if (strlcpy(linkp->ll_link, str, sizeof (linkp->ll_link)) >= + sizeof (linkp->ll_link)) + goto fail; + } str += llen + 1; if (str >= tmpbuf + len) @@ -605,12 +864,9 @@ process_link_line(char *buf, dlmgmt_link_t **linkpp) if (parse_linkprops(str, linkp) < 0) goto fail; - *linkpp = linkp; return (B_TRUE); fail: - link_destroy(linkp); - /* * Delete corrupted line. */ @@ -618,20 +874,82 @@ fail: return (B_FALSE); } +/* + * Find any properties in linkp that refer to "old", and rename to "new". + * Sets *renamed to B_TRUE if any renaming occurred. Returns 0 on success, + * or an errno value if memory allocation fails. + */ +static int +dlmgmt_attr_rename(dlmgmt_link_t *linkp, const char *old, const char *new, + boolean_t *renamed) +{ + dlmgmt_linkattr_t *attrp; + char *newval = NULL, *pname; + char valcp[MAXLINKATTRVALLEN]; + size_t newsize; + + *renamed = B_FALSE; + + if ((attrp = linkattr_find(linkp->ll_head, "linkover")) != NULL || + (attrp = linkattr_find(linkp->ll_head, "simnetpeer")) != NULL) { + if (strcmp(old, (char *)attrp->lp_val) == 0) { + newsize = strlen(new) + 1; + if ((newval = malloc(newsize)) == NULL) + return (errno); + (void) strcpy(newval, new); + free(attrp->lp_val); + attrp->lp_val = newval; + attrp->lp_sz = newsize; + *renamed = B_TRUE; + } + return (0); + } + + if ((attrp = linkattr_find(linkp->ll_head, "portnames")) == NULL) + return (0); + + /* <linkname>:[<linkname>:]... 
*/ + if ((newval = calloc(MAXLINKATTRVALLEN, sizeof (char))) == NULL) + return (errno); + + bcopy(attrp->lp_val, valcp, sizeof (valcp)); + pname = strtok(valcp, ":"); + while (pname != NULL) { + if (strcmp(pname, old) == 0) { + (void) strcat(newval, new); + *renamed = B_TRUE; + } else { + (void) strcat(newval, pname); + } + (void) strcat(newval, ":"); + pname = strtok(NULL, ":"); + } + if (*renamed) { + free(attrp->lp_val); + attrp->lp_val = newval; + attrp->lp_sz = strlen(newval) + 1; + } else { + free(newval); + } + return (0); +} + static int process_db_write(dlmgmt_db_req_t *req, FILE *fp, FILE *nfp) { boolean_t done = B_FALSE; int err = 0; - dlmgmt_link_t *linkp, *link_in_file, link; + dlmgmt_link_t link_in_file, *linkp = NULL, *dblinkp; + boolean_t persist = (req->ls_flags == DLMGMT_PERSIST); + boolean_t writeall, rename, attr_renamed; char buf[MAXLINELEN]; - if (req->ls_op == DLMGMT_DB_OP_WRITE) { + writeall = (req->ls_linkid == DATALINK_ALL_LINKID); + + if (req->ls_op == DLMGMT_DB_OP_WRITE && !writeall) { /* * find the link in the avl tree with the given linkid. */ - link.ll_linkid = req->ls_linkid; - linkp = avl_find(&dlmgmt_id_avl, &link, NULL); + linkp = link_by_id(req->ls_linkid, req->ls_zoneid); if (linkp == NULL || (linkp->ll_flags & req->ls_flags) == 0) { /* * This link has already been changed. This could @@ -640,50 +958,77 @@ process_db_write(dlmgmt_db_req_t *req, FILE *fp, FILE *nfp) */ return (0); } + /* + * In the case of a rename, linkp's name has been updated to + * the new name, and req->ls_link is the old link name. + */ + rename = (strcmp(req->ls_link, linkp->ll_link) != 0); } + /* + * fp can be NULL if the file didn't initially exist and we're + * creating it as part of this write operation. 
+ */ + if (fp == NULL) + goto write; + while (err == 0 && fgets(buf, sizeof (buf), fp) != NULL && process_link_line(buf, &link_in_file)) { - if (link_in_file == NULL || done) { + if (link_in_file.ll_link[0] == '\0' || done) { /* * this is a comment line or we are done updating the - * link of the given link, write the rest of lines out. + * line for the specified link, write the rest of + * lines out. */ if (fputs(buf, nfp) == EOF) err = errno; - if (link_in_file != NULL) - link_destroy(link_in_file); continue; } switch (req->ls_op) { case DLMGMT_DB_OP_WRITE: /* - * For write operations, if the linkid of the link - * read from the file does not match the id of what - * req->ll_linkid points to, write out the buffer. - * Otherwise, generate a new line. If we get to the - * end and have not seen what req->ll_linkid points - * to, write it out then. + * For write operations, we generate a new output line + * if we're either writing all links (writeall) or if + * the name of the link in the file matches the one + * we're looking for. Otherwise, we write out the + * buffer as-is. + * + * If we're doing a rename operation, ensure that any + * references to the link being renamed in link + * properties are also updated before we write + * anything. 
*/ - if (linkp == NULL || - linkp->ll_linkid != link_in_file->ll_linkid) { - if (fputs(buf, nfp) == EOF) - err = errno; - } else { - generate_link_line(linkp, - req->ls_flags == DLMGMT_PERSIST, buf); - if (fputs(buf, nfp) == EOF) - err = errno; - done = B_TRUE; + if (writeall) { + linkp = link_by_name(link_in_file.ll_link, + req->ls_zoneid); } + if (writeall || strcmp(req->ls_link, + link_in_file.ll_link) == 0) { + generate_link_line(linkp, persist, buf); + if (!writeall && !rename) + done = B_TRUE; + } else if (rename && persist) { + dblinkp = link_by_name(link_in_file.ll_link, + req->ls_zoneid); + err = dlmgmt_attr_rename(dblinkp, req->ls_link, + linkp->ll_link, &attr_renamed); + if (err != 0) + break; + if (attr_renamed) { + generate_link_line(dblinkp, persist, + buf); + } + } + if (fputs(buf, nfp) == EOF) + err = errno; break; case DLMGMT_DB_OP_DELETE: /* * Delete is simple. If buf does not represent the * link we're deleting, write it out. */ - if (req->ls_linkid != link_in_file->ll_linkid) { + if (strcmp(req->ls_link, link_in_file.ll_link) != 0) { if (fputs(buf, nfp) == EOF) err = errno; } else { @@ -695,33 +1040,28 @@ process_db_write(dlmgmt_db_req_t *req, FILE *fp, FILE *nfp) err = EINVAL; break; } - link_destroy(link_in_file); } +write: /* - * If we get to the end of the file and have not seen what - * req->ll_linkid points to, write it out then. + * If we get to the end of the file and have not seen what linkid + * points to, write it out then. 
*/ - if (req->ls_op == DLMGMT_DB_OP_WRITE && !done) { - generate_link_line(linkp, req->ls_flags == DLMGMT_PERSIST, buf); + if (req->ls_op == DLMGMT_DB_OP_WRITE && !writeall && !rename && !done) { + generate_link_line(linkp, persist, buf); done = B_TRUE; if (fputs(buf, nfp) == EOF) err = errno; } - if (!done) - err = ENOENT; - return (err); } -/* ARGSUSED1 */ static int -process_db_read(dlmgmt_db_req_t *req, FILE *fp, FILE *nfp) +process_db_read(dlmgmt_db_req_t *req, FILE *fp) { avl_index_t name_where, id_where; - dlmgmt_link_t *link_in_file; - dlmgmt_link_t *linkp1, *linkp2; + dlmgmt_link_t link_in_file, *newlink, *link_in_db; char buf[MAXLINELEN]; int err = 0; @@ -737,40 +1077,71 @@ process_db_read(dlmgmt_db_req_t *req, FILE *fp, FILE *nfp) /* * Skip the comment line. */ - if (link_in_file == NULL) + if (link_in_file.ll_link[0] == '\0') + continue; + + if ((req->ls_flags & DLMGMT_ACTIVE) && + link_in_file.ll_linkid == DATALINK_INVALID_LINKID) continue; - linkp1 = avl_find(&dlmgmt_name_avl, link_in_file, &name_where); - linkp2 = avl_find(&dlmgmt_id_avl, link_in_file, &id_where); - if ((linkp1 != NULL) || (linkp2 != NULL)) { + link_in_file.ll_zoneid = req->ls_zoneid; + link_in_db = avl_find(&dlmgmt_name_avl, &link_in_file, + &name_where); + if (link_in_db != NULL) { /* - * If any of the following conditions are met, this is - * a duplicate entry: - * - * 1. link2 (with the given name) and link2 (with the - * given id) are not the same link; - * 2. This is a persistent req and find the link with - * the given name and id. Note that persistent db - * is read before the active one. - * 3. Found the link with the given name and id but - * the link is already active. + * If the link in the database already has the flag + * for this request set, then the entry is a + * duplicate. If it's not a duplicate, then simply + * turn on the appropriate flag on the existing link. 
*/ - if ((linkp1 != linkp2) || - (req->ls_flags == DLMGMT_PERSIST) || - ((linkp1->ll_flags & DLMGMT_ACTIVE) != 0)) { - dlmgmt_log(LOG_WARNING, "Duplicate link " - "entries in repository: link name %s " - "link id %i", link_in_file->ll_link, - link_in_file->ll_linkid); + if (link_in_db->ll_flags & req->ls_flags) { + dlmgmt_log(LOG_WARNING, "Duplicate links " + "in the repository: %s", + link_in_file.ll_link); } else { - linkp1->ll_flags |= DLMGMT_ACTIVE; + if (req->ls_flags & DLMGMT_PERSIST) { + /* + * Save the newly read properties into + * the existing link. + */ + assert(link_in_db->ll_head == NULL); + link_in_db->ll_head = + link_in_file.ll_head; + } + link_in_db->ll_flags |= req->ls_flags; } - link_destroy(link_in_file); } else { - avl_insert(&dlmgmt_name_avl, link_in_file, name_where); - avl_insert(&dlmgmt_id_avl, link_in_file, id_where); - dlmgmt_advance(link_in_file); - link_in_file->ll_flags |= req->ls_flags; + /* + * This is a new link. Allocate a new dlmgmt_link_t + * and add it to the trees. + */ + newlink = calloc(1, sizeof (*newlink)); + if (newlink == NULL) { + dlmgmt_log(LOG_WARNING, "Unable to allocate " + "memory to create new link %s", + link_in_file.ll_link); + continue; + } + bcopy(&link_in_file, newlink, sizeof (*newlink)); + + if (newlink->ll_linkid == DATALINK_INVALID_LINKID) + newlink->ll_linkid = dlmgmt_nextlinkid; + if (avl_find(&dlmgmt_id_avl, newlink, &id_where) != + NULL) { + link_destroy(newlink); + continue; + } + if ((req->ls_flags & DLMGMT_ACTIVE) && + link_activate(newlink) != 0) { + dlmgmt_log(LOG_WARNING, "Unable to activate %s", + newlink->ll_link); + link_destroy(newlink); + continue; + } + avl_insert(&dlmgmt_name_avl, newlink, name_where); + avl_insert(&dlmgmt_id_avl, newlink, id_where); + dlmgmt_advance(newlink); + newlink->ll_flags |= req->ls_flags; } } @@ -780,70 +1151,72 @@ process_db_read(dlmgmt_db_req_t *req, FILE *fp, FILE *nfp) /* * Generate an entry in the link database. 
* Each entry has this format: - * <link id> <prop0>=<type>,<val>;...;<propn>=<type>,<val>; + * <link name> <prop0>=<type>,<val>;...;<propn>=<type>,<val>; */ static void generate_link_line(dlmgmt_link_t *linkp, boolean_t persist, char *buf) { char tmpbuf[MAXLINELEN]; - char *ptr; + char *ptr = tmpbuf; char *lim = tmpbuf + MAXLINELEN; - char *name_to_write = NULL; - datalink_id_t id_to_write; dlmgmt_linkattr_t *cur_p = NULL; uint64_t u64; - ptr = tmpbuf; - id_to_write = linkp->ll_linkid; - ptr += snprintf(ptr, BUFLEN(lim, ptr), "%d\t", id_to_write); - name_to_write = linkp->ll_link; - ptr += write_str(ptr, BUFLEN(lim, ptr), "name", name_to_write); + ptr += snprintf(ptr, BUFLEN(lim, ptr), "%s\t", linkp->ll_link); + if (!persist) { + /* + * We store the linkid in the active database so that dlmgmtd + * can recover in the event that it is restarted. + */ + u64 = linkp->ll_linkid; + ptr += write_uint64(ptr, BUFLEN(lim, ptr), "linkid", &u64); + } u64 = linkp->ll_class; ptr += write_uint64(ptr, BUFLEN(lim, ptr), "class", &u64); u64 = linkp->ll_media; ptr += write_uint64(ptr, BUFLEN(lim, ptr), "media", &u64); /* - * The daemon does not keep any active link attribute. If this request - * is for active configuration, we are done. + * The daemon does not keep any active link attribute. 
Only store the + * attributes if this request is for persistent configuration, */ - if (!persist) - goto done; - - for (cur_p = linkp->ll_head; cur_p != NULL; cur_p = cur_p->lp_next) { - ptr += translators[cur_p->lp_type].write_func(ptr, - BUFLEN(lim, ptr), cur_p->lp_name, cur_p->lp_val); + if (persist) { + for (cur_p = linkp->ll_head; cur_p != NULL; + cur_p = cur_p->lp_next) { + ptr += translators[cur_p->lp_type].write_func(ptr, + BUFLEN(lim, ptr), cur_p->lp_name, cur_p->lp_val); + } } -done: - if (ptr > lim) - return; - (void) snprintf(buf, MAXLINELEN, "%s\n", tmpbuf); + + if (ptr <= lim) + (void) snprintf(buf, MAXLINELEN, "%s\n", tmpbuf); } int -dlmgmt_delete_db_entry(datalink_id_t linkid, uint32_t flags) +dlmgmt_delete_db_entry(dlmgmt_link_t *linkp, uint32_t flags) { - return (dlmgmt_db_update(DLMGMT_DB_OP_DELETE, linkid, flags)); + return (dlmgmt_db_update(DLMGMT_DB_OP_DELETE, linkp->ll_link, linkp, + flags)); } int -dlmgmt_write_db_entry(datalink_id_t linkid, uint32_t flags) +dlmgmt_write_db_entry(const char *entryname, dlmgmt_link_t *linkp, + uint32_t flags) { - int err; + int err; if (flags & DLMGMT_PERSIST) { - if ((err = dlmgmt_db_update(DLMGMT_DB_OP_WRITE, - linkid, DLMGMT_PERSIST)) != 0) { + if ((err = dlmgmt_db_update(DLMGMT_DB_OP_WRITE, entryname, + linkp, DLMGMT_PERSIST)) != 0) { return (err); } } if (flags & DLMGMT_ACTIVE) { - if (((err = dlmgmt_db_update(DLMGMT_DB_OP_WRITE, - linkid, DLMGMT_ACTIVE)) != 0) && - (flags & DLMGMT_PERSIST)) { - (void) dlmgmt_db_update(DLMGMT_DB_OP_DELETE, - linkid, DLMGMT_PERSIST); + if (((err = dlmgmt_db_update(DLMGMT_DB_OP_WRITE, entryname, + linkp, DLMGMT_ACTIVE)) != 0) && (flags & DLMGMT_PERSIST)) { + (void) dlmgmt_db_update(DLMGMT_DB_OP_DELETE, entryname, + linkp, DLMGMT_PERSIST); return (err); } } @@ -852,87 +1225,177 @@ dlmgmt_write_db_entry(datalink_id_t linkid, uint32_t flags) } /* + * Upgrade properties that have link IDs as values to link names. Because '.' 
+ * is a valid linkname character, the port separator for link aggregations + * must be changed to ':'. + */ +static void +linkattr_upgrade(dlmgmt_linkattr_t *attrp) +{ + datalink_id_t linkid; + char *portidstr; + char portname[MAXLINKNAMELEN + 1]; + dlmgmt_link_t *linkp; + char *new_attr_val; + size_t new_attr_sz; + boolean_t upgraded = B_FALSE; + + if (strcmp(attrp->lp_name, "linkover") == 0 || + strcmp(attrp->lp_name, "simnetpeer") == 0) { + if (attrp->lp_type == DLADM_TYPE_UINT64) { + linkid = *(datalink_id_t *)attrp->lp_val; + if ((linkp = link_by_id(linkid, GLOBAL_ZONEID)) == NULL) + return; + new_attr_sz = strlen(linkp->ll_link) + 1; + if ((new_attr_val = malloc(new_attr_sz)) == NULL) + return; + (void) strcpy(new_attr_val, linkp->ll_link); + upgraded = B_TRUE; + } + } else if (strcmp(attrp->lp_name, "portnames") == 0) { + /* + * The old format for "portnames" was + * "<linkid>.[<linkid>.]...". The new format is + * "<linkname>:[<linkname>:]...". + */ + if (!isdigit(((char *)attrp->lp_val)[0])) + return; + new_attr_val = calloc(MAXLINKATTRVALLEN, sizeof (char)); + if (new_attr_val == NULL) + return; + portidstr = (char *)attrp->lp_val; + while (*portidstr != '\0') { + errno = 0; + linkid = strtol(portidstr, &portidstr, 10); + if (linkid == 0 || *portidstr != '.' || + (linkp = link_by_id(linkid, GLOBAL_ZONEID)) == + NULL) { + free(new_attr_val); + return; + } + (void) snprintf(portname, sizeof (portname), "%s:", + linkp->ll_link); + if (strlcat(new_attr_val, portname, + MAXLINKATTRVALLEN) >= MAXLINKATTRVALLEN) { + free(new_attr_val); + return; + } + /* skip the '.' 
delimiter */ + portidstr++; + } + new_attr_sz = strlen(new_attr_val) + 1; + upgraded = B_TRUE; + } + + if (upgraded) { + attrp->lp_type = DLADM_TYPE_STR; + attrp->lp_sz = new_attr_sz; + free(attrp->lp_val); + attrp->lp_val = new_attr_val; + } +} + +static void +dlmgmt_db_upgrade(dlmgmt_link_t *linkp) +{ + dlmgmt_linkattr_t *attrp; + + for (attrp = linkp->ll_head; attrp != NULL; attrp = attrp->lp_next) + linkattr_upgrade(attrp); +} + +static void +dlmgmt_db_phys_activate(dlmgmt_link_t *linkp) +{ + linkp->ll_flags |= DLMGMT_ACTIVE; + (void) dlmgmt_write_db_entry(linkp->ll_link, linkp, DLMGMT_ACTIVE); +} + +static void +dlmgmt_db_walk(zoneid_t zoneid, datalink_class_t class, db_walk_func_t *func) +{ + dlmgmt_link_t *linkp; + + for (linkp = avl_first(&dlmgmt_id_avl); linkp != NULL; + linkp = AVL_NEXT(&dlmgmt_id_avl, linkp)) { + if (linkp->ll_zoneid == zoneid && (linkp->ll_class & class)) + func(linkp); + } +} + +/* * Initialize the datalink <link name, linkid> mapping and the link's * attributes list based on the configuration file /etc/dladm/datalink.conf * and the active configuration cache file * /etc/svc/volatile/dladm/datalink-management:default.cache. - * - * This function is called when the datalink-management service is started - * during reboot, and when the dlmgmtd daemon is restarted. */ int -dlmgmt_db_init() +dlmgmt_db_init(zoneid_t zoneid) { - char filename[MAXPATHLEN]; - dlmgmt_db_req_t req; + dlmgmt_db_req_t *req; int err; - dlmgmt_link_t *linkp; - char *fmri, *c; + boolean_t boot = B_FALSE; - /* - * First derive the name of the cache file from the FMRI name. This - * cache name is used to keep active datalink configuration. 
- */ - if (debug) { - (void) snprintf(cachefile, MAXPATHLEN, "%s/%s%s", - DLMGMT_TMPFS_DIR, progname, ".debug.cache"); - } else { - if ((fmri = getenv("SMF_FMRI")) == NULL) { - dlmgmt_log(LOG_WARNING, "dlmgmtd is an smf(5) managed " - "service and should not be run from the command " - "line."); - return (EINVAL); - } + if ((req = dlmgmt_db_req_alloc(DLMGMT_DB_OP_READ, NULL, + DATALINK_INVALID_LINKID, zoneid, DLMGMT_ACTIVE, &err)) == NULL) + return (err); + if ((err = dlmgmt_process_db_req(req)) != 0) { /* - * The FMRI name is in the form of - * svc:/service/service:instance. We need to remove the - * prefix "svc:/" and replace '/' with '-'. The cache file - * name is in the form of "service:instance.cache". + * If we get back ENOENT, that means that the active + * configuration file doesn't exist yet, and is not an error. + * We'll create it down below after we've loaded the + * persistent configuration. */ - if ((c = strchr(fmri, '/')) != NULL) - c++; - else - c = fmri; - (void) snprintf(filename, MAXPATHLEN, "%s.cache", c); - for (c = filename; *c != '\0'; c++) { - if (*c == '/') - *c = '-'; - } - - (void) snprintf(cachefile, MAXPATHLEN, "%s/%s", - DLMGMT_TMPFS_DIR, filename); + if (err != ENOENT) + goto done; + boot = B_TRUE; } - dlmgmt_table_lock(B_TRUE); - - req.ls_next = NULL; - req.ls_op = DLMGMT_DB_OP_READ; - req.ls_linkid = DATALINK_INVALID_LINKID; - req.ls_flags = DLMGMT_PERSIST; - - if ((err = dlmgmt_process_db_req(&req)) != 0) + req->ls_flags = DLMGMT_PERSIST; + err = dlmgmt_process_db_req(req); + if (err != 0 && err != ENOENT) goto done; - - req.ls_flags = DLMGMT_ACTIVE; - err = dlmgmt_process_db_req(&req); - if (err == ENOENT) { + err = 0; + if (rewrite_needed) { /* - * The temporary datalink.conf does not exist. This is - * the first boot. Mark all the physical links active. + * First update links in memory, then dump the entire db to + * disk. 
*/ - for (linkp = avl_first(&dlmgmt_id_avl); linkp != NULL; - linkp = AVL_NEXT(&dlmgmt_id_avl, linkp)) { - if (linkp->ll_class == DATALINK_CLASS_PHYS) { - linkp->ll_flags |= DLMGMT_ACTIVE; - (void) dlmgmt_write_db_entry( - linkp->ll_linkid, DLMGMT_ACTIVE); - } - } - err = 0; + dlmgmt_db_walk(zoneid, DATALINK_CLASS_ALL, dlmgmt_db_upgrade); + req->ls_op = DLMGMT_DB_OP_WRITE; + req->ls_linkid = DATALINK_ALL_LINKID; + if ((err = dlmgmt_process_db_req(req)) != 0 && + err != EINPROGRESS) + goto done; + } + if (boot) { + dlmgmt_db_walk(zoneid, DATALINK_CLASS_PHYS, + dlmgmt_db_phys_activate); } done: - dlmgmt_table_unlock(); + if (err == EINPROGRESS) + err = 0; + else + free(req); return (err); } + +/* + * Remove all links in the given zoneid. + */ +void +dlmgmt_db_fini(zoneid_t zoneid) +{ + dlmgmt_link_t *linkp = avl_first(&dlmgmt_name_avl), *next_linkp; + + while (linkp != NULL) { + next_linkp = AVL_NEXT(&dlmgmt_name_avl, linkp); + if (linkp->ll_zoneid == zoneid) { + (void) dlmgmt_destroy_common(linkp, + DLMGMT_ACTIVE | DLMGMT_PERSIST); + } + linkp = next_linkp; + } +} diff --git a/usr/src/cmd/dlmgmtd/dlmgmt_door.c b/usr/src/cmd/dlmgmtd/dlmgmt_door.c index e73f4841b6..ad59cfe190 100644 --- a/usr/src/cmd/dlmgmtd/dlmgmt_door.c +++ b/usr/src/cmd/dlmgmtd/dlmgmt_door.c @@ -28,6 +28,23 @@ * Main door handler functions used by dlmgmtd to process the different door * call requests. Door call requests can come from the user-land applications, * or from the kernel. + * + * Note on zones handling: + * + * There are two zoneid's associated with a link. One is the zoneid of the + * zone in which the link was created (ll_zoneid in the dlmgmt_link_t), and + * the other is the zoneid of the zone where the link is currently assigned + * (the "zone" link property). The two can be different if a datalink is + * created in the global zone and subsequently assigned to a non-global zone + * via zonecfg or via explicitly setting the "zone" link property. 
+ * + * Door clients can see links that were created in their zone, and links that + * are currently assigned to their zone. Door clients in a zone can only + * modify links that were created in their zone. + * + * The datalink ID space is global, while each zone has its own datalink name + * space. This allows each zone to have complete freedom over the names that + * they assign to links created within the zone. */ #include <assert.h> @@ -38,29 +55,47 @@ #include <strings.h> #include <syslog.h> #include <sys/sysevent/eventdefs.h> +#include <zone.h> #include <libsysevent.h> #include <libdlmgmt.h> #include <librcm.h> #include "dlmgmt_impl.h" -typedef void dlmgmt_door_handler_t(void *, void *); +typedef void dlmgmt_door_handler_t(void *, void *, zoneid_t, ucred_t *); typedef struct dlmgmt_door_info_s { uint_t di_cmd; - boolean_t di_set; size_t di_reqsz; size_t di_acksz; dlmgmt_door_handler_t *di_handler; } dlmgmt_door_info_t; +/* + * Check if the caller has the required privileges to operate on a link of the + * given class. 
+ */ +static int +dlmgmt_checkprivs(datalink_class_t class, ucred_t *cred) +{ + const priv_set_t *eset; + + eset = ucred_getprivset(cred, PRIV_EFFECTIVE); + if (eset != NULL && ((class == DATALINK_CLASS_IPTUN && + priv_ismember(eset, PRIV_SYS_IPTUN_CONFIG)) || + priv_ismember(eset, PRIV_SYS_DL_CONFIG) || + priv_ismember(eset, PRIV_SYS_NET_CONFIG))) + return (0); + return (EACCES); +} static dlmgmt_link_t * -dlmgmt_getlink_by_dev(char *devname) +dlmgmt_getlink_by_dev(char *devname, zoneid_t zoneid) { dlmgmt_link_t *linkp = avl_first(&dlmgmt_id_avl); for (; linkp != NULL; linkp = AVL_NEXT(&dlmgmt_id_avl, linkp)) { - if ((linkp->ll_class == DATALINK_CLASS_PHYS) && + if (link_is_visible(linkp, zoneid) && + (linkp->ll_class == DATALINK_CLASS_PHYS) && linkattr_equal(&(linkp->ll_head), FDEVNAME, devname, strlen(devname) + 1)) { return (linkp); @@ -102,7 +137,7 @@ done: } static void -dlmgmt_upcall_create(void *argp, void *retp) +dlmgmt_upcall_create(void *argp, void *retp, zoneid_t zoneid, ucred_t *cred) { dlmgmt_upcall_arg_create_t *create = argp; dlmgmt_create_retval_t *retvalp = retp; @@ -129,13 +164,15 @@ dlmgmt_upcall_create(void *argp, void *retp) */ dlmgmt_table_lock(B_TRUE); + if ((err = dlmgmt_checkprivs(class, cred)) != 0) + goto done; + /* * Check to see whether this is the reattachment of an existing * physical link. If so, return its linkid. 
*/ - if ((class == DATALINK_CLASS_PHYS) && - (linkp = dlmgmt_getlink_by_dev(create->ld_devname)) != NULL) { - + if ((class == DATALINK_CLASS_PHYS) && (linkp = + dlmgmt_getlink_by_dev(create->ld_devname, zoneid)) != NULL) { if (linkattr_equal(&(linkp->ll_head), FPHYMAJ, &create->ld_phymaj, sizeof (uint64_t)) && linkattr_equal(&(linkp->ll_head), FPHYINST, @@ -163,6 +200,8 @@ dlmgmt_upcall_create(void *argp, void *retp) if ((linkp->ll_flags & DLMGMT_ACTIVE) == 0) reconfigured = B_TRUE; + if ((err = link_activate(linkp)) != 0) + goto done; linkp->ll_flags |= flags; linkp->ll_gen++; @@ -170,7 +209,7 @@ dlmgmt_upcall_create(void *argp, void *retp) } if ((err = dlmgmt_create_common(create->ld_devname, class, media, - flags, &linkp)) == EEXIST) { + zoneid, flags, &linkp)) == EEXIST) { /* * The link name already exists. Return error if this is a * non-physical link (in that case, the link name must be @@ -183,11 +222,12 @@ dlmgmt_upcall_create(void *argp, void *retp) * The physical link's name already exists, request * a suggested link name: net<nextppa> */ - err = dlmgmt_generate_name("net", link, MAXLINKNAMELEN); + err = dlmgmt_generate_name("net", link, MAXLINKNAMELEN, zoneid); if (err != 0) goto done; - err = dlmgmt_create_common(link, class, media, flags, &linkp); + err = dlmgmt_create_common(link, class, media, zoneid, flags, + &linkp); } if (err != 0) @@ -210,7 +250,7 @@ dlmgmt_upcall_create(void *argp, void *retp) } done: - if ((err == 0) && ((err = dlmgmt_write_db_entry(linkp->ll_linkid, + if ((err == 0) && ((err = dlmgmt_write_db_entry(linkp->ll_link, linkp, linkp->ll_flags)) != 0) && created) { (void) dlmgmt_destroy_common(linkp, flags); } @@ -235,7 +275,7 @@ noupdate: } static void -dlmgmt_upcall_update(void *argp, void *retp) +dlmgmt_upcall_update(void *argp, void *retp, zoneid_t zoneid, ucred_t *cred) { dlmgmt_upcall_arg_update_t *update = argp; dlmgmt_update_retval_t *retvalp = retp; @@ -252,11 +292,15 @@ dlmgmt_upcall_update(void *argp, void *retp) * Check 
to see whether this is the reattachment of an existing * physical link. If so, return its linkid. */ - if ((linkp = dlmgmt_getlink_by_dev(update->ld_devname)) == NULL) { + if ((linkp = dlmgmt_getlink_by_dev(update->ld_devname, zoneid)) == + NULL) { err = ENOENT; goto done; } + if ((err = dlmgmt_checkprivs(linkp->ll_class, cred)) != 0) + goto done; + retvalp->lr_linkid = linkp->ll_linkid; retvalp->lr_media = media; if (linkp->ll_media != media && linkp->ll_media != DL_OTHER) { @@ -295,7 +339,8 @@ dlmgmt_upcall_update(void *argp, void *retp) if (linkp->ll_media != media) { linkp->ll_media = media; linkp->ll_gen++; - (void) dlmgmt_write_db_entry(linkp->ll_linkid, linkp->ll_flags); + (void) dlmgmt_write_db_entry(linkp->ll_link, linkp, + linkp->ll_flags); } done: @@ -304,7 +349,7 @@ done: } static void -dlmgmt_upcall_destroy(void *argp, void *retp) +dlmgmt_upcall_destroy(void *argp, void *retp, zoneid_t zoneid, ucred_t *cred) { dlmgmt_upcall_arg_destroy_t *destroy = argp; dlmgmt_destroy_retval_t *retvalp = retp; @@ -320,21 +365,22 @@ dlmgmt_upcall_destroy(void *argp, void *retp) */ dlmgmt_table_lock(B_TRUE); - if ((linkp = link_by_id(linkid)) == NULL) { + if ((linkp = link_by_id(linkid, zoneid)) == NULL) { err = ENOENT; goto done; } + if ((err = dlmgmt_checkprivs(linkp->ll_class, cred)) != 0) + goto done; + if (((linkp->ll_flags & flags) & DLMGMT_ACTIVE) != 0) { - err = dlmgmt_delete_db_entry(linkid, DLMGMT_ACTIVE); - if (err != 0) + if ((err = dlmgmt_delete_db_entry(linkp, DLMGMT_ACTIVE)) != 0) goto done; dflags |= DLMGMT_ACTIVE; } if (((linkp->ll_flags & flags) & DLMGMT_PERSIST) != 0) { - err = dlmgmt_delete_db_entry(linkid, DLMGMT_PERSIST); - if (err != 0) + if ((err = dlmgmt_delete_db_entry(linkp, DLMGMT_PERSIST)) != 0) goto done; dflags |= DLMGMT_PERSIST; } @@ -342,14 +388,15 @@ dlmgmt_upcall_destroy(void *argp, void *retp) err = dlmgmt_destroy_common(linkp, flags); done: if (err != 0 && dflags != 0) - (void) dlmgmt_write_db_entry(linkp->ll_linkid, dflags); + 
(void) dlmgmt_write_db_entry(linkp->ll_link, linkp, dflags); dlmgmt_table_unlock(); retvalp->lr_err = err; } +/* ARGSUSED */ static void -dlmgmt_getname(void *argp, void *retp) +dlmgmt_getname(void *argp, void *retp, zoneid_t zoneid, ucred_t *cred) { dlmgmt_door_getname_t *getname = argp; dlmgmt_getname_retval_t *retvalp = retp; @@ -360,30 +407,24 @@ dlmgmt_getname(void *argp, void *retp) * Hold the reader lock to access the link */ dlmgmt_table_lock(B_FALSE); - if ((linkp = link_by_id(getname->ld_linkid)) == NULL) { - /* - * The link does not exists. - */ + if ((linkp = link_by_id(getname->ld_linkid, zoneid)) == NULL) { err = ENOENT; - goto done; - } - - if (strlcpy(retvalp->lr_link, linkp->ll_link, MAXLINKNAMELEN) >= + } else if (strlcpy(retvalp->lr_link, linkp->ll_link, MAXLINKNAMELEN) >= MAXLINKNAMELEN) { err = ENOSPC; - goto done; + } else { + retvalp->lr_flags = linkp->ll_flags; + retvalp->lr_class = linkp->ll_class; + retvalp->lr_media = linkp->ll_media; } - retvalp->lr_flags = linkp->ll_flags; - retvalp->lr_class = linkp->ll_class; - retvalp->lr_media = linkp->ll_media; -done: dlmgmt_table_unlock(); retvalp->lr_err = err; } +/* ARGSUSED */ static void -dlmgmt_getlinkid(void *argp, void *retp) +dlmgmt_getlinkid(void *argp, void *retp, zoneid_t zoneid, ucred_t *cred) { dlmgmt_door_getlinkid_t *getlinkid = argp; dlmgmt_getlinkid_retval_t *retvalp = retp; @@ -394,9 +435,10 @@ dlmgmt_getlinkid(void *argp, void *retp) * Hold the reader lock to access the link */ dlmgmt_table_lock(B_FALSE); - if ((linkp = link_by_name(getlinkid->ld_link)) == NULL) { + + if ((linkp = link_by_name(getlinkid->ld_link, zoneid)) == NULL) { /* - * The link does not exists. + * The link does not exist in this zone. 
*/ err = ENOENT; goto done; @@ -412,13 +454,13 @@ done: retvalp->lr_err = err; } +/* ARGSUSED */ static void -dlmgmt_getnext(void *argp, void *retp) +dlmgmt_getnext(void *argp, void *retp, zoneid_t zoneid, ucred_t *cred) { dlmgmt_door_getnext_t *getnext = argp; dlmgmt_getnext_retval_t *retvalp = retp; dlmgmt_link_t link, *linkp; - datalink_id_t linkid = getnext->ld_linkid; avl_index_t where; int err = 0; @@ -427,12 +469,13 @@ dlmgmt_getnext(void *argp, void *retp) */ dlmgmt_table_lock(B_FALSE); - link.ll_linkid = (linkid + 1); - linkp = avl_find(&dlmgmt_id_avl, &link, &where); - if (linkp == NULL) + link.ll_linkid = (getnext->ld_linkid + 1); + if ((linkp = avl_find(&dlmgmt_id_avl, &link, &where)) == NULL) linkp = avl_nearest(&dlmgmt_id_avl, where, AVL_AFTER); for (; linkp != NULL; linkp = AVL_NEXT(&dlmgmt_id_avl, linkp)) { + if (!link_is_visible(linkp, zoneid)) + continue; if ((linkp->ll_class & getnext->ld_class) && (linkp->ll_flags & getnext->ld_flags) && DATALINK_MEDIA_ACCEPTED(getnext->ld_dmedia, @@ -453,8 +496,9 @@ dlmgmt_getnext(void *argp, void *retp) retvalp->lr_err = err; } +/* ARGSUSED */ static void -dlmgmt_upcall_getattr(void *argp, void *retp) +dlmgmt_upcall_getattr(void *argp, void *retp, zoneid_t zoneid, ucred_t *cred) { dlmgmt_upcall_arg_getattr_t *getattr = argp; dlmgmt_getattr_retval_t *retvalp = retp; @@ -464,22 +508,17 @@ dlmgmt_upcall_getattr(void *argp, void *retp) * Hold the reader lock to access the link */ dlmgmt_table_lock(B_FALSE); - if ((linkp = link_by_id(getattr->ld_linkid)) == NULL) { - /* - * The link does not exist. 
- */ + if ((linkp = link_by_id(getattr->ld_linkid, zoneid)) == NULL) { retvalp->lr_err = ENOENT; - goto done; + } else { + retvalp->lr_err = dlmgmt_getattr_common(&linkp->ll_head, + getattr->ld_attr, retvalp); } - - dlmgmt_getattr_common(&linkp->ll_head, getattr->ld_attr, retvalp); - -done: dlmgmt_table_unlock(); } static void -dlmgmt_createid(void *argp, void *retp) +dlmgmt_createid(void *argp, void *retp, zoneid_t zoneid, ucred_t *cred) { dlmgmt_door_createid_t *createid = argp; dlmgmt_createid_retval_t *retvalp = retp; @@ -493,18 +532,21 @@ dlmgmt_createid(void *argp, void *retp) */ dlmgmt_table_lock(B_TRUE); + if ((err = dlmgmt_checkprivs(createid->ld_class, cred)) != 0) + goto done; + if (createid->ld_prefix) { err = dlmgmt_generate_name(createid->ld_link, link, - MAXLINKNAMELEN); + MAXLINKNAMELEN, zoneid); if (err != 0) goto done; err = dlmgmt_create_common(link, createid->ld_class, - createid->ld_media, createid->ld_flags, &linkp); + createid->ld_media, zoneid, createid->ld_flags, &linkp); } else { err = dlmgmt_create_common(createid->ld_link, - createid->ld_class, createid->ld_media, createid->ld_flags, - &linkp); + createid->ld_class, createid->ld_media, zoneid, + createid->ld_flags, &linkp); } if (err == 0) { @@ -512,8 +554,10 @@ dlmgmt_createid(void *argp, void *retp) * Keep the active mapping. */ linkid = linkp->ll_linkid; - if (createid->ld_flags & DLMGMT_ACTIVE) - (void) dlmgmt_write_db_entry(linkid, DLMGMT_ACTIVE); + if (createid->ld_flags & DLMGMT_ACTIVE) { + (void) dlmgmt_write_db_entry(linkp->ll_link, linkp, + DLMGMT_ACTIVE); + } } done: @@ -523,7 +567,7 @@ done: } static void -dlmgmt_destroyid(void *argp, void *retp) +dlmgmt_destroyid(void *argp, void *retp, zoneid_t zoneid, ucred_t *cred) { dlmgmt_door_destroyid_t *destroyid = argp; dlmgmt_destroyid_retval_t *retvalp = retp; @@ -536,20 +580,21 @@ dlmgmt_destroyid(void *argp, void *retp) * Hold the writer lock to update the link table. 
*/ dlmgmt_table_lock(B_TRUE); - if ((linkp = link_by_id(linkid)) == NULL) { + if ((linkp = link_by_id(linkid, zoneid)) == NULL) { err = ENOENT; goto done; } - if ((err = dlmgmt_destroy_common(linkp, flags)) != 0) + if ((err = dlmgmt_checkprivs(linkp->ll_class, cred)) != 0) goto done; /* * Delete the active mapping. */ if (flags & DLMGMT_ACTIVE) - (void) dlmgmt_delete_db_entry(linkid, DLMGMT_ACTIVE); - + err = dlmgmt_delete_db_entry(linkp, DLMGMT_ACTIVE); + if (err == 0) + err = dlmgmt_destroy_common(linkp, flags); done: dlmgmt_table_unlock(); retvalp->lr_err = err; @@ -561,13 +606,13 @@ done: * the given link name. */ static void -dlmgmt_remapid(void *argp, void *retp) +dlmgmt_remapid(void *argp, void *retp, zoneid_t zoneid, ucred_t *cred) { dlmgmt_door_remapid_t *remapid = argp; dlmgmt_remapid_retval_t *retvalp = retp; - datalink_id_t linkid1 = remapid->ld_linkid; - dlmgmt_link_t link, *linkp1, *tmp; - avl_index_t where; + dlmgmt_link_t *linkp; + char oldname[MAXLINKNAMELEN]; + boolean_t renamed = B_FALSE; int err = 0; if (!dladm_valid_linkname(remapid->ld_link)) { @@ -579,36 +624,55 @@ dlmgmt_remapid(void *argp, void *retp) * Hold the writer lock to update the link table. 
*/ dlmgmt_table_lock(B_TRUE); - if ((linkp1 = link_by_id(linkid1)) == NULL) { + if ((linkp = link_by_id(remapid->ld_linkid, zoneid)) == NULL) { err = ENOENT; goto done; } - if (link_by_name(remapid->ld_link) != NULL) { + if ((err = dlmgmt_checkprivs(linkp->ll_class, cred)) != 0) + goto done; + + if (link_by_name(remapid->ld_link, linkp->ll_zoneid) != NULL) { err = EEXIST; goto done; } - avl_remove(&dlmgmt_name_avl, linkp1); - (void) strlcpy(link.ll_link, remapid->ld_link, MAXLINKNAMELEN); - tmp = avl_find(&dlmgmt_name_avl, &link, &where); - assert(tmp == NULL); - (void) strlcpy(linkp1->ll_link, remapid->ld_link, MAXLINKNAMELEN); - avl_insert(&dlmgmt_name_avl, linkp1, where); - dlmgmt_advance(linkp1); + (void) strlcpy(oldname, linkp->ll_link, MAXLINKNAMELEN); + avl_remove(&dlmgmt_name_avl, linkp); + (void) strlcpy(linkp->ll_link, remapid->ld_link, MAXLINKNAMELEN); + avl_add(&dlmgmt_name_avl, linkp); + renamed = B_TRUE; - /* - * If we renamed a temporary link, update the temporary repository. 
- */ - if (linkp1->ll_flags & DLMGMT_ACTIVE) - (void) dlmgmt_write_db_entry(linkid1, DLMGMT_ACTIVE); + if (linkp->ll_flags & DLMGMT_ACTIVE) { + err = dlmgmt_write_db_entry(oldname, linkp, DLMGMT_ACTIVE); + if (err != 0) + goto done; + } + if (linkp->ll_flags & DLMGMT_PERSIST) { + err = dlmgmt_write_db_entry(oldname, linkp, DLMGMT_PERSIST); + if (err != 0) { + if (linkp->ll_flags & DLMGMT_ACTIVE) { + (void) dlmgmt_write_db_entry(remapid->ld_link, + linkp, DLMGMT_ACTIVE); + } + goto done; + } + } + + dlmgmt_advance(linkp); + linkp->ll_gen++; done: + if (err != 0 && renamed) { + avl_remove(&dlmgmt_name_avl, linkp); + (void) strlcpy(linkp->ll_link, oldname, MAXLINKNAMELEN); + avl_add(&dlmgmt_name_avl, linkp); + } dlmgmt_table_unlock(); retvalp->lr_err = err; } static void -dlmgmt_upid(void *argp, void *retp) +dlmgmt_upid(void *argp, void *retp, zoneid_t zoneid, ucred_t *cred) { dlmgmt_door_upid_t *upid = argp; dlmgmt_upid_retval_t *retvalp = retp; @@ -619,30 +683,34 @@ dlmgmt_upid(void *argp, void *retp) * Hold the writer lock to update the link table. 
*/ dlmgmt_table_lock(B_TRUE); - if ((linkp = link_by_id(upid->ld_linkid)) == NULL) { + if ((linkp = link_by_id(upid->ld_linkid, zoneid)) == NULL) { err = ENOENT; goto done; } + if ((err = dlmgmt_checkprivs(linkp->ll_class, cred)) != 0) + goto done; + if (linkp->ll_flags & DLMGMT_ACTIVE) { err = EINVAL; goto done; } - linkp->ll_flags |= DLMGMT_ACTIVE; - (void) dlmgmt_write_db_entry(linkp->ll_linkid, DLMGMT_ACTIVE); + if ((err = link_activate(linkp)) == 0) { + (void) dlmgmt_write_db_entry(linkp->ll_link, linkp, + DLMGMT_ACTIVE); + } done: dlmgmt_table_unlock(); retvalp->lr_err = err; } static void -dlmgmt_createconf(void *argp, void *retp) +dlmgmt_createconf(void *argp, void *retp, zoneid_t zoneid, ucred_t *cred) { dlmgmt_door_createconf_t *createconf = argp; dlmgmt_createconf_retval_t *retvalp = retp; - dlmgmt_dlconf_t dlconf, *dlconfp, *tmp; - avl_index_t where; + dlmgmt_dlconf_t *dlconfp; int err; /* @@ -650,25 +718,23 @@ dlmgmt_createconf(void *argp, void *retp) */ dlmgmt_dlconf_table_lock(B_TRUE); - if ((err = dlconf_create(createconf->ld_link, createconf->ld_linkid, - createconf->ld_class, createconf->ld_media, &dlconfp)) != 0) { + if ((err = dlmgmt_checkprivs(createconf->ld_class, cred)) != 0) goto done; - } - - dlconf.ld_id = dlconfp->ld_id; - tmp = avl_find(&dlmgmt_dlconf_avl, &dlconf, &where); - assert(tmp == NULL); - avl_insert(&dlmgmt_dlconf_avl, dlconfp, where); - dlmgmt_advance_dlconfid(dlconfp); - retvalp->lr_conf = (dladm_conf_t)dlconfp->ld_id; + err = dlconf_create(createconf->ld_link, createconf->ld_linkid, + createconf->ld_class, createconf->ld_media, zoneid, &dlconfp); + if (err == 0) { + avl_add(&dlmgmt_dlconf_avl, dlconfp); + dlmgmt_advance_dlconfid(dlconfp); + retvalp->lr_conf = (dladm_conf_t)dlconfp->ld_id; + } done: dlmgmt_dlconf_table_unlock(); retvalp->lr_err = err; } static void -dlmgmt_setattr(void *argp, void *retp) +dlmgmt_setattr(void *argp, void *retp, zoneid_t zoneid, ucred_t *cred) { dlmgmt_door_setattr_t *setattr = argp; 
dlmgmt_setattr_retval_t *retvalp = retp; @@ -682,11 +748,14 @@ dlmgmt_setattr(void *argp, void *retp) dlconf.ld_id = (int)setattr->ld_conf; dlconfp = avl_find(&dlmgmt_dlconf_avl, &dlconf, NULL); - if (dlconfp == NULL) { + if (dlconfp == NULL || zoneid != dlconfp->ld_zoneid) { err = ENOENT; goto done; } + if ((err = dlmgmt_checkprivs(dlconfp->ld_class, cred)) != 0) + goto done; + err = linkattr_set(&(dlconfp->ld_head), setattr->ld_attr, &setattr->ld_attrval, setattr->ld_attrsz, setattr->ld_type); @@ -696,7 +765,7 @@ done: } static void -dlmgmt_unsetconfattr(void *argp, void *retp) +dlmgmt_unsetconfattr(void *argp, void *retp, zoneid_t zoneid, ucred_t *cred) { dlmgmt_door_unsetattr_t *unsetattr = argp; dlmgmt_unsetattr_retval_t *retvalp = retp; @@ -710,12 +779,15 @@ dlmgmt_unsetconfattr(void *argp, void *retp) dlconf.ld_id = (int)unsetattr->ld_conf; dlconfp = avl_find(&dlmgmt_dlconf_avl, &dlconf, NULL); - if (dlconfp == NULL) { + if (dlconfp == NULL || zoneid != dlconfp->ld_zoneid) { err = ENOENT; goto done; } - err = linkattr_unset(&(dlconfp->ld_head), unsetattr->ld_attr); + if ((err = dlmgmt_checkprivs(dlconfp->ld_class, cred)) != 0) + goto done; + + linkattr_unset(&(dlconfp->ld_head), unsetattr->ld_attr); done: dlmgmt_dlconf_table_unlock(); @@ -735,7 +807,7 @@ done: * across the pair of dladm_read_conf() and dladm_write_conf() calls. */ static void -dlmgmt_writeconf(void *argp, void *retp) +dlmgmt_writeconf(void *argp, void *retp, zoneid_t zoneid, ucred_t *cred) { dlmgmt_door_writeconf_t *writeconf = argp; dlmgmt_writeconf_retval_t *retvalp = retp; @@ -751,16 +823,19 @@ dlmgmt_writeconf(void *argp, void *retp) dlconf.ld_id = (int)writeconf->ld_conf; dlconfp = avl_find(&dlmgmt_dlconf_avl, &dlconf, NULL); - if (dlconfp == NULL) { + if (dlconfp == NULL || zoneid != dlconfp->ld_zoneid) { err = ENOENT; goto done; } + if ((err = dlmgmt_checkprivs(dlconfp->ld_class, cred)) != 0) + goto done; + /* * Hold the writer lock to update the link table. 
*/ dlmgmt_table_lock(B_TRUE); - linkp = link_by_id(dlconfp->ld_linkid); + linkp = link_by_id(dlconfp->ld_linkid, zoneid); if ((linkp == NULL) || (linkp->ll_class != dlconfp->ld_class) || (linkp->ll_media != dlconfp->ld_media) || (strcmp(linkp->ll_link, dlconfp->ld_link) != 0)) { @@ -803,7 +878,7 @@ dlmgmt_writeconf(void *argp, void *retp) } linkp->ll_gen++; - err = dlmgmt_write_db_entry(linkp->ll_linkid, DLMGMT_PERSIST); + err = dlmgmt_write_db_entry(linkp->ll_link, linkp, DLMGMT_PERSIST); dlmgmt_table_unlock(); done: dlmgmt_dlconf_table_unlock(); @@ -811,20 +886,38 @@ done: } static void -dlmgmt_removeconf(void *argp, void *retp) +dlmgmt_removeconf(void *argp, void *retp, zoneid_t zoneid, ucred_t *cred) { dlmgmt_door_removeconf_t *removeconf = argp; dlmgmt_removeconf_retval_t *retvalp = retp; + dlmgmt_link_t *linkp; int err; dlmgmt_table_lock(B_TRUE); - err = dlmgmt_delete_db_entry(removeconf->ld_linkid, DLMGMT_PERSIST); + if ((linkp = link_by_id(removeconf->ld_linkid, zoneid)) == NULL) { + err = ENOENT; + goto done; + } + if (zoneid != GLOBAL_ZONEID && linkp->ll_onloan) { + /* + * A non-global zone cannot remove the persistent + * configuration of a link that is on loan from the global + * zone. 
+ */ + err = EACCES; + goto done; + } + if ((err = dlmgmt_checkprivs(linkp->ll_class, cred)) != 0) + goto done; + + err = dlmgmt_delete_db_entry(linkp, DLMGMT_PERSIST); +done: dlmgmt_table_unlock(); retvalp->lr_err = err; } static void -dlmgmt_destroyconf(void *argp, void *retp) +dlmgmt_destroyconf(void *argp, void *retp, zoneid_t zoneid, ucred_t *cred) { dlmgmt_door_destroyconf_t *destroyconf = argp; dlmgmt_destroyconf_retval_t *retvalp = retp; @@ -838,11 +931,14 @@ dlmgmt_destroyconf(void *argp, void *retp) dlconf.ld_id = (int)destroyconf->ld_conf; dlconfp = avl_find(&dlmgmt_dlconf_avl, &dlconf, NULL); - if (dlconfp == NULL) { + if (dlconfp == NULL || zoneid != dlconfp->ld_zoneid) { err = ENOENT; goto done; } + if ((err = dlmgmt_checkprivs(dlconfp->ld_class, cred)) != 0) + goto done; + avl_remove(&dlmgmt_dlconf_avl, dlconfp); dlconf_destroy(dlconfp); @@ -855,16 +951,16 @@ done: * See the comments above dladm_write_conf() to see how ld_gen is used to * ensure atomicity across the {dlmgmt_readconf(), dlmgmt_writeconf()} pair. */ +/* ARGSUSED */ static void -dlmgmt_readconf(void *argp, void *retp) +dlmgmt_readconf(void *argp, void *retp, zoneid_t zoneid, ucred_t *cred) { dlmgmt_door_readconf_t *readconf = argp; dlmgmt_readconf_retval_t *retvalp = retp; dlmgmt_link_t *linkp; datalink_id_t linkid = readconf->ld_linkid; - dlmgmt_dlconf_t *dlconfp, *tmp, dlconf; + dlmgmt_dlconf_t *dlconfp; dlmgmt_linkattr_t *attrp; - avl_index_t where; int err = 0; /* @@ -876,19 +972,24 @@ dlmgmt_readconf(void *argp, void *retp) * Hold the reader lock to access the link */ dlmgmt_table_lock(B_FALSE); - linkp = link_by_id(linkid); + linkp = link_by_id(linkid, zoneid); if ((linkp == NULL) || !(linkp->ll_flags & DLMGMT_PERSIST)) { + /* The persistent link configuration does not exist. */ + err = ENOENT; + goto done; + } + if (linkp->ll_onloan && zoneid != GLOBAL_ZONEID) { /* - * The persistent link configuration does not exists. 
+ * The caller is in a non-global zone and the persistent + * configuration belongs to the global zone. */ - err = ENOENT; + err = EACCES; goto done; } if ((err = dlconf_create(linkp->ll_link, linkp->ll_linkid, - linkp->ll_class, linkp->ll_media, &dlconfp)) != 0) { + linkp->ll_class, linkp->ll_media, zoneid, &dlconfp)) != 0) goto done; - } for (attrp = linkp->ll_head; attrp != NULL; attrp = attrp->lp_next) { if ((err = linkattr_set(&(dlconfp->ld_head), attrp->lp_name, @@ -898,11 +999,7 @@ dlmgmt_readconf(void *argp, void *retp) } } dlconfp->ld_gen = linkp->ll_gen; - - dlconf.ld_id = dlconfp->ld_id; - tmp = avl_find(&dlmgmt_dlconf_avl, &dlconf, &where); - assert(tmp == NULL); - avl_insert(&dlmgmt_dlconf_avl, dlconfp, where); + avl_add(&dlmgmt_dlconf_avl, dlconfp); dlmgmt_advance_dlconfid(dlconfp); retvalp->lr_conf = (dladm_conf_t)dlconfp->ld_id; @@ -915,8 +1012,9 @@ done: /* * Note: the caller must free *retvalpp in case of success. */ +/* ARGSUSED */ static void -dlmgmt_getattr(void *argp, void *retp) +dlmgmt_getattr(void *argp, void *retp, zoneid_t zoneid, ucred_t *cred) { dlmgmt_door_getattr_t *getattr = argp; dlmgmt_getattr_retval_t *retvalp = retp; @@ -928,49 +1026,45 @@ dlmgmt_getattr(void *argp, void *retp) dlmgmt_dlconf_table_lock(B_FALSE); dlconf.ld_id = (int)getattr->ld_conf; - dlconfp = avl_find(&dlmgmt_dlconf_avl, &dlconf, NULL); - if (dlconfp == NULL) { + if ((dlconfp = avl_find(&dlmgmt_dlconf_avl, &dlconf, NULL)) == NULL || + zoneid != dlconfp->ld_zoneid) { retvalp->lr_err = ENOENT; - goto done; + } else { + retvalp->lr_err = dlmgmt_getattr_common(&dlconfp->ld_head, + getattr->ld_attr, retvalp); } - dlmgmt_getattr_common(&dlconfp->ld_head, getattr->ld_attr, retvalp); - -done: dlmgmt_dlconf_table_unlock(); } static void -dlmgmt_upcall_linkprop_init(void *argp, void *retp) +dlmgmt_upcall_linkprop_init(void *argp, void *retp, zoneid_t zoneid, + ucred_t *cred) { dlmgmt_door_linkprop_init_t *lip = argp; dlmgmt_linkprop_init_retval_t *retvalp = retp; - 
boolean_t do_linkprop = B_FALSE; + dlmgmt_link_t *linkp; + int err; - /* - * Ignore wifi links until wifi property ioctls are converted - * to generic property ioctls. This avoids deadlocks due to - * wifi property ioctls using their own /dev/net device, - * not the DLD control device. - */ dlmgmt_table_lock(B_FALSE); - if (link_by_id(lip->ld_linkid) == NULL) - retvalp->lr_err = ENOENT; + if ((linkp = link_by_id(lip->ld_linkid, zoneid)) == NULL) + err = ENOENT; else - do_linkprop = B_TRUE; + err = dlmgmt_checkprivs(linkp->ll_class, cred); dlmgmt_table_unlock(); - if (do_linkprop) - retvalp->lr_err = dladm_init_linkprop(dld_handle, - lip->ld_linkid, B_TRUE); + if (err == 0) + err = dladm_init_linkprop(dld_handle, lip->ld_linkid, B_TRUE); + retvalp->lr_err = err; } /* * Get the link property that follows ld_last_attr. * If ld_last_attr is empty, return the first property. */ +/* ARGSUSED */ static void -dlmgmt_linkprop_getnext(void *argp, void *retp) +dlmgmt_linkprop_getnext(void *argp, void *retp, zoneid_t zoneid, ucred_t *cred) { dlmgmt_door_linkprop_getnext_t *getnext = argp; dlmgmt_linkprop_getnext_retval_t *retvalp = retp; @@ -1013,100 +1107,225 @@ done: retvalp->lr_err = err; } +static void +dlmgmt_setzoneid(void *argp, void *retp, zoneid_t zoneid, ucred_t *cred) +{ + dlmgmt_door_setzoneid_t *setzoneid = argp; + dlmgmt_setzoneid_retval_t *retvalp = retp; + dlmgmt_link_t *linkp; + datalink_id_t linkid = setzoneid->ld_linkid; + zoneid_t oldzoneid, newzoneid; + int err = 0; + + dlmgmt_table_lock(B_TRUE); + + /* We currently only allow changing zoneid's from the global zone. */ + if (zoneid != GLOBAL_ZONEID) { + err = EACCES; + goto done; + } + + if ((linkp = link_by_id(linkid, zoneid)) == NULL) { + err = ENOENT; + goto done; + } + + if ((err = dlmgmt_checkprivs(linkp->ll_class, cred)) != 0) + goto done; + + /* We can only assign an active link to a zone. 
*/ + if (!(linkp->ll_flags & DLMGMT_ACTIVE)) { + err = EINVAL; + goto done; + } + + oldzoneid = linkp->ll_zoneid; + newzoneid = setzoneid->ld_zoneid; + + if (oldzoneid == newzoneid) + goto done; + + /* + * Before we remove the link from its current zone, make sure that + * there isn't a link with the same name in the destination zone. + */ + if (zoneid != GLOBAL_ZONEID && + link_by_name(linkp->ll_link, newzoneid) != NULL) { + err = EEXIST; + goto done; + } + + if (oldzoneid != GLOBAL_ZONEID) { + if (zone_remove_datalink(oldzoneid, linkid) != 0) { + err = errno; + dlmgmt_log(LOG_WARNING, "unable to remove link %d from " + "zone %d: %s", linkid, oldzoneid, strerror(err)); + goto done; + } + avl_remove(&dlmgmt_loan_avl, linkp); + linkp->ll_onloan = B_FALSE; + } + if (newzoneid != GLOBAL_ZONEID) { + if (zone_add_datalink(newzoneid, linkid) != 0) { + err = errno; + dlmgmt_log(LOG_WARNING, "unable to add link %d to zone " + "%d: %s", linkid, newzoneid, strerror(err)); + (void) zone_add_datalink(oldzoneid, linkid); + goto done; + } + avl_add(&dlmgmt_loan_avl, linkp); + linkp->ll_onloan = B_TRUE; + } + + avl_remove(&dlmgmt_name_avl, linkp); + linkp->ll_zoneid = newzoneid; + avl_add(&dlmgmt_name_avl, linkp); + +done: + dlmgmt_table_unlock(); + retvalp->lr_err = err; +} + +static void +dlmgmt_zoneboot(void *argp, void *retp, zoneid_t zoneid, ucred_t *cred) +{ + int err; + dlmgmt_door_zoneboot_t *zoneboot = argp; + dlmgmt_zoneboot_retval_t *retvalp = retp; + + dlmgmt_table_lock(B_TRUE); + + if ((err = dlmgmt_checkprivs(0, cred)) != 0) + goto done; + + if (zoneid != GLOBAL_ZONEID) { + err = EACCES; + goto done; + } + if (zoneboot->ld_zoneid == GLOBAL_ZONEID) { + err = EINVAL; + goto done; + } + + if ((err = dlmgmt_elevate_privileges()) == 0) { + err = dlmgmt_zone_init(zoneboot->ld_zoneid); + (void) dlmgmt_drop_privileges(); + } +done: + dlmgmt_table_unlock(); + retvalp->lr_err = err; +} + +static void +dlmgmt_zonehalt(void *argp, void *retp, zoneid_t zoneid, ucred_t *cred) +{ 
+ int err = 0; + dlmgmt_door_zonehalt_t *zonehalt = argp; + dlmgmt_zonehalt_retval_t *retvalp = retp; + + if ((err = dlmgmt_checkprivs(0, cred)) == 0) { + if (zoneid != GLOBAL_ZONEID) { + err = EACCES; + } else if (zonehalt->ld_zoneid == GLOBAL_ZONEID) { + err = EINVAL; + } else { + dlmgmt_table_lock(B_TRUE); + dlmgmt_db_fini(zonehalt->ld_zoneid); + dlmgmt_table_unlock(); + } + } + retvalp->lr_err = err; +} + static dlmgmt_door_info_t i_dlmgmt_door_info_tbl[] = { - { DLMGMT_CMD_DLS_CREATE, B_TRUE, sizeof (dlmgmt_upcall_arg_create_t), + { DLMGMT_CMD_DLS_CREATE, sizeof (dlmgmt_upcall_arg_create_t), sizeof (dlmgmt_create_retval_t), dlmgmt_upcall_create }, - { DLMGMT_CMD_DLS_GETATTR, B_FALSE, sizeof (dlmgmt_upcall_arg_getattr_t), + { DLMGMT_CMD_DLS_GETATTR, sizeof (dlmgmt_upcall_arg_getattr_t), sizeof (dlmgmt_getattr_retval_t), dlmgmt_upcall_getattr }, - { DLMGMT_CMD_DLS_DESTROY, B_TRUE, sizeof (dlmgmt_upcall_arg_destroy_t), + { DLMGMT_CMD_DLS_DESTROY, sizeof (dlmgmt_upcall_arg_destroy_t), sizeof (dlmgmt_destroy_retval_t), dlmgmt_upcall_destroy }, - { DLMGMT_CMD_GETNAME, B_FALSE, sizeof (dlmgmt_door_getname_t), + { DLMGMT_CMD_GETNAME, sizeof (dlmgmt_door_getname_t), sizeof (dlmgmt_getname_retval_t), dlmgmt_getname }, - { DLMGMT_CMD_GETLINKID, B_FALSE, sizeof (dlmgmt_door_getlinkid_t), + { DLMGMT_CMD_GETLINKID, sizeof (dlmgmt_door_getlinkid_t), sizeof (dlmgmt_getlinkid_retval_t), dlmgmt_getlinkid }, - { DLMGMT_CMD_GETNEXT, B_FALSE, sizeof (dlmgmt_door_getnext_t), + { DLMGMT_CMD_GETNEXT, sizeof (dlmgmt_door_getnext_t), sizeof (dlmgmt_getnext_retval_t), dlmgmt_getnext }, - { DLMGMT_CMD_DLS_UPDATE, B_TRUE, sizeof (dlmgmt_upcall_arg_update_t), + { DLMGMT_CMD_DLS_UPDATE, sizeof (dlmgmt_upcall_arg_update_t), sizeof (dlmgmt_update_retval_t), dlmgmt_upcall_update }, - { DLMGMT_CMD_CREATE_LINKID, B_TRUE, sizeof (dlmgmt_door_createid_t), + { DLMGMT_CMD_CREATE_LINKID, sizeof (dlmgmt_door_createid_t), sizeof (dlmgmt_createid_retval_t), dlmgmt_createid }, - { 
DLMGMT_CMD_DESTROY_LINKID, B_TRUE, sizeof (dlmgmt_door_destroyid_t), + { DLMGMT_CMD_DESTROY_LINKID, sizeof (dlmgmt_door_destroyid_t), sizeof (dlmgmt_destroyid_retval_t), dlmgmt_destroyid }, - { DLMGMT_CMD_REMAP_LINKID, B_TRUE, sizeof (dlmgmt_door_remapid_t), + { DLMGMT_CMD_REMAP_LINKID, sizeof (dlmgmt_door_remapid_t), sizeof (dlmgmt_remapid_retval_t), dlmgmt_remapid }, - { DLMGMT_CMD_CREATECONF, B_TRUE, sizeof (dlmgmt_door_createconf_t), + { DLMGMT_CMD_CREATECONF, sizeof (dlmgmt_door_createconf_t), sizeof (dlmgmt_createconf_retval_t), dlmgmt_createconf }, - { DLMGMT_CMD_READCONF, B_FALSE, sizeof (dlmgmt_door_readconf_t), + { DLMGMT_CMD_READCONF, sizeof (dlmgmt_door_readconf_t), sizeof (dlmgmt_readconf_retval_t), dlmgmt_readconf }, - { DLMGMT_CMD_WRITECONF, B_TRUE, sizeof (dlmgmt_door_writeconf_t), + { DLMGMT_CMD_WRITECONF, sizeof (dlmgmt_door_writeconf_t), sizeof (dlmgmt_writeconf_retval_t), dlmgmt_writeconf }, - { DLMGMT_CMD_UP_LINKID, B_TRUE, sizeof (dlmgmt_door_upid_t), + { DLMGMT_CMD_UP_LINKID, sizeof (dlmgmt_door_upid_t), sizeof (dlmgmt_upid_retval_t), dlmgmt_upid }, - { DLMGMT_CMD_SETATTR, B_TRUE, sizeof (dlmgmt_door_setattr_t), + { DLMGMT_CMD_SETATTR, sizeof (dlmgmt_door_setattr_t), sizeof (dlmgmt_setattr_retval_t), dlmgmt_setattr }, - { DLMGMT_CMD_UNSETATTR, B_TRUE, sizeof (dlmgmt_door_unsetattr_t), + { DLMGMT_CMD_UNSETATTR, sizeof (dlmgmt_door_unsetattr_t), sizeof (dlmgmt_unsetattr_retval_t), dlmgmt_unsetconfattr }, - { DLMGMT_CMD_REMOVECONF, B_TRUE, sizeof (dlmgmt_door_removeconf_t), + { DLMGMT_CMD_REMOVECONF, sizeof (dlmgmt_door_removeconf_t), sizeof (dlmgmt_removeconf_retval_t), dlmgmt_removeconf }, - { DLMGMT_CMD_DESTROYCONF, B_TRUE, sizeof (dlmgmt_door_destroyconf_t), + { DLMGMT_CMD_DESTROYCONF, sizeof (dlmgmt_door_destroyconf_t), sizeof (dlmgmt_destroyconf_retval_t), dlmgmt_destroyconf }, - { DLMGMT_CMD_GETATTR, B_FALSE, sizeof (dlmgmt_door_getattr_t), + { DLMGMT_CMD_GETATTR, sizeof (dlmgmt_door_getattr_t), sizeof (dlmgmt_getattr_retval_t), 
dlmgmt_getattr }, - { DLMGMT_CMD_LINKPROP_INIT, B_TRUE, - sizeof (dlmgmt_door_linkprop_init_t), + { DLMGMT_CMD_LINKPROP_INIT, sizeof (dlmgmt_door_linkprop_init_t), sizeof (dlmgmt_linkprop_init_retval_t), dlmgmt_upcall_linkprop_init }, - { DLMGMT_CMD_LINKPROP_GETNEXT, B_FALSE, - sizeof (dlmgmt_door_linkprop_getnext_t), + { DLMGMT_CMD_LINKPROP_GETNEXT, sizeof (dlmgmt_door_linkprop_getnext_t), sizeof (dlmgmt_linkprop_getnext_retval_t), - dlmgmt_linkprop_getnext } + dlmgmt_linkprop_getnext }, + { DLMGMT_CMD_SETZONEID, sizeof (dlmgmt_door_setzoneid_t), + sizeof (dlmgmt_setzoneid_retval_t), dlmgmt_setzoneid }, + { DLMGMT_CMD_ZONEBOOT, sizeof (dlmgmt_door_zoneboot_t), + sizeof (dlmgmt_zoneboot_retval_t), dlmgmt_zoneboot }, + { DLMGMT_CMD_ZONEHALT, sizeof (dlmgmt_door_zonehalt_t), + sizeof (dlmgmt_zonehalt_retval_t), dlmgmt_zonehalt }, + { 0, 0, 0, NULL } }; -#define DLMGMT_INFO_TABLE_SIZE (sizeof (i_dlmgmt_door_info_tbl) / \ - sizeof (i_dlmgmt_door_info_tbl[0])) +static dlmgmt_door_info_t * +dlmgmt_getcmdinfo(int cmd) +{ + dlmgmt_door_info_t *infop = i_dlmgmt_door_info_tbl; + + while (infop->di_handler != NULL) { + if (infop->di_cmd == cmd) + break; + infop++; + } + return (infop); +} /* ARGSUSED */ void dlmgmt_handler(void *cookie, char *argp, size_t argsz, door_desc_t *dp, uint_t n_desc) { + dlmgmt_door_arg_t *door_arg = (dlmgmt_door_arg_t *)(void *)argp; dlmgmt_door_info_t *infop = NULL; dlmgmt_retval_t retval; + ucred_t *cred = NULL; + zoneid_t zoneid; void *retvalp; int err = 0; - int i; - - for (i = 0; i < DLMGMT_INFO_TABLE_SIZE; i++) { - if (i_dlmgmt_door_info_tbl[i].di_cmd == - ((dlmgmt_door_arg_t *)(void *)argp)->ld_cmd) { - infop = i_dlmgmt_door_info_tbl + i; - break; - } - } + infop = dlmgmt_getcmdinfo(door_arg->ld_cmd); if (infop == NULL || argsz != infop->di_reqsz) { err = EINVAL; - goto fail; + goto done; } - if (infop->di_set) { - ucred_t *cred = NULL; - const priv_set_t *eset; - - if (door_ucred(&cred) != 0) { - err = errno; - goto fail; - } - - eset = 
ucred_getprivset(cred, PRIV_EFFECTIVE); - if ((eset == NULL) || - (!priv_ismember(eset, PRIV_SYS_DL_CONFIG) && - !priv_ismember(eset, PRIV_SYS_NET_CONFIG))) { - err = EACCES; - } - ucred_free(cred); - if (err != 0) - goto fail; + if (door_ucred(&cred) != 0 || (zoneid = ucred_getzoneid(cred)) == -1) { + err = errno; + goto done; } /* @@ -1114,11 +1333,15 @@ dlmgmt_handler(void *cookie, char *argp, size_t argsz, door_desc_t *dp, * memory allocated by malloc() would get leaked. Use alloca() instead. */ retvalp = alloca(infop->di_acksz); - infop->di_handler(argp, retvalp); - (void) door_return(retvalp, infop->di_acksz, NULL, 0); - return; + infop->di_handler(argp, retvalp, zoneid, cred); -fail: - retval.lr_err = err; - (void) door_return((char *)&retval, sizeof (retval), NULL, 0); +done: + if (cred != NULL) + ucred_free(cred); + if (err == 0) { + (void) door_return(retvalp, infop->di_acksz, NULL, 0); + } else { + retval.lr_err = err; + (void) door_return((char *)&retval, sizeof (retval), NULL, 0); + } } diff --git a/usr/src/cmd/dlmgmtd/dlmgmt_impl.h b/usr/src/cmd/dlmgmtd/dlmgmt_impl.h index 237910ede6..eb7f6410b2 100644 --- a/usr/src/cmd/dlmgmtd/dlmgmt_impl.h +++ b/usr/src/cmd/dlmgmtd/dlmgmt_impl.h @@ -61,8 +61,11 @@ typedef struct dlmgmt_link_s { datalink_class_t ll_class; uint32_t ll_media; datalink_id_t ll_linkid; - avl_node_t ll_node_by_name; - avl_node_t ll_node_by_id; + zoneid_t ll_zoneid; + boolean_t ll_onloan; + avl_node_t ll_name_node; + avl_node_t ll_id_node; + avl_node_t ll_loan_node; uint32_t ll_flags; uint32_t ll_gen; /* generation number */ } dlmgmt_link_t; @@ -77,21 +80,25 @@ typedef struct dlmgmt_dlconf_s { datalink_class_t ld_class; uint32_t ld_media; int ld_id; + zoneid_t ld_zoneid; uint32_t ld_gen; avl_node_t ld_node; } dlmgmt_dlconf_t; extern boolean_t debug; extern const char *progname; +extern char cachefile[]; extern dladm_handle_t dld_handle; - +extern datalink_id_t dlmgmt_nextlinkid; extern avl_tree_t dlmgmt_name_avl; extern avl_tree_t 
dlmgmt_id_avl; +extern avl_tree_t dlmgmt_loan_avl; extern avl_tree_t dlmgmt_dlconf_avl; boolean_t linkattr_equal(dlmgmt_linkattr_t **, const char *, void *, size_t); -int linkattr_unset(dlmgmt_linkattr_t **, const char *); +dlmgmt_linkattr_t *linkattr_find(dlmgmt_linkattr_t *, const char *); +void linkattr_unset(dlmgmt_linkattr_t **, const char *); int linkattr_set(dlmgmt_linkattr_t **, const char *, void *, size_t, dladm_datatype_t); int linkattr_get(dlmgmt_linkattr_t **, const char *, void **, @@ -100,12 +107,14 @@ int linkprop_getnext(dlmgmt_linkattr_t **, const char *, char **, void **, size_t *, dladm_datatype_t *); void link_destroy(dlmgmt_link_t *); -dlmgmt_link_t *link_by_id(datalink_id_t); -dlmgmt_link_t *link_by_name(const char *); +int link_activate(dlmgmt_link_t *); +boolean_t link_is_visible(dlmgmt_link_t *, zoneid_t); +dlmgmt_link_t *link_by_id(datalink_id_t, zoneid_t); +dlmgmt_link_t *link_by_name(const char *, zoneid_t); int dlmgmt_create_common(const char *, datalink_class_t, - uint32_t, uint32_t, dlmgmt_link_t **); + uint32_t, zoneid_t, uint32_t, dlmgmt_link_t **); int dlmgmt_destroy_common(dlmgmt_link_t *, uint32_t); -void dlmgmt_getattr_common(dlmgmt_linkattr_t **, const char *, +int dlmgmt_getattr_common(dlmgmt_linkattr_t **, const char *, dlmgmt_getattr_retval_t *); void dlmgmt_advance(dlmgmt_link_t *); @@ -113,24 +122,26 @@ void dlmgmt_table_lock(boolean_t); void dlmgmt_table_unlock(); int dlconf_create(const char *, datalink_id_t, datalink_class_t, - uint32_t, dlmgmt_dlconf_t **); + uint32_t, zoneid_t, dlmgmt_dlconf_t **); void dlconf_destroy(dlmgmt_dlconf_t *); void dlmgmt_advance_dlconfid(dlmgmt_dlconf_t *); void dlmgmt_dlconf_table_lock(boolean_t); void dlmgmt_dlconf_table_unlock(void); -int dlmgmt_generate_name(const char *, char *, size_t); +int dlmgmt_generate_name(const char *, char *, size_t, zoneid_t); -int dlmgmt_linktable_init(void); +void dlmgmt_linktable_init(void); void dlmgmt_linktable_fini(void); +int 
dlmgmt_zone_init(zoneid_t); +int dlmgmt_elevate_privileges(void); +int dlmgmt_drop_privileges(); void dlmgmt_handler(void *, char *, size_t, door_desc_t *, uint_t); void dlmgmt_log(int, const char *, ...); -int dlmgmt_write_db_entry(datalink_id_t, uint32_t); -int dlmgmt_delete_db_entry(datalink_id_t, uint32_t); -int dlmgmt_db_init(void); - -#define DLMGMT_TMPFS_DIR "/etc/svc/volatile/dladm" +int dlmgmt_write_db_entry(const char *, dlmgmt_link_t *, uint32_t); +int dlmgmt_delete_db_entry(dlmgmt_link_t *, uint32_t); +int dlmgmt_db_init(zoneid_t); +void dlmgmt_db_fini(zoneid_t); #ifdef __cplusplus } diff --git a/usr/src/cmd/dlmgmtd/dlmgmt_main.c b/usr/src/cmd/dlmgmtd/dlmgmt_main.c index 5a27fdc2c2..7b97664792 100644 --- a/usr/src/cmd/dlmgmtd/dlmgmt_main.c +++ b/usr/src/cmd/dlmgmtd/dlmgmt_main.c @@ -41,12 +41,13 @@ #include <assert.h> #include <errno.h> #include <fcntl.h> -#include <priv_utils.h> +#include <priv.h> #include <signal.h> #include <stdlib.h> #include <stdio.h> #include <strings.h> #include <syslog.h> +#include <zone.h> #include <sys/dld.h> #include <sys/dld_ioc.h> #include <sys/param.h> @@ -66,19 +67,18 @@ static int pfds[2]; static int dlmgmt_door_fd = -1; /* - * This libdladm handle is global so that dlmgmt_upcall_linkprop_init() - * can pass to libdladm. The handle is opened during dlmgmt_init_privileges() - * with "ALL" privileges. It is not able to open DLMGMT_DOOR at that time as - * it hasn't been created yet. This door in the handle is opened in the first - * call to dladm_door_fd(). + * This libdladm handle is global so that dlmgmt_upcall_linkprop_init() can + * pass to libdladm. The handle is opened with "ALL" privileges, before + * privileges are dropped in dlmgmt_drop_privileges(). It is not able to open + * DLMGMT_DOOR at that time as it hasn't been created yet. This door in the + * handle is opened in the first call to dladm_door_fd(). 
*/ dladm_handle_t dld_handle = NULL; static void dlmgmtd_exit(int); static int dlmgmt_init(); static void dlmgmt_fini(); -static int dlmgmt_init_privileges(); -static void dlmgmt_fini_privileges(); +static int dlmgmt_set_privileges(); static int dlmgmt_set_doorfd(boolean_t start) @@ -97,73 +97,162 @@ dlmgmt_set_doorfd(boolean_t start) } static int -dlmgmt_door_init() +dlmgmt_door_init(void) { - int fd; - int err; + int err = 0; - /* - * Create the door file for dlmgmtd. - */ - if ((fd = open(DLMGMT_DOOR, O_CREAT|O_RDONLY, 0644)) == -1) { + if ((dlmgmt_door_fd = door_create(dlmgmt_handler, NULL, + DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) == -1) { err = errno; - dlmgmt_log(LOG_ERR, "open(%s) failed: %s", - DLMGMT_DOOR, strerror(err)); + dlmgmt_log(LOG_ERR, "door_create() failed: %s", + strerror(err)); return (err); } - (void) close(fd); + return (err); +} - if ((dlmgmt_door_fd = door_create(dlmgmt_handler, NULL, - DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) == -1) { +static void +dlmgmt_door_fini(void) +{ + if (dlmgmt_door_fd == -1) + return; + + if (door_revoke(dlmgmt_door_fd) == -1) { + dlmgmt_log(LOG_WARNING, "door_revoke(%s) failed: %s", + DLMGMT_DOOR, strerror(errno)); + } + (void) dlmgmt_set_doorfd(B_FALSE); + dlmgmt_door_fd = -1; +} + +int +dlmgmt_door_attach(zoneid_t zoneid, char *rootdir) +{ + int fd; + int err = 0; + char doorpath[MAXPATHLEN]; + + (void) snprintf(doorpath, sizeof (doorpath), "%s%s", rootdir, + DLMGMT_DOOR); + + /* + * Create the door file for dlmgmtd. + */ + if ((fd = open(doorpath, O_CREAT|O_RDONLY, 0644)) == -1) { err = errno; - dlmgmt_log(LOG_ERR, "door_create() failed: %s", + dlmgmt_log(LOG_ERR, "open(%s) failed: %s", doorpath, strerror(err)); return (err); } + (void) close(fd); + if (chown(doorpath, UID_DLADM, GID_SYS) == -1) + return (errno); + /* * fdetach first in case a previous daemon instance exited * ungracefully. 
*/ - (void) fdetach(DLMGMT_DOOR); - if (fattach(dlmgmt_door_fd, DLMGMT_DOOR) != 0) { + (void) fdetach(doorpath); + if (fattach(dlmgmt_door_fd, doorpath) != 0) { err = errno; - dlmgmt_log(LOG_ERR, "fattach(%s) failed: %s", - DLMGMT_DOOR, strerror(err)); - goto fail; - } - if ((err = dlmgmt_set_doorfd(B_TRUE)) != 0) { - dlmgmt_log(LOG_ERR, "cannot set kernel doorfd: %s", + dlmgmt_log(LOG_ERR, "fattach(%s) failed: %s", doorpath, strerror(err)); - (void) fdetach(DLMGMT_DOOR); - goto fail; + } else if (zoneid == GLOBAL_ZONEID) { + if ((err = dlmgmt_set_doorfd(B_TRUE)) != 0) { + dlmgmt_log(LOG_ERR, "cannot set kernel doorfd: %s", + strerror(err)); + } } - return (0); -fail: - (void) door_revoke(dlmgmt_door_fd); - dlmgmt_door_fd = -1; return (err); } -static void -dlmgmt_door_fini() +/* + * Create the /etc/svc/volatile/dladm/ directory if it doesn't exist, load the + * datalink.conf data for this zone, and create/attach the door rendezvous + * file. + */ +int +dlmgmt_zone_init(zoneid_t zoneid) { - if (dlmgmt_door_fd == -1) - return; + char rootdir[MAXPATHLEN], tmpfsdir[MAXPATHLEN]; + int err; + struct stat statbuf; + + if (zoneid == GLOBAL_ZONEID) { + rootdir[0] = '\0'; + } else if (zone_getattr(zoneid, ZONE_ATTR_ROOT, rootdir, + sizeof (rootdir)) < 0) { + return (errno); + } - if (door_revoke(dlmgmt_door_fd) == -1) { - dlmgmt_log(LOG_WARNING, "door_revoke(%s) failed: %s", - DLMGMT_DOOR, strerror(errno)); + /* + * Create the DLMGMT_TMPFS_DIR directory. 
+ */ + (void) snprintf(tmpfsdir, sizeof (tmpfsdir), "%s%s", rootdir, + DLMGMT_TMPFS_DIR); + if (stat(tmpfsdir, &statbuf) < 0) { + if (mkdir(tmpfsdir, (mode_t)0755) < 0) + return (errno); + } else if ((statbuf.st_mode & S_IFMT) != S_IFDIR) { + return (ENOTDIR); } - (void) fdetach(DLMGMT_DOOR); - (void) dlmgmt_set_doorfd(B_FALSE); + if ((chmod(tmpfsdir, 0755) < 0) || + (chown(tmpfsdir, UID_DLADM, GID_SYS) < 0)) { + return (EPERM); + } + + if ((err = dlmgmt_db_init(zoneid)) != 0) + return (err); + return (dlmgmt_door_attach(zoneid, rootdir)); } +/* + * Initialize each running zone. + */ static int -dlmgmt_init() +dlmgmt_allzones_init(void) { - int err; + int err, i; + zoneid_t *zids = NULL; + uint_t nzids, nzids_saved; + + if (zone_list(NULL, &nzids) != 0) + return (errno); +again: + nzids *= 2; + if ((zids = malloc(nzids * sizeof (zoneid_t))) == NULL) + return (errno); + nzids_saved = nzids; + if (zone_list(zids, &nzids) != 0) { + free(zids); + return (errno); + } + if (nzids > nzids_saved) { + free(zids); + goto again; + } + + for (i = 0; i < nzids; i++) { + if ((err = dlmgmt_zone_init(zids[i])) != 0) + break; + } + free(zids); + return (err); +} + +static int +dlmgmt_init(void) +{ + int err; + char *fmri, *c; + char filename[MAXPATHLEN]; + + if (dladm_open(&dld_handle) != DLADM_STATUS_OK) { + dlmgmt_log(LOG_ERR, "dladm_open() failed"); + return (EPERM); + } if (signal(SIGTERM, dlmgmtd_exit) == SIG_ERR || signal(SIGINT, dlmgmtd_exit) == SIG_ERR) { @@ -173,20 +262,66 @@ dlmgmt_init() return (err); } - if ((err = dlmgmt_linktable_init()) != 0) - return (err); + /* + * First derive the name of the cache file from the FMRI name. This + * cache name is used to keep active datalink configuration. 
+ */ + if (debug) { + (void) snprintf(cachefile, MAXPATHLEN, "%s/%s%s", + DLMGMT_TMPFS_DIR, progname, ".debug.cache"); + } else { + if ((fmri = getenv("SMF_FMRI")) == NULL) { + dlmgmt_log(LOG_ERR, "dlmgmtd is an smf(5) managed " + "service and should not be run from the command " + "line."); + return (EINVAL); + } - if ((err = dlmgmt_db_init()) != 0 || (err = dlmgmt_door_init()) != 0) - dlmgmt_linktable_fini(); + /* + * The FMRI name is in the form of + * svc:/service/service:instance. We need to remove the + * prefix "svc:/" and replace '/' with '-'. The cache file + * name is in the form of "service:instance.cache". + */ + if ((c = strchr(fmri, '/')) != NULL) + c++; + else + c = fmri; + (void) snprintf(filename, MAXPATHLEN, "%s.cache", c); + c = filename; + while ((c = strchr(c, '/')) != NULL) + *c = '-'; + + (void) snprintf(cachefile, MAXPATHLEN, "%s/%s", + DLMGMT_TMPFS_DIR, filename); + } + + dlmgmt_linktable_init(); + if ((err = dlmgmt_door_init()) != 0) + goto done; + + /* + * Load datalink configuration and create dlmgmtd door files for all + * currently running zones. + */ + if ((err = dlmgmt_allzones_init()) != 0) + dlmgmt_door_fini(); +done: + if (err != 0) + dlmgmt_linktable_fini(); return (err); } static void -dlmgmt_fini() +dlmgmt_fini(void) { dlmgmt_door_fini(); dlmgmt_linktable_fini(); + if (dld_handle != NULL) { + dladm_close(dld_handle); + dld_handle = NULL; + } } /* @@ -214,7 +349,6 @@ dlmgmtd_exit(int signo) { (void) close(pfds[1]); dlmgmt_fini(); - dlmgmt_fini_privileges(); exit(EXIT_FAILURE); } @@ -226,65 +360,76 @@ usage(void) } /* - * Set the uid of this daemon to the "dladm" user. Finish the following - * operations before setuid() because they need root privileges: - * - * - create the /etc/svc/volatile/dladm directory; - * - change its uid/gid to "dladm"/"sys"; - * - open the dld control node + * Restrict privileges to only those needed. 
*/ -static int -dlmgmt_init_privileges() +int +dlmgmt_drop_privileges(void) { - struct stat statbuf; - - /* - * Create the DLMGMT_TMPFS_DIR directory. - */ - if (stat(DLMGMT_TMPFS_DIR, &statbuf) < 0) { - if (mkdir(DLMGMT_TMPFS_DIR, (mode_t)0755) < 0) - return (errno); - } else { - if ((statbuf.st_mode & S_IFMT) != S_IFDIR) - return (ENOTDIR); - } - - if ((chmod(DLMGMT_TMPFS_DIR, 0755) < 0) || - (chown(DLMGMT_TMPFS_DIR, UID_DLADM, GID_SYS) < 0)) { - return (EPERM); - } + priv_set_t *pset; + priv_ptype_t ptype; + zoneid_t zoneid = getzoneid(); + int err = 0; - /* - * When dlmgmtd is started at boot, "ALL" privilege is required - * to open the dld control node. The door isn't created yet. - */ - if (dladm_open(&dld_handle) != DLADM_STATUS_OK) { - dlmgmt_log(LOG_ERR, "dladm_open() failed"); - return (EPERM); - } + if ((pset = priv_allocset()) == NULL) + return (errno); /* - * We need PRIV_SYS_DL_CONFIG for the DLDIOC_DOORSERVER ioctl, - * and PRIV_SYS_CONFIG to post sysevents. + * The global zone needs PRIV_PROC_FORK so that it can fork() when it + * issues db ops in non-global zones, PRIV_SYS_CONFIG to post + * sysevents, and PRIV_SYS_DL_CONFIG to initialize link properties in + * dlmgmt_upcall_linkprop_init(). + * + * We remove all privileges from the permitted (and thus effective) + * set in the non-global zone. When executing in a non-global zone, + * dlmgmtd only needs to read and write to files that it already owns. 
*/ - if (__init_daemon_priv(PU_RESETGROUPS|PU_CLEARLIMITSET, UID_DLADM, - GID_SYS, PRIV_SYS_DL_CONFIG, PRIV_SYS_CONFIG, NULL) == -1) { - dladm_close(dld_handle); - dld_handle = NULL; - return (EPERM); + priv_emptyset(pset); + if (zoneid == GLOBAL_ZONEID) { + ptype = PRIV_EFFECTIVE; + if (priv_addset(pset, PRIV_PROC_FORK) == -1 || + priv_addset(pset, PRIV_SYS_CONFIG) == -1 || + priv_addset(pset, PRIV_SYS_DL_CONFIG) == -1) + err = errno; + } else { + ptype = PRIV_PERMITTED; } + if (err == 0 && setppriv(PRIV_SET, ptype, pset) == -1) + err = errno; +done: + priv_freeset(pset); + return (err); +} +int +dlmgmt_elevate_privileges(void) +{ + priv_set_t *privset; + int err = 0; - return (0); + if ((privset = priv_str_to_set("zone", ",", NULL)) == NULL) + return (errno); + if (setppriv(PRIV_SET, PRIV_EFFECTIVE, privset) == -1) + err = errno; + priv_freeset(privset); + return (err); } -static void -dlmgmt_fini_privileges() +/* + * Set the uid of this daemon to the "dladm" user and drop privileges to only + * those needed. 
+ */ +static int +dlmgmt_set_privileges(void) { - if (dld_handle != NULL) { - dladm_close(dld_handle); - dld_handle = NULL; - } + int err; + + (void) setgroups(0, NULL); + if (setegid(GID_SYS) == -1 || seteuid(UID_DLADM) == -1) + err = errno; + else + err = dlmgmt_drop_privileges(); +done: + return (err); } /* @@ -347,7 +492,7 @@ dlmgmt_daemonize(void) int main(int argc, char *argv[]) { - int opt; + int opt, err; progname = strrchr(argv[0], '/'); if (progname != NULL) @@ -371,14 +516,14 @@ main(int argc, char *argv[]) if (!debug && !dlmgmt_daemonize()) return (EXIT_FAILURE); - if ((errno = dlmgmt_init_privileges()) != 0) { - dlmgmt_log(LOG_ERR, "dlmgmt_init_privileges() failed: %s", - strerror(errno)); + if ((err = dlmgmt_init()) != 0) { + dlmgmt_log(LOG_ERR, "unable to initialize daemon: %s", + strerror(err)); goto child_out; - } - - if (dlmgmt_init() != 0) { - dlmgmt_fini_privileges(); + } else if ((err = dlmgmt_set_privileges()) != 0) { + dlmgmt_log(LOG_ERR, "unable to set daemon privileges: %s", + strerror(err)); + dlmgmt_fini(); goto child_out; } diff --git a/usr/src/cmd/dlmgmtd/dlmgmt_util.c b/usr/src/cmd/dlmgmtd/dlmgmt_util.c index 87d2ed394f..58e44b9182 100644 --- a/usr/src/cmd/dlmgmtd/dlmgmt_util.c +++ b/usr/src/cmd/dlmgmtd/dlmgmt_util.c @@ -33,20 +33,26 @@ #include <stddef.h> #include <stdlib.h> #include <stdio.h> +#include <errno.h> #include <strings.h> +#include <string.h> #include <syslog.h> #include <stdarg.h> +#include <zone.h> #include <errno.h> #include <libdlpi.h> #include "dlmgmt_impl.h" /* - * There are two datalink AVL tables. One table (dlmgmt_name_avl) is keyed by - * the link name, and the other (dlmgmt_id_avl) is keyed by the link id. - * Each link will be present in both tables. + * There are three datalink AVL tables. The dlmgmt_name_avl tree contains all + * datalinks and is keyed by zoneid and link name. The dlmgmt_id_avl also + * contains all datalinks, and it is keyed by link ID. 
The dlmgmt_loan_avl is + * keyed by link name, and contains the set of global-zone links that are + * currently on loan to non-global zones. */ avl_tree_t dlmgmt_name_avl; avl_tree_t dlmgmt_id_avl; +avl_tree_t dlmgmt_loan_avl; avl_tree_t dlmgmt_dlconf_avl; @@ -58,20 +64,14 @@ static pthread_rwlock_t dlmgmt_dlconf_lock = PTHREAD_RWLOCK_INITIALIZER; typedef struct dlmgmt_prefix { struct dlmgmt_prefix *lp_next; char lp_prefix[MAXLINKNAMELEN]; + zoneid_t lp_zoneid; uint_t lp_nextppa; } dlmgmt_prefix_t; -static dlmgmt_prefix_t *dlmgmt_prefixlist; +static dlmgmt_prefix_t dlmgmt_prefixlist; -static datalink_id_t dlmgmt_nextlinkid; +datalink_id_t dlmgmt_nextlinkid; static datalink_id_t dlmgmt_nextconfid = 1; -static int linkattr_add(dlmgmt_linkattr_t **, - dlmgmt_linkattr_t *); -static int linkattr_rm(dlmgmt_linkattr_t **, - dlmgmt_linkattr_t *); -static int link_create(const char *, datalink_class_t, uint32_t, - uint32_t, dlmgmt_link_t **); - static void dlmgmt_advance_linkid(dlmgmt_link_t *); static void dlmgmt_advance_ppa(dlmgmt_link_t *); @@ -101,6 +101,24 @@ cmp_link_by_name(const void *v1, const void *v2) return ((cmp == 0) ? 0 : ((cmp < 0) ? -1 : 1)); } +/* + * Note that the zoneid associated with a link is effectively part of its + * name. This is essentially what results in having each zone have disjoint + * datalink namespaces. + */ +static int +cmp_link_by_zname(const void *v1, const void *v2) +{ + const dlmgmt_link_t *link1 = v1; + const dlmgmt_link_t *link2 = v2; + + if (link1->ll_zoneid < link2->ll_zoneid) + return (-1); + if (link1->ll_zoneid > link2->ll_zoneid) + return (1); + return (cmp_link_by_name(link1, link2)); +} + static int cmp_link_by_id(const void *v1, const void *v2) { @@ -129,49 +147,46 @@ cmp_dlconf_by_id(const void *v1, const void *v2) return (1); } -int -dlmgmt_linktable_init() +void +dlmgmt_linktable_init(void) { /* - * Initialize the prefix list. First add the "net" prefix to the list. + * Initialize the prefix list. 
First add the "net" prefix for the + * global zone to the list. */ - dlmgmt_prefixlist = malloc(sizeof (dlmgmt_prefix_t)); - if (dlmgmt_prefixlist == NULL) { - dlmgmt_log(LOG_WARNING, "dlmgmt_linktable_init() failed: %s", - strerror(ENOMEM)); - return (ENOMEM); - } - - dlmgmt_prefixlist->lp_next = NULL; - dlmgmt_prefixlist->lp_nextppa = 0; - (void) strlcpy(dlmgmt_prefixlist->lp_prefix, "net", MAXLINKNAMELEN); + dlmgmt_prefixlist.lp_next = NULL; + dlmgmt_prefixlist.lp_zoneid = GLOBAL_ZONEID; + dlmgmt_prefixlist.lp_nextppa = 0; + (void) strlcpy(dlmgmt_prefixlist.lp_prefix, "net", MAXLINKNAMELEN); - avl_create(&dlmgmt_name_avl, cmp_link_by_name, sizeof (dlmgmt_link_t), - offsetof(dlmgmt_link_t, ll_node_by_name)); + avl_create(&dlmgmt_name_avl, cmp_link_by_zname, sizeof (dlmgmt_link_t), + offsetof(dlmgmt_link_t, ll_name_node)); avl_create(&dlmgmt_id_avl, cmp_link_by_id, sizeof (dlmgmt_link_t), - offsetof(dlmgmt_link_t, ll_node_by_id)); + offsetof(dlmgmt_link_t, ll_id_node)); + avl_create(&dlmgmt_loan_avl, cmp_link_by_name, sizeof (dlmgmt_link_t), + offsetof(dlmgmt_link_t, ll_loan_node)); avl_create(&dlmgmt_dlconf_avl, cmp_dlconf_by_id, sizeof (dlmgmt_dlconf_t), offsetof(dlmgmt_dlconf_t, ld_node)); dlmgmt_nextlinkid = 1; - return (0); } void -dlmgmt_linktable_fini() +dlmgmt_linktable_fini(void) { - dlmgmt_prefix_t *lpp, *next; + dlmgmt_prefix_t *lpp, *next; - for (lpp = dlmgmt_prefixlist; lpp != NULL; lpp = next) { + for (lpp = dlmgmt_prefixlist.lp_next; lpp != NULL; lpp = next) { next = lpp->lp_next; free(lpp); } avl_destroy(&dlmgmt_dlconf_avl); avl_destroy(&dlmgmt_name_avl); + avl_destroy(&dlmgmt_loan_avl); avl_destroy(&dlmgmt_id_avl); } -static int +static void linkattr_add(dlmgmt_linkattr_t **headp, dlmgmt_linkattr_t *attrp) { if (*headp == NULL) { @@ -181,10 +196,9 @@ linkattr_add(dlmgmt_linkattr_t **headp, dlmgmt_linkattr_t *attrp) attrp->lp_next = *headp; *headp = attrp; } - return (0); } -static int +static void linkattr_rm(dlmgmt_linkattr_t **headp, 
dlmgmt_linkattr_t *attrp) { dlmgmt_linkattr_t *next, *prev; @@ -197,8 +211,18 @@ linkattr_rm(dlmgmt_linkattr_t **headp, dlmgmt_linkattr_t *attrp) prev->lp_next = next; else *headp = next; +} - return (0); +dlmgmt_linkattr_t * +linkattr_find(dlmgmt_linkattr_t *headp, const char *attr) +{ + dlmgmt_linkattr_t *attrp; + + for (attrp = headp; attrp != NULL; attrp = attrp->lp_next) { + if (strcmp(attrp->lp_name, attr) == 0) + break; + } + return (attrp); } int @@ -206,24 +230,17 @@ linkattr_set(dlmgmt_linkattr_t **headp, const char *attr, void *attrval, size_t attrsz, dladm_datatype_t type) { dlmgmt_linkattr_t *attrp; - int err; - - /* - * See whether the attr is already set. - */ - for (attrp = *headp; attrp != NULL; attrp = attrp->lp_next) { - if (strcmp(attrp->lp_name, attr) == 0) - break; - } + void *newval; + boolean_t new; + attrp = linkattr_find(*headp, attr); if (attrp != NULL) { /* * It is already set. If the value changed, update it. */ if (linkattr_equal(headp, attr, attrval, attrsz)) return (0); - - free(attrp->lp_val); + new = B_FALSE; } else { /* * It is not set yet, allocate the linkattr and prepend to the @@ -232,73 +249,43 @@ linkattr_set(dlmgmt_linkattr_t **headp, const char *attr, void *attrval, if ((attrp = calloc(1, sizeof (dlmgmt_linkattr_t))) == NULL) return (ENOMEM); - if ((err = linkattr_add(headp, attrp)) != 0) { - free(attrp); - return (err); - } (void) strlcpy(attrp->lp_name, attr, MAXLINKATTRLEN); + new = B_TRUE; } - if ((attrp->lp_val = calloc(1, attrsz)) == NULL) { - (void) linkattr_rm(headp, attrp); - free(attrp); + if ((newval = calloc(1, attrsz)) == NULL) { + if (new) + free(attrp); return (ENOMEM); } + if (!new) + free(attrp->lp_val); + attrp->lp_val = newval; bcopy(attrval, attrp->lp_val, attrsz); attrp->lp_sz = attrsz; attrp->lp_type = type; attrp->lp_linkprop = dladm_attr_is_linkprop(attr); + if (new) + linkattr_add(headp, attrp); return (0); } -int +void linkattr_unset(dlmgmt_linkattr_t **headp, const char *attr) { - 
dlmgmt_linkattr_t *attrp, *prev; - - /* - * See whether the attr exists. - */ - for (prev = NULL, attrp = *headp; attrp != NULL; - prev = attrp, attrp = attrp->lp_next) { - if (strcmp(attrp->lp_name, attr) == 0) - break; - } - - /* - * This attribute is not set in the first place. Return success. - */ - if (attrp == NULL) - return (0); - - /* - * Remove this attr from the list. - */ - if (prev == NULL) - *headp = attrp->lp_next; - else - prev->lp_next = attrp->lp_next; + dlmgmt_linkattr_t *attrp; - free(attrp->lp_val); - free(attrp); - return (0); + if ((attrp = linkattr_find(*headp, attr)) != NULL) + linkattr_rm(headp, attrp); } int linkattr_get(dlmgmt_linkattr_t **headp, const char *attr, void **attrvalp, size_t *attrszp, dladm_datatype_t *typep) { - dlmgmt_linkattr_t *attrp = *headp; + dlmgmt_linkattr_t *attrp; - /* - * find the specific attr. - */ - for (attrp = *headp; attrp != NULL; attrp = attrp->lp_next) { - if (strcmp(attrp->lp_name, attr) == 0) - break; - } - - if (attrp == NULL) + if ((attrp = linkattr_find(*headp, attr)) == NULL) return (ENOENT); *attrvalp = attrp->lp_val; @@ -369,7 +356,7 @@ dlmgmt_table_lock(boolean_t write) } void -dlmgmt_table_unlock() +dlmgmt_table_unlock(void) { (void) pthread_rwlock_unlock(&dlmgmt_avl_lock); (void) pthread_mutex_lock(&dlmgmt_avl_mutex); @@ -377,34 +364,6 @@ dlmgmt_table_unlock() (void) pthread_mutex_unlock(&dlmgmt_avl_mutex); } -static int -link_create(const char *name, datalink_class_t class, uint32_t media, - uint32_t flags, dlmgmt_link_t **linkpp) -{ - dlmgmt_link_t *linkp = NULL; - int err = 0; - - if (dlmgmt_nextlinkid == DATALINK_INVALID_LINKID) { - err = ENOSPC; - goto done; - } - - if ((linkp = calloc(1, sizeof (dlmgmt_link_t))) == NULL) { - err = ENOMEM; - goto done; - } - - (void) strlcpy(linkp->ll_link, name, MAXLINKNAMELEN); - linkp->ll_class = class; - linkp->ll_media = media; - linkp->ll_linkid = dlmgmt_nextlinkid; - linkp->ll_flags = flags; - linkp->ll_gen = 0; -done: - *linkpp = linkp; - return 
(err); -} - void link_destroy(dlmgmt_link_t *linkp) { @@ -418,56 +377,129 @@ link_destroy(dlmgmt_link_t *linkp) free(linkp); } +/* + * Set the DLMGMT_ACTIVE flag on the link to note that it is active. When a + * link becomes active and it belongs to a non-global zone, it is also added + * to that zone. + */ +int +link_activate(dlmgmt_link_t *linkp) +{ + int err = 0; + zoneid_t zoneid; + + if (zone_check_datalink(&zoneid, linkp->ll_linkid) == 0) { + /* + * This link was already added to a non-global zone. This can + * happen if dlmgmtd is restarted. + */ + if (zoneid != linkp->ll_zoneid) { + if (link_by_name(linkp->ll_link, zoneid) != NULL) { + err = EEXIST; + goto done; + } + avl_remove(&dlmgmt_name_avl, linkp); + linkp->ll_zoneid = zoneid; + avl_add(&dlmgmt_name_avl, linkp); + avl_add(&dlmgmt_loan_avl, linkp); + linkp->ll_onloan = B_TRUE; + } + } else if (linkp->ll_zoneid != GLOBAL_ZONEID) { + err = zone_add_datalink(linkp->ll_zoneid, linkp->ll_linkid); + } +done: + if (err == 0) + linkp->ll_flags |= DLMGMT_ACTIVE; + return (err); +} + +/* + * Is linkp visible from the caller's zoneid? It is if the link is in the + * same zone as the caller, or if the caller is in the global zone and the + * link is on loan to a non-global zone. 
+ */ +boolean_t +link_is_visible(dlmgmt_link_t *linkp, zoneid_t zoneid) +{ + return (linkp->ll_zoneid == zoneid || + (zoneid == GLOBAL_ZONEID && linkp->ll_onloan)); +} + dlmgmt_link_t * -link_by_id(datalink_id_t linkid) +link_by_id(datalink_id_t linkid, zoneid_t zoneid) { - dlmgmt_link_t link; + dlmgmt_link_t link, *linkp; link.ll_linkid = linkid; - return (avl_find(&dlmgmt_id_avl, &link, NULL)); + linkp = avl_find(&dlmgmt_id_avl, &link, NULL); + if (zoneid != GLOBAL_ZONEID && linkp->ll_zoneid != zoneid) + linkp = NULL; + return (linkp); } dlmgmt_link_t * -link_by_name(const char *name) +link_by_name(const char *name, zoneid_t zoneid) { - dlmgmt_link_t link; + dlmgmt_link_t link, *linkp; (void) strlcpy(link.ll_link, name, MAXLINKNAMELEN); - return (avl_find(&dlmgmt_name_avl, &link, NULL)); + link.ll_zoneid = zoneid; + linkp = avl_find(&dlmgmt_name_avl, &link, NULL); + if (linkp == NULL && zoneid == GLOBAL_ZONEID) { + /* The link could be on loan to a non-global zone? */ + linkp = avl_find(&dlmgmt_loan_avl, &link, NULL); + } + return (linkp); } int dlmgmt_create_common(const char *name, datalink_class_t class, uint32_t media, - uint32_t flags, dlmgmt_link_t **linkpp) + zoneid_t zoneid, uint32_t flags, dlmgmt_link_t **linkpp) { - dlmgmt_link_t link, *linkp, *tmp; + dlmgmt_link_t *linkp = NULL; avl_index_t name_where, id_where; - int err; + int err = 0; - /* - * Validate the link. - */ if (!dladm_valid_linkname(name)) return (EINVAL); + if (dlmgmt_nextlinkid == DATALINK_INVALID_LINKID) + return (ENOSPC); - /* - * Check to see whether this is an existing link name. 
- */ - (void) strlcpy(link.ll_link, name, MAXLINKNAMELEN); - if ((linkp = avl_find(&dlmgmt_name_avl, &link, &name_where)) != NULL) - return (EEXIST); + if ((linkp = calloc(1, sizeof (dlmgmt_link_t))) == NULL) { + err = ENOMEM; + goto done; + } - if ((err = link_create(name, class, media, flags, &linkp)) != 0) - return (err); + (void) strlcpy(linkp->ll_link, name, MAXLINKNAMELEN); + linkp->ll_class = class; + linkp->ll_media = media; + linkp->ll_linkid = dlmgmt_nextlinkid; + linkp->ll_zoneid = zoneid; + linkp->ll_gen = 0; + + if (avl_find(&dlmgmt_name_avl, linkp, &name_where) != NULL || + avl_find(&dlmgmt_id_avl, linkp, &id_where) != NULL) { + err = EEXIST; + goto done; + } - link.ll_linkid = linkp->ll_linkid; - tmp = avl_find(&dlmgmt_id_avl, &link, &id_where); - assert(tmp == NULL); avl_insert(&dlmgmt_name_avl, linkp, name_where); avl_insert(&dlmgmt_id_avl, linkp, id_where); + + if ((flags & DLMGMT_ACTIVE) && (err = link_activate(linkp)) != 0) { + avl_remove(&dlmgmt_name_avl, linkp); + avl_remove(&dlmgmt_id_avl, linkp); + goto done; + } + + linkp->ll_flags = flags; dlmgmt_advance(linkp); *linkpp = linkp; - return (0); + +done: + if (err != 0) + free(linkp); + return (err); } int @@ -479,8 +511,9 @@ dlmgmt_destroy_common(dlmgmt_link_t *linkp, uint32_t flags) */ return (ENOENT); } + linkp->ll_flags &= ~flags; - if (!(linkp->ll_flags & DLMGMT_PERSIST)) { + if (flags & DLMGMT_PERSIST) { dlmgmt_linkattr_t *next, *attrp; for (attrp = linkp->ll_head; attrp != NULL; attrp = next) { @@ -491,6 +524,12 @@ dlmgmt_destroy_common(dlmgmt_link_t *linkp, uint32_t flags) linkp->ll_head = NULL; } + if ((flags & DLMGMT_ACTIVE) && linkp->ll_zoneid != GLOBAL_ZONEID) { + (void) zone_remove_datalink(linkp->ll_zoneid, linkp->ll_linkid); + if (linkp->ll_onloan) + avl_remove(&dlmgmt_loan_avl, linkp); + } + if (linkp->ll_flags == 0) { avl_remove(&dlmgmt_id_avl, linkp); avl_remove(&dlmgmt_name_avl, linkp); @@ -500,7 +539,7 @@ dlmgmt_destroy_common(dlmgmt_link_t *linkp, uint32_t flags) return 
(0); } -void +int dlmgmt_getattr_common(dlmgmt_linkattr_t **headp, const char *attr, dlmgmt_getattr_retval_t *retvalp) { @@ -511,19 +550,16 @@ dlmgmt_getattr_common(dlmgmt_linkattr_t **headp, const char *attr, err = linkattr_get(headp, attr, &attrval, &attrsz, &attrtype); if (err != 0) - goto done; + return (err); assert(attrsz > 0); - if (attrsz > MAXLINKATTRVALLEN) { - err = EINVAL; - goto done; - } + if (attrsz > MAXLINKATTRVALLEN) + return (EINVAL); retvalp->lr_type = attrtype; retvalp->lr_attrsz = attrsz; bcopy(attrval, retvalp->lr_attrval, attrsz); -done: - retvalp->lr_err = err; + return (0); } void @@ -536,14 +572,14 @@ dlmgmt_dlconf_table_lock(boolean_t write) } void -dlmgmt_dlconf_table_unlock() +dlmgmt_dlconf_table_unlock(void) { (void) pthread_rwlock_unlock(&dlmgmt_dlconf_lock); } int dlconf_create(const char *name, datalink_id_t linkid, datalink_class_t class, - uint32_t media, dlmgmt_dlconf_t **dlconfpp) + uint32_t media, zoneid_t zoneid, dlmgmt_dlconf_t **dlconfpp) { dlmgmt_dlconf_t *dlconfp = NULL; int err = 0; @@ -563,6 +599,7 @@ dlconf_create(const char *name, datalink_id_t linkid, datalink_class_t class, dlconfp->ld_class = class; dlconfp->ld_media = media; dlconfp->ld_id = dlmgmt_nextconfid; + dlconfp->ld_zoneid = zoneid; done: *dlconfpp = dlconfp; @@ -583,16 +620,19 @@ dlconf_destroy(dlmgmt_dlconf_t *dlconfp) } int -dlmgmt_generate_name(const char *prefix, char *name, size_t size) +dlmgmt_generate_name(const char *prefix, char *name, size_t size, + zoneid_t zoneid) { dlmgmt_prefix_t *lpp, *prev = NULL; + dlmgmt_link_t link, *linkp; /* * See whether the requested prefix is already in the list. 
*/ - for (lpp = dlmgmt_prefixlist; lpp != NULL; prev = lpp, - lpp = lpp->lp_next) { - if (strcmp(prefix, lpp->lp_prefix) == 0) + for (lpp = &dlmgmt_prefixlist; lpp != NULL; + prev = lpp, lpp = lpp->lp_next) { + if (lpp->lp_zoneid == zoneid && + strcmp(prefix, lpp->lp_prefix) == 0) break; } @@ -600,8 +640,6 @@ dlmgmt_generate_name(const char *prefix, char *name, size_t size) * Not found. */ if (lpp == NULL) { - dlmgmt_link_t *linkp, link; - assert(prev != NULL); /* @@ -612,6 +650,7 @@ dlmgmt_generate_name(const char *prefix, char *name, size_t size) prev->lp_next = lpp; lpp->lp_next = NULL; + lpp->lp_zoneid = zoneid; lpp->lp_nextppa = 0; (void) strlcpy(lpp->lp_prefix, prefix, MAXLINKNAMELEN); @@ -619,9 +658,9 @@ dlmgmt_generate_name(const char *prefix, char *name, size_t size) * Now determine this prefix's nextppa. */ (void) snprintf(link.ll_link, MAXLINKNAMELEN, "%s%d", - prefix, lpp->lp_nextppa); - linkp = avl_find(&dlmgmt_name_avl, &link, NULL); - if (linkp != NULL) + prefix, 0); + link.ll_zoneid = zoneid; + if ((linkp = avl_find(&dlmgmt_name_avl, &link, NULL)) != NULL) dlmgmt_advance_ppa(linkp); } @@ -641,6 +680,7 @@ dlmgmt_advance_ppa(dlmgmt_link_t *linkp) { dlmgmt_prefix_t *lpp; char prefix[MAXLINKNAMELEN]; + char linkname[MAXLINKNAMELEN]; uint_t start, ppa; (void) dlpi_parselink(linkp->ll_link, prefix, &ppa); @@ -648,8 +688,9 @@ dlmgmt_advance_ppa(dlmgmt_link_t *linkp) /* * See whether the requested prefix is already in the list. */ - for (lpp = dlmgmt_prefixlist; lpp != NULL; lpp = lpp->lp_next) { - if (strcmp(prefix, lpp->lp_prefix) == 0) + for (lpp = &dlmgmt_prefixlist; lpp != NULL; lpp = lpp->lp_next) { + if (lpp->lp_zoneid == linkp->ll_zoneid && + strcmp(prefix, lpp->lp_prefix) == 0) break; } @@ -664,15 +705,13 @@ dlmgmt_advance_ppa(dlmgmt_link_t *linkp) linkp = AVL_NEXT(&dlmgmt_name_avl, linkp); while (lpp->lp_nextppa != start) { if (lpp->lp_nextppa == (uint_t)-1) { - dlmgmt_link_t link; - /* * wrapped around. search from <prefix>1. 
*/ lpp->lp_nextppa = 0; - (void) snprintf(link.ll_link, MAXLINKNAMELEN, + (void) snprintf(linkname, MAXLINKNAMELEN, "%s%d", lpp->lp_prefix, lpp->lp_nextppa); - linkp = avl_find(&dlmgmt_name_avl, &link, NULL); + linkp = link_by_name(linkname, lpp->lp_zoneid); if (linkp == NULL) return; } else { @@ -706,15 +745,11 @@ dlmgmt_advance_linkid(dlmgmt_link_t *linkp) do { if (dlmgmt_nextlinkid == DATALINK_MAX_LINKID) { - dlmgmt_link_t link; - /* * wrapped around. search from 1. */ dlmgmt_nextlinkid = 1; - link.ll_linkid = 1; - linkp = avl_find(&dlmgmt_id_avl, &link, NULL); - if (linkp == NULL) + if ((linkp = link_by_id(1, GLOBAL_ZONEID)) == NULL) return; } else { dlmgmt_nextlinkid++; diff --git a/usr/src/cmd/mdb/common/modules/mac/mac.c b/usr/src/cmd/mdb/common/modules/mac/mac.c index b73871a96d..4a56960ca7 100644 --- a/usr/src/cmd/mdb/common/modules/mac/mac.c +++ b/usr/src/cmd/mdb/common/modules/mac/mac.c @@ -284,10 +284,8 @@ mac_flow_dcmd_output(uintptr_t addr, uint_t flags, uint_t args) fe.fe_flags, flow_flag_bits); mdb_snprintf(flow_type, 2 * FLOW_MAX_TYPE, "%hb", fe.fe_type, flow_type_bits); - mdb_printf("%?p %-24s %10s %10s " - "%20s %4d\n", - addr, fe.fe_flow_name, flow_type, flow_flags, - func_name, fe.fe_zoneid); + mdb_printf("%?p %-24s %10s %10s %20s\n", + addr, fe.fe_flow_name, flow_type, flow_flags, func_name); break; } case MAC_FLOW_RX: { diff --git a/usr/src/cmd/svc/milestone/Makefile b/usr/src/cmd/svc/milestone/Makefile index 496b997ad9..8f51fd3e7e 100644 --- a/usr/src/cmd/svc/milestone/Makefile +++ b/usr/src/cmd/svc/milestone/Makefile @@ -40,6 +40,7 @@ FSMANIFESTS= $(FSSVCS:%=$(ROOTSVCSYSTEMFILESYSTEM)/%) NETSVCS= \ network-initial.xml \ + network-iptun.xml \ network-loopback.xml \ network-physical.xml \ network-routing-setup.xml \ @@ -104,6 +105,7 @@ SVCMETHOD=\ manifest-import \ net-loopback \ net-init \ + net-iptun \ net-nwam \ net-physical \ net-routing-setup \ diff --git a/usr/src/cmd/svc/milestone/net-init b/usr/src/cmd/svc/milestone/net-init index 
7f0804af67..ad2ff3d963 100644 --- a/usr/src/cmd/svc/milestone/net-init +++ b/usr/src/cmd/svc/milestone/net-init @@ -69,11 +69,6 @@ encr=`/usr/bin/awk -F: '/^root:/ {print $2}' /etc/shadow` [ -z "$encr" ] || /usr/sbin/ndd -set /dev/tcp tcp_1948_phrase $encr unset encr -# -# Get values for TCP_STRONG_ISS, ACCEPT6TO4RELAY and RELAY6TO4ADDR. -# -[ -f /etc/default/inetinit ] && . /etc/default/inetinit - # Set the SDP system Policy. This needs to happen after basic # networking is up but before any networking services that might # want to use SDP are enabled @@ -90,85 +85,10 @@ fi # See /etc/default/inetinit for settings and further info on TCP_STRONG_ISS. # If not set, use TCP's internal default setting. # +[ -f /etc/default/inetinit ] && . /etc/default/inetinit if [ $TCP_STRONG_ISS ]; then /usr/sbin/ndd -set /dev/tcp tcp_strong_iss $TCP_STRONG_ISS fi -# -# Configure tunnels which were deferred by /lib/svc/method/net-physical -# (the svc:/network/physical service) since it depends on the tunnel endpoints -# being reachable i.e. routing must be running. -# -# WARNING: you may wish to turn OFF forwarding if you haven't already, because -# of various possible security vulnerabilities when configuring tunnels for -# Virtual Private Network (VPN) construction. -# -# Also, if names are used in the /etc/hostname.ip.tun* file, those names -# have to be in either DNS (and DNS is used) or in /etc/hosts, because this -# file is executed before NIS or NIS+ is started. -# - -# -# IPv4 tunnels -# The second component of the name must be either "ip" or "ip6". -# -interface_names="`/usr/bin/ls /etc/hostname.ip*.*[0-9] 2>/dev/null | \ - /usr/bin/grep '/etc/hostname\.ip6\{0,1\}\.'`" -if [ -n "$interface_names" ]; then - ( - echo "configuring IPv4 tunnels:\c" - # Extract the part after the first '.' - set -- `for intr in $interface_names; do \ - /usr/bin/expr //$intr : '[^.]*\.\(.*\)$'; done` - while [ $# -ge 1 ]; do - # Skip empty files - if [ ! 
-s /etc/hostname\.$1 ]; then - shift - continue - fi - /usr/sbin/ifconfig $1 plumb - while read ifcmds; do - if [ -n "$ifcmds" ]; then - /usr/sbin/ifconfig $1 inet $ifcmds - fi - done </etc/hostname\.$1 >/dev/null - echo " $1\c" - shift - done - echo "." - ) -fi - -# -# IPv6 Tunnels -# The second component of the name must be either "ip" or "ip6". -# -interface_names="`/usr/bin/ls /etc/hostname6.ip*.*[0-9] 2>/dev/null | \ - /usr/bin/grep '/etc/hostname6\.ip6\{0,1\}\.'`" -if [ -n "$interface_names" ]; then - ( - echo "configuring IPv6 tunnels:\c" - # Extract the part after the first '.' - set -- `for intr in $interface_names; do \ - /usr/bin/expr //$intr : '[^.]*\.\(.*\)$'; done` - while [ $# -ge 1 ]; do - # Skip empty files - if [ ! -s /etc/hostname6\.$1 ]; then - shift - continue - fi - /usr/sbin/ifconfig $1 inet6 plumb - while read ifcmds; do - if [ -n "$ifcmds" ]; then - /usr/sbin/ifconfig $1 inet6 $ifcmds - fi - done </etc/hostname6\.$1 > /dev/null - echo " $1\c" - shift - done - echo "." - ) -fi - # Clear exit status. exit $SMF_EXIT_OK diff --git a/usr/src/cmd/svc/milestone/net-iptun b/usr/src/cmd/svc/milestone/net-iptun new file mode 100644 index 0000000000..13f27fe795 --- /dev/null +++ b/usr/src/cmd/svc/milestone/net-iptun @@ -0,0 +1,139 @@ +#!/sbin/sh +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# This service configures IP tunnel links and IP interfaces over IP +# tunnels. +# + +. /lib/svc/share/smf_include.sh + +# +# Configure tunnels which were deferred by /lib/svc/method/net-physical (the +# svc:/network/physical service) since it depends on the tunnel source +# addresses being available. +# +# WARNING: you may wish to turn OFF forwarding if you haven't already, because +# of various possible security vulnerabilities when configuring tunnels for +# Virtual Private Network (VPN) construction. +# +# Also, if names are used in the /etc/hostname*.* files, those names have to +# be in either DNS (and DNS is used) or in /etc/hosts, because this file is +# executed before NIS or NIS+ is started. +# + +# +# get_tunnel_links: print the names of the tunnel links currently configured +# on the running system. +# +get_tunnel_links () +{ + /sbin/dladm show-iptun -p -o link +} + +# plumb_tunnel <intf_name> <net_type> <intf_file> +plumb_tunnel () +{ + /sbin/ifconfig $1 $2 plumb + while read ifcmds; do + if [ -n "$ifcmds" ]; then + /sbin/ifconfig $1 $2 $ifcmds + fi + done < $3 > /dev/null + /sbin/ifconfig $1 $2 up +} + +case "$1" in +start) + # First, bring up tunnel links + /sbin/dladm up-iptun + + # + # Get the list of IP tunnel interfaces we'll need to configure. These + # are comprised of IP interfaces over the tunnels we've just brought + # up in the above dladm command, and the implicit tunnels named "ip.*" + # that we'll also create for backward compatibility. When we build + # the list of implicit tunnels, we have to make sure that they're not + # different kinds of links that are simply named "ip.*". 
+ # + tunnel_links=`get_tunnel_links` + implicit_tunnel_names=`/usr/bin/ls -1 /etc/hostname.ip*.*[0-9] \ + /etc/hostname6.ip*.*[0-9] 2> /dev/null | /usr/bin/cut -f2- -d. | \ + /usr/bin/sort -u` + for intf_name in $implicit_tunnel_names; do + /sbin/dladm show-link -pP $intf_name > /dev/null 2>&1 + if [ $? -ne 0 ]; then + implicit_tunnels="$implicit_tunnels $intf_name" + fi + done + tunnel_interfaces=`for intf in $tunnel_links $implicit_tunnels; do \ + echo $intf; done | /usr/bin/sort -u` + + for intf_name in $tunnel_interfaces; do + if [ -f /etc/hostname.$intf_name ]; then + plumb_tunnel $intf_name inet /etc/hostname.$intf_name + fi + if [ -f /etc/hostname6.$intf_name ]; then + plumb_tunnel $intf_name inet6 /etc/hostname6.$intf_name + fi + done + + # + # Set 6to4 Relay Router communication support policy and, if + # applicable, the destination Relay Router IPv4 address. See + # /etc/default/inetinit for setting and further info on + # ACCEPT6TO4RELAY and RELAY6TO4ADDR. If ACCEPT6TO4RELAY=NO, the + # default value in the kernel will be used. + # + [ -f /etc/default/inetinit ] && . 
/etc/default/inetinit + ACCEPT6TO4RELAY=`echo "$ACCEPT6TO4RELAY" | /usr/bin/tr '[A-Z]' '[a-z]'` + if [ "$ACCEPT6TO4RELAY" = yes ]; then + if [ "$RELAY6TO4ADDR" ]; then + /usr/sbin/6to4relay -e -a $RELAY6TO4ADDR + else + /usr/sbin/6to4relay -e + fi + fi + ;; + +stop) + tunnel_links=`get_tunnel_links` + + # Unplumb IP interfaces + for tun in $tunnel_links; do + /sbin/ifconfig $tun unplumb > /dev/null 2>&1 + /sbin/ifconfig $tun inet6 unplumb > /dev/null 2>&1 + done + + # Take down the IP tunnel links + /sbin/dladm down-iptun + ;; + +*) + echo "Usage: $0 { start | stop }" + exit 1 + ;; +esac + +exit $SMF_EXIT_OK diff --git a/usr/src/cmd/svc/milestone/net-physical b/usr/src/cmd/svc/milestone/net-physical index 7fabad7938..2e512093c3 100644 --- a/usr/src/cmd/svc/milestone/net-physical +++ b/usr/src/cmd/svc/milestone/net-physical @@ -100,6 +100,36 @@ fi # /sbin/ifconfig -auD4 netmask + broadcast + +is_iptun () +{ + intf=$1 + # Is this a persistent IP tunnel link? + /sbin/dladm show-iptun -P $intf > /dev/null 2>&1 + if [ $? -eq 0 ]; then + return 0 + fi + # Is this an implicit IP tunnel (i.e., ip.tun0) + ORIGIFS="$IFS" + IFS="$IFS." + set -- $intf + IFS="$ORIGIFS" + if [ $# -eq 2 -a \( "$1" = "ip" -o "$1" = "ip6" \) ]; then + # + # It looks like one, but another type of link might be + # using a name that looks like an implicit IP tunnel. + # If dladm show-link -P finds it, then it's not an IP + # tunnel. + # + /sbin/dladm show-link -Pp $intf > /dev/null 2>&1 + if [ $? -eq 0 ]; then + return 1 + else + return 0 + fi + fi + return 1 +} + # # All the IPv4 and IPv6 interfaces are plumbed before doing any # interface configuration. This prevents errors from plumb failures @@ -127,6 +157,11 @@ if [ "$interface_names" != "/etc/hostname.*[0-9]" ]; then done shift + # skip IP tunnel interfaces plumbed by net-iptun. 
+ if is_iptun $intf_name; then + continue + fi + read one rest < /etc/hostname.$intf_name if [ "$one" = ipmp ]; then ipmp_list="$ipmp_list $intf_name" @@ -156,6 +191,11 @@ if [ "$interface_names" != "/etc/hostname6.*[0-9]" ]; then done shift + # skip IP tunnel interfaces plumbed by net-iptun. + if is_iptun $intf_name; then + continue + fi + read one rest < /etc/hostname6.$intf_name if [ "$one" = ipmp ]; then ipmp6_list="$ipmp6_list $intf_name" diff --git a/usr/src/cmd/svc/milestone/net-routing-setup b/usr/src/cmd/svc/milestone/net-routing-setup index efde9837ea..45b34fd220 100644 --- a/usr/src/cmd/svc/milestone/net-routing-setup +++ b/usr/src/cmd/svc/milestone/net-routing-setup @@ -20,10 +20,8 @@ # CDDL HEADER END # # -# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. -# -# ident "%Z%%M% %I% %E% SMI" # This script configures IP routing. @@ -187,22 +185,6 @@ else fi # -# Set 6to4 Relay Router communication support policy and, if applicable, -# the destination Relay Router IPv4 address. See /etc/default/inetinit for -# setting and further info on ACCEPT6TO4RELAY and RELAY6TO4ADDR. -# If ACCEPT6TO4RELAY=NO, the default value in the kernel will -# be used. -# -ACCEPT6TO4RELAY=`echo "$ACCEPT6TO4RELAY" | /usr/bin/tr '[A-Z]' '[a-z]'` -if [ "$ACCEPT6TO4RELAY" = yes ]; then - if [ "$RELAY6TO4ADDR" ]; then - /usr/sbin/6to4relay -e -a $RELAY6TO4ADDR - else - /usr/sbin/6to4relay -e - fi -fi - -# # Read /etc/inet/static_routes and add each route. 
# if [ -f /etc/inet/static_routes ]; then diff --git a/usr/src/cmd/svc/milestone/network-initial.xml b/usr/src/cmd/svc/milestone/network-initial.xml index 52c2c3cde3..b0f3a9e87b 100644 --- a/usr/src/cmd/svc/milestone/network-initial.xml +++ b/usr/src/cmd/svc/milestone/network-initial.xml @@ -1,7 +1,7 @@ <?xml version="1.0"?> <!DOCTYPE service_bundle SYSTEM "/usr/share/lib/xml/dtd/service_bundle.dtd.1"> <!-- - Copyright 2006 Sun Microsystems, Inc. All rights reserved. + Copyright 2009 Sun Microsystems, Inc. All rights reserved. Use is subject to license terms. CDDL HEADER START @@ -23,8 +23,6 @@ CDDL HEADER END - ident "%Z%%M% %I% %E% SMI" - NOTE: This service manifest is not editable; its contents will be overwritten by package or patch operations, including operating system upgrade. Make customizations in a different @@ -56,14 +54,6 @@ <service_fmri value='svc:/system/filesystem/usr' /> </dependency> - <dependency - name='cryptoframework' - grouping='optional_all' - restart_on='none' - type='service'> - <service_fmri value='svc:/system/cryptosvc' /> - </dependency> - <exec_method type='method' name='start' diff --git a/usr/src/cmd/svc/milestone/network-iptun.xml b/usr/src/cmd/svc/milestone/network-iptun.xml new file mode 100644 index 0000000000..0b54da5bc2 --- /dev/null +++ b/usr/src/cmd/svc/milestone/network-iptun.xml @@ -0,0 +1,94 @@ +<?xml version="1.0"?> +<!DOCTYPE service_bundle SYSTEM "/usr/share/lib/xml/dtd/service_bundle.dtd.1"> +<!-- + Copyright 2009 Sun Microsystems, Inc. All rights reserved. + Use is subject to license terms. + + CDDL HEADER START + + The contents of this file are subject to the terms of the + Common Development and Distribution License (the "License"). + You may not use this file except in compliance with the License. + + You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + or http://www.opensolaris.org/os/licensing. + See the License for the specific language governing permissions + and limitations under the License. 
+ + When distributing Covered Code, include this CDDL HEADER in each + file and include the License file at usr/src/OPENSOLARIS.LICENSE. + If applicable, add the following below this CDDL HEADER, with the + fields enclosed by brackets "[]" replaced with your own identifying + information: Portions Copyright [yyyy] [name of copyright owner] + + CDDL HEADER END + + NOTE: This service manifest is not editable; its contents will + be overwritten by package or patch operations, including + operating system upgrade. Make customizations in a different + file. +--> + +<service_bundle type='manifest' name='SUNWcsr:iptun'> + +<service + name='network/iptun' + type='service' + version='1'> + + <create_default_instance enabled='true' /> + + <dependency + name='network' + grouping='require_all' + restart_on='none' + type='service'> + <service_fmri value='svc:/network/physical:default' /> + </dependency> + + <dependency + name='ipsecpolicy' + grouping='optional_all' + restart_on='none' + type='service'> + <service_fmri value='svc:/network/ipsec/policy:default' /> + </dependency> + + <exec_method + type='method' + name='start' + exec='/lib/svc/method/net-iptun %m' + timeout_seconds='600' /> + + <exec_method + type='method' + name='stop' + exec='/lib/svc/method/net-iptun %m' + timeout_seconds='600' /> + + <property_group name='startd' type='framework'> + <propval name='duration' type='astring' value='transient' /> + </property_group> + + <stability value='Unstable' /> + + <template> + <common_name> + <loctext xml:lang='C'> + IP tunnel configuration + </loctext> + </common_name> + <description> + <loctext xml:lang='C'> + Create IP tunnel links and IP + interfaces over IP tunnels. 
+ </loctext> + </description> + <documentation> + <manpage title='dladm' section='1M' + manpath='/usr/share/man' /> + </documentation> + </template> +</service> + +</service_bundle> diff --git a/usr/src/cmd/truss/codes.c b/usr/src/cmd/truss/codes.c index 422608a30b..49ac62d691 100644 --- a/usr/src/cmd/truss/codes.c +++ b/usr/src/cmd/truss/codes.c @@ -95,6 +95,7 @@ #include <sys/devinfo_impl.h> #include <sys/dumpadm.h> #include <sys/mntio.h> +#include <inet/iptun.h> #include <sys/zcons.h> #include <sys/usb/clients/hid/hid.h> #include <sys/pm.h> @@ -863,8 +864,6 @@ const struct ioc { { (uint_t)SIOCTMYADDR, "SIOCTMYADDR", "sioc_addrreq" }, { (uint_t)SIOCTONLINK, "SIOCTONLINK", "sioc_addrreq" }, { (uint_t)SIOCTMYSITE, "SIOCTMYSITE", "sioc_addrreq" }, - { (uint_t)SIOCGTUNPARAM, "SIOCGTUNPARAM", "iftun_req" }, - { (uint_t)SIOCSTUNPARAM, "SIOCSTUNPARAM", "iftun_req" }, { (uint_t)SIOCFIPSECONFIG, "SIOCFIPSECONFIG", NULL }, { (uint_t)SIOCSIPSECONFIG, "SIOCSIPSECONFIG", NULL }, { (uint_t)SIOCDIPSECONFIG, "SIOCDIPSECONFIG", NULL }, @@ -1415,6 +1414,13 @@ const struct ioc { { (uint_t)DINFOIDENT, "DINFOIDENT", NULL}, + { (uint_t)IPTUN_CREATE, "IPTUN_CREATE", "iptun_kparams_t"}, + { (uint_t)IPTUN_DELETE, "IPTUN_DELETE", "datalink_id_t"}, + { (uint_t)IPTUN_MODIFY, "IPTUN_MODIFY", "iptun_kparams_t"}, + { (uint_t)IPTUN_INFO, "IPTUN_INFO", NULL}, + { (uint_t)IPTUN_SET_6TO4RELAY, "IPTUN_SET_6TO4RELAY", NULL}, + { (uint_t)IPTUN_GET_6TO4RELAY, "IPTUN_GET_6TO4RELAY", NULL}, + /* zcons ioctls */ { (uint_t)ZC_HOLDSLAVE, "ZC_HOLDSLAVE", NULL }, { (uint_t)ZC_RELEASESLAVE, "ZC_RELEASESLAVE", NULL }, diff --git a/usr/src/cmd/zoneadm/Makefile b/usr/src/cmd/zoneadm/Makefile index 8dbc8aa0e7..57015803b8 100644 --- a/usr/src/cmd/zoneadm/Makefile +++ b/usr/src/cmd/zoneadm/Makefile @@ -20,11 +20,9 @@ # # -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# -# ident "%Z%%M% %I% %E% SMI" -# PROG= zoneadm MANIFEST= zones.xml resource-mgmt.xml @@ -39,8 +37,7 @@ SRCS = $(OBJS:.o=.c) POFILE=zoneadm_all.po POFILES= $(OBJS:%.o=%.po) -LDLIBS += -lzonecfg -lsocket -lgen -lpool -lzfs -luuid -lnvpair -lbrand \ - -ldlpi +LDLIBS += -lzonecfg -lsocket -lgen -lpool -lzfs -luuid -lnvpair -lbrand -ldladm .KEEP_STATE: diff --git a/usr/src/cmd/zoneadm/zoneadm.c b/usr/src/cmd/zoneadm/zoneadm.c index 3114deb18a..c7adf912b4 100644 --- a/usr/src/cmd/zoneadm/zoneadm.c +++ b/usr/src/cmd/zoneadm/zoneadm.c @@ -64,8 +64,6 @@ #include <limits.h> #include <dirent.h> #include <uuid/uuid.h> -#include <libdlpi.h> - #include <fcntl.h> #include <door.h> #include <macros.h> @@ -76,11 +74,12 @@ #include <libscf.h> #include <procfs.h> #include <strings.h> - #include <pool.h> #include <sys/pool.h> #include <sys/priocntl.h> #include <sys/fsspriocntl.h> +#include <libdladm.h> +#include <libdllink.h> #include "zoneadm.h" @@ -2483,7 +2482,10 @@ verify_handle(int cmd_num, zone_dochandle_t handle, char *argv[]) int err; boolean_t in_alt_root; zone_iptype_t iptype; - dlpi_handle_t dh; + dladm_handle_t dh; + dladm_status_t status; + datalink_id_t linkid; + char errmsg[DLADM_STRSIZE]; in_alt_root = zonecfg_in_alt_root(); if (in_alt_root) @@ -2556,27 +2558,25 @@ verify_handle(int cmd_num, zone_dochandle_t handle, char *argv[]) } /* - * Verify that the physical interface can be opened. + * Verify that the datalink exists and that it isn't + * already assigned to a zone. 
*/ - err = dlpi_open(nwiftab.zone_nwif_physical, &dh, 0); - if (err != DLPI_SUCCESS) { + if ((status = dladm_open(&dh)) == DLADM_STATUS_OK) { + status = dladm_name2info(dh, + nwiftab.zone_nwif_physical, &linkid, NULL, + NULL, NULL); + dladm_close(dh); + } + if (status != DLADM_STATUS_OK) { (void) fprintf(stderr, gettext("WARNING: skipping network " - "interface '%s' which cannot be opened: " - "dlpi error (%s).\n"), + "interface '%s': %s\n"), nwiftab.zone_nwif_physical, - dlpi_strerror(err)); + dladm_status2str(status, errmsg)); break; - } else { - dlpi_close(dh); } - /* - * Verify whether the physical interface is already - * used by a zone. - */ dl_owner_zid = ALL_ZONES; - if (zone_check_datalink(&dl_owner_zid, - nwiftab.zone_nwif_physical) != 0) + if (zone_check_datalink(&dl_owner_zid, linkid) != 0) break; /* diff --git a/usr/src/cmd/zoneadmd/vplat.c b/usr/src/cmd/zoneadmd/vplat.c index 8a89d34bb0..22600e6025 100644 --- a/usr/src/cmd/zoneadmd/vplat.c +++ b/usr/src/cmd/zoneadmd/vplat.c @@ -1037,8 +1037,8 @@ mount_one_dev_symlink_cb(void *arg, const char *source, const char *target) return (di_prof_add_symlink(prof, source, target)); } -static int -get_iptype(zlog_t *zlogp, zone_iptype_t *iptypep) +int +vplat_get_iptype(zlog_t *zlogp, zone_iptype_t *iptypep) { zone_dochandle_t handle; @@ -1103,7 +1103,7 @@ mount_one_dev(zlog_t *zlogp, char *devpath, zone_mnt_t mount_cmd) goto cleanup; } - if (get_iptype(zlogp, &iptype) < 0) { + if (vplat_get_iptype(zlogp, &iptype) < 0) { zerror(zlogp, B_TRUE, "unable to determine ip-type"); goto cleanup; } @@ -2535,7 +2535,7 @@ zdlerror(zlog_t *zlogp, dladm_status_t err, const char *dlname, const char *str) } static int -add_datalink(zlog_t *zlogp, char *zone_name, char *dlname) +add_datalink(zlog_t *zlogp, char *zone_name, datalink_id_t linkid, char *dlname) { dladm_status_t err; @@ -2548,27 +2548,13 @@ add_datalink(zlog_t *zlogp, char *zone_name, char *dlname) } /* Set zoneid of this link. 
*/ - err = dladm_setzid(dld_handle, dlname, zone_name); + err = dladm_set_linkprop(dld_handle, linkid, "zone", &zone_name, 1, + DLADM_OPT_ACTIVE); if (err != DLADM_STATUS_OK) { zdlerror(zlogp, err, dlname, "WARNING: unable to add network interface"); return (-1); } - - return (0); -} - -static int -remove_datalink(zlog_t *zlogp, char *dlname) -{ - dladm_status_t err; - - err = dladm_setzid(dld_handle, dlname, GLOBAL_ZONENAME); - if (err != DLADM_STATUS_OK) { - zdlerror(zlogp, err, dlname, - "unable to release network interface"); - return (-1); - } return (0); } @@ -2584,6 +2570,7 @@ configure_exclusive_network_interfaces(zlog_t *zlogp) struct zone_nwiftab nwiftab; char rootpath[MAXPATHLEN]; char path[MAXPATHLEN]; + datalink_id_t linkid; di_prof_t prof = NULL; boolean_t added = B_FALSE; @@ -2637,8 +2624,10 @@ configure_exclusive_network_interfaces(zlog_t *zlogp) * created in that case. The /dev/net entry is always * accessible. */ - if (add_datalink(zlogp, zone_name, nwiftab.zone_nwif_physical) - == 0) { + if (dladm_name2info(dld_handle, nwiftab.zone_nwif_physical, + &linkid, NULL, NULL, NULL) == DLADM_STATUS_OK && + add_datalink(zlogp, zone_name, linkid, + nwiftab.zone_nwif_physical) == 0) { added = B_TRUE; } else { (void) zonecfg_endnwifent(handle); @@ -2662,104 +2651,25 @@ configure_exclusive_network_interfaces(zlog_t *zlogp) return (0); } -/* - * Get the list of the data-links from kernel, and try to remove it - */ static int -unconfigure_exclusive_network_interfaces_run(zlog_t *zlogp, zoneid_t zoneid) +unconfigure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid) { - char *dlnames, *ptr; - int dlnum, dlnum_saved, i; + int dlnum = 0; - dlnum = 0; + /* + * The kernel shutdown callback for the dls module should have removed + * all datalinks from this zone. If any remain, then there's a + * problem. 
+ */ if (zone_list_datalink(zoneid, &dlnum, NULL) != 0) { zerror(zlogp, B_TRUE, "unable to list network interfaces"); return (-1); } -again: - /* this zone doesn't have any data-links */ - if (dlnum == 0) - return (0); - - dlnames = malloc(dlnum * LIFNAMSIZ); - if (dlnames == NULL) { - zerror(zlogp, B_TRUE, "memory allocation failed"); - return (-1); - } - dlnum_saved = dlnum; - - if (zone_list_datalink(zoneid, &dlnum, dlnames) != 0) { - zerror(zlogp, B_TRUE, "unable to list network interfaces"); - free(dlnames); - return (-1); - } - if (dlnum_saved < dlnum) { - /* list increased, try again */ - free(dlnames); - goto again; - } - ptr = dlnames; - for (i = 0; i < dlnum; i++) { - /* Remove access control information */ - if (remove_datalink(zlogp, ptr) != 0) { - free(dlnames); - return (-1); - } - ptr += LIFNAMSIZ; - } - free(dlnames); - return (0); -} - -/* - * Get the list of the data-links from configuration, and try to remove it - */ -static int -unconfigure_exclusive_network_interfaces_static(zlog_t *zlogp) -{ - zone_dochandle_t handle; - struct zone_nwiftab nwiftab; - - if ((handle = zonecfg_init_handle()) == NULL) { - zerror(zlogp, B_TRUE, "getting zone configuration handle"); - return (-1); - } - if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { - zerror(zlogp, B_FALSE, "invalid configuration"); - zonecfg_fini_handle(handle); + if (dlnum != 0) { + zerror(zlogp, B_FALSE, + "datalinks remain in zone after shutdown"); return (-1); } - if (zonecfg_setnwifent(handle) != Z_OK) { - zonecfg_fini_handle(handle); - return (0); - } - for (;;) { - if (zonecfg_getnwifent(handle, &nwiftab) != Z_OK) - break; - /* Remove access control information */ - if (remove_datalink(zlogp, nwiftab.zone_nwif_physical) - != 0) { - (void) zonecfg_endnwifent(handle); - zonecfg_fini_handle(handle); - return (-1); - } - } - (void) zonecfg_endnwifent(handle); - zonecfg_fini_handle(handle); - return (0); -} - -/* - * Remove the access control information from the kernel for the 
exclusive - * network interfaces. - */ -static int -unconfigure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid) -{ - if (unconfigure_exclusive_network_interfaces_run(zlogp, zoneid) != 0) { - return (unconfigure_exclusive_network_interfaces_static(zlogp)); - } - return (0); } @@ -4071,7 +3981,7 @@ vplat_create(zlog_t *zlogp, zone_mnt_t mount_cmd) if (zonecfg_in_alt_root()) resolve_lofs(zlogp, rootpath, sizeof (rootpath)); - if (get_iptype(zlogp, &iptype) < 0) { + if (vplat_get_iptype(zlogp, &iptype) < 0) { zerror(zlogp, B_TRUE, "unable to determine ip-type"); return (-1); } @@ -4407,7 +4317,7 @@ vplat_bringup(zlog_t *zlogp, zone_mnt_t mount_cmd, zoneid_t zoneid) if (mount_cmd == Z_MNT_BOOT) { zone_iptype_t iptype; - if (get_iptype(zlogp, &iptype) < 0) { + if (vplat_get_iptype(zlogp, &iptype) < 0) { zerror(zlogp, B_TRUE, "unable to determine ip-type"); lofs_discard_mnttab(); return (-1); @@ -4513,6 +4423,8 @@ vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting) char cmdbuf[MAXPATHLEN]; char brand[MAXNAMELEN]; brand_handle_t bh = NULL; + dladm_status_t status; + char errmsg[DLADM_STRSIZE]; ushort_t flags; kzone = zone_name; @@ -4583,7 +4495,7 @@ vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting) if (zone_getattr(zoneid, ZONE_ATTR_FLAGS, &flags, sizeof (flags)) < 0) { - if (get_iptype(zlogp, &iptype) < 0) { + if (vplat_get_iptype(zlogp, &iptype) < 0) { zerror(zlogp, B_TRUE, "unable to determine " "ip-type"); goto error; @@ -4611,6 +4523,12 @@ vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting) "network interfaces in zone"); goto error; } + status = dladm_zone_halt(dld_handle, zoneid); + if (status != DLADM_STATUS_OK) { + zerror(zlogp, B_FALSE, "unable to notify " + "dlmgmtd of zone halt: %s", + dladm_status2str(status, errmsg)); + } break; } } diff --git a/usr/src/cmd/zoneadmd/zoneadmd.c b/usr/src/cmd/zoneadmd/zoneadmd.c index ab351b0f70..662cb1993f 100644 --- 
a/usr/src/cmd/zoneadmd/zoneadmd.c +++ b/usr/src/cmd/zoneadmd/zoneadmd.c @@ -97,6 +97,8 @@ #include <libcontract_priv.h> #include <sys/contract/process.h> #include <sys/ctfs.h> +#include <libdladm.h> +#include <sys/dls_mgmt.h> #include <libzonecfg.h> #include "zoneadmd.h" @@ -772,6 +774,10 @@ zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate) char cmdbuf[MAXPATHLEN]; fs_callback_t cb; brand_handle_t bh; + zone_iptype_t iptype; + boolean_t links_loaded = B_FALSE; + dladm_status_t status; + char errmsg[DLADM_STRSIZE]; int err; if (brand_prestatechg(zlogp, zstate, Z_BOOT) != 0) @@ -859,6 +865,22 @@ zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate) } /* + * Exclusive stack zones interact with the dlmgmtd running in the + * global zone. dladm_zone_boot() tells dlmgmtd that this zone is + * booting, and loads its datalinks from the zone's datalink + * configuration file. + */ + if (vplat_get_iptype(zlogp, &iptype) == 0 && iptype == ZS_EXCLUSIVE) { + status = dladm_zone_boot(dld_handle, zoneid); + if (status != DLADM_STATUS_OK) { + zerror(zlogp, B_FALSE, "unable to load zone datalinks: " + " %s", dladm_status2str(status, errmsg)); + goto bad; + } + links_loaded = B_TRUE; + } + + /* * If there is a brand 'boot' callback, execute it now to give the * brand one last chance to do any additional setup before the zone * is booted. @@ -895,6 +917,8 @@ bad: * state, RUNNING, and then invoke the hook as if we're halting. 
*/ (void) brand_poststatechg(zlogp, ZONE_STATE_RUNNING, Z_HALT); + if (links_loaded) + (void) dladm_zone_halt(dld_handle, zoneid); return (-1); } diff --git a/usr/src/cmd/zoneadmd/zoneadmd.h b/usr/src/cmd/zoneadmd/zoneadmd.h index c244852968..d29f3b3a98 100644 --- a/usr/src/cmd/zoneadmd/zoneadmd.h +++ b/usr/src/cmd/zoneadmd/zoneadmd.h @@ -131,6 +131,7 @@ typedef enum { extern zoneid_t vplat_create(zlog_t *, zone_mnt_t); extern int vplat_bringup(zlog_t *, zone_mnt_t, zoneid_t); extern int vplat_teardown(zlog_t *, boolean_t, boolean_t); +extern int vplat_get_iptype(zlog_t *, zone_iptype_t *); /* * Filesystem mounting interfaces. diff --git a/usr/src/head/zone.h b/usr/src/head/zone.h index c4077c03de..34528a27f5 100644 --- a/usr/src/head/zone.h +++ b/usr/src/head/zone.h @@ -19,15 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _ZONE_H #define _ZONE_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/types.h> #include <sys/zone.h> #include <sys/priv.h> @@ -67,10 +65,10 @@ extern int zone_enter(zoneid_t); extern int zone_list(zoneid_t *, uint_t *); extern int zone_shutdown(zoneid_t); extern int zone_version(int *); -extern int zone_add_datalink(zoneid_t, char *); -extern int zone_remove_datalink(zoneid_t, char *); -extern int zone_check_datalink(zoneid_t *, char *); -extern int zone_list_datalink(zoneid_t, int *, char *); +extern int zone_add_datalink(zoneid_t, datalink_id_t); +extern int zone_remove_datalink(zoneid_t, datalink_id_t); +extern int zone_check_datalink(zoneid_t *, datalink_id_t); +extern int zone_list_datalink(zoneid_t, int *, datalink_id_t *); #ifdef __cplusplus } diff --git a/usr/src/lib/brand/native/zone/config.xml b/usr/src/lib/brand/native/zone/config.xml index 9ede6178dc..31ec5834db 100644 --- a/usr/src/lib/brand/native/zone/config.xml +++ b/usr/src/lib/brand/native/zone/config.xml @@ -77,6 
+77,7 @@ <privilege set="default" name="sys_acct" /> <privilege set="default" name="sys_admin" /> <privilege set="default" name="sys_ip_config" ip-type="exclusive" /> + <privilege set="default" name="sys_iptun_config" ip-type="exclusive" /> <privilege set="default" name="sys_mount" /> <privilege set="default" name="sys_nfs" /> <privilege set="default" name="sys_resource" /> diff --git a/usr/src/lib/brand/native/zone/platform.xml b/usr/src/lib/brand/native/zone/platform.xml index 4970516328..0aacc8c401 100644 --- a/usr/src/lib/brand/native/zone/platform.xml +++ b/usr/src/lib/brand/native/zone/platform.xml @@ -89,6 +89,7 @@ <device match="zfs" /> <!-- Devices to create in exclusive IP zone only --> + <device match="dld" ip-type="exclusive" /> <device match="icmp" ip-type="exclusive" /> <device match="icmp6" ip-type="exclusive" /> <device match="ip" ip-type="exclusive" /> @@ -104,6 +105,7 @@ <device match="ipsecesp" ip-type="exclusive" /> <device match="ipstate" ip-type="exclusive" /> <device match="ipsync" ip-type="exclusive" /> + <device match="iptunq" ip-type="exclusive" /> <device match="keysock" ip-type="exclusive" /> <device match="rawip" ip-type="exclusive" /> <device match="rawip6" ip-type="exclusive" /> diff --git a/usr/src/lib/brand/sn1/zone/config.xml b/usr/src/lib/brand/sn1/zone/config.xml index c32690afcf..ba458e719e 100644 --- a/usr/src/lib/brand/sn1/zone/config.xml +++ b/usr/src/lib/brand/sn1/zone/config.xml @@ -71,6 +71,7 @@ <privilege set="default" name="sys_acct" /> <privilege set="default" name="sys_admin" /> <privilege set="default" name="sys_ip_config" ip-type="exclusive" /> + <privilege set="default" name="sys_iptun_config" ip-type="exclusive" /> <privilege set="default" name="sys_mount" /> <privilege set="default" name="sys_nfs" /> <privilege set="default" name="sys_resource" /> diff --git a/usr/src/lib/brand/sn1/zone/platform.xml b/usr/src/lib/brand/sn1/zone/platform.xml index 2d5fb2eea0..f07bc4eddc 100644 --- 
a/usr/src/lib/brand/sn1/zone/platform.xml +++ b/usr/src/lib/brand/sn1/zone/platform.xml @@ -93,6 +93,7 @@ <device match="zfs" /> <!-- Devices to create in exclusive IP zone only --> + <device match="dld" ip-type="exclusive" /> <device match="icmp" ip-type="exclusive" /> <device match="icmp6" ip-type="exclusive" /> <device match="ip" ip-type="exclusive" /> @@ -108,6 +109,7 @@ <device match="ipsecesp" ip-type="exclusive" /> <device match="ipstate" ip-type="exclusive" /> <device match="ipsync" ip-type="exclusive" /> + <device match="iptunq" ip-type="exclusive" /> <device match="keysock" ip-type="exclusive" /> <device match="rawip" ip-type="exclusive" /> <device match="rawip6" ip-type="exclusive" /> diff --git a/usr/src/lib/libc/port/sys/zone.c b/usr/src/lib/libc/port/sys/zone.c index 5129e5d576..4a4c70043d 100644 --- a/usr/src/lib/libc/port/sys/zone.c +++ b/usr/src/lib/libc/port/sys/zone.c @@ -20,12 +20,10 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ -#pragma ident "%Z%%M% %I% %E% SMI" - #include "lint.h" #include <sys/types.h> #include <sys/syscall.h> @@ -220,27 +218,26 @@ zone_version(int *version) return (syscall(SYS_zone, ZONE_VERSION, version)); } - int -zone_add_datalink(zoneid_t zoneid, char *dlname) +zone_add_datalink(zoneid_t zoneid, datalink_id_t linkid) { - return (syscall(SYS_zone, ZONE_ADD_DATALINK, zoneid, dlname)); + return (syscall(SYS_zone, ZONE_ADD_DATALINK, zoneid, linkid)); } int -zone_remove_datalink(zoneid_t zoneid, char *dlname) +zone_remove_datalink(zoneid_t zoneid, datalink_id_t linkid) { - return (syscall(SYS_zone, ZONE_DEL_DATALINK, zoneid, dlname)); + return (syscall(SYS_zone, ZONE_DEL_DATALINK, zoneid, linkid)); } int -zone_check_datalink(zoneid_t *zoneidp, char *dlname) +zone_check_datalink(zoneid_t *zoneidp, datalink_id_t linkid) { - return (syscall(SYS_zone, ZONE_CHECK_DATALINK, zoneidp, dlname)); + return (syscall(SYS_zone, ZONE_CHECK_DATALINK, zoneidp, linkid)); } int -zone_list_datalink(zoneid_t zoneid, int *dlnump, char *buf) +zone_list_datalink(zoneid_t zoneid, int *dlnump, datalink_id_t *linkids) { - return (syscall(SYS_zone, ZONE_LIST_DATALINK, zoneid, dlnump, buf)); + return (syscall(SYS_zone, ZONE_LIST_DATALINK, zoneid, dlnump, linkids)); } diff --git a/usr/src/lib/libdladm/Makefile b/usr/src/lib/libdladm/Makefile index 7765c65ab8..3e5457f227 100644 --- a/usr/src/lib/libdladm/Makefile +++ b/usr/src/lib/libdladm/Makefile @@ -29,7 +29,7 @@ include $(SRC)/lib/Makefile.lib HDRS = libdladm.h libdladm_impl.h libdllink.h libdlaggr.h \ libdlwlan.h libdlwlan_impl.h libdlvnic.h libdlvlan.h \ libdlmgmt.h libdlflow.h libdlflow_impl.h libdlstat.h \ - libdlether.h libdlsim.h libdlbridge.h + libdlether.h libdlsim.h libdlbridge.h libdliptun.h HDRDIR = common @@ -45,7 +45,7 @@ MSGFILES = common/libdladm.c common/linkprop.c common/secobj.c \ common/propfuncs.c common/libdlflow.c \ common/libdlstat.c common/flowattr.c \ common/libdlether.c common/libdlsim.c \ - common/libdlbridge.c + 
common/libdlbridge.c common/libdliptun.c XGETFLAGS = -a -x libdladm.xcl diff --git a/usr/src/lib/libdladm/Makefile.com b/usr/src/lib/libdladm/Makefile.com index 1ad7a7c961..84d4f28fd4 100644 --- a/usr/src/lib/libdladm/Makefile.com +++ b/usr/src/lib/libdladm/Makefile.com @@ -28,7 +28,7 @@ VERS = .1 OBJECTS = libdladm.o secobj.o linkprop.o libdllink.o libdlaggr.o \ libdlwlan.o libdlvnic.o libdlmgmt.o libdlvlan.o \ flowattr.o flowprop.o propfuncs.o libdlflow.o libdlstat.o \ - usage.o libdlether.o libdlsim.o libdlbridge.o + usage.o libdlether.o libdlsim.o libdlbridge.o libdliptun.o include ../../Makefile.lib diff --git a/usr/src/lib/libdladm/common/libdladm.c b/usr/src/lib/libdladm/common/libdladm.c index 90d1d7fdaf..9e03468c7d 100644 --- a/usr/src/lib/libdladm/common/libdladm.c +++ b/usr/src/lib/libdladm/common/libdladm.c @@ -52,6 +52,7 @@ static media_type_t media_type_table[] = { { DL_IB, "Infiniband" }, { DL_IPV4, "IPv4Tunnel" }, { DL_IPV6, "IPv6Tunnel" }, + { DL_6TO4, "6to4Tunnel" }, { DL_CSMACD, "CSMA/CD" }, { DL_TPB, "TokenBus" }, { DL_TPR, "TokenRing" }, @@ -351,6 +352,21 @@ dladm_status2str(dladm_status_t status, char *buf) case DLADM_STATUS_OPTMISSING: s = "optional software not installed"; break; + case DLADM_STATUS_IPTUNTYPE: + s = "invalid IP tunnel type"; + break; + case DLADM_STATUS_IPTUNTYPEREQD: + s = "IP tunnel type required"; + break; + case DLADM_STATUS_BADIPTUNLADDR: + s = "invalid local IP tunnel address"; + break; + case DLADM_STATUS_BADIPTUNRADDR: + s = "invalid remote IP tunnel address"; + break; + case DLADM_STATUS_ADDRINUSE: + s = "address already in use"; + break; default: s = "<unknown error>"; break; @@ -399,6 +415,8 @@ dladm_errno2status(int err) return (DLADM_STATUS_FLOW_INCOMPATIBLE); case EALREADY: return (DLADM_STATUS_FLOW_IDENTICAL); + case EADDRINUSE: + return (DLADM_STATUS_ADDRINUSE); default: return (DLADM_STATUS_FAILED); } @@ -573,6 +591,9 @@ dladm_class2str(datalink_class_t class, char *buf) case DATALINK_CLASS_ETHERSTUB: s = 
"etherstub"; break; + case DATALINK_CLASS_IPTUN: + s = "iptun"; + break; case DATALINK_CLASS_SIMNET: s = "simnet"; break; @@ -747,7 +768,7 @@ dladm_valid_linkname(const char *link) size_t len = strlen(link); const char *cp; - if (len + 1 >= MAXLINKNAMELEN) + if (len >= MAXLINKNAMELEN) return (B_FALSE); /* @@ -758,10 +779,10 @@ dladm_valid_linkname(const char *link) /* * The legal characters in a link name are: - * alphanumeric (a-z, A-Z, 0-9), and the underscore ('_'). + * alphanumeric (a-z, A-Z, 0-9), underscore ('_'), and '.'. */ for (cp = link; *cp != '\0'; cp++) { - if ((isalnum(*cp) == 0) && (*cp != '_')) + if ((isalnum(*cp) == 0) && (*cp != '_') && (*cp != '.')) return (B_FALSE); } diff --git a/usr/src/lib/libdladm/common/libdladm.h b/usr/src/lib/libdladm/common/libdladm.h index 919c207cd0..0e9a066083 100644 --- a/usr/src/lib/libdladm/common/libdladm.h +++ b/usr/src/lib/libdladm/common/libdladm.h @@ -115,6 +115,11 @@ typedef enum { DLADM_STATUS_VIDINVAL, DLADM_STATUS_NONOTIF, DLADM_STATUS_TRYAGAIN, + DLADM_STATUS_IPTUNTYPE, + DLADM_STATUS_IPTUNTYPEREQD, + DLADM_STATUS_BADIPTUNLADDR, + DLADM_STATUS_BADIPTUNRADDR, + DLADM_STATUS_ADDRINUSE, DLADM_STATUS_BADTIMEVAL, DLADM_STATUS_INVALIDMACADDR, DLADM_STATUS_INVALIDMACADDRNIC, @@ -240,6 +245,8 @@ extern dladm_status_t dladm_usage_summary(int (*)(dladm_usage_t *, void *), int, char *, void *); extern dladm_status_t dladm_usage_dates(int (*)(dladm_usage_t *, void *), int, char *, char *, void *); +extern dladm_status_t dladm_zone_boot(dladm_handle_t, zoneid_t); +extern dladm_status_t dladm_zone_halt(dladm_handle_t, zoneid_t); #ifdef __cplusplus } diff --git a/usr/src/lib/libdladm/common/libdladm_impl.h b/usr/src/lib/libdladm/common/libdladm_impl.h index 774d86b0ac..61e584a051 100644 --- a/usr/src/lib/libdladm/common/libdladm_impl.h +++ b/usr/src/lib/libdladm/common/libdladm_impl.h @@ -26,6 +26,7 @@ #ifndef _LIBDLADM_IMPL_H #define _LIBDLADM_IMPL_H +#include <sys/types.h> #include <sys/mac.h> #include 
<sys/mac_flow.h> #include <libdladm.h> diff --git a/usr/src/lib/libdladm/common/libdlaggr.c b/usr/src/lib/libdladm/common/libdlaggr.c index 408bf828ab..d715ff013c 100644 --- a/usr/src/lib/libdladm/common/libdlaggr.c +++ b/usr/src/lib/libdladm/common/libdlaggr.c @@ -59,26 +59,7 @@ static uchar_t zero_mac[] = {0, 0, 0, 0, 0, 0}; (((mac) != NULL) && (bcmp(zero_mac, (mac), ETHERADDRL) != 0) && \ (!(mac)[0] & 0x01)) -#define PORT_DELIMITER '.' - -#define WRITE_PORT(portstr, portid, size) { \ - char pstr[LINKID_STR_WIDTH + 2]; \ - (void) snprintf(pstr, LINKID_STR_WIDTH + 2, "%d%c", \ - (portid), PORT_DELIMITER); \ - (void) strlcat((portstr), pstr, (size)); \ -} - -#define READ_PORT(portstr, portid, status) { \ - errno = 0; \ - (status) = DLADM_STATUS_OK; \ - (portid) = (int)strtol((portstr), &(portstr), 10); \ - if (errno != 0 || *(portstr) != PORT_DELIMITER) { \ - (status) = DLADM_STATUS_REPOSITORYINVAL; \ - } else { \ - /* Skip the delimiter. */ \ - (portstr)++; \ - } \ -} +#define PORT_DELIMITER ":" typedef struct dladm_aggr_modify_attr { uint32_t ld_policy; @@ -136,6 +117,35 @@ static dladm_aggr_port_state_t port_states[] = { #define NPORT_STATES \ (sizeof (port_states) / sizeof (dladm_aggr_port_state_t)) +static dladm_status_t +write_port(dladm_handle_t handle, char *portstr, datalink_id_t portid, + size_t portstrsize) +{ + char pname[MAXLINKNAMELEN + 1]; + dladm_status_t status; + + if ((status = dladm_datalink_id2info(handle, portid, NULL, NULL, NULL, + pname, sizeof (pname))) != DLADM_STATUS_OK) + return (status); + (void) strlcat(pname, PORT_DELIMITER, sizeof (pname)); + if (strlcat(portstr, pname, portstrsize) >= portstrsize) + status = DLADM_STATUS_TOOSMALL; + return (status); +} + +static dladm_status_t +read_port(dladm_handle_t handle, char **portstr, datalink_id_t *portid) +{ + dladm_status_t status; + char *pname; + + if ((pname = strtok(*portstr, PORT_DELIMITER)) == NULL) + return (DLADM_STATUS_REPOSITORYINVAL); + *portstr += (strlen(pname) + 1); + status 
= dladm_name2info(handle, pname, portid, NULL, NULL, NULL); + return (status); +} + static int i_dladm_aggr_ioctl(dladm_handle_t handle, int cmd, void *ptr) { @@ -259,7 +269,7 @@ i_dladm_aggr_info_persist(dladm_handle_t handle, datalink_id_t linkid, { dladm_conf_t conf; uint32_t nports, i; - char *portstr, *next; + char *portstr = NULL, *next; dladm_status_t status; uint64_t u64; int size; @@ -323,36 +333,30 @@ i_dladm_aggr_info_persist(dladm_handle_t handle, datalink_id_t linkid, nports = (uint32_t)u64; attrp->lg_nports = nports; - size = nports * (LINKID_STR_WIDTH + 1) + 1; + size = nports * (MAXLINKNAMELEN + 1) + 1; if ((portstr = calloc(1, size)) == NULL) { status = DLADM_STATUS_NOMEM; goto done; } status = dladm_get_conf_field(handle, conf, FPORTS, portstr, size); - if (status != DLADM_STATUS_OK) { - free(portstr); + if (status != DLADM_STATUS_OK) goto done; - } if ((attrp->lg_ports = malloc(nports * sizeof (dladm_aggr_port_attr_t))) == NULL) { - free(portstr); status = DLADM_STATUS_NOMEM; goto done; } for (next = portstr, i = 0; i < nports; i++) { - READ_PORT(next, attrp->lg_ports[i].lp_linkid, status); - if (status != DLADM_STATUS_OK) { - free(portstr); + if ((status = read_port(handle, &next, + &attrp->lg_ports[i].lp_linkid)) != DLADM_STATUS_OK) free(attrp->lg_ports); - goto done; - } } - free(portstr); done: + free(portstr); dladm_destroy_conf(handle, conf); return (status); } @@ -406,7 +410,7 @@ i_dladm_aggr_add_rmv(dladm_handle_t handle, datalink_id_t linkid, * First, update the persistent configuration if requested. We only * need to update the FPORTS and FNPORTS fields of this aggregation. * Note that FPORTS is a list of port linkids separated by - * PORT_DELIMITER ('.'). + * PORT_DELIMITER (':'). 
*/ if (flags & DLADM_OPT_PERSIST) { status = dladm_read_conf(handle, linkid, &conf); @@ -430,7 +434,7 @@ i_dladm_aggr_add_rmv(dladm_handle_t handle, datalink_id_t linkid, goto destroyconf; } - size = orig_nports * (LINKID_STR_WIDTH + 1) + 1; + size = orig_nports * (MAXLINKNAMELEN + 1) + 1; if ((orig_portstr = calloc(1, size)) == NULL) { status = dladm_errno2status(errno); goto destroyconf; @@ -444,7 +448,7 @@ i_dladm_aggr_add_rmv(dladm_handle_t handle, datalink_id_t linkid, result_nports = (cmd == LAIOC_ADD) ? orig_nports + nports : orig_nports; - size = result_nports * (LINKID_STR_WIDTH + 1) + 1; + size = result_nports * (MAXLINKNAMELEN + 1) + 1; if ((portstr = calloc(1, size)) == NULL) { status = dladm_errno2status(errno); goto destroyconf; @@ -456,8 +460,14 @@ i_dladm_aggr_add_rmv(dladm_handle_t handle, datalink_id_t linkid, */ if (cmd == LAIOC_ADD) { (void) strlcpy(portstr, orig_portstr, size); - for (i = 0; i < nports; i++) - WRITE_PORT(portstr, ports[i].lp_linkid, size); + for (i = 0; i < nports; i++) { + status = write_port(handle, portstr, + ports[i].lp_linkid, size); + if (status != DLADM_STATUS_OK) { + free(portstr); + goto destroyconf; + } + } } else { char *next; datalink_id_t portid; @@ -468,7 +478,7 @@ i_dladm_aggr_add_rmv(dladm_handle_t handle, datalink_id_t linkid, * Read the portids from the old configuration * one by one. 
*/ - READ_PORT(next, portid, status); + status = read_port(handle, &next, &portid); if (status != DLADM_STATUS_OK) { free(portstr); goto destroyconf; @@ -483,7 +493,12 @@ i_dladm_aggr_add_rmv(dladm_handle_t handle, datalink_id_t linkid, break; } if (i == nports) { - WRITE_PORT(portstr, portid, size); + status = write_port(handle, portstr, + portid, size); + if (status != DLADM_STATUS_OK) { + free(portstr); + goto destroyconf; + } } else { remove++; } @@ -724,16 +739,10 @@ i_dladm_aggr_up(dladm_handle_t handle, datalink_id_t linkid, void *arg) if ((status = dladm_up_datalink_id(handle, linkid)) != DLADM_STATUS_OK) { laioc_delete_t ioc; + ioc.ld_linkid = linkid; (void) i_dladm_aggr_ioctl(handle, LAIOC_DELETE, &ioc); - goto done; } - - /* - * Reset the active linkprop of this specific link. - */ - (void) dladm_init_linkprop(handle, linkid, B_FALSE); - done: free(attr.lg_ports); free(ports); @@ -1028,14 +1037,19 @@ dladm_aggr_persist_aggr_conf(dladm_handle_t handle, const char *link, if (status != DLADM_STATUS_OK) goto done; - size = nports * (LINKID_STR_WIDTH + 1) + 1; + size = nports * MAXLINKNAMELEN + 1; if ((portstr = calloc(1, size)) == NULL) { status = DLADM_STATUS_NOMEM; goto done; } - for (i = 0; i < nports; i++) - WRITE_PORT(portstr, ports[i].lp_linkid, size); + for (i = 0; i < nports; i++) { + status = write_port(handle, portstr, ports[i].lp_linkid, size); + if (status != DLADM_STATUS_OK) { + free(portstr); + goto done; + } + } status = dladm_set_conf_field(handle, conf, FPORTS, DLADM_TYPE_STR, portstr); free(portstr); @@ -1404,9 +1418,9 @@ dladm_aggr_delete(dladm_handle_t handle, datalink_id_t linkid, uint32_t flags) if (arg.isheld) return (DLADM_STATUS_LINKBUSY); + (void) dladm_remove_conf(handle, linkid); (void) dladm_destroy_datalink_id(handle, linkid, DLADM_OPT_PERSIST); - (void) dladm_remove_conf(handle, linkid); } return (DLADM_STATUS_OK); diff --git a/usr/src/lib/libdladm/common/libdliptun.c b/usr/src/lib/libdladm/common/libdliptun.c new file mode 
100644 index 0000000000..f73c0d1ecb --- /dev/null +++ b/usr/src/lib/libdladm/common/libdliptun.c @@ -0,0 +1,625 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <assert.h> +#include <stdio.h> +#include <errno.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/types.h> +#include <fcntl.h> +#include <stropts.h> +#include <string.h> +#include <netdb.h> +#include <sys/conf.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <inet/iptun.h> +#include <sys/dls.h> +#include <libdlpi.h> +#include <libdladm_impl.h> +#include <libdllink.h> +#include <libdliptun.h> + +/* + * IP Tunneling Administration Library. + * This library is used by dladm(1M) and to configure IP tunnel links. + */ + +#define IPTUN_CONF_TYPE "type" +#define IPTUN_CONF_LADDR "laddr" +#define IPTUN_CONF_RADDR "raddr" + +/* + * If IPTUN_CREATE and IPTUN_MODIFY include IPsec policy and IPsec hasn't + * loaded yet, the ioctls may return EAGAIN. We try the ioctl + * IPTUN_IOCTL_ATTEMPT_LIMIT times and wait IPTUN_IOCTL_ATTEMPT_INTERVAL + * microseconds between attempts. 
+ */ +#define IPTUN_IOCTL_ATTEMPT_LIMIT 3 +#define IPTUN_IOCTL_ATTEMPT_INTERVAL 10000 + +dladm_status_t +i_iptun_ioctl(dladm_handle_t handle, int cmd, void *dp) +{ + dladm_status_t status = DLADM_STATUS_OK; + uint_t attempt; + + for (attempt = 0; attempt < IPTUN_IOCTL_ATTEMPT_LIMIT; attempt++) { + if (attempt != 0) + (void) usleep(IPTUN_IOCTL_ATTEMPT_INTERVAL); + status = (ioctl(dladm_dld_fd(handle), cmd, dp) == 0) ? + DLADM_STATUS_OK : dladm_errno2status(errno); + if (status != DLADM_STATUS_TRYAGAIN) + break; + } + return (status); +} + +/* + * Given tunnel parameters as supplied by a library consumer, fill in kernel + * parameters to be passed down to the iptun control device. + */ +static dladm_status_t +i_iptun_kparams(dladm_handle_t handle, const iptun_params_t *params, + iptun_kparams_t *ik) +{ + dladm_status_t status; + struct addrinfo *ai, hints; + iptun_kparams_t tmpik; + iptun_type_t iptuntype = IPTUN_TYPE_UNKNOWN; + + (void) memset(ik, 0, sizeof (*ik)); + + ik->iptun_kparam_linkid = params->iptun_param_linkid; + + if (params->iptun_param_flags & IPTUN_PARAM_TYPE) { + ik->iptun_kparam_type = iptuntype = params->iptun_param_type; + ik->iptun_kparam_flags |= IPTUN_KPARAM_TYPE; + } + + if (params->iptun_param_flags & (IPTUN_PARAM_LADDR|IPTUN_PARAM_RADDR)) { + if (iptuntype == IPTUN_TYPE_UNKNOWN) { + /* + * We need to get the type of this existing tunnel in + * order to validate and/or look up the right kind of + * IP address.
+ */ + tmpik.iptun_kparam_linkid = params->iptun_param_linkid; + status = i_iptun_ioctl(handle, IPTUN_INFO, &tmpik); + if (status != DLADM_STATUS_OK) + return (status); + iptuntype = tmpik.iptun_kparam_type; + } + + (void) memset(&hints, 0, sizeof (hints)); + switch (iptuntype) { + case IPTUN_TYPE_IPV4: + case IPTUN_TYPE_6TO4: + hints.ai_family = AF_INET; + break; + case IPTUN_TYPE_IPV6: + hints.ai_family = AF_INET6; + break; + } + } + + if (params->iptun_param_flags & IPTUN_PARAM_LADDR) { + if (getaddrinfo(params->iptun_param_laddr, NULL, &hints, &ai) != + 0) + return (DLADM_STATUS_BADIPTUNLADDR); + if (ai->ai_next != NULL) { + freeaddrinfo(ai); + return (DLADM_STATUS_BADIPTUNLADDR); + } + (void) memcpy(&ik->iptun_kparam_laddr, ai->ai_addr, + ai->ai_addrlen); + ik->iptun_kparam_flags |= IPTUN_KPARAM_LADDR; + freeaddrinfo(ai); + } + + if (params->iptun_param_flags & IPTUN_PARAM_RADDR) { + if (getaddrinfo(params->iptun_param_raddr, NULL, &hints, &ai) != + 0) + return (DLADM_STATUS_BADIPTUNRADDR); + if (ai->ai_next != NULL) { + freeaddrinfo(ai); + return (DLADM_STATUS_BADIPTUNRADDR); + } + (void) memcpy(&ik->iptun_kparam_raddr, ai->ai_addr, + ai->ai_addrlen); + ik->iptun_kparam_flags |= IPTUN_KPARAM_RADDR; + freeaddrinfo(ai); + } + + if (params->iptun_param_flags & IPTUN_PARAM_SECINFO) { + ik->iptun_kparam_secinfo = params->iptun_param_secinfo; + ik->iptun_kparam_flags |= IPTUN_KPARAM_SECINFO; + } + + return (DLADM_STATUS_OK); +} + +/* + * The inverse of i_iptun_kparams(). Given kernel tunnel parameters as + * returned from an IPTUN_INFO ioctl, fill in tunnel parameters.
+ */ +static dladm_status_t +i_iptun_params(const iptun_kparams_t *ik, iptun_params_t *params) +{ + socklen_t salen; + + (void) memset(params, 0, sizeof (*params)); + + params->iptun_param_linkid = ik->iptun_kparam_linkid; + + if (ik->iptun_kparam_flags & IPTUN_KPARAM_TYPE) { + params->iptun_param_type = ik->iptun_kparam_type; + params->iptun_param_flags |= IPTUN_PARAM_TYPE; + } + + if (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR) { + salen = ik->iptun_kparam_laddr.ss_family == AF_INET ? + sizeof (struct sockaddr_in) : sizeof (struct sockaddr_in6); + if (getnameinfo((const struct sockaddr *) + &ik->iptun_kparam_laddr, salen, params->iptun_param_laddr, + sizeof (params->iptun_param_laddr), NULL, 0, + NI_NUMERICHOST) != 0) { + return (DLADM_STATUS_BADIPTUNLADDR); + } + params->iptun_param_flags |= IPTUN_PARAM_LADDR; + } + + if (ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR) { + salen = ik->iptun_kparam_raddr.ss_family == AF_INET ? + sizeof (struct sockaddr_in) : sizeof (struct sockaddr_in6); + if (getnameinfo((const struct sockaddr *) + &ik->iptun_kparam_raddr, salen, params->iptun_param_raddr, + sizeof (params->iptun_param_raddr), NULL, 0, + NI_NUMERICHOST) != 0) { + return (DLADM_STATUS_BADIPTUNRADDR); + } + params->iptun_param_flags |= IPTUN_PARAM_RADDR; + } + + if (ik->iptun_kparam_flags & IPTUN_KPARAM_SECINFO) { + params->iptun_param_secinfo = ik->iptun_kparam_secinfo; + params->iptun_param_flags |= IPTUN_PARAM_SECINFO; + } + + if (ik->iptun_kparam_flags & IPTUN_KPARAM_IMPLICIT) + params->iptun_param_flags |= IPTUN_PARAM_IMPLICIT; + + if (ik->iptun_kparam_flags & IPTUN_KPARAM_IPSECPOL) + params->iptun_param_flags |= IPTUN_PARAM_IPSECPOL; + + return (DLADM_STATUS_OK); +} + +dladm_status_t +i_iptun_get_sysparams(dladm_handle_t handle, iptun_params_t *params) +{ + dladm_status_t status = DLADM_STATUS_OK; + iptun_kparams_t ik; + + ik.iptun_kparam_linkid = params->iptun_param_linkid; + status = i_iptun_ioctl(handle, IPTUN_INFO, &ik); + if (status == DLADM_STATUS_OK) + 
+ status = i_iptun_params(&ik, params); + return (status); +} + +/* + * Read tunnel parameters from persistent storage. Note that the tunnel type + * is the only thing which must always be in the configuration. All other + * parameters (currently the source and destination addresses) may or may not + * have been configured, and therefore may not have been set. + */ +static dladm_status_t +i_iptun_get_dbparams(dladm_handle_t handle, iptun_params_t *params) +{ + dladm_status_t status; + dladm_conf_t conf; + datalink_class_t class; + uint64_t temp; + + /* First, make sure that this is an IP tunnel. */ + if ((status = dladm_datalink_id2info(handle, params->iptun_param_linkid, + NULL, &class, NULL, NULL, 0)) != DLADM_STATUS_OK) + return (status); + if (class != DATALINK_CLASS_IPTUN) + return (DLADM_STATUS_LINKINVAL); + + status = dladm_read_conf(handle, params->iptun_param_linkid, &conf); + if (status != DLADM_STATUS_OK) + return (status); + + params->iptun_param_flags = 0; + + if ((status = dladm_get_conf_field(handle, conf, IPTUN_CONF_TYPE, &temp, + sizeof (temp))) != DLADM_STATUS_OK) + goto done; + params->iptun_param_type = (iptun_type_t)temp; + params->iptun_param_flags |= IPTUN_PARAM_TYPE; + + if (dladm_get_conf_field(handle, conf, IPTUN_CONF_LADDR, + params->iptun_param_laddr, sizeof (params->iptun_param_laddr)) == + DLADM_STATUS_OK) + params->iptun_param_flags |= IPTUN_PARAM_LADDR; + + if (dladm_get_conf_field(handle, conf, IPTUN_CONF_RADDR, + params->iptun_param_raddr, sizeof (params->iptun_param_raddr)) == + DLADM_STATUS_OK) + params->iptun_param_flags |= IPTUN_PARAM_RADDR; + +done: + dladm_destroy_conf(handle, conf); + return (status); +} + +static dladm_status_t +i_iptun_create_sys(dladm_handle_t handle, iptun_params_t *params) +{ + iptun_kparams_t ik; + dladm_status_t status = DLADM_STATUS_OK; + + /* The tunnel type is required for creation.
*/ + if (!(params->iptun_param_flags & IPTUN_PARAM_TYPE)) + return (DLADM_STATUS_IPTUNTYPEREQD); + + if ((status = i_iptun_kparams(handle, params, &ik)) == DLADM_STATUS_OK) + status = i_iptun_ioctl(handle, IPTUN_CREATE, &ik); + return (status); +} + +static dladm_status_t +i_iptun_create_db(dladm_handle_t handle, const char *name, + iptun_params_t *params, uint32_t media) +{ + dladm_conf_t conf; + dladm_status_t status; + uint64_t storage; + + status = dladm_create_conf(handle, name, params->iptun_param_linkid, + DATALINK_CLASS_IPTUN, media, &conf); + if (status != DLADM_STATUS_OK) + return (status); + + assert(params->iptun_param_flags & IPTUN_PARAM_TYPE); + storage = params->iptun_param_type; + status = dladm_set_conf_field(handle, conf, IPTUN_CONF_TYPE, + DLADM_TYPE_UINT64, &storage); + if (status != DLADM_STATUS_OK) + goto done; + + if (params->iptun_param_flags & IPTUN_PARAM_LADDR) { + status = dladm_set_conf_field(handle, conf, IPTUN_CONF_LADDR, + DLADM_TYPE_STR, params->iptun_param_laddr); + if (status != DLADM_STATUS_OK) + goto done; + } + + if (params->iptun_param_flags & IPTUN_PARAM_RADDR) { + status = dladm_set_conf_field(handle, conf, IPTUN_CONF_RADDR, + DLADM_TYPE_STR, params->iptun_param_raddr); + if (status != DLADM_STATUS_OK) + goto done; + } + + status = dladm_write_conf(handle, conf); + +done: + dladm_destroy_conf(handle, conf); + return (status); +} + +static dladm_status_t +i_iptun_delete_sys(dladm_handle_t handle, datalink_id_t linkid) +{ + dladm_status_t status; + + status = i_iptun_ioctl(handle, IPTUN_DELETE, &linkid); + if (status != DLADM_STATUS_OK) + return (status); + (void) dladm_destroy_datalink_id(handle, linkid, DLADM_OPT_ACTIVE); + return (DLADM_STATUS_OK); +} + +static dladm_status_t +i_iptun_modify_sys(dladm_handle_t handle, const iptun_params_t *params) +{ + iptun_kparams_t ik; + dladm_status_t status; + + if ((status = i_iptun_kparams(handle, params, &ik)) == DLADM_STATUS_OK) + status = i_iptun_ioctl(handle, IPTUN_MODIFY, &ik); + 
+ return (status); +} + +static dladm_status_t +i_iptun_modify_db(dladm_handle_t handle, const iptun_params_t *params) +{ + dladm_conf_t conf; + dladm_status_t status; + + assert(params->iptun_param_flags & + (IPTUN_PARAM_LADDR|IPTUN_PARAM_RADDR)); + + /* + * The only parameters that can be modified persistently are the local + * and remote addresses. + */ + if (params->iptun_param_flags & ~(IPTUN_PARAM_LADDR|IPTUN_PARAM_RADDR)) + return (DLADM_STATUS_BADARG); + + status = dladm_read_conf(handle, params->iptun_param_linkid, &conf); + if (status != DLADM_STATUS_OK) + return (status); + + if (params->iptun_param_flags & IPTUN_PARAM_LADDR) { + status = dladm_set_conf_field(handle, conf, IPTUN_CONF_LADDR, + DLADM_TYPE_STR, (void *)params->iptun_param_laddr); + if (status != DLADM_STATUS_OK) + goto done; + } + + if (params->iptun_param_flags & IPTUN_PARAM_RADDR) { + status = dladm_set_conf_field(handle, conf, IPTUN_CONF_RADDR, + DLADM_TYPE_STR, (void *)params->iptun_param_raddr); + if (status != DLADM_STATUS_OK) + goto done; + } + + status = dladm_write_conf(handle, conf); + +done: + dladm_destroy_conf(handle, conf); + return (status); +} + +dladm_status_t +dladm_iptun_create(dladm_handle_t handle, const char *name, + iptun_params_t *params, uint32_t flags) +{ + dladm_status_t status; + uint32_t linkmgmt_flags = flags; + uint32_t media; + + if (!(params->iptun_param_flags & IPTUN_PARAM_TYPE)) + return (DLADM_STATUS_IPTUNTYPEREQD); + + switch (params->iptun_param_type) { + case IPTUN_TYPE_IPV4: + media = DL_IPV4; + break; + case IPTUN_TYPE_IPV6: + media = DL_IPV6; + break; + case IPTUN_TYPE_6TO4: + media = DL_6TO4; + break; + default: + return (DLADM_STATUS_IPTUNTYPE); + } + + status = dladm_create_datalink_id(handle, name, DATALINK_CLASS_IPTUN, + media, linkmgmt_flags, &params->iptun_param_linkid); + if (status != DLADM_STATUS_OK) + return (status); + + if (flags & DLADM_OPT_PERSIST) { + status = i_iptun_create_db(handle, name, params, media); + if (status != DLADM_STATUS_OK) 
+ goto done; + } + + if (flags & DLADM_OPT_ACTIVE) { + status = i_iptun_create_sys(handle, params); + if (status != DLADM_STATUS_OK && (flags & DLADM_OPT_PERSIST)) { + (void) dladm_remove_conf(handle, + params->iptun_param_linkid); + } + } + +done: + if (status != DLADM_STATUS_OK) { + (void) dladm_destroy_datalink_id(handle, + params->iptun_param_linkid, flags); + } + return (status); +} + +dladm_status_t +dladm_iptun_delete(dladm_handle_t handle, datalink_id_t linkid, uint32_t flags) +{ + dladm_status_t status; + datalink_class_t class; + + /* First, make sure that this is an IP tunnel. */ + if ((status = dladm_datalink_id2info(handle, linkid, NULL, &class, NULL, + NULL, 0)) != DLADM_STATUS_OK) + return (status); + if (class != DATALINK_CLASS_IPTUN) + return (DLADM_STATUS_LINKINVAL); + + if (flags & DLADM_OPT_ACTIVE) { + /* + * Note that if i_iptun_delete_sys() fails with + * DLADM_STATUS_NOTFOUND and the caller also wishes to delete + * the persistent configuration, we still fall through to the + * DLADM_OPT_PERSIST case in case the tunnel only exists + * persistently. + */ + status = i_iptun_delete_sys(handle, linkid); + if (status != DLADM_STATUS_OK && + (status != DLADM_STATUS_NOTFOUND || + !(flags & DLADM_OPT_PERSIST))) + return (status); + } + + if (flags & DLADM_OPT_PERSIST) { + (void) dladm_remove_conf(handle, linkid); + (void) dladm_destroy_datalink_id(handle, linkid, + DLADM_OPT_PERSIST); + } + return (DLADM_STATUS_OK); +} + +dladm_status_t +dladm_iptun_modify(dladm_handle_t handle, const iptun_params_t *params, + uint32_t flags) +{ + dladm_status_t status = DLADM_STATUS_OK; + iptun_params_t old_params; + + /* + * We can only modify the tunnel source, tunnel destination, or IPsec + * policy. 
+ */ + if (!(params->iptun_param_flags & + (IPTUN_PARAM_LADDR|IPTUN_PARAM_RADDR|IPTUN_PARAM_SECINFO))) + return (DLADM_STATUS_BADARG); + + if (flags & DLADM_OPT_PERSIST) { + /* + * Before we change the database, save the old configuration + * so that we can revert back if an error occurs. + */ + old_params.iptun_param_linkid = params->iptun_param_linkid; + status = i_iptun_get_dbparams(handle, &old_params); + if (status != DLADM_STATUS_OK) + return (status); + /* we'll only need to revert the parameters being modified */ + old_params.iptun_param_flags = params->iptun_param_flags; + + status = i_iptun_modify_db(handle, params); + if (status != DLADM_STATUS_OK) + return (status); + } + + if (flags & DLADM_OPT_ACTIVE) { + status = i_iptun_modify_sys(handle, params); + if (status != DLADM_STATUS_OK && (flags & DLADM_OPT_PERSIST)) { + (void) i_iptun_modify_db(handle, &old_params); + } + } + + return (status); +} + +dladm_status_t +dladm_iptun_getparams(dladm_handle_t handle, iptun_params_t *params, + uint32_t flags) +{ + if (flags == DLADM_OPT_ACTIVE) + return (i_iptun_get_sysparams(handle, params)); + else if (flags == DLADM_OPT_PERSIST) + return (i_iptun_get_dbparams(handle, params)); + else + return (DLADM_STATUS_BADARG); +} + +static int +i_iptun_up(dladm_handle_t handle, datalink_id_t linkid, void *arg) +{ + dladm_status_t *statusp = arg; + dladm_status_t status; + iptun_params_t params; + boolean_t id_up = B_FALSE; + + status = dladm_up_datalink_id(handle, linkid); + if (status != DLADM_STATUS_OK) + goto done; + id_up = B_TRUE; + + (void) memset(&params, 0, sizeof (params)); + + params.iptun_param_linkid = linkid; + if ((status = i_iptun_get_dbparams(handle, &params)) == DLADM_STATUS_OK) + status = i_iptun_create_sys(handle, &params); +done: + if (statusp != NULL) + *statusp = status; + if (status != DLADM_STATUS_OK && id_up) { + (void) dladm_destroy_datalink_id(handle, linkid, + DLADM_OPT_ACTIVE); + } + return (DLADM_WALK_CONTINUE); +} + +static int 
+i_iptun_down(dladm_handle_t handle, datalink_id_t linkid, void *arg) +{ + dladm_status_t *statusp = arg; + dladm_status_t status; + + status = i_iptun_delete_sys(handle, linkid); + if (statusp != NULL) + *statusp = status; + return (DLADM_WALK_CONTINUE); +} + +/* ARGSUSED */ +dladm_status_t +dladm_iptun_up(dladm_handle_t handle, datalink_id_t linkid) +{ + dladm_status_t status = DLADM_STATUS_OK; + + if (linkid == DATALINK_ALL_LINKID) { + (void) dladm_walk_datalink_id(i_iptun_up, handle, NULL, + DATALINK_CLASS_IPTUN, DATALINK_ANY_MEDIATYPE, + DLADM_OPT_PERSIST); + } else { + (void) i_iptun_up(handle, linkid, &status); + } + return (status); +} + +dladm_status_t +dladm_iptun_down(dladm_handle_t handle, datalink_id_t linkid) +{ + dladm_status_t status = DLADM_STATUS_OK; + + if (linkid == DATALINK_ALL_LINKID) { + (void) dladm_walk_datalink_id(i_iptun_down, handle, NULL, + DATALINK_CLASS_IPTUN, DATALINK_ANY_MEDIATYPE, + DLADM_OPT_ACTIVE); + } else { + (void) i_iptun_down(handle, linkid, &status); + } + return (status); +} + +dladm_status_t +dladm_iptun_set6to4relay(dladm_handle_t handle, struct in_addr *relay) +{ + return (i_iptun_ioctl(handle, IPTUN_SET_6TO4RELAY, relay)); +} + +dladm_status_t +dladm_iptun_get6to4relay(dladm_handle_t handle, struct in_addr *relay) +{ + return (i_iptun_ioctl(handle, IPTUN_GET_6TO4RELAY, relay)); +} diff --git a/usr/src/lib/libdladm/common/libdliptun.h b/usr/src/lib/libdladm/common/libdliptun.h new file mode 100644 index 0000000000..287682f5a3 --- /dev/null +++ b/usr/src/lib/libdladm/common/libdliptun.h @@ -0,0 +1,75 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LIBDLIPTUN_H +#define _LIBDLIPTUN_H + +#include <netdb.h> +#include <sys/types.h> +#include <netinet/in.h> +#include <inet/iptun.h> +#include <libdladm.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct iptun_params { + datalink_id_t iptun_param_linkid; + uint_t iptun_param_flags; + iptun_type_t iptun_param_type; + char iptun_param_laddr[NI_MAXHOST]; /* local address */ + char iptun_param_raddr[NI_MAXHOST]; /* remote address */ + ipsec_req_t iptun_param_secinfo; +} iptun_params_t; + +/* iptun_param_flags */ +#define IPTUN_PARAM_TYPE 0x00000001 /* itp_type is set */ +#define IPTUN_PARAM_LADDR 0x00000002 /* itp_laddr is set */ +#define IPTUN_PARAM_RADDR 0x00000004 /* itp_raddr is set */ +#define IPTUN_PARAM_SECINFO 0x00000008 /* itp_secinfo is set */ +#define IPTUN_PARAM_IMPLICIT 0x00000010 /* implicitly created IP tunnel */ +#define IPTUN_PARAM_IPSECPOL 0x00000020 /* IPsec policy exists */ + +extern dladm_status_t dladm_iptun_create(dladm_handle_t, const char *, + iptun_params_t *, uint_t); +extern dladm_status_t dladm_iptun_delete(dladm_handle_t, datalink_id_t, + uint_t); +extern dladm_status_t dladm_iptun_modify(dladm_handle_t, + const iptun_params_t *, uint_t); +extern dladm_status_t dladm_iptun_getparams(dladm_handle_t, iptun_params_t *, + uint_t); +extern dladm_status_t dladm_iptun_up(dladm_handle_t, datalink_id_t); +extern dladm_status_t 
dladm_iptun_down(dladm_handle_t, datalink_id_t); +extern dladm_status_t dladm_iptun_set6to4relay(dladm_handle_t, + struct in_addr *); +extern dladm_status_t dladm_iptun_get6to4relay(dladm_handle_t, + struct in_addr *); + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBDLIPTUN_H */ diff --git a/usr/src/lib/libdladm/common/libdllink.c b/usr/src/lib/libdladm/common/libdllink.c index c73e7e0b92..4c2ca93c8c 100644 --- a/usr/src/lib/libdladm/common/libdllink.c +++ b/usr/src/lib/libdladm/common/libdllink.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -379,28 +379,6 @@ dladm_linkduplex2str(link_duplex_t duplex, char *buf) } /* - * Set zoneid of a given link. Note that this function takes a link name - * argument instead of a linkid, because a data-link (and its linkid) could - * be created implicitly as the result of this function. - */ -dladm_status_t -dladm_setzid(dladm_handle_t handle, const char *dlname, char *zone_name) -{ - datalink_id_t linkid; - dladm_status_t status = DLADM_STATUS_OK; - - /* If the link does not exist, it is a ppa-hacked vlan. */ - status = dladm_name2info(handle, dlname, &linkid, NULL, NULL, NULL); - if (status != DLADM_STATUS_OK) - return (status); - - status = dladm_set_linkprop(handle, linkid, "zone", &zone_name, 1, - DLADM_OPT_ACTIVE); - - return (status); -} - -/* * Case 1: rename an existing link1 to a link2 that does not exist. 
* Result: <linkid1, link2> */ @@ -409,7 +387,6 @@ i_dladm_rename_link_c1(dladm_handle_t handle, datalink_id_t linkid1, const char *link1, const char *link2, uint32_t flags) { dld_ioc_rename_t dir; - dladm_conf_t conf; dladm_status_t status = DLADM_STATUS_OK; /* @@ -428,24 +405,9 @@ i_dladm_rename_link_c1(dladm_handle_t handle, datalink_id_t linkid1, } status = dladm_remap_datalink_id(handle, linkid1, link2); - if (status != DLADM_STATUS_OK) - goto done; - - /* - * Flush the current mapping to persistent configuration. - */ - if ((flags & DLADM_OPT_PERSIST) && - (((status = dladm_read_conf(handle, linkid1, &conf)) != - DLADM_STATUS_OK) || - ((status = dladm_write_conf(handle, conf)) != DLADM_STATUS_OK))) { - (void) dladm_remap_datalink_id(handle, linkid1, link1); - } -done: - if (flags & DLADM_OPT_ACTIVE) { - if (status != DLADM_STATUS_OK) { - (void) strlcpy(dir.dir_link, link1, MAXLINKNAMELEN); - (void) ioctl(dladm_dld_fd(handle), DLDIOC_RENAME, &dir); - } + if (status != DLADM_STATUS_OK && (flags & DLADM_OPT_ACTIVE)) { + (void) strlcpy(dir.dir_link, link1, MAXLINKNAMELEN); + (void) ioctl(dladm_dld_fd(handle), DLDIOC_RENAME, &dir); } return (status); } @@ -586,9 +548,9 @@ i_dladm_rename_link_c2(dladm_handle_t handle, datalink_id_t linkid1, /* * Delete link1 and mark link2 up. 
*/ + (void) dladm_remove_conf(handle, linkid1); (void) dladm_destroy_datalink_id(handle, linkid1, DLADM_OPT_ACTIVE | DLADM_OPT_PERSIST); - (void) dladm_remove_conf(handle, linkid1); (void) dladm_up_datalink_id(handle, linkid2); /* @@ -801,9 +763,8 @@ i_dladm_phys_delete(dladm_handle_t handle, datalink_id_t linkid, void *arg) DLADM_OPT_PERSIST); } - (void) dladm_destroy_datalink_id(handle, linkid, DLADM_OPT_PERSIST); (void) dladm_remove_conf(handle, linkid); - + (void) dladm_destroy_datalink_id(handle, linkid, DLADM_OPT_PERSIST); done: del_phys_arg->rval = status; return (DLADM_WALK_CONTINUE); diff --git a/usr/src/lib/libdladm/common/libdllink.h b/usr/src/lib/libdladm/common/libdllink.h index 6e3b0c97a3..d47059e1d1 100644 --- a/usr/src/lib/libdladm/common/libdllink.h +++ b/usr/src/lib/libdladm/common/libdllink.h @@ -119,7 +119,6 @@ extern dladm_status_t dladm_walk(dladm_walkcb_t *, dladm_handle_t, void *, extern dladm_status_t dladm_mac_walk(dladm_walkcb_t *, void *); extern dladm_status_t dladm_info(dladm_handle_t, datalink_id_t, dladm_attr_t *); -extern dladm_status_t dladm_setzid(dladm_handle_t, const char *, char *); extern dladm_status_t dladm_rename_link(dladm_handle_t, const char *, const char *); diff --git a/usr/src/lib/libdladm/common/libdlmgmt.c b/usr/src/lib/libdladm/common/libdlmgmt.c index c642567fd5..db107ed094 100644 --- a/usr/src/lib/libdladm/common/libdlmgmt.c +++ b/usr/src/lib/libdladm/common/libdlmgmt.c @@ -31,6 +31,7 @@ #include <unistd.h> #include <string.h> #include <strings.h> +#include <zone.h> #include <sys/types.h> #include <sys/stat.h> #include <sys/aggr.h> @@ -607,3 +608,27 @@ dladm_destroy_conf(dladm_handle_t handle, dladm_conf_t conf) (void) dladm_door_call(handle, &destroyconf, sizeof (destroyconf), &retval, sizeof (retval)); } + +dladm_status_t +dladm_zone_boot(dladm_handle_t handle, zoneid_t zoneid) +{ + dlmgmt_door_zoneboot_t zoneboot; + dlmgmt_zoneboot_retval_t retval; + + zoneboot.ld_cmd = DLMGMT_CMD_ZONEBOOT; + 
zoneboot.ld_zoneid = zoneid; + return (dladm_door_call(handle, &zoneboot, sizeof (zoneboot), &retval, + sizeof (retval))); +} + +dladm_status_t +dladm_zone_halt(dladm_handle_t handle, zoneid_t zoneid) +{ + dlmgmt_door_zonehalt_t zonehalt; + dlmgmt_zonehalt_retval_t retval; + + zonehalt.ld_cmd = DLMGMT_CMD_ZONEHALT; + zonehalt.ld_zoneid = zoneid; + return (dladm_door_call(handle, &zonehalt, sizeof (zonehalt), &retval, + sizeof (retval))); +} diff --git a/usr/src/lib/libdladm/common/libdlmgmt.h b/usr/src/lib/libdladm/common/libdlmgmt.h index 35156c2f97..c170629647 100644 --- a/usr/src/lib/libdladm/common/libdlmgmt.h +++ b/usr/src/lib/libdladm/common/libdlmgmt.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -58,6 +58,8 @@ extern "C" { #define DLMGMT_CMD_DESTROYCONF (DLMGMT_CMD_BASE + 10) #define DLMGMT_CMD_GETATTR (DLMGMT_CMD_BASE + 11) #define DLMGMT_CMD_LINKPROP_GETNEXT (DLMGMT_CMD_BASE + 12) +#define DLMGMT_CMD_ZONEBOOT (DLMGMT_CMD_BASE + 13) +#define DLMGMT_CMD_ZONEHALT (DLMGMT_CMD_BASE + 14) typedef struct dlmgmt_door_createid_s { int ld_cmd; @@ -153,6 +155,11 @@ typedef struct dlmgmt_linkprop_getnext_retval_s { char lr_attrval[MAXLINKATTRVALLEN]; } dlmgmt_linkprop_getnext_retval_t; +typedef struct dlmgmt_door_zone_s { + int ld_cmd; + zoneid_t ld_zoneid; +} dlmgmt_door_zoneboot_t, dlmgmt_door_zonehalt_t; + typedef struct dlmgmt_retval_s dlmgmt_remapid_retval_t, dlmgmt_upid_retval_t, dlmgmt_destroyid_retval_t, @@ -160,7 +167,9 @@ typedef struct dlmgmt_retval_s dlmgmt_remapid_retval_t, dlmgmt_unsetattr_retval_t, dlmgmt_writeconf_retval_t, dlmgmt_removeconf_retval_t, - dlmgmt_destroyconf_retval_t; + dlmgmt_destroyconf_retval_t, + dlmgmt_zoneboot_retval_t, + dlmgmt_zonehalt_retval_t; typedef struct dlmgmt_linkid_retval_s dlmgmt_createid_retval_t; diff --git a/usr/src/lib/libdladm/common/libdlsim.c 
b/usr/src/lib/libdladm/common/libdlsim.c index 5368db7d0a..a588073bfb 100644 --- a/usr/src/lib/libdladm/common/libdlsim.c +++ b/usr/src/lib/libdladm/common/libdlsim.c @@ -138,6 +138,7 @@ i_dladm_get_simnet_info_persist(dladm_handle_t handle, dladm_conf_t conf; dladm_status_t status; char macstr[ETHERADDRL * 3]; + char simnetpeer[MAXLINKNAMELEN]; uint64_t u64; boolean_t mac_fixed; @@ -164,11 +165,13 @@ i_dladm_get_simnet_info_persist(dladm_handle_t handle, (void) dladm_aggr_str2macaddr(macstr, &mac_fixed, attrp->sna_mac_addr); /* Peer field is optional and only set when peer is attached */ - if (dladm_get_conf_field(handle, conf, FSIMNETPEER, &u64, - sizeof (u64)) == DLADM_STATUS_OK) - attrp->sna_peer_link_id = (datalink_id_t)u64; - else + if (dladm_get_conf_field(handle, conf, FSIMNETPEER, simnetpeer, + sizeof (simnetpeer)) == DLADM_STATUS_OK) { + status = dladm_name2info(handle, simnetpeer, + &attrp->sna_peer_link_id, NULL, NULL, NULL); + } else { attrp->sna_peer_link_id = DATALINK_INVALID_LINKID; + } done: dladm_destroy_conf(handle, conf); return (status); @@ -223,7 +226,7 @@ i_dladm_simnet_update_conf(dladm_handle_t handle, datalink_id_t simnet_id, { dladm_status_t status; dladm_conf_t conf; - uint64_t u64; + char simnetpeer[MAXLINKNAMELEN]; status = dladm_read_conf(handle, simnet_id, &conf); if (status != DLADM_STATUS_OK) @@ -232,12 +235,12 @@ i_dladm_simnet_update_conf(dladm_handle_t handle, datalink_id_t simnet_id, /* First clear previous peer if any in configuration */ (void) dladm_unset_conf_field(handle, conf, FSIMNETPEER); if (peer_simnet_id != DATALINK_INVALID_LINKID) { - u64 = peer_simnet_id; if ((status = dladm_datalink_id2info(handle, - peer_simnet_id, NULL, NULL, NULL, NULL, - 0)) == DLADM_STATUS_OK) + peer_simnet_id, NULL, NULL, NULL, simnetpeer, + sizeof (simnetpeer))) == DLADM_STATUS_OK) { status = dladm_set_conf_field(handle, conf, - FSIMNETPEER, DLADM_TYPE_UINT64, &u64); + FSIMNETPEER, DLADM_TYPE_STR, simnetpeer); + } if (status != 
DLADM_STATUS_OK) goto fail; } @@ -357,9 +360,9 @@ dladm_simnet_delete(dladm_handle_t handle, datalink_id_t simnet_id, } if (flags & DLADM_OPT_PERSIST) { + (void) dladm_remove_conf(handle, simnet_id); (void) dladm_destroy_datalink_id(handle, simnet_id, DLADM_OPT_PERSIST); - (void) dladm_remove_conf(handle, simnet_id); /* Update any attached peer configuration */ if (prevattr.sna_peer_link_id != DATALINK_INVALID_LINKID) diff --git a/usr/src/lib/libdladm/common/libdlvnic.c b/usr/src/lib/libdladm/common/libdlvnic.c index 2984c42dcc..74a4339c45 100644 --- a/usr/src/lib/libdladm/common/libdlvnic.c +++ b/usr/src/lib/libdladm/common/libdlvnic.c @@ -187,6 +187,7 @@ i_dladm_vnic_info_persist(dladm_handle_t handle, datalink_id_t linkid, dladm_conf_t conf; dladm_status_t status; char macstr[ETHERADDRL * 3]; + char linkover[MAXLINKNAMELEN]; uint64_t u64; datalink_class_t class; @@ -195,10 +196,19 @@ i_dladm_vnic_info_persist(dladm_handle_t handle, datalink_id_t linkid, DLADM_STATUS_OK) return (status); - status = dladm_get_conf_field(handle, conf, FLINKOVER, &u64, - sizeof (u64)); - attrp->va_link_id = ((status == DLADM_STATUS_OK) ? - (datalink_id_t)u64 : DATALINK_INVALID_LINKID); + status = dladm_get_conf_field(handle, conf, FLINKOVER, linkover, + sizeof (linkover)); + if (status != DLADM_STATUS_OK) { + /* + * This isn't an error, etherstubs don't have a FLINKOVER + * property. 
+ */ + attrp->va_link_id = DATALINK_INVALID_LINKID; + } else { + if ((status = dladm_name2info(handle, linkover, + &attrp->va_link_id, NULL, NULL, NULL)) != DLADM_STATUS_OK) + goto done; + } status = dladm_get_conf_field(handle, conf, FHWRINGS, &attrp->va_hwrings, sizeof (boolean_t)); @@ -509,7 +519,6 @@ dladm_vnic_delete(dladm_handle_t handle, datalink_id_t linkid, uint32_t flags) { dladm_status_t status; datalink_class_t class; - dladm_vnic_attr_t attr; if (flags == 0) return (DLADM_STATUS_BADARG); @@ -528,10 +537,6 @@ dladm_vnic_delete(dladm_handle_t handle, datalink_id_t linkid, uint32_t flags) } if ((flags & DLADM_OPT_ACTIVE) != 0) { - status = dladm_vnic_info(handle, linkid, &attr, - DLADM_OPT_ACTIVE); - if (status != DLADM_STATUS_OK) - return (status); status = i_dladm_vnic_delete_sys(handle, linkid); if (status == DLADM_STATUS_OK) { (void) dladm_set_linkprop(handle, linkid, NULL, NULL, 0, @@ -544,9 +549,9 @@ dladm_vnic_delete(dladm_handle_t handle, datalink_id_t linkid, uint32_t flags) } } if ((flags & DLADM_OPT_PERSIST) != 0) { + (void) dladm_remove_conf(handle, linkid); (void) dladm_destroy_datalink_id(handle, linkid, DLADM_OPT_PERSIST); - (void) dladm_remove_conf(handle, linkid); } return (dladm_bridge_refresh(handle, linkid)); } @@ -589,6 +594,7 @@ dladm_vnic_persist_conf(dladm_handle_t handle, const char *name, dladm_conf_t conf = DLADM_INVALID_CONF; dladm_status_t status; char macstr[ETHERADDRL * 3]; + char linkover[MAXLINKNAMELEN]; uint64_t u64; if ((status = dladm_create_conf(handle, name, attrp->va_vnic_id, @@ -596,9 +602,12 @@ dladm_vnic_persist_conf(dladm_handle_t handle, const char *name, return (status); if (attrp->va_link_id != DATALINK_INVALID_LINKID) { - u64 = attrp->va_link_id; + status = dladm_datalink_id2info(handle, attrp->va_link_id, NULL, + NULL, NULL, linkover, sizeof (linkover)); + if (status != DLADM_STATUS_OK) + goto done; status = dladm_set_conf_field(handle, conf, FLINKOVER, - DLADM_TYPE_UINT64, &u64); + DLADM_TYPE_STR, 
linkover); if (status != DLADM_STATUS_OK) goto done; } diff --git a/usr/src/lib/libdladm/common/linkprop.c b/usr/src/lib/libdladm/common/linkprop.c index b0c0c32f45..d77707be99 100644 --- a/usr/src/lib/libdladm/common/linkprop.c +++ b/usr/src/lib/libdladm/common/linkprop.c @@ -54,6 +54,7 @@ #include <sys/mac_flow.h> #include <inttypes.h> #include <sys/ethernet.h> +#include <inet/iptun.h> #include <net/wpa.h> #include <sys/sysmacros.h> #include <sys/vlan.h> @@ -146,6 +147,7 @@ static pd_setf_t do_set_zone, do_set_rate_prop, set_stp_prop, set_bridge_forward, set_bridge_pvid; static pd_checkf_t do_check_zone, do_check_autopush, do_check_rate, + do_check_hoplimit, do_check_encaplim, i_dladm_uint32_check, do_check_maxbw, do_check_cpus, do_check_priority, check_stp_prop, check_bridge_pvid; @@ -329,6 +331,10 @@ static link_attr_t link_attr[] = { { MAC_PROP_TAGMODE, sizeof (link_tagmode_t), "tagmode"}, + { MAC_PROP_IPTUN_HOPLIMIT, sizeof (uint32_t), "hoplimit"}, + + { MAC_PROP_IPTUN_ENCAPLIMIT, sizeof (uint32_t), "encaplimit"}, + { MAC_PROP_PVID, sizeof (uint16_t), "default_tag"}, { MAC_PROP_LLIMIT, sizeof (uint32_t), "learn_limit"}, @@ -553,6 +559,14 @@ static prop_desc_t prop_table[] = { DATALINK_CLASS_PHYS | DATALINK_CLASS_AGGR | DATALINK_CLASS_VNIC, DL_ETHER }, + { "hoplimit", { "", 0 }, NULL, 0, + i_dladm_set_public_prop, i_dladm_range_get, i_dladm_uint32_get, + do_check_hoplimit, 0, DATALINK_CLASS_IPTUN, DATALINK_ANY_MEDIATYPE}, + + { "encaplimit", { "", 0 }, NULL, 0, + i_dladm_set_public_prop, i_dladm_range_get, i_dladm_uint32_get, + do_check_encaplim, 0, DATALINK_CLASS_IPTUN, DL_IPV6}, + { "forward", { "1", 1 }, link_01_vals, VALCNT(link_01_vals), set_bridge_forward, NULL, get_bridge_forward, NULL, PD_AFTER_PERM, @@ -1360,7 +1374,6 @@ do_set_zone(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, { dladm_status_t status = DLADM_STATUS_OK; zoneid_t zid_old, zid_new; - char link[MAXLINKNAMELEN]; char *cp; dld_ioc_macprop_t *dip; dld_ioc_zid_t *dzp; @@ 
-1380,75 +1393,25 @@ do_set_zone(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, free(dip); zid_new = dzp->diz_zid; - (void) strlcpy(link, dzp->diz_link, MAXLINKNAMELEN); - - /* Do nothing if setting to current value */ if (zid_new == zid_old) - return (status); + return (DLADM_STATUS_OK); - if (zid_new != GLOBAL_ZONEID) { - /* - * If the new zoneid is the global zone, we could destroy - * the link (in the case of an implicitly-created VLAN) as a - * result of setting the zoneid. In that case, we defer the - * operation to the end of this function to avoid recreating - * the VLAN and getting a different linkid during the rollback - * if other operation fails. - * - * Otherwise, this operation will hold a reference to the - * link and prevent a link renaming, so we need to do it - * before other operations. - */ - status = i_dladm_set_public_prop(handle, pdp, linkid, vdp, - val_cnt, flags, media); - if (status != DLADM_STATUS_OK) - return (status); - } + if ((status = i_dladm_set_public_prop(handle, pdp, linkid, vdp, val_cnt, + flags, media)) != DLADM_STATUS_OK) + return (status); + /* + * It is okay to fail to update the /dev entry (some vanity-named + * links do not have a /dev entry). + */ if (zid_old != GLOBAL_ZONEID) { - if (zone_remove_datalink(zid_old, link) != 0 && - errno != ENXIO) { - status = dladm_errno2status(errno); - goto rollback1; - } - - /* - * It is okay to fail to update the /dev entry (some - * vanity-named links do not have a /dev entry). 
- */ (void) i_dladm_update_deventry(handle, zid_old, linkid, B_FALSE); } - - if (zid_new != GLOBAL_ZONEID) { - if (zone_add_datalink(zid_new, link) != 0) { - status = dladm_errno2status(errno); - goto rollback2; - } - + if (zid_new != GLOBAL_ZONEID) (void) i_dladm_update_deventry(handle, zid_new, linkid, B_TRUE); - } else { - status = i_dladm_set_public_prop(handle, pdp, linkid, vdp, - val_cnt, flags, media); - if (status != DLADM_STATUS_OK) - goto rollback2; - } return (DLADM_STATUS_OK); - -rollback2: - if (zid_old != GLOBAL_ZONEID) - (void) i_dladm_update_deventry(handle, zid_old, linkid, B_TRUE); - if (zid_old != GLOBAL_ZONEID) - (void) zone_add_datalink(zid_old, link); -rollback1: - if (zid_new != GLOBAL_ZONEID) { - dzp->diz_zid = zid_old; - (void) i_dladm_set_public_prop(handle, pdp, linkid, vdp, - val_cnt, flags, media); - } - - return (status); } /* ARGSUSED */ @@ -1457,7 +1420,6 @@ do_check_zone(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, char **prop_val, uint_t val_cnt, val_desc_t *vdp, datalink_media_t media) { char *zone_name; - char linkname[MAXLINKNAMELEN]; zoneid_t zoneid; dladm_status_t status = DLADM_STATUS_OK; dld_ioc_zid_t *dzp; @@ -1469,17 +1431,7 @@ do_check_zone(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, if (dzp == NULL) return (DLADM_STATUS_NOMEM); - if ((status = dladm_datalink_id2info(handle, linkid, NULL, NULL, NULL, - linkname, MAXLINKNAMELEN)) != DLADM_STATUS_OK) { - goto done; - } - zone_name = (prop_val != NULL) ? 
*prop_val : GLOBAL_ZONENAME; - if (strlen(linkname) > MAXLINKNAMELEN) { - status = DLADM_STATUS_BADVAL; - goto done; - } - if ((zoneid = getzoneidbyname(zone_name)) == -1) { status = DLADM_STATUS_BADVAL; goto done; @@ -1503,7 +1455,7 @@ do_check_zone(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, (void) memset(dzp, 0, sizeof (dld_ioc_zid_t)); dzp->diz_zid = zoneid; - (void) strlcpy(dzp->diz_link, linkname, MAXLINKNAMELEN); + dzp->diz_linkid = linkid; vdp->vd_val = (uintptr_t)dzp; return (DLADM_STATUS_OK); @@ -2349,7 +2301,7 @@ do_set_radio(dladm_handle_t handle, datalink_id_t linkid, /* ARGSUSED */ static dladm_status_t do_set_radio_prop(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, - val_desc_t *vdp, uint_t val_cnt, uint_t fags, datalink_media_t media) + val_desc_t *vdp, uint_t val_cnt, uint_t flags, datalink_media_t media) { dladm_wlan_radio_t radio = (dladm_wlan_radio_t)vdp->vd_val; dladm_status_t status; @@ -2362,6 +2314,50 @@ do_set_radio_prop(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, return (status); } +/* ARGSUSED */ +static dladm_status_t +do_check_hoplimit(dladm_handle_t handle, prop_desc_t *pdp, + datalink_id_t linkid, char **prop_val, uint_t val_cnt, val_desc_t *vdp, + datalink_media_t media) +{ + int32_t hlim; + char *ep; + + if (val_cnt != 1) + return (DLADM_STATUS_BADVALCNT); + + errno = 0; + hlim = strtol(*prop_val, &ep, 10); + if (errno != 0 || ep == *prop_val || hlim < 1 || + hlim > (int32_t)UINT8_MAX) + return (DLADM_STATUS_BADVAL); + vdp->vd_val = hlim; + return (DLADM_STATUS_OK); +} + +/* ARGSUSED */ +static dladm_status_t +do_check_encaplim(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, + char **prop_val, uint_t val_cnt, val_desc_t *vdp, datalink_media_t media) +{ + int32_t elim; + char *ep; + + if (media != DL_IPV6) + return (DLADM_STATUS_BADARG); + + if (val_cnt != 1) + return (DLADM_STATUS_BADVALCNT); + + errno = 0; + elim = strtol(*prop_val, &ep, 10); + if (errno != 0 || 
ep == *prop_val || elim < 0 || + elim > (int32_t)UINT8_MAX) + return (DLADM_STATUS_BADVAL); + vdp->vd_val = elim; + return (DLADM_STATUS_OK); +} + static dladm_status_t i_dladm_set_linkprop_db(dladm_handle_t handle, datalink_id_t linkid, const char *prop_name, char **prop_val, uint_t val_cnt) diff --git a/usr/src/lib/libdladm/common/llib-ldladm b/usr/src/lib/libdladm/common/llib-ldladm index cc379d19b6..1020605a3c 100644 --- a/usr/src/lib/libdladm/common/llib-ldladm +++ b/usr/src/lib/libdladm/common/llib-ldladm @@ -31,6 +31,7 @@ #include <libdlwlan.h> #include <libdlvnic.h> #include <libdlvlan.h> +#include <libdliptun.h> #include <libdlmgmt.h> #include <libdlflow.h> #include <libdlstat.h> diff --git a/usr/src/lib/libdladm/common/mapfile-vers b/usr/src/lib/libdladm/common/mapfile-vers index 048b809751..81b5d363cc 100644 --- a/usr/src/lib/libdladm/common/mapfile-vers +++ b/usr/src/lib/libdladm/common/mapfile-vers @@ -45,7 +45,6 @@ SUNWprivate_1.1 { dladm_door_fd; dladm_info; dladm_walk; - dladm_setzid; dladm_status2str; dladm_linkstate2str; dladm_linkduplex2str; @@ -108,6 +107,14 @@ SUNWprivate_1.1 { dladm_wlan_str2auth; dladm_wlan_str2bsstype; dladm_wlan_str2linkstatus; + dladm_iptun_create; + dladm_iptun_delete; + dladm_iptun_modify; + dladm_iptun_getparams; + dladm_iptun_up; + dladm_iptun_down; + dladm_iptun_set6to4relay; + dladm_iptun_get6to4relay; dladm_vlan_create; dladm_vlan_delete; dladm_vlan_up; @@ -156,6 +163,8 @@ SUNWprivate_1.1 { dladm_walk_usage_time; dladm_usage_summary; dladm_usage_dates; + dladm_zone_boot; + dladm_zone_halt; dladm_flow_add; dladm_flow_remove; diff --git a/usr/src/lib/libdladm/libdladm.xcl b/usr/src/lib/libdladm/libdladm.xcl index 5070c9457c..5e276923e8 100644 --- a/usr/src/lib/libdladm/libdladm.xcl +++ b/usr/src/lib/libdladm/libdladm.xcl @@ -43,9 +43,12 @@ msgid "0x" msgid "bridging" msgid "config" msgid "debug" +msgid "encaplimit" msgid "force-protocol" msgid "forward-delay" msgid "hello-time" +msgid "hoplimit" +msgid "iptun" msgid 
"max-age" msgid "nickname" msgid "r" diff --git a/usr/src/lib/libdlpi/common/libdlpi.c b/usr/src/lib/libdlpi/common/libdlpi.c index 2dcf98c90a..bda2648955 100644 --- a/usr/src/lib/libdlpi/common/libdlpi.c +++ b/usr/src/lib/libdlpi/common/libdlpi.c @@ -55,7 +55,6 @@ static int i_dlpi_open(const char *, int *, uint_t, boolean_t); static int i_dlpi_style1_open(dlpi_impl_t *); static int i_dlpi_style2_open(dlpi_impl_t *); static int i_dlpi_checkstyle(dlpi_impl_t *, t_uscalar_t); -static int i_dlpi_remove_ppa(char *); static int i_dlpi_attach(dlpi_impl_t *); static void i_dlpi_passive(dlpi_impl_t *); @@ -134,7 +133,6 @@ int dlpi_open(const char *linkname, dlpi_handle_t *dhp, uint_t flags) { int retval, on = 1; - int cnt; ifspec_t ifsp; dlpi_impl_t *dip; @@ -166,14 +164,6 @@ dlpi_open(const char *linkname, dlpi_handle_t *dhp, uint_t flags) if (getenv("DLPI_DEVONLY") != NULL) dip->dli_oflags |= DLPI_DEVONLY; - if (!(flags & DLPI_DEVIPNET)) { - dip->dli_mod_cnt = ifsp.ifsp_modcnt; - for (cnt = 0; cnt != dip->dli_mod_cnt; cnt++) { - (void) strlcpy(dip->dli_modlist[cnt], - ifsp.ifsp_mods[cnt], DLPI_LINKNAME_MAX); - } - } - /* Copy linkname provided to the function. */ if (strlcpy(dip->dli_linkname, linkname, sizeof (dip->dli_linkname)) >= sizeof (dip->dli_linkname)) { @@ -1140,45 +1130,18 @@ i_dlpi_style1_open(dlpi_impl_t *dip) int retval, save_errno; int fd; - /* - * In order to support open of syntax like device[.module[.module...]] - * where modules need to be pushed onto the device stream, open only - * device name, otherwise open the full linkname. - */ - retval = i_dlpi_open((dip->dli_mod_cnt != 0) ? - dip->dli_provider : dip->dli_linkname, &fd, - dip->dli_oflags, B_TRUE); - - if (retval != DLPI_SUCCESS) { - dip->dli_mod_pushed = 0; + retval = i_dlpi_open(dip->dli_linkname, &fd, dip->dli_oflags, B_TRUE); + if (retval != DLPI_SUCCESS) return (retval); - } dip->dli_fd = fd; - /* - * Try to push modules (if any) onto the device stream. 
If I_PUSH - * fails, we increment count of modules pushed (dli_mod_pushed) - * expecting it is last module to be pushed and thus will be pushed - * in i_dlpi_style2_open(). - */ - for (dip->dli_mod_pushed = 0; dip->dli_mod_pushed < dip->dli_mod_cnt; - dip->dli_mod_pushed++) { - if (ioctl(fd, I_PUSH, - dip->dli_modlist[dip->dli_mod_pushed]) == -1) { - dip->dli_mod_pushed++; - return (DLPI_FAILURE); - } - } - if ((retval = i_dlpi_checkstyle(dip, DL_STYLE1)) != DLPI_SUCCESS) { save_errno = errno; (void) close(dip->dli_fd); errno = save_errno; - dip->dli_mod_pushed = 0; - return (retval); } - return (DLPI_SUCCESS); + return (retval); } /* @@ -1190,45 +1153,10 @@ i_dlpi_style2_open(dlpi_impl_t *dip) int fd; int retval, save_errno; - /* - * If style 1 open failed, we need to determine how far it got and - * finish up the open() call as a style 2 open. - * - * If no modules were pushed (mod_pushed == 0), then we need to - * open it as a style 2 link. - * - * If the pushing of the last module failed, we need to - * try pushing it as a style 2 module. Decrement dli_mod_pushed - * count so it can be pushed onto the stream. - * - * Otherwise we failed during the push of an intermediate module and - * must fail out and close the link. - */ - if (dip->dli_mod_pushed == 0) { - if ((retval = i_dlpi_open(dip->dli_provider, &fd, - dip->dli_oflags, B_FALSE)) != DLPI_SUCCESS) { - return (retval); - } - dip->dli_fd = fd; - } else if (dip->dli_mod_pushed == dip->dli_mod_cnt) { - if (i_dlpi_remove_ppa(dip->dli_modlist[dip->dli_mod_cnt - 1]) - != DLPI_SUCCESS) - return (DLPI_ELINKNAMEINVAL); - - dip->dli_mod_pushed--; - fd = dip->dli_fd; - } else { - return (DLPI_ELINKNAMEINVAL); - } - - /* Try and push modules (if any) onto the device stream. 
*/ - for (; dip->dli_mod_pushed < dip->dli_mod_cnt; dip->dli_mod_pushed++) { - if (ioctl(fd, I_PUSH, - dip->dli_modlist[dip->dli_mod_pushed]) == -1) { - retval = DL_SYSERR; - goto failure; - } - } + retval = i_dlpi_open(dip->dli_provider, &fd, dip->dli_oflags, B_FALSE); + if (retval != DLPI_SUCCESS) + return (retval); + dip->dli_fd = fd; /* * Special case: DLPI_SERIAL flag (synchronous serial lines) is not a @@ -1248,10 +1176,8 @@ i_dlpi_style2_open(dlpi_impl_t *dip) return (DLPI_SUCCESS); attach: - if ((retval = i_dlpi_attach(dip)) != DLPI_SUCCESS) - goto failure; - - return (DLPI_SUCCESS); + if ((retval = i_dlpi_attach(dip)) == DLPI_SUCCESS) + return (DLPI_SUCCESS); failure: save_errno = errno; @@ -1278,25 +1204,6 @@ i_dlpi_checkstyle(dlpi_impl_t *dip, t_uscalar_t style) } /* - * Remove PPA from end of linkname. - * Return DLPI_SUCCESS if found, else return DLPI_FAILURE. - */ -static int -i_dlpi_remove_ppa(char *linkname) -{ - int i = strlen(linkname) - 1; - - if (i == -1 || !isdigit(linkname[i--])) - return (DLPI_FAILURE); - - while (i >= 0 && isdigit(linkname[i])) - i--; - - linkname[i + 1] = '\0'; - return (DLPI_SUCCESS); -} - -/* * For DLPI style 2 providers, an explicit attach of PPA is required. */ static int @@ -1842,6 +1749,13 @@ i_dlpi_notifyind_process(dlpi_impl_t *dip, dl_notify_ind_t *dlnotifyindp) notifinfo.dni_size = dlnotifyindp->dl_data; break; case DL_NOTE_PHYS_ADDR: + /* + * libdlpi currently only supports notifications for + * DL_CURR_PHYS_ADDR. + */ + if (dlnotifyindp->dl_data != DL_CURR_PHYS_ADDR) + return (DLPI_ENOTENOTSUP); + dataoff = dlnotifyindp->dl_addr_offset; datalen = dlnotifyindp->dl_addr_length; diff --git a/usr/src/lib/libdlpi/common/libdlpi_impl.h b/usr/src/lib/libdlpi/common/libdlpi_impl.h index c1d61ff1f8..70708ff5af 100644 --- a/usr/src/lib/libdlpi/common/libdlpi_impl.h +++ b/usr/src/lib/libdlpi/common/libdlpi_impl.h @@ -19,15 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _LIBDLPI_IMPL_H #define _LIBDLPI_IMPL_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <libdlpi.h> #include <sys/sysmacros.h> @@ -46,11 +44,6 @@ extern "C" { #define DLPI_SAPLEN_MAX 4 /* - * Maximum number of modules that can be pushed onto a device stream. - */ -#define DLPI_MODS_MAX 9 - -/* * Number of elements in 'arr'. */ #define NELEMS(arr) (sizeof (arr) / sizeof ((arr)[0])) @@ -124,10 +117,6 @@ typedef struct dlpi_impl_s { uint_t dli_sap; /* bound SAP value */ boolean_t dli_sapbefore; /* true if SAP precedes address */ uint_t dli_ppa; /* physical point of attachment */ - uint_t dli_mod_cnt; /* number of modules to be pushed */ - uint_t dli_mod_pushed; /* number of modules pushed */ - char dli_modlist[DLPI_MODS_MAX][DLPI_LINKNAME_MAX]; - /* array of mods */ uint_t dli_mactype; /* mac type */ uint_t dli_oflags; /* flags set at open */ uint_t dli_note_processing; diff --git a/usr/src/lib/libinetcfg/common/inetcfg.c b/usr/src/lib/libinetcfg/common/inetcfg.c index e1f09a881a..d5a27f23ea 100644 --- a/usr/src/lib/libinetcfg/common/inetcfg.c +++ b/usr/src/lib/libinetcfg/common/inetcfg.c @@ -44,9 +44,6 @@ #define ICFG_FAMILY(handle) handle->ifh_interface.if_protocol -#define ICFG_TUNNEL_PROTOCOL(protocol) \ - (protocol == IFTAP_IPV6) ? AF_INET6 : AF_INET - #define ICFG_SOCKADDR_LEN(protocol) \ (protocol == AF_INET) ? 
\ (socklen_t)sizeof (struct sockaddr_in) : \ @@ -66,12 +63,11 @@ static char *errmsgs[ICFG_NERR] = { /* 0 ICFG_SUCCESS */ "Success", /* 1 ICFG_FAILURE */ "Failure", -/* 2 ICFG_NOT_TUNNEL */ "Tunnel operation attempted on non-tunnel", -/* 3 ICFG_NOT_SET */ "Could not return non-existent value", -/* 4 ICFG_BAD_ADDR */ "Invalid Address", -/* 5 ICFG_BAD_PROT */ "Wrong protocol family for operation", -/* 6 ICFG_DAD_FAILED */ "Duplicate address detection failure", -/* 7 ICFG_DAD_FOUND */ "Duplicate address detected" +/* 2 ICFG_NOT_SET */ "Could not return non-existent value", +/* 3 ICFG_BAD_ADDR */ "Invalid Address", +/* 4 ICFG_BAD_PROT */ "Wrong protocol family for operation", +/* 5 ICFG_DAD_FAILED */ "Duplicate address detection failure", +/* 6 ICFG_DAD_FOUND */ "Duplicate address detected" }; /* @@ -156,110 +152,6 @@ to_sockaddr_storage(sa_family_t af, const struct sockaddr *addr, } /* - * Ensures that the tunnel parameter data for the tunnel associated with - * the handle is cached. If the 'force_update' argument is TRUE, then the - * cache should be updated. - * - * Returns: ICFG_SUCCESS, ICFG_NOT_TUNNEL or ICFG_FAILURE. 
- */ -static int -get_tunnel_params(icfg_handle_t handle, boolean_t force_update) -{ - struct iftun_req *params; - - if ((handle->ifh_tunnel_params != NULL) && (!force_update)) { - return (ICFG_SUCCESS); - } - - if (strchr(handle->ifh_interface.if_name, ICFG_LOGICAL_SEP) != NULL) { - return (ICFG_NOT_TUNNEL); - } - - if ((params = calloc(1, sizeof (struct iftun_req))) == NULL) { - return (ICFG_FAILURE); - } - - (void) strlcpy(params->ifta_lifr_name, handle->ifh_interface.if_name, - sizeof (params->ifta_lifr_name)); - - if (ioctl(handle->ifh_sock, SIOCGTUNPARAM, (caddr_t)params) < 0) { - free(params); - if ((errno == EOPNOTSUPP) || (errno == EINVAL)) { - return (ICFG_NOT_TUNNEL); - } - return (ICFG_FAILURE); - } - - /* - * We assert that the iftun_req version is the right one - * and that the lower and upper protocols are set to either - * IPv4 or IPv6. Otherwise, some of our APIs are buggy. - */ - assert((params->ifta_vers == IFTUN_VERSION) && - ((params->ifta_lower == IFTAP_IPV4) || - (params->ifta_lower == IFTAP_IPV6)) && - ((params->ifta_upper == IFTAP_IPV4) || - (params->ifta_upper == IFTAP_IPV6))); - - if (handle->ifh_tunnel_params != NULL) { - free(handle->ifh_tunnel_params); - } - handle->ifh_tunnel_params = params; - - return (ICFG_SUCCESS); -} - -/* - * Sets a tunnel destination or source address (depending upon 'type') on - * a tunnel interface. - * - * Returns: ICFG_SUCCESS, ICFG_NOT_TUNNEL or ICFG_FAILURE. 
- */ -static int -set_tunnel_address(icfg_handle_t handle, const struct sockaddr *addr, - socklen_t addrlen, int type) -{ - struct sockaddr_storage laddr; - sa_family_t lower_family; - struct iftun_req *params; - int ret; - - assert((type == IFTUN_SRC) || (type == IFTUN_DST)); - - if ((ret = get_tunnel_params(handle, B_TRUE)) != ICFG_SUCCESS) { - return (ret); - } - params = handle->ifh_tunnel_params; - - if (params->ifta_lower == IFTAP_IPV4) { - lower_family = AF_INET; - } else { - lower_family = AF_INET6; - } - - ret = to_sockaddr_storage(lower_family, addr, addrlen, &laddr); - if (ret != ICFG_SUCCESS) { - return (ret); - } - - if (type == IFTUN_SRC) { - params->ifta_saddr = laddr; - } else { - params->ifta_daddr = laddr; - } - - (void) strlcpy(params->ifta_lifr_name, handle->ifh_interface.if_name, - sizeof (params->ifta_lifr_name)); - params->ifta_flags |= type; - - if (ioctl(handle->ifh_sock, SIOCSTUNPARAM, (caddr_t)params) < 0) { - return (ICFG_FAILURE); - } - - return (ICFG_SUCCESS); -} - -/* * Return the appropriate error message for a given ICFG error. */ const char * @@ -311,7 +203,6 @@ icfg_open(icfg_handle_t *handle, const icfg_if_t *interface) loc_handle->ifh_sock = sock; loc_handle->ifh_interface = *interface; - loc_handle->ifh_tunnel_params = NULL; *handle = loc_handle; @@ -326,351 +217,10 @@ void icfg_close(icfg_handle_t handle) { (void) close(handle->ifh_sock); - if (handle->ifh_tunnel_params != NULL) { - free(handle->ifh_tunnel_params); - } free(handle); } /* - * Refreshes the tunnel parameter data cache associated with the interface - * represented by the handle. Tunnel parameter data is cached by the - * libinetcfg library by the first call to to any of the tunnel related APIs. - * Since there is no synchronization between consumers of the library and - * non-users of this library, the cache may contain stale data. Users may - * wish to use this API to refresh the cache before subsequent calls to the - * other tunnel related APIs. 
- * - * Returns: ICFG_SUCCESS, ICFG_NOT_TUNNEL or ICFG_FAILURE. - */ -int -icfg_refresh_tunnel_cache(icfg_handle_t handle) -{ - return (get_tunnel_params(handle, B_TRUE)); -} - -/* - * Sets the destination address for the tunnel interface represented - * by 'handle'. - * - * The 'addr' argument points to either a sockaddr_in structure - * (for IPv4) or a sockaddr_in6 structure (for IPv6) that holds - * the IP address. The 'addrlen' argument gives the length of the - * 'addr' structure. - * - * This API will always result in an update of the tunnel parameter - * data cache. - * - * Returns: ICFG_SUCCESS, ICFG_NOT_TUNNEL or ICFG_FAILURE. - */ -int -icfg_set_tunnel_dest(icfg_handle_t handle, const struct sockaddr *addr, - socklen_t addrlen) -{ - return (set_tunnel_address(handle, addr, addrlen, IFTUN_DST)); -} - -/* - * Sets the source address for the tunnel interface represented - * by 'handle'. - * - * The 'addr' argument points to either a sockaddr_in structure - * (for IPv4) or a sockaddr_in6 structure (for IPv6) that holds - * the IP address. The 'addrlen' argument gives the length of the - * 'addr' structure. - * - * This API will always result in an update of the tunnel parameter - * data cache. - * - * Returns: ICFG_SUCCESS, ICFG_NOT_TUNNEL or ICFG_FAILURE. - */ -int -icfg_set_tunnel_src(icfg_handle_t handle, const struct sockaddr *addr, - socklen_t addrlen) -{ - return (set_tunnel_address(handle, addr, addrlen, IFTUN_SRC)); -} - -/* - * Sets the hop limit for the tunnel interface represented by - * the handle to the value contained in the 'limit' argument. - * - * This API will always result in an update of the tunnel parameter data cache. - * - * Returns: ICFG_SUCCESS, ICFG_NOT_TUNNEL or ICFG_FAILURE. 
- */ -int -icfg_set_tunnel_hoplimit(icfg_handle_t handle, uint8_t limit) -{ - struct iftun_req *params; - int ret; - - if ((ret = get_tunnel_params(handle, B_TRUE)) != ICFG_SUCCESS) { - return (ret); - } - params = handle->ifh_tunnel_params; - - (void) strlcpy(params->ifta_lifr_name, handle->ifh_interface.if_name, - sizeof (params->ifta_lifr_name)); - - params->ifta_hop_limit = limit; - params->ifta_flags |= IFTUN_HOPLIMIT; - - if (ioctl(handle->ifh_sock, SIOCSTUNPARAM, (caddr_t)params) < 0) { - return (ICFG_FAILURE); - } - - return (ICFG_SUCCESS); -} - -/* - * Sets the encapsulation limit for the tunnel interface represented by - * the handle to the value contained in the 'limit' argument. If the - * value of the limit is negative, then the encapsulation limit is disabled. - * - * This API will always result in an update of the tunnel parameter data cache. - * - * Returns: ICFG_SUCCESS, ICFG_NOT_TUNNEL or ICFG_FAILURE. - */ -int -icfg_set_tunnel_encaplimit(icfg_handle_t handle, int16_t limit) -{ - struct iftun_req *params; - int ret; - - if ((ret = get_tunnel_params(handle, B_TRUE)) != ICFG_SUCCESS) { - return (ret); - } - params = handle->ifh_tunnel_params; - - (void) strlcpy(params->ifta_lifr_name, handle->ifh_interface.if_name, - sizeof (params->ifta_lifr_name)); - - params->ifta_encap_lim = limit; - params->ifta_flags |= IFTUN_ENCAP; - - if (ioctl(handle->ifh_sock, SIOCSTUNPARAM, (caddr_t)params) < 0) { - return (ICFG_FAILURE); - } - - return (ICFG_SUCCESS); -} - -/* - * Returns the source address for the tunnel interface represented - * by 'handle'. - * - * The 'addr' argument is a result parameter that is filled in with - * the requested address. The format of the 'addr' parameter is - * determined by the address family of the interface. - * - * The 'addrlen' argument is a value-result parameter. Initially, - * it contains the amount of space pointed to by 'addr'; on return - * it contains the length in bytes of the address returned. 
- * - * Note that if 'addrlen' is not large enough for the returned - * address value, then ICFG_FAILURE will be returned and errno - * will be set to ENOSPC. - * - * This API will retrieve the tunnel source value from the tunnel - * parameter data cache and will only update the cache if no data has - * yet been cached for this tunnel. - * - * Returns: ICFG_SUCCESS, ICFG_NOT_TUNNEL, ICFG_NOT_SET or - * ICFG_FAILURE. - */ -int -icfg_get_tunnel_src(icfg_handle_t handle, struct sockaddr *addr, - socklen_t *addrlen) -{ - struct iftun_req *params; - int ret; - - if ((ret = get_tunnel_params(handle, B_FALSE)) != ICFG_SUCCESS) { - return (ret); - } - params = handle->ifh_tunnel_params; - - if (!(params->ifta_flags & IFTUN_SRC)) { - return (ICFG_NOT_SET); - } - - if (params->ifta_lower == IFTAP_IPV4) { - assert(params->ifta_saddr.ss_family == AF_INET); - } else { - assert(params->ifta_saddr.ss_family == AF_INET6); - } - - return (to_sockaddr(params->ifta_saddr.ss_family, addr, addrlen, - &params->ifta_saddr)); -} - -/* - * Returns the destination address for the tunnel interface - * represented by 'handle'. - * - * The 'addr' argument is a result parameter that is filled in - * with the requested address. The format of the 'addr' parameter - * is determined by the address family of the interface. - * - * The 'addrlen' argument is a value-result parameter. Initially, it - * contains the amount of space pointed to by 'addr'; on return it - * contains the length in bytes of the address returned. - * - * Note that if 'addrlen' is not large enough for the returned address - * value, then ICFG_FAILURE will be returned and errno will be set - * to ENOSPC. - * - * This API will retrieve the tunnel destination value from the tunnel - * parameter data cache and will only update the cache if no data has yet - * been cached for this tunnel. - * - * Returns: ICFG_SUCCESS, ICFG_NOT_TUNNEL, ICFG_NOT_SET or - * ICFG_FAILURE. 
- */ -int -icfg_get_tunnel_dest(icfg_handle_t handle, struct sockaddr *addr, - socklen_t *addrlen) -{ - struct iftun_req *params; - int ret; - - if ((ret = get_tunnel_params(handle, B_FALSE)) != ICFG_SUCCESS) { - return (ret); - } - params = handle->ifh_tunnel_params; - - if (!(params->ifta_flags & IFTUN_DST)) { - return (ICFG_NOT_SET); - } - - if (params->ifta_lower == IFTAP_IPV4) { - assert(params->ifta_daddr.ss_family == AF_INET); - } else if (params->ifta_lower == IFTAP_IPV6) { - assert(params->ifta_daddr.ss_family == AF_INET6); - } - - return (to_sockaddr(params->ifta_daddr.ss_family, addr, addrlen, - &params->ifta_daddr)); -} - -/* - * Returns the tunnel hop limit (if any). The value of the limit - * will be copied into the buffer supplied by the 'limit' argument. - * - * This API will retrieve the hoplimit value from the tunnel parameter data - * cache and will only update the cache if no data has yet been cached for - * this tunnel. - * - * Returns: ICFG_SUCCESS, ICFG_NOT_TUNNEL, ICFG_NOT_SET or - * ICFG_FAILURE. 
- */ -int -icfg_get_tunnel_encaplimit(icfg_handle_t handle, int16_t *limit) -{ - struct iftun_req *params; - int ret; - - if ((ret = get_tunnel_params(handle, B_FALSE)) != ICFG_SUCCESS) { - return (ret); - } - params = handle->ifh_tunnel_params; - - if (!(params->ifta_flags & IFTUN_ENCAP)) { - return (ICFG_NOT_SET); - } - - *limit = params->ifta_encap_lim; - - return (ICFG_SUCCESS); -} - -/* - * Returns the protocol family (AF_INET or AF_INET6) of the protocol - * actually being used to tunnel the data. The value of the protocol family - * will be copied into the buffer supplied by the 'protocol' argument. - * - * This API will retrieve the protocol value from the tunnel parameter data - * cache and will only update the cache if no data has yet been cached for - * this tunnel. - * - * Returns: ICFG_SUCCESS, ICFG_NOT_TUNNEL or ICFG_FAILURE. - */ -int -icfg_get_tunnel_lower(icfg_handle_t handle, int *protocol) -{ - struct iftun_req *params; - int ret; - - if ((ret = get_tunnel_params(handle, B_FALSE)) != ICFG_SUCCESS) { - return (ret); - } - params = handle->ifh_tunnel_params; - - *protocol = ICFG_TUNNEL_PROTOCOL(params->ifta_lower); - - return (ICFG_SUCCESS); -} - -/* - * Returns the protocol family (AF_INET or AF_INET6) of the protocol - * actually being tunneled. The value of the protocol family will be copied - * into the buffer supplied by the 'protocol' argument. - * - * This API will retrieve the protocolvalue from the tunnel parameter data - * cache and will only update the cache if no data has yet been cached for - * this tunnel. - * - * Returns: ICFG_SUCCESS, ICFG_NOT_TUNNEL or ICFG_FAILURE. 
- */ -int -icfg_get_tunnel_upper(icfg_handle_t handle, int *protocol) -{ - struct iftun_req *params; - int ret; - - if ((ret = get_tunnel_params(handle, B_FALSE)) != ICFG_SUCCESS) { - return (ret); - } - params = handle->ifh_tunnel_params; - - *protocol = ICFG_TUNNEL_PROTOCOL(params->ifta_upper); - - return (ICFG_SUCCESS); -} - -/* * Any time that flags are changed on an interface where either the new or the * existing flags have IFF_UP set, we'll get at least one RTM_IFINFO message to * announce the flag status. Typically, there are two such messages: one diff --git a/usr/src/lib/libinetcfg/common/inetcfg.h b/usr/src/lib/libinetcfg/common/inetcfg.h index 323fc09b95..fad1b9b0ca 100644 --- a/usr/src/lib/libinetcfg/common/inetcfg.h +++ b/usr/src/lib/libinetcfg/common/inetcfg.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,15 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2002 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ #ifndef _INETCFG_H #define _INETCFG_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/types.h> #include <sys/socket.h> #include <net/if.h> @@ -40,12 +37,11 @@ extern "C" { /* error codes */ #define ICFG_SUCCESS 0 /* API was successful */ #define ICFG_FAILURE 1 /* Generic failure */ -#define ICFG_NOT_TUNNEL 2 /* Tunnel operation attempted on non-tunnel */ -#define ICFG_NOT_SET 3 /* Could not return non-existent value */ -#define ICFG_BAD_ADDR 4 /* Invalid address */ -#define ICFG_BAD_PROT 5 /* Wrong protocol family for operation */ -#define ICFG_DAD_FAILED 6 /* Duplicate address detection failure */ -#define ICFG_DAD_FOUND 7 /* Duplicate address detected */ +#define ICFG_NOT_SET 2 /* Could not return non-existent value */ +#define ICFG_BAD_ADDR 3 /* Invalid address */ +#define ICFG_BAD_PROT 4 /* Wrong protocol family for operation */ +#define ICFG_DAD_FAILED 5 /* Duplicate address detection failure */ +#define ICFG_DAD_FOUND 6 /* Duplicate address detected */ #define ICFG_NERR (ICFG_DAD_FOUND + 1) @@ -61,25 +57,11 @@ typedef struct icfg_if { typedef struct icfg_handle { int ifh_sock; /* socket to interface */ icfg_if_t ifh_interface; /* interface definition */ - struct iftun_req *ifh_tunnel_params; /* tunnel parameters */ } *icfg_handle_t; extern const char *icfg_errmsg(int); extern int icfg_open(icfg_handle_t *, const icfg_if_t *); extern void icfg_close(icfg_handle_t); -extern int icfg_refresh_tunnel_cache(icfg_handle_t); -extern int icfg_set_tunnel_dest(icfg_handle_t, const struct sockaddr *, - socklen_t); -extern int icfg_set_tunnel_src(icfg_handle_t, const struct sockaddr *, - socklen_t); -extern int icfg_set_tunnel_hoplimit(icfg_handle_t, uint8_t); -extern int icfg_set_tunnel_encaplimit(icfg_handle_t, int16_t); -extern int icfg_get_tunnel_dest(icfg_handle_t, struct sockaddr *, socklen_t *); -extern int icfg_get_tunnel_src(icfg_handle_t, struct sockaddr *, socklen_t *); -extern int icfg_get_tunnel_hoplimit(icfg_handle_t, uint8_t *); -extern int 
icfg_get_tunnel_encaplimit(icfg_handle_t, int16_t *); -extern int icfg_get_tunnel_lower(icfg_handle_t, int *); -extern int icfg_get_tunnel_upper(icfg_handle_t, int *); extern int icfg_set_flags(icfg_handle_t, uint64_t); extern int icfg_set_metric(icfg_handle_t, int); extern int icfg_set_mtu(icfg_handle_t, uint_t); diff --git a/usr/src/lib/libinetcfg/common/mapfile-vers b/usr/src/lib/libinetcfg/common/mapfile-vers index b8ea188f89..3ecc82bd63 100644 --- a/usr/src/lib/libinetcfg/common/mapfile-vers +++ b/usr/src/lib/libinetcfg/common/mapfile-vers @@ -54,16 +54,9 @@ SUNWprivate_1.1 { icfg_get_netmask; icfg_get_subnet; icfg_get_token; - icfg_get_tunnel_dest; - icfg_get_tunnel_encaplimit; - icfg_get_tunnel_hoplimit; - icfg_get_tunnel_lower; - icfg_get_tunnel_src; - icfg_get_tunnel_upper; icfg_is_logical; icfg_iterate_if; icfg_open; - icfg_refresh_tunnel_cache; icfg_set_addr; icfg_set_broadcast; icfg_set_dest_addr; @@ -75,10 +68,6 @@ SUNWprivate_1.1 { icfg_set_prefixlen; icfg_set_subnet; icfg_set_token; - icfg_set_tunnel_dest; - icfg_set_tunnel_encaplimit; - icfg_set_tunnel_hoplimit; - icfg_set_tunnel_src; icfg_sockaddr_to_str; icfg_str_to_sockaddr; local: diff --git a/usr/src/lib/libinetutil/common/ifspec.c b/usr/src/lib/libinetutil/common/ifspec.c index 157b497efd..7cf41591c5 100644 --- a/usr/src/lib/libinetutil/common/ifspec.c +++ b/usr/src/lib/libinetutil/common/ifspec.c @@ -19,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * This file contains a routine used to validate a ifconfig-style interface * specification @@ -92,7 +90,7 @@ getppa(const char *bp, int bpsize, uint_t *ppa) for (tp = ep; tp >= bp && isdigit(*tp); tp--) /* Null body */; - if (*tp == '.' 
|| *tp == ':') { + if (*tp == ':') { errno = EINVAL; return (-1); } @@ -103,75 +101,34 @@ getppa(const char *bp, int bpsize, uint_t *ppa) /* * Given an ifconfig-style inet relative-path interface specification - * (e.g: hme.[module].[module][PPA]:2), validate its form and decompose the - * contents into a dynamically allocated ifspec_t. + * (e.g: bge0:2), validate its form and decompose the contents into a + * dynamically allocated ifspec_t. * * Returns ifspec_t for success, NULL pointer if spec is malformed. */ boolean_t ifparse_ifspec(const char *ifname, ifspec_t *ifsp) { - char *mp, *ep, *lp, *tp; - char *ifnamecp; - size_t iflen; - boolean_t have_ppa = B_FALSE; + char *lp, *tp; + char ifnamecp[LIFNAMSIZ]; - iflen = strlen(ifname); - if (iflen > LIFNAMSIZ) { + /* snag a copy we can modify */ + if (strlcpy(ifnamecp, ifname, LIFNAMSIZ) >= LIFNAMSIZ) { errno = EINVAL; return (B_FALSE); } - /* snag a copy we can modify */ - ifnamecp = alloca(iflen + 1); - (void) strlcpy(ifnamecp, ifname, iflen + 1); - ifsp->ifsp_lunvalid = B_FALSE; /* * An interface name must have the format of: - * dev[.module[.module...]][ppa][:lun] - * - * where the ppa must be specified at the end of the interface name. - * e.g. ip.foo.tun0 + * dev[ppa][:lun] * * lun - logical unit number. - * - * Produce substrings for each grouping, starting first with modules, - * then lun, devname, and finally ppa. */ - /* Any modules? */ - mp = strchr(ifnamecp, '.'); - /* Any logical units? 
*/ lp = strchr(ifnamecp, ':'); - - if (lp != NULL && mp != NULL && lp < mp) { - errno = EINVAL; - return (B_FALSE); - } - - ifsp->ifsp_modcnt = 0; - if (mp != NULL) { - *mp++ = '\0'; - if (lp != NULL) - *lp = '\0'; - while (mp != NULL && ifsp->ifsp_modcnt <= IFSP_MAXMODS) { - if ((ep = strchr(mp, '.')) != NULL) - *ep++ = '\0'; - (void) strlcpy(ifsp->ifsp_mods[ifsp->ifsp_modcnt++], - mp, LIFNAMSIZ); - mp = ep; - } - if (lp != NULL) - *lp = ':'; - if (ifsp->ifsp_modcnt > IFSP_MAXMODS) { - errno = E2BIG; - return (B_FALSE); - } - } - if (lp != NULL) { if (getlun(lp, strlen(lp), &ifsp->ifsp_lun) != 0) return (B_FALSE); @@ -180,25 +137,17 @@ ifparse_ifspec(const char *ifname, ifspec_t *ifsp) (void) strlcpy(ifsp->ifsp_devnm, ifnamecp, LIFNAMSIZ); - /* - * Find ppa - has to be part of devname or if modules exist part of - * last module name. - */ - if (ifsp->ifsp_modcnt != 0 && - getppa(ifsp->ifsp_mods[ifsp->ifsp_modcnt - 1], - strlen(ifsp->ifsp_mods[ifsp->ifsp_modcnt - 1]), - &ifsp->ifsp_ppa) == 0) { - have_ppa = B_TRUE; - } else if (ifsp->ifsp_modcnt == 0 && - getppa(ifsp->ifsp_devnm, strlen(ifsp->ifsp_devnm), - &ifsp->ifsp_ppa) == 0) { - have_ppa = B_TRUE; - - /* strip the ppa off of the device name if present */ - for (tp = &ifsp->ifsp_devnm[strlen(ifsp->ifsp_devnm) - 1]; - tp >= ifsp->ifsp_devnm && isdigit(*tp); tp--) - *tp = '\0'; + /* Find ppa */ + if (getppa(ifsp->ifsp_devnm, strlen(ifsp->ifsp_devnm), + &ifsp->ifsp_ppa) != 0) { + return (B_FALSE); + } + + /* strip the ppa off of the device name if present */ + for (tp = &ifsp->ifsp_devnm[strlen(ifsp->ifsp_devnm) - 1]; + tp >= ifsp->ifsp_devnm && isdigit(*tp); tp--) { + *tp = '\0'; } - return (have_ppa); + return (B_TRUE); } diff --git a/usr/src/lib/libinetutil/common/libinetutil.h b/usr/src/lib/libinetutil/common/libinetutil.h index 0bece07e07..bacf64938a 100644 --- a/usr/src/lib/libinetutil/common/libinetutil.h +++ b/usr/src/lib/libinetutil/common/libinetutil.h @@ -43,15 +43,11 @@ extern "C" { #if 
!defined(_KERNEL) && !defined(_BOOT) -#define IFSP_MAXMODS 9 /* Max modules that can be pushed on if */ - typedef struct { uint_t ifsp_ppa; /* Physical Point of Attachment */ uint_t ifsp_lun; /* Logical Unit number */ boolean_t ifsp_lunvalid; /* TRUE if lun is valid */ - int ifsp_modcnt; /* Number of modules to be pushed */ char ifsp_devnm[LIFNAMSIZ]; /* only the device name */ - char ifsp_mods[IFSP_MAXMODS][LIFNAMSIZ]; /* table of mods */ } ifspec_t; extern boolean_t ifparse_ifspec(const char *, ifspec_t *); diff --git a/usr/src/lib/libinetutil/common/ofmt.c b/usr/src/lib/libinetutil/common/ofmt.c index 2f9fe3f91d..ccae05091c 100644 --- a/usr/src/lib/libinetutil/common/ofmt.c +++ b/usr/src/lib/libinetutil/common/ofmt.c @@ -167,11 +167,12 @@ splitfree(split_t *sp) * Open a handle to be used for printing formatted output. */ ofmt_status_t -ofmt_open(const char *str, ofmt_field_t *template, uint_t flags, +ofmt_open(const char *str, const ofmt_field_t *template, uint_t flags, uint_t maxcols, ofmt_handle_t *ofmt) { split_t *sp; uint_t i, j, of_index; + const ofmt_field_t *ofp; ofmt_field_t *of; ofmt_state_t *os; int nfields = 0; @@ -192,7 +193,7 @@ ofmt_open(const char *str, ofmt_field_t *template, uint_t flags, } if (template == NULL) return (OFMT_ENOTEMPLATE); - for (of = template; of->of_name != NULL; of++) + for (ofp = template; ofp->of_name != NULL; ofp++) nfields++; /* * split str into the columns selected, or construct the diff --git a/usr/src/lib/libinetutil/common/ofmt.h b/usr/src/lib/libinetutil/common/ofmt.h index ff03b80ae0..a477847917 100644 --- a/usr/src/lib/libinetutil/common/ofmt.h +++ b/usr/src/lib/libinetutil/common/ofmt.h @@ -165,7 +165,7 @@ typedef struct ofmt_field_s { * for the handle are freed by ofmt_close(); */ typedef struct ofmt_state_s *ofmt_handle_t; -extern ofmt_status_t ofmt_open(const char *, ofmt_field_t *, uint_t, +extern ofmt_status_t ofmt_open(const char *, const ofmt_field_t *, uint_t, uint_t, ofmt_handle_t *); #define OFMT_PARSABLE 
0x00000001 /* machine parsable mode */ diff --git a/usr/src/pkgdefs/SUNWckr/prototype_com b/usr/src/pkgdefs/SUNWckr/prototype_com index 282a0ce818..30679b7037 100644 --- a/usr/src/pkgdefs/SUNWckr/prototype_com +++ b/usr/src/pkgdefs/SUNWckr/prototype_com @@ -91,6 +91,8 @@ f none kernel/drv/ipnet.conf 644 root sys f none kernel/drv/ippctl.conf 644 root sys f none kernel/drv/ipsecah.conf 644 root sys f none kernel/drv/ipsecesp.conf 644 root sys +f none kernel/drv/iptun.conf 644 root sys +f none kernel/drv/iptunq.conf 644 root sys f none kernel/drv/iwscn.conf 644 root sys f none kernel/drv/keysock.conf 644 root sys f none kernel/drv/kmdb.conf 644 root sys diff --git a/usr/src/pkgdefs/SUNWckr/prototype_i386 b/usr/src/pkgdefs/SUNWckr/prototype_i386 index 83e8fec867..01877af5dd 100644 --- a/usr/src/pkgdefs/SUNWckr/prototype_i386 +++ b/usr/src/pkgdefs/SUNWckr/prototype_i386 @@ -100,6 +100,8 @@ f none kernel/drv/ipnet 755 root sys f none kernel/drv/ippctl 755 root sys f none kernel/drv/ipsecah 755 root sys f none kernel/drv/ipsecesp 755 root sys +f none kernel/drv/iptun 755 root sys +f none kernel/drv/iptunq 755 root sys f none kernel/drv/iwscn 755 root sys f none kernel/drv/kb8042 755 root sys f none kernel/drv/keysock 755 root sys @@ -180,7 +182,10 @@ f none kernel/genunix 755 root sys f none kernel/ipp/ipgpc 755 root sys d none boot/acpi 755 root sys d none boot/acpi/tables 755 root sys +f none kernel/mac/mac_6to4 755 root sys f none kernel/mac/mac_ether 755 root sys +f none kernel/mac/mac_ipv4 755 root sys +f none kernel/mac/mac_ipv6 755 root sys f none kernel/mac/mac_wifi 755 root sys f none kernel/mac/mac_ib 755 root sys d none kernel/misc/scsi_vhci 755 root sys @@ -236,9 +241,7 @@ f none kernel/misc/tem 755 root sys f none kernel/misc/tlimod 755 root sys f none kernel/sched/TS 755 root sys f none kernel/sched/TS_DPTBL 755 root sys -f none kernel/strmod/6to4tun 755 root sys l none kernel/strmod/arp=../../kernel/drv/arp -f none kernel/strmod/atun 755 root sys f none 
kernel/strmod/bufmod 755 root sys f none kernel/strmod/connld 755 root sys f none kernel/strmod/dedump 755 root sys @@ -259,7 +262,6 @@ l none kernel/strmod/tcp=../../kernel/drv/tcp f none kernel/strmod/timod 755 root sys f none kernel/strmod/tirdwr 755 root sys f none kernel/strmod/ttcompat 755 root sys -f none kernel/strmod/tun 755 root sys l none kernel/strmod/udp=../../kernel/drv/udp l none kernel/sys/autofs=../../kernel/fs/autofs f none kernel/sys/c2audit 755 root sys @@ -321,6 +323,8 @@ f none kernel/drv/amd64/ipnet 755 root sys f none kernel/drv/amd64/ippctl 755 root sys f none kernel/drv/amd64/ipsecah 755 root sys f none kernel/drv/amd64/ipsecesp 755 root sys +f none kernel/drv/amd64/iptun 755 root sys +f none kernel/drv/amd64/iptunq 755 root sys f none kernel/drv/amd64/iwscn 755 root sys f none kernel/drv/amd64/kb8042 755 root sys f none kernel/drv/amd64/keysock 755 root sys @@ -396,7 +400,10 @@ f none kernel/fs/amd64/ufs 755 root sys d none kernel/ipp/amd64 755 root sys f none kernel/ipp/amd64/ipgpc 755 root sys d none kernel/mac/amd64 755 root sys +f none kernel/mac/amd64/mac_6to4 755 root sys f none kernel/mac/amd64/mac_ether 755 root sys +f none kernel/mac/amd64/mac_ipv4 755 root sys +f none kernel/mac/amd64/mac_ipv6 755 root sys f none kernel/mac/amd64/mac_wifi 755 root sys f none kernel/mac/amd64/mac_ib 755 root sys d none kernel/misc/scsi_vhci/amd64 755 root sys @@ -456,9 +463,7 @@ d none kernel/sched/amd64 755 root sys f none kernel/sched/amd64/TS 755 root sys f none kernel/sched/amd64/TS_DPTBL 755 root sys d none kernel/strmod/amd64 755 root sys -f none kernel/strmod/amd64/6to4tun 755 root sys l none kernel/strmod/amd64/arp=../../../kernel/drv/amd64/arp -f none kernel/strmod/amd64/atun 755 root sys f none kernel/strmod/amd64/bufmod 755 root sys f none kernel/strmod/amd64/connld 755 root sys f none kernel/strmod/amd64/dedump 755 root sys @@ -479,7 +484,6 @@ l none kernel/strmod/amd64/tcp=../../../kernel/drv/amd64/tcp f none 
kernel/strmod/amd64/timod 755 root sys f none kernel/strmod/amd64/tirdwr 755 root sys f none kernel/strmod/amd64/ttcompat 755 root sys -f none kernel/strmod/amd64/tun 755 root sys l none kernel/strmod/amd64/udp=../../../kernel/drv/amd64/udp d none kernel/sys/amd64 755 root sys l none kernel/sys/amd64/autofs=../../../kernel/fs/amd64/autofs diff --git a/usr/src/pkgdefs/SUNWckr/prototype_sparc b/usr/src/pkgdefs/SUNWckr/prototype_sparc index f35c4068ed..ae54f53a4a 100644 --- a/usr/src/pkgdefs/SUNWckr/prototype_sparc +++ b/usr/src/pkgdefs/SUNWckr/prototype_sparc @@ -93,6 +93,8 @@ f none kernel/drv/sparcv9/ip6 755 root sys f none kernel/drv/sparcv9/ippctl 755 root sys f none kernel/drv/sparcv9/ipsecah 755 root sys f none kernel/drv/sparcv9/ipsecesp 755 root sys +f none kernel/drv/sparcv9/iptun 755 root sys +f none kernel/drv/sparcv9/iptunq 755 root sys f none kernel/drv/sparcv9/isp 755 root sys f none kernel/drv/sparcv9/iwscn 755 root sys f none kernel/drv/sparcv9/kb8042 755 root sys @@ -166,7 +168,10 @@ f none kernel/fs/sparcv9/ufs 755 root sys d none kernel/ipp/sparcv9 755 root sys f none kernel/ipp/sparcv9/ipgpc 755 root sys d none kernel/mac/sparcv9 755 root sys +f none kernel/mac/sparcv9/mac_6to4 755 root sys f none kernel/mac/sparcv9/mac_ether 755 root sys +f none kernel/mac/sparcv9/mac_ipv4 755 root sys +f none kernel/mac/sparcv9/mac_ipv6 755 root sys f none kernel/mac/sparcv9/mac_wifi 755 root sys f none kernel/mac/sparcv9/mac_ib 755 root sys f none kernel/misc/sparcv9/bignum 755 root sys @@ -221,9 +226,7 @@ d none kernel/sched/sparcv9 755 root sys f none kernel/sched/sparcv9/TS 755 root sys f none kernel/sched/sparcv9/TS_DPTBL 755 root sys d none kernel/strmod/sparcv9 755 root sys -f none kernel/strmod/sparcv9/6to4tun 755 root sys l none kernel/strmod/sparcv9/arp=../../../kernel/drv/sparcv9/arp -f none kernel/strmod/sparcv9/atun 755 root sys f none kernel/strmod/sparcv9/bufmod 755 root sys f none kernel/strmod/sparcv9/connld 755 root sys f none 
kernel/strmod/sparcv9/dedump 755 root sys @@ -245,7 +248,6 @@ l none kernel/strmod/sparcv9/tcp=../../../kernel/drv/sparcv9/tcp f none kernel/strmod/sparcv9/timod 755 root sys f none kernel/strmod/sparcv9/tirdwr 755 root sys f none kernel/strmod/sparcv9/ttcompat 755 root sys -f none kernel/strmod/sparcv9/tun 755 root sys l none kernel/strmod/sparcv9/udp=../../../kernel/drv/sparcv9/udp f none kernel/strmod/sparcv9/vuid3ps2 755 root sys d none kernel/sys/sparcv9 755 root sys diff --git a/usr/src/pkgdefs/SUNWcsr/prototype_com b/usr/src/pkgdefs/SUNWcsr/prototype_com index aa4f57b0b1..e6d8f7d321 100644 --- a/usr/src/pkgdefs/SUNWcsr/prototype_com +++ b/usr/src/pkgdefs/SUNWcsr/prototype_com @@ -363,6 +363,7 @@ f none lib/svc/method/ldap-client 0555 root bin f none lib/svc/method/manifest-import 0555 root bin f none lib/svc/method/mpxio-upgrade 0555 root bin f none lib/svc/method/net-init 0555 root bin +f none lib/svc/method/net-iptun 0555 root bin f none lib/svc/method/net-loopback 0555 root bin f none lib/svc/method/net-nwam 0555 root bin f none lib/svc/method/net-physical 0555 root bin @@ -506,6 +507,7 @@ f manifest var/svc/manifest/network/inetd.xml 0444 root sys f manifest var/svc/manifest/network/inetd-upgrade.xml 0444 root sys f seedmanifest var/svc/manifest/network/dlmgmt.xml 0444 root sys f manifest var/svc/manifest/network/network-initial.xml 0444 root sys +f manifest var/svc/manifest/network/network-iptun.xml 0444 root sys f manifest var/svc/manifest/network/network-loopback.xml 0444 root sys f manifest var/svc/manifest/network/network-physical.xml 0444 root sys f manifest var/svc/manifest/network/network-routing-setup.xml 0444 root sys diff --git a/usr/src/pkgdefs/common_files/i.minorperm_i386 b/usr/src/pkgdefs/common_files/i.minorperm_i386 index 9f4bc70380..5fee4e04ff 100644 --- a/usr/src/pkgdefs/common_files/i.minorperm_i386 +++ b/usr/src/pkgdefs/common_files/i.minorperm_i386 @@ -360,7 +360,6 @@ dmfe:* mxfe:* bmc:bmc dld:* -aggr:* smbios:smbios zfs:* zfs:zfs 
@@ -386,6 +385,7 @@ domcaps:* evtchn:* privcmd:* xenbus:* +iptunq:* fm:* amd_iommu:* xpvtap:* diff --git a/usr/src/pkgdefs/common_files/i.minorperm_sparc b/usr/src/pkgdefs/common_files/i.minorperm_sparc index 733937e054..be5d16c7c3 100644 --- a/usr/src/pkgdefs/common_files/i.minorperm_sparc +++ b/usr/src/pkgdefs/common_files/i.minorperm_sparc @@ -333,7 +333,6 @@ dlpistub:* cpuid:self ntwdt:* dld:* -aggr:* mdesc:* zfs:* zfs:zfs @@ -349,6 +348,7 @@ smbsrv:* vscan:* nsmb:* bmc:bmc +iptunq:* fm:* clone:bridge EOF diff --git a/usr/src/pkgdefs/etc/exception_list_i386 b/usr/src/pkgdefs/etc/exception_list_i386 index 1f881159c6..df3c4ff3f4 100644 --- a/usr/src/pkgdefs/etc/exception_list_i386 +++ b/usr/src/pkgdefs/etc/exception_list_i386 @@ -98,6 +98,7 @@ usr/include/sys/mac_flow_impl.h i386 usr/include/sys/mac_impl.h i386 usr/include/sys/mac_provider.h i386 usr/include/sys/mac_soft_ring.h i386 +usr/include/inet/iptun.h i386 # # Private GLDv3 userland libraries and headers # @@ -106,6 +107,7 @@ usr/include/libdlaggr.h i386 usr/include/libdlether.h i386 usr/include/libdlflow.h i386 usr/include/libdlflow_impl.h i386 +usr/include/libdliptun.h i386 usr/include/libdlmgmt.h i386 usr/include/libdlsim.h i386 usr/include/libdlstat.h i386 @@ -371,7 +373,6 @@ usr/include/inet/ip_impl.h i386 usr/include/inet/ip_ndp.h i386 usr/include/inet/ip2mac.h i386 usr/include/inet/ip2mac_impl.h i386 -usr/include/inet/tun.h i386 usr/include/protocols/ripngd.h i386 usr/include/libmail.h i386 usr/include/s_string.h i386 @@ -390,7 +391,6 @@ usr/include/sys/sha1.h i386 usr/include/sys/sha1_consts.h i386 usr/include/sys/sha2.h i386 usr/include/sys/sha2_consts.h i386 -usr/include/inet/tun.h i386 # # Filtering out directories not shipped # diff --git a/usr/src/pkgdefs/etc/exception_list_sparc b/usr/src/pkgdefs/etc/exception_list_sparc index aafc7af9d0..528230ba9a 100644 --- a/usr/src/pkgdefs/etc/exception_list_sparc +++ b/usr/src/pkgdefs/etc/exception_list_sparc @@ -87,6 +87,7 @@ 
usr/include/sys/mac_flow_impl.h sparc usr/include/sys/mac_impl.h sparc usr/include/sys/mac_provider.h sparc usr/include/sys/mac_soft_ring.h sparc +usr/include/inet/iptun.h sparc # # Private GLDv3 userland libraries and headers # @@ -95,6 +96,7 @@ usr/include/libdlaggr.h sparc usr/include/libdlether.h sparc usr/include/libdlflow.h sparc usr/include/libdlflow_impl.h sparc +usr/include/libdliptun.h sparc usr/include/libdlmgmt.h sparc usr/include/libdlsim.h sparc usr/include/libdlstat.h sparc @@ -360,7 +362,6 @@ usr/include/inet/ip_impl.h sparc usr/include/inet/ip_ndp.h sparc usr/include/inet/ip2mac.h sparc usr/include/inet/ip2mac_impl.h sparc -usr/include/inet/tun.h sparc usr/include/protocols/ripngd.h sparc usr/include/libmail.h sparc usr/include/s_string.h sparc @@ -379,7 +380,6 @@ usr/include/sys/sha1.h sparc usr/include/sys/sha1_consts.h sparc usr/include/sys/sha2.h sparc usr/include/sys/sha2_consts.h sparc -usr/include/inet/tun.h sparc # # The following are win and graphics related also ignore and do NOT replace. # diff --git a/usr/src/tools/scripts/bfu.sh b/usr/src/tools/scripts/bfu.sh index 962226d667..c9fa13e192 100644 --- a/usr/src/tools/scripts/bfu.sh +++ b/usr/src/tools/scripts/bfu.sh @@ -2890,6 +2890,28 @@ if $ZCAT $cpiodir/generic.root$ZFIX | cpio -it 2>/dev/null | \ fi # +# The Clearview IP Tunneling project changes the format of the +# /etc/dladm/datalink.conf file. The conversion is done in the +# dlmgmtd daemon, so there is no backwards conversion when bfu'ing +# backwards. The solution is to have bfu save the old file away when +# bfu'ing across this project, and restore it when bfu'ing back. +# +datalink_file=$root/etc/dladm/datalink.conf +datalink_backup=$root/etc/dladm/datalink.conf.bfusave +datalink_action=none +if [[ -f $datalink_file ]]; then + iptun_exists=false + if archive_file_exists generic.kernel "kernel/drv/iptun.conf"; then + iptun_exists=true + fi + if [[ ! 
-f $root/kernel/drv/iptun.conf ]] && $iptun_exists; then + datalink_action=save + elif [[ -f $root/kernel/drv/iptun.conf ]] && ! $iptun_exists; then + datalink_action=restore + fi +fi + +# # Check whether the build is boot-archive or ufsboot sparc # boot based on the existence of a generic.boot archive # @@ -7860,6 +7882,25 @@ mondo_loop() { rm -f $root/kernel/drv/softmac rm -f $root/kernel/drv/sparcv9/softmac rm -f $root/kernel/drv/amd64/softmac + rm -f $root/kernel/drv/iptun.conf + rm -f $root/kernel/drv/iptun + rm -f $root/kernel/drv/sparcv9/iptun + rm -f $root/kernel/drv/amd64/iptun + rm -f $root/kernel/drv/iptunq.conf + rm -f $root/kernel/drv/iptunq + rm -f $root/kernel/drv/sparcv9/iptunq + rm -f $root/kernel/drv/amd64/iptunq + + # Remove obsolete tunneling STREAMS modules + rm -f $root/kernel/strmod/6to4tun + rm -f $root/kernel/strmod/sparcv9/6to4tun + rm -f $root/kernel/strmod/amd64/6to4tun + rm -f $root/kernel/strmod/atun + rm -f $root/kernel/strmod/sparcv9/atun + rm -f $root/kernel/strmod/amd64/atun + rm -f $root/kernel/strmod/tun + rm -f $root/kernel/strmod/sparcv9/tun + rm -f $root/kernel/strmod/amd64/tun # # Remove libtopo platform XML files that have been replaced by propmap @@ -7953,6 +7994,12 @@ mondo_loop() { done fi + if [[ $datalink_action = "save" ]]; then + cp -p $datalink_file $datalink_backup + elif [[ $datalink_action = "restore" && -f $datalink_backup ]]; then + mv $datalink_backup $datalink_file + fi + # End of pre-archive extraction hacks. if [ $diskless = no -a $zone = global ]; then diff --git a/usr/src/ucbhead/sys/types.h b/usr/src/ucbhead/sys/types.h index 43f3eceb18..8d198dc129 100644 --- a/usr/src/ucbhead/sys/types.h +++ b/usr/src/ucbhead/sys/types.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. 
+ * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -42,8 +41,6 @@ #ifndef _SYS_TYPES_H #define _SYS_TYPES_H -#pragma ident "%Z%%M% %I% %E% SMI" - /* * Include fixed width type declarations proposed by the ISO/JTC1/SC22/WG14 C * committee's working draft for the revision of the current ISO C standard, @@ -352,6 +349,8 @@ typedef id_t poolid_t; /* pool ID type */ typedef id_t zoneid_t; /* zone ID type */ typedef id_t ctid_t; /* contract ID type */ +typedef uint32_t datalink_id_t; + typedef ulong_t dev_t; /* expanded device type */ #if !defined(_LP64) && defined(__cplusplus) diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files index 7fac595b5d..6a80a808d8 100644 --- a/usr/src/uts/common/Makefile.files +++ b/usr/src/uts/common/Makefile.files @@ -526,7 +526,7 @@ IP_OBJS += igmp.o ipmp.o ip.o ip6.o ip6_asp.o ip6_if.o ip6_ire.o ip6_rts.o \ ipddi.o ipdrop.o mi.o nd.o optcom.o snmpcom.o ipsec_loader.o \ spd.o ipclassifier.o inet_common.o ip_squeue.o squeue.o \ ip_sadb.o ip_ftable.o proto_set.o radix.o ip_dummy.o \ - ip_helper_stream.o\ + ip_helper_stream.o iptunq.o \ $(IP_ICMP_OBJS) \ $(IP_RTS_OBJS) \ $(IP_TCP_OBJS) \ @@ -572,12 +572,6 @@ SDP_SOCK_MOD_OBJS += sockmod_sdp.o socksdp.o socksdpsubr.o SCTP_SOCK_MOD_OBJS += sockmod_sctp.o socksctp.o socksctpsubr.o -TUN_OBJS += tun.o - -ATUN_OBJS += atun.o - -6TO4TUN_OBJS += 6to4tun.o - RDS_OBJS += rdsddi.o rdssubr.o rds_opt.o rds_ioctl.o RDSIB_OBJS += rdsib.o rdsib_ib.o rdsib_cm.o rdsib_ep.o rdsib_buf.o \ @@ -616,12 +610,22 @@ MAC_OBJS += mac.o mac_bcast.o mac_client.o mac_datapath_setup.o 
mac_flow.o mac_hio.o mac_mod.o mac_ndd.o mac_provider.o mac_sched.o \ mac_soft_ring.o mac_stat.o mac_util.o +MAC_6TO4_OBJS += mac_6to4.o + MAC_ETHER_OBJS += mac_ether.o +MAC_IPV4_OBJS += mac_ipv4.o + +MAC_IPV6_OBJS += mac_ipv6.o + MAC_WIFI_OBJS += mac_wifi.o MAC_IB_OBJS += mac_ib.o +IPTUN_OBJS += iptun_dev.o iptun_ctl.o iptun.o + +IPTUNQ_OBJS += iptunq_ddi.o + AGGR_OBJS += aggr_dev.o aggr_ctl.o aggr_grp.o aggr_port.o \ aggr_send.o aggr_recv.o aggr_lacp.o diff --git a/usr/src/uts/common/Makefile.rules b/usr/src/uts/common/Makefile.rules index e31217049f..06115e976b 100644 --- a/usr/src/uts/common/Makefile.rules +++ b/usr/src/uts/common/Makefile.rules @@ -459,6 +459,10 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/inet/ipnet/%.c $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) +$(OBJS_DIR)/%.o: $(UTSBASE)/common/inet/iptun/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + $(OBJS_DIR)/%.o: $(UTSBASE)/common/inet/kssl/%.c $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) @@ -1770,6 +1774,9 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/common/inet/ip/%.c $(LINTS_DIR)/%.ln: $(UTSBASE)/common/inet/ipnet/%.c @($(LHEAD) $(LINT.c) $< $(LTAIL)) +$(LINTS_DIR)/%.ln: $(UTSBASE)/common/inet/iptun/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) + $(LINTS_DIR)/%.ln: $(UTSBASE)/common/inet/ipf/%.c @($(LHEAD) $(LINT.c) $(IPFFLAGS) $< $(LTAIL)) diff --git a/usr/src/uts/common/fs/dev/sdev_netops.c b/usr/src/uts/common/fs/dev/sdev_netops.c index bd7bf5058b..a4c4ab168d 100644 --- a/usr/src/uts/common/fs/dev/sdev_netops.c +++ b/usr/src/uts/common/fs/dev/sdev_netops.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -44,16 +44,6 @@ struct vnodeops *devnet_vnodeops; /* - * Called by zone_walk_datalink() to see if the given link name belongs to the - * given zone. Returns 0 to continue the walk, -1 if the link name is found. 
- */ -static int -devnet_validate_name(const char *link, void *arg) -{ - return ((strcmp(link, arg) == 0) ? -1 : 0); -} - -/* * Check if a net sdev_node is still valid - i.e. it represents a current * network link. * This serves two purposes @@ -65,19 +55,19 @@ devnet_validate_name(const char *link, void *arg) int devnet_validate(struct sdev_node *dv) { - char *nm = dv->sdev_name; datalink_id_t linkid; + zoneid_t zoneid; ASSERT(!(dv->sdev_flags & SDEV_STALE)); ASSERT(dv->sdev_state == SDEV_READY); - if (SDEV_IS_GLOBAL(dv)) { - return ((dls_mgmt_get_linkid(nm, &linkid) != 0) ? - SDEV_VTOR_INVALID : SDEV_VTOR_VALID); - } else { - return ((zone_datalink_walk(getzoneid(), devnet_validate_name, - nm) == -1) ? SDEV_VTOR_VALID : SDEV_VTOR_INVALID); - } + if (dls_mgmt_get_linkid(dv->sdev_name, &linkid) != 0) + return (SDEV_VTOR_INVALID); + if (SDEV_IS_GLOBAL(dv)) + return (SDEV_VTOR_VALID); + zoneid = getzoneid(); + return (zone_check_datalink(&zoneid, linkid) == 0 ? + SDEV_VTOR_VALID : SDEV_VTOR_INVALID); } /* @@ -219,14 +209,19 @@ failed: } static int -devnet_filldir_datalink(const char *link, void *arg) +devnet_filldir_datalink(datalink_id_t linkid, void *arg) { - struct sdev_node *ddv = arg; - struct vattr vattr; - struct sdev_node *dv; - dls_dl_handle_t ddh = NULL; + struct sdev_node *ddv = arg; + struct vattr vattr; + struct sdev_node *dv; + dls_dl_handle_t ddh = NULL; + char link[MAXLINKNAMELEN]; ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); + + if (dls_mgmt_get_linkinfo(linkid, link, NULL, NULL, NULL) != 0) + return (0); + if ((dv = sdev_cache_lookup(ddv, (char *)link)) != NULL) goto found; @@ -259,7 +254,6 @@ static void devnet_filldir(struct sdev_node *ddv) { sdev_node_t *dv, *next; - char link[MAXLINKNAMELEN]; datalink_id_t linkid; ASSERT(RW_READ_HELD(&ddv->sdev_contents)); @@ -302,12 +296,8 @@ devnet_filldir(struct sdev_node *ddv) do { linkid = dls_mgmt_get_next(linkid, DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE, DLMGMT_ACTIVE); - - if ((linkid != 
DATALINK_INVALID_LINKID) && - (dls_mgmt_get_linkinfo(linkid, link, - NULL, NULL, NULL) == 0)) { - (void) devnet_filldir_datalink(link, ddv); - } + if (linkid != DATALINK_INVALID_LINKID) + (void) devnet_filldir_datalink(linkid, ddv); } while (linkid != DATALINK_INVALID_LINKID); } else { (void) zone_datalink_walk(getzoneid(), diff --git a/usr/src/uts/common/inet/Makefile b/usr/src/uts/common/inet/Makefile index 85b139c424..052c010aea 100644 --- a/usr/src/uts/common/inet/Makefile +++ b/usr/src/uts/common/inet/Makefile @@ -29,10 +29,10 @@ include ../../../Makefile.master HDRS= arp.h arp_impl.h common.h ipclassifier.h ip.h ip6.h ipdrop.h ipnet.h \ - ipsecah.h ipsecesp.h ipsec_info.h ip6_asp.h ip_if.h ip_ire.h \ + ipsecah.h ipsecesp.h ipsec_info.h iptun.h ip6_asp.h ip_if.h ip_ire.h \ ip_multi.h ip_netinfo.h ip_ndp.h ip_rts.h ipsec_impl.h keysock.h \ led.h mi.h mib2.h nd.h optcom.h sadb.h sctp_itf.h snmpcom.h tcp.h \ - tcp_sack.h tcp_stack.h tun.h udp_impl.h rawip_impl.h ipp_common.h \ + tcp_sack.h tcp_stack.h udp_impl.h rawip_impl.h ipp_common.h \ ip_ftable.h ip_impl.h ip_stack.h tcp_impl.h wifi_ioctl.h \ ip2mac.h ip2mac_impl.h diff --git a/usr/src/uts/common/inet/ip.h b/usr/src/uts/common/inet/ip.h index ef9e739b43..98a4621956 100644 --- a/usr/src/uts/common/inet/ip.h +++ b/usr/src/uts/common/inet/ip.h @@ -471,6 +471,13 @@ typedef enum { #define ICMP_ADDRESS_MASK_REQUEST 17 #define ICMP_ADDRESS_MASK_REPLY 18 +/* Evaluates to true if the ICMP type is an ICMP error */ +#define ICMP_IS_ERROR(type) ( \ + (type) == ICMP_DEST_UNREACHABLE || \ + (type) == ICMP_SOURCE_QUENCH || \ + (type) == ICMP_TIME_EXCEEDED || \ + (type) == ICMP_PARAM_PROBLEM) + /* ICMP_TIME_EXCEEDED codes */ #define ICMP_TTL_EXCEEDED 0 #define ICMP_REASSEMBLY_TIME_EXCEEDED 1 @@ -563,7 +570,7 @@ typedef struct ipha_s { struct ill_s; -typedef boolean_t ip_v6intfid_func_t(struct ill_s *, in6_addr_t *); +typedef void ip_v6intfid_func_t(struct ill_s *, in6_addr_t *); typedef boolean_t ip_v6mapinfo_func_t(uint_t, 
uint8_t *, uint8_t *, uint32_t *, in6_addr_t *); typedef boolean_t ip_v4mapinfo_func_t(uint_t, uint8_t *, uint8_t *, uint32_t *, @@ -573,9 +580,12 @@ typedef boolean_t ip_v4mapinfo_func_t(uint_t, uint8_t *, uint8_t *, uint32_t *, typedef struct ip_m_s { t_uscalar_t ip_m_mac_type; /* From <sys/dlpi.h> */ int ip_m_type; /* From <net/if_types.h> */ + t_uscalar_t ip_m_ipv4sap; + t_uscalar_t ip_m_ipv6sap; ip_v4mapinfo_func_t *ip_m_v4mapinfo; ip_v6mapinfo_func_t *ip_m_v6mapinfo; ip_v6intfid_func_t *ip_m_v6intfid; + ip_v6intfid_func_t *ip_m_v6destintfid; } ip_m_t; /* @@ -595,12 +605,10 @@ typedef struct ip_m_s { #define MEDIA_V6MINFO(ip_m, plen, bphys, maddr, hwxp, v6ptr) \ (((ip_m)->ip_m_v6mapinfo != NULL) && \ (*(ip_m)->ip_m_v6mapinfo)(plen, bphys, maddr, hwxp, v6ptr)) -#define MEDIA_V6INTFID(ip_m, ill, v6ptr) \ - (((ip_m)->ip_m_v6intfid != NULL) && \ - (*(ip_m)->ip_m_v6intfid)(ill, v6ptr)) +/* ip_m_v6*intfid return void and are never NULL */ +#define MEDIA_V6INTFID(ip_m, ill, v6ptr) (ip_m)->ip_m_v6intfid(ill, v6ptr) #define MEDIA_V6DESTINTFID(ip_m, ill, v6ptr) \ - (((ip_m)->ip_m_v6destintfid != NULL) && \ - (*(ip_m)->ip_m_v6destintfid)(ill, v6ptr)) + (ip_m)->ip_m_v6destintfid(ill, v6ptr) /* Router entry types */ #define IRE_BROADCAST 0x0001 /* Route entry for broadcast address */ @@ -1959,12 +1967,14 @@ typedef struct ill_s { mblk_t *ill_promiscoff_mp; /* for ill_leave_allmulti() */ mblk_t *ill_dlpi_deferred; /* b_next chain of control messages */ mblk_t *ill_ardeact_mp; /* deact mp from ipmp_ill_activate() */ + mblk_t *ill_dest_addr_mp; /* mblk which holds ill_dest_addr */ mblk_t *ill_replumb_mp; /* replumb mp from ill_replumb() */ mblk_t *ill_phys_addr_mp; /* mblk which holds ill_phys_addr */ #define ill_last_mp_to_free ill_phys_addr_mp cred_t *ill_credp; /* opener's credentials */ uint8_t *ill_phys_addr; /* ill_phys_addr_mp->b_rptr + off */ + uint8_t *ill_dest_addr; /* ill_dest_addr_mp->b_rptr + off */ uint_t ill_state_flags; /* see ILL_* flags above */ @@ -1978,15 
+1988,15 @@ typedef struct ill_s { ill_ifname_pending : 1, ill_join_allmulti : 1, ill_logical_down : 1, - ill_is_6to4tun : 1, /* Interface is a 6to4 tunnel */ - ill_dl_up : 1, + ill_up_ipifs : 1, ill_note_link : 1, /* supports link-up notification */ ill_capab_reneg : 1, /* capability renegotiation to be done */ ill_dld_capab_inprog : 1, /* direct dld capab call in prog */ + ill_need_recover_multicast : 1, - ill_pad_to_bit_31 : 18; + ill_pad_to_bit_31 : 19; /* Following bit fields protected by ill_lock */ uint_t @@ -1997,7 +2007,10 @@ typedef struct ill_s { ill_arp_bringup_pending : 1, ill_arp_extend : 1, /* ARP has DAD extensions */ - ill_pad_bit_31 : 26; + ill_manual_token : 1, /* system won't override ill_token */ + ill_manual_linklocal : 1, /* system won't auto-conf linklocal */ + + ill_pad_bit_31 : 24; /* * Used in SIOCSIFMUXID and SIOCGIFMUXID for 'ifconfig unplumb'. @@ -2030,7 +2043,7 @@ typedef struct ill_s { mblk_t *ill_capab_reset_mp; /* Preallocated mblk for capab reset */ /* - * New fields for IPv6 + * Fields for IPv6 */ uint8_t ill_max_hops; /* Maximum hops for any logical interface */ uint_t ill_max_mtu; /* Maximum MTU for any logical interface */ @@ -2038,7 +2051,8 @@ typedef struct ill_s { uint32_t ill_reachable_time; /* Value for ND algorithm in msec */ uint32_t ill_reachable_retrans_time; /* Value for ND algorithm msec */ uint_t ill_max_buf; /* Max # of req to buffer for ND */ - in6_addr_t ill_token; + in6_addr_t ill_token; /* IPv6 interface id */ + in6_addr_t ill_dest_token; /* Destination IPv6 interface id */ uint_t ill_token_length; uint32_t ill_xmit_count; /* ndp max multicast xmits */ mib2_ipIfStatsEntry_t *ill_ip_mib; /* ver indep. interface mib */ @@ -2071,7 +2085,7 @@ typedef struct ill_s { uint8_t *ill_nd_lla; /* Link Layer Address */ uint_t ill_nd_lla_len; /* Link Layer Address length */ /* - * We now have 3 phys_addr_req's sent down. This field keeps track + * We have 4 phys_addr_req's sent down. 
This field keeps track * of which one is pending. */ t_uscalar_t ill_phys_addr_pend; /* which dl_phys_addr_req pending */ @@ -2183,6 +2197,8 @@ typedef struct ill_s { * absence of ipsq writer. * ill_phys_addr_mp ipsq + down ill only when ill is up * ill_phys_addr ipsq + down ill only when ill is up + * ill_dest_addr_mp ipsq + down ill only when ill is up + * ill_dest_addr ipsq + down ill only when ill is up * * ill_state_flags ill_lock ill_lock * exclusive bit flags ipsq_t ipsq_t @@ -2210,6 +2226,7 @@ typedef struct ill_s { * report partially updated results without executing in the ipsq. * ill_token ipsq + ill_lock ill_lock * ill_token_length ipsq + ill_lock ill_lock + * ill_dest_token ipsq + down ill only when ill is up * ill_xmit_count ipsq + down ill write once * ill_ip6_mib ipsq + down ill only when ill is up * ill_icmp6_mib ipsq + down ill only when ill is up @@ -2277,14 +2294,13 @@ typedef struct ip_ioctl_cmd_s { * * IF_CMD 1 old style ifreq cmd * LIF_CMD 2 new style lifreq cmd - * TUN_CMD 3 tunnel related - * ARP_CMD 4 arpreq cmd - * XARP_CMD 5 xarpreq cmd - * MSFILT_CMD 6 multicast source filter cmd - * MISC_CMD 7 misc cmd (not a more specific one above) + * ARP_CMD 3 arpreq cmd + * XARP_CMD 4 xarpreq cmd + * MSFILT_CMD 5 multicast source filter cmd + * MISC_CMD 6 misc cmd (not a more specific one above) */ -enum { IF_CMD = 1, LIF_CMD, TUN_CMD, ARP_CMD, XARP_CMD, MSFILT_CMD, MISC_CMD }; +enum { IF_CMD = 1, LIF_CMD, ARP_CMD, XARP_CMD, MSFILT_CMD, MISC_CMD }; #define IPI_DONTCARE 0 /* For ioctl encoded values that don't matter */ @@ -2649,7 +2665,6 @@ typedef struct ire_s { uint32_t ire_ihandle; /* Associate interface IREs to cache */ ipif_t *ire_ipif; /* the interface that this ire uses */ uint32_t ire_flags; /* flags related to route (RTF_*) */ - uint_t ire_ipsec_overhead; /* IPSEC overhead */ /* * Neighbor Cache Entry for IPv6; arp info for IPv4 */ @@ -3431,7 +3446,7 @@ extern boolean_t ip_md_hcksum_attr(struct multidata_s *, struct pdesc_s *, uint32_t, 
uint32_t, uint32_t, uint32_t); extern boolean_t ip_md_zcopy_attr(struct multidata_s *, struct pdesc_s *, uint_t); -extern void ip_unbind(conn_t *connp); +extern void ip_unbind(conn_t *); extern void tnet_init(void); extern void tnet_fini(void); diff --git a/usr/src/uts/common/inet/ip/6to4tun.c b/usr/src/uts/common/inet/ip/6to4tun.c deleted file mode 100644 index 23ed86eaa8..0000000000 --- a/usr/src/uts/common/inet/ip/6to4tun.c +++ /dev/null @@ -1,104 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2002 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -/* 6to4 tunnel module */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include <sys/types.h> -#include <sys/stream.h> -#include <sys/stropts.h> - -#include <sys/isa_defs.h> - -#include <inet/common.h> - -#include <inet/ip.h> -#include <inet/ip6.h> -#include <inet/tun.h> - -#include <sys/modctl.h> - -/* streams linkages */ -static struct module_info tun6to4info = { - TUN6TO4_MODID, TUN6TO4_NAME, 1, INFPSZ, 65536, 1024 -}; - -static struct qinit tun6to4rinit = { - (pfi_t)tun_rput, - (pfi_t)tun_rsrv, - tun_open, - tun_close, - NULL, - &tun6to4info -}; - -static struct qinit tun6to4winit = { - (pfi_t)tun_wput, - (pfi_t)tun_wsrv, - NULL, - NULL, - NULL, - &tun6to4info -}; - -static struct streamtab tun6to4_strtab = { - &tun6to4rinit, &tun6to4winit, NULL, NULL -}; - -static struct fmodsw tun6to4_fmodsw = { - TUN6TO4_NAME, - &tun6to4_strtab, - (D_MP | D_MTQPAIR | D_MTPUTSHARED) -}; - -static struct modlstrmod modlstrmod = { - &mod_strmodops, "6to4 tunneling module", &tun6to4_fmodsw -}; - -static struct modlinkage modlinkage = { - MODREV_1, - &modlstrmod, - NULL -}; - -int -_init() -{ - return (mod_install(&modlinkage)); -} - -int -_fini() -{ - return (mod_remove(&modlinkage)); -} - -int -_info(struct modinfo *modinfop) -{ - return (mod_info(&modlinkage, modinfop)); -} diff --git a/usr/src/uts/common/inet/ip/atun.c b/usr/src/uts/common/inet/ip/atun.c deleted file mode 100644 index 9ead8c91de..0000000000 --- a/usr/src/uts/common/inet/ip/atun.c +++ /dev/null @@ -1,118 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. 
- * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 1995-1997, 2001 by Sun Microsystems, Inc. - * All rights reserved. - */ - -/* Automatic tunnel module */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include <sys/types.h> -#include <sys/stream.h> -#include <sys/stropts.h> - -#include <sys/socket.h> -#include <sys/isa_defs.h> -#include <net/if.h> -#include <net/if_arp.h> -#include <netinet/in.h> - -#include <inet/common.h> -#include <inet/arp.h> - -#include <netinet/ip6.h> -#include <netinet/icmp6.h> -#include <inet/ip.h> -#include <inet/ip6.h> -#include <net/if_dl.h> -#include <inet/ip_if.h> -#include <inet/tun.h> - -#include <sys/conf.h> -#include <sys/modctl.h> -#include <sys/stat.h> - -/* streams linkages */ -static struct module_info atuninfo = { - ATUN_MODID, ATUN_NAME, 1, INFPSZ, 65536, 1024 -}; - -static struct qinit atunrinit = { - (pfi_t)tun_rput, - (pfi_t)tun_rsrv, - tun_open, - tun_close, - NULL, - &atuninfo, - NULL -}; - -static struct qinit atunwinit = { - (pfi_t)tun_wput, - (pfi_t)tun_wsrv, - NULL, - NULL, - NULL, - &atuninfo, - NULL -}; - -static struct streamtab atun_strtab = { - &atunrinit, &atunwinit, NULL, NULL -}; - -static struct fmodsw atun_fmodsw = { - ATUN_NAME, - &atun_strtab, - (D_MP | D_MTQPAIR | D_MTPUTSHARED) - }; - -static struct modlstrmod modlstrmod = { - &mod_strmodops, "auto-tunneling module", &atun_fmodsw - }; - -static struct modlinkage modlinkage = { - MODREV_1, - &modlstrmod, - NULL - }; - - -int -_init(void) -{ - return (mod_install(&modlinkage)); -} - -int -_fini(void) -{ - return (mod_remove(&modlinkage)); -} - -int -_info(struct modinfo *modinfop) -{ - return 
(mod_info(&modlinkage, modinfop)); -} diff --git a/usr/src/uts/common/inet/ip/icmp.c b/usr/src/uts/common/inet/ip/icmp.c index 559bca6031..f1c5f9225f 100644 --- a/usr/src/uts/common/inet/ip/icmp.c +++ b/usr/src/uts/common/inet/ip/icmp.c @@ -1664,7 +1664,6 @@ icmp_open(int family, cred_t *credp, int *err, int flags) icmp->icmp_max_hdr_len = IPV6_HDR_LEN; icmp->icmp_ttl = (uint8_t)is->is_ipv6_hoplimit; connp->conn_af_isv6 = B_TRUE; - connp->conn_flags |= IPCL_ISV6; } else { icmp->icmp_ipversion = IPV4_VERSION; icmp->icmp_family = AF_INET; @@ -1673,7 +1672,6 @@ icmp_open(int family, cred_t *credp, int *err, int flags) icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH; icmp->icmp_ttl = (uint8_t)is->is_ipv4_ttl; connp->conn_af_isv6 = B_FALSE; - connp->conn_flags &= ~IPCL_ISV6; } icmp->icmp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; icmp->icmp_pending_op = -1; diff --git a/usr/src/uts/common/inet/ip/ip.c b/usr/src/uts/common/inet/ip/ip.c index ea78c1bf53..44e891eaaf 100644 --- a/usr/src/uts/common/inet/ip/ip.c +++ b/usr/src/uts/common/inet/ip/ip.c @@ -98,7 +98,7 @@ #include <inet/sadb.h> #include <inet/ipsec_impl.h> #include <sys/iphada.h> -#include <inet/tun.h> +#include <inet/iptun/iptun_impl.h> #include <inet/ipdrop.h> #include <inet/ip_netinfo.h> @@ -1268,12 +1268,8 @@ ip_ioctl_cmd_t ip_ndx_ioctl_table[] = { MISC_CMD, ip_sioctl_tonlink, NULL }, /* 146 */ { SIOCTMYSITE, sizeof (struct sioc_addrreq), 0, MISC_CMD, ip_sioctl_tmysite, NULL }, - /* 147 */ { SIOCGTUNPARAM, sizeof (struct iftun_req), 0, - TUN_CMD, ip_sioctl_tunparam, NULL }, - /* 148 */ { SIOCSTUNPARAM, sizeof (struct iftun_req), - IPI_PRIV | IPI_WR, - TUN_CMD, ip_sioctl_tunparam, NULL }, - + /* 147 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, + /* 148 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, /* IPSECioctls handled in ip_sioctl_copyin_setup itself */ /* 149 */ { SIOCFIPSECONFIG, 0, IPI_PRIV, MISC_CMD, NULL, NULL }, /* 150 */ { SIOCSIPSECONFIG, 0, IPI_PRIV, MISC_CMD, NULL, NULL }, @@ -1354,10 +1350,6 @@ 
ip_ioctl_cmd_t ip_ndx_ioctl_table[] = { int ip_ndx_ioctl_count = sizeof (ip_ndx_ioctl_table) / sizeof (ip_ioctl_cmd_t); ip_ioctl_cmd_t ip_misc_ioctl_table[] = { - { OSIOCGTUNPARAM, sizeof (struct old_iftun_req), - IPI_GET_CMD, TUN_CMD, ip_sioctl_tunparam, NULL }, - { OSIOCSTUNPARAM, sizeof (struct old_iftun_req), IPI_PRIV | IPI_WR, - TUN_CMD, ip_sioctl_tunparam, NULL }, { I_LINK, 0, IPI_PRIV | IPI_WR | IPI_PASS_DOWN, 0, NULL, NULL }, { I_UNLINK, 0, IPI_PRIV | IPI_WR | IPI_PASS_DOWN, 0, NULL, NULL }, { I_PLINK, 0, IPI_PRIV | IPI_WR | IPI_PASS_DOWN, 0, NULL, NULL }, @@ -2371,6 +2363,26 @@ icmp_inbound_self_encap_error(mblk_t *mp, int iph_hdr_length, int hdr_length) } /* + * Fanout for ICMP errors containing IP-in-IPv4 packets. Returns B_TRUE if a + * tunnel consumed the message, and B_FALSE otherwise. + */ +static boolean_t +icmp_inbound_iptun_fanout(mblk_t *first_mp, ipha_t *ripha, ill_t *ill, + ip_stack_t *ipst) +{ + conn_t *connp; + + if ((connp = ipcl_iptun_classify_v4(&ripha->ipha_src, &ripha->ipha_dst, + ipst)) == NULL) + return (B_FALSE); + + BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); + connp->conn_recv(connp, first_mp, NULL); + CONN_DEC_REF(connp); + return (B_TRUE); +} + +/* * Try to pass the ICMP message upstream in case the ULP cares. * * If the packet that caused the ICMP error is secure, we send @@ -2378,14 +2390,10 @@ icmp_inbound_self_encap_error(mblk_t *mp, int iph_hdr_length, int hdr_length) * valid association. ipha in the code below points to the * IP header of the packet that caused the error. * - * We handle ICMP_FRAGMENTATION_NEEDED(IFN) message differently - * in the context of IPsec. Normally we tell the upper layer - * whenever we send the ire (including ip_bind), the IPsec header - * length in ire_ipsec_overhead. TCP can deduce the MSS as it - * has both the MTU (ire_max_frag) and the ire_ipsec_overhead. - * Similarly, we pass the new MTU icmph_du_mtu and TCP does the - * same thing. 
As TCP has the IPsec options size that needs to be - * adjusted, we just pass the MTU unchanged. + * For IPsec cases, we let the next-layer-up (which has access to + * cached policy on the conn_t, or can query the SPD directly) + * subtract out any IPsec overhead if they must. We therefore make no + * adjustments here for IPsec overhead. * * IFN could have been generated locally or by some router. * @@ -2461,6 +2469,21 @@ icmp_inbound_error_fanout(queue_t *q, ill_t *ill, mblk_t *mp, ii = NULL; } + /* + * We need a separate IP header with the source and destination + * addresses reversed to do fanout/classification because the ipha in + * the ICMP error is in the form we sent it out. + */ + ripha.ipha_src = ipha->ipha_dst; + ripha.ipha_dst = ipha->ipha_src; + ripha.ipha_protocol = ipha->ipha_protocol; + ripha.ipha_version_and_hdr_length = ipha->ipha_version_and_hdr_length; + + ip2dbg(("icmp_inbound_error: proto %d %x to %x: %d/%d\n", + ripha.ipha_protocol, ntohl(ipha->ipha_src), + ntohl(ipha->ipha_dst), + icmph->icmph_type, icmph->icmph_code)); + switch (ipha->ipha_protocol) { case IPPROTO_UDP: /* @@ -2478,22 +2501,11 @@ icmp_inbound_error_fanout(queue_t *q, ill_t *ill, mblk_t *mp, } up = (uint16_t *)((uchar_t *)ipha + hdr_length); - /* - * Attempt to find a client stream based on port. - * Note that we do a reverse lookup since the header is - * in the form we sent it out. - * The ripha header is only used for the IP_UDP_MATCH and we - * only set the src and dst addresses and protocol. - */ - ripha.ipha_src = ipha->ipha_dst; - ripha.ipha_dst = ipha->ipha_src; - ripha.ipha_protocol = ipha->ipha_protocol; + /* Attempt to find a client stream based on port. 
*/ ((uint16_t *)&ports)[0] = up[1]; ((uint16_t *)&ports)[1] = up[0]; - ip2dbg(("icmp_inbound_error: UDP %x:%d to %x:%d: %d/%d\n", - ntohl(ipha->ipha_src), ntohs(up[0]), - ntohl(ipha->ipha_dst), ntohs(up[1]), - icmph->icmph_type, icmph->icmph_code)); + ip2dbg(("icmp_inbound_error: UDP ports %d to %d\n", + ntohs(up[0]), ntohs(up[1]))); /* Have to change db_type after any pullupmsg */ DB_TYPE(mp) = M_CTL; @@ -2548,18 +2560,7 @@ icmp_inbound_error_fanout(queue_t *q, ill_t *ill, mblk_t *mp, ipha = (ipha_t *)&icmph[1]; } up = (uint16_t *)((uchar_t *)ipha + hdr_length); - /* - * Find a SCTP client stream for this packet. - * Note that we do a reverse lookup since the header is - * in the form we sent it out. - * The ripha header is only used for the matching and we - * only set the src and dst addresses, protocol, and version. - */ - ripha.ipha_src = ipha->ipha_dst; - ripha.ipha_dst = ipha->ipha_src; - ripha.ipha_protocol = ipha->ipha_protocol; - ripha.ipha_version_and_hdr_length = - ipha->ipha_version_and_hdr_length; + /* Find a SCTP client stream for this packet. */ ((uint16_t *)&ports)[0] = up[1]; ((uint16_t *)&ports)[1] = up[0]; @@ -2632,7 +2633,6 @@ icmp_inbound_error_fanout(queue_t *q, ill_t *ill, mblk_t *mp, ii->ipsec_in_rill_index = recv_ill->ill_phyint->phyint_ifindex; } - ip2dbg(("icmp_inbound_error: ipsec\n")); if (!ipsec_loaded(ipss)) { ip_proto_not_sup(q, first_mp, 0, zoneid, ipst); @@ -2649,18 +2649,8 @@ icmp_inbound_error_fanout(queue_t *q, ill_t *ill, mblk_t *mp, ip_fanout_proto_again(first_mp, ill, recv_ill, NULL); return; } - default: - /* - * The ripha header is only used for the lookup and we - * only set the src and dst addresses and protocol. 
- */ - ripha.ipha_src = ipha->ipha_dst; - ripha.ipha_dst = ipha->ipha_src; - ripha.ipha_protocol = ipha->ipha_protocol; - ip2dbg(("icmp_inbound_error: proto %d %x to %x: %d/%d\n", - ripha.ipha_protocol, ntohl(ipha->ipha_src), - ntohl(ipha->ipha_dst), - icmph->icmph_type, icmph->icmph_code)); + case IPPROTO_ENCAP: + case IPPROTO_IPV6: if (ipha->ipha_protocol == IPPROTO_ENCAP) { ipha_t *in_ipha; @@ -2684,12 +2674,9 @@ icmp_inbound_error_fanout(queue_t *q, ill_t *ill, mblk_t *mp, * we did for the outer header. */ in_ipha = (ipha_t *)((uchar_t *)ipha + hdr_length); - if ((IPH_HDR_VERSION(in_ipha) != IPV4_VERSION)) { + if ((IPH_HDR_VERSION(in_ipha) != IPV4_VERSION) || + IPH_HDR_LENGTH(in_ipha) < sizeof (ipha_t)) goto discard_pkt; - } - if (IPH_HDR_LENGTH(in_ipha) < sizeof (ipha_t)) { - goto discard_pkt; - } /* Check for Self-encapsulated tunnels */ if (in_ipha->ipha_src == ipha->ipha_src && in_ipha->ipha_dst == ipha->ipha_dst) { @@ -2715,33 +2702,16 @@ icmp_inbound_error_fanout(queue_t *q, ill_t *ill, mblk_t *mp, return; } } - if ((ipha->ipha_protocol == IPPROTO_ENCAP || - ipha->ipha_protocol == IPPROTO_IPV6) && - icmph->icmph_code == ICMP_FRAGMENTATION_NEEDED && - ii != NULL && - ii->ipsec_in_loopback && - ii->ipsec_in_secure) { - /* - * For IP tunnels that get a looped-back - * ICMP_FRAGMENTATION_NEEDED message, adjust the - * reported new MTU to take into account the IPsec - * headers protecting this configured tunnel. - * - * This allows the tunnel module (tun.c) to blindly - * accept the MTU reported in an ICMP "too big" - * message. - * - * Non-looped back ICMP messages will just be - * handled by the security protocols (if needed), - * and the first subsequent packet will hit this - * path. 
- */ - icmph->icmph_du_mtu = htons(ntohs(icmph->icmph_du_mtu) - - ipsec_in_extra_length(first_mp)); - } - /* Have to change db_type after any pullupmsg */ - DB_TYPE(mp) = M_CTL; + DB_TYPE(mp) = M_CTL; + if (icmp_inbound_iptun_fanout(first_mp, &ripha, ill, ipst)) + return; + /* + * No IP tunnel is interested, fallthrough and see + * if a raw socket will want it. + */ + /* FALLTHRU */ + default: ip_fanout_proto(q, first_mp, ill, &ripha, 0, mctl_present, ip_policy, recv_ill, zoneid); return; @@ -3931,7 +3901,7 @@ ip_arp_news(queue_t *q, mblk_t *mp) } arh = (arh_t *)mp->b_cont->b_rptr; /* Is it one we are interested in? */ - if (BE16_TO_U16(arh->arh_proto) == IP6_DL_SAP) { + if (BE16_TO_U16(arh->arh_proto) == ETHERTYPE_IPV6) { isv6 = B_TRUE; bcopy((char *)&arh[1] + (arh->arh_hlen & 0xFF), &v6src, IPV6_ADDR_LEN); @@ -4355,23 +4325,6 @@ ip_bind_ipsec_policy_set(conn_t *connp, mblk_t *policy_mp) return (B_TRUE); } -static void -ip_bind_post_handling(conn_t *connp, mblk_t *mp, boolean_t ire_requested) -{ - /* - * Pass the IPsec headers size in ire_ipsec_overhead. - * We can't do this in ip_bind_get_ire because the policy - * may not have been inherited at that point in time and hence - * conn_out_enforce_policy may not be set. - */ - if (ire_requested && connp->conn_out_enforce_policy && - mp != NULL && DB_TYPE(mp) == IRE_DB_REQ_TYPE) { - ire_t *ire = (ire_t *)mp->b_rptr; - ASSERT(MBLKL(mp) >= sizeof (ire_t)); - ire->ire_ipsec_overhead = conn_ipsec_length(connp); - } -} - /* * Upper level protocols (ULP) pass through bind requests to IP for inspection * and to arrange for power-fanout assist. The ULP is identified by @@ -4411,7 +4364,6 @@ ip_bind_v4(queue_t *q, mblk_t *mp, conn_t *connp) ipa_conn_t *ac; uchar_t *ucp; mblk_t *mp1; - boolean_t ire_requested; int error = 0; int protocol; ipa_conn_x_t *acx; @@ -4502,9 +4454,7 @@ ip_bind_v4(queue_t *q, mblk_t *mp, conn_t *connp) /* * Check for trailing mps. 
*/ - mp1 = mp->b_cont; - ire_requested = (mp1 != NULL && DB_TYPE(mp1) == IRE_DB_REQ_TYPE); switch (tbr->ADDR_length) { default: @@ -4551,8 +4501,6 @@ ip_bind_v4(queue_t *q, mblk_t *mp, conn_t *connp) if (error != 0) goto bad_addr; - ip_bind_post_handling(connp, mp->b_cont, ire_requested); - /* Send it home. */ mp->b_datap->db_type = M_PCPROTO; tbr->PRIM_type = T_BIND_ACK; @@ -4753,12 +4701,6 @@ ip_proto_bind_laddr_v4(conn_t *connp, mblk_t **ire_mpp, uint8_t protocol, ipaddr_t src_addr, uint16_t lport, boolean_t fanout_insert) { int error; - mblk_t *mp = NULL; - boolean_t ire_requested; - - if (ire_mpp) - mp = *ire_mpp; - ire_requested = (mp != NULL && DB_TYPE(mp) == IRE_DB_REQ_TYPE); ASSERT(!connp->conn_af_isv6); connp->conn_pkt_isv6 = B_FALSE; @@ -4766,12 +4708,8 @@ ip_proto_bind_laddr_v4(conn_t *connp, mblk_t **ire_mpp, uint8_t protocol, error = ip_bind_laddr_v4(connp, ire_mpp, protocol, src_addr, lport, fanout_insert); - if (error == 0) { - ip_bind_post_handling(connp, ire_mpp ? *ire_mpp : NULL, - ire_requested); - } else if (error < 0) { + if (error < 0) error = -TBADADDR; - } return (error); } @@ -5286,12 +5224,6 @@ ip_proto_bind_connected_v4(conn_t *connp, mblk_t **ire_mpp, uint8_t protocol, boolean_t fanout_insert, boolean_t verify_dst, cred_t *cr) { int error; - mblk_t *mp = NULL; - boolean_t ire_requested; - - if (ire_mpp) - mp = *ire_mpp; - ire_requested = (mp != NULL && DB_TYPE(mp) == IRE_DB_REQ_TYPE); ASSERT(!connp->conn_af_isv6); connp->conn_pkt_isv6 = B_FALSE; @@ -5302,12 +5234,8 @@ ip_proto_bind_connected_v4(conn_t *connp, mblk_t **ire_mpp, uint8_t protocol, lport = connp->conn_lport; error = ip_bind_connected_v4(connp, ire_mpp, protocol, src_addrp, lport, dst_addr, fport, fanout_insert, verify_dst, cr); - if (error == 0) { - ip_bind_post_handling(connp, ire_mpp ? 
*ire_mpp : NULL, - ire_requested); - } else if (error < 0) { + if (error < 0) error = -TBADADDR; - } return (error); } @@ -6414,10 +6342,6 @@ ipsec_in_is_secure(mblk_t *ipsec_mp) * is used to negotiate SAs as SAs will be added only after * verifying the policy. * - * NOTE : If the packet was tunneled and not multicast we only send - * to it the first match. Unlike TCP and UDP fanouts this doesn't fall - * back to delivering packets to AF_INET6 raw sockets. - * * IPQoS Notes: * Once we have determined the client, invoke IPPF processing. * Policy processing takes place only if the callout_position, IPP_LOCAL_IN, @@ -6439,7 +6363,6 @@ ip_fanout_proto(queue_t *q, mblk_t *mp, ill_t *ill, ipha_t *ipha, uint_t flags, mblk_t *mp1, *first_mp1; uint_t protocol = ipha->ipha_protocol; ipaddr_t dst; - boolean_t one_only; mblk_t *first_mp = mp; boolean_t secure; uint32_t ill_index; @@ -6459,13 +6382,6 @@ ip_fanout_proto(queue_t *q, mblk_t *mp, ill_t *ill, ipha_t *ipha, uint_t flags, secure = B_FALSE; } dst = ipha->ipha_dst; - /* - * If the packet was tunneled and not multicast we only send to it - * the first match. - */ - one_only = ((protocol == IPPROTO_ENCAP || protocol == IPPROTO_IPV6) && - !CLASSD(dst)); - shared_addr = (zoneid == ALL_ZONES); if (shared_addr) { /* @@ -6533,12 +6449,7 @@ ip_fanout_proto(queue_t *q, mblk_t *mp, ill_t *ill, ipha_t *ipha, uint_t flags, CONN_INC_REF(connp); first_connp = connp; - - /* - * Only send message to one tunnel driver by immediately - * terminating the loop. - */ - connp = one_only ? NULL : connp->conn_next; + connp = connp->conn_next; for (;;) { while (connp != NULL) { @@ -6584,12 +6495,14 @@ ip_fanout_proto(queue_t *q, mblk_t *mp, ill_t *ill, ipha_t *ipha, uint_t flags, freemsg(first_mp1); } else { /* - * Don't enforce here if we're an actual tunnel - - * let "tun" do it instead. + * Enforce policy like any other conn_t. 
Note that + * IP-in-IP packets don't come through here, but + * through ip_iptun_input() or + * icmp_inbound_iptun_fanout(). IPsec policy for such + * packets is enforced in the iptun module. */ - if (!IPCL_IS_IPTUN(connp) && - (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || - secure)) { + if (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || + secure) { first_mp1 = ipsec_check_inbound_policy (first_mp1, connp, ipha, NULL, mctl_present); @@ -6685,19 +6598,7 @@ ip_fanout_proto(queue_t *q, mblk_t *mp, ill_t *ill, ipha_t *ipha, uint_t flags, freemsg(first_mp); } else { - if (IPCL_IS_IPTUN(connp)) { - /* - * Tunneled packet. We enforce policy in the tunnel - * module itself. - * - * Send the WHOLE packet up (incl. IPSEC_IN) without - * a policy check. - * FIXME to use conn_recv for tun later. - */ - putnext(rq, first_mp); - CONN_DEC_REF(connp); - return; - } + ASSERT(!IPCL_IS_IPTUN(connp)); if ((CONN_INBOUND_POLICY_PRESENT(connp, ipss) || secure)) { first_mp = ipsec_check_inbound_policy(first_mp, connp, @@ -8595,8 +8496,7 @@ ip_newroute(queue_t *q, mblk_t *mp, ipaddr_t dst, conn_t *connp, return; } case IRE_IF_NORESOLVER: { - if (dst_ill->ill_phys_addr_length != IP_ADDR_LEN && - dst_ill->ill_resolver_mp == NULL) { + if (dst_ill->ill_resolver_mp == NULL) { ip1dbg(("ip_newroute: dst_ill %p " "for IRE_IF_NORESOLVER ire %p has " "no ill_resolver_mp\n", @@ -9312,8 +9212,7 @@ ip_newroute_ipif(queue_t *q, mblk_t *mp, ipif_t *ipif, ipaddr_t dst, case IRE_IF_NORESOLVER: { /* We have what we need to build an IRE_CACHE. 
*/ - if ((dst_ill->ill_phys_addr_length != IP_ADDR_LEN) && - (dst_ill->ill_resolver_mp == NULL)) { + if (dst_ill->ill_resolver_mp == NULL) { ip1dbg(("ip_newroute_ipif: dst_ill %p " "for IRE_IF_NORESOLVER ire %p has " "no ill_resolver_mp\n", @@ -9884,7 +9783,6 @@ ip_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, /* Minor tells us which /dev entry was opened */ if (isv6) { - connp->conn_flags |= IPCL_ISV6; connp->conn_af_isv6 = B_TRUE; ip_setpktversion(connp, isv6, B_FALSE, ipst); connp->conn_src_preferences = IPV6_PREFER_SRC_DEFAULT; @@ -10174,15 +10072,10 @@ ipsec_set_req(cred_t *cr, conn_t *connp, ipsec_req_t *req) uint_t ah_req = 0; uint_t esp_req = 0; uint_t se_req = 0; - ipsec_selkey_t sel; ipsec_act_t *actp = NULL; uint_t nact; - ipsec_policy_t *pin4 = NULL, *pout4 = NULL; - ipsec_policy_t *pin6 = NULL, *pout6 = NULL; - ipsec_policy_root_t *pr; ipsec_policy_head_t *ph; - int fam; - boolean_t is_pol_reset; + boolean_t is_pol_reset, is_pol_inserted = B_FALSE; int error = 0; netstack_t *ns = connp->conn_netstack; ip_stack_t *ipst = ns->netstack_ip; @@ -10300,65 +10193,33 @@ ipsec_set_req(cred_t *cr, conn_t *connp, ipsec_req_t *req) goto enomem; /* - * Always allocate IPv4 policy entries, since they can also - * apply to ipv6 sockets being used in ipv4-compat mode. + * Always insert IPv4 policy entries, since they can also apply to + * ipv6 sockets being used in ipv4-compat mode. 
*/ - bzero(&sel, sizeof (sel)); - sel.ipsl_valid = IPSL_IPV4; - - pin4 = ipsec_policy_create(&sel, actp, nact, IPSEC_PRIO_SOCKET, NULL, - ipst->ips_netstack); - if (pin4 == NULL) + if (!ipsec_polhead_insert(ph, actp, nact, IPSEC_AF_V4, + IPSEC_TYPE_INBOUND, ns)) goto enomem; - - pout4 = ipsec_policy_create(&sel, actp, nact, IPSEC_PRIO_SOCKET, NULL, - ipst->ips_netstack); - if (pout4 == NULL) + is_pol_inserted = B_TRUE; + if (!ipsec_polhead_insert(ph, actp, nact, IPSEC_AF_V4, + IPSEC_TYPE_OUTBOUND, ns)) goto enomem; + /* + * We're looking at a v6 socket, also insert the v6-specific + * entries. + */ if (connp->conn_af_isv6) { - /* - * We're looking at a v6 socket, also allocate the - * v6-specific entries... - */ - sel.ipsl_valid = IPSL_IPV6; - pin6 = ipsec_policy_create(&sel, actp, nact, - IPSEC_PRIO_SOCKET, NULL, ipst->ips_netstack); - if (pin6 == NULL) + if (!ipsec_polhead_insert(ph, actp, nact, IPSEC_AF_V6, + IPSEC_TYPE_INBOUND, ns)) goto enomem; - - pout6 = ipsec_policy_create(&sel, actp, nact, - IPSEC_PRIO_SOCKET, NULL, ipst->ips_netstack); - if (pout6 == NULL) + if (!ipsec_polhead_insert(ph, actp, nact, IPSEC_AF_V6, + IPSEC_TYPE_OUTBOUND, ns)) goto enomem; - - /* - * .. and file them away in the right place. - */ - fam = IPSEC_AF_V6; - pr = &ph->iph_root[IPSEC_TYPE_INBOUND]; - HASHLIST_INSERT(pin6, ipsp_hash, pr->ipr_nonhash[fam]); - ipsec_insert_always(&ph->iph_rulebyid, pin6); - pr = &ph->iph_root[IPSEC_TYPE_OUTBOUND]; - HASHLIST_INSERT(pout6, ipsp_hash, pr->ipr_nonhash[fam]); - ipsec_insert_always(&ph->iph_rulebyid, pout6); } ipsec_actvec_free(actp, nact); /* - * File the v4 policies. 
- */ - fam = IPSEC_AF_V4; - pr = &ph->iph_root[IPSEC_TYPE_INBOUND]; - HASHLIST_INSERT(pin4, ipsp_hash, pr->ipr_nonhash[fam]); - ipsec_insert_always(&ph->iph_rulebyid, pin4); - - pr = &ph->iph_root[IPSEC_TYPE_OUTBOUND]; - HASHLIST_INSERT(pout4, ipsp_hash, pr->ipr_nonhash[fam]); - ipsec_insert_always(&ph->iph_rulebyid, pout4); - - /* * If the requests need security, set enforce_policy. * If the requests are IPSEC_PREF_NEVER, one should * still set conn_out_enforce_policy so that an ipsec_out @@ -10388,14 +10249,8 @@ enomem: mutex_exit(&connp->conn_lock); if (actp != NULL) ipsec_actvec_free(actp, nact); - if (pin4 != NULL) - IPPOL_REFRELE(pin4, ipst->ips_netstack); - if (pout4 != NULL) - IPPOL_REFRELE(pout4, ipst->ips_netstack); - if (pin6 != NULL) - IPPOL_REFRELE(pin6, ipst->ips_netstack); - if (pout6 != NULL) - IPPOL_REFRELE(pout6, ipst->ips_netstack); + if (is_pol_inserted) + ipsec_polhead_flush(ph, ns); return (ENOMEM); } @@ -12958,6 +12813,25 @@ slow_done: #undef rptr } +static boolean_t +ip_iptun_input(mblk_t *ipsec_mp, mblk_t *data_mp, ipha_t *ipha, ill_t *ill, + ire_t *ire, ip_stack_t *ipst) +{ + conn_t *connp; + + ASSERT(ipsec_mp == NULL || ipsec_mp->b_cont == data_mp); + + if ((connp = ipcl_classify_v4(data_mp, ipha->ipha_protocol, + IP_SIMPLE_HDR_LENGTH, ire->ire_zoneid, ipst)) != NULL) { + BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); + connp->conn_recv(connp, ipsec_mp != NULL ? 
ipsec_mp : data_mp, + NULL); + CONN_DEC_REF(connp); + return (B_TRUE); + } + return (B_FALSE); +} + /* ARGSUSED */ static mblk_t * ip_tcp_input(mblk_t *mp, ipha_t *ipha, ill_t *recv_ill, boolean_t mctl_present, @@ -14715,14 +14589,6 @@ ip_rput_process_notdata(queue_t *q, mblk_t **first_mpp, ill_t *ill, ill = (ill_t *)q->q_ptr; ill_fastpath_ack(ill, mp); return (B_TRUE); - case SIOCSTUNPARAM: - case OSIOCSTUNPARAM: - /* Go through qwriter_ip */ - break; - case SIOCGTUNPARAM: - case OSIOCGTUNPARAM: - ip_rput_other(NULL, q, mp, NULL); - return (B_TRUE); default: putnext(q, mp); return (B_TRUE); @@ -14793,18 +14659,7 @@ ip_rput_process_notdata(queue_t *q, mblk_t **first_mpp, ill_t *ill, ip1dbg(("got iocnak ")); iocp = (struct iocblk *)mp->b_rptr; switch (iocp->ioc_cmd) { - case SIOCSTUNPARAM: - case OSIOCSTUNPARAM: - /* - * Since this is on the ill stream we unconditionally - * bump up the refcount - */ - ill_refhold(ill); - qwriter_ip(ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); - return (B_TRUE); case DL_IOC_HDR_INFO: - case SIOCGTUNPARAM: - case OSIOCGTUNPARAM: ip_rput_other(NULL, q, mp, NULL); return (B_TRUE); default: @@ -14838,9 +14693,6 @@ ip_rput(queue_t *q, mblk_t *mp) dl = (union DL_primitives *)mp->b_rptr; if (DB_TYPE(mp) != M_PCPROTO || dl->dl_primitive == DL_UNITDATA_IND) { - /* - * SIOC[GS]TUNPARAM ioctls can come here. - */ inet_freemsg(mp); TRACE_2(TR_FAC_IP, TR_IP_RPUT_END, "ip_rput_end: q %p (%S)", q, "uninit"); @@ -15478,6 +15330,17 @@ local: /* ire has been released by ip_sctp_input */ ire = NULL; continue; + case IPPROTO_ENCAP: + case IPPROTO_IPV6: + ASSERT(first_mp == mp); + if (ip_iptun_input(NULL, mp, ipha, ill, ire, ipst)) + break; + /* + * If there was no IP tunnel data-link bound to + * receive this packet, then we fall through to + * allow potential raw sockets bound to either of + * these protocols to pick it up. 
+ */ default: ip_proto_input(q, first_mp, ipha, ire, ill, 0); continue; @@ -16435,10 +16298,12 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) * available, but we know the ioctl is pending on ill_wq.) */ uint_t paddrlen, paddroff; + uint8_t *addr; paddrreq = ill->ill_phys_addr_pend; paddrlen = ((dl_phys_addr_ack_t *)mp->b_rptr)->dl_addr_length; paddroff = ((dl_phys_addr_ack_t *)mp->b_rptr)->dl_addr_offset; + addr = mp->b_rptr + paddroff; ill_dlpi_done(ill, DL_PHYS_ADDR_REQ); if (paddrreq == DL_IPV6_TOKEN) { @@ -16448,8 +16313,7 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) * XXX Temporary hack - currently, all known tokens * are 64 bits, so I'll cheat for the moment. */ - bcopy(mp->b_rptr + paddroff, - &ill->ill_token.s6_addr32[2], paddrlen); + bcopy(addr, &ill->ill_token.s6_addr32[2], paddrlen); ill->ill_token_length = paddrlen; break; } else if (paddrreq == DL_IPV6_LINK_LAYER_ADDR) { @@ -16457,6 +16321,16 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) ill_set_ndmp(ill, mp, paddroff, paddrlen); mp = NULL; break; + } else if (paddrreq == DL_CURR_DEST_ADDR) { + ASSERT(ill->ill_dest_addr_mp == NULL); + ill->ill_dest_addr_mp = mp; + ill->ill_dest_addr = addr; + mp = NULL; + if (ill->ill_isv6) { + ill_setdesttoken(ill); + ipif_setdestlinklocal(ill->ill_ipif); + } + break; } ASSERT(paddrreq == DL_CURR_PHYS_ADDR); @@ -16482,22 +16356,18 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) } ill->ill_phys_addr_mp = mp; - ill->ill_phys_addr = mp->b_rptr + paddroff; + ill->ill_phys_addr = (paddrlen == 0 ? NULL : addr); mp = NULL; /* - * If paddrlen is zero, the DLPI provider doesn't support - * physical addresses. 
The other two tests were historical - * workarounds for bugs in our former PPP implementation, but - * now other things have grown dependencies on them -- e.g., - * the tun module specifies a dl_addr_length of zero in its - * DL_BIND_ACK, but then specifies an incorrect value in its - * DL_PHYS_ADDR_ACK. These bogus checks need to be removed, - * but only after careful testing ensures that all dependent - * broken DLPI providers have been fixed. + * If paddrlen or ill_phys_addr_length is zero, the DLPI + * provider doesn't support physical addresses. We check both + * paddrlen and ill_phys_addr_length because sppp (PPP) does + * not have physical addresses, but historically advertises a + * physical address length of 0 in its DL_INFO_ACK, but 6 in + * its DL_PHYS_ADDR_ACK. */ - if (paddrlen == 0 || ill->ill_phys_addr_length == 0 || - ill->ill_phys_addr_length == IP_ADDR_LEN) { + if (paddrlen == 0 || ill->ill_phys_addr_length == 0) { ill->ill_phys_addr = NULL; } else if (paddrlen != ill->ill_phys_addr_length) { ip0dbg(("DL_PHYS_ADDR_ACK: got addrlen %d, expected %d", @@ -16514,17 +16384,9 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) ill_set_ndmp(ill, mp_hw, paddroff, paddrlen); } - /* - * Set the interface token. If the zeroth interface address - * is unspecified, then set it to the link local address. - */ - if (IN6_IS_ADDR_UNSPECIFIED(&ill->ill_token)) - (void) ill_setdefaulttoken(ill); - - ASSERT(ill->ill_ipif->ipif_id == 0); - if (ipif != NULL && - IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6lcl_addr)) { - (void) ipif_setlinklocal(ipif); + if (ill->ill_isv6) { + ill_setdefaulttoken(ill); + ipif_setlinklocal(ill->ill_ipif); } break; } @@ -16608,7 +16470,7 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) /* * ip_rput_other is called by ip_rput to handle messages modifying the global - * state in IP. Normally called as writer. Exception SIOCGTUNPARAM (shared) + * state in IP.
If 'ipsq' is non-NULL, caller is writer on it. */ /* ARGSUSED */ void @@ -16616,8 +16478,6 @@ ip_rput_other(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) { ill_t *ill = q->q_ptr; struct iocblk *iocp; - mblk_t *mp1; - conn_t *connp = NULL; ip1dbg(("ip_rput_other ")); if (ipsq != NULL) { @@ -16643,168 +16503,29 @@ ip_rput_other(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) return; ipif_all_down_tail(ipsq, q, mp, NULL); break; - case M_IOCACK: + case M_IOCNAK: { iocp = (struct iocblk *)mp->b_rptr; - ASSERT(iocp->ioc_cmd != DL_IOC_HDR_INFO); - switch (iocp->ioc_cmd) { - case SIOCSTUNPARAM: - case OSIOCSTUNPARAM: - ASSERT(ipsq != NULL); - /* - * Finish socket ioctl passed through to tun. - * We should have an IOCTL waiting on this. - */ - mp1 = ipsq_pending_mp_get(ipsq, &connp); - if (ill->ill_isv6) { - struct iftun_req *ta; - - /* - * if a source or destination is - * being set, try and set the link - * local address for the tunnel - */ - ta = (struct iftun_req *)mp->b_cont-> - b_cont->b_rptr; - if (ta->ifta_flags & (IFTUN_SRC | IFTUN_DST)) { - ipif_set_tun_llink(ill, ta); - } - } - if (mp1 != NULL) { - /* - * Now copy back the b_next/b_prev used by - * mi code for the mi_copy* functions. - * See ip_sioctl_tunparam() for the reason. - * Also protect against missing b_cont. - */ - if (mp->b_cont != NULL) { - mp->b_cont->b_next = - mp1->b_cont->b_next; - mp->b_cont->b_prev = - mp1->b_cont->b_prev; - } - inet_freemsg(mp1); - ASSERT(connp != NULL); - ip_ioctl_finish(CONNP_TO_WQ(connp), mp, - iocp->ioc_error, NO_COPYOUT, ipsq); - } else { - ASSERT(connp == NULL); - putnext(q, mp); - } - break; - case SIOCGTUNPARAM: - case OSIOCGTUNPARAM: - /* - * This is really M_IOCDATA from the tunnel driver. - * convert back and complete the ioctl. - * We should have an IOCTL waiting on this. - */ - mp1 = ill_pending_mp_get(ill, &connp, iocp->ioc_id); - if (mp1) { - /* - * Now copy back the b_next/b_prev used by - * mi code for the mi_copy* functions. 
- * See ip_sioctl_tunparam() for the reason. - * Also protect against missing b_cont. - */ - if (mp->b_cont != NULL) { - mp->b_cont->b_next = - mp1->b_cont->b_next; - mp->b_cont->b_prev = - mp1->b_cont->b_prev; - } - inet_freemsg(mp1); - if (iocp->ioc_error == 0) - mp->b_datap->db_type = M_IOCDATA; - ASSERT(connp != NULL); - ip_ioctl_finish(CONNP_TO_WQ(connp), mp, - iocp->ioc_error, COPYOUT, NULL); - } else { - ASSERT(connp == NULL); - putnext(q, mp); - } - break; - default: - break; + ASSERT(iocp->ioc_cmd == DL_IOC_HDR_INFO); + /* + * If this was the first attempt, turn off the fastpath + * probing. + */ + mutex_enter(&ill->ill_lock); + if (ill->ill_dlpi_fastpath_state == IDS_INPROGRESS) { + ill->ill_dlpi_fastpath_state = IDS_FAILED; + mutex_exit(&ill->ill_lock); + ill_fastpath_nack(ill); + ip1dbg(("ip_rput: DLPI fastpath off on interface %s\n", + ill->ill_name)); + } else { + mutex_exit(&ill->ill_lock); } + freemsg(mp); break; - case M_IOCNAK: - iocp = (struct iocblk *)mp->b_rptr; - - switch (iocp->ioc_cmd) { - int mode; - - case DL_IOC_HDR_INFO: - /* - * If this was the first attempt, turn off the - * fastpath probing. - */ - mutex_enter(&ill->ill_lock); - if (ill->ill_dlpi_fastpath_state == IDS_INPROGRESS) { - ill->ill_dlpi_fastpath_state = IDS_FAILED; - mutex_exit(&ill->ill_lock); - ill_fastpath_nack(ill); - ip1dbg(("ip_rput: DLPI fastpath off on " - "interface %s\n", - ill->ill_name)); - } else { - mutex_exit(&ill->ill_lock); - } - freemsg(mp); - break; - case SIOCSTUNPARAM: - case OSIOCSTUNPARAM: - ASSERT(ipsq != NULL); - /* - * Finish socket ioctl passed through to tun - * We should have an IOCTL waiting on this. - */ - /* FALLTHRU */ - case SIOCGTUNPARAM: - case OSIOCGTUNPARAM: - /* - * This is really M_IOCDATA from the tunnel driver. - * convert back and complete the ioctl. - * We should have an IOCTL waiting on this. 
- */ - if (iocp->ioc_cmd == SIOCGTUNPARAM || - iocp->ioc_cmd == OSIOCGTUNPARAM) { - mp1 = ill_pending_mp_get(ill, &connp, - iocp->ioc_id); - mode = COPYOUT; - ipsq = NULL; - } else { - mp1 = ipsq_pending_mp_get(ipsq, &connp); - mode = NO_COPYOUT; - } - if (mp1 != NULL) { - /* - * Now copy back the b_next/b_prev used by - * mi code for the mi_copy* functions. - * See ip_sioctl_tunparam() for the reason. - * Also protect against missing b_cont. - */ - if (mp->b_cont != NULL) { - mp->b_cont->b_next = - mp1->b_cont->b_next; - mp->b_cont->b_prev = - mp1->b_cont->b_prev; - } - inet_freemsg(mp1); - if (iocp->ioc_error == 0) - iocp->ioc_error = EINVAL; - ASSERT(connp != NULL); - ip_ioctl_finish(CONNP_TO_WQ(connp), mp, - iocp->ioc_error, mode, ipsq); - } else { - ASSERT(connp == NULL); - putnext(q, mp); - } - break; - default: - break; - } + } default: + ASSERT(0); break; } } @@ -17364,6 +17085,19 @@ ip_fanout_proto_again(mblk_t *ipsec_mp, ill_t *ill, ill_t *recv_ill, ire_t *ire) ip_sctp_input(mp, ipha, ill, B_TRUE, ire, ipsec_mp, 0, ill->ill_rq, dst); break; + case IPPROTO_ENCAP: + case IPPROTO_IPV6: + if (ip_iptun_input(ipsec_mp, mp, ipha, ill, ire, + ill->ill_ipst)) { + /* + * If we made it here, we don't need to worry + * about the raw-socket/protocol fanout. 
+ */ + if (ire_need_rele) + ire_refrele(ire); + break; + } + /* else FALLTHRU */ default: ip_proto_input(ill->ill_rq, ipsec_mp, ipha, ire, recv_ill, 0); @@ -20508,7 +20242,6 @@ ip_unbind(conn_t *connp) connp->conn_mlp_type = mlptSingle; ipcl_hash_remove(connp); - } /* @@ -27081,10 +26814,6 @@ ip_process_ioctl(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *arg) extract_funcp = ip_extract_arpreq; break; - case TUN_CMD: - extract_funcp = ip_extract_tunreq; - break; - case MSFILT_CMD: extract_funcp = ip_extract_msfilter; break; @@ -27392,14 +27121,6 @@ nak: if (mp->b_wptr - mp->b_rptr < sizeof (uint32_t)) break; - if (((ipsec_info_t *)mp->b_rptr)->ipsec_info_type == - TUN_HELLO) { - ASSERT(connp != NULL); - connp->conn_flags |= IPCL_IPTUN; - freeb(mp); - return; - } - /* M_CTL messages are used by ARP to tell us things. */ if ((mp->b_wptr - mp->b_rptr) < sizeof (arc_t)) break; @@ -27490,6 +27211,7 @@ nak: ASSERT(!IPCL_IS_TCP(connp)); ASSERT(!IPCL_IS_UDP(connp)); ASSERT(!IPCL_IS_RAWIP(connp)); + ASSERT(!IPCL_IS_IPTUN(connp)); /* The case of AH and ESP */ qreply(q, mp); diff --git a/usr/src/uts/common/inet/ip/ip6.c b/usr/src/uts/common/inet/ip/ip6.c index cd71a6a1c6..bdea32272d 100644 --- a/usr/src/uts/common/inet/ip/ip6.c +++ b/usr/src/uts/common/inet/ip/ip6.c @@ -90,7 +90,7 @@ #include <inet/ipsec_info.h> #include <inet/sadb.h> #include <inet/ipsec_impl.h> -#include <inet/tun.h> +#include <inet/iptun/iptun_impl.h> #include <inet/sctp_ip.h> #include <sys/pattr.h> #include <inet/ipclassifier.h> @@ -195,8 +195,6 @@ static int ip_bind_connected_v6(conn_t *, mblk_t **, uint8_t, in6_addr_t *, boolean_t, boolean_t, cred_t *); static boolean_t ip_bind_get_ire_v6(mblk_t **, ire_t *, const in6_addr_t *, iulp_t *, ip_stack_t *); -static void ip_bind_post_handling_v6(conn_t *, mblk_t *, boolean_t, - boolean_t, ip_stack_t *); static int ip_bind_laddr_v6(conn_t *, mblk_t **, uint8_t, const in6_addr_t *, uint16_t, boolean_t); static void ip_fanout_proto_v6(queue_t *, mblk_t *, ip6_t *, 
ill_t *, @@ -222,7 +220,7 @@ static areq_t ipv6_areq_template = { AR_ENTRY_QUERY, /* cmd */ sizeof (areq_t)+(2*IPV6_ADDR_LEN), /* name offset */ sizeof (areq_t), /* name len (filled by ill_arp_alloc) */ - IP6_DL_SAP, /* protocol, from arps perspective */ + ETHERTYPE_IPV6, /* protocol, from arps perspective */ sizeof (areq_t), /* target addr offset */ IPV6_ADDR_LEN, /* target addr_length */ 0, /* flags */ @@ -725,6 +723,26 @@ icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, ill_t *inill, } /* + * Fanout for ICMPv6 errors containing IP-in-IPv6 packets. Returns B_TRUE if a + * tunnel consumed the message, and B_FALSE otherwise. + */ +static boolean_t +icmp_inbound_iptun_fanout_v6(mblk_t *first_mp, ip6_t *rip6h, ill_t *ill, + ip_stack_t *ipst) +{ + conn_t *connp; + + if ((connp = ipcl_iptun_classify_v6(&rip6h->ip6_src, &rip6h->ip6_dst, + ipst)) == NULL) + return (B_FALSE); + + BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); + connp->conn_recv(connp, first_mp, NULL); + CONN_DEC_REF(connp); + return (B_TRUE); +} + +/* * Fanout received ICMPv6 error packets to the transports. * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. */ @@ -784,6 +802,15 @@ icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, /* Set message type, must be done after pullups */ mp->b_datap->db_type = M_CTL; + /* + * We need a separate IP header with the source and destination + * addresses reversed to do fanout/classification because the ip6h in + * the ICMPv6 error is in the form we sent it out. + */ + rip6h.ip6_src = ip6h->ip6_dst; + rip6h.ip6_dst = ip6h->ip6_src; + rip6h.ip6_nxt = nexthdr; + /* Try to pass the ICMP message to clients who need it */ switch (nexthdr) { case IPPROTO_UDP: { @@ -795,17 +822,8 @@ icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, mp->b_wptr) { break; } - /* - * Attempt to find a client stream based on port. - * Note that we do a reverse lookup since the header is - * in the form we sent it out. 
- * The rip6h header is only used for the IPCL_UDP_MATCH_V6 - * and we only set the src and dst addresses and nexthdr. - */ + /* Attempt to find a client stream based on port. */ up = (uint16_t *)((uchar_t *)ip6h + hdr_length); - rip6h.ip6_src = ip6h->ip6_dst; - rip6h.ip6_dst = ip6h->ip6_src; - rip6h.ip6_nxt = nexthdr; ((uint16_t *)&ports)[0] = up[1]; ((uint16_t *)&ports)[1] = up[0]; @@ -827,10 +845,7 @@ icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, * Attempt to find a client stream based on port. * Note that we do a reverse lookup since the header is * in the form we sent it out. - * The rip6h header is only used for the IP_TCP_*MATCH_V6 and - * we only set the src and dst addresses and nexthdr. */ - tcpha = (tcpha_t *)((char *)ip6h + hdr_length); connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst); @@ -958,10 +973,11 @@ icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, * we need to adjust the MTU to take into account * the IPsec overhead. */ - if (ii != NULL) + if (ii != NULL) { icmp6->icmp6_mtu = htonl( ntohl(icmp6->icmp6_mtu) - ipsec_in_extra_length(first_mp)); + } } else { /* * Self-encapsulated case. As in the ipv4 case, @@ -1037,15 +1053,14 @@ icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, mctl_present, zoneid); return; } - /* FALLTHRU */ - default: + if (icmp_inbound_iptun_fanout_v6(first_mp, &rip6h, ill, ipst)) + return; /* - * The rip6h header is only used for the lookup and we - * only set the src and dst addresses and nexthdr. + * No IP tunnel is associated with this error. Perhaps a raw + * socket will want it. 
*/ - rip6h.ip6_src = ip6h->ip6_dst; - rip6h.ip6_dst = ip6h->ip6_src; - rip6h.ip6_nxt = nexthdr; + /* FALLTHRU */ + default: ip_fanout_proto_v6(q, first_mp, &rip6h, ill, inill, nexthdr, 0, IP6_NO_IPPOLICY, mctl_present, zoneid); return; @@ -2160,29 +2175,6 @@ bad_addr: return (mp); } -static void -ip_bind_post_handling_v6(conn_t *connp, mblk_t *mp, - boolean_t version_changed, boolean_t ire_requested, ip_stack_t *ipst) -{ - /* Update conn_send and pktversion if v4/v6 changed */ - if (version_changed) { - ip_setpktversion(connp, connp->conn_pkt_isv6, B_TRUE, ipst); - } - - /* - * Pass the IPSEC headers size in ire_ipsec_overhead. - * We can't do this in ip_bind_insert_ire because the policy - * may not have been inherited at that point in time and hence - * conn_out_enforce_policy may not be set. - */ - if (ire_requested && connp->conn_out_enforce_policy && - mp != NULL && DB_TYPE(mp) == IRE_DB_REQ_TYPE) { - ire_t *ire = (ire_t *)mp->b_rptr; - ASSERT(MBLKL(mp) >= sizeof (ire_t)); - ire->ire_ipsec_overhead = (conn_ipsec_length(connp)); - } -} - /* * Here address is verified to be a valid local address. * If the IRE_DB_REQ_TYPE mp is present, a multicast @@ -2375,21 +2367,10 @@ int ip_proto_bind_laddr_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, const in6_addr_t *v6srcp, uint16_t lport, boolean_t fanout_insert) { - int error; - boolean_t ire_requested; - mblk_t *mp = NULL; - boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; + int error; + boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; ip_stack_t *ipst = connp->conn_netstack->netstack_ip; - /* - * Note that we allow connect to broadcast and multicast - * address when ire_requested is set. Thus the ULP - * has to check for IRE_BROADCAST and multicast. 
- */ - if (mpp) - mp = *mpp; - ire_requested = (mp && DB_TYPE(mp) == IRE_DB_REQ_TYPE); - ASSERT(connp->conn_af_isv6); connp->conn_ulp = protocol; @@ -2416,8 +2397,8 @@ ip_proto_bind_laddr_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, connp->conn_pkt_isv6 = B_TRUE; } - ip_bind_post_handling_v6(connp, mpp ? *mpp : NULL, - orig_pkt_isv6 != connp->conn_pkt_isv6, ire_requested, ipst); + if (orig_pkt_isv6 != connp->conn_pkt_isv6) + ip_setpktversion(connp, connp->conn_pkt_isv6, B_TRUE, ipst); return (0); bad_addr: @@ -2913,17 +2894,8 @@ ip_proto_bind_connected_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, { int error = 0; boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; - boolean_t ire_requested; ip_stack_t *ipst = connp->conn_netstack->netstack_ip; - /* - * Note that we allow connect to broadcast and multicast - * address when ire_requested is set. Thus the ULP - * has to check for IRE_BROADCAST and multicast. - */ - ASSERT(mpp != NULL); - ire_requested = (*mpp != NULL && DB_TYPE(*mpp) == IRE_DB_REQ_TYPE); - ASSERT(connp->conn_af_isv6); connp->conn_ulp = protocol; @@ -2969,8 +2941,8 @@ ip_proto_bind_connected_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, connp->conn_pkt_isv6 = B_TRUE; } - ip_bind_post_handling_v6(connp, mpp ? *mpp : NULL, - orig_pkt_isv6 != connp->conn_pkt_isv6, ire_requested, ipst); + if (orig_pkt_isv6 != connp->conn_pkt_isv6) + ip_setpktversion(connp, connp->conn_pkt_isv6, B_TRUE, ipst); /* Send it home. */ return (0); @@ -3082,8 +3054,6 @@ ip_add_info_v6(mblk_t *mp, ill_t *ill, const in6_addr_t *dst) * can be more than one stream bound to a particular * protocol. When this is the case, normally each one gets a copy * of any incoming packets. - * However, if the packet was tunneled and not multicast we only send to it - * the first match. * * Zones notes: * Packets will be distributed to streams in all zones. 
This is really only @@ -3099,7 +3069,6 @@ ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, mblk_t *mp1, *first_mp1; in6_addr_t dst = ip6h->ip6_dst; in6_addr_t src = ip6h->ip6_src; - boolean_t one_only; mblk_t *first_mp = mp; boolean_t secure, shared_addr; conn_t *connp, *first_connp, *next_connp; @@ -3115,13 +3084,6 @@ ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, secure = B_FALSE; } - /* - * If the packet was tunneled and not multicast we only send to it - * the first match. - */ - one_only = ((nexthdr == IPPROTO_ENCAP || nexthdr == IPPROTO_IPV6) && - !IN6_IS_ADDR_MULTICAST(&dst)); - shared_addr = (zoneid == ALL_ZONES); if (shared_addr) { /* @@ -3169,16 +3131,7 @@ ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, * XXX: Fix the multiple protocol listeners case. We should not * be walking the conn->next list here. */ - if (one_only) { - /* - * Only send message to one tunnel driver by immediately - * terminating the loop. - */ - connp = NULL; - } else { - connp = connp->conn_next; - - } + connp = connp->conn_next; for (;;) { while (connp != NULL) { if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, @@ -3235,13 +3188,10 @@ ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, freemsg(mp1); } else { - /* - * Don't enforce here if we're a tunnel - let "tun" do - * it instead. - */ - if (!IPCL_IS_IPTUN(connp) && - (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || - secure)) { + ASSERT(!IPCL_IS_IPTUN(connp)); + + if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || + secure) { first_mp1 = ipsec_check_inbound_policy( first_mp1, connp, NULL, ip6h, mctl_present); } @@ -3312,24 +3262,9 @@ ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, freemsg(first_mp); } else { - if (IPCL_IS_IPTUN(connp)) { - /* - * Tunneled packet. We enforce policy in the tunnel - * module itself. - * - * Send the WHOLE packet up (incl. IPSEC_IN) without - * a policy check. 
- */ - putnext(rq, first_mp); - CONN_DEC_REF(connp); - return; - } - /* - * Don't enforce here if we're a tunnel - let "tun" do - * it instead. - */ - if (nexthdr != IPPROTO_ENCAP && nexthdr != IPPROTO_IPV6 && - (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || secure)) { + ASSERT(!IPCL_IS_IPTUN(connp)); + + if (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || secure) { first_mp = ipsec_check_inbound_policy(first_mp, connp, NULL, ip6h, mctl_present); if (first_mp == NULL) { @@ -6452,8 +6387,8 @@ ip_rput_v6(queue_t *q, mblk_t *mp) */ if (ill->ill_mactype == DL_ETHER && (hlen = MBLKHEAD(mp)) >= sizeof (struct ether_header) && - (ucp = mp->b_rptr)[-1] == (IP6_DL_SAP & 0xFF) && - ucp[-2] == (IP6_DL_SAP >> 8)) { + (ucp = mp->b_rptr)[-1] == (ETHERTYPE_IPV6 & 0xFF) && + ucp[-2] == (ETHERTYPE_IPV6 >> 8)) { if (hlen >= sizeof (struct ether_vlan_header) && ucp[-5] == 0 && ucp[-6] == 0x81) ucp -= sizeof (struct ether_vlan_header); @@ -6517,16 +6452,6 @@ ip_rput_v6(queue_t *q, mblk_t *mp) ill = (ill_t *)q->q_ptr; ill_fastpath_ack(ill, mp); return; - - case SIOCGTUNPARAM: - case OSIOCGTUNPARAM: - ip_rput_other(NULL, q, mp, NULL); - return; - - case SIOCSTUNPARAM: - case OSIOCSTUNPARAM: - /* Go through qwriter */ - break; default: putnext(q, mp); return; @@ -6557,23 +6482,8 @@ ip_rput_v6(queue_t *q, mblk_t *mp) iocp = (struct iocblk *)mp->b_rptr; switch (iocp->ioc_cmd) { case DL_IOC_HDR_INFO: - case SIOCGTUNPARAM: - case OSIOCGTUNPARAM: ip_rput_other(NULL, q, mp, NULL); return; - - case SIOCSTUNPARAM: - case OSIOCSTUNPARAM: - mutex_enter(&ill->ill_lock); - if (ill->ill_state_flags & ILL_CONDEMNED) { - mutex_exit(&ill->ill_lock); - freemsg(mp); - return; - } - ill_refhold_locked(ill); - mutex_exit(&ill->ill_lock); - qwriter_ip(ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); - return; default: break; } @@ -6894,6 +6804,26 @@ ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present, return (B_TRUE); } +static boolean_t +ip_iptun_input_v6(mblk_t *ipsec_mp, mblk_t *data_mp, + size_t 
hdr_len, uint8_t nexthdr, zoneid_t zoneid, ill_t *ill, + ip_stack_t *ipst) +{ + conn_t *connp; + + ASSERT(ipsec_mp == NULL || ipsec_mp->b_cont == data_mp); + + connp = ipcl_classify_v6(data_mp, nexthdr, hdr_len, zoneid, ipst); + if (connp != NULL) { + BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); + connp->conn_recv(connp, ipsec_mp != NULL ? ipsec_mp : data_mp, + NULL); + CONN_DEC_REF(connp); + return (B_TRUE); + } + return (B_FALSE); +} + /* * Validate the IPv6 mblk for alignment. */ @@ -6975,7 +6905,6 @@ ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, ire_t *ire = NULL; ill_t *ill = inill; ill_t *outill; - ipif_t *ipif; uint8_t *whereptr; uint8_t nexthdr; uint16_t remlen; @@ -7154,32 +7083,6 @@ drop_pkt: BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); goto ipv6forus; } - ipif = ill->ill_ipif; - - /* - * If a packet was received on an interface that is a 6to4 tunnel, - * incoming IPv6 packets, with a 6to4 addressed IPv6 destination, must - * be checked to have a 6to4 prefix (2002:V4ADDR::/48) that is equal to - * the 6to4 prefix of the address configured on the receiving interface. - * Otherwise, the packet was delivered to this interface in error and - * the packet must be dropped. - */ - if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { - - if (!IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, - &ip6h->ip6_dst)) { - if (ip_debug > 2) { - /* ip1dbg */ - pr_addr_dbg("ip_rput_data_v6: received 6to4 " - "addressed packet which is not for us: " - "%s\n", AF_INET6, &ip6h->ip6_dst); - } - BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); - freemsg(first_mp); - return; - } - } - /* * Find an ire that matches destination. For link-local addresses * we have to match the ill. @@ -7822,8 +7725,22 @@ tcp_fanout: inill, hdr_len, mctl_present, 0, zoneid, dl_mp); } + goto proto_fanout; } + case IPPROTO_ENCAP: + case IPPROTO_IPV6: + if (ip_iptun_input_v6(mctl_present ? 
first_mp : NULL, + mp, pkt_len - remlen, nexthdr, zoneid, ill, ipst)) { + return; + } + /* + * If there was no IP tunnel data-link bound to + * receive this packet, then we fall through to + * allow potential raw sockets bound to either of + * these protocols to pick it up. + */ /* FALLTHRU */ +proto_fanout: default: { /* * Handle protocols with which IPv6 is less intimate. @@ -9084,7 +9001,7 @@ ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) #endif /* - * M_CTL comes from 6 places + * M_CTL comes from 5 places * * 1) TCP sends down IPSEC_OUT(M_CTL) for detached connections * both V4 and V6 datagrams. @@ -9100,8 +9017,6 @@ ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) * 5) AH/ESP send down IPSEC_CTL(M_CTL) to be relayed to hardware for * IPsec hardware acceleration support. * - * 6) TUN_HELLO. - * * We need to handle (1)'s IPv6 case and (3) here. For the * IPv4 case in (1), and (2), IPSEC processing has already * started. The code in ip_wput() already knows how to handle @@ -11697,34 +11612,6 @@ ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, return; } - /* - * If a packet is to be sent out an interface that is a 6to4 - * tunnel, outgoing IPv6 packets, with a 6to4 addressed IPv6 - * destination, must be checked to have a 6to4 prefix - * (2002:V4ADDR::/48) that is NOT equal to the 6to4 prefix of - * address configured on the sending interface. Otherwise, - * the packet was delivered to this interface in error and the - * packet must be dropped. 
- */ - if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { - ipif_t *ipif = ill->ill_ipif; - - if (IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, - &ip6h->ip6_dst)) { - if (ip_debug > 2) { - /* ip1dbg */ - pr_addr_dbg("ip_xmit_v6: attempting to " - "send 6to4 addressed IPv6 " - "destination (%s) out the wrong " - "interface.\n", AF_INET6, - &ip6h->ip6_dst); - } - BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); - freemsg(mp); - return; - } - } - /* Flow-control check has been done in ip_wput_ire_v6 */ if (IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt) || caller == IP_WPUT || caller == IP_WSRV || canput(stq->q_next)) { diff --git a/usr/src/uts/common/inet/ip/ip6_if.c b/usr/src/uts/common/inet/ip/ip6_if.c index 4a9b648b40..a986a755ac 100644 --- a/usr/src/uts/common/inet/ip/ip6_if.c +++ b/usr/src/uts/common/inet/ip/ip6_if.c @@ -65,7 +65,6 @@ #include <inet/ip_ndp.h> #include <inet/ip_if.h> #include <inet/ip6_asp.h> -#include <inet/tun.h> #include <inet/ipclassifier.h> #include <inet/sctp_ip.h> @@ -1146,26 +1145,23 @@ ip_rt_delete_v6(const in6_addr_t *dst_addr, const in6_addr_t *mask, } /* - * Derive a token from the link layer address. + * Derive an interface id from the link layer address. 
*/ -boolean_t +void ill_setdefaulttoken(ill_t *ill) { - int i; - in6_addr_t v6addr, v6mask; - - if (!MEDIA_V6INTFID(ill->ill_media, ill, &v6addr)) - return (B_FALSE); - - (void) ip_plen_to_mask_v6(IPV6_TOKEN_LEN, &v6mask); - - for (i = 0; i < 4; i++) - v6mask.s6_addr32[i] = v6mask.s6_addr32[i] ^ - (uint32_t)0xffffffff; + if (!ill->ill_manual_token) { + bzero(&ill->ill_token, sizeof (ill->ill_token)); + MEDIA_V6INTFID(ill->ill_media, ill, &ill->ill_token); + ill->ill_token_length = IPV6_TOKEN_LEN; + } +} - V6_MASK_COPY(v6addr, v6mask, ill->ill_token); - ill->ill_token_length = IPV6_TOKEN_LEN; - return (B_TRUE); +void +ill_setdesttoken(ill_t *ill) +{ + bzero(&ill->ill_dest_token, sizeof (ill->ill_dest_token)); + MEDIA_V6DESTINTFID(ill->ill_media, ill, &ill->ill_dest_token); } /* @@ -1183,123 +1179,27 @@ ipif_get_linklocal(in6_addr_t *dest, const in6_addr_t *token) } /* - * Set a nice default address for either automatic tunnels tsrc/96 or - * 6to4 tunnels 2002:<tsrc>::1/64 + * Set a default IPv6 address for a 6to4 tunnel interface 2002:<tsrc>::1/16 */ static void -ipif_set_tun_auto_addr(ipif_t *ipif, struct iftun_req *ta) -{ - sin6_t sin6; - sin_t *sin; - ill_t *ill = ipif->ipif_ill; - tun_t *tp = (tun_t *)ill->ill_wq->q_next->q_ptr; - - if (ta->ifta_saddr.ss_family != AF_INET || - (ipif->ipif_flags & IPIF_UP) || !ipif->ipif_isv6 || - (ta->ifta_flags & IFTUN_SRC) == 0) - return; - - /* - * Check the tunnel type by examining q_next->q_ptr - */ - if (tp->tun_flags & TUN_AUTOMATIC) { - /* this is an automatic tunnel */ - (void) ip_plen_to_mask_v6(IPV6_ABITS - IP_ABITS, - &ipif->ipif_v6net_mask); - bzero(&sin6, sizeof (sin6_t)); - sin = (sin_t *)&ta->ifta_saddr; - V4_PART_OF_V6(sin6.sin6_addr) = sin->sin_addr.s_addr; - sin6.sin6_family = AF_INET6; - (void) ip_sioctl_addr(ipif, (sin_t *)&sin6, - NULL, NULL, NULL, NULL); - } else if (tp->tun_flags & TUN_6TO4) { - /* this is a 6to4 tunnel */ - (void) ip_plen_to_mask_v6(IPV6_PREFIX_LEN, - &ipif->ipif_v6net_mask); - sin = 
(sin_t *)&ta->ifta_saddr; - /* create a 6to4 address from the IPv4 tsrc */ - IN6_V4ADDR_TO_6TO4(&sin->sin_addr, &sin6.sin6_addr); - sin6.sin6_family = AF_INET6; - (void) ip_sioctl_addr(ipif, (sin_t *)&sin6, - NULL, NULL, NULL, NULL); - } else { - ip1dbg(("ipif_set_tun_auto_addr: Unknown tunnel type")); - return; - } -} - -/* - * Set link local for ipif_id 0 of a configured tunnel based on the - * tsrc or tdst parameter - * For tunnels over IPv4 use the IPv4 address prepended with 32 zeros as - * the token. - * For tunnels over IPv6 use the low-order 64 bits of the "inner" IPv6 address - * as the token for the "outer" link. - */ -void -ipif_set_tun_llink(ill_t *ill, struct iftun_req *ta) +ipif_set6to4addr(ipif_t *ipif) { - ipif_t *ipif; - sin_t *sin; - in6_addr_t *s6addr; - - ASSERT(IAM_WRITER_ILL(ill)); + ill_t *ill = ipif->ipif_ill; + struct in_addr v4phys; - /* The first ipif must be id zero. */ - ipif = ill->ill_ipif; - ASSERT(ipif->ipif_id == 0); + ASSERT(ill->ill_mactype == DL_6TO4); + ASSERT(ill->ill_phys_addr_length == sizeof (struct in_addr)); + ASSERT(ipif->ipif_isv6); - /* no link local for automatic tunnels */ - if (!(ipif->ipif_flags & IPIF_POINTOPOINT)) { - ipif_set_tun_auto_addr(ipif, ta); + if (ipif->ipif_flags & IPIF_UP) return; - } - - if ((ta->ifta_flags & IFTUN_DST) && - IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6pp_dst_addr)) { - sin6_t sin6; - ASSERT(!(ipif->ipif_flags & IPIF_UP)); - bzero(&sin6, sizeof (sin6_t)); - if ((ta->ifta_saddr.ss_family == AF_INET)) { - sin = (sin_t *)&ta->ifta_daddr; - V4_PART_OF_V6(sin6.sin6_addr) = - sin->sin_addr.s_addr; - } else { - s6addr = - &((sin6_t *)&ta->ifta_daddr)->sin6_addr; - sin6.sin6_addr.s6_addr32[3] = s6addr->s6_addr32[3]; - sin6.sin6_addr.s6_addr32[2] = s6addr->s6_addr32[2]; - } - ipif_get_linklocal(&ipif->ipif_v6pp_dst_addr, - &sin6.sin6_addr); - ipif->ipif_v6subnet = ipif->ipif_v6pp_dst_addr; - } - if ((ta->ifta_flags & IFTUN_SRC)) { - ASSERT(!(ipif->ipif_flags & IPIF_UP)); - - /* Set the token if it 
isn't already set */ - if (IN6_IS_ADDR_UNSPECIFIED(&ill->ill_token)) { - if ((ta->ifta_saddr.ss_family == AF_INET)) { - sin = (sin_t *)&ta->ifta_saddr; - V4_PART_OF_V6(ill->ill_token) = - sin->sin_addr.s_addr; - } else { - s6addr = - &((sin6_t *)&ta->ifta_saddr)->sin6_addr; - ill->ill_token.s6_addr32[3] = - s6addr->s6_addr32[3]; - ill->ill_token.s6_addr32[2] = - s6addr->s6_addr32[2]; - } - ill->ill_token_length = IPV6_TOKEN_LEN; - } - /* - * Attempt to set the link local address if it isn't set. - */ - if (IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6lcl_addr)) - (void) ipif_setlinklocal(ipif); - } + (void) ip_plen_to_mask_v6(16, &ipif->ipif_v6net_mask); + bcopy(ill->ill_phys_addr, &v4phys, sizeof (struct in_addr)); + IN6_V4ADDR_TO_6TO4(&v4phys, &ipif->ipif_v6lcl_addr); + ipif->ipif_v6src_addr = ipif->ipif_v6lcl_addr; + V6_MASK_COPY(ipif->ipif_v6lcl_addr, ipif->ipif_v6net_mask, + ipif->ipif_v6subnet); } /* @@ -1322,9 +1222,8 @@ ipif_cant_setlinklocal(ipif_t *ipif) /* * Generate a link-local address from the token. - * Return zero if the address was set, or non-zero if it couldn't be set. */ -int +void ipif_setlinklocal(ipif_t *ipif) { ill_t *ill = ipif->ipif_ill; @@ -1332,22 +1231,57 @@ ipif_setlinklocal(ipif_t *ipif) ASSERT(IAM_WRITER_ILL(ill)); + /* + * ill_manual_linklocal is set when the link-local address was + * manually configured. + */ + if (ill->ill_manual_linklocal) + return; + + /* + * IPv6 interfaces over 6to4 tunnels are special. They do not have + * link-local addresses, but instead have a single automatically + * generated global address. 
+ */ + if (ill->ill_mactype == DL_6TO4) { + ipif_set6to4addr(ipif); + return; + } + if (ipif_cant_setlinklocal(ipif)) - return (-1); + return; ov6addr = ipif->ipif_v6lcl_addr; ipif_get_linklocal(&ipif->ipif_v6lcl_addr, &ill->ill_token); sctp_update_ipif_addr(ipif, ov6addr); (void) ip_plen_to_mask_v6(IPV6_LL_PREFIXLEN, &ipif->ipif_v6net_mask); - V6_MASK_COPY(ipif->ipif_v6lcl_addr, ipif->ipif_v6net_mask, - ipif->ipif_v6subnet); + if (IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6pp_dst_addr)) { + V6_MASK_COPY(ipif->ipif_v6lcl_addr, ipif->ipif_v6net_mask, + ipif->ipif_v6subnet); + } if (ipif->ipif_flags & IPIF_NOLOCAL) { ipif->ipif_v6src_addr = ipv6_all_zeros; } else { ipif->ipif_v6src_addr = ipif->ipif_v6lcl_addr; } - return (0); +} + +/* + * Set the destination link-local address for a point-to-point IPv6 + * interface with a destination interface id (IP tunnels are such + * interfaces). + */ +void +ipif_setdestlinklocal(ipif_t *ipif) +{ + ill_t *ill = ipif->ipif_ill; + + ASSERT(IAM_WRITER_ILL(ill)); + if (IN6_IS_ADDR_UNSPECIFIED(&ill->ill_dest_token)) + return; + ipif_get_linklocal(&ipif->ipif_v6pp_dst_addr, &ill->ill_dest_token); + ipif->ipif_v6subnet = ipif->ipif_v6pp_dst_addr; } /* @@ -1375,6 +1309,9 @@ ipif_ndp_setup_multicast(ipif_t *ipif, nce_t **ret_nce) if (ret_nce != NULL) *ret_nce = NULL; + if (ipif->ipif_flags & IPIF_POINTOPOINT) + return (0); + /* * IPMP meta-interfaces don't have any inherent multicast mappings, * and instead use the ones on the underlying interfaces. 
@@ -2818,6 +2755,7 @@ ill_dl_phys(ill_t *ill, ipif_t *ipif, mblk_t *mp, queue_t *q) { mblk_t *v6token_mp = NULL; mblk_t *v6lla_mp = NULL; + mblk_t *dest_mp = NULL; mblk_t *phys_mp = NULL; mblk_t *info_mp = NULL; mblk_t *attach_mp = NULL; @@ -2845,6 +2783,15 @@ ill_dl_phys(ill_t *ill, ipif_t *ipif, mblk_t *mp, queue_t *q) DL_IPV6_LINK_LAYER_ADDR; } + if (ill->ill_mactype == DL_IPV4 || ill->ill_mactype == DL_IPV6) { + dest_mp = ip_dlpi_alloc(sizeof (dl_phys_addr_req_t) + + sizeof (t_scalar_t), DL_PHYS_ADDR_REQ); + if (dest_mp == NULL) + goto bad; + ((dl_phys_addr_req_t *)dest_mp->b_rptr)->dl_addr_type = + DL_CURR_DEST_ADDR; + } + /* * Allocate a DL_NOTIFY_REQ and set the notifications we want. */ @@ -2913,10 +2860,12 @@ ill_dl_phys(ill_t *ill, ipif_t *ipif, mblk_t *mp, queue_t *q) } ill_dlpi_send(ill, bind_mp); ill_dlpi_send(ill, info_mp); - if (ill->ill_isv6) { + if (v6token_mp != NULL) ill_dlpi_send(ill, v6token_mp); + if (v6lla_mp != NULL) ill_dlpi_send(ill, v6lla_mp); - } + if (dest_mp != NULL) + ill_dlpi_send(ill, dest_mp); ill_dlpi_send(ill, phys_mp); ill_dlpi_send(ill, notify_mp); ill_dlpi_send(ill, unbind_mp); @@ -2929,6 +2878,7 @@ ill_dl_phys(ill_t *ill, ipif_t *ipif, mblk_t *mp, queue_t *q) bad: freemsg(v6token_mp); freemsg(v6lla_mp); + freemsg(dest_mp); freemsg(phys_mp); freemsg(info_mp); freemsg(attach_mp); @@ -3111,11 +3061,7 @@ ipif_up_done_v6(ipif_t *ipif) ipst); } - /* - * Set up the IRE_IF_RESOLVER or IRE_IF_NORESOLVER, as appropriate. - * Note that atun interfaces have an all-zero ipif_v6subnet. - * Thus we allow a zero subnet as long as the mask is non-zero. - */ + /* Set up the IRE_IF_RESOLVER or IRE_IF_NORESOLVER, as appropriate. 
*/ if (stq != NULL && !(ipif->ipif_flags & IPIF_NOXMIT) && !(IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6subnet) && IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6net_mask))) { @@ -3153,62 +3099,6 @@ ipif_up_done_v6(ipif_t *ipif) ipst); } - /* - * Setup 2002::/16 route, if this interface is a 6to4 tunnel - */ - if (IN6_IS_ADDR_6TO4(&ipif->ipif_v6lcl_addr) && - (ill->ill_is_6to4tun)) { - /* - * Destination address is 2002::/16 - */ -#ifdef _BIG_ENDIAN - const in6_addr_t prefix_addr = { 0x20020000U, 0, 0, 0 }; - const in6_addr_t prefix_mask = { 0xffff0000U, 0, 0, 0 }; -#else - const in6_addr_t prefix_addr = { 0x00000220U, 0, 0, 0 }; - const in6_addr_t prefix_mask = { 0x0000ffffU, 0, 0, 0 }; -#endif /* _BIG_ENDIAN */ - char buf2[INET6_ADDRSTRLEN]; - ire_t *isdup; - in6_addr_t *first_addr = &ill->ill_ipif->ipif_v6lcl_addr; - - /* - * check to see if this route has already been added for - * this tunnel interface. - */ - isdup = ire_ftable_lookup_v6(first_addr, &prefix_mask, 0, - IRE_IF_NORESOLVER, ill->ill_ipif, NULL, ALL_ZONES, 0, NULL, - (MATCH_IRE_SRC | MATCH_IRE_MASK), ipst); - - if (isdup == NULL) { - ip1dbg(("ipif_up_done_v6: creating if IRE %d for %s", - IRE_IF_NORESOLVER, inet_ntop(AF_INET6, &v6addr, - buf2, sizeof (buf2)))); - - *irep++ = ire_create_v6( - &prefix_addr, /* 2002:: */ - &prefix_mask, /* ffff:: */ - &ipif->ipif_v6lcl_addr, /* src addr */ - NULL, /* gateway */ - &ipif->ipif_mtu, /* max_frag */ - NULL, /* no src nce */ - NULL, /* no rfq */ - ill->ill_wq, /* stq */ - IRE_IF_NORESOLVER, /* type */ - ipif, /* interface */ - NULL, /* v6cmask */ - 0, - 0, - RTF_UP, - &ire_uinfo_null, - NULL, - NULL, - ipst); - } else { - ire_refrele(isdup); - } - } - /* If an earlier ire_create failed, get out now */ for (irep1 = irep; irep1 > ire_array; ) { irep1--; diff --git a/usr/src/uts/common/inet/ip/ip_ftable.c b/usr/src/uts/common/inet/ip/ip_ftable.c index 89334daf4f..9e228c2925 100644 --- a/usr/src/uts/common/inet/ip/ip_ftable.c +++ b/usr/src/uts/common/inet/ip/ip_ftable.c @@ 
-1057,8 +1057,7 @@ create_irecache: switch (ire->ire_type) { case IRE_IF_NORESOLVER: /* create ire_cache for ire_addr endpoint */ - if (dst_ill->ill_phys_addr_length != IP_ADDR_LEN && - dst_ill->ill_resolver_mp == NULL) { + if (dst_ill->ill_resolver_mp == NULL) { ip1dbg(("ire_forward: dst_ill %p " "for IRE_IF_NORESOLVER ire %p has " "no ill_resolver_mp\n", diff --git a/usr/src/uts/common/inet/ip/ip_if.c b/usr/src/uts/common/inet/ip/ip_if.c index 2c5a2543d3..7b1bea5de7 100644 --- a/usr/src/uts/common/inet/ip/ip_if.c +++ b/usr/src/uts/common/inet/ip/ip_if.c @@ -84,7 +84,6 @@ #include <inet/ip_ndp.h> #include <inet/ip_if.h> #include <inet/ip_impl.h> -#include <inet/tun.h> #include <inet/sctp_ip.h> #include <inet/ip_netinfo.h> @@ -196,9 +195,13 @@ static void ill_set_phys_addr_tail(ipsq_t *, queue_t *, mblk_t *, void *); static void ill_replumb_tail(ipsq_t *, queue_t *, mblk_t *, void *); static ip_v6intfid_func_t ip_ether_v6intfid, ip_ib_v6intfid; +static ip_v6intfid_func_t ip_ipv4_v6intfid, ip_ipv6_v6intfid; static ip_v6intfid_func_t ip_ipmp_v6intfid, ip_nodef_v6intfid; +static ip_v6intfid_func_t ip_ipv4_v6destintfid, ip_ipv6_v6destintfid; static ip_v6mapinfo_func_t ip_ether_v6mapinfo, ip_ib_v6mapinfo; +static ip_v6mapinfo_func_t ip_nodef_v6mapinfo; static ip_v4mapinfo_func_t ip_ether_v4mapinfo, ip_ib_v4mapinfo; +static ip_v4mapinfo_func_t ip_nodef_v4mapinfo; static void ipif_save_ire(ipif_t *, ire_t *); static void ipif_remove_ire(ipif_t *, ire_t *); static void ip_cgtp_bcast_add(ire_t *, ire_t *, ip_stack_t *); @@ -439,21 +442,36 @@ static ipha_t icmp_ipha = { static uchar_t ip_six_byte_all_ones[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; static ip_m_t ip_m_tbl[] = { - { DL_ETHER, IFT_ETHER, ip_ether_v4mapinfo, ip_ether_v6mapinfo, - ip_ether_v6intfid }, - { DL_CSMACD, IFT_ISO88023, ip_ether_v4mapinfo, ip_ether_v6mapinfo, + { DL_ETHER, IFT_ETHER, ETHERTYPE_IP, ETHERTYPE_IPV6, + ip_ether_v4mapinfo, ip_ether_v6mapinfo, ip_ether_v6intfid, ip_nodef_v6intfid }, - { 
DL_TPB, IFT_ISO88024, ip_ether_v4mapinfo, ip_ether_v6mapinfo, + { DL_CSMACD, IFT_ISO88023, ETHERTYPE_IP, ETHERTYPE_IPV6, + ip_ether_v4mapinfo, ip_ether_v6mapinfo, ip_nodef_v6intfid, ip_nodef_v6intfid }, - { DL_TPR, IFT_ISO88025, ip_ether_v4mapinfo, ip_ether_v6mapinfo, + { DL_TPB, IFT_ISO88024, ETHERTYPE_IP, ETHERTYPE_IPV6, + ip_ether_v4mapinfo, ip_ether_v6mapinfo, ip_nodef_v6intfid, ip_nodef_v6intfid }, - { DL_FDDI, IFT_FDDI, ip_ether_v4mapinfo, ip_ether_v6mapinfo, - ip_ether_v6intfid }, - { DL_IB, IFT_IB, ip_ib_v4mapinfo, ip_ib_v6mapinfo, - ip_ib_v6intfid }, - { SUNW_DL_VNI, IFT_OTHER, NULL, NULL, NULL }, - { SUNW_DL_IPMP, IFT_OTHER, NULL, NULL, ip_ipmp_v6intfid }, - { DL_OTHER, IFT_OTHER, ip_ether_v4mapinfo, ip_ether_v6mapinfo, + { DL_TPR, IFT_ISO88025, ETHERTYPE_IP, ETHERTYPE_IPV6, + ip_ether_v4mapinfo, ip_ether_v6mapinfo, ip_nodef_v6intfid, + ip_nodef_v6intfid }, + { DL_FDDI, IFT_FDDI, ETHERTYPE_IP, ETHERTYPE_IPV6, + ip_ether_v4mapinfo, ip_ether_v6mapinfo, ip_ether_v6intfid, + ip_nodef_v6intfid }, + { DL_IB, IFT_IB, ETHERTYPE_IP, ETHERTYPE_IPV6, + ip_ib_v4mapinfo, ip_ib_v6mapinfo, ip_ib_v6intfid, + ip_nodef_v6intfid }, + { DL_IPV4, IFT_IPV4, IPPROTO_ENCAP, IPPROTO_IPV6, ip_nodef_v4mapinfo, + ip_nodef_v6mapinfo, ip_ipv4_v6intfid, ip_ipv4_v6destintfid }, + { DL_IPV6, IFT_IPV6, IPPROTO_ENCAP, IPPROTO_IPV6, ip_nodef_v4mapinfo, + ip_nodef_v6mapinfo, ip_ipv6_v6intfid, ip_ipv6_v6destintfid }, + { DL_6TO4, IFT_6TO4, IPPROTO_ENCAP, IPPROTO_IPV6, ip_nodef_v4mapinfo, + ip_nodef_v6mapinfo, ip_ipv4_v6intfid, ip_nodef_v6intfid }, + { SUNW_DL_VNI, IFT_OTHER, ETHERTYPE_IP, ETHERTYPE_IPV6, + NULL, NULL, ip_nodef_v6intfid, ip_nodef_v6intfid }, + { SUNW_DL_IPMP, IFT_OTHER, ETHERTYPE_IP, ETHERTYPE_IPV6, + NULL, NULL, ip_ipmp_v6intfid, ip_nodef_v6intfid }, + { DL_OTHER, IFT_OTHER, ETHERTYPE_IP, ETHERTYPE_IPV6, + ip_ether_v4mapinfo, ip_ether_v6mapinfo, ip_nodef_v6intfid, ip_nodef_v6intfid } }; @@ -1058,22 +1076,17 @@ ill_dlur_gen(uchar_t *addr, uint_t addr_length, t_uscalar_t sap, } 
/* - * Add the 'mp' to the list of pending mp's headed by ill_pending_mp - * Return an error if we already have 1 or more ioctls in progress. - * This is used only for non-exclusive ioctls. Currently this is used - * for SIOC*ARP and SIOCGTUNPARAM ioctls. Most set ioctls are exclusive - * and thus need to use ipsq_pending_mp_add. + * Add the 'mp' to the list of pending mp's headed by ill_pending_mp. Return + * an error if we already have 1 or more ioctls in progress. This is only + * needed for SIOCG*ARP. */ boolean_t ill_pending_mp_add(ill_t *ill, conn_t *connp, mblk_t *add_mp) { ASSERT(MUTEX_HELD(&ill->ill_lock)); ASSERT((add_mp->b_next == NULL) && (add_mp->b_prev == NULL)); - /* - * M_IOCDATA from ioctls, M_IOCTL from tunnel ioctls. - */ - ASSERT((add_mp->b_datap->db_type == M_IOCDATA) || - (add_mp->b_datap->db_type == M_IOCTL)); + /* We should only see M_IOCDATA arp ioctls here. */ + ASSERT(add_mp->b_datap->db_type == M_IOCDATA); ASSERT(MUTEX_HELD(&connp->conn_lock)); /* @@ -1180,12 +1193,12 @@ ipsq_pending_mp_add(conn_t *connp, ipif_t *ipif, queue_t *q, mblk_t *add_mp, ASSERT(ipx->ipx_current_ipif != NULL); /* - * M_IOCDATA from ioctls, M_IOCTL from tunnel ioctls, - * M_ERROR/M_HANGUP/M_PROTO/M_PCPROTO from the driver. + * M_IOCDATA from ioctls, M_ERROR/M_HANGUP/M_PROTO/M_PCPROTO from the + * driver. */ - ASSERT((DB_TYPE(add_mp) == M_IOCDATA) || (DB_TYPE(add_mp) == M_IOCTL) || - (DB_TYPE(add_mp) == M_ERROR) || (DB_TYPE(add_mp) == M_HANGUP) || - (DB_TYPE(add_mp) == M_PROTO) || (DB_TYPE(add_mp) == M_PCPROTO)); + ASSERT((DB_TYPE(add_mp) == M_IOCDATA) || (DB_TYPE(add_mp) == M_ERROR) || + (DB_TYPE(add_mp) == M_HANGUP) || (DB_TYPE(add_mp) == M_PROTO) || + (DB_TYPE(add_mp) == M_PCPROTO)); if (connp != NULL) { ASSERT(MUTEX_HELD(&connp->conn_lock)); @@ -5304,10 +5317,7 @@ ip_ll_subnet_defaults(ill_t *ill, mblk_t *mp) * second DL_INFO_ACK we are recieving in response to the * DL_INFO_REQ sent in ipif_set_values. 
*/ - if (ill->ill_isv6) - ill->ill_sap = IP6_DL_SAP; - else - ill->ill_sap = IP_DL_SAP; + ill->ill_sap = (ill->ill_isv6) ? ipm->ip_m_ipv6sap : ipm->ip_m_ipv4sap; /* * Set ipif_mtu which is used to set the IRE's * ire_max_frag value. The driver could have sent @@ -5360,14 +5370,19 @@ ip_ll_subnet_defaults(ill_t *ill, mblk_t *mp) else ill->ill_flags |= ILLF_NOARP; - if (ill->ill_phys_addr_length == 0) { - if (ill->ill_media->ip_m_mac_type == SUNW_DL_VNI) { - ill->ill_ipif->ipif_flags |= IPIF_NOXMIT; - } else { - /* pt-pt supports multicast. */ - ill->ill_flags |= ILLF_MULTICAST; - ill->ill_ipif->ipif_flags |= IPIF_POINTOPOINT; - } + if (ill->ill_mactype == SUNW_DL_VNI) { + ill->ill_ipif->ipif_flags |= IPIF_NOXMIT; + } else if (ill->ill_phys_addr_length == 0 || + ill->ill_mactype == DL_IPV4 || + ill->ill_mactype == DL_IPV6) { + /* + * The underying link is point-to-point, so mark the + * interface as such. We can do IP multicast over + * such a link since it transmits all network-layer + * packets to the remote side the same way. + */ + ill->ill_flags |= ILLF_MULTICAST; + ill->ill_ipif->ipif_flags |= IPIF_POINTOPOINT; } } else { ill->ill_net_type = IRE_IF_RESOLVER; @@ -8089,80 +8104,6 @@ ipsq_flush(ill_t *ill) } /* - * Parse an iftun_req structure coming down SIOC[GS]TUNPARAM ioctls, - * refhold and return the associated ipif - */ -/* ARGSUSED */ -int -ip_extract_tunreq(queue_t *q, mblk_t *mp, const ip_ioctl_cmd_t *ipip, - cmd_info_t *ci, ipsq_func_t func) -{ - boolean_t exists; - struct iftun_req *ta; - ipif_t *ipif; - ill_t *ill; - boolean_t isv6; - mblk_t *mp1; - int error; - conn_t *connp; - ip_stack_t *ipst; - - /* Existence verified in ip_wput_nondata */ - mp1 = mp->b_cont->b_cont; - ta = (struct iftun_req *)mp1->b_rptr; - /* - * Null terminate the string to protect against buffer - * overrun. String was generated by user code and may not - * be trusted. 
- */ - ta->ifta_lifr_name[LIFNAMSIZ - 1] = '\0'; - - connp = Q_TO_CONN(q); - isv6 = connp->conn_af_isv6; - ipst = connp->conn_netstack->netstack_ip; - - /* Disallows implicit create */ - ipif = ipif_lookup_on_name(ta->ifta_lifr_name, - mi_strlen(ta->ifta_lifr_name), B_FALSE, &exists, isv6, - connp->conn_zoneid, CONNP_TO_WQ(connp), mp, func, &error, ipst); - if (ipif == NULL) - return (error); - - if (ipif->ipif_id != 0) { - /* - * We really don't want to set/get tunnel parameters - * on virtual tunnel interfaces. Only allow the - * base tunnel to do these. - */ - ipif_refrele(ipif); - return (EINVAL); - } - - /* - * Send down to tunnel mod for ioctl processing. - * Will finish ioctl in ip_rput_other(). - */ - ill = ipif->ipif_ill; - if (ill->ill_net_type == IRE_LOOPBACK) { - ipif_refrele(ipif); - return (EOPNOTSUPP); - } - - if (ill->ill_wq == NULL) { - ipif_refrele(ipif); - return (ENXIO); - } - /* - * Mark the ioctl as coming from an IPv6 interface for - * tun's convenience. - */ - if (ill->ill_isv6) - ta->ifta_flags |= 0x80000000; - ci->ci_ipif = ipif; - return (0); -} - -/* * Parse an ifreq or lifreq struct coming down ioctls and refhold * and return the associated ipif. * Return value: @@ -9286,61 +9227,6 @@ ip_sioctl_tmysite(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, return (ENXIO); } -/* ARGSUSED */ -int -ip_sioctl_tunparam(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, - ip_ioctl_cmd_t *ipip, void *dummy_ifreq) -{ - ill_t *ill; - mblk_t *mp1; - conn_t *connp; - boolean_t success; - - ip1dbg(("ip_sioctl_tunparam(%s:%u %p)\n", - ipif->ipif_ill->ill_name, ipif->ipif_id, (void *)ipif)); - /* ioctl comes down on an conn */ - ASSERT(!(q->q_flag & QREADR) && q->q_next == NULL); - connp = Q_TO_CONN(q); - - mp->b_datap->db_type = M_IOCTL; - - /* - * Send down a copy. (copymsg does not copy b_next/b_prev). - * The original mp contains contaminated b_next values due to 'mi', - * which is needed to do the mi_copy_done. 
Unfortunately if we - * send down the original mblk itself and if we are popped due to an - * an unplumb before the response comes back from tunnel, - * the streamhead (which does a freemsg) will see this contaminated - * message and the assertion in freemsg about non-null b_next/b_prev - * will panic a DEBUG kernel. - */ - mp1 = copymsg(mp); - if (mp1 == NULL) - return (ENOMEM); - - ill = ipif->ipif_ill; - mutex_enter(&connp->conn_lock); - mutex_enter(&ill->ill_lock); - if (ipip->ipi_cmd == SIOCSTUNPARAM || ipip->ipi_cmd == OSIOCSTUNPARAM) { - success = ipsq_pending_mp_add(connp, ipif, CONNP_TO_WQ(connp), - mp, 0); - } else { - success = ill_pending_mp_add(ill, connp, mp); - } - mutex_exit(&ill->ill_lock); - mutex_exit(&connp->conn_lock); - - if (success) { - ip1dbg(("sending down tunparam request ")); - putnext(ill->ill_wq, mp1); - return (EINPROGRESS); - } else { - /* The conn has started closing */ - freemsg(mp1); - return (EINTR); - } -} - /* * ARP IOCTLs. * How does IP get in the business of fronting ARP configuration/queries? @@ -11118,35 +11004,18 @@ ip_sioctl_addr_tail(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, } } - if (ipif->ipif_isv6 && IN6_IS_ADDR_6TO4(&v6addr) && - !ill->ill_is_6to4tun) { - queue_t *wqp = ill->ill_wq; - - /* - * The local address of this interface is a 6to4 address, - * check if this interface is in fact a 6to4 tunnel or just - * an interface configured with a 6to4 address. We are only - * interested in the former. 
- */ - if (wqp != NULL) { - while ((wqp->q_next != NULL) && - (wqp->q_next->q_qinfo != NULL) && - (wqp->q_next->q_qinfo->qi_minfo != NULL)) { - - if (wqp->q_next->q_qinfo->qi_minfo->mi_idnum - == TUN6TO4_MODID) { - /* set for use in IP */ - ill->ill_is_6to4tun = 1; - break; - } - wqp = wqp->q_next; - } - } - } - ipif_set_default(ipif); /* + * If we've just manually set the IPv6 link-local address (0th ipif), + * tag the ill so that future updates to the interface ID don't result + * in this address getting automatically reconfigured from under the + * administrator. + */ + if (ipif->ipif_isv6 && ipif->ipif_id == 0) + ill->ill_manual_linklocal = 1; + + /* * When publishing an interface address change event, we only notify * the event listeners of the new address. It is assumed that if they * actively care about the addresses assigned that they will have @@ -12607,6 +12476,11 @@ ip_sioctl_token_tail(ipif_t *ipif, sin6_t *sin6, int addrlen, queue_t *q, mutex_enter(&ill->ill_lock); V6_MASK_COPY(v6addr, v6mask, ill->ill_token); ill->ill_token_length = addrlen; + ill->ill_manual_token = 1; + + /* Reconfigure the link-local address based on this new token */ + ipif_setlinklocal(ill->ill_ipif); + mutex_exit(&ill->ill_lock); if (need_up) { @@ -15180,12 +15054,10 @@ ipif_get_name(const ipif_t *ipif, char *buf, int len) } /* - * Find an IPIF based on the name passed in. Names can be of the - * form <phys> (e.g., le0), <phys>:<#> (e.g., le0:1), - * The <phys> string can have forms like <dev><#> (e.g., le0), - * <dev><#>.<module> (e.g. le0.foo), or <dev>.<module><#> (e.g. ip.tun3). - * When there is no colon, the implied unit id is zero. <phys> must - * correspond to the name of an ILL. (May be called as writer.) + * Find an IPIF based on the name passed in. Names can be of the form <phys> + * (e.g., le0) or <phys>:<#> (e.g., le0:1). When there is no colon, the + * implied unit id is zero. <phys> must correspond to the name of an ILL. + * (May be called as writer.) 
*/ static ipif_t * ipif_lookup_on_name(char *name, size_t namelen, boolean_t do_alloc, @@ -18808,12 +18680,8 @@ ipif_set_values(queue_t *q, mblk_t *mp, char *interf_name, uint_t *new_ppa_ptr) * Pick a default sap until we get the DL_INFO_ACK back from * the driver. */ - if (ill->ill_sap == 0) { - if (ill->ill_isv6) - ill->ill_sap = IP6_DL_SAP; - else - ill->ill_sap = IP_DL_SAP; - } + ill->ill_sap = (ill->ill_isv6) ? ill->ill_media->ip_m_ipv6sap : + ill->ill_media->ip_m_ipv4sap; ill->ill_ifname_pending = 1; ill->ill_ifname_pending_err = 0; @@ -19573,13 +19441,12 @@ ill_ipsec_capab_send_all(uint_t sa_type, mblk_t *mp, ipsa_t *sa, * Derive an interface id from the link layer address. * Knows about IEEE 802 and IEEE EUI-64 mappings. */ -static boolean_t +static void ip_ether_v6intfid(ill_t *ill, in6_addr_t *v6addr) { char *addr; - if (ill->ill_phys_addr_length != ETHERADDRL) - return (B_FALSE); + ASSERT(ill->ill_phys_addr_length == ETHERADDRL); /* Form EUI-64 like address */ addr = (char *)&v6addr->s6_addr32[2]; @@ -19588,14 +19455,12 @@ ip_ether_v6intfid(ill_t *ill, in6_addr_t *v6addr) addr[3] = (char)0xff; addr[4] = (char)0xfe; bcopy(ill->ill_phys_addr + 3, addr + 5, 3); - return (B_TRUE); } /* ARGSUSED */ -static boolean_t +static void ip_nodef_v6intfid(ill_t *ill, in6_addr_t *v6addr) { - return (B_FALSE); } typedef struct ipmp_ifcookie { @@ -19608,7 +19473,7 @@ typedef struct ipmp_ifcookie { * Construct a pseudo-random interface ID for the IPMP interface that's both * predictable and (almost) guaranteed to be unique. 
*/ -static boolean_t +static void ip_ipmp_v6intfid(ill_t *ill, in6_addr_t *v6addr) { zone_t *zp; @@ -19640,8 +19505,6 @@ ip_ipmp_v6intfid(ill_t *ill, in6_addr_t *v6addr) addr = &v6addr->s6_addr8[8]; bcopy(hash + 8, addr, sizeof (uint64_t)); addr[0] &= ~0x2; /* set local bit */ - - return (B_TRUE); } /* ARGSUSED */ @@ -19695,16 +19558,31 @@ ip_ether_v4mapinfo(uint_t phys_length, uint8_t *bphys_addr, uint8_t *maddr, return (B_TRUE); } +/* ARGSUSED */ +static boolean_t +ip_nodef_v4mapinfo(uint_t phys_length, uint8_t *bphys_addr, uint8_t *maddr, + uint32_t *hw_start, ipaddr_t *extract_mask) +{ + return (B_FALSE); +} + +/* ARGSUSED */ +static boolean_t +ip_nodef_v6mapinfo(uint_t lla_length, uint8_t *bphys_addr, uint8_t *maddr, + uint32_t *hw_start, in6_addr_t *v6_extract_mask) +{ + return (B_FALSE); +} + /* * Derive IPoIB interface id from the link layer address. */ -static boolean_t +static void ip_ib_v6intfid(ill_t *ill, in6_addr_t *v6addr) { char *addr; - if (ill->ill_phys_addr_length != 20) - return (B_FALSE); + ASSERT(ill->ill_phys_addr_length == 20); addr = (char *)&v6addr->s6_addr32[2]; bcopy(ill->ill_phys_addr + 12, addr, 8); /* @@ -19719,7 +19597,6 @@ ip_ib_v6intfid(ill_t *ill, in6_addr_t *v6addr) * bit set to 1. */ addr[0] |= 2; /* Set Universal/Local bit to 1 */ - return (B_TRUE); } /* @@ -19805,6 +19682,58 @@ ip_ib_v4mapinfo(uint_t phys_length, uint8_t *bphys_addr, uint8_t *maddr, } /* + * Derive IPv6 interface id from an IPv4 link-layer address (e.g. from an IPv4 + * tunnel). The IPv4 address simply get placed in the lower 4 bytes of the + * IPv6 interface id. This is a suggested mechanism described in section 3.7 + * of RFC4213. + */ +static void +ip_ipv4_genv6intfid(ill_t *ill, uint8_t *physaddr, in6_addr_t *v6addr) +{ + ASSERT(ill->ill_phys_addr_length == sizeof (ipaddr_t)); + v6addr->s6_addr32[2] = 0; + bcopy(physaddr, &v6addr->s6_addr32[3], sizeof (ipaddr_t)); +} + +/* + * Derive IPv6 interface id from an IPv6 link-layer address (e.g. 
from an IPv6 + * tunnel). The lower 8 bytes of the IPv6 address simply become the interface + * id. + */ +static void +ip_ipv6_genv6intfid(ill_t *ill, uint8_t *physaddr, in6_addr_t *v6addr) +{ + in6_addr_t *v6lladdr = (in6_addr_t *)physaddr; + + ASSERT(ill->ill_phys_addr_length == sizeof (in6_addr_t)); + bcopy(&v6lladdr->s6_addr32[2], &v6addr->s6_addr32[2], 8); +} + +static void +ip_ipv6_v6intfid(ill_t *ill, in6_addr_t *v6addr) +{ + ip_ipv6_genv6intfid(ill, ill->ill_phys_addr, v6addr); +} + +static void +ip_ipv6_v6destintfid(ill_t *ill, in6_addr_t *v6addr) +{ + ip_ipv6_genv6intfid(ill, ill->ill_dest_addr, v6addr); +} + +static void +ip_ipv4_v6intfid(ill_t *ill, in6_addr_t *v6addr) +{ + ip_ipv4_genv6intfid(ill, ill->ill_phys_addr, v6addr); +} + +static void +ip_ipv4_v6destintfid(ill_t *ill, in6_addr_t *v6addr) +{ + ip_ipv4_genv6intfid(ill, ill->ill_dest_addr, v6addr); +} + +/* * Returns B_TRUE if an ipif is present in the given zone, matching some flags * (typically IPIF_UP). If ipifp is non-null, the held ipif is returned there. 
* This works for both IPv4 and IPv6; if the passed-in ill is v6, the ipif with @@ -19926,6 +19855,7 @@ ill_set_phys_addr(ill_t *ill, mblk_t *mp) ASSERT(IAM_WRITER_IPSQ(ipsq)); if (dlindp->dl_data != DL_IPV6_LINK_LAYER_ADDR && + dlindp->dl_data != DL_CURR_DEST_ADDR && dlindp->dl_data != DL_CURR_PHYS_ADDR) { /* Changing DL_IPV6_TOKEN is not yet supported */ return (0); @@ -19992,16 +19922,30 @@ ill_set_phys_addr_tail(ipsq_t *ipsq, queue_t *q, mblk_t *addrmp, void *dummy) freemsg(addrmp2); break; + case DL_CURR_DEST_ADDR: + freemsg(ill->ill_dest_addr_mp); + ill->ill_dest_addr = addrmp->b_rptr + addroff; + ill->ill_dest_addr_mp = addrmp; + if (ill->ill_isv6) { + ill_setdesttoken(ill); + ipif_setdestlinklocal(ill->ill_ipif); + } + freemsg(addrmp2); + break; + case DL_CURR_PHYS_ADDR: freemsg(ill->ill_phys_addr_mp); ill->ill_phys_addr = addrmp->b_rptr + addroff; ill->ill_phys_addr_mp = addrmp; ill->ill_phys_addr_length = addrlen; - if (ill->ill_isv6 && !(ill->ill_flags & ILLF_XRESOLV)) ill_set_ndmp(ill, addrmp2, addroff, addrlen); else freemsg(addrmp2); + if (ill->ill_isv6) { + ill_setdefaulttoken(ill); + ipif_setlinklocal(ill->ill_ipif); + } break; default: ASSERT(0); diff --git a/usr/src/uts/common/inet/ip/ip_ire.c b/usr/src/uts/common/inet/ip/ip_ire.c index ba1c2015f3..63a6863844 100644 --- a/usr/src/uts/common/inet/ip/ip_ire.c +++ b/usr/src/uts/common/inet/ip/ip_ire.c @@ -124,7 +124,7 @@ struct kmem_cache *rt_entry_cache; * ire_marks * - bucket lock protects this. * - * ire_ipsec_overhead/ire_ll_hdr_length + * ire_ll_hdr_length * * - Place holder for returning the information to the upper layers * when IRE_DB_REQ comes down. @@ -791,14 +791,6 @@ ip_ire_req(queue_t *q, mblk_t *mp) sizeof (iulp_t)); } - /* - * As we don't lookup global policy here, we may not - * pass the right size if per-socket policy is not - * present. For these cases, path mtu discovery will - * do the right thing. 
- */ - inire->ire_ipsec_overhead = conn_ipsec_length(Q_TO_CONN(q)); - /* Pass the latest setting of the ip_path_mtu_discovery */ inire->ire_frag_flag |= (ipst->ips_ip_path_mtu_discovery) ? IPH_DF : 0; diff --git a/usr/src/uts/common/inet/ip/ip_ndp.c b/usr/src/uts/common/inet/ip/ip_ndp.c index 51f1487847..35f9d541e8 100644 --- a/usr/src/uts/common/inet/ip/ip_ndp.c +++ b/usr/src/uts/common/inet/ip/ip_ndp.c @@ -177,12 +177,26 @@ ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, if (ill->ill_net_type == IRE_IF_RESOLVER) { template = nce_udreq_alloc(ill); } else { - if (ill->ill_resolver_mp == NULL) { - freeb(mp); - return (EINVAL); + if (ill->ill_phys_addr_length == IPV6_ADDR_LEN && + ill->ill_mactype != DL_IPV6) { + /* + * We create a nce_res_mp with the IP nexthop address + * as the destination address if the physical length + * is exactly 16 bytes for point-to-multipoint links + * that do their own resolution from IP to link-layer + * address. + */ + template = ill_dlur_gen((uchar_t *)addr, + ill->ill_phys_addr_length, ill->ill_sap, + ill->ill_sap_length); + } else { + if (ill->ill_resolver_mp == NULL) { + freeb(mp); + return (EINVAL); + } + ASSERT((ill->ill_net_type == IRE_IF_NORESOLVER)); + template = copyb(ill->ill_resolver_mp); } - ASSERT((ill->ill_net_type == IRE_IF_NORESOLVER)); - template = copyb(ill->ill_resolver_mp); } if (template == NULL) { freeb(mp); @@ -1229,7 +1243,7 @@ ndp_noresolver(ill_t *ill, const in6_addr_t *dst) err = ndp_lookup_then_add_v6(ill, B_FALSE, /* NCE fastpath is per ill; don't match across group */ - NULL, /* hardware address */ + ill->ill_dest_addr, /* hardware address is NULL in most cases */ dst, &ipv6_all_ones, &ipv6_all_zeros, @@ -3672,14 +3686,18 @@ ndp_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags, } else if (ill->ill_net_type == IRE_IF_NORESOLVER) { /* * NORESOLVER entries are always created in the REACHABLE - * state. 
We create a nce_res_mp with the IP nexthop address - * in the destination address in the DLPI hdr if the - * physical length is exactly 4 bytes. - * - * XXX not clear which drivers set ill_phys_addr_length to - * IP_ADDR_LEN. + * state. */ - if (ill->ill_phys_addr_length == IP_ADDR_LEN) { + if (ill->ill_phys_addr_length == IP_ADDR_LEN && + ill->ill_mactype != DL_IPV4 && + ill->ill_mactype != DL_6TO4) { + /* + * We create a nce_res_mp with the IP nexthop address + * as the destination address if the physical length + * is exactly 4 bytes for point-to-multipoint links + * that do their own resolution from IP to link-layer + * address (e.g. IP over X.25). + */ template = ill_dlur_gen((uchar_t *)addr, ill->ill_phys_addr_length, ill->ill_sap, ill->ill_sap_length); diff --git a/usr/src/uts/common/inet/ip/ip_netinfo.c b/usr/src/uts/common/inet/ip/ip_netinfo.c index 75ac1dbf8b..52b4da0e01 100644 --- a/usr/src/uts/common/inet/ip/ip_netinfo.c +++ b/usr/src/uts/common/inet/ip/ip_netinfo.c @@ -1517,7 +1517,6 @@ ip_ni_queue_func_impl(injection_t *inject, boolean_t out) conn = ipcl_conn_create(IPCL_IPCCONN, KM_NOSLEEP, ipst->ips_netstack); if (conn != NULL) { if (inject->inj_isv6) { - conn->conn_flags |= IPCL_ISV6; conn->conn_af_isv6 = B_TRUE; conn->conn_src_preferences = IPV6_PREFER_SRC_DEFAULT; conn->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; diff --git a/usr/src/uts/common/inet/ip/ip_srcid.c b/usr/src/uts/common/inet/ip/ip_srcid.c index 9744006560..949508a796 100644 --- a/usr/src/uts/common/inet/ip/ip_srcid.c +++ b/usr/src/uts/common/inet/ip/ip_srcid.c @@ -106,7 +106,6 @@ #include <inet/sadb.h> #include <sys/kmem.h> #include <inet/ipsec_impl.h> -#include <inet/tun.h> static uint_t srcid_nextid(ip_stack_t *); static srcid_map_t **srcid_lookup_addr(const in6_addr_t *addr, diff --git a/usr/src/uts/common/inet/ip/ipclassifier.c b/usr/src/uts/common/inet/ip/ipclassifier.c index 031034e3b1..20bf8b6b64 100644 --- a/usr/src/uts/common/inet/ip/ipclassifier.c +++ 
b/usr/src/uts/common/inet/ip/ipclassifier.c @@ -70,6 +70,7 @@ * ipcl_proto_fanout: IPv4 protocol fanout * ipcl_proto_fanout_v6: IPv6 protocol fanout * ipcl_udp_fanout: contains all UDP connections + * ipcl_iptun_fanout: contains all IP tunnel connections * ipcl_globalhash_fanout: contains all connections * * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering) @@ -268,6 +269,7 @@ #include <inet/sctp/sctp_impl.h> #include <inet/rawip_impl.h> #include <inet/rts_impl.h> +#include <inet/iptun/iptun_impl.h> #include <sys/cpuvar.h> @@ -307,6 +309,13 @@ uint_t ipcl_udp_fanout_size = 16384; uint_t ipcl_raw_fanout_size = 256; /* + * The IPCL_IPTUN_HASH() function works best with a prime table size. We + * expect that most large deployments would have hundreds of tunnels, and + * thousands in the extreme case. + */ +uint_t ipcl_iptun_fanout_size = 6143; + +/* * Power of 2^N Primes useful for hashing for N of 0-28, * these primes are the nearest prime <= 2^N - 2^(N-2). */ @@ -464,6 +473,7 @@ ipcl_init(ip_stack_t *ipst) ipst->ips_ipcl_bind_fanout_size = ipcl_bind_fanout_size; ipst->ips_ipcl_udp_fanout_size = ipcl_udp_fanout_size; ipst->ips_ipcl_raw_fanout_size = ipcl_raw_fanout_size; + ipst->ips_ipcl_iptun_fanout_size = ipcl_iptun_fanout_size; ASSERT(ipst->ips_ipcl_conn_fanout == NULL); @@ -508,6 +518,13 @@ ipcl_init(ip_stack_t *ipst) MUTEX_DEFAULT, NULL); } + ipst->ips_ipcl_iptun_fanout = kmem_zalloc( + ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t), KM_SLEEP); + for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) { + mutex_init(&ipst->ips_ipcl_iptun_fanout[i].connf_lock, NULL, + MUTEX_DEFAULT, NULL); + } + ipst->ips_ipcl_raw_fanout = kmem_zalloc( ipst->ips_ipcl_raw_fanout_size * sizeof (connf_t), KM_SLEEP); for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) { @@ -581,6 +598,14 @@ ipcl_destroy(ip_stack_t *ipst) sizeof (connf_t)); ipst->ips_ipcl_udp_fanout = NULL; + for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) { + 
ASSERT(ipst->ips_ipcl_iptun_fanout[i].connf_head == NULL); + mutex_destroy(&ipst->ips_ipcl_iptun_fanout[i].connf_lock); + } + kmem_free(ipst->ips_ipcl_iptun_fanout, + ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t)); + ipst->ips_ipcl_iptun_fanout = NULL; + for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) { ASSERT(ipst->ips_ipcl_raw_fanout[i].connf_head == NULL); mutex_destroy(&ipst->ips_ipcl_raw_fanout[i].connf_lock); @@ -1022,6 +1047,66 @@ ipcl_proto_insert_v6(conn_t *connp, uint8_t protocol) } /* + * Because the classifier is used to classify inbound packets, the destination + * address is meant to be our local tunnel address (tunnel source), and the + * source the remote tunnel address (tunnel destination). + */ +conn_t * +ipcl_iptun_classify_v4(ipaddr_t *src, ipaddr_t *dst, ip_stack_t *ipst) +{ + connf_t *connfp; + conn_t *connp; + + /* first look for IPv4 tunnel links */ + connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst, *src)]; + mutex_enter(&connfp->connf_lock); + for (connp = connfp->connf_head; connp != NULL; + connp = connp->conn_next) { + if (IPCL_IPTUN_MATCH(connp, *dst, *src)) + break; + } + if (connp != NULL) + goto done; + + mutex_exit(&connfp->connf_lock); + + /* We didn't find an IPv4 tunnel, try a 6to4 tunnel */ + connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst, + INADDR_ANY)]; + mutex_enter(&connfp->connf_lock); + for (connp = connfp->connf_head; connp != NULL; + connp = connp->conn_next) { + if (IPCL_IPTUN_MATCH(connp, *dst, INADDR_ANY)) + break; + } +done: + if (connp != NULL) + CONN_INC_REF(connp); + mutex_exit(&connfp->connf_lock); + return (connp); +} + +conn_t * +ipcl_iptun_classify_v6(in6_addr_t *src, in6_addr_t *dst, ip_stack_t *ipst) +{ + connf_t *connfp; + conn_t *connp; + + /* Look for an IPv6 tunnel link */ + connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(dst, src)]; + mutex_enter(&connfp->connf_lock); + for (connp = connfp->connf_head; connp != NULL; + connp = connp->conn_next) { + if 
(IPCL_IPTUN_MATCH_V6(connp, dst, src)) { + CONN_INC_REF(connp); + break; + } + } + mutex_exit(&connfp->connf_lock); + return (connp); +} + +/* * This function is used only for inserting SCTP raw socket now. * This may change later. * @@ -1071,6 +1156,50 @@ ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport) return (0); } +static int +ipcl_iptun_hash_insert(conn_t *connp, ipaddr_t src, ipaddr_t dst, + ip_stack_t *ipst) +{ + connf_t *connfp; + conn_t *tconnp; + + connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(src, dst)]; + mutex_enter(&connfp->connf_lock); + for (tconnp = connfp->connf_head; tconnp != NULL; + tconnp = tconnp->conn_next) { + if (IPCL_IPTUN_MATCH(tconnp, src, dst)) { + /* A tunnel is already bound to these addresses. */ + mutex_exit(&connfp->connf_lock); + return (EADDRINUSE); + } + } + IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); + mutex_exit(&connfp->connf_lock); + return (0); +} + +static int +ipcl_iptun_hash_insert_v6(conn_t *connp, const in6_addr_t *src, + const in6_addr_t *dst, ip_stack_t *ipst) +{ + connf_t *connfp; + conn_t *tconnp; + + connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(src, dst)]; + mutex_enter(&connfp->connf_lock); + for (tconnp = connfp->connf_head; tconnp != NULL; + tconnp = tconnp->conn_next) { + if (IPCL_IPTUN_MATCH_V6(tconnp, src, dst)) { + /* A tunnel is already bound to these addresses. */ + mutex_exit(&connfp->connf_lock); + return (EADDRINUSE); + } + } + IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); + mutex_exit(&connfp->connf_lock); + return (0); +} + /* * Check for a MAC exemption conflict on a labeled system. 
Note that for * protocols that use port numbers (UDP, TCP, SCTP), we do this check up in the @@ -1162,6 +1291,9 @@ ipcl_bind_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, uint16_t lport) IN6_IPADDR_TO_V4MAPPED(src, &connp->conn_srcv6); connp->conn_lport = lport; + if (IPCL_IS_IPTUN(connp)) + return (ipcl_iptun_hash_insert(connp, src, INADDR_ANY, ipst)); + switch (protocol) { default: if (is_system_labeled() && @@ -1224,16 +1356,19 @@ int ipcl_bind_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src, uint16_t lport) { - connf_t *connfp; - int ret = 0; + connf_t *connfp; + int ret = 0; ip_stack_t *ipst = connp->conn_netstack->netstack_ip; - ASSERT(connp); - - connp->conn_ulp = protocol; + ASSERT(connp != NULL); connp->conn_ulp = protocol; connp->conn_srcv6 = *src; connp->conn_lport = lport; + if (IPCL_IS_IPTUN(connp)) { + return (ipcl_iptun_hash_insert_v6(connp, src, &ipv6_all_zeros, + ipst)); + } + switch (protocol) { default: if (is_system_labeled() && @@ -1324,6 +1459,9 @@ ipcl_conn_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, inet_ntoa_r(src, sbuf), inet_ntoa_r(rem, rbuf), ports, protocol)); + if (IPCL_IS_IPTUN(connp)) + return (ipcl_iptun_hash_insert(connp, src, rem, ipst)); + switch (protocol) { case IPPROTO_TCP: if (!(connp->conn_flags & IPCL_EAGER)) { @@ -1433,6 +1571,9 @@ ipcl_conn_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src, int ret = 0; ip_stack_t *ipst = connp->conn_netstack->netstack_ip; + if (IPCL_IS_IPTUN(connp)) + return (ipcl_iptun_hash_insert_v6(connp, src, rem, ipst)); + switch (protocol) { case IPPROTO_TCP: /* Just need to insert a conn struct */ @@ -1715,6 +1856,11 @@ ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid, ("ipcl_classify: cant find udp conn_t for ports : %x %x", lport, fport)); break; + + case IPPROTO_ENCAP: + case IPPROTO_IPV6: + return (ipcl_iptun_classify_v4(&ipha->ipha_src, + &ipha->ipha_dst, ipst)); } return (NULL); @@ -1915,6 +2061,10 @@ 
ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid, ("ipcl_classify_v6: cant find udp conn_t for ports : %x %x", lport, fport)); break; + case IPPROTO_ENCAP: + case IPPROTO_IPV6: + return (ipcl_iptun_classify_v6(&ip6h->ip6_src, + &ip6h->ip6_dst, ipst)); } return (NULL); diff --git a/usr/src/uts/common/inet/ip/ipmp.c b/usr/src/uts/common/inet/ip/ipmp.c index c0347f088b..ea8b4a73bb 100644 --- a/usr/src/uts/common/inet/ip/ipmp.c +++ b/usr/src/uts/common/inet/ip/ipmp.c @@ -2059,14 +2059,12 @@ ipmp_phyint_get_kstats(phyint_t *phyi, uint64_t kstats[]) const char *name; kstat_t *ksp; kstat_named_t *kn; + ip_stack_t *ipst = PHYINT_TO_IPST(phyi); + zoneid_t zoneid; bzero(kstats, sizeof (kstats[0]) * IPMP_KSTAT_MAX); - - /* - * NOTE: ALL_ZONES here assumes that there's at most one link - * with a given name on a given system (safe for now). - */ - ksp = kstat_hold_byname("link", 0, phyi->phyint_name, ALL_ZONES); + zoneid = netstackid_to_zoneid(ipst->ips_netstack->netstack_stackid); + ksp = kstat_hold_byname("link", 0, phyi->phyint_name, zoneid); if (ksp == NULL) return; diff --git a/usr/src/uts/common/inet/ip/sadb.c b/usr/src/uts/common/inet/ip/sadb.c index 32733026dc..181ad1bdc2 100644 --- a/usr/src/uts/common/inet/ip/sadb.c +++ b/usr/src/uts/common/inet/ip/sadb.c @@ -63,7 +63,6 @@ #include <inet/ipdrop.h> #include <inet/ipclassifier.h> #include <inet/sctp_ip.h> -#include <inet/tun.h> /* * This source file contains Security Association Database (SADB) common @@ -6618,12 +6617,9 @@ ipsec_tun_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, * Reset "sel" to indicate inner selectors. Pass * inner PF_KEY address extensions for this to happen. */ - err = ipsec_get_inverse_acquire_sel(sel, - innsrcext, inndstext, diagnostic); - if (err != 0) { - ITP_REFRELE(itp, ns); + if ((err = ipsec_get_inverse_acquire_sel(sel, + innsrcext, inndstext, diagnostic)) != 0) return (err); - } /* * Now look for a tunnel policy based on those inner * selectors. 
(Common code is below.) @@ -6637,13 +6633,9 @@ ipsec_tun_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, * configured - return to indicate a global policy * check is needed. */ - if (itp != NULL) { - ITP_REFRELE(itp, ns); - } return (0); } else if (itp->itp_flags & ITPF_P_TUNNEL) { /* Tunnel mode set with no inner selectors. */ - ITP_REFRELE(itp, ns); return (ENOENT); } /* @@ -6661,7 +6653,6 @@ ipsec_tun_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, *ppp = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_INBOUND, sel, ns); rw_exit(&polhead->iph_lock); - ITP_REFRELE(itp, ns); /* * Don't default to global if we didn't find a matching policy entry. @@ -6745,7 +6736,6 @@ ipsec_construct_inverse_acquire(sadb_msg_t *samsg, sadb_ext_t *extv[], ipsec_selector_t sel, isel; mblk_t *retmp; ip_stack_t *ipst = ns->netstack_ip; - ipsec_stack_t *ipss = ns->netstack_ipsec; /* Normalize addresses */ if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)srcext, 0, ns) @@ -6838,16 +6828,14 @@ ipsec_construct_inverse_acquire(sadb_msg_t *samsg, sadb_ext_t *extv[], break; case IPPROTO_ENCAP: case IPPROTO_IPV6: - rw_enter(&ipss->ipsec_itp_get_byaddr_rw_lock, RW_READER); /* * Assume sel.ips_remote_addr_* has the right address at * that exact position. 
*/ - itp = ipss->ipsec_itp_get_byaddr( - (uint32_t *)(&sel.ips_local_addr_v6), - (uint32_t *)(&sel.ips_remote_addr_v6), - src->sin6_family, ns); - rw_exit(&ipss->ipsec_itp_get_byaddr_rw_lock); + itp = itp_get_byaddr((uint32_t *)(&sel.ips_local_addr_v6), + (uint32_t *)(&sel.ips_remote_addr_v6), src->sin6_family, + ipst); + if (innsrcext == NULL) { /* * Transport-mode tunnel, make sure we fake out isel @@ -6896,6 +6884,9 @@ ipsec_construct_inverse_acquire(sadb_msg_t *samsg, sadb_ext_t *extv[], if (pp != NULL) { IPPOL_REFRELE(pp, ns); } + if (itp != NULL) { + ITP_REFRELE(itp, ns); + } if (retmp != NULL) { return (retmp); } else { diff --git a/usr/src/uts/common/inet/ip/spd.c b/usr/src/uts/common/inet/ip/spd.c index 1fba5d9c4a..6d4e6f8036 100644 --- a/usr/src/uts/common/inet/ip/spd.c +++ b/usr/src/uts/common/inet/ip/spd.c @@ -23,8 +23,6 @@ * Use is subject to license terms. */ -#pragma ident "@(#)spd.c 1.61 08/07/15 SMI" - /* * IPsec Security Policy Database. * @@ -70,7 +68,8 @@ #include <inet/ipsecesp.h> #include <inet/ipdrop.h> #include <inet/ipclassifier.h> -#include <inet/tun.h> +#include <inet/iptun.h> +#include <inet/iptun/iptun_impl.h> static void ipsec_update_present_flags(ipsec_stack_t *); static ipsec_act_t *ipsec_act_wildcard_expand(ipsec_act_t *, uint_t *, @@ -96,6 +95,7 @@ static void ipsid_fini(netstack_t *); #define SEL_PORT_POLICY 0x0001 #define SEL_IS_ICMP 0x0002 #define SEL_TUNNEL_MODE 0x0004 +#define SEL_POST_FRAG 0x0008 /* Return values for ipsec_init_inbound_sel(). 
*/ typedef enum { SELRET_NOMEM, SELRET_BADPKT, SELRET_SUCCESS, SELRET_TUNFRAG} @@ -668,12 +668,6 @@ ipsec_stack_init(netstackid_t stackid, netstack_t *ns) ip_drop_init(ipss); ip_drop_register(&ipss->ipsec_spd_dropper, "IPsec SPD"); - /* Set function to dummy until tun is loaded */ - rw_init(&ipss->ipsec_itp_get_byaddr_rw_lock, NULL, RW_DEFAULT, NULL); - rw_enter(&ipss->ipsec_itp_get_byaddr_rw_lock, RW_WRITER); - ipss->ipsec_itp_get_byaddr = itp_get_byaddr_dummy; - rw_exit(&ipss->ipsec_itp_get_byaddr_rw_lock); - /* IP's IPsec code calls the packet dropper */ ip_drop_register(&ipss->ipsec_dropper, "IP IPsec processing"); @@ -1029,41 +1023,6 @@ ipsec_clone_system_policy(netstack_t *ns) } /* - * Generic "do we have IPvN policy" answer. - */ -boolean_t -iph_ipvN(ipsec_policy_head_t *iph, boolean_t v6) -{ - int i, hval; - uint32_t valbit; - ipsec_policy_root_t *ipr; - ipsec_policy_t *ipp; - - if (v6) { - valbit = IPSL_IPV6; - hval = IPSEC_AF_V6; - } else { - valbit = IPSL_IPV4; - hval = IPSEC_AF_V4; - } - - ASSERT(RW_LOCK_HELD(&iph->iph_lock)); - for (ipr = iph->iph_root; ipr < &(iph->iph_root[IPSEC_NTYPES]); ipr++) { - if (ipr->ipr_nonhash[hval] != NULL) - return (B_TRUE); - for (i = 0; i < ipr->ipr_nchains; i++) { - for (ipp = ipr->ipr_hash[i].hash_head; ipp != NULL; - ipp = ipp->ipsp_hash.hash_next) { - if (ipp->ipsp_sel->ipsl_key.ipsl_valid & valbit) - return (B_TRUE); - } - } - } - - return (B_FALSE); -} - -/* * Extract the string from ipsec_policy_failure_msgs[type] and * log it. * @@ -1387,7 +1346,7 @@ ipsec_act_wildcard_expand(ipsec_act_t *act, uint_t *nact, netstack_t *ns) * Extract the parts of an ipsec_prot_t from an old-style ipsec_req_t. */ static void -ipsec_prot_from_req(ipsec_req_t *req, ipsec_prot_t *ipp) +ipsec_prot_from_req(const ipsec_req_t *req, ipsec_prot_t *ipp) { bzero(ipp, sizeof (*ipp)); /* @@ -1417,7 +1376,7 @@ ipsec_prot_from_req(ipsec_req_t *req, ipsec_prot_t *ipp) * Extract a new-style action from a request. 
*/ void -ipsec_actvec_from_req(ipsec_req_t *req, ipsec_act_t **actp, uint_t *nactp, +ipsec_actvec_from_req(const ipsec_req_t *req, ipsec_act_t **actp, uint_t *nactp, netstack_t *ns) { struct ipsec_act act; @@ -2778,12 +2737,13 @@ ipsec_init_inbound_sel(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha, boolean_t port_policy_present = (sel_flags & SEL_PORT_POLICY); boolean_t is_icmp = (sel_flags & SEL_IS_ICMP); boolean_t tunnel_mode = (sel_flags & SEL_TUNNEL_MODE); + boolean_t post_frag = (sel_flags & SEL_POST_FRAG); ASSERT((ipha == NULL && ip6h != NULL) || (ipha != NULL && ip6h == NULL)); if (ip6h != NULL) { - if (is_icmp) + if (is_icmp || tunnel_mode) outer_hdr_len = ((uint8_t *)ip6h) - mp->b_rptr; check_proto = IPPROTO_ICMPV6; @@ -2827,7 +2787,7 @@ ipsec_init_inbound_sel(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha, return (SELRET_TUNFRAG); } } else { - if (is_icmp) + if (is_icmp || tunnel_mode) outer_hdr_len = ((uint8_t *)ipha) - mp->b_rptr; check_proto = IPPROTO_ICMP; sel->ips_isv4 = B_TRUE; @@ -2849,7 +2809,7 @@ ipsec_init_inbound_sel(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha, if ((nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP && nexthdr != IPPROTO_SCTP && nexthdr != check_proto) || - (!port_policy_present && tunnel_mode)) { + (!port_policy_present && !post_frag && tunnel_mode)) { sel->ips_remote_port = sel->ips_local_port = 0; ipsec_freemsg_chain(spare_mp); return (SELRET_SUCCESS); @@ -3877,6 +3837,30 @@ ipsec_ipr_flush(ipsec_policy_head_t *php, ipsec_policy_root_t *ipr, } } +/* + * Create and insert inbound or outbound policy associated with actp for the + * address family fam into the policy head ph. Returns B_TRUE if policy was + * inserted, and B_FALSE otherwise. + */ +boolean_t +ipsec_polhead_insert(ipsec_policy_head_t *ph, ipsec_act_t *actp, uint_t nact, + int fam, int ptype, netstack_t *ns) +{ + ipsec_selkey_t sel; + ipsec_policy_t *pol; + ipsec_policy_root_t *pr; + + bzero(&sel, sizeof (sel)); + sel.ipsl_valid = (fam == IPSEC_AF_V4 ? 
IPSL_IPV4 : IPSL_IPV6); + if ((pol = ipsec_policy_create(&sel, actp, nact, IPSEC_PRIO_SOCKET, + NULL, ns)) != NULL) { + pr = &ph->iph_root[ptype]; + HASHLIST_INSERT(pol, ipsp_hash, pr->ipr_nonhash[fam]); + ipsec_insert_always(&ph->iph_rulebyid, pol); + } + return (pol != NULL); +} + void ipsec_polhead_flush(ipsec_policy_head_t *php, netstack_t *ns) { @@ -5472,26 +5456,24 @@ ipsec_unregister_prov_update(void) * inner-packet contents. */ mblk_t * -ipsec_tun_outbound(mblk_t *mp, tun_t *atp, ipha_t *inner_ipv4, - ip6_t *inner_ipv6, ipha_t *outer_ipv4, ip6_t *outer_ipv6, int outer_hdr_len, - netstack_t *ns) +ipsec_tun_outbound(mblk_t *mp, iptun_t *iptun, ipha_t *inner_ipv4, + ip6_t *inner_ipv6, ipha_t *outer_ipv4, ip6_t *outer_ipv6, int outer_hdr_len) { - ipsec_tun_pol_t *itp = atp->tun_itp; ipsec_policy_head_t *polhead; ipsec_selector_t sel; mblk_t *ipsec_mp, *ipsec_mp_head, *nmp; ipsec_out_t *io; boolean_t is_fragment; ipsec_policy_t *pol; + ipsec_tun_pol_t *itp = iptun->iptun_itp; + netstack_t *ns = iptun->iptun_ns; ipsec_stack_t *ipss = ns->netstack_ipsec; ASSERT(outer_ipv6 != NULL && outer_ipv4 == NULL || outer_ipv4 != NULL && outer_ipv6 == NULL); /* We take care of inners in a bit. */ - /* No policy on this tunnel - let global policy have at it. 
*/ - if (itp == NULL || !(itp->itp_flags & ITPF_P_ACTIVE)) - return (mp); + ASSERT(itp != NULL && (itp->itp_flags & ITPF_P_ACTIVE)); polhead = itp->itp_policy; bzero(&sel, sizeof (sel)); @@ -5568,8 +5550,7 @@ ipsec_tun_outbound(mblk_t *mp, tun_t *atp, ipha_t *inner_ipv4, ASSERT(mp->b_cont == NULL); /* - * If we get here, we have a full - * fragment chain + * If we get here, we have a full fragment chain */ oiph = (ipha_t *)mp->b_rptr; @@ -5701,7 +5682,12 @@ ipsec_tun_outbound(mblk_t *mp, tun_t *atp, ipha_t *inner_ipv4, */ io->ipsec_out_polhead = polhead; io->ipsec_out_policy = pol; - io->ipsec_out_zoneid = atp->tun_zoneid; + /* + * NOTE: There is a subtle difference between iptun_zoneid and + * iptun_connp->conn_zoneid explained in iptun_conn_create(). When + * interacting with the ip module, we must use conn_zoneid. + */ + io->ipsec_out_zoneid = iptun->iptun_connp->conn_zoneid; io->ipsec_out_v4 = (outer_ipv4 != NULL); io->ipsec_out_secure = B_TRUE; @@ -5860,20 +5846,18 @@ ipsec_tun_inbound(mblk_t *ipsec_mp, mblk_t **data_mp, ipsec_tun_pol_t *itp, boolean_t retval, port_policy_present, is_icmp, global_present; in6_addr_t tmpaddr; ipaddr_t tmp4; + uint8_t flags, *inner_hdr; ipsec_stack_t *ipss = ns->netstack_ipsec; - uint8_t flags, *holder, *outer_hdr; sel.ips_is_icmp_inv_acq = 0; if (outer_ipv4 != NULL) { ASSERT(outer_ipv6 == NULL); - outer_hdr = (uint8_t *)outer_ipv4; global_present = ipss->ipsec_inbound_v4_policy_present; } else { - outer_hdr = (uint8_t *)outer_ipv6; + ASSERT(outer_ipv6 != NULL); global_present = ipss->ipsec_inbound_v6_policy_present; } - ASSERT(outer_hdr != NULL); ASSERT(inner_ipv4 != NULL && inner_ipv6 == NULL || inner_ipv4 == NULL && inner_ipv6 != NULL); @@ -5898,6 +5882,11 @@ ipsec_tun_inbound(mblk_t *ipsec_mp, mblk_t **data_mp, ipsec_tun_pol_t *itp, port_policy_present = ((itp->itp_flags & ITPF_P_PER_PORT_SECURITY) ? 
B_TRUE : B_FALSE); + /* + * NOTE: Even if our policy is transport mode, set the + * SEL_TUNNEL_MODE flag so ipsec_init_inbound_sel() can + * do the right thing w.r.t. outer headers. + */ flags = ((port_policy_present ? SEL_PORT_POLICY : SEL_NONE) | (is_icmp ? SEL_IS_ICMP : SEL_NONE) | SEL_TUNNEL_MODE); @@ -5939,18 +5928,31 @@ ipsec_tun_inbound(mblk_t *ipsec_mp, mblk_t **data_mp, ipsec_tun_pol_t *itp, * If we get here, we have a full fragment chain. * Reacquire headers and selectors from first fragment. */ + inner_hdr = message->b_cont->b_rptr; + if (outer_ipv4 != NULL) { + inner_hdr += IPH_HDR_LENGTH( + (ipha_t *)message->b_cont->b_rptr); + } else { + inner_hdr += ip_hdr_length_v6(message->b_cont, + (ip6_t *)message->b_cont->b_rptr); + } + ASSERT(inner_hdr <= message->b_cont->b_wptr); + if (inner_ipv4 != NULL) { - inner_ipv4 = (ipha_t *)message->b_cont->b_rptr; - ASSERT(message->b_cont->b_wptr - - message->b_cont->b_rptr > sizeof (ipha_t)); + inner_ipv4 = (ipha_t *)inner_hdr; + inner_ipv6 = NULL; } else { - inner_ipv6 = (ip6_t *)message->b_cont->b_rptr; - ASSERT(message->b_cont->b_wptr - - message->b_cont->b_rptr > sizeof (ip6_t)); + inner_ipv6 = (ip6_t *)inner_hdr; + inner_ipv4 = NULL; } - /* Use SEL_NONE so we always get ports! */ + + /* + * Use SEL_TUNNEL_MODE to take into account the outer + * header. Use SEL_POST_FRAG so we always get ports. + */ rc = ipsec_init_inbound_sel(&sel, message->b_cont, - inner_ipv4, inner_ipv6, SEL_NONE); + inner_ipv4, inner_ipv6, + SEL_TUNNEL_MODE | SEL_POST_FRAG); switch (rc) { case SELRET_SUCCESS: /* @@ -6098,15 +6100,6 @@ ipsec_tun_inbound(mblk_t *ipsec_mp, mblk_t **data_mp, ipsec_tun_pol_t *itp, return (B_FALSE); } - /* - * The following assertion is valid because only the tun module alters - * the mblk chain - stripping the outer header by advancing mp->b_rptr. 
- */ - ASSERT(is_icmp || ((*data_mp)->b_datap->db_base <= outer_hdr && - outer_hdr < (*data_mp)->b_rptr)); - holder = (*data_mp)->b_rptr; - (*data_mp)->b_rptr = outer_hdr; - if (is_icmp) { /* * For ICMP packets, "outer_ipvN" is set to the outer header @@ -6150,8 +6143,6 @@ ipsec_tun_inbound(mblk_t *ipsec_mp, mblk_t **data_mp, ipsec_tun_pol_t *itp, } } - (*data_mp)->b_rptr = holder; - if (ipsec_mp != NULL) freeb(ipsec_mp); @@ -6184,8 +6175,14 @@ tunnel_compare(const void *arg1, const void *arg2) void itp_free(ipsec_tun_pol_t *node, netstack_t *ns) { - IPPH_REFRELE(node->itp_policy, ns); - IPPH_REFRELE(node->itp_inactive, ns); + if (node->itp_policy != NULL) { + IPPH_REFRELE(node->itp_policy, ns); + node->itp_policy = NULL; + } + if (node->itp_inactive != NULL) { + IPPH_REFRELE(node->itp_inactive, ns); + node->itp_inactive = NULL; + } mutex_destroy(&node->itp_lock); kmem_free(node, sizeof (*node)); } @@ -6335,15 +6332,44 @@ nomem: } /* - * We can't call the tun_t lookup function until tun is - * loaded, so create a dummy function to avoid symbol - * lookup errors on boot. + * Given two addresses, find a tunnel instance's IPsec policy heads. + * Returns NULL on failure. */ -/* ARGSUSED */ ipsec_tun_pol_t * -itp_get_byaddr_dummy(uint32_t *laddr, uint32_t *faddr, int af, netstack_t *ns) +itp_get_byaddr(uint32_t *laddr, uint32_t *faddr, int af, ip_stack_t *ipst) { - return (NULL); /* Always return NULL. */ + conn_t *connp; + iptun_t *iptun; + ipsec_tun_pol_t *itp = NULL; + + /* Classifiers are used to "src" being foreign. 
*/ + if (af == AF_INET) { + connp = ipcl_iptun_classify_v4((ipaddr_t *)faddr, + (ipaddr_t *)laddr, ipst); + } else { + ASSERT(af == AF_INET6); + ASSERT(!IN6_IS_ADDR_V4MAPPED((in6_addr_t *)laddr)); + ASSERT(!IN6_IS_ADDR_V4MAPPED((in6_addr_t *)faddr)); + connp = ipcl_iptun_classify_v6((in6_addr_t *)faddr, + (in6_addr_t *)laddr, ipst); + } + + if (connp == NULL) + return (NULL); + + if (IPCL_IS_IPTUN(connp)) { + iptun = connp->conn_iptun; + if (iptun != NULL) { + itp = iptun->iptun_itp; + if (itp != NULL) { + /* Braces due to the macro's nature... */ + ITP_REFHOLD(itp); + } + } /* Else itp is already NULL. */ + } + + CONN_DEC_REF(connp); + return (itp); } /* diff --git a/usr/src/uts/common/inet/ip/spdsock.c b/usr/src/uts/common/inet/ip/spdsock.c index 250cb9cea6..4a9856857e 100644 --- a/usr/src/uts/common/inet/ip/spdsock.c +++ b/usr/src/uts/common/inet/ip/spdsock.c @@ -42,6 +42,7 @@ #include <sys/cmn_err.h> #include <sys/suntpi.h> #include <sys/policy.h> +#include <sys/dls.h> #include <sys/socket.h> #include <netinet/in.h> @@ -56,12 +57,13 @@ #include <inet/proto_set.h> #include <inet/nd.h> #include <inet/ip_if.h> -#include <inet/tun.h> #include <inet/optcom.h> #include <inet/ipsec_info.h> #include <inet/ipsec_impl.h> #include <inet/spdsock.h> #include <inet/sadb.h> +#include <inet/iptun.h> +#include <inet/iptun/iptun_impl.h> #include <sys/isa_defs.h> @@ -2782,58 +2784,6 @@ spdsock_updatealg(queue_t *q, mblk_t *mp, spd_ext_t *extv[]) } /* - * With a reference-held ill, dig down and find an instance of "tun", and - * assign its tunnel policy pointer, while reference-holding it. Also, - * release ill's refrence when finished. - * - * We'll be messing with q_next, so be VERY careful. - */ -static void -find_tun_and_set_itp(ill_t *ill, ipsec_tun_pol_t *itp) -{ - queue_t *q; - tun_t *tun; - - /* Don't bother if this ill is going away. 
*/ - if (ill->ill_flags & ILL_CONDEMNED) { - ill_refrele(ill); - return; - } - - - q = ill->ill_wq; - claimstr(q); /* Lighter-weight than freezestr(). */ - - do { - /* Use strcmp() because "tun" is bounded. */ - if (strcmp(q->q_qinfo->qi_minfo->mi_idname, "tun") == 0) { - /* Aha! Got it. */ - tun = (tun_t *)q->q_ptr; - if (tun != NULL) { - mutex_enter(&tun->tun_lock); - if (tun->tun_itp != itp) { - ASSERT(tun->tun_itp == NULL); - ITP_REFHOLD(itp); - tun->tun_itp = itp; - } - mutex_exit(&tun->tun_lock); - goto release_and_return; - } - /* - * Else assume this is some other module named "tun" - * and move on, hoping we find one that actually has - * something in q_ptr. - */ - } - q = q->q_next; - } while (q != NULL); - -release_and_return: - releasestr(ill->ill_wq); - ill_refrele(ill); -} - -/* * Sort through the mess of polhead options to retrieve an appropriate one. * Returns NULL if we send an spdsock error. Returns a valid pointer if we * found a valid polhead. Returns ALL_ACTIVE_POLHEADS (aka. -1) or @@ -2852,7 +2802,7 @@ get_appropriate_polhead(queue_t *q, mblk_t *mp, spd_if_t *tunname, int spdid, spdsock_t *ss = (spdsock_t *)q->q_ptr; netstack_t *ns = ss->spdsock_spds->spds_netstack; uint64_t gen; /* Placeholder */ - ill_t *v4, *v6; + datalink_id_t linkid; active = (spdid == SPD_ACTIVE); *itpp = NULL; @@ -2895,19 +2845,13 @@ get_appropriate_polhead(queue_t *q, mblk_t *mp, spd_if_t *tunname, int spdid, } } /* - * Troll the plumbed tunnels and see if we have a - * match. We need to do this always in case we add - * policy AFTER plumbing a tunnel. + * Troll the plumbed tunnels and see if we have a match. We + * need to do this always in case we add policy AFTER plumbing + * a tunnel. 
*/ - v4 = ill_lookup_on_name(tname, B_FALSE, B_FALSE, NULL, - NULL, NULL, &errno, NULL, ns->netstack_ip); - if (v4 != NULL) - find_tun_and_set_itp(v4, itp); - v6 = ill_lookup_on_name(tname, B_FALSE, B_TRUE, NULL, - NULL, NULL, &errno, NULL, ns->netstack_ip); - if (v6 != NULL) - find_tun_and_set_itp(v6, itp); - ASSERT(itp != NULL); + if (dls_mgmt_get_linkid(tname, &linkid) == 0) + iptun_set_policy(linkid, itp); + *itpp = itp; /* For spdsock dump state, set the polhead's name. */ if (msgtype == SPD_DUMP) { diff --git a/usr/src/uts/common/inet/ip/tn_ipopt.c b/usr/src/uts/common/inet/ip/tn_ipopt.c index cde62b4a84..dba9b8c98a 100644 --- a/usr/src/uts/common/inet/ip/tn_ipopt.c +++ b/usr/src/uts/common/inet/ip/tn_ipopt.c @@ -293,7 +293,8 @@ tsol_check_dest(const cred_t *credp, const void *dst, uchar_t version, tsol_tpc_t *dst_rhtp; zoneid_t zoneid; - *effective_cred = NULL; + if (effective_cred != NULL) + *effective_cred = NULL; ASSERT(version == IPV4_VERSION || (version == IPV6_VERSION && !IN6_IS_ADDR_V4MAPPED((in6_addr_t *)dst))); @@ -425,10 +426,12 @@ tsol_check_dest(const cred_t *credp, const void *dst, uchar_t version, * label flags. */ if (newtsl != NULL) { - *effective_cred = copycred_from_tslabel(credp, - newtsl, KM_NOSLEEP); + if (effective_cred != NULL) { + *effective_cred = copycred_from_tslabel(credp, + newtsl, KM_NOSLEEP); + } label_rele(newtsl); - if (*effective_cred == NULL) { + if (effective_cred != NULL && *effective_cred == NULL) { TPC_RELE(dst_rhtp); return (ENOMEM); } diff --git a/usr/src/uts/common/inet/ip/tun.c b/usr/src/uts/common/inet/ip/tun.c deleted file mode 100644 index dc5af5e7c1..0000000000 --- a/usr/src/uts/common/inet/ip/tun.c +++ /dev/null @@ -1,5919 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. 
- * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * Tunnel driver - * This module acts like a driver/DLPI provider as viewed from the top - * and a stream head/TPI user from the bottom - * Implements the logic for IP (IPv4 or IPv6) encapsulation - * within IP (IPv4 or IPv6) - */ - -#include <sys/types.h> -#include <sys/stream.h> -#include <sys/dlpi.h> -#include <sys/stropts.h> -#include <sys/strsubr.h> -#include <sys/strlog.h> -#include <sys/tihdr.h> -#include <sys/tiuser.h> -#include <sys/ddi.h> -#include <sys/sunddi.h> -#include <sys/ethernet.h> -#include <sys/cmn_err.h> -#include <sys/debug.h> -#include <sys/kmem.h> -#include <sys/netstack.h> - -#include <sys/systm.h> -#include <sys/param.h> -#include <sys/socket.h> -#include <sys/vtrace.h> -#include <sys/isa_defs.h> -#include <net/if.h> -#include <net/if_arp.h> -#include <net/route.h> -#include <sys/sockio.h> -#include <netinet/in.h> - -#include <inet/common.h> -#include <inet/mi.h> -#include <inet/mib2.h> -#include <inet/nd.h> -#include <inet/arp.h> -#include <inet/snmpcom.h> - -#include <netinet/igmp_var.h> - -#include <netinet/ip6.h> -#include <netinet/icmp6.h> -#include <inet/ip.h> -#include <inet/ip6.h> -#include <net/if_dl.h> -#include <inet/ip_if.h> -#include <sys/strsun.h> -#include <sys/strsubr.h> -#include <inet/ipsec_impl.h> 
-#include <inet/ipdrop.h> -#include <inet/tun.h> -#include <inet/ipsec_impl.h> - - -#include <sys/conf.h> -#include <sys/errno.h> -#include <sys/modctl.h> -#include <sys/stat.h> - -#include <inet/ip_ire.h> /* for ire_route_lookup_v6 */ - -static void tun_cancel_rec_evs(queue_t *, eventid_t *); -static void tun_bufcall_handler(void *); -static boolean_t tun_icmp_message_v4(queue_t *, ipha_t *, icmph_t *, mblk_t *); -static boolean_t tun_icmp_too_big_v4(queue_t *, ipha_t *, uint16_t, mblk_t *); -static boolean_t tun_icmp_message_v6(queue_t *, ip6_t *, icmp6_t *, uint8_t, - mblk_t *); -static boolean_t tun_icmp_too_big_v6(queue_t *, ip6_t *, uint32_t, uint8_t, - mblk_t *); -static void tun_sendokack(queue_t *, mblk_t *, t_uscalar_t); -static void tun_sendsdusize(queue_t *); -static void tun_senderrack(queue_t *, mblk_t *, t_uscalar_t, t_uscalar_t, - t_uscalar_t); -static int tun_fastpath(queue_t *, mblk_t *); -static int tun_ioctl(queue_t *, mblk_t *); -static void tun_timeout_handler(void *); -static int tun_rproc(queue_t *, mblk_t *); -static int tun_wproc_mdata(queue_t *, mblk_t *); -static int tun_wproc(queue_t *, mblk_t *); -static int tun_rdata(queue_t *, mblk_t *, mblk_t *, tun_t *, uint_t); -static int tun_rdata_v4(queue_t *, mblk_t *, mblk_t *, tun_t *); -static int tun_rdata_v6(queue_t *, mblk_t *, mblk_t *, tun_t *); -static int tun_set_sec_simple(tun_t *, ipsec_req_t *); -static void tun_send_ire_req(queue_t *); -static uint32_t tun_update_link_mtu(queue_t *, uint32_t, boolean_t); -static mblk_t *tun_realloc_mblk(queue_t *, mblk_t *, size_t, mblk_t *, - boolean_t); -static void tun_recover(queue_t *, mblk_t *, size_t); -static void tun_rem_ppa_list(tun_t *); -static void tun_rem_tun_byaddr_list(tun_t *); -static void tun_rput_icmp_err_v4(queue_t *, mblk_t *, mblk_t *); -static void icmp_ricmp_err_v4_v4(queue_t *, mblk_t *, mblk_t *); -static void icmp_ricmp_err_v6_v4(queue_t *, mblk_t *, mblk_t *); -static void icmp_ricmp_err_v4_v6(queue_t *, mblk_t *, 
mblk_t *, icmp6_t *); -static void icmp_ricmp_err_v6_v6(queue_t *, mblk_t *, mblk_t *, icmp6_t *); -static void tun_rput_icmp_err_v6(queue_t *, mblk_t *, mblk_t *); -static int tun_rput_tpi(queue_t *, mblk_t *); -static int tun_send_bind_req(queue_t *); -static void tun_statinit(tun_stats_t *, char *, netstackid_t); -static int tun_stat_kstat_update(kstat_t *, int); -static void tun_wdata_v4(queue_t *, mblk_t *); -static void tun_wdata_v6(queue_t *, mblk_t *); -static char *tun_who(queue_t *, char *); -static int tun_wput_dlpi(queue_t *, mblk_t *); -static int tun_wputnext_v6(queue_t *, mblk_t *); -static int tun_wputnext_v4(queue_t *, mblk_t *); -static boolean_t tun_limit_value_v6(queue_t *, mblk_t *, ip6_t *, int *); -static void tun_freemsg_chain(mblk_t *, uint64_t *); -static void *tun_stack_init(netstackid_t, netstack_t *); -static void tun_stack_fini(netstackid_t, void *); - -/* module's defined constants, globals and data structures */ - -#define IP "ip" -#define IP6 "ip6" -static major_t IP_MAJ; -static major_t IP6_MAJ; - -#define TUN_DEBUG -#define TUN_LINK_EXTRA_OFF 32 - -#define IPV6V4_DEF_TTL 60 -#define IPV6V4_DEF_ENCAP 60 - -#define TUN_WHO_BUF 60 - - -#ifdef TUN_DEBUG -/* levels of debugging verbosity */ -#define TUN0DBG 0x00 /* crucial */ -#define TUN1DBG 0x01 /* informational */ -#define TUN2DBG 0x02 /* verbose */ -#define TUN3DBG 0x04 /* very verbose */ - -/* - * Global variable storing debugging level for all tunnels. By default - * all crucial messages will be printed. Value can be masked to exclusively - * print certain debug levels and not others. 
- */ -int8_t tun_debug = TUN0DBG; - -#define TUN_LEVEL(dbg, lvl) ((dbg & lvl) == lvl) - -#define tun0dbg(a) printf a -#define tun1dbg(a) if (TUN_LEVEL(tun_debug, TUN1DBG)) printf a -#define tun2dbg(a) if (TUN_LEVEL(tun_debug, TUN2DBG)) printf a -#define tun3dbg(a) if (TUN_LEVEL(tun_debug, TUN3DBG)) printf a -#else -#define tun0dbg(a) /* */ -#define tun1dbg(a) /* */ -#define tun2dbg(a) /* */ -#define tun3dbg(a) /* */ -#endif /* TUN_DEBUG */ - -#define TUN_RECOVER_WAIT (1*hz) - -/* canned DL_INFO_ACK - adjusted based on tunnel type */ -dl_info_ack_t infoack = { - DL_INFO_ACK, /* dl_primitive */ - 4196, /* dl_max_sdu */ - 0, /* dl_min_sdu */ - 0, /* dl_addr_length */ - DL_IPV4, /* dl_mac_type */ - 0, /* dl_reserved */ - DL_UNATTACHED, /* dl_current_state */ - 0, /* dl_sap_length */ - DL_CLDLS, /* dl_service_mode */ - 0, /* dl_qos_length */ - 0, /* dl_qos_offset */ - 0, /* dl_qos_range_length */ - 0, /* dl_qos_range_offset */ - DL_STYLE2, /* dl_provider_style */ - 0, /* dl_addr_offset */ - DL_VERSION_2, /* dl_version */ - 0, /* dl_brdcast_addr_length */ - 0, /* dl_brdcst_addr_offset */ - 0 /* dl_grow */ -}; - -/* - * canned DL_BIND_ACK - IP doesn't use any of this info. - */ -dl_bind_ack_t bindack = { - DL_BIND_ACK, /* dl_primitive */ - 0, /* dl_sap */ - 0, /* dl_addr_length */ - 0, /* dl_addr_offset */ - 0, /* dl_max_conind */ - 0 /* dl_xidtest_flg */ -}; - - -/* - * Canned IPv6 destination options header containing Tunnel - * Encapsulation Limit option. 
- */ -static struct tun_encap_limit tun_limit_init_upper_v4 = { - { IPPROTO_ENCAP, 0 }, - IP6OPT_TUNNEL_LIMIT, - 1, - IPV6_DEFAULT_ENCAPLIMIT, /* filled in with actual value later */ - IP6OPT_PADN, - 1, - 0 -}; -static struct tun_encap_limit tun_limit_init_upper_v6 = { - { IPPROTO_IPV6, 0 }, - IP6OPT_TUNNEL_LIMIT, - 1, - IPV6_DEFAULT_ENCAPLIMIT, /* filled in with actual value later */ - IP6OPT_PADN, - 1, - 0 -}; - -static tun_stats_t *tun_add_stat(queue_t *); - -static void tun_add_byaddr(tun_t *); -static ipsec_tun_pol_t *itp_get_byaddr_fn(uint32_t *, uint32_t *, int, - netstack_t *); - -/* Setable in /etc/system */ -static boolean_t tun_do_fastpath = B_TRUE; - -/* streams linkages */ -static struct module_info info = { - TUN_MODID, /* module id number */ - TUN_NAME, /* module name */ - 1, /* min packet size accepted */ - INFPSZ, /* max packet size accepted */ - 65536, /* hi-water mark */ - 1024 /* lo-water mark */ -}; - -static struct qinit tunrinit = { - (pfi_t)tun_rput, /* read side put procedure */ - (pfi_t)tun_rsrv, /* read side service procedure */ - tun_open, /* open procedure */ - tun_close, /* close procedure */ - NULL, /* for future use */ - &info, /* module information structure */ - NULL /* module statistics structure */ -}; - -static struct qinit tunwinit = { - (pfi_t)tun_wput, /* write side put procedure */ - (pfi_t)tun_wsrv, /* write side service procedure */ - NULL, - NULL, - NULL, - &info, - NULL -}; - -struct streamtab tuninfo = { - &tunrinit, /* read side queue init */ - &tunwinit, /* write side queue init */ - NULL, /* mux read side init */ - NULL /* mux write side init */ -}; - -static struct fmodsw tun_fmodsw = { - TUN_NAME, - &tuninfo, - (D_MP | D_MTQPAIR | D_MTPUTSHARED) -}; - -static struct modlstrmod modlstrmod = { - &mod_strmodops, - "configured tunneling module", - &tun_fmodsw -}; - -static struct modlinkage modlinkage = { - MODREV_1, - &modlstrmod, - NULL -}; - -int -_init(void) -{ - int rc; - - IP_MAJ = ddi_name_to_major(IP); - 
IP6_MAJ = ddi_name_to_major(IP6); - - /* - * We want to be informed each time a stack is created or - * destroyed in the kernel, so we can maintain the - * set of tun_stack_t's. - */ - netstack_register(NS_TUN, tun_stack_init, NULL, tun_stack_fini); - - rc = mod_install(&modlinkage); - if (rc != 0) - netstack_unregister(NS_TUN); - - return (rc); -} - -int -_fini(void) -{ - int error; - - error = mod_remove(&modlinkage); - if (error == 0) - netstack_unregister(NS_TUN); - - return (error); -} - -int -_info(struct modinfo *modinfop) -{ - return (mod_info(&modlinkage, modinfop)); -} - -/* - * this module is meant to be pushed on an instance of IP and - * have an instance of IP pushed on top of it. - */ - -/* ARGSUSED */ -int -tun_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) -{ - tun_t *atp; - mblk_t *hello; - ipsec_info_t *ii; - netstack_t *ns; - zoneid_t zoneid; - - if (q->q_ptr != NULL) { - /* re-open of an already open instance */ - return (0); - } - - if (sflag != MODOPEN) { - return (EINVAL); - } - - tun1dbg(("tun_open\n")); - - ns = netstack_find_by_cred(credp); - ASSERT(ns != NULL); - - /* - * For exclusive stacks we set the zoneid to zero - * to make IP operate as if in the global zone. 
- */ - if (ns->netstack_stackid != GLOBAL_NETSTACKID) - zoneid = GLOBAL_ZONEID; - else - zoneid = crgetzoneid(credp); - - hello = allocb(sizeof (ipsec_info_t), BPRI_HI); - if (hello == NULL) { - netstack_rele(ns); - return (ENOMEM); - } - - /* allocate per-instance structure */ - atp = kmem_zalloc(sizeof (tun_t), KM_SLEEP); - - atp->tun_state = DL_UNATTACHED; - atp->tun_dev = *devp; - atp->tun_zoneid = zoneid; - atp->tun_netstack = ns; - atp->tun_cred = credp; - crhold(credp); - - /* - * Based on the lower version of IP, initialize stuff that - * won't change - */ - if (getmajor(*devp) == IP_MAJ) { - ipha_t *ipha; - - atp->tun_flags = TUN_L_V4 | TUN_HOP_LIM; - atp->tun_hop_limit = IPV6V4_DEF_TTL; - - /* - * The tunnel MTU is recalculated when we know more - * about the tunnel destination. - */ - atp->tun_mtu = IP_MAXPACKET - sizeof (ipha_t); - ipha = &atp->tun_ipha; - ipha->ipha_version_and_hdr_length = IP_SIMPLE_HDR_VERSION; - ipha->ipha_type_of_service = 0; - ipha->ipha_ident = 0; /* to be filled in by IP */ - ipha->ipha_fragment_offset_and_flags = htons(IPH_DF); - ipha->ipha_ttl = atp->tun_hop_limit; - ipha->ipha_hdr_checksum = 0; /* to be filled in by IP */ - } else if (getmajor(*devp) == IP6_MAJ) { - atp->tun_flags = TUN_L_V6 | TUN_HOP_LIM | TUN_ENCAP_LIM; - atp->tun_hop_limit = IPV6_DEFAULT_HOPS; - atp->tun_encap_lim = IPV6_DEFAULT_ENCAPLIMIT; - atp->tun_mtu = IP_MAXPACKET - sizeof (ip6_t) - - IPV6_TUN_ENCAP_OPT_LEN; - atp->tun_ip6h.ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; - atp->tun_ip6h.ip6_hops = IPV6_DEFAULT_HOPS; - } else { - netstack_rele(ns); - crfree(credp); - kmem_free(atp, sizeof (tun_t)); - return (ENXIO); - } - - atp->tun_extra_offset = TUN_LINK_EXTRA_OFF; - mutex_init(&atp->tun_lock, NULL, MUTEX_DEFAULT, NULL); - - /* - * If this is the automatic tunneling module, atun, verify that the - * lower protocol is IPv4 and set TUN_AUTOMATIC. 
Since we don't do - * automatic tunneling over IPv6, trying to run over IPv6 is an error, - * so free memory and return an error. - */ - if (q->q_qinfo->qi_minfo->mi_idnum == ATUN_MODID) { - if (atp->tun_flags & TUN_L_V4) { - atp->tun_flags |= TUN_AUTOMATIC; - atp->tun_mtu = ATUN_MTU; - } else { - /* Error. */ - netstack_rele(ns); - crfree(credp); - kmem_free(atp, sizeof (tun_t)); - return (ENXIO); - } - } else if (q->q_qinfo->qi_minfo->mi_idnum == TUN6TO4_MODID) { - /* - * Set 6to4 flag if this is the 6to4tun module and make - * the same checks mentioned above. - */ - if (atp->tun_flags & TUN_L_V4) { - atp->tun_flags |= TUN_6TO4; - atp->tun_mtu = ATUN_MTU; - } else { - /* Error. */ - netstack_rele(ns); - crfree(credp); - kmem_free(atp, sizeof (tun_t)); - return (ENXIO); - } - } - - q->q_ptr = WR(q)->q_ptr = atp; - atp->tun_wq = WR(q); - mutex_enter(&ns->netstack_tun->tuns_global_lock); - tun_add_byaddr(atp); - mutex_exit(&ns->netstack_tun->tuns_global_lock); - ii = (ipsec_info_t *)hello->b_rptr; - hello->b_wptr = hello->b_rptr + sizeof (*ii); - hello->b_datap->db_type = M_CTL; - ii->ipsec_info_type = TUN_HELLO; - ii->ipsec_info_len = sizeof (*ii); - qprocson(q); - putnext(WR(q), hello); - return (0); -} - -/* ARGSUSED */ -int -tun_close(queue_t *q, int flag, cred_t *cred_p) -{ - tun_t *atp = (tun_t *)q->q_ptr; - netstack_t *ns; - tun_stack_t *tuns; - - ASSERT(atp != NULL); - - ns = atp->tun_netstack; - tuns = ns->netstack_tun; - - /* Cancel outstanding qtimeouts() or qbufcalls() */ - tun_cancel_rec_evs(q, &atp->tun_events); - - qprocsoff(q); - - crfree(atp->tun_cred); - atp->tun_cred = NULL; - - /* NOTE: tun_rem_ppa_list() may unlink tun_itp from its AVL tree. */ - if (atp->tun_stats != NULL) - tun_rem_ppa_list(atp); - - if (atp->tun_itp != NULL) { - /* In brackets because of ITP_REFRELE's brackets. 
*/ - ITP_REFRELE(atp->tun_itp, ns); - } - - netstack_rele(ns); - - mutex_destroy(&atp->tun_lock); - - /* remove tun_t from global list */ - mutex_enter(&tuns->tuns_global_lock); - tun_rem_tun_byaddr_list(atp); - mutex_exit(&tuns->tuns_global_lock); - - /* free per-instance struct */ - kmem_free(atp, sizeof (tun_t)); - - q->q_ptr = WR(q)->q_ptr = NULL; - - return (0); -} - - -/* - * Cancel bufcall and timer requests - * Don't need to hold lock. protected by perimeter - */ -static void -tun_cancel_rec_evs(queue_t *q, eventid_t *evs) -{ - if (evs->ev_rbufcid != 0) { - qunbufcall(RD(q), evs->ev_rbufcid); - evs->ev_rbufcid = 0; - } - if (evs->ev_wbufcid != 0) { - qunbufcall(WR(q), evs->ev_wbufcid); - evs->ev_wbufcid = 0; - } - if (evs->ev_rtimoutid != 0) { - (void) quntimeout(RD(q), evs->ev_rtimoutid); - evs->ev_rtimoutid = 0; - } - if (evs->ev_wtimoutid != 0) { - (void) quntimeout(WR(q), evs->ev_wtimoutid); - evs->ev_wtimoutid = 0; - } -} - -/* - * Called by bufcall() when memory becomes available - * Don't need to hold lock. protected by perimeter - */ -static void -tun_bufcall_handler(void *arg) -{ - queue_t *q = arg; - tun_t *atp = (tun_t *)q->q_ptr; - eventid_t *evs; - - ASSERT(atp); - - evs = &atp->tun_events; - if ((q->q_flag & QREADR) != 0) { - ASSERT(evs->ev_rbufcid); - evs->ev_rbufcid = 0; - } else { - ASSERT(evs->ev_wbufcid); - evs->ev_wbufcid = 0; - } - enableok(q); - qenable(q); -} - -/* - * Called by timeout (if we couldn't do a bufcall) - * Don't need to hold lock. protected by perimeter - */ -static void -tun_timeout_handler(void *arg) -{ - queue_t *q = arg; - tun_t *atp = (tun_t *)q->q_ptr; - eventid_t *evs; - - ASSERT(atp); - evs = &atp->tun_events; - - if (q->q_flag & QREADR) { - ASSERT(evs->ev_rtimoutid); - evs->ev_rtimoutid = 0; - } else { - ASSERT(evs->ev_wtimoutid); - evs->ev_wtimoutid = 0; - } - enableok(q); - qenable(q); -} - -/* - * This routine is called when a message buffer can not - * be allocated. 
M_PCPROT message are converted to M_PROTO, but - * other than that, the mblk passed in must not be a high - * priority message (putting a hight priority message back on - * the queue is a bad idea) - * Side effect: the queue is disabled - * (timeout or bufcall handler will re-enable the queue) - * tun_cancel_rec_evs() must be called in close to cancel all - * outstanding requests. - */ -static void -tun_recover(queue_t *q, mblk_t *mp, size_t size) -{ - tun_t *atp = (tun_t *)q->q_ptr; - timeout_id_t tid; - bufcall_id_t bid; - eventid_t *evs = &atp->tun_events; - - ASSERT(mp != NULL); - - /* - * To avoid re-enabling the queue, change the high priority - * M_PCPROTO message to a M_PROTO before putting it on the queue - */ - if (mp->b_datap->db_type == M_PCPROTO) - mp->b_datap->db_type = M_PROTO; - - ASSERT(mp->b_datap->db_type < QPCTL); - - (void) putbq(q, mp); - - /* - * Make sure there is at most one outstanding request per queue. - */ - if (q->q_flag & QREADR) { - if (evs->ev_rtimoutid || evs->ev_rbufcid) - return; - } else { - if (evs->ev_wtimoutid || evs->ev_wbufcid) - return; - } - - noenable(q); - /* - * locking is needed here because this routine may be called - * with two puts() running - */ - mutex_enter(&atp->tun_lock); - if (!(bid = qbufcall(q, size, BPRI_MED, tun_bufcall_handler, q))) { - tid = qtimeout(q, tun_timeout_handler, q, TUN_RECOVER_WAIT); - if (q->q_flag & QREADR) - evs->ev_rtimoutid = tid; - else - evs->ev_wtimoutid = tid; - } else { - if (q->q_flag & QREADR) - evs->ev_rbufcid = bid; - else - evs->ev_wbufcid = bid; - } - mutex_exit(&atp->tun_lock); -} - -/* - * tun_realloc_mblk(q, mp, size, orig_mp, copy) - * - * q - pointer to a queue_t, must not be NULL - * mp - pointer to an mblk to copy, can be NULL - * size - Number of bytes being (re)allocated - * orig_mp - pointer to the original mblk_t which will be passed to - * tun_recover if the memory (re)allocation fails. This is done - * so that the message can be rescheduled on the queue. 
- * orig_mp must be NULL if the original mblk_t is a high priority - * message of type other then M_PCPROTO. - * copy - a boolean to specify wheater the contents of mp should be copied - * into the new mblk_t returned by this function. - * - * note: this routine will adjust the b_rptr and b_wptr of the - * mblk. Returns an mblk able to hold the requested size or - * NULL if allocation failed. If copy is true, original - * contents, if any, will be copied to new mblk - */ -static mblk_t * -tun_realloc_mblk(queue_t *q, mblk_t *mp, size_t size, mblk_t *orig_mp, - boolean_t copy) -{ - /* - * If we are passed in an mblk.. check to make sure that - * it is big enough and we are the only users of the mblk - * If not, then try and allocate one - */ - if (mp == NULL || mp->b_datap->db_lim - mp->b_datap->db_base < size || - mp->b_datap->db_ref > 1) { - size_t asize; - mblk_t *newmp; - - /* allocate at least as much as we had -- don't shrink */ - if (mp != NULL) { - asize = MAX(size, - mp->b_datap->db_lim - mp->b_datap->db_base); - } else { - asize = size; - } - newmp = allocb(asize, BPRI_HI); - - if (newmp == NULL) { - /* - * Reschedule the mblk via bufcall or timeout - * if orig_mp is non-NULL - */ - if (orig_mp != NULL) { - tun_recover(q, orig_mp, asize); - } - tun1dbg(("tun_realloc_mblk: couldn't allocate" \ - " dl_ok_ack mblk\n")); - return (NULL); - } - if (mp != NULL) { - if (copy) - bcopy(mp->b_rptr, newmp->b_rptr, - mp->b_wptr - mp->b_rptr); - newmp->b_datap->db_type = mp->b_datap->db_type; - freemsg(mp); - } - mp = newmp; - } else { - if (mp->b_rptr != mp->b_datap->db_base) { - if (copy) - bcopy(mp->b_rptr, mp->b_datap->db_base, - mp->b_wptr - mp->b_rptr); - mp->b_rptr = mp->b_datap->db_base; - } - } - mp->b_wptr = mp->b_rptr + size; - return (mp); -} - - -/* send a DL_OK_ACK back upstream */ -static void -tun_sendokack(queue_t *q, mblk_t *mp, t_uscalar_t prim) -{ - dl_ok_ack_t *dlok; - - if ((mp = tun_realloc_mblk(q, mp, sizeof (dl_ok_ack_t), mp, - B_FALSE)) == 
NULL) { - return; - } - dlok = (dl_ok_ack_t *)mp->b_rptr; - dlok->dl_primitive = DL_OK_ACK; - dlok->dl_correct_primitive = prim; - mp->b_datap->db_type = M_PCPROTO; - qreply(q, mp); -} - -/* - * Send a DL_NOTIFY_IND message with DL_NOTE_SDU_SIZE up to notify IP of a - * link MTU change. - */ -static void -tun_sendsdusize(queue_t *q) -{ - tun_t *atp = (tun_t *)q->q_ptr; - mblk_t *mp = NULL; - dl_notify_ind_t *notify; - - if (!(atp->tun_notifications & DL_NOTE_SDU_SIZE)) - return; - - if ((mp = tun_realloc_mblk(q, NULL, DL_NOTIFY_IND_SIZE, NULL, - B_FALSE)) == NULL) { - return; - } - mp->b_datap->db_type = M_PROTO; - notify = (dl_notify_ind_t *)mp->b_rptr; - notify->dl_primitive = DL_NOTIFY_IND; - notify->dl_notification = DL_NOTE_SDU_SIZE; - notify->dl_data = atp->tun_mtu; - notify->dl_addr_length = 0; - notify->dl_addr_offset = 0; - - tun1dbg(("tun_sendsdusize: notifying ip of new mtu: %d", atp->tun_mtu)); - - /* - * We send this notification to the upper IP instance who is using - * us as a device. - */ - putnext(RD(q), mp); -} - -/* send a DL_ERROR_ACK back upstream */ -static void -tun_senderrack(queue_t *q, mblk_t *mp, t_uscalar_t prim, t_uscalar_t dl_err, - t_uscalar_t error) -{ - dl_error_ack_t *dl_err_ack; - - if ((mp = tun_realloc_mblk(q, mp, sizeof (dl_error_ack_t), mp, - B_FALSE)) == NULL) { - return; - } - - dl_err_ack = (dl_error_ack_t *)mp->b_rptr; - dl_err_ack->dl_error_primitive = prim; - dl_err_ack->dl_primitive = DL_ERROR_ACK; - dl_err_ack->dl_errno = dl_err; - dl_err_ack->dl_unix_errno = error; - mp->b_datap->db_type = M_PCPROTO; - qreply(q, mp); -} - -/* - * Free all messages in an mblk chain and optionally collect - * byte-counter stats. 
Caller responsible for per-packet stats - */ -static void -tun_freemsg_chain(mblk_t *mp, uint64_t *bytecount) -{ - mblk_t *mpnext; - while (mp != NULL) { - ASSERT(mp->b_prev == NULL); - mpnext = mp->b_next; - mp->b_next = NULL; - if (bytecount != NULL) - atomic_add_64(bytecount, (int64_t)msgdsize(mp)); - freemsg(mp); - mp = mpnext; - } -} - -/* - * Send all messages in a chain of mblk chains and optionally collect - * byte-counter stats. Caller responsible for per-packet stats, and insuring - * mp is always non-NULL. - * - * This is a macro so we can save stack. Assume the caller function - * has local-variable "nmp" as a placeholder. Define two versions, one with - * byte-counting stats and one without. - */ -#define TUN_PUTMSG_CHAIN_STATS(q, mp, nmp, bytecount) \ - (nmp) = NULL; \ - ASSERT((mp) != NULL); \ - do { \ - if ((nmp) != NULL) \ - putnext(q, (nmp)); \ - ASSERT((mp)->b_prev == NULL); \ - (nmp) = (mp); \ - (mp) = (mp)->b_next; \ - (nmp)->b_next = NULL; \ - atomic_add_64(bytecount, (int64_t)msgdsize(nmp)); \ - } while ((mp) != NULL); \ -\ - putnext((q), (nmp)) /* trailing semicolon provided by instantiator. */ - -#define TUN_PUTMSG_CHAIN(q, mp, nmp) \ - (nmp) = NULL; \ - ASSERT((mp) != NULL); \ - do { \ - if ((nmp) != NULL) \ - putnext(q, (nmp)); \ - ASSERT((mp)->b_prev == NULL); \ - (nmp) = (mp); \ - (mp) = (mp)->b_next; \ - (nmp)->b_next = NULL; \ - } while ((mp) != NULL); \ -\ - putnext((q), (nmp)) /* trailing semicolon provided by instantiator. */ - -/* - * Macro that not only checks tun_itp, but also sees if one got loaded - * via ipsecconf(1m)/PF_POLICY behind our backs. Note the sleazy update of - * (tun)->tun_itp_gen so we don't lose races with other possible updates via - * PF_POLICY. 
- */ -#define tun_policy_present(tun, ns, ipss) \ - (((tun)->tun_itp != NULL) || \ - (((tun)->tun_itp_gen < ipss->ipsec_tunnel_policy_gen) && \ - ((tun)->tun_itp_gen = ipss->ipsec_tunnel_policy_gen) && \ - (((tun)->tun_itp = get_tunnel_policy((tun)->tun_lifname, ns)) \ - != NULL))) - -/* - * Search tuns_byaddr_list for occurrence of tun_t with matching - * inner addresses. This function does not take into account - * prefixes. Possibly we could generalize this function in the - * future with V6_MASK_EQ() and pass in an all 1's prefix for IP - * address matches. - * Returns NULL on no match. - * This function is not directly called - it's assigned into itp_get_byaddr(). - */ -static ipsec_tun_pol_t * -itp_get_byaddr_fn(uint32_t *lin, uint32_t *fin, int af, netstack_t *ns) -{ - tun_t *tun_list; - uint_t index; - in6_addr_t lmapped, fmapped, *laddr, *faddr; - ipsec_stack_t *ipss = ns->netstack_ipsec; - tun_stack_t *tuns = ns->netstack_tun; - - if (af == AF_INET) { - laddr = &lmapped; - faddr = &fmapped; - IN6_INADDR_TO_V4MAPPED((struct in_addr *)lin, laddr); - IN6_INADDR_TO_V4MAPPED((struct in_addr *)fin, faddr); - } else { - laddr = (in6_addr_t *)lin; - faddr = (in6_addr_t *)fin; - } - - index = TUN_BYADDR_LIST_HASH(*faddr); - - /* - * it's ok to grab global lock while holding tun_lock/perimeter - */ - mutex_enter(&tuns->tuns_global_lock); - - /* - * walk through list of tun_t looking for a match of - * inner addresses. Addresses are inserted with - * IN6_IPADDR_TO_V4MAPPED(), so v6 matching works for - * all cases. 
- */ - for (tun_list = tuns->tuns_byaddr_list[index]; tun_list; - tun_list = tun_list->tun_next) { - if (IN6_ARE_ADDR_EQUAL(&tun_list->tun_laddr, laddr) && - IN6_ARE_ADDR_EQUAL(&tun_list->tun_faddr, faddr)) { - ipsec_tun_pol_t *itp; - - if (!tun_policy_present(tun_list, ns, ipss)) { - tun1dbg(("itp_get_byaddr: No IPsec policy on " - "matching tun_t instance %p/%s\n", - (void *)tun_list, tun_list->tun_lifname)); - continue; - } - tun1dbg(("itp_get_byaddr: Found matching tun_t %p with " - "IPsec policy\n", (void *)tun_list)); - mutex_enter(&tun_list->tun_itp->itp_lock); - itp = tun_list->tun_itp; - mutex_exit(&tuns->tuns_global_lock); - ITP_REFHOLD(itp); - mutex_exit(&itp->itp_lock); - tun1dbg(("itp_get_byaddr: Found itp %p \n", - (void *)itp)); - return (itp); - } - } - - /* didn't find one, return zilch */ - - tun1dbg(("itp_get_byaddr: No matching tunnel instances with policy\n")); - mutex_exit(&tuns->tuns_global_lock); - return (NULL); -} - -/* - * Search tuns_byaddr_list for occurrence of tun_t, same upper and lower stream, - * and same type (6to4 vs automatic vs configured) - * If none is found, insert this tun entry. - */ -static void -tun_add_byaddr(tun_t *atp) -{ - tun_t *tun_list; - t_uscalar_t ppa = atp->tun_ppa; - uint_t mask = atp->tun_flags & (TUN_LOWER_MASK | TUN_UPPER_MASK); - uint_t tun_type = (atp->tun_flags & (TUN_AUTOMATIC | TUN_6TO4)); - uint_t index = TUN_BYADDR_LIST_HASH(atp->tun_faddr); - tun_stack_t *tuns = atp->tun_netstack->netstack_tun; - - tun1dbg(("tun_add_byaddr: index = %d\n", index)); - - ASSERT(MUTEX_HELD(&tuns->tuns_global_lock)); - ASSERT(atp->tun_next == NULL); - - /* - * walk through list of tun_t looking for a match of - * ppa, same upper and lower stream and same tunnel type - * (automatic or configured). - * There shouldn't be all that many tunnels, so a sequential - * search of the bucket should be fine. 
- */ - for (tun_list = tuns->tuns_byaddr_list[index]; tun_list; - tun_list = tun_list->tun_next) { - if (tun_list->tun_ppa == ppa && - ((tun_list->tun_flags & (TUN_LOWER_MASK | - TUN_UPPER_MASK)) == mask) && - ((tun_list->tun_flags & (TUN_AUTOMATIC | TUN_6TO4)) == - tun_type)) { - tun1dbg(("tun_add_byaddr: tun 0x%p Found ppa %d " \ - "tun_stats 0x%p\n", (void *)atp, ppa, - (void *)tun_list)); - tun1dbg(("tun_add_byaddr: Nothing to do.")); - /* Collision, do nothing. */ - return; - } - } - - /* didn't find one, throw it in the global list */ - - atp->tun_next = tuns->tuns_byaddr_list[index]; - atp->tun_ptpn = &(tuns->tuns_byaddr_list[index]); - if (tuns->tuns_byaddr_list[index] != NULL) - tuns->tuns_byaddr_list[index]->tun_ptpn = &(atp->tun_next); - tuns->tuns_byaddr_list[index] = atp; -} - -/* - * Search tuns_ppa_list for occurrence of tun_ppa, same lower stream, - * and same type (6to4 vs automatic vs configured) - * If none is found, insert this tun entry and create a new kstat for - * the entry. - * This is needed so that multiple tunnels with the same interface - * name (e.g. ip.tun0 under IPv4 and ip.tun0 under IPv6) can share the - * same kstats. 
(they share the same tun_stat and kstat) - * Don't need to hold tun_lock if we are coming is as qwriter() - */ -static tun_stats_t * -tun_add_stat(queue_t *q) -{ - tun_t *atp = (tun_t *)q->q_ptr; - tun_stats_t *tun_list; - tun_stats_t *tun_stat; - t_uscalar_t ppa = atp->tun_ppa; - uint_t lower = atp->tun_flags & TUN_LOWER_MASK; - uint_t tun_type = (atp->tun_flags & (TUN_AUTOMATIC | TUN_6TO4)); - uint_t index = TUN_LIST_HASH(ppa); - tun_stack_t *tuns = atp->tun_netstack->netstack_tun; - - ASSERT(atp->tun_stats == NULL); - - ASSERT(atp->tun_kstat_next == NULL); - /* - * it's ok to grab global lock while holding tun_lock/perimeter - */ - mutex_enter(&tuns->tuns_global_lock); - - /* - * walk through list of tun_stats looking for a match of - * ppa, same lower stream and same tunnel type (automatic - * or configured - * There shouldn't be all that many tunnels, so a sequential - * search should be fine - * XXX - this may change if tunnels get ever get created on the fly - */ - for (tun_list = tuns->tuns_ppa_list[index]; tun_list; - tun_list = tun_list->ts_next) { - if (tun_list->ts_ppa == ppa && - tun_list->ts_lower == lower && - tun_list->ts_type == tun_type) { - tun1dbg(("tun_add_stat: tun 0x%p Found ppa %d " \ - "tun_stats 0x%p\n", (void *)atp, ppa, - (void *)tun_list)); - mutex_enter(&tun_list->ts_lock); - mutex_exit(&tuns->tuns_global_lock); - ASSERT(tun_list->ts_refcnt > 0); - tun_list->ts_refcnt++; - ASSERT(atp->tun_kstat_next == NULL); - ASSERT(atp != tun_list->ts_atp); - /* - * add this tunnel instance to head of list - * of tunnels referencing this kstat structure - */ - atp->tun_kstat_next = tun_list->ts_atp; - tun_list->ts_atp = atp; - atp->tun_stats = tun_list; - mutex_exit(&tun_list->ts_lock); - - /* - * Check for IPsec tunnel policy pointer, if it hasn't - * been set already. If we call get_tunnel_policy() - * and return NULL, there's none configured. 
- */ - if (atp->tun_lifname[0] != '\0' && - atp->tun_itp == NULL) { - atp->tun_itp = - get_tunnel_policy(atp->tun_lifname, - atp->tun_netstack); - } - return (tun_list); - } - } - - /* didn't find one, allocate a new one */ - - tun_stat = kmem_zalloc(sizeof (tun_stats_t), KM_NOSLEEP); - if (tun_stat != NULL) { - mutex_init(&tun_stat->ts_lock, NULL, MUTEX_DEFAULT, - NULL); - tun1dbg(("tun_add_stat: New ppa %d tun_stat 0x%p\n", ppa, - (void *)tun_stat)); - tun_stat->ts_refcnt = 1; - tun_stat->ts_lower = lower; - tun_stat->ts_type = tun_type; - tun_stat->ts_ppa = ppa; - tun_stat->ts_next = tuns->tuns_ppa_list[index]; - tuns->tuns_ppa_list[index] = tun_stat; - tun_stat->ts_atp = atp; - atp->tun_kstat_next = NULL; - atp->tun_stats = tun_stat; - mutex_exit(&tuns->tuns_global_lock); - tun_statinit(tun_stat, q->q_qinfo->qi_minfo->mi_idname, - atp->tun_netstack->netstack_stackid); - } else { - mutex_exit(&tuns->tuns_global_lock); - } - return (tun_stat); -} - -/* - * remove tun from tuns_byaddr_list - * called either holding tun_lock or in perimeter - */ -static void -tun_rem_tun_byaddr_list(tun_t *atp) -{ - ASSERT(MUTEX_HELD(&atp->tun_netstack->netstack_tun->tuns_global_lock)); - - if (atp->tun_ptpn == NULL) { - /* - * If we reach here, it means that this tun_t was passed into - * tun_add_byaddr() and hit a collision when trying to insert - * itself into a list. (See "Collision, do nothing" - * earlier.) Therefore this tun_t needs no removal. 
- */ - goto bail; - } - - /* - * remove tunnel instance from list of tun_t - */ - *(atp->tun_ptpn) = atp->tun_next; - if (atp->tun_next != NULL) { - atp->tun_next->tun_ptpn = atp->tun_ptpn; - atp->tun_next = NULL; - } - atp->tun_ptpn = NULL; - -bail: - ASSERT(atp->tun_next == NULL); -} - -/* - * remove tun from tuns_ppa_list - * called either holding tun_lock or in perimeter - */ -static void -tun_rem_ppa_list(tun_t *atp) -{ - uint_t index = TUN_LIST_HASH(atp->tun_ppa); - tun_stats_t *tun_stat = atp->tun_stats; - tun_stats_t **tun_list; - tun_t **at_list; - tun_stack_t *tuns = atp->tun_netstack->netstack_tun; - - if (tun_stat == NULL) - return; - - ASSERT(atp->tun_ppa == tun_stat->ts_ppa); - mutex_enter(&tuns->tuns_global_lock); - mutex_enter(&tun_stat->ts_lock); - atp->tun_stats = NULL; - tun_stat->ts_refcnt--; - - /* - * If this is the last instance, delete the tun_stat AND unlink the - * ipsec_tun_pol_t from the AVL tree. - */ - if (tun_stat->ts_refcnt == 0) { - kstat_t *tksp; - - tun1dbg(("tun_rem_ppa_list: tun 0x%p Last ref ppa %d tun_stat" \ - " 0x%p\n", (void *)atp, tun_stat->ts_ppa, - (void *)tun_stat)); - - if (atp->tun_itp != NULL) - itp_unlink(atp->tun_itp, atp->tun_netstack); - - ASSERT(atp->tun_kstat_next == NULL); - for (tun_list = &tuns->tuns_ppa_list[index]; *tun_list; - tun_list = &(*tun_list)->ts_next) { - if (tun_stat == *tun_list) { - *tun_list = tun_stat->ts_next; - tun_stat->ts_next = NULL; - break; - } - } - mutex_exit(&tuns->tuns_global_lock); - tksp = tun_stat->ts_ksp; - tun_stat->ts_ksp = NULL; - mutex_exit(&tun_stat->ts_lock); - kstat_delete_netstack(tksp, - atp->tun_netstack->netstack_stackid); - mutex_destroy(&tun_stat->ts_lock); - kmem_free(tun_stat, sizeof (tun_stats_t)); - return; - } - mutex_exit(&tuns->tuns_global_lock); - - tun1dbg(("tun_rem_ppa_list: tun 0x%p Removing ref ppa %d tun_stat " \ - "0x%p\n", (void *)atp, tun_stat->ts_ppa, (void *)tun_stat)); - - ASSERT(tun_stat->ts_atp->tun_kstat_next != NULL); - - /* - * remove 
tunnel instance from list of tunnels referencing - * this kstat. List should be short, so we just search - * sequentially - */ - for (at_list = &tun_stat->ts_atp; *at_list; - at_list = &(*at_list)->tun_kstat_next) { - if (atp == *at_list) { - *at_list = atp->tun_kstat_next; - atp->tun_kstat_next = NULL; - break; - } - } - ASSERT(tun_stat->ts_atp != NULL); - ASSERT(atp->tun_kstat_next == NULL); - mutex_exit(&tun_stat->ts_lock); -} - -/* - * handle all non-unitdata DLPI requests from above - * called as qwriter() - */ -static void -tun_wput_dlpi_other(queue_t *q, mblk_t *mp) -{ - tun_t *atp = (tun_t *)q->q_ptr; - uint_t lvers; - t_uscalar_t prim = *((t_uscalar_t *)mp->b_rptr); - t_uscalar_t dl_err = DL_UNSUPPORTED; - t_uscalar_t dl_errno = 0; - - switch (prim) { - case DL_INFO_REQ: { - dl_info_ack_t *dinfo; - - tun1dbg(("tun_wput_dlpi_other: got DL_INFO_REQ\n")); - - if ((mp = tun_realloc_mblk(q, mp, sizeof (dl_info_ack_t), mp, - B_FALSE)) == NULL) { - return; - } - mp->b_datap->db_type = M_PCPROTO; - - /* send DL_INFO_ACK back up */ - dinfo = (dl_info_ack_t *)mp->b_rptr; - - *dinfo = infoack; - dinfo->dl_current_state = atp->tun_state; - dinfo->dl_max_sdu = atp->tun_mtu; - /* dl_mac_type is set to DL_IPV4 by default. */ - if (atp->tun_flags & TUN_L_V6) - dinfo->dl_mac_type = DL_IPV6; - - /* - * We set the address length to non-zero so that - * automatic tunnels will not have multicast or - * point to point set. - * Someday IPv6 needs to support multicast over automatic - * tunnels - * 6to4 tunnels should behave the same as automatic tunnels - */ - if (atp->tun_flags & (TUN_AUTOMATIC | TUN_6TO4)) { - /* - * set length to size of ip address so that - * ip_newroute will generate dl_unitdata_req for - * us with gateway or dest filed in. (i.e. 
- * might as well have ip do something useful) - */ - dinfo->dl_addr_length = IPV6_ADDR_LEN; - } else { - dinfo->dl_addr_length = 0; - } - qreply(q, mp); - return; - } - - case DL_ATTACH_REQ: { - dl_attach_req_t *dla; - - tun1dbg(("tun_wput_dlpi_other: got DL_ATTACH_REQ\n")); - - if ((mp = tun_realloc_mblk(q, mp, sizeof (dl_ok_ack_t), mp, - B_TRUE)) == NULL) { - return; - } - - dla = (dl_attach_req_t *)mp->b_rptr; - - if (atp->tun_state != DL_UNATTACHED) { - dl_err = DL_OUTSTATE; - tun0dbg(("tun_wput_dlpi_other: " - "DL_ATTACH_REQ state not DL_UNATTACHED (0x%x)\n", - atp->tun_state)); - break; - } - atp->tun_ppa = dla->dl_ppa; - - /* - * get (possibly shared) kstat structure - */ - if (tun_add_stat(q) == NULL) { - ASSERT(atp->tun_stats == NULL); - dl_err = DL_SYSERR; - dl_errno = ENOMEM; - break; - } - atp->tun_state = DL_UNBOUND; - - tun_sendokack(q, mp, prim); - return; - } - - case DL_DETACH_REQ: - - tun1dbg(("tun_wput_dlpi_other: got DL_DETACH_REQ\n")); - - if ((mp = tun_realloc_mblk(q, mp, sizeof (dl_ok_ack_t), mp, - B_FALSE)) == NULL) { - return; - } - - if (atp->tun_state != DL_UNBOUND) { - dl_err = DL_OUTSTATE; - tun0dbg(("tun_wput_dlpi_other: " \ - "DL_DETACH_REQ state not DL_UNBOUND (0x%x)\n", - atp->tun_state)); - break; - } - atp->tun_state = DL_UNATTACHED; - - /* - * don't need to hold tun_lock - * since this is really a single thread operation - * for this instance - */ - if (atp->tun_stats) { - tun_rem_ppa_list(atp); - tun1dbg(("tun_wput_dlpi_other: deleting kstat")); - } - tun_sendokack(q, mp, prim); - return; - - case DL_BIND_REQ: { - dl_bind_req_t *bind_req; - t_uscalar_t dl_sap = 0; - - tun1dbg(("tun_wput_dlpi_other: got DL_BIND_REQ\n")); - - if (atp->tun_state != DL_UNBOUND) { - dl_err = DL_OUTSTATE; - tun0dbg(("tun_wput_dlpi_other: " \ - "DL_BIND_REQ state not DL_UNBOUND (0x%x)\n", - atp->tun_state)); - break; - } - - atp->tun_state = DL_IDLE; - - bind_req = (dl_bind_req_t *)mp->b_rptr; - - dl_sap = bind_req->dl_sap; - ASSERT(bind_req->dl_sap 
== IP_DL_SAP || - bind_req->dl_sap == IP6_DL_SAP); - - lvers = atp->tun_flags & TUN_LOWER_MASK; - - if (dl_sap == IP_DL_SAP) { - if ((atp->tun_flags & TUN_U_V6) != 0) { - dl_err = DL_BOUND; - tun0dbg(("tun_wput_dlpi_other: " \ - "DL_BIND_REQ upper TUN_U_V6 (0x%x)\n", - atp->tun_flags)); - break; - } - - if ((atp->tun_flags & TUN_AUTOMATIC) != 0) { - dl_err = DL_SYSERR; - dl_errno = EINVAL; - tun0dbg(("tun_wput_dlpi_other: " \ - "DL_BIND_REQ for IPv4 atun (0x%x)\n", - atp->tun_flags)); - break; - } - - if ((atp->tun_flags & TUN_6TO4) != 0) { - dl_err = DL_SYSERR; - dl_errno = EINVAL; - tun0dbg(("tun_wput_dlpi_other: " \ - "DL_BIND_REQ for 6to4 tunnel (0x%x)\n", - atp->tun_flags)); - break; - } - - atp->tun_flags |= TUN_U_V4; - if (lvers == TUN_L_V4) { - atp->tun_ipha.ipha_protocol = IPPROTO_ENCAP; - } else { - ASSERT(lvers == TUN_L_V6); - /* Adjust headers. */ - if (atp->tun_encap_lim >= 0) { - atp->tun_ip6h.ip6_nxt = - IPPROTO_DSTOPTS; - atp->tun_telopt = - tun_limit_init_upper_v4; - atp->tun_telopt.tel_telopt. - ip6ot_encap_limit = - atp->tun_encap_lim; - } else { - atp->tun_ip6h.ip6_nxt = IPPROTO_ENCAP; - } - } - } else if (dl_sap == IP6_DL_SAP) { - if ((atp->tun_flags & TUN_U_V4) != 0) { - dl_err = DL_BOUND; - tun0dbg(("tun_wput_dlpi_other: " - "DL_BIND_REQ upper TUN_U_V4 (0x%x)\n", - atp->tun_flags)); - break; - } - atp->tun_flags |= TUN_U_V6; - if (lvers == TUN_L_V4) { - atp->tun_ipha.ipha_protocol = IPPROTO_IPV6; - } else { - ASSERT(lvers == TUN_L_V6); - if (atp->tun_encap_lim >= 0) { - atp->tun_ip6h.ip6_nxt = - IPPROTO_DSTOPTS; - atp->tun_telopt = - tun_limit_init_upper_v6; - atp->tun_telopt.tel_telopt. - ip6ot_encap_limit = - atp->tun_encap_lim; - } else { - atp->tun_ip6h.ip6_nxt = IPPROTO_IPV6; - } - } - } else { - atp->tun_state = DL_UNBOUND; - break; - } - - /* - * Send DL_BIND_ACK, which is the same size as the - * request, so we can re-use the mblk. 
- */ - - *(dl_bind_ack_t *)mp->b_rptr = bindack; - ((dl_bind_ack_t *)mp->b_rptr)->dl_sap = dl_sap; - mp->b_datap->db_type = M_PCPROTO; - qreply(q, mp); - return; - } - case DL_UNBIND_REQ: - - tun1dbg(("tun_wput_dlpi_other: got DL_UNBIND_REQ\n")); - - if ((mp = tun_realloc_mblk(q, mp, sizeof (dl_ok_ack_t), mp, - B_FALSE)) == NULL) { - return; - } - - if (atp->tun_state != DL_IDLE) { - dl_err = DL_OUTSTATE; - tun0dbg(("tun_wput_dlpi_other: " \ - "DL_UNBIND_REQ state not DL_IDLE (0x%x)\n", - atp->tun_state)); - break; - } - atp->tun_state = DL_UNBOUND; - /* Send a DL_OK_ACK. */ - tun_sendokack(q, mp, prim); - return; - - case DL_PHYS_ADDR_REQ: { - dl_phys_addr_ack_t *dpa; - - tun1dbg(("tun_wput_dlpi_other: got DL_PHYS_ADDR_REQ\n")); - - if ((mp = tun_realloc_mblk(q, mp, sizeof (dl_phys_addr_ack_t), - mp, B_FALSE)) == NULL) { - return; - } - - dpa = (dl_phys_addr_ack_t *)mp->b_rptr; - - dpa->dl_primitive = DL_PHYS_ADDR_ACK; - - /* - * dl_addr_length must match info ack - */ - if (atp->tun_flags & TUN_AUTOMATIC) { - if ((atp->tun_flags & TUN_U_V4) != 0) { - dl_err = DL_SYSERR; - dl_errno = EINVAL; - tun0dbg(("tun_wput_dlpi_other: " \ - "DL_PHYS_ADDR_REQ for IPv4 atun\n")); - break; - } else { - dpa->dl_addr_length = IPV6_ADDR_LEN; - } - } else if (atp->tun_flags & TUN_6TO4) { - if ((atp->tun_flags & TUN_U_V4) != 0) { - dl_err = DL_SYSERR; - dl_errno = EINVAL; - tun0dbg(("tun_wput_dlpi_other: " \ - "DL_PHYS_ADDR_REQ for 6to4 tunnel\n")); - break; - } else { - dpa->dl_addr_length = IPV6_ADDR_LEN; - } - } else { - dpa->dl_addr_length = 0; - } - - dpa->dl_addr_offset = 0; - mp->b_datap->db_type = M_PCPROTO; - qreply(q, mp); - return; - } - case DL_SUBS_BIND_REQ: - case DL_ENABMULTI_REQ: - case DL_DISABMULTI_REQ: - case DL_PROMISCON_REQ: - case DL_PROMISCOFF_REQ: - case DL_AGGR_REQ: - case DL_UNAGGR_REQ: - case DL_UDQOS_REQ: - case DL_CONNECT_REQ: - case DL_TOKEN_REQ: - case DL_DISCONNECT_REQ: - case DL_RESET_REQ: - case DL_DATA_ACK_REQ: - case DL_REPLY_REQ: - case 
DL_REPLY_UPDATE_REQ: - case DL_XID_REQ: - case DL_TEST_REQ: - case DL_SET_PHYS_ADDR_REQ: - case DL_GET_STATISTICS_REQ: - case DL_CAPABILITY_REQ: - case DL_CONTROL_REQ: - /* unsupported command */ - break; - default: - /* unknown command */ - tun0dbg(("tun_wput_dlpi_other: unknown DLPI message type: " \ - "%d\n", prim)); - dl_err = DL_BADPRIM; - } - tun_senderrack(q, mp, prim, dl_err, dl_errno); -} - -/* - * handle all DLPI requests from above - */ -static int -tun_wput_dlpi(queue_t *q, mblk_t *mp) -{ - tun_t *atp = (tun_t *)q->q_ptr; - mblk_t *mp1; - int error = 0; - t_uscalar_t prim = *((t_uscalar_t *)mp->b_rptr); - - switch (prim) { - case DL_UNITDATA_REQ: - if (atp->tun_state != DL_IDLE) { - break; - } - if (!canputnext(q)) { - atomic_add_32(&atp->tun_xmtretry, 1); - (void) putbq(q, mp); - return (ENOMEM); /* to get service proc to stop */ - } - /* we don't use any of the data in the DLPI header */ - mp1 = mp->b_cont; - freeb(mp); - if (mp1 == NULL) { - break; - } - switch (atp->tun_flags & TUN_UPPER_MASK) { - case TUN_U_V4: - tun_wdata_v4(q, mp1); - break; - case TUN_U_V6: - tun_wdata_v6(q, mp1); - break; - default: - atomic_add_32(&atp->tun_OutErrors, 1); - ASSERT((atp->tun_flags & TUN_UPPER_MASK) != TUN_U_V4 || - (atp->tun_flags & TUN_UPPER_MASK) != TUN_U_V6); - break; - } - break; - - case DL_NOTIFY_REQ: { - dl_notify_req_t *dlip; - - if (MBLKL(mp) < DL_NOTIFY_REQ_SIZE) { - tun_senderrack(q, mp, prim, DL_BADPRIM, 0); - break; - } - - dlip = (dl_notify_req_t *)mp->b_rptr; - - atp->tun_notifications = - dlip->dl_notifications & DL_NOTE_SDU_SIZE; - - dlip->dl_notifications &= DL_NOTE_SDU_SIZE; - dlip->dl_primitive = DL_NOTIFY_ACK; - mp->b_wptr = mp->b_rptr + DL_NOTIFY_ACK_SIZE; - qreply(q, mp); - - tun_sendsdusize(q); - - break; - } - - default: - qwriter(q, mp, tun_wput_dlpi_other, PERIM_INNER); - break; - } - return (error); -} - -/* - * set the tunnel parameters - * called as qwriter - */ -static void -tun_sparam(queue_t *q, mblk_t *mp) -{ - tun_t *atp = 
(tun_t *)q->q_ptr; - struct iocblk *iocp = (struct iocblk *)(mp->b_rptr); - struct iftun_req *ta; - mblk_t *mp1; - int uerr = 0; - uint_t lvers; - sin_t *sin; - sin6_t *sin6; - size_t size; - boolean_t new; - ipsec_stack_t *ipss = atp->tun_netstack->netstack_ipsec; - tun_stack_t *tuns = atp->tun_netstack->netstack_tun; - - /* don't allow changes after dl_bind_req */ - if (atp->tun_state == DL_IDLE) { - uerr = EAGAIN; - goto nak; - } - - mp1 = mp->b_cont; - if (mp1 == NULL) { - uerr = EPROTO; - goto nak; - } - - mp1 = mp1->b_cont; - if (mp1 == NULL) { - uerr = EPROTO; - goto nak; - } - size = mp1->b_wptr - mp1->b_rptr; - if ((size != sizeof (struct iftun_req)) && - (size != sizeof (struct old_iftun_req))) { - uerr = EPROTO; - goto nak; - } - new = (size == sizeof (struct iftun_req)); - if (atp->tun_iocmp) { - uerr = EBUSY; - goto nak; - } - - lvers = atp->tun_flags & TUN_LOWER_MASK; - - ta = (struct iftun_req *)mp1->b_rptr; - - /* - * Check version number for parsing the security settings. - */ - if (ta->ifta_vers != IFTUN_VERSION) { - uerr = EINVAL; - goto nak; - } - - /* - * Upper layer will give us a v4/v6 indicator, in case we don't know - * already. 
- */ - if ((atp->tun_flags & TUN_UPPER_MASK) == 0) { - if (ta->ifta_flags & 0x80000000) { - atp->tun_flags |= TUN_U_V6; - } else { - atp->tun_flags |= TUN_U_V4; - } - } - - if (((atp->tun_flags & (TUN_AUTOMATIC | TUN_U_V4)) == - (TUN_AUTOMATIC | TUN_U_V4)) || - ((atp->tun_flags & (TUN_6TO4 | TUN_U_V4)) == - (TUN_6TO4 | TUN_U_V4))) { - uerr = EINVAL; - goto nak; - } - - if (ta->ifta_flags & IFTUN_SRC) { - switch (ta->ifta_saddr.ss_family) { - case AF_INET: - sin = (sin_t *)&ta->ifta_saddr; - if (lvers != TUN_L_V4) { - uerr = EINVAL; - goto nak; - } - if ((sin->sin_addr.s_addr == INADDR_ANY) || - (sin->sin_addr.s_addr == 0xffffffff) || - CLASSD(sin->sin_addr.s_addr)) { - uerr = EADDRNOTAVAIL; - goto nak; - } - atp->tun_ipha.ipha_src = sin->sin_addr.s_addr; - IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, - &atp->tun_laddr); - break; - case AF_INET6: - sin6 = (sin6_t *)&ta->ifta_saddr; - if (lvers != TUN_L_V6) { - uerr = EINVAL; - goto nak; - } - - if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || - IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) || - IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { - uerr = EADDRNOTAVAIL; - goto nak; - } - - atp->tun_ip6h.ip6_src = atp->tun_laddr = - sin6->sin6_addr; - break; - default: - uerr = EAFNOSUPPORT; - goto nak; - } - - /* - * If I reach here, then I didn't bail, the src address - * was good. 
- */ - atp->tun_flags |= TUN_SRC; - } - if (ta->ifta_flags & IFTUN_DST) { - if (atp->tun_flags & (TUN_AUTOMATIC | TUN_6TO4)) { - uerr = EINVAL; - goto nak; - } - if (ta->ifta_saddr.ss_family == AF_INET) { - sin = (sin_t *)&ta->ifta_daddr; - if (lvers != TUN_L_V4) { - uerr = EINVAL; - goto nak; - } - if ((sin->sin_addr.s_addr == 0) || - (sin->sin_addr.s_addr == 0xffffffff) || - CLASSD(sin->sin_addr.s_addr)) { - uerr = EADDRNOTAVAIL; - goto nak; - } - atp->tun_ipha.ipha_dst = sin->sin_addr.s_addr; - /* Remove from previous hash bucket */ - IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, - &atp->tun_faddr); - } else if (ta->ifta_saddr.ss_family == AF_INET6) { - sin6 = (sin6_t *)&ta->ifta_daddr; - if (lvers != TUN_L_V6) { - uerr = EINVAL; - goto nak; - } - - if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || - IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) || - IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { - uerr = EADDRNOTAVAIL; - goto nak; - } - - /* Remove from previous hash bucket */ - atp->tun_ip6h.ip6_dst = atp->tun_faddr = - sin6->sin6_addr; - } else { - uerr = EAFNOSUPPORT; - goto nak; - } - - /* - * If I reach here, then I didn't bail, the dst address - * was good. - */ - atp->tun_flags |= TUN_DST; - /* tun_faddr changed, move to proper hash bucket */ - mutex_enter(&tuns->tuns_global_lock); - tun_rem_tun_byaddr_list(atp); - tun_add_byaddr(atp); - mutex_exit(&tuns->tuns_global_lock); - } - - if (new && (ta->ifta_flags & IFTUN_HOPLIMIT)) { - /* Check bounds. */ - if (ta->ifta_hop_limit < 1) { - uerr = EINVAL; - goto nak; - } - atp->tun_hop_limit = ta->ifta_hop_limit; - /* XXX do we really need this flag */ - atp->tun_flags |= TUN_HOP_LIM; - if (lvers == TUN_L_V4) { - atp->tun_ipha.ipha_ttl = atp->tun_hop_limit; - } else { - atp->tun_ip6h.ip6_hops = atp->tun_hop_limit; - } - } - - if (new && (ta->ifta_flags & IFTUN_ENCAP)) { - /* Bounds checking. 
*/ - if ((ta->ifta_encap_lim > IPV6_MAX_ENCAPLIMIT) || - (lvers != TUN_L_V6)) { - uerr = EINVAL; - goto nak; - } - - atp->tun_encap_lim = ta->ifta_encap_lim; - atp->tun_flags |= TUN_ENCAP_LIM; - if (ta->ifta_encap_lim >= 0) { - atp->tun_telopt.tel_telopt.ip6ot_encap_limit = - ta->ifta_encap_lim; - atp->tun_ip6h.ip6_nxt = IPPROTO_DSTOPTS; - } else { - switch (atp->tun_flags & TUN_UPPER_MASK) { - case TUN_U_V4: - atp->tun_ip6h.ip6_nxt = IPPROTO_ENCAP; - break; - case TUN_U_V6: - atp->tun_ip6h.ip6_nxt = IPPROTO_IPV6; - break; - default: - /* This shouldn't happen! */ - ASSERT((atp->tun_flags & TUN_UPPER_MASK) != 0); - break; - } - } - } - - /* - * If we passed in IFTUN_COMPLEX_SECURITY, do not do anything. This - * allows us to let dumb ifconfig(1m)-like apps reflect what they see - * without a penalty. - */ - if ((ta->ifta_flags & (IFTUN_SECURITY | IFTUN_COMPLEX_SECURITY)) == - IFTUN_SECURITY) { - /* Can't set security properties for automatic tunnels. */ - if (atp->tun_flags & (TUN_AUTOMATIC | TUN_6TO4)) { - uerr = EINVAL; - goto nak; - } - - /* - * The version number checked out, so just cast - * ifta_secinfo to an ipsr. - */ - if (ipsec_loaded(ipss)) { - uerr = tun_set_sec_simple(atp, - (ipsec_req_t *)&ta->ifta_secinfo); - } else { - if (ipsec_failed(ipss)) { - uerr = EPROTONOSUPPORT; - goto nak; - } - /* Otherwise, try again later and load IPsec. */ - (void) putq(q, mp); - ipsec_loader_loadnow(ipss); - return; - } - if (uerr != 0) - goto nak; - } - - mp->b_datap->db_type = M_IOCACK; - iocp->ioc_error = 0; - - /* - * Send a T_BIND_REQ if and only if a tsrc/tdst change was requested - * _AND_ tsrc is turned on _AND_ the tunnel either has tdst turned on - * or is an automatic tunnel. 
- */ - if ((ta->ifta_flags & (IFTUN_SRC | IFTUN_DST)) != 0 && - (atp->tun_flags & TUN_SRC) != 0 && - (atp->tun_flags & (TUN_DST | TUN_AUTOMATIC | TUN_6TO4)) != 0) { - atp->tun_iocmp = mp; - uerr = tun_send_bind_req(q); - if (uerr == 0) { - /* qreply() done by T_BIND_ACK processing */ - return; - } else { - atp->tun_iocmp = NULL; - goto nak; - } - } - qreply(q, mp); - return; -nak: - iocp->ioc_error = uerr; - mp->b_datap->db_type = M_IOCNAK; - qreply(q, mp); -} - -static boolean_t -tun_thisvers_policy(tun_t *atp) -{ - boolean_t rc; - ipsec_policy_head_t *iph; - int uvec = atp->tun_flags & TUN_UPPER_MASK; - - if (atp->tun_itp == NULL) - return (B_FALSE); - iph = atp->tun_itp->itp_policy; - - rw_enter(&iph->iph_lock, RW_READER); - rc = iph_ipvN(iph, (uvec & TUN_U_V6)); - rw_exit(&iph->iph_lock); - - return (rc); -} - -/* - * Processes SIOCs to setup a tunnel and IOCs to configure tunnel module. - * M_IOCDATA->M_COPY->DATA or M_IOCTL->DATA - */ -static int -tun_ioctl(queue_t *q, mblk_t *mp) -{ - tun_t *atp = (tun_t *)q->q_ptr; - struct iocblk *iocp = (struct iocblk *)(mp->b_rptr); - struct iftun_req *ta; - mblk_t *mp1; - int reterr = 0; - int uerr = 0; - uint_t lvers; - sin_t *sin; - sin6_t *sin6; - size_t size; - boolean_t new; - ipaddr_t *rr_addr; - char buf[INET6_ADDRSTRLEN]; - struct lifreq *lifr; - netstack_t *ns = atp->tun_netstack; - ipsec_stack_t *ipss = ns->netstack_ipsec; - tun_stack_t *tuns = ns->netstack_tun; - - lvers = atp->tun_flags & TUN_LOWER_MASK; - - switch (iocp->ioc_cmd) { - case OSIOCSTUNPARAM: - case SIOCSTUNPARAM: - qwriter(q, mp, tun_sparam, PERIM_INNER); - return (0); - case OSIOCGTUNPARAM: - case SIOCGTUNPARAM: - mp1 = mp->b_cont; - if (mp1 == NULL) { - uerr = EPROTO; - goto nak; - } - mp1 = mp1->b_cont; - if (mp1 == NULL) { - uerr = EPROTO; - goto nak; - } - size = mp1->b_wptr - mp1->b_rptr; - if ((size != sizeof (struct iftun_req)) && - (size != sizeof (struct old_iftun_req))) { - uerr = EPROTO; - goto nak; - } - new = (size == sizeof 
(struct iftun_req)); - /* - * don't need to hold any locks. Can only be - * changed by qwriter - */ - ta = (struct iftun_req *)mp1->b_rptr; - ta->ifta_flags = 0; - - /* - * Unlike tun_sparam(), the version number for security - * parameters is ignored, since we're filling it in! - */ - ta->ifta_vers = IFTUN_VERSION; - - /* in case we are pushed under something unsupported */ - switch (atp->tun_flags & TUN_UPPER_MASK) { - case TUN_U_V4: - ta->ifta_upper = IFTAP_IPV4; - break; - case TUN_U_V6: - ta->ifta_upper = IFTAP_IPV6; - break; - default: - ta->ifta_upper = 0; - break; - } - /* - * Copy in security information. - * - * If we revise IFTUN_VERSION, this will become revision- - * dependent. - */ - - if (tun_policy_present(atp, ns, ipss) && - tun_thisvers_policy(atp)) { - ipsec_req_t *ipsr; - - ipsr = (ipsec_req_t *)ta->ifta_secinfo; - - mutex_enter(&atp->tun_itp->itp_lock); - if (!(atp->tun_itp->itp_flags & ITPF_P_TUNNEL) && - (atp->tun_policy_index >= - atp->tun_itp->itp_next_policy_index)) { - /* - * Convert 0.0.0.0/0, 0::0/0 tree entry to - * ipsec_req_t. - */ - *ipsr = atp->tun_secinfo; - /* Reality check for empty polhead. */ - if (ipsr->ipsr_ah_req != 0 || - ipsr->ipsr_esp_req != 0) - ta->ifta_flags |= IFTUN_SECURITY; - } else { - bzero(ipsr, sizeof (*ipsr)); - ta->ifta_flags |= - (IFTUN_COMPLEX_SECURITY | IFTUN_SECURITY); - } - mutex_exit(&atp->tun_itp->itp_lock); - } - - if (new && (iocp->ioc_cmd == SIOCGTUNPARAM)) { - - /* Copy in hop limit. */ - if (atp->tun_flags & TUN_HOP_LIM) { - ta->ifta_flags |= IFTUN_HOPLIMIT; - ta->ifta_hop_limit = atp->tun_hop_limit; - } - - /* Copy in encapsulation limit. 
*/ - if (atp->tun_flags & TUN_ENCAP_LIM) { - ta->ifta_flags |= IFTUN_ENCAP; - ta->ifta_encap_lim = atp->tun_encap_lim; - } - } - - /* lower must be IPv4 or IPv6, otherwise open fails */ - if (lvers == TUN_L_V4) { - sin = (sin_t *)&ta->ifta_saddr; - ta->ifta_lower = IFTAP_IPV4; - bzero(sin, sizeof (sin_t)); - sin->sin_family = AF_INET; - if (atp->tun_flags & TUN_SRC) { - IN6_V4MAPPED_TO_IPADDR(&atp->tun_laddr, - sin->sin_addr.s_addr); - ta->ifta_flags |= IFTUN_SRC; - } else { - sin->sin_addr.s_addr = 0; - } - - sin = (sin_t *)&ta->ifta_daddr; - bzero(sin, sizeof (sin_t)); - sin->sin_family = AF_INET; - if (atp->tun_flags & TUN_DST) { - IN6_V4MAPPED_TO_IPADDR(&atp->tun_faddr, - sin->sin_addr.s_addr); - ta->ifta_flags |= IFTUN_DST; - } else { - sin->sin_addr.s_addr = 0; - } - } else { - ASSERT(lvers == TUN_L_V6); - - ta->ifta_lower = IFTAP_IPV6; - sin6 = (sin6_t *)&ta->ifta_saddr; - bzero(sin6, sizeof (sin6_t)); - sin6->sin6_family = AF_INET6; - if (atp->tun_flags & TUN_SRC) { - sin6->sin6_addr = atp->tun_laddr; - ta->ifta_flags |= IFTUN_SRC; - } else { - V6_SET_ZERO(sin6->sin6_addr); - } - - sin6 = (sin6_t *)&ta->ifta_daddr; - bzero(sin6, sizeof (sin6_t)); - sin6->sin6_family = AF_INET6; - - if (atp->tun_flags & TUN_DST) { - ta->ifta_flags |= IFTUN_DST; - sin6->sin6_addr = atp->tun_faddr; - } else { - V6_SET_ZERO(sin6->sin6_addr); - } - } - break; - case SIOCS6TO4TUNRRADDR: { - struct iocblk *iocp; - - /* check to make sure this is not a TRANSPARENT ioctl */ - iocp = (struct iocblk *)mp->b_rptr; - if (iocp->ioc_count == TRANSPARENT) { - uerr = EINVAL; - goto nak; - } - - /* skip over iocblk to M_DATA */ - mp1 = mp->b_cont; - if (mp1 == NULL) { - uerr = EPROTO; - goto nak; - } - - size = mp1->b_wptr - mp1->b_rptr; - if (size != (sizeof (ipaddr_t))) { - uerr = EPROTO; - goto nak; - } - rr_addr = (ipaddr_t *)mp1->b_rptr; - - /* - * Value read MUST equal either: - * 1) a valid unicast IPv4 Address - * 2) INADDR_ANY - * - * (1) enables 6to4 Relay Router communication 
support on - * this system and denotes the IPv4 destination address used - * for sending to 6to4 Relay Routers. - * (2) disables 6to4 Relay Router communication support on - * this system. - * - * Any other value results in a NAK. - */ - if ((*rr_addr == INADDR_ANY) || (!CLASSD(*rr_addr))) { - tun1dbg(("tun_ioctl: 6to4 Relay Router = %s\n", - inet_ntop(AF_INET, rr_addr, buf, - sizeof (buf)))); - tuns->tuns_relay_rtr_addr_v4 = *rr_addr; - } else { - tun1dbg(("tun_ioctl: Invalid 6to4 Relay Router " \ - "address (%s)\n", - inet_ntop(AF_INET, rr_addr, buf, - sizeof (buf)))); - uerr = EINVAL; - goto nak; - } - break; - } - case SIOCG6TO4TUNRRADDR: - /* skip over iocblk to M_DATA */ - mp1 = mp->b_cont; - if (mp1 == NULL) { - uerr = EPROTO; - goto nak; - } - - size = mp1->b_wptr - mp1->b_rptr; - if (size != (sizeof (ipaddr_t))) { - uerr = EPROTO; - goto nak; - } - - rr_addr = (ipaddr_t *)mp1->b_rptr; - *rr_addr = tuns->tuns_relay_rtr_addr_v4; - break; - case DL_IOC_HDR_INFO: - uerr = tun_fastpath(q, mp); - if (uerr != 0) - goto nak; - break; - case SIOCSLIFNAME: - /* - * Intercept SIOCSLIFNAME and attach the name to my - * tunnel_instance. For extra paranoia, if my name is not "" - * (as it would be at tun_t initialization), don't change - * anything. - * - * For now, this is the only way to tie tunnel names (as - * used in IPsec Tunnel Policy (ITP) instances) to actual - * tunnel instances. In practice, SIOCSLIFNAME is only - * used by ifconfig(1m) to change the ill name to something - * ifconfig can handle. - */ - mp1 = mp->b_cont; - if (mp1 != NULL) { - lifr = (struct lifreq *)mp1->b_rptr; - if (atp->tun_lifname[0] == '\0') { - (void) strncpy(atp->tun_lifname, - lifr->lifr_name, LIFNAMSIZ); - ASSERT(atp->tun_itp == NULL); - atp->tun_itp = - get_tunnel_policy(atp->tun_lifname, - ns); - /* - * It really doesn't matter if we return - * NULL or not. If we get the itp pointer, - * we're in good shape. 
- */ - } else { - tun0dbg(("SIOCSLIFNAME: new is %s, old is %s" - " - not changing\n", - lifr->lifr_name, atp->tun_lifname)); - } - } - break; - default: - /* - * We are module that thinks it's a driver so nak anything we - * don't understand - */ - uerr = EINVAL; - goto nak; - } - mp->b_datap->db_type = M_IOCACK; - iocp->ioc_error = 0; - qreply(q, mp); - return (reterr); -nak: - iocp->ioc_error = uerr; - mp->b_datap->db_type = M_IOCNAK; - qreply(q, mp); - return (reterr); -} - -/* - * mp contains the M_IOCTL DL_IOC_HDR_INFO message - * allocate mblk for fast path. - * XXX - fix IP so that db_base and rptr can be different - */ -static int -tun_fastpath(queue_t *q, mblk_t *mp) -{ - tun_t *atp = (tun_t *)q->q_ptr; - mblk_t *nmp; - int error; - dl_unitdata_req_t *dludp; - int hdrlen; - - if (!tun_do_fastpath || atp->tun_state != DL_IDLE) - return (EINVAL); - - error = miocpullup(mp, sizeof (dl_unitdata_req_t)); - if (error != 0) - return (error); - - dludp = (dl_unitdata_req_t *)mp->b_cont->b_rptr; - if (dludp->dl_primitive != DL_UNITDATA_REQ) - return (EINVAL); - - switch (atp->tun_flags & TUN_LOWER_MASK) { - case TUN_L_V4: - nmp = allocb(sizeof (ipha_t) + atp->tun_extra_offset, BPRI_HI); - if (nmp == NULL) { - return (ENOMEM); - } - linkb(mp, nmp); - nmp->b_rptr += atp->tun_extra_offset; - nmp->b_wptr = nmp->b_rptr + sizeof (ipha_t); - *(ipha_t *)(nmp->b_rptr) = atp->tun_ipha; - nmp->b_rptr = nmp->b_datap->db_base; - break; - case TUN_L_V6: - hdrlen = sizeof (ip6_t); - if (atp->tun_encap_lim >= 0) { - hdrlen += IPV6_TUN_ENCAP_OPT_LEN; - } - nmp = allocb(hdrlen + atp->tun_extra_offset, BPRI_HI); - if (nmp == NULL) { - return (ENOMEM); - } - linkb(mp, nmp); - nmp->b_rptr += atp->tun_extra_offset; - nmp->b_wptr = nmp->b_rptr + hdrlen; - bcopy(&atp->tun_ip6h, nmp->b_rptr, hdrlen); - nmp->b_rptr = nmp->b_datap->db_base; - break; - default: - return (EPFNOSUPPORT); - } - atp->tun_flags |= TUN_FASTPATH; - - return (0); -} - - - -/* - * write side service procedure - */ 
-void -tun_wsrv(queue_t *q) -{ - mblk_t *mp; - tun_t *atp = (tun_t *)q->q_ptr; - - while (mp = getq(q)) { - /* out of memory or canputnext failed */ - if (tun_wproc(q, mp) == ENOMEM) { - break; - } - /* - * If we called qwriter, then the only way we - * can tell if we ran out of memory is to check if - * any events have been scheduled - */ - if (atp->tun_events.ev_wtimoutid != 0 && - atp->tun_events.ev_wbufcid != 0) { - break; - } - } -} - - -/* write side put procedure */ -void -tun_wput(queue_t *q, mblk_t *mp) -{ - /* note: q_first is 'protected' by perimeter */ - if (q->q_first != NULL) { - (void) putq(q, mp); - } else { - (void) tun_wproc(q, mp); - } -} - -/* - * called from write side put or service procedure to process - * messages - */ -static int -tun_wproc(queue_t *q, mblk_t *mp) -{ - int error = 0; - - switch (mp->b_datap->db_type) { - case M_DATA: - error = tun_wproc_mdata(q, mp); - break; - - case M_PROTO: - case M_PCPROTO: - /* its a DLPI message */ - error = tun_wput_dlpi(q, mp); - break; - - case M_IOCDATA: - case M_IOCTL: - /* Data to be copied out arrives from ip as M_IOCDATA */ - error = tun_ioctl(q, mp); - break; - - /* we are a module pretending to be a driver.. turn around flush */ - - case M_FLUSH: - if (*mp->b_rptr & FLUSHW) { - flushq(q, FLUSHALL); - *mp->b_rptr &= ~FLUSHW; - } - if (*mp->b_rptr & FLUSHR) - flushq(RD(q), FLUSHALL); - qreply(q, mp); - break; - - /* - * we are a module pretending to be a driver.. so just free message - * we don't understand - */ - default: { - char buf[TUN_WHO_BUF]; - - tun0dbg(("tun_wproc: %s got unknown mblk type %d\n", - tun_who(q, buf), mp->b_datap->db_type)); - freemsg(mp); - break; - } - - } - return (error); -} - -/* - * handle fast path M_DATA message - */ -static int -tun_wproc_mdata(queue_t *q, mblk_t *mp) -{ - tun_t *atp = (tun_t *)q->q_ptr; - int error = 0; - - ASSERT(atp->tun_flags & TUN_FASTPATH); - - ASSERT((atp->tun_flags & TUN_L_V6) ? 
- (mp->b_wptr - mp->b_rptr >= atp->tun_extra_offset + - sizeof (ip6_t)) : - ((atp->tun_flags & TUN_L_V4) ? - (mp->b_wptr - mp->b_rptr >= atp->tun_extra_offset + - sizeof (ipha_t)) : 1)); - - if (!canputnext(q)) { - atomic_add_32(&atp->tun_xmtretry, 1); - (void) putbq(q, mp); - return (ENOMEM); /* get service procedure to stop */ - } - - if (atp->tun_flags & (TUN_AUTOMATIC | TUN_6TO4)) { - int iph_hdr_length; - /* - * get rid of fastpath header. let tun_wdata* - * fill in real thing - */ - - iph_hdr_length = IPH_HDR_LENGTH((ipha_t *)(mp->b_rptr + - atp->tun_extra_offset)); - if (mp->b_wptr - mp->b_rptr < iph_hdr_length + - atp->tun_extra_offset + sizeof (ip6_t)) { - if (!pullupmsg(mp, iph_hdr_length + - atp->tun_extra_offset + sizeof (ip6_t))) { - tun0dbg(("tun_wproc_mdata: message too " \ - "short for IPv6 header\n")); - atomic_add_32(&atp->tun_InErrors, 1); - atomic_add_32(&atp->tun_InDiscard, 1); - freemsg(mp); - return (0); - } - } - mp->b_rptr += atp->tun_extra_offset + iph_hdr_length; - - ASSERT((atp->tun_flags & TUN_UPPER_MASK) == TUN_U_V6); - tun_wdata_v6(q, mp); - return (error); - } - - switch (atp->tun_flags & TUN_UPPER_MASK) { - case TUN_U_V4: - error = tun_wputnext_v4(q, mp); - break; - case TUN_U_V6: - error = tun_wputnext_v6(q, mp); - break; - default: - atomic_add_32(&atp->tun_OutErrors, 1); - freemsg(mp); - error = EINVAL; - } - return (error); -} - -/* - * Because a TUNSPARAM ioctl()'s requirement to only set IPsec policy for a - * given upper instance (IPv4-over-IP* or IPv6-over-IP*), have a special - * AF-specific flusher. This way, setting one upper instance doesn't sabotage - * the other. Don't bother with the hash-chained policy heads - they won't be - * filled in in TUNSPARAM cases. - */ -static void -flush_af(ipsec_policy_head_t *polhead, int ulp_vector, netstack_t *ns) -{ - int dir; - int af = (ulp_vector == TUN_U_V4) ? 
IPSEC_AF_V4 : IPSEC_AF_V6; - ipsec_policy_t *ip, *nip; - - ASSERT(RW_WRITE_HELD(&polhead->iph_lock)); - - for (dir = 0; dir < IPSEC_NTYPES; dir++) { - for (ip = polhead->iph_root[dir].ipr_nonhash[af]; ip != NULL; - ip = nip) { - nip = ip->ipsp_hash.hash_next; - IPPOL_UNCHAIN(polhead, ip, ns); - } - } -} - -/* - * Set and insert the actual simple policies. - */ -static boolean_t -insert_actual_policies(ipsec_tun_pol_t *itp, ipsec_act_t *actp, uint_t nact, - int ulp_vector, netstack_t *ns) -{ - ipsec_selkey_t selkey; - ipsec_policy_t *pol; - ipsec_policy_root_t *pr; - ipsec_policy_head_t *polhead = itp->itp_policy; - - bzero(&selkey, sizeof (selkey)); - - if (ulp_vector & TUN_U_V4) { - selkey.ipsl_valid = IPSL_IPV4; - - /* v4 inbound */ - pol = ipsec_policy_create(&selkey, actp, nact, - IPSEC_PRIO_SOCKET, &itp->itp_next_policy_index, ns); - if (pol == NULL) - return (B_FALSE); - pr = &polhead->iph_root[IPSEC_TYPE_INBOUND]; - HASHLIST_INSERT(pol, ipsp_hash, pr->ipr_nonhash[IPSEC_AF_V4]); - ipsec_insert_always(&polhead->iph_rulebyid, pol); - - /* v4 outbound */ - pol = ipsec_policy_create(&selkey, actp, nact, - IPSEC_PRIO_SOCKET, &itp->itp_next_policy_index, ns); - if (pol == NULL) - return (B_FALSE); - pr = &polhead->iph_root[IPSEC_TYPE_OUTBOUND]; - HASHLIST_INSERT(pol, ipsp_hash, pr->ipr_nonhash[IPSEC_AF_V4]); - ipsec_insert_always(&polhead->iph_rulebyid, pol); - } - - if (ulp_vector & TUN_U_V6) { - selkey.ipsl_valid = IPSL_IPV6; - - /* v6 inbound */ - pol = ipsec_policy_create(&selkey, actp, nact, - IPSEC_PRIO_SOCKET, &itp->itp_next_policy_index, ns); - if (pol == NULL) - return (B_FALSE); - pr = &polhead->iph_root[IPSEC_TYPE_INBOUND]; - HASHLIST_INSERT(pol, ipsp_hash, pr->ipr_nonhash[IPSEC_AF_V6]); - ipsec_insert_always(&polhead->iph_rulebyid, pol); - - /* v6 outbound */ - pol = ipsec_policy_create(&selkey, actp, nact, - IPSEC_PRIO_SOCKET, &itp->itp_next_policy_index, ns); - if (pol == NULL) - return (B_FALSE); - pr = &polhead->iph_root[IPSEC_TYPE_OUTBOUND]; - 
HASHLIST_INSERT(pol, ipsp_hash, pr->ipr_nonhash[IPSEC_AF_V6]); - ipsec_insert_always(&polhead->iph_rulebyid, pol); - } - - return (B_TRUE); -} - -/* - * For the old-fashioned tunnel-ioctl method of setting tunnel security - * properties. In the new world, set this to be a low-priority 0.0.0.0/0 - * match. - */ -static int -tun_set_sec_simple(tun_t *atp, ipsec_req_t *ipsr) -{ - int rc = 0; - uint_t nact; - ipsec_act_t *actp = NULL; - boolean_t clear_all, old_policy = B_FALSE; - ipsec_tun_pol_t *itp; - tun_t *other_tun; - netstack_t *ns = atp->tun_netstack; - ipsec_stack_t *ipss = ns->netstack_ipsec; - - tun1dbg( - ("tun_set_sec_simple: adjusting tunnel security the old way.")); - -#define REQ_MASK (IPSEC_PREF_REQUIRED | IPSEC_PREF_NEVER) - /* Can't specify self-encap on a tunnel!!! */ - if ((ipsr->ipsr_self_encap_req && REQ_MASK) != 0) - return (EINVAL); - - /* - * If it's a "clear-all" entry, unset the security flags and - * resume normal cleartext (or inherit-from-global) policy. - */ - clear_all = ((ipsr->ipsr_ah_req & REQ_MASK) == 0 && - (ipsr->ipsr_esp_req & REQ_MASK) == 0); -#undef REQ_MASK - - mutex_enter(&atp->tun_lock); - if (!tun_policy_present(atp, ns, ipss)) { - if (clear_all) { - bzero(&atp->tun_secinfo, sizeof (ipsec_req_t)); - atp->tun_policy_index = 0; - goto bail; /* No need to allocate! */ - } - - ASSERT(atp->tun_lifname[0] != '\0'); - atp->tun_itp = create_tunnel_policy(atp->tun_lifname, - &rc, &atp->tun_itp_gen, ns); - /* NOTE: "rc" set by create_tunnel_policy(). */ - if (atp->tun_itp == NULL) - goto bail; - } - itp = atp->tun_itp; - - /* Allocate the actvec now, before holding itp or polhead locks. */ - ipsec_actvec_from_req(ipsr, &actp, &nact, ns); - if (actp == NULL) { - rc = ENOMEM; - goto bail; - } - - /* - * Just write on the active polhead. Save the primary/secondary - * stuff for spdsock operations. - * - * Mutex because we need to write to the polhead AND flags atomically. 
- * Other threads will acquire the polhead lock as a reader if the - * (unprotected) flag is set. - */ - mutex_enter(&itp->itp_lock); - if (itp->itp_flags & ITPF_P_TUNNEL) { - /* - * Oops, we lost a race. Let's get out of here. - */ - rc = EBUSY; - goto mutex_bail; - } - old_policy = ((itp->itp_flags & ITPF_P_ACTIVE) != 0); - - if (old_policy) { - /* - * We have to be more subtle here than we would - * in the spdosock code-paths, due to backward compatibility. - */ - ITPF_CLONE(itp->itp_flags); - rc = ipsec_copy_polhead(itp->itp_policy, itp->itp_inactive, ns); - if (rc != 0) { - /* inactive has already been cleared. */ - itp->itp_flags &= ~ITPF_IFLAGS; - goto mutex_bail; - } - rw_enter(&itp->itp_policy->iph_lock, RW_WRITER); - flush_af(itp->itp_policy, atp->tun_flags & TUN_UPPER_MASK, ns); - } else { - /* Else assume itp->itp_policy is already flushed. */ - rw_enter(&itp->itp_policy->iph_lock, RW_WRITER); - } - - if (clear_all) { - /* We've already cleared out the polhead. We are now done. */ - if (avl_numnodes(&itp->itp_policy->iph_rulebyid) == 0) - itp->itp_flags &= ~ITPF_PFLAGS; - rw_exit(&itp->itp_policy->iph_lock); - bzero(&atp->tun_secinfo, sizeof (ipsec_req_t)); - old_policy = B_FALSE; /* Clear out the inactive one too. */ - goto recover_bail; - } - if (insert_actual_policies(itp, actp, nact, - atp->tun_flags & TUN_UPPER_MASK, ns)) { - rw_exit(&itp->itp_policy->iph_lock); - /* - * Adjust MTU and make sure the DL side knows what's up. - */ - atp->tun_ipsec_overhead = ipsec_act_ovhd(actp); - itp->itp_flags = ITPF_P_ACTIVE; - /* - * <sigh> There has to be a better way, but for now, send an - * IRE_DB_REQ again. We will resynch from scratch, but have - * the tun_ipsec_overhead taken into account. - */ - if (atp->tun_flags & TUN_DST) - tun_send_ire_req(atp->tun_wq); - old_policy = B_FALSE; /* Blank out inactive - we succeeded */ - /* Copy ipsec_req_t for subsequent SIOGTUNPARAM ops. 
*/ - atp->tun_secinfo = *ipsr; - } else { - rw_exit(&itp->itp_policy->iph_lock); - rc = ENOMEM; - } - -recover_bail: - atp->tun_policy_index = itp->itp_next_policy_index; - /* Find the "other guy" (v4/v6) and update his tun_policy_index too. */ - if (atp->tun_stats != NULL) { - if (atp->tun_stats->ts_atp == atp) { - other_tun = atp->tun_kstat_next; - ASSERT(other_tun == NULL || - other_tun->tun_kstat_next == NULL); - } else { - other_tun = atp->tun_stats->ts_atp; - ASSERT(other_tun != NULL); - ASSERT(other_tun->tun_kstat_next == atp); - } - if (other_tun != NULL) - other_tun->tun_policy_index = atp->tun_policy_index; - } - - if (old_policy) { - /* Recover policy in in active polhead. */ - ipsec_swap_policy(itp->itp_policy, itp->itp_inactive, ns); - ITPF_SWAP(itp->itp_flags); - atp->tun_extra_offset = TUN_LINK_EXTRA_OFF; - } - - /* Clear policy in inactive polhead. */ - itp->itp_flags &= ~ITPF_IFLAGS; - rw_enter(&itp->itp_inactive->iph_lock, RW_WRITER); - ipsec_polhead_flush(itp->itp_inactive, ns); - rw_exit(&itp->itp_inactive->iph_lock); - -mutex_bail: - mutex_exit(&itp->itp_lock); - -bail: - if (actp != NULL) - ipsec_actvec_free(actp, nact); - mutex_exit(&atp->tun_lock); - return (rc); -} - -/* - * Send an IRE_DB_REQ_TYPE to the lower module to obtain an IRE for the - * tunnel destination. If the tunnel has no destination, then request an - * IRE for the source instead. 
- */ -static void -tun_send_ire_req(queue_t *q) -{ - tun_t *atp = q->q_ptr; - mblk_t *mp; - ire_t *ire; - uint_t lvers = (atp->tun_flags & TUN_LOWER_MASK); - char addrstr[INET6_ADDRSTRLEN]; - - if ((mp = tun_realloc_mblk(q, NULL, sizeof (ire_t), NULL, B_FALSE)) == - NULL) { - tun0dbg(("tun_send_ire_req: couldn't allocate mblk\n")); - return; - } - mp->b_datap->db_type = IRE_DB_REQ_TYPE; - ire = (ire_t *)mp->b_rptr; - if (lvers == TUN_L_V4) { - ire->ire_ipversion = IPV4_VERSION; - /* - * For tunnels without destinations, we request the source - * ire so that we can account for IPsec policy in our MTU - * calculation. - */ - ire->ire_addr = (atp->tun_flags & TUN_DST) ? - atp->tun_ipha.ipha_dst : atp->tun_ipha.ipha_src; - } else { - ASSERT(lvers == TUN_L_V6 && (atp->tun_flags & TUN_DST)); - ire->ire_ipversion = IPV6_VERSION; - ire->ire_addr_v6 = atp->tun_ip6h.ip6_dst; - } - - tun1dbg(("tun_send_ire_req: requesting ire for %s", - (lvers == TUN_L_V4 ? - inet_ntop(AF_INET, &ire->ire_addr, addrstr, INET6_ADDRSTRLEN) : - inet_ntop(AF_INET6, &ire->ire_addr_v6, addrstr, - INET6_ADDRSTRLEN)))); - - atp->tun_ire_lastreq = lbolt; - putnext(WR(q), mp); -} - -/* - * Given the path MTU to the tunnel destination, calculate tunnel's link - * mtu. For configured tunnels, we update the tunnel's link MTU and notify - * the upper instance of IP of the change so that the IP interface's MTU - * can be updated. If the tunnel is a 6to4 or automatic tunnel, just - * return the effective MTU of the tunnel without updating it. We don't - * update the link MTU of 6to4 or automatic tunnels because they tunnel to - * multiple destinations all with potentially differing path MTU's. 
- */ -static uint32_t -tun_update_link_mtu(queue_t *q, uint32_t pmtu, boolean_t icmp) -{ - tun_t *atp = (tun_t *)q->q_ptr; - uint32_t newmtu = pmtu; - boolean_t sendsdusize = B_FALSE; - - /* - * If the pmtu provided came from an ICMP error being passed up - * from below, then the pmtu argument has already been adjusted - * by the IPsec overhead and ip header length. For ICMP6, the - * encap limit option's size is also accounted for as part of - * outer_hlen in icmp_ricmp_err_v?_v6(). - */ - if (!icmp && atp->tun_itp != NULL && - (atp->tun_itp->itp_flags & ITPF_P_ACTIVE)) - newmtu -= atp->tun_ipsec_overhead; - - if (atp->tun_flags & TUN_L_V4) { - if (!icmp) - newmtu -= sizeof (ipha_t); - if (newmtu < IP_MIN_MTU) - newmtu = IP_MIN_MTU; - } else { - ASSERT(atp->tun_flags & TUN_L_V6); - if (!icmp) { - newmtu -= sizeof (ip6_t); - if (atp->tun_encap_lim > 0) - newmtu -= IPV6_TUN_ENCAP_OPT_LEN; - } - if (newmtu < IPV6_MIN_MTU) - newmtu = IPV6_MIN_MTU; - } - - if (!(atp->tun_flags & (TUN_6TO4 | TUN_AUTOMATIC))) { - if (newmtu != atp->tun_mtu) { - atp->tun_mtu = newmtu; - sendsdusize = B_TRUE; - } - - if (sendsdusize) - tun_sendsdusize(q); - } - return (newmtu); -} - -/* - * Process TPI messages responses comming up the read side - */ -/* ARGSUSED */ -int -tun_rput_tpi(queue_t *q, mblk_t *mp) -{ - tun_t *atp = (tun_t *)q->q_ptr; - t_uscalar_t prim = *((t_uscalar_t *)mp->b_rptr); - mblk_t *iocmp; - - switch (prim) { - case T_BIND_ACK: - tun1dbg(("tun_rput_tpi: got a T_BIND_ACK\n")); - mutex_enter(&atp->tun_lock); - - /* - * XXX This first assert may fail if this path gets re- - * executed because of tun_recover() being invoked. - */ - ASSERT((atp->tun_flags & TUN_BIND_SENT) != 0); - ASSERT(atp->tun_iocmp != NULL); - /* - * If we have an IRE in mp->b_cont, use it to help compute - * atp->tun_extra_offset, tun_ipsec_overhead, and the link - * MTU of configured tunnels. 
- */ - if (mp->b_cont != NULL) { - ire_t *ire; - - ire = (ire_t *)mp->b_cont->b_rptr; - /* - * Take advice from lower-layer if it is bigger than - * what we have cached now. We do manage per-tunnel - * policy, but there may be global overhead to account - * for. - */ - atp->tun_ipsec_overhead = max(ire->ire_ipsec_overhead, - atp->tun_ipsec_overhead); - if (atp->tun_flags & TUN_DST) { - atp->tun_extra_offset = - MAX(ire->ire_ll_hdr_length, - TUN_LINK_EXTRA_OFF); - (void) tun_update_link_mtu(q, - ire->ire_max_frag, B_FALSE); - } - } - - /* - * Automatic and 6to4 tunnels only require source to be set - * Configured tunnels require both - */ - if ((atp->tun_flags & TUN_SRC) && - (atp->tun_flags & (TUN_DST | TUN_AUTOMATIC | TUN_6TO4))) { - atp->tun_flags |= TUN_BOUND; - } - - atp->tun_flags &= ~TUN_BIND_SENT; - - iocmp = atp->tun_iocmp; - - /* - * Ack the ioctl - */ - atp->tun_iocmp = NULL; - mutex_exit(&atp->tun_lock); - freemsg(mp); - putnext(q, iocmp); - break; - case T_ERROR_ACK: { - struct T_error_ack *terr = (struct T_error_ack *)mp->b_rptr; - switch (terr->ERROR_prim) { - case T_BIND_REQ: { - struct iftun_req *ta; - mblk_t *mp1; - struct iocblk *iocp; - - mutex_enter(&atp->tun_lock); - atp->tun_flags &= ~(TUN_BOUND | TUN_BIND_SENT); - iocmp = atp->tun_iocmp; - atp->tun_iocmp = NULL; - mutex_exit(&atp->tun_lock); - iocp = (struct iocblk *)(iocmp->b_rptr); - - mp1 = iocmp->b_cont; - if (mp1 != NULL) - mp1 = mp1->b_cont; - if (mp1 != NULL) { - ta = (struct iftun_req *)mp1->b_rptr; - if (ta->ifta_flags & IFTUN_SRC) { - atp->tun_flags &= ~TUN_SRC; - } - if (ta->ifta_flags & IFTUN_DST) { - atp->tun_flags &= ~TUN_DST; - } - } - switch (terr->TLI_error) { - default: - iocp->ioc_error = EINVAL; - break; - case TSYSERR: - iocp->ioc_error = terr->UNIX_error; - break; - case TBADADDR: - iocp->ioc_error = EADDRNOTAVAIL; - break; - } - putnext(q, iocmp); - freemsg(mp); - return (0); - } - default: { - char buf[TUN_WHO_BUF]; - - tun0dbg(("tun_rput_tpi: %s got an unkown TPI 
Error " \ - "message: %d\n", - tun_who(q, buf), terr->ERROR_prim)); - freemsg(mp); - break; - } - } - break; - } - - case T_OK_ACK: - freemsg(mp); - break; - - /* act like a stream head and eat all up comming tpi messages */ - default: { - char buf[TUN_WHO_BUF]; - - tun0dbg(("tun_rput_tpi: %s got an unkown TPI message: %d\n", - tun_who(q, buf), prim)); - freemsg(mp); - break; - } - } - return (0); -} - -/* - * handle tunnel over IPv6 - */ -static int -tun_rdata_v6(queue_t *q, mblk_t *ipsec_mp, mblk_t *data_mp, tun_t *atp) -{ - ip6_t *outer_ip6h, *ip6h; - ipha_t *inner_iph; - uint8_t *rptr; - size_t hdrlen; - mblk_t *mp1, *nmp, *orig_mp = data_mp; - uint8_t nexthdr; - boolean_t inner_v4; - in6_addr_t v6src; - in6_addr_t v6dst; - char buf[TUN_WHO_BUF]; - char buf1[INET6_ADDRSTRLEN]; - char buf2[INET6_ADDRSTRLEN]; - int pullup_len; - - /* need at least an IPv6 header. */ - ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ip6_t)); - - outer_ip6h = (ip6_t *)data_mp->b_rptr; - - /* Handle ip6i_t case. */ - if (outer_ip6h->ip6_nxt == IPPROTO_RAW) { - /* - * Assume sizeof (ip6i_t) == sizeof(ip6_t), can't - * use ASSERT because of lint warnings. - */ - rptr = (uint8_t *)(outer_ip6h + 1); - data_mp->b_rptr = rptr; - if (rptr == data_mp->b_wptr) { - mp1 = data_mp->b_cont; - freeb(data_mp); - orig_mp = data_mp = mp1; - rptr = data_mp->b_rptr; - if (ipsec_mp != NULL) - ipsec_mp->b_cont = data_mp; - } - ASSERT(data_mp->b_wptr - rptr >= sizeof (ip6_t)); - outer_ip6h = (ip6_t *)rptr; - } - - - hdrlen = ip_hdr_length_v6(data_mp, outer_ip6h); - ASSERT(IPH_HDR_VERSION(outer_ip6h) == IPV6_VERSION); - ASSERT(hdrlen >= sizeof (ip6_t)); - ASSERT(hdrlen <= (data_mp->b_wptr - data_mp->b_rptr)); - - v6src = outer_ip6h->ip6_src; - v6dst = outer_ip6h->ip6_dst; - - /* - * If the Next Header field is not IPPROTO_ENCAP or IPPROTO_IPV6, there - * are IPv6 options present that we need to parse in order to figure - * out whether we have an encapsulated IPv4 or IPv6 packet here. 
- */ - if (outer_ip6h->ip6_nxt != IPPROTO_ENCAP && - outer_ip6h->ip6_nxt != IPPROTO_IPV6) { - /* Tunnel packet with options!!! */ - ip6_pkt_t ipp; - - ipp.ipp_fields = 0; /* must be initialized */ - (void) ip_find_hdr_v6(data_mp, outer_ip6h, &ipp, NULL); - if (ipp.ipp_dstopts != NULL) { - nexthdr = ipp.ipp_dstopts->ip6d_nxt; - } else if (ipp.ipp_rthdr != NULL) { - nexthdr = ipp.ipp_rthdr->ip6r_nxt; - } else if (ipp.ipp_hopopts != NULL) { - nexthdr = ipp.ipp_hopopts->ip6h_nxt; - } else { - /* Otherwise, pretend it's IP + ESP. */ - cmn_err(CE_WARN, "tun IPv6 headers wrong (%d).\n", - outer_ip6h->ip6_nxt); - nexthdr = outer_ip6h->ip6_nxt; - } - } else { - nexthdr = outer_ip6h->ip6_nxt; - } - inner_v4 = (nexthdr == IPPROTO_ENCAP); - - /* - * NOTE: The "+ 4" is for the upper-layer protocol information - * (ports) so we can enforce policy. - */ - pullup_len = hdrlen + (inner_v4 ? sizeof (ipha_t) : sizeof (ip6_t)) + 4; - if ((data_mp->b_wptr - data_mp->b_rptr) < pullup_len) { - if (!pullupmsg(data_mp, pullup_len)) { - atomic_add_32(&atp->tun_InErrors, 1); - atomic_add_32(&atp->tun_InDiscard, 1); - goto drop; - } - outer_ip6h = (ip6_t *)data_mp->b_rptr; - } - - /* Shave off the outer header(s). 
*/ - data_mp->b_rptr += hdrlen; - - if (inner_v4) { - /* IPv4 in IPv6 */ - inner_iph = (ipha_t *)data_mp->b_rptr; - ASSERT(IPH_HDR_VERSION(inner_iph) == IPV4_VERSION); - ASSERT(IN6_ARE_ADDR_EQUAL(&v6dst, &atp->tun_laddr) && - IN6_ARE_ADDR_EQUAL(&v6src, &atp->tun_faddr)); - if (!ipsec_tun_inbound(ipsec_mp, &data_mp, atp->tun_itp, - inner_iph, NULL, NULL, outer_ip6h, 0, - atp->tun_netstack)) { - data_mp = NULL; - ipsec_mp = NULL; - atomic_add_32(&atp->tun_InErrors, 1); - goto drop; - } - ipsec_mp = NULL; - if (data_mp != orig_mp) { - /* mp has changed, reset appropriate pointers */ - - /* Outer hdrlen is already shaved off */ - ASSERT(data_mp != NULL); - inner_iph = (ipha_t *)data_mp->b_rptr; - } - - /* - * Remember - ipsec_tun_inbound() may return a whole chain - * of packets if there was per-port policy on the ITP and - * we got a fragmented packet. - */ - if (CLASSD(inner_iph->ipha_dst)) { - for (nmp = data_mp; nmp != NULL; nmp = nmp->b_next) - atomic_add_64(&atp->tun_HCInMulticastPkts, 1); - } else { - for (nmp = data_mp; nmp != NULL; nmp = nmp->b_next) - atomic_add_64(&atp->tun_HCInUcastPkts, 1); - } - } else { - /* IPv6 in IPv6 */ - ip6h = (ip6_t *)data_mp->b_rptr; - ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION); - ASSERT(IN6_ARE_ADDR_EQUAL(&v6dst, &atp->tun_laddr)); - - if (!ipsec_tun_inbound(ipsec_mp, &data_mp, atp->tun_itp, NULL, - ip6h, NULL, outer_ip6h, 0, atp->tun_netstack)) { - data_mp = NULL; - ipsec_mp = NULL; - atomic_add_32(&atp->tun_InErrors, 1); - goto drop; - } - ipsec_mp = NULL; - if (data_mp != orig_mp) { - /* mp has changed, reset appropriate pointers */ - /* v6src should still be a valid and relevant ptr */ - ASSERT(data_mp != NULL); - ip6h = (ip6_t *)data_mp->b_rptr; - } - - /* - * Remember - ipsec_tun_inbound() may return a whole chain - * of packets if there was per-port policy on the ITP and - * we got a fragmented packet. 
- */ - if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { - for (nmp = data_mp; nmp != NULL; nmp = nmp->b_next) - atomic_add_64(&atp->tun_HCInMulticastPkts, 1); - } else { - for (nmp = data_mp; nmp != NULL; nmp = nmp->b_next) - atomic_add_64(&atp->tun_HCInUcastPkts, 1); - } - - if (!IN6_ARE_ADDR_EQUAL(&v6src, &atp->tun_faddr)) { - /* - * Configured Tunnel packet source should match our - * destination - * Lower IP should ensure that this is true - */ - tun0dbg(("tun_rdata_v6: %s src (%s) != tun_faddr " \ - "(%s)\n", tun_who(q, buf), - inet_ntop(AF_INET6, &v6src, buf1, - sizeof (buf1)), - inet_ntop(AF_INET6, &atp->tun_faddr, buf2, - sizeof (buf2)))); - for (nmp = data_mp; nmp != NULL; nmp = nmp->b_next) - atomic_add_32(&atp->tun_InErrors, 1); - goto drop; - } - } - TUN_PUTMSG_CHAIN_STATS(q, data_mp, nmp, &atp->tun_HCInOctets); - return (0); -drop: - if (ipsec_mp != NULL) - freeb(ipsec_mp); - tun_freemsg_chain(data_mp, NULL); - return (0); -} - -/* - * Handle tunnels over IPv4 - * XXX - we don't do any locking here. The worst that - * can happen is we drop the packet or don't record stats quite right - * what's the worst that can happen if the header stuff changes? 
- */ -static int -tun_rdata_v4(queue_t *q, mblk_t *ipsec_mp, mblk_t *data_mp, tun_t *atp) -{ - ipha_t *iph, *inner_iph; - ip6_t *ip6h; - size_t hdrlen; - mblk_t *mp1, *nmp, *orig_mp = data_mp; - boolean_t inner_v4; - ipaddr_t v4src; - ipaddr_t v4dst; - in6_addr_t v4mapped_src; - in6_addr_t v4mapped_dst; - char buf1[INET6_ADDRSTRLEN]; - char buf2[INET6_ADDRSTRLEN]; - char buf[TUN_WHO_BUF]; - int pullup_len; - tun_stack_t *tuns = atp->tun_netstack->netstack_tun; - - /* need at least an IP header */ - ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ipha_t)); - - iph = (ipha_t *)data_mp->b_rptr; - - hdrlen = IPH_HDR_LENGTH(iph); - /* check IP version number */ - ASSERT(IPH_HDR_VERSION(iph) == IPV4_VERSION); - - ASSERT(hdrlen >= sizeof (ipha_t)); - ASSERT(hdrlen <= (data_mp->b_wptr - data_mp->b_rptr)); - - v4src = iph->ipha_src; - v4dst = iph->ipha_dst; - IN6_IPADDR_TO_V4MAPPED(v4src, &v4mapped_src); - IN6_IPADDR_TO_V4MAPPED(v4dst, &v4mapped_dst); - inner_v4 = (iph->ipha_protocol == IPPROTO_ENCAP); - - /* - * NOTE: The "+ 4" is for the upper-layer protocol headers - * so we can enforce policy. - */ - pullup_len = hdrlen + (inner_v4 ? sizeof (ipha_t) : sizeof (ip6_t)) + 4; - if ((data_mp->b_wptr - data_mp->b_rptr) < pullup_len) { - if (!pullupmsg(data_mp, pullup_len)) { - atomic_add_32(&atp->tun_InErrors, 1); - atomic_add_32(&atp->tun_InDiscard, 1); - if (ipsec_mp != NULL) - freeb(ipsec_mp); - goto drop; - } - iph = (ipha_t *)data_mp->b_rptr; - } - - /* Shave off the IPv4 header. */ - data_mp->b_rptr += hdrlen; - - if (inner_v4) { - /* IPv4 in IPv4 */ - inner_iph = (ipha_t *)data_mp->b_rptr; - ASSERT(IPH_HDR_VERSION(inner_iph) == IPV4_VERSION); - ASSERT(IN6_ARE_ADDR_EQUAL(&v4mapped_dst, &atp->tun_laddr) && - IN6_ARE_ADDR_EQUAL(&v4mapped_src, &atp->tun_faddr)); - - /* NOTE: ipsec_tun_inbound() always frees ipsec_mp. 
*/ - if (!ipsec_tun_inbound(ipsec_mp, &data_mp, atp->tun_itp, - inner_iph, NULL, iph, NULL, 0, atp->tun_netstack)) { - data_mp = NULL; - atomic_add_32(&atp->tun_InErrors, 1); - goto drop; - } - if (data_mp != orig_mp) { - /* mp has changed, reset appropriate pointers */ - - /* Outer hdrlen is already shaved off */ - ASSERT(data_mp != NULL); - inner_iph = (ipha_t *)data_mp->b_rptr; - } - - /* - * Remember - ipsec_tun_inbound() may return a whole chain - * of packets if there was per-port policy on the ITP and - * we got a fragmented packet. - */ - if (CLASSD(inner_iph->ipha_dst)) { - for (nmp = data_mp; nmp != NULL; nmp = nmp->b_next) - atomic_add_64(&atp->tun_HCInMulticastPkts, 1); - } else { - for (nmp = data_mp; nmp != NULL; nmp = nmp->b_next) - atomic_add_64(&atp->tun_HCInUcastPkts, 1); - } - - } else { - /* IPv6 in IPv4 */ - ip6h = (ip6_t *)data_mp->b_rptr; - ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION); - - /* NOTE: ipsec_tun_inbound() always frees ipsec_mp. */ - if (!ipsec_tun_inbound(ipsec_mp, &data_mp, atp->tun_itp, NULL, - ip6h, iph, NULL, 0, atp->tun_netstack)) { - data_mp = NULL; - atomic_add_32(&atp->tun_InErrors, 1); - goto drop; - } - if (data_mp != orig_mp) { - /* mp has changed, reset appropriate pointers */ - - /* - * v6src and v4dst should still be - * valid and relevant pointers - */ - ASSERT(data_mp != NULL); - ip6h = (ip6_t *)data_mp->b_rptr; - } - - /* - * Remember - ipsec_tun_inbound() may return a whole chain - * of packets if there was per-port policy on the ITP and - * we got a fragmented packet. - */ - ASSERT(IN6_ARE_ADDR_EQUAL(&v4mapped_dst, &atp->tun_laddr)); - if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { - for (nmp = data_mp; nmp != NULL; nmp = nmp->b_next) - atomic_add_64(&atp->tun_HCInMulticastPkts, 1); - } else { - for (nmp = data_mp; nmp != NULL; nmp = nmp->b_next) - atomic_add_64(&atp->tun_HCInUcastPkts, 1); - } - - /* Is this an automatic tunnel ? 
*/ - if ((atp->tun_flags & TUN_AUTOMATIC) != 0) { - dl_unitdata_ind_t *dludindp; - - /* - * make sure IPv4 destination makes sense - */ - if (v4dst == INADDR_ANY || CLASSD(v4dst)) { - tun0dbg(("tun_rdata_v4: %s tun: invalid IPv4" \ - " dest (%s)\n", - tun_who(q, buf), - inet_ntop(AF_INET, &v4dst, - buf1, sizeof (buf1)))); - for (nmp = data_mp; nmp != NULL; - nmp = nmp->b_next) { - atomic_add_32(&atp->tun_InErrors, 1); - } - goto drop; - } - - /* - * send packet up as DL_UNITDATA_IND so that it won't - * be forwarded - */ - - mp1 = allocb(sizeof (dl_unitdata_ind_t), BPRI_HI); - if (mp1 == NULL) { - tun0dbg(("tun_rdata_v4: allocb failed\n")); - atomic_add_32(&atp->tun_InDiscard, 1); - atomic_add_32(&atp->tun_allocbfail, 1); - goto drop; - } - mp1->b_cont = data_mp; - data_mp = mp1; - /* - * create dl_unitdata_ind with group address set so - * we don't forward - */ - data_mp->b_wptr = data_mp->b_rptr + - sizeof (dl_unitdata_ind_t); - data_mp->b_datap->db_type = M_PROTO; - dludindp = (dl_unitdata_ind_t *)data_mp->b_rptr; - dludindp->dl_primitive = DL_UNITDATA_IND; - dludindp->dl_dest_addr_length = 0; - dludindp->dl_dest_addr_offset = 0; - dludindp->dl_src_addr_length = 0; - dludindp->dl_src_addr_offset = 0; - dludindp->dl_group_address = 1; - - /* Is this a 6to4 tunnel ? */ - } else if ((atp->tun_flags & TUN_6TO4) != 0) { - struct in_addr v4addr; - - /* - * Make sure IPv6 destination is a 6to4 address. - * ip_rput_data_v6 will ensure that 6to4 prefix - * of IPv6 destination and the prefix assigned to - * the interface, on which this packet was received, - * match. 
- */ - if (!IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { - tun0dbg(("tun_rdata_v4: %s tun: invalid " \ - "IPv6 dest (%s)\n", tun_who(q, buf), - inet_ntop(AF_INET6, &ip6h->ip6_dst, buf1, - sizeof (buf1)))); - atomic_add_32(&atp->tun_InErrors, 1); - goto drop; - } - - /* - * make sure IPv4 destination makes sense - */ - if (v4dst == INADDR_ANY || CLASSD(v4dst)) { - tun0dbg(("tun_rdata_v4: %s tun: invalid " \ - "IPv4 dest (%s)\n", tun_who(q, buf), - inet_ntop(AF_INET, &v4dst, buf1, - sizeof (buf1)))); - for (nmp = data_mp; nmp != NULL; - nmp = nmp->b_next) { - atomic_add_32(&atp->tun_InErrors, 1); - } - goto drop; - } - - /* - * 6to4 router security considerations state that - * the V4ADDR portion of the IPv6 destination - * MUST be equal to the IPv4 destination. - */ - IN6_6TO4_TO_V4ADDR(&ip6h->ip6_dst, &v4addr); - if ((ipaddr_t)v4addr.s_addr != v4dst) { - tun0dbg(("tun_rdata_v4: %s tun: V4ADDR " \ - "portion of 6to4 IPv6 dest (%s) does not" \ - " equal IPv4 dest (%s)\n", tun_who(q, buf), - inet_ntop(AF_INET, &v4addr, - buf1, sizeof (buf1)), - inet_ntop(AF_INET, &v4dst, - buf2, sizeof (buf2)))); - for (nmp = data_mp; nmp != NULL; - nmp = nmp->b_next) { - atomic_add_32(&atp->tun_InErrors, 1); - } - goto drop; - } - - /* - * check to see if the source was another 6to4 router - */ - if (IN6_IS_ADDR_6TO4(&ip6h->ip6_src)) { - /* - * 6to4 router security considerations state - * that the V4ADDR portion of the IPv6 source - * MUST be equal to the IPv4 source, when - * the source machine is another 6to4 router - */ - IN6_6TO4_TO_V4ADDR(&ip6h->ip6_src, &v4addr); - if ((ipaddr_t)v4addr.s_addr != v4src) { - tun0dbg(("tun_rdata_v4: %s tun: " \ - "V4ADDR portion of 6to4 IPv6 src" \ - " (%s) does not equal IPv4 src " \ - "(%s)\n", - tun_who(q, buf), - inet_ntop(AF_INET, &v4addr, - buf1, sizeof (buf1)), - inet_ntop(AF_INET, &v4src, - buf2, sizeof (buf2)))); - for (nmp = data_mp; nmp != NULL; - nmp = nmp->b_next) { - atomic_add_32( - &atp->tun_InErrors, 1); - } - goto drop; - } - - /* - * 
IPv6 source is, possibly, a "Native" - * (ie non-6to4) IPv6 host. IPv4 source is, - * possibly, a 6to4 Relay Router. - */ - } else { - /* - * Check if tun module support 6to4 Relay - * Router is disabled or enabled. - * tuns_relay_rtr_addr_v4 will equal INADDR_ANY - * if support is disabled. Otherwise, it will - * equal a valid, routable, IPv4 address; - * denoting that the packet will be accepted. - * - * There is no standard trust mechanism for - * 6to4 Relay Routers, thus communication - * support is disabled by default for - * security reasons. - */ - if (tuns->tuns_relay_rtr_addr_v4 == - INADDR_ANY) { - tun1dbg(("tun_rdata_v4: " - "%s tuns_relay_rtr_addr_v4 = %s, " - "dropping packet from IPv4 src " - "%s\n", tun_who(q, buf), - inet_ntop(AF_INET, - &tuns->tuns_relay_rtr_addr_v4, - buf1, sizeof (buf1)), - inet_ntop(AF_INET, &v4src, buf2, - sizeof (buf2)))); - for (nmp = data_mp; nmp != NULL; - nmp = nmp->b_next) { - atomic_add_32( - &atp->tun_InErrors, 1); - } - goto drop; - } - } - - /* - * this might happen if we are in the middle of - * re-binding - */ - } else if (!IN6_ARE_ADDR_EQUAL(&v4mapped_src, - &atp->tun_faddr)) { - - /* - * Configured Tunnel packet source should match our - * destination - * Lower IP should ensure that this is true - */ - tun0dbg(("tun_rdata_v4: %s src (%s) != tun_faddr " \ - "(%s)\n", tun_who(q, buf), - inet_ntop(AF_INET6, &v4mapped_src, - buf1, sizeof (buf1)), - inet_ntop(AF_INET6, &atp->tun_faddr, - buf2, sizeof (buf2)))); - /* XXX - should this be per-frag? 
*/ - for (nmp = data_mp; nmp != NULL; nmp = nmp->b_next) - atomic_add_32(&atp->tun_InErrors, 1); - goto drop; - } - } - TUN_PUTMSG_CHAIN_STATS(q, data_mp, nmp, &atp->tun_HCInOctets); - return (0); -drop: - tun_freemsg_chain(data_mp, NULL); - return (0); -} - -static void -tun_rput_icmp_err_v6(queue_t *q, mblk_t *mp, mblk_t *ipsec_mp) -{ - tun_t *atp = (tun_t *)q->q_ptr; - ip6_t *ip6; - icmp6_t *icmph; - int hdr_length; - - ip6 = (ip6_t *)mp->b_rptr; - hdr_length = ip_hdr_length_v6(mp, ip6); - icmph = (icmp6_t *)(&mp->b_rptr[hdr_length]); - - switch (atp->tun_flags & TUN_UPPER_MASK) { - case TUN_U_V6: - icmp_ricmp_err_v6_v6(q, mp, ipsec_mp, icmph); - break; - case TUN_U_V4: - icmp_ricmp_err_v4_v6(q, mp, ipsec_mp, icmph); - break; - default: - atomic_add_32(&atp->tun_InErrors, 1); - ASSERT(0); - if (ipsec_mp != NULL) - freeb(ipsec_mp); - freemsg(mp); - } -} - -/* - * icmp from lower IPv4 - * Process ICMP messages from IPv4. Pass them to the appropriate - * lower processing function. - */ -static void -tun_rput_icmp_err_v4(queue_t *q, mblk_t *mp, mblk_t *ipsec_mp) -{ - tun_t *atp = (tun_t *)q->q_ptr; - - switch (atp->tun_flags & TUN_UPPER_MASK) { - case TUN_U_V6: - icmp_ricmp_err_v6_v4(q, mp, ipsec_mp); - break; - case TUN_U_V4: - icmp_ricmp_err_v4_v4(q, mp, ipsec_mp); - break; - default: - atomic_add_32(&atp->tun_InErrors, 1); - ASSERT(0); - if (ipsec_mp != NULL) - freeb(ipsec_mp); - freemsg(mp); - } -} - -/* - * Process ICMP message from IPv4 encapsulating an IPv4 packet. - * If this message contains path mtu information, cut out the - * encapsulation from the icmp data. 
If there is still useful - * information in the icmp data pass it upstream (packaged correctly for - * the upper layer IP) - */ -static void -icmp_ricmp_err_v4_v4(queue_t *q, mblk_t *mp, mblk_t *ipsec_mp) -{ - tun_t *atp = (tun_t *)q->q_ptr; - ipha_t *outer_ipha, *inner_ipha; - int outer_hlen; - int inner_hlen; - int hlen; - icmph_t icmp; - uint8_t type; - uint8_t code; - char buf1[INET_ADDRSTRLEN]; - char buf2[INET_ADDRSTRLEN]; - icmph_t *icmph; - mblk_t *orig_mp = mp; - - /* - * The packet looks like this : - * - * [IPv4(0)][ICMPv4][IPv4(1)][IPv4(2)][ULP] - * - * We want most of this in one piece. But if the ULP is ICMP, we - * need to see whether it is an ICMP error or not. We should not - * send icmp errors in response to icmp errors. "outer_ipha" points - * to IP header (1), "inner_ipha" points to IP header (2). Inbound - * policy lookups for ICMP need to reverse the src/dst of things. - * Fortunately, ipsec_tun_inbound() can determine if this is an ICMP - * message or not. - * - * The caller already pulled up the entire message, or should have! 
- */ - ASSERT(mp->b_cont == NULL); - - hlen = IPH_HDR_LENGTH((ipha_t *)mp->b_rptr); - icmph = (icmph_t *)(&mp->b_rptr[hlen]); - outer_ipha = (ipha_t *)&icmph[1]; - outer_hlen = IPH_HDR_LENGTH(outer_ipha); - inner_ipha = (ipha_t *)((uint8_t *)outer_ipha + outer_hlen); - - if (((uchar_t *)inner_ipha + sizeof (ipha_t)) > mp->b_wptr) { - atomic_add_32(&atp->tun_InDiscard, 1); - if (ipsec_mp != NULL) - freeb(ipsec_mp); - freemsg(mp); - return; - } - if (inner_ipha->ipha_protocol == IPPROTO_ICMP) { - icmph_t *inner_icmph; - - inner_hlen = IPH_HDR_LENGTH(inner_ipha); - inner_icmph = (icmph_t *)((uchar_t *)inner_ipha + inner_hlen); - - if (((uchar_t *)inner_icmph + sizeof (icmph_t)) > mp->b_wptr) { - atomic_add_32(&atp->tun_InDiscard, 1); - if (ipsec_mp != NULL) - freeb(ipsec_mp); - freemsg(mp); - return; - } - - switch (inner_icmph->icmph_type) { - case ICMP_DEST_UNREACHABLE: - case ICMP_SOURCE_QUENCH: - case ICMP_TIME_EXCEEDED: - case ICMP_PARAM_PROBLEM: - case ICMP_REDIRECT: - atomic_add_32(&atp->tun_InDiscard, 1); - freemsg(mp); - if (ipsec_mp != NULL) - freeb(ipsec_mp); - return; - default : - break; - } - } - - type = icmph->icmph_type; - code = icmph->icmph_code; - - /* - * NOTE: icmp_inbound() in IP already checked global policy on the - * outermost header. If we got here, IP thought it was okay for - * us to receive it. We now have to use inner policy to see if - * we want to percolate it up (like conn_t's are checked). - * - * Use -outer_hlen to indicate this is an ICMP packet. And - * ipsec_tun_inbound() always frees ipsec_mp. 
- */ - if (!ipsec_tun_inbound(ipsec_mp, &mp, atp->tun_itp, inner_ipha, NULL, - outer_ipha, NULL, -outer_hlen, atp->tun_netstack)) { - /* Callee did all of the freeing */ - return; - } - ASSERT(mp == orig_mp); - - /* New packet will contain all of old packet */ - - mp->b_rptr = (uchar_t *)inner_ipha; - - switch (type) { - case ICMP_DEST_UNREACHABLE: - switch (code) { - case ICMP_FRAGMENTATION_NEEDED: { - uint16_t mtu; - - mtu = ntohs(icmph->icmph_du_mtu); - if (icmph->icmph_du_zero != 0 && mtu <= IP_MIN_MTU) { - tun0dbg(("icmp_ricmp_err_v4_v4: invalid " \ - "icmp mtu\n")); - atomic_add_32(&atp->tun_InErrors, 1); - freemsg(mp); - return; - } - if (outer_hlen < mtu) - mtu -= outer_hlen; - mutex_enter(&atp->tun_lock); - mtu = tun_update_link_mtu(q, mtu, B_TRUE); - mutex_exit(&atp->tun_lock); - if (!tun_icmp_too_big_v4(q, inner_ipha, mtu, mp)) { - atomic_add_32(&atp->tun_InDiscard, 1); - atomic_add_32(&atp->tun_allocbfail, 1); - } - return; - } - case ICMP_PROTOCOL_UNREACHABLE: - /* - * XXX may need way to throttle messages - * XXX should we do this for automatic or - * just configured tunnels ? - */ - (void) strlog(q->q_qinfo->qi_minfo->mi_idnum, - atp->tun_ppa, 1, - SL_ERROR | SL_WARN, - "%s.%s%d: Protocol unreachable. " - "Misconfigured tunnel? source %s" - " destination %s\n", - (atp->tun_flags & TUN_LOWER_MASK) == - TUN_L_V4 ? "ip" : "ip6", - TUN_NAME, atp->tun_ppa, - inet_ntop(AF_INET, &outer_ipha->ipha_dst, - buf1, sizeof (buf1)), - inet_ntop(AF_INET, &outer_ipha->ipha_src, - buf2, sizeof (buf2))); - /* FALLTHRU */ - case ICMP_NET_UNREACHABLE: - case ICMP_HOST_UNREACHABLE: - case ICMP_DEST_NET_UNKNOWN: - case ICMP_DEST_HOST_UNKNOWN: - case ICMP_SRC_HOST_ISOLATED: - case ICMP_SOURCE_ROUTE_FAILED: - case ICMP_DEST_NET_UNREACH_TOS: - case ICMP_DEST_HOST_UNREACH_TOS: - icmp.icmph_type = ICMP_DEST_UNREACHABLE; - /* XXX HOST or NET unreachable? 
*/ - icmp.icmph_code = ICMP_NET_UNREACHABLE; - icmp.icmph_rd_gateway = (ipaddr_t)0; - break; - case ICMP_DEST_NET_UNREACH_ADMIN: - case ICMP_DEST_HOST_UNREACH_ADMIN: - icmp.icmph_type = ICMP_DEST_UNREACHABLE; - icmp.icmph_code = ICMP_DEST_NET_UNREACH_ADMIN; - icmp.icmph_rd_gateway = (ipaddr_t)0; - break; - default: - atomic_add_32(&atp->tun_InErrors, 1); - freemsg(mp); - return; - } - break; - case ICMP_TIME_EXCEEDED: - icmp.icmph_type = ICMP_TIME_EXCEEDED; - icmp.icmph_code = code; - icmp.icmph_rd_gateway = (ipaddr_t)0; - break; - case ICMP_PARAM_PROBLEM: - icmp.icmph_type = ICMP_PARAM_PROBLEM; - if (icmph->icmph_pp_ptr < (uchar_t *)inner_ipha - mp->b_rptr) { - tun0dbg(("icmp_ricmp_err_v4_v4: ICMP_PARAM_PROBLEM " \ - "too short\n")); - atomic_add_32(&atp->tun_InErrors, 1); - freemsg(mp); - return; - } - icmp.icmph_pp_ptr = htonl(icmph->icmph_pp_ptr - - ((uchar_t *)inner_ipha - mp->b_rptr) + sizeof (ipha_t) + - sizeof (icmph_t)); - break; - default: - atomic_add_32(&atp->tun_InErrors, 1); - freemsg(mp); - return; - } - if (!tun_icmp_message_v4(q, inner_ipha, &icmp, mp)) { - atomic_add_32(&atp->tun_InDiscard, 1); - atomic_add_32(&atp->tun_allocbfail, 1); - } -} - -/* - * Process ICMP message from IPv6 encapsulating an IPv4 packet - * If this message contains path mtu information, cut out the - * encapsulation from the icmp data. 
If there is still useful - * information in the icmp data pass it upstream (packaged correctly for - * the upper layer IP) - */ -static void -icmp_ricmp_err_v4_v6(queue_t *q, mblk_t *mp, mblk_t *ipsec_mp, icmp6_t *icmph) -{ - tun_t *atp = (tun_t *)q->q_ptr; - ip6_t *ip6; - ipha_t *ipha; - int outer_hlen; - icmph_t icmp; - uint8_t type; - size_t offset, newoffset; - uint8_t *hdrp; - ip6_dest_t *destp; - size_t optlen, length; - struct ip6_opt *optp; - boolean_t found = B_FALSE; - ip6_pkt_t pkt; - mblk_t *orig_mp = mp; - - ip6 = (ip6_t *)&(icmph[1]); - - /* - * The packet looks like this: - * - * [IPv6(0)][ICMPv6][IPv6(1)][IPv4][ULP] - * - * "ip6" points to the IPv6 header labelled (1). - */ - outer_hlen = ip_hdr_length_v6(mp, ip6); - ipha = (ipha_t *)((uint8_t *)ip6 + outer_hlen); - type = icmph->icmp6_type; - - /* - * NOTE: icmp_inbound() in IP already checked global policy on the - * outermost header. If we got here, IP thought it was okay for - * us to receive it. We now have to use inner policy to see if - * we want to percolate it up (like conn_t's are checked). - * - * Use -outer_hlen to indicate this is an ICMP packet. And - * ipsec_tun_inbound() always frees ipsec_mp. - */ - if (!ipsec_tun_inbound(ipsec_mp, &mp, atp->tun_itp, ipha, NULL, NULL, - ip6, -outer_hlen, atp->tun_netstack)) - /* Callee did all of the freeing */ - return; - ASSERT(mp == orig_mp); - - /* new packet will contain all of old packet */ - - mp->b_rptr = (uchar_t *)ipha; - - /* - * Fill in "icmp" data structure for passing to tun_icmp_message_v4(). - */ - switch (type) { - case ICMP6_PARAM_PROB: - /* - * If the ICMPv6 error points to a valid Tunnel - * Encapsulation Limit option and the limit value is - * 0, then fall through and send a host unreachable - * message. Otherwise, break. 
- */ - hdrp = (uint8_t *)&ip6[1]; - pkt.ipp_fields = 0; /* must be initialized */ - (void) ip_find_hdr_v6(mp, ip6, &pkt, NULL); - if ((pkt.ipp_fields & IPPF_DSTOPTS) != 0) { - destp = pkt.ipp_dstopts; - } else if ((pkt.ipp_fields & IPPF_RTDSTOPTS) != 0) { - destp = pkt.ipp_rtdstopts; - } else { - break; /* out of switch */ - } - - offset = sizeof (ip6_t) + ((uint8_t *)destp - hdrp); - newoffset = offset + 8 * (destp->ip6d_len + 1); - hdrp = (uint8_t *)destp; - if ((offset <= icmph->icmp6_pptr) && - (icmph->icmp6_pptr < newoffset)) { - - /* - * We have a potential match. Parse the header into - * options. - */ - length = (newoffset - offset) - 2; - optp = (struct ip6_opt *)(destp + 1); - offset += 2; - hdrp += 2; - while (length > 0 && found != B_TRUE) { - /* - * hdrp[2] is the tunnel encapsulation limit - * value. - */ - if ((optp->ip6o_type == IP6OPT_TUNNEL_LIMIT) && - ((offset + 2) == icmph->icmp6_pptr) && - (hdrp[2] == 0)) { - /* Found it. */ - found = B_TRUE; - } - optlen = optp->ip6o_len + 2; - length -= optlen; - hdrp += optlen; - offset += optlen; - } - } - - if (found != B_TRUE) { - freemsg(mp); - return; - } - /*FALLTHRU*/ - case ICMP6_TIME_EXCEEDED: - case ICMP6_DST_UNREACH: - icmp.icmph_type = ICMP_DEST_UNREACHABLE; - icmp.icmph_code = ICMP_HOST_UNREACHABLE; - break; - case ICMP6_PACKET_TOO_BIG: { - uint32_t mtu; - - mtu = ntohl(icmph->icmp6_mtu); - if (outer_hlen < mtu) - mtu -= outer_hlen; - mutex_enter(&atp->tun_lock); - mtu = tun_update_link_mtu(q, mtu, B_TRUE); - mutex_exit(&atp->tun_lock); - /* - * RFC 2473 says we should only forward this on to the IPv4 - * original source if the IPv4 header has the DF bit set. - */ - if (ipha->ipha_fragment_offset_and_flags & IPH_DF) { - icmp.icmph_type = ICMP_DEST_UNREACHABLE; - icmp.icmph_code = ICMP_FRAGMENTATION_NEEDED; - /* - * NOTE - htons() because ICMP (for IPv4) uses a - * uint16_t here. 
- */ - icmp.icmph_du_mtu = htons(mtu); - icmp.icmph_du_zero = 0; - } - break; - } - default: - freemsg(mp); - return; - } - - if (!tun_icmp_message_v4(q, ipha, &icmp, mp)) { - atomic_add_32(&atp->tun_InDiscard, 1); - atomic_add_32(&atp->tun_allocbfail, 1); - } -} - -/* - * Process ICMP message from IPv6 encapsulating an IPv6 packet - * If this message contains path mtu information, cut out the - * encapsulation from the icmp data. If there is still useful - * information in the icmp data pass it upstream (packaged correctly for - * the upper layer IP). Otherwise, drop the message. - */ -static void -icmp_ricmp_err_v6_v6(queue_t *q, mblk_t *mp, mblk_t *ipsec_mp, icmp6_t *icmph) -{ - ip6_t *ip6; - ip6_t *inner_ip6; - int outer_hlen; - tun_t *atp = (tun_t *)q->q_ptr; - icmp6_t icmp; - uint8_t type; - size_t offset, newoffset; - uint8_t *hdrp; - ip6_dest_t *destp; - size_t optlen, length; - struct ip6_opt *optp; - boolean_t found = B_FALSE; - ip6_pkt_t pkt; - mblk_t *orig_mp = mp; - - /* - * The packet looks like this : - * - * [IPv6(0)][ICMPv4][IPv6(1)][IPv6(2)][ULP] - * - * "ip6" points to the IPv6 header labelled (1), and inner_ip6 points - * to IPv6 header (2). - */ - ip6 = (ip6_t *)&icmph[1]; - outer_hlen = ip_hdr_length_v6(mp, ip6); - inner_ip6 = (ip6_t *)((uint8_t *)ip6 + outer_hlen); - type = icmph->icmp6_type; - - /* - * NOTE: icmp_inbound() in IP already checked global policy on the - * outermost header. If we got here, IP thought it was okay for - * us to receive it. We now have to use inner policy to see if - * we want to percolate it up (like conn_t's are checked). - * - * Use -outer_hlen to indicate this is an ICMP packet. And - * ipsec_tun_inbound() always frees ipsec_mp. 
- */ - if (!ipsec_tun_inbound(ipsec_mp, &mp, atp->tun_itp, NULL, inner_ip6, - NULL, ip6, -outer_hlen, atp->tun_netstack)) - /* Callee did all of the freeing */ - return; - ASSERT(mp == orig_mp); - - /* new packet will contain all of old packet */ - - mp->b_rptr = (uchar_t *)inner_ip6; - - /* - * Fill in "icmp" data structure for passing to tun_icmp_message_v6(). - */ - switch (type) { - case ICMP6_PARAM_PROB: - /* - * If the ICMPv6 error points to a valid Tunnel - * Encapsulation Limit option and the limit value is - * 0, then fall through and send a host unreachable - * message. Otherwise, break. - */ - hdrp = (uint8_t *)&ip6[1]; - pkt.ipp_fields = 0; /* must be initialized */ - (void) ip_find_hdr_v6(mp, ip6, &pkt, NULL); - if ((pkt.ipp_fields & IPPF_DSTOPTS) != 0) { - destp = pkt.ipp_dstopts; - } else if ((pkt.ipp_fields & IPPF_RTDSTOPTS) != 0) { - destp = pkt.ipp_rtdstopts; - } else { - break; /* out of switch */ - } - - offset = sizeof (ip6_t) + ((uint8_t *)destp - hdrp); - newoffset = offset + 8 * (destp->ip6d_len + 1); - hdrp = (uint8_t *)destp; - if ((offset <= icmph->icmp6_pptr) && - (icmph->icmp6_pptr < newoffset)) { - - /* - * We have a potential match. Parse the header into - * options. - */ - length = (newoffset - offset) - 2; - optp = (struct ip6_opt *)(destp + 1); - offset += 2; - hdrp += 2; - while (length > 0 && found != B_TRUE) { - /* - * hdrp[2] is the tunnel encapsulation limit - * value. - */ - if ((optp->ip6o_type == IP6OPT_TUNNEL_LIMIT) && - ((offset + 2) == icmph->icmp6_pptr) && - (hdrp[2] == 0)) { - /* Found it. 
*/ - found = B_TRUE; - } - optlen = optp->ip6o_len + 2; - length -= optlen; - hdrp += optlen; - offset += optlen; - } - } - - if (found != B_TRUE) { - freemsg(mp); - return; /* case */ - } - /*FALLTHRU*/ - case ICMP6_TIME_EXCEEDED: - case ICMP6_DST_UNREACH: - icmp.icmp6_type = ICMP6_DST_UNREACH; - icmp.icmp6_code = ICMP6_DST_UNREACH_ADDR; - break; - case ICMP6_PACKET_TOO_BIG: { - uint32_t mtu; - - mtu = ntohl(icmph->icmp6_mtu); - if (outer_hlen < mtu) - mtu -= outer_hlen; - mutex_enter(&atp->tun_lock); - mtu = tun_update_link_mtu(q, mtu, B_TRUE); - mutex_exit(&atp->tun_lock); - /* - * RFC 2473 says we should forward this on to the IPv6 original - * source only if the original packet size is larger than the - * IPv6 minimum link MTU. - */ - if (ip_hdr_length_v6(mp, inner_ip6) > IPV6_MIN_MTU) { - icmp.icmp6_type = ICMP6_PACKET_TOO_BIG; - icmp.icmp6_code = 0; - icmp.icmp6_mtu = htonl(mtu); - } - break; - } - default: - freemsg(mp); - return; - } - - if (tun_icmp_message_v6(q, inner_ip6, &icmp, IPV6_DEFAULT_HOPS, mp) == - B_FALSE) { - atomic_add_32(&atp->tun_InDiscard, 1); - atomic_add_32(&atp->tun_allocbfail, 1); - } -} - -/* - * Process ICMP message from IPv4 encapsulating an IPv6 packet - * If this message contains path mtu information, cut out the - * encapsulation from the icmp data. If there is still useful - * information in the icmp data pass it upstream (packaged correctly for - * the upper layer IP) - */ -static void -icmp_ricmp_err_v6_v4(queue_t *q, mblk_t *mp, mblk_t *ipsec_mp) -{ - tun_t *atp = (tun_t *)q->q_ptr; - ip6_t *ip6h; - ipha_t *outer_ipha; - int outer_hlen; - int hlen; - icmp6_t icmp6; - uint8_t type; - uint8_t code; - uint8_t hoplim; - char buf1[INET_ADDRSTRLEN]; - char buf2[INET_ADDRSTRLEN]; - icmph_t *icmph; - uint16_t ip6_hdr_length; - uint8_t *nexthdrp; - mblk_t *orig_mp = mp; - - /* - * The case here is pretty easy when compared to IPv4 in IPv4 - * encapsulation. 
- * - * The packet looks like this : - * - * [IPv4(0)][ICMPv4][IPv4(1)][IPv6][ULP] - * - * We want most of this in one piece. But if the ULP is ICMPv6, we - * need to see whether it is an ICMPv6 error or not. We should not - * send icmp errors in response to icmp errors. "outer_ipha" points to - * IP header (1). "ip6h" is obvious. To see whether ULP is ICMPv6 or - * not, we need to call ip_hdr_length_nexthdr_v6 function which - * expects everything to be pulled up. Fortunately, the caller - * should've done all of the pulling up. - */ - ASSERT(mp->b_cont == NULL); - - /* - * icmp_inbound has pulled up the message until the - * outer IP header excluding any IP options. - */ - hlen = IPH_HDR_LENGTH((ipha_t *)mp->b_rptr); - icmph = (icmph_t *)(&mp->b_rptr[hlen]); - outer_ipha = (ipha_t *)&icmph[1]; - outer_hlen = IPH_HDR_LENGTH(outer_ipha); - ip6h = (ip6_t *)((uint8_t *)outer_ipha + outer_hlen); - - if (((uchar_t *)ip6h + sizeof (ip6_t)) > mp->b_wptr) { - atomic_add_32(&atp->tun_InDiscard, 1); - if (ipsec_mp != NULL) - freeb(ipsec_mp); - freemsg(mp); - return; - } - - /* - * Do not send ICMPv6 error in reply to ICMPv6 error. - */ - if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &ip6_hdr_length, &nexthdrp)) { - atomic_add_32(&atp->tun_InErrors, 1); - if (ipsec_mp != NULL) - freeb(ipsec_mp); - freemsg(mp); - return; - } - if (*nexthdrp == IPPROTO_ICMPV6) { - icmp6_t *inner_icmp6; - - ip6_hdr_length += (hlen + sizeof (icmph_t) + outer_hlen); - inner_icmp6 = (icmp6_t *)(&mp->b_rptr[ip6_hdr_length]); - - if ((mp->b_wptr < ((uchar_t *)inner_icmp6 + ICMP6_MINLEN)) || - (ICMP6_IS_ERROR(inner_icmp6->icmp6_type)) || - inner_icmp6->icmp6_type == ND_REDIRECT) { - atomic_add_32(&atp->tun_InErrors, 1); - if (ipsec_mp != NULL) - freeb(ipsec_mp); - freemsg(mp); - return; - } - } - - type = icmph->icmph_type; - code = icmph->icmph_code; - hoplim = outer_ipha->ipha_ttl; - - /* - * NOTE: icmp_inbound() in IP already checked global policy on the - * outermost header. 
If we got here, IP thought it was okay for - * us to receive it. We now have to use inner policy to see if - * we want to percolate it up (like conn_t's are checked). - * - * Use -outer_hlen to indicate this is an ICMP packet. And - * ipsec_tun_inbound() always frees ipsec_mp. - */ - if (!ipsec_tun_inbound(ipsec_mp, &mp, atp->tun_itp, NULL, ip6h, - outer_ipha, NULL, -outer_hlen, atp->tun_netstack)) - /* Callee did all of the freeing */ - return; - ASSERT(mp == orig_mp); - - /* New packet will contain all of old packet */ - - mp->b_rptr = (uchar_t *)ip6h; - - switch (type) { - case ICMP_DEST_UNREACHABLE: - switch (code) { - case ICMP_FRAGMENTATION_NEEDED: { - uint16_t mtu; - - mtu = ntohs(icmph->icmph_du_mtu); - if (icmph->icmph_du_zero != 0 && mtu <= IP_MIN_MTU) { - tun0dbg(("icmp_ricmp_err_v6_v4: invalid " \ - "icmp mtu\n")); - atomic_add_32(&atp->tun_InErrors, 1); - freemsg(mp); - return; - } - if (outer_hlen < mtu) - mtu -= outer_hlen; - mutex_enter(&atp->tun_lock); - mtu = tun_update_link_mtu(q, mtu, B_TRUE); - mutex_exit(&atp->tun_lock); - if (!tun_icmp_too_big_v6(q, ip6h, mtu, hoplim, mp)) { - atomic_add_32(&atp->tun_InDiscard, 1); - atomic_add_32(&atp->tun_allocbfail, 1); - } - return; - } - case ICMP_PROTOCOL_UNREACHABLE: { - /* - * XXX may need way to throttle messages - * XXX should we do this for automatic or - * just configured tunnels ? - */ - (void) strlog(q->q_qinfo->qi_minfo->mi_idnum, - atp->tun_ppa, 1, - SL_ERROR | SL_WARN, - "%s.%s%d: Protocol unreachable. " - "Misconfigured tunnel? source %s" - " destination %s\n", - (atp->tun_flags & TUN_LOWER_MASK) == - TUN_L_V4 ? 
"ip" : "ip6", - TUN_NAME, atp->tun_ppa, - inet_ntop(AF_INET, &outer_ipha->ipha_dst, - buf1, sizeof (buf1)), - inet_ntop(AF_INET, &outer_ipha->ipha_src, - buf2, sizeof (buf2))); - icmp6.icmp6_type = ICMP6_DST_UNREACH; - icmp6.icmp6_code = ICMP6_DST_UNREACH_ADDR; - icmp6.icmp6_data32[0] = 0; - break; - } - case ICMP_PORT_UNREACHABLE: - icmp6.icmp6_type = ICMP6_DST_UNREACH; - icmp6.icmp6_code = ICMP6_DST_UNREACH_NOPORT; - icmp6.icmp6_data32[0] = 0; - break; - case ICMP_NET_UNREACHABLE: - case ICMP_HOST_UNREACHABLE: - case ICMP_DEST_NET_UNKNOWN: - case ICMP_DEST_HOST_UNKNOWN: - case ICMP_SRC_HOST_ISOLATED: - case ICMP_DEST_NET_UNREACH_TOS: - case ICMP_DEST_HOST_UNREACH_TOS: - icmp6.icmp6_type = ICMP6_DST_UNREACH; - icmp6.icmp6_code = ICMP6_DST_UNREACH_NOROUTE; - icmp6.icmp6_data32[0] = 0; - break; - case ICMP_DEST_NET_UNREACH_ADMIN: - case ICMP_DEST_HOST_UNREACH_ADMIN: - icmp6.icmp6_type = ICMP6_DST_UNREACH; - icmp6.icmp6_code = ICMP6_DST_UNREACH_ADMIN; - icmp6.icmp6_data32[0] = 0; - break; - - case ICMP_SOURCE_ROUTE_FAILED: - icmp6.icmp6_type = ICMP6_DST_UNREACH; - icmp6.icmp6_code = - ICMP6_DST_UNREACH_BEYONDSCOPE; - icmp6.icmp6_data32[0] = 0; - break; - default: - atomic_add_32(&atp->tun_InErrors, 1); - freemsg(mp); - return; - } - break; - case ICMP_TIME_EXCEEDED: - icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; - icmp6.icmp6_code = code; - icmp6.icmp6_data32[0] = 0; - break; - case ICMP_PARAM_PROBLEM: - icmp6.icmp6_type = ICMP6_PARAM_PROB; - if (icmph->icmph_pp_ptr < (uchar_t *)ip6h - mp->b_rptr) { - tun0dbg(("icmp_ricmp_err_v6_v4: ICMP_PARAM_PROBLEM " \ - "too short\n")); - atomic_add_32(&atp->tun_InErrors, 1); - freemsg(mp); - return; - } - icmp6.icmp6_pptr = htonl( - icmph->icmph_pp_ptr - ((uchar_t *)ip6h - mp->b_rptr) - + sizeof (ip6_t) + sizeof (icmp6_t)); - break; - - default: - atomic_add_32(&atp->tun_InErrors, 1); - freemsg(mp); - return; - } - if (!tun_icmp_message_v6(q, ip6h, &icmp6, hoplim, mp)) { - atomic_add_32(&atp->tun_InDiscard, 1); - 
atomic_add_32(&atp->tun_allocbfail, 1); - } -} - -/* - * Rewhack the packet for the upper IP. - */ -static boolean_t -tun_icmp_too_big_v4(queue_t *q, ipha_t *ipha, uint16_t mtu, mblk_t *mp) -{ - icmph_t icmp; - - tun2dbg(("tun_icmp_too_big_v4: mtu %u src %08x dst %08x len %d\n", - (uint_t)mtu, ipha->ipha_src, ipha->ipha_dst, - ipha->ipha_length)); - - icmp.icmph_type = ICMP_DEST_UNREACHABLE; - icmp.icmph_code = ICMP_FRAGMENTATION_NEEDED; - ASSERT(mtu >= IP_MIN_MTU); - icmp.icmph_du_zero = 0; - icmp.icmph_du_mtu = htons(mtu); - return (tun_icmp_message_v4(q, ipha, &icmp, mp)); -} - -/* - * Send an ICMP6_PACKET_TOO_BIG message - */ -static boolean_t -tun_icmp_too_big_v6(queue_t *q, ip6_t *ip6ha, uint32_t mtu, uint8_t hoplim, - mblk_t *mp) -{ - icmp6_t icmp6; - - icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; - icmp6.icmp6_code = 0; - ASSERT(mtu >= IPV6_MIN_MTU); - icmp6.icmp6_mtu = htonl(mtu); - return (tun_icmp_message_v6(q, ip6ha, &icmp6, hoplim, mp)); -} - -/* - * Send an icmp message up an IPv4 stream. Take the data in mp, - * and prepend a new set of IPv4 + ICMP set of headers. Use the ipha and - * icmp pointers to help construct the aforementioned new headers. 
- */ -static boolean_t -tun_icmp_message_v4(queue_t *q, ipha_t *ipha, icmph_t *icmp, mblk_t *mp) -{ - ssize_t plen, nsize; - mblk_t *send_mp; - tun_t *atp = (tun_t *)q->q_ptr; - ipha_t *nipha; - icmph_t *nicmp; - - plen = mp->b_wptr - mp->b_rptr; - nsize = sizeof (ipha_t) + sizeof (icmph_t) + plen; - - if ((send_mp = allocb(nsize, BPRI_HI)) == NULL) { - atomic_add_32(&atp->tun_InDiscard, 1); - atomic_add_32(&atp->tun_allocbfail, 1); - freemsg(mp); - return (B_FALSE); - } - send_mp->b_wptr = send_mp->b_rptr + nsize; - - nipha = (ipha_t *)send_mp->b_rptr; - nicmp = (icmph_t *)(nipha + 1); - nipha->ipha_version_and_hdr_length = IP_SIMPLE_HDR_VERSION; - nipha->ipha_type_of_service = 0; - nipha->ipha_ident = 0; - nipha->ipha_fragment_offset_and_flags = htons(IPH_DF); - nipha->ipha_ttl = ipha->ipha_ttl; - nipha->ipha_protocol = IPPROTO_ICMP; - nipha->ipha_src = ipha->ipha_dst; - nipha->ipha_dst = ipha->ipha_src; - nipha->ipha_hdr_checksum = 0; - bcopy(ipha, &nicmp[1], plen); - if (mp->b_cont != NULL) { - size_t remainder = msgdsize(mp->b_cont); - - send_mp->b_cont = mp->b_cont; - plen += remainder; - nsize += remainder; - } - nipha->ipha_length = htons(nsize); - nipha->ipha_hdr_checksum = ip_csum_hdr(nipha); - freeb(mp); - ASSERT(send_mp->b_rptr == send_mp->b_datap->db_base); - *nicmp = *icmp; - nicmp->icmph_checksum = 0; - nicmp->icmph_checksum = IP_CSUM(send_mp, sizeof (ipha_t), 0); - - /* let ip know we are an icmp message */ - atomic_add_64(&atp->tun_HCInOctets, - (int64_t)(plen + sizeof (icmph_t))); - putnext(q, send_mp); - return (B_TRUE); -} - -/* - * Send an icmp message up an IPv6 stream. 
- */ -static boolean_t -tun_icmp_message_v6(queue_t *q, ip6_t *ip6h, icmp6_t *icmp6, uint8_t hoplim, - mblk_t *mp) -{ - tun_t *atp = (tun_t *)q->q_ptr; - mblk_t *send_mp; - ssize_t nsize; - icmp6_t *nicmp6; - ip6_t *nip6h; - uint16_t *up; - uint32_t sum; - ssize_t plen; - - plen = mp->b_wptr - mp->b_rptr; - nsize = sizeof (ip6_t) + sizeof (icmp6_t) + plen; - - if ((send_mp = allocb(nsize, BPRI_HI)) == NULL) { - atomic_add_32(&atp->tun_InDiscard, 1); - atomic_add_32(&atp->tun_allocbfail, 1); - freemsg(mp); - return (B_FALSE); - } - send_mp->b_wptr = send_mp->b_rptr + nsize; - - nip6h = (ip6_t *)send_mp->b_rptr; - nicmp6 = (icmp6_t *)&nip6h[1]; - *nicmp6 = *icmp6; - nip6h->ip6_vcf = ip6h->ip6_vcf; - nip6h->ip6_plen = ip6h->ip6_plen; - nip6h->ip6_hops = hoplim; - nip6h->ip6_nxt = IPPROTO_ICMPV6; - nip6h->ip6_src = ip6h->ip6_dst; - nip6h->ip6_dst = ip6h->ip6_src; - /* copy of ipv6 header into icmp6 message */ - bcopy(ip6h, &nicmp6[1], plen); - /* add in the rest of the packet if any */ - if (mp->b_cont) { - send_mp->b_cont = mp->b_cont; - mp->b_cont = NULL; - plen += msgdsize(send_mp->b_cont); - } - freeb(mp); - nip6h->ip6_plen = htons(plen + sizeof (icmp6_t)); - nicmp6->icmp6_cksum = 0; - up = (uint16_t *)&nip6h->ip6_src; - sum = htons(IPPROTO_ICMPV6 + - ntohs(nip6h->ip6_plen)) + - up[0] + up[1] + up[2] + up[3] + - up[4] + up[5] + up[6] + up[7] + - up[8] + up[9] + up[10] + up[11] + - up[12] + up[13] + up[14] + up[15]; - sum = (sum & 0xffff) + (sum >> 16); - nicmp6->icmp6_cksum = IP_CSUM(send_mp, IPV6_HDR_LEN, sum); - - /* let ip know we are an icmp message */ - atomic_add_64(&atp->tun_HCInOctets, - (int64_t)(plen + sizeof (icmp6_t))); - send_mp->b_datap->db_type = M_DATA; - putnext(q, send_mp); - return (B_TRUE); -} - -/* - * Read side service routine. 
- */ -void -tun_rsrv(queue_t *q) -{ - mblk_t *mp; - tun_t *atp = (tun_t *)q->q_ptr; - - while (mp = getq(q)) { - if (tun_rproc(q, mp) == ENOMEM) { - break; - } - /* - * If we called qwriter, then the only way we - * can tell if we ran out of memory is to check if - * any events have been scheduled - */ - if (atp->tun_events.ev_rtimoutid != 0 && - atp->tun_events.ev_rbufcid != 0) { - break; - } - } -} - -/* - * Read side put procedure - */ -void -tun_rput(queue_t *q, mblk_t *mp) -{ - /* note: q_first is 'protected' by perimeter */ - if (q->q_first != NULL) { - (void) putq(q, mp); - } else { - (void) tun_rproc(q, mp); - } -} - -static int -tun_rdata(queue_t *q, mblk_t *ipsec_mp, mblk_t *data_mp, tun_t *atp, - uint_t lvers) -{ - char buf[TUN_WHO_BUF]; - int error = 0; - - ASSERT(ipsec_mp == NULL || ipsec_mp->b_cont == data_mp); - -#define MESSAGE ((ipsec_mp == NULL) ? data_mp : ipsec_mp) - - /* - * If it's an IPSEC_IN w/o any security properties, start treating - * it like a cleartext packet. - */ - if (ipsec_mp != NULL && !ipsec_in_is_secure(ipsec_mp)) { - freeb(ipsec_mp); - ipsec_mp = NULL; - } - - if (atp->tun_state != DL_IDLE) { - atomic_add_32(&atp->tun_InErrors, 1); - atomic_add_64(&atp->tun_HCInUcastPkts, 1); - freemsg(MESSAGE); - return (error); /* pre-set to 0 */ - } - - if (!canputnext(q)) { - tun1dbg(("tun_rdata: flow controlled\n")); - ASSERT(data_mp->b_datap->db_type < QPCTL); - atomic_add_32(&atp->tun_nocanput, 1); - (void) putbq(q, MESSAGE); - error = ENOMEM; - goto bail; - } - - if (lvers != TUN_L_V4 && lvers != TUN_L_V6) { - tun0dbg(("tun_rproc: %s no lower version\n", - tun_who(q, buf))); - atomic_add_32(&atp->tun_InErrors, 1); - freemsg(MESSAGE); - error = EIO; - goto bail; - } - -#undef MESSAGE - - error = (lvers == TUN_L_V4) ? 
tun_rdata_v4(q, ipsec_mp, data_mp, atp) : - tun_rdata_v6(q, ipsec_mp, data_mp, atp); - -bail: - if (error) { - /* only record non flow control problems */ - if (error != EBUSY) { - tun0dbg(("tun_rproc: %s error encounterd %d\n", - tun_who(q, buf), error)); - } - } - - return (error); -} - -/* - * Process read side messages - */ -static int -tun_rproc(queue_t *q, mblk_t *mp) -{ - tun_t *atp = (tun_t *)q->q_ptr; - uint_t lvers; - int error = 0; - char buf[TUN_WHO_BUF]; - ipsec_in_t *ii; - mblk_t *ipsec_mp; - - /* no lock needed, won't ever change */ - lvers = atp->tun_flags & TUN_LOWER_MASK; - - switch (mp->b_datap->db_type) { - case M_DATA: - error = tun_rdata(q, NULL, mp, atp, lvers); - break; - - case M_PROTO: - case M_PCPROTO: - /* its a TPI message */ - error = tun_rput_tpi(q, mp); - break; - - case M_CTL: - /* its either an IPsec-protect packet... */ - ii = (ipsec_in_t *)mp->b_rptr; - if (ii->ipsec_in_type == IPSEC_IN) { - if (mp->b_cont->b_datap->db_type == M_DATA) { - error = tun_rdata(q, mp, mp->b_cont, atp, - lvers); - break; /* Out of switch. */ - } else { - ASSERT(mp->b_cont->b_datap->db_type == M_CTL); - /* - * ICMP message protected by IPsec. - * Split out IPSEC_IN and pass it up separately. - */ - ipsec_mp = mp; - mp = mp->b_cont; - } - } else { - ipsec_mp = NULL; - } - - /* ... or an ICMP error message from IP */ - atomic_add_64(&atp->tun_HCInUcastPkts, 1); - - if (!canputnext(q)) { - atomic_add_32(&atp->tun_nocanput, 1); - atomic_add_32(&atp->tun_InDiscard, 1); - if (ipsec_mp != NULL) - freeb(ipsec_mp); - freemsg(mp); - break; - } - - /* Pull everything up into mp. 
*/ - mp->b_datap->db_type = M_DATA; - if (!pullupmsg(mp, -1)) { - atomic_add_32(&atp->tun_InErrors, 1); - if (ipsec_mp != NULL) - freeb(ipsec_mp); - freemsg(mp); - break; - } - mp->b_datap->db_type = M_CTL; - - if (lvers == TUN_L_V4) { - tun_rput_icmp_err_v4(q, mp, ipsec_mp); - } else if (lvers == TUN_L_V6) { - tun_rput_icmp_err_v6(q, mp, ipsec_mp); - } else { - if (ipsec_mp != NULL) - freeb(ipsec_mp); - freemsg(mp); - } - break; - - case M_FLUSH: - if (*mp->b_rptr & FLUSHR) { - flushq(q, FLUSHALL); - *mp->b_rptr &= ~FLUSHR; - } - /* we're pretending to be a stream head */ - if (*mp->b_rptr & FLUSHW) { - qreply(q, mp); - } else { - freemsg(mp); - } - break; - case IRE_DB_TYPE: { - ire_t *ire; - - ip1dbg(("tun_rproc: received IRE_DB_TYPE.")); - ire = (ire_t *)mp->b_rptr; - tun1dbg(("tun_rproc: received IRE_DB_TYPE, " - "ipsec_overhead is %d bytes", ire->ire_ipsec_overhead)); - mutex_enter(&atp->tun_lock); - /* - * Take advice from lower-layer if it is bigger than what we - * have cached now. We do manage per-tunnel policy, but - * there may be global overhead to account for. 
- */ - atp->tun_ipsec_overhead = max(ire->ire_ipsec_overhead, - atp->tun_ipsec_overhead); - if (atp->tun_flags & TUN_DST) { - (void) tun_update_link_mtu(q, ire->ire_max_frag, - B_FALSE); - } - mutex_exit(&atp->tun_lock); - freemsg(mp); - break; - } - default: - tun0dbg(("tun_rproc: %s got unknown mblk type %d\n", - tun_who(q, buf), mp->b_datap->db_type)); - freemsg(mp); - break; - } - return (error); -} - - -/* - * Handle Upper IPv4 - */ -static void -tun_wdata_v4(queue_t *q, mblk_t *mp) -{ - ipha_t *outer_ipha = NULL, *inner_ipha; - ip6_t *ip6 = NULL; - tun_t *atp = (tun_t *)q->q_ptr; - mblk_t *nmp; - size_t hdrlen; - int16_t encap_limit; - - ASSERT((mp->b_wptr - mp->b_rptr) >= sizeof (ipha_t)); - - inner_ipha = (ipha_t *)mp->b_rptr; - - /* - * increment mib counters and pass message off to ip - * note: we must always increment packet counters, but - * only increment byte counter if we actually send packet - */ - if (CLASSD(inner_ipha->ipha_dst)) { - atomic_add_64(&atp->tun_HCOutMulticastPkts, 1); - } else { - atomic_add_64(&atp->tun_HCOutUcastPkts, 1); - } - - if (atp->tun_state != DL_IDLE || !(atp->tun_flags & TUN_BOUND)) { - atomic_add_32(&atp->tun_OutErrors, 1); - freemsg(mp); - return; - } - - switch (atp->tun_flags & TUN_LOWER_MASK) { - case TUN_L_V4: - hdrlen = IPH_HDR_LENGTH(&atp->tun_ipha); - if (inner_ipha->ipha_dst == atp->tun_ipha.ipha_dst) { - /* - * Watch out! There is potential for an infinite loop. - * If IP sent a packet with destination address equal - * to the tunnel's destination address, we'll hit - * an infinite routing loop, where the packet will keep - * going through here. - * - * In the long term, perhaps IP should be somewhat - * intelligent about this. Until then, nip this in - * the bud. - */ - tun0dbg(("tun_wdata: inner dst == tunnel dst.\n")); - atp->tun_OutErrors++; - freemsg(mp); - return; - } - - /* room for IPv4 header? 
*/ - if ((mp->b_rptr - mp->b_datap->db_base) < hdrlen) { - /* no */ - - nmp = allocb_tmpl(hdrlen + atp->tun_extra_offset, mp); - if (nmp == NULL) { - atomic_add_32(&atp->tun_OutDiscard, 1); - atomic_add_32(&atp->tun_allocbfail, 1); - freemsg(mp); - return; - } - nmp->b_cont = mp; - mp = nmp; - mp->b_wptr = mp->b_datap->db_lim; - mp->b_rptr = mp->b_wptr - hdrlen; - } else { - /* yes */ - mp->b_rptr -= hdrlen; - } - outer_ipha = (ipha_t *)mp->b_rptr; - - /* - * copy template header into packet IPv4 header - */ - *outer_ipha = atp->tun_ipha; - outer_ipha->ipha_length = htons(ntohs(inner_ipha->ipha_length) - + hdrlen); - /* - * copy the tos from inner header. We mask off - * ECN bits (bits 6 and 7) because there is currently no - * tunnel-tunnel communication to determine if - * both sides support ECN, so we opt for the safe - * choice: don't copy the ECN bits when doing encapsulation. - */ - outer_ipha->ipha_type_of_service = - (inner_ipha->ipha_type_of_service & ~0x03); - - break; - case TUN_L_V6: - /* room for IPv6 header? 
*/ - hdrlen = sizeof (ip6_t); - encap_limit = atp->tun_encap_lim; - if (encap_limit >= 0) { - hdrlen += IPV6_TUN_ENCAP_OPT_LEN; - } - - if ((mp->b_rptr - mp->b_datap->db_base) < hdrlen) { - /* no */ - nmp = allocb_tmpl(hdrlen + atp->tun_extra_offset, mp); - if (nmp == NULL) { - atomic_add_32(&atp->tun_OutDiscard, 1); - atomic_add_32(&atp->tun_allocbfail, 1); - freemsg(mp); - return; - } - nmp->b_cont = mp; - mp = nmp; - mp->b_wptr = mp->b_datap->db_lim; - mp->b_rptr = mp->b_wptr - hdrlen; - } else { - /* yes */ - mp->b_rptr -= hdrlen; - } - ip6 = (ip6_t *)mp->b_rptr; - - /* - * copy template header into packet IPv6 header - */ - bcopy(&atp->tun_ip6h, mp->b_rptr, hdrlen); - ip6->ip6_plen = htons(ntohs(inner_ipha->ipha_length) + hdrlen - - sizeof (ip6_t)); - - break; - default: - /* LINTED */ - ASSERT(0 && "not supported"); - atomic_add_32(&atp->tun_OutErrors, 1); - freemsg(mp); - return; - } - - /* - * Request the destination ire regularly in case Path MTU has - * increased. - */ - if (TUN_IRE_TOO_OLD(atp)) - tun_send_ire_req(q); - - atomic_add_64(&atp->tun_HCOutOctets, (int64_t)msgdsize(mp)); - - mp = ipsec_tun_outbound(mp, atp, inner_ipha, NULL, outer_ipha, ip6, - hdrlen, atp->tun_netstack); - if (mp == NULL) - return; - - /* send the packet chain down the transport stream to IPv4/IPv6 */ - TUN_PUTMSG_CHAIN(q, mp, nmp); -} - -/* - * put M_DATA fastpath upper IPv4 - * Assumes canput is possible - */ -static int -tun_wputnext_v4(queue_t *q, mblk_t *mp) -{ - tun_t *atp = (tun_t *)q->q_ptr; - ipha_t *inner_ipha, *outer_ipha = NULL; - ip6_t *ip6 = NULL; - uint_t hdrlen; - mblk_t *nmp; - - mp->b_rptr += atp->tun_extra_offset; - if ((atp->tun_flags & TUN_L_V4) != 0) { - outer_ipha = (ipha_t *)mp->b_rptr; - hdrlen = IPH_HDR_LENGTH(outer_ipha); - - if (mp->b_wptr - mp->b_rptr < hdrlen + sizeof (ipha_t)) { - if (!pullupmsg(mp, hdrlen + sizeof (ipha_t))) { - atomic_add_32(&atp->tun_OutErrors, 1); - freemsg(mp); - return (0); /* silently fail */ - } - outer_ipha = (ipha_t 
*)mp->b_rptr; - } - - inner_ipha = (ipha_t *)((uint8_t *)outer_ipha + hdrlen); - outer_ipha->ipha_length = htons(ntohs(inner_ipha->ipha_length) + - sizeof (ipha_t)); - /* - * copy the tos from inner header. We mask off - * ECN bits (bits 6 and 7) because there is currently no - * tunnel-tunnel communication to determine if - * both sides support ECN, so we opt for the safe - * choice: don't copy the ECN bits when doing encapsulation. - */ - outer_ipha->ipha_type_of_service = - (inner_ipha->ipha_type_of_service & ~0x03); - - if (inner_ipha->ipha_dst == outer_ipha->ipha_dst) { - /* - * Infinite loop check. See the TUN_L_V4 case in - * tun_wdata_v4() for details. - */ - tun0dbg( - ("tun_wputnext_v4: inner dst == tunnel dst.\n")); - atp->tun_OutErrors++; - freemsg(mp); - return (EINVAL); - } - } else if ((atp->tun_flags & TUN_L_V6) != 0) { - ip6 = (ip6_t *)mp->b_rptr; - ASSERT(ip6->ip6_nxt == IPPROTO_ENCAP || - ip6->ip6_nxt == IPPROTO_DSTOPTS); - hdrlen = sizeof (ip6_t); - if (ip6->ip6_nxt == IPPROTO_DSTOPTS) { - /* XXX The code should be more general */ - hdrlen += IPV6_TUN_ENCAP_OPT_LEN; - } - - if (mp->b_wptr - mp->b_rptr < hdrlen + sizeof (ipha_t)) { - if (!pullupmsg(mp, hdrlen + sizeof (ipha_t))) { - atomic_add_32(&atp->tun_OutErrors, 1); - freemsg(mp); - return (0); /* silently fail */ - } - ip6 = (ip6_t *)mp->b_rptr; - } - - inner_ipha = (ipha_t *)((uint8_t *)ip6 + hdrlen); - ip6->ip6_plen = htons(ntohs(inner_ipha->ipha_length) + - hdrlen - sizeof (ip6_t)); - } else { - /* XXX can't get here yet - force assert */ - ASSERT((atp->tun_flags & TUN_L_V4) != 0); - freemsg(mp); - return (EINVAL); - } - - /* XXX Do I hit this, given I have this check earlier? */ - if (inner_ipha->ipha_dst == atp->tun_ipha.ipha_dst) { - /* - * Watch out! There is potential for an infinite loop. - * If IP sent a packet with destination address equal - * to the tunnel's destination address, we'll hit - * an infinite routing loop, where the packet will keep - * going through here. 
- * - * In the long term, perhaps IP should be somewhat - * intelligent about this. Until then, nip this in - * the bud. - */ - tun0dbg(("tun_wputnext_v4: inner dst == tunnel dst.\n")); - atp->tun_OutErrors++; - freemsg(mp); - return (EINVAL); - } - - /* - * increment mib counters and pass message off to ip - * note: we must always increment packet counters, but - * only increment byte counter if we actually send packet - */ - if (CLASSD(inner_ipha->ipha_dst)) { - atomic_add_64(&atp->tun_HCOutMulticastPkts, 1); - } else { - atomic_add_64(&atp->tun_HCOutUcastPkts, 1); - } - - if (!(atp->tun_flags & TUN_BOUND)) { - atomic_add_32(&atp->tun_OutErrors, 1); - freemsg(mp); - return (EINVAL); - } - - atomic_add_64(&atp->tun_HCOutOctets, (int64_t)msgsize(mp)); - - mp = ipsec_tun_outbound(mp, atp, inner_ipha, NULL, outer_ipha, ip6, - hdrlen, atp->tun_netstack); - if (mp == NULL) - return (0); - - /* - * Request the destination ire regularly in case Path MTU has - * increased. - */ - if (TUN_IRE_TOO_OLD(atp)) - tun_send_ire_req(q); - - /* send the packet chain down the transport stream to IPv4/IPv6 */ - TUN_PUTMSG_CHAIN(q, mp, nmp); - return (0); -} - -/* - * put M_DATA fastpath upper IPv6 - * Assumes canput is possible - */ -static int -tun_wputnext_v6(queue_t *q, mblk_t *mp) -{ - tun_t *atp = (tun_t *)q->q_ptr; - ip6_t *ip6h; - ip6_t *outer_ip6 = NULL; - uint_t hdrlen; - struct ip6_opt_tunnel *encap_opt; - int encap_limit = 0; - ipha_t *ipha = NULL; - mblk_t *nmp; - - /* - * fastpath reserves a bit more then we can use. - * get rid of hardware bits.. 
ip below us will fill it in - */ - mp->b_rptr += atp->tun_extra_offset; - if ((atp->tun_flags & TUN_L_V4) != 0) { - ipha = (ipha_t *)mp->b_rptr; - hdrlen = IPH_HDR_LENGTH(ipha); - - if (mp->b_wptr - mp->b_rptr < hdrlen + sizeof (ip6_t)) { - if (!pullupmsg(mp, hdrlen + sizeof (ip6_t))) { - atomic_add_32(&atp->tun_OutErrors, 1); - freemsg(mp); - return (0); /* silently fail */ - } - ipha = (ipha_t *)mp->b_rptr; - } - - ip6h = (ip6_t *)((uint8_t *)ipha + hdrlen); - /* - * if we are less than the minimum IPv6 mtu size, then - * allow IPv4 to fragment the packet - */ - if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN <= IPV6_MIN_MTU) { - ipha->ipha_fragment_offset_and_flags = 0; - } else { - ipha->ipha_fragment_offset_and_flags = htons(IPH_DF); - } - ipha->ipha_length = htons(ntohs(ip6h->ip6_plen) + - (uint16_t)sizeof (ip6_t) + (uint16_t)sizeof (ipha_t)); - - } else if ((atp->tun_flags & TUN_L_V6) != 0) { - outer_ip6 = (ip6_t *)mp->b_rptr; - ASSERT(outer_ip6->ip6_nxt == IPPROTO_IPV6 || - outer_ip6->ip6_nxt == IPPROTO_DSTOPTS); - hdrlen = sizeof (ip6_t); - if (outer_ip6->ip6_nxt == IPPROTO_DSTOPTS) - hdrlen += IPV6_TUN_ENCAP_OPT_LEN; - - if (mp->b_wptr - mp->b_rptr < - hdrlen + sizeof (ip6_t) + IPV6_TUN_ENCAP_OPT_LEN) { - if (!pullupmsg(mp, hdrlen + sizeof (ip6_t) + - IPV6_TUN_ENCAP_OPT_LEN)) { - atomic_add_32(&atp->tun_OutErrors, 1); - freemsg(mp); - return (0); /* silently fail */ - } - outer_ip6 = (ip6_t *)mp->b_rptr; - } - - ip6h = (ip6_t *)((uint8_t *)outer_ip6 + hdrlen); - - if (IN6_ARE_ADDR_EQUAL(&outer_ip6->ip6_dst, &ip6h->ip6_dst)) { - /* - * Watch out! There is potential for an infinite loop. - * If IP sent a packet with destination address equal - * to the tunnel's destination address, we'll hit - * an infinite routing loop, where the packet will keep - * going through here. - * - * In the long term, perhaps IP should be somewhat - * intelligent about this. Until then, nip this in - * the bud. 
- */ - tun0dbg( - ("tun_wputnext_v6: inner dst == tunnel dst.\n")); - atp->tun_OutErrors++; - freemsg(mp); - return (EINVAL); - } - - if ((ip6h->ip6_nxt == IPPROTO_DSTOPTS) && - (outer_ip6->ip6_nxt == IPPROTO_DSTOPTS)) { - - if (tun_limit_value_v6(q, mp, ip6h, &encap_limit)) { - if (encap_limit >= 0) { - encap_opt = (struct ip6_opt_tunnel *) - ((char *)outer_ip6 + - sizeof (ip6_t) + - sizeof (struct ip6_dest)); - encap_opt->ip6ot_encap_limit = - (uint8_t)encap_limit; - } - } else { - /* mp already freed by tun_limit_value_v6 */ - return (0); /* silently fail */ - } - } - - outer_ip6->ip6_plen = htons(ntohs(ip6h->ip6_plen) + hdrlen); - } else { - /* XXX can't get here yet - force assert */ - ASSERT((atp->tun_flags & TUN_L_V4) != 0); - freemsg(mp); - return (EINVAL); - } - - /* - * increment mib counters and pass message off to ip - * note: we must always increment packet counters, but - * only increment byte counter if we actually send packet - */ - if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { - atomic_add_64(&atp->tun_HCOutMulticastPkts, 1); - } else { - atomic_add_64(&atp->tun_HCOutUcastPkts, 1); - } - - if (!(atp->tun_flags & TUN_BOUND)) { - atomic_add_32(&atp->tun_OutErrors, 1); - freemsg(mp); - return (EINVAL); - } - - atomic_add_64(&atp->tun_HCOutOctets, (int64_t)msgsize(mp)); - - /* - * Request the destination ire regularly in case Path MTU has - * increased, but only for configured tunnels. - */ - if ((atp->tun_flags & TUN_DST) && TUN_IRE_TOO_OLD(atp)) - tun_send_ire_req(q); - - /* send the packet down the transport stream to IPv4/IPv6 */ - mp = ipsec_tun_outbound(mp, atp, NULL, ip6h, ipha, outer_ip6, hdrlen, - atp->tun_netstack); - if (mp == NULL) - return (0); - - /* send the packet chain down the transport stream to IPv4/IPv6 */ - TUN_PUTMSG_CHAIN(q, mp, nmp); - return (0); -} - -/* - * Determine whether we need to add a Tunnel Encapsulation Limit option and - * what it's value should be. There are two reasons to add a TEL option: - * 1. 
The tunnel data structure specifies it by a greater-than-zero - * tun_encap_lim member. - * 2. The data being encapsulated is an IPv6 packet that contains a TEL - * option. RFC 2473 says if the value is 1, return an ICMP parameter - * problem error report, else decrement the value and use it for a TEL - * option to be inserted in the encapsulating IPv6 packet. - * - * Return values: - * B_TRUE: Has a limit, use the value in *limitp. - * B_FALSE: Problem with limit, i.e. it was zero. - */ -static boolean_t -tun_limit_value_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, int *limitp) -{ - int limit = 0; - ip6_dest_t *destp; - int optlen; - struct ip6_opt *optp; - tun_t *atp = (tun_t *)q->q_ptr; - ip6_pkt_t ipp; - icmp6_t icmp6; - size_t offset; - - /* - * If tunnel has a non-negative limit, use it, but allow it to be - * overridden by tunnel encapsulation limit option in original packet - * (mp). - */ - limit = atp->tun_encap_lim; - - /* Check mp for tunnel encapsulation limit destination option. */ - ipp.ipp_fields = 0; /* must be initialized */ - (void) ip_find_hdr_v6(mp, ip6h, &ipp, NULL); - - if ((ipp.ipp_fields & IPPF_DSTOPTS) != 0) { - - destp = ipp.ipp_dstopts; - optlen = 8 * (destp->ip6d_len + 1) - sizeof (*destp); - optp = (struct ip6_opt *)(destp + 1); - - while (optlen > 0) { - - if (optp->ip6o_type == IP6OPT_TUNNEL_LIMIT) { - - /* - * XXX maybe we should send an ICMP parameter - * problem in this case instead. - */ - ASSERT(optp->ip6o_len == 1); - - limit = *(uint8_t *)(optp + 1); - - /* - * RFC 2473 says send an ICMP parameter problem - * if the limit is 0, send an ICMP parameter - * problem error and return B_FALSE. 
- */ - if (limit == 0) { - mp->b_rptr = (unsigned char *) ip6h; - icmp6.icmp6_type = ICMP6_PARAM_PROB; - icmp6.icmp6_code = 0; - offset = ((unsigned char *)(optp + 1)) - - mp->b_rptr; - icmp6.icmp6_pptr = htonl(offset); - (void) tun_icmp_message_v6(q, ip6h, - &icmp6, IPV6_DEFAULT_HOPS, mp); - return (B_FALSE); - } - - --limit; - break; - } - - optlen -= (optp->ip6o_len + sizeof (*optp)); - optp = (struct ip6_opt *) - (((char *)(optp + 1)) + optp->ip6o_len); - } - } - - *limitp = limit; - return (B_TRUE); -} - - -/* - * Handle Upper IPv6 write side data - * Note: all lower tunnels must have a source - * This routine assumes that a canput has already been done on the - * stream. - */ -static void -tun_wdata_v6(queue_t *q, mblk_t *mp) -{ - tun_t *atp = (tun_t *)q->q_ptr; - ipha_t *ipha = NULL; - ip6_t *ip6h, *outer_ip6 = NULL; - mblk_t *nmp; - ipaddr_t v4addr; - char buf1[INET6_ADDRSTRLEN]; - char buf2[INET6_ADDRSTRLEN]; - char buf[TUN_WHO_BUF]; - size_t hdrlen; - int encap_limit = 0; - struct ip6_opt_tunnel *encap_opt; - tun_stack_t *tuns = atp->tun_netstack->netstack_tun; - - ASSERT((mp->b_wptr - mp->b_rptr) >= sizeof (ip6_t)); - - ip6h = (ip6_t *)mp->b_rptr; - - /* - * increment mib counters and pass message off to ip - * note: we must always increment packet counters, but - * only increment byte counter if we actually send packet - */ - if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { - atomic_add_64(&atp->tun_HCOutMulticastPkts, 1); - } else { - atomic_add_64(&atp->tun_HCOutUcastPkts, 1); - } - - if (atp->tun_state != DL_IDLE || !(atp->tun_flags & TUN_BOUND)) { - atomic_add_32(&atp->tun_OutErrors, 1); - goto drop; - } - - /* check version */ - - ASSERT((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) == - IPV6_DEFAULT_VERS_AND_FLOW); - - switch (atp->tun_flags & TUN_LOWER_MASK) { - case TUN_L_V4: - /* room for IPv4 header? 
*/ - hdrlen = sizeof (ipha_t); - if ((mp->b_rptr - mp->b_datap->db_base) < sizeof (ipha_t)) { - /* no */ - - nmp = allocb_tmpl(sizeof (ipha_t) + - atp->tun_extra_offset, mp); - if (nmp == NULL) { - atomic_add_32(&atp->tun_OutDiscard, 1); - atomic_add_32(&atp->tun_allocbfail, 1); - goto drop; - } - nmp->b_cont = mp; - mp = nmp; - mp->b_wptr = mp->b_datap->db_lim; - mp->b_rptr = mp->b_wptr - sizeof (ipha_t); - } else { - /* yes */ - mp->b_rptr -= sizeof (ipha_t); - } - ipha = (ipha_t *)mp->b_rptr; - - /* - * copy template header into packet IPv4 header - * for configured tunnels everything should be - * in template. - * Automatic tunnels need the dest set from - * incoming ipv6 packet - */ - *ipha = atp->tun_ipha; - - /* XXX don't support tun_laddr of 0 */ - ASSERT(IN6_IS_ADDR_V4MAPPED(&atp->tun_laddr)); - - /* Is this an automatic tunnel ? */ - if ((atp->tun_flags & TUN_AUTOMATIC) != 0) { - - /* - * Process packets for automatic tunneling - */ - IN6_V4MAPPED_TO_IPADDR(&atp->tun_laddr, - ipha->ipha_src); - - /* - * destination address must be compatible address - * and cannot be multicast - */ - if (!IN6_IS_ADDR_V4COMPAT(&ip6h->ip6_dst)) { - tun0dbg( - ("tun_wdata_v6: %s dest is not IPv4: %s\n", - tun_who(q, buf), - inet_ntop(AF_INET6, &ip6h->ip6_dst, - buf1, sizeof (buf1)))); - atomic_add_32(&atp->tun_OutErrors, 1); - goto drop; - } - IN6_V4MAPPED_TO_IPADDR(&ip6h->ip6_dst, v4addr); - if (CLASSD(v4addr)) { - tun0dbg(("tun_wdata_v6: %s Multicast dst not" \ - " allowed : %s\n", tun_who(q, buf), - inet_ntop(AF_INET6, &ip6h->ip6_src, - buf2, sizeof (buf2)))); - atomic_add_32(&atp->tun_OutErrors, 1); - goto drop; - } - ipha->ipha_dst = v4addr; - - /* Is this a 6to4 tunnel ? */ - } else if ((atp->tun_flags & TUN_6TO4) != 0) { - struct in_addr in_v4addr; - - /* - * make sure IPv6 source is a 6to4 address. 
- */ - if (!IN6_IS_ADDR_6TO4(&ip6h->ip6_src)) { - tun0dbg(("tun_wdata_v6: %s tun: invalid " \ - "IPv6 src (%s)\n", tun_who(q, buf), - inet_ntop(AF_INET6, &ip6h->ip6_src, - buf1, sizeof (buf1)))); - atomic_add_32(&atp->tun_OutErrors, 1); - goto drop; - } - - /* - * As per RFC 3056, the IPv4 source MUST be set to the - * V4ADDR portion of the IPv6 source. - */ - IN6_6TO4_TO_V4ADDR(&ip6h->ip6_src, &in_v4addr); - ipha->ipha_src = (ipaddr_t)in_v4addr.s_addr; - - /* - * As per RFC 3056, the IPv4 destination MUST be set to - * either: - * - the V4ADDR portion of the IPv6 destination, if the - * destination is a 6to4 address. - * - the well known 6to4 Relay Router anycast address - * (192.88.99.1, defined in RFC 3068), if IPv6 - * destination is a native IPv6 address. - * - a unicast address of a 6to4 relay router set by - * the administrator. - * - * This implementation will drop packets with native - * IPv6 destinations if 6to4 Relay Router communication - * support is disabled. This support is checked - * by examining tuns_relay_rtr_addr_v4; INADDR_ANY - * denotes - * support is disabled; a valid, routable IPv4 addr - * denotes support is enabled. Support is disabled - * by default, because there is no standard trust - * mechanism for communicating with 6to4 Relay Routers. - */ - if (IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { - /* destination is a 6to4 router */ - IN6_6TO4_TO_V4ADDR(&ip6h->ip6_dst, - &in_v4addr); - ipha->ipha_dst = (ipaddr_t)in_v4addr.s_addr; - } else { - /* - * destination is a native IPv6 address - */ - if (tuns->tuns_relay_rtr_addr_v4 == - INADDR_ANY) { - /* - * 6to4 Relay Router communication - * support is disabled. 
- */ - tun1dbg(("tun_wdata_v6: " - "%s tuns_relay_rtr_addr_v4 = %s, " - "dropping packet with IPv6 dst " - "%s\n", tun_who(q, buf), - inet_ntop(AF_INET, - &tuns->tuns_relay_rtr_addr_v4, - buf1, sizeof (buf1)), - inet_ntop(AF_INET6, &ip6h->ip6_dst, - buf2, sizeof (buf2)))); - atomic_add_32(&atp->tun_OutDiscard, 1); - goto drop; - } - /* - * 6to4 Relay Router communication support - * is enabled. Set IPv4 destination to - * address of configured Relay Router - * (this addr may equal the well-known - * 6to4 Relay Router anycast address, - * defined in RFC 3068) - */ - ipha->ipha_dst = tuns->tuns_relay_rtr_addr_v4; - } - } - /* - * If IPv4 mtu is less than the minimum IPv6 mtu size, then - * allow IPv4 to fragment the packet. - * This works because if our IPv6 length is less than - * min IPv6 mtu, IPv4 might have to fragment anyway - * and we really can't handle an message too big icmp - * error. If the packet is greater them min IPv6 mtu, - * then a message too big icmp error will cause the - * IPv6 to shrink its packets - */ - if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN <= IPV6_MIN_MTU) { - ipha->ipha_fragment_offset_and_flags = 0; - } else { - ipha->ipha_fragment_offset_and_flags = htons(IPH_DF); - } - ipha->ipha_length = htons(ntohs(ip6h->ip6_plen) + - (uint16_t)sizeof (ip6_t) + (uint16_t)sizeof (ipha_t)); - tun3dbg(("tun_wdata_v6: %s sending IPv4 packet src %s dest " \ - "%s\n", tun_who(q, buf), - inet_ntop(AF_INET, &ipha->ipha_src, buf1, sizeof (buf1)), - inet_ntop(AF_INET, &ipha->ipha_dst, - buf2, sizeof (buf2)))); - - break; - case TUN_L_V6: - /* room for IPv6 header? */ - hdrlen = sizeof (ip6_t); - - /* - * Calculate tunnel encapsulation limit. < 0 means error, 0 - * means don't include a TEL option, and > 0 means use this - * value as the limit. Right here, just update the header - * length to take the extra TEL destination option into - * account, or send an ICMP parameter problem and return. 
- */ - if (tun_limit_value_v6(q, mp, ip6h, &encap_limit)) { - if (encap_limit >= 0) - hdrlen += IPV6_TUN_ENCAP_OPT_LEN; - } else - return; /* mp freed by tun_limit_value_v6 */ - - if ((mp->b_rptr - mp->b_datap->db_base) < hdrlen) { - /* no */ - nmp = allocb_tmpl(hdrlen + atp->tun_extra_offset, mp); - if (nmp == NULL) { - atomic_add_32(&atp->tun_OutDiscard, 1); - atomic_add_32(&atp->tun_allocbfail, 1); - freemsg(mp); - return; - } - nmp->b_cont = mp; - mp = nmp; - mp->b_wptr = mp->b_datap->db_lim; - mp->b_rptr = mp->b_wptr - hdrlen; - } else { - /* yes */ - mp->b_rptr -= hdrlen; - } - outer_ip6 = (ip6_t *)mp->b_rptr; - bcopy(&atp->tun_ip6h, mp->b_rptr, hdrlen); - if (encap_limit >= 0) { - encap_opt = (struct ip6_opt_tunnel *) - ((char *)outer_ip6 + sizeof (ip6_t) + - sizeof (struct ip6_dest)); - encap_opt->ip6ot_encap_limit = (uint8_t)encap_limit; - } - - /* Is this a 6to4 or automatic tunnel ? */ - if ((atp->tun_flags & (TUN_AUTOMATIC | TUN_6TO4)) != 0) { - atomic_add_32(&atp->tun_OutErrors, 1); - goto drop; - } - - outer_ip6->ip6_plen = htons(ntohs(ip6h->ip6_plen) + - hdrlen); - - break; - default: - /* LINTED */ - ASSERT(0 && "not supported"); - atomic_add_32(&atp->tun_OutErrors, 1); - goto drop; - } - - atomic_add_64(&atp->tun_HCOutOctets, (int64_t)msgdsize(mp)); - - /* - * Request the destination ire regularly in case Path MTU has - * increased, but only for configured tunnels. - */ - if ((atp->tun_flags & TUN_DST) && TUN_IRE_TOO_OLD(atp)) - tun_send_ire_req(q); - - /* send the packet down the transport stream to IP */ - mp = ipsec_tun_outbound(mp, atp, NULL, ip6h, ipha, outer_ip6, hdrlen, - atp->tun_netstack); - if (mp == NULL) - return; - - /* send the packet chain down the transport stream to IPv4/IPv6 */ - TUN_PUTMSG_CHAIN(q, mp, nmp); - return; -drop: - freemsg(mp); -} - -/* - * T_BIND to lower stream. 
- */ -static int -tun_send_bind_req(queue_t *q) -{ - tun_t *atp = (tun_t *)q->q_ptr; - mblk_t *mp; - struct T_bind_req *tbr; - int err = 0; - size_t size; - uint_t lvers; - char *cp; - - if ((atp->tun_flags & TUN_SRC) == 0) { - return (EINVAL); - } - - lvers = atp->tun_flags & TUN_LOWER_MASK; - - if (lvers == TUN_L_V4) { - if (atp->tun_flags & TUN_SRC) { - ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(&atp->tun_laddr))); - if (atp->tun_flags & TUN_DST) { - ASSERT(!(IN6_IS_ADDR_UNSPECIFIED( - &atp->tun_faddr))); - size = sizeof (ipa_conn_x_t); - } else { - size = sizeof (sin_t); - } - } else { - return (EINVAL); - } - } else { /* lower is V6 */ - if (atp->tun_flags & TUN_SRC) { - ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(&atp->tun_laddr))); - if (atp->tun_flags & TUN_DST) { - ASSERT(!(IN6_IS_ADDR_UNSPECIFIED( - &atp->tun_faddr))); - size = sizeof (ipa6_conn_x_t); - } else { - size = sizeof (sin6_t); - } - } else { - return (EINVAL); - } - } - - /* allocate an mblk */ - if ((mp = tun_realloc_mblk(q, NULL, size + sizeof (struct T_bind_req) + - 1, NULL, B_FALSE)) == NULL) { - tun0dbg(("tun_send_bind_req: couldn't allocate mblk\n")); - return (ENOMEM); - } - if ((mp->b_cont = tun_realloc_mblk(q, NULL, sizeof (ire_t), NULL, - B_FALSE)) == NULL) { - tun0dbg(("tun_send_bind_req: couldn't allocate mblk\n")); - freeb(mp); - return (ENOMEM); - } - mblk_setcred(mp, atp->tun_cred, NOPID); - mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; - tbr = (struct T_bind_req *)mp->b_rptr; - tbr->CONIND_number = 0; - tbr->PRIM_type = T_BIND_REQ; - tbr->ADDR_length = size; - tbr->ADDR_offset = sizeof (struct T_bind_req); - cp = (char *)&tbr[1]; - if (lvers == TUN_L_V4) { - - /* - * Send a T_BIND_REQ down to IP to bind to IPPROTO_IPV6 - * or IPPROTO_ENCAP. 
- */ - - /* Source is always required */ - ASSERT((atp->tun_flags & TUN_SRC) && - !IN6_IS_ADDR_UNSPECIFIED(&atp->tun_laddr)); - - if (!(atp->tun_flags & TUN_DST) || - IN6_IS_ADDR_UNSPECIFIED(&atp->tun_faddr)) { - sin_t *sin; - - sin = (sin_t *)cp; - bzero(sin, sizeof (sin_t)); - IN6_V4MAPPED_TO_IPADDR(&atp->tun_laddr, - sin->sin_addr.s_addr); - sin->sin_port = 0; - } else { - /* - * We used to use ipa_conn_t here, but discovered that - * IP insisted that the tunnel destination address be - * reachable, i.e. have a route. This causes problems - * in a number of cases. ipa_conn_x_t was invented to - * allow verifying destination reachability to be - * controlled. We choose not to verify destination - * reachability. All we really want is to register to - * receive packets for the tunnel, and don't care at - * this point whether the tunnel destination is - * reachable. - */ - ipa_conn_x_t *ipa; - - if (!IN6_IS_ADDR_V4MAPPED(&atp->tun_faddr)) { - err = EINVAL; - goto error; - } - ipa = (ipa_conn_x_t *)cp; - bzero(ipa, sizeof (ipa_conn_x_t)); - IN6_V4MAPPED_TO_IPADDR(&atp->tun_laddr, - ipa->acx_conn.ac_laddr); - IN6_V4MAPPED_TO_IPADDR(&atp->tun_faddr, - ipa->acx_conn.ac_faddr); - ipa->acx_conn.ac_fport = 0; - ipa->acx_conn.ac_lport = 0; - } - if ((atp->tun_flags & TUN_UPPER_MASK) == TUN_U_V6) - *(cp + size) = (uchar_t)IPPROTO_IPV6; - else - *(cp + size) = (uchar_t)IPPROTO_ENCAP; - } else { - ASSERT(lvers == TUN_L_V6); - - if (!(atp->tun_flags & TUN_DST) || - IN6_IS_ADDR_UNSPECIFIED(&atp->tun_faddr)) { - sin6_t *sin6; - - sin6 = (sin6_t *)cp; - bzero(sin6, sizeof (sin6_t)); - bcopy(&atp->tun_laddr, &sin6->sin6_addr, - sizeof (in6_addr_t)); - } else { - ipa6_conn_x_t *ipa; - - ipa = (ipa6_conn_x_t *)cp; - bzero(ipa, sizeof (ipa6_conn_x_t)); - bcopy(&atp->tun_laddr, &ipa->ac6x_conn.ac6_laddr, - sizeof (in6_addr_t)); - bcopy(&atp->tun_faddr, &ipa->ac6x_conn.ac6_faddr, - sizeof (in6_addr_t)); - } - if ((atp->tun_flags & TUN_UPPER_MASK) == TUN_U_V6) - *(cp + size) = 
(uchar_t)IPPROTO_IPV6; - else - *(cp + size) = (uchar_t)IPPROTO_ENCAP; - } - mp->b_datap->db_type = M_PCPROTO; - - /* - * Since we're requesting ire information for the destination - * along with this T_BIND_REQ, stamp the tunnel's tun_ire_lastreq - * with the current time. - */ - atp->tun_ire_lastreq = lbolt; - - atp->tun_flags |= TUN_BIND_SENT; - putnext(WR(q), mp); - return (0); -error: - freemsg(mp); - return (err); -} - -/* - * Update kstats - */ -static int -tun_stat_kstat_update(kstat_t *ksp, int rw) -{ - tun_t *tunp; - tun_stats_t *tstats; - struct tunstat *tunsp; - - if (ksp == NULL || ksp->ks_data == NULL) - return (EIO); - - tstats = (tun_stats_t *)ksp->ks_private; - mutex_enter(&tstats->ts_lock); - tunsp = (struct tunstat *)ksp->ks_data; - - /* Initialize kstat, but only the first one */ - if (rw == KSTAT_WRITE) { - if (tstats->ts_refcnt > 1) { - mutex_exit(&tstats->ts_lock); - return (EACCES); - } - tunp = tstats->ts_atp; - - /* - * MIB II kstat variables - */ - tunp->tun_nocanput = tunsp->tuns_nocanput.value.ui32; - tunp->tun_xmtretry = tunsp->tuns_xmtretry.value.ui32; - tunp->tun_allocbfail = tunsp->tuns_allocbfail.value.ui32; - tunp->tun_InDiscard = tunsp->tuns_InDiscard.value.ui32; - tunp->tun_InErrors = tunsp->tuns_InErrors.value.ui32; - tunp->tun_OutDiscard = tunsp->tuns_OutDiscard.value.ui32; - tunp->tun_OutErrors = tunsp->tuns_OutErrors.value.ui32; - - tunp->tun_HCInOctets = tunsp->tuns_HCInOctets.value.ui64; - tunp->tun_HCInUcastPkts = tunsp->tuns_HCInUcastPkts.value.ui64; - tunp->tun_HCInMulticastPkts = - tunsp->tuns_HCInMulticastPkts.value.ui64; - tunp->tun_HCOutOctets = tunsp->tuns_HCOutOctets.value.ui64; - tunp->tun_HCOutUcastPkts = - tunsp->tuns_HCOutUcastPkts.value.ui64; - tunp->tun_HCOutMulticastPkts = - tunsp->tuns_HCOutMulticastPkts.value.ui64; - mutex_exit(&tstats->ts_lock); - return (0); - } - /* - * update kstats.. 
fist zero them all out, then - * walk through all the interfaces that share kstat and - * add in the new stats - */ - tunsp->tuns_nocanput.value.ui32 = 0; - tunsp->tuns_xmtretry.value.ui32 = 0; - tunsp->tuns_allocbfail.value.ui32 = 0; - tunsp->tuns_InDiscard.value.ui32 = 0; - tunsp->tuns_InErrors.value.ui32 = 0; - tunsp->tuns_OutDiscard.value.ui32 = 0; - tunsp->tuns_OutErrors.value.ui32 = 0; - tunsp->tuns_HCInOctets.value.ui64 = 0; - tunsp->tuns_HCInUcastPkts.value.ui64 = 0; - tunsp->tuns_HCInMulticastPkts.value.ui64 = 0; - tunsp->tuns_HCOutOctets.value.ui64 = 0; - tunsp->tuns_HCOutUcastPkts.value.ui64 = 0; - tunsp->tuns_HCOutMulticastPkts.value.ui64 = 0; - - for (tunp = tstats->ts_atp; tunp; tunp = tunp->tun_kstat_next) { - tunsp->tuns_nocanput.value.ui32 += tunp->tun_nocanput; - tunsp->tuns_xmtretry.value.ui32 += tunp->tun_xmtretry; - tunsp->tuns_allocbfail.value.ui32 += tunp->tun_allocbfail; - tunsp->tuns_InDiscard.value.ui32 += tunp->tun_InDiscard; - tunsp->tuns_InErrors.value.ui32 += tunp->tun_InErrors; - tunsp->tuns_OutDiscard.value.ui32 += tunp->tun_OutDiscard; - tunsp->tuns_OutErrors.value.ui32 += tunp->tun_OutErrors; - - tunsp->tuns_HCInOctets.value.ui64 += tunp->tun_HCInOctets; - tunsp->tuns_HCInUcastPkts.value.ui64 += tunp->tun_HCInUcastPkts; - tunsp->tuns_HCInMulticastPkts.value.ui64 += - tunp->tun_HCInMulticastPkts; - tunsp->tuns_HCOutOctets.value.ui64 += tunp->tun_HCOutOctets; - tunsp->tuns_HCOutUcastPkts.value.ui64 += - tunp->tun_HCOutUcastPkts; - tunsp->tuns_HCOutMulticastPkts.value.ui64 += - tunp->tun_HCOutMulticastPkts; - } - tunsp->tuns_xmtbytes.value.ui32 = - tunsp->tuns_HCOutOctets.value.ui64 & 0xffffffff; - tunsp->tuns_rcvbytes.value.ui32 = - tunsp->tuns_HCInOctets.value.ui64 & 0xffffffff; - tunsp->tuns_opackets.value.ui32 = - tunsp->tuns_HCOutUcastPkts.value.ui64 & 0xffffffff; - tunsp->tuns_ipackets.value.ui32 = - tunsp->tuns_HCInUcastPkts.value.ui64 & 0xffffffff; - tunsp->tuns_multixmt.value.ui32 = - tunsp->tuns_HCOutMulticastPkts.value.ui64 
& 0xffffffff; - tunsp->tuns_multircv.value.ui32 = - tunsp->tuns_HCInMulticastPkts.value.ui64 & 0xffffffff; - mutex_exit(&tstats->ts_lock); - return (0); -} - -/* - * Initialize kstats - */ -static void -tun_statinit(tun_stats_t *tun_stat, char *modname, netstackid_t stackid) -{ - kstat_t *ksp; - struct tunstat *tunsp; - char buf[32]; - char *mod_buf; - - /* - * create kstat name based on lower ip and ppa - */ - if (tun_stat->ts_lower == TUN_L_V4) { - mod_buf = "ip"; - } else { - mod_buf = "ip6"; - } - (void) sprintf(buf, "%s.%s%d", mod_buf, modname, tun_stat->ts_ppa); - tun1dbg(("tunstatinit: Creating kstat %s\n", buf)); - if ((ksp = kstat_create_netstack(mod_buf, tun_stat->ts_ppa, buf, "net", - KSTAT_TYPE_NAMED, sizeof (struct tunstat) / sizeof (kstat_named_t), - KSTAT_FLAG_PERSISTENT, stackid)) == NULL) { - cmn_err(CE_CONT, "tun: kstat_create failed tun%d", - tun_stat->ts_ppa); - return; - } - tun_stat->ts_ksp = ksp; - tunsp = (struct tunstat *)(ksp->ks_data); - kstat_named_init(&tunsp->tuns_ipackets, "ipackets", KSTAT_DATA_UINT32); - kstat_named_init(&tunsp->tuns_opackets, "opackets", KSTAT_DATA_UINT32); - kstat_named_init(&tunsp->tuns_InErrors, "ierrors", KSTAT_DATA_UINT32); - kstat_named_init(&tunsp->tuns_OutErrors, "oerrors", KSTAT_DATA_UINT32); - kstat_named_init(&tunsp->tuns_xmtbytes, "obytes", KSTAT_DATA_UINT32); - kstat_named_init(&tunsp->tuns_rcvbytes, "rbytes", KSTAT_DATA_UINT32); - kstat_named_init(&tunsp->tuns_multixmt, "multixmt", KSTAT_DATA_UINT32); - kstat_named_init(&tunsp->tuns_multircv, "multircv", KSTAT_DATA_UINT32); - kstat_named_init(&tunsp->tuns_nocanput, "blocked", KSTAT_DATA_UINT32); - kstat_named_init(&tunsp->tuns_xmtretry, "xmtretry", KSTAT_DATA_UINT32); - kstat_named_init(&tunsp->tuns_InDiscard, "norcvbuf", KSTAT_DATA_UINT32); - kstat_named_init(&tunsp->tuns_OutDiscard, "noxmtbuf", - KSTAT_DATA_UINT32); - kstat_named_init(&tunsp->tuns_allocbfail, "allocbfail", - KSTAT_DATA_UINT32); - kstat_named_init(&tunsp->tuns_HCOutUcastPkts, 
"opackets64", - KSTAT_DATA_UINT64); - kstat_named_init(&tunsp->tuns_HCInUcastPkts, "ipackets64", - KSTAT_DATA_UINT64); - kstat_named_init(&tunsp->tuns_HCOutMulticastPkts, "multixmt64", - KSTAT_DATA_UINT64); - kstat_named_init(&tunsp->tuns_HCInMulticastPkts, "multircv64", - KSTAT_DATA_UINT64); - kstat_named_init(&tunsp->tuns_HCOutOctets, "obytes64", - KSTAT_DATA_UINT64); - kstat_named_init(&tunsp->tuns_HCInOctets, "rbytes64", - KSTAT_DATA_UINT64); - - ksp->ks_update = tun_stat_kstat_update; - ksp->ks_private = (void *) tun_stat; - kstat_install(ksp); -} - -/* - * Debug routine to print out tunnel name - */ -static char * -tun_who(queue_t *q, char *buf) -{ - tun_t *atp = (tun_t *)q->q_ptr; - char ppa_buf[20]; - - if (buf == NULL) - return ("tun_who: no buf"); - - if (atp->tun_state != DL_UNATTACHED) { - (void) sprintf(ppa_buf, "%d", atp->tun_ppa); - } else { - (void) sprintf(ppa_buf, "<not attached>"); - } - - (void) sprintf(buf, "%s.%s%s (%s)", - (atp->tun_flags & TUN_LOWER_MASK) == TUN_L_V4 ? "ip" : - (atp->tun_flags & TUN_LOWER_MASK) == TUN_L_V6 ? "ip6" : "<unknown>", - q->q_qinfo->qi_minfo->mi_idname, - ppa_buf, - (atp->tun_flags & TUN_UPPER_MASK) == TUN_U_V4 ? "inet" : - (atp->tun_flags & TUN_UPPER_MASK) == TUN_U_V6 ? "inet6" : - "<unknown af>"); - return (buf); -} - -/* - * Initialize the tunnel stack instance. - */ -/*ARGSUSED*/ -static void * -tun_stack_init(netstackid_t stackid, netstack_t *ns) -{ - tun_stack_t *tuns; - ipsec_stack_t *ipss = ns->netstack_ipsec; - - tuns = (tun_stack_t *)kmem_zalloc(sizeof (*tuns), KM_SLEEP); - tuns->tuns_netstack = ns; - - mutex_init(&tuns->tuns_global_lock, NULL, MUTEX_DEFAULT, NULL); - - rw_enter(&ipss->ipsec_itp_get_byaddr_rw_lock, RW_WRITER); - ipss->ipsec_itp_get_byaddr = itp_get_byaddr_fn; - rw_exit(&ipss->ipsec_itp_get_byaddr_rw_lock); - - return (tuns); -} - -/* - * Free the tunnel stack instance. 
- */ -/*ARGSUSED*/ -static void -tun_stack_fini(netstackid_t stackid, void *arg) -{ - tun_stack_t *tuns = (tun_stack_t *)arg; - ipsec_stack_t *ipss = tuns->tuns_netstack->netstack_ipsec; - int i; - - rw_enter(&ipss->ipsec_itp_get_byaddr_rw_lock, RW_WRITER); - ipss->ipsec_itp_get_byaddr = itp_get_byaddr_dummy; - rw_exit(&ipss->ipsec_itp_get_byaddr_rw_lock); - - for (i = 0; i < TUN_PPA_SZ; i++) { - ASSERT(tuns->tuns_ppa_list[i] == NULL); - } - for (i = 0; i < TUN_T_SZ; i++) { - ASSERT(tuns->tuns_byaddr_list[i] == NULL); - } - mutex_destroy(&tuns->tuns_global_lock); - kmem_free(tuns, sizeof (*tuns)); -} diff --git a/usr/src/uts/common/inet/ip_if.h b/usr/src/uts/common/inet/ip_if.h index 0e3b9f5b45..f4fc2012cc 100644 --- a/usr/src/uts/common/inet/ip_if.h +++ b/usr/src/uts/common/inet/ip_if.h @@ -38,10 +38,6 @@ extern "C" { #define IP_LOOPBACK_MTU (8*1024) -/* DLPI SAPs are in host byte order for all systems */ -#define IP_DL_SAP 0x0800 -#define IP6_DL_SAP 0x86dd - #ifdef _KERNEL /* * Interface flags actually represent the state/properties of 3 different @@ -187,7 +183,8 @@ extern time_t ill_frag_timeout(ill_t *, time_t); extern int ill_init(queue_t *, ill_t *); extern void ill_refresh_bcast(ill_t *); extern void ill_restart_dad(ill_t *, boolean_t); -extern boolean_t ill_setdefaulttoken(ill_t *); +extern void ill_setdefaulttoken(ill_t *); +extern void ill_setdesttoken(ill_t *); extern int ill_set_phys_addr(ill_t *, mblk_t *); extern int ill_replumb(ill_t *, mblk_t *); extern void ill_set_ndmp(ill_t *, mblk_t *, uint_t, uint_t); @@ -268,8 +265,8 @@ extern void ill_update_source_selection(ill_t *); extern ipif_t *ipif_select_source_v6(ill_t *, const in6_addr_t *, boolean_t, uint32_t, zoneid_t); extern boolean_t ipif_cant_setlinklocal(ipif_t *); -extern int ipif_setlinklocal(ipif_t *); -extern void ipif_set_tun_llink(ill_t *, struct iftun_req *); +extern void ipif_setlinklocal(ipif_t *); +extern void ipif_setdestlinklocal(ipif_t *); extern ipif_t 
*ipif_lookup_on_ifindex(uint_t, boolean_t, zoneid_t, queue_t *, mblk_t *, ipsq_func_t, int *, ip_stack_t *); extern ipif_t *ipif_get_next_ipif(ipif_t *curr, ill_t *ill); @@ -296,7 +293,6 @@ typedef int ip_extract_func_t(queue_t *, mblk_t *, const ip_ioctl_cmd_t *, cmd_info_t *, ipsq_func_t); extern ip_extract_func_t ip_extract_arpreq, ip_extract_lifreq; -extern ip_extract_func_t ip_extract_tunreq; extern int ip_addr_availability_check(ipif_t *); extern void ip_ll_subnet_defaults(ill_t *, mblk_t *); @@ -433,9 +429,6 @@ extern int ip_sioctl_tmysite(ipif_t *, sin_t *, queue_t *, mblk_t *, extern int ip_sioctl_tmyaddr(ipif_t *, sin_t *, queue_t *, mblk_t *, ip_ioctl_cmd_t *, void *); -extern int ip_sioctl_tunparam(ipif_t *, sin_t *, queue_t *, mblk_t *, - ip_ioctl_cmd_t *, void *); - extern int ip_sioctl_get_binding(ipif_t *, sin_t *, queue_t *, mblk_t *, ip_ioctl_cmd_t *, void *); extern int ip_sioctl_groupname(ipif_t *, sin_t *, queue_t *, diff --git a/usr/src/uts/common/inet/ip_stack.h b/usr/src/uts/common/inet/ip_stack.h index 6a5a9a9c1f..8b1ca0f32e 100644 --- a/usr/src/uts/common/inet/ip_stack.h +++ b/usr/src/uts/common/inet/ip_stack.h @@ -202,10 +202,12 @@ struct ip_stack { struct connf_s *ips_ipcl_proto_fanout_v6; struct connf_s *ips_ipcl_udp_fanout; struct connf_s *ips_ipcl_raw_fanout; + struct connf_s *ips_ipcl_iptun_fanout; uint_t ips_ipcl_conn_fanout_size; uint_t ips_ipcl_bind_fanout_size; uint_t ips_ipcl_udp_fanout_size; uint_t ips_ipcl_raw_fanout_size; + uint_t ips_ipcl_iptun_fanout_size; struct connf_s *ips_ipcl_globalhash_fanout; int ips_conn_g_index; diff --git a/usr/src/uts/common/inet/ipclassifier.h b/usr/src/uts/common/inet/ipclassifier.h index 67d1c742b8..36151fa67f 100644 --- a/usr/src/uts/common/inet/ipclassifier.h +++ b/usr/src/uts/common/inet/ipclassifier.h @@ -81,8 +81,8 @@ typedef void (*edesc_rpf)(void *, mblk_t *, void *); #define IPCL_UDPCONN 0x00000008 /* From udp_conn_cache */ #define IPCL_RAWIPCONN 0x00000010 /* From rawip_conn_cache */ 
#define IPCL_RTSCONN 0x00000020 /* From rts_conn_cache */ -#define IPCL_ISV6 0x00000040 /* AF_INET6 */ -#define IPCL_IPTUN 0x00000080 /* Has "tun" plumbed above it */ +/* Unused 0x00000040 */ +#define IPCL_IPTUN 0x00000080 /* iptun module above us */ #define IPCL_NONSTR 0x00001000 /* A non-STREAMS socket */ #define IPCL_IN_SQUEUE 0x10000000 /* Waiting squeue to finish */ @@ -136,9 +136,7 @@ typedef void (*edesc_rpf)(void *, mblk_t *, void *); ((connp)->conn_flags & IPCL_RTSCONN) #define IPCL_IS_IPTUN(connp) \ - (((connp)->conn_ulp == IPPROTO_ENCAP || \ - (connp)->conn_ulp == IPPROTO_IPV6) && \ - ((connp)->conn_flags & IPCL_IPTUN)) + ((connp)->conn_flags & IPCL_IPTUN) #define IPCL_IS_NONSTR(connp) ((connp)->conn_flags & IPCL_NONSTR) @@ -182,12 +180,14 @@ struct conn_s { struct udp_s *cp_udp; /* Pointer to the udp struct */ struct icmp_s *cp_icmp; /* Pointer to rawip struct */ struct rts_s *cp_rts; /* Pointer to rts struct */ + struct iptun_s *cp_iptun; /* Pointer to iptun_t */ void *cp_priv; } conn_proto_priv; #define conn_tcp conn_proto_priv.cp_tcp #define conn_udp conn_proto_priv.cp_udp #define conn_icmp conn_proto_priv.cp_icmp #define conn_rts conn_proto_priv.cp_rts +#define conn_iptun conn_proto_priv.cp_iptun #define conn_priv conn_proto_priv.cp_priv kcondvar_t conn_cv; @@ -391,7 +391,7 @@ struct connf_s { * is B_TRUE and conn_ref is being decremented. This is to \ * account for the mblk being currently processed. 
\ */ \ - if ((connp)->conn_ref <= 0 || \ + if ((connp)->conn_ref == 0 || \ ((connp)->conn_ref == 1 && (connp)->conn_on_sqp)) \ cmn_err(CE_PANIC, "CONN_DEC_REF: connp(%p) has ref " \ "= %d\n", (void *)(connp), (connp)->conn_ref); \ @@ -525,6 +525,24 @@ struct connf_s { (IN6_ARE_ADDR_EQUAL(&(connp)->conn_remv6, &(faddr)) && \ (connp)->conn_fport == (fport)))))) +#define IPCL_IPTUN_HASH(laddr, faddr) \ + ((ntohl(laddr) ^ ((ntohl(faddr) << 24) | (ntohl(faddr) >> 8))) % \ + ipcl_iptun_fanout_size) + +#define IPCL_IPTUN_HASH_V6(laddr, faddr) \ + IPCL_IPTUN_HASH((laddr)->s6_addr32[0] ^ (laddr)->s6_addr32[1] ^ \ + (faddr)->s6_addr32[2] ^ (faddr)->s6_addr32[3], \ + (faddr)->s6_addr32[0] ^ (faddr)->s6_addr32[1] ^ \ + (laddr)->s6_addr32[2] ^ (laddr)->s6_addr32[3]) + +#define IPCL_IPTUN_MATCH(connp, laddr, faddr) \ + (_IPCL_V4_MATCH((connp)->conn_srcv6, (laddr)) && \ + _IPCL_V4_MATCH((connp)->conn_remv6, (faddr))) + +#define IPCL_IPTUN_MATCH_V6(connp, laddr, faddr) \ + (IN6_ARE_ADDR_EQUAL(&(connp)->conn_srcv6, (laddr)) && \ + IN6_ARE_ADDR_EQUAL(&(connp)->conn_remv6, (faddr))) + #define IPCL_TCP_EAGER_INIT(connp, protocol, src, rem, ports) { \ (connp)->conn_flags |= (IPCL_TCP4|IPCL_EAGER); \ IN6_IPADDR_TO_V4MAPPED(src, &(connp)->conn_srcv6); \ @@ -536,7 +554,7 @@ struct connf_s { } #define IPCL_TCP_EAGER_INIT_V6(connp, protocol, src, rem, ports) { \ - (connp)->conn_flags |= (IPCL_TCP6|IPCL_EAGER|IPCL_ISV6); \ + (connp)->conn_flags |= (IPCL_TCP6|IPCL_EAGER); \ (connp)->conn_srcv6 = src; \ (connp)->conn_remv6 = rem; \ (connp)->conn_ports = ports; \ @@ -598,6 +616,8 @@ conn_t *ipcl_classify_v6(mblk_t *, uint8_t, uint_t, zoneid_t, ip_stack_t *); conn_t *ipcl_classify(mblk_t *, zoneid_t, ip_stack_t *); conn_t *ipcl_classify_raw(mblk_t *, uint8_t, zoneid_t, uint32_t, ipha_t *, ip_stack_t *); +conn_t *ipcl_iptun_classify_v4(ipaddr_t *, ipaddr_t *, ip_stack_t *); +conn_t *ipcl_iptun_classify_v6(in6_addr_t *, in6_addr_t *, ip_stack_t *); void ipcl_globalhash_insert(conn_t *); void 
ipcl_globalhash_remove(conn_t *); void ipcl_walk(pfv_t, void *, ip_stack_t *); diff --git a/usr/src/uts/common/inet/ipsec_impl.h b/usr/src/uts/common/inet/ipsec_impl.h index e7e8e735c7..98b34314e1 100644 --- a/usr/src/uts/common/inet/ipsec_impl.h +++ b/usr/src/uts/common/inet/ipsec_impl.h @@ -19,15 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _INET_IPSEC_IMPL_H #define _INET_IPSEC_IMPL_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <inet/ip.h> #include <inet/ipdrop.h> @@ -743,9 +741,6 @@ struct ipsec_stack { /* Packet dropper for generic SPD drops. */ ipdropper_t ipsec_spd_dropper; - krwlock_t ipsec_itp_get_byaddr_rw_lock; - ipsec_tun_pol_t *(*ipsec_itp_get_byaddr) - (uint32_t *, uint32_t *, int, netstack_t *); /* ipdrop.c */ kstat_t *ipsec_ip_drop_kstat; @@ -888,10 +883,12 @@ extern boolean_t ipsec_policy_delete(ipsec_policy_head_t *, ipsec_selkey_t *, int, netstack_t *); extern int ipsec_policy_delete_index(ipsec_policy_head_t *, uint64_t, netstack_t *); +extern boolean_t ipsec_polhead_insert(ipsec_policy_head_t *, ipsec_act_t *, + uint_t, int, int, netstack_t *); extern void ipsec_polhead_flush(ipsec_policy_head_t *, netstack_t *); extern int ipsec_copy_polhead(ipsec_policy_head_t *, ipsec_policy_head_t *, netstack_t *); -extern void ipsec_actvec_from_req(ipsec_req_t *, ipsec_act_t **, uint_t *, +extern void ipsec_actvec_from_req(const ipsec_req_t *, ipsec_act_t **, uint_t *, netstack_t *); extern void ipsec_actvec_free(ipsec_act_t *, uint_t); extern int ipsec_req_from_head(ipsec_policy_head_t *, ipsec_req_t *, int); @@ -927,17 +924,14 @@ extern void ipsec_insert_always(avl_tree_t *tree, void *new_node); extern int32_t ipsec_act_ovhd(const ipsec_act_t *act); - -extern boolean_t iph_ipvN(ipsec_policy_head_t *, boolean_t); - /* * Tunnel-support SPD functions and variables. 
*/ -struct tun_s; /* Defined in inet/tun.h. */ +struct iptun_s; /* Defined in inet/iptun/iptun_impl.h. */ extern boolean_t ipsec_tun_inbound(mblk_t *, mblk_t **, ipsec_tun_pol_t *, ipha_t *, ip6_t *, ipha_t *, ip6_t *, int, netstack_t *); -extern mblk_t *ipsec_tun_outbound(mblk_t *, struct tun_s *, ipha_t *, - ip6_t *, ipha_t *, ip6_t *, int, netstack_t *); +extern mblk_t *ipsec_tun_outbound(mblk_t *, struct iptun_s *, ipha_t *, + ip6_t *, ipha_t *, ip6_t *, int); extern void itp_free(ipsec_tun_pol_t *, netstack_t *); extern ipsec_tun_pol_t *create_tunnel_policy(char *, int *, uint64_t *, netstack_t *); @@ -946,8 +940,8 @@ extern void itp_unlink(ipsec_tun_pol_t *, netstack_t *); extern void itp_walk(void (*)(ipsec_tun_pol_t *, void *, netstack_t *), void *, netstack_t *); -extern ipsec_tun_pol_t *itp_get_byaddr_dummy(uint32_t *, uint32_t *, - int, netstack_t *); +extern ipsec_tun_pol_t *itp_get_byaddr(uint32_t *, uint32_t *, int, + ip_stack_t *); /* * IPsec AH/ESP functions called from IP or the common SADB code in AH. diff --git a/usr/src/uts/common/inet/ipsec_info.h b/usr/src/uts/common/inet/ipsec_info.h index 0348e10b91..b466f0ec29 100644 --- a/usr/src/uts/common/inet/ipsec_info.h +++ b/usr/src/uts/common/inet/ipsec_info.h @@ -276,9 +276,6 @@ typedef struct ipsec_out_s { * NOTE: Keysock_hello is simply an ipsec_info_t */ -/* TUN_HELLO is just like KEYSOCK_HELLO, except for tunnels to talk with IP. */ -#define TUN_HELLO KEYSOCK_HELLO - /* * KEYSOCK_HELLO_ACK is sent by a consumer to acknowledge a KEYSOCK_HELLO. * It contains the PF_KEYv2 sa_type, so keysock can redirect PF_KEY messages diff --git a/usr/src/uts/common/inet/iptun.h b/usr/src/uts/common/inet/iptun.h new file mode 100644 index 0000000000..1cd74d87cc --- /dev/null +++ b/usr/src/uts/common/inet/iptun.h @@ -0,0 +1,91 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _INET_IPTUN_H +#define _INET_IPTUN_H + +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/dld_ioc.h> +#include <netinet/in.h> +#include <netinet/ip6.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * from http://www.iana.org/assignments/ip-parameters + */ +#define IPTUN_DEFAULT_HOPLIMIT 64 +/* from RFC 2473 */ +#define IPTUN_DEFAULT_ENCAPLIMIT 4 + +#define IPTUN_CREATE IPTUNIOC(1) +#define IPTUN_DELETE IPTUNIOC(2) +#define IPTUN_MODIFY IPTUNIOC(3) +#define IPTUN_INFO IPTUNIOC(4) +#define IPTUN_SET_6TO4RELAY IPTUNIOC(9) +#define IPTUN_GET_6TO4RELAY IPTUNIOC(10) + +typedef enum { + IPTUN_TYPE_UNKNOWN = 0, + IPTUN_TYPE_IPV4, + IPTUN_TYPE_IPV6, + IPTUN_TYPE_6TO4 +} iptun_type_t; + +/* + * To maintain proper alignment of fields between 32bit user-land and 64bit + * kernel, all fields in iptun_kparams_t after itk_fields must be in + * descending order of size. Due to strict structure size checks done in the + * iptun ioctl processing, the structure size must be the same on 32 and 64 + * bit. 
amd64 will pad the end of the structure to make the end 64bit + * aligned, so we must add explicit padding to make sure that it's similarly + * aligned when compiled in 32 bit mode. + */ +typedef struct iptun_kparams { + datalink_id_t iptun_kparam_linkid; + uint32_t iptun_kparam_flags; + struct sockaddr_storage iptun_kparam_laddr; /* local address */ + struct sockaddr_storage iptun_kparam_raddr; /* remote address */ + ipsec_req_t iptun_kparam_secinfo; + iptun_type_t iptun_kparam_type; + uint32_t _iptun_kparam_padding; +} iptun_kparams_t; + +/* itk_flags */ +#define IPTUN_KPARAM_TYPE 0x00000001 /* itk_type is set */ +#define IPTUN_KPARAM_LADDR 0x00000002 /* itk_laddr is set */ +#define IPTUN_KPARAM_RADDR 0x00000004 /* itk_raddr is set */ +#define IPTUN_KPARAM_SECINFO 0x00000008 /* itk_secinfo is set */ +#define IPTUN_KPARAM_IMPLICIT 0x00000010 /* implicitly created IP tunnel */ +#define IPTUN_KPARAM_IPSECPOL 0x00000020 /* ipsecconf(1M) policy present */ + +#ifdef __cplusplus +} +#endif + +#endif /* _INET_IPTUN_H */ diff --git a/usr/src/uts/common/inet/iptun/iptun.c b/usr/src/uts/common/inet/iptun/iptun.c new file mode 100644 index 0000000000..47633cc6d8 --- /dev/null +++ b/usr/src/uts/common/inet/iptun/iptun.c @@ -0,0 +1,3026 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * iptun - IP Tunneling Driver + * + * This module is a GLDv3 driver that implements virtual datalinks over IP + * (a.k.a, IP tunneling). The datalinks are managed through a dld ioctl + * interface (see iptun_ctl.c), and registered with GLDv3 using + * mac_register(). It implements the logic for various forms of IP (IPv4 or + * IPv6) encapsulation within IP (IPv4 or IPv6) by interacting with the ip + * module below it. Each virtual IP tunnel datalink has a conn_t associated + * with it representing the "outer" IP connection. + * + * The module implements the following locking semantics: + * + * Lookups and deletions in iptun_hash are synchronized using iptun_hash_lock. + * See comments above iptun_hash_lock for details. + * + * No locks are ever held while calling up to GLDv3. The general architecture + * of GLDv3 requires this, as the mac perimeter (essentially a lock) for a + * given link will be held while making downcalls (iptun_m_*() callbacks). + * Because we need to hold locks while handling downcalls, holding these locks + * while issuing upcalls results in deadlock scenarios. See the block comment + * above iptun_task_cb() for details on how we safely issue upcalls without + * holding any locks. + * + * The contents of each iptun_t is protected by an iptun_mutex which is held + * in iptun_enter() (called by iptun_enter_by_linkid()), and exited in + * iptun_exit(). + * + * See comments in iptun_delete() and iptun_free() for details on how the + * iptun_t is deleted safely. 
+ */ + +#include <sys/types.h> +#include <sys/kmem.h> +#include <sys/errno.h> +#include <sys/modhash.h> +#include <sys/list.h> +#include <sys/strsun.h> +#include <sys/file.h> +#include <sys/systm.h> +#include <sys/tihdr.h> +#include <sys/param.h> +#include <sys/mac_provider.h> +#include <sys/mac_ipv4.h> +#include <sys/mac_ipv6.h> +#include <sys/mac_6to4.h> +#include <sys/tsol/tnet.h> +#include <sys/sunldi.h> +#include <netinet/in.h> +#include <netinet/ip6.h> +#include <inet/ip.h> +#include <inet/ip_ire.h> +#include <inet/ipsec_impl.h> +#include <inet/iptun.h> +#include "iptun_impl.h" + +/* Do the tunnel type and address family match? */ +#define IPTUN_ADDR_MATCH(iptun_type, family) \ + ((iptun_type == IPTUN_TYPE_IPV4 && family == AF_INET) || \ + (iptun_type == IPTUN_TYPE_IPV6 && family == AF_INET6) || \ + (iptun_type == IPTUN_TYPE_6TO4 && family == AF_INET)) + +#define IPTUN_HASH_KEY(key) ((mod_hash_key_t)(uintptr_t)(key)) + +#define IPTUNQ_DEV "/dev/iptunq" + +#define IPTUN_MIN_IPV4_MTU 576 /* ip.h still uses 68 (!) */ +#define IPTUN_MIN_IPV6_MTU IPV6_MIN_MTU +#define IPTUN_MAX_IPV4_MTU (IP_MAXPACKET - sizeof (ipha_t)) +#define IPTUN_MAX_IPV6_MTU (IP_MAXPACKET - sizeof (ip6_t) - \ + sizeof (iptun_encaplim_t)) + +#define IPTUN_MIN_HOPLIMIT 1 +#define IPTUN_MAX_HOPLIMIT UINT8_MAX + +#define IPTUN_MIN_ENCAPLIMIT 0 +#define IPTUN_MAX_ENCAPLIMIT UINT8_MAX + +#define IPTUN_IPSEC_REQ_MASK (IPSEC_PREF_REQUIRED | IPSEC_PREF_NEVER) + +static iptun_encaplim_t iptun_encaplim_init = { + { IPPROTO_NONE, 0 }, + IP6OPT_TUNNEL_LIMIT, + 1, + IPTUN_DEFAULT_ENCAPLIMIT, /* filled in with actual value later */ + IP6OPT_PADN, + 1, + 0 +}; + +/* Table containing per-iptun-type information. 
*/ +static iptun_typeinfo_t iptun_type_table[] = { + { IPTUN_TYPE_IPV4, MAC_PLUGIN_IDENT_IPV4, IPV4_VERSION, ip_output, + IPTUN_MIN_IPV4_MTU, IPTUN_MAX_IPV4_MTU, B_TRUE }, + { IPTUN_TYPE_IPV6, MAC_PLUGIN_IDENT_IPV6, IPV6_VERSION, ip_output_v6, + IPTUN_MIN_IPV6_MTU, IPTUN_MAX_IPV6_MTU, B_TRUE }, + { IPTUN_TYPE_6TO4, MAC_PLUGIN_IDENT_6TO4, IPV4_VERSION, ip_output, + IPTUN_MIN_IPV4_MTU, IPTUN_MAX_IPV4_MTU, B_FALSE }, + { IPTUN_TYPE_UNKNOWN, NULL, 0, NULL, 0, 0, B_FALSE } +}; + +/* + * iptun_hash is an iptun_t lookup table by link ID protected by + * iptun_hash_lock. While the hash table's integrity is maintained via + * internal locking in the mod_hash_*() functions, we need additional locking + * so that an iptun_t cannot be deleted after a hash lookup has returned an + * iptun_t and before iptun_lock has been entered. As such, we use + * iptun_hash_lock when doing lookups and removals from iptun_hash. + */ +mod_hash_t *iptun_hash; +static kmutex_t iptun_hash_lock; + +static uint_t iptun_tunnelcount; /* total for all stacks */ +kmem_cache_t *iptun_cache; +ddi_taskq_t *iptun_taskq; + +typedef enum { + IPTUN_TASK_PMTU_UPDATE, /* obtain new destination path-MTU */ + IPTUN_TASK_MTU_UPDATE, /* tell mac about new tunnel link MTU */ + IPTUN_TASK_LADDR_UPDATE, /* tell mac about new local address */ + IPTUN_TASK_RADDR_UPDATE, /* tell mac about new remote address */ + IPTUN_TASK_LINK_UPDATE, /* tell mac about new link state */ + IPTUN_TASK_PDATA_UPDATE /* tell mac about updated plugin data */ +} iptun_task_t; + +typedef struct iptun_task_data_s { + iptun_task_t itd_task; + datalink_id_t itd_linkid; +} iptun_task_data_t; + +static void iptun_task_dispatch(iptun_t *, iptun_task_t); +static int iptun_enter(iptun_t *); +static void iptun_exit(iptun_t *); +static void iptun_headergen(iptun_t *, boolean_t); +static void iptun_drop_pkt(mblk_t *, uint64_t *); +static void iptun_input(void *, mblk_t *, void *); +static void iptun_output(iptun_t *, mblk_t *); +static uint32_t 
iptun_get_maxmtu(iptun_t *, uint32_t); +static uint32_t iptun_update_mtu(iptun_t *, uint32_t); +static uint32_t iptun_get_dst_pmtu(iptun_t *); +static int iptun_setladdr(iptun_t *, const struct sockaddr_storage *); + +static mac_callbacks_t iptun_m_callbacks; + +static int +iptun_m_getstat(void *arg, uint_t stat, uint64_t *val) +{ + iptun_t *iptun = arg; + int err = 0; + + switch (stat) { + case MAC_STAT_IERRORS: + *val = iptun->iptun_ierrors; + break; + case MAC_STAT_OERRORS: + *val = iptun->iptun_oerrors; + break; + case MAC_STAT_RBYTES: + *val = iptun->iptun_rbytes; + break; + case MAC_STAT_IPACKETS: + *val = iptun->iptun_ipackets; + break; + case MAC_STAT_OBYTES: + *val = iptun->iptun_obytes; + break; + case MAC_STAT_OPACKETS: + *val = iptun->iptun_opackets; + break; + case MAC_STAT_NORCVBUF: + *val = iptun->iptun_norcvbuf; + break; + case MAC_STAT_NOXMTBUF: + *val = iptun->iptun_noxmtbuf; + break; + default: + err = ENOTSUP; + } + + return (err); +} + +static int +iptun_m_start(void *arg) +{ + iptun_t *iptun = arg; + int err; + + if ((err = iptun_enter(iptun)) == 0) { + iptun->iptun_flags |= IPTUN_MAC_STARTED; + iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE); + iptun_exit(iptun); + } + return (err); +} + +static void +iptun_m_stop(void *arg) +{ + iptun_t *iptun = arg; + + if (iptun_enter(iptun) == 0) { + iptun->iptun_flags &= ~IPTUN_MAC_STARTED; + iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE); + iptun_exit(iptun); + } +} + +/* + * iptun_m_setpromisc() does nothing and always succeeds. This is because a + * tunnel data-link only ever receives packets that are destined exclusively + * for the local address of the tunnel. + */ +/* ARGSUSED */ +static int +iptun_m_setpromisc(void *arg, boolean_t on) +{ + return (0); +} + +/* ARGSUSED */ +static int +iptun_m_multicst(void *arg, boolean_t add, const uint8_t *addrp) +{ + return (ENOTSUP); +} + +/* + * iptun_m_unicst() sets the local address. 
+ */ +/* ARGSUSED */ +static int +iptun_m_unicst(void *arg, const uint8_t *addrp) +{ + iptun_t *iptun = arg; + int err; + struct sockaddr_storage ss; + struct sockaddr_in *sin; + struct sockaddr_in6 *sin6; + + if ((err = iptun_enter(iptun)) == 0) { + switch (iptun->iptun_typeinfo->iti_ipvers) { + case IPV4_VERSION: + sin = (struct sockaddr_in *)&ss; + sin->sin_family = AF_INET; + bcopy(addrp, &sin->sin_addr, sizeof (in_addr_t)); + break; + case IPV6_VERSION: + sin6 = (struct sockaddr_in6 *)&ss; + sin6->sin6_family = AF_INET6; + bcopy(addrp, &sin6->sin6_addr, sizeof (in6_addr_t)); + break; + default: + ASSERT(0); + } + err = iptun_setladdr(iptun, &ss); + iptun_exit(iptun); + } + return (err); +} + +static mblk_t * +iptun_m_tx(void *arg, mblk_t *mpchain) +{ + mblk_t *mp, *nmp; + iptun_t *iptun = arg; + + if (!IS_IPTUN_RUNNING(iptun)) { + iptun_drop_pkt(mpchain, &iptun->iptun_noxmtbuf); + return (NULL); + } + + /* + * Request the destination's path MTU information regularly in case + * path MTU has increased. + */ + if (IPTUN_PMTU_TOO_OLD(iptun)) + iptun_task_dispatch(iptun, IPTUN_TASK_PMTU_UPDATE); + + for (mp = mpchain; mp != NULL; mp = nmp) { + nmp = mp->b_next; + mp->b_next = NULL; + iptun_output(iptun, mp); + } + + return (NULL); +} + +/* ARGSUSED */ +static int +iptun_m_setprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, + uint_t pr_valsize, const void *pr_val) +{ + iptun_t *iptun = barg; + uint32_t value = *(uint32_t *)pr_val; + int err; + + /* + * We need to enter this iptun_t since we'll be modifying the outer + * header. 
+ */ + if ((err = iptun_enter(iptun)) != 0) + return (err); + + switch (pr_num) { + case MAC_PROP_IPTUN_HOPLIMIT: + if (value < IPTUN_MIN_HOPLIMIT || value > IPTUN_MAX_HOPLIMIT) { + err = EINVAL; + break; + } + if (value != iptun->iptun_hoplimit) { + iptun->iptun_hoplimit = (uint8_t)value; + iptun_headergen(iptun, B_TRUE); + } + break; + case MAC_PROP_IPTUN_ENCAPLIMIT: + if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_IPV6 || + value > IPTUN_MAX_ENCAPLIMIT) { + err = EINVAL; + break; + } + if (value != iptun->iptun_encaplimit) { + iptun->iptun_encaplimit = (uint8_t)value; + iptun_headergen(iptun, B_TRUE); + } + break; + case MAC_PROP_MTU: { + uint32_t maxmtu = iptun_get_maxmtu(iptun, 0); + + if (value < iptun->iptun_typeinfo->iti_minmtu || + value > maxmtu) { + err = EINVAL; + break; + } + iptun->iptun_flags |= IPTUN_FIXED_MTU; + if (value != iptun->iptun_mtu) { + iptun->iptun_mtu = value; + iptun_task_dispatch(iptun, IPTUN_TASK_MTU_UPDATE); + } + break; + } + default: + err = EINVAL; + } + iptun_exit(iptun); + return (err); +} + +/* ARGSUSED */ +static int +iptun_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, + uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) +{ + iptun_t *iptun = barg; + mac_propval_range_t range; + boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); + boolean_t is_possible = (pr_flags & MAC_PROP_POSSIBLE); + int err; + + if ((err = iptun_enter(iptun)) != 0) + return (err); + + if ((pr_flags & ~(MAC_PROP_DEFAULT | MAC_PROP_POSSIBLE)) != 0) { + err = ENOTSUP; + goto done; + } + if (is_default && is_possible) { + err = EINVAL; + goto done; + } + + *perm = MAC_PROP_PERM_RW; + + if (is_possible) { + if (pr_valsize < sizeof (mac_propval_range_t)) { + err = EINVAL; + goto done; + } + range.mpr_count = 1; + range.mpr_type = MAC_PROPVAL_UINT32; + } else if (pr_valsize < sizeof (uint32_t)) { + err = EINVAL; + goto done; + } + + switch (pr_num) { + case MAC_PROP_IPTUN_HOPLIMIT: + if (is_possible) { + 
range.range_uint32[0].mpur_min = IPTUN_MIN_HOPLIMIT; + range.range_uint32[0].mpur_max = IPTUN_MAX_HOPLIMIT; + } else if (is_default) { + *(uint32_t *)pr_val = IPTUN_DEFAULT_HOPLIMIT; + } else { + *(uint32_t *)pr_val = iptun->iptun_hoplimit; + } + break; + case MAC_PROP_IPTUN_ENCAPLIMIT: + if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_IPV6) { + err = ENOTSUP; + goto done; + } + if (is_possible) { + range.range_uint32[0].mpur_min = IPTUN_MIN_ENCAPLIMIT; + range.range_uint32[0].mpur_max = IPTUN_MAX_ENCAPLIMIT; + } else if (is_default) { + *(uint32_t *)pr_val = IPTUN_DEFAULT_ENCAPLIMIT; + } else { + *(uint32_t *)pr_val = iptun->iptun_encaplimit; + } + break; + case MAC_PROP_MTU: { + uint32_t maxmtu = iptun_get_maxmtu(iptun, 0); + + if (is_possible) { + range.range_uint32[0].mpur_min = + iptun->iptun_typeinfo->iti_minmtu; + range.range_uint32[0].mpur_max = maxmtu; + } else { + /* + * The MAC module knows the current value and should + * never call us for it. There is also no default + * MTU, as by default, it is a dynamic property. + */ + err = ENOTSUP; + goto done; + } + break; + } + default: + err = EINVAL; + goto done; + } + if (is_possible) + bcopy(&range, pr_val, sizeof (range)); +done: + iptun_exit(iptun); + return (err); +} + +uint_t +iptun_count(void) +{ + return (iptun_tunnelcount); +} + +/* + * Enter an iptun_t exclusively. This is essentially just a mutex, but we + * don't allow iptun_enter() to succeed on a tunnel if it's in the process of + * being deleted. + */ +static int +iptun_enter(iptun_t *iptun) +{ + mutex_enter(&iptun->iptun_lock); + while (iptun->iptun_flags & IPTUN_DELETE_PENDING) + cv_wait(&iptun->iptun_enter_cv, &iptun->iptun_lock); + if (iptun->iptun_flags & IPTUN_CONDEMNED) { + mutex_exit(&iptun->iptun_lock); + return (ENOENT); + } + return (0); +} + +/* + * Exit the tunnel entered in iptun_enter(). + */ +static void +iptun_exit(iptun_t *iptun) +{ + mutex_exit(&iptun->iptun_lock); +} + +/* + * Enter the IP tunnel instance by datalink ID. 
+ * + * On success, 0 is returned and *iptun points to the entered tunnel, which + * the caller must later release with iptun_exit(). On failure, *iptun is set + * to NULL and an errno value is returned: ENOENT if the link ID is not in + * iptun_hash, or whatever error iptun_enter() returned. + */ +static int +iptun_enter_by_linkid(datalink_id_t linkid, iptun_t **iptun) +{ + int err; + + /* + * iptun_hash_lock is held across both the lookup and iptun_enter() so + * that the iptun_t cannot be removed from the hash (see iptun_free()) + * between the two steps. + */ + mutex_enter(&iptun_hash_lock); + if (mod_hash_find(iptun_hash, IPTUN_HASH_KEY(linkid), + (mod_hash_val_t *)iptun) == 0) + err = iptun_enter(*iptun); + else + err = ENOENT; + if (err != 0) + *iptun = NULL; + mutex_exit(&iptun_hash_lock); + return (err); +} + +/* + * Handle tasks that were deferred through the iptun_taskq. These fall into + * two categories: + * + * 1. Tasks that were deferred because we didn't want to spend time doing them + * while in the data path. Only IPTUN_TASK_PMTU_UPDATE falls into this + * category. + * + * 2. Tasks that were deferred because they require calling up to the mac + * module, and we can't call up to the mac module while holding locks. + * + * Handling 1 is easy; we just look up the iptun_t, perform the task, exit the + * tunnel, and we're done. + * + * Handling 2 is tricky to get right without introducing race conditions and + * deadlocks with the mac module, as we cannot issue an upcall while in the + * iptun_t. The reason is that upcalls may try and enter the mac perimeter, + * while iptun callbacks (such as iptun_m_setprop()) called from the mac + * module will already have the perimeter held, and will then try and enter + * the iptun_t. You can see the lock ordering problem with this; this will + * deadlock. + * + * The safe way to do this is to enter the iptun_t in question and copy the + * information we need out of it so that we can exit it and know that the + * information being passed up to the upcalls won't be subject to modification + * by other threads. The problem now is that we need to exit it prior to + * issuing the upcall, but once we do this, a thread could come along and + * delete the iptun_t and thus the mac handle required to issue the upcall. + * To prevent this, we set the IPTUN_UPCALL_PENDING flag prior to exiting the + * iptun_t. 
This flag is the condition associated with iptun_upcall_cv, which + * iptun_delete() will cv_wait() on. When the upcall completes, we clear + * IPTUN_UPCALL_PENDING and cv_signal() any potentially waiting + * iptun_delete(). We can thus still safely use iptun->iptun_mh after having + * exited the iptun_t. + */ +static void +iptun_task_cb(void *arg) +{ + iptun_task_data_t *itd = arg; + iptun_task_t task = itd->itd_task; + datalink_id_t linkid = itd->itd_linkid; + iptun_t *iptun; + uint32_t mtu; + iptun_addr_t addr; + link_state_t linkstate; + size_t header_size; + iptun_header_t header; + + kmem_free(itd, sizeof (*itd)); + + /* + * Note that if the lookup fails, it's because the tunnel was deleted + * between the time the task was dispatched and now. That isn't an + * error. + */ + if (iptun_enter_by_linkid(linkid, &iptun) != 0) + return; + + if (task == IPTUN_TASK_PMTU_UPDATE) { + (void) iptun_update_mtu(iptun, 0); + iptun_exit(iptun); + return; + } + + iptun->iptun_flags |= IPTUN_UPCALL_PENDING; + + switch (task) { + case IPTUN_TASK_MTU_UPDATE: + mtu = iptun->iptun_mtu; + break; + case IPTUN_TASK_LADDR_UPDATE: + addr = iptun->iptun_laddr; + break; + case IPTUN_TASK_RADDR_UPDATE: + addr = iptun->iptun_raddr; + break; + case IPTUN_TASK_LINK_UPDATE: + linkstate = IS_IPTUN_RUNNING(iptun) ? 
+ LINK_STATE_UP : LINK_STATE_DOWN; + break; + case IPTUN_TASK_PDATA_UPDATE: + header_size = iptun->iptun_header_size; + header = iptun->iptun_header; + break; + default: + ASSERT(0); + } + + iptun_exit(iptun); + + switch (task) { + case IPTUN_TASK_MTU_UPDATE: + (void) mac_maxsdu_update(iptun->iptun_mh, mtu); + break; + case IPTUN_TASK_LADDR_UPDATE: + mac_unicst_update(iptun->iptun_mh, (uint8_t *)&addr.ia_addr); + break; + case IPTUN_TASK_RADDR_UPDATE: + mac_dst_update(iptun->iptun_mh, (uint8_t *)&addr.ia_addr); + break; + case IPTUN_TASK_LINK_UPDATE: + mac_link_update(iptun->iptun_mh, linkstate); + break; + case IPTUN_TASK_PDATA_UPDATE: + if (mac_pdata_update(iptun->iptun_mh, + header_size == 0 ? NULL : &header, header_size) != 0) + atomic_inc_64(&iptun->iptun_taskq_fail); + break; + } + + mutex_enter(&iptun->iptun_lock); + iptun->iptun_flags &= ~IPTUN_UPCALL_PENDING; + cv_signal(&iptun->iptun_upcall_cv); + mutex_exit(&iptun->iptun_lock); +} + +static void +iptun_task_dispatch(iptun_t *iptun, iptun_task_t iptun_task) +{ + iptun_task_data_t *itd; + + itd = kmem_alloc(sizeof (*itd), KM_NOSLEEP); + if (itd == NULL) { + atomic_inc_64(&iptun->iptun_taskq_fail); + return; + } + itd->itd_task = iptun_task; + itd->itd_linkid = iptun->iptun_linkid; + if (ddi_taskq_dispatch(iptun_taskq, iptun_task_cb, itd, DDI_NOSLEEP)) { + atomic_inc_64(&iptun->iptun_taskq_fail); + kmem_free(itd, sizeof (*itd)); + } +} + +/* + * Convert an iptun_addr_t to sockaddr_storage. 
+ */ +static void +iptun_getaddr(iptun_addr_t *iptun_addr, struct sockaddr_storage *ss) +{ + struct sockaddr_in *sin; + struct sockaddr_in6 *sin6; + + bzero(ss, sizeof (*ss)); + switch (iptun_addr->ia_family) { + case AF_INET: + sin = (struct sockaddr_in *)ss; + sin->sin_addr.s_addr = iptun_addr->ia_addr.iau_addr4; + break; + case AF_INET6: + sin6 = (struct sockaddr_in6 *)ss; + sin6->sin6_addr = iptun_addr->ia_addr.iau_addr6; + break; + default: + ASSERT(0); + } + ss->ss_family = iptun_addr->ia_family; +} + +/* + * General purpose function to set an IP tunnel source or destination address. + */ +static int +iptun_setaddr(iptun_type_t iptun_type, iptun_addr_t *iptun_addr, + const struct sockaddr_storage *ss) +{ + if (!IPTUN_ADDR_MATCH(iptun_type, ss->ss_family)) + return (EINVAL); + + switch (ss->ss_family) { + case AF_INET: { + struct sockaddr_in *sin = (struct sockaddr_in *)ss; + + if ((sin->sin_addr.s_addr == INADDR_ANY) || + (sin->sin_addr.s_addr == INADDR_BROADCAST) || + CLASSD(sin->sin_addr.s_addr)) { + return (EADDRNOTAVAIL); + } + iptun_addr->ia_addr.iau_addr4 = sin->sin_addr.s_addr; + break; + } + case AF_INET6: { + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss; + + if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || + IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) || + IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { + return (EADDRNOTAVAIL); + } + iptun_addr->ia_addr.iau_addr6 = sin6->sin6_addr; + break; + } + default: + return (EAFNOSUPPORT); + } + iptun_addr->ia_family = ss->ss_family; + return (0); +} + +static int +iptun_setladdr(iptun_t *iptun, const struct sockaddr_storage *laddr) +{ + return (iptun_setaddr(iptun->iptun_typeinfo->iti_type, + &iptun->iptun_laddr, laddr)); +} + +static int +iptun_setraddr(iptun_t *iptun, const struct sockaddr_storage *raddr) +{ + if (!(iptun->iptun_typeinfo->iti_hasraddr)) + return (EINVAL); + return (iptun_setaddr(iptun->iptun_typeinfo->iti_type, + &iptun->iptun_raddr, raddr)); +} + +static boolean_t +iptun_canbind(iptun_t 
*iptun) +{ + /* + * A tunnel may bind when its source address has been set, and if its + * tunnel type requires one, also its destination address. + */ + return ((iptun->iptun_flags & IPTUN_LADDR) && + ((iptun->iptun_flags & IPTUN_RADDR) || + !(iptun->iptun_typeinfo->iti_hasraddr))); +} + +static int +iptun_bind(iptun_t *iptun) +{ + conn_t *connp = iptun->iptun_connp; + int err; + + ASSERT(iptun_canbind(iptun)); + + switch (iptun->iptun_typeinfo->iti_type) { + case IPTUN_TYPE_IPV4: + /* + * When we set a tunnel's destination address, we do not care + * if the destination is reachable. Transient routing issues + * should not inhibit the creation of a tunnel interface, for + * example. For that reason, we pass in B_FALSE for the + * verify_dst argument of ip_proto_bind_connected_v4() (and + * similarly for IPv6 tunnels below). + */ + err = ip_proto_bind_connected_v4(connp, NULL, IPPROTO_ENCAP, + &iptun->iptun_laddr4, 0, iptun->iptun_raddr4, 0, B_TRUE, + B_FALSE, iptun->iptun_cred); + break; + case IPTUN_TYPE_IPV6: + err = ip_proto_bind_connected_v6(connp, NULL, IPPROTO_IPV6, + &iptun->iptun_laddr6, 0, &iptun->iptun_raddr6, NULL, 0, + B_TRUE, B_FALSE, iptun->iptun_cred); + break; + case IPTUN_TYPE_6TO4: + err = ip_proto_bind_laddr_v4(connp, NULL, IPPROTO_IPV6, + iptun->iptun_laddr4, 0, B_TRUE); + break; + } + + if (err == 0) { + iptun->iptun_flags |= IPTUN_BOUND; + + /* + * Now that we're bound with ip below us, this is a good time + * to initialize the destination path MTU and to re-calculate + * the tunnel's link MTU. 
+ */ + (void) iptun_update_mtu(iptun, 0); + + if (IS_IPTUN_RUNNING(iptun)) + iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE); + } + return (err); +} + +static void +iptun_unbind(iptun_t *iptun) +{ + ASSERT(iptun->iptun_flags & IPTUN_BOUND); + ASSERT(mutex_owned(&iptun->iptun_lock) || + (iptun->iptun_flags & IPTUN_CONDEMNED)); + ip_unbind(iptun->iptun_connp); + iptun->iptun_flags &= ~IPTUN_BOUND; + if (!(iptun->iptun_flags & IPTUN_CONDEMNED)) + iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE); +} + +/* + * Re-generate the template data-link header for a given IP tunnel given the + * tunnel's current parameters. + */ +static void +iptun_headergen(iptun_t *iptun, boolean_t update_mac) +{ + switch (iptun->iptun_typeinfo->iti_ipvers) { + case IPV4_VERSION: + /* + * We only need to use a custom IP header if the administrator + * has supplied a non-default hoplimit. + */ + if (iptun->iptun_hoplimit == IPTUN_DEFAULT_HOPLIMIT) { + iptun->iptun_header_size = 0; + break; + } + iptun->iptun_header_size = sizeof (ipha_t); + iptun->iptun_header4.ipha_version_and_hdr_length = + IP_SIMPLE_HDR_VERSION; + iptun->iptun_header4.ipha_fragment_offset_and_flags = + htons(IPH_DF); + iptun->iptun_header4.ipha_ttl = iptun->iptun_hoplimit; + break; + case IPV6_VERSION: { + ip6_t *ip6hp = &iptun->iptun_header6.it6h_ip6h; + + /* + * We only need to use a custom IPv6 header if either the + * administrator has supplied a non-default hoplimit, or we + * need to include an encapsulation limit option in the outer + * header. 
+ */ + if (iptun->iptun_hoplimit == IPTUN_DEFAULT_HOPLIMIT && + iptun->iptun_encaplimit == 0) { + iptun->iptun_header_size = 0; + break; + } + + (void) memset(ip6hp, 0, sizeof (*ip6hp)); + if (iptun->iptun_encaplimit == 0) { + iptun->iptun_header_size = sizeof (ip6_t); + ip6hp->ip6_nxt = IPPROTO_NONE; + } else { + iptun_encaplim_t *iel; + + iptun->iptun_header_size = sizeof (iptun_ipv6hdrs_t); + /* + * The mac_ipv6 plugin requires ip6_plen to be in host + * byte order and reflect the extension headers + * present in the template. The actual network byte + * order ip6_plen will be set on a per-packet basis on + * transmit. + */ + ip6hp->ip6_plen = sizeof (*iel); + ip6hp->ip6_nxt = IPPROTO_DSTOPTS; + iel = &iptun->iptun_header6.it6h_encaplim; + *iel = iptun_encaplim_init; + iel->iel_telopt.ip6ot_encap_limit = + iptun->iptun_encaplimit; + } + + ip6hp->ip6_hlim = iptun->iptun_hoplimit; + break; + } + } + + if (update_mac) + iptun_task_dispatch(iptun, IPTUN_TASK_PDATA_UPDATE); +} + +/* + * Insert inbound and outbound IPv4 and IPv6 policy into the given policy + * head. + */ +static boolean_t +iptun_insert_simple_policies(ipsec_policy_head_t *ph, ipsec_act_t *actp, + uint_t n, netstack_t *ns) +{ + int f = IPSEC_AF_V4; + + if (!ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_INBOUND, ns) || + !ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_OUTBOUND, ns)) + return (B_FALSE); + + f = IPSEC_AF_V6; + return (ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_INBOUND, ns) && + ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_OUTBOUND, ns)); +} + +/* + * Used to set IPsec policy when policy is set through the IPTUN_CREATE or + * IPTUN_MODIFY ioctls. + */ +static int +iptun_set_sec_simple(iptun_t *iptun, const ipsec_req_t *ipsr) +{ + int rc = 0; + uint_t nact; + ipsec_act_t *actp = NULL; + boolean_t clear_all, old_policy = B_FALSE; + ipsec_tun_pol_t *itp; + char name[MAXLINKNAMELEN]; + uint64_t gen; + netstack_t *ns = iptun->iptun_ns; + + /* Can't specify self-encap on a tunnel. 
*/ + if (ipsr->ipsr_self_encap_req != 0) + return (EINVAL); + + /* + * If it's a "clear-all" entry, unset the security flags and resume + * normal cleartext (or inherit-from-global) policy. + */ + clear_all = ((ipsr->ipsr_ah_req & IPTUN_IPSEC_REQ_MASK) == 0 && + (ipsr->ipsr_esp_req & IPTUN_IPSEC_REQ_MASK) == 0); + + ASSERT(mutex_owned(&iptun->iptun_lock)); + itp = iptun->iptun_itp; + if (itp == NULL) { + if (clear_all) + goto bail; + if ((rc = dls_mgmt_get_linkinfo(iptun->iptun_linkid, name, NULL, + NULL, NULL)) != 0) + goto bail; + ASSERT(name[0] != '\0'); + if ((itp = create_tunnel_policy(name, &rc, &gen, ns)) == NULL) + goto bail; + iptun->iptun_itp = itp; + } + + /* Allocate the actvec now, before holding itp or polhead locks. */ + ipsec_actvec_from_req(ipsr, &actp, &nact, ns); + if (actp == NULL) { + rc = ENOMEM; + goto bail; + } + + /* + * Just write on the active polhead. Save the primary/secondary stuff + * for spdsock operations. + * + * Mutex because we need to write to the polhead AND flags atomically. + * Other threads will acquire the polhead lock as a reader if the + * (unprotected) flag is set. + */ + mutex_enter(&itp->itp_lock); + if (itp->itp_flags & ITPF_P_TUNNEL) { + /* Oops, we lost a race. Let's get out of here. */ + rc = EBUSY; + goto mutex_bail; + } + old_policy = ((itp->itp_flags & ITPF_P_ACTIVE) != 0); + + if (old_policy) { + ITPF_CLONE(itp->itp_flags); + rc = ipsec_copy_polhead(itp->itp_policy, itp->itp_inactive, ns); + if (rc != 0) { + /* inactive has already been cleared. */ + itp->itp_flags &= ~ITPF_IFLAGS; + goto mutex_bail; + } + rw_enter(&itp->itp_policy->iph_lock, RW_WRITER); + ipsec_polhead_flush(itp->itp_policy, ns); + } else { + /* Else assume itp->itp_policy is already flushed. 
*/ + rw_enter(&itp->itp_policy->iph_lock, RW_WRITER); + } + + if (clear_all) { + ASSERT(avl_numnodes(&itp->itp_policy->iph_rulebyid) == 0); + itp->itp_flags &= ~ITPF_PFLAGS; + rw_exit(&itp->itp_policy->iph_lock); + old_policy = B_FALSE; /* Clear out the inactive one too. */ + goto recover_bail; + } + + if (iptun_insert_simple_policies(itp->itp_policy, actp, nact, ns)) { + rw_exit(&itp->itp_policy->iph_lock); + /* + * Adjust MTU and make sure the DL side knows what's up. + */ + itp->itp_flags = ITPF_P_ACTIVE; + (void) iptun_update_mtu(iptun, 0); + old_policy = B_FALSE; /* Blank out inactive - we succeeded */ + } else { + rw_exit(&itp->itp_policy->iph_lock); + rc = ENOMEM; + } + +recover_bail: + if (old_policy) { + /* Recover the old policy into the active polhead. */ + ipsec_swap_policy(itp->itp_policy, itp->itp_inactive, ns); + ITPF_SWAP(itp->itp_flags); + } + + /* Clear policy in inactive polhead. */ + itp->itp_flags &= ~ITPF_IFLAGS; + rw_enter(&itp->itp_inactive->iph_lock, RW_WRITER); + ipsec_polhead_flush(itp->itp_inactive, ns); + rw_exit(&itp->itp_inactive->iph_lock); + +mutex_bail: + mutex_exit(&itp->itp_lock); + +bail: + /* actp is still NULL if we bailed before the action vector was built. */ + if (actp != NULL) + ipsec_actvec_free(actp, nact); + + return (rc); +} + +/* + * Return the iptun_type_table entry for the given tunnel type. If no entry + * matches, the table's terminating IPTUN_TYPE_UNKNOWN entry is returned, so + * the result is never NULL; callers check iti_type to detect an unknown type. + */ +static iptun_typeinfo_t * +iptun_gettypeinfo(iptun_type_t type) +{ + int i; + + for (i = 0; iptun_type_table[i].iti_type != IPTUN_TYPE_UNKNOWN; i++) { + if (iptun_type_table[i].iti_type == type) + break; + } + return (&iptun_type_table[i]); +} + +/* + * Set the parameters included in ik on the tunnel iptun. Parameters that can + * only be set at creation time are set in iptun_create(). 
+ */ +static int +iptun_setparams(iptun_t *iptun, const iptun_kparams_t *ik) +{ + int err = 0; + netstack_t *ns = iptun->iptun_ns; + iptun_addr_t orig_laddr, orig_raddr; + uint_t orig_flags = iptun->iptun_flags; + + if (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR) { + if (orig_flags & IPTUN_LADDR) + orig_laddr = iptun->iptun_laddr; + if ((err = iptun_setladdr(iptun, &ik->iptun_kparam_laddr)) != 0) + return (err); + iptun->iptun_flags |= IPTUN_LADDR; + } + + if (ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR) { + if (orig_flags & IPTUN_RADDR) + orig_raddr = iptun->iptun_raddr; + if ((err = iptun_setraddr(iptun, &ik->iptun_kparam_raddr)) != 0) + goto done; + iptun->iptun_flags |= IPTUN_RADDR; + } + + if (ik->iptun_kparam_flags & IPTUN_KPARAM_SECINFO) { + /* + * Set IPsec policy originating from the ifconfig(1M) command + * line. This is traditionally called "simple" policy because + * the ipsec_req_t (iptun_kparam_secinfo) can only describe a + * simple policy of "do ESP on everything" and/or "do AH on + * everything" (as opposed to the rich policy that can be + * defined with ipsecconf(1M)). + */ + if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4) { + /* + * Can't set security properties for automatic + * tunnels. + */ + err = EINVAL; + goto done; + } + + if (!ipsec_loaded(ns->netstack_ipsec)) { + /* If IPsec can be loaded, try and load it now. */ + if (ipsec_failed(ns->netstack_ipsec)) { + err = EPROTONOSUPPORT; + goto done; + } + ipsec_loader_loadnow(ns->netstack_ipsec); + /* + * ipsec_loader_loadnow() returns while IPsec is + * loaded asynchronously. While a method exists to + * wait for IPsec to load (ipsec_loader_wait()), it + * requires use of a STREAMS queue to do a qwait(). + * We're not in STREAMS context here, and so we can't + * use it. This is not a problem in practice because + * in the vast majority of cases, key management and + * global policy will have loaded before any tunnels + * are plumbed, and so IPsec will already have been + * loaded. 
+ */ + err = EAGAIN; + goto done; + } + + err = iptun_set_sec_simple(iptun, &ik->iptun_kparam_secinfo); + if (err == 0) { + iptun->iptun_flags |= IPTUN_SIMPLE_POLICY; + iptun->iptun_simple_policy = ik->iptun_kparam_secinfo; + } + } +done: + if (err != 0) { + /* Restore original source and destination. */ + if (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR && + (orig_flags & IPTUN_LADDR)) + iptun->iptun_laddr = orig_laddr; + if ((ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR) && + (orig_flags & IPTUN_RADDR)) + iptun->iptun_raddr = orig_raddr; + iptun->iptun_flags = orig_flags; + } + return (err); +} + +static int +iptun_register(iptun_t *iptun) +{ + mac_register_t *mac; + int err; + + ASSERT(!(iptun->iptun_flags & IPTUN_MAC_REGISTERED)); + + if ((mac = mac_alloc(MAC_VERSION)) == NULL) + return (EINVAL); + + mac->m_type_ident = iptun->iptun_typeinfo->iti_ident; + mac->m_driver = iptun; + mac->m_dip = iptun_dip; + mac->m_instance = (uint_t)-1; + mac->m_src_addr = (uint8_t *)&iptun->iptun_laddr.ia_addr; + mac->m_dst_addr = iptun->iptun_typeinfo->iti_hasraddr ? 
+ (uint8_t *)&iptun->iptun_raddr.ia_addr : NULL; + mac->m_callbacks = &iptun_m_callbacks; + mac->m_min_sdu = iptun->iptun_typeinfo->iti_minmtu; + mac->m_max_sdu = iptun->iptun_mtu; + if (iptun->iptun_header_size != 0) { + mac->m_pdata = &iptun->iptun_header; + mac->m_pdata_size = iptun->iptun_header_size; + } + if ((err = mac_register(mac, &iptun->iptun_mh)) == 0) + iptun->iptun_flags |= IPTUN_MAC_REGISTERED; + mac_free(mac); + return (err); +} + +static int +iptun_unregister(iptun_t *iptun) +{ + int err; + + ASSERT(iptun->iptun_flags & IPTUN_MAC_REGISTERED); + if ((err = mac_unregister(iptun->iptun_mh)) == 0) + iptun->iptun_flags &= ~IPTUN_MAC_REGISTERED; + return (err); +} + +static conn_t * +iptun_conn_create(iptun_t *iptun, netstack_t *ns, cred_t *credp) +{ + conn_t *connp; + + if ((connp = ipcl_conn_create(IPCL_IPCCONN, KM_NOSLEEP, ns)) == NULL) + return (NULL); + + connp->conn_flags |= IPCL_IPTUN; + connp->conn_iptun = iptun; + connp->conn_recv = iptun_input; + connp->conn_rq = ns->netstack_iptun->iptuns_g_q; + connp->conn_wq = WR(connp->conn_rq); + /* + * For exclusive stacks we set conn_zoneid to GLOBAL_ZONEID as is done + * for all other conn_t's. + * + * Note that there's an important distinction between iptun_zoneid and + * conn_zoneid. The conn_zoneid is set to GLOBAL_ZONEID in non-global + * exclusive stack zones to make the ip module believe that the + * non-global zone is actually a global zone. Therefore, when + * interacting with the ip module, we must always use conn_zoneid. + */ + connp->conn_zoneid = (ns->netstack_stackid == GLOBAL_NETSTACKID) ? 
+ crgetzoneid(credp) : GLOBAL_ZONEID; + connp->conn_cred = credp; + /* crfree() is done in ipcl_conn_destroy(), called by CONN_DEC_REF() */ + crhold(connp->conn_cred); + + connp->conn_send = iptun->iptun_typeinfo->iti_txfunc; + connp->conn_af_isv6 = iptun->iptun_typeinfo->iti_ipvers == IPV6_VERSION; + ASSERT(connp->conn_ref == 1); + + mutex_enter(&connp->conn_lock); + connp->conn_state_flags &= ~CONN_INCIPIENT; + mutex_exit(&connp->conn_lock); + return (connp); +} + +static void +iptun_conn_destroy(conn_t *connp) +{ + ip_quiesce_conn(connp); + connp->conn_iptun = NULL; + ASSERT(connp->conn_ref == 1); + CONN_DEC_REF(connp); +} + +static int +iptun_create_g_q(iptun_stack_t *iptuns, cred_t *credp) +{ + int err; + conn_t *connp; + + ASSERT(iptuns->iptuns_g_q == NULL); + /* + * The global queue for this stack is set when iptunq_open() calls + * iptun_set_g_q(). + */ + err = ldi_open_by_name(IPTUNQ_DEV, FWRITE|FREAD, credp, + &iptuns->iptuns_g_q_lh, iptun_ldi_ident); + if (err == 0) { + connp = iptuns->iptuns_g_q->q_ptr; + connp->conn_recv = iptun_input; + } + return (err); +} + +static iptun_t * +iptun_alloc(void) +{ + iptun_t *iptun; + + if ((iptun = kmem_cache_alloc(iptun_cache, KM_NOSLEEP)) != NULL) { + bzero(iptun, sizeof (*iptun)); + atomic_inc_32(&iptun_tunnelcount); + } + return (iptun); +} + +static void +iptun_free(iptun_t *iptun) +{ + ASSERT(iptun->iptun_flags & IPTUN_CONDEMNED); + + if (iptun->iptun_flags & IPTUN_HASH_INSERTED) { + iptun_stack_t *iptuns = iptun->iptun_iptuns; + + mutex_enter(&iptun_hash_lock); + VERIFY(mod_hash_remove(iptun_hash, + IPTUN_HASH_KEY(iptun->iptun_linkid), + (mod_hash_val_t *)&iptun) == 0); + mutex_exit(&iptun_hash_lock); + iptun->iptun_flags &= ~IPTUN_HASH_INSERTED; + mutex_enter(&iptuns->iptuns_lock); + list_remove(&iptuns->iptuns_iptunlist, iptun); + mutex_exit(&iptuns->iptuns_lock); + } + + if (iptun->iptun_flags & IPTUN_BOUND) + iptun_unbind(iptun); + + /* + * After iptun_unregister(), there will be no threads executing a + * 
downcall from the mac module, including in the tx datapath. + */ + if (iptun->iptun_flags & IPTUN_MAC_REGISTERED) + VERIFY(iptun_unregister(iptun) == 0); + + if (iptun->iptun_itp != NULL) { + /* + * Remove from the AVL tree, AND release the reference iptun_t + * itself holds on the ITP. + */ + itp_unlink(iptun->iptun_itp, iptun->iptun_ns); + ITP_REFRELE(iptun->iptun_itp, iptun->iptun_ns); + iptun->iptun_itp = NULL; + iptun->iptun_flags &= ~IPTUN_SIMPLE_POLICY; + } + + /* + * After ipcl_conn_destroy(), there will be no threads executing an + * upcall from ip (i.e., iptun_input()), and it is then safe to free + * the iptun_t. + */ + if (iptun->iptun_connp != NULL) { + iptun_conn_destroy(iptun->iptun_connp); + iptun->iptun_connp = NULL; + } + + netstack_rele(iptun->iptun_ns); + iptun->iptun_ns = NULL; + crfree(iptun->iptun_cred); + iptun->iptun_cred = NULL; + + kmem_cache_free(iptun_cache, iptun); + atomic_dec_32(&iptun_tunnelcount); +} + +int +iptun_create(iptun_kparams_t *ik, cred_t *credp) +{ + iptun_t *iptun = NULL; + int err = 0, mherr; + char linkname[MAXLINKNAMELEN]; + ipsec_tun_pol_t *itp; + netstack_t *ns = NULL; + iptun_stack_t *iptuns; + datalink_id_t tmpid; + zoneid_t zoneid = crgetzoneid(credp); + boolean_t link_created = B_FALSE; + + /* The tunnel type is mandatory */ + if (!(ik->iptun_kparam_flags & IPTUN_KPARAM_TYPE)) + return (EINVAL); + + /* + * Is the linkid that the caller wishes to associate with this new + * tunnel assigned to this zone? + */ + if (zone_check_datalink(&zoneid, ik->iptun_kparam_linkid) != 0) { + if (zoneid != GLOBAL_ZONEID) + return (EINVAL); + } else if (zoneid == GLOBAL_ZONEID) { + return (EINVAL); + } + + /* + * Make sure that we're not trying to create a tunnel that has already + * been created. 
+ */ + if (iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun) == 0) { + iptun_exit(iptun); + iptun = NULL; + err = EEXIST; + goto done; + } + + ns = netstack_find_by_cred(credp); + iptuns = ns->netstack_iptun; + + /* + * Before we create any tunnel, we need to ensure that the default + * STREAMS queue (used to satisfy the ip module's requirement for one) + * is created. We only do this once per stack. The stream is closed + * when the stack is destroyed in iptun_stack_fni(). + */ + mutex_enter(&iptuns->iptuns_lock); + if (iptuns->iptuns_g_q == NULL) + err = iptun_create_g_q(iptuns, zone_kcred()); + mutex_exit(&iptuns->iptuns_lock); + if (err != 0) + goto done; + + if ((iptun = iptun_alloc()) == NULL) { + err = ENOMEM; + goto done; + } + + iptun->iptun_linkid = ik->iptun_kparam_linkid; + iptun->iptun_zoneid = zoneid; + crhold(credp); + iptun->iptun_cred = credp; + iptun->iptun_ns = ns; + + iptun->iptun_typeinfo = iptun_gettypeinfo(ik->iptun_kparam_type); + if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_UNKNOWN) { + err = EINVAL; + goto done; + } + + if (ik->iptun_kparam_flags & IPTUN_KPARAM_IMPLICIT) + iptun->iptun_flags |= IPTUN_IMPLICIT; + + if ((err = iptun_setparams(iptun, ik)) != 0) + goto done; + + iptun->iptun_hoplimit = IPTUN_DEFAULT_HOPLIMIT; + if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_IPV6) + iptun->iptun_encaplimit = IPTUN_DEFAULT_ENCAPLIMIT; + + iptun_headergen(iptun, B_FALSE); + + iptun->iptun_connp = iptun_conn_create(iptun, ns, credp); + if (iptun->iptun_connp == NULL) { + err = ENOMEM; + goto done; + } + + iptun->iptun_mtu = iptun->iptun_typeinfo->iti_maxmtu; + iptun->iptun_dpmtu = iptun->iptun_mtu; + + /* + * Find an ITP based on linkname. If we have parms already set via + * the iptun_setparams() call above, it may have created an ITP for + * us. We always try get_tunnel_policy() for DEBUG correctness + * checks, and we may wish to refactor this to only check when + * iptun_itp is NULL. 
+ */ + if ((err = dls_mgmt_get_linkinfo(iptun->iptun_linkid, linkname, NULL, + NULL, NULL)) != 0) + goto done; + if ((itp = get_tunnel_policy(linkname, ns)) != NULL) + iptun->iptun_itp = itp; + + /* + * See if we have the necessary IP addresses assigned to this tunnel + * to try and bind them with ip underneath us. If we're not ready to + * bind yet, then we'll defer the bind operation until the addresses + * are modified. + */ + if (iptun_canbind(iptun) && ((err = iptun_bind(iptun)) != 0)) + goto done; + + if ((err = iptun_register(iptun)) != 0) + goto done; + + err = dls_devnet_create(iptun->iptun_mh, iptun->iptun_linkid, + iptun->iptun_zoneid); + if (err != 0) + goto done; + link_created = B_TRUE; + + /* + * We hash by link-id as that is the key used by all other iptun + * interfaces (modify, delete, etc.). + */ + if ((mherr = mod_hash_insert(iptun_hash, + IPTUN_HASH_KEY(iptun->iptun_linkid), (mod_hash_val_t)iptun)) == 0) { + mutex_enter(&iptuns->iptuns_lock); + list_insert_head(&iptuns->iptuns_iptunlist, iptun); + mutex_exit(&iptuns->iptuns_lock); + iptun->iptun_flags |= IPTUN_HASH_INSERTED; + } else if (mherr == MH_ERR_NOMEM) { + err = ENOMEM; + } else if (mherr == MH_ERR_DUPLICATE) { + err = EEXIST; + } else { + err = EINVAL; + } + +done: + if (iptun == NULL && ns != NULL) + netstack_rele(ns); + if (err != 0 && iptun != NULL) { + if (link_created) { + (void) dls_devnet_destroy(iptun->iptun_mh, &tmpid, + B_TRUE); + } + iptun->iptun_flags |= IPTUN_CONDEMNED; + iptun_free(iptun); + } + return (err); +} + +int +iptun_delete(datalink_id_t linkid, cred_t *credp) +{ + int err; + iptun_t *iptun = NULL; + + if ((err = iptun_enter_by_linkid(linkid, &iptun)) != 0) + return (err); + + /* One cannot delete a tunnel that belongs to another zone. */ + if (iptun->iptun_zoneid != crgetzoneid(credp)) { + iptun_exit(iptun); + return (EACCES); + } + + /* + * We need to exit iptun in order to issue calls up the stack such as + * dls_devnet_destroy(). 
If we call up while still in iptun, deadlock + * with calls coming down the stack is possible. We prevent other + * threads from entering this iptun after we've exited it by setting + * the IPTUN_DELETE_PENDING flag. This will cause callers of + * iptun_enter() to block waiting on iptun_enter_cv. The assumption + * here is that the functions we're calling while IPTUN_DELETE_PENDING + * is set dont resuult in an iptun_enter() call, as that would result + * in deadlock. + */ + iptun->iptun_flags |= IPTUN_DELETE_PENDING; + + /* Wait for any pending upcall to the mac module to complete. */ + while (iptun->iptun_flags & IPTUN_UPCALL_PENDING) + cv_wait(&iptun->iptun_upcall_cv, &iptun->iptun_lock); + + iptun_exit(iptun); + + if ((err = dls_devnet_destroy(iptun->iptun_mh, &linkid, B_TRUE)) == 0) { + /* + * mac_disable() will fail with EBUSY if there are references + * to the iptun MAC. If there are none, then mac_disable() + * will assure that none can be acquired until the MAC is + * unregistered. + * + * XXX CR 6791335 prevents us from calling mac_disable() prior + * to dls_devnet_destroy(), so we unfortunately need to + * attempt to re-create the devnet node if mac_disable() + * fails. + */ + if ((err = mac_disable(iptun->iptun_mh)) != 0) { + (void) dls_devnet_create(iptun->iptun_mh, linkid, + iptun->iptun_zoneid); + } + } + + /* + * Now that we know the fate of this iptun_t, we need to clear + * IPTUN_DELETE_PENDING, and set IPTUN_CONDEMNED if the iptun_t is + * slated to be freed. Either way, we need to signal the threads + * waiting in iptun_enter() so that they can either fail if + * IPTUN_CONDEMNED is set, or continue if it's not. 
+ */ + mutex_enter(&iptun->iptun_lock); + iptun->iptun_flags &= ~IPTUN_DELETE_PENDING; + if (err == 0) + iptun->iptun_flags |= IPTUN_CONDEMNED; + cv_broadcast(&iptun->iptun_enter_cv); + mutex_exit(&iptun->iptun_lock); + + /* + * Note that there is no danger in calling iptun_free() after having + * dropped the iptun_lock since callers of iptun_enter() at this point + * are doing so from iptun_enter_by_linkid() (mac_disable() got rid of + * threads entering from mac callbacks which call iptun_enter() + * directly) which holds iptun_hash_lock, and iptun_free() grabs this + * lock in order to remove the iptun_t from the hash table. + */ + if (err == 0) + iptun_free(iptun); + + return (err); +} + +int +iptun_modify(const iptun_kparams_t *ik, cred_t *credp) +{ + iptun_t *iptun; + boolean_t laddr_change = B_FALSE, raddr_change = B_FALSE; + int err; + + if ((err = iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun)) != 0) + return (err); + + /* One cannot modify a tunnel that belongs to another zone. */ + if (iptun->iptun_zoneid != crgetzoneid(credp)) { + err = EACCES; + goto done; + } + + /* The tunnel type cannot be changed */ + if (ik->iptun_kparam_flags & IPTUN_KPARAM_TYPE) { + err = EINVAL; + goto done; + } + + if ((err = iptun_setparams(iptun, ik)) != 0) + goto done; + iptun_headergen(iptun, B_FALSE); + + /* + * If any of the tunnel's addresses has been modified and the tunnel + * has the necessary addresses assigned to it, we need to try to bind + * with ip underneath us. If we're not ready to bind yet, then we'll + * try again when the addresses are modified later. 
+ */ + laddr_change = (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR); + raddr_change = (ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR); + if (laddr_change || raddr_change) { + if (iptun->iptun_flags & IPTUN_BOUND) + iptun_unbind(iptun); + if (iptun_canbind(iptun) && (err = iptun_bind(iptun)) != 0) { + if (laddr_change) + iptun->iptun_flags &= ~IPTUN_LADDR; + if (raddr_change) + iptun->iptun_flags &= ~IPTUN_RADDR; + goto done; + } + } + + if (laddr_change) + iptun_task_dispatch(iptun, IPTUN_TASK_LADDR_UPDATE); + if (raddr_change) + iptun_task_dispatch(iptun, IPTUN_TASK_RADDR_UPDATE); + +done: + iptun_exit(iptun); + return (err); +} + +/* Given an IP tunnel's datalink id, fill in its parameters. */ +int +iptun_info(iptun_kparams_t *ik, cred_t *credp) +{ + iptun_t *iptun; + int err; + + /* Is the tunnel link visible from the caller's zone? */ + if (!dls_devnet_islinkvisible(ik->iptun_kparam_linkid, + crgetzoneid(credp))) + return (ENOENT); + + if ((err = iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun)) != 0) + return (err); + + bzero(ik, sizeof (iptun_kparams_t)); + + ik->iptun_kparam_linkid = iptun->iptun_linkid; + ik->iptun_kparam_type = iptun->iptun_typeinfo->iti_type; + ik->iptun_kparam_flags |= IPTUN_KPARAM_TYPE; + + if (iptun->iptun_flags & IPTUN_LADDR) { + iptun_getaddr(&iptun->iptun_laddr, &ik->iptun_kparam_laddr); + ik->iptun_kparam_flags |= IPTUN_KPARAM_LADDR; + } + if (iptun->iptun_flags & IPTUN_RADDR) { + iptun_getaddr(&iptun->iptun_raddr, &ik->iptun_kparam_raddr); + ik->iptun_kparam_flags |= IPTUN_KPARAM_RADDR; + } + + if (iptun->iptun_flags & IPTUN_IMPLICIT) + ik->iptun_kparam_flags |= IPTUN_KPARAM_IMPLICIT; + + if (iptun->iptun_itp != NULL) { + mutex_enter(&iptun->iptun_itp->itp_lock); + if (iptun->iptun_itp->itp_flags & ITPF_P_ACTIVE) { + ik->iptun_kparam_flags |= IPTUN_KPARAM_IPSECPOL; + if (iptun->iptun_flags & IPTUN_SIMPLE_POLICY) { + ik->iptun_kparam_flags |= IPTUN_KPARAM_SECINFO; + ik->iptun_kparam_secinfo = + iptun->iptun_simple_policy; + } + 
} + mutex_exit(&iptun->iptun_itp->itp_lock); + } + +done: + iptun_exit(iptun); + return (err); +} + +int +iptun_set_6to4relay(netstack_t *ns, ipaddr_t relay_addr) +{ + if (relay_addr == INADDR_BROADCAST || CLASSD(relay_addr)) + return (EADDRNOTAVAIL); + ns->netstack_iptun->iptuns_relay_rtr_addr = relay_addr; + return (0); +} + +void +iptun_get_6to4relay(netstack_t *ns, ipaddr_t *relay_addr) +{ + *relay_addr = ns->netstack_iptun->iptuns_relay_rtr_addr; +} + +void +iptun_set_policy(datalink_id_t linkid, ipsec_tun_pol_t *itp) +{ + iptun_t *iptun; + + if (iptun_enter_by_linkid(linkid, &iptun) != 0) + return; + if (iptun->iptun_itp != itp) { + ASSERT(iptun->iptun_itp == NULL); + ITP_REFHOLD(itp); + iptun->iptun_itp = itp; + /* IPsec policy means IPsec overhead, which means lower MTU. */ + (void) iptun_update_mtu(iptun, 0); + } + iptun_exit(iptun); +} + +/* + * Obtain the path MTU to the tunnel destination. + */ +static uint32_t +iptun_get_dst_pmtu(iptun_t *iptun) +{ + ire_t *ire = NULL; + ip_stack_t *ipst = iptun->iptun_ns->netstack_ip; + uint32_t pmtu = 0; + + /* + * We only obtain the destination IRE for tunnels that have a remote + * tunnel address. + */ + if (!(iptun->iptun_flags & IPTUN_RADDR)) + return (0); + + switch (iptun->iptun_typeinfo->iti_ipvers) { + case IPV4_VERSION: + ire = ire_route_lookup(iptun->iptun_raddr4, INADDR_ANY, + INADDR_ANY, 0, NULL, NULL, iptun->iptun_connp->conn_zoneid, + NULL, (MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT), ipst); + break; + case IPV6_VERSION: + ire = ire_route_lookup_v6(&iptun->iptun_raddr6, NULL, NULL, 0, + NULL, NULL, iptun->iptun_connp->conn_zoneid, NULL, + (MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT), ipst); + break; + } + + if (ire != NULL) { + pmtu = ire->ire_max_frag; + ire_refrele(ire); + } + return (pmtu); +} + +/* + * Returns the max of old_ovhd and the overhead associated with pol. 
+ */ +static uint32_t +iptun_max_policy_overhead(ipsec_policy_t *pol, uint32_t old_ovhd) +{ + uint32_t new_ovhd = old_ovhd; + + while (pol != NULL) { + new_ovhd = max(new_ovhd, + ipsec_act_ovhd(&pol->ipsp_act->ipa_act)); + pol = pol->ipsp_hash.hash_next; + } + return (new_ovhd); +} + +static uint32_t +iptun_get_ipsec_overhead(iptun_t *iptun) +{ + ipsec_policy_root_t *ipr; + ipsec_policy_head_t *iph; + ipsec_policy_t *pol; + ipsec_selector_t sel; + int i; + uint32_t ipsec_ovhd = 0; + ipsec_tun_pol_t *itp = iptun->iptun_itp; + netstack_t *ns = iptun->iptun_ns; + + if (itp == NULL || !(itp->itp_flags & ITPF_P_ACTIVE)) { + /* + * Consult global policy, just in case. This will only work + * if we have both source and destination addresses to work + * with. + */ + if ((iptun->iptun_flags & (IPTUN_LADDR|IPTUN_RADDR)) != + (IPTUN_LADDR|IPTUN_RADDR)) + return (0); + + iph = ipsec_system_policy(ns); + bzero(&sel, sizeof (sel)); + sel.ips_isv4 = + (iptun->iptun_typeinfo->iti_ipvers == IPV4_VERSION); + switch (iptun->iptun_typeinfo->iti_ipvers) { + case IPV4_VERSION: + sel.ips_local_addr_v4 = iptun->iptun_laddr4; + sel.ips_remote_addr_v4 = iptun->iptun_raddr4; + break; + case IPV6_VERSION: + sel.ips_local_addr_v6 = iptun->iptun_laddr6; + sel.ips_remote_addr_v6 = iptun->iptun_raddr6; + break; + } + /* Check for both IPv4 and IPv6. */ + sel.ips_protocol = IPPROTO_ENCAP; + pol = ipsec_find_policy_head(NULL, iph, IPSEC_TYPE_OUTBOUND, + &sel, ns); + if (pol != NULL) { + ipsec_ovhd = ipsec_act_ovhd(&pol->ipsp_act->ipa_act); + IPPOL_REFRELE(pol, ns); + } + sel.ips_protocol = IPPROTO_IPV6; + pol = ipsec_find_policy_head(NULL, iph, IPSEC_TYPE_OUTBOUND, + &sel, ns); + if (pol != NULL) { + ipsec_ovhd = max(ipsec_ovhd, + ipsec_act_ovhd(&pol->ipsp_act->ipa_act)); + IPPOL_REFRELE(pol, ns); + } + IPPH_REFRELE(iph, ns); + } else { + /* + * Look through all of the possible IPsec actions for the + * tunnel, and find the largest potential IPsec overhead. 
+ */ + iph = itp->itp_policy; + rw_enter(&iph->iph_lock, RW_READER); + ipr = &(iph->iph_root[IPSEC_TYPE_OUTBOUND]); + ipsec_ovhd = iptun_max_policy_overhead( + ipr->ipr_nonhash[IPSEC_AF_V4], 0); + ipsec_ovhd = iptun_max_policy_overhead( + ipr->ipr_nonhash[IPSEC_AF_V6], ipsec_ovhd); + for (i = 0; i < ipr->ipr_nchains; i++) { + ipsec_ovhd = iptun_max_policy_overhead( + ipr->ipr_hash[i].hash_head, ipsec_ovhd); + } + rw_exit(&iph->iph_lock); + } + + return (ipsec_ovhd); +} + +/* + * Calculate and return the maximum possible MTU for the given tunnel. + */ +static uint32_t +iptun_get_maxmtu(iptun_t *iptun, uint32_t new_pmtu) +{ + size_t header_size, ipsec_overhead; + uint32_t maxmtu, pmtu; + + /* + * Start with the path-MTU to the remote address, which is either + * provided as the new_pmtu argument, or obtained using + * iptun_get_dst_pmtu(). + */ + if (new_pmtu != 0) { + if (iptun->iptun_flags & IPTUN_RADDR) { + iptun->iptun_dpmtu = new_pmtu; + iptun->iptun_dpmtu_lastupdate = ddi_get_lbolt(); + } + pmtu = new_pmtu; + } else if (iptun->iptun_flags & IPTUN_RADDR) { + if ((pmtu = iptun_get_dst_pmtu(iptun)) == 0) { + /* + * We weren't able to obtain the path-MTU of the + * destination. Use the previous value. + */ + pmtu = iptun->iptun_dpmtu; + } else { + iptun->iptun_dpmtu = pmtu; + iptun->iptun_dpmtu_lastupdate = ddi_get_lbolt(); + } + } else { + /* + * We have no path-MTU information to go on, use the maximum + * possible value. + */ + pmtu = iptun->iptun_typeinfo->iti_maxmtu; + } + + /* + * Now calculate tunneling overhead and subtract that from the + * path-MTU information obtained above. 
+ */ + if (iptun->iptun_header_size != 0) { + header_size = iptun->iptun_header_size; + } else { + switch (iptun->iptun_typeinfo->iti_ipvers) { + case IPV4_VERSION: + header_size = sizeof (ipha_t); + break; + case IPV6_VERSION: + header_size = sizeof (iptun_ipv6hdrs_t); + break; + } + } + + ipsec_overhead = iptun_get_ipsec_overhead(iptun); + + maxmtu = pmtu - (header_size + ipsec_overhead); + return (max(maxmtu, iptun->iptun_typeinfo->iti_minmtu)); +} + +/* + * Re-calculate the tunnel's MTU and notify the MAC layer of any change in + * MTU. The new_pmtu argument is the new path MTU to the tunnel destination + * to be used in the tunnel MTU calculation. Passing in 0 for new_pmtu causes + * the path MTU to be dynamically updated using iptun_update_pmtu(). + * + * If the calculated tunnel MTU is different than its previous value, then we + * notify the MAC layer above us of this change using mac_maxsdu_update(). + */ +static uint32_t +iptun_update_mtu(iptun_t *iptun, uint32_t new_pmtu) +{ + uint32_t newmtu; + + /* + * We return the current MTU without updating it if it was pegged to a + * static value using the MAC_PROP_MTU link property. + */ + if (iptun->iptun_flags & IPTUN_FIXED_MTU) + return (iptun->iptun_mtu); + + /* If the MTU isn't fixed, then use the maximum possible value. */ + newmtu = iptun_get_maxmtu(iptun, new_pmtu); + + /* + * We only dynamically adjust the tunnel MTU for tunnels with + * destinations because dynamic MTU calculations are based on the + * destination path-MTU. + */ + if ((iptun->iptun_flags & IPTUN_RADDR) && newmtu != iptun->iptun_mtu) { + iptun->iptun_mtu = newmtu; + if (iptun->iptun_flags & IPTUN_MAC_REGISTERED) + iptun_task_dispatch(iptun, IPTUN_TASK_MTU_UPDATE); + } + + return (newmtu); +} + +/* + * Frees a packet or packet chain and bumps stat for each freed packet. 
+ */ +static void +iptun_drop_pkt(mblk_t *mp, uint64_t *stat) +{ + mblk_t *pktmp; + + for (pktmp = mp; pktmp != NULL; pktmp = mp) { + mp = mp->b_next; + pktmp->b_next = NULL; + if (stat != NULL) + atomic_inc_64(stat); + freemsg(pktmp); + } +} + +/* + * Allocate and return a new mblk to hold an IP and ICMP header, and chain the + * original packet to its b_cont. Returns NULL on failure. + */ +static mblk_t * +iptun_build_icmperr(size_t hdrs_size, mblk_t *orig_pkt) +{ + mblk_t *icmperr_mp; + + if ((icmperr_mp = allocb_tmpl(hdrs_size, orig_pkt)) != NULL) { + icmperr_mp->b_wptr += hdrs_size; + /* tack on the offending packet */ + icmperr_mp->b_cont = orig_pkt; + } + return (icmperr_mp); +} + +/* + * Transmit an ICMP error. mp->b_rptr points at the packet to be included in + * the ICMP error. + */ +static void +iptun_sendicmp_v4(iptun_t *iptun, icmph_t *icmp, ipha_t *orig_ipha, mblk_t *mp) +{ + size_t orig_pktsize, hdrs_size; + mblk_t *icmperr_mp; + ipha_t *new_ipha; + icmph_t *new_icmp; + + orig_pktsize = msgdsize(mp); + hdrs_size = sizeof (ipha_t) + sizeof (icmph_t); + if ((icmperr_mp = iptun_build_icmperr(hdrs_size, mp)) == NULL) { + iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf); + return; + } + + new_ipha = (ipha_t *)icmperr_mp->b_rptr; + new_icmp = (icmph_t *)(new_ipha + 1); + + new_ipha->ipha_version_and_hdr_length = IP_SIMPLE_HDR_VERSION; + new_ipha->ipha_type_of_service = 0; + new_ipha->ipha_ident = 0; + new_ipha->ipha_fragment_offset_and_flags = 0; + new_ipha->ipha_ttl = orig_ipha->ipha_ttl; + new_ipha->ipha_protocol = IPPROTO_ICMP; + new_ipha->ipha_src = orig_ipha->ipha_dst; + new_ipha->ipha_dst = orig_ipha->ipha_src; + new_ipha->ipha_hdr_checksum = 0; /* will be computed by ip */ + new_ipha->ipha_length = htons(hdrs_size + orig_pktsize); + + *new_icmp = *icmp; + new_icmp->icmph_checksum = 0; + new_icmp->icmph_checksum = IP_CSUM(icmperr_mp, sizeof (ipha_t), 0); + + ip_output(iptun->iptun_connp, icmperr_mp, iptun->iptun_connp->conn_wq, + IP_WPUT); +} + +static 
void +iptun_sendicmp_v6(iptun_t *iptun, icmp6_t *icmp6, ip6_t *orig_ip6h, mblk_t *mp) +{ + size_t orig_pktsize, hdrs_size; + mblk_t *icmp6err_mp; + ip6_t *new_ip6h; + icmp6_t *new_icmp6; + + orig_pktsize = msgdsize(mp); + hdrs_size = sizeof (ip6_t) + sizeof (icmp6_t); + if ((icmp6err_mp = iptun_build_icmperr(hdrs_size, mp)) == NULL) { + iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf); + return; + } + + new_ip6h = (ip6_t *)icmp6err_mp->b_rptr; + new_icmp6 = (icmp6_t *)(new_ip6h + 1); + + new_ip6h->ip6_vcf = orig_ip6h->ip6_vcf; + new_ip6h->ip6_plen = htons(sizeof (icmp6_t) + orig_pktsize); + new_ip6h->ip6_hops = orig_ip6h->ip6_hops; + new_ip6h->ip6_nxt = IPPROTO_ICMPV6; + new_ip6h->ip6_src = orig_ip6h->ip6_dst; + new_ip6h->ip6_dst = orig_ip6h->ip6_src; + + *new_icmp6 = *icmp6; + /* The checksum is calculated in ip_wput_ire_v6(). */ + new_icmp6->icmp6_cksum = new_ip6h->ip6_plen; + + ip_output_v6(iptun->iptun_connp, icmp6err_mp, + iptun->iptun_connp->conn_wq, IP_WPUT); +} + +static void +iptun_icmp_error_v4(iptun_t *iptun, ipha_t *orig_ipha, mblk_t *mp, + uint8_t type, uint8_t code) +{ + icmph_t icmp; + + bzero(&icmp, sizeof (icmp)); + icmp.icmph_type = type; + icmp.icmph_code = code; + + iptun_sendicmp_v4(iptun, &icmp, orig_ipha, mp); +} + +static void +iptun_icmp_fragneeded_v4(iptun_t *iptun, uint32_t newmtu, ipha_t *orig_ipha, + mblk_t *mp) +{ + icmph_t icmp; + + icmp.icmph_type = ICMP_DEST_UNREACHABLE; + icmp.icmph_code = ICMP_FRAGMENTATION_NEEDED; + icmp.icmph_du_zero = 0; + icmp.icmph_du_mtu = htons(newmtu); + + iptun_sendicmp_v4(iptun, &icmp, orig_ipha, mp); +} + +static void +iptun_icmp_error_v6(iptun_t *iptun, ip6_t *orig_ip6h, mblk_t *mp, + uint8_t type, uint8_t code, uint32_t offset) +{ + icmp6_t icmp6; + + bzero(&icmp6, sizeof (icmp6)); + icmp6.icmp6_type = type; + icmp6.icmp6_code = code; + if (type == ICMP6_PARAM_PROB) + icmp6.icmp6_pptr = htonl(offset); + + iptun_sendicmp_v6(iptun, &icmp6, orig_ip6h, mp); +} + +static void +iptun_icmp_toobig_v6(iptun_t 
*iptun, uint32_t newmtu, ip6_t *orig_ip6h, + mblk_t *mp) +{ + icmp6_t icmp6; + + icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; + icmp6.icmp6_code = 0; + icmp6.icmp6_mtu = htonl(newmtu); + + iptun_sendicmp_v6(iptun, &icmp6, orig_ip6h, mp); +} + +/* + * Determines if the packet pointed to by ipha or ip6h is an ICMP error. The + * mp argument is only used to do bounds checking. + */ +static boolean_t +is_icmp_error(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h) +{ + uint16_t hlen; + + if (ipha != NULL) { + icmph_t *icmph; + + ASSERT(ip6h == NULL); + if (ipha->ipha_protocol != IPPROTO_ICMP) + return (B_FALSE); + + hlen = IPH_HDR_LENGTH(ipha); + icmph = (icmph_t *)((uint8_t *)ipha + hlen); + return (ICMP_IS_ERROR(icmph->icmph_type) || + icmph->icmph_type == ICMP_REDIRECT); + } else { + icmp6_t *icmp6; + uint8_t *nexthdrp; + + ASSERT(ip6h != NULL); + if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hlen, &nexthdrp) || + *nexthdrp != IPPROTO_ICMPV6) { + return (B_FALSE); + } + + icmp6 = (icmp6_t *)((uint8_t *)ip6h + hlen); + return (ICMP6_IS_ERROR(icmp6->icmp6_type) || + icmp6->icmp6_type == ND_REDIRECT); + } +} + +/* + * Find inner and outer IP headers from a tunneled packet as setup for calls + * into ipsec_tun_{in,out}bound(). + */ +static size_t +iptun_find_headers(mblk_t *mp, ipha_t **outer4, ipha_t **inner4, ip6_t **outer6, + ip6_t **inner6) +{ + ipha_t *ipha; + size_t outer_hlen; + size_t first_mblkl = MBLKL(mp); + mblk_t *inner_mp; + + /* + * Don't bother handling packets that don't have a full IP header in + * the fist mblk. For the input path, the ip module ensures that this + * won't happen, and on the output path, the IP tunneling MAC-type + * plugins ensure that this also won't happen. 
+ */ + if (first_mblkl < sizeof (ipha_t)) + return (0); + ipha = (ipha_t *)(mp->b_rptr); + switch (IPH_HDR_VERSION(ipha)) { + case IPV4_VERSION: + *outer4 = ipha; + *outer6 = NULL; + outer_hlen = IPH_HDR_LENGTH(ipha); + break; + case IPV6_VERSION: + *outer4 = NULL; + *outer6 = (ip6_t *)ipha; + outer_hlen = ip_hdr_length_v6(mp, (ip6_t *)ipha); + break; + default: + return (0); + } + + if (first_mblkl < outer_hlen || + (first_mblkl == outer_hlen && mp->b_cont == NULL)) + return (0); + + /* + * We don't bother doing a pullup here since the outer header will + * just get stripped off soon on input anyway. We just want to ensure + * that the inner* pointer points to a full header. + */ + if (first_mblkl == outer_hlen) { + inner_mp = mp->b_cont; + ipha = (ipha_t *)inner_mp->b_rptr; + } else { + inner_mp = mp; + ipha = (ipha_t *)(mp->b_rptr + outer_hlen); + } + switch (IPH_HDR_VERSION(ipha)) { + case IPV4_VERSION: + if (inner_mp->b_wptr - (uint8_t *)ipha < sizeof (ipha_t)) + return (0); + *inner4 = ipha; + *inner6 = NULL; + break; + case IPV6_VERSION: + if (inner_mp->b_wptr - (uint8_t *)ipha < sizeof (ip6_t)) + return (0); + *inner4 = NULL; + *inner6 = (ip6_t *)ipha; + break; + default: + return (0); + } + + return (outer_hlen); +} + +/* + * Received ICMP error in response to an X over IPv4 packet that we + * transmitted. + * + * NOTE: "outer" refers to what's inside the ICMP payload. We will get one of + * the following: + * + * [IPv4(0)][ICMPv4][IPv4(1)][IPv4(2)][ULP] + * + * or + * + * [IPv4(0)][ICMPv4][IPv4(1)][IPv6][ULP] + * + * And "outer4" will get set to IPv4(1), and inner[46] will correspond to + * whatever the very-inner packet is (IPv4(2) or IPv6). 
+ */ +static void +iptun_input_icmp_v4(iptun_t *iptun, mblk_t *ipsec_mp, mblk_t *data_mp, + icmph_t *icmph) +{ + uint8_t *orig; + ipha_t *outer4, *inner4; + ip6_t *outer6, *inner6; + int outer_hlen; + uint8_t type, code; + + /* + * Change the db_type to M_DATA because subsequent operations assume + * the ICMP packet is M_DATA again (i.e. calls to msgdsize()). + */ + data_mp->b_datap->db_type = M_DATA; + + ASSERT(data_mp->b_cont == NULL); + /* + * Temporarily move b_rptr forward so that iptun_find_headers() can + * find headers in the ICMP packet payload. + */ + orig = data_mp->b_rptr; + data_mp->b_rptr = (uint8_t *)(icmph + 1); + /* + * The ip module ensures that ICMP errors contain at least the + * original IP header (otherwise, the error would never have made it + * here). + */ + ASSERT(MBLKL(data_mp) >= 0); + outer_hlen = iptun_find_headers(data_mp, &outer4, &inner4, &outer6, + &inner6); + ASSERT(outer6 == NULL); + data_mp->b_rptr = orig; + if (outer_hlen == 0) { + iptun_drop_pkt((ipsec_mp != NULL ? ipsec_mp : data_mp), + &iptun->iptun_ierrors); + return; + } + + /* Only ICMP errors due to tunneled packets should reach here. */ + ASSERT(outer4->ipha_protocol == IPPROTO_ENCAP || + outer4->ipha_protocol == IPPROTO_IPV6); + + /* ipsec_tun_inbound() always frees ipsec_mp. */ + if (!ipsec_tun_inbound(ipsec_mp, &data_mp, iptun->iptun_itp, + inner4, inner6, outer4, outer6, -outer_hlen, + iptun->iptun_ns)) { + /* Callee did all of the freeing. */ + atomic_inc_64(&iptun->iptun_ierrors); + return; + } + /* We should never see reassembled fragment here. */ + ASSERT(data_mp->b_next == NULL); + + data_mp->b_rptr = (uint8_t *)outer4 + outer_hlen; + + /* + * If the original packet being transmitted was itself an ICMP error, + * then drop this packet. We don't want to generate an ICMP error in + * response to an ICMP error. 
+ */ + if (is_icmp_error(data_mp, inner4, inner6)) { + iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf); + return; + } + + switch (icmph->icmph_type) { + case ICMP_DEST_UNREACHABLE: + type = (inner4 != NULL ? icmph->icmph_type : ICMP6_DST_UNREACH); + switch (icmph->icmph_code) { + case ICMP_FRAGMENTATION_NEEDED: { + uint32_t newmtu; + + /* + * We reconcile this with the fact that the tunnel may + * also have IPsec policy by letting iptun_update_mtu + * take care of it. + */ + newmtu = + iptun_update_mtu(iptun, ntohs(icmph->icmph_du_mtu)); + + if (inner4 != NULL) { + iptun_icmp_fragneeded_v4(iptun, newmtu, inner4, + data_mp); + } else { + iptun_icmp_toobig_v6(iptun, newmtu, inner6, + data_mp); + } + return; + } + case ICMP_DEST_NET_UNREACH_ADMIN: + case ICMP_DEST_HOST_UNREACH_ADMIN: + code = (inner4 != NULL ? ICMP_DEST_NET_UNREACH_ADMIN : + ICMP6_DST_UNREACH_ADMIN); + break; + default: + code = (inner4 != NULL ? ICMP_HOST_UNREACHABLE : + ICMP6_DST_UNREACH_ADDR); + break; + } + break; + case ICMP_TIME_EXCEEDED: + if (inner6 != NULL) { + type = ICMP6_TIME_EXCEEDED; + code = 0; + } /* else we're already set. */ + break; + case ICMP_PARAM_PROBLEM: + /* + * This is a problem with the outer header we transmitted. + * Treat this as an output error. + */ + iptun_drop_pkt(data_mp, &iptun->iptun_oerrors); + return; + default: + iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf); + return; + } + + if (inner4 != NULL) + iptun_icmp_error_v4(iptun, inner4, data_mp, type, code); + else + iptun_icmp_error_v6(iptun, inner6, data_mp, type, code, 0); +} + +/* + * Return B_TRUE if the IPv6 packet pointed to by ip6h contains a Tunnel + * Encapsulation Limit destination option. If there is one, set encaplim_ptr + * to point to the option value. 
+ */ +static boolean_t +iptun_find_encaplimit(mblk_t *mp, ip6_t *ip6h, uint8_t **encaplim_ptr) +{ + ip6_pkt_t pkt; + uint8_t *endptr; + ip6_dest_t *destp; + struct ip6_opt *optp; + + pkt.ipp_fields = 0; /* must be initialized */ + (void) ip_find_hdr_v6(mp, ip6h, &pkt, NULL); + if ((pkt.ipp_fields & IPPF_DSTOPTS) != 0) { + destp = pkt.ipp_dstopts; + } else if ((pkt.ipp_fields & IPPF_RTDSTOPTS) != 0) { + destp = pkt.ipp_rtdstopts; + } else { + return (B_FALSE); + } + + endptr = (uint8_t *)destp + 8 * (destp->ip6d_len + 1); + optp = (struct ip6_opt *)(destp + 1); + while (endptr - (uint8_t *)optp > sizeof (*optp)) { + if (optp->ip6o_type == IP6OPT_TUNNEL_LIMIT) { + if ((uint8_t *)(optp + 1) >= endptr) + return (B_FALSE); + *encaplim_ptr = (uint8_t *)&optp[1]; + return (B_TRUE); + } + optp = (struct ip6_opt *)((uint8_t *)optp + optp->ip6o_len + 2); + } + return (B_FALSE); +} + +/* + * Received ICMPv6 error in response to an X over IPv6 packet that we + * transmitted. + * + * NOTE: "outer" refers to what's inside the ICMP payload. We will get one of + * the following: + * + * [IPv6(0)][ICMPv6][IPv6(1)][IPv4][ULP] + * + * or + * + * [IPv6(0)][ICMPv6][IPv6(1)][IPv6(2)][ULP] + * + * And "outer6" will get set to IPv6(1), and inner[46] will correspond to + * whatever the very-inner packet is (IPv4 or IPv6(2)). + */ +static void +iptun_input_icmp_v6(iptun_t *iptun, mblk_t *ipsec_mp, mblk_t *data_mp, + icmp6_t *icmp6h) +{ + uint8_t *orig; + ipha_t *outer4, *inner4; + ip6_t *outer6, *inner6; + int outer_hlen; + uint8_t type, code; + + /* + * Change the db_type to M_DATA because subsequent operations assume + * the ICMP packet is M_DATA again (i.e. calls to msgdsize().) + */ + data_mp->b_datap->db_type = M_DATA; + + ASSERT(data_mp->b_cont == NULL); + + /* + * Temporarily move b_rptr forward so that iptun_find_headers() can + * find IP headers in the ICMP packet payload. 
+ */ + orig = data_mp->b_rptr; + data_mp->b_rptr = (uint8_t *)(icmp6h + 1); + /* + * The ip module ensures that ICMP errors contain at least the + * original IP header (otherwise, the error would never have made it + * here). + */ + ASSERT(MBLKL(data_mp) >= 0); + outer_hlen = iptun_find_headers(data_mp, &outer4, &inner4, &outer6, + &inner6); + ASSERT(outer4 == NULL); + data_mp->b_rptr = orig; /* Restore r_ptr */ + if (outer_hlen == 0) { + iptun_drop_pkt((ipsec_mp != NULL ? ipsec_mp : data_mp), + &iptun->iptun_ierrors); + return; + } + + if (!ipsec_tun_inbound(ipsec_mp, &data_mp, iptun->iptun_itp, + inner4, inner6, outer4, outer6, -outer_hlen, + iptun->iptun_ns)) { + /* Callee did all of the freeing. */ + atomic_inc_64(&iptun->iptun_ierrors); + return; + } + /* We should never see reassembled fragment here. */ + ASSERT(data_mp->b_next == NULL); + + data_mp->b_rptr = (uint8_t *)outer6 + outer_hlen; + + /* + * If the original packet being transmitted was itself an ICMP error, + * then drop this packet. We don't want to generate an ICMP error in + * response to an ICMP error. + */ + if (is_icmp_error(data_mp, inner4, inner6)) { + iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf); + return; + } + + switch (icmp6h->icmp6_type) { + case ICMP6_PARAM_PROB: { + uint8_t *encaplim_ptr; + + /* + * If the ICMPv6 error points to a valid Tunnel Encapsulation + * Limit option and the limit value is 0, then fall through + * and send a host unreachable message. Otherwise, treat the + * error as an output error, as there must have been a problem + * with a packet we sent. + */ + if (!iptun_find_encaplimit(data_mp, outer6, &encaplim_ptr) || + (icmp6h->icmp6_pptr != + ((ptrdiff_t)encaplim_ptr - (ptrdiff_t)outer6)) || + *encaplim_ptr != 0) { + iptun_drop_pkt(data_mp, &iptun->iptun_oerrors); + return; + } + /* FALLTHRU */ + } + case ICMP6_TIME_EXCEEDED: + case ICMP6_DST_UNREACH: + type = (inner4 != NULL ? ICMP_DEST_UNREACHABLE : + ICMP6_DST_UNREACH); + code = (inner4 != NULL ? 
ICMP_HOST_UNREACHABLE : + ICMP6_DST_UNREACH_ADDR); + break; + case ICMP6_PACKET_TOO_BIG: { + uint32_t newmtu; + + /* + * We reconcile this with the fact that the tunnel may also + * have IPsec policy by letting iptun_update_mtu take care of + * it. + */ + newmtu = iptun_update_mtu(iptun, ntohl(icmp6h->icmp6_mtu)); + + if (inner4 != NULL) { + iptun_icmp_fragneeded_v4(iptun, newmtu, inner4, + data_mp); + } else { + iptun_icmp_toobig_v6(iptun, newmtu, inner6, data_mp); + } + return; + } + default: + iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf); + return; + } + + if (inner4 != NULL) + iptun_icmp_error_v4(iptun, inner4, data_mp, type, code); + else + iptun_icmp_error_v6(iptun, inner6, data_mp, type, code, 0); +} + +static void +iptun_input_icmp(iptun_t *iptun, mblk_t *ipsec_mp, mblk_t *data_mp) +{ + mblk_t *tmpmp; + size_t hlen; + + if (data_mp->b_cont != NULL) { + /* + * Since ICMP error processing necessitates access to bits + * that are within the ICMP error payload (the original packet + * that caused the error), pull everything up into a single + * block for convenience. + */ + data_mp->b_datap->db_type = M_DATA; + if ((tmpmp = msgpullup(data_mp, -1)) == NULL) { + iptun_drop_pkt((ipsec_mp != NULL ? ipsec_mp : data_mp), + &iptun->iptun_norcvbuf); + return; + } + freemsg(data_mp); + data_mp = tmpmp; + if (ipsec_mp != NULL) + ipsec_mp->b_cont = data_mp; + } + + switch (iptun->iptun_typeinfo->iti_ipvers) { + case IPV4_VERSION: + /* + * The outer IP header coming up from IP is always ipha_t + * alligned (otherwise, we would have crashed in ip). 
+ */ + hlen = IPH_HDR_LENGTH((ipha_t *)data_mp->b_rptr); + iptun_input_icmp_v4(iptun, ipsec_mp, data_mp, + (icmph_t *)(data_mp->b_rptr + hlen)); + break; + case IPV6_VERSION: + hlen = ip_hdr_length_v6(data_mp, (ip6_t *)data_mp->b_rptr); + iptun_input_icmp_v6(iptun, ipsec_mp, data_mp, + (icmp6_t *)(data_mp->b_rptr + hlen)); + break; + } +} + +static boolean_t +iptun_in_6to4_ok(iptun_t *iptun, ipha_t *outer4, ip6_t *inner6) +{ + ipaddr_t v4addr; + + /* + * Make sure that the IPv6 destination is within the site that this + * 6to4 tunnel is routing for. We don't want people bouncing random + * tunneled IPv6 packets through this 6to4 router. + */ + IN6_6TO4_TO_V4ADDR(&inner6->ip6_dst, (struct in_addr *)&v4addr); + if (outer4->ipha_dst != v4addr) + return (B_FALSE); + + if (IN6_IS_ADDR_6TO4(&inner6->ip6_src)) { + /* + * Section 9 of RFC 3056 (security considerations) suggests + * that when a packet is from a 6to4 site (i.e., it's not a + * global address being forwarded froma relay router), make + * sure that the packet was tunneled by that site's 6to4 + * router. + */ + IN6_6TO4_TO_V4ADDR(&inner6->ip6_src, (struct in_addr *)&v4addr); + if (outer4->ipha_src != v4addr) + return (B_FALSE); + } else { + /* + * Only accept packets from a relay router if we've configured + * outbound relay router functionality. + */ + if (iptun->iptun_iptuns->iptuns_relay_rtr_addr == INADDR_ANY) + return (B_FALSE); + } + + return (B_TRUE); +} + +/* + * Input function for everything that comes up from the ip module below us. + * This is called directly from the ip module via connp->conn_recv(). + * + * There are two kinds of packets that can arrive here: (1) IP-in-IP tunneled + * packets and (2) ICMP errors containing IP-in-IP packets transmitted by us. 
+ * They have the following structure: + * + * 1) M_DATA + * 2) M_CTL[->M_DATA] + * + * (2) Is an M_CTL optionally followed by M_DATA, where the M_CTL block is the + * start of the actual ICMP packet (it doesn't contain any special control + * information). + * + * Either (1) or (2) can be IPsec-protected, in which case an M_CTL block + * containing an ipsec_in_t will have been prepended to either (1) or (2), + * making a total of four combinations of possible mblk chains: + * + * A) (1) + * B) (2) + * C) M_CTL(ipsec_in_t)->(1) + * D) M_CTL(ipsec_in_t)->(2) + */ +/* ARGSUSED */ +static void +iptun_input(void *arg, mblk_t *mp, void *arg2) +{ + conn_t *connp = arg; + iptun_t *iptun = connp->conn_iptun; + int outer_hlen; + ipha_t *outer4, *inner4; + ip6_t *outer6, *inner6; + mblk_t *data_mp = mp; + boolean_t ipsec = B_FALSE; + + ASSERT(IPCL_IS_IPTUN(connp)); + ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); + + if (DB_TYPE(mp) == M_CTL) { + if (((ipsec_in_t *)(mp->b_rptr))->ipsec_in_type != IPSEC_IN) { + iptun_input_icmp(iptun, NULL, mp); + return; + } + + data_mp = mp->b_cont; + if (DB_TYPE(data_mp) == M_CTL) { + /* Protected ICMP packet. */ + iptun_input_icmp(iptun, mp, data_mp); + return; + } + ipsec = B_TRUE; + } + + /* + * Request the destination's path MTU information regularly in case + * path MTU has increased. + */ + if (IPTUN_PMTU_TOO_OLD(iptun)) + iptun_task_dispatch(iptun, IPTUN_TASK_PMTU_UPDATE); + + if ((outer_hlen = iptun_find_headers(data_mp, &outer4, &inner4, &outer6, + &inner6)) == 0) + goto drop; + + /* + * If the system is labeled, we call tsol_check_dest() on the packet + * destination (our local tunnel address) to ensure that the packet as + * labeled should be allowed to be sent to us. We don't need to call + * the more involved tsol_receive_local() since the tunnel link itself + * cannot be assigned to shared-stack non-global zones. 
+ */ + if (is_system_labeled()) { + cred_t *msg_cred; + + if ((msg_cred = msg_getcred(data_mp, NULL)) == NULL) + goto drop; + if (tsol_check_dest(msg_cred, (outer4 != NULL ? + (void *)&outer4->ipha_dst : (void *)&outer6->ip6_dst), + (outer4 != NULL ? IPV4_VERSION : IPV6_VERSION), + B_FALSE, NULL) != 0) + goto drop; + } + + if (ipsec) { + if (!ipsec_tun_inbound(mp, &data_mp, iptun->iptun_itp, inner4, + inner6, outer4, outer6, outer_hlen, iptun->iptun_ns)) { + /* Callee did all of the freeing. */ + return; + } + mp = data_mp; + } + + if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4 && + !iptun_in_6to4_ok(iptun, outer4, inner6)) + goto drop; + + /* + * We need to statistically account for each packet individually, so + * we might as well split up any b_next chains here. + */ + do { + mp = data_mp->b_next; + data_mp->b_next = NULL; + + atomic_inc_64(&iptun->iptun_ipackets); + atomic_add_64(&iptun->iptun_rbytes, msgdsize(data_mp)); + mac_rx(iptun->iptun_mh, NULL, data_mp); + + data_mp = mp; + } while (data_mp != NULL); + return; +drop: + iptun_drop_pkt(mp, &iptun->iptun_ierrors); +} + +/* + * Do 6to4-specific header-processing on output. Return B_TRUE if the packet + * was processed without issue, or B_FALSE if the packet had issues and should + * be dropped. + */ +static boolean_t +iptun_out_process_6to4(iptun_t *iptun, ipha_t *outer4, ip6_t *inner6) +{ + ipaddr_t v4addr; + + /* + * IPv6 source must be a 6to4 address. This is because a conscious + * decision was made to not allow a Solaris system to be used as a + * relay router (for security reasons) when 6to4 was initially + * integrated. If this decision is ever reversed, the following check + * can be removed. + */ + if (!IN6_IS_ADDR_6TO4(&inner6->ip6_src)) + return (B_FALSE); + + /* + * RFC3056 mandates that the IPv4 source MUST be set to the IPv4 + * portion of the 6to4 IPv6 source address. In other words, make sure + * that we're tunneling packets from our own 6to4 site. 
+ */ + IN6_6TO4_TO_V4ADDR(&inner6->ip6_src, (struct in_addr *)&v4addr); + if (outer4->ipha_src != v4addr) + return (B_FALSE); + + /* + * Automatically set the destination of the outer IPv4 header as + * described in RFC3056. There are two possibilities: + * + * a. If the IPv6 destination is a 6to4 address, set the IPv4 address + * to the IPv4 portion of the 6to4 address. + * b. If the IPv6 destination is a native IPv6 address, set the IPv4 + * destination to the address of a relay router. + * + * Design Note: b shouldn't be necessary here, and this is a flaw in + * the design of the 6to4relay command. Instead of setting a 6to4 + * relay address in this module via an ioctl, the 6to4relay command + * could simply add a IPv6 route for native IPv6 addresses (such as a + * default route) in the forwarding table that uses a 6to4 destination + * as its next hop, and the IPv4 portion of that address could be a + * 6to4 relay address. In order for this to work, IP would have to + * resolve the next hop address, which would necessitate a link-layer + * address resolver for 6to4 links, which doesn't exist today. + * + * In fact, if a resolver existed for 6to4 links, then setting the + * IPv4 destination in the outer header could be done as part of + * link-layer address resolution and fast-path header generation, and + * not here. + */ + if (IN6_IS_ADDR_6TO4(&inner6->ip6_dst)) { + /* destination is a 6to4 router */ + IN6_6TO4_TO_V4ADDR(&inner6->ip6_dst, + (struct in_addr *)&outer4->ipha_dst); + } else { + /* + * The destination is a native IPv6 address. If output to a + * relay-router is enabled, use the relay-router's IPv4 + * address as the destination. 
+ */ + if (iptun->iptun_iptuns->iptuns_relay_rtr_addr == INADDR_ANY) + return (B_FALSE); + outer4->ipha_dst = iptun->iptun_iptuns->iptuns_relay_rtr_addr; + } + + /* + * If the outer source and destination are equal, this means that the + * 6to4 router somehow forwarded an IPv6 packet destined for its own + * 6to4 site to its 6to4 tunnel interface, which will result in this + * packet infinitely bouncing between ip and iptun. + */ + return (outer4->ipha_src != outer4->ipha_dst); +} + +/* + * Process output packets with outer IPv4 headers. Frees mp and bumps stat on + * error. + */ +static mblk_t * +iptun_out_process_ipv4(iptun_t *iptun, mblk_t *mp, ipha_t *outer4, + ipha_t *inner4, ip6_t *inner6) +{ + uint8_t *innerptr = (inner4 != NULL ? + (uint8_t *)inner4 : (uint8_t *)inner6); + size_t minmtu = (inner4 != NULL ? + IPTUN_MIN_IPV4_MTU : IPTUN_MIN_IPV6_MTU); + + if (inner4 != NULL) { + ASSERT(outer4->ipha_protocol == IPPROTO_ENCAP); + /* + * Copy the tos from the inner IPv4 header. We mask off ECN + * bits (bits 6 and 7) because there is currently no + * tunnel-tunnel communication to determine if both sides + * support ECN. We opt for the safe choice: don't copy the + * ECN bits when doing encapsulation. + */ + outer4->ipha_type_of_service = + inner4->ipha_type_of_service & ~0x03; + } else { + ASSERT(outer4->ipha_protocol == IPPROTO_IPV6 && + inner6 != NULL); + + if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4 && + !iptun_out_process_6to4(iptun, outer4, inner6)) { + iptun_drop_pkt(mp, &iptun->iptun_oerrors); + return (NULL); + } + } + + /* + * As described in section 3.2.2 of RFC4213, if the packet payload is + * less than or equal to the minimum MTU size, then we need to allow + * IPv4 to fragment the packet. The reason is that even if we end up + * receiving an ICMP frag-needed, the interface above this tunnel + * won't be allowed to drop its MTU as a result, since the packet was + * already smaller than the smallest allowable MTU for that interface. 
+ */ + if (mp->b_wptr - innerptr <= minmtu) + outer4->ipha_fragment_offset_and_flags = 0; + + outer4->ipha_length = htons(msgdsize(mp)); + + return (mp); +} + +/* + * Insert an encapsulation limit destination option in the packet provided. + * Always consumes the mp argument and returns a new mblk pointer. + */ +static mblk_t * +iptun_insert_encaplimit(iptun_t *iptun, mblk_t *mp, ip6_t *outer6, + uint8_t limit) +{ + mblk_t *newmp; + iptun_ipv6hdrs_t *newouter6; + + ASSERT(outer6->ip6_nxt == IPPROTO_IPV6); + ASSERT(mp->b_cont == NULL); + + mp->b_rptr += sizeof (ip6_t); + newmp = allocb_tmpl(sizeof (iptun_ipv6hdrs_t) + MBLKL(mp), mp); + if (newmp == NULL) { + iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf); + return (NULL); + } + newmp->b_wptr += sizeof (iptun_ipv6hdrs_t); + /* Copy the payload (Starting with the inner IPv6 header). */ + bcopy(mp->b_rptr, newmp->b_wptr, MBLKL(mp)); + newmp->b_wptr += MBLKL(mp); + newouter6 = (iptun_ipv6hdrs_t *)newmp->b_rptr; + /* Now copy the outer IPv6 header. */ + bcopy(outer6, &newouter6->it6h_ip6h, sizeof (ip6_t)); + newouter6->it6h_ip6h.ip6_nxt = IPPROTO_DSTOPTS; + newouter6->it6h_encaplim = iptun_encaplim_init; + newouter6->it6h_encaplim.iel_destopt.ip6d_nxt = outer6->ip6_nxt; + newouter6->it6h_encaplim.iel_telopt.ip6ot_encap_limit = limit; + + /* + * The payload length will be set at the end of + * iptun_out_process_ipv6(). + */ + + freemsg(mp); + return (newmp); +} + +/* + * Process output packets with outer IPv6 headers. Frees mp and bumps stats + * on error. + */ +static mblk_t * +iptun_out_process_ipv6(iptun_t *iptun, mblk_t *mp, ip6_t *outer6, ip6_t *inner6) +{ + uint8_t *limit, *configlimit; + uint32_t offset; + iptun_ipv6hdrs_t *v6hdrs; + + if (inner6 != NULL && iptun_find_encaplimit(mp, inner6, &limit)) { + /* + * The inner packet is an IPv6 packet which itself contains an + * encapsulation limit option. The limit variable points to + * the value in the embedded option. 
Process the + * encapsulation limit option as specified in RFC 2473. + * + * If limit is 0, then we've exceeded the limit and we need to + * send back an ICMPv6 parameter problem message. + * + * If limit is > 0, then we decrement it by 1 and make sure + * that the encapsulation limit option in the outer header + * reflects that (adding an option if one isn't already + * there). + */ + ASSERT(limit > mp->b_rptr && limit < mp->b_wptr); + if (*limit == 0) { + mp->b_rptr = (uint8_t *)inner6; + offset = limit - mp->b_rptr; + iptun_icmp_error_v6(iptun, inner6, mp, ICMP6_PARAM_PROB, + 0, offset); + atomic_inc_64(&iptun->iptun_noxmtbuf); + return (NULL); + } + + /* + * The outer header requires an encapsulation limit option. + * If there isn't one already, add one. + */ + if (iptun->iptun_encaplimit == 0) { + if ((mp = iptun_insert_encaplimit(iptun, mp, outer6, + (*limit - 1))) == NULL) + return (NULL); + } else { + /* + * There is an existing encapsulation limit option in + * the outer header. If the inner encapsulation limit + * is less than the configured encapsulation limit, + * update the outer encapsulation limit to reflect + * this lesser value. + */ + v6hdrs = (iptun_ipv6hdrs_t *)mp->b_rptr; + configlimit = + &v6hdrs->it6h_encaplim.iel_telopt.ip6ot_encap_limit; + if ((*limit - 1) < *configlimit) + *configlimit = (*limit - 1); + } + } + + outer6->ip6_plen = htons(msgdsize(mp) - sizeof (ip6_t)); + return (mp); +} + +/* + * The IP tunneling MAC-type plugins have already done most of the header + * processing and validity checks. We are simply responsible for multiplexing + * down to the ip module below us. 
+ */ +static void +iptun_output(iptun_t *iptun, mblk_t *mp) +{ + conn_t *connp = iptun->iptun_connp; + int outer_hlen; + mblk_t *newmp; + ipha_t *outer4, *inner4; + ip6_t *outer6, *inner6; + ipsec_tun_pol_t *itp = iptun->iptun_itp; + + ASSERT(mp->b_datap->db_type == M_DATA); + + if (mp->b_cont != NULL) { + if ((newmp = msgpullup(mp, -1)) == NULL) { + iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf); + return; + } + freemsg(mp); + mp = newmp; + } + + outer_hlen = iptun_find_headers(mp, &outer4, &inner4, &outer6, &inner6); + if (outer_hlen == 0) { + iptun_drop_pkt(mp, &iptun->iptun_oerrors); + return; + } + + /* Perform header processing. */ + if (outer4 != NULL) + mp = iptun_out_process_ipv4(iptun, mp, outer4, inner4, inner6); + else + mp = iptun_out_process_ipv6(iptun, mp, outer6, inner6); + if (mp == NULL) + return; + + /* + * Let's hope the compiler optimizes this with "branch taken". + */ + if (itp != NULL && (itp->itp_flags & ITPF_P_ACTIVE)) { + if ((mp = ipsec_tun_outbound(mp, iptun, inner4, inner6, outer4, + outer6, outer_hlen)) == NULL) { + /* ipsec_tun_outbound() frees mp on error. */ + atomic_inc_64(&iptun->iptun_oerrors); + return; + } + /* + * ipsec_tun_outbound() returns a chain of tunneled IP + * fragments linked with b_next (or a single message if the + * tunneled packet wasn't a fragment). Each message in the + * chain is prepended by an IPSEC_OUT M_CTL block with + * instructions for outbound IPsec processing. + */ + for (newmp = mp; newmp != NULL; newmp = mp) { + ASSERT(newmp->b_datap->db_type == M_CTL); + atomic_inc_64(&iptun->iptun_opackets); + atomic_add_64(&iptun->iptun_obytes, + msgdsize(newmp->b_cont)); + mp = mp->b_next; + newmp->b_next = NULL; + connp->conn_send(connp, newmp, connp->conn_wq, IP_WPUT); + } + } else { + /* + * The ip module will potentially apply global policy to the + * packet in its output path if there's no active tunnel + * policy. 
+ */ + atomic_inc_64(&iptun->iptun_opackets); + atomic_add_64(&iptun->iptun_obytes, msgdsize(mp)); + connp->conn_send(connp, mp, connp->conn_wq, IP_WPUT); + } +} + +/* + * Note that the setting or clearing iptun_{set,get}_g_q() is serialized via + * iptuns_lock and iptunq_open(), so we must never be in a situation where + * iptun_set_g_q() is called if the queue has already been set or vice versa + * (hence the ASSERT()s.) + */ +void +iptun_set_g_q(netstack_t *ns, queue_t *q) +{ + ASSERT(ns->netstack_iptun->iptuns_g_q == NULL); + ns->netstack_iptun->iptuns_g_q = q; +} + +void +iptun_clear_g_q(netstack_t *ns) +{ + ASSERT(ns->netstack_iptun->iptuns_g_q != NULL); + ns->netstack_iptun->iptuns_g_q = NULL; +} + +static mac_callbacks_t iptun_m_callbacks = { + .mc_callbacks = (MC_SETPROP | MC_GETPROP), + .mc_getstat = iptun_m_getstat, + .mc_start = iptun_m_start, + .mc_stop = iptun_m_stop, + .mc_setpromisc = iptun_m_setpromisc, + .mc_multicst = iptun_m_multicst, + .mc_unicst = iptun_m_unicst, + .mc_tx = iptun_m_tx, + .mc_setprop = iptun_m_setprop, + .mc_getprop = iptun_m_getprop +}; diff --git a/usr/src/uts/intel/tun/tun.global-objs.debug64 b/usr/src/uts/common/inet/iptun/iptun.conf index 99d06ddee0..ed047474db 100644 --- a/usr/src/uts/intel/tun/tun.global-objs.debug64 +++ b/usr/src/uts/common/inet/iptun/iptun.conf @@ -19,23 +19,7 @@ # CDDL HEADER END # # -# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# -# ident "%Z%%M% %I% %E% SMI" - -IP6_MAJ -IP_MAJ -bindack -info -infoack -modlinkage -modlstrmod -tun_debug -tun_do_fastpath -tun_fmodsw -tun_limit_init_upper_v4 -tun_limit_init_upper_v6 -tuninfo -tunrinit -tunwinit +name="iptun" parent="pseudo" instance=0; diff --git a/usr/src/uts/common/inet/iptun/iptun_ctl.c b/usr/src/uts/common/inet/iptun/iptun_ctl.c new file mode 100644 index 0000000000..ed8910a967 --- /dev/null +++ b/usr/src/uts/common/inet/iptun/iptun_ctl.c @@ -0,0 +1,118 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * This file implements the ioctl control path for the iptun driver. The + * GLDv3 dld_ioc_register() mechanism is used to register iptun ioctls with + * the dld module. 
+ */ + +#include <sys/dld_ioc.h> +#include <sys/policy.h> +#include <inet/iptun.h> +#include "iptun_impl.h" + +/* ARGSUSED */ +static int +iptun_ioc_create(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) +{ + return (iptun_create(karg, cred)); +} + +/* ARGSUSED */ +static int +iptun_ioc_delete(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) +{ + return (iptun_delete(*(datalink_id_t *)karg, cred)); +} + +/* ARGSUSED */ +static int +iptun_ioc_modify(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) +{ + return (iptun_modify(karg, cred)); +} + +/* ARGSUSED */ +static int +iptun_ioc_info(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) +{ + return (iptun_info(karg, cred)); +} + +/* ARGSUSED */ +static int +iptun_ioc_set_6to4relay(void *karg, intptr_t arg, int mode, cred_t *cred, + int *rvalp) +{ + ipaddr_t *relay = karg; + netstack_t *ns = netstack_find_by_cred(cred); + int err; + + err = iptun_set_6to4relay(ns, *relay); + netstack_rele(ns); + return (err); +} + +/* ARGSUSED */ +static int +iptun_ioc_get_6to4relay(void *karg, intptr_t arg, int mode, cred_t *cred, + int *rvalp) +{ + ipaddr_t *relay = karg; + netstack_t *ns = netstack_find_by_cred(cred); + + iptun_get_6to4relay(ns, relay); + netstack_rele(ns); + return (0); +} + +static dld_ioc_info_t iptun_ioc_list[] = { + { IPTUN_CREATE, DLDCOPYIN, sizeof (iptun_kparams_t), + iptun_ioc_create, secpolicy_iptun_config}, + { IPTUN_DELETE, DLDCOPYIN, sizeof (datalink_id_t), + iptun_ioc_delete, secpolicy_iptun_config}, + { IPTUN_MODIFY, DLDCOPYIN, sizeof (iptun_kparams_t), + iptun_ioc_modify, secpolicy_iptun_config}, + { IPTUN_INFO, DLDCOPYINOUT, sizeof (iptun_kparams_t), + iptun_ioc_info, NULL}, + { IPTUN_SET_6TO4RELAY, DLDCOPYIN, sizeof (struct in_addr), + iptun_ioc_set_6to4relay, secpolicy_iptun_config}, + { IPTUN_GET_6TO4RELAY, DLDCOPYINOUT, sizeof (struct in_addr), + iptun_ioc_get_6to4relay, NULL} +}; + +int +iptun_ioc_init(void) +{ + return 
(dld_ioc_register(IPTUN_IOC, iptun_ioc_list, + DLDIOCCNT(iptun_ioc_list))); +} + +void +iptun_ioc_fini(void) +{ + dld_ioc_unregister(IPTUN_IOC); +} diff --git a/usr/src/uts/common/inet/iptun/iptun_dev.c b/usr/src/uts/common/inet/iptun/iptun_dev.c new file mode 100644 index 0000000000..52218bdc18 --- /dev/null +++ b/usr/src/uts/common/inet/iptun/iptun_dev.c @@ -0,0 +1,261 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * IP Tunneling Driver + * + * As viewed from the top, this module is a GLDv3 driver that consumes the + * mac driver interfaces. It implements the logic for various forms of IP + * (IPv4 or IPv6) encapsulation within IP (IPv4 or IPv6). 
+ */ + +#include <sys/file.h> +#include <sys/list.h> +#include "iptun_impl.h" + +#define IPTUN_LINKINFO "IP tunneling driver" +#define IPTUN_HASHSZ 67 + +dev_info_t *iptun_dip; +ldi_ident_t iptun_ldi_ident; + +static int iptun_attach(dev_info_t *, ddi_attach_cmd_t); +static int iptun_detach(dev_info_t *, ddi_detach_cmd_t); +static int iptun_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); +static int iptun_constructor(void *, void *, int); +static void iptun_destructor(void *, void *); + +DDI_DEFINE_STREAM_OPS(iptun_dev_ops, nulldev, nulldev, iptun_attach, + iptun_detach, nodev, iptun_getinfo, D_MP, NULL, ddi_quiesce_not_supported); + +static struct modldrv iptun_modldrv = { + &mod_driverops, + IPTUN_LINKINFO, + &iptun_dev_ops +}; + +static struct modlinkage iptun_modlinkage = { + MODREV_1, + &iptun_modldrv, + NULL +}; + +/* + * Initialize the tunnel stack instance. + */ +/* ARGSUSED */ +static void * +iptun_stack_init(netstackid_t stackid, netstack_t *ns) +{ + iptun_stack_t *iptuns; + + iptuns = kmem_zalloc(sizeof (*iptuns), KM_SLEEP); + iptuns->iptuns_netstack = ns; + mutex_init(&iptuns->iptuns_lock, NULL, MUTEX_DEFAULT, NULL); + list_create(&iptuns->iptuns_iptunlist, sizeof (iptun_t), + offsetof(iptun_t, iptun_link)); + + return (iptuns); +} + +/* ARGSUSED */ +static void +iptun_stack_shutdown(netstackid_t stackid, void *arg) +{ + iptun_stack_t *iptuns = arg; + iptun_t *iptun; + datalink_id_t linkid; + + /* note that iptun_delete() removes iptun from the list */ + while ((iptun = list_head(&iptuns->iptuns_iptunlist)) != NULL) { + linkid = iptun->iptun_linkid; + (void) iptun_delete(linkid, iptun->iptun_cred); + (void) dls_mgmt_destroy(linkid, B_FALSE); + } + if (iptuns->iptuns_g_q != NULL) + (void) ldi_close(iptuns->iptuns_g_q_lh, FWRITE|FREAD, CRED()); +} + +/* + * Free the tunnel stack instance. 
+ */ +/* ARGSUSED */ +static void +iptun_stack_fini(netstackid_t stackid, void *arg) +{ + iptun_stack_t *iptuns = arg; + + list_destroy(&iptuns->iptuns_iptunlist); + mutex_destroy(&iptuns->iptuns_lock); + kmem_free(iptuns, sizeof (*iptuns)); +} + +static void +iptun_fini(void) +{ + ddi_taskq_destroy(iptun_taskq); + mac_fini_ops(&iptun_dev_ops); + ldi_ident_release(iptun_ldi_ident); + mod_hash_destroy_idhash(iptun_hash); + kmem_cache_destroy(iptun_cache); +} + +int +_init(void) +{ + int rc; + + rc = ldi_ident_from_mod(&iptun_modlinkage, &iptun_ldi_ident); + if (rc != 0) + return (rc); + + iptun_cache = kmem_cache_create("iptun_cache", sizeof (iptun_t), 0, + iptun_constructor, iptun_destructor, NULL, NULL, NULL, 0); + if (iptun_cache == NULL) { + ldi_ident_release(iptun_ldi_ident); + return (ENOMEM); + } + + iptun_taskq = ddi_taskq_create(NULL, "iptun_taskq", 1, + TASKQ_DEFAULTPRI, 0); + if (iptun_taskq == NULL) { + ldi_ident_release(iptun_ldi_ident); + kmem_cache_destroy(iptun_cache); + return (ENOMEM); + } + + iptun_hash = mod_hash_create_idhash("iptun_hash", IPTUN_HASHSZ, + mod_hash_null_valdtor); + + mac_init_ops(&iptun_dev_ops, IPTUN_DRIVER_NAME); + + if ((rc = mod_install(&iptun_modlinkage)) != 0) + iptun_fini(); + return (rc); +} + +int +_fini(void) +{ + int rc; + + if ((rc = mod_remove(&iptun_modlinkage)) == 0) + iptun_fini(); + return (rc); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&iptun_modlinkage, modinfop)); +} + +static int +iptun_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) +{ + switch (cmd) { + case DDI_ATTACH: + if (ddi_get_instance(dip) != 0 || iptun_ioc_init() != 0) + return (DDI_FAILURE); + iptun_dip = dip; + netstack_register(NS_IPTUN, iptun_stack_init, + iptun_stack_shutdown, iptun_stack_fini); + return (DDI_SUCCESS); + + default: + return (DDI_FAILURE); + } +} + +/* ARGSUSED */ +static int +iptun_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) +{ + switch (cmd) { + case DDI_DETACH: + /* + * We prevent the pseudo device 
from detaching (and thus the + * driver from unloading) when there are tunnels configured by + * consulting iptun_count(). We don't need to hold a lock + * here because the tunnel count is only changed when a tunnel + * is created or deleted, which can't happen while the detach + * routine is running (the ioctl path calls + * ddi_hold_devi_by_instance() in dld's drv_ioctl(), and the + * /dev/net implicit path has the device open). + */ + if (iptun_count() > 0) + return (DDI_FAILURE); + netstack_unregister(NS_IPTUN); + iptun_dip = NULL; + iptun_ioc_fini(); + return (DDI_SUCCESS); + + default: + return (DDI_FAILURE); + } +} + +/* ARGSUSED */ +static int +iptun_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) +{ + switch (infocmd) { + case DDI_INFO_DEVT2DEVINFO: + *result = iptun_dip; + return (DDI_SUCCESS); + case DDI_INFO_DEVT2INSTANCE: + *result = NULL; + return (DDI_SUCCESS); + } + return (DDI_FAILURE); +} + +/* ARGSUSED */ +static int +iptun_constructor(void *buf, void *cdrarg, int kmflags) +{ + iptun_t *iptun = buf; + + bzero(iptun, sizeof (*iptun)); + mutex_init(&iptun->iptun_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&iptun->iptun_upcall_cv, NULL, CV_DRIVER, NULL); + cv_init(&iptun->iptun_enter_cv, NULL, CV_DRIVER, NULL); + + return (0); +} + +/* ARGSUSED */ +static void +iptun_destructor(void *buf, void *cdrarg) +{ + iptun_t *iptun = buf; + + /* This iptun_t must not still be in use. 
*/ + ASSERT(!(iptun->iptun_flags & (IPTUN_BOUND|IPTUN_MAC_REGISTERED| + IPTUN_MAC_STARTED|IPTUN_HASH_INSERTED|IPTUN_UPCALL_PENDING))); + + mutex_destroy(&iptun->iptun_lock); + cv_destroy(&iptun->iptun_upcall_cv); +} diff --git a/usr/src/uts/common/inet/iptun/iptun_impl.h b/usr/src/uts/common/inet/iptun/iptun_impl.h new file mode 100644 index 0000000000..593adb7d9c --- /dev/null +++ b/usr/src/uts/common/inet/iptun/iptun_impl.h @@ -0,0 +1,234 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _INET_IPTUN_IMPL_H +#define _INET_IPTUN_IMPL_H + +#include <sys/sunddi.h> +#include <sys/sunldi.h> +#include <sys/stream.h> +#include <sys/modhash.h> +#include <sys/list.h> +#include <sys/dls.h> +#include <sys/mac.h> +#include <sys/dld_impl.h> +#include <sys/netstack.h> +#include <sys/sunddi.h> +#include <sys/sunldi.h> +#include <sys/socket.h> +#include <inet/iptun.h> +#include <inet/ipclassifier.h> +#include <inet/ipsec_impl.h> +#include <netinet/in.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _KERNEL + +#define IPTUN_MODID 5134 +#define IPTUN_DRIVER_NAME "iptun" + +typedef struct iptun_encaplim_s { + ip6_dest_t iel_destopt; + struct ip6_opt_tunnel iel_telopt; + uint8_t iel_padn[3]; +} iptun_encaplim_t; + +typedef struct iptun_ipv6hdrs_s { + ip6_t it6h_ip6h; + iptun_encaplim_t it6h_encaplim; +} iptun_ipv6hdrs_t; + +typedef union iptun_header_u { + ipha_t ihu_hdr4; + iptun_ipv6hdrs_t ihu_hdr6; +} iptun_header_t; + +typedef struct iptun_addr_s { + sa_family_t ia_family; + union { + ipaddr_t iau_addr4; + in6_addr_t iau_addr6; + } ia_addr; +} iptun_addr_t; + +typedef struct iptun_typeinfo { + iptun_type_t iti_type; + const char *iti_ident; /* MAC-Type plugin identifier */ + uint_t iti_ipvers; /* outer header IP version */ + edesc_spf iti_txfunc; /* function used to transmit to ip */ + uint32_t iti_minmtu; /* minimum possible tunnel MTU */ + uint32_t iti_maxmtu; /* maximum possible tunnel MTU */ + boolean_t iti_hasraddr; /* has a remote adress */ +} iptun_typeinfo_t; + +/* + * An iptun_t represents an IP tunnel link. The iptun_lock protects the + * integrity of all fields except statistics which are updated atomically, and + * is also used by iptun_upcall_cv and iptun_enter_cv. Access to all fields + * must be done under the protection of iptun_lock with the following + * exceptions: + * + * The datapath reads certain fields without locks for performance reasons. 
+ * + * - IPTUN_PMTU_TOO_OLD() is used without a lock to determine if the + * destination path-MTU should be queried. This reads iptun_flags + * IPTUN_RADDR, IPTUN_FIXED_MTU, and iptun_dpmtu_lastupdate. All of these + * can change without adversely affecting the tunnel, as the worst case + * scenario is that we launch a task that will ultimately either do nothing + * or needlessly query the destination path-MTU. + * + * - IPTUN_IS_RUNNING() is used (read access to iptun_flags IPTUN_BOUND and + * IPTUN_MAC_STARTED) to drop packets if they're sent while the tunnel is + * not running. This is harmless as the worst case scenario is that a + * packet will be needlessly sent down to ip and be dropped due to an + * unspecified source or destination. + */ +typedef struct iptun_s { + datalink_id_t iptun_linkid; + kmutex_t iptun_lock; + kcondvar_t iptun_upcall_cv; + kcondvar_t iptun_enter_cv; + uint32_t iptun_flags; + list_node_t iptun_link; + mac_handle_t iptun_mh; + conn_t *iptun_connp; + zoneid_t iptun_zoneid; + netstack_t *iptun_ns; + cred_t *iptun_cred; + struct ipsec_tun_pol_s *iptun_itp; + iptun_typeinfo_t *iptun_typeinfo; + uint32_t iptun_mtu; + uint32_t iptun_dpmtu; /* destination path MTU */ + clock_t iptun_dpmtu_lastupdate; + uint8_t iptun_hoplimit; + uint8_t iptun_encaplimit; + iptun_addr_t iptun_laddr; /* local address */ + iptun_addr_t iptun_raddr; /* remote address */ + iptun_header_t iptun_header; + size_t iptun_header_size; + ipsec_req_t iptun_simple_policy; + + /* statistics */ + uint64_t iptun_ierrors; + uint64_t iptun_oerrors; + uint64_t iptun_rbytes; + uint64_t iptun_obytes; + uint64_t iptun_ipackets; + uint64_t iptun_opackets; + uint64_t iptun_norcvbuf; + uint64_t iptun_noxmtbuf; + uint64_t iptun_taskq_fail; +} iptun_t; + +#define iptun_iptuns iptun_ns->netstack_iptun +#define iptun_laddr4 iptun_laddr.ia_addr.iau_addr4 +#define iptun_laddr6 iptun_laddr.ia_addr.iau_addr6 +#define iptun_raddr4 iptun_raddr.ia_addr.iau_addr4 +#define iptun_raddr6 
iptun_raddr.ia_addr.iau_addr6 +#define iptun_header4 iptun_header.ihu_hdr4 +#define iptun_header6 iptun_header.ihu_hdr6 + +/* iptun_flags */ +#define IPTUN_BOUND 0x0001 /* tunnel address(es) bound with ip */ +#define IPTUN_LADDR 0x0002 /* local address is set */ +#define IPTUN_RADDR 0x0004 /* remote address is set */ +#define IPTUN_MAC_REGISTERED 0x0008 /* registered with the mac module */ +#define IPTUN_MAC_STARTED 0x0010 /* iptun_m_start() has been called */ +#define IPTUN_HASH_INSERTED 0x0020 /* iptun_t in iptun_hash */ +#define IPTUN_FIXED_MTU 0x0040 /* MTU was set using mtu link prop */ +#define IPTUN_IMPLICIT 0x0080 /* implicitly created IP tunnel */ +#define IPTUN_SIMPLE_POLICY 0x0100 /* cached iptun_simple_policy */ +#define IPTUN_UPCALL_PENDING 0x0200 /* upcall to mac module in progress */ +#define IPTUN_DELETE_PENDING 0x0400 /* iptun_delete() is issuing upcalls */ +#define IPTUN_CONDEMNED 0x0800 /* iptun_t is to be freed */ + +#define IS_IPTUN_RUNNING(iptun) \ + ((iptun->iptun_flags & (IPTUN_BOUND | IPTUN_MAC_STARTED)) == \ + (IPTUN_BOUND | IPTUN_MAC_STARTED)) + +/* + * We request ire information for the tunnel destination in order to obtain + * its path MTU information. We use that to calculate the initial link MTU of + * a tunnel. + * + * After that, if the path MTU of the tunnel destination becomes smaller + * than the link MTU of the tunnel, then we will receive a packet too big + * (aka fragmentation needed) ICMP error when we transmit a packet larger + * than the path MTU, and we will adjust the tunnel's MTU based on the ICMP + * error's MTU information. + * + * In addition to that, we also need to request the ire information + * periodically to make sure the link MTU of a tunnel doesn't become stale + * if the path MTU of the tunnel destination becomes larger than the link + * MTU of the tunnel. The period for the requests is ten minutes in + * accordance with rfc1191. 
+ */ +#define IPTUN_PMTU_AGE SEC_TO_TICK(600) +#define IPTUN_PMTU_TOO_OLD(ipt) \ + (((ipt)->iptun_flags & IPTUN_RADDR) && \ + !((ipt)->iptun_flags & IPTUN_FIXED_MTU) && \ + (ddi_get_lbolt() - (ipt)->iptun_dpmtu_lastupdate) > IPTUN_PMTU_AGE) + +/* + * iptuns_lock protects iptuns_iptunlist and iptuns_g_q. + */ +typedef struct iptun_stack { + netstack_t *iptuns_netstack; /* Common netstack */ + kmutex_t iptuns_lock; + list_t iptuns_iptunlist; /* list of tunnels in this stack. */ + queue_t *iptuns_g_q; /* read-side IP queue */ + ldi_handle_t iptuns_g_q_lh; + ipaddr_t iptuns_relay_rtr_addr; +} iptun_stack_t; + +extern dev_info_t *iptun_dip; +extern mod_hash_t *iptun_hash; +extern kmem_cache_t *iptun_cache; +extern ddi_taskq_t *iptun_taskq; +extern ldi_ident_t iptun_ldi_ident; + +extern int iptun_ioc_init(void); +extern void iptun_ioc_fini(void); +extern uint_t iptun_count(void); +extern int iptun_create(iptun_kparams_t *, cred_t *); +extern int iptun_delete(datalink_id_t, cred_t *); +extern int iptun_modify(const iptun_kparams_t *, cred_t *); +extern int iptun_info(iptun_kparams_t *, cred_t *); +extern int iptun_set_6to4relay(netstack_t *, ipaddr_t); +extern void iptun_get_6to4relay(netstack_t *, ipaddr_t *); +extern void iptun_set_policy(datalink_id_t, ipsec_tun_pol_t *); +extern void iptun_set_g_q(netstack_t *, queue_t *); +extern void iptun_clear_g_q(netstack_t *); + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _INET_IPTUN_IMPL_H */ diff --git a/usr/src/uts/common/inet/iptun/iptunq.c b/usr/src/uts/common/inet/iptun/iptunq.c new file mode 100644 index 0000000000..c558601885 --- /dev/null +++ b/usr/src/uts/common/inet/iptun/iptunq.c @@ -0,0 +1,120 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * The sole purpose of this module is to provide a STREAMS queue to the iptun + * module so that it can call ip module functions which require one. Once the + * ip module no longer requires a STREAMS queue for bind processing, all of + * this complexity can be removed. + */ + +#include <inet/common.h> +#include <inet/ip.h> +#include <inet/ipclassifier.h> +#include <sys/stream.h> +#include "iptun_impl.h" + +static int iptunq_open(queue_t *, dev_t *, int, int, cred_t *); +static int iptunq_close(queue_t *); + +static struct module_info iptunq_modinfo = { + 0, "iptunq", 0, INFPSZ, 1, 0 +}; + +static struct qinit iptunq_rinit = { + NULL, NULL, iptunq_open, iptunq_close, NULL, &iptunq_modinfo, NULL +}; + +static struct qinit iptunq_winit = { + (pfi_t)putq, (pfi_t)ip_wsrv, iptunq_open, iptunq_close, NULL, + &iptunq_modinfo, NULL +}; + +struct streamtab iptunq_info = { + &iptunq_rinit, &iptunq_winit, NULL, NULL +}; + +/* ARGSUSED */ +static int +iptunq_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) +{ + netstack_t *ns; + conn_t *connp; + major_t maj; + dev_t conn_dev; + + if (q->q_ptr != NULL) + return (EBUSY); + + if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) + return (ENOMEM); + + ns = netstack_find_by_cred(credp); + 
iptun_set_g_q(ns, q); + connp = ipcl_conn_create(IPCL_IPCCONN, KM_NOSLEEP, ns); + netstack_rele(ns); + if (connp == NULL) { + inet_minor_free(ip_minor_arena_sa, conn_dev); + return (ENOMEM); + } + + connp->conn_flags |= IPCL_IPTUN; + connp->conn_zoneid = (ns->netstack_stackid == GLOBAL_NETSTACKID) ? + crgetzoneid(credp) : GLOBAL_ZONEID; + connp->conn_dev = conn_dev; + connp->conn_minor_arena = ip_minor_arena_sa; + + maj = getmajor(*devp); + *devp = makedevice(maj, (minor_t)connp->conn_dev); + connp->conn_cred = credp; + crhold(connp->conn_cred); + + q->q_ptr = WR(q)->q_ptr = connp; + connp->conn_rq = q; + connp->conn_wq = WR(q); + + ASSERT(connp->conn_ref == 1); + mutex_enter(&connp->conn_lock); + connp->conn_state_flags &= ~CONN_INCIPIENT; + mutex_exit(&connp->conn_lock); + + qprocson(q); + return (0); +} + +static int +iptunq_close(queue_t *q) +{ + conn_t *connp = q->q_ptr; + + iptun_clear_g_q(connp->conn_netstack); + ip_quiesce_conn(connp); + qprocsoff(q); + inet_minor_free(connp->conn_minor_arena, connp->conn_dev); + connp->conn_ref--; + ipcl_conn_destroy(connp); + q->q_ptr = WR(q)->q_ptr = NULL; + return (0); +} diff --git a/usr/src/uts/sparc/tun/tun.global-objs.debug64 b/usr/src/uts/common/inet/iptun/iptunq.conf index 99d06ddee0..e19cd55f64 100644 --- a/usr/src/uts/sparc/tun/tun.global-objs.debug64 +++ b/usr/src/uts/common/inet/iptun/iptunq.conf @@ -19,23 +19,7 @@ # CDDL HEADER END # # -# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# -# ident "%Z%%M% %I% %E% SMI" - -IP6_MAJ -IP_MAJ -bindack -info -infoack -modlinkage -modlstrmod -tun_debug -tun_do_fastpath -tun_fmodsw -tun_limit_init_upper_v4 -tun_limit_init_upper_v6 -tuninfo -tunrinit -tunwinit +name="iptunq" parent="pseudo" instance=0; diff --git a/usr/src/uts/common/inet/iptun/iptunq_ddi.c b/usr/src/uts/common/inet/iptun/iptunq_ddi.c new file mode 100644 index 0000000000..ed4cebd098 --- /dev/null +++ b/usr/src/uts/common/inet/iptun/iptunq_ddi.c @@ -0,0 +1,55 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include <sys/conf.h> +#include <sys/modctl.h> +#include <inet/ip.h> + +#define INET_NAME "iptunq" +#define INET_DEVMINOR 0 +#define INET_DEVDESC "Dummy IP Tunnel device" +#define INET_DEVSTRTAB iptunq_info + +#define INET_DEVMTFLAGS IP_DEVMTFLAGS + +#include "../inetddi.c" + +int +_init(void) +{ + return (mod_install(&modlinkage)); +} + +int +_fini(void) +{ + return (mod_remove(&modlinkage)); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} diff --git a/usr/src/uts/common/inet/tcp/tcp.c b/usr/src/uts/common/inet/tcp/tcp.c index e1e1e0619f..178eb5587d 100644 --- a/usr/src/uts/common/inet/tcp/tcp.c +++ b/usr/src/uts/common/inet/tcp/tcp.c @@ -9295,7 +9295,7 @@ tcp_create_common(queue_t *q, cred_t *credp, boolean_t isv6, tcp = connp->conn_tcp; if (isv6) { - connp->conn_flags |= (IPCL_TCP6|IPCL_ISV6); + connp->conn_flags |= IPCL_TCP6; connp->conn_send = ip_output_v6; connp->conn_af_isv6 = B_TRUE; connp->conn_pkt_isv6 = B_TRUE; diff --git a/usr/src/uts/common/inet/tun.h b/usr/src/uts/common/inet/tun.h deleted file mode 100644 index 8154c0fbeb..0000000000 --- a/usr/src/uts/common/inet/tun.h +++ /dev/null @@ -1,299 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - - -#ifndef _INET_TUN_H -#define _INET_TUN_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* tunneling module names */ -#define TUN_NAME "tun" -#define ATUN_NAME "atun" -#define TUN6TO4_NAME "6to4tun" - -/* IOCTL's for set/getting 6to4 Relay Router(RR) destination IPv4 Address */ -#define SIOCS6TO4TUNRRADDR 4 /* ipaddr_t */ -#define SIOCG6TO4TUNRRADDR 5 /* ipaddr_t */ - -#ifdef _KERNEL - -#include <sys/netstack.h> - -#define TUN_MODID 5134 -#define ATUN_MODID 5135 -#define TUN6TO4_MODID 5136 - -/* - * We request ire information for the tunnel destination in order to obtain - * its path MTU information. We use that to calculate the link MTU of - * tunnels. If the path MTU of the tunnel destination becomes smaller than - * the link MTU of the tunnel, then we will receive a packet too big (aka - * fragmentation needed) ICMP error, and we will request new ire - * information at that time. - * - * We also request the ire information periodically to make sure the link - * MTU of a tunnel doesn't become stale if the path MTU of the tunnel - * destination becomes larger than the link MTU of the tunnel. The period - * for the requests is ten minutes in accordance with rfc1191. - */ -#define TUN_IRE_AGE SEC_TO_TICK(600) -#define TUN_IRE_TOO_OLD(atp) (lbolt - (atp)->tun_ire_lastreq > TUN_IRE_AGE) - -/* - * The default MTU for automatic and 6to4 tunnels. We make this as large - * as possible. These tunnels communicate with an unknown number of other - * tunnel endpoints that have potentially differing path MTU's. We let - * IPv4 fragmentation take care of packets that are too large. 
- */ -#define ATUN_MTU (IP_MAXPACKET - sizeof (ipha_t)) - -struct tunstat { - struct kstat_named tuns_nocanput; - struct kstat_named tuns_xmtretry; - struct kstat_named tuns_allocbfail; - - struct kstat_named tuns_ipackets; /* ifInUcastPkts */ - struct kstat_named tuns_opackets; /* ifOutUcastPkts */ - struct kstat_named tuns_InErrors; - struct kstat_named tuns_OutErrors; - - struct kstat_named tuns_rcvbytes; /* # octets received */ - /* MIB - ifInOctets */ - struct kstat_named tuns_xmtbytes; /* # octets transmitted */ - /* MIB - ifOutOctets */ - struct kstat_named tuns_multircv; /* # multicast packets */ - /* delivered to upper layer */ - /* MIB - ifInNUcastPkts */ - struct kstat_named tuns_multixmt; /* # multicast packets */ - /* requested to be sent */ - /* MIB - ifOutNUcastPkts */ - struct kstat_named tuns_InDiscard; /* # rcv packets discarded */ - /* MIB - ifInDiscards */ - struct kstat_named tuns_OutDiscard; /* # xmt packets discarded */ - /* MIB - ifOutDiscards */ - struct kstat_named tuns_HCInOctets; - struct kstat_named tuns_HCInUcastPkts; - struct kstat_named tuns_HCInMulticastPkts; - struct kstat_named tuns_HCOutOctets; - struct kstat_named tuns_HCOutUcastPkts; - struct kstat_named tuns_HCOutMulticastPkts; -}; - -typedef struct tun_stats_s { - /* Protected by tun_global_lock. */ - struct tun_stats_s *ts_next; - kmutex_t ts_lock; /* protects from here down */ - struct tun_s *ts_atp; - uint_t ts_refcnt; - uint_t ts_lower; - uint_t ts_type; - t_uscalar_t ts_ppa; - kstat_t *ts_ksp; -} tun_stats_t; - -/* Used for recovery from memory allocation failure */ -typedef struct eventid_s { - bufcall_id_t ev_wbufcid; /* needed for recovery */ - bufcall_id_t ev_rbufcid; /* needed for recovery */ - timeout_id_t ev_wtimoutid; /* needed for recovery */ - timeout_id_t ev_rtimoutid; /* needed for recovery */ -} eventid_t; - -/* IPv6 destination option header for tunnel encapsulation limit option. 
*/ -struct tun_encap_limit { - ip6_dest_t tel_destopt; - struct ip6_opt_tunnel tel_telopt; - char tel_padn[3]; -}; -#define IPV6_TUN_ENCAP_OPT_LEN (sizeof (struct tun_encap_limit)) - -/* per-instance data structure */ -/* Note: if t_recnt > 1, then t_indirect must be null */ -typedef struct tun_s { - struct tun_s *tun_next; /* For linked-list of tunnels by */ - struct tun_s **tun_ptpn; /* ip address. */ - - /* Links v4-upper and v6-upper instances so they can share kstats. */ - struct tun_s *tun_kstat_next; - - queue_t *tun_wq; - kmutex_t tun_lock; /* protects from here down */ - eventid_t tun_events; - t_uscalar_t tun_state; /* protected by qwriter */ - t_uscalar_t tun_ppa; - mblk_t *tun_iocmp; - ipsec_req_t tun_secinfo; - /* - * tun_polcy_index is used to keep track if a tunnel's policy - * was altered by ipsecconf(1m)/PF_POLICY instead of ioctl()s. - * (Only ioctl()s can update this field.) - */ - uint64_t tun_policy_index; - struct ipsec_tun_pol_s *tun_itp; - uint64_t tun_itp_gen; - uint_t tun_ipsec_overhead; /* Length of IPsec headers. 
*/ - uint_t tun_flags; - in6_addr_t tun_laddr; - in6_addr_t tun_faddr; - zoneid_t tun_zoneid; - uint32_t tun_mtu; - uint32_t tun_notifications; /* For DL_NOTIFY_IND */ - int16_t tun_encap_lim; - uint8_t tun_hop_limit; - uint32_t tun_extra_offset; - clock_t tun_ire_lastreq; - union { - ipha_t tun_u_ipha; - struct { - ip6_t tun_u_ip6h; - struct tun_encap_limit tun_u_telopt; - } tun_u_ip6hdrs; - double tun_u_aligner; - } tun_u; - dev_t tun_dev; -#define tun_ipha tun_u.tun_u_ipha -#define tun_ip6h tun_u.tun_u_ip6hdrs.tun_u_ip6h -#define tun_telopt tun_u.tun_u_ip6hdrs.tun_u_telopt - tun_stats_t *tun_stats; - char tun_lifname[LIFNAMSIZ]; - uint32_t tun_nocanput; /* # input canput() returned false */ - uint32_t tun_xmtretry; /* # output canput() returned false */ - uint32_t tun_allocbfail; /* # esballoc/allocb failed */ - - /* - * MIB II variables - */ - uint32_t tun_InDiscard; - uint32_t tun_InErrors; - uint32_t tun_OutDiscard; - uint32_t tun_OutErrors; - - uint64_t tun_HCInOctets; /* # Total Octets received */ - uint64_t tun_HCInUcastPkts; /* # Packets delivered */ - uint64_t tun_HCInMulticastPkts; /* # Mulitcast Packets delivered */ - uint64_t tun_HCOutOctets; /* # Total Octets sent */ - uint64_t tun_HCOutUcastPkts; /* # Packets requested */ - uint64_t tun_HCOutMulticastPkts; /* Multicast Packets requested */ - netstack_t *tun_netstack; - cred_t *tun_cred; -} tun_t; - - -/* - * First 4 bits of flags are used to determine what version of IP is - * is above the tunnel or below the tunnel - */ - -#define TUN_U_V4 0x01 /* upper protocol is v4 */ -#define TUN_U_V6 0x02 /* upper protocol is v6 */ -#define TUN_L_V4 0x04 /* lower protocol is v4 */ -#define TUN_L_V6 0x08 /* lower protocol is v6 */ -#define TUN_UPPER_MASK (TUN_U_V4 | TUN_U_V6) -#define TUN_LOWER_MASK (TUN_L_V4 | TUN_L_V6) - -/* - * tunnel flags - * TUN_BOUND is set when we get the ok ack back for the T_BIND_REQ - */ -#define TUN_BOUND 0x010 /* tunnel is bound */ -#define TUN_BIND_SENT 0x020 /* our version of dl 
pending */ -#define TUN_SRC 0x040 /* Source address set */ -#define TUN_DST 0x080 /* Destination address set */ -#define TUN_AUTOMATIC 0x100 /* tunnel is an automatic tunnel */ -#define TUN_FASTPATH 0x200 /* fastpath has been acked */ -#define TUN_SECURITY 0x400 /* Security properties present */ -#define TUN_HOP_LIM 0x800 /* Hop limit non-default */ -#define TUN_ENCAP_LIM 0x1000 /* Encapsulation limit non-default */ -#define TUN_6TO4 0x2000 /* tunnel is 6to4 tunnel */ -#define TUN_COMPLEX_SECURITY 0x4000 /* tunnel has full tunnel-mode policy */ - -struct old_iftun_req { - char ifta_lifr_name[LIFNAMSIZ]; /* if name */ - struct sockaddr_storage ifta_saddr; /* source address */ - struct sockaddr_storage ifta_daddr; /* destination address */ - uint_t ifta_flags; /* See below */ - /* IP version information is read only */ - enum ifta_proto ifta_upper; /* IP version above tunnel */ - enum ifta_proto ifta_lower; /* IP version below tunnel */ - uint_t ifta_vers; /* Version number */ - uint32_t ifta_secinfo[IFTUN_SECINFOLEN]; /* Security prefs. */ -}; - -#define OSIOCGTUNPARAM _IOR('i', 147, struct old_iftun_req) - /* get tunnel */ - /* parameters */ -#define OSIOCSTUNPARAM _IOW('i', 148, struct old_iftun_req) - /* set tunnel */ - /* parameters */ - -/* - * Linked list of tunnels. 
- */ - -#define TUN_PPA_SZ 64 -#define TUN_LIST_HASH(ppa) ((ppa) % TUN_PPA_SZ) - -#define TUN_T_SZ 251 -#define TUN_BYADDR_LIST_HASH(a) (((a).s6_addr32[3]) % (TUN_T_SZ)) - -/* - * tunnel stack instances - */ -struct tun_stack { - netstack_t *tuns_netstack; /* Common netstack */ - - /* - * protects global data structures such as tun_ppa_list - * also protects tun_t at ts_next and *ts_atp - * should be acquired before ts_lock - */ - kmutex_t tuns_global_lock; - tun_stats_t *tuns_ppa_list[TUN_PPA_SZ]; - tun_t *tuns_byaddr_list[TUN_T_SZ]; - - ipaddr_t tuns_relay_rtr_addr_v4; -}; -typedef struct tun_stack tun_stack_t; - - -int tun_open(queue_t *, dev_t *, int, int, cred_t *); -int tun_close(queue_t *, int, cred_t *); -void tun_rput(queue_t *q, mblk_t *mp); -void tun_rsrv(queue_t *q); -void tun_wput(queue_t *q, mblk_t *mp); -void tun_wsrv(queue_t *q); - -extern void tun_ipsec_load_complete(void); - -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _INET_TUN_H */ diff --git a/usr/src/uts/common/inet/udp/udp.c b/usr/src/uts/common/inet/udp/udp.c index c6c9a34fd8..5cdfc0858a 100644 --- a/usr/src/uts/common/inet/udp/udp.c +++ b/usr/src/uts/common/inet/udp/udp.c @@ -7460,14 +7460,12 @@ udp_do_open(cred_t *credp, boolean_t isv6, int flags) udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; udp->udp_ttl = us->us_ipv6_hoplimit; connp->conn_af_isv6 = B_TRUE; - connp->conn_flags |= IPCL_ISV6; } else { udp->udp_family = AF_INET; udp->udp_ipversion = IPV4_VERSION; udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE; udp->udp_ttl = us->us_ipv4_ttl; connp->conn_af_isv6 = B_FALSE; - connp->conn_flags &= ~IPCL_ISV6; } udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; diff --git a/usr/src/uts/common/io/aggr/aggr_ctl.c b/usr/src/uts/common/io/aggr/aggr_ctl.c index ea167fda28..1bbad3d904 100644 --- a/usr/src/uts/common/io/aggr/aggr_ctl.c +++ b/usr/src/uts/common/io/aggr/aggr_ctl.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. 
All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -29,7 +29,7 @@ #include <sys/aggr.h> #include <sys/aggr_impl.h> -#include <sys/priv_names.h> +#include <sys/policy.h> /* * Process a LAIOC_MODIFY request. @@ -114,7 +114,8 @@ aggr_ioc_create(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) force = create_arg->lc_force; rc = aggr_grp_create(create_arg->lc_linkid, create_arg->lc_key, nports, - ports, policy, mac_fixed, force, mac_addr, lacp_mode, lacp_timer); + ports, policy, mac_fixed, force, mac_addr, lacp_mode, lacp_timer, + cred); done: kmem_free(ports, ports_size); @@ -127,7 +128,7 @@ aggr_ioc_delete(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) { laioc_delete_t *delete_arg = karg; - return (aggr_grp_delete(delete_arg->ld_linkid)); + return (aggr_grp_delete(delete_arg->ld_linkid, cred)); } typedef struct aggr_ioc_info_state { @@ -195,21 +196,14 @@ static int aggr_ioc_info(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) { laioc_info_t *info_argp = karg; - datalink_id_t linkid; aggr_ioc_info_state_t state; - /* - * linkid of the group to return. Must not be DATALINK_INVALID_LINKID. 
- */ - if ((linkid = info_argp->li_group_linkid) == DATALINK_INVALID_LINKID) - return (EINVAL); - state.bytes_left = info_argp->li_bufsize - sizeof (laioc_info_t); state.where = (uchar_t *)arg + sizeof (laioc_info_t); state.mode = mode; - return (aggr_grp_info(linkid, &state, aggr_ioc_info_new_grp, - aggr_ioc_info_new_port)); + return (aggr_grp_info(info_argp->li_group_linkid, &state, + aggr_ioc_info_new_grp, aggr_ioc_info_new_port, cred)); } static int @@ -264,17 +258,16 @@ aggr_ioc_remove(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) static dld_ioc_info_t aggr_ioc_list[] = { {LAIOC_CREATE, DLDCOPYIN, sizeof (laioc_create_t), aggr_ioc_create, - {PRIV_SYS_DL_CONFIG}}, + secpolicy_dl_config}, {LAIOC_DELETE, DLDCOPYIN, sizeof (laioc_delete_t), aggr_ioc_delete, - {PRIV_SYS_DL_CONFIG}}, - {LAIOC_INFO, DLDCOPYINOUT, sizeof (laioc_info_t), aggr_ioc_info, - {NULL}}, + secpolicy_dl_config}, + {LAIOC_INFO, DLDCOPYINOUT, sizeof (laioc_info_t), aggr_ioc_info, NULL}, {LAIOC_ADD, DLDCOPYIN, sizeof (laioc_add_rem_t), aggr_ioc_add, - {PRIV_SYS_DL_CONFIG}}, + secpolicy_dl_config}, {LAIOC_REMOVE, DLDCOPYIN, sizeof (laioc_add_rem_t), aggr_ioc_remove, - {PRIV_SYS_DL_CONFIG}}, + secpolicy_dl_config}, {LAIOC_MODIFY, DLDCOPYIN, sizeof (laioc_modify_t), aggr_ioc_modify, - {PRIV_SYS_DL_CONFIG}} + secpolicy_dl_config} }; int diff --git a/usr/src/uts/common/io/aggr/aggr_grp.c b/usr/src/uts/common/io/aggr/aggr_grp.c index c619144958..32ce4dfd08 100644 --- a/usr/src/uts/common/io/aggr/aggr_grp.c +++ b/usr/src/uts/common/io/aggr/aggr_grp.c @@ -52,7 +52,9 @@ #include <sys/modhash.h> #include <sys/id_space.h> #include <sys/strsun.h> +#include <sys/cred.h> #include <sys/dlpi.h> +#include <sys/zone.h> #include <sys/mac_provider.h> #include <sys/dls.h> #include <sys/vlan.h> @@ -487,8 +489,15 @@ aggr_grp_add_port(aggr_grp_t *grp, datalink_id_t port_linkid, boolean_t force, { aggr_port_t *port, **cport; mac_perim_handle_t mph; + zoneid_t port_zoneid = ALL_ZONES; int err; + /* The port 
must be in the same zone as the aggregation. */ + if (zone_check_datalink(&port_zoneid, port_linkid) != 0) + port_zoneid = GLOBAL_ZONEID; + if (grp->lg_zoneid != port_zoneid) + return (EBUSY); + /* * lg_mh could be NULL when the function is called during the creation * of the aggregation. @@ -982,7 +991,8 @@ aggr_grp_modify(datalink_id_t linkid, uint8_t update_mask, uint32_t policy, int aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports, laioc_port_t *ports, uint32_t policy, boolean_t mac_fixed, boolean_t force, - uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer) + uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer, + cred_t *credp) { aggr_grp_t *grp = NULL; aggr_port_t *port; @@ -1012,6 +1022,7 @@ aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports, grp->lg_closing = B_FALSE; grp->lg_force = force; grp->lg_linkid = linkid; + grp->lg_zoneid = crgetzoneid(credp); grp->lg_ifspeed = 0; grp->lg_link_state = LINK_STATE_UNKNOWN; grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN; @@ -1084,7 +1095,8 @@ aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports, if (err != 0) goto bail; - if ((err = dls_devnet_create(grp->lg_mh, grp->lg_linkid)) != 0) { + err = dls_devnet_create(grp->lg_mh, grp->lg_linkid, crgetzoneid(credp)); + if (err != 0) { (void) mac_unregister(grp->lg_mh); grp->lg_mh = NULL; goto bail; @@ -1388,7 +1400,7 @@ bail: } int -aggr_grp_delete(datalink_id_t linkid) +aggr_grp_delete(datalink_id_t linkid, cred_t *cred) { aggr_grp_t *grp = NULL; aggr_port_t *port, *cport; @@ -1423,7 +1435,7 @@ aggr_grp_delete(datalink_id_t linkid) * fail the operation. 
*/ if ((err = mac_disable(grp->lg_mh)) != 0) { - (void) dls_devnet_create(grp->lg_mh, linkid); + (void) dls_devnet_create(grp->lg_mh, linkid, crgetzoneid(cred)); rw_exit(&aggr_grp_lock); return (err); } @@ -1492,13 +1504,20 @@ aggr_grp_free(aggr_grp_t *grp) int aggr_grp_info(datalink_id_t linkid, void *fn_arg, aggr_grp_info_new_grp_fn_t new_grp_fn, - aggr_grp_info_new_port_fn_t new_port_fn) + aggr_grp_info_new_port_fn_t new_port_fn, cred_t *cred) { aggr_grp_t *grp; aggr_port_t *port; mac_perim_handle_t mph, pmph; int rc = 0; + /* + * Make sure that the aggregation link is visible from the caller's + * zone. + */ + if (!dls_devnet_islinkvisible(linkid, crgetzoneid(cred))) + return (ENOENT); + rw_enter(&aggr_grp_lock, RW_READER); if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid), diff --git a/usr/src/uts/common/io/bridge.c b/usr/src/uts/common/io/bridge.c index beb0bdd2f1..1382dd9d0b 100644 --- a/usr/src/uts/common/io/bridge.c +++ b/usr/src/uts/common/io/bridge.c @@ -732,7 +732,8 @@ bridge_find_name(const char *bridge) } static int -bridge_create(datalink_id_t linkid, const char *bridge, bridge_inst_t **bipc) +bridge_create(datalink_id_t linkid, const char *bridge, bridge_inst_t **bipc, + cred_t *cred) { bridge_inst_t *bip, *bipnew; bridge_mac_t *bmp = NULL; @@ -781,7 +782,8 @@ lookup_retry: * No extra locking is needed here. 
*/ if (!(bmp->bm_flags & BMF_DLS)) { - if ((err = dls_devnet_create(bmp->bm_mh, linkid)) != 0) + err = dls_devnet_create(bmp->bm_mh, linkid, crgetzoneid(cred)); + if (err != 0) goto fail_create; bmp->bm_flags |= BMF_DLS; } @@ -3095,8 +3097,8 @@ bridge_ioctl(queue_t *wq, mblk_t *mp) /* LINTED: alignment */ bnb = (bridge_newbridge_t *)mp->b_cont->b_rptr; bnb->bnb_name[MAXNAMELEN-1] = '\0'; - if ((rc = bridge_create(bnb->bnb_linkid, - bnb->bnb_name, &bip)) != 0) + rc = bridge_create(bnb->bnb_linkid, bnb->bnb_name, &bip, cr); + if (rc != 0) break; rw_enter(&bip->bi_rwlock, RW_WRITER); diff --git a/usr/src/uts/common/io/dld/dld_drv.c b/usr/src/uts/common/io/dld/dld_drv.c index 1aa50821fe..c91793723e 100644 --- a/usr/src/uts/common/io/dld/dld_drv.c +++ b/usr/src/uts/common/io/dld/dld_drv.c @@ -43,6 +43,7 @@ #include <inet/common.h> #include <sys/policy.h> #include <sys/priv_names.h> +#include <sys/zone.h> static void drv_init(void); static int drv_fini(void); @@ -314,6 +315,17 @@ drv_open(dev_t *devp, int flag, int sflag, cred_t *credp) } /* + * Verify if the caller is allowed to modify a link of the given class. 
+ */ +static int +drv_ioc_checkprivs(datalink_class_t class, cred_t *cred) +{ + if (class == DATALINK_CLASS_IPTUN) + return (secpolicy_iptun_config(cred)); + return (secpolicy_dl_config(cred)); +} + +/* * DLDIOC_ATTR */ /* ARGSUSED */ @@ -323,9 +335,14 @@ drv_ioc_attr(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) dld_ioc_attr_t *diap = karg; dls_dl_handle_t dlh; dls_link_t *dlp; + zoneid_t zoneid = crgetzoneid(cred); int err; mac_perim_handle_t mph; + if (zoneid != GLOBAL_ZONEID && + zone_check_datalink(&zoneid, diap->dia_linkid) != 0) + return (ENOENT); + if ((err = dls_devnet_hold_tmp(diap->dia_linkid, &dlh)) != 0) return (err); @@ -362,6 +379,11 @@ drv_ioc_phys_attr(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) dls_dl_handle_t dlh; dls_dev_handle_t ddh; dev_t phydev; + zoneid_t zoneid = crgetzoneid(cred); + + if (zoneid != GLOBAL_ZONEID && + zone_check_datalink(&zoneid, dipp->dip_linkid) != 0) + return (ENOENT); /* * Every physical link should have its physical dev_t kept in the @@ -409,6 +431,11 @@ drv_ioc_hwgrpget(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) mac_handle_t mh = NULL; int i, err, grpnum; uint_t bytes_left; + zoneid_t zoneid = crgetzoneid(cred); + + if (zoneid != GLOBAL_ZONEID && + zone_check_datalink(&zoneid, hwgrpp->dih_linkid) != 0) + return (ENOENT); hwgrpp->dih_n_groups = 0; err = mac_open_by_linkid(hwgrpp->dih_linkid, &mh); @@ -458,6 +485,11 @@ drv_ioc_macaddrget(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) int i, err; uint_t bytes_left; boolean_t is_used; + zoneid_t zoneid = crgetzoneid(cred); + + if (zoneid != GLOBAL_ZONEID && + zone_check_datalink(&zoneid, magp->dig_linkid) != 0) + return (ENOENT); magp->dig_count = 0; err = mac_open_by_linkid(magp->dig_linkid, &mh); @@ -514,7 +546,7 @@ done: */ static int drv_ioc_prop_common(dld_ioc_macprop_t *prop, intptr_t arg, boolean_t set, - int mode) + cred_t *cred, int mode) { int err = EINVAL; dls_dl_handle_t dlh = NULL; @@ -523,9 
+555,10 @@ drv_ioc_prop_common(dld_ioc_macprop_t *prop, intptr_t arg, boolean_t set, mac_prop_t macprop; dld_ioc_macprop_t *kprop; datalink_id_t linkid; + datalink_class_t class; + zoneid_t zoneid = crgetzoneid(cred); uint_t dsize; - /* * We only use pr_valsize from prop, as the caller only did a * copyin() for sizeof (dld_ioc_prop_t), which doesn't cover @@ -550,27 +583,42 @@ drv_ioc_prop_common(dld_ioc_macprop_t *prop, intptr_t arg, boolean_t set, } linkid = kprop->pr_linkid; - if ((err = dls_devnet_hold_tmp(linkid, &dlh)) != 0) - goto done; - if ((err = mac_perim_enter_by_macname(dls_devnet_mac(dlh), - &mph)) != 0) { - goto done; + if (set) { + if ((err = dls_mgmt_get_linkinfo(linkid, NULL, &class, NULL, + NULL)) != 0 || (err = drv_ioc_checkprivs(class, cred)) != 0) + goto done; } + if ((err = dls_devnet_hold_tmp(linkid, &dlh)) != 0) + goto done; + if ((err = mac_perim_enter_by_macname(dls_devnet_mac(dlh), &mph)) != 0) + goto done; if ((err = dls_link_hold(dls_devnet_mac(dlh), &dlp)) != 0) goto done; + /* + * Don't allow a process to get or set properties of a link if that + * link doesn't belong to that zone. 
+ */ + if (zoneid != dls_devnet_getownerzid(dlh)) { + err = ENOENT; + goto done; + } + switch (kprop->pr_num) { case MAC_PROP_ZONE: if (set) { dld_ioc_zid_t *dzp = (dld_ioc_zid_t *)kprop->pr_val; - err = dls_devnet_setzid(dzp->diz_link, dzp->diz_zid); + if (zoneid != GLOBAL_ZONEID) { + err = EACCES; + goto done; + } + err = dls_devnet_setzid(dlh, dzp->diz_zid); } else { kprop->pr_perm_flags = MAC_PROP_PERM_RW; - err = dls_devnet_getzid(linkid, - (zoneid_t *)kprop->pr_val); + (*(zoneid_t *)kprop->pr_val) = dls_devnet_getzid(dlh); } break; case MAC_PROP_AUTOPUSH: { @@ -625,7 +673,6 @@ done: if (dlp != NULL) dls_link_rele(dlp); - if (mph != NULL) { int32_t cpuid; void *mdip = NULL; @@ -652,14 +699,14 @@ done: static int drv_ioc_setprop(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) { - return (drv_ioc_prop_common(karg, arg, B_TRUE, mode)); + return (drv_ioc_prop_common(karg, arg, B_TRUE, cred, mode)); } /* ARGSUSED */ static int drv_ioc_getprop(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) { - return (drv_ioc_prop_common(karg, arg, B_FALSE, mode)); + return (drv_ioc_prop_common(karg, arg, B_FALSE, cred, mode)); } /* @@ -675,8 +722,23 @@ drv_ioc_rename(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) dld_ioc_rename_t *dir = karg; mod_hash_key_t key; mod_hash_val_t val; + zoneid_t zoneid = crgetzoneid(cred); + datalink_class_t class; int err; + if (zoneid != GLOBAL_ZONEID && + (zone_check_datalink(&zoneid, dir->dir_linkid1) != 0 || + dir->dir_linkid2 != DATALINK_INVALID_LINKID && + zone_check_datalink(&zoneid, dir->dir_linkid2) != 0)) + return (ENOENT); + + if ((err = dls_mgmt_get_linkinfo(dir->dir_linkid1, NULL, &class, NULL, + NULL)) != 0) + return (err); + + if ((err = drv_ioc_checkprivs(class, cred)) != 0) + return (err); + if ((err = dls_devnet_rename(dir->dir_linkid1, dir->dir_linkid2, dir->dir_link)) != 0) return (err); @@ -885,7 +947,7 @@ drv_ioc_walkflow(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) { 
dld_ioc_walkflow_t *wfp = karg; - return (dld_walk_flow(wfp, arg)); + return (dld_walk_flow(wfp, arg, cred)); } /* @@ -1121,56 +1183,45 @@ drv_ioc_secobj_unset(void *karg, intptr_t arg, int mode, cred_t *cred, return (0); } -static int -drv_check_policy(dld_ioc_info_t *info, cred_t *cred) -{ - int i, err = 0; - - for (i = 0; info->di_priv[i] != NULL && i < DLD_MAX_PRIV; i++) { - if ((err = secpolicy_dld_ioctl(cred, info->di_priv[i], - "dld ioctl")) != 0) { - break; - } - } - if (err == 0) - return (0); - - return (secpolicy_net_config(cred, B_FALSE)); -} - +/* + * Note that ioctls that modify links have a NULL di_priv_func(), as + * privileges can only be checked after we know the class of the link being + * modified (due to class-specific fine-grained privileges such as + * sys_iptun_config). + */ static dld_ioc_info_t drv_ioc_list[] = { {DLDIOC_ATTR, DLDCOPYINOUT, sizeof (dld_ioc_attr_t), - drv_ioc_attr, {NULL}}, + drv_ioc_attr, NULL}, {DLDIOC_PHYS_ATTR, DLDCOPYINOUT, sizeof (dld_ioc_phys_attr_t), - drv_ioc_phys_attr, {NULL}}, + drv_ioc_phys_attr, NULL}, {DLDIOC_SECOBJ_SET, DLDCOPYIN, sizeof (dld_ioc_secobj_set_t), - drv_ioc_secobj_set, {PRIV_SYS_DL_CONFIG}}, + drv_ioc_secobj_set, secpolicy_dl_config}, {DLDIOC_SECOBJ_GET, DLDCOPYINOUT, sizeof (dld_ioc_secobj_get_t), - drv_ioc_secobj_get, {PRIV_SYS_DL_CONFIG}}, + drv_ioc_secobj_get, secpolicy_dl_config}, {DLDIOC_SECOBJ_UNSET, DLDCOPYIN, sizeof (dld_ioc_secobj_unset_t), - drv_ioc_secobj_unset, {PRIV_SYS_DL_CONFIG}}, + drv_ioc_secobj_unset, secpolicy_dl_config}, {DLDIOC_DOORSERVER, DLDCOPYIN, sizeof (dld_ioc_door_t), - drv_ioc_doorserver, {PRIV_SYS_DL_CONFIG}}, + drv_ioc_doorserver, secpolicy_dl_config}, {DLDIOC_RENAME, DLDCOPYIN, sizeof (dld_ioc_rename_t), - drv_ioc_rename, {PRIV_SYS_DL_CONFIG}}, + drv_ioc_rename, NULL}, {DLDIOC_MACADDRGET, DLDCOPYINOUT, sizeof (dld_ioc_macaddrget_t), - drv_ioc_macaddrget, {PRIV_SYS_DL_CONFIG}}, + drv_ioc_macaddrget, NULL}, {DLDIOC_ADDFLOW, DLDCOPYIN, sizeof (dld_ioc_addflow_t), - 
drv_ioc_addflow, {PRIV_SYS_DL_CONFIG}}, + drv_ioc_addflow, secpolicy_dl_config}, {DLDIOC_REMOVEFLOW, DLDCOPYIN, sizeof (dld_ioc_removeflow_t), - drv_ioc_removeflow, {PRIV_SYS_DL_CONFIG}}, + drv_ioc_removeflow, secpolicy_dl_config}, {DLDIOC_MODIFYFLOW, DLDCOPYIN, sizeof (dld_ioc_modifyflow_t), - drv_ioc_modifyflow, {PRIV_SYS_DL_CONFIG}}, + drv_ioc_modifyflow, secpolicy_dl_config}, {DLDIOC_WALKFLOW, DLDCOPYINOUT, sizeof (dld_ioc_walkflow_t), - drv_ioc_walkflow, {NULL}}, + drv_ioc_walkflow, NULL}, {DLDIOC_USAGELOG, DLDCOPYIN, sizeof (dld_ioc_usagelog_t), - drv_ioc_usagelog, {PRIV_SYS_DL_CONFIG}}, + drv_ioc_usagelog, secpolicy_dl_config}, {DLDIOC_SETMACPROP, DLDCOPYIN, sizeof (dld_ioc_macprop_t), - drv_ioc_setprop, {PRIV_SYS_DL_CONFIG}}, + drv_ioc_setprop, NULL}, {DLDIOC_GETMACPROP, DLDCOPYIN, sizeof (dld_ioc_macprop_t), - drv_ioc_getprop, {NULL}}, + drv_ioc_getprop, NULL}, {DLDIOC_GETHWGRP, DLDCOPYINOUT, sizeof (dld_ioc_hwgrpget_t), - drv_ioc_hwgrpget, {PRIV_SYS_DL_CONFIG}}, + drv_ioc_hwgrpget, secpolicy_dl_config}, }; typedef struct dld_ioc_modentry { @@ -1187,11 +1238,12 @@ typedef struct dld_ioc_modentry { * need for it to call dld_ioc_register() itself. 
*/ static dld_ioc_modentry_t dld_ioc_modtable[] = { - {DLD_IOC, "dld", drv_ioc_list, DLDIOCCNT(drv_ioc_list)}, + {DLD_IOC, "dld", drv_ioc_list, DLDIOCCNT(drv_ioc_list)}, {AGGR_IOC, "aggr", NULL, 0}, {VNIC_IOC, "vnic", NULL, 0}, {SIMNET_IOC, "simnet", NULL, 0}, - {BRIDGE_IOC, "bridge", NULL, 0} + {BRIDGE_IOC, "bridge", NULL, 0}, + {IPTUN_IOC, "iptun", NULL, 0} }; #define DLDIOC_CNT \ (sizeof (dld_ioc_modtable) / sizeof (dld_ioc_modentry_t)) @@ -1278,7 +1330,9 @@ drv_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred, int *rvalp) } info = &dim->dim_list[i]; - if ((err = drv_check_policy(info, cred)) != 0) + + if (info->di_priv_func != NULL && + (err = info->di_priv_func(cred)) != 0) goto done; sz = info->di_argsize; diff --git a/usr/src/uts/common/io/dld/dld_flow.c b/usr/src/uts/common/io/dld/dld_flow.c index b57368484f..281217d02d 100644 --- a/usr/src/uts/common/io/dld/dld_flow.c +++ b/usr/src/uts/common/io/dld/dld_flow.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -27,6 +27,7 @@ * Flows ioctls implementation. */ +#include <sys/cred.h> #include <sys/dld.h> #include <sys/mac_provider.h> #include <sys/mac_client.h> @@ -94,12 +95,16 @@ dld_walk_flow_cb(mac_flowinfo_t *finfo, void *arg) * ENOSPC is returned a bigger buffer is needed. */ int -dld_walk_flow(dld_ioc_walkflow_t *wf, intptr_t uaddr) +dld_walk_flow(dld_ioc_walkflow_t *wf, intptr_t uaddr, cred_t *credp) { flowinfo_state_t state; mac_flowinfo_t finfo; int err = 0; + /* For now, one can only view flows from the global zone. 
*/ + if (crgetzoneid(credp) != GLOBAL_ZONEID) + return (EPERM); + state.fi_bufsize = wf->wf_len; state.fi_fl = (uchar_t *)uaddr + sizeof (*wf); state.fi_nflows = 0; diff --git a/usr/src/uts/common/io/dld/dld_proto.c b/usr/src/uts/common/io/dld/dld_proto.c index 1f683c8591..03ddb6542f 100644 --- a/usr/src/uts/common/io/dld/dld_proto.c +++ b/usr/src/uts/common/io/dld/dld_proto.c @@ -839,43 +839,46 @@ proto_physaddr_req(dld_str_t *dsp, mblk_t *mp) { dl_phys_addr_req_t *dlp = (dl_phys_addr_req_t *)mp->b_rptr; queue_t *q = dsp->ds_wq; - t_uscalar_t dl_err; - char *addr; + t_uscalar_t dl_err = 0; + char *addr = NULL; uint_t addr_length; if (MBLKL(mp) < sizeof (dl_phys_addr_req_t)) { dl_err = DL_BADPRIM; - goto failed; + goto done; } if (dsp->ds_dlstate == DL_UNATTACHED || DL_ACK_PENDING(dsp->ds_dlstate)) { dl_err = DL_OUTSTATE; - goto failed; - } - - if (dlp->dl_addr_type != DL_CURR_PHYS_ADDR && - dlp->dl_addr_type != DL_FACT_PHYS_ADDR) { - dl_err = DL_UNSUPPORTED; - goto failed; + goto done; } addr_length = dsp->ds_mip->mi_addr_length; if (addr_length > 0) { addr = kmem_alloc(addr_length, KM_SLEEP); - if (dlp->dl_addr_type == DL_CURR_PHYS_ADDR) + switch (dlp->dl_addr_type) { + case DL_CURR_PHYS_ADDR: mac_unicast_primary_get(dsp->ds_mh, (uint8_t *)addr); - else + break; + case DL_FACT_PHYS_ADDR: bcopy(dsp->ds_mip->mi_unicst_addr, addr, addr_length); - + break; + case DL_CURR_DEST_ADDR: + if (!mac_dst_get(dsp->ds_mh, (uint8_t *)addr)) + dl_err = DL_NOTSUPPORTED; + break; + default: + dl_err = DL_UNSUPPORTED; + } + } +done: + if (dl_err == 0) dlphysaddrack(q, mp, addr, (t_uscalar_t)addr_length); + else + dlerrorack(q, mp, DL_PHYS_ADDR_REQ, dl_err, 0); + if (addr != NULL) kmem_free(addr, addr_length); - } else { - dlphysaddrack(q, mp, NULL, 0); - } - return; -failed: - dlerrorack(q, mp, DL_PHYS_ADDR_REQ, dl_err, 0); } /* @@ -1108,7 +1111,8 @@ proto_notify_req(dld_str_t *dsp, mblk_t *mp) DL_NOTE_LINK_DOWN | DL_NOTE_CAPAB_RENEG | DL_NOTE_FASTPATH_FLUSH | - DL_NOTE_SPEED; + 
DL_NOTE_SPEED | + DL_NOTE_SDU_SIZE; if (MBLKL(mp) < sizeof (dl_notify_req_t)) { dl_err = DL_BADPRIM; diff --git a/usr/src/uts/common/io/dld/dld_str.c b/usr/src/uts/common/io/dld/dld_str.c index 99b42ba546..676c643655 100644 --- a/usr/src/uts/common/io/dld/dld_str.c +++ b/usr/src/uts/common/io/dld/dld_str.c @@ -44,7 +44,7 @@ static void str_destructor(void *, void *); static mblk_t *str_unitdata_ind(dld_str_t *, mblk_t *, boolean_t); static void str_notify_promisc_on_phys(dld_str_t *); static void str_notify_promisc_off_phys(dld_str_t *); -static void str_notify_phys_addr(dld_str_t *, const uint8_t *); +static void str_notify_phys_addr(dld_str_t *, uint_t, const uint8_t *); static void str_notify_link_up(dld_str_t *); static void str_notify_link_down(dld_str_t *); static void str_notify_capab_reneg(dld_str_t *); @@ -1517,7 +1517,7 @@ str_notify_promisc_off_phys(dld_str_t *dsp) * DL_NOTIFY_IND: DL_NOTE_PHYS_ADDR */ static void -str_notify_phys_addr(dld_str_t *dsp, const uint8_t *addr) +str_notify_phys_addr(dld_str_t *dsp, uint_t addr_type, const uint8_t *addr) { mblk_t *mp; dl_notify_ind_t *dlip; @@ -1537,7 +1537,7 @@ str_notify_phys_addr(dld_str_t *dsp, const uint8_t *addr) dlip = (dl_notify_ind_t *)mp->b_rptr; dlip->dl_primitive = DL_NOTIFY_IND; dlip->dl_notification = DL_NOTE_PHYS_ADDR; - dlip->dl_data = DL_CURR_PHYS_ADDR; + dlip->dl_data = addr_type; dlip->dl_addr_offset = sizeof (dl_notify_ind_t); dlip->dl_addr_length = addr_length + sizeof (uint16_t); @@ -1707,7 +1707,16 @@ str_notify(void *arg, mac_notify_type_t type) /* * Send the appropriate DL_NOTIFY_IND. */ - str_notify_phys_addr(dsp, addr); + str_notify_phys_addr(dsp, DL_CURR_PHYS_ADDR, addr); + break; + + case MAC_NOTE_DEST: + /* + * Only send up DL_NOTE_DEST_ADDR if the link has a + * destination address. 
+ */ + if (mac_dst_get(dsp->ds_mh, addr)) + str_notify_phys_addr(dsp, DL_CURR_DEST_ADDR, addr); break; case MAC_NOTE_LOWLINK: diff --git a/usr/src/uts/common/io/dls/dls_link.c b/usr/src/uts/common/io/dls/dls_link.c index 3d0359b9d6..a4d82022ee 100644 --- a/usr/src/uts/common/io/dls/dls_link.c +++ b/usr/src/uts/common/io/dls/dls_link.c @@ -858,50 +858,33 @@ dls_link_setzid(const char *name, zoneid_t zid) goto done; /* - * Check whether this dlp is used by its own zones, if yes, - * we cannot change its zoneid. + * Check whether this dlp is used by its own zone. If yes, we cannot + * change its zoneid. */ if (dlp->dl_zone_ref != 0) { err = EBUSY; goto done; } + dlp->dl_zid = zid; + if (zid == GLOBAL_ZONEID) { /* - * Move the link from the local zone to the global zone, - * and release the reference to this link. At the same time - * reset the link's active state so that an aggregation is - * allowed to be created over it. + * The link is moving from a non-global zone to the global + * zone, so we need to release the reference that was held + * when the link was originally assigned to the non-global + * zone. */ - dlp->dl_zid = zid; - dls_mac_active_clear(dlp); dls_link_rele(dlp); - goto done; - } else if (old_zid == GLOBAL_ZONEID) { - /* - * Move the link from the global zone to the local zone, - * and hold a reference to this link. Also, set the link - * to the "active" state so that the global zone is - * not able to create an aggregation over this link. - * TODO: revisit once we allow creating aggregations - * within a local zone. - */ - if ((err = dls_mac_active_set(dlp)) != 0) { - if (err != ENXIO) - err = EBUSY; - goto done; - } - dlp->dl_zid = zid; - return (0); - } else { - /* - * Move the link from a local zone to another local zone. - */ - dlp->dl_zid = zid; } done: - dls_link_rele(dlp); + /* + * We only keep the reference to this link open if the link has + * successfully moved from the global zone to a non-global zone. 
+ */ + if (err != 0 || old_zid != GLOBAL_ZONEID) + dls_link_rele(dlp); return (err); } diff --git a/usr/src/uts/common/io/dls/dls_mgmt.c b/usr/src/uts/common/io/dls/dls_mgmt.c index 9e509e163d..cfe3294251 100644 --- a/usr/src/uts/common/io/dls/dls_mgmt.c +++ b/usr/src/uts/common/io/dls/dls_mgmt.c @@ -39,6 +39,9 @@ #include <sys/softmac.h> #include <sys/dls.h> #include <sys/dls_impl.h> +#include <sys/stropts.h> +#include <sys/netstack.h> +#include <inet/iptun/iptun_impl.h> /* * This vanity name management module is treated as part of the GLD framework @@ -47,6 +50,10 @@ * mac perimeter -> framework locks */ +typedef struct dls_stack { + zoneid_t dlss_zoneid; +} dls_stack_t; + static kmem_cache_t *i_dls_devnet_cachep; static kmutex_t i_dls_mgmt_lock; static krwlock_t i_dls_devnet_lock; @@ -57,11 +64,32 @@ boolean_t devnet_need_rebuild; #define VLAN_HASHSZ 67 /* prime */ + +/* + * The following names are default tunnel interface names for backward + * compatibility with Solaris 10 and prior. Opening a /dev/net node with one + * of these names causes a tunnel link to be implicitly created in + * dls_devnet_hold_by_name(). + */ +#define IPTUN_IPV4_NAME "ip.tun" +#define IPTUN_IPV6_NAME "ip6.tun" +#define IPTUN_6TO4_NAME "ip.6to4tun" + +#define IS_IPV4_TUN(name) ( \ + strncmp((name), IPTUN_IPV4_NAME, strlen(IPTUN_IPV4_NAME)) == 0) +#define IS_IPV6_TUN(name) ( \ + strncmp((name), IPTUN_IPV6_NAME, strlen(IPTUN_IPV6_NAME)) == 0) +#define IS_6TO4_TUN(name) ( \ + strncmp((name), IPTUN_6TO4_NAME, strlen(IPTUN_6TO4_NAME)) == 0) +#define IS_IPTUN_LINK(name) ( \ + IS_IPV4_TUN(name) || IS_IPV6_TUN(name) || IS_6TO4_TUN(name)) + /* Upcall door handle */ static door_handle_t dls_mgmt_dh = NULL; #define DD_CONDEMNED 0x1 #define DD_KSTAT_CHANGING 0x2 +#define DD_IMPLICIT_IPTUN 0x4 /* Implicitly-created ip*.*tun* tunnel */ /* * This structure is used to keep the <linkid, macname> mapping. 
@@ -72,21 +100,25 @@ static door_handle_t dls_mgmt_dh = NULL; */ typedef struct dls_devnet_s { datalink_id_t dd_linkid; + char dd_linkname[MAXLINKNAMELEN]; char dd_mac[MAXNAMELEN]; - kstat_t *dd_ksp; + kstat_t *dd_ksp; /* kstat in owner_zid */ + kstat_t *dd_zone_ksp; /* in dd_zid if != owner_zid */ uint32_t dd_ref; - kmutex_t dd_mutex; kcondvar_t dd_cv; uint32_t dd_tref; uint_t dd_flags; - - zoneid_t dd_zid; - + zoneid_t dd_owner_zid; /* zone where node was created */ + zoneid_t dd_zid; /* current zone */ boolean_t dd_prop_loaded; taskqid_t dd_prop_taskid; } dls_devnet_t; +static int i_dls_devnet_create_iptun(const char *, datalink_id_t *); +static int i_dls_devnet_destroy_iptun(datalink_id_t); +static int i_dls_devnet_setzid(dls_devnet_t *, zoneid_t, boolean_t); +static int dls_devnet_unset(const char *, datalink_id_t *, boolean_t); /*ARGSUSED*/ static int @@ -113,6 +145,49 @@ i_dls_devnet_destructor(void *buf, void *arg) cv_destroy(&ddp->dd_cv); } +/* ARGSUSED */ +static int +dls_zone_remove(datalink_id_t linkid, void *arg) +{ + dls_devnet_t *ddp; + + if (dls_devnet_hold_tmp(linkid, &ddp) == 0) { + (void) dls_devnet_setzid(ddp, GLOBAL_ZONEID); + dls_devnet_rele_tmp(ddp); + } + return (0); +} + +/* ARGSUSED */ +static void * +dls_stack_init(netstackid_t stackid, netstack_t *ns) +{ + dls_stack_t *dlss; + + dlss = kmem_zalloc(sizeof (*dlss), KM_SLEEP); + dlss->dlss_zoneid = netstackid_to_zoneid(stackid); + return (dlss); +} + +/* ARGSUSED */ +static void +dls_stack_shutdown(netstackid_t stackid, void *arg) +{ + dls_stack_t *dlss = (dls_stack_t *)arg; + + /* Move remaining datalinks in this zone back to the global zone. */ + (void) zone_datalink_walk(dlss->dlss_zoneid, dls_zone_remove, NULL); +} + +/* ARGSUSED */ +static void +dls_stack_fini(netstackid_t stackid, void *arg) +{ + dls_stack_t *dlss = (dls_stack_t *)arg; + + kmem_free(dlss, sizeof (*dlss)); +} + /* * Module initialization and finalization functions. 
*/ @@ -144,11 +219,15 @@ dls_mgmt_init(void) mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); devnet_need_rebuild = B_FALSE; + + netstack_register(NS_DLS, dls_stack_init, dls_stack_shutdown, + dls_stack_fini); } void dls_mgmt_fini(void) { + netstack_unregister(NS_DLS); mod_hash_destroy_hash(i_dls_devnet_hash); mod_hash_destroy_hash(i_dls_devnet_id_hash); kmem_cache_destroy(i_dls_devnet_cachep); @@ -237,7 +316,7 @@ retry: for (;;) { retry++; - if ((err = door_ki_upcall_limited(dh, &darg, kcred, + if ((err = door_ki_upcall_limited(dh, &darg, zone_kcred(), SIZE_MAX, 0)) == 0) break; @@ -329,7 +408,8 @@ dls_mgmt_create(const char *devname, dev_t dev, datalink_class_t class, create.ld_phymaj = getmajor(dev); create.ld_phyinst = getminor(dev); create.ld_persist = persist; - if (strlcpy(create.ld_devname, devname, MAXNAMELEN) >= MAXNAMELEN) + if (strlcpy(create.ld_devname, devname, sizeof (create.ld_devname)) >= + sizeof (create.ld_devname)) return (EINVAL); if ((err = i_dls_mgmt_upcall(&create, sizeof (create), &retval, @@ -383,7 +463,8 @@ dls_mgmt_update(const char *devname, uint32_t media, boolean_t novanity, update.ld_cmd = DLMGMT_CMD_DLS_UPDATE; - if (strlcpy(update.ld_devname, devname, MAXNAMELEN) >= MAXNAMELEN) + if (strlcpy(update.ld_devname, devname, sizeof (update.ld_devname)) >= + sizeof (update.ld_devname)) return (EINVAL); update.ld_media = media; @@ -666,36 +747,40 @@ dls_devnet_stat_update(kstat_t *ksp, int rw) * Create the "link" kstats. 
*/ static void -dls_devnet_stat_create(dls_devnet_t *ddp) +dls_devnet_stat_create(dls_devnet_t *ddp, zoneid_t zoneid) { - char link[MAXLINKNAMELEN]; kstat_t *ksp; - if ((dls_mgmt_get_linkinfo(ddp->dd_linkid, link, - NULL, NULL, NULL)) != 0) { - return; - } - - if (dls_stat_create("link", 0, link, dls_devnet_stat_update, - ddp, &ksp) != 0) { - return; + if (dls_stat_create("link", 0, ddp->dd_linkname, zoneid, + dls_devnet_stat_update, ddp, &ksp) == 0) { + ASSERT(ksp != NULL); + if (zoneid == ddp->dd_owner_zid) { + ASSERT(ddp->dd_ksp == NULL); + ddp->dd_ksp = ksp; + } else { + ASSERT(ddp->dd_zone_ksp == NULL); + ddp->dd_zone_ksp = ksp; + } } - - ASSERT(ksp != NULL); - ddp->dd_ksp = ksp; } /* * Destroy the "link" kstats. */ static void -dls_devnet_stat_destroy(dls_devnet_t *ddp) +dls_devnet_stat_destroy(dls_devnet_t *ddp, zoneid_t zoneid) { - if (ddp->dd_ksp == NULL) - return; - - kstat_delete(ddp->dd_ksp); - ddp->dd_ksp = NULL; + if (zoneid == ddp->dd_owner_zid) { + if (ddp->dd_ksp != NULL) { + kstat_delete(ddp->dd_ksp); + ddp->dd_ksp = NULL; + } + } else { + if (ddp->dd_zone_ksp != NULL) { + kstat_delete(ddp->dd_zone_ksp); + ddp->dd_zone_ksp = NULL; + } + } } /* @@ -703,36 +788,48 @@ dls_devnet_stat_destroy(dls_devnet_t *ddp) * and create the new set using the new name. */ static void -dls_devnet_stat_rename(dls_devnet_t *ddp, const char *link) +dls_devnet_stat_rename(dls_devnet_t *ddp) { - kstat_t *ksp; - if (ddp->dd_ksp != NULL) { kstat_delete(ddp->dd_ksp); ddp->dd_ksp = NULL; } - - if (dls_stat_create("link", 0, link, dls_devnet_stat_update, - ddp, &ksp) != 0) { - return; - } - - ASSERT(ksp != NULL); - ddp->dd_ksp = ksp; + /* We can't rename a link while it's assigned to a non-global zone. 
*/ + ASSERT(ddp->dd_zone_ksp == NULL); + dls_devnet_stat_create(ddp, ddp->dd_owner_zid); } /* * Associate a linkid with a given link (identified by macname) */ static int -dls_devnet_set(const char *macname, datalink_id_t linkid, dls_devnet_t **ddpp) +dls_devnet_set(const char *macname, datalink_id_t linkid, zoneid_t zoneid, + dls_devnet_t **ddpp) { dls_devnet_t *ddp = NULL; datalink_class_t class; int err; boolean_t stat_create = B_FALSE; + char linkname[MAXLINKNAMELEN]; rw_enter(&i_dls_devnet_lock, RW_WRITER); + + /* + * Don't allow callers to set a link name with a linkid that already + * has a name association (that's what rename is for). + */ + if (linkid != DATALINK_INVALID_LINKID) { + if (mod_hash_find(i_dls_devnet_id_hash, + (mod_hash_key_t)(uintptr_t)linkid, + (mod_hash_val_t *)&ddp) == 0) { + err = EEXIST; + goto done; + } + if ((err = dls_mgmt_get_linkinfo(linkid, linkname, &class, + NULL, NULL)) != 0) + goto done; + } + if ((err = mod_hash_find(i_dls_devnet_hash, (mod_hash_key_t)macname, (mod_hash_val_t *)&ddp)) == 0) { if (ddp->dd_linkid != DATALINK_INVALID_LINKID) { @@ -745,29 +842,25 @@ dls_devnet_set(const char *macname, datalink_id_t linkid, dls_devnet_t **ddpp) * been created, but which does not have a linkid * because dlmgmtd was not running when it was created. 
*/ - if ((err = dls_mgmt_get_linkinfo(linkid, NULL, - &class, NULL, NULL)) != 0) { - goto done; - } - - if (class != DATALINK_CLASS_PHYS) { + if (linkid == DATALINK_INVALID_LINKID || + class != DATALINK_CLASS_PHYS) { err = EINVAL; goto done; } - - goto newphys; + } else { + ddp = kmem_cache_alloc(i_dls_devnet_cachep, KM_SLEEP); + ddp->dd_tref = 0; + ddp->dd_ref++; + ddp->dd_owner_zid = zoneid; + (void) strlcpy(ddp->dd_mac, macname, sizeof (ddp->dd_mac)); + VERIFY(mod_hash_insert(i_dls_devnet_hash, + (mod_hash_key_t)ddp->dd_mac, (mod_hash_val_t)ddp) == 0); } - ddp = kmem_cache_alloc(i_dls_devnet_cachep, KM_SLEEP); - ddp->dd_tref = 0; - ddp->dd_ref++; - ddp->dd_zid = GLOBAL_ZONEID; - (void) strncpy(ddp->dd_mac, macname, MAXNAMELEN); - VERIFY(mod_hash_insert(i_dls_devnet_hash, - (mod_hash_key_t)ddp->dd_mac, (mod_hash_val_t)ddp) == 0); -newphys: if (linkid != DATALINK_INVALID_LINKID) { ddp->dd_linkid = linkid; + (void) strlcpy(ddp->dd_linkname, linkname, + sizeof (ddp->dd_linkname)); VERIFY(mod_hash_insert(i_dls_devnet_id_hash, (mod_hash_key_t)(uintptr_t)linkid, (mod_hash_val_t)ddp) == 0); @@ -780,26 +873,31 @@ newphys: } mutex_exit(&ddp->dd_mutex); } - err = 0; done: - rw_exit(&i_dls_devnet_lock); /* * It is safe to drop the i_dls_devnet_lock at this point. In the case * of physical devices, the softmac framework will fail the device * detach based on the smac_state or smac_hold_cnt. Other cases like * vnic and aggr use their own scheme to serialize creates and deletes * and ensure that *ddp is valid. - * - * The kstat subsystem holds its own locks (rather perimeter) before - * calling the ks_update (dls_devnet_stat_update) entry point which - * in turn grabs the i_dls_devnet_lock. So the lock hierarchy is - * kstat locks -> i_dls_devnet_lock. 
*/ - if (stat_create) - dls_devnet_stat_create(ddp); - if (err == 0 && ddpp != NULL) - *ddpp = ddp; + rw_exit(&i_dls_devnet_lock); + if (err == 0) { + if (zoneid != GLOBAL_ZONEID && + (err = i_dls_devnet_setzid(ddp, zoneid, B_FALSE)) != 0) + (void) dls_devnet_unset(macname, &linkid, B_TRUE); + /* + * The kstat subsystem holds its own locks (rather perimeter) + * before calling the ks_update (dls_devnet_stat_update) entry + * point which in turn grabs the i_dls_devnet_lock. So the + * lock hierarchy is kstat locks -> i_dls_devnet_lock. + */ + if (stat_create) + dls_devnet_stat_create(ddp, zoneid); + if (ddpp != NULL) + *ddpp = ddp; + } return (err); } @@ -842,6 +940,9 @@ dls_devnet_unset(const char *macname, datalink_id_t *id, boolean_t wait) ddp->dd_ref--; *id = ddp->dd_linkid; + if (ddp->dd_zid != GLOBAL_ZONEID) + (void) i_dls_devnet_setzid(ddp, GLOBAL_ZONEID, B_FALSE); + /* * Remove this dls_devnet_t from the hash table. */ @@ -867,11 +968,10 @@ dls_devnet_unset(const char *macname, datalink_id_t *id, boolean_t wait) } if (ddp->dd_linkid != DATALINK_INVALID_LINKID) - dls_devnet_stat_destroy(ddp); + dls_devnet_stat_destroy(ddp, ddp->dd_owner_zid); ddp->dd_prop_loaded = B_FALSE; ddp->dd_linkid = DATALINK_INVALID_LINKID; - ddp->dd_zid = GLOBAL_ZONEID; ddp->dd_flags = 0; mutex_exit(&ddp->dd_mutex); kmem_cache_free(i_dls_devnet_cachep, ddp); @@ -966,7 +1066,7 @@ dls_devnet_hold_by_dev(dev_t dev, dls_dl_handle_t *ddhp) if ((drv = ddi_major_to_name(getmajor(dev))) == NULL) return (EINVAL); - (void) snprintf(name, MAXNAMELEN, "%s%d", drv, getminor(dev) - 1); + (void) snprintf(name, sizeof (name), "%s%d", drv, getminor(dev) - 1); /* * Hold this link to prevent it being detached in case of a @@ -1007,6 +1107,12 @@ dls_devnet_rele(dls_devnet_t *ddp) mutex_enter(&ddp->dd_mutex); ASSERT(ddp->dd_ref > 1); ddp->dd_ref--; + if ((ddp->dd_flags & DD_IMPLICIT_IPTUN) && ddp->dd_ref == 1) { + mutex_exit(&ddp->dd_mutex); + if (i_dls_devnet_destroy_iptun(ddp->dd_linkid) != 0) + 
ddp->dd_flags |= DD_IMPLICIT_IPTUN; + return; + } mutex_exit(&ddp->dd_mutex); } @@ -1035,6 +1141,28 @@ dls_devnet_hold_by_name(const char *link, dls_devnet_t **ddpp) if (err != ENOENT) return (err); + if (IS_IPTUN_LINK(link)) { + if ((err = i_dls_devnet_create_iptun(link, &linkid)) != 0) + return (err); + /* + * At this point, an IP tunnel MAC has registered, which + * resulted in a link being created. + */ + err = dls_devnet_hold(linkid, ddpp); + ASSERT(err == 0); + if (err != 0) { + VERIFY(i_dls_devnet_destroy_iptun(linkid) == 0); + return (err); + } + /* + * dls_devnet_rele() will know to destroy the implicit IP + * tunnel on last reference release if DD_IMPLICIT_IPTUN is + * set. + */ + (*ddpp)->dd_flags |= DD_IMPLICIT_IPTUN; + return (0); + } + if (ddi_parse(link, drv, &ppa) != DDI_SUCCESS) return (ENOENT); @@ -1102,7 +1230,8 @@ dls_devnet_dev2linkid(dev_t dev, datalink_id_t *linkidp) if ((drv = ddi_major_to_name(getmajor(dev))) == NULL) return (EINVAL); - (void) snprintf(macname, MAXNAMELEN, "%s%d", drv, getminor(dev) - 1); + (void) snprintf(macname, sizeof (macname), "%s%d", drv, + getminor(dev) - 1); return (dls_devnet_macname2linkid(macname, linkidp)); } @@ -1144,7 +1273,6 @@ int dls_devnet_rename(datalink_id_t id1, datalink_id_t id2, const char *link) { dls_dev_handle_t ddh = NULL; - char linkname[MAXLINKNAMELEN]; int err = 0; dev_t phydev = 0; dls_devnet_t *ddp; @@ -1188,10 +1316,11 @@ dls_devnet_rename(datalink_id_t id1, datalink_id_t id2, const char *link) } /* - * Return EBUSY if any applications have this link open or if any - * thread is currently accessing the link kstats. Then set the - * DD_KSTAT_CHANGING flag to prevent any access to the kstats - * while we delete and recreate kstats below. + * Return EBUSY if any applications have this link open, if any thread + * is currently accessing the link kstats, or if the link is on-loan + * to a non-global zone. 
Then set the DD_KSTAT_CHANGING flag to + * prevent any access to the kstats while we delete and recreate + * kstats below. */ mutex_enter(&ddp->dd_mutex); if (ddp->dd_ref > 1) { @@ -1205,7 +1334,8 @@ dls_devnet_rename(datalink_id_t id1, datalink_id_t id2, const char *link) mutex_exit(&ddp->dd_mutex); if (id2 == DATALINK_INVALID_LINKID) { - (void) strlcpy(linkname, link, sizeof (linkname)); + (void) strlcpy(ddp->dd_linkname, link, + sizeof (ddp->dd_linkname)); /* rename mac client name and its flow if exists */ if ((err = mac_open(ddp->dd_mac, &mh)) != 0) @@ -1251,7 +1381,7 @@ dls_devnet_rename(datalink_id_t id1, datalink_id_t id2, const char *link) goto done; } - err = dls_mgmt_get_linkinfo(id2, linkname, NULL, NULL, NULL); + err = dls_mgmt_get_linkinfo(id2, ddp->dd_linkname, NULL, NULL, NULL); if (err != 0) { mac_unmark_exclusive(mh); goto done; @@ -1283,7 +1413,7 @@ done: */ rw_exit(&i_dls_devnet_lock); if (err == 0) - dls_devnet_stat_rename(ddp, linkname); + dls_devnet_stat_rename(ddp); if (clear_dd_flag) { mutex_enter(&ddp->dd_mutex); @@ -1297,69 +1427,125 @@ done: return (err); } -int -dls_devnet_setzid(const char *link, zoneid_t zid) +static int +i_dls_devnet_setzid(dls_devnet_t *ddp, zoneid_t new_zoneid, boolean_t setprop) { - dls_devnet_t *ddp; int err; - zoneid_t old_zid; mac_perim_handle_t mph; + boolean_t upcall_done = B_FALSE; + datalink_id_t linkid = ddp->dd_linkid; + zoneid_t old_zoneid = ddp->dd_zid; + dlmgmt_door_setzoneid_t setzid; + dlmgmt_setzoneid_retval_t retval; - if ((err = dls_devnet_hold_by_name(link, &ddp)) != 0) - return (err); + if (old_zoneid == new_zoneid) + return (0); - err = mac_perim_enter_by_macname(ddp->dd_mac, &mph); - if (err != 0) { - dls_devnet_rele(ddp); + if ((err = mac_perim_enter_by_macname(ddp->dd_mac, &mph)) != 0) return (err); - } - if ((old_zid = ddp->dd_zid) == zid) { - mac_perim_exit(mph); - dls_devnet_rele(ddp); - return (0); + /* + * When changing the zoneid of an existing link, we need to tell + * dlmgmtd about 
it. dlmgmtd already knows the zoneid associated with + * newly created links. + */ + if (setprop) { + setzid.ld_cmd = DLMGMT_CMD_SETZONEID; + setzid.ld_linkid = linkid; + setzid.ld_zoneid = new_zoneid; + err = i_dls_mgmt_upcall(&setzid, sizeof (setzid), &retval, + sizeof (retval)); + if (err != 0) + goto done; + upcall_done = B_TRUE; } - - if ((err = dls_link_setzid(ddp->dd_mac, zid)) != 0) { - mac_perim_exit(mph); - dls_devnet_rele(ddp); - return (err); + if ((err = dls_link_setzid(ddp->dd_mac, new_zoneid)) == 0) { + ddp->dd_zid = new_zoneid; + devnet_need_rebuild = B_TRUE; } - ddp->dd_zid = zid; - devnet_need_rebuild = B_TRUE; +done: + if (err != 0 && upcall_done) { + setzid.ld_zoneid = old_zoneid; + (void) i_dls_mgmt_upcall(&setzid, sizeof (setzid), &retval, + sizeof (retval)); + } mac_perim_exit(mph); + return (err); +} + +int +dls_devnet_setzid(dls_dl_handle_t ddh, zoneid_t new_zid) +{ + dls_devnet_t *ddp; + int err; + zoneid_t old_zid; + boolean_t refheld = B_FALSE; + + old_zid = ddh->dd_zid; + + if (old_zid == new_zid) + return (0); /* - * Keep this open reference only if it belonged to the global zone - * and is now assigned to a non-global zone. + * Acquire an additional reference to the link if it is being assigned + * to a non-global zone from the global zone. */ - if (old_zid != GLOBAL_ZONEID || zid == GLOBAL_ZONEID) - dls_devnet_rele(ddp); + if (old_zid == GLOBAL_ZONEID && new_zid != GLOBAL_ZONEID) { + if ((err = dls_devnet_hold(ddh->dd_linkid, &ddp)) != 0) + return (err); + refheld = B_TRUE; + } + + if ((err = i_dls_devnet_setzid(ddh, new_zid, B_TRUE)) != 0) { + if (refheld) + dls_devnet_rele(ddp); + return (err); + } /* - * Then release this link if it belonged to an non-global zone - * but is now assigned back to the global zone. + * Release the additional reference if the link is returning to the + * global zone from a non-global zone. 
*/ - if (old_zid != GLOBAL_ZONEID && zid == GLOBAL_ZONEID) - dls_devnet_rele(ddp); + if (old_zid != GLOBAL_ZONEID && new_zid == GLOBAL_ZONEID) + dls_devnet_rele(ddh); + + /* Re-create kstats in the appropriate zones. */ + if (old_zid != GLOBAL_ZONEID) + dls_devnet_stat_destroy(ddh, old_zid); + if (new_zid != GLOBAL_ZONEID) + dls_devnet_stat_create(ddh, new_zid); return (0); } -int -dls_devnet_getzid(datalink_id_t linkid, zoneid_t *zidp) +zoneid_t +dls_devnet_getzid(dls_dl_handle_t ddh) { - dls_devnet_t *ddp; - int err; + return (((dls_devnet_t *)ddh)->dd_zid); +} - if ((err = dls_devnet_hold_tmp(linkid, &ddp)) != 0) - return (err); +zoneid_t +dls_devnet_getownerzid(dls_dl_handle_t ddh) +{ + return (((dls_devnet_t *)ddh)->dd_owner_zid); +} - *zidp = ddp->dd_zid; +/* + * Is linkid visible from zoneid? A link is visible if it was created in the + * zone, or if it is currently assigned to the zone. + */ +boolean_t +dls_devnet_islinkvisible(datalink_id_t linkid, zoneid_t zoneid) +{ + dls_devnet_t *ddp; + boolean_t result; + if (dls_devnet_hold_tmp(linkid, &ddp) != 0) + return (B_FALSE); + result = (ddp->dd_owner_zid == zoneid || ddp->dd_zid == zoneid); dls_devnet_rele_tmp(ddp); - return (0); + return (result); } /* @@ -1445,9 +1631,10 @@ dls_devnet_rebuild() } int -dls_devnet_create(mac_handle_t mh, datalink_id_t linkid) +dls_devnet_create(mac_handle_t mh, datalink_id_t linkid, zoneid_t zoneid) { dls_link_t *dlp; + dls_devnet_t *ddp; int err; mac_perim_handle_t mph; @@ -1457,20 +1644,17 @@ dls_devnet_create(mac_handle_t mh, datalink_id_t linkid) * until we relinquish the perimeter. */ mac_perim_enter_by_mh(mh, &mph); - /* * Make this association before we call dls_link_hold_create as * we need to use the linkid to get the user name for the link * when we create the MAC client. 
*/ - if ((err = dls_devnet_set(mac_name(mh), linkid, NULL)) != 0) { - mac_perim_exit(mph); - return (err); - } - if ((err = dls_link_hold_create(mac_name(mh), &dlp)) != 0) { - mac_perim_exit(mph); - (void) dls_devnet_unset(mac_name(mh), &linkid, B_TRUE); - return (err); + if ((err = dls_devnet_set(mac_name(mh), linkid, zoneid, &ddp)) == 0) { + if ((err = dls_link_hold_create(mac_name(mh), &dlp)) != 0) { + mac_perim_exit(mph); + (void) dls_devnet_unset(mac_name(mh), &linkid, B_TRUE); + return (err); + } } mac_perim_exit(mph); return (err); @@ -1486,7 +1670,7 @@ int dls_devnet_recreate(mac_handle_t mh, datalink_id_t linkid) { ASSERT(linkid != DATALINK_INVALID_LINKID); - return (dls_devnet_set(mac_name(mh), linkid, NULL)); + return (dls_devnet_set(mac_name(mh), linkid, GLOBAL_ZONEID, NULL)); } int @@ -1504,10 +1688,85 @@ dls_devnet_destroy(mac_handle_t mh, datalink_id_t *idp, boolean_t wait) err = dls_link_rele_by_name(mac_name(mh)); mac_perim_exit(mph); - if (err == 0) - return (0); + if (err != 0) { + /* + * XXX It is a general GLDv3 bug that dls_devnet_set() has to + * be called to re-set the link when destroy fails. The + * zoneid below will be incorrect if this function is ever + * called from kernel context or from a zone other than that + * which initially created the link. + */ + (void) dls_devnet_set(mac_name(mh), *idp, crgetzoneid(CRED()), + NULL); + } + return (err); +} - (void) dls_devnet_set(mac_name(mh), *idp, NULL); +/* + * Implicitly create an IP tunnel link. + */ +static int +i_dls_devnet_create_iptun(const char *name, datalink_id_t *linkid) +{ + int err; + iptun_kparams_t ik; + uint32_t media; + netstack_t *ns; + major_t iptun_major; + dev_info_t *iptun_dip; + + /* First ensure that the iptun device is attached. 
*/ + if ((iptun_major = ddi_name_to_major(IPTUN_DRIVER_NAME)) == (major_t)-1) + return (EINVAL); + if ((iptun_dip = ddi_hold_devi_by_instance(iptun_major, 0, 0)) == NULL) + return (EINVAL); + + if (IS_IPV4_TUN(name)) { + ik.iptun_kparam_type = IPTUN_TYPE_IPV4; + media = DL_IPV4; + } else if (IS_6TO4_TUN(name)) { + ik.iptun_kparam_type = IPTUN_TYPE_6TO4; + media = DL_6TO4; + } else if (IS_IPV6_TUN(name)) { + ik.iptun_kparam_type = IPTUN_TYPE_IPV6; + media = DL_IPV6; + } + ik.iptun_kparam_flags = (IPTUN_KPARAM_TYPE | IPTUN_KPARAM_IMPLICIT); + + /* Obtain a datalink id for this tunnel. */ + err = dls_mgmt_create((char *)name, 0, DATALINK_CLASS_IPTUN, media, + B_FALSE, &ik.iptun_kparam_linkid); + if (err != 0) { + ddi_release_devi(iptun_dip); + return (err); + } + + ns = netstack_get_current(); + err = iptun_create(&ik, CRED()); + netstack_rele(ns); + + if (err != 0) + VERIFY(dls_mgmt_destroy(ik.iptun_kparam_linkid, B_FALSE) == 0); + else + *linkid = ik.iptun_kparam_linkid; + + ddi_release_devi(iptun_dip); + return (err); +} + +static int +i_dls_devnet_destroy_iptun(datalink_id_t linkid) +{ + int err; + + /* + * Note the use of zone_kcred() here as opposed to CRED(). This is + * because the process that does the last close of this /dev/net node + * may not have necessary privileges to delete this IP tunnel, but the + * tunnel must always be implicitly deleted on last close. + */ + if ((err = iptun_delete(linkid, zone_kcred())) == 0) + (void) dls_mgmt_destroy(linkid, B_FALSE); return (err); } diff --git a/usr/src/uts/common/io/dls/dls_stat.c b/usr/src/uts/common/io/dls/dls_stat.c index a6f89a8b49..51e4be7260 100644 --- a/usr/src/uts/common/io/dls/dls_stat.c +++ b/usr/src/uts/common/io/dls/dls_stat.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -103,14 +103,15 @@ dls_stat_update(kstat_t *ksp, dls_link_t *dlp, int rw) int dls_stat_create(const char *module, int instance, const char *name, - int (*update)(struct kstat *, int), void *private, kstat_t **kspp) + zoneid_t zoneid, int (*update)(struct kstat *, int), void *private, + kstat_t **kspp) { kstat_t *ksp; kstat_named_t *knp; uint_t i; - if ((ksp = kstat_create(module, instance, name, "net", - KSTAT_TYPE_NAMED, STAT_INFO_COUNT + 2, 0)) == NULL) { + if ((ksp = kstat_create_zone(module, instance, name, "net", + KSTAT_TYPE_NAMED, STAT_INFO_COUNT + 2, 0, zoneid)) == NULL) { return (EINVAL); } diff --git a/usr/src/uts/common/io/ib/clients/ibd/ibd.c b/usr/src/uts/common/io/ib/clients/ibd/ibd.c index 3ea1af7b7d..58d657bade 100644 --- a/usr/src/uts/common/io/ib/clients/ibd/ibd.c +++ b/usr/src/uts/common/io/ib/clients/ibd/ibd.c @@ -44,13 +44,12 @@ #include <sys/sysmacros.h> /* for offsetof */ #include <sys/disp.h> /* for async thread pri */ #include <sys/atomic.h> /* for atomic_add*() */ -#include <sys/ethernet.h> /* for ETHERTYPE_IP */ +#include <sys/ethernet.h> /* for ETHERTYPE_IPV6 */ #include <netinet/in.h> /* for netinet/ip.h below */ #include <netinet/ip.h> /* for struct ip */ #include <netinet/udp.h> /* for struct udphdr */ #include <inet/common.h> /* for inet/ip.h below */ #include <inet/ip.h> /* for ipha_t */ -#include <inet/ip_if.h> /* for IP6_DL_SAP */ #include <inet/ip6.h> /* for ip6_t */ #include <inet/tcp.h> /* for tcph_t */ #include <netinet/icmp6.h> /* for icmp6_t */ @@ -5698,7 +5697,7 @@ ibd_send(ibd_state_t *state, mblk_t *mp) * For ND6 packets, padding is at the front of the source lladdr. * Insert the padding at front. 
*/ - if (ntohs(ipibp->ipib_rhdr.ipoib_type) == IP6_DL_SAP) { + if (ntohs(ipibp->ipib_rhdr.ipoib_type) == ETHERTYPE_IPV6) { if (MBLKL(mp) < sizeof (ib_header_info_t) + IPV6_HDR_LEN) { if (!pullupmsg(mp, IPV6_HDR_LEN + sizeof (ib_header_info_t))) { @@ -6231,7 +6230,7 @@ ibd_process_rx(ibd_state_t *state, ibd_rwqe_t *rwqe, ibt_wc_t *wc) * the padding from such packets. */ ipibp = (ipoib_hdr_t *)((uchar_t *)mp->b_rptr + sizeof (ipoib_pgrh_t)); - if (ntohs(ipibp->ipoib_type) == IP6_DL_SAP) { + if (ntohs(ipibp->ipoib_type) == ETHERTYPE_IPV6) { if (MBLKL(mp) < sizeof (ipoib_hdr_t) + IPV6_HDR_LEN) { if (!pullupmsg(mp, IPV6_HDR_LEN + sizeof (ipoib_hdr_t))) { diff --git a/usr/src/uts/common/io/mac/mac.c b/usr/src/uts/common/io/mac/mac.c index 151fb6c765..e55f079ba4 100644 --- a/usr/src/uts/common/io/mac/mac.c +++ b/usr/src/uts/common/io/mac/mac.c @@ -2698,7 +2698,8 @@ mactype_unregister(const char *ident) } ASSERT(mtp == (mactype_t *)val); - kmem_free(mtp->mt_brdcst_addr, mtp->mt_addr_length); + if (mtp->mt_brdcst_addr != NULL) + kmem_free(mtp->mt_brdcst_addr, mtp->mt_addr_length); kmem_free(mtp, sizeof (mactype_t)); done: mutex_exit(&i_mactype_lock); diff --git a/usr/src/uts/common/io/mac/mac_client.c b/usr/src/uts/common/io/mac/mac_client.c index 3ac7774895..62976337b4 100644 --- a/usr/src/uts/common/io/mac/mac_client.c +++ b/usr/src/uts/common/io/mac/mac_client.c @@ -1009,6 +1009,21 @@ mac_unicast_primary_info(mac_handle_t mh, char *client_name, boolean_t *in_use) } /* + * Return the current destination MAC address of the specified MAC. + */ +boolean_t +mac_dst_get(mac_handle_t mh, uint8_t *addr) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + rw_enter(&mip->mi_rw_lock, RW_READER); + if (mip->mi_dstaddr_set) + bcopy(mip->mi_dstaddr, addr, mip->mi_type->mt_addr_length); + rw_exit(&mip->mi_rw_lock); + return (mip->mi_dstaddr_set); +} + +/* * Add the specified MAC client to the list of clients which opened * the specified MAC. 
*/ @@ -3494,9 +3509,15 @@ mblk_t * mac_header(mac_handle_t mh, const uint8_t *daddr, uint32_t sap, mblk_t *payload, size_t extra_len) { - mac_impl_t *mip = (mac_impl_t *)mh; + mac_impl_t *mip = (mac_impl_t *)mh; + const uint8_t *hdr_daddr; - return (mip->mi_type->mt_ops.mtops_header(mip->mi_addr, daddr, sap, + /* + * If the MAC is point-to-point with a fixed destination address, then + * we must always use that destination in the MAC header. + */ + hdr_daddr = (mip->mi_dstaddr_set ? mip->mi_dstaddr : daddr); + return (mip->mi_type->mt_ops.mtops_header(mip->mi_addr, hdr_daddr, sap, mip->mi_pdata, payload, extra_len)); } @@ -4180,9 +4201,15 @@ mac_unmark_exclusive(mac_handle_t mh) } /* - * Set the MTU for the specified device. The function returns EBUSY if - * another MAC client prevents the caller to become the exclusive client. - * Returns EAGAIN if the client is started. + * Set the MTU for the specified MAC. Note that this mechanism depends on + * the driver calling mac_maxsdu_update() to update the link MTU if it was + * successful in setting its MTU. + * + * Note that there is potential for improvement here. A better model might be + * to not require drivers to call mac_maxsdu_update(), but rather have this + * function update mi_sdu_max and send notifications if the driver setprop + * callback succeeds. This would remove the burden and complexity from + * drivers. 
*/ int mac_set_mtu(mac_handle_t mh, uint_t new_mtu, uint_t *old_mtu_arg) @@ -4190,32 +4217,15 @@ mac_set_mtu(mac_handle_t mh, uint_t new_mtu, uint_t *old_mtu_arg) mac_impl_t *mip = (mac_impl_t *)mh; uint_t old_mtu; int rv; - boolean_t exclusive = B_FALSE; i_mac_perim_enter(mip); - if ((mip->mi_callbacks->mc_callbacks & MC_SETPROP) == 0 || - (mip->mi_callbacks->mc_callbacks & MC_GETPROP) == 0) { + if (!(mip->mi_callbacks->mc_callbacks & (MC_SETPROP|MC_GETPROP))) { rv = ENOTSUP; goto bail; } - if ((rv = mac_mark_exclusive(mh)) != 0) - goto bail; - exclusive = B_TRUE; - - if (mip->mi_active > 0) { - /* - * The MAC instance is started, for example due to the - * presence of a promiscuous clients. Fail the operation - * since the MAC's MTU cannot be changed while the NIC - * is started. - */ - rv = EAGAIN; - goto bail; - } - - mac_sdu_get(mh, NULL, &old_mtu); + old_mtu = mip->mi_sdu_max; if (old_mtu != new_mtu) { rv = mip->mi_callbacks->mc_setprop(mip->mi_driver, @@ -4223,8 +4233,6 @@ mac_set_mtu(mac_handle_t mh, uint_t new_mtu, uint_t *old_mtu_arg) } bail: - if (exclusive) - mac_unmark_exclusive(mh); i_mac_perim_exit(mip); if (rv == 0 && old_mtu_arg != NULL) diff --git a/usr/src/uts/common/io/mac/mac_flow.c b/usr/src/uts/common/io/mac/mac_flow.c index fd4d13cf1b..b3ed893bfb 100644 --- a/usr/src/uts/common/io/mac/mac_flow.c +++ b/usr/src/uts/common/io/mac/mac_flow.c @@ -83,24 +83,15 @@ flow_stat_init(kstat_named_t *knp) static int flow_stat_update(kstat_t *ksp, int rw) { - flow_entry_t *fep = ksp->ks_private; - flow_stats_t *fsp = &fep->fe_flowstats; - kstat_named_t *knp = ksp->ks_data; - uint64_t *statp; - zoneid_t zid; - int i; + flow_entry_t *fep = ksp->ks_private; + flow_stats_t *fsp = &fep->fe_flowstats; + kstat_named_t *knp = ksp->ks_data; + uint64_t *statp; + int i; if (rw != KSTAT_READ) return (EACCES); - zid = getzoneid(); - if (zid != GLOBAL_ZONEID && zid != fep->fe_zoneid) { - for (i = 0; i < FS_SIZE; i++, knp++) - knp->value.ui64 = 0; - - return (0); - } - 
for (i = 0; i < FS_SIZE; i++, knp++) { statp = (uint64_t *) ((uchar_t *)fsp + flow_stats_list[i].fs_offset); @@ -117,8 +108,12 @@ flow_stat_create(flow_entry_t *fep) kstat_named_t *knp; uint_t nstats = FS_SIZE; - ksp = kstat_create("unix", 0, (char *)fep->fe_flow_name, "flow", - KSTAT_TYPE_NAMED, nstats, 0); + /* + * Fow now, flow entries are only manipulated and visible from the + * global zone. + */ + ksp = kstat_create_zone("unix", 0, (char *)fep->fe_flow_name, "flow", + KSTAT_TYPE_NAMED, nstats, 0, GLOBAL_ZONEID); if (ksp == NULL) return; @@ -205,13 +200,6 @@ mac_flow_create(flow_desc_t *fd, mac_resource_props_t *mrp, char *name, flent->fe_client_cookie = client_cookie; flent->fe_type = type; - /* - * As flow creation is only allowed in global zone, this will - * always set fe_zoneid to GLOBAL_ZONEID, and dls_add_flow() will - * later set the right value. - */ - flent->fe_zoneid = getzoneid(); - /* Save flow desc */ bcopy(fd, &flent->fe_flow_desc, sizeof (*fd)); @@ -905,8 +893,10 @@ mac_flow_get_client_cookie(flow_entry_t *flent) * Forward declarations. */ static uint32_t flow_l2_hash(flow_tab_t *, flow_state_t *); +static uint32_t flow_l2_hash_fe(flow_tab_t *, flow_entry_t *); static int flow_l2_accept(flow_tab_t *, flow_state_t *); static uint32_t flow_ether_hash(flow_tab_t *, flow_state_t *); +static uint32_t flow_ether_hash_fe(flow_tab_t *, flow_entry_t *); static int flow_ether_accept(flow_tab_t *, flow_state_t *); /* @@ -936,15 +926,15 @@ mac_flow_tab_create(flow_ops_t *ops, flow_mask_t mask, uint_t size, ft->ft_mip = mip; /* - * Optimization for DL_ETHER media. + * Optimizations for DL_ETHER media. 
*/ if (mip->mi_info.mi_nativemedia == DL_ETHER) { if (new_ops->fo_hash == flow_l2_hash) new_ops->fo_hash = flow_ether_hash; - + if (new_ops->fo_hash_fe == flow_l2_hash_fe) + new_ops->fo_hash_fe = flow_ether_hash_fe; if (new_ops->fo_accept[0] == flow_l2_accept) new_ops->fo_accept[0] = flow_ether_accept; - } *ftp = ft; } @@ -1215,13 +1205,6 @@ mac_link_flow_add(datalink_id_t linkid, char *flow_name, } /* - * Save the zoneid of the underlying link in the flow entry, - * this is needed to prevent non-global zone from getting - * statistics information of global zone. - */ - flent->fe_zoneid = dlp->dl_zid; - - /* * Add the subflow to the subflow table. Also instantiate the flow * in the mac if there is an active user (we check if the MAC client's * datapath has been setup). @@ -1524,9 +1507,27 @@ mac_link_flow_info(char *flow_name, mac_flowinfo_t *finfo) return (0); } -#define HASH_MAC_VID(a, v, s) \ +/* + * Hash function macro that takes an Ethernet address and VLAN id as input. + */ +#define HASH_ETHER_VID(a, v, s) \ ((((uint32_t)(a)[3] + (a)[4] + (a)[5]) ^ (v)) % (s)) +/* + * Generic layer-2 address hashing function that takes an address and address + * length as input. This is the DJB hash function. 
+ */ +static uint32_t +flow_l2_addrhash(uint8_t *addr, size_t addrlen, size_t htsize) +{ + uint32_t hash = 5381; + size_t i; + + for (i = 0; i < addrlen; i++) + hash = ((hash << 5) + hash) + addr[i]; + return (hash % htsize); +} + #define PKT_TOO_SMALL(s, end) ((s)->fs_mp->b_wptr < (end)) #define CHECK_AND_ADJUST_START_PTR(s, start) { \ @@ -1559,9 +1560,8 @@ flow_l2_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s) static uint32_t flow_l2_hash(flow_tab_t *ft, flow_state_t *s) { - flow_l2info_t *l2 = &s->fs_l2info; - - return (HASH_MAC_VID(l2->l2_daddr, l2->l2_vid, ft->ft_size)); + return (flow_l2_addrhash(s->fs_l2info.l2_daddr, + ft->ft_mip->mi_type->mt_addr_length, ft->ft_size)); } /* @@ -1622,7 +1622,16 @@ flow_ether_hash(flow_tab_t *ft, flow_state_t *s) evhp = (struct ether_vlan_header *)l2->l2_start; l2->l2_daddr = evhp->ether_dhost.ether_addr_octet; - return (HASH_MAC_VID(l2->l2_daddr, l2->l2_vid, ft->ft_size)); + return (HASH_ETHER_VID(l2->l2_daddr, l2->l2_vid, ft->ft_size)); +} + +static uint32_t +flow_ether_hash_fe(flow_tab_t *ft, flow_entry_t *flent) +{ + flow_desc_t *fd = &flent->fe_flow_desc; + + ASSERT((fd->fd_mask & FLOW_LINK_VID) != 0 || fd->fd_vid == 0); + return (HASH_ETHER_VID(fd->fd_dst_mac, fd->fd_vid, ft->ft_size)); } /* ARGSUSED */ @@ -1661,20 +1670,13 @@ flow_ether_accept(flow_tab_t *ft, flow_state_t *s) static int flow_l2_accept_fe(flow_tab_t *ft, flow_entry_t *flent) { - int i; flow_desc_t *fd = &flent->fe_flow_desc; /* - * Dest address is mandatory. + * Dest address is mandatory, and 0 length addresses are not yet + * supported. 
*/ - if ((fd->fd_mask & FLOW_LINK_DST) == 0) - return (EINVAL); - - for (i = 0; i < fd->fd_mac_len; i++) { - if (fd->fd_dst_mac[i] != 0) - break; - } - if (i == fd->fd_mac_len || fd->fd_mac_len < ETHERADDRL) + if ((fd->fd_mask & FLOW_LINK_DST) == 0 || fd->fd_mac_len == 0) return (EINVAL); if ((fd->fd_mask & FLOW_LINK_VID) != 0) { @@ -1700,8 +1702,9 @@ flow_l2_hash_fe(flow_tab_t *ft, flow_entry_t *flent) { flow_desc_t *fd = &flent->fe_flow_desc; - ASSERT((fd->fd_mask & FLOW_LINK_VID) != 0 || fd->fd_vid == 0); - return (HASH_MAC_VID(fd->fd_dst_mac, fd->fd_vid, ft->ft_size)); + ASSERT((fd->fd_mask & FLOW_LINK_VID) == 0 && fd->fd_vid == 0); + return (flow_l2_addrhash(fd->fd_dst_mac, + ft->ft_mip->mi_type->mt_addr_length, ft->ft_size)); } /* diff --git a/usr/src/uts/common/io/mac/mac_provider.c b/usr/src/uts/common/io/mac/mac_provider.c index 408482ce45..e096b2b5aa 100644 --- a/usr/src/uts/common/io/mac/mac_provider.c +++ b/usr/src/uts/common/io/mac/mac_provider.c @@ -251,6 +251,7 @@ mac_register(mac_register_t *mregp, mac_handle_t *mhp) if (mregp->m_dst_addr != NULL) { bcopy(mregp->m_dst_addr, mip->mi_dstaddr, mip->mi_type->mt_addr_length); + mip->mi_dstaddr_set = B_TRUE; } } else if (mregp->m_src_addr != NULL) { goto fail; @@ -262,20 +263,31 @@ mac_register(mac_register_t *mregp, mac_handle_t *mhp) * driver can update this information by calling * mac_pdata_update(). */ - if (mregp->m_pdata != NULL) { + if (mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY) { /* - * Verify that the plugin supports MAC plugin data and that - * the supplied data is valid. + * Verify if the supplied plugin data is valid. Note that + * even if the caller passed in a NULL pointer as plugin data, + * we still need to verify if that's valid as the plugin may + * require plugin data to function. 
*/ - if (!(mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY)) - goto fail; if (!mip->mi_type->mt_ops.mtops_pdata_verify(mregp->m_pdata, mregp->m_pdata_size)) { goto fail; } - mip->mi_pdata = kmem_alloc(mregp->m_pdata_size, KM_SLEEP); - bcopy(mregp->m_pdata, mip->mi_pdata, mregp->m_pdata_size); - mip->mi_pdata_size = mregp->m_pdata_size; + if (mregp->m_pdata != NULL) { + mip->mi_pdata = + kmem_alloc(mregp->m_pdata_size, KM_SLEEP); + bcopy(mregp->m_pdata, mip->mi_pdata, + mregp->m_pdata_size); + mip->mi_pdata_size = mregp->m_pdata_size; + } + } else if (mregp->m_pdata != NULL) { + /* + * The caller supplied non-NULL plugin data, but the plugin + * does not recognize plugin data. + */ + err = EINVAL; + goto fail; } /* @@ -850,6 +862,20 @@ mac_unicst_update(mac_handle_t mh, const uint8_t *addr) i_mac_notify(mip, MAC_NOTE_UNICST); } +void +mac_dst_update(mac_handle_t mh, const uint8_t *addr) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + if (mip->mi_type->mt_addr_length == 0) + return; + + i_mac_perim_enter(mip); + bcopy(addr, mip->mi_dstaddr, mip->mi_type->mt_addr_length); + i_mac_perim_exit(mip); + i_mac_notify(mip, MAC_NOTE_DEST); +} + /* * MAC plugin information changed. */ diff --git a/usr/src/uts/common/io/mac/plugins/mac_6to4.c b/usr/src/uts/common/io/mac/plugins/mac_6to4.c new file mode 100644 index 0000000000..c5ae0c5f28 --- /dev/null +++ b/usr/src/uts/common/io/mac/plugins/mac_6to4.c @@ -0,0 +1,119 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * DL_6TO4 MAC Type plugin for the Nemo mac module + */ + +#include <sys/modctl.h> +#include <sys/dlpi.h> +#include <inet/ip.h> +#include <sys/mac.h> +#include <sys/mac_6to4.h> +#include <sys/mac_ipv4_impl.h> + +static struct modlmisc mac_6to4_modlmisc = { + &mod_miscops, + "6to4 tunneling MAC plugin" +}; + +static struct modlinkage mac_6to4_modlinkage = { + MODREV_1, + &mac_6to4_modlmisc, + NULL +}; + +static mactype_ops_t mac_6to4_type_ops; + +int +_init(void) +{ + mactype_register_t *mtrp; + int err; + + if ((mtrp = mactype_alloc(MACTYPE_VERSION)) == NULL) + return (ENOTSUP); + mtrp->mtr_ident = MAC_PLUGIN_IDENT_6TO4; + mtrp->mtr_ops = &mac_6to4_type_ops; + mtrp->mtr_mactype = DL_6TO4; + mtrp->mtr_nativetype = DL_6TO4; + mtrp->mtr_addrlen = sizeof (ipaddr_t); + if ((err = mactype_register(mtrp)) == 0) { + if ((err = mod_install(&mac_6to4_modlinkage)) != 0) + (void) mactype_unregister(MAC_PLUGIN_IDENT_6TO4); + } + mactype_free(mtrp); + return (err); +} + +int +_fini(void) +{ + int err; + if ((err = mactype_unregister(MAC_PLUGIN_IDENT_6TO4)) != 0) + return (err); + return (mod_remove(&mac_6to4_modlinkage)); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&mac_6to4_modlinkage, modinfop)); +} + +/* + * MAC Type plugin operations. Note that because 6to4 is a form of + * tunneling over IPv4, this plugin is able to steal most of its operations + * from the IPv4 plugin. + */ + +/* + * Check the legality of a 6to4 tunnel SAP value. 
The only acceptable + * values are IPPROTO_IPV6 (IPv6 in IPv4 tunneling) and 0 (for snoop). + */ +/* ARGSUSED */ +boolean_t +mac_6to4_sap_verify(uint32_t sap, uint32_t *bind_sap, void *pdata) +{ + if (sap == IPPROTO_IPV6 || sap == 0) { + if (bind_sap != NULL) + *bind_sap = sap; + return (B_TRUE); + } + return (B_FALSE); +} + +static mactype_ops_t mac_6to4_type_ops = { + MTOPS_PDATA_VERIFY, + mac_ipv4_unicst_verify, + mac_ipv4_multicst_verify, + mac_6to4_sap_verify, + mac_ipv4_header, + mac_ipv4_header_info, + mac_ipv4_pdata_verify, + NULL, + NULL, + NULL +}; diff --git a/usr/src/uts/common/io/mac/plugins/mac_ipv4.c b/usr/src/uts/common/io/mac/plugins/mac_ipv4.c new file mode 100644 index 0000000000..85ddf8dbfa --- /dev/null +++ b/usr/src/uts/common/io/mac/plugins/mac_ipv4.c @@ -0,0 +1,232 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +/* + * DL_IPV4 MAC Type plugin for the Nemo mac module + */ + +#include <sys/types.h> +#include <sys/modctl.h> +#include <sys/dlpi.h> +#include <sys/mac.h> +#include <sys/mac_ipv4.h> +#include <sys/byteorder.h> +#include <sys/strsun.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <inet/common.h> +#include <inet/ip.h> +#include <inet/iptun.h> + +static struct modlmisc mac_ipv4_modlmisc = { + &mod_miscops, + "IPv4 tunneling MAC plugin" +}; + +static struct modlinkage mac_ipv4_modlinkage = { + MODREV_1, + &mac_ipv4_modlmisc, + NULL +}; + +static mactype_ops_t mac_ipv4_type_ops; + +int +_init(void) +{ + mactype_register_t *mtrp; + int err; + + if ((mtrp = mactype_alloc(MACTYPE_VERSION)) == NULL) + return (ENOTSUP); + mtrp->mtr_ident = MAC_PLUGIN_IDENT_IPV4; + mtrp->mtr_ops = &mac_ipv4_type_ops; + mtrp->mtr_mactype = DL_IPV4; + mtrp->mtr_nativetype = DL_IPV4; + mtrp->mtr_addrlen = sizeof (ipaddr_t); + if ((err = mactype_register(mtrp)) == 0) { + if ((err = mod_install(&mac_ipv4_modlinkage)) != 0) + (void) mactype_unregister(MAC_PLUGIN_IDENT_IPV4); + } + mactype_free(mtrp); + return (err); +} + +int +_fini(void) +{ + int err; + if ((err = mactype_unregister(MAC_PLUGIN_IDENT_IPV4)) != 0) + return (err); + return (mod_remove(&mac_ipv4_modlinkage)); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&mac_ipv4_modlinkage, modinfop)); +} + +/* + * MAC Type plugin operations + */ + +/* ARGSUSED */ +int +mac_ipv4_unicst_verify(const void *addr, void *pdata) +{ + const ipaddr_t *ipaddr = addr; + return ((CLASSD(*ipaddr) || (*ipaddr == INADDR_BROADCAST)) ? + EINVAL : 0); +} + +/* ARGSUSED */ +int +mac_ipv4_multicst_verify(const void *addr, void *pdata) +{ + /* + * IPv4 configured tunnels do not have the concept of link-layer + * multicast. + */ + return (ENOTSUP); +} + +/* + * Check the legality of an IPv4 tunnel SAP value. The only two acceptable + * values are IPPROTO_ENCAP (IPv4 in IPv4) and IPPROTO_IPV6 (IPv6 in IPv4). 
+ */ +/* ARGSUSED */ +boolean_t +mac_ipv4_sap_verify(uint32_t sap, uint32_t *bind_sap, void *pdata) +{ + if (sap == IPPROTO_ENCAP || sap == IPPROTO_IPV6 || sap == 0) { + if (bind_sap != NULL) + *bind_sap = sap; + return (B_TRUE); + } + return (B_FALSE); +} + +/* + * Build an IPv4 link-layer header for tunneling. If provided, the + * template header provided by the driver supplies the header length, type + * of service, don't fragment flag, ttl, and potential options (depending + * on the header length). + */ +/* ARGSUSED */ +mblk_t * +mac_ipv4_header(const void *saddr, const void *daddr, uint32_t sap, void *pdata, + mblk_t *payload, size_t extra_len) +{ + struct ip *iphp; + struct ip *tmpl_iphp = pdata; + mblk_t *mp; + size_t hdr_len = sizeof (struct ip); + + if (!mac_ipv4_sap_verify(sap, NULL, NULL)) + return (NULL); + + if (tmpl_iphp != NULL) + hdr_len = tmpl_iphp->ip_hl * sizeof (uint32_t); + + if ((mp = allocb(hdr_len + extra_len, BPRI_HI)) == NULL) + return (NULL); + + iphp = (struct ip *)mp->b_rptr; + + bzero(iphp, hdr_len + extra_len); + if (tmpl_iphp != NULL) { + bcopy(tmpl_iphp, iphp, hdr_len); + } else { + iphp->ip_hl = IP_SIMPLE_HDR_LENGTH_IN_WORDS; + iphp->ip_off = htons(IP_DF); + iphp->ip_ttl = IPTUN_DEFAULT_HOPLIMIT; + } + + iphp->ip_v = IPVERSION; + iphp->ip_len = 0; + iphp->ip_p = (uint8_t)sap; + bcopy(saddr, &(iphp->ip_src), sizeof (struct in_addr)); + bcopy(daddr, &(iphp->ip_dst), sizeof (struct in_addr)); + + mp->b_wptr += hdr_len; + return (mp); +} + +/* ARGSUSED */ +int +mac_ipv4_header_info(mblk_t *mp, void *pdata, mac_header_info_t *hdr_info) +{ + struct ip *iphp; + + if (MBLKL(mp) < sizeof (struct ip)) + return (EINVAL); + + iphp = (struct ip *)mp->b_rptr; + + /* + * IPv4 tunnels don't have a concept of link-layer multicast since + * they have fixed unicast endpoints. 
+ */ + if (mac_ipv4_unicst_verify(&iphp->ip_dst, NULL) != 0) + return (EINVAL); + + hdr_info->mhi_hdrsize = iphp->ip_hl * sizeof (uint32_t); + hdr_info->mhi_pktsize = 0; + hdr_info->mhi_daddr = (const uint8_t *)&(iphp->ip_dst); + hdr_info->mhi_saddr = (const uint8_t *)&(iphp->ip_src); + hdr_info->mhi_origsap = hdr_info->mhi_bindsap = iphp->ip_p; + hdr_info->mhi_dsttype = MAC_ADDRTYPE_UNICAST; + return (0); +} + +/* + * Plugin data is either NULL or a pointer to an IPv4 header. + */ +boolean_t +mac_ipv4_pdata_verify(void *pdata, size_t pdata_size) +{ + const struct ip *iphp = pdata; + + if (pdata == NULL) + return (pdata_size == 0); + if (pdata_size < sizeof (struct ip)) + return (B_FALSE); + /* Make sure that the header length field matches pdata_size */ + return (pdata_size == iphp->ip_hl * sizeof (uint32_t)); +} + +static mactype_ops_t mac_ipv4_type_ops = { + MTOPS_PDATA_VERIFY, + mac_ipv4_unicst_verify, + mac_ipv4_multicst_verify, + mac_ipv4_sap_verify, + mac_ipv4_header, + mac_ipv4_header_info, + mac_ipv4_pdata_verify, + NULL, + NULL, + NULL +}; diff --git a/usr/src/uts/common/io/mac/plugins/mac_ipv6.c b/usr/src/uts/common/io/mac/plugins/mac_ipv6.c new file mode 100644 index 0000000000..6cefa7cce3 --- /dev/null +++ b/usr/src/uts/common/io/mac/plugins/mac_ipv6.c @@ -0,0 +1,283 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * DL_IPV6 MAC Type plugin for the Nemo mac module + */ + +#include <sys/types.h> +#include <sys/modctl.h> +#include <sys/dlpi.h> +#include <sys/mac.h> +#include <sys/mac_ipv6.h> +#include <sys/mac_ipv4_impl.h> +#include <sys/byteorder.h> +#include <sys/strsun.h> +#include <netinet/ip6.h> +#include <inet/common.h> +#include <inet/mib2.h> +#include <inet/ip.h> +#include <inet/ip6.h> +#include <inet/iptun.h> + +static struct modlmisc mac_ipv6_modlmisc = { + &mod_miscops, + "IPv6 tunneling MAC plugin" +}; + +static struct modlinkage mac_ipv6_modlinkage = { + MODREV_1, + &mac_ipv6_modlmisc, + NULL +}; + +static mactype_ops_t mac_ipv6_type_ops; + +int +_init(void) +{ + mactype_register_t *mtrp; + int err; + + if ((mtrp = mactype_alloc(MACTYPE_VERSION)) == NULL) + return (EINVAL); + mtrp->mtr_ident = MAC_PLUGIN_IDENT_IPV6; + mtrp->mtr_ops = &mac_ipv6_type_ops; + mtrp->mtr_mactype = DL_IPV6; + mtrp->mtr_nativetype = DL_IPV6; + mtrp->mtr_addrlen = sizeof (in6_addr_t); + if ((err = mactype_register(mtrp)) == 0) { + if ((err = mod_install(&mac_ipv6_modlinkage)) != 0) + (void) mactype_unregister(MAC_PLUGIN_IDENT_IPV6); + } + mactype_free(mtrp); + return (err); +} + +int +_fini(void) +{ + int err; + if ((err = mactype_unregister(MAC_PLUGIN_IDENT_IPV6)) != 0) + return (err); + return (mod_remove(&mac_ipv6_modlinkage)); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&mac_ipv6_modlinkage, modinfop)); +} + + +/* + * MAC Type plugin operations + */ + +/* ARGSUSED */ +int +mac_ipv6_unicst_verify(const void *addr, void *pdata) +{ + const in6_addr_t *in6addr = addr; + if (IN6_IS_ADDR_UNSPECIFIED(in6addr) || + 
IN6_IS_ADDR_LOOPBACK(in6addr) || + IN6_IS_ADDR_MULTICAST(in6addr) || + IN6_IS_ADDR_V4MAPPED(in6addr) || + IN6_IS_ADDR_V4COMPAT(in6addr)) { + return (EINVAL); + } + return (0); +} + +/* + * Build an IPv6 link-layer header for tunneling. If provided, the + * template header provided by the driver supplies the traffic class, flow + * label, hop limit, and potential options. The template's payload length + * must either be 0 if there are no extension headers, or reflect the size + * of the extension headers if present. The template's next header value + * must either be IPPROTO_NONE if no extension headers are present, or + * reflect the type of extension header that follows (the same is true for + * the field values of the extension headers themselves.) + */ +/* ARGSUSED */ +mblk_t * +mac_ipv6_header(const void *saddr, const void *daddr, uint32_t sap, void *pdata, + mblk_t *payload, size_t extra_len) +{ + ip6_t *ip6hp; + ip6_t *tmpl_ip6hp = pdata; + mblk_t *mp; + size_t hdr_len = sizeof (ip6_t); + uint8_t *nxt_proto; + + if (!mac_ipv4_sap_verify(sap, NULL, NULL)) + return (NULL); + + if (tmpl_ip6hp != NULL) + hdr_len = sizeof (ip6_t) + tmpl_ip6hp->ip6_plen; + + if ((mp = allocb(hdr_len + extra_len, BPRI_HI)) == NULL) + return (NULL); + + ip6hp = (ip6_t *)mp->b_rptr; + + bzero(ip6hp, hdr_len + extra_len); + if (tmpl_ip6hp != NULL) { + bcopy(tmpl_ip6hp, ip6hp, hdr_len); + } else { + ip6hp->ip6_nxt = IPPROTO_NONE; + ip6hp->ip6_hlim = IPTUN_DEFAULT_HOPLIMIT; + } + + ip6hp->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; + ip6hp->ip6_plen = 0; + + nxt_proto = &ip6hp->ip6_nxt; + if (*nxt_proto != IPPROTO_NONE) { + ip6_dest_t *hdrptr = (ip6_dest_t *)(ip6hp + 1); + nxt_proto = &hdrptr->ip6d_nxt; + while (*nxt_proto != IPPROTO_NONE) { + hdrptr = (ip6_dest_t *)((uint8_t *)hdrptr + + (8 * (hdrptr->ip6d_len + 1))); + nxt_proto = &hdrptr->ip6d_nxt; + } + } + *nxt_proto = (uint8_t)sap; + bcopy(saddr, &(ip6hp->ip6_src), sizeof (in6_addr_t)); + bcopy(daddr, &(ip6hp->ip6_dst), sizeof 
(in6_addr_t)); + + mp->b_wptr += hdr_len; + return (mp); +} + +/* ARGSUSED */ +int +mac_ipv6_header_info(mblk_t *mp, void *pdata, mac_header_info_t *hdr_info) +{ + ip6_t *ip6hp; + uint8_t *whereptr, *endptr; + uint8_t nexthdr; + + if (MBLKL(mp) < sizeof (ip6_t)) + return (EINVAL); + + ip6hp = (ip6_t *)mp->b_rptr; + + /* + * IPv6 tunnels don't have a concept of link-layer multicast since + * they have fixed unicast endpoints. + */ + if (mac_ipv6_unicst_verify(&ip6hp->ip6_dst, NULL) != 0) + return (EINVAL); + + nexthdr = ip6hp->ip6_nxt; + whereptr = (uint8_t *)(ip6hp + 1); + endptr = mp->b_wptr; + while (nexthdr != IPPROTO_ENCAP && nexthdr != IPPROTO_IPV6) { + ip6_dest_t *exthdrptr = (ip6_dest_t *)whereptr; + + if (whereptr + sizeof (ip6_dest_t) >= endptr) + return (EINVAL); + + nexthdr = exthdrptr->ip6d_nxt; + whereptr += 8 * (exthdrptr->ip6d_len + 1); + + if (whereptr > endptr) + return (EINVAL); + } + + hdr_info->mhi_hdrsize = whereptr - mp->b_rptr; + hdr_info->mhi_pktsize = 0; + hdr_info->mhi_daddr = (const uint8_t *)&(ip6hp->ip6_dst); + hdr_info->mhi_saddr = (const uint8_t *)&(ip6hp->ip6_src); + hdr_info->mhi_bindsap = hdr_info->mhi_origsap = nexthdr; + hdr_info->mhi_dsttype = MAC_ADDRTYPE_UNICAST; + return (0); +} + +/* + * This plugin's MAC plugin data is a template IPv6 header followed by + * optional extension headers. The chain of headers must be terminated by + * a header with a next header value of IPPROTO_NONE. The payload length + * of the IPv6 header must be 0 if there are no extension headers, or must + * reflect the total size of extension headers present. + */ +boolean_t +mac_ipv6_pdata_verify(void *pdata, size_t pdata_size) +{ + ip6_t *ip6hp = pdata; + uint8_t *whereptr, *endptr; + uint8_t nexthdr; + + /* + * Since the plugin does not require plugin data, it is acceptable + * for drivers to pass in NULL plugin data as long as the plugin + * data size is consistent. 
+ */ + if (pdata == NULL) + return (pdata_size == 0); + + /* First verify that we have enough data to hold an IPv6 header. */ + if (pdata_size < sizeof (ip6_t)) + return (B_FALSE); + /* Make sure that pdata_size is consistent with the payload length. */ + if (pdata_size != sizeof (ip6_t) + ip6hp->ip6_plen) + return (B_FALSE); + + /* + * Make sure that the header chain is terminated by a header with a + * next header value of IPPROTO_NONE. + */ + nexthdr = ip6hp->ip6_nxt; + if (nexthdr == IPPROTO_NONE) + return (ip6hp->ip6_plen == 0); + whereptr = (uint8_t *)(ip6hp + 1); + endptr = (uint8_t *)pdata + pdata_size; + + while (nexthdr != IPPROTO_NONE && whereptr < endptr) { + ip6_dest_t *hdrptr = (ip6_dest_t *)whereptr; + + /* make sure we're pointing at a complete header */ + if (whereptr + sizeof (ip6_dest_t) > endptr) + break; + nexthdr = hdrptr->ip6d_nxt; + whereptr += 8 * (hdrptr->ip6d_len + 1); + } + + return (nexthdr == IPPROTO_NONE && whereptr == endptr); +} + +static mactype_ops_t mac_ipv6_type_ops = { + MTOPS_PDATA_VERIFY, + mac_ipv6_unicst_verify, + mac_ipv4_multicst_verify, /* neither plugin supports multicast */ + mac_ipv4_sap_verify, /* same set of legal SAP values */ + mac_ipv6_header, + mac_ipv6_header_info, + mac_ipv6_pdata_verify, + NULL, + NULL, + NULL +}; diff --git a/usr/src/uts/common/io/simnet/simnet.c b/usr/src/uts/common/io/simnet/simnet.c index 2654538d02..f1a172dd9b 100644 --- a/usr/src/uts/common/io/simnet/simnet.c +++ b/usr/src/uts/common/io/simnet/simnet.c @@ -74,13 +74,13 @@ static uint8_t *mcastaddr_lookup(simnet_dev_t *, const uint8_t *); static dld_ioc_info_t simnet_ioc_list[] = { {SIMNET_IOC_CREATE, DLDCOPYINOUT, sizeof (simnet_ioc_create_t), - simnet_ioc_create, {PRIV_SYS_DL_CONFIG}}, + simnet_ioc_create, secpolicy_dl_config}, {SIMNET_IOC_DELETE, DLDCOPYIN, sizeof (simnet_ioc_delete_t), - simnet_ioc_delete, {PRIV_SYS_DL_CONFIG}}, + simnet_ioc_delete, secpolicy_dl_config}, {SIMNET_IOC_INFO, DLDCOPYINOUT, sizeof (simnet_ioc_info_t), - 
simnet_ioc_info, {NULL}}, + simnet_ioc_info, NULL}, {SIMNET_IOC_MODIFY, DLDCOPYIN, sizeof (simnet_ioc_modify_t), - simnet_ioc_modify, {PRIV_SYS_DL_CONFIG}}, + simnet_ioc_modify, secpolicy_dl_config} }; DDI_DEFINE_STREAM_OPS(simnet_dev_ops, nulldev, nulldev, simnet_attach, @@ -397,6 +397,7 @@ simnet_ioc_create(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) sdev->sd_type = create_arg->sic_type; sdev->sd_link_id = create_arg->sic_link_id; + sdev->sd_zoneid = crgetzoneid(cred); sdev->sd_refcount++; mutex_init(&sdev->sd_instlock, NULL, MUTEX_DRIVER, NULL); cv_init(&sdev->sd_threadwait, NULL, CV_DRIVER, NULL); @@ -420,7 +421,8 @@ simnet_ioc_create(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) goto exit; } - if ((err = dls_devnet_create(sdev->sd_mh, sdev->sd_link_id)) != 0) { + if ((err = dls_devnet_create(sdev->sd_mh, sdev->sd_link_id, + crgetzoneid(cred))) != 0) { simnet_dev_unref(sdev); goto exit; } @@ -473,6 +475,12 @@ simnet_ioc_modify(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) return (ENOENT); } + if (sdev->sd_zoneid != crgetzoneid(cred)) { + rw_exit(&simnet_dev_lock); + simnet_dev_unref(sdev); + return (ENOENT); + } + if (sdev->sd_link_id == modify_arg->sim_peer_link_id) { /* Cannot peer with self */ rw_exit(&simnet_dev_lock); @@ -488,13 +496,21 @@ simnet_ioc_modify(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) return (0); } - if (modify_arg->sim_peer_link_id != DATALINK_INVALID_LINKID && - (sdev_peer = simnet_dev_lookup(modify_arg->sim_peer_link_id)) == - NULL) { - /* Peer simnet device not available */ - rw_exit(&simnet_dev_lock); - simnet_dev_unref(sdev); - return (ENOENT); + if (modify_arg->sim_peer_link_id != DATALINK_INVALID_LINKID) { + sdev_peer = simnet_dev_lookup(modify_arg->sim_peer_link_id); + if (sdev_peer == NULL) { + /* Peer simnet device not available */ + rw_exit(&simnet_dev_lock); + simnet_dev_unref(sdev); + return (ENOENT); + } + if (sdev_peer->sd_zoneid != sdev->sd_zoneid) { + /* 
The two peers must be in the same zone (for now). */ + rw_exit(&simnet_dev_lock); + simnet_dev_unref(sdev); + simnet_dev_unref(sdev_peer); + return (EACCES); + } } /* First remove any previous peer */ @@ -533,6 +549,12 @@ simnet_ioc_delete(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) return (ENOENT); } + if (sdev->sd_zoneid != crgetzoneid(cred)) { + rw_exit(&simnet_dev_lock); + simnet_dev_unref(sdev); + return (ENOENT); + } + if ((err = dls_devnet_destroy(sdev->sd_mh, &tmpid, B_TRUE)) != 0) { rw_exit(&simnet_dev_lock); simnet_dev_unref(sdev); @@ -570,7 +592,8 @@ simnet_ioc_delete(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) return (err); fail: /* Re-create simnet instance and add any previous peer */ - (void) dls_devnet_create(sdev->sd_mh, sdev->sd_link_id); + (void) dls_devnet_create(sdev->sd_mh, sdev->sd_link_id, + crgetzoneid(cred)); sdev->sd_flags &= ~SDF_SHUTDOWN; ASSERT(sdev->sd_peer_dev == NULL); @@ -600,6 +623,10 @@ simnet_ioc_info(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) simnet_ioc_info_t *info_arg = karg; simnet_dev_t *sdev; + /* Make sure that the simnet link is visible from the caller's zone. */ + if (!dls_devnet_islinkvisible(info_arg->sii_link_id, crgetzoneid(cred))) + return (ENOENT); + rw_enter(&simnet_dev_lock, RW_READER); if ((sdev = simnet_dev_lookup(info_arg->sii_link_id)) == NULL) { rw_exit(&simnet_dev_lock); diff --git a/usr/src/uts/common/io/simnet/simnet_impl.h b/usr/src/uts/common/io/simnet/simnet_impl.h index 1409ab7f26..74dcba5113 100644 --- a/usr/src/uts/common/io/simnet/simnet_impl.h +++ b/usr/src/uts/common/io/simnet/simnet_impl.h @@ -26,6 +26,7 @@ #ifndef _SYS_SIMNET_IMPL_H #define _SYS_SIMNET_IMPL_H +#include <sys/types.h> #include <sys/list.h> #include <sys/mutex.h> #include <sys/mac.h> @@ -65,6 +66,7 @@ typedef struct simnet_dev { list_node_t sd_listnode; uint_t sd_type; /* WiFi, Ethernet etc. 
*/ datalink_id_t sd_link_id; + zoneid_t sd_zoneid; /* zone where created */ struct simnet_dev *sd_peer_dev; /* Attached peer, if any */ uint_t sd_flags; /* Device flags SDF_* */ uint_t sd_refcount; diff --git a/usr/src/uts/common/io/softmac/softmac_main.c b/usr/src/uts/common/io/softmac/softmac_main.c index bfdf3ee851..726803112f 100644 --- a/usr/src/uts/common/io/softmac/softmac_main.c +++ b/usr/src/uts/common/io/softmac/softmac_main.c @@ -582,11 +582,14 @@ softmac_create_datalink(softmac_t *softmac) /* * Create the GLDv3 datalink. */ - if ((!(softmac->smac_flags & SOFTMAC_NOSUPP)) && - ((err = dls_devnet_create(softmac->smac_mh, linkid)) != 0)) { - cmn_err(CE_WARN, "dls_devnet_create failed for %s", - softmac->smac_devname); - return (err); + if (!(softmac->smac_flags & SOFTMAC_NOSUPP)) { + err = dls_devnet_create(softmac->smac_mh, linkid, + crgetzoneid(CRED())); + if (err != 0) { + cmn_err(CE_WARN, "dls_devnet_create failed for %s", + softmac->smac_devname); + return (err); + } } if (linkid == DATALINK_INVALID_LINKID) { @@ -988,7 +991,8 @@ softmac_destroy(dev_info_t *dip, dev_t dev) */ if (!(smac_flags & (SOFTMAC_GLDV3 | SOFTMAC_NOSUPP))) { if ((err = mac_disable_nowait(smac_mh)) != 0) { - (void) dls_devnet_create(smac_mh, linkid); + (void) dls_devnet_create(smac_mh, linkid, + crgetzoneid(CRED())); goto error; } /* diff --git a/usr/src/uts/common/io/vnic/vnic_ctl.c b/usr/src/uts/common/io/vnic/vnic_ctl.c index d4f5554949..2024deba40 100644 --- a/usr/src/uts/common/io/vnic/vnic_ctl.c +++ b/usr/src/uts/common/io/vnic/vnic_ctl.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -31,7 +31,7 @@ #include <sys/modctl.h> #include <sys/vnic.h> #include <sys/vnic_impl.h> -#include <sys/priv_names.h> +#include <sys/policy.h> /* module description */ #define VNIC_LINKINFO "Virtual NIC" @@ -49,13 +49,13 @@ static int vnic_ioc_modify(void *, intptr_t, int, cred_t *, int *); static dld_ioc_info_t vnic_ioc_list[] = { {VNIC_IOC_CREATE, DLDCOPYINOUT, sizeof (vnic_ioc_create_t), - vnic_ioc_create, {PRIV_SYS_DL_CONFIG}}, + vnic_ioc_create, secpolicy_dl_config}, {VNIC_IOC_DELETE, DLDCOPYIN, sizeof (vnic_ioc_delete_t), - vnic_ioc_delete, {PRIV_SYS_DL_CONFIG}}, + vnic_ioc_delete, secpolicy_dl_config}, {VNIC_IOC_INFO, DLDCOPYINOUT, sizeof (vnic_ioc_info_t), - vnic_ioc_info, {NULL}}, + vnic_ioc_info, NULL}, {VNIC_IOC_MODIFY, DLDCOPYIN, sizeof (vnic_ioc_modify_t), - vnic_ioc_modify, {PRIV_SYS_DL_CONFIG}}, + vnic_ioc_modify, secpolicy_dl_config} }; DDI_DEFINE_STREAM_OPS(vnic_dev_ops, nulldev, nulldev, vnic_attach, vnic_detach, @@ -266,7 +266,7 @@ create: err = vnic_dev_create(create_arg->vc_vnic_id, create_arg->vc_link_id, &mac_addr_type, &mac_len, mac_addr, &mac_slot, mac_prefix_len, create_arg->vc_vid, &create_arg->vc_resource_props, - create_arg->vc_flags, &diag); + create_arg->vc_flags, &diag, cred); if (err != 0) goto bail; @@ -309,7 +309,7 @@ vnic_ioc_delete(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) { vnic_ioc_delete_t *delete_arg = karg; - return (vnic_dev_delete(delete_arg->vd_vnic_id, 0)); + return (vnic_dev_delete(delete_arg->vd_vnic_id, 0, cred)); } /* ARGSUSED */ @@ -318,5 +318,5 @@ vnic_ioc_info(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) { vnic_ioc_info_t *info_arg = karg; - return (vnic_info(&info_arg->vi_info)); + return (vnic_info(&info_arg->vi_info, cred)); } diff --git a/usr/src/uts/common/io/vnic/vnic_dev.c b/usr/src/uts/common/io/vnic/vnic_dev.c index faf2753ce1..7783bb9d78 100644 --- a/usr/src/uts/common/io/vnic/vnic_dev.c +++ b/usr/src/uts/common/io/vnic/vnic_dev.c @@ -24,6 +24,7 @@ */ #include 
<sys/types.h> +#include <sys/cred.h> #include <sys/sysmacros.h> #include <sys/conf.h> #include <sys/cmn_err.h> @@ -319,7 +320,8 @@ int vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid, vnic_mac_addr_type_t *vnic_addr_type, int *mac_len, uchar_t *mac_addr, int *mac_slot, uint_t mac_prefix_len, uint16_t vid, - mac_resource_props_t *mrp, uint32_t flags, vnic_ioc_diag_t *diag) + mac_resource_props_t *mrp, uint32_t flags, vnic_ioc_diag_t *diag, + cred_t *credp) { vnic_t *vnic; mac_register_t *mac; @@ -492,7 +494,8 @@ vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid, if (!is_anchor) mac_set_upper_mac(vnic->vn_mch, vnic->vn_mh); - if ((err = dls_devnet_create(vnic->vn_mh, vnic->vn_id)) != 0) { + err = dls_devnet_create(vnic->vn_mh, vnic->vn_id, crgetzoneid(credp)); + if (err != 0) { VERIFY(is_anchor || mac_margin_remove(vnic->vn_lower_mh, vnic->vn_margin) == 0); (void) mac_unregister(vnic->vn_mh); @@ -553,7 +556,7 @@ vnic_dev_modify(datalink_id_t vnic_id, uint_t modify_mask, /* ARGSUSED */ int -vnic_dev_delete(datalink_id_t vnic_id, uint32_t flags) +vnic_dev_delete(datalink_id_t vnic_id, uint32_t flags, cred_t *credp) { vnic_t *vnic = NULL; mod_hash_val_t val; @@ -582,7 +585,8 @@ vnic_dev_delete(datalink_id_t vnic_id, uint32_t flags) * any new claims on mac_impl_t. */ if ((rc = mac_disable(vnic->vn_mh)) != 0) { - (void) dls_devnet_create(vnic->vn_mh, vnic_id); + (void) dls_devnet_create(vnic->vn_mh, vnic_id, + crgetzoneid(credp)); rw_exit(&vnic_lock); return (rc); } @@ -866,11 +870,15 @@ vnic_m_getprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num, } int -vnic_info(vnic_info_t *info) +vnic_info(vnic_info_t *info, cred_t *credp) { vnic_t *vnic; int err; + /* Make sure that the VNIC link is visible from the caller's zone. 
*/ + if (!dls_devnet_islinkvisible(info->vn_vnic_id, crgetzoneid(credp))) + return (ENOENT); + rw_enter(&vnic_lock, RW_WRITER); err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(info->vn_vnic_id), diff --git a/usr/src/uts/common/net/if.h b/usr/src/uts/common/net/if.h index 602fe7c291..cab4f6ba60 100644 --- a/usr/src/uts/common/net/if.h +++ b/usr/src/uts/common/net/if.h @@ -681,54 +681,6 @@ typedef struct ifa_msghdr { int ifam_metric; /* value of ipif_metric */ } ifa_msghdr_t; -/* currently tunnels only support IPv4 or IPv6 */ -enum ifta_proto { - IFTAP_INVALID, - IFTAP_IPV4, - IFTAP_IPV6 -}; - -#define IFTUN_SECINFOLEN 8 /* In units of 32-bit words. */ -#define IFTUN_VERSION 1 /* Current version number. */ - -/* - * Used by tunneling module to get/set a tunnel parameters using - * SIOCTUN[SG]PARAM. - * - * There is a version number and an array of uint32_t at the end of this - * ioctl because in a perfect world, the ipsec_req_t would be inside - * tun_addreq. Since this file is independent of IP (and IPsec), I have to - * just leave room there, and have the appropriate handlers deal with the - * security information. - * - * In the future, the sockaddr types and the ta_vers could be used together - * to determine the nature of the security information that is at the end - * of this ioctl. - */ -struct iftun_req { - char ifta_lifr_name[LIFNAMSIZ]; /* if name */ - struct sockaddr_storage ifta_saddr; /* source address */ - struct sockaddr_storage ifta_daddr; /* destination address */ - uint_t ifta_flags; /* See below */ - /* IP version information is read only */ - enum ifta_proto ifta_upper; /* IP version above tunnel */ - enum ifta_proto ifta_lower; /* IP version below tunnel */ - uint_t ifta_vers; /* Version number */ - uint32_t ifta_secinfo[IFTUN_SECINFOLEN]; /* Security prefs. 
*/ - int16_t ifta_encap_lim; /* Encapsulation limit */ - uint8_t ifta_hop_limit; /* Hop limit */ - uint8_t ifta_spare0; /* Pad to 64-bit boundary */ - uint32_t ifta_spare1; -}; - -/* ifta_flags are set to indicate which members are valid */ -#define IFTUN_SRC 0x01 -#define IFTUN_DST 0x02 -#define IFTUN_SECURITY 0x04 /* Pay attention to secinfo */ -#define IFTUN_ENCAP 0x08 /* Pay attention to encap */ -#define IFTUN_HOPLIMIT 0x10 /* Pay attention to hoplimit */ -#define IFTUN_COMPLEX_SECURITY 0x20 /* Policy too big for ioctl */ - #endif /* !defined(_XOPEN_SOURCE) || defined(__EXTENSIONS__) */ /* diff --git a/usr/src/uts/common/net/if_types.h b/usr/src/uts/common/net/if_types.h index be8a0ea26d..7e83b0ec39 100644 --- a/usr/src/uts/common/net/if_types.h +++ b/usr/src/uts/common/net/if_types.h @@ -1,5 +1,5 @@ /* - * Copyright 1993-2003 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* @@ -38,7 +38,6 @@ #ifndef _NET_IF_TYPES_H #define _NET_IF_TYPES_H -#pragma ident "%Z%%M% %I% %E% SMI" /* from UCB 8.3 (Berkeley) 4/28/95 */ #ifdef __cplusplus @@ -106,6 +105,9 @@ extern "C" { #define IFT_PROPVIRTUAL 0x35 /* Proprietary Virtual/internal */ #define IFT_PROPMUX 0x36 /* Proprietary Multiplexing */ #define IFT_IB 0xc7 /* Infiniband */ +#define IFT_IPV4 0xc8 /* IPv4 tunnel */ +#define IFT_IPV6 0xc9 /* IPv6 tunnel */ +#define IFT_6TO4 0xca /* 6to4 tunnel */ #ifdef __cplusplus } diff --git a/usr/src/uts/common/os/policy.c b/usr/src/uts/common/os/policy.c index 5b47afa2cd..037e745f84 100644 --- a/usr/src/uts/common/os/policy.c +++ b/usr/src/uts/common/os/policy.c @@ -1736,10 +1736,21 @@ secpolicy_dl_config(const cred_t *cr) { if (PRIV_POLICY_ONLY(cr, PRIV_SYS_NET_CONFIG, B_FALSE)) return (secpolicy_net_config(cr, B_FALSE)); - return (PRIV_POLICY(cr, PRIV_SYS_DL_CONFIG, B_FALSE, EPERM, - NULL)); + return (PRIV_POLICY(cr, PRIV_SYS_DL_CONFIG, B_FALSE, EPERM, NULL)); } +/* + *
PRIV_SYS_DL_CONFIG is a superset of PRIV_SYS_IPTUN_CONFIG. + */ +int +secpolicy_iptun_config(const cred_t *cr) +{ + if (PRIV_POLICY_ONLY(cr, PRIV_SYS_NET_CONFIG, B_FALSE)) + return (secpolicy_net_config(cr, B_FALSE)); + if (PRIV_POLICY_ONLY(cr, PRIV_SYS_DL_CONFIG, B_FALSE)) + return (secpolicy_dl_config(cr)); + return (PRIV_POLICY(cr, PRIV_SYS_IPTUN_CONFIG, B_FALSE, EPERM, NULL)); +} /* * Map IP pseudo privileges to actual privileges. @@ -2279,26 +2290,6 @@ secpolicy_xvm_control(const cred_t *cr) } /* - * secpolicy_dld_ioctl - * - * Determine if the subject has permission to use certain dld ioctls. - * Each ioctl should require a limited number of privileges. A large - * number indicates a poor design. - */ -int -secpolicy_dld_ioctl(const cred_t *cr, const char *dld_priv, const char *msg) -{ - int rv; - - if ((rv = priv_getbyname(dld_priv, 0)) >= 0) { - return (PRIV_POLICY(cr, rv, B_FALSE, EPERM, msg)); - } - /* priv_getbyname() returns -ve errno */ - return (-rv); - -} - -/* * secpolicy_ppp_config * * Determine if the subject has sufficient privileges to configure PPP and diff --git a/usr/src/uts/common/os/priv_defs b/usr/src/uts/common/os/priv_defs index 8875ddccab..ff839d4a60 100644 --- a/usr/src/uts/common/os/priv_defs +++ b/usr/src/uts/common/os/priv_defs @@ -376,9 +376,14 @@ privilege PRIV_SYS_MOUNT only control mounts performed from within said zone. Outside the global zone, the "nodevices" option is always forced. +privilege PRIV_SYS_IPTUN_CONFIG + + Allows a process to configure IP tunnel links. + privilege PRIV_SYS_DL_CONFIG - Allows a process to configure a system's datalink interfaces. + Allows a process to configure all classes of datalinks, including + configuration allowed by PRIV_SYS_IPTUN_CONFIG. 
privilege PRIV_SYS_IP_CONFIG diff --git a/usr/src/uts/common/os/zone.c b/usr/src/uts/common/os/zone.c index 6d5f7a9696..3da9721fad 100644 --- a/usr/src/uts/common/os/zone.c +++ b/usr/src/uts/common/os/zone.c @@ -245,6 +245,13 @@ #include <net/if.h> #include <sys/cpucaps.h> #include <vm/seg.h> +#include <sys/mac.h> + +/* List of data link IDs which are accessible from the zone */ +typedef struct zone_dl { + datalink_id_t zdl_id; + list_node_t zdl_linkage; +} zone_dl_t; /* * cv used to signal that all references to the zone have been released. This @@ -350,10 +357,9 @@ static kmutex_t mount_lock; const char * const zone_default_initname = "/sbin/init"; static char * const zone_prefix = "/zone/"; static int zone_shutdown(zoneid_t zoneid); -static int zone_add_datalink(zoneid_t, char *); -static int zone_remove_datalink(zoneid_t, char *); -static int zone_check_datalink(zoneid_t *, char *); -static int zone_list_datalink(zoneid_t, int *, char *); +static int zone_add_datalink(zoneid_t, datalink_id_t); +static int zone_remove_datalink(zoneid_t, datalink_id_t); +static int zone_list_datalink(zoneid_t, int *, datalink_id_t *); typedef boolean_t zsd_applyfn_t(kmutex_t *, boolean_t, zone_t *, zone_key_t); @@ -2002,6 +2008,7 @@ zone_free(zone_t *zone) zone_free_zsd(zone); zone_free_datasets(zone); + list_destroy(&zone->zone_dl_list); if (zone->zone_rootvp != NULL) VN_RELE(zone->zone_rootvp); @@ -3795,6 +3802,8 @@ zone_create(const char *zone_name, const char *zone_root, offsetof(struct zsd_entry, zsd_linkage)); list_create(&zone->zone_datasets, sizeof (zone_dataset_t), offsetof(zone_dataset_t, zd_linkage)); + list_create(&zone->zone_dl_list, sizeof (zone_dl_t), + offsetof(zone_dl_t, zdl_linkage)); rw_init(&zone->zone_mlps.mlpl_rwlock, NULL, RW_DEFAULT, NULL); if (flags & ZCF_NET_EXCL) { @@ -5488,6 +5497,7 @@ long zone(int cmd, void *arg1, void *arg2, void *arg3, void *arg4) { zone_def zs; + int err; switch (cmd) { case ZONE_CREATE: @@ -5553,15 +5563,28 @@ zone(int cmd, void 
*arg1, void *arg2, void *arg3, void *arg4) return (zone_version((int *)arg1)); case ZONE_ADD_DATALINK: return (zone_add_datalink((zoneid_t)(uintptr_t)arg1, - (char *)arg2)); + (datalink_id_t)(uintptr_t)arg2)); case ZONE_DEL_DATALINK: return (zone_remove_datalink((zoneid_t)(uintptr_t)arg1, - (char *)arg2)); - case ZONE_CHECK_DATALINK: - return (zone_check_datalink((zoneid_t *)arg1, (char *)arg2)); + (datalink_id_t)(uintptr_t)arg2)); + case ZONE_CHECK_DATALINK: { + zoneid_t zoneid; + boolean_t need_copyout; + + if (copyin(arg1, &zoneid, sizeof (zoneid)) != 0) + return (EFAULT); + need_copyout = (zoneid == ALL_ZONES); + err = zone_check_datalink(&zoneid, + (datalink_id_t)(uintptr_t)arg2); + if (err == 0 && need_copyout) { + if (copyout(&zoneid, arg1, sizeof (zoneid)) != 0) + err = EFAULT; + } + return (err == 0 ? 0 : set_errno(err)); + } case ZONE_LIST_DATALINK: return (zone_list_datalink((zoneid_t)(uintptr_t)arg1, - (int *)arg2, (char *)arg3)); + (int *)arg2, (datalink_id_t *)(uintptr_t)arg3)); default: return (set_errno(EINVAL)); } @@ -5978,78 +6001,63 @@ zone_find_by_any_path(const char *path, boolean_t treat_abs) return (zone); } -/* List of data link names which are accessible from the zone */ -struct dlnamelist { - char dlnl_name[LIFNAMSIZ]; - struct dlnamelist *dlnl_next; -}; - - /* - * Check whether the datalink name (dlname) itself is present. - * Return true if found. + * Finds a zone_dl_t with the given linkid in the given zone. Returns the + * zone_dl_t pointer if found, and NULL otherwise. 
*/ -static boolean_t -zone_dlname(zone_t *zone, char *dlname) +static zone_dl_t * +zone_find_dl(zone_t *zone, datalink_id_t linkid) { - struct dlnamelist *dlnl; - boolean_t found = B_FALSE; + zone_dl_t *zdl; - mutex_enter(&zone->zone_lock); - for (dlnl = zone->zone_dl_list; dlnl != NULL; dlnl = dlnl->dlnl_next) { - if (strncmp(dlnl->dlnl_name, dlname, LIFNAMSIZ) == 0) { - found = B_TRUE; + ASSERT(mutex_owned(&zone->zone_lock)); + for (zdl = list_head(&zone->zone_dl_list); zdl != NULL; + zdl = list_next(&zone->zone_dl_list, zdl)) { + if (zdl->zdl_id == linkid) break; - } } + return (zdl); +} + +static boolean_t +zone_dl_exists(zone_t *zone, datalink_id_t linkid) +{ + boolean_t exists; + + mutex_enter(&zone->zone_lock); + exists = (zone_find_dl(zone, linkid) != NULL); mutex_exit(&zone->zone_lock); - return (found); + return (exists); } /* - * Add an data link name for the zone. Does not check for duplicates. + * Add a datalink ID to the zone. */ static int -zone_add_datalink(zoneid_t zoneid, char *dlname) +zone_add_datalink(zoneid_t zoneid, datalink_id_t linkid) { - struct dlnamelist *dlnl; + zone_dl_t *zdl; zone_t *zone; zone_t *thiszone; - int err; - dlnl = kmem_zalloc(sizeof (struct dlnamelist), KM_SLEEP); - if ((err = copyinstr(dlname, dlnl->dlnl_name, LIFNAMSIZ, NULL)) != 0) { - kmem_free(dlnl, sizeof (struct dlnamelist)); - return (set_errno(err)); - } - - thiszone = zone_find_by_id(zoneid); - if (thiszone == NULL) { - kmem_free(dlnl, sizeof (struct dlnamelist)); + if ((thiszone = zone_find_by_id(zoneid)) == NULL) return (set_errno(ENXIO)); - } - /* - * Verify that the datalink name isn't already used by a different - * zone while allowing duplicate entries for the same zone (e.g. due - * to both using IPv4 and IPv6 on an interface) - */ + /* Verify that the datalink ID doesn't already belong to a zone.
*/ mutex_enter(&zonehash_lock); for (zone = list_head(&zone_active); zone != NULL; zone = list_next(&zone_active, zone)) { - if (zone->zone_id == zoneid) - continue; - - if (zone_dlname(zone, dlnl->dlnl_name)) { + if (zone_dl_exists(zone, linkid)) { mutex_exit(&zonehash_lock); zone_rele(thiszone); - kmem_free(dlnl, sizeof (struct dlnamelist)); - return (set_errno(EPERM)); + return (set_errno((zone == thiszone) ? EEXIST : EPERM)); } } + + zdl = kmem_zalloc(sizeof (*zdl), KM_SLEEP); + zdl->zdl_id = linkid; mutex_enter(&thiszone->zone_lock); - dlnl->dlnl_next = thiszone->zone_dl_list; - thiszone->zone_dl_list = dlnl; + list_insert_head(&thiszone->zone_dl_list, zdl); mutex_exit(&thiszone->zone_lock); mutex_exit(&zonehash_lock); zone_rele(thiszone); @@ -6057,150 +6065,106 @@ zone_add_datalink(zoneid_t zoneid, char *dlname) } static int -zone_remove_datalink(zoneid_t zoneid, char *dlname) +zone_remove_datalink(zoneid_t zoneid, datalink_id_t linkid) { - struct dlnamelist *dlnl, *odlnl, **dlnlp; + zone_dl_t *zdl; zone_t *zone; - int err; + int err = 0; - dlnl = kmem_zalloc(sizeof (struct dlnamelist), KM_SLEEP); - if ((err = copyinstr(dlname, dlnl->dlnl_name, LIFNAMSIZ, NULL)) != 0) { - kmem_free(dlnl, sizeof (struct dlnamelist)); - return (set_errno(err)); - } - zone = zone_find_by_id(zoneid); - if (zone == NULL) { - kmem_free(dlnl, sizeof (struct dlnamelist)); + if ((zone = zone_find_by_id(zoneid)) == NULL) return (set_errno(EINVAL)); - } mutex_enter(&zone->zone_lock); - /* Look for match */ - dlnlp = &zone->zone_dl_list; - while (*dlnlp != NULL) { - if (strncmp(dlnl->dlnl_name, (*dlnlp)->dlnl_name, - LIFNAMSIZ) == 0) - goto found; - dlnlp = &((*dlnlp)->dlnl_next); + if ((zdl = zone_find_dl(zone, linkid)) == NULL) { + err = ENXIO; + } else { + list_remove(&zone->zone_dl_list, zdl); + kmem_free(zdl, sizeof (zone_dl_t)); } mutex_exit(&zone->zone_lock); zone_rele(zone); - kmem_free(dlnl, sizeof (struct dlnamelist)); - return (set_errno(ENXIO)); - -found: - odlnl = *dlnlp; - 
*dlnlp = (*dlnlp)->dlnl_next; - kmem_free(odlnl, sizeof (struct dlnamelist)); - - mutex_exit(&zone->zone_lock); - zone_rele(zone); - kmem_free(dlnl, sizeof (struct dlnamelist)); - return (0); + return (err == 0 ? 0 : set_errno(err)); } /* - * Using the zoneidp as ALL_ZONES, we can lookup which zone is using datalink - * name (dlname); otherwise we just check if the specified zoneidp has access - * to the datalink name. + * Using the zoneidp as ALL_ZONES, we can lookup which zone has been assigned + * the linkid. Otherwise we just check if the specified zoneidp has been + * assigned the supplied linkid. */ -static int -zone_check_datalink(zoneid_t *zoneidp, char *dlname) +int +zone_check_datalink(zoneid_t *zoneidp, datalink_id_t linkid) { - zoneid_t id; - char *dln; zone_t *zone; - int err = 0; - boolean_t allzones = B_FALSE; + int err = ENXIO; - if (copyin(zoneidp, &id, sizeof (id)) != 0) { - return (set_errno(EFAULT)); - } - dln = kmem_zalloc(LIFNAMSIZ, KM_SLEEP); - if ((err = copyinstr(dlname, dln, LIFNAMSIZ, NULL)) != 0) { - kmem_free(dln, LIFNAMSIZ); - return (set_errno(err)); + if (*zoneidp != ALL_ZONES) { + if ((zone = zone_find_by_id(*zoneidp)) != NULL) { + if (zone_dl_exists(zone, linkid)) + err = 0; + zone_rele(zone); + } + return (err); } - if (id == ALL_ZONES) - allzones = B_TRUE; - - /* - * Check whether datalink name is already used. - */ mutex_enter(&zonehash_lock); for (zone = list_head(&zone_active); zone != NULL; zone = list_next(&zone_active, zone)) { - if (allzones || (id == zone->zone_id)) { - if (!zone_dlname(zone, dln)) - continue; - if (allzones) - err = copyout(&zone->zone_id, zoneidp, - sizeof (*zoneidp)); - - mutex_exit(&zonehash_lock); - kmem_free(dln, LIFNAMSIZ); - return (err ? set_errno(EFAULT) : 0); + if (zone_dl_exists(zone, linkid)) { + *zoneidp = zone->zone_id; + err = 0; + break; } } - - /* datalink name is not found in any active zone. 
*/ mutex_exit(&zonehash_lock); - kmem_free(dln, LIFNAMSIZ); - return (set_errno(ENXIO)); + return (err); } /* - * Get the names of the datalinks assigned to a zone. - * Here *nump is the number of datalinks, and the assumption - * is that the caller will guarantee that the the supplied buffer is - * big enough to hold at least #*nump datalink names, that is, - * LIFNAMSIZ X *nump - * On return, *nump will be the "new" number of datalinks, if it - * ever changed. + * Get the list of datalink IDs assigned to a zone. + * + * On input, *nump is the number of datalink IDs that can fit in the supplied + * idarray. Upon return, *nump is either set to the number of datalink IDs + * that were placed in the array if the array was large enough, or to the + * number of datalink IDs that the function needs to place in the array if the + * array is too small. */ static int -zone_list_datalink(zoneid_t zoneid, int *nump, char *buf) +zone_list_datalink(zoneid_t zoneid, int *nump, datalink_id_t *idarray) { - int num, dlcount; + uint_t num, dlcount; zone_t *zone; - struct dlnamelist *dlnl; - char *ptr; + zone_dl_t *zdl; + datalink_id_t *idptr = idarray; if (copyin(nump, &dlcount, sizeof (dlcount)) != 0) return (set_errno(EFAULT)); - - zone = zone_find_by_id(zoneid); - if (zone == NULL) { + if ((zone = zone_find_by_id(zoneid)) == NULL) return (set_errno(ENXIO)); - } num = 0; mutex_enter(&zone->zone_lock); - ptr = buf; - for (dlnl = zone->zone_dl_list; dlnl != NULL; dlnl = dlnl->dlnl_next) { + for (zdl = list_head(&zone->zone_dl_list); zdl != NULL; + zdl = list_next(&zone->zone_dl_list, zdl)) { /* - * If the list changed and the new number is bigger - * than what the caller supplied, just count, don't - * do copyout + * If the list is bigger than what the caller supplied, just + * count, don't do copyout. 
*/ if (++num > dlcount) continue; - if (copyout(dlnl->dlnl_name, ptr, LIFNAMSIZ) != 0) { + if (copyout(&zdl->zdl_id, idptr, sizeof (*idptr)) != 0) { mutex_exit(&zone->zone_lock); zone_rele(zone); return (set_errno(EFAULT)); } - ptr += LIFNAMSIZ; + idptr++; } mutex_exit(&zone->zone_lock); zone_rele(zone); /* Increased or decreased, caller should be notified. */ if (num != dlcount) { - if (copyout(&num, nump, sizeof (num)) != 0) { + if (copyout(&num, nump, sizeof (num)) != 0) return (set_errno(EFAULT)); - } } return (0); } @@ -6237,21 +6201,54 @@ zone_find_by_id_nolock(zoneid_t zoneid) * Walk the datalinks for a given zone */ int -zone_datalink_walk(zoneid_t zoneid, int (*cb)(const char *, void *), void *data) +zone_datalink_walk(zoneid_t zoneid, int (*cb)(datalink_id_t, void *), + void *data) { - zone_t *zone; - struct dlnamelist *dlnl; - int ret = 0; + zone_t *zone; + zone_dl_t *zdl; + datalink_id_t *idarray; + uint_t idcount = 0; + int i, ret = 0; if ((zone = zone_find_by_id(zoneid)) == NULL) return (ENOENT); + /* + * We first build an array of linkid's so that we can walk these and + * execute the callback with the zone_lock dropped. 
+ */ mutex_enter(&zone->zone_lock); - for (dlnl = zone->zone_dl_list; dlnl != NULL; dlnl = dlnl->dlnl_next) { - if ((ret = (*cb)(dlnl->dlnl_name, data)) != 0) - break; + for (zdl = list_head(&zone->zone_dl_list); zdl != NULL; + zdl = list_next(&zone->zone_dl_list, zdl)) { + idcount++; } + + if (idcount == 0) { + mutex_exit(&zone->zone_lock); + zone_rele(zone); + return (0); + } + + idarray = kmem_alloc(sizeof (datalink_id_t) * idcount, KM_NOSLEEP); + if (idarray == NULL) { + mutex_exit(&zone->zone_lock); + zone_rele(zone); + return (ENOMEM); + } + + for (i = 0, zdl = list_head(&zone->zone_dl_list); zdl != NULL; + i++, zdl = list_next(&zone->zone_dl_list, zdl)) { + idarray[i] = zdl->zdl_id; + } + mutex_exit(&zone->zone_lock); + + for (i = 0; i < idcount && ret == 0; i++) { + if ((ret = (*cb)(idarray[i], data)) != 0) + break; + } + zone_rele(zone); + kmem_free(idarray, sizeof (datalink_id_t) * idcount); return (ret); } diff --git a/usr/src/uts/common/sys/aggr_impl.h b/usr/src/uts/common/sys/aggr_impl.h index 96182b14f3..ee0979b798 100644 --- a/usr/src/uts/common/sys/aggr_impl.h +++ b/usr/src/uts/common/sys/aggr_impl.h @@ -27,6 +27,7 @@ #define _SYS_AGGR_IMPL_H #include <sys/types.h> +#include <sys/cred.h> #include <sys/mac_ether.h> #include <sys/mac_provider.h> #include <sys/mac_client.h> @@ -152,6 +153,7 @@ typedef struct aggr_grp_s { aggr_port_t *lg_ports; /* list of configured ports */ aggr_port_t *lg_mac_addr_port; mac_handle_t lg_mh; + zoneid_t lg_zoneid; uint_t lg_nattached_ports; krwlock_t lg_tx_lock; uint_t lg_ntx_ports; @@ -235,12 +237,12 @@ extern void aggr_grp_init(void); extern void aggr_grp_fini(void); extern int aggr_grp_create(datalink_id_t, uint32_t, uint_t, laioc_port_t *, uint32_t, boolean_t, boolean_t, uchar_t *, aggr_lacp_mode_t, - aggr_lacp_timer_t); -extern int aggr_grp_delete(datalink_id_t); + aggr_lacp_timer_t, cred_t *); +extern int aggr_grp_delete(datalink_id_t, cred_t *); extern void aggr_grp_free(aggr_grp_t *); extern int 
aggr_grp_info(datalink_id_t, void *, aggr_grp_info_new_grp_fn_t, - aggr_grp_info_new_port_fn_t); + aggr_grp_info_new_port_fn_t, cred_t *); extern void aggr_grp_notify(aggr_grp_t *, uint32_t); extern boolean_t aggr_grp_attach_port(aggr_grp_t *, aggr_port_t *); extern boolean_t aggr_grp_detach_port(aggr_grp_t *, aggr_port_t *); diff --git a/usr/src/uts/common/sys/dld.h b/usr/src/uts/common/sys/dld.h index 9eb076b753..de7a5c15a0 100644 --- a/usr/src/uts/common/sys/dld.h +++ b/usr/src/uts/common/sys/dld.h @@ -202,7 +202,7 @@ typedef struct dld_ioc_rename { typedef struct dld_ioc_zid { zoneid_t diz_zid; - char diz_link[MAXLINKNAMELEN]; + datalink_id_t diz_linkid; } dld_ioc_zid_t; /* @@ -418,7 +418,7 @@ int dld_add_flow(datalink_id_t, char *, flow_desc_t *, mac_resource_props_t *); int dld_remove_flow(char *); int dld_modify_flow(char *, mac_resource_props_t *); -int dld_walk_flow(dld_ioc_walkflow_t *, intptr_t); +int dld_walk_flow(dld_ioc_walkflow_t *, intptr_t, cred_t *); #endif diff --git a/usr/src/uts/common/sys/dld_ioc.h b/usr/src/uts/common/sys/dld_ioc.h index ead129064a..32efbfa369 100644 --- a/usr/src/uts/common/sys/dld_ioc.h +++ b/usr/src/uts/common/sys/dld_ioc.h @@ -57,6 +57,7 @@ extern "C" { #define AGGR_IOC 0x0A66 #define VNIC_IOC 0x0171 #define SIMNET_IOC 0x5132 +#define IPTUN_IOC 0x454A #define BRIDGE_IOC 0xB81D /* GLDv3 modules use these macros to generate unique ioctl commands */ @@ -64,6 +65,7 @@ extern "C" { #define AGGRIOC(cmdid) DLD_IOC_CMD(AGGR_IOC, (cmdid)) #define VNICIOC(cmdid) DLD_IOC_CMD(VNIC_IOC, (cmdid)) #define SIMNETIOC(cmdid) DLD_IOC_CMD(SIMNET_IOC, (cmdid)) +#define IPTUNIOC(cmdid) DLD_IOC_CMD(IPTUN_IOC, (cmdid)) #define BRIDGEIOC(cmdid) DLD_IOC_CMD(BRIDGE_IOC, (cmdid)) #ifdef _KERNEL @@ -82,16 +84,14 @@ extern "C" { * callback function does not need to copyin/out its own data. 
*/ -/* Maximum number of Privileges */ -#define DLD_MAX_PRIV 16 - typedef int (dld_ioc_func_t)(void *, intptr_t, int, cred_t *, int *); +typedef int (dld_ioc_priv_func_t)(const cred_t *); typedef struct dld_ioc_info { uint_t di_cmd; uint_t di_flags; size_t di_argsize; dld_ioc_func_t *di_func; - const char *di_priv[DLD_MAX_PRIV]; + dld_ioc_priv_func_t *di_priv_func; } dld_ioc_info_t; /* Values for di_flags */ diff --git a/usr/src/uts/common/sys/dlpi.h b/usr/src/uts/common/sys/dlpi.h index d717afa968..7be621bfd8 100644 --- a/usr/src/uts/common/sys/dlpi.h +++ b/usr/src/uts/common/sys/dlpi.h @@ -277,6 +277,7 @@ typedef struct dl_ipnetinfo { #define DL_WIFI 0x80000004ul /* IEEE 802.11 */ #define DL_IPNET 0x80000005ul /* ipnet(7D) link */ #define SUNW_DL_IPMP 0x80000006ul /* IPMP stub interface */ +#define DL_6TO4 0x80000007ul /* 6to4 Tunnel Link */ /* * DLPI provider service supported. @@ -361,6 +362,7 @@ typedef struct dl_ipnetinfo { #define DL_CURR_PHYS_ADDR 0x02 /* current physical address */ #define DL_IPV6_TOKEN 0x03 /* IPv6 interface token */ #define DL_IPV6_LINK_LAYER_ADDR 0x04 /* Neighbor Discovery format */ +#define DL_CURR_DEST_ADDR 0x05 /* current destination address */ /* * DLPI flag definitions diff --git a/usr/src/uts/common/sys/dls.h b/usr/src/uts/common/sys/dls.h index 894c08490a..6bd2bbe35a 100644 --- a/usr/src/uts/common/sys/dls.h +++ b/usr/src/uts/common/sys/dls.h @@ -111,7 +111,8 @@ extern boolean_t dls_devnet_rebuild(); extern int dls_devnet_rename(datalink_id_t, datalink_id_t, const char *); -extern int dls_devnet_create(mac_handle_t, datalink_id_t); +extern int dls_devnet_create(mac_handle_t, datalink_id_t, + zoneid_t); extern int dls_devnet_destroy(mac_handle_t, datalink_id_t *, boolean_t); extern int dls_devnet_recreate(mac_handle_t, datalink_id_t); @@ -126,8 +127,10 @@ extern uint16_t dls_devnet_vid(dls_dl_handle_t); extern datalink_id_t dls_devnet_linkid(dls_dl_handle_t); extern int dls_devnet_dev2linkid(dev_t, datalink_id_t *); extern int 
dls_devnet_phydev(datalink_id_t, dev_t *); -extern int dls_devnet_setzid(const char *, zoneid_t); -extern int dls_devnet_getzid(datalink_id_t, zoneid_t *); +extern int dls_devnet_setzid(dls_dl_handle_t, zoneid_t); +extern zoneid_t dls_devnet_getzid(dls_dl_handle_t); +extern zoneid_t dls_devnet_getownerzid(dls_dl_handle_t); +extern boolean_t dls_devnet_islinkvisible(datalink_id_t, zoneid_t); extern int dls_mgmt_door_set(boolean_t); extern int dls_mgmt_create(const char *, dev_t, datalink_class_t, diff --git a/usr/src/uts/common/sys/dls_impl.h b/usr/src/uts/common/sys/dls_impl.h index dafd451954..36065e8735 100644 --- a/usr/src/uts/common/sys/dls_impl.h +++ b/usr/src/uts/common/sys/dls_impl.h @@ -95,7 +95,8 @@ extern void dls_mac_active_clear(dls_link_t *); extern void dls_create_str_kstats(dld_str_t *); extern int dls_stat_update(kstat_t *, dls_link_t *, int); extern int dls_stat_create(const char *, int, const char *, - int (*)(struct kstat *, int), void *, kstat_t **); + zoneid_t, int (*)(struct kstat *, int), void *, + kstat_t **); extern int dls_devnet_open_by_dev(dev_t, dls_link_t **, dls_dl_handle_t *); diff --git a/usr/src/uts/common/sys/dls_mgmt.h b/usr/src/uts/common/sys/dls_mgmt.h index 28de456053..50b70df355 100644 --- a/usr/src/uts/common/sys/dls_mgmt.h +++ b/usr/src/uts/common/sys/dls_mgmt.h @@ -28,6 +28,7 @@ #include <sys/types.h> #include <sys/param.h> +#include <sys/zone.h> /* * Data-Link Services Module @@ -44,13 +45,14 @@ typedef enum { DATALINK_CLASS_VNIC = 0x08, DATALINK_CLASS_ETHERSTUB = 0x10, DATALINK_CLASS_SIMNET = 0x20, - DATALINK_CLASS_BRIDGE = 0x40 + DATALINK_CLASS_BRIDGE = 0x40, + DATALINK_CLASS_IPTUN = 0x80 } datalink_class_t; #define DATALINK_CLASS_ALL (DATALINK_CLASS_PHYS | \ DATALINK_CLASS_VLAN | DATALINK_CLASS_AGGR | DATALINK_CLASS_VNIC | \ DATALINK_CLASS_ETHERSTUB | DATALINK_CLASS_SIMNET | \ - DATALINK_CLASS_BRIDGE) + DATALINK_CLASS_BRIDGE | DATALINK_CLASS_IPTUN) /* * A combination of flags and media.
@@ -91,7 +93,8 @@ typedef uint64_t datalink_media_t; /* * The door file for the dlmgmtd (data-link management) daemon. */ -#define DLMGMT_DOOR "/etc/svc/volatile/dladm/dlmgmt_door" +#define DLMGMT_TMPFS_DIR "/etc/svc/volatile/dladm" +#define DLMGMT_DOOR DLMGMT_TMPFS_DIR "/dlmgmt_door" /* * Door upcall commands. @@ -104,6 +107,7 @@ typedef uint64_t datalink_media_t; #define DLMGMT_CMD_GETNEXT 6 #define DLMGMT_CMD_DLS_UPDATE 7 #define DLMGMT_CMD_LINKPROP_INIT 8 +#define DLMGMT_CMD_SETZONEID 9 #define DLMGMT_CMD_BASE 128 /* @@ -176,13 +180,20 @@ typedef struct dlmgmt_door_linkprop_init { datalink_id_t ld_linkid; } dlmgmt_door_linkprop_init_t; +typedef struct dlmgmt_door_setzoneid { + int ld_cmd; + datalink_id_t ld_linkid; + zoneid_t ld_zoneid; +} dlmgmt_door_setzoneid_t; + /* upcall return value */ typedef struct dlmgmt_retval_s { uint_t lr_err; /* return error code */ } dlmgmt_retval_t; typedef dlmgmt_retval_t dlmgmt_destroy_retval_t, - dlmgmt_linkprop_init_retval_t; + dlmgmt_linkprop_init_retval_t, + dlmgmt_setzoneid_retval_t; struct dlmgmt_linkid_retval_s { uint_t lr_err; diff --git a/usr/src/uts/common/sys/mac.h b/usr/src/uts/common/sys/mac.h index 5bcf9d90f8..e7ef4cf4c8 100644 --- a/usr/src/uts/common/sys/mac.h +++ b/usr/src/uts/common/sys/mac.h @@ -180,6 +180,8 @@ typedef enum { MAC_PROP_EN_10HDX_CAP, MAC_PROP_ADV_100T4_CAP, MAC_PROP_EN_100T4_CAP, + MAC_PROP_IPTUN_HOPLIMIT, + MAC_PROP_IPTUN_ENCAPLIMIT, MAC_PROP_WL_ESSID, MAC_PROP_WL_BSSID, MAC_PROP_WL_BSSTYPE, @@ -346,6 +348,7 @@ typedef enum { MAC_NOTE_DEVPROMISC, MAC_NOTE_FASTPATH_FLUSH, MAC_NOTE_SDU_SIZE, + MAC_NOTE_DEST, MAC_NOTE_MARGIN, MAC_NOTE_CAPAB_CHG, MAC_NOTE_LOWLINK, diff --git a/usr/src/uts/common/sys/mac_6to4.h b/usr/src/uts/common/sys/mac_6to4.h new file mode 100644 index 0000000000..86c3c1ad65 --- /dev/null +++ b/usr/src/uts/common/sys/mac_6to4.h @@ -0,0 +1,45 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution 
License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_MAC_6TO4_H +#define _SYS_MAC_6TO4_H + +#include <sys/mac_ipv4.h> + +/* + * 6to4 tunneling MAC Plugin + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#define MAC_PLUGIN_IDENT_6TO4 "mac_6to4" + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_MAC_6TO4_H */ diff --git a/usr/src/uts/common/sys/mac_client.h b/usr/src/uts/common/sys/mac_client.h index f31357b0b9..3452b1b71c 100644 --- a/usr/src/uts/common/sys/mac_client.h +++ b/usr/src/uts/common/sys/mac_client.h @@ -146,6 +146,8 @@ extern int mac_unicast_primary_set(mac_handle_t, const uint8_t *); extern void mac_unicast_primary_get(mac_handle_t, uint8_t *); extern void mac_unicast_primary_info(mac_handle_t, char *, boolean_t *); +extern boolean_t mac_dst_get(mac_handle_t, uint8_t *); + extern int mac_addr_random(mac_client_handle_t, uint_t, uint8_t *, mac_diag_t *); diff --git a/usr/src/uts/common/sys/mac_flow_impl.h b/usr/src/uts/common/sys/mac_flow_impl.h index ad84940aa6..f01d9d486c 100644 --- a/usr/src/uts/common/sys/mac_flow_impl.h +++ b/usr/src/uts/common/sys/mac_flow_impl.h @@ -319,7 +319,6 @@ struct flow_entry_s { /* Protected by */ kstat_t *fe_ksp; flow_stats_t fe_flowstats; boolean_t 
fe_desc_logged; - zoneid_t fe_zoneid; uint64_t fe_nic_speed; }; diff --git a/usr/src/uts/common/sys/mac_impl.h b/usr/src/uts/common/sys/mac_impl.h index cd662948d5..16362766fb 100644 --- a/usr/src/uts/common/sys/mac_impl.h +++ b/usr/src/uts/common/sys/mac_impl.h @@ -388,6 +388,7 @@ struct mac_impl_s { kmutex_t mi_lock; uint8_t mi_addr[MAXMACADDRLEN]; /* mi_rw_lock */ uint8_t mi_dstaddr[MAXMACADDRLEN]; /* mi_rw_lock */ + boolean_t mi_dstaddr_set; /* * The mac perimeter. All client initiated create/modify operations diff --git a/usr/src/uts/common/sys/mac_ipv4.h b/usr/src/uts/common/sys/mac_ipv4.h new file mode 100644 index 0000000000..a2ff5f0610 --- /dev/null +++ b/usr/src/uts/common/sys/mac_ipv4.h @@ -0,0 +1,43 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_MAC_IPV4_H +#define _SYS_MAC_IPV4_H + +/* + * IPv4 tunneling MAC Plugin + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#define MAC_PLUGIN_IDENT_IPV4 "mac_ipv4" + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_MAC_IPV4_H */ diff --git a/usr/src/uts/common/sys/mac_ipv4_impl.h b/usr/src/uts/common/sys/mac_ipv4_impl.h new file mode 100644 index 0000000000..044a578e31 --- /dev/null +++ b/usr/src/uts/common/sys/mac_ipv4_impl.h @@ -0,0 +1,55 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_MAC_IPV4_IMPL_H +#define _SYS_MAC_IPV4_IMPL_H + +/* + * IPv4 tunneling MAC Plugin + */ + +#include <sys/mac.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * In addition to the mac_ipv4 plugin, the mac_6to4, and mac_ipv6 plugins + * use the following functions. 
+ */ +int mac_ipv4_unicst_verify(const void *, void *); +int mac_ipv4_multicst_verify(const void *, void *); +boolean_t mac_ipv4_sap_verify(uint32_t, uint32_t *, void *); +mblk_t *mac_ipv4_header(const void *, const void *, uint32_t, void *, mblk_t *, + size_t); +int mac_ipv4_header_info(mblk_t *, void *, mac_header_info_t *); +boolean_t mac_ipv4_pdata_verify(void *, size_t); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_MAC_IPV4_IMPL_H */ diff --git a/usr/src/uts/common/sys/mac_ipv6.h b/usr/src/uts/common/sys/mac_ipv6.h new file mode 100644 index 0000000000..38e48932c5 --- /dev/null +++ b/usr/src/uts/common/sys/mac_ipv6.h @@ -0,0 +1,43 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_MAC_IPV6_H +#define _SYS_MAC_IPV6_H + +/* + * IPv6 tunneling MAC Plugin + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#define MAC_PLUGIN_IDENT_IPV6 "mac_ipv6" + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_MAC_IPV6_H */ diff --git a/usr/src/uts/common/sys/mac_provider.h b/usr/src/uts/common/sys/mac_provider.h index 2444272ef9..52a86b693d 100644 --- a/usr/src/uts/common/sys/mac_provider.h +++ b/usr/src/uts/common/sys/mac_provider.h @@ -461,6 +461,7 @@ extern void mac_link_update(mac_handle_t, link_state_t); extern void mac_link_redo(mac_handle_t, link_state_t); extern void mac_unicst_update(mac_handle_t, const uint8_t *); +extern void mac_dst_update(mac_handle_t, const uint8_t *); extern void mac_tx_update(mac_handle_t); extern void mac_tx_ring_update(mac_handle_t, mac_ring_handle_t); diff --git a/usr/src/uts/common/sys/mac_soft_ring.h b/usr/src/uts/common/sys/mac_soft_ring.h index 4b07fb4e9f..ed4c47954d 100644 --- a/usr/src/uts/common/sys/mac_soft_ring.h +++ b/usr/src/uts/common/sys/mac_soft_ring.h @@ -41,7 +41,7 @@ extern "C" { #define S_RING_NAMELEN 64 -#define MAX_SR_FANOUT 32 +#define MAX_SR_FANOUT 24 extern boolean_t mac_soft_ring_enable; extern boolean_t mac_latency_optimize; diff --git a/usr/src/uts/common/sys/netstack.h b/usr/src/uts/common/sys/netstack.h index e44efab458..033adcb6aa 100644 --- a/usr/src/uts/common/sys/netstack.h +++ b/usr/src/uts/common/sys/netstack.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _SYS_NETSTACK_H @@ -59,26 +59,27 @@ typedef id_t netstackid_t; * * The order of these is important for some modules both for * the creation (which done in ascending order) and destruction (which is - * done ine in decending order). + * done in descending order).
*/ #define NS_ALL -1 /* Match all */ -#define NS_STR 0 /* autopush list etc */ -#define NS_HOOK 1 -#define NS_NETI 2 -#define NS_ARP 3 -#define NS_IP 4 -#define NS_ICMP 5 -#define NS_UDP 6 -#define NS_TCP 7 -#define NS_SCTP 8 -#define NS_RTS 9 -#define NS_IPSEC 10 -#define NS_KEYSOCK 11 -#define NS_SPDSOCK 12 -#define NS_IPSECAH 13 -#define NS_IPSECESP 14 -#define NS_TUN 15 -#define NS_IPNET 16 +#define NS_DLS 0 +#define NS_IPTUN 1 +#define NS_STR 2 /* autopush list etc */ +#define NS_HOOK 3 +#define NS_NETI 4 +#define NS_ARP 5 +#define NS_IP 6 +#define NS_ICMP 7 +#define NS_UDP 8 +#define NS_TCP 9 +#define NS_SCTP 10 +#define NS_RTS 11 +#define NS_IPSEC 12 +#define NS_KEYSOCK 13 +#define NS_SPDSOCK 14 +#define NS_IPSECAH 15 +#define NS_IPSECESP 16 +#define NS_IPNET 17 #define NS_MAX (NS_IPNET+1) /* @@ -136,6 +137,8 @@ struct netstack { union { void *nu_modules[NS_MAX]; struct { + struct dls_stack *nu_dls; + struct iptun_stack *nu_iptun; struct str_stack *nu_str; struct hook_stack *nu_hook; struct neti_stack *nu_neti; @@ -151,11 +154,12 @@ struct netstack { struct spd_stack *nu_spdsock; struct ipsecah_stack *nu_ipsecah; struct ipsecesp_stack *nu_ipsecesp; - struct tun_stack *nu_tun; struct ipnet_stack *nu_ipnet; } nu_s; } netstack_u; #define netstack_modules netstack_u.nu_modules +#define netstack_dls netstack_u.nu_s.nu_dls +#define netstack_iptun netstack_u.nu_s.nu_iptun #define netstack_str netstack_u.nu_s.nu_str #define netstack_hook netstack_u.nu_s.nu_hook #define netstack_neti netstack_u.nu_s.nu_neti @@ -171,7 +175,6 @@ struct netstack { #define netstack_spdsock netstack_u.nu_s.nu_spdsock #define netstack_ipsecah netstack_u.nu_s.nu_ipsecah #define netstack_ipsecesp netstack_u.nu_s.nu_ipsecesp -#define netstack_tun netstack_u.nu_s.nu_tun #define netstack_ipnet netstack_u.nu_s.nu_ipnet nm_state_t netstack_m_state[NS_MAX]; /* module state */ diff --git a/usr/src/uts/common/sys/policy.h b/usr/src/uts/common/sys/policy.h index dbe6afcce0..59cfb2482f 100644 --- 
a/usr/src/uts/common/sys/policy.h +++ b/usr/src/uts/common/sys/policy.h @@ -100,6 +100,7 @@ int secpolicy_idmap(const cred_t *); int secpolicy_ip(const cred_t *, int, boolean_t); int secpolicy_ip_config(const cred_t *, boolean_t); int secpolicy_dl_config(const cred_t *); +int secpolicy_iptun_config(const cred_t *); int secpolicy_ipc_access(const cred_t *, const struct kipc_perm *, mode_t); int secpolicy_ipc_config(const cred_t *); int secpolicy_ipc_owner(const cred_t *, const struct kipc_perm *); @@ -163,7 +164,6 @@ void secpolicy_fs_mount_clearopts(cred_t *, struct vfs *); int secpolicy_setid_setsticky_clear(vnode_t *, vattr_t *, const vattr_t *, cred_t *); int secpolicy_xvattr(xvattr_t *, uid_t, cred_t *, vtype_t); -int secpolicy_dld_ioctl(const cred_t *, const char *, const char *); int secpolicy_xvm_control(const cred_t *); int secpolicy_basic_exec(const cred_t *, vnode_t *); diff --git a/usr/src/uts/common/sys/sockio.h b/usr/src/uts/common/sys/sockio.h index 0ef5394fea..67a2eab07a 100644 --- a/usr/src/uts/common/sys/sockio.h +++ b/usr/src/uts/common/sys/sockio.h @@ -210,12 +210,7 @@ extern "C" { #define SIOCTMYSITE _IOWR('i', 146, struct sioc_addrreq) /* In this site? */ -#define SIOCGTUNPARAM _IOR('i', 147, struct iftun_req) - /* get tunnel */ - /* parameters */ -#define SIOCSTUNPARAM _IOW('i', 148, struct iftun_req) - /* set tunnel */ - /* parameters */ +/* 147 and 148 were SIOC*TUNPARAM ioctls. Feel free to re-use. 
*/ #define SIOCFIPSECONFIG _IOW('i', 149, 0) /* Flush Policy */ #define SIOCSIPSECONFIG _IOW('i', 150, 0) /* Set Policy */ diff --git a/usr/src/uts/common/sys/vnic_impl.h b/usr/src/uts/common/sys/vnic_impl.h index 341a7c92ee..4599770a3b 100644 --- a/usr/src/uts/common/sys/vnic_impl.h +++ b/usr/src/uts/common/sys/vnic_impl.h @@ -26,6 +26,7 @@ #ifndef _SYS_VNIC_IMPL_H #define _SYS_VNIC_IMPL_H +#include <sys/cred.h> #include <sys/mac_provider.h> #include <sys/mac_client.h> #include <sys/mac_client_priv.h> @@ -72,17 +73,17 @@ typedef struct vnic_s { extern int vnic_dev_create(datalink_id_t, datalink_id_t, vnic_mac_addr_type_t *, int *, uchar_t *, int *, uint_t, uint16_t, mac_resource_props_t *, - uint32_t, vnic_ioc_diag_t *); + uint32_t, vnic_ioc_diag_t *, cred_t *); extern int vnic_dev_modify(datalink_id_t, uint_t, vnic_mac_addr_type_t, uint_t, uchar_t *, uint_t, mac_resource_props_t *); -extern int vnic_dev_delete(datalink_id_t, uint32_t); +extern int vnic_dev_delete(datalink_id_t, uint32_t, cred_t *); extern void vnic_dev_init(void); extern void vnic_dev_fini(void); extern uint_t vnic_dev_count(void); extern dev_info_t *vnic_get_dip(void); -extern int vnic_info(vnic_info_t *); +extern int vnic_info(vnic_info_t *, cred_t *); #ifdef __cplusplus } diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h index 604856397d..3095fb42ce 100644 --- a/usr/src/uts/common/sys/zone.h +++ b/usr/src/uts/common/sys/zone.h @@ -287,7 +287,6 @@ typedef struct zone_cmd_rval { struct pool; struct brand; -struct dlnamelist; /* * Structure to record list of ZFS datasets exported to a zone. 
@@ -429,7 +428,7 @@ typedef struct zone { /* * zone_dl_list is protected by zone_lock */ - struct dlnamelist *zone_dl_list; + list_t zone_dl_list; netstack_t *zone_netstack; struct cpucap *zone_cpucap; /* CPU caps data */ /* @@ -465,7 +464,8 @@ extern zone_t *zone_find_by_any_path(const char *, boolean_t); extern zone_t *zone_find_by_path(const char *); extern zoneid_t getzoneid(void); extern zone_t *zone_find_by_id_nolock(zoneid_t); -extern int zone_datalink_walk(zoneid_t, int (*)(const char *, void *), void *); +extern int zone_datalink_walk(zoneid_t, int (*)(datalink_id_t, void *), void *); +extern int zone_check_datalink(zoneid_t *, datalink_id_t); /* * Zone-specific data (ZSD) APIs diff --git a/usr/src/uts/intel/Makefile.intel.shared b/usr/src/uts/intel/Makefile.intel.shared index 3fc1411108..f7369b8f45 100644 --- a/usr/src/uts/intel/Makefile.intel.shared +++ b/usr/src/uts/intel/Makefile.intel.shared @@ -364,6 +364,8 @@ DRV_KMODS += qlt DRV_KMODS += iscsit DRV_KMODS += ncall nsctl sdbc nskern sv DRV_KMODS += ii rdc rdcsrv rdcstub +DRV_KMODS += iptun +DRV_KMODS += iptunq # # Don't build some of these for OpenSolaris, since they will be @@ -530,10 +532,10 @@ FS_KMODS += smbfs # # Streams Modules (/kernel/strmod): # -STRMOD_KMODS += 6to4tun atun bufmod connld dedump ldterm pckt pfmod pipemod +STRMOD_KMODS += bufmod connld dedump ldterm pckt pfmod pipemod STRMOD_KMODS += ptem redirmod rpcmod rlmod telmod timod STRMOD_KMODS += spppasyn spppcomp -STRMOD_KMODS += tirdwr ttcompat tun +STRMOD_KMODS += tirdwr ttcompat STRMOD_KMODS += usbkbm STRMOD_KMODS += usbms STRMOD_KMODS += usb_ah @@ -717,7 +719,10 @@ PCBE_KMODS += p123_pcbe p4_pcbe opteron_pcbe core_pcbe # # MAC-Type Plugin Modules (/kernel/mac) # +MAC_KMODS += mac_6to4 MAC_KMODS += mac_ether +MAC_KMODS += mac_ipv4 +MAC_KMODS += mac_ipv6 MAC_KMODS += mac_wifi MAC_KMODS += mac_ib diff --git a/usr/src/uts/intel/ia32/ml/modstubs.s b/usr/src/uts/intel/ia32/ml/modstubs.s index de2ff15c79..6de9fe1c3f 100644 --- 
a/usr/src/uts/intel/ia32/ml/modstubs.s +++ b/usr/src/uts/intel/ia32/ml/modstubs.s @@ -1289,6 +1289,16 @@ fcnname/**/_info: \ END_MODULE(softmac); #endif +#ifndef IPTUN_MODULE + MODULE(iptun,drv); + STUB(iptun, iptun_create, nomod_einval); + STUB(iptun, iptun_delete, nomod_einval); + STUB(iptun, iptun_set_policy, nomod_void) ; + STUB(iptun, iptun_set_g_q, nomod_einval); + STUB(iptun, iptun_clear_g_q, nomod_void); + END_MODULE(iptun); +#endif + /* * Stubs for kssl, the kernel SSL proxy */ diff --git a/usr/src/uts/intel/ip/ip.global-objs.debug64 b/usr/src/uts/intel/ip/ip.global-objs.debug64 index 931dfb7f5a..ff276921f7 100644 --- a/usr/src/uts/intel/ip/ip.global-objs.debug64 +++ b/usr/src/uts/intel/ip/ip.global-objs.debug64 @@ -150,6 +150,7 @@ ipcl_conn_hash_maxsize ipcl_conn_hash_memfactor ipcl_conn_hash_size ipcl_debug_level +ipcl_iptun_fanout_size ipcl_raw_fanout_size ipcl_udp_fanout_size ipif_loopback_name @@ -172,6 +173,10 @@ ipsec_sel_cache ipsec_spd_hashsize ipsec_weird_null_inbound_policy ipsechw_debug +iptunq_info +iptunq_modinfo +iptunq_rinit +iptunq_winit ipv4_forward_suffix ipv4info ipv6_all_hosts_mcast diff --git a/usr/src/uts/intel/ip/ip.global-objs.obj64 b/usr/src/uts/intel/ip/ip.global-objs.obj64 index 776ef1fbf8..3bed15beb2 100644 --- a/usr/src/uts/intel/ip/ip.global-objs.obj64 +++ b/usr/src/uts/intel/ip/ip.global-objs.obj64 @@ -149,6 +149,7 @@ ipcl_bind_fanout_size ipcl_conn_hash_maxsize ipcl_conn_hash_memfactor ipcl_conn_hash_size +ipcl_iptun_fanout_size ipcl_raw_fanout_size ipcl_udp_fanout_size ipif_loopback_name @@ -170,6 +171,10 @@ ipsec_policy_failure_msgs ipsec_sel_cache ipsec_spd_hashsize ipsec_weird_null_inbound_policy +iptunq_info +iptunq_modinfo +iptunq_rinit +iptunq_winit ipv4_forward_suffix ipv4info ipv6_all_hosts_mcast diff --git a/usr/src/uts/intel/iptun/Makefile b/usr/src/uts/intel/iptun/Makefile new file mode 100644 index 0000000000..6fc2289eaa --- /dev/null +++ b/usr/src/uts/intel/iptun/Makefile @@ -0,0 +1,85 @@ +# +# CDDL HEADER 
START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Define the module and object file sets. +# +MODULE = iptun +OBJECTS = $(IPTUN_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(IPTUN_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE) +CONF_SRCDIR = $(UTSBASE)/common/inet/iptun + +# +# Include common rules. +# +include $(UTSBASE)/intel/Makefile.intel + +# +# Define targets +# +ALL_TARGET = $(BINARY) $(SRC_CONFFILE) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) + +# +# Overrides +# +CFLAGS += $(CCVERBOSE) +LDFLAGS += -dy -Ndrv/dld -Nmisc/dls -Nmisc/mac -Ndrv/ip + +LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN +LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets.
+# +include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/intel/iptunq/Makefile b/usr/src/uts/intel/iptunq/Makefile new file mode 100644 index 0000000000..ffefbaeb29 --- /dev/null +++ b/usr/src/uts/intel/iptunq/Makefile @@ -0,0 +1,78 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Define the module and object file sets. +# +MODULE = iptunq +OBJECTS = $(IPTUNQ_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(IPTUNQ_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE) +CONF_SRCDIR = $(UTSBASE)/common/inet/iptun + +# +# Include common rules. +# +include $(UTSBASE)/intel/Makefile.intel + +# +# Define targets +# +ALL_TARGET = $(BINARY) $(SRC_CONFFILE) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) + +LDFLAGS += -dy -Ndrv/ip + +# +# Default build targets. 
+# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/intel/6to4tun/Makefile b/usr/src/uts/intel/mac_6to4/Makefile index 72c21e40fa..ffb0800d13 100644 --- a/usr/src/uts/intel/6to4tun/Makefile +++ b/usr/src/uts/intel/mac_6to4/Makefile @@ -2,9 +2,8 @@ # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. @@ -20,16 +19,13 @@ # CDDL HEADER END # # -# Copyright 2004 Sun Microsystems, Inc. All rights reserved. +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -#pragma ident "%Z%%M% %I% %E% SMI" # -# This makefile drives the production of the 6to4 tunneling streams +# This makefile drives the production of the mac_6to4 plugin # kernel module. # -# intel implementation architecture dependent -# # # Path to the base of the uts directory tree (usually /usr/src/uts). @@ -39,10 +35,10 @@ UTSBASE = ../.. # # Define the module and object file sets. # -MODULE = 6to4tun -OBJECTS = $(6TO4TUN_OBJS:%=$(OBJS_DIR)/%) -LINTS = $(6TO4TUN_OBJS:%.o=$(LINTS_DIR)/%.ln) -ROOTMODULE = $(ROOT_STRMOD_DIR)/$(MODULE) +MODULE = mac_6to4 +OBJECTS = $(MAC_6TO4_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(MAC_6TO4_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_MAC_DIR)/$(MODULE) # # Include common rules. 
@@ -57,9 +53,13 @@ LINT_TARGET = $(MODULE).lint INSTALL_TARGET = $(BINARY) $(ROOTMODULE) # -# depends on tun +# Overrides. # -LDFLAGS += -dy -Nstrmod/tun +CFLAGS += $(CCVERBOSE) +LDFLAGS += -dy -Nmisc/mac -Nmac/mac_ipv4 + +LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN +LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW # # Default build targets. diff --git a/usr/src/uts/intel/atun/Makefile b/usr/src/uts/intel/mac_ipv4/Makefile index b9eae50300..578a504341 100644 --- a/usr/src/uts/intel/atun/Makefile +++ b/usr/src/uts/intel/mac_ipv4/Makefile @@ -2,9 +2,8 @@ # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. @@ -20,17 +19,12 @@ # CDDL HEADER END # # -# -# Copyright 2004 Sun Microsystems, Inc. All rights reserved. +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -#pragma ident "%Z%%M% %I% %E% SMI" -# -# This makefile drives the production of the automatic tunneling streams +# This makefile drives the production of the mac_ipv4 plugin # kernel module. # -# intel implementation architecture dependent -# # # Path to the base of the uts directory tree (usually /usr/src/uts). @@ -40,10 +34,10 @@ UTSBASE = ../.. # # Define the module and object file sets. # -MODULE = atun -OBJECTS = $(ATUN_OBJS:%=$(OBJS_DIR)/%) -LINTS = $(ATUN_OBJS:%.o=$(LINTS_DIR)/%.ln) -ROOTMODULE = $(ROOT_STRMOD_DIR)/$(MODULE) +MODULE = mac_ipv4 +OBJECTS = $(MAC_IPV4_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(MAC_IPV4_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_MAC_DIR)/$(MODULE) # # Include common rules. 
@@ -58,9 +52,13 @@ LINT_TARGET = $(MODULE).lint INSTALL_TARGET = $(BINARY) $(ROOTMODULE) # -# depends on tun +# Overrides. # -LDFLAGS += -dy -Nstrmod/tun +CFLAGS += $(CCVERBOSE) +LDFLAGS += -dy -Nmisc/mac + +LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN +LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW # # Default build targets. diff --git a/usr/src/uts/intel/mac_ipv6/Makefile b/usr/src/uts/intel/mac_ipv6/Makefile new file mode 100644 index 0000000000..4d0983e15c --- /dev/null +++ b/usr/src/uts/intel/mac_ipv6/Makefile @@ -0,0 +1,88 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# +# This makefile drives the production of the mac_ipv6 plugin +# kernel module. +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Define the module and object file sets. +# +MODULE = mac_ipv6 +OBJECTS = $(MAC_IPV6_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(MAC_IPV6_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_MAC_DIR)/$(MODULE) + +# +# Include common rules. 
+# +include $(UTSBASE)/intel/Makefile.intel + +# +# Define targets +# +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +# +# Overrides. +# +CFLAGS += $(CCVERBOSE) +LDFLAGS += -dy -Nmisc/mac -Nmac/mac_ipv4 + +LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN +LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/intel/os/minor_perm b/usr/src/uts/intel/os/minor_perm index 3e6df05c75..0919c33000 100644 --- a/usr/src/uts/intel/os/minor_perm +++ b/usr/src/uts/intel/os/minor_perm @@ -198,6 +198,7 @@ domcaps:* 0444 root sys evtchn:* 0666 root sys privcmd:* 0666 root sys xenbus:* 0666 root sys +iptunq:* 0640 root sys fm:* 0644 root sys amd_iommu:* 0644 root sys xpvtap:* 0666 root sys diff --git a/usr/src/uts/intel/os/name_to_major b/usr/src/uts/intel/os/name_to_major index 83e7825871..84fc215d10 100644 --- a/usr/src/uts/intel/os/name_to_major +++ b/usr/src/uts/intel/os/name_to_major @@ -157,3 +157,5 @@ intel_nhm 262 simnet 263 acpinex 264 bridge 265 +iptun 266 +iptunq 267 diff --git a/usr/src/uts/intel/tun/Makefile b/usr/src/uts/intel/tun/Makefile deleted file mode 100644 index ce6033c43d..0000000000 --- a/usr/src/uts/intel/tun/Makefile +++ /dev/null @@ -1,105 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. 
-# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# uts/intel/tun/Makefile -# -# Copyright 2007 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -#ident "%Z%%M% %I% %E% SMI" -# -# This makefile drives the production of the configured tunneling -# streams kernel module. -# -# intel architecture dependent -# - -# -# Path to the base of the uts directory tree (usually /usr/src/uts). -# -UTSBASE = ../.. - -# -# Define the module and object file sets. -# -MODULE = tun -OBJECTS = $(TUN_OBJS:%=$(OBJS_DIR)/%) -LINTS = $(TUN_OBJS:%.o=$(LINTS_DIR)/%.ln) -ROOTMODULE = $(ROOT_STRMOD_DIR)/$(MODULE) - -# -# Extra for $(MODULE).check target -# -# Need to remove ipddi.o since it has non-static defines for _init etc. -IP_CHECK_OBJS = $(IP_OBJS:ipddi.o=ip.o) -EXTRA_CHECK_OBJS = $(IP_CHECK_OBJS:%=../ip/$(OBJS_DIR)/%) - -# -# Include common rules. -# -include $(UTSBASE)/intel/Makefile.intel - -# -# Define targets -# -ALL_TARGET = $(BINARY) -LINT_TARGET = $(MODULE).lint -INSTALL_TARGET = $(BINARY) $(ROOTMODULE) - -# -# depends on ip and ip6 -# -LDFLAGS += -dy -Ndrv/ip -Ndrv/ip6 - -# -# For now, disable these lint checks; maintainers should endeavor -# to investigate and remove these for maximum lint coverage. -# Please do not carry these forward to new Makefiles. -# -LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN -LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW -LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV - -# -# Default build targets. 
-# -.KEEP_STATE: - -def: $(DEF_DEPS) - -all: $(ALL_DEPS) $(SISCHECK_DEPS) - -clean: $(CLEAN_DEPS) $(SISCLEAN_DEPS) - -clobber: $(CLOBBER_DEPS) $(SISCLEAN_DEPS) - -lint: $(LINT_DEPS) - -modlintlib: $(MODLINTLIB_DEPS) - -clean.lint: $(CLEAN_LINT_DEPS) - -install: $(INSTALL_DEPS) $(SISCHECK_DEPS) - -# -# Include common targets. -# -include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/sparc/Makefile.sparc.shared b/usr/src/uts/sparc/Makefile.sparc.shared index a7d99949c6..6525a8c4f0 100644 --- a/usr/src/uts/sparc/Makefile.sparc.shared +++ b/usr/src/uts/sparc/Makefile.sparc.shared @@ -205,7 +205,7 @@ DRV_KMODS += aggr arp audio bl bofi clone cn conskbd consms cpuid DRV_KMODS += crypto cryptoadm devinfo dump DRV_KMODS += dtrace fasttrap fbt lockstat profile sdt systrace dcpc DRV_KMODS += fssnap icmp icmp6 ip ip6 ipnet ipsecah -DRV_KMODS += ipsecesp iwscn keysock kmdb kstat ksyms llc1 +DRV_KMODS += ipsecesp iptun iptunq iwscn keysock kmdb kstat ksyms llc1 DRV_KMODS += lofi DRV_KMODS += log logindmux kssl mm nca physmem pm poll pool DRV_KMODS += pseudo ptc ptm pts ptsl ramdisk random rsm rts sad @@ -370,10 +370,10 @@ FS_KMODS += ctfs objfs sharefs dcfs smbfs # # Streams Modules (/kernel/strmod): # -STRMOD_KMODS += 6to4tun atun bufmod connld dedump ldterm ms pckt pfmod +STRMOD_KMODS += bufmod connld dedump ldterm ms pckt pfmod STRMOD_KMODS += pipemod ptem redirmod rpcmod rlmod telmod timod STRMOD_KMODS += spppasyn spppcomp -STRMOD_KMODS += tirdwr ttcompat tun +STRMOD_KMODS += tirdwr ttcompat STRMOD_KMODS += usbkbm usbms usb_ah STRMOD_KMODS += drcompat STRMOD_KMODS += cryptmod @@ -498,7 +498,10 @@ DACF_KMODS += net_dacf # # MAC-Type Plugin Modules (/kernel/mac) # +MAC_KMODS += mac_6to4 MAC_KMODS += mac_ether +MAC_KMODS += mac_ipv4 +MAC_KMODS += mac_ipv6 MAC_KMODS += mac_wifi MAC_KMODS += mac_ib diff --git a/usr/src/uts/sparc/ip/ip.global-objs.debug64 b/usr/src/uts/sparc/ip/ip.global-objs.debug64 index e55f57ce74..c6f78c5ff7 100644 --- 
a/usr/src/uts/sparc/ip/ip.global-objs.debug64 +++ b/usr/src/uts/sparc/ip/ip.global-objs.debug64 @@ -150,6 +150,7 @@ ipcl_conn_hash_maxsize ipcl_conn_hash_memfactor ipcl_conn_hash_size ipcl_debug_level +ipcl_iptun_fanout_size ipcl_raw_fanout_size ipcl_udp_fanout_size ipif_loopback_name @@ -172,6 +173,10 @@ ipsec_sel_cache ipsec_spd_hashsize ipsec_weird_null_inbound_policy ipsechw_debug +iptunq_info +iptunq_modinfo +iptunq_rinit +iptunq_winit ipv4_forward_suffix ipv4info ipv6_all_hosts_mcast diff --git a/usr/src/uts/sparc/ip/ip.global-objs.obj64 b/usr/src/uts/sparc/ip/ip.global-objs.obj64 index 40a5fcbda6..7ddbc0ab78 100644 --- a/usr/src/uts/sparc/ip/ip.global-objs.obj64 +++ b/usr/src/uts/sparc/ip/ip.global-objs.obj64 @@ -149,6 +149,7 @@ ipcl_bind_fanout_size ipcl_conn_hash_maxsize ipcl_conn_hash_memfactor ipcl_conn_hash_size +ipcl_iptun_fanout_size ipcl_raw_fanout_size ipcl_udp_fanout_size ipif_loopback_name @@ -170,6 +171,10 @@ ipsec_policy_failure_msgs ipsec_sel_cache ipsec_spd_hashsize ipsec_weird_null_inbound_policy +iptunq_info +iptunq_modinfo +iptunq_rinit +iptunq_winit ipv4_forward_suffix ipv4info ipv6_all_hosts_mcast diff --git a/usr/src/uts/sparc/iptun/Makefile b/usr/src/uts/sparc/iptun/Makefile new file mode 100644 index 0000000000..f1faf02704 --- /dev/null +++ b/usr/src/uts/sparc/iptun/Makefile @@ -0,0 +1,85 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Define the module and object file sets. +# +MODULE = iptun +OBJECTS = $(IPTUN_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(IPTUN_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE) +CONF_SRCDIR = $(UTSBASE)/common/inet/iptun + +# +# Include common rules. +# +include $(UTSBASE)/sparc/Makefile.sparc + +# +# Define targets +# +ALL_TARGET = $(BINARY) $(SRC_CONFFILE) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) + +# +# Overrides +# +CFLAGS += $(CCVERBOSE) +LDFLAGS += -dy -Ndrv/dld -Nmisc/dls -Nmisc/mac -Ndrv/ip + +LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN +LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/sparc/Makefile.targ diff --git a/usr/src/uts/sparc/iptunq/Makefile b/usr/src/uts/sparc/iptunq/Makefile new file mode 100644 index 0000000000..354662f12f --- /dev/null +++ b/usr/src/uts/sparc/iptunq/Makefile @@ -0,0 +1,83 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. 
+# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Define the module and object file sets. +# +MODULE = iptunq +OBJECTS = $(IPTUNQ_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(IPTUNQ_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE) +CONF_SRCDIR = $(UTSBASE)/common/inet/iptun + +# +# Include common rules. +# +include $(UTSBASE)/sparc/Makefile.sparc + +# +# Define targets +# +ALL_TARGET = $(BINARY) $(SRC_CONFFILE) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) + +# +# lint pass one enforcement +# +CFLAGS += $(CCVERBOSE) + +LDFLAGS += -dy -Ndrv/ip + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/sparc/Makefile.targ diff --git a/usr/src/uts/sparc/atun/Makefile b/usr/src/uts/sparc/mac_6to4/Makefile index 19adddaea8..b9f87dc2f7 100644 --- a/usr/src/uts/sparc/atun/Makefile +++ b/usr/src/uts/sparc/mac_6to4/Makefile @@ -2,9 +2,8 @@ # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). 
You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. @@ -20,16 +19,13 @@ # CDDL HEADER END # # -# Copyright 2004 Sun Microsystems, Inc. All rights reserved. +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -#ident "%Z%%M% %I% %E% SMI" # -# This makefile drives the production of the automatic tunneling streams +# This makefile drives the production of the mac_6to4 plugin # kernel module. # -# SPARC architecture dependent -# # # Path to the base of the uts directory tree (usually /usr/src/uts). @@ -39,10 +35,10 @@ UTSBASE = ../.. # # Define the module and object file sets. # -MODULE = atun -OBJECTS = $(ATUN_OBJS:%=$(OBJS_DIR)/%) -LINTS = $(ATUN_OBJS:%.o=$(LINTS_DIR)/%.ln) -ROOTMODULE = $(ROOT_STRMOD_DIR)/$(MODULE) +MODULE = mac_6to4 +OBJECTS = $(MAC_6TO4_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(MAC_6TO4_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_MAC_DIR)/$(MODULE) # # Include common rules. @@ -57,14 +53,13 @@ LINT_TARGET = $(MODULE).lint INSTALL_TARGET = $(BINARY) $(ROOTMODULE) # -# lint pass one enforcement +# Overrides. # CFLAGS += $(CCVERBOSE) +LDFLAGS += -dy -Nmisc/mac -Nmac/mac_ipv4 -# -# depends on tun -# -LDFLAGS += -dy -Nstrmod/tun +LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN +LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW # # Default build targets. diff --git a/usr/src/uts/sparc/6to4tun/Makefile b/usr/src/uts/sparc/mac_ipv4/Makefile index 415ac65f6e..f9d0b1f58b 100644 --- a/usr/src/uts/sparc/6to4tun/Makefile +++ b/usr/src/uts/sparc/mac_ipv4/Makefile @@ -2,9 +2,8 @@ # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). 
You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. @@ -20,16 +19,13 @@ # CDDL HEADER END # # -# Copyright 2004 Sun Microsystems, Inc. All rights reserved. +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -#ident "%Z%%M% %I% %E% SMI" # -# This makefile drives the production of the 6to4 tunneling streams +# This makefile drives the production of the mac_ipv4 plugin # kernel module. # -# SPARC architecture dependent -# # # Path to the base of the uts directory tree (usually /usr/src/uts). @@ -39,10 +35,10 @@ UTSBASE = ../.. # # Define the module and object file sets. # -MODULE = 6to4tun -OBJECTS = $(6TO4TUN_OBJS:%=$(OBJS_DIR)/%) -LINTS = $(6TO4TUN_OBJS:%.o=$(LINTS_DIR)/%.ln) -ROOTMODULE = $(ROOT_STRMOD_DIR)/$(MODULE) +MODULE = mac_ipv4 +OBJECTS = $(MAC_IPV4_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(MAC_IPV4_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_MAC_DIR)/$(MODULE) # # Include common rules. @@ -57,14 +53,13 @@ LINT_TARGET = $(MODULE).lint INSTALL_TARGET = $(BINARY) $(ROOTMODULE) # -# lint pass one enforcement +# Overrides. # CFLAGS += $(CCVERBOSE) +LDFLAGS += -dy -Nmisc/mac -# -# depends on tun -# -LDFLAGS += -dy -Nstrmod/tun +LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN +LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW # # Default build targets. diff --git a/usr/src/uts/sparc/mac_ipv6/Makefile b/usr/src/uts/sparc/mac_ipv6/Makefile new file mode 100644 index 0000000000..b726e67c63 --- /dev/null +++ b/usr/src/uts/sparc/mac_ipv6/Makefile @@ -0,0 +1,88 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. 
+# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# +# This makefile drives the production of the mac_ipv6 plugin +# kernel module. +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Define the module and object file sets. +# +MODULE = mac_ipv6 +OBJECTS = $(MAC_IPV6_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(MAC_IPV6_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_MAC_DIR)/$(MODULE) + +# +# Include common rules. +# +include $(UTSBASE)/sparc/Makefile.sparc + +# +# Define targets +# +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +# +# Overrides. +# +CFLAGS += $(CCVERBOSE) +LDFLAGS += -dy -Nmisc/mac -Nmac/mac_ipv4 + +LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN +LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. 
+# +include $(UTSBASE)/sparc/Makefile.targ diff --git a/usr/src/uts/sparc/ml/modstubs.s b/usr/src/uts/sparc/ml/modstubs.s index c598ad923d..44eed94fe2 100644 --- a/usr/src/uts/sparc/ml/modstubs.s +++ b/usr/src/uts/sparc/ml/modstubs.s @@ -1200,9 +1200,9 @@ stubs_base: MODULE(dld,drv); STUB(dld, dld_init_ops, nomod_void); STUB(dld, dld_fini_ops, nomod_void); + STUB(dld, dld_autopush, nomod_minus_one); STUB(dld, dld_ioc_register, nomod_einval); STUB(dld, dld_ioc_unregister, nomod_void); - STUB(dld, dld_autopush, nomod_minus_one); END_MODULE(dld); #endif @@ -1233,6 +1233,16 @@ stubs_base: END_MODULE(softmac); #endif +#ifndef IPTUN_MODULE + MODULE(iptun,drv); + STUB(iptun, iptun_create, nomod_einval); + STUB(iptun, iptun_delete, nomod_einval); + STUB(iptun, iptun_set_policy, nomod_einval); + STUB(iptun, iptun_set_g_q, nomod_einval); + STUB(iptun, iptun_clear_g_q, nomod_void); + END_MODULE(iptun); +#endif + /* * Stubs for kssl, the kernel SSL proxy */ diff --git a/usr/src/uts/sparc/os/minor_perm b/usr/src/uts/sparc/os/minor_perm index d0b59a7f0a..0804eec441 100644 --- a/usr/src/uts/sparc/os/minor_perm +++ b/usr/src/uts/sparc/os/minor_perm @@ -189,5 +189,6 @@ physmem:* 0600 root sys sdp:sdp 0666 root sys nsmb:* 0666 root sys bmc:bmc 0666 root sys +iptunq:* 0640 root sys fm:* 0644 root sys clone:bridge 0666 root sys diff --git a/usr/src/uts/sparc/os/name_to_major b/usr/src/uts/sparc/os/name_to_major index a3dca78f1f..33209230d4 100644 --- a/usr/src/uts/sparc/os/name_to_major +++ b/usr/src/uts/sparc/os/name_to_major @@ -229,3 +229,5 @@ ipnet 281 dcpc 282 simnet 283 bridge 284 +iptun 285 +iptunq 286 diff --git a/usr/src/uts/sparc/tun/Makefile b/usr/src/uts/sparc/tun/Makefile deleted file mode 100644 index eb80882117..0000000000 --- a/usr/src/uts/sparc/tun/Makefile +++ /dev/null @@ -1,109 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). 
-# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# uts/sparc/tun/Makefile -# Copyright 2007 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -#ident "%Z%%M% %I% %E% SMI" -# -# This makefile drives the production of the configured tunneling -# streams kernel module. -# -# SPARC architecture dependent -# - -# -# Path to the base of the uts directory tree (usually /usr/src/uts). -# -UTSBASE = ../.. - -# -# Define the module and object file sets. -# -MODULE = tun -OBJECTS = $(TUN_OBJS:%=$(OBJS_DIR)/%) -LINTS = $(TUN_OBJS:%.o=$(LINTS_DIR)/%.ln) -ROOTMODULE = $(ROOT_STRMOD_DIR)/$(MODULE) - -# -# Extra for $(MODULE).check target -# -# Need to remove ipddi.o since it has non-static defines for _init etc. -IP_CHECK_OBJS = $(IP_OBJS:ipddi.o=ip.o) -EXTRA_CHECK_OBJS = $(IP_CHECK_OBJS:%=../ip/$(OBJS_DIR)/%) - -# -# Include common rules. -# -include $(UTSBASE)/sparc/Makefile.sparc - -# -# Define targets -# -ALL_TARGET = $(BINARY) -LINT_TARGET = $(MODULE).lint -INSTALL_TARGET = $(BINARY) $(ROOTMODULE) - -# -# lint pass one enforcement -# -CFLAGS += $(CCVERBOSE) - -# -# depends on ip and ip6 -# -LDFLAGS += -dy -Ndrv/ip -Ndrv/ip6 - -# -# For now, disable these lint checks; maintainers should endeavor -# to investigate and remove these for maximum lint coverage. -# Please do not carry these forward to new Makefiles. 
-# -LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN -LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW -LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV - -# -# Default build targets. -# -.KEEP_STATE: - -def: $(DEF_DEPS) - -all: $(ALL_DEPS) $(SISCHECK_DEPS) - -clean: $(CLEAN_DEPS) $(SISCLEAN_DEPS) - -clobber: $(CLOBBER_DEPS) $(SISCLEAN_DEPS) - -lint: $(LINT_DEPS) - -modlintlib: $(MODLINTLIB_DEPS) - -clean.lint: $(CLEAN_LINT_DEPS) - -install: $(INSTALL_DEPS) $(SISCHECK_DEPS) - -# -# Include common targets. -# -include $(UTSBASE)/sparc/Makefile.targ |