diff options
author | danmcd <none@none> | 2007-09-04 06:48:33 -0700 |
---|---|---|
committer | danmcd <none@none> | 2007-09-04 06:48:33 -0700 |
commit | 437220cd296f6d8b6654d6d52508b40b1e2d1ac7 (patch) | |
tree | bf55ece35d9f14bf9e462be1af01cd10045c2021 /usr/src | |
parent | 32d43904c87a66c0f455059d99a96cc0ba128ad7 (diff) | |
download | illumos-joyent-437220cd296f6d8b6654d6d52508b40b1e2d1ac7.tar.gz |
PSARC 2007/449 Detangle IPsec NAT Traversal
6481450 nattymod calls putnext() on a freed queue.
6558864 remove nattymod
6558870 Implement SA last-used time and idle actions
6582318 "mandatory" is spelled wrong in pfiles
6584011 save_assoc() gets confused w.r.t. "proto".
6588015 Missing "encap udp" must be better diagnosed by ipseckey(1M).
6595368 Need "ipsec-nat-t" in /etc/services
6595877 ipseckey(1M) can produce output it can't read back in (line-too-big)
--HG--
rename : usr/src/uts/common/inet/ip/nattymod.c => deleted_files/usr/src/uts/common/inet/ip/nattymod.c
rename : usr/src/uts/intel/nattymod/Makefile => deleted_files/usr/src/uts/intel/nattymod/Makefile
rename : usr/src/uts/sparc/nattymod/Makefile => deleted_files/usr/src/uts/sparc/nattymod/Makefile
Diffstat (limited to 'usr/src')
32 files changed, 852 insertions, 1586 deletions
diff --git a/usr/src/cmd/cmd-inet/etc/services b/usr/src/cmd/cmd-inet/etc/services index a39fbf5427..a4862b5bbe 100644 --- a/usr/src/cmd/cmd-inet/etc/services +++ b/usr/src/cmd/cmd-inet/etc/services @@ -136,6 +136,7 @@ nfsd 2049/tcp nfs # NFS server daemon (cots) eklogin 2105/tcp # Kerberos encrypted rlogin lockd 4045/udp # NFS lock daemon/manager lockd 4045/tcp +ipsec-nat-t 4500/udp # IPsec NAT-Traversal mdns 5353/udp # Multicast DNS mdns 5353/tcp dtspc 6112/tcp # CDE subprocess control diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ipsecutils/ipseckey.c b/usr/src/cmd/cmd-inet/usr.sbin/ipsecutils/ipseckey.c index ff3a0e3be0..de672a8bc8 100644 --- a/usr/src/cmd/cmd-inet/usr.sbin/ipsecutils/ipseckey.c +++ b/usr/src/cmd/cmd-inet/usr.sbin/ipsecutils/ipseckey.c @@ -1727,13 +1727,6 @@ doaddup(int cmd, int satype, char *argv[], char *ebuf) "single NAT-T local port.\n")); break; } - - if (natt_rport != 0) { - ERROR(ep, ebuf, gettext( - "Can only specify " - "one of NAT-T remote and local port.\n")); - break; - } natt_lport = parsenum(*argv, B_TRUE, ebuf); argv++; break; @@ -1744,13 +1737,6 @@ doaddup(int cmd, int satype, char *argv[], char *ebuf) "single NAT-T remote port.\n")); break; } - - if (natt_lport != 0) { - ERROR(ep, ebuf, gettext( - "Can only specify " - "one of NAT-T remote and local port.\n")); - break; - } natt_rport = parsenum(*argv, B_TRUE, ebuf); argv++; break; @@ -2371,38 +2357,58 @@ doaddup(int cmd, int satype, char *argv[], char *ebuf) handle_errors(ep, ebuf, B_TRUE, B_FALSE); +#define PORT_ONLY_ALLOCATE(af, socktype, exttype, extvar, port) { \ + alloclen = sizeof (sadb_address_t) + roundup(sizeof (socktype), 8); \ + (extvar) = calloc(1, alloclen); \ + if ((extvar) == NULL) { \ + Bail("malloc(implicit port)"); \ + } \ + totallen += alloclen; \ + (extvar)->sadb_address_len = SADB_8TO64(alloclen); \ + (extvar)->sadb_address_exttype = (exttype); \ + /* sin/sin6 has equivalent offsets for ports! 
*/ \ + sin6 = (struct sockaddr_in6 *)((extvar) + 1); \ + sin6->sin6_family = (af); \ + sin6->sin6_port = (port); \ + } + /* - * If we specify inner ports w/o addresses, we still need to - * allocate. Also, if we have one inner address, we need the + * If we specify inner ports or NAT ports w/o addresses, we still need + * to allocate. Also, if we have one inner address, we need the * other, even if we don't specify anything. */ - if (alloc_inner && idst == NULL) { - /* Allocate zeroed-out. */ - alloclen = sizeof (*idst) + sizeof (struct sockaddr_in6); - idst = calloc(1, alloclen); - if (idst == NULL) { - Bail("malloc(implicit idst)"); + if (use_natt) { + if (natt_lport != 0 && natt_local == NULL) { + PORT_ONLY_ALLOCATE(AF_INET, struct sockaddr_in, + SADB_X_EXT_ADDRESS_NATT_LOC, natt_local, + natt_lport); + } + + if (natt_rport != 0 && natt_remote == NULL) { + PORT_ONLY_ALLOCATE(AF_INET, struct sockaddr_in, + SADB_X_EXT_ADDRESS_NATT_REM, natt_remote, + natt_rport); + } + } else { + if (natt_lport != 0 || natt_rport != 0) { + ERROR(ep, ebuf, gettext("Must specify 'encap udp' " + "with any NAT-T port.\n")); + } else if (natt_local != NULL || natt_remote != NULL) { + ERROR(ep, ebuf, gettext("Must specify 'encap udp' " + "with any NAT-T address.\n")); } - totallen += alloclen; - idst->sadb_address_len = SADB_8TO64(alloclen); - idst->sadb_address_exttype = SADB_X_EXT_ADDRESS_INNER_DST; - sin6 = (struct sockaddr_in6 *)(idst + 1); - sin6->sin6_family = AF_INET6; + } + + if (alloc_inner && idst == NULL) { + PORT_ONLY_ALLOCATE(AF_INET6, struct sockaddr_in6, + SADB_X_EXT_ADDRESS_INNER_DST, idst, 0); } if (alloc_inner && isrc == NULL) { - /* Allocate zeroed-out. 
*/ - alloclen = sizeof (*isrc) + sizeof (struct sockaddr_in6); - isrc = calloc(1, alloclen); - if (isrc == NULL) { - Bail("malloc(implicit isrc)"); - } - totallen += alloclen; - isrc->sadb_address_len = SADB_8TO64(alloclen); - isrc->sadb_address_exttype = SADB_X_EXT_ADDRESS_INNER_SRC; - sin6 = (struct sockaddr_in6 *)(isrc + 1); - sin6->sin6_family = AF_INET6; + PORT_ONLY_ALLOCATE(AF_INET6, struct sockaddr_in6, + SADB_X_EXT_ADDRESS_INNER_SRC, isrc, 0); } +#undef PORT_ONLY_ALLOCATE /* * Okay, so now I have all of the potential extensions! @@ -2558,18 +2564,6 @@ doaddup(int cmd, int satype, char *argv[], char *ebuf) "for UDP encapsulation.\n")); } - if (natt_lport != 0 && natt_local == NULL) { - ERROR(ep, ebuf, gettext( - "If NAT-T local port is specified, NAT-T " - "local address must also be specified.\n")); - } - - if (natt_rport != 0 && natt_remote == NULL) { - ERROR(ep, ebuf, gettext( - "If NAT-T remote port is specified, NAT-T " - "remote address must also be specified.\n")); - } - if (natt_remote != NULL) { bcopy(natt_remote, nexthdr, SADB_64TO8(natt_remote->sadb_address_len)); diff --git a/usr/src/cmd/ptools/pfiles/pfiles.c b/usr/src/cmd/ptools/pfiles/pfiles.c index cf1282f7b1..891a370c7d 100644 --- a/usr/src/cmd/ptools/pfiles/pfiles.c +++ b/usr/src/cmd/ptools/pfiles/pfiles.c @@ -46,6 +46,7 @@ #include <netdb.h> #include <libproc.h> #include <netinet/in.h> +#include <netinet/udp.h> #include <arpa/inet.h> #include <netdb.h> @@ -103,11 +104,11 @@ main(int argc, char **argv) if (errflg || argc <= 0) { (void) fprintf(stderr, "usage:\t%s [-F] pid ...\n", - command); + command); (void) fprintf(stderr, - " (report open files of each process)\n"); + " (report open files of each process)\n"); (void) fprintf(stderr, - " -F: force grabbing of the target process\n"); + " -F: force grabbing of the target process\n"); exit(2); } @@ -136,19 +137,19 @@ main(int argc, char **argv) if ((pid = proc_arg_psinfo(arg = *argv++, PR_ARG_PIDS, &psinfo, &gret)) == -1) { (void) 
fprintf(stderr, "%s: cannot examine %s: %s\n", - command, arg, Pgrab_error(gret)); + command, arg, Pgrab_error(gret)); retc++; } else if ((Pr = Pgrab(pid, Fflag, &gret)) != NULL) { if (Pcreate_agent(Pr) == 0) { proc_unctrl_psinfo(&psinfo); (void) printf("%d:\t%.70s\n", - (int)pid, psinfo.pr_psargs); + (int)pid, psinfo.pr_psargs); show_files(Pr); Pdestroy_agent(Pr); } else { (void) fprintf(stderr, - "%s: cannot control process %d\n", - command, (int)pid); + "%s: cannot control process %d\n", + command, (int)pid); retc++; } Prelease(Pr, 0); @@ -159,7 +160,7 @@ main(int argc, char **argv) case G_SELF: proc_unctrl_psinfo(&psinfo); (void) printf("%d:\t%.70s\n", (int)pid, - psinfo.pr_psargs); + psinfo.pr_psargs); if (gret == G_SYS) (void) printf(" [system process]\n"); else @@ -167,13 +168,11 @@ main(int argc, char **argv) break; default: (void) fprintf(stderr, "%s: %s: %d\n", - command, Pgrab_error(gret), (int)pid); + command, Pgrab_error(gret), (int)pid); retc++; break; } } - - } (void) proc_finistdio(); @@ -257,20 +256,18 @@ show_files(struct ps_prochandle *Pr) default: s = unknown; (void) sprintf(s, "0x%.4x ", - (int)statb.st_mode & S_IFMT); + (int)statb.st_mode & S_IFMT); break; } - (void) printf("%4d: %s mode:0%.3o", - fd, - s, - (int)statb.st_mode & ~S_IFMT); + (void) printf("%4d: %s mode:0%.3o", fd, s, + (int)statb.st_mode & ~S_IFMT); if (major(statb.st_dev) != (major_t)NODEV && minor(statb.st_dev) != (minor_t)NODEV) (void) printf(" dev:%lu,%lu", - (ulong_t)major(statb.st_dev), - (ulong_t)minor(statb.st_dev)); + (ulong_t)major(statb.st_dev), + (ulong_t)minor(statb.st_dev)); else (void) printf(" dev:0x%.8lX", (long)statb.st_dev); @@ -284,26 +281,24 @@ show_files(struct ps_prochandle *Pr) } (void) printf(" ino:%llu uid:%d gid:%d", - (u_longlong_t)statb.st_ino, - (int)statb.st_uid, - (int)statb.st_gid); + (u_longlong_t)statb.st_ino, + (int)statb.st_uid, (int)statb.st_gid); if (rdev == NODEV) (void) printf(" size:%lld\n", - (longlong_t)statb.st_size); + 
(longlong_t)statb.st_size); else if (major(rdev) != (major_t)NODEV && minor(rdev) != (minor_t)NODEV) (void) printf(" rdev:%lu,%lu\n", - (ulong_t)major(rdev), - (ulong_t)minor(rdev)); + (ulong_t)major(rdev), (ulong_t)minor(rdev)); else (void) printf(" rdev:0x%.8lX\n", (long)rdev); if (!nflag) { dofcntl(Pr, fd, - (statb.st_mode & (S_IFMT|S_ENFMT|S_IXGRP)) - == (S_IFREG|S_ENFMT), - (statb.st_mode & S_IFMT) == S_IFDOOR); + (statb.st_mode & (S_IFMT|S_ENFMT|S_IXGRP)) + == (S_IFREG|S_ENFMT), + (statb.st_mode & S_IFMT) == S_IFDOOR); if ((statb.st_mode & S_IFMT) == S_IFSOCK) dosocket(Pr, fd); @@ -341,7 +336,7 @@ getflock(struct ps_prochandle *Pr, int fd, struct flock *flock_native) /* examine open file with fcntl() */ static void -dofcntl(struct ps_prochandle *Pr, int fd, int manditory, int isdoor) +dofcntl(struct ps_prochandle *Pr, int fd, int mandatory, int isdoor) { struct flock flock; int fileflags; @@ -376,8 +371,8 @@ dofcntl(struct ps_prochandle *Pr, int fd, int manditory, int isdoor) unsigned long sysid = flock.l_sysid; (void) printf(" %s %s lock set by", - manditory? "manditory" : "advisory", - flock.l_type == F_RDLCK? "read" : "write"); + mandatory ? "mandatory" : "advisory", + flock.l_type == F_RDLCK? 
"read" : "write"); if (sysid) (void) printf(" system 0x%lX", sysid); if (flock.l_pid) @@ -448,7 +443,7 @@ show_fileflags(int flags) (void) strcat(str, "|O_XATTR"); if (flags & ~(ALL_O_FLAGS)) (void) sprintf(str + strlen(str), "|0x%x", - flags & ~(ALL_O_FLAGS)); + flags & ~(ALL_O_FLAGS)); (void) printf("%s", str); } @@ -469,7 +464,7 @@ show_door(struct ps_prochandle *Pr, int fd) (void) printf(" door to "); if (psinfo.pr_fname[0] != '\0') (void) printf("%s[%d]", psinfo.pr_fname, - (int)door_info.di_target); + (int)door_info.di_target); else (void) printf("pid %d", (int)door_info.di_target); } @@ -507,7 +502,7 @@ show_sockaddr(const char *str, struct sockaddr *sa, socklen_t len) len -= sizeof (so_un->sun_family); so_un->sun_path[len] = NULL; (void) printf("\t%s: AF_UNIX %s\n", - str, so_un->sun_path); + str, so_un->sun_path); } return; case AF_IMPLINK: p = "AF_IMPLINK"; break; @@ -562,19 +557,21 @@ show_sockopts(struct ps_prochandle *Pr, int fd) int i; in_addr_t nexthop_val; struct boolopt { + int level; int opt; const char *name; }; static struct boolopt boolopts[] = { - { SO_DEBUG, "SO_DEBUG," }, - { SO_REUSEADDR, "SO_REUSEADDR," }, - { SO_KEEPALIVE, "SO_KEEPALIVE," }, - { SO_DONTROUTE, "SO_DONTROUTE," }, - { SO_BROADCAST, "SO_BROADCAST," }, - { SO_OOBINLINE, "SO_OOBINLINE," }, - { SO_DGRAM_ERRIND, "SO_DGRAM_ERRIND,"}, - { SO_ALLZONES, "SO_ALLZONES," }, - { SO_EXCLBIND, "SO_EXCLBIND," }, + { SOL_SOCKET, SO_DEBUG, "SO_DEBUG," }, + { SOL_SOCKET, SO_REUSEADDR, "SO_REUSEADDR," }, + { SOL_SOCKET, SO_KEEPALIVE, "SO_KEEPALIVE," }, + { SOL_SOCKET, SO_DONTROUTE, "SO_DONTROUTE," }, + { SOL_SOCKET, SO_BROADCAST, "SO_BROADCAST," }, + { SOL_SOCKET, SO_OOBINLINE, "SO_OOBINLINE," }, + { SOL_SOCKET, SO_DGRAM_ERRIND, "SO_DGRAM_ERRIND,"}, + { SOL_SOCKET, SO_ALLZONES, "SO_ALLZONES," }, + { SOL_SOCKET, SO_EXCLBIND, "SO_EXCLBIND," }, + { IPPROTO_UDP, UDP_NAT_T_ENDPOINT, "UDP_NAT_T_ENDPOINT," }, }; struct linger l; @@ -583,8 +580,8 @@ show_sockopts(struct ps_prochandle *Pr, int fd) for 
(i = 0; i < sizeof (boolopts) / sizeof (boolopts[0]); i++) { vlen = sizeof (val); - if (pr_getsockopt(Pr, fd, SOL_SOCKET, boolopts[i].opt, &val, - &vlen) == 0 && val != 0) + if (pr_getsockopt(Pr, fd, boolopts[i].level, boolopts[i].opt, + &val, &vlen) == 0 && val != 0) (void) strlcat(buf, boolopts[i].name, sizeof (buf)); } @@ -629,7 +626,7 @@ dosocket(struct ps_prochandle *Pr, int fd) { /* A buffer large enough for PATH_MAX size AF_UNIX address */ long buf[(sizeof (short) + PATH_MAX + sizeof (long) - 1) - / sizeof (long)]; + / sizeof (long)]; struct sockaddr *sa = (struct sockaddr *)buf; socklen_t len; int type, tlen; diff --git a/usr/src/cmd/truss/print.c b/usr/src/cmd/truss/print.c index 395e618ce3..56d7b8ab2b 100644 --- a/usr/src/cmd/truss/print.c +++ b/usr/src/cmd/truss/print.c @@ -1897,6 +1897,7 @@ udp_optname(private_t *pri, long val) case UDP_ANONPRIVBIND: return ("UDP_ANONPRIVBIND"); case UDP_EXCLBIND: return ("UDP_EXCLBIND"); case UDP_RCVHDR: return ("UDP_RCVHDR"); + case UDP_NAT_T_ENDPOINT: return ("UDP_NAT_T_ENDPOINT"); default: (void) snprintf(pri->code_buf, sizeof (pri->code_buf), "0x%lx", diff --git a/usr/src/lib/libipsecutil/common/ipsec_util.c b/usr/src/lib/libipsecutil/common/ipsec_util.c index 56ced009ee..e5a4daecdd 100644 --- a/usr/src/lib/libipsecutil/common/ipsec_util.c +++ b/usr/src/lib/libipsecutil/common/ipsec_util.c @@ -2438,7 +2438,7 @@ void save_assoc(uint64_t *buffer, FILE *ofile) { int terrno; - int seen_proto = 0; + boolean_t seen_proto = B_FALSE, seen_iproto = B_FALSE; uint64_t *current; struct sadb_address *addr; struct sadb_msg *samsg = (struct sadb_msg *)buffer; @@ -2463,6 +2463,7 @@ save_assoc(uint64_t *buffer, FILE *ofile) struct sadb_sa *assoc; ext = (struct sadb_ext *)current; + addr = (struct sadb_address *)ext; /* Just in case... 
*/ switch (ext->sadb_ext_type) { case SADB_EXT_SA: assoc = (struct sadb_sa *)ext; @@ -2523,19 +2524,28 @@ save_assoc(uint64_t *buffer, FILE *ofile) } savenl(); break; - case SADB_EXT_ADDRESS_SRC: - case SADB_EXT_ADDRESS_DST: case SADB_X_EXT_ADDRESS_INNER_SRC: case SADB_X_EXT_ADDRESS_INNER_DST: - case SADB_X_EXT_ADDRESS_NATT_REM: - case SADB_X_EXT_ADDRESS_NATT_LOC: - addr = (struct sadb_address *)ext; + if (!seen_iproto && addr->sadb_address_proto) { + (void) fprintf(ofile, " iproto %d", + addr->sadb_address_proto); + savenl(); + seen_iproto = B_TRUE; + } + goto skip_srcdst; /* Hack to avoid cases below... */ + /* FALLTHRU */ + case SADB_EXT_ADDRESS_SRC: + case SADB_EXT_ADDRESS_DST: if (!seen_proto && addr->sadb_address_proto) { (void) fprintf(ofile, " proto %d", addr->sadb_address_proto); savenl(); - seen_proto = 1; + seen_proto = B_TRUE; } + /* FALLTHRU */ + case SADB_X_EXT_ADDRESS_NATT_REM: + case SADB_X_EXT_ADDRESS_NATT_LOC: +skip_srcdst: if (!save_address(addr, ofile)) { tidyup(); bail(dgettext(TEXT_DOMAIN, "save_address")); diff --git a/usr/src/lib/libipsecutil/common/ipsec_util.h b/usr/src/lib/libipsecutil/common/ipsec_util.h index d540e2bd2e..a78831e678 100644 --- a/usr/src/lib/libipsecutil/common/ipsec_util.h +++ b/usr/src/lib/libipsecutil/common/ipsec_util.h @@ -56,7 +56,7 @@ extern "C" { /* used for file parsing */ #define NBUF_SIZE 16 -#define IBUF_SIZE 512 +#define IBUF_SIZE 2048 #define COMMENT_CHAR '#' #define CONT_CHAR '\\' #define QUOTE_CHAR '"' diff --git a/usr/src/pkgdefs/SUNWckr/prototype_i386 b/usr/src/pkgdefs/SUNWckr/prototype_i386 index 15524810e2..d8b6aeac14 100644 --- a/usr/src/pkgdefs/SUNWckr/prototype_i386 +++ b/usr/src/pkgdefs/SUNWckr/prototype_i386 @@ -221,7 +221,6 @@ l none kernel/strmod/ipsecah=../../kernel/drv/ipsecah l none kernel/strmod/ipsecesp=../../kernel/drv/ipsecesp l none kernel/strmod/keysock=../../kernel/drv/keysock f none kernel/strmod/ldterm 755 root sys -f none kernel/strmod/nattymod 755 root sys f none kernel/strmod/pckt 
755 root sys f none kernel/strmod/pfmod 755 root sys f none kernel/strmod/pipemod 755 root sys @@ -421,7 +420,6 @@ l none kernel/strmod/amd64/ipsecah=../../../kernel/drv/amd64/ipsecah l none kernel/strmod/amd64/ipsecesp=../../../kernel/drv/amd64/ipsecesp l none kernel/strmod/amd64/keysock=../../../kernel/drv/amd64/keysock f none kernel/strmod/amd64/ldterm 755 root sys -f none kernel/strmod/amd64/nattymod 755 root sys f none kernel/strmod/amd64/pckt 755 root sys f none kernel/strmod/amd64/pfmod 755 root sys f none kernel/strmod/amd64/pipemod 755 root sys diff --git a/usr/src/pkgdefs/SUNWckr/prototype_sparc b/usr/src/pkgdefs/SUNWckr/prototype_sparc index 957e5e0b5d..666868d6ac 100644 --- a/usr/src/pkgdefs/SUNWckr/prototype_sparc +++ b/usr/src/pkgdefs/SUNWckr/prototype_sparc @@ -223,7 +223,6 @@ l none kernel/strmod/sparcv9/ipsecesp=../../../kernel/drv/sparcv9/ipsecesp l none kernel/strmod/sparcv9/keysock=../../../kernel/drv/sparcv9/keysock f none kernel/strmod/sparcv9/ldterm 755 root sys f none kernel/strmod/sparcv9/ms 755 root sys -f none kernel/strmod/sparcv9/nattymod 755 root sys f none kernel/strmod/sparcv9/pckt 755 root sys f none kernel/strmod/sparcv9/pfmod 755 root sys f none kernel/strmod/sparcv9/pipemod 755 root sys diff --git a/usr/src/tools/scripts/bfu.sh b/usr/src/tools/scripts/bfu.sh index 7dd25a5f92..62d71b5954 100644 --- a/usr/src/tools/scripts/bfu.sh +++ b/usr/src/tools/scripts/bfu.sh @@ -6350,6 +6350,13 @@ mondo_loop() { rm -f $rootprefix/kernel/strmod/sparcv9/authmd5h rm -f $rootprefix/kernel/strmod/sparcv9/authsha1 + # + # Remove the now-obsolete "nattymod" STREAMS module. + # + rm -f $rootprefix/kernel/strmod/nattymod + rm -f $rootprefix/kernel/strmod/amd64/nattymod + rm -f $rootprefix/kernel/strmod/sparcv9/nattymod + # # Remove obsolete SSA utility, firmware and fcode. 
# usr/lib/firmware/ssa contains ssafirmware diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files index 1cb490b2b8..603769157c 100644 --- a/usr/src/uts/common/Makefile.files +++ b/usr/src/uts/common/Makefile.files @@ -498,8 +498,6 @@ IPSECESP_OBJS += ipsecespddi.o ipsecesp.o IPSECAH_OBJS += ipsecahddi.o ipsecah.o sadb.o -NATTYMOD_OBJS += nattymod.o - SPPP_OBJS += sppp.o sppp_dlpi.o sppp_mod.o s_common.o SPPPTUN_OBJS += sppptun.o sppptun_mod.o diff --git a/usr/src/uts/common/inet/ip.h b/usr/src/uts/common/inet/ip.h index 3e6dd451be..0e8139f2f0 100644 --- a/usr/src/uts/common/inet/ip.h +++ b/usr/src/uts/common/inet/ip.h @@ -425,7 +425,7 @@ typedef enum { ip_udp_input(q, mp, ipha, ire, recv_ill); \ break; \ default: \ - ip_proto_input(q, mp, ipha, ire, recv_ill); \ + ip_proto_input(q, mp, ipha, ire, recv_ill, B_FALSE); \ break; \ } \ } @@ -3145,7 +3145,8 @@ extern void ip_mib2_add_ip_stats(mib2_ipIfStatsEntry_t *, extern void ip_mib2_add_icmp6_stats(mib2_ipv6IfIcmpEntry_t *, mib2_ipv6IfIcmpEntry_t *); extern void ip_udp_input(queue_t *, mblk_t *, ipha_t *, ire_t *, ill_t *); -extern void ip_proto_input(queue_t *, mblk_t *, ipha_t *, ire_t *, ill_t *); +extern void ip_proto_input(queue_t *, mblk_t *, ipha_t *, ire_t *, ill_t *, + boolean_t); extern void ip_rput_other(ipsq_t *, queue_t *, mblk_t *, void *); extern ire_t *ip_check_multihome(void *, ire_t *, ill_t *); extern void ip_setqinfo(queue_t *, minor_t, boolean_t, ip_stack_t *); diff --git a/usr/src/uts/common/inet/ip/ip.c b/usr/src/uts/common/inet/ip/ip.c index 1bc86df113..92dd737eba 100644 --- a/usr/src/uts/common/inet/ip/ip.c +++ b/usr/src/uts/common/inet/ip/ip.c @@ -396,12 +396,12 @@ uint32_t (*cl_inet_ipident)(uint8_t protocol, sa_family_t addr_family, * Walker - Increment irb_refcnt before calling the walker callback. Hold the * global tree lock (read mode) for traversal. 
* - * IPSEC notes : + * IPsec notes : * - * IP interacts with the IPSEC code (AH/ESP) by tagging a M_CTL message + * IP interacts with the IPsec code (AH/ESP) by tagging a M_CTL message * in front of the actual packet. For outbound datagrams, the M_CTL * contains a ipsec_out_t (defined in ipsec_info.h), which has the - * information used by the IPSEC code for applying the right level of + * information used by the IPsec code for applying the right level of * protection. The information initialized by IP in the ipsec_out_t * is determined by the per-socket policy or global policy in the system. * For inbound datagrams, the M_CTL contains a ipsec_in_t (defined in @@ -1495,7 +1495,7 @@ icmp_frag_needed(queue_t *q, mblk_t *mp, int mtu, zoneid_t zoneid, * value etc. but delivery to the ULP/clients depends on their policy * dispositions. * - * We handle the above 4 cases in the context of IPSEC in the + * We handle the above 4 cases in the context of IPsec in the * following way : * * 1) Send the reply back in the same way as the request came in. @@ -2365,12 +2365,12 @@ icmp_inbound_self_encap_error(mblk_t *mp, int iph_hdr_length, int hdr_length) * IP header of the packet that caused the error. * * We handle ICMP_FRAGMENTATION_NEEDED(IFN) message differently - * in the context of IPSEC. Normally we tell the upper layer - * whenever we send the ire (including ip_bind), the IPSEC header + * in the context of IPsec. Normally we tell the upper layer + * whenever we send the ire (including ip_bind), the IPsec header * length in ire_ipsec_overhead. TCP can deduce the MSS as it * has both the MTU (ire_max_frag) and the ire_ipsec_overhead. * Similarly, we pass the new MTU icmph_du_mtu and TCP does the - * same thing. As TCP has the IPSEC options size that needs to be + * same thing. As TCP has the IPsec options size that needs to be * adjusted, we just pass the MTU unchanged. * * IFN could have been generated locally or by some router. 
@@ -2381,16 +2381,16 @@ icmp_inbound_self_encap_error(mblk_t *mp, int iph_hdr_length, int hdr_length) * the new adjusted value of MTU e.g. Packet was encrypted * or there was not enough information to fanout to upper * layers. Thus on the next outbound datagram, ip_wput_ire - * generates the IFN, where IPSEC processing has *not* been + * generates the IFN, where IPsec processing has *not* been * done. * * *ip_wput_ire_fragmentit -> ip_wput_frag -> icmp_frag_needed * could have generated this. This happens because ire_max_frag - * value in IP was set to a new value, while the IPSEC processing + * value in IP was set to a new value, while the IPsec processing * was being done and after we made the fragmentation check in - * ip_wput_ire. Thus on return from IPSEC processing, + * ip_wput_ire. Thus on return from IPsec processing, * ip_wput_ipsec_out finds that the new length is > ire_max_frag - * and generates the IFN. As IPSEC processing is over, we fanout + * and generates the IFN. As IPsec processing is over, we fanout * to AH/ESP to remove the header. * * In both these cases, ipsec_in_loopback will be set indicating @@ -2575,7 +2575,7 @@ icmp_inbound_error_fanout(queue_t *q, ill_t *ill, mblk_t *mp, * this function, it would work. Convert it back * to M_CTL before we send up as this is a ICMP * error. This could have been generated locally or - * by some router. Validate the inner IPSEC + * by some router. Validate the inner IPsec * headers. * * NOTE : ill_index is used by ip_fanout_proto_again @@ -2590,10 +2590,10 @@ icmp_inbound_error_fanout(queue_t *q, ill_t *ill, mblk_t *mp, } else { /* * IPSEC_IN is not present. We attach a ipsec_in - * message and send up to IPSEC for validating - * and removing the IPSEC headers. Clear + * message and send up to IPsec for validating + * and removing the IPsec headers. 
Clear * ipsec_in_secure so that when we return - * from IPSEC, we don't mistakenly think that this + * from IPsec, we don't mistakenly think that this * is a secure packet came from the network. * * NOTE : ill_index is used by ip_fanout_proto_again @@ -3256,7 +3256,7 @@ icmp_pkt(queue_t *q, mblk_t *mp, void *stuff, size_t len, * If it is : * * 1) a IPSEC_OUT, then this is caused by outbound - * datagram originating on this host. IPSEC processing + * datagram originating on this host. IPsec processing * may or may not have been done. Refer to comments above * icmp_inbound_error_fanout for details. * @@ -4463,7 +4463,7 @@ ip_bind_v4(queue_t *q, mblk_t *mp, conn_t *connp) else if (error != 0) goto bad_addr; /* - * Pass the IPSEC headers size in ire_ipsec_overhead. + * Pass the IPsec headers size in ire_ipsec_overhead. * We can't do this in ip_bind_insert_ire because the policy * may not have been inherited at that point in time and hence * conn_out_enforce_policy may not be set. @@ -6353,7 +6353,7 @@ ipsec_in_is_secure(mblk_t *ipsec_mp) * protocol. When this is the case, normally each one gets a copy * of any incoming packets. * - * IPSEC NOTE : + * IPsec NOTE : * * Don't allow a secure packet going up a non-secure connection. * We don't allow this because @@ -6907,6 +6907,132 @@ ip_fanout_tcp(queue_t *q, mblk_t *mp, ill_t *recv_ill, ipha_t *ipha, } /* + * If we have a IPsec NAT-Traversal packet, strip the zero-SPI or + * pass it along to ESP if the SPI is non-zero. + * + * One of three things can happen, all of which affect the passed-in mblk: + * + * 1.) The packet is stock UDP and has had its zero-SPI stripped. Return TRUE. + * (NOTE: ICMP messages that go through here just get returned.) + * + * 2.) The packet is ESP-in-UDP, has been transformed into an equivalent + * ESP packet, and is passed along to ESP. Return FALSE. + * + * 3.) The packet is an ESP-in-UDP Keepalive. Drop it and return FALSE. 
+ */ +static boolean_t +zero_spi_check(queue_t *q, mblk_t *mp, ipha_t *ipha, ire_t *ire, + ill_t *recv_ill, ipsec_stack_t *ipss) +{ + int shift, plen, iph_len = IPH_HDR_LENGTH(ipha); + udpha_t *udpha; + uint32_t *spi; + uint8_t *orptr; + boolean_t udp_pkt, free_ire; + + if (DB_TYPE(mp) == M_CTL) { + /* + * ICMP message with UDP inside. Don't bother stripping, just + * send it up. + * + * NOTE: Any app with UDP_NAT_T_ENDPOINT set is probably going + * to ignore errors set by ICMP anyway ('cause they might be + * forged), but that's the app's decision, not ours. + */ + + /* Bunch of reality checks for DEBUG kernels... */ + ASSERT(IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION); + ASSERT(((ipha_t *)mp->b_rptr)->ipha_protocol == IPPROTO_ICMP); + ASSERT((uint8_t *)ipha != mp->b_rptr); + + return (B_TRUE); + } + + ASSERT((uint8_t *)ipha == mp->b_rptr); + plen = ntohs(ipha->ipha_length); + + if (plen - iph_len - sizeof (udpha_t) < sizeof (uint32_t)) { + /* + * Most likely a keepalive for the benefit of an intervening + * NAT. These aren't for us, per se, so drop it. + * + * RFC 3947/8 doesn't say for sure what to do for 2-3 + * byte packets (keepalives are 1-byte), but we'll drop them + * also. + */ + ip_drop_packet(mp, B_TRUE, recv_ill, NULL, + DROPPER(ipss, ipds_esp_nat_t_ka), &ipss->ipsec_dropper); + return (B_FALSE); + } + + if (MBLKL(mp) < iph_len + sizeof (udpha_t) + sizeof (*spi)) { + mblk_t *tmp = msgpullup(mp, -1); + + /* might as well pull it all up - it might be ESP. */ + if (tmp == NULL) { + ip_drop_packet(mp, B_TRUE, recv_ill, NULL, + DROPPER(ipss, ipds_esp_nomem), + &ipss->ipsec_dropper); + return (B_FALSE); + } + freemsg(mp); + mp = tmp; + } + spi = (uint32_t *)(mp->b_rptr + iph_len + sizeof (udpha_t)); + if (*spi == 0) { + /* UDP packet - remove 0-spi. */ + shift = sizeof (uint32_t); + } else { + /* ESP-in-UDP packet - reduce to ESP. 
*/ + ipha->ipha_protocol = IPPROTO_ESP; + shift = sizeof (udpha_t); + } + + /* Fix IP header */ + ipha->ipha_length = htons(plen - shift); + ipha->ipha_hdr_checksum = 0; + + orptr = mp->b_rptr; + mp->b_rptr += shift; + + if (*spi == 0) { + ASSERT((uint8_t *)ipha == orptr); + udpha = (udpha_t *)(orptr + iph_len); + udpha->uha_length = htons(plen - shift - iph_len); + iph_len += sizeof (udpha_t); /* For the call to ovbcopy(). */ + udp_pkt = B_TRUE; + } else { + udp_pkt = B_FALSE; + } + ovbcopy(orptr, orptr + shift, iph_len); + if (!udp_pkt) /* Punt up for ESP processing. */ { + ipha = (ipha_t *)(orptr + shift); + + free_ire = (ire == NULL); + if (free_ire) { + /* Re-acquire ire. */ + ire = ire_cache_lookup(ipha->ipha_dst, ALL_ZONES, NULL, + ipss->ipsec_netstack->netstack_ip); + if (ire == NULL || !(ire->ire_type & IRE_LOCAL)) { + if (ire != NULL) + ire_refrele(ire); + /* + * Do a regular freemsg(), as this is an IP + * error (no local route) not an IPsec one. + */ + freemsg(mp); + } + } + + ip_proto_input(q, mp, ipha, ire, recv_ill, B_TRUE); + if (free_ire) + ire_refrele(ire); + } + + return (udp_pkt); +} + +/* * Deliver a udp packet to the given conn, possibly applying ipsec policy. * We are responsible for disposing of mp, such as by freemsg() or putnext() * Caller is responsible for dropping references to the conn, and freeing @@ -6953,6 +7079,22 @@ ip_fanout_udp_conn(conn_t *connp, mblk_t *first_mp, mblk_t *mp, if (mctl_present) freeb(first_mp); + /* Let's hope the compilers utter "branch, predict-not-taken..." ;) */ + if (connp->conn_udp->udp_nat_t_endpoint) { + if (mctl_present) { + /* mctl_present *shouldn't* happen. */ + ip_drop_packet(mp, B_TRUE, NULL, NULL, + DROPPER(ipss, ipds_esp_nat_t_ipsec), + &ipss->ipsec_dropper); + return; + } + + if (!zero_spi_check(ill->ill_rq, mp, ipha, NULL, recv_ill, + ipss)) { + return; + } + } + /* Handle options. 
*/ if (connp->conn_recvif) in_flags = IPF_RECVIF; @@ -10036,7 +10178,7 @@ ipsec_set_req(cred_t *cr, conn_t *connp, ipsec_req_t *req) /* * Test for valid requests. Invalid algorithms - * need to be tested by IPSEC code because new + * need to be tested by IPsec code because new * algorithms can be added dynamically. */ if ((ah_req & ~(REQ_MASK|IPSEC_PREF_UNIQUE)) != 0 || @@ -11960,15 +12102,19 @@ ip_reassemble(mblk_t *mp, ipf_t *ipf, uint_t start, boolean_t more, ill_t *ill, /* * ipsec processing for the fast path, used for input UDP Packets + * Returns true if ready for passup to UDP. + * Return false if packet is not passable to UDP (e.g. it failed IPsec policy, + * was an ESP-in-UDP packet, etc.). */ static boolean_t ip_udp_check(queue_t *q, conn_t *connp, ill_t *ill, ipha_t *ipha, - mblk_t **mpp, mblk_t **first_mpp, boolean_t mctl_present) + mblk_t **mpp, mblk_t **first_mpp, boolean_t mctl_present, ire_t *ire) { uint32_t ill_index; uint_t in_flags; /* IPF_RECVSLLA and/or IPF_RECVIF */ ip_stack_t *ipst = connp->conn_netstack->netstack_ip; ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; + udp_t *udp = connp->conn_udp; ASSERT(ipha->ipha_protocol == IPPROTO_UDP); /* The ill_index of the incoming ILL */ @@ -11993,6 +12139,28 @@ ip_udp_check(queue_t *q, conn_t *connp, ill_t *ill, ipha_t *ipha, } } /* + * Remove 0-spi if it's 0, or move everything behind + * the UDP header over it and forward to ESP via + * ip_proto_input(). + */ + if (udp->udp_nat_t_endpoint) { + if (mctl_present) { + /* mctl_present *shouldn't* happen. */ + ip_drop_packet(*first_mpp, B_TRUE, NULL, + NULL, DROPPER(ipss, ipds_esp_nat_t_ipsec), + &ipss->ipsec_dropper); + *first_mpp = NULL; + return (B_FALSE); + } + + /* "ill" is "recv_ill" in actuality. */ + if (!zero_spi_check(q, *mpp, ipha, ire, ill, ipss)) + return (B_FALSE); + + /* Else continue like a normal UDP packet. 
*/ + } + + /* * We make the checks as below since we are in the fast path * and want to minimize the number of checks if the IP_RECVIF and/or * IP_RECVSLLA and/or IPV6_RECVPKTINFO options are not set @@ -12575,7 +12743,11 @@ ip_udp_input(queue_t *q, mblk_t *mp, ipha_t *ipha, ire_t *ire, if (IS_IP_HDR_HWCKSUM(mctl_present, mp, ill)) { /* Clear the IP header h/w cksum flag */ DB_CKSUMFLAGS(mp) &= ~HCK_IPV4_HDRCKSUM; - } else { + } else if (!mctl_present) { + /* + * Don't verify header checksum if this packet is coming + * back from AH/ESP as we already did it. + */ #define uph ((uint16_t *)ipha) sum = uph[0] + uph[1] + uph[2] + uph[3] + uph[4] + uph[5] + uph[6] + uph[7] + uph[8] + uph[9]; @@ -12583,11 +12755,7 @@ ip_udp_input(queue_t *q, mblk_t *mp, ipha_t *ipha, ire_t *ire, /* finish doing IP checksum */ sum = (sum & 0xFFFF) + (sum >> 16); sum = ~(sum + (sum >> 16)) & 0xFFFF; - /* - * Don't verify header checksum if this packet is coming - * back from AH/ESP as we already did it. - */ - if (!mctl_present && sum != 0 && sum != 0xFFFF) { + if (sum != 0 && sum != 0xFFFF) { BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs); freemsg(first_mp); return; @@ -12681,7 +12849,7 @@ ip_udp_input(queue_t *q, mblk_t *mp, ipha_t *ipha, ire_t *ire, * mp and first_mp can change. */ if (ip_udp_check(q, connp, recv_ill, - ipha, &mp, &first_mp, mctl_present)) { + ipha, &mp, &first_mp, mctl_present, ire)) { /* Send it upstream */ CONN_UDP_RECV(connp, mp); } @@ -12838,7 +13006,11 @@ ip_tcp_input(mblk_t *mp, ipha_t *ipha, ill_t *recv_ill, boolean_t mctl_present, if (IS_IP_HDR_HWCKSUM(mctl_present, mp, ill)) { /* Clear the IP header h/w cksum flag */ DB_CKSUMFLAGS(mp) &= ~HCK_IPV4_HDRCKSUM; - } else { + } else if (!mctl_present) { + /* + * Don't verify header checksum if this packet + * is coming back from AH/ESP as we already did it. 
+ */ #define uph ((uint16_t *)ipha) sum = uph[0] + uph[1] + uph[2] + uph[3] + uph[4] + uph[5] + uph[6] + uph[7] + uph[8] + uph[9]; @@ -12846,11 +13018,7 @@ ip_tcp_input(mblk_t *mp, ipha_t *ipha, ill_t *recv_ill, boolean_t mctl_present, /* finish doing IP checksum */ sum = (sum & 0xFFFF) + (sum >> 16); sum = ~(sum + (sum >> 16)) & 0xFFFF; - /* - * Don't verify header checksum if this packet - * is coming back from AH/ESP as we already did it. - */ - if (!mctl_present && (sum != 0) && sum != 0xFFFF) { + if (sum != 0 && sum != 0xFFFF) { BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs); goto error; @@ -12960,7 +13128,7 @@ try_again: * does not have facility to receive extra information via * ip_process or ip_add_info. Also, when the connection was * established, we made a check if this connection is impacted - * by any global IPSec policy or per connection policy (a + * by any global IPsec policy or per connection policy (a * policy that comes in effect later will not apply to this * connection). Since all this can be determined at the * connection establishment time, a quick check of flags @@ -13283,7 +13451,8 @@ ip_sctp_input(mblk_t *mp, ipha_t *ipha, ill_t *recv_ill, boolean_t mctl_present, goto ipoptions; } else { /* Check the IP header checksum. */ - if (!IS_IP_HDR_HWCKSUM(mctl_present, mp, ill)) { + if (!IS_IP_HDR_HWCKSUM(mctl_present, mp, ill) && + !mctl_present) { #define uph ((uint16_t *)ipha) sum = uph[0] + uph[1] + uph[2] + uph[3] + uph[4] + uph[5] + uph[6] + uph[7] + uph[8] + uph[9]; @@ -13295,7 +13464,7 @@ ip_sctp_input(mblk_t *mp, ipha_t *ipha, ill_t *recv_ill, boolean_t mctl_present, * Don't verify header checksum if this packet * is coming back from AH/ESP as we already did it. 
*/ - if (!mctl_present && (sum != 0) && sum != 0xFFFF) { + if (sum != 0 && sum != 0xFFFF) { BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs); goto error; } @@ -14218,7 +14387,8 @@ ip_rput_process_broadcast(queue_t **qp, mblk_t *mp, ire_t *ire, ipha_t *ipha, ip_udp_input(q, mp1, ipha, ire, ill); break; default: - ip_proto_input(q, mp1, ipha, ire, ill); + ip_proto_input(q, mp1, ipha, ire, ill, + B_FALSE); break; } } @@ -15052,7 +15222,7 @@ local: ire = NULL; continue; default: - ip_proto_input(q, first_mp, ipha, ire, ill); + ip_proto_input(q, first_mp, ipha, ire, ill, B_FALSE); continue; } } @@ -16711,7 +16881,7 @@ ip_fanout_proto_again(mblk_t *ipsec_mp, ill_t *ill, ill_t *recv_ill, ire_t *ire) break; default: ip_proto_input(ill->ill_rq, ipsec_mp, ipha, ire, - recv_ill); + recv_ill, B_FALSE); if (ire_need_rele) ire_refrele(ire); break; @@ -16815,13 +16985,13 @@ ill_frag_timer_start(ill_t *ill) * IPQoS Notes: * IPPF processing is done in fanout routines. * Policy processing is done only if IPP_lOCAL_IN is enabled. Further, - * processing for IPSec packets is done when it comes back in clear. + * processing for IPsec packets is done when it comes back in clear. * NOTE : The callers of this function need to do the ire_refrele for the * ire that is being passed in. */ void ip_proto_input(queue_t *q, mblk_t *mp, ipha_t *ipha, ire_t *ire, - ill_t *recv_ill) + ill_t *recv_ill, boolean_t esp_in_udp_packet) { ill_t *ill = (ill_t *)q->q_ptr; uint32_t sum; @@ -16850,8 +17020,8 @@ ip_proto_input(queue_t *q, mblk_t *mp, ipha_t *ipha, ire_t *ire, /* * no UDP or TCP packet should come here anymore. 
*/ - ASSERT((ipha->ipha_protocol != IPPROTO_TCP) && - (ipha->ipha_protocol != IPPROTO_UDP)); + ASSERT(ipha->ipha_protocol != IPPROTO_TCP && + ipha->ipha_protocol != IPPROTO_UDP); EXTRACT_PKT_MP(mp, first_mp, mctl_present); if (mctl_present && @@ -16881,40 +17051,44 @@ ip_proto_input(queue_t *q, mblk_t *mp, ipha_t *ipha, ire_t *ire, * IF M_CTL is not present, then ipsec_in_is_secure * should return B_TRUE. There is a case where loopback * packets has an M_CTL in the front with all the - * IPSEC options set to IPSEC_PREF_NEVER - which means + * IPsec options set to IPSEC_PREF_NEVER - which means * ipsec_in_is_secure will return B_FALSE. As loopback * packets never comes here, it is safe to ASSERT the * following. */ ASSERT(!mctl_present || ipsec_in_is_secure(first_mp)); + /* + * Also, we should never have an mctl_present if this is an + * ESP-in-UDP packet. + */ + ASSERT(!mctl_present || !esp_in_udp_packet); + /* u1 is # words of IP options */ - u1 = ipha->ipha_version_and_hdr_length - (uchar_t)((IP_VERSION << 4) - + IP_SIMPLE_HDR_LENGTH_IN_WORDS); + u1 = ipha->ipha_version_and_hdr_length - (uchar_t)((IP_VERSION << 4) + + IP_SIMPLE_HDR_LENGTH_IN_WORDS); - if (u1) { - if (!ip_options_cksum(q, ill, mp, ipha, ire, ipst)) { - if (hada_mp != NULL) - freemsg(hada_mp); - return; - } - } else { - /* Check the IP header checksum. */ + if (u1 || (!esp_in_udp_packet && !mctl_present)) { + if (u1) { + if (!ip_options_cksum(q, ill, mp, ipha, ire, ipst)) { + if (hada_mp != NULL) + freemsg(hada_mp); + return; + } + } else { + /* Check the IP header checksum. 
*/ #define uph ((uint16_t *)ipha) - sum = uph[0] + uph[1] + uph[2] + uph[3] + uph[4] + uph[5] + - uph[6] + uph[7] + uph[8] + uph[9]; + sum = uph[0] + uph[1] + uph[2] + uph[3] + uph[4] + + uph[5] + uph[6] + uph[7] + uph[8] + uph[9]; #undef uph - /* finish doing IP checksum */ - sum = (sum & 0xFFFF) + (sum >> 16); - sum = ~(sum + (sum >> 16)) & 0xFFFF; - /* - * Don't verify header checksum if this packet is coming - * back from AH/ESP as we already did it. - */ - if (!mctl_present && (sum && sum != 0xFFFF)) { - BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs); - goto drop_pkt; + /* finish doing IP checksum */ + sum = (sum & 0xFFFF) + (sum >> 16); + sum = ~(sum + (sum >> 16)) & 0xFFFF; + if (sum && sum != 0xFFFF) { + BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs); + goto drop_pkt; + } } } @@ -17193,13 +17367,13 @@ ip_proto_input(queue_t *q, mblk_t *mp, ipha_t *ipha, ire_t *ire, } /* * We generally store the ill_index if we need to - * do IPSEC processing as we lose the ill queue when + * do IPsec processing as we lose the ill queue when * we come back. But in this case, we never should * have to store the ill_index here as it should have * been stored previously when we processed the * AH/ESP header in this routine or for non-ipsec * cases, we still have the queue. But for some bad - * packets from the wire, we can get to IPSEC after + * packets from the wire, we can get to IPsec after * this and we better store the index for that case. */ ill = (ill_t *)q->q_ptr; @@ -17253,7 +17427,7 @@ ip_proto_input(queue_t *q, mblk_t *mp, ipha_t *ipha, ire_t *ire, } /* * Store the ill_index so that when we come back - * from IPSEC we ride on the same queue. + * from IPsec we ride on the same queue. 
*/ ill = (ill_t *)q->q_ptr; ii = (ipsec_in_t *)first_mp->b_rptr; @@ -17286,10 +17460,26 @@ ip_proto_input(queue_t *q, mblk_t *mp, ipha_t *ipha, ire_t *ire, /* select inbound SA and have IPsec process the pkt */ if (ipha->ipha_protocol == IPPROTO_ESP) { esph_t *esph = ipsec_inbound_esp_sa(first_mp, ns); + boolean_t esp_in_udp_sa; if (esph == NULL) return; ASSERT(ii->ipsec_in_esp_sa != NULL); ASSERT(ii->ipsec_in_esp_sa->ipsa_input_func != NULL); + esp_in_udp_sa = ((ii->ipsec_in_esp_sa->ipsa_flags & + IPSA_F_NATT) != 0); + /* + * The following is a fancy, but quick, way of saying: + * ESP-in-UDP SA and Raw ESP packet --> drop + * OR + * ESP SA and ESP-in-UDP packet --> drop + */ + if (esp_in_udp_sa != esp_in_udp_packet) { + BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); + ip_drop_packet(first_mp, B_TRUE, ill, NULL, + DROPPER(ns->netstack_ipsec, ipds_esp_no_sa), + &ns->netstack_ipsec->ipsec_dropper); + return; + } ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func( first_mp, esph); } else { @@ -20301,7 +20491,7 @@ notdata: if (ii->ipsec_info_type == IPSEC_IN) { /* * Either this message goes back to - * IPSEC for further processing or to + * IPsec for further processing or to * ULP after policy checks. */ ip_fanout_proto_again(mp, NULL, NULL, NULL); @@ -20310,7 +20500,7 @@ notdata: io = (ipsec_out_t *)ii; if (io->ipsec_out_proc_begin) { /* - * IPSEC processing has already started. + * IPsec processing has already started. * Complete it. * IPQoS notes: We don't care what is * in ipsec_out_ill_index since this @@ -20765,8 +20955,8 @@ multicast: * need to make sure that the soruce address of * the packet matches the logical IP address used * in the option. We do it by initializing ipha_src - * here. This should keep IPSEC also happy as - * when we return from IPSEC processing, we don't + * here. This should keep IPsec also happy as + * when we return from IPsec processing, we don't * have to worry about getting the right address on * the packet. 
Thus it is sufficient to look for * IRE_CACHE using MATCH_IRE_ILL rathen than @@ -21451,8 +21641,8 @@ conn_ipsec_length(conn_t *connp) } /* - * Returns an estimate of the IPSEC headers size. This is used if - * we don't want to call into IPSEC to get the exact size. + * Returns an estimate of the IPsec headers size. This is used if + * we don't want to call into IPsec to get the exact size. */ int ipsec_out_extra_length(mblk_t *ipsec_mp) @@ -21476,8 +21666,8 @@ ipsec_out_extra_length(mblk_t *ipsec_mp) } /* - * Returns an estimate of the IPSEC headers size. This is used if - * we don't want to call into IPSEC to get the exact size. + * Returns an estimate of the IPsec headers size. This is used if + * we don't want to call into IPsec to get the exact size. */ int ipsec_in_extra_length(mblk_t *ipsec_mp) @@ -21741,7 +21931,7 @@ conn_set_outgoing_ill(conn_t *connp, ire_t *ire, ill_t **conn_outgoing_ill) * REFRELE. * IPQoS Notes: * IP policy is invoked if IPP_LOCAL_OUT is enabled. Processing for - * IPSec packets are done in ipsec_out_process. + * IPsec packets are done in ipsec_out_process. * */ void @@ -22242,7 +22432,7 @@ another:; if (ipsec_len != 0) { /* * We will do the rest of the processing after - * we come back from IPSEC in ip_wput_ipsec_out(). + * we come back from IPsec in ip_wput_ipsec_out(). */ ASSERT(MBLKL(first_mp) >= sizeof (ipsec_out_t)); @@ -22769,7 +22959,7 @@ multi_loopback: /* * If this needs to go out secure, we need - * to wait till we finish the IPSEC + * to wait till we finish the IPsec * processing. */ if (ipsec_len == 0 && @@ -23003,8 +23193,8 @@ fragmentit: offset = ntohs(ipha->ipha_fragment_offset_and_flags); /* * If this would generate a icmp_frag_needed message, - * we need to handle it before we do the IPSEC - * processing. Otherwise, we need to strip the IPSEC + * we need to handle it before we do the IPsec + * processing. 
Otherwise, we need to strip the IPsec * headers before we send up the message to the ULPs * which becomes messy and difficult. */ @@ -23036,7 +23226,7 @@ fragmentit: * the wire. Note that this could still * cause fragmentation and all we * do is the generation of the message - * to the ULP if needed before IPSEC. + * to the ULP if needed before IPsec. */ if (!next_mp) { ipsec_out_process(q, first_mp, @@ -23255,7 +23445,7 @@ ip_mdinfo_return(ire_t *dst_ire, conn_t *connp, char *ill_name, break; /* - * IPSEC outbound policy present? Note that we get here + * IPsec outbound policy present? Note that we get here * after calling ipsec_conn_cache_policy() where the global * policy checking is performed. conn_latch will be * non-NULL as long as there's a policy defined, @@ -23840,7 +24030,7 @@ ip_wput_frag(ire_t *ire, mblk_t *mp_orig, ip_pkt_t pkt_type, uint32_t max_frag, } /* - * IPSEC does not allow hw accelerated packets to be fragmented + * IPsec does not allow hw accelerated packets to be fragmented * This check is made in ip_wput_ipsec_out prior to coming here * via ip_wput_ire_fragmentit. * @@ -25366,7 +25556,7 @@ ip_wput_ipsec_out_v6(queue_t *q, mblk_t *ipsec_mp, ip6_t *ip6h, ill_t *ill, if (ire != NULL) { ipif_refrele(ipif); /* - * XXX Do the multicast forwarding now, as the IPSEC + * XXX Do the multicast forwarding now, as the IPsec * processing has been done. */ goto send; @@ -25420,10 +25610,10 @@ ip_wput_ipsec_out_v6(queue_t *q, mblk_t *ipsec_mp, ip6_t *ip6h, ill_t *ill, * ire disappeared underneath. * * What we need to do here is the ip_newroute - * logic to get the ire without doing the IPSEC + * logic to get the ire without doing the IPsec * processing. Follow the same old path. But this * time, ip_wput or ire_add_then_send will call us - * directly as all the IPSEC operations are done. + * directly as all the IPsec operations are done. 
*/ ip1dbg(("ip_wput_ipsec_out_v6: IRE disappeared\n")); mp->b_prev = NULL; @@ -25587,17 +25777,14 @@ ip_wput_ipsec_out(queue_t *q, mblk_t *ipsec_mp, ipha_t *ipha, ill_t *ill, uint32_t max_frag; boolean_t multirt_send = B_FALSE; mblk_t *mp; - mblk_t *mp1; ipha_t *ipha1; uint_t ill_index; ipsec_out_t *io; boolean_t attach_if; - int match_flags, offset; + int match_flags; irb_t *irb = NULL; boolean_t ill_need_rele = B_FALSE, ire_need_rele = B_TRUE; zoneid_t zoneid; - uint32_t cksum; - uint16_t *up; ipxmit_state_t pktxmit_state; ip_stack_t *ipst; @@ -25677,7 +25864,7 @@ ip_wput_ipsec_out(queue_t *q, mblk_t *ipsec_mp, ipha_t *ipha, ill_t *ill, if (ire != NULL) { ill_t *ill1; /* - * Do the multicast forwarding now, as the IPSEC + * Do the multicast forwarding now, as the IPsec * processing has been done. */ if (ipst->ips_ip_g_mrouter && !conn_dontroute && @@ -25737,10 +25924,10 @@ ip_wput_ipsec_out(queue_t *q, mblk_t *ipsec_mp, ipha_t *ipha, ill_t *ill, * ire disappeared underneath. * * What we need to do here is the ip_newroute - * logic to get the ire without doing the IPSEC + * logic to get the ire without doing the IPsec * processing. Follow the same old path. But this * time, ip_wput or ire_add_then_put will call us - * directly as all the IPSEC operations are done. + * directly as all the IPsec operations are done. */ ip1dbg(("ip_wput_ipsec_out: IRE disappeared\n")); mp->b_prev = NULL; @@ -25773,41 +25960,12 @@ ip_wput_ipsec_out(queue_t *q, mblk_t *ipsec_mp, ipha_t *ipha, ill_t *ill, } goto done; send: - if (ipha->ipha_protocol == IPPROTO_UDP && - udp_compute_checksum(ipst->ips_netstack)) { - /* - * ESP NAT-Traversal packet. - * - * Just do software checksum for now. 
- */ - - offset = IP_SIMPLE_HDR_LENGTH + UDP_CHECKSUM_OFFSET; - IP_STAT(ipst, ip_out_sw_cksum); - IP_STAT_UPDATE(ipst, ip_udp_out_sw_cksum_bytes, - ntohs(htons(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH)); -#define iphs ((uint16_t *)ipha) - cksum = IP_UDP_CSUM_COMP + iphs[6] + iphs[7] + iphs[8] + - iphs[9] + ntohs(htons(ipha->ipha_length) - - IP_SIMPLE_HDR_LENGTH); -#undef iphs - cksum = IP_CSUM(mp, IP_SIMPLE_HDR_LENGTH, cksum); - for (mp1 = mp; mp1 != NULL; mp1 = mp1->b_cont) - if (mp1->b_wptr - mp1->b_rptr >= - offset + sizeof (uint16_t)) { - up = (uint16_t *)(mp1->b_rptr + offset); - *up = cksum; - break; /* out of for loop */ - } else { - offset -= (mp->b_wptr - mp->b_rptr); - } - } /* Otherwise, just keep the all-zero checksum. */ - if (ire->ire_stq == NULL) { ill_t *out_ill; /* * Loopbacks go through ip_wput_local except for one case. * We come here if we generate a icmp_frag_needed message - * after IPSEC processing is over. When this function calls + * after IPsec processing is over. When this function calls * ip_wput_ire_fragmentit, ip_wput_frag might end up calling * icmp_frag_needed. The message generated comes back here * through icmp_frag_needed -> icmp_pkt -> ip_wput -> @@ -25815,7 +25973,7 @@ send: * source address as it is usually set in ip_wput_ire. As * ipsec_out_proc_begin is set, ip_wput calls ipsec_out_process * and we end up here. We can't enter ip_wput_ire once the - * IPSEC processing is over and hence we need to do it here. + * IPsec processing is over and hence we need to do it here. */ ASSERT(q != NULL); UPDATE_OB_PKT_COUNT(ire); @@ -25846,7 +26004,7 @@ send: if (ire->ire_max_frag < (unsigned int)LENGTH) { /* - * We are through with IPSEC processing. + * We are through with IPsec processing. * Fragment this and send it on the wire. 
*/ if (io->ipsec_out_accelerated) { @@ -26002,7 +26160,7 @@ send: * There is a slight risk here, in that, if we * have the forwarding path create an incomplete * IRE, then until the IRE is completed, any - * transmitted IPSEC packets will be dropped + * transmitted IPsec packets will be dropped * instead of being queued waiting for resolution. * * But the likelihood of a forwarding packet and a wput @@ -26024,7 +26182,7 @@ send: * hw accel work. But it's too complex to get * the IPsec hw acceleration approach to fit * well with ip_xmit_v4 doing ARP without - * doing IPSEC simplification. For now, we just + * doing IPsec simplification. For now, we just * poke ip_xmit_v4 to trigger the arp resolve, so * that we can continue with the send on the next * attempt. @@ -26261,7 +26419,7 @@ ipsec_out_select_sa(mblk_t *ipsec_mp) ASSERT(io->ipsec_out_failed == B_FALSE); /* - * IPSEC processing has started. + * IPsec processing has started. */ io->ipsec_out_proc_begin = B_TRUE; ap = io->ipsec_out_act; @@ -26328,7 +26486,7 @@ ipsec_out_select_sa(mblk_t *ipsec_mp) * do with it. * IPQoS Notes: * We do IPPF processing if IPP_LOCAL_OUT is enabled before processing for - * IPSec. + * IPsec. * XXX would like to nuke ire_t. * XXX ill_index better be "real" */ @@ -26401,7 +26559,7 @@ ipsec_out_process(queue_t *q, mblk_t *ipsec_mp, ire_t *ire, uint_t ill_index) } /* - * IPSEC processing has started. + * IPsec processing has started. */ io->ipsec_out_proc_begin = B_TRUE; ap = io->ipsec_out_act; @@ -26547,7 +26705,7 @@ ipsec_out_process(queue_t *q, mblk_t *ipsec_mp, ire_t *ire, uint_t ill_index) } } /* - * We are done with IPSEC processing. Send it over + * We are done with IPsec processing. Send it over * the wire. */ done: @@ -29544,7 +29702,7 @@ ip_fanout_sctp_raw(mblk_t *mp, ill_t *recv_ill, ipha_t *ipha, boolean_t isv4, * the link-layer header to the packet, do ipsec hw acceleration * work if necessary, and send the packet out on the wire. 
* - * NOTE: IPSEC will only call this function with fully resolved + * NOTE: IPsec will only call this function with fully resolved * ires if hw acceleration is involved. * TODO list : * a Handle M_MULTIDATA so that diff --git a/usr/src/uts/common/inet/ip/ip_if.c b/usr/src/uts/common/inet/ip/ip_if.c index e8b0259a2f..7a56334f5c 100644 --- a/usr/src/uts/common/inet/ip/ip_if.c +++ b/usr/src/uts/common/inet/ip/ip_if.c @@ -2505,14 +2505,14 @@ ill_capability_ipsec_reset(ill_t *ill, mblk_t **sc_mp) } /* - * Clear the capability flags for IPSec HA but retain the ill + * Clear the capability flags for IPsec HA but retain the ill * capability structures since it's possible that another thread * is still referring to them. The structures only get deallocated * when we destroy the ill. * * Various places check the flags to see if the ill is capable of * hardware acceleration, and by clearing them we ensure that new - * outbound IPSec packets are sent down encrypted. + * outbound IPsec packets are sent down encrypted. */ ill->ill_capabilities &= ~(ILL_CAPAB_AH | ILL_CAPAB_ESP); @@ -18680,9 +18680,6 @@ ipif_down(ipif_t *ipif, queue_t *q, mblk_t *mp) /* Also, delete the ires cached in SCTP */ sctp_ire_cache_flush(ipif); - /* Resolve any IPsec/IKE NAT-T instances that depend on this ipif. */ - nattymod_clean_ipif(ipif); - /* * Update any other ipifs which have used "our" local address as * a source address. 
This entails removing and recreating IRE_INTERFACE diff --git a/usr/src/uts/common/inet/ip/ipdrop.c b/usr/src/uts/common/inet/ip/ipdrop.c index 73e07a2647..40d09933b6 100644 --- a/usr/src/uts/common/inet/ip/ipdrop.c +++ b/usr/src/uts/common/inet/ip/ipdrop.c @@ -160,6 +160,10 @@ ip_drop_init(ipsec_stack_t *ipss) "esp_crypto_failed", KSTAT_DATA_UINT64); kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_esp_icmp, "esp_icmp", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_esp_nat_t_ipsec, + "esp_nat_t_ipsec", KSTAT_DATA_UINT64); + kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_esp_nat_t_ka, + "esp_nat_t_ka", KSTAT_DATA_UINT64); /* AH-specific drop statistics. */ kstat_named_init(&ipss->ipsec_ip_drop_types->ipds_ah_nomem, diff --git a/usr/src/uts/common/inet/ip/ipsecah.c b/usr/src/uts/common/inet/ip/ipsecah.c index 540c6b11dc..0107528147 100644 --- a/usr/src/uts/common/inet/ip/ipsecah.c +++ b/usr/src/uts/common/inet/ip/ipsecah.c @@ -950,7 +950,7 @@ ah_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi, */ if (acqrec->ipsacq_seq == samsg->sadb_msg_seq && IPSA_ARE_ADDR_EQUAL(dstaddr, - acqrec->ipsacq_dstaddr, acqrec->ipsacq_addrfam)) + acqrec->ipsacq_dstaddr, acqrec->ipsacq_addrfam)) break; mutex_exit(&acqrec->ipsacq_lock); } @@ -1005,7 +1005,7 @@ ah_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi, if (rc == 0 && lpkt != NULL) rc = !taskq_dispatch(ah_taskq, inbound_task, - (void *) lpkt, TQ_NOSLEEP); + (void *) lpkt, TQ_NOSLEEP); if (rc != 0) { ip_drop_packet(lpkt, B_TRUE, NULL, NULL, @@ -1115,8 +1115,8 @@ ah_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, netstack_t *ns) *diagnostic = SADB_X_DIAGNOSTIC_ENCR_NOTSUPP; return (EINVAL); } - if (assoc->sadb_sa_flags & ~(SADB_SAFLAGS_NOREPLAY | - SADB_X_SAFLAGS_TUNNEL)) { + if (assoc->sadb_sa_flags & + ~(SADB_SAFLAGS_NOREPLAY | SADB_X_SAFLAGS_TUNNEL)) { *diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS; return (EINVAL); } @@ -1143,7 +1143,7 @@ ah_add_sa(mblk_t *mp, 
keysock_in_t *ksi, int *diagnostic, netstack_t *ns) if (aalg == NULL || !ALG_VALID(aalg)) { mutex_exit(&ipss->ipsec_alg_lock); ah1dbg(ahstack, ("Couldn't find auth alg #%d.\n", - assoc->sadb_sa_auth)); + assoc->sadb_sa_auth)); *diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG; return (EINVAL); } @@ -1166,7 +1166,7 @@ ah_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, netstack_t *ns) mutex_exit(&ipss->ipsec_alg_lock); return (ah_add_sa_finish(mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi, - diagnostic, ahstack)); + diagnostic, ahstack)); } /* @@ -1188,10 +1188,9 @@ ah_update_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, } sin = (struct sockaddr_in *)(dstext + 1); return (sadb_update_sa(mp, ksi, - (sin->sin_family == AF_INET6) ? &ahstack->ah_sadb.s_v6 : - &ahstack->ah_sadb.s_v4, - diagnostic, ahstack->ah_pfkey_q, ah_add_sa, - ahstack->ipsecah_netstack)); + (sin->sin_family == AF_INET6) ? &ahstack->ah_sadb.s_v6 : + &ahstack->ah_sadb.s_v4, diagnostic, ahstack->ah_pfkey_q, ah_add_sa, + ahstack->ipsecah_netstack)); } /* @@ -1225,7 +1224,7 @@ ah_del_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, } return (sadb_del_sa(mp, ksi, &ahstack->ah_sadb, diagnostic, - ahstack->ah_pfkey_q)); + ahstack->ah_pfkey_q)); } /* @@ -1582,11 +1581,11 @@ ah_set_usetime(ipsa_t *assoc, boolean_t inbound) if (inbound) { inassoc = assoc; if (isv6) - outhash = OUTBOUND_HASH_V6(sp, *((in6_addr_t *) - &inassoc->ipsa_dstaddr)); + outhash = OUTBOUND_HASH_V6(sp, + *((in6_addr_t *)&inassoc->ipsa_dstaddr)); else - outhash = OUTBOUND_HASH_V4(sp, *((ipaddr_t *) - &inassoc->ipsa_dstaddr)); + outhash = OUTBOUND_HASH_V4(sp, + *((ipaddr_t *)&inassoc->ipsa_dstaddr)); bucket = &sp->sdb_of[outhash]; mutex_enter(&bucket->isaf_lock); @@ -1681,11 +1680,11 @@ ah_age_bytes(ipsa_t *assoc, uint64_t bytes, boolean_t inbound) if (inbound) { inassoc = assoc; if (isv6) - outhash = OUTBOUND_HASH_V6(sp, *((in6_addr_t *) - &inassoc->ipsa_dstaddr)); + outhash = OUTBOUND_HASH_V6(sp, + *((in6_addr_t *)&inassoc->ipsa_dstaddr)); 
else - outhash = OUTBOUND_HASH_V4(sp, *((ipaddr_t *) - &inassoc->ipsa_dstaddr)); + outhash = OUTBOUND_HASH_V4(sp, + *((ipaddr_t *)&inassoc->ipsa_dstaddr)); bucket = &sp->sdb_of[outhash]; mutex_enter(&bucket->isaf_lock); outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi, @@ -1973,7 +1972,7 @@ ah_getspi(mblk_t *mp, keysock_in_t *ksi, ipsecah_stack_t *ahstack) * to EEXIST. */ rc = sadb_insertassoc(newbie, inbound); - (void) drv_getparm(TIME, &newbie->ipsa_hardexpiretime); + newbie->ipsa_hardexpiretime = gethrestime_sec(); newbie->ipsa_hardexpiretime += ahstack->ipsecah_larval_timeout; } @@ -2050,7 +2049,7 @@ ah_icmp_error_v6(mblk_t *ipsec_mp, ipsecah_stack_t *ahstack) */ if (!pullupmsg(mp, -1) || !ip_hdr_length_nexthdr_v6(mp, (ip6_t *)mp->b_rptr, &hdr_length, - &nexthdrp) || + &nexthdrp) || mp->b_rptr + hdr_length + sizeof (icmp6_t) + sizeof (ip6_t) + sizeof (ah_t) > mp->b_wptr) { IP_AH_BUMP_STAT(ipss, in_discards); @@ -2172,7 +2171,7 @@ ah_icmp_error_v4(mblk_t *ipsec_mp, ipsecah_stack_t *ahstack) */ if ((uchar_t *)ipha + hdr_length + 8 > mp->b_wptr) { if (!pullupmsg(mp, (uchar_t *)ipha + hdr_length + 8 - - mp->b_rptr)) { + mp->b_rptr)) { ipsec_rl_strlog(ahstack->ipsecah_netstack, info.mi_idnum, 0, 0, SL_WARN | SL_ERROR, @@ -2759,7 +2758,7 @@ ah_kcf_callback(void *arg, int status) ah_log_bad_auth(ipsec_mp); } else { ah1dbg(ahstack, ("ah_kcf_callback: crypto failed with 0x%x\n", - status)); + status)); AH_BUMP_STAT(ahstack, crypto_failures); if (is_inbound) IP_AH_BUMP_STAT(ipss, in_discards); @@ -3318,8 +3317,6 @@ ah_outbound(mblk_t *ipsec_out) assoc = oi->ipsec_out_ah_sa; ASSERT(assoc != NULL); - if (assoc->ipsa_usetime == 0) - ah_set_usetime(assoc, B_FALSE); /* * Age SA according to number of bytes that will be sent after @@ -3337,7 +3334,7 @@ ah_outbound(mblk_t *ipsec_out) ah_align_sz = P2ALIGN(assoc->ipsa_mac_len + IPV6_PADDING_ALIGN - 1, IPV6_PADDING_ALIGN); age_bytes = sizeof (ip6_t) + ntohs(ip6h->ip6_plen) + - sizeof (ah_t) + ah_align_sz; + sizeof (ah_t) 
+ ah_align_sz; } if (!ah_age_bytes(assoc, age_bytes, B_FALSE)) { @@ -3412,8 +3409,6 @@ ah_inbound(mblk_t *ipsec_in_mp, void *arg) ipsec_stack_t *ipss = ns->netstack_ipsec; ASSERT(assoc != NULL); - if (assoc->ipsa_usetime == 0) - ah_set_usetime(assoc, B_TRUE); /* * We may wish to check replay in-range-only here as an optimization. @@ -3584,7 +3579,7 @@ ah_inbound_accelerated(mblk_t *ipsec_in, boolean_t isv4, ipsa_t *assoc, icv_len = hada->da_icv_len; if ((icv_len != assoc->ipsa_mac_len) || (icv_len > DA_ICV_MAX_LEN) || (MBLKL(hada_mp) < - (sizeof (da_ipsec_t) - DA_ICV_MAX_LEN + icv_len))) { + (sizeof (da_ipsec_t) - DA_ICV_MAX_LEN + icv_len))) { ah0dbg(("ah_inbound_accelerated: " "ICV len (%u) incorrect or mblk too small (%u)\n", icv_len, (uint32_t)(MBLKL(hada_mp)))); @@ -4014,6 +4009,8 @@ ah_auth_in_done(mblk_t *ipsec_in) } mp->b_rptr -= ii->ipsec_in_skip_len; + ah_set_usetime(assoc, B_TRUE); + if (isv4) { ipha = (ipha_t *)mp->b_rptr; ah_offset = ipha->ipha_version_and_hdr_length - @@ -4237,6 +4234,9 @@ ah_auth_out_done(mblk_t *ipsec_out) } mp->b_rptr -= io->ipsec_out_skip_len; + ASSERT(io->ipsec_out_ah_sa != NULL); + ah_set_usetime(io->ipsec_out_ah_sa, B_FALSE); + if (isv4) { ipha_t *ipha; ipha_t *nipha; diff --git a/usr/src/uts/common/inet/ip/ipsecesp.c b/usr/src/uts/common/inet/ip/ipsecesp.c index 46fab6f792..1c98e2ca41 100644 --- a/usr/src/uts/common/inet/ip/ipsecesp.c +++ b/usr/src/uts/common/inet/ip/ipsecesp.c @@ -52,6 +52,7 @@ #include <inet/mi.h> #include <inet/nd.h> #include <inet/ip.h> +#include <inet/ip_impl.h> #include <inet/ip6.h> #include <inet/sadb.h> #include <inet/ipsec_info.h> @@ -90,6 +91,7 @@ static ipsecespparam_t lcl_param_arr[] = { { 0, 0xffffffffU, 0, "ipsecesp_default_hard_usetime"}, { 0, 1, 0, "ipsecesp_log_unknown_spi"}, { 0, 2, 1, "ipsecesp_padding_check"}, + { 0, 600, 20, "ipsecesp_nat_keepalive_interval"}, }; #define ipsecesp_debug ipsecesp_params[0].ipsecesp_param_value #define ipsecesp_age_interval 
ipsecesp_params[1].ipsecesp_param_value @@ -116,6 +118,7 @@ static ipsecespparam_t lcl_param_arr[] = { ipsecesp_params[12].ipsecesp_param_value #define ipsecesp_padding_check \ ipsecesp_params[13].ipsecesp_param_value +/* For ipsecesp_nat_keepalive_interval, see ipsecesp.h. */ #define esp0dbg(a) printf a /* NOTE: != 0 instead of > 0 so lint doesn't complain. */ @@ -131,6 +134,7 @@ static void *ipsecesp_stack_init(netstackid_t stackid, netstack_t *ns); static void ipsecesp_stack_fini(netstackid_t stackid, void *arg); static void esp_send_acquire(ipsacq_t *, mblk_t *, netstack_t *); +static void esp_prepare_udp(netstack_t *, mblk_t *, ipha_t *); static ipsec_status_t esp_outbound_accelerated(mblk_t *, uint_t); static ipsec_status_t esp_inbound_accelerated(mblk_t *, mblk_t *, boolean_t, ipsa_t *); @@ -142,7 +146,6 @@ static boolean_t esp_strip_header(mblk_t *, boolean_t, uint32_t, static ipsec_status_t esp_submit_req_inbound(mblk_t *, ipsa_t *, uint_t); static ipsec_status_t esp_submit_req_outbound(mblk_t *, ipsa_t *, uchar_t *, uint_t); - /* Setable in /etc/system */ uint32_t esp_hash_size = IPSEC_DEFAULT_HASH_SIZE; @@ -774,7 +777,7 @@ esp_age_bytes(ipsa_t *assoc, uint64_t bytes, boolean_t inbound) &inassoc->ipsa_dstaddr)); } else { outhash = OUTBOUND_HASH_V4(sp, *((ipaddr_t *) - &inassoc->ipsa_dstaddr)); + &inassoc->ipsa_dstaddr)); } bucket = &sp->sdb_of[outhash]; mutex_enter(&bucket->isaf_lock); @@ -938,7 +941,7 @@ esp_strip_header(mblk_t *data_mp, boolean_t isv4, uint32_t ivlen, SL_ERROR | SL_WARN, "Corrupt ESP packet (padlen too big).\n"); esp1dbg(espstack, ("padlen (%d) is greater than:\n", - padlen)); + padlen)); esp1dbg(espstack, ("pkt len(%d) - ip hdr - esp " "hdr - ivlen(%d) = %d.\n", ntohs(ipha->ipha_length), ivlen, @@ -987,13 +990,13 @@ esp_strip_header(mblk_t *data_mp, boolean_t isv4, uint32_t ivlen, SL_ERROR | SL_WARN, "Corrupt ESP packet (v6 padlen too big).\n"); esp1dbg(espstack, ("padlen (%d) is greater than:\n", - padlen)); - esp1dbg(espstack, ("pkt 
len(%u) - ip hdr - esp " - "hdr - ivlen(%d) = %u.\n", - (unsigned)(ntohs(ip6h->ip6_plen) - + sizeof (ip6_t)), ivlen, - (unsigned)(ntohs(ip6h->ip6_plen) - 2 - - sizeof (esph_t) - ivlen))); + padlen)); + esp1dbg(espstack, + ("pkt len(%u) - ip hdr - esp hdr - ivlen(%d) = " + "%u.\n", (unsigned)(ntohs(ip6h->ip6_plen) + + sizeof (ip6_t)), ivlen, + (unsigned)(ntohs(ip6h->ip6_plen) - 2 - + sizeof (esph_t) - ivlen))); *counter = DROPPER(ipss, ipds_esp_bad_padlen); return (B_FALSE); } @@ -1167,7 +1170,7 @@ esp_set_usetime(ipsa_t *assoc, boolean_t inbound) &inassoc->ipsa_dstaddr)); } else { outhash = OUTBOUND_HASH_V4(sp, *((ipaddr_t *) - &inassoc->ipsa_dstaddr)); + &inassoc->ipsa_dstaddr)); } bucket = &sp->sdb_of[outhash]; mutex_enter(&bucket->isaf_lock); @@ -1232,9 +1235,6 @@ esp_inbound(mblk_t *ipsec_in_mp, void *arg) ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; ipsec_stack_t *ipss = ns->netstack_ipsec; - if (ipsa->ipsa_usetime == 0) - esp_set_usetime(ipsa, B_TRUE); - /* * We may wish to check replay in-range-only here as an optimization. * Include the reality check of ipsa->ipsa_replay > @@ -1549,7 +1549,7 @@ esp_getspi(mblk_t *mp, keysock_in_t *ksi, ipsecesp_stack_t *espstack) * to EEXIST. 
*/ rc = sadb_insertassoc(newbie, inbound); - (void) drv_getparm(TIME, &newbie->ipsa_hardexpiretime); + newbie->ipsa_hardexpiretime = gethrestime_sec(); newbie->ipsa_hardexpiretime += espstack->ipsecesp_larval_timeout; } @@ -1681,9 +1681,9 @@ esp_in_done(mblk_t *ipsec_in_mp) if (assoc->ipsa_auth_alg == SADB_AALG_NONE) { /* encryption-only ESP */ espstart = ii->ipsec_in_crypto_data.cd_offset - - sizeof (esph_t) - assoc->ipsa_iv_len; + sizeof (esph_t) - assoc->ipsa_iv_len; processed_len = ii->ipsec_in_crypto_data.cd_length + - ivlen; + ivlen; } else { /* encryption with authentication */ espstart = ii->ipsec_in_crypto_dual_data.dd_offset1; @@ -1726,6 +1726,8 @@ esp_in_done(mblk_t *ipsec_in_mp) } } + esp_set_usetime(assoc, B_TRUE); + if (!esp_age_bytes(assoc, processed_len, B_TRUE)) { /* The ipsa has hit hard expiration, LOG and AUDIT. */ ipsec_assocfailure(info.mi_idnum, 0, 0, @@ -1744,7 +1746,7 @@ esp_in_done(mblk_t *ipsec_in_mp) */ if (esp_strip_header(data_mp, ii->ipsec_in_v4, ivlen, &counter, - espstack)) { + espstack)) { if (is_natt) return (esp_fix_natt_checksums(data_mp, assoc)); return (IPSEC_STATUS_SUCCESS); @@ -1845,12 +1847,12 @@ esp_kcf_callback(void *arg, int status) { mblk_t *ipsec_mp = (mblk_t *)arg; ipsec_in_t *ii = (ipsec_in_t *)ipsec_mp->b_rptr; + ipsec_out_t *io = (ipsec_out_t *)ipsec_mp->b_rptr; boolean_t is_inbound = (ii->ipsec_in_type == IPSEC_IN); netstackid_t stackid; netstack_t *ns, *ns_arg; ipsecesp_stack_t *espstack; ipsec_stack_t *ipss; - ipsec_out_t *io = (ipsec_out_t *)ii; ASSERT(ipsec_mp->b_cont != NULL); @@ -1893,6 +1895,11 @@ esp_kcf_callback(void *arg, int status) */ ipha_t *ipha = (ipha_t *)ipsec_mp->b_cont->b_rptr; + esp_set_usetime(io->ipsec_out_esp_sa, B_FALSE); + /* NAT-T packet. 
*/ + if (ipha->ipha_protocol == IPPROTO_UDP) + esp_prepare_udp(ns, ipsec_mp->b_cont, ipha); + /* do AH processing if needed */ if (!esp_do_outbound_ah(ipsec_mp)) { netstack_rele(ns); @@ -2127,6 +2134,101 @@ esp_submit_req_inbound(mblk_t *ipsec_mp, ipsa_t *assoc, uint_t esph_offset) return (IPSEC_STATUS_FAILED); } +/* + * Compute the IP and UDP checksums -- common code for both keepalives and + * actual ESP-in-UDP packets. Be flexible with multiple mblks because ESP + * uses mblk-insertion to insert the UDP header. + * TODO - If there is an easy way to prep a packet for HW checksums, make + * it happen here. + */ +static void +esp_prepare_udp(netstack_t *ns, mblk_t *mp, ipha_t *ipha) +{ + int offset; + uint32_t cksum; + uint16_t *arr; + mblk_t *udpmp = mp; + + ASSERT(MBLKL(mp) >= sizeof (ipha_t)); + + ipha->ipha_hdr_checksum = 0; + ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); + + if (ns->netstack_udp->us_do_checksum) { + ASSERT(MBLKL(udpmp) >= sizeof (udpha_t)); + /* arr points to the IP header. */ + arr = (uint16_t *)ipha; + IP_STAT(ns->netstack_ip, ip_out_sw_cksum); + IP_STAT_UPDATE(ns->netstack_ip, ip_udp_out_sw_cksum_bytes, + ntohs(htons(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH)); + /* arr[6-9] are the IP addresses. */ + cksum = IP_UDP_CSUM_COMP + arr[6] + arr[7] + arr[8] + arr[9] + + ntohs(htons(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH); + cksum = IP_CSUM(mp, IP_SIMPLE_HDR_LENGTH, cksum); + offset = IP_SIMPLE_HDR_LENGTH + UDP_CHECKSUM_OFFSET; + while (offset >= MBLKL(udpmp)) { + offset -= MBLKL(udpmp); + udpmp = udpmp->b_cont; + } + /* arr points to the UDP header's checksum field. */ + arr = (uint16_t *)(udpmp->b_rptr + offset); + *arr = cksum; + } +} + +/* + * Send a one-byte UDP NAT-T keepalive. Construct an IPSEC_OUT too that'll + * get fed into esp_send_udp/ip_wput_ipsec_out. 
+ */ +void +ipsecesp_send_keepalive(ipsa_t *assoc) +{ + mblk_t *mp = NULL, *ipsec_mp = NULL; + ipha_t *ipha; + udpha_t *udpha; + ipsec_out_t *io; + + ASSERT(!MUTEX_HELD(&assoc->ipsa_lock)); + + mp = allocb(sizeof (ipha_t) + sizeof (udpha_t) + 1, BPRI_HI); + if (mp == NULL) + return; + ipha = (ipha_t *)mp->b_rptr; + ipha->ipha_version_and_hdr_length = IP_SIMPLE_HDR_VERSION; + ipha->ipha_type_of_service = 0; + ipha->ipha_length = htons(sizeof (ipha_t) + sizeof (udpha_t) + 1); + /* Use the low-16 of the SPI so we have some clue where it came from. */ + ipha->ipha_ident = *(((uint16_t *)(&assoc->ipsa_spi)) + 1); + ipha->ipha_fragment_offset_and_flags = 0; /* Too small to fragment! */ + ipha->ipha_ttl = 0xFF; + ipha->ipha_protocol = IPPROTO_UDP; + ipha->ipha_hdr_checksum = 0; + ipha->ipha_src = assoc->ipsa_srcaddr[0]; + ipha->ipha_dst = assoc->ipsa_dstaddr[0]; + udpha = (udpha_t *)(ipha + 1); + udpha->uha_src_port = (assoc->ipsa_local_nat_port != 0) ? + assoc->ipsa_local_nat_port : htons(IPPORT_IKE_NATT); + udpha->uha_dst_port = (assoc->ipsa_remote_nat_port != 0) ? + assoc->ipsa_remote_nat_port : htons(IPPORT_IKE_NATT); + udpha->uha_length = htons(sizeof (udpha_t) + 1); + udpha->uha_checksum = 0; + mp->b_wptr = (uint8_t *)(udpha + 1); + *(mp->b_wptr++) = 0xFF; + + ipsec_mp = ipsec_alloc_ipsec_out(assoc->ipsa_netstack); + if (ipsec_mp == NULL) { + freeb(mp); + return; + } + ipsec_mp->b_cont = mp; + io = (ipsec_out_t *)ipsec_mp->b_rptr; + io->ipsec_out_zoneid = + netstackid_to_zoneid(assoc->ipsa_netstack->netstack_stackid); + + esp_prepare_udp(assoc->ipsa_netstack, mp, ipha); + ip_wput_ipsec_out(NULL, ipsec_mp, ipha, NULL, NULL); +} + static ipsec_status_t esp_submit_req_outbound(mblk_t *ipsec_mp, ipsa_t *assoc, uchar_t *icv_buf, uint_t payload_len) @@ -2149,7 +2251,7 @@ esp_submit_req_outbound(mblk_t *ipsec_mp, ipsa_t *assoc, uchar_t *icv_buf, ipsec_stack_t *ipss = ns->netstack_ipsec; esp3dbg(espstack, ("esp_submit_req_outbound:%s", - is_natt ? 
"natt" : "not natt")); + is_natt ? "natt" : "not natt")); ASSERT(io->ipsec_out_type == IPSEC_OUT); @@ -2263,6 +2365,10 @@ esp_submit_req_outbound(mblk_t *ipsec_mp, ipsa_t *assoc, uchar_t *icv_buf, switch (kef_rc) { case CRYPTO_SUCCESS: ESP_BUMP_STAT(espstack, crypto_sync); + esp_set_usetime(assoc, B_FALSE); + if (is_natt) + esp_prepare_udp(ns, ipsec_mp->b_cont, + (ipha_t *)ipsec_mp->b_cont->b_rptr); return (IPSEC_STATUS_SUCCESS); case CRYPTO_QUEUED: /* esp_kcf_callback() will be invoked on completion */ @@ -2385,9 +2491,6 @@ esp_outbound(mblk_t *mp) assoc = io->ipsec_out_esp_sa; ASSERT(assoc != NULL); - if (assoc->ipsa_usetime == 0) - esp_set_usetime(assoc, B_FALSE); - if (assoc->ipsa_auth_alg != SADB_AALG_NONE) mac_len = assoc->ipsa_mac_len; @@ -2462,13 +2565,12 @@ esp_outbound(mblk_t *mp) esp3dbg(espstack, ("esp_outbound: NATT")); udpha = (udpha_t *)espmp->b_rptr; - udpha->uha_src_port = htons(IPPORT_IKE_NATT); - if (assoc->ipsa_remote_port != 0) - udpha->uha_dst_port = assoc->ipsa_remote_port; - else - udpha->uha_dst_port = htons(IPPORT_IKE_NATT); + udpha->uha_src_port = (assoc->ipsa_local_nat_port != 0) ? + assoc->ipsa_local_nat_port : htons(IPPORT_IKE_NATT); + udpha->uha_dst_port = (assoc->ipsa_remote_nat_port != 0) ? + assoc->ipsa_remote_nat_port : htons(IPPORT_IKE_NATT); /* - * Set the checksum to 0, so that the ip_wput_ipsec_out() + * Set the checksum to 0, so that the esp_prepare_udp() call * can do the right thing. 
*/ udpha->uha_checksum = 0; @@ -2880,17 +2982,15 @@ esp_register_out(uint32_t sequence, uint32_t pid, uint_t serial, samsg->sadb_msg_pid = pid; if (sasupp_auth != NULL) { - sasupp_auth->sadb_supported_len = - SADB_8TO64(sizeof (*sasupp_auth) + - sizeof (*saalg) * current_aalgs); + sasupp_auth->sadb_supported_len = SADB_8TO64( + sizeof (*sasupp_auth) + sizeof (*saalg) * current_aalgs); sasupp_auth->sadb_supported_exttype = SADB_EXT_SUPPORTED_AUTH; sasupp_auth->sadb_supported_reserved = 0; } if (sasupp_encr != NULL) { - sasupp_encr->sadb_supported_len = - SADB_8TO64(sizeof (*sasupp_encr) + - sizeof (*saalg) * current_ealgs); + sasupp_encr->sadb_supported_len = SADB_8TO64( + sizeof (*sasupp_encr) + sizeof (*saalg) * current_ealgs); sasupp_encr->sadb_supported_exttype = SADB_EXT_SUPPORTED_ENCRYPT; sasupp_encr->sadb_supported_reserved = 0; @@ -3053,7 +3153,7 @@ esp_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi, */ if (acqrec->ipsacq_seq == samsg->sadb_msg_seq && IPSA_ARE_ADDR_EQUAL(dstaddr, - acqrec->ipsacq_dstaddr, acqrec->ipsacq_addrfam)) + acqrec->ipsacq_dstaddr, acqrec->ipsacq_addrfam)) break; mutex_exit(&acqrec->ipsacq_lock); } @@ -3100,7 +3200,7 @@ esp_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi, if (rc == 0 && lpkt != NULL) { rc = !taskq_dispatch(esp_taskq, inbound_task, - (void *) lpkt, TQ_NOSLEEP); + (void *) lpkt, TQ_NOSLEEP); } if (rc != 0) { @@ -3238,7 +3338,7 @@ esp_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, netstack_t *ns) if (assoc->sadb_sa_flags & ~(SADB_SAFLAGS_NOREPLAY | SADB_X_SAFLAGS_NATT_LOC | SADB_X_SAFLAGS_NATT_REM | - SADB_X_SAFLAGS_TUNNEL)) { + SADB_X_SAFLAGS_TUNNEL)) { *diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS; return (EINVAL); } @@ -3364,7 +3464,7 @@ esp_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, netstack_t *ns) mutex_exit(&ipss->ipsec_alg_lock); return (esp_add_sa_finish(mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi, - diagnostic, espstack)); + diagnostic, espstack)); } /* @@ -3387,10 
+3487,9 @@ esp_update_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, sin = (struct sockaddr_in *)(dstext + 1); return (sadb_update_sa(mp, ksi, - (sin->sin_family == AF_INET6) ? &espstack->esp_sadb.s_v6 : - &espstack->esp_sadb.s_v4, - diagnostic, espstack->esp_pfkey_q, esp_add_sa, - espstack->ipsecesp_netstack)); + (sin->sin_family == AF_INET6) ? &espstack->esp_sadb.s_v6 : + &espstack->esp_sadb.s_v4, diagnostic, espstack->esp_pfkey_q, + esp_add_sa, espstack->ipsecesp_netstack)); } /* @@ -3424,7 +3523,7 @@ esp_del_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, } return (sadb_del_sa(mp, ksi, &espstack->esp_sadb, diagnostic, - espstack->esp_pfkey_q)); + espstack->esp_pfkey_q)); } /* @@ -3836,7 +3935,7 @@ esp_inbound_accelerated(mblk_t *ipsec_in, mblk_t *data_mp, boolean_t isv4, icv_len = hada->da_icv_len; if ((icv_len != assoc->ipsa_mac_len) || (icv_len > DA_ICV_MAX_LEN) || (MBLKL(hada_mp) < - (sizeof (da_ipsec_t) - DA_ICV_MAX_LEN + icv_len))) { + (sizeof (da_ipsec_t) - DA_ICV_MAX_LEN + icv_len))) { esp0dbg(("esp_inbound_accelerated: " "ICV len (%u) incorrect or mblk too small (%u)\n", icv_len, (uint32_t)(MBLKL(hada_mp)))); @@ -3892,7 +3991,7 @@ esp_inbound_accelerated(mblk_t *ipsec_in, mblk_t *data_mp, boolean_t isv4, } esp3dbg(espstack, ("esp_inbound_accelerated: ESP authentication " - "succeeded, checking replay\n")); + "succeeded, checking replay\n")); ipsec_in->b_cont = data_mp; @@ -3900,7 +3999,7 @@ esp_inbound_accelerated(mblk_t *ipsec_in, mblk_t *data_mp, boolean_t isv4, * Remove ESP header and padding from packet. 
*/ if (!esp_strip_header(data_mp, ii->ipsec_in_v4, assoc->ipsa_iv_len, - &counter, espstack)) { + &counter, espstack)) { esp1dbg(espstack, ("esp_inbound_accelerated: " "esp_strip_header() failed\n")); goto esp_in_discard; diff --git a/usr/src/uts/common/inet/ip/nattymod.c b/usr/src/uts/common/inet/ip/nattymod.c deleted file mode 100644 index f8fb6bf453..0000000000 --- a/usr/src/uts/common/inet/ip/nattymod.c +++ /dev/null @@ -1,805 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -/* - * Required include files. 
- */ -#include <sys/types.h> -#include <sys/conf.h> -#include <sys/cred.h> -#include <sys/ddi.h> -#include <sys/modctl.h> -#include <sys/tihdr.h> -#include <sys/zone.h> -#include <sys/tpicommon.h> -#include <netinet/in.h> -#include <netinet/udp.h> -#include <sys/stropts.h> -#include <sys/strsun.h> -#include <inet/common.h> -#include <inet/led.h> -#include <inet/ip.h> -#include <inet/ip_if.h> -#include <net/pfkeyv2.h> -#include <inet/sadb.h> -#include <inet/ip_ire.h> -#include <sys/cmn_err.h> -#include <inet/udp_impl.h> -#include <inet/ipsec_impl.h> -#include <inet/ipdrop.h> -#include <inet/sadb.h> -#include <inet/ipsecesp.h> - -/* - * Design notes: - * - * - We assume that we're pushed on to a UDP instance that's bound to - * <addr>/4500. (This is done in in.iked.) - * - We assume that <addr> will not change on this instance. - * - With those two assumptions, we can make the following assertions: - * + We can cache not only the IRE, but also the address that we look - * up for the IRE. - * - We otherwise cache the ire in a manner similar to the conn_t structure - * in the main portions of TCP and IP. - */ - -/* Structures. */ -typedef struct nattyinfo -{ - struct nattyinfo **ni_ptpn; /* These two protected by nattyhlock. */ - struct nattyinfo *ni_next; - kmutex_t ni_lock; /* Lock for this instance. */ - ire_t *ni_ire; /* Cached ire for looping back packets. */ - queue_t *ni_fbqueue; /* Ire receive-from queue for feedback. */ - ipaddr_t ni_addr; /* Addr for ire re-lookups. */ - boolean_t ni_setup_done; /* done with setup */ - boolean_t ni_rh_wait; /* Seen UDP_RCVHDR request go by */ - boolean_t ni_rh_set; /* Have we set UDP_RCVHDR? */ - boolean_t ni_addr_wait; /* Seen T_ADDR_REQ go by */ - netstack_t *ni_netstack; -} nattyinfo_t; - -kmutex_t nattyhlock; /* List lock. */ -nattyinfo_t *nattyhead; /* List of instances. */ - - -/* - * Function prototypes. 
- */ - -static int nattymodopen(queue_t *, dev_t *, int, int, cred_t *); -static int nattymodclose(queue_t *); -static void natty_ka_timeout_callback(void *v_sa); -static void natty_rput(queue_t *q, mblk_t *mp); -static void natty_rput_other(queue_t *q, mblk_t *mp); -static void natty_rput_pkt(queue_t *q, mblk_t *mp); -static void natty_wput(queue_t *q, mblk_t *mp); - -/* - * Module linkage data - */ -static struct module_info nattymod_minfo = { - 1970, /* mi_idnum */ - "nattymod", /* mi_idname */ - 0, /* mi_minpsz */ - INFPSZ, /* mi_maxpsz */ - 0, /* mi_hiwat */ - 0 /* mi_lowat */ -}; - -static struct qinit nattymod_rinit = { - (int (*)())natty_rput, /* qi_putp */ - NULL, /* qi_srvp */ - nattymodopen, /* qi_qopen */ - nattymodclose, /* qi_qclose */ - NULL, /* qi_qadmin */ - &nattymod_minfo, /* qi_minfo */ -}; - -/* - * We don't worry much about the write-side here (except for the qtimeouts - * that send keepalives. Just putnext() and life is good. We only care about - * inbound packets. - */ -static struct qinit nattymod_winit = { - (int (*)())natty_wput, /* qi_putp */ - NULL, /* qi_srvp */ - NULL, /* qi_qopen */ - NULL, /* qi_qclose */ - NULL, /* qi_qadmin */ - &nattymod_minfo, /* qi_minfo */ -}; - -static struct streamtab nattymod_info = { - &nattymod_rinit, /* st_rdinit */ - &nattymod_winit, /* st_wrinit */ -}; - -static struct fmodsw fsw = { - "nattymod", - &nattymod_info, - D_MP | D_MTQPAIR | D_MTPUTSHARED -}; - -/* - * Module linkage information for the kernel. - */ -struct mod_ops mod_strmodops; - -static struct modlstrmod modlstrmod = { - &mod_strmodops, "Nat-t module ver %I%", &fsw -}; - -static struct modlinkage modlinkage = { - MODREV_1, (void *)&modlstrmod, NULL -}; - - -clock_t natty_ka_timeout = 0; - -/* - * Standard module entry points. 
- */ -int -_init(void) -{ - int error; - - natty_ka_timeout = drv_usectohz(20 * 1000000); - mutex_init(&nattyhlock, NULL, MUTEX_DEFAULT, NULL); - error = mod_install(&modlinkage); - if (error != 0) - mutex_destroy(&nattyhlock); - return (error); -} - -int -_fini(void) -{ - int error; - - error = mod_remove(&modlinkage); - - if (error == 0) - mutex_destroy(&nattyhlock); - - return (error); -} - -int -_info(struct modinfo *modinfop) -{ - return (mod_info(&modlinkage, modinfop)); -} - - - -/* ARGSUSED */ -static int -nattymodopen(queue_t *rq, dev_t *dev, int oflag, int sflag, cred_t *credp) -{ - nattyinfo_t *ni; - netstack_t *ns; - - if (sflag != MODOPEN) - return (EINVAL); - - ns = netstack_find_by_cred(credp); - ASSERT(ns != NULL); - - /* Use kmem_zalloc() to avoid initializing ni->* fields. */ - ni = kmem_zalloc(sizeof (nattyinfo_t), KM_SLEEP); - mutex_init(&ni->ni_lock, NULL, MUTEX_DEFAULT, NULL); - - rq->q_ptr = ni; - WR(rq)->q_ptr = ni; - - /* Insert into list before packets are allowed to flow. */ - mutex_enter(&nattyhlock); - ni->ni_ptpn = &nattyhead; - if (nattyhead != NULL) - nattyhead->ni_ptpn = &ni->ni_next; - ni->ni_next = nattyhead; - nattyhead = ni; - ni->ni_netstack = ns; - mutex_exit(&nattyhlock); - - qprocson(rq); - - return (0); -} - -static int -nattymodclose(queue_t *rq) -{ - nattyinfo_t *ni = (nattyinfo_t *)rq->q_ptr; - - /* Unlink from list. */ - mutex_enter(&nattyhlock); - *(ni->ni_ptpn) = ni->ni_next; - if (ni->ni_next != NULL) - ni->ni_next->ni_ptpn = ni->ni_ptpn; - mutex_exit(&nattyhlock); - - sadb_clear_timeouts(WR(rq), ni->ni_netstack); - - netstack_rele(ni->ni_netstack); - qprocsoff(rq); - - /* Unlinked from list means ==> no need to mutex. 
*/ - if (ni->ni_ire != NULL) { - IRE_REFRELE_NOTR(ni->ni_ire); - } - - mutex_destroy(&ni->ni_lock); - kmem_free(ni, sizeof (*ni)); - rq->q_ptr = NULL; - WR(rq)->q_ptr = NULL; - - - return (0); -} - -static ipaddr_t -addr_from_mblk(mblk_t *mp) -{ - struct T_addr_ack *taa; - sin_t *sin; - sin6_t *sin6; - ipaddr_t addr = (ipaddr_t)-1; - - taa = (struct T_addr_ack *)mp->b_rptr; - - if (MBLKL(mp) >= sizeof (*taa) && - mp->b_wptr >= (mp->b_rptr + taa->LOCADDR_offset + sizeof (*sin))) { - sin = (sin_t *)(mp->b_rptr + taa->LOCADDR_offset); - if (IS_P2ALIGNED(sin, sizeof (uint32_t))) { - if (sin->sin_family == AF_INET) { - addr = sin->sin_addr.s_addr; - } else if (sin->sin_family == AF_INET6 && - mp->b_wptr >= (mp->b_rptr + - taa->LOCADDR_offset + sizeof (*sin))) { - sin6 = (sin6_t *)sin; - ASSERT(sin6->sin6_family == AF_INET6); - if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) - IN6_V4MAPPED_TO_IPADDR( - &sin6->sin6_addr, addr); - } - } - } - - return (addr); -} - -static void -get_my_ire(nattyinfo_t *ni, ipaddr_t addr) -{ - ire_t *ire; - uint8_t *bytes; - boolean_t cached; - - mutex_enter(&ni->ni_lock); - if (ni->ni_ire != NULL) { - /* I lost the race. */ - mutex_exit(&ni->ni_lock); - return; - } - - if (addr == (ipaddr_t)0 || addr == (ipaddr_t)-1) - goto bail; - - ni->ni_addr = addr; - - ire = ire_ctable_lookup(addr, 0, IRE_LOCAL, NULL, ALL_ZONES, NULL, - MATCH_IRE_TYPE, ni->ni_netstack->netstack_ip); - if (ire == NULL) - goto bail; - - rw_enter(&ire->ire_bucket->irb_lock, RW_READER); - if (!(ire->ire_marks & IRE_MARK_CONDEMNED)) { - ni->ni_ire = ire; - ni->ni_fbqueue = ire->ire_rfq; - cached = B_TRUE; - } else { - cached = B_FALSE; - } - rw_exit(&ire->ire_bucket->irb_lock); - - if (!cached) { - ASSERT(ni->ni_ire == NULL); - ASSERT(ni->ni_fbqueue == NULL); - IRE_REFRELE(ire); - } else { - IRE_UNTRACE_REF(ire); - } - - mutex_exit(&ni->ni_lock); - return; -bail: - /* Error getting address or ire. Make nattyinfo null and void. 
*/ - bytes = (uint8_t *)&addr; - cmn_err(CE_WARN, "Missing local IP address %u.%u.%u.%u", - bytes[0], bytes[1], bytes[2], bytes[3]); - ni->ni_fbqueue = NULL; - ni->ni_ire = NULL; - mutex_exit(&ni->ni_lock); -} - -/* rput */ -void -natty_rput(queue_t *q, mblk_t *mp) -{ - switch (mp->b_datap->db_type) { - case M_DATA: - /* Shouldn't see M_DATA. UDP converts these to M_PROTO. */ - freemsg(mp); - break; - case M_PROTO: - case M_PCPROTO: - /* strip ip header, process, pass up */ - natty_rput_other(q, mp); - return; - default: - putnext(q, mp); - return; - } - -} - -void -natty_rput_other(queue_t *q, mblk_t *mp) -{ - nattyinfo_t *ni = (nattyinfo_t *)q->q_ptr; - t_scalar_t t; - - /* proto or pcproto from UDP */ - - ASSERT(mp != NULL); - ASSERT(mp->b_datap->db_type == M_PROTO || - mp->b_datap->db_type == M_PCPROTO); - - t = *(t_scalar_t *)(mp->b_rptr); - - if (t == T_UNITDATA_IND) { - natty_rput_pkt(q, mp); - return; - } - - if (ni->ni_setup_done) { - putnext(q, mp); - return; - } - - switch (t) { - case T_OPTMGMT_ACK: - if (ni->ni_rh_wait && - (MBLKL(mp) >= sizeof (struct T_optmgmt_ack))) { - ni->ni_rh_set = B_TRUE; - ni->ni_rh_wait = B_FALSE; - } - break; - case T_ADDR_ACK: - if (ni->ni_addr_wait && ni->ni_ire == NULL) { - get_my_ire(ni, addr_from_mblk(mp)); - ni->ni_addr_wait = B_FALSE; - } - break; - } - if (ni->ni_rh_set && (ni->ni_ire != NULL)) - ni->ni_setup_done = B_TRUE; - putnext(q, mp); -} - -void -natty_rput_pkt(queue_t *q, mblk_t *mp) -{ - mblk_t *data_mp; - mblk_t *iph_mp; - mblk_t *tdi_mp; - uchar_t *rptr; - uchar_t *new_rptr; - ipha_t *iph; - int32_t pkt_len; - ipsa_t *ipsa; - int32_t hdr_length; - uint16_t tmp_len; - int ntries = 0; - nattyinfo_t *ni = q->q_ptr; - sadb_t *sp; - netstack_t *ns = ni->ni_netstack; - ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; - ipsec_stack_t *ipss = ns->netstack_ipsec; - - if (!ni->ni_rh_set) { -#ifdef DEBUG - cmn_err(CE_WARN, "natty_rput_pkt: not set up"); -#endif - /* not fully set up */ - freemsg(mp); - return; - } 
- - /* remember mp, may need it later */ - tdi_mp = mp; - data_mp = mp->b_cont; - if (data_mp == NULL) { - putnext(q, mp); - return; - } - - if (data_mp->b_cont != NULL) { - data_mp = msgpullup(data_mp, -1); - freemsg(mp->b_cont); - } - tdi_mp->b_cont = NULL; - - if (data_mp == NULL || - data_mp->b_wptr - data_mp->b_rptr < - sizeof (ipha_t) + sizeof (udpha_t) + 1) { - cmn_err(CE_WARN, (data_mp == NULL) ? "msgpullup() failure" : - "Short packet"); - freemsg(data_mp); - freemsg(tdi_mp); - return; - }; - - iph_mp = copyb(data_mp); - if (iph_mp == NULL) { - cmn_err(CE_WARN, "Low memory: copyb() failed."); - freemsg(data_mp); - freemsg(tdi_mp); - return; - } - - /* IP headers */ - rptr = iph_mp->b_rptr; - hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; - iph = (ipha_t *)rptr; - data_mp->b_rptr += hdr_length; - iph_mp->b_wptr = iph_mp->b_rptr + hdr_length; - - iph_mp->b_cont = data_mp; - new_rptr = data_mp->b_rptr; - - pkt_len = data_mp->b_wptr - data_mp->b_rptr; - if (pkt_len == 1) { - /* keep alive */ - freemsg(tdi_mp); - freemsg(iph_mp); - return; - } - - if (pkt_len > 3) { - isaf_t *bucket; - uint32_t spi; - - if (IS_P2ALIGNED(new_rptr, 4)) { - spi = *((uint32_t *)new_rptr); - } else { - spi = new_rptr[0] + (new_rptr[1] << 8) + - (new_rptr[2] << 16) + (new_rptr[3] << 24); - } - - if (spi == 0) { - /* - * it's ike over 4500 - * strip off marker and pass up - */ - data_mp->b_rptr += 4; - - iph_mp->b_cont = NULL; - freemsg(iph_mp); - - tdi_mp->b_cont = data_mp; - putnext(q, tdi_mp); - return; - } - - freemsg(tdi_mp); - - /* - * build new packet - * - * packet should be one mblk - * looks like [IP][UDP][ESP] - * via clever manipulation of mblk, becomes - * [IP][ESP] - */ - - - /* change fields */ - /* len, protocol, cksum */ - - tmp_len = ntohs(iph->ipha_length); - tmp_len -= UDPH_SIZE; - iph->ipha_length = htons(tmp_len); - - iph->ipha_protocol = IPPROTO_ESP; - - iph->ipha_hdr_checksum = 0; - iph->ipha_hdr_checksum = ip_csum_hdr(iph); - - iph_mp->b_wptr -= UDPH_SIZE; - - 
/* we are v4 only */ - sp = &espstack->esp_sadb.s_v4; - bucket = INBOUND_BUCKET(sp, spi); - - mutex_enter(&bucket->isaf_lock); - ipsa = ipsec_getassocbyspi(bucket, spi, - (uint32_t *)&(iph->ipha_src), (uint32_t *)&(iph->ipha_dst), - AF_INET); - mutex_exit(&bucket->isaf_lock); - - if (ipsa == NULL || ipsa->ipsa_state == IPSA_STATE_DEAD || - (!(ipsa->ipsa_flags & IPSA_F_NATT) && - ipsa->ipsa_state != IPSA_STATE_LARVAL)) { - /* no associated sa error */ - - if (ipsa != NULL) { - /* - * While we give LARVALs the benefit of the - * doubt, full SAs that aren't NAT-T shouldn't - * be dealing with inbound NAT-T traffic. - */ - if (!(ipsa->ipsa_flags & IPSA_F_NATT)) { - cmn_err(CE_WARN, "UDP-ESP arrived for " - "non-NAT SA, spi 0x%x", - htonl(ipsa->ipsa_spi)); - } - IPSA_REFRELE(ipsa); - } - - /* Handle the kstat_create in ip_drop_init() failing */ - ip_drop_packet(iph_mp, B_TRUE, NULL, NULL, - DROPPER(ipss, ipds_esp_no_sa), - &ipss->ipsec_dropper); - return; - } - - mutex_enter(&ipsa->ipsa_lock); - if (ipsa->ipsa_natt_ka_timer == 0) { - ASSERT(ipsa->ipsa_natt_q == NULL || - ipsa->ipsa_natt_q == WR(q)); - ipsa->ipsa_natt_q = WR(q); - - ipsa->ipsa_natt_ka_timer = qtimeout(ipsa->ipsa_natt_q, - natty_ka_timeout_callback, ipsa, natty_ka_timeout); - } - mutex_exit(&ipsa->ipsa_lock); - - IPSA_REFRELE(ipsa); - - iph_mp->b_datap->db_type = M_DATA; - - /* - * If the cached ire is useless, try up to IRE_RETRIES number - * of times to get a new one. 
- */ -#define IRE_RETRIES 2 - do { - mutex_enter(&ni->ni_lock); - ASSERT(ni->ni_ire == NULL || - ni->ni_ire->ire_rfq == ni->ni_fbqueue); - if (ni->ni_ire != NULL && - !(ni->ni_ire->ire_marks & IRE_MARK_CONDEMNED)) { - IRE_REFHOLD(ni->ni_ire); - mutex_exit(&ni->ni_lock); - put(ni->ni_fbqueue, iph_mp); - IRE_REFRELE(ni->ni_ire); - return; - } else if (ntries < IRE_RETRIES) { - ire_t *ire; - - ntries++; - ire = ni->ni_ire; - ni->ni_ire = NULL; - ni->ni_fbqueue = NULL; - mutex_exit(&ni->ni_lock); - if (ire != NULL) - IRE_REFRELE_NOTR(ire); - get_my_ire(ni, ni->ni_addr); - } else { - mutex_exit(&ni->ni_lock); - } - } while (ntries < IRE_RETRIES); - } else { - freemsg(tdi_mp); - } - - /* bad pkt */ - freemsg(iph_mp); -} - -static void -natty_wput(queue_t *q, mblk_t *mp) -{ - nattyinfo_t *ni = q->q_ptr; - struct T_optmgmt_req *rp; - struct opthdr *ohp; - - if (ni->ni_setup_done) { - putnext(q, mp); - return; - } - - if (mp->b_datap->db_type != M_PROTO && - mp->b_datap->db_type != M_PCPROTO) { - putnext(q, mp); - return; - } - - if (MBLKL(mp) < sizeof (int)) { - putnext(q, mp); - return; - } - - switch (*(t_scalar_t *)(mp->b_rptr)) { - case T_SVR4_OPTMGMT_REQ: - /* - * Expect a T_optmgmt_req followed by an opthdr - * followed by an int (with the option value of interest). - * If the one request we're snooping for wouldn't fit, - * don't bother looking further. 
- */ - if (MBLKL(mp) < (sizeof (*rp) + sizeof (*ohp) + sizeof (int))) { - putnext(q, mp); - return; - } - - rp = (struct T_optmgmt_req *)mp->b_rptr; - - if ((rp->OPT_length >= sizeof (struct opthdr) + sizeof (int)) && - (rp->OPT_offset == sizeof (struct T_optmgmt_req)) && - (rp->MGMT_flags == T_NEGOTIATE)) { - ohp = (struct opthdr *)(rp + 1); - if ((ohp->level == IPPROTO_UDP) && - (ohp->name == UDP_RCVHDR) && - (ohp->len == sizeof (int))) { - if (((int *)(ohp + 1)) != 0) { - ni->ni_rh_wait = B_TRUE; - } - } - } - break; - - case T_ADDR_REQ: - ni->ni_addr_wait = B_TRUE; - break; - } - - putnext(q, mp); -} - -static void -natty_ka_timeout_callback(void *v_sa) -{ - ipsa_t *ipsa = (ipsa_t *)v_sa; - mblk_t *mp; - struct T_unitdata_req *tudr; - sin_t *sin; - boolean_t set_new_timeout = B_FALSE; - queue_t *q; - - ASSERT(ipsa != NULL); - - if (ipsa->ipsa_state == IPSA_STATE_DEAD) { - /* clear out timer and return */ - goto ntbail; - } - - /* - * build packet - * - * [tudr][addr] + [1 byte of data (0xff)] - */ - - mp = allocb(sizeof (*tudr) + sizeof (*sin), BPRI_HI); - - if (mp == NULL) { - /* natt timeouts are the least of our worries */ - goto ntbail; - } - - set_new_timeout = B_TRUE; - - mp->b_datap->db_type = M_PROTO; - mp->b_wptr = mp->b_rptr + sizeof (*tudr) + sizeof (*sin); - - tudr = (struct T_unitdata_req *)mp->b_rptr; - - tudr->PRIM_type = T_UNITDATA_REQ; - tudr->DEST_length = sizeof (*sin); - tudr->DEST_offset = sizeof (*tudr); - tudr->OPT_length = 0; - tudr->OPT_offset = 0; - sin = (sin_t *)(mp->b_rptr + sizeof (*tudr)); - if (ipsa->ipsa_remote_port != 0) - sin->sin_port = ipsa->ipsa_remote_port; - else - sin->sin_port = htons(IPPORT_IKE_NATT); - sin->sin_family = AF_INET; - - if (ipsa->ipsa_addrfam == AF_INET6) - sin->sin_addr.s_addr = ipsa->ipsa_srcaddr[3]; - else - sin->sin_addr.s_addr = ipsa->ipsa_srcaddr[0]; - - mp->b_cont = allocb(1, BPRI_HI); - - if (mp->b_cont == NULL) { - set_new_timeout = B_FALSE; - freeb(mp); - goto ntbail; - } - - 
*(mp->b_cont->b_rptr) = 0xFF; - mp->b_cont->b_wptr = mp->b_cont->b_rptr + 1; - -ntbail: - mutex_enter(&ipsa->ipsa_lock); - if (set_new_timeout && ipsa->ipsa_natt_ka_timer != 0) { - ipsa->ipsa_natt_ka_timer = qtimeout(ipsa->ipsa_natt_q, - natty_ka_timeout_callback, ipsa, natty_ka_timeout); - } else { - ipsa->ipsa_natt_ka_timer = 0; - ipsa->ipsa_natt_q = NULL; - } - q = ipsa->ipsa_natt_q; - mutex_exit(&ipsa->ipsa_lock); - - if (q != NULL) - putnext(q, mp); -} - -/* - * Called from ipif_down(), if this module's loaded (or it hits a modstub if - * not). Check all nattyinfos for the ipif pointer. - */ -void -nattymod_clean_ipif(ipif_t *ipif) -{ - nattyinfo_t *walker; - ire_t *ire; - - ASSERT(ipif != NULL); - - mutex_enter(&nattyhlock); - for (walker = nattyhead; walker != NULL; walker = walker->ni_next) { - mutex_enter(&walker->ni_lock); - ire = walker->ni_ire; - if (ire != NULL && (ipif == NULL || ire->ire_ipif == ipif)) { - walker->ni_ire = NULL; - walker->ni_fbqueue = NULL; - mutex_exit(&walker->ni_lock); - IRE_REFRELE_NOTR(ire); - continue; - } - mutex_exit(&walker->ni_lock); - } - mutex_exit(&nattyhlock); -} diff --git a/usr/src/uts/common/inet/ip/sadb.c b/usr/src/uts/common/inet/ip/sadb.c index 51e6be5278..2af693d1d0 100644 --- a/usr/src/uts/common/inet/ip/sadb.c +++ b/usr/src/uts/common/inet/ip/sadb.c @@ -175,6 +175,10 @@ sadb_add_time(time_t base, uint64_t delta) * * Return 0 if success, EEXIST if collision. 
*/ +#define SA_UNIQUE_MATCH(sa1, sa2) \ + (((sa1)->ipsa_unique_id & (sa1)->ipsa_unique_mask) == \ + ((sa2)->ipsa_unique_id & (sa2)->ipsa_unique_mask)) + int sadb_insertassoc(ipsa_t *ipsa, isaf_t *bucket) { @@ -207,10 +211,7 @@ sadb_insertassoc(ipsa_t *ipsa, isaf_t *bucket) mutex_enter(&walker->ipsa_lock); if (ipsa->ipsa_state == IPSA_STATE_MATURE && (walker->ipsa_flags & IPSA_F_USED) && - ((walker->ipsa_unique_id & - walker->ipsa_unique_mask) == - (ipsa->ipsa_unique_id & - ipsa->ipsa_unique_mask))) { + SA_UNIQUE_MATCH(walker, ipsa)) { walker->ipsa_flags |= IPSA_F_CINVALID; } mutex_exit(&walker->ipsa_lock); @@ -238,6 +239,7 @@ sadb_insertassoc(ipsa_t *ipsa, isaf_t *bucket) return (0); } +#undef SA_UNIQUE_MATCH /* * Free a security association. Its reference count is 0, which means @@ -260,10 +262,6 @@ sadb_freeassoc(ipsa_t *ipsa) &ipss->ipsec_sadb_dropper); mutex_enter(&ipsa->ipsa_lock); - - if (ipsa->ipsa_natt_ka_timer != 0) - (void) quntimeout(ipsa->ipsa_natt_q, ipsa->ipsa_natt_ka_timer); - ipsec_destroy_ctx_tmpl(ipsa, IPSEC_ALG_AUTH); ipsec_destroy_ctx_tmpl(ipsa, IPSEC_ALG_ENCR); mutex_exit(&ipsa->ipsa_lock); @@ -1370,9 +1368,9 @@ sadb_sa2msg(ipsa_t *ipsa, sadb_msg_t *samsg) */ alloclen += addrsize * 2; if (ipsa->ipsa_flags & IPSA_F_NATT_REM) - alloclen += addrsize; + alloclen += addrsize; if (ipsa->ipsa_flags & IPSA_F_NATT_LOC) - alloclen += addrsize; + alloclen += addrsize; /* How 'bout other lifetimes? */ @@ -1503,7 +1501,8 @@ sadb_sa2msg(ipsa_t *ipsa, sadb_msg_t *samsg) lt = (sadb_lifetime_t *)(assoc + 1); lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt)); lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT; - lt->sadb_lifetime_allocations = ipsa->ipsa_alloc; + /* We do not support the concept. 
*/ + lt->sadb_lifetime_allocations = 0; lt->sadb_lifetime_bytes = ipsa->ipsa_bytes; lt->sadb_lifetime_addtime = ipsa->ipsa_addtime; lt->sadb_lifetime_usetime = ipsa->ipsa_usetime; @@ -1551,7 +1550,8 @@ sadb_sa2msg(ipsa_t *ipsa, sadb_msg_t *samsg) if (ipsa->ipsa_flags & IPSA_F_NATT_LOC) { cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_NATT_LOC, - fam, ipsa->ipsa_natt_addr_loc, 0, 0, 0); + fam, &ipsa->ipsa_natt_addr_loc, ipsa->ipsa_local_nat_port, + IPPROTO_UDP, 0); if (cur == NULL) { freemsg(mp); mp = NULL; @@ -1561,7 +1561,7 @@ sadb_sa2msg(ipsa_t *ipsa, sadb_msg_t *samsg) if (ipsa->ipsa_flags & IPSA_F_NATT_REM) { cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_NATT_REM, - fam, ipsa->ipsa_natt_addr_rem, ipsa->ipsa_remote_port, + fam, &ipsa->ipsa_natt_addr_rem, ipsa->ipsa_remote_nat_port, IPPROTO_UDP, 0); if (cur == NULL) { freemsg(mp); @@ -1727,13 +1727,13 @@ sadb_strip(sadb_msg_t *samsg) */ copylen = ((uint8_t *)ext) - (target + SADB_64TO8( - ((sadb_ext_t *)target)->sadb_ext_len)); + ((sadb_ext_t *)target)->sadb_ext_len)); ovbcopy(((uint8_t *)ext - copylen), target, copylen); target += copylen; ((sadb_ext_t *)target)->sadb_ext_len = SADB_8TO64(((uint8_t *)ext) - target + - SADB_64TO8(ext->sadb_ext_len)); + SADB_64TO8(ext->sadb_ext_len)); } else { target = (uint8_t *)ext; } @@ -2067,7 +2067,7 @@ bail: * question is out of range. */ if (ip_plen_to_mask_v6(addr->sadb_address_prefixlen, - &mask) == NULL) + &mask) == NULL) goto bail; sin6->sin6_addr.s6_addr32[0] &= mask.s6_addr32[0]; sin6->sin6_addr.s6_addr32[1] &= mask.s6_addr32[1]; @@ -2186,16 +2186,14 @@ sadb_addrfix(keysock_in_t *ksi, queue_t *pfkey_q, mblk_t *mp, netstack_t *ns) extv[SADB_X_EXT_ADDRESS_NATT_LOC], ksi->ks_in_serial, ns); /* - * NATT addresses never use an IRE_LOCAL, so it should - * always be NOTME, or UNSPEC if it's a tunnel-mode SA. + * Local NAT-T addresses never use an IRE_LOCAL, so it should + * always be NOTME, or UNSPEC (to handle both tunnel mode + * AND local-port flexibility). 
*/ - if (rc != KS_IN_ADDR_NOTME && - !(extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL && - rc == KS_IN_ADDR_UNSPEC)) { - if (rc != KS_IN_ADDR_UNKNOWN) - sadb_pfkey_error(pfkey_q, mp, EINVAL, - SADB_X_DIAGNOSTIC_MALFORMED_NATT_LOC, - ksi->ks_in_serial); + if (rc != KS_IN_ADDR_NOTME && rc != KS_IN_ADDR_UNSPEC) { + sadb_pfkey_error(pfkey_q, mp, EINVAL, + SADB_X_DIAGNOSTIC_MALFORMED_NATT_LOC, + ksi->ks_in_serial); return (B_FALSE); } src = (struct sockaddr_in *) @@ -2213,16 +2211,15 @@ sadb_addrfix(keysock_in_t *ksi, queue_t *pfkey_q, mblk_t *mp, netstack_t *ns) extv[SADB_X_EXT_ADDRESS_NATT_REM], ksi->ks_in_serial, ns); /* - * NATT addresses never use an IRE_LOCAL, so it should + * Remote NAT-T addresses never use an IRE_LOCAL, so it should * always be NOTME, or UNSPEC if it's a tunnel-mode SA. */ if (rc != KS_IN_ADDR_NOTME && !(extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL && - rc == KS_IN_ADDR_UNSPEC)) { - if (rc != KS_IN_ADDR_UNKNOWN) - sadb_pfkey_error(pfkey_q, mp, EINVAL, - SADB_X_DIAGNOSTIC_MALFORMED_NATT_REM, - ksi->ks_in_serial); + rc == KS_IN_ADDR_UNSPEC)) { + sadb_pfkey_error(pfkey_q, mp, EINVAL, + SADB_X_DIAGNOSTIC_MALFORMED_NATT_REM, + ksi->ks_in_serial); return (B_FALSE); } src = (struct sockaddr_in *) @@ -2253,10 +2250,10 @@ sadb_addrfix(keysock_in_t *ksi, queue_t *pfkey_q, mblk_t *mp, netstack_t *ns) isrc = (struct sockaddr_in *) (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_SRC]) + - 1); + 1); idst = (struct sockaddr_in6 *) (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_DST]) + - 1); + 1); if (isrc->sin_family != idst->sin6_family) { sadb_pfkey_error(pfkey_q, mp, EINVAL, SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH, @@ -2341,7 +2338,7 @@ sadb_addrset(ire_t *ire) if ((ire->ire_type & IRE_BROADCAST) || (ire->ire_ipversion == IPV4_VERSION && CLASSD(ire->ire_addr)) || (ire->ire_ipversion == IPV6_VERSION && - IN6_IS_ADDR_MULTICAST(&(ire->ire_addr_v6)))) + IN6_IS_ADDR_MULTICAST(&(ire->ire_addr_v6)))) return (KS_IN_ADDR_MBCAST); if (ire->ire_type & (IRE_LOCAL | 
IRE_LOOPBACK)) return (KS_IN_ADDR_ME); @@ -2381,17 +2378,15 @@ sadb_purge_cb(isaf_t *head, ipsa_t *entry, void *cookie) if ((entry->ipsa_state == IPSA_STATE_LARVAL) || (ps->src != NULL && - !IPSA_ARE_ADDR_EQUAL(entry->ipsa_srcaddr, ps->src, ps->af)) || + !IPSA_ARE_ADDR_EQUAL(entry->ipsa_srcaddr, ps->src, ps->af)) || (ps->dst != NULL && - !IPSA_ARE_ADDR_EQUAL(entry->ipsa_dstaddr, ps->dst, ps->af)) || - (ps->didstr != NULL && - (entry->ipsa_dst_cid != NULL) && - !(ps->didtype == entry->ipsa_dst_cid->ipsid_type && - strcmp(ps->didstr, entry->ipsa_dst_cid->ipsid_cid) == 0)) || - (ps->sidstr != NULL && - (entry->ipsa_src_cid != NULL) && - !(ps->sidtype == entry->ipsa_src_cid->ipsid_type && - strcmp(ps->sidstr, entry->ipsa_src_cid->ipsid_cid) == 0)) || + !IPSA_ARE_ADDR_EQUAL(entry->ipsa_dstaddr, ps->dst, ps->af)) || + (ps->didstr != NULL && (entry->ipsa_dst_cid != NULL) && + !(ps->didtype == entry->ipsa_dst_cid->ipsid_type && + strcmp(ps->didstr, entry->ipsa_dst_cid->ipsid_cid) == 0)) || + (ps->sidstr != NULL && (entry->ipsa_src_cid != NULL) && + !(ps->sidtype == entry->ipsa_src_cid->ipsid_type && + strcmp(ps->sidstr, entry->ipsa_src_cid->ipsid_cid) == 0)) || (ps->kmproto <= SADB_X_KMP_MAX && ps->kmproto != entry->ipsa_kmp)) { mutex_exit(&entry->ipsa_lock); return; @@ -2691,7 +2686,6 @@ sadb_nat_calculations(ipsa_t *newbie, sadb_address_t *natt_loc_ext, #define DOWN_SUM(x) (x) = ((x) & 0xFFFF) + ((x) >> 16) - if (natt_rem_ext != NULL) { uint32_t l_src; uint32_t l_rem; @@ -2702,12 +2696,12 @@ sadb_nat_calculations(ipsa_t *newbie, sadb_address_t *natt_loc_ext, ASSERT(natt_rem->sin_family == AF_INET); natt_rem_ptr = (uint32_t *)(&natt_rem->sin_addr); - newbie->ipsa_remote_port = natt_rem->sin_port; + newbie->ipsa_remote_nat_port = natt_rem->sin_port; l_src = *src_addr_ptr; l_rem = *natt_rem_ptr; /* Instead of IPSA_COPY_ADDR(), just copy first 32 bits. 
*/ - newbie->ipsa_natt_addr_rem[0] = *natt_rem_ptr; + newbie->ipsa_natt_addr_rem = *natt_rem_ptr; l_src = ntohl(l_src); DOWN_SUM(l_src); @@ -2730,39 +2724,41 @@ sadb_nat_calculations(ipsa_t *newbie, sadb_address_t *natt_loc_ext, } if (natt_loc_ext != NULL) { - uint32_t l_dst; - uint32_t l_loc; - natt_loc = (struct sockaddr_in *)(natt_loc_ext + 1); /* Ensured by sadb_addrfix(). */ ASSERT(natt_loc->sin_family == AF_INET); - natt_loc_ptr = (uint32_t *)&natt_loc->sin_addr; - /* TODO - future port flexibility beyond 4500. */ - l_dst = *dst_addr_ptr; - l_loc = *natt_loc_ptr; + natt_loc_ptr = (uint32_t *)(&natt_loc->sin_addr); + newbie->ipsa_local_nat_port = natt_loc->sin_port; /* Instead of IPSA_COPY_ADDR(), just copy first 32 bits. */ - newbie->ipsa_natt_addr_loc[0] = *natt_loc_ptr; - - l_loc = ntohl(l_loc); - DOWN_SUM(l_loc); - DOWN_SUM(l_loc); - l_dst = ntohl(l_dst); - DOWN_SUM(l_dst); - DOWN_SUM(l_dst); + newbie->ipsa_natt_addr_loc = *natt_loc_ptr; /* - * We're 1's complement for checksums, so check for wraparound - * here. + * NAT-T port agility means we may have natt_loc_ext, but + * only for a local-port change. */ - if (l_loc > l_dst) - l_dst--; + if (natt_loc->sin_addr.s_addr != INADDR_ANY) { + uint32_t l_dst = ntohl(*dst_addr_ptr); + uint32_t l_loc = ntohl(*natt_loc_ptr); - running_sum += l_dst - l_loc; - DOWN_SUM(running_sum); - DOWN_SUM(running_sum); + DOWN_SUM(l_loc); + DOWN_SUM(l_loc); + DOWN_SUM(l_dst); + DOWN_SUM(l_dst); + + /* + * We're 1's complement for checksums, so check for + * wraparound here. 
+ */ + if (l_loc > l_dst) + l_dst--; + + running_sum += l_dst - l_loc; + DOWN_SUM(running_sum); + DOWN_SUM(running_sum); + } } newbie->ipsa_inbound_cksum = running_sum; @@ -2948,11 +2944,11 @@ sadb_common_add(queue_t *ip_q, queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg, ASSERT((newbie->ipsa_flags & IPSA_F_UNIQUE) == newbie->ipsa_flags); newbie->ipsa_flags |= assoc->sadb_sa_flags; if ((newbie->ipsa_flags & SADB_X_SAFLAGS_NATT_LOC && - ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC] == NULL) || + ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC] == NULL) || (newbie->ipsa_flags & SADB_X_SAFLAGS_NATT_REM && - ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM] == NULL) || + ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM] == NULL) || (newbie->ipsa_flags & SADB_X_SAFLAGS_TUNNEL && - ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC] == NULL)) { + ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC] == NULL)) { mutex_exit(&newbie->ipsa_lock); *diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS; error = EINVAL; @@ -2969,7 +2965,7 @@ sadb_common_add(queue_t *ip_q, queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg, else newbie->ipsa_replay_wsize = 0; - (void) drv_getparm(TIME, &newbie->ipsa_addtime); + newbie->ipsa_addtime = gethrestime_sec(); if (kmcext != NULL) { newbie->ipsa_kmp = kmcext->sadb_x_kmc_proto; @@ -3302,7 +3298,10 @@ error: void sadb_set_usetime(ipsa_t *assoc) { + time_t snapshot = gethrestime_sec(); + mutex_enter(&assoc->ipsa_lock); + assoc->ipsa_lastuse = snapshot; /* * Caller does check usetime before calling me usually, and * double-checking is better than a mutex_enter/exit hit. @@ -3314,8 +3313,7 @@ sadb_set_usetime(ipsa_t *assoc) * Inbound SAs, however, have no such protection. 
*/ assoc->ipsa_flags |= IPSA_F_USED; - - (void) drv_getparm(TIME, &assoc->ipsa_usetime); + assoc->ipsa_usetime = snapshot; /* * After setting the use time, see if we have a use lifetime @@ -3430,7 +3428,8 @@ sadb_expire_assoc(queue_t *pfkey_q, ipsa_t *assoc) mp->b_wptr += sizeof (sadb_lifetime_t); current->sadb_lifetime_len = SADB_8TO64(sizeof (*current)); current->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT; - current->sadb_lifetime_allocations = assoc->ipsa_alloc; + /* We do not support the concept. */ + current->sadb_lifetime_allocations = 0; current->sadb_lifetime_bytes = assoc->ipsa_bytes; current->sadb_lifetime_addtime = assoc->ipsa_addtime; current->sadb_lifetime_usetime = assoc->ipsa_usetime; @@ -3591,14 +3590,43 @@ sadb_torch_assoc(isaf_t *head, ipsa_t *sa, boolean_t inbnd, mblk_t **mq) } /* + * Do various SA-is-idle activities depending on delta (the number of idle + * seconds on the SA) and/or other properties of the SA. + * + * Return B_TRUE if I've sent a packet, because I have to drop the + * association's mutex before sending a packet out the wire. + */ +/* ARGSUSED */ +static boolean_t +sadb_idle_activities(ipsa_t *assoc, time_t delta, boolean_t inbound) +{ + ipsecesp_stack_t *espstack = assoc->ipsa_netstack->netstack_ipsecesp; + int nat_t_interval = espstack->ipsecesp_nat_keepalive_interval; + + ASSERT(MUTEX_HELD(&assoc->ipsa_lock)); + + if (!inbound && (assoc->ipsa_flags & IPSA_F_NATT_LOC) && + delta >= nat_t_interval && + gethrestime_sec() - assoc->ipsa_last_nat_t_ka >= nat_t_interval) { + ASSERT(assoc->ipsa_type == SADB_SATYPE_ESP); + assoc->ipsa_last_nat_t_ka = gethrestime_sec(); + mutex_exit(&assoc->ipsa_lock); + ipsecesp_send_keepalive(assoc); + return (B_TRUE); + } + return (B_FALSE); +} + +/* * Return "assoc" iff haspeer is true and I send an expire. This allows * the consumers' aging functions to tidy up an expired SA's peer. 
*/ static ipsa_t * sadb_age_assoc(isaf_t *head, queue_t *pfkey_q, ipsa_t *assoc, - time_t current, int reap_delay, boolean_t inbnd, mblk_t **mq) + time_t current, int reap_delay, boolean_t inbound, mblk_t **mq) { ipsa_t *retval = NULL; + boolean_t dropped_mutex = B_FALSE; ASSERT(MUTEX_HELD(&head->isaf_lock)); @@ -3607,7 +3635,7 @@ sadb_age_assoc(isaf_t *head, queue_t *pfkey_q, ipsa_t *assoc, if ((assoc->ipsa_state == IPSA_STATE_LARVAL) && (assoc->ipsa_hardexpiretime <= current)) { assoc->ipsa_state = IPSA_STATE_DEAD; - return (sadb_torch_assoc(head, assoc, inbnd, mq)); + return (sadb_torch_assoc(head, assoc, inbound, mq)); } /* @@ -3621,7 +3649,7 @@ sadb_age_assoc(isaf_t *head, queue_t *pfkey_q, ipsa_t *assoc, if (assoc->ipsa_hardexpiretime != 0 && assoc->ipsa_hardexpiretime <= current) { if (assoc->ipsa_state == IPSA_STATE_DEAD) - return (sadb_torch_assoc(head, assoc, inbnd, mq)); + return (sadb_torch_assoc(head, assoc, inbound, mq)); /* * Send SADB_EXPIRE with hard lifetime, delay for unlinking. @@ -3656,9 +3684,14 @@ sadb_age_assoc(isaf_t *head, queue_t *pfkey_q, ipsa_t *assoc, retval = assoc; } sadb_expire_assoc(pfkey_q, assoc); + } else { + /* Check idle time activities. */ + dropped_mutex = sadb_idle_activities(assoc, + current - assoc->ipsa_lastuse, inbound); } - mutex_exit(&assoc->ipsa_lock); + if (!dropped_mutex) + mutex_exit(&assoc->ipsa_lock); return (retval); } @@ -3679,7 +3712,8 @@ sadb_ager(sadb_t *sp, queue_t *pfkey_q, queue_t *ip_q, int reap_delay, ipsa_t *ipsa; struct templist *next; } *haspeerlist = NULL, *newbie; - time_t current; + /* Snapshot current time now. */ + time_t current = gethrestime_sec(); int outhash; mblk_t *mq = NULL; @@ -3690,9 +3724,6 @@ sadb_ager(sadb_t *sp, queue_t *pfkey_q, queue_t *ip_q, int reap_delay, * I hope I don't tie up resources for too long. */ - /* Snapshot current time now. */ - (void) drv_getparm(TIME, &current); - /* Age acquires. 
*/ for (i = 0; i < sp->sdb_hashsize; i++) { @@ -3877,7 +3908,7 @@ sadb_retimeout(hrtime_t begin, queue_t *pfkey_q, void (*ager)(void *), interval = min(interval, intmax); } } else if ((end - begin) <= interval * 500000 && - interval > SADB_AGE_INTERVAL_DEFAULT) { + interval > SADB_AGE_INTERVAL_DEFAULT) { /* * If I took less than half of the interval, then I should * ratchet the interval back down. Never automatically @@ -3892,7 +3923,7 @@ sadb_retimeout(hrtime_t begin, queue_t *pfkey_q, void (*ager)(void *), } *intp = interval; return (qtimeout(pfkey_q, ager, agerarg, - interval * drv_usectohz(1000))); + interval * drv_usectohz(1000))); } @@ -3929,8 +3960,8 @@ sadb_update_lifetimes(ipsa_t *assoc, sadb_lifetime_t *hard, if (assoc->ipsa_hardexpiretime != 0) { assoc->ipsa_hardexpiretime = min(assoc->ipsa_hardexpiretime, - assoc->ipsa_usetime + - assoc->ipsa_harduselt); + assoc->ipsa_usetime + + assoc->ipsa_harduselt); } else { assoc->ipsa_hardexpiretime = assoc->ipsa_usetime + assoc->ipsa_harduselt; @@ -3956,8 +3987,8 @@ sadb_update_lifetimes(ipsa_t *assoc, sadb_lifetime_t *hard, if (assoc->ipsa_softexpiretime != 0) { assoc->ipsa_softexpiretime = min(assoc->ipsa_softexpiretime, - assoc->ipsa_usetime + - assoc->ipsa_softuselt); + assoc->ipsa_usetime + + assoc->ipsa_softuselt); } else { assoc->ipsa_softexpiretime = assoc->ipsa_usetime + assoc->ipsa_softuselt; @@ -4078,7 +4109,7 @@ sadb_update_sa(mblk_t *mp, keysock_in_t *ksi, goto bail; } if (assoc->sadb_sa_flags & ~(SADB_SAFLAGS_NOREPLAY | - SADB_X_SAFLAGS_NATT_LOC | SADB_X_SAFLAGS_NATT_REM)) { + SADB_X_SAFLAGS_NATT_LOC | SADB_X_SAFLAGS_NATT_REM)) { *diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS; error = EINVAL; goto bail; @@ -4110,14 +4141,14 @@ sadb_update_sa(mblk_t *mp, keysock_in_t *ksi, } if ((kmp != 0) && ((outbound_target->ipsa_kmp != 0) || - (outbound_target->ipsa_kmp != kmp))) { + (outbound_target->ipsa_kmp != kmp))) { *diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMP; error = EINVAL; goto bail; } if ((kmc != 0) && 
((outbound_target->ipsa_kmc != 0) || - (outbound_target->ipsa_kmc != kmc))) { + (outbound_target->ipsa_kmc != kmc))) { *diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMC; error = EINVAL; goto bail; @@ -4131,14 +4162,14 @@ sadb_update_sa(mblk_t *mp, keysock_in_t *ksi, } if ((kmp != 0) && ((inbound_target->ipsa_kmp != 0) || - (inbound_target->ipsa_kmp != kmp))) { + (inbound_target->ipsa_kmp != kmp))) { *diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMP; error = EINVAL; goto bail; } if ((kmc != 0) && ((inbound_target->ipsa_kmc != 0) || - (inbound_target->ipsa_kmc != kmc))) { + (inbound_target->ipsa_kmc != kmc))) { *diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMC; error = EINVAL; goto bail; @@ -4219,9 +4250,9 @@ sadb_checkacquire(iacqf_t *bucket, ipsec_action_t *ap, ipsec_policy_t *pp, if (IPSA_ARE_ADDR_EQUAL(dst, walker->ipsacq_dstaddr, fam) && IPSA_ARE_ADDR_EQUAL(src, walker->ipsacq_srcaddr, fam) && ip_addr_match((uint8_t *)isrc, walker->ipsacq_innersrcpfx, - (in6_addr_t *)walker->ipsacq_innersrc) && + (in6_addr_t *)walker->ipsacq_innersrc) && ip_addr_match((uint8_t *)idst, walker->ipsacq_innerdstpfx, - (in6_addr_t *)walker->ipsacq_innerdst) && + (in6_addr_t *)walker->ipsacq_innerdst) && (ap == walker->ipsacq_act) && (pp == walker->ipsacq_policy) && /* XXX do deep compares of ap/pp? */ @@ -4373,7 +4404,7 @@ sadb_acquire(mblk_t *mp, ipsec_out_t *io, boolean_t need_ah, boolean_t need_esp) /* First one. */ newbie->ipsacq_mp = mp; newbie->ipsacq_numpackets = 1; - (void) drv_getparm(TIME, &newbie->ipsacq_expire); + newbie->ipsacq_expire = gethrestime_sec(); /* * Extended ACQUIRE with both AH+ESP will use ESP's timeout * value. @@ -5128,7 +5159,7 @@ sadb_getspi(keysock_in_t *ksi, uint32_t master_spi, int *diagnostic, * for the purposes of creating a new SA. 
*/ return (sadb_makelarvalassoc(htonl(master_spi), srcaddr, dstaddr, af, - ns)); + ns)); } /* @@ -5546,8 +5577,8 @@ ipsec_find_listen_conn(uint16_t *pptr, ipsec_selector_t *sel, ip_stack_t *ipst) if (sel->ips_local_port == 0) return (NULL); - connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(sel->ips_local_port, - ipst)]; + connfp = &ipst->ips_ipcl_bind_fanout[ + IPCL_BIND_HASH(sel->ips_local_port, ipst)]; mutex_enter(&connfp->connf_lock); if (sel->ips_isv4) { @@ -5872,7 +5903,7 @@ ipsec_oth_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, &sel->ips_local_addr_v6)) && (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6) || IN6_ARE_ADDR_EQUAL(&connp->conn_remv6, - &sel->ips_remote_addr_v6)))))) { + &sel->ips_remote_addr_v6)))))) { break; } } @@ -5949,14 +5980,14 @@ ipsec_construct_inverse_acquire(sadb_msg_t *samsg, sadb_ext_t *extv[], goto bail; } if (sadb_addrcheck(NULL, (mblk_t *)samsg, - (sadb_ext_t *)innsrcext, 0, ns) == KS_IN_ADDR_UNKNOWN) { + (sadb_ext_t *)innsrcext, 0, ns) == KS_IN_ADDR_UNKNOWN) { err = EINVAL; diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_SRC; goto bail; } isrc = (struct sockaddr_in6 *)(innsrcext + 1); if (sadb_addrcheck(NULL, (mblk_t *)samsg, - (sadb_ext_t *)inndstext, 0, ns) == KS_IN_ADDR_UNKNOWN) { + (sadb_ext_t *)inndstext, 0, ns) == KS_IN_ADDR_UNKNOWN) { err = EINVAL; diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_DST; goto bail; @@ -5974,9 +6005,9 @@ ipsec_construct_inverse_acquire(sadb_msg_t *samsg, sadb_ext_t *extv[], goto bail; } } else if (inndstext != NULL) { - err = EINVAL; - diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC; - goto bail; + err = EINVAL; + diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC; + goto bail; } /* Get selectors first, based on outer addresses */ @@ -5987,10 +6018,9 @@ ipsec_construct_inverse_acquire(sadb_msg_t *samsg, sadb_ext_t *extv[], /* Check for tunnel mode mismatches. 
*/ if (innsrcext != NULL && ((isrc->sin6_family == AF_INET && - sel.ips_protocol != IPPROTO_ENCAP && sel.ips_protocol != 0) || - (isrc->sin6_family == AF_INET6 && - sel.ips_protocol != IPPROTO_IPV6 && - sel.ips_protocol != 0))) { + sel.ips_protocol != IPPROTO_ENCAP && sel.ips_protocol != 0) || + (isrc->sin6_family == AF_INET6 && + sel.ips_protocol != IPPROTO_IPV6 && sel.ips_protocol != 0))) { err = EPROTOTYPE; goto bail; } @@ -6102,9 +6132,9 @@ sadb_set_lpkt(ipsa_t *ipsa, mblk_t *npkt, netstack_t *ns) ipsec_stack_t *ipss = ns->netstack_ipsec; membar_producer(); - do + do { opkt = ipsa->ipsa_lpkt; - while (casptr(&ipsa->ipsa_lpkt, opkt, npkt) != opkt); + } while (casptr(&ipsa->ipsa_lpkt, opkt, npkt) != opkt); ip_drop_packet(opkt, B_TRUE, NULL, NULL, DROPPER(ipss, ipds_sadb_inlarval_replace), @@ -6121,9 +6151,9 @@ sadb_clear_lpkt(ipsa_t *ipsa) { mblk_t *opkt; - do + do { opkt = ipsa->ipsa_lpkt; - while (casptr(&ipsa->ipsa_lpkt, opkt, NULL) != opkt); + } while (casptr(&ipsa->ipsa_lpkt, opkt, NULL) != opkt); return (opkt); } @@ -6371,36 +6401,3 @@ ipsec_check_key(crypto_mech_type_t mech_type, sadb_key_t *sadb_key, return (-1); } - -/* ARGSUSED */ -static void -sadb_clear_timeouts_walker(isaf_t *head, ipsa_t *ipsa, void *q) -{ - if (!(ipsa->ipsa_flags & IPSA_F_NATT)) - return; - - mutex_enter(&ipsa->ipsa_lock); - if (ipsa->ipsa_natt_q != q) { - mutex_exit(&ipsa->ipsa_lock); - return; - } - - (void) quntimeout(ipsa->ipsa_natt_q, ipsa->ipsa_natt_ka_timer); - - ipsa->ipsa_natt_ka_timer = 0; - ipsa->ipsa_natt_q = NULL; - mutex_exit(&ipsa->ipsa_lock); -} - -/* - * Is only to be used on a nattymod queue. 
- */ -void -sadb_clear_timeouts(queue_t *q, netstack_t *ns) -{ - ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; - sadb_t *sp = &espstack->esp_sadb.s_v4; - - sadb_walker(sp->sdb_if, sp->sdb_hashsize, - sadb_clear_timeouts_walker, q); -} diff --git a/usr/src/uts/common/inet/ipdrop.h b/usr/src/uts/common/inet/ipdrop.h index 7ddc5403de..048f2490f7 100644 --- a/usr/src/uts/common/inet/ipdrop.h +++ b/usr/src/uts/common/inet/ipdrop.h @@ -104,6 +104,8 @@ struct ip_dropstats { kstat_named_t ipds_esp_bad_auth; kstat_named_t ipds_esp_crypto_failed; kstat_named_t ipds_esp_icmp; + kstat_named_t ipds_esp_nat_t_ipsec; + kstat_named_t ipds_esp_nat_t_ka; /* AH-specific drop statistics. */ kstat_named_t ipds_ah_nomem; diff --git a/usr/src/uts/common/inet/ipsec_impl.h b/usr/src/uts/common/inet/ipsec_impl.h index 2c40bf1f70..8e13039efb 100644 --- a/usr/src/uts/common/inet/ipsec_impl.h +++ b/usr/src/uts/common/inet/ipsec_impl.h @@ -68,7 +68,7 @@ extern "C" { #define IPSEC_SHARED_SA 0x01 #define IPSEC_UNIQUE_SA 0x02 -/* IPSEC protocols and combinations */ +/* IPsec protocols and combinations */ #define IPSEC_AH_ONLY 0x01 #define IPSEC_ESP_ONLY 0x02 #define IPSEC_AH_ESP 0x03 @@ -640,7 +640,7 @@ typedef struct ipsif_s /* - * IPSEC stack instances + * IPsec stack instances */ struct ipsec_stack { netstack_t *ipsec_netstack; /* Common netstack */ @@ -878,13 +878,14 @@ extern ipsec_tun_pol_t *itp_get_byaddr_dummy(uint32_t *, uint32_t *, int, netstack_t *); /* - * IPsec AH/ESP functions called from IP. + * IPsec AH/ESP functions called from IP or the common SADB code in AH. */ extern void ipsecah_in_assocfailure(mblk_t *, char, ushort_t, char *, uint32_t, void *, int, ipsecah_stack_t *); extern void ipsecesp_in_assocfailure(mblk_t *, char, ushort_t, char *, uint32_t, void *, int, ipsecesp_stack_t *); +extern void ipsecesp_send_keepalive(ipsa_t *); /* * Algorithm management helper functions. 
@@ -946,11 +947,6 @@ void ip_drop_init(ipsec_stack_t *); void ip_drop_destroy(ipsec_stack_t *); /* - * NAT-Traversal cleanup - */ -extern void nattymod_clean_ipif(ipif_t *); - -/* * Common functions */ extern boolean_t ip_addr_match(uint8_t *, int, in6_addr_t *); diff --git a/usr/src/uts/common/inet/ipsecesp.h b/usr/src/uts/common/inet/ipsecesp.h index e0d22ec388..2dfb73c667 100644 --- a/usr/src/uts/common/inet/ipsecesp.h +++ b/usr/src/uts/common/inet/ipsecesp.h @@ -77,6 +77,9 @@ struct ipsecesp_stack { }; typedef struct ipsecesp_stack ipsecesp_stack_t; +/* Define *this* NDD variable here because we use it outside ESP proper. */ +#define ipsecesp_nat_keepalive_interval \ + ipsecesp_params[14].ipsecesp_param_value #endif /* _KERNEL */ diff --git a/usr/src/uts/common/inet/sadb.h b/usr/src/uts/common/inet/sadb.h index 86204cd5bd..af2ffc3170 100644 --- a/usr/src/uts/common/inet/sadb.h +++ b/usr/src/uts/common/inet/sadb.h @@ -136,6 +136,8 @@ typedef struct ipsa_s { */ time_t ipsa_addtime; /* Time I was added. */ time_t ipsa_usetime; /* Time of my first use. */ + time_t ipsa_lastuse; /* Time of my last use. */ + time_t ipsa_last_nat_t_ka; /* Time of my last NAT-T keepalive. */ time_t ipsa_softexpiretime; /* Time of my first soft expire. */ time_t ipsa_hardexpiretime; /* Time of my first hard expire. */ @@ -158,7 +160,6 @@ typedef struct ipsa_s { */ uint_t ipsa_softalloc; /* Allocations allowed (soft). */ uint_t ipsa_hardalloc; /* Allocations allowed (hard). */ - uint_t ipsa_alloc; /* Allocations made. */ uint_t ipsa_integlen; /* Length of the integrity bitmap (bytes). */ uint_t ipsa_senslen; /* Length of the sensitivity bitmap (bytes). 
*/ @@ -195,15 +196,14 @@ typedef struct ipsa_s { uint8_t ipsa_innersrcpfx; uint8_t ipsa_innerdstpfx; - /* these can only be v4 */ - uint32_t ipsa_natt_addr_loc[IPSA_MAX_ADDRLEN]; - uint32_t ipsa_natt_addr_rem[IPSA_MAX_ADDRLEN]; - uint16_t ipsa_inbound_cksum; /* cksum correction for inbound packets */ - uint16_t ipsa_remote_port; /* the other port that isn't 4500 */ + uint16_t ipsa_local_nat_port; /* Local NAT-T port. (0 --> 4500) */ + uint16_t ipsa_remote_nat_port; /* The other port that isn't 4500 */ + + /* these can only be v4 */ + uint32_t ipsa_natt_addr_loc; + uint32_t ipsa_natt_addr_rem; - timeout_id_t ipsa_natt_ka_timer; - queue_t *ipsa_natt_q; /* * icmp type and code. *_end are to specify ranges. if only * a single value, * and *_end are the same value. @@ -711,9 +711,6 @@ extern void ipsec_destroy_ctx_tmpl(ipsa_t *, ipsec_algtype_t); /* key checking */ extern int ipsec_check_key(crypto_mech_type_t, sadb_key_t *, boolean_t, int *); -/* natt cleanup */ -extern void sadb_clear_timeouts(queue_t *, netstack_t *); - typedef struct ipsec_kstats_s { kstat_named_t esp_stat_in_requests; kstat_named_t esp_stat_in_discards; diff --git a/usr/src/uts/common/inet/udp/udp.c b/usr/src/uts/common/inet/udp/udp.c index 9864a22d88..12901ed69d 100644 --- a/usr/src/uts/common/inet/udp/udp.c +++ b/usr/src/uts/common/inet/udp/udp.c @@ -254,15 +254,15 @@ static int udp_rrw(queue_t *q, struiod_t *dp); static void udp_rput_bind_ack(queue_t *q, mblk_t *mp); static int udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); -static void udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha); +static void udp_send_data(udp_t *, queue_t *, mblk_t *, ipha_t *); static void udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen, t_scalar_t err); static void udp_unbind(queue_t *q, mblk_t *mp); static in_port_t udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random); static void udp_wput(queue_t *q, mblk_t *mp); -static mblk_t 
*udp_output_v4(conn_t *, mblk_t *mp, ipaddr_t v4dst, - uint16_t port, uint_t srcid, int *error); +static mblk_t *udp_output_v4(conn_t *, mblk_t *, ipaddr_t, uint16_t, uint_t, + int *, boolean_t); static mblk_t *udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error); static void udp_wput_other(queue_t *q, mblk_t *mp); @@ -963,7 +963,7 @@ udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) ASSERT(udp->udp_port != 0); if (!caller_holds_lock) { lockp = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, - us->us_bind_fanout_size)].uf_lock; + us->us_bind_fanout_size)].uf_lock; ASSERT(lockp != NULL); mutex_enter(lockp); } @@ -3333,8 +3333,8 @@ udp_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) return (ipp->ipp_dstoptslen); case IPV6_PATHMTU: return (ip_fill_mtuinfo(&udp->udp_v6dst, - udp->udp_dstport, (struct ip6_mtuinfo *)ptr, - us->us_netstack)); + udp->udp_dstport, (struct ip6_mtuinfo *)ptr, + us->us_netstack)); default: return (-1); } @@ -3350,6 +3350,9 @@ udp_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) case UDP_RCVHDR: *i1 = udp->udp_rcvhdr ? 1 : 0; break; + case UDP_NAT_T_ENDPOINT: + *i1 = udp->udp_nat_t_endpoint; + break; default: return (-1); } @@ -4206,6 +4209,37 @@ udp_opt_set(queue_t *q, uint_t optset_context, int level, if (!checkonly) udp->udp_rcvhdr = onoff; break; + case UDP_NAT_T_ENDPOINT: + if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) { + *outlenp = 0; + return (error); + } + + /* + * Use udp_family instead so we can avoid ambiguitites + * with AF_INET6 sockets that may switch from IPv4 + * to IPv6. 
+ */ + if (udp->udp_family != AF_INET) { + *outlenp = 0; + return (EAFNOSUPPORT); + } + + if (!checkonly) { + udp->udp_nat_t_endpoint = onoff; + + udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + + UDPH_SIZE + udp->udp_ip_snd_options_len; + + /* Also, adjust wroff */ + if (onoff) { + udp->udp_max_hdr_len += + sizeof (uint32_t); + } + (void) mi_set_sth_wroff(RD(q), + udp->udp_max_hdr_len + us->us_wroff_extra); + } + break; default: *outlenp = 0; return (EINVAL); @@ -4574,7 +4608,7 @@ udp_input(conn_t *connp, mblk_t *mp) */ udp_icmp_error(q, mp); TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, - "udp_rput_end: q %p (%S)", q, "m_ctl"); + "udp_rput_end: q %p (%S)", q, "m_ctl"); return; } } @@ -4626,7 +4660,7 @@ udp_input(conn_t *connp, mblk_t *mp) udp_become_writer(connp, mp, udp_rput_other_wrapper, SQTAG_UDP_INPUT); TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, - "udp_rput_end: q %p (%S)", q, "end"); + "udp_rput_end: q %p (%S)", q, "end"); return; } @@ -4792,7 +4826,7 @@ udp_input(conn_t *connp, mblk_t *mp) if (options_mp != NULL) freeb(options_mp); TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, - "udp_rput_end: q %p (%S)", q, "allocbfail"); + "udp_rput_end: q %p (%S)", q, "allocbfail"); BUMP_MIB(&udp->udp_mib, udpInErrors); return; } @@ -4877,7 +4911,7 @@ udp_input(conn_t *connp, mblk_t *mp) toh->level = IPPROTO_IP; toh->name = IP_RECVSLLA; toh->len = sizeof (struct T_opthdr) + - sizeof (struct sockaddr_dl); + sizeof (struct sockaddr_dl); toh->status = 0; dstopt += sizeof (struct T_opthdr); dstptr = (struct sockaddr_dl *)dstopt; @@ -4897,7 +4931,7 @@ udp_input(conn_t *connp, mblk_t *mp) toh->level = IPPROTO_IP; toh->name = IP_RECVIF; toh->len = sizeof (struct T_opthdr) + - sizeof (uint_t); + sizeof (uint_t); toh->status = 0; dstopt += sizeof (struct T_opthdr); dstptr = (uint_t *)dstopt; @@ -4993,7 +5027,7 @@ udp_input(conn_t *connp, mblk_t *mp) udi_size += hlen; } if ((udp->udp_ipv6_recvdstopts || - udp->udp_old_ipv6_recvdstopts) && + udp->udp_old_ipv6_recvdstopts) && (ipp.ipp_fields & 
IPPF_DSTOPTS)) { udi_size += sizeof (struct T_opthdr) + ipp.ipp_dstoptslen; @@ -5044,7 +5078,7 @@ udp_input(conn_t *connp, mblk_t *mp) if (options_mp != NULL) freeb(options_mp); TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, - "udp_rput_end: q %p (%S)", q, "allocbfail"); + "udp_rput_end: q %p (%S)", q, "allocbfail"); BUMP_MIB(&udp->udp_mib, udpInErrors); return; } @@ -5110,8 +5144,8 @@ udp_input(conn_t *connp, mblk_t *mp) pkti->ipi6_addr = ip6h->ip6_dst; else IN6_IPADDR_TO_V4MAPPED( - ((ipha_t *)rptr)->ipha_dst, - &pkti->ipi6_addr); + ((ipha_t *)rptr)->ipha_dst, + &pkti->ipi6_addr); pkti->ipi6_ifindex = ipp.ipp_ifindex; dstopt += sizeof (*pkti); udi_size -= toh->len; @@ -5146,7 +5180,7 @@ udp_input(conn_t *connp, mblk_t *mp) dstopt += sizeof (struct T_opthdr); if (ipversion == IPV6_VERSION) { *(uint_t *)dstopt = - IPV6_FLOW_TCLASS(ip6h->ip6_flow); + IPV6_FLOW_TCLASS(ip6h->ip6_flow); } else { ipha_t *ipha = (ipha_t *)rptr; *(uint_t *)dstopt = @@ -5234,7 +5268,7 @@ udp_input(conn_t *connp, mblk_t *mp) BUMP_MIB(&udp->udp_mib, udpHCInDatagrams); TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, - "udp_rput_end: q %p (%S)", q, "end"); + "udp_rput_end: q %p (%S)", q, "end"); if (options_mp != NULL) freeb(options_mp); @@ -5357,9 +5391,8 @@ udp_rput_other(queue_t *q, mblk_t *mp) */ udp_fanout_t *udpf; - udpf = &us->us_bind_fanout[ - UDP_BIND_HASH(udp->udp_port, - us->us_bind_fanout_size)]; + udpf = &us->us_bind_fanout[UDP_BIND_HASH( + udp->udp_port, us->us_bind_fanout_size)]; mutex_enter(&udpf->uf_lock); if (udp->udp_state == TS_DATA_XFER) { /* Connect failed */ @@ -5518,7 +5551,7 @@ udp_rput_other(queue_t *q, mblk_t *mp) if (options_mp != NULL) freeb(options_mp); TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, - "udp_rput_other_end: q %p (%S)", q, "allocbfail"); + "udp_rput_other_end: q %p (%S)", q, "allocbfail"); BUMP_MIB(&udp->udp_mib, udpInErrors); return; } @@ -5592,7 +5625,7 @@ udp_rput_other(queue_t *q, mblk_t *mp) toh->level = IPPROTO_IP; toh->name = IP_PKTINFO; toh->len = sizeof (struct T_opthdr) 
+ - sizeof (*pktinfop); + sizeof (*pktinfop); toh->status = 0; dstopt += sizeof (struct T_opthdr); pktinfop = (struct in_pktinfo *)dstopt; @@ -6096,10 +6129,8 @@ udp_report_item(mblk_t *mp, udp_t *udp) print_len = snprintf((char *)mp->b_wptr, buf_len, MI_COL_PTRFMT_STR "%4d %5u %s %s %5u %s\n", (void *)udp, udp->udp_connp->conn_zoneid, ntohs(udp->udp_port), - inet_ntop(AF_INET6, &udp->udp_v6src, - addrbuf1, sizeof (addrbuf1)), - inet_ntop(AF_INET6, &udp->udp_v6dst, - addrbuf2, sizeof (addrbuf2)), + inet_ntop(AF_INET6, &udp->udp_v6src, addrbuf1, sizeof (addrbuf1)), + inet_ntop(AF_INET6, &udp->udp_v6dst, addrbuf2, sizeof (addrbuf2)), ntohs(udp->udp_dstport), state); if (print_len < buf_len) { mp->b_wptr += print_len; @@ -6367,7 +6398,7 @@ udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst) static mblk_t * udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, - uint_t srcid, int *error) + uint_t srcid, int *error, boolean_t insert_spi) { udp_t *udp = connp->conn_udp; queue_t *q = connp->conn_wq; @@ -6464,7 +6495,8 @@ udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, mutex_exit(&connp->conn_lock); /* Add an IP header */ - ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + ip_snd_opt_len; + ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + ip_snd_opt_len + + (insert_spi ? sizeof (uint32_t) : 0); ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) || !OK_32PTR(ipha)) { @@ -6485,19 +6517,19 @@ udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length); } - ip_hdr_length -= UDPH_SIZE; + ip_hdr_length -= (UDPH_SIZE + (insert_spi ? 
sizeof (uint32_t) : 0)); #ifdef _BIG_ENDIAN /* Set version, header length, and tos */ *(uint16_t *)&ipha->ipha_version_and_hdr_length = ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | - udp->udp_type_of_service); + udp->udp_type_of_service); /* Set ttl and protocol */ *(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP; #else /* Set version, header length, and tos */ *(uint16_t *)&ipha->ipha_version_and_hdr_length = - ((udp->udp_type_of_service << 8) | - ((IP_VERSION << 4) | (ip_hdr_length>>2))); + ((udp->udp_type_of_service << 8) | + ((IP_VERSION << 4) | (ip_hdr_length>>2))); /* Set ttl and protocol */ *(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl; #endif @@ -6557,6 +6589,10 @@ udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, ip_len = htons((uint16_t)ip_len); udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length); + /* Insert all-0s SPI now. */ + if (insert_spi) + *((uint32_t *)(udpha + 1)) = 0; + /* * Copy in the destination address */ @@ -6571,7 +6607,7 @@ udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, udpha->uha_dst_port = port; udpha->uha_src_port = udp->udp_port; - if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { + if (ip_snd_opt_len > 0) { uint32_t cksum; bcopy(ip_snd_opt, &ipha[1], ip_snd_opt_len); @@ -6636,7 +6672,7 @@ udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, /* We're done. Pass the packet to ip. 
*/ BUMP_MIB(&udp->udp_mib, udpHCOutDatagrams); TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, - "udp_wput_end: q %p (%S)", q, "end"); + "udp_wput_end: q %p (%S)", q, "end"); if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 || CONN_OUTBOUND_POLICY_PRESENT(connp, ipss) || @@ -6968,6 +7004,7 @@ udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) int error = 0; struct sockaddr_storage ss; udp_stack_t *us = udp->udp_us; + boolean_t insert_spi = udp->udp_nat_t_endpoint; TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START, "udp_wput_start: connp %p mp %p", connp, mp); @@ -7031,7 +7068,7 @@ udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) * family of the socket. */ mp = udp_output_v4(connp, mp, v4dst, - udp->udp_dstport, 0, &error); + udp->udp_dstport, 0, &error, insert_spi); } else { mp = udp_output_v6(connp, mp, sin6, &error); } @@ -7152,7 +7189,7 @@ udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) break; } - mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error); + mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error, insert_spi); if (error != 0) { ud_error: UDP_STAT(us, udp_out_err_output); @@ -7887,7 +7924,7 @@ udp_wput_other(queue_t *q, mblk_t *mp) udp_stack_t *us; TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START, - "udp_wput_other_start: q %p", q); + "udp_wput_other_start: q %p", q); us = udp->udp_us; db = mp->b_datap; @@ -7900,36 +7937,35 @@ udp_wput_other(queue_t *q, mblk_t *mp) if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { freemsg(mp); TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, - "udp_wput_other_end: q %p (%S)", - q, "protoshort"); + "udp_wput_other_end: q %p (%S)", q, "protoshort"); return; } switch (((t_primp_t)rptr)->type) { case T_ADDR_REQ: udp_addr_req(q, mp); TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, - "udp_wput_other_end: q %p (%S)", q, "addrreq"); + "udp_wput_other_end: q %p (%S)", q, "addrreq"); return; case O_T_BIND_REQ: case T_BIND_REQ: udp_bind(q, mp); TRACE_2(TR_FAC_UDP, 
TR_UDP_WPUT_OTHER_END, - "udp_wput_other_end: q %p (%S)", q, "bindreq"); + "udp_wput_other_end: q %p (%S)", q, "bindreq"); return; case T_CONN_REQ: udp_connect(q, mp); TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, - "udp_wput_other_end: q %p (%S)", q, "connreq"); + "udp_wput_other_end: q %p (%S)", q, "connreq"); return; case T_CAPABILITY_REQ: udp_capability_req(q, mp); TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, - "udp_wput_other_end: q %p (%S)", q, "capabreq"); + "udp_wput_other_end: q %p (%S)", q, "capabreq"); return; case T_INFO_REQ: udp_info_req(q, mp); TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, - "udp_wput_other_end: q %p (%S)", q, "inforeq"); + "udp_wput_other_end: q %p (%S)", q, "inforeq"); return; case T_UNITDATA_REQ: /* @@ -7939,8 +7975,7 @@ udp_wput_other(queue_t *q, mblk_t *mp) */ udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL); TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, - "udp_wput_other_end: q %p (%S)", - q, "unitdatareq"); + "udp_wput_other_end: q %p (%S)", q, "unitdatareq"); return; case T_UNBIND_REQ: udp_unbind(q, mp); @@ -7957,8 +7992,7 @@ udp_wput_other(queue_t *q, mblk_t *mp) (void) svr4_optcom_req(_WR(UDP_RD(q)), mp, cr, &udp_opt_obj); TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, - "udp_wput_other_end: q %p (%S)", - q, "optmgmtreq"); + "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); return; case T_OPTMGMT_REQ: @@ -7970,15 +8004,13 @@ udp_wput_other(queue_t *q, mblk_t *mp) (void) tpi_optcom_req(_WR(UDP_RD(q)), mp, cr, &udp_opt_obj); TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, - "udp_wput_other_end: q %p (%S)", - q, "optmgmtreq"); + "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); return; case T_DISCON_REQ: udp_disconnect(q, mp); TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, - "udp_wput_other_end: q %p (%S)", - q, "disconreq"); + "udp_wput_other_end: q %p (%S)", q, "disconreq"); return; /* The following TPI message is not supported by udp. 
*/ @@ -7986,8 +8018,8 @@ udp_wput_other(queue_t *q, mblk_t *mp) case T_CONN_RES: udp_err_ack(q, mp, TNOTSUPPORT, 0); TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, - "udp_wput_other_end: q %p (%S)", - q, "connres/disconreq"); + "udp_wput_other_end: q %p (%S)", q, + "connres/disconreq"); return; /* The following 3 TPI messages are illegal for udp. */ @@ -7996,8 +8028,8 @@ udp_wput_other(queue_t *q, mblk_t *mp) case T_ORDREL_REQ: udp_err_ack(q, mp, TNOTSUPPORT, 0); TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, - "udp_wput_other_end: q %p (%S)", - q, "data/exdata/ordrel"); + "udp_wput_other_end: q %p (%S)", q, + "data/exdata/ordrel"); return; default: break; @@ -8022,8 +8054,8 @@ udp_wput_other(queue_t *q, mblk_t *mp) mp->b_datap->db_type = M_IOCACK; putnext(UDP_RD(q), mp); TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, - "udp_wput_other_end: q %p (%S)", - q, "getpeername"); + "udp_wput_other_end: q %p (%S)", q, + "getpeername"); return; } /* FALLTHRU */ @@ -8037,8 +8069,7 @@ udp_wput_other(queue_t *q, mblk_t *mp) mi_copyin(q, mp, NULL, SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, - "udp_wput_other_end: q %p (%S)", - q, "getmyname"); + "udp_wput_other_end: q %p (%S)", q, "getmyname"); return; } case ND_SET: @@ -8047,8 +8078,7 @@ udp_wput_other(queue_t *q, mblk_t *mp) if (nd_getset(q, us->us_nd, mp)) { putnext(UDP_RD(q), mp); TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, - "udp_wput_other_end: q %p (%S)", - q, "get"); + "udp_wput_other_end: q %p (%S)", q, "get"); return; } break; @@ -8091,14 +8121,14 @@ udp_wput_other(queue_t *q, mblk_t *mp) case M_IOCDATA: udp_wput_iocdata(q, mp); TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, - "udp_wput_other_end: q %p (%S)", q, "iocdata"); + "udp_wput_other_end: q %p (%S)", q, "iocdata"); return; default: /* Unrecognized messages are passed through without change. 
*/ break; } TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, - "udp_wput_other_end: q %p (%S)", q, "end"); + "udp_wput_other_end: q %p (%S)", q, "end"); ip_output(connp, mp, q, IP_WPUT); } @@ -8815,14 +8845,3 @@ udp_set_rcv_hiwat(udp_t *udp, size_t size) udp->udp_rcv_hiwat = size; return (size); } - -/* - * Little helper for IPsec's NAT-T processing. - */ -boolean_t -udp_compute_checksum(netstack_t *ns) -{ - udp_stack_t *us = ns->netstack_udp; - - return (us->us_do_checksum); -} diff --git a/usr/src/uts/common/inet/udp/udp_opt_data.c b/usr/src/uts/common/inet/udp/udp_opt_data.c index 12f13e11b8..07cc9e638f 100644 --- a/usr/src/uts/common/inet/udp/udp_opt_data.c +++ b/usr/src/uts/common/inet/udp/udp_opt_data.c @@ -279,6 +279,8 @@ opdes_t udp_opt_arr[] = { }, { UDP_RCVHDR, IPPROTO_UDP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, +{ UDP_NAT_T_ENDPOINT, IPPROTO_UDP, OA_RW, OA_RW, OP_PRIVPORT, 0, sizeof (int), + 0 }, }; /* diff --git a/usr/src/uts/common/inet/udp_impl.h b/usr/src/uts/common/inet/udp_impl.h index c9b6c8128b..9ad4cacbc9 100644 --- a/usr/src/uts/common/inet/udp_impl.h +++ b/usr/src/uts/common/inet/udp_impl.h @@ -236,7 +236,7 @@ typedef struct udp_s { udp_timestamp : 1, /* SO_TIMESTAMP "socket" option */ udp_anon_mlp : 1, /* SO_ANON_MLP */ udp_mac_exempt : 1, /* SO_MAC_EXEMPT */ - udp_pad_to_bit_31 : 1; + udp_nat_t_endpoint : 1; /* UDP_NAT_T_ENDPOINT option */ uint8_t udp_type_of_service; /* IP_TOS option */ uint8_t udp_ttl; /* TTL or hoplimit */ @@ -317,7 +317,6 @@ extern void udp_ddi_init(void); extern void udp_ddi_destroy(void); extern void udp_resume_bind(conn_t *, mblk_t *); extern void udp_conn_recv(conn_t *, mblk_t *); -extern boolean_t udp_compute_checksum(netstack_t *); extern void udp_wput_data(queue_t *, mblk_t *, struct sockaddr *, socklen_t); diff --git a/usr/src/uts/common/netinet/udp.h b/usr/src/uts/common/netinet/udp.h index 8a7d5f4577..c65a9bad3a 100644 --- a/usr/src/uts/common/netinet/udp.h +++ b/usr/src/uts/common/netinet/udp.h @@ -1,5 +1,5 @@ 
/* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -35,6 +35,7 @@ struct udphdr { #define UDP_ANONPRIVBIND 0x0100 /* for internal use only */ #define UDP_EXCLBIND 0x0101 /* for internal use only */ #define UDP_RCVHDR 0x0102 /* for internal use only */ +#define UDP_NAT_T_ENDPOINT 0x0103 /* for internal use only */ /* * Following option in UDP_ namespace required to be exposed through * <xti.h> (It also requires exposing options not implemented). The options diff --git a/usr/src/uts/intel/Makefile.intel.shared b/usr/src/uts/intel/Makefile.intel.shared index 6ae4839206..386616724b 100644 --- a/usr/src/uts/intel/Makefile.intel.shared +++ b/usr/src/uts/intel/Makefile.intel.shared @@ -469,7 +469,6 @@ STRMOD_KMODS += usbkbm STRMOD_KMODS += usbms STRMOD_KMODS += usb_ah STRMOD_KMODS += drcompat -STRMOD_KMODS += nattymod STRMOD_KMODS += cryptmod STRMOD_KMODS += vuid2ps2 STRMOD_KMODS += vuid3ps2 diff --git a/usr/src/uts/intel/ia32/ml/modstubs.s b/usr/src/uts/intel/ia32/ml/modstubs.s index 889a6e350a..19d78f059b 100644 --- a/usr/src/uts/intel/ia32/ml/modstubs.s +++ b/usr/src/uts/intel/ia32/ml/modstubs.s @@ -528,6 +528,7 @@ fcnname/**/_info: \ WSTUB(ipsecesp, ipsecesp_in_assocfailure, nomod_zero); WSTUB(ipsecesp, ipsecesp_init_funcs, nomod_zero); WSTUB(ipsecesp, ipsecesp_icmp_error, nomod_zero); + WSTUB(ipsecesp, ipsecesp_send_keepalive, nomod_zero); END_MODULE(ipsecesp); #endif @@ -545,12 +546,6 @@ fcnname/**/_info: \ END_MODULE(spdsock); #endif -#ifndef NATTYMOD_MODULE - MODULE(nattymod, strmod); - WSTUB(nattymod, nattymod_clean_ipif, nomod_zero); - END_MODULE(nattymod); -#endif - /* * Stubs for nfs common code. 
* XXX nfs_getvnodeops should go away with removal of kludge in vnode.c diff --git a/usr/src/uts/intel/nattymod/Makefile b/usr/src/uts/intel/nattymod/Makefile deleted file mode 100644 index e01e713fe7..0000000000 --- a/usr/src/uts/intel/nattymod/Makefile +++ /dev/null @@ -1,96 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# uts/intel/nattymod/Makefile -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# ident "%Z%%M% %I% %E% SMI" -# -# This makefile drives the production of the ipsecah driver -# kernel module. -# -# intel implementation architecture dependent -# - -# -# Path to the base of the uts directory tree (usually /usr/src/uts). -# -UTSBASE = ../.. - -# -# Define the module and object file sets. -# -MODULE = nattymod -OBJECTS = $(NATTYMOD_OBJS:%=$(OBJS_DIR)/%) -LINTS = $(NATTYMOD_OBJS:%.o=$(LINTS_DIR)/%.ln) -ROOTMODULE = $(ROOT_STRMOD_DIR)/$(MODULE) - -# -# Include common rules. 
-# -include $(UTSBASE)/intel/Makefile.intel - -# -# Define targets -# -ALL_TARGET = $(BINARY) -LINT_TARGET = $(MODULE).lint -INSTALL_TARGET = $(BINARY) $(ROOTMODULE) - -# -# Linkage dependencies -# -LDFLAGS += -dy -Ndrv/ip -Ndrv/udp -Ndrv/ipsecah -Ndrv/ipsecesp - -# -# For now, disable these lint checks; maintainers should endeavor -# to investigate and remove these for maximum lint coverage. -# Please do not carry these forward to new Makefiles. -# -LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN -LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW - -# -# Default build targets. -# -.KEEP_STATE: - -def: $(DEF_DEPS) - -all: $(ALL_DEPS) - -clean: $(CLEAN_DEPS) - -clobber: $(CLOBBER_DEPS) - -lint: $(LINT_DEPS) - -modlintlib: $(MODLINTLIB_DEPS) - -clean.lint: $(CLEAN_LINT_DEPS) - -install: $(INSTALL_DEPS) - -# -# Include common targets. -# -include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/sparc/Makefile.sparc.shared b/usr/src/uts/sparc/Makefile.sparc.shared index 2b56c95947..ea5211ab6a 100644 --- a/usr/src/uts/sparc/Makefile.sparc.shared +++ b/usr/src/uts/sparc/Makefile.sparc.shared @@ -348,7 +348,6 @@ STRMOD_KMODS += spppasyn spppcomp STRMOD_KMODS += tirdwr ttcompat tun STRMOD_KMODS += usbkbm usbms usb_ah STRMOD_KMODS += drcompat -STRMOD_KMODS += nattymod STRMOD_KMODS += cryptmod STRMOD_KMODS += vuid3ps2 diff --git a/usr/src/uts/sparc/ml/modstubs.s b/usr/src/uts/sparc/ml/modstubs.s index a22853aba3..5424c4cce7 100644 --- a/usr/src/uts/sparc/ml/modstubs.s +++ b/usr/src/uts/sparc/ml/modstubs.s @@ -416,6 +416,7 @@ stubs_base: WSTUB(ipsecesp, ipsecesp_in_assocfailure, nomod_zero); WSTUB(ipsecesp, ipsecesp_init_funcs, nomod_zero); WSTUB(ipsecesp, ipsecesp_icmp_error, nomod_zero); + WSTUB(ipsecesp, ipsecesp_send_keepalive, nomod_zero); END_MODULE(ipsecesp); #endif @@ -433,12 +434,6 @@ stubs_base: END_MODULE(spdsock); #endif -#ifndef NATTYMOD_MODULE - MODULE(nattymod, strmod); - WSTUB(nattymod, nattymod_clean_ipif, nomod_zero); - END_MODULE(nattymod); -#endif - /* * Stubs for 
nfs common code. * XXX nfs_getvnodeops should go away with removal of kludge in vnode.c diff --git a/usr/src/uts/sparc/nattymod/Makefile b/usr/src/uts/sparc/nattymod/Makefile deleted file mode 100644 index be4f2be27d..0000000000 --- a/usr/src/uts/sparc/nattymod/Makefile +++ /dev/null @@ -1,101 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# uts/sparc/nattymod/Makefile -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# ident "%Z%%M% %I% %E% SMI" -# -# This makefile drives the production of the ipsecah driver -# kernel module. -# -# intel implementation architecture dependent -# - -# -# Path to the base of the uts directory tree (usually /usr/src/uts). -# -UTSBASE = ../.. - -# -# Define the module and object file sets. -# -MODULE = nattymod -OBJECTS = $(NATTYMOD_OBJS:%=$(OBJS_DIR)/%) -LINTS = $(NATTYMOD_OBJS:%.o=$(LINTS_DIR)/%.ln) -ROOTMODULE = $(ROOT_STRMOD_DIR)/$(MODULE) - -# -# Include common rules. 
-# -include $(UTSBASE)/sparc/Makefile.sparc - -# -# Define targets -# -ALL_TARGET = $(BINARY) -LINT_TARGET = $(MODULE).lint -INSTALL_TARGET = $(BINARY) $(ROOTMODULE) - -# -# Linkage dependencies -# -LDFLAGS += -dy -Ndrv/ip -Ndrv/udp -Ndrv/ipsecah -Ndrv/ipsecesp - -# -# lint pass one enforcement -# -CFLAGS += -v - -# -# For now, disable these lint checks; maintainers should endeavor -# to investigate and remove these for maximum lint coverage. -# Please do not carry these forward to new Makefiles. -# -LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN -LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW - -# -# Default build targets. -# -.KEEP_STATE: - -def: $(DEF_DEPS) - -all: $(ALL_DEPS) - -clean: $(CLEAN_DEPS) - -clobber: $(CLOBBER_DEPS) - -lint: $(LINT_DEPS) - -modlintlib: $(MODLINTLIB_DEPS) - -clean.lint: $(CLEAN_LINT_DEPS) - -install: $(INSTALL_DEPS) - -# -# Include common targets. -# -include $(UTSBASE)/sparc/Makefile.targ |