diff options
Diffstat (limited to 'usr/src')
30 files changed, 892 insertions, 128 deletions
diff --git a/usr/src/cmd/dladm/dladm.c b/usr/src/cmd/dladm/dladm.c index 1edd13b0cb..b20300bfbd 100644 --- a/usr/src/cmd/dladm/dladm.c +++ b/usr/src/cmd/dladm/dladm.c @@ -10013,6 +10013,7 @@ print_overlay_value(char *outbuf, uint_t bufsize, uint_t type, const void *pbuf, { const struct in6_addr *ipv6; struct in_addr ip; + const uint32_t *bval; switch (type) { case OVERLAY_PROP_T_INT: @@ -10079,6 +10080,17 @@ print_overlay_value(char *outbuf, uint_t bufsize, uint_t type, const void *pbuf, case OVERLAY_PROP_T_STRING: (void) snprintf(outbuf, bufsize, "%s", pbuf); break; + case OVERLAY_PROP_T_BOOLEAN: + if (psize != sizeof (uint32_t)) { + warn("malformed overlay boolean property: wrong byte " + "size %d bytes\n", psize); + (void) snprintf(outbuf, bufsize, "--"); + break; + } + bval = pbuf; + (void) snprintf(outbuf, bufsize, "%s", *bval > 0 ? "true" : + "false"); + break; default: abort(); } diff --git a/usr/src/lib/libdladm/common/libdloverlay.c b/usr/src/lib/libdladm/common/libdloverlay.c index a83105b91c..baee571ee6 100644 --- a/usr/src/lib/libdladm/common/libdloverlay.c +++ b/usr/src/lib/libdladm/common/libdloverlay.c @@ -82,6 +82,7 @@ dladm_overlay_parse_prop(overlay_prop_type_t type, void *buf, uint32_t *sizep, int ret; int64_t ival; uint64_t uval; + uint32_t bval; char *eptr; struct in6_addr ipv6; struct in_addr ip; @@ -127,6 +128,17 @@ dladm_overlay_parse_prop(overlay_prop_type_t type, void *buf, uint32_t *sizep, bcopy(&ipv6, buf, sizeof (struct in6_addr)); *sizep = sizeof (struct in6_addr); break; + case OVERLAY_PROP_T_BOOLEAN: + if (strcmp(val, "true") == 0) { + bval = 1; + } else if (strcmp(val, "false") == 0) { + bval = 0; + } else { + return (DLADM_STATUS_BADARG); + } + bcopy(&bval, buf, sizeof (bval)); + *sizep = sizeof (bval); + break; default: abort(); } diff --git a/usr/src/uts/common/inet/ip.h b/usr/src/uts/common/inet/ip.h index cc8c489c8c..733c65ea29 100644 --- a/usr/src/uts/common/inet/ip.h +++ b/usr/src/uts/common/inet/ip.h @@ -2254,6 +2254,8 @@ 
struct ip_xmit_attr_s { #define IXAF_VERIFY_ZCOPY 0x400000000 /* Check Zero Copy capability */ #define IXAF_ZCOPY_CAPAB 0x800000000 /* Capable of ZEROCOPY */ +#define IXAF_SKIP_ULP_CKSUM 0x1000000000 /* Checksum IP, but skip ULP */ + /* * The normal flags for sending packets e.g., icmp errors */ diff --git a/usr/src/uts/common/inet/ip/conn_opt.c b/usr/src/uts/common/inet/ip/conn_opt.c index b4bff4d7b4..6b7b1a9f33 100644 --- a/usr/src/uts/common/inet/ip/conn_opt.c +++ b/usr/src/uts/common/inet/ip/conn_opt.c @@ -1201,6 +1201,7 @@ conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen, case T_IP_TOS: case IP_TTL: case IP_DONTFRAG: + case IP_BOUND_IF: break; default: return (EINVAL); @@ -2454,7 +2455,7 @@ ip_attr_newdst(ip_xmit_attr_t *ixa) { ixa->ixa_flags &= ~(IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM | IXAF_NO_TTL_CHANGE | IXAF_IPV6_ADD_FRAGHDR | - IXAF_NO_LOOP_ZONEID_SET); + IXAF_NO_LOOP_ZONEID_SET | IXAF_SKIP_ULP_CKSUM); } /* diff --git a/usr/src/uts/common/inet/ip/ip_if.c b/usr/src/uts/common/inet/ip/ip_if.c index 7d3125f2a3..917e526bb1 100644 --- a/usr/src/uts/common/inet/ip/ip_if.c +++ b/usr/src/uts/common/inet/ip/ip_if.c @@ -19138,3 +19138,156 @@ ip_sioctl_get_lifhwaddr(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, return (0); } + +static int +ip_bindif_getipif(conn_t *connp, ipif_t **ipifp) +{ + in6_addr_t laddrv6; + in_addr_t laddrv4; + ushort_t ipvers; + ipif_t *ipif; + ip_stack_t *ipst; + int ret; + + ipst = connp->conn_netstack->netstack_ip; + + /* + * The caller has made sure that this socket is bound before calling. + * This makes it safe to cache this data and not hold the conn lock + * across this operation. 
+ */ + mutex_enter(&connp->conn_lock); + ipvers = connp->conn_ipversion; + if (ipvers == IPV4_VERSION) { + laddrv4 = connp->conn_saddr_v4; + } else if (ipvers == IPV6_VERSION) { + laddrv6 = connp->conn_saddr_v6; + } else { + mutex_exit(&connp->conn_lock); + return (EINVAL); + } + mutex_exit(&connp->conn_lock); + + if (ipvers == IPV4_VERSION) { + ipif = ipif_lookup_addr_nondup(laddrv4, NULL, ALL_ZONES, ipst); + } else { + ipif = ipif_lookup_addr_nondup_v6(&laddrv6, NULL, ALL_ZONES, + ipst); + } + + if (ipif == NULL) { + return (ENOENT); + } + + *ipifp = ipif; + return (0); +} + +int +ip_bindif_ifindex(conn_t *connp, uint_t *ifindex) +{ + int ret; + ipif_t *ipif; + + if (connp == NULL || ifindex == NULL) + return (EINVAL); + + if ((ret = ip_bindif_getipif(connp, &ipif)) != 0) { + return (ret); + } + + if (IS_VNI(ipif->ipif_ill) || IS_IPMP(ipif->ipif_ill) || + IS_LOOPBACK(ipif->ipif_ill)) { + ret = ENOTSUP; + goto out; + } + + mutex_enter(&ipif->ipif_ill->ill_lock); + *ifindex = ipif->ipif_ill->ill_phyint->phyint_ifindex; + mutex_exit(&ipif->ipif_ill->ill_lock); +out: + if (ipif != NULL) + ipif_refrele(ipif); + return (ret); +} + +int +ip_bindif_hwcaps(conn_t *connp, uint_t *hckflags, uint_t *lsoflags, + uint_t *lsomax) +{ + in6_addr_t laddrv6; + in_addr_t laddrv4; + ushort_t ipvers; + ipif_t *ipif; + ip_stack_t *ipst; + int ret; + + if (connp == NULL || hckflags == NULL || lsoflags == NULL || + lsomax == NULL) { + return (EINVAL); + } + + ipst = connp->conn_netstack->netstack_ip; + + /* + * The caller has made sure that this socket is bound before calling. + * This makes it safe to cache this data and not hold the conn lock + * across this operation. 
+ */ + mutex_enter(&connp->conn_lock); + ipvers = connp->conn_ipversion; + if (ipvers == IPV4_VERSION) { + laddrv4 = connp->conn_saddr_v4; + } else if (ipvers == IPV6_VERSION) { + laddrv6 = connp->conn_saddr_v6; + } else { + mutex_exit(&connp->conn_lock); + return (EINVAL); + } + mutex_exit(&connp->conn_lock); + + if (ipvers == IPV4_VERSION) { + ipif = ipif_lookup_addr_nondup(laddrv4, NULL, ALL_ZONES, ipst); + } else { + ipif = ipif_lookup_addr_nondup_v6(&laddrv6, NULL, ALL_ZONES, + ipst); + } + + if (ipif == NULL) { + return (ENOENT); + } + + if (IS_VNI(ipif->ipif_ill) || IS_IPMP(ipif->ipif_ill) || + IS_LOOPBACK(ipif->ipif_ill)) { + ret = ENOTSUP; + goto out; + } + + /* + * XXX We should consider entering the ipsq here via ipsq_enter(). + * There's really no good way to get a consistent snapshot of the + * hardware capabilities from an ill. We'll revisit this when we need + * to deal with getting updates. + */ + if (ILL_LSO_USABLE(ipif->ipif_ill)) { + ill_lso_capab_t *lsop = ipif->ipif_ill->ill_lso_capab; + *lsoflags = lsop->ill_lso_flags; + *lsomax = lsop->ill_lso_max; + } else { + *lsoflags = 0; + *lsomax = 0; + } + + if (ILL_HCKSUM_CAPABLE(ipif->ipif_ill)) { + ill_hcksum_capab_t *hck = ipif->ipif_ill->ill_hcksum_capab; + *hckflags = hck->ill_hcksum_txflags; + } else { + *hckflags = 0; + } + + ret = 0; +out: + if (ipif != NULL) + ipif_refrele(ipif); + return (ret); +} diff --git a/usr/src/uts/common/inet/ip/ip_output.c b/usr/src/uts/common/inet/ip/ip_output.c index 5caa043a35..690f39e0dc 100644 --- a/usr/src/uts/common/inet/ip/ip_output.c +++ b/usr/src/uts/common/inet/ip/ip_output.c @@ -1604,7 +1604,7 @@ ire_send_noroute_v4(ire_t *ire, mblk_t *mp, void *iph_arg, } /* - * Calculate a checksum ignoring any hardware capabilities + * Calculate a checksum ignoring any hardware capabilities. * * Returns B_FALSE if the packet was too short for the checksum. Caller * should free and do stats. 
@@ -1621,8 +1621,14 @@ ip_output_sw_cksum_v4(mblk_t *mp, ipha_t *ipha, ip_xmit_attr_t *ixa) ipaddr_t dst = ipha->ipha_dst; ipaddr_t src = ipha->ipha_src; - /* Just in case it contained garbage */ - DB_CKSUMFLAGS(mp) &= ~HCK_FLAGS; + /* + * Just in case it contained garbage. There may be valid flags if this + * is a tunneled packet. + */ + DB_CKSUMFLAGS(mp) &= ~HCK_OUTER_FLAGS; + + if ((ixa->ixa_flags & IXAF_SKIP_ULP_CKSUM) != 0) + goto ip_hdr_cksum; /* * Calculate ULP checksum @@ -1688,6 +1694,7 @@ ip_hdr_cksum: * Calculate the ULP checksum - try to use hardware. * In the case of MULTIRT, broadcast or multicast the * IXAF_NO_HW_CKSUM is set in which case we use software. + * If IXAF_SKIP_ULP_CKSUM is set, only do the IP checksum. * * If the hardware supports IP header checksum offload; then clear the * contents of IP header checksum field as expected by NIC. @@ -1702,10 +1709,11 @@ ip_output_cksum_v4(iaflags_t ixaflags, mblk_t *mp, ipha_t *ipha, { uint_t pktlen = ixa->ixa_pktlen; uint16_t *cksump; - uint16_t hck_flags; + uint16_t hck_flags, mp_hck_flags, ttype; uint32_t cksum; uint8_t protocol = ixa->ixa_protocol; uint16_t ip_hdr_length = ixa->ixa_ip_hdr_length; + boolean_t can_inet, can_full, can_partial; if ((ixaflags & IXAF_NO_HW_CKSUM) || !ILL_HCKSUM_CAPABLE(ill) || !dohwcksum) { @@ -1713,6 +1721,13 @@ ip_output_cksum_v4(iaflags_t ixaflags, mblk_t *mp, ipha_t *ipha, } /* + * If we've been asked to skip the ULP checksum, then just let IP do its + * business. + */ + if ((ixa->ixa_flags & IXAF_SKIP_ULP_CKSUM) != 0) + goto ip_hdr_cksum; + + /* * Calculate ULP checksum. Note that we don't use cksump and cksum * if the ill has FULL support. */ @@ -1753,11 +1768,34 @@ ip_output_cksum_v4(iaflags_t ixaflags, mblk_t *mp, ipha_t *ipha, * the payload; leave the payload checksum for the hardware to * calculate. N.B: We only need to set up checksum info on the * first mblk. + * + * We must check to see if an inner checksum has already been + * computed. 
If so, we need to look at different hardware flags + * to determine if we can perform full or partial checksums. */ hck_flags = ill->ill_hcksum_capab->ill_hcksum_txflags; - DB_CKSUMFLAGS(mp) &= ~HCK_FLAGS; - if (hck_flags & HCKSUM_INET_FULL_V4) { + mp_hck_flags = DB_CKSUMFLAGS(mp); + ttype = (DB_TTYPEFLAGS(mp) & TTYPE_MASK) >> TTYPE_SHIFT; + if ((mp_hck_flags & HCK_INNER_FLAGS_NEEDED) != 0) { + switch (ttype) { + case TTYPE_VXLAN: + can_inet = (hck_flags & HCKSUM_TUNNEL_VXLAN_OIP) != 0; + can_full = (hck_flags & HCKSUM_VXLAN_FULL) != 0; + can_partial = (hck_flags & HCKSUM_VXLAN_PSEUDO) != 0; + break; + default: + can_inet = B_FALSE; + can_full = B_FALSE; + can_partial = B_FALSE; + } + } else { + can_inet = (hck_flags & HCKSUM_IPHDRCKSUM) != 0; + can_full = (hck_flags & HCKSUM_INET_FULL_V4) != 0; + can_partial = (hck_flags & HCKSUM_INET_PARTIAL) != 0; + } + DB_CKSUMFLAGS(mp) &= ~HCK_OUTER_FLAGS; + if (can_full) { /* * Hardware calculates pseudo-header, header and the * payload checksums, so clear the checksum field in @@ -1767,14 +1805,14 @@ ip_output_cksum_v4(iaflags_t ixaflags, mblk_t *mp, ipha_t *ipha, DB_CKSUMFLAGS(mp) |= HCK_FULLCKSUM; ipha->ipha_hdr_checksum = 0; - if (hck_flags & HCKSUM_IPHDRCKSUM) { + if (can_inet) { DB_CKSUMFLAGS(mp) |= HCK_IPV4_HDRCKSUM; } else { ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); } return (B_TRUE); } - if ((hck_flags) & HCKSUM_INET_PARTIAL) { + if (can_partial) { ipaddr_t dst = ipha->ipha_dst; ipaddr_t src = ipha->ipha_src; /* @@ -1803,7 +1841,7 @@ ip_output_cksum_v4(iaflags_t ixaflags, mblk_t *mp, ipha_t *ipha, DB_CKSUMFLAGS(mp) |= HCK_PARTIALCKSUM; ipha->ipha_hdr_checksum = 0; - if (hck_flags & HCKSUM_IPHDRCKSUM) { + if (can_inet) { DB_CKSUMFLAGS(mp) |= HCK_IPV4_HDRCKSUM; } else { ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); diff --git a/usr/src/uts/common/inet/ip_if.h b/usr/src/uts/common/inet/ip_if.h index a625ec9238..e705b6ee08 100644 --- a/usr/src/uts/common/inet/ip_if.h +++ b/usr/src/uts/common/inet/ip_if.h @@ -493,6 
+493,9 @@ extern int ipif_arp_up(ipif_t *, enum ip_resolver_action, boolean_t); extern void ipif_dup_recovery(void *); extern void ipif_do_recovery(ipif_t *); +extern int ip_bindif_hwcaps(conn_t *, uint_t *, uint_t *, uint_t *); +extern int ip_bindif_ifindex(conn_t *, uint_t *); + /* * Notes on reference tracing on ill, ipif, ire, nce data structures: * diff --git a/usr/src/uts/common/inet/udp/udp.c b/usr/src/uts/common/inet/udp/udp.c index a88bac932c..1222c68e83 100644 --- a/usr/src/uts/common/inet/udp/udp.c +++ b/usr/src/uts/common/inet/udp/udp.c @@ -79,6 +79,7 @@ #include <inet/ipnet.h> #include <sys/vxlan.h> #include <inet/inet_hash.h> +#include <sys/pattr.h> #include <sys/tsol/label.h> #include <sys/tsol/tnet.h> @@ -347,6 +348,11 @@ void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol, typedef union T_primitives *t_primp_t; +typedef enum udp_hash_type { + UDP_HASH_NONE, + UDP_HASH_VXLAN +} udp_hash_type_t; + /* * Various protocols that encapsulate UDP have no real use for the source port. * Instead, they want to vary the source port to provide better equal-cost @@ -369,7 +375,7 @@ typedef union T_primitives *t_primp_t; * hashed. That should be an uncommon event. 
*/ uint16_t -udp_srcport_hash(mblk_t *mp, int type, uint16_t min, uint16_t max, +udp_srcport_hash(mblk_t *mp, udp_hash_type_t type, uint16_t min, uint16_t max, uint16_t def) { size_t szused = 0; @@ -1566,6 +1572,47 @@ udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) return (B_TRUE); } +static int +udp_do_opt_tunnel_get(conn_t *connp, udp_t *udp, udp_tunnel_opt_t *optp) +{ + uint_t hck, lso, mss; + + mutex_enter(&connp->conn_lock); + bzero(optp, sizeof (udp_tunnel_opt_t)); + + if (udp->udp_tunnel == 0) { + mutex_exit(&connp->conn_lock); + return (sizeof (udp_tunnel_opt_t)); + } + + optp->uto_type = UDP_TUNNEL_VXLAN; + if (udp->udp_vxlanhash != 0) { + optp->uto_opts |= UDP_TUNNEL_OPT_SRCPORT_HASH; + } + + if (udp->udp_tunnel_hwcap != 0) { + optp->uto_opts |= UDP_TUNNEL_OPT_HWCAP; + } + + if (udp->udp_skip_cksum != 0) { + optp->uto_opts |= UDP_TUNNEL_OPT_RELAX_CKSUM; + } + + mutex_exit(&connp->conn_lock); + + if ((optp->uto_opts & UDP_TUNNEL_OPT_HWCAP) != 0) { + if (ip_bindif_hwcaps(connp, &hck, &lso, &mss) != 0) + return (-1); + + optp->uto_type = UDP_TUNNEL_VXLAN; + optp->uto_cksum_flags = hck; + optp->uto_lso_flags = lso; + optp->uto_lso_max = mss; + } + + return (sizeof (udp_tunnel_opt_t)); +} + /* * This routine gets default values of certain options whose default * values are maintained by protcol specific code @@ -1668,11 +1715,9 @@ udp_opt_get(conn_t *connp, t_scalar_t level, t_scalar_t name, *i1 = udp->udp_rcvhdr ? 1 : 0; mutex_exit(&connp->conn_lock); return (sizeof (int)); - case UDP_SRCPORT_HASH: - mutex_enter(&connp->conn_lock); - *i1 = udp->udp_vxlanhash; - mutex_exit(&connp->conn_lock); - return (sizeof (int)); + case UDP_TUNNEL: + return (udp_do_opt_tunnel_get(connp, udp, + (udp_tunnel_opt_t *)ptr)); case UDP_SND_TO_CONNECTED: mutex_enter(&connp->conn_lock); *i1 = udp->udp_snd_to_conn ? 
1 : 0; @@ -1700,6 +1745,111 @@ udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) return (err); } +static int +udp_do_opt_tunnel_set(conn_opt_arg_t *coa, cred_t *cr, udp_tunnel_opt_t *optp) +{ + conn_t *connp = coa->coa_connp; + udp_t *udp = connp->conn_udp; + + if (optp->uto_type != UDP_TUNNEL_VXLAN) + return (EINVAL); + + if ((optp->uto_opts & ~(UDP_TUNNEL_OPT_SRCPORT_HASH | + UDP_TUNNEL_OPT_HWCAP | UDP_TUNNEL_OPT_RELAX_CKSUM)) != 0) + return (EINVAL); + + mutex_enter(&connp->conn_lock); + + if (udp->udp_tunnel != 0) { + mutex_exit(&connp->conn_lock); + return (EEXIST); + } + + /* + * Check to make sure the caller has already called bind(2) on this + * socket. If not, this is not acceptable. + */ + if (udp->udp_state < TS_IDLE) { + mutex_exit(&connp->conn_lock); + return (EINVAL); + } + + /* + * For now, don't allow multicast / broadcast. In the future if we do + * interface binding with this, then that's fine. + */ + if (connp->conn_mcbc_bind) { + mutex_exit(&connp->conn_lock); + return (EINVAL); + } + + if ((optp->uto_opts & UDP_TUNNEL_OPT_RELAX_CKSUM) != 0 && + connp->conn_ipversion != IPV4_VERSION) { + mutex_exit(&connp->conn_lock); + return (EINVAL); + } + + /* + * Set the fact that this is tunneled. We'll leave actually fetching the + * information to the getsockopt. + */ + udp->udp_tunnel = 1; + + /* + * We trust that the caller has asked for strict binding. + */ + if ((optp->uto_opts & UDP_TUNNEL_OPT_HWCAP) != 0) { + uint_t ifindex; + int ret; + t_scalar_t proto, cmd; + + if (connp->conn_ipversion == IPV4_VERSION) { + proto = IPPROTO_IP; + cmd = IP_BOUND_IF; + } else { + proto = IPPROTO_IPV6; + cmd = IPV6_BOUND_IF; + } + mutex_exit(&connp->conn_lock); + + /* + * Try and set up the strict binding to the listen interface. 
+ */ + if ((ret = ip_bindif_ifindex(connp, &ifindex)) != 0) { + return (ret); + } + + ret = conn_opt_set(coa, proto, cmd, sizeof (ifindex), + (uchar_t *)&ifindex, B_FALSE, cr); + if (ret != 0) { + mutex_enter(&connp->conn_lock); + udp->udp_tunnel = 0; + mutex_exit(&connp->conn_lock); + return (ret); + } + + mutex_enter(&connp->conn_lock); + udp->udp_tunnel_hwcap = 1; + } + + if ((optp->uto_opts & UDP_TUNNEL_OPT_SRCPORT_HASH) != 0) { + udp->udp_vxlanhash = 1; + } + + /* + * We only relax the checksum when using IPv4. UDP over IPv6 is required + * to have a checksum. + */ + if ((optp->uto_opts & UDP_TUNNEL_OPT_RELAX_CKSUM) != 0 && + connp->conn_ipversion == IPV4_VERSION) { + udp->udp_skip_cksum = 1; + } + + mutex_exit(&connp->conn_lock); + + return (0); +} + /* * This routine sets socket options. */ @@ -1813,31 +1963,20 @@ udp_do_opt_set(conn_opt_arg_t *coa, int level, int name, udp->udp_rcvhdr = onoff; mutex_exit(&connp->conn_lock); return (0); - case UDP_SRCPORT_HASH: - /* - * This should have already been verified, but double - * check. - */ - if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) { - return (error); - } - - /* First see if the val is something we understand */ - if (*i1 != UDP_HASH_DISABLE && *i1 != UDP_HASH_VXLAN) - return (EINVAL); - - if (!checkonly) { - mutex_enter(&connp->conn_lock); - udp->udp_vxlanhash = *i1; - mutex_exit(&connp->conn_lock); - } - /* Fully handled this option. 
*/ - return (0); case UDP_SND_TO_CONNECTED: mutex_enter(&connp->conn_lock); udp->udp_snd_to_conn = onoff; mutex_exit(&connp->conn_lock); return (0); + case UDP_TUNNEL: + if (cr != kcred) { + return (EPERM); + } + + if (checkonly) + return (0); + return (udp_do_opt_tunnel_set(coa, cr, + (udp_tunnel_opt_t *)invalp)); } break; } @@ -2106,6 +2245,35 @@ udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name, } /* + * If the message block that we're operating on belongs to an overlay device, + * then it may have information in the checksum and lso headers that we care + * about and need to move to the template message block. + */ +static void +udp_prepend_tunnel_attr(udp_t *udp, const mblk_t *src, mblk_t *dst) +{ + uint16_t ckflags; + + if (udp->udp_tunnel == 0) + return; + /* XXX Maybe assert? */ + if (DB_TYPE(src) != M_DATA) + return; + + ckflags = DB_CKSUMFLAGS(src) & HCK_INNER_FLAGS; + if (ckflags != 0) { + DB_CKSUMFLAGS(dst) |= ckflags; + } + + if ((DB_LSOFLAGS(src) & HW_LSO) != 0) { + DB_LSOFLAGS(dst) |= HW_LSO; + DB_LSOMSS(dst) = DB_LSOMSS(src); + } + + DB_TTYPEFLAGS(dst) |= (DB_TTYPEFLAGS(src) & TTYPE_MASK); +} + +/* * Setup IP and UDP headers. * Returns NULL on allocation failure, in which case data_mp is freed. 
*/ @@ -2123,7 +2291,7 @@ udp_prepend_hdr(conn_t *connp, ip_xmit_attr_t *ixa, const ip_pkt_t *ipp, boolean_t insert_spi = udp->udp_nat_t_endpoint; boolean_t hash_srcport = udp->udp_vxlanhash; uint_t ulp_hdr_len; - uint16_t srcport; + uint16_t srcport, ckflags; data_len = msgdsize(data_mp); ulp_hdr_len = UDPH_SIZE; @@ -2146,6 +2314,9 @@ udp_prepend_hdr(conn_t *connp, ip_xmit_attr_t *ixa, const ip_pkt_t *ipp, ASSERT(*errorp != 0); return (NULL); } + if (mp != data_mp) { + udp_prepend_tunnel_attr(udp, data_mp, mp); + } data_len += ulp_hdr_len; ixa->ixa_pktlen = data_len + ixa->ixa_ip_hdr_length; @@ -2182,7 +2353,9 @@ udp_prepend_hdr(conn_t *connp, ip_xmit_attr_t *ixa, const ip_pkt_t *ipp, ASSERT(ntohs(ipha->ipha_length) == ixa->ixa_pktlen); /* IP does the checksum if uha_checksum is non-zero */ - if (us->us_do_checksum) { + if (udp->udp_skip_cksum) { + udpha->uha_checksum = 0; + } else if (us->us_do_checksum) { if (cksum == 0) udpha->uha_checksum = 0xffff; else @@ -2201,6 +2374,7 @@ udp_prepend_hdr(conn_t *connp, ip_xmit_attr_t *ixa, const ip_pkt_t *ipp, } /* Insert all-0s SPI now. */ +skip_cksum: if (insert_spi) *((uint32_t *)(udpha + 1)) = 0; @@ -2884,6 +3058,11 @@ udp_output_ancillary(conn_t *connp, sin_t *sin, sin6_t *sin6, mblk_t *mp, dstport = connp->conn_fport; flowinfo = connp->conn_flowinfo; } + + if (udp->udp_skip_cksum != 0) { + ixa->ixa_flags |= IXAF_SKIP_ULP_CKSUM; + } + mutex_exit(&connp->conn_lock); /* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. 
*/ @@ -3377,6 +3556,9 @@ udp_prepend_header_template(conn_t *connp, ip_xmit_attr_t *ixa, mblk_t *mp, *errorp = ENOMEM; return (NULL); } + + udp_prepend_tunnel_attr(udp, mp, mp1); + mp1->b_wptr = DB_LIM(mp1); mp1->b_cont = mp; mp = mp1; @@ -3411,8 +3593,11 @@ udp_prepend_header_template(conn_t *connp, ip_xmit_attr_t *ixa, mblk_t *mp, ipha->ipha_length = htons((uint16_t)pktlen); /* IP does the checksum if uha_checksum is non-zero */ - if (us->us_do_checksum) + if (udp->udp_skip_cksum) { + udpha->uha_checksum = 0; + } else if (us->us_do_checksum) { udpha->uha_checksum = htons(cksum); + } /* if IP_PKTINFO specified an addres it wins over bind() */ if ((ipp->ipp_fields & IPPF_ADDR) && @@ -3915,6 +4100,11 @@ udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, sin6_t *sin6, } } } + + if (udp->udp_skip_cksum != 0) { + ixa->ixa_flags |= IXAF_SKIP_ULP_CKSUM; + } + /* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. */ if (connp->conn_xmit_ipp.ipp_fields & IPPF_ADDR) { ip_pkt_t *ipp = &connp->conn_xmit_ipp; diff --git a/usr/src/uts/common/inet/udp/udp_opt_data.c b/usr/src/uts/common/inet/udp/udp_opt_data.c index 847e2cdde6..ad3ea48956 100644 --- a/usr/src/uts/common/inet/udp/udp_opt_data.c +++ b/usr/src/uts/common/inet/udp/udp_opt_data.c @@ -293,9 +293,10 @@ opdes_t udp_opt_arr[] = { }, { UDP_NAT_T_ENDPOINT, IPPROTO_UDP, OA_RW, OA_RW, OP_PRIVPORT, 0, sizeof (int), 0 }, -{ UDP_SRCPORT_HASH, IPPROTO_UDP, OA_R, OA_RW, OP_CONFIG, 0, sizeof (int), 0 }, { UDP_SND_TO_CONNECTED, IPPROTO_UDP, OA_R, OA_RW, OP_CONFIG, 0, sizeof (int), - 0 } + 0 }, +{ UDP_TUNNEL, IPPROTO_UDP, 0, OA_RW, OP_CONFIG, OP_NODEFAULT, + sizeof (udp_tunnel_opt_t), 0 } }; /* diff --git a/usr/src/uts/common/inet/udp_impl.h b/usr/src/uts/common/inet/udp_impl.h index ebba10c0f7..3fccefb119 100644 --- a/usr/src/uts/common/inet/udp_impl.h +++ b/usr/src/uts/common/inet/udp_impl.h @@ -179,12 +179,15 @@ typedef struct udp_s { udp_issocket : 1, /* socket mode; sockfs is on top */ udp_nat_t_endpoint : 1, /* 
UDP_NAT_T_ENDPOINT option */ udp_rcvhdr : 1, /* UDP_RCVHDR option */ - udp_vxlanhash: 1, /* UDP_SRCPORT_HASH option */ + udp_vxlanhash: 1, /* Perform source port hashing */ /* Because there's only VXLAN, cheat */ /* and only use a single bit */ udp_snd_to_conn: 1, /* UDP_SND_TO_CONNECTED option */ + udp_tunnel: 1, /* UDP_TUNNEL called */ + udp_tunnel_hwcap: 1, /* UDP_TUNNEL asked for strict bind */ + udp_skip_cksum: 1, /* UDP_TUNNEL asked for no checksum */ - udp_pad_to_bit_31 : 27; + udp_pad_to_bit_31 : 25; /* Following 2 fields protected by the uf_lock */ struct udp_s *udp_bind_hash; /* Bind hash chain */ diff --git a/usr/src/uts/common/io/cxgbe/t4nex/t4_mac.c b/usr/src/uts/common/io/cxgbe/t4nex/t4_mac.c index 0ec67c8d19..e8fa96d1cd 100644 --- a/usr/src/uts/common/io/cxgbe/t4nex/t4_mac.c +++ b/usr/src/uts/common/io/cxgbe/t4nex/t4_mac.c @@ -833,7 +833,7 @@ t4_mc_getcapab(void *arg, mac_capab_t cap, void *data) case MAC_CAPAB_HCKSUM: if (pi->features & CXGBE_HW_CSUM) { uint32_t *d = data; - *d = HCKSUM_INET_FULL_V4 | HCKSUM_IPHDRCKSUM; + *d = HCKSUM_INET_FULL_V4 | HCKSUM_IPHDRCKSUM; } else status = B_FALSE; break; diff --git a/usr/src/uts/common/io/dld/dld_proto.c b/usr/src/uts/common/io/dld/dld_proto.c index 1ee00681fc..d6f57091b4 100644 --- a/usr/src/uts/common/io/dld/dld_proto.c +++ b/usr/src/uts/common/io/dld/dld_proto.c @@ -1514,6 +1514,15 @@ dld_capab_lso(dld_str_t *dsp, void *data, uint_t flags) /* translate the flag for mac clients */ if ((mac_lso.lso_flags & LSO_TX_BASIC_TCP_IPV4) != 0) lso->lso_flags |= DLD_LSO_BASIC_TCP_IPV4; + /* XXX We should probably not rely on equality */ + if ((mac_lso.lso_flags & LSO_TX_VXLAN_TCP) != 0 && + mac_lso.lso_vxlan_tcp.lso_tcpv4_max == lso->lso_max) { + lso->lso_flags |= DLD_LSO_VXLAN_TCP_IPV4; + } + if ((mac_lso.lso_flags & LSO_TX_VXLAN_TCP) != 0 && + mac_lso.lso_vxlan_tcp.lso_tcpv6_max == lso->lso_max) { + lso->lso_flags |= DLD_LSO_VXLAN_TCP_IPV6; + } dsp->ds_lso = B_TRUE; dsp->ds_lso_max = lso->lso_max; } else { diff 
--git a/usr/src/uts/common/io/i40e/i40e_gld.c b/usr/src/uts/common/io/i40e/i40e_gld.c index b319459cd3..5e488c4606 100644 --- a/usr/src/uts/common/io/i40e/i40e_gld.c +++ b/usr/src/uts/common/io/i40e/i40e_gld.c @@ -730,14 +730,16 @@ i40e_m_getcapab(void *arg, mac_capab_t cap, void *cap_data) * HW checksum offload * Inner L3 | Inner L4 | Outer L3 | Outer L4 * yes | yes | yes | only on x722 - * i.e. this HCKSUM_VXLAN_FULL_NO_OL4, except for x722, but we - * currently don't break out x722 separately. + * + * The L4 checksum offload requires that the pseudo-header is + * calculated. Hence why we use HCKSUM_INET_PARTIAL and + * HCKSUM_VXLAN_PSEUDO_NO_OL4. Eventually we can change this so + * on the X722 we use HCKSUM_VXLAN_PSEUDO. */ - *txflags = 0; if (i40e->i40e_tx_hcksum_enable == B_TRUE) *txflags = HCKSUM_INET_PARTIAL | HCKSUM_IPHDRCKSUM | - HCKSUM_VXLAN_FULL_NO_OL4; + HCKSUM_VXLAN_PSEUDO_NO_OL4; break; } diff --git a/usr/src/uts/common/io/i40e/i40e_main.c b/usr/src/uts/common/io/i40e/i40e_main.c index c15acbb265..02ebe5218e 100644 --- a/usr/src/uts/common/io/i40e/i40e_main.c +++ b/usr/src/uts/common/io/i40e/i40e_main.c @@ -2799,6 +2799,14 @@ i40e_start(i40e_t *i40e, boolean_t alloc) goto done; } + /* XXX */ + { + enum i40e_status_code r; + + r = i40e_aq_add_udp_tunnel(hw, 4789, 0, NULL, NULL); + cmn_err(CE_WARN, "i40e add UDP tunnel: %x", r); + } + /* * Finally, make sure that we're happy from an FM perspective. */ diff --git a/usr/src/uts/common/io/i40e/i40e_transceiver.c b/usr/src/uts/common/io/i40e/i40e_transceiver.c index 7b0e181810..72754f4071 100644 --- a/usr/src/uts/common/io/i40e/i40e_transceiver.c +++ b/usr/src/uts/common/io/i40e/i40e_transceiver.c @@ -1670,6 +1670,7 @@ typedef struct mac_ether_offload_info { /* * The following members are used when tunneling (e.g. vxlan) */ + uint8_t meoi_tun_protlen; /* Length of the tunnel protocol */ uint8_t meoi_tun_l2hlen; /* How long is the Ethernet header? 
*/ uint16_t meoi_tun_l3proto; /* What's the Ethertype */ uint8_t meoi_tun_l3hlen; /* How long is the header? */ @@ -1758,7 +1759,7 @@ i40e_meoi_get_uint16(mblk_t *mp, off_t off, uint16_t *out) static int mac_ether_offload_info(mblk_t *mp, mac_ether_offload_info_t *meoi, - boolean_t tunneled, size_t starting_off) + uint32_t ttype, size_t starting_off) { size_t off; uint16_t ether; @@ -1766,6 +1767,9 @@ mac_ether_offload_info(mblk_t *mp, mac_ether_offload_info_t *meoi, bzero(meoi, sizeof (mac_ether_offload_info_t)); + if (ttype != TTYPE_NONE && ttype != TTYPE_VXLAN) + return (-1); + off = offsetof(struct ether_header, ether_type) + starting_off; if (i40e_meoi_get_uint16(mp, off, &ether) != 0) return (-1); @@ -1839,7 +1843,7 @@ mac_ether_offload_info(mblk_t *mp, mac_ether_offload_info_t *meoi, meoi->meoi_l4hlen = l4len; meoi->meoi_flags |= MEOI_L4INFO_SET; - if (tunneled) { + if (ttype == TTYPE_VXLAN) { /* * Recursively call ourselves to obtain the tunneled L2/L3/L4 * data, using the proper starting offset to the tunneled @@ -1854,13 +1858,14 @@ mac_ether_offload_info(mblk_t *mp, mac_ether_offload_info_t *meoi, ASSERT(starting_off == 0); off = maclen + iplen + l4len + VXLAN_HDR_LEN; - ret = mac_ether_offload_info(mp, &meo, B_FALSE, off); + ret = mac_ether_offload_info(mp, &meo, TTYPE_NONE, off); if (ret != 0) return (ret); if ((meo.meoi_flags & MEOI_L2_L3_L4) != MEOI_L2_L3_L4) return (-1); + meoi->meoi_tun_protlen = VXLAN_HDR_LEN; meoi->meoi_tun_l2hlen = meo.meoi_l2hlen; meoi->meoi_tun_l3proto = meo.meoi_l3proto; meoi->meoi_tun_l3hlen = meo.meoi_l3hlen; @@ -1882,14 +1887,15 @@ mac_ether_offload_info(mblk_t *mp, mac_ether_offload_info_t *meoi, * 'or' into the descriptor based on the checksum flags for this mblk_t and the * actual information we care about. * - * XXX - update comment + * If we're using LSO or need to perform tunneling-based checksums, then we'll + * fill in information that will be used for the Transmit Context Descriptor. 
*/ static int i40e_tx_context(i40e_t *i40e, i40e_trqpair_t *itrq, mblk_t *mp, i40e_tx_context_t *tctx) { int ret; - uint32_t chkflags, start, mss, lsoflags; + uint32_t chkflags, start, mss, lsoflags, ttype; mac_ether_offload_info_t meo; i40e_txq_stat_t *txs = &itrq->itrq_txstat; boolean_t tunneled; @@ -1901,6 +1907,7 @@ i40e_tx_context(i40e_t *i40e, i40e_trqpair_t *itrq, mblk_t *mp, mac_hcksum_get(mp, &start, NULL, NULL, NULL, &chkflags); mac_lso_get(mp, &mss, &lsoflags); + mac_tunnel_type_get(mp, &ttype); if (chkflags == 0 && lsoflags == 0) return (0); @@ -1910,10 +1917,15 @@ i40e_tx_context(i40e_t *i40e, i40e_trqpair_t *itrq, mblk_t *mp, * tunneled packet. */ tunneled = (chkflags & - (HCK_INNER_IPV4_HDRCKSUM_NEEDED | HCK_INNER_FULLCKSUM_NEEDED)) != 0; + (HCK_INNER_IPV4_HDRCKSUM_NEEDED | HCK_INNER_PSEUDO_NEEDED)) != 0; + if (tunneled && ttype != TTYPE_VXLAN) { + /* XXX kstat */ + return (-1); + } + tctx->itc_ctx_tunneled = tunneled; - if ((ret = mac_ether_offload_info(mp, &meo, tunneled, 0)) != 0) { + if ((ret = mac_ether_offload_info(mp, &meo, ttype, 0)) != 0) { txs->itxs_hck_meoifail.value.ui64++; return (ret); } @@ -1942,23 +1954,15 @@ i40e_tx_context(i40e_t *i40e, i40e_trqpair_t *itrq, mblk_t *mp, * Inner IPv4 checksum if IIPT = 11b * Outer IPv4 checksum if EIPT = 11b * L4 checksum if L4LEN is meaningful - * - * XXX JJ is the VXLAN_HDR_LEN properly accounted for? - * XXX JJ do I need to set something in the DECTTL field? */ uint8_t eipt; - uint_t l4tunlen = meo.meoi_l4hlen + meo.meoi_tun_l2hlen; + uint_t l4tunlen; /* - * Tunneling implies inner checksumming is requested, but that - * is current only supported when the outer L4 proto is UDP. + * The MAC ether offload logic should have verified that we have + * the right information for calculating the checksums here. + * Make sure that this is the case. 
*/ - if ((meo.meoi_flags & MEOI_L4INFO_SET) == 0 || - meo.meoi_l4proto != IPPROTO_UDP || - (meo.meoi_flags & MEOI_TUNNEL_INFO_SET) == 0) { - txs->itxs_hck_badl4.value.ui64++; - return (-1); - } if ((meo.meoi_flags & MEOI_L2INFO_SET) == 0) { txs->itxs_hck_nol2info.value.ui64++; return (-1); @@ -1968,21 +1972,31 @@ i40e_tx_context(i40e_t *i40e, i40e_trqpair_t *itrq, mblk_t *mp, return (-1); } - if (chkflags & (HCK_FULLCKSUM | HCK_PARTIALCKSUM)) { + if ((meo.meoi_flags & MEOI_L4INFO_SET) == 0 || + meo.meoi_l4proto != IPPROTO_UDP) { + txs->itxs_hck_badl4.value.ui64++; + return (-1); + } + + if ((meo.meoi_flags & MEOI_TUNNEL_INFO_SET) == 0) { + /* XXX Missing kstat */ + return (-1); + } + + if ((chkflags & HCK_PARTIALCKSUM) != 0) { /* * There is no HW support for outer checksum other than * the (outer) HCK_IPV4_HDRCKSUM. - * Note: no kstat for invalid request. + * XXX missing kstat */ return (-1); } - /* L4TUNT is UDP */ - tctx->itc_data_cmdflags |= I40E_TX_DESC_CMD_L4T_EOFT_UDP; - - /* The MAC len is for the outer, irregardless of tunneling */ - tctx->itc_data_offsets |= (meo.meoi_l2hlen >> 1) << - I40E_TX_DESC_LENGTH_MACLEN_SHIFT; + /* + * First fill in the descriptors for the tunneling extensions. 
+ */ + l4tunlen = meo.meoi_l4hlen + meo.meoi_tun_l2hlen + + meo.meoi_tun_protlen;; /* outer IP */ if (chkflags & HCK_IPV4_HDRCKSUM) { @@ -2003,6 +2017,17 @@ i40e_tx_context(i40e_t *i40e, i40e_trqpair_t *itrq, mblk_t *mp, } } + tctx->itc_ctx_tunnel_fld = + I40E_TXD_TNL_SET_EIPT(eipt) | + I40E_TXD_TNL_SET_EIPLEN(meo.meoi_l3hlen >> 2) | + I40E_TXD_TNL_SET_L4TUNT(I40E_TX_DESC_TNL_L4TUNT_UDP) | + I40E_TXD_TNL_SET_L4TUNLEN(l4tunlen >> 1) | + I40E_TXD_TNL_SET_DECTTL(0); + + /* The MAC len is for the outer, irregardless of tunneling */ + tctx->itc_data_offsets |= (meo.meoi_l2hlen >> 1) << + I40E_TX_DESC_LENGTH_MACLEN_SHIFT; + /* inner IP */ if (chkflags & HCK_INNER_IPV4_HDRCKSUM_NEEDED) { /* When tunneled, IIPT applies to the inner IP (L3) */ @@ -2012,13 +2037,24 @@ i40e_tx_context(i40e_t *i40e, i40e_trqpair_t *itrq, mblk_t *mp, } tctx->itc_data_cmdflags |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM; + } else { + if (meo.meoi_l3proto == ETHERTYPE_IP) { + tctx->itc_data_cmdflags |= + I40E_TX_DESC_CMD_IIPT_IPV4; + } else if (meo.meoi_l3proto == ETHERTYPE_IPV6) { + tctx->itc_data_cmdflags |= + I40E_TX_DESC_CMD_IIPT_IPV6; + } else { + txs->itxs_hck_badl3.value.ui64++; + return (-1); + } } /* set the inner IP header length */ tctx->itc_data_offsets |= (meo.meoi_tun_l3hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT; - if (chkflags & HCK_INNER_FULLCKSUM_NEEDED) { + if (chkflags & HCK_INNER_PSEUDO_NEEDED) { /* L4T */ switch (meo.meoi_tun_l4proto) { case IPPROTO_TCP: @@ -2043,13 +2079,6 @@ i40e_tx_context(i40e_t *i40e, i40e_trqpair_t *itrq, mblk_t *mp, I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; } - tctx->itc_ctx_tunnel_fld = - I40E_TXD_TNL_SET_EIPT(eipt) | - I40E_TXD_TNL_SET_EIPLEN(meo.meoi_l3hlen) | - I40E_TXD_TNL_SET_L4TUNT(I40E_TX_DESC_TNL_L4TUNT_UDP) | - I40E_TXD_TNL_SET_L4TUNLEN(l4tunlen) | - I40E_TXD_TNL_SET_DECTTL(0); - } else { /* Not tunneled */ diff --git a/usr/src/uts/common/io/mac/mac_provider.c b/usr/src/uts/common/io/mac/mac_provider.c index 73e7daac5f..a437c58b7e 100644 --- 
a/usr/src/uts/common/io/mac/mac_provider.c +++ b/usr/src/uts/common/io/mac/mac_provider.c @@ -1559,3 +1559,11 @@ mac_transceiver_info_set_usable(mac_transceiver_info_t *infop, { infop->mti_usable = usable; } + +void +mac_tunnel_type_get(const mblk_t *mp, uint32_t *typep) +{ + ASSERT(DB_TYPE(mp) == M_DATA); + + *typep = (DB_TTYPEFLAGS(mp) & TTYPE_MASK) >> TTYPE_SHIFT; +} diff --git a/usr/src/uts/common/io/overlay/overlay.c b/usr/src/uts/common/io/overlay/overlay.c index 3f34ec3b58..36c4fd38b1 100644 --- a/usr/src/uts/common/io/overlay/overlay.c +++ b/usr/src/uts/common/io/overlay/overlay.c @@ -817,6 +817,7 @@ #include <sys/mac_client_priv.h> #include <sys/mac_ether.h> #include <sys/vlan.h> +#include <sys/pattr.h> #include <sys/overlay_impl.h> @@ -830,15 +831,17 @@ typedef enum overlay_dev_prop { OVERLAY_DEV_P_MTU = 0, OVERLAY_DEV_P_VNETID, OVERLAY_DEV_P_ENCAP, - OVERLAY_DEV_P_VARPDID + OVERLAY_DEV_P_VARPDID, + OVERLAY_DEV_P_STRICTIF } overlay_dev_prop_t; -#define OVERLAY_DEV_NPROPS 4 +#define OVERLAY_DEV_NPROPS 5 static const char *overlay_dev_props[] = { "mtu", "vnetid", "encap", - "varpd/id" + "varpd/id", + "mux/bound" }; #define OVERLAY_MTU_MIN 576 @@ -973,7 +976,7 @@ overlay_m_start(void *arg) return (ret); mux = overlay_mux_open(odd->odd_plugin, domain, family, prot, - (struct sockaddr *)&storage, slen, &ret); + (struct sockaddr *)&storage, slen, odd->odd_strictif, &ret); if (mux == NULL) return (ret); @@ -984,6 +987,12 @@ overlay_m_start(void *arg) odd->odd_flags |= OVERLAY_F_IN_MUX; mutex_exit(&odd->odd_lock); + /* + * Now that we're in the MUX trigger MAC to rescan our capabilities, + * which is important for VNICs on top of us. 
+ */ + mac_capab_update(odd->odd_mh); + return (0); } @@ -1044,6 +1053,28 @@ overlay_m_unicast(void *arg, const uint8_t *macaddr) return (0); } +static inline void +overlay_tx_checksum_shift(mblk_t *source, mblk_t *target) +{ + uint32_t oflags, nflags = 0; + + mac_hcksum_get(source, NULL, NULL, NULL, NULL, &oflags); + mac_hcksum_set(source, NULL, NULL, NULL, NULL, 0); + + if ((oflags & HCK_IPV4_HDRCKSUM) != 0) + nflags |= HCK_INNER_IPV4_HDRCKSUM_NEEDED; + if ((oflags & HCK_FULLCKSUM) != 0) { + nflags |= HCK_INNER_FULLCKSUM_NEEDED; + } else if ((oflags & HCK_PARTIALCKSUM) != 0) { + nflags |= HCK_INNER_PSEUDO_NEEDED; + } + + /* + * Manually OR in the flags so we don't clobber existing information. + */ + DB_CKSUMFLAGS(target) |= nflags; +} + mblk_t * overlay_m_tx(void *arg, mblk_t *mp_chain) { @@ -1095,6 +1126,12 @@ overlay_m_tx(void *arg, mblk_t *mp_chain) goto out; } + /* + * Make sure any checksum flags that ended up on mp from the + * lower level are shifted over to ep as inner flags. + */ + overlay_tx_checksum_shift(mp, ep); + ep->b_cont = mp; ret = overlay_mux_tx(odd->odd_mux, &hdr, ep); if (ret != 0) @@ -1121,12 +1158,50 @@ overlay_m_ioctl(void *arg, queue_t *q, mblk_t *mp) static boolean_t overlay_m_getcapab(void *arg, mac_capab_t cap, void *cap_data) { + overlay_dev_t *odd = arg; + /* - * Tell MAC we're an overlay. + * Always tell MAC we're an overlay. */ if (cap == MAC_CAPAB_OVERLAY) return (B_TRUE); - return (B_FALSE); + + /* + * Check to see if this is a capability that we'd consider letting a + * module know how to ask the mux about. + */ + switch (cap) { + case MAC_CAPAB_HCKSUM: + case MAC_CAPAB_LSO: + break; + default: + return (B_FALSE); + } + + if (odd->odd_plugin->ovp_ops->ovpo_mac_capab == NULL) { + return (B_FALSE); + } + + /* + * Once the device is present in a MUX it will know if it has the + * ability to offer various capabilities to underlying hardware. Check + * if we're in a mux and if so, offer that to the device. 
We can rely on + * the fact that MAC won't stop us while it's asking us about a + * capability to know that we can't be removed from a mux if we're not + * in it right now. + * + * Also, even if we're not in a MUX yet, we will retrigger capability + * scans once we are in one. + */ + mutex_enter(&odd->odd_lock); + if ((odd->odd_flags & OVERLAY_F_IN_MUX) == 0) { + mutex_exit(&odd->odd_lock); + return (B_FALSE); + } + mutex_exit(&odd->odd_lock); + + return (odd->odd_plugin->ovp_ops->ovpo_mac_capab(odd->odd_pvoid, + cap, cap_data, odd->odd_mux->omux_ksock)); } /* ARGSUSED */ @@ -1359,6 +1434,7 @@ overlay_i_create(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) cv_init(&odd->odd_iowait, NULL, CV_DRIVER, NULL); odd->odd_ref = 0; odd->odd_flags = 0; + odd->odd_strictif = B_TRUE; list_insert_tail(&overlay_dev_list, odd); mutex_exit(&overlay_dev_lock); @@ -1615,6 +1691,7 @@ overlay_i_propinfo(void *karg, intptr_t arg, int mode, cred_t *cred, uint_t propid = UINT_MAX; overlay_ioc_propinfo_t *oip = karg; overlay_prop_handle_t phdl = (overlay_prop_handle_t)oip; + const uint32_t def_true = 1; odd = overlay_hold_by_dlid(oip->oipi_linkid); if (odd == NULL) @@ -1695,6 +1772,11 @@ overlay_i_propinfo(void *karg, intptr_t arg, int mode, cred_t *cred, overlay_prop_set_type(phdl, OVERLAY_PROP_T_UINT); overlay_prop_set_nodefault(phdl); break; + case OVERLAY_DEV_P_STRICTIF: + overlay_prop_set_type(phdl, OVERLAY_PROP_T_BOOLEAN); + overlay_prop_set_prot(phdl, OVERLAY_PROP_PERM_RW); + overlay_prop_set_default(phdl, &def_true, sizeof (def_true)); + break; default: overlay_hold_rele(odd); mac_perim_exit(mph); @@ -1804,6 +1886,13 @@ overlay_i_getprop(void *karg, intptr_t arg, int mode, cred_t *cred, } mutex_exit(&odd->odd_lock); break; + case OVERLAY_DEV_P_STRICTIF: + mutex_enter(&odd->odd_lock); + + oip->oip_size = sizeof (odd->odd_strictif); + bcopy(&odd->odd_strictif, oip->oip_value, oip->oip_size); + mutex_exit(&odd->odd_lock); + break; default: ret = ENOENT; } @@ -1856,6 
+1945,7 @@ overlay_i_setprop(void *karg, intptr_t arg, int mode, cred_t *cred, uint_t propid = UINT_MAX; mac_perim_handle_t mph; uint64_t maxid, *vidp; + uint32_t *boolp; if (oip->oip_size > OVERLAY_PROP_SIZEMAX) return (EINVAL); @@ -1941,6 +2031,22 @@ overlay_i_setprop(void *karg, intptr_t arg, int mode, cred_t *cred, case OVERLAY_DEV_P_VARPDID: ret = EPERM; break; + case OVERLAY_DEV_P_STRICTIF: + if (oip->oip_size != sizeof (uint32_t)) { + ret = EINVAL; + break; + } + mutex_enter(&odd->odd_lock); + if ((odd->odd_flags & OVERLAY_F_IN_MUX) != 0) { + mutex_exit(&odd->odd_lock); + ret = EBUSY; + break; + } + + boolp = (uint32_t *)oip->oip_value; + odd->odd_strictif = *boolp > 0 ? B_TRUE : B_FALSE; + mutex_exit(&odd->odd_lock); + break; default: ret = ENOENT; } diff --git a/usr/src/uts/common/io/overlay/overlay_mux.c b/usr/src/uts/common/io/overlay/overlay_mux.c index 9f70e8c83e..1f330b622c 100644 --- a/usr/src/uts/common/io/overlay/overlay_mux.c +++ b/usr/src/uts/common/io/overlay/overlay_mux.c @@ -10,7 +10,7 @@ */ /* - * Copyright 2015 Joyent, Inc. + * Copyright (c) 2018, Joyent, Inc. */ /* @@ -30,6 +30,7 @@ #include <sys/strsubr.h> #include <sys/strsun.h> #include <sys/tihdr.h> +#include <sys/pattr.h> #include <sys/overlay_impl.h> @@ -71,6 +72,24 @@ overlay_mux_comparator(const void *a, const void *b) } /* + * Look at the checksum flags that are set on the block. Hardware may support + * checksumming the inner frames. If so, we need to update the checksum flags on + * the message block to make sure that it makes sense. + */ +static inline void +overlay_recv_checksum_shift(mblk_t *mp) +{ + uint32_t oflags, nflags = 0; + + mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &oflags); + if ((oflags & HCK_INNER_IPV4_HDRCKSUM_OK) != 0) + nflags |= HCK_IPV4_HDRCKSUM_OK; + if ((oflags & HCK_INNER_FULLCKSUM_OK) != 0) + nflags |= HCK_FULLCKSUM_OK; + mac_hcksum_set(mp, NULL, NULL, NULL, NULL, nflags); +} + +/* * This is the central receive data path. 
We need to decode the packet, if we * can, and then deliver it to the appropriate overlay. */ @@ -187,6 +206,8 @@ overlay_mux_recv(ksocket_t ks, mblk_t *mpchain, size_t msgsize, int oob, mutex_exit(&odd->odd_lock); mutex_exit(&mux->omux_lock); + overlay_recv_checksum_shift(mp); + mac_rx(odd->odd_mh, NULL, mp); mutex_enter(&odd->odd_lock); @@ -203,7 +224,7 @@ overlay_mux_recv(ksocket_t ks, mblk_t *mpchain, size_t msgsize, int oob, */ overlay_mux_t * overlay_mux_open(overlay_plugin_t *opp, int domain, int family, int protocol, - struct sockaddr *addr, socklen_t len, int *errp) + struct sockaddr *addr, socklen_t len, boolean_t strictif, int *errp) { int err; overlay_mux_t *mux; @@ -221,7 +242,8 @@ overlay_mux_open(overlay_plugin_t *opp, int domain, int family, int protocol, len == mux->omux_alen && bcmp(addr, mux->omux_addr, len) == 0) { - if (opp != mux->omux_plugin) { + if (opp != mux->omux_plugin || + strictif != mux->omux_strictif) { *errp = EEXIST; return (NULL); } @@ -260,7 +282,7 @@ overlay_mux_open(overlay_plugin_t *opp, int domain, int family, int protocol, * then ask it to perform any additional socket set up it'd like to do. 
*/ if (opp->ovp_ops->ovpo_sockopt != NULL && - (*errp = opp->ovp_ops->ovpo_sockopt(ksock)) != 0) { + (*errp = opp->ovp_ops->ovpo_sockopt(ksock, strictif)) != 0) { mutex_exit(&overlay_mux_lock); ksocket_close(ksock, kcred); return (NULL); @@ -273,6 +295,7 @@ overlay_mux_open(overlay_plugin_t *opp, int domain, int family, int protocol, mux->omux_domain = domain; mux->omux_family = family; mux->omux_protocol = protocol; + mux->omux_strictif = strictif; mux->omux_addr = kmem_alloc(len, KM_SLEEP); bcopy(addr, mux->omux_addr, len); mux->omux_alen = len; diff --git a/usr/src/uts/common/io/overlay/overlay_prop.c b/usr/src/uts/common/io/overlay/overlay_prop.c index ba1ea2a629..159fde0f78 100644 --- a/usr/src/uts/common/io/overlay/overlay_prop.c +++ b/usr/src/uts/common/io/overlay/overlay_prop.c @@ -54,7 +54,8 @@ overlay_prop_set_type(overlay_prop_handle_t phdl, overlay_prop_type_t type) } int -overlay_prop_set_default(overlay_prop_handle_t phdl, void *def, ssize_t len) +overlay_prop_set_default(overlay_prop_handle_t phdl, const void *def, + ssize_t len) { overlay_ioc_propinfo_t *infop = (overlay_ioc_propinfo_t *)phdl; diff --git a/usr/src/uts/common/io/overlay/plugins/overlay_vxlan.c b/usr/src/uts/common/io/overlay/plugins/overlay_vxlan.c index 8b4e4ecb42..a381a0c793 100644 --- a/usr/src/uts/common/io/overlay/plugins/overlay_vxlan.c +++ b/usr/src/uts/common/io/overlay/plugins/overlay_vxlan.c @@ -10,7 +10,7 @@ */ /* - * Copyright 2015 Joyent, Inc. + * Copyright (c) 2018 Joyent, Inc. 
*/ /* @@ -48,6 +48,9 @@ #include <inet/ip.h> #include <netinet/in.h> #include <sys/strsun.h> +#include <sys/dld.h> +#include <sys/dlpi.h> +#include <sys/pattr.h> #include <netinet/udp.h> static const char *vxlan_ident = "vxlan"; @@ -64,12 +67,21 @@ static const char *vxlan_props[] = { NULL }; +typedef enum vxlan_capab_state { + VXLAN_C_UNKNOWN = 0, + VXLAN_C_VALID, + VXLAN_C_FAILED +} vxlan_capab_state_t; + typedef struct vxlan { kmutex_t vxl_lock; overlay_handle_t vxl_oh; uint16_t vxl_lport; boolean_t vxl_hladdr; struct in6_addr vxl_laddr; + vxlan_capab_state_t vxl_cstate; + int vxl_cstate_err; + udp_tunnel_opt_t vxl_utunnel; } vxlan_t; static int @@ -77,12 +89,14 @@ vxlan_o_init(overlay_handle_t oh, void **outp) { vxlan_t *vxl; - vxl = kmem_alloc(sizeof (vxlan_t), KM_SLEEP); + vxl = kmem_zalloc(sizeof (vxlan_t), KM_SLEEP); *outp = vxl; mutex_init(&vxl->vxl_lock, NULL, MUTEX_DRIVER, NULL); vxl->vxl_oh = oh; vxl->vxl_lport = vxlan_defport; vxl->vxl_hladdr = B_FALSE; + vxl->vxl_cstate = VXLAN_C_UNKNOWN; + vxl->vxl_cstate_err = 0; return (0); } @@ -128,16 +142,24 @@ vxlan_o_socket(void *arg, int *dp, int *fp, int *pp, struct sockaddr *addr, } static int -vxlan_o_sockopt(ksocket_t ksock) +vxlan_o_sockopt(ksocket_t ksock, boolean_t strictif) { int val, err; - if (vxlan_fanout == B_FALSE) - return (0); + udp_tunnel_opt_t topt; - val = UDP_HASH_VXLAN; - err = ksocket_setsockopt(ksock, IPPROTO_UDP, UDP_SRCPORT_HASH, &val, - sizeof (val), kcred); - return (err); + bzero(&topt, sizeof (udp_tunnel_opt_t)); + topt.uto_type = UDP_TUNNEL_VXLAN; + topt.uto_opts = UDP_TUNNEL_OPT_SRCPORT_HASH; + if (strictif) { + topt.uto_opts |= UDP_TUNNEL_OPT_HWCAP | UDP_TUNNEL_OPT_RELAX_CKSUM; + } + + if ((err = ksocket_setsockopt(ksock, IPPROTO_UDP, UDP_TUNNEL, &topt, + sizeof (topt), kcred) != 0)) { + return (err); + } + + return (0); } /* ARGSUSED */ @@ -166,6 +188,13 @@ vxlan_o_encap(void *arg, mblk_t *mp, ovep_encap_info_t *einfop, vxh->vxlan_flags = ntohl(VXLAN_F_VDI); vxh->vxlan_id = 
htonl((uint32_t)einfop->ovdi_id << VXLAN_ID_SHIFT); ob->b_wptr += VXLAN_HDR_LEN; + + /* + * Make sure to set the fact that this is a VXLAN packet on this message + * block. + */ + DB_TTYPEFLAGS(ob) |= (TTYPE_VXLAN << TTYPE_SHIFT); + *outp = ob; return (0); @@ -305,6 +334,78 @@ vxlan_o_propinfo(const char *pr_name, overlay_prop_handle_t phdl) return (EINVAL); } +static boolean_t +vxlan_o_mac_capab(void *arg, mac_capab_t capab, void *cap_data, ksocket_t ksock) +{ + vxlan_t *vxl = arg; + boolean_t hcapab = B_FALSE; + + if (capab != MAC_CAPAB_HCKSUM && capab != MAC_CAPAB_LSO) + return (B_FALSE); + + mutex_enter(&vxl->vxl_lock); + if (vxl->vxl_cstate == VXLAN_C_FAILED) { + goto out; + } else if (vxl->vxl_cstate == VXLAN_C_UNKNOWN) { + int len = sizeof (udp_tunnel_opt_t); + bzero(&vxl->vxl_utunnel, sizeof (udp_tunnel_opt_t)); + vxl->vxl_cstate_err = ksocket_getsockopt(ksock, IPPROTO_UDP, + UDP_TUNNEL, &vxl->vxl_utunnel, &len, kcred); + if (vxl->vxl_cstate_err != 0) { + vxl->vxl_cstate = VXLAN_C_FAILED; + goto out; + } + + if (vxl->vxl_utunnel.uto_type != UDP_TUNNEL_VXLAN) { + vxl->vxl_cstate = VXLAN_C_FAILED; + vxl->vxl_cstate_err = -1; + goto out; + } + } + + switch (capab) { + case MAC_CAPAB_HCKSUM: + /* + * XXX Almost certainly some things are going to need the right + * pseudo-header on transmit. 
+ */ + if ((vxl->vxl_utunnel.uto_cksum_flags & (HCKSUM_VXLAN_FULL | + HCKSUM_VXLAN_PSEUDO | HCKSUM_VXLAN_PSEUDO_NO_OL4)) != 0) { + uint32_t *hck = cap_data; + *hck = HCKSUM_IPHDRCKSUM; + if ((vxl->vxl_utunnel.uto_cksum_flags & + HCKSUM_VXLAN_FULL) != 0) { + *hck |= HCKSUM_INET_FULL_V4 | HCKSUM_INET_FULL_V6; + } else if ((vxl->vxl_utunnel.uto_cksum_flags & + (HCKSUM_VXLAN_PSEUDO | + HCKSUM_VXLAN_PSEUDO_NO_OL4)) != 0) { + *hck |= HCKSUM_INET_PARTIAL; + } + hcapab = B_TRUE; + } + break; +#if 0 + case MAC_CAPAB_LSO: + if ((vxl->vxl_utunnel.uto_lso_flags & DLD_LSO_VXLAN_TCP_IPV4) != 0) { + mac_capab_lso_t *lso = cap_data; + lso->lso_flags = LSO_TX_BASIC_TCP_IPV4; + /* XXX Check value */ + lso->lso_basic_tcp_ipv4.lso_max = + vxl->vxl_utunnel.uto_lso_max - 100; + hcapab = B_TRUE; + } + break; +#endif + default: + hcapab = B_FALSE; + break; + } + +out: + mutex_exit(&vxl->vxl_lock); + return (hcapab); +} + static struct overlay_plugin_ops vxlan_o_ops = { 0, vxlan_o_init, @@ -315,7 +416,8 @@ static struct overlay_plugin_ops vxlan_o_ops = { vxlan_o_sockopt, vxlan_o_getprop, vxlan_o_setprop, - vxlan_o_propinfo + vxlan_o_propinfo, + vxlan_o_mac_capab }; static struct modlmisc vxlan_modlmisc = { diff --git a/usr/src/uts/common/mapfiles/mac.mapfile b/usr/src/uts/common/mapfiles/mac.mapfile index d40c09b311..79a465c19b 100644 --- a/usr/src/uts/common/mapfiles/mac.mapfile +++ b/usr/src/uts/common/mapfiles/mac.mapfile @@ -10,7 +10,7 @@ # # -# Copyright (c) 2017, Joyent, Inc. +# Copyright (c) 2018, Joyent, Inc. 
# # @@ -51,6 +51,7 @@ SYMBOL_SCOPE { mac_rx_ring { FLAGS = EXTERN }; mac_transceiver_info_set_present { FLAGS = EXTERN }; mac_transceiver_info_set_usable { FLAGS = EXTERN }; + mac_tunnel_type_get { FLAGS = EXTERN }; mac_tx_ring_update { FLAGS = EXTERN }; mac_tx_update { FLAGS = EXTERN }; mac_unregister { FLAGS = EXTERN }; diff --git a/usr/src/uts/common/netinet/udp.h b/usr/src/uts/common/netinet/udp.h index 74cff75d43..a775a4fc00 100644 --- a/usr/src/uts/common/netinet/udp.h +++ b/usr/src/uts/common/netinet/udp.h @@ -34,15 +34,28 @@ struct udphdr { #define UDP_EXCLBIND 0x0101 /* for internal use only */ #define UDP_RCVHDR 0x0102 /* for internal use only */ #define UDP_NAT_T_ENDPOINT 0x0103 /* for internal use only */ -#define UDP_SRCPORT_HASH 0x0104 /* for internal use only */ +#define UDP_TUNNEL 0x0104 /* for internal use only */ #define UDP_SND_TO_CONNECTED 0x0105 /* for internal use only */ +#ifdef _KERNEL + /* - * Hash definitions for UDP_SRCPORT_HASH that effectively tell UDP how to go - * handle UDP_SRCPORT_HASH. + * Internal structure definitions for UDP_TUNNEL. 
*/ -#define UDP_HASH_DISABLE 0x0000 /* for internal use only */ -#define UDP_HASH_VXLAN 0x0001 /* for internal use only */ +#define UDP_TUNNEL_VXLAN 1 +#define UDP_TUNNEL_OPT_SRCPORT_HASH 0x01 +#define UDP_TUNNEL_OPT_HWCAP 0x02 +#define UDP_TUNNEL_OPT_RELAX_CKSUM 0x04 + +typedef struct udp_tunnel_opt { + uint32_t uto_type; + uint32_t uto_opts; + uint32_t uto_cksum_flags; + uint32_t uto_lso_flags; + uint32_t uto_lso_max; +} udp_tunnel_opt_t; + +#endif /* _KERNEL */ /* * Following option in UDP_ namespace required to be exposed through diff --git a/usr/src/uts/common/sys/dld.h b/usr/src/uts/common/sys/dld.h index 158a802c4a..3e74f0e03b 100644 --- a/usr/src/uts/common/sys/dld.h +++ b/usr/src/uts/common/sys/dld.h @@ -437,6 +437,8 @@ typedef struct dld_capab_poll_s { */ #define DLD_LSO_BASIC_TCP_IPV4 0x01 /* TCP LSO over IPv4 capability */ #define DLD_LSO_BASIC_TCP_IPV6 0x02 /* TCP LSO over IPv6 capability */ +#define DLD_LSO_VXLAN_TCP_IPV4 0x04 /* TCPv4 LSO encapsulated in VXLAN */ +#define DLD_LSO_VXLAN_TCP_IPV6 0x08 /* TCPv6 LSO encapsulated in VXLAN */ typedef struct dld_capab_lso_s { uint_t lso_flags; /* capability flags */ diff --git a/usr/src/uts/common/sys/dlpi.h b/usr/src/uts/common/sys/dlpi.h index b2fd131066..087b3bbc48 100644 --- a/usr/src/uts/common/sys/dlpi.h +++ b/usr/src/uts/common/sys/dlpi.h @@ -689,14 +689,20 @@ typedef struct { #define HCKSUM_INET_FULL_ICMPV6 0x0040 /* Full 1's complement checksum */ /* ability for IPv6 ICMP packets. 
*/ #define HCKSUM_VXLAN_FULL 0x0080 /* Inner L3/L4 & outer L3/L4 offload */ -#define HCKSUM_VXLAN_FULL_NO_OL4 0x0100 /* Same as HCKSUM_VXLAN_FULL but no */ - /* outer L4 offload */ +#define HCKSUM_VXLAN_PSEUDO 0x0100 /* Inner L3/L4 & outer L3/L4 offload */ + /* L4 requires pseudo header */ +#define HCKSUM_VXLAN_PSEUDO_NO_OL4 0x0200 /* Same as HCKSUM_VXLAN_PSEUDO */ + /* but no outer L4 offload */ #define HCKSUM_ALL_BUT_ENBL (HCKSUM_INET_PARTIAL | \ HCKSUM_INET_FULL_V4 | HCKSUM_INET_FULL_V6 | \ HCKSUM_IPHDRCKSUM | \ HCKSUM_INET_FULL_ICMPV4 | HCKSUM_INET_FULL_ICMPV6 | \ - HCKSUM_VXLAN_FULL | HCKSUM_VXLAN_FULL_NO_OL4) + HCKSUM_VXLAN_FULL | HCKSUM_VXLAN_PSEUDO | \ + HCKSUM_VXLAN_PSEUDO_NO_OL4) + +#define HCKSUM_TUNNEL_VXLAN_OIP (HCKSUM_VXLAN_FULL | HCKSUM_VXLAN_PSEUDO | \ + HCKSUM_VXLAN_PSEUDO_NO_OL4) #define HCKSUM_ALL (HCKSUM_ENABLE | HCKSUM_ALL_BUT_ENBL) diff --git a/usr/src/uts/common/sys/mac_provider.h b/usr/src/uts/common/sys/mac_provider.h index f5c91e7933..e27ba63f2d 100644 --- a/usr/src/uts/common/sys/mac_provider.h +++ b/usr/src/uts/common/sys/mac_provider.h @@ -120,10 +120,16 @@ typedef struct lso_basic_tcp_ipv4_s { t_uscalar_t lso_max; /* maximum payload */ } lso_basic_tcp_ipv4_t; +typedef struct lso_vxlan_tcp { + t_uscalar_t lso_tcpv4_max; /* maximum payload */ + t_uscalar_t lso_tcpv6_max; /* maximum payload */ +} lso_vxlan_tcp_t; + /* * Currently supported flags for LSO. */ -#define LSO_TX_BASIC_TCP_IPV4 0x01 /* TCP LSO capability */ +#define LSO_TX_BASIC_TCP_IPV4 0x01 /* TCPv4 LSO capability */ +#define LSO_TX_VXLAN_TCP 0x02 /* VXLAN LSO capability */ /* * Future LSO capabilities can be added at the end of the mac_capab_lso_t. 
@@ -136,6 +142,7 @@ typedef struct lso_basic_tcp_ipv4_s { typedef struct mac_capab_lso_s { t_uscalar_t lso_flags; lso_basic_tcp_ipv4_t lso_basic_tcp_ipv4; + lso_vxlan_tcp_t lso_vxlan_tcp; /* Add future lso capabilities here */ } mac_capab_lso_t; @@ -603,6 +610,8 @@ extern void mac_transceiver_info_set_usable( mac_transceiver_info_t *, boolean_t); +extern void mac_tunnel_type_get(const mblk_t *, uint32_t *); + #endif /* _KERNEL */ #ifdef __cplusplus diff --git a/usr/src/uts/common/sys/overlay_common.h b/usr/src/uts/common/sys/overlay_common.h index d638096006..b137c24ca5 100644 --- a/usr/src/uts/common/sys/overlay_common.h +++ b/usr/src/uts/common/sys/overlay_common.h @@ -42,7 +42,8 @@ typedef enum overlay_prop_type { OVERLAY_PROP_T_INT = 0x1, /* signed int */ OVERLAY_PROP_T_UINT, /* unsigned int */ OVERLAY_PROP_T_IP, /* sinaddr6 */ - OVERLAY_PROP_T_STRING /* OVERLAY_PROPS_SIZEMAX */ + OVERLAY_PROP_T_STRING, /* OVERLAY_PROPS_SIZEMAX */ + OVERLAY_PROP_T_BOOLEAN /* unsigned int */ } overlay_prop_type_t; typedef enum overlay_prop_prot { diff --git a/usr/src/uts/common/sys/overlay_impl.h b/usr/src/uts/common/sys/overlay_impl.h index 7fb8b8da1d..0240ac3090 100644 --- a/usr/src/uts/common/sys/overlay_impl.h +++ b/usr/src/uts/common/sys/overlay_impl.h @@ -61,6 +61,7 @@ typedef struct overlay_mux { int omux_protocol; /* RO: socket protocol */ struct sockaddr *omux_addr; /* RO: socket address */ socklen_t omux_alen; /* RO: sockaddr len */ + boolean_t omux_strictif; /* RO: strict IF bind */ kmutex_t omux_lock; /* Protects everything below */ uint_t omux_count; /* Active instances */ avl_tree_t omux_devices; /* Tree of devices */ @@ -115,6 +116,7 @@ typedef struct overlay_dev { uint_t odd_txcount; /* protected by odd_lock */ overlay_mux_t *odd_mux; /* protected by odd_lock */ uint64_t odd_vid; /* RO if active else odd_lock */ + boolean_t odd_strictif; /* RO if active else odd_lock */ avl_node_t odd_muxnode; /* managed by mux */ overlay_target_t *odd_target; /* See big theory 
statement */ char odd_fmamsg[OVERLAY_STATUS_BUFLEN]; /* odd_lock */ @@ -167,7 +169,7 @@ extern void overlay_mux_init(void); extern void overlay_mux_fini(void); extern overlay_mux_t *overlay_mux_open(overlay_plugin_t *, int, int, int, - struct sockaddr *, socklen_t, int *); + struct sockaddr *, socklen_t, boolean_t, int *); extern void overlay_mux_close(overlay_mux_t *); extern void overlay_mux_add_dev(overlay_mux_t *, overlay_dev_t *); extern void overlay_mux_remove_dev(overlay_mux_t *, overlay_dev_t *); diff --git a/usr/src/uts/common/sys/overlay_plugin.h b/usr/src/uts/common/sys/overlay_plugin.h index 07efaa05df..13447808ee 100644 --- a/usr/src/uts/common/sys/overlay_plugin.h +++ b/usr/src/uts/common/sys/overlay_plugin.h @@ -267,12 +267,14 @@ typedef int (*overlay_plugin_init_t)(overlay_handle_t, void **); typedef void (*overlay_plugin_fini_t)(void *); typedef int (*overlay_plugin_socket_t)(void *, int *, int *, int *, struct sockaddr *, socklen_t *); -typedef int (*overlay_plugin_sockopt_t)(ksocket_t); +typedef int (*overlay_plugin_sockopt_t)(ksocket_t, boolean_t); typedef int (*overlay_plugin_getprop_t)(void *, const char *, void *, uint32_t *); typedef int (*overlay_plugin_setprop_t)(void *, const char *, const void *, uint32_t); typedef int (*overlay_plugin_propinfo_t)(const char *, overlay_prop_handle_t); +typedef boolean_t (*overlay_plugin_mac_capab_t)(void *, mac_capab_t, void *, + ksocket_t); typedef struct overlay_plugin_ops { uint_t ovpo_callbacks; @@ -285,6 +287,7 @@ typedef struct overlay_plugin_ops { overlay_plugin_getprop_t ovpo_getprop; overlay_plugin_setprop_t ovpo_setprop; overlay_plugin_propinfo_t ovpo_propinfo; + overlay_plugin_mac_capab_t ovpo_mac_capab; } overlay_plugin_ops_t; typedef struct overlay_plugin_register { @@ -311,7 +314,8 @@ extern int overlay_plugin_unregister(const char *); extern void overlay_prop_set_name(overlay_prop_handle_t, const char *); extern void overlay_prop_set_prot(overlay_prop_handle_t, overlay_prop_prot_t); extern 
void overlay_prop_set_type(overlay_prop_handle_t, overlay_prop_type_t); -extern int overlay_prop_set_default(overlay_prop_handle_t, void *, ssize_t); +extern int overlay_prop_set_default(overlay_prop_handle_t, const void *, + ssize_t); extern void overlay_prop_set_nodefault(overlay_prop_handle_t); extern void overlay_prop_set_range_uint32(overlay_prop_handle_t, uint32_t, uint32_t); diff --git a/usr/src/uts/common/sys/pattr.h b/usr/src/uts/common/sys/pattr.h index a9d999a11b..6545c5d619 100644 --- a/usr/src/uts/common/sys/pattr.h +++ b/usr/src/uts/common/sys/pattr.h @@ -105,26 +105,39 @@ typedef struct pattr_hcksum_s { /* On Receive: equivalent to */ /* HCK_IPV4_HDRCKSUM_OK for the inner */ /* header */ +#define HCK_INNER_IPV4_HDRCKSUM_NEEDED 0x20 /* On Transmit: equivalent to */ + /* HCK_IPV4_HDRCKSUM; HW calculates */ + /* inner checksum */ #define HCK_INNER_FULLCKSUM_OK 0x40 /* On Transmit: N/A */ /* On Receive: equivalent to */ /* HCK_FULLCKSUM_OK for the inner */ /* header */ -#define HCK_INNER_IPV4_HDRCKSUM_NEEDED 0x80 /* On Transmit: equivalent to */ - /* HCK_IPV4_HDRCKSUM; HW calculates */ - /* inner checksum */ - -#define HCK_INNER_FULLCKSUM_NEEDED 0x100 /* On Transmit: equivalent to */ +#define HCK_INNER_FULLCKSUM_NEEDED 0x40 /* On Transmit: equivalent to */ /* HCK_FULLCKSUM; HW calculates inner */ /* L4 header checksum. */ -#define HCK_FLAGS (HCK_IPV4_HDRCKSUM | HCK_PARTIALCKSUM | \ - HCK_FULLCKSUM | HCK_FULLCKSUM_OK | \ - HCK_INNER_IPV4_HDRCKSUM_OK | \ +#define HCK_INNER_PSEUDO_NEEDED 0x80 /* On Transmit: offload */ + /* of the inner TCP/UDP header, but */ + /* requires that the pseudo-header */ + /* is filled in the checksum. 
Like */ + /* HCK_PARTIALCKSUM, but no fields */ + /* saved */ + +#define HCK_INNER_FLAGS_NEEDED (HCK_INNER_IPV4_HDRCKSUM_NEEDED | \ + HCK_INNER_FULLCKSUM_NEEDED | \ + HCK_INNER_PSEUDO_NEEDED) + +#define HCK_INNER_FLAGS (HCK_INNER_IPV4_HDRCKSUM_OK | \ HCK_INNER_FULLCKSUM_OK | \ - HCK_INNER_IPV4_HDRCKSUM_NEEDED | \ - HCK_INNER_FULLCKSUM_NEEDED) + HCK_INNER_FLAGS_NEEDED) + +#define HCK_OUTER_FLAGS (HCK_IPV4_HDRCKSUM | HCK_PARTIALCKSUM | \ + HCK_FULLCKSUM | HCK_FULLCKSUM_OK) + +#define HCK_FLAGS (HCK_INNER_FLAGS | HCK_OUTER_FLAGS) + /* * Extended hardware offloading flags that also use hcksum_flags */ @@ -134,6 +147,15 @@ typedef struct pattr_hcksum_s { #define HW_LSO_FLAGS HW_LSO /* All LSO flags, currently only one */ /* + * The upper three bits are used to indicate if the packet has any known + * tunneling information. + */ +#define TTYPE_MASK 0xe000 +#define TTYPE_SHIFT 13 +#define TTYPE_NONE 0x00 +#define TTYPE_VXLAN 0x01 + +/* * Structure used for zerocopy attribute. */ typedef struct pattr_zcopy_s { diff --git a/usr/src/uts/common/sys/strsubr.h b/usr/src/uts/common/sys/strsubr.h index 0f29dd3675..54d39389f0 100644 --- a/usr/src/uts/common/sys/strsubr.h +++ b/usr/src/uts/common/sys/strsubr.h @@ -1345,6 +1345,7 @@ extern int SAMESTR(queue_t *); #define DB_CKSUM32(mp) ((mp)->b_datap->db_cksum32) #define DB_LSOFLAGS(mp) ((mp)->b_datap->db_struioun.cksum.flags) #define DB_LSOMSS(mp) ((mp)->b_datap->db_struioun.cksum.pad) +#define DB_TTYPEFLAGS(mp) ((mp)->b_datap->db_struioun.cksum.flags) #endif /* _KERNEL */ #ifdef __cplusplus |
