diff options
author | Robert Mustacchi <rm@joyent.com> | 2018-03-28 23:20:07 +0000 |
---|---|---|
committer | Robert Mustacchi <rm@joyent.com> | 2018-04-02 16:52:13 +0000 |
commit | 9dc0f5536a83ce4ef09e0009beec208ac83f0a75 (patch) | |
tree | e27546e6376142b460df5081c90a7ebd09859472 | |
parent | 51e13f4784dca9f0e910f3d0fb85b659ad68ceb2 (diff) | |
download | illumos-joyent-netperf.tar.gz |
OS-XXXX wip vxlan lso netperf
-rw-r--r-- | usr/src/uts/common/inet/ip.h | 2 | ||||
-rw-r--r-- | usr/src/uts/common/inet/ip/ip_if.c | 16 | ||||
-rw-r--r-- | usr/src/uts/common/inet/ip/ip_output.c | 15 | ||||
-rw-r--r-- | usr/src/uts/common/inet/ip_if.h | 2 | ||||
-rw-r--r-- | usr/src/uts/common/inet/udp/udp.c | 182 | ||||
-rw-r--r-- | usr/src/uts/common/inet/udp_impl.h | 4 | ||||
-rw-r--r-- | usr/src/uts/common/io/dld/dld_proto.c | 22 | ||||
-rw-r--r-- | usr/src/uts/common/io/i40e/i40e_gld.c | 7 | ||||
-rw-r--r-- | usr/src/uts/common/io/i40e/i40e_stats.c | 19 | ||||
-rw-r--r-- | usr/src/uts/common/io/i40e/i40e_sw.h | 6 | ||||
-rw-r--r-- | usr/src/uts/common/io/i40e/i40e_transceiver.c | 227 | ||||
-rw-r--r-- | usr/src/uts/common/io/overlay/overlay.c | 11 | ||||
-rw-r--r-- | usr/src/uts/common/io/overlay/plugins/overlay_vxlan.c | 9 | ||||
-rw-r--r-- | usr/src/uts/common/netinet/udp.h | 2 | ||||
-rw-r--r-- | usr/src/uts/common/sys/dld.h | 7 | ||||
-rw-r--r-- | usr/src/uts/common/sys/mac_provider.h | 8 |
16 files changed, 455 insertions, 84 deletions
diff --git a/usr/src/uts/common/inet/ip.h b/usr/src/uts/common/inet/ip.h index 733c65ea29..ee014edda8 100644 --- a/usr/src/uts/common/inet/ip.h +++ b/usr/src/uts/common/inet/ip.h @@ -1596,6 +1596,8 @@ struct ill_zerocopy_capab_s { struct ill_lso_capab_s { uint_t ill_lso_flags; /* capabilities */ uint_t ill_lso_max; /* maximum size of payload */ + uint_t ill_lso_vxlan_cksum; + uint_t ill_lso_vxlan_tcp_max; }; /* diff --git a/usr/src/uts/common/inet/ip/ip_if.c b/usr/src/uts/common/inet/ip/ip_if.c index 917e526bb1..b1bb4abbc3 100644 --- a/usr/src/uts/common/inet/ip/ip_if.c +++ b/usr/src/uts/common/inet/ip/ip_if.c @@ -2096,6 +2096,8 @@ ill_capability_lso_enable(ill_t *ill) DLD_ENABLE)) == 0) { ill->ill_lso_capab->ill_lso_flags = lso.lso_flags; ill->ill_lso_capab->ill_lso_max = lso.lso_max; + ill->ill_lso_capab->ill_lso_vxlan_cksum = lso.lso_vxlan_cksum; + ill->ill_lso_capab->ill_lso_vxlan_tcp_max = lso.lso_vxlan_tcp_max; ill->ill_capabilities |= ILL_CAPAB_LSO; ip1dbg(("ill_capability_lso_enable: interface %s " "has enabled LSO\n ", ill->ill_name)); @@ -19212,8 +19214,7 @@ out: } int -ip_bindif_hwcaps(conn_t *connp, uint_t *hckflags, uint_t *lsoflags, - uint_t *lsomax) +ip_bindif_hwcaps(conn_t *connp, uint_t *hckflags, ill_lso_capab_t *lso) { in6_addr_t laddrv6; in_addr_t laddrv4; @@ -19222,8 +19223,7 @@ ip_bindif_hwcaps(conn_t *connp, uint_t *hckflags, uint_t *lsoflags, ip_stack_t *ipst; int ret; - if (connp == NULL || hckflags == NULL || lsoflags == NULL || - lsomax == NULL) { + if (connp == NULL || hckflags == NULL || lso == NULL) { return (EINVAL); } @@ -19270,12 +19270,10 @@ ip_bindif_hwcaps(conn_t *connp, uint_t *hckflags, uint_t *lsoflags, * to deal with getting updates. 
*/ if (ILL_LSO_USABLE(ipif->ipif_ill)) { - ill_lso_capab_t *lsop = ipif->ipif_ill->ill_lso_capab; - *lsoflags = lsop->ill_lso_flags; - *lsomax = lsop->ill_lso_max; + bcopy(ipif->ipif_ill->ill_lso_capab, lso, + sizeof (ill_lso_capab_t)); } else { - *lsoflags = 0; - *lsomax = 0; + bzero(lso, sizeof (ill_lso_capab_t)); } if (ILL_HCKSUM_CAPABLE(ipif->ipif_ill)) { diff --git a/usr/src/uts/common/inet/ip/ip_output.c b/usr/src/uts/common/inet/ip/ip_output.c index 690f39e0dc..05c9115e03 100644 --- a/usr/src/uts/common/inet/ip/ip_output.c +++ b/usr/src/uts/common/inet/ip/ip_output.c @@ -1721,13 +1721,6 @@ ip_output_cksum_v4(iaflags_t ixaflags, mblk_t *mp, ipha_t *ipha, } /* - * If we've been asked to skip the ULP checksum, then just let IP do its - * business. - */ - if ((ixa->ixa_flags & IXAF_SKIP_ULP_CKSUM) != 0) - goto ip_hdr_cksum; - - /* * Calculate ULP checksum. Note that we don't use cksump and cksum * if the ill has FULL support. */ @@ -1795,6 +1788,14 @@ ip_output_cksum_v4(iaflags_t ixaflags, mblk_t *mp, ipha_t *ipha, can_partial = (hck_flags & HCKSUM_INET_PARTIAL) != 0; } DB_CKSUMFLAGS(mp) &= ~HCK_OUTER_FLAGS; + + if ((ixa->ixa_flags & IXAF_SKIP_ULP_CKSUM) != 0 && can_inet) { + DB_CKSUMFLAGS(mp) |= HCK_IPV4_HDRCKSUM; + *cksump = 0; + ipha->ipha_hdr_checksum = 0; + return (B_TRUE); + } + if (can_full) { /* * Hardware calculates pseudo-header, header and the diff --git a/usr/src/uts/common/inet/ip_if.h b/usr/src/uts/common/inet/ip_if.h index e705b6ee08..7e45f42183 100644 --- a/usr/src/uts/common/inet/ip_if.h +++ b/usr/src/uts/common/inet/ip_if.h @@ -493,7 +493,7 @@ extern int ipif_arp_up(ipif_t *, enum ip_resolver_action, boolean_t); extern void ipif_dup_recovery(void *); extern void ipif_do_recovery(ipif_t *); -extern int ip_bindif_hwcaps(conn_t *, uint_t *, uint_t *, uint_t *); +extern int ip_bindif_hwcaps(conn_t *, uint_t *, ill_lso_capab_t *); extern int ip_bindif_ifindex(conn_t *, uint_t *); /* diff --git a/usr/src/uts/common/inet/udp/udp.c 
b/usr/src/uts/common/inet/udp/udp.c index 1222c68e83..b8ad88635f 100644 --- a/usr/src/uts/common/inet/udp/udp.c +++ b/usr/src/uts/common/inet/udp/udp.c @@ -553,6 +553,49 @@ udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) } /* + * This function determines whether or not we have a tunneled TSO packet and + * takes care of setting up the things that we need to deal with this. Snapshot + * the needed data under the conn_lock to make sure that this is safe. We need + * to make sure that we do the following: + * + * 1. Use the UDP mss to set the fragment size + * 2. Use the mblk_t mss to set the extra ident + * + * Calculating the amount of extra ident here is a bit annoying. We have some + * number of headers that need to be taken into account that will be in each + * message block. To avoid reaching into the inner TCP data and figuring out + * what the actual length is, we'll approximate this by just taking the total + * message length, dividing it by the mss and adding two. One is to round up and + * then the additional one is to account for anything that the headers throw + * off. + */ +static boolean_t +udp_setup_tunnel_lso(udp_t *udp, ip_xmit_attr_t *ixa, mblk_t *mp) +{ + conn_t *connp = udp->udp_connp; + uint32_t udpmss, mpmss; + + if ((DB_LSOFLAGS(mp) & HW_LSO) == 0) + return (B_FALSE); + + mpmss = DB_LSOMSS(mp); + ASSERT3U(mpmss, !=, 0); + + mutex_enter(&connp->conn_lock); + if (udp->udp_tunnel == 0 || udp->udp_tunnel_tso == 0) { + mutex_exit(&connp->conn_lock); + return (B_FALSE); + } + udpmss = udp->udp_tso_mss; + mutex_exit(&connp->conn_lock); + + ixa->ixa_fragsize = udpmss; + ixa->ixa_extra_ident = msgsize(mp) / mpmss + 2; + + return (B_TRUE); +} + +/* * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message * passed to udp_wput. * It associates a port number and local address with the stream. 
@@ -1572,10 +1615,71 @@ udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) return (B_TRUE); } +/* + * Based on the UDP socket in question, determine whether or not we can perform + * hardware checksumming. The constraints are as follows: + * + * - If hardware supports checksumming both the inner and outer L4 header, + * then we can. + * + * - If hardware only supports the inner L4 header, then we can if we're on an + * IPv4 socket and the client has requested relaxed checksumming (e.g. no UDP + * checksum). + * + * This returns the type of checksum that we'll be performing in the same + * constants that the LSO logic uses to make it easier to compare what works and + * doesn't work there. + */ +static int +udp_can_vxlan_checksum(udp_t *udp, uint_t cksum, boolean_t relax) +{ + if ((cksum & HCKSUM_VXLAN_FULL) != 0) + return (DLD_LSO_VXLAN_OUDP_CSUM_FULL); + + if ((cksum & HCKSUM_VXLAN_PSEUDO) != 0) + return (DLD_LSO_VXLAN_OUDP_CSUM_PSEUDO); + + if (udp->udp_connp->conn_ipversion != IPV4_VERSION) + return (-1); + + if ((cksum & HCKSUM_VXLAN_PSEUDO_NO_OL4) != 0 && relax) + return (DLD_LSO_VXLAN_OUDP_CSUM_NONE); + + return (-1); +} + +/* + * Based on the hardware checksum and LSO capabilities, determine if we can + * support VXLAN based TSO. The main gotcha here is whether or not the hardware + * supports the checksumming features we need for TSO. See + * udp_can_vxlan_checksum for the requirements for checksumming. + */ +static boolean_t +udp_can_vxlan_tso(udp_t *udp, uint_t cksum, ill_lso_capab_t *lso, + boolean_t relax) +{ + int cktype; + + /* + * If we can't perform checksum offload, then we can't do anything. 
+ */ + if ((cktype = udp_can_vxlan_checksum(udp, cksum, relax)) == -1) + return (B_FALSE); + + if ((lso->ill_lso_flags & + (DLD_LSO_VXLAN_TCP_IPV4 | DLD_LSO_VXLAN_TCP_IPV6)) == 0) + return (B_FALSE); + + if (cktype != lso->ill_lso_vxlan_cksum) + return (B_FALSE); + + return (B_TRUE); +} + static int udp_do_opt_tunnel_get(conn_t *connp, udp_t *udp, udp_tunnel_opt_t *optp) { - uint_t hck, lso, mss; + boolean_t relax; mutex_enter(&connp->conn_lock); bzero(optp, sizeof (udp_tunnel_opt_t)); @@ -1596,18 +1700,31 @@ udp_do_opt_tunnel_get(conn_t *connp, udp_t *udp, udp_tunnel_opt_t *optp) if (udp->udp_skip_cksum != 0) { optp->uto_opts |= UDP_TUNNEL_OPT_RELAX_CKSUM; + relax = B_TRUE; + } else { + relax = B_FALSE; } mutex_exit(&connp->conn_lock); if ((optp->uto_opts & UDP_TUNNEL_OPT_HWCAP) != 0) { - if (ip_bindif_hwcaps(connp, &hck, &lso, &mss) != 0) + uint_t hck; + ill_lso_capab_t lso; + + bzero(&lso, sizeof (lso)); + if (ip_bindif_hwcaps(connp, &hck, &lso) != 0) return (-1); optp->uto_type = UDP_TUNNEL_VXLAN; - optp->uto_cksum_flags = hck; - optp->uto_lso_flags = lso; - optp->uto_lso_max = mss; + if (udp_can_vxlan_checksum(udp, hck, relax) != -1) { + optp->uto_cksum_flags = hck; + } + + if (udp_can_vxlan_tso(udp, hck, &lso, relax)) { + optp->uto_lso_flags = lso.ill_lso_flags & + (DLD_LSO_VXLAN_TCP_IPV4 | DLD_LSO_VXLAN_TCP_IPV6); + optp->uto_lso_tcp_max = lso.ill_lso_vxlan_tcp_max; + } } return (sizeof (udp_tunnel_opt_t)); @@ -1790,18 +1907,19 @@ udp_do_opt_tunnel_set(conn_opt_arg_t *coa, cred_t *cr, udp_tunnel_opt_t *optp) } /* - * Set the fact that this is tunneled. We'll leave actually fetching the - * information to the getsockopt. + * Set the fact that this is tunneled. We want to do this before we + * potentially drop the conn_lock when looking at the HWCAP option so we + * can make sure that we're safe against a concurrent setsockopt(). */ udp->udp_tunnel = 1; - /* - * We trust that the caller has asked for strict binding. 
- */ if ((optp->uto_opts & UDP_TUNNEL_OPT_HWCAP) != 0) { uint_t ifindex; int ret; t_scalar_t proto, cmd; + boolean_t can_relax; + uint_t hck; + ill_lso_capab_t lso; if (connp->conn_ipversion == IPV4_VERSION) { proto = IPPROTO_IP; @@ -1816,6 +1934,9 @@ udp_do_opt_tunnel_set(conn_opt_arg_t *coa, cred_t *cr, udp_tunnel_opt_t *optp) * Try and set up the strict binding to the listen interface. */ if ((ret = ip_bindif_ifindex(connp, &ifindex)) != 0) { + mutex_enter(&connp->conn_lock); + udp->udp_tunnel = 0; + mutex_exit(&connp->conn_lock); return (ret); } @@ -1828,8 +1949,25 @@ udp_do_opt_tunnel_set(conn_opt_arg_t *coa, cred_t *cr, udp_tunnel_opt_t *optp) return (ret); } + /* + * XXX We should be setting up a change listener here. + */ + bzero(&lso, sizeof (lso)); + if ((ret = ip_bindif_hwcaps(connp, &hck, &lso)) != 0) { + mutex_enter(&connp->conn_lock); + udp->udp_tunnel = 0; + mutex_exit(&connp->conn_lock); + return (ret); + } + mutex_enter(&connp->conn_lock); udp->udp_tunnel_hwcap = 1; + + can_relax = (optp->uto_opts & UDP_TUNNEL_OPT_RELAX_CKSUM) != 0; + if (udp_can_vxlan_tso(udp, hck, &lso, can_relax)) { + udp->udp_tunnel_tso = 1; + udp->udp_tso_mss = lso.ill_lso_vxlan_tcp_max; + } } if ((optp->uto_opts & UDP_TUNNEL_OPT_SRCPORT_HASH) != 0) { @@ -2905,7 +3043,8 @@ retry: * Either tudr_mp or msg is set. If tudr_mp we take ancillary data from * the TPI options, otherwise we take them from msg_control. * If both sin and sin6 is set it is a connected socket and we use conn_faddr. - * Always consumes mp; never consumes tudr_mp. + * Always consumes mp; never consumes tudr_mp. Kernel UDP tunnels do not use + * this path and therefore this does not perform any TSO checks. 
*/ static int udp_output_ancillary(conn_t *connp, sin_t *sin, sin6_t *sin6, mblk_t *mp, @@ -3223,6 +3362,7 @@ udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid) udp_stack_t *us = udp->udp_us; int error; ip_xmit_attr_t *ixa; + boolean_t lso; /* * If no other thread is using conn_ixa this just gets a reference to @@ -3319,6 +3459,8 @@ udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid) } ASSERT(ixa->ixa_ire != NULL); + lso = udp_setup_tunnel_lso(udp, ixa, mp); + /* We're done. Pass the packet to ip. */ UDPS_BUMP_MIB(us, udpHCOutDatagrams); @@ -3346,6 +3488,10 @@ udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid) ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); ixa->ixa_cred = connp->conn_cred; /* Restore */ ixa->ixa_cpid = connp->conn_cpid; + if (lso) { + ixa->ixa_fragsize = ixa->ixa_pmtu; + ixa->ixa_extra_ident = 0; + } ixa_refrele(ixa); return (error); } @@ -3363,6 +3509,7 @@ udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid, udp_t *udp = connp->conn_udp; udp_stack_t *us = udp->udp_us; int error; + boolean_t lso; ASSERT(MUTEX_HELD(&connp->conn_lock)); ASSERT(ixa != NULL); @@ -3449,6 +3596,8 @@ udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid, mutex_exit(&connp->conn_lock); } + lso = udp_setup_tunnel_lso(udp, ixa, mp); + /* We're done. Pass the packet to ip. 
*/ UDPS_BUMP_MIB(us, udpHCOutDatagrams); @@ -3489,6 +3638,10 @@ udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid, ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); ixa->ixa_cred = connp->conn_cred; /* Restore */ ixa->ixa_cpid = connp->conn_cpid; + if (lso) { + ixa->ixa_fragsize = ixa->ixa_pmtu; + ixa->ixa_extra_ident = 0; + } ixa_refrele(ixa); return (error); } @@ -4022,6 +4175,7 @@ udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, sin6_t *sin6, in6_addr_t v6dst; in6_addr_t v6nexthop; in_port_t dstport; + boolean_t lso; ASSERT(MUTEX_HELD(&connp->conn_lock)); ASSERT(ixa != NULL); @@ -4284,6 +4438,8 @@ udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, sin6_t *sin6, goto ud_error; } + lso = udp_setup_tunnel_lso(udp, ixa, data_mp); + /* We're done. Pass the packet to ip. */ UDPS_BUMP_MIB(us, udpHCOutDatagrams); @@ -4324,6 +4480,10 @@ udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, sin6_t *sin6, ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); ixa->ixa_cred = connp->conn_cred; /* Restore */ ixa->ixa_cpid = connp->conn_cpid; + if (lso) { + ixa->ixa_fragsize = ixa->ixa_pmtu; + ixa->ixa_extra_ident = 0; + } ixa_refrele(ixa); return (error); diff --git a/usr/src/uts/common/inet/udp_impl.h b/usr/src/uts/common/inet/udp_impl.h index 3fccefb119..24978335ad 100644 --- a/usr/src/uts/common/inet/udp_impl.h +++ b/usr/src/uts/common/inet/udp_impl.h @@ -186,8 +186,10 @@ typedef struct udp_s { udp_tunnel: 1, /* UDP_TUNNEL called */ udp_tunnel_hwcap: 1, /* UDP_TUNNEL asked for strict bind */ udp_skip_cksum: 1, /* UDP_TUNNEL asked for no checksum */ + udp_tunnel_tso: 1, /* UDP tunnel traffic can perform TSO */ - udp_pad_to_bit_31 : 25; + udp_pad_to_bit_31 : 23; + uint32_t udp_tso_mss; /* TSO MSS for tunneled traffic */ /* Following 2 fields protected by the uf_lock */ struct udp_s *udp_bind_hash; /* Bind hash chain */ diff --git a/usr/src/uts/common/io/dld/dld_proto.c b/usr/src/uts/common/io/dld/dld_proto.c index 
d6f57091b4..56de77bc60 100644 --- a/usr/src/uts/common/io/dld/dld_proto.c +++ b/usr/src/uts/common/io/dld/dld_proto.c @@ -1493,6 +1493,17 @@ dld_capab_poll(dld_str_t *dsp, void *data, uint_t flags) return (ENOTSUP); } +static void +dld_capab_lso_fill_vxlan(mac_capab_lso_t *mac, dld_capab_lso_t *lso) +{ + lso->lso_vxlan_cksum = mac->lso_vxlan_tcp.lso_oudp_cksum; + + if (mac->lso_vxlan_tcp.lso_tcp_max > 0) { + lso->lso_flags |= DLD_LSO_VXLAN_TCP_IPV4 | DLD_LSO_VXLAN_TCP_IPV6; + lso->lso_vxlan_tcp_max = mac->lso_vxlan_tcp.lso_tcp_max; + } +} + static int dld_capab_lso(dld_str_t *dsp, void *data, uint_t flags) { @@ -1514,14 +1525,9 @@ dld_capab_lso(dld_str_t *dsp, void *data, uint_t flags) /* translate the flag for mac clients */ if ((mac_lso.lso_flags & LSO_TX_BASIC_TCP_IPV4) != 0) lso->lso_flags |= DLD_LSO_BASIC_TCP_IPV4; - /* XXX We should probably not rely on equality */ - if ((mac_lso.lso_flags & LSO_TX_VXLAN_TCP) != 0 && - mac_lso.lso_vxlan_tcp.lso_tcpv4_max == lso->lso_max) { - lso->lso_flags |= DLD_LSO_VXLAN_TCP_IPV4; - } - if ((mac_lso.lso_flags & LSO_TX_VXLAN_TCP) != 0 && - mac_lso.lso_vxlan_tcp.lso_tcpv6_max == lso->lso_max) { - lso->lso_flags |= DLD_LSO_VXLAN_TCP_IPV6; + + if ((mac_lso.lso_flags & LSO_TX_VXLAN_TCP) != 0) { + dld_capab_lso_fill_vxlan(&mac_lso, lso); } dsp->ds_lso = B_TRUE; dsp->ds_lso_max = lso->lso_max; diff --git a/usr/src/uts/common/io/i40e/i40e_gld.c b/usr/src/uts/common/io/i40e/i40e_gld.c index 5e488c4606..c59988601e 100644 --- a/usr/src/uts/common/io/i40e/i40e_gld.c +++ b/usr/src/uts/common/io/i40e/i40e_gld.c @@ -747,8 +747,13 @@ i40e_m_getcapab(void *arg, mac_capab_t cap, void *cap_data) mac_capab_lso_t *cap_lso = cap_data; if (i40e->i40e_tx_lso_enable == B_TRUE) { - cap_lso->lso_flags = LSO_TX_BASIC_TCP_IPV4; + cap_lso->lso_flags = LSO_TX_BASIC_TCP_IPV4 | + LSO_TX_VXLAN_TCP; cap_lso->lso_basic_tcp_ipv4.lso_max = I40E_LSO_MAXLEN; + /* XXX This is not the case for the X722 */ + cap_lso->lso_vxlan_tcp.lso_oudp_cksum = + 
LSO_VXLAN_OUDP_CSUM_NONE; + cap_lso->lso_vxlan_tcp.lso_tcp_max = I40E_LSO_MAXLEN; } else { return (B_FALSE); } diff --git a/usr/src/uts/common/io/i40e/i40e_stats.c b/usr/src/uts/common/io/i40e/i40e_stats.c index 7a4f0faedd..44be3749c4 100644 --- a/usr/src/uts/common/io/i40e/i40e_stats.c +++ b/usr/src/uts/common/io/i40e/i40e_stats.c @@ -1234,6 +1234,10 @@ i40e_stats_trqpair_init(i40e_trqpair_t *itrq) kstat_named_init(&tsp->itxs_hck_meoifail, "tx_hck_meoifail", KSTAT_DATA_UINT64); tsp->itxs_hck_meoifail.value.ui64 = 0; + kstat_named_init(&tsp->itxs_hck_badttype, "tx_hck_bad_tunnel_type", + KSTAT_DATA_UINT64); + tsp->itxs_hck_badttype.value.ui64 = 0; + kstat_named_init(&tsp->itxs_hck_nol2info, "tx_hck_nol2info", KSTAT_DATA_UINT64); tsp->itxs_hck_nol2info.value.ui64 = 0; @@ -1243,12 +1247,27 @@ i40e_stats_trqpair_init(i40e_trqpair_t *itrq) kstat_named_init(&tsp->itxs_hck_nol4info, "tx_hck_nol4info", KSTAT_DATA_UINT64); tsp->itxs_hck_nol4info.value.ui64 = 0; + kstat_named_init(&tsp->itxs_hck_notunl2info, "tx_hck_notunl2info", + KSTAT_DATA_UINT64); + tsp->itxs_hck_notunl2info.value.ui64 = 0; + kstat_named_init(&tsp->itxs_hck_notunl3info, "tx_hck_notunl3info", + KSTAT_DATA_UINT64); + tsp->itxs_hck_notunl3info.value.ui64 = 0; + kstat_named_init(&tsp->itxs_hck_notunl4info, "tx_hck_notunl4info", + KSTAT_DATA_UINT64); + tsp->itxs_hck_notunl4info.value.ui64 = 0; kstat_named_init(&tsp->itxs_hck_badl3, "tx_hck_badl3", KSTAT_DATA_UINT64); tsp->itxs_hck_badl3.value.ui64 = 0; kstat_named_init(&tsp->itxs_hck_badl4, "tx_hck_badl4", KSTAT_DATA_UINT64); tsp->itxs_hck_badl4.value.ui64 = 0; + kstat_named_init(&tsp->itxs_hck_outer, "tx_hck_outer", + KSTAT_DATA_UINT64); + tsp->itxs_hck_outer.value.ui64 = 0; + kstat_named_init(&tsp->itxs_hck_badtso, "tx_hck_badtso", + KSTAT_DATA_UINT64); + tsp->itxs_hck_badtso.value.ui64 = 0; kstat_named_init(&tsp->itxs_err_notcb, "tx_err_notcb", KSTAT_DATA_UINT64); tsp->itxs_err_notcb.value.ui64 = 0; diff --git a/usr/src/uts/common/io/i40e/i40e_sw.h 
b/usr/src/uts/common/io/i40e/i40e_sw.h index 87c4421971..0beb4900b2 100644 --- a/usr/src/uts/common/io/i40e/i40e_sw.h +++ b/usr/src/uts/common/io/i40e/i40e_sw.h @@ -536,11 +536,17 @@ typedef struct i40e_txq_stat { * Various failure conditions. */ kstat_named_t itxs_hck_meoifail; /* ether offload failures */ + kstat_named_t itxs_hck_badttype; /* bad tunnel type */ kstat_named_t itxs_hck_nol2info; /* Missing l2 info */ kstat_named_t itxs_hck_nol3info; /* Missing l3 info */ kstat_named_t itxs_hck_nol4info; /* Missing l4 info */ + kstat_named_t itxs_hck_notunl2info; /* Missing tunnel l2 info */ + kstat_named_t itxs_hck_notunl3info; /* Missing tunnel l3 info */ + kstat_named_t itxs_hck_notunl4info; /* Missing tunnel l4 info */ kstat_named_t itxs_hck_badl3; /* Not IPv4/IPv6 */ kstat_named_t itxs_hck_badl4; /* Bad L4 Paylaod */ + kstat_named_t itxs_hck_outer; /* requested outer tunnel */ + kstat_named_t itxs_hck_badtso; /* Bad checksums for TSO */ kstat_named_t itxs_err_notcb; /* No tcb's available */ kstat_named_t itxs_err_nodescs; /* No tcb's available */ diff --git a/usr/src/uts/common/io/i40e/i40e_transceiver.c b/usr/src/uts/common/io/i40e/i40e_transceiver.c index 2d177b6c71..69a4f94242 100644 --- a/usr/src/uts/common/io/i40e/i40e_transceiver.c +++ b/usr/src/uts/common/io/i40e/i40e_transceiver.c @@ -1682,13 +1682,15 @@ i40e_ring_rx_poll(void *arg, int poll_bytes) * consider adding this to MAC. 
*/ typedef enum mac_ether_offload_flags { - MEOI_L2INFO_SET = 0x01, - MEOI_VLAN_TAGGED = 0x02, - MEOI_L3INFO_SET = 0x04, - MEOI_L3CKSUM_SET = 0x08, - MEOI_L4INFO_SET = 0x10, - MEOI_L4CKSUM_SET = 0x20, - MEOI_TUNNEL_INFO_SET = 0x40, + MEOI_L2INFO_SET = 0x001, + MEOI_VLAN_TAGGED = 0x002, + MEOI_L3INFO_SET = 0x004, + MEOI_L4INFO_SET = 0x010, + MEOI_TUN_INFO_SET = 0x020, + MEOI_TUN_L2INFO_SET = 0x040, + MEOI_TUN_VLAN_TAGGED = 0x080, + MEOI_TUN_L3INFO_SET = 0x100, + MEOI_TUN_L4INFO_SET = 0x200, } mac_ether_offload_flags_t; #define MEOI_L2_L3_L4 (MEOI_L2INFO_SET | MEOI_L3INFO_SET | MEOI_L4INFO_SET) @@ -1709,13 +1711,6 @@ typedef struct mac_ether_offload_info { uint8_t meoi_tun_l3hlen; /* How long is the header? */ uint8_t meoi_tun_l4proto; /* What is the payload type? */ uint8_t meoi_tun_l4hlen; /* How long is the L4 header */ - /* - * The following members are currently not used - */ - mblk_t *meoi_l3ckmp; /* Which mblk has the l3 checksum */ - off_t meoi_l3ckoff; /* What's the offset to it */ - mblk_t *meoi_l4ckmp; /* Which mblk has the L4 checksum */ - off_t meoi_l4ckoff; /* What is the offset to it? */ } mac_ether_offload_info_t; /* @@ -1791,6 +1786,41 @@ i40e_meoi_get_uint16(mblk_t *mp, off_t off, uint16_t *out) } static int +i40e_meoi_zero_uint16(mblk_t *mp, off_t off) +{ + size_t mpsize; + uint8_t *bp; + + mpsize = msgsize(mp); + /* Check for overflow */ + if (off + sizeof (uint16_t) > mpsize) + return (-1); + + mpsize = MBLKL(mp); + while (off >= mpsize) { + mp = mp->b_cont; + off -= mpsize; + mpsize = MBLKL(mp); + } + + /* + * Data is in network order. Note the second byte of data might be in + * the next mp. 
+ */ + bp = mp->b_rptr + off; + *bp = 0; + if (off + 1 == mpsize) { + mp = mp->b_cont; + bp = mp->b_rptr; + } else { + bp++; + } + *bp = 0; + + return (0); +} + +static int mac_ether_offload_info(mblk_t *mp, mac_ether_offload_info_t *meoi, uint32_t ttype, size_t starting_off) { @@ -1895,23 +1925,128 @@ mac_ether_offload_info(mblk_t *mp, mac_ether_offload_info_t *meoi, if (ret != 0) return (ret); - if ((meo.meoi_flags & MEOI_L2_L3_L4) != MEOI_L2_L3_L4) - return (-1); - meoi->meoi_tun_protlen = VXLAN_HDR_LEN; - meoi->meoi_tun_l2hlen = meo.meoi_l2hlen; - meoi->meoi_tun_l3proto = meo.meoi_l3proto; - meoi->meoi_tun_l3hlen = meo.meoi_l3hlen; - meoi->meoi_tun_l4proto = meo.meoi_l4proto; - meoi->meoi_tun_l4hlen = meo.meoi_l4hlen; + meoi->meoi_flags |= MEOI_TUN_INFO_SET; + + if (meo.meoi_flags & MEOI_L2INFO_SET) { + meoi->meoi_flags |= MEOI_TUN_L2INFO_SET; + meoi->meoi_tun_l2hlen = meo.meoi_l2hlen; + } + + if (meo.meoi_flags & MEOI_VLAN_TAGGED) { + meoi->meoi_flags |= MEOI_TUN_VLAN_TAGGED; + } + + if (meo.meoi_flags & MEOI_L3INFO_SET) { + meoi->meoi_flags |= MEOI_TUN_L3INFO_SET; + meoi->meoi_tun_l3proto = meo.meoi_l3proto; + meoi->meoi_tun_l3hlen = meo.meoi_l3hlen; + } + + if (meo.meoi_flags & MEOI_L4INFO_SET) { + meoi->meoi_flags |= MEOI_TUN_L4INFO_SET; + meoi->meoi_tun_l4proto = meo.meoi_l4proto; + meoi->meoi_tun_l4hlen = meo.meoi_l4hlen; + } - meoi->meoi_flags |= MEOI_TUNNEL_INFO_SET; } return (0); } /* + * Determine if we have sufficient checksum flags to perform TSO. This varies + * based on the tunnel type. If we have normal TSO traffic, we need both the + * IPv4 header checksum and the L4 checksum. For VXLAN encoded traffic, we need + * the outer IPv4 checksum and inner checksums. + * + * At this time the networking stack only supports TSO on IPv4 and the X710 + * hardware can't support VXLAN aware TSO on IPv6 due to the fact that it can't + * perform the UDP checksum. 
+ */ +static inline boolean_t +i40e_tx_tso_have_cksums(uint32_t chkflags, uint32_t ttype) +{ + if (ttype == TTYPE_NONE) { + if ((chkflags & HCK_IPV4_HDRCKSUM) == 0) + return (B_FALSE); + if ((chkflags & HCK_PARTIALCKSUM) == 0) + return (B_FALSE); + } else if (ttype == TTYPE_VXLAN) { + if ((chkflags & HCK_IPV4_HDRCKSUM) == 0) + return (B_FALSE); + /* + * We can't perform LSO if we need an outer checksum, so that's + * an error. + */ + if ((chkflags & HCK_PARTIALCKSUM) != 0) + return (B_FALSE); + /* + * When the networking stack supports TSO over IPv6, this check + * will need to be conditional on protocol. + */ + if ((chkflags & HCK_INNER_IPV4_HDRCKSUM_NEEDED) == 0) + return (B_FALSE); + if ((chkflags & HCK_INNER_PSEUDO_NEEDED) == 0) + return (B_FALSE); + } else { + return (B_FALSE); + } + + return (B_TRUE); +} + +/* + * Fix up the message block for TSO to match what hardware expects. The hardware + * requires that the length and checksum for all IP headers be zero. It requires + * that the outer UDP checksum be zero and that the length field be zero. The + * networking stack will have taken care of making sure that the inner (or + * single) TCP header is OK. What we have to do is make sure that: + * + * 1. Outer IP length is zero + * 2. Outer UDP length (if it exists) is zero + * 3. 
Inner IP length (if it exists) is zero + */ +static boolean_t +i40e_tx_tso_fix_mp(mblk_t *mp, uint32_t ttype, mac_ether_offload_info_t *infop) +{ + off_t off = infop->meoi_l2hlen; + + if (infop->meoi_l3proto == ETHERTYPE_IP) { + i40e_meoi_zero_uint16(mp, off + offsetof(ipha_t, ipha_length)); + } else if (infop->meoi_l3proto == ETHERTYPE_IPV6) { + i40e_meoi_zero_uint16(mp, off + offsetof(ip6_t, ip6_plen)); + } else { + return (B_FALSE); + } + + if (ttype == TTYPE_NONE) { + return (B_TRUE); + } else if (ttype != TTYPE_VXLAN) { + return (B_FALSE); + } + + off += infop->meoi_l3hlen; + if (infop->meoi_l4proto != IPPROTO_UDP) { + return (B_FALSE); + } + +#if 0 + i40e_meoi_zero_uint16(mp, off + offsetof(struct udphdr, uh_ulen)); +#endif + off += infop->meoi_l4hlen + infop->meoi_tun_protlen + + infop->meoi_tun_l2hlen; + if (infop->meoi_tun_l3proto == ETHERTYPE_IP) { + i40e_meoi_zero_uint16(mp, off + offsetof(ipha_t, ipha_length)); + } else if (infop->meoi_tun_l3proto == ETHERTYPE_IPV6) { + i40e_meoi_zero_uint16(mp, off + offsetof(ip6_t, ip6_plen)); + } else { + return (B_FALSE); + } + return (B_TRUE); +} + +/* * Attempt to put togther the information we'll need to feed into a descriptor * to properly program the hardware for checksum offload as well as the * generally required flags. @@ -1952,7 +2087,7 @@ i40e_tx_context(i40e_t *i40e, i40e_trqpair_t *itrq, mblk_t *mp, tunneled = (chkflags & (HCK_INNER_IPV4_HDRCKSUM_NEEDED | HCK_INNER_PSEUDO_NEEDED)) != 0; if (tunneled && ttype != TTYPE_VXLAN) { - /* XXX kstat */ + txs->itxs_hck_badttype.value.ui64++; return (-1); } @@ -1994,7 +2129,11 @@ i40e_tx_context(i40e_t *i40e, i40e_trqpair_t *itrq, mblk_t *mp, /* * The MAC ether offload logic should have verified that we have * the right information for calculating the checksums here. - * Make sure that this is the case. + * Make sure that this is the case. We'll check that we have + * what we need for the checksum types as appropriate. 
We always + * require having the inner L2/L3 information. We only require + * Inner L4 info if we've been asked to do something in + * particular. */ if ((meo.meoi_flags & MEOI_L2INFO_SET) == 0) { txs->itxs_hck_nol2info.value.ui64++; @@ -2011,17 +2150,18 @@ i40e_tx_context(i40e_t *i40e, i40e_trqpair_t *itrq, mblk_t *mp, return (-1); } - if ((meo.meoi_flags & MEOI_TUNNEL_INFO_SET) == 0) { - /* XXX Missing kstat */ + if ((meo.meoi_flags & MEOI_TUN_L2INFO_SET) == 0) { + txs->itxs_hck_notunl2info.value.ui64++; + return (-1); + } + + if ((meo.meoi_flags & MEOI_TUN_L3INFO_SET) == 0) { + txs->itxs_hck_notunl3info.value.ui64++; return (-1); } if ((chkflags & HCK_PARTIALCKSUM) != 0) { - /* - * There is no HW support for outer checksum other than - * the (outer) HCK_IPV4_HDRCKSUM. - * XXX missing kstat - */ + txs->itxs_hck_outer.value.ui64++; return (-1); } @@ -2050,10 +2190,11 @@ i40e_tx_context(i40e_t *i40e, i40e_trqpair_t *itrq, mblk_t *mp, } } + /* XXX make comment for L4TUNT */ tctx->itc_ctx_tunnel_fld = I40E_TXD_TNL_SET_EIPT(eipt) | I40E_TXD_TNL_SET_EIPLEN(meo.meoi_l3hlen >> 2) | - I40E_TXD_TNL_SET_L4TUNT(I40E_TX_DESC_TNL_L4TUNT_UDP) | + I40E_TXD_TNL_SET_L4TUNT(1) | I40E_TXD_TNL_SET_L4TUNLEN(l4tunlen >> 1) | I40E_TXD_TNL_SET_DECTTL(0); @@ -2088,6 +2229,11 @@ i40e_tx_context(i40e_t *i40e, i40e_trqpair_t *itrq, mblk_t *mp, I40E_TX_DESC_LENGTH_IPLEN_SHIFT; if (chkflags & HCK_INNER_PSEUDO_NEEDED) { + if ((meo.meoi_flags & MEOI_TUN_L4INFO_SET) == 0) { + txs->itxs_hck_notunl4info.value.ui64++; + return (-1); + } + /* L4T */ switch (meo.meoi_tun_l4proto) { case IPPROTO_TCP: @@ -2213,14 +2359,25 @@ i40e_tx_context(i40e_t *i40e, i40e_trqpair_t *itrq, mblk_t *mp, * LSO requires that checksum offloads are enabled. If for * some reason they're not we bail out with an error. 
*/ - if (!((chkflags & HCK_IPV4_HDRCKSUM) && - (chkflags & HCK_PARTIALCKSUM))) { + if (!i40e_tx_tso_have_cksums(chkflags, ttype)) { + txs->itxs_hck_badtso.value.ui64++; return (-1); } + if (!i40e_tx_tso_fix_mp(mp, ttype, &meo)) { + txs->itxs_hck_badtso.value.ui64++; + return (-1); + } + tctx->itc_ctx_cmdflags |= I40E_TX_CTX_DESC_TSO; tctx->itc_ctx_mss = mss; tctx->itc_ctx_tsolen = msgsize(mp) - (meo.meoi_l2hlen + meo.meoi_l3hlen + meo.meoi_l4hlen); + if (tunneled) { + tctx->itc_ctx_tsolen -= meo.meoi_tun_protlen + + meo.meoi_tun_l2hlen + meo.meoi_tun_l3hlen + + meo.meoi_tun_l4hlen; + } + } return (0); diff --git a/usr/src/uts/common/io/overlay/overlay.c b/usr/src/uts/common/io/overlay/overlay.c index 7aba941fd6..73c3c1ebb7 100644 --- a/usr/src/uts/common/io/overlay/overlay.c +++ b/usr/src/uts/common/io/overlay/overlay.c @@ -1066,6 +1066,16 @@ overlay_tx_checksum_shift(mblk_t *mp, uint16_t flags) } } +static inline void +overlay_tx_lso_copy(const mblk_t *src, mblk_t *dst) +{ + uint16_t flags = DB_LSOFLAGS(src) & HW_LSO_FLAGS; + if (flags == 0) + return; + DB_LSOFLAGS(dst) |= flags; + DB_LSOMSS(dst) = DB_LSOMSS(src); +} + mblk_t * overlay_m_tx(void *arg, mblk_t *mp_chain) { @@ -1123,6 +1133,7 @@ overlay_m_tx(void *arg, mblk_t *mp_chain) */ overlay_tx_checksum_shift(ep, DB_CKSUMFLAGS(mp)); if (ep != mp) { + overlay_tx_lso_copy(mp, ep); ep->b_cont = mp; } diff --git a/usr/src/uts/common/io/overlay/plugins/overlay_vxlan.c b/usr/src/uts/common/io/overlay/plugins/overlay_vxlan.c index 60659ade8c..30568873a5 100644 --- a/usr/src/uts/common/io/overlay/plugins/overlay_vxlan.c +++ b/usr/src/uts/common/io/overlay/plugins/overlay_vxlan.c @@ -385,10 +385,6 @@ vxlan_o_mac_capab(void *arg, mac_capab_t capab, void *cap_data, ksocket_t ksock) switch (capab) { case MAC_CAPAB_HCKSUM: - /* - * XXX Almost certainly some things are going to need the right - * psuedo-header on transmit. 
- */ if ((vxl->vxl_utunnel.uto_cksum_flags & (HCKSUM_VXLAN_FULL | HCKSUM_VXLAN_PSEUDO | HCKSUM_VXLAN_PSEUDO_NO_OL4)) != 0) { uint32_t *hck = cap_data; @@ -404,18 +400,15 @@ vxlan_o_mac_capab(void *arg, mac_capab_t capab, void *cap_data, ksocket_t ksock) hcapab = B_TRUE; } break; -#if 0 case MAC_CAPAB_LSO: if ((vxl->vxl_utunnel.uto_lso_flags & DLD_LSO_VXLAN_TCP_IPV4) != 0) { mac_capab_lso_t *lso = cap_data; lso->lso_flags = LSO_TX_BASIC_TCP_IPV4; - /* XXX Check value */ lso->lso_basic_tcp_ipv4.lso_max = - vxl->vxl_utunnel.uto_lso_max - 100; + vxl->vxl_utunnel.uto_lso_tcp_max - 1024; hcapab = B_TRUE; } break; -#endif default: hcapab = B_FALSE; break; diff --git a/usr/src/uts/common/netinet/udp.h b/usr/src/uts/common/netinet/udp.h index a775a4fc00..a6745b9326 100644 --- a/usr/src/uts/common/netinet/udp.h +++ b/usr/src/uts/common/netinet/udp.h @@ -52,7 +52,7 @@ typedef struct udp_tunnel_opt { uint32_t uto_opts; uint32_t uto_cksum_flags; uint32_t uto_lso_flags; - uint32_t uto_lso_max; + uint32_t uto_lso_tcp_max; } udp_tunnel_opt_t; #endif /* _KERNEL */ diff --git a/usr/src/uts/common/sys/dld.h b/usr/src/uts/common/sys/dld.h index 3e74f0e03b..ba91c123e6 100644 --- a/usr/src/uts/common/sys/dld.h +++ b/usr/src/uts/common/sys/dld.h @@ -440,9 +440,16 @@ typedef struct dld_capab_poll_s { #define DLD_LSO_VXLAN_TCP_IPV4 0x04 /* TCPv4 LSO encapsulated in VXLAN */ #define DLD_LSO_VXLAN_TCP_IPV6 0x08 /* TCPv6 LSO encapsulated in VXLAN */ +/* These should match the counterparts in <sys/mac_provider.h> */ +#define DLD_LSO_VXLAN_OUDP_CSUM_NONE 0 +#define DLD_LSO_VXLAN_OUDP_CSUM_PSEUDO 1 +#define DLD_LSO_VXLAN_OUDP_CSUM_FULL 2 + typedef struct dld_capab_lso_s { uint_t lso_flags; /* capability flags */ uint_t lso_max; /* maximum payload */ + uint_t lso_vxlan_cksum; + uint_t lso_vxlan_tcp_max; } dld_capab_lso_t; int dld_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); diff --git a/usr/src/uts/common/sys/mac_provider.h b/usr/src/uts/common/sys/mac_provider.h index 
e27ba63f2d..bee1152955 100644 --- a/usr/src/uts/common/sys/mac_provider.h +++ b/usr/src/uts/common/sys/mac_provider.h @@ -120,9 +120,13 @@ typedef struct lso_basic_tcp_ipv4_s { t_uscalar_t lso_max; /* maximum payload */ } lso_basic_tcp_ipv4_t; +#define LSO_VXLAN_OUDP_CSUM_NONE 0 +#define LSO_VXLAN_OUDP_CSUM_PSEUDO 1 +#define LSO_VXLAN_OUDP_CSUM_FULL 2 + typedef struct lso_vxlan_tcp { - t_uscalar_t lso_tcpv4_max; /* maximum payload */ - t_uscalar_t lso_tcpv6_max; /* maximum payload */ + uint_t lso_oudp_cksum; /* Checksum flags */ + uint_t lso_tcp_max; /* maximum payload */ } lso_vxlan_tcp_t; /* |