diff options
Diffstat (limited to 'usr/src/uts/common/io')
-rw-r--r-- | usr/src/uts/common/io/dld/dld_proto.c | 24 | ||||
-rw-r--r-- | usr/src/uts/common/io/i40e/i40e_transceiver.c | 180 | ||||
-rw-r--r-- | usr/src/uts/common/io/mac/mac_provider.c | 156 | ||||
-rw-r--r-- | usr/src/uts/common/io/vioif/vioif.c | 81 | ||||
-rw-r--r-- | usr/src/uts/common/io/vioif/vioif.h | 2 |
5 files changed, 256 insertions, 187 deletions
diff --git a/usr/src/uts/common/io/dld/dld_proto.c b/usr/src/uts/common/io/dld/dld_proto.c index 1371fa47c0..596147f4e9 100644 --- a/usr/src/uts/common/io/dld/dld_proto.c +++ b/usr/src/uts/common/io/dld/dld_proto.c @@ -1537,13 +1537,23 @@ dld_capab_lso(dld_str_t *dsp, void *data, uint_t flags) * accordingly. */ if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_LSO, &mac_lso)) { - lso->lso_max = mac_lso.lso_basic_tcp_ipv4.lso_max; + lso->lso_max_tcpv4 = mac_lso.lso_basic_tcp_ipv4.lso_max; + lso->lso_max_tcpv6 = mac_lso.lso_basic_tcp_ipv6.lso_max; lso->lso_flags = 0; /* translate the flag for mac clients */ if ((mac_lso.lso_flags & LSO_TX_BASIC_TCP_IPV4) != 0) lso->lso_flags |= DLD_LSO_BASIC_TCP_IPV4; - dsp->ds_lso = B_TRUE; - dsp->ds_lso_max = lso->lso_max; + if ((mac_lso.lso_flags & LSO_TX_BASIC_TCP_IPV6) != 0) + lso->lso_flags |= DLD_LSO_BASIC_TCP_IPV6; + dsp->ds_lso = lso->lso_flags != 0; + /* + * DLS uses this to try and make sure that a raw ioctl + * doesn't send too much data, but doesn't currently + * check the actual SAP that is sending this (or that + * it's TCP). So for now, just use the max value here. 
+ */ + dsp->ds_lso_max = MAX(lso->lso_max_tcpv4, + lso->lso_max_tcpv6); } else { dsp->ds_lso = B_FALSE; dsp->ds_lso_max = 0; @@ -1581,10 +1591,18 @@ dld_capab(dld_str_t *dsp, uint_t type, void *data, uint_t flags) switch (type) { case DLD_CAPAB_DIRECT: + if (dsp->ds_sap == ETHERTYPE_IPV6) { + err = ENOTSUP; + break; + } err = dld_capab_direct(dsp, data, flags); break; case DLD_CAPAB_POLL: + if (dsp->ds_sap == ETHERTYPE_IPV6) { + err = ENOTSUP; + break; + } err = dld_capab_poll(dsp, data, flags); break; diff --git a/usr/src/uts/common/io/i40e/i40e_transceiver.c b/usr/src/uts/common/io/i40e/i40e_transceiver.c index caafa3e102..9662cb58f5 100644 --- a/usr/src/uts/common/io/i40e/i40e_transceiver.c +++ b/usr/src/uts/common/io/i40e/i40e_transceiver.c @@ -1663,186 +1663,6 @@ i40e_ring_rx_poll(void *arg, int poll_bytes) } /* - * This is a structure I wish someone would fill out for me for dorking with the - * checksums. When we get some more experience with this, we should go ahead and - * consider adding this to MAC. - */ -typedef enum mac_ether_offload_flags { - MEOI_L2INFO_SET = 0x01, - MEOI_VLAN_TAGGED = 0x02, - MEOI_L3INFO_SET = 0x04, - MEOI_L3CKSUM_SET = 0x08, - MEOI_L4INFO_SET = 0x10, - MEOI_L4CKSUM_SET = 0x20 -} mac_ether_offload_flags_t; - -typedef struct mac_ether_offload_info { - mac_ether_offload_flags_t meoi_flags; - uint8_t meoi_l2hlen; /* How long is the Ethernet header? */ - uint16_t meoi_l3proto; /* What's the Ethertype */ - uint8_t meoi_l3hlen; /* How long is the header? */ - uint8_t meoi_l4proto; /* What is the payload type? */ - uint8_t meoi_l4hlen; /* How long is the L4 header */ - mblk_t *meoi_l3ckmp; /* Which mblk has the l3 checksum */ - off_t meoi_l3ckoff; /* What's the offset to it */ - mblk_t *meoi_l4ckmp; /* Which mblk has the L4 checksum */ - off_t meoi_l4off; /* What is the offset to it? */ -} mac_ether_offload_info_t; - -/* - * This is something that we'd like to make a general MAC function. 
Before we do - * that, we should add support for TSO. - * - * We should really keep track of our offset and not walk everything every - * time. I can't imagine that this will be kind to us at high packet rates; - * however, for the moment, let's leave that. - * - * This walks a message block chain without pulling up to fill in the context - * information. Note that the data we care about could be hidden across more - * than one mblk_t. - */ -static int -i40e_meoi_get_uint8(mblk_t *mp, off_t off, uint8_t *out) -{ - size_t mpsize; - uint8_t *bp; - - mpsize = msgsize(mp); - /* Check for overflow */ - if (off + sizeof (uint16_t) > mpsize) - return (-1); - - mpsize = MBLKL(mp); - while (off >= mpsize) { - mp = mp->b_cont; - off -= mpsize; - mpsize = MBLKL(mp); - } - - bp = mp->b_rptr + off; - *out = *bp; - return (0); - -} - -static int -i40e_meoi_get_uint16(mblk_t *mp, off_t off, uint16_t *out) -{ - size_t mpsize; - uint8_t *bp; - - mpsize = msgsize(mp); - /* Check for overflow */ - if (off + sizeof (uint16_t) > mpsize) - return (-1); - - mpsize = MBLKL(mp); - while (off >= mpsize) { - mp = mp->b_cont; - off -= mpsize; - mpsize = MBLKL(mp); - } - - /* - * Data is in network order. Note the second byte of data might be in - * the next mp. 
- */ - bp = mp->b_rptr + off; - *out = *bp << 8; - if (off + 1 == mpsize) { - mp = mp->b_cont; - bp = mp->b_rptr; - } else { - bp++; - } - - *out |= *bp; - return (0); - -} - -static int -mac_ether_offload_info(mblk_t *mp, mac_ether_offload_info_t *meoi) -{ - size_t off; - uint16_t ether; - uint8_t ipproto, iplen, l4len, maclen; - - bzero(meoi, sizeof (mac_ether_offload_info_t)); - - off = offsetof(struct ether_header, ether_type); - if (i40e_meoi_get_uint16(mp, off, &ether) != 0) - return (-1); - - if (ether == ETHERTYPE_VLAN) { - off = offsetof(struct ether_vlan_header, ether_type); - if (i40e_meoi_get_uint16(mp, off, &ether) != 0) - return (-1); - meoi->meoi_flags |= MEOI_VLAN_TAGGED; - maclen = sizeof (struct ether_vlan_header); - } else { - maclen = sizeof (struct ether_header); - } - meoi->meoi_flags |= MEOI_L2INFO_SET; - meoi->meoi_l2hlen = maclen; - meoi->meoi_l3proto = ether; - - switch (ether) { - case ETHERTYPE_IP: - /* - * For IPv4 we need to get the length of the header, as it can - * be variable. 
- */ - off = offsetof(ipha_t, ipha_version_and_hdr_length) + maclen; - if (i40e_meoi_get_uint8(mp, off, &iplen) != 0) - return (-1); - iplen &= 0x0f; - if (iplen < 5 || iplen > 0x0f) - return (-1); - iplen *= 4; - off = offsetof(ipha_t, ipha_protocol) + maclen; - if (i40e_meoi_get_uint8(mp, off, &ipproto) == -1) - return (-1); - break; - case ETHERTYPE_IPV6: - iplen = 40; - off = offsetof(ip6_t, ip6_nxt) + maclen; - if (i40e_meoi_get_uint8(mp, off, &ipproto) == -1) - return (-1); - break; - default: - return (0); - } - meoi->meoi_l3hlen = iplen; - meoi->meoi_l4proto = ipproto; - meoi->meoi_flags |= MEOI_L3INFO_SET; - - switch (ipproto) { - case IPPROTO_TCP: - off = offsetof(tcph_t, th_offset_and_rsrvd) + maclen + iplen; - if (i40e_meoi_get_uint8(mp, off, &l4len) == -1) - return (-1); - l4len = (l4len & 0xf0) >> 4; - if (l4len < 5 || l4len > 0xf) - return (-1); - l4len *= 4; - break; - case IPPROTO_UDP: - l4len = sizeof (struct udphdr); - break; - case IPPROTO_SCTP: - l4len = sizeof (sctp_hdr_t); - break; - default: - return (0); - } - - meoi->meoi_l4hlen = l4len; - meoi->meoi_flags |= MEOI_L4INFO_SET; - return (0); -} - -/* * Attempt to put togther the information we'll need to feed into a descriptor * to properly program the hardware for checksum offload as well as the * generally required flags. diff --git a/usr/src/uts/common/io/mac/mac_provider.c b/usr/src/uts/common/io/mac/mac_provider.c index 0f917cd8ca..7f193f68eb 100644 --- a/usr/src/uts/common/io/mac/mac_provider.c +++ b/usr/src/uts/common/io/mac/mac_provider.c @@ -58,6 +58,10 @@ #include <sys/pattr.h> #include <sys/strsun.h> #include <sys/vlan.h> +#include <inet/ip.h> +#include <inet/tcp.h> +#include <netinet/udp.h> +#include <netinet/sctp.h> /* * MAC Provider Interface. @@ -1653,3 +1657,155 @@ mac_transceiver_info_set_usable(mac_transceiver_info_t *infop, { infop->mti_usable = usable; } + +/* + * We should really keep track of our offset and not walk everything every + * time. 
I can't imagine that this will be kind to us at high packet rates; + * however, for the moment, let's leave that. + * + * This walks a message block chain without pulling up to fill in the context + * information. Note that the data we care about could be hidden across more + * than one mblk_t. + */ +static int +mac_meoi_get_uint8(mblk_t *mp, off_t off, uint8_t *out) +{ + size_t mpsize; + uint8_t *bp; + + mpsize = msgsize(mp); + /* Check for overflow */ + if (off + sizeof (uint16_t) > mpsize) + return (-1); + + mpsize = MBLKL(mp); + while (off >= mpsize) { + mp = mp->b_cont; + off -= mpsize; + mpsize = MBLKL(mp); + } + + bp = mp->b_rptr + off; + *out = *bp; + return (0); + +} + +static int +mac_meoi_get_uint16(mblk_t *mp, off_t off, uint16_t *out) +{ + size_t mpsize; + uint8_t *bp; + + mpsize = msgsize(mp); + /* Check for overflow */ + if (off + sizeof (uint16_t) > mpsize) + return (-1); + + mpsize = MBLKL(mp); + while (off >= mpsize) { + mp = mp->b_cont; + off -= mpsize; + mpsize = MBLKL(mp); + } + + /* + * Data is in network order. Note the second byte of data might be in + * the next mp. 
+ */ + bp = mp->b_rptr + off; + *out = *bp << 8; + if (off + 1 == mpsize) { + mp = mp->b_cont; + bp = mp->b_rptr; + } else { + bp++; + } + + *out |= *bp; + return (0); + +} + + +int +mac_ether_offload_info(mblk_t *mp, mac_ether_offload_info_t *meoi) +{ + size_t off; + uint16_t ether; + uint8_t ipproto, iplen, l4len, maclen; + + bzero(meoi, sizeof (mac_ether_offload_info_t)); + + meoi->meoi_len = msgsize(mp); + off = offsetof(struct ether_header, ether_type); + if (mac_meoi_get_uint16(mp, off, &ether) != 0) + return (-1); + + if (ether == ETHERTYPE_VLAN) { + off = offsetof(struct ether_vlan_header, ether_type); + if (mac_meoi_get_uint16(mp, off, &ether) != 0) + return (-1); + meoi->meoi_flags |= MEOI_VLAN_TAGGED; + maclen = sizeof (struct ether_vlan_header); + } else { + maclen = sizeof (struct ether_header); + } + meoi->meoi_flags |= MEOI_L2INFO_SET; + meoi->meoi_l2hlen = maclen; + meoi->meoi_l3proto = ether; + + switch (ether) { + case ETHERTYPE_IP: + /* + * For IPv4 we need to get the length of the header, as it can + * be variable. 
+ */ + off = offsetof(ipha_t, ipha_version_and_hdr_length) + maclen; + if (mac_meoi_get_uint8(mp, off, &iplen) != 0) + return (-1); + iplen &= 0x0f; + if (iplen < 5 || iplen > 0x0f) + return (-1); + iplen *= 4; + off = offsetof(ipha_t, ipha_protocol) + maclen; + if (mac_meoi_get_uint8(mp, off, &ipproto) == -1) + return (-1); + break; + case ETHERTYPE_IPV6: + iplen = 40; + off = offsetof(ip6_t, ip6_nxt) + maclen; + if (mac_meoi_get_uint8(mp, off, &ipproto) == -1) + return (-1); + break; + default: + return (0); + } + meoi->meoi_l3hlen = iplen; + meoi->meoi_l4proto = ipproto; + meoi->meoi_flags |= MEOI_L3INFO_SET; + + switch (ipproto) { + case IPPROTO_TCP: + off = offsetof(tcph_t, th_offset_and_rsrvd) + maclen + iplen; + if (mac_meoi_get_uint8(mp, off, &l4len) == -1) + return (-1); + l4len = (l4len & 0xf0) >> 4; + if (l4len < 5 || l4len > 0xf) + return (-1); + l4len *= 4; + break; + case IPPROTO_UDP: + l4len = sizeof (struct udphdr); + break; + case IPPROTO_SCTP: + l4len = sizeof (sctp_hdr_t); + break; + default: + return (0); + } + + meoi->meoi_l4hlen = l4len; + meoi->meoi_flags |= MEOI_L4INFO_SET; + return (0); +} diff --git a/usr/src/uts/common/io/vioif/vioif.c b/usr/src/uts/common/io/vioif/vioif.c index e1535182b3..cac90d3073 100644 --- a/usr/src/uts/common/io/vioif/vioif.c +++ b/usr/src/uts/common/io/vioif/vioif.c @@ -74,6 +74,7 @@ #include <sys/random.h> #include <sys/containerof.h> #include <sys/stream.h> +#include <inet/tcp.h> #include <sys/mac.h> #include <sys/mac_provider.h> @@ -1018,7 +1019,7 @@ vioif_send(vioif_t *vif, mblk_t *mp) for (nmp = mp; nmp; nmp = nmp->b_cont) msg_size += MBLKL(nmp); - if (vif->vif_tx_tso4) { + if (vif->vif_tx_tso4 || vif->vif_tx_tso6) { mac_lso_get(mp, &lso_mss, &lso_flags); lso_required = (lso_flags & HW_LSO) != 0; } @@ -1084,8 +1085,70 @@ vioif_send(vioif_t *vif, mblk_t *mp) * Setup LSO fields if required. 
*/ if (lso_required) { - vnh->vnh_gso_type = VIRTIO_NET_HDR_GSO_TCPV4; + mac_ether_offload_flags_t needed; + mac_ether_offload_info_t meo; + uint32_t cksum; + size_t len; + mblk_t *pullmp = NULL; + tcpha_t *tcpha; + + if (mac_ether_offload_info(mp, &meo) != 0) { + goto fail; + } + + needed = MEOI_L2INFO_SET | MEOI_L3INFO_SET | MEOI_L4INFO_SET; + if ((meo.meoi_flags & needed) != needed) { + goto fail; + } + + if (meo.meoi_l4proto != IPPROTO_TCP) { + goto fail; + } + + if (meo.meoi_l3proto == ETHERTYPE_IP && vif->vif_tx_tso4) { + vnh->vnh_gso_type = VIRTIO_NET_HDR_GSO_TCPV4; + } else if (meo.meoi_l3proto == ETHERTYPE_IPV6 && + vif->vif_tx_tso6) { + vnh->vnh_gso_type = VIRTIO_NET_HDR_GSO_TCPV6; + } else { + goto fail; + } + + /* + * The TCP stack does not include the length in the TCP + * pseudo-header when it is performing LSO since hardware + * generally asks for it to be removed (as it'll change). + * Unfortunately, for virtio, we actually need it. This means we + * need to go through and calculate the actual length and fix + * things up. Because the virtio spec cares about the ECN flag + * and indicating that, at least this means we'll have that + * available as well. 
+ */ + if (MBLKL(mp) < vnh->vnh_hdr_len) { + pullmp = msgpullup(mp, vnh->vnh_hdr_len); + if (pullmp == NULL) + goto fail; + tcpha = (tcpha_t *)(pullmp->b_rptr + meo.meoi_l2hlen + + meo.meoi_l3hlen); + } else { + tcpha = (tcpha_t *)(mp->b_rptr + meo.meoi_l2hlen + + meo.meoi_l3hlen); + } + + len = meo.meoi_len - meo.meoi_l2hlen - meo.meoi_l3hlen; + cksum = ntohs(tcpha->tha_sum) + len; + cksum = (cksum >> 16) + (cksum & 0xffff); + cksum = (cksum >> 16) + (cksum & 0xffff); + tcpha->tha_sum = htons(cksum); + + if (tcpha->tha_flags & TH_CWR) { + vnh->vnh_gso_type |= VIRTIO_NET_HDR_GSO_ECN; + } vnh->vnh_gso_size = (uint16_t)lso_mss; + vnh->vnh_hdr_len = meo.meoi_l2hlen + meo.meoi_l3hlen + + meo.meoi_l4hlen; + + freemsg(pullmp); } /* @@ -1453,8 +1516,9 @@ vioif_m_getcapab(void *arg, mac_capab_t cap, void *cap_data) } mac_capab_lso_t *lso = cap_data; - lso->lso_flags = LSO_TX_BASIC_TCP_IPV4; + lso->lso_flags = LSO_TX_BASIC_TCP_IPV4 | LSO_TX_BASIC_TCP_IPV6; lso->lso_basic_tcp_ipv4.lso_max = VIOIF_RX_DATA_SIZE; + lso->lso_basic_tcp_ipv6.lso_max = VIOIF_RX_DATA_SIZE; return (B_TRUE); } @@ -1556,6 +1620,7 @@ vioif_check_features(vioif_t *vif) vif->vif_tx_csum = 0; vif->vif_tx_tso4 = 0; + vif->vif_tx_tso6 = 0; if (vioif_has_feature(vif, VIRTIO_NET_F_CSUM)) { /* @@ -1569,6 +1634,7 @@ vioif_check_features(vioif_t *vif) */ boolean_t gso = vioif_has_feature(vif, VIRTIO_NET_F_GSO); boolean_t tso4 = vioif_has_feature(vif, VIRTIO_NET_F_HOST_TSO4); + boolean_t tso6 = vioif_has_feature(vif, VIRTIO_NET_F_HOST_TSO6); boolean_t ecn = vioif_has_feature(vif, VIRTIO_NET_F_HOST_ECN); /* @@ -1578,8 +1644,15 @@ vioif_check_features(vioif_t *vif) * we require the device to support the combination of * segmentation offload and ECN support. 
*/ - if (gso || (tso4 && ecn)) { + if (gso) { vif->vif_tx_tso4 = 1; + vif->vif_tx_tso6 = 1; + } + if (tso4 && ecn) { + vif->vif_tx_tso4 = 1; + } + if (tso6 && ecn) { + vif->vif_tx_tso6 = 1; } } } diff --git a/usr/src/uts/common/io/vioif/vioif.h b/usr/src/uts/common/io/vioif/vioif.h index 19d8965bd4..9f750c9b8a 100644 --- a/usr/src/uts/common/io/vioif/vioif.h +++ b/usr/src/uts/common/io/vioif/vioif.h @@ -164,6 +164,7 @@ extern "C" { #define VIRTIO_NET_WANTED_FEATURES (VIRTIO_NET_F_CSUM | \ VIRTIO_NET_F_GSO | \ VIRTIO_NET_F_HOST_TSO4 | \ + VIRTIO_NET_F_HOST_TSO6 | \ VIRTIO_NET_F_HOST_ECN | \ VIRTIO_NET_F_MAC | \ VIRTIO_NET_F_MTU) @@ -356,6 +357,7 @@ struct vioif { */ unsigned int vif_tx_csum:1; unsigned int vif_tx_tso4:1; + unsigned int vif_tx_tso6:1; /* * For debugging, it is useful to know whether the MAC address we |