diff options
author | Robert Mustacchi <rm@joyent.com> | 2021-02-01 15:10:38 +0000 |
---|---|---|
committer | Andy Fiddaman <omnios@citrus-it.co.uk> | 2022-01-19 22:39:50 +0000 |
commit | 3e8b6b84e5ea96d276f9d6364e9f53b94d58ea00 (patch) | |
tree | 8636fbb8b1f5e9c6d542a344d5ec53c9c89356ef /usr/src/common | |
parent | 417127eb7a4962848176eeecdc1f2fb597c451f2 (diff) | |
download | illumos-joyent-3e8b6b84e5ea96d276f9d6364e9f53b94d58ea00.tar.gz |
14205 Want UDP src port hashing for VXLAN
Reviewed by: Andy Fiddaman <andy@omnios.org>
Reviewed by: Toomas Soome <tsoome@me.com>
Approved by: Dan McDonald <danmcd@joyent.com>
Diffstat (limited to 'usr/src/common')
-rw-r--r-- | usr/src/common/inet/inet_hash.c | 359 |
1 files changed, 359 insertions, 0 deletions
diff --git a/usr/src/common/inet/inet_hash.c b/usr/src/common/inet/inet_hash.c new file mode 100644 index 0000000000..3a511fe588 --- /dev/null +++ b/usr/src/common/inet/inet_hash.c @@ -0,0 +1,359 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2015, Joyent, Inc. + */ + +/* + * Common routines usable by any part of the networking stack for hashing + * packets. The hashing logic originally was part of MAC, but it has more + * utility being usable by the rest of the broader system. + */ + +#include <sys/types.h> +#include <sys/mac.h> +#include <sys/strsubr.h> +#include <sys/strsun.h> +#include <sys/vlan.h> +#include <inet/ip.h> +#include <inet/ip_impl.h> +#include <inet/ip6.h> +#include <sys/dlpi.h> +#include <sys/sunndi.h> +#include <inet/ipsec_impl.h> +#include <inet/sadb.h> +#include <inet/ipsecesp.h> +#include <inet/ipsecah.h> +#include <inet/inet_hash.h> + +/* + * Determines the IPv6 header length accounting for all the optional IPv6 + * headers (hop-by-hop, destination, routing and fragment). The header length + * and next header value (a transport header) is captured. + * + * Returns B_FALSE if all the IP headers are not in the same mblk otherwise + * returns B_TRUE. + */ +static boolean_t +inet_pkthash_ip_hdr_length_v6(ip6_t *ip6h, uint8_t *endptr, + uint16_t *hdr_length, uint8_t *next_hdr, ip6_frag_t **fragp) +{ + uint16_t length; + uint_t ehdrlen; + uint8_t *whereptr; + uint8_t *nexthdrp; + ip6_dest_t *desthdr; + ip6_rthdr_t *rthdr; + ip6_frag_t *fraghdr; + + if (((uchar_t *)ip6h + IPV6_HDR_LEN) > endptr) + return (B_FALSE); + ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION); + length = IPV6_HDR_LEN; + whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ + + if (fragp != NULL) + *fragp = NULL; + + nexthdrp = &ip6h->ip6_nxt; + while (whereptr < endptr) { + /* Is there enough left for len + nexthdr? */ + if (whereptr + MIN_EHDR_LEN > endptr) + break; + + switch (*nexthdrp) { + case IPPROTO_HOPOPTS: + case IPPROTO_DSTOPTS: + /* Assumes the headers are identical for hbh and dst */ + desthdr = (ip6_dest_t *)whereptr; + ehdrlen = 8 * (desthdr->ip6d_len + 1); + if ((uchar_t *)desthdr + ehdrlen > endptr) + return (B_FALSE); + nexthdrp = &desthdr->ip6d_nxt; + break; + case IPPROTO_ROUTING: + rthdr = (ip6_rthdr_t *)whereptr; + ehdrlen = 8 * (rthdr->ip6r_len + 1); + if ((uchar_t *)rthdr + ehdrlen > endptr) + return (B_FALSE); + nexthdrp = &rthdr->ip6r_nxt; + break; + case IPPROTO_FRAGMENT: + fraghdr = (ip6_frag_t *)whereptr; + ehdrlen = sizeof (ip6_frag_t); + if ((uchar_t *)&fraghdr[1] > endptr) + return (B_FALSE); + nexthdrp = &fraghdr->ip6f_nxt; + if (fragp != NULL) + *fragp = fraghdr; + break; + case IPPROTO_NONE: + /* No next header means we're finished */ + default: + *hdr_length = length; + *next_hdr = *nexthdrp; + return (B_TRUE); + } + length += ehdrlen; + whereptr += ehdrlen; + *hdr_length = length; + *next_hdr = *nexthdrp; + } + switch (*nexthdrp) { + case IPPROTO_HOPOPTS: + case IPPROTO_DSTOPTS: + case IPPROTO_ROUTING: + case IPPROTO_FRAGMENT: + /* + * If any known extension headers are still to be processed, + * the packet's malformed (or at least all the IP header(s) are + * not in the same mblk - and that should never happen. + */ + return (B_FALSE); + + default: + /* + * If we get here, we know that all of the IP headers were in + * the same mblk, even if the ULP header is in the next mblk. + */ + *hdr_length = length; + *next_hdr = *nexthdrp; + return (B_TRUE); + } +} + +#define PKT_HASH_2BYTES(x) ((x)[0] ^ (x)[1]) +#define PKT_HASH_4BYTES(x) ((x)[0] ^ (x)[1] ^ (x)[2] ^ (x)[3]) +#define PKT_HASH_MAC(x) ((x)[0] ^ (x)[1] ^ (x)[2] ^ (x)[3] ^ (x)[4] ^ (x)[5]) +uint64_t +inet_pkt_hash(uint_t media, mblk_t *mp, uint8_t policy) +{ + struct ether_header *ehp; + uint64_t hash = 0; + uint16_t sap; + uint_t skip_len; + uint8_t proto; + boolean_t ip_fragmented; + + /* + * We may want to have one of these per MAC type plugin in the + * future. For now supports only ethernet. + */ + if (media != DL_ETHER) + return (0L); + + /* for now we support only outbound packets */ + ASSERT(IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t))); + ASSERT(MBLKL(mp) >= sizeof (struct ether_header)); + + /* compute L2 hash */ + + ehp = (struct ether_header *)mp->b_rptr; + + if ((policy & INET_PKT_HASH_L2) != 0) { + uchar_t *mac_src = ehp->ether_shost.ether_addr_octet; + uchar_t *mac_dst = ehp->ether_dhost.ether_addr_octet; + hash = PKT_HASH_MAC(mac_src) ^ PKT_HASH_MAC(mac_dst); + policy &= ~INET_PKT_HASH_L2; + } + + if (policy == 0) + goto done; + + /* skip ethernet header */ + + sap = ntohs(ehp->ether_type); + if (sap == ETHERTYPE_VLAN) { + struct ether_vlan_header *evhp; + mblk_t *newmp = NULL; + + skip_len = sizeof (struct ether_vlan_header); + if (MBLKL(mp) < skip_len) { + /* the vlan tag is the payload, pull up first */ + newmp = msgpullup(mp, -1); + if ((newmp == NULL) || (MBLKL(newmp) < skip_len)) { + goto done; + } + evhp = (struct ether_vlan_header *)newmp->b_rptr; + } else { + evhp = (struct ether_vlan_header *)mp->b_rptr; + } + + sap = ntohs(evhp->ether_type); + freemsg(newmp); + } else { + skip_len = sizeof (struct ether_header); + } + + /* if ethernet header is in its own mblk, skip it */ + if (MBLKL(mp) <= skip_len) { + skip_len -= MBLKL(mp); + mp = mp->b_cont; + if (mp == NULL) + goto done; + } + + sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap; + + /* compute IP src/dst addresses hash and skip IPv{4,6} header */ + + switch (sap) { + case ETHERTYPE_IP: { + ipha_t *iphp; + + /* + * If the header is not aligned or the header doesn't fit + * in the mblk, bail now. Note that this may cause packet + * reordering. + */ + iphp = (ipha_t *)(mp->b_rptr + skip_len); + if (((unsigned char *)iphp + sizeof (ipha_t) > mp->b_wptr) || + !OK_32PTR((char *)iphp)) + goto done; + + proto = iphp->ipha_protocol; + skip_len += IPH_HDR_LENGTH(iphp); + + /* Check if the packet is fragmented. */ + ip_fragmented = ntohs(iphp->ipha_fragment_offset_and_flags) & + IPH_OFFSET; + + /* + * For fragmented packets, use addresses in addition to + * the frag_id to generate the hash inorder to get + * better distribution. + */ + if (ip_fragmented || (policy & INET_PKT_HASH_L3) != 0) { + uint8_t *ip_src = (uint8_t *)&(iphp->ipha_src); + uint8_t *ip_dst = (uint8_t *)&(iphp->ipha_dst); + + hash ^= (PKT_HASH_4BYTES(ip_src) ^ + PKT_HASH_4BYTES(ip_dst)); + policy &= ~INET_PKT_HASH_L3; + } + + if (ip_fragmented) { + uint8_t *identp = (uint8_t *)&iphp->ipha_ident; + hash ^= PKT_HASH_2BYTES(identp); + goto done; + } + break; + } + case ETHERTYPE_IPV6: { + ip6_t *ip6hp; + ip6_frag_t *frag = NULL; + uint16_t hdr_length; + + /* + * If the header is not aligned or the header doesn't fit + * in the mblk, bail now. Note that this may cause packets + * reordering. + */ + + ip6hp = (ip6_t *)(mp->b_rptr + skip_len); + if (((unsigned char *)ip6hp + IPV6_HDR_LEN > mp->b_wptr) || + !OK_32PTR((char *)ip6hp)) + goto done; + + if (!inet_pkthash_ip_hdr_length_v6(ip6hp, mp->b_wptr, + &hdr_length, &proto, &frag)) + goto done; + skip_len += hdr_length; + + /* + * For fragmented packets, use addresses in addition to + * the frag_id to generate the hash inorder to get + * better distribution. + */ + if (frag != NULL || (policy & INET_PKT_HASH_L3) != 0) { + uint8_t *ip_src = &(ip6hp->ip6_src.s6_addr8[12]); + uint8_t *ip_dst = &(ip6hp->ip6_dst.s6_addr8[12]); + + hash ^= (PKT_HASH_4BYTES(ip_src) ^ + PKT_HASH_4BYTES(ip_dst)); + policy &= ~INET_PKT_HASH_L3; + } + + if (frag != NULL) { + uint8_t *identp = (uint8_t *)&frag->ip6f_ident; + hash ^= PKT_HASH_4BYTES(identp); + goto done; + } + break; + } + default: + goto done; + } + + if (policy == 0) + goto done; + + /* if ip header is in its own mblk, skip it */ + if (MBLKL(mp) <= skip_len) { + skip_len -= MBLKL(mp); + mp = mp->b_cont; + if (mp == NULL) + goto done; + } + + /* parse ULP header */ +again: + switch (proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_ESP: + case IPPROTO_SCTP: + /* + * These Internet Protocols are intentionally designed + * for hashing from the git-go. Port numbers are in the first + * word for transports, SPI is first for ESP. + */ + if (mp->b_rptr + skip_len + 4 > mp->b_wptr) + goto done; + hash ^= PKT_HASH_4BYTES((mp->b_rptr + skip_len)); + break; + + case IPPROTO_AH: { + ah_t *ah = (ah_t *)(mp->b_rptr + skip_len); + uint_t ah_length = AH_TOTAL_LEN(ah); + + if ((unsigned char *)ah + sizeof (ah_t) > mp->b_wptr) + goto done; + + proto = ah->ah_nexthdr; + skip_len += ah_length; + + /* if AH header is in its own mblk, skip it */ + if (MBLKL(mp) <= skip_len) { + skip_len -= MBLKL(mp); + mp = mp->b_cont; + if (mp == NULL) + goto done; + } + + goto again; + } + } + +done: + return (hash); +} |