summaryrefslogtreecommitdiff
path: root/usr/src/common
diff options
context:
space:
mode:
authorRobert Mustacchi <rm@joyent.com>2021-02-01 15:10:38 +0000
committerAndy Fiddaman <omnios@citrus-it.co.uk>2022-01-19 22:39:50 +0000
commit3e8b6b84e5ea96d276f9d6364e9f53b94d58ea00 (patch)
tree8636fbb8b1f5e9c6d542a344d5ec53c9c89356ef /usr/src/common
parent417127eb7a4962848176eeecdc1f2fb597c451f2 (diff)
downloadillumos-joyent-3e8b6b84e5ea96d276f9d6364e9f53b94d58ea00.tar.gz
14205 Want UDP src port hashing for VXLAN
Reviewed by: Andy Fiddaman <andy@omnios.org> Reviewed by: Toomas Soome <tsoome@me.com> Approved by: Dan McDonald <danmcd@joyent.com>
Diffstat (limited to 'usr/src/common')
-rw-r--r--usr/src/common/inet/inet_hash.c359
1 files changed, 359 insertions, 0 deletions
diff --git a/usr/src/common/inet/inet_hash.c b/usr/src/common/inet/inet_hash.c
new file mode 100644
index 0000000000..3a511fe588
--- /dev/null
+++ b/usr/src/common/inet/inet_hash.c
@@ -0,0 +1,359 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
+ */
+
+/*
+ * Common routines usable by any part of the networking stack for hashing
+ * packets. The hashing logic originally was part of MAC, but it has more
+ * utility being usable by the rest of the broader system.
+ */
+
+#include <sys/types.h>
+#include <sys/mac.h>
+#include <sys/strsubr.h>
+#include <sys/strsun.h>
+#include <sys/vlan.h>
+#include <inet/ip.h>
+#include <inet/ip_impl.h>
+#include <inet/ip6.h>
+#include <sys/dlpi.h>
+#include <sys/sunndi.h>
+#include <inet/ipsec_impl.h>
+#include <inet/sadb.h>
+#include <inet/ipsecesp.h>
+#include <inet/ipsecah.h>
+#include <inet/inet_hash.h>
+
+/*
+ * Determines the IPv6 header length accounting for all the optional IPv6
+ * headers (hop-by-hop, destination, routing and fragment). The header length
+ * and next header value (a transport header) is captured.
+ *
+ * Returns B_FALSE if all the IP headers are not in the same mblk otherwise
+ * returns B_TRUE.
+ */
+static boolean_t
+inet_pkthash_ip_hdr_length_v6(ip6_t *ip6h, uint8_t *endptr,
+ uint16_t *hdr_length, uint8_t *next_hdr, ip6_frag_t **fragp)
+{
+ uint16_t length;
+ uint_t ehdrlen;
+ uint8_t *whereptr;
+ uint8_t *nexthdrp;
+ ip6_dest_t *desthdr;
+ ip6_rthdr_t *rthdr;
+ ip6_frag_t *fraghdr;
+
+ if (((uchar_t *)ip6h + IPV6_HDR_LEN) > endptr)
+ return (B_FALSE);
+ ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION);
+ length = IPV6_HDR_LEN;
+ whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
+
+ if (fragp != NULL)
+ *fragp = NULL;
+
+ nexthdrp = &ip6h->ip6_nxt;
+ while (whereptr < endptr) {
+ /* Is there enough left for len + nexthdr? */
+ if (whereptr + MIN_EHDR_LEN > endptr)
+ break;
+
+ switch (*nexthdrp) {
+ case IPPROTO_HOPOPTS:
+ case IPPROTO_DSTOPTS:
+ /* Assumes the headers are identical for hbh and dst */
+ desthdr = (ip6_dest_t *)whereptr;
+ ehdrlen = 8 * (desthdr->ip6d_len + 1);
+ if ((uchar_t *)desthdr + ehdrlen > endptr)
+ return (B_FALSE);
+ nexthdrp = &desthdr->ip6d_nxt;
+ break;
+ case IPPROTO_ROUTING:
+ rthdr = (ip6_rthdr_t *)whereptr;
+ ehdrlen = 8 * (rthdr->ip6r_len + 1);
+ if ((uchar_t *)rthdr + ehdrlen > endptr)
+ return (B_FALSE);
+ nexthdrp = &rthdr->ip6r_nxt;
+ break;
+ case IPPROTO_FRAGMENT:
+ fraghdr = (ip6_frag_t *)whereptr;
+ ehdrlen = sizeof (ip6_frag_t);
+ if ((uchar_t *)&fraghdr[1] > endptr)
+ return (B_FALSE);
+ nexthdrp = &fraghdr->ip6f_nxt;
+ if (fragp != NULL)
+ *fragp = fraghdr;
+ break;
+ case IPPROTO_NONE:
+ /* No next header means we're finished */
+ default:
+ *hdr_length = length;
+ *next_hdr = *nexthdrp;
+ return (B_TRUE);
+ }
+ length += ehdrlen;
+ whereptr += ehdrlen;
+ *hdr_length = length;
+ *next_hdr = *nexthdrp;
+ }
+ switch (*nexthdrp) {
+ case IPPROTO_HOPOPTS:
+ case IPPROTO_DSTOPTS:
+ case IPPROTO_ROUTING:
+ case IPPROTO_FRAGMENT:
+ /*
+ * If any known extension headers are still to be processed,
+ * the packet's malformed (or at least all the IP header(s) are
+ * not in the same mblk - and that should never happen.
+ */
+ return (B_FALSE);
+
+ default:
+ /*
+ * If we get here, we know that all of the IP headers were in
+ * the same mblk, even if the ULP header is in the next mblk.
+ */
+ *hdr_length = length;
+ *next_hdr = *nexthdrp;
+ return (B_TRUE);
+ }
+}
+
+#define PKT_HASH_2BYTES(x) ((x)[0] ^ (x)[1])
+#define PKT_HASH_4BYTES(x) ((x)[0] ^ (x)[1] ^ (x)[2] ^ (x)[3])
+#define PKT_HASH_MAC(x) ((x)[0] ^ (x)[1] ^ (x)[2] ^ (x)[3] ^ (x)[4] ^ (x)[5])
+uint64_t
+inet_pkt_hash(uint_t media, mblk_t *mp, uint8_t policy)
+{
+ struct ether_header *ehp;
+ uint64_t hash = 0;
+ uint16_t sap;
+ uint_t skip_len;
+ uint8_t proto;
+ boolean_t ip_fragmented;
+
+ /*
+ * We may want to have one of these per MAC type plugin in the
+ * future. For now supports only ethernet.
+ */
+ if (media != DL_ETHER)
+ return (0L);
+
+ /* for now we support only outbound packets */
+ ASSERT(IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)));
+ ASSERT(MBLKL(mp) >= sizeof (struct ether_header));
+
+ /* compute L2 hash */
+
+ ehp = (struct ether_header *)mp->b_rptr;
+
+ if ((policy & INET_PKT_HASH_L2) != 0) {
+ uchar_t *mac_src = ehp->ether_shost.ether_addr_octet;
+ uchar_t *mac_dst = ehp->ether_dhost.ether_addr_octet;
+ hash = PKT_HASH_MAC(mac_src) ^ PKT_HASH_MAC(mac_dst);
+ policy &= ~INET_PKT_HASH_L2;
+ }
+
+ if (policy == 0)
+ goto done;
+
+ /* skip ethernet header */
+
+ sap = ntohs(ehp->ether_type);
+ if (sap == ETHERTYPE_VLAN) {
+ struct ether_vlan_header *evhp;
+ mblk_t *newmp = NULL;
+
+ skip_len = sizeof (struct ether_vlan_header);
+ if (MBLKL(mp) < skip_len) {
+ /* the vlan tag is the payload, pull up first */
+ newmp = msgpullup(mp, -1);
+ if ((newmp == NULL) || (MBLKL(newmp) < skip_len)) {
+ goto done;
+ }
+ evhp = (struct ether_vlan_header *)newmp->b_rptr;
+ } else {
+ evhp = (struct ether_vlan_header *)mp->b_rptr;
+ }
+
+ sap = ntohs(evhp->ether_type);
+ freemsg(newmp);
+ } else {
+ skip_len = sizeof (struct ether_header);
+ }
+
+ /* if ethernet header is in its own mblk, skip it */
+ if (MBLKL(mp) <= skip_len) {
+ skip_len -= MBLKL(mp);
+ mp = mp->b_cont;
+ if (mp == NULL)
+ goto done;
+ }
+
+ sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap;
+
+ /* compute IP src/dst addresses hash and skip IPv{4,6} header */
+
+ switch (sap) {
+ case ETHERTYPE_IP: {
+ ipha_t *iphp;
+
+ /*
+ * If the header is not aligned or the header doesn't fit
+ * in the mblk, bail now. Note that this may cause packet
+ * reordering.
+ */
+ iphp = (ipha_t *)(mp->b_rptr + skip_len);
+ if (((unsigned char *)iphp + sizeof (ipha_t) > mp->b_wptr) ||
+ !OK_32PTR((char *)iphp))
+ goto done;
+
+ proto = iphp->ipha_protocol;
+ skip_len += IPH_HDR_LENGTH(iphp);
+
+ /* Check if the packet is fragmented. */
+ ip_fragmented = ntohs(iphp->ipha_fragment_offset_and_flags) &
+ IPH_OFFSET;
+
+ /*
+ * For fragmented packets, use addresses in addition to
+ * the frag_id to generate the hash inorder to get
+ * better distribution.
+ */
+ if (ip_fragmented || (policy & INET_PKT_HASH_L3) != 0) {
+ uint8_t *ip_src = (uint8_t *)&(iphp->ipha_src);
+ uint8_t *ip_dst = (uint8_t *)&(iphp->ipha_dst);
+
+ hash ^= (PKT_HASH_4BYTES(ip_src) ^
+ PKT_HASH_4BYTES(ip_dst));
+ policy &= ~INET_PKT_HASH_L3;
+ }
+
+ if (ip_fragmented) {
+ uint8_t *identp = (uint8_t *)&iphp->ipha_ident;
+ hash ^= PKT_HASH_2BYTES(identp);
+ goto done;
+ }
+ break;
+ }
+ case ETHERTYPE_IPV6: {
+ ip6_t *ip6hp;
+ ip6_frag_t *frag = NULL;
+ uint16_t hdr_length;
+
+ /*
+ * If the header is not aligned or the header doesn't fit
+ * in the mblk, bail now. Note that this may cause packets
+ * reordering.
+ */
+
+ ip6hp = (ip6_t *)(mp->b_rptr + skip_len);
+ if (((unsigned char *)ip6hp + IPV6_HDR_LEN > mp->b_wptr) ||
+ !OK_32PTR((char *)ip6hp))
+ goto done;
+
+ if (!inet_pkthash_ip_hdr_length_v6(ip6hp, mp->b_wptr,
+ &hdr_length, &proto, &frag))
+ goto done;
+ skip_len += hdr_length;
+
+ /*
+ * For fragmented packets, use addresses in addition to
+ * the frag_id to generate the hash inorder to get
+ * better distribution.
+ */
+ if (frag != NULL || (policy & INET_PKT_HASH_L3) != 0) {
+ uint8_t *ip_src = &(ip6hp->ip6_src.s6_addr8[12]);
+ uint8_t *ip_dst = &(ip6hp->ip6_dst.s6_addr8[12]);
+
+ hash ^= (PKT_HASH_4BYTES(ip_src) ^
+ PKT_HASH_4BYTES(ip_dst));
+ policy &= ~INET_PKT_HASH_L3;
+ }
+
+ if (frag != NULL) {
+ uint8_t *identp = (uint8_t *)&frag->ip6f_ident;
+ hash ^= PKT_HASH_4BYTES(identp);
+ goto done;
+ }
+ break;
+ }
+ default:
+ goto done;
+ }
+
+ if (policy == 0)
+ goto done;
+
+ /* if ip header is in its own mblk, skip it */
+ if (MBLKL(mp) <= skip_len) {
+ skip_len -= MBLKL(mp);
+ mp = mp->b_cont;
+ if (mp == NULL)
+ goto done;
+ }
+
+ /* parse ULP header */
+again:
+ switch (proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ case IPPROTO_ESP:
+ case IPPROTO_SCTP:
+ /*
+ * These Internet Protocols are intentionally designed
+ * for hashing from the git-go. Port numbers are in the first
+ * word for transports, SPI is first for ESP.
+ */
+ if (mp->b_rptr + skip_len + 4 > mp->b_wptr)
+ goto done;
+ hash ^= PKT_HASH_4BYTES((mp->b_rptr + skip_len));
+ break;
+
+ case IPPROTO_AH: {
+ ah_t *ah = (ah_t *)(mp->b_rptr + skip_len);
+ uint_t ah_length = AH_TOTAL_LEN(ah);
+
+ if ((unsigned char *)ah + sizeof (ah_t) > mp->b_wptr)
+ goto done;
+
+ proto = ah->ah_nexthdr;
+ skip_len += ah_length;
+
+ /* if AH header is in its own mblk, skip it */
+ if (MBLKL(mp) <= skip_len) {
+ skip_len -= MBLKL(mp);
+ mp = mp->b_cont;
+ if (mp == NULL)
+ goto done;
+ }
+
+ goto again;
+ }
+ }
+
+done:
+ return (hash);
+}