summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--usr/src/uts/common/inet/ip/ip6_input.c9
-rw-r--r--usr/src/uts/common/inet/ip/ip_input.c9
-rw-r--r--usr/src/uts/common/io/aggr/aggr_grp.c321
-rw-r--r--usr/src/uts/common/io/aggr/aggr_port.c59
-rw-r--r--usr/src/uts/common/io/aggr/aggr_recv.c37
-rw-r--r--usr/src/uts/common/io/dld/dld_proto.c30
-rw-r--r--usr/src/uts/common/io/dls/dls.c14
-rw-r--r--usr/src/uts/common/io/dls/dls_link.c11
-rw-r--r--usr/src/uts/common/io/ixgbe/ixgbe_main.c359
-rw-r--r--usr/src/uts/common/io/ixgbe/ixgbe_sw.h18
-rw-r--r--usr/src/uts/common/io/mac/mac.c791
-rw-r--r--usr/src/uts/common/io/mac/mac_client.c123
-rw-r--r--usr/src/uts/common/io/mac/mac_datapath_setup.c246
-rw-r--r--usr/src/uts/common/io/mac/mac_provider.c4
-rw-r--r--usr/src/uts/common/io/mac/mac_sched.c142
-rw-r--r--usr/src/uts/common/io/mac/mac_soft_ring.c40
-rw-r--r--usr/src/uts/common/io/vnic/vnic_dev.c8
-rw-r--r--usr/src/uts/common/mapfiles/ddi.mapfile1
-rw-r--r--usr/src/uts/common/sys/aggr_impl.h27
-rw-r--r--usr/src/uts/common/sys/mac_client.h3
-rw-r--r--usr/src/uts/common/sys/mac_client_impl.h83
-rw-r--r--usr/src/uts/common/sys/mac_client_priv.h6
-rw-r--r--usr/src/uts/common/sys/mac_impl.h47
-rw-r--r--usr/src/uts/common/sys/mac_provider.h44
24 files changed, 1829 insertions, 603 deletions
diff --git a/usr/src/uts/common/inet/ip/ip6_input.c b/usr/src/uts/common/inet/ip/ip6_input.c
index 21cd3cd2fe..cdff35273e 100644
--- a/usr/src/uts/common/inet/ip/ip6_input.c
+++ b/usr/src/uts/common/inet/ip/ip6_input.c
@@ -23,6 +23,7 @@
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
*
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2018 Joyent, Inc.
*/
/* Copyright (c) 1990 Mentat Inc. */
@@ -143,11 +144,9 @@ static void ip_input_multicast_v6(ire_t *, mblk_t *, ip6_t *,
* The ill will always be valid if this function is called directly from
* the driver.
*
- * If ip_input_v6() is called from GLDv3:
- *
- * - This must be a non-VLAN IP stream.
- * - 'mp' is either an untagged or a special priority-tagged packet.
- * - Any VLAN tag that was in the MAC header has been stripped.
+ * If this chain is part of a VLAN stream, then the VLAN tag is
+ * stripped from the MAC header before being delivered to this
+ * function.
*
* If the IP header in packet is not 32-bit aligned, every message in the
* chain will be aligned before further operations. This is required on SPARC
diff --git a/usr/src/uts/common/inet/ip/ip_input.c b/usr/src/uts/common/inet/ip/ip_input.c
index ad753c165b..aea49c19d3 100644
--- a/usr/src/uts/common/inet/ip/ip_input.c
+++ b/usr/src/uts/common/inet/ip/ip_input.c
@@ -23,6 +23,7 @@
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
*
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2018 Joyent, Inc.
*/
/* Copyright (c) 1990 Mentat Inc. */
@@ -146,11 +147,9 @@ static void ip_input_multicast_v4(ire_t *, mblk_t *, ipha_t *,
* The ill will always be valid if this function is called directly from
* the driver.
*
- * If ip_input() is called from GLDv3:
- *
- * - This must be a non-VLAN IP stream.
- * - 'mp' is either an untagged or a special priority-tagged packet.
- * - Any VLAN tag that was in the MAC header has been stripped.
+ * If this chain is part of a VLAN stream, then the VLAN tag is
+ * stripped from the MAC header before being delivered to this
+ * function.
*
* If the IP header in packet is not 32-bit aligned, every message in the
* chain will be aligned before further operations. This is required on SPARC
diff --git a/usr/src/uts/common/io/aggr/aggr_grp.c b/usr/src/uts/common/io/aggr/aggr_grp.c
index 7e930c89e8..9932c2cb58 100644
--- a/usr/src/uts/common/io/aggr/aggr_grp.c
+++ b/usr/src/uts/common/io/aggr/aggr_grp.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2017, Joyent, Inc.
+ * Copyright 2018 Joyent, Inc.
*/
/*
@@ -124,6 +124,8 @@ static int aggr_pseudo_enable_intr(mac_intr_handle_t);
static int aggr_pseudo_start_ring(mac_ring_driver_t, uint64_t);
static int aggr_addmac(void *, const uint8_t *);
static int aggr_remmac(void *, const uint8_t *);
+static int aggr_addvlan(mac_group_driver_t, uint16_t);
+static int aggr_remvlan(mac_group_driver_t, uint16_t);
static mblk_t *aggr_rx_poll(void *, int);
static void aggr_fill_ring(void *, mac_ring_type_t, const int,
const int, mac_ring_info_t *, mac_ring_handle_t);
@@ -324,6 +326,7 @@ aggr_grp_attach_port(aggr_grp_t *grp, aggr_port_t *port)
return (B_FALSE);
}
+ mutex_enter(&grp->lg_stat_lock);
if (grp->lg_ifspeed == 0) {
/*
* The group inherits the speed of the first link being
@@ -337,8 +340,10 @@ aggr_grp_attach_port(aggr_grp_t *grp, aggr_port_t *port)
* the group link speed, as per 802.3ad. Since it is
* not, the attach is cancelled.
*/
+ mutex_exit(&grp->lg_stat_lock);
return (B_FALSE);
}
+ mutex_exit(&grp->lg_stat_lock);
grp->lg_nattached_ports++;
@@ -347,7 +352,9 @@ aggr_grp_attach_port(aggr_grp_t *grp, aggr_port_t *port)
*/
if (grp->lg_link_state != LINK_STATE_UP) {
grp->lg_link_state = LINK_STATE_UP;
+ mutex_enter(&grp->lg_stat_lock);
grp->lg_link_duplex = LINK_DUPLEX_FULL;
+ mutex_exit(&grp->lg_stat_lock);
link_state_changed = B_TRUE;
}
@@ -405,9 +412,11 @@ aggr_grp_detach_port(aggr_grp_t *grp, aggr_port_t *port)
grp->lg_nattached_ports--;
if (grp->lg_nattached_ports == 0) {
/* the last attached MAC port of the group is being detached */
- grp->lg_ifspeed = 0;
grp->lg_link_state = LINK_STATE_DOWN;
+ mutex_enter(&grp->lg_stat_lock);
+ grp->lg_ifspeed = 0;
grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
+ mutex_exit(&grp->lg_stat_lock);
link_state_changed = B_TRUE;
}
@@ -675,9 +684,13 @@ aggr_rem_pseudo_rx_ring(aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh)
}
/*
- * This function is called to create pseudo rings over the hardware rings of
- * the underlying device. Note that there is a 1:1 mapping between the pseudo
- * RX rings of the aggr and the hardware rings of the underlying port.
+ * Create pseudo rings over the HW rings of the port.
+ *
+ * o Create a pseudo ring in rx_grp per HW ring in the port's HW group.
+ *
+ * o Program existing unicast filters on the pseudo group into the HW group.
+ *
+ * o Program existing VLAN filters on the pseudo group into the HW group.
*/
static int
aggr_add_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp)
@@ -686,6 +699,7 @@ aggr_add_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp)
mac_ring_handle_t hw_rh[MAX_RINGS_PER_GROUP];
aggr_unicst_addr_t *addr, *a;
mac_perim_handle_t pmph;
+ aggr_vlan_t *avp;
int hw_rh_cnt, i = 0, j;
int err = 0;
@@ -693,63 +707,90 @@ aggr_add_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp)
mac_perim_enter_by_mh(port->lp_mh, &pmph);
/*
- * This function must be called after the aggr registers its mac
- * and its RX group has been initialized.
+ * This function must be called after the aggr registers its MAC
+ * and its Rx group has been initialized.
*/
ASSERT(rx_grp->arg_gh != NULL);
/*
- * Get the list the the underlying HW rings.
+ * Get the list of the underlying HW rings.
*/
hw_rh_cnt = mac_hwrings_get(port->lp_mch,
&port->lp_hwgh, hw_rh, MAC_RING_TYPE_RX);
if (port->lp_hwgh != NULL) {
/*
- * Quiesce the HW ring and the mac srs on the ring. Note
+ * Quiesce the HW ring and the MAC SRS on the ring. Note
* that the HW ring will be restarted when the pseudo ring
* is started. At that time all the packets will be
- * directly passed up to the pseudo RX ring and handled
- * by mac srs created over the pseudo RX ring.
+ * directly passed up to the pseudo Rx ring and handled
+ * by MAC SRS created over the pseudo Rx ring.
*/
mac_rx_client_quiesce(port->lp_mch);
mac_srs_perm_quiesce(port->lp_mch, B_TRUE);
}
/*
- * Add all the unicast addresses to the newly added port.
+ * Add existing VLAN and unicast address filters to the port.
*/
+ for (avp = list_head(&rx_grp->arg_vlans); avp != NULL;
+ avp = list_next(&rx_grp->arg_vlans, avp)) {
+ if ((err = aggr_port_addvlan(port, avp->av_vid)) != 0)
+ goto err;
+ }
+
for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next) {
if ((err = aggr_port_addmac(port, addr->aua_addr)) != 0)
- break;
+ goto err;
}
- for (i = 0; err == 0 && i < hw_rh_cnt; i++)
+ for (i = 0; i < hw_rh_cnt; i++) {
err = aggr_add_pseudo_rx_ring(port, rx_grp, hw_rh[i]);
+ if (err != 0)
+ goto err;
+ }
- if (err != 0) {
- for (j = 0; j < i; j++)
- aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[j]);
+ port->lp_rx_grp_added = B_TRUE;
+ mac_perim_exit(pmph);
+ return (0);
+
+err:
+ ASSERT(err != 0);
+
+ for (j = 0; j < i; j++)
+ aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[j]);
+
+ for (a = rx_grp->arg_macaddr; a != addr; a = a->aua_next)
+ aggr_port_remmac(port, a->aua_addr);
- for (a = rx_grp->arg_macaddr; a != addr; a = a->aua_next)
- aggr_port_remmac(port, a->aua_addr);
+ if (avp != NULL)
+ avp = list_prev(&rx_grp->arg_vlans, avp);
- if (port->lp_hwgh != NULL) {
- mac_srs_perm_quiesce(port->lp_mch, B_FALSE);
- mac_rx_client_restart(port->lp_mch);
- port->lp_hwgh = NULL;
+ for (; avp != NULL; avp = list_prev(&rx_grp->arg_vlans, avp)) {
+ int err2;
+
+ if ((err2 = aggr_port_remvlan(port, avp->av_vid)) != 0) {
+ cmn_err(CE_WARN, "Failed to remove VLAN %u from port %s"
+ ": errno %d.", avp->av_vid,
+ mac_client_name(port->lp_mch), err2);
}
- } else {
- port->lp_rx_grp_added = B_TRUE;
}
-done:
+
+ if (port->lp_hwgh != NULL) {
+ mac_srs_perm_quiesce(port->lp_mch, B_FALSE);
+ mac_rx_client_restart(port->lp_mch);
+ port->lp_hwgh = NULL;
+ }
+
mac_perim_exit(pmph);
return (err);
}
/*
- * This function is called by aggr to remove pseudo RX rings over the
- * HW rings of the underlying port.
+ * Destroy the pseudo rings mapping to this port and remove all VLAN
+ * and unicast filters from this port. Even if there are no underlying
+ * HW rings we must still remove the unicast filters to take the port
+ * out of promisc mode.
*/
static void
aggr_rem_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp)
@@ -771,16 +812,23 @@ aggr_rem_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp)
hw_rh_cnt = mac_hwrings_get(port->lp_mch,
&hwgh, hw_rh, MAC_RING_TYPE_RX);
- /*
- * If hw_rh_cnt is 0, it means that the underlying port does not
- * support RX rings. Directly return in this case.
- */
for (i = 0; i < hw_rh_cnt; i++)
aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[i]);
for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next)
aggr_port_remmac(port, addr->aua_addr);
+ for (aggr_vlan_t *avp = list_head(&rx_grp->arg_vlans); avp != NULL;
+ avp = list_next(&rx_grp->arg_vlans, avp)) {
+ int err;
+
+ if ((err = aggr_port_remvlan(port, avp->av_vid)) != 0) {
+ cmn_err(CE_WARN, "Failed to remove VLAN %u from port %s"
+ ": errno %d.", avp->av_vid,
+ mac_client_name(port->lp_mch), err);
+ }
+ }
+
if (port->lp_hwgh != NULL) {
port->lp_hwgh = NULL;
@@ -1307,6 +1355,10 @@ aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports,
bzero(&grp->lg_tx_group, sizeof (aggr_pseudo_tx_group_t));
aggr_lacp_init_grp(grp);
+ grp->lg_rx_group.arg_untagged = 0;
+ list_create(&(grp->lg_rx_group.arg_vlans), sizeof (aggr_vlan_t),
+ offsetof(aggr_vlan_t, av_link));
+
/* add MAC ports to group */
grp->lg_ports = NULL;
grp->lg_nports = 0;
@@ -1323,7 +1375,7 @@ aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports,
grp->lg_key = key;
for (i = 0; i < nports; i++) {
- err = aggr_grp_add_port(grp, ports[i].lp_linkid, force, NULL);
+ err = aggr_grp_add_port(grp, ports[i].lp_linkid, force, &port);
if (err != 0)
goto bail;
}
@@ -1545,7 +1597,9 @@ aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port,
continue;
val = aggr_port_stat(port, stat);
val -= port->lp_stat[i];
+ mutex_enter(&grp->lg_stat_lock);
grp->lg_stat[i] += val;
+ mutex_exit(&grp->lg_stat_lock);
}
for (i = 0; i < ETHER_NSTAT; i++) {
stat = i + MACTYPE_STAT_MIN;
@@ -1553,7 +1607,9 @@ aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port,
continue;
val = aggr_port_stat(port, stat);
val -= port->lp_ether_stat[i];
+ mutex_enter(&grp->lg_stat_lock);
grp->lg_ether_stat[i] += val;
+ mutex_exit(&grp->lg_stat_lock);
}
grp->lg_nports--;
@@ -1802,6 +1858,8 @@ aggr_grp_delete(datalink_id_t linkid, cred_t *cred)
VERIFY(mac_unregister(grp->lg_mh) == 0);
grp->lg_mh = NULL;
+ list_destroy(&(grp->lg_rx_group.arg_vlans));
+
AGGR_GRP_REFRELE(grp);
return (0);
}
@@ -1884,6 +1942,8 @@ aggr_grp_stat(aggr_grp_t *grp, uint_t stat, uint64_t *val)
aggr_port_t *port;
uint_t stat_index;
+ ASSERT(MUTEX_HELD(&grp->lg_stat_lock));
+
/* We only aggregate counter statistics. */
if (IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat) ||
IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat)) {
@@ -1952,10 +2012,9 @@ static int
aggr_m_stat(void *arg, uint_t stat, uint64_t *val)
{
aggr_grp_t *grp = arg;
- mac_perim_handle_t mph;
int rval = 0;
- mac_perim_enter_by_mh(grp->lg_mh, &mph);
+ mutex_enter(&grp->lg_stat_lock);
switch (stat) {
case MAC_STAT_IFSPEED:
@@ -1975,7 +2034,7 @@ aggr_m_stat(void *arg, uint_t stat, uint64_t *val)
rval = aggr_grp_stat(grp, stat, val);
}
- mac_perim_exit(mph);
+ mutex_exit(&grp->lg_stat_lock);
return (rval);
}
@@ -2207,7 +2266,7 @@ aggr_m_capab_get(void *arg, mac_capab_t cap, void *cap_data)
}
/*
- * Callback funtion for MAC layer to register groups.
+ * Callback function for MAC layer to register groups.
*/
static void
aggr_fill_group(void *arg, mac_ring_type_t rtype, const int index,
@@ -2229,6 +2288,14 @@ aggr_fill_group(void *arg, mac_ring_type_t rtype, const int index,
infop->mgi_addmac = aggr_addmac;
infop->mgi_remmac = aggr_remmac;
infop->mgi_count = rx_group->arg_ring_cnt;
+
+ /*
+ * Always set the HW VLAN callbacks. They are smart
+ * enough to know when a port has HW VLAN filters to
+ * program and when it doesn't.
+ */
+ infop->mgi_addvlan = aggr_addvlan;
+ infop->mgi_remvlan = aggr_remvlan;
} else {
tx_group = &grp->lg_tx_group;
tx_group->atg_gh = gh;
@@ -2440,6 +2507,186 @@ aggr_remmac(void *arg, const uint8_t *mac_addr)
}
/*
+ * Search for VID in the Rx group's list and return a pointer if
+ * found. Otherwise return NULL.
+ */
+static aggr_vlan_t *
+aggr_find_vlan(aggr_pseudo_rx_group_t *rx_group, uint16_t vid)
+{
+ ASSERT(MAC_PERIM_HELD(rx_group->arg_grp->lg_mh));
+ for (aggr_vlan_t *avp = list_head(&rx_group->arg_vlans); avp != NULL;
+ avp = list_next(&rx_group->arg_vlans, avp)) {
+ if (avp->av_vid == vid)
+ return (avp);
+ }
+
+ return (NULL);
+}
+
+/*
+ * Accept traffic on the specified VID.
+ *
+ * Persist VLAN state in the aggr so that ports added later will
+ * receive the correct filters. In the future it would be nice to
+ * allow aggr to iterate its clients instead of duplicating state.
+ */
+static int
+aggr_addvlan(mac_group_driver_t gdriver, uint16_t vid)
+{
+ aggr_pseudo_rx_group_t *rx_group = (aggr_pseudo_rx_group_t *)gdriver;
+ aggr_grp_t *aggr = rx_group->arg_grp;
+ aggr_port_t *port, *p;
+ mac_perim_handle_t mph;
+ int err = 0;
+ aggr_vlan_t *avp = NULL;
+
+ mac_perim_enter_by_mh(aggr->lg_mh, &mph);
+
+ if (vid == MAC_VLAN_UNTAGGED) {
+ /*
+ * Aggr is both a MAC provider and MAC client. As a
+ * MAC provider it is passed MAC_VLAN_UNTAGGED by its
+ * client. As a client itself, it should pass
+ * VLAN_ID_NONE to its ports.
+ */
+ vid = VLAN_ID_NONE;
+ rx_group->arg_untagged++;
+ goto update_ports;
+ }
+
+ avp = aggr_find_vlan(rx_group, vid);
+
+ if (avp != NULL) {
+ avp->av_refs++;
+ mac_perim_exit(mph);
+ return (0);
+ }
+
+ avp = kmem_zalloc(sizeof (aggr_vlan_t), KM_SLEEP);
+ avp->av_vid = vid;
+ avp->av_refs = 1;
+
+update_ports:
+ for (port = aggr->lg_ports; port != NULL; port = port->lp_next)
+ if ((err = aggr_port_addvlan(port, vid)) != 0)
+ break;
+
+ if (err != 0) {
+ /*
+ * If any of these calls fail then we are in a
+ * situation where the ports have different HW state.
+ * There's no reasonable action the MAC client can
+ * take in this scenario to rectify the situation.
+ */
+ for (p = aggr->lg_ports; p != port; p = p->lp_next) {
+ int err2;
+
+ if ((err2 = aggr_port_remvlan(p, vid)) != 0) {
+ cmn_err(CE_WARN, "Failed to remove VLAN %u"
+ " from port %s: errno %d.", vid,
+ mac_client_name(p->lp_mch), err2);
+ }
+
+ }
+
+ if (vid == VLAN_ID_NONE)
+ rx_group->arg_untagged--;
+
+ if (avp != NULL) {
+ kmem_free(avp, sizeof (aggr_vlan_t));
+ avp = NULL;
+ }
+ }
+
+ if (avp != NULL)
+ list_insert_tail(&rx_group->arg_vlans, avp);
+
+done:
+ mac_perim_exit(mph);
+ return (err);
+}
+
+/*
+ * Stop accepting traffic on this VLAN if it's the last use of this VLAN.
+ */
+static int
+aggr_remvlan(mac_group_driver_t gdriver, uint16_t vid)
+{
+ aggr_pseudo_rx_group_t *rx_group = (aggr_pseudo_rx_group_t *)gdriver;
+ aggr_grp_t *aggr = rx_group->arg_grp;
+ aggr_port_t *port, *p;
+ mac_perim_handle_t mph;
+ int err = 0;
+ aggr_vlan_t *avp = NULL;
+
+ mac_perim_enter_by_mh(aggr->lg_mh, &mph);
+
+ /*
+ * See the comment in aggr_addvlan().
+ */
+ if (vid == MAC_VLAN_UNTAGGED) {
+ vid = VLAN_ID_NONE;
+ rx_group->arg_untagged--;
+
+ if (rx_group->arg_untagged > 0)
+ goto done;
+
+ goto update_ports;
+ }
+
+ avp = aggr_find_vlan(rx_group, vid);
+
+ if (avp == NULL) {
+ err = ENOENT;
+ goto done;
+ }
+
+ avp->av_refs--;
+
+ if (avp->av_refs > 0)
+ goto done;
+
+update_ports:
+ for (port = aggr->lg_ports; port != NULL; port = port->lp_next)
+ if ((err = aggr_port_remvlan(port, vid)) != 0)
+ break;
+
+ /*
+ * If we fail to remove the VLAN from a port, restore the filter
+ * on the ports already updated; see the comment in aggr_addvlan()
+ * for why no further recovery is possible if that also fails.
+ */
+ if (err != 0) {
+ for (p = aggr->lg_ports; p != port; p = p->lp_next) {
+ int err2;
+
+ if ((err2 = aggr_port_addvlan(p, vid)) != 0) {
+ cmn_err(CE_WARN, "Failed to add VLAN %u"
+ " to port %s: errno %d.", vid,
+ mac_client_name(p->lp_mch), err2);
+ }
+ }
+
+ if (avp != NULL)
+ avp->av_refs++;
+
+ if (vid == VLAN_ID_NONE)
+ rx_group->arg_untagged++;
+
+ goto done;
+ }
+
+ if (err == 0 && avp != NULL) {
+ VERIFY3U(avp->av_refs, ==, 0);
+ list_remove(&rx_group->arg_vlans, avp);
+ kmem_free(avp, sizeof (aggr_vlan_t));
+ }
+
+done:
+ mac_perim_exit(mph);
+ return (err);
+}
+
+/*
* Add or remove the multicast addresses that are defined for the group
* to or from the specified port.
*
diff --git a/usr/src/uts/common/io/aggr/aggr_port.c b/usr/src/uts/common/io/aggr/aggr_port.c
index 00545d2c03..9d2edd4f97 100644
--- a/usr/src/uts/common/io/aggr/aggr_port.c
+++ b/usr/src/uts/common/io/aggr/aggr_port.c
@@ -21,6 +21,8 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2012 OmniTI Computer Consulting, Inc All rights reserved.
+ * Copyright 2020 Joyent, Inc.
*/
/*
@@ -373,10 +375,14 @@ aggr_port_notify_link(aggr_grp_t *grp, aggr_port_t *port)
/* link speed changes? */
ifspeed = aggr_port_stat(port, MAC_STAT_IFSPEED);
if (port->lp_ifspeed != ifspeed) {
+ mutex_enter(&grp->lg_stat_lock);
+
if (port->lp_state == AGGR_PORT_STATE_ATTACHED)
do_detach |= (ifspeed != grp->lg_ifspeed);
else
do_attach |= (ifspeed == grp->lg_ifspeed);
+
+ mutex_exit(&grp->lg_stat_lock);
}
port->lp_ifspeed = ifspeed;
@@ -528,8 +534,15 @@ aggr_port_promisc(aggr_port_t *port, boolean_t on)
if (on) {
mac_rx_clear(port->lp_mch);
+
+ /*
+ * We use the promisc callback because without hardware
+ * rings, we deliver through flows that will cause duplicate
+ * delivery of packets when we've flipped into this mode
+ * to compensate for the lack of hardware MAC matching
+ */
rc = mac_promisc_add(port->lp_mch, MAC_CLIENT_PROMISC_ALL,
- aggr_recv_cb, port, &port->lp_mphp,
+ aggr_recv_promisc_cb, port, &port->lp_mphp,
MAC_PROMISC_FLAGS_NO_TX_LOOP);
if (rc != 0) {
mac_rx_set(port->lp_mch, aggr_recv_cb, port);
@@ -679,3 +692,47 @@ aggr_port_remmac(aggr_port_t *port, const uint8_t *mac_addr)
}
mac_perim_exit(pmph);
}
+
+int
+aggr_port_addvlan(aggr_port_t *port, uint16_t vid)
+{
+ mac_perim_handle_t pmph;
+ int err;
+
+ ASSERT(MAC_PERIM_HELD(port->lp_grp->lg_mh));
+ mac_perim_enter_by_mh(port->lp_mh, &pmph);
+
+ /*
+ * Add the VLAN filter to the HW group if the port has a HW
+ * group. If the port doesn't have a HW group, then it will
+ * implicitly allow tagged traffic to pass and there is
+ * nothing to do.
+ */
+ if (port->lp_hwgh == NULL) {
+ mac_perim_exit(pmph);
+ return (0);
+ }
+
+ err = mac_hwgroup_addvlan(port->lp_hwgh, vid);
+ mac_perim_exit(pmph);
+ return (err);
+}
+
+int
+aggr_port_remvlan(aggr_port_t *port, uint16_t vid)
+{
+ mac_perim_handle_t pmph;
+ int err;
+
+ ASSERT(MAC_PERIM_HELD(port->lp_grp->lg_mh));
+ mac_perim_enter_by_mh(port->lp_mh, &pmph);
+
+ if (port->lp_hwgh == NULL) {
+ mac_perim_exit(pmph);
+ return (0);
+ }
+
+ err = mac_hwgroup_remvlan(port->lp_hwgh, vid);
+ mac_perim_exit(pmph);
+ return (err);
+}
diff --git a/usr/src/uts/common/io/aggr/aggr_recv.c b/usr/src/uts/common/io/aggr/aggr_recv.c
index 2bdb7872e3..33a060da48 100644
--- a/usr/src/uts/common/io/aggr/aggr_recv.c
+++ b/usr/src/uts/common/io/aggr/aggr_recv.c
@@ -21,6 +21,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2012 OmniTI Computer Consulting, Inc All rights reserved.
*/
/*
@@ -68,16 +69,28 @@ aggr_recv_lacp(aggr_port_t *port, mac_resource_handle_t mrh, mblk_t *mp)
/*
* Callback function invoked by MAC service module when packets are
- * made available by a MAC port.
+ * made available by a MAC port, both in promisc_on mode and not.
*/
/* ARGSUSED */
-void
-aggr_recv_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
- boolean_t loopback)
+static void
+aggr_recv_path_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
+ boolean_t loopback, boolean_t promisc_path)
{
aggr_port_t *port = (aggr_port_t *)arg;
aggr_grp_t *grp = port->lp_grp;
+ /*
+ * In the case where lp_promisc_on has been turned on to
+ * compensate for insufficient hardware MAC matching and
+ * hardware rings are not in use we will fall back to
+ * using flows for delivery which can result in duplicates
+ * pushed up the stack. Only respect the chosen path.
+ */
+ if (port->lp_promisc_on != promisc_path) {
+ freemsgchain(mp);
+ return;
+ }
+
if (grp->lg_lacp_mode == AGGR_LACP_OFF) {
aggr_mac_rx(grp->lg_mh, mrh, mp);
} else {
@@ -161,3 +174,19 @@ aggr_recv_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
}
}
}
+
+/* ARGSUSED */
+void
+aggr_recv_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
+ boolean_t loopback)
+{
+ aggr_recv_path_cb(arg, mrh, mp, loopback, B_FALSE);
+}
+
+/* ARGSUSED */
+void
+aggr_recv_promisc_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
+ boolean_t loopback)
+{
+ aggr_recv_path_cb(arg, mrh, mp, loopback, B_TRUE);
+}
diff --git a/usr/src/uts/common/io/dld/dld_proto.c b/usr/src/uts/common/io/dld/dld_proto.c
index cadd2a76d3..b7eeb35b92 100644
--- a/usr/src/uts/common/io/dld/dld_proto.c
+++ b/usr/src/uts/common/io/dld/dld_proto.c
@@ -1377,24 +1377,22 @@ dld_capab_direct(dld_str_t *dsp, void *data, uint_t flags)
}
/*
- * dld_capab_poll_enable()
+ * This function is misnamed. All polling and fanouts are run out of
+ * the lower MAC for VNICs and out of the MAC for NICs. The
+ * availability of Rx rings and promiscous mode is taken care of
+ * between the soft ring set (mac_srs), the Rx ring, and the SW
+ * classifier. Fanout, if necessary, is done by the soft rings that
+ * are part of the SRS. By default the SRS divvies up the packets
+ * based on protocol: TCP, UDP, or Other (OTH).
*
- * This function is misnamed. All polling and fanouts are run out of the
- * lower mac (in case of VNIC and the only mac in case of NICs). The
- * availability of Rx ring and promiscous mode is all taken care between
- * the soft ring set (mac_srs), the Rx ring, and S/W classifier. Any
- * fanout necessary is done by the soft rings that are part of the
- * mac_srs (by default mac_srs sends the packets up via a TCP and
- * non TCP soft ring).
- *
- * The mac_srs (or its associated soft rings) always store the ill_rx_ring
+ * The SRS (or its associated soft rings) always store the ill_rx_ring
* (the cookie returned when they registered with IP during plumb) as their
* 2nd argument which is passed up as mac_resource_handle_t. The upcall
* function and 1st argument is what the caller registered when they
* called mac_rx_classify_flow_add() to register the flow. For VNIC,
* the function is vnic_rx and argument is vnic_t. For regular NIC
* case, it mac_rx_default and mac_handle_t. As explained above, the
- * mac_srs (or its soft ring) will add the ill_rx_ring (mac_resource_handle_t)
+ * SRS (or its soft ring) will add the ill_rx_ring (mac_resource_handle_t)
* from its stored 2nd argument.
*/
static int
@@ -1407,11 +1405,11 @@ dld_capab_poll_enable(dld_str_t *dsp, dld_capab_poll_t *poll)
return (ENOTSUP);
/*
- * Enable client polling if and only if DLS bypass is possible.
- * Special cases like VLANs need DLS processing in the Rx data path.
- * In such a case we can neither allow the client (IP) to directly
- * poll the softring (since DLS processing hasn't been done) nor can
- * we allow DLS bypass.
+ * Enable client polling if and only if DLS bypass is
+ * possible. Some traffic requires DLS processing in the Rx
+ * data path. In such a case we can neither allow the client
+ * (IP) to directly poll the soft ring (since DLS processing
+ * hasn't been done) nor can we allow DLS bypass.
*/
if (!mac_rx_bypass_set(dsp->ds_mch, dsp->ds_rx, dsp->ds_rx_arg))
return (ENOTSUP);
diff --git a/usr/src/uts/common/io/dls/dls.c b/usr/src/uts/common/io/dls/dls.c
index d6bc723371..2dc16c4586 100644
--- a/usr/src/uts/common/io/dls/dls.c
+++ b/usr/src/uts/common/io/dls/dls.c
@@ -171,16 +171,16 @@ dls_bind(dld_str_t *dsp, uint32_t sap)
/*
* The MAC layer does the VLAN demultiplexing and will only pass up
* untagged packets to non-promiscuous primary MAC clients. In order to
- * support the binding to the VLAN SAP which is required by DLPI, dls
+ * support binding to the VLAN SAP, which is required by DLPI, DLS
* needs to get a copy of all tagged packets when the client binds to
* the VLAN SAP. We do this by registering a separate promiscuous
- * callback for each dls client binding to that SAP.
+ * callback for each DLS client binding to that SAP.
*
* Note: even though there are two promiscuous handles in dld_str_t,
* ds_mph is for the regular promiscuous mode, ds_vlan_mph is the handle
- * to receive VLAN pkt when promiscuous mode is not on. Only one of
- * them can be non-NULL at the same time, to avoid receiving dup copies
- * of pkts.
+ * to receive VLAN traffic when promiscuous mode is not on. Only one of
+ * them can be non-NULL at the same time, to avoid receiving duplicate
+ * copies of packets.
*/
if (sap == ETHERTYPE_VLAN && dsp->ds_promisc == 0) {
int err;
@@ -652,8 +652,8 @@ dls_mac_active_set(dls_link_t *dlp)
/* request the primary MAC address */
if ((err = mac_unicast_add(dlp->dl_mch, NULL,
MAC_UNICAST_PRIMARY | MAC_UNICAST_TAG_DISABLE |
- MAC_UNICAST_DISABLE_TX_VID_CHECK, &dlp->dl_mah, 0,
- &diag)) != 0) {
+ MAC_UNICAST_DISABLE_TX_VID_CHECK, &dlp->dl_mah,
+ VLAN_ID_NONE, &diag)) != 0) {
return (err);
}
diff --git a/usr/src/uts/common/io/dls/dls_link.c b/usr/src/uts/common/io/dls/dls_link.c
index 23580d0c40..6f9049b724 100644
--- a/usr/src/uts/common/io/dls/dls_link.c
+++ b/usr/src/uts/common/io/dls/dls_link.c
@@ -21,7 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright 2017 Joyent, Inc.
+ * Copyright 2018 Joyent, Inc.
*/
/*
@@ -382,7 +382,16 @@ i_dls_link_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
vid = VLAN_ID(mhi.mhi_tci);
+ /*
+ * This condition is true only when a sun4v vsw client
+ * is on the scene; as it is the only type of client
+ * that multiplexes VLANs on a single client instance.
+ * All other types of clients have one VLAN per client
+ * instance. In that case, MAC strips the VLAN tag
+ * before delivering it to DLS (see mac_rx_deliver()).
+ */
if (mhi.mhi_istagged) {
+
/*
* If it is tagged traffic, send it upstream to
* all dld_str_t which are attached to the physical
diff --git a/usr/src/uts/common/io/ixgbe/ixgbe_main.c b/usr/src/uts/common/io/ixgbe/ixgbe_main.c
index 2c90127c6c..3463be30b9 100644
--- a/usr/src/uts/common/io/ixgbe/ixgbe_main.c
+++ b/usr/src/uts/common/io/ixgbe/ixgbe_main.c
@@ -25,7 +25,7 @@
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2019 Joyent, Inc.
+ * Copyright 2020 Joyent, Inc.
* Copyright 2012 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013 Saso Kiselkov. All rights reserved.
* Copyright (c) 2013 OSN Online Service Nuernberg GmbH. All rights reserved.
@@ -57,8 +57,8 @@ static int ixgbe_alloc_rings(ixgbe_t *);
static void ixgbe_free_rings(ixgbe_t *);
static int ixgbe_alloc_rx_data(ixgbe_t *);
static void ixgbe_free_rx_data(ixgbe_t *);
-static void ixgbe_setup_rings(ixgbe_t *);
-static void ixgbe_setup_rx(ixgbe_t *);
+static int ixgbe_setup_rings(ixgbe_t *);
+static int ixgbe_setup_rx(ixgbe_t *);
static void ixgbe_setup_tx(ixgbe_t *);
static void ixgbe_setup_rx_ring(ixgbe_rx_ring_t *);
static void ixgbe_setup_tx_ring(ixgbe_tx_ring_t *);
@@ -67,6 +67,7 @@ static void ixgbe_setup_vmdq(ixgbe_t *);
static void ixgbe_setup_vmdq_rss(ixgbe_t *);
static void ixgbe_setup_rss_table(ixgbe_t *);
static void ixgbe_init_unicst(ixgbe_t *);
+static int ixgbe_init_vlan(ixgbe_t *);
static int ixgbe_unicst_find(ixgbe_t *, const uint8_t *);
static void ixgbe_setup_multicst(ixgbe_t *);
static void ixgbe_get_hw_state(ixgbe_t *);
@@ -113,6 +114,8 @@ static void ixgbe_intr_other_work(ixgbe_t *, uint32_t);
static void ixgbe_get_driver_control(struct ixgbe_hw *);
static int ixgbe_addmac(void *, const uint8_t *);
static int ixgbe_remmac(void *, const uint8_t *);
+static int ixgbe_addvlan(mac_group_driver_t, uint16_t);
+static int ixgbe_remvlan(mac_group_driver_t, uint16_t);
static void ixgbe_release_driver_control(struct ixgbe_hw *);
static int ixgbe_attach(dev_info_t *, ddi_attach_cmd_t);
@@ -1159,6 +1162,8 @@ ixgbe_init_driver_settings(ixgbe_t *ixgbe)
rx_group = &ixgbe->rx_groups[i];
rx_group->index = i;
rx_group->ixgbe = ixgbe;
+ list_create(&rx_group->vlans, sizeof (ixgbe_vlan_t),
+ offsetof(ixgbe_vlan_t, ixvl_link));
}
for (i = 0; i < ixgbe->num_tx_rings; i++) {
@@ -1909,7 +1914,8 @@ ixgbe_start(ixgbe_t *ixgbe, boolean_t alloc_buffer)
/*
* Setup the rx/tx rings
*/
- ixgbe_setup_rings(ixgbe);
+ if (ixgbe_setup_rings(ixgbe) != IXGBE_SUCCESS)
+ goto start_failure;
/*
* ixgbe_start() will be called when resetting, however if reset
@@ -2282,6 +2288,16 @@ ixgbe_free_rings(ixgbe_t *ixgbe)
ixgbe->tx_rings = NULL;
}
+ for (uint_t i = 0; i < ixgbe->num_rx_groups; i++) {
+ ixgbe_vlan_t *vlp;
+ ixgbe_rx_group_t *rx_group = &ixgbe->rx_groups[i];
+
+ while ((vlp = list_remove_head(&rx_group->vlans)) != NULL)
+ kmem_free(vlp, sizeof (ixgbe_vlan_t));
+
+ list_destroy(&rx_group->vlans);
+ }
+
if (ixgbe->rx_groups != NULL) {
kmem_free(ixgbe->rx_groups,
sizeof (ixgbe_rx_group_t) * ixgbe->num_rx_groups);
@@ -2336,7 +2352,7 @@ ixgbe_free_rx_data(ixgbe_t *ixgbe)
/*
* ixgbe_setup_rings - Setup rx/tx rings.
*/
-static void
+static int
ixgbe_setup_rings(ixgbe_t *ixgbe)
{
/*
@@ -2346,9 +2362,12 @@ ixgbe_setup_rings(ixgbe_t *ixgbe)
* 2. Initialize necessary registers for receive/transmit;
* 3. Initialize software pointers/parameters for receive/transmit;
*/
- ixgbe_setup_rx(ixgbe);
+ if (ixgbe_setup_rx(ixgbe) != IXGBE_SUCCESS)
+ return (IXGBE_FAILURE);
ixgbe_setup_tx(ixgbe);
+
+ return (IXGBE_SUCCESS);
}
static void
@@ -2435,7 +2454,7 @@ ixgbe_setup_rx_ring(ixgbe_rx_ring_t *rx_ring)
IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rx_ring->hw_index), reg_val);
}
-static void
+static int
ixgbe_setup_rx(ixgbe_t *ixgbe)
{
ixgbe_rx_ring_t *rx_ring;
@@ -2528,6 +2547,15 @@ ixgbe_setup_rx(ixgbe_t *ixgbe)
}
/*
+ * Initialize VLAN SW and HW state if VLAN filtering is
+ * enabled.
+ */
+ if (ixgbe->vlft_enabled) {
+ if (ixgbe_init_vlan(ixgbe) != IXGBE_SUCCESS)
+ return (IXGBE_FAILURE);
+ }
+
+ /*
* Enable the receive unit. This must be done after filter
* control is set in FCTRL. On 82598, we disable the descriptor monitor.
* 82598 is the only adapter which defines this RXCTRL option.
@@ -2618,6 +2646,8 @@ ixgbe_setup_rx(ixgbe_t *ixgbe)
IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, reg_val);
}
+
+ return (IXGBE_SUCCESS);
}
static void
@@ -2819,7 +2849,7 @@ static void
ixgbe_setup_vmdq(ixgbe_t *ixgbe)
{
struct ixgbe_hw *hw = &ixgbe->hw;
- uint32_t vmdctl, i, vtctl;
+ uint32_t vmdctl, i, vtctl, vlnctl;
/*
* Setup the VMDq Control register, enable VMDq based on
@@ -2855,10 +2885,20 @@ ixgbe_setup_vmdq(ixgbe_t *ixgbe)
/*
* Enable Virtualization and Replication.
*/
- vtctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
+ vtctl = IXGBE_READ_REG(hw, IXGBE_VT_CTL);
+ ixgbe->rx_def_group = vtctl & IXGBE_VT_CTL_POOL_MASK;
+ vtctl |= IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vtctl);
/*
+ * Enable VLAN filtering and switching (VFTA and VLVF).
+ */
+ vlnctl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
+ vlnctl |= IXGBE_VLNCTRL_VFE;
+ IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctl);
+ ixgbe->vlft_enabled = B_TRUE;
+
+ /*
* Enable receiving packets to all VFs
*/
IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), IXGBE_VFRE_ENABLE_ALL);
@@ -2878,7 +2918,7 @@ ixgbe_setup_vmdq_rss(ixgbe_t *ixgbe)
{
struct ixgbe_hw *hw = &ixgbe->hw;
uint32_t i, mrqc;
- uint32_t vtctl, vmdctl;
+ uint32_t vtctl, vmdctl, vlnctl;
/*
* Initialize RETA/ERETA table
@@ -2962,10 +3002,21 @@ ixgbe_setup_vmdq_rss(ixgbe_t *ixgbe)
/*
* Enable Virtualization and Replication.
*/
+ vtctl = IXGBE_READ_REG(hw, IXGBE_VT_CTL);
+ ixgbe->rx_def_group = vtctl & IXGBE_VT_CTL_POOL_MASK;
+ vtctl |= IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
- vtctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vtctl);
/*
+ * Enable VLAN filtering and switching (VFTA and VLVF).
+ */
+ vlnctl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
+ vlnctl |= IXGBE_VLNCTRL_VFE;
+ IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctl);
+ ixgbe->vlft_enabled = B_TRUE;
+
+ /*
* Enable receiving packets to all VFs
*/
IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), IXGBE_VFRE_ENABLE_ALL);
@@ -3136,6 +3187,53 @@ ixgbe_unicst_find(ixgbe_t *ixgbe, const uint8_t *mac_addr)
}
/*
+ * Restore the HW state to match the SW state during restart.
+ */
+static int
+ixgbe_init_vlan(ixgbe_t *ixgbe)
+{
+ /*
+ * The device is starting for the first time; there is nothing
+ * to do.
+ */
+ if (!ixgbe->vlft_init) {
+ ixgbe->vlft_init = B_TRUE;
+ return (IXGBE_SUCCESS);
+ }
+
+ for (uint_t i = 0; i < ixgbe->num_rx_groups; i++) {
+ int ret;
+ boolean_t vlvf_bypass;
+ ixgbe_rx_group_t *rxg = &ixgbe->rx_groups[i];
+ struct ixgbe_hw *hw = &ixgbe->hw;
+
+ if (rxg->aupe) {
+ uint32_t vml2flt;
+
+ vml2flt = IXGBE_READ_REG(hw, IXGBE_VMOLR(rxg->index));
+ vml2flt |= IXGBE_VMOLR_AUPE;
+ IXGBE_WRITE_REG(hw, IXGBE_VMOLR(rxg->index), vml2flt);
+ }
+
+ vlvf_bypass = (rxg->index == ixgbe->rx_def_group);
+ for (ixgbe_vlan_t *vlp = list_head(&rxg->vlans); vlp != NULL;
+ vlp = list_next(&rxg->vlans, vlp)) {
+ ret = ixgbe_set_vfta(hw, vlp->ixvl_vid, rxg->index,
+ B_TRUE, vlvf_bypass);
+
+ if (ret != IXGBE_SUCCESS) {
+ ixgbe_error(ixgbe, "Failed to program VFTA"
+ " for group %u, VID: %u, ret: %d.",
+ rxg->index, vlp->ixvl_vid, ret);
+ return (IXGBE_FAILURE);
+ }
+ }
+ }
+
+ return (IXGBE_SUCCESS);
+}
+
+/*
* ixgbe_multicst_add - Add a multicst address.
*/
int
@@ -6161,6 +6259,7 @@ ixgbe_fill_group(void *arg, mac_ring_type_t rtype, const int index,
mac_group_info_t *infop, mac_group_handle_t gh)
{
ixgbe_t *ixgbe = (ixgbe_t *)arg;
+ struct ixgbe_hw *hw = &ixgbe->hw;
switch (rtype) {
case MAC_RING_TYPE_RX: {
@@ -6174,6 +6273,20 @@ ixgbe_fill_group(void *arg, mac_ring_type_t rtype, const int index,
infop->mgi_stop = NULL;
infop->mgi_addmac = ixgbe_addmac;
infop->mgi_remmac = ixgbe_remmac;
+
+ if ((ixgbe->classify_mode == IXGBE_CLASSIFY_VMDQ ||
+ ixgbe->classify_mode == IXGBE_CLASSIFY_VMDQ_RSS) &&
+ (hw->mac.type == ixgbe_mac_82599EB ||
+ hw->mac.type == ixgbe_mac_X540 ||
+ hw->mac.type == ixgbe_mac_X550 ||
+ hw->mac.type == ixgbe_mac_X550EM_x)) {
+ infop->mgi_addvlan = ixgbe_addvlan;
+ infop->mgi_remvlan = ixgbe_remvlan;
+ } else {
+ infop->mgi_addvlan = NULL;
+ infop->mgi_remvlan = NULL;
+ }
+
infop->mgi_count = (ixgbe->num_rx_rings / ixgbe->num_rx_groups);
break;
@@ -6273,6 +6386,232 @@ ixgbe_rx_ring_intr_disable(mac_intr_handle_t intrh)
return (0);
}
+static ixgbe_vlan_t *
+ixgbe_find_vlan(ixgbe_rx_group_t *rx_group, uint16_t vid)
+{
+ for (ixgbe_vlan_t *vlp = list_head(&rx_group->vlans); vlp != NULL;
+ vlp = list_next(&rx_group->vlans, vlp)) {
+ if (vlp->ixvl_vid == vid)
+ return (vlp);
+ }
+
+ return (NULL);
+}
+
+/*
+ * Attempt to use a VLAN HW filter for this group. If the group is
+ * interested in untagged packets then set AUPE only. If the group is
+ * the default then only set the VFTA. Leave the VLVF slots open for
+ * reserved groups to guarantee their use of HW filtering.
+ */
+static int
+ixgbe_addvlan(mac_group_driver_t gdriver, uint16_t vid)
+{
+ ixgbe_rx_group_t *rx_group = (ixgbe_rx_group_t *)gdriver;
+ ixgbe_t *ixgbe = rx_group->ixgbe;
+ struct ixgbe_hw *hw = &ixgbe->hw;
+ ixgbe_vlan_t *vlp;
+ int ret;
+ boolean_t is_def_grp;
+
+ mutex_enter(&ixgbe->gen_lock);
+
+ if (ixgbe->ixgbe_state & IXGBE_SUSPENDED) {
+ mutex_exit(&ixgbe->gen_lock);
+ return (ECANCELED);
+ }
+
+ /*
+ * Let's be sure VLAN filtering is enabled.
+ */
+ VERIFY3B(ixgbe->vlft_enabled, ==, B_TRUE);
+ is_def_grp = (rx_group->index == ixgbe->rx_def_group);
+
+ /*
+ * VLAN filtering is enabled but we want to receive untagged
+ * traffic on this group -- set the AUPE bit on the group and
+ * leave the VLAN tables alone.
+ */
+ if (vid == MAC_VLAN_UNTAGGED) {
+ /*
+ * We never enable AUPE on the default group; it is
+ * redundant. Untagged traffic which passes L2
+ * filtering is delivered to the default group if no
+ * other group is interested.
+ */
+ if (!is_def_grp) {
+ uint32_t vml2flt;
+
+ vml2flt = IXGBE_READ_REG(hw,
+ IXGBE_VMOLR(rx_group->index));
+ vml2flt |= IXGBE_VMOLR_AUPE;
+ IXGBE_WRITE_REG(hw, IXGBE_VMOLR(rx_group->index),
+ vml2flt);
+ rx_group->aupe = B_TRUE;
+ }
+
+ mutex_exit(&ixgbe->gen_lock);
+ return (0);
+ }
+
+ vlp = ixgbe_find_vlan(rx_group, vid);
+ if (vlp != NULL) {
+ /* Only the default group supports multiple clients. */
+ VERIFY3B(is_def_grp, ==, B_TRUE);
+ vlp->ixvl_refs++;
+ mutex_exit(&ixgbe->gen_lock);
+ return (0);
+ }
+
+ /*
+ * The default group doesn't require a VLVF entry, only a VFTA
+ * entry. All traffic passing L2 filtering (MPSAR + VFTA) is
+ * delivered to the default group if no other group is
+ * interested. The fourth argument, vlvf_bypass, tells the
+ * ixgbe common code to avoid using a VLVF slot if one isn't
+ * already allocated to this VLAN.
+ *
+ * This logic is meant to reserve VLVF slots for use by
+ * reserved groups: guaranteeing their use of HW filtering.
+ */
+ ret = ixgbe_set_vfta(hw, vid, rx_group->index, B_TRUE, is_def_grp);
+
+ if (ret == IXGBE_SUCCESS) {
+ vlp = kmem_zalloc(sizeof (ixgbe_vlan_t), KM_SLEEP);
+ vlp->ixvl_vid = vid;
+ vlp->ixvl_refs = 1;
+ list_insert_tail(&rx_group->vlans, vlp);
+ mutex_exit(&ixgbe->gen_lock);
+ return (0);
+ }
+
+ /*
+ * We should actually never return ENOSPC because we've set
+ * things up so that every reserved group is guaranteed to
+ * have a VLVF slot.
+ */
+ if (ret == IXGBE_ERR_PARAM)
+ ret = EINVAL;
+ else if (ret == IXGBE_ERR_NO_SPACE)
+ ret = ENOSPC;
+ else
+ ret = EIO;
+
+ mutex_exit(&ixgbe->gen_lock);
+ return (ret);
+}
+
+/*
+ * Attempt to remove the VLAN HW filter associated with this group. If
+ * we are removing a HW filter for the default group then we know only
+ * the VFTA was set (VLVF is reserved for non-default/reserved
+ * groups). If the group wishes to stop receiving untagged traffic
+ * then clear the AUPE but leave the VLAN filters alone.
+ */
+static int
+ixgbe_remvlan(mac_group_driver_t gdriver, uint16_t vid)
+{
+ ixgbe_rx_group_t *rx_group = (ixgbe_rx_group_t *)gdriver;
+ ixgbe_t *ixgbe = rx_group->ixgbe;
+ struct ixgbe_hw *hw = &ixgbe->hw;
+ int ret;
+ ixgbe_vlan_t *vlp;
+ boolean_t is_def_grp;
+
+ mutex_enter(&ixgbe->gen_lock);
+
+ if (ixgbe->ixgbe_state & IXGBE_SUSPENDED) {
+ mutex_exit(&ixgbe->gen_lock);
+ return (ECANCELED);
+ }
+
+ is_def_grp = (rx_group->index == ixgbe->rx_def_group);
+
+ /* See the AUPE comment in ixgbe_addvlan(). */
+ if (vid == MAC_VLAN_UNTAGGED) {
+ if (!is_def_grp) {
+ uint32_t vml2flt;
+
+ vml2flt = IXGBE_READ_REG(hw,
+ IXGBE_VMOLR(rx_group->index));
+ vml2flt &= ~IXGBE_VMOLR_AUPE;
+ IXGBE_WRITE_REG(hw,
+ IXGBE_VMOLR(rx_group->index), vml2flt);
+ rx_group->aupe = B_FALSE;
+ }
+ mutex_exit(&ixgbe->gen_lock);
+ return (0);
+ }
+
+ vlp = ixgbe_find_vlan(rx_group, vid);
+ if (vlp == NULL) {
+ mutex_exit(&ixgbe->gen_lock);
+ return (ENOENT);
+ }
+
+ /*
+ * See the comment in ixgbe_addvlan() about is_def_grp and
+ * vlvf_bypass.
+ */
+ if (vlp->ixvl_refs == 1) {
+ ret = ixgbe_set_vfta(hw, vid, rx_group->index, B_FALSE,
+ is_def_grp);
+ } else {
+ /*
+ * Only the default group can have multiple clients.
+ * If there is more than one client, leave the
+ * VFTA[vid] bit alone.
+ */
+ VERIFY3B(is_def_grp, ==, B_TRUE);
+ VERIFY3U(vlp->ixvl_refs, >, 1);
+ vlp->ixvl_refs--;
+ mutex_exit(&ixgbe->gen_lock);
+ return (0);
+ }
+
+ if (ret != IXGBE_SUCCESS) {
+ mutex_exit(&ixgbe->gen_lock);
+ /* IXGBE_ERR_PARAM should be the only possible error here. */
+ if (ret == IXGBE_ERR_PARAM)
+ return (EINVAL);
+ else
+ return (EIO);
+ }
+
+ VERIFY3U(vlp->ixvl_refs, ==, 1);
+ vlp->ixvl_refs = 0;
+ list_remove(&rx_group->vlans, vlp);
+ kmem_free(vlp, sizeof (ixgbe_vlan_t));
+
+ /*
+ * Calling ixgbe_set_vfta() on a non-default group may have
+ * cleared the VFTA[vid] bit even though the default group
+ * still has clients using the vid. This happens because the
+ * ixgbe common code doesn't ref count the use of VLANs. Check
+ * for any use of vid on the default group and make sure the
+ * VFTA[vid] bit is set. This operation is idempotent: setting
+ * VFTA[vid] to true if already true won't hurt anything.
+ */
+ if (!is_def_grp) {
+ ixgbe_rx_group_t *defgrp;
+
+ defgrp = &ixgbe->rx_groups[ixgbe->rx_def_group];
+ vlp = ixgbe_find_vlan(defgrp, vid);
+ if (vlp != NULL) {
+ /* This shouldn't fail, but if it does return EIO. */
+ ret = ixgbe_set_vfta(hw, vid, rx_group->index, B_TRUE,
+ B_TRUE);
+ if (ret != IXGBE_SUCCESS) {
+ mutex_exit(&ixgbe->gen_lock);
+ return (EIO);
+ }
+ }
+ }
+
+ mutex_exit(&ixgbe->gen_lock);
+ return (0);
+}
+
/*
* Add a mac address.
*/
diff --git a/usr/src/uts/common/io/ixgbe/ixgbe_sw.h b/usr/src/uts/common/io/ixgbe/ixgbe_sw.h
index 20a077d332..cfd987787a 100644
--- a/usr/src/uts/common/io/ixgbe/ixgbe_sw.h
+++ b/usr/src/uts/common/io/ixgbe/ixgbe_sw.h
@@ -91,6 +91,8 @@ extern "C" {
#define MAX_NUM_UNICAST_ADDRESSES 0x80
#define MAX_NUM_MULTICAST_ADDRESSES 0x1000
+#define MAX_NUM_VLAN_FILTERS 0x40
+
#define IXGBE_INTR_NONE 0
#define IXGBE_INTR_MSIX 1
#define IXGBE_INTR_MSI 2
@@ -387,6 +389,15 @@ typedef union ixgbe_ether_addr {
} mac;
} ixgbe_ether_addr_t;
+/*
+ * The list of VLANs an Rx group will accept.
+ */
+typedef struct ixgbe_vlan {
+ list_node_t ixvl_link;
+ uint16_t ixvl_vid; /* The VLAN ID */
+ uint_t ixvl_refs; /* Number of users of this VLAN */
+} ixgbe_vlan_t;
+
typedef enum {
USE_NONE,
USE_COPY,
@@ -589,6 +600,7 @@ typedef struct ixgbe_rx_ring {
struct ixgbe *ixgbe; /* Pointer to ixgbe struct */
} ixgbe_rx_ring_t;
+
/*
* Software Receive Ring Group
*/
@@ -596,6 +608,8 @@ typedef struct ixgbe_rx_group {
uint32_t index; /* Group index */
mac_group_handle_t group_handle; /* call back group handle */
struct ixgbe *ixgbe; /* Pointer to ixgbe struct */
+ boolean_t aupe; /* AUPE bit */
+ list_t vlans; /* list of VLANs to allow */
} ixgbe_rx_group_t;
/*
@@ -662,6 +676,7 @@ typedef struct ixgbe {
*/
ixgbe_rx_group_t *rx_groups; /* Array of rx groups */
uint32_t num_rx_groups; /* Number of rx groups in use */
+ uint32_t rx_def_group; /* Default Rx group index */
/*
* Transmit Rings
@@ -715,6 +730,9 @@ typedef struct ixgbe {
uint32_t mcast_count;
struct ether_addr mcast_table[MAX_NUM_MULTICAST_ADDRESSES];
+ boolean_t vlft_enabled; /* VLAN filtering enabled? */
+ boolean_t vlft_init; /* VLAN filtering initialized? */
+
ulong_t sys_page_size;
boolean_t link_check_complete;
diff --git a/usr/src/uts/common/io/mac/mac.c b/usr/src/uts/common/io/mac/mac.c
index 8709d07030..4d450a539b 100644
--- a/usr/src/uts/common/io/mac/mac.c
+++ b/usr/src/uts/common/io/mac/mac.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2019 Joyent, Inc.
+ * Copyright 2020 Joyent, Inc.
* Copyright 2015 Garrett D'Amore <garrett@damore.org>
*/
@@ -460,7 +460,7 @@ mac_init(void)
mac_logging_interval = 20;
mac_flow_log_enable = B_FALSE;
mac_link_log_enable = B_FALSE;
- mac_logging_timer = 0;
+ mac_logging_timer = NULL;
/* Register to be notified of noteworthy pools events */
mac_pool_event_reg.pec_func = mac_pool_event_cb;
@@ -1115,9 +1115,10 @@ mac_start(mac_handle_t mh)
if ((defgrp = MAC_DEFAULT_RX_GROUP(mip)) != NULL) {
/*
- * Start the default ring, since it will be needed
- * to receive broadcast and multicast traffic for
- * both primary and non-primary MAC clients.
+ * Start the default group which is responsible
+ * for receiving broadcast and multicast
+ * traffic for both primary and non-primary
+ * MAC clients.
*/
ASSERT(defgrp->mrg_state == MAC_GROUP_STATE_REGISTERED);
err = mac_start_group_and_rings(defgrp);
@@ -1730,6 +1731,47 @@ mac_hwgroup_remmac(mac_group_handle_t gh, const uint8_t *addr)
}
/*
+ * Program the group's HW VLAN filter if it has such support.
+ * Otherwise, the group will implicitly accept tagged traffic and
+ * there is nothing to do.
+ */
+int
+mac_hwgroup_addvlan(mac_group_handle_t gh, uint16_t vid)
+{
+ mac_group_t *group = (mac_group_t *)gh;
+
+ if (!MAC_GROUP_HW_VLAN(group))
+ return (0);
+
+ return (mac_group_addvlan(group, vid));
+}
+
+int
+mac_hwgroup_remvlan(mac_group_handle_t gh, uint16_t vid)
+{
+ mac_group_t *group = (mac_group_t *)gh;
+
+ if (!MAC_GROUP_HW_VLAN(group))
+ return (0);
+
+ return (mac_group_remvlan(group, vid));
+}
+
+/*
+ * Determine if a MAC has HW VLAN support. This is a private API
+ * consumed by aggr. In the future it might be nice to have a bitfield
+ * in mac_capab_rings_t to track which forms of HW filtering are
+ * supported by the MAC.
+ */
+boolean_t
+mac_has_hw_vlan(mac_handle_t mh)
+{
+ mac_impl_t *mip = (mac_impl_t *)mh;
+
+ return (MAC_GROUP_HW_VLAN(mip->mi_rx_groups));
+}
+
+/*
* Set the RX group to be shared/reserved. Note that the group must be
* started/stopped outside of this function.
*/
@@ -2414,7 +2456,6 @@ mac_disable(mac_handle_t mh)
/*
* Called when the MAC instance has a non empty flow table, to de-multiplex
* incoming packets to the right flow.
- * The MAC's rw lock is assumed held as a READER.
*/
/* ARGSUSED */
static mblk_t *
@@ -2425,14 +2466,14 @@ mac_rx_classify(mac_impl_t *mip, mac_resource_handle_t mrh, mblk_t *mp)
int err;
/*
- * If the mac is a port of an aggregation, pass FLOW_IGNORE_VLAN
+ * If the MAC is a port of an aggregation, pass FLOW_IGNORE_VLAN
* to mac_flow_lookup() so that the VLAN packets can be successfully
* passed to the non-VLAN aggregation flows.
*
* Note that there is possibly a race between this and
* mac_unicast_remove/add() and VLAN packets could be incorrectly
- * classified to non-VLAN flows of non-aggregation mac clients. These
- * VLAN packets will be then filtered out by the mac module.
+ * classified to non-VLAN flows of non-aggregation MAC clients. These
+ * VLAN packets will be then filtered out by the MAC module.
*/
if ((mip->mi_state_flags & MIS_EXCLUSIVE) != 0)
flags |= FLOW_IGNORE_VLAN;
@@ -4075,12 +4116,15 @@ mac_init_rings(mac_impl_t *mip, mac_ring_type_t rtype)
/*
- * Driver must register group->mgi_addmac/remmac() for rx groups
- * to support multiple MAC addresses.
+ * The driver must register some form of hardware MAC
+ * filter in order for Rx groups to support multiple
+ * MAC addresses.
*/
if (rtype == MAC_RING_TYPE_RX &&
- ((group_info.mgi_addmac == NULL) ||
- (group_info.mgi_remmac == NULL))) {
+ (group_info.mgi_addmac == NULL ||
+ group_info.mgi_remmac == NULL)) {
+ DTRACE_PROBE1(mac__init__rings__no__mac__filter,
+ char *, mip->mi_name);
err = EINVAL;
goto bail;
}
@@ -4127,8 +4171,9 @@ mac_init_rings(mac_impl_t *mip, mac_ring_type_t rtype)
/* Update this group's status */
mac_set_group_state(group, MAC_GROUP_STATE_REGISTERED);
- } else
+ } else {
group->mrg_rings = NULL;
+ }
ASSERT(ring_left == 0);
@@ -4318,6 +4363,38 @@ mac_free_rings(mac_impl_t *mip, mac_ring_type_t rtype)
}
/*
+ * Associate the VLAN filter to the receive group.
+ */
+int
+mac_group_addvlan(mac_group_t *group, uint16_t vlan)
+{
+ VERIFY3S(group->mrg_type, ==, MAC_RING_TYPE_RX);
+ VERIFY3P(group->mrg_info.mgi_addvlan, !=, NULL);
+
+ if (vlan > VLAN_ID_MAX)
+ return (EINVAL);
+
+ vlan = MAC_VLAN_UNTAGGED_VID(vlan);
+ return (group->mrg_info.mgi_addvlan(group->mrg_info.mgi_driver, vlan));
+}
+
+/*
+ * Dissociate the VLAN from the receive group.
+ */
+int
+mac_group_remvlan(mac_group_t *group, uint16_t vlan)
+{
+ VERIFY3S(group->mrg_type, ==, MAC_RING_TYPE_RX);
+ VERIFY3P(group->mrg_info.mgi_remvlan, !=, NULL);
+
+ if (vlan > VLAN_ID_MAX)
+ return (EINVAL);
+
+ vlan = MAC_VLAN_UNTAGGED_VID(vlan);
+ return (group->mrg_info.mgi_remvlan(group->mrg_info.mgi_driver, vlan));
+}
+
+/*
* Associate a MAC address with a receive group.
*
* The return value of this function should always be checked properly, because
@@ -4333,8 +4410,8 @@ mac_free_rings(mac_impl_t *mip, mac_ring_type_t rtype)
int
mac_group_addmac(mac_group_t *group, const uint8_t *addr)
{
- ASSERT(group->mrg_type == MAC_RING_TYPE_RX);
- ASSERT(group->mrg_info.mgi_addmac != NULL);
+ VERIFY3S(group->mrg_type, ==, MAC_RING_TYPE_RX);
+ VERIFY3P(group->mrg_info.mgi_addmac, !=, NULL);
return (group->mrg_info.mgi_addmac(group->mrg_info.mgi_driver, addr));
}
@@ -4345,8 +4422,8 @@ mac_group_addmac(mac_group_t *group, const uint8_t *addr)
int
mac_group_remmac(mac_group_t *group, const uint8_t *addr)
{
- ASSERT(group->mrg_type == MAC_RING_TYPE_RX);
- ASSERT(group->mrg_info.mgi_remmac != NULL);
+ VERIFY3S(group->mrg_type, ==, MAC_RING_TYPE_RX);
+ VERIFY3P(group->mrg_info.mgi_remmac, !=, NULL);
return (group->mrg_info.mgi_remmac(group->mrg_info.mgi_driver, addr));
}
@@ -4521,28 +4598,20 @@ i_mac_group_add_ring(mac_group_t *group, mac_ring_t *ring, int index)
switch (ring->mr_type) {
case MAC_RING_TYPE_RX:
/*
- * Setup SRS on top of the new ring if the group is
- * reserved for someones exclusive use.
+ * Setup an SRS on top of the new ring if the group is
+ * reserved for someone's exclusive use.
*/
if (group->mrg_state == MAC_GROUP_STATE_RESERVED) {
- mac_client_impl_t *mcip;
+ mac_client_impl_t *mcip = MAC_GROUP_ONLY_CLIENT(group);
- mcip = MAC_GROUP_ONLY_CLIENT(group);
- /*
- * Even though this group is reserved we migth still
- * have multiple clients, i.e a VLAN shares the
- * group with the primary mac client.
- */
- if (mcip != NULL) {
- flent = mcip->mci_flent;
- ASSERT(flent->fe_rx_srs_cnt > 0);
- mac_rx_srs_group_setup(mcip, flent, SRST_LINK);
- mac_fanout_setup(mcip, flent,
- MCIP_RESOURCE_PROPS(mcip), mac_rx_deliver,
- mcip, NULL, NULL);
- } else {
- ring->mr_classify_type = MAC_SW_CLASSIFIER;
- }
+ VERIFY3P(mcip, !=, NULL);
+ flent = mcip->mci_flent;
+ VERIFY3S(flent->fe_rx_srs_cnt, >, 0);
+ mac_rx_srs_group_setup(mcip, flent, SRST_LINK);
+ mac_fanout_setup(mcip, flent, MCIP_RESOURCE_PROPS(mcip),
+ mac_rx_deliver, mcip, NULL, NULL);
+ } else {
+ ring->mr_classify_type = MAC_SW_CLASSIFIER;
}
break;
case MAC_RING_TYPE_TX:
@@ -4568,7 +4637,7 @@ i_mac_group_add_ring(mac_group_t *group, mac_ring_t *ring, int index)
mcip = mgcp->mgc_client;
flent = mcip->mci_flent;
- is_aggr = (mcip->mci_state_flags & MCIS_IS_AGGR);
+ is_aggr = (mcip->mci_state_flags & MCIS_IS_AGGR_CLIENT);
mac_srs = MCIP_TX_SRS(mcip);
tx = &mac_srs->srs_tx;
mac_tx_client_quiesce((mac_client_handle_t)mcip);
@@ -4712,7 +4781,7 @@ i_mac_group_rem_ring(mac_group_t *group, mac_ring_t *ring,
mcip = MAC_GROUP_ONLY_CLIENT(group);
ASSERT(mcip != NULL);
- ASSERT(mcip->mci_state_flags & MCIS_IS_AGGR);
+ ASSERT(mcip->mci_state_flags & MCIS_IS_AGGR_CLIENT);
mac_srs = MCIP_TX_SRS(mcip);
ASSERT(mac_srs->srs_tx.st_mode == SRS_TX_AGGR ||
mac_srs->srs_tx.st_mode == SRS_TX_BW_AGGR);
@@ -4920,12 +4989,12 @@ mac_free_macaddr(mac_address_t *map)
mac_impl_t *mip = map->ma_mip;
ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
- ASSERT(mip->mi_addresses != NULL);
+ VERIFY3P(mip->mi_addresses, !=, NULL);
- map = mac_find_macaddr(mip, map->ma_addr);
-
- ASSERT(map != NULL);
- ASSERT(map->ma_nusers == 0);
+ VERIFY3P(map, ==, mac_find_macaddr(mip, map->ma_addr));
+ VERIFY3P(map, !=, NULL);
+ VERIFY3S(map->ma_nusers, ==, 0);
+ VERIFY3P(map->ma_vlans, ==, NULL);
if (map == mip->mi_addresses) {
mip->mi_addresses = map->ma_next;
@@ -4941,85 +5010,201 @@ mac_free_macaddr(mac_address_t *map)
kmem_free(map, sizeof (mac_address_t));
}
+static mac_vlan_t *
+mac_find_vlan(mac_address_t *map, uint16_t vid)
+{
+ mac_vlan_t *mvp;
+
+ for (mvp = map->ma_vlans; mvp != NULL; mvp = mvp->mv_next) {
+ if (mvp->mv_vid == vid)
+ return (mvp);
+ }
+
+ return (NULL);
+}
+
+static mac_vlan_t *
+mac_add_vlan(mac_address_t *map, uint16_t vid)
+{
+ mac_vlan_t *mvp;
+
+ /*
+ * We should never add the same {addr, VID} tuple more
+ * than once, but let's be sure.
+ */
+ for (mvp = map->ma_vlans; mvp != NULL; mvp = mvp->mv_next)
+ VERIFY3U(mvp->mv_vid, !=, vid);
+
+ /* Add the VLAN to the head of the VLAN list. */
+ mvp = kmem_zalloc(sizeof (mac_vlan_t), KM_SLEEP);
+ mvp->mv_vid = vid;
+ mvp->mv_next = map->ma_vlans;
+ map->ma_vlans = mvp;
+
+ return (mvp);
+}
+
+static void
+mac_rem_vlan(mac_address_t *map, mac_vlan_t *mvp)
+{
+ mac_vlan_t *pre;
+
+ if (map->ma_vlans == mvp) {
+ map->ma_vlans = mvp->mv_next;
+ } else {
+ pre = map->ma_vlans;
+ while (pre->mv_next != mvp) {
+ pre = pre->mv_next;
+
+ /*
+ * We've reached the end of the list without
+ * finding mvp.
+ */
+ VERIFY3P(pre, !=, NULL);
+ }
+ pre->mv_next = mvp->mv_next;
+ }
+
+ kmem_free(mvp, sizeof (mac_vlan_t));
+}
+
/*
- * Add a MAC address reference for a client. If the desired MAC address
- * exists, add a reference to it. Otherwise, add the new address by adding
- * it to a reserved group or setting promiscuous mode. Won't try different
- * group is the group is non-NULL, so the caller must explictly share
- * default group when needed.
- *
- * Note, the primary MAC address is initialized at registration time, so
- * to add it to default group only need to activate it if its reference
- * count is still zero. Also, some drivers may not have advertised RINGS
- * capability.
+ * Create a new mac_address_t if this is the first use of the address
+ * or add a VID to an existing address. In either case, the
+ * mac_address_t acts as a list of {addr, VID} tuples where each tuple
+ * shares the same addr. If group is non-NULL then attempt to program
+ * the MAC's HW filters for this group. Otherwise, if group is NULL,
+ * then the MAC has no rings and there is nothing to program.
*/
int
-mac_add_macaddr(mac_impl_t *mip, mac_group_t *group, uint8_t *mac_addr,
- boolean_t use_hw)
+mac_add_macaddr_vlan(mac_impl_t *mip, mac_group_t *group, uint8_t *addr,
+ uint16_t vid, boolean_t use_hw)
{
- mac_address_t *map;
- int err = 0;
- boolean_t allocated_map = B_FALSE;
+ mac_address_t *map;
+ mac_vlan_t *mvp;
+ int err = 0;
+ boolean_t allocated_map = B_FALSE;
+ boolean_t hw_mac = B_FALSE;
+ boolean_t hw_vlan = B_FALSE;
ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
- map = mac_find_macaddr(mip, mac_addr);
+ map = mac_find_macaddr(mip, addr);
/*
- * If the new MAC address has not been added. Allocate a new one
- * and set it up.
+ * If this is the first use of this MAC address then allocate
+ * and initialize a new structure.
*/
if (map == NULL) {
map = kmem_zalloc(sizeof (mac_address_t), KM_SLEEP);
map->ma_len = mip->mi_type->mt_addr_length;
- bcopy(mac_addr, map->ma_addr, map->ma_len);
+ bcopy(addr, map->ma_addr, map->ma_len);
map->ma_nusers = 0;
map->ma_group = group;
map->ma_mip = mip;
+ map->ma_untagged = B_FALSE;
- /* add the new MAC address to the head of the address list */
+ /* Add the new MAC address to the head of the address list. */
map->ma_next = mip->mi_addresses;
mip->mi_addresses = map;
allocated_map = B_TRUE;
}
- ASSERT(map->ma_group == NULL || map->ma_group == group);
+ VERIFY(map->ma_group == NULL || map->ma_group == group);
if (map->ma_group == NULL)
map->ma_group = group;
+ if (vid == VLAN_ID_NONE) {
+ map->ma_untagged = B_TRUE;
+ mvp = NULL;
+ } else {
+ mvp = mac_add_vlan(map, vid);
+ }
+
+ /*
+ * Set the VLAN HW filter if:
+ *
+ * o the MAC's VLAN HW filtering is enabled, and
+ * o the address does not currently rely on promisc mode.
+ *
+ * This is called even when the client specifies an untagged
+ * address (VLAN_ID_NONE) because some MAC providers require
+ * setting additional bits to accept untagged traffic when
+ * VLAN HW filtering is enabled.
+ */
+ if (MAC_GROUP_HW_VLAN(group) &&
+ map->ma_type != MAC_ADDRESS_TYPE_UNICAST_PROMISC) {
+ if ((err = mac_group_addvlan(group, vid)) != 0)
+ goto bail;
+
+ hw_vlan = B_TRUE;
+ }
+
+ VERIFY3S(map->ma_nusers, >=, 0);
+ map->ma_nusers++;
+
/*
- * If the MAC address is already in use, simply account for the
- * new client.
+ * If this MAC address already has a HW filter then simply
+ * increment the counter.
*/
- if (map->ma_nusers++ > 0)
+ if (map->ma_nusers > 1)
return (0);
/*
+ * All logic from here on out is executed during initial
+ * creation only.
+ */
+ VERIFY3S(map->ma_nusers, ==, 1);
+
+ /*
* Activate this MAC address by adding it to the reserved group.
*/
if (group != NULL) {
- err = mac_group_addmac(group, (const uint8_t *)mac_addr);
- if (err == 0) {
- map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED;
- return (0);
+ err = mac_group_addmac(group, (const uint8_t *)addr);
+
+ /*
+ * If the driver is out of filters then we can
+ * continue and use promisc mode. For any other error,
+ * assume the driver is in a state where we can't
+ * program the filters or use promisc mode; so we must
+ * bail.
+ */
+ if (err != 0 && err != ENOSPC) {
+ map->ma_nusers--;
+ goto bail;
}
+
+ hw_mac = (err == 0);
+ }
+
+ if (hw_mac) {
+ map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED;
+ return (0);
}
/*
* The MAC address addition failed. If the client requires a
- * hardware classified MAC address, fail the operation.
+ * hardware classified MAC address, fail the operation. This
+ * feature is only used by sun4v vsw.
*/
- if (use_hw) {
+ if (use_hw && !hw_mac) {
err = ENOSPC;
+ map->ma_nusers--;
goto bail;
}
/*
- * Try promiscuous mode.
- *
- * For drivers that don't advertise RINGS capability, do
- * nothing for the primary address.
+ * If we reach this point then either the MAC doesn't have
+ * RINGS capability or we are out of MAC address HW filters.
+ * In any case we must put the MAC into promiscuous mode.
+ */
+ VERIFY(group == NULL || !hw_mac);
+
+ /*
+ * The one exception is the primary address. A non-RINGS
+ * driver filters the primary address by default; promisc mode
+ * is not needed.
*/
if ((group == NULL) &&
(bcmp(map->ma_addr, mip->mi_addr, map->ma_len) == 0)) {
@@ -5028,8 +5213,11 @@ mac_add_macaddr(mac_impl_t *mip, mac_group_t *group, uint8_t *mac_addr,
}
/*
- * Enable promiscuous mode in order to receive traffic
- * to the new MAC address.
+ * Enable promiscuous mode in order to receive traffic to the
+ * new MAC address. All existing HW filters still send their
+ * traffic to their respective group/SRSes. But with promisc
+ * enabled all unknown traffic is delivered to the default
+ * group where it is SW classified via mac_rx_classify().
*/
if ((err = i_mac_promisc_set(mip, B_TRUE)) == 0) {
map->ma_type = MAC_ADDRESS_TYPE_UNICAST_PROMISC;
@@ -5037,44 +5225,71 @@ mac_add_macaddr(mac_impl_t *mip, mac_group_t *group, uint8_t *mac_addr,
}
/*
- * Free the MAC address that could not be added. Don't free
- * a pre-existing address, it could have been the entry
- * for the primary MAC address which was pre-allocated by
- * mac_init_macaddr(), and which must remain on the list.
+ * We failed to set promisc mode and we are about to free 'map'.
*/
+ map->ma_nusers = 0;
+
bail:
- map->ma_nusers--;
+ if (hw_vlan) {
+ int err2 = mac_group_remvlan(group, vid);
+
+ if (err2 != 0) {
+ cmn_err(CE_WARN, "Failed to remove VLAN %u from group"
+ " %d on MAC %s: %d.", vid, group->mrg_index,
+ mip->mi_name, err2);
+ }
+ }
+
+ if (mvp != NULL)
+ mac_rem_vlan(map, mvp);
+
if (allocated_map)
mac_free_macaddr(map);
+
return (err);
}
-/*
- * Remove a reference to a MAC address. This may cause to remove the MAC
- * address from an associated group or to turn off promiscuous mode.
- * The caller needs to handle the failure properly.
- */
int
-mac_remove_macaddr(mac_address_t *map)
+mac_remove_macaddr_vlan(mac_address_t *map, uint16_t vid)
{
- mac_impl_t *mip = map->ma_mip;
- int err = 0;
+ mac_vlan_t *mvp;
+ mac_impl_t *mip = map->ma_mip;
+ mac_group_t *group = map->ma_group;
+ int err = 0;
ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
+ VERIFY3P(map, ==, mac_find_macaddr(mip, map->ma_addr));
+
+ if (vid == VLAN_ID_NONE) {
+ map->ma_untagged = B_FALSE;
+ mvp = NULL;
+ } else {
+ mvp = mac_find_vlan(map, vid);
+ VERIFY3P(mvp, !=, NULL);
+ }
- ASSERT(map == mac_find_macaddr(mip, map->ma_addr));
+ if (MAC_GROUP_HW_VLAN(group) &&
+ map->ma_type == MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED &&
+ ((err = mac_group_remvlan(group, vid)) != 0))
+ return (err);
+
+ if (mvp != NULL)
+ mac_rem_vlan(map, mvp);
/*
* If it's not the last client using this MAC address, only update
* the MAC clients count.
*/
- if (--map->ma_nusers > 0)
+ map->ma_nusers--;
+ if (map->ma_nusers > 0)
return (0);
+ VERIFY3S(map->ma_nusers, ==, 0);
+
/*
- * The MAC address is no longer used by any MAC client, so remove
- * it from its associated group, or turn off promiscuous mode
- * if it was enabled for the MAC address.
+ * The MAC address is no longer used by any MAC client, so
+ * remove it from its associated group. Turn off promiscuous
+ * mode if this is the last address relying on it.
*/
switch (map->ma_type) {
case MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED:
@@ -5082,22 +5297,60 @@ mac_remove_macaddr(mac_address_t *map)
* Don't free the preset primary address for drivers that
* don't advertise RINGS capability.
*/
- if (map->ma_group == NULL)
+ if (group == NULL)
return (0);
- err = mac_group_remmac(map->ma_group, map->ma_addr);
- if (err == 0)
- map->ma_group = NULL;
+ if ((err = mac_group_remmac(group, map->ma_addr)) != 0) {
+ if (vid == VLAN_ID_NONE)
+ map->ma_untagged = B_TRUE;
+ else
+ (void) mac_add_vlan(map, vid);
+
+ /*
+ * If we fail to remove the MAC address HW
+ * filter but then also fail to re-add the
+ * VLAN HW filter then we are in a busted
+ * state. We do our best by logging a warning
+ * and returning the original 'err' that got
+ * us here. At this point, traffic for this
+ * address + VLAN combination will be dropped
+ * until the user reboots the system. In the
+ * future, it would be nice to have a system
+ * that can compare the state of expected
+ * classification according to mac to the
+ * actual state of the provider, and report
+ * and fix any inconsistencies.
+ */
+ if (MAC_GROUP_HW_VLAN(group)) {
+ int err2;
+
+ err2 = mac_group_addvlan(group, vid);
+ if (err2 != 0) {
+ cmn_err(CE_WARN, "Failed to readd VLAN"
+ " %u to group %d on MAC %s: %d.",
+ vid, group->mrg_index, mip->mi_name,
+ err2);
+ }
+ }
+
+ map->ma_nusers = 1;
+ return (err);
+ }
+
+ map->ma_group = NULL;
break;
case MAC_ADDRESS_TYPE_UNICAST_PROMISC:
err = i_mac_promisc_set(mip, B_FALSE);
break;
default:
- ASSERT(B_FALSE);
+ panic("Unexpected ma_type 0x%x, file: %s, line %d",
+ map->ma_type, __FILE__, __LINE__);
}
- if (err != 0)
+ if (err != 0) {
+ map->ma_nusers = 1;
return (err);
+ }
/*
* We created MAC address for the primary one at registration, so we
@@ -5250,8 +5503,9 @@ mac_fini_macaddr(mac_impl_t *mip)
* If mi_addresses is initialized, there should be exactly one
* entry left on the list with no users.
*/
- ASSERT(map->ma_nusers == 0);
- ASSERT(map->ma_next == NULL);
+ VERIFY3S(map->ma_nusers, ==, 0);
+ VERIFY3P(map->ma_next, ==, NULL);
+ VERIFY3P(map->ma_vlans, ==, NULL);
kmem_free(map, sizeof (mac_address_t));
mip->mi_addresses = NULL;
@@ -5813,7 +6067,7 @@ mac_stop_logusage(mac_logtype_t type)
mod_hash_walk(i_mac_impl_hash, i_mac_fastpath_walker, &estate);
(void) untimeout(mac_logging_timer);
- mac_logging_timer = 0;
+ mac_logging_timer = NULL;
/* Write log entries for each mac_impl in the list */
i_mac_log_info(&net_log_list, &lstate);
@@ -5931,7 +6185,7 @@ mac_reserve_tx_ring(mac_impl_t *mip, mac_ring_t *desired_ring)
}
/*
- * For a reserved group with multiple clients, return the primary client.
+ * For a non-default group with multiple clients, return the primary client.
*/
static mac_client_impl_t *
mac_get_grp_primary(mac_group_t *grp)
@@ -6290,13 +6544,12 @@ mac_group_add_client(mac_group_t *grp, mac_client_impl_t *mcip)
break;
}
- VERIFY(mgcp == NULL);
+ ASSERT(mgcp == NULL);
mgcp = kmem_zalloc(sizeof (mac_grp_client_t), KM_SLEEP);
mgcp->mgc_client = mcip;
mgcp->mgc_next = grp->mrg_clients;
grp->mrg_clients = mgcp;
-
}
void
@@ -6317,8 +6570,27 @@ mac_group_remove_client(mac_group_t *grp, mac_client_impl_t *mcip)
}
/*
- * mac_reserve_rx_group()
- *
+ * Return true if any client on this group explicitly asked for HW
+ * rings (of type mask) or have a bound share.
+ */
+static boolean_t
+i_mac_clients_hw(mac_group_t *grp, uint32_t mask)
+{
+ mac_grp_client_t *mgcip;
+ mac_client_impl_t *mcip;
+ mac_resource_props_t *mrp;
+
+ for (mgcip = grp->mrg_clients; mgcip != NULL; mgcip = mgcip->mgc_next) {
+ mcip = mgcip->mgc_client;
+ mrp = MCIP_RESOURCE_PROPS(mcip);
+ if (mcip->mci_share != 0 || (mrp->mrp_mask & mask) != 0)
+ return (B_TRUE);
+ }
+
+ return (B_FALSE);
+}
+
+/*
* Finds an available group and exclusively reserves it for a client.
* The group is chosen to suit the flow's resource controls (bandwidth and
* fanout requirements) and the address type.
@@ -6341,7 +6613,6 @@ mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move)
int need_rings = 0;
mac_group_t *candidate_grp = NULL;
mac_client_impl_t *gclient;
- mac_resource_props_t *gmrp;
mac_group_t *donorgrp = NULL;
boolean_t rxhw = mrp->mrp_mask & MRP_RX_RINGS;
boolean_t unspec = mrp->mrp_mask & MRP_RXRINGS_UNSPEC;
@@ -6352,18 +6623,20 @@ mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move)
isprimary = mcip->mci_flent->fe_type & FLOW_PRIMARY_MAC;
/*
- * Check if a group already has this mac address (case of VLANs)
+ * Check if a group already has this MAC address (case of VLANs)
* unless we are moving this MAC client from one group to another.
*/
if (!move && (map = mac_find_macaddr(mip, mac_addr)) != NULL) {
if (map->ma_group != NULL)
return (map->ma_group);
}
+
if (mip->mi_rx_groups == NULL || mip->mi_rx_group_count == 0)
return (NULL);
+
/*
- * If exclusive open, return NULL which will enable the
- * caller to use the default group.
+ * If this client is requesting exclusive MAC access then
+ * return NULL to ensure the client uses the default group.
*/
if (mcip->mci_state_flags & MCIS_EXCLUSIVE)
return (NULL);
@@ -6373,6 +6646,7 @@ mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move)
mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) {
mrp->mrp_nrxrings = 1;
}
+
/*
* For static grouping we allow only specifying rings=0 and
* unspecified
@@ -6381,6 +6655,7 @@ mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move)
mip->mi_rx_group_type == MAC_GROUP_TYPE_STATIC) {
return (NULL);
}
+
if (rxhw) {
/*
* We have explicitly asked for a group (with nrxrings,
@@ -6442,25 +6717,19 @@ mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move)
* that didn't ask for an exclusive group, but got
* one and it has enough rings (combined with what
* the donor group can donate) for the new MAC
- * client
+ * client.
*/
if (grp->mrg_state >= MAC_GROUP_STATE_RESERVED) {
/*
- * If the primary/donor group is not the default
- * group, don't bother looking for a candidate group.
- * If we don't have enough rings we will check
- * if the primary group can be vacated.
+ * If the donor group is not the default
+ * group, don't bother looking for a candidate
+ * group. If we don't have enough rings we
+ * will check if the primary group can be
+ * vacated.
*/
if (candidate_grp == NULL &&
donorgrp == MAC_DEFAULT_RX_GROUP(mip)) {
- ASSERT(!MAC_GROUP_NO_CLIENT(grp));
- gclient = MAC_GROUP_ONLY_CLIENT(grp);
- if (gclient == NULL)
- gclient = mac_get_grp_primary(grp);
- ASSERT(gclient != NULL);
- gmrp = MCIP_RESOURCE_PROPS(gclient);
- if (gclient->mci_share == 0 &&
- (gmrp->mrp_mask & MRP_RX_RINGS) == 0 &&
+ if (!i_mac_clients_hw(grp, MRP_RX_RINGS) &&
(unspec ||
(grp->mrg_cur_count + donor_grp_rcnt >=
need_rings))) {
@@ -6526,6 +6795,7 @@ mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move)
*/
mac_stop_group(grp);
}
+
/* We didn't find an exclusive group for this MAC client */
if (i >= mip->mi_rx_group_count) {
@@ -6533,12 +6803,12 @@ mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move)
return (NULL);
/*
- * If we found a candidate group then we switch the
- * MAC client from the candidate_group to the default
- * group and give the group to this MAC client. If
- * we didn't find a candidate_group, check if the
- * primary is in its own group and if it can make way
- * for this MAC client.
+ * If we found a candidate group then move the
+ * existing MAC client from the candidate_group to the
+ * default group and give the candidate_group to the
+ * new MAC client. If we didn't find a candidate
+ * group, then check if the primary is in its own
+ * group and if it can make way for this MAC client.
*/
if (candidate_grp == NULL &&
donorgrp != MAC_DEFAULT_RX_GROUP(mip) &&
@@ -6549,15 +6819,15 @@ mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move)
boolean_t prim_grp = B_FALSE;
/*
- * Switch the MAC client from the candidate group
- * to the default group.. If this group was the
- * donor group, then after the switch we need
- * to update the donor group too.
+ * Switch the existing MAC client from the
+ * candidate group to the default group. If
+ * the candidate group is the donor group,
+ * then after the switch we need to update the
+ * donor group too.
*/
grp = candidate_grp;
- gclient = MAC_GROUP_ONLY_CLIENT(grp);
- if (gclient == NULL)
- gclient = mac_get_grp_primary(grp);
+ gclient = grp->mrg_clients->mgc_client;
+ VERIFY3P(gclient, !=, NULL);
if (grp == mip->mi_rx_donor_grp)
prim_grp = B_TRUE;
if (mac_rx_switch_group(gclient, grp,
@@ -6570,7 +6840,6 @@ mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move)
donorgrp = MAC_DEFAULT_RX_GROUP(mip);
}
-
/*
* Now give this group with the required rings
* to this MAC client.
@@ -6618,10 +6887,10 @@ mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move)
/*
* mac_rx_release_group()
*
- * This is called when there are no clients left for the group.
- * The group is stopped and marked MAC_GROUP_STATE_REGISTERED,
- * and if it is a non default group, the shares are removed and
- * all rings are assigned back to default group.
+ * Release the group when it has no remaining clients. The group is
+ * stopped and its shares are removed and all rings are assigned back
+ * to default group. This should never be called against the default
+ * group.
*/
void
mac_release_rx_group(mac_client_impl_t *mcip, mac_group_t *group)
@@ -6630,6 +6899,7 @@ mac_release_rx_group(mac_client_impl_t *mcip, mac_group_t *group)
mac_ring_t *ring;
ASSERT(group != MAC_DEFAULT_RX_GROUP(mip));
+ ASSERT(MAC_GROUP_NO_CLIENT(group) == B_TRUE);
if (mip->mi_rx_donor_grp == group)
mip->mi_rx_donor_grp = MAC_DEFAULT_RX_GROUP(mip);
@@ -6681,56 +6951,7 @@ mac_release_rx_group(mac_client_impl_t *mcip, mac_group_t *group)
}
/*
- * When we move the primary's mac address between groups, we need to also
- * take all the clients sharing the same mac address along with it (VLANs)
- * We remove the mac address for such clients from the group after quiescing
- * them. When we add the mac address we restart the client. Note that
- * the primary's mac address is removed from the group after all the
- * other clients sharing the address are removed. Similarly, the primary's
- * mac address is added before all the other client's mac address are
- * added. While grp is the group where the clients reside, tgrp is
- * the group where the addresses have to be added.
- */
-static void
-mac_rx_move_macaddr_prim(mac_client_impl_t *mcip, mac_group_t *grp,
- mac_group_t *tgrp, uint8_t *maddr, boolean_t add)
-{
- mac_impl_t *mip = mcip->mci_mip;
- mac_grp_client_t *mgcp = grp->mrg_clients;
- mac_client_impl_t *gmcip;
- boolean_t prim;
-
- prim = (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0;
-
- /*
- * If the clients are in a non-default group, we just have to
- * walk the group's client list. If it is in the default group
- * (which will be shared by other clients as well, we need to
- * check if the unicast address matches mcip's unicast.
- */
- while (mgcp != NULL) {
- gmcip = mgcp->mgc_client;
- if (gmcip != mcip &&
- (grp != MAC_DEFAULT_RX_GROUP(mip) ||
- mcip->mci_unicast == gmcip->mci_unicast)) {
- if (!add) {
- mac_rx_client_quiesce(
- (mac_client_handle_t)gmcip);
- (void) mac_remove_macaddr(mcip->mci_unicast);
- } else {
- (void) mac_add_macaddr(mip, tgrp, maddr, prim);
- mac_rx_client_restart(
- (mac_client_handle_t)gmcip);
- }
- }
- mgcp = mgcp->mgc_next;
- }
-}
-
-
-/*
- * Move the MAC address from fgrp to tgrp. If this is the primary client,
- * we need to take any VLANs etc. together too.
+ * Move the MAC address from fgrp to tgrp.
*/
static int
mac_rx_move_macaddr(mac_client_impl_t *mcip, mac_group_t *fgrp,
@@ -6739,56 +6960,86 @@ mac_rx_move_macaddr(mac_client_impl_t *mcip, mac_group_t *fgrp,
mac_impl_t *mip = mcip->mci_mip;
uint8_t maddr[MAXMACADDRLEN];
int err = 0;
- boolean_t prim;
- boolean_t multiclnt = B_FALSE;
+ uint16_t vid;
+ mac_unicast_impl_t *muip;
+ boolean_t use_hw;
mac_rx_client_quiesce((mac_client_handle_t)mcip);
- ASSERT(mcip->mci_unicast != NULL);
+ VERIFY3P(mcip->mci_unicast, !=, NULL);
bcopy(mcip->mci_unicast->ma_addr, maddr, mcip->mci_unicast->ma_len);
- prim = (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0;
- if (mcip->mci_unicast->ma_nusers > 1) {
- mac_rx_move_macaddr_prim(mcip, fgrp, NULL, maddr, B_FALSE);
- multiclnt = B_TRUE;
- }
- ASSERT(mcip->mci_unicast->ma_nusers == 1);
- err = mac_remove_macaddr(mcip->mci_unicast);
+ /*
+	 * Does the client require MAC address hardware classification?
+ */
+ use_hw = (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0;
+ vid = i_mac_flow_vid(mcip->mci_flent);
+
+ /*
+ * You can never move an address that is shared by multiple
+ * clients. mac_datapath_setup() ensures that clients sharing
+ * an address are placed on the default group. This guarantees
+ * that a non-default group will only ever have one client and
+ * thus make full use of HW filters.
+ */
+ if (mac_check_macaddr_shared(mcip->mci_unicast))
+ return (EINVAL);
+
+ err = mac_remove_macaddr_vlan(mcip->mci_unicast, vid);
+
if (err != 0) {
mac_rx_client_restart((mac_client_handle_t)mcip);
- if (multiclnt) {
- mac_rx_move_macaddr_prim(mcip, fgrp, fgrp, maddr,
- B_TRUE);
- }
return (err);
}
+
/*
- * Program the H/W Classifier first, if this fails we need
- * not proceed with the other stuff.
+ * If this isn't the primary MAC address then the
+ * mac_address_t has been freed by the last call to
+ * mac_remove_macaddr_vlan(). In any case, NULL the reference
+ * to avoid a dangling pointer.
*/
- if ((err = mac_add_macaddr(mip, tgrp, maddr, prim)) != 0) {
+ mcip->mci_unicast = NULL;
+
+ /*
+ * We also have to NULL all the mui_map references -- sun4v
+ * strikes again!
+ */
+ rw_enter(&mcip->mci_rw_lock, RW_WRITER);
+ for (muip = mcip->mci_unicast_list; muip != NULL; muip = muip->mui_next)
+ muip->mui_map = NULL;
+ rw_exit(&mcip->mci_rw_lock);
+
+ /*
+ * Program the H/W Classifier first, if this fails we need not
+ * proceed with the other stuff.
+ */
+ if ((err = mac_add_macaddr_vlan(mip, tgrp, maddr, vid, use_hw)) != 0) {
+ int err2;
+
/* Revert back the H/W Classifier */
- if ((err = mac_add_macaddr(mip, fgrp, maddr, prim)) != 0) {
- /*
- * This should not fail now since it worked earlier,
- * should we panic?
- */
- cmn_err(CE_WARN,
- "mac_rx_switch_group: switching %p back"
- " to group %p failed!!", (void *)mcip,
- (void *)fgrp);
+ err2 = mac_add_macaddr_vlan(mip, fgrp, maddr, vid, use_hw);
+
+ if (err2 != 0) {
+ cmn_err(CE_WARN, "Failed to revert HW classification"
+ " on MAC %s, for client %s: %d.", mip->mi_name,
+ mcip->mci_name, err2);
}
+
mac_rx_client_restart((mac_client_handle_t)mcip);
- if (multiclnt) {
- mac_rx_move_macaddr_prim(mcip, fgrp, fgrp, maddr,
- B_TRUE);
- }
return (err);
}
+
+ /*
+ * Get a reference to the new mac_address_t and update the
+ * client's reference. Then restart the client and add the
+	 * other clients of this MAC addr (if they exist).
+ */
mcip->mci_unicast = mac_find_macaddr(mip, maddr);
+ rw_enter(&mcip->mci_rw_lock, RW_WRITER);
+ for (muip = mcip->mci_unicast_list; muip != NULL; muip = muip->mui_next)
+ muip->mui_map = mcip->mci_unicast;
+ rw_exit(&mcip->mci_rw_lock);
mac_rx_client_restart((mac_client_handle_t)mcip);
- if (multiclnt)
- mac_rx_move_macaddr_prim(mcip, fgrp, tgrp, maddr, B_TRUE);
- return (err);
+ return (0);
}
/*
@@ -6809,19 +7060,34 @@ mac_rx_switch_group(mac_client_impl_t *mcip, mac_group_t *fgrp,
mac_impl_t *mip = mcip->mci_mip;
mac_grp_client_t *mgcp;
- ASSERT(fgrp == mcip->mci_flent->fe_rx_ring_group);
+ VERIFY3P(fgrp, ==, mcip->mci_flent->fe_rx_ring_group);
if ((err = mac_rx_move_macaddr(mcip, fgrp, tgrp)) != 0)
return (err);
/*
- * The group might be reserved, but SRSs may not be set up, e.g.
- * primary and its vlans using a reserved group.
+ * If the group is marked as reserved and in use by a single
+ * client, then there is an SRS to teardown.
*/
if (fgrp->mrg_state == MAC_GROUP_STATE_RESERVED &&
MAC_GROUP_ONLY_CLIENT(fgrp) != NULL) {
mac_rx_srs_group_teardown(mcip->mci_flent, B_TRUE);
}
+
+ /*
+ * If we are moving the client from a non-default group, then
+ * we know that any additional clients on this group share the
+ * same MAC address. Since we moved the MAC address filter, we
+ * need to move these clients too.
+ *
+ * If we are moving the client from the default group and its
+ * MAC address has VLAN clients, then we must move those
+ * clients as well.
+ *
+ * In both cases the idea is the same: we moved the MAC
+ * address filter to the tgrp, so we must move all clients
+ * using that MAC address to tgrp as well.
+ */
if (fgrp != MAC_DEFAULT_RX_GROUP(mip)) {
mgcp = fgrp->mrg_clients;
while (mgcp != NULL) {
@@ -6832,20 +7098,21 @@ mac_rx_switch_group(mac_client_impl_t *mcip, mac_group_t *fgrp,
gmcip->mci_flent->fe_rx_ring_group = tgrp;
}
mac_release_rx_group(mcip, fgrp);
- ASSERT(MAC_GROUP_NO_CLIENT(fgrp));
+ VERIFY3B(MAC_GROUP_NO_CLIENT(fgrp), ==, B_TRUE);
mac_set_group_state(fgrp, MAC_GROUP_STATE_REGISTERED);
} else {
mac_group_remove_client(fgrp, mcip);
mac_group_add_client(tgrp, mcip);
mcip->mci_flent->fe_rx_ring_group = tgrp;
+
/*
* If there are other clients (VLANs) sharing this address
- * we should be here only for the primary.
+ * then move them too.
*/
- if (mcip->mci_unicast->ma_nusers > 1) {
+ if (mac_check_macaddr_shared(mcip->mci_unicast)) {
/*
* We need to move all the clients that are using
- * this h/w address.
+ * this MAC address.
*/
mgcp = fgrp->mrg_clients;
while (mgcp != NULL) {
@@ -6859,20 +7126,24 @@ mac_rx_switch_group(mac_client_impl_t *mcip, mac_group_t *fgrp,
}
}
}
+
/*
- * The default group will still take the multicast,
- * broadcast traffic etc., so it won't go to
+ * The default group still handles multicast and
+ * broadcast traffic; it won't transition to
* MAC_GROUP_STATE_REGISTERED.
*/
if (fgrp->mrg_state == MAC_GROUP_STATE_RESERVED)
mac_rx_group_unmark(fgrp, MR_CONDEMNED);
mac_set_group_state(fgrp, MAC_GROUP_STATE_SHARED);
}
+
next_state = mac_group_next_state(tgrp, &group_only_mcip,
MAC_DEFAULT_RX_GROUP(mip), B_TRUE);
mac_set_group_state(tgrp, next_state);
+
/*
- * If the destination group is reserved, setup the SRSs etc.
+ * If the destination group is reserved, then setup the SRSes.
+ * Otherwise make sure to use SW classification.
*/
if (tgrp->mrg_state == MAC_GROUP_STATE_RESERVED) {
mac_rx_srs_group_setup(mcip, mcip->mci_flent, SRST_LINK);
@@ -6883,6 +7154,7 @@ mac_rx_switch_group(mac_client_impl_t *mcip, mac_group_t *fgrp,
} else {
mac_rx_switch_grp_to_sw(tgrp);
}
+
return (0);
}
@@ -6913,6 +7185,7 @@ mac_reserve_tx_group(mac_client_impl_t *mcip, boolean_t move)
boolean_t isprimary;
isprimary = mcip->mci_flent->fe_type & FLOW_PRIMARY_MAC;
+
/*
* When we come here for a VLAN on the primary (dladm create-vlan),
* we need to pair it along with the primary (to keep it consistent
@@ -6994,8 +7267,7 @@ mac_reserve_tx_group(mac_client_impl_t *mcip, boolean_t move)
if (grp->mrg_state == MAC_GROUP_STATE_RESERVED &&
candidate_grp == NULL) {
gclient = MAC_GROUP_ONLY_CLIENT(grp);
- if (gclient == NULL)
- gclient = mac_get_grp_primary(grp);
+ VERIFY3P(gclient, !=, NULL);
gmrp = MCIP_RESOURCE_PROPS(gclient);
if (gclient->mci_share == 0 &&
(gmrp->mrp_mask & MRP_TX_RINGS) == 0 &&
@@ -7032,13 +7304,14 @@ mac_reserve_tx_group(mac_client_impl_t *mcip, boolean_t move)
*/
if (need_exclgrp && candidate_grp != NULL) {
/*
- * Switch the MAC client from the candidate group
- * to the default group.
+ * Switch the MAC client from the candidate
+ * group to the default group. We know the
+ * candidate_grp came from a reserved group
+ * and thus only has one client.
*/
grp = candidate_grp;
gclient = MAC_GROUP_ONLY_CLIENT(grp);
- if (gclient == NULL)
- gclient = mac_get_grp_primary(grp);
+ VERIFY3P(gclient, !=, NULL);
mac_tx_client_quiesce((mac_client_handle_t)gclient);
mac_tx_switch_group(gclient, grp, defgrp);
mac_tx_client_restart((mac_client_handle_t)gclient);
@@ -7206,7 +7479,7 @@ mac_tx_switch_group(mac_client_impl_t *mcip, mac_group_t *fgrp,
*/
mac_group_remove_client(fgrp, mcip);
mac_tx_dismantle_soft_rings(fgrp, flent);
- if (mcip->mci_unicast->ma_nusers > 1) {
+ if (mac_check_macaddr_shared(mcip->mci_unicast)) {
mgcp = fgrp->mrg_clients;
while (mgcp != NULL) {
gmcip = mgcp->mgc_client;
@@ -7452,7 +7725,7 @@ mac_no_active(mac_handle_t mh)
* changes and update the mac_resource_props_t for the VLAN's client.
* We need to do this since we don't support setting these properties
* on the primary's VLAN clients, but the VLAN clients have to
- * follow the primary w.r.t the rings property;
+ * follow the primary w.r.t the rings property.
*/
void
mac_set_prim_vlan_rings(mac_impl_t *mip, mac_resource_props_t *mrp)
@@ -7601,13 +7874,10 @@ mac_group_ring_modify(mac_client_impl_t *mcip, mac_group_t *group,
MAC_GROUP_STATE_RESERVED) {
continue;
}
- mcip = MAC_GROUP_ONLY_CLIENT(tgrp);
- if (mcip == NULL)
- mcip = mac_get_grp_primary(tgrp);
- ASSERT(mcip != NULL);
- mrp = MCIP_RESOURCE_PROPS(mcip);
- if ((mrp->mrp_mask & MRP_RX_RINGS) != 0)
+ if (i_mac_clients_hw(tgrp, MRP_RX_RINGS))
continue;
+ mcip = tgrp->mrg_clients->mgc_client;
+ VERIFY3P(mcip, !=, NULL);
if ((tgrp->mrg_cur_count +
defgrp->mrg_cur_count) < (modify + 1)) {
continue;
@@ -7622,12 +7892,10 @@ mac_group_ring_modify(mac_client_impl_t *mcip, mac_group_t *group,
MAC_GROUP_STATE_RESERVED) {
continue;
}
- mcip = MAC_GROUP_ONLY_CLIENT(tgrp);
- if (mcip == NULL)
- mcip = mac_get_grp_primary(tgrp);
- mrp = MCIP_RESOURCE_PROPS(mcip);
- if ((mrp->mrp_mask & MRP_TX_RINGS) != 0)
+ if (i_mac_clients_hw(tgrp, MRP_TX_RINGS))
continue;
+ mcip = tgrp->mrg_clients->mgc_client;
+ VERIFY3P(mcip, !=, NULL);
if ((tgrp->mrg_cur_count +
defgrp->mrg_cur_count) < (modify + 1)) {
continue;
@@ -7897,10 +8165,10 @@ mac_pool_event_cb(pool_event_t what, poolid_t id, void *arg)
* Set effective rings property. This could be called from datapath_setup/
* datapath_teardown or set-linkprop.
* If the group is reserved we just go ahead and set the effective rings.
- * Additionally, for TX this could mean the default group has lost/gained
+ * Additionally, for TX this could mean the default group has lost/gained
* some rings, so if the default group is reserved, we need to adjust the
* effective rings for the default group clients. For RX, if we are working
- * with the non-default group, we just need * to reset the effective props
+ * with the non-default group, we just need to reset the effective props
* for the default group clients.
*/
void
@@ -8030,6 +8298,7 @@ mac_check_primary_relocation(mac_client_impl_t *mcip, boolean_t rxhw)
* the first non-primary.
*/
ASSERT(mip->mi_nactiveclients == 2);
+
/*
* OK, now we have the primary that needs to be relocated.
*/
diff --git a/usr/src/uts/common/io/mac/mac_client.c b/usr/src/uts/common/io/mac/mac_client.c
index 66bba78e91..b918bf4aca 100644
--- a/usr/src/uts/common/io/mac/mac_client.c
+++ b/usr/src/uts/common/io/mac/mac_client.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright 2018 Joyent, Inc.
* Copyright 2017 RackTop Systems.
*/
@@ -865,9 +865,12 @@ mac_unicast_update_client_flow(mac_client_impl_t *mcip)
mac_protect_update_mac_token(mcip);
/*
- * A MAC client could have one MAC address but multiple
- * VLANs. In that case update the flow entries corresponding
- * to all VLANs of the MAC client.
+ * When there are multiple VLANs sharing the same MAC address,
+ * each gets its own MAC client, except when running on sun4v
+ * vsw. In that case the mci_flent_list is used to place
+ * multiple VLAN flows on one MAC client. If we ever get rid
+ * of vsw then this code can go, but until then we need to
+ * update all flow entries.
*/
for (flent = mcip->mci_flent_list; flent != NULL;
flent = flent->fe_client_next) {
@@ -1025,7 +1028,7 @@ mac_unicast_primary_set(mac_handle_t mh, const uint8_t *addr)
return (0);
}
- if (mac_find_macaddr(mip, (uint8_t *)addr) != 0) {
+ if (mac_find_macaddr(mip, (uint8_t *)addr) != NULL) {
i_mac_perim_exit(mip);
return (EBUSY);
}
@@ -1040,9 +1043,9 @@ mac_unicast_primary_set(mac_handle_t mh, const uint8_t *addr)
mac_capab_aggr_t aggr_cap;
/*
- * If the mac is an aggregation, other than the unicast
+ * If the MAC is an aggregation, other than the unicast
* addresses programming, aggr must be informed about this
- * primary unicst address change to change its mac address
+ * primary unicst address change to change its MAC address
* policy to be user-specified.
*/
ASSERT(map->ma_type == MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED);
@@ -1374,7 +1377,7 @@ mac_client_open(mac_handle_t mh, mac_client_handle_t *mchp, char *name,
mcip->mci_state_flags |= MCIS_IS_AGGR_PORT;
if (mip->mi_state_flags & MIS_IS_AGGR)
- mcip->mci_state_flags |= MCIS_IS_AGGR;
+ mcip->mci_state_flags |= MCIS_IS_AGGR_CLIENT;
if ((flags & MAC_OPEN_FLAGS_USE_DATALINK_NAME) != 0) {
datalink_id_t linkid;
@@ -1539,7 +1542,8 @@ mac_client_close(mac_client_handle_t mch, uint16_t flags)
}
/*
- * Set the rx bypass receive callback.
+ * Set the Rx bypass receive callback and return B_TRUE. Return
+ * B_FALSE if it's not possible to enable bypass.
*/
boolean_t
mac_rx_bypass_set(mac_client_handle_t mch, mac_direct_rx_t rx_fn, void *arg1)
@@ -1550,11 +1554,11 @@ mac_rx_bypass_set(mac_client_handle_t mch, mac_direct_rx_t rx_fn, void *arg1)
ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
/*
- * If the mac_client is a VLAN, we should not do DLS bypass and
- * instead let the packets come up via mac_rx_deliver so the vlan
- * header can be stripped.
+ * If the client has more than one VLAN then process packets
+ * through DLS. This should happen only when sun4v vsw is on
+ * the scene.
*/
- if (mcip->mci_nvids > 0)
+ if (mcip->mci_nvids > 1)
return (B_FALSE);
/*
@@ -1608,8 +1612,8 @@ mac_rx_set(mac_client_handle_t mch, mac_rx_t rx_fn, void *arg)
i_mac_perim_exit(mip);
/*
- * If we're changing the rx function on the primary mac of a vnic,
- * make sure any secondary macs on the vnic are updated as well.
+ * If we're changing the Rx function on the primary MAC of a VNIC,
+ * make sure any secondary addresses on the VNIC are updated as well.
*/
if (umip != NULL) {
ASSERT((umip->mi_state_flags & MIS_IS_VNIC) != 0);
@@ -1787,6 +1791,14 @@ mac_client_set_rings_prop(mac_client_impl_t *mcip, mac_resource_props_t *mrp,
}
/* Let check if we can give this an excl group */
} else if (group == defgrp) {
+ /*
+ * If multiple clients share an
+ * address then they must stay on the
+ * default group.
+ */
+ if (mac_check_macaddr_shared(mcip->mci_unicast))
+ return (0);
+
ngrp = mac_reserve_rx_group(mcip, mac_addr,
B_TRUE);
/* Couldn't give it a group, that's fine */
@@ -1809,6 +1821,16 @@ mac_client_set_rings_prop(mac_client_impl_t *mcip, mac_resource_props_t *mrp,
}
if (group == defgrp && ((mrp->mrp_nrxrings > 0) || unspec)) {
+ /*
+ * We are requesting Rx rings. Try to reserve
+ * a non-default group.
+ *
+ * If multiple clients share an address then
+ * they must stay on the default group.
+ */
+ if (mac_check_macaddr_shared(mcip->mci_unicast))
+ return (EINVAL);
+
ngrp = mac_reserve_rx_group(mcip, mac_addr, B_TRUE);
if (ngrp == NULL)
return (ENOSPC);
@@ -2166,10 +2188,10 @@ mac_unicast_flow_create(mac_client_impl_t *mcip, uint8_t *mac_addr,
flent_flags = FLOW_VNIC_MAC;
/*
- * For the first flow we use the mac client's name - mci_name, for
- * subsequent ones we just create a name with the vid. This is
+ * For the first flow we use the MAC client's name - mci_name, for
+ * subsequent ones we just create a name with the VID. This is
* so that we can add these flows to the same flow table. This is
- * fine as the flow name (except for the one with the mac client's
+ * fine as the flow name (except for the one with the MAC client's
* name) is not visible. When the first flow is removed, we just replace
* its fdesc with another from the list, so we will still retain the
* flent with the MAC client's flow name.
@@ -2327,6 +2349,7 @@ mac_client_datapath_setup(mac_client_impl_t *mcip, uint16_t vid,
* The unicast MAC address must have been added successfully.
*/
ASSERT(mcip->mci_unicast != NULL);
+
/*
* Push down the sub-flows that were defined on this link
* hitherto. The flows are added to the active flow table
@@ -2338,15 +2361,23 @@ mac_client_datapath_setup(mac_client_impl_t *mcip, uint16_t vid,
ASSERT(!no_unicast);
/*
- * A unicast flow already exists for that MAC client,
- * this flow must be the same mac address but with
- * different VID. It has been checked by mac_addr_in_use().
+ * A unicast flow already exists for that MAC client
+ * so this flow must be the same MAC address but with
+ * a different VID. It has been checked by
+ * mac_addr_in_use().
+ *
+ * We will use the SRS etc. from the initial
+ * mci_flent. We don't need to create a kstat for
+ * this, as except for the fdesc, everything will be
+ * used from the first flent.
*
- * We will use the SRS etc. from the mci_flent. Note that
- * We don't need to create kstat for this as except for
- * the fdesc, everything will be used from in the 1st flent.
+ * The only time we should see multiple flents on the
+ * same MAC client is on the sun4v vsw. If we removed
+ * that code we should be able to remove the entire
+ * notion of multiple flents on a MAC client (this
+ * doesn't affect sub/user flows because they have
+ * their own list unrelated to mci_flent_list).
*/
-
if (bcmp(mac_addr, map->ma_addr, map->ma_len) != 0) {
err = EINVAL;
goto bail;
@@ -2475,8 +2506,12 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags,
boolean_t is_vnic_primary =
(flags & MAC_UNICAST_VNIC_PRIMARY);
- /* when VID is non-zero, the underlying MAC can not be VNIC */
- ASSERT(!((mip->mi_state_flags & MIS_IS_VNIC) && (vid != 0)));
+ /*
+ * When the VID is non-zero the underlying MAC cannot be a
+ * VNIC. I.e., dladm create-vlan cannot take a VNIC as
+ * argument, only the primary MAC client.
+ */
+ ASSERT(!((mip->mi_state_flags & MIS_IS_VNIC) && (vid != VLAN_ID_NONE)));
/*
* Can't unicast add if the client asked only for minimal datapath
@@ -2489,18 +2524,19 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags,
* Check for an attempted use of the current Port VLAN ID, if enabled.
* No client may use it.
*/
- if (mip->mi_pvid != 0 && vid == mip->mi_pvid)
+ if (mip->mi_pvid != VLAN_ID_NONE && vid == mip->mi_pvid)
return (EBUSY);
/*
* Check whether it's the primary client and flag it.
*/
- if (!(mcip->mci_state_flags & MCIS_IS_VNIC) && is_primary && vid == 0)
+ if (!(mcip->mci_state_flags & MCIS_IS_VNIC) && is_primary &&
+ vid == VLAN_ID_NONE)
mcip->mci_flags |= MAC_CLIENT_FLAGS_PRIMARY;
/*
* is_vnic_primary is true when we come here as a VLAN VNIC
- * which uses the primary mac client's address but with a non-zero
+ * which uses the primary MAC client's address but with a non-zero
* VID. In this case the MAC address is not specified by an upper
* MAC client.
*/
@@ -2552,7 +2588,7 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags,
/*
* Create a handle for vid 0.
*/
- ASSERT(vid == 0);
+ ASSERT(vid == VLAN_ID_NONE);
muip = kmem_zalloc(sizeof (mac_unicast_impl_t), KM_SLEEP);
muip->mui_vid = vid;
*mah = (mac_unicast_handle_t)muip;
@@ -2572,7 +2608,9 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags,
}
/*
- * If this is a VNIC/VLAN, disable softmac fast-path.
+ * If this is a VNIC/VLAN, disable softmac fast-path. This is
+ * only relevant to legacy devices which use softmac to
+ * interface with GLDv3.
*/
if (mcip->mci_state_flags & MCIS_IS_VNIC) {
err = mac_fastpath_disable((mac_handle_t)mip);
@@ -2620,9 +2658,11 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags,
(void) mac_client_set_resources(mch, mrp);
} else if (mcip->mci_state_flags & MCIS_IS_VNIC) {
/*
- * This is a primary VLAN client, we don't support
- * specifying rings property for this as it inherits the
- * rings property from its MAC.
+ * This is a VLAN client sharing the address of the
+ * primary MAC client; i.e., one created via dladm
+ * create-vlan. We don't support specifying ring
+ * properties for this type of client as it inherits
+ * these from the primary MAC client.
*/
if (is_vnic_primary) {
mac_resource_props_t *vmrp;
@@ -2681,7 +2721,7 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags,
/*
* Set the flags here so that if this is a passive client, we
- * can return and set it when we call mac_client_datapath_setup
+ * can return and set it when we call mac_client_datapath_setup
* when this becomes the active client. If we defer to using these
* flags to mac_client_datapath_setup, then for a passive client,
* we'd have to store the flags somewhere (probably fe_flags)
@@ -2984,14 +3024,14 @@ mac_unicast_remove(mac_client_handle_t mch, mac_unicast_handle_t mah)
i_mac_perim_enter(mip);
if (mcip->mci_flags & MAC_CLIENT_FLAGS_VNIC_PRIMARY) {
/*
- * Called made by the upper MAC client of a VNIC.
+ * Call made by the upper MAC client of a VNIC.
* There's nothing much to do, the unicast address will
* be removed by the VNIC driver when the VNIC is deleted,
* but let's ensure that all our transmit is done before
* the client does a mac_client_stop lest it trigger an
* assert in the driver.
*/
- ASSERT(muip->mui_vid == 0);
+ ASSERT(muip->mui_vid == VLAN_ID_NONE);
mac_tx_client_flush(mcip);
@@ -3055,6 +3095,7 @@ mac_unicast_remove(mac_client_handle_t mch, mac_unicast_handle_t mah)
i_mac_perim_exit(mip);
return (0);
}
+
/*
* Remove the VID from the list of client's VIDs.
*/
@@ -3081,7 +3122,7 @@ mac_unicast_remove(mac_client_handle_t mch, mac_unicast_handle_t mah)
* flows.
*/
flent = mac_client_get_flow(mcip, muip);
- ASSERT(flent != NULL);
+ VERIFY3P(flent, !=, NULL);
/*
* The first one is disappearing, need to make sure
@@ -3109,6 +3150,7 @@ mac_unicast_remove(mac_client_handle_t mch, mac_unicast_handle_t mah)
FLOW_FINAL_REFRELE(flent);
ASSERT(!(mcip->mci_state_flags & MCIS_EXCLUSIVE));
+
/*
* Enable fastpath if this is a VNIC or a VLAN.
*/
@@ -3122,7 +3164,8 @@ mac_unicast_remove(mac_client_handle_t mch, mac_unicast_handle_t mah)
mui_vid = muip->mui_vid;
mac_client_datapath_teardown(mch, muip, flent);
- if ((mcip->mci_flags & MAC_CLIENT_FLAGS_PRIMARY) && mui_vid == 0) {
+ if ((mcip->mci_flags & MAC_CLIENT_FLAGS_PRIMARY) &&
+ mui_vid == VLAN_ID_NONE) {
mcip->mci_flags &= ~MAC_CLIENT_FLAGS_PRIMARY;
} else {
i_mac_perim_exit(mip);
diff --git a/usr/src/uts/common/io/mac/mac_datapath_setup.c b/usr/src/uts/common/io/mac/mac_datapath_setup.c
index 0355b544d5..a3fc2529b9 100644
--- a/usr/src/uts/common/io/mac/mac_datapath_setup.c
+++ b/usr/src/uts/common/io/mac/mac_datapath_setup.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2017, Joyent, Inc.
+ * Copyright 2018 Joyent, Inc.
*/
#include <sys/types.h>
@@ -1186,7 +1186,7 @@ mac_srs_fanout_list_alloc(mac_soft_ring_set_t *mac_srs)
mac_srs->srs_tx_soft_rings = (mac_soft_ring_t **)
kmem_zalloc(sizeof (mac_soft_ring_t *) *
MAX_RINGS_PER_GROUP, KM_SLEEP);
- if (mcip->mci_state_flags & MCIS_IS_AGGR) {
+ if (mcip->mci_state_flags & MCIS_IS_AGGR_CLIENT) {
mac_srs_tx_t *tx = &mac_srs->srs_tx;
tx->st_soft_rings = (mac_soft_ring_t **)
@@ -1595,13 +1595,13 @@ mac_srs_update_bwlimit(flow_entry_t *flent, mac_resource_props_t *mrp)
/*
* When the first sub-flow is added to a link, we disable polling on the
- * link and also modify the entry point to mac_rx_srs_subflow_process.
+ * link and also modify the entry point to mac_rx_srs_subflow_process().
* (polling is disabled because with the subflow added, accounting
* for polling needs additional logic, it is assumed that when a subflow is
* added, we can take some hit as a result of disabling polling rather than
* adding more complexity - if this becomes a perf. issue we need to
* re-rvaluate this logic). When the last subflow is removed, we turn back
- * polling and also reset the entry point to mac_rx_srs_process.
+ * polling and also reset the entry point to mac_rx_srs_process().
*
* In the future if there are multiple SRS, we can simply
* take one and give it to the flow rather than disabling polling and
@@ -1646,7 +1646,7 @@ mac_client_update_classifier(mac_client_impl_t *mcip, boolean_t enable)
* Change the S/W classifier so that we can land in the
* correct processing function with correct argument.
* If all subflows have been removed we can revert to
- * mac_rx_srsprocess, else we need mac_rx_srs_subflow_process.
+ * mac_rx_srs_process(), else we need mac_rx_srs_subflow_process().
*/
mutex_enter(&flent->fe_lock);
flent->fe_cb_fn = (flow_fn_t)rx_func;
@@ -2185,7 +2185,7 @@ mac_srs_create(mac_client_impl_t *mcip, flow_entry_t *flent, uint32_t srs_type,
* find nothing plus we have an existing backlog
* (sr_poll_pkt_cnt > 0), we stay in polling mode but don't poll
* the H/W for packets anymore (let the polling thread go to sleep).
- * 5) Once the backlog is relived (packets are processed) we reenable
+ * 5) Once the backlog is relieved (packets are processed) we reenable
* polling (by signalling the poll thread) only when the backlog
* dips below sr_poll_thres.
* 6) sr_hiwat is used exclusively when we are not polling capable
@@ -2256,8 +2256,8 @@ mac_srs_create(mac_client_impl_t *mcip, flow_entry_t *flent, uint32_t srs_type,
/*
* Some drivers require serialization and don't send
* packet chains in interrupt context. For such
- * drivers, we should always queue in soft ring
- * so that we get a chance to switch into a polling
+ * drivers, we should always queue in the soft ring
+ * so that we get a chance to switch into polling
* mode under backlog.
*/
ring_info = mac_hwring_getinfo((mac_ring_handle_t)ring);
@@ -2364,9 +2364,11 @@ mac_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
}
/*
- * Set up the RX SRSs. If the S/W SRS is not set, set it up, if there
- * is a group associated with this MAC client, set up SRSs for individual
- * h/w rings.
+ * Set up the Rx SRSes. If there is no group associated with the
+ * client, then only setup SW classification. If the client has
+ * exclusive (MAC_GROUP_STATE_RESERVED) use of the group, then create an
+ * SRS for each HW ring. If the client is sharing a group, then make
+ * sure to teardown the HW SRSes.
*/
void
mac_rx_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
@@ -2377,13 +2379,14 @@ mac_rx_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
mac_ring_t *ring;
uint32_t fanout_type;
mac_group_t *rx_group = flent->fe_rx_ring_group;
+ boolean_t no_unicast;
fanout_type = mac_find_fanout(flent, link_type);
+ no_unicast = (mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR) != 0;
- /* Create the SRS for S/W classification if none exists */
+ /* Create the SRS for SW classification if none exists */
if (flent->fe_rx_srs[0] == NULL) {
ASSERT(flent->fe_rx_srs_cnt == 0);
- /* Setup the Rx SRS */
mac_srs = mac_srs_create(mcip, flent, fanout_type | link_type,
mac_rx_deliver, mcip, NULL, NULL);
mutex_enter(&flent->fe_lock);
@@ -2395,15 +2398,17 @@ mac_rx_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
if (rx_group == NULL)
return;
+
/*
- * fanout for default SRS is done when default SRS are created
- * above. As each ring is added to the group, we setup the
- * SRS and fanout to it.
+ * If the group is marked RESERVED then setup an SRS and
+ * fanout for each HW ring.
*/
switch (rx_group->mrg_state) {
case MAC_GROUP_STATE_RESERVED:
for (ring = rx_group->mrg_rings; ring != NULL;
ring = ring->mr_next) {
+ uint16_t vid = i_mac_flow_vid(mcip->mci_flent);
+
switch (ring->mr_state) {
case MR_INUSE:
case MR_FREE:
@@ -2413,20 +2418,23 @@ mac_rx_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
(void) mac_start_ring(ring);
/*
- * Since the group is exclusively ours create
- * an SRS for this ring to allow the
- * individual SRS to dynamically poll the
- * ring. Do this only if the client is not
- * a VLAN MAC client, since for VLAN we do
- * s/w classification for the VID check, and
- * if it has a unicast address.
+ * If a client requires SW VLAN
+ * filtering or has no unicast address
+ * then we don't create any HW ring
+ * SRSes.
*/
- if ((mcip->mci_state_flags &
- MCIS_NO_UNICAST_ADDR) ||
- i_mac_flow_vid(mcip->mci_flent) !=
- VLAN_ID_NONE) {
+ if ((!MAC_GROUP_HW_VLAN(rx_group) &&
+ vid != VLAN_ID_NONE) || no_unicast)
break;
- }
+
+ /*
+ * When a client has exclusive use of
+ * a group, and that group's traffic
+ * is fully HW classified, we create
+ * an SRS for each HW ring in order to
+ * make use of dynamic polling of said
+ * HW rings.
+ */
mac_srs = mac_srs_create(mcip, flent,
fanout_type | link_type,
mac_rx_deliver, mcip, NULL, ring);
@@ -2442,14 +2450,9 @@ mac_rx_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
break;
case MAC_GROUP_STATE_SHARED:
/*
- * Set all rings of this group to software classified.
- *
- * If the group is current RESERVED, the existing mac
- * client (the only client on this group) is using
- * this group exclusively. In that case we need to
- * disable polling on the rings of the group (if it
- * was enabled), and free the SRS associated with the
- * rings.
+ * When a group is shared by multiple clients, we must
+ * use SW classification to ensure packets are
+ * delivered to the correct client.
*/
mac_rx_switch_grp_to_sw(rx_group);
break;
@@ -2502,10 +2505,11 @@ mac_tx_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
}
/*
- * Remove all the RX SRSs. If we want to remove only the SRSs associated
- * with h/w rings, leave the S/W SRS alone. This is used when we want to
- * move the MAC client from one group to another, so we need to teardown
- * on the h/w SRSs.
+ * Teardown all the Rx SRSes. If hwonly is set, then only teardown
+ * the Rx HW SRSes and leave the SW SRS alone. The hwonly flag is set
+ * when we wish to move a MAC client from one group to another. In
+ * that case, we need to release the current HW SRSes but keep the SW
+ * SRS for continued traffic classification.
*/
void
mac_rx_srs_group_teardown(flow_entry_t *flent, boolean_t hwonly)
@@ -2523,8 +2527,16 @@ mac_rx_srs_group_teardown(flow_entry_t *flent, boolean_t hwonly)
flent->fe_rx_srs[i] = NULL;
flent->fe_rx_srs_cnt--;
}
- ASSERT(!hwonly || flent->fe_rx_srs_cnt == 1);
- ASSERT(hwonly || flent->fe_rx_srs_cnt == 0);
+
+ /*
+ * If we are only tearing down the HW SRSes then there must be
+ * one SRS left for SW classification. Otherwise we are tearing
+ * down both HW and SW and there should be no SRSes left.
+ */
+ if (hwonly)
+ VERIFY3S(flent->fe_rx_srs_cnt, ==, 1);
+ else
+ VERIFY3S(flent->fe_rx_srs_cnt, ==, 0);
}
/*
@@ -2826,6 +2838,7 @@ mac_group_next_state(mac_group_t *grp, mac_client_impl_t **group_only_mcip,
* even if this is the only client in the default group, we will
* leave group as shared).
*/
+
int
mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
uint32_t link_type)
@@ -2836,6 +2849,7 @@ mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
mac_group_t *default_rgroup;
mac_group_t *default_tgroup;
int err;
+ uint16_t vid;
uint8_t *mac_addr;
mac_group_state_t next_state;
mac_client_impl_t *group_only_mcip;
@@ -2848,6 +2862,7 @@ mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
boolean_t no_unicast;
boolean_t isprimary = flent->fe_type & FLOW_PRIMARY_MAC;
mac_client_impl_t *reloc_pmcip = NULL;
+ boolean_t use_hw;
ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
@@ -2879,15 +2894,19 @@ mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
(mrp->mrp_mask & MRP_TXRINGS_UNSPEC));
/*
- * By default we have given the primary all the rings
- * i.e. the default group. Let's see if the primary
- * needs to be relocated so that the addition of this
- * client doesn't impact the primary's performance,
- * i.e. if the primary is in the default group and
- * we add this client, the primary will lose polling.
- * We do this only for NICs supporting dynamic ring
- * grouping and only when this is the first client
- * after the primary (i.e. nactiveclients is 2)
+ * All the rings initially belong to the default group
+ * under dynamic grouping. The primary client uses the
+ * default group when it is the only client. The
+ * default group is also used as the destination for
+ * all multicast and broadcast traffic of all clients.
+ * Therefore, the primary client loses its ability to
+ * poll the softrings on addition of a second client.
+ * To avoid a performance penalty, MAC will move the
+ * primary client to a dedicated group when it can.
+ *
+ * When using static grouping, the primary client
+ * begins life on a non-default group. There is
+ * no moving needed upon addition of a second client.
*/
if (!isprimary && mip->mi_nactiveclients == 2 &&
(group_only_mcip = mac_primary_client_handle(mip)) !=
@@ -2895,6 +2914,7 @@ mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
reloc_pmcip = mac_check_primary_relocation(
group_only_mcip, rxhw);
}
+
/*
* Check to see if we can get an exclusive group for
* this mac address or if there already exists a
@@ -2908,6 +2928,26 @@ mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
} else if (rgroup == NULL) {
rgroup = default_rgroup;
}
+
+ /*
+ * If we are adding a second client to a
+ * non-default group then we need to move the
+ * existing client to the default group and
+ * add the new client to the default group as
+ * well.
+ */
+ if (rgroup != default_rgroup &&
+ rgroup->mrg_state == MAC_GROUP_STATE_RESERVED) {
+ group_only_mcip = MAC_GROUP_ONLY_CLIENT(rgroup);
+ err = mac_rx_switch_group(group_only_mcip, rgroup,
+ default_rgroup);
+
+ if (err != 0)
+ goto setup_failed;
+
+ rgroup = default_rgroup;
+ }
+
/*
* Check to see if we can get an exclusive group for
* this mac client. If no groups are available, use
@@ -2939,14 +2979,17 @@ mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
rgroup->mrg_cur_count);
}
}
+
flent->fe_rx_ring_group = rgroup;
/*
- * Add the client to the group. This could cause
- * either this group to move to the shared state or
- * cause the default group to move to the shared state.
- * The actions on this group are done here, while the
- * actions on the default group are postponed to
- * the end of this function.
+ * Add the client to the group and update the
+ * group's state. If rgroup != default_group
+ * then the rgroup should only ever have one
+ * client and be in the RESERVED state. But no
+ * matter what, the default_rgroup will enter
+ * the SHARED state since it has to receive
+ * all broadcast and multicast traffic. This
+ * case is handled later in the function.
*/
mac_group_add_client(rgroup, mcip);
next_state = mac_group_next_state(rgroup,
@@ -2971,28 +3014,37 @@ mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
&group_only_mcip, default_tgroup, B_FALSE);
tgroup->mrg_state = next_state;
}
- /*
- * Setup the Rx and Tx SRSes. If we got a pristine group
- * exclusively above, mac_srs_group_setup would simply create
- * the required SRSes. If we ended up sharing a previously
- * reserved group, mac_srs_group_setup would also dismantle the
- * SRSes of the previously exclusive group
- */
- mac_srs_group_setup(mcip, flent, link_type);
/* We are setting up minimal datapath only */
- if (no_unicast)
+ if (no_unicast) {
+ mac_srs_group_setup(mcip, flent, link_type);
break;
- /* Program the S/W Classifer */
+ }
+
+ /* Program software classification. */
if ((err = mac_flow_add(mip->mi_flow_tab, flent)) != 0)
goto setup_failed;
- /* Program the H/W Classifier */
- if ((err = mac_add_macaddr(mip, rgroup, mac_addr,
- (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0)) != 0)
+ /* Program hardware classification. */
+ vid = i_mac_flow_vid(flent);
+ use_hw = (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0;
+ err = mac_add_macaddr_vlan(mip, rgroup, mac_addr, vid, use_hw);
+
+ if (err != 0)
goto setup_failed;
+
mcip->mci_unicast = mac_find_macaddr(mip, mac_addr);
- ASSERT(mcip->mci_unicast != NULL);
+ VERIFY3P(mcip->mci_unicast, !=, NULL);
+
+ /*
+ * Setup the Rx and Tx SRSes. If the client has a
+ * reserved group, then mac_srs_group_setup() creates
+ * the required SRSes for the HW rings. If we have a
+ * shared group, mac_srs_group_setup() dismantles the
+ * HW SRSes of the previously exclusive group.
+ */
+ mac_srs_group_setup(mcip, flent, link_type);
+
/* (Re)init the v6 token & local addr used by link protection */
mac_protect_update_mac_token(mcip);
break;
@@ -3036,17 +3088,23 @@ mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
ASSERT(default_rgroup->mrg_state ==
MAC_GROUP_STATE_SHARED);
}
+
/*
- * If we get an exclusive group for a VLAN MAC client we
- * need to take the s/w path to make the additional check for
- * the vid. Disable polling and set it to s/w classification.
- * Similarly for clients that don't have a unicast address.
+ * A VLAN MAC client on a reserved group still
+ * requires SW classification if the MAC doesn't
+ * provide VLAN HW filtering.
+ *
+ * Clients with no unicast address also require SW
+ * classification.
*/
if (rgroup->mrg_state == MAC_GROUP_STATE_RESERVED &&
- (i_mac_flow_vid(flent) != VLAN_ID_NONE || no_unicast)) {
+ ((!MAC_GROUP_HW_VLAN(rgroup) && vid != VLAN_ID_NONE) ||
+ no_unicast)) {
mac_rx_switch_grp_to_sw(rgroup);
}
+
}
+
mac_set_rings_effective(mcip);
return (0);
@@ -3072,6 +3130,7 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent,
boolean_t check_default_group = B_FALSE;
mac_group_state_t next_state;
mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip);
+ uint16_t vid;
ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
@@ -3084,16 +3143,24 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent,
case SRST_LINK:
/* Stop sending packets */
mac_tx_client_block(mcip);
+ group = flent->fe_rx_ring_group;
+ vid = i_mac_flow_vid(flent);
- /* Stop the packets coming from the H/W */
+ /*
+ * Stop the packet flow from the hardware by disabling
+ * any hardware filters assigned to this client.
+ */
if (mcip->mci_unicast != NULL) {
int err;
- err = mac_remove_macaddr(mcip->mci_unicast);
+
+ err = mac_remove_macaddr_vlan(mcip->mci_unicast, vid);
+
if (err != 0) {
- cmn_err(CE_WARN, "%s: failed to remove a MAC"
- " address because of error 0x%x",
+ cmn_err(CE_WARN, "%s: failed to remove MAC HW"
+ " filters because of error 0x%x",
mip->mi_name, err);
}
+
mcip->mci_unicast = NULL;
}
@@ -3114,17 +3181,17 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent,
* left who can use it exclusively. Also, if we
* were the last client, release the group.
*/
- group = flent->fe_rx_ring_group;
default_group = MAC_DEFAULT_RX_GROUP(mip);
if (group != NULL) {
mac_group_remove_client(group, mcip);
next_state = mac_group_next_state(group,
&grp_only_mcip, default_group, B_TRUE);
+
if (next_state == MAC_GROUP_STATE_RESERVED) {
/*
* Only one client left on this RX group.
*/
- ASSERT(grp_only_mcip != NULL);
+ VERIFY3P(grp_only_mcip, !=, NULL);
mac_set_group_state(group,
MAC_GROUP_STATE_RESERVED);
group_only_flent = grp_only_mcip->mci_flent;
@@ -3149,7 +3216,7 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent,
* to see if the primary client can get
* exclusive access to the default group.
*/
- ASSERT(group != MAC_DEFAULT_RX_GROUP(mip));
+ VERIFY3P(group, !=, MAC_DEFAULT_RX_GROUP(mip));
if (mrp->mrp_mask & MRP_RX_RINGS) {
MAC_RX_GRP_RELEASED(mip);
if (mip->mi_rx_group_type ==
@@ -3163,7 +3230,8 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent,
MAC_GROUP_STATE_REGISTERED);
check_default_group = B_TRUE;
} else {
- ASSERT(next_state == MAC_GROUP_STATE_SHARED);
+ VERIFY3S(next_state, ==,
+ MAC_GROUP_STATE_SHARED);
mac_set_group_state(group,
MAC_GROUP_STATE_SHARED);
mac_rx_group_unmark(group, MR_CONDEMNED);
@@ -3252,12 +3320,12 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent,
*/
if (check_default_group) {
default_group = MAC_DEFAULT_RX_GROUP(mip);
- ASSERT(default_group->mrg_state == MAC_GROUP_STATE_SHARED);
+ VERIFY3S(default_group->mrg_state, ==, MAC_GROUP_STATE_SHARED);
next_state = mac_group_next_state(default_group,
&grp_only_mcip, default_group, B_TRUE);
if (next_state == MAC_GROUP_STATE_RESERVED) {
- ASSERT(grp_only_mcip != NULL &&
- mip->mi_nactiveclients == 1);
+ VERIFY3P(grp_only_mcip, !=, NULL);
+ VERIFY3U(mip->mi_nactiveclients, ==, 1);
mac_set_group_state(default_group,
MAC_GROUP_STATE_RESERVED);
mac_rx_srs_group_setup(grp_only_mcip,
@@ -3781,7 +3849,7 @@ mac_tx_srs_del_ring(mac_soft_ring_set_t *mac_srs, mac_ring_t *tx_ring)
* is also stored in st_soft_rings[] array. That entry should
* be removed.
*/
- if (mcip->mci_state_flags & MCIS_IS_AGGR) {
+ if (mcip->mci_state_flags & MCIS_IS_AGGR_CLIENT) {
mac_srs_tx_t *tx = &mac_srs->srs_tx;
ASSERT(tx->st_soft_rings[tx_ring->mr_index] == remove_sring);
@@ -3810,7 +3878,7 @@ mac_tx_srs_setup(mac_client_impl_t *mcip, flow_entry_t *flent)
boolean_t is_aggr;
uint_t ring_info = 0;
- is_aggr = (mcip->mci_state_flags & MCIS_IS_AGGR) != 0;
+ is_aggr = (mcip->mci_state_flags & MCIS_IS_AGGR_CLIENT) != 0;
grp = flent->fe_tx_ring_group;
if (grp == NULL) {
ring = (mac_ring_t *)mip->mi_default_tx_ring;
diff --git a/usr/src/uts/common/io/mac/mac_provider.c b/usr/src/uts/common/io/mac/mac_provider.c
index 07201afdec..26f501668e 100644
--- a/usr/src/uts/common/io/mac/mac_provider.c
+++ b/usr/src/uts/common/io/mac/mac_provider.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2018 Joyent, Inc.
* Copyright 2017 OmniTI Computer Consulting, Inc. All rights reserved.
*/
@@ -56,6 +57,7 @@
#include <sys/sdt.h>
#include <sys/pattr.h>
#include <sys/strsun.h>
+#include <sys/vlan.h>
/*
* MAC Provider Interface.
@@ -695,7 +697,7 @@ mac_rx_common(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
{
mac_impl_t *mip = (mac_impl_t *)mh;
mac_ring_t *mr = (mac_ring_t *)mrh;
- mac_soft_ring_set_t *mac_srs;
+ mac_soft_ring_set_t *mac_srs;
mblk_t *bp = mp_chain;
boolean_t hw_classified = B_FALSE;
diff --git a/usr/src/uts/common/io/mac/mac_sched.c b/usr/src/uts/common/io/mac/mac_sched.c
index d046930873..cbd5ce1e19 100644
--- a/usr/src/uts/common/io/mac/mac_sched.c
+++ b/usr/src/uts/common/io/mac/mac_sched.c
@@ -21,7 +21,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright 2017 Joyent, Inc.
+ * Copyright 2018 Joyent, Inc.
* Copyright 2013 Nexenta Systems, Inc. All rights reserved.
*/
@@ -300,9 +300,8 @@
*
* Otherwise, all fanout is performed by software. MAC divides incoming frames
* into one of three buckets -- IPv4 TCP traffic, IPv4 UDP traffic, and
- * everything else. Note, VLAN tagged traffic is considered other, regardless of
- * the interior EtherType. Regardless of the type of fanout, these three
- * categories or buckets are always used.
+ * everything else. Regardless of the type of fanout, these three categories
+ * or buckets are always used.
*
* The difference between protocol level fanout and full software ring protocol
* fanout is the number of software rings that end up getting created. The
@@ -1475,16 +1474,15 @@ enum pkt_type {
#define PORTS_SIZE 4
/*
- * mac_rx_srs_proto_fanout
- *
- * This routine delivers packets destined to an SRS into one of the
+ * This routine delivers packets destined for an SRS into one of the
* protocol soft rings.
*
- * Given a chain of packets we need to split it up into multiple sub chains
- * destined into TCP, UDP or OTH soft ring. Instead of entering
- * the soft ring one packet at a time, we want to enter it in the form of a
- * chain otherwise we get this start/stop behaviour where the worker thread
- * goes to sleep and then next packets comes in forcing it to wake up etc.
+ * Given a chain of packets we need to split it up into multiple sub
+ * chains: TCP, UDP or OTH soft ring. Instead of entering the soft
+ * ring one packet at a time, we want to enter it in the form of a
+ * chain otherwise we get this start/stop behaviour where the worker
+ * thread goes to sleep and then next packet comes in forcing it to
+ * wake up.
*/
static void
mac_rx_srs_proto_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head)
@@ -1523,9 +1521,9 @@ mac_rx_srs_proto_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head)
mac_srs->srs_ring->mr_classify_type == MAC_HW_CLASSIFIER;
/*
- * Special clients (eg. VLAN, non ether, etc) need DLS
- * processing in the Rx path. SRST_DLS_BYPASS will be clear for
- * such SRSs. Another way of disabling bypass is to set the
+ * Some clients, such as non-ethernet, need DLS processing in
+ * the Rx path. Such clients clear the SRST_DLS_BYPASS flag.
+ * DLS bypass may also be disabled via the
* MCIS_RX_BYPASS_DISABLE flag.
*/
dls_bypass = ((mac_srs->srs_type & SRST_DLS_BYPASS) != 0) &&
@@ -1537,10 +1535,11 @@ mac_rx_srs_proto_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head)
bzero(sz, MAX_SR_TYPES * sizeof (size_t));
/*
- * We got a chain from SRS that we need to send to the soft rings.
- * Since squeues for TCP & IPv4 sap poll their soft rings (for
- * performance reasons), we need to separate out v4_tcp, v4_udp
- * and the rest goes in other.
+ * We have a chain from SRS that we need to split across the
+ * soft rings. The squeues for the TCP and IPv4 SAPs use their
+ * own soft rings to allow polling from the squeue. The rest of
+ * the packets are delivered on the OTH soft ring which cannot
+ * be polled.
*/
while (head != NULL) {
mp = head;
@@ -1568,9 +1567,14 @@ mac_rx_srs_proto_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head)
evhp = (struct ether_vlan_header *)mp->b_rptr;
sap = ntohs(evhp->ether_type);
hdrsize = sizeof (struct ether_vlan_header);
+
/*
- * Check if the VID of the packet, if any,
- * belongs to this client.
+ * Check if the VID of the packet, if
+ * any, belongs to this client.
+ * Technically, if this packet came up
+ * via a HW classified ring then we
+ * don't need to perform this check.
+ * Perhaps a future optimization.
*/
if (!mac_client_check_flow_vid(mcip,
VLAN_ID(ntohs(evhp->ether_tci)))) {
@@ -1635,7 +1639,6 @@ mac_rx_srs_proto_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head)
* performance and may bypass DLS. All other cases go through
* the 'OTH' type path without DLS bypass.
*/
-
ipha = (ipha_t *)(mp->b_rptr + hdrsize);
if ((type != OTH) && MBLK_RX_FANOUT_SLOWPATH(mp, ipha))
type = OTH;
@@ -1647,11 +1650,13 @@ mac_rx_srs_proto_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head)
}
ASSERT(type == UNDEF);
+
/*
- * We look for at least 4 bytes past the IP header to get
- * the port information. If we get an IP fragment, we don't
- * have the port information, and we use just the protocol
- * information.
+ * Determine the type from the IP protocol value. If
+ * classified as TCP or UDP, then update the read
+ * pointer to the beginning of the IP header.
+ * Otherwise leave the message as is for further
+ * processing by DLS.
*/
switch (ipha->ipha_protocol) {
case IPPROTO_TCP:
@@ -1695,11 +1700,10 @@ mac_rx_srs_proto_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head)
int fanout_unaligned = 0;
/*
- * mac_rx_srs_long_fanout
- *
- * The fanout routine for VLANs, and for anything else that isn't performing
- * explicit dls bypass. Returns -1 on an error (drop the packet due to a
- * malformed packet), 0 on success, with values written in *indx and *type.
+ * The fanout routine for any clients with DLS bypass disabled or for
+ * traffic classified as "other". Returns -1 on an error (drop the
+ * packet due to a malformed packet), 0 on success, with values
+ * written in *indx and *type.
*/
static int
mac_rx_srs_long_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *mp,
@@ -1865,16 +1869,15 @@ src_dst_based_fanout:
}
/*
- * mac_rx_srs_fanout
- *
- * This routine delivers packets destined to an SRS into a soft ring member
+ * This routine delivers packets destined for an SRS into a soft ring member
* of the set.
*
- * Given a chain of packets we need to split it up into multiple sub chains
- * destined for one of the TCP, UDP or OTH soft rings. Instead of entering
- * the soft ring one packet at a time, we want to enter it in the form of a
- * chain otherwise we get this start/stop behaviour where the worker thread
- * goes to sleep and then next packets comes in forcing it to wake up etc.
+ * Given a chain of packets we need to split it up into multiple sub
+ * chains: TCP, UDP or OTH soft ring. Instead of entering the soft
+ * ring one packet at a time, we want to enter it in the form of a
+ * chain otherwise we get this start/stop behaviour where the worker
+ * thread goes to sleep and then next packet comes in forcing it to
+ * wake up.
*
* Note:
* Since we know what is the maximum fanout possible, we create a 2D array
@@ -1935,10 +1938,11 @@ mac_rx_srs_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head)
mac_srs->srs_ring->mr_classify_type == MAC_HW_CLASSIFIER;
/*
- * Special clients (eg. VLAN, non ether, etc) need DLS
- * processing in the Rx path. SRST_DLS_BYPASS will be clear for
- * such SRSs. Another way of disabling bypass is to set the
- * MCIS_RX_BYPASS_DISABLE flag.
+ * Some clients, such as non Ethernet, need DLS processing in
+ * the Rx path. Such clients clear the SRST_DLS_BYPASS flag.
+ * DLS bypass may also be disabled via the
+ * MCIS_RX_BYPASS_DISABLE flag, but this is only consumed by
+ * sun4v vsw currently.
*/
dls_bypass = ((mac_srs->srs_type & SRST_DLS_BYPASS) != 0) &&
((mcip->mci_state_flags & MCIS_RX_BYPASS_DISABLE) == 0);
@@ -1960,7 +1964,7 @@ mac_rx_srs_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head)
/*
* We got a chain from SRS that we need to send to the soft rings.
- * Since squeues for TCP & IPv4 sap poll their soft rings (for
+ * Since squeues for TCP & IPv4 SAP poll their soft rings (for
* performance reasons), we need to separate out v4_tcp, v4_udp
* and the rest goes in other.
*/
@@ -1990,9 +1994,14 @@ mac_rx_srs_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head)
evhp = (struct ether_vlan_header *)mp->b_rptr;
sap = ntohs(evhp->ether_type);
hdrsize = sizeof (struct ether_vlan_header);
+
/*
- * Check if the VID of the packet, if any,
- * belongs to this client.
+ * Check if the VID of the packet, if
+ * any, belongs to this client.
+ * Technically, if this packet came up
+ * via a HW classified ring then we
+ * don't need to perform this check.
+ * Perhaps a future optimization.
*/
if (!mac_client_check_flow_vid(mcip,
VLAN_ID(ntohs(evhp->ether_tci)))) {
@@ -2032,7 +2041,6 @@ mac_rx_srs_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head)
continue;
}
-
/*
* If we are using the default Rx ring where H/W or S/W
* classification has not happened, we need to verify if
@@ -2621,7 +2629,6 @@ again:
mac_srs->srs_state |= (SRS_PROC|proc_type);
-
/*
* mcip is NULL for broadcast and multicast flows. The promisc
* callbacks for broadcast and multicast packets are delivered from
@@ -2641,10 +2648,8 @@ again:
}
/*
- * Check if SRS itself is doing the processing
- * This direct path does not apply when subflows are present. In this
- * case, packets need to be dispatched to a soft ring according to the
- * flow's bandwidth and other resources contraints.
+ * Check if SRS itself is doing the processing. This direct
+ * path does not apply when subflows are present.
*/
if (mac_srs->srs_type & SRST_NO_SOFT_RINGS) {
mac_direct_rx_t proc;
@@ -4656,6 +4661,9 @@ mac_rx_deliver(void *arg1, mac_resource_handle_t mrh, mblk_t *mp_chain,
* the packet to the promiscuous listeners of the
* client, since they expect to see the whole
* frame including the VLAN headers.
+ *
+ * The MCIS_STRIP_DISABLE flag is only set when sun4v
+ * vsw is in play.
*/
mp_chain = mac_strip_vlan_tag_chain(mp_chain);
}
@@ -4664,13 +4672,11 @@ mac_rx_deliver(void *arg1, mac_resource_handle_t mrh, mblk_t *mp_chain,
}
/*
- * mac_rx_soft_ring_process
- *
- * process a chain for a given soft ring. The number of packets queued
- * in the SRS and its associated soft rings (including this one) is
- * very small (tracked by srs_poll_pkt_cnt), then allow the entering
- * thread (interrupt or poll thread) to do inline processing. This
- * helps keep the latency down under low load.
+ * Process a chain for a given soft ring. If the number of packets
+ * queued in the SRS and its associated soft rings (including this
+ * one) is very small (tracked by srs_poll_pkt_cnt) then allow the
+ * entering thread (interrupt or poll thread) to process the chain
+ * inline. This is meant to reduce latency under low load.
*
* The proc and arg for each mblk is already stored in the mblk in
* appropriate places.
@@ -4729,13 +4735,13 @@ mac_rx_soft_ring_process(mac_client_impl_t *mcip, mac_soft_ring_t *ringp,
ASSERT(MUTEX_NOT_HELD(&ringp->s_ring_lock));
/*
- * If we have a soft ring set which is doing
- * bandwidth control, we need to decrement
- * srs_size and count so it the SRS can have a
- * accurate idea of what is the real data
- * queued between SRS and its soft rings. We
- * decrement the counters only when the packet
- * gets processed by both SRS and the soft ring.
+ * If we have an SRS performing bandwidth
+ * control then we need to decrement the size
+ * and count so the SRS has an accurate count
+ * of the data queued between the SRS and its
+ * soft rings. We decrement the counters only
+ * when the packet is processed by both the
+ * SRS and the soft ring.
*/
mutex_enter(&mac_srs->srs_lock);
MAC_UPDATE_SRS_COUNT_LOCKED(mac_srs, cnt);
@@ -4751,8 +4757,8 @@ mac_rx_soft_ring_process(mac_client_impl_t *mcip, mac_soft_ring_t *ringp,
if ((ringp->s_ring_first == NULL) ||
(ringp->s_ring_state & S_RING_BLANK)) {
/*
- * We processed inline our packet and
- * nothing new has arrived or our
+ * We processed a single packet inline
+ * and nothing new has arrived or our
* receiver doesn't want to receive
* any packets. We are done.
*/
diff --git a/usr/src/uts/common/io/mac/mac_soft_ring.c b/usr/src/uts/common/io/mac/mac_soft_ring.c
index d24c0207df..f4d2a5ee81 100644
--- a/usr/src/uts/common/io/mac/mac_soft_ring.c
+++ b/usr/src/uts/common/io/mac/mac_soft_ring.c
@@ -21,7 +21,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright 2017 Joyent, Inc.
+ * Copyright 2018 Joyent, Inc.
*/
/*
@@ -207,7 +207,7 @@ mac_soft_ring_create(int id, clock_t wait, uint16_t type,
ringp->s_ring_tx_hiwat =
(mac_tx_soft_ring_hiwat > mac_tx_soft_ring_max_q_cnt) ?
mac_tx_soft_ring_max_q_cnt : mac_tx_soft_ring_hiwat;
- if (mcip->mci_state_flags & MCIS_IS_AGGR) {
+ if (mcip->mci_state_flags & MCIS_IS_AGGR_CLIENT) {
mac_srs_tx_t *tx = &mac_srs->srs_tx;
ASSERT(tx->st_soft_rings[
@@ -339,15 +339,14 @@ mac_soft_ring_fire(void *arg)
}
/*
- * mac_rx_soft_ring_drain
+ * Drain the soft ring pointed to by ringp.
*
- * Called when worker thread model (ST_RING_WORKER_ONLY) of processing
- * incoming packets is used. s_ring_first contain the queued packets.
- * s_ring_rx_func contains the upper level (client) routine where the
- * packets are destined and s_ring_rx_arg1/s_ring_rx_arg2 are the
- * cookie meant for the client.
+ * o s_ring_first: pointer to the queued packet chain.
+ *
+ * o s_ring_rx_func: pointer to the client's Rx routine.
+ *
+ * o s_ring_rx_{arg1,arg2}: opaque values specific to the client.
*/
-/* ARGSUSED */
static void
mac_rx_soft_ring_drain(mac_soft_ring_t *ringp)
{
@@ -392,13 +391,12 @@ mac_rx_soft_ring_drain(mac_soft_ring_t *ringp)
(*proc)(arg1, arg2, mp, NULL);
/*
- * If we have a soft ring set which is doing
- * bandwidth control, we need to decrement its
- * srs_size so it can have a accurate idea of
- * what is the real data queued between SRS and
- * its soft rings. We decrement the size for a
- * packet only when it gets processed by both
- * SRS and the soft ring.
+ * If we have an SRS performing bandwidth control, then
+ * we need to decrement the size and count so the SRS
+ * has an accurate measure of the data queued between
+ * the SRS and its soft rings. We decrement the
+ * counters only when the packet is processed by both
+ * the SRS and the soft ring.
*/
mutex_enter(&mac_srs->srs_lock);
MAC_UPDATE_SRS_COUNT_LOCKED(mac_srs, cnt);
@@ -414,12 +412,10 @@ mac_rx_soft_ring_drain(mac_soft_ring_t *ringp)
}
/*
- * mac_soft_ring_worker
- *
* The soft ring worker routine to process any queued packets. In
- * normal case, the worker thread is bound to a CPU. It the soft
- * ring is dealing with TCP packets, then the worker thread will
- * be bound to the same CPU as the TCP squeue.
+ * normal case, the worker thread is bound to a CPU. If the soft ring
+ * handles TCP packets then the worker thread is bound to the same CPU
+ * as the TCP squeue.
*/
static void
mac_soft_ring_worker(mac_soft_ring_t *ringp)
@@ -605,7 +601,7 @@ mac_soft_ring_dls_bypass(void *arg, mac_direct_rx_t rx_func, void *rx_arg1)
mac_soft_ring_t *softring = arg;
mac_soft_ring_set_t *srs;
- ASSERT(rx_func != NULL);
+ VERIFY3P(rx_func, !=, NULL);
mutex_enter(&softring->s_ring_lock);
softring->s_ring_rx_func = rx_func;
diff --git a/usr/src/uts/common/io/vnic/vnic_dev.c b/usr/src/uts/common/io/vnic/vnic_dev.c
index 3cb7e7660a..da52d7bb37 100644
--- a/usr/src/uts/common/io/vnic/vnic_dev.c
+++ b/usr/src/uts/common/io/vnic/vnic_dev.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2015 Joyent, Inc.
+ * Copyright 2018 Joyent, Inc.
* Copyright 2016 OmniTI Computer Consulting, Inc. All rights reserved.
*/
@@ -354,7 +354,7 @@ vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid,
rw_enter(&vnic_lock, RW_WRITER);
- /* does a VNIC with the same id already exist? */
+ /* Does a VNIC with the same id already exist? */
err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
(mod_hash_val_t *)&vnic);
if (err == 0) {
@@ -1037,7 +1037,7 @@ static int
vnic_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
uint_t pr_valsize, const void *pr_val)
{
- int err = 0;
+ int err = 0;
vnic_t *vn = m_driver;
switch (pr_num) {
@@ -1135,7 +1135,7 @@ vnic_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
uint_t pr_valsize, void *pr_val)
{
vnic_t *vn = arg;
- int ret = 0;
+ int ret = 0;
boolean_t out;
switch (pr_num) {
diff --git a/usr/src/uts/common/mapfiles/ddi.mapfile b/usr/src/uts/common/mapfiles/ddi.mapfile
index 75e95a9452..9b6a9ab677 100644
--- a/usr/src/uts/common/mapfiles/ddi.mapfile
+++ b/usr/src/uts/common/mapfiles/ddi.mapfile
@@ -165,6 +165,7 @@ SYMBOL_SCOPE {
list_insert_tail { FLAGS = EXTERN };
list_next { FLAGS = EXTERN };
list_remove { FLAGS = EXTERN };
+ list_remove_head { FLAGS = EXTERN };
memcpy { FLAGS = EXTERN };
memset { FLAGS = EXTERN };
miocack { FLAGS = EXTERN };
diff --git a/usr/src/uts/common/sys/aggr_impl.h b/usr/src/uts/common/sys/aggr_impl.h
index 547c9cc241..415e176ef3 100644
--- a/usr/src/uts/common/sys/aggr_impl.h
+++ b/usr/src/uts/common/sys/aggr_impl.h
@@ -21,6 +21,8 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2012 OmniTI Computer Consulting, Inc All rights reserved.
+ * Copyright 2018 Joyent, Inc.
*/
#ifndef _SYS_AGGR_IMPL_H
@@ -54,6 +56,15 @@ extern "C" {
*/
#define MAC_PSEUDO_RING_INUSE 0x01
+/*
+ * VLAN filters placed on the Rx pseudo group.
+ */
+typedef struct aggr_vlan {
+ list_node_t av_link;
+ uint16_t av_vid; /* VLAN ID */
+ uint_t av_refs; /* num aggr clients using this VID */
+} aggr_vlan_t;
+
typedef struct aggr_unicst_addr_s {
uint8_t aua_addr[ETHERADDRL];
struct aggr_unicst_addr_s *aua_next;
@@ -73,6 +84,8 @@ typedef struct aggr_pseudo_rx_group_s {
aggr_unicst_addr_t *arg_macaddr;
aggr_pseudo_rx_ring_t arg_rings[MAX_RINGS_PER_GROUP];
uint_t arg_ring_cnt;
+ uint_t arg_untagged; /* num clients untagged */
+ list_t arg_vlans; /* VLANs on this group */
} aggr_pseudo_rx_group_t;
typedef struct aggr_pseudo_tx_ring_s {
@@ -186,11 +199,18 @@ typedef struct aggr_grp_s {
uint_t lg_tx_ports_size; /* size of lg_tx_ports */
uint32_t lg_tx_policy; /* outbound policy */
uint8_t lg_mac_tx_policy;
- uint64_t lg_ifspeed;
link_state_t lg_link_state;
+
+
+ /*
+ * The lg_stat_lock must be held when accessing these fields.
+ */
+ kmutex_t lg_stat_lock;
+ uint64_t lg_ifspeed;
link_duplex_t lg_link_duplex;
uint64_t lg_stat[MAC_NSTAT];
uint64_t lg_ether_stat[ETHER_NSTAT];
+
aggr_lacp_mode_t lg_lacp_mode; /* off, active, or passive */
Agg_t aggr; /* 802.3ad data */
uint32_t lg_hcksum_txflags;
@@ -308,6 +328,8 @@ extern boolean_t aggr_port_notify_link(aggr_grp_t *, aggr_port_t *);
extern void aggr_port_init_callbacks(aggr_port_t *);
extern void aggr_recv_cb(void *, mac_resource_handle_t, mblk_t *, boolean_t);
+extern void aggr_recv_promisc_cb(void *, mac_resource_handle_t, mblk_t *,
+ boolean_t);
extern void aggr_tx_ring_update(void *, uintptr_t);
extern void aggr_tx_notify_thread(void *);
@@ -338,6 +360,9 @@ extern void aggr_grp_port_wait(aggr_grp_t *);
extern int aggr_port_addmac(aggr_port_t *, const uint8_t *);
extern void aggr_port_remmac(aggr_port_t *, const uint8_t *);
+extern int aggr_port_addvlan(aggr_port_t *, uint16_t);
+extern int aggr_port_remvlan(aggr_port_t *, uint16_t);
+
extern mblk_t *aggr_ring_tx(void *, mblk_t *);
extern mblk_t *aggr_find_tx_ring(void *, mblk_t *,
uintptr_t, mac_ring_handle_t *);
diff --git a/usr/src/uts/common/sys/mac_client.h b/usr/src/uts/common/sys/mac_client.h
index 0fc4939503..74f4cbb310 100644
--- a/usr/src/uts/common/sys/mac_client.h
+++ b/usr/src/uts/common/sys/mac_client.h
@@ -22,7 +22,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright 2013 Joyent, Inc. All rights reserved.
+ * Copyright 2018 Joyent, Inc. All rights reserved.
*/
/*
@@ -88,6 +88,7 @@ typedef enum {
} mac_client_promisc_type_t;
/* flags passed to mac_unicast_add() */
+
#define MAC_UNICAST_NODUPCHECK 0x0001
#define MAC_UNICAST_PRIMARY 0x0002
#define MAC_UNICAST_HW 0x0004
diff --git a/usr/src/uts/common/sys/mac_client_impl.h b/usr/src/uts/common/sys/mac_client_impl.h
index 9b3b4fe369..d5c66684d0 100644
--- a/usr/src/uts/common/sys/mac_client_impl.h
+++ b/usr/src/uts/common/sys/mac_client_impl.h
@@ -24,7 +24,7 @@
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
/*
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright 2018 Joyent, Inc.
*/
#ifndef _SYS_MAC_CLIENT_IMPL_H
@@ -57,7 +57,7 @@ typedef struct mac_unicast_impl_s { /* Protected by */
uint16_t mui_vid; /* SL */
} mac_unicast_impl_t;
-#define MAC_CLIENT_FLAGS_PRIMARY 0X0001
+#define MAC_CLIENT_FLAGS_PRIMARY 0x0001
#define MAC_CLIENT_FLAGS_VNIC_PRIMARY 0x0002
#define MAC_CLIENT_FLAGS_MULTI_PRIMARY 0x0004
#define MAC_CLIENT_FLAGS_PASSIVE_PRIMARY 0x0008
@@ -131,12 +131,17 @@ struct mac_client_impl_s { /* Protected by */
uint32_t mci_flags; /* SL */
krwlock_t mci_rw_lock;
mac_unicast_impl_t *mci_unicast_list; /* mci_rw_lock */
+
/*
* The mac_client_impl_t may be shared by multiple clients, i.e
* multiple VLANs sharing the same MAC client. In this case the
- * address/vid tubles differ and are each associated with their
+ * address/vid tuples differ and are each associated with their
* own flow entry, but the rest underlying components SRS, etc,
* are common.
+ *
+ * This is only needed to support sun4v vsw. There are several
+ * places in MAC we could simplify the code if we removed
+ * sun4v support.
*/
flow_entry_t *mci_flent_list; /* mci_rw_lock */
uint_t mci_nflents; /* mci_rw_lock */
@@ -224,7 +229,7 @@ extern int mac_tx_percpu_cnt;
&(mcip)->mci_flent->fe_resource_props)
#define MCIP_EFFECTIVE_PROPS(mcip) \
- (mcip->mci_flent == NULL ? NULL : \
+ (mcip->mci_flent == NULL ? NULL : \
&(mcip)->mci_flent->fe_effective_props)
#define MCIP_RESOURCE_PROPS_MASK(mcip) \
@@ -313,6 +318,74 @@ extern int mac_tx_percpu_cnt;
(((mcip)->mci_state_flags & MCIS_TAG_DISABLE) == 0 && \
(mcip)->mci_nvids == 1) \
+/*
+ * MAC Client Implementation State (mci_state_flags)
+ *
+ * MCIS_IS_VNIC
+ *
+ * The client is a VNIC.
+ *
+ * MCIS_EXCLUSIVE
+ *
+ * The client has exclusive control over the MAC, such that it is
+ * the sole client of the MAC.
+ *
+ * MCIS_TAG_DISABLE
+ *
+ * MAC will not add VLAN tags to outgoing traffic. If this flag
+ * is set it is up to the client to add the correct VLAN tag.
+ *
+ * MCIS_STRIP_DISABLE
+ *
+ * MAC will not strip the VLAN tags on incoming traffic before
+ * passing it to mci_rx_fn. This only applies to non-bypass
+ * traffic.
+ *
+ * MCIS_IS_AGGR_PORT
+ *
+ * The client represents a port on an aggr.
+ *
+ * MCIS_CLIENT_POLL_CAPABLE
+ *
+ * The client is capable of polling the Rx TCP/UDP softrings.
+ *
+ * MCIS_DESC_LOGGED
+ *
+ * This flag is set when the client's link info has been logged
+ * by the mac_log_linkinfo() timer. This ensures that the
+ * client's link info is only logged once.
+ *
+ * MCIS_SHARE_BOUND
+ *
+ * This client has an HIO share bound to it.
+ *
+ * MCIS_DISABLE_TX_VID_CHECK
+ *
+ * MAC will not check the VID of the client's Tx traffic.
+ *
+ * MCIS_USE_DATALINK_NAME
+ *
+ * The client is using the same name as its underlying MAC. This
+ * happens when dlmgmtd is unreachable during client creation.
+ *
+ * MCIS_UNICAST_HW
+ *
+ * The client requires MAC address hardware classification. This
+ * is only used by sun4v vsw.
+ *
+ * MCIS_IS_AGGR_CLIENT
+ *
+ * The client sits atop an aggr.
+ *
+ * MCIS_RX_BYPASS_DISABLE
+ *
+ * Do not allow the client to enable DLS bypass.
+ *
+ * MCIS_NO_UNICAST_ADDR
+ *
+ * This client has no MAC unicast address associated with it.
+ *
+ */
/* MCI state flags */
#define MCIS_IS_VNIC 0x0001
#define MCIS_EXCLUSIVE 0x0002
@@ -325,7 +398,7 @@ extern int mac_tx_percpu_cnt;
#define MCIS_DISABLE_TX_VID_CHECK 0x0100
#define MCIS_USE_DATALINK_NAME 0x0200
#define MCIS_UNICAST_HW 0x0400
-#define MCIS_IS_AGGR 0x0800
+#define MCIS_IS_AGGR_CLIENT 0x0800
#define MCIS_RX_BYPASS_DISABLE 0x1000
#define MCIS_NO_UNICAST_ADDR 0x2000
diff --git a/usr/src/uts/common/sys/mac_client_priv.h b/usr/src/uts/common/sys/mac_client_priv.h
index 6b409513a6..77475b339e 100644
--- a/usr/src/uts/common/sys/mac_client_priv.h
+++ b/usr/src/uts/common/sys/mac_client_priv.h
@@ -22,7 +22,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright 2013 Joyent, Inc. All rights reserved.
+ * Copyright 2018 Joyent, Inc.
*/
/*
@@ -144,6 +144,10 @@ extern void mac_hwring_set_default(mac_handle_t, mac_ring_handle_t);
extern int mac_hwgroup_addmac(mac_group_handle_t, const uint8_t *);
extern int mac_hwgroup_remmac(mac_group_handle_t, const uint8_t *);
+extern int mac_hwgroup_addvlan(mac_group_handle_t, uint16_t);
+extern int mac_hwgroup_remvlan(mac_group_handle_t, uint16_t);
+
+extern boolean_t mac_has_hw_vlan(mac_handle_t);
extern void mac_set_upper_mac(mac_client_handle_t, mac_handle_t,
mac_resource_props_t *);
diff --git a/usr/src/uts/common/sys/mac_impl.h b/usr/src/uts/common/sys/mac_impl.h
index 774c4fad9a..eebbde37de 100644
--- a/usr/src/uts/common/sys/mac_impl.h
+++ b/usr/src/uts/common/sys/mac_impl.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2017, Joyent, Inc.
+ * Copyright (c) 2018, Joyent, Inc.
*/
#ifndef _SYS_MAC_IMPL_H
@@ -244,7 +244,7 @@ struct mac_ring_s {
(mr)->mr_refcnt++; \
}
-#define MR_REFRELE(mr) { \
+#define MR_REFRELE(mr) { \
mutex_enter(&(mr)->mr_lock); \
ASSERT((mr)->mr_refcnt != 0); \
(mr)->mr_refcnt--; \
@@ -255,8 +255,8 @@ struct mac_ring_s {
}
/*
- * Per mac client flow information associated with a RX group.
- * The entire structure is SL protected.
+ * Used to attach MAC clients to an Rx group. The members are SL
+ * protected.
*/
typedef struct mac_grp_client {
struct mac_grp_client *mgc_next;
@@ -270,15 +270,20 @@ typedef struct mac_grp_client {
((g)->mrg_clients->mgc_next == NULL)) ? \
(g)->mrg_clients->mgc_client : NULL)
+#define MAC_GROUP_HW_VLAN(g) \
+ (((g) != NULL) && \
+ ((g)->mrg_info.mgi_addvlan != NULL) && \
+ ((g)->mrg_info.mgi_remvlan != NULL))
+
/*
* Common ring group data structure for ring control and management.
- * The entire structure is SL protected
+ * The entire structure is SL protected.
*/
struct mac_group_s {
int mrg_index; /* index in the list */
mac_ring_type_t mrg_type; /* ring type */
mac_group_state_t mrg_state; /* state of the group */
- mac_group_t *mrg_next; /* next ring in the chain */
+ mac_group_t *mrg_next; /* next group in the chain */
mac_handle_t mrg_mh; /* reference to MAC */
mac_ring_t *mrg_rings; /* grouped rings */
uint_t mrg_cur_count; /* actual size of group */
@@ -300,7 +305,7 @@ struct mac_group_s {
mac_ring_handle_t mrh = rh; \
mac_impl_t *mimpl = (mac_impl_t *)mhp; \
/* \
- * Send packets through a selected tx ring, or through the \
+ * Send packets through a selected tx ring, or through the \
* default handler if there is no selected ring. \
*/ \
if (mrh == NULL) \
@@ -322,9 +327,9 @@ struct mac_group_s {
#define MAC_TX(mip, rh, mp, src_mcip) { \
mac_ring_handle_t rhandle = (rh); \
/* \
- * If there is a bound Hybrid I/O share, send packets through \
+ * If there is a bound Hybrid I/O share, send packets through \
* the default tx ring. (When there's a bound Hybrid I/O share, \
- * the tx rings of this client are mapped in the guest domain \
+ * the tx rings of this client are mapped in the guest domain \
* and not accessible from here.) \
*/ \
_NOTE(CONSTANTCONDITION) \
@@ -333,7 +338,7 @@ struct mac_group_s {
if (mip->mi_promisc_list != NULL) \
mac_promisc_dispatch(mip, mp, src_mcip); \
/* \
- * Grab the proper transmit pointer and handle. Special \
+ * Grab the proper transmit pointer and handle. Special \
* optimization: we can test mi_bridge_link itself atomically, \
* and if that indicates no bridge send packets through tx ring.\
*/ \
@@ -360,17 +365,23 @@ typedef struct mac_mcast_addrs_s {
} mac_mcast_addrs_t;
typedef enum {
- MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED = 1, /* hardware steering */
+ MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED = 1, /* HW classification */
MAC_ADDRESS_TYPE_UNICAST_PROMISC /* promiscuous mode */
} mac_address_type_t;
+typedef struct mac_vlan_s {
+ struct mac_vlan_s *mv_next;
+ uint16_t mv_vid;
+} mac_vlan_t;
+
typedef struct mac_address_s {
mac_address_type_t ma_type; /* address type */
- int ma_nusers; /* number of users */
- /* of that address */
+ int ma_nusers; /* num users of addr */
struct mac_address_s *ma_next; /* next address */
uint8_t ma_addr[MAXMACADDRLEN]; /* address value */
size_t ma_len; /* address length */
+ mac_vlan_t *ma_vlans; /* VLANs on this addr */
+ boolean_t ma_untagged; /* accept untagged? */
mac_group_t *ma_group; /* asscociated group */
mac_impl_t *ma_mip; /* MAC handle */
} mac_address_t;
@@ -487,7 +498,7 @@ struct mac_impl_s {
mac_capab_led_t mi_led;
/*
- * MAC address list. SL protected.
+ * MAC address and VLAN lists. SL protected.
*/
mac_address_t *mi_addresses;
@@ -759,6 +770,8 @@ extern void mac_client_bcast_refresh(mac_client_impl_t *, mac_multicst_t,
*/
extern int mac_group_addmac(mac_group_t *, const uint8_t *);
extern int mac_group_remmac(mac_group_t *, const uint8_t *);
+extern int mac_group_addvlan(mac_group_t *, uint16_t);
+extern int mac_group_remvlan(mac_group_t *, uint16_t);
extern int mac_rx_group_add_flow(mac_client_impl_t *, flow_entry_t *,
mac_group_t *);
extern mblk_t *mac_hwring_tx(mac_ring_handle_t, mblk_t *);
@@ -779,6 +792,7 @@ extern void mac_rx_switch_grp_to_sw(mac_group_t *);
* MAC address functions are used internally by MAC layer.
*/
extern mac_address_t *mac_find_macaddr(mac_impl_t *, uint8_t *);
+extern mac_address_t *mac_find_macaddr_vlan(mac_impl_t *, uint8_t *, uint16_t);
extern boolean_t mac_check_macaddr_shared(mac_address_t *);
extern int mac_update_macaddr(mac_address_t *, uint8_t *);
extern void mac_freshen_macaddr(mac_address_t *, uint8_t *);
@@ -863,8 +877,9 @@ extern int mac_start_group(mac_group_t *);
extern void mac_stop_group(mac_group_t *);
extern int mac_start_ring(mac_ring_t *);
extern void mac_stop_ring(mac_ring_t *);
-extern int mac_add_macaddr(mac_impl_t *, mac_group_t *, uint8_t *, boolean_t);
-extern int mac_remove_macaddr(mac_address_t *);
+extern int mac_add_macaddr_vlan(mac_impl_t *, mac_group_t *, uint8_t *,
+ uint16_t, boolean_t);
+extern int mac_remove_macaddr_vlan(mac_address_t *, uint16_t);
extern void mac_set_group_state(mac_group_t *, mac_group_state_t);
extern void mac_group_add_client(mac_group_t *, mac_client_impl_t *);
diff --git a/usr/src/uts/common/sys/mac_provider.h b/usr/src/uts/common/sys/mac_provider.h
index 4c91c03967..301bc9a058 100644
--- a/usr/src/uts/common/sys/mac_provider.h
+++ b/usr/src/uts/common/sys/mac_provider.h
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2017, Joyent, Inc.
+ * Copyright (c) 2018, Joyent, Inc.
*/
#ifndef _SYS_MAC_PROVIDER_H
@@ -281,6 +281,28 @@ typedef enum {
} mac_ring_type_t;
/*
+ * The value VLAN_ID_NONE (VID 0) means a client does not have
+ * membership to any VLAN. However, this statement is true for both
+ * untagged packets and priority tagged packets leading to confusion
+ * over what semantic is intended. To the provider, VID 0 is a valid
+ * VID when priority tagging is in play. To MAC and everything above,
+ * VLAN_ID_NONE almost universally implies untagged traffic. Thus, we
+ * convert VLAN_ID_NONE to a sentinel value (MAC_VLAN_UNTAGGED) at the
+ * border between MAC and MAC provider. This informs the provider that
+ * the client is interested in untagged traffic and the provider
+ * should set any relevant bits to receive such traffic.
+ *
+ * Currently, the API between MAC and the provider passes the VID as a
+ * unit16_t. In the future this could actually be the entire TCI mask
+ * (PCP, DEI, and VID). This current scheme is safe in that potential
+ * future world as well; as 0xFFFF is not a valid TCI (the 0xFFF VID
+ * is reserved and never transmitted across networks).
+ */
+#define MAC_VLAN_UNTAGGED UINT16_MAX
+#define MAC_VLAN_UNTAGGED_VID(vid) \
+ (((vid) == VLAN_ID_NONE) ? MAC_VLAN_UNTAGGED : (vid))
+
+/*
* Grouping type of a ring group
*
* MAC_GROUP_TYPE_STATIC: The ring group can not be re-grouped.
@@ -358,6 +380,8 @@ typedef struct mac_ring_info_s {
* #defines for mri_flags. The flags are temporary flags that are provided
* only to workaround issues in specific drivers, and they will be
* removed in the future.
+ *
+ * These are consumed only by sun4v and neptune (nxge).
*/
#define MAC_RING_TX_SERIALIZE 0x1
#define MAC_RING_RX_ENQUEUE 0x2
@@ -366,6 +390,8 @@ typedef int (*mac_group_start_t)(mac_group_driver_t);
typedef void (*mac_group_stop_t)(mac_group_driver_t);
typedef int (*mac_add_mac_addr_t)(void *, const uint8_t *);
typedef int (*mac_rem_mac_addr_t)(void *, const uint8_t *);
+typedef int (*mac_add_vlan_filter_t)(mac_group_driver_t, uint16_t);
+typedef int (*mac_rem_vlan_filter_t)(mac_group_driver_t, uint16_t);
struct mac_group_info_s {
mac_group_driver_t mgi_driver; /* Driver reference */
@@ -374,9 +400,11 @@ struct mac_group_info_s {
uint_t mgi_count; /* Count of rings */
mac_intr_t mgi_intr; /* Optional per-group intr */
- /* Only used for rx groups */
+ /* Only used for Rx groups */
mac_add_mac_addr_t mgi_addmac; /* Add a MAC address */
mac_rem_mac_addr_t mgi_remmac; /* Remove a MAC address */
+ mac_add_vlan_filter_t mgi_addvlan; /* Add a VLAN filter */
+ mac_rem_vlan_filter_t mgi_remvlan; /* Remove a VLAN filter */
};
/*
@@ -494,14 +522,14 @@ extern void mac_free(mac_register_t *);
extern int mac_register(mac_register_t *, mac_handle_t *);
extern int mac_disable_nowait(mac_handle_t);
extern int mac_disable(mac_handle_t);
-extern int mac_unregister(mac_handle_t);
-extern void mac_rx(mac_handle_t, mac_resource_handle_t,
+extern int mac_unregister(mac_handle_t);
+extern void mac_rx(mac_handle_t, mac_resource_handle_t,
mblk_t *);
-extern void mac_rx_ring(mac_handle_t, mac_ring_handle_t,
+extern void mac_rx_ring(mac_handle_t, mac_ring_handle_t,
mblk_t *, uint64_t);
-extern void mac_link_update(mac_handle_t, link_state_t);
-extern void mac_link_redo(mac_handle_t, link_state_t);
-extern void mac_unicst_update(mac_handle_t,
+extern void mac_link_update(mac_handle_t, link_state_t);
+extern void mac_link_redo(mac_handle_t, link_state_t);
+extern void mac_unicst_update(mac_handle_t,
const uint8_t *);
extern void mac_dst_update(mac_handle_t, const uint8_t *);
extern void mac_tx_update(mac_handle_t);