From 84de666edc7f7d835057ae4807a387447c086bcf Mon Sep 17 00:00:00 2001 From: Ryan Zezeski Date: Tue, 21 Jan 2020 12:21:56 -0700 Subject: 11490 SRS ring polling disabled for VLANs 11491 Want DLS bypass for VLAN traffic 11492 add VLVF bypass to ixgbe core 2869 duplicate packets with vnics over aggrs 11489 DLS stat delete and aggr kstat can deadlock Portions contributed by: Theo Schlossnagle Reviewed by: Patrick Mooney Reviewed by: Robert Mustacchi Reviewed by: Dan McDonald Reviewed by: Paul Winder Approved by: Gordon Ross --- usr/src/uts/common/inet/ip/ip6_input.c | 9 +- usr/src/uts/common/inet/ip/ip_input.c | 9 +- usr/src/uts/common/io/aggr/aggr_grp.c | 321 ++++++++-- usr/src/uts/common/io/aggr/aggr_port.c | 59 +- usr/src/uts/common/io/aggr/aggr_recv.c | 37 +- usr/src/uts/common/io/dld/dld_proto.c | 30 +- usr/src/uts/common/io/dls/dls.c | 14 +- usr/src/uts/common/io/dls/dls_link.c | 11 +- usr/src/uts/common/io/ixgbe/ixgbe_main.c | 359 ++++++++++- usr/src/uts/common/io/ixgbe/ixgbe_sw.h | 18 + usr/src/uts/common/io/mac/mac.c | 791 +++++++++++++++++-------- usr/src/uts/common/io/mac/mac_client.c | 123 ++-- usr/src/uts/common/io/mac/mac_datapath_setup.c | 246 +++++--- usr/src/uts/common/io/mac/mac_provider.c | 4 +- usr/src/uts/common/io/mac/mac_sched.c | 142 ++--- usr/src/uts/common/io/mac/mac_soft_ring.c | 40 +- usr/src/uts/common/io/vnic/vnic_dev.c | 8 +- usr/src/uts/common/mapfiles/ddi.mapfile | 1 + usr/src/uts/common/sys/aggr_impl.h | 27 +- usr/src/uts/common/sys/mac_client.h | 3 +- usr/src/uts/common/sys/mac_client_impl.h | 83 ++- usr/src/uts/common/sys/mac_client_priv.h | 6 +- usr/src/uts/common/sys/mac_impl.h | 47 +- usr/src/uts/common/sys/mac_provider.h | 44 +- 24 files changed, 1829 insertions(+), 603 deletions(-) diff --git a/usr/src/uts/common/inet/ip/ip6_input.c b/usr/src/uts/common/inet/ip/ip6_input.c index 21cd3cd2fe..cdff35273e 100644 --- a/usr/src/uts/common/inet/ip/ip6_input.c +++ b/usr/src/uts/common/inet/ip/ip6_input.c @@ -23,6 +23,7 @@ * Copyright 
(c) 2009, 2010, Oracle and/or its affiliates. All rights reserved * * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright 2018 Joyent, Inc. */ /* Copyright (c) 1990 Mentat Inc. */ @@ -143,11 +144,9 @@ static void ip_input_multicast_v6(ire_t *, mblk_t *, ip6_t *, * The ill will always be valid if this function is called directly from * the driver. * - * If ip_input_v6() is called from GLDv3: - * - * - This must be a non-VLAN IP stream. - * - 'mp' is either an untagged or a special priority-tagged packet. - * - Any VLAN tag that was in the MAC header has been stripped. + * If this chain is part of a VLAN stream, then the VLAN tag is + * stripped from the MAC header before being delivered to this + * function. * * If the IP header in packet is not 32-bit aligned, every message in the * chain will be aligned before further operations. This is required on SPARC diff --git a/usr/src/uts/common/inet/ip/ip_input.c b/usr/src/uts/common/inet/ip/ip_input.c index ad753c165b..aea49c19d3 100644 --- a/usr/src/uts/common/inet/ip/ip_input.c +++ b/usr/src/uts/common/inet/ip/ip_input.c @@ -23,6 +23,7 @@ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. * * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright 2018 Joyent, Inc. */ /* Copyright (c) 1990 Mentat Inc. */ @@ -146,11 +147,9 @@ static void ip_input_multicast_v4(ire_t *, mblk_t *, ipha_t *, * The ill will always be valid if this function is called directly from * the driver. * - * If ip_input() is called from GLDv3: - * - * - This must be a non-VLAN IP stream. - * - 'mp' is either an untagged or a special priority-tagged packet. - * - Any VLAN tag that was in the MAC header has been stripped. + * If this chain is part of a VLAN stream, then the VLAN tag is + * stripped from the MAC header before being delivered to this + * function. * * If the IP header in packet is not 32-bit aligned, every message in the * chain will be aligned before further operations. 
This is required on SPARC diff --git a/usr/src/uts/common/io/aggr/aggr_grp.c b/usr/src/uts/common/io/aggr/aggr_grp.c index 7e930c89e8..9932c2cb58 100644 --- a/usr/src/uts/common/io/aggr/aggr_grp.c +++ b/usr/src/uts/common/io/aggr/aggr_grp.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2017, Joyent, Inc. + * Copyright 2018 Joyent, Inc. */ /* @@ -124,6 +124,8 @@ static int aggr_pseudo_enable_intr(mac_intr_handle_t); static int aggr_pseudo_start_ring(mac_ring_driver_t, uint64_t); static int aggr_addmac(void *, const uint8_t *); static int aggr_remmac(void *, const uint8_t *); +static int aggr_addvlan(mac_group_driver_t, uint16_t); +static int aggr_remvlan(mac_group_driver_t, uint16_t); static mblk_t *aggr_rx_poll(void *, int); static void aggr_fill_ring(void *, mac_ring_type_t, const int, const int, mac_ring_info_t *, mac_ring_handle_t); @@ -324,6 +326,7 @@ aggr_grp_attach_port(aggr_grp_t *grp, aggr_port_t *port) return (B_FALSE); } + mutex_enter(&grp->lg_stat_lock); if (grp->lg_ifspeed == 0) { /* * The group inherits the speed of the first link being @@ -337,8 +340,10 @@ aggr_grp_attach_port(aggr_grp_t *grp, aggr_port_t *port) * the group link speed, as per 802.3ad. Since it is * not, the attach is cancelled. 
*/ + mutex_exit(&grp->lg_stat_lock); return (B_FALSE); } + mutex_exit(&grp->lg_stat_lock); grp->lg_nattached_ports++; @@ -347,7 +352,9 @@ aggr_grp_attach_port(aggr_grp_t *grp, aggr_port_t *port) */ if (grp->lg_link_state != LINK_STATE_UP) { grp->lg_link_state = LINK_STATE_UP; + mutex_enter(&grp->lg_stat_lock); grp->lg_link_duplex = LINK_DUPLEX_FULL; + mutex_exit(&grp->lg_stat_lock); link_state_changed = B_TRUE; } @@ -405,9 +412,11 @@ aggr_grp_detach_port(aggr_grp_t *grp, aggr_port_t *port) grp->lg_nattached_ports--; if (grp->lg_nattached_ports == 0) { /* the last attached MAC port of the group is being detached */ - grp->lg_ifspeed = 0; grp->lg_link_state = LINK_STATE_DOWN; + mutex_enter(&grp->lg_stat_lock); + grp->lg_ifspeed = 0; grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN; + mutex_exit(&grp->lg_stat_lock); link_state_changed = B_TRUE; } @@ -675,9 +684,13 @@ aggr_rem_pseudo_rx_ring(aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh) } /* - * This function is called to create pseudo rings over the hardware rings of - * the underlying device. Note that there is a 1:1 mapping between the pseudo - * RX rings of the aggr and the hardware rings of the underlying port. + * Create pseudo rings over the HW rings of the port. + * + * o Create a pseudo ring in rx_grp per HW ring in the port's HW group. + * + * o Program existing unicast filters on the pseudo group into the HW group. + * + * o Program existing VLAN filters on the pseudo group into the HW group. 
*/ static int aggr_add_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp) @@ -686,6 +699,7 @@ aggr_add_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp) mac_ring_handle_t hw_rh[MAX_RINGS_PER_GROUP]; aggr_unicst_addr_t *addr, *a; mac_perim_handle_t pmph; + aggr_vlan_t *avp; int hw_rh_cnt, i = 0, j; int err = 0; @@ -693,63 +707,90 @@ aggr_add_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp) mac_perim_enter_by_mh(port->lp_mh, &pmph); /* - * This function must be called after the aggr registers its mac - * and its RX group has been initialized. + * This function must be called after the aggr registers its MAC + * and its Rx group has been initialized. */ ASSERT(rx_grp->arg_gh != NULL); /* - * Get the list the the underlying HW rings. + * Get the list of the underlying HW rings. */ hw_rh_cnt = mac_hwrings_get(port->lp_mch, &port->lp_hwgh, hw_rh, MAC_RING_TYPE_RX); if (port->lp_hwgh != NULL) { /* - * Quiesce the HW ring and the mac srs on the ring. Note + * Quiesce the HW ring and the MAC SRS on the ring. Note * that the HW ring will be restarted when the pseudo ring * is started. At that time all the packets will be - * directly passed up to the pseudo RX ring and handled - * by mac srs created over the pseudo RX ring. + * directly passed up to the pseudo Rx ring and handled + * by MAC SRS created over the pseudo Rx ring. */ mac_rx_client_quiesce(port->lp_mch); mac_srs_perm_quiesce(port->lp_mch, B_TRUE); } /* - * Add all the unicast addresses to the newly added port. + * Add existing VLAN and unicast address filters to the port. 
*/ + for (avp = list_head(&rx_grp->arg_vlans); avp != NULL; + avp = list_next(&rx_grp->arg_vlans, avp)) { + if ((err = aggr_port_addvlan(port, avp->av_vid)) != 0) + goto err; + } + for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next) { if ((err = aggr_port_addmac(port, addr->aua_addr)) != 0) - break; + goto err; } - for (i = 0; err == 0 && i < hw_rh_cnt; i++) + for (i = 0; i < hw_rh_cnt; i++) { err = aggr_add_pseudo_rx_ring(port, rx_grp, hw_rh[i]); + if (err != 0) + goto err; + } - if (err != 0) { - for (j = 0; j < i; j++) - aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[j]); + port->lp_rx_grp_added = B_TRUE; + mac_perim_exit(pmph); + return (0); + +err: + ASSERT(err != 0); + + for (j = 0; j < i; j++) + aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[j]); + + for (a = rx_grp->arg_macaddr; a != addr; a = a->aua_next) + aggr_port_remmac(port, a->aua_addr); - for (a = rx_grp->arg_macaddr; a != addr; a = a->aua_next) - aggr_port_remmac(port, a->aua_addr); + if (avp != NULL) + avp = list_prev(&rx_grp->arg_vlans, avp); - if (port->lp_hwgh != NULL) { - mac_srs_perm_quiesce(port->lp_mch, B_FALSE); - mac_rx_client_restart(port->lp_mch); - port->lp_hwgh = NULL; + for (; avp != NULL; avp = list_prev(&rx_grp->arg_vlans, avp)) { + int err2; + + if ((err2 = aggr_port_remvlan(port, avp->av_vid)) != 0) { + cmn_err(CE_WARN, "Failed to remove VLAN %u from port %s" + ": errno %d.", avp->av_vid, + mac_client_name(port->lp_mch), err2); } - } else { - port->lp_rx_grp_added = B_TRUE; } -done: + + if (port->lp_hwgh != NULL) { + mac_srs_perm_quiesce(port->lp_mch, B_FALSE); + mac_rx_client_restart(port->lp_mch); + port->lp_hwgh = NULL; + } + mac_perim_exit(pmph); return (err); } /* - * This function is called by aggr to remove pseudo RX rings over the - * HW rings of the underlying port. + * Destroy the pseudo rings mapping to this port and remove all VLAN + * and unicast filters from this port. 
Even if there are no underlying + * HW rings we must still remove the unicast filters to take the port + * out of promisc mode. */ static void aggr_rem_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp) @@ -771,16 +812,23 @@ aggr_rem_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp) hw_rh_cnt = mac_hwrings_get(port->lp_mch, &hwgh, hw_rh, MAC_RING_TYPE_RX); - /* - * If hw_rh_cnt is 0, it means that the underlying port does not - * support RX rings. Directly return in this case. - */ for (i = 0; i < hw_rh_cnt; i++) aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[i]); for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next) aggr_port_remmac(port, addr->aua_addr); + for (aggr_vlan_t *avp = list_head(&rx_grp->arg_vlans); avp != NULL; + avp = list_next(&rx_grp->arg_vlans, avp)) { + int err; + + if ((err = aggr_port_remvlan(port, avp->av_vid)) != 0) { + cmn_err(CE_WARN, "Failed to remove VLAN %u from port %s" + ": errno %d.", avp->av_vid, + mac_client_name(port->lp_mch), err); + } + } + if (port->lp_hwgh != NULL) { port->lp_hwgh = NULL; @@ -1307,6 +1355,10 @@ aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports, bzero(&grp->lg_tx_group, sizeof (aggr_pseudo_tx_group_t)); aggr_lacp_init_grp(grp); + grp->lg_rx_group.arg_untagged = 0; + list_create(&(grp->lg_rx_group.arg_vlans), sizeof (aggr_vlan_t), + offsetof(aggr_vlan_t, av_link)); + /* add MAC ports to group */ grp->lg_ports = NULL; grp->lg_nports = 0; @@ -1323,7 +1375,7 @@ aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports, grp->lg_key = key; for (i = 0; i < nports; i++) { - err = aggr_grp_add_port(grp, ports[i].lp_linkid, force, NULL); + err = aggr_grp_add_port(grp, ports[i].lp_linkid, force, &port); if (err != 0) goto bail; } @@ -1545,7 +1597,9 @@ aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port, continue; val = aggr_port_stat(port, stat); val -= port->lp_stat[i]; + mutex_enter(&grp->lg_stat_lock); grp->lg_stat[i] += val; + 
mutex_exit(&grp->lg_stat_lock); } for (i = 0; i < ETHER_NSTAT; i++) { stat = i + MACTYPE_STAT_MIN; @@ -1553,7 +1607,9 @@ aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port, continue; val = aggr_port_stat(port, stat); val -= port->lp_ether_stat[i]; + mutex_enter(&grp->lg_stat_lock); grp->lg_ether_stat[i] += val; + mutex_exit(&grp->lg_stat_lock); } grp->lg_nports--; @@ -1802,6 +1858,8 @@ aggr_grp_delete(datalink_id_t linkid, cred_t *cred) VERIFY(mac_unregister(grp->lg_mh) == 0); grp->lg_mh = NULL; + list_destroy(&(grp->lg_rx_group.arg_vlans)); + AGGR_GRP_REFRELE(grp); return (0); } @@ -1884,6 +1942,8 @@ aggr_grp_stat(aggr_grp_t *grp, uint_t stat, uint64_t *val) aggr_port_t *port; uint_t stat_index; + ASSERT(MUTEX_HELD(&grp->lg_stat_lock)); + /* We only aggregate counter statistics. */ if (IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat) || IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat)) { @@ -1952,10 +2012,9 @@ static int aggr_m_stat(void *arg, uint_t stat, uint64_t *val) { aggr_grp_t *grp = arg; - mac_perim_handle_t mph; int rval = 0; - mac_perim_enter_by_mh(grp->lg_mh, &mph); + mutex_enter(&grp->lg_stat_lock); switch (stat) { case MAC_STAT_IFSPEED: @@ -1975,7 +2034,7 @@ aggr_m_stat(void *arg, uint_t stat, uint64_t *val) rval = aggr_grp_stat(grp, stat, val); } - mac_perim_exit(mph); + mutex_exit(&grp->lg_stat_lock); return (rval); } @@ -2207,7 +2266,7 @@ aggr_m_capab_get(void *arg, mac_capab_t cap, void *cap_data) } /* - * Callback funtion for MAC layer to register groups. + * Callback function for MAC layer to register groups. */ static void aggr_fill_group(void *arg, mac_ring_type_t rtype, const int index, @@ -2229,6 +2288,14 @@ aggr_fill_group(void *arg, mac_ring_type_t rtype, const int index, infop->mgi_addmac = aggr_addmac; infop->mgi_remmac = aggr_remmac; infop->mgi_count = rx_group->arg_ring_cnt; + + /* + * Always set the HW VLAN callbacks. They are smart + * enough to know when a port has HW VLAN filters to + * program and when it doesn't. 
+ */ + infop->mgi_addvlan = aggr_addvlan; + infop->mgi_remvlan = aggr_remvlan; } else { tx_group = &grp->lg_tx_group; tx_group->atg_gh = gh; @@ -2439,6 +2506,186 @@ aggr_remmac(void *arg, const uint8_t *mac_addr) return (err); } +/* + * Search for VID in the Rx group's list and return a pointer if + * found. Otherwise return NULL. + */ +static aggr_vlan_t * +aggr_find_vlan(aggr_pseudo_rx_group_t *rx_group, uint16_t vid) +{ + ASSERT(MAC_PERIM_HELD(rx_group->arg_grp->lg_mh)); + for (aggr_vlan_t *avp = list_head(&rx_group->arg_vlans); avp != NULL; + avp = list_next(&rx_group->arg_vlans, avp)) { + if (avp->av_vid == vid) + return (avp); + } + + return (NULL); +} + +/* + * Accept traffic on the specified VID. + * + * Persist VLAN state in the aggr so that ports added later will + * receive the correct filters. In the future it would be nice to + * allow aggr to iterate its clients instead of duplicating state. + */ +static int +aggr_addvlan(mac_group_driver_t gdriver, uint16_t vid) +{ + aggr_pseudo_rx_group_t *rx_group = (aggr_pseudo_rx_group_t *)gdriver; + aggr_grp_t *aggr = rx_group->arg_grp; + aggr_port_t *port, *p; + mac_perim_handle_t mph; + int err = 0; + aggr_vlan_t *avp = NULL; + + mac_perim_enter_by_mh(aggr->lg_mh, &mph); + + if (vid == MAC_VLAN_UNTAGGED) { + /* + * Aggr is both a MAC provider and MAC client. As a + * MAC provider it is passed MAC_VLAN_UNTAGGED by its + * client. As a client itself, it should pass + * VLAN_ID_NONE to its ports. 
+ */ + vid = VLAN_ID_NONE; + rx_group->arg_untagged++; + goto update_ports; + } + + avp = aggr_find_vlan(rx_group, vid); + + if (avp != NULL) { + avp->av_refs++; + mac_perim_exit(mph); + return (0); + } + + avp = kmem_zalloc(sizeof (aggr_vlan_t), KM_SLEEP); + avp->av_vid = vid; + avp->av_refs = 1; + +update_ports: + for (port = aggr->lg_ports; port != NULL; port = port->lp_next) + if ((err = aggr_port_addvlan(port, vid)) != 0) + break; + + if (err != 0) { + /* + * If any of these calls fail then we are in a + * situation where the ports have different HW state. + * There's no reasonable action the MAC client can + * take in this scenario to rectify the situation. + */ + for (p = aggr->lg_ports; p != port; p = p->lp_next) { + int err2; + + if ((err2 = aggr_port_remvlan(p, vid)) != 0) { + cmn_err(CE_WARN, "Failed to remove VLAN %u" + " from port %s: errno %d.", vid, + mac_client_name(p->lp_mch), err2); + } + + } + + if (vid == VLAN_ID_NONE) + rx_group->arg_untagged--; + + if (avp != NULL) { + kmem_free(avp, sizeof (aggr_vlan_t)); + avp = NULL; + } + } + + if (avp != NULL) + list_insert_tail(&rx_group->arg_vlans, avp); + +done: + mac_perim_exit(mph); + return (err); +} + +/* + * Stop accepting traffic on this VLAN if it's the last use of this VLAN. + */ +static int +aggr_remvlan(mac_group_driver_t gdriver, uint16_t vid) +{ + aggr_pseudo_rx_group_t *rx_group = (aggr_pseudo_rx_group_t *)gdriver; + aggr_grp_t *aggr = rx_group->arg_grp; + aggr_port_t *port, *p; + mac_perim_handle_t mph; + int err = 0; + aggr_vlan_t *avp = NULL; + + mac_perim_enter_by_mh(aggr->lg_mh, &mph); + + /* + * See the comment in aggr_addvlan(). 
+ */ + if (vid == MAC_VLAN_UNTAGGED) { + vid = VLAN_ID_NONE; + rx_group->arg_untagged--; + + if (rx_group->arg_untagged > 0) + goto done; + + goto update_ports; + } + + avp = aggr_find_vlan(rx_group, vid); + + if (avp == NULL) { + err = ENOENT; + goto done; + } + + avp->av_refs--; + + if (avp->av_refs > 0) + goto done; + +update_ports: + for (port = aggr->lg_ports; port != NULL; port = port->lp_next) + if ((err = aggr_port_remvlan(port, vid)) != 0) + break; + + /* + * See the comment in aggr_addvlan() for justification of the + * use of VERIFY here. + */ + if (err != 0) { + for (p = aggr->lg_ports; p != port; p = p->lp_next) { + int err2; + + if ((err2 = aggr_port_addvlan(p, vid)) != 0) { + cmn_err(CE_WARN, "Failed to add VLAN %u" + " to port %s: errno %d.", vid, + mac_client_name(p->lp_mch), err2); + } + } + + if (avp != NULL) + avp->av_refs++; + + if (vid == VLAN_ID_NONE) + rx_group->arg_untagged++; + + goto done; + } + + if (err == 0 && avp != NULL) { + VERIFY3U(avp->av_refs, ==, 0); + list_remove(&rx_group->arg_vlans, avp); + kmem_free(avp, sizeof (aggr_vlan_t)); + } + +done: + mac_perim_exit(mph); + return (err); +} + /* * Add or remove the multicast addresses that are defined for the group * to or from the specified port. diff --git a/usr/src/uts/common/io/aggr/aggr_port.c b/usr/src/uts/common/io/aggr/aggr_port.c index 00545d2c03..9d2edd4f97 100644 --- a/usr/src/uts/common/io/aggr/aggr_port.c +++ b/usr/src/uts/common/io/aggr/aggr_port.c @@ -21,6 +21,8 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2012 OmniTI Computer Consulting, Inc All rights reserved. + * Copyright 2020 Joyent, Inc. */ /* @@ -373,10 +375,14 @@ aggr_port_notify_link(aggr_grp_t *grp, aggr_port_t *port) /* link speed changes? 
*/ ifspeed = aggr_port_stat(port, MAC_STAT_IFSPEED); if (port->lp_ifspeed != ifspeed) { + mutex_enter(&grp->lg_stat_lock); + if (port->lp_state == AGGR_PORT_STATE_ATTACHED) do_detach |= (ifspeed != grp->lg_ifspeed); else do_attach |= (ifspeed == grp->lg_ifspeed); + + mutex_exit(&grp->lg_stat_lock); } port->lp_ifspeed = ifspeed; @@ -528,8 +534,15 @@ aggr_port_promisc(aggr_port_t *port, boolean_t on) if (on) { mac_rx_clear(port->lp_mch); + + /* + * We use the promisc callback because without hardware + * rings, we deliver through flows that will cause duplicate + * delivery of packets when we've flipped into this mode + * to compensate for the lack of hardware MAC matching + */ rc = mac_promisc_add(port->lp_mch, MAC_CLIENT_PROMISC_ALL, - aggr_recv_cb, port, &port->lp_mphp, + aggr_recv_promisc_cb, port, &port->lp_mphp, MAC_PROMISC_FLAGS_NO_TX_LOOP); if (rc != 0) { mac_rx_set(port->lp_mch, aggr_recv_cb, port); @@ -679,3 +692,47 @@ aggr_port_remmac(aggr_port_t *port, const uint8_t *mac_addr) } mac_perim_exit(pmph); } + +int +aggr_port_addvlan(aggr_port_t *port, uint16_t vid) +{ + mac_perim_handle_t pmph; + int err; + + ASSERT(MAC_PERIM_HELD(port->lp_grp->lg_mh)); + mac_perim_enter_by_mh(port->lp_mh, &pmph); + + /* + * Add the VLAN filter to the HW group if the port has a HW + * group. If the port doesn't have a HW group, then it will + * implicitly allow tagged traffic to pass and there is + * nothing to do. 
+ */ + if (port->lp_hwgh == NULL) { + mac_perim_exit(pmph); + return (0); + } + + err = mac_hwgroup_addvlan(port->lp_hwgh, vid); + mac_perim_exit(pmph); + return (err); +} + +int +aggr_port_remvlan(aggr_port_t *port, uint16_t vid) +{ + mac_perim_handle_t pmph; + int err; + + ASSERT(MAC_PERIM_HELD(port->lp_grp->lg_mh)); + mac_perim_enter_by_mh(port->lp_mh, &pmph); + + if (port->lp_hwgh == NULL) { + mac_perim_exit(pmph); + return (0); + } + + err = mac_hwgroup_remvlan(port->lp_hwgh, vid); + mac_perim_exit(pmph); + return (err); +} diff --git a/usr/src/uts/common/io/aggr/aggr_recv.c b/usr/src/uts/common/io/aggr/aggr_recv.c index 2bdb7872e3..33a060da48 100644 --- a/usr/src/uts/common/io/aggr/aggr_recv.c +++ b/usr/src/uts/common/io/aggr/aggr_recv.c @@ -21,6 +21,7 @@ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2012 OmniTI Computer Consulting, Inc All rights reserved. */ /* @@ -68,16 +69,28 @@ aggr_recv_lacp(aggr_port_t *port, mac_resource_handle_t mrh, mblk_t *mp) /* * Callback function invoked by MAC service module when packets are - * made available by a MAC port. + * made available by a MAC port, both in promisc_on mode and not. */ /* ARGSUSED */ -void -aggr_recv_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp, - boolean_t loopback) +static void +aggr_recv_path_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp, + boolean_t loopback, boolean_t promisc_path) { aggr_port_t *port = (aggr_port_t *)arg; aggr_grp_t *grp = port->lp_grp; + /* + * In the case where lp_promisc_on has been turned on to + * compensate for insufficient hardware MAC matching and + * hardware rings are not in use we will fall back to + * using flows for delivery which can result in duplicates + * pushed up the stack. Only respect the chosen path. 
+ */ + if (port->lp_promisc_on != promisc_path) { + freemsgchain(mp); + return; + } + if (grp->lg_lacp_mode == AGGR_LACP_OFF) { aggr_mac_rx(grp->lg_mh, mrh, mp); } else { @@ -161,3 +174,19 @@ aggr_recv_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp, } } } + +/* ARGSUSED */ +void +aggr_recv_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp, + boolean_t loopback) +{ + aggr_recv_path_cb(arg, mrh, mp, loopback, B_FALSE); +} + +/* ARGSUSED */ +void +aggr_recv_promisc_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp, + boolean_t loopback) +{ + aggr_recv_path_cb(arg, mrh, mp, loopback, B_TRUE); +} diff --git a/usr/src/uts/common/io/dld/dld_proto.c b/usr/src/uts/common/io/dld/dld_proto.c index cadd2a76d3..b7eeb35b92 100644 --- a/usr/src/uts/common/io/dld/dld_proto.c +++ b/usr/src/uts/common/io/dld/dld_proto.c @@ -1377,24 +1377,22 @@ dld_capab_direct(dld_str_t *dsp, void *data, uint_t flags) } /* - * dld_capab_poll_enable() + * This function is misnamed. All polling and fanouts are run out of + * the lower MAC for VNICs and out of the MAC for NICs. The + * availability of Rx rings and promiscuous mode is taken care of + * between the soft ring set (mac_srs), the Rx ring, and the SW + * classifier. Fanout, if necessary, is done by the soft rings that + * are part of the SRS. By default the SRS divvies up the packets + * based on protocol: TCP, UDP, or Other (OTH). * - * This function is misnamed. All polling and fanouts are run out of the - * lower mac (in case of VNIC and the only mac in case of NICs). The - * availability of Rx ring and promiscous mode is all taken care between - * the soft ring set (mac_srs), the Rx ring, and S/W classifier. Any - * fanout necessary is done by the soft rings that are part of the - * mac_srs (by default mac_srs sends the packets up via a TCP and - * non TCP soft ring).
- * - * The mac_srs (or its associated soft rings) always store the ill_rx_ring + * The SRS (or its associated soft rings) always store the ill_rx_ring * (the cookie returned when they registered with IP during plumb) as their * 2nd argument which is passed up as mac_resource_handle_t. The upcall * function and 1st argument is what the caller registered when they * called mac_rx_classify_flow_add() to register the flow. For VNIC, * the function is vnic_rx and argument is vnic_t. For regular NIC * case, it mac_rx_default and mac_handle_t. As explained above, the - * mac_srs (or its soft ring) will add the ill_rx_ring (mac_resource_handle_t) + * SRS (or its soft ring) will add the ill_rx_ring (mac_resource_handle_t) * from its stored 2nd argument. */ static int @@ -1407,11 +1405,11 @@ dld_capab_poll_enable(dld_str_t *dsp, dld_capab_poll_t *poll) return (ENOTSUP); /* - * Enable client polling if and only if DLS bypass is possible. - * Special cases like VLANs need DLS processing in the Rx data path. - * In such a case we can neither allow the client (IP) to directly - * poll the softring (since DLS processing hasn't been done) nor can - * we allow DLS bypass. + * Enable client polling if and only if DLS bypass is + * possible. Some traffic requires DLS processing in the Rx + * data path. In such a case we can neither allow the client + * (IP) to directly poll the soft ring (since DLS processing + * hasn't been done) nor can we allow DLS bypass. */ if (!mac_rx_bypass_set(dsp->ds_mch, dsp->ds_rx, dsp->ds_rx_arg)) return (ENOTSUP); diff --git a/usr/src/uts/common/io/dls/dls.c b/usr/src/uts/common/io/dls/dls.c index d6bc723371..2dc16c4586 100644 --- a/usr/src/uts/common/io/dls/dls.c +++ b/usr/src/uts/common/io/dls/dls.c @@ -171,16 +171,16 @@ dls_bind(dld_str_t *dsp, uint32_t sap) /* * The MAC layer does the VLAN demultiplexing and will only pass up * untagged packets to non-promiscuous primary MAC clients. 
In order to - * support the binding to the VLAN SAP which is required by DLPI, dls + * support binding to the VLAN SAP, which is required by DLPI, DLS * needs to get a copy of all tagged packets when the client binds to * the VLAN SAP. We do this by registering a separate promiscuous - * callback for each dls client binding to that SAP. + * callback for each DLS client binding to that SAP. * * Note: even though there are two promiscuous handles in dld_str_t, * ds_mph is for the regular promiscuous mode, ds_vlan_mph is the handle - * to receive VLAN pkt when promiscuous mode is not on. Only one of - * them can be non-NULL at the same time, to avoid receiving dup copies - * of pkts. + * to receive VLAN traffic when promiscuous mode is not on. Only one of + * them can be non-NULL at the same time, to avoid receiving duplicate + * copies of packets. */ if (sap == ETHERTYPE_VLAN && dsp->ds_promisc == 0) { int err; @@ -652,8 +652,8 @@ dls_mac_active_set(dls_link_t *dlp) /* request the primary MAC address */ if ((err = mac_unicast_add(dlp->dl_mch, NULL, MAC_UNICAST_PRIMARY | MAC_UNICAST_TAG_DISABLE | - MAC_UNICAST_DISABLE_TX_VID_CHECK, &dlp->dl_mah, 0, - &diag)) != 0) { + MAC_UNICAST_DISABLE_TX_VID_CHECK, &dlp->dl_mah, + VLAN_ID_NONE, &diag)) != 0) { return (err); } diff --git a/usr/src/uts/common/io/dls/dls_link.c b/usr/src/uts/common/io/dls/dls_link.c index 23580d0c40..6f9049b724 100644 --- a/usr/src/uts/common/io/dls/dls_link.c +++ b/usr/src/uts/common/io/dls/dls_link.c @@ -21,7 +21,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2017 Joyent, Inc. + * Copyright 2018 Joyent, Inc. */ /* @@ -382,7 +382,16 @@ i_dls_link_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp, vid = VLAN_ID(mhi.mhi_tci); + /* + * This condition is true only when a sun4v vsw client + * is on the scene; as it is the only type of client + * that multiplexes VLANs on a single client instance. 
+ * All other types of clients have one VLAN per client + * instance. In that case, MAC strips the VLAN tag + * before delivering it to DLS (see mac_rx_deliver()). + */ if (mhi.mhi_istagged) { + /* * If it is tagged traffic, send it upstream to * all dld_str_t which are attached to the physical diff --git a/usr/src/uts/common/io/ixgbe/ixgbe_main.c b/usr/src/uts/common/io/ixgbe/ixgbe_main.c index 2c90127c6c..3463be30b9 100644 --- a/usr/src/uts/common/io/ixgbe/ixgbe_main.c +++ b/usr/src/uts/common/io/ixgbe/ixgbe_main.c @@ -25,7 +25,7 @@ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2019 Joyent, Inc. + * Copyright 2020 Joyent, Inc. * Copyright 2012 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2013 Saso Kiselkov. All rights reserved. * Copyright (c) 2013 OSN Online Service Nuernberg GmbH. All rights reserved. @@ -57,8 +57,8 @@ static int ixgbe_alloc_rings(ixgbe_t *); static void ixgbe_free_rings(ixgbe_t *); static int ixgbe_alloc_rx_data(ixgbe_t *); static void ixgbe_free_rx_data(ixgbe_t *); -static void ixgbe_setup_rings(ixgbe_t *); -static void ixgbe_setup_rx(ixgbe_t *); +static int ixgbe_setup_rings(ixgbe_t *); +static int ixgbe_setup_rx(ixgbe_t *); static void ixgbe_setup_tx(ixgbe_t *); static void ixgbe_setup_rx_ring(ixgbe_rx_ring_t *); static void ixgbe_setup_tx_ring(ixgbe_tx_ring_t *); @@ -67,6 +67,7 @@ static void ixgbe_setup_vmdq(ixgbe_t *); static void ixgbe_setup_vmdq_rss(ixgbe_t *); static void ixgbe_setup_rss_table(ixgbe_t *); static void ixgbe_init_unicst(ixgbe_t *); +static int ixgbe_init_vlan(ixgbe_t *); static int ixgbe_unicst_find(ixgbe_t *, const uint8_t *); static void ixgbe_setup_multicst(ixgbe_t *); static void ixgbe_get_hw_state(ixgbe_t *); @@ -113,6 +114,8 @@ static void ixgbe_intr_other_work(ixgbe_t *, uint32_t); static void ixgbe_get_driver_control(struct ixgbe_hw *); static int ixgbe_addmac(void *, const uint8_t *); static int ixgbe_remmac(void *, const uint8_t *); +static int 
ixgbe_addvlan(mac_group_driver_t, uint16_t); +static int ixgbe_remvlan(mac_group_driver_t, uint16_t); static void ixgbe_release_driver_control(struct ixgbe_hw *); static int ixgbe_attach(dev_info_t *, ddi_attach_cmd_t); @@ -1159,6 +1162,8 @@ ixgbe_init_driver_settings(ixgbe_t *ixgbe) rx_group = &ixgbe->rx_groups[i]; rx_group->index = i; rx_group->ixgbe = ixgbe; + list_create(&rx_group->vlans, sizeof (ixgbe_vlan_t), + offsetof(ixgbe_vlan_t, ixvl_link)); } for (i = 0; i < ixgbe->num_tx_rings; i++) { @@ -1909,7 +1914,8 @@ ixgbe_start(ixgbe_t *ixgbe, boolean_t alloc_buffer) /* * Setup the rx/tx rings */ - ixgbe_setup_rings(ixgbe); + if (ixgbe_setup_rings(ixgbe) != IXGBE_SUCCESS) + goto start_failure; /* * ixgbe_start() will be called when resetting, however if reset @@ -2282,6 +2288,16 @@ ixgbe_free_rings(ixgbe_t *ixgbe) ixgbe->tx_rings = NULL; } + for (uint_t i = 0; i < ixgbe->num_rx_groups; i++) { + ixgbe_vlan_t *vlp; + ixgbe_rx_group_t *rx_group = &ixgbe->rx_groups[i]; + + while ((vlp = list_remove_head(&rx_group->vlans)) != NULL) + kmem_free(vlp, sizeof (ixgbe_vlan_t)); + + list_destroy(&rx_group->vlans); + } + if (ixgbe->rx_groups != NULL) { kmem_free(ixgbe->rx_groups, sizeof (ixgbe_rx_group_t) * ixgbe->num_rx_groups); @@ -2336,7 +2352,7 @@ ixgbe_free_rx_data(ixgbe_t *ixgbe) /* * ixgbe_setup_rings - Setup rx/tx rings. */ -static void +static int ixgbe_setup_rings(ixgbe_t *ixgbe) { /* @@ -2346,9 +2362,12 @@ ixgbe_setup_rings(ixgbe_t *ixgbe) * 2. Initialize necessary registers for receive/transmit; * 3. 
Initialize software pointers/parameters for receive/transmit; */ - ixgbe_setup_rx(ixgbe); + if (ixgbe_setup_rx(ixgbe) != IXGBE_SUCCESS) + return (IXGBE_FAILURE); ixgbe_setup_tx(ixgbe); + + return (IXGBE_SUCCESS); } static void @@ -2435,7 +2454,7 @@ ixgbe_setup_rx_ring(ixgbe_rx_ring_t *rx_ring) IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rx_ring->hw_index), reg_val); } -static void +static int ixgbe_setup_rx(ixgbe_t *ixgbe) { ixgbe_rx_ring_t *rx_ring; @@ -2527,6 +2546,15 @@ ixgbe_setup_rx(ixgbe_t *ixgbe) break; } + /* + * Initialize VLAN SW and HW state if VLAN filtering is + * enabled. + */ + if (ixgbe->vlft_enabled) { + if (ixgbe_init_vlan(ixgbe) != IXGBE_SUCCESS) + return (IXGBE_FAILURE); + } + /* * Enable the receive unit. This must be done after filter * control is set in FCTRL. On 82598, we disable the descriptor monitor. @@ -2618,6 +2646,8 @@ ixgbe_setup_rx(ixgbe_t *ixgbe) IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, reg_val); } + + return (IXGBE_SUCCESS); } static void @@ -2819,7 +2849,7 @@ static void ixgbe_setup_vmdq(ixgbe_t *ixgbe) { struct ixgbe_hw *hw = &ixgbe->hw; - uint32_t vmdctl, i, vtctl; + uint32_t vmdctl, i, vtctl, vlnctl; /* * Setup the VMDq Control register, enable VMDq based on @@ -2855,9 +2885,19 @@ ixgbe_setup_vmdq(ixgbe_t *ixgbe) /* * Enable Virtualization and Replication. */ - vtctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN; + vtctl = IXGBE_READ_REG(hw, IXGBE_VT_CTL); + ixgbe->rx_def_group = vtctl & IXGBE_VT_CTL_POOL_MASK; + vtctl |= IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN; IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vtctl); + /* + * Enable VLAN filtering and switching (VFTA and VLVF). 
+ */ + vlnctl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); + vlnctl |= IXGBE_VLNCTRL_VFE; + IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctl); + ixgbe->vlft_enabled = B_TRUE; + /* * Enable receiving packets to all VFs */ @@ -2878,7 +2918,7 @@ ixgbe_setup_vmdq_rss(ixgbe_t *ixgbe) { struct ixgbe_hw *hw = &ixgbe->hw; uint32_t i, mrqc; - uint32_t vtctl, vmdctl; + uint32_t vtctl, vmdctl, vlnctl; /* * Initialize RETA/ERETA table @@ -2962,9 +3002,20 @@ ixgbe_setup_vmdq_rss(ixgbe_t *ixgbe) /* * Enable Virtualization and Replication. */ + vtctl = IXGBE_READ_REG(hw, IXGBE_VT_CTL); + ixgbe->rx_def_group = vtctl & IXGBE_VT_CTL_POOL_MASK; + vtctl |= IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN; vtctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN; IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vtctl); + /* + * Enable VLAN filtering and switching (VFTA and VLVF). + */ + vlnctl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); + vlnctl |= IXGBE_VLNCTRL_VFE; + IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctl); + ixgbe->vlft_enabled = B_TRUE; + /* * Enable receiving packets to all VFs */ @@ -3135,6 +3186,53 @@ ixgbe_unicst_find(ixgbe_t *ixgbe, const uint8_t *mac_addr) return (-1); } +/* + * Restore the HW state to match the SW state during restart. + */ +static int +ixgbe_init_vlan(ixgbe_t *ixgbe) +{ + /* + * The device is starting for the first time; there is nothing + * to do. 
+ */ + if (!ixgbe->vlft_init) { + ixgbe->vlft_init = B_TRUE; + return (IXGBE_SUCCESS); + } + + for (uint_t i = 0; i < ixgbe->num_rx_groups; i++) { + int ret; + boolean_t vlvf_bypass; + ixgbe_rx_group_t *rxg = &ixgbe->rx_groups[i]; + struct ixgbe_hw *hw = &ixgbe->hw; + + if (rxg->aupe) { + uint32_t vml2flt; + + vml2flt = IXGBE_READ_REG(hw, IXGBE_VMOLR(rxg->index)); + vml2flt |= IXGBE_VMOLR_AUPE; + IXGBE_WRITE_REG(hw, IXGBE_VMOLR(rxg->index), vml2flt); + } + + vlvf_bypass = (rxg->index == ixgbe->rx_def_group); + for (ixgbe_vlan_t *vlp = list_head(&rxg->vlans); vlp != NULL; + vlp = list_next(&rxg->vlans, vlp)) { + ret = ixgbe_set_vfta(hw, vlp->ixvl_vid, rxg->index, + B_TRUE, vlvf_bypass); + + if (ret != IXGBE_SUCCESS) { + ixgbe_error(ixgbe, "Failed to program VFTA" + " for group %u, VID: %u, ret: %d.", + rxg->index, vlp->ixvl_vid, ret); + return (IXGBE_FAILURE); + } + } + } + + return (IXGBE_SUCCESS); +} + /* * ixgbe_multicst_add - Add a multicst address. */ @@ -6161,6 +6259,7 @@ ixgbe_fill_group(void *arg, mac_ring_type_t rtype, const int index, mac_group_info_t *infop, mac_group_handle_t gh) { ixgbe_t *ixgbe = (ixgbe_t *)arg; + struct ixgbe_hw *hw = &ixgbe->hw; switch (rtype) { case MAC_RING_TYPE_RX: { @@ -6174,6 +6273,20 @@ ixgbe_fill_group(void *arg, mac_ring_type_t rtype, const int index, infop->mgi_stop = NULL; infop->mgi_addmac = ixgbe_addmac; infop->mgi_remmac = ixgbe_remmac; + + if ((ixgbe->classify_mode == IXGBE_CLASSIFY_VMDQ || + ixgbe->classify_mode == IXGBE_CLASSIFY_VMDQ_RSS) && + (hw->mac.type == ixgbe_mac_82599EB || + hw->mac.type == ixgbe_mac_X540 || + hw->mac.type == ixgbe_mac_X550 || + hw->mac.type == ixgbe_mac_X550EM_x)) { + infop->mgi_addvlan = ixgbe_addvlan; + infop->mgi_remvlan = ixgbe_remvlan; + } else { + infop->mgi_addvlan = NULL; + infop->mgi_remvlan = NULL; + } + infop->mgi_count = (ixgbe->num_rx_rings / ixgbe->num_rx_groups); break; @@ -6273,6 +6386,232 @@ ixgbe_rx_ring_intr_disable(mac_intr_handle_t intrh) return (0); } +static 
ixgbe_vlan_t * +ixgbe_find_vlan(ixgbe_rx_group_t *rx_group, uint16_t vid) +{ + for (ixgbe_vlan_t *vlp = list_head(&rx_group->vlans); vlp != NULL; + vlp = list_next(&rx_group->vlans, vlp)) { + if (vlp->ixvl_vid == vid) + return (vlp); + } + + return (NULL); +} + +/* + * Attempt to use a VLAN HW filter for this group. If the group is + * interested in untagged packets then set AUPE only. If the group is + * the default then only set the VFTA. Leave the VLVF slots open for + * reserved groups to guarantee their use of HW filtering. + */ +static int +ixgbe_addvlan(mac_group_driver_t gdriver, uint16_t vid) +{ + ixgbe_rx_group_t *rx_group = (ixgbe_rx_group_t *)gdriver; + ixgbe_t *ixgbe = rx_group->ixgbe; + struct ixgbe_hw *hw = &ixgbe->hw; + ixgbe_vlan_t *vlp; + int ret; + boolean_t is_def_grp; + + mutex_enter(&ixgbe->gen_lock); + + if (ixgbe->ixgbe_state & IXGBE_SUSPENDED) { + mutex_exit(&ixgbe->gen_lock); + return (ECANCELED); + } + + /* + * Let's be sure VLAN filtering is enabled. + */ + VERIFY3B(ixgbe->vlft_enabled, ==, B_TRUE); + is_def_grp = (rx_group->index == ixgbe->rx_def_group); + + /* + * VLAN filtering is enabled but we want to receive untagged + * traffic on this group -- set the AUPE bit on the group and + * leave the VLAN tables alone. + */ + if (vid == MAC_VLAN_UNTAGGED) { + /* + * We never enable AUPE on the default group; it is + * redundant. Untagged traffic which passes L2 + * filtering is delivered to the default group if no + * other group is interested. + */ + if (!is_def_grp) { + uint32_t vml2flt; + + vml2flt = IXGBE_READ_REG(hw, + IXGBE_VMOLR(rx_group->index)); + vml2flt |= IXGBE_VMOLR_AUPE; + IXGBE_WRITE_REG(hw, IXGBE_VMOLR(rx_group->index), + vml2flt); + rx_group->aupe = B_TRUE; + } + + mutex_exit(&ixgbe->gen_lock); + return (0); + } + + vlp = ixgbe_find_vlan(rx_group, vid); + if (vlp != NULL) { + /* Only the default group supports multiple clients. 
*/ + VERIFY3B(is_def_grp, ==, B_TRUE); + vlp->ixvl_refs++; + mutex_exit(&ixgbe->gen_lock); + return (0); + } + + /* + * The default group doesn't require a VLVF entry, only a VFTA + * entry. All traffic passing L2 filtering (MPSAR + VFTA) is + * delivered to the default group if no other group is + * interested. The fifth argument, vlvf_bypass, tells the + * ixgbe common code to avoid using a VLVF slot if one isn't + * already allocated to this VLAN. + * + * This logic is meant to reserve VLVF slots for use by + * reserved groups: guaranteeing their use of HW filtering. + */ + ret = ixgbe_set_vfta(hw, vid, rx_group->index, B_TRUE, is_def_grp); + + if (ret == IXGBE_SUCCESS) { + vlp = kmem_zalloc(sizeof (ixgbe_vlan_t), KM_SLEEP); + vlp->ixvl_vid = vid; + vlp->ixvl_refs = 1; + list_insert_tail(&rx_group->vlans, vlp); + mutex_exit(&ixgbe->gen_lock); + return (0); + } + + /* + * We should actually never return ENOSPC because we've set + * things up so that every reserved group is guaranteed to + * have a VLVF slot. + */ + if (ret == IXGBE_ERR_PARAM) + ret = EINVAL; + else if (ret == IXGBE_ERR_NO_SPACE) + ret = ENOSPC; + else + ret = EIO; + + mutex_exit(&ixgbe->gen_lock); + return (ret); +} + +/* + * Attempt to remove the VLAN HW filter associated with this group. If + * we are removing a HW filter for the default group then we know only + * the VFTA was set (VLVF is reserved for non-default/reserved + * groups). If the group wishes to stop receiving untagged traffic + * then clear the AUPE but leave the VLAN filters alone. 
+ */ +static int +ixgbe_remvlan(mac_group_driver_t gdriver, uint16_t vid) +{ + ixgbe_rx_group_t *rx_group = (ixgbe_rx_group_t *)gdriver; + ixgbe_t *ixgbe = rx_group->ixgbe; + struct ixgbe_hw *hw = &ixgbe->hw; + int ret; + ixgbe_vlan_t *vlp; + boolean_t is_def_grp; + + mutex_enter(&ixgbe->gen_lock); + + if (ixgbe->ixgbe_state & IXGBE_SUSPENDED) { + mutex_exit(&ixgbe->gen_lock); + return (ECANCELED); + } + + is_def_grp = (rx_group->index == ixgbe->rx_def_group); + + /* See the AUPE comment in ixgbe_addvlan(). */ + if (vid == MAC_VLAN_UNTAGGED) { + if (!is_def_grp) { + uint32_t vml2flt; + + vml2flt = IXGBE_READ_REG(hw, + IXGBE_VMOLR(rx_group->index)); + vml2flt &= ~IXGBE_VMOLR_AUPE; + IXGBE_WRITE_REG(hw, + IXGBE_VMOLR(rx_group->index), vml2flt); + rx_group->aupe = B_FALSE; + } + mutex_exit(&ixgbe->gen_lock); + return (0); + } + + vlp = ixgbe_find_vlan(rx_group, vid); + if (vlp == NULL) { + mutex_exit(&ixgbe->gen_lock); + return (ENOENT); + } + + /* + * See the comment in ixgbe_addvlan() about is_def_grp and + * vlvf_bypass. + */ + if (vlp->ixvl_refs == 1) { + ret = ixgbe_set_vfta(hw, vid, rx_group->index, B_FALSE, + is_def_grp); + } else { + /* + * Only the default group can have multiple clients. + * If there is more than one client, leave the + * VFTA[vid] bit alone. + */ + VERIFY3B(is_def_grp, ==, B_TRUE); + VERIFY3U(vlp->ixvl_refs, >, 1); + vlp->ixvl_refs--; + mutex_exit(&ixgbe->gen_lock); + return (0); + } + + if (ret != IXGBE_SUCCESS) { + mutex_exit(&ixgbe->gen_lock); + /* IXGBE_ERR_PARAM should be the only possible error here. */ + if (ret == IXGBE_ERR_PARAM) + return (EINVAL); + else + return (EIO); + } + + VERIFY3U(vlp->ixvl_refs, ==, 1); + vlp->ixvl_refs = 0; + list_remove(&rx_group->vlans, vlp); + kmem_free(vlp, sizeof (ixgbe_vlan_t)); + + /* + * Calling ixgbe_set_vfta() on a non-default group may have + * cleared the VFTA[vid] bit even though the default group + * still has clients using the vid. 
This happens because the + * ixgbe common code doesn't ref count the use of VLANs. Check + * for any use of vid on the default group and make sure the + * VFTA[vid] bit is set. This operation is idempotent: setting + * VFTA[vid] to true if already true won't hurt anything. + */ + if (!is_def_grp) { + ixgbe_rx_group_t *defgrp; + + defgrp = &ixgbe->rx_groups[ixgbe->rx_def_group]; + vlp = ixgbe_find_vlan(defgrp, vid); + if (vlp != NULL) { + /* This shouldn't fail, but if it does return EIO. NOTE(review): the VLAN is looked up on defgrp, yet the pool index passed below is rx_group->index -- confirm defgrp->index was not intended. */ + ret = ixgbe_set_vfta(hw, vid, rx_group->index, B_TRUE, + B_TRUE); + if (ret != IXGBE_SUCCESS) { + mutex_exit(&ixgbe->gen_lock); + return (EIO); + } + } + } + + mutex_exit(&ixgbe->gen_lock); + return (0); +} + /* * Add a mac address. */ diff --git a/usr/src/uts/common/io/ixgbe/ixgbe_sw.h b/usr/src/uts/common/io/ixgbe/ixgbe_sw.h index 20a077d332..cfd987787a 100644 --- a/usr/src/uts/common/io/ixgbe/ixgbe_sw.h +++ b/usr/src/uts/common/io/ixgbe/ixgbe_sw.h @@ -91,6 +91,8 @@ extern "C" { #define MAX_NUM_UNICAST_ADDRESSES 0x80 #define MAX_NUM_MULTICAST_ADDRESSES 0x1000 +#define MAX_NUM_VLAN_FILTERS 0x40 + #define IXGBE_INTR_NONE 0 #define IXGBE_INTR_MSIX 1 #define IXGBE_INTR_MSI 2 @@ -387,6 +389,15 @@ typedef union ixgbe_ether_addr { } mac; } ixgbe_ether_addr_t; +/* + * The list of VLANs an Rx group will accept. 
+ */ +typedef struct ixgbe_vlan { + list_node_t ixvl_link; + uint16_t ixvl_vid; /* The VLAN ID */ + uint_t ixvl_refs; /* Number of users of this VLAN */ +} ixgbe_vlan_t; + typedef enum { USE_NONE, USE_COPY, @@ -589,6 +600,7 @@ typedef struct ixgbe_rx_ring { struct ixgbe *ixgbe; /* Pointer to ixgbe struct */ } ixgbe_rx_ring_t; + /* * Software Receive Ring Group */ @@ -596,6 +608,8 @@ typedef struct ixgbe_rx_group { uint32_t index; /* Group index */ mac_group_handle_t group_handle; /* call back group handle */ struct ixgbe *ixgbe; /* Pointer to ixgbe struct */ + boolean_t aupe; /* AUPE bit */ + list_t vlans; /* list of VLANs to allow */ } ixgbe_rx_group_t; /* @@ -662,6 +676,7 @@ typedef struct ixgbe { */ ixgbe_rx_group_t *rx_groups; /* Array of rx groups */ uint32_t num_rx_groups; /* Number of rx groups in use */ + uint32_t rx_def_group; /* Default Rx group index */ /* * Transmit Rings @@ -715,6 +730,9 @@ typedef struct ixgbe { uint32_t mcast_count; struct ether_addr mcast_table[MAX_NUM_MULTICAST_ADDRESSES]; + boolean_t vlft_enabled; /* VLAN filtering enabled? */ + boolean_t vlft_init; /* VLAN filtering initialized? */ + ulong_t sys_page_size; boolean_t link_check_complete; diff --git a/usr/src/uts/common/io/mac/mac.c b/usr/src/uts/common/io/mac/mac.c index 8709d07030..4d450a539b 100644 --- a/usr/src/uts/common/io/mac/mac.c +++ b/usr/src/uts/common/io/mac/mac.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2019 Joyent, Inc. + * Copyright 2020 Joyent, Inc. 
* Copyright 2015 Garrett D'Amore */ @@ -460,7 +460,7 @@ mac_init(void) mac_logging_interval = 20; mac_flow_log_enable = B_FALSE; mac_link_log_enable = B_FALSE; - mac_logging_timer = 0; + mac_logging_timer = NULL; /* Register to be notified of noteworthy pools events */ mac_pool_event_reg.pec_func = mac_pool_event_cb; @@ -1115,9 +1115,10 @@ mac_start(mac_handle_t mh) if ((defgrp = MAC_DEFAULT_RX_GROUP(mip)) != NULL) { /* - * Start the default ring, since it will be needed - * to receive broadcast and multicast traffic for - * both primary and non-primary MAC clients. + * Start the default group which is responsible + * for receiving broadcast and multicast + * traffic for both primary and non-primary + * MAC clients. */ ASSERT(defgrp->mrg_state == MAC_GROUP_STATE_REGISTERED); err = mac_start_group_and_rings(defgrp); @@ -1729,6 +1730,47 @@ mac_hwgroup_remmac(mac_group_handle_t gh, const uint8_t *addr) return (mac_group_remmac(group, addr)); } +/* + * Program the group's HW VLAN filter if it has such support. + * Otherwise, the group will implicitly accept tagged traffic and + * there is nothing to do. + */ +int +mac_hwgroup_addvlan(mac_group_handle_t gh, uint16_t vid) +{ + mac_group_t *group = (mac_group_t *)gh; + + if (!MAC_GROUP_HW_VLAN(group)) + return (0); + + return (mac_group_addvlan(group, vid)); +} + +int +mac_hwgroup_remvlan(mac_group_handle_t gh, uint16_t vid) +{ + mac_group_t *group = (mac_group_t *)gh; + + if (!MAC_GROUP_HW_VLAN(group)) + return (0); + + return (mac_group_remvlan(group, vid)); +} + +/* + * Determine if a MAC has HW VLAN support. This is a private API + * consumed by aggr. In the future it might be nice to have a bitfield + * in mac_capab_rings_t to track which forms of HW filtering are + * supported by the MAC. + */ +boolean_t +mac_has_hw_vlan(mac_handle_t mh) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + return (MAC_GROUP_HW_VLAN(mip->mi_rx_groups)); +} + /* * Set the RX group to be shared/reserved. 
Note that the group must be * started/stopped outside of this function. @@ -2414,7 +2456,6 @@ mac_disable(mac_handle_t mh) /* * Called when the MAC instance has a non empty flow table, to de-multiplex * incoming packets to the right flow. - * The MAC's rw lock is assumed held as a READER. */ /* ARGSUSED */ static mblk_t * @@ -2425,14 +2466,14 @@ mac_rx_classify(mac_impl_t *mip, mac_resource_handle_t mrh, mblk_t *mp) int err; /* - * If the mac is a port of an aggregation, pass FLOW_IGNORE_VLAN + * If the MAC is a port of an aggregation, pass FLOW_IGNORE_VLAN * to mac_flow_lookup() so that the VLAN packets can be successfully * passed to the non-VLAN aggregation flows. * * Note that there is possibly a race between this and * mac_unicast_remove/add() and VLAN packets could be incorrectly - * classified to non-VLAN flows of non-aggregation mac clients. These - * VLAN packets will be then filtered out by the mac module. + * classified to non-VLAN flows of non-aggregation MAC clients. These + * VLAN packets will be then filtered out by the MAC module. */ if ((mip->mi_state_flags & MIS_EXCLUSIVE) != 0) flags |= FLOW_IGNORE_VLAN; @@ -4075,12 +4116,15 @@ mac_init_rings(mac_impl_t *mip, mac_ring_type_t rtype) /* - * Driver must register group->mgi_addmac/remmac() for rx groups - * to support multiple MAC addresses. + * The driver must register some form of hardware MAC + * filter in order for Rx groups to support multiple + * MAC addresses. 
*/ if (rtype == MAC_RING_TYPE_RX && - ((group_info.mgi_addmac == NULL) || - (group_info.mgi_remmac == NULL))) { + (group_info.mgi_addmac == NULL || + group_info.mgi_remmac == NULL)) { + DTRACE_PROBE1(mac__init__rings__no__mac__filter, + char *, mip->mi_name); err = EINVAL; goto bail; } @@ -4127,8 +4171,9 @@ mac_init_rings(mac_impl_t *mip, mac_ring_type_t rtype) /* Update this group's status */ mac_set_group_state(group, MAC_GROUP_STATE_REGISTERED); - } else + } else { group->mrg_rings = NULL; + } ASSERT(ring_left == 0); @@ -4317,6 +4362,38 @@ mac_free_rings(mac_impl_t *mip, mac_ring_type_t rtype) kmem_free(groups, sizeof (mac_group_t) * (group_count + 1)); } +/* + * Associate the VLAN filter to the receive group. + */ +int +mac_group_addvlan(mac_group_t *group, uint16_t vlan) +{ + VERIFY3S(group->mrg_type, ==, MAC_RING_TYPE_RX); + VERIFY3P(group->mrg_info.mgi_addvlan, !=, NULL); + + if (vlan > VLAN_ID_MAX) + return (EINVAL); + + vlan = MAC_VLAN_UNTAGGED_VID(vlan); + return (group->mrg_info.mgi_addvlan(group->mrg_info.mgi_driver, vlan)); +} + +/* + * Dissociate the VLAN from the receive group. + */ +int +mac_group_remvlan(mac_group_t *group, uint16_t vlan) +{ + VERIFY3S(group->mrg_type, ==, MAC_RING_TYPE_RX); + VERIFY3P(group->mrg_info.mgi_remvlan, !=, NULL); + + if (vlan > VLAN_ID_MAX) + return (EINVAL); + + vlan = MAC_VLAN_UNTAGGED_VID(vlan); + return (group->mrg_info.mgi_remvlan(group->mrg_info.mgi_driver, vlan)); +} + /* * Associate a MAC address with a receive group. 
* @@ -4333,8 +4410,8 @@ mac_free_rings(mac_impl_t *mip, mac_ring_type_t rtype) int mac_group_addmac(mac_group_t *group, const uint8_t *addr) { - ASSERT(group->mrg_type == MAC_RING_TYPE_RX); - ASSERT(group->mrg_info.mgi_addmac != NULL); + VERIFY3S(group->mrg_type, ==, MAC_RING_TYPE_RX); + VERIFY3P(group->mrg_info.mgi_addmac, !=, NULL); return (group->mrg_info.mgi_addmac(group->mrg_info.mgi_driver, addr)); } @@ -4345,8 +4422,8 @@ mac_group_addmac(mac_group_t *group, const uint8_t *addr) int mac_group_remmac(mac_group_t *group, const uint8_t *addr) { - ASSERT(group->mrg_type == MAC_RING_TYPE_RX); - ASSERT(group->mrg_info.mgi_remmac != NULL); + VERIFY3S(group->mrg_type, ==, MAC_RING_TYPE_RX); + VERIFY3P(group->mrg_info.mgi_remmac, !=, NULL); return (group->mrg_info.mgi_remmac(group->mrg_info.mgi_driver, addr)); } @@ -4521,28 +4598,20 @@ i_mac_group_add_ring(mac_group_t *group, mac_ring_t *ring, int index) switch (ring->mr_type) { case MAC_RING_TYPE_RX: /* - * Setup SRS on top of the new ring if the group is - * reserved for someones exclusive use. + * Setup an SRS on top of the new ring if the group is + * reserved for someone's exclusive use. */ if (group->mrg_state == MAC_GROUP_STATE_RESERVED) { - mac_client_impl_t *mcip; + mac_client_impl_t *mcip = MAC_GROUP_ONLY_CLIENT(group); - mcip = MAC_GROUP_ONLY_CLIENT(group); - /* - * Even though this group is reserved we migth still - * have multiple clients, i.e a VLAN shares the - * group with the primary mac client. 
- */ - if (mcip != NULL) { - flent = mcip->mci_flent; - ASSERT(flent->fe_rx_srs_cnt > 0); - mac_rx_srs_group_setup(mcip, flent, SRST_LINK); - mac_fanout_setup(mcip, flent, - MCIP_RESOURCE_PROPS(mcip), mac_rx_deliver, - mcip, NULL, NULL); - } else { - ring->mr_classify_type = MAC_SW_CLASSIFIER; - } + VERIFY3P(mcip, !=, NULL); + flent = mcip->mci_flent; + VERIFY3S(flent->fe_rx_srs_cnt, >, 0); + mac_rx_srs_group_setup(mcip, flent, SRST_LINK); + mac_fanout_setup(mcip, flent, MCIP_RESOURCE_PROPS(mcip), + mac_rx_deliver, mcip, NULL, NULL); + } else { + ring->mr_classify_type = MAC_SW_CLASSIFIER; } break; case MAC_RING_TYPE_TX: @@ -4568,7 +4637,7 @@ i_mac_group_add_ring(mac_group_t *group, mac_ring_t *ring, int index) mcip = mgcp->mgc_client; flent = mcip->mci_flent; - is_aggr = (mcip->mci_state_flags & MCIS_IS_AGGR); + is_aggr = (mcip->mci_state_flags & MCIS_IS_AGGR_CLIENT); mac_srs = MCIP_TX_SRS(mcip); tx = &mac_srs->srs_tx; mac_tx_client_quiesce((mac_client_handle_t)mcip); @@ -4712,7 +4781,7 @@ i_mac_group_rem_ring(mac_group_t *group, mac_ring_t *ring, mcip = MAC_GROUP_ONLY_CLIENT(group); ASSERT(mcip != NULL); - ASSERT(mcip->mci_state_flags & MCIS_IS_AGGR); + ASSERT(mcip->mci_state_flags & MCIS_IS_AGGR_CLIENT); mac_srs = MCIP_TX_SRS(mcip); ASSERT(mac_srs->srs_tx.st_mode == SRS_TX_AGGR || mac_srs->srs_tx.st_mode == SRS_TX_BW_AGGR); @@ -4920,12 +4989,12 @@ mac_free_macaddr(mac_address_t *map) mac_impl_t *mip = map->ma_mip; ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); - ASSERT(mip->mi_addresses != NULL); + VERIFY3P(mip->mi_addresses, !=, NULL); - map = mac_find_macaddr(mip, map->ma_addr); - - ASSERT(map != NULL); - ASSERT(map->ma_nusers == 0); + VERIFY3P(map, ==, mac_find_macaddr(mip, map->ma_addr)); + VERIFY3P(map, !=, NULL); + VERIFY3S(map->ma_nusers, ==, 0); + VERIFY3P(map->ma_vlans, ==, NULL); if (map == mip->mi_addresses) { mip->mi_addresses = map->ma_next; @@ -4941,85 +5010,201 @@ mac_free_macaddr(mac_address_t *map) kmem_free(map, sizeof (mac_address_t)); } +static 
mac_vlan_t * +mac_find_vlan(mac_address_t *map, uint16_t vid) +{ + mac_vlan_t *mvp; + + for (mvp = map->ma_vlans; mvp != NULL; mvp = mvp->mv_next) { + if (mvp->mv_vid == vid) + return (mvp); + } + + return (NULL); +} + +static mac_vlan_t * +mac_add_vlan(mac_address_t *map, uint16_t vid) +{ + mac_vlan_t *mvp; + + /* + * We should never add the same {addr, VID} tuple more + * than once, but let's be sure. + */ + for (mvp = map->ma_vlans; mvp != NULL; mvp = mvp->mv_next) + VERIFY3U(mvp->mv_vid, !=, vid); + + /* Add the VLAN to the head of the VLAN list. */ + mvp = kmem_zalloc(sizeof (mac_vlan_t), KM_SLEEP); + mvp->mv_vid = vid; + mvp->mv_next = map->ma_vlans; + map->ma_vlans = mvp; + + return (mvp); +} + +static void +mac_rem_vlan(mac_address_t *map, mac_vlan_t *mvp) +{ + mac_vlan_t *pre; + + if (map->ma_vlans == mvp) { + map->ma_vlans = mvp->mv_next; + } else { + pre = map->ma_vlans; + while (pre->mv_next != mvp) { + pre = pre->mv_next; + + /* + * We've reached the end of the list without + * finding mvp. + */ + VERIFY3P(pre, !=, NULL); + } + pre->mv_next = mvp->mv_next; + } + + kmem_free(mvp, sizeof (mac_vlan_t)); +} + /* - * Add a MAC address reference for a client. If the desired MAC address - * exists, add a reference to it. Otherwise, add the new address by adding - * it to a reserved group or setting promiscuous mode. Won't try different - * group is the group is non-NULL, so the caller must explictly share - * default group when needed. - * - * Note, the primary MAC address is initialized at registration time, so - * to add it to default group only need to activate it if its reference - * count is still zero. Also, some drivers may not have advertised RINGS - * capability. + * Create a new mac_address_t if this is the first use of the address + * or add a VID to an existing address. In either case, the + * mac_address_t acts as a list of {addr, VID} tuples where each tuple + * shares the same addr. 
If group is non-NULL then attempt to program + * the MAC's HW filters for this group. Otherwise, if group is NULL, + * then the MAC has no rings and there is nothing to program. */ int -mac_add_macaddr(mac_impl_t *mip, mac_group_t *group, uint8_t *mac_addr, - boolean_t use_hw) +mac_add_macaddr_vlan(mac_impl_t *mip, mac_group_t *group, uint8_t *addr, + uint16_t vid, boolean_t use_hw) { - mac_address_t *map; - int err = 0; - boolean_t allocated_map = B_FALSE; + mac_address_t *map; + mac_vlan_t *mvp; + int err = 0; + boolean_t allocated_map = B_FALSE; + boolean_t hw_mac = B_FALSE; + boolean_t hw_vlan = B_FALSE; ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); - map = mac_find_macaddr(mip, mac_addr); + map = mac_find_macaddr(mip, addr); /* - * If the new MAC address has not been added. Allocate a new one - * and set it up. + * If this is the first use of this MAC address then allocate + * and initialize a new structure. */ if (map == NULL) { map = kmem_zalloc(sizeof (mac_address_t), KM_SLEEP); map->ma_len = mip->mi_type->mt_addr_length; - bcopy(mac_addr, map->ma_addr, map->ma_len); + bcopy(addr, map->ma_addr, map->ma_len); map->ma_nusers = 0; map->ma_group = group; map->ma_mip = mip; + map->ma_untagged = B_FALSE; - /* add the new MAC address to the head of the address list */ + /* Add the new MAC address to the head of the address list. */ map->ma_next = mip->mi_addresses; mip->mi_addresses = map; allocated_map = B_TRUE; } - ASSERT(map->ma_group == NULL || map->ma_group == group); + VERIFY(map->ma_group == NULL || map->ma_group == group); if (map->ma_group == NULL) map->ma_group = group; + if (vid == VLAN_ID_NONE) { + map->ma_untagged = B_TRUE; + mvp = NULL; + } else { + mvp = mac_add_vlan(map, vid); + } + + /* + * Set the VLAN HW filter if: + * + * o the MAC's VLAN HW filtering is enabled, and + * o the address does not currently rely on promisc mode. 
+ * + * This is called even when the client specifies an untagged + * address (VLAN_ID_NONE) because some MAC providers require + * setting additional bits to accept untagged traffic when + * VLAN HW filtering is enabled. + */ + if (MAC_GROUP_HW_VLAN(group) && + map->ma_type != MAC_ADDRESS_TYPE_UNICAST_PROMISC) { + if ((err = mac_group_addvlan(group, vid)) != 0) + goto bail; + + hw_vlan = B_TRUE; + } + + VERIFY3S(map->ma_nusers, >=, 0); + map->ma_nusers++; + /* - * If the MAC address is already in use, simply account for the - * new client. + * If this MAC address already has a HW filter then simply + * increment the counter. */ - if (map->ma_nusers++ > 0) + if (map->ma_nusers > 1) return (0); + /* + * All logic from here on out is executed during initial + * creation only. + */ + VERIFY3S(map->ma_nusers, ==, 1); + /* * Activate this MAC address by adding it to the reserved group. */ if (group != NULL) { - err = mac_group_addmac(group, (const uint8_t *)mac_addr); - if (err == 0) { - map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED; - return (0); + err = mac_group_addmac(group, (const uint8_t *)addr); + + /* + * If the driver is out of filters then we can + * continue and use promisc mode. For any other error, + * assume the driver is in a state where we can't + * program the filters or use promisc mode; so we must + * bail. + */ + if (err != 0 && err != ENOSPC) { + map->ma_nusers--; + goto bail; } + + hw_mac = (err == 0); + } + + if (hw_mac) { + map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED; + return (0); } /* * The MAC address addition failed. If the client requires a - * hardware classified MAC address, fail the operation. + * hardware classified MAC address, fail the operation. This + * feature is only used by sun4v vsw. */ - if (use_hw) { + if (use_hw && !hw_mac) { err = ENOSPC; + map->ma_nusers--; goto bail; } /* - * Try promiscuous mode. - * - * For drivers that don't advertise RINGS capability, do - * nothing for the primary address. 
+ * If we reach this point then either the MAC doesn't have + * RINGS capability or we are out of MAC address HW filters. + * In any case we must put the MAC into promiscuous mode. + */ + VERIFY(group == NULL || !hw_mac); + + /* + * The one exception is the primary address. A non-RINGS + * driver filters the primary address by default; promisc mode + * is not needed. */ if ((group == NULL) && (bcmp(map->ma_addr, mip->mi_addr, map->ma_len) == 0)) { @@ -5028,8 +5213,11 @@ mac_add_macaddr(mac_impl_t *mip, mac_group_t *group, uint8_t *mac_addr, } /* - * Enable promiscuous mode in order to receive traffic - * to the new MAC address. + * Enable promiscuous mode in order to receive traffic to the + * new MAC address. All existing HW filters still send their + * traffic to their respective group/SRSes. But with promisc + * enabled all unknown traffic is delivered to the default + * group where it is SW classified via mac_rx_classify(). */ if ((err = i_mac_promisc_set(mip, B_TRUE)) == 0) { map->ma_type = MAC_ADDRESS_TYPE_UNICAST_PROMISC; @@ -5037,44 +5225,71 @@ mac_add_macaddr(mac_impl_t *mip, mac_group_t *group, uint8_t *mac_addr, } /* - * Free the MAC address that could not be added. Don't free - * a pre-existing address, it could have been the entry - * for the primary MAC address which was pre-allocated by - * mac_init_macaddr(), and which must remain on the list. + * We failed to set promisc mode and we are about to free 'map'. */ + map->ma_nusers = 0; + bail: - map->ma_nusers--; + if (hw_vlan) { + int err2 = mac_group_remvlan(group, vid); + + if (err2 != 0) { + cmn_err(CE_WARN, "Failed to remove VLAN %u from group" + " %d on MAC %s: %d.", vid, group->mrg_index, + mip->mi_name, err2); + } + } + + if (mvp != NULL) + mac_rem_vlan(map, mvp); + if (allocated_map) mac_free_macaddr(map); + return (err); } -/* - * Remove a reference to a MAC address. This may cause to remove the MAC - * address from an associated group or to turn off promiscuous mode. 
- * The caller needs to handle the failure properly. - */ int -mac_remove_macaddr(mac_address_t *map) +mac_remove_macaddr_vlan(mac_address_t *map, uint16_t vid) { - mac_impl_t *mip = map->ma_mip; - int err = 0; + mac_vlan_t *mvp; + mac_impl_t *mip = map->ma_mip; + mac_group_t *group = map->ma_group; + int err = 0; ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); + VERIFY3P(map, ==, mac_find_macaddr(mip, map->ma_addr)); + + if (vid == VLAN_ID_NONE) { + map->ma_untagged = B_FALSE; + mvp = NULL; + } else { + mvp = mac_find_vlan(map, vid); + VERIFY3P(mvp, !=, NULL); + } - ASSERT(map == mac_find_macaddr(mip, map->ma_addr)); + if (MAC_GROUP_HW_VLAN(group) && + map->ma_type == MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED && + ((err = mac_group_remvlan(group, vid)) != 0)) + return (err); + + if (mvp != NULL) + mac_rem_vlan(map, mvp); /* * If it's not the last client using this MAC address, only update * the MAC clients count. */ - if (--map->ma_nusers > 0) + map->ma_nusers--; + if (map->ma_nusers > 0) return (0); + VERIFY3S(map->ma_nusers, ==, 0); + /* - * The MAC address is no longer used by any MAC client, so remove - * it from its associated group, or turn off promiscuous mode - * if it was enabled for the MAC address. + * The MAC address is no longer used by any MAC client, so + * remove it from its associated group. Turn off promiscuous + * mode if this is the last address relying on it. */ switch (map->ma_type) { case MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED: @@ -5082,22 +5297,60 @@ mac_remove_macaddr(mac_address_t *map) * Don't free the preset primary address for drivers that * don't advertise RINGS capability. 
*/ - if (map->ma_group == NULL) + if (group == NULL) return (0); - err = mac_group_remmac(map->ma_group, map->ma_addr); - if (err == 0) - map->ma_group = NULL; + if ((err = mac_group_remmac(group, map->ma_addr)) != 0) { + if (vid == VLAN_ID_NONE) + map->ma_untagged = B_TRUE; + else + (void) mac_add_vlan(map, vid); + + /* + * If we fail to remove the MAC address HW + * filter but then also fail to re-add the + * VLAN HW filter then we are in a busted + * state. We do our best by logging a warning + * and returning the original 'err' that got + * us here. At this point, traffic for this + * address + VLAN combination will be dropped + * until the user reboots the system. In the + * future, it would be nice to have a system + * that can compare the state of expected + * classification according to mac to the + * actual state of the provider, and report + * and fix any inconsistencies. + */ + if (MAC_GROUP_HW_VLAN(group)) { + int err2; + + err2 = mac_group_addvlan(group, vid); + if (err2 != 0) { + cmn_err(CE_WARN, "Failed to readd VLAN" + " %u to group %d on MAC %s: %d.", + vid, group->mrg_index, mip->mi_name, + err2); + } + } + + map->ma_nusers = 1; + return (err); + } + + map->ma_group = NULL; break; case MAC_ADDRESS_TYPE_UNICAST_PROMISC: err = i_mac_promisc_set(mip, B_FALSE); break; default: - ASSERT(B_FALSE); + panic("Unexpected ma_type 0x%x, file: %s, line %d", + map->ma_type, __FILE__, __LINE__); } - if (err != 0) + if (err != 0) { + map->ma_nusers = 1; return (err); + } /* * We created MAC address for the primary one at registration, so we @@ -5250,8 +5503,9 @@ mac_fini_macaddr(mac_impl_t *mip) * If mi_addresses is initialized, there should be exactly one * entry left on the list with no users. 
*/ - ASSERT(map->ma_nusers == 0); - ASSERT(map->ma_next == NULL); + VERIFY3S(map->ma_nusers, ==, 0); + VERIFY3P(map->ma_next, ==, NULL); + VERIFY3P(map->ma_vlans, ==, NULL); kmem_free(map, sizeof (mac_address_t)); mip->mi_addresses = NULL; @@ -5813,7 +6067,7 @@ mac_stop_logusage(mac_logtype_t type) mod_hash_walk(i_mac_impl_hash, i_mac_fastpath_walker, &estate); (void) untimeout(mac_logging_timer); - mac_logging_timer = 0; + mac_logging_timer = NULL; /* Write log entries for each mac_impl in the list */ i_mac_log_info(&net_log_list, &lstate); @@ -5931,7 +6185,7 @@ mac_reserve_tx_ring(mac_impl_t *mip, mac_ring_t *desired_ring) } /* - * For a reserved group with multiple clients, return the primary client. + * For a non-default group with multiple clients, return the primary client. */ static mac_client_impl_t * mac_get_grp_primary(mac_group_t *grp) @@ -6290,13 +6544,12 @@ mac_group_add_client(mac_group_t *grp, mac_client_impl_t *mcip) break; } - VERIFY(mgcp == NULL); + ASSERT(mgcp == NULL); mgcp = kmem_zalloc(sizeof (mac_grp_client_t), KM_SLEEP); mgcp->mgc_client = mcip; mgcp->mgc_next = grp->mrg_clients; grp->mrg_clients = mgcp; - } void @@ -6317,8 +6570,27 @@ mac_group_remove_client(mac_group_t *grp, mac_client_impl_t *mcip) } /* - * mac_reserve_rx_group() - * + * Return true if any client on this group explicitly asked for HW + * rings (of type mask) or has a bound share. + */ +static boolean_t +i_mac_clients_hw(mac_group_t *grp, uint32_t mask) +{ + mac_grp_client_t *mgcip; + mac_client_impl_t *mcip; + mac_resource_props_t *mrp; + + for (mgcip = grp->mrg_clients; mgcip != NULL; mgcip = mgcip->mgc_next) { + mcip = mgcip->mgc_client; + mrp = MCIP_RESOURCE_PROPS(mcip); + if (mcip->mci_share != 0 || (mrp->mrp_mask & mask) != 0) + return (B_TRUE); + } + + return (B_FALSE); +} + +/* * Finds an available group and exclusively reserves it for a client. * The group is chosen to suit the flow's resource controls (bandwidth and * fanout requirements) and the address type. 
@@ -6341,7 +6613,6 @@ mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move) int need_rings = 0; mac_group_t *candidate_grp = NULL; mac_client_impl_t *gclient; - mac_resource_props_t *gmrp; mac_group_t *donorgrp = NULL; boolean_t rxhw = mrp->mrp_mask & MRP_RX_RINGS; boolean_t unspec = mrp->mrp_mask & MRP_RXRINGS_UNSPEC; @@ -6352,18 +6623,20 @@ mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move) isprimary = mcip->mci_flent->fe_type & FLOW_PRIMARY_MAC; /* - * Check if a group already has this mac address (case of VLANs) + * Check if a group already has this MAC address (case of VLANs) * unless we are moving this MAC client from one group to another. */ if (!move && (map = mac_find_macaddr(mip, mac_addr)) != NULL) { if (map->ma_group != NULL) return (map->ma_group); } + if (mip->mi_rx_groups == NULL || mip->mi_rx_group_count == 0) return (NULL); + /* - * If exclusive open, return NULL which will enable the - * caller to use the default group. + * If this client is requesting exclusive MAC access then + * return NULL to ensure the client uses the default group. */ if (mcip->mci_state_flags & MCIS_EXCLUSIVE) return (NULL); @@ -6373,6 +6646,7 @@ mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move) mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) { mrp->mrp_nrxrings = 1; } + /* * For static grouping we allow only specifying rings=0 and * unspecified @@ -6381,6 +6655,7 @@ mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move) mip->mi_rx_group_type == MAC_GROUP_TYPE_STATIC) { return (NULL); } + if (rxhw) { /* * We have explicitly asked for a group (with nrxrings, @@ -6442,25 +6717,19 @@ mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move) * that didn't ask for an exclusive group, but got * one and it has enough rings (combined with what * the donor group can donate) for the new MAC - * client + * client. 
*/ if (grp->mrg_state >= MAC_GROUP_STATE_RESERVED) { /* - * If the primary/donor group is not the default - * group, don't bother looking for a candidate group. - * If we don't have enough rings we will check - * if the primary group can be vacated. + * If the donor group is not the default + * group, don't bother looking for a candidate + * group. If we don't have enough rings we + * will check if the primary group can be + * vacated. */ if (candidate_grp == NULL && donorgrp == MAC_DEFAULT_RX_GROUP(mip)) { - ASSERT(!MAC_GROUP_NO_CLIENT(grp)); - gclient = MAC_GROUP_ONLY_CLIENT(grp); - if (gclient == NULL) - gclient = mac_get_grp_primary(grp); - ASSERT(gclient != NULL); - gmrp = MCIP_RESOURCE_PROPS(gclient); - if (gclient->mci_share == 0 && - (gmrp->mrp_mask & MRP_RX_RINGS) == 0 && + if (!i_mac_clients_hw(grp, MRP_RX_RINGS) && (unspec || (grp->mrg_cur_count + donor_grp_rcnt >= need_rings))) { @@ -6526,6 +6795,7 @@ mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move) */ mac_stop_group(grp); } + /* We didn't find an exclusive group for this MAC client */ if (i >= mip->mi_rx_group_count) { @@ -6533,12 +6803,12 @@ mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move) return (NULL); /* - * If we found a candidate group then we switch the - * MAC client from the candidate_group to the default - * group and give the group to this MAC client. If - * we didn't find a candidate_group, check if the - * primary is in its own group and if it can make way - * for this MAC client. + * If we found a candidate group then move the + * existing MAC client from the candidate_group to the + * default group and give the candidate_group to the + * new MAC client. If we didn't find a candidate + * group, then check if the primary is in its own + * group and if it can make way for this MAC client. 
*/ if (candidate_grp == NULL && donorgrp != MAC_DEFAULT_RX_GROUP(mip) && @@ -6549,15 +6819,15 @@ mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move) boolean_t prim_grp = B_FALSE; /* - * Switch the MAC client from the candidate group - * to the default group.. If this group was the - * donor group, then after the switch we need - * to update the donor group too. + * Switch the existing MAC client from the + * candidate group to the default group. If + * the candidate group is the donor group, + * then after the switch we need to update the + * donor group too. */ grp = candidate_grp; - gclient = MAC_GROUP_ONLY_CLIENT(grp); - if (gclient == NULL) - gclient = mac_get_grp_primary(grp); + gclient = grp->mrg_clients->mgc_client; + VERIFY3P(gclient, !=, NULL); if (grp == mip->mi_rx_donor_grp) prim_grp = B_TRUE; if (mac_rx_switch_group(gclient, grp, @@ -6570,7 +6840,6 @@ mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move) donorgrp = MAC_DEFAULT_RX_GROUP(mip); } - /* * Now give this group with the required rings * to this MAC client. @@ -6618,10 +6887,10 @@ mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move) /* * mac_rx_release_group() * - * This is called when there are no clients left for the group. - * The group is stopped and marked MAC_GROUP_STATE_REGISTERED, - * and if it is a non default group, the shares are removed and - * all rings are assigned back to default group. + * Release the group when it has no remaining clients. The group is + * stopped and its shares are removed and all rings are assigned back + * to default group. This should never be called against the default + * group. 
*/ void mac_release_rx_group(mac_client_impl_t *mcip, mac_group_t *group) @@ -6630,6 +6899,7 @@ mac_release_rx_group(mac_client_impl_t *mcip, mac_group_t *group) mac_ring_t *ring; ASSERT(group != MAC_DEFAULT_RX_GROUP(mip)); + ASSERT(MAC_GROUP_NO_CLIENT(group) == B_TRUE); if (mip->mi_rx_donor_grp == group) mip->mi_rx_donor_grp = MAC_DEFAULT_RX_GROUP(mip); @@ -6681,56 +6951,7 @@ mac_release_rx_group(mac_client_impl_t *mcip, mac_group_t *group) } /* - * When we move the primary's mac address between groups, we need to also - * take all the clients sharing the same mac address along with it (VLANs) - * We remove the mac address for such clients from the group after quiescing - * them. When we add the mac address we restart the client. Note that - * the primary's mac address is removed from the group after all the - * other clients sharing the address are removed. Similarly, the primary's - * mac address is added before all the other client's mac address are - * added. While grp is the group where the clients reside, tgrp is - * the group where the addresses have to be added. - */ -static void -mac_rx_move_macaddr_prim(mac_client_impl_t *mcip, mac_group_t *grp, - mac_group_t *tgrp, uint8_t *maddr, boolean_t add) -{ - mac_impl_t *mip = mcip->mci_mip; - mac_grp_client_t *mgcp = grp->mrg_clients; - mac_client_impl_t *gmcip; - boolean_t prim; - - prim = (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0; - - /* - * If the clients are in a non-default group, we just have to - * walk the group's client list. If it is in the default group - * (which will be shared by other clients as well, we need to - * check if the unicast address matches mcip's unicast. 
- */ - while (mgcp != NULL) { - gmcip = mgcp->mgc_client; - if (gmcip != mcip && - (grp != MAC_DEFAULT_RX_GROUP(mip) || - mcip->mci_unicast == gmcip->mci_unicast)) { - if (!add) { - mac_rx_client_quiesce( - (mac_client_handle_t)gmcip); - (void) mac_remove_macaddr(mcip->mci_unicast); - } else { - (void) mac_add_macaddr(mip, tgrp, maddr, prim); - mac_rx_client_restart( - (mac_client_handle_t)gmcip); - } - } - mgcp = mgcp->mgc_next; - } -} - - -/* - * Move the MAC address from fgrp to tgrp. If this is the primary client, - * we need to take any VLANs etc. together too. + * Move the MAC address from fgrp to tgrp. */ static int mac_rx_move_macaddr(mac_client_impl_t *mcip, mac_group_t *fgrp, @@ -6739,56 +6960,86 @@ mac_rx_move_macaddr(mac_client_impl_t *mcip, mac_group_t *fgrp, mac_impl_t *mip = mcip->mci_mip; uint8_t maddr[MAXMACADDRLEN]; int err = 0; - boolean_t prim; - boolean_t multiclnt = B_FALSE; + uint16_t vid; + mac_unicast_impl_t *muip; + boolean_t use_hw; mac_rx_client_quiesce((mac_client_handle_t)mcip); - ASSERT(mcip->mci_unicast != NULL); + VERIFY3P(mcip->mci_unicast, !=, NULL); bcopy(mcip->mci_unicast->ma_addr, maddr, mcip->mci_unicast->ma_len); - prim = (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0; - if (mcip->mci_unicast->ma_nusers > 1) { - mac_rx_move_macaddr_prim(mcip, fgrp, NULL, maddr, B_FALSE); - multiclnt = B_TRUE; - } - ASSERT(mcip->mci_unicast->ma_nusers == 1); - err = mac_remove_macaddr(mcip->mci_unicast); + /* + * Does the client require MAC address hardware classification? + */ + use_hw = (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0; + vid = i_mac_flow_vid(mcip->mci_flent); + + /* + * You can never move an address that is shared by multiple + * clients. mac_datapath_setup() ensures that clients sharing + * an address are placed on the default group. This guarantees + * that a non-default group will only ever have one client and + * thus make full use of HW filters.
+ */ + if (mac_check_macaddr_shared(mcip->mci_unicast)) + return (EINVAL); + + err = mac_remove_macaddr_vlan(mcip->mci_unicast, vid); + if (err != 0) { mac_rx_client_restart((mac_client_handle_t)mcip); - if (multiclnt) { - mac_rx_move_macaddr_prim(mcip, fgrp, fgrp, maddr, - B_TRUE); - } return (err); } + /* - * Program the H/W Classifier first, if this fails we need - * not proceed with the other stuff. + * If this isn't the primary MAC address then the + * mac_address_t has been freed by the last call to + * mac_remove_macaddr_vlan(). In any case, NULL the reference + * to avoid a dangling pointer. */ - if ((err = mac_add_macaddr(mip, tgrp, maddr, prim)) != 0) { + mcip->mci_unicast = NULL; + + /* + * We also have to NULL all the mui_map references -- sun4v + * strikes again! + */ + rw_enter(&mcip->mci_rw_lock, RW_WRITER); + for (muip = mcip->mci_unicast_list; muip != NULL; muip = muip->mui_next) + muip->mui_map = NULL; + rw_exit(&mcip->mci_rw_lock); + + /* + * Program the H/W Classifier first, if this fails we need not + * proceed with the other stuff. + */ + if ((err = mac_add_macaddr_vlan(mip, tgrp, maddr, vid, use_hw)) != 0) { + int err2; + /* Revert back the H/W Classifier */ - if ((err = mac_add_macaddr(mip, fgrp, maddr, prim)) != 0) { - /* - * This should not fail now since it worked earlier, - * should we panic? - */ - cmn_err(CE_WARN, - "mac_rx_switch_group: switching %p back" - " to group %p failed!!", (void *)mcip, - (void *)fgrp); + err2 = mac_add_macaddr_vlan(mip, fgrp, maddr, vid, use_hw); + + if (err2 != 0) { + cmn_err(CE_WARN, "Failed to revert HW classification" + " on MAC %s, for client %s: %d.", mip->mi_name, + mcip->mci_name, err2); } + mac_rx_client_restart((mac_client_handle_t)mcip); - if (multiclnt) { - mac_rx_move_macaddr_prim(mcip, fgrp, fgrp, maddr, - B_TRUE); - } return (err); } + + /* + * Get a reference to the new mac_address_t and update the + * client's reference. 
Then restart the client and add the + * other clients of this MAC addr (if they exist). + */ mcip->mci_unicast = mac_find_macaddr(mip, maddr); + rw_enter(&mcip->mci_rw_lock, RW_WRITER); + for (muip = mcip->mci_unicast_list; muip != NULL; muip = muip->mui_next) + muip->mui_map = mcip->mci_unicast; + rw_exit(&mcip->mci_rw_lock); mac_rx_client_restart((mac_client_handle_t)mcip); - if (multiclnt) - mac_rx_move_macaddr_prim(mcip, fgrp, tgrp, maddr, B_TRUE); - return (err); + return (0); } /* @@ -6809,19 +7060,34 @@ mac_rx_switch_group(mac_client_impl_t *mcip, mac_group_t *fgrp, mac_impl_t *mip = mcip->mci_mip; mac_grp_client_t *mgcp; - ASSERT(fgrp == mcip->mci_flent->fe_rx_ring_group); + VERIFY3P(fgrp, ==, mcip->mci_flent->fe_rx_ring_group); if ((err = mac_rx_move_macaddr(mcip, fgrp, tgrp)) != 0) return (err); /* - * The group might be reserved, but SRSs may not be set up, e.g. - * primary and its vlans using a reserved group. + * If the group is marked as reserved and in use by a single + * client, then there is an SRS to teardown. */ if (fgrp->mrg_state == MAC_GROUP_STATE_RESERVED && MAC_GROUP_ONLY_CLIENT(fgrp) != NULL) { mac_rx_srs_group_teardown(mcip->mci_flent, B_TRUE); } + + /* + * If we are moving the client from a non-default group, then + * we know that any additional clients on this group share the + * same MAC address. Since we moved the MAC address filter, we + * need to move these clients too. + * + * If we are moving the client from the default group and its + * MAC address has VLAN clients, then we must move those + * clients as well. + * + * In both cases the idea is the same: we moved the MAC + * address filter to the tgrp, so we must move all clients + * using that MAC address to tgrp as well.
+ */ if (fgrp != MAC_DEFAULT_RX_GROUP(mip)) { mgcp = fgrp->mrg_clients; while (mgcp != NULL) { @@ -6832,20 +7098,21 @@ mac_rx_switch_group(mac_client_impl_t *mcip, mac_group_t *fgrp, gmcip->mci_flent->fe_rx_ring_group = tgrp; } mac_release_rx_group(mcip, fgrp); - ASSERT(MAC_GROUP_NO_CLIENT(fgrp)); + VERIFY3B(MAC_GROUP_NO_CLIENT(fgrp), ==, B_TRUE); mac_set_group_state(fgrp, MAC_GROUP_STATE_REGISTERED); } else { mac_group_remove_client(fgrp, mcip); mac_group_add_client(tgrp, mcip); mcip->mci_flent->fe_rx_ring_group = tgrp; + /* * If there are other clients (VLANs) sharing this address - * we should be here only for the primary. + * then move them too. */ - if (mcip->mci_unicast->ma_nusers > 1) { + if (mac_check_macaddr_shared(mcip->mci_unicast)) { /* * We need to move all the clients that are using - * this h/w address. + * this MAC address. */ mgcp = fgrp->mrg_clients; while (mgcp != NULL) { @@ -6859,20 +7126,24 @@ mac_rx_switch_group(mac_client_impl_t *mcip, mac_group_t *fgrp, } } } + /* - * The default group will still take the multicast, - * broadcast traffic etc., so it won't go to + * The default group still handles multicast and + * broadcast traffic; it won't transition to * MAC_GROUP_STATE_REGISTERED. */ if (fgrp->mrg_state == MAC_GROUP_STATE_RESERVED) mac_rx_group_unmark(fgrp, MR_CONDEMNED); mac_set_group_state(fgrp, MAC_GROUP_STATE_SHARED); } + next_state = mac_group_next_state(tgrp, &group_only_mcip, MAC_DEFAULT_RX_GROUP(mip), B_TRUE); mac_set_group_state(tgrp, next_state); + /* - * If the destination group is reserved, setup the SRSs etc. + * If the destination group is reserved, then setup the SRSes. + * Otherwise make sure to use SW classification. 
*/ if (tgrp->mrg_state == MAC_GROUP_STATE_RESERVED) { mac_rx_srs_group_setup(mcip, mcip->mci_flent, SRST_LINK); @@ -6883,6 +7154,7 @@ mac_rx_switch_group(mac_client_impl_t *mcip, mac_group_t *fgrp, } else { mac_rx_switch_grp_to_sw(tgrp); } + return (0); } @@ -6913,6 +7185,7 @@ mac_reserve_tx_group(mac_client_impl_t *mcip, boolean_t move) boolean_t isprimary; isprimary = mcip->mci_flent->fe_type & FLOW_PRIMARY_MAC; + /* * When we come here for a VLAN on the primary (dladm create-vlan), * we need to pair it along with the primary (to keep it consistent @@ -6994,8 +7267,7 @@ mac_reserve_tx_group(mac_client_impl_t *mcip, boolean_t move) if (grp->mrg_state == MAC_GROUP_STATE_RESERVED && candidate_grp == NULL) { gclient = MAC_GROUP_ONLY_CLIENT(grp); - if (gclient == NULL) - gclient = mac_get_grp_primary(grp); + VERIFY3P(gclient, !=, NULL); gmrp = MCIP_RESOURCE_PROPS(gclient); if (gclient->mci_share == 0 && (gmrp->mrp_mask & MRP_TX_RINGS) == 0 && @@ -7032,13 +7304,14 @@ mac_reserve_tx_group(mac_client_impl_t *mcip, boolean_t move) */ if (need_exclgrp && candidate_grp != NULL) { /* - * Switch the MAC client from the candidate group - * to the default group. + * Switch the MAC client from the candidate + * group to the default group. We know the + * candidate_grp came from a reserved group + * and thus only has one client. 
*/ grp = candidate_grp; gclient = MAC_GROUP_ONLY_CLIENT(grp); - if (gclient == NULL) - gclient = mac_get_grp_primary(grp); + VERIFY3P(gclient, !=, NULL); mac_tx_client_quiesce((mac_client_handle_t)gclient); mac_tx_switch_group(gclient, grp, defgrp); mac_tx_client_restart((mac_client_handle_t)gclient); @@ -7206,7 +7479,7 @@ mac_tx_switch_group(mac_client_impl_t *mcip, mac_group_t *fgrp, */ mac_group_remove_client(fgrp, mcip); mac_tx_dismantle_soft_rings(fgrp, flent); - if (mcip->mci_unicast->ma_nusers > 1) { + if (mac_check_macaddr_shared(mcip->mci_unicast)) { mgcp = fgrp->mrg_clients; while (mgcp != NULL) { gmcip = mgcp->mgc_client; @@ -7452,7 +7725,7 @@ mac_no_active(mac_handle_t mh) * changes and update the mac_resource_props_t for the VLAN's client. * We need to do this since we don't support setting these properties * on the primary's VLAN clients, but the VLAN clients have to - * follow the primary w.r.t the rings property; + * follow the primary w.r.t the rings property. */ void mac_set_prim_vlan_rings(mac_impl_t *mip, mac_resource_props_t *mrp) @@ -7601,13 +7874,10 @@ mac_group_ring_modify(mac_client_impl_t *mcip, mac_group_t *group, MAC_GROUP_STATE_RESERVED) { continue; } - mcip = MAC_GROUP_ONLY_CLIENT(tgrp); - if (mcip == NULL) - mcip = mac_get_grp_primary(tgrp); - ASSERT(mcip != NULL); - mrp = MCIP_RESOURCE_PROPS(mcip); - if ((mrp->mrp_mask & MRP_RX_RINGS) != 0) + if (i_mac_clients_hw(tgrp, MRP_RX_RINGS)) continue; + mcip = tgrp->mrg_clients->mgc_client; + VERIFY3P(mcip, !=, NULL); if ((tgrp->mrg_cur_count + defgrp->mrg_cur_count) < (modify + 1)) { continue; @@ -7622,12 +7892,10 @@ mac_group_ring_modify(mac_client_impl_t *mcip, mac_group_t *group, MAC_GROUP_STATE_RESERVED) { continue; } - mcip = MAC_GROUP_ONLY_CLIENT(tgrp); - if (mcip == NULL) - mcip = mac_get_grp_primary(tgrp); - mrp = MCIP_RESOURCE_PROPS(mcip); - if ((mrp->mrp_mask & MRP_TX_RINGS) != 0) + if (i_mac_clients_hw(tgrp, MRP_TX_RINGS)) continue; + mcip = tgrp->mrg_clients->mgc_client; + 
VERIFY3P(mcip, !=, NULL); if ((tgrp->mrg_cur_count + defgrp->mrg_cur_count) < (modify + 1)) { continue; @@ -7897,10 +8165,10 @@ mac_pool_event_cb(pool_event_t what, poolid_t id, void *arg) * Set effective rings property. This could be called from datapath_setup/ * datapath_teardown or set-linkprop. * If the group is reserved we just go ahead and set the effective rings. - * Additionally, for TX this could mean the default group has lost/gained + * Additionally, for TX this could mean the default group has lost/gained * some rings, so if the default group is reserved, we need to adjust the * effective rings for the default group clients. For RX, if we are working - * with the non-default group, we just need * to reset the effective props + * with the non-default group, we just need to reset the effective props * for the default group clients. */ void @@ -8030,6 +8298,7 @@ mac_check_primary_relocation(mac_client_impl_t *mcip, boolean_t rxhw) * the first non-primary. */ ASSERT(mip->mi_nactiveclients == 2); + /* * OK, now we have the primary that needs to be relocated. */ diff --git a/usr/src/uts/common/io/mac/mac_client.c b/usr/src/uts/common/io/mac/mac_client.c index 66bba78e91..b918bf4aca 100644 --- a/usr/src/uts/common/io/mac/mac_client.c +++ b/usr/src/uts/common/io/mac/mac_client.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Joyent, Inc. All rights reserved. + * Copyright 2018 Joyent, Inc. * Copyright 2017 RackTop Systems. */ @@ -865,9 +865,12 @@ mac_unicast_update_client_flow(mac_client_impl_t *mcip) mac_protect_update_mac_token(mcip); /* - * A MAC client could have one MAC address but multiple - * VLANs. In that case update the flow entries corresponding - * to all VLANs of the MAC client. + * When there are multiple VLANs sharing the same MAC address, + * each gets its own MAC client, except when running on sun4v + * vsw. 
In that case the mci_flent_list is used to place + * multiple VLAN flows on one MAC client. If we ever get rid + * of vsw then this code can go, but until then we need to + * update all flow entries. */ for (flent = mcip->mci_flent_list; flent != NULL; flent = flent->fe_client_next) { @@ -1025,7 +1028,7 @@ mac_unicast_primary_set(mac_handle_t mh, const uint8_t *addr) return (0); } - if (mac_find_macaddr(mip, (uint8_t *)addr) != 0) { + if (mac_find_macaddr(mip, (uint8_t *)addr) != NULL) { i_mac_perim_exit(mip); return (EBUSY); } @@ -1040,9 +1043,9 @@ mac_unicast_primary_set(mac_handle_t mh, const uint8_t *addr) mac_capab_aggr_t aggr_cap; /* - * If the mac is an aggregation, other than the unicast + * If the MAC is an aggregation, other than the unicast * addresses programming, aggr must be informed about this - * primary unicst address change to change its mac address + * primary unicast address change to change its MAC address * policy to be user-specified. */ ASSERT(map->ma_type == MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED); @@ -1374,7 +1377,7 @@ mac_client_open(mac_handle_t mh, mac_client_handle_t *mchp, char *name, mcip->mci_state_flags |= MCIS_IS_AGGR_PORT; if (mip->mi_state_flags & MIS_IS_AGGR) - mcip->mci_state_flags |= MCIS_IS_AGGR; + mcip->mci_state_flags |= MCIS_IS_AGGR_CLIENT; if ((flags & MAC_OPEN_FLAGS_USE_DATALINK_NAME) != 0) { datalink_id_t linkid; @@ -1539,7 +1542,8 @@ mac_client_close(mac_client_handle_t mch, uint16_t flags) } /* - * Set the rx bypass receive callback. + * Set the Rx bypass receive callback and return B_TRUE. Return + * B_FALSE if it's not possible to enable bypass.
*/ boolean_t mac_rx_bypass_set(mac_client_handle_t mch, mac_direct_rx_t rx_fn, void *arg1) @@ -1550,11 +1554,11 @@ mac_rx_bypass_set(mac_client_handle_t mch, mac_direct_rx_t rx_fn, void *arg1) ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); /* - * If the mac_client is a VLAN, we should not do DLS bypass and - * instead let the packets come up via mac_rx_deliver so the vlan - * header can be stripped. + * If the client has more than one VLAN then process packets + * through DLS. This should happen only when sun4v vsw is on + * the scene. */ - if (mcip->mci_nvids > 0) + if (mcip->mci_nvids > 1) return (B_FALSE); /* @@ -1608,8 +1612,8 @@ mac_rx_set(mac_client_handle_t mch, mac_rx_t rx_fn, void *arg) i_mac_perim_exit(mip); /* - * If we're changing the rx function on the primary mac of a vnic, - * make sure any secondary macs on the vnic are updated as well. + * If we're changing the Rx function on the primary MAC of a VNIC, + * make sure any secondary addresses on the VNIC are updated as well. */ if (umip != NULL) { ASSERT((umip->mi_state_flags & MIS_IS_VNIC) != 0); @@ -1787,6 +1791,14 @@ mac_client_set_rings_prop(mac_client_impl_t *mcip, mac_resource_props_t *mrp, } /* Let check if we can give this an excl group */ } else if (group == defgrp) { + /* + * If multiple clients share an + * address then they must stay on the + * default group. + */ + if (mac_check_macaddr_shared(mcip->mci_unicast)) + return (0); + ngrp = mac_reserve_rx_group(mcip, mac_addr, B_TRUE); /* Couldn't give it a group, that's fine */ @@ -1809,6 +1821,16 @@ mac_client_set_rings_prop(mac_client_impl_t *mcip, mac_resource_props_t *mrp, } if (group == defgrp && ((mrp->mrp_nrxrings > 0) || unspec)) { + /* + * We are requesting Rx rings. Try to reserve + * a non-default group. + * + * If multiple clients share an address then + * they must stay on the default group. 
+ */ + if (mac_check_macaddr_shared(mcip->mci_unicast)) + return (EINVAL); + ngrp = mac_reserve_rx_group(mcip, mac_addr, B_TRUE); if (ngrp == NULL) return (ENOSPC); @@ -2166,10 +2188,10 @@ mac_unicast_flow_create(mac_client_impl_t *mcip, uint8_t *mac_addr, flent_flags = FLOW_VNIC_MAC; /* - * For the first flow we use the mac client's name - mci_name, for - * subsequent ones we just create a name with the vid. This is + * For the first flow we use the MAC client's name - mci_name, for + * subsequent ones we just create a name with the VID. This is * so that we can add these flows to the same flow table. This is - * fine as the flow name (except for the one with the mac client's + * fine as the flow name (except for the one with the MAC client's * name) is not visible. When the first flow is removed, we just replace * its fdesc with another from the list, so we will still retain the * flent with the MAC client's flow name. @@ -2327,6 +2349,7 @@ mac_client_datapath_setup(mac_client_impl_t *mcip, uint16_t vid, * The unicast MAC address must have been added successfully. */ ASSERT(mcip->mci_unicast != NULL); + /* * Push down the sub-flows that were defined on this link * hitherto. The flows are added to the active flow table @@ -2338,15 +2361,23 @@ mac_client_datapath_setup(mac_client_impl_t *mcip, uint16_t vid, ASSERT(!no_unicast); /* - * A unicast flow already exists for that MAC client, - * this flow must be the same mac address but with - * different VID. It has been checked by mac_addr_in_use(). + * A unicast flow already exists for that MAC client + * so this flow must be the same MAC address but with + * a different VID. It has been checked by + * mac_addr_in_use(). + * + * We will use the SRS etc. from the initial + * mci_flent. We don't need to create a kstat for + * this, as except for the fdesc, everything will be + * used from the first flent. * - * We will use the SRS etc. from the mci_flent. 
Note that - * We don't need to create kstat for this as except for - * the fdesc, everything will be used from in the 1st flent. + * The only time we should see multiple flents on the + * same MAC client is on the sun4v vsw. If we removed + * that code we should be able to remove the entire + * notion of multiple flents on a MAC client (this + * doesn't affect sub/user flows because they have + * their own list unrelated to mci_flent_list). */ - if (bcmp(mac_addr, map->ma_addr, map->ma_len) != 0) { err = EINVAL; goto bail; @@ -2475,8 +2506,12 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags, boolean_t is_vnic_primary = (flags & MAC_UNICAST_VNIC_PRIMARY); - /* when VID is non-zero, the underlying MAC can not be VNIC */ - ASSERT(!((mip->mi_state_flags & MIS_IS_VNIC) && (vid != 0))); + /* + * When the VID is non-zero the underlying MAC cannot be a + * VNIC. I.e., dladm create-vlan cannot take a VNIC as + * argument, only the primary MAC client. + */ + ASSERT(!((mip->mi_state_flags & MIS_IS_VNIC) && (vid != VLAN_ID_NONE))); /* * Can't unicast add if the client asked only for minimal datapath @@ -2489,18 +2524,19 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags, * Check for an attempted use of the current Port VLAN ID, if enabled. * No client may use it. */ - if (mip->mi_pvid != 0 && vid == mip->mi_pvid) + if (mip->mi_pvid != VLAN_ID_NONE && vid == mip->mi_pvid) return (EBUSY); /* * Check whether it's the primary client and flag it. */ - if (!(mcip->mci_state_flags & MCIS_IS_VNIC) && is_primary && vid == 0) + if (!(mcip->mci_state_flags & MCIS_IS_VNIC) && is_primary && + vid == VLAN_ID_NONE) mcip->mci_flags |= MAC_CLIENT_FLAGS_PRIMARY; /* * is_vnic_primary is true when we come here as a VLAN VNIC - * which uses the primary mac client's address but with a non-zero + * which uses the primary MAC client's address but with a non-zero * VID. In this case the MAC address is not specified by an upper * MAC client. 
*/ @@ -2552,7 +2588,7 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags, /* * Create a handle for vid 0. */ - ASSERT(vid == 0); + ASSERT(vid == VLAN_ID_NONE); muip = kmem_zalloc(sizeof (mac_unicast_impl_t), KM_SLEEP); muip->mui_vid = vid; *mah = (mac_unicast_handle_t)muip; @@ -2572,7 +2608,9 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags, } /* - * If this is a VNIC/VLAN, disable softmac fast-path. + * If this is a VNIC/VLAN, disable softmac fast-path. This is + * only relevant to legacy devices which use softmac to + * interface with GLDv3. */ if (mcip->mci_state_flags & MCIS_IS_VNIC) { err = mac_fastpath_disable((mac_handle_t)mip); @@ -2620,9 +2658,11 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags, (void) mac_client_set_resources(mch, mrp); } else if (mcip->mci_state_flags & MCIS_IS_VNIC) { /* - * This is a primary VLAN client, we don't support - * specifying rings property for this as it inherits the - * rings property from its MAC. + * This is a VLAN client sharing the address of the + * primary MAC client; i.e., one created via dladm + * create-vlan. We don't support specifying ring + * properties for this type of client as it inherits + * these from the primary MAC client. */ if (is_vnic_primary) { mac_resource_props_t *vmrp; @@ -2681,7 +2721,7 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags, /* * Set the flags here so that if this is a passive client, we - * can return and set it when we call mac_client_datapath_setup + * can return and set it when we call mac_client_datapath_setup * when this becomes the active client. 
If we defer to using these * flags to mac_client_datapath_setup, then for a passive client, * we'd have to store the flags somewhere (probably fe_flags) @@ -2984,14 +3024,14 @@ mac_unicast_remove(mac_client_handle_t mch, mac_unicast_handle_t mah) i_mac_perim_enter(mip); if (mcip->mci_flags & MAC_CLIENT_FLAGS_VNIC_PRIMARY) { /* - * Called made by the upper MAC client of a VNIC. + * Call made by the upper MAC client of a VNIC. * There's nothing much to do, the unicast address will * be removed by the VNIC driver when the VNIC is deleted, * but let's ensure that all our transmit is done before * the client does a mac_client_stop lest it trigger an * assert in the driver. */ - ASSERT(muip->mui_vid == 0); + ASSERT(muip->mui_vid == VLAN_ID_NONE); mac_tx_client_flush(mcip); @@ -3055,6 +3095,7 @@ mac_unicast_remove(mac_client_handle_t mch, mac_unicast_handle_t mah) i_mac_perim_exit(mip); return (0); } + /* * Remove the VID from the list of client's VIDs. */ @@ -3081,7 +3122,7 @@ mac_unicast_remove(mac_client_handle_t mch, mac_unicast_handle_t mah) * flows. */ flent = mac_client_get_flow(mcip, muip); - ASSERT(flent != NULL); + VERIFY3P(flent, !=, NULL); /* * The first one is disappearing, need to make sure @@ -3109,6 +3150,7 @@ mac_unicast_remove(mac_client_handle_t mch, mac_unicast_handle_t mah) FLOW_FINAL_REFRELE(flent); ASSERT(!(mcip->mci_state_flags & MCIS_EXCLUSIVE)); + /* * Enable fastpath if this is a VNIC or a VLAN. 
*/ @@ -3122,7 +3164,8 @@ mac_unicast_remove(mac_client_handle_t mch, mac_unicast_handle_t mah) mui_vid = muip->mui_vid; mac_client_datapath_teardown(mch, muip, flent); - if ((mcip->mci_flags & MAC_CLIENT_FLAGS_PRIMARY) && mui_vid == 0) { + if ((mcip->mci_flags & MAC_CLIENT_FLAGS_PRIMARY) && + mui_vid == VLAN_ID_NONE) { mcip->mci_flags &= ~MAC_CLIENT_FLAGS_PRIMARY; } else { i_mac_perim_exit(mip); diff --git a/usr/src/uts/common/io/mac/mac_datapath_setup.c b/usr/src/uts/common/io/mac/mac_datapath_setup.c index 0355b544d5..a3fc2529b9 100644 --- a/usr/src/uts/common/io/mac/mac_datapath_setup.c +++ b/usr/src/uts/common/io/mac/mac_datapath_setup.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2017, Joyent, Inc. + * Copyright 2018 Joyent, Inc. */ #include @@ -1186,7 +1186,7 @@ mac_srs_fanout_list_alloc(mac_soft_ring_set_t *mac_srs) mac_srs->srs_tx_soft_rings = (mac_soft_ring_t **) kmem_zalloc(sizeof (mac_soft_ring_t *) * MAX_RINGS_PER_GROUP, KM_SLEEP); - if (mcip->mci_state_flags & MCIS_IS_AGGR) { + if (mcip->mci_state_flags & MCIS_IS_AGGR_CLIENT) { mac_srs_tx_t *tx = &mac_srs->srs_tx; tx->st_soft_rings = (mac_soft_ring_t **) @@ -1595,13 +1595,13 @@ mac_srs_update_bwlimit(flow_entry_t *flent, mac_resource_props_t *mrp) /* * When the first sub-flow is added to a link, we disable polling on the - * link and also modify the entry point to mac_rx_srs_subflow_process. + * link and also modify the entry point to mac_rx_srs_subflow_process(). * (polling is disabled because with the subflow added, accounting * for polling needs additional logic, it is assumed that when a subflow is * added, we can take some hit as a result of disabling polling rather than * adding more complexity - if this becomes a perf. issue we need to * re-rvaluate this logic). When the last subflow is removed, we turn back - * polling and also reset the entry point to mac_rx_srs_process. 
+ * polling and also reset the entry point to mac_rx_srs_process(). * * In the future if there are multiple SRS, we can simply * take one and give it to the flow rather than disabling polling and @@ -1646,7 +1646,7 @@ mac_client_update_classifier(mac_client_impl_t *mcip, boolean_t enable) * Change the S/W classifier so that we can land in the * correct processing function with correct argument. * If all subflows have been removed we can revert to - * mac_rx_srsprocess, else we need mac_rx_srs_subflow_process. + * mac_rx_srs_process(), else we need mac_rx_srs_subflow_process(). */ mutex_enter(&flent->fe_lock); flent->fe_cb_fn = (flow_fn_t)rx_func; @@ -2185,7 +2185,7 @@ mac_srs_create(mac_client_impl_t *mcip, flow_entry_t *flent, uint32_t srs_type, * find nothing plus we have an existing backlog * (sr_poll_pkt_cnt > 0), we stay in polling mode but don't poll * the H/W for packets anymore (let the polling thread go to sleep). - * 5) Once the backlog is relived (packets are processed) we reenable + * 5) Once the backlog is relieved (packets are processed) we reenable * polling (by signalling the poll thread) only when the backlog * dips below sr_poll_thres. * 6) sr_hiwat is used exclusively when we are not polling capable @@ -2256,8 +2256,8 @@ mac_srs_create(mac_client_impl_t *mcip, flow_entry_t *flent, uint32_t srs_type, /* * Some drivers require serialization and don't send * packet chains in interrupt context. For such - * drivers, we should always queue in soft ring - * so that we get a chance to switch into a polling + * drivers, we should always queue in the soft ring + * so that we get a chance to switch into polling * mode under backlog. */ ring_info = mac_hwring_getinfo((mac_ring_handle_t)ring); @@ -2364,9 +2364,11 @@ mac_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent, } /* - * Set up the RX SRSs. If the S/W SRS is not set, set it up, if there - * is a group associated with this MAC client, set up SRSs for individual - * h/w rings. 
+ * Set up the Rx SRSes. If there is no group associated with the + * client, then only setup SW classification. If the client has + * exclusive (MAC_GROUP_STATE_RESERVED) use of the group, then create an + * SRS for each HW ring. If the client is sharing a group, then make + * sure to teardown the HW SRSes. */ void mac_rx_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent, @@ -2377,13 +2379,14 @@ mac_rx_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent, mac_ring_t *ring; uint32_t fanout_type; mac_group_t *rx_group = flent->fe_rx_ring_group; + boolean_t no_unicast; fanout_type = mac_find_fanout(flent, link_type); + no_unicast = (mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR) != 0; - /* Create the SRS for S/W classification if none exists */ + /* Create the SRS for SW classification if none exists */ if (flent->fe_rx_srs[0] == NULL) { ASSERT(flent->fe_rx_srs_cnt == 0); - /* Setup the Rx SRS */ mac_srs = mac_srs_create(mcip, flent, fanout_type | link_type, mac_rx_deliver, mcip, NULL, NULL); mutex_enter(&flent->fe_lock); @@ -2395,15 +2398,17 @@ mac_rx_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent, if (rx_group == NULL) return; + /* - * fanout for default SRS is done when default SRS are created - * above. As each ring is added to the group, we setup the - * SRS and fanout to it. + * If the group is marked RESERVED then setup an SRS and + * fanout for each HW ring. */ switch (rx_group->mrg_state) { case MAC_GROUP_STATE_RESERVED: for (ring = rx_group->mrg_rings; ring != NULL; ring = ring->mr_next) { + uint16_t vid = i_mac_flow_vid(mcip->mci_flent); + switch (ring->mr_state) { case MR_INUSE: case MR_FREE: @@ -2413,20 +2418,23 @@ mac_rx_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent, (void) mac_start_ring(ring); /* - * Since the group is exclusively ours create - * an SRS for this ring to allow the - * individual SRS to dynamically poll the - * ring.
Do this only if the client is not - * a VLAN MAC client, since for VLAN we do - * s/w classification for the VID check, and - * if it has a unicast address. + * If a client requires SW VLAN + * filtering or has no unicast address + * then we don't create any HW ring + * SRSes. */ - if ((mcip->mci_state_flags & - MCIS_NO_UNICAST_ADDR) || - i_mac_flow_vid(mcip->mci_flent) != - VLAN_ID_NONE) { + if ((!MAC_GROUP_HW_VLAN(rx_group) && + vid != VLAN_ID_NONE) || no_unicast) break; - } + + /* + * When a client has exclusive use of + * a group, and that group's traffic + * is fully HW classified, we create + * an SRS for each HW ring in order to + * make use of dynamic polling of said + * HW rings. + */ mac_srs = mac_srs_create(mcip, flent, fanout_type | link_type, mac_rx_deliver, mcip, NULL, ring); @@ -2442,14 +2450,9 @@ mac_rx_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent, break; case MAC_GROUP_STATE_SHARED: /* - * Set all rings of this group to software classified. - * - * If the group is current RESERVED, the existing mac - * client (the only client on this group) is using - * this group exclusively. In that case we need to - * disable polling on the rings of the group (if it - * was enabled), and free the SRS associated with the - * rings. + * When a group is shared by multiple clients, we must + * use SW classification to ensure packets are + * delivered to the correct client. */ mac_rx_switch_grp_to_sw(rx_group); break; @@ -2502,10 +2505,11 @@ mac_tx_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent, } /* - * Remove all the RX SRSs. If we want to remove only the SRSs associated - * with h/w rings, leave the S/W SRS alone. This is used when we want to - * move the MAC client from one group to another, so we need to teardown - * on the h/w SRSs. + * Teardown all the Rx SRSes. If hwonly is set, then only teardown + * the Rx HW SRSes and leave the SW SRS alone.
The hwonly flag is set + * when we wish to move a MAC client from one group to another. In + * that case, we need to release the current HW SRSes but keep the SW + * SRS for continued traffic classification. */ void mac_rx_srs_group_teardown(flow_entry_t *flent, boolean_t hwonly) @@ -2523,8 +2527,16 @@ mac_rx_srs_group_teardown(flow_entry_t *flent, boolean_t hwonly) flent->fe_rx_srs[i] = NULL; flent->fe_rx_srs_cnt--; } - ASSERT(!hwonly || flent->fe_rx_srs_cnt == 1); - ASSERT(hwonly || flent->fe_rx_srs_cnt == 0); + + /* + * If we are only tearing down the HW SRSes then there must be + * one SRS left for SW classification. Otherwise we are tearing + * down both HW and SW and there should be no SRSes left. + */ + if (hwonly) + VERIFY3S(flent->fe_rx_srs_cnt, ==, 1); + else + VERIFY3S(flent->fe_rx_srs_cnt, ==, 0); } /* @@ -2826,6 +2838,7 @@ mac_group_next_state(mac_group_t *grp, mac_client_impl_t **group_only_mcip, * even if this is the only client in the default group, we will * leave group as shared). */ + int mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent, uint32_t link_type) @@ -2836,6 +2849,7 @@ mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent, mac_group_t *default_rgroup; mac_group_t *default_tgroup; int err; + uint16_t vid; uint8_t *mac_addr; mac_group_state_t next_state; mac_client_impl_t *group_only_mcip; @@ -2848,6 +2862,7 @@ mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent, boolean_t no_unicast; boolean_t isprimary = flent->fe_type & FLOW_PRIMARY_MAC; mac_client_impl_t *reloc_pmcip = NULL; + boolean_t use_hw; ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); @@ -2879,15 +2894,19 @@ mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent, (mrp->mrp_mask & MRP_TXRINGS_UNSPEC)); /* - * By default we have given the primary all the rings - * i.e. the default group. Let's see if the primary - * needs to be relocated so that the addition of this - * client doesn't impact the primary's performance, - * i.e.
if the primary is in the default group and - * we add this client, the primary will lose polling. - * We do this only for NICs supporting dynamic ring - * grouping and only when this is the first client - * after the primary (i.e. nactiveclients is 2) + * All the rings initially belong to the default group + * under dynamic grouping. The primary client uses the + * default group when it is the only client. The + * default group is also used as the destination for + * all multicast and broadcast traffic of all clients. + * Therefore, the primary client loses its ability to + * poll the softrings on addition of a second client. + * To avoid a performance penalty, MAC will move the + * primary client to a dedicated group when it can. + * + * When using static grouping, the primary client + * begins life on a non-default group. There is + * no moving needed upon addition of a second client. */ if (!isprimary && mip->mi_nactiveclients == 2 && (group_only_mcip = mac_primary_client_handle(mip)) != @@ -2895,6 +2914,7 @@ mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent, reloc_pmcip = mac_check_primary_relocation( group_only_mcip, rxhw); } + /* * Check to see if we can get an exclusive group for * this mac address or if there already exists a @@ -2908,6 +2928,26 @@ mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent, } else if (rgroup == NULL) { rgroup = default_rgroup; } + + /* + * If we are adding a second client to a + * non-default group then we need to move the + * existing client to the default group and + * add the new client to the default group as + * well. + */ + if (rgroup != default_rgroup && + rgroup->mrg_state == MAC_GROUP_STATE_RESERVED) { + group_only_mcip = MAC_GROUP_ONLY_CLIENT(rgroup); + err = mac_rx_switch_group(group_only_mcip, rgroup, + default_rgroup); + + if (err != 0) + goto setup_failed; + + rgroup = default_rgroup; + } + /* * Check to see if we can get an exclusive group for * this mac client. 
If no groups are available, use @@ -2939,14 +2979,17 @@ mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent, rgroup->mrg_cur_count); } } + flent->fe_rx_ring_group = rgroup; /* - * Add the client to the group. This could cause - * either this group to move to the shared state or - * cause the default group to move to the shared state. - * The actions on this group are done here, while the - * actions on the default group are postponed to - * the end of this function. + * Add the client to the group and update the + * group's state. If rgroup != default_group + * then the rgroup should only ever have one + * client and be in the RESERVED state. But no + * matter what, the default_rgroup will enter + * the SHARED state since it has to receive + * all broadcast and multicast traffic. This + * case is handled later in the function. */ mac_group_add_client(rgroup, mcip); next_state = mac_group_next_state(rgroup, @@ -2971,28 +3014,37 @@ mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent, &group_only_mcip, default_tgroup, B_FALSE); tgroup->mrg_state = next_state; } - /* - * Setup the Rx and Tx SRSes. If we got a pristine group - * exclusively above, mac_srs_group_setup would simply create - * the required SRSes. If we ended up sharing a previously - * reserved group, mac_srs_group_setup would also dismantle the - * SRSes of the previously exclusive group - */ - mac_srs_group_setup(mcip, flent, link_type); /* We are setting up minimal datapath only */ - if (no_unicast) + if (no_unicast) { + mac_srs_group_setup(mcip, flent, link_type); break; - /* Program the S/W Classifer */ + } + + /* Program software classification. */ if ((err = mac_flow_add(mip->mi_flow_tab, flent)) != 0) goto setup_failed; - /* Program the H/W Classifier */ - if ((err = mac_add_macaddr(mip, rgroup, mac_addr, - (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0)) != 0) + /* Program hardware classification. 
*/ + vid = i_mac_flow_vid(flent); + use_hw = (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0; + err = mac_add_macaddr_vlan(mip, rgroup, mac_addr, vid, use_hw); + + if (err != 0) goto setup_failed; + mcip->mci_unicast = mac_find_macaddr(mip, mac_addr); - ASSERT(mcip->mci_unicast != NULL); + VERIFY3P(mcip->mci_unicast, !=, NULL); + + /* + * Setup the Rx and Tx SRSes. If the client has a + * reserved group, then mac_srs_group_setup() creates + * the required SRSes for the HW rings. If we have a + * shared group, mac_srs_group_setup() dismantles the + * HW SRSes of the previously exclusive group. + */ + mac_srs_group_setup(mcip, flent, link_type); + /* (Re)init the v6 token & local addr used by link protection */ mac_protect_update_mac_token(mcip); break; @@ -3036,17 +3088,23 @@ mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent, ASSERT(default_rgroup->mrg_state == MAC_GROUP_STATE_SHARED); } + /* - * If we get an exclusive group for a VLAN MAC client we - * need to take the s/w path to make the additional check for - * the vid. Disable polling and set it to s/w classification. - * Similarly for clients that don't have a unicast address. + * A VLAN MAC client on a reserved group still + * requires SW classification if the MAC doesn't + * provide VLAN HW filtering. + * + * Clients with no unicast address also require SW + * classification. 
*/ if (rgroup->mrg_state == MAC_GROUP_STATE_RESERVED && - (i_mac_flow_vid(flent) != VLAN_ID_NONE || no_unicast)) { + ((!MAC_GROUP_HW_VLAN(rgroup) && vid != VLAN_ID_NONE) || + no_unicast)) { mac_rx_switch_grp_to_sw(rgroup); } + } + mac_set_rings_effective(mcip); return (0); @@ -3072,6 +3130,7 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, boolean_t check_default_group = B_FALSE; mac_group_state_t next_state; mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); + uint16_t vid; ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); @@ -3084,16 +3143,24 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, case SRST_LINK: /* Stop sending packets */ mac_tx_client_block(mcip); + group = flent->fe_rx_ring_group; + vid = i_mac_flow_vid(flent); - /* Stop the packets coming from the H/W */ + /* + * Stop the packet flow from the hardware by disabling + * any hardware filters assigned to this client. + */ if (mcip->mci_unicast != NULL) { int err; - err = mac_remove_macaddr(mcip->mci_unicast); + + err = mac_remove_macaddr_vlan(mcip->mci_unicast, vid); + if (err != 0) { - cmn_err(CE_WARN, "%s: failed to remove a MAC" - " address because of error 0x%x", + cmn_err(CE_WARN, "%s: failed to remove a MAC HW" + " filters because of error 0x%x", mip->mi_name, err); } + mcip->mci_unicast = NULL; } @@ -3114,17 +3181,17 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, * left who can use it exclusively. Also, if we * were the last client, release the group. */ - group = flent->fe_rx_ring_group; default_group = MAC_DEFAULT_RX_GROUP(mip); if (group != NULL) { mac_group_remove_client(group, mcip); next_state = mac_group_next_state(group, &grp_only_mcip, default_group, B_TRUE); + if (next_state == MAC_GROUP_STATE_RESERVED) { /* * Only one client left on this RX group. 
*/ - ASSERT(grp_only_mcip != NULL); + VERIFY3P(grp_only_mcip, !=, NULL); mac_set_group_state(group, MAC_GROUP_STATE_RESERVED); group_only_flent = grp_only_mcip->mci_flent; @@ -3149,7 +3216,7 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, * to see if the primary client can get * exclusive access to the default group. */ - ASSERT(group != MAC_DEFAULT_RX_GROUP(mip)); + VERIFY3P(group, !=, MAC_DEFAULT_RX_GROUP(mip)); if (mrp->mrp_mask & MRP_RX_RINGS) { MAC_RX_GRP_RELEASED(mip); if (mip->mi_rx_group_type == @@ -3163,7 +3230,8 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, MAC_GROUP_STATE_REGISTERED); check_default_group = B_TRUE; } else { - ASSERT(next_state == MAC_GROUP_STATE_SHARED); + VERIFY3S(next_state, ==, + MAC_GROUP_STATE_SHARED); mac_set_group_state(group, MAC_GROUP_STATE_SHARED); mac_rx_group_unmark(group, MR_CONDEMNED); @@ -3252,12 +3320,12 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, */ if (check_default_group) { default_group = MAC_DEFAULT_RX_GROUP(mip); - ASSERT(default_group->mrg_state == MAC_GROUP_STATE_SHARED); + VERIFY3S(default_group->mrg_state, ==, MAC_GROUP_STATE_SHARED); next_state = mac_group_next_state(default_group, &grp_only_mcip, default_group, B_TRUE); if (next_state == MAC_GROUP_STATE_RESERVED) { - ASSERT(grp_only_mcip != NULL && - mip->mi_nactiveclients == 1); + VERIFY3P(grp_only_mcip, !=, NULL); + VERIFY3U(mip->mi_nactiveclients, ==, 1); mac_set_group_state(default_group, MAC_GROUP_STATE_RESERVED); mac_rx_srs_group_setup(grp_only_mcip, @@ -3781,7 +3849,7 @@ mac_tx_srs_del_ring(mac_soft_ring_set_t *mac_srs, mac_ring_t *tx_ring) * is also stored in st_soft_rings[] array. That entry should * be removed. 
*/ - if (mcip->mci_state_flags & MCIS_IS_AGGR) { + if (mcip->mci_state_flags & MCIS_IS_AGGR_CLIENT) { mac_srs_tx_t *tx = &mac_srs->srs_tx; ASSERT(tx->st_soft_rings[tx_ring->mr_index] == remove_sring); @@ -3810,7 +3878,7 @@ mac_tx_srs_setup(mac_client_impl_t *mcip, flow_entry_t *flent) boolean_t is_aggr; uint_t ring_info = 0; - is_aggr = (mcip->mci_state_flags & MCIS_IS_AGGR) != 0; + is_aggr = (mcip->mci_state_flags & MCIS_IS_AGGR_CLIENT) != 0; grp = flent->fe_tx_ring_group; if (grp == NULL) { ring = (mac_ring_t *)mip->mi_default_tx_ring; diff --git a/usr/src/uts/common/io/mac/mac_provider.c b/usr/src/uts/common/io/mac/mac_provider.c index 07201afdec..26f501668e 100644 --- a/usr/src/uts/common/io/mac/mac_provider.c +++ b/usr/src/uts/common/io/mac/mac_provider.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2018 Joyent, Inc. * Copyright 2017 OmniTI Computer Consulting, Inc. All rights reserved. */ @@ -56,6 +57,7 @@ #include #include #include +#include /* * MAC Provider Interface. @@ -695,7 +697,7 @@ mac_rx_common(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain) { mac_impl_t *mip = (mac_impl_t *)mh; mac_ring_t *mr = (mac_ring_t *)mrh; - mac_soft_ring_set_t *mac_srs; + mac_soft_ring_set_t *mac_srs; mblk_t *bp = mp_chain; boolean_t hw_classified = B_FALSE; diff --git a/usr/src/uts/common/io/mac/mac_sched.c b/usr/src/uts/common/io/mac/mac_sched.c index d046930873..cbd5ce1e19 100644 --- a/usr/src/uts/common/io/mac/mac_sched.c +++ b/usr/src/uts/common/io/mac/mac_sched.c @@ -21,7 +21,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2017 Joyent, Inc. + * Copyright 2018 Joyent, Inc. * Copyright 2013 Nexenta Systems, Inc. All rights reserved. */ @@ -300,9 +300,8 @@ * * Otherwise, all fanout is performed by software. 
MAC divides incoming frames * into one of three buckets -- IPv4 TCP traffic, IPv4 UDP traffic, and - * everything else. Note, VLAN tagged traffic is considered other, regardless of - * the interior EtherType. Regardless of the type of fanout, these three - * categories or buckets are always used. + * everything else. Regardless of the type of fanout, these three categories + * or buckets are always used. * * The difference between protocol level fanout and full software ring protocol * fanout is the number of software rings that end up getting created. The @@ -1475,16 +1474,15 @@ enum pkt_type { #define PORTS_SIZE 4 /* - * mac_rx_srs_proto_fanout - * - * This routine delivers packets destined to an SRS into one of the + * This routine delivers packets destined for an SRS into one of the * protocol soft rings. * - * Given a chain of packets we need to split it up into multiple sub chains - * destined into TCP, UDP or OTH soft ring. Instead of entering - * the soft ring one packet at a time, we want to enter it in the form of a - * chain otherwise we get this start/stop behaviour where the worker thread - * goes to sleep and then next packets comes in forcing it to wake up etc. + * Given a chain of packets we need to split it up into multiple sub + * chains: TCP, UDP or OTH soft ring. Instead of entering the soft + * ring one packet at a time, we want to enter it in the form of a + * chain otherwise we get this start/stop behaviour where the worker + * thread goes to sleep and then next packet comes in forcing it to + * wake up. */ static void mac_rx_srs_proto_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head) @@ -1523,9 +1521,9 @@ mac_rx_srs_proto_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head) mac_srs->srs_ring->mr_classify_type == MAC_HW_CLASSIFIER; /* - * Special clients (eg. VLAN, non ether, etc) need DLS - * processing in the Rx path. SRST_DLS_BYPASS will be clear for - * such SRSs. 
Another way of disabling bypass is to set the + * Some clients, such as non-ethernet, need DLS processing in + * the Rx path. Such clients clear the SRST_DLS_BYPASS flag. + * DLS bypass may also be disabled via the * MCIS_RX_BYPASS_DISABLE flag. */ dls_bypass = ((mac_srs->srs_type & SRST_DLS_BYPASS) != 0) && @@ -1537,10 +1535,11 @@ mac_rx_srs_proto_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head) bzero(sz, MAX_SR_TYPES * sizeof (size_t)); /* - * We got a chain from SRS that we need to send to the soft rings. - * Since squeues for TCP & IPv4 sap poll their soft rings (for - * performance reasons), we need to separate out v4_tcp, v4_udp - * and the rest goes in other. + * We have a chain from SRS that we need to split across the + * soft rings. The squeues for the TCP and IPv4 SAPs use their + * own soft rings to allow polling from the squeue. The rest of + * the packets are delivered on the OTH soft ring which cannot + * be polled. */ while (head != NULL) { mp = head; @@ -1568,9 +1567,14 @@ mac_rx_srs_proto_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head) evhp = (struct ether_vlan_header *)mp->b_rptr; sap = ntohs(evhp->ether_type); hdrsize = sizeof (struct ether_vlan_header); + /* - * Check if the VID of the packet, if any, - * belongs to this client. + * Check if the VID of the packet, if + * any, belongs to this client. + * Technically, if this packet came up + * via a HW classified ring then we + * don't need to perform this check. + * Perhaps a future optimization. */ if (!mac_client_check_flow_vid(mcip, VLAN_ID(ntohs(evhp->ether_tci)))) { @@ -1635,7 +1639,6 @@ mac_rx_srs_proto_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head) * performance and may bypass DLS. All other cases go through * the 'OTH' type path without DLS bypass. 
*/ - ipha = (ipha_t *)(mp->b_rptr + hdrsize); if ((type != OTH) && MBLK_RX_FANOUT_SLOWPATH(mp, ipha)) type = OTH; @@ -1647,11 +1650,13 @@ mac_rx_srs_proto_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head) } ASSERT(type == UNDEF); + /* - * We look for at least 4 bytes past the IP header to get - * the port information. If we get an IP fragment, we don't - * have the port information, and we use just the protocol - * information. + * Determine the type from the IP protocol value. If + * classified as TCP or UDP, then update the read + * pointer to the beginning of the IP header. + * Otherwise leave the message as is for further + * processing by DLS. */ switch (ipha->ipha_protocol) { case IPPROTO_TCP: @@ -1695,11 +1700,10 @@ mac_rx_srs_proto_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head) int fanout_unaligned = 0; /* - * mac_rx_srs_long_fanout - * - * The fanout routine for VLANs, and for anything else that isn't performing - * explicit dls bypass. Returns -1 on an error (drop the packet due to a - * malformed packet), 0 on success, with values written in *indx and *type. + * The fanout routine for any clients with DLS bypass disabled or for + * traffic classified as "other". Returns -1 on an error (drop the + * packet due to a malformed packet), 0 on success, with values + * written in *indx and *type. */ static int mac_rx_srs_long_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *mp, @@ -1865,16 +1869,15 @@ src_dst_based_fanout: } /* - * mac_rx_srs_fanout - * - * This routine delivers packets destined to an SRS into a soft ring member + * This routine delivers packets destined for an SRS into a soft ring member * of the set. * - * Given a chain of packets we need to split it up into multiple sub chains - * destined for one of the TCP, UDP or OTH soft rings. 
Instead of entering - * the soft ring one packet at a time, we want to enter it in the form of a - * chain otherwise we get this start/stop behaviour where the worker thread - * goes to sleep and then next packets comes in forcing it to wake up etc. + * Given a chain of packets we need to split it up into multiple sub + * chains: TCP, UDP or OTH soft ring. Instead of entering the soft + * ring one packet at a time, we want to enter it in the form of a + * chain otherwise we get this start/stop behaviour where the worker + * thread goes to sleep and then next packet comes in forcing it to + * wake up. * * Note: * Since we know what is the maximum fanout possible, we create a 2D array @@ -1935,10 +1938,11 @@ mac_rx_srs_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head) mac_srs->srs_ring->mr_classify_type == MAC_HW_CLASSIFIER; /* - * Special clients (eg. VLAN, non ether, etc) need DLS - * processing in the Rx path. SRST_DLS_BYPASS will be clear for - * such SRSs. Another way of disabling bypass is to set the - * MCIS_RX_BYPASS_DISABLE flag. + * Some clients, such as non Ethernet, need DLS processing in + * the Rx path. Such clients clear the SRST_DLS_BYPASS flag. + * DLS bypass may also be disabled via the + * MCIS_RX_BYPASS_DISABLE flag, but this is only consumed by + * sun4v vsw currently. */ dls_bypass = ((mac_srs->srs_type & SRST_DLS_BYPASS) != 0) && ((mcip->mci_state_flags & MCIS_RX_BYPASS_DISABLE) == 0); @@ -1960,7 +1964,7 @@ mac_rx_srs_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head) /* * We got a chain from SRS that we need to send to the soft rings. - * Since squeues for TCP & IPv4 sap poll their soft rings (for + * Since squeues for TCP & IPv4 SAP poll their soft rings (for * performance reasons), we need to separate out v4_tcp, v4_udp * and the rest goes in other. 
*/ @@ -1990,9 +1994,14 @@ mac_rx_srs_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head) evhp = (struct ether_vlan_header *)mp->b_rptr; sap = ntohs(evhp->ether_type); hdrsize = sizeof (struct ether_vlan_header); + /* - * Check if the VID of the packet, if any, - * belongs to this client. + * Check if the VID of the packet, if + * any, belongs to this client. + * Technically, if this packet came up + * via a HW classified ring then we + * don't need to perform this check. + * Perhaps a future optimization. */ if (!mac_client_check_flow_vid(mcip, VLAN_ID(ntohs(evhp->ether_tci)))) { @@ -2032,7 +2041,6 @@ mac_rx_srs_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head) continue; } - /* * If we are using the default Rx ring where H/W or S/W * classification has not happened, we need to verify if @@ -2621,7 +2629,6 @@ again: mac_srs->srs_state |= (SRS_PROC|proc_type); - /* * mcip is NULL for broadcast and multicast flows. The promisc * callbacks for broadcast and multicast packets are delivered from @@ -2641,10 +2648,8 @@ again: } /* - * Check if SRS itself is doing the processing - * This direct path does not apply when subflows are present. In this - * case, packets need to be dispatched to a soft ring according to the - * flow's bandwidth and other resources contraints. + * Check if SRS itself is doing the processing. This direct + * path applies only when subflows are present. */ if (mac_srs->srs_type & SRST_NO_SOFT_RINGS) { mac_direct_rx_t proc; @@ -4656,6 +4661,9 @@ mac_rx_deliver(void *arg1, mac_resource_handle_t mrh, mblk_t *mp_chain, * the packet to the promiscuous listeners of the * client, since they expect to see the whole * frame including the VLAN headers. + * + * The MCIS_STRIP_DISABLE is only issued when sun4v + * vsw is in play. */ mp_chain = mac_strip_vlan_tag_chain(mp_chain); } @@ -4664,13 +4672,11 @@ mac_rx_deliver(void *arg1, mac_resource_handle_t mrh, mblk_t *mp_chain, } /* - * mac_rx_soft_ring_process - * - * process a chain for a given soft ring. 
The number of packets queued - * in the SRS and its associated soft rings (including this one) is - * very small (tracked by srs_poll_pkt_cnt), then allow the entering - * thread (interrupt or poll thread) to do inline processing. This - * helps keep the latency down under low load. + * Process a chain for a given soft ring. If the number of packets + * queued in the SRS and its associated soft rings (including this + * one) is very small (tracked by srs_poll_pkt_cnt) then allow the + * entering thread (interrupt or poll thread) to process the chain + * inline. This is meant to reduce latency under low load. * * The proc and arg for each mblk is already stored in the mblk in * appropriate places. @@ -4729,13 +4735,13 @@ mac_rx_soft_ring_process(mac_client_impl_t *mcip, mac_soft_ring_t *ringp, ASSERT(MUTEX_NOT_HELD(&ringp->s_ring_lock)); /* - * If we have a soft ring set which is doing - * bandwidth control, we need to decrement - * srs_size and count so it the SRS can have a - * accurate idea of what is the real data - * queued between SRS and its soft rings. We - * decrement the counters only when the packet - * gets processed by both SRS and the soft ring. + * If we have an SRS performing bandwidth + * control then we need to decrement the size + * and count so the SRS has an accurate count + * of the data queued between the SRS and its + * soft rings. We decrement the counters only + * when the packet is processed by both the + * SRS and the soft ring. */ mutex_enter(&mac_srs->srs_lock); MAC_UPDATE_SRS_COUNT_LOCKED(mac_srs, cnt); @@ -4751,8 +4757,8 @@ mac_rx_soft_ring_process(mac_client_impl_t *mcip, mac_soft_ring_t *ringp, if ((ringp->s_ring_first == NULL) || (ringp->s_ring_state & S_RING_BLANK)) { /* - * We processed inline our packet and - * nothing new has arrived or our + * We processed a single packet inline + * and nothing new has arrived or our * receiver doesn't want to receive * any packets. We are done. 
*/ diff --git a/usr/src/uts/common/io/mac/mac_soft_ring.c b/usr/src/uts/common/io/mac/mac_soft_ring.c index d24c0207df..f4d2a5ee81 100644 --- a/usr/src/uts/common/io/mac/mac_soft_ring.c +++ b/usr/src/uts/common/io/mac/mac_soft_ring.c @@ -21,7 +21,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2017 Joyent, Inc. + * Copyright 2018 Joyent, Inc. */ /* @@ -207,7 +207,7 @@ mac_soft_ring_create(int id, clock_t wait, uint16_t type, ringp->s_ring_tx_hiwat = (mac_tx_soft_ring_hiwat > mac_tx_soft_ring_max_q_cnt) ? mac_tx_soft_ring_max_q_cnt : mac_tx_soft_ring_hiwat; - if (mcip->mci_state_flags & MCIS_IS_AGGR) { + if (mcip->mci_state_flags & MCIS_IS_AGGR_CLIENT) { mac_srs_tx_t *tx = &mac_srs->srs_tx; ASSERT(tx->st_soft_rings[ @@ -339,15 +339,14 @@ mac_soft_ring_fire(void *arg) } /* - * mac_rx_soft_ring_drain + * Drain the soft ring pointed to by ringp. * - * Called when worker thread model (ST_RING_WORKER_ONLY) of processing - * incoming packets is used. s_ring_first contain the queued packets. - * s_ring_rx_func contains the upper level (client) routine where the - * packets are destined and s_ring_rx_arg1/s_ring_rx_arg2 are the - * cookie meant for the client. + * o s_ring_first: pointer to the queued packet chain. + * + * o s_ring_rx_func: pointer to the client's Rx routine. + * + * o s_ring_rx_{arg1,arg2}: opaque values specific to the client. */ -/* ARGSUSED */ static void mac_rx_soft_ring_drain(mac_soft_ring_t *ringp) { @@ -392,13 +391,12 @@ mac_rx_soft_ring_drain(mac_soft_ring_t *ringp) (*proc)(arg1, arg2, mp, NULL); /* - * If we have a soft ring set which is doing - * bandwidth control, we need to decrement its - * srs_size so it can have a accurate idea of - * what is the real data queued between SRS and - * its soft rings. We decrement the size for a - * packet only when it gets processed by both - * SRS and the soft ring.
+ * If we have an SRS performing bandwidth control, then + * we need to decrement the size and count so the SRS + * has an accurate measure of the data queued between + * the SRS and its soft rings. We decrement the + * counters only when the packet is processed by both + * the SRS and the soft ring. */ mutex_enter(&mac_srs->srs_lock); MAC_UPDATE_SRS_COUNT_LOCKED(mac_srs, cnt); @@ -414,12 +412,10 @@ mac_rx_soft_ring_drain(mac_soft_ring_t *ringp) } /* - * mac_soft_ring_worker - * * The soft ring worker routine to process any queued packets. In - * normal case, the worker thread is bound to a CPU. It the soft - * ring is dealing with TCP packets, then the worker thread will - * be bound to the same CPU as the TCP squeue. + * normal case, the worker thread is bound to a CPU. If the soft ring + * handles TCP packets then the worker thread is bound to the same CPU + * as the TCP squeue. */ static void mac_soft_ring_worker(mac_soft_ring_t *ringp) @@ -605,7 +601,7 @@ mac_soft_ring_dls_bypass(void *arg, mac_direct_rx_t rx_func, void *rx_arg1) mac_soft_ring_t *softring = arg; mac_soft_ring_set_t *srs; - ASSERT(rx_func != NULL); + VERIFY3P(rx_func, !=, NULL); mutex_enter(&softring->s_ring_lock); softring->s_ring_rx_func = rx_func; diff --git a/usr/src/uts/common/io/vnic/vnic_dev.c b/usr/src/uts/common/io/vnic/vnic_dev.c index 3cb7e7660a..da52d7bb37 100644 --- a/usr/src/uts/common/io/vnic/vnic_dev.c +++ b/usr/src/uts/common/io/vnic/vnic_dev.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2015 Joyent, Inc. + * Copyright 2018 Joyent, Inc. * Copyright 2016 OmniTI Computer Consulting, Inc. All rights reserved. */ @@ -354,7 +354,7 @@ vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid, rw_enter(&vnic_lock, RW_WRITER); - /* does a VNIC with the same id already exist? */ + /* Does a VNIC with the same id already exist? 
*/ err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id), (mod_hash_val_t *)&vnic); if (err == 0) { @@ -1037,7 +1037,7 @@ static int vnic_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num, uint_t pr_valsize, const void *pr_val) { - int err = 0; + int err = 0; vnic_t *vn = m_driver; switch (pr_num) { @@ -1135,7 +1135,7 @@ vnic_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, uint_t pr_valsize, void *pr_val) { vnic_t *vn = arg; - int ret = 0; + int ret = 0; boolean_t out; switch (pr_num) { diff --git a/usr/src/uts/common/mapfiles/ddi.mapfile b/usr/src/uts/common/mapfiles/ddi.mapfile index 75e95a9452..9b6a9ab677 100644 --- a/usr/src/uts/common/mapfiles/ddi.mapfile +++ b/usr/src/uts/common/mapfiles/ddi.mapfile @@ -165,6 +165,7 @@ SYMBOL_SCOPE { list_insert_tail { FLAGS = EXTERN }; list_next { FLAGS = EXTERN }; list_remove { FLAGS = EXTERN }; + list_remove_head { FLAGS = EXTERN }; memcpy { FLAGS = EXTERN }; memset { FLAGS = EXTERN }; miocack { FLAGS = EXTERN }; diff --git a/usr/src/uts/common/sys/aggr_impl.h b/usr/src/uts/common/sys/aggr_impl.h index 547c9cc241..415e176ef3 100644 --- a/usr/src/uts/common/sys/aggr_impl.h +++ b/usr/src/uts/common/sys/aggr_impl.h @@ -21,6 +21,8 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2012 OmniTI Computer Consulting, Inc All rights reserved. + * Copyright 2018 Joyent, Inc. */ #ifndef _SYS_AGGR_IMPL_H @@ -54,6 +56,15 @@ extern "C" { */ #define MAC_PSEUDO_RING_INUSE 0x01 +/* + * VLAN filters placed on the Rx pseudo group. 
+ */ +typedef struct aggr_vlan { + list_node_t av_link; + uint16_t av_vid; /* VLAN ID */ + uint_t av_refs; /* num aggr clients using this VID */ +} aggr_vlan_t; + typedef struct aggr_unicst_addr_s { uint8_t aua_addr[ETHERADDRL]; struct aggr_unicst_addr_s *aua_next; @@ -73,6 +84,8 @@ typedef struct aggr_pseudo_rx_group_s { aggr_unicst_addr_t *arg_macaddr; aggr_pseudo_rx_ring_t arg_rings[MAX_RINGS_PER_GROUP]; uint_t arg_ring_cnt; + uint_t arg_untagged; /* num clients untagged */ + list_t arg_vlans; /* VLANs on this group */ } aggr_pseudo_rx_group_t; typedef struct aggr_pseudo_tx_ring_s { @@ -186,11 +199,18 @@ typedef struct aggr_grp_s { uint_t lg_tx_ports_size; /* size of lg_tx_ports */ uint32_t lg_tx_policy; /* outbound policy */ uint8_t lg_mac_tx_policy; - uint64_t lg_ifspeed; link_state_t lg_link_state; + + + /* + * The lg_stat_lock must be held when accessing these fields. + */ + kmutex_t lg_stat_lock; + uint64_t lg_ifspeed; link_duplex_t lg_link_duplex; uint64_t lg_stat[MAC_NSTAT]; uint64_t lg_ether_stat[ETHER_NSTAT]; + aggr_lacp_mode_t lg_lacp_mode; /* off, active, or passive */ Agg_t aggr; /* 802.3ad data */ uint32_t lg_hcksum_txflags; @@ -308,6 +328,8 @@ extern boolean_t aggr_port_notify_link(aggr_grp_t *, aggr_port_t *); extern void aggr_port_init_callbacks(aggr_port_t *); extern void aggr_recv_cb(void *, mac_resource_handle_t, mblk_t *, boolean_t); +extern void aggr_recv_promisc_cb(void *, mac_resource_handle_t, mblk_t *, + boolean_t); extern void aggr_tx_ring_update(void *, uintptr_t); extern void aggr_tx_notify_thread(void *); @@ -338,6 +360,9 @@ extern void aggr_grp_port_wait(aggr_grp_t *); extern int aggr_port_addmac(aggr_port_t *, const uint8_t *); extern void aggr_port_remmac(aggr_port_t *, const uint8_t *); +extern int aggr_port_addvlan(aggr_port_t *, uint16_t); +extern int aggr_port_remvlan(aggr_port_t *, uint16_t); + extern mblk_t *aggr_ring_tx(void *, mblk_t *); extern mblk_t *aggr_find_tx_ring(void *, mblk_t *, uintptr_t, mac_ring_handle_t *); 
diff --git a/usr/src/uts/common/sys/mac_client.h b/usr/src/uts/common/sys/mac_client.h index 0fc4939503..74f4cbb310 100644 --- a/usr/src/uts/common/sys/mac_client.h +++ b/usr/src/uts/common/sys/mac_client.h @@ -22,7 +22,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2013 Joyent, Inc. All rights reserved. + * Copyright 2018 Joyent, Inc. All rights reserved. */ /* @@ -88,6 +88,7 @@ typedef enum { } mac_client_promisc_type_t; /* flags passed to mac_unicast_add() */ + #define MAC_UNICAST_NODUPCHECK 0x0001 #define MAC_UNICAST_PRIMARY 0x0002 #define MAC_UNICAST_HW 0x0004 diff --git a/usr/src/uts/common/sys/mac_client_impl.h b/usr/src/uts/common/sys/mac_client_impl.h index 9b3b4fe369..d5c66684d0 100644 --- a/usr/src/uts/common/sys/mac_client_impl.h +++ b/usr/src/uts/common/sys/mac_client_impl.h @@ -24,7 +24,7 @@ * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ /* - * Copyright (c) 2013, Joyent, Inc. All rights reserved. + * Copyright 2018 Joyent, Inc. */ #ifndef _SYS_MAC_CLIENT_IMPL_H @@ -57,7 +57,7 @@ typedef struct mac_unicast_impl_s { /* Protected by */ uint16_t mui_vid; /* SL */ } mac_unicast_impl_t; -#define MAC_CLIENT_FLAGS_PRIMARY 0X0001 +#define MAC_CLIENT_FLAGS_PRIMARY 0x0001 #define MAC_CLIENT_FLAGS_VNIC_PRIMARY 0x0002 #define MAC_CLIENT_FLAGS_MULTI_PRIMARY 0x0004 #define MAC_CLIENT_FLAGS_PASSIVE_PRIMARY 0x0008 @@ -131,12 +131,17 @@ struct mac_client_impl_s { /* Protected by */ uint32_t mci_flags; /* SL */ krwlock_t mci_rw_lock; mac_unicast_impl_t *mci_unicast_list; /* mci_rw_lock */ + /* * The mac_client_impl_t may be shared by multiple clients, i.e * multiple VLANs sharing the same MAC client. In this case the - * address/vid tubles differ and are each associated with their + * address/vid tuples differ and are each associated with their * own flow entry, but the rest underlying components SRS, etc, * are common. + * + * This is only needed to support sun4v vsw. 
There are several + * places in MAC we could simplify the code if we removed + * sun4v support. */ flow_entry_t *mci_flent_list; /* mci_rw_lock */ uint_t mci_nflents; /* mci_rw_lock */ @@ -224,7 +229,7 @@ extern int mac_tx_percpu_cnt; &(mcip)->mci_flent->fe_resource_props) #define MCIP_EFFECTIVE_PROPS(mcip) \ - (mcip->mci_flent == NULL ? NULL : \ + (mcip->mci_flent == NULL ? NULL : \ &(mcip)->mci_flent->fe_effective_props) #define MCIP_RESOURCE_PROPS_MASK(mcip) \ @@ -313,6 +318,74 @@ extern int mac_tx_percpu_cnt; (((mcip)->mci_state_flags & MCIS_TAG_DISABLE) == 0 && \ (mcip)->mci_nvids == 1) \ +/* + * MAC Client Implementation State (mci_state_flags) + * + * MCIS_IS_VNIC + * + * The client is a VNIC. + * + * MCIS_EXCLUSIVE + * + * The client has exclusive control over the MAC, such that it is + * the sole client of the MAC. + * + * MCIS_TAG_DISABLE + * + * MAC will not add VLAN tags to outgoing traffic. If this flag + * is set it is up to the client to add the correct VLAN tag. + * + * MCIS_STRIP_DISABLE + * + * MAC will not strip the VLAN tags on incoming traffic before + * passing it to mci_rx_fn. This only applies to non-bypass + * traffic. + * + * MCIS_IS_AGGR_PORT + * + * The client represents a port on an aggr. + * + * MCIS_CLIENT_POLL_CAPABLE + * + * The client is capable of polling the Rx TCP/UDP softrings. + * + * MCIS_DESC_LOGGED + * + * This flag is set when the client's link info has been logged + * by the mac_log_linkinfo() timer. This ensures that the + * client's link info is only logged once. + * + * MCIS_SHARE_BOUND + * + * This client has an HIO share bound to it. + * + * MCIS_DISABLE_TX_VID_CHECK + * + * MAC will not check the VID of the client's Tx traffic. + * + * MCIS_USE_DATALINK_NAME + * + * The client is using the same name as its underlying MAC. This + * happens when dlmgmtd is unreachable during client creation. + * + * MCIS_UNICAST_HW + * + * The client requires MAC address hardware classification. This + * is only used by sun4v vsw. 
+ * + * MCIS_IS_AGGR_CLIENT + * + * The client sits atop an aggr. + * + * MCIS_RX_BYPASS_DISABLE + * + * Do not allow the client to enable DLS bypass. + * + * MCIS_NO_UNICAST_ADDR + * + * This client has no MAC unicast address associated with it. + * + */ /* MCI state flags */ #define MCIS_IS_VNIC 0x0001 #define MCIS_EXCLUSIVE 0x0002 @@ -325,7 +398,7 @@ extern int mac_tx_percpu_cnt; #define MCIS_DISABLE_TX_VID_CHECK 0x0100 #define MCIS_USE_DATALINK_NAME 0x0200 #define MCIS_UNICAST_HW 0x0400 -#define MCIS_IS_AGGR 0x0800 +#define MCIS_IS_AGGR_CLIENT 0x0800 #define MCIS_RX_BYPASS_DISABLE 0x1000 #define MCIS_NO_UNICAST_ADDR 0x2000 diff --git a/usr/src/uts/common/sys/mac_client_priv.h b/usr/src/uts/common/sys/mac_client_priv.h index 6b409513a6..77475b339e 100644 --- a/usr/src/uts/common/sys/mac_client_priv.h +++ b/usr/src/uts/common/sys/mac_client_priv.h @@ -22,7 +22,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2013 Joyent, Inc. All rights reserved. + * Copyright 2018 Joyent, Inc. */ /* @@ -144,6 +144,10 @@ extern void mac_hwring_set_default(mac_handle_t, mac_ring_handle_t); extern int mac_hwgroup_addmac(mac_group_handle_t, const uint8_t *); extern int mac_hwgroup_remmac(mac_group_handle_t, const uint8_t *); +extern int mac_hwgroup_addvlan(mac_group_handle_t, uint16_t); +extern int mac_hwgroup_remvlan(mac_group_handle_t, uint16_t); + +extern boolean_t mac_has_hw_vlan(mac_handle_t); extern void mac_set_upper_mac(mac_client_handle_t, mac_handle_t, mac_resource_props_t *); diff --git a/usr/src/uts/common/sys/mac_impl.h b/usr/src/uts/common/sys/mac_impl.h index 774c4fad9a..eebbde37de 100644 --- a/usr/src/uts/common/sys/mac_impl.h +++ b/usr/src/uts/common/sys/mac_impl.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2017, Joyent, Inc. + * Copyright (c) 2018, Joyent, Inc.
*/ #ifndef _SYS_MAC_IMPL_H @@ -244,7 +244,7 @@ struct mac_ring_s { (mr)->mr_refcnt++; \ } -#define MR_REFRELE(mr) { \ +#define MR_REFRELE(mr) { \ mutex_enter(&(mr)->mr_lock); \ ASSERT((mr)->mr_refcnt != 0); \ (mr)->mr_refcnt--; \ @@ -255,8 +255,8 @@ struct mac_ring_s { } /* - * Per mac client flow information associated with a RX group. - * The entire structure is SL protected. + * Used to attach MAC clients to an Rx group. The members are SL + * protected. */ typedef struct mac_grp_client { struct mac_grp_client *mgc_next; @@ -270,15 +270,20 @@ typedef struct mac_grp_client { ((g)->mrg_clients->mgc_next == NULL)) ? \ (g)->mrg_clients->mgc_client : NULL) +#define MAC_GROUP_HW_VLAN(g) \ + (((g) != NULL) && \ + ((g)->mrg_info.mgi_addvlan != NULL) && \ + ((g)->mrg_info.mgi_remvlan != NULL)) + /* * Common ring group data structure for ring control and management. - * The entire structure is SL protected + * The entire structure is SL protected. */ struct mac_group_s { int mrg_index; /* index in the list */ mac_ring_type_t mrg_type; /* ring type */ mac_group_state_t mrg_state; /* state of the group */ - mac_group_t *mrg_next; /* next ring in the chain */ + mac_group_t *mrg_next; /* next group in the chain */ mac_handle_t mrg_mh; /* reference to MAC */ mac_ring_t *mrg_rings; /* grouped rings */ uint_t mrg_cur_count; /* actual size of group */ @@ -300,7 +305,7 @@ struct mac_group_s { mac_ring_handle_t mrh = rh; \ mac_impl_t *mimpl = (mac_impl_t *)mhp; \ /* \ - * Send packets through a selected tx ring, or through the \ + * Send packets through a selected tx ring, or through the \ * default handler if there is no selected ring. \ */ \ if (mrh == NULL) \ @@ -322,9 +327,9 @@ struct mac_group_s { #define MAC_TX(mip, rh, mp, src_mcip) { \ mac_ring_handle_t rhandle = (rh); \ /* \ - * If there is a bound Hybrid I/O share, send packets through \ + * If there is a bound Hybrid I/O share, send packets through \ * the default tx ring. 
(When there's a bound Hybrid I/O share, \ - * the tx rings of this client are mapped in the guest domain \ + * the tx rings of this client are mapped in the guest domain \ * and not accessible from here.) \ */ \ _NOTE(CONSTANTCONDITION) \ @@ -333,7 +338,7 @@ struct mac_group_s { if (mip->mi_promisc_list != NULL) \ mac_promisc_dispatch(mip, mp, src_mcip); \ /* \ - * Grab the proper transmit pointer and handle. Special \ + * Grab the proper transmit pointer and handle. Special \ * optimization: we can test mi_bridge_link itself atomically, \ * and if that indicates no bridge send packets through tx ring.\ */ \ @@ -360,17 +365,23 @@ typedef struct mac_mcast_addrs_s { } mac_mcast_addrs_t; typedef enum { - MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED = 1, /* hardware steering */ + MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED = 1, /* HW classification */ MAC_ADDRESS_TYPE_UNICAST_PROMISC /* promiscuous mode */ } mac_address_type_t; +typedef struct mac_vlan_s { + struct mac_vlan_s *mv_next; + uint16_t mv_vid; +} mac_vlan_t; + typedef struct mac_address_s { mac_address_type_t ma_type; /* address type */ - int ma_nusers; /* number of users */ - /* of that address */ + int ma_nusers; /* num users of addr */ struct mac_address_s *ma_next; /* next address */ uint8_t ma_addr[MAXMACADDRLEN]; /* address value */ size_t ma_len; /* address length */ + mac_vlan_t *ma_vlans; /* VLANs on this addr */ + boolean_t ma_untagged; /* accept untagged? */ mac_group_t *ma_group; /* asscociated group */ mac_impl_t *ma_mip; /* MAC handle */ } mac_address_t; @@ -487,7 +498,7 @@ struct mac_impl_s { mac_capab_led_t mi_led; /* - * MAC address list. SL protected. + * MAC address and VLAN lists. SL protected. 
*/ mac_address_t *mi_addresses; @@ -759,6 +770,8 @@ extern void mac_client_bcast_refresh(mac_client_impl_t *, mac_multicst_t, */ extern int mac_group_addmac(mac_group_t *, const uint8_t *); extern int mac_group_remmac(mac_group_t *, const uint8_t *); +extern int mac_group_addvlan(mac_group_t *, uint16_t); +extern int mac_group_remvlan(mac_group_t *, uint16_t); extern int mac_rx_group_add_flow(mac_client_impl_t *, flow_entry_t *, mac_group_t *); extern mblk_t *mac_hwring_tx(mac_ring_handle_t, mblk_t *); @@ -779,6 +792,7 @@ extern void mac_rx_switch_grp_to_sw(mac_group_t *); * MAC address functions are used internally by MAC layer. */ extern mac_address_t *mac_find_macaddr(mac_impl_t *, uint8_t *); +extern mac_address_t *mac_find_macaddr_vlan(mac_impl_t *, uint8_t *, uint16_t); extern boolean_t mac_check_macaddr_shared(mac_address_t *); extern int mac_update_macaddr(mac_address_t *, uint8_t *); extern void mac_freshen_macaddr(mac_address_t *, uint8_t *); @@ -863,8 +877,9 @@ extern int mac_start_group(mac_group_t *); extern void mac_stop_group(mac_group_t *); extern int mac_start_ring(mac_ring_t *); extern void mac_stop_ring(mac_ring_t *); -extern int mac_add_macaddr(mac_impl_t *, mac_group_t *, uint8_t *, boolean_t); -extern int mac_remove_macaddr(mac_address_t *); +extern int mac_add_macaddr_vlan(mac_impl_t *, mac_group_t *, uint8_t *, + uint16_t, boolean_t); +extern int mac_remove_macaddr_vlan(mac_address_t *, uint16_t); extern void mac_set_group_state(mac_group_t *, mac_group_state_t); extern void mac_group_add_client(mac_group_t *, mac_client_impl_t *); diff --git a/usr/src/uts/common/sys/mac_provider.h b/usr/src/uts/common/sys/mac_provider.h index 4c91c03967..301bc9a058 100644 --- a/usr/src/uts/common/sys/mac_provider.h +++ b/usr/src/uts/common/sys/mac_provider.h @@ -21,7 +21,7 @@ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2017, Joyent, Inc. + * Copyright (c) 2018, Joyent, Inc. 
*/ #ifndef _SYS_MAC_PROVIDER_H @@ -280,6 +280,28 @@ typedef enum { MAC_RING_TYPE_TX /* Transmit ring */ } mac_ring_type_t; +/* + * The value VLAN_ID_NONE (VID 0) means a client does not have + * membership to any VLAN. However, this statement is true for both + * untagged packets and priority tagged packets leading to confusion + * over what semantic is intended. To the provider, VID 0 is a valid + * VID when priority tagging is in play. To MAC and everything above + * VLAN_ID_NONE almost universally implies untagged traffic. Thus, we + * convert VLAN_ID_NONE to a sentinel value (MAC_VLAN_UNTAGGED) at the + * border between MAC and MAC provider. This informs the provider that + * the client is interested in untagged traffic and the provider + * should set any relevant bits to receive such traffic. + * + * Currently, the API between MAC and the provider passes the VID as a + * uint16_t. In the future this could actually be the entire TCI mask + * (PCP, DEI, and VID). This current scheme is safe in that potential + * future world as well; as 0xFFFF is not a valid TCI (the 0xFFF VID + * is reserved and never transmitted across networks). + */ +#define MAC_VLAN_UNTAGGED UINT16_MAX +#define MAC_VLAN_UNTAGGED_VID(vid) \ + (((vid) == VLAN_ID_NONE) ? MAC_VLAN_UNTAGGED : (vid)) + /* * Grouping type of a ring group * @@ -358,6 +380,8 @@ typedef struct mac_ring_info_s { * #defines for mri_flags. The flags are temporary flags that are provided * only to workaround issues in specific drivers, and they will be * removed in the future. + * + * These are consumed only by sun4v and neptune (nxge).
*/ #define MAC_RING_TX_SERIALIZE 0x1 #define MAC_RING_RX_ENQUEUE 0x2 @@ -366,6 +390,8 @@ typedef int (*mac_group_start_t)(mac_group_driver_t); typedef void (*mac_group_stop_t)(mac_group_driver_t); typedef int (*mac_add_mac_addr_t)(void *, const uint8_t *); typedef int (*mac_rem_mac_addr_t)(void *, const uint8_t *); +typedef int (*mac_add_vlan_filter_t)(mac_group_driver_t, uint16_t); +typedef int (*mac_rem_vlan_filter_t)(mac_group_driver_t, uint16_t); struct mac_group_info_s { mac_group_driver_t mgi_driver; /* Driver reference */ @@ -374,9 +400,11 @@ struct mac_group_info_s { uint_t mgi_count; /* Count of rings */ mac_intr_t mgi_intr; /* Optional per-group intr */ - /* Only used for rx groups */ + /* Only used for Rx groups */ mac_add_mac_addr_t mgi_addmac; /* Add a MAC address */ mac_rem_mac_addr_t mgi_remmac; /* Remove a MAC address */ + mac_add_vlan_filter_t mgi_addvlan; /* Add a VLAN filter */ + mac_rem_vlan_filter_t mgi_remvlan; /* Remove a VLAN filter */ }; /* @@ -494,14 +522,14 @@ extern void mac_free(mac_register_t *); extern int mac_register(mac_register_t *, mac_handle_t *); extern int mac_disable_nowait(mac_handle_t); extern int mac_disable(mac_handle_t); -extern int mac_unregister(mac_handle_t); -extern void mac_rx(mac_handle_t, mac_resource_handle_t, +extern int mac_unregister(mac_handle_t); +extern void mac_rx(mac_handle_t, mac_resource_handle_t, mblk_t *); -extern void mac_rx_ring(mac_handle_t, mac_ring_handle_t, +extern void mac_rx_ring(mac_handle_t, mac_ring_handle_t, mblk_t *, uint64_t); -extern void mac_link_update(mac_handle_t, link_state_t); -extern void mac_link_redo(mac_handle_t, link_state_t); -extern void mac_unicst_update(mac_handle_t, +extern void mac_link_update(mac_handle_t, link_state_t); +extern void mac_link_redo(mac_handle_t, link_state_t); +extern void mac_unicst_update(mac_handle_t, const uint8_t *); extern void mac_dst_update(mac_handle_t, const uint8_t *); extern void mac_tx_update(mac_handle_t); -- cgit v1.2.3