diff options
24 files changed, 1829 insertions, 603 deletions
diff --git a/usr/src/uts/common/inet/ip/ip6_input.c b/usr/src/uts/common/inet/ip/ip6_input.c index 21cd3cd2fe..cdff35273e 100644 --- a/usr/src/uts/common/inet/ip/ip6_input.c +++ b/usr/src/uts/common/inet/ip/ip6_input.c @@ -23,6 +23,7 @@ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved * * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright 2018 Joyent, Inc. */ /* Copyright (c) 1990 Mentat Inc. */ @@ -143,11 +144,9 @@ static void ip_input_multicast_v6(ire_t *, mblk_t *, ip6_t *, * The ill will always be valid if this function is called directly from * the driver. * - * If ip_input_v6() is called from GLDv3: - * - * - This must be a non-VLAN IP stream. - * - 'mp' is either an untagged or a special priority-tagged packet. - * - Any VLAN tag that was in the MAC header has been stripped. + * If this chain is part of a VLAN stream, then the VLAN tag is + * stripped from the MAC header before being delivered to this + * function. * * If the IP header in packet is not 32-bit aligned, every message in the * chain will be aligned before further operations. This is required on SPARC diff --git a/usr/src/uts/common/inet/ip/ip_input.c b/usr/src/uts/common/inet/ip/ip_input.c index ad753c165b..aea49c19d3 100644 --- a/usr/src/uts/common/inet/ip/ip_input.c +++ b/usr/src/uts/common/inet/ip/ip_input.c @@ -23,6 +23,7 @@ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. * * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright 2018 Joyent, Inc. */ /* Copyright (c) 1990 Mentat Inc. */ @@ -146,11 +147,9 @@ static void ip_input_multicast_v4(ire_t *, mblk_t *, ipha_t *, * The ill will always be valid if this function is called directly from * the driver. * - * If ip_input() is called from GLDv3: - * - * - This must be a non-VLAN IP stream. - * - 'mp' is either an untagged or a special priority-tagged packet. - * - Any VLAN tag that was in the MAC header has been stripped. 
+ * If this chain is part of a VLAN stream, then the VLAN tag is + * stripped from the MAC header before being delivered to this + * function. * * If the IP header in packet is not 32-bit aligned, every message in the * chain will be aligned before further operations. This is required on SPARC diff --git a/usr/src/uts/common/io/aggr/aggr_grp.c b/usr/src/uts/common/io/aggr/aggr_grp.c index 7e930c89e8..9932c2cb58 100644 --- a/usr/src/uts/common/io/aggr/aggr_grp.c +++ b/usr/src/uts/common/io/aggr/aggr_grp.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2017, Joyent, Inc. + * Copyright 2018 Joyent, Inc. */ /* @@ -124,6 +124,8 @@ static int aggr_pseudo_enable_intr(mac_intr_handle_t); static int aggr_pseudo_start_ring(mac_ring_driver_t, uint64_t); static int aggr_addmac(void *, const uint8_t *); static int aggr_remmac(void *, const uint8_t *); +static int aggr_addvlan(mac_group_driver_t, uint16_t); +static int aggr_remvlan(mac_group_driver_t, uint16_t); static mblk_t *aggr_rx_poll(void *, int); static void aggr_fill_ring(void *, mac_ring_type_t, const int, const int, mac_ring_info_t *, mac_ring_handle_t); @@ -324,6 +326,7 @@ aggr_grp_attach_port(aggr_grp_t *grp, aggr_port_t *port) return (B_FALSE); } + mutex_enter(&grp->lg_stat_lock); if (grp->lg_ifspeed == 0) { /* * The group inherits the speed of the first link being @@ -337,8 +340,10 @@ aggr_grp_attach_port(aggr_grp_t *grp, aggr_port_t *port) * the group link speed, as per 802.3ad. Since it is * not, the attach is cancelled. 
*/ + mutex_exit(&grp->lg_stat_lock); return (B_FALSE); } + mutex_exit(&grp->lg_stat_lock); grp->lg_nattached_ports++; @@ -347,7 +352,9 @@ aggr_grp_attach_port(aggr_grp_t *grp, aggr_port_t *port) */ if (grp->lg_link_state != LINK_STATE_UP) { grp->lg_link_state = LINK_STATE_UP; + mutex_enter(&grp->lg_stat_lock); grp->lg_link_duplex = LINK_DUPLEX_FULL; + mutex_exit(&grp->lg_stat_lock); link_state_changed = B_TRUE; } @@ -405,9 +412,11 @@ aggr_grp_detach_port(aggr_grp_t *grp, aggr_port_t *port) grp->lg_nattached_ports--; if (grp->lg_nattached_ports == 0) { /* the last attached MAC port of the group is being detached */ - grp->lg_ifspeed = 0; grp->lg_link_state = LINK_STATE_DOWN; + mutex_enter(&grp->lg_stat_lock); + grp->lg_ifspeed = 0; grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN; + mutex_exit(&grp->lg_stat_lock); link_state_changed = B_TRUE; } @@ -675,9 +684,13 @@ aggr_rem_pseudo_rx_ring(aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh) } /* - * This function is called to create pseudo rings over the hardware rings of - * the underlying device. Note that there is a 1:1 mapping between the pseudo - * RX rings of the aggr and the hardware rings of the underlying port. + * Create pseudo rings over the HW rings of the port. + * + * o Create a pseudo ring in rx_grp per HW ring in the port's HW group. + * + * o Program existing unicast filters on the pseudo group into the HW group. + * + * o Program existing VLAN filters on the pseudo group into the HW group. 
*/ static int aggr_add_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp) @@ -686,6 +699,7 @@ aggr_add_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp) mac_ring_handle_t hw_rh[MAX_RINGS_PER_GROUP]; aggr_unicst_addr_t *addr, *a; mac_perim_handle_t pmph; + aggr_vlan_t *avp; int hw_rh_cnt, i = 0, j; int err = 0; @@ -693,63 +707,90 @@ aggr_add_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp) mac_perim_enter_by_mh(port->lp_mh, &pmph); /* - * This function must be called after the aggr registers its mac - * and its RX group has been initialized. + * This function must be called after the aggr registers its MAC + * and its Rx group has been initialized. */ ASSERT(rx_grp->arg_gh != NULL); /* - * Get the list the the underlying HW rings. + * Get the list of the underlying HW rings. */ hw_rh_cnt = mac_hwrings_get(port->lp_mch, &port->lp_hwgh, hw_rh, MAC_RING_TYPE_RX); if (port->lp_hwgh != NULL) { /* - * Quiesce the HW ring and the mac srs on the ring. Note + * Quiesce the HW ring and the MAC SRS on the ring. Note * that the HW ring will be restarted when the pseudo ring * is started. At that time all the packets will be - * directly passed up to the pseudo RX ring and handled - * by mac srs created over the pseudo RX ring. + * directly passed up to the pseudo Rx ring and handled + * by MAC SRS created over the pseudo Rx ring. */ mac_rx_client_quiesce(port->lp_mch); mac_srs_perm_quiesce(port->lp_mch, B_TRUE); } /* - * Add all the unicast addresses to the newly added port. + * Add existing VLAN and unicast address filters to the port. 
*/ + for (avp = list_head(&rx_grp->arg_vlans); avp != NULL; + avp = list_next(&rx_grp->arg_vlans, avp)) { + if ((err = aggr_port_addvlan(port, avp->av_vid)) != 0) + goto err; + } + for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next) { if ((err = aggr_port_addmac(port, addr->aua_addr)) != 0) - break; + goto err; } - for (i = 0; err == 0 && i < hw_rh_cnt; i++) + for (i = 0; i < hw_rh_cnt; i++) { err = aggr_add_pseudo_rx_ring(port, rx_grp, hw_rh[i]); + if (err != 0) + goto err; + } - if (err != 0) { - for (j = 0; j < i; j++) - aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[j]); + port->lp_rx_grp_added = B_TRUE; + mac_perim_exit(pmph); + return (0); + +err: + ASSERT(err != 0); + + for (j = 0; j < i; j++) + aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[j]); + + for (a = rx_grp->arg_macaddr; a != addr; a = a->aua_next) + aggr_port_remmac(port, a->aua_addr); - for (a = rx_grp->arg_macaddr; a != addr; a = a->aua_next) - aggr_port_remmac(port, a->aua_addr); + if (avp != NULL) + avp = list_prev(&rx_grp->arg_vlans, avp); - if (port->lp_hwgh != NULL) { - mac_srs_perm_quiesce(port->lp_mch, B_FALSE); - mac_rx_client_restart(port->lp_mch); - port->lp_hwgh = NULL; + for (; avp != NULL; avp = list_prev(&rx_grp->arg_vlans, avp)) { + int err2; + + if ((err2 = aggr_port_remvlan(port, avp->av_vid)) != 0) { + cmn_err(CE_WARN, "Failed to remove VLAN %u from port %s" + ": errno %d.", avp->av_vid, + mac_client_name(port->lp_mch), err2); } - } else { - port->lp_rx_grp_added = B_TRUE; } -done: + + if (port->lp_hwgh != NULL) { + mac_srs_perm_quiesce(port->lp_mch, B_FALSE); + mac_rx_client_restart(port->lp_mch); + port->lp_hwgh = NULL; + } + mac_perim_exit(pmph); return (err); } /* - * This function is called by aggr to remove pseudo RX rings over the - * HW rings of the underlying port. + * Destroy the pseudo rings mapping to this port and remove all VLAN + * and unicast filters from this port. 
Even if there are no underlying + * HW rings we must still remove the unicast filters to take the port + * out of promisc mode. */ static void aggr_rem_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp) @@ -771,16 +812,23 @@ aggr_rem_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp) hw_rh_cnt = mac_hwrings_get(port->lp_mch, &hwgh, hw_rh, MAC_RING_TYPE_RX); - /* - * If hw_rh_cnt is 0, it means that the underlying port does not - * support RX rings. Directly return in this case. - */ for (i = 0; i < hw_rh_cnt; i++) aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[i]); for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next) aggr_port_remmac(port, addr->aua_addr); + for (aggr_vlan_t *avp = list_head(&rx_grp->arg_vlans); avp != NULL; + avp = list_next(&rx_grp->arg_vlans, avp)) { + int err; + + if ((err = aggr_port_remvlan(port, avp->av_vid)) != 0) { + cmn_err(CE_WARN, "Failed to remove VLAN %u from port %s" + ": errno %d.", avp->av_vid, + mac_client_name(port->lp_mch), err); + } + } + if (port->lp_hwgh != NULL) { port->lp_hwgh = NULL; @@ -1307,6 +1355,10 @@ aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports, bzero(&grp->lg_tx_group, sizeof (aggr_pseudo_tx_group_t)); aggr_lacp_init_grp(grp); + grp->lg_rx_group.arg_untagged = 0; + list_create(&(grp->lg_rx_group.arg_vlans), sizeof (aggr_vlan_t), + offsetof(aggr_vlan_t, av_link)); + /* add MAC ports to group */ grp->lg_ports = NULL; grp->lg_nports = 0; @@ -1323,7 +1375,7 @@ aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports, grp->lg_key = key; for (i = 0; i < nports; i++) { - err = aggr_grp_add_port(grp, ports[i].lp_linkid, force, NULL); + err = aggr_grp_add_port(grp, ports[i].lp_linkid, force, &port); if (err != 0) goto bail; } @@ -1545,7 +1597,9 @@ aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port, continue; val = aggr_port_stat(port, stat); val -= port->lp_stat[i]; + mutex_enter(&grp->lg_stat_lock); grp->lg_stat[i] += val; + 
mutex_exit(&grp->lg_stat_lock); } for (i = 0; i < ETHER_NSTAT; i++) { stat = i + MACTYPE_STAT_MIN; @@ -1553,7 +1607,9 @@ aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port, continue; val = aggr_port_stat(port, stat); val -= port->lp_ether_stat[i]; + mutex_enter(&grp->lg_stat_lock); grp->lg_ether_stat[i] += val; + mutex_exit(&grp->lg_stat_lock); } grp->lg_nports--; @@ -1802,6 +1858,8 @@ aggr_grp_delete(datalink_id_t linkid, cred_t *cred) VERIFY(mac_unregister(grp->lg_mh) == 0); grp->lg_mh = NULL; + list_destroy(&(grp->lg_rx_group.arg_vlans)); + AGGR_GRP_REFRELE(grp); return (0); } @@ -1884,6 +1942,8 @@ aggr_grp_stat(aggr_grp_t *grp, uint_t stat, uint64_t *val) aggr_port_t *port; uint_t stat_index; + ASSERT(MUTEX_HELD(&grp->lg_stat_lock)); + /* We only aggregate counter statistics. */ if (IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat) || IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat)) { @@ -1952,10 +2012,9 @@ static int aggr_m_stat(void *arg, uint_t stat, uint64_t *val) { aggr_grp_t *grp = arg; - mac_perim_handle_t mph; int rval = 0; - mac_perim_enter_by_mh(grp->lg_mh, &mph); + mutex_enter(&grp->lg_stat_lock); switch (stat) { case MAC_STAT_IFSPEED: @@ -1975,7 +2034,7 @@ aggr_m_stat(void *arg, uint_t stat, uint64_t *val) rval = aggr_grp_stat(grp, stat, val); } - mac_perim_exit(mph); + mutex_exit(&grp->lg_stat_lock); return (rval); } @@ -2207,7 +2266,7 @@ aggr_m_capab_get(void *arg, mac_capab_t cap, void *cap_data) } /* - * Callback funtion for MAC layer to register groups. + * Callback function for MAC layer to register groups. */ static void aggr_fill_group(void *arg, mac_ring_type_t rtype, const int index, @@ -2229,6 +2288,14 @@ aggr_fill_group(void *arg, mac_ring_type_t rtype, const int index, infop->mgi_addmac = aggr_addmac; infop->mgi_remmac = aggr_remmac; infop->mgi_count = rx_group->arg_ring_cnt; + + /* + * Always set the HW VLAN callbacks. They are smart + * enough to know when a port has HW VLAN filters to + * program and when it doesn't. 
+ */ + infop->mgi_addvlan = aggr_addvlan; + infop->mgi_remvlan = aggr_remvlan; } else { tx_group = &grp->lg_tx_group; tx_group->atg_gh = gh; @@ -2440,6 +2507,186 @@ aggr_remmac(void *arg, const uint8_t *mac_addr) } /* + * Search for VID in the Rx group's list and return a pointer if + * found. Otherwise return NULL. + */ +static aggr_vlan_t * +aggr_find_vlan(aggr_pseudo_rx_group_t *rx_group, uint16_t vid) +{ + ASSERT(MAC_PERIM_HELD(rx_group->arg_grp->lg_mh)); + for (aggr_vlan_t *avp = list_head(&rx_group->arg_vlans); avp != NULL; + avp = list_next(&rx_group->arg_vlans, avp)) { + if (avp->av_vid == vid) + return (avp); + } + + return (NULL); +} + +/* + * Accept traffic on the specified VID. + * + * Persist VLAN state in the aggr so that ports added later will + * receive the correct filters. In the future it would be nice to + * allow aggr to iterate its clients instead of duplicating state. + */ +static int +aggr_addvlan(mac_group_driver_t gdriver, uint16_t vid) +{ + aggr_pseudo_rx_group_t *rx_group = (aggr_pseudo_rx_group_t *)gdriver; + aggr_grp_t *aggr = rx_group->arg_grp; + aggr_port_t *port, *p; + mac_perim_handle_t mph; + int err = 0; + aggr_vlan_t *avp = NULL; + + mac_perim_enter_by_mh(aggr->lg_mh, &mph); + + if (vid == MAC_VLAN_UNTAGGED) { + /* + * Aggr is both a MAC provider and MAC client. As a + * MAC provider it is passed MAC_VLAN_UNTAGGED by its + * client. As a client itself, it should pass + * VLAN_ID_NONE to its ports. 
+ */ + vid = VLAN_ID_NONE; + rx_group->arg_untagged++; + goto update_ports; + } + + avp = aggr_find_vlan(rx_group, vid); + + if (avp != NULL) { + avp->av_refs++; + mac_perim_exit(mph); + return (0); + } + + avp = kmem_zalloc(sizeof (aggr_vlan_t), KM_SLEEP); + avp->av_vid = vid; + avp->av_refs = 1; + +update_ports: + for (port = aggr->lg_ports; port != NULL; port = port->lp_next) + if ((err = aggr_port_addvlan(port, vid)) != 0) + break; + + if (err != 0) { + /* + * If any of these calls fail then we are in a + * situation where the ports have different HW state. + * There's no reasonable action the MAC client can + * take in this scenario to rectify the situation. + */ + for (p = aggr->lg_ports; p != port; p = p->lp_next) { + int err2; + + if ((err2 = aggr_port_remvlan(p, vid)) != 0) { + cmn_err(CE_WARN, "Failed to remove VLAN %u" + " from port %s: errno %d.", vid, + mac_client_name(p->lp_mch), err2); + } + + } + + if (vid == VLAN_ID_NONE) + rx_group->arg_untagged--; + + if (avp != NULL) { + kmem_free(avp, sizeof (aggr_vlan_t)); + avp = NULL; + } + } + + if (avp != NULL) + list_insert_tail(&rx_group->arg_vlans, avp); + +done: + mac_perim_exit(mph); + return (err); +} + +/* + * Stop accepting traffic on this VLAN if it's the last use of this VLAN. + */ +static int +aggr_remvlan(mac_group_driver_t gdriver, uint16_t vid) +{ + aggr_pseudo_rx_group_t *rx_group = (aggr_pseudo_rx_group_t *)gdriver; + aggr_grp_t *aggr = rx_group->arg_grp; + aggr_port_t *port, *p; + mac_perim_handle_t mph; + int err = 0; + aggr_vlan_t *avp = NULL; + + mac_perim_enter_by_mh(aggr->lg_mh, &mph); + + /* + * See the comment in aggr_addvlan(). 
+ */ + if (vid == MAC_VLAN_UNTAGGED) { + vid = VLAN_ID_NONE; + rx_group->arg_untagged--; + + if (rx_group->arg_untagged > 0) + goto done; + + goto update_ports; + } + + avp = aggr_find_vlan(rx_group, vid); + + if (avp == NULL) { + err = ENOENT; + goto done; + } + + avp->av_refs--; + + if (avp->av_refs > 0) + goto done; + +update_ports: + for (port = aggr->lg_ports; port != NULL; port = port->lp_next) + if ((err = aggr_port_remvlan(port, vid)) != 0) + break; + + /* + * See the comment in aggr_addvlan() for justification of the + * use of VERIFY here. + */ + if (err != 0) { + for (p = aggr->lg_ports; p != port; p = p->lp_next) { + int err2; + + if ((err2 = aggr_port_addvlan(p, vid)) != 0) { + cmn_err(CE_WARN, "Failed to add VLAN %u" + " to port %s: errno %d.", vid, + mac_client_name(p->lp_mch), err2); + } + } + + if (avp != NULL) + avp->av_refs++; + + if (vid == VLAN_ID_NONE) + rx_group->arg_untagged++; + + goto done; + } + + if (err == 0 && avp != NULL) { + VERIFY3U(avp->av_refs, ==, 0); + list_remove(&rx_group->arg_vlans, avp); + kmem_free(avp, sizeof (aggr_vlan_t)); + } + +done: + mac_perim_exit(mph); + return (err); +} + +/* * Add or remove the multicast addresses that are defined for the group * to or from the specified port. * diff --git a/usr/src/uts/common/io/aggr/aggr_port.c b/usr/src/uts/common/io/aggr/aggr_port.c index 00545d2c03..9d2edd4f97 100644 --- a/usr/src/uts/common/io/aggr/aggr_port.c +++ b/usr/src/uts/common/io/aggr/aggr_port.c @@ -21,6 +21,8 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2012 OmniTI Computer Consulting, Inc All rights reserved. + * Copyright 2020 Joyent, Inc. */ /* @@ -373,10 +375,14 @@ aggr_port_notify_link(aggr_grp_t *grp, aggr_port_t *port) /* link speed changes? 
*/ ifspeed = aggr_port_stat(port, MAC_STAT_IFSPEED); if (port->lp_ifspeed != ifspeed) { + mutex_enter(&grp->lg_stat_lock); + if (port->lp_state == AGGR_PORT_STATE_ATTACHED) do_detach |= (ifspeed != grp->lg_ifspeed); else do_attach |= (ifspeed == grp->lg_ifspeed); + + mutex_exit(&grp->lg_stat_lock); } port->lp_ifspeed = ifspeed; @@ -528,8 +534,15 @@ aggr_port_promisc(aggr_port_t *port, boolean_t on) if (on) { mac_rx_clear(port->lp_mch); + + /* + * We use the promisc callback because without hardware + * rings, we deliver through flows that will cause duplicate + * delivery of packets when we've flipped into this mode + * to compensate for the lack of hardware MAC matching + */ rc = mac_promisc_add(port->lp_mch, MAC_CLIENT_PROMISC_ALL, - aggr_recv_cb, port, &port->lp_mphp, + aggr_recv_promisc_cb, port, &port->lp_mphp, MAC_PROMISC_FLAGS_NO_TX_LOOP); if (rc != 0) { mac_rx_set(port->lp_mch, aggr_recv_cb, port); @@ -679,3 +692,47 @@ aggr_port_remmac(aggr_port_t *port, const uint8_t *mac_addr) } mac_perim_exit(pmph); } + +int +aggr_port_addvlan(aggr_port_t *port, uint16_t vid) +{ + mac_perim_handle_t pmph; + int err; + + ASSERT(MAC_PERIM_HELD(port->lp_grp->lg_mh)); + mac_perim_enter_by_mh(port->lp_mh, &pmph); + + /* + * Add the VLAN filter to the HW group if the port has a HW + * group. If the port doesn't have a HW group, then it will + * implicitly allow tagged traffic to pass and there is + * nothing to do. 
+ */ + if (port->lp_hwgh == NULL) { + mac_perim_exit(pmph); + return (0); + } + + err = mac_hwgroup_addvlan(port->lp_hwgh, vid); + mac_perim_exit(pmph); + return (err); +} + +int +aggr_port_remvlan(aggr_port_t *port, uint16_t vid) +{ + mac_perim_handle_t pmph; + int err; + + ASSERT(MAC_PERIM_HELD(port->lp_grp->lg_mh)); + mac_perim_enter_by_mh(port->lp_mh, &pmph); + + if (port->lp_hwgh == NULL) { + mac_perim_exit(pmph); + return (0); + } + + err = mac_hwgroup_remvlan(port->lp_hwgh, vid); + mac_perim_exit(pmph); + return (err); +} diff --git a/usr/src/uts/common/io/aggr/aggr_recv.c b/usr/src/uts/common/io/aggr/aggr_recv.c index 2bdb7872e3..33a060da48 100644 --- a/usr/src/uts/common/io/aggr/aggr_recv.c +++ b/usr/src/uts/common/io/aggr/aggr_recv.c @@ -21,6 +21,7 @@ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2012 OmniTI Computer Consulting, Inc All rights reserved. */ /* @@ -68,16 +69,28 @@ aggr_recv_lacp(aggr_port_t *port, mac_resource_handle_t mrh, mblk_t *mp) /* * Callback function invoked by MAC service module when packets are - * made available by a MAC port. + * made available by a MAC port, both in promisc_on mode and not. */ /* ARGSUSED */ -void -aggr_recv_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp, - boolean_t loopback) +static void +aggr_recv_path_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp, + boolean_t loopback, boolean_t promisc_path) { aggr_port_t *port = (aggr_port_t *)arg; aggr_grp_t *grp = port->lp_grp; + /* + * In the case where lp_promisc_on has been turned on to + * compensate for insufficient hardware MAC matching and + * hardware rings are not in use we will fall back to + * using flows for delivery which can result in duplicates + * pushed up the stack. Only respect the chosen path. 
+ */ + if (port->lp_promisc_on != promisc_path) { + freemsgchain(mp); + return; + } + if (grp->lg_lacp_mode == AGGR_LACP_OFF) { aggr_mac_rx(grp->lg_mh, mrh, mp); } else { @@ -161,3 +174,19 @@ aggr_recv_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp, } } } + +/* ARGSUSED */ +void +aggr_recv_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp, + boolean_t loopback) +{ + aggr_recv_path_cb(arg, mrh, mp, loopback, B_FALSE); +} + +/* ARGSUSED */ +void +aggr_recv_promisc_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp, + boolean_t loopback) +{ + aggr_recv_path_cb(arg, mrh, mp, loopback, B_TRUE); +} diff --git a/usr/src/uts/common/io/dld/dld_proto.c b/usr/src/uts/common/io/dld/dld_proto.c index cadd2a76d3..b7eeb35b92 100644 --- a/usr/src/uts/common/io/dld/dld_proto.c +++ b/usr/src/uts/common/io/dld/dld_proto.c @@ -1377,24 +1377,22 @@ dld_capab_direct(dld_str_t *dsp, void *data, uint_t flags) } /* - * dld_capab_poll_enable() + * This function is misnamed. All polling and fanouts are run out of + * the lower MAC for VNICs and out of the MAC for NICs. The + * availability of Rx rings and promiscous mode is taken care of + * between the soft ring set (mac_srs), the Rx ring, and the SW + * classifier. Fanout, if necessary, is done by the soft rings that + * are part of the SRS. By default the SRS divvies up the packets + * based on protocol: TCP, UDP, or Other (OTH). * - * This function is misnamed. All polling and fanouts are run out of the - * lower mac (in case of VNIC and the only mac in case of NICs). The - * availability of Rx ring and promiscous mode is all taken care between - * the soft ring set (mac_srs), the Rx ring, and S/W classifier. Any - * fanout necessary is done by the soft rings that are part of the - * mac_srs (by default mac_srs sends the packets up via a TCP and - * non TCP soft ring). 
- * - * The mac_srs (or its associated soft rings) always store the ill_rx_ring + * The SRS (or its associated soft rings) always store the ill_rx_ring * (the cookie returned when they registered with IP during plumb) as their * 2nd argument which is passed up as mac_resource_handle_t. The upcall * function and 1st argument is what the caller registered when they * called mac_rx_classify_flow_add() to register the flow. For VNIC, * the function is vnic_rx and argument is vnic_t. For regular NIC * case, it mac_rx_default and mac_handle_t. As explained above, the - * mac_srs (or its soft ring) will add the ill_rx_ring (mac_resource_handle_t) + * SRS (or its soft ring) will add the ill_rx_ring (mac_resource_handle_t) * from its stored 2nd argument. */ static int @@ -1407,11 +1405,11 @@ dld_capab_poll_enable(dld_str_t *dsp, dld_capab_poll_t *poll) return (ENOTSUP); /* - * Enable client polling if and only if DLS bypass is possible. - * Special cases like VLANs need DLS processing in the Rx data path. - * In such a case we can neither allow the client (IP) to directly - * poll the softring (since DLS processing hasn't been done) nor can - * we allow DLS bypass. + * Enable client polling if and only if DLS bypass is + * possible. Some traffic requires DLS processing in the Rx + * data path. In such a case we can neither allow the client + * (IP) to directly poll the soft ring (since DLS processing + * hasn't been done) nor can we allow DLS bypass. */ if (!mac_rx_bypass_set(dsp->ds_mch, dsp->ds_rx, dsp->ds_rx_arg)) return (ENOTSUP); diff --git a/usr/src/uts/common/io/dls/dls.c b/usr/src/uts/common/io/dls/dls.c index d6bc723371..2dc16c4586 100644 --- a/usr/src/uts/common/io/dls/dls.c +++ b/usr/src/uts/common/io/dls/dls.c @@ -171,16 +171,16 @@ dls_bind(dld_str_t *dsp, uint32_t sap) /* * The MAC layer does the VLAN demultiplexing and will only pass up * untagged packets to non-promiscuous primary MAC clients. 
In order to - * support the binding to the VLAN SAP which is required by DLPI, dls + * support binding to the VLAN SAP, which is required by DLPI, DLS * needs to get a copy of all tagged packets when the client binds to * the VLAN SAP. We do this by registering a separate promiscuous - * callback for each dls client binding to that SAP. + * callback for each DLS client binding to that SAP. * * Note: even though there are two promiscuous handles in dld_str_t, * ds_mph is for the regular promiscuous mode, ds_vlan_mph is the handle - * to receive VLAN pkt when promiscuous mode is not on. Only one of - * them can be non-NULL at the same time, to avoid receiving dup copies - * of pkts. + * to receive VLAN traffic when promiscuous mode is not on. Only one of + * them can be non-NULL at the same time, to avoid receiving duplicate + * copies of packets. */ if (sap == ETHERTYPE_VLAN && dsp->ds_promisc == 0) { int err; @@ -652,8 +652,8 @@ dls_mac_active_set(dls_link_t *dlp) /* request the primary MAC address */ if ((err = mac_unicast_add(dlp->dl_mch, NULL, MAC_UNICAST_PRIMARY | MAC_UNICAST_TAG_DISABLE | - MAC_UNICAST_DISABLE_TX_VID_CHECK, &dlp->dl_mah, 0, - &diag)) != 0) { + MAC_UNICAST_DISABLE_TX_VID_CHECK, &dlp->dl_mah, + VLAN_ID_NONE, &diag)) != 0) { return (err); } diff --git a/usr/src/uts/common/io/dls/dls_link.c b/usr/src/uts/common/io/dls/dls_link.c index 23580d0c40..6f9049b724 100644 --- a/usr/src/uts/common/io/dls/dls_link.c +++ b/usr/src/uts/common/io/dls/dls_link.c @@ -21,7 +21,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2017 Joyent, Inc. + * Copyright 2018 Joyent, Inc. */ /* @@ -382,7 +382,16 @@ i_dls_link_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp, vid = VLAN_ID(mhi.mhi_tci); + /* + * This condition is true only when a sun4v vsw client + * is on the scene; as it is the only type of client + * that multiplexes VLANs on a single client instance. 
+ * All other types of clients have one VLAN per client + * instance. In that case, MAC strips the VLAN tag + * before delivering it to DLS (see mac_rx_deliver()). + */ if (mhi.mhi_istagged) { + /* * If it is tagged traffic, send it upstream to * all dld_str_t which are attached to the physical diff --git a/usr/src/uts/common/io/ixgbe/ixgbe_main.c b/usr/src/uts/common/io/ixgbe/ixgbe_main.c index 2c90127c6c..3463be30b9 100644 --- a/usr/src/uts/common/io/ixgbe/ixgbe_main.c +++ b/usr/src/uts/common/io/ixgbe/ixgbe_main.c @@ -25,7 +25,7 @@ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2019 Joyent, Inc. + * Copyright 2020 Joyent, Inc. * Copyright 2012 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2013 Saso Kiselkov. All rights reserved. * Copyright (c) 2013 OSN Online Service Nuernberg GmbH. All rights reserved. @@ -57,8 +57,8 @@ static int ixgbe_alloc_rings(ixgbe_t *); static void ixgbe_free_rings(ixgbe_t *); static int ixgbe_alloc_rx_data(ixgbe_t *); static void ixgbe_free_rx_data(ixgbe_t *); -static void ixgbe_setup_rings(ixgbe_t *); -static void ixgbe_setup_rx(ixgbe_t *); +static int ixgbe_setup_rings(ixgbe_t *); +static int ixgbe_setup_rx(ixgbe_t *); static void ixgbe_setup_tx(ixgbe_t *); static void ixgbe_setup_rx_ring(ixgbe_rx_ring_t *); static void ixgbe_setup_tx_ring(ixgbe_tx_ring_t *); @@ -67,6 +67,7 @@ static void ixgbe_setup_vmdq(ixgbe_t *); static void ixgbe_setup_vmdq_rss(ixgbe_t *); static void ixgbe_setup_rss_table(ixgbe_t *); static void ixgbe_init_unicst(ixgbe_t *); +static int ixgbe_init_vlan(ixgbe_t *); static int ixgbe_unicst_find(ixgbe_t *, const uint8_t *); static void ixgbe_setup_multicst(ixgbe_t *); static void ixgbe_get_hw_state(ixgbe_t *); @@ -113,6 +114,8 @@ static void ixgbe_intr_other_work(ixgbe_t *, uint32_t); static void ixgbe_get_driver_control(struct ixgbe_hw *); static int ixgbe_addmac(void *, const uint8_t *); static int ixgbe_remmac(void *, const uint8_t *); +static int 
ixgbe_addvlan(mac_group_driver_t, uint16_t); +static int ixgbe_remvlan(mac_group_driver_t, uint16_t); static void ixgbe_release_driver_control(struct ixgbe_hw *); static int ixgbe_attach(dev_info_t *, ddi_attach_cmd_t); @@ -1159,6 +1162,8 @@ ixgbe_init_driver_settings(ixgbe_t *ixgbe) rx_group = &ixgbe->rx_groups[i]; rx_group->index = i; rx_group->ixgbe = ixgbe; + list_create(&rx_group->vlans, sizeof (ixgbe_vlan_t), + offsetof(ixgbe_vlan_t, ixvl_link)); } for (i = 0; i < ixgbe->num_tx_rings; i++) { @@ -1909,7 +1914,8 @@ ixgbe_start(ixgbe_t *ixgbe, boolean_t alloc_buffer) /* * Setup the rx/tx rings */ - ixgbe_setup_rings(ixgbe); + if (ixgbe_setup_rings(ixgbe) != IXGBE_SUCCESS) + goto start_failure; /* * ixgbe_start() will be called when resetting, however if reset @@ -2282,6 +2288,16 @@ ixgbe_free_rings(ixgbe_t *ixgbe) ixgbe->tx_rings = NULL; } + for (uint_t i = 0; i < ixgbe->num_rx_groups; i++) { + ixgbe_vlan_t *vlp; + ixgbe_rx_group_t *rx_group = &ixgbe->rx_groups[i]; + + while ((vlp = list_remove_head(&rx_group->vlans)) != NULL) + kmem_free(vlp, sizeof (ixgbe_vlan_t)); + + list_destroy(&rx_group->vlans); + } + if (ixgbe->rx_groups != NULL) { kmem_free(ixgbe->rx_groups, sizeof (ixgbe_rx_group_t) * ixgbe->num_rx_groups); @@ -2336,7 +2352,7 @@ ixgbe_free_rx_data(ixgbe_t *ixgbe) /* * ixgbe_setup_rings - Setup rx/tx rings. */ -static void +static int ixgbe_setup_rings(ixgbe_t *ixgbe) { /* @@ -2346,9 +2362,12 @@ ixgbe_setup_rings(ixgbe_t *ixgbe) * 2. Initialize necessary registers for receive/transmit; * 3. 
Initialize software pointers/parameters for receive/transmit; */ - ixgbe_setup_rx(ixgbe); + if (ixgbe_setup_rx(ixgbe) != IXGBE_SUCCESS) + return (IXGBE_FAILURE); ixgbe_setup_tx(ixgbe); + + return (IXGBE_SUCCESS); } static void @@ -2435,7 +2454,7 @@ ixgbe_setup_rx_ring(ixgbe_rx_ring_t *rx_ring) IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rx_ring->hw_index), reg_val); } -static void +static int ixgbe_setup_rx(ixgbe_t *ixgbe) { ixgbe_rx_ring_t *rx_ring; @@ -2528,6 +2547,15 @@ ixgbe_setup_rx(ixgbe_t *ixgbe) } /* + * Initialize VLAN SW and HW state if VLAN filtering is + * enabled. + */ + if (ixgbe->vlft_enabled) { + if (ixgbe_init_vlan(ixgbe) != IXGBE_SUCCESS) + return (IXGBE_FAILURE); + } + + /* * Enable the receive unit. This must be done after filter * control is set in FCTRL. On 82598, we disable the descriptor monitor. * 82598 is the only adapter which defines this RXCTRL option. @@ -2618,6 +2646,8 @@ ixgbe_setup_rx(ixgbe_t *ixgbe) IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, reg_val); } + + return (IXGBE_SUCCESS); } static void @@ -2819,7 +2849,7 @@ static void ixgbe_setup_vmdq(ixgbe_t *ixgbe) { struct ixgbe_hw *hw = &ixgbe->hw; - uint32_t vmdctl, i, vtctl; + uint32_t vmdctl, i, vtctl, vlnctl; /* * Setup the VMDq Control register, enable VMDq based on @@ -2855,10 +2885,20 @@ ixgbe_setup_vmdq(ixgbe_t *ixgbe) /* * Enable Virtualization and Replication. */ - vtctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN; + vtctl = IXGBE_READ_REG(hw, IXGBE_VT_CTL); + ixgbe->rx_def_group = vtctl & IXGBE_VT_CTL_POOL_MASK; + vtctl |= IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN; IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vtctl); /* + * Enable VLAN filtering and switching (VFTA and VLVF). 
+ */ + vlnctl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); + vlnctl |= IXGBE_VLNCTRL_VFE; + IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctl); + ixgbe->vlft_enabled = B_TRUE; + + /* * Enable receiving packets to all VFs */ IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), IXGBE_VFRE_ENABLE_ALL); @@ -2878,7 +2918,7 @@ ixgbe_setup_vmdq_rss(ixgbe_t *ixgbe) { struct ixgbe_hw *hw = &ixgbe->hw; uint32_t i, mrqc; - uint32_t vtctl, vmdctl; + uint32_t vtctl, vmdctl, vlnctl; /* * Initialize RETA/ERETA table @@ -2962,10 +3002,21 @@ ixgbe_setup_vmdq_rss(ixgbe_t *ixgbe) /* * Enable Virtualization and Replication. */ + vtctl = IXGBE_READ_REG(hw, IXGBE_VT_CTL); + ixgbe->rx_def_group = vtctl & IXGBE_VT_CTL_POOL_MASK; + vtctl |= IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN; vtctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN; IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vtctl); /* + * Enable VLAN filtering and switching (VFTA and VLVF). + */ + vlnctl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); + vlnctl |= IXGBE_VLNCTRL_VFE; + IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctl); + ixgbe->vlft_enabled = B_TRUE; + + /* * Enable receiving packets to all VFs */ IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), IXGBE_VFRE_ENABLE_ALL); @@ -3136,6 +3187,53 @@ ixgbe_unicst_find(ixgbe_t *ixgbe, const uint8_t *mac_addr) } /* + * Restore the HW state to match the SW state during restart. + */ +static int +ixgbe_init_vlan(ixgbe_t *ixgbe) +{ + /* + * The device is starting for the first time; there is nothing + * to do. 
+ */ + if (!ixgbe->vlft_init) { + ixgbe->vlft_init = B_TRUE; + return (IXGBE_SUCCESS); + } + + for (uint_t i = 0; i < ixgbe->num_rx_groups; i++) { + int ret; + boolean_t vlvf_bypass; + ixgbe_rx_group_t *rxg = &ixgbe->rx_groups[i]; + struct ixgbe_hw *hw = &ixgbe->hw; + + if (rxg->aupe) { + uint32_t vml2flt; + + vml2flt = IXGBE_READ_REG(hw, IXGBE_VMOLR(rxg->index)); + vml2flt |= IXGBE_VMOLR_AUPE; + IXGBE_WRITE_REG(hw, IXGBE_VMOLR(rxg->index), vml2flt); + } + + vlvf_bypass = (rxg->index == ixgbe->rx_def_group); + for (ixgbe_vlan_t *vlp = list_head(&rxg->vlans); vlp != NULL; + vlp = list_next(&rxg->vlans, vlp)) { + ret = ixgbe_set_vfta(hw, vlp->ixvl_vid, rxg->index, + B_TRUE, vlvf_bypass); + + if (ret != IXGBE_SUCCESS) { + ixgbe_error(ixgbe, "Failed to program VFTA" + " for group %u, VID: %u, ret: %d.", + rxg->index, vlp->ixvl_vid, ret); + return (IXGBE_FAILURE); + } + } + } + + return (IXGBE_SUCCESS); +} + +/* * ixgbe_multicst_add - Add a multicst address. */ int @@ -6161,6 +6259,7 @@ ixgbe_fill_group(void *arg, mac_ring_type_t rtype, const int index, mac_group_info_t *infop, mac_group_handle_t gh) { ixgbe_t *ixgbe = (ixgbe_t *)arg; + struct ixgbe_hw *hw = &ixgbe->hw; switch (rtype) { case MAC_RING_TYPE_RX: { @@ -6174,6 +6273,20 @@ ixgbe_fill_group(void *arg, mac_ring_type_t rtype, const int index, infop->mgi_stop = NULL; infop->mgi_addmac = ixgbe_addmac; infop->mgi_remmac = ixgbe_remmac; + + if ((ixgbe->classify_mode == IXGBE_CLASSIFY_VMDQ || + ixgbe->classify_mode == IXGBE_CLASSIFY_VMDQ_RSS) && + (hw->mac.type == ixgbe_mac_82599EB || + hw->mac.type == ixgbe_mac_X540 || + hw->mac.type == ixgbe_mac_X550 || + hw->mac.type == ixgbe_mac_X550EM_x)) { + infop->mgi_addvlan = ixgbe_addvlan; + infop->mgi_remvlan = ixgbe_remvlan; + } else { + infop->mgi_addvlan = NULL; + infop->mgi_remvlan = NULL; + } + infop->mgi_count = (ixgbe->num_rx_rings / ixgbe->num_rx_groups); break; @@ -6273,6 +6386,232 @@ ixgbe_rx_ring_intr_disable(mac_intr_handle_t intrh) return (0); } +static 
ixgbe_vlan_t * +ixgbe_find_vlan(ixgbe_rx_group_t *rx_group, uint16_t vid) +{ + for (ixgbe_vlan_t *vlp = list_head(&rx_group->vlans); vlp != NULL; + vlp = list_next(&rx_group->vlans, vlp)) { + if (vlp->ixvl_vid == vid) + return (vlp); + } + + return (NULL); +} + +/* + * Attempt to use a VLAN HW filter for this group. If the group is + * interested in untagged packets then set AUPE only. If the group is + * the default then only set the VFTA. Leave the VLVF slots open for + * reserved groups to guarantee their use of HW filtering. + */ +static int +ixgbe_addvlan(mac_group_driver_t gdriver, uint16_t vid) +{ + ixgbe_rx_group_t *rx_group = (ixgbe_rx_group_t *)gdriver; + ixgbe_t *ixgbe = rx_group->ixgbe; + struct ixgbe_hw *hw = &ixgbe->hw; + ixgbe_vlan_t *vlp; + int ret; + boolean_t is_def_grp; + + mutex_enter(&ixgbe->gen_lock); + + if (ixgbe->ixgbe_state & IXGBE_SUSPENDED) { + mutex_exit(&ixgbe->gen_lock); + return (ECANCELED); + } + + /* + * Let's be sure VLAN filtering is enabled. + */ + VERIFY3B(ixgbe->vlft_enabled, ==, B_TRUE); + is_def_grp = (rx_group->index == ixgbe->rx_def_group); + + /* + * VLAN filtering is enabled but we want to receive untagged + * traffic on this group -- set the AUPE bit on the group and + * leave the VLAN tables alone. + */ + if (vid == MAC_VLAN_UNTAGGED) { + /* + * We never enable AUPE on the default group; it is + * redundant. Untagged traffic which passes L2 + * filtering is delivered to the default group if no + * other group is interested. + */ + if (!is_def_grp) { + uint32_t vml2flt; + + vml2flt = IXGBE_READ_REG(hw, + IXGBE_VMOLR(rx_group->index)); + vml2flt |= IXGBE_VMOLR_AUPE; + IXGBE_WRITE_REG(hw, IXGBE_VMOLR(rx_group->index), + vml2flt); + rx_group->aupe = B_TRUE; + } + + mutex_exit(&ixgbe->gen_lock); + return (0); + } + + vlp = ixgbe_find_vlan(rx_group, vid); + if (vlp != NULL) { + /* Only the default group supports multiple clients. 
*/ + VERIFY3B(is_def_grp, ==, B_TRUE); + vlp->ixvl_refs++; + mutex_exit(&ixgbe->gen_lock); + return (0); + } + + /* + * The default group doesn't require a VLVF entry, only a VFTA + * entry. All traffic passing L2 filtering (MPSAR + VFTA) is + * delivered to the default group if no other group is + * interested. The fourth argument, vlvf_bypass, tells the + * ixgbe common code to avoid using a VLVF slot if one isn't + * already allocated to this VLAN. + * + * This logic is meant to reserve VLVF slots for use by + * reserved groups: guaranteeing their use of HW filtering. + */ + ret = ixgbe_set_vfta(hw, vid, rx_group->index, B_TRUE, is_def_grp); + + if (ret == IXGBE_SUCCESS) { + vlp = kmem_zalloc(sizeof (ixgbe_vlan_t), KM_SLEEP); + vlp->ixvl_vid = vid; + vlp->ixvl_refs = 1; + list_insert_tail(&rx_group->vlans, vlp); + mutex_exit(&ixgbe->gen_lock); + return (0); + } + + /* + * We should actually never return ENOSPC because we've set + * things up so that every reserved group is guaranteed to + * have a VLVF slot. + */ + if (ret == IXGBE_ERR_PARAM) + ret = EINVAL; + else if (ret == IXGBE_ERR_NO_SPACE) + ret = ENOSPC; + else + ret = EIO; + + mutex_exit(&ixgbe->gen_lock); + return (ret); +} + +/* + * Attempt to remove the VLAN HW filter associated with this group. If + * we are removing a HW filter for the default group then we know only + * the VFTA was set (VLVF is reserved for non-default/reserved + * groups). If the group wishes to stop receiving untagged traffic + * then clear the AUPE but leave the VLAN filters alone. 
+ */ +static int +ixgbe_remvlan(mac_group_driver_t gdriver, uint16_t vid) +{ + ixgbe_rx_group_t *rx_group = (ixgbe_rx_group_t *)gdriver; + ixgbe_t *ixgbe = rx_group->ixgbe; + struct ixgbe_hw *hw = &ixgbe->hw; + int ret; + ixgbe_vlan_t *vlp; + boolean_t is_def_grp; + + mutex_enter(&ixgbe->gen_lock); + + if (ixgbe->ixgbe_state & IXGBE_SUSPENDED) { + mutex_exit(&ixgbe->gen_lock); + return (ECANCELED); + } + + is_def_grp = (rx_group->index == ixgbe->rx_def_group); + + /* See the AUPE comment in ixgbe_addvlan(). */ + if (vid == MAC_VLAN_UNTAGGED) { + if (!is_def_grp) { + uint32_t vml2flt; + + vml2flt = IXGBE_READ_REG(hw, + IXGBE_VMOLR(rx_group->index)); + vml2flt &= ~IXGBE_VMOLR_AUPE; + IXGBE_WRITE_REG(hw, + IXGBE_VMOLR(rx_group->index), vml2flt); + rx_group->aupe = B_FALSE; + } + mutex_exit(&ixgbe->gen_lock); + return (0); + } + + vlp = ixgbe_find_vlan(rx_group, vid); + if (vlp == NULL) { + mutex_exit(&ixgbe->gen_lock); + return (ENOENT); + } + + /* + * See the comment in ixgbe_addvlan() about is_def_grp and + * vlvf_bypass. + */ + if (vlp->ixvl_refs == 1) { + ret = ixgbe_set_vfta(hw, vid, rx_group->index, B_FALSE, + is_def_grp); + } else { + /* + * Only the default group can have multiple clients. + * If there is more than one client, leave the + * VFTA[vid] bit alone. + */ + VERIFY3B(is_def_grp, ==, B_TRUE); + VERIFY3U(vlp->ixvl_refs, >, 1); + vlp->ixvl_refs--; + mutex_exit(&ixgbe->gen_lock); + return (0); + } + + if (ret != IXGBE_SUCCESS) { + mutex_exit(&ixgbe->gen_lock); + /* IXGBE_ERR_PARAM should be the only possible error here. */ + if (ret == IXGBE_ERR_PARAM) + return (EINVAL); + else + return (EIO); + } + + VERIFY3U(vlp->ixvl_refs, ==, 1); + vlp->ixvl_refs = 0; + list_remove(&rx_group->vlans, vlp); + kmem_free(vlp, sizeof (ixgbe_vlan_t)); + + /* + * Calling ixgbe_set_vfta() on a non-default group may have + * cleared the VFTA[vid] bit even though the default group + * still has clients using the vid. 
This happens because the + * ixgbe common code doesn't ref count the use of VLANs. Check + * for any use of vid on the default group and make sure the + * VFTA[vid] bit is set. This operation is idempotent: setting + * VFTA[vid] to true if already true won't hurt anything. + */ + if (!is_def_grp) { + ixgbe_rx_group_t *defgrp; + + defgrp = &ixgbe->rx_groups[ixgbe->rx_def_group]; + vlp = ixgbe_find_vlan(defgrp, vid); + if (vlp != NULL) { + /* This shouldn't fail, but if it does return EIO. */ + ret = ixgbe_set_vfta(hw, vid, rx_group->index, B_TRUE, + B_TRUE); + if (ret != IXGBE_SUCCESS) { + mutex_exit(&ixgbe->gen_lock); + return (EIO); + } + } + } + + mutex_exit(&ixgbe->gen_lock); + return (0); +} + /* * Add a mac address. */ diff --git a/usr/src/uts/common/io/ixgbe/ixgbe_sw.h b/usr/src/uts/common/io/ixgbe/ixgbe_sw.h index 20a077d332..cfd987787a 100644 --- a/usr/src/uts/common/io/ixgbe/ixgbe_sw.h +++ b/usr/src/uts/common/io/ixgbe/ixgbe_sw.h @@ -91,6 +91,8 @@ extern "C" { #define MAX_NUM_UNICAST_ADDRESSES 0x80 #define MAX_NUM_MULTICAST_ADDRESSES 0x1000 +#define MAX_NUM_VLAN_FILTERS 0x40 + #define IXGBE_INTR_NONE 0 #define IXGBE_INTR_MSIX 1 #define IXGBE_INTR_MSI 2 @@ -387,6 +389,15 @@ typedef union ixgbe_ether_addr { } mac; } ixgbe_ether_addr_t; +/* + * The list of VLANs an Rx group will accept. 
+ */ +typedef struct ixgbe_vlan { + list_node_t ixvl_link; + uint16_t ixvl_vid; /* The VLAN ID */ + uint_t ixvl_refs; /* Number of users of this VLAN */ +} ixgbe_vlan_t; + typedef enum { USE_NONE, USE_COPY, @@ -589,6 +600,7 @@ typedef struct ixgbe_rx_ring { struct ixgbe *ixgbe; /* Pointer to ixgbe struct */ } ixgbe_rx_ring_t; + /* * Software Receive Ring Group */ @@ -596,6 +608,8 @@ typedef struct ixgbe_rx_group { uint32_t index; /* Group index */ mac_group_handle_t group_handle; /* call back group handle */ struct ixgbe *ixgbe; /* Pointer to ixgbe struct */ + boolean_t aupe; /* AUPE bit */ + list_t vlans; /* list of VLANs to allow */ } ixgbe_rx_group_t; /* @@ -662,6 +676,7 @@ typedef struct ixgbe { */ ixgbe_rx_group_t *rx_groups; /* Array of rx groups */ uint32_t num_rx_groups; /* Number of rx groups in use */ + uint32_t rx_def_group; /* Default Rx group index */ /* * Transmit Rings @@ -715,6 +730,9 @@ typedef struct ixgbe { uint32_t mcast_count; struct ether_addr mcast_table[MAX_NUM_MULTICAST_ADDRESSES]; + boolean_t vlft_enabled; /* VLAN filtering enabled? */ + boolean_t vlft_init; /* VLAN filtering initialized? */ + ulong_t sys_page_size; boolean_t link_check_complete; diff --git a/usr/src/uts/common/io/mac/mac.c b/usr/src/uts/common/io/mac/mac.c index 8709d07030..4d450a539b 100644 --- a/usr/src/uts/common/io/mac/mac.c +++ b/usr/src/uts/common/io/mac/mac.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2019 Joyent, Inc. + * Copyright 2020 Joyent, Inc. 
* Copyright 2015 Garrett D'Amore <garrett@damore.org> */ @@ -460,7 +460,7 @@ mac_init(void) mac_logging_interval = 20; mac_flow_log_enable = B_FALSE; mac_link_log_enable = B_FALSE; - mac_logging_timer = 0; + mac_logging_timer = NULL; /* Register to be notified of noteworthy pools events */ mac_pool_event_reg.pec_func = mac_pool_event_cb; @@ -1115,9 +1115,10 @@ mac_start(mac_handle_t mh) if ((defgrp = MAC_DEFAULT_RX_GROUP(mip)) != NULL) { /* - * Start the default ring, since it will be needed - * to receive broadcast and multicast traffic for - * both primary and non-primary MAC clients. + * Start the default group which is responsible + * for receiving broadcast and multicast + * traffic for both primary and non-primary + * MAC clients. */ ASSERT(defgrp->mrg_state == MAC_GROUP_STATE_REGISTERED); err = mac_start_group_and_rings(defgrp); @@ -1730,6 +1731,47 @@ mac_hwgroup_remmac(mac_group_handle_t gh, const uint8_t *addr) } /* + * Program the group's HW VLAN filter if it has such support. + * Otherwise, the group will implicitly accept tagged traffic and + * there is nothing to do. + */ +int +mac_hwgroup_addvlan(mac_group_handle_t gh, uint16_t vid) +{ + mac_group_t *group = (mac_group_t *)gh; + + if (!MAC_GROUP_HW_VLAN(group)) + return (0); + + return (mac_group_addvlan(group, vid)); +} + +int +mac_hwgroup_remvlan(mac_group_handle_t gh, uint16_t vid) +{ + mac_group_t *group = (mac_group_t *)gh; + + if (!MAC_GROUP_HW_VLAN(group)) + return (0); + + return (mac_group_remvlan(group, vid)); +} + +/* + * Determine if a MAC has HW VLAN support. This is a private API + * consumed by aggr. In the future it might be nice to have a bitfield + * in mac_capab_rings_t to track which forms of HW filtering are + * supported by the MAC. + */ +boolean_t +mac_has_hw_vlan(mac_handle_t mh) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + return (MAC_GROUP_HW_VLAN(mip->mi_rx_groups)); +} + +/* * Set the RX group to be shared/reserved. 
Note that the group must be * started/stopped outside of this function. */ @@ -2414,7 +2456,6 @@ mac_disable(mac_handle_t mh) /* * Called when the MAC instance has a non empty flow table, to de-multiplex * incoming packets to the right flow. - * The MAC's rw lock is assumed held as a READER. */ /* ARGSUSED */ static mblk_t * @@ -2425,14 +2466,14 @@ mac_rx_classify(mac_impl_t *mip, mac_resource_handle_t mrh, mblk_t *mp) int err; /* - * If the mac is a port of an aggregation, pass FLOW_IGNORE_VLAN + * If the MAC is a port of an aggregation, pass FLOW_IGNORE_VLAN * to mac_flow_lookup() so that the VLAN packets can be successfully * passed to the non-VLAN aggregation flows. * * Note that there is possibly a race between this and * mac_unicast_remove/add() and VLAN packets could be incorrectly - * classified to non-VLAN flows of non-aggregation mac clients. These - * VLAN packets will be then filtered out by the mac module. + * classified to non-VLAN flows of non-aggregation MAC clients. These + * VLAN packets will be then filtered out by the MAC module. */ if ((mip->mi_state_flags & MIS_EXCLUSIVE) != 0) flags |= FLOW_IGNORE_VLAN; @@ -4075,12 +4116,15 @@ mac_init_rings(mac_impl_t *mip, mac_ring_type_t rtype) /* - * Driver must register group->mgi_addmac/remmac() for rx groups - * to support multiple MAC addresses. + * The driver must register some form of hardware MAC + * filter in order for Rx groups to support multiple + * MAC addresses. 
*/ if (rtype == MAC_RING_TYPE_RX && - ((group_info.mgi_addmac == NULL) || - (group_info.mgi_remmac == NULL))) { + (group_info.mgi_addmac == NULL || + group_info.mgi_remmac == NULL)) { + DTRACE_PROBE1(mac__init__rings__no__mac__filter, + char *, mip->mi_name); err = EINVAL; goto bail; } @@ -4127,8 +4171,9 @@ mac_init_rings(mac_impl_t *mip, mac_ring_type_t rtype) /* Update this group's status */ mac_set_group_state(group, MAC_GROUP_STATE_REGISTERED); - } else + } else { group->mrg_rings = NULL; + } ASSERT(ring_left == 0); @@ -4318,6 +4363,38 @@ mac_free_rings(mac_impl_t *mip, mac_ring_type_t rtype) } /* + * Associate the VLAN filter to the receive group. + */ +int +mac_group_addvlan(mac_group_t *group, uint16_t vlan) +{ + VERIFY3S(group->mrg_type, ==, MAC_RING_TYPE_RX); + VERIFY3P(group->mrg_info.mgi_addvlan, !=, NULL); + + if (vlan > VLAN_ID_MAX) + return (EINVAL); + + vlan = MAC_VLAN_UNTAGGED_VID(vlan); + return (group->mrg_info.mgi_addvlan(group->mrg_info.mgi_driver, vlan)); +} + +/* + * Dissociate the VLAN from the receive group. + */ +int +mac_group_remvlan(mac_group_t *group, uint16_t vlan) +{ + VERIFY3S(group->mrg_type, ==, MAC_RING_TYPE_RX); + VERIFY3P(group->mrg_info.mgi_remvlan, !=, NULL); + + if (vlan > VLAN_ID_MAX) + return (EINVAL); + + vlan = MAC_VLAN_UNTAGGED_VID(vlan); + return (group->mrg_info.mgi_remvlan(group->mrg_info.mgi_driver, vlan)); +} + +/* * Associate a MAC address with a receive group. 
* * The return value of this function should always be checked properly, because @@ -4333,8 +4410,8 @@ mac_free_rings(mac_impl_t *mip, mac_ring_type_t rtype) int mac_group_addmac(mac_group_t *group, const uint8_t *addr) { - ASSERT(group->mrg_type == MAC_RING_TYPE_RX); - ASSERT(group->mrg_info.mgi_addmac != NULL); + VERIFY3S(group->mrg_type, ==, MAC_RING_TYPE_RX); + VERIFY3P(group->mrg_info.mgi_addmac, !=, NULL); return (group->mrg_info.mgi_addmac(group->mrg_info.mgi_driver, addr)); } @@ -4345,8 +4422,8 @@ mac_group_addmac(mac_group_t *group, const uint8_t *addr) int mac_group_remmac(mac_group_t *group, const uint8_t *addr) { - ASSERT(group->mrg_type == MAC_RING_TYPE_RX); - ASSERT(group->mrg_info.mgi_remmac != NULL); + VERIFY3S(group->mrg_type, ==, MAC_RING_TYPE_RX); + VERIFY3P(group->mrg_info.mgi_remmac, !=, NULL); return (group->mrg_info.mgi_remmac(group->mrg_info.mgi_driver, addr)); } @@ -4521,28 +4598,20 @@ i_mac_group_add_ring(mac_group_t *group, mac_ring_t *ring, int index) switch (ring->mr_type) { case MAC_RING_TYPE_RX: /* - * Setup SRS on top of the new ring if the group is - * reserved for someones exclusive use. + * Setup an SRS on top of the new ring if the group is + * reserved for someone's exclusive use. */ if (group->mrg_state == MAC_GROUP_STATE_RESERVED) { - mac_client_impl_t *mcip; + mac_client_impl_t *mcip = MAC_GROUP_ONLY_CLIENT(group); - mcip = MAC_GROUP_ONLY_CLIENT(group); - /* - * Even though this group is reserved we migth still - * have multiple clients, i.e a VLAN shares the - * group with the primary mac client. 
- */ - if (mcip != NULL) { - flent = mcip->mci_flent; - ASSERT(flent->fe_rx_srs_cnt > 0); - mac_rx_srs_group_setup(mcip, flent, SRST_LINK); - mac_fanout_setup(mcip, flent, - MCIP_RESOURCE_PROPS(mcip), mac_rx_deliver, - mcip, NULL, NULL); - } else { - ring->mr_classify_type = MAC_SW_CLASSIFIER; - } + VERIFY3P(mcip, !=, NULL); + flent = mcip->mci_flent; + VERIFY3S(flent->fe_rx_srs_cnt, >, 0); + mac_rx_srs_group_setup(mcip, flent, SRST_LINK); + mac_fanout_setup(mcip, flent, MCIP_RESOURCE_PROPS(mcip), + mac_rx_deliver, mcip, NULL, NULL); + } else { + ring->mr_classify_type = MAC_SW_CLASSIFIER; } break; case MAC_RING_TYPE_TX: @@ -4568,7 +4637,7 @@ i_mac_group_add_ring(mac_group_t *group, mac_ring_t *ring, int index) mcip = mgcp->mgc_client; flent = mcip->mci_flent; - is_aggr = (mcip->mci_state_flags & MCIS_IS_AGGR); + is_aggr = (mcip->mci_state_flags & MCIS_IS_AGGR_CLIENT); mac_srs = MCIP_TX_SRS(mcip); tx = &mac_srs->srs_tx; mac_tx_client_quiesce((mac_client_handle_t)mcip); @@ -4712,7 +4781,7 @@ i_mac_group_rem_ring(mac_group_t *group, mac_ring_t *ring, mcip = MAC_GROUP_ONLY_CLIENT(group); ASSERT(mcip != NULL); - ASSERT(mcip->mci_state_flags & MCIS_IS_AGGR); + ASSERT(mcip->mci_state_flags & MCIS_IS_AGGR_CLIENT); mac_srs = MCIP_TX_SRS(mcip); ASSERT(mac_srs->srs_tx.st_mode == SRS_TX_AGGR || mac_srs->srs_tx.st_mode == SRS_TX_BW_AGGR); @@ -4920,12 +4989,12 @@ mac_free_macaddr(mac_address_t *map) mac_impl_t *mip = map->ma_mip; ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); - ASSERT(mip->mi_addresses != NULL); + VERIFY3P(mip->mi_addresses, !=, NULL); - map = mac_find_macaddr(mip, map->ma_addr); - - ASSERT(map != NULL); - ASSERT(map->ma_nusers == 0); + VERIFY3P(map, ==, mac_find_macaddr(mip, map->ma_addr)); + VERIFY3P(map, !=, NULL); + VERIFY3S(map->ma_nusers, ==, 0); + VERIFY3P(map->ma_vlans, ==, NULL); if (map == mip->mi_addresses) { mip->mi_addresses = map->ma_next; @@ -4941,85 +5010,201 @@ mac_free_macaddr(mac_address_t *map) kmem_free(map, sizeof (mac_address_t)); } +static 
mac_vlan_t * +mac_find_vlan(mac_address_t *map, uint16_t vid) +{ + mac_vlan_t *mvp; + + for (mvp = map->ma_vlans; mvp != NULL; mvp = mvp->mv_next) { + if (mvp->mv_vid == vid) + return (mvp); + } + + return (NULL); +} + +static mac_vlan_t * +mac_add_vlan(mac_address_t *map, uint16_t vid) +{ + mac_vlan_t *mvp; + + /* + * We should never add the same {addr, VID} tuple more + * than once, but let's be sure. + */ + for (mvp = map->ma_vlans; mvp != NULL; mvp = mvp->mv_next) + VERIFY3U(mvp->mv_vid, !=, vid); + + /* Add the VLAN to the head of the VLAN list. */ + mvp = kmem_zalloc(sizeof (mac_vlan_t), KM_SLEEP); + mvp->mv_vid = vid; + mvp->mv_next = map->ma_vlans; + map->ma_vlans = mvp; + + return (mvp); +} + +static void +mac_rem_vlan(mac_address_t *map, mac_vlan_t *mvp) +{ + mac_vlan_t *pre; + + if (map->ma_vlans == mvp) { + map->ma_vlans = mvp->mv_next; + } else { + pre = map->ma_vlans; + while (pre->mv_next != mvp) { + pre = pre->mv_next; + + /* + * We've reached the end of the list without + * finding mvp. + */ + VERIFY3P(pre, !=, NULL); + } + pre->mv_next = mvp->mv_next; + } + + kmem_free(mvp, sizeof (mac_vlan_t)); +} + /* - * Add a MAC address reference for a client. If the desired MAC address - * exists, add a reference to it. Otherwise, add the new address by adding - * it to a reserved group or setting promiscuous mode. Won't try different - * group is the group is non-NULL, so the caller must explictly share - * default group when needed. - * - * Note, the primary MAC address is initialized at registration time, so - * to add it to default group only need to activate it if its reference - * count is still zero. Also, some drivers may not have advertised RINGS - * capability. + * Create a new mac_address_t if this is the first use of the address + * or add a VID to an existing address. In either case, the + * mac_address_t acts as a list of {addr, VID} tuples where each tuple + * shares the same addr. 
If group is non-NULL then attempt to program + * the MAC's HW filters for this group. Otherwise, if group is NULL, + * then the MAC has no rings and there is nothing to program. */ int -mac_add_macaddr(mac_impl_t *mip, mac_group_t *group, uint8_t *mac_addr, - boolean_t use_hw) +mac_add_macaddr_vlan(mac_impl_t *mip, mac_group_t *group, uint8_t *addr, + uint16_t vid, boolean_t use_hw) { - mac_address_t *map; - int err = 0; - boolean_t allocated_map = B_FALSE; + mac_address_t *map; + mac_vlan_t *mvp; + int err = 0; + boolean_t allocated_map = B_FALSE; + boolean_t hw_mac = B_FALSE; + boolean_t hw_vlan = B_FALSE; ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); - map = mac_find_macaddr(mip, mac_addr); + map = mac_find_macaddr(mip, addr); /* - * If the new MAC address has not been added. Allocate a new one - * and set it up. + * If this is the first use of this MAC address then allocate + * and initialize a new structure. */ if (map == NULL) { map = kmem_zalloc(sizeof (mac_address_t), KM_SLEEP); map->ma_len = mip->mi_type->mt_addr_length; - bcopy(mac_addr, map->ma_addr, map->ma_len); + bcopy(addr, map->ma_addr, map->ma_len); map->ma_nusers = 0; map->ma_group = group; map->ma_mip = mip; + map->ma_untagged = B_FALSE; - /* add the new MAC address to the head of the address list */ + /* Add the new MAC address to the head of the address list. */ map->ma_next = mip->mi_addresses; mip->mi_addresses = map; allocated_map = B_TRUE; } - ASSERT(map->ma_group == NULL || map->ma_group == group); + VERIFY(map->ma_group == NULL || map->ma_group == group); if (map->ma_group == NULL) map->ma_group = group; + if (vid == VLAN_ID_NONE) { + map->ma_untagged = B_TRUE; + mvp = NULL; + } else { + mvp = mac_add_vlan(map, vid); + } + + /* + * Set the VLAN HW filter if: + * + * o the MAC's VLAN HW filtering is enabled, and + * o the address does not currently rely on promisc mode. 
+ * + * This is called even when the client specifies an untagged + * address (VLAN_ID_NONE) because some MAC providers require + * setting additional bits to accept untagged traffic when + * VLAN HW filtering is enabled. + */ + if (MAC_GROUP_HW_VLAN(group) && + map->ma_type != MAC_ADDRESS_TYPE_UNICAST_PROMISC) { + if ((err = mac_group_addvlan(group, vid)) != 0) + goto bail; + + hw_vlan = B_TRUE; + } + + VERIFY3S(map->ma_nusers, >=, 0); + map->ma_nusers++; + /* - * If the MAC address is already in use, simply account for the - * new client. + * If this MAC address already has a HW filter then simply + * increment the counter. */ - if (map->ma_nusers++ > 0) + if (map->ma_nusers > 1) return (0); /* + * All logic from here on out is executed during initial + * creation only. + */ + VERIFY3S(map->ma_nusers, ==, 1); + + /* * Activate this MAC address by adding it to the reserved group. */ if (group != NULL) { - err = mac_group_addmac(group, (const uint8_t *)mac_addr); - if (err == 0) { - map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED; - return (0); + err = mac_group_addmac(group, (const uint8_t *)addr); + + /* + * If the driver is out of filters then we can + * continue and use promisc mode. For any other error, + * assume the driver is in a state where we can't + * program the filters or use promisc mode; so we must + * bail. + */ + if (err != 0 && err != ENOSPC) { + map->ma_nusers--; + goto bail; } + + hw_mac = (err == 0); + } + + if (hw_mac) { + map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED; + return (0); } /* * The MAC address addition failed. If the client requires a - * hardware classified MAC address, fail the operation. + * hardware classified MAC address, fail the operation. This + * feature is only used by sun4v vsw. */ - if (use_hw) { + if (use_hw && !hw_mac) { err = ENOSPC; + map->ma_nusers--; goto bail; } /* - * Try promiscuous mode. - * - * For drivers that don't advertise RINGS capability, do - * nothing for the primary address. 
+ * If we reach this point then either the MAC doesn't have + * RINGS capability or we are out of MAC address HW filters. + * In any case we must put the MAC into promiscuous mode. + */ + VERIFY(group == NULL || !hw_mac); + + /* + * The one exception is the primary address. A non-RINGS + * driver filters the primary address by default; promisc mode + * is not needed. */ if ((group == NULL) && (bcmp(map->ma_addr, mip->mi_addr, map->ma_len) == 0)) { @@ -5028,8 +5213,11 @@ mac_add_macaddr(mac_impl_t *mip, mac_group_t *group, uint8_t *mac_addr, } /* - * Enable promiscuous mode in order to receive traffic - * to the new MAC address. + * Enable promiscuous mode in order to receive traffic to the + * new MAC address. All existing HW filters still send their + * traffic to their respective group/SRSes. But with promisc + * enabled all unknown traffic is delivered to the default + * group where it is SW classified via mac_rx_classify(). */ if ((err = i_mac_promisc_set(mip, B_TRUE)) == 0) { map->ma_type = MAC_ADDRESS_TYPE_UNICAST_PROMISC; @@ -5037,44 +5225,71 @@ mac_add_macaddr(mac_impl_t *mip, mac_group_t *group, uint8_t *mac_addr, } /* - * Free the MAC address that could not be added. Don't free - * a pre-existing address, it could have been the entry - * for the primary MAC address which was pre-allocated by - * mac_init_macaddr(), and which must remain on the list. + * We failed to set promisc mode and we are about to free 'map'. */ + map->ma_nusers = 0; + bail: - map->ma_nusers--; + if (hw_vlan) { + int err2 = mac_group_remvlan(group, vid); + + if (err2 != 0) { + cmn_err(CE_WARN, "Failed to remove VLAN %u from group" + " %d on MAC %s: %d.", vid, group->mrg_index, + mip->mi_name, err2); + } + } + + if (mvp != NULL) + mac_rem_vlan(map, mvp); + if (allocated_map) mac_free_macaddr(map); + return (err); } -/* - * Remove a reference to a MAC address. This may cause to remove the MAC - * address from an associated group or to turn off promiscuous mode. 
- * The caller needs to handle the failure properly. - */ int -mac_remove_macaddr(mac_address_t *map) +mac_remove_macaddr_vlan(mac_address_t *map, uint16_t vid) { - mac_impl_t *mip = map->ma_mip; - int err = 0; + mac_vlan_t *mvp; + mac_impl_t *mip = map->ma_mip; + mac_group_t *group = map->ma_group; + int err = 0; ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); + VERIFY3P(map, ==, mac_find_macaddr(mip, map->ma_addr)); + + if (vid == VLAN_ID_NONE) { + map->ma_untagged = B_FALSE; + mvp = NULL; + } else { + mvp = mac_find_vlan(map, vid); + VERIFY3P(mvp, !=, NULL); + } - ASSERT(map == mac_find_macaddr(mip, map->ma_addr)); + if (MAC_GROUP_HW_VLAN(group) && + map->ma_type == MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED && + ((err = mac_group_remvlan(group, vid)) != 0)) + return (err); + + if (mvp != NULL) + mac_rem_vlan(map, mvp); /* * If it's not the last client using this MAC address, only update * the MAC clients count. */ - if (--map->ma_nusers > 0) + map->ma_nusers--; + if (map->ma_nusers > 0) return (0); + VERIFY3S(map->ma_nusers, ==, 0); + /* - * The MAC address is no longer used by any MAC client, so remove - * it from its associated group, or turn off promiscuous mode - * if it was enabled for the MAC address. + * The MAC address is no longer used by any MAC client, so + * remove it from its associated group. Turn off promiscuous + * mode if this is the last address relying on it. */ switch (map->ma_type) { case MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED: @@ -5082,22 +5297,60 @@ mac_remove_macaddr(mac_address_t *map) * Don't free the preset primary address for drivers that * don't advertise RINGS capability. 
*/ - if (map->ma_group == NULL) + if (group == NULL) return (0); - err = mac_group_remmac(map->ma_group, map->ma_addr); - if (err == 0) - map->ma_group = NULL; + if ((err = mac_group_remmac(group, map->ma_addr)) != 0) { + if (vid == VLAN_ID_NONE) + map->ma_untagged = B_TRUE; + else + (void) mac_add_vlan(map, vid); + + /* + * If we fail to remove the MAC address HW + * filter but then also fail to re-add the + * VLAN HW filter then we are in a busted + * state. We do our best by logging a warning + * and returning the original 'err' that got + * us here. At this point, traffic for this + * address + VLAN combination will be dropped + * until the user reboots the system. In the + * future, it would be nice to have a system + * that can compare the state of expected + * classification according to mac to the + * actual state of the provider, and report + * and fix any inconsistencies. + */ + if (MAC_GROUP_HW_VLAN(group)) { + int err2; + + err2 = mac_group_addvlan(group, vid); + if (err2 != 0) { + cmn_err(CE_WARN, "Failed to readd VLAN" + " %u to group %d on MAC %s: %d.", + vid, group->mrg_index, mip->mi_name, + err2); + } + } + + map->ma_nusers = 1; + return (err); + } + + map->ma_group = NULL; break; case MAC_ADDRESS_TYPE_UNICAST_PROMISC: err = i_mac_promisc_set(mip, B_FALSE); break; default: - ASSERT(B_FALSE); + panic("Unexpected ma_type 0x%x, file: %s, line %d", + map->ma_type, __FILE__, __LINE__); } - if (err != 0) + if (err != 0) { + map->ma_nusers = 1; return (err); + } /* * We created MAC address for the primary one at registration, so we @@ -5250,8 +5503,9 @@ mac_fini_macaddr(mac_impl_t *mip) * If mi_addresses is initialized, there should be exactly one * entry left on the list with no users. 
*/ - ASSERT(map->ma_nusers == 0); - ASSERT(map->ma_next == NULL); + VERIFY3S(map->ma_nusers, ==, 0); + VERIFY3P(map->ma_next, ==, NULL); + VERIFY3P(map->ma_vlans, ==, NULL); kmem_free(map, sizeof (mac_address_t)); mip->mi_addresses = NULL; @@ -5813,7 +6067,7 @@ mac_stop_logusage(mac_logtype_t type) mod_hash_walk(i_mac_impl_hash, i_mac_fastpath_walker, &estate); (void) untimeout(mac_logging_timer); - mac_logging_timer = 0; + mac_logging_timer = NULL; /* Write log entries for each mac_impl in the list */ i_mac_log_info(&net_log_list, &lstate); @@ -5931,7 +6185,7 @@ mac_reserve_tx_ring(mac_impl_t *mip, mac_ring_t *desired_ring) } /* - * For a reserved group with multiple clients, return the primary client. + * For a non-default group with multiple clients, return the primary client. */ static mac_client_impl_t * mac_get_grp_primary(mac_group_t *grp) @@ -6290,13 +6544,12 @@ mac_group_add_client(mac_group_t *grp, mac_client_impl_t *mcip) break; } - VERIFY(mgcp == NULL); + ASSERT(mgcp == NULL); mgcp = kmem_zalloc(sizeof (mac_grp_client_t), KM_SLEEP); mgcp->mgc_client = mcip; mgcp->mgc_next = grp->mrg_clients; grp->mrg_clients = mgcp; - } void @@ -6317,8 +6570,27 @@ mac_group_remove_client(mac_group_t *grp, mac_client_impl_t *mcip) } /* - * mac_reserve_rx_group() - * + * Return true if any client on this group explicitly asked for HW + * rings (of type mask) or have a bound share. + */ +static boolean_t +i_mac_clients_hw(mac_group_t *grp, uint32_t mask) +{ + mac_grp_client_t *mgcip; + mac_client_impl_t *mcip; + mac_resource_props_t *mrp; + + for (mgcip = grp->mrg_clients; mgcip != NULL; mgcip = mgcip->mgc_next) { + mcip = mgcip->mgc_client; + mrp = MCIP_RESOURCE_PROPS(mcip); + if (mcip->mci_share != 0 || (mrp->mrp_mask & mask) != 0) + return (B_TRUE); + } + + return (B_FALSE); +} + +/* * Finds an available group and exclusively reserves it for a client. * The group is chosen to suit the flow's resource controls (bandwidth and * fanout requirements) and the address type. 
@@ -6341,7 +6613,6 @@ mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move) int need_rings = 0; mac_group_t *candidate_grp = NULL; mac_client_impl_t *gclient; - mac_resource_props_t *gmrp; mac_group_t *donorgrp = NULL; boolean_t rxhw = mrp->mrp_mask & MRP_RX_RINGS; boolean_t unspec = mrp->mrp_mask & MRP_RXRINGS_UNSPEC; @@ -6352,18 +6623,20 @@ mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move) isprimary = mcip->mci_flent->fe_type & FLOW_PRIMARY_MAC; /* - * Check if a group already has this mac address (case of VLANs) + * Check if a group already has this MAC address (case of VLANs) * unless we are moving this MAC client from one group to another. */ if (!move && (map = mac_find_macaddr(mip, mac_addr)) != NULL) { if (map->ma_group != NULL) return (map->ma_group); } + if (mip->mi_rx_groups == NULL || mip->mi_rx_group_count == 0) return (NULL); + /* - * If exclusive open, return NULL which will enable the - * caller to use the default group. + * If this client is requesting exclusive MAC access then + * return NULL to ensure the client uses the default group. */ if (mcip->mci_state_flags & MCIS_EXCLUSIVE) return (NULL); @@ -6373,6 +6646,7 @@ mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move) mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) { mrp->mrp_nrxrings = 1; } + /* * For static grouping we allow only specifying rings=0 and * unspecified @@ -6381,6 +6655,7 @@ mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move) mip->mi_rx_group_type == MAC_GROUP_TYPE_STATIC) { return (NULL); } + if (rxhw) { /* * We have explicitly asked for a group (with nrxrings, @@ -6442,25 +6717,19 @@ mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move) * that didn't ask for an exclusive group, but got * one and it has enough rings (combined with what * the donor group can donate) for the new MAC - * client + * client. 
*/ if (grp->mrg_state >= MAC_GROUP_STATE_RESERVED) { /* - * If the primary/donor group is not the default - * group, don't bother looking for a candidate group. - * If we don't have enough rings we will check - * if the primary group can be vacated. + * If the donor group is not the default + * group, don't bother looking for a candidate + * group. If we don't have enough rings we + * will check if the primary group can be + * vacated. */ if (candidate_grp == NULL && donorgrp == MAC_DEFAULT_RX_GROUP(mip)) { - ASSERT(!MAC_GROUP_NO_CLIENT(grp)); - gclient = MAC_GROUP_ONLY_CLIENT(grp); - if (gclient == NULL) - gclient = mac_get_grp_primary(grp); - ASSERT(gclient != NULL); - gmrp = MCIP_RESOURCE_PROPS(gclient); - if (gclient->mci_share == 0 && - (gmrp->mrp_mask & MRP_RX_RINGS) == 0 && + if (!i_mac_clients_hw(grp, MRP_RX_RINGS) && (unspec || (grp->mrg_cur_count + donor_grp_rcnt >= need_rings))) { @@ -6526,6 +6795,7 @@ mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move) */ mac_stop_group(grp); } + /* We didn't find an exclusive group for this MAC client */ if (i >= mip->mi_rx_group_count) { @@ -6533,12 +6803,12 @@ mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move) return (NULL); /* - * If we found a candidate group then we switch the - * MAC client from the candidate_group to the default - * group and give the group to this MAC client. If - * we didn't find a candidate_group, check if the - * primary is in its own group and if it can make way - * for this MAC client. + * If we found a candidate group then move the + * existing MAC client from the candidate_group to the + * default group and give the candidate_group to the + * new MAC client. If we didn't find a candidate + * group, then check if the primary is in its own + * group and if it can make way for this MAC client. 
*/ if (candidate_grp == NULL && donorgrp != MAC_DEFAULT_RX_GROUP(mip) && @@ -6549,15 +6819,15 @@ mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move) boolean_t prim_grp = B_FALSE; /* - * Switch the MAC client from the candidate group - * to the default group.. If this group was the - * donor group, then after the switch we need - * to update the donor group too. + * Switch the existing MAC client from the + * candidate group to the default group. If + * the candidate group is the donor group, + * then after the switch we need to update the + * donor group too. */ grp = candidate_grp; - gclient = MAC_GROUP_ONLY_CLIENT(grp); - if (gclient == NULL) - gclient = mac_get_grp_primary(grp); + gclient = grp->mrg_clients->mgc_client; + VERIFY3P(gclient, !=, NULL); if (grp == mip->mi_rx_donor_grp) prim_grp = B_TRUE; if (mac_rx_switch_group(gclient, grp, @@ -6570,7 +6840,6 @@ mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move) donorgrp = MAC_DEFAULT_RX_GROUP(mip); } - /* * Now give this group with the required rings * to this MAC client. @@ -6618,10 +6887,10 @@ mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move) /* * mac_rx_release_group() * - * This is called when there are no clients left for the group. - * The group is stopped and marked MAC_GROUP_STATE_REGISTERED, - * and if it is a non default group, the shares are removed and - * all rings are assigned back to default group. + * Release the group when it has no remaining clients. The group is + * stopped and its shares are removed and all rings are assigned back + * to default group. This should never be called against the default + * group. 
*/ void mac_release_rx_group(mac_client_impl_t *mcip, mac_group_t *group) @@ -6630,6 +6899,7 @@ mac_release_rx_group(mac_client_impl_t *mcip, mac_group_t *group) mac_ring_t *ring; ASSERT(group != MAC_DEFAULT_RX_GROUP(mip)); + ASSERT(MAC_GROUP_NO_CLIENT(group) == B_TRUE); if (mip->mi_rx_donor_grp == group) mip->mi_rx_donor_grp = MAC_DEFAULT_RX_GROUP(mip); @@ -6681,56 +6951,7 @@ mac_release_rx_group(mac_client_impl_t *mcip, mac_group_t *group) } /* - * When we move the primary's mac address between groups, we need to also - * take all the clients sharing the same mac address along with it (VLANs) - * We remove the mac address for such clients from the group after quiescing - * them. When we add the mac address we restart the client. Note that - * the primary's mac address is removed from the group after all the - * other clients sharing the address are removed. Similarly, the primary's - * mac address is added before all the other client's mac address are - * added. While grp is the group where the clients reside, tgrp is - * the group where the addresses have to be added. - */ -static void -mac_rx_move_macaddr_prim(mac_client_impl_t *mcip, mac_group_t *grp, - mac_group_t *tgrp, uint8_t *maddr, boolean_t add) -{ - mac_impl_t *mip = mcip->mci_mip; - mac_grp_client_t *mgcp = grp->mrg_clients; - mac_client_impl_t *gmcip; - boolean_t prim; - - prim = (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0; - - /* - * If the clients are in a non-default group, we just have to - * walk the group's client list. If it is in the default group - * (which will be shared by other clients as well, we need to - * check if the unicast address matches mcip's unicast. 
- */ - while (mgcp != NULL) { - gmcip = mgcp->mgc_client; - if (gmcip != mcip && - (grp != MAC_DEFAULT_RX_GROUP(mip) || - mcip->mci_unicast == gmcip->mci_unicast)) { - if (!add) { - mac_rx_client_quiesce( - (mac_client_handle_t)gmcip); - (void) mac_remove_macaddr(mcip->mci_unicast); - } else { - (void) mac_add_macaddr(mip, tgrp, maddr, prim); - mac_rx_client_restart( - (mac_client_handle_t)gmcip); - } - } - mgcp = mgcp->mgc_next; - } -} - - -/* - * Move the MAC address from fgrp to tgrp. If this is the primary client, - * we need to take any VLANs etc. together too. + * Move the MAC address from fgrp to tgrp. */ static int mac_rx_move_macaddr(mac_client_impl_t *mcip, mac_group_t *fgrp, @@ -6739,56 +6960,86 @@ mac_rx_move_macaddr(mac_client_impl_t *mcip, mac_group_t *fgrp, mac_impl_t *mip = mcip->mci_mip; uint8_t maddr[MAXMACADDRLEN]; int err = 0; - boolean_t prim; - boolean_t multiclnt = B_FALSE; + uint16_t vid; + mac_unicast_impl_t *muip; + boolean_t use_hw; mac_rx_client_quiesce((mac_client_handle_t)mcip); - ASSERT(mcip->mci_unicast != NULL); + VERIFY3P(mcip->mci_unicast, !=, NULL); bcopy(mcip->mci_unicast->ma_addr, maddr, mcip->mci_unicast->ma_len); - prim = (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0; - if (mcip->mci_unicast->ma_nusers > 1) { - mac_rx_move_macaddr_prim(mcip, fgrp, NULL, maddr, B_FALSE); - multiclnt = B_TRUE; - } - ASSERT(mcip->mci_unicast->ma_nusers == 1); - err = mac_remove_macaddr(mcip->mci_unicast); + /* + * Does the client require MAC address hardware classifiction? + */ + use_hw = (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0; + vid = i_mac_flow_vid(mcip->mci_flent); + + /* + * You can never move an address that is shared by multiple + * clients. mac_datapath_setup() ensures that clients sharing + * an address are placed on the default group. This guarantees + * that a non-default group will only ever have one client and + * thus make full use of HW filters. 
+ */ + if (mac_check_macaddr_shared(mcip->mci_unicast)) + return (EINVAL); + + err = mac_remove_macaddr_vlan(mcip->mci_unicast, vid); + if (err != 0) { mac_rx_client_restart((mac_client_handle_t)mcip); - if (multiclnt) { - mac_rx_move_macaddr_prim(mcip, fgrp, fgrp, maddr, - B_TRUE); - } return (err); } + /* - * Program the H/W Classifier first, if this fails we need - * not proceed with the other stuff. + * If this isn't the primary MAC address then the + * mac_address_t has been freed by the last call to + * mac_remove_macaddr_vlan(). In any case, NULL the reference + * to avoid a dangling pointer. */ - if ((err = mac_add_macaddr(mip, tgrp, maddr, prim)) != 0) { + mcip->mci_unicast = NULL; + + /* + * We also have to NULL all the mui_map references -- sun4v + * strikes again! + */ + rw_enter(&mcip->mci_rw_lock, RW_WRITER); + for (muip = mcip->mci_unicast_list; muip != NULL; muip = muip->mui_next) + muip->mui_map = NULL; + rw_exit(&mcip->mci_rw_lock); + + /* + * Program the H/W Classifier first, if this fails we need not + * proceed with the other stuff. + */ + if ((err = mac_add_macaddr_vlan(mip, tgrp, maddr, vid, use_hw)) != 0) { + int err2; + /* Revert back the H/W Classifier */ - if ((err = mac_add_macaddr(mip, fgrp, maddr, prim)) != 0) { - /* - * This should not fail now since it worked earlier, - * should we panic? - */ - cmn_err(CE_WARN, - "mac_rx_switch_group: switching %p back" - " to group %p failed!!", (void *)mcip, - (void *)fgrp); + err2 = mac_add_macaddr_vlan(mip, fgrp, maddr, vid, use_hw); + + if (err2 != 0) { + cmn_err(CE_WARN, "Failed to revert HW classification" + " on MAC %s, for client %s: %d.", mip->mi_name, + mcip->mci_name, err2); } + mac_rx_client_restart((mac_client_handle_t)mcip); - if (multiclnt) { - mac_rx_move_macaddr_prim(mcip, fgrp, fgrp, maddr, - B_TRUE); - } return (err); } + + /* + * Get a reference to the new mac_address_t and update the + * client's reference. 
Then restart the client and add the + * other clients of this MAC addr (if they exsit). + */ mcip->mci_unicast = mac_find_macaddr(mip, maddr); + rw_enter(&mcip->mci_rw_lock, RW_WRITER); + for (muip = mcip->mci_unicast_list; muip != NULL; muip = muip->mui_next) + muip->mui_map = mcip->mci_unicast; + rw_exit(&mcip->mci_rw_lock); mac_rx_client_restart((mac_client_handle_t)mcip); - if (multiclnt) - mac_rx_move_macaddr_prim(mcip, fgrp, tgrp, maddr, B_TRUE); - return (err); + return (0); } /* @@ -6809,19 +7060,34 @@ mac_rx_switch_group(mac_client_impl_t *mcip, mac_group_t *fgrp, mac_impl_t *mip = mcip->mci_mip; mac_grp_client_t *mgcp; - ASSERT(fgrp == mcip->mci_flent->fe_rx_ring_group); + VERIFY3P(fgrp, ==, mcip->mci_flent->fe_rx_ring_group); if ((err = mac_rx_move_macaddr(mcip, fgrp, tgrp)) != 0) return (err); /* - * The group might be reserved, but SRSs may not be set up, e.g. - * primary and its vlans using a reserved group. + * If the group is marked as reserved and in use by a single + * client, then there is an SRS to teardown. */ if (fgrp->mrg_state == MAC_GROUP_STATE_RESERVED && MAC_GROUP_ONLY_CLIENT(fgrp) != NULL) { mac_rx_srs_group_teardown(mcip->mci_flent, B_TRUE); } + + /* + * If we are moving the client from a non-default group, then + * we know that any additional clients on this group share the + * same MAC address. Since we moved the MAC address filter, we + * need to move these clients too. + * + * If we are moving the client from the default group and its + * MAC address has VLAN clients, then we must move those + * clients as well. + * + * In both cases the idea is the same: we moved the MAC + * address filter to the tgrp, so we must move all clients + * using that MAC address to tgrp as well. 
+ */ if (fgrp != MAC_DEFAULT_RX_GROUP(mip)) { mgcp = fgrp->mrg_clients; while (mgcp != NULL) { @@ -6832,20 +7098,21 @@ mac_rx_switch_group(mac_client_impl_t *mcip, mac_group_t *fgrp, gmcip->mci_flent->fe_rx_ring_group = tgrp; } mac_release_rx_group(mcip, fgrp); - ASSERT(MAC_GROUP_NO_CLIENT(fgrp)); + VERIFY3B(MAC_GROUP_NO_CLIENT(fgrp), ==, B_TRUE); mac_set_group_state(fgrp, MAC_GROUP_STATE_REGISTERED); } else { mac_group_remove_client(fgrp, mcip); mac_group_add_client(tgrp, mcip); mcip->mci_flent->fe_rx_ring_group = tgrp; + /* * If there are other clients (VLANs) sharing this address - * we should be here only for the primary. + * then move them too. */ - if (mcip->mci_unicast->ma_nusers > 1) { + if (mac_check_macaddr_shared(mcip->mci_unicast)) { /* * We need to move all the clients that are using - * this h/w address. + * this MAC address. */ mgcp = fgrp->mrg_clients; while (mgcp != NULL) { @@ -6859,20 +7126,24 @@ mac_rx_switch_group(mac_client_impl_t *mcip, mac_group_t *fgrp, } } } + /* - * The default group will still take the multicast, - * broadcast traffic etc., so it won't go to + * The default group still handles multicast and + * broadcast traffic; it won't transition to * MAC_GROUP_STATE_REGISTERED. */ if (fgrp->mrg_state == MAC_GROUP_STATE_RESERVED) mac_rx_group_unmark(fgrp, MR_CONDEMNED); mac_set_group_state(fgrp, MAC_GROUP_STATE_SHARED); } + next_state = mac_group_next_state(tgrp, &group_only_mcip, MAC_DEFAULT_RX_GROUP(mip), B_TRUE); mac_set_group_state(tgrp, next_state); + /* - * If the destination group is reserved, setup the SRSs etc. + * If the destination group is reserved, then setup the SRSes. + * Otherwise make sure to use SW classification. 
*/ if (tgrp->mrg_state == MAC_GROUP_STATE_RESERVED) { mac_rx_srs_group_setup(mcip, mcip->mci_flent, SRST_LINK); @@ -6883,6 +7154,7 @@ mac_rx_switch_group(mac_client_impl_t *mcip, mac_group_t *fgrp, } else { mac_rx_switch_grp_to_sw(tgrp); } + return (0); } @@ -6913,6 +7185,7 @@ mac_reserve_tx_group(mac_client_impl_t *mcip, boolean_t move) boolean_t isprimary; isprimary = mcip->mci_flent->fe_type & FLOW_PRIMARY_MAC; + /* * When we come here for a VLAN on the primary (dladm create-vlan), * we need to pair it along with the primary (to keep it consistent @@ -6994,8 +7267,7 @@ mac_reserve_tx_group(mac_client_impl_t *mcip, boolean_t move) if (grp->mrg_state == MAC_GROUP_STATE_RESERVED && candidate_grp == NULL) { gclient = MAC_GROUP_ONLY_CLIENT(grp); - if (gclient == NULL) - gclient = mac_get_grp_primary(grp); + VERIFY3P(gclient, !=, NULL); gmrp = MCIP_RESOURCE_PROPS(gclient); if (gclient->mci_share == 0 && (gmrp->mrp_mask & MRP_TX_RINGS) == 0 && @@ -7032,13 +7304,14 @@ mac_reserve_tx_group(mac_client_impl_t *mcip, boolean_t move) */ if (need_exclgrp && candidate_grp != NULL) { /* - * Switch the MAC client from the candidate group - * to the default group. + * Switch the MAC client from the candidate + * group to the default group. We know the + * candidate_grp came from a reserved group + * and thus only has one client. 
*/ grp = candidate_grp; gclient = MAC_GROUP_ONLY_CLIENT(grp); - if (gclient == NULL) - gclient = mac_get_grp_primary(grp); + VERIFY3P(gclient, !=, NULL); mac_tx_client_quiesce((mac_client_handle_t)gclient); mac_tx_switch_group(gclient, grp, defgrp); mac_tx_client_restart((mac_client_handle_t)gclient); @@ -7206,7 +7479,7 @@ mac_tx_switch_group(mac_client_impl_t *mcip, mac_group_t *fgrp, */ mac_group_remove_client(fgrp, mcip); mac_tx_dismantle_soft_rings(fgrp, flent); - if (mcip->mci_unicast->ma_nusers > 1) { + if (mac_check_macaddr_shared(mcip->mci_unicast)) { mgcp = fgrp->mrg_clients; while (mgcp != NULL) { gmcip = mgcp->mgc_client; @@ -7452,7 +7725,7 @@ mac_no_active(mac_handle_t mh) * changes and update the mac_resource_props_t for the VLAN's client. * We need to do this since we don't support setting these properties * on the primary's VLAN clients, but the VLAN clients have to - * follow the primary w.r.t the rings property; + * follow the primary w.r.t the rings property. */ void mac_set_prim_vlan_rings(mac_impl_t *mip, mac_resource_props_t *mrp) @@ -7601,13 +7874,10 @@ mac_group_ring_modify(mac_client_impl_t *mcip, mac_group_t *group, MAC_GROUP_STATE_RESERVED) { continue; } - mcip = MAC_GROUP_ONLY_CLIENT(tgrp); - if (mcip == NULL) - mcip = mac_get_grp_primary(tgrp); - ASSERT(mcip != NULL); - mrp = MCIP_RESOURCE_PROPS(mcip); - if ((mrp->mrp_mask & MRP_RX_RINGS) != 0) + if (i_mac_clients_hw(tgrp, MRP_RX_RINGS)) continue; + mcip = tgrp->mrg_clients->mgc_client; + VERIFY3P(mcip, !=, NULL); if ((tgrp->mrg_cur_count + defgrp->mrg_cur_count) < (modify + 1)) { continue; @@ -7622,12 +7892,10 @@ mac_group_ring_modify(mac_client_impl_t *mcip, mac_group_t *group, MAC_GROUP_STATE_RESERVED) { continue; } - mcip = MAC_GROUP_ONLY_CLIENT(tgrp); - if (mcip == NULL) - mcip = mac_get_grp_primary(tgrp); - mrp = MCIP_RESOURCE_PROPS(mcip); - if ((mrp->mrp_mask & MRP_TX_RINGS) != 0) + if (i_mac_clients_hw(tgrp, MRP_TX_RINGS)) continue; + mcip = tgrp->mrg_clients->mgc_client; + 
VERIFY3P(mcip, !=, NULL); if ((tgrp->mrg_cur_count + defgrp->mrg_cur_count) < (modify + 1)) { continue; @@ -7897,10 +8165,10 @@ mac_pool_event_cb(pool_event_t what, poolid_t id, void *arg) * Set effective rings property. This could be called from datapath_setup/ * datapath_teardown or set-linkprop. * If the group is reserved we just go ahead and set the effective rings. - * Additionally, for TX this could mean the default group has lost/gained + * Additionally, for TX this could mean the default group has lost/gained * some rings, so if the default group is reserved, we need to adjust the * effective rings for the default group clients. For RX, if we are working - * with the non-default group, we just need * to reset the effective props + * with the non-default group, we just need to reset the effective props * for the default group clients. */ void @@ -8030,6 +8298,7 @@ mac_check_primary_relocation(mac_client_impl_t *mcip, boolean_t rxhw) * the first non-primary. */ ASSERT(mip->mi_nactiveclients == 2); + /* * OK, now we have the primary that needs to be relocated. */ diff --git a/usr/src/uts/common/io/mac/mac_client.c b/usr/src/uts/common/io/mac/mac_client.c index 66bba78e91..b918bf4aca 100644 --- a/usr/src/uts/common/io/mac/mac_client.c +++ b/usr/src/uts/common/io/mac/mac_client.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Joyent, Inc. All rights reserved. + * Copyright 2018 Joyent, Inc. * Copyright 2017 RackTop Systems. */ @@ -865,9 +865,12 @@ mac_unicast_update_client_flow(mac_client_impl_t *mcip) mac_protect_update_mac_token(mcip); /* - * A MAC client could have one MAC address but multiple - * VLANs. In that case update the flow entries corresponding - * to all VLANs of the MAC client. + * When there are multiple VLANs sharing the same MAC address, + * each gets its own MAC client, except when running on sun4v + * vsw. 
In that case the mci_flent_list is used to place + * multiple VLAN flows on one MAC client. If we ever get rid + * of vsw then this code can go, but until then we need to + * update all flow entries. */ for (flent = mcip->mci_flent_list; flent != NULL; flent = flent->fe_client_next) { @@ -1025,7 +1028,7 @@ mac_unicast_primary_set(mac_handle_t mh, const uint8_t *addr) return (0); } - if (mac_find_macaddr(mip, (uint8_t *)addr) != 0) { + if (mac_find_macaddr(mip, (uint8_t *)addr) != NULL) { i_mac_perim_exit(mip); return (EBUSY); } @@ -1040,9 +1043,9 @@ mac_unicast_primary_set(mac_handle_t mh, const uint8_t *addr) mac_capab_aggr_t aggr_cap; /* - * If the mac is an aggregation, other than the unicast + * If the MAC is an aggregation, other than the unicast * addresses programming, aggr must be informed about this - * primary unicst address change to change its mac address + * primary unicst address change to change its MAC address * policy to be user-specified. */ ASSERT(map->ma_type == MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED); @@ -1374,7 +1377,7 @@ mac_client_open(mac_handle_t mh, mac_client_handle_t *mchp, char *name, mcip->mci_state_flags |= MCIS_IS_AGGR_PORT; if (mip->mi_state_flags & MIS_IS_AGGR) - mcip->mci_state_flags |= MCIS_IS_AGGR; + mcip->mci_state_flags |= MCIS_IS_AGGR_CLIENT; if ((flags & MAC_OPEN_FLAGS_USE_DATALINK_NAME) != 0) { datalink_id_t linkid; @@ -1539,7 +1542,8 @@ mac_client_close(mac_client_handle_t mch, uint16_t flags) } /* - * Set the rx bypass receive callback. + * Set the Rx bypass receive callback and return B_TRUE. Return + * B_FALSE if it's not possible to enable bypass. 
*/ boolean_t mac_rx_bypass_set(mac_client_handle_t mch, mac_direct_rx_t rx_fn, void *arg1) @@ -1550,11 +1554,11 @@ mac_rx_bypass_set(mac_client_handle_t mch, mac_direct_rx_t rx_fn, void *arg1) ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); /* - * If the mac_client is a VLAN, we should not do DLS bypass and - * instead let the packets come up via mac_rx_deliver so the vlan - * header can be stripped. + * If the client has more than one VLAN then process packets + * through DLS. This should happen only when sun4v vsw is on + * the scene. */ - if (mcip->mci_nvids > 0) + if (mcip->mci_nvids > 1) return (B_FALSE); /* @@ -1608,8 +1612,8 @@ mac_rx_set(mac_client_handle_t mch, mac_rx_t rx_fn, void *arg) i_mac_perim_exit(mip); /* - * If we're changing the rx function on the primary mac of a vnic, - * make sure any secondary macs on the vnic are updated as well. + * If we're changing the Rx function on the primary MAC of a VNIC, + * make sure any secondary addresses on the VNIC are updated as well. */ if (umip != NULL) { ASSERT((umip->mi_state_flags & MIS_IS_VNIC) != 0); @@ -1787,6 +1791,14 @@ mac_client_set_rings_prop(mac_client_impl_t *mcip, mac_resource_props_t *mrp, } /* Let check if we can give this an excl group */ } else if (group == defgrp) { + /* + * If multiple clients share an + * address then they must stay on the + * default group. + */ + if (mac_check_macaddr_shared(mcip->mci_unicast)) + return (0); + ngrp = mac_reserve_rx_group(mcip, mac_addr, B_TRUE); /* Couldn't give it a group, that's fine */ @@ -1809,6 +1821,16 @@ mac_client_set_rings_prop(mac_client_impl_t *mcip, mac_resource_props_t *mrp, } if (group == defgrp && ((mrp->mrp_nrxrings > 0) || unspec)) { + /* + * We are requesting Rx rings. Try to reserve + * a non-default group. + * + * If multiple clients share an address then + * they must stay on the default group. 
+ */ + if (mac_check_macaddr_shared(mcip->mci_unicast)) + return (EINVAL); + ngrp = mac_reserve_rx_group(mcip, mac_addr, B_TRUE); if (ngrp == NULL) return (ENOSPC); @@ -2166,10 +2188,10 @@ mac_unicast_flow_create(mac_client_impl_t *mcip, uint8_t *mac_addr, flent_flags = FLOW_VNIC_MAC; /* - * For the first flow we use the mac client's name - mci_name, for - * subsequent ones we just create a name with the vid. This is + * For the first flow we use the MAC client's name - mci_name, for + * subsequent ones we just create a name with the VID. This is * so that we can add these flows to the same flow table. This is - * fine as the flow name (except for the one with the mac client's + * fine as the flow name (except for the one with the MAC client's * name) is not visible. When the first flow is removed, we just replace * its fdesc with another from the list, so we will still retain the * flent with the MAC client's flow name. @@ -2327,6 +2349,7 @@ mac_client_datapath_setup(mac_client_impl_t *mcip, uint16_t vid, * The unicast MAC address must have been added successfully. */ ASSERT(mcip->mci_unicast != NULL); + /* * Push down the sub-flows that were defined on this link * hitherto. The flows are added to the active flow table @@ -2338,15 +2361,23 @@ mac_client_datapath_setup(mac_client_impl_t *mcip, uint16_t vid, ASSERT(!no_unicast); /* - * A unicast flow already exists for that MAC client, - * this flow must be the same mac address but with - * different VID. It has been checked by mac_addr_in_use(). + * A unicast flow already exists for that MAC client + * so this flow must be the same MAC address but with + * a different VID. It has been checked by + * mac_addr_in_use(). + * + * We will use the SRS etc. from the initial + * mci_flent. We don't need to create a kstat for + * this, as except for the fdesc, everything will be + * used from the first flent. * - * We will use the SRS etc. from the mci_flent. 
Note that - * We don't need to create kstat for this as except for - * the fdesc, everything will be used from in the 1st flent. + * The only time we should see multiple flents on the + * same MAC client is on the sun4v vsw. If we removed + * that code we should be able to remove the entire + * notion of multiple flents on a MAC client (this + * doesn't affect sub/user flows because they have + * their own list unrelated to mci_flent_list). */ - if (bcmp(mac_addr, map->ma_addr, map->ma_len) != 0) { err = EINVAL; goto bail; @@ -2475,8 +2506,12 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags, boolean_t is_vnic_primary = (flags & MAC_UNICAST_VNIC_PRIMARY); - /* when VID is non-zero, the underlying MAC can not be VNIC */ - ASSERT(!((mip->mi_state_flags & MIS_IS_VNIC) && (vid != 0))); + /* + * When the VID is non-zero the underlying MAC cannot be a + * VNIC. I.e., dladm create-vlan cannot take a VNIC as + * argument, only the primary MAC client. + */ + ASSERT(!((mip->mi_state_flags & MIS_IS_VNIC) && (vid != VLAN_ID_NONE))); /* * Can't unicast add if the client asked only for minimal datapath @@ -2489,18 +2524,19 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags, * Check for an attempted use of the current Port VLAN ID, if enabled. * No client may use it. */ - if (mip->mi_pvid != 0 && vid == mip->mi_pvid) + if (mip->mi_pvid != VLAN_ID_NONE && vid == mip->mi_pvid) return (EBUSY); /* * Check whether it's the primary client and flag it. */ - if (!(mcip->mci_state_flags & MCIS_IS_VNIC) && is_primary && vid == 0) + if (!(mcip->mci_state_flags & MCIS_IS_VNIC) && is_primary && + vid == VLAN_ID_NONE) mcip->mci_flags |= MAC_CLIENT_FLAGS_PRIMARY; /* * is_vnic_primary is true when we come here as a VLAN VNIC - * which uses the primary mac client's address but with a non-zero + * which uses the primary MAC client's address but with a non-zero * VID. In this case the MAC address is not specified by an upper * MAC client. 
*/ @@ -2552,7 +2588,7 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags, /* * Create a handle for vid 0. */ - ASSERT(vid == 0); + ASSERT(vid == VLAN_ID_NONE); muip = kmem_zalloc(sizeof (mac_unicast_impl_t), KM_SLEEP); muip->mui_vid = vid; *mah = (mac_unicast_handle_t)muip; @@ -2572,7 +2608,9 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags, } /* - * If this is a VNIC/VLAN, disable softmac fast-path. + * If this is a VNIC/VLAN, disable softmac fast-path. This is + * only relevant to legacy devices which use softmac to + * interface with GLDv3. */ if (mcip->mci_state_flags & MCIS_IS_VNIC) { err = mac_fastpath_disable((mac_handle_t)mip); @@ -2620,9 +2658,11 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags, (void) mac_client_set_resources(mch, mrp); } else if (mcip->mci_state_flags & MCIS_IS_VNIC) { /* - * This is a primary VLAN client, we don't support - * specifying rings property for this as it inherits the - * rings property from its MAC. + * This is a VLAN client sharing the address of the + * primary MAC client; i.e., one created via dladm + * create-vlan. We don't support specifying ring + * properties for this type of client as it inherits + * these from the primary MAC client. */ if (is_vnic_primary) { mac_resource_props_t *vmrp; @@ -2681,7 +2721,7 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags, /* * Set the flags here so that if this is a passive client, we - * can return and set it when we call mac_client_datapath_setup + * can return and set it when we call mac_client_datapath_setup * when this becomes the active client. 
If we defer to using these * flags to mac_client_datapath_setup, then for a passive client, * we'd have to store the flags somewhere (probably fe_flags) @@ -2984,14 +3024,14 @@ mac_unicast_remove(mac_client_handle_t mch, mac_unicast_handle_t mah) i_mac_perim_enter(mip); if (mcip->mci_flags & MAC_CLIENT_FLAGS_VNIC_PRIMARY) { /* - * Called made by the upper MAC client of a VNIC. + * Call made by the upper MAC client of a VNIC. * There's nothing much to do, the unicast address will * be removed by the VNIC driver when the VNIC is deleted, * but let's ensure that all our transmit is done before * the client does a mac_client_stop lest it trigger an * assert in the driver. */ - ASSERT(muip->mui_vid == 0); + ASSERT(muip->mui_vid == VLAN_ID_NONE); mac_tx_client_flush(mcip); @@ -3055,6 +3095,7 @@ mac_unicast_remove(mac_client_handle_t mch, mac_unicast_handle_t mah) i_mac_perim_exit(mip); return (0); } + /* * Remove the VID from the list of client's VIDs. */ @@ -3081,7 +3122,7 @@ mac_unicast_remove(mac_client_handle_t mch, mac_unicast_handle_t mah) * flows. */ flent = mac_client_get_flow(mcip, muip); - ASSERT(flent != NULL); + VERIFY3P(flent, !=, NULL); /* * The first one is disappearing, need to make sure @@ -3109,6 +3150,7 @@ mac_unicast_remove(mac_client_handle_t mch, mac_unicast_handle_t mah) FLOW_FINAL_REFRELE(flent); ASSERT(!(mcip->mci_state_flags & MCIS_EXCLUSIVE)); + /* * Enable fastpath if this is a VNIC or a VLAN. 
*/ @@ -3122,7 +3164,8 @@ mac_unicast_remove(mac_client_handle_t mch, mac_unicast_handle_t mah) mui_vid = muip->mui_vid; mac_client_datapath_teardown(mch, muip, flent); - if ((mcip->mci_flags & MAC_CLIENT_FLAGS_PRIMARY) && mui_vid == 0) { + if ((mcip->mci_flags & MAC_CLIENT_FLAGS_PRIMARY) && + mui_vid == VLAN_ID_NONE) { mcip->mci_flags &= ~MAC_CLIENT_FLAGS_PRIMARY; } else { i_mac_perim_exit(mip); diff --git a/usr/src/uts/common/io/mac/mac_datapath_setup.c b/usr/src/uts/common/io/mac/mac_datapath_setup.c index 0355b544d5..a3fc2529b9 100644 --- a/usr/src/uts/common/io/mac/mac_datapath_setup.c +++ b/usr/src/uts/common/io/mac/mac_datapath_setup.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2017, Joyent, Inc. + * Copyright 2018 Joyent, Inc. */ #include <sys/types.h> @@ -1186,7 +1186,7 @@ mac_srs_fanout_list_alloc(mac_soft_ring_set_t *mac_srs) mac_srs->srs_tx_soft_rings = (mac_soft_ring_t **) kmem_zalloc(sizeof (mac_soft_ring_t *) * MAX_RINGS_PER_GROUP, KM_SLEEP); - if (mcip->mci_state_flags & MCIS_IS_AGGR) { + if (mcip->mci_state_flags & MCIS_IS_AGGR_CLIENT) { mac_srs_tx_t *tx = &mac_srs->srs_tx; tx->st_soft_rings = (mac_soft_ring_t **) @@ -1595,13 +1595,13 @@ mac_srs_update_bwlimit(flow_entry_t *flent, mac_resource_props_t *mrp) /* * When the first sub-flow is added to a link, we disable polling on the - * link and also modify the entry point to mac_rx_srs_subflow_process. + * link and also modify the entry point to mac_rx_srs_subflow_process(). * (polling is disabled because with the subflow added, accounting * for polling needs additional logic, it is assumed that when a subflow is * added, we can take some hit as a result of disabling polling rather than * adding more complexity - if this becomes a perf. issue we need to * re-rvaluate this logic). When the last subflow is removed, we turn back - * polling and also reset the entry point to mac_rx_srs_process. 
+ * polling and also reset the entry point to mac_rx_srs_process(). * * In the future if there are multiple SRS, we can simply * take one and give it to the flow rather than disabling polling and @@ -1646,7 +1646,7 @@ mac_client_update_classifier(mac_client_impl_t *mcip, boolean_t enable) * Change the S/W classifier so that we can land in the * correct processing function with correct argument. * If all subflows have been removed we can revert to - * mac_rx_srsprocess, else we need mac_rx_srs_subflow_process. + * mac_rx_srs_process(), else we need mac_rx_srs_subflow_process(). */ mutex_enter(&flent->fe_lock); flent->fe_cb_fn = (flow_fn_t)rx_func; @@ -2185,7 +2185,7 @@ mac_srs_create(mac_client_impl_t *mcip, flow_entry_t *flent, uint32_t srs_type, * find nothing plus we have an existing backlog * (sr_poll_pkt_cnt > 0), we stay in polling mode but don't poll * the H/W for packets anymore (let the polling thread go to sleep). - * 5) Once the backlog is relived (packets are processed) we reenable + * 5) Once the backlog is relieved (packets are processed) we reenable * polling (by signalling the poll thread) only when the backlog * dips below sr_poll_thres. * 6) sr_hiwat is used exclusively when we are not polling capable @@ -2256,8 +2256,8 @@ mac_srs_create(mac_client_impl_t *mcip, flow_entry_t *flent, uint32_t srs_type, /* * Some drivers require serialization and don't send * packet chains in interrupt context. For such - * drivers, we should always queue in soft ring - * so that we get a chance to switch into a polling + * drivers, we should always queue in the soft ring + * so that we get a chance to switch into polling * mode under backlog. */ ring_info = mac_hwring_getinfo((mac_ring_handle_t)ring); @@ -2364,9 +2364,11 @@ mac_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent, } /* - * Set up the RX SRSs. If the S/W SRS is not set, set it up, if there - * is a group associated with this MAC client, set up SRSs for individual - * h/w rings. 
+ * Set up the Rx SRSes. If there is no group associated with the + * client, then only setup SW classification. If the client has + * exclusive (MAC_GROUP_STATE_RESERVED) use of the group, then create an + * SRS for each HW ring. If the client is sharing a group, then make + * sure to teardown the HW SRSes. */ void mac_rx_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent, @@ -2377,13 +2379,14 @@ mac_rx_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent, mac_ring_t *ring; uint32_t fanout_type; mac_group_t *rx_group = flent->fe_rx_ring_group; + boolean_t no_unicast; fanout_type = mac_find_fanout(flent, link_type); + no_unicast = (mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR) != 0; - /* Create the SRS for S/W classification if none exists */ + /* Create the SRS for SW classification if none exists */ if (flent->fe_rx_srs[0] == NULL) { ASSERT(flent->fe_rx_srs_cnt == 0); - /* Setup the Rx SRS */ mac_srs = mac_srs_create(mcip, flent, fanout_type | link_type, mac_rx_deliver, mcip, NULL, NULL); mutex_enter(&flent->fe_lock); @@ -2395,15 +2398,17 @@ mac_rx_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent, if (rx_group == NULL) return; + /* - * fanout for default SRS is done when default SRS are created - * above. As each ring is added to the group, we setup the - * SRS and fanout to it. + * If the group is marked RESERVED then setup an SRS and + * fanout for each HW ring. */ switch (rx_group->mrg_state) { case MAC_GROUP_STATE_RESERVED: for (ring = rx_group->mrg_rings; ring != NULL; ring = ring->mr_next) { + uint16_t vid = i_mac_flow_vid(mcip->mci_flent); + switch (ring->mr_state) { case MR_INUSE: case MR_FREE: @@ -2413,20 +2418,23 @@ mac_rx_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent, (void) mac_start_ring(ring); /* - * Since the group is exclusively ours create - * an SRS for this ring to allow the - * individual SRS to dynamically poll the - * ring. 
Do this only if the client is not - * a VLAN MAC client, since for VLAN we do - * s/w classification for the VID check, and - * if it has a unicast address. + * If a client requires SW VLAN + * filtering or has no unicast address + * then we don't create any HW ring + * SRSes. */ - if ((mcip->mci_state_flags & - MCIS_NO_UNICAST_ADDR) || - i_mac_flow_vid(mcip->mci_flent) != - VLAN_ID_NONE) { + if ((!MAC_GROUP_HW_VLAN(rx_group) && + vid != VLAN_ID_NONE) || no_unicast) break; - } + + /* + * When a client has exclusive use of + * a group, and that group's traffic + * is fully HW classified, we create + * an SRS for each HW ring in order to + * make use of dynamic polling of said + * HW rings. + */ mac_srs = mac_srs_create(mcip, flent, fanout_type | link_type, mac_rx_deliver, mcip, NULL, ring); @@ -2442,14 +2450,9 @@ mac_rx_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent, break; case MAC_GROUP_STATE_SHARED: /* - * Set all rings of this group to software classified. - * - * If the group is current RESERVED, the existing mac - * client (the only client on this group) is using - * this group exclusively. In that case we need to - * disable polling on the rings of the group (if it - * was enabled), and free the SRS associated with the - * rings. + * When a group is shared by multiple clients, we must + * use SW classification to ensure packets are + * delivered to the correct client. */ mac_rx_switch_grp_to_sw(rx_group); break; @@ -2502,10 +2505,11 @@ mac_tx_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent, } /* - * Remove all the RX SRSs. If we want to remove only the SRSs associated - * with h/w rings, leave the S/W SRS alone. This is used when we want to - * move the MAC client from one group to another, so we need to teardown - * on the h/w SRSs. + * Teardown all the Rx SRSes. Unless hwonly is set, then only teardown + * the Rx HW SRSes and leave the SW SRS alone. 
The hwonly flag is set + * when we wish to move a MAC client from one group to another. In + * that case, we need to release the current HW SRSes but keep the SW + * SRS for continued traffic classification. */ void mac_rx_srs_group_teardown(flow_entry_t *flent, boolean_t hwonly) @@ -2523,8 +2527,16 @@ mac_rx_srs_group_teardown(flow_entry_t *flent, boolean_t hwonly) flent->fe_rx_srs[i] = NULL; flent->fe_rx_srs_cnt--; } - ASSERT(!hwonly || flent->fe_rx_srs_cnt == 1); - ASSERT(hwonly || flent->fe_rx_srs_cnt == 0); + + /* + * If we are only tearing down the HW SRSes then there must be + * one SRS left for SW classification. Otherwise we are tearing + * down both HW and SW and there should be no SRSes left. + */ + if (hwonly) + VERIFY3S(flent->fe_rx_srs_cnt, ==, 1); + else + VERIFY3S(flent->fe_rx_srs_cnt, ==, 0); } /* @@ -2826,6 +2838,7 @@ mac_group_next_state(mac_group_t *grp, mac_client_impl_t **group_only_mcip, * even if this is the only client in the default group, we will * leave group as shared). */ + int mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent, uint32_t link_type) @@ -2836,6 +2849,7 @@ mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent, mac_group_t *default_rgroup; mac_group_t *default_tgroup; int err; + uint16_t vid; uint8_t *mac_addr; mac_group_state_t next_state; mac_client_impl_t *group_only_mcip; @@ -2848,6 +2862,7 @@ mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent, boolean_t no_unicast; boolean_t isprimary = flent->fe_type & FLOW_PRIMARY_MAC; mac_client_impl_t *reloc_pmcip = NULL; + boolean_t use_hw; ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); @@ -2879,15 +2894,19 @@ mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent, (mrp->mrp_mask & MRP_TXRINGS_UNSPEC)); /* - * By default we have given the primary all the rings - * i.e. the default group. Let's see if the primary - * needs to be relocated so that the addition of this - * client doesn't impact the primary's performance, - * i.e. 
if the primary is in the default group and - * we add this client, the primary will lose polling. - * We do this only for NICs supporting dynamic ring - * grouping and only when this is the first client - * after the primary (i.e. nactiveclients is 2) + * All the rings initially belong to the default group + * under dynamic grouping. The primary client uses the + * default group when it is the only client. The + * default group is also used as the destination for + * all multicast and broadcast traffic of all clients. + * Therefore, the primary client loses its ability to + * poll the softrings on addition of a second client. + * To avoid a performance penalty, MAC will move the + * primary client to a dedicated group when it can. + * + * When using static grouping, the primary client + * begins life on a non-default group. There is + * no moving needed upon addition of a second client. */ if (!isprimary && mip->mi_nactiveclients == 2 && (group_only_mcip = mac_primary_client_handle(mip)) != @@ -2895,6 +2914,7 @@ mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent, reloc_pmcip = mac_check_primary_relocation( group_only_mcip, rxhw); } + /* * Check to see if we can get an exclusive group for * this mac address or if there already exists a @@ -2908,6 +2928,26 @@ mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent, } else if (rgroup == NULL) { rgroup = default_rgroup; } + + /* + * If we are adding a second client to a + * non-default group then we need to move the + * existing client to the default group and + * add the new client to the default group as + * well. + */ + if (rgroup != default_rgroup && + rgroup->mrg_state == MAC_GROUP_STATE_RESERVED) { + group_only_mcip = MAC_GROUP_ONLY_CLIENT(rgroup); + err = mac_rx_switch_group(group_only_mcip, rgroup, + default_rgroup); + + if (err != 0) + goto setup_failed; + + rgroup = default_rgroup; + } + /* * Check to see if we can get an exclusive group for * this mac client. 
If no groups are available, use @@ -2939,14 +2979,17 @@ mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent, rgroup->mrg_cur_count); } } + flent->fe_rx_ring_group = rgroup; /* - * Add the client to the group. This could cause - * either this group to move to the shared state or - * cause the default group to move to the shared state. - * The actions on this group are done here, while the - * actions on the default group are postponed to - * the end of this function. + * Add the client to the group and update the + * group's state. If rgroup != default_group + * then the rgroup should only ever have one + * client and be in the RESERVED state. But no + * matter what, the default_rgroup will enter + * the SHARED state since it has to receive + * all broadcast and multicast traffic. This + * case is handled later in the function. */ mac_group_add_client(rgroup, mcip); next_state = mac_group_next_state(rgroup, @@ -2971,28 +3014,37 @@ mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent, &group_only_mcip, default_tgroup, B_FALSE); tgroup->mrg_state = next_state; } - /* - * Setup the Rx and Tx SRSes. If we got a pristine group - * exclusively above, mac_srs_group_setup would simply create - * the required SRSes. If we ended up sharing a previously - * reserved group, mac_srs_group_setup would also dismantle the - * SRSes of the previously exclusive group - */ - mac_srs_group_setup(mcip, flent, link_type); /* We are setting up minimal datapath only */ - if (no_unicast) + if (no_unicast) { + mac_srs_group_setup(mcip, flent, link_type); break; - /* Program the S/W Classifer */ + } + + /* Program software classification. */ if ((err = mac_flow_add(mip->mi_flow_tab, flent)) != 0) goto setup_failed; - /* Program the H/W Classifier */ - if ((err = mac_add_macaddr(mip, rgroup, mac_addr, - (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0)) != 0) + /* Program hardware classification. 
*/ + vid = i_mac_flow_vid(flent); + use_hw = (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0; + err = mac_add_macaddr_vlan(mip, rgroup, mac_addr, vid, use_hw); + + if (err != 0) goto setup_failed; + mcip->mci_unicast = mac_find_macaddr(mip, mac_addr); - ASSERT(mcip->mci_unicast != NULL); + VERIFY3P(mcip->mci_unicast, !=, NULL); + + /* + * Setup the Rx and Tx SRSes. If the client has a + * reserved group, then mac_srs_group_setup() creates + * the required SRSes for the HW rings. If we have a + * shared group, mac_srs_group_setup() dismantles the + * HW SRSes of the previously exclusive group. + */ + mac_srs_group_setup(mcip, flent, link_type); + /* (Re)init the v6 token & local addr used by link protection */ mac_protect_update_mac_token(mcip); break; @@ -3036,17 +3088,23 @@ mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent, ASSERT(default_rgroup->mrg_state == MAC_GROUP_STATE_SHARED); } + /* - * If we get an exclusive group for a VLAN MAC client we - * need to take the s/w path to make the additional check for - * the vid. Disable polling and set it to s/w classification. - * Similarly for clients that don't have a unicast address. + * A VLAN MAC client on a reserved group still + * requires SW classification if the MAC doesn't + * provide VLAN HW filtering. + * + * Clients with no unicast address also require SW + * classification. 
*/ if (rgroup->mrg_state == MAC_GROUP_STATE_RESERVED && - (i_mac_flow_vid(flent) != VLAN_ID_NONE || no_unicast)) { + ((!MAC_GROUP_HW_VLAN(rgroup) && vid != VLAN_ID_NONE) || + no_unicast)) { mac_rx_switch_grp_to_sw(rgroup); } + } + mac_set_rings_effective(mcip); return (0); @@ -3072,6 +3130,7 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, boolean_t check_default_group = B_FALSE; mac_group_state_t next_state; mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); + uint16_t vid; ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); @@ -3084,16 +3143,24 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, case SRST_LINK: /* Stop sending packets */ mac_tx_client_block(mcip); + group = flent->fe_rx_ring_group; + vid = i_mac_flow_vid(flent); - /* Stop the packets coming from the H/W */ + /* + * Stop the packet flow from the hardware by disabling + * any hardware filters assigned to this client. + */ if (mcip->mci_unicast != NULL) { int err; - err = mac_remove_macaddr(mcip->mci_unicast); + + err = mac_remove_macaddr_vlan(mcip->mci_unicast, vid); + if (err != 0) { - cmn_err(CE_WARN, "%s: failed to remove a MAC" - " address because of error 0x%x", + cmn_err(CE_WARN, "%s: failed to remove a MAC HW" + " filters because of error 0x%x", mip->mi_name, err); } + mcip->mci_unicast = NULL; } @@ -3114,17 +3181,17 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, * left who can use it exclusively. Also, if we * were the last client, release the group. */ - group = flent->fe_rx_ring_group; default_group = MAC_DEFAULT_RX_GROUP(mip); if (group != NULL) { mac_group_remove_client(group, mcip); next_state = mac_group_next_state(group, &grp_only_mcip, default_group, B_TRUE); + if (next_state == MAC_GROUP_STATE_RESERVED) { /* * Only one client left on this RX group. 
*/ - ASSERT(grp_only_mcip != NULL); + VERIFY3P(grp_only_mcip, !=, NULL); mac_set_group_state(group, MAC_GROUP_STATE_RESERVED); group_only_flent = grp_only_mcip->mci_flent; @@ -3149,7 +3216,7 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, * to see if the primary client can get * exclusive access to the default group. */ - ASSERT(group != MAC_DEFAULT_RX_GROUP(mip)); + VERIFY3P(group, !=, MAC_DEFAULT_RX_GROUP(mip)); if (mrp->mrp_mask & MRP_RX_RINGS) { MAC_RX_GRP_RELEASED(mip); if (mip->mi_rx_group_type == @@ -3163,7 +3230,8 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, MAC_GROUP_STATE_REGISTERED); check_default_group = B_TRUE; } else { - ASSERT(next_state == MAC_GROUP_STATE_SHARED); + VERIFY3S(next_state, ==, + MAC_GROUP_STATE_SHARED); mac_set_group_state(group, MAC_GROUP_STATE_SHARED); mac_rx_group_unmark(group, MR_CONDEMNED); @@ -3252,12 +3320,12 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, */ if (check_default_group) { default_group = MAC_DEFAULT_RX_GROUP(mip); - ASSERT(default_group->mrg_state == MAC_GROUP_STATE_SHARED); + VERIFY3S(default_group->mrg_state, ==, MAC_GROUP_STATE_SHARED); next_state = mac_group_next_state(default_group, &grp_only_mcip, default_group, B_TRUE); if (next_state == MAC_GROUP_STATE_RESERVED) { - ASSERT(grp_only_mcip != NULL && - mip->mi_nactiveclients == 1); + VERIFY3P(grp_only_mcip, !=, NULL); + VERIFY3U(mip->mi_nactiveclients, ==, 1); mac_set_group_state(default_group, MAC_GROUP_STATE_RESERVED); mac_rx_srs_group_setup(grp_only_mcip, @@ -3781,7 +3849,7 @@ mac_tx_srs_del_ring(mac_soft_ring_set_t *mac_srs, mac_ring_t *tx_ring) * is also stored in st_soft_rings[] array. That entry should * be removed. 
*/ - if (mcip->mci_state_flags & MCIS_IS_AGGR) { + if (mcip->mci_state_flags & MCIS_IS_AGGR_CLIENT) { mac_srs_tx_t *tx = &mac_srs->srs_tx; ASSERT(tx->st_soft_rings[tx_ring->mr_index] == remove_sring); @@ -3810,7 +3878,7 @@ mac_tx_srs_setup(mac_client_impl_t *mcip, flow_entry_t *flent) boolean_t is_aggr; uint_t ring_info = 0; - is_aggr = (mcip->mci_state_flags & MCIS_IS_AGGR) != 0; + is_aggr = (mcip->mci_state_flags & MCIS_IS_AGGR_CLIENT) != 0; grp = flent->fe_tx_ring_group; if (grp == NULL) { ring = (mac_ring_t *)mip->mi_default_tx_ring; diff --git a/usr/src/uts/common/io/mac/mac_provider.c b/usr/src/uts/common/io/mac/mac_provider.c index 07201afdec..26f501668e 100644 --- a/usr/src/uts/common/io/mac/mac_provider.c +++ b/usr/src/uts/common/io/mac/mac_provider.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2018 Joyent, Inc. * Copyright 2017 OmniTI Computer Consulting, Inc. All rights reserved. */ @@ -56,6 +57,7 @@ #include <sys/sdt.h> #include <sys/pattr.h> #include <sys/strsun.h> +#include <sys/vlan.h> /* * MAC Provider Interface. @@ -695,7 +697,7 @@ mac_rx_common(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain) { mac_impl_t *mip = (mac_impl_t *)mh; mac_ring_t *mr = (mac_ring_t *)mrh; - mac_soft_ring_set_t *mac_srs; + mac_soft_ring_set_t *mac_srs; mblk_t *bp = mp_chain; boolean_t hw_classified = B_FALSE; diff --git a/usr/src/uts/common/io/mac/mac_sched.c b/usr/src/uts/common/io/mac/mac_sched.c index d046930873..cbd5ce1e19 100644 --- a/usr/src/uts/common/io/mac/mac_sched.c +++ b/usr/src/uts/common/io/mac/mac_sched.c @@ -21,7 +21,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2017 Joyent, Inc. + * Copyright 2018 Joyent, Inc. * Copyright 2013 Nexenta Systems, Inc. All rights reserved. */ @@ -300,9 +300,8 @@ * * Otherwise, all fanout is performed by software. 
MAC divides incoming frames * into one of three buckets -- IPv4 TCP traffic, IPv4 UDP traffic, and - * everything else. Note, VLAN tagged traffic is considered other, regardless of - * the interior EtherType. Regardless of the type of fanout, these three - * categories or buckets are always used. + * everything else. Regardless of the type of fanout, these three categories + * or buckets are always used. * * The difference between protocol level fanout and full software ring protocol * fanout is the number of software rings that end up getting created. The @@ -1475,16 +1474,15 @@ enum pkt_type { #define PORTS_SIZE 4 /* - * mac_rx_srs_proto_fanout - * - * This routine delivers packets destined to an SRS into one of the + * This routine delivers packets destined for an SRS into one of the * protocol soft rings. * - * Given a chain of packets we need to split it up into multiple sub chains - * destined into TCP, UDP or OTH soft ring. Instead of entering - * the soft ring one packet at a time, we want to enter it in the form of a - * chain otherwise we get this start/stop behaviour where the worker thread - * goes to sleep and then next packets comes in forcing it to wake up etc. + * Given a chain of packets we need to split it up into multiple sub + * chains: TCP, UDP or OTH soft ring. Instead of entering the soft + * ring one packet at a time, we want to enter it in the form of a + * chain otherwise we get this start/stop behaviour where the worker + * thread goes to sleep and then next packet comes in forcing it to + * wake up. */ static void mac_rx_srs_proto_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head) @@ -1523,9 +1521,9 @@ mac_rx_srs_proto_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head) mac_srs->srs_ring->mr_classify_type == MAC_HW_CLASSIFIER; /* - * Special clients (eg. VLAN, non ether, etc) need DLS - * processing in the Rx path. SRST_DLS_BYPASS will be clear for - * such SRSs. 
Another way of disabling bypass is to set the + * Some clients, such as non-ethernet, need DLS processing in + * the Rx path. Such clients clear the SRST_DLS_BYPASS flag. + * DLS bypass may also be disabled via the * MCIS_RX_BYPASS_DISABLE flag. */ dls_bypass = ((mac_srs->srs_type & SRST_DLS_BYPASS) != 0) && @@ -1537,10 +1535,11 @@ mac_rx_srs_proto_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head) bzero(sz, MAX_SR_TYPES * sizeof (size_t)); /* - * We got a chain from SRS that we need to send to the soft rings. - * Since squeues for TCP & IPv4 sap poll their soft rings (for - * performance reasons), we need to separate out v4_tcp, v4_udp - * and the rest goes in other. + * We have a chain from SRS that we need to split across the + * soft rings. The squeues for the TCP and IPv4 SAPs use their + * own soft rings to allow polling from the squeue. The rest of + * the packets are delivered on the OTH soft ring which cannot + * be polled. */ while (head != NULL) { mp = head; @@ -1568,9 +1567,14 @@ mac_rx_srs_proto_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head) evhp = (struct ether_vlan_header *)mp->b_rptr; sap = ntohs(evhp->ether_type); hdrsize = sizeof (struct ether_vlan_header); + /* - * Check if the VID of the packet, if any, - * belongs to this client. + * Check if the VID of the packet, if + * any, belongs to this client. + * Technically, if this packet came up + * via a HW classified ring then we + * don't need to perform this check. + * Perhaps a future optimization. */ if (!mac_client_check_flow_vid(mcip, VLAN_ID(ntohs(evhp->ether_tci)))) { @@ -1635,7 +1639,6 @@ mac_rx_srs_proto_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head) * performance and may bypass DLS. All other cases go through * the 'OTH' type path without DLS bypass. 
*/ - ipha = (ipha_t *)(mp->b_rptr + hdrsize); if ((type != OTH) && MBLK_RX_FANOUT_SLOWPATH(mp, ipha)) type = OTH; @@ -1647,11 +1650,13 @@ mac_rx_srs_proto_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head) } ASSERT(type == UNDEF); + /* - * We look for at least 4 bytes past the IP header to get - * the port information. If we get an IP fragment, we don't - * have the port information, and we use just the protocol - * information. + * Determine the type from the IP protocol value. If + * classified as TCP or UDP, then update the read + * pointer to the beginning of the IP header. + * Otherwise leave the message as is for further + * processing by DLS. */ switch (ipha->ipha_protocol) { case IPPROTO_TCP: @@ -1695,11 +1700,10 @@ mac_rx_srs_proto_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head) int fanout_unaligned = 0; /* - * mac_rx_srs_long_fanout - * - * The fanout routine for VLANs, and for anything else that isn't performing - * explicit dls bypass. Returns -1 on an error (drop the packet due to a - * malformed packet), 0 on success, with values written in *indx and *type. + * The fanout routine for any clients with DLS bypass disabled or for + * traffic classified as "other". Returns -1 on an error (drop the + * packet due to a malformed packet), 0 on success, with values + * written in *indx and *type. */ static int mac_rx_srs_long_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *mp, @@ -1865,16 +1869,15 @@ src_dst_based_fanout: } /* - * mac_rx_srs_fanout - * - * This routine delivers packets destined to an SRS into a soft ring member + * This routine delivers packets destined for an SRS into a soft ring member * of the set. * - * Given a chain of packets we need to split it up into multiple sub chains - * destined for one of the TCP, UDP or OTH soft rings. 
Instead of entering - * the soft ring one packet at a time, we want to enter it in the form of a - * chain otherwise we get this start/stop behaviour where the worker thread - * goes to sleep and then next packets comes in forcing it to wake up etc. + * Given a chain of packets we need to split it up into multiple sub + * chains: TCP, UDP or OTH soft ring. Instead of entering the soft + * ring one packet at a time, we want to enter it in the form of a + * chain otherwise we get this start/stop behaviour where the worker + * thread goes to sleep and then next packet comes in forcing it to + * wake up. * * Note: * Since we know what is the maximum fanout possible, we create a 2D array @@ -1935,10 +1938,11 @@ mac_rx_srs_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head) mac_srs->srs_ring->mr_classify_type == MAC_HW_CLASSIFIER; /* - * Special clients (eg. VLAN, non ether, etc) need DLS - * processing in the Rx path. SRST_DLS_BYPASS will be clear for - * such SRSs. Another way of disabling bypass is to set the - * MCIS_RX_BYPASS_DISABLE flag. + * Some clients, such as non Ethernet, need DLS processing in + * the Rx path. Such clients clear the SRST_DLS_BYPASS flag. + * DLS bypass may also be disabled via the + * MCIS_RX_BYPASS_DISABLE flag, but this is only consumed by + * sun4v vsw currently. */ dls_bypass = ((mac_srs->srs_type & SRST_DLS_BYPASS) != 0) && ((mcip->mci_state_flags & MCIS_RX_BYPASS_DISABLE) == 0); @@ -1960,7 +1964,7 @@ mac_rx_srs_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head) /* * We got a chain from SRS that we need to send to the soft rings. - * Since squeues for TCP & IPv4 sap poll their soft rings (for + * Since squeues for TCP & IPv4 SAP poll their soft rings (for * performance reasons), we need to separate out v4_tcp, v4_udp * and the rest goes in other. 
*/ @@ -1990,9 +1994,14 @@ mac_rx_srs_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head) evhp = (struct ether_vlan_header *)mp->b_rptr; sap = ntohs(evhp->ether_type); hdrsize = sizeof (struct ether_vlan_header); + /* - * Check if the VID of the packet, if any, - * belongs to this client. + * Check if the VID of the packet, if + * any, belongs to this client. + * Technically, if this packet came up + * via a HW classified ring then we + * don't need to perform this check. + * Perhaps a future optimization. */ if (!mac_client_check_flow_vid(mcip, VLAN_ID(ntohs(evhp->ether_tci)))) { @@ -2032,7 +2041,6 @@ mac_rx_srs_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head) continue; } - /* * If we are using the default Rx ring where H/W or S/W * classification has not happened, we need to verify if @@ -2621,7 +2629,6 @@ again: mac_srs->srs_state |= (SRS_PROC|proc_type); - /* * mcip is NULL for broadcast and multicast flows. The promisc * callbacks for broadcast and multicast packets are delivered from @@ -2641,10 +2648,8 @@ again: } /* - * Check if SRS itself is doing the processing - * This direct path does not apply when subflows are present. In this - * case, packets need to be dispatched to a soft ring according to the - * flow's bandwidth and other resources contraints. + * Check if SRS itself is doing the processing. This direct + * path applies only when subflows are present. */ if (mac_srs->srs_type & SRST_NO_SOFT_RINGS) { mac_direct_rx_t proc; @@ -4656,6 +4661,9 @@ mac_rx_deliver(void *arg1, mac_resource_handle_t mrh, mblk_t *mp_chain, * the packet to the promiscuous listeners of the * client, since they expect to see the whole * frame including the VLAN headers. + * + * The MCIS_STRIP_DISABLE is only issued when sun4v + * vsw is in play. */ mp_chain = mac_strip_vlan_tag_chain(mp_chain); } @@ -4664,13 +4672,11 @@ mac_rx_deliver(void *arg1, mac_resource_handle_t mrh, mblk_t *mp_chain, } /* - * mac_rx_soft_ring_process - * - * process a chain for a given soft ring. 
The number of packets queued - * in the SRS and its associated soft rings (including this one) is - * very small (tracked by srs_poll_pkt_cnt), then allow the entering - * thread (interrupt or poll thread) to do inline processing. This - * helps keep the latency down under low load. + * Process a chain for a given soft ring. If the number of packets + * queued in the SRS and its associated soft rings (including this + * one) is very small (tracked by srs_poll_pkt_cnt) then allow the + * entering thread (interrupt or poll thread) to process the chain + * inline. This is meant to reduce latency under low load. * * The proc and arg for each mblk is already stored in the mblk in * appropriate places. @@ -4729,13 +4735,13 @@ mac_rx_soft_ring_process(mac_client_impl_t *mcip, mac_soft_ring_t *ringp, ASSERT(MUTEX_NOT_HELD(&ringp->s_ring_lock)); /* - * If we have a soft ring set which is doing - * bandwidth control, we need to decrement - * srs_size and count so it the SRS can have a - * accurate idea of what is the real data - * queued between SRS and its soft rings. We - * decrement the counters only when the packet - * gets processed by both SRS and the soft ring. + * If we have an SRS performing bandwidth + * control then we need to decrement the size + * and count so the SRS has an accurate count + * of the data queued between the SRS and its + * soft rings. We decrement the counters only + * when the packet is processed by both the + * SRS and the soft ring. */ mutex_enter(&mac_srs->srs_lock); MAC_UPDATE_SRS_COUNT_LOCKED(mac_srs, cnt); @@ -4751,8 +4757,8 @@ mac_rx_soft_ring_process(mac_client_impl_t *mcip, mac_soft_ring_t *ringp, if ((ringp->s_ring_first == NULL) || (ringp->s_ring_state & S_RING_BLANK)) { /* - * We processed inline our packet and - * nothing new has arrived or our + * We processed a single packet inline + * and nothing new has arrived or our * receiver doesn't want to receive * any packets. We are done. 
*/ diff --git a/usr/src/uts/common/io/mac/mac_soft_ring.c b/usr/src/uts/common/io/mac/mac_soft_ring.c index d24c0207df..f4d2a5ee81 100644 --- a/usr/src/uts/common/io/mac/mac_soft_ring.c +++ b/usr/src/uts/common/io/mac/mac_soft_ring.c @@ -21,7 +21,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2017 Joyent, Inc. + * Copyright 2018 Joyent, Inc. */ /* @@ -207,7 +207,7 @@ mac_soft_ring_create(int id, clock_t wait, uint16_t type, ringp->s_ring_tx_hiwat = (mac_tx_soft_ring_hiwat > mac_tx_soft_ring_max_q_cnt) ? mac_tx_soft_ring_max_q_cnt : mac_tx_soft_ring_hiwat; - if (mcip->mci_state_flags & MCIS_IS_AGGR) { + if (mcip->mci_state_flags & MCIS_IS_AGGR_CLIENT) { mac_srs_tx_t *tx = &mac_srs->srs_tx; ASSERT(tx->st_soft_rings[ @@ -339,15 +339,14 @@ mac_soft_ring_fire(void *arg) } /* - * mac_rx_soft_ring_drain + * Drain the soft ring pointed to by ringp. * - * Called when worker thread model (ST_RING_WORKER_ONLY) of processing - * incoming packets is used. s_ring_first contain the queued packets. - * s_ring_rx_func contains the upper level (client) routine where the - * packets are destined and s_ring_rx_arg1/s_ring_rx_arg2 are the - * cookie meant for the client. + * o s_ring_first: pointer to the queued packet chain. + * + * o s_ring_rx_func: pointer to the client's Rx routine. + * + * o s_ring_rx_{arg1,arg2}: opaque values specific to the client. */ -/* ARGSUSED */ static void mac_rx_soft_ring_drain(mac_soft_ring_t *ringp) { @@ -392,13 +391,12 @@ mac_rx_soft_ring_drain(mac_soft_ring_t *ringp) (*proc)(arg1, arg2, mp, NULL); /* - * If we have a soft ring set which is doing - * bandwidth control, we need to decrement its - * srs_size so it can have a accurate idea of - * what is the real data queued between SRS and - * its soft rings. We decrement the size for a - * packet only when it gets processed by both - * SRS and the soft ring. 
+ * If we have an SRS performing bandwidth control, then + * we need to decrement the size and count so the SRS + * has an accurate measure of the data queued between + * the SRS and its soft rings. We decrement the + * counters only when the packet is processed by both + * the SRS and the soft ring. */ mutex_enter(&mac_srs->srs_lock); MAC_UPDATE_SRS_COUNT_LOCKED(mac_srs, cnt); @@ -414,12 +412,10 @@ mac_rx_soft_ring_drain(mac_soft_ring_t *ringp) } /* - * mac_soft_ring_worker - * * The soft ring worker routine to process any queued packets. In - * normal case, the worker thread is bound to a CPU. It the soft - * ring is dealing with TCP packets, then the worker thread will - * be bound to the same CPU as the TCP squeue. + * normal case, the worker thread is bound to a CPU. If the soft ring + * handles TCP packets then the worker thread is bound to the same CPU + * as the TCP squeue. */ static void mac_soft_ring_worker(mac_soft_ring_t *ringp) @@ -605,7 +601,7 @@ mac_soft_ring_dls_bypass(void *arg, mac_direct_rx_t rx_func, void *rx_arg1) mac_soft_ring_t *softring = arg; mac_soft_ring_set_t *srs; - ASSERT(rx_func != NULL); + VERIFY3P(rx_func, !=, NULL); mutex_enter(&softring->s_ring_lock); softring->s_ring_rx_func = rx_func; diff --git a/usr/src/uts/common/io/vnic/vnic_dev.c b/usr/src/uts/common/io/vnic/vnic_dev.c index 3cb7e7660a..da52d7bb37 100644 --- a/usr/src/uts/common/io/vnic/vnic_dev.c +++ b/usr/src/uts/common/io/vnic/vnic_dev.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2015 Joyent, Inc. + * Copyright 2018 Joyent, Inc. * Copyright 2016 OmniTI Computer Consulting, Inc. All rights reserved. */ @@ -354,7 +354,7 @@ vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid, rw_enter(&vnic_lock, RW_WRITER); - /* does a VNIC with the same id already exist? */ + /* Does a VNIC with the same id already exist? 
*/ err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id), (mod_hash_val_t *)&vnic); if (err == 0) { @@ -1037,7 +1037,7 @@ static int vnic_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num, uint_t pr_valsize, const void *pr_val) { - int err = 0; + int err = 0; vnic_t *vn = m_driver; switch (pr_num) { @@ -1135,7 +1135,7 @@ vnic_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, uint_t pr_valsize, void *pr_val) { vnic_t *vn = arg; - int ret = 0; + int ret = 0; boolean_t out; switch (pr_num) { diff --git a/usr/src/uts/common/mapfiles/ddi.mapfile b/usr/src/uts/common/mapfiles/ddi.mapfile index 75e95a9452..9b6a9ab677 100644 --- a/usr/src/uts/common/mapfiles/ddi.mapfile +++ b/usr/src/uts/common/mapfiles/ddi.mapfile @@ -165,6 +165,7 @@ SYMBOL_SCOPE { list_insert_tail { FLAGS = EXTERN }; list_next { FLAGS = EXTERN }; list_remove { FLAGS = EXTERN }; + list_remove_head { FLAGS = EXTERN }; memcpy { FLAGS = EXTERN }; memset { FLAGS = EXTERN }; miocack { FLAGS = EXTERN }; diff --git a/usr/src/uts/common/sys/aggr_impl.h b/usr/src/uts/common/sys/aggr_impl.h index 547c9cc241..415e176ef3 100644 --- a/usr/src/uts/common/sys/aggr_impl.h +++ b/usr/src/uts/common/sys/aggr_impl.h @@ -21,6 +21,8 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2012 OmniTI Computer Consulting, Inc All rights reserved. + * Copyright 2018 Joyent, Inc. */ #ifndef _SYS_AGGR_IMPL_H @@ -54,6 +56,15 @@ extern "C" { */ #define MAC_PSEUDO_RING_INUSE 0x01 +/* + * VLAN filters placed on the Rx pseudo group. 
+ */ +typedef struct aggr_vlan { + list_node_t av_link; + uint16_t av_vid; /* VLAN ID */ + uint_t av_refs; /* num aggr clients using this VID */ +} aggr_vlan_t; + typedef struct aggr_unicst_addr_s { uint8_t aua_addr[ETHERADDRL]; struct aggr_unicst_addr_s *aua_next; @@ -73,6 +84,8 @@ typedef struct aggr_pseudo_rx_group_s { aggr_unicst_addr_t *arg_macaddr; aggr_pseudo_rx_ring_t arg_rings[MAX_RINGS_PER_GROUP]; uint_t arg_ring_cnt; + uint_t arg_untagged; /* num clients untagged */ + list_t arg_vlans; /* VLANs on this group */ } aggr_pseudo_rx_group_t; typedef struct aggr_pseudo_tx_ring_s { @@ -186,11 +199,18 @@ typedef struct aggr_grp_s { uint_t lg_tx_ports_size; /* size of lg_tx_ports */ uint32_t lg_tx_policy; /* outbound policy */ uint8_t lg_mac_tx_policy; - uint64_t lg_ifspeed; link_state_t lg_link_state; + + + /* + * The lg_stat_lock must be held when accessing these fields. + */ + kmutex_t lg_stat_lock; + uint64_t lg_ifspeed; link_duplex_t lg_link_duplex; uint64_t lg_stat[MAC_NSTAT]; uint64_t lg_ether_stat[ETHER_NSTAT]; + aggr_lacp_mode_t lg_lacp_mode; /* off, active, or passive */ Agg_t aggr; /* 802.3ad data */ uint32_t lg_hcksum_txflags; @@ -308,6 +328,8 @@ extern boolean_t aggr_port_notify_link(aggr_grp_t *, aggr_port_t *); extern void aggr_port_init_callbacks(aggr_port_t *); extern void aggr_recv_cb(void *, mac_resource_handle_t, mblk_t *, boolean_t); +extern void aggr_recv_promisc_cb(void *, mac_resource_handle_t, mblk_t *, + boolean_t); extern void aggr_tx_ring_update(void *, uintptr_t); extern void aggr_tx_notify_thread(void *); @@ -338,6 +360,9 @@ extern void aggr_grp_port_wait(aggr_grp_t *); extern int aggr_port_addmac(aggr_port_t *, const uint8_t *); extern void aggr_port_remmac(aggr_port_t *, const uint8_t *); +extern int aggr_port_addvlan(aggr_port_t *, uint16_t); +extern int aggr_port_remvlan(aggr_port_t *, uint16_t); + extern mblk_t *aggr_ring_tx(void *, mblk_t *); extern mblk_t *aggr_find_tx_ring(void *, mblk_t *, uintptr_t, mac_ring_handle_t *); 
diff --git a/usr/src/uts/common/sys/mac_client.h b/usr/src/uts/common/sys/mac_client.h index 0fc4939503..74f4cbb310 100644 --- a/usr/src/uts/common/sys/mac_client.h +++ b/usr/src/uts/common/sys/mac_client.h @@ -22,7 +22,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2013 Joyent, Inc. All rights reserved. + * Copyright 2018 Joyent, Inc. All rights reserved. */ /* @@ -88,6 +88,7 @@ typedef enum { } mac_client_promisc_type_t; /* flags passed to mac_unicast_add() */ + #define MAC_UNICAST_NODUPCHECK 0x0001 #define MAC_UNICAST_PRIMARY 0x0002 #define MAC_UNICAST_HW 0x0004 diff --git a/usr/src/uts/common/sys/mac_client_impl.h b/usr/src/uts/common/sys/mac_client_impl.h index 9b3b4fe369..d5c66684d0 100644 --- a/usr/src/uts/common/sys/mac_client_impl.h +++ b/usr/src/uts/common/sys/mac_client_impl.h @@ -24,7 +24,7 @@ * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ /* - * Copyright (c) 2013, Joyent, Inc. All rights reserved. + * Copyright 2018 Joyent, Inc. */ #ifndef _SYS_MAC_CLIENT_IMPL_H @@ -57,7 +57,7 @@ typedef struct mac_unicast_impl_s { /* Protected by */ uint16_t mui_vid; /* SL */ } mac_unicast_impl_t; -#define MAC_CLIENT_FLAGS_PRIMARY 0X0001 +#define MAC_CLIENT_FLAGS_PRIMARY 0x0001 #define MAC_CLIENT_FLAGS_VNIC_PRIMARY 0x0002 #define MAC_CLIENT_FLAGS_MULTI_PRIMARY 0x0004 #define MAC_CLIENT_FLAGS_PASSIVE_PRIMARY 0x0008 @@ -131,12 +131,17 @@ struct mac_client_impl_s { /* Protected by */ uint32_t mci_flags; /* SL */ krwlock_t mci_rw_lock; mac_unicast_impl_t *mci_unicast_list; /* mci_rw_lock */ + /* * The mac_client_impl_t may be shared by multiple clients, i.e * multiple VLANs sharing the same MAC client. In this case the - * address/vid tubles differ and are each associated with their + * address/vid tuples differ and are each associated with their * own flow entry, but the rest underlying components SRS, etc, * are common. + * + * This is only needed to support sun4v vsw. 
There are several + * places in MAC we could simplify the code if we removed + * sun4v support. */ flow_entry_t *mci_flent_list; /* mci_rw_lock */ uint_t mci_nflents; /* mci_rw_lock */ @@ -224,7 +229,7 @@ extern int mac_tx_percpu_cnt; &(mcip)->mci_flent->fe_resource_props) #define MCIP_EFFECTIVE_PROPS(mcip) \ - (mcip->mci_flent == NULL ? NULL : \ + (mcip->mci_flent == NULL ? NULL : \ &(mcip)->mci_flent->fe_effective_props) #define MCIP_RESOURCE_PROPS_MASK(mcip) \ @@ -313,6 +318,74 @@ extern int mac_tx_percpu_cnt; (((mcip)->mci_state_flags & MCIS_TAG_DISABLE) == 0 && \ (mcip)->mci_nvids == 1) \ +/* + * MAC Client Implementation State (mci_state_flags) + * + * MCIS_IS_VNIC + * + * The client is a VNIC. + * + * MCIS_EXCLUSIVE + * + * The client has exclusive control over the MAC, such that it is + * the sole client of the MAC. + * + * MCIS_TAG_DISABLE + * + * MAC will not add VLAN tags to outgoing traffic. If this flag + * is set it is up to the client to add the correct VLAN tag. + * + * MCIS_STRIP_DISABLE + * + * MAC will not strip the VLAN tags on incoming traffic before + * passing it to mci_rx_fn. This only applies to non-bypass + * traffic. + * + * MCIS_IS_AGGR_PORT + * + * The client represents a port on an aggr. + * + * MCIS_CLIENT_POLL_CAPABLE + * + * The client is capable of polling the Rx TCP/UDP softrings. + * + * MCIS_DESC_LOGGED + * + * This flag is set when the client's link info has been logged + * by the mac_log_linkinfo() timer. This ensures that the + * client's link info is only logged once. + * + * MCIS_SHARE_BOUND + * + * This client has an HIO share bound to it. + * + * MCIS_DISABLE_TX_VID_CHECK + * + * MAC will not check the VID of the client's Tx traffic. + * + * MCIS_USE_DATALINK_NAME + * + * The client is using the same name as its underlying MAC. This + * happens when dlmgmtd is unreachable during client creation. + * + * MCIS_UNICAST_HW + * + * The client requires MAC address hardware classification. This + * is only used by sun4v vsw. 
+ * + * MCIS_IS_AGGR_CLIENT + * + * The client sits atop an aggr. + * + * MCIS_RX_BYPASS_DISABLE + * + * Do not allow the client to enable DLS bypass. + * + * MCIS_NO_UNICAST_ADDR + * + * This client has no MAC unicast addresss associated with it. + * + */ /* MCI state flags */ #define MCIS_IS_VNIC 0x0001 #define MCIS_EXCLUSIVE 0x0002 @@ -325,7 +398,7 @@ extern int mac_tx_percpu_cnt; #define MCIS_DISABLE_TX_VID_CHECK 0x0100 #define MCIS_USE_DATALINK_NAME 0x0200 #define MCIS_UNICAST_HW 0x0400 -#define MCIS_IS_AGGR 0x0800 +#define MCIS_IS_AGGR_CLIENT 0x0800 #define MCIS_RX_BYPASS_DISABLE 0x1000 #define MCIS_NO_UNICAST_ADDR 0x2000 diff --git a/usr/src/uts/common/sys/mac_client_priv.h b/usr/src/uts/common/sys/mac_client_priv.h index 6b409513a6..77475b339e 100644 --- a/usr/src/uts/common/sys/mac_client_priv.h +++ b/usr/src/uts/common/sys/mac_client_priv.h @@ -22,7 +22,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2013 Joyent, Inc. All rights reserved. + * Copyright 2018 Joyent, Inc. */ /* @@ -144,6 +144,10 @@ extern void mac_hwring_set_default(mac_handle_t, mac_ring_handle_t); extern int mac_hwgroup_addmac(mac_group_handle_t, const uint8_t *); extern int mac_hwgroup_remmac(mac_group_handle_t, const uint8_t *); +extern int mac_hwgroup_addvlan(mac_group_handle_t, uint16_t); +extern int mac_hwgroup_remvlan(mac_group_handle_t, uint16_t); + +extern boolean_t mac_has_hw_vlan(mac_handle_t); extern void mac_set_upper_mac(mac_client_handle_t, mac_handle_t, mac_resource_props_t *); diff --git a/usr/src/uts/common/sys/mac_impl.h b/usr/src/uts/common/sys/mac_impl.h index 774c4fad9a..eebbde37de 100644 --- a/usr/src/uts/common/sys/mac_impl.h +++ b/usr/src/uts/common/sys/mac_impl.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2017, Joyent, Inc. + * Copyright (c) 2018, Joyent, Inc. 
*/ #ifndef _SYS_MAC_IMPL_H @@ -244,7 +244,7 @@ struct mac_ring_s { (mr)->mr_refcnt++; \ } -#define MR_REFRELE(mr) { \ +#define MR_REFRELE(mr) { \ mutex_enter(&(mr)->mr_lock); \ ASSERT((mr)->mr_refcnt != 0); \ (mr)->mr_refcnt--; \ @@ -255,8 +255,8 @@ struct mac_ring_s { } /* - * Per mac client flow information associated with a RX group. - * The entire structure is SL protected. + * Used to attach MAC clients to an Rx group. The members are SL + * protected. */ typedef struct mac_grp_client { struct mac_grp_client *mgc_next; @@ -270,15 +270,20 @@ typedef struct mac_grp_client { ((g)->mrg_clients->mgc_next == NULL)) ? \ (g)->mrg_clients->mgc_client : NULL) +#define MAC_GROUP_HW_VLAN(g) \ + (((g) != NULL) && \ + ((g)->mrg_info.mgi_addvlan != NULL) && \ + ((g)->mrg_info.mgi_remvlan != NULL)) + /* * Common ring group data structure for ring control and management. - * The entire structure is SL protected + * The entire structure is SL protected. */ struct mac_group_s { int mrg_index; /* index in the list */ mac_ring_type_t mrg_type; /* ring type */ mac_group_state_t mrg_state; /* state of the group */ - mac_group_t *mrg_next; /* next ring in the chain */ + mac_group_t *mrg_next; /* next group in the chain */ mac_handle_t mrg_mh; /* reference to MAC */ mac_ring_t *mrg_rings; /* grouped rings */ uint_t mrg_cur_count; /* actual size of group */ @@ -300,7 +305,7 @@ struct mac_group_s { mac_ring_handle_t mrh = rh; \ mac_impl_t *mimpl = (mac_impl_t *)mhp; \ /* \ - * Send packets through a selected tx ring, or through the \ + * Send packets through a selected tx ring, or through the \ * default handler if there is no selected ring. \ */ \ if (mrh == NULL) \ @@ -322,9 +327,9 @@ struct mac_group_s { #define MAC_TX(mip, rh, mp, src_mcip) { \ mac_ring_handle_t rhandle = (rh); \ /* \ - * If there is a bound Hybrid I/O share, send packets through \ + * If there is a bound Hybrid I/O share, send packets through \ * the default tx ring. 
(When there's a bound Hybrid I/O share, \ - * the tx rings of this client are mapped in the guest domain \ + * the tx rings of this client are mapped in the guest domain \ * and not accessible from here.) \ */ \ _NOTE(CONSTANTCONDITION) \ @@ -333,7 +338,7 @@ struct mac_group_s { if (mip->mi_promisc_list != NULL) \ mac_promisc_dispatch(mip, mp, src_mcip); \ /* \ - * Grab the proper transmit pointer and handle. Special \ + * Grab the proper transmit pointer and handle. Special \ * optimization: we can test mi_bridge_link itself atomically, \ * and if that indicates no bridge send packets through tx ring.\ */ \ @@ -360,17 +365,23 @@ typedef struct mac_mcast_addrs_s { } mac_mcast_addrs_t; typedef enum { - MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED = 1, /* hardware steering */ + MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED = 1, /* HW classification */ MAC_ADDRESS_TYPE_UNICAST_PROMISC /* promiscuous mode */ } mac_address_type_t; +typedef struct mac_vlan_s { + struct mac_vlan_s *mv_next; + uint16_t mv_vid; +} mac_vlan_t; + typedef struct mac_address_s { mac_address_type_t ma_type; /* address type */ - int ma_nusers; /* number of users */ - /* of that address */ + int ma_nusers; /* num users of addr */ struct mac_address_s *ma_next; /* next address */ uint8_t ma_addr[MAXMACADDRLEN]; /* address value */ size_t ma_len; /* address length */ + mac_vlan_t *ma_vlans; /* VLANs on this addr */ + boolean_t ma_untagged; /* accept untagged? */ mac_group_t *ma_group; /* asscociated group */ mac_impl_t *ma_mip; /* MAC handle */ } mac_address_t; @@ -487,7 +498,7 @@ struct mac_impl_s { mac_capab_led_t mi_led; /* - * MAC address list. SL protected. + * MAC address and VLAN lists. SL protected. 
*/ mac_address_t *mi_addresses; @@ -759,6 +770,8 @@ extern void mac_client_bcast_refresh(mac_client_impl_t *, mac_multicst_t, */ extern int mac_group_addmac(mac_group_t *, const uint8_t *); extern int mac_group_remmac(mac_group_t *, const uint8_t *); +extern int mac_group_addvlan(mac_group_t *, uint16_t); +extern int mac_group_remvlan(mac_group_t *, uint16_t); extern int mac_rx_group_add_flow(mac_client_impl_t *, flow_entry_t *, mac_group_t *); extern mblk_t *mac_hwring_tx(mac_ring_handle_t, mblk_t *); @@ -779,6 +792,7 @@ extern void mac_rx_switch_grp_to_sw(mac_group_t *); * MAC address functions are used internally by MAC layer. */ extern mac_address_t *mac_find_macaddr(mac_impl_t *, uint8_t *); +extern mac_address_t *mac_find_macaddr_vlan(mac_impl_t *, uint8_t *, uint16_t); extern boolean_t mac_check_macaddr_shared(mac_address_t *); extern int mac_update_macaddr(mac_address_t *, uint8_t *); extern void mac_freshen_macaddr(mac_address_t *, uint8_t *); @@ -863,8 +877,9 @@ extern int mac_start_group(mac_group_t *); extern void mac_stop_group(mac_group_t *); extern int mac_start_ring(mac_ring_t *); extern void mac_stop_ring(mac_ring_t *); -extern int mac_add_macaddr(mac_impl_t *, mac_group_t *, uint8_t *, boolean_t); -extern int mac_remove_macaddr(mac_address_t *); +extern int mac_add_macaddr_vlan(mac_impl_t *, mac_group_t *, uint8_t *, + uint16_t, boolean_t); +extern int mac_remove_macaddr_vlan(mac_address_t *, uint16_t); extern void mac_set_group_state(mac_group_t *, mac_group_state_t); extern void mac_group_add_client(mac_group_t *, mac_client_impl_t *); diff --git a/usr/src/uts/common/sys/mac_provider.h b/usr/src/uts/common/sys/mac_provider.h index 4c91c03967..301bc9a058 100644 --- a/usr/src/uts/common/sys/mac_provider.h +++ b/usr/src/uts/common/sys/mac_provider.h @@ -21,7 +21,7 @@ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2017, Joyent, Inc. + * Copyright (c) 2018, Joyent, Inc. 
*/ #ifndef _SYS_MAC_PROVIDER_H @@ -281,6 +281,28 @@ typedef enum { } mac_ring_type_t; /* + * The value VLAN_ID_NONE (VID 0) means a client does not have + * membership to any VLAN. However, this statement is true for both + * untagged packets and priority tagged packets leading to confusion + * over what semantic is intended. To the provider, VID 0 is a valid + * VID when priority tagging is in play. To MAC and everything above + * VLAN_ID_NONE almost universally implies untagged traffic. Thus, we + * convert VLAN_ID_NONE to a sentinel value (MAC_VLAN_UNTAGGED) at the + * border between MAC and MAC provider. This informs the provider that + * the client is interested in untagged traffic and the provider + * should set any relevant bits to receive such traffic. + * + * Currently, the API between MAC and the provider passes the VID as a + * unit16_t. In the future this could actually be the entire TCI mask + * (PCP, DEI, and VID). This current scheme is safe in that potential + * future world as well; as 0xFFFF is not a valid TCI (the 0xFFF VID + * is reserved and never transmitted across networks). + */ +#define MAC_VLAN_UNTAGGED UINT16_MAX +#define MAC_VLAN_UNTAGGED_VID(vid) \ + (((vid) == VLAN_ID_NONE) ? MAC_VLAN_UNTAGGED : (vid)) + +/* * Grouping type of a ring group * * MAC_GROUP_TYPE_STATIC: The ring group can not be re-grouped. @@ -358,6 +380,8 @@ typedef struct mac_ring_info_s { * #defines for mri_flags. The flags are temporary flags that are provided * only to workaround issues in specific drivers, and they will be * removed in the future. + * + * These are consumed only by sun4v and neptune (nxge). 
*/ #define MAC_RING_TX_SERIALIZE 0x1 #define MAC_RING_RX_ENQUEUE 0x2 @@ -366,6 +390,8 @@ typedef int (*mac_group_start_t)(mac_group_driver_t); typedef void (*mac_group_stop_t)(mac_group_driver_t); typedef int (*mac_add_mac_addr_t)(void *, const uint8_t *); typedef int (*mac_rem_mac_addr_t)(void *, const uint8_t *); +typedef int (*mac_add_vlan_filter_t)(mac_group_driver_t, uint16_t); +typedef int (*mac_rem_vlan_filter_t)(mac_group_driver_t, uint16_t); struct mac_group_info_s { mac_group_driver_t mgi_driver; /* Driver reference */ @@ -374,9 +400,11 @@ struct mac_group_info_s { uint_t mgi_count; /* Count of rings */ mac_intr_t mgi_intr; /* Optional per-group intr */ - /* Only used for rx groups */ + /* Only used for Rx groups */ mac_add_mac_addr_t mgi_addmac; /* Add a MAC address */ mac_rem_mac_addr_t mgi_remmac; /* Remove a MAC address */ + mac_add_vlan_filter_t mgi_addvlan; /* Add a VLAN filter */ + mac_rem_vlan_filter_t mgi_remvlan; /* Remove a VLAN filter */ }; /* @@ -494,14 +522,14 @@ extern void mac_free(mac_register_t *); extern int mac_register(mac_register_t *, mac_handle_t *); extern int mac_disable_nowait(mac_handle_t); extern int mac_disable(mac_handle_t); -extern int mac_unregister(mac_handle_t); -extern void mac_rx(mac_handle_t, mac_resource_handle_t, +extern int mac_unregister(mac_handle_t); +extern void mac_rx(mac_handle_t, mac_resource_handle_t, mblk_t *); -extern void mac_rx_ring(mac_handle_t, mac_ring_handle_t, +extern void mac_rx_ring(mac_handle_t, mac_ring_handle_t, mblk_t *, uint64_t); -extern void mac_link_update(mac_handle_t, link_state_t); -extern void mac_link_redo(mac_handle_t, link_state_t); -extern void mac_unicst_update(mac_handle_t, +extern void mac_link_update(mac_handle_t, link_state_t); +extern void mac_link_redo(mac_handle_t, link_state_t); +extern void mac_unicst_update(mac_handle_t, const uint8_t *); extern void mac_dst_update(mac_handle_t, const uint8_t *); extern void mac_tx_update(mac_handle_t); |