diff options
Diffstat (limited to 'usr/src/uts/common/io/mac/mac_sched.c')
-rw-r--r-- | usr/src/uts/common/io/mac/mac_sched.c | 423 |
1 files changed, 255 insertions, 168 deletions
diff --git a/usr/src/uts/common/io/mac/mac_sched.c b/usr/src/uts/common/io/mac/mac_sched.c index 8b7f718497..9e1b2b0a55 100644 --- a/usr/src/uts/common/io/mac/mac_sched.c +++ b/usr/src/uts/common/io/mac/mac_sched.c @@ -50,6 +50,8 @@ static mac_tx_cookie_t mac_tx_fanout_mode(mac_soft_ring_set_t *, mblk_t *, uintptr_t, uint16_t, mblk_t **); static mac_tx_cookie_t mac_tx_bw_mode(mac_soft_ring_set_t *, mblk_t *, uintptr_t, uint16_t, mblk_t **); +static mac_tx_cookie_t mac_tx_aggr_mode(mac_soft_ring_set_t *, mblk_t *, + uintptr_t, uint16_t, mblk_t **); typedef struct mac_tx_mode_s { mac_tx_srs_mode_t mac_tx_mode; @@ -57,18 +59,34 @@ typedef struct mac_tx_mode_s { } mac_tx_mode_t; /* - * There are five modes of operation on the Tx side. These modes get set + * There are seven modes of operation on the Tx side. These modes get set * in mac_tx_srs_setup(). Except for the experimental TX_SERIALIZE mode, * none of the other modes are user configurable. They get selected by * the system depending upon whether the link (or flow) has multiple Tx - * rings or a bandwidth configured, etc. + * rings or a bandwidth configured, or if the link is an aggr, etc. + * + * When the Tx SRS is operating in aggr mode (st_mode) or if there are + * multiple Tx rings owned by Tx SRS, then each Tx ring (pseudo or + * otherwise) will have a soft ring associated with it. These soft rings + * are stored in srs_tx_soft_rings[] array. + * + * Additionally in the case of aggr, there is the st_soft_rings[] array + * in the mac_srs_tx_t structure. This array is used to store the same + * set of soft rings that are present in srs_tx_soft_rings[] array but + * in a different manner. The soft ring associated with the pseudo Tx + * ring is saved at mr_index (of the pseudo ring) in st_soft_rings[] + * array. This helps in quickly getting the soft ring associated with the + * Tx ring when aggr_find_tx_ring() returns the pseudo Tx ring that is to + * be used for transmit. 
*/ mac_tx_mode_t mac_tx_mode_list[] = { {SRS_TX_DEFAULT, mac_tx_single_ring_mode}, {SRS_TX_SERIALIZE, mac_tx_serializer_mode}, {SRS_TX_FANOUT, mac_tx_fanout_mode}, {SRS_TX_BW, mac_tx_bw_mode}, - {SRS_TX_BW_FANOUT, mac_tx_bw_mode} + {SRS_TX_BW_FANOUT, mac_tx_bw_mode}, + {SRS_TX_AGGR, mac_tx_aggr_mode}, + {SRS_TX_BW_AGGR, mac_tx_bw_mode} }; /* @@ -307,21 +325,16 @@ int mac_srs_worker_wakeup_ticks = 0; } \ } -#define TX_SINGLE_RING_MODE(mac_srs) \ - ((mac_srs)->srs_tx.st_mode == SRS_TX_DEFAULT || \ - (mac_srs)->srs_tx.st_mode == SRS_TX_SERIALIZE || \ - (mac_srs)->srs_tx.st_mode == SRS_TX_BW) - #define TX_BANDWIDTH_MODE(mac_srs) \ ((mac_srs)->srs_tx.st_mode == SRS_TX_BW || \ - (mac_srs)->srs_tx.st_mode == SRS_TX_BW_FANOUT) + (mac_srs)->srs_tx.st_mode == SRS_TX_BW_FANOUT || \ + (mac_srs)->srs_tx.st_mode == SRS_TX_BW_AGGR) #define TX_SRS_TO_SOFT_RING(mac_srs, head, hint) { \ - uint_t hash, indx; \ - hash = HASH_HINT(hint); \ - indx = COMPUTE_INDEX(hash, mac_srs->srs_oth_ring_count); \ - softring = mac_srs->srs_oth_soft_rings[indx]; \ - (void) (mac_tx_soft_ring_process(softring, head, 0, NULL)); \ + if (tx_mode == SRS_TX_BW_FANOUT) \ + (void) mac_tx_fanout_mode(mac_srs, head, hint, 0, NULL);\ + else \ + (void) mac_tx_aggr_mode(mac_srs, head, hint, 0, NULL); \ } /* @@ -341,7 +354,7 @@ int mac_srs_worker_wakeup_ticks = 0; } else { \ ASSERT(!((srs)->srs_state & SRS_TX_BLOCKED)); \ (srs)->srs_state |= SRS_TX_BLOCKED; \ - (srs)->srs_tx.st_blocked_cnt++; \ + (srs)->srs_tx.st_stat.mts_blockcnt++; \ } \ } @@ -364,7 +377,7 @@ int mac_srs_worker_wakeup_ticks = 0; (srs)->srs_tx.st_hiwat_cnt++; \ if ((srs)->srs_count > (srs)->srs_tx.st_max_q_cnt) { \ /* increment freed stats */ \ - (srs)->srs_tx.st_drop_count += cnt; \ + (srs)->srs_tx.st_stat.mts_sdrops += cnt; \ /* \ * b_prev may be set to the fanout hint \ * hence can't use freemsg directly \ @@ -391,7 +404,7 @@ int mac_srs_worker_wakeup_ticks = 0; #define MAC_TX_SRS_DROP_MESSAGE(srs, mp, cookie) { \ mac_pkt_drop(NULL, NULL, mp, 
B_FALSE); \ /* increment freed stats */ \ - mac_srs->srs_tx.st_drop_count++; \ + mac_srs->srs_tx.st_stat.mts_sdrops++; \ cookie = (mac_tx_cookie_t)srs; \ } @@ -415,7 +428,7 @@ mac_rx_drop_pkt(mac_soft_ring_set_t *srs, mblk_t *mp) MAC_UPDATE_SRS_SIZE_LOCKED(srs, msgdsize(mp)); mutex_exit(&srs->srs_lock); - srs_rx->sr_drop_count++; + srs_rx->sr_stat.mrs_sdrops++; freemsg(mp); } @@ -448,7 +461,7 @@ mac_srs_fire(void *arg) * 'hint' is fanout_hint (type of uint64_t) which is given by the TCP/IP stack, * and it is used on the TX path. */ -#define HASH_HINT(hint) \ +#define HASH_HINT(hint) \ ((hint) ^ ((hint) >> 24) ^ ((hint) >> 16) ^ ((hint) >> 8)) @@ -797,8 +810,8 @@ mac_rx_srs_long_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *mp, * packets or because mblk's need to be concatenated using * pullupmsg(). */ - if (mac_src_ipv6_fanout || !mac_ip_hdr_length_v6(mp, ip6h, - &hdr_len, &nexthdr, NULL, NULL)) { + if (mac_src_ipv6_fanout || !mac_ip_hdr_length_v6(ip6h, + mp->b_wptr, &hdr_len, &nexthdr, NULL)) { goto src_based_fanout; } whereptr = (uint8_t *)ip6h + hdr_len; @@ -1302,13 +1315,8 @@ check_again: tail->b_next = NULL; smcip = mac_srs->srs_mcip; - if ((mac_srs->srs_type & SRST_FLOW) || - (smcip == NULL)) { - FLOW_STAT_UPDATE(mac_srs->srs_flent, - rbytes, sz); - FLOW_STAT_UPDATE(mac_srs->srs_flent, - ipackets, count); - } + SRS_RX_STAT_UPDATE(mac_srs, pollbytes, sz); + SRS_RX_STAT_UPDATE(mac_srs, pollcnt, count); /* * If there are any promiscuous mode callbacks @@ -1316,9 +1324,6 @@ check_again: * if appropriate and also update the counters. 
*/ if (smcip != NULL) { - smcip->mci_stat_ibytes += sz; - smcip->mci_stat_ipackets += count; - if (smcip->mci_mip->mi_promisc_list != NULL) { mutex_exit(lock); mac_promisc_dispatch(smcip->mci_mip, @@ -1331,15 +1336,14 @@ check_again: mac_srs->srs_bw->mac_bw_polled += sz; mutex_exit(&mac_srs->srs_bw->mac_bw_lock); } - srs_rx->sr_poll_count += count; MAC_RX_SRS_ENQUEUE_CHAIN(mac_srs, head, tail, count, sz); if (count <= 10) - srs_rx->sr_chain_cnt_undr10++; + srs_rx->sr_stat.mrs_chaincntundr10++; else if (count > 10 && count <= 50) - srs_rx->sr_chain_cnt_10to50++; + srs_rx->sr_stat.mrs_chaincnt10to50++; else - srs_rx->sr_chain_cnt_over50++; + srs_rx->sr_stat.mrs_chaincntover50++; } /* @@ -1637,10 +1641,17 @@ again: * callbacks for broadcast and multicast packets are delivered from * mac_rx() and we don't need to worry about that case in this path */ - if (mcip != NULL && mcip->mci_promisc_list != NULL) { - mutex_exit(&mac_srs->srs_lock); - mac_promisc_client_dispatch(mcip, head); - mutex_enter(&mac_srs->srs_lock); + if (mcip != NULL) { + if (mcip->mci_promisc_list != NULL) { + mutex_exit(&mac_srs->srs_lock); + mac_promisc_client_dispatch(mcip, head); + mutex_enter(&mac_srs->srs_lock); + } + if (MAC_PROTECT_ENABLED(mcip, MPT_IPNOSPOOF)) { + mutex_exit(&mac_srs->srs_lock); + mac_protect_intercept_dhcp(mcip, head); + mutex_enter(&mac_srs->srs_lock); + } } /* @@ -1886,7 +1897,7 @@ again: /* zero bandwidth: drop all and return to interrupt mode */ mutex_enter(&mac_srs->srs_bw->mac_bw_lock); if (mac_srs->srs_bw->mac_bw_limit == 0) { - srs_rx->sr_drop_count += cnt; + srs_rx->sr_stat.mrs_sdrops += cnt; ASSERT(mac_srs->srs_bw->mac_bw_sz >= sz); mac_srs->srs_bw->mac_bw_sz -= sz; mac_srs->srs_bw->mac_bw_drop_bytes += sz; @@ -1908,10 +1919,17 @@ again: * callbacks for broadcast and multicast packets are delivered from * mac_rx() and we don't need to worry about that case in this path */ - if (mcip != NULL && mcip->mci_promisc_list != NULL) { - mutex_exit(&mac_srs->srs_lock); - 
mac_promisc_client_dispatch(mcip, head); - mutex_enter(&mac_srs->srs_lock); + if (mcip != NULL) { + if (mcip->mci_promisc_list != NULL) { + mutex_exit(&mac_srs->srs_lock); + mac_promisc_client_dispatch(mcip, head); + mutex_enter(&mac_srs->srs_lock); + } + if (MAC_PROTECT_ENABLED(mcip, MPT_IPNOSPOOF)) { + mutex_exit(&mac_srs->srs_lock); + mac_protect_intercept_dhcp(mcip, head); + mutex_enter(&mac_srs->srs_lock); + } } /* @@ -2285,7 +2303,6 @@ mac_rx_srs_process(void *arg, mac_resource_handle_t srs, mblk_t *mp_chain, size_t sz = 0; size_t chain_sz, sz1; mac_bw_ctl_t *mac_bw; - mac_client_impl_t *smcip; mac_srs_rx_t *srs_rx = &mac_srs->srs_rx; /* @@ -2302,15 +2319,14 @@ mac_rx_srs_process(void *arg, mac_resource_handle_t srs, mblk_t *mp_chain, } mutex_enter(&mac_srs->srs_lock); - smcip = mac_srs->srs_mcip; - if (mac_srs->srs_type & SRST_FLOW || smcip == NULL) { - FLOW_STAT_UPDATE(mac_srs->srs_flent, rbytes, sz); - FLOW_STAT_UPDATE(mac_srs->srs_flent, ipackets, count); - } - if (smcip != NULL) { - smcip->mci_stat_ibytes += sz; - smcip->mci_stat_ipackets += count; + if (loopback) { + SRS_RX_STAT_UPDATE(mac_srs, lclbytes, sz); + SRS_RX_STAT_UPDATE(mac_srs, lclcnt, count); + + } else { + SRS_RX_STAT_UPDATE(mac_srs, intrbytes, sz); + SRS_RX_STAT_UPDATE(mac_srs, intrcnt, count); } /* @@ -2323,12 +2339,10 @@ mac_rx_srs_process(void *arg, mac_resource_handle_t srs, mblk_t *mp_chain, mac_bw = mac_srs->srs_bw; ASSERT(mac_bw != NULL); mutex_enter(&mac_bw->mac_bw_lock); - /* Count the packets and bytes via interrupt */ - srs_rx->sr_intr_count += count; mac_bw->mac_bw_intr += sz; if (mac_bw->mac_bw_limit == 0) { /* zero bandwidth: drop all */ - srs_rx->sr_drop_count += count; + srs_rx->sr_stat.mrs_sdrops += count; mac_bw->mac_bw_drop_bytes += sz; mutex_exit(&mac_bw->mac_bw_lock); mutex_exit(&mac_srs->srs_lock); @@ -2370,7 +2384,7 @@ mac_rx_srs_process(void *arg, mac_resource_handle_t srs, mblk_t *mp_chain, } if (head != NULL) { /* Drop any packet over the threshold */ - 
srs_rx->sr_drop_count += count; + srs_rx->sr_stat.mrs_sdrops += count; mutex_enter(&mac_bw->mac_bw_lock); mac_bw->mac_bw_drop_bytes += sz; mutex_exit(&mac_bw->mac_bw_lock); @@ -2392,7 +2406,7 @@ mac_rx_srs_process(void *arg, mac_resource_handle_t srs, mblk_t *mp_chain, if (!(mac_srs->srs_type & SRST_BW_CONTROL) && (srs_rx->sr_poll_pkt_cnt > srs_rx->sr_hiwat)) { mac_bw = mac_srs->srs_bw; - srs_rx->sr_drop_count += count; + srs_rx->sr_stat.mrs_sdrops += count; mutex_enter(&mac_bw->mac_bw_lock); mac_bw->mac_bw_drop_bytes += sz; mutex_exit(&mac_bw->mac_bw_lock); @@ -2402,8 +2416,6 @@ mac_rx_srs_process(void *arg, mac_resource_handle_t srs, mblk_t *mp_chain, } MAC_RX_SRS_ENQUEUE_CHAIN(mac_srs, mp_chain, tail, count, sz); - /* Count the packets entering via interrupt path */ - srs_rx->sr_intr_count += count; if (!(mac_srs->srs_state & SRS_PROC)) { /* @@ -2510,7 +2522,7 @@ mac_tx_srs_enqueue(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, /* * Ignore fanout hint if we don't have multiple tx rings. */ - if (!TX_MULTI_RING_MODE(mac_srs)) + if (!MAC_TX_SOFT_RINGS(mac_srs)) fanout_hint = 0; if (mac_srs->srs_first != NULL) @@ -2550,25 +2562,30 @@ mac_tx_srs_enqueue(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, } /* - * There are five tx modes: + * There are seven tx modes: * * 1) Default mode (SRS_TX_DEFAULT) * 2) Serialization mode (SRS_TX_SERIALIZE) * 3) Fanout mode (SRS_TX_FANOUT) * 4) Bandwidth mode (SRS_TX_BW) * 5) Fanout and Bandwidth mode (SRS_TX_BW_FANOUT) + * 6) aggr Tx mode (SRS_TX_AGGR) + * 7) aggr Tx bw mode (SRS_TX_BW_AGGR) * * The tx mode in which an SRS operates is decided in mac_tx_srs_setup() * based on the number of Tx rings requested for an SRS and whether * bandwidth control is requested or not. * - * In the default mode (i.e., no fanout/no bandwidth), the SRS acts as a - * pass-thru. Packets will go directly to mac_tx_send(). When the underlying - * Tx ring runs out of Tx descs, it starts queueing up packets in SRS. 
- * When flow-control is relieved, the srs_worker drains the queued - * packets and informs blocked clients to restart sending packets. + * The default mode (i.e., no fanout/no bandwidth) is used when the + * underlying NIC does not have Tx rings or just one Tx ring. In this mode, + * the SRS acts as a pass-thru. Packets will go directly to mac_tx_send(). + * When the underlying Tx ring runs out of Tx descs, it starts queueing up + * packets in SRS. When flow-control is relieved, the srs_worker drains + * the queued packets and informs blocked clients to restart sending + * packets. * - * In the SRS_TX_SERIALIZE mode, all calls to mac_tx() are serialized. + * In the SRS_TX_SERIALIZE mode, all calls to mac_tx() are serialized. This + * mode is used when the link has no Tx rings or only one Tx ring. * * In the SRS_TX_FANOUT mode, packets will be fanned out to multiple * Tx rings. Each Tx ring will have a soft ring associated with it. @@ -2581,6 +2598,19 @@ mac_tx_srs_enqueue(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, * SRS. If fanout to multiple Tx rings is configured, the packets will * be fanned out among the soft rings associated with the Tx rings. * + * In SRS_TX_AGGR mode, mac_tx_aggr_mode() routine is called. This routine + * invokes an aggr function, aggr_find_tx_ring(), to find a pseudo Tx ring + * belonging to a port on which the packet has to be sent. Aggr will + * always have a pseudo Tx ring associated with it even when it is an + * aggregation over a single NIC that has no Tx rings. Even in such a + * case, the single pseudo Tx ring will have a soft ring associated with + * it and the soft ring will hang off the SRS. + * + * If a bandwidth is specified for an aggr, SRS_TX_BW_AGGR mode is used. + * In this mode, the bandwidth is first applied on the outgoing packets + * and later mac_tx_aggr_mode() function is called to send the packet out + * of one of the pseudo Tx rings. 
+ * * Four flags are used in srs_state for indicating flow control * conditions : SRS_TX_BLOCKED, SRS_TX_HIWAT, SRS_TX_WAKEUP_CLIENT. * SRS_TX_BLOCKED indicates out of Tx descs. SRS expects a wakeup from the @@ -2625,7 +2655,6 @@ mac_tx_single_ring_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, uintptr_t fanout_hint, uint16_t flag, mblk_t **ret_mp) { mac_srs_tx_t *srs_tx = &mac_srs->srs_tx; - boolean_t is_subflow; mac_tx_stats_t stats; mac_tx_cookie_t cookie = NULL; @@ -2656,10 +2685,8 @@ mac_tx_single_ring_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, mutex_exit(&mac_srs->srs_lock); } - is_subflow = ((mac_srs->srs_type & SRST_FLOW) != 0); - mp_chain = mac_tx_send(srs_tx->st_arg1, srs_tx->st_arg2, - mp_chain, (is_subflow ? &stats : NULL)); + mp_chain, &stats); /* * Multiple threads could be here sending packets. @@ -2676,9 +2703,7 @@ mac_tx_single_ring_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, mutex_exit(&mac_srs->srs_lock); return (cookie); } - - if (is_subflow) - FLOW_TX_STATS_UPDATE(mac_srs->srs_flent, &stats); + SRS_TX_STATS_UPDATE(mac_srs, &stats); return (NULL); } @@ -2696,7 +2721,6 @@ static mac_tx_cookie_t mac_tx_serializer_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, uintptr_t fanout_hint, uint16_t flag, mblk_t **ret_mp) { - boolean_t is_subflow; mac_tx_stats_t stats; mac_tx_cookie_t cookie = NULL; mac_srs_tx_t *srs_tx = &mac_srs->srs_tx; @@ -2726,10 +2750,8 @@ mac_tx_serializer_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, mac_srs->srs_state |= SRS_PROC; mutex_exit(&mac_srs->srs_lock); - is_subflow = ((mac_srs->srs_type & SRST_FLOW) != 0); - mp_chain = mac_tx_send(srs_tx->st_arg1, srs_tx->st_arg2, - mp_chain, (is_subflow ? 
&stats : NULL)); + mp_chain, &stats); mutex_enter(&mac_srs->srs_lock); mac_srs->srs_state &= ~SRS_PROC; @@ -2747,8 +2769,8 @@ mac_tx_serializer_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, } mutex_exit(&mac_srs->srs_lock); - if (is_subflow && cookie == NULL) - FLOW_TX_STATS_UPDATE(mac_srs->srs_flent, &stats); + if (cookie == NULL) + SRS_TX_STATS_UPDATE(mac_srs, &stats); return (cookie); } @@ -2766,8 +2788,8 @@ mac_tx_serializer_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, */ #define MAC_TX_SOFT_RING_PROCESS(chain) { \ - index = COMPUTE_INDEX(hash, mac_srs->srs_oth_ring_count), \ - softring = mac_srs->srs_oth_soft_rings[index]; \ + index = COMPUTE_INDEX(hash, mac_srs->srs_tx_ring_count), \ + softring = mac_srs->srs_tx_soft_rings[index]; \ cookie = mac_tx_soft_ring_process(softring, chain, flag, ret_mp); \ DTRACE_PROBE2(tx__fanout, uint64_t, hash, uint_t, index); \ } @@ -2781,7 +2803,8 @@ mac_tx_fanout_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, uint_t index; mac_tx_cookie_t cookie = NULL; - ASSERT(mac_srs->srs_tx.st_mode == SRS_TX_FANOUT); + ASSERT(mac_srs->srs_tx.st_mode == SRS_TX_FANOUT || + mac_srs->srs_tx.st_mode == SRS_TX_BW_FANOUT); if (fanout_hint != 0) { /* * The hint is specified by the caller, simply pass the @@ -2926,18 +2949,18 @@ mac_tx_bw_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, hash = HASH_HINT(fanout_hint); indx = COMPUTE_INDEX(hash, - mac_srs->srs_oth_ring_count); - softring = mac_srs->srs_oth_soft_rings[indx]; + mac_srs->srs_tx_ring_count); + softring = mac_srs->srs_tx_soft_rings[indx]; return (mac_tx_soft_ring_process(softring, mp_chain, flag, ret_mp)); + } else if (srs_tx->st_mode == SRS_TX_BW_AGGR) { + return (mac_tx_aggr_mode(mac_srs, mp_chain, + fanout_hint, flag, ret_mp)); } else { - boolean_t is_subflow; mac_tx_stats_t stats; - is_subflow = ((mac_srs->srs_type & SRST_FLOW) != 0); - mp_chain = mac_tx_send(srs_tx->st_arg1, srs_tx->st_arg2, - mp_chain, (is_subflow ? 
&stats : NULL)); + mp_chain, &stats); if (mp_chain != NULL) { mutex_enter(&mac_srs->srs_lock); @@ -2951,13 +2974,68 @@ mac_tx_bw_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, mutex_exit(&mac_srs->srs_lock); return (cookie); } - if (is_subflow) - FLOW_TX_STATS_UPDATE(mac_srs->srs_flent, &stats); + SRS_TX_STATS_UPDATE(mac_srs, &stats); return (NULL); } } +/* + * mac_tx_aggr_mode + * + * This routine invokes an aggr function, aggr_find_tx_ring(), to find + * a (pseudo) Tx ring belonging to a port on which the packet has to + * be sent. aggr_find_tx_ring() first finds the outgoing port based on + * L2/L3/L4 policy and then uses the fanout_hint passed to it to pick + * a Tx ring from the selected port. + * + * Note that a port can be deleted from the aggregation. In such a case, + * the aggregation layer first separates the port from the rest of the + * ports making sure that port (and thus any Tx rings associated with + * it) won't get selected in the call to aggr_find_tx_ring() function. + * Later calls are made to mac_group_rem_ring() passing pseudo Tx ring + * handles one by one which in turn will quiesce the Tx SRS and remove + * the soft ring associated with the pseudo Tx ring. Unlike Rx side + * where a cookie is used to protect against mac_rx_ring() calls on + * rings that have been removed, no such cookie is needed on the Tx + * side as the pseudo Tx ring won't be available anymore to + * aggr_find_tx_ring() once the port has been removed. 
+ */ +static mac_tx_cookie_t +mac_tx_aggr_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, + uintptr_t fanout_hint, uint16_t flag, mblk_t **ret_mp) +{ + mac_srs_tx_t *srs_tx = &mac_srs->srs_tx; + mac_tx_ring_fn_t find_tx_ring_fn; + mac_ring_handle_t ring = NULL; + void *arg; + mac_soft_ring_t *sringp; + + find_tx_ring_fn = srs_tx->st_capab_aggr.mca_find_tx_ring_fn; + arg = srs_tx->st_capab_aggr.mca_arg; + if (find_tx_ring_fn(arg, mp_chain, fanout_hint, &ring) == NULL) + return (NULL); + sringp = srs_tx->st_soft_rings[((mac_ring_t *)ring)->mr_index]; + return (mac_tx_soft_ring_process(sringp, mp_chain, flag, ret_mp)); +} + +void +mac_tx_invoke_callbacks(mac_client_impl_t *mcip, mac_tx_cookie_t cookie) +{ + mac_cb_t *mcb; + mac_tx_notify_cb_t *mtnfp; + + /* Wakeup callback registered clients */ + MAC_CALLBACK_WALKER_INC(&mcip->mci_tx_notify_cb_info); + for (mcb = mcip->mci_tx_notify_cb_list; mcb != NULL; + mcb = mcb->mcb_nextp) { + mtnfp = (mac_tx_notify_cb_t *)mcb->mcb_objp; + mtnfp->mtnf_fn(mtnfp->mtnf_arg, cookie); + } + MAC_CALLBACK_WALKER_DCR(&mcip->mci_tx_notify_cb_info, + &mcip->mci_tx_notify_cb_list); +} + /* ARGSUSED */ void mac_tx_srs_drain(mac_soft_ring_set_t *mac_srs, uint_t proc_type) @@ -2966,7 +3044,6 @@ mac_tx_srs_drain(mac_soft_ring_set_t *mac_srs, uint_t proc_type) size_t sz; uint32_t tx_mode; uint_t saved_pkt_count; - boolean_t is_subflow; mac_tx_stats_t stats; mac_srs_tx_t *srs_tx = &mac_srs->srs_tx; clock_t now; @@ -2977,7 +3054,6 @@ mac_tx_srs_drain(mac_soft_ring_set_t *mac_srs, uint_t proc_type) mac_srs->srs_state |= SRS_PROC; - is_subflow = ((mac_srs->srs_type & SRST_FLOW) != 0); tx_mode = srs_tx->st_mode; if (tx_mode == SRS_TX_DEFAULT || tx_mode == SRS_TX_SERIALIZE) { if (mac_srs->srs_first != NULL) { @@ -3000,16 +3076,13 @@ mac_tx_srs_drain(mac_soft_ring_set_t *mac_srs, uint_t proc_type) tail->b_next = mac_srs->srs_first; mac_srs->srs_first = head; mac_srs->srs_count += - (saved_pkt_count - stats.ts_opackets); + (saved_pkt_count - 
stats.mts_opackets); if (mac_srs->srs_last == NULL) mac_srs->srs_last = tail; MAC_TX_SRS_BLOCK(mac_srs, head); } else { srs_tx->st_woken_up = B_FALSE; - if (is_subflow) { - FLOW_TX_STATS_UPDATE( - mac_srs->srs_flent, &stats); - } + SRS_TX_STATS_UPDATE(mac_srs, &stats); } } } else if (tx_mode == SRS_TX_BW) { @@ -3065,10 +3138,10 @@ mac_tx_srs_drain(mac_soft_ring_set_t *mac_srs, uint_t proc_type) tail->b_next = mac_srs->srs_first; mac_srs->srs_first = head; mac_srs->srs_count += - (saved_pkt_count - stats.ts_opackets); + (saved_pkt_count - stats.mts_opackets); if (mac_srs->srs_last == NULL) mac_srs->srs_last = tail; - size_sent = sz - stats.ts_obytes; + size_sent = sz - stats.mts_obytes; mac_srs->srs_size += size_sent; mac_srs->srs_bw->mac_bw_sz += size_sent; if (mac_srs->srs_bw->mac_bw_used > size_sent) { @@ -3080,15 +3153,11 @@ mac_tx_srs_drain(mac_soft_ring_set_t *mac_srs, uint_t proc_type) MAC_TX_SRS_BLOCK(mac_srs, head); } else { srs_tx->st_woken_up = B_FALSE; - if (is_subflow) { - FLOW_TX_STATS_UPDATE( - mac_srs->srs_flent, &stats); - } + SRS_TX_STATS_UPDATE(mac_srs, &stats); } } - } else if (tx_mode == SRS_TX_BW_FANOUT) { + } else if (tx_mode == SRS_TX_BW_FANOUT || tx_mode == SRS_TX_BW_AGGR) { mblk_t *prev; - mac_soft_ring_t *softring; uint64_t hint; /* @@ -3155,8 +3224,6 @@ mac_tx_srs_drain(mac_soft_ring_set_t *mac_srs, uint_t proc_type) */ if (mac_srs->srs_count == 0 && (mac_srs->srs_state & (SRS_TX_HIWAT | SRS_TX_WAKEUP_CLIENT | SRS_ENQUEUED))) { - mac_tx_notify_cb_t *mtnfp; - mac_cb_t *mcb; mac_client_impl_t *mcip = mac_srs->srs_mcip; boolean_t wakeup_required = B_FALSE; @@ -3168,16 +3235,7 @@ mac_tx_srs_drain(mac_soft_ring_set_t *mac_srs, uint_t proc_type) SRS_TX_WAKEUP_CLIENT | SRS_ENQUEUED); mutex_exit(&mac_srs->srs_lock); if (wakeup_required) { - /* Wakeup callback registered clients */ - MAC_CALLBACK_WALKER_INC(&mcip->mci_tx_notify_cb_info); - for (mcb = mcip->mci_tx_notify_cb_list; mcb != NULL; - mcb = mcb->mcb_nextp) { - mtnfp = (mac_tx_notify_cb_t 
*)mcb->mcb_objp; - mtnfp->mtnf_fn(mtnfp->mtnf_arg, - (mac_tx_cookie_t)mac_srs); - } - MAC_CALLBACK_WALKER_DCR(&mcip->mci_tx_notify_cb_info, - &mcip->mci_tx_notify_cb_list); + mac_tx_invoke_callbacks(mcip, (mac_tx_cookie_t)mac_srs); /* * If the client is not the primary MAC client, then we * need to send the notification to the clients upper @@ -3276,11 +3334,10 @@ mac_tx_send(mac_client_handle_t mch, mac_ring_handle_t ring, mblk_t *mp_chain, } /* - * Fastpath: if there's only one client, and there's no - * multicast listeners, we simply send the packet down to the - * underlying NIC. + * Fastpath: if there's only one client, we simply send + * the packet down to the underlying NIC. */ - if (mip->mi_nactiveclients == 1 && mip->mi_promisc_list == NULL) { + if (mip->mi_nactiveclients == 1) { DTRACE_PROBE2(fastpath, mac_client_impl_t *, src_mcip, mblk_t *, mp_chain); @@ -3293,9 +3350,7 @@ mac_tx_send(mac_client_handle_t mch, mac_ring_handle_t ring, mblk_t *mp_chain, msgdsize(mp)); CHECK_VID_AND_ADD_TAG(mp); - MAC_TX(mip, ring, mp, - ((src_mcip->mci_state_flags & MCIS_SHARE_BOUND) != - 0)); + MAC_TX(mip, ring, mp, src_mcip); /* * If the driver is out of descriptors and does a @@ -3336,12 +3391,6 @@ mac_tx_send(mac_client_handle_t mch, mac_ring_handle_t ring, mblk_t *mp_chain, CHECK_VID_AND_ADD_TAG(mp); /* - * Check if there are promiscuous mode callbacks defined. - */ - if (mip->mi_promisc_list != NULL) - mac_promisc_dispatch(mip, mp, src_mcip); - - /* * Find the destination. */ dst_flow_ent = mac_tx_classify(mip, mp); @@ -3395,16 +3444,31 @@ mac_tx_send(mac_client_handle_t mch, mac_ring_handle_t ring, mblk_t *mp_chain, B_TRUE); } else { /* - * loopback the packet to a - * local MAC client. We force a context - * switch if both source and destination - * MAC clients are used by IP, i.e. bypass - * is set. + * loopback the packet to a local MAC + * client. We force a context switch + * if both source and destination MAC + * clients are used by IP, i.e. + * bypass is set. 
*/ boolean_t do_switch; mac_client_impl_t *dst_mcip = dst_flow_ent->fe_mcip; + /* + * Check if there are promiscuous mode + * callbacks defined. This check is + * done here in the 'else' case and + * not in other cases because this + * path is for local loopback + * communication which does not go + * through MAC_TX(). For paths that go + * through MAC_TX(), the promisc_list + * check is done inside the MAC_TX() + * macro. + */ + if (mip->mi_promisc_list != NULL) + mac_promisc_dispatch(mip, mp, src_mcip); + do_switch = ((src_mcip->mci_state_flags & dst_mcip->mci_state_flags & MCIS_CLIENT_POLL_CAPABLE) != 0); @@ -3422,9 +3486,7 @@ mac_tx_send(mac_client_handle_t mch, mac_ring_handle_t ring, mblk_t *mp_chain, * Unknown destination, send via the underlying * NIC. */ - MAC_TX(mip, ring, mp, - ((src_mcip->mci_state_flags & MCIS_SHARE_BOUND) != - 0)); + MAC_TX(mip, ring, mp, src_mcip); if (mp != NULL) { /* * Adjust for the last packet that @@ -3440,15 +3502,9 @@ mac_tx_send(mac_client_handle_t mch, mac_ring_handle_t ring, mblk_t *mp_chain, } done: - src_mcip->mci_stat_obytes += obytes; - src_mcip->mci_stat_opackets += opackets; - src_mcip->mci_stat_oerrors += oerrors; - - if (stats != NULL) { - stats->ts_opackets = opackets; - stats->ts_obytes = obytes; - stats->ts_oerrors = oerrors; - } + stats->mts_obytes = obytes; + stats->mts_opackets = opackets; + stats->mts_oerrors = oerrors; return (mp); } @@ -3466,8 +3522,8 @@ mac_tx_srs_ring_present(mac_soft_ring_set_t *srs, mac_ring_t *tx_ring) if (srs->srs_tx.st_arg2 == tx_ring) return (B_TRUE); - for (i = 0; i < srs->srs_oth_ring_count; i++) { - soft_ring = srs->srs_oth_soft_rings[i]; + for (i = 0; i < srs->srs_tx_ring_count; i++) { + soft_ring = srs->srs_tx_soft_rings[i]; if (soft_ring->s_ring_tx_arg2 == tx_ring) return (B_TRUE); } @@ -3476,6 +3532,29 @@ mac_tx_srs_ring_present(mac_soft_ring_set_t *srs, mac_ring_t *tx_ring) } /* + * mac_tx_srs_get_soft_ring + * + * Returns the TX soft ring associated with the given ring, if 
present. + */ +mac_soft_ring_t * +mac_tx_srs_get_soft_ring(mac_soft_ring_set_t *srs, mac_ring_t *tx_ring) +{ + int i; + mac_soft_ring_t *soft_ring; + + if (srs->srs_tx.st_arg2 == tx_ring) + return (NULL); + + for (i = 0; i < srs->srs_tx_ring_count; i++) { + soft_ring = srs->srs_tx_soft_rings[i]; + if (soft_ring->s_ring_tx_arg2 == tx_ring) + return (soft_ring); + } + + return (NULL); +} + +/* * mac_tx_srs_wakeup * * Called when Tx desc become available. Wakeup the appropriate worker @@ -3490,11 +3569,16 @@ mac_tx_srs_wakeup(mac_soft_ring_set_t *mac_srs, mac_ring_handle_t ring) mac_srs_tx_t *srs_tx = &mac_srs->srs_tx; mutex_enter(&mac_srs->srs_lock); - if (TX_SINGLE_RING_MODE(mac_srs)) { + /* + * srs_tx_ring_count == 0 is the single ring mode case. In + * this mode, there will not be Tx soft rings associated + * with the SRS. + */ + if (!MAC_TX_SOFT_RINGS(mac_srs)) { if (srs_tx->st_arg2 == ring && mac_srs->srs_state & SRS_TX_BLOCKED) { mac_srs->srs_state &= ~SRS_TX_BLOCKED; - srs_tx->st_unblocked_cnt++; + srs_tx->st_stat.mts_unblockcnt++; cv_signal(&mac_srs->srs_async); } /* @@ -3507,15 +3591,17 @@ mac_tx_srs_wakeup(mac_soft_ring_set_t *mac_srs, mac_ring_handle_t ring) return; } - /* If you are here, it is for FANOUT or BW_FANOUT case */ - ASSERT(TX_MULTI_RING_MODE(mac_srs)); - for (i = 0; i < mac_srs->srs_oth_ring_count; i++) { - sringp = mac_srs->srs_oth_soft_rings[i]; + /* + * If you are here, it is for FANOUT, BW_FANOUT, + * AGGR_MODE or AGGR_BW_MODE case + */ + for (i = 0; i < mac_srs->srs_tx_ring_count; i++) { + sringp = mac_srs->srs_tx_soft_rings[i]; mutex_enter(&sringp->s_ring_lock); if (sringp->s_ring_tx_arg2 == ring) { if (sringp->s_ring_state & S_RING_BLOCK) { sringp->s_ring_state &= ~S_RING_BLOCK; - sringp->s_ring_unblocked_cnt++; + sringp->s_st_stat.mts_unblockcnt++; cv_signal(&sringp->s_ring_async); } sringp->s_ring_tx_woken_up = B_TRUE; @@ -3619,6 +3705,7 @@ mac_rx_soft_ring_process(mac_client_impl_t *mcip, mac_soft_ring_t *ringp, 
mutex_enter(&ringp->s_ring_lock); ringp->s_ring_total_inpkt += cnt; + ringp->s_ring_total_rbytes += sz; if ((mac_srs->srs_rx.sr_poll_pkt_cnt <= 1) && !(ringp->s_ring_type & ST_RING_WORKER_ONLY)) { /* If on processor or blanking on, then enqueue and return */ @@ -3831,11 +3918,14 @@ mac_tx_soft_ring_process(mac_soft_ring_t *ringp, mblk_t *mp_chain, ASSERT(mp_chain != NULL); ASSERT(MUTEX_NOT_HELD(&ringp->s_ring_lock)); /* - * Only two modes can come here; either it can be - * SRS_TX_BW_FANOUT or SRS_TX_FANOUT + * The following modes can come here: SRS_TX_BW_FANOUT, + * SRS_TX_FANOUT, SRS_TX_AGGR, SRS_TX_BW_AGGR. */ + ASSERT(MAC_TX_SOFT_RINGS(mac_srs)); ASSERT(mac_srs->srs_tx.st_mode == SRS_TX_FANOUT || - mac_srs->srs_tx.st_mode == SRS_TX_BW_FANOUT); + mac_srs->srs_tx.st_mode == SRS_TX_BW_FANOUT || + mac_srs->srs_tx.st_mode == SRS_TX_AGGR || + mac_srs->srs_tx.st_mode == SRS_TX_BW_AGGR); if (ringp->s_ring_type & ST_RING_WORKER_ONLY) { /* Serialization mode */ @@ -3871,7 +3961,6 @@ mac_tx_soft_ring_process(mac_soft_ring_t *ringp, mblk_t *mp_chain, * tx_srs_drain() completely drains out the * messages. */ - boolean_t is_subflow; mac_tx_stats_t stats; if (ringp->s_ring_state & S_RING_ENQUEUED) { @@ -3890,11 +3979,9 @@ mac_tx_soft_ring_process(mac_soft_ring_t *ringp, mblk_t *mp_chain, */ mutex_exit(&ringp->s_ring_lock); } - is_subflow = ((mac_srs->srs_type & SRST_FLOW) != 0); mp_chain = mac_tx_send(ringp->s_ring_tx_arg1, - ringp->s_ring_tx_arg2, mp_chain, - (is_subflow ? &stats : NULL)); + ringp->s_ring_tx_arg2, mp_chain, &stats); /* * Multiple threads could be here sending packets. @@ -3912,9 +3999,9 @@ mac_tx_soft_ring_process(mac_soft_ring_t *ringp, mblk_t *mp_chain, mutex_exit(&ringp->s_ring_lock); return (cookie); } - if (is_subflow) { - FLOW_TX_STATS_UPDATE(mac_srs->srs_flent, &stats); - } + SRS_TX_STATS_UPDATE(mac_srs, &stats); + SOFTRING_TX_STATS_UPDATE(ringp, &stats); + return (NULL); } } |