Diffstat (limited to 'usr/src/uts/common/io/mac/mac_sched.c')
-rw-r--r--  usr/src/uts/common/io/mac/mac_sched.c  552
1 file changed, 309 insertions(+), 243 deletions(-)
diff --git a/usr/src/uts/common/io/mac/mac_sched.c b/usr/src/uts/common/io/mac/mac_sched.c
index 290366f5d2..927e3842d3 100644
--- a/usr/src/uts/common/io/mac/mac_sched.c
+++ b/usr/src/uts/common/io/mac/mac_sched.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -515,25 +515,27 @@ static void
mac_rx_srs_proto_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head)
{
struct ether_header *ehp;
- uint16_t etype;
+ struct ether_vlan_header *evhp;
+ uint32_t sap;
ipha_t *ipha;
- mac_soft_ring_t *softring;
- size_t ether_hlen;
+ uint8_t *dstaddr;
+ size_t hdrsize;
mblk_t *mp;
mblk_t *headmp[MAX_SR_TYPES];
mblk_t *tailmp[MAX_SR_TYPES];
int cnt[MAX_SR_TYPES];
size_t sz[MAX_SR_TYPES];
size_t sz1;
- boolean_t bw_ctl = B_FALSE;
+ boolean_t bw_ctl;
boolean_t hw_classified;
- boolean_t dls_bypass = B_TRUE;
- enum pkt_type type;
+ boolean_t dls_bypass;
+ boolean_t is_ether;
+ boolean_t is_unicast;
+ enum pkt_type type;
mac_client_impl_t *mcip = mac_srs->srs_mcip;
- struct ether_vlan_header *evhp;
- if (mac_srs->srs_type & SRST_BW_CONTROL)
- bw_ctl = B_TRUE;
+ is_ether = (mcip->mci_mip->mi_info.mi_nativemedia == DL_ETHER);
+ bw_ctl = ((mac_srs->srs_type & SRST_BW_CONTROL) != 0);
/*
* If we don't have a Rx ring, S/W classification would have done
@@ -550,8 +552,7 @@ mac_rx_srs_proto_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head)
* processing in the Rx path. SRST_DLS_BYPASS will be clear for
* such SRSs.
*/
- if (!(mac_srs->srs_type & SRST_DLS_BYPASS))
- dls_bypass = B_FALSE;
+ dls_bypass = ((mac_srs->srs_type & SRST_DLS_BYPASS) != 0);
bzero(headmp, MAX_SR_TYPES * sizeof (mblk_t *));
bzero(tailmp, MAX_SR_TYPES * sizeof (mblk_t *));
@@ -570,68 +571,62 @@ mac_rx_srs_proto_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head)
mp->b_next = NULL;
type = OTH;
- sz1 = msgdsize(mp);
-
- if (!dls_bypass) {
- mac_impl_t *mip = mcip->mci_mip;
+ sz1 = (mp->b_cont == NULL) ? MBLKL(mp) : msgdsize(mp);
+ if (is_ether) {
+ /*
+ * At this point we can be sure the packet at least
+ * has an ether header.
+ */
+ if (sz1 < sizeof (struct ether_header)) {
+ mac_rx_drop_pkt(mac_srs, mp);
+ continue;
+ }
ehp = (struct ether_header *)mp->b_rptr;
/*
- * For VLAN packets, if the VLAN id doesn't belong
- * to this client, we drop the packet.
+ * Determine if this is a VLAN or non-VLAN packet.
*/
- if (mip->mi_info.mi_nativemedia == DL_ETHER &&
- ntohs(ehp->ether_type) == VLAN_TPID) {
+ if ((sap = ntohs(ehp->ether_type)) == VLAN_TPID) {
+ evhp = (struct ether_vlan_header *)mp->b_rptr;
+ sap = ntohs(evhp->ether_type);
+ hdrsize = sizeof (struct ether_vlan_header);
/*
- * LINTED: cast may result in improper
- * alignment
+ * Check if the VID of the packet, if any,
+ * belongs to this client.
*/
- evhp = (struct ether_vlan_header *)ehp;
if (!mac_client_check_flow_vid(mcip,
VLAN_ID(ntohs(evhp->ether_tci)))) {
mac_rx_drop_pkt(mac_srs, mp);
continue;
}
+ } else {
+ hdrsize = sizeof (struct ether_header);
}
- FANOUT_ENQUEUE_MP(headmp[type], tailmp[type],
- cnt[type], bw_ctl, sz[type], sz1, mp);
- continue;
- }
-
- /*
- * At this point we can be sure the packet at least
- * has an ether header.
- */
- if (sz1 < sizeof (struct ether_header)) {
- mac_rx_drop_pkt(mac_srs, mp);
- continue;
- }
- /* LINTED: cast may result in improper alignment */
- ehp = (struct ether_header *)mp->b_rptr;
+ is_unicast =
+ ((((uint8_t *)&ehp->ether_dhost)[0] & 0x01) == 0);
+ dstaddr = (uint8_t *)&ehp->ether_dhost;
+ } else {
+ mac_header_info_t mhi;
- /*
- * Determine if this is a VLAN or non-VLAN packet.
- */
- if ((etype = ntohs(ehp->ether_type)) == VLAN_TPID) {
- /* LINTED: cast may result in improper alignment */
- evhp = (struct ether_vlan_header *)mp->b_rptr;
- etype = ntohs(evhp->ether_type);
- ether_hlen = sizeof (struct ether_vlan_header);
- /*
- * Check if the VID of the packet, if any, belongs
- * to this client.
- */
- if (!mac_client_check_flow_vid(mcip,
- VLAN_ID(ntohs(evhp->ether_tci)))) {
+ if (mac_header_info((mac_handle_t)mcip->mci_mip,
+ mp, &mhi) != 0) {
mac_rx_drop_pkt(mac_srs, mp);
continue;
}
- } else {
- ether_hlen = sizeof (struct ether_header);
+ hdrsize = mhi.mhi_hdrsize;
+ sap = mhi.mhi_bindsap;
+ is_unicast = (mhi.mhi_dsttype == MAC_ADDRTYPE_UNICAST);
+ dstaddr = (uint8_t *)mhi.mhi_daddr;
}
- if (etype == ETHERTYPE_IP) {
+ if (!dls_bypass) {
+ FANOUT_ENQUEUE_MP(headmp[type], tailmp[type],
+ cnt[type], bw_ctl, sz[type], sz1, mp);
+ continue;
+ }
+
+ if (sap == ETHERTYPE_IP) {
/*
* If we are H/W classified, but we have promisc
* on, then we need to check for the unicast address.
@@ -641,12 +636,11 @@ mac_rx_srs_proto_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head)
rw_enter(&mcip->mci_rw_lock, RW_READER);
map = mcip->mci_unicast;
- if (bcmp(&ehp->ether_dhost, map->ma_addr,
+ if (bcmp(dstaddr, map->ma_addr,
map->ma_len) == 0)
type = UNDEF;
rw_exit(&mcip->mci_rw_lock);
- } else if (((((uint8_t *)&ehp->ether_dhost)[0] &
- 0x01) == 0)) {
+ } else if (is_unicast) {
type = UNDEF;
}
}
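
Stripped of the SRS plumbing, the VLAN detection above reduces to a small, self-contained routine. The following is a minimal standalone sketch in portable C; the struct layouts and the 0x8100 TPID constant are hand-rolled stand-ins for the sys/ethernet.h definitions, so treat those names as assumptions rather than the kernel's own.

#include <stdint.h>
#include <stddef.h>
#include <arpa/inet.h>	/* ntohs() */

#define	SKETCH_VLAN_TPID	0x8100	/* IEEE 802.1Q tag protocol id */

struct ether_hdr {			/* stand-in for ether_header */
	uint8_t		dhost[6];
	uint8_t		shost[6];
	uint16_t	type;
};

struct ether_vlan_hdr {			/* tagged variant, 4 bytes longer */
	uint8_t		dhost[6];
	uint8_t		shost[6];
	uint16_t	tpid;		/* 0x8100 for 802.1Q */
	uint16_t	tci;		/* priority, CFI, 12-bit VID */
	uint16_t	type;		/* encapsulated ethertype (the SAP) */
};

/*
 * Classify one frame: return the SAP (the inner ethertype for tagged
 * frames), the MAC header size to skip, and whether the destination
 * is unicast (group addresses have the low bit of the first octet
 * set).  Returns -1 for runt frames, which the caller drops.  The
 * casts carry the same "improper alignment" caveat the old LINTED
 * comments noted.
 */
static int
parse_ether(const uint8_t *pkt, size_t len, uint32_t *sap,
    size_t *hdrsize, int *is_unicast)
{
	const struct ether_hdr *ehp = (const struct ether_hdr *)pkt;

	if (len < sizeof (struct ether_hdr))
		return (-1);

	if (ntohs(ehp->type) == SKETCH_VLAN_TPID) {
		const struct ether_vlan_hdr *evhp =
		    (const struct ether_vlan_hdr *)pkt;

		if (len < sizeof (*evhp))
			return (-1);
		*sap = ntohs(evhp->type);
		*hdrsize = sizeof (*evhp);
	} else {
		*sap = ntohs(ehp->type);
		*hdrsize = sizeof (*ehp);
	}
	*is_unicast = ((ehp->dhost[0] & 0x01) == 0);
	return (0);
}

For non-Ethernet media this is exactly what the new mac_header_info() branch abstracts away: hdrsize, bindsap, destination type, and destination address come back in a mac_header_info_t instead.
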
@@ -665,8 +659,7 @@ mac_rx_srs_proto_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head)
* the 'OTH' type path without DLS bypass.
*/
- /* LINTED: cast may result in improper alignment */
- ipha = (ipha_t *)(mp->b_rptr + ether_hlen);
+ ipha = (ipha_t *)(mp->b_rptr + hdrsize);
if ((type != OTH) && MBLK_RX_FANOUT_SLOWPATH(mp, ipha))
type = OTH;
@@ -686,25 +679,25 @@ mac_rx_srs_proto_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head)
switch (ipha->ipha_protocol) {
case IPPROTO_TCP:
type = V4_TCP;
- mp->b_rptr += ether_hlen;
+ mp->b_rptr += hdrsize;
break;
case IPPROTO_UDP:
type = V4_UDP;
- mp->b_rptr += ether_hlen;
+ mp->b_rptr += hdrsize;
break;
default:
type = OTH;
break;
}
- ASSERT(type != UNDEF);
-
FANOUT_ENQUEUE_MP(headmp[type], tailmp[type], cnt[type],
bw_ctl, sz[type], sz1, mp);
}
for (type = V4_TCP; type < UNDEF; type++) {
if (headmp[type] != NULL) {
+ mac_soft_ring_t *softring;
+
ASSERT(tailmp[type]->b_next == NULL);
switch (type) {
case V4_TCP:
@@ -716,7 +709,7 @@ mac_rx_srs_proto_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head)
case OTH:
softring = mac_srs->srs_oth_soft_rings[0];
}
- mac_rx_soft_ring_process(mac_srs->srs_mcip, softring,
+ mac_rx_soft_ring_process(mcip, softring,
headmp[type], tailmp[type], cnt[type], sz[type]);
}
}
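
FANOUT_ENQUEUE_MP itself is a private macro in this file; the sketch below only shows the head/tail chaining idea it implements, using a generic singly-linked packet type as a hypothetical stand-in for mblk_t.

#include <stddef.h>

enum pkt_type { V4_TCP, V4_UDP, OTH, UNDEF };	/* abbreviated */

struct pkt {				/* stand-in for an mblk_t */
	struct pkt	*next;		/* b_next analogue */
	size_t		size;		/* payload bytes */
};

struct bucket {				/* one per pkt_type */
	struct pkt	*head;
	struct pkt	*tail;
	int		cnt;		/* packets queued */
	size_t		sz;		/* bytes queued, for B/W control */
};

/*
 * Append one packet to a per-type chain in O(1) by keeping a tail
 * pointer, the way FANOUT_ENQUEUE_MP builds headmp[]/tailmp[] with
 * cnt[] and sz[] alongside.
 */
static void
fanout_enqueue(struct bucket *b, struct pkt *p)
{
	p->next = NULL;
	if (b->head == NULL)
		b->head = p;
	else
		b->tail->next = p;
	b->tail = p;
	b->cnt++;
	b->sz += p->size;
}

The payoff is in the drain loop that follows the classification pass: each non-empty bucket is handed to its soft ring in a single mac_rx_soft_ring_process() call, so the per-ring locking cost is paid once per type rather than once per packet.
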
@@ -731,7 +724,7 @@ int fanout_unalligned = 0;
*/
static int
mac_rx_srs_long_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *mp,
- uint16_t etype, enum pkt_type *type, uint_t *indx)
+ uint32_t sap, size_t hdrsize, enum pkt_type *type, uint_t *indx)
{
ip6_t *ip6h;
uint8_t *whereptr;
@@ -740,18 +733,18 @@ mac_rx_srs_long_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *mp,
uint8_t nexthdr;
uint16_t hdr_len;
- if (etype == ETHERTYPE_IPV6) {
+ if (sap == ETHERTYPE_IPV6) {
boolean_t modifiable = B_TRUE;
- ASSERT(MBLKL(mp) >= sizeof (struct ether_header));
+ ASSERT(MBLKL(mp) >= hdrsize);
- ip6h = (ip6_t *)(mp->b_rptr + sizeof (struct ether_header));
+ ip6h = (ip6_t *)(mp->b_rptr + hdrsize);
if ((unsigned char *)ip6h == mp->b_wptr) {
/*
- * The first mblk_t only includes the ethernet header.
+ * The first mblk_t only includes the mac header.
* Note that it is safe to change the mp pointer here,
* as the subsequent operation does not assume mp
- * points to the start of the ethernet header.
+ * points to the start of the mac header.
*/
mp = mp->b_cont;
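
The only functional change in this hunk is that the L3 offset is now the caller-supplied hdrsize rather than a hard-coded sizeof (struct ether_header). A minimal sketch of the buffer-boundary case the comment describes, using a simplified two-pointer buffer as a stand-in for mblk_t:

#include <stdint.h>
#include <stddef.h>

struct buf {				/* simplified mblk_t stand-in */
	uint8_t		*rptr;		/* first valid byte */
	uint8_t		*wptr;		/* one past the last valid byte */
	struct buf	*cont;		/* continuation buffer (b_cont) */
};

/*
 * Return a pointer to the L3 header, hdrsize bytes past the start of
 * the MAC header.  If the first buffer holds exactly the MAC header,
 * the L3 header starts at the continuation buffer, which is the case
 * mac_rx_srs_long_fanout() steps over before reading the ip6_t.
 */
static uint8_t *
l3_header(struct buf *bp, size_t hdrsize)
{
	uint8_t *l3 = bp->rptr + hdrsize;

	if (l3 == bp->wptr && bp->cont != NULL)
		l3 = bp->cont->rptr;
	return (l3);
}
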
@@ -900,32 +893,32 @@ static void
mac_rx_srs_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head)
{
struct ether_header *ehp;
- uint16_t etype;
+ struct ether_vlan_header *evhp;
+ uint32_t sap;
ipha_t *ipha;
+ uint8_t *dstaddr;
uint_t indx;
- int ports_offset = -1;
- int ipha_len;
+ size_t ports_offset;
+ size_t ipha_len;
+ size_t hdrsize;
uint_t hash;
- mac_soft_ring_t *softring;
- size_t ether_hlen;
- uint16_t frag_offset_flags;
mblk_t *mp;
mblk_t *headmp[MAX_SR_TYPES][MAX_SR_FANOUT];
mblk_t *tailmp[MAX_SR_TYPES][MAX_SR_FANOUT];
int cnt[MAX_SR_TYPES][MAX_SR_FANOUT];
size_t sz[MAX_SR_TYPES][MAX_SR_FANOUT];
size_t sz1;
- boolean_t bw_ctl = B_FALSE;
+ boolean_t bw_ctl;
boolean_t hw_classified;
- boolean_t dls_bypass = B_TRUE;
- int i;
+ boolean_t dls_bypass;
+ boolean_t is_ether;
+ boolean_t is_unicast;
int fanout_cnt;
- enum pkt_type type;
+ enum pkt_type type;
mac_client_impl_t *mcip = mac_srs->srs_mcip;
- struct ether_vlan_header *evhp;
- if (mac_srs->srs_type & SRST_BW_CONTROL)
- bw_ctl = B_TRUE;
+ is_ether = (mcip->mci_mip->mi_info.mi_nativemedia == DL_ETHER);
+ bw_ctl = ((mac_srs->srs_type & SRST_BW_CONTROL) != 0);
/*
* If we don't have a Rx ring, S/W classification would have done
@@ -942,8 +935,7 @@ mac_rx_srs_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head)
* processing in the Rx path. SRST_DLS_BYPASS will be clear for
* such SRSs.
*/
- if (!(mac_srs->srs_type & SRST_DLS_BYPASS))
- dls_bypass = B_FALSE;
+ dls_bypass = ((mac_srs->srs_type & SRST_DLS_BYPASS) != 0);
/*
* Since the softrings are never destroyed and we always
@@ -972,77 +964,66 @@ mac_rx_srs_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head)
mp->b_next = NULL;
type = OTH;
- sz1 = msgdsize(mp);
+ sz1 = (mp->b_cont == NULL) ? MBLKL(mp) : msgdsize(mp);
- if (!dls_bypass) {
- mac_impl_t *mip = mcip->mci_mip;
+ if (is_ether) {
+ /*
+ * At this point we can be sure the packet at least
+ * has an ether header.
+ */
+ if (sz1 < sizeof (struct ether_header)) {
+ mac_rx_drop_pkt(mac_srs, mp);
+ continue;
+ }
+ ehp = (struct ether_header *)mp->b_rptr;
- indx = 0;
- if (mip->mi_info.mi_nativemedia == DL_ETHER) {
- ehp = (struct ether_header *)mp->b_rptr;
- etype = ntohs(ehp->ether_type);
+ /*
+ * Determine if this is a VLAN or non-VLAN packet.
+ */
+ if ((sap = ntohs(ehp->ether_type)) == VLAN_TPID) {
+ evhp = (struct ether_vlan_header *)mp->b_rptr;
+ sap = ntohs(evhp->ether_type);
+ hdrsize = sizeof (struct ether_vlan_header);
/*
- * For VLAN packets, if the VLAN id doesn't
- * belong to this client, we drop the packet.
+ * Check if the VID of the packet, if any,
+ * belongs to this client.
*/
- if (etype == VLAN_TPID) {
- /*
- * LINTED: cast may result in improper
- * alignment
- */
- evhp = (struct ether_vlan_header *)
- mp->b_rptr;
- if (!mac_client_check_flow_vid(mcip,
- VLAN_ID(ntohs(evhp->ether_tci)))) {
- mac_rx_drop_pkt(mac_srs, mp);
- continue;
- }
- }
- if (mac_rx_srs_long_fanout(mac_srs, mp, etype,
- &type, &indx) == -1) {
+ if (!mac_client_check_flow_vid(mcip,
+ VLAN_ID(ntohs(evhp->ether_tci)))) {
mac_rx_drop_pkt(mac_srs, mp);
continue;
}
+ } else {
+ hdrsize = sizeof (struct ether_header);
}
+ is_unicast =
+ ((((uint8_t *)&ehp->ether_dhost)[0] & 0x01) == 0);
+ dstaddr = (uint8_t *)&ehp->ether_dhost;
+ } else {
+ mac_header_info_t mhi;
- FANOUT_ENQUEUE_MP(headmp[type][indx],
- tailmp[type][indx], cnt[type][indx], bw_ctl,
- sz[type][indx], sz1, mp);
- continue;
- }
-
- /*
- * At this point we can be sure the packet at least
- * has an ether header. On the outbound side, GLD/stack
- * ensure this. On the inbound side, the driver needs
- * to ensure this.
- */
- if (sz1 < sizeof (struct ether_header)) {
- mac_rx_drop_pkt(mac_srs, mp);
- continue;
+ if (mac_header_info((mac_handle_t)mcip->mci_mip,
+ mp, &mhi) != 0) {
+ mac_rx_drop_pkt(mac_srs, mp);
+ continue;
+ }
+ hdrsize = mhi.mhi_hdrsize;
+ sap = mhi.mhi_bindsap;
+ is_unicast = (mhi.mhi_dsttype == MAC_ADDRTYPE_UNICAST);
+ dstaddr = (uint8_t *)mhi.mhi_daddr;
}
- /* LINTED: cast may result in improper alignment */
- ehp = (struct ether_header *)mp->b_rptr;
- /*
- * Determine if this is a VLAN or non-VLAN packet.
- */
- if ((etype = ntohs(ehp->ether_type)) == VLAN_TPID) {
- /* LINTED: cast may result in improper alignment */
- evhp = (struct ether_vlan_header *)mp->b_rptr;
- etype = ntohs(evhp->ether_type);
- ether_hlen = sizeof (struct ether_vlan_header);
- /*
- * Check if the VID of the packet, if any, belongs
- * to this client.
- */
- if (!mac_client_check_flow_vid(mcip,
- VLAN_ID(ntohs(evhp->ether_tci)))) {
+ if (!dls_bypass) {
+ if (mac_rx_srs_long_fanout(mac_srs, mp, sap,
+ hdrsize, &type, &indx) == -1) {
mac_rx_drop_pkt(mac_srs, mp);
continue;
}
- } else {
- ether_hlen = sizeof (struct ether_header);
+
+ FANOUT_ENQUEUE_MP(headmp[type][indx],
+ tailmp[type][indx], cnt[type][indx], bw_ctl,
+ sz[type][indx], sz1, mp);
+ continue;
}
@@ -1051,7 +1032,7 @@ mac_rx_srs_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head)
* classification has not happened, we need to verify if
* this unicast packet really belongs to us.
*/
- if (etype == ETHERTYPE_IP) {
+ if (sap == ETHERTYPE_IP) {
/*
* If we are H/W classified, but we have promisc
* on, then we need to check for the unicast address.
@@ -1061,12 +1042,11 @@ mac_rx_srs_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head)
rw_enter(&mcip->mci_rw_lock, RW_READER);
map = mcip->mci_unicast;
- if (bcmp(&ehp->ether_dhost, map->ma_addr,
+ if (bcmp(dstaddr, map->ma_addr,
map->ma_len) == 0)
type = UNDEF;
rw_exit(&mcip->mci_rw_lock);
- } else if (((((uint8_t *)&ehp->ether_dhost)[0] &
- 0x01) == 0)) {
+ } else if (is_unicast) {
type = UNDEF;
}
}
@@ -1076,14 +1056,15 @@ mac_rx_srs_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head)
* the fast path.
*/
- /* LINTED: cast may result in improper alignment */
- ipha = (ipha_t *)(mp->b_rptr + ether_hlen);
+ ipha = (ipha_t *)(mp->b_rptr + hdrsize);
if ((type != OTH) && MBLK_RX_FANOUT_SLOWPATH(mp, ipha)) {
type = OTH;
fanout_oth1++;
}
if (type != OTH) {
+ uint16_t frag_offset_flags;
+
switch (ipha->ipha_protocol) {
case IPPROTO_TCP:
case IPPROTO_UDP:
@@ -1103,7 +1084,7 @@ mac_rx_srs_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head)
fanout_oth3++;
break;
}
- ports_offset = ether_hlen + ipha_len;
+ ports_offset = hdrsize + ipha_len;
break;
default:
type = OTH;
@@ -1113,8 +1094,8 @@ mac_rx_srs_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head)
}
if (type == OTH) {
- if (mac_rx_srs_long_fanout(mac_srs, mp, etype,
- &type, &indx) == -1) {
+ if (mac_rx_srs_long_fanout(mac_srs, mp, sap,
+ hdrsize, &type, &indx) == -1) {
mac_rx_drop_pkt(mac_srs, mp);
continue;
}
@@ -1146,7 +1127,7 @@ mac_rx_srs_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head)
*(uint32_t *)(mp->b_rptr + ports_offset));
indx = COMPUTE_INDEX(hash, mac_srs->srs_tcp_ring_count);
type = V4_TCP;
- mp->b_rptr += ether_hlen;
+ mp->b_rptr += hdrsize;
break;
case IPPROTO_UDP:
case IPPROTO_SCTP:
@@ -1162,19 +1143,24 @@ mac_rx_srs_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head)
mac_srs->srs_ind++;
}
type = V4_UDP;
- mp->b_rptr += ether_hlen;
+ mp->b_rptr += hdrsize;
break;
+ default:
+ indx = 0;
+ type = OTH;
}
- ASSERT(type != UNDEF);
-
FANOUT_ENQUEUE_MP(headmp[type][indx], tailmp[type][indx],
cnt[type][indx], bw_ctl, sz[type][indx], sz1, mp);
}
for (type = V4_TCP; type < UNDEF; type++) {
+ int i;
+
for (i = 0; i < fanout_cnt; i++) {
if (headmp[type][i] != NULL) {
+ mac_soft_ring_t *softring;
+
ASSERT(tailmp[type][i]->b_next == NULL);
switch (type) {
case V4_TCP:
@@ -1190,7 +1176,7 @@ mac_rx_srs_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head)
mac_srs->srs_oth_soft_rings[i];
break;
}
- mac_rx_soft_ring_process(mac_srs->srs_mcip,
+ mac_rx_soft_ring_process(mcip,
softring, headmp[type][i], tailmp[type][i],
cnt[type][i], sz[type][i]);
}
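
HASH_ADDR() and COMPUTE_INDEX() are private to the MAC layer; the sketch below only illustrates the shape of the computation, with an arbitrary mixing function that is not the kernel's.

#include <stdint.h>

/*
 * Fold the IPv4 addresses and the 32-bit word holding both TCP/UDP
 * ports (read at ports_offset above) into a soft ring index.  Every
 * packet of a flow yields the same hash, so a flow always lands on
 * the same soft ring and stays ordered.
 */
static uint32_t
flow_ring_index(uint32_t src_ip, uint32_t dst_ip, uint32_t ports,
    uint32_t ring_count)
{
	uint32_t hash = src_ip ^ dst_ip ^ ports;

	hash ^= hash >> 16;		/* fold the high bits down */
	return (hash % ring_count);	/* COMPUTE_INDEX-style reduction */
}

This is also why the surrounding code diverts fragmented datagrams (the frag_offset_flags check) to the OTH path before reading ports_offset: only the initial fragment carries the port word, so hashing later fragments on it would scatter one flow across rings.
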
@@ -1373,46 +1359,39 @@ check_again:
(mac_srs->srs_first != NULL)) {
/*
* We have packets to process and worker thread
- * is not running. Check to see if poll thread is
- * allowed to process. Let it do processing only if it
- * picked up some packets from the NIC otherwise
- * wakeup the worker thread.
+ * is not running. Check to see if poll thread is
+ * allowed to process.
*/
- if ((mac_srs->srs_state & SRS_LATENCY_OPT) &&
- (head != NULL)) {
+ if (mac_srs->srs_state & SRS_LATENCY_OPT) {
mac_srs->srs_drain_func(mac_srs, SRS_POLL_PROC);
if (srs_rx->sr_poll_pkt_cnt <=
srs_rx->sr_lowat) {
srs_rx->sr_poll_again++;
goto check_again;
- } else {
- /*
- * We are already above low water mark
- * so stay in the polling mode but no
- * need to poll. Once we dip below
- * the polling threshold, the processing
- * thread (soft ring) will signal us
- * to poll again (MAC_UPDATE_SRS_COUNT)
- */
- srs_rx->sr_poll_drain_no_poll++;
- mac_srs->srs_state &=
- ~(SRS_PROC|SRS_GET_PKTS);
- /*
- * In B/W control case, its possible
- * that the backlog built up due to
- * B/W limit being reached and packets
- * are queued only in SRS. In this case,
- * we should schedule worker thread
- * since no one else will wake us up.
- */
- if ((mac_srs->srs_type &
- SRST_BW_CONTROL) &&
- (mac_srs->srs_tid == NULL)) {
- mac_srs->srs_tid =
- timeout(mac_srs_fire,
- mac_srs, 1);
- srs_rx->sr_poll_worker_wakeup++;
- }
+ }
+ /*
+ * We are already above low water mark
+ * so stay in the polling mode but no
+ * need to poll. Once we dip below
+ * the polling threshold, the processing
+ * thread (soft ring) will signal us
+ * to poll again (MAC_UPDATE_SRS_COUNT)
+ */
+ srs_rx->sr_poll_drain_no_poll++;
+ mac_srs->srs_state &= ~(SRS_PROC|SRS_GET_PKTS);
+ /*
+ * In B/W control case, it's possible
+ * that the backlog built up due to
+ * B/W limit being reached and packets
+ * are queued only in SRS. In this case,
+ * we should schedule worker thread
+ * since no one else will wake us up.
+ */
+ if ((mac_srs->srs_type & SRST_BW_CONTROL) &&
+ (mac_srs->srs_tid == NULL)) {
+ mac_srs->srs_tid =
+ timeout(mac_srs_fire, mac_srs, 1);
+ srs_rx->sr_poll_worker_wakeup++;
}
} else {
/*
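
With the head != NULL condition gone, the latency-optimized branch now always lets the poll thread drain and then decides, purely on the backlog count, whether to poll again. Below is a control-flow sketch with illustrative names and flag values, not the kernel's.

#include <stdint.h>

#define	SKETCH_SRS_PROC		0x0001	/* illustrative flag values */
#define	SKETCH_SRS_GET_PKTS	0x0002

struct srs_sketch {
	uint32_t	state;
	int		poll_pkt_cnt;	/* packets queued in the SRS */
	int		lowat;		/* low watermark */
	void		(*drain)(struct srs_sketch *);
	int		(*poll_nic)(struct srs_sketch *); /* # picked up */
};

/*
 * Drain, and keep going back to the NIC while the backlog stays at
 * or below the low watermark.  Past the watermark we stay in polling
 * mode but stop polling; the drain side signals the next poll once
 * the count dips below the threshold again (MAC_UPDATE_SRS_COUNT in
 * the real code).
 */
static void
poll_until_backlogged(struct srs_sketch *s)
{
	for (;;) {
		s->drain(s);		/* SRS_POLL_PROC drain */
		if (s->poll_pkt_cnt > s->lowat)
			break;		/* enough backlog: park */
		if (s->poll_nic(s) == 0)
			break;		/* sketch guard; the kernel
					   waits on the ring instead */
	}
	s->state &= ~(SKETCH_SRS_PROC | SKETCH_SRS_GET_PKTS);
}
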
@@ -1598,7 +1577,7 @@ mac_rx_srs_drain(mac_soft_ring_set_t *mac_srs, uint_t proc_type)
ASSERT(MUTEX_HELD(&mac_srs->srs_lock));
ASSERT(!(mac_srs->srs_type & SRST_BW_CONTROL));
-again:
+
/* If we are blanked i.e. can't do upcalls, then we are done */
if (mac_srs->srs_state & (SRS_BLANK | SRS_PAUSE)) {
ASSERT((mac_srs->srs_type & SRST_NO_SOFT_RINGS) ||
@@ -1609,6 +1588,26 @@ again:
if (mac_srs->srs_first == NULL)
goto out;
+ if (!(mac_srs->srs_state & SRS_LATENCY_OPT) &&
+ (srs_rx->sr_poll_pkt_cnt <= srs_rx->sr_lowat)) {
+ /*
+ * In the normal case, the SRS worker thread does no
+ * work and we wait for a backlog to build up before
+ * we switch into polling mode. In case we are
+ * optimizing for throughput, we use the worker thread
+ * as well. The goal is to let the worker thread
+ * process the queue while the poll thread feeds
+ * packets into it. As such, we should signal the poll
+ * thread to try and get more packets.
+ *
+ * We could have pulled this check into the POLL_RING
+ * macro itself, but keeping it explicit here makes
+ * the architecture easier to understand.
+ */
+ MAC_SRS_POLL_RING(mac_srs);
+ }
+
+again:
head = mac_srs->srs_first;
mac_srs->srs_first = NULL;
tail = mac_srs->srs_last;
@@ -1624,10 +1623,7 @@ again:
mac_srs->srs_state |= (SRS_PROC|proc_type);
- /* Switch to polling mode */
- MAC_SRS_WORKER_POLLING_ON(mac_srs);
- if (srs_rx->sr_poll_pkt_cnt <= srs_rx->sr_lowat)
- MAC_SRS_POLL_RING(mac_srs);
+
/*
* mcip is NULL for broadcast and multicast flows. The promisc
* callbacks for broadcast and multicast packets are delivered from
@@ -1696,37 +1692,27 @@ again:
mutex_enter(&mac_srs->srs_lock);
}
- /*
- * Send the poll thread to pick up any packets arrived
- * so far. This also serves as the last check in case
- * nothing else is queued in the SRS. The poll thread
- * is signalled only in the case the drain was done
- * by the worker thread and SRS_WORKER is set. The
- * worker thread can run in parallel as long as the
- * SRS_WORKER flag is set. We we have nothing else to
- * process, we can exit while leaving SRS_PROC set
- * which gives the poll thread control to process and
- * cleanup once it returns from the NIC.
- *
- * If we have nothing else to process, we need to
- * ensure that we keep holding the srs_lock till
- * all the checks below are done and control is
- * handed to the poll thread if it was running.
- */
- if (mac_srs->srs_first != NULL) {
- if (proc_type == SRS_WORKER) {
- if (srs_rx->sr_poll_pkt_cnt <= srs_rx->sr_lowat)
- MAC_SRS_POLL_RING(mac_srs);
+ if (!(mac_srs->srs_state & (SRS_LATENCY_OPT|SRS_BLANK|SRS_PAUSE))) {
+ /*
+ * In case we are optimizing for throughput, we
+ * should try and keep the worker thread running
+ * as much as possible. Send the poll thread down
+ * to check one more time if something else
+ * arrived. Meanwhile, if the poll thread has already
+ * collected something due to an earlier signal,
+ * process it now.
+ */
+ if (srs_rx->sr_poll_pkt_cnt <= srs_rx->sr_lowat) {
+ srs_rx->sr_drain_poll_sig++;
+ MAC_SRS_POLL_RING(mac_srs);
+ }
+ if (mac_srs->srs_first != NULL) {
srs_rx->sr_drain_again++;
goto again;
- } else {
- srs_rx->sr_drain_worker_sig++;
- cv_signal(&mac_srs->srs_async);
}
}
out:
-
if (mac_srs->srs_state & SRS_GET_PKTS) {
/*
* Poll thread is already running. Leave the
@@ -1885,12 +1871,6 @@ again:
mutex_exit(&mac_srs->srs_bw->mac_bw_lock);
}
- /*
- * We can continue processing the queue.
- * We need to figure out if there is a fanout needed or
- * we can just process this here.
- */
-
if ((tid = mac_srs->srs_tid) != 0)
mac_srs->srs_tid = 0;
@@ -2405,8 +2385,7 @@ mac_rx_srs_process(void *arg, mac_resource_handle_t srs, mblk_t *mp_chain,
* optimizing for latency, we should signal the
* worker thread.
*/
- if (loopback || ((count > 1) &&
- !(mac_srs->srs_state & SRS_LATENCY_OPT))) {
+ if (loopback || !(mac_srs->srs_state & SRS_LATENCY_OPT)) {
/*
* For loopback, We need to let the worker take
* over as we don't want to continue in the same
@@ -2502,6 +2481,12 @@ mac_tx_srs_enqueue(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain,
mblk_t *tail;
boolean_t wakeup_worker = B_TRUE;
+ /*
+ * Ignore fanout hint if we don't have multiple tx rings.
+ */
+ if (!TX_MULTI_RING_MODE(mac_srs))
+ fanout_hint = 0;
+
if (mac_srs->srs_first != NULL)
wakeup_worker = B_FALSE;
MAC_COUNT_CHAIN(mac_srs, mp_chain, tail, cnt, sz);
@@ -2753,18 +2738,89 @@ mac_tx_serializer_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain,
* the soft ring associated with that Tx ring. The srs itself will not
* queue any packets.
*/
+
+#define MAC_TX_SOFT_RING_PROCESS(chain) { \
+ index = COMPUTE_INDEX(hash, mac_srs->srs_oth_ring_count), \
+ softring = mac_srs->srs_oth_soft_rings[index]; \
+ cookie = mac_tx_soft_ring_process(softring, chain, flag, ret_mp); \
+ DTRACE_PROBE2(tx__fanout, uint64_t, hash, uint_t, index); \
+}
+
static mac_tx_cookie_t
mac_tx_fanout_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain,
uintptr_t fanout_hint, uint16_t flag, mblk_t **ret_mp)
{
mac_soft_ring_t *softring;
- uint_t indx, hash;
+ uint64_t hash;
+ uint_t index;
+ mac_tx_cookie_t cookie = NULL;
ASSERT(mac_srs->srs_tx.st_mode == SRS_TX_FANOUT);
- hash = HASH_HINT(fanout_hint);
- indx = COMPUTE_INDEX(hash, mac_srs->srs_oth_ring_count);
- softring = mac_srs->srs_oth_soft_rings[indx];
- return (mac_tx_soft_ring_process(softring, mp_chain, flag, ret_mp));
+ if (fanout_hint != 0) {
+ /*
+ * The hint is specified by the caller, simply pass the
+ * whole chain to the soft ring.
+ */
+ hash = HASH_HINT(fanout_hint);
+ MAC_TX_SOFT_RING_PROCESS(mp_chain);
+ } else {
+ mblk_t *last_mp, *cur_mp, *sub_chain;
+ uint64_t last_hash = 0;
+ uint_t media = mac_srs->srs_mcip->mci_mip->mi_info.mi_media;
+
+ /*
+ * Compute the hash from the contents (headers) of the
+ * packets of the mblk chain. Split the chains into
+ * subchains of the same conversation.
+ *
+ * Since there may be more than one ring used for
+ * sub-chains of the same call, and since the caller
+ * does not maintain per conversation state since it
+ * passed a zero hint, unsent subchains will be
+ * dropped.
+ */
+
+ flag |= MAC_DROP_ON_NO_DESC;
+ ret_mp = NULL;
+
+ ASSERT(ret_mp == NULL);
+
+ sub_chain = NULL;
+ last_mp = NULL;
+
+ for (cur_mp = mp_chain; cur_mp != NULL;
+ cur_mp = cur_mp->b_next) {
+ hash = mac_pkt_hash(media, cur_mp, MAC_PKT_HASH_L4,
+ B_TRUE);
+ if (last_hash != 0 && hash != last_hash) {
+ /*
+ * Starting a different subchain, send current
+ * chain out.
+ */
+ ASSERT(last_mp != NULL);
+ last_mp->b_next = NULL;
+ MAC_TX_SOFT_RING_PROCESS(sub_chain);
+ sub_chain = NULL;
+ }
+
+ /* add packet to subchain */
+ if (sub_chain == NULL)
+ sub_chain = cur_mp;
+ last_mp = cur_mp;
+ last_hash = hash;
+ }
+
+ if (sub_chain != NULL) {
+ /* send last subchain */
+ ASSERT(last_mp != NULL);
+ last_mp->b_next = NULL;
+ MAC_TX_SOFT_RING_PROCESS(sub_chain);
+ }
+
+ cookie = NULL;
+ }
+
+ return (cookie);
}
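
The zero-hint branch above keeps conversations intact by cutting the chain wherever the per-packet hash changes. The same splitting logic, reduced to a standalone routine over a generic linked packet type; the names and types are illustrative, and the hash is assumed to be precomputed the way mac_pkt_hash() computes it.

#include <stdint.h>
#include <stddef.h>

struct pkt {
	struct pkt	*next;		/* b_next analogue */
	uint64_t	hash;		/* per-packet L4 flow hash */
};

/*
 * Split a chain into maximal runs of packets that share a flow hash
 * and hand each run to send() as one subchain.  Packets of the same
 * conversation are adjacent on typical transmit chains, so this
 * keeps a conversation on one soft ring and in order.
 */
static void
send_subchains(struct pkt *chain, void (*send)(struct pkt *))
{
	struct pkt *sub = chain, *last = NULL, *cur;
	uint64_t last_hash = 0;

	for (cur = chain; cur != NULL; cur = cur->next) {
		if (last_hash != 0 && cur->hash != last_hash) {
			last->next = NULL;	/* terminate current run */
			send(sub);
			sub = cur;		/* start the next run */
		}
		last = cur;
		last_hash = cur->hash;
	}
	if (sub != NULL)
		send(sub);			/* last (or only) run */
}

MAC_DROP_ON_NO_DESC is forced on in this mode because, as the comment notes, subchains of one call may spread across rings and the caller keeps no state with which to retry just the unsent ones.
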
/*
@@ -2788,8 +2844,17 @@ mac_tx_bw_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain,
ASSERT(mac_srs->srs_type & SRST_BW_CONTROL);
mutex_enter(&mac_srs->srs_lock);
if (mac_srs->srs_bw->mac_bw_limit == 0) {
- /* zero bandwidth: drop all */
- MAC_TX_SRS_DROP_MESSAGE(mac_srs, mp_chain, cookie);
+ /*
+ * zero bandwidth, no traffic is sent: drop the packets,
+ * or return the whole chain if the caller requests all
+ * unsent packets back.
+ */
+ if (flag & MAC_TX_NO_ENQUEUE) {
+ cookie = (mac_tx_cookie_t)mac_srs;
+ *ret_mp = mp_chain;
+ } else {
+ MAC_TX_SRS_DROP_MESSAGE(mac_srs, mp_chain, cookie);
+ }
mutex_exit(&mac_srs->srs_lock);
return (cookie);
} else if ((mac_srs->srs_first != NULL) ||
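
The fix above distinguishes the two caller contracts on the zero-bandwidth path. A compact sketch of just that contract, with illustrative types and flag value:

#include <stdint.h>
#include <stddef.h>

#define	SKETCH_TX_NO_ENQUEUE	0x0001	/* illustrative flag value */

typedef uintptr_t tx_cookie_t;
struct pkt;				/* opaque packet chain */

/*
 * Zero configured bandwidth means nothing may be sent.  Callers that
 * set the no-enqueue flag get the whole unsent chain back plus a
 * non-zero cookie identifying the blocked SRS; everyone else sees
 * the old behavior, an outright drop.
 */
static tx_cookie_t
zero_bw_send(void *srs, struct pkt *chain, uint16_t flag,
    struct pkt **ret_mp, void (*drop)(struct pkt *))
{
	if (flag & SKETCH_TX_NO_ENQUEUE) {
		*ret_mp = chain;
		return ((tx_cookie_t)srs);
	}
	drop(chain);
	return (0);
}
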
@@ -3223,9 +3288,6 @@ mac_tx_send(mac_client_handle_t mch, mac_ring_handle_t ring, mblk_t *mp_chain,
DTRACE_PROBE3(slowpath, mac_client_impl_t *,
src_mcip, int, mip->mi_nclients, mblk_t *, mp_chain);
- if (mip->mi_promisc_list != NULL)
- mac_promisc_dispatch(mip, mp_chain, src_mcip);
-
mp = mp_chain;
while (mp != NULL) {
flow_entry_t *dst_flow_ent;
@@ -3241,6 +3303,12 @@ mac_tx_send(mac_client_handle_t mch, mac_ring_handle_t ring, mblk_t *mp_chain,
CHECK_VID_AND_ADD_TAG(mp);
/*
+ * Check if there are promiscuous mode callbacks defined.
+ */
+ if (mip->mi_promisc_list != NULL)
+ mac_promisc_dispatch(mip, mp, src_mcip);
+
+ /*
* Find the destination.
*/
dst_flow_ent = mac_tx_classify(mip, mp);
@@ -3516,9 +3584,8 @@ mac_rx_soft_ring_process(mac_client_impl_t *mcip, mac_soft_ring_t *ringp,
mutex_enter(&ringp->s_ring_lock);
ringp->s_ring_total_inpkt += cnt;
- if ((ringp->s_ring_type & ST_RING_ANY) ||
- ((mac_srs->srs_rx.sr_poll_pkt_cnt <= 1) &&
- !mac_srs->srs_rx.sr_enqueue_always)) {
+ if ((mac_srs->srs_rx.sr_poll_pkt_cnt <= 1) &&
+ !(ringp->s_ring_type & ST_RING_WORKER_ONLY)) {
/* If on processor or blanking on, then enqueue and return */
if (ringp->s_ring_state & S_RING_BLANK ||
ringp->s_ring_state & S_RING_PROC) {
@@ -3526,7 +3593,6 @@ mac_rx_soft_ring_process(mac_client_impl_t *mcip, mac_soft_ring_t *ringp,
mutex_exit(&ringp->s_ring_lock);
return;
}
-
proc = ringp->s_ring_rx_func;
arg1 = ringp->s_ring_rx_arg1;
arg2 = ringp->s_ring_rx_arg2;
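
The rewritten test at the top of mac_rx_soft_ring_process() gates the inline fast path on two things only: an SRS backlog of at most one packet and a ring not marked worker-only. A one-function sketch of the decision (the flag names referenced are the real ones, the C types are not):

/*
 * Deliver inline in the polling/interrupt thread only when the SRS
 * is nearly idle and the ring may bypass its worker; otherwise the
 * packets are enqueued for the soft ring worker thread.
 */
static int
deliver_inline(int sr_poll_pkt_cnt, int worker_only, int blanked_or_proc)
{
	if (sr_poll_pkt_cnt > 1 || worker_only)
		return (0);		/* backlog or policy: use worker */
	if (blanked_or_proc)
		return (0);		/* blanking, or already running */
	return (1);			/* low-latency inline delivery */
}

Compared with the old test, ST_RING_ANY no longer forces the inline path and sr_enqueue_always is gone; the backlog count alone decides.
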