author      Robert Mustacchi <rm@joyent.com>    2016-04-01 17:49:57 +0000
committer   Robert Mustacchi <rm@joyent.com>    2016-04-15 21:03:59 +0000
commit      56046f267b29072fb6ea4da60520a4ed1a09c8aa
tree        d032b4aa2d0a7c0f23f8692072b3a66db39f6a2b
parent      bc1b3e9d58811ef50db915fa1ad8c136754c277a
OS-5236 Fortville should enable ITR logic
OS-5316 Fortville should leverage RX DMA binding
OS-5317 i40e ring interrupt blanking needs to work
OS-5318 Don't grab the tcb lock while holding the tx ring lock
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
-rw-r--r--  usr/src/man/man7d/i40e.7d                       27
-rw-r--r--  usr/src/uts/common/io/i40e/i40e_gld.c          189
-rw-r--r--  usr/src/uts/common/io/i40e/i40e_intr.c         160
-rw-r--r--  usr/src/uts/common/io/i40e/i40e_main.c          57
-rw-r--r--  usr/src/uts/common/io/i40e/i40e_stats.c         13
-rw-r--r--  usr/src/uts/common/io/i40e/i40e_sw.h            80
-rw-r--r--  usr/src/uts/common/io/i40e/i40e_transceiver.c  140
7 files changed, 618 insertions(+), 48 deletions(-)
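Before the diffs themselves, a note on the headline RX change: frames whose length meets the new rx_dma_threshold property are loaned to the stack via DMA binding instead of being bcopy'd. A minimal sketch of that rule, distilled from the i40e_ring_rx() change below (i40e_rx_should_bind is a hypothetical helper for illustration, not part of the patch):

	#include <stdint.h>
	#include <sys/types.h>

	#define	I40E_MIN_RX_DMA_THRESH	0		/* bind every frame */
	#define	I40E_DEF_RX_DMA_THRESH	256
	#define	I40E_MAX_RX_DMA_THRESH	INT32_MAX	/* copy every frame */

	/*
	 * Frames at least rx_dma_min bytes long (headers included) are
	 * loaned to the stack via DMA binding; anything smaller is bcopy'd
	 * into a fresh mblk_t, which is cheaper for small frames.
	 */
	static boolean_t
	i40e_rx_should_bind(uint32_t plen, uint32_t rx_dma_min)
	{
		return (plen >= rx_dma_min ? B_TRUE : B_FALSE);
	}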
diff --git a/usr/src/man/man7d/i40e.7d b/usr/src/man/man7d/i40e.7d
index 0a8019ac9a..082f8b0eeb 100644
--- a/usr/src/man/man7d/i40e.7d
+++ b/usr/src/man/man7d/i40e.7d
@@ -101,6 +101,11 @@ consumer such as
 .Xr snoop 1M
 or an LLDP daemon is started.
 .Pp
+Some properties may be tuned at runtime with the
+.Xr dladm 1M
+utility. Properties that can be tuned this way have the name of the
+corresponding dladm property called out explicitly.
+.Pp
 These properties are not considered stable at this time. They may change
 and should not be relied on. They are considered
 .Sy Volatile .
@@ -112,7 +117,9 @@ these values.
 Minimum:
 .Sy 1500 |
 Maximum:
-.Sy 9710
+.Sy 9710 |
+Runtime Property:
+.Sy mtu
 .Ed
 .Bd -filled
 The
@@ -232,6 +239,24 @@ for this. Turning it off will increase latency and decrease throughput when
 receiving packets, but should be done if a hardware bug is suspected.
 .Ed
+.It Sy rx_dma_threshold
+.Bd -filled -compact
+Minimum:
+.Sy 0 |
+Maximum:
+.Sy INT32_MAX |
+Runtime Property:
+.Sy _rx_dma_threshold
+.Ed
+.Bd -filled
+The
+.Sy rx_dma_threshold
+property indicates the size in bytes of a received frame, including all of
+its headers, at which the driver should not copy the frame but instead bind
+DMA memory. Setting this property to its minimum causes all frames to be
+processed by binding DMA memory; setting it to its maximum causes all frames
+to be processed by copying.
+.Ed
 .El
 .Sh ARCHITECTURE
 The
diff --git a/usr/src/uts/common/io/i40e/i40e_gld.c b/usr/src/uts/common/io/i40e/i40e_gld.c
index 21a9fff3e1..6fec1fd634 100644
--- a/usr/src/uts/common/io/i40e/i40e_gld.c
+++ b/usr/src/uts/common/io/i40e/i40e_gld.c
@@ -20,6 +20,21 @@
 #include "i40e_sw.h"

+#define	I40E_PROP_RX_DMA_THRESH	"_rx_dma_threshold"
+#define	I40E_PROP_TX_DMA_THRESH	"_tx_dma_threshold"
+#define	I40E_PROP_RX_ITR	"_rx_intr_throttle"
+#define	I40E_PROP_TX_ITR	"_tx_intr_throttle"
+#define	I40E_PROP_OTHER_ITR	"_other_intr_throttle"
+
+char *i40e_priv_props[] = {
+	I40E_PROP_RX_DMA_THRESH,
+	I40E_PROP_TX_DMA_THRESH,
+	I40E_PROP_RX_ITR,
+	I40E_PROP_TX_ITR,
+	I40E_PROP_OTHER_ITR,
+	NULL
+};
+
 static int
 i40e_group_remove_mac(void *arg, const uint8_t *mac_addr)
 {
@@ -431,24 +446,35 @@ i40e_ring_start(mac_ring_driver_t rh, uint64_t gen_num)
 	return (0);
 }

-/*
- * Because we only support a single ring at this time, we don't support toggling
- * interrupts and polling. When we do, we should simply toggle the interrupt
- * cause enable bit for this and potentially ignore it when looking at the
- * interrupt vector mapping.
- */
 /* ARGSUSED */
 static int
 i40e_rx_ring_intr_enable(mac_intr_handle_t intrh)
 {
-	return (EINVAL);
+	i40e_trqpair_t *itrq = (i40e_trqpair_t *)intrh;
+	i40e_t *i40e = itrq->itrq_i40e;
+
+	mutex_enter(&i40e->i40e_general_lock);
+	ASSERT(i40e->i40e_intr_poll == B_TRUE);
+	i40e_intr_rx_queue_enable(i40e, itrq->itrq_index);
+	i40e->i40e_intr_poll = B_FALSE;
+	mutex_exit(&i40e->i40e_general_lock);
+
+	return (0);
 }

 /* ARGSUSED */
 static int
 i40e_rx_ring_intr_disable(mac_intr_handle_t intrh)
 {
-	return (EINVAL);
+	i40e_trqpair_t *itrq = (i40e_trqpair_t *)intrh;
+	i40e_t *i40e = itrq->itrq_i40e;
+
+	mutex_enter(&i40e->i40e_general_lock);
+	i40e_intr_rx_queue_disable(i40e, itrq->itrq_index);
+	i40e->i40e_intr_poll = B_TRUE;
+	mutex_exit(&i40e->i40e_general_lock);
+
+	return (0);
 }

 static void
@@ -599,6 +625,145 @@ i40e_m_getcapab(void *arg, mac_capab_t cap, void *cap_data)
 }

 static int
+i40e_m_setprop_private(i40e_t *i40e, const char *pr_name, uint_t pr_valsize,
+    const void *pr_val)
+{
+	int ret;
+	long val;
+	char *eptr;
+
+	ASSERT(MUTEX_HELD(&i40e->i40e_general_lock));
+
+	if ((ret = ddi_strtol(pr_val, &eptr, 10, &val)) != 0 ||
+	    *eptr != '\0') {
+		return (ret == 0 ? EINVAL : ret);
+	}
+
+	if (strcmp(pr_name, I40E_PROP_RX_DMA_THRESH) == 0) {
+		if (val < I40E_MIN_RX_DMA_THRESH ||
+		    val > I40E_MAX_RX_DMA_THRESH) {
+			return (EINVAL);
+		}
+		i40e->i40e_rx_dma_min = (uint32_t)val;
+		return (0);
+	}
+
+	if (strcmp(pr_name, I40E_PROP_TX_DMA_THRESH) == 0) {
+		if (val < I40E_MIN_TX_DMA_THRESH ||
+		    val > I40E_MAX_TX_DMA_THRESH) {
+			return (EINVAL);
+		}
+		i40e->i40e_tx_dma_min = (uint32_t)val;
+		return (0);
+	}
+
+	if (strcmp(pr_name, I40E_PROP_RX_ITR) == 0) {
+		if (val < I40E_MIN_ITR ||
+		    val > I40E_MAX_ITR) {
+			return (EINVAL);
+		}
+		i40e->i40e_rx_itr = (uint32_t)val;
+		i40e_intr_set_itr(i40e, I40E_ITR_INDEX_RX, i40e->i40e_rx_itr);
+		return (0);
+	}
+
+	if (strcmp(pr_name, I40E_PROP_TX_ITR) == 0) {
+		if (val < I40E_MIN_ITR ||
+		    val > I40E_MAX_ITR) {
+			return (EINVAL);
+		}
+		i40e->i40e_tx_itr = (uint32_t)val;
+		i40e_intr_set_itr(i40e, I40E_ITR_INDEX_TX, i40e->i40e_tx_itr);
+		return (0);
+	}
+
+	if (strcmp(pr_name, I40E_PROP_OTHER_ITR) == 0) {
+		if (val < I40E_MIN_ITR ||
+		    val > I40E_MAX_ITR) {
+			return (EINVAL);
+		}
+		i40e->i40e_other_itr = (uint32_t)val;
+		i40e_intr_set_itr(i40e, I40E_ITR_INDEX_OTHER,
+		    i40e->i40e_other_itr);
+		return (0);
+	}
+
+	return (ENOTSUP);
+}
+
+static int
+i40e_m_getprop_private(i40e_t *i40e, const char *pr_name, uint_t pr_valsize,
+    void *pr_val)
+{
+	uint32_t val;
+
+	ASSERT(MUTEX_HELD(&i40e->i40e_general_lock));
+
+	if (strcmp(pr_name, I40E_PROP_RX_DMA_THRESH) == 0) {
+		val = i40e->i40e_rx_dma_min;
+	} else if (strcmp(pr_name, I40E_PROP_TX_DMA_THRESH) == 0) {
+		val = i40e->i40e_tx_dma_min;
+	} else if (strcmp(pr_name, I40E_PROP_RX_ITR) == 0) {
+		val = i40e->i40e_rx_itr;
+	} else if (strcmp(pr_name, I40E_PROP_TX_ITR) == 0) {
+		val = i40e->i40e_tx_itr;
+	} else if (strcmp(pr_name, I40E_PROP_OTHER_ITR) == 0) {
+		val = i40e->i40e_other_itr;
+	} else {
+		return (ENOTSUP);
+	}
+
+	if (snprintf(pr_val, pr_valsize, "%u", val) >= pr_valsize)
+		return (ERANGE);
+	return (0);
+}
+
+/*
+ * Annoyingly, MAC seems to ignore default values for private properties
+ * that aren't strings. That means we have to render each of these uint32_t
+ * values as a string, so we size the buffer to be large enough to hold any
+ * of them.
+ */
+static void
+i40e_m_propinfo_private(i40e_t *i40e, const char *pr_name,
+    mac_prop_info_handle_t prh)
+{
+	char buf[64];
+	uint32_t def;
+
+	if (strcmp(pr_name, I40E_PROP_RX_DMA_THRESH) == 0) {
+		mac_prop_info_set_perm(prh, MAC_PROP_PERM_RW);
+		def = I40E_DEF_RX_DMA_THRESH;
+		mac_prop_info_set_range_uint32(prh,
+		    I40E_MIN_RX_DMA_THRESH,
+		    I40E_MAX_RX_DMA_THRESH);
+	} else if (strcmp(pr_name, I40E_PROP_TX_DMA_THRESH) == 0) {
+		mac_prop_info_set_perm(prh, MAC_PROP_PERM_RW);
+		def = I40E_DEF_TX_DMA_THRESH;
+		mac_prop_info_set_range_uint32(prh,
+		    I40E_MIN_TX_DMA_THRESH,
+		    I40E_MAX_TX_DMA_THRESH);
+	} else if (strcmp(pr_name, I40E_PROP_RX_ITR) == 0) {
+		mac_prop_info_set_perm(prh, MAC_PROP_PERM_RW);
+		def = I40E_DEF_RX_ITR;
+		mac_prop_info_set_range_uint32(prh, I40E_MIN_ITR, I40E_MAX_ITR);
+	} else if (strcmp(pr_name, I40E_PROP_TX_ITR) == 0) {
+		mac_prop_info_set_perm(prh, MAC_PROP_PERM_RW);
+		def = I40E_DEF_TX_ITR;
+		mac_prop_info_set_range_uint32(prh, I40E_MIN_ITR, I40E_MAX_ITR);
+	} else if (strcmp(pr_name, I40E_PROP_OTHER_ITR) == 0) {
+		mac_prop_info_set_perm(prh, MAC_PROP_PERM_RW);
+		def = I40E_DEF_OTHER_ITR;
+		mac_prop_info_set_range_uint32(prh, I40E_MIN_ITR, I40E_MAX_ITR);
+	} else {
+		return;
+	}
+
+	(void) snprintf(buf, sizeof (buf), "%u", def);
+	mac_prop_info_set_default_str(prh, buf);
+}
+
+static int
 i40e_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
     uint_t pr_valsize, const void *pr_val)
 {
@@ -662,6 +827,8 @@ i40e_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
 		break;

 	case MAC_PROP_PRIVATE:
+		ret = i40e_m_setprop_private(i40e, pr_name, pr_valsize, pr_val);
+		break;
 	default:
 		ret = ENOTSUP;
 		break;
@@ -775,6 +942,8 @@ i40e_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
 		*u8 = (i40e->i40e_phy.link_speed & I40E_LINK_SPEED_40GB) != 0;
 		break;
 	case MAC_PROP_PRIVATE:
+		ret = i40e_m_getprop_private(i40e, pr_name, pr_valsize, pr_val);
+		break;
 	default:
 		ret = ENOTSUP;
 		break;
@@ -862,6 +1031,8 @@ i40e_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
 		    (i40e->i40e_phy.link_speed & I40E_LINK_SPEED_40GB) != 0);
 		break;
 	case MAC_PROP_PRIVATE:
+		i40e_m_propinfo_private(i40e, pr_name, prh);
+		break;
 	default:
 		break;
 	}
@@ -909,7 +1080,7 @@ i40e_register_mac(i40e_t *i40e)
 	mac->m_min_sdu = 0;
 	mac->m_max_sdu = i40e->i40e_sdu;
 	mac->m_margin = VLAN_TAGSZ;
-	mac->m_priv_props = NULL;
+	mac->m_priv_props = i40e_priv_props;
 	mac->m_v12n = MAC_VIRT_LEVEL1;

 	status = mac_register(mac, &i40e->i40e_mac_hdl);
diff --git a/usr/src/uts/common/io/i40e/i40e_intr.c b/usr/src/uts/common/io/i40e/i40e_intr.c
index 1d9c3d78b1..9ff64cb74b 100644
--- a/usr/src/uts/common/io/i40e/i40e_intr.c
+++ b/usr/src/uts/common/io/i40e/i40e_intr.c
@@ -119,6 +119,56 @@
 * Finally, we still have to set up the interrupt linked list, but the list is
 * instead rooted at the register I40E_PFINT_LNKLST0, rather than being tied to
 * one of the other MSI-X registers.
+ *
+ * --------------------
+ * Interrupt Moderation
+ * --------------------
+ *
+ * The XL710 hardware has three different interrupt moderation registers per
+ * interrupt. Unsurprisingly, we use these for:
+ *
+ *   o RX interrupts
+ *   o TX interrupts
+ *   o 'Other interrupts' (link status change, admin queue, etc.)
+ *
+ * By default, we throttle 'other interrupts' the most, then TX interrupts, and
+ * then RX interrupts. The default values for these were based on trying to
+ * reason about both the importance and frequency of events.
+ * Generally speaking, 'other interrupts' are not very frequent and are not
+ * important for the I/O data path in and of themselves (though they may
+ * indicate issues with the I/O data path).
+ *
+ * On the flip side, when we're not polling, RX interrupts are very important.
+ * The longer we wait for them, the more latency we inject into the system.
+ * However, if we allow interrupts to occur too frequently, we risk a few
+ * problems:
+ *
+ *  1) Abusing system resources. Without proper interrupt blanking and polling,
+ *     we can see upwards of 200k-300k interrupts per second on the system.
+ *
+ *  2) Not enough data coalescing to enable polling. In other words, the more
+ *     data that we allow to build up, the more likely we'll be able to enable
+ *     polling mode, allowing us to better handle bulk data.
+ *
+ * In between the 'other interrupts' and the TX interrupts we have the
+ * reclamation of TX buffers. This operation is not quite as important, as we
+ * generally size the ring large enough that we should be able to reclaim a
+ * substantial number of the descriptors that we have used per interrupt. So
+ * while it's important that this interrupt occur, we don't necessarily need it
+ * firing as frequently as RX; it doesn't, on its own, induce additional
+ * latency into the system.
+ *
+ * Based on all this, we currently assign static ITR values for the system.
+ * While we could move to a dynamic system (the hardware supports that), we'd
+ * first want to see problems with the static values that we believe the added
+ * complexity would actually help.
+ *
+ * With that in mind, the default values that we have allow for the following
+ * interrupt rates:
+ *
+ *   o 20k interrupts/s for RX
+ *   o 5k interrupts/s for TX
+ *   o 2k interrupts/s for 'Other Interrupts'
 */

 #include "i40e_sw.h"
@@ -130,6 +180,30 @@
 #define	I40E_INTR_NOTX_TX_QUEUE	1
 #define	I40E_INTR_NOTX_TX_MASK	(1 << I40E_PFINT_ICR0_QUEUE_1_SHIFT)

+void
+i40e_intr_set_itr(i40e_t *i40e, i40e_itr_index_t itr, uint_t val)
+{
+	int i;
+	i40e_hw_t *hw = &i40e->i40e_hw_space;
+
+	VERIFY3U(val, <=, I40E_MAX_ITR);
+	VERIFY3U(itr, <, I40E_ITR_INDEX_NONE);
+
+	/*
+	 * No matter the interrupt mode, the ITR for other interrupts is always
+	 * on interrupt zero, and the same is true if we're not using MSI-X.
+	 */
+	if (itr == I40E_ITR_INDEX_OTHER ||
+	    i40e->i40e_intr_type != DDI_INTR_TYPE_MSIX) {
+		I40E_WRITE_REG(hw, I40E_PFINT_ITR0(itr), val);
+		return;
+	}
+
+	for (i = 1; i < i40e->i40e_intr_count; i++) {
+		I40E_WRITE_REG(hw, I40E_PFINT_ITRN(itr, i - 1), val);
+	}
+}
+
 /*
 * Re-enable the adminq. Note that the adminq doesn't have a traditional queue
 * associated with it from an interrupt perspective and just lives on ICR0.
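As a quick sanity check of the interrupt rates quoted in the theory statement above: the hardware counts ITR values in 2 microsecond units, so a given value caps the interrupt rate at 1,000,000 / (2 * itr) per second. A sketch of the arithmetic (itr_to_max_rate is a hypothetical helper, not part of the patch; the I40E_DEF_* values come from the i40e_sw.h hunk later in this diff):

	#include <stdint.h>

	/*
	 * An ITR value is the minimum gap between interrupts in 2us units:
	 * I40E_DEF_RX_ITR (0x19 = 25 -> 50us gap) allows 20k interrupts/s,
	 * I40E_DEF_TX_ITR (0x64 = 100 -> 200us gap) allows 5k interrupts/s,
	 * I40E_DEF_OTHER_ITR (0xFA = 250 -> 500us gap) allows 2k/s. A value
	 * of zero disables throttling entirely.
	 */
	static uint32_t
	itr_to_max_rate(uint32_t itr)
	{
		return (itr == 0 ? UINT32_MAX : 1000000 / (2 * itr));
	}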
@@ -144,7 +218,7 @@ i40e_intr_adminq_enable(i40e_t *i40e)

 	reg = I40E_PFINT_DYN_CTL0_INTENA_MASK |
 	    I40E_PFINT_DYN_CTL0_CLEARPBA_MASK |
-	    (I40E_ITR_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT);
+	    (I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT);
 	I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTL0, reg);
 	i40e_flush(hw);
 }
@@ -155,7 +229,7 @@ i40e_intr_adminq_disable(i40e_t *i40e)
 	i40e_hw_t *hw = &i40e->i40e_hw_space;
 	uint32_t reg;

-	reg = I40E_ITR_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT;
+	reg = I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT;
 	I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTL0, reg);
 }

@@ -167,7 +241,7 @@ i40e_intr_io_enable(i40e_t *i40e, int vector)

 	reg = I40E_PFINT_DYN_CTLN_INTENA_MASK |
 	    I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
-	    (I40E_ITR_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
+	    (I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
 	I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTLN(vector - 1), reg);
 }

@@ -177,7 +251,7 @@ i40e_intr_io_disable(i40e_t *i40e, int vector)
 	uint32_t reg;
 	i40e_hw_t *hw = &i40e->i40e_hw_space;

-	reg = I40E_ITR_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT;
+	reg = I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT;
 	I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTLN(vector - 1), reg);
 }

@@ -326,7 +400,7 @@ i40e_intr_init_queue_msix(i40e_t *i40e)
 	I40E_WRITE_REG(hw, I40E_PFINT_LNKLSTN(0), reg);

 	reg = (1 << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) |
-	    (I40E_ITR_NONE << I40E_QINT_RQCTL_ITR_INDX_SHIFT) |
+	    (I40E_ITR_INDEX_RX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) |
 	    (0 << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) |
 	    (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) |
 	    I40E_QINT_RQCTL_CAUSE_ENA_MASK;
@@ -334,7 +408,7 @@
 	I40E_WRITE_REG(hw, I40E_QINT_RQCTL(0), reg);

 	reg = (1 << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) |
-	    (I40E_ITR_NONE << I40E_QINT_RQCTL_ITR_INDX_SHIFT) |
+	    (I40E_ITR_INDEX_TX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) |
 	    (I40E_QUEUE_TYPE_EOL << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) |
 	    (I40E_QUEUE_TYPE_RX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) |
 	    I40E_QINT_TQCTL_CAUSE_ENA_MASK;
@@ -363,7 +437,7 @@ i40e_intr_init_queue_shared(i40e_t *i40e)
 	I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, reg);

 	reg = (I40E_INTR_NOTX_INTR << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) |
-	    (I40E_ITR_NONE << I40E_QINT_RQCTL_ITR_INDX_SHIFT) |
+	    (I40E_ITR_INDEX_RX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) |
 	    (I40E_INTR_NOTX_RX_QUEUE << I40E_QINT_RQCTL_MSIX0_INDX_SHIFT) |
 	    (I40E_INTR_NOTX_QUEUE << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) |
 	    (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT);

 	I40E_WRITE_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE), reg);

 	reg = (I40E_INTR_NOTX_INTR << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) |
-	    (I40E_ITR_NONE << I40E_QINT_TQCTL_ITR_INDX_SHIFT) |
+	    (I40E_ITR_INDEX_TX << I40E_QINT_TQCTL_ITR_INDX_SHIFT) |
 	    (I40E_INTR_NOTX_TX_QUEUE << I40E_QINT_TQCTL_MSIX0_INDX_SHIFT) |
 	    (I40E_QUEUE_TYPE_EOL << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) |
 	    (I40E_QUEUE_TYPE_RX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT);
@@ -380,6 +454,47 @@
 }

 /*
+ * Enable the specified queue as a valid source of interrupts. Note, this
+ * should only be used as part of the GLDv3's interrupt blanking routines. The
+ * debug build assertions are specific to that.
+ */
+void
+i40e_intr_rx_queue_enable(i40e_t *i40e, uint_t queue)
+{
+	uint32_t reg;
+	i40e_hw_t *hw = &i40e->i40e_hw_space;
+
+	ASSERT(MUTEX_HELD(&i40e->i40e_general_lock));
+	ASSERT(queue < i40e->i40e_num_trqpairs);
+
+	reg = I40E_READ_REG(hw, I40E_QINT_RQCTL(queue));
+	ASSERT0(reg & I40E_QINT_RQCTL_CAUSE_ENA_MASK);
+	reg |= I40E_QINT_RQCTL_CAUSE_ENA_MASK;
+	I40E_WRITE_REG(hw, I40E_QINT_RQCTL(queue), reg);
+}
+
+/*
+ * Disable the specified queue as a valid source of interrupts. Note, this
+ * should only be used as part of the GLDv3's interrupt blanking routines. The
+ * debug build assertions are specific to that.
+ */
+void
+i40e_intr_rx_queue_disable(i40e_t *i40e, uint_t queue)
+{
+	uint32_t reg;
+	i40e_hw_t *hw = &i40e->i40e_hw_space;
+
+	ASSERT(MUTEX_HELD(&i40e->i40e_general_lock));
+	ASSERT(queue < i40e->i40e_num_trqpairs);
+
+	reg = I40E_READ_REG(hw, I40E_QINT_RQCTL(queue));
+	ASSERT3U(reg & I40E_QINT_RQCTL_CAUSE_ENA_MASK, ==,
+	    I40E_QINT_RQCTL_CAUSE_ENA_MASK);
+	reg &= ~I40E_QINT_RQCTL_CAUSE_ENA_MASK;
+	I40E_WRITE_REG(hw, I40E_QINT_RQCTL(queue), reg);
+}
+
+/*
 * Start up the chip's various interrupt handling. We not only configure the
 * adminq here, but we also go through and configure all of the actual queues,
 * the interrupt linked lists, and others.
@@ -395,11 +510,18 @@ i40e_intr_chip_init(i40e_t *i40e)
 	 */
 	i40e_intr_io_disable_all(i40e);

-	/* First, the adminq. */
 	I40E_WRITE_REG(hw, I40E_PFINT_ICR0_ENA, 0);
 	I40E_READ_REG(hw, I40E_PFINT_ICR0);

 	/*
+	 * Always enable all of the other-class interrupts to be on their own
+	 * ITR. This only needs to be set on interrupt zero, which has its own
+	 * special setting.
+	 */
+	reg = I40E_ITR_INDEX_OTHER << I40E_PFINT_STAT_CTL0_OTHER_ITR_INDX_SHIFT;
+	I40E_WRITE_REG(hw, I40E_PFINT_STAT_CTL0, reg);
+
+	/*
 	 * Enable interrupt types we expect to receive. At the moment, this
 	 * is limited to the adminq; however, we'll want to review 11.2.2.9.22
 	 * for more types here as we add support for detecting them, handling
@@ -425,8 +547,15 @@
 	} else {
 		i40e_intr_init_queue_shared(i40e);
 	}
-}

+	/*
+	 * Finally, set all of the default ITRs for the interrupts. Note that
+	 * the queues will have been set up above.
+	 */
+	i40e_intr_set_itr(i40e, I40E_ITR_INDEX_RX, i40e->i40e_rx_itr);
+	i40e_intr_set_itr(i40e, I40E_ITR_INDEX_TX, i40e->i40e_tx_itr);
+	i40e_intr_set_itr(i40e, I40E_ITR_INDEX_OTHER, i40e->i40e_other_itr);
+}

 static void
 i40e_intr_adminq_work(i40e_t *i40e)
@@ -548,7 +677,16 @@ i40e_intr_msix(void *arg1, void *arg2)

 	VERIFY(vector_idx == 1);

-	i40e_intr_rx_work(i40e, 0);
+	/*
+	 * Note that we explicitly do not check this value under the lock, even
+	 * though assignments to it are made while holding it. Here, the cost
+	 * of getting this wrong is at worst a bit of additional contention
+	 * and, even more rarely, a duplicated packet. The cost of taking the
+	 * lock, on the other hand, is much higher. This is something we
+	 * should revisit as we more generally implement ring support.
+	 */
+	if (i40e->i40e_intr_poll != B_TRUE)
+		i40e_intr_rx_work(i40e, 0);
 	i40e_intr_tx_work(i40e, 0);
 	i40e_intr_io_enable(i40e, 1);

diff --git a/usr/src/uts/common/io/i40e/i40e_main.c b/usr/src/uts/common/io/i40e/i40e_main.c
index 69303a0370..83b3af7e26 100644
--- a/usr/src/uts/common/io/i40e/i40e_main.c
+++ b/usr/src/uts/common/io/i40e/i40e_main.c
@@ -1097,11 +1097,14 @@ i40e_free_trqpairs(i40e_t *i40e)
 		i40e->i40e_trqpairs = NULL;
 	}

+	cv_destroy(&i40e->i40e_rx_pending_cv);
+	mutex_destroy(&i40e->i40e_rx_pending_lock);
 	mutex_destroy(&i40e->i40e_general_lock);
 }

 /*
- * Allocate receive & transmit rings.
+ * Allocate transmit and receive rings, as well as the other data structures
+ * that we need.
 */
 static boolean_t
 i40e_alloc_trqpairs(i40e_t *i40e)
@@ -1114,6 +1117,8 @@ i40e_alloc_trqpairs(i40e_t *i40e)
 	 * all relevant locks.
 	 */
 	mutex_init(&i40e->i40e_general_lock, NULL, MUTEX_DRIVER, mutexpri);
+	mutex_init(&i40e->i40e_rx_pending_lock, NULL, MUTEX_DRIVER, mutexpri);
+	cv_init(&i40e->i40e_rx_pending_cv, NULL, CV_DRIVER, NULL);

 	i40e->i40e_trqpairs = kmem_zalloc(sizeof (i40e_trqpair_t) *
 	    i40e->i40e_num_trqpairs, KM_SLEEP);
@@ -1527,6 +1532,23 @@ i40e_init_properties(i40e_t *i40e)
 	i40e->i40e_rx_hcksum_enable = i40e_get_prop(i40e, "rx_hcksum_enable",
 	    B_FALSE, B_TRUE, B_TRUE);

+	i40e->i40e_rx_dma_min = i40e_get_prop(i40e, "rx_dma_threshold",
+	    I40E_MIN_RX_DMA_THRESH, I40E_MAX_RX_DMA_THRESH,
+	    I40E_DEF_RX_DMA_THRESH);
+
+	i40e->i40e_tx_dma_min = i40e_get_prop(i40e, "tx_dma_threshold",
+	    I40E_MIN_TX_DMA_THRESH, I40E_MAX_TX_DMA_THRESH,
+	    I40E_DEF_TX_DMA_THRESH);
+
+	i40e->i40e_tx_itr = i40e_get_prop(i40e, "tx_intr_throttle",
+	    I40E_MIN_ITR, I40E_MAX_ITR, I40E_DEF_TX_ITR);
+
+	i40e->i40e_rx_itr = i40e_get_prop(i40e, "rx_intr_throttle",
+	    I40E_MIN_ITR, I40E_MAX_ITR, I40E_DEF_RX_ITR);
+
+	i40e->i40e_other_itr = i40e_get_prop(i40e, "other_intr_throttle",
+	    I40E_MIN_ITR, I40E_MAX_ITR, I40E_DEF_OTHER_ITR);
+
 	if (!i40e->i40e_mr_enable) {
 		i40e->i40e_num_trqpairs = I40E_TRQPAIR_NOMSIX;
 		i40e->i40e_num_rx_groups = I40E_GROUP_NOMSIX;
@@ -2567,6 +2589,27 @@ done:
 	return (rc);
 }

+/*
+ * We may have loaned descriptors up to the stack. If any of them are still
+ * outstanding, we will not continue with detach.
+ */
+static boolean_t
+i40e_drain_rx(i40e_t *i40e)
+{
+	mutex_enter(&i40e->i40e_rx_pending_lock);
+	while (i40e->i40e_rx_pending > 0) {
+		if (cv_reltimedwait(&i40e->i40e_rx_pending_cv,
+		    &i40e->i40e_rx_pending_lock,
+		    drv_usectohz(I40E_DRAIN_RX_WAIT), TR_CLOCK_TICK) == -1) {
+			mutex_exit(&i40e->i40e_rx_pending_lock);
+			return (B_FALSE);
+		}
+	}
+	mutex_exit(&i40e->i40e_rx_pending_lock);
+
+	return (B_TRUE);
+}
+
 static int
 i40e_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
 {
@@ -2712,11 +2755,12 @@ i40e_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
 		return (DDI_FAILURE);
 	}

-	/*
-	 * When we add support for DMA binding, we'll need to make sure that we
-	 * take care of draining any outstanding packets that are still up in
-	 * the kernel.
-	 */
+	if (i40e_drain_rx(i40e) == B_FALSE) {
+		i40e_log(i40e, "timed out draining DMA resources, %d buffers "
+		    "remain", i40e->i40e_rx_pending);
+		return (DDI_FAILURE);
+	}
+
 	mutex_enter(&i40e_glock);
 	list_remove(&i40e_glist, i40e);
 	mutex_exit(&i40e_glock);
@@ -2726,7 +2770,6 @@ i40e_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
 	return (DDI_SUCCESS);
 }

-
 static struct cb_ops i40e_cb_ops = {
 	nulldev,		/* cb_open */
 	nulldev,		/* cb_close */
diff --git a/usr/src/uts/common/io/i40e/i40e_stats.c b/usr/src/uts/common/io/i40e/i40e_stats.c
index 90b85a1453..c7dd403fc8 100644
--- a/usr/src/uts/common/io/i40e/i40e_stats.c
+++ b/usr/src/uts/common/io/i40e/i40e_stats.c
@@ -1212,6 +1212,13 @@ i40e_stats_trqpair_init(i40e_trqpair_t *itrq)
 	kstat_named_init(&tsp->itxs_packets, "tx_packets",
 	    KSTAT_DATA_UINT64);
 	tsp->itxs_packets.value.ui64 = 0;
+	kstat_named_init(&tsp->itxs_descriptors, "tx_descriptors",
+	    KSTAT_DATA_UINT64);
+	tsp->itxs_descriptors.value.ui64 = 0;
+	kstat_named_init(&tsp->itxs_recycled, "tx_recycled",
+	    KSTAT_DATA_UINT64);
+	tsp->itxs_recycled.value.ui64 = 0;
+
 	kstat_named_init(&tsp->itxs_hck_meoifail, "tx_hck_meoifail",
 	    KSTAT_DATA_UINT64);
 	tsp->itxs_hck_meoifail.value.ui64 = 0;
@@ -1256,6 +1263,12 @@ i40e_stats_trqpair_init(i40e_trqpair_t *itrq)
 	kstat_named_init(&rsp->irxs_rx_intr_limit, "rx_intr_limit",
 	    KSTAT_DATA_UINT64);
 	rsp->irxs_rx_intr_limit.value.ui64 = 0;
+	kstat_named_init(&rsp->irxs_rx_bind_norcb, "rx_bind_norcb",
+	    KSTAT_DATA_UINT64);
+	rsp->irxs_rx_bind_norcb.value.ui64 = 0;
+	kstat_named_init(&rsp->irxs_rx_bind_nomp, "rx_bind_nomp",
+	    KSTAT_DATA_UINT64);
+	rsp->irxs_rx_bind_nomp.value.ui64 = 0;
 	kstat_named_init(&rsp->irxs_rx_copy_nomem, "rx_copy_nomem",
 	    KSTAT_DATA_UINT64);
 	rsp->irxs_rx_copy_nomem.value.ui64 = 0;
diff --git a/usr/src/uts/common/io/i40e/i40e_sw.h b/usr/src/uts/common/io/i40e/i40e_sw.h
index 26e851176b..077599b237 100644
--- a/usr/src/uts/common/io/i40e/i40e_sw.h
+++ b/usr/src/uts/common/io/i40e/i40e_sw.h
@@ -68,6 +68,7 @@ extern "C" {
 #include <sys/fm/io/ddi.h>
 #include <sys/list.h>
 #include <sys/debug.h>
+#include <sys/sdt.h>
 #include "i40e_type.h"
 #include "i40e_osdep.h"
 #include "i40e_prototype.h"
@@ -124,16 +125,39 @@ extern "C" {
 #define	I40E_DEF_MTU	ETHERMTU

 /*
- * Table 1-5 of the PRM notes that LSO supports up to 256 KB.
+ * Interrupt throttling related values. Interrupt throttling values are
+ * defined in 2 microsecond increments; a value of zero effectively disables
+ * ITR activity. A helpful way to think about an ITR value is as a cap on the
+ * number of interrupts allowed per second.
+ *
+ * Our default values for RX allow 20k interrupts per second, while our
+ * default values for TX allow 5k interrupts per second. For other-class
+ * interrupts, we limit ourselves to a rate of 2k/s.
 */
-#define	I40E_ITR_NONE	0x3
+#define	I40E_MIN_ITR		0x0000
+#define	I40E_MAX_ITR		0x0FF0
+#define	I40E_DEF_RX_ITR		0x0019
+#define	I40E_DEF_TX_ITR		0x0064
+#define	I40E_DEF_OTHER_ITR	0x00FA
+
+/*
+ * Indexes into the three ITR registers that we have.
+ */
+typedef enum i40e_itr_index {
+	I40E_ITR_INDEX_RX	= 0x0,
+	I40E_ITR_INDEX_TX	= 0x1,
+	I40E_ITR_INDEX_OTHER	= 0x2,
+	I40E_ITR_INDEX_NONE	= 0x3
+} i40e_itr_index_t;

-#define	I40E_CYCLIC_PERIOD	NANOSEC	/* 1 second */
 /*
- * Interrupt rates and ITR logic.
+ * Table 1-5 of the PRM notes that LSO supports up to 256 KB.
 */
+#define	I40E_LSO_MAXLEN	(256 * 1024)
+
+#define	I40E_CYCLIC_PERIOD	NANOSEC	/* 1 second */
+#define	I40E_DRAIN_RX_WAIT	(500 * MILLISEC)	/* In us */

 /*
 * All the other queue types are defined by the common code. However, this
@@ -165,6 +189,20 @@
 #define	I40E_DEF_TX_BLOCK_THRESH	I40E_MIN_TX_BLOCK_THRESH

 /*
+ * Sizing for DMA thresholds. These are used to indicate whether or not we
+ * should perform a bcopy or a DMA binding of a given message block. The range
+ * allows for setting things such that we'll always do a bcopy (a high value)
+ * or always perform a DMA binding (a low value).
+ */
+#define	I40E_MIN_RX_DMA_THRESH	0
+#define	I40E_DEF_RX_DMA_THRESH	256
+#define	I40E_MAX_RX_DMA_THRESH	INT32_MAX
+
+#define	I40E_MIN_TX_DMA_THRESH	0
+#define	I40E_DEF_TX_DMA_THRESH	256
+#define	I40E_MAX_TX_DMA_THRESH	INT32_MAX
+
+/*
 * Resource sizing counts. There are various aspects of hardware where we may
 * have some variable number of elements that we need to handle. Such as the
 * hardware capabilities and switch capacities. We cannot know a priori how many
@@ -350,6 +388,7 @@ typedef struct i40e_tx_desc i40e_tx_desc_t;
 typedef union i40e_32byte_rx_desc i40e_rx_desc_t;

 typedef struct i40e_tx_control_block {
+	struct i40e_tx_control_block	*tcb_next;
 	mblk_t				*tcb_mp;
 	i40e_tx_type_t			tcb_type;
 	ddi_dma_handle_t		tcb_dma_handle;
@@ -372,11 +411,10 @@ typedef struct i40e_rx_data {
 	/*
 	 * RX control block list definitions
 	 */
+	kmutex_t		rxd_free_lock;	/* Lock to protect free data */
 	i40e_rx_control_block_t	*rxd_rcb_area;	/* Array of control blocks */
 	i40e_rx_control_block_t	**rxd_work_list; /* Work list of rcbs */
 	i40e_rx_control_block_t	**rxd_free_list; /* Free list of rcbs */
-	uint32_t		rxd_rcb_head;	/* Index of next free rcb */
-	uint32_t		rxd_rcb_tail;	/* Index to put recycled rcb */
 	uint32_t		rxd_rcb_free;	/* Number of free rcbs */

 	/*
@@ -427,6 +465,8 @@ typedef struct i40e_rxq_stat {
 	kstat_named_t	irxs_rx_desc_error;	/* Error bit set on desc */
 	kstat_named_t	irxs_rx_copy_nomem;	/* allocb failure for copy */
 	kstat_named_t	irxs_rx_intr_limit;	/* Hit i40e_rx_limit_per_intr */
+	kstat_named_t	irxs_rx_bind_norcb;	/* No replacement rcb free */
+	kstat_named_t	irxs_rx_bind_nomp;	/* No mblk_t in bind rcb */

 	/*
 	 * The following set of statistics covers rx checksum related activity.
@@ -449,9 +489,10 @@ typedef struct i40e_rxq_stat {
 * Collection of TX Statistics on a given queue
 */
 typedef struct i40e_txq_stat {
-	kstat_named_t	itxs_bytes;	/* Bytes out on queue */
-	kstat_named_t	itxs_packets;	/* Packets out on queue */
-
+	kstat_named_t	itxs_bytes;		/* Bytes out on queue */
+	kstat_named_t	itxs_packets;		/* Packets out on queue */
+	kstat_named_t	itxs_descriptors;	/* Descriptors issued */
+	kstat_named_t	itxs_recycled;		/* Descriptors reclaimed */
 	/*
	 * Various failure conditions.
	 */
@@ -748,21 +789,33 @@ typedef struct i40e {
 	i40e_trqpair_t	*i40e_trqpairs;
 	boolean_t	i40e_mr_enable;
 	int		i40e_num_trqpairs;
+	uint_t		i40e_other_itr;
+
 	int		i40e_num_rx_groups;
 	int		i40e_num_rx_descs;
-	int		i40e_num_tx_descs;
 	mac_group_handle_t i40e_rx_group_handle;
 	uint32_t	i40e_rx_ring_size;
 	uint32_t	i40e_rx_buf_size;
 	boolean_t	i40e_rx_hcksum_enable;
+	uint32_t	i40e_rx_dma_min;
 	uint32_t	i40e_rx_limit_per_intr;
+	uint_t		i40e_rx_itr;
+
+	int		i40e_num_tx_descs;
 	uint32_t	i40e_tx_ring_size;
 	uint32_t	i40e_tx_buf_size;
 	uint32_t	i40e_tx_block_thresh;
 	boolean_t	i40e_tx_hcksum_enable;
+	uint32_t	i40e_tx_dma_min;
+	uint_t		i40e_tx_itr;

 	/*
 	 * Interrupt state
+	 *
+	 * Note that the use of a single boolean_t for i40e_intr_poll isn't
+	 * really the best design. When we have more than a single ring on the
+	 * device working, we'll transition to using something more
+	 * sophisticated.
 	 */
 	uint_t		i40e_intr_pri;
 	uint_t		i40e_intr_force;
@@ -774,6 +827,7 @@ typedef struct i40e {
 	size_t		i40e_intr_size;
 	ddi_intr_handle_t *i40e_intr_handles;
 	ddi_cb_handle_t	i40e_callback_handle;
+	boolean_t	i40e_intr_poll;

 	/*
 	 * DMA attributes. See i40e_buf.c for why we have copies of them in the
@@ -790,6 +844,7 @@ typedef struct i40e {
 	 * detach as we have active DMA memory outstanding.
 	 */
 	kmutex_t	i40e_rx_pending_lock;
+	kcondvar_t	i40e_rx_pending_cv;
 	uint32_t	i40e_rx_pending;

 	/*
@@ -867,6 +922,9 @@ extern uint_t i40e_intr_legacy(void *, void *);
 extern void i40e_intr_io_enable_all(i40e_t *);
 extern void i40e_intr_io_disable_all(i40e_t *);
 extern void i40e_intr_io_clear_cause(i40e_t *);
+extern void i40e_intr_rx_queue_disable(i40e_t *, uint_t);
+extern void i40e_intr_rx_queue_enable(i40e_t *, uint_t);
+extern void i40e_intr_set_itr(i40e_t *, i40e_itr_index_t, uint_t);

 /*
 * Receive-side functions
diff --git a/usr/src/uts/common/io/i40e/i40e_transceiver.c b/usr/src/uts/common/io/i40e/i40e_transceiver.c
index 3c05a7cec3..49739554de 100644
--- a/usr/src/uts/common/io/i40e/i40e_transceiver.c
+++ b/usr/src/uts/common/io/i40e/i40e_transceiver.c
@@ -661,8 +661,6 @@ i40e_alloc_rx_data(i40e_t *i40e, i40e_trqpair_t *itrq)
 	rxd->rxd_ring_size = i40e->i40e_rx_ring_size;
 	rxd->rxd_free_list_size = i40e->i40e_rx_ring_size;

-	rxd->rxd_rcb_head = 0;
-	rxd->rxd_rcb_tail = 0;
 	rxd->rxd_rcb_free = rxd->rxd_free_list_size;

 	rxd->rxd_work_list = kmem_zalloc(sizeof (i40e_rx_control_block_t *) *
@@ -1069,6 +1067,36 @@ i40e_init_dma_attrs(i40e_t *i40e, boolean_t fma)
 	}
 }

+static void
+i40e_rcb_free(i40e_rx_data_t *rxd, i40e_rx_control_block_t *rcb)
+{
+	mutex_enter(&rxd->rxd_free_lock);
+	ASSERT(rxd->rxd_rcb_free < rxd->rxd_free_list_size);
+	ASSERT(rxd->rxd_free_list[rxd->rxd_rcb_free] == NULL);
+	rxd->rxd_free_list[rxd->rxd_rcb_free] = rcb;
+	rxd->rxd_rcb_free++;
+	mutex_exit(&rxd->rxd_free_lock);
+}
+
+static i40e_rx_control_block_t *
+i40e_rcb_alloc(i40e_rx_data_t *rxd)
+{
+	i40e_rx_control_block_t *rcb;
+
+	mutex_enter(&rxd->rxd_free_lock);
+	if (rxd->rxd_rcb_free == 0) {
+		mutex_exit(&rxd->rxd_free_lock);
+		return (NULL);
+	}
+	rxd->rxd_rcb_free--;
+	rcb = rxd->rxd_free_list[rxd->rxd_rcb_free];
+	VERIFY(rcb != NULL);
+	rxd->rxd_free_list[rxd->rxd_rcb_free] = NULL;
+	mutex_exit(&rxd->rxd_free_lock);
+
+	return (rcb);
+}
+
 /*
 * This is the callback that we get from the OS when freemsg(9F) has been called
 * on a loaned descriptor. In addition, if we take the last reference count
@@ -1087,11 +1115,27 @@ i40e_rx_recycle(caddr_t arg)
 	i40e = rxd->rxd_i40e;

 	/*
-	 * At the moment this only exists for tearing down, because we don't
-	 * support rx DMA binding.
-	 * When we do, this will need to also put things back onto the free
-	 * list.
+	 * It's possible for this to be called with a reference count of zero.
+	 * That will happen when we're doing the freemsg after taking the last
+	 * reference because we're tearing down everything and this rcb is not
+	 * outstanding.
+	 */
+	if (rcb->rcb_ref == 0)
+		return;
+
+	/*
+	 * Don't worry about a failure of desballoc() here. It will only
+	 * become fatal if we try to use the mblk_t in i40e_rx_bind() and
+	 * can't allocate it there.
 	 */
+	rcb->rcb_mp = desballoc((unsigned char *)rcb->rcb_dma.dmab_address,
+	    rcb->rcb_dma.dmab_size, 0, &rcb->rcb_free_rtn);
+	i40e_rcb_free(rxd, rcb);

+	/*
+	 * It's possible that the rcb was being used while we were shutting
+	 * down the device. In that case, we'll take the final reference from
+	 * the device here.
+	 */
 	ref = atomic_dec_32_nv(&rcb->rcb_ref);
 	if (ref == 0) {
 		freemsg(rcb->rcb_mp);
@@ -1106,13 +1150,68 @@
 		 * If this was the last block and it's been indicated that we've
 		 * passed the shutdown point, we should clean up.
 		 */
-		if (rxd->rxd_shutdown == B_TRUE && rxd->rxd_rcb_pending == 0)
+		if (rxd->rxd_shutdown == B_TRUE && rxd->rxd_rcb_pending == 0) {
 			i40e_free_rx_data(rxd);
+			cv_broadcast(&i40e->i40e_rx_pending_cv);
+		}

 		mutex_exit(&i40e->i40e_rx_pending_lock);
 	}
 }

+static mblk_t *
+i40e_rx_bind(i40e_trqpair_t *itrq, i40e_rx_data_t *rxd, uint32_t index,
+    uint32_t plen)
+{
+	mblk_t *mp;
+	i40e_t *i40e = rxd->rxd_i40e;
+	i40e_rx_control_block_t *rcb, *rep_rcb;
+
+	ASSERT(MUTEX_HELD(&itrq->itrq_rx_lock));
+
+	if ((rep_rcb = i40e_rcb_alloc(rxd)) == NULL) {
+		itrq->itrq_rxstat.irxs_rx_bind_norcb.value.ui64++;
+		return (NULL);
+	}
+
+	rcb = rxd->rxd_work_list[index];
+
+	/*
+	 * Check to make sure we have an mblk_t. If we don't, this is our last
+	 * chance to try and get one.
+	 */
+	if (rcb->rcb_mp == NULL) {
+		rcb->rcb_mp =
+		    desballoc((unsigned char *)rcb->rcb_dma.dmab_address,
+		    rcb->rcb_dma.dmab_size, 0, &rcb->rcb_free_rtn);
+		if (rcb->rcb_mp == NULL) {
+			itrq->itrq_rxstat.irxs_rx_bind_nomp.value.ui64++;
+			i40e_rcb_free(rxd, rep_rcb);
+			return (NULL);
+		}
+	}
+
+	I40E_DMA_SYNC(&rcb->rcb_dma, DDI_DMA_SYNC_FORKERNEL);
+
+	if (i40e_check_dma_handle(rcb->rcb_dma.dmab_dma_handle) != DDI_FM_OK) {
+		ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_DEGRADED);
+		atomic_or_32(&i40e->i40e_state, I40E_ERROR);
+		i40e_rcb_free(rxd, rep_rcb);
+		return (NULL);
+	}
+
+	/*
+	 * Note, we've already accounted for the I40E_BUF_IPHDR_ALIGNMENT.
+	 */
+	mp = rcb->rcb_mp;
+	atomic_inc_32(&rcb->rcb_ref);
+	mp->b_wptr = mp->b_rptr + plen;
+	mp->b_next = mp->b_cont = NULL;
+
+	rxd->rxd_work_list[index] = rep_rcb;
+	return (mp);
+}
+
 /*
 * We're going to allocate a new message block for this frame and attempt to
 * receive it. See the big theory statement for more information on when we copy
@@ -1372,7 +1471,12 @@ i40e_ring_rx(i40e_trqpair_t *itrq, int poll_bytes)
 			break;

 		rx_bytes += plen;
-		mp = i40e_rx_copy(itrq, rxd, cur_head, plen);
+		mp = NULL;
+		if (plen >= i40e->i40e_rx_dma_min)
+			mp = i40e_rx_bind(itrq, rxd, cur_head, plen);
+		if (mp == NULL)
+			mp = i40e_rx_copy(itrq, rxd, cur_head, plen);
+
 		if (mp != NULL) {
 			if (i40e->i40e_rx_hcksum_enable)
 				i40e_rx_hcksum(itrq, mp, stword, error, ptype);
@@ -1829,6 +1933,7 @@ i40e_tcb_reset(i40e_tx_control_block_t *tcb)
 	tcb->tcb_type = I40E_TX_NONE;
 	freemsg(tcb->tcb_mp);
 	tcb->tcb_mp = NULL;
+	tcb->tcb_next = NULL;
 }

 /*
@@ -1884,6 +1989,7 @@ void
 i40e_tx_recycle_ring(i40e_trqpair_t *itrq)
 {
 	uint32_t wbhead, toclean, count;
+	i40e_tx_control_block_t *tcbhead;
 	i40e_t *i40e = itrq->itrq_i40e;

 	mutex_enter(&itrq->itrq_tx_lock);
@@ -1920,6 +2026,7 @@ i40e_tx_recycle_ring(i40e_trqpair_t *itrq)
 	wbhead = *itrq->itrq_desc_wbhead;
 	toclean = itrq->itrq_desc_head;
 	count = 0;
+	tcbhead = NULL;

 	while (toclean != wbhead) {
 		i40e_tx_control_block_t *tcb;
@@ -1927,8 +2034,8 @@ i40e_tx_recycle_ring(i40e_trqpair_t *itrq)
 		tcb = itrq->itrq_tcb_work_list[toclean];
 		itrq->itrq_tcb_work_list[toclean] = NULL;
 		ASSERT(tcb != NULL);
-		i40e_tcb_reset(tcb);
-		i40e_tcb_free(itrq, tcb);
+		tcb->tcb_next = tcbhead;
+		tcbhead = tcb;

 		/*
 		 * We zero this out for sanity purposes.
@@ -1940,6 +2047,7 @@ i40e_tx_recycle_ring(i40e_trqpair_t *itrq)

 	itrq->itrq_desc_head = wbhead;
 	itrq->itrq_desc_free += count;
+	itrq->itrq_txstat.itxs_recycled.value.ui64 += count;
 	ASSERT(itrq->itrq_desc_free <= itrq->itrq_tx_ring_size);

 	if (itrq->itrq_tx_blocked == B_TRUE &&
@@ -1951,6 +2059,19 @@ i40e_tx_recycle_ring(i40e_trqpair_t *itrq)
 	}

 	mutex_exit(&itrq->itrq_tx_lock);
+
+	/*
+	 * Now clean up the tcbs.
+	 */
+	while (tcbhead != NULL) {
+		i40e_tx_control_block_t *tcb = tcbhead;
+
+		tcbhead = tcb->tcb_next;
+		i40e_tcb_reset(tcb);
+		i40e_tcb_free(itrq, tcb);
+	}
+
+	DTRACE_PROBE2(i40e__recycle, i40e_trqpair_t *, itrq, uint32_t, count);
 }

 /*
@@ -2116,6 +2237,7 @@ i40e_ring_tx(void *arg, mblk_t *mp)

 	txs->itxs_bytes.value.ui64 += mpsize;
 	txs->itxs_packets.value.ui64++;
+	txs->itxs_descriptors.value.ui64++;

 	mutex_exit(&itrq->itrq_tx_lock);
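The OS-5318 portion of this change is worth calling out as a pattern: i40e_tx_recycle_ring() now unlinks completed tcbs onto a private tcb_next chain while itrq_tx_lock is held, and only resets and frees them, which takes the tcb free-list lock, after the ring lock has been dropped. A minimal standalone sketch of that deferred-cleanup pattern (generic, hypothetical names; illumos DDI locking assumed):

	#include <sys/types.h>
	#include <sys/ksynch.h>

	typedef struct item {
		struct item	*i_next;
	} item_t;

	/*
	 * Collect completed items under the ring lock, then process them
	 * after dropping it, so the free-list lock is never taken while the
	 * ring lock is held.
	 */
	static void
	recycle(kmutex_t *ring_lock, item_t **work_list, uint32_t head,
	    uint32_t tail, uint32_t size)
	{
		item_t *chain = NULL;

		mutex_enter(ring_lock);
		while (head != tail) {
			item_t *ip = work_list[head];

			work_list[head] = NULL;
			ip->i_next = chain;
			chain = ip;
			head = (head + 1) % size;
		}
		mutex_exit(ring_lock);

		while (chain != NULL) {
			item_t *ip = chain;

			chain = ip->i_next;
			/*
			 * Reset and free ip here; this is where the
			 * free-list lock may be taken safely.
			 */
		}
	}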