author     Jerry Jelinek <jerry.jelinek@joyent.com>  2020-07-23 11:36:51 +0000
committer  Jerry Jelinek <jerry.jelinek@joyent.com>  2020-07-23 11:36:51 +0000
commit     45253056e75fcd7e65f200019d4523246dd488a9 (patch)
tree       bb116974adf13b75913f8baccbf19bd376ddab9b
parent     11314710586b6f738a39dd0bf242b53b11c8c189 (diff)
parent     aa2a44afcbfb9d08096ea5af01f0bb30d4b7f9a6 (diff)
download   illumos-joyent-45253056e75fcd7e65f200019d4523246dd488a9.tar.gz
[illumos-gate merge]
commit aa2a44afcbfb9d08096ea5af01f0bb30d4b7f9a6
    12957 Some ipadm and dladm commands are slow on i40e
    12958 i40e allocates large amounts of DMA
    12972 Remove reference to deprecated ddi_power from i40e
commit 6937e379563aa5fe8a003acdcd316b89044dd8dd
    12952 ms: this statement may fall through
-rw-r--r--  usr/src/uts/common/io/i40e/i40e_gld.c          |  24
-rw-r--r--  usr/src/uts/common/io/i40e/i40e_intr.c         |  65
-rw-r--r--  usr/src/uts/common/io/i40e/i40e_main.c         | 586
-rw-r--r--  usr/src/uts/common/io/i40e/i40e_sw.h           |  25
-rw-r--r--  usr/src/uts/common/io/i40e/i40e_transceiver.c  | 172
-rw-r--r--  usr/src/uts/common/mapfiles/ddi.mapfile        |   3
-rw-r--r--  usr/src/uts/sun/io/ms.c                        |   6
7 files changed, 520 insertions, 361 deletions
diff --git a/usr/src/uts/common/io/i40e/i40e_gld.c b/usr/src/uts/common/io/i40e/i40e_gld.c
index e988793675..ca5b15cd4a 100644
--- a/usr/src/uts/common/io/i40e/i40e_gld.c
+++ b/usr/src/uts/common/io/i40e/i40e_gld.c
@@ -14,6 +14,7 @@
* Copyright (c) 2018, Joyent, Inc.
* Copyright 2017 Tegile Systems, Inc. All rights reserved.
* Copyright 2020 Ryan Zezeski
+ * Copyright 2020 RackTop Systems, Inc.
*/
/*
@@ -178,7 +179,7 @@ i40e_m_start(void *arg)
goto done;
}
- if (!i40e_start(i40e, B_TRUE)) {
+ if (!i40e_start(i40e)) {
rc = EIO;
goto done;
}
@@ -201,7 +202,7 @@ i40e_m_stop(void *arg)
goto done;
atomic_and_32(&i40e->i40e_state, ~I40E_STARTED);
- i40e_stop(i40e, B_TRUE);
+ i40e_stop(i40e);
done:
mutex_exit(&i40e->i40e_general_lock);
}
@@ -435,6 +436,10 @@ static int
i40e_ring_start(mac_ring_driver_t rh, uint64_t gen_num)
{
i40e_trqpair_t *itrq = (i40e_trqpair_t *)rh;
+ int rv;
+
+ if ((rv = i40e_setup_ring(itrq)) != 0)
+ return (rv);
/*
* GLDv3 requires we keep track of a generation number, as it uses
@@ -446,6 +451,19 @@ i40e_ring_start(mac_ring_driver_t rh, uint64_t gen_num)
return (0);
}
+static void
+i40e_ring_stop(mac_ring_driver_t rh)
+{
+ i40e_trqpair_t *itrq = (i40e_trqpair_t *)rh;
+
+ if (!i40e_shutdown_ring(itrq)) {
+ i40e_t *i40e = itrq->itrq_i40e;
+
+ ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_LOST);
+ i40e_error(i40e, "Failed to stop ring %u", itrq->itrq_index);
+ }
+}
+
/* ARGSUSED */
static int
i40e_rx_ring_intr_enable(mac_intr_handle_t intrh)
@@ -529,7 +547,7 @@ i40e_fill_rx_ring(void *arg, mac_ring_type_t rtype, const int group_index,
itrq->itrq_macrxring = rh;
infop->mri_driver = (mac_ring_driver_t)itrq;
infop->mri_start = i40e_ring_start;
- infop->mri_stop = NULL;
+ infop->mri_stop = i40e_ring_stop;
infop->mri_poll = i40e_ring_rx_poll;
infop->mri_stat = i40e_rx_ring_stat;
mintr->mi_handle = (mac_intr_handle_t)itrq;
diff --git a/usr/src/uts/common/io/i40e/i40e_intr.c b/usr/src/uts/common/io/i40e/i40e_intr.c
index e3a0d69cc6..c6771d64b6 100644
--- a/usr/src/uts/common/io/i40e/i40e_intr.c
+++ b/usr/src/uts/common/io/i40e/i40e_intr.c
@@ -12,6 +12,7 @@
/*
* Copyright 2019 Joyent, Inc.
* Copyright 2017 Tegile Systems, Inc. All rights reserved.
+ * Copyright 2020 RackTop Systems, Inc.
*/
/*
@@ -723,6 +724,53 @@ i40e_intr_other_work(i40e_t *i40e)
}
/*
+ * The prolog/epilog pair of functions ensure the integrity of the trqpair
+ * across ring stop/start operations.
+ *
+ * A ring stop operation will wait whilst an interrupt is processing a
+ * trqpair, and when a ring is stopped the interrupt handler will skip
+ * the trqpair.
+ */
+static boolean_t
+i40e_intr_trqpair_prolog(i40e_trqpair_t *itrq)
+{
+ boolean_t enabled;
+
+ mutex_enter(&itrq->itrq_intr_lock);
+ enabled = !itrq->itrq_intr_quiesce;
+ if (enabled)
+ itrq->itrq_intr_busy = B_TRUE;
+ mutex_exit(&itrq->itrq_intr_lock);
+
+ return (enabled);
+}
+
+static void
+i40e_intr_trqpair_epilog(i40e_trqpair_t *itrq)
+{
+ mutex_enter(&itrq->itrq_intr_lock);
+ itrq->itrq_intr_busy = B_FALSE;
+ if (itrq->itrq_intr_quiesce)
+ cv_signal(&itrq->itrq_intr_cv);
+ mutex_exit(&itrq->itrq_intr_lock);
+}
+
+/*
+ * Tell any active interrupt vectors the ring is quiescing, then
+ * wait until any active interrupt thread has finished with this
+ * trqpair.
+ */
+void
+i40e_intr_quiesce(i40e_trqpair_t *itrq)
+{
+ mutex_enter(&itrq->itrq_intr_lock);
+ itrq->itrq_intr_quiesce = B_TRUE;
+ while (itrq->itrq_intr_busy)
+ cv_wait(&itrq->itrq_intr_cv, &itrq->itrq_intr_lock);
+ mutex_exit(&itrq->itrq_intr_lock);
+}
+
+/*
* Handle an MSI-X interrupt. See section 7.5.1.3 for an overview of
* the MSI-X interrupt sequence.
*/
@@ -762,8 +810,13 @@ i40e_intr_msix(void *arg1, void *arg2)
ASSERT3U(i, <, i40e->i40e_num_trqpairs);
ASSERT3P(itrq, !=, NULL);
+ if (!i40e_intr_trqpair_prolog(itrq))
+ continue;
+
i40e_intr_rx_work(i40e, itrq);
i40e_intr_tx_work(i40e, itrq);
+
+ i40e_intr_trqpair_epilog(itrq);
}
i40e_intr_io_enable(i40e, vector_idx);
@@ -804,11 +857,15 @@ i40e_intr_notx(i40e_t *i40e, boolean_t shared)
if (reg & I40E_PFINT_ICR0_ADMINQ_MASK)
i40e_intr_adminq_work(i40e);
- if (reg & I40E_INTR_NOTX_RX_MASK)
- i40e_intr_rx_work(i40e, itrq);
+ if (i40e_intr_trqpair_prolog(itrq)) {
+ if (reg & I40E_INTR_NOTX_RX_MASK)
+ i40e_intr_rx_work(i40e, itrq);
- if (reg & I40E_INTR_NOTX_TX_MASK)
- i40e_intr_tx_work(i40e, itrq);
+ if (reg & I40E_INTR_NOTX_TX_MASK)
+ i40e_intr_tx_work(i40e, itrq);
+
+ i40e_intr_trqpair_epilog(itrq);
+ }
done:
i40e_intr_adminq_enable(i40e);
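
The prolog/epilog/quiesce handshake added above is the core of the i40e_intr.c change:
the interrupt path refuses to touch a trqpair once a ring stop has marked it quiescing,
and the ring stop waits for any handler already inside. For illustration only (this is
not part of the patch), here is a minimal userland analogue using pthreads; the
trqpair_t fields and function names below are simplified stand-ins for the driver's.

#include <pthread.h>
#include <stdbool.h>

typedef struct trqpair {
	pthread_mutex_t intr_lock;
	pthread_cond_t  intr_cv;
	bool            intr_busy;     /* a handler is inside this pair */
	bool            intr_quiesce;  /* a ring stop is in progress */
} trqpair_t;

/* Entry to the interrupt path; refuse work once the pair is quiescing. */
static bool
intr_prolog(trqpair_t *tp)
{
	bool enabled;

	pthread_mutex_lock(&tp->intr_lock);
	enabled = !tp->intr_quiesce;
	if (enabled)
		tp->intr_busy = true;
	pthread_mutex_unlock(&tp->intr_lock);
	return (enabled);
}

/* Exit from the interrupt path; wake a waiting ring-stop thread if any. */
static void
intr_epilog(trqpair_t *tp)
{
	pthread_mutex_lock(&tp->intr_lock);
	tp->intr_busy = false;
	if (tp->intr_quiesce)
		pthread_cond_signal(&tp->intr_cv);
	pthread_mutex_unlock(&tp->intr_lock);
}

/* Ring stop: mark the pair quiesced, then wait out any active handler. */
static void
intr_quiesce(trqpair_t *tp)
{
	pthread_mutex_lock(&tp->intr_lock);
	tp->intr_quiesce = true;
	while (tp->intr_busy)
		pthread_cond_wait(&tp->intr_cv, &tp->intr_lock);
	pthread_mutex_unlock(&tp->intr_lock);
}
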
diff --git a/usr/src/uts/common/io/i40e/i40e_main.c b/usr/src/uts/common/io/i40e/i40e_main.c
index 8971866d35..c05e0f5a9e 100644
--- a/usr/src/uts/common/io/i40e/i40e_main.c
+++ b/usr/src/uts/common/io/i40e/i40e_main.c
@@ -330,10 +330,11 @@
*
* 2) i40e_trqpair_t`itrq_rx_lock
* 3) i40e_trqpair_t`itrq_tx_lock
- * 4) i40e_t`i40e_rx_pending_lock
- * 5) i40e_trqpair_t`itrq_tcb_lock
+ * 4) i40e_trqpair_t`itrq_intr_lock
+ * 5) i40e_t`i40e_rx_pending_lock
+ * 6) i40e_trqpair_t`itrq_tcb_lock
*
- * 6) i40e_t`i40e_stat_lock
+ * 7) i40e_t`i40e_stat_lock
*
* Rules and expectations:
*
@@ -351,6 +352,9 @@
* 4) The following pairs of locks are not expected to be held at the same time:
*
* o i40e_t`i40e_rx_pending_lock and i40e_trqpair_t`itrq_tcb_lock
+ * o i40e_trqpair_t`itrq_intr_lock is not expected to be held with any
+ * other lock except i40e_t`i40e_general_lock in mc_start(9E) and
+ * mc_stop(9e).
*
* -----------
* Future Work
@@ -1136,16 +1140,14 @@ i40e_free_trqpairs(i40e_t *i40e)
if (i40e->i40e_trqpairs != NULL) {
for (uint_t i = 0; i < i40e->i40e_num_trqpairs; i++) {
itrq = &i40e->i40e_trqpairs[i];
+ mutex_destroy(&itrq->itrq_intr_lock);
mutex_destroy(&itrq->itrq_rx_lock);
mutex_destroy(&itrq->itrq_tx_lock);
mutex_destroy(&itrq->itrq_tcb_lock);
+ cv_destroy(&itrq->itrq_intr_cv);
+ cv_destroy(&itrq->itrq_tx_cv);
- /*
- * Should have already been cleaned up by start/stop,
- * etc.
- */
- ASSERT(itrq->itrq_txkstat == NULL);
- ASSERT(itrq->itrq_rxkstat == NULL);
+ i40e_stats_trqpair_fini(itrq);
}
kmem_free(i40e->i40e_trqpairs,
@@ -1181,10 +1183,26 @@ i40e_alloc_trqpairs(i40e_t *i40e)
i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[i];
itrq->itrq_i40e = i40e;
+ mutex_init(&itrq->itrq_intr_lock, NULL, MUTEX_DRIVER, mutexpri);
mutex_init(&itrq->itrq_rx_lock, NULL, MUTEX_DRIVER, mutexpri);
mutex_init(&itrq->itrq_tx_lock, NULL, MUTEX_DRIVER, mutexpri);
mutex_init(&itrq->itrq_tcb_lock, NULL, MUTEX_DRIVER, mutexpri);
+ cv_init(&itrq->itrq_intr_cv, NULL, CV_DRIVER, NULL);
+ cv_init(&itrq->itrq_tx_cv, NULL, CV_DRIVER, NULL);
itrq->itrq_index = i;
+ itrq->itrq_intr_quiesce = B_TRUE;
+ itrq->itrq_tx_quiesce = B_TRUE;
+ }
+
+ for (uint_t i = 0; i < i40e->i40e_num_trqpairs; i++) {
+ /*
+ * Keeping this in a separate iteration makes the
+ * clean up path safe.
+ */
+ if (!i40e_stats_trqpair_init(&i40e->i40e_trqpairs[i])) {
+ i40e_free_trqpairs(i40e);
+ return (B_FALSE);
+ }
}
i40e->i40e_rx_groups = kmem_zalloc(sizeof (i40e_rx_group_t) *
@@ -2565,161 +2583,180 @@ i40e_chip_start(i40e_t *i40e)
* Take care of tearing down the rx ring. See 8.3.3.1.2 for more information.
*/
static void
-i40e_shutdown_rx_rings(i40e_t *i40e)
+i40e_shutdown_rx_ring(i40e_trqpair_t *itrq)
{
- int i;
- uint32_t reg;
-
+ i40e_t *i40e = itrq->itrq_i40e;
i40e_hw_t *hw = &i40e->i40e_hw_space;
+ uint32_t reg;
/*
- * Step 1. The interrupt linked list (see i40e_intr.c for more
- * information) should have already been cleared before calling this
- * function.
+ * Step 1. 8.3.3.1.2 suggests the interrupt is removed from the
+ * hardware interrupt linked list (see i40e_intr.c) but for
+ * simplicity we keep this list immutable until the device
+ * (distinct from an individual ring) is stopped.
*/
-#ifdef DEBUG
- if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) {
- for (i = 1; i < i40e->i40e_intr_count; i++) {
- reg = I40E_READ_REG(hw, I40E_PFINT_LNKLSTN(i - 1));
- VERIFY3U(reg, ==, I40E_QUEUE_TYPE_EOL);
- }
- } else {
- reg = I40E_READ_REG(hw, I40E_PFINT_LNKLST0);
- VERIFY3U(reg, ==, I40E_QUEUE_TYPE_EOL);
- }
-#endif /* DEBUG */
-
- for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
- /*
- * Step 1. Request the queue by clearing QENA_REQ. It may not be
- * set due to unwinding from failures and a partially enabled
- * ring set.
- */
- reg = I40E_READ_REG(hw, I40E_QRX_ENA(i));
- if (!(reg & I40E_QRX_ENA_QENA_REQ_MASK))
- continue;
- VERIFY((reg & I40E_QRX_ENA_QENA_REQ_MASK) ==
- I40E_QRX_ENA_QENA_REQ_MASK);
- reg &= ~I40E_QRX_ENA_QENA_REQ_MASK;
- I40E_WRITE_REG(hw, I40E_QRX_ENA(i), reg);
- }
+ /*
+ * Step 2. Request the queue by clearing QENA_REQ. It may not be
+ * set due to unwinding from failures and a partially enabled
+ * ring set.
+ */
+ reg = I40E_READ_REG(hw, I40E_QRX_ENA(itrq->itrq_index));
+ if (!(reg & I40E_QRX_ENA_QENA_REQ_MASK))
+ return;
+ VERIFY((reg & I40E_QRX_ENA_QENA_REQ_MASK) ==
+ I40E_QRX_ENA_QENA_REQ_MASK);
+ reg &= ~I40E_QRX_ENA_QENA_REQ_MASK;
+ I40E_WRITE_REG(hw, I40E_QRX_ENA(itrq->itrq_index), reg);
/*
- * Step 2. Wait for the disable to take, by having QENA_STAT in the FPM
+ * Step 3. Wait for the disable to take, by having QENA_STAT in the FPM
* be cleared. Note that we could still receive data in the queue during
* this time. We don't actually wait for this now and instead defer this
- * to i40e_shutdown_rings_wait(), after we've interleaved disabling the
- * TX queues as well.
+ * to i40e_shutdown_ring_wait(), after we've interleaved disabling the
+ * TX queue as well.
*/
}
static void
-i40e_shutdown_tx_rings(i40e_t *i40e)
+i40e_shutdown_tx_ring(i40e_trqpair_t *itrq)
{
- int i;
- uint32_t reg;
-
+ i40e_t *i40e = itrq->itrq_i40e;
i40e_hw_t *hw = &i40e->i40e_hw_space;
+ uint32_t reg;
/*
- * Step 1. The interrupt linked list should already have been cleared.
+ * Step 2. Set the SET_QDIS flag for the queue.
*/
-#ifdef DEBUG
- if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) {
- for (i = 1; i < i40e->i40e_intr_count; i++) {
- reg = I40E_READ_REG(hw, I40E_PFINT_LNKLSTN(i - 1));
- VERIFY3U(reg, ==, I40E_QUEUE_TYPE_EOL);
- }
- } else {
- reg = I40E_READ_REG(hw, I40E_PFINT_LNKLST0);
- VERIFY3U(reg, ==, I40E_QUEUE_TYPE_EOL);
-
- }
-#endif /* DEBUG */
-
- for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
- /*
- * Step 2. Set the SET_QDIS flag for every queue.
- */
- i40e_pre_tx_queue_cfg(hw, i, B_FALSE);
- }
+ i40e_pre_tx_queue_cfg(hw, itrq->itrq_index, B_FALSE);
/*
- * Step 3. Wait at least 400 usec (can be done once for all queues).
+ * Step 3. Wait at least 400 usec.
*/
drv_usecwait(500);
- for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
- /*
- * Step 4. Clear the QENA_REQ flag which tells hardware to
- * quiesce. If QENA_REQ is not already set then that means that
- * we likely already tried to disable this queue.
- */
- reg = I40E_READ_REG(hw, I40E_QTX_ENA(i));
- if (!(reg & I40E_QTX_ENA_QENA_REQ_MASK))
- continue;
+ /*
+ * Step 4. Clear the QENA_REQ flag which tells hardware to
+ * quiesce. If QENA_REQ is not already set then that means that
+ * we likely already tried to disable this queue.
+ */
+ reg = I40E_READ_REG(hw, I40E_QTX_ENA(itrq->itrq_index));
+ if ((reg & I40E_QTX_ENA_QENA_REQ_MASK) != 0) {
reg &= ~I40E_QTX_ENA_QENA_REQ_MASK;
- I40E_WRITE_REG(hw, I40E_QTX_ENA(i), reg);
+ I40E_WRITE_REG(hw, I40E_QTX_ENA(itrq->itrq_index), reg);
}
/*
- * Step 5. Wait for all drains to finish. This will be done by the
+ * Step 5. Wait for the drain to finish. This will be done by the
* hardware removing the QENA_STAT flag from the queue. Rather than
- * waiting here, we interleave it with all the others in
- * i40e_shutdown_rings_wait().
+ * waiting here, we interleave it with the receive shutdown in
+ * i40e_shutdown_ring_wait().
*/
}
/*
- * Wait for all the rings to be shut down. e.g. Steps 2 and 5 from the above
+ * Wait for a ring to be shut down. e.g. Steps 2 and 5 from the above
* functions.
*/
static boolean_t
-i40e_shutdown_rings_wait(i40e_t *i40e)
+i40e_shutdown_ring_wait(i40e_trqpair_t *itrq)
{
- int i, try;
+ i40e_t *i40e = itrq->itrq_i40e;
i40e_hw_t *hw = &i40e->i40e_hw_space;
+ uint32_t reg;
+ int try;
- for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
- uint32_t reg;
-
- for (try = 0; try < I40E_RING_WAIT_NTRIES; try++) {
- reg = I40E_READ_REG(hw, I40E_QRX_ENA(i));
- if ((reg & I40E_QRX_ENA_QENA_STAT_MASK) == 0)
- break;
- i40e_msec_delay(I40E_RING_WAIT_PAUSE);
- }
+ for (try = 0; try < I40E_RING_WAIT_NTRIES; try++) {
+ reg = I40E_READ_REG(hw, I40E_QRX_ENA(itrq->itrq_index));
+ if ((reg & I40E_QRX_ENA_QENA_STAT_MASK) == 0)
+ break;
+ i40e_msec_delay(I40E_RING_WAIT_PAUSE);
+ }
- if ((reg & I40E_QRX_ENA_QENA_STAT_MASK) != 0) {
- i40e_error(i40e, "timed out disabling rx queue %d",
- i);
- return (B_FALSE);
- }
+ if ((reg & I40E_QRX_ENA_QENA_STAT_MASK) != 0) {
+ i40e_error(i40e, "timed out disabling rx queue %d",
+ itrq->itrq_index);
+ return (B_FALSE);
+ }
- for (try = 0; try < I40E_RING_WAIT_NTRIES; try++) {
- reg = I40E_READ_REG(hw, I40E_QTX_ENA(i));
- if ((reg & I40E_QTX_ENA_QENA_STAT_MASK) == 0)
- break;
- i40e_msec_delay(I40E_RING_WAIT_PAUSE);
- }
+ for (try = 0; try < I40E_RING_WAIT_NTRIES; try++) {
+ reg = I40E_READ_REG(hw, I40E_QTX_ENA(itrq->itrq_index));
+ if ((reg & I40E_QTX_ENA_QENA_STAT_MASK) == 0)
+ break;
+ i40e_msec_delay(I40E_RING_WAIT_PAUSE);
+ }
- if ((reg & I40E_QTX_ENA_QENA_STAT_MASK) != 0) {
- i40e_error(i40e, "timed out disabling tx queue %d",
- i);
- return (B_FALSE);
- }
+ if ((reg & I40E_QTX_ENA_QENA_STAT_MASK) != 0) {
+ i40e_error(i40e, "timed out disabling tx queue %d",
+ itrq->itrq_index);
+ return (B_FALSE);
}
return (B_TRUE);
}
+
+/*
+ * Shutdown an individual ring and release any memory.
+ */
+boolean_t
+i40e_shutdown_ring(i40e_trqpair_t *itrq)
+{
+ boolean_t rv = B_TRUE;
+
+ /*
+ * Tell transmit path to quiesce, and wait until done.
+ */
+ if (i40e_ring_tx_quiesce(itrq)) {
+ /* Already quiesced. */
+ return (B_TRUE);
+ }
+
+ i40e_shutdown_rx_ring(itrq);
+ i40e_shutdown_tx_ring(itrq);
+ if (!i40e_shutdown_ring_wait(itrq))
+ rv = B_FALSE;
+
+ /*
+ * After the ring has stopped, we need to wait 50ms before
+ * programming it again. Rather than wait here, we'll record
+ * the time the ring was stopped. When the ring is started, we'll
+ * check if enough time has expired and then wait if necessary.
+ */
+ itrq->irtq_time_stopped = gethrtime();
+
+ /*
+ * The rings have been stopped in the hardware, now wait for
+ * a possibly active interrupt thread.
+ */
+ i40e_intr_quiesce(itrq);
+
+ mutex_enter(&itrq->itrq_tx_lock);
+ i40e_tx_cleanup_ring(itrq);
+ mutex_exit(&itrq->itrq_tx_lock);
+
+ i40e_free_ring_mem(itrq, B_FALSE);
+
+ return (rv);
+}
+
+/*
+ * Shutdown all the rings.
+ * Called from i40e_stop(), and hopefully the mac layer has already
+ * called ring stop for each ring, which would make this almost a no-op.
+ */
static boolean_t
i40e_shutdown_rings(i40e_t *i40e)
{
- i40e_shutdown_rx_rings(i40e);
- i40e_shutdown_tx_rings(i40e);
- return (i40e_shutdown_rings_wait(i40e));
+ boolean_t rv = B_TRUE;
+ int i;
+
+ for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
+ if (!i40e_shutdown_ring(&i40e->i40e_trqpairs[i]))
+ rv = B_FALSE;
+ }
+
+ return (rv);
}
static void
@@ -2797,74 +2834,64 @@ i40e_setup_rx_hmc(i40e_trqpair_t *itrq)
}
/*
- * Take care of setting up the descriptor rings and actually programming the
+ * Take care of setting up the descriptor ring and actually programming the
* device. See 8.3.3.1.1 for the full list of steps we need to do to enable the
* rx rings.
*/
static boolean_t
-i40e_setup_rx_rings(i40e_t *i40e)
+i40e_setup_rx_ring(i40e_trqpair_t *itrq)
{
- int i;
+ i40e_t *i40e = itrq->itrq_i40e;
i40e_hw_t *hw = &i40e->i40e_hw_space;
+ i40e_rx_data_t *rxd = itrq->itrq_rxdata;
+ uint32_t reg;
+ int i;
- for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
- i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[i];
- i40e_rx_data_t *rxd = itrq->itrq_rxdata;
- uint32_t reg;
-
- /*
- * Step 1. Program all receive ring descriptors.
- */
- i40e_setup_rx_descs(itrq);
-
- /*
- * Step 2. Program the queue's FPM/HMC context.
- */
- if (i40e_setup_rx_hmc(itrq) == B_FALSE)
- return (B_FALSE);
+ /*
+ * Step 1. Program all receive ring descriptors.
+ */
+ i40e_setup_rx_descs(itrq);
- /*
- * Step 3. Clear the queue's tail pointer and set it to the end
- * of the space.
- */
- I40E_WRITE_REG(hw, I40E_QRX_TAIL(i), 0);
- I40E_WRITE_REG(hw, I40E_QRX_TAIL(i), rxd->rxd_ring_size - 1);
+ /*
+ * Step 2. Program the queue's FPM/HMC context.
+ */
+ if (!i40e_setup_rx_hmc(itrq))
+ return (B_FALSE);
- /*
- * Step 4. Enable the queue via the QENA_REQ.
- */
- reg = I40E_READ_REG(hw, I40E_QRX_ENA(i));
- VERIFY0(reg & (I40E_QRX_ENA_QENA_REQ_MASK |
- I40E_QRX_ENA_QENA_STAT_MASK));
- reg |= I40E_QRX_ENA_QENA_REQ_MASK;
- I40E_WRITE_REG(hw, I40E_QRX_ENA(i), reg);
- }
+ /*
+ * Step 3. Clear the queue's tail pointer and set it to the end
+ * of the space.
+ */
+ I40E_WRITE_REG(hw, I40E_QRX_TAIL(itrq->itrq_index), 0);
+ I40E_WRITE_REG(hw, I40E_QRX_TAIL(itrq->itrq_index),
+ rxd->rxd_ring_size - 1);
/*
- * Note, we wait for every queue to be enabled before we start checking.
- * This will hopefully cause most queues to be enabled at this point.
+ * Step 4. Enable the queue via the QENA_REQ.
*/
- for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
- uint32_t j, reg;
+ reg = I40E_READ_REG(hw, I40E_QRX_ENA(itrq->itrq_index));
+ VERIFY0(reg & (I40E_QRX_ENA_QENA_REQ_MASK |
+ I40E_QRX_ENA_QENA_STAT_MASK));
+ reg |= I40E_QRX_ENA_QENA_REQ_MASK;
+ I40E_WRITE_REG(hw, I40E_QRX_ENA(itrq->itrq_index), reg);
- /*
- * Step 5. Verify that QENA_STAT has been set. It's promised
- * that this should occur within about 10 us, but like other
- * systems, we give the card a bit more time.
- */
- for (j = 0; j < I40E_RING_WAIT_NTRIES; j++) {
- reg = I40E_READ_REG(hw, I40E_QRX_ENA(i));
+ /*
+ * Step 5. Verify that QENA_STAT has been set. It's promised
+ * that this should occur within about 10 us, but like other
+ * systems, we give the card a bit more time.
+ */
+ for (i = 0; i < I40E_RING_WAIT_NTRIES; i++) {
+ reg = I40E_READ_REG(hw, I40E_QRX_ENA(itrq->itrq_index));
- if (reg & I40E_QRX_ENA_QENA_STAT_MASK)
- break;
- i40e_msec_delay(I40E_RING_WAIT_PAUSE);
- }
+ if (reg & I40E_QRX_ENA_QENA_STAT_MASK)
+ break;
+ i40e_msec_delay(I40E_RING_WAIT_PAUSE);
+ }
- if ((reg & I40E_QRX_ENA_QENA_STAT_MASK) == 0) {
- i40e_error(i40e, "failed to enable rx queue %d, timed "
- "out.", i);
- return (B_FALSE);
- }
+ if ((reg & I40E_QRX_ENA_QENA_STAT_MASK) == 0) {
+ i40e_error(i40e, "failed to enable rx queue %d, timed "
+ "out.", itrq->itrq_index);
+ return (B_FALSE);
}
return (B_TRUE);
@@ -2938,82 +2965,120 @@ i40e_setup_tx_hmc(i40e_trqpair_t *itrq)
}
/*
- * Take care of setting up the descriptor rings and actually programming the
+ * Take care of setting up the descriptor ring and actually programming the
* device. See 8.4.3.1.1 for what we need to do here.
*/
static boolean_t
-i40e_setup_tx_rings(i40e_t *i40e)
+i40e_setup_tx_ring(i40e_trqpair_t *itrq)
{
- int i;
+ i40e_t *i40e = itrq->itrq_i40e;
i40e_hw_t *hw = &i40e->i40e_hw_space;
+ uint32_t reg;
+ int i;
- for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
- i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[i];
- uint32_t reg;
+ /*
+ * Step 1. Clear the queue disable flag and verify that the
+ * index is set correctly.
+ */
+ i40e_pre_tx_queue_cfg(hw, itrq->itrq_index, B_TRUE);
- /*
- * Step 1. Clear the queue disable flag and verify that the
- * index is set correctly.
- */
- i40e_pre_tx_queue_cfg(hw, i, B_TRUE);
+ /*
+ * Step 2. Prepare the queue's FPM/HMC context.
+ */
+ if (!i40e_setup_tx_hmc(itrq))
+ return (B_FALSE);
- /*
- * Step 2. Prepare the queue's FPM/HMC context.
- */
- if (i40e_setup_tx_hmc(itrq) == B_FALSE)
- return (B_FALSE);
+ /*
+ * Step 3. Verify that it's clear that this PF owns this queue.
+ */
+ reg = I40E_QTX_CTL_PF_QUEUE;
+ reg |= (hw->pf_id << I40E_QTX_CTL_PF_INDX_SHIFT) &
+ I40E_QTX_CTL_PF_INDX_MASK;
+ I40E_WRITE_REG(hw, I40E_QTX_CTL(itrq->itrq_index), reg);
+ i40e_flush(hw);
- /*
- * Step 3. Verify that it's clear that this PF owns this queue.
- */
- reg = I40E_QTX_CTL_PF_QUEUE;
- reg |= (hw->pf_id << I40E_QTX_CTL_PF_INDX_SHIFT) &
- I40E_QTX_CTL_PF_INDX_MASK;
- I40E_WRITE_REG(hw, I40E_QTX_CTL(itrq->itrq_index), reg);
- i40e_flush(hw);
+ /*
+ * Step 4. Set the QENA_REQ flag.
+ */
+ reg = I40E_READ_REG(hw, I40E_QTX_ENA(itrq->itrq_index));
+ VERIFY0(reg & (I40E_QTX_ENA_QENA_REQ_MASK |
+ I40E_QTX_ENA_QENA_STAT_MASK));
+ reg |= I40E_QTX_ENA_QENA_REQ_MASK;
+ I40E_WRITE_REG(hw, I40E_QTX_ENA(itrq->itrq_index), reg);
- /*
- * Step 4. Set the QENA_REQ flag.
- */
- reg = I40E_READ_REG(hw, I40E_QTX_ENA(i));
- VERIFY0(reg & (I40E_QTX_ENA_QENA_REQ_MASK |
- I40E_QTX_ENA_QENA_STAT_MASK));
- reg |= I40E_QTX_ENA_QENA_REQ_MASK;
- I40E_WRITE_REG(hw, I40E_QTX_ENA(i), reg);
+ /*
+ * Step 5. Verify that QENA_STAT has been set. It's promised
+ * that this should occur within about 10 us, but like BSD,
+ * we'll try for up to 100 ms for this queue.
+ */
+ for (i = 0; i < I40E_RING_WAIT_NTRIES; i++) {
+ reg = I40E_READ_REG(hw, I40E_QTX_ENA(itrq->itrq_index));
+
+ if (reg & I40E_QTX_ENA_QENA_STAT_MASK)
+ break;
+ i40e_msec_delay(I40E_RING_WAIT_PAUSE);
+ }
+
+ if ((reg & I40E_QTX_ENA_QENA_STAT_MASK) == 0) {
+ i40e_error(i40e, "failed to enable tx queue %d, timed "
+ "out", itrq->itrq_index);
+ return (B_FALSE);
+ }
+
+ return (B_TRUE);
+}
+
+int
+i40e_setup_ring(i40e_trqpair_t *itrq)
+{
+ i40e_t *i40e = itrq->itrq_i40e;
+ hrtime_t now, gap;
+
+ if (!i40e_alloc_ring_mem(itrq)) {
+ i40e_error(i40e, "Failed to allocate ring memory");
+ return (ENOMEM);
}
/*
- * Note, we wait for every queue to be enabled before we start checking.
- * This will hopefully cause most queues to be enabled at this point.
+ * 8.3.3.1.1 Receive Queue Enable Flow states software should
+ * wait at least 50ms between ring disable and enable. See how
+ * long we need to wait, and wait only if required.
*/
- for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
- uint32_t j, reg;
+ now = gethrtime();
+ gap = NSEC2MSEC(now - itrq->irtq_time_stopped);
+ if (gap < I40E_RING_ENABLE_GAP && gap != 0)
+ delay(drv_usectohz(gap * 1000));
- /*
- * Step 5. Verify that QENA_STAT has been set. It's promised
- * that this should occur within about 10 us, but like BSD,
- * we'll try for up to 100 ms for this queue.
- */
- for (j = 0; j < I40E_RING_WAIT_NTRIES; j++) {
- reg = I40E_READ_REG(hw, I40E_QTX_ENA(i));
+ mutex_enter(&itrq->itrq_intr_lock);
+ if (!i40e_setup_rx_ring(itrq))
+ goto failed;
- if (reg & I40E_QTX_ENA_QENA_STAT_MASK)
- break;
- i40e_msec_delay(I40E_RING_WAIT_PAUSE);
- }
+ if (!i40e_setup_tx_ring(itrq))
+ goto failed;
- if ((reg & I40E_QTX_ENA_QENA_STAT_MASK) == 0) {
- i40e_error(i40e, "failed to enable tx queue %d, timed "
- "out", i);
- return (B_FALSE);
- }
- }
+ if (i40e_check_acc_handle(i40e->i40e_osdep_space.ios_reg_handle) !=
+ DDI_FM_OK)
+ goto failed;
- return (B_TRUE);
+ itrq->itrq_intr_quiesce = B_FALSE;
+ mutex_exit(&itrq->itrq_intr_lock);
+
+ mutex_enter(&itrq->itrq_tx_lock);
+ itrq->itrq_tx_quiesce = B_FALSE;
+ mutex_exit(&itrq->itrq_tx_lock);
+
+ return (0);
+
+failed:
+ mutex_exit(&itrq->itrq_intr_lock);
+ i40e_free_ring_mem(itrq, B_TRUE);
+ ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_LOST);
+
+ return (EIO);
}
void
-i40e_stop(i40e_t *i40e, boolean_t free_allocations)
+i40e_stop(i40e_t *i40e)
{
uint_t i;
i40e_hw_t *hw = &i40e->i40e_hw_space;
@@ -3046,11 +3111,8 @@ i40e_stop(i40e_t *i40e, boolean_t free_allocations)
i40e_intr_io_disable_all(i40e);
i40e_intr_io_clear_cause(i40e);
- if (i40e_shutdown_rings(i40e) == B_FALSE) {
+ if (!i40e_shutdown_rings(i40e))
ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_LOST);
- }
-
- delay(50 * drv_usectohz(1000));
/*
* We don't delete the default VSI because it replaces the VEB
@@ -3075,33 +3137,11 @@ i40e_stop(i40e_t *i40e, boolean_t free_allocations)
i40e_intr_chip_fini(i40e);
- for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
- mutex_enter(&i40e->i40e_trqpairs[i].itrq_rx_lock);
- mutex_enter(&i40e->i40e_trqpairs[i].itrq_tx_lock);
- }
-
- /*
- * We should consider refactoring this to be part of the ring start /
- * stop routines at some point.
- */
- for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
- i40e_stats_trqpair_fini(&i40e->i40e_trqpairs[i]);
- }
-
if (i40e_check_acc_handle(i40e->i40e_osdep_space.ios_cfg_handle) !=
DDI_FM_OK) {
ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_LOST);
}
- for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
- i40e_tx_cleanup_ring(&i40e->i40e_trqpairs[i]);
- }
-
- for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
- mutex_exit(&i40e->i40e_trqpairs[i].itrq_rx_lock);
- mutex_exit(&i40e->i40e_trqpairs[i].itrq_tx_lock);
- }
-
for (i = 0; i < i40e->i40e_num_rx_groups; i++) {
i40e_stat_vsi_fini(i40e, i);
}
@@ -3109,62 +3149,23 @@ i40e_stop(i40e_t *i40e, boolean_t free_allocations)
i40e->i40e_link_speed = 0;
i40e->i40e_link_duplex = 0;
i40e_link_state_set(i40e, LINK_STATE_UNKNOWN);
-
- if (free_allocations) {
- i40e_free_ring_mem(i40e, B_FALSE);
- }
}
boolean_t
-i40e_start(i40e_t *i40e, boolean_t alloc)
+i40e_start(i40e_t *i40e)
{
i40e_hw_t *hw = &i40e->i40e_hw_space;
boolean_t rc = B_TRUE;
- int i, err;
+ int err;
ASSERT(MUTEX_HELD(&i40e->i40e_general_lock));
- if (alloc) {
- if (i40e_alloc_ring_mem(i40e) == B_FALSE) {
- i40e_error(i40e,
- "Failed to allocate ring memory");
- return (B_FALSE);
- }
- }
-
- /*
- * This should get refactored to be part of ring start and stop at
- * some point, along with most of the logic here.
- */
- for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
- if (i40e_stats_trqpair_init(&i40e->i40e_trqpairs[i]) ==
- B_FALSE) {
- int j;
-
- for (j = 0; j < i; j++) {
- i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[j];
- i40e_stats_trqpair_fini(itrq);
- }
- return (B_FALSE);
- }
- }
-
if (!i40e_chip_start(i40e)) {
i40e_fm_ereport(i40e, DDI_FM_DEVICE_INVAL_STATE);
rc = B_FALSE;
goto done;
}
- if (i40e_setup_rx_rings(i40e) == B_FALSE) {
- rc = B_FALSE;
- goto done;
- }
-
- if (i40e_setup_tx_rings(i40e) == B_FALSE) {
- rc = B_FALSE;
- goto done;
- }
-
/*
* Enable broadcast traffic; however, do not enable multicast traffic.
* That's handle exclusively through MAC's mc_multicst routines.
@@ -3201,10 +3202,7 @@ i40e_start(i40e_t *i40e, boolean_t alloc)
done:
if (rc == B_FALSE) {
- i40e_stop(i40e, B_FALSE);
- if (alloc == B_TRUE) {
- i40e_free_ring_mem(i40e, B_TRUE);
- }
+ i40e_stop(i40e);
ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_LOST);
}
@@ -3510,7 +3508,7 @@ static struct dev_ops i40e_dev_ops = {
nodev, /* devo_reset */
&i40e_cb_ops, /* devo_cb_ops */
NULL, /* devo_bus_ops */
- ddi_power, /* devo_power */
+ nulldev, /* devo_power */
ddi_quiesce_not_supported /* devo_quiesce */
};
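
Another notable addition in i40e_main.c is the 50 ms disable-to-enable spacing:
i40e_shutdown_ring() records when the ring was stopped, and i40e_setup_ring() sleeps
only if the ring is being re-enabled too soon. The userland sketch below illustrates
that intent (wait out whatever remains of the window); it is not the driver's exact
arithmetic and not part of the patch, and all names in it are illustrative.

#include <stdint.h>
#include <time.h>
#include <unistd.h>

#define RING_ENABLE_GAP_MS	50	/* minimum disable-to-enable spacing */

static uint64_t ring_time_stopped;	/* ms timestamp of the last ring stop */

static uint64_t
now_ms(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ((uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000);
}

/* Call when the ring is stopped. */
static void
ring_mark_stopped(void)
{
	ring_time_stopped = now_ms();
}

/* Call before re-enabling the ring; sleep only if the gap is still too small. */
static void
ring_wait_enable_gap(void)
{
	uint64_t elapsed = now_ms() - ring_time_stopped;

	if (ring_time_stopped != 0 && elapsed < RING_ENABLE_GAP_MS)
		usleep((RING_ENABLE_GAP_MS - elapsed) * 1000);
}
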
diff --git a/usr/src/uts/common/io/i40e/i40e_sw.h b/usr/src/uts/common/io/i40e/i40e_sw.h
index f487e411ac..f4da8e75d7 100644
--- a/usr/src/uts/common/io/i40e/i40e_sw.h
+++ b/usr/src/uts/common/io/i40e/i40e_sw.h
@@ -14,6 +14,7 @@
* Copyright 2019 Joyent, Inc.
* Copyright 2017 Tegile Systems, Inc. All rights reserved.
* Copyright 2020 Ryan Zezeski
+ * Copyright 2020 RackTop Systems, Inc.
*/
/*
@@ -304,6 +305,7 @@ typedef enum i40e_itr_index {
*/
#define I40E_RING_WAIT_NTRIES 10
#define I40E_RING_WAIT_PAUSE 10 /* ms */
+#define I40E_RING_ENABLE_GAP 50 /* ms */
/*
* Printed Board Assembly (PBA) length. These are derived from Table 6-2.
@@ -565,6 +567,14 @@ typedef struct i40e_txq_stat {
typedef struct i40e_trqpair {
struct i40e *itrq_i40e;
+ /* interrupt control structures */
+ kmutex_t itrq_intr_lock;
+ kcondvar_t itrq_intr_cv;
+ boolean_t itrq_intr_busy; /* Busy processing interrupt */
+ boolean_t itrq_intr_quiesce; /* Interrupt quiesced */
+
+ hrtime_t irtq_time_stopped; /* Time when ring was stopped */
+
/* Receive-side structures. */
kmutex_t itrq_rx_lock;
mac_ring_handle_t itrq_macrxring; /* Receive ring handle. */
@@ -580,6 +590,9 @@ typedef struct i40e_trqpair {
/* Transmit-side structures. */
kmutex_t itrq_tx_lock;
+ kcondvar_t itrq_tx_cv;
+ uint_t itrq_tx_active; /* No. of active i40e_ring_tx()'s */
+ boolean_t itrq_tx_quiesce; /* Tx is quiesced */
mac_ring_handle_t itrq_mactxring; /* Transmit ring handle. */
uint32_t itrq_tx_intrvec; /* Transmit interrupt vector. */
boolean_t itrq_tx_blocked; /* Does MAC think we're blocked? */
@@ -1006,6 +1019,7 @@ extern void i40e_intr_io_clear_cause(i40e_t *);
extern void i40e_intr_rx_queue_disable(i40e_trqpair_t *);
extern void i40e_intr_rx_queue_enable(i40e_trqpair_t *);
extern void i40e_intr_set_itr(i40e_t *, i40e_itr_index_t, uint_t);
+extern void i40e_intr_quiesce(i40e_trqpair_t *);
/*
* Receive-side functions
@@ -1013,6 +1027,7 @@ extern void i40e_intr_set_itr(i40e_t *, i40e_itr_index_t, uint_t);
extern mblk_t *i40e_ring_rx(i40e_trqpair_t *, int);
extern mblk_t *i40e_ring_rx_poll(void *, int);
extern void i40e_rx_recycle(caddr_t);
+extern boolean_t i40e_ring_tx_quiesce(i40e_trqpair_t *);
/*
* Transmit-side functions
@@ -1038,15 +1053,17 @@ extern int i40e_tx_ring_stat(mac_ring_driver_t, uint_t, uint64_t *);
* MAC/GLDv3 functions, and functions called by MAC/GLDv3 support code.
*/
extern boolean_t i40e_register_mac(i40e_t *);
-extern boolean_t i40e_start(i40e_t *, boolean_t);
-extern void i40e_stop(i40e_t *, boolean_t);
+extern boolean_t i40e_start(i40e_t *);
+extern void i40e_stop(i40e_t *);
+extern int i40e_setup_ring(i40e_trqpair_t *);
+extern boolean_t i40e_shutdown_ring(i40e_trqpair_t *);
/*
* DMA & buffer functions and attributes
*/
extern void i40e_init_dma_attrs(i40e_t *, boolean_t);
-extern boolean_t i40e_alloc_ring_mem(i40e_t *);
-extern void i40e_free_ring_mem(i40e_t *, boolean_t);
+extern boolean_t i40e_alloc_ring_mem(i40e_trqpair_t *);
+extern void i40e_free_ring_mem(i40e_trqpair_t *, boolean_t);
#ifdef __cplusplus
}
diff --git a/usr/src/uts/common/io/i40e/i40e_transceiver.c b/usr/src/uts/common/io/i40e/i40e_transceiver.c
index e324957625..ed8c343eec 100644
--- a/usr/src/uts/common/io/i40e/i40e_transceiver.c
+++ b/usr/src/uts/common/io/i40e/i40e_transceiver.c
@@ -12,6 +12,7 @@
/*
* Copyright 2015 OmniTI Computer Consulting, Inc. All rights reserved.
* Copyright 2019 Joyent, Inc.
+ * Copyright 2020 RackTop Systems, Inc.
*/
#include "i40e_sw.h"
@@ -1043,75 +1044,65 @@ cleanup:
}
/*
- * Free all memory associated with all of the rings on this i40e instance. Note,
- * this is done as part of the GLDv3 stop routine.
+ * Free all memory associated with a ring. Note, this is done as part of
+ * the GLDv3 ring stop routine.
*/
void
-i40e_free_ring_mem(i40e_t *i40e, boolean_t failed_init)
+i40e_free_ring_mem(i40e_trqpair_t *itrq, boolean_t failed_init)
{
- int i;
-
- for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
- i40e_rx_data_t *rxd = i40e->i40e_trqpairs[i].itrq_rxdata;
-
- /*
- * In some cases i40e_alloc_rx_data() may have failed
- * and in that case there is no rxd to free.
- */
- if (rxd == NULL)
- continue;
+ i40e_t *i40e = itrq->itrq_i40e;
+ i40e_rx_data_t *rxd = itrq->itrq_rxdata;
- /*
- * Clean up our RX data. We have to free DMA resources first and
- * then if we have no more pending RCB's, then we'll go ahead
- * and clean things up. Note, we can't set the stopped flag on
- * the RX data until after we've done the first pass of the
- * pending resources. Otherwise we might race with
- * i40e_rx_recycle on determining who should free the
- * i40e_rx_data_t above.
- */
- i40e_free_rx_dma(rxd, failed_init);
+ /*
+ * In some cases i40e_alloc_rx_data() may have failed
+ * and in that case there is no rxd to free.
+ */
+ if (rxd == NULL)
+ return;
- mutex_enter(&i40e->i40e_rx_pending_lock);
- rxd->rxd_shutdown = B_TRUE;
- if (rxd->rxd_rcb_pending == 0) {
- i40e_free_rx_data(rxd);
- i40e->i40e_trqpairs[i].itrq_rxdata = NULL;
- }
- mutex_exit(&i40e->i40e_rx_pending_lock);
+ /*
+ * Clean up our RX data. We have to free DMA resources first and
+ * then if we have no more pending RCB's, then we'll go ahead
+ * and clean things up. Note, we can't set the stopped flag on
+ * the RX data until after we've done the first pass of the
+ * pending resources. Otherwise we might race with
+ * i40e_rx_recycle on determining who should free the
+ * i40e_rx_data_t above.
+ */
+ i40e_free_rx_dma(rxd, failed_init);
- i40e_free_tx_dma(&i40e->i40e_trqpairs[i]);
+ mutex_enter(&i40e->i40e_rx_pending_lock);
+ rxd->rxd_shutdown = B_TRUE;
+ if (rxd->rxd_rcb_pending == 0) {
+ i40e_free_rx_data(rxd);
+ itrq->itrq_rxdata = NULL;
}
+ mutex_exit(&i40e->i40e_rx_pending_lock);
+
+ i40e_free_tx_dma(itrq);
}
/*
- * Allocate all of the resources associated with all of the rings on this i40e
- * instance. Note this is done as part of the GLDv3 start routine and thus we
- * should not use blocking allocations. This takes care of both DMA and non-DMA
- * related resources.
+ * Allocate all of the resources associated with a ring.
+ * Note this is done as part of the GLDv3 ring start routine.
+ * This takes care of both DMA and non-DMA related resources.
*/
boolean_t
-i40e_alloc_ring_mem(i40e_t *i40e)
+i40e_alloc_ring_mem(i40e_trqpair_t *itrq)
{
- int i;
+ if (!i40e_alloc_rx_data(itrq->itrq_i40e, itrq))
+ goto free;
- for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
- if (i40e_alloc_rx_data(i40e, &i40e->i40e_trqpairs[i]) ==
- B_FALSE)
- goto unwind;
+ if (!i40e_alloc_rx_dma(itrq->itrq_rxdata))
+ goto free;
- if (i40e_alloc_rx_dma(i40e->i40e_trqpairs[i].itrq_rxdata) ==
- B_FALSE)
- goto unwind;
-
- if (i40e_alloc_tx_dma(&i40e->i40e_trqpairs[i]) == B_FALSE)
- goto unwind;
- }
+ if (!i40e_alloc_tx_dma(itrq))
+ goto free;
return (B_TRUE);
-unwind:
- i40e_free_ring_mem(i40e, B_TRUE);
+free:
+ i40e_free_ring_mem(itrq, B_TRUE);
return (B_FALSE);
}
@@ -2629,6 +2620,77 @@ fail:
}
/*
+ * Keep track of activity through the transmit data path.
+ *
+ * We need to ensure we don't try and transmit when a trqpair has been
+ * stopped, nor do we want to stop a trqpair whilst transmitting.
+ */
+static boolean_t
+i40e_ring_tx_enter(i40e_trqpair_t *itrq)
+{
+ boolean_t allow;
+
+ mutex_enter(&itrq->itrq_tx_lock);
+ allow = !itrq->itrq_tx_quiesce;
+ if (allow)
+ itrq->itrq_tx_active++;
+ mutex_exit(&itrq->itrq_tx_lock);
+
+ return (allow);
+}
+
+static void
+i40e_ring_tx_exit_nolock(i40e_trqpair_t *itrq)
+{
+ ASSERT(MUTEX_HELD(&itrq->itrq_tx_lock));
+
+ itrq->itrq_tx_active--;
+ if (itrq->itrq_tx_quiesce)
+ cv_signal(&itrq->itrq_tx_cv);
+}
+
+static void
+i40e_ring_tx_exit(i40e_trqpair_t *itrq)
+{
+ mutex_enter(&itrq->itrq_tx_lock);
+ i40e_ring_tx_exit_nolock(itrq);
+ mutex_exit(&itrq->itrq_tx_lock);
+}
+
+
+/*
+ * Tell the transmit path to quiesce and wait until there is no
+ * more activity.
+ * Will return B_TRUE if the transmit path is already quiesced, B_FALSE
+ * otherwise.
+ */
+boolean_t
+i40e_ring_tx_quiesce(i40e_trqpair_t *itrq)
+{
+ mutex_enter(&itrq->itrq_tx_lock);
+ if (itrq->itrq_tx_quiesce) {
+ /*
+ * When itrq_tx_quiesce is set, then the ring has already
+ * been shutdown.
+ */
+ mutex_exit(&itrq->itrq_tx_lock);
+ return (B_TRUE);
+ }
+
+ /*
+ * Tell any threads in transmit path this trqpair is quiesced and
+ * wait until they've all exited the critical code path.
+ */
+ itrq->itrq_tx_quiesce = B_TRUE;
+ while (itrq->itrq_tx_active > 0)
+ cv_wait(&itrq->itrq_tx_cv, &itrq->itrq_tx_lock);
+
+ mutex_exit(&itrq->itrq_tx_lock);
+
+ return (B_FALSE);
+}
+
+/*
* We've been asked to send a message block on the wire. We'll only have a
* single chain. There will not be any b_next pointers; however, there may be
* multiple b_cont blocks. The number of b_cont blocks may exceed the
@@ -2667,7 +2729,8 @@ i40e_ring_tx(void *arg, mblk_t *mp)
(i40e->i40e_state & I40E_OVERTEMP) ||
(i40e->i40e_state & I40E_SUSPENDED) ||
(i40e->i40e_state & I40E_ERROR) ||
- (i40e->i40e_link_state != LINK_STATE_UP)) {
+ (i40e->i40e_link_state != LINK_STATE_UP) ||
+ !i40e_ring_tx_enter(itrq)) {
freemsg(mp);
return (NULL);
}
@@ -2675,6 +2738,7 @@ i40e_ring_tx(void *arg, mblk_t *mp)
if (mac_ether_offload_info(mp, &meo) != 0) {
freemsg(mp);
itrq->itrq_txstat.itxs_hck_meoifail.value.ui64++;
+ i40e_ring_tx_exit(itrq);
return (NULL);
}
@@ -2686,6 +2750,7 @@ i40e_ring_tx(void *arg, mblk_t *mp)
if (i40e_tx_context(i40e, itrq, mp, &meo, &tctx) < 0) {
freemsg(mp);
itrq->itrq_txstat.itxs_err_context.value.ui64++;
+ i40e_ring_tx_exit(itrq);
return (NULL);
}
if (tctx.itc_ctx_cmdflags & I40E_TX_CTX_DESC_TSO) {
@@ -2827,6 +2892,8 @@ i40e_ring_tx(void *arg, mblk_t *mp)
txs->itxs_packets.value.ui64++;
txs->itxs_descriptors.value.ui64 += needed_desc;
+ i40e_ring_tx_exit_nolock(itrq);
+
mutex_exit(&itrq->itrq_tx_lock);
return (NULL);
@@ -2858,6 +2925,7 @@ txfail:
}
mutex_enter(&itrq->itrq_tx_lock);
+ i40e_ring_tx_exit_nolock(itrq);
itrq->itrq_tx_blocked = B_TRUE;
mutex_exit(&itrq->itrq_tx_lock);
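
The transmit-path changes above gate i40e_ring_tx() behind an enter/exit pair so that
a ring stop can drain in-flight senders before tearing the ring down. As a standalone
illustration (not part of the patch), a minimal userland analogue with pthreads looks
like the following; the tx_gate_t type and function names are simplified stand-ins.

#include <pthread.h>
#include <stdbool.h>

typedef struct tx_gate {
	pthread_mutex_t lock;
	pthread_cond_t  cv;
	unsigned        active;   /* senders currently inside the tx path */
	bool            quiesce;  /* no new senders may enter */
} tx_gate_t;

/* Returns false if the ring is quiescing and the caller must drop the packet. */
static bool
tx_enter(tx_gate_t *g)
{
	bool allow;

	pthread_mutex_lock(&g->lock);
	allow = !g->quiesce;
	if (allow)
		g->active++;
	pthread_mutex_unlock(&g->lock);
	return (allow);
}

/* Leave the tx path; wake a waiting quiesce thread if one is parked. */
static void
tx_exit(tx_gate_t *g)
{
	pthread_mutex_lock(&g->lock);
	g->active--;
	if (g->quiesce)
		pthread_cond_signal(&g->cv);
	pthread_mutex_unlock(&g->lock);
}

/* Returns true if the gate was already quiesced (ring already shut down). */
static bool
tx_quiesce(tx_gate_t *g)
{
	pthread_mutex_lock(&g->lock);
	if (g->quiesce) {
		pthread_mutex_unlock(&g->lock);
		return (true);
	}
	g->quiesce = true;
	while (g->active > 0)
		pthread_cond_wait(&g->cv, &g->lock);
	pthread_mutex_unlock(&g->lock);
	return (false);
}
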
diff --git a/usr/src/uts/common/mapfiles/ddi.mapfile b/usr/src/uts/common/mapfiles/ddi.mapfile
index e17fdba642..1d2d36900d 100644
--- a/usr/src/uts/common/mapfiles/ddi.mapfile
+++ b/usr/src/uts/common/mapfiles/ddi.mapfile
@@ -11,6 +11,7 @@
#
# Copyright 2019 Joyent, Inc.
+# Copyright 2020 RackTop Systems, Inc.
#
#
@@ -68,6 +69,8 @@ SYMBOL_SCOPE {
cv_destroy { FLAGS = EXTERN };
cv_init { FLAGS = EXTERN };
cv_reltimedwait { FLAGS = EXTERN };
+ cv_signal { FLAGS = EXTERN };
+ cv_wait { FLAGS = EXTERN };
ddi_cb_register { FLAGS = EXTERN };
ddi_cb_unregister { FLAGS = EXTERN };
ddi_create_minor_node { FLAGS = EXTERN };
diff --git a/usr/src/uts/sun/io/ms.c b/usr/src/uts/sun/io/ms.c
index 1d647ee35c..49b28f14b3 100644
--- a/usr/src/uts/sun/io/ms.c
+++ b/usr/src/uts/sun/io/ms.c
@@ -24,8 +24,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* Mouse streams module.
*/
@@ -706,7 +704,7 @@ mswput(q, mp)
flushq(q, FLUSHDATA);
if (*mp->b_rptr & FLUSHR)
flushq(RD(q), FLUSHDATA);
-
+ /* FALLTHROUGH */
default:
putnext(q, mp); /* pass it down the line */
break;
@@ -924,7 +922,7 @@ msrput(q, mp)
flushq(WR(q), FLUSHDATA);
if (*mp->b_rptr & FLUSHR)
flushq(q, FLUSHDATA);
-
+ /* FALLTHROUGH */
default:
putnext(q, mp);
return;