Diffstat (limited to 'usr/src/uts/common/io')
-rw-r--r--  usr/src/uts/common/io/audio/impl/audio_grc3.h       2
-rw-r--r--  usr/src/uts/common/io/bge/bge_chip2.c               4
-rw-r--r--  usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd.c     6
-rw-r--r--  usr/src/uts/common/io/mlxcx/mlxcx.c                45
-rw-r--r--  usr/src/uts/common/io/mlxcx/mlxcx.h                13
-rw-r--r--  usr/src/uts/common/io/mlxcx/mlxcx_gld.c            56
-rw-r--r--  usr/src/uts/common/io/mlxcx/mlxcx_reg.h            12
-rw-r--r--  usr/src/uts/common/io/mlxcx/mlxcx_ring.c          251
8 files changed, 287 insertions, 102 deletions
diff --git a/usr/src/uts/common/io/audio/impl/audio_grc3.h b/usr/src/uts/common/io/audio/impl/audio_grc3.h
index 0003dc1574..4472307edf 100644
--- a/usr/src/uts/common/io/audio/impl/audio_grc3.h
+++ b/usr/src/uts/common/io/audio/impl/audio_grc3.h
@@ -53,7 +53,7 @@ typedef struct grc3state {
int32_t *historyptr;
int32_t dummy_pad1;
- int32_t history[GRC3_MAXHISTORY * 2];
+ int32_t history[GRC3_MAXHISTORY * 2 + 1];
uint32_t outsz;
} grc3state_t;
diff --git a/usr/src/uts/common/io/bge/bge_chip2.c b/usr/src/uts/common/io/bge/bge_chip2.c
index 14797ac90f..48d7ed0e0a 100644
--- a/usr/src/uts/common/io/bge/bge_chip2.c
+++ b/usr/src/uts/common/io/bge/bge_chip2.c
@@ -1005,8 +1005,8 @@ bge_nic_get64(bge_t *bgep, bge_regno_t addr)
#elif defined(__sparc)
if (DEVICE_5723_SERIES_CHIPSETS(bgep) ||
DEVICE_5717_SERIES_CHIPSETS(bgep) ||
- DEVICE_5725_SERIES_CHIPSETS(bgep ||
- DEVICE_57765_SERIES_CHIPSETS(bgep))) {
+ DEVICE_5725_SERIES_CHIPSETS(bgep) ||
+ DEVICE_57765_SERIES_CHIPSETS(bgep)) {
data = ddi_get32(bgep->io_handle, PIO_ADDR(bgep, addr));
data <<= 32;
data |= ddi_get32(bgep->io_handle,
diff --git a/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd.c b/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd.c
index 1b66e80a22..60587bcdf6 100644
--- a/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd.c
+++ b/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd.c
@@ -56,9 +56,9 @@
extern sbd_status_t sbd_pgr_meta_init(sbd_lu_t *sl);
extern sbd_status_t sbd_pgr_meta_load(sbd_lu_t *sl);
extern void sbd_pgr_reset(sbd_lu_t *sl);
-extern int HardwareAcceleratedLocking;
-extern int HardwareAcceleratedInit;
-extern int HardwareAcceleratedMove;
+extern uint8_t HardwareAcceleratedLocking;
+extern uint8_t HardwareAcceleratedInit;
+extern uint8_t HardwareAcceleratedMove;
extern uint8_t sbd_unmap_enable;
static int sbd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx.c b/usr/src/uts/common/io/mlxcx/mlxcx.c
index c90fa0969b..9fae7c5f77 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx.c
@@ -273,11 +273,16 @@
* before making a WQE for it.
*
* After a completion event occurs, the packet is either discarded (and the
- * buffer_t returned to the free list), or it is readied for loaning to MAC.
+ * buffer_t returned to the free list), or it is readied for loaning to MAC
+ * and placed on the "loaned" list in the mlxcx_buffer_shard_t.
*
* Once MAC and the rest of the system have finished with the packet, they call
- * freemsg() on its mblk, which will call mlxcx_buf_mp_return and return the
- * buffer_t to the free list.
+ * freemsg() on its mblk, which will call mlxcx_buf_mp_return. At this point
+ * the fate of the buffer_t is determined by the state of the
+ * mlxcx_buffer_shard_t. When the shard is in its normal state the buffer_t
+ * will be returned to the free list, potentially to be recycled and used
+ * again. But if the shard is draining (e.g. after a ring stop) there will be
+ * no recycling and the buffer_t is immediately destroyed.
*
* At detach/teardown time, buffers are only ever destroyed from the free list.
*
@@ -289,18 +294,18 @@
* v
* +----+----+
* | created |
- * +----+----+
- * |
- * |
- * | mlxcx_buf_return
- * |
- * v
- * mlxcx_buf_destroy +----+----+
- * +---------| free |<---------------+
- * | +----+----+ |
+ * +----+----+ +------+
+ * | | dead |
+ * | +------+
+ * | mlxcx_buf_return ^
+ * | |
+ * v | mlxcx_buf_destroy
+ * mlxcx_buf_destroy +----+----+ +-----------+ |
+ * +---------| free |<------no-| draining? |-yes-+
+ * | +----+----+ +-----------+
+ * | | ^
* | | |
- * | | | mlxcx_buf_return
- * v | mlxcx_buf_take |
+ * v | mlxcx_buf_take | mlxcx_buf_return
* +---+--+ v |
* | dead | +---+---+ |
* +------+ | on WQ |- - - - - - - - >O
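To make the draining path concrete, here is a minimal sketch of the
return-path decision described above (simplified from the mlxcx_buf_return()
change later in this diff; the real function handles several other buffer
states):

static void
buf_return_loaned_sketch(mlxcx_t *mlxp, mlxcx_buffer_t *b)
{
	mlxcx_buf_shard_t *s = b->mlb_shard;

	b->mlb_state = MLXCX_BUFFER_FREE;
	mutex_enter(&s->mlbs_mtx);
	list_remove(&s->mlbs_loaned, b);
	list_insert_tail(&s->mlbs_free, b);
	if (s->mlbs_state == MLXCX_SHARD_DRAINING) {
		/* Ring stop in progress: do not recycle; destroy now. */
		mlxcx_buf_destroy(mlxp, b);
	}
	/* Teardown may be blocked waiting for a list to empty. */
	cv_broadcast(&s->mlbs_free_nonempty);
	mutex_exit(&s->mlbs_mtx);
}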
@@ -759,13 +764,19 @@ mlxcx_mlbs_teardown(mlxcx_t *mlxp, mlxcx_buf_shard_t *s)
mlxcx_buffer_t *buf;
mutex_enter(&s->mlbs_mtx);
+
while (!list_is_empty(&s->mlbs_busy))
cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx);
- while ((buf = list_head(&s->mlbs_free)) != NULL) {
+
+ while (!list_is_empty(&s->mlbs_loaned))
+ cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx);
+
+ while ((buf = list_head(&s->mlbs_free)) != NULL)
mlxcx_buf_destroy(mlxp, buf);
- }
+
list_destroy(&s->mlbs_free);
list_destroy(&s->mlbs_busy);
+ list_destroy(&s->mlbs_loaned);
mutex_exit(&s->mlbs_mtx);
cv_destroy(&s->mlbs_free_nonempty);
@@ -1336,6 +1347,8 @@ mlxcx_mlbs_create(mlxcx_t *mlxp)
offsetof(mlxcx_buffer_t, mlb_entry));
list_create(&s->mlbs_free, sizeof (mlxcx_buffer_t),
offsetof(mlxcx_buffer_t, mlb_entry));
+ list_create(&s->mlbs_loaned, sizeof (mlxcx_buffer_t),
+ offsetof(mlxcx_buffer_t, mlb_entry));
cv_init(&s->mlbs_free_nonempty, NULL, CV_DRIVER, NULL);
list_insert_tail(&mlxp->mlx_buf_shards, s);
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx.h b/usr/src/uts/common/io/mlxcx/mlxcx.h
index bf07691095..52240df3a3 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx.h
+++ b/usr/src/uts/common/io/mlxcx/mlxcx.h
@@ -424,11 +424,18 @@ typedef enum {
MLXCX_BUFFER_ON_CHAIN,
} mlxcx_buffer_state_t;
+typedef enum {
+ MLXCX_SHARD_READY,
+ MLXCX_SHARD_DRAINING,
+} mlxcx_shard_state_t;
+
typedef struct mlxcx_buf_shard {
+ mlxcx_shard_state_t mlbs_state;
list_node_t mlbs_entry;
kmutex_t mlbs_mtx;
list_t mlbs_busy;
list_t mlbs_free;
+ list_t mlbs_loaned;
kcondvar_t mlbs_free_nonempty;
} mlxcx_buf_shard_t;
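With the new mlbs_loaned list, every buffer_t owned by a shard sits on
exactly one of the three lists at any instant. A hypothetical debug helper
(not part of this change) makes the invariant explicit:

/*
 * Hypothetical helper: a shard's population is the sum of its three
 * list lengths, since each buffer is on exactly one list.
 */
static uint_t
shard_count(mlxcx_buf_shard_t *s)
{
	uint_t n = 0;
	mlxcx_buffer_t *b;

	ASSERT(mutex_owned(&s->mlbs_mtx));
	for (b = list_head(&s->mlbs_free); b != NULL;
	    b = list_next(&s->mlbs_free, b))
		n++;
	for (b = list_head(&s->mlbs_busy); b != NULL;
	    b = list_next(&s->mlbs_busy, b))
		n++;
	for (b = list_head(&s->mlbs_loaned); b != NULL;
	    b = list_next(&s->mlbs_loaned, b))
		n++;
	return (n);
}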
@@ -1171,9 +1178,11 @@ extern boolean_t mlxcx_buf_loan(mlxcx_t *, mlxcx_buffer_t *);
extern void mlxcx_buf_return(mlxcx_t *, mlxcx_buffer_t *);
extern void mlxcx_buf_return_chain(mlxcx_t *, mlxcx_buffer_t *, boolean_t);
extern void mlxcx_buf_destroy(mlxcx_t *, mlxcx_buffer_t *);
+extern void mlxcx_shard_ready(mlxcx_buf_shard_t *);
+extern void mlxcx_shard_draining(mlxcx_buf_shard_t *);
-extern mlxcx_buffer_t *mlxcx_buf_bind_or_copy(mlxcx_t *, mlxcx_work_queue_t *,
- mblk_t *, size_t);
+extern uint_t mlxcx_buf_bind_or_copy(mlxcx_t *, mlxcx_work_queue_t *,
+ mblk_t *, size_t, mlxcx_buffer_t **);
extern boolean_t mlxcx_rx_group_setup(mlxcx_t *, mlxcx_ring_group_t *);
extern boolean_t mlxcx_tx_group_setup(mlxcx_t *, mlxcx_ring_group_t *);
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_gld.c b/usr/src/uts/common/io/mlxcx/mlxcx_gld.c
index a1d50659c1..5d15ec1fbb 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_gld.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_gld.c
@@ -395,6 +395,7 @@ mlxcx_mac_ring_tx(void *arg, mblk_t *mp)
uint32_t chkflags = 0;
boolean_t ok;
size_t take = 0;
+ uint_t bcount;
VERIFY(mp->b_next == NULL);
@@ -430,8 +431,8 @@ mlxcx_mac_ring_tx(void *arg, mblk_t *mp)
}
}
- b = mlxcx_buf_bind_or_copy(mlxp, sq, kmp, take);
- if (b == NULL) {
+ bcount = mlxcx_buf_bind_or_copy(mlxp, sq, kmp, take, &b);
+ if (bcount == 0) {
atomic_or_uint(&sq->mlwq_state, MLXCX_WQ_BLOCKED_MAC);
return (mp);
}
@@ -450,17 +451,20 @@ mlxcx_mac_ring_tx(void *arg, mblk_t *mp)
return (NULL);
}
- if (sq->mlwq_state & MLXCX_WQ_TEARDOWN) {
+ if ((sq->mlwq_state & (MLXCX_WQ_TEARDOWN | MLXCX_WQ_STARTED)) !=
+ MLXCX_WQ_STARTED) {
mutex_exit(&sq->mlwq_mtx);
mlxcx_buf_return_chain(mlxp, b, B_FALSE);
return (NULL);
}
/*
- * Similar logic here: bufcnt is only manipulated atomically, and
- * bufhwm is set at startup.
+ * If the completion queue buffer count is already at or above
+ * the high water mark, or the addition of this new chain will
+ * exceed the CQ ring size, then indicate we are blocked.
*/
- if (cq->mlcq_bufcnt >= cq->mlcq_bufhwm) {
+ if (cq->mlcq_bufcnt >= cq->mlcq_bufhwm ||
+ (cq->mlcq_bufcnt + bcount) > cq->mlcq_nents) {
atomic_or_uint(&cq->mlcq_state, MLXCX_CQ_BLOCKED_MAC);
goto blocked;
}
@@ -722,8 +726,28 @@ mlxcx_mac_ring_stop(mac_ring_driver_t rh)
mlxcx_buf_shard_t *s;
mlxcx_buffer_t *buf;
+ /*
+ * To prevent deadlocks and sleeping whilst holding either the
+ * CQ mutex or WQ mutex, we split the stop processing into two
+ * parts.
+ *
+ * With the CQ and WQ mutexes held, the appropriate WQ is stopped.
+ * The Q in the HCA is set to the Reset state and flagged as no
+ * longer started. Atomically with this WQ state change, the
+ * buffer shards are flagged as draining.
+ *
+ * Now, any requests for buffers and attempts to submit messages
+ * will fail, and once we're in this state it is safe to
+ * relinquish the CQ and WQ mutexes, allowing us to complete the
+ * ring stop by waiting for the buffer lists, with the exception
+ * of the loaned list, to drain. Buffers on the loaned list are
+ * not under our control; we will get them back when the mblk
+ * tied to the buffer is freed.
+ */
+
mutex_enter(&cq->mlcq_mtx);
mutex_enter(&wq->mlwq_mtx);
+
if (wq->mlwq_state & MLXCX_WQ_STARTED) {
if (wq->mlwq_type == MLXCX_WQ_TYPE_RECVQ &&
!mlxcx_cmd_stop_rq(mlxp, wq)) {
@@ -740,7 +764,15 @@ mlxcx_mac_ring_stop(mac_ring_driver_t rh)
}
ASSERT0(wq->mlwq_state & MLXCX_WQ_STARTED);
+ mlxcx_shard_draining(wq->mlwq_bufs);
+ if (wq->mlwq_foreign_bufs != NULL)
+ mlxcx_shard_draining(wq->mlwq_foreign_bufs);
+
if (wq->mlwq_state & MLXCX_WQ_BUFFERS) {
+ mutex_exit(&wq->mlwq_mtx);
+ mutex_exit(&cq->mlcq_mtx);
+
/* Return any outstanding buffers to the free pool. */
while ((buf = list_remove_head(&cq->mlcq_buffers)) != NULL) {
mlxcx_buf_return_chain(mlxp, buf, B_FALSE);
@@ -772,12 +804,13 @@ mlxcx_mac_ring_stop(mac_ring_driver_t rh)
mutex_exit(&s->mlbs_mtx);
}
+ mutex_enter(&wq->mlwq_mtx);
wq->mlwq_state &= ~MLXCX_WQ_BUFFERS;
+ mutex_exit(&wq->mlwq_mtx);
+ } else {
+ mutex_exit(&wq->mlwq_mtx);
+ mutex_exit(&cq->mlcq_mtx);
}
- ASSERT0(wq->mlwq_state & MLXCX_WQ_BUFFERS);
-
- mutex_exit(&wq->mlwq_mtx);
- mutex_exit(&cq->mlcq_mtx);
}
static int
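Taken together, the hunks above give mlxcx_mac_ring_stop() the two-phase
shape described in its new comment. In outline (a sketch only;
stop_hw_queue() and drain_buffer_lists() are hypothetical stand-ins for the
mlxcx_cmd_stop_rq()/mlxcx_cmd_stop_sq() calls and the buffer-return loops):

	/* Phase 1: under both mutexes, stop HW and flag draining. */
	mutex_enter(&cq->mlcq_mtx);
	mutex_enter(&wq->mlwq_mtx);
	stop_hw_queue(mlxp, wq);		/* hypothetical stand-in */
	mlxcx_shard_draining(wq->mlwq_bufs);
	mutex_exit(&wq->mlwq_mtx);
	mutex_exit(&cq->mlcq_mtx);

	/* Phase 2: no locks held, so it is safe to sleep. */
	drain_buffer_lists(mlxp, wq);		/* hypothetical; may cv_wait() */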
@@ -1134,7 +1167,8 @@ mlxcx_mac_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
for (; sh != NULL; sh = list_next(&mlxp->mlx_buf_shards, sh)) {
mutex_enter(&sh->mlbs_mtx);
if (!list_is_empty(&sh->mlbs_free) ||
- !list_is_empty(&sh->mlbs_busy)) {
+ !list_is_empty(&sh->mlbs_busy) ||
+ !list_is_empty(&sh->mlbs_loaned)) {
allocd = B_TRUE;
mutex_exit(&sh->mlbs_mtx);
break;
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_reg.h b/usr/src/uts/common/io/mlxcx/mlxcx_reg.h
index f65280d41d..6d09abea5c 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_reg.h
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_reg.h
@@ -390,8 +390,16 @@ typedef enum {
MLXCX_WQE_OP_RDMA_R = 0x10,
} mlxcx_wqe_opcode_t;
+#define MLXCX_WQE_OCTOWORD 16
#define MLXCX_SQE_MAX_DS ((1 << 6) - 1)
-#define MLXCX_SQE_MAX_PTRS 61
+/*
+ * Calculate the max number of address pointers in a single ethernet
+ * send message. This is what remains of MLXCX_SQE_MAX_DS
+ * after accounting for the control and ethernet segments.
+ */
+#define MLXCX_SQE_MAX_PTRS (MLXCX_SQE_MAX_DS - \
+ (sizeof (mlxcx_wqe_eth_seg_t) + sizeof (mlxcx_wqe_control_seg_t)) / \
+ MLXCX_WQE_OCTOWORD)
typedef enum {
MLXCX_SQE_FENCE_NONE = 0x0,
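As a worked example (segment sizes assumed purely for illustration): with
MLXCX_SQE_MAX_DS = (1 << 6) - 1 = 63, and one 16-byte octoword each for the
control and ethernet segments,

	MLXCX_SQE_MAX_PTRS = 63 - (16 + 16) / 16 = 61

which agrees with the hard-coded 61 this change removes. The CTASSERT added
below guards against the expression going non-positive should the segment
structures ever grow.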
@@ -2497,6 +2505,8 @@ typedef struct {
#pragma pack()
+CTASSERT(MLXCX_SQE_MAX_PTRS > 0);
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_ring.c b/usr/src/uts/common/io/mlxcx/mlxcx_ring.c
index 2305f943a7..da98a5cf40 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_ring.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_ring.c
@@ -25,6 +25,7 @@
#include <sys/sysmacros.h>
#include <sys/atomic.h>
#include <sys/cpuvar.h>
+#include <sys/sdt.h>
#include <sys/pattr.h>
#include <sys/dlpi.h>
@@ -1212,6 +1213,8 @@ mlxcx_rx_ring_start(mlxcx_t *mlxp, mlxcx_ring_group_t *g,
ASSERT0(rq->mlwq_state & MLXCX_WQ_BUFFERS);
rq->mlwq_state |= MLXCX_WQ_BUFFERS;
+ mlxcx_shard_ready(rq->mlwq_bufs);
+
for (j = 0; j < rq->mlwq_nents; ++j) {
if (!mlxcx_buf_create(mlxp, rq->mlwq_bufs, &b))
break;
@@ -1408,6 +1411,9 @@ mlxcx_tx_ring_start(mlxcx_t *mlxp, mlxcx_ring_group_t *g,
}
sq->mlwq_state |= MLXCX_WQ_BUFFERS;
+ mlxcx_shard_ready(sq->mlwq_bufs);
+ mlxcx_shard_ready(sq->mlwq_foreign_bufs);
+
if (!mlxcx_cmd_start_sq(mlxp, sq)) {
mutex_exit(&sq->mlwq_mtx);
mutex_exit(&cq->mlcq_mtx);
@@ -1567,8 +1573,8 @@ mlxcx_sq_add_buffer(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq,
inlinelen);
}
- ent0->mlsqe_control.mlcs_ds =
- offsetof(mlxcx_sendq_ent_t, mlsqe_data) / 16;
+ ent0->mlsqe_control.mlcs_ds = offsetof(mlxcx_sendq_ent_t, mlsqe_data) /
+ MLXCX_WQE_OCTOWORD;
if (chkflags & HCK_IPV4_HDRCKSUM) {
ASSERT(mlxp->mlx_caps->mlc_checksum);
@@ -1653,7 +1659,20 @@ mlxcx_sq_add_buffer(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq,
/*
* Make sure the workqueue entry is flushed out before updating
* the doorbell.
+ * If the ring has wrapped, we need to flush the front and back.
*/
+ if ((first + ents) > mlwq->mlwq_nents) {
+ uint_t sync_cnt = mlwq->mlwq_nents - first;
+
+ VERIFY0(ddi_dma_sync(mlwq->mlwq_dma.mxdb_dma_handle,
+ (uintptr_t)ent0 - (uintptr_t)mlwq->mlwq_send_ent,
+ sync_cnt * sizeof (mlxcx_sendq_ent_t),
+ DDI_DMA_SYNC_FORDEV));
+
+ ent0 = &mlwq->mlwq_send_ent[0];
+ ents -= sync_cnt;
+ }
+
VERIFY0(ddi_dma_sync(mlwq->mlwq_dma.mxdb_dma_handle,
(uintptr_t)ent0 - (uintptr_t)mlwq->mlwq_send_ent,
ents * sizeof (mlxcx_sendq_ent_t), DDI_DMA_SYNC_FORDEV));
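For example (numbers illustrative): with mlwq_nents = 16, first = 14 and
ents = 4, the WQE run wraps the ring, so the sync is split:

	sync_cnt = 16 - 14 = 2    /* first sync: entries 14..15 (tail) */
	ents     = 4 - 2  = 2     /* second sync: entries 0..1 (head) */

ent0 is rewound to &mlwq->mlwq_send_ent[0] between the two calls, so the
second ddi_dma_sync() starts at offset zero in the ring.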
@@ -1785,22 +1804,29 @@ mlxcx_rq_refill_task(void *arg)
mlxcx_completion_queue_t *cq = wq->mlwq_cq;
mlxcx_t *mlxp = wq->mlwq_mlx;
mlxcx_buf_shard_t *s = wq->mlwq_bufs;
- boolean_t refill;
+ boolean_t refill, draining;
do {
/*
- * Wait until there are some free buffers.
+ * Wait here until one of three conditions holds:
+ * 1. The shard is draining, or
+ * 2. There are buffers on the free list, or
+ * 3. The WQ is being shut down.
*/
mutex_enter(&s->mlbs_mtx);
- while (list_is_empty(&s->mlbs_free) &&
- (cq->mlcq_state & MLXCX_CQ_TEARDOWN) == 0)
+ while (s->mlbs_state != MLXCX_SHARD_DRAINING &&
+ list_is_empty(&s->mlbs_free) &&
+ (cq->mlcq_state & MLXCX_CQ_TEARDOWN) == 0) {
cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx);
+ }
+
+ draining = (s->mlbs_state == MLXCX_SHARD_DRAINING);
mutex_exit(&s->mlbs_mtx);
mutex_enter(&cq->mlcq_mtx);
mutex_enter(&wq->mlwq_mtx);
- if ((cq->mlcq_state & MLXCX_CQ_TEARDOWN) != 0) {
+ if (draining || (cq->mlcq_state & MLXCX_CQ_TEARDOWN) != 0) {
refill = B_FALSE;
wq->mlwq_state &= ~MLXCX_WQ_REFILLING;
} else {
@@ -1837,7 +1863,10 @@ mlxcx_rq_refill(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq)
target = mlwq->mlwq_nents - MLXCX_RQ_REFILL_STEP;
cq = mlwq->mlwq_cq;
- if (cq->mlcq_state & MLXCX_CQ_TEARDOWN)
+ if ((mlwq->mlwq_state & MLXCX_WQ_STARTED) == 0)
+ return;
+
+ if ((cq->mlcq_state & MLXCX_CQ_TEARDOWN) != 0)
return;
current = cq->mlcq_bufcnt;
@@ -1869,7 +1898,7 @@ mlxcx_rq_refill(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq)
return;
}
- if (mlwq->mlwq_state & MLXCX_WQ_TEARDOWN) {
+ if ((mlwq->mlwq_state & MLXCX_WQ_TEARDOWN) != 0) {
for (i = 0; i < n; ++i)
mlxcx_buf_return(mlxp, b[i]);
return;
@@ -2044,7 +2073,6 @@ mlxcx_rx_completion(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq,
wqe_index = buf->mlb_wqe_index;
if (!mlxcx_buf_loan(mlxp, buf)) {
- mlxcx_warn(mlxp, "!loan failed, dropping packet");
mlxcx_buf_return(mlxp, buf);
return (NULL);
}
@@ -2087,16 +2115,11 @@ mlxcx_buf_mp_return(caddr_t arg)
mlxcx_buffer_t *b = (mlxcx_buffer_t *)arg;
mlxcx_t *mlxp = b->mlb_mlx;
- if (b->mlb_state != MLXCX_BUFFER_ON_LOAN) {
- b->mlb_mp = NULL;
- return;
- }
- /*
- * The mblk for this buffer_t (in its mlb_mp field) has been used now,
- * so NULL it out.
- */
+ /* The mblk has been used now, so NULL it out. */
b->mlb_mp = NULL;
- mlxcx_buf_return(mlxp, b);
+
+ if (b->mlb_state == MLXCX_BUFFER_ON_LOAN)
+ mlxcx_buf_return(mlxp, b);
}
boolean_t
@@ -2163,6 +2186,11 @@ mlxcx_buf_take_foreign(mlxcx_t *mlxp, mlxcx_work_queue_t *wq)
mlxcx_buf_shard_t *s = wq->mlwq_foreign_bufs;
mutex_enter(&s->mlbs_mtx);
+ if (s->mlbs_state != MLXCX_SHARD_READY) {
+ mutex_exit(&s->mlbs_mtx);
+ return (NULL);
+ }
+
if ((b = list_remove_head(&s->mlbs_free)) != NULL) {
ASSERT3U(b->mlb_state, ==, MLXCX_BUFFER_FREE);
ASSERT(b->mlb_foreign);
@@ -2205,58 +2233,64 @@ copyb:
return (b);
}
-mlxcx_buffer_t *
-mlxcx_buf_bind_or_copy(mlxcx_t *mlxp, mlxcx_work_queue_t *wq,
- mblk_t *mpb, size_t off)
+static mlxcx_buffer_t *
+mlxcx_bind_or_copy_mblk(mlxcx_t *mlxp, mlxcx_work_queue_t *wq,
+ mblk_t *mp, size_t off)
{
- mlxcx_buffer_t *b, *b0 = NULL;
- boolean_t first = B_TRUE;
- mblk_t *mp;
+ mlxcx_buffer_t *b;
uint8_t *rptr;
size_t sz;
- size_t ncookies = 0;
boolean_t ret;
- for (mp = mpb; mp != NULL; mp = mp->b_cont) {
- rptr = mp->b_rptr;
- sz = MBLKL(mp);
+ rptr = mp->b_rptr;
+ sz = MBLKL(mp);
- if (off > 0)
- ASSERT3U(off, <, sz);
- rptr += off;
- sz -= off;
+#ifdef DEBUG
+ if (off > 0) {
+ ASSERT3U(off, <, sz);
+ }
+#endif
- if (sz < mlxp->mlx_props.mldp_tx_bind_threshold) {
- b = mlxcx_copy_data(mlxp, wq, rptr, sz);
- if (b == NULL)
- goto failed;
- } else {
- b = mlxcx_buf_take_foreign(mlxp, wq);
- if (b == NULL)
- goto failed;
+ rptr += off;
+ sz -= off;
+
+ if (sz < mlxp->mlx_props.mldp_tx_bind_threshold) {
+ b = mlxcx_copy_data(mlxp, wq, rptr, sz);
+ } else {
+ b = mlxcx_buf_take_foreign(mlxp, wq);
+ if (b == NULL)
+ return (NULL);
- ret = mlxcx_dma_bind_mblk(mlxp, &b->mlb_dma, mp, off,
- B_FALSE);
+ ret = mlxcx_dma_bind_mblk(mlxp, &b->mlb_dma, mp, off,
+ B_FALSE);
- if (!ret) {
- mlxcx_buf_return(mlxp, b);
+ if (!ret) {
+ mlxcx_buf_return(mlxp, b);
- b = mlxcx_copy_data(mlxp, wq, rptr, sz);
- if (b == NULL)
- goto failed;
- }
+ b = mlxcx_copy_data(mlxp, wq, rptr, sz);
}
+ }
+
+ return (b);
+}
+
+uint_t
+mlxcx_buf_bind_or_copy(mlxcx_t *mlxp, mlxcx_work_queue_t *wq,
+ mblk_t *mpb, size_t off, mlxcx_buffer_t **bp)
+{
+ mlxcx_buffer_t *b, *b0 = NULL;
+ boolean_t first = B_TRUE;
+ mblk_t *mp;
+ size_t offset = off;
+ size_t ncookies = 0;
+ uint_t count = 0;
+
+ for (mp = mpb; mp != NULL && ncookies <= MLXCX_SQE_MAX_PTRS;
+ mp = mp->b_cont) {
+ b = mlxcx_bind_or_copy_mblk(mlxp, wq, mp, offset);
+ if (b == NULL)
+ goto failed;
- /*
- * We might overestimate here when we've copied data, since
- * the buffer might be longer than what we copied into it. This
- * is safe since it's always wrong in the conservative
- * direction (and we will blow up later when we actually
- * generate the WQE anyway).
- *
- * If the assert below ever blows, we'll have to come and fix
- * this up so we can transmit these packets.
- */
ncookies += b->mlb_dma.mxdb_ncookies;
if (first)
@@ -2267,23 +2301,55 @@ mlxcx_buf_bind_or_copy(mlxcx_t *mlxp, mlxcx_work_queue_t *wq,
b->mlb_tx_mp = mp;
b->mlb_tx_head = b0;
- b->mlb_used = sz;
+ b->mlb_used = MBLKL(mp) - offset;
if (!first)
list_insert_tail(&b0->mlb_tx_chain, b);
first = B_FALSE;
- off = 0;
+ offset = 0;
+
+ count++;
+ }
+
+ /*
+ * The chain of mblks has resulted in too many cookies for
+ * a single message. This is unusual, so take the hit to tidy
+ * up, do a pullup to a single mblk and allocate the requisite
+ * buf.
+ */
+ if (ncookies > MLXCX_SQE_MAX_PTRS) {
+ DTRACE_PROBE4(pullup, mlxcx_t *, mlxp, mlxcx_work_queue_t *, wq,
+ mblk_t *, mpb, size_t, ncookies);
+
+ if (b0 != NULL)
+ mlxcx_buf_return_chain(mlxp, b0, B_TRUE);
+
+ if ((mp = msgpullup(mpb, -1)) == NULL)
+ return (0);
+
+ b0 = mlxcx_bind_or_copy_mblk(mlxp, wq, mp, off);
+ if (b0 == NULL) {
+ freemsg(mp);
+ return (0);
+ }
+ freemsg(mpb);
+
+ b0->mlb_tx_mp = mp;
+ b0->mlb_tx_head = b0;
+ b0->mlb_used = MBLKL(mp) - off;
+
+ count = 1;
}
- ASSERT3U(ncookies, <=, MLXCX_SQE_MAX_PTRS);
+ *bp = b0;
- return (b0);
+ return (count);
failed:
if (b0 != NULL)
mlxcx_buf_return_chain(mlxp, b0, B_TRUE);
- return (NULL);
+ return (0);
}
mlxcx_buffer_t *
@@ -2293,6 +2359,11 @@ mlxcx_buf_take(mlxcx_t *mlxp, mlxcx_work_queue_t *wq)
mlxcx_buf_shard_t *s = wq->mlwq_bufs;
mutex_enter(&s->mlbs_mtx);
+ if (s->mlbs_state != MLXCX_SHARD_READY) {
+ mutex_exit(&s->mlbs_mtx);
+ return (NULL);
+ }
+
if ((b = list_remove_head(&s->mlbs_free)) != NULL) {
ASSERT3U(b->mlb_state, ==, MLXCX_BUFFER_FREE);
b->mlb_state = MLXCX_BUFFER_ON_WQ;
@@ -2314,6 +2385,11 @@ mlxcx_buf_take_n(mlxcx_t *mlxp, mlxcx_work_queue_t *wq,
s = wq->mlwq_bufs;
mutex_enter(&s->mlbs_mtx);
+ if (s->mlbs_state != MLXCX_SHARD_READY) {
+ mutex_exit(&s->mlbs_mtx);
+ return (0);
+ }
+
while (done < nbufs && (b = list_remove_head(&s->mlbs_free)) != NULL) {
ASSERT3U(b->mlb_state, ==, MLXCX_BUFFER_FREE);
b->mlb_state = MLXCX_BUFFER_ON_WQ;
@@ -2327,6 +2403,8 @@ mlxcx_buf_take_n(mlxcx_t *mlxp, mlxcx_work_queue_t *wq,
boolean_t
mlxcx_buf_loan(mlxcx_t *mlxp, mlxcx_buffer_t *b)
{
+ mlxcx_buf_shard_t *s = b->mlb_shard;
+
VERIFY3U(b->mlb_state, ==, MLXCX_BUFFER_ON_WQ);
ASSERT3P(b->mlb_mlx, ==, mlxp);
@@ -2339,6 +2417,12 @@ mlxcx_buf_loan(mlxcx_t *mlxp, mlxcx_buffer_t *b)
b->mlb_state = MLXCX_BUFFER_ON_LOAN;
b->mlb_wqe_index = 0;
+
+ mutex_enter(&s->mlbs_mtx);
+ list_remove(&s->mlbs_busy, b);
+ list_insert_tail(&s->mlbs_loaned, b);
+ mutex_exit(&s->mlbs_mtx);
+
return (B_TRUE);
}
@@ -2401,7 +2485,23 @@ mlxcx_buf_return(mlxcx_t *mlxp, mlxcx_buffer_t *b)
break;
case MLXCX_BUFFER_ON_LOAN:
ASSERT(!b->mlb_foreign);
- list_remove(&s->mlbs_busy, b);
+ list_remove(&s->mlbs_loaned, b);
+ if (s->mlbs_state == MLXCX_SHARD_DRAINING) {
+ /*
+ * When we're draining, e.g. during mac_stop(),
+ * we destroy the buffer immediately rather than
+ * recycling it. Otherwise we risk leaving it
+ * on the free list and leaking it.
+ */
+ list_insert_tail(&s->mlbs_free, b);
+ mlxcx_buf_destroy(mlxp, b);
+ /*
+ * Teardown might be waiting for the loaned list to empty.
+ */
+ cv_broadcast(&s->mlbs_free_nonempty);
+ mutex_exit(&s->mlbs_mtx);
+ return;
+ }
break;
case MLXCX_BUFFER_FREE:
VERIFY(0);
@@ -2414,7 +2514,7 @@ mlxcx_buf_return(mlxcx_t *mlxp, mlxcx_buffer_t *b)
}
list_insert_tail(&s->mlbs_free, b);
- cv_signal(&s->mlbs_free_nonempty);
+ cv_broadcast(&s->mlbs_free_nonempty);
mutex_exit(&s->mlbs_mtx);
@@ -2432,9 +2532,11 @@ void
mlxcx_buf_destroy(mlxcx_t *mlxp, mlxcx_buffer_t *b)
{
mlxcx_buf_shard_t *s = b->mlb_shard;
+
VERIFY(b->mlb_state == MLXCX_BUFFER_FREE ||
b->mlb_state == MLXCX_BUFFER_INIT);
ASSERT(mutex_owned(&s->mlbs_mtx));
+
if (b->mlb_state == MLXCX_BUFFER_FREE)
list_remove(&s->mlbs_free, b);
@@ -2454,3 +2556,20 @@ mlxcx_buf_destroy(mlxcx_t *mlxp, mlxcx_buffer_t *b)
kmem_cache_free(mlxp->mlx_bufs_cache, b);
}
+
+void
+mlxcx_shard_ready(mlxcx_buf_shard_t *s)
+{
+ mutex_enter(&s->mlbs_mtx);
+ s->mlbs_state = MLXCX_SHARD_READY;
+ mutex_exit(&s->mlbs_mtx);
+}
+
+void
+mlxcx_shard_draining(mlxcx_buf_shard_t *s)
+{
+ mutex_enter(&s->mlbs_mtx);
+ s->mlbs_state = MLXCX_SHARD_DRAINING;
+ cv_broadcast(&s->mlbs_free_nonempty);
+ mutex_exit(&s->mlbs_mtx);
+}